path: root/system/easy-kernel/0100-linux-5.15.132.patch
author    A. Wilcox <AWilcox@Wilcox-Tech.com>    2023-10-02 00:03:08 -0500
committer A. Wilcox <AWilcox@Wilcox-Tech.com>    2023-10-03 21:56:52 -0500
commit    ee71f0098a7782c4a95c2f1d7ee614569214be1c (patch)
tree      120b55e6bde7f7f7f726ba8b34355bccbbc79482 /system/easy-kernel/0100-linux-5.15.132.patch
parent    8b868f32f7e1b57c90da65157ea59eb6ce337cc9 (diff)
download  packages-ee71f0098a7782c4a95c2f1d7ee614569214be1c.tar.gz
          packages-ee71f0098a7782c4a95c2f1d7ee614569214be1c.tar.bz2
          packages-ee71f0098a7782c4a95c2f1d7ee614569214be1c.tar.xz
          packages-ee71f0098a7782c4a95c2f1d7ee614569214be1c.zip
system/easy-kernel: Update to 5.15.132-mc6
* Adds exFAT module to all architectures.
* Adds Hyper-V guest support to aarch64.
* Fixes audio output issues on some PowerBook models.
Diffstat (limited to 'system/easy-kernel/0100-linux-5.15.132.patch')
-rw-r--r--  system/easy-kernel/0100-linux-5.15.132.patch  811450
1 file changed, 811450 insertions, 0 deletions
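The commit message above notes the exFAT and Hyper-V guest enablement. As a rough, hypothetical sketch (the symbols below are the standard upstream Kconfig names; the actual easy-kernel configuration files are not part of this patch), that enablement corresponds to config fragments along these lines:

    # Hypothetical illustration only -- the real deltas live in the package's
    # kernel config files, which this patch does not touch.
    CONFIG_EXFAT_FS=m        # exFAT filesystem driver, built as a module
    CONFIG_HYPERV=m          # Hyper-V guest core support (aarch64 config)
    CONFIG_HYPERV_UTILS=m    # guest integration services (KVP, heartbeat, ...)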
diff --git a/system/easy-kernel/0100-linux-5.15.132.patch b/system/easy-kernel/0100-linux-5.15.132.patch
new file mode 100644
index 000000000..7e12c3aa5
--- /dev/null
+++ b/system/easy-kernel/0100-linux-5.15.132.patch
@@ -0,0 +1,811450 @@
+diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
+index 889ed45be4ca6..2d5a5913b5f28 100644
+--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
++++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
+@@ -51,7 +51,7 @@ Date: Dec 2014
+ KernelVersion: 4.0
+ Description: Default output terminal descriptors
+
+- All attributes read only:
++ All attributes read only except bSourceID:
+
+ ============== =============================================
+ iTerminal index of string descriptor
+diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata
+index 9ab0ef1dd1c72..299e0d1dc1619 100644
+--- a/Documentation/ABI/testing/sysfs-ata
++++ b/Documentation/ABI/testing/sysfs-ata
+@@ -107,13 +107,14 @@ Description:
+ described in ATA8 7.16 and 7.17. Only valid if
+ the device is not a PM.
+
+- pio_mode: (RO) Transfer modes supported by the device when
+- in PIO mode. Mostly used by PATA device.
++ pio_mode: (RO) PIO transfer mode used by the device.
++ Mostly used by PATA devices.
+
+- xfer_mode: (RO) Current transfer mode
++ xfer_mode: (RO) Current transfer mode. Mostly used by
++ PATA devices.
+
+- dma_mode: (RO) Transfer modes supported by the device when
+- in DMA mode. Mostly used by PATA device.
++ dma_mode: (RO) DMA transfer mode used by the device.
++ Mostly used by PATA devices.
+
+ class: (RO) Device class. Can be "ata" for disk,
+ "atapi" for packet device, "pmp" for PM, or
+diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
+index 6ad47a67521c7..f41e767e702bd 100644
+--- a/Documentation/ABI/testing/sysfs-bus-iio
++++ b/Documentation/ABI/testing/sysfs-bus-iio
+@@ -188,7 +188,7 @@ Description:
+ Raw capacitance measurement from channel Y. Units after
+ application of scale and offset are nanofarads.
+
+-What: /sys/.../iio:deviceX/in_capacitanceY-in_capacitanceZ_raw
++What: /sys/.../iio:deviceX/in_capacitanceY-capacitanceZ_raw
+ KernelVersion: 3.2
+ Contact: linux-iio@vger.kernel.org
+ Description:
+diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610
+index 308a6756d3bf3..491ead8044888 100644
+--- a/Documentation/ABI/testing/sysfs-bus-iio-vf610
++++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610
+@@ -1,4 +1,4 @@
+-What: /sys/bus/iio/devices/iio:deviceX/conversion_mode
++What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode
+ KernelVersion: 4.2
+ Contact: linux-iio@vger.kernel.org
+ Description:
+diff --git a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
+index d76cd3946434d..e9ef69aef20b1 100644
+--- a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
++++ b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
+@@ -5,6 +5,9 @@ Contact: linux-mtd@lists.infradead.org
+ Description: (RO) The JEDEC ID of the SPI NOR flash as reported by the
+ flash device.
+
++ The attribute is not present if the flash doesn't support
++ the "Read JEDEC ID" command (9Fh). This is the case for
++ non-JEDEC compliant flashes.
+
+ What: /sys/bus/spi/devices/.../spi-nor/manufacturer
+ Date: April 2021
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index b46ef147616ab..eecbd16033493 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -511,15 +511,18 @@ Description: information about CPUs heterogeneity.
+ cpu_capacity: capacity of cpu#.
+
+ What: /sys/devices/system/cpu/vulnerabilities
++ /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
++ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
++ /sys/devices/system/cpu/vulnerabilities/l1tf
++ /sys/devices/system/cpu/vulnerabilities/mds
+ /sys/devices/system/cpu/vulnerabilities/meltdown
++ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
++ /sys/devices/system/cpu/vulnerabilities/retbleed
++ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+- /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+- /sys/devices/system/cpu/vulnerabilities/l1tf
+- /sys/devices/system/cpu/vulnerabilities/mds
+ /sys/devices/system/cpu/vulnerabilities/srbds
+ /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+- /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ Date: January 2018
+ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description: Information about CPU vulnerabilities
+diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
+index ac2947b989504..3d5de44cbbee9 100644
+--- a/Documentation/ABI/testing/sysfs-driver-xen-blkback
++++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
+@@ -42,5 +42,5 @@ KernelVersion: 5.10
+ Contact: SeongJae Park <sjpark@amazon.de>
+ Description:
+ Whether to enable the persistent grants feature or not. Note
+- that this option only takes effect on newly created backends.
++ that this option only takes effect on newly connected backends.
+ The default is Y (enable).
+diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
+index 28008905615f0..1f7659aa085c2 100644
+--- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront
++++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
+@@ -15,5 +15,5 @@ KernelVersion: 5.10
+ Contact: SeongJae Park <sjpark@amazon.de>
+ Description:
+ Whether to enable the persistent grants feature or not. Note
+- that this option only takes effect on newly created frontends.
++ that this option only takes effect on newly connected frontends.
+ The default is Y (enable).
+diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
+index f627e705e663b..48d41b6696270 100644
+--- a/Documentation/ABI/testing/sysfs-fs-f2fs
++++ b/Documentation/ABI/testing/sysfs-fs-f2fs
+@@ -425,6 +425,7 @@ Description: Show status of f2fs superblock in real time.
+ 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
+ 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
+ 0x2000 SBI_IS_RESIZEFS resizefs is in process
++ 0x4000 SBI_IS_FREEZING freefs is in process
+ ====== ===================== =================================
+
+ What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
+diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count
+new file mode 100644
+index 0000000000000..156cca9dbc960
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-kernel-oops_count
+@@ -0,0 +1,6 @@
++What: /sys/kernel/oops_count
++Date: November 2022
++KernelVersion: 6.2.0
++Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
++Description:
++ Shows how many times the system has Oopsed since last boot.
+diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count
+new file mode 100644
+index 0000000000000..90a029813717d
+--- /dev/null
++++ b/Documentation/ABI/testing/sysfs-kernel-warn_count
+@@ -0,0 +1,6 @@
++What: /sys/kernel/warn_count
++Date: November 2022
++KernelVersion: 6.2.0
++Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
++Description:
++ Shows how many times the system has Warned since last boot.
+diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
+index f2b3439edcc2c..5e40b3f437f90 100644
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -37,11 +37,7 @@ Pressure interface
+ Pressure information for each resource is exported through the
+ respective file in /proc/pressure/ -- cpu, memory, and io.
+
+-The format for CPU is as such::
+-
+- some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+-
+-and for memory and IO::
++The format is as such::
+
+ some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+@@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
+ still doing productive work. As such, time spent in this subset of the
+ stall state is tracked separately and exported in the "full" averages.
+
++CPU full is undefined at the system level, but has been reported
++since 5.13, so it is set to zero for backward compatibility.
++
+ The ratios (in %) are tracked as recent trends over ten, sixty, and
+ three hundred second windows, which gives insight into short term events
+ as well as medium and long term trends. The total absolute stall time
+@@ -92,7 +91,8 @@ Triggers can be set on more than one psi metric and more than one trigger
+ for the same psi metric can be specified. However for each trigger a separate
+ file descriptor is required to be able to poll it separately from others,
+ therefore for each trigger a separate open() syscall should be made even
+-when opening the same psi interface file.
++when opening the same psi interface file. Write operations to a file descriptor
++with an already existing psi trigger will fail with EBUSY.
+
+ Monitors activate only when system enters stall state for the monitored
+ psi metric and deactivates upon exit from the stall state. While system is
+diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
+index 41191b5fb69d9..dd913eefbf312 100644
+--- a/Documentation/admin-guide/cgroup-v1/memory.rst
++++ b/Documentation/admin-guide/cgroup-v1/memory.rst
+@@ -84,6 +84,8 @@ Brief summary of control files.
+ memory.swappiness set/show swappiness parameter of vmscan
+ (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate set/show controls of moving charges
++ This knob is deprecated and shouldn't be
++ used.
+ memory.oom_control set/show oom controls.
+ memory.numa_stat show the number of memory usage per numa
+ node
+@@ -723,8 +725,15 @@ NOTE2:
+ It is recommended to set the soft limit always below the hard limit,
+ otherwise the hard limit will take precedence.
+
+-8. Move charges at task migration
+-=================================
++8. Move charges at task migration (DEPRECATED!)
++===============================================
++
++THIS IS DEPRECATED!
++
++It's expensive and unreliable! It's better practice to launch workload
++tasks directly from inside their target cgroup. Use dedicated workload
++cgroups to allow fine-grained policy adjustments without having to
++move physical pages between control domains.
+
+ Users can move charges associated with a task along with task migration, that
+ is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
+index f170d88202588..3766bf8a1c20e 100644
+--- a/Documentation/admin-guide/cifs/usage.rst
++++ b/Documentation/admin-guide/cifs/usage.rst
+@@ -734,10 +734,9 @@ SecurityFlags Flags which control security negotiation and
+ using weaker password hashes is 0x37037 (lanman,
+ plaintext, ntlm, ntlmv2, signing allowed). Some
+ SecurityFlags require the corresponding menuconfig
+- options to be enabled (lanman and plaintext require
+- CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
+- plaintext authentication currently requires also
+- enabling lanman authentication in the security flags
++ options to be enabled. Enabling plaintext
++ authentication currently requires also enabling
++ lanman authentication in the security flags
+ because the cifs module only supports sending
+ laintext passwords using the older lanman dialect
+ form of the session setup SMB. (e.g. for authentication
+diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst
+index e5242ff17e9b7..981d6a9076994 100644
+--- a/Documentation/admin-guide/device-mapper/dm-init.rst
++++ b/Documentation/admin-guide/device-mapper/dm-init.rst
+@@ -123,3 +123,11 @@ Other examples (per target):
+ 0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
+ fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
+ 51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
++
++For setups using device-mapper on top of asynchronously probed block
++devices (MMC, USB, ..), it may be necessary to tell dm-init to
++explicitly wait for them to become available before setting up the
++device-mapper tables. This can be done with the "dm-mod.waitfor="
++module parameter, which takes a list of devices to wait for::
++
++ dm-mod.waitfor=<device1>[,..,<deviceN>]
+diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
+index 10429779a91ab..724e028d1858b 100644
+--- a/Documentation/admin-guide/device-mapper/writecache.rst
++++ b/Documentation/admin-guide/device-mapper/writecache.rst
+@@ -78,16 +78,16 @@ Status:
+ 2. the number of blocks
+ 3. the number of free blocks
+ 4. the number of blocks under writeback
+-5. the number of read requests
+-6. the number of read requests that hit the cache
+-7. the number of write requests
+-8. the number of write requests that hit uncommitted block
+-9. the number of write requests that hit committed block
+-10. the number of write requests that bypass the cache
+-11. the number of write requests that are allocated in the cache
++5. the number of read blocks
++6. the number of read blocks that hit the cache
++7. the number of write blocks
++8. the number of write blocks that hit uncommitted block
++9. the number of write blocks that hit committed block
++10. the number of write blocks that bypass the cache
++11. the number of write blocks that are allocated in the cache
+ 12. the number of write requests that are blocked on the freelist
+ 13. the number of flush requests
+-14. the number of discard requests
++14. the number of discarded blocks
+
+ Messages:
+ flush
+diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
+index 922c23bb4372a..c07dc0ee860e7 100644
+--- a/Documentation/admin-guide/devices.txt
++++ b/Documentation/admin-guide/devices.txt
+@@ -2339,13 +2339,7 @@
+ disks (see major number 3) except that the limit on
+ partitions is 31.
+
+- 162 char Raw block device interface
+- 0 = /dev/rawctl Raw I/O control device
+- 1 = /dev/raw/raw1 First raw I/O device
+- 2 = /dev/raw/raw2 Second raw I/O device
+- ...
+- max minor number of raw device is set by kernel config
+- MAX_RAW_DEVS or raw module parameter 'max_raw_devs'
++ 162 char Used for (now removed) raw block device interface
+
+ 163 char
+
+diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
+new file mode 100644
+index 0000000000000..ec6e9f5bcf9e8
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
+@@ -0,0 +1,92 @@
++
++.. SPDX-License-Identifier: GPL-2.0
++
++Cross-Thread Return Address Predictions
++=======================================
++
++Certain AMD and Hygon processors are subject to a cross-thread return address
++predictions vulnerability. When running in SMT mode and one sibling thread
++transitions out of C0 state, the other sibling thread could use return target
++predictions from the sibling thread that transitioned out of C0.
++
++The Spectre v2 mitigations protect the Linux kernel, as it fills the return
++address prediction entries with safe targets when context switching to the idle
++thread. However, KVM does allow a VMM to prevent exiting guest mode when
++transitioning out of C0. This could result in a guest-controlled return target
++being consumed by the sibling thread.
++
++Affected processors
++-------------------
++
++The following CPUs are vulnerable:
++
++ - AMD Family 17h processors
++ - Hygon Family 18h processors
++
++Related CVEs
++------------
++
++The following CVE entry is related to this issue:
++
++ ============== =======================================
++ CVE-2022-27672 Cross-Thread Return Address Predictions
++ ============== =======================================
++
++Problem
++-------
++
++Affected SMT-capable processors support 1T and 2T modes of execution when SMT
++is enabled. In 2T mode, both threads in a core are executing code. For the
++processor core to enter 1T mode, it is required that one of the threads
++requests to transition out of the C0 state. This can be communicated with the
++HLT instruction or with an MWAIT instruction that requests non-C0.
++When the thread re-enters the C0 state, the processor transitions back
++to 2T mode, assuming the other thread is also still in C0 state.
++
++In affected processors, the return address predictor (RAP) is partitioned
++depending on the SMT mode. For instance, in 2T mode each thread uses a private
++16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
++transition between 1T/2T mode, the RAP contents are not modified but the RAP
++pointers (which control the next return target to use for predictions) may
++change. This behavior may result in return targets from one SMT thread being
++used by RET predictions in the sibling thread following a 1T/2T switch. In
++particular, a RET instruction executed immediately after a transition to 1T may
++use a return target from the thread that just became idle. In theory, this
++could lead to information disclosure if the return targets used do not come
++from trustworthy code.
++
++Attack scenarios
++----------------
++
++An attack can be mounted on affected processors by performing a series of CALL
++instructions with targeted return locations and then transitioning out of C0
++state.
++
++Mitigation mechanism
++--------------------
++
++Before entering idle state, the kernel context switches to the idle thread. The
++context switch fills the RAP entries (referred to as the RSB in Linux) with safe
++targets by performing a sequence of CALL instructions.
++
++Prevent a guest VM from directly putting the processor into an idle state by
++intercepting HLT and MWAIT instructions.
++
++Both mitigations are required to fully address this issue.
++
++Mitigation control on the kernel command line
++---------------------------------------------
++
++Use existing Spectre v2 mitigations that will fill the RSB on context switch.
++
++Mitigation control for KVM - module parameter
++---------------------------------------------
++
++By default, the KVM hypervisor mitigates this issue by intercepting guest
++attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
++capability to override those interceptions, but since this is not common, the
++mitigation that covers this path is not enabled by default.
++
++The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
++using the boolean module parameter mitigate_smt_rsb, e.g.:
++ kvm.mitigate_smt_rsb=1
+diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
+new file mode 100644
+index 0000000000000..264bfa937f7de
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
+@@ -0,0 +1,109 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++GDS - Gather Data Sampling
++==========================
++
++Gather Data Sampling is a hardware vulnerability which allows unprivileged
++speculative access to data which was previously stored in vector registers.
++
++Problem
++-------
++When a gather instruction performs loads from memory, different data elements
++are merged into the destination vector register. However, when a gather
++instruction that is transiently executed encounters a fault, stale data from
++architectural or internal vector registers may get transiently forwarded to the
++destination vector register instead. This will allow a malicious attacker to
++infer stale data using typical side channel techniques like cache timing
++attacks. GDS is a purely sampling-based attack.
++
++The attacker uses gather instructions to infer the stale vector register data.
++The victim does not need to do anything special other than use the vector
++registers. The victim does not need to use gather instructions to be
++vulnerable.
++
++Because the buffers are shared between Hyper-Threads cross Hyper-Thread attacks
++are possible.
++
++Attack scenarios
++----------------
++Without mitigation, GDS can infer stale data across virtually all
++permission boundaries:
++
++ Non-enclaves can infer SGX enclave data
++ Userspace can infer kernel data
++ Guests can infer data from hosts
++ Guest can infer guest from other guests
++ Users can infer data from other users
++
++Because of this, it is important to ensure that the mitigation stays enabled in
++lower-privilege contexts like guests and when running outside SGX enclaves.
++
++The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
++that guests are not allowed to disable the GDS mitigation. If a host erred and
++allowed this, a guest could theoretically disable GDS mitigation, mount an
++attack, and re-enable it.
++
++Mitigation mechanism
++--------------------
++This issue is mitigated in microcode. The microcode defines the following new
++bits:
++
++ ================================ === ============================
++ IA32_ARCH_CAPABILITIES[GDS_CTRL] R/O Enumerates GDS vulnerability
++ and mitigation support.
++ IA32_ARCH_CAPABILITIES[GDS_NO] R/O Processor is not vulnerable.
++ IA32_MCU_OPT_CTRL[GDS_MITG_DIS] R/W Disables the mitigation
++ 0 by default.
++ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK] R/W Locks GDS_MITG_DIS=0. Writes
++ to GDS_MITG_DIS are ignored
++ Can't be cleared once set.
++ ================================ === ============================
++
++GDS can also be mitigated on systems that don't have updated microcode by
++disabling AVX. This can be done by setting gather_data_sampling="force" or
++"clearcpuid=avx" on the kernel command-line.
++
++If used, these options will disable AVX use by turning off XSAVE YMM support.
++However, the processor will still enumerate AVX support. Userspace that
++does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
++support will break.
++
++Mitigation control on the kernel command line
++---------------------------------------------
++The mitigation can be disabled by setting "gather_data_sampling=off" or
++"mitigations=off" on the kernel command line. Not specifying either will default
++to the mitigation being enabled. Specifying "gather_data_sampling=force" will
++use the microcode mitigation when available or disable AVX on affected systems
++where the microcode hasn't been updated to include the mitigation.
++
++GDS System Information
++------------------------
++The kernel provides vulnerability status information through sysfs. For
++GDS this can be accessed by the following sysfs file:
++
++/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
++
++The possible values contained in this file are:
++
++ ============================== =============================================
++ Not affected Processor not vulnerable.
++ Vulnerable Processor vulnerable and mitigation disabled.
++ Vulnerable: No microcode Processor vulnerable and microcode is missing
++ mitigation.
++ Mitigation: AVX disabled,
++ no microcode Processor is vulnerable and microcode is missing
++ mitigation. AVX disabled as mitigation.
++ Mitigation: Microcode Processor is vulnerable and mitigation is in
++ effect.
++ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
++ effect and cannot be disabled.
++ Unknown: Dependent on
++ hypervisor status Running on a virtual guest processor that is
++ affected but with no way to know if host
++ processor is mitigated or vulnerable.
++ ============================== =============================================
++
++GDS Default mitigation
++----------------------
++The updated microcode will enable the mitigation by default. The kernel's
++default action is to leave the mitigation enabled.
+diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
+index 8cbc711cda935..6828102baaa7a 100644
+--- a/Documentation/admin-guide/hw-vuln/index.rst
++++ b/Documentation/admin-guide/hw-vuln/index.rst
+@@ -17,3 +17,7 @@ are configurable at compile, boot or run time.
+ special-register-buffer-data-sampling.rst
+ core-scheduling.rst
+ l1d_flush.rst
++ processor_mmio_stale_data.rst
++ cross-thread-rsb.rst
++ gather_data_sampling.rst
++ srso
+diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+new file mode 100644
+index 0000000000000..c98fd11907cc8
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+@@ -0,0 +1,260 @@
++=========================================
++Processor MMIO Stale Data Vulnerabilities
++=========================================
++
++Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
++(MMIO) vulnerabilities that can expose data. The sequences of operations for
++exposing data range from simple to very complex. Because most of the
++vulnerabilities require the attacker to have access to MMIO, many environments
++are not affected. System environments using virtualization where MMIO access is
++provided to untrusted guests may need mitigation. These vulnerabilities are
++not transient execution attacks. However, these vulnerabilities may propagate
++stale data into core fill buffers where the data can subsequently be inferred
++by an unmitigated transient execution attack. Mitigation for these
++vulnerabilities includes a combination of microcode update and software
++changes, depending on the platform and usage model. Some of these mitigations
++are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
++those used to mitigate Special Register Buffer Data Sampling (SRBDS).
++
++Data Propagators
++================
++Propagators are operations that result in stale data being copied or moved from
++one microarchitectural buffer or register to another. Processor MMIO Stale Data
++Vulnerabilities are operations that may result in stale data being directly
++read into an architectural, software-visible state or sampled from a buffer or
++register.
++
++Fill Buffer Stale Data Propagator (FBSDP)
++-----------------------------------------
++Stale data may propagate from fill buffers (FB) into the non-coherent portion
++of the uncore on some non-coherent writes. Fill buffer propagation by itself
++does not make stale data architecturally visible. Stale data must be propagated
++to a location where it is subject to reading or sampling.
++
++Sideband Stale Data Propagator (SSDP)
++-------------------------------------
++The sideband stale data propagator (SSDP) is limited to the client (including
++Intel Xeon server E3) uncore implementation. The sideband response buffer is
++shared by all client cores. For non-coherent reads that go to sideband
++destinations, the uncore logic returns 64 bytes of data to the core, including
++both requested data and unrequested stale data, from a transaction buffer and
++the sideband response buffer. As a result, stale data from the sideband
++response and transaction buffers may now reside in a core fill buffer.
++
++Primary Stale Data Propagator (PSDP)
++------------------------------------
++The primary stale data propagator (PSDP) is limited to the client (including
++Intel Xeon server E3) uncore implementation. Similar to the sideband response
++buffer, the primary response buffer is shared by all client cores. For some
++processors, MMIO primary reads will return 64 bytes of data to the core fill
++buffer including both requested data and unrequested stale data. This is
++similar to the sideband stale data propagator.
++
++Vulnerabilities
++===============
++Device Register Partial Write (DRPW) (CVE-2022-21166)
++-----------------------------------------------------
++Some endpoint MMIO registers incorrectly handle writes that are smaller than
++the register size. Instead of aborting the write or only copying the correct
++subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
++specified by the write transaction may be written to the register. On
++processors affected by FBSDP, this may expose stale data from the fill buffers
++of the core that created the write transaction.
++
++Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
++----------------------------------------------------
++After propagators may have moved data around the uncore and copied stale data
++into client core fill buffers, processors affected by MFBDS can leak data from
++the fill buffer. It is limited to the client (including Intel Xeon server E3)
++uncore implementation.
++
++Shared Buffers Data Read (SBDR) (CVE-2022-21123)
++------------------------------------------------
++It is similar to Shared Buffer Data Sampling (SBDS) except that the data is
++directly read into the architectural software-visible state. It is limited to
++the client (including Intel Xeon server E3) uncore implementation.
++
++Affected Processors
++===================
++Not all the CPUs are affected by all the variants. For instance, most
++processors for the server market (excluding Intel Xeon E3 processors) are
++impacted by only Device Register Partial Write (DRPW).
++
++Below is the list of affected Intel processors [#f1]_:
++
++ =================== ============ =========
++ Common name Family_Model Steppings
++ =================== ============ =========
++ HASWELL_X 06_3FH 2,4
++ SKYLAKE_L 06_4EH 3
++ BROADWELL_X 06_4FH All
++ SKYLAKE_X 06_55H 3,4,6,7,11
++ BROADWELL_D 06_56H 3,4,5
++ SKYLAKE 06_5EH 3
++ ICELAKE_X 06_6AH 4,5,6
++ ICELAKE_D 06_6CH 1
++ ICELAKE_L 06_7EH 5
++ ATOM_TREMONT_D 06_86H All
++ LAKEFIELD 06_8AH 1
++ KABYLAKE_L 06_8EH 9 to 12
++ ATOM_TREMONT 06_96H 1
++ ATOM_TREMONT_L 06_9CH 0
++ KABYLAKE 06_9EH 9 to 13
++ COMETLAKE 06_A5H 2,3,5
++ COMETLAKE_L 06_A6H 0,1
++ ROCKETLAKE 06_A7H 1
++ =================== ============ =========
++
++If a CPU is in the affected processor list, but not affected by a variant, it
++is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
++section, mitigation largely remains the same for all the variants, i.e. to
++clear the CPU fill buffers via VERW instruction.
++
++New bits in MSRs
++================
++Newer processors and microcode update on existing affected processors added new
++bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
++specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
++capability.
++
++MSR IA32_ARCH_CAPABILITIES
++--------------------------
++Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
++ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
++ data propagator (SSDP).
++Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
++ Stale Data Propagator (FBSDP).
++Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
++ Propagator (PSDP).
++Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
++ values as part of MD_CLEAR operations. Processors that do not
++ enumerate MDS_NO (meaning they are affected by MDS) but that do
++ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
++ FB_CLEAR as part of their MD_CLEAR support.
++Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
++ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
++ bit can be set to cause the VERW instruction to not perform the
++ FB_CLEAR action. Not all processors that support FB_CLEAR will support
++ FB_CLEAR_CTRL.
++
++MSR IA32_MCU_OPT_CTRL
++---------------------
++Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
++action. This may be useful to reduce the performance impact of FB_CLEAR in
++cases where system software deems it warranted (for example, when performance
++is more critical, or the untrusted software has no MMIO access). Note that
++FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
++FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
++that enumerate FB_CLEAR.
++
++Mitigation
++==========
++Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
++same mitigation strategy to force the CPU to clear the affected buffers before
++an attacker can extract the secrets.
++
++This is achieved by using the otherwise unused and obsolete VERW instruction in
++combination with a microcode update. The microcode clears the affected CPU
++buffers when the VERW instruction is executed.
++
++Kernel reuses the MDS function to invoke the buffer clearing:
++
++ mds_clear_cpu_buffers()
++
++On MDS affected CPUs, the kernel already invokes CPU buffer clear on
++kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
++additional mitigation is needed on such CPUs.
++
++For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker
++with MMIO capability. Therefore, VERW is not required for kernel/userspace. For
++virtualization case, VERW is only needed at VMENTER for a guest with MMIO
++capability.
++
++Mitigation points
++-----------------
++Return to user space
++^^^^^^^^^^^^^^^^^^^^
++Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
++needed.
++
++C-State transition
++^^^^^^^^^^^^^^^^^^
++Control register writes by CPU during C-state transition can propagate data
++from fill buffer to uncore buffers. Execute VERW before C-state transition to
++clear CPU fill buffers.
++
++Guest entry point
++^^^^^^^^^^^^^^^^^
++Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
++execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
++MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO
++Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
++
++Mitigation control on the kernel command line
++---------------------------------------------
++The kernel command line allows to control the Processor MMIO Stale Data
++mitigations at boot time with the option "mmio_stale_data=". The valid
++arguments for this option are:
++
++ ========== =================================================================
++ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
++ on exit to userspace and when entering a VM. Idle transitions are
++ protected as well. It does not automatically disable SMT.
++ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
++ complete mitigation.
++ off Disables mitigation completely.
++ ========== =================================================================
++
++If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
++command line, then the kernel selects the appropriate mitigation.
++
++Mitigation status information
++-----------------------------
++The Linux kernel provides a sysfs interface to enumerate the current
++vulnerability status of the system: whether the system is vulnerable, and
++which mitigations are active. The relevant sysfs file is:
++
++ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
++
++The possible values in this file are:
++
++ .. list-table::
++
++ * - 'Not affected'
++ - The processor is not vulnerable
++ * - 'Vulnerable'
++ - The processor is vulnerable, but no mitigation enabled
++ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
++ - The processor is vulnerable, but microcode is not updated. The
++ mitigation is enabled on a best effort basis.
++ * - 'Mitigation: Clear CPU buffers'
++ - The processor is vulnerable and the CPU buffer clearing mitigation is
++ enabled.
++ * - 'Unknown: No mitigations'
++ - The processor vulnerability status is unknown because it is
++ out of Servicing period. Mitigation is not attempted.
++
++Definitions:
++------------
++
++Servicing period: The process of providing functional and security updates to
++Intel processors or platforms, utilizing the Intel Platform Update (IPU)
++process or other similar mechanisms.
++
++End of Servicing Updates (ESU): ESU is the date at which Intel will no
++longer provide Servicing, such as through IPU or other similar update
++processes. ESU dates will typically be aligned to end of quarter.
++
++If the processor is vulnerable then the following information is appended to
++the above information:
++
++ ======================== ===========================================
++ 'SMT vulnerable' SMT is enabled
++ 'SMT disabled' SMT is disabled
++ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
++ ======================== ===========================================
++
++References
++----------
++.. [#f1] Affected Processors
++ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
+diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
+index e05e581af5cfe..0fba3758d0da8 100644
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -60,8 +60,8 @@ privileged data touched during the speculative execution.
+ Spectre variant 1 attacks take advantage of speculative execution of
+ conditional branches, while Spectre variant 2 attacks use speculative
+ execution of indirect branches to leak privileged memory.
+-See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
+-:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
++See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
++:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
+
+ Spectre variant 1 (Bounds Check Bypass)
+ ---------------------------------------
+@@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the
+ speculative execution's side effects left in level 1 cache to infer the
+ victim's data.
+
++Yet another variant 2 attack vector is for the attacker to poison the
++Branch History Buffer (BHB) to speculatively steer an indirect branch
++to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
++associated with the source address of the indirect branch. Specifically,
++the BHB might be shared across privilege levels even in the presence of
++Enhanced IBRS.
++
++Currently the only known real-world BHB attack vector is via
++unprivileged eBPF. Therefore, it's highly recommended to not enable
++unprivileged eBPF, especially when eIBRS is used (without retpolines).
++For a full mitigation against BHB attacks, it's recommended to use
++retpolines (or eIBRS combined with retpolines).
++
+ Attack scenarios
+ ----------------
+
+@@ -364,13 +377,15 @@ The possible values in this file are:
+
+ - Kernel status:
+
+- ==================================== =================================
+- 'Not affected' The processor is not vulnerable
+- 'Vulnerable' Vulnerable, no mitigation
+- 'Mitigation: Full generic retpoline' Software-focused mitigation
+- 'Mitigation: Full AMD retpoline' AMD-specific software mitigation
+- 'Mitigation: Enhanced IBRS' Hardware-focused mitigation
+- ==================================== =================================
++ ======================================== =================================
++ 'Not affected' The processor is not vulnerable
++ 'Mitigation: None' Vulnerable, no mitigation
++ 'Mitigation: Retpolines' Use Retpoline thunks
++ 'Mitigation: LFENCE' Use LFENCE instructions
++ 'Mitigation: Enhanced IBRS' Hardware-focused mitigation
++ 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines
++ 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE
++ ======================================== =================================
+
+ - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
+ used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
+@@ -407,6 +422,14 @@ The possible values in this file are:
+ 'RSB filling' Protection of RSB on context switch enabled
+ ============= ===========================================
+
++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
++
++ =========================== =======================================================
++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
++ =========================== =======================================================
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+@@ -456,8 +479,16 @@ Spectre variant 2
+ On Intel Skylake-era systems the mitigation covers most, but not all,
+ cases. See :ref:`[3] <spec_ref3>` for more details.
+
+- On CPUs with hardware mitigation for Spectre variant 2 (e.g. Enhanced
+- IBRS on x86), retpoline is automatically disabled at run time.
++ On CPUs with hardware mitigation for Spectre variant 2 (e.g. IBRS
++ or enhanced IBRS on x86), retpoline is automatically disabled at run time.
++
++ Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
++ boot, by setting the IBRS bit, and they're automatically protected against
++ Spectre v2 variant attacks, including cross-thread branch target injections
++ on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
++
++ Legacy IBRS systems clear the IBRS bit on exit to userspace and
++ therefore explicitly enable STIBP for that
+
+ The retpoline mitigation is turned on by default on vulnerable
+ CPUs. It can be forced on or off by the administrator
+@@ -468,7 +499,7 @@ Spectre variant 2
+ before invoking any firmware code to prevent Spectre variant 2 exploits
+ using the firmware.
+
+- Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y
++ Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y
+ and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes
+ attacks on the kernel generally more difficult.
+
+@@ -481,9 +512,12 @@ Spectre variant 2
+ For Spectre variant 2 mitigation, individual user programs
+ can be compiled with return trampolines for indirect branches.
+ This protects them from consuming poisoned entries in the branch
+- target buffer left by malicious software. Alternatively, the
+- programs can disable their indirect branch speculation via prctl()
+- (See :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
++ target buffer left by malicious software.
++
++ On legacy IBRS systems, at return to userspace, implicit STIBP is disabled
++ because the kernel clears the IBRS bit. In this case, the userspace programs
++ can disable indirect branch speculation via prctl() (See
++ :ref:`Documentation/userspace-api/spec_ctrl.rst <set_spec_ctrl>`).
+ On x86, this will turn on STIBP to guard against attacks from the
+ sibling thread when the user program is running, and use IBPB to
+ flush the branch target buffer when switching to/from the program.
+@@ -584,12 +618,13 @@ kernel command line.
+
+ Specific mitigations can also be selected manually:
+
+- retpoline
+- replace indirect branches
+- retpoline,generic
+- google's original retpoline
+- retpoline,amd
+- AMD-specific minimal thunk
++ retpoline auto pick between generic,lfence
++ retpoline,generic Retpolines
++ retpoline,lfence LFENCE; indirect branch
++ retpoline,amd alias for retpoline,lfence
++ eibrs enhanced IBRS
++ eibrs,retpoline enhanced IBRS + Retpolines
++ eibrs,lfence enhanced IBRS + LFENCE
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+@@ -730,7 +765,7 @@ AMD white papers:
+
+ .. _spec_ref6:
+
+-[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
++[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
+
+ ARM white papers:
+
+diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
+new file mode 100644
+index 0000000000000..f79cb11b080f6
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/srso.rst
+@@ -0,0 +1,133 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++Speculative Return Stack Overflow (SRSO)
++========================================
++
++This is a mitigation for the speculative return stack overflow (SRSO)
++vulnerability found on AMD processors. The mechanism is by now the well
++known scenario of poisoning CPU functional units - the Branch Target
++Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
++tricking the elevated privilege domain (the kernel) into leaking
++sensitive data.
++
++AMD CPUs predict RET instructions using a Return Address Predictor (aka
++Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
++CALL instruction (i.e., an instruction predicted to be a CALL but is
++not actually a CALL) can create an entry in the RAP which may be used
++to predict the target of a subsequent RET instruction.
++
++The specific circumstances that lead to this varies by microarchitecture
++but the concern is that an attacker can mis-train the CPU BTB to predict
++non-architectural CALL instructions in kernel space and use this to
++control the speculative target of a subsequent kernel RET, potentially
++leading to information disclosure via a speculative side-channel.
++
++The issue is tracked under CVE-2023-20569.
++
++Affected processors
++-------------------
++
++AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
++processors have not been investigated.
++
++System information and options
++------------------------------
++
++First of all, it is required that the latest microcode be loaded for
++mitigations to be effective.
++
++The sysfs file showing SRSO mitigation status is:
++
++ /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
++
++The possible values in this file are:
++
++ - 'Not affected' The processor is not vulnerable
++
++ - 'Vulnerable: no microcode' The processor is vulnerable, no
++ microcode extending IBPB functionality
++ to address the vulnerability has been
++ applied.
++
++ - 'Mitigation: microcode' Extended IBPB functionality microcode
++ patch has been applied. It does not
++ address User->Kernel and Guest->Host
++ transitions protection but it does
++ address User->User and VM->VM attack
++ vectors.
++
++ (spec_rstack_overflow=microcode)
++
++ - 'Mitigation: safe RET' Software-only mitigation. It complements
++ the extended IBPB microcode patch
++ functionality by addressing User->Kernel
++ and Guest->Host transitions protection.
++
++ Selected by default or by
++ spec_rstack_overflow=safe-ret
++
++ - 'Mitigation: IBPB' Similar protection as "safe RET" above
++ but employs an IBPB barrier on privilege
++ domain crossings (User->Kernel,
++ Guest->Host).
++
++ (spec_rstack_overflow=ibpb)
++
++ - 'Mitigation: IBPB on VMEXIT' Mitigation addressing the cloud provider
++ scenario - the Guest->Host transitions
++ only.
++
++ (spec_rstack_overflow=ibpb-vmexit)
++
++In order to exploit vulnerability, an attacker needs to:
++
++ - gain local access on the machine
++
++ - break kASLR
++
++ - find gadgets in the running kernel in order to use them in the exploit
++
++ - potentially create and pin an additional workload on the sibling
++ thread, depending on the microarchitecture (not necessary on fam 0x19)
++
++ - run the exploit
++
++Considering the performance implications of each mitigation type, the
++default one is 'Mitigation: safe RET' which should take care of most
++attack vectors, including the local User->Kernel one.
++
++As always, the user is advised to keep her/his system up-to-date by
++applying software updates regularly.
++
++The default setting will be reevaluated when needed and especially when
++new attack vectors appear.
++
++As one can surmise, 'Mitigation: safe RET' does come at the cost of some
++performance depending on the workload. If one trusts her/his userspace
++and does not want to suffer the performance impact, one can always
++disable the mitigation with spec_rstack_overflow=off.
++
++Similarly, 'Mitigation: IBPB' is another full mitigation type employing
++an indirect branch prediction barrier after having applied the required
++microcode patch for one's system. This mitigation comes also at
++a performance cost.
++
++Mitigation: safe RET
++--------------------
++
++The mitigation works by ensuring all RET instructions speculate to
++a controlled location, similar to how speculation is controlled in the
++retpoline sequence. To accomplish this, the __x86_return_thunk forces
++the CPU to mispredict every function return using a 'safe return'
++sequence.
++
++To ensure the safety of this mitigation, the kernel must ensure that the
++safe return sequence is itself free from attacker interference. In Zen3
++and Zen4, this is accomplished by creating a BTB alias between the
++untraining function srso_alias_untrain_ret() and the safe return
++function srso_alias_safe_ret() which results in evicting a potentially
++poisoned BTB entry and using that safe one for all function returns.
++
++In older Zen1 and Zen2, this is accomplished using a reinterpretation
++technique similar to Retbleed one: srso_untrain_ret() and
++srso_safe_ret().
+diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt
+index 82aecdcae8a6c..030de95e3e6b2 100644
+--- a/Documentation/admin-guide/kdump/gdbmacros.txt
++++ b/Documentation/admin-guide/kdump/gdbmacros.txt
+@@ -312,10 +312,10 @@ define dmesg
+ set var $prev_flags = $info->flags
+ end
+
+- set var $id = ($id + 1) & $id_mask
+ if ($id == $end_id)
+ loop_break
+ end
++ set var $id = ($id + 1) & $id_mask
+ end
+ end
+ document dmesg
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 43dc35fe5bc03..2cd4d66ab64c2 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -916,10 +916,6 @@
+
+ debugpat [X86] Enable PAT debugging
+
+- decnet.addr= [HW,NET]
+- Format: <area>[,<node>]
+- See also Documentation/networking/decnet.rst.
+-
+ default_hugepagesz=
+ [HW] The size of the default HugeTLB page. This is
+ the size represented by the legacy /proc/ hugepages
+@@ -1505,6 +1501,26 @@
+ Format: off | on
+ default: on
+
++ gather_data_sampling=
++ [X86,INTEL] Control the Gather Data Sampling (GDS)
++ mitigation.
++
++ Gather Data Sampling is a hardware vulnerability which
++ allows unprivileged speculative access to data which was
++ previously stored in vector registers.
++
++ This issue is mitigated by default in updated microcode.
++ The mitigation may have a performance impact but can be
++ disabled. On systems without the microcode mitigation
++ disabling AVX serves as a mitigation.
++
++ force: Disable AVX to mitigate systems without
++ microcode mitigation. No effect if the microcode
++ mitigation is present. Known to cause crashes in
++ userspace with buggy AVX enumeration.
++
++ off: Disable GDS mitigation.
++
+ gcov_persist= [GCOV] When non-zero (default), profiling data for
+ kernel modules is saved and remains accessible via
+ debugfs, even when the module is unloaded/reloaded.
+@@ -1690,6 +1706,8 @@
+ architectures force reset to be always executed
+ i8042.unlock [HW] Unlock (ignore) the keylock
+ i8042.kbdreset [HW] Reset device connected to KBD port
++ i8042.probe_defer
++ [HW] Allow deferred probing upon i8042 probe errors
+
+ i810= [HW,DRM]
+
+@@ -2198,24 +2216,57 @@
+
+ ivrs_ioapic [HW,X86-64]
+ Provide an override to the IOAPIC-ID<->DEVICE-ID
+- mapping provided in the IVRS ACPI table. For
+- example, to map IOAPIC-ID decimal 10 to
+- PCI device 00:14.0 write the parameter as:
++ mapping provided in the IVRS ACPI table.
++ By default, PCI segment is 0, and can be omitted.
++
++ For example, to map IOAPIC-ID decimal 10 to
++ PCI segment 0x1 and PCI device 00:14.0,
++ write the parameter as:
++ ivrs_ioapic=10@0001:00:14.0
++
++ Deprecated formats:
++ * To map IOAPIC-ID decimal 10 to PCI device 00:14.0
++ write the parameter as:
+ ivrs_ioapic[10]=00:14.0
++ * To map IOAPIC-ID decimal 10 to PCI segment 0x1 and
++ PCI device 00:14.0 write the parameter as:
++ ivrs_ioapic[10]=0001:00:14.0
+
+ ivrs_hpet [HW,X86-64]
+ Provide an override to the HPET-ID<->DEVICE-ID
+- mapping provided in the IVRS ACPI table. For
+- example, to map HPET-ID decimal 0 to
+- PCI device 00:14.0 write the parameter as:
++ mapping provided in the IVRS ACPI table.
++ By default, PCI segment is 0, and can be omitted.
++
++ For example, to map HPET-ID decimal 10 to
++ PCI segment 0x1 and PCI device 00:14.0,
++ write the parameter as:
++ ivrs_hpet=10@0001:00:14.0
++
++ Deprecated formats:
++ * To map HPET-ID decimal 0 to PCI device 00:14.0
++ write the parameter as:
+ ivrs_hpet[0]=00:14.0
++ * To map HPET-ID decimal 10 to PCI segment 0x1 and
++ PCI device 00:14.0 write the parameter as:
++ ivrs_ioapic[10]=0001:00:14.0
+
+ ivrs_acpihid [HW,X86-64]
+ Provide an override to the ACPI-HID:UID<->DEVICE-ID
+- mapping provided in the IVRS ACPI table. For
+- example, to map UART-HID:UID AMD0020:0 to
+- PCI device 00:14.5 write the parameter as:
++ mapping provided in the IVRS ACPI table.
++ By default, PCI segment is 0, and can be omitted.
++
++ For example, to map UART-HID:UID AMD0020:0 to
++ PCI segment 0x1 and PCI device ID 00:14.5,
++ write the parameter as:
++ ivrs_acpihid=AMD0020:0@0001:00:14.5
++
++ Deprecated formats:
++ * To map UART-HID:UID AMD0020:0 to PCI segment is 0,
++ PCI device ID 00:14.5, write the parameter as:
+ ivrs_acpihid[00:14.5]=AMD0020:0
++ * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and
++ PCI device ID 00:14.5, write the parameter as:
++ ivrs_acpihid[0001:00:14.5]=AMD0020:0
+
+ js= [HW,JOY] Analog joystick
+ See Documentation/input/joydev/joystick.rst.
+@@ -2403,8 +2454,12 @@
+ Default is 1 (enabled)
+
+ kvm-intel.emulate_invalid_guest_state=
+- [KVM,Intel] Enable emulation of invalid guest states
+- Default is 0 (disabled)
++ [KVM,Intel] Disable emulation of invalid guest state.
++ Ignored if kvm-intel.enable_unrestricted_guest=1, as
++ guest state is never invalid for unrestricted guests.
++ This param doesn't apply to nested guests (L2), as KVM
++ never emulates invalid L2 guest state.
++ Default is 1 (enabled)
+
+ kvm-intel.flexpriority=
+ [KVM,Intel] Disable FlexPriority feature (TPR shadow).
+@@ -2999,20 +3054,23 @@
+ Disable all optional CPU mitigations. This
+ improves system performance, but it may also
+ expose users to several CPU vulnerabilities.
+- Equivalent to: nopti [X86,PPC]
++ Equivalent to: gather_data_sampling=off [X86]
+ kpti=0 [ARM64]
+- nospectre_v1 [X86,PPC]
++ kvm.nx_huge_pages=off [X86]
++ l1tf=off [X86]
++ mds=off [X86]
++ mmio_stale_data=off [X86]
++ no_entry_flush [PPC]
++ no_uaccess_flush [PPC]
+ nobp=0 [S390]
++ nopti [X86,PPC]
++ nospectre_v1 [X86,PPC]
+ nospectre_v2 [X86,PPC,S390,ARM64]
+- spectre_v2_user=off [X86]
++ retbleed=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
++ spectre_v2_user=off [X86]
+ ssbd=force-off [ARM64]
+- l1tf=off [X86]
+- mds=off [X86]
+ tsx_async_abort=off [X86]
+- kvm.nx_huge_pages=off [X86]
+- no_entry_flush [PPC]
+- no_uaccess_flush [PPC]
+
+ Exceptions:
+ This does not have any effect on
+@@ -3034,6 +3092,8 @@
+ Equivalent to: l1tf=flush,nosmt [X86]
+ mds=full,nosmt [X86]
+ tsx_async_abort=full,nosmt [X86]
++ mmio_stale_data=full,nosmt [X86]
++ retbleed=auto,nosmt [X86]
+
+ mminit_loglevel=
+ [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+@@ -3043,6 +3103,40 @@
+ log everything. Information is printed at KERN_DEBUG
+ so loglevel=8 may also need to be specified.
+
++ mmio_stale_data=
++ [X86,INTEL] Control mitigation for the Processor
++ MMIO Stale Data vulnerabilities.
++
++ Processor MMIO Stale Data is a class of
++ vulnerabilities that may expose data after an MMIO
++ operation. Exposed data could originate or end in
++ the same CPU buffers as affected by MDS and TAA.
++ Therefore, similar to MDS and TAA, the mitigation
++ is to clear the affected CPU buffers.
++
++ This parameter controls the mitigation. The
++ options are:
++
++ full - Enable mitigation on vulnerable CPUs
++
++ full,nosmt - Enable mitigation and disable SMT on
++ vulnerable CPUs.
++
++ off - Unconditionally disable mitigation
++
++ On machines affected by MDS or TAA, mmio_stale_data=off
++ alone may not take effect, because an active MDS or TAA
++ mitigation clears the same CPU buffers. To disable this
++ mitigation on such machines, mds=off and
++ tsx_async_abort=off need to be specified too.
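++
++ For example, on such a machine all three mitigations
++ would have to be turned off together (illustrative):
++ mmio_stale_data=off mds=off tsx_async_abort=off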
++
++ Not specifying this option is equivalent to
++ mmio_stale_data=full.
++
++ For details see:
++ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
++
+ module.sig_enforce
+ [KNL] When CONFIG_MODULE_SIG is set, this means that
+ modules without (valid) signatures will fail to load.
+@@ -3446,8 +3540,7 @@
+ difficult since unequal pointers can no longer be
+ compared. However, if this command-line option is
+ specified, then all normal pointers will have their true
+- value printed. Pointers printed via %pK may still be
+- hashed. This option should only be specified when
++ value printed. This option should only be specified when
+ debugging the kernel. Please do not use on production
+ kernels.
+
+@@ -4302,6 +4395,12 @@
+ fully seed the kernel's CRNG. Default is controlled
+ by CONFIG_RANDOM_TRUST_CPU.
+
++ random.trust_bootloader={on,off}
++ [KNL] Enable or disable trusting the use of a
++ seed passed by the bootloader (if available) to
++ fully seed the kernel's CRNG. Default is controlled
++ by CONFIG_RANDOM_TRUST_BOOTLOADER.
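++
++ For example, to avoid crediting a seed passed by the
++ bootloader (illustrative):
++ random.trust_bootloader=off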
++
+ randomize_kstack_offset=
+ [KNL] Enable or disable kernel stack offset
+ randomization, which provides roughly 5 bits of
+@@ -4921,6 +5020,43 @@
+
+ retain_initrd [RAM] Keep initrd memory after extraction
+
++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
++ Speculative Code Execution with Return Instructions)
++ vulnerability.
++
++ AMD-based UNRET and IBPB mitigations alone do not stop
++ sibling threads from influencing the predictions of other
++ sibling threads. For that reason, STIBP is used on
++ processors that support it, and SMT is mitigated on
++ processors that don't.
++
++ off - no mitigation
++ auto - automatically select a mitigation
++ auto,nosmt - automatically select a mitigation,
++ disabling SMT if necessary for
++ the full mitigation (only on Zen1
++ and older without STIBP).
++ ibpb - On AMD, mitigate short speculation
++ windows on basic block boundaries too.
++ Safe, highest perf impact. It also
++ enables STIBP if present. Not suitable
++ on Intel.
++ ibpb,nosmt - Like "ibpb" above but will disable SMT
++ when STIBP is not available. This is
++ the alternative for systems which do not
++ have STIBP.
++ unret - Force enable untrained return thunks,
++ only effective on AMD f15h-f17h based
++ systems.
++ unret,nosmt - Like unret, but will disable SMT when STIBP
++ is not available. This is the alternative for
++ systems which do not have STIBP.
++
++ Selecting 'auto' will choose a mitigation method at run
++ time according to the CPU.
++
++ Not specifying this option is equivalent to retbleed=auto.
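++
++ For example, an AMD system without STIBP could choose the
++ IBPB mitigation and give up SMT (illustrative):
++ retbleed=ibpb,nosmt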
++
+ rfkill.default_state=
+ 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
+ etc. communication is blocked by default.
+@@ -5261,8 +5397,13 @@
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+- retpoline,generic - google's original retpoline
+- retpoline,amd - AMD-specific minimal thunk
++ retpoline,generic - Retpolines
++ retpoline,lfence - LFENCE; indirect branch
++ retpoline,amd - alias for retpoline,lfence
++ eibrs - enhanced IBRS
++ eibrs,retpoline - enhanced IBRS + Retpolines
++ eibrs,lfence - enhanced IBRS + LFENCE
++ ibrs - use IBRS to protect kernel
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
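++
++ For example, to force the LFENCE-based sequence instead of
++ the automatically selected mitigation (illustrative):
++ spectre_v2=retpoline,lfence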
+@@ -5309,6 +5450,17 @@
+ Not specifying this option is equivalent to
+ spectre_v2_user=auto.
+
++ spec_rstack_overflow=
++ [X86] Control RAS (Return Address Stack) overflow mitigation on AMD Zen CPUs
++
++ off - Disable mitigation
++ microcode - Enable microcode mitigation only
++ safe-ret - Enable sw-only safe RET mitigation (default)
++ ibpb - Enable mitigation by issuing IBPB on
++ kernel entry
++ ibpb-vmexit - Issue IBPB only on VMEXIT
++ (cloud-specific mitigation)
++
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
+@@ -5618,10 +5770,6 @@
+ -1: disable all critical trip points in all thermal zones
+ <degrees C>: override all critical trip points
+
+- thermal.nocrt= [HW,ACPI]
+- Set to disable actions on ACPI thermal zone
+- critical and hot trip points.
+-
+ thermal.off= [HW,ACPI]
+ 1: disable ACPI thermal control
+
+@@ -6349,6 +6497,13 @@
+ improve timer resolution at the expense of processing
+ more timer interrupts.
+
++ xen.balloon_boot_timeout= [XEN]
++ The time (in seconds) to wait before giving up on booting
++ in case initial ballooning fails to free enough memory.
++ Applies only when running as an HVM or PVH guest that was
++ started with less memory configured than its allowed
++ maximum. Default is 180.
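++
++ For example, to allow up to five minutes for initial
++ ballooning (illustrative):
++ xen.balloon_boot_timeout=300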
++
+ xen.event_eoi_delay= [XEN]
+ How long to delay EOI handling in case of event
+ storms (jiffies). Default is 10.
+diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
+index fb578fbbb76ca..49857ce1cd03e 100644
+--- a/Documentation/admin-guide/mm/pagemap.rst
++++ b/Documentation/admin-guide/mm/pagemap.rst
+@@ -23,7 +23,7 @@ There are four components to pagemap:
+ * Bit 56 page exclusively mapped (since 4.2)
+ * Bit 57 pte is uffd-wp write-protected (since 5.13) (see
+ :ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
+- * Bits 57-60 zero
++ * Bits 58-60 zero
+ * Bit 61 page is file-page or shared-anon (since 3.5)
+ * Bit 62 page swapped
+ * Bit 63 page present
+diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
+index aec2cd2aaea73..19754beb5a4e6 100644
+--- a/Documentation/admin-guide/pm/cpuidle.rst
++++ b/Documentation/admin-guide/pm/cpuidle.rst
+@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
+ by default this way, for example.
+
+ The other kernel command line parameters controlling CPU idle time management
+-described below are only relevant for the *x86* architecture and some of
+-them affect Intel processors only.
++described below are only relevant for the *x86* architecture and references
++to ``intel_idle`` affect Intel processors only.
+
+ The *x86* architecture support code recognizes three kernel command line
+ options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
+@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
+ energy-efficiency. Thus using it for performance reasons may not be a good idea
+ at all.]
+
+-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
+-``acpi_idle`` to be used (as long as all of the information needed by it is
+-there in the system's ACPI tables), but it is not allowed to use the
+-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
++The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
++the CPU to enter idle states. When this option is used, the ``acpi_idle``
++driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
++running Intel processors, this option disables the ``intel_idle`` driver
++and forces the use of the ``acpi_idle`` driver instead. Note that in either
++case, ``acpi_idle`` driver will function only if all the information needed
++by it is in the system's ACPI tables.
+
+ In addition to the architecture-level kernel command line options affecting CPU
+ idle time management, there are parameters affecting individual ``CPUIdle``
+diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst
+index 82e29837d5898..5a6993795bd26 100644
+--- a/Documentation/admin-guide/security-bugs.rst
++++ b/Documentation/admin-guide/security-bugs.rst
+@@ -63,31 +63,28 @@ information submitted to the security list and any followup discussions
+ of the report are treated confidentially even after the embargo has been
+ lifted, in perpetuity.
+
+-Coordination
+-------------
+-
+-Fixes for sensitive bugs, such as those that might lead to privilege
+-escalations, may need to be coordinated with the private
+-<linux-distros@vs.openwall.org> mailing list so that distribution vendors
+-are well prepared to issue a fixed kernel upon public disclosure of the
+-upstream fix. Distros will need some time to test the proposed patch and
+-will generally request at least a few days of embargo, and vendor update
+-publication prefers to happen Tuesday through Thursday. When appropriate,
+-the security team can assist with this coordination, or the reporter can
+-include linux-distros from the start. In this case, remember to prefix
+-the email Subject line with "[vs]" as described in the linux-distros wiki:
+-<http://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists>
++Coordination with other groups
++------------------------------
++
++The kernel security team strongly recommends that reporters of potential
++security issues NEVER contact the "linux-distros" mailing list until
++AFTER discussing it with the kernel security team. Do not Cc: both
++lists at once. You may contact the linux-distros mailing list after a
++fix has been agreed on and you fully understand the requirements that
++doing so will impose on you and the kernel community.
++
++The different lists have different goals and the linux-distros rules do
++not contribute to actually fixing any potential security problems.
+
+ CVE assignment
+ --------------
+
+-The security team does not normally assign CVEs, nor do we require them
+-for reports or fixes, as this can needlessly complicate the process and
+-may delay the bug handling. If a reporter wishes to have a CVE identifier
+-assigned ahead of public disclosure, they will need to contact the private
+-linux-distros list, described above. When such a CVE identifier is known
+-before a patch is provided, it is desirable to mention it in the commit
+-message if the reporter agrees.
++The security team does not assign CVEs, nor do we require them for
++reports or fixes, as this can needlessly complicate the process and may
++delay the bug handling. If a reporter wishes to have a CVE identifier
++assigned, they should find one by themselves, for example by contacting
++MITRE directly. However under no circumstances will a patch inclusion
++be delayed to wait for a CVE identifier to arrive.
+
+ Non-disclosure agreements
+ -------------------------
+diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
+index 426162009ce99..48b91c485c993 100644
+--- a/Documentation/admin-guide/sysctl/kernel.rst
++++ b/Documentation/admin-guide/sysctl/kernel.rst
+@@ -671,6 +671,15 @@ This is the default behavior.
+ an oops event is detected.
+
+
++oops_limit
++==========
++
++Number of kernel oopses after which the kernel should panic when
++``panic_on_oops`` is not set. Setting this to 0 disables checking
++the count. Setting this to 1 has the same effect as setting
++``panic_on_oops=1``. The default value is 10000.
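++
++ For example, to disable the limit entirely (illustrative)::
++
++     echo 0 > /proc/sys/kernel/oops_limit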
++
++
+ osrelease, ostype & version
+ ===========================
+
+@@ -795,6 +804,7 @@ bit 1 print system memory info
+ bit 2 print timer info
+ bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
+ bit 4 print ftrace buffer
++bit 5 print all printk messages in buffer
+ ===== ============================================
+
+ So for example to print tasks and memory info on panic, user can::
+@@ -1013,28 +1023,22 @@ This is a directory, with the following entries:
+ * ``boot_id``: a UUID generated the first time this is retrieved, and
+ unvarying after that;
+
++* ``uuid``: a UUID generated every time this is retrieved (this can
++ thus be used to generate UUIDs at will);
++
+ * ``entropy_avail``: the pool's entropy count, in bits;
+
+ * ``poolsize``: the entropy pool size, in bits;
+
+ * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
+- number of seconds between urandom pool reseeding).
+-
+-* ``uuid``: a UUID generated every time this is retrieved (this can
+- thus be used to generate UUIDs at will);
++ number of seconds between urandom pool reseeding). This file is
++ writable for compatibility purposes, but writing to it has no effect
++ on any RNG behavior;
+
+ * ``write_wakeup_threshold``: when the entropy count drops below this
+ (as a number of bits), processes waiting to write to ``/dev/random``
+- are woken up.
+-
+-If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
+-defined, these additional entries are present:
+-
+-* ``add_interrupt_avg_cycles``: the average number of cycles between
+- interrupts used to feed the pool;
+-
+-* ``add_interrupt_avg_deviation``: the standard deviation seen on the
+- number of cycles between interrupts used to feed the pool.
++ are woken up. This file is writable for compatibility purposes, but
++ writing to it has no effect on any RNG behavior.
+
+
+ randomize_va_space
+@@ -1099,7 +1103,7 @@ task_delayacct
+ ===============
+
+ Enables/disables task delay accounting (see
+-:doc:`accounting/delay-accounting.rst`). Enabling this feature incurs
++Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
+ a small amount of overhead in the scheduler but is useful for debugging
+ and performance tuning. It is required by some tools such as iotop.
+
+@@ -1490,6 +1494,16 @@ entry will default to 2 instead of 0.
+ 2 Unprivileged calls to ``bpf()`` are disabled
+ = =============================================================
+
++
++warn_limit
++==========
++
++Number of kernel warnings after which the kernel should panic when
++``panic_on_warn`` is not set. Setting this to 0 disables checking
++the warning count. Setting this to 1 has the same effect as setting
++``panic_on_warn=1``. The default value is 0.
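++
++ For example, to panic after 100 warnings (illustrative)::
++
++     echo 100 > /proc/sys/kernel/warn_limit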
++
++
+ watchdog
+ ========
+
+diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
+index 4150f74c521a8..806ecd5957a4d 100644
+--- a/Documentation/admin-guide/sysctl/net.rst
++++ b/Documentation/admin-guide/sysctl/net.rst
+@@ -34,13 +34,14 @@ Table : Subdirectories in /proc/sys/net
+ ========= =================== = ========== ==================
+ Directory Content Directory Content
+ ========= =================== = ========== ==================
+- core General parameter appletalk Appletalk protocol
+- unix Unix domain sockets netrom NET/ROM
+- 802 E802 protocol ax25 AX25
+- ethernet Ethernet protocol rose X.25 PLP layer
+- ipv4 IP version 4 x25 X.25 protocol
+- bridge Bridging decnet DEC net
+- ipv6 IP version 6 tipc TIPC
++ 802 E802 protocol mptcp Multipath TCP
++ appletalk Appletalk protocol netfilter Network Filter
++ ax25 AX25 netrom NET/ROM
++ bridge Bridging rose X.25 PLP layer
++ core General parameter tipc TIPC
++ ethernet Ethernet protocol unix Unix domain sockets
++ ipv4 IP version 4 x25 X.25 protocol
++ ipv6 IP version 6
+ ========= =================== = ========== ==================
+
+ 1. /proc/sys/net/core - Network core options
+@@ -271,7 +272,7 @@ poll cycle or the number of packets processed reaches netdev_budget.
+ netdev_max_backlog
+ ------------------
+
+-Maximum number of packets, queued on the INPUT side, when the interface
++Maximum number of packets, queued on the INPUT side, when the interface
+ receives packets faster than kernel can process them.
+
+ netdev_rss_key
+diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
+index 5e795202111f2..f4804ce37c58b 100644
+--- a/Documentation/admin-guide/sysctl/vm.rst
++++ b/Documentation/admin-guide/sysctl/vm.rst
+@@ -948,7 +948,7 @@ how much memory needs to be free before kswapd goes back to sleep.
+
+ The unit is in fractions of 10,000. The default value of 10 means the
+ distances between watermarks are 0.1% of the available memory in the
+-node/system. The maximum value is 1000, or 10% of memory.
++node/system. The maximum value is 3000, or 30% of memory.
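++
++ For example, to widen the watermark gap to 1% of memory
++ (illustrative)::
++
++     echo 100 > /proc/sys/vm/watermark_scale_factor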
+
+ A high rate of threads entering direct reclaim (allocstall) or kswapd
+ going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate
+diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
+index 328e0c454fbd4..749ae970c3195 100644
+--- a/Documentation/arm64/cpu-feature-registers.rst
++++ b/Documentation/arm64/cpu-feature-registers.rst
+@@ -235,7 +235,15 @@ infrastructure:
+ | DPB | [3-0] | y |
+ +------------------------------+---------+---------+
+
+- 6) ID_AA64MMFR2_EL1 - Memory model feature register 2
++ 6) ID_AA64MMFR0_EL1 - Memory model feature register 0
++
++ +------------------------------+---------+---------+
++ | Name | bits | visible |
++ +------------------------------+---------+---------+
++ | ECV | [63-60] | y |
++ +------------------------------+---------+---------+
++
++ 7) ID_AA64MMFR2_EL1 - Memory model feature register 2
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+@@ -243,7 +251,7 @@ infrastructure:
+ | AT | [35-32] | y |
+ +------------------------------+---------+---------+
+
+- 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0
++ 8) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+@@ -267,6 +275,23 @@ infrastructure:
+ | SVEVer | [3-0] | y |
+ +------------------------------+---------+---------+
+
++ 8) ID_AA64MMFR1_EL1 - Memory model feature register 1
++
++ +------------------------------+---------+---------+
++ | Name | bits | visible |
++ +------------------------------+---------+---------+
++ | AFP | [47-44] | y |
++ +------------------------------+---------+---------+
++
++ 9) ID_AA64ISAR2_EL1 - Instruction set attribute register 2
++
++ +------------------------------+---------+---------+
++ | Name | bits | visible |
++ +------------------------------+---------+---------+
++ | RPRES | [7-4] | y |
++ +------------------------------+---------+---------+
++
++
+ Appendix I: Example
+ -------------------
+
+diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
+index ec1a5a63c1d09..b72ff17d600ae 100644
+--- a/Documentation/arm64/elf_hwcaps.rst
++++ b/Documentation/arm64/elf_hwcaps.rst
+@@ -247,6 +247,18 @@ HWCAP2_MTE
+ Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
+ by Documentation/arm64/memory-tagging-extension.rst.
+
++HWCAP2_ECV
++
++ Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
++
++HWCAP2_AFP
++
++ Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001.
++
++HWCAP2_RPRES
++
++ Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
++
+ 4. Unused AT_HWCAP bits
+ -----------------------
+
+diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
+index d410a47ffa57a..83a75e16e54de 100644
+--- a/Documentation/arm64/silicon-errata.rst
++++ b/Documentation/arm64/silicon-errata.rst
+@@ -68,6 +68,8 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A55 | #2441007 | ARM64_ERRATUM_2441007 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A57 | #852523 | N/A |
+@@ -76,10 +78,14 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A72 | #853709 | N/A |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+@@ -92,14 +98,34 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Neoverse-N1 | #1349291 | N/A |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | MMU-500 | #841119,826419 | N/A |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | MMU-600 | #1076982,1209401| N/A |
+++----------------+-----------------+-----------------+-----------------------------+
++| ARM | MMU-700 | #2268618,2812531| N/A |
+++----------------+-----------------+-----------------+-----------------------------+
+ +----------------+-----------------+-----------------+-----------------------------+
+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+ +----------------+-----------------+-----------------+-----------------------------+
+@@ -163,6 +189,9 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
+++----------------+-----------------+-----------------+-----------------------------+
+ +----------------+-----------------+-----------------+-----------------------------+
+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
+ +----------------+-----------------+-----------------+-----------------------------+
+diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
+index 093cdaefdb373..d8b101c97031b 100644
+--- a/Documentation/atomic_bitops.txt
++++ b/Documentation/atomic_bitops.txt
+@@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is:
+ - RMW operations that have a return value are fully ordered.
+
+ - RMW operations that are conditional are unordered on FAILURE,
+- otherwise the above rules apply. In the case of test_and_{}_bit() operations,
++ otherwise the above rules apply. In the case of test_and_set_bit_lock(),
+ if the bit in memory is unchanged by the operation then it is deemed to have
+ failed.
+
+diff --git a/Documentation/conf.py b/Documentation/conf.py
+index 948a97d6387dd..76b31798f94ff 100644
+--- a/Documentation/conf.py
++++ b/Documentation/conf.py
+@@ -161,7 +161,7 @@ finally:
+ #
+ # This is also used if you do content translation via gettext catalogs.
+ # Usually you set "language" from the command line for these cases.
+-language = None
++language = 'en'
+
+ # There are two options for replacing |today|: either, you set today to some
+ # non-false value, then it is used:
+diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst
+index 8e0f1fe8d17ad..895285c037c72 100644
+--- a/Documentation/dev-tools/gdb-kernel-debugging.rst
++++ b/Documentation/dev-tools/gdb-kernel-debugging.rst
+@@ -39,6 +39,10 @@ Setup
+ this mode. In this case, you should build the kernel with
+ CONFIG_RANDOMIZE_BASE disabled if the architecture supports KASLR.
+
++- Build the gdb scripts (required on kernels v5.1 and above)::
++
++ make scripts_gdb
++
+ - Enable the gdb stub of QEMU/KVM, either
+
+ - at VM startup time by appending "-s" to the QEMU command line
+diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
+index 0fbe3308bf37f..48244d32780f6 100644
+--- a/Documentation/dev-tools/kfence.rst
++++ b/Documentation/dev-tools/kfence.rst
+@@ -231,10 +231,14 @@ Guarded allocations are set up based on the sample interval. After expiration
+ of the sample interval, the next allocation through the main allocator (SLAB or
+ SLUB) returns a guarded allocation from the KFENCE object pool (allocation
+ sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and
+-the next allocation is set up after the expiration of the interval. To "gate" a
+-KFENCE allocation through the main allocator's fast-path without overhead,
+-KFENCE relies on static branches via the static keys infrastructure. The static
+-branch is toggled to redirect the allocation to KFENCE.
++the next allocation is set up after the expiration of the interval.
++
++When using ``CONFIG_KFENCE_STATIC_KEYS=y``, KFENCE allocations are "gated"
++through the main allocator's fast-path by relying on static branches via the
++static keys infrastructure. The static branch is toggled to redirect the
++allocation to KFENCE. Depending on sample interval, target workloads, and
++system architecture, this may perform better than the simple dynamic branch.
++Careful benchmarking is recommended.
+
+ KFENCE objects each reside on a dedicated page, at either the left or right
+ page boundaries selected at random. The pages to the left and right of the
+diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
+index e77635c5422c6..fa8b31660cadd 100644
+--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
++++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
+@@ -119,6 +119,9 @@ Boards (incomplete list of examples):
+ - OMAP3 BeagleBoard : Low cost community board
+ compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
+
++- OMAP3 BeagleBoard A to B4 : Early BeagleBoard revisions A to B4 with a timer quirk
++ compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3"
++
+ - OMAP3 Tobi with Overo : Commercial expansion board with daughter board
+ compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3"
+
+diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
+index 880ddafc634e6..a702a18d845e9 100644
+--- a/Documentation/devicetree/bindings/arm/qcom.yaml
++++ b/Documentation/devicetree/bindings/arm/qcom.yaml
+@@ -135,28 +135,34 @@ properties:
+ - const: qcom,msm8974
+
+ - items:
+- - enum:
+- - alcatel,idol347
+- - const: qcom,msm8916-mtp/1
+ - const: qcom,msm8916-mtp
++ - const: qcom,msm8916-mtp/1
+ - const: qcom,msm8916
+
+ - items:
+ - enum:
+- - longcheer,l8150
++ - alcatel,idol347
+ - samsung,a3u-eur
+ - samsung,a5u-eur
+ - const: qcom,msm8916
+
++ - items:
++ - const: longcheer,l8150
++ - const: qcom,msm8916-v1-qrd/9-v1
++ - const: qcom,msm8916
++
+ - items:
+ - enum:
+ - sony,karin_windy
++ - const: qcom,apq8094
++
++ - items:
++ - enum:
+ - sony,karin-row
+ - sony,satsuki-row
+ - sony,sumire-row
+ - sony,suzuran-row
+- - qcom,msm8994
+- - const: qcom,apq8094
++ - const: qcom,msm8994
+
+ - items:
+ - const: qcom,msm8996-mtp
+diff --git a/Documentation/devicetree/bindings/ata/ahci-ceva.txt b/Documentation/devicetree/bindings/ata/ahci-ceva.txt
+deleted file mode 100644
+index bfb6da0281ecd..0000000000000
+--- a/Documentation/devicetree/bindings/ata/ahci-ceva.txt
++++ /dev/null
+@@ -1,63 +0,0 @@
+-Binding for CEVA AHCI SATA Controller
+-
+-Required properties:
+- - reg: Physical base address and size of the controller's register area.
+- - compatible: Compatibility string. Must be 'ceva,ahci-1v84'.
+- - clocks: Input clock specifier. Refer to common clock bindings.
+- - interrupts: Interrupt specifier. Refer to interrupt binding.
+- - ceva,p0-cominit-params: OOB timing value for COMINIT parameter for port 0.
+- - ceva,p1-cominit-params: OOB timing value for COMINIT parameter for port 1.
+- The fields for the above parameter must be as shown below:
+- ceva,pN-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
+- CINMP : COMINIT Negate Minimum Period.
+- CIBGN : COMINIT Burst Gap Nominal.
+- CIBGMX: COMINIT Burst Gap Maximum.
+- CIBGMN: COMINIT Burst Gap Minimum.
+- - ceva,p0-comwake-params: OOB timing value for COMWAKE parameter for port 0.
+- - ceva,p1-comwake-params: OOB timing value for COMWAKE parameter for port 1.
+- The fields for the above parameter must be as shown below:
+- ceva,pN-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
+- CWBGMN: COMWAKE Burst Gap Minimum.
+- CWBGMX: COMWAKE Burst Gap Maximum.
+- CWBGN: COMWAKE Burst Gap Nominal.
+- CWNMP: COMWAKE Negate Minimum Period.
+- - ceva,p0-burst-params: Burst timing value for COM parameter for port 0.
+- - ceva,p1-burst-params: Burst timing value for COM parameter for port 1.
+- The fields for the above parameter must be as shown below:
+- ceva,pN-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
+- BMX: COM Burst Maximum.
+- BNM: COM Burst Nominal.
+- SFD: Signal Failure Detection value.
+- PTST: Partial to Slumber timer value.
+- - ceva,p0-retry-params: Retry interval timing value for port 0.
+- - ceva,p1-retry-params: Retry interval timing value for port 1.
+- The fields for the above parameter must be as shown below:
+- ceva,pN-retry-params = /bits/ 16 <RIT RCT>;
+- RIT: Retry Interval Timer.
+- RCT: Rate Change Timer.
+-
+-Optional properties:
+- - ceva,broken-gen2: limit to gen1 speed instead of gen2.
+- - phys: phandle for the PHY device
+- - resets: phandle to the reset controller for the SATA IP
+-
+-Examples:
+- ahci@fd0c0000 {
+- compatible = "ceva,ahci-1v84";
+- reg = <0xfd0c0000 0x200>;
+- interrupt-parent = <&gic>;
+- interrupts = <0 133 4>;
+- clocks = <&clkc SATA_CLK_ID>;
+- ceva,p0-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
+- ceva,p0-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
+- ceva,p0-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
+- ceva,p0-retry-params = /bits/ 16 <0x0216 0x7F06>;
+-
+- ceva,p1-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
+- ceva,p1-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
+- ceva,p1-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
+- ceva,p1-retry-params = /bits/ 16 <0x0216 0x7F06>;
+- ceva,broken-gen2;
+- phys = <&psgtr 1 PHY_TYPE_SATA 1 1>;
+- resets = <&zynqmp_reset ZYNQMP_RESET_SATA>;
+- };
+diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+new file mode 100644
+index 0000000000000..71364c6081ff5
+--- /dev/null
++++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+@@ -0,0 +1,189 @@
++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
++%YAML 1.2
++---
++$id: http://devicetree.org/schemas/ata/ceva,ahci-1v84.yaml#
++$schema: http://devicetree.org/meta-schemas/core.yaml#
++
++title: Ceva AHCI SATA Controller
++
++maintainers:
++ - Piyush Mehta <piyush.mehta@xilinx.com>
++
++description: |
++ The Ceva SATA controller mostly conforms to the AHCI interface, with some
++ special extensions that add functionality. It is a high-performance dual-port
++ SATA host controller with an AHCI-compliant command layer which supports
++ advanced features such as native command queuing and frame information
++ structure (FIS) based switching for systems employing port multipliers.
++
++properties:
++ compatible:
++ const: ceva,ahci-1v84
++
++ reg:
++ maxItems: 1
++
++ clocks:
++ maxItems: 1
++
++ dma-coherent: true
++
++ interrupts:
++ maxItems: 1
++
++ iommus:
++ maxItems: 4
++
++ power-domains:
++ maxItems: 1
++
++ ceva,p0-cominit-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ OOB timing value for COMINIT parameter for port 0.
++ The fields for the above parameter must be as shown below:-
++ ceva,p0-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
++ items:
++ - description: CINMP - COMINIT Negate Minimum Period.
++ - description: CIBGN - COMINIT Burst Gap Nominal.
++ - description: CIBGMX - COMINIT Burst Gap Maximum.
++ - description: CIBGMN - COMINIT Burst Gap Minimum.
++
++ ceva,p0-comwake-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ OOB timing value for COMWAKE parameter for port 0.
++ The fields for the above parameter must be as shown below:-
++ ceva,p0-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
++ items:
++ - description: CWBGMN - COMWAKE Burst Gap Minimum.
++ - description: CWBGMX - COMWAKE Burst Gap Maximum.
++ - description: CWBGN - COMWAKE Burst Gap Nominal.
++ - description: CWNMP - COMWAKE Negate Minimum Period.
++
++ ceva,p0-burst-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ Burst timing value for COM parameter for port 0.
++ The fields for the above parameter must be as shown below:-
++ ceva,p0-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
++ items:
++ - description: BMX - COM Burst Maximum.
++ - description: BNM - COM Burst Nominal.
++ - description: SFD - Signal Failure Detection value.
++ - description: PTST - Partial to Slumber timer value.
++
++ ceva,p0-retry-params:
++ $ref: /schemas/types.yaml#/definitions/uint16-array
++ description: |
++ Retry interval timing value for port 0.
++ The fields for the above parameter must be as shown below:-
++ ceva,p0-retry-params = /bits/ 16 <RIT RCT>;
++ items:
++ - description: RIT - Retry Interval Timer.
++ - description: RCT - Rate Change Timer.
++
++ ceva,p1-cominit-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ OOB timing value for COMINIT parameter for port 1.
++ The fields for the above parameter must be as shown below:-
++ ceva,p1-cominit-params = /bits/ 8 <CIBGMN CIBGMX CIBGN CINMP>;
++ items:
++ - description: CINMP - COMINIT Negate Minimum Period.
++ - description: CIBGN - COMINIT Burst Gap Nominal.
++ - description: CIBGMX - COMINIT Burst Gap Maximum.
++ - description: CIBGMN - COMINIT Burst Gap Minimum.
++
++ ceva,p1-comwake-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ OOB timing value for COMWAKE parameter for port 1.
++ The fields for the above parameter must be as shown below:-
++ ceva,p1-comwake-params = /bits/ 8 <CWBGMN CWBGMX CWBGN CWNMP>;
++ items:
++ - description: CWBGMN - COMWAKE Burst Gap Minimum.
++ - description: CWBGMX - COMWAKE Burst Gap Maximum.
++ - description: CWBGN - COMWAKE Burst Gap Nominal.
++ - description: CWNMP - COMWAKE Negate Minimum Period.
++
++ ceva,p1-burst-params:
++ $ref: /schemas/types.yaml#/definitions/uint8-array
++ description: |
++ Burst timing value for COM parameter for port 1.
++ The fields for the above parameter must be as shown below:-
++ ceva,p1-burst-params = /bits/ 8 <BMX BNM SFD PTST>;
++ items:
++ - description: BMX - COM Burst Maximum.
++ - description: BNM - COM Burst Nominal.
++ - description: SFD - Signal Failure Detection value.
++ - description: PTST - Partial to Slumber timer value.
++
++ ceva,p1-retry-params:
++ $ref: /schemas/types.yaml#/definitions/uint16-array
++ description: |
++ Retry interval timing value for port 1.
++ The fields for the above parameter must be as shown below:-
++ ceva,p1-retry-params = /bits/ 16 <RIT RCT>;
++ items:
++ - description: RIT - Retry Interval Timer.
++ - description: RCT - Rate Change Timer.
++
++ ceva,broken-gen2:
++ $ref: /schemas/types.yaml#/definitions/flag
++ description: |
++ limit to gen1 speed instead of gen2.
++
++ phys:
++ maxItems: 1
++
++ phy-names:
++ items:
++ - const: sata-phy
++
++ resets:
++ maxItems: 1
++
++required:
++ - compatible
++ - reg
++ - clocks
++ - interrupts
++ - ceva,p0-cominit-params
++ - ceva,p0-comwake-params
++ - ceva,p0-burst-params
++ - ceva,p0-retry-params
++ - ceva,p1-cominit-params
++ - ceva,p1-comwake-params
++ - ceva,p1-burst-params
++ - ceva,p1-retry-params
++
++additionalProperties: false
++
++examples:
++ - |
++ #include <dt-bindings/clock/xlnx-zynqmp-clk.h>
++ #include <dt-bindings/interrupt-controller/irq.h>
++ #include <dt-bindings/power/xlnx-zynqmp-power.h>
++ #include <dt-bindings/reset/xlnx-zynqmp-resets.h>
++ #include <dt-bindings/phy/phy.h>
++
++ sata: ahci@fd0c0000 {
++ compatible = "ceva,ahci-1v84";
++ reg = <0xfd0c0000 0x200>;
++ interrupt-parent = <&gic>;
++ interrupts = <0 133 IRQ_TYPE_LEVEL_HIGH>;
++ clocks = <&zynqmp_clk SATA_REF>;
++ ceva,p0-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
++ ceva,p0-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
++ ceva,p0-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
++ ceva,p0-retry-params = /bits/ 16 <0x0216 0x7F06>;
++ ceva,p1-cominit-params = /bits/ 8 <0x0F 0x25 0x18 0x29>;
++ ceva,p1-comwake-params = /bits/ 8 <0x04 0x0B 0x08 0x0F>;
++ ceva,p1-burst-params = /bits/ 8 <0x0A 0x08 0x4A 0x06>;
++ ceva,p1-retry-params = /bits/ 16 <0x0216 0x7F06>;
++ ceva,broken-gen2;
++ phys = <&psgtr 1 PHY_TYPE_SATA 1 1>;
++ resets = <&zynqmp_reset ZYNQMP_RESET_SATA>;
++ };
+diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml
+index 5a5b2214f0cae..005e0edd4609a 100644
+--- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml
++++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml
+@@ -22,16 +22,32 @@ properties:
+ const: qcom,gcc-msm8996
+
+ clocks:
++ minItems: 3
+ items:
+ - description: XO source
+ - description: Second XO source
+ - description: Sleep clock source
++ - description: PCIe 0 PIPE clock (optional)
++ - description: PCIe 1 PIPE clock (optional)
++ - description: PCIe 2 PIPE clock (optional)
++ - description: USB3 PIPE clock (optional)
++ - description: UFS RX symbol 0 clock (optional)
++ - description: UFS RX symbol 1 clock (optional)
++ - description: UFS TX symbol 0 clock (optional)
+
+ clock-names:
++ minItems: 3
+ items:
+ - const: cxo
+ - const: cxo2
+ - const: sleep_clk
++ - const: pcie_0_pipe_clk_src
++ - const: pcie_1_pipe_clk_src
++ - const: pcie_2_pipe_clk_src
++ - const: usb3_phy_pipe_clk_src
++ - const: ufs_rx_symbol_0_clk_src
++ - const: ufs_rx_symbol_1_clk_src
++ - const: ufs_tx_symbol_0_clk_src
+
+ '#clock-cells':
+ const: 1
+diff --git a/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
+index 229af98b1d305..7cd88bc3a67d7 100644
+--- a/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
++++ b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
+@@ -16,8 +16,6 @@ description: |
+ reads required input clock frequencies from the devicetree and acts as clock
+ provider for all clock consumers of PS clocks.
+
+-select: false
+-
+ properties:
+ compatible:
+ const: xlnx,versal-clk
+diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
+index cf5a208f2f105..343598c9f473b 100644
+--- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
++++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml
+@@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller
+ maintainers:
+ - Neil Armstrong <narmstrong@baylibre.com>
+
++allOf:
++ - $ref: /schemas/sound/name-prefix.yaml#
++
+ description: |
+ The Amlogic Meson Synopsys Designware Integration is composed of
+ - A Synopsys DesignWare HDMI Controller IP
+@@ -99,6 +102,8 @@ properties:
+ "#sound-dai-cells":
+ const: 0
+
++ sound-name-prefix: true
++
+ required:
+ - compatible
+ - reg
+diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
+index 851cb07812173..047fd69e03770 100644
+--- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
++++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml
+@@ -78,6 +78,10 @@ properties:
+ interrupts:
+ maxItems: 1
+
++ amlogic,canvas:
++ description: should point to a canvas provider node
++ $ref: /schemas/types.yaml#/definitions/phandle
++
+ power-domains:
+ maxItems: 1
+ description: phandle to the associated power domain
+@@ -106,6 +110,7 @@ required:
+ - port@1
+ - "#address-cells"
+ - "#size-cells"
++ - amlogic,canvas
+
+ additionalProperties: false
+
+@@ -118,6 +123,7 @@ examples:
+ interrupts = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ amlogic,canvas = <&canvas>;
+
+ /* CVBS VDAC output port */
+ port@0 {
+diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+index 35426fde86106..4b2cd556483c0 100644
+--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
++++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+@@ -31,7 +31,7 @@ properties:
+ - description: Display byte clock
+ - description: Display byte interface clock
+ - description: Display pixel clock
+- - description: Display escape clock
++ - description: Display core clock
+ - description: Display AHB clock
+ - description: Display AXI clock
+
+@@ -64,6 +64,18 @@ properties:
+ Indicates if the DSI controller is driving a panel which needs
+ 2 DSI links.
+
++ qcom,master-dsi:
++ type: boolean
++ description: |
++ Indicates if the DSI controller is the master DSI controller when
++ qcom,dual-dsi-mode is enabled.
++
++ qcom,sync-dual-dsi:
++ type: boolean
++ description: |
++ Indicates if the DSI controller needs to sync the other DSI controller
++ with MIPI DCS commands when qcom,dual-dsi-mode is enabled.
++
+ assigned-clocks:
+ minItems: 2
+ maxItems: 2
+@@ -135,8 +147,6 @@ required:
+ - phy-names
+ - assigned-clocks
+ - assigned-clock-parents
+- - power-domains
+- - operating-points-v2
+ - ports
+
+ additionalProperties: false
+diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
+index 4399715953e1a..4dd5eed50506a 100644
+--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml
+@@ -39,7 +39,6 @@ required:
+ - compatible
+ - reg
+ - reg-names
+- - vdds-supply
+
+ unevaluatedProperties: false
+
+diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
+index 064df50e21a5c..23355ac67d3d1 100644
+--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml
+@@ -37,7 +37,6 @@ required:
+ - compatible
+ - reg
+ - reg-names
+- - vcca-supply
+
+ unevaluatedProperties: false
+
+diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
+index 69eecaa64b187..ddb0ac4c29d44 100644
+--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml
+@@ -34,6 +34,10 @@ properties:
+ vddio-supply:
+ description: Phandle to vdd-io regulator device node.
+
++ qcom,dsi-phy-regulator-ldo-mode:
++ type: boolean
++ description: Indicates if the LDO mode PHY regulator is wanted.
++
+ required:
+ - compatible
+ - reg
+diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml
+index 0cebaaefda032..419c3b2ac5a6f 100644
+--- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml
++++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml
+@@ -72,6 +72,7 @@ examples:
+ dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>;
+ rotation = <270>;
++ backlight = <&backlight>;
+ };
+ };
+
+diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
+index b6e1ebfaf3666..bb3cbc30d9121 100644
+--- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
++++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
+@@ -64,7 +64,7 @@ if:
+ then:
+ properties:
+ clocks:
+- maxItems: 2
++ minItems: 2
+
+ required:
+ - clock-names
+diff --git a/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt b/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt
+index 8a9f3559335b5..7e14e26676ec9 100644
+--- a/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt
++++ b/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt
+@@ -34,8 +34,8 @@ Example:
+ Use specific request line passing from dma
+ For example, MMC request line is 5
+
+- sdhci: sdhci@98e00000 {
+- compatible = "moxa,moxart-sdhci";
++ mmc: mmc@98e00000 {
++ compatible = "moxa,moxart-mmc";
+ reg = <0x98e00000 0x5C>;
+ interrupts = <5 0>;
+ clocks = <&clk_apb>;
+diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
+index 146e554b3c676..2a80e272cd666 100644
+--- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt
++++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
+@@ -9,8 +9,9 @@ Required properties:
+ - The second cell is reserved and is currently unused.
+ - gpio-controller : Marks the device node as a GPIO controller.
+ - interrupt-controller: Mark the device node as an interrupt controller
+-- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware.
++- #interrupt-cells : Should be 2. The interrupt type is fixed in the hardware.
+ - The first cell is the GPIO offset number within the GPIO controller.
++ - The second cell is the interrupt trigger type and level flags.
+ - interrupts: Specify the interrupt.
+ - altr,interrupt-type: Specifies the interrupt trigger type the GPIO
+ hardware is synthesized. This field is required if the Altera GPIO controller
+@@ -38,6 +39,6 @@ gpio_altr: gpio@ff200000 {
+ altr,interrupt-type = <IRQ_TYPE_EDGE_RISING>;
+ #gpio-cells = <2>;
+ gpio-controller;
+- #interrupt-cells = <1>;
++ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
+diff --git a/Documentation/devicetree/bindings/gpio/gpio-zynq.yaml b/Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
+index 378da2649e668..980f92ad9eba2 100644
+--- a/Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
++++ b/Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
+@@ -11,7 +11,11 @@ maintainers:
+
+ properties:
+ compatible:
+- const: xlnx,zynq-gpio-1.0
++ enum:
++ - xlnx,zynq-gpio-1.0
++ - xlnx,zynqmp-gpio-1.0
++ - xlnx,versal-gpio-1.0
++ - xlnx,pmc-gpio-1.0
+
+ reg:
+ maxItems: 1
+diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
+new file mode 100644
+index 0000000000000..ba54d6998f2ee
+--- /dev/null
++++ b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
+@@ -0,0 +1,88 @@
++# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
++%YAML 1.2
++---
++$id: http://devicetree.org/schemas/iio/accel/adi,adxl355.yaml#
++$schema: http://devicetree.org/meta-schemas/core.yaml#
++
++title: Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer
++
++maintainers:
++ - Puranjay Mohan <puranjay12@gmail.com>
++
++description: |
++ Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer that supports
++ both I2C & SPI interfaces
++ https://www.analog.com/en/products/adxl355.html
++
++properties:
++ compatible:
++ enum:
++ - adi,adxl355
++
++ reg:
++ maxItems: 1
++
++ interrupts:
++ minItems: 1
++ maxItems: 3
++ description: |
++ Type for DRDY should be IRQ_TYPE_EDGE_RISING.
++ Three configurable interrupt lines exist.
++
++ interrupt-names:
++ description: Specify which interrupt line is in use.
++ items:
++ enum:
++ - INT1
++ - INT2
++ - DRDY
++ minItems: 1
++ maxItems: 3
++
++ vdd-supply:
++ description: Regulator that provides power to the sensor
++
++ vddio-supply:
++ description: Regulator that provides power to the bus
++
++ spi-max-frequency: true
++
++required:
++ - compatible
++ - reg
++
++additionalProperties: false
++
++examples:
++ - |
++ #include <dt-bindings/gpio/gpio.h>
++ #include <dt-bindings/interrupt-controller/irq.h>
++ i2c {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ /* Example for a I2C device node */
++ accelerometer@1d {
++ compatible = "adi,adxl355";
++ reg = <0x1d>;
++ interrupt-parent = <&gpio>;
++ interrupts = <25 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "DRDY";
++ };
++ };
++ - |
++ #include <dt-bindings/gpio/gpio.h>
++ #include <dt-bindings/interrupt-controller/irq.h>
++ spi {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ accelerometer@0 {
++ compatible = "adi,adxl355";
++ reg = <0>;
++ spi-max-frequency = <1000000>;
++ interrupt-parent = <&gpio>;
++ interrupts = <25 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "DRDY";
++ };
++ };
+diff --git a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
+index c115e2e99bd9a..4a7b1385fdc7e 100644
+--- a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
++++ b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
+@@ -86,7 +86,7 @@ patternProperties:
+ of the MAX chips to the GyroADC, while MISO line of each Maxim
+ ADC connects to a shared input pin of the GyroADC.
+ enum:
+- - adi,7476
++ - adi,ad7476
+ - fujitsu,mb88101a
+ - maxim,max1162
+ - maxim,max11100
+diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
+index d5c54813ce872..a8f7720d1e3e2 100644
+--- a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
++++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
+@@ -54,7 +54,7 @@ examples:
+
+ ad5766@0 {
+ compatible = "adi,ad5766";
+- output-range-microvolts = <(-5000) 5000>;
++ output-range-microvolts = <(-5000000) 5000000>;
+ reg = <0>;
+ spi-cpol;
+ spi-max-frequency = <1000000>;
+diff --git a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml
+index b6bbc312a7cf7..1414ba9977c16 100644
+--- a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml
++++ b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml
+@@ -24,8 +24,10 @@ properties:
+
+ interrupts:
+ minItems: 1
++ maxItems: 2
+ description:
+ Should be configured with type IRQ_TYPE_EDGE_RISING.
++ If two interrupts are provided, expected order is INT1 and INT2.
+
+ required:
+ - compatible
+diff --git a/Documentation/devicetree/bindings/input/hid-over-i2c.txt b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
+index c76bafaf98d2f..34c43d3bddfd1 100644
+--- a/Documentation/devicetree/bindings/input/hid-over-i2c.txt
++++ b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
+@@ -32,6 +32,8 @@ device-specific compatible properties, which should be used in addition to the
+ - vdd-supply: phandle of the regulator that provides the supply voltage.
+ - post-power-on-delay-ms: time required by the device after enabling its regulators
+ or powering it on, before it is ready for communication.
++- touchscreen-inverted-x: See touchscreen.txt
++- touchscreen-inverted-y: See touchscreen.txt
+
+ Example:
+
+diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
+index 877183cf42787..1ef849dc74d7e 100644
+--- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
++++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
+@@ -79,6 +79,8 @@ properties:
+
+ properties:
+ data-lanes:
++ description:
++ Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lanes.
+ items:
+ minItems: 1
+ maxItems: 4
+@@ -91,18 +93,6 @@ properties:
+ required:
+ - data-lanes
+
+- allOf:
+- - if:
+- properties:
+- compatible:
+- contains:
+- const: fsl,imx7-mipi-csi2
+- then:
+- properties:
+- data-lanes:
+- items:
+- maxItems: 2
+-
+ port@1:
+ $ref: /schemas/graph.yaml#/properties/port
+ description:
+diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml
+index 2353f6cf3c805..750720e0123a0 100644
+--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml
++++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml
+@@ -106,7 +106,6 @@ allOf:
+ - mediatek,mt2701-smi-larb
+ - mediatek,mt2712-smi-larb
+ - mediatek,mt6779-smi-larb
+- - mediatek,mt8167-smi-larb
+ - mediatek,mt8192-smi-larb
+
+ then:
+diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+index bd217e6f5018a..5cd144a9ec992 100644
+--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
++++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+@@ -55,7 +55,7 @@ patternProperties:
+ properties:
+ reg:
+ description:
+- Contains the native Ready/Busy IDs.
++ Contains the chip-select IDs.
+
+ nand-ecc-engine:
+ allOf:
+@@ -184,7 +184,7 @@ examples:
+ nand-use-soft-ecc-engine;
+ nand-ecc-algo = "bch";
+
+- /* controller specific properties */
++ /* NAND chip specific properties */
+ };
+
+ nand@1 {
+diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+index fbdc2083bec4f..20ee96584aba2 100644
+--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
++++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+@@ -23,6 +23,7 @@ properties:
+ - brcm,bcm4345c5
+ - brcm,bcm43540-bt
+ - brcm,bcm4335a0
++ - brcm,bcm4349-bt
+
+ shutdown-gpios:
+ maxItems: 1
+diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+index 0968b40aef1e8..e3501bfa22e90 100644
+--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
++++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+@@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ spi-max-frequency = <10000000>;
+- bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
++ bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
+ device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+index 2766fe45bb98b..ee42328a109dc 100644
+--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
++++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+@@ -91,6 +91,14 @@ properties:
+ compensate for the board being designed with the lanes
+ swapped.
+
++ enet-phy-lane-no-swap:
++ $ref: /schemas/types.yaml#/definitions/flag
++ description:
++ If set, indicates that PHY will disable swap of the
++ TX/RX lanes. This property allows the PHY to work correcly after
++ e.g. wrong bootstrap configuration caused by issues in PCB
++ layout design.
++
+ eee-broken-100tx:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+index b8a0b392b24ea..c52ec1ee7df6e 100644
+--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
++++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+@@ -106,6 +106,10 @@ properties:
+ - const: imem
+ - const: config
+
++ qcom,qmp:
++ $ref: /schemas/types.yaml#/definitions/phandle
++ description: phandle to the AOSS side-channel message RAM
++
+ qcom,smem-states:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description: State bits used in by the AP to signal the modem.
+@@ -221,6 +225,8 @@ examples:
+ "imem",
+ "config";
+
++ qcom,qmp = <&aoss_qmp>;
++
+ qcom,smem-states = <&ipa_smp2p_out 0>,
+ <&ipa_smp2p_out 1>;
+ qcom,smem-state-names = "ipa-clock-enabled-valid",
+diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+index c115c95ee584e..5b8db76b6cdd7 100644
+--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
++++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+@@ -53,20 +53,18 @@ properties:
+ - allwinner,sun8i-r40-emac
+ - allwinner,sun8i-v3s-emac
+ - allwinner,sun50i-a64-emac
+- - loongson,ls2k-dwmac
+- - loongson,ls7a-dwmac
+ - amlogic,meson6-dwmac
+ - amlogic,meson8b-dwmac
+ - amlogic,meson8m2-dwmac
+ - amlogic,meson-gxbb-dwmac
+ - amlogic,meson-axg-dwmac
+- - loongson,ls2k-dwmac
+- - loongson,ls7a-dwmac
+ - ingenic,jz4775-mac
+ - ingenic,x1000-mac
+ - ingenic,x1600-mac
+ - ingenic,x1830-mac
+ - ingenic,x2000-mac
++ - loongson,ls2k-dwmac
++ - loongson,ls7a-dwmac
+ - rockchip,px30-gmac
+ - rockchip,rk3128-gmac
+ - rockchip,rk3228-gmac
+diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+index acea1cd444fd5..9b0548264a397 100644
+--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
++++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+@@ -14,9 +14,6 @@ description: |+
+ This PCIe host controller is based on the Synopsys DesignWare PCIe IP
+ and thus inherits all the common properties defined in snps,dw-pcie.yaml.
+
+-allOf:
+- - $ref: /schemas/pci/snps,dw-pcie.yaml#
+-
+ properties:
+ compatible:
+ enum:
+@@ -59,7 +56,7 @@ properties:
+ - const: pcie
+ - const: pcie_bus
+ - const: pcie_phy
+- - const: pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie
++ - enum: [ pcie_inbound_axi, pcie_aux ]
+
+ num-lanes:
+ const: 1
+@@ -166,6 +163,47 @@ required:
+ - clocks
+ - clock-names
+
++allOf:
++ - $ref: /schemas/pci/snps,dw-pcie.yaml#
++ - if:
++ properties:
++ compatible:
++ contains:
++ const: fsl,imx6sx-pcie
++ then:
++ properties:
++ clock-names:
++ items:
++ - {}
++ - {}
++ - {}
++ - const: pcie_inbound_axi
++ - if:
++ properties:
++ compatible:
++ contains:
++ const: fsl,imx8mq-pcie
++ then:
++ properties:
++ clock-names:
++ items:
++ - {}
++ - {}
++ - {}
++ - const: pcie_aux
++ - if:
++ properties:
++ compatible:
++ not:
++ contains:
++ enum:
++ - fsl,imx6sx-pcie
++ - fsl,imx8mq-pcie
++ then:
++ properties:
++ clock-names:
++ maxItems: 3
++
+ unevaluatedProperties: false
+
+ examples:
+diff --git a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
+index 30b6396d83c83..aea0e2bcdd778 100644
+--- a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
++++ b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
+@@ -36,7 +36,7 @@ properties:
+ - const: mpu
+
+ interrupts:
+- maxItems: 1
++ maxItems: 2
+
+ clocks:
+ items:
+@@ -94,8 +94,9 @@ examples:
+ #interrupt-cells = <1>;
+ ranges = <0x81000000 0 0x40000000 0 0x40000000 0 0x00010000>,
+ <0x82000000 0 0x50000000 0 0x50000000 0 0x20000000>;
+- interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "intr";
++ interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-names = "msi", "intr";
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map =
+ <0 0 0 1 &gic GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH
+diff --git a/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml
+new file mode 100644
+index 0000000000000..ff86c87309a41
+--- /dev/null
++++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml
+@@ -0,0 +1,78 @@
++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
++# Copyright 2019 BayLibre, SAS
++%YAML 1.2
++---
++$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb2-phy.yaml#"
++$schema: "http://devicetree.org/meta-schemas/core.yaml#"
++
++title: Amlogic G12A USB2 PHY
++
++maintainers:
++ - Neil Armstrong <narmstrong@baylibre.com>
++
++properties:
++ compatible:
++ enum:
++ - amlogic,g12a-usb2-phy
++ - amlogic,a1-usb2-phy
++
++ reg:
++ maxItems: 1
++
++ clocks:
++ maxItems: 1
++
++ clock-names:
++ items:
++ - const: xtal
++
++ resets:
++ maxItems: 1
++
++ reset-names:
++ items:
++ - const: phy
++
++ "#phy-cells":
++ const: 0
++
++ phy-supply:
++ description:
++ Phandle to a regulator that provides power to the PHY. This
++ regulator will be managed during the PHY power on/off sequence.
++
++required:
++ - compatible
++ - reg
++ - clocks
++ - clock-names
++ - resets
++ - reset-names
++ - "#phy-cells"
++
++if:
++ properties:
++ compatible:
++ enum:
++ - amlogic,meson-a1-usb-ctrl
++
++then:
++ properties:
++ power-domains:
++ maxItems: 1
++ required:
++ - power-domains
++
++additionalProperties: false
++
++examples:
++ - |
++ phy@36000 {
++ compatible = "amlogic,g12a-usb2-phy";
++ reg = <0x36000 0x2000>;
++ clocks = <&xtal>;
++ clock-names = "xtal";
++ resets = <&phy_reset>;
++ reset-names = "phy";
++ #phy-cells = <0>;
++ };
+diff --git a/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml
+new file mode 100644
+index 0000000000000..84738644e3989
+--- /dev/null
++++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml
+@@ -0,0 +1,59 @@
++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
++# Copyright 2019 BayLibre, SAS
++%YAML 1.2
++---
++$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb3-pcie-phy.yaml#"
++$schema: "http://devicetree.org/meta-schemas/core.yaml#"
++
++title: Amlogic G12A USB3 + PCIE Combo PHY
++
++maintainers:
++ - Neil Armstrong <narmstrong@baylibre.com>
++
++properties:
++ compatible:
++ enum:
++ - amlogic,g12a-usb3-pcie-phy
++
++ reg:
++ maxItems: 1
++
++ clocks:
++ maxItems: 1
++
++ clock-names:
++ items:
++ - const: ref_clk
++
++ resets:
++ maxItems: 1
++
++ reset-names:
++ items:
++ - const: phy
++
++ "#phy-cells":
++ const: 1
++
++required:
++ - compatible
++ - reg
++ - clocks
++ - clock-names
++ - resets
++ - reset-names
++ - "#phy-cells"
++
++additionalProperties: false
++
++examples:
++ - |
++ phy@46000 {
++ compatible = "amlogic,g12a-usb3-pcie-phy";
++ reg = <0x46000 0x2000>;
++ clocks = <&ref_clk>;
++ clock-names = "ref_clk";
++ resets = <&phy_reset>;
++ reset-names = "phy";
++ #phy-cells = <1>;
++ };
+diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
+deleted file mode 100644
+index 399ebde454095..0000000000000
+--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
++++ /dev/null
+@@ -1,78 +0,0 @@
+-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+-# Copyright 2019 BayLibre, SAS
+-%YAML 1.2
+----
+-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb2-phy.yaml#"
+-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+-
+-title: Amlogic G12A USB2 PHY
+-
+-maintainers:
+- - Neil Armstrong <narmstrong@baylibre.com>
+-
+-properties:
+- compatible:
+- enum:
+- - amlogic,meson-g12a-usb2-phy
+- - amlogic,meson-a1-usb2-phy
+-
+- reg:
+- maxItems: 1
+-
+- clocks:
+- maxItems: 1
+-
+- clock-names:
+- items:
+- - const: xtal
+-
+- resets:
+- maxItems: 1
+-
+- reset-names:
+- items:
+- - const: phy
+-
+- "#phy-cells":
+- const: 0
+-
+- phy-supply:
+- description:
+- Phandle to a regulator that provides power to the PHY. This
+- regulator will be managed during the PHY power on/off sequence.
+-
+-required:
+- - compatible
+- - reg
+- - clocks
+- - clock-names
+- - resets
+- - reset-names
+- - "#phy-cells"
+-
+-if:
+- properties:
+- compatible:
+- enum:
+- - amlogic,meson-a1-usb-ctrl
+-
+-then:
+- properties:
+- power-domains:
+- maxItems: 1
+- required:
+- - power-domains
+-
+-additionalProperties: false
+-
+-examples:
+- - |
+- phy@36000 {
+- compatible = "amlogic,meson-g12a-usb2-phy";
+- reg = <0x36000 0x2000>;
+- clocks = <&xtal>;
+- clock-names = "xtal";
+- resets = <&phy_reset>;
+- reset-names = "phy";
+- #phy-cells = <0>;
+- };
+diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
+deleted file mode 100644
+index 453c083cf44cb..0000000000000
+--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
++++ /dev/null
+@@ -1,59 +0,0 @@
+-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+-# Copyright 2019 BayLibre, SAS
+-%YAML 1.2
+----
+-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml#"
+-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+-
+-title: Amlogic G12A USB3 + PCIE Combo PHY
+-
+-maintainers:
+- - Neil Armstrong <narmstrong@baylibre.com>
+-
+-properties:
+- compatible:
+- enum:
+- - amlogic,meson-g12a-usb3-pcie-phy
+-
+- reg:
+- maxItems: 1
+-
+- clocks:
+- maxItems: 1
+-
+- clock-names:
+- items:
+- - const: ref_clk
+-
+- resets:
+- maxItems: 1
+-
+- reset-names:
+- items:
+- - const: phy
+-
+- "#phy-cells":
+- const: 1
+-
+-required:
+- - compatible
+- - reg
+- - clocks
+- - clock-names
+- - resets
+- - reset-names
+- - "#phy-cells"
+-
+-additionalProperties: false
+-
+-examples:
+- - |
+- phy@46000 {
+- compatible = "amlogic,meson-g12a-usb3-pcie-phy";
+- reg = <0x46000 0x2000>;
+- clocks = <&ref_clk>;
+- clock-names = "ref_clk";
+- resets = <&phy_reset>;
+- reset-names = "phy";
+- #phy-cells = <1>;
+- };
+diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+index ad2866c997383..fcd82df3aebbd 100644
+--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
++++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+@@ -58,7 +58,7 @@ patternProperties:
+ $ref: "/schemas/types.yaml#/definitions/string"
+ enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
+ ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4,
+- EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP,
++ EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP,
+ GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
+ GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10,
+ I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5,
+diff --git a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml
+index 4fe35e650909c..8dcdd32c2e015 100644
+--- a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml
++++ b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml
+@@ -138,7 +138,7 @@ examples:
+ clocks = <&sys_clk>;
+ pinctrl-0 = <&sgpio2_pins>;
+ pinctrl-names = "default";
+- reg = <0x1101059c 0x100>;
++ reg = <0x1101059c 0x118>;
+ microchip,sgpio-port-ranges = <0 0>, <16 18>, <28 31>;
+ bus-frequency = <25000000>;
+ sgpio_in2: gpio@0 {
+diff --git a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
+index 61dd5af80db67..37402c370fbbc 100644
+--- a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
++++ b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml
+@@ -31,7 +31,7 @@ properties:
+ $ref: "regulator.yaml#"
+
+ properties:
+- regulator-name:
++ regulator-compatible:
+ pattern: "^vbuck[1-4]$"
+
+ additionalProperties: false
+@@ -55,7 +55,7 @@ examples:
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1193750>;
+ regulator-enable-ramp-delay = <256>;
+- regulator-allowed-modes = <0 1 2 4>;
++ regulator-allowed-modes = <0 1 2>;
+ };
+
+ vbuck3 {
+@@ -63,7 +63,7 @@ examples:
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1193750>;
+ regulator-enable-ramp-delay = <256>;
+- regulator-allowed-modes = <0 1 2 4>;
++ regulator-allowed-modes = <0 1 2>;
+ };
+ };
+ };
+diff --git a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
+index f70f2e758a002..e66aac0ad735e 100644
+--- a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
++++ b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml
+@@ -47,12 +47,6 @@ properties:
+ description:
+ Properties for single LDO regulator.
+
+- properties:
+- regulator-name:
+- pattern: "^LDO[1-5]$"
+- description:
+- should be "LDO1", ..., "LDO5"
+-
+ unevaluatedProperties: false
+
+ "^BUCK[1-6]$":
+@@ -62,11 +56,6 @@ properties:
+ Properties for single BUCK regulator.
+
+ properties:
+- regulator-name:
+- pattern: "^BUCK[1-6]$"
+- description:
+- should be "BUCK1", ..., "BUCK6"
+-
+ nxp,dvs-run-voltage:
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+ minimum: 600000
+diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
+index 093edda0c8dfc..6cd83d920155f 100644
+--- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
++++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
+@@ -13,6 +13,14 @@ common regulator binding documented in:
+
+
+ Required properties of the main device node (the parent!):
++ - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
++ for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
++
++ [1] If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
++ property is specified, then all the eight voltage values for the
++ 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
++
++Optional properties of the main device node (the parent!):
+ - s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+@@ -25,26 +33,13 @@ Required properties of the main device node (the parent!):
+ units for buck4 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
+- for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
+-
+- [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+- property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage'
+- property should specify atleast one voltage level (which would be a
+- safe operating voltage).
+-
+- If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+- property is specified, then all the eight voltage values for the
+- 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
+-
+-Optional properties of the main device node (the parent!):
+ - s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+ - s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs.
+ - s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs.
+
+ Additional properties required if either of the optional properties are used:
+
+- - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from
++ - s5m8767,pmic-buck-default-dvs-idx: Default voltage setting selected from
+ the possible 8 options selectable by the dvs gpios. The value of this
+ property should be between 0 and 7. If not specified or if out of range, the
+ default value of this property is set to 0.
+diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
+index 2b1f916038972..b72ec404adcd7 100644
+--- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
++++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
+@@ -47,7 +47,7 @@ properties:
+ const: 2
+
+ cache-sets:
+- const: 1024
++ enum: [1024, 2048]
+
+ cache-size:
+ const: 2097152
+@@ -85,6 +85,8 @@ then:
+ description: |
+ Must contain entries for DirError, DataError and DataFail signals.
+ maxItems: 3
++ cache-sets:
++ const: 1024
+
+ else:
+ properties:
+@@ -92,6 +94,8 @@ else:
+ description: |
+ Must contain entries for DirError, DataError, DataFail, DirFail signals.
+ minItems: 4
++ cache-sets:
++ const: 2048
+
+ additionalProperties: false
+
+diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
+index 6b8731f7f2fba..1a8d9bf89feb6 100644
+--- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml
++++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
+@@ -79,7 +79,7 @@ properties:
+ - description: Error interrupt
+ - description: Receive buffer full interrupt
+ - description: Transmit buffer empty interrupt
+- - description: Transmit End interrupt
++ - description: Break interrupt
+ - items:
+ - description: Error interrupt
+ - description: Receive buffer full interrupt
+@@ -94,7 +94,7 @@ properties:
+ - const: eri
+ - const: rxi
+ - const: txi
+- - const: tei
++ - const: bri
+ - items:
+ - const: eri
+ - const: rxi
+diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
+index cc3fe5ed7421e..1b0062e3c1a4b 100644
+--- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
++++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml
+@@ -34,6 +34,8 @@ properties:
+ - qcom,rpm-ipq6018
+ - qcom,rpm-msm8226
+ - qcom,rpm-msm8916
++ - qcom,rpm-msm8936
++ - qcom,rpm-msm8953
+ - qcom,rpm-msm8974
+ - qcom,rpm-msm8976
+ - qcom,rpm-msm8996
+@@ -57,6 +59,7 @@ if:
+ - qcom,rpm-apq8084
+ - qcom,rpm-msm8916
+ - qcom,rpm-msm8974
++ - qcom,rpm-msm8953
+ then:
+ required:
+ - qcom,smd-channels
+diff --git a/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
+index 2e35aeaa8781d..89e3819c6127a 100644
+--- a/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
++++ b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
+@@ -61,7 +61,7 @@ patternProperties:
+ description: phandle of the CPU DAI
+
+ patternProperties:
+- "^codec-[0-9]+$":
++ "^codec(-[0-9]+)?$":
+ type: object
+ description: |-
+ Codecs:
+diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt b/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt
+index 5d6ea66a863fe..1f75feec3dec6 100644
+--- a/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt
++++ b/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt
+@@ -109,7 +109,7 @@ audio-codec@1{
+ reg = <1 0>;
+ interrupts = <&msmgpio 54 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "intr2"
+- reset-gpios = <&msmgpio 64 0>;
++ reset-gpios = <&msmgpio 64 GPIO_ACTIVE_LOW>;
+ slim-ifc-dev = <&wc9335_ifd>;
+ clock-names = "mclk", "native";
+ clocks = <&rpmcc RPM_SMD_DIV_CLK1>,
+diff --git a/Documentation/devicetree/bindings/sound/tas2562.yaml b/Documentation/devicetree/bindings/sound/tas2562.yaml
+index acd4bbe697315..4adaf92233c8e 100644
+--- a/Documentation/devicetree/bindings/sound/tas2562.yaml
++++ b/Documentation/devicetree/bindings/sound/tas2562.yaml
+@@ -52,7 +52,9 @@ properties:
+ description: TDM TX current sense time slot.
+
+ '#sound-dai-cells':
+- const: 1
++ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
++ # compatibility but is deprecated.
++ enum: [0, 1]
+
+ required:
+ - compatible
+@@ -69,7 +71,7 @@ examples:
+ codec: codec@4c {
+ compatible = "ti,tas2562";
+ reg = <0x4c>;
+- #sound-dai-cells = <1>;
++ #sound-dai-cells = <0>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14>;
+ shutdown-gpios = <&gpio1 15 0>;
+diff --git a/Documentation/devicetree/bindings/sound/tas2764.yaml b/Documentation/devicetree/bindings/sound/tas2764.yaml
+index 5bf8c76ecda11..1ffe1a01668fe 100644
+--- a/Documentation/devicetree/bindings/sound/tas2764.yaml
++++ b/Documentation/devicetree/bindings/sound/tas2764.yaml
+@@ -46,7 +46,9 @@ properties:
+ description: TDM TX voltage sense time slot.
+
+ '#sound-dai-cells':
+- const: 1
++ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
++ # compatibility but is deprecated.
++ enum: [0, 1]
+
+ required:
+ - compatible
+@@ -63,7 +65,7 @@ examples:
+ codec: codec@38 {
+ compatible = "ti,tas2764";
+ reg = <0x38>;
+- #sound-dai-cells = <1>;
++ #sound-dai-cells = <0>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14>;
+ reset-gpios = <&gpio1 15 0>;
+diff --git a/Documentation/devicetree/bindings/sound/tas2770.yaml b/Documentation/devicetree/bindings/sound/tas2770.yaml
+index 027bebf4e8cf5..aceba9ed813ef 100644
+--- a/Documentation/devicetree/bindings/sound/tas2770.yaml
++++ b/Documentation/devicetree/bindings/sound/tas2770.yaml
+@@ -54,7 +54,9 @@ properties:
+ - 1 # Falling edge
+
+ '#sound-dai-cells':
+- const: 1
++ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
++ # compatibility but is deprecated.
++ enum: [0, 1]
+
+ required:
+ - compatible
+@@ -71,7 +73,7 @@ examples:
+ codec: codec@41 {
+ compatible = "ti,tas2770";
+ reg = <0x41>;
+- #sound-dai-cells = <1>;
++ #sound-dai-cells = <0>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14>;
+ reset-gpio = <&gpio1 15 0>;
+diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+index 35a8045b2c70d..53627c6e2ae32 100644
+--- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
++++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+@@ -106,7 +106,7 @@ examples:
+ dma-names = "rx", "tx";
+
+ flash@0 {
+- compatible = "spi-nor";
++ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <104000000>;
+ spi-tx-bus-width = <2>;
+diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
+index ef5698f426b2c..392204a08e96c 100644
+--- a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
++++ b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
+@@ -45,6 +45,7 @@ properties:
+ maxItems: 2
+
+ interconnect-names:
++ minItems: 1
+ items:
+ - const: qspi-config
+ - const: qspi-memory
+diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
+index 9787be21318e6..82d0ca5c00f3b 100644
+--- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml
++++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
+@@ -49,6 +49,13 @@ properties:
+ enum: [ 0, 1 ]
+ default: 0
+
++required:
++ - compatible
++ - reg
++ - interrupts
++ - clock-names
++ - clocks
++
+ unevaluatedProperties: false
+
+ examples:
+diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt
+index 529f2dab2648a..7bcbb229b78bb 100644
+--- a/Documentation/devicetree/bindings/spi/spi-mxic.txt
++++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt
+@@ -8,11 +8,13 @@ Required properties:
+ - reg: should contain 2 entries, one for the registers and one for the direct
+ mapping area
+ - reg-names: should contain "regs" and "dirmap"
+-- interrupts: interrupt line connected to the SPI controller
+ - clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk"
+ - clocks: should contain 3 entries for the "ps_clk", "send_clk" and
+ "send_dly_clk" clocks
+
++Optional properties:
++- interrupts: interrupt line connected to the SPI controller
++
+ Example:
+
+ spi@43c30000 {
+diff --git a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
+index ea72c8001256f..fafde1c06be67 100644
+--- a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
++++ b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
+@@ -30,6 +30,13 @@ properties:
+ clocks:
+ maxItems: 2
+
++required:
++ - compatible
++ - reg
++ - interrupts
++ - clock-names
++ - clocks
++
+ unevaluatedProperties: false
+
+ examples:
+diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+index a07de5ed0ca6a..2d34f3ccb2572 100644
+--- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
++++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+@@ -199,12 +199,11 @@ patternProperties:
+
+ contribution:
+ $ref: /schemas/types.yaml#/definitions/uint32
+- minimum: 0
+- maximum: 100
+ description:
+- The percentage contribution of the cooling devices at the
+- specific trip temperature referenced in this map
+- to this thermal zone
++ The cooling contribution to the thermal zone of the referred
++ cooling device at the referred trip point. The contribution is
++ a ratio of the sum of all cooling contributions within a
++ thermal zone.
+
+ required:
+ - trip
+diff --git a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
+index dc9d6ed0781d2..5d0bfea2c087e 100644
+--- a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
++++ b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
+@@ -64,7 +64,7 @@ properties:
+ description:
+ size of memory intended as internal memory for endpoints
+ buffers expressed in KB
+- $ref: /schemas/types.yaml#/definitions/uint32
++ $ref: /schemas/types.yaml#/definitions/uint16
+
+ cdns,phyrst-a-enable:
+ description: Enable resetting of PHY if Rx fail is detected
+diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
+index 8913497624de2..cb5da1df8d405 100644
+--- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml
++++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
+@@ -135,7 +135,8 @@ properties:
+ Phandle of a companion.
+
+ phys:
+- maxItems: 1
++ minItems: 1
++ maxItems: 3
+
+ phy-names:
+ const: usb
+diff --git a/Documentation/devicetree/bindings/usb/generic-ohci.yaml b/Documentation/devicetree/bindings/usb/generic-ohci.yaml
+index acbf94fa5f74a..d5fd3aa53ed29 100644
+--- a/Documentation/devicetree/bindings/usb/generic-ohci.yaml
++++ b/Documentation/devicetree/bindings/usb/generic-ohci.yaml
+@@ -102,7 +102,8 @@ properties:
+ Overrides the detected port count
+
+ phys:
+- maxItems: 1
++ minItems: 1
++ maxItems: 3
+
+ phy-names:
+ const: usb
+diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
+index 11f7bacd4e2b0..620cbf00bedb5 100644
+--- a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
++++ b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
+@@ -56,6 +56,7 @@ properties:
+ - description: optional, wakeup interrupt used to support runtime PM
+
+ interrupt-names:
++ minItems: 1
+ items:
+ - const: host
+ - const: wakeup
+diff --git a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
+index 078fb78895937..5d1e49d823c96 100644
+--- a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
++++ b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
+@@ -240,7 +240,7 @@ properties:
+ description:
+ High-Speed PHY interface selection between UTMI+ and ULPI when the
+ DWC_USB3_HSPHY_INTERFACE has value 3.
+- $ref: /schemas/types.yaml#/definitions/uint8
++ $ref: /schemas/types.yaml#/definitions/string
+ enum: [utmi, ulpi]
+
+ snps,quirk-frame-length-adjustment:
+diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
+index 56853c17af667..1dc3d5d7b44fe 100644
+--- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml
++++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
+@@ -33,7 +33,7 @@ patternProperties:
+ "^.*@[0-9a-f]{1,2}$":
+ description: The hard wired USB devices
+ type: object
+- $ref: /usb/usb-device.yaml
++ $ref: /schemas/usb/usb-device.yaml
+
+ additionalProperties: true
+
+diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
+index 76cb9586ee00c..93cd77a6e92c0 100644
+--- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
++++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml
+@@ -39,8 +39,8 @@ properties:
+ samsung,syscon-phandle:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+- Phandle to the PMU system controller node (in case of Exynos5250
+- and Exynos5420).
++ Phandle to the PMU system controller node (in case of Exynos5250,
++ Exynos5420 and Exynos7).
+
+ required:
+ - compatible
+@@ -58,6 +58,7 @@ allOf:
+ enum:
+ - samsung,exynos5250-wdt
+ - samsung,exynos5420-wdt
++ - samsung,exynos7-wdt
+ then:
+ required:
+ - samsung,syscon-phandle
+diff --git a/Documentation/devicetree/overlay-notes.rst b/Documentation/devicetree/overlay-notes.rst
+index b2b8db765b8c6..e139f22b363e9 100644
+--- a/Documentation/devicetree/overlay-notes.rst
++++ b/Documentation/devicetree/overlay-notes.rst
+@@ -119,10 +119,32 @@ Finally, if you need to remove all overlays in one-go, just call
+ of_overlay_remove_all() which will remove every single one in the correct
+ order.
+
+-In addition, there is the option to register notifiers that get called on
++There is the option to register notifiers that get called on
+ overlay operations. See of_overlay_notifier_register/unregister and
+ enum of_overlay_notify_action for details.
+
+-Note that a notifier callback is not supposed to store pointers to a device
+-tree node or its content beyond OF_OVERLAY_POST_REMOVE corresponding to the
+-respective node it received.
++A notifier callback for OF_OVERLAY_PRE_APPLY, OF_OVERLAY_POST_APPLY, or
++OF_OVERLAY_PRE_REMOVE may store pointers to a device tree node in the overlay
++or its content but these pointers must not persist past the notifier callback
++for OF_OVERLAY_POST_REMOVE. The memory containing the overlay will be
++kfree()ed after OF_OVERLAY_POST_REMOVE notifiers are called. Note that the
++memory will be kfree()ed even if the notifier for OF_OVERLAY_POST_REMOVE
++returns an error.
++
++The changeset notifiers in drivers/of/dynamic.c are a second type of notifier
++that could be triggered by applying or removing an overlay. These notifiers
++are not allowed to store pointers to a device tree node in the overlay
++or its content. The overlay code does not protect against such pointers
++remaining active when the memory containing the overlay is freed as a result
++of removing the overlay.
++
++Any other code that retains a pointer to the overlay nodes or data is
++considered to be a bug because after removing the overlay the pointer
++will refer to freed memory.
++
++Users of overlays must be especially aware of the overall operations that
++occur on the system to ensure that other kernel code does not retain any
++pointers to the overlay nodes or data. Any example of an inadvertent use
++of such pointers is if a driver or subsystem module is loaded after an
++overlay has been applied, and the driver or subsystem scans the entire
++devicetree or a large portion of it, including the overlay nodes.
+diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst
+index ee268d445d38b..d2e1d8b58e7dc 100644
+--- a/Documentation/driver-api/dmaengine/dmatest.rst
++++ b/Documentation/driver-api/dmaengine/dmatest.rst
+@@ -143,13 +143,14 @@ Part 5 - Handling channel allocation
+ Allocating Channels
+ -------------------
+
+-Channels are required to be configured prior to starting the test run.
+-Attempting to run the test without configuring the channels will fail.
++Channels do not need to be configured prior to starting a test run. Attempting
++to run the test without configuring the channels will result in testing any
++channels that are available.
+
+ Example::
+
+ % echo 1 > /sys/module/dmatest/parameters/run
+- dmatest: Could not start test, no channels configured
++ dmatest: No channels configured, continue with any
+
+ Channels are registered using the "channel" parameter. Channels can be requested by their
+ name, once requested, the channel is registered and a pending thread is added to the test list.
+diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst
+index 94a2d7f01d999..d3cfa73cbb2b4 100644
+--- a/Documentation/driver-api/firewire.rst
++++ b/Documentation/driver-api/firewire.rst
+@@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module
+ Firewire char device data structures
+ ====================================
+
+-.. include:: /ABI/stable/firewire-cdev
++.. include:: ../ABI/stable/firewire-cdev
+ :literal:
+
+ .. kernel-doc:: include/uapi/linux/firewire-cdev.h
+@@ -28,7 +28,7 @@ Firewire char device data structures
+ Firewire device probing and sysfs interfaces
+ ============================================
+
+-.. include:: /ABI/stable/sysfs-bus-firewire
++.. include:: ../ABI/stable/sysfs-bus-firewire
+ :literal:
+
+ .. kernel-doc:: drivers/firewire/core-device.c
+diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst
+index b81794e0cfbb9..06ac89adaafba 100644
+--- a/Documentation/driver-api/firmware/other_interfaces.rst
++++ b/Documentation/driver-api/firmware/other_interfaces.rst
+@@ -13,6 +13,12 @@ EDD Interfaces
+ .. kernel-doc:: drivers/firmware/edd.c
+ :internal:
+
++Generic System Framebuffers Interface
++-------------------------------------
++
++.. kernel-doc:: drivers/firmware/sysfb.c
++ :export:
++
+ Intel Stratix10 SoC Service Layer
+ ---------------------------------
+ Some features of the Intel Stratix10 SoC require a level of privilege
+diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst
+index 64fe7db080e52..252aeb639bc40 100644
+--- a/Documentation/driver-api/generic-counter.rst
++++ b/Documentation/driver-api/generic-counter.rst
+@@ -247,7 +247,7 @@ for defining a counter device.
+ .. kernel-doc:: include/linux/counter.h
+ :internal:
+
+-.. kernel-doc:: drivers/counter/counter.c
++.. kernel-doc:: drivers/counter/counter-core.c
+ :export:
+
+ Implementation
+diff --git a/Documentation/driver-api/spi.rst b/Documentation/driver-api/spi.rst
+index f64cb666498aa..f28887045049d 100644
+--- a/Documentation/driver-api/spi.rst
++++ b/Documentation/driver-api/spi.rst
+@@ -25,8 +25,8 @@ hardware, which may be as simple as a set of GPIO pins or as complex as
+ a pair of FIFOs connected to dual DMA engines on the other side of the
+ SPI shift register (maximizing throughput). Such drivers bridge between
+ whatever bus they sit on (often the platform bus) and SPI, and expose
+-the SPI side of their device as a :c:type:`struct spi_master
+-<spi_master>`. SPI devices are children of that master,
++the SPI side of their device as a :c:type:`struct spi_controller
++<spi_controller>`. SPI devices are children of that master,
+ represented as a :c:type:`struct spi_device <spi_device>` and
+ manufactured from :c:type:`struct spi_board_info
+ <spi_board_info>` descriptors which are usually provided by
+diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
+index 4a25c5eb6f072..8c47847755a68 100644
+--- a/Documentation/fault-injection/fault-injection.rst
++++ b/Documentation/fault-injection/fault-injection.rst
+@@ -83,9 +83,7 @@ configuration of fault-injection capabilities.
+ - /sys/kernel/debug/fail*/times:
+
+ specifies how many times failures may happen at most. A value of -1
+- means "no limit". Note, though, that this file only accepts unsigned
+- values. So, if you want to specify -1, you better use 'printf' instead
+- of 'echo', e.g.: $ printf %#x -1 > times
++ means "no limit".
+
+ - /sys/kernel/debug/fail*/space:
+
+@@ -277,7 +275,7 @@ Application Examples
+ echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
+ echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+ echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
++ echo -1 > /sys/kernel/debug/$FAILTYPE/times
+ echo 0 > /sys/kernel/debug/$FAILTYPE/space
+ echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+ echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+@@ -331,7 +329,7 @@ Application Examples
+ echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+ echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+ echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
++ echo -1 > /sys/kernel/debug/$FAILTYPE/times
+ echo 0 > /sys/kernel/debug/$FAILTYPE/space
+ echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+ echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+@@ -362,7 +360,7 @@ Application Examples
+ echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+ echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+ echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
++ echo -1 > /sys/kernel/debug/$FAILTYPE/times
+ echo 0 > /sys/kernel/debug/$FAILTYPE/space
+ echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+diff --git a/Documentation/filesystems/autofs-mount-control.rst b/Documentation/filesystems/autofs-mount-control.rst
+index bf4b511cdbe85..b5a379d25c40b 100644
+--- a/Documentation/filesystems/autofs-mount-control.rst
++++ b/Documentation/filesystems/autofs-mount-control.rst
+@@ -196,7 +196,7 @@ information and return operation results::
+ struct args_ismountpoint ismountpoint;
+ };
+
+- char path[0];
++ char path[];
+ };
+
+ The ioctlfd field is a mount point file descriptor of an autofs mount
+diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst
+index 681c6a492bc0c..1b495768e7aaf 100644
+--- a/Documentation/filesystems/autofs.rst
++++ b/Documentation/filesystems/autofs.rst
+@@ -467,7 +467,7 @@ Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure::
+ struct args_ismountpoint ismountpoint;
+ };
+
+- char path[0];
++ char path[];
+ };
+
+ For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
+diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst
+index 504ba940c36c1..dccd61c7c5c3b 100644
+--- a/Documentation/filesystems/directory-locking.rst
++++ b/Documentation/filesystems/directory-locking.rst
+@@ -22,12 +22,11 @@ exclusive.
+ 3) object removal. Locking rules: caller locks parent, finds victim,
+ locks victim and calls the method. Locks are exclusive.
+
+-4) rename() that is _not_ cross-directory. Locking rules: caller locks
+-the parent and finds source and target. In case of exchange (with
+-RENAME_EXCHANGE in flags argument) lock both. In any case,
+-if the target already exists, lock it. If the source is a non-directory,
+-lock it. If we need to lock both, lock them in inode pointer order.
+-Then call the method. All locks are exclusive.
++4) rename() that is _not_ cross-directory. Locking rules: caller locks the
++parent and finds source and target. We lock both (provided they exist). If we
++need to lock two inodes of different type (dir vs non-dir), we lock directory
++first. If we need to lock two inodes of the same type, lock them in inode
++pointer order. Then call the method. All locks are exclusive.
+ NB: we might get away with locking the source (and target in exchange
+ case) shared.
+
+@@ -44,15 +43,17 @@ All locks are exclusive.
+ rules:
+
+ * lock the filesystem
+- * lock parents in "ancestors first" order.
++ * lock parents in "ancestors first" order. If one is not ancestor of
++ the other, lock them in inode pointer order.
+ * find source and target.
+ * if old parent is equal to or is a descendent of target
+ fail with -ENOTEMPTY
+ * if new parent is equal to or is a descendent of source
+ fail with -ELOOP
+- * If it's an exchange, lock both the source and the target.
+- * If the target exists, lock it. If the source is a non-directory,
+- lock it. If we need to lock both, do so in inode pointer order.
++ * Lock both the source and the target provided they exist. If we
++ need to lock two inodes of different type (dir vs non-dir), we lock
++ the directory first. If we need to lock two inodes of the same type,
++ lock them in inode pointer order.
+ * call the method.
+
+ All ->i_rwsem are taken exclusive. Again, we might get away with locking
+@@ -66,8 +67,9 @@ If no directory is its own ancestor, the scheme above is deadlock-free.
+
+ Proof:
+
+- First of all, at any moment we have a partial ordering of the
+- objects - A < B iff A is an ancestor of B.
++ First of all, at any moment we have a linear ordering of the
++ objects - A < B iff (A is an ancestor of B) or (B is not an ancestor
++ of A and ptr(A) < ptr(B)).
+
+ That ordering can change. However, the following is true:
+
+diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst
+index 54386a010a8d7..871d2da7a0a91 100644
+--- a/Documentation/filesystems/ext4/attributes.rst
++++ b/Documentation/filesystems/ext4/attributes.rst
+@@ -76,7 +76,7 @@ The beginning of an extended attribute block is in
+ - Checksum of the extended attribute block.
+ * - 0x14
+ - \_\_u32
+- - h\_reserved[2]
++ - h\_reserved[3]
+ - Zero.
+
+ The checksum is calculated against the FS UUID, the 64-bit block number
+diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
+index 09de6ebbbdfa2..7fe50b0bccde9 100644
+--- a/Documentation/filesystems/f2fs.rst
++++ b/Documentation/filesystems/f2fs.rst
+@@ -197,6 +197,7 @@ fault_type=%d Support configuring fault injection type, should be
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
++ FAULT_DQUOT_INIT 0x000010000
+ =================== ===========
+ mode=%s Control block allocation mode which supports "adaptive"
+ and "lfs". In "lfs" mode, there should be no random
+diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
+index 0eb799d9d05a2..7940a45d39522 100644
+--- a/Documentation/filesystems/fscrypt.rst
++++ b/Documentation/filesystems/fscrypt.rst
+@@ -176,11 +176,11 @@ Master Keys
+
+ Each encrypted directory tree is protected by a *master key*. Master
+ keys can be up to 64 bytes long, and must be at least as long as the
+-greater of the key length needed by the contents and filenames
+-encryption modes being used. For example, if AES-256-XTS is used for
+-contents encryption, the master key must be 64 bytes (512 bits). Note
+-that the XTS mode is defined to require a key twice as long as that
+-required by the underlying block cipher.
++greater of the security strength of the contents and filenames
++encryption modes being used. For example, if any AES-256 mode is
++used, the master key must be at least 256 bits, i.e. 32 bytes. A
++stricter requirement applies if the key is used by a v1 encryption
++policy and AES-256-XTS is used; such keys must be 64 bytes.
+
+ To "unlock" an encrypted directory tree, userspace must provide the
+ appropriate master key. There can be any number of master keys, each
+diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
+index 1229a75ec75dd..7a879ec3b6bf0 100644
+--- a/Documentation/filesystems/idmappings.rst
++++ b/Documentation/filesystems/idmappings.rst
+@@ -952,75 +952,3 @@ The raw userspace id that is put on disk is ``u1000`` so when the user takes
+ their home directory back to their home computer where they are assigned
+ ``u1000`` using the initial idmapping and mount the filesystem with the initial
+ idmapping they will see all those files owned by ``u1000``.
+-
+-Shortcircuting
+---------------
+-
+-Currently, the implementation of idmapped mounts enforces that the filesystem
+-is mounted with the initial idmapping. The reason is simply that none of the
+-filesystems that we targeted were mountable with a non-initial idmapping. But
+-that might change soon enough. As we've seen above, thanks to the properties of
+-idmappings the translation works for both filesystems mounted with the initial
+-idmapping and filesystem with non-initial idmappings.
+-
+-Based on this current restriction to filesystem mounted with the initial
+-idmapping two noticeable shortcuts have been taken:
+-
+-1. We always stash a reference to the initial user namespace in ``struct
+- vfsmount``. Idmapped mounts are thus mounts that have a non-initial user
+- namespace attached to them.
+-
+- In order to support idmapped mounts this needs to be changed. Instead of
+- stashing the initial user namespace the user namespace the filesystem was
+- mounted with must be stashed. An idmapped mount is then any mount that has
+- a different user namespace attached then the filesystem was mounted with.
+- This has no user-visible consequences.
+-
+-2. The translation algorithms in ``mapped_fs*id()`` and ``i_*id_into_mnt()``
+- are simplified.
+-
+- Let's consider ``mapped_fs*id()`` first. This function translates the
+- caller's kernel id into a kernel id in the filesystem's idmapping via
+- a mount's idmapping. The full algorithm is::
+-
+- mapped_fsuid(kid):
+- /* Map the kernel id up into a userspace id in the mount's idmapping. */
+- from_kuid(mount-idmapping, kid) = uid
+-
+- /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+- make_kuid(filesystem-idmapping, uid) = kuid
+-
+- We know that the filesystem is always mounted with the initial idmapping as
+- we enforce this in ``mount_setattr()``. So this can be shortened to::
+-
+- mapped_fsuid(kid):
+- /* Map the kernel id up into a userspace id in the mount's idmapping. */
+- from_kuid(mount-idmapping, kid) = uid
+-
+- /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
+- KUIDT_INIT(uid) = kuid
+-
+- Similarly, for ``i_*id_into_mnt()`` which translated the filesystem's kernel
+- id into a mount's kernel id::
+-
+- i_uid_into_mnt(kid):
+- /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+- from_kuid(filesystem-idmapping, kid) = uid
+-
+- /* Map the userspace id down into a kernel id in the mounts's idmapping. */
+- make_kuid(mount-idmapping, uid) = kuid
+-
+- Again, we know that the filesystem is always mounted with the initial
+- idmapping as we enforce this in ``mount_setattr()``. So this can be
+- shortened to::
+-
+- i_uid_into_mnt(kid):
+- /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
+- __kuid_val(kid) = uid
+-
+- /* Map the userspace id down into a kernel id in the mounts's idmapping. */
+- make_kuid(mount-idmapping, uid) = kuid
+-
+-Handling filesystems mounted with non-initial idmappings requires that the
+-translation functions be converted to their full form. They can still be
+-shortcircuited on non-idmapped mounts. This has no user-visible consequences.
+diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
+index bf5c48066fac5..a15527940b461 100644
+--- a/Documentation/filesystems/vfs.rst
++++ b/Documentation/filesystems/vfs.rst
+@@ -274,6 +274,9 @@ or bottom half).
+ This is specifically for the inode itself being marked dirty,
+ not its data. If the update needs to be persisted by fdatasync(),
+ then I_DIRTY_DATASYNC will be set in the flags argument.
++ I_DIRTY_TIME will be set in the flags in case lazytime is enabled
++ and struct inode has times updated since the last ->dirty_inode
++ call.
+
+ ``write_inode``
+ this method is called when the VFS needs to write an inode to
+@@ -1207,7 +1210,7 @@ defined:
+ return
+ -ECHILD and it will be called again in ref-walk mode.
+
+-``_weak_revalidate``
++``d_weak_revalidate``
+ called when the VFS needs to revalidate a "jumped" dentry. This
+ is called when a path-walk ends at dentry that was not acquired
+ by doing a lookup in the parent directory. This includes "/",
+diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst
+index c042176e17078..50ac87fa22958 100644
+--- a/Documentation/firmware-guide/acpi/apei/einj.rst
++++ b/Documentation/firmware-guide/acpi/apei/einj.rst
+@@ -168,7 +168,7 @@ An error injection example::
+ 0x00000008 Memory Correctable
+ 0x00000010 Memory Uncorrectable non-fatal
+ # echo 0x12345000 > param1 # Set memory address for injection
+- # echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page
++ # echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page
+ # echo 0x8 > error_type # Choose correctable memory error
+ # echo 1 > error_inject # Inject now
+
+diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+index b7ad47df49de0..8b65b32e6e40e 100644
+--- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
++++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+@@ -5,7 +5,7 @@
+ Referencing hierarchical data nodes
+ ===================================
+
+-:Copyright: |copy| 2018 Intel Corporation
++:Copyright: |copy| 2018, 2021 Intel Corporation
+ :Author: Sakari Ailus <sakari.ailus@linux.intel.com>
+
+ ACPI in general allows referring to device objects in the tree only.
+@@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference.
+ Name (NOD0, Package() {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
++ Package () { "reg", 0 },
+ Package () { "random-property", 3 },
+ }
+ })
+ Name (NOD1, Package() {
+ ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+ Package () {
++ Package () { "reg", 1 },
+ Package () { "anothernode", "ANOD" },
+ }
+ })
+@@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference.
+ Name (_DSD, Package () {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+- Package () { "reference", ^DEV0, "node@1", "anothernode" },
++ Package () {
++ "reference", Package () {
++ ^DEV0, "node@1", "anothernode"
++ }
++ },
+ }
+ })
+ }
+diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
+index 204ebdaadb45a..03021dfa0dd81 100644
+--- a/Documentation/gpu/i915.rst
++++ b/Documentation/gpu/i915.rst
+@@ -183,25 +183,25 @@ Frame Buffer Compression (FBC)
+ Display Refresh Rate Switching (DRRS)
+ -------------------------------------
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :doc: Display Refresh Rate Switching (DRRS)
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_dp_set_drrs_state
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_edp_drrs_enable
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_edp_drrs_disable
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_edp_drrs_invalidate
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_edp_drrs_flush
+
+-.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
++.. kernel-doc:: drivers/gpu/drm/i915/display/intel_drrs.c
+ :functions: intel_dp_drrs_init
+
+ DPIO
+diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
+index 12e61869939e8..67de1e94fdf76 100644
+--- a/Documentation/gpu/todo.rst
++++ b/Documentation/gpu/todo.rst
+@@ -311,27 +311,6 @@ Contact: Daniel Vetter, Noralf Tronnes
+
+ Level: Advanced
+
+-Garbage collect fbdev scrolling acceleration
+---------------------------------------------
+-
+-Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode =
+-SCROLL_REDRAW. There's a ton of code this will allow us to remove:
+-
+-- lots of code in fbcon.c
+-
+-- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called
+- directly instead of the function table (with a switch on p->rotate)
+-
+-- fb_copyarea is unused after this, and can be deleted from all drivers
+-
+-Note that not all acceleration code can be deleted, since clearing and cursor
+-support is still accelerated, which might be good candidates for further
+-deletion projects.
+-
+-Contact: Daniel Vetter
+-
+-Level: Intermediate
+-
+ idr_init_base()
+ ---------------
+
+diff --git a/Documentation/hwmon/ftsteutates.rst b/Documentation/hwmon/ftsteutates.rst
+index 58a2483d8d0da..198fa8e2819da 100644
+--- a/Documentation/hwmon/ftsteutates.rst
++++ b/Documentation/hwmon/ftsteutates.rst
+@@ -22,6 +22,10 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and
+ 8 fans. It also contains an integrated watchdog which is currently
+ implemented in this driver.
+
++The 4 voltages require a board-specific multiplier, since the BMC can
++only measure voltages up to 3.3V and thus relies on voltage dividers.
++Consult your motherboard manual for details.
++
+ To clear a temperature or fan alarm, execute the following command with the
+ correct path to the alarm file::
+
+diff --git a/Documentation/hwmon/lm90.rst b/Documentation/hwmon/lm90.rst
+index 3da8c6e06a365..05391fb4042d9 100644
+--- a/Documentation/hwmon/lm90.rst
++++ b/Documentation/hwmon/lm90.rst
+@@ -265,6 +265,16 @@ Supported chips:
+
+ https://www.ti.com/litv/pdf/sbos686
+
++ * Texas Instruments TMP461
++
++ Prefix: 'tmp461'
++
++ Addresses scanned: I2C 0x48 through 0x4F
++
++ Datasheet: Publicly available at TI website
++
++ https://www.ti.com/lit/gpn/tmp461
++
+ Author: Jean Delvare <jdelvare@suse.de>
+
+
+diff --git a/Documentation/input/joydev/joystick.rst b/Documentation/input/joydev/joystick.rst
+index f615906a0821b..6d721396717a2 100644
+--- a/Documentation/input/joydev/joystick.rst
++++ b/Documentation/input/joydev/joystick.rst
+@@ -517,6 +517,7 @@ All I-Force devices are supported by the iforce module. This includes:
+ * AVB Mag Turbo Force
+ * AVB Top Shot Pegasus
+ * AVB Top Shot Force Feedback Racing Wheel
++* Boeder Force Feedback Wheel
+ * Logitech WingMan Force
+ * Logitech WingMan Force Wheel
+ * Guillemot Race Leader Force Feedback
+diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
+index 90bc3f51eda97..d431718921b79 100644
+--- a/Documentation/kernel-hacking/locking.rst
++++ b/Documentation/kernel-hacking/locking.rst
+@@ -1352,7 +1352,7 @@ Mutex API reference
+ Futex API reference
+ ===================
+
+-.. kernel-doc:: kernel/futex.c
++.. kernel-doc:: kernel/futex/core.c
+ :internal:
+
+ Further reading
+diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst
+index ddada4a537493..4fd7b70fcde19 100644
+--- a/Documentation/locking/locktypes.rst
++++ b/Documentation/locking/locktypes.rst
+@@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels::
+ spin_lock(&p->lock);
+ p->count += this_cpu_read(var2);
+
+-On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable()
+-which makes the above code fully equivalent. On a PREEMPT_RT kernel
+ migrate_disable() ensures that the task is pinned on the current CPU which
+ in turn guarantees that the per-CPU access to var1 and var2 are staying on
+-the same CPU.
++the same CPU while the task remains preemptible.
+
+ The migrate_disable() substitution is not valid for the following
+ scenario::
+@@ -456,9 +454,8 @@ scenario::
+ p = this_cpu_ptr(&var1);
+ p->val = func2();
+
+-While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because
+-here migrate_disable() does not protect against reentrancy from a
+-preempting task. A correct substitution for this case is::
++This breaks because migrate_disable() does not protect against reentrancy from
++a preempting task. A correct substitution for this case is::
+
+ func()
+ {
+diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
+index 60b217b436be6..5b77b9e5ac7e6 100644
+--- a/Documentation/networking/af_xdp.rst
++++ b/Documentation/networking/af_xdp.rst
+@@ -433,6 +433,15 @@ start N bytes into the buffer leaving the first N bytes for the
+ application to use. The final option is the flags field, but it will
+ be dealt with in separate sections for each UMEM flag.
+
++SO_BINDTODEVICE setsockopt
++--------------------------
++
++This is a generic SOL_SOCKET option that can be used to tie AF_XDP
++socket to a particular network interface. It is useful when a socket
++is created by a privileged process and passed to a non-privileged one.
++Once the option is set, kernel will refuse attempts to bind that socket
++to a different interface. Updating the value requires CAP_NET_RAW.
++
+ XDP_STATISTICS getsockopt
+ -------------------------
+
+diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
+index 31cfd7d674a6c..ab98373535ea6 100644
+--- a/Documentation/networking/bonding.rst
++++ b/Documentation/networking/bonding.rst
+@@ -196,11 +196,12 @@ ad_actor_sys_prio
+ ad_actor_system
+
+ In an AD system, this specifies the mac-address for the actor in
+- protocol packet exchanges (LACPDUs). The value cannot be NULL or
+- multicast. It is preferred to have the local-admin bit set for this
+- mac but driver does not enforce it. If the value is not given then
+- system defaults to using the masters' mac address as actors' system
+- address.
++ protocol packet exchanges (LACPDUs). The value cannot be a multicast
++ address. If the all-zeroes MAC is specified, bonding will internally
++ use the MAC of the bond itself. It is preferred to have the
++ local-admin bit set for this mac but driver does not enforce it. If
++ the value is not given then system defaults to using the masters'
++ mac address as actors' system address.
+
+ This parameter has effect only in 802.3ad mode and is available through
+ SysFs interface.
+@@ -421,6 +422,17 @@ arp_all_targets
+ consider the slave up only when all of the arp_ip_targets
+ are reachable
+
++arp_missed_max
++
++ Specifies the number of arp_interval monitor checks that must
++ fail in order for an interface to be marked down by the ARP monitor.
++
++ In order to provide orderly failover semantics, backup interfaces
++ are permitted an extra monitor check (i.e., they must fail
++ arp_missed_max + 1 times before being marked down).
++
++ The default value is 2, and the allowable range is 1 - 255.
++
+ downdelay
+
+ Specifies the time, in milliseconds, to wait before disabling
+diff --git a/Documentation/networking/decnet.rst b/Documentation/networking/decnet.rst
+deleted file mode 100644
+index b8bc11ff8370d..0000000000000
+--- a/Documentation/networking/decnet.rst
++++ /dev/null
+@@ -1,243 +0,0 @@
+-.. SPDX-License-Identifier: GPL-2.0
+-
+-=========================================
+-Linux DECnet Networking Layer Information
+-=========================================
+-
+-1. Other documentation....
+-==========================
+-
+- - Project Home Pages
+- - http://www.chygwyn.com/ - Kernel info
+- - http://linux-decnet.sourceforge.net/ - Userland tools
+- - http://www.sourceforge.net/projects/linux-decnet/ - Status page
+-
+-2. Configuring the kernel
+-=========================
+-
+-Be sure to turn on the following options:
+-
+- - CONFIG_DECNET (obviously)
+- - CONFIG_PROC_FS (to see what's going on)
+- - CONFIG_SYSCTL (for easy configuration)
+-
+-if you want to try out router support (not properly debugged yet)
+-you'll need the following options as well...
+-
+- - CONFIG_DECNET_ROUTER (to be able to add/delete routes)
+- - CONFIG_NETFILTER (will be required for the DECnet routing daemon)
+-
+-Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
+-that you need it, in general you won't and it can cause ifconfig to
+-malfunction.
+-
+-Run time configuration has changed slightly from the 2.4 system. If you
+-want to configure an endnode, then the simplified procedure is as follows:
+-
+- - Set the MAC address on your ethernet card before starting _any_ other
+- network protocols.
+-
+-As soon as your network card is brought into the UP state, DECnet should
+-start working. If you need something more complicated or are unsure how
+-to set the MAC address, see the next section. Also all configurations which
+-worked with 2.4 will work under 2.5 with no change.
+-
+-3. Command line options
+-=======================
+-
+-You can set a DECnet address on the kernel command line for compatibility
+-with the 2.4 configuration procedure, but in general it's not needed any more.
+-If you do st a DECnet address on the command line, it has only one purpose
+-which is that its added to the addresses on the loopback device.
+-
+-With 2.4 kernels, DECnet would only recognise addresses as local if they
+-were added to the loopback device. In 2.5, any local interface address
+-can be used to loop back to the local machine. Of course this does not
+-prevent you adding further addresses to the loopback device if you
+-want to.
+-
+-N.B. Since the address list of an interface determines the addresses for
+-which "hello" messages are sent, if you don't set an address on the loopback
+-interface then you won't see any entries in /proc/net/neigh for the local
+-host until such time as you start a connection. This doesn't affect the
+-operation of the local communications in any other way though.
+-
+-The kernel command line takes options looking like the following::
+-
+- decnet.addr=1,2
+-
+-the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels
+-and early 2.3.xx kernels, you must use a comma when specifying the
+-DECnet address like this. For more recent 2.3.xx kernels, you may
+-use almost any character except space, although a `.` would be the most
+-obvious choice :-)
+-
+-There used to be a third number specifying the node type. This option
+-has gone away in favour of a per interface node type. This is now set
+-using /proc/sys/net/decnet/conf/<dev>/forwarding. This file can be
+-set with a single digit, 0=EndNode, 1=L1 Router and 2=L2 Router.
+-
+-There are also equivalent options for modules. The node address can
+-also be set through the /proc/sys/net/decnet/ files, as can other system
+-parameters.
+-
+-Currently the only supported devices are ethernet and ip_gre. The
+-ethernet address of your ethernet card has to be set according to the DECnet
+-address of the node in order for it to be autoconfigured (and then appear in
+-/proc/net/decnet_dev). There is a utility available at the above
+-FTP sites called dn2ethaddr which can compute the correct ethernet
+-address to use. The address can be set by ifconfig either before or
+-at the time the device is brought up. If you are using RedHat you can
+-add the line::
+-
+- MACADDR=AA:00:04:00:03:04
+-
+-or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or
+-wherever your network card's configuration lives. Setting the MAC address
+-of your ethernet card to an address starting with "hi-ord" will cause a
+-DECnet address which matches to be added to the interface (which you can
+-verify with iproute2).
+-
+-The default device for routing can be set through the /proc filesystem
+-by setting /proc/sys/net/decnet/default_device to the
+-device you want DECnet to route packets out of when no specific route
+-is available. Usually this will be eth0, for example::
+-
+- echo -n "eth0" >/proc/sys/net/decnet/default_device
+-
+-If you don't set the default device, then it will default to the first
+-ethernet card which has been autoconfigured as described above. You can
+-confirm that by looking in the default_device file of course.
+-
+-There is a list of what the other files under /proc/sys/net/decnet/ do
+-on the kernel patch web site (shown above).
+-
+-4. Run time kernel configuration
+-================================
+-
+-
+-This is either done through the sysctl/proc interface (see the kernel web
+-pages for details on what the various options do) or through the iproute2
+-package in the same way as IPv4/6 configuration is performed.
+-
+-Documentation for iproute2 is included with the package, although there is
+-as yet no specific section on DECnet, most of the features apply to both
+-IP and DECnet, albeit with DECnet addresses instead of IP addresses and
+-a reduced functionality.
+-
+-If you want to configure a DECnet router you'll need the iproute2 package
+-since its the _only_ way to add and delete routes currently. Eventually
+-there will be a routing daemon to send and receive routing messages for
+-each interface and update the kernel routing tables accordingly. The
+-routing daemon will use netfilter to listen to routing packets, and
+-rtnetlink to update the kernels routing tables.
+-
+-The DECnet raw socket layer has been removed since it was there purely
+-for use by the routing daemon which will now use netfilter (a much cleaner
+-and more generic solution) instead.
+-
+-5. How can I tell if its working?
+-=================================
+-
+-Here is a quick guide of what to look for in order to know if your DECnet
+-kernel subsystem is working.
+-
+- - Is the node address set (see /proc/sys/net/decnet/node_address)
+- - Is the node of the correct type
+- (see /proc/sys/net/decnet/conf/<dev>/forwarding)
+- - Is the Ethernet MAC address of each Ethernet card set to match
+- the DECnet address. If in doubt use the dn2ethaddr utility available
+- at the ftp archive.
+- - If the previous two steps are satisfied, and the Ethernet card is up,
+- you should find that it is listed in /proc/net/decnet_dev and also
+- that it appears as a directory in /proc/sys/net/decnet/conf/. The
+- loopback device (lo) should also appear and is required to communicate
+- within a node.
+- - If you have any DECnet routers on your network, they should appear
+- in /proc/net/decnet_neigh, otherwise this file will only contain the
+- entry for the node itself (if it doesn't check to see if lo is up).
+- - If you want to send to any node which is not listed in the
+- /proc/net/decnet_neigh file, you'll need to set the default device
+- to point to an Ethernet card with connection to a router. This is
+- again done with the /proc/sys/net/decnet/default_device file.
+- - Try starting a simple server and client, like the dnping/dnmirror
+- over the loopback interface. With luck they should communicate.
+- For this step and those after, you'll need the DECnet library
+- which can be obtained from the above ftp sites as well as the
+- actual utilities themselves.
+- - If this seems to work, then try talking to a node on your local
+- network, and see if you can obtain the same results.
+- - At this point you are on your own... :-)
+-
+-6. How to send a bug report
+-===========================
+-
+-If you've found a bug and want to report it, then there are several things
+-you can do to help me work out exactly what it is that is wrong. Useful
+-information (_most_ of which _is_ _essential_) includes:
+-
+- - What kernel version are you running ?
+- - What version of the patch are you running ?
+- - How far though the above set of tests can you get ?
+- - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ?
+- - Which services are you running ?
+- - Which client caused the problem ?
+- - How much data was being transferred ?
+- - Was the network congested ?
+- - How can the problem be reproduced ?
+- - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
+- tcpdump don't understand how to dump DECnet properly, so including
+- the hex listing of the packet contents is _essential_, usually the -x flag.
+- You may also need to increase the length grabbed with the -s flag. The
+- -e flag also provides very useful information (ethernet MAC addresses))
+-
+-7. MAC FAQ
+-==========
+-
+-A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet
+-interact and how to get the best performance from your hardware.
+-
+-Ethernet cards are designed to normally only pass received network frames
+-to a host computer when they are addressed to it, or to the broadcast address.
+-
+-Linux has an interface which allows the setting of extra addresses for
+-an ethernet card to listen to. If the ethernet card supports it, the
+-filtering operation will be done in hardware, if not the extra unwanted packets
+-received will be discarded by the host computer. In the latter case,
+-significant processor time and bus bandwidth can be used up on a busy
+-network (see the NAPI documentation for a longer explanation of these
+-effects).
+-
+-DECnet makes use of this interface to allow running DECnet on an ethernet
+-card which has already been configured using TCP/IP (presumably using the
+-built in MAC address of the card, as usual) and/or to allow multiple DECnet
+-addresses on each physical interface. If you do this, be aware that if your
+-ethernet card doesn't support perfect hashing in its MAC address filter
+-then your computer will be doing more work than required. Some cards
+-will simply set themselves into promiscuous mode in order to receive
+-packets from the DECnet specified addresses. So if you have one of these
+-cards its better to set the MAC address of the card as described above
+-to gain the best efficiency. Better still is to use a card which supports
+-NAPI as well.
+-
+-
+-8. Mailing list
+-===============
+-
+-If you are keen to get involved in development, or want to ask questions
+-about configuration, or even just report bugs, then there is a mailing
+-list that you can join, details are at:
+-
+-http://sourceforge.net/mail/?group_id=4993
+-
+-9. Legal Info
+-=============
+-
+-The Linux DECnet project team have placed their code under the GPL. The
+-software is provided "as is" and without warranty express or implied.
+-DECnet is a trademark of Compaq. This software is not a product of
+-Compaq. We acknowledge the help of people at Compaq in providing extra
+-documentation above and beyond what was previously publicly available.
+-
+-Steve Whitehouse <SteveW@ACM.org>
+-
+diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
+index f1d5233e5e510..0a233b17c664e 100644
+--- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
++++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
+@@ -440,6 +440,22 @@ NOTE: For 82599-based network connections, if you are enabling jumbo frames in
+ a virtual function (VF), jumbo frames must first be enabled in the physical
+ function (PF). The VF MTU setting cannot be larger than the PF MTU.
+
++NBASE-T Support
++---------------
++The ixgbe driver supports NBASE-T on some devices. However, the advertisement
++of NBASE-T speeds is suppressed by default, to accommodate broken network
++switches which cannot cope with advertised NBASE-T speeds. Use the ethtool
++command to enable advertising NBASE-T speeds on devices which support it::
++
++ ethtool -s eth? advertise 0x1800000001028
++
++On Linux systems with INTERFACES(5), this can be specified as a pre-up command
++in /etc/network/interfaces so that the interface is always brought up with
++NBASE-T support, e.g.::
++
++ iface eth? inet dhcp
++ pre-up ethtool -s eth? advertise 0x1800000001028 || true
++
+ Generic Receive Offload, aka GRO
+ --------------------------------
+ The driver supports the in-kernel software implementation of GRO. GRO has
+diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
+index 58bc8cd367c67..fdfc73d4c90a8 100644
+--- a/Documentation/networking/index.rst
++++ b/Documentation/networking/index.rst
+@@ -46,7 +46,6 @@ Contents:
+ cdc_mbim
+ dccp
+ dctcp
+- decnet
+ dns_resolver
+ driver
+ eql
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index d91ab28718d49..7890b395e629b 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -322,6 +322,8 @@ tcp_app_win - INTEGER
+ Reserve max(window/2^tcp_app_win, mss) of window for application
+ buffer. Value 0 is special, it means that nothing is reserved.
+
++ Possible values are [0, 31], inclusive.
++
+ Default: 31
+
+ tcp_autocorking - BOOLEAN
+@@ -1063,7 +1065,7 @@ cipso_cache_enable - BOOLEAN
+ cipso_cache_bucket_size - INTEGER
+ The CIPSO label cache consists of a fixed size hash table with each
+ hash bucket containing a number of cache entries. This variable limits
+- the number of entries in each hash bucket; the larger the value the
++ the number of entries in each hash bucket; the larger the value is, the
+ more CIPSO label mappings that can be cached. When the number of
+ entries in a given hash bucket reaches this limit adding new entries
+ causes the oldest entry in the bucket to be removed to make room.
+@@ -1157,7 +1159,7 @@ ip_autobind_reuse - BOOLEAN
+ option should only be set by experts.
+ Default: 0
+
+-ip_dynaddr - BOOLEAN
++ip_dynaddr - INTEGER
+ If set non-zero, enables support for dynamic addresses.
+ If set to a non-zero value larger than 1, a kernel log
+ message will be printed when dynamic address rewriting
+@@ -2808,7 +2810,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+ Default: 4K
+
+ sctp_wmem - vector of 3 INTEGERs: min, default, max
+- Currently this tunable has no effect.
++ Only the first value ("min") is used, "default" and "max" are
++ ignored.
++
++ min: Minimum size of send buffer that can be used by SCTP sockets.
++ It is guaranteed to each SCTP socket (but not association) even
++ under moderate memory pressure.
++
++ Default: 4K
+
+ addr_scope_policy - INTEGER
+ Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
+index 2afccc63856ee..1cfbf1add2fc9 100644
+--- a/Documentation/networking/ipvs-sysctl.rst
++++ b/Documentation/networking/ipvs-sysctl.rst
+@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER
+
+ 0: disable any special handling on port reuse. The new
+ connection will be delivered to the same real server that was
+- servicing the previous connection. This will effectively
+- disable expire_nodest_conn.
++ servicing the previous connection.
+
+ bit 1: enable rescheduling of new connections when it is safe.
+ That is, whenever expire_nodest_conn and for TCP sockets, when
+diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst
+index e899f14a4ba24..43da2cc2e3b9b 100644
+--- a/Documentation/process/code-of-conduct-interpretation.rst
++++ b/Documentation/process/code-of-conduct-interpretation.rst
+@@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're
+ uncertain how to handle situations that come up. It will not be
+ considered a violation report unless you want it to be. If you are
+ uncertain about approaching the TAB or any other maintainers, please
+-reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>.
++reach out to our conflict mediator, Joanna Lee <jlee@linuxfoundation.org>.
+
+ In the end, "be kind to each other" is really what the end goal is for
+ everybody. We know everyone is human and we all fail at times, but the
+diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
+index 8ced754a5a0f6..f3484f60eae59 100644
+--- a/Documentation/process/deprecated.rst
++++ b/Documentation/process/deprecated.rst
+@@ -70,6 +70,9 @@ Instead, the 2-factor form of the allocator should be used::
+
+ foo = kmalloc_array(count, size, GFP_KERNEL);
+
++Specifically, kmalloc() can be replaced with kmalloc_array(), and
++kzalloc() can be replaced with kcalloc().
++
+ If no 2-factor form is available, the saturate-on-overflow helpers should
+ be used::
+
+@@ -90,9 +93,20 @@ Instead, use the helper::
+ array usage and switch to a `flexible array member
+ <#zero-length-and-one-element-arrays>`_ instead.
+
+-See array_size(), array3_size(), and struct_size(),
+-for more details as well as the related check_add_overflow() and
+-check_mul_overflow() family of functions.
++For other calculations, please compose the use of the size_mul(),
++size_add(), and size_sub() helpers. For example, in the case of::
++
++ foo = krealloc(current_size + chunk_size * (count - 3), GFP_KERNEL);
++
++Instead, use the helpers::
++
++ foo = krealloc(size_add(current_size,
++ size_mul(chunk_size,
++ size_sub(count, 3))), GFP_KERNEL);
++
++For more details, also see array3_size() and flex_array_size(),
++as well as the related check_mul_overflow(), check_add_overflow(),
++check_sub_overflow(), and check_shl_overflow() family of functions.
+
+ simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
+ ----------------------------------------------------------------------
+diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
+index 003c865e9c212..fbcb48bc2a903 100644
+--- a/Documentation/process/stable-kernel-rules.rst
++++ b/Documentation/process/stable-kernel-rules.rst
+@@ -168,7 +168,16 @@ Trees
+ - The finalized and tagged releases of all stable kernels can be found
+ in separate branches per version at:
+
+- https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git
++ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
++
++ - The release candidate of all stable kernel versions can be found at:
++
++ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/
++
++ .. warning::
++ The -stable-rc tree is a snapshot in time of the stable-queue tree and
++ will change frequently, hence will be rebased often. It should only be
++ used for testing purposes (e.g. to be consumed by CI systems).
+
+
+ Review committee
+diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
+index 8ad6b93f91e6d..025272139539c 100644
+--- a/Documentation/process/submitting-patches.rst
++++ b/Documentation/process/submitting-patches.rst
+@@ -72,7 +72,7 @@ as you intend it to.
+
+ The maintainer will thank you if you write your patch description in a
+ form which can be easily pulled into Linux's source code management
+-system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`.
++system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`.
+
+ Solve only one problem per patch. If your description starts to get
+ long, that's a sign that you probably need to split up your patch.
+diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
+index b7f98930d38d3..a2ec11da38b04 100644
+--- a/Documentation/riscv/vm-layout.rst
++++ b/Documentation/riscv/vm-layout.rst
+@@ -48,7 +48,7 @@ RISC-V Linux Kernel SV39
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
+- ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
++ ffffffcefea00000 | -196 GB | ffffffcefeffffff | 6 MB | fixmap
+ ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
+ ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
+ ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
+diff --git a/Documentation/scsi/scsi_mid_low_api.rst b/Documentation/scsi/scsi_mid_low_api.rst
+index 63ddea2b96408..7c06e7fb9a316 100644
+--- a/Documentation/scsi/scsi_mid_low_api.rst
++++ b/Documentation/scsi/scsi_mid_low_api.rst
+@@ -1190,11 +1190,11 @@ Members of interest:
+ - pointer to scsi_device object that this command is
+ associated with.
+ resid
+- - an LLD should set this signed integer to the requested
++ - an LLD should set this unsigned integer to the requested
+ transfer length (i.e. 'request_bufflen') less the number
+ of bytes that are actually transferred. 'resid' is
+ preset to 0 so an LLD can ignore it if it cannot detect
+- underruns (overruns should be rare). If possible an LLD
++ underruns (overruns should not be reported). An LLD
+ should set 'resid' prior to invoking 'done'. The most
+ interesting case is data transfers from a SCSI target
+ device (e.g. READs) that underrun.
+diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst
+index 65f61695f5618..5d093fb4896b5 100644
+--- a/Documentation/sound/alsa-configuration.rst
++++ b/Documentation/sound/alsa-configuration.rst
+@@ -2237,7 +2237,7 @@ implicit_fb
+ Apply the generic implicit feedback sync mode. When this is set
+ and the playback stream sync mode is ASYNC, the driver tries to
+ tie an adjacent ASYNC capture stream as the implicit feedback
+- source.
++ source. This is equivalent with quirk_flags bit 17.
+ use_vmalloc
+ Use vmalloc() for allocations of the PCM buffers (default: yes).
+ For architectures with non-coherent memory like ARM or MIPS, the
+@@ -2279,6 +2279,8 @@ quirk_flags
+ * bit 14: Ignore errors for mixer access
+ * bit 15: Support generic DSD raw U32_BE format
+ * bit 16: Set up the interface at first like UAC1
++ * bit 17: Apply the generic implicit feedback sync mode
++ * bit 18: Don't apply implicit feedback sync mode
+
+ This module supports multiple devices, autoprobe and hotplugging.
+
+diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
+index 0ea967d345838..1204304500147 100644
+--- a/Documentation/sound/hd-audio/models.rst
++++ b/Documentation/sound/hd-audio/models.rst
+@@ -261,6 +261,10 @@ alc-sense-combo
+ huawei-mbx-stereo
+ Enable initialization verbs for Huawei MBX stereo speakers;
+ might be risky, try this at your own risk
++alc298-samsung-headphone
++ Samsung laptops with ALC298
++alc256-samsung-headphone
++ Samsung laptops with ALC256
+
+ ALC66x/67x/892
+ ==============
+@@ -326,6 +330,8 @@ usi-headset
+ Headset support on USI machines
+ dual-codecs
+ Lenovo laptops with dual codecs
++alc285-hp-amp-init
++ HP laptops which require speaker amplifier initialization (ALC285)
+
+ ALC680
+ ======
+@@ -698,7 +704,7 @@ ref
+ no-jd
+ BIOS setup but without jack-detection
+ intel
+- Intel DG45* mobos
++ Intel D*45* mobos
+ dell-m6-amic
+ Dell desktops/laptops with analog mics
+ dell-m6-dmic
+diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py
+index eeb394b39e2cc..8b416bfd75ac1 100644
+--- a/Documentation/sphinx/load_config.py
++++ b/Documentation/sphinx/load_config.py
+@@ -3,7 +3,7 @@
+
+ import os
+ import sys
+-from sphinx.util.pycompat import execfile_
++from sphinx.util.osutil import fs_encoding
+
+ # ------------------------------------------------------------------------------
+ def loadConfig(namespace):
+@@ -48,7 +48,9 @@ def loadConfig(namespace):
+ sys.stdout.write("load additional sphinx-config: %s\n" % config_file)
+ config = namespace.copy()
+ config['__file__'] = config_file
+- execfile_(config_file, config)
++ with open(config_file, 'rb') as f:
++ code = compile(f.read(), fs_encoding, 'exec')
++ exec(code, config)
+ del config['__file__']
+ namespace.update(config)
+ else:
+diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt
+index 9a35f50798a65..2c573541ab712 100644
+--- a/Documentation/sphinx/requirements.txt
++++ b/Documentation/sphinx/requirements.txt
+@@ -1,2 +1,4 @@
++# jinja2>=3.1 is not compatible with Sphinx<4.0
++jinja2<3.1
+ sphinx_rtd_theme
+ Sphinx==2.4.4
+diff --git a/Documentation/trace/coresight/coresight-config.rst b/Documentation/trace/coresight/coresight-config.rst
+index a4e3ef2952401..6ed13398ca2ce 100644
+--- a/Documentation/trace/coresight/coresight-config.rst
++++ b/Documentation/trace/coresight/coresight-config.rst
+@@ -211,19 +211,13 @@ also declared in the perf 'cs_etm' event infrastructure so that they can
+ be selected when running trace under perf::
+
+ $ ls /sys/devices/cs_etm
+- configurations format perf_event_mux_interval_ms sinks type
+- events nr_addr_filters power
++ cpu0 cpu2 events nr_addr_filters power subsystem uevent
++ cpu1 cpu3 format perf_event_mux_interval_ms sinks type
+
+-Key directories here are 'configurations' - which lists the loaded
+-configurations, and 'events' - a generic perf directory which allows
+-selection on the perf command line.::
++The key directory here is 'events' - a generic perf directory which allows
++selection on the perf command line. As with the sinks entries, this provides
++a hash of the configuration name.
+
+- $ ls configurations/
+- autofdo
+- $ cat configurations/autofdo
+- 0xa7c3dddd
+-
+-As with the sinks entries, this provides a hash of the configuration name.
+ The entry in the 'events' directory uses perfs built in syntax generator
+ to substitute the syntax for the name when evaluating the command::
+
+diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
+index 8ddb9b09451c8..c47f381d0c002 100644
+--- a/Documentation/trace/events.rst
++++ b/Documentation/trace/events.rst
+@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
+ prev_comm ~ "*sh*"
+ prev_comm ~ "ba*sh"
+
++If the field is a pointer that points into user space (for example
++"filename" from sys_enter_openat), then you have to append ".ustring" to the
++field name::
++
++ filename.ustring ~ "password"
++
++As the kernel will have to know how to retrieve the memory that the pointer
++is at from user space.
++
+ 5.2 Setting filters
+ -------------------
+
+@@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of
+ the filter string; the error message should still be useful though
+ even without more accurate position info.
+
++5.2.1 Filter limitations
++------------------------
++
++If a filter is placed on a string pointer ``(char *)`` that does not point
++to a string on the ring buffer, but instead points to kernel or user space
++memory, then, for safety reasons, at most 1024 bytes of the content is
++copied onto a temporary buffer to do the compare. If the copy of the memory
++faults (the pointer points to memory that should not be accessed), then the
++string compare will be treated as not matching.
++
+ 5.3 Clearing filters
+ --------------------
+
+diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
+index 4e5b26f03d5b1..d036946bce7ab 100644
+--- a/Documentation/trace/ftrace.rst
++++ b/Documentation/trace/ftrace.rst
+@@ -2929,7 +2929,7 @@ Produces::
+ bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
+ bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
+ bash-1994 [000] .... 4342.324899: do_truncate <-do_last
+- bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
++ bash-1994 [000] .... 4342.324899: setattr_should_drop_suidgid <-do_truncate
+ bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
+ bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
+ bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
+diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
+index 533415644c54d..a78350a8fed43 100644
+--- a/Documentation/trace/histogram.rst
++++ b/Documentation/trace/histogram.rst
+@@ -39,7 +39,7 @@ Documentation written by Tom Zanussi
+ will use the event's kernel stacktrace as the key. The keywords
+ 'keys' or 'key' can be used to specify keys, and the keywords
+ 'values', 'vals', or 'val' can be used to specify values. Compound
+- keys consisting of up to two fields can be specified by the 'keys'
++ keys consisting of up to three fields can be specified by the 'keys'
+ keyword. Hashing a compound key produces a unique entry in the
+ table for each unique combination of component keys, and can be
+ useful for providing more fine-grained summaries of event data.
+diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
+index b175d88f31ebb..15e4bfa2bd83c 100644
+--- a/Documentation/trace/kprobetrace.rst
++++ b/Documentation/trace/kprobetrace.rst
+@@ -58,8 +58,8 @@ Synopsis of kprobe_events
+ NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+ FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+ (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
+- (x8/x16/x32/x64), "string", "ustring" and bitfield
+- are supported.
++ (x8/x16/x32/x64), "string", "ustring", "symbol", "symstr"
++ and bitfield are supported.
+
+ (\*1) only for the probe on function entry (offs == 0).
+ (\*2) only for return probe.
+@@ -96,6 +96,10 @@ offset, and container-size (usually 32). The syntax is::
+
+ Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG)
+ which shows given pointer in "symbol+offset" style.
++On the other hand, symbol-string type ('symstr') converts the given address to
++"symbol+offset/symbolsize" style and stores it as a null-terminated string.
++With 'symstr' type, you can filter the event with wildcard pattern of the
++symbols, and you don't need to solve symbol name by yourself.
+ For $comm, the default type is "string"; any other type is invalid.
+
+ .. _user_mem_access:
+diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst
+index 1efb8293bf1f0..9d6387e7b083b 100644
+--- a/Documentation/translations/it_IT/kernel-hacking/locking.rst
++++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst
+@@ -1396,7 +1396,7 @@ Riferimento per l'API dei Mutex
+ Riferimento per l'API dei Futex
+ ===============================
+
+-.. kernel-doc:: kernel/futex.c
++.. kernel-doc:: kernel/futex/core.c
+ :internal:
+
+ Approfondimenti
+diff --git a/Documentation/tty/device_drivers/oxsemi-tornado.rst b/Documentation/tty/device_drivers/oxsemi-tornado.rst
+new file mode 100644
+index 0000000000000..0180d8bb08818
+--- /dev/null
++++ b/Documentation/tty/device_drivers/oxsemi-tornado.rst
+@@ -0,0 +1,129 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++====================================================================
++Notes on Oxford Semiconductor PCIe (Tornado) 950 serial port devices
++====================================================================
++
++Oxford Semiconductor PCIe (Tornado) 950 serial port devices are driven
++by a fixed 62.5MHz clock input derived from the 100MHz PCI Express clock.
++
++The baud rate produced by the baud generator is obtained from this input
++frequency by dividing it by the clock prescaler, which can be set to any
++value from 1 to 63.875 in increments of 0.125, and then the usual 16-bit
++divisor is used as with the original 8250, to divide the frequency by a
++value from 1 to 65535. Finally a programmable oversampling rate is used
++that can take any value from 4 to 16 to divide the frequency further and
++determine the actual baud rate used. Baud rates from 15625000bps down
++to 0.933bps can be obtained this way.
++
++By default the oversampling rate is set to 16 and the clock prescaler is
++set to 33.875, meaning that the frequency to be used as the reference
++for the usual 16-bit divisor is 115313.653, which is close enough to the
++frequency of 115200 used by the original 8250 for the same values to be
++used for the divisor to obtain the requested baud rates by software that
++is unaware of the extra clock controls available.
++
++The oversampling rate is programmed with the TCR register and the clock
++prescaler is programmed with the CPR/CPR2 register pair[1][2][3][4].
++To switch away from the default value of 33.875 for the prescaler the
++the enhanced mode has to be explicitly enabled though, by setting bit 4
++of the EFR. In that mode setting bit 7 in the MCR enables the prescaler
++or otherwise it is bypassed as if the value of 1 was used. Additionally
++writing any value to CPR clears CPR2 for compatibility with old software
++written for older conventional PCI Oxford Semiconductor devices that do
++not have the extra prescaler's 9th bit in CPR2, so the CPR/CPR2 register
++pair has to be programmed in the right order.
++
++By using these parameters rates from 15625000bps down to 1bps can be
++obtained, with either exact or highly-accurate actual bit rates for
++standard and many non-standard rates.
++
++Here are the figures for the standard and some non-standard baud rates
++(including those quoted in Oxford Semiconductor documentation), giving
++the requested rate (r), the actual rate yielded (a) and its deviation
++from the requested rate (d), and the values of the oversampling rate
++(tcr), the clock prescaler (cpr) and the divisor (div) produced by the
++new `get_divisor' handler:
++
++r: 15625000, a: 15625000.00, d: 0.0000%, tcr: 4, cpr: 1.000, div: 1
++r: 12500000, a: 12500000.00, d: 0.0000%, tcr: 5, cpr: 1.000, div: 1
++r: 10416666, a: 10416666.67, d: 0.0000%, tcr: 6, cpr: 1.000, div: 1
++r: 8928571, a: 8928571.43, d: 0.0000%, tcr: 7, cpr: 1.000, div: 1
++r: 7812500, a: 7812500.00, d: 0.0000%, tcr: 8, cpr: 1.000, div: 1
++r: 4000000, a: 4000000.00, d: 0.0000%, tcr: 5, cpr: 3.125, div: 1
++r: 3686400, a: 3676470.59, d: -0.2694%, tcr: 8, cpr: 2.125, div: 1
++r: 3500000, a: 3496503.50, d: -0.0999%, tcr: 13, cpr: 1.375, div: 1
++r: 3000000, a: 2976190.48, d: -0.7937%, tcr: 14, cpr: 1.500, div: 1
++r: 2500000, a: 2500000.00, d: 0.0000%, tcr: 10, cpr: 2.500, div: 1
++r: 2000000, a: 2000000.00, d: 0.0000%, tcr: 10, cpr: 3.125, div: 1
++r: 1843200, a: 1838235.29, d: -0.2694%, tcr: 16, cpr: 2.125, div: 1
++r: 1500000, a: 1492537.31, d: -0.4975%, tcr: 5, cpr: 8.375, div: 1
++r: 1152000, a: 1152073.73, d: 0.0064%, tcr: 14, cpr: 3.875, div: 1
++r: 921600, a: 919117.65, d: -0.2694%, tcr: 16, cpr: 2.125, div: 2
++r: 576000, a: 576036.87, d: 0.0064%, tcr: 14, cpr: 3.875, div: 2
++r: 460800, a: 460829.49, d: 0.0064%, tcr: 7, cpr: 3.875, div: 5
++r: 230400, a: 230414.75, d: 0.0064%, tcr: 14, cpr: 3.875, div: 5
++r: 115200, a: 115207.37, d: 0.0064%, tcr: 14, cpr: 1.250, div: 31
++r: 57600, a: 57603.69, d: 0.0064%, tcr: 8, cpr: 3.875, div: 35
++r: 38400, a: 38402.46, d: 0.0064%, tcr: 14, cpr: 3.875, div: 30
++r: 19200, a: 19201.23, d: 0.0064%, tcr: 8, cpr: 3.875, div: 105
++r: 9600, a: 9600.06, d: 0.0006%, tcr: 9, cpr: 1.125, div: 643
++r: 4800, a: 4799.98, d: -0.0004%, tcr: 7, cpr: 2.875, div: 647
++r: 2400, a: 2400.02, d: 0.0008%, tcr: 9, cpr: 2.250, div: 1286
++r: 1200, a: 1200.00, d: 0.0000%, tcr: 14, cpr: 2.875, div: 1294
++r: 300, a: 300.00, d: 0.0000%, tcr: 11, cpr: 2.625, div: 7215
++r: 200, a: 200.00, d: 0.0000%, tcr: 16, cpr: 1.250, div: 15625
++r: 150, a: 150.00, d: 0.0000%, tcr: 13, cpr: 2.250, div: 14245
++r: 134, a: 134.00, d: 0.0000%, tcr: 11, cpr: 2.625, div: 16153
++r: 110, a: 110.00, d: 0.0000%, tcr: 12, cpr: 1.000, div: 47348
++r: 75, a: 75.00, d: 0.0000%, tcr: 4, cpr: 5.875, div: 35461
++r: 50, a: 50.00, d: 0.0000%, tcr: 16, cpr: 1.250, div: 62500
++r: 25, a: 25.00, d: 0.0000%, tcr: 16, cpr: 2.500, div: 62500
++r: 4, a: 4.00, d: 0.0000%, tcr: 16, cpr: 20.000, div: 48828
++r: 2, a: 2.00, d: 0.0000%, tcr: 16, cpr: 40.000, div: 48828
++r: 1, a: 1.00, d: 0.0000%, tcr: 16, cpr: 63.875, div: 61154
++
++With the baud base set to 15625000 and the unsigned 16-bit UART_DIV_MAX
++limitation imposed by `serial8250_get_baud_rate' standard baud rates
++below 300bps become unavailable in the regular way, e.g. the rate of
++200bps requires the baud base to be divided by 78125 and that is beyond
++the unsigned 16-bit range. The historic spd_cust feature can still be
++used by encoding the values for, the prescaler, the oversampling rate
++and the clock divisor (DLM/DLL) as follows to obtain such rates if so
++required:
++
++ 31 29 28 20 19 16 15 0
+++-----+-----------------+-------+-------------------------------+
++|0 0 0| CPR2:CPR | TCR | DLM:DLL |
+++-----+-----------------+-------+-------------------------------+
++
++Use a value such encoded for the `custom_divisor' field along with the
++ASYNC_SPD_CUST flag set in the `flags' field in `struct serial_struct'
++passed with the TIOCSSERIAL ioctl(2), such as with the setserial(8)
++utility and its `divisor' and `spd_cust' parameters, and the select
++the baud rate of 38400bps. Note that the value of 0 in TCR sets the
++oversampling rate to 16 and prescaler values below 1 in CPR2/CPR are
++clamped by the driver to 1.
++
++For example the value of 0x1f4004e2 will set CPR2/CPR, TCR and DLM/DLL
++respectively to 0x1f4, 0x0 and 0x04e2, choosing the prescaler value,
++the oversampling rate and the clock divisor of 62.500, 16 and 1250
++respectively. These parameters will set the baud rate for the serial
++port to 62500000 / 62.500 / 1250 / 16 = 50bps.
++
++References:
++
++[1] "OXPCIe200 PCI Express Multi-Port Bridge", Oxford Semiconductor,
++ Inc., DS-0045, 10 Nov 2008, Section "950 Mode", pp. 64-65
++
++[2] "OXPCIe952 PCI Express Bridge to Dual Serial & Parallel Port",
++ Oxford Semiconductor, Inc., DS-0046, Mar 06 08, Section "950 Mode",
++ p. 20
++
++[3] "OXPCIe954 PCI Express Bridge to Quad Serial Port", Oxford
++ Semiconductor, Inc., DS-0047, Feb 08, Section "950 Mode", p. 20
++
++[4] "OXPCIe958 PCI Express Bridge to Octal Serial Port", Oxford
++ Semiconductor, Inc., DS-0048, Feb 08, Section "950 Mode", p. 20
++
++Maciej W. Rozycki <macro@orcam.me.uk>
+diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
+index 6655d929a3518..404ecb6d0f87f 100644
+--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
++++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
+@@ -304,7 +304,6 @@ Code Seq# Include File Comments
+ 0x89 00-06 arch/x86/include/asm/sockios.h
+ 0x89 0B-DF linux/sockios.h
+ 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
+-0x89 E0-EF linux/dn.h PROTOPRIVATE range
+ 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+ 0x8B all linux/wireless.h
+ 0x8C 00-3F WiNRADiO driver
+diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
+index f35552ff19ba8..b68e7a51009f8 100644
+--- a/Documentation/userspace-api/landlock.rst
++++ b/Documentation/userspace-api/landlock.rst
+@@ -267,8 +267,8 @@ restrict such paths with dedicated ruleset flags.
+ Ruleset layers
+ --------------
+
+-There is a limit of 64 layers of stacked rulesets. This can be an issue for a
+-task willing to enforce a new ruleset in complement to its 64 inherited
++There is a limit of 16 layers of stacked rulesets. This can be an issue for a
++task willing to enforce a new ruleset in complement to its 16 inherited
+ rulesets. Once this limit is reached, sys_landlock_restrict_self() returns
+ E2BIG. It is then strongly suggested to carefully build rulesets once in the
+ life of a thread, especially for applications able to launch other applications
+diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+index 976d34445a246..f1421cf1a1b31 100644
+--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
++++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+@@ -3326,15 +3326,15 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
+ * - __u8
+ - ``poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+ - PocStCurrBefore as described in section 8.3.2 "Decoding process for reference
+- picture set.
++ picture set": provides the index of the short term before references in DPB array.
+ * - __u8
+ - ``poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+ - PocStCurrAfter as described in section 8.3.2 "Decoding process for reference
+- picture set.
++ picture set": provides the index of the short term after references in DPB array.
+ * - __u8
+ - ``poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+ - PocLtCurr as described in section 8.3.2 "Decoding process for reference
+- picture set.
++ picture set": provides the index of the long term references in DPB array.
+ * - __u64
+ - ``flags``
+ - See :ref:`Decode Parameters Flags <hevc_decode_params_flags>`
+diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
+index a6729c8cf0634..ec38299f9428a 100644
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -4117,6 +4117,18 @@ not holding a previously reported uncorrected error).
+ :Parameters: struct kvm_s390_cmma_log (in, out)
+ :Returns: 0 on success, a negative value on error
+
++Errors:
++
++ ====== =============================================================
++ ENOMEM not enough memory can be allocated to complete the task
++ ENXIO if CMMA is not enabled
++ EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled
++ EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been
++ disabled (and thus migration mode was automatically disabled)
++ EFAULT if the userspace address is invalid or if no page table is
++ present for the addresses (e.g. when using hugepages).
++ ====== =============================================================
++
+ This ioctl is used to get the values of the CMMA bits on the s390
+ architecture. It is meant to be used in two scenarios:
+
+@@ -4197,12 +4209,6 @@ mask is unused.
+
+ values points to the userspace buffer where the result will be stored.
+
+-This ioctl can fail with -ENOMEM if not enough memory can be allocated to
+-complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
+-KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
+--EFAULT if the userspace address is invalid or if no page table is
+-present for the addresses (e.g. when using hugepages).
+-
+ 4.108 KVM_S390_SET_CMMA_BITS
+ ----------------------------
+
+@@ -7265,3 +7271,63 @@ The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
+ of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
+ the hypercalls whose corresponding bit is in the argument, and return
+ ENOSYS for the others.
++
++9. Known KVM API problems
++=========================
++
++In some cases, KVM's API has some inconsistencies or common pitfalls
++that userspace need to be aware of. This section details some of
++these issues.
++
++Most of them are architecture specific, so the section is split by
++architecture.
++
++9.1. x86
++--------
++
++``KVM_GET_SUPPORTED_CPUID`` issues
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++In general, ``KVM_GET_SUPPORTED_CPUID`` is designed so that it is possible
++to take its result and pass it directly to ``KVM_SET_CPUID2``. This section
++documents some cases in which that requires some care.
++
++Local APIC features
++~~~~~~~~~~~~~~~~~~~
++
++CPU[EAX=1]:ECX[21] (X2APIC) is reported by ``KVM_GET_SUPPORTED_CPUID``,
++but it can only be enabled if ``KVM_CREATE_IRQCHIP`` or
++``KVM_ENABLE_CAP(KVM_CAP_IRQCHIP_SPLIT)`` are used to enable in-kernel emulation of
++the local APIC.
++
++The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature.
++
++CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``.
++It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
++has enabled in-kernel emulation of the local APIC.
++
++CPU topology
++~~~~~~~~~~~~
++
++Several CPUID values include topology information for the host CPU:
++0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different
++versions of KVM return different values for this information and userspace
++should not rely on it. Currently they return all zeroes.
++
++If userspace wishes to set up a guest topology, it should be careful that
++the values of these three leaves differ for each CPU. In particular,
++the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX
++for 0x8000001e; the latter also encodes the core id and node id in bits
++7:0 of EBX and ECX respectively.
++
++Obsolete ioctls and capabilities
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++KVM_CAP_DISABLE_QUIRKS does not let userspace know which quirks are actually
++available. Use ``KVM_CHECK_EXTENSION(KVM_CAP_DISABLE_QUIRKS2)`` instead if
++available.
++
++Ordering of KVM_GET_*/KVM_SET_* ioctls
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++TBD
+diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst
+index 0aa5b1cfd700c..147efec626e52 100644
+--- a/Documentation/virt/kvm/devices/vm.rst
++++ b/Documentation/virt/kvm/devices/vm.rst
+@@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT).
+ :Parameters: address of a buffer in user space to store the data (u8) to
+ :Returns: -EFAULT if the given address is not accessible from kernel space;
+ -EINVAL if setting the TOD clock extension to != 0 is not supported
++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
+
+ 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
+ -----------------------------------
+@@ -224,6 +225,7 @@ the POP (u64).
+
+ :Parameters: address of a buffer in user space to store the data (u64) to
+ :Returns: -EFAULT if the given address is not accessible from kernel space
++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
+
+ 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
+ -----------------------------------
+@@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0.
+ (kvm_s390_vm_tod_clock) to
+ :Returns: -EFAULT if the given address is not accessible from kernel space;
+ -EINVAL if setting the TOD clock extension to != 0 is not supported
++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
+
+ 4. GROUP: KVM_S390_VM_CRYPTO
+ ============================
+@@ -299,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration.
+ Setting this attribute when migration mode is already active will have
+ no effects.
+
++Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When
++dirty tracking is disabled on any memslot, migration mode is automatically
++stopped.
++
+ :Parameters: none
+ :Returns: -ENOMEM if there is not enough free memory to start migration mode;
+ -EINVAL if the state of the VM is invalid (e.g. no memory defined);
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 3b79fd441dde8..9216b9c85ce92 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -434,6 +434,7 @@ ACPI VIOT DRIVER
+ M: Jean-Philippe Brucker <jean-philippe@linaro.org>
+ L: linux-acpi@vger.kernel.org
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ F: drivers/acpi/viot.c
+ F: include/linux/acpi_viot.h
+@@ -941,6 +942,7 @@ AMD IOMMU (AMD-VI)
+ M: Joerg Roedel <joro@8bytes.org>
+ R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: drivers/iommu/amd/
+@@ -1248,7 +1250,7 @@ APEX EMBEDDED SYSTEMS STX104 IIO DRIVER
+ M: William Breathitt Gray <vilhelm.gray@gmail.com>
+ L: linux-iio@vger.kernel.org
+ S: Maintained
+-F: drivers/iio/adc/stx104.c
++F: drivers/iio/addac/stx104.c
+
+ APM DRIVER
+ M: Jiri Kosina <jikos@kernel.org>
+@@ -3112,7 +3114,7 @@ F: drivers/net/ieee802154/atusb.h
+ AUDIT SUBSYSTEM
+ M: Paul Moore <paul@paul-moore.com>
+ M: Eric Paris <eparis@redhat.com>
+-L: linux-audit@redhat.com (moderated for non-subscribers)
++L: audit@vger.kernel.org
+ S: Supported
+ W: https://github.com/linux-audit
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
+@@ -3405,6 +3407,7 @@ F: net/sched/act_bpf.c
+ F: net/sched/cls_bpf.c
+ F: samples/bpf/
+ F: scripts/bpf_doc.py
++F: scripts/pahole-version.sh
+ F: tools/bpf/
+ F: tools/lib/bpf/
+ F: tools/testing/selftests/bpf/
+@@ -4663,6 +4666,7 @@ T: git git://git.samba.org/sfrench/cifs-2.6.git
+ F: Documentation/admin-guide/cifs/
+ F: fs/cifs/
+ F: fs/smbfs_common/
++F: include/uapi/linux/cifs
+
+ COMPACTPCI HOTPLUG CORE
+ M: Scott Murray <scott@spiteful.org>
+@@ -4810,7 +4814,6 @@ F: Documentation/ABI/testing/sysfs-bus-counter
+ F: Documentation/driver-api/generic-counter.rst
+ F: drivers/counter/
+ F: include/linux/counter.h
+-F: include/linux/counter_enum.h
+
+ CP2615 I2C DRIVER
+ M: Bence Csókás <bence98@sch.bme.hu>
+@@ -5203,13 +5206,6 @@ F: include/linux/tfrc.h
+ F: include/uapi/linux/dccp.h
+ F: net/dccp/
+
+-DECnet NETWORK LAYER
+-L: linux-decnet-user@lists.sourceforge.net
+-S: Orphan
+-W: http://linux-decnet.sourceforge.net
+-F: Documentation/networking/decnet.rst
+-F: net/decnet/
+-
+ DECSTATION PLATFORM SUPPORT
+ M: "Maciej W. Rozycki" <macro@orcam.me.uk>
+ L: linux-mips@vger.kernel.org
+@@ -5602,6 +5598,7 @@ M: Christoph Hellwig <hch@lst.de>
+ M: Marek Szyprowski <m.szyprowski@samsung.com>
+ R: Robin Murphy <robin.murphy@arm.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ W: http://git.infradead.org/users/hch/dma-mapping.git
+ T: git git://git.infradead.org/users/hch/dma-mapping.git
+@@ -5614,6 +5611,7 @@ F: kernel/dma/
+ DMA MAPPING BENCHMARK
+ M: Barry Song <song.bao.hua@hisilicon.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ F: kernel/dma/map_benchmark.c
+ F: tools/testing/selftests/dma/
+
+@@ -7024,7 +7022,6 @@ F: drivers/net/mdio/fwnode_mdio.c
+ F: drivers/net/mdio/of_mdio.c
+ F: drivers/net/pcs/
+ F: drivers/net/phy/
+-F: drivers/of/of_net.c
+ F: include/dt-bindings/net/qca-ar803x.h
+ F: include/linux/*mdio*.h
+ F: include/linux/mdio/*.h
+@@ -7036,6 +7033,7 @@ F: include/linux/platform_data/mdio-gpio.h
+ F: include/trace/events/mdio.h
+ F: include/uapi/linux/mdio.h
+ F: include/uapi/linux/mii.h
++F: net/core/of_net.c
+
+ EXFAT FILE SYSTEM
+ M: Namjae Jeon <linkinjeon@kernel.org>
+@@ -7115,6 +7113,7 @@ F: drivers/gpu/drm/exynos/exynos_dp*
+ EXYNOS SYSMMU (IOMMU) driver
+ M: Marek Szyprowski <m.szyprowski@samsung.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ F: drivers/iommu/exynos-iommu.c
+
+@@ -7239,9 +7238,6 @@ F: include/linux/fs.h
+ F: include/linux/fs_types.h
+ F: include/uapi/linux/fs.h
+ F: include/uapi/linux/openat2.h
+-X: fs/io-wq.c
+-X: fs/io-wq.h
+-X: fs/io_uring.c
+
+ FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
+ M: Riku Voipio <riku.voipio@iki.fi>
+@@ -7744,7 +7740,7 @@ F: Documentation/locking/*futex*
+ F: include/asm-generic/futex.h
+ F: include/linux/futex.h
+ F: include/uapi/linux/futex.h
+-F: kernel/futex.c
++F: kernel/futex/*
+ F: tools/perf/bench/futex*
+ F: tools/testing/selftests/futex/
+
+@@ -7947,9 +7943,10 @@ F: drivers/media/usb/go7007/
+
+ GOODIX TOUCHSCREEN
+ M: Bastien Nocera <hadess@hadess.net>
++M: Hans de Goede <hdegoede@redhat.com>
+ L: linux-input@vger.kernel.org
+ S: Maintained
+-F: drivers/input/touchscreen/goodix.c
++F: drivers/input/touchscreen/goodix*
+
+ GOOGLE ETHERNET DRIVERS
+ M: Jeroen de Borst <jeroendb@google.com>
+@@ -8565,7 +8562,7 @@ F: drivers/net/wireless/intersil/hostap/
+ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
+ L: platform-driver-x86@vger.kernel.org
+ S: Orphan
+-F: drivers/platform/x86/tc1100-wmi.c
++F: drivers/platform/x86/hp/tc1100-wmi.c
+
+ HPET: High Precision Event Timers driver
+ M: Clemens Ladisch <clemens@ladisch.de>
+@@ -9457,6 +9454,7 @@ INTEL IOMMU (VT-d)
+ M: David Woodhouse <dwmw2@infradead.org>
+ M: Lu Baolu <baolu.lu@linux.intel.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: drivers/iommu/intel/
+@@ -9793,6 +9791,7 @@ IOMMU DRIVERS
+ M: Joerg Roedel <joro@8bytes.org>
+ M: Will Deacon <will@kernel.org>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: Documentation/devicetree/bindings/iommu/
+@@ -9810,9 +9809,7 @@ L: io-uring@vger.kernel.org
+ S: Maintained
+ T: git git://git.kernel.dk/linux-block
+ T: git git://git.kernel.dk/liburing
+-F: fs/io-wq.c
+-F: fs/io-wq.h
+-F: fs/io_uring.c
++F: io_uring/
+ F: include/linux/io_uring.h
+ F: include/uapi/linux/io_uring.h
+ F: tools/io_uring/
+@@ -10835,7 +10832,7 @@ M: Eric Piel <eric.piel@tremplin-utc.net>
+ S: Maintained
+ F: Documentation/misc-devices/lis3lv02d.rst
+ F: drivers/misc/lis3lv02d/
+-F: drivers/platform/x86/hp_accel.c
++F: drivers/platform/x86/hp/hp_accel.c
+
+ LIST KUNIT TEST
+ M: David Gow <davidgow@google.com>
+@@ -11795,6 +11792,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c
+ MEDIATEK IOMMU DRIVER
+ M: Yong Wu <yong.wu@mediatek.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+ S: Supported
+ F: Documentation/devicetree/bindings/iommu/mediatek*
+@@ -15554,6 +15552,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c
+ QUALCOMM IOMMU
+ M: Rob Clark <robdclark@gmail.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ L: linux-arm-msm@vger.kernel.org
+ S: Maintained
+ F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
+@@ -15720,6 +15719,8 @@ F: arch/mips/generic/board-ranchu.c
+
+ RANDOM NUMBER DRIVER
+ M: "Theodore Ts'o" <tytso@mit.edu>
++M: Jason A. Donenfeld <Jason@zx2c4.com>
++T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
+ S: Maintained
+ F: drivers/char/random.c
+
+@@ -17980,6 +17981,7 @@ F: arch/x86/boot/video*
+ SWIOTLB SUBSYSTEM
+ M: Christoph Hellwig <hch@infradead.org>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ W: http://git.infradead.org/users/hch/dma-mapping.git
+ T: git git://git.infradead.org/users/hch/dma-mapping.git
+@@ -20560,12 +20562,14 @@ M: Juergen Gross <jgross@suse.com>
+ M: Stefano Stabellini <sstabellini@kernel.org>
+ L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ F: arch/x86/xen/*swiotlb*
+ F: drivers/xen/*swiotlb*
+
+ XFS FILESYSTEM
+ C: irc://irc.oftc.net/xfs
++M: Leah Rumancik <leah.rumancik@gmail.com>
+ M: Darrick J. Wong <djwong@kernel.org>
+ M: linux-xfs@vger.kernel.org
+ L: linux-xfs@vger.kernel.org
+diff --git a/Makefile b/Makefile
+index ed6e7ec60eff6..b2ff07a0176be 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 15
+-SUBLEVEL = 0
++SUBLEVEL = 132
+ EXTRAVERSION =
+ NAME = Trick or Treat
+
+@@ -93,10 +93,17 @@ endif
+
+ # If the user is running make -s (silent mode), suppress echoing of
+ # commands
++# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
+
+-ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
+- quiet=silent_
+- KBUILD_VERBOSE = 0
++ifeq ($(filter 3.%,$(MAKE_VERSION)),)
++silence:=$(findstring s,$(firstword -$(MAKEFLAGS)))
++else
++silence:=$(findstring s,$(filter-out --%,$(MAKEFLAGS)))
++endif
++
++ifeq ($(silence),s)
++quiet=silent_
++KBUILD_VERBOSE = 0
+ endif
+
+ export quiet Q KBUILD_VERBOSE
+@@ -430,6 +437,7 @@ else
+ HOSTCC = gcc
+ HOSTCXX = g++
+ endif
++HOSTPKG_CONFIG = pkg-config
+
+ export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
+ -O2 -fomit-frame-pointer -std=gnu89
+@@ -480,6 +488,8 @@ LZ4 = lz4c
+ XZ = xz
+ ZSTD = zstd
+
++PAHOLE_FLAGS = $(shell PAHOLE=$(PAHOLE) $(srctree)/scripts/pahole-flags.sh)
++
+ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
+ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF)
+ NOSTDINC_FLAGS :=
+@@ -523,7 +533,7 @@ KBUILD_LDFLAGS_MODULE :=
+ KBUILD_LDFLAGS :=
+ CLANG_FLAGS :=
+
+-export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
++export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
+ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
+ export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
+ export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
+@@ -534,6 +544,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
+ export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
+ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
+ export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
++export PAHOLE_FLAGS
+
+ # Files to ignore in find ... statements
+
+@@ -687,12 +698,19 @@ endif
+
+ ifdef CONFIG_CC_IS_GCC
+ RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
+ RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
+ endif
+ ifdef CONFIG_CC_IS_CLANG
+ RETPOLINE_CFLAGS := -mretpoline-external-thunk
+ RETPOLINE_VDSO_CFLAGS := -mretpoline
+ endif
++
++ifdef CONFIG_RETHUNK
++RETHUNK_CFLAGS := -mfunction-return=thunk-extern
++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
++endif
++
+ export RETPOLINE_CFLAGS
+ export RETPOLINE_VDSO_CFLAGS
+
+@@ -811,6 +829,9 @@ endif
+ KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+ KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+
++# These result in bogus false positives
++KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
++
+ ifdef CONFIG_FRAME_POINTER
+ KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
+ else
+@@ -831,12 +852,12 @@ endif
+
+ # Initialize all stack variables with a zero value.
+ ifdef CONFIG_INIT_STACK_ALL_ZERO
+-# Future support for zero initialization is still being debated, see
+-# https://bugs.llvm.org/show_bug.cgi?id=45497. These flags are subject to being
+-# renamed or dropped.
+ KBUILD_CFLAGS += -ftrivial-auto-var-init=zero
++ifdef CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER
++# https://github.com/llvm/llvm-project/issues/44842
+ KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
+ endif
++endif
+
+ # While VLAs have been removed, GCC produces unreachable stack probes
+ # for the randomize_kstack_offset feature. Disable it for all compilers.
+@@ -857,7 +878,9 @@ else
+ DEBUG_CFLAGS += -g
+ endif
+
+-ifndef CONFIG_AS_IS_LLVM
++ifdef CONFIG_AS_IS_LLVM
++KBUILD_AFLAGS += -g
++else
+ KBUILD_AFLAGS += -Wa,-gdwarf-2
+ endif
+
+@@ -865,6 +888,7 @@ ifndef CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
+ dwarf-version-$(CONFIG_DEBUG_INFO_DWARF4) := 4
+ dwarf-version-$(CONFIG_DEBUG_INFO_DWARF5) := 5
+ DEBUG_CFLAGS += -gdwarf-$(dwarf-version-y)
++KBUILD_AFLAGS += -gdwarf-$(dwarf-version-y)
+ endif
+
+ ifdef CONFIG_DEBUG_INFO_REDUCED
+@@ -1008,6 +1032,21 @@ ifdef CONFIG_CC_IS_GCC
+ KBUILD_CFLAGS += -Wno-maybe-uninitialized
+ endif
+
++ifdef CONFIG_CC_IS_GCC
++# The allocators already balk at large sizes, so silence the compiler
++# warnings for bounds checks involving those possible values. While
++# -Wno-alloc-size-larger-than would normally be used here, earlier versions
++# of gcc (<9.1) weirdly don't handle the option correctly when _other_
++# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
++# doesn't work (as it is documented to), silently resolving to "0" prior to
++# version 9.1 (and producing an error more recently). Numeric values larger
++# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
++# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
++# choice, we must perform a versioned check to disable this warning.
++# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
++KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0901, -Wno-alloc-size-larger-than)
++endif
++
+ # disable invalid "can't wrap" optimizations for signed / pointers
+ KBUILD_CFLAGS += -fno-strict-overflow
+
+@@ -1053,6 +1092,11 @@ KBUILD_CFLAGS += $(KCFLAGS)
+ KBUILD_LDFLAGS_MODULE += --build-id=sha1
+ LDFLAGS_vmlinux += --build-id=sha1
+
++KBUILD_LDFLAGS += -z noexecstack
++ifeq ($(CONFIG_LD_IS_BFD),y)
++KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments)
++endif
++
+ ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
+ LDFLAGS_vmlinux += $(call ld-option, -X,)
+ endif
+@@ -1115,7 +1159,9 @@ export MODORDER := $(extmod_prefix)modules.order
+ export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps
+
+ ifeq ($(KBUILD_EXTMOD),)
+-core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
++core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/
++core-$(CONFIG_BLOCK) += block/
++core-$(CONFIG_IO_URING) += io_uring/
+
+ vmlinux-dirs := $(patsubst %/,%,$(filter %/, \
+ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
+@@ -1125,13 +1171,11 @@ vmlinux-alldirs := $(sort $(vmlinux-dirs) Documentation \
+ $(patsubst %/,%,$(filter %/, $(core-) \
+ $(drivers-) $(libs-))))
+
+-subdir-modorder := $(addsuffix modules.order,$(filter %/, \
+- $(core-y) $(core-m) $(libs-y) $(libs-m) \
+- $(drivers-y) $(drivers-m)))
+-
+ build-dirs := $(vmlinux-dirs)
+ clean-dirs := $(vmlinux-alldirs)
+
++subdir-modorder := $(addsuffix /modules.order, $(build-dirs))
++
+ # Externally visible symbols (used by link-vmlinux.sh)
+ KBUILD_VMLINUX_OBJS := $(head-y) $(patsubst %/,%/built-in.a, $(core-y))
+ KBUILD_VMLINUX_OBJS += $(addsuffix built-in.a, $(filter %/, $(libs-y)))
+@@ -1160,7 +1204,7 @@ KBUILD_MODULES := 1
+
+ autoksyms_recursive: descend modules.order
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
+- "$(MAKE) -f $(srctree)/Makefile vmlinux"
++ "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive"
+ endif
+
+ autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h)
+@@ -1301,8 +1345,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
+
+ PHONY += headers
+ headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts
+- $(if $(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/Kbuild),, \
+- $(error Headers not exportable for the $(SRCARCH) architecture))
++ $(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML))
+ $(Q)$(MAKE) $(hdr-inst)=include/uapi
+ $(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi
+
+@@ -1792,7 +1835,9 @@ quiet_cmd_depmod = DEPMOD $(MODLIB)
+
+ modules_install:
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
++ifndef modules_sign_only
+ $(call cmd,depmod)
++endif
+
+ else # CONFIG_MODULES
+
+@@ -1807,6 +1852,8 @@ modules modules_install:
+ @echo >&2 '***'
+ @exit 1
+
++KBUILD_MODULES :=
++
+ endif # CONFIG_MODULES
+
+ # Single targets
+@@ -1832,18 +1879,12 @@ $(single-ko): single_modpost
+ $(single-no-ko): descend
+ @:
+
+-ifeq ($(KBUILD_EXTMOD),)
+-# For the single build of in-tree modules, use a temporary file to avoid
+-# the situation of modules_install installing an invalid modules.order.
+-MODORDER := .modules.tmp
+-endif
+-
++# Remove MODORDER when done because it is not the real one.
+ PHONY += single_modpost
+ single_modpost: $(single-no-ko) modules_prepare
+ $(Q){ $(foreach m, $(single-ko), echo $(extmod_prefix)$m;) } > $(MODORDER)
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-KBUILD_MODULES := 1
++ $(Q)rm -f $(MODORDER)
+
+ export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod_prefix), $(single-no-ko))
+
+@@ -1851,10 +1892,8 @@ export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod_prefix), $(single-no-ko))
+ build-dirs := $(foreach d, $(build-dirs), \
+ $(if $(filter $(d)/%, $(KBUILD_SINGLE_TARGETS)), $(d)))
+
+-endif
++KBUILD_MODULES := 1
+
+-ifndef CONFIG_MODULES
+-KBUILD_MODULES :=
+ endif
+
+ # Handle descending into subdirectories listed in $(build-dirs)
+diff --git a/arch/Kconfig b/arch/Kconfig
+index 8df1c71026435..b45c699c2bac3 100644
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -200,6 +200,9 @@ config HAVE_NMI
+ config TRACE_IRQFLAGS_SUPPORT
+ bool
+
++config TRACE_IRQFLAGS_NMI_SUPPORT
++ bool
++
+ #
+ # An arch should select this if it provides all these things:
+ #
+@@ -261,6 +264,9 @@ config ARCH_HAS_DMA_SET_UNCACHED
+ config ARCH_HAS_DMA_CLEAR_UNCACHED
+ bool
+
++config ARCH_HAS_CPU_FINALIZE_INIT
++ bool
++
+ # Select if arch init_task must go in the __init_task_data section
+ config ARCH_TASK_STRUCT_ON_STACK
+ bool
+@@ -1141,6 +1147,7 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ config RANDOMIZE_KSTACK_OFFSET_DEFAULT
+ bool "Randomize kernel stack offset on syscall entry"
+ depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
++ depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000
+ help
+ The kernel stack offset can be randomized (after pt_regs) by
+ roughly 5 bits of entropy, frustrating memory corruption
+@@ -1234,6 +1241,9 @@ config RELR
+ config ARCH_HAS_MEM_ENCRYPT
+ bool
+
++config ARCH_HAS_CC_PLATFORM
++ bool
++
+ config HAVE_SPARSE_SYSCALL_NR
+ bool
+ help
+diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
+index 08b430d25a315..7cf92d172dce9 100644
+--- a/arch/alpha/boot/tools/objstrip.c
++++ b/arch/alpha/boot/tools/objstrip.c
+@@ -148,7 +148,7 @@ main (int argc, char *argv[])
+ #ifdef __ELF__
+ elf = (struct elfhdr *) buf;
+
+- if (elf->e_ident[0] == 0x7f && str_has_prefix((char *)elf->e_ident + 1, "ELF")) {
++ if (memcmp(&elf->e_ident[EI_MAG0], ELFMAG, SELFMAG) == 0) {
+ if (elf->e_type != ET_EXEC) {
+ fprintf(stderr, "%s: %s is not an ELF executable\n",
+ prog_name, inname);
+diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h
+deleted file mode 100644
+index 78030d1c7e7e0..0000000000000
+--- a/arch/alpha/include/asm/bugs.h
++++ /dev/null
+@@ -1,20 +0,0 @@
+-/*
+- * include/asm-alpha/bugs.h
+- *
+- * Copyright (C) 1994 Linus Torvalds
+- */
+-
+-/*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Needs:
+- * void check_bugs(void);
+- */
+-
+-/*
+- * I don't know of any alpha bugs yet.. Nice chip
+- */
+-
+-static void check_bugs(void)
+-{
+-}
+diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
+index 18f48a6f2ff6d..8f3f5eecba28b 100644
+--- a/arch/alpha/include/asm/page.h
++++ b/arch/alpha/include/asm/page.h
+@@ -18,7 +18,7 @@ extern void clear_page(void *page);
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+
+ #define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr)
++ alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+
+ extern void copy_page(void * _to, void * _from);
+diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
+index 2592356e32154..0ce1eee0924b1 100644
+--- a/arch/alpha/include/asm/thread_info.h
++++ b/arch/alpha/include/asm/thread_info.h
+@@ -77,7 +77,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
+
+ /* Work to do on interrupt/exception return. */
+ #define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+- _TIF_NOTIFY_RESUME)
++ _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL)
+
+ /* Work to do on any return to userspace. */
+ #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \
+diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h
+index b565cc6f408e9..f89798da8a147 100644
+--- a/arch/alpha/include/asm/timex.h
++++ b/arch/alpha/include/asm/timex.h
+@@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void)
+ __asm__ __volatile__ ("rpcc %0" : "=r"(ret));
+ return ret;
+ }
++#define get_cycles get_cycles
+
+ #endif
+diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
+index e227f3a29a43c..c41a5a9c3b9f2 100644
+--- a/arch/alpha/kernel/entry.S
++++ b/arch/alpha/kernel/entry.S
+@@ -469,8 +469,10 @@ entSys:
+ #ifdef CONFIG_AUDITSYSCALL
+ lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ and $3, $6, $3
+-#endif
+ bne $3, strace
++#else
++ blbs $3, strace /* check for SYSCALL_TRACE in disguise */
++#endif
+ beq $4, 1f
+ ldq $27, 0($5)
+ 1: jsr $26, ($27), sys_ni_syscall
+diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
+index f6d2946edbd24..15f2effd6baf8 100644
+--- a/arch/alpha/kernel/irq.c
++++ b/arch/alpha/kernel/irq.c
+@@ -60,7 +60,7 @@ int irq_select_affinity(unsigned int irq)
+ cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
+ last_cpu = cpu;
+
+- cpumask_copy(irq_data_get_affinity_mask(data), cpumask_of(cpu));
++ irq_data_update_affinity(data, cpumask_of(cpu));
+ chip->irq_set_affinity(data, cpumask_of(cpu), false);
+ return 0;
+ }
+diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
+index 5b60c248de9ea..cbefa5a773846 100644
+--- a/arch/alpha/kernel/module.c
++++ b/arch/alpha/kernel/module.c
+@@ -146,10 +146,8 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
+ base = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr;
+ symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr;
+
+- /* The small sections were sorted to the end of the segment.
+- The following should definitely cover them. */
+- gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000;
+ got = sechdrs[me->arch.gotsecindex].sh_addr;
++ gp = got + 0x8000;
+
+ for (i = 0; i < n; i++) {
+ unsigned long r_sym = ELF64_R_SYM (rela[i].r_info);
+diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
+index ce3077946e1d9..fb3025396ac96 100644
+--- a/arch/alpha/kernel/rtc.c
++++ b/arch/alpha/kernel/rtc.c
+@@ -80,7 +80,12 @@ init_rtc_epoch(void)
+ static int
+ alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+ {
+- mc146818_get_time(tm);
++ int ret = mc146818_get_time(tm);
++
++ if (ret < 0) {
++ dev_err_ratelimited(dev, "unable to read current time\n");
++ return ret;
++ }
+
+ /* Adjust for non-default epochs. It's easier to depend on the
+ generic __get_rtc_time and adjust the epoch here than create
+diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
+index b4fbbba30aa2b..8c4c14a171e23 100644
+--- a/arch/alpha/kernel/setup.c
++++ b/arch/alpha/kernel/setup.c
+@@ -385,8 +385,7 @@ setup_memory(void *kernel_end)
+ #endif /* CONFIG_BLK_DEV_INITRD */
+ }
+
+-int __init
+-page_is_ram(unsigned long pfn)
++int page_is_ram(unsigned long pfn)
+ {
+ struct memclust_struct * cluster;
+ struct memdesc_struct * memdesc;
+diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
+index 90635ef5dafac..6dc952b0df4a9 100644
+--- a/arch/alpha/kernel/srmcons.c
++++ b/arch/alpha/kernel/srmcons.c
+@@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port)
+ } while((result.bits.status & 1) && (++loops < 10));
+
+ if (count)
+- tty_schedule_flip(port);
++ tty_flip_buffer_push(port);
+
+ return count;
+ }
+diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
+index e805106409f76..afaf4f6ad0f49 100644
+--- a/arch/alpha/kernel/traps.c
++++ b/arch/alpha/kernel/traps.c
+@@ -192,7 +192,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
+ local_irq_enable();
+ while (1);
+ }
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ #ifndef CONFIG_MATHEMU
+@@ -235,7 +235,21 @@ do_entIF(unsigned long type, struct pt_regs *regs)
+ {
+ int signo, code;
+
+- if ((regs->ps & ~IPL_MAX) == 0) {
++ if (type == 3) { /* FEN fault */
++ /* Irritating users can call PAL_clrfen to disable the
++ FPU for the process. The kernel will then trap in
++ do_switch_stack and undo_switch_stack when we try
++ to save and restore the FP registers.
++
++ Given that GCC by default generates code that uses the
++ FP registers, PAL_clrfen is not useful except for DoS
++ attacks. So turn the bleeding FPU back on and be done
++ with it. */
++ current_thread_info()->pcb.flags |= 1;
++ __reload_thread(&current_thread_info()->pcb);
++ return;
++ }
++ if (!user_mode(regs)) {
+ if (type == 1) {
+ const unsigned int *data
+ = (const unsigned int *) regs->pc;
+@@ -368,20 +382,6 @@ do_entIF(unsigned long type, struct pt_regs *regs)
+ }
+ break;
+
+- case 3: /* FEN fault */
+- /* Irritating users can call PAL_clrfen to disable the
+- FPU for the process. The kernel will then trap in
+- do_switch_stack and undo_switch_stack when we try
+- to save and restore the FP registers.
+-
+- Given that GCC by default generates code that uses the
+- FP registers, PAL_clrfen is not useful except for DoS
+- attacks. So turn the bleeding FPU back on and be done
+- with it. */
+- current_thread_info()->pcb.flags |= 1;
+- __reload_thread(&current_thread_info()->pcb);
+- return;
+-
+ case 5: /* illoc */
+ default: /* unexpected instruction-fault type */
+ ;
+@@ -577,7 +577,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
+
+ printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n",
+ pc, va, opcode, reg);
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+
+ got_exception:
+ /* Ok, we caught the exception, but we don't want it. Is there
+@@ -632,7 +632,7 @@ got_exception:
+ local_irq_enable();
+ while (1);
+ }
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ /*
+diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
+index eee5102c3d889..e9193d52222ea 100644
+--- a/arch/alpha/mm/fault.c
++++ b/arch/alpha/mm/fault.c
+@@ -204,7 +204,7 @@ retry:
+ printk(KERN_ALERT "Unable to handle kernel paging request at "
+ "virtual address %016lx\n", address);
+ die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+
+ /* We ran out of memory, or some other thing happened to us that
+ made us unable to handle the page fault gracefully. */
+diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
+index 088d348781c1c..0b7c902c72ba8 100644
+--- a/arch/arc/include/asm/atomic-llsc.h
++++ b/arch/arc/include/asm/atomic-llsc.h
+@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+ : [val] "=&r" (val) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
+diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
+index c5a8010fdc97d..9089f34baac3b 100644
+--- a/arch/arc/include/asm/atomic64-arcv2.h
++++ b/arch/arc/include/asm/atomic64-arcv2.h
+@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC64_OP_RETURN(op, op1, op2) \
+@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
+diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
+index 8f777d6441a5d..80347382a3800 100644
+--- a/arch/arc/include/asm/io.h
++++ b/arch/arc/include/asm/io.h
+@@ -32,7 +32,7 @@ static inline void ioport_unmap(void __iomem *addr)
+ {
+ }
+
+-extern void iounmap(const void __iomem *addr);
++extern void iounmap(const volatile void __iomem *addr);
+
+ /*
+ * io{read,write}{16,32}be() macros
+diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
+index c9434ff3aa4ce..8a3fb71e9cfad 100644
+--- a/arch/arc/include/asm/linkage.h
++++ b/arch/arc/include/asm/linkage.h
+@@ -8,6 +8,10 @@
+
+ #include <asm/dwarf.h>
+
++#define ASM_NL ` /* use '`' to mark new line in macro */
++#define __ALIGN .align 4
++#define __ALIGN_STR __stringify(__ALIGN)
++
+ #ifdef __ASSEMBLY__
+
+ .macro ST2 e, o, off
+@@ -28,10 +32,6 @@
+ #endif
+ .endm
+
+-#define ASM_NL ` /* use '`' to mark new line in macro */
+-#define __ALIGN .align 4
+-#define __ALIGN_STR __stringify(__ALIGN)
+-
+ /* annotation for data we want in DCCM - if enabled in .config */
+ .macro ARCFP_DATA nm
+ #ifdef CONFIG_ARC_HAS_DCCM
+diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
+index 8084ef2f64910..4e7a19cb8e528 100644
+--- a/arch/arc/include/asm/pgtable-levels.h
++++ b/arch/arc/include/asm/pgtable-levels.h
+@@ -163,7 +163,7 @@
+ #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
+ #define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
+ #define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
+-#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
++#define pmd_pgtable(pmd) ((pgtable_t) pmd_page(pmd))
+
+ /*
+ * 4th level paging: pte
+diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
+index dd77a0c8f740b..66ba549b520fc 100644
+--- a/arch/arc/kernel/entry.S
++++ b/arch/arc/kernel/entry.S
+@@ -196,6 +196,7 @@ tracesys_exit:
+ st r0, [sp, PT_r0] ; sys call return value in pt_regs
+
+ ;POST Sys Call Ptrace Hook
++ mov r0, sp ; pt_regs needed
+ bl @syscall_trace_exit
+ b ret_from_exception ; NOT ret_from_system_call at is saves r0 which
+ ; we'd done before calling post hook above
+diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
+index 3793876f42d9b..5f7f5aab361f1 100644
+--- a/arch/arc/kernel/process.c
++++ b/arch/arc/kernel/process.c
+@@ -43,7 +43,7 @@ SYSCALL_DEFINE0(arc_gettls)
+ return task_thread_info(current)->thr_ptr;
+ }
+
+-SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
++SYSCALL_DEFINE3(arc_usr_cmpxchg, int __user *, uaddr, int, expected, int, new)
+ {
+ struct pt_regs *regs = current_pt_regs();
+ u32 uval;
+@@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x)
+ eflags = x->e_flags;
+ if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
+ pr_err("ABI mismatch - you need newer toolchain\n");
+- force_sigsegv(SIGSEGV);
++ force_fatal_sig(SIGSEGV);
+ return 0;
+ }
+
+diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
+index 0ee75aca6e109..712c2311daefb 100644
+--- a/arch/arc/mm/ioremap.c
++++ b/arch/arc/mm/ioremap.c
+@@ -94,7 +94,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+ EXPORT_SYMBOL(ioremap_prot);
+
+
+-void iounmap(const void __iomem *addr)
++void iounmap(const volatile void __iomem *addr)
+ {
+ /* weird double cast to handle phys_addr_t > 32 bits */
+ if (arc_uncached_addr_space((phys_addr_t)(u32)addr))
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index dcf2df6da98f0..f2fbb170d813c 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -4,6 +4,7 @@ config ARM
+ default y
+ select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
++ select ARCH_HAS_CPU_FINALIZE_INIT if MMU
+ select ARCH_HAS_DEBUG_VIRTUAL if MMU
+ select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
+ select ARCH_HAS_ELF_RANDOMIZE
+@@ -1455,6 +1456,7 @@ config HIGHMEM
+ bool "High Memory Support"
+ depends on MMU
+ select KMAP_LOCAL
++ select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+ help
+ The address space of ARM processors is only 4 Gigabytes large
+ and it has to accommodate user address space, kernel address
+@@ -1740,7 +1742,6 @@ config CMDLINE
+ choice
+ prompt "Kernel command line type" if CMDLINE != ""
+ default CMDLINE_FROM_BOOTLOADER
+- depends on ATAGS
+
+ config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments if available"
+diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
+index 98436702e0c7e..644875d73ba15 100644
+--- a/arch/arm/Kconfig.debug
++++ b/arch/arm/Kconfig.debug
+@@ -410,12 +410,12 @@ choice
+ Say Y here if you want kernel low-level debugging support
+ on i.MX25.
+
+- config DEBUG_IMX21_IMX27_UART
+- bool "i.MX21 and i.MX27 Debug UART"
+- depends on SOC_IMX21 || SOC_IMX27
++ config DEBUG_IMX27_UART
++ bool "i.MX27 Debug UART"
++ depends on SOC_IMX27
+ help
+ Say Y here if you want kernel low-level debugging support
+- on i.MX21 or i.MX27.
++ on i.MX27.
+
+ config DEBUG_IMX28_UART
+ bool "i.MX28 Debug UART"
+@@ -1481,7 +1481,7 @@ config DEBUG_IMX_UART_PORT
+ int "i.MX Debug UART Port Selection"
+ depends on DEBUG_IMX1_UART || \
+ DEBUG_IMX25_UART || \
+- DEBUG_IMX21_IMX27_UART || \
++ DEBUG_IMX27_UART || \
+ DEBUG_IMX31_UART || \
+ DEBUG_IMX35_UART || \
+ DEBUG_IMX50_UART || \
+@@ -1540,12 +1540,12 @@ config DEBUG_LL_INCLUDE
+ default "debug/icedcc.S" if DEBUG_ICEDCC
+ default "debug/imx.S" if DEBUG_IMX1_UART || \
+ DEBUG_IMX25_UART || \
+- DEBUG_IMX21_IMX27_UART || \
++ DEBUG_IMX27_UART || \
+ DEBUG_IMX31_UART || \
+ DEBUG_IMX35_UART || \
+ DEBUG_IMX50_UART || \
+ DEBUG_IMX51_UART || \
+- DEBUG_IMX53_UART ||\
++ DEBUG_IMX53_UART || \
+ DEBUG_IMX6Q_UART || \
+ DEBUG_IMX6SL_UART || \
+ DEBUG_IMX6SX_UART || \
+diff --git a/arch/arm/Makefile b/arch/arm/Makefile
+index 847c31e7c3687..fa45837b8065c 100644
+--- a/arch/arm/Makefile
++++ b/arch/arm/Makefile
+@@ -60,15 +60,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra)
+ # Note that GCC does not numerically define an architecture version
+ # macro, but instead defines a whole series of macros which makes
+ # testing for a specific architecture or later rather impossible.
+-arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
+-arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
+-arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
++arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m
++arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a
++arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6
+ # Only override the compiler option if ARMv6. The ARMv6K extensions are
+ # always available in ARMv7
+ ifeq ($(CONFIG_CPU_32v6),y)
+-arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
++arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k
+ endif
+-arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
++arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te
+ arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t
+ arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4
+ arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m
+@@ -82,7 +82,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi
+ tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi
+ tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi
+ tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi
+-tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
++tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e
+ tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi
+ tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi
+ tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi
+@@ -90,11 +90,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi
+ tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi
+ tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110
+ tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100
+-tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
+-tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
+-tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale)
+-tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+-tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
++tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale
++tune-$(CONFIG_CPU_XSC3) =-mtune=xscale
++tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale
++tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s
++tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s
+
+ # Evaluate tune cc-option calls now
+ tune-y := $(tune-y)
+diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
+index c0e7a745103e2..230030c130853 100644
+--- a/arch/arm/boot/compressed/efi-header.S
++++ b/arch/arm/boot/compressed/efi-header.S
+@@ -9,16 +9,22 @@
+ #include <linux/sizes.h>
+
+ .macro __nop
+-#ifdef CONFIG_EFI_STUB
+- @ This is almost but not quite a NOP, since it does clobber the
+- @ condition flags. But it is the best we can do for EFI, since
+- @ PE/COFF expects the magic string "MZ" at offset 0, while the
+- @ ARM/Linux boot protocol expects an executable instruction
+- @ there.
+- .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000
+-#else
+ AR_CLASS( mov r0, r0 )
+ M_CLASS( nop.w )
++ .endm
++
++ .macro __initial_nops
++#ifdef CONFIG_EFI_STUB
++ @ This is a two-instruction NOP, which happens to bear the
++ @ PE/COFF signature "MZ" in the first two bytes, so the kernel
++ @ is accepted as an EFI binary. Booting via the UEFI stub
++ @ will not execute those instructions, but the ARM/Linux
++ @ boot protocol does, so we need some NOPs here.
++ .inst MZ_MAGIC | (0xe225 << 16) @ eor r5, r5, 0x4d000
++ eor r5, r5, 0x4d000 @ undo previous insn
++#else
++ __nop
++ __nop
+ #endif
+ .endm
+
+diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
+index b1cb1972361b8..bf79f2f78d232 100644
+--- a/arch/arm/boot/compressed/head.S
++++ b/arch/arm/boot/compressed/head.S
+@@ -203,7 +203,8 @@ start:
+ * were patching the initial instructions of the kernel, i.e
+ * had started to exploit this "patch area".
+ */
+- .rept 7
++ __initial_nops
++ .rept 5
+ __nop
+ .endr
+ #ifndef CONFIG_THUMB2_KERNEL
+diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
+index 1bcb68ac4b011..3fcb3e62dc569 100644
+--- a/arch/arm/boot/compressed/vmlinux.lds.S
++++ b/arch/arm/boot/compressed/vmlinux.lds.S
+@@ -23,6 +23,7 @@ SECTIONS
+ *(.ARM.extab*)
+ *(.note.*)
+ *(.rel.*)
++ *(.printk_index)
+ /*
+ * Discard any r/w data - this produces a link error if we have any,
+ * which is required for PIC decompression. Local data generates
+@@ -57,6 +58,7 @@ SECTIONS
+ *(.rodata)
+ *(.rodata.*)
+ *(.data.rel.ro)
++ *(.data.rel.ro.*)
+ }
+ .piggydata : {
+ *(.piggydata)
+diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
+index 7e0934180724d..7a72fc636a7a7 100644
+--- a/arch/arm/boot/dts/Makefile
++++ b/arch/arm/boot/dts/Makefile
+@@ -127,6 +127,7 @@ dtb-$(CONFIG_ARCH_BCM_5301X) += \
+ bcm47094-luxul-xwr-3150-v1.dtb \
+ bcm47094-netgear-r8500.dtb \
+ bcm47094-phicomm-k3.dtb \
++ bcm53015-meraki-mr26.dtb \
+ bcm53016-meraki-mr32.dtb \
+ bcm94708.dtb \
+ bcm94709.dtb \
+@@ -779,6 +780,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \
+ logicpd-som-lv-37xx-devkit.dtb \
+ omap3430-sdp.dtb \
+ omap3-beagle.dtb \
++ omap3-beagle-ab4.dtb \
+ omap3-beagle-xm.dtb \
+ omap3-beagle-xm-ab.dtb \
+ omap3-cm-t3517.dtb \
+diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi
+index 124026fa0d095..f207499461b34 100644
+--- a/arch/arm/boot/dts/am335x-pcm-953.dtsi
++++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi
+@@ -12,22 +12,20 @@
+ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx";
+
+ /* Power */
+- regulators {
+- vcc3v3: fixedregulator@1 {
+- compatible = "regulator-fixed";
+- regulator-name = "vcc3v3";
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
+- regulator-boot-on;
+- };
++ vcc3v3: fixedregulator1 {
++ compatible = "regulator-fixed";
++ regulator-name = "vcc3v3";
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
++ regulator-boot-on;
++ };
+
+- vcc1v8: fixedregulator@2 {
+- compatible = "regulator-fixed";
+- regulator-name = "vcc1v8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- regulator-boot-on;
+- };
++ vcc1v8: fixedregulator2 {
++ compatible = "regulator-fixed";
++ regulator-name = "vcc1v8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ regulator-boot-on;
+ };
+
+ /* User IO */
+diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
+index c9629cb5ccd1e..9a750883b987b 100644
+--- a/arch/arm/boot/dts/am33xx-l4.dtsi
++++ b/arch/arm/boot/dts/am33xx-l4.dtsi
+@@ -1500,8 +1500,7 @@
+ mmc1: mmc@0 {
+ compatible = "ti,am335-sdhci";
+ ti,needs-special-reset;
+- dmas = <&edma_xbar 24 0 0
+- &edma_xbar 25 0 0>;
++ dmas = <&edma 24 0>, <&edma 25 0>;
+ dma-names = "tx", "rx";
+ interrupts = <64>;
+ reg = <0x0 0x1000>;
+diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts
+index 0d2fac98ce7d2..c8b80f156ec98 100644
+--- a/arch/arm/boot/dts/am3517-evm.dts
++++ b/arch/arm/boot/dts/am3517-evm.dts
+@@ -161,6 +161,8 @@
+
+ /* HS USB Host PHY on PORT 1 */
+ hsusb1_phy: hsusb1_phy {
++ pinctrl-names = "default";
++ pinctrl-0 = <&hsusb1_rst_pins>;
+ compatible = "usb-nop-xceiv";
+ reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */
+ #phy-cells = <0>;
+@@ -168,7 +170,9 @@
+ };
+
+ &davinci_emac {
+- status = "okay";
++ pinctrl-names = "default";
++ pinctrl-0 = <&ethernet_pins>;
++ status = "okay";
+ };
+
+ &davinci_mdio {
+@@ -193,6 +197,8 @@
+ };
+
+ &i2c2 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&i2c2_pins>;
+ clock-frequency = <400000>;
+ /* User DIP swithes [1:8] / User LEDS [1:2] */
+ tca6416: gpio@21 {
+@@ -205,6 +211,8 @@
+ };
+
+ &i2c3 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&i2c3_pins>;
+ clock-frequency = <400000>;
+ };
+
+@@ -223,6 +231,8 @@
+ };
+
+ &usbhshost {
++ pinctrl-names = "default";
++ pinctrl-0 = <&hsusb1_pins>;
+ port1-mode = "ehci-phy";
+ };
+
+@@ -231,8 +241,35 @@
+ };
+
+ &omap3_pmx_core {
+- pinctrl-names = "default";
+- pinctrl-0 = <&hsusb1_rst_pins>;
++
++ ethernet_pins: pinmux_ethernet_pins {
++ pinctrl-single,pins = <
++ OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */
++ OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */
++ OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */
++ OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */
++ OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */
++ OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */
++ OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */
++ OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */
++ OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */
++ OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */
++ >;
++ };
++
++ i2c2_pins: pinmux_i2c2_pins {
++ pinctrl-single,pins = <
++ OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_scl */
++ OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_sda */
++ >;
++ };
++
++ i2c3_pins: pinmux_i2c3_pins {
++ pinctrl-single,pins = <
++ OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_scl */
++ OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_sda */
++ >;
++ };
+
+ leds_pins: pinmux_leds_pins {
+ pinctrl-single,pins = <
+@@ -300,8 +337,6 @@
+ };
+
+ &omap3_pmx_core2 {
+- pinctrl-names = "default";
+- pinctrl-0 = <&hsusb1_pins>;
+
+ hsusb1_pins: pinmux_hsusb1_pins {
+ pinctrl-single,pins = <
+diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi
+index 8b669e2eafec4..f7b680f6c48ad 100644
+--- a/arch/arm/boot/dts/am3517-som.dtsi
++++ b/arch/arm/boot/dts/am3517-som.dtsi
+@@ -69,6 +69,8 @@
+ };
+
+ &i2c1 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&i2c1_pins>;
+ clock-frequency = <400000>;
+
+ s35390a: s35390a@30 {
+@@ -179,6 +181,13 @@
+
+ &omap3_pmx_core {
+
++ i2c1_pins: pinmux_i2c1_pins {
++ pinctrl-single,pins = <
++ OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_scl */
++ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_sda */
++ >;
++ };
++
+ wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4) /* mmc1_dat7.gpio_129 */
+diff --git a/arch/arm/boot/dts/am5748.dtsi b/arch/arm/boot/dts/am5748.dtsi
+index c260aa1a85bdb..a1f029e9d1f3d 100644
+--- a/arch/arm/boot/dts/am5748.dtsi
++++ b/arch/arm/boot/dts/am5748.dtsi
+@@ -25,6 +25,10 @@
+ status = "disabled";
+ };
+
++&usb4_tm {
++ status = "disabled";
++};
++
+ &atl_tm {
+ status = "disabled";
+ };
+diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+index 2e94f32d9dfca..5de82729eb7ed 100644
+--- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
++++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+@@ -527,7 +527,7 @@
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <31 0>;
+- pendown-gpio = <&gpio1 31 0>;
++ pendown-gpio = <&gpio1 31 GPIO_ACTIVE_LOW>;
+
+
+ ti,x-min = /bits/ 16 <0x0>;
+diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi
+index 46e6d3ed8f35a..c042c416a94a3 100644
+--- a/arch/arm/boot/dts/armada-370.dtsi
++++ b/arch/arm/boot/dts/armada-370.dtsi
+@@ -74,7 +74,7 @@
+
+ pcie2: pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82002800 0 0x80000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x80000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
+index 7f2f24a29e6c1..352a2f7ba3114 100644
+--- a/arch/arm/boot/dts/armada-375.dtsi
++++ b/arch/arm/boot/dts/armada-375.dtsi
+@@ -582,7 +582,7 @@
+
+ pcie1: pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-380.dtsi b/arch/arm/boot/dts/armada-380.dtsi
+index cff1269f3fbfd..7146cc8f082af 100644
+--- a/arch/arm/boot/dts/armada-380.dtsi
++++ b/arch/arm/boot/dts/armada-380.dtsi
+@@ -79,7 +79,7 @@
+ /* x1 port */
+ pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -98,7 +98,7 @@
+ /* x1 port */
+ pcie@3,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-385-turris-omnia.dts b/arch/arm/boot/dts/armada-385-turris-omnia.dts
+index 5bd6a66d2c2b4..e7649c795699c 100644
+--- a/arch/arm/boot/dts/armada-385-turris-omnia.dts
++++ b/arch/arm/boot/dts/armada-385-turris-omnia.dts
+@@ -23,6 +23,12 @@
+ stdout-path = &uart0;
+ };
+
++ aliases {
++ ethernet0 = &eth0;
++ ethernet1 = &eth1;
++ ethernet2 = &eth2;
++ };
++
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x40000000>; /* 1024 MB */
+@@ -450,7 +456,17 @@
+ };
+ };
+
+- /* port 6 is connected to eth0 */
++ ports@6 {
++ reg = <6>;
++ label = "cpu";
++ ethernet = <&eth0>;
++ phy-mode = "rgmii-id";
++
++ fixed-link {
++ speed = <1000>;
++ full-duplex;
++ };
++ };
+ };
+ };
+ };
+@@ -471,7 +487,7 @@
+ marvell,function = "spi0";
+ };
+
+- spi0cs1_pins: spi0cs1-pins {
++ spi0cs2_pins: spi0cs2-pins {
+ marvell,pins = "mpp26";
+ marvell,function = "spi0";
+ };
+@@ -506,7 +522,7 @@
+ };
+ };
+
+- /* MISO, MOSI, SCLK and CS1 are routed to pin header CN11 */
++ /* MISO, MOSI, SCLK and CS2 are routed to pin header CN11 */
+ };
+
+ &uart0 {
+diff --git a/arch/arm/boot/dts/armada-385.dtsi b/arch/arm/boot/dts/armada-385.dtsi
+index f0022d10c7159..f081f7cb66e5f 100644
+--- a/arch/arm/boot/dts/armada-385.dtsi
++++ b/arch/arm/boot/dts/armada-385.dtsi
+@@ -84,7 +84,7 @@
+ /* x1 port */
+ pcie2: pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -103,7 +103,7 @@
+ /* x1 port */
+ pcie3: pcie@3,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -125,7 +125,7 @@
+ */
+ pcie4: pcie@4,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>;
++ assigned-addresses = <0x82002000 0 0x48000 0 0x2000>;
+ reg = <0x2000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
+index 9b1a24cc5e91f..df3c8d1d8f641 100644
+--- a/arch/arm/boot/dts/armada-38x.dtsi
++++ b/arch/arm/boot/dts/armada-38x.dtsi
+@@ -168,7 +168,7 @@
+ };
+
+ uart0: serial@12000 {
+- compatible = "marvell,armada-38x-uart";
++ compatible = "marvell,armada-38x-uart", "ns16550a";
+ reg = <0x12000 0x100>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+@@ -178,7 +178,7 @@
+ };
+
+ uart1: serial@12100 {
+- compatible = "marvell,armada-38x-uart";
++ compatible = "marvell,armada-38x-uart", "ns16550a";
+ reg = <0x12100 0x100>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi
+index e0b7c20998312..9525e7b7f4360 100644
+--- a/arch/arm/boot/dts/armada-39x.dtsi
++++ b/arch/arm/boot/dts/armada-39x.dtsi
+@@ -453,7 +453,7 @@
+ /* x1 port */
+ pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -472,7 +472,7 @@
+ /* x1 port */
+ pcie@3,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -494,7 +494,7 @@
+ */
+ pcie@4,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>;
++ assigned-addresses = <0x82002000 0 0x48000 0 0x2000>;
+ reg = <0x2000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-xp-mv78230.dtsi b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
+index 8558bf6bb54c6..d55fe162fc7f0 100644
+--- a/arch/arm/boot/dts/armada-xp-mv78230.dtsi
++++ b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
+@@ -97,7 +97,7 @@
+
+ pcie2: pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -115,7 +115,7 @@
+
+ pcie3: pcie@3,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>;
++ assigned-addresses = <0x82001800 0 0x48000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -133,7 +133,7 @@
+
+ pcie4: pcie@4,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x4c000 0 0x2000>;
++ assigned-addresses = <0x82002000 0 0x4c000 0 0x2000>;
+ reg = <0x2000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -151,7 +151,7 @@
+
+ pcie5: pcie@5,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x80000 0 0x2000>;
++ assigned-addresses = <0x82002800 0 0x80000 0 0x2000>;
+ reg = <0x2800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+index 2d85fe8ac3272..fdcc818199401 100644
+--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
++++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+@@ -112,7 +112,7 @@
+
+ pcie2: pcie@2,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -130,7 +130,7 @@
+
+ pcie3: pcie@3,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>;
++ assigned-addresses = <0x82001800 0 0x48000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -148,7 +148,7 @@
+
+ pcie4: pcie@4,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x4c000 0 0x2000>;
++ assigned-addresses = <0x82002000 0 0x4c000 0 0x2000>;
+ reg = <0x2000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -166,7 +166,7 @@
+
+ pcie5: pcie@5,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x80000 0 0x2000>;
++ assigned-addresses = <0x82002800 0 0x80000 0 0x2000>;
+ reg = <0x2800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -184,7 +184,7 @@
+
+ pcie6: pcie@6,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x84000 0 0x2000>;
++ assigned-addresses = <0x82003000 0 0x84000 0 0x2000>;
+ reg = <0x3000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -202,7 +202,7 @@
+
+ pcie7: pcie@7,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x88000 0 0x2000>;
++ assigned-addresses = <0x82003800 0 0x88000 0 0x2000>;
+ reg = <0x3800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -220,7 +220,7 @@
+
+ pcie8: pcie@8,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x8c000 0 0x2000>;
++ assigned-addresses = <0x82004000 0 0x8c000 0 0x2000>;
+ reg = <0x4000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+@@ -238,7 +238,7 @@
+
+ pcie9: pcie@9,0 {
+ device_type = "pci";
+- assigned-addresses = <0x82000800 0 0x42000 0 0x2000>;
++ assigned-addresses = <0x82004800 0 0x42000 0 0x2000>;
+ reg = <0x4800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm/boot/dts/aspeed-ast2500-evb.dts b/arch/arm/boot/dts/aspeed-ast2500-evb.dts
+index 1d24b394ea4c3..a497dd135491b 100644
+--- a/arch/arm/boot/dts/aspeed-ast2500-evb.dts
++++ b/arch/arm/boot/dts/aspeed-ast2500-evb.dts
+@@ -5,7 +5,7 @@
+
+ / {
+ model = "AST2500 EVB";
+- compatible = "aspeed,ast2500";
++ compatible = "aspeed,ast2500-evb", "aspeed,ast2500";
+
+ aliases {
+ serial4 = &uart5;
+diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts b/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts
+index dd7148060c4a3..d0a5c2ff0fec4 100644
+--- a/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts
++++ b/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts
+@@ -5,6 +5,7 @@
+
+ / {
+ model = "AST2600 A1 EVB";
++ compatible = "aspeed,ast2600-evb-a1", "aspeed,ast2600";
+
+ /delete-node/regulator-vcc-sdhci0;
+ /delete-node/regulator-vcc-sdhci1;
+diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts
+index b7eb552640cbf..b8e55bf167aa8 100644
+--- a/arch/arm/boot/dts/aspeed-ast2600-evb.dts
++++ b/arch/arm/boot/dts/aspeed-ast2600-evb.dts
+@@ -8,7 +8,7 @@
+
+ / {
+ model = "AST2600 EVB";
+- compatible = "aspeed,ast2600";
++ compatible = "aspeed,ast2600-evb-a1", "aspeed,ast2600";
+
+ aliases {
+ serial4 = &uart5;
+@@ -103,7 +103,7 @@
+ &mac0 {
+ status = "okay";
+
+- phy-mode = "rgmii";
++ phy-mode = "rgmii-rxid";
+ phy-handle = <&ethphy0>;
+
+ pinctrl-names = "default";
+@@ -114,7 +114,7 @@
+ &mac1 {
+ status = "okay";
+
+- phy-mode = "rgmii";
++ phy-mode = "rgmii-rxid";
+ phy-handle = <&ethphy1>;
+
+ pinctrl-names = "default";
+diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
+index 9b4cf5ebe6d5f..c62aff908ab48 100644
+--- a/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
++++ b/arch/arm/boot/dts/aspeed-bmc-asrock-e3c246d4i.dts
+@@ -63,7 +63,7 @@
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+- spi-max-frequency = <100000000>; /* 100 MHz */
++ spi-max-frequency = <50000000>; /* 50 MHz */
+ #include "openbmc-flash-layout.dtsi"
+ };
+ };
+diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts
+index 2efd70666738c..af7ea7cab8cfa 100644
+--- a/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts
++++ b/arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts
+@@ -231,6 +231,21 @@
+ gpios = <&gpio0 ASPEED_GPIO(P, 4) GPIO_ACTIVE_LOW>;
+ };
+ };
++
++ iio-hwmon {
++ compatible = "iio-hwmon";
++ io-channels = <&adc1 7>;
++ };
++};
++
++&adc1 {
++ status = "okay";
++ aspeed,int-vref-microvolt = <2500000>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_adc8_default &pinctrl_adc9_default
++ &pinctrl_adc10_default &pinctrl_adc11_default
++ &pinctrl_adc12_default &pinctrl_adc13_default
++ &pinctrl_adc14_default &pinctrl_adc15_default>;
+ };
+
+ &gpio0 {
+diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
+index 6419c9762c0b6..6c9f34396a3ae 100644
+--- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
++++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
+@@ -246,6 +246,21 @@
+ linux,code = <11>;
+ };
+ };
++
++ iio-hwmon {
++ compatible = "iio-hwmon";
++ io-channels = <&adc1 7>;
++ };
++};
++
++&adc1 {
++ status = "okay";
++ aspeed,int-vref-microvolt = <2500000>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_adc8_default &pinctrl_adc9_default
++ &pinctrl_adc10_default &pinctrl_adc11_default
++ &pinctrl_adc12_default &pinctrl_adc13_default
++ &pinctrl_adc14_default &pinctrl_adc15_default>;
+ };
+
+ &ehci1 {
+diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+index 6dde51c2aed3f..ac07c240419a2 100644
+--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
++++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+@@ -117,11 +117,6 @@
+ groups = "FWSPID";
+ };
+
+- pinctrl_fwqspid_default: fwqspid_default {
+- function = "FWQSPID";
+- groups = "FWQSPID";
+- };
+-
+ pinctrl_fwspiwp_default: fwspiwp_default {
+ function = "FWSPIWP";
+ groups = "FWSPIWP";
+@@ -653,12 +648,12 @@
+ };
+
+ pinctrl_qspi1_default: qspi1_default {
+- function = "QSPI1";
++ function = "SPI1";
+ groups = "QSPI1";
+ };
+
+ pinctrl_qspi2_default: qspi2_default {
+- function = "QSPI2";
++ function = "SPI2";
+ groups = "QSPI2";
+ };
+
+diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
+index 1b47be1704f83..e5724b1a2e20d 100644
+--- a/arch/arm/boot/dts/aspeed-g6.dtsi
++++ b/arch/arm/boot/dts/aspeed-g6.dtsi
+@@ -364,6 +364,41 @@
+ status = "disabled";
+ };
+
++ adc0: adc@1e6e9000 {
++ compatible = "aspeed,ast2600-adc0";
++ reg = <0x1e6e9000 0x100>;
++ clocks = <&syscon ASPEED_CLK_APB2>;
++ resets = <&syscon ASPEED_RESET_ADC>;
++ interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
++ #io-channel-cells = <1>;
++ status = "disabled";
++ };
++
++ adc1: adc@1e6e9100 {
++ compatible = "aspeed,ast2600-adc1";
++ reg = <0x1e6e9100 0x100>;
++ clocks = <&syscon ASPEED_CLK_APB2>;
++ resets = <&syscon ASPEED_RESET_ADC>;
++ interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
++ #io-channel-cells = <1>;
++ status = "disabled";
++ };
++
++ sbc: secure-boot-controller@1e6f2000 {
++ compatible = "aspeed,ast2600-sbc";
++ reg = <0x1e6f2000 0x1000>;
++ };
++
++ video: video@1e700000 {
++ compatible = "aspeed,ast2600-video-engine";
++ reg = <0x1e700000 0x1000>;
++ clocks = <&syscon ASPEED_CLK_GATE_VCLK>,
++ <&syscon ASPEED_CLK_GATE_ECLK>;
++ clock-names = "vclk", "eclk";
++ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
++ status = "disabled";
++ };
++
+ gpio0: gpio@1e780000 {
+ #gpio-cells = <2>;
+ gpio-controller;
+diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts
+index b1068cca42287..fd8dc1183b3e8 100644
+--- a/arch/arm/boot/dts/at91-sam9x60ek.dts
++++ b/arch/arm/boot/dts/at91-sam9x60ek.dts
+@@ -233,10 +233,9 @@
+ status = "okay";
+
+ eeprom@53 {
+- compatible = "atmel,24c32";
++ compatible = "atmel,24c02";
+ reg = <0x53>;
+ pagesize = <16>;
+- size = <128>;
+ status = "okay";
+ };
+ };
+diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
+index 025a78310e3ab..a818e8ebd638f 100644
+--- a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
++++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
+@@ -68,8 +68,8 @@
+ regulators {
+ vdd_3v3: VDD_IO {
+ regulator-name = "VDD_IO";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -87,8 +87,8 @@
+
+ vddio_ddr: VDD_DDR {
+ regulator-name = "VDD_DDR";
+- regulator-min-microvolt = <600000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1200000>;
++ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -110,8 +110,8 @@
+
+ vdd_core: VDD_CORE {
+ regulator-name = "VDD_CORE";
+- regulator-min-microvolt = <600000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1250000>;
++ regulator-max-microvolt = <1250000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -152,8 +152,8 @@
+
+ LDO1 {
+ regulator-name = "LDO1";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+@@ -167,9 +167,8 @@
+
+ LDO2 {
+ regulator-name = "LDO2";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
+- regulator-always-on;
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <3300000>;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts
+index e06b58724ca83..4ebbbe65c0cee 100644
+--- a/arch/arm/boot/dts/at91-sama5d2_icp.dts
++++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts
+@@ -197,8 +197,8 @@
+ regulators {
+ vdd_io_reg: VDD_IO {
+ regulator-name = "VDD_IO";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -216,8 +216,8 @@
+
+ VDD_DDR {
+ regulator-name = "VDD_DDR";
+- regulator-min-microvolt = <600000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1350000>;
++ regulator-max-microvolt = <1350000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -235,8 +235,8 @@
+
+ VDD_CORE {
+ regulator-name = "VDD_CORE";
+- regulator-min-microvolt = <600000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1250000>;
++ regulator-max-microvolt = <1250000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -258,7 +258,6 @@
+ regulator-max-microvolt = <1850000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+- regulator-always-on;
+
+ regulator-state-standby {
+ regulator-on-in-suspend;
+@@ -273,8 +272,8 @@
+
+ LDO1 {
+ regulator-name = "LDO1";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <2500000>;
++ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+@@ -288,8 +287,8 @@
+
+ LDO2 {
+ regulator-name = "LDO2";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+@@ -323,21 +322,21 @@
+ status = "okay";
+
+ eeprom@50 {
+- compatible = "atmel,24c32";
++ compatible = "atmel,24c02";
+ reg = <0x50>;
+ pagesize = <16>;
+ status = "okay";
+ };
+
+ eeprom@52 {
+- compatible = "atmel,24c32";
++ compatible = "atmel,24c02";
+ reg = <0x52>;
+ pagesize = <16>;
+ status = "disabled";
+ };
+
+ eeprom@53 {
+- compatible = "atmel,24c32";
++ compatible = "atmel,24c02";
+ reg = <0x53>;
+ pagesize = <16>;
+ status = "disabled";
+diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+index d72c042f28507..a49c2966b41e2 100644
+--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+@@ -57,8 +57,8 @@
+ };
+
+ spi0: spi@f0004000 {
+- pinctrl-names = "default";
+- pinctrl-0 = <&pinctrl_spi0_cs>;
++ pinctrl-names = "default", "cs";
++ pinctrl-1 = <&pinctrl_spi0_cs>;
+ cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>;
+ status = "okay";
+ };
+@@ -171,8 +171,8 @@
+ };
+
+ spi1: spi@f8008000 {
+- pinctrl-names = "default";
+- pinctrl-0 = <&pinctrl_spi1_cs>;
++ pinctrl-names = "default", "cs";
++ pinctrl-1 = <&pinctrl_spi1_cs>;
+ cs-gpios = <&pioC 25 0>;
+ status = "okay";
+ };
+diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+index d241c24f0d836..e519d27479362 100644
+--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+@@ -81,8 +81,8 @@
+ };
+
+ spi1: spi@fc018000 {
+- pinctrl-names = "default";
+- pinctrl-0 = <&pinctrl_spi0_cs>;
++ pinctrl-names = "default", "cs";
++ pinctrl-1 = <&pinctrl_spi1_cs>;
+ cs-gpios = <&pioB 21 0>;
+ status = "okay";
+ };
+@@ -140,7 +140,7 @@
+ atmel,pins =
+ <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
+ };
+- pinctrl_spi0_cs: spi0_cs_default {
++ pinctrl_spi1_cs: spi1_cs_default {
+ atmel,pins =
+ <AT91_PIOB 21 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
+ };
+diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts
+index f3d6aaa3a78dc..0ba856066ffb2 100644
+--- a/arch/arm/boot/dts/at91-sama7g5ek.dts
++++ b/arch/arm/boot/dts/at91-sama7g5ek.dts
+@@ -169,8 +169,8 @@
+ regulators {
+ vdd_3v3: VDD_IO {
+ regulator-name = "VDD_IO";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -188,8 +188,8 @@
+
+ vddioddr: VDD_DDR {
+ regulator-name = "VDD_DDR";
+- regulator-min-microvolt = <1300000>;
+- regulator-max-microvolt = <1450000>;
++ regulator-min-microvolt = <1350000>;
++ regulator-max-microvolt = <1350000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -209,8 +209,8 @@
+
+ vddcore: VDD_CORE {
+ regulator-name = "VDD_CORE";
+- regulator-min-microvolt = <1100000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1150000>;
++ regulator-max-microvolt = <1150000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-always-on;
+@@ -228,8 +228,8 @@
+
+ vddcpu: VDD_OTHER {
+ regulator-name = "VDD_OTHER";
+- regulator-min-microvolt = <1125000>;
+- regulator-max-microvolt = <1850000>;
++ regulator-min-microvolt = <1050000>;
++ regulator-max-microvolt = <1250000>;
+ regulator-initial-mode = <2>;
+ regulator-allowed-modes = <2>, <4>;
+ regulator-ramp-delay = <3125>;
+@@ -248,8 +248,8 @@
+
+ vldo1: LDO1 {
+ regulator-name = "LDO1";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <3700000>;
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+
+ regulator-state-standby {
+@@ -403,7 +403,7 @@
+ pinctrl_flx3_default: flx3_default {
+ pinmux = <PIN_PD16__FLEXCOM3_IO0>,
+ <PIN_PD17__FLEXCOM3_IO1>;
+- bias-disable;
++ bias-pull-up;
+ };
+
+ pinctrl_flx4_default: flx4_default {
+@@ -659,7 +659,7 @@
+ };
+
+ &shdwc {
+- atmel,shdwc-debouncer = <976>;
++ debounce-delay-us = <976>;
+ status = "okay";
+
+ input@0 {
+diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts
+index 3ca97b47c69ce..7e5c598e7e68f 100644
+--- a/arch/arm/boot/dts/at91-tse850-3.dts
++++ b/arch/arm/boot/dts/at91-tse850-3.dts
+@@ -262,7 +262,7 @@
+ &macb1 {
+ status = "okay";
+
+- phy-mode = "rgmii";
++ phy-mode = "rmii";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
+index d1181ead18e5a..21344fbc89e5e 100644
+--- a/arch/arm/boot/dts/at91rm9200.dtsi
++++ b/arch/arm/boot/dts/at91rm9200.dtsi
+@@ -660,7 +660,7 @@
+ compatible = "atmel,at91rm9200-udc";
+ reg = <0xfffb0000 0x4000>;
+ interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>;
+- clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>;
++ clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>;
+ clock-names = "pclk", "hclk";
+ status = "disabled";
+ };
+diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
+index beed819609e8d..8f3b483bb64dd 100644
+--- a/arch/arm/boot/dts/at91sam9261ek.dts
++++ b/arch/arm/boot/dts/at91sam9261ek.dts
+@@ -156,7 +156,7 @@
+ compatible = "ti,ads7843";
+ interrupts-extended = <&pioC 2 IRQ_TYPE_EDGE_BOTH>;
+ spi-max-frequency = <3000000>;
+- pendown-gpio = <&pioC 2 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&pioC 2 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <150>;
+ ti,x-max = /bits/ 16 <3830>;
+diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+index 87bb39060e8be..4783e657b4cb6 100644
+--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
++++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+@@ -39,6 +39,13 @@
+
+ };
+
++ usb1 {
++ pinctrl_usb1_vbus_gpio: usb1_vbus_gpio {
++ atmel,pins =
++ <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */
++ };
++ };
++
+ mmc0_slot1 {
+ pinctrl_board_mmc0_slot1: mmc0_slot1-board {
+ atmel,pins =
+@@ -84,6 +91,8 @@
+ };
+
+ usb1: gadget@fffa4000 {
++ pinctrl-0 = <&pinctrl_usb1_vbus_gpio>;
++ pinctrl-names = "default";
+ atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+ };
+@@ -219,6 +228,12 @@
+ wm8731: wm8731@1b {
+ compatible = "wm8731";
+ reg = <0x1b>;
++
++ /* PCK0 at 12MHz */
++ clocks = <&pmc PMC_TYPE_SYSTEM 8>;
++ clock-names = "mclk";
++ assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>;
++ assigned-clock-rates = <12000000>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
+index 748df7955ae67..e96ddb2e26e2c 100644
+--- a/arch/arm/boot/dts/bcm-nsp.dtsi
++++ b/arch/arm/boot/dts/bcm-nsp.dtsi
+@@ -77,7 +77,7 @@
+ interrupt-affinity = <&cpu0>, <&cpu1>;
+ };
+
+- mpcore@19000000 {
++ mpcore-bus@19000000 {
+ compatible = "simple-bus";
+ ranges = <0x00000000 0x19000000 0x00023000>;
+ #address-cells = <1>;
+@@ -219,7 +219,7 @@
+ status = "disabled";
+ };
+
+- sdio: sdhci@21000 {
++ sdio: mmc@21000 {
+ compatible = "brcm,sdhci-iproc-cygnus";
+ reg = <0x21000 0x100>;
+ interrupts = <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm/boot/dts/bcm2711-rpi-400.dts b/arch/arm/boot/dts/bcm2711-rpi-400.dts
+index f4d2fc20397c7..c53d9eb0b8027 100644
+--- a/arch/arm/boot/dts/bcm2711-rpi-400.dts
++++ b/arch/arm/boot/dts/bcm2711-rpi-400.dts
+@@ -28,12 +28,12 @@
+ &expgpio {
+ gpio-line-names = "BT_ON",
+ "WL_ON",
+- "",
++ "PWR_LED_OFF",
+ "GLOBAL_RESET",
+ "VDD_SD_IO_SEL",
+- "CAM_GPIO",
++ "GLOBAL_SHUTDOWN",
+ "SD_PWR_ON",
+- "SD_OC_N";
++ "SHUTDOWN_REQUEST";
+ };
+
+ &genet_mdio {
+diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
+index 3b60297af7f60..89af57482bc8f 100644
+--- a/arch/arm/boot/dts/bcm2711.dtsi
++++ b/arch/arm/boot/dts/bcm2711.dtsi
+@@ -290,6 +290,7 @@
+
+ hvs: hvs@7e400000 {
+ compatible = "brcm,bcm2711-hvs";
++ reg = <0x7e400000 0x8000>;
+ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+@@ -458,12 +459,26 @@
+ #size-cells = <0>;
+ enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit
+
++ /* Source for d/i-cache-line-size and d/i-cache-sets
++ * https://developer.arm.com/documentation/100095/0003
++ * /Level-1-Memory-System/About-the-L1-memory-system?lang=en
++ * Source for d/i-cache-size
++ * https://www.raspberrypi.com/documentation/computers
++ * /processors.html#bcm2711
++ */
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a72";
+ reg = <0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000d8>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ i-cache-size = <0xc000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu1: cpu@1 {
+@@ -472,6 +487,13 @@
+ reg = <1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000e0>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ i-cache-size = <0xc000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu2: cpu@2 {
+@@ -480,6 +502,13 @@
+ reg = <2>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000e8>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ i-cache-size = <0xc000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu3: cpu@3 {
+@@ -488,6 +517,28 @@
+ reg = <3>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000f0>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ i-cache-size = <0xc000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set
++ next-level-cache = <&l2>;
++ };
++
++ /* Source for d/i-cache-line-size and d/i-cache-sets
++ * https://developer.arm.com/documentation/100095/0003
++ * /Level-2-Memory-System/About-the-L2-memory-system?lang=en
++ * Source for d/i-cache-size
++ * https://www.raspberrypi.com/documentation/computers
++ * /processors.html#bcm2711
++ */
++ l2: l2-cache0 {
++ compatible = "cache";
++ cache-size = <0x100000>;
++ cache-line-size = <64>;
++ cache-sets = <1024>; // 1MiB(size)/64(line-size)=16384ways/16-way set
++ cache-level = <2>;
+ };
+ };
+
+@@ -506,11 +557,17 @@
+ #address-cells = <3>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+- interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
++ interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "pcie", "msi";
+ interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+ interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143
++ IRQ_TYPE_LEVEL_HIGH>,
++ <0 0 0 2 &gicv2 GIC_SPI 144
++ IRQ_TYPE_LEVEL_HIGH>,
++ <0 0 0 3 &gicv2 GIC_SPI 145
++ IRQ_TYPE_LEVEL_HIGH>,
++ <0 0 0 4 &gicv2 GIC_SPI 146
+ IRQ_TYPE_LEVEL_HIGH>;
+ msi-controller;
+ msi-parent = <&pcie0>;
+@@ -576,6 +633,8 @@
+ <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+
++ gpio-ranges = <&gpio 0 0 58>;
++
+ gpclk0_gpio49: gpclk0_gpio49 {
+ pin-gpclk {
+ pins = "gpio49";
+diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts
+index 1b63d6b19750b..25d87212cefd3 100644
+--- a/arch/arm/boot/dts/bcm2835-rpi-b.dts
++++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts
+@@ -53,18 +53,17 @@
+ "GPIO18",
+ "NC", /* GPIO19 */
+ "NC", /* GPIO20 */
+- "GPIO21",
++ "CAM_GPIO0",
+ "GPIO22",
+ "GPIO23",
+ "GPIO24",
+ "GPIO25",
+ "NC", /* GPIO26 */
+- "CAM_GPIO0",
+- /* Binary number representing build/revision */
+- "CONFIG0",
+- "CONFIG1",
+- "CONFIG2",
+- "CONFIG3",
++ "GPIO27",
++ "GPIO28",
++ "GPIO29",
++ "GPIO30",
++ "GPIO31",
+ "NC", /* GPIO32 */
+ "NC", /* GPIO33 */
+ "NC", /* GPIO34 */
+diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
+index 33b2b77aa47db..00582eb2c12e2 100644
+--- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
++++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
+@@ -74,16 +74,18 @@
+ "GPIO27",
+ "SDA0",
+ "SCL0",
+- "NC", /* GPIO30 */
+- "NC", /* GPIO31 */
+- "NC", /* GPIO32 */
+- "NC", /* GPIO33 */
+- "NC", /* GPIO34 */
+- "NC", /* GPIO35 */
+- "NC", /* GPIO36 */
+- "NC", /* GPIO37 */
+- "NC", /* GPIO38 */
+- "NC", /* GPIO39 */
++ /* Used by BT module */
++ "CTS0",
++ "RTS0",
++ "TXD0",
++ "RXD0",
++ /* Used by Wifi */
++ "SD1_CLK",
++ "SD1_CMD",
++ "SD1_DATA0",
++ "SD1_DATA1",
++ "SD1_DATA2",
++ "SD1_DATA3",
+ "CAM_GPIO1", /* GPIO40 */
+ "WL_ON", /* GPIO41 */
+ "NC", /* GPIO42 */
+diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
+index 61010266ca9a3..90472e76a313e 100644
+--- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
++++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
+@@ -45,7 +45,7 @@
+ #gpio-cells = <2>;
+ gpio-line-names = "BT_ON",
+ "WL_ON",
+- "STATUS_LED_R",
++ "PWR_LED_R",
+ "LAN_RUN",
+ "",
+ "CAM_GPIO0",
+diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts
+index 588d9411ceb61..3dfce4312dfc4 100644
+--- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts
++++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts
+@@ -63,8 +63,8 @@
+ "GPIO43",
+ "GPIO44",
+ "GPIO45",
+- "GPIO46",
+- "GPIO47",
++ "SMPS_SCL",
++ "SMPS_SDA",
+ /* Used by eMMC */
+ "SD_CLK_R",
+ "SD_CMD_R",
+diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi
+index 0199ec98cd616..5dbdebc462594 100644
+--- a/arch/arm/boot/dts/bcm2837.dtsi
++++ b/arch/arm/boot/dts/bcm2837.dtsi
+@@ -40,12 +40,26 @@
+ #size-cells = <0>;
+ enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit
+
++ /* Source for d/i-cache-line-size and d/i-cache-sets
++ * https://developer.arm.com/documentation/ddi0500/e/level-1-memory-system
++ * /about-the-l1-memory-system?lang=en
++ *
++ * Source for d/i-cache-size
++ * https://magpi.raspberrypi.com/articles/raspberry-pi-3-specs-benchmarks
++ */
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000d8>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set
++ i-cache-size = <0x8000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu1: cpu@1 {
+@@ -54,6 +68,13 @@
+ reg = <1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000e0>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set
++ i-cache-size = <0x8000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu2: cpu@2 {
+@@ -62,6 +83,13 @@
+ reg = <2>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000e8>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set
++ i-cache-size = <0x8000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ next-level-cache = <&l2>;
+ };
+
+ cpu3: cpu@3 {
+@@ -70,6 +98,27 @@
+ reg = <3>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0x000000f0>;
++ d-cache-size = <0x8000>;
++ d-cache-line-size = <64>;
++ d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set
++ i-cache-size = <0x8000>;
++ i-cache-line-size = <64>;
++ i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set
++ next-level-cache = <&l2>;
++ };
++
++ /* Source for cache-line-size + cache-sets
++ * https://developer.arm.com/documentation/ddi0500
++ * /e/level-2-memory-system/about-the-l2-memory-system?lang=en
++ * Source for cache-size
++ * https://datasheets.raspberrypi.com/cm/cm1-and-cm3-datasheet.pdf
++ */
++ l2: l2-cache0 {
++ compatible = "cache";
++ cache-size = <0x80000>;
++ cache-line-size = <64>;
++ cache-sets = <512>; // 512KiB(size)/64(line-size)=8192ways/16-way set
++ cache-level = <2>;
+ };
+ };
+ };
+diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
+index a3e06b6809476..c113661a6668f 100644
+--- a/arch/arm/boot/dts/bcm283x.dtsi
++++ b/arch/arm/boot/dts/bcm283x.dtsi
+@@ -126,6 +126,8 @@
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
++ gpio-ranges = <&gpio 0 0 54>;
++
+ /* Defines common pin muxing groups
+ *
+ * While each pin can have its mux selected
+diff --git a/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts b/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts
+index cd797b4202ad8..01c48faabfade 100644
+--- a/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts
++++ b/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts
+@@ -19,7 +19,8 @@
+
+ memory@0 {
+ device_type = "memory";
+- reg = <0x00000000 0x08000000>;
++ reg = <0x00000000 0x08000000>,
++ <0x88000000 0x08000000>;
+ };
+
+ gpio-keys {
+diff --git a/arch/arm/boot/dts/bcm4708-netgear-r6250.dts b/arch/arm/boot/dts/bcm4708-netgear-r6250.dts
+index 61c7b137607e5..7900aac4f35a9 100644
+--- a/arch/arm/boot/dts/bcm4708-netgear-r6250.dts
++++ b/arch/arm/boot/dts/bcm4708-netgear-r6250.dts
+@@ -20,7 +20,7 @@
+ bootargs = "console=ttyS0,115200 earlycon";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x08000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts b/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts
+index 6c6bb7b17d27a..7546c8d07bcd7 100644
+--- a/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts
++++ b/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts
+@@ -19,7 +19,7 @@
+ bootargs = "console=ttyS0,115200";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x08000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts b/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts
+index d29e7f80ea6aa..beae9eab9cb8c 100644
+--- a/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts
++++ b/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts
+@@ -19,7 +19,7 @@
+ bootargs = "console=ttyS0,115200";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x18000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts b/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts
+index 9b6887d477d86..7879f7d7d9c33 100644
+--- a/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts
++++ b/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts
+@@ -16,7 +16,7 @@
+ bootargs = "console=ttyS0,115200";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x08000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-netgear-r7000.dts b/arch/arm/boot/dts/bcm4709-netgear-r7000.dts
+index 7989a53597d4f..56d309dbc6b0d 100644
+--- a/arch/arm/boot/dts/bcm4709-netgear-r7000.dts
++++ b/arch/arm/boot/dts/bcm4709-netgear-r7000.dts
+@@ -19,7 +19,7 @@
+ bootargs = "console=ttyS0,115200";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x08000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-netgear-r8000.dts b/arch/arm/boot/dts/bcm4709-netgear-r8000.dts
+index 87b655be674c5..184e3039aa864 100644
+--- a/arch/arm/boot/dts/bcm4709-netgear-r8000.dts
++++ b/arch/arm/boot/dts/bcm4709-netgear-r8000.dts
+@@ -30,7 +30,7 @@
+ bootargs = "console=ttyS0,115200";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x08000000>;
+diff --git a/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts b/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts
+index f806be5da7237..c2a266a439d05 100644
+--- a/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts
++++ b/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts
+@@ -15,7 +15,7 @@
+ bootargs = "console=ttyS0,115200 earlycon";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>;
+ };
+diff --git a/arch/arm/boot/dts/bcm47094-linksys-panamera.dts b/arch/arm/boot/dts/bcm47094-linksys-panamera.dts
+index 05d4f2931772b..9bef6b9bfa8d9 100644
+--- a/arch/arm/boot/dts/bcm47094-linksys-panamera.dts
++++ b/arch/arm/boot/dts/bcm47094-linksys-panamera.dts
+@@ -129,7 +129,7 @@
+ };
+ };
+
+- mdio-bus-mux@18003000 {
++ mdio-mux@18003000 {
+
+ /* BIT(9) = 1 => external mdio */
+ mdio@200 {
+diff --git a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts
+index 452b8d0ab180e..b0d8a688141d3 100644
+--- a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts
++++ b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts
+@@ -16,7 +16,7 @@
+ bootargs = "earlycon";
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>,
+ <0x88000000 0x18000000>;
+diff --git a/arch/arm/boot/dts/bcm47189-luxul-xap-1440.dts b/arch/arm/boot/dts/bcm47189-luxul-xap-1440.dts
+index 57ca1cfaecd8e..00e688b45d981 100644
+--- a/arch/arm/boot/dts/bcm47189-luxul-xap-1440.dts
++++ b/arch/arm/boot/dts/bcm47189-luxul-xap-1440.dts
+@@ -46,3 +46,16 @@
+ };
+ };
+ };
++
++&gmac0 {
++ phy-mode = "rgmii";
++ phy-handle = <&bcm54210e>;
++
++ mdio {
++ /delete-node/ switch@1e;
++
++ bcm54210e: ethernet-phy@0 {
++ reg = <0>;
++ };
++ };
++};
+diff --git a/arch/arm/boot/dts/bcm47189-luxul-xap-810.dts b/arch/arm/boot/dts/bcm47189-luxul-xap-810.dts
+index 2e1a7e382cb7a..78c80a5d3f4fa 100644
+--- a/arch/arm/boot/dts/bcm47189-luxul-xap-810.dts
++++ b/arch/arm/boot/dts/bcm47189-luxul-xap-810.dts
+@@ -83,3 +83,16 @@
+ };
+ };
+ };
++
++&gmac0 {
++ phy-mode = "rgmii";
++ phy-handle = <&bcm54210e>;
++
++ mdio {
++ /delete-node/ switch@1e;
++
++ bcm54210e: ethernet-phy@0 {
++ reg = <0>;
++ };
++ };
++};
+diff --git a/arch/arm/boot/dts/bcm53015-meraki-mr26.dts b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts
+new file mode 100644
+index 0000000000000..ca2266b936ee2
+--- /dev/null
++++ b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts
+@@ -0,0 +1,166 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++/*
++ * Broadcom BCM470X / BCM5301X ARM platform code.
++ * DTS for Meraki MR26 / Codename: Venom
++ *
++ * Copyright (C) 2022 Christian Lamparter <chunkeey@gmail.com>
++ */
++
++/dts-v1/;
++
++#include "bcm4708.dtsi"
++#include "bcm5301x-nand-cs0-bch8.dtsi"
++#include <dt-bindings/leds/common.h>
++
++/ {
++ compatible = "meraki,mr26", "brcm,bcm53015", "brcm,bcm4708";
++ model = "Meraki MR26";
++
++ memory@0 {
++ reg = <0x00000000 0x08000000>;
++ device_type = "memory";
++ };
++
++ leds {
++ compatible = "gpio-leds";
++
++ led-0 {
++ function = LED_FUNCTION_FAULT;
++ color = <LED_COLOR_ID_AMBER>;
++ gpios = <&chipcommon 13 GPIO_ACTIVE_HIGH>;
++ panic-indicator;
++ };
++ led-1 {
++ function = LED_FUNCTION_INDICATOR;
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&chipcommon 12 GPIO_ACTIVE_HIGH>;
++ };
++ };
++
++ keys {
++ compatible = "gpio-keys";
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ key-restart {
++ label = "Reset";
++ linux,code = <KEY_RESTART>;
++ gpios = <&chipcommon 11 GPIO_ACTIVE_LOW>;
++ };
++ };
++};
++
++&uart0 {
++ clock-frequency = <50000000>;
++ /delete-property/ clocks;
++};
++
++&uart1 {
++ status = "disabled";
++};
++
++&gmac0 {
++ status = "okay";
++};
++
++&gmac1 {
++ status = "disabled";
++};
++&gmac2 {
++ status = "disabled";
++};
++&gmac3 {
++ status = "disabled";
++};
++
++&nandcs {
++ nand-ecc-algo = "hw";
++
++ partitions {
++ compatible = "fixed-partitions";
++ #address-cells = <0x1>;
++ #size-cells = <0x1>;
++
++ partition@0 {
++ label = "u-boot";
++ reg = <0x0 0x200000>;
++ read-only;
++ };
++
++ partition@200000 {
++ label = "u-boot-env";
++ reg = <0x200000 0x200000>;
++ /* empty */
++ };
++
++ partition@400000 {
++ label = "u-boot-backup";
++ reg = <0x400000 0x200000>;
++ /* empty */
++ };
++
++ partition@600000 {
++ label = "u-boot-env-backup";
++ reg = <0x600000 0x200000>;
++ /* empty */
++ };
++
++ partition@800000 {
++ label = "ubi";
++ reg = <0x800000 0x7780000>;
++ };
++ };
++};
++
++&srab {
++ status = "okay";
++
++ ports {
++ port@0 {
++ reg = <0>;
++ label = "poe";
++ };
++
++ port@5 {
++ reg = <5>;
++ label = "cpu";
++ ethernet = <&gmac0>;
++
++ fixed-link {
++ speed = <1000>;
++ full-duplex;
++ };
++ };
++ };
++};
++
++&i2c0 {
++ status = "okay";
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinmux_i2c>;
++
++ clock-frequency = <100000>;
++
++ ina219@40 {
++ compatible = "ti,ina219"; /* PoE power */
++ reg = <0x40>;
++ shunt-resistor = <60000>; /* = 60 mOhms */
++ };
++
++ eeprom@56 {
++ compatible = "atmel,24c64";
++ reg = <0x56>;
++ pagesize = <32>;
++ read-only;
++ #address-cells = <1>;
++ #size-cells = <1>;
++
++ /* it's empty */
++ };
++};
++
++&thermal {
++ status = "disabled";
++ /* does not work, reads 418 degree Celsius */
++};
+diff --git a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts
+index 3b978dc8997a4..edf9910100b02 100644
+--- a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts
++++ b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts
+@@ -20,7 +20,7 @@
+ bootargs = " console=ttyS0,115200n8 earlycon";
+ };
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x08000000>;
+ device_type = "memory";
+ };
+@@ -195,3 +195,25 @@
+ };
+ };
+ };
++
++&srab {
++ status = "okay";
++
++ ports {
++ port@0 {
++ reg = <0>;
++ label = "poe";
++ };
++
++ port@5 {
++ reg = <5>;
++ label = "cpu";
++ ethernet = <&gmac0>;
++
++ fixed-link {
++ speed = <1000>;
++ full-duplex;
++ };
++ };
++ };
++};
+diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
+index f92089290ccd5..b4b73ab996264 100644
+--- a/arch/arm/boot/dts/bcm5301x.dtsi
++++ b/arch/arm/boot/dts/bcm5301x.dtsi
+@@ -19,7 +19,7 @@
+ #size-cells = <1>;
+ interrupt-parent = <&gic>;
+
+- chipcommonA@18000000 {
++ chipcommon-a-bus@18000000 {
+ compatible = "simple-bus";
+ ranges = <0x00000000 0x18000000 0x00001000>;
+ #address-cells = <1>;
+@@ -44,7 +44,7 @@
+ };
+ };
+
+- mpcore@19000000 {
++ mpcore-bus@19000000 {
+ compatible = "simple-bus";
+ ranges = <0x00000000 0x19000000 0x00023000>;
+ #address-cells = <1>;
+@@ -242,6 +242,8 @@
+
+ gpio-controller;
+ #gpio-cells = <2>;
++ interrupt-controller;
++ #interrupt-cells = <2>;
+ };
+
+ pcie0: pcie@12000 {
+@@ -369,8 +371,8 @@
+ #address-cells = <1>;
+ };
+
+- mdio-bus-mux@18003000 {
+- compatible = "mdio-mux-mmioreg";
++ mdio-mux@18003000 {
++ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+@@ -408,27 +410,27 @@
+ i2c0: i2c@18009000 {
+ compatible = "brcm,iproc-i2c";
+ reg = <0x18009000 0x50>;
+- interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
++ interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+- dmu@1800c000 {
++ dmu-bus@1800c000 {
+ compatible = "simple-bus";
+ ranges = <0 0x1800c000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- cru@100 {
+- compatible = "simple-bus";
++ cru-bus@100 {
++ compatible = "brcm,ns-cru", "simple-mfd";
+ reg = <0x100 0x1a4>;
+ ranges;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- lcpll0: lcpll0@100 {
++ lcpll0: clock-controller@100 {
+ #clock-cells = <1>;
+ compatible = "brcm,nsp-lcpll0";
+ reg = <0x100 0x14>;
+@@ -437,7 +439,7 @@
+ "sdio", "ddr_phy";
+ };
+
+- genpll: genpll@140 {
++ genpll: clock-controller@140 {
+ #clock-cells = <1>;
+ compatible = "brcm,nsp-genpll";
+ reg = <0x140 0x24>;
+@@ -448,7 +450,12 @@
+ "sata1", "sata2";
+ };
+
+- pinctrl: pin-controller@1c0 {
++ syscon@180 {
++ compatible = "brcm,cru-clkset", "syscon";
++ reg = <0x180 0x4>;
++ };
++
++ pinctrl: pinctrl@1c0 {
+ compatible = "brcm,bcm4708-pinmux";
+ reg = <0x1c0 0x24>;
+ reg-names = "cru_gpio_control";
+@@ -535,7 +542,6 @@
+ "spi_lr_session_done",
+ "spi_lr_overread";
+ clocks = <&iprocmed>;
+- clock-names = "iprocmed";
+ num-cs = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm/boot/dts/bcm53573.dtsi b/arch/arm/boot/dts/bcm53573.dtsi
+index 51546fccc6168..933b6a380c367 100644
+--- a/arch/arm/boot/dts/bcm53573.dtsi
++++ b/arch/arm/boot/dts/bcm53573.dtsi
+@@ -127,6 +127,9 @@
+
+ pcie0: pcie@2000 {
+ reg = <0x00002000 0x1000>;
++
++ #address-cells = <3>;
++ #size-cells = <2>;
+ };
+
+ usb2: usb2@4000 {
+diff --git a/arch/arm/boot/dts/bcm94708.dts b/arch/arm/boot/dts/bcm94708.dts
+index 3d13e46c69494..d9eb2040b9631 100644
+--- a/arch/arm/boot/dts/bcm94708.dts
++++ b/arch/arm/boot/dts/bcm94708.dts
+@@ -38,7 +38,7 @@
+ model = "NorthStar SVK (BCM94708)";
+ compatible = "brcm,bcm94708", "brcm,bcm4708";
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>;
+ };
+diff --git a/arch/arm/boot/dts/bcm94709.dts b/arch/arm/boot/dts/bcm94709.dts
+index 5017b7b259cbe..618c812eef73e 100644
+--- a/arch/arm/boot/dts/bcm94709.dts
++++ b/arch/arm/boot/dts/bcm94709.dts
+@@ -38,7 +38,7 @@
+ model = "NorthStar SVK (BCM94709)";
+ compatible = "brcm,bcm94709", "brcm,bcm4709", "brcm,bcm4708";
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>;
+ };
+diff --git a/arch/arm/boot/dts/bcm947189acdbmr.dts b/arch/arm/boot/dts/bcm947189acdbmr.dts
+index b0b8c774a37f9..1f0be30e54435 100644
+--- a/arch/arm/boot/dts/bcm947189acdbmr.dts
++++ b/arch/arm/boot/dts/bcm947189acdbmr.dts
+@@ -60,9 +60,9 @@
+ spi {
+ compatible = "spi-gpio";
+ num-chipselects = <1>;
+- gpio-sck = <&chipcommon 21 0>;
+- gpio-miso = <&chipcommon 22 0>;
+- gpio-mosi = <&chipcommon 23 0>;
++ sck-gpios = <&chipcommon 21 0>;
++ miso-gpios = <&chipcommon 22 0>;
++ mosi-gpios = <&chipcommon 23 0>;
+ cs-gpios = <&chipcommon 24 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi
+index 89e0bdaf3a85f..726d353eda686 100644
+--- a/arch/arm/boot/dts/dove.dtsi
++++ b/arch/arm/boot/dts/dove.dtsi
+@@ -129,7 +129,7 @@
+ pcie1: pcie@2 {
+ device_type = "pci";
+ status = "disabled";
+- assigned-addresses = <0x82002800 0 0x80000 0 0x2000>;
++ assigned-addresses = <0x82001000 0 0x80000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ clocks = <&gate_clk 5>;
+ marvell,pcie-port = <1>;
+diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
+index 956a26d52a4c3..5733e3a4ea8e7 100644
+--- a/arch/arm/boot/dts/dra7-l4.dtsi
++++ b/arch/arm/boot/dts/dra7-l4.dtsi
+@@ -3482,8 +3482,7 @@
+ ti,timer-pwm;
+ };
+ };
+-
+- target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
++ timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */
+ compatible = "ti,sysc-omap4-timer", "ti,sysc";
+ reg = <0x2c000 0x4>,
+ <0x2c010 0x4>;
+@@ -3511,7 +3510,7 @@
+ };
+ };
+
+- target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
++ timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */
+ compatible = "ti,sysc-omap4-timer", "ti,sysc";
+ reg = <0x2e000 0x4>,
+ <0x2e010 0x4>;
+@@ -4189,11 +4188,11 @@
+ reg = <0x1d0010 0x4>;
+ reg-names = "sysc";
+ ti,sysc-midle = <SYSC_IDLE_FORCE>,
+- <SYSC_IDLE_NO>,
+- <SYSC_IDLE_SMART>;
++ <SYSC_IDLE_NO>;
+ ti,sysc-sidle = <SYSC_IDLE_FORCE>,
+ <SYSC_IDLE_NO>,
+ <SYSC_IDLE_SMART>;
++ power-domains = <&prm_vpe>;
+ clocks = <&vpe_clkctrl DRA7_VPE_VPE_CLKCTRL 0>;
+ clock-names = "fck";
+ #address-cells = <1>;
+diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
+index dfc1ef8ef6aea..61a3fb3e2a2f9 100644
+--- a/arch/arm/boot/dts/dra7.dtsi
++++ b/arch/arm/boot/dts/dra7.dtsi
+@@ -1320,20 +1320,20 @@
+ };
+
+ /* Local timers, see ARM architected timer wrap erratum i940 */
+-&timer3_target {
++&timer15_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+- assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
++ assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
+ };
+ };
+
+-&timer4_target {
++&timer16_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+- assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
++ assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
+ };
+ };
+diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi
+index cfb239d5186ac..54b4de6a5925d 100644
+--- a/arch/arm/boot/dts/e60k02.dtsi
++++ b/arch/arm/boot/dts/e60k02.dtsi
+@@ -302,6 +302,7 @@
+
+ &usbotg1 {
+ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_usbotg1>;
+ disable-over-current;
+ srp-disable;
+ hnp-disable;
+diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts
+index f6ba5e4260404..7562497c45dd8 100644
+--- a/arch/arm/boot/dts/exynos3250-rinato.dts
++++ b/arch/arm/boot/dts/exynos3250-rinato.dts
+@@ -249,7 +249,7 @@
+ i80-if-timings {
+ cs-setup = <0>;
+ wr-setup = <0>;
+- wr-act = <1>;
++ wr-active = <1>;
+ wr-hold = <0>;
+ };
+ };
+diff --git a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
+index 021d9fc1b4923..27a1a89526655 100644
+--- a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
++++ b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
+@@ -10,7 +10,7 @@
+ / {
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+- thermal-sensors = <&tmu 0>;
++ thermal-sensors = <&tmu>;
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ trips {
+diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
+index eab77a66ae8f2..201e2fe7ed0cc 100644
+--- a/arch/arm/boot/dts/exynos4.dtsi
++++ b/arch/arm/boot/dts/exynos4.dtsi
+@@ -605,7 +605,7 @@
+ status = "disabled";
+
+ hdmi_i2c_phy: hdmiphy@38 {
+- compatible = "exynos4210-hdmiphy";
++ compatible = "samsung,exynos4210-hdmiphy";
+ reg = <0x38>;
+ };
+ };
+diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts
+index 55922176807e6..93880bdbcad98 100644
+--- a/arch/arm/boot/dts/exynos4210-i9100.dts
++++ b/arch/arm/boot/dts/exynos4210-i9100.dts
+@@ -200,8 +200,8 @@
+ power-on-delay = <10>;
+ reset-delay = <10>;
+
+- panel-width-mm = <90>;
+- panel-height-mm = <154>;
++ panel-width-mm = <56>;
++ panel-height-mm = <93>;
+
+ display-timings {
+ timing {
+@@ -827,7 +827,7 @@
+ compatible = "brcm,bcm4330-bt";
+
+ shutdown-gpios = <&gpl0 4 GPIO_ACTIVE_HIGH>;
+- reset-gpios = <&gpl1 0 GPIO_ACTIVE_HIGH>;
++ reset-gpios = <&gpl1 0 GPIO_ACTIVE_LOW>;
+ device-wakeup-gpios = <&gpx3 1 GPIO_ACTIVE_HIGH>;
+ host-wakeup-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>;
+ };
+diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
+index 7e7d65ce6585f..ac62d8dc70b19 100644
+--- a/arch/arm/boot/dts/exynos4210.dtsi
++++ b/arch/arm/boot/dts/exynos4210.dtsi
+@@ -393,7 +393,6 @@
+ &cpu_thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+- thermal-sensors = <&tmu 0>;
+ };
+
+ &gic {
+diff --git a/arch/arm/boot/dts/exynos4412-itop-elite.dts b/arch/arm/boot/dts/exynos4412-itop-elite.dts
+index 47431307cb3cf..fbfc04f9a04cc 100644
+--- a/arch/arm/boot/dts/exynos4412-itop-elite.dts
++++ b/arch/arm/boot/dts/exynos4412-itop-elite.dts
+@@ -179,7 +179,7 @@
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+ clocks = <&pmu_system_controller 0>;
+- clock-names = "MCLK1";
++ clock-names = "mclk";
+ wlf,shared-lrclk;
+ #sound-dai-cells = <0>;
+ };
+diff --git a/arch/arm/boot/dts/exynos4412-midas.dtsi b/arch/arm/boot/dts/exynos4412-midas.dtsi
+index 968c7943653e2..49843e016828e 100644
+--- a/arch/arm/boot/dts/exynos4412-midas.dtsi
++++ b/arch/arm/boot/dts/exynos4412-midas.dtsi
+@@ -585,7 +585,7 @@
+ clocks = <&camera 1>;
+ clock-names = "extclk";
+ samsung,camclk-out = <1>;
+- gpios = <&gpm1 6 GPIO_ACTIVE_HIGH>;
++ gpios = <&gpm1 6 GPIO_ACTIVE_LOW>;
+
+ port {
+ is_s5k6a3_ep: endpoint {
+diff --git a/arch/arm/boot/dts/exynos4412-origen.dts b/arch/arm/boot/dts/exynos4412-origen.dts
+index 5479ef09f9f36..0acb05f0a2b7c 100644
+--- a/arch/arm/boot/dts/exynos4412-origen.dts
++++ b/arch/arm/boot/dts/exynos4412-origen.dts
+@@ -95,7 +95,7 @@
+ };
+
+ &ehci {
+- samsung,vbus-gpio = <&gpx3 5 1>;
++ samsung,vbus-gpio = <&gpx3 5 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+ phys = <&exynos_usbphy 2>, <&exynos_usbphy 3>;
+ phy-names = "hsic0", "hsic1";
+diff --git a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi
+index d31a68672bfac..d7d756614edd1 100644
+--- a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi
++++ b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi
+@@ -260,7 +260,7 @@
+ };
+
+ uart3_data: uart3-data {
+- samsung,pins = "gpa1-4", "gpa1-4";
++ samsung,pins = "gpa1-4", "gpa1-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>;
+ samsung,pin-drv = <EXYNOS4_PIN_DRV_LV1>;
+diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts
+index 39bbe18145cf2..e4861415a0fe5 100644
+--- a/arch/arm/boot/dts/exynos5250-smdk5250.dts
++++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts
+@@ -118,6 +118,9 @@
+ status = "okay";
+ ddc = <&i2c_2>;
+ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>;
++ vdd-supply = <&ldo8_reg>;
++ vdd_osc-supply = <&ldo10_reg>;
++ vdd_pll-supply = <&ldo8_reg>;
+ };
+
+ &i2c_0 {
+@@ -126,7 +129,7 @@
+ samsung,i2c-max-bus-freq = <20000>;
+
+ eeprom@50 {
+- compatible = "samsung,s524ad0xd1";
++ compatible = "samsung,s524ad0xd1", "atmel,24c128";
+ reg = <0x50>;
+ };
+
+@@ -286,7 +289,7 @@
+ samsung,i2c-max-bus-freq = <20000>;
+
+ eeprom@51 {
+- compatible = "samsung,s524ad0xd1";
++ compatible = "samsung,s524ad0xd1", "atmel,24c128";
+ reg = <0x51>;
+ };
+
+diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
+index 4ffa9253b566c..de0275df807fb 100644
+--- a/arch/arm/boot/dts/exynos5250.dtsi
++++ b/arch/arm/boot/dts/exynos5250.dtsi
+@@ -1119,7 +1119,7 @@
+ &cpu_thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+- thermal-sensors = <&tmu 0>;
++ thermal-sensors = <&tmu>;
+
+ cooling-maps {
+ map0 {
+diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts
+index 884fef55836cf..3765f5ba03f25 100644
+--- a/arch/arm/boot/dts/exynos5410-odroidxu.dts
++++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts
+@@ -120,7 +120,6 @@
+ };
+
+ &cpu0_thermal {
+- thermal-sensors = <&tmu_cpu0 0>;
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+diff --git a/arch/arm/boot/dts/exynos5420-smdk5420.dts b/arch/arm/boot/dts/exynos5420-smdk5420.dts
+index a4f0e3ffedbd3..07f65213aae65 100644
+--- a/arch/arm/boot/dts/exynos5420-smdk5420.dts
++++ b/arch/arm/boot/dts/exynos5420-smdk5420.dts
+@@ -124,6 +124,9 @@
+ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_hpd_irq>;
++ vdd-supply = <&ldo6_reg>;
++ vdd_osc-supply = <&ldo7_reg>;
++ vdd_pll-supply = <&ldo6_reg>;
+ };
+
+ &hsi2c_4 {
+diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
+index e23e8ffb093fa..4fb4804830afe 100644
+--- a/arch/arm/boot/dts/exynos5420.dtsi
++++ b/arch/arm/boot/dts/exynos5420.dtsi
+@@ -602,7 +602,7 @@
+ };
+
+ mipi_phy: mipi-video-phy {
+- compatible = "samsung,s5pv210-mipi-video-phy";
++ compatible = "samsung,exynos5420-mipi-video-phy";
+ syscon = <&pmu_system_controller>;
+ #phy-cells = <1>;
+ };
+diff --git a/arch/arm/boot/dts/exynos5422-odroidhc1.dts b/arch/arm/boot/dts/exynos5422-odroidhc1.dts
+index d91f7fa2cf808..e57d3e464434f 100644
+--- a/arch/arm/boot/dts/exynos5422-odroidhc1.dts
++++ b/arch/arm/boot/dts/exynos5422-odroidhc1.dts
+@@ -29,7 +29,7 @@
+
+ thermal-zones {
+ cpu0_thermal: cpu0-thermal {
+- thermal-sensors = <&tmu_cpu0 0>;
++ thermal-sensors = <&tmu_cpu0>;
+ trips {
+ cpu0_alert0: cpu-alert-0 {
+ temperature = <70000>; /* millicelsius */
+@@ -84,7 +84,7 @@
+ };
+ };
+ cpu1_thermal: cpu1-thermal {
+- thermal-sensors = <&tmu_cpu1 0>;
++ thermal-sensors = <&tmu_cpu1>;
+ trips {
+ cpu1_alert0: cpu-alert-0 {
+ temperature = <70000>;
+@@ -128,7 +128,7 @@
+ };
+ };
+ cpu2_thermal: cpu2-thermal {
+- thermal-sensors = <&tmu_cpu2 0>;
++ thermal-sensors = <&tmu_cpu2>;
+ trips {
+ cpu2_alert0: cpu-alert-0 {
+ temperature = <70000>;
+@@ -172,7 +172,7 @@
+ };
+ };
+ cpu3_thermal: cpu3-thermal {
+- thermal-sensors = <&tmu_cpu3 0>;
++ thermal-sensors = <&tmu_cpu3>;
+ trips {
+ cpu3_alert0: cpu-alert-0 {
+ temperature = <70000>;
+@@ -216,7 +216,7 @@
+ };
+ };
+ gpu_thermal: gpu-thermal {
+- thermal-sensors = <&tmu_gpu 0>;
++ thermal-sensors = <&tmu_gpu>;
+ trips {
+ gpu_alert0: gpu-alert-0 {
+ temperature = <70000>;
+diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
+index e35af40a55cb8..0b27e968c6fd2 100644
+--- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
++++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
+@@ -50,7 +50,7 @@
+
+ thermal-zones {
+ cpu0_thermal: cpu0-thermal {
+- thermal-sensors = <&tmu_cpu0 0>;
++ thermal-sensors = <&tmu_cpu0>;
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+ trips {
+@@ -139,7 +139,7 @@
+ };
+ };
+ cpu1_thermal: cpu1-thermal {
+- thermal-sensors = <&tmu_cpu1 0>;
++ thermal-sensors = <&tmu_cpu1>;
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+ trips {
+@@ -212,7 +212,7 @@
+ };
+ };
+ cpu2_thermal: cpu2-thermal {
+- thermal-sensors = <&tmu_cpu2 0>;
++ thermal-sensors = <&tmu_cpu2>;
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+ trips {
+@@ -285,7 +285,7 @@
+ };
+ };
+ cpu3_thermal: cpu3-thermal {
+- thermal-sensors = <&tmu_cpu3 0>;
++ thermal-sensors = <&tmu_cpu3>;
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+ trips {
+@@ -358,7 +358,7 @@
+ };
+ };
+ gpu_thermal: gpu-thermal {
+- thermal-sensors = <&tmu_gpu 0>;
++ thermal-sensors = <&tmu_gpu>;
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+ trips {
+diff --git a/arch/arm/boot/dts/gemini-nas4220b.dts b/arch/arm/boot/dts/gemini-nas4220b.dts
+index 13112a8a5dd88..6544c730340fa 100644
+--- a/arch/arm/boot/dts/gemini-nas4220b.dts
++++ b/arch/arm/boot/dts/gemini-nas4220b.dts
+@@ -84,7 +84,7 @@
+ partitions {
+ compatible = "redboot-fis";
+ /* Eraseblock at 0xfe0000 */
+- fis-index-block = <0x1fc>;
++ fis-index-block = <0x7f>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts
+index 8cbaf1c811745..3b609d987d883 100644
+--- a/arch/arm/boot/dts/imx23-evk.dts
++++ b/arch/arm/boot/dts/imx23-evk.dts
+@@ -79,7 +79,6 @@
+ MX23_PAD_LCD_RESET__GPIO_1_18
+ MX23_PAD_PWM3__GPIO_1_29
+ MX23_PAD_PWM4__GPIO_1_30
+- MX23_PAD_SSP1_DETECT__SSP1_DETECT
+ >;
+ fsl,drive-strength = <MXS_DRIVE_4mA>;
+ fsl,voltage = <MXS_VOLTAGE_HIGH>;
+diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
+index 7f4c602454a5f..ce3d6360a7efb 100644
+--- a/arch/arm/boot/dts/imx23.dtsi
++++ b/arch/arm/boot/dts/imx23.dtsi
+@@ -59,7 +59,7 @@
+ reg = <0x80000000 0x2000>;
+ };
+
+- dma_apbh: dma-apbh@80004000 {
++ dma_apbh: dma-controller@80004000 {
+ compatible = "fsl,imx23-dma-apbh";
+ reg = <0x80004000 0x2000>;
+ interrupts = <0 14 20 0
+diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
+index fdcca82c9986f..bd8ea2ec24575 100644
+--- a/arch/arm/boot/dts/imx25.dtsi
++++ b/arch/arm/boot/dts/imx25.dtsi
+@@ -515,7 +515,7 @@
+ #interrupt-cells = <2>;
+ };
+
+- sdma: sdma@53fd4000 {
++ sdma: dma-controller@53fd4000 {
+ compatible = "fsl,imx25-sdma";
+ reg = <0x53fd4000 0x4000>;
+ clocks = <&clks 112>, <&clks 68>;
+diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts
+index 7e2b0f198dfad..1053b7c584d81 100644
+--- a/arch/arm/boot/dts/imx28-evk.dts
++++ b/arch/arm/boot/dts/imx28-evk.dts
+@@ -129,7 +129,7 @@
+ pinctrl-0 = <&spi2_pins_a>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "sst,sst25vf016b", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts
+index f3bddc5ada4b8..13acdc7916b9b 100644
+--- a/arch/arm/boot/dts/imx28-m28evk.dts
++++ b/arch/arm/boot/dts/imx28-m28evk.dts
+@@ -33,7 +33,7 @@
+ pinctrl-0 = <&spi2_pins_a>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "m25p80", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx28-sps1.dts b/arch/arm/boot/dts/imx28-sps1.dts
+index 43be7a6a769bc..90928db0df701 100644
+--- a/arch/arm/boot/dts/imx28-sps1.dts
++++ b/arch/arm/boot/dts/imx28-sps1.dts
+@@ -51,7 +51,7 @@
+ pinctrl-0 = <&spi2_pins_a>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "everspin,mr25h256", "mr25h256";
+diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
+index 84d0176d51933..10eab221bc053 100644
+--- a/arch/arm/boot/dts/imx28.dtsi
++++ b/arch/arm/boot/dts/imx28.dtsi
+@@ -78,7 +78,7 @@
+ status = "disabled";
+ };
+
+- dma_apbh: dma-apbh@80004000 {
++ dma_apbh: dma-controller@80004000 {
+ compatible = "fsl,imx28-dma-apbh";
+ reg = <0x80004000 0x2000>;
+ interrupts = <82 83 84 85
+diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi
+index 948d2a543f8d1..c85866e73a7b9 100644
+--- a/arch/arm/boot/dts/imx31.dtsi
++++ b/arch/arm/boot/dts/imx31.dtsi
+@@ -297,7 +297,7 @@
+ #interrupt-cells = <2>;
+ };
+
+- sdma: sdma@53fd4000 {
++ sdma: dma-controller@53fd4000 {
+ compatible = "fsl,imx31-sdma";
+ reg = <0x53fd4000 0x4000>;
+ interrupts = <34>;
+diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
+index 8e41c8b7bd705..d650f54c3fc6b 100644
+--- a/arch/arm/boot/dts/imx35.dtsi
++++ b/arch/arm/boot/dts/imx35.dtsi
+@@ -284,7 +284,7 @@
+ #interrupt-cells = <2>;
+ };
+
+- sdma: sdma@53fd4000 {
++ sdma: dma-controller@53fd4000 {
+ compatible = "fsl,imx35-sdma";
+ reg = <0x53fd4000 0x4000>;
+ clocks = <&clks 9>, <&clks 65>;
+diff --git a/arch/arm/boot/dts/imx50.dtsi b/arch/arm/boot/dts/imx50.dtsi
+index a969f335b2402..2560f8514ebed 100644
+--- a/arch/arm/boot/dts/imx50.dtsi
++++ b/arch/arm/boot/dts/imx50.dtsi
+@@ -421,7 +421,7 @@
+ status = "disabled";
+ };
+
+- sdma: sdma@63fb0000 {
++ sdma: dma-controller@63fb0000 {
+ compatible = "fsl,imx50-sdma", "fsl,imx35-sdma";
+ reg = <0x63fb0000 0x4000>;
+ interrupts = <6>;
+diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi
+index 01cfcbe5928e8..b3ab0c000d9d1 100644
+--- a/arch/arm/boot/dts/imx51.dtsi
++++ b/arch/arm/boot/dts/imx51.dtsi
+@@ -498,7 +498,7 @@
+ status = "disabled";
+ };
+
+- sdma: sdma@83fb0000 {
++ sdma: dma-controller@83fb0000 {
+ compatible = "fsl,imx51-sdma", "fsl,imx35-sdma";
+ reg = <0x83fb0000 0x4000>;
+ interrupts = <6>;
+diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts
+index 4f88e96d81ddb..d5c68d1ea707c 100644
+--- a/arch/arm/boot/dts/imx53-m53menlo.dts
++++ b/arch/arm/boot/dts/imx53-m53menlo.dts
+@@ -53,6 +53,31 @@
+ };
+ };
+
++ lvds-decoder {
++ compatible = "ti,ds90cf364a", "lvds-decoder";
++
++ ports {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ port@0 {
++ reg = <0>;
++
++ lvds_decoder_in: endpoint {
++ remote-endpoint = <&lvds0_out>;
++ };
++ };
++
++ port@1 {
++ reg = <1>;
++
++ lvds_decoder_out: endpoint {
++ remote-endpoint = <&panel_in>;
++ };
++ };
++ };
++ };
++
+ panel {
+ compatible = "edt,etm0700g0dh6";
+ pinctrl-0 = <&pinctrl_display_gpio>;
+@@ -61,7 +86,7 @@
+
+ port {
+ panel_in: endpoint {
+- remote-endpoint = <&lvds0_out>;
++ remote-endpoint = <&lvds_decoder_out>;
+ };
+ };
+ };
+@@ -450,7 +475,7 @@
+ reg = <2>;
+
+ lvds0_out: endpoint {
+- remote-endpoint = <&panel_in>;
++ remote-endpoint = <&lvds_decoder_in>;
+ };
+ };
+ };
+diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts
+index 37d0cffea99c5..70c4a4852256c 100644
+--- a/arch/arm/boot/dts/imx53-ppd.dts
++++ b/arch/arm/boot/dts/imx53-ppd.dts
+@@ -488,7 +488,7 @@
+ scl-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+
+- i2c-switch@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9547";
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
+index 2cf3909cca2f8..ca1bea42cc0e2 100644
+--- a/arch/arm/boot/dts/imx53.dtsi
++++ b/arch/arm/boot/dts/imx53.dtsi
+@@ -710,7 +710,7 @@
+ status = "disabled";
+ };
+
+- sdma: sdma@63fb0000 {
++ sdma: dma-controller@63fb0000 {
+ compatible = "fsl,imx53-sdma", "fsl,imx35-sdma";
+ reg = <0x63fb0000 0x4000>;
+ interrupts = <6>;
+diff --git a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts
+index b4a9523e325b4..864dc5018451f 100644
+--- a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts
++++ b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts
+@@ -297,7 +297,11 @@
+ phy-mode = "rmii";
+ phy-reset-gpios = <&gpio1 18 GPIO_ACTIVE_LOW>;
+ phy-handle = <&phy>;
+- clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&rmii_clk>;
++ clocks = <&clks IMX6QDL_CLK_ENET>,
++ <&clks IMX6QDL_CLK_ENET>,
++ <&rmii_clk>,
++ <&clks IMX6QDL_CLK_ENET_REF>;
++ clock-names = "ipg", "ahb", "ptp", "enet_out";
+ status = "okay";
+
+ mdio {
+diff --git a/arch/arm/boot/dts/imx6dl-prtrvt.dts b/arch/arm/boot/dts/imx6dl-prtrvt.dts
+index 5ac84445e9cc1..90e01de8c2c15 100644
+--- a/arch/arm/boot/dts/imx6dl-prtrvt.dts
++++ b/arch/arm/boot/dts/imx6dl-prtrvt.dts
+@@ -126,6 +126,10 @@
+ status = "disabled";
+ };
+
++&usbotg {
++ disable-over-current;
++};
++
+ &vpu {
+ status = "disabled";
+ };
+diff --git a/arch/arm/boot/dts/imx6dl-rex-basic.dts b/arch/arm/boot/dts/imx6dl-rex-basic.dts
+index 0f1616bfa9a80..b72f8ea1e6f6c 100644
+--- a/arch/arm/boot/dts/imx6dl-rex-basic.dts
++++ b/arch/arm/boot/dts/imx6dl-rex-basic.dts
+@@ -19,7 +19,7 @@
+ };
+
+ &ecspi3 {
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "sst,sst25vf016b", "jedec,spi-nor";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
+index fdd81fdc3f357..cd3183c36488a 100644
+--- a/arch/arm/boot/dts/imx6dl.dtsi
++++ b/arch/arm/boot/dts/imx6dl.dtsi
+@@ -84,6 +84,9 @@
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x20000>;
++ ranges = <0 0x00900000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6QDL_CLK_OCRAM>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6q-ba16.dtsi b/arch/arm/boot/dts/imx6q-ba16.dtsi
+index 6330d75f8f390..f266f1b7e0cfc 100644
+--- a/arch/arm/boot/dts/imx6q-ba16.dtsi
++++ b/arch/arm/boot/dts/imx6q-ba16.dtsi
+@@ -142,7 +142,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: n25q032@0 {
++ flash: flash@0 {
+ compatible = "jedec,spi-nor";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
+index 10922375c51e1..ead83091e193a 100644
+--- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi
++++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
+@@ -160,7 +160,7 @@
+ pinctrl-0 = <&pinctrl_ecspi5>;
+ status = "okay";
+
+- m25_eeprom: m25p80@0 {
++ m25_eeprom: flash@0 {
+ compatible = "atmel,at25";
+ spi-max-frequency = <10000000>;
+ size = <0x8000>;
+diff --git a/arch/arm/boot/dts/imx6q-cm-fx6.dts b/arch/arm/boot/dts/imx6q-cm-fx6.dts
+index bfb530f29d9de..1ad41c944b4b9 100644
+--- a/arch/arm/boot/dts/imx6q-cm-fx6.dts
++++ b/arch/arm/boot/dts/imx6q-cm-fx6.dts
+@@ -260,7 +260,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- m25p80@0 {
++ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,m25p", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6q-dmo-edmqmx6.dts b/arch/arm/boot/dts/imx6q-dmo-edmqmx6.dts
+index c713ac03b3b92..9591848cbd37c 100644
+--- a/arch/arm/boot/dts/imx6q-dmo-edmqmx6.dts
++++ b/arch/arm/boot/dts/imx6q-dmo-edmqmx6.dts
+@@ -102,7 +102,7 @@
+ cs-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "m25p80", "jedec,spi-nor";
+ spi-max-frequency = <40000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6q-dms-ba16.dts b/arch/arm/boot/dts/imx6q-dms-ba16.dts
+index 48fb47e715f6d..137db38f0d27b 100644
+--- a/arch/arm/boot/dts/imx6q-dms-ba16.dts
++++ b/arch/arm/boot/dts/imx6q-dms-ba16.dts
+@@ -47,7 +47,7 @@
+ pinctrl-0 = <&pinctrl_ecspi5>;
+ status = "okay";
+
+- m25_eeprom: m25p80@0 {
++ m25_eeprom: flash@0 {
+ compatible = "atmel,at25256B", "atmel,at25";
+ spi-max-frequency = <20000000>;
+ size = <0x8000>;
+diff --git a/arch/arm/boot/dts/imx6q-gw5400-a.dts b/arch/arm/boot/dts/imx6q-gw5400-a.dts
+index 4cde45d5c90c8..e894faba571f9 100644
+--- a/arch/arm/boot/dts/imx6q-gw5400-a.dts
++++ b/arch/arm/boot/dts/imx6q-gw5400-a.dts
+@@ -137,7 +137,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "sst,w25q256", "jedec,spi-nor";
+ spi-max-frequency = <30000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6q-marsboard.dts b/arch/arm/boot/dts/imx6q-marsboard.dts
+index 05ee283882290..cc18010023942 100644
+--- a/arch/arm/boot/dts/imx6q-marsboard.dts
++++ b/arch/arm/boot/dts/imx6q-marsboard.dts
+@@ -100,7 +100,7 @@
+ cs-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+- m25p80@0 {
++ flash@0 {
+ compatible = "microchip,sst25vf016b";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts
+index b4605edfd2ab8..d8fa83effd638 100644
+--- a/arch/arm/boot/dts/imx6q-prti6q.dts
++++ b/arch/arm/boot/dts/imx6q-prti6q.dts
+@@ -364,8 +364,8 @@
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_wifi>;
+ interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>;
+- ref-clock-frequency = "38400000";
+- tcxo-clock-frequency = "19200000";
++ ref-clock-frequency = <38400000>;
++ tcxo-clock-frequency = <19200000>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/imx6q-rex-pro.dts b/arch/arm/boot/dts/imx6q-rex-pro.dts
+index 1767e1a3cd53a..271f4b2d9b9f0 100644
+--- a/arch/arm/boot/dts/imx6q-rex-pro.dts
++++ b/arch/arm/boot/dts/imx6q-rex-pro.dts
+@@ -19,7 +19,7 @@
+ };
+
+ &ecspi3 {
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "sst,sst25vf032b", "jedec,spi-nor";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
+index 9caba4529c718..a8069e0a8fe82 100644
+--- a/arch/arm/boot/dts/imx6q.dtsi
++++ b/arch/arm/boot/dts/imx6q.dtsi
+@@ -163,6 +163,9 @@
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x40000>;
++ ranges = <0 0x00900000 0x40000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6QDL_CLK_OCRAM>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
+index 30fa349f9d054..a696873dc1abe 100644
+--- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
+@@ -286,6 +286,8 @@
+ codec: sgtl5000@a {
+ compatible = "fsl,sgtl5000";
+ reg = <0x0a>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_sgtl5000>;
+ clocks = <&clks IMX6QDL_CLK_CKO>;
+ VDDA-supply = <&reg_module_3v3_audio>;
+ VDDIO-supply = <&reg_module_3v3>;
+@@ -516,8 +518,6 @@
+ MX6QDL_PAD_DISP0_DAT21__AUD4_TXD 0x130b0
+ MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0
+ MX6QDL_PAD_DISP0_DAT23__AUD4_RXD 0x130b0
+- /* SGTL5000 sys_mclk */
+- MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0
+ >;
+ };
+
+@@ -810,6 +810,12 @@
+ >;
+ };
+
++ pinctrl_sgtl5000: sgtl5000grp {
++ fsl,pins = <
++ MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0
++ >;
++ };
++
+ pinctrl_spdif: spdifgrp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_16__SPDIF_IN 0x1b0b0
+diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
+index e21f6ac864e54..baa197c90060e 100644
+--- a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi
+@@ -96,7 +96,7 @@
+ pinctrl-0 = <&pinctrl_ecspi4>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q128a11", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
+index 563bf9d44fe0d..2ba577e602e7f 100644
+--- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
+@@ -131,7 +131,7 @@
+ pinctrl-0 = <&pinctrl_ecspi4>;
+ status = "okay";
+
+- flash: m25p80@1 {
++ flash: flash@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q128a11", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi
+index 4e2a309c93fa8..1e86b38147080 100644
+--- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0+ OR MIT
+ /*
+- * Copyright 2014-2020 Toradex
++ * Copyright 2014-2022 Toradex
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2011 Linaro Ltd.
+ */
+@@ -132,7 +132,7 @@
+ clock-frequency = <100000>;
+ pinctrl-names = "default", "gpio";
+ pinctrl-0 = <&pinctrl_i2c2>;
+- pinctrl-0 = <&pinctrl_i2c2_gpio>;
++ pinctrl-1 = <&pinctrl_i2c2_gpio>;
+ scl-gpios = <&gpio2 30 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio3 16 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ status = "okay";
+@@ -488,7 +488,7 @@
+ >;
+ };
+
+- pinctrl_i2c2_gpio: i2c2grp {
++ pinctrl_i2c2_gpio: i2c2gpiogrp {
+ fsl,pins = <
+ MX6QDL_PAD_EIM_EB2__GPIO2_IO30 0x4001b8b1
+ MX6QDL_PAD_EIM_D16__GPIO3_IO16 0x4001b8b1
+diff --git a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
+index 648f5fcb72e65..2c1d6f28e6950 100644
+--- a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi
+@@ -35,7 +35,7 @@
+ pinctrl-0 = <&pinctrl_ecspi3>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "sst,sst25vf040b", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
+index 4bc4371e6bae5..4b81a975c979d 100644
+--- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi
+@@ -632,7 +632,6 @@
+ &uart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1>;
+- uart-has-rtscts;
+ rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+ };
+diff --git a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi
+index 68e5ab2e27e22..6bb4855d13ce5 100644
+--- a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi
+@@ -29,7 +29,7 @@
+
+ user-pb {
+ label = "user_pb";
+- gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>;
++ gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>;
+ linux,code = <BTN_0>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi
+index 8e23cec7149e5..696427b487f01 100644
+--- a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi
+@@ -26,7 +26,7 @@
+
+ user-pb {
+ label = "user_pb";
+- gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>;
++ gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>;
+ linux,code = <BTN_0>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
+index b167b33bd108d..683f6e58ab230 100644
+--- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
+@@ -51,16 +51,6 @@
+ vin-supply = <&reg_3p3v_s5>;
+ };
+
+- reg_3p3v_s0: regulator-3p3v-s0 {
+- compatible = "regulator-fixed";
+- regulator-name = "V_3V3_S0";
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
+- regulator-always-on;
+- regulator-boot-on;
+- vin-supply = <&reg_3p3v_s5>;
+- };
+-
+ reg_3p3v_s5: regulator-3p3v-s5 {
+ compatible = "regulator-fixed";
+ regulator-name = "V_3V3_S5";
+@@ -258,8 +248,8 @@
+ status = "okay";
+
+ /* default boot source: workaround #1 for errata ERR006282 */
+- smarc_flash: spi-flash@0 {
+- compatible = "winbond,w25q16dw", "jedec,spi-nor";
++ smarc_flash: flash@0 {
++ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <20000000>;
+ };
+@@ -273,6 +263,10 @@
+ phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
+ };
+
++&hdmi {
++ ddc-i2c-bus = <&i2c2>;
++};
++
+ &i2c_intern {
+ pmic@8 {
+ compatible = "fsl,pfuze100";
+@@ -397,7 +391,7 @@
+
+ /* HDMI_CTRL */
+ &i2c2 {
+- clock-frequency = <375000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c2>;
+ };
+diff --git a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
+index ac34709e97413..0ad4cb4f1e828 100644
+--- a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
+@@ -179,7 +179,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "microchip,sst25vf016b";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
+index c96f4d7e1e0d8..beaa2dcd436ce 100644
+--- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
+@@ -321,7 +321,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "microchip,sst25vf016b";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
+index 92d09a3ebe0ee..ee7e2371f94bd 100644
+--- a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
+@@ -252,7 +252,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "microchip,sst25vf016b";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
+index 49da30d7510c4..904d5d051d63c 100644
+--- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
+@@ -237,7 +237,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "sst,sst25vf016b", "jedec,spi-nor";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6qdl-prti6q.dtsi b/arch/arm/boot/dts/imx6qdl-prti6q.dtsi
+index 19578f660b092..70dfa07a16981 100644
+--- a/arch/arm/boot/dts/imx6qdl-prti6q.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-prti6q.dtsi
+@@ -69,6 +69,7 @@
+ vbus-supply = <&reg_usb_h1_vbus>;
+ phy_type = "utmi";
+ dr_mode = "host";
++ disable-over-current;
+ status = "okay";
+ };
+
+@@ -78,10 +79,18 @@
+ pinctrl-0 = <&pinctrl_usbotg>;
+ phy_type = "utmi";
+ dr_mode = "host";
+- disable-over-current;
++ over-current-active-low;
+ status = "okay";
+ };
+
++&usbphynop1 {
++ status = "disabled";
++};
++
++&usbphynop2 {
++ status = "disabled";
++};
++
+ &usdhc1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc1>;
+diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+index 5e58740d40c5b..1368a47620372 100644
+--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+@@ -272,7 +272,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1 &pinctrl_ecspi1_cs>;
+ status = "disabled"; /* pin conflict with WEIM NOR */
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,m25p32", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
+index eb9a0b104f1c3..901b9a761b66e 100644
+--- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
+@@ -313,7 +313,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "sst,sst25vf016b", "jedec,spi-nor";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+index 0c0105468a2fe..37482a9023fce 100644
+--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+@@ -197,7 +197,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,m25p32", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
+index fded07f370b39..d6ba4b2a60f6f 100644
+--- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
+@@ -226,7 +226,7 @@
+ reg = <0x28>;
+ #gpio-cells = <2>;
+ gpio-controller;
+- ngpio = <32>;
++ ngpios = <62>;
+ };
+
+ sgtl5000: codec@a {
+diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+index d07d8f83456d2..ccfa8e320be62 100644
+--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+@@ -5,6 +5,8 @@
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ */
+
++#include <dt-bindings/gpio/gpio.h>
++
+ / {
+ aliases {
+ backlight = &backlight;
+@@ -226,6 +228,7 @@
+ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
+ MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
+ MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
++ MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0
+ >;
+ };
+
+@@ -304,7 +307,7 @@
+ &usdhc3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc3>;
+- non-removable;
++ cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
+ status = "okay";
+ };
+
+diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
+index b62a0dbb033ff..ec6fba5ee8fde 100644
+--- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi
+@@ -309,6 +309,7 @@
+
+ ethphy: ethernet-phy@1 {
+ reg = <1>;
++ qca,clk-out-frequency = <125000000>;
+ };
+ };
+ };
+diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
+index 89c342f3a7c2f..8b6327e64819c 100644
+--- a/arch/arm/boot/dts/imx6qdl.dtsi
++++ b/arch/arm/boot/dts/imx6qdl.dtsi
+@@ -150,7 +150,7 @@
+ interrupt-parent = <&gpc>;
+ ranges;
+
+- dma_apbh: dma-apbh@110000 {
++ dma_apbh: dma-controller@110000 {
+ compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x00110000 0x2000>;
+ interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
+@@ -763,7 +763,7 @@
+ regulator-name = "vddpu";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1450000>;
+- regulator-enable-ramp-delay = <150>;
++ regulator-enable-ramp-delay = <380>;
+ anatop-reg-offset = <0x140>;
+ anatop-vol-bit-shift = <9>;
+ anatop-vol-bit-width = <5>;
+@@ -930,7 +930,7 @@
+ interrupts = <0 125 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+- sdma: sdma@20ec000 {
++ sdma: dma-controller@20ec000 {
+ compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma";
+ reg = <0x020ec000 0x4000>;
+ interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi
+index b310f13a53f22..4d23c92aa8a6b 100644
+--- a/arch/arm/boot/dts/imx6qp.dtsi
++++ b/arch/arm/boot/dts/imx6qp.dtsi
+@@ -9,12 +9,18 @@
+ ocram2: sram@940000 {
+ compatible = "mmio-sram";
+ reg = <0x00940000 0x20000>;
++ ranges = <0 0x00940000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6QDL_CLK_OCRAM>;
+ };
+
+ ocram3: sram@960000 {
+ compatible = "mmio-sram";
+ reg = <0x00960000 0x20000>;
++ ranges = <0 0x00960000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6QDL_CLK_OCRAM>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts
+index 25f6f2fb1555e..f16c830f1e918 100644
+--- a/arch/arm/boot/dts/imx6sl-evk.dts
++++ b/arch/arm/boot/dts/imx6sl-evk.dts
+@@ -137,7 +137,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,m25p32", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
+index a17b8bbbdb956..f2231cb1e32df 100644
+--- a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
++++ b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts
+@@ -597,6 +597,7 @@
+
+ &usbotg1 {
+ pinctrl-names = "default";
++ pinctrl-0 = <&pinctrl_usbotg1>;
+ disable-over-current;
+ srp-disable;
+ hnp-disable;
+diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi
+index 997b96c1c47b9..0e0139246ad21 100644
+--- a/arch/arm/boot/dts/imx6sl.dtsi
++++ b/arch/arm/boot/dts/imx6sl.dtsi
+@@ -117,6 +117,9 @@
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x20000>;
++ ranges = <0 0x00900000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6SL_CLK_OCRAM>;
+ };
+
+@@ -749,7 +752,7 @@
+ interrupts = <0 6 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+- sdma: sdma@20ec000 {
++ sdma: dma-controller@20ec000 {
+ compatible = "fsl,imx6sl-sdma", "fsl,imx6q-sdma";
+ reg = <0x020ec000 0x4000>;
+ interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi
+index 04f8d637a5019..3659fd5ecfa62 100644
+--- a/arch/arm/boot/dts/imx6sll.dtsi
++++ b/arch/arm/boot/dts/imx6sll.dtsi
+@@ -51,20 +51,18 @@
+ device_type = "cpu";
+ reg = <0>;
+ next-level-cache = <&L2>;
+- operating-points = <
++ operating-points =
+ /* kHz uV */
+- 996000 1275000
+- 792000 1175000
+- 396000 1075000
+- 198000 975000
+- >;
+- fsl,soc-operating-points = <
++ <996000 1275000>,
++ <792000 1175000>,
++ <396000 1075000>,
++ <198000 975000>;
++ fsl,soc-operating-points =
+ /* ARM kHz SOC-PU uV */
+- 996000 1175000
+- 792000 1175000
+- 396000 1175000
+- 198000 1175000
+- >;
++ <996000 1175000>,
++ <792000 1175000>,
++ <396000 1175000>,
++ <198000 1175000>;
+ clock-latency = <61036>; /* two CLK32 periods */
+ #cooling-cells = <2>;
+ clocks = <&clks IMX6SLL_CLK_ARM>,
+@@ -117,6 +115,9 @@
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x20000>;
++ ranges = <0 0x00900000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ };
+
+ intc: interrupt-controller@a01000 {
+@@ -551,7 +552,7 @@
+ reg = <0x020ca000 0x1000>;
+ interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6SLL_CLK_USBPHY2>;
+- phy-reg_3p0-supply = <&reg_3p0>;
++ phy-3p0-supply = <&reg_3p0>;
+ fsl,anatop = <&anatop>;
+ };
+
+diff --git a/arch/arm/boot/dts/imx6sx-nitrogen6sx.dts b/arch/arm/boot/dts/imx6sx-nitrogen6sx.dts
+index 66af78e83b701..a2c79bcf9a11c 100644
+--- a/arch/arm/boot/dts/imx6sx-nitrogen6sx.dts
++++ b/arch/arm/boot/dts/imx6sx-nitrogen6sx.dts
+@@ -107,7 +107,7 @@
+ pinctrl-0 = <&pinctrl_ecspi1>;
+ status = "okay";
+
+- flash: m25p80@0 {
++ flash: flash@0 {
+ compatible = "microchip,sst25vf016b";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
+index dce5dcf96c255..7dda42553f4bc 100644
+--- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts
++++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts
+@@ -123,7 +123,7 @@
+ pinctrl-0 = <&pinctrl_qspi2>;
+ status = "okay";
+
+- flash0: s25fl128s@0 {
++ flash0: flash@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -133,7 +133,7 @@
+ spi-tx-bus-width = <4>;
+ };
+
+- flash1: s25fl128s@2 {
++ flash1: flash@2 {
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
+index 99f4cf777a384..969cfe920d252 100644
+--- a/arch/arm/boot/dts/imx6sx-sdb.dts
++++ b/arch/arm/boot/dts/imx6sx-sdb.dts
+@@ -108,7 +108,7 @@
+ pinctrl-0 = <&pinctrl_qspi2>;
+ status = "okay";
+
+- flash0: n25q256a@0 {
++ flash0: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q256a", "jedec,spi-nor";
+@@ -118,7 +118,7 @@
+ reg = <0>;
+ };
+
+- flash1: n25q256a@2 {
++ flash1: flash@2 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q256a", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
+index 8516730778df8..7a3d85e7a5fa7 100644
+--- a/arch/arm/boot/dts/imx6sx.dtsi
++++ b/arch/arm/boot/dts/imx6sx.dtsi
+@@ -164,12 +164,18 @@
+ ocram_s: sram@8f8000 {
+ compatible = "mmio-sram";
+ reg = <0x008f8000 0x4000>;
++ ranges = <0 0x008f8000 0x4000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6SX_CLK_OCRAM_S>;
+ };
+
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x20000>;
++ ranges = <0 0x00900000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ clocks = <&clks IMX6SX_CLK_OCRAM>;
+ };
+
+@@ -203,7 +209,7 @@
+ power-domains = <&pd_pu>;
+ };
+
+- dma_apbh: dma-apbh@1804000 {
++ dma_apbh: dma-controller@1804000 {
+ compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x01804000 0x2000>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+@@ -842,7 +848,7 @@
+ reg = <0x020e4000 0x4000>;
+ };
+
+- sdma: sdma@20ec000 {
++ sdma: dma-controller@20ec000 {
+ compatible = "fsl,imx6sx-sdma", "fsl,imx6q-sdma";
+ reg = <0x020ec000 0x4000>;
+ interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+@@ -975,6 +981,8 @@
+ <&clks IMX6SX_CLK_USDHC1>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-start-tap = <20>;
++ fsl,tuning-step= <2>;
+ status = "disabled";
+ };
+
+@@ -987,6 +995,8 @@
+ <&clks IMX6SX_CLK_USDHC2>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-start-tap = <20>;
++ fsl,tuning-step= <2>;
+ status = "disabled";
+ };
+
+@@ -999,6 +1009,8 @@
+ <&clks IMX6SX_CLK_USDHC3>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-start-tap = <20>;
++ fsl,tuning-step= <2>;
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
+index a3fde3316c736..1a18c41ce385a 100644
+--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
++++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
+@@ -286,7 +286,7 @@
+ pinctrl-0 = <&pinctrl_qspi>;
+ status = "okay";
+
+- flash0: n25q256a@0 {
++ flash0: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q256a", "jedec,spi-nor";
+diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
+index 47d3ce5d255fa..acd936540d898 100644
+--- a/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
++++ b/arch/arm/boot/dts/imx6ul-kontron-n6310-som.dtsi
+@@ -19,7 +19,7 @@
+ };
+
+ &qspi {
+- spi-flash@0 {
++ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "spi-nand";
+diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi
+index a095a7654ac65..29ed38dce5802 100644
+--- a/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi
++++ b/arch/arm/boot/dts/imx6ul-kontron-n6311-som.dtsi
+@@ -18,7 +18,7 @@
+ };
+
+ &qspi {
+- spi-flash@0 {
++ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "spi-nand";
+diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi
+index 2a449a3c1ae27..09a83dbdf6510 100644
+--- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi
++++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi
+@@ -19,7 +19,7 @@
+ pinctrl-0 = <&pinctrl_ecspi2>;
+ status = "okay";
+
+- spi-flash@0 {
++ flash@0 {
+ compatible = "mxicy,mx25v8035f", "jedec,spi-nor";
+ spi-max-frequency = <50000000>;
+ reg = <0>;
+diff --git a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
+index 162dc259edc8c..5a74c7f68eb62 100644
+--- a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
++++ b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts
+@@ -32,7 +32,7 @@
+ };
+
+ &i2c2 {
+- clock_frequency = <100000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c2>;
+ status = "okay";
+diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
+index afeec01f65228..ad92409349fa2 100644
+--- a/arch/arm/boot/dts/imx6ul.dtsi
++++ b/arch/arm/boot/dts/imx6ul.dtsi
+@@ -64,20 +64,18 @@
+ clock-frequency = <696000000>;
+ clock-latency = <61036>; /* two CLK32 periods */
+ #cooling-cells = <2>;
+- operating-points = <
++ operating-points =
+ /* kHz uV */
+- 696000 1275000
+- 528000 1175000
+- 396000 1025000
+- 198000 950000
+- >;
+- fsl,soc-operating-points = <
++ <696000 1275000>,
++ <528000 1175000>,
++ <396000 1025000>,
++ <198000 950000>;
++ fsl,soc-operating-points =
+ /* KHz uV */
+- 696000 1275000
+- 528000 1175000
+- 396000 1175000
+- 198000 1175000
+- >;
++ <696000 1275000>,
++ <528000 1175000>,
++ <396000 1175000>,
++ <198000 1175000>;
+ clocks = <&clks IMX6UL_CLK_ARM>,
+ <&clks IMX6UL_CLK_PLL2_BUS>,
+ <&clks IMX6UL_CLK_PLL2_PFD2>,
+@@ -149,6 +147,9 @@
+ ocram: sram@900000 {
+ compatible = "mmio-sram";
+ reg = <0x00900000 0x20000>;
++ ranges = <0 0x00900000 0x20000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
+ };
+
+ intc: interrupt-controller@a01000 {
+@@ -163,7 +164,7 @@
+ <0x00a06000 0x2000>;
+ };
+
+- dma_apbh: dma-apbh@1804000 {
++ dma_apbh: dma-controller@1804000 {
+ compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x01804000 0x2000>;
+ interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
+@@ -543,7 +544,7 @@
+ };
+
+ kpp: keypad@20b8000 {
+- compatible = "fsl,imx6ul-kpp", "fsl,imx6q-kpp", "fsl,imx21-kpp";
++ compatible = "fsl,imx6ul-kpp", "fsl,imx21-kpp";
+ reg = <0x020b8000 0x4000>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6UL_CLK_KPP>;
+@@ -743,7 +744,7 @@
+ status = "disabled";
+ };
+
+- sdma: sdma@20ec000 {
++ sdma: dma-controller@20ec000 {
+ compatible = "fsl,imx6ul-sdma", "fsl,imx6q-sdma",
+ "fsl,imx35-sdma";
+ reg = <0x020ec000 0x4000>;
+@@ -998,7 +999,7 @@
+ };
+
+ csi: csi@21c4000 {
+- compatible = "fsl,imx6ul-csi", "fsl,imx7-csi";
++ compatible = "fsl,imx6ul-csi";
+ reg = <0x021c4000 0x4000>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6UL_CLK_CSI>;
+@@ -1007,7 +1008,7 @@
+ };
+
+ lcdif: lcdif@21c8000 {
+- compatible = "fsl,imx6ul-lcdif", "fsl,imx28-lcdif";
++ compatible = "fsl,imx6ul-lcdif", "fsl,imx6sx-lcdif";
+ reg = <0x021c8000 0x4000>;
+ interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6UL_CLK_LCDIF_PIX>,
+@@ -1028,7 +1029,7 @@
+ qspi: spi@21e0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+- compatible = "fsl,imx6ul-qspi", "fsl,imx6sx-qspi";
++ compatible = "fsl,imx6ul-qspi";
+ reg = <0x021e0000 0x4000>, <0x60000000 0x10000000>;
+ reg-names = "QuadSPI", "QuadSPI-memory";
+ interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi
+index 0cdbf7b6e7285..b6fc879e9dbe6 100644
+--- a/arch/arm/boot/dts/imx6ull-colibri.dtsi
++++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi
+@@ -37,7 +37,7 @@
+
+ reg_sd1_vmmc: regulator-sd1-vmmc {
+ compatible = "regulator-gpio";
+- gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>;
++ gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_snvs_reg_sd>;
+ regulator-always-on;
+diff --git a/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi b/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi
+index b7e984284e1ad..d000606c07049 100644
+--- a/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi
++++ b/arch/arm/boot/dts/imx6ull-kontron-n6411-som.dtsi
+@@ -18,7 +18,7 @@
+ };
+
+ &qspi {
+- spi-flash@0 {
++ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "spi-nand";
+diff --git a/arch/arm/boot/dts/imx6ull-pinfunc.h b/arch/arm/boot/dts/imx6ull-pinfunc.h
+index eb025a9d47592..7328d4ef8559f 100644
+--- a/arch/arm/boot/dts/imx6ull-pinfunc.h
++++ b/arch/arm/boot/dts/imx6ull-pinfunc.h
+@@ -82,6 +82,6 @@
+ #define MX6ULL_PAD_CSI_DATA04__ESAI_TX_FS 0x01F4 0x0480 0x0000 0x9 0x0
+ #define MX6ULL_PAD_CSI_DATA05__ESAI_TX_CLK 0x01F8 0x0484 0x0000 0x9 0x0
+ #define MX6ULL_PAD_CSI_DATA06__ESAI_TX5_RX0 0x01FC 0x0488 0x0000 0x9 0x0
+-#define MX6ULL_PAD_CSI_DATA07__ESAI_T0 0x0200 0x048C 0x0000 0x9 0x0
++#define MX6ULL_PAD_CSI_DATA07__ESAI_TX0 0x0200 0x048C 0x0000 0x9 0x0
+
+ #endif /* __DTS_IMX6ULL_PINFUNC_H */
+diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi
+index 62b771c1d5a9a..f1c60b0cb143e 100644
+--- a/arch/arm/boot/dts/imx7-colibri.dtsi
++++ b/arch/arm/boot/dts/imx7-colibri.dtsi
+@@ -40,7 +40,7 @@
+
+ dailink_master: simple-audio-card,codec {
+ sound-dai = <&codec>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ };
+ };
+ };
+@@ -293,7 +293,7 @@
+ compatible = "fsl,sgtl5000";
+ #sound-dai-cells = <0>;
+ reg = <0x0a>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sai1_mclk>;
+ VDDA-supply = <&reg_module_3v3_avdd>;
+diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi
+index 5e6bef230dc75..b55a7792a8391 100644
+--- a/arch/arm/boot/dts/imx7-mba7.dtsi
++++ b/arch/arm/boot/dts/imx7-mba7.dtsi
+@@ -264,7 +264,7 @@
+ tlv320aic32x4: audio-codec@18 {
+ compatible = "ti,tlv320aic32x4";
+ reg = <0x18>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ clock-names = "mclk";
+ ldoin-supply = <&reg_audio_3v3>;
+ iov-supply = <&reg_audio_3v3>;
+diff --git a/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi b/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi
+index af39e5370fa12..045e4413d3390 100644
+--- a/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi
++++ b/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi
+@@ -13,6 +13,10 @@
+ };
+ };
+
++&cpu1 {
++ cpu-supply = <&reg_DCDC2>;
++};
++
+ &gpio6 {
+ gpio-line-names = "",
+ "",
+diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts
+index e0751e6ba3c0f..a31de900139d6 100644
+--- a/arch/arm/boot/dts/imx7d-nitrogen7.dts
++++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts
+@@ -288,7 +288,7 @@
+ codec: wm8960@1a {
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ clock-names = "mclk";
+ wlf,shared-lrclk;
+ };
+diff --git a/arch/arm/boot/dts/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/imx7d-pico-dwarf.dts
+index 5162fe227d1ea..fdc10563f1473 100644
+--- a/arch/arm/boot/dts/imx7d-pico-dwarf.dts
++++ b/arch/arm/boot/dts/imx7d-pico-dwarf.dts
+@@ -32,7 +32,7 @@
+ };
+
+ &i2c1 {
+- clock_frequency = <100000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c1>;
+ status = "okay";
+@@ -52,7 +52,7 @@
+ };
+
+ &i2c4 {
+- clock_frequency = <100000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c1>;
+ status = "okay";
+diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts
+index 7b2198a9372c6..6ad39dca70096 100644
+--- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts
++++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts
+@@ -31,7 +31,7 @@
+
+ dailink_master: simple-audio-card,codec {
+ sound-dai = <&sgtl5000>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ };
+ };
+ };
+@@ -41,7 +41,7 @@
+ #sound-dai-cells = <0>;
+ reg = <0x0a>;
+ compatible = "fsl,sgtl5000";
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ VDDA-supply = <&reg_2p5v>;
+ VDDIO-supply = <&reg_vref_1v8>;
+ };
+@@ -64,7 +64,7 @@
+ interrupt-parent = <&gpio2>;
+ interrupts = <7 0>;
+ spi-max-frequency = <1000000>;
+- pendown-gpio = <&gpio2 7 0>;
++ pendown-gpio = <&gpio2 7 GPIO_ACTIVE_LOW>;
+ vcc-supply = <&reg_3p3v>;
+ ti,x-min = /bits/ 16 <0>;
+ ti,x-max = /bits/ 16 <4095>;
+diff --git a/arch/arm/boot/dts/imx7d-pico-nymph.dts b/arch/arm/boot/dts/imx7d-pico-nymph.dts
+index 104a85254adbb..5afb1674e0125 100644
+--- a/arch/arm/boot/dts/imx7d-pico-nymph.dts
++++ b/arch/arm/boot/dts/imx7d-pico-nymph.dts
+@@ -43,7 +43,7 @@
+ };
+
+ &i2c1 {
+- clock_frequency = <100000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c1>;
+ status = "okay";
+@@ -64,7 +64,7 @@
+ };
+
+ &i2c2 {
+- clock_frequency = <100000>;
++ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c2>;
+ status = "okay";
+diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts
+index 70bea95c06d83..f263e391e24cb 100644
+--- a/arch/arm/boot/dts/imx7d-pico-pi.dts
++++ b/arch/arm/boot/dts/imx7d-pico-pi.dts
+@@ -31,7 +31,7 @@
+
+ dailink_master: simple-audio-card,codec {
+ sound-dai = <&sgtl5000>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ };
+ };
+ };
+@@ -41,7 +41,7 @@
+ #sound-dai-cells = <0>;
+ reg = <0x0a>;
+ compatible = "fsl,sgtl5000";
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ VDDA-supply = <&reg_2p5v>;
+ VDDIO-supply = <&reg_vref_1v8>;
+ };
+diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
+index 4a0d83784d7d1..4e62ed2df11dd 100644
+--- a/arch/arm/boot/dts/imx7d-sdb.dts
++++ b/arch/arm/boot/dts/imx7d-sdb.dts
+@@ -205,13 +205,8 @@
+ pinctrl-0 = <&pinctrl_tsc2046_pendown>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <29 0>;
+- pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>;
+- ti,x-min = /bits/ 16 <0>;
+- ti,x-max = /bits/ 16 <0>;
+- ti,y-min = /bits/ 16 <0>;
+- ti,y-max = /bits/ 16 <0>;
+- ti,pressure-max = /bits/ 16 <0>;
+- ti,x-plate-ohms = /bits/ 16 <400>;
++ pendown-gpio = <&gpio2 29 GPIO_ACTIVE_LOW>;
++ touchscreen-max-pressure = <255>;
+ wakeup-source;
+ };
+ };
+@@ -385,14 +380,14 @@
+ codec: wm8960@1a {
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ clock-names = "mclk";
+ wlf,shared-lrclk;
+ wlf,hp-cfg = <2 2 3>;
+ wlf,gpio-cfg = <1 3>;
+ assigned-clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_SRC>,
+ <&clks IMX7D_PLL_AUDIO_POST_DIV>,
+- <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ assigned-clock-parents = <&clks IMX7D_PLL_AUDIO_POST_DIV>;
+ assigned-clock-rates = <0>, <884736000>, <12288000>;
+ };
+diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts
+index 569bbd84e371a..558b064da743c 100644
+--- a/arch/arm/boot/dts/imx7s-warp.dts
++++ b/arch/arm/boot/dts/imx7s-warp.dts
+@@ -75,7 +75,7 @@
+
+ dailink_master: simple-audio-card,codec {
+ sound-dai = <&codec>;
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ };
+ };
+ };
+@@ -232,7 +232,7 @@
+ #sound-dai-cells = <0>;
+ reg = <0x0a>;
+ compatible = "fsl,sgtl5000";
+- clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>;
++ clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sai1_mclk>;
+ VDDA-supply = <&vgen4_reg>;
+diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
+index 1843fc0538709..c978aab1d0e3d 100644
+--- a/arch/arm/boot/dts/imx7s.dtsi
++++ b/arch/arm/boot/dts/imx7s.dtsi
+@@ -104,6 +104,7 @@
+ compatible = "usb-nop-xceiv";
+ clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>;
+ clock-names = "main_clk";
++ power-domains = <&pgc_hsic_phy>;
+ #phy-cells = <0>;
+ };
+
+@@ -496,7 +497,7 @@
+
+ mux: mux-controller {
+ compatible = "mmio-mux";
+- #mux-control-cells = <0>;
++ #mux-control-cells = <1>;
+ mux-reg-masks = <0x14 0x00000010>;
+ };
+
+@@ -1135,7 +1136,6 @@
+ compatible = "fsl,imx7d-usb", "fsl,imx27-usb";
+ reg = <0x30b30000 0x200>;
+ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+- power-domains = <&pgc_hsic_phy>;
+ clocks = <&clks IMX7D_USB_CTRL_CLK>;
+ fsl,usbphy = <&usbphynop3>;
+ fsl,usbmisc = <&usbmisc3 0>;
+@@ -1166,6 +1166,8 @@
+ <&clks IMX7D_USDHC1_ROOT_CLK>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-step = <2>;
++ fsl,tuning-start-tap = <20>;
+ status = "disabled";
+ };
+
+@@ -1178,6 +1180,8 @@
+ <&clks IMX7D_USDHC2_ROOT_CLK>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-step = <2>;
++ fsl,tuning-start-tap = <20>;
+ status = "disabled";
+ };
+
+@@ -1190,6 +1194,8 @@
+ <&clks IMX7D_USDHC3_ROOT_CLK>;
+ clock-names = "ipg", "ahb", "per";
+ bus-width = <4>;
++ fsl,tuning-step = <2>;
++ fsl,tuning-start-tap = <20>;
+ status = "disabled";
+ };
+
+@@ -1206,7 +1212,7 @@
+ status = "disabled";
+ };
+
+- sdma: sdma@30bd0000 {
++ sdma: dma-controller@30bd0000 {
+ compatible = "fsl,imx7d-sdma", "fsl,imx35-sdma";
+ reg = <0x30bd0000 0x10000>;
+ interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+@@ -1239,14 +1245,13 @@
+ };
+ };
+
+- dma_apbh: dma-apbh@33000000 {
++ dma_apbh: dma-controller@33000000 {
+ compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh";
+ reg = <0x33000000 0x2000>;
+ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3";
+ #dma-cells = <1>;
+ dma-channels = <4>;
+ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>;
+diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
+index b7ea37ad4e55c..bcec98b964114 100644
+--- a/arch/arm/boot/dts/imx7ulp.dtsi
++++ b/arch/arm/boot/dts/imx7ulp.dtsi
+@@ -259,7 +259,7 @@
+ interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+ assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
+- assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
++ assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+ timeout-sec = <40>;
+ };
+
+diff --git a/arch/arm/boot/dts/integratorap.dts b/arch/arm/boot/dts/integratorap.dts
+index 67d1f9b24a52f..8600c0548525e 100644
+--- a/arch/arm/boot/dts/integratorap.dts
++++ b/arch/arm/boot/dts/integratorap.dts
+@@ -153,6 +153,7 @@
+
+ pci: pciv3@62000000 {
+ compatible = "arm,integrator-ap-pci", "v3,v360epc-pci";
++ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+diff --git a/arch/arm/boot/dts/iwg20d-q7-common.dtsi b/arch/arm/boot/dts/iwg20d-q7-common.dtsi
+index bc857676d1910..c13d2f6e1a38f 100644
+--- a/arch/arm/boot/dts/iwg20d-q7-common.dtsi
++++ b/arch/arm/boot/dts/iwg20d-q7-common.dtsi
+@@ -49,7 +49,7 @@
+ lcd_backlight: backlight {
+ compatible = "pwm-backlight";
+
+- pwms = <&pwm3 0 5000000 0>;
++ pwms = <&pwm3 0 5000000>;
+ brightness-levels = <0 4 8 16 32 64 128 255>;
+ default-brightness-level = <7>;
+ enable-gpios = <&gpio5 14 GPIO_ACTIVE_HIGH>;
+diff --git a/arch/arm/boot/dts/kirkwood-lsxl.dtsi b/arch/arm/boot/dts/kirkwood-lsxl.dtsi
+index 7b151acb99846..88b70ba1c8fee 100644
+--- a/arch/arm/boot/dts/kirkwood-lsxl.dtsi
++++ b/arch/arm/boot/dts/kirkwood-lsxl.dtsi
+@@ -10,6 +10,11 @@
+
+ ocp@f1000000 {
+ pinctrl: pin-controller@10000 {
++ /* Non-default UART pins */
++ pmx_uart0: pmx-uart0 {
++ marvell,pins = "mpp4", "mpp5";
++ };
++
+ pmx_power_hdd: pmx-power-hdd {
+ marvell,pins = "mpp10";
+ marvell,function = "gpo";
+@@ -213,22 +218,11 @@
+ &mdio {
+ status = "okay";
+
+- ethphy0: ethernet-phy@0 {
+- reg = <0>;
+- };
+-
+ ethphy1: ethernet-phy@8 {
+ reg = <8>;
+ };
+ };
+
+-&eth0 {
+- status = "okay";
+- ethernet0-port@0 {
+- phy-handle = <&ethphy0>;
+- };
+-};
+-
+ &eth1 {
+ status = "okay";
+ ethernet1-port@0 {
+diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+index 2a0a98fe67f06..3240c67e0c392 100644
+--- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
++++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+@@ -11,3 +11,18 @@
+ model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
+ compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
+ };
++
++&omap3_pmx_core2 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&hsusb2_2_pins>;
++ hsusb2_2_pins: pinmux_hsusb2_2_pins {
++ pinctrl-single,pins = <
++ OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
++ OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
++ OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
++ OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
++ OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
++ OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
++ >;
++ };
++};
+diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+index a604d92221a4f..c757f0d7781c1 100644
+--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
++++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+@@ -11,3 +11,18 @@
+ model = "LogicPD Zoom DM3730 SOM-LV Development Kit";
+ compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3";
+ };
++
++&omap3_pmx_core2 {
++ pinctrl-names = "default";
++ pinctrl-0 = <&hsusb2_2_pins>;
++ hsusb2_2_pins: pinmux_hsusb2_2_pins {
++ pinctrl-single,pins = <
++ OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
++ OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
++ OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
++ OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
++ OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
++ OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
++ >;
++ };
++};
+diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
+index b56524cc7fe27..55b619c99e24d 100644
+--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
++++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
+@@ -265,21 +265,6 @@
+ };
+ };
+
+-&omap3_pmx_core2 {
+- pinctrl-names = "default";
+- pinctrl-0 = <&hsusb2_2_pins>;
+- hsusb2_2_pins: pinmux_hsusb2_2_pins {
+- pinctrl-single,pins = <
+- OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
+- OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
+- OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
+- OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
+- OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
+- OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
+- >;
+- };
+-};
+-
+ &uart2 {
+ interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>;
+ pinctrl-names = "default";
+diff --git a/arch/arm/boot/dts/ls1021a-tsn.dts b/arch/arm/boot/dts/ls1021a-tsn.dts
+index 9d8f0c2a8aba3..aca78b5eddf20 100644
+--- a/arch/arm/boot/dts/ls1021a-tsn.dts
++++ b/arch/arm/boot/dts/ls1021a-tsn.dts
+@@ -251,7 +251,7 @@
+
+ flash@0 {
+ /* Rev. A uses 64MB flash, Rev. B & C use 32MB flash */
+- compatible = "jedec,spi-nor", "s25fl256s1", "s25fl512s";
++ compatible = "jedec,spi-nor";
+ spi-max-frequency = <20000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
+index 4fce81422943b..f3b8540750b61 100644
+--- a/arch/arm/boot/dts/ls1021a.dtsi
++++ b/arch/arm/boot/dts/ls1021a.dtsi
+@@ -329,39 +329,6 @@
+ #thermal-sensor-cells = <1>;
+ };
+
+- thermal-zones {
+- cpu_thermal: cpu-thermal {
+- polling-delay-passive = <1000>;
+- polling-delay = <5000>;
+-
+- thermal-sensors = <&tmu 0>;
+-
+- trips {
+- cpu_alert: cpu-alert {
+- temperature = <85000>;
+- hysteresis = <2000>;
+- type = "passive";
+- };
+- cpu_crit: cpu-crit {
+- temperature = <95000>;
+- hysteresis = <2000>;
+- type = "critical";
+- };
+- };
+-
+- cooling-maps {
+- map0 {
+- trip = <&cpu_alert>;
+- cooling-device =
+- <&cpu0 THERMAL_NO_LIMIT
+- THERMAL_NO_LIMIT>,
+- <&cpu1 THERMAL_NO_LIMIT
+- THERMAL_NO_LIMIT>;
+- };
+- };
+- };
+- };
+-
+ dspi0: spi@2100000 {
+ compatible = "fsl,ls1021a-v1.0-dspi";
+ #address-cells = <1>;
+@@ -1016,4 +983,37 @@
+ big-endian;
+ };
+ };
++
++ thermal-zones {
++ cpu_thermal: cpu-thermal {
++ polling-delay-passive = <1000>;
++ polling-delay = <5000>;
++
++ thermal-sensors = <&tmu 0>;
++
++ trips {
++ cpu_alert: cpu-alert {
++ temperature = <85000>;
++ hysteresis = <2000>;
++ type = "passive";
++ };
++ cpu_crit: cpu-crit {
++ temperature = <95000>;
++ hysteresis = <2000>;
++ type = "critical";
++ };
++ };
++
++ cooling-maps {
++ map0 {
++ trip = <&cpu_alert>;
++ cooling-device =
++ <&cpu0 THERMAL_NO_LIMIT
++ THERMAL_NO_LIMIT>,
++ <&cpu1 THERMAL_NO_LIMIT
++ THERMAL_NO_LIMIT>;
++ };
++ };
++ };
++ };
+ };
+diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
+index 3be7cba603d5a..26eaba3fa96f3 100644
+--- a/arch/arm/boot/dts/meson.dtsi
++++ b/arch/arm/boot/dts/meson.dtsi
+@@ -59,7 +59,7 @@
+ };
+
+ uart_A: serial@84c0 {
+- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
++ compatible = "amlogic,meson6-uart";
+ reg = <0x84c0 0x18>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
+ fifo-size = <128>;
+@@ -67,7 +67,7 @@
+ };
+
+ uart_B: serial@84dc {
+- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
++ compatible = "amlogic,meson6-uart";
+ reg = <0x84dc 0x18>;
+ interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
+ status = "disabled";
+@@ -105,7 +105,7 @@
+ };
+
+ uart_C: serial@8700 {
+- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
++ compatible = "amlogic,meson6-uart";
+ reg = <0x8700 0x18>;
+ interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
+ status = "disabled";
+@@ -228,7 +228,7 @@
+ };
+
+ uart_AO: serial@4c0 {
+- compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart";
++ compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart";
+ reg = <0x4c0 0x18>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
+ status = "disabled";
+diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
+index f80ddc98d3a2b..72828b9d4281d 100644
+--- a/arch/arm/boot/dts/meson8.dtsi
++++ b/arch/arm/boot/dts/meson8.dtsi
+@@ -736,27 +736,27 @@
+ };
+
+ &uart_AO {
+- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart";
++ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_A {
+- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_B {
+- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART1>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_C {
+- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART2>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &usb0 {
+diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi
+index b49b7cbaed4ee..cfd4a909a7a70 100644
+--- a/arch/arm/boot/dts/meson8b.dtsi
++++ b/arch/arm/boot/dts/meson8b.dtsi
+@@ -724,27 +724,27 @@
+ };
+
+ &uart_AO {
+- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart";
++ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_A {
+- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8b-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_B {
+- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8b-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART1>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &uart_C {
+- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
+- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
+- clock-names = "baud", "xtal", "pclk";
++ compatible = "amlogic,meson8b-uart";
++ clocks = <&xtal>, <&clkc CLKID_UART2>, <&clkc CLKID_CLK81>;
++ clock-names = "xtal", "pclk", "baud";
+ };
+
+ &usb0 {
+diff --git a/arch/arm/boot/dts/moxart-uc7112lx.dts b/arch/arm/boot/dts/moxart-uc7112lx.dts
+index eb5291b0ee3aa..e07b807b4cec5 100644
+--- a/arch/arm/boot/dts/moxart-uc7112lx.dts
++++ b/arch/arm/boot/dts/moxart-uc7112lx.dts
+@@ -79,7 +79,7 @@
+ clocks = <&ref12>;
+ };
+
+-&sdhci {
++&mmc {
+ status = "okay";
+ };
+
+diff --git a/arch/arm/boot/dts/moxart.dtsi b/arch/arm/boot/dts/moxart.dtsi
+index f5f070a874823..764832ddfa78a 100644
+--- a/arch/arm/boot/dts/moxart.dtsi
++++ b/arch/arm/boot/dts/moxart.dtsi
+@@ -93,8 +93,8 @@
+ clock-names = "PCLK";
+ };
+
+- sdhci: sdhci@98e00000 {
+- compatible = "moxa,moxart-sdhci";
++ mmc: mmc@98e00000 {
++ compatible = "moxa,moxart-mmc";
+ reg = <0x98e00000 0x5C>;
+ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_apb>;
+diff --git a/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts b/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts
+index eb6eb21cb2a44..33c8d5b3d679a 100644
+--- a/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts
++++ b/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts
+@@ -366,7 +366,7 @@
+ spi-max-frequency = <20000000>;
+ spi-rx-bus-width = <2>;
+ label = "bmc";
+- partitions@80000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts b/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts
+index d4ff49939a3d9..bbe18618f5c56 100644
+--- a/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts
++++ b/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts
+@@ -142,7 +142,7 @@
+ reg = <0>;
+ spi-rx-bus-width = <2>;
+
+- partitions@80000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts b/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts
+index 82a104b2a65f1..8e3425cb8e8b9 100644
+--- a/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts
++++ b/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts
+@@ -388,7 +388,7 @@
+ spi-max-frequency = <5000000>;
+ spi-rx-bus-width = <2>;
+ label = "bmc";
+- partitions@80000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -422,7 +422,7 @@
+ reg = <1>;
+ spi-max-frequency = <5000000>;
+ spi-rx-bus-width = <2>;
+- partitions@88000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -447,7 +447,7 @@
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+ spi-rx-bus-width = <2>;
+- partitions@A0000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/nuvoton-npcm750-evb.dts b/arch/arm/boot/dts/nuvoton-npcm750-evb.dts
+index 0334641f88292..cf274c926711a 100644
+--- a/arch/arm/boot/dts/nuvoton-npcm750-evb.dts
++++ b/arch/arm/boot/dts/nuvoton-npcm750-evb.dts
+@@ -74,7 +74,7 @@
+ spi-rx-bus-width = <2>;
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+- partitions@80000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -135,7 +135,7 @@
+ spi-rx-bus-width = <2>;
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+- partitions@A0000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts b/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts
+index 767e0ac0df7c5..7fe7efee28acb 100644
+--- a/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts
++++ b/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts
+@@ -107,7 +107,7 @@
+ reg = <0>;
+ spi-rx-bus-width = <2>;
+
+- partitions@80000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -146,7 +146,7 @@
+ reg = <1>;
+ npcm,fiu-rx-bus-width = <2>;
+
+- partitions@88000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -173,7 +173,7 @@
+ reg = <0>;
+ spi-rx-bus-width = <2>;
+
+- partitions@A0000000 {
++ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+diff --git a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi
+index 7f6aefd134514..e7534fe9c53cf 100644
+--- a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi
++++ b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi
+@@ -29,7 +29,7 @@
+ compatible = "smsc,lan9221","smsc,lan9115";
+ bank-width = <2>;
+
+- gpmc,mux-add-data;
++ gpmc,mux-add-data = <0>;
+ gpmc,cs-on-ns = <0>;
+ gpmc,cs-rd-off-ns = <42>;
+ gpmc,cs-wr-off-ns = <36>;
+diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts
+new file mode 100644
+index 0000000000000..990ff2d846868
+--- /dev/null
++++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts
+@@ -0,0 +1,47 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/dts-v1/;
++
++#include "omap3-beagle.dts"
++
++/ {
++ model = "TI OMAP3 BeagleBoard A to B4";
++ compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
++};
++
++/*
++ * Workaround for capacitor C70 issue, see "Boards revision A and < B5"
++ * section at https://elinux.org/BeagleBoard_Community
++ */
++
++/* Unusable as clocksource because of unreliable oscillator */
++&counter32k {
++ status = "disabled";
++};
++
++/* Unusable as clockevent because of unreliable oscillator, allow to idle */
++&timer1_target {
++ /delete-property/ti,no-reset-on-init;
++ /delete-property/ti,no-idle;
++ timer@0 {
++ /delete-property/ti,timer-alwon;
++ };
++};
++
++/* Preferred always-on timer for clocksource */
++&timer12_target {
++ ti,no-reset-on-init;
++ ti,no-idle;
++ timer@0 {
++ /* Always clocked by secure_32k_fck */
++ };
++};
++
++/* Preferred timer for clockevent */
++&timer2_target {
++ ti,no-reset-on-init;
++ ti,no-idle;
++ timer@0 {
++ assigned-clocks = <&gpt2_fck>;
++ assigned-clock-parents = <&sys_ck>;
++ };
++};
+diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
+index f9f34b8458e91..0548b391334fd 100644
+--- a/arch/arm/boot/dts/omap3-beagle.dts
++++ b/arch/arm/boot/dts/omap3-beagle.dts
+@@ -304,39 +304,6 @@
+ phys = <0 &hsusb2_phy>;
+ };
+
+-/* Unusable as clocksource because of unreliable oscillator */
+-&counter32k {
+- status = "disabled";
+-};
+-
+-/* Unusable as clockevent because if unreliable oscillator, allow to idle */
+-&timer1_target {
+- /delete-property/ti,no-reset-on-init;
+- /delete-property/ti,no-idle;
+- timer@0 {
+- /delete-property/ti,timer-alwon;
+- };
+-};
+-
+-/* Preferred always-on timer for clocksource */
+-&timer12_target {
+- ti,no-reset-on-init;
+- ti,no-idle;
+- timer@0 {
+- /* Always clocked by secure_32k_fck */
+- };
+-};
+-
+-/* Preferred timer for clockevent */
+-&timer2_target {
+- ti,no-reset-on-init;
+- ti,no-idle;
+- timer@0 {
+- assigned-clocks = <&gpt2_fck>;
+- assigned-clock-parents = <&sys_ck>;
+- };
+-};
+-
+ &twl_gpio {
+ ti,use-leds;
+ /* pullups: BIT(1) */
+diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+index e61b8a2bfb7de..51baedf1603bd 100644
+--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
++++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+@@ -227,7 +227,7 @@
+
+ interrupt-parent = <&gpio2>;
+ interrupts = <25 0>; /* gpio_57 */
+- pendown-gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio2 25 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <0x0>;
+ ti,x-max = /bits/ 16 <0x0fff>;
+diff --git a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
+index 2c19d6e255bdc..6883ccb45600b 100644
+--- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
++++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi
+@@ -158,6 +158,24 @@
+ status = "disabled";
+ };
+
++/* Unusable as clockevent because if unreliable oscillator, allow to idle */
++&timer1_target {
++ /delete-property/ti,no-reset-on-init;
++ /delete-property/ti,no-idle;
++ timer@0 {
++ /delete-property/ti,timer-alwon;
++ };
++};
++
++/* Preferred timer for clockevent */
++&timer12_target {
++ ti,no-reset-on-init;
++ ti,no-idle;
++ timer@0 {
++ /* Always clocked by secure_32k_fck */
++ };
++};
++
+ &twl_gpio {
+ ti,use-leds;
+ /*
+diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
+index 3decc2d78a6ca..a7f99ae0c1fe9 100644
+--- a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
++++ b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
+@@ -54,7 +54,7 @@
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <27 0>; /* gpio_27 */
+- pendown-gpio = <&gpio1 27 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio1 27 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <0x0>;
+ ti,x-max = /bits/ 16 <0x0fff>;
+diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
+index c2995a280729d..162d0726b0080 100644
+--- a/arch/arm/boot/dts/omap3-devkit8000.dts
++++ b/arch/arm/boot/dts/omap3-devkit8000.dts
+@@ -14,36 +14,3 @@
+ display2 = &tv0;
+ };
+ };
+-
+-/* Unusable as clocksource because of unreliable oscillator */
+-&counter32k {
+- status = "disabled";
+-};
+-
+-/* Unusable as clockevent because if unreliable oscillator, allow to idle */
+-&timer1_target {
+- /delete-property/ti,no-reset-on-init;
+- /delete-property/ti,no-idle;
+- timer@0 {
+- /delete-property/ti,timer-alwon;
+- };
+-};
+-
+-/* Preferred always-on timer for clocksource */
+-&timer12_target {
+- ti,no-reset-on-init;
+- ti,no-idle;
+- timer@0 {
+- /* Always clocked by secure_32k_fck */
+- };
+-};
+-
+-/* Preferred timer for clockevent */
+-&timer2_target {
+- ti,no-reset-on-init;
+- ti,no-idle;
+- timer@0 {
+- assigned-clocks = <&gpt2_fck>;
+- assigned-clock-parents = <&sys_ck>;
+- };
+-};
+diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
+index 938cc691bb2fe..bb5e00b36d8dc 100644
+--- a/arch/arm/boot/dts/omap3-gta04.dtsi
++++ b/arch/arm/boot/dts/omap3-gta04.dtsi
+@@ -31,6 +31,8 @@
+ aliases {
+ display0 = &lcd;
+ display1 = &tv0;
++ /delete-property/ mmc2;
++ /delete-property/ mmc3;
+ };
+
+ ldo_3v3: fixedregulator {
+@@ -515,7 +517,7 @@
+ compatible = "bosch,bma180";
+ reg = <0x41>;
+ pinctrl-names = "default";
+- pintcrl-0 = <&bma180_pins>;
++ pinctrl-0 = <&bma180_pins>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_115 */
+ };
+@@ -607,6 +609,22 @@
+ clock-frequency = <100000>;
+ };
+
++&mcspi1 {
++ status = "disabled";
++};
++
++&mcspi2 {
++ status = "disabled";
++};
++
++&mcspi3 {
++ status = "disabled";
++};
++
++&mcspi4 {
++ status = "disabled";
++};
++
+ &usb_otg_hs {
+ interface-type = <0>;
+ usb-phy = <&usb2_phy>;
+diff --git a/arch/arm/boot/dts/omap3-gta04a5one.dts b/arch/arm/boot/dts/omap3-gta04a5one.dts
+index 9db9fe67cd63b..95df45cc70c09 100644
+--- a/arch/arm/boot/dts/omap3-gta04a5one.dts
++++ b/arch/arm/boot/dts/omap3-gta04a5one.dts
+@@ -5,9 +5,11 @@
+
+ #include "omap3-gta04a5.dts"
+
+-&omap3_pmx_core {
++/ {
+ model = "Goldelico GTA04A5/Letux 2804 with OneNAND";
++};
+
++&omap3_pmx_core {
+ gpmc_pins: pinmux_gpmc_pins {
+ pinctrl-single,pins = <
+
+diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+index 73d477898ec2a..06e7cf96c6639 100644
+--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
++++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+@@ -311,7 +311,7 @@
+ interrupt-parent = <&gpio1>;
+ interrupts = <8 0>; /* boot6 / gpio_8 */
+ spi-max-frequency = <1000000>;
+- pendown-gpio = <&gpio1 8 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio1 8 GPIO_ACTIVE_LOW>;
+ vcc-supply = <&reg_vcc3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&tsc2048_pins>;
+diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
+index 32335d4ce478b..d40c3d2c4914e 100644
+--- a/arch/arm/boot/dts/omap3-n900.dts
++++ b/arch/arm/boot/dts/omap3-n900.dts
+@@ -8,6 +8,7 @@
+
+ #include "omap34xx.dtsi"
+ #include <dt-bindings/input/input.h>
++#include <dt-bindings/leds/common.h>
+
+ /*
+ * Default secure signed bootloader (Nokia X-Loader) does not enable L3 firewall
+@@ -630,63 +631,92 @@
+ };
+
+ lp5523: lp5523@32 {
++ #address-cells = <1>;
++ #size-cells = <0>;
+ compatible = "national,lp5523";
+ reg = <0x32>;
+ clock-mode = /bits/ 8 <0>; /* LP55XX_CLOCK_AUTO */
+- enable-gpio = <&gpio2 9 GPIO_ACTIVE_HIGH>; /* 41 */
++ enable-gpios = <&gpio2 9 GPIO_ACTIVE_HIGH>; /* 41 */
+
+- chan0 {
++ led@0 {
++ reg = <0>;
+ chan-name = "lp5523:kb1";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+
+- chan1 {
++ led@1 {
++ reg = <1>;
+ chan-name = "lp5523:kb2";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+
+- chan2 {
++ led@2 {
++ reg = <2>;
+ chan-name = "lp5523:kb3";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+
+- chan3 {
++ led@3 {
++ reg = <3>;
+ chan-name = "lp5523:kb4";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+
+- chan4 {
++ led@4 {
++ reg = <4>;
+ chan-name = "lp5523:b";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_BLUE>;
++ function = LED_FUNCTION_STATUS;
+ };
+
+- chan5 {
++ led@5 {
++ reg = <5>;
+ chan-name = "lp5523:g";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_GREEN>;
++ function = LED_FUNCTION_STATUS;
+ };
+
+- chan6 {
++ led@6 {
++ reg = <6>;
+ chan-name = "lp5523:r";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_RED>;
++ function = LED_FUNCTION_STATUS;
+ };
+
+- chan7 {
++ led@7 {
++ reg = <7>;
+ chan-name = "lp5523:kb5";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+
+- chan8 {
++ led@8 {
++ reg = <8>;
+ chan-name = "lp5523:kb6";
+ led-cur = /bits/ 8 <50>;
+ max-cur = /bits/ 8 <100>;
++ color = <LED_COLOR_ID_WHITE>;
++ function = LED_FUNCTION_KBD_BACKLIGHT;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
+index 1d6e88f99eb31..c3570acc35fad 100644
+--- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
+@@ -149,7 +149,7 @@
+
+ interrupt-parent = <&gpio4>;
+ interrupts = <18 0>; /* gpio_114 */
+- pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <0x0>;
+ ti,x-max = /bits/ 16 <0x0fff>;
+diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
+index 7e30f9d45790e..d95a0e130058c 100644
+--- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
+@@ -160,7 +160,7 @@
+
+ interrupt-parent = <&gpio4>;
+ interrupts = <18 0>; /* gpio_114 */
+- pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <0x0>;
+ ti,x-max = /bits/ 16 <0x0fff>;
+diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
+index e5da3bc6f1050..218a10c0d8159 100644
+--- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
+@@ -22,7 +22,7 @@
+ compatible = "smsc,lan9221","smsc,lan9115";
+ bank-width = <2>;
+
+- gpmc,mux-add-data;
++ gpmc,mux-add-data = <0>;
+ gpmc,cs-on-ns = <0>;
+ gpmc,cs-rd-off-ns = <42>;
+ gpmc,cs-wr-off-ns = <36>;
+diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
+index 37608af6c07f5..ca6d777ebf843 100644
+--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
++++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
+@@ -651,7 +651,7 @@
+ pinctrl-0 = <&penirq_pins>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <30 IRQ_TYPE_NONE>; /* GPIO_94 */
+- pendown-gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio3 30 GPIO_ACTIVE_LOW>;
+ vcc-supply = <&vaux4>;
+
+ ti,x-min = /bits/ 16 <0>;
+diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
+index ca759b7b8a580..e62ea8b6d53fd 100644
+--- a/arch/arm/boot/dts/omap5-cm-t54.dts
++++ b/arch/arm/boot/dts/omap5-cm-t54.dts
+@@ -354,7 +354,7 @@
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <15 0>; /* gpio1_wk15 */
+- pendown-gpio = <&gpio1 15 GPIO_ACTIVE_HIGH>;
++ pendown-gpio = <&gpio1 15 GPIO_ACTIVE_LOW>;
+
+
+ ti,x-min = /bits/ 16 <0x0>;
+diff --git a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi
+index 31f59de5190b8..7af41361c4800 100644
+--- a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi
++++ b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi
+@@ -28,7 +28,7 @@ partitions {
+ label = "rofs";
+ };
+
+- rwfs@6000000 {
++ rwfs@2a00000 {
+ reg = <0x2a00000 0x1600000>; // 22MB
+ label = "rwfs";
+ };
+diff --git a/arch/arm/boot/dts/openbmc-flash-layout.dtsi b/arch/arm/boot/dts/openbmc-flash-layout.dtsi
+index 6c26524e93e11..b47e14063c380 100644
+--- a/arch/arm/boot/dts/openbmc-flash-layout.dtsi
++++ b/arch/arm/boot/dts/openbmc-flash-layout.dtsi
+@@ -20,7 +20,7 @@ partitions {
+ label = "kernel";
+ };
+
+- rofs@c0000 {
++ rofs@4c0000 {
+ reg = <0x4c0000 0x1740000>;
+ label = "rofs";
+ };
+diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi
+index 90846a7655b49..dde4364892bf0 100644
+--- a/arch/arm/boot/dts/ox820.dtsi
++++ b/arch/arm/boot/dts/ox820.dtsi
+@@ -287,7 +287,7 @@
+ clocks = <&armclk>;
+ };
+
+- gic: gic@1000 {
++ gic: interrupt-controller@1000 {
+ compatible = "arm,arm11mp-gic";
+ interrupt-controller;
+ #interrupt-cells = <3>;
+diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi
+index d1c1c6aab2b87..0e830476fefd2 100644
+--- a/arch/arm/boot/dts/qcom-apq8064.dtsi
++++ b/arch/arm/boot/dts/qcom-apq8064.dtsi
+@@ -1571,7 +1571,7 @@
+ };
+
+ etb@1a01000 {
+- compatible = "coresight-etb10", "arm,primecell";
++ compatible = "arm,coresight-etb10", "arm,primecell";
+ reg = <0x1a01000 0x1000>;
+
+ clocks = <&rpmcc RPM_QDSS_CLK>;
+diff --git a/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c1.dts b/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c1.dts
+index b0f476ff017f9..aadca9bf416cb 100644
+--- a/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c1.dts
++++ b/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c1.dts
+@@ -11,9 +11,9 @@
+ dma@7984000 {
+ status = "okay";
+ };
+-
+- qpic-nand@79b0000 {
+- status = "okay";
+- };
+ };
+ };
++
++&nand {
++ status = "okay";
++};
+diff --git a/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1.dtsi b/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1.dtsi
+index 7a337dc087417..726aa30eeef54 100644
+--- a/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1.dtsi
++++ b/arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1.dtsi
+@@ -102,10 +102,10 @@
+ status = "okay";
+ perst-gpio = <&tlmm 38 0x1>;
+ };
+-
+- qpic-nand@79b0000 {
+- pinctrl-0 = <&nand_pins>;
+- pinctrl-names = "default";
+- };
+ };
+ };
++
++&nand {
++ pinctrl-0 = <&nand_pins>;
++ pinctrl-names = "default";
++};
+diff --git a/arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1.dtsi b/arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1.dtsi
+index 94872518b5a23..9988b9eab8035 100644
+--- a/arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1.dtsi
++++ b/arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1.dtsi
+@@ -65,11 +65,11 @@
+ dma@7984000 {
+ status = "okay";
+ };
+-
+- qpic-nand@79b0000 {
+- pinctrl-0 = <&nand_pins>;
+- pinctrl-names = "default";
+- status = "okay";
+- };
+ };
+ };
++
++&nand {
++ pinctrl-0 = <&nand_pins>;
++ pinctrl-names = "default";
++ status = "okay";
++};
+diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi
+index ff1bdb10ad198..9dcf308b3ad49 100644
+--- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
++++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
+@@ -142,7 +142,8 @@
+ clocks {
+ sleep_clk: sleep_clk {
+ compatible = "fixed-clock";
+- clock-frequency = <32768>;
++ clock-frequency = <32000>;
++ clock-output-names = "gcc_sleep_clk_src";
+ #clock-cells = <0>;
+ };
+
+@@ -423,8 +424,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x81000000 0 0x40200000 0x40200000 0 0x00100000>,
+- <0x82000000 0 0x40300000 0x40300000 0 0x00d00000>;
++ ranges = <0x81000000 0x0 0x00000000 0x40200000 0x0 0x00100000>,
++ <0x82000000 0x0 0x40300000 0x40300000 0x0 0x00d00000>;
+
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+diff --git a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
+index f7ea2e5dd1914..971d2e2292600 100644
+--- a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
++++ b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts
+@@ -19,12 +19,12 @@
+ stdout-path = "serial0:115200n8";
+ };
+
+- memory@0 {
++ memory@42000000 {
+ reg = <0x42000000 0x3e000000>;
+ device_type = "memory";
+ };
+
+- mdio0: mdio@0 {
++ mdio0: mdio-0 {
+ status = "okay";
+ compatible = "virtual,mdio-gpio";
+ gpios = <&qcom_pinmux 1 GPIO_ACTIVE_HIGH>,
+@@ -91,7 +91,7 @@
+ };
+ };
+
+- mdio1: mdio@1 {
++ mdio1: mdio-1 {
+ status = "okay";
+ compatible = "virtual,mdio-gpio";
+ gpios = <&qcom_pinmux 11 GPIO_ACTIVE_HIGH>,
+diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi
+index 4139d3817bd6f..f4139411c41ed 100644
+--- a/arch/arm/boot/dts/qcom-ipq8064.dtsi
++++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi
+@@ -808,8 +808,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x81000000 0 0x0fe00000 0x0fe00000 0 0x00100000 /* downstream I/O */
+- 0x82000000 0 0x08000000 0x08000000 0 0x07e00000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x0fe00000 0x0 0x00010000 /* I/O */
++ 0x82000000 0x0 0x08000000 0x08000000 0x0 0x07e00000>; /* MEM */
+
+ interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -859,8 +859,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x81000000 0 0x31e00000 0x31e00000 0 0x00100000 /* downstream I/O */
+- 0x82000000 0 0x2e000000 0x2e000000 0 0x03e00000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x31e00000 0x0 0x00010000 /* I/O */
++ 0x82000000 0x0 0x2e000000 0x2e000000 0x0 0x03e00000>; /* MEM */
+
+ interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -910,8 +910,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x81000000 0 0x35e00000 0x35e00000 0 0x00100000 /* downstream I/O */
+- 0x82000000 0 0x32000000 0x32000000 0 0x03e00000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x35e00000 0x0 0x00010000 /* I/O */
++ 0x82000000 0x0 0x32000000 0x32000000 0x0 0x03e00000>; /* MEM */
+
+ interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi b/arch/arm/boot/dts/qcom-mdm9615.dtsi
+index dda2ceec6591a..ad9b52d53ef9b 100644
+--- a/arch/arm/boot/dts/qcom-mdm9615.dtsi
++++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi
+@@ -324,6 +324,7 @@
+
+ pmicgpio: gpio@150 {
+ compatible = "qcom,pm8018-gpio", "qcom,ssbi-gpio";
++ reg = <0x150>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ gpio-controller;
+diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi
+index 172ea3c70eac2..c197927e7435f 100644
+--- a/arch/arm/boot/dts/qcom-msm8960.dtsi
++++ b/arch/arm/boot/dts/qcom-msm8960.dtsi
+@@ -146,7 +146,9 @@
+ reg = <0x108000 0x1000>;
+ qcom,ipc = <&l2cc 0x8 2>;
+
+- interrupts = <0 19 0>, <0 21 0>, <0 22 0>;
++ interrupts = <GIC_SPI 19 IRQ_TYPE_EDGE_RISING>,
++ <GIC_SPI 21 IRQ_TYPE_EDGE_RISING>,
++ <GIC_SPI 22 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "ack", "err", "wakeup";
+
+ regulators {
+@@ -192,7 +194,7 @@
+ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
+ reg = <0x16440000 0x1000>,
+ <0x16400000 0x1000>;
+- interrupts = <0 154 0x0>;
++ interrupts = <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>;
+ clock-names = "core", "iface";
+ status = "disabled";
+@@ -318,7 +320,7 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x16080000 0x1000>;
+- interrupts = <0 147 0>;
++ interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ spi-max-frequency = <24000000>;
+ cs-gpios = <&msmgpio 8 0>;
+
+diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi
+index 78ec496d5bc30..05d51839d40a1 100644
+--- a/arch/arm/boot/dts/qcom-msm8974.dtsi
++++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
+@@ -718,7 +718,7 @@
+ blsp2_uart7: serial@f995d000 {
+ compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+ reg = <0xf995d000 0x1000>;
+- interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>;
++ interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
+ clock-names = "core", "iface";
+ status = "disabled";
+@@ -1589,8 +1589,8 @@
+ #phy-cells = <0>;
+ qcom,dsi-phy-index = <0>;
+
+- clocks = <&mmcc MDSS_AHB_CLK>;
+- clock-names = "iface";
++ clocks = <&mmcc MDSS_AHB_CLK>, <&xo_board>;
++ clock-names = "iface", "ref";
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/qcom-pm8841.dtsi b/arch/arm/boot/dts/qcom-pm8841.dtsi
+index 2fd59c440903d..c73e5b149ac5e 100644
+--- a/arch/arm/boot/dts/qcom-pm8841.dtsi
++++ b/arch/arm/boot/dts/qcom-pm8841.dtsi
+@@ -25,6 +25,7 @@
+ compatible = "qcom,spmi-temp-alarm";
+ reg = <0x2400>;
+ interrupts = <4 0x24 0 IRQ_TYPE_EDGE_RISING>;
++ #thermal-sensor-cells = <0>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi
+index 1e6ce035f76a9..9d62487f6c8ff 100644
+--- a/arch/arm/boot/dts/qcom-sdx55.dtsi
++++ b/arch/arm/boot/dts/qcom-sdx55.dtsi
+@@ -205,7 +205,7 @@
+ blsp1_uart3: serial@831000 {
+ compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+ reg = <0x00831000 0x200>;
+- interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_LOW>;
++ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc 30>,
+ <&gcc 9>;
+ clock-names = "core", "iface";
+@@ -334,12 +334,10 @@
+ clocks = <&rpmhcc RPMH_IPA_CLK>;
+ clock-names = "core";
+
+- interconnects = <&system_noc MASTER_IPA &system_noc SLAVE_SNOC_MEM_NOC_GC>,
+- <&mem_noc MASTER_SNOC_GC_MEM_NOC &mc_virt SLAVE_EBI_CH0>,
++ interconnects = <&system_noc MASTER_IPA &mc_virt SLAVE_EBI_CH0>,
+ <&system_noc MASTER_IPA &system_noc SLAVE_OCIMEM>,
+ <&mem_noc MASTER_AMPSS_M0 &system_noc SLAVE_IPA_CFG>;
+- interconnect-names = "memory-a",
+- "memory-b",
++ interconnect-names = "memory",
+ "imem",
+ "config";
+
+@@ -504,7 +502,7 @@
+ };
+
+ apps_smmu: iommu@15000000 {
+- compatible = "qcom,sdx55-smmu-500", "arm,mmu-500";
++ compatible = "qcom,sdx55-smmu-500", "qcom,smmu-500", "arm,mmu-500";
+ reg = <0x15000000 0x20000>;
+ #iommu-cells = <2>;
+ #global-interrupts = <1>;
+diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts
+index 2a7e6624efb93..94216f870b57c 100644
+--- a/arch/arm/boot/dts/rk3036-evb.dts
++++ b/arch/arm/boot/dts/rk3036-evb.dts
+@@ -31,11 +31,10 @@
+ &i2c1 {
+ status = "okay";
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+- clock-frequency = <32768>;
+ clock-output-names = "xin32k";
+ };
+ };
+diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts
+index 36c0945f43b22..3718fac62841c 100644
+--- a/arch/arm/boot/dts/rk3188-radxarock.dts
++++ b/arch/arm/boot/dts/rk3188-radxarock.dts
+@@ -71,7 +71,7 @@
+ #sound-dai-cells = <0>;
+ };
+
+- ir_recv: gpio-ir-receiver {
++ ir_recv: ir-receiver {
+ compatible = "gpio-ir-receiver";
+ gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi
+index 2c606494b78c4..e07b1d79c470a 100644
+--- a/arch/arm/boot/dts/rk3188.dtsi
++++ b/arch/arm/boot/dts/rk3188.dtsi
+@@ -378,7 +378,7 @@
+ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>;
+ };
+
+- lcdc1_rgb24: ldcd1-rgb24 {
++ lcdc1_rgb24: lcdc1-rgb24 {
+ rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>,
+ <2 RK_PA1 1 &pcfg_pull_none>,
+ <2 RK_PA2 1 &pcfg_pull_none>,
+@@ -606,7 +606,6 @@
+
+ &global_timer {
+ interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
+- status = "disabled";
+ };
+
+ &local_timer {
+diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi
+index 75af99c76d7ea..f31cf1df892b2 100644
+--- a/arch/arm/boot/dts/rk322x.dtsi
++++ b/arch/arm/boot/dts/rk322x.dtsi
+@@ -718,8 +718,8 @@
+ interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+ assigned-clocks = <&cru SCLK_HDMI_PHY>;
+ assigned-clock-parents = <&hdmi_phy>;
+- clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>;
+- clock-names = "isfr", "iahb", "cec";
++ clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>;
++ clock-names = "iahb", "isfr", "cec";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>;
+ resets = <&cru SRST_HDMI_P>;
+diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+index be695b8c1f672..8a635c2431274 100644
+--- a/arch/arm/boot/dts/rk3288-evb-act8846.dts
++++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts
+@@ -54,7 +54,7 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563@51 {
++ rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+
+diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi
+index 7fb582302b326..74ba7e21850a5 100644
+--- a/arch/arm/boot/dts/rk3288-firefly.dtsi
++++ b/arch/arm/boot/dts/rk3288-firefly.dtsi
+@@ -233,11 +233,10 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+- clock-frequency = <32768>;
+ clock-output-names = "xin32k";
+ interrupt-parent = <&gpio7>;
+ interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>;
+diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts
+index 713f55e143c69..db1eb648e0e1a 100644
+--- a/arch/arm/boot/dts/rk3288-miqi.dts
++++ b/arch/arm/boot/dts/rk3288-miqi.dts
+@@ -162,11 +162,10 @@
+ vin-supply = <&vcc_sys>;
+ };
+
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+- clock-frequency = <32768>;
+ clock-output-names = "xin32k";
+ };
+
+diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts
+index c4d1d142d8c68..bc44606ca05d8 100644
+--- a/arch/arm/boot/dts/rk3288-rock2-square.dts
++++ b/arch/arm/boot/dts/rk3288-rock2-square.dts
+@@ -165,11 +165,10 @@
+ };
+
+ &i2c0 {
+- hym8563: hym8563@51 {
++ hym8563: rtc@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+- clock-frequency = <32768>;
+ clock-output-names = "xin32k";
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>;
+diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi
+index 0ae2bd150e372..793951655b73b 100644
+--- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi
++++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi
+@@ -241,7 +241,6 @@
+ interrupt-parent = <&gpio5>;
+ interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>;
+ #clock-cells = <0>;
+- clock-frequency = <32768>;
+ clock-output-names = "hym8563";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hym8563_int>;
+diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
+index 4dcdcf17c9776..2e6138eeacd15 100644
+--- a/arch/arm/boot/dts/rk3288.dtsi
++++ b/arch/arm/boot/dts/rk3288.dtsi
+@@ -940,7 +940,7 @@
+ status = "disabled";
+ };
+
+- spdif: sound@ff88b0000 {
++ spdif: sound@ff8b0000 {
+ compatible = "rockchip,rk3288-spdif", "rockchip,rk3066-spdif";
+ reg = <0x0 0xff8b0000 0x0 0x10000>;
+ #sound-dai-cells = <0>;
+@@ -971,7 +971,7 @@
+ status = "disabled";
+ };
+
+- crypto: cypto-controller@ff8a0000 {
++ crypto: crypto@ff8a0000 {
+ compatible = "rockchip,rk3288-crypto";
+ reg = <0x0 0xff8a0000 0x0 0x4000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+@@ -1180,6 +1180,7 @@
+ clock-names = "dp", "pclk";
+ phys = <&edp_phy>;
+ phy-names = "dp";
++ power-domains = <&power RK3288_PD_VIO>;
+ resets = <&cru SRST_EDP>;
+ reset-names = "dp";
+ rockchip,grf = <&grf>;
+diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
+index 616a828e0c6e4..17e89d30de781 100644
+--- a/arch/arm/boot/dts/rk3xxx.dtsi
++++ b/arch/arm/boot/dts/rk3xxx.dtsi
+@@ -76,6 +76,13 @@
+ reg = <0x1013c200 0x20>;
+ interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>;
+ clocks = <&cru CORE_PERI>;
++ status = "disabled";
++ /* The clock source and the sched_clock provided by the arm_global_timer
++ * on Rockchip rk3066a/rk3188 are quite unstable because their rates
++ * depend on the CPU frequency.
++ * Keep the arm_global_timer disabled in order to have the
++ * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default.
++ */
+ };
+
+ local_timer: local-timer@1013c600 {
+diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts
+index 285555b9ed943..0b07b3c319604 100644
+--- a/arch/arm/boot/dts/s3c6410-mini6410.dts
++++ b/arch/arm/boot/dts/s3c6410-mini6410.dts
+@@ -51,7 +51,7 @@
+
+ ethernet@18000000 {
+ compatible = "davicom,dm9000";
+- reg = <0x18000000 0x2 0x18000004 0x2>;
++ reg = <0x18000000 0x2>, <0x18000004 0x2>;
+ interrupt-parent = <&gpn>;
+ interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+ davicom,no-eeprom;
+@@ -193,12 +193,12 @@
+ };
+
+ &pinctrl0 {
+- gpio_leds: gpio-leds {
++ gpio_leds: gpio-leds-pins {
+ samsung,pins = "gpk-4", "gpk-5", "gpk-6", "gpk-7";
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- gpio_keys: gpio-keys {
++ gpio_keys: gpio-keys-pins {
+ samsung,pins = "gpn-0", "gpn-1", "gpn-2", "gpn-3",
+ "gpn-4", "gpn-5", "gpl-11", "gpl-12";
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+diff --git a/arch/arm/boot/dts/s3c64xx-pinctrl.dtsi b/arch/arm/boot/dts/s3c64xx-pinctrl.dtsi
+index 8e9594d64b579..0a3186d57cb56 100644
+--- a/arch/arm/boot/dts/s3c64xx-pinctrl.dtsi
++++ b/arch/arm/boot/dts/s3c64xx-pinctrl.dtsi
+@@ -16,111 +16,111 @@
+ * Pin banks
+ */
+
+- gpa: gpa {
++ gpa: gpa-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpb: gpb {
++ gpb: gpb-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpc: gpc {
++ gpc: gpc-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpd: gpd {
++ gpd: gpd-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpe: gpe {
++ gpe: gpe-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+- gpf: gpf {
++ gpf: gpf-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpg: gpg {
++ gpg: gpg-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gph: gph {
++ gph: gph-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpi: gpi {
++ gpi: gpi-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+- gpj: gpj {
++ gpj: gpj-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+- gpk: gpk {
++ gpk: gpk-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+- gpl: gpl {
++ gpl: gpl-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpm: gpm {
++ gpm: gpm-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpn: gpn {
++ gpn: gpn-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpo: gpo {
++ gpo: gpo-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpp: gpp {
++ gpp: gpp-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+- gpq: gpq {
++ gpq: gpq-gpio-bank {
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+@@ -131,225 +131,225 @@
+ * Pin groups
+ */
+
+- uart0_data: uart0-data {
++ uart0_data: uart0-data-pins {
+ samsung,pins = "gpa-0", "gpa-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- uart0_fctl: uart0-fctl {
++ uart0_fctl: uart0-fctl-pins {
+ samsung,pins = "gpa-2", "gpa-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- uart1_data: uart1-data {
++ uart1_data: uart1-data-pins {
+ samsung,pins = "gpa-4", "gpa-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- uart1_fctl: uart1-fctl {
++ uart1_fctl: uart1-fctl-pins {
+ samsung,pins = "gpa-6", "gpa-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- uart2_data: uart2-data {
++ uart2_data: uart2-data-pins {
+ samsung,pins = "gpb-0", "gpb-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- uart3_data: uart3-data {
++ uart3_data: uart3-data-pins {
+ samsung,pins = "gpb-2", "gpb-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- ext_dma_0: ext-dma-0 {
++ ext_dma_0: ext-dma-0-pins {
+ samsung,pins = "gpb-0", "gpb-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- ext_dma_1: ext-dma-1 {
++ ext_dma_1: ext-dma-1-pins {
+ samsung,pins = "gpb-2", "gpb-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- irda_data_0: irda-data-0 {
++ irda_data_0: irda-data-0-pins {
+ samsung,pins = "gpb-0", "gpb-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- irda_data_1: irda-data-1 {
++ irda_data_1: irda-data-1-pins {
+ samsung,pins = "gpb-2", "gpb-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- irda_sdbw: irda-sdbw {
++ irda_sdbw: irda-sdbw-pins {
+ samsung,pins = "gpb-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2c0_bus: i2c0-bus {
++ i2c0_bus: i2c0-bus-pins {
+ samsung,pins = "gpb-5", "gpb-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- i2c1_bus: i2c1-bus {
++ i2c1_bus: i2c1-bus-pins {
+ /* S3C6410-only */
+ samsung,pins = "gpb-2", "gpb-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_6>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- spi0_bus: spi0-bus {
++ spi0_bus: spi0-bus-pins {
+ samsung,pins = "gpc-0", "gpc-1", "gpc-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- spi0_cs: spi0-cs {
++ spi0_cs: spi0-cs-pins {
+ samsung,pins = "gpc-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- spi1_bus: spi1-bus {
++ spi1_bus: spi1-bus-pins {
+ samsung,pins = "gpc-4", "gpc-5", "gpc-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- spi1_cs: spi1-cs {
++ spi1_cs: spi1-cs-pins {
+ samsung,pins = "gpc-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd0_cmd: sd0-cmd {
++ sd0_cmd: sd0-cmd-pins {
+ samsung,pins = "gpg-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd0_clk: sd0-clk {
++ sd0_clk: sd0-clk-pins {
+ samsung,pins = "gpg-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd0_bus1: sd0-bus1 {
++ sd0_bus1: sd0-bus1-pins {
+ samsung,pins = "gpg-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd0_bus4: sd0-bus4 {
++ sd0_bus4: sd0-bus4-pins {
+ samsung,pins = "gpg-2", "gpg-3", "gpg-4", "gpg-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd0_cd: sd0-cd {
++ sd0_cd: sd0-cd-pins {
+ samsung,pins = "gpg-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- sd1_cmd: sd1-cmd {
++ sd1_cmd: sd1-cmd-pins {
+ samsung,pins = "gph-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd1_clk: sd1-clk {
++ sd1_clk: sd1-clk-pins {
+ samsung,pins = "gph-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd1_bus1: sd1-bus1 {
++ sd1_bus1: sd1-bus1-pins {
+ samsung,pins = "gph-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd1_bus4: sd1-bus4 {
++ sd1_bus4: sd1-bus4-pins {
+ samsung,pins = "gph-2", "gph-3", "gph-4", "gph-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd1_bus8: sd1-bus8 {
++ sd1_bus8: sd1-bus8-pins {
+ samsung,pins = "gph-2", "gph-3", "gph-4", "gph-5",
+ "gph-6", "gph-7", "gph-8", "gph-9";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd1_cd: sd1-cd {
++ sd1_cd: sd1-cd-pins {
+ samsung,pins = "gpg-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_UP>;
+ };
+
+- sd2_cmd: sd2-cmd {
++ sd2_cmd: sd2-cmd-pins {
+ samsung,pins = "gpc-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd2_clk: sd2-clk {
++ sd2_clk: sd2-clk-pins {
+ samsung,pins = "gpc-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd2_bus1: sd2-bus1 {
++ sd2_bus1: sd2-bus1-pins {
+ samsung,pins = "gph-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- sd2_bus4: sd2-bus4 {
++ sd2_bus4: sd2-bus4-pins {
+ samsung,pins = "gph-6", "gph-7", "gph-8", "gph-9";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s0_bus: i2s0-bus {
++ i2s0_bus: i2s0-bus-pins {
+ samsung,pins = "gpd-0", "gpd-2", "gpd-3", "gpd-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s0_cdclk: i2s0-cdclk {
++ i2s0_cdclk: i2s0-cdclk-pins {
+ samsung,pins = "gpd-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s1_bus: i2s1-bus {
++ i2s1_bus: i2s1-bus-pins {
+ samsung,pins = "gpe-0", "gpe-2", "gpe-3", "gpe-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s1_cdclk: i2s1-cdclk {
++ i2s1_cdclk: i2s1-cdclk-pins {
+ samsung,pins = "gpe-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s2_bus: i2s2-bus {
++ i2s2_bus: i2s2-bus-pins {
+ /* S3C6410-only */
+ samsung,pins = "gpc-4", "gpc-5", "gpc-6", "gph-6",
+ "gph-8", "gph-9";
+@@ -357,50 +357,50 @@
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- i2s2_cdclk: i2s2-cdclk {
++ i2s2_cdclk: i2s2-cdclk-pins {
+ /* S3C6410-only */
+ samsung,pins = "gph-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_5>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pcm0_bus: pcm0-bus {
++ pcm0_bus: pcm0-bus-pins {
+ samsung,pins = "gpd-0", "gpd-2", "gpd-3", "gpd-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pcm0_extclk: pcm0-extclk {
++ pcm0_extclk: pcm0-extclk-pins {
+ samsung,pins = "gpd-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pcm1_bus: pcm1-bus {
++ pcm1_bus: pcm1-bus-pins {
+ samsung,pins = "gpe-0", "gpe-2", "gpe-3", "gpe-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pcm1_extclk: pcm1-extclk {
++ pcm1_extclk: pcm1-extclk-pins {
+ samsung,pins = "gpe-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- ac97_bus_0: ac97-bus-0 {
++ ac97_bus_0: ac97-bus-0-pins {
+ samsung,pins = "gpd-0", "gpd-1", "gpd-2", "gpd-3", "gpd-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- ac97_bus_1: ac97-bus-1 {
++ ac97_bus_1: ac97-bus-1-pins {
+ samsung,pins = "gpe-0", "gpe-1", "gpe-2", "gpe-3", "gpe-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- cam_port: cam-port {
++ cam_port: cam-port-pins {
+ samsung,pins = "gpf-0", "gpf-1", "gpf-2", "gpf-4",
+ "gpf-5", "gpf-6", "gpf-7", "gpf-8",
+ "gpf-9", "gpf-10", "gpf-11", "gpf-12";
+@@ -408,242 +408,242 @@
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- cam_rst: cam-rst {
++ cam_rst: cam-rst-pins {
+ samsung,pins = "gpf-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- cam_field: cam-field {
++ cam_field: cam-field-pins {
+ /* S3C6410-only */
+ samsung,pins = "gpb-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pwm_extclk: pwm-extclk {
++ pwm_extclk: pwm-extclk-pins {
+ samsung,pins = "gpf-13";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pwm0_out: pwm0-out {
++ pwm0_out: pwm0-out-pins {
+ samsung,pins = "gpf-14";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- pwm1_out: pwm1-out {
++ pwm1_out: pwm1-out-pins {
+ samsung,pins = "gpf-15";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- clkout0: clkout-0 {
++ clkout0: clkout-0-pins {
+ samsung,pins = "gpf-14";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col0_0: keypad-col0-0 {
++ keypad_col0_0: keypad-col0-0-pins {
+ samsung,pins = "gph-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col1_0: keypad-col1-0 {
++ keypad_col1_0: keypad-col1-0-pins {
+ samsung,pins = "gph-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col2_0: keypad-col2-0 {
++ keypad_col2_0: keypad-col2-0-pins {
+ samsung,pins = "gph-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col3_0: keypad-col3-0 {
++ keypad_col3_0: keypad-col3-0-pins {
+ samsung,pins = "gph-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col4_0: keypad-col4-0 {
++ keypad_col4_0: keypad-col4-0-pins {
+ samsung,pins = "gph-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col5_0: keypad-col5-0 {
++ keypad_col5_0: keypad-col5-0-pins {
+ samsung,pins = "gph-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col6_0: keypad-col6-0 {
++ keypad_col6_0: keypad-col6-0-pins {
+ samsung,pins = "gph-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col7_0: keypad-col7-0 {
++ keypad_col7_0: keypad-col7-0-pins {
+ samsung,pins = "gph-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_4>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col0_1: keypad-col0-1 {
++ keypad_col0_1: keypad-col0-1-pins {
+ samsung,pins = "gpl-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col1_1: keypad-col1-1 {
++ keypad_col1_1: keypad-col1-1-pins {
+ samsung,pins = "gpl-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col2_1: keypad-col2-1 {
++ keypad_col2_1: keypad-col2-1-pins {
+ samsung,pins = "gpl-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col3_1: keypad-col3-1 {
++ keypad_col3_1: keypad-col3-1-pins {
+ samsung,pins = "gpl-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col4_1: keypad-col4-1 {
++ keypad_col4_1: keypad-col4-1-pins {
+ samsung,pins = "gpl-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col5_1: keypad-col5-1 {
++ keypad_col5_1: keypad-col5-1-pins {
+ samsung,pins = "gpl-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col6_1: keypad-col6-1 {
++ keypad_col6_1: keypad-col6-1-pins {
+ samsung,pins = "gpl-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_col7_1: keypad-col7-1 {
++ keypad_col7_1: keypad-col7-1-pins {
+ samsung,pins = "gpl-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row0_0: keypad-row0-0 {
++ keypad_row0_0: keypad-row0-0-pins {
+ samsung,pins = "gpk-8";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row1_0: keypad-row1-0 {
++ keypad_row1_0: keypad-row1-0-pins {
+ samsung,pins = "gpk-9";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row2_0: keypad-row2-0 {
++ keypad_row2_0: keypad-row2-0-pins {
+ samsung,pins = "gpk-10";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row3_0: keypad-row3-0 {
++ keypad_row3_0: keypad-row3-0-pins {
+ samsung,pins = "gpk-11";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row4_0: keypad-row4-0 {
++ keypad_row4_0: keypad-row4-0-pins {
+ samsung,pins = "gpk-12";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row5_0: keypad-row5-0 {
++ keypad_row5_0: keypad-row5-0-pins {
+ samsung,pins = "gpk-13";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row6_0: keypad-row6-0 {
++ keypad_row6_0: keypad-row6-0-pins {
+ samsung,pins = "gpk-14";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row7_0: keypad-row7-0 {
++ keypad_row7_0: keypad-row7-0-pins {
+ samsung,pins = "gpk-15";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row0_1: keypad-row0-1 {
++ keypad_row0_1: keypad-row0-1-pins {
+ samsung,pins = "gpn-0";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row1_1: keypad-row1-1 {
++ keypad_row1_1: keypad-row1-1-pins {
+ samsung,pins = "gpn-1";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row2_1: keypad-row2-1 {
++ keypad_row2_1: keypad-row2-1-pins {
+ samsung,pins = "gpn-2";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row3_1: keypad-row3-1 {
++ keypad_row3_1: keypad-row3-1-pins {
+ samsung,pins = "gpn-3";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row4_1: keypad-row4-1 {
++ keypad_row4_1: keypad-row4-1-pins {
+ samsung,pins = "gpn-4";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row5_1: keypad-row5-1 {
++ keypad_row5_1: keypad-row5-1-pins {
+ samsung,pins = "gpn-5";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row6_1: keypad-row6-1 {
++ keypad_row6_1: keypad-row6-1-pins {
+ samsung,pins = "gpn-6";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- keypad_row7_1: keypad-row7-1 {
++ keypad_row7_1: keypad-row7-1-pins {
+ samsung,pins = "gpn-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- lcd_ctrl: lcd-ctrl {
++ lcd_ctrl: lcd-ctrl-pins {
+ samsung,pins = "gpj-8", "gpj-9", "gpj-10", "gpj-11";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_2>;
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- lcd_data16: lcd-data-width16 {
++ lcd_data16: lcd-data-width16-pins {
+ samsung,pins = "gpi-3", "gpi-4", "gpi-5", "gpi-6",
+ "gpi-7", "gpi-10", "gpi-11", "gpi-12",
+ "gpi-13", "gpi-14", "gpi-15", "gpj-3",
+@@ -652,7 +652,7 @@
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- lcd_data18: lcd-data-width18 {
++ lcd_data18: lcd-data-width18-pins {
+ samsung,pins = "gpi-2", "gpi-3", "gpi-4", "gpi-5",
+ "gpi-6", "gpi-7", "gpi-10", "gpi-11",
+ "gpi-12", "gpi-13", "gpi-14", "gpi-15",
+@@ -662,7 +662,7 @@
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- lcd_data24: lcd-data-width24 {
++ lcd_data24: lcd-data-width24-pins {
+ samsung,pins = "gpi-0", "gpi-1", "gpi-2", "gpi-3",
+ "gpi-4", "gpi-5", "gpi-6", "gpi-7",
+ "gpi-8", "gpi-9", "gpi-10", "gpi-11",
+@@ -673,7 +673,7 @@
+ samsung,pin-pud = <S3C64XX_PIN_PULL_NONE>;
+ };
+
+- hsi_bus: hsi-bus {
++ hsi_bus: hsi-bus-pins {
+ samsung,pins = "gpk-0", "gpk-1", "gpk-2", "gpk-3",
+ "gpk-4", "gpk-5", "gpk-6", "gpk-7";
+ samsung,pin-function = <EXYNOS_PIN_FUNC_3>;
+diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi
+index 160f8cd9a68da..b6d55a782c208 100644
+--- a/arch/arm/boot/dts/s5pv210-aries.dtsi
++++ b/arch/arm/boot/dts/s5pv210-aries.dtsi
+@@ -564,7 +564,6 @@
+ reset-gpios = <&mp05 5 GPIO_ACTIVE_LOW>;
+ vdd3-supply = <&ldo7_reg>;
+ vci-supply = <&ldo17_reg>;
+- spi-cs-high;
+ spi-max-frequency = <1200000>;
+
+ pinctrl-names = "default";
+@@ -636,7 +635,7 @@
+ };
+
+ &i2s0 {
+- dmas = <&pdma0 9>, <&pdma0 10>, <&pdma0 11>;
++ dmas = <&pdma0 10>, <&pdma0 9>, <&pdma0 11>;
+ status = "okay";
+ };
+
+@@ -895,7 +894,7 @@
+ device-wakeup-gpios = <&gpg3 4 GPIO_ACTIVE_HIGH>;
+ interrupt-parent = <&gph2>;
+ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "host-wake";
++ interrupt-names = "host-wakeup";
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/s5pv210-smdkv210.dts b/arch/arm/boot/dts/s5pv210-smdkv210.dts
+index fbae768d65e27..901e7197b1368 100644
+--- a/arch/arm/boot/dts/s5pv210-smdkv210.dts
++++ b/arch/arm/boot/dts/s5pv210-smdkv210.dts
+@@ -41,7 +41,7 @@
+
+ ethernet@a8000000 {
+ compatible = "davicom,dm9000";
+- reg = <0xA8000000 0x2 0xA8000002 0x2>;
++ reg = <0xa8000000 0x2>, <0xa8000002 0x2>;
+ interrupt-parent = <&gph1>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ local-mac-address = [00 00 de ad be ef];
+@@ -55,6 +55,14 @@
+ default-brightness-level = <6>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pwm3_out>;
++ power-supply = <&dc5v_reg>;
++ };
++
++ dc5v_reg: regulator-0 {
++ compatible = "regulator-fixed";
++ regulator-name = "DC5V";
++ regulator-min-microvolt = <5000000>;
++ regulator-max-microvolt = <5000000>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi
+index 353ba7b09a0c0..415d0f555858c 100644
+--- a/arch/arm/boot/dts/s5pv210.dtsi
++++ b/arch/arm/boot/dts/s5pv210.dtsi
+@@ -239,8 +239,8 @@
+ reg = <0xeee30000 0x1000>;
+ interrupt-parent = <&vic2>;
+ interrupts = <16>;
+- dma-names = "rx", "tx", "tx-sec";
+- dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>;
++ dma-names = "tx", "rx", "tx-sec";
++ dmas = <&pdma1 10>, <&pdma1 9>, <&pdma1 11>;
+ clock-names = "iis",
+ "i2s_opclk0",
+ "i2s_opclk1";
+@@ -259,8 +259,8 @@
+ reg = <0xe2100000 0x1000>;
+ interrupt-parent = <&vic2>;
+ interrupts = <17>;
+- dma-names = "rx", "tx";
+- dmas = <&pdma1 12>, <&pdma1 13>;
++ dma-names = "tx", "rx";
++ dmas = <&pdma1 13>, <&pdma1 12>;
+ clock-names = "iis", "i2s_opclk0";
+ clocks = <&clocks CLK_I2S1>, <&clocks SCLK_AUDIO1>;
+ pinctrl-names = "default";
+@@ -274,8 +274,8 @@
+ reg = <0xe2a00000 0x1000>;
+ interrupt-parent = <&vic2>;
+ interrupts = <18>;
+- dma-names = "rx", "tx";
+- dmas = <&pdma1 14>, <&pdma1 15>;
++ dma-names = "tx", "rx";
++ dmas = <&pdma1 15>, <&pdma1 14>;
+ clock-names = "iis", "i2s_opclk0";
+ clocks = <&clocks CLK_I2S2>, <&clocks SCLK_AUDIO2>;
+ pinctrl-names = "default";
+@@ -582,7 +582,7 @@
+ interrupts = <29>;
+ clocks = <&clocks CLK_CSIS>,
+ <&clocks SCLK_CSIS>;
+- clock-names = "clk_csis",
++ clock-names = "csis",
+ "sclk_csis";
+ bus-width = <4>;
+ status = "disabled";
+diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
+index ec45ced3cde68..e1e0dec8cc1f2 100644
+--- a/arch/arm/boot/dts/sam9x60.dtsi
++++ b/arch/arm/boot/dts/sam9x60.dtsi
+@@ -567,7 +567,7 @@
+ mpddrc: mpddrc@ffffe800 {
+ compatible = "microchip,sam9x60-ddramc", "atmel,sama5d3-ddramc";
+ reg = <0xffffe800 0x200>;
+- clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_CORE PMC_MCK>;
++ clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_PERIPHERAL 49>;
+ clock-names = "ddrck", "mpddr";
+ };
+
+diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
+index 801969c113d64..4c87c2aa8fc86 100644
+--- a/arch/arm/boot/dts/sama5d2.dtsi
++++ b/arch/arm/boot/dts/sama5d2.dtsi
+@@ -413,7 +413,7 @@
+ pmecc: ecc-engine@f8014070 {
+ compatible = "atmel,sama5d2-pmecc";
+ reg = <0xf8014070 0x490>,
+- <0xf8014500 0x100>;
++ <0xf8014500 0x200>;
+ };
+ };
+
+@@ -1125,7 +1125,7 @@
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>;
+ clock-names = "pclk", "gclk";
+ assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>;
+- assigned-parrents = <&pmc PMC_TYPE_GCK 55>;
++ assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>;
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/sama7g5-pinfunc.h b/arch/arm/boot/dts/sama7g5-pinfunc.h
+index 22fe9e522a97b..6e87f0d4b8fce 100644
+--- a/arch/arm/boot/dts/sama7g5-pinfunc.h
++++ b/arch/arm/boot/dts/sama7g5-pinfunc.h
+@@ -261,7 +261,7 @@
+ #define PIN_PB2__FLEXCOM6_IO0 PINMUX_PIN(PIN_PB2, 2, 1)
+ #define PIN_PB2__ADTRG PINMUX_PIN(PIN_PB2, 3, 1)
+ #define PIN_PB2__A20 PINMUX_PIN(PIN_PB2, 4, 1)
+-#define PIN_PB2__FLEXCOM11_IO0 PINMUX_PIN(PIN_PB2, 6, 3)
++#define PIN_PB2__FLEXCOM11_IO1 PINMUX_PIN(PIN_PB2, 6, 3)
+ #define PIN_PB3 35
+ #define PIN_PB3__GPIO PINMUX_PIN(PIN_PB3, 0, 0)
+ #define PIN_PB3__RF1 PINMUX_PIN(PIN_PB3, 1, 1)
+@@ -765,7 +765,7 @@
+ #define PIN_PD20__PCK0 PINMUX_PIN(PIN_PD20, 1, 3)
+ #define PIN_PD20__FLEXCOM2_IO3 PINMUX_PIN(PIN_PD20, 2, 2)
+ #define PIN_PD20__PWMH3 PINMUX_PIN(PIN_PD20, 3, 4)
+-#define PIN_PD20__CANTX4 PINMUX_PIN(PIN_PD20, 5, 2)
++#define PIN_PD20__CANTX4 PINMUX_PIN(PIN_PD20, 4, 2)
+ #define PIN_PD20__FLEXCOM5_IO0 PINMUX_PIN(PIN_PD20, 6, 5)
+ #define PIN_PD21 117
+ #define PIN_PD21__GPIO PINMUX_PIN(PIN_PD21, 0, 0)
+diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi
+index 6c58c151c6d9e..a63a8e768654f 100644
+--- a/arch/arm/boot/dts/sama7g5.dtsi
++++ b/arch/arm/boot/dts/sama7g5.dtsi
+@@ -319,8 +319,6 @@
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>,
+ <&dma0 AT91_XDMAC_DT_PERID(8)>;
+ dma-names = "rx", "tx";
+- atmel,use-dma-rx;
+- atmel,use-dma-tx;
+ status = "disabled";
+ };
+ };
+@@ -485,8 +483,6 @@
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>,
+ <&dma0 AT91_XDMAC_DT_PERID(22)>;
+ dma-names = "rx", "tx";
+- atmel,use-dma-rx;
+- atmel,use-dma-tx;
+ status = "disabled";
+ };
+ };
+@@ -511,8 +507,6 @@
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(23)>,
+ <&dma0 AT91_XDMAC_DT_PERID(24)>;
+ dma-names = "rx", "tx";
+- atmel,use-dma-rx;
+- atmel,use-dma-tx;
+ status = "disabled";
+ };
+ };
+@@ -559,7 +553,6 @@
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+- interrupt-parent;
+ reg = <0xe8c11000 0x1000>,
+ <0xe8c12000 0x2000>;
+ };
+diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
+index 0b021eef0b538..b8c5dd7860cb2 100644
+--- a/arch/arm/boot/dts/socfpga.dtsi
++++ b/arch/arm/boot/dts/socfpga.dtsi
+@@ -46,7 +46,7 @@
+ <0xff113000 0x1000>;
+ };
+
+- intc: intc@fffed000 {
++ intc: interrupt-controller@fffed000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+@@ -782,7 +782,7 @@
+ };
+
+ qspi: spi@ff705000 {
+- compatible = "cdns,qspi-nor";
++ compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xff705000 0x1000>,
+diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi
+index a574ea91d9d3f..f1e50d2e623a3 100644
+--- a/arch/arm/boot/dts/socfpga_arria10.dtsi
++++ b/arch/arm/boot/dts/socfpga_arria10.dtsi
+@@ -38,7 +38,7 @@
+ <0xff113000 0x1000>;
+ };
+
+- intc: intc@ffffd000 {
++ intc: interrupt-controller@ffffd000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+@@ -756,7 +756,7 @@
+ };
+
+ qspi: spi@ff809000 {
+- compatible = "cdns,qspi-nor";
++ compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xff809000 0x100>,
+diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
+index 2b645642b9352..2a745522404d6 100644
+--- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
++++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts
+@@ -12,7 +12,7 @@
+ flash0: n25q00@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q00aa";
++ compatible = "micron,mt25qu02g", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+
+diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts
+index 90e676e7019f2..1b02d46496a85 100644
+--- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts
++++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts
+@@ -119,7 +119,7 @@
+ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q256a";
++ compatible = "micron,n25q256a", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+
+diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
+index 6f138b2b26163..51bb436784e24 100644
+--- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
++++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
+@@ -124,7 +124,7 @@
+ flash0: n25q00@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q00";
++ compatible = "micron,mt25qu02g", "jedec,spi-nor";
+ reg = <0>; /* chip select */
+ spi-max-frequency = <100000000>;
+
+diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
+index c155ff02eb6e0..cae9ddd5ed38b 100644
+--- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
++++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
+@@ -169,7 +169,7 @@
+ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q00";
++ compatible = "micron,mt25qu02g", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+
+diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts
+index 8d5d3996f6f27..ca18b959e6559 100644
+--- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts
++++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts
+@@ -80,7 +80,7 @@
+ flash: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q256a";
++ compatible = "micron,n25q256a", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+ m25p,fast-read;
+diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
+index 99a71757cdf46..3f7aa7bf0863a 100644
+--- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
++++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
+@@ -116,7 +116,7 @@
+ flash0: n25q512a@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q512a";
++ compatible = "micron,n25q512a", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <100000000>;
+
+diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
+index a060718758b67..25874e1b9c829 100644
+--- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
++++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
+@@ -224,7 +224,7 @@
+ n25q128@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q128";
++ compatible = "micron,n25q128", "jedec,spi-nor";
+ reg = <0>; /* chip select */
+ spi-max-frequency = <100000000>;
+ m25p,fast-read;
+@@ -241,7 +241,7 @@
+ n25q00@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "n25q00";
++ compatible = "micron,mt25qu02g", "jedec,spi-nor";
+ reg = <1>; /* chip select */
+ spi-max-frequency = <100000000>;
+ m25p,fast-read;
+diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi
+index 1a8f5e8b10e3a..66cd473ecb617 100644
+--- a/arch/arm/boot/dts/spear1340.dtsi
++++ b/arch/arm/boot/dts/spear1340.dtsi
+@@ -136,9 +136,9 @@
+ reg = <0xb4100000 0x1000>;
+ interrupts = <0 105 0x4>;
+ status = "disabled";
+- dmas = <&dwdma0 12 0 1>,
+- <&dwdma0 13 1 0>;
+- dma-names = "tx", "rx";
++ dmas = <&dwdma0 13 0 1>,
++ <&dwdma0 12 1 0>;
++ dma-names = "rx", "tx";
+ };
+
+ thermal@e07008c4 {
+diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
+index c87b881b2c8bb..9135533676879 100644
+--- a/arch/arm/boot/dts/spear13xx.dtsi
++++ b/arch/arm/boot/dts/spear13xx.dtsi
+@@ -284,9 +284,9 @@
+ #size-cells = <0>;
+ interrupts = <0 31 0x4>;
+ status = "disabled";
+- dmas = <&dwdma0 4 0 0>,
+- <&dwdma0 5 0 0>;
+- dma-names = "tx", "rx";
++ dmas = <&dwdma0 5 0 0>,
++ <&dwdma0 4 0 0>;
++ dma-names = "rx", "tx";
+ };
+
+ rtc@e0580000 {
+diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts
+index 367ba48aac3e5..5c562fb4886f4 100644
+--- a/arch/arm/boot/dts/spear320-hmi.dts
++++ b/arch/arm/boot/dts/spear320-hmi.dts
+@@ -242,7 +242,7 @@
+ irq-trigger = <0x1>;
+
+ stmpegpio: stmpe-gpio {
+- compatible = "stmpe,gpio";
++ compatible = "st,stmpe-gpio";
+ reg = <0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
+index fd41243a0b2c0..9d5a04a46b14e 100644
+--- a/arch/arm/boot/dts/spear600.dtsi
++++ b/arch/arm/boot/dts/spear600.dtsi
+@@ -47,7 +47,7 @@
+ compatible = "arm,pl110", "arm,primecell";
+ reg = <0xfc200000 0x1000>;
+ interrupt-parent = <&vic1>;
+- interrupts = <12>;
++ interrupts = <13>;
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts
+index 952606e607ed6..ce62ba877da12 100644
+--- a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts
++++ b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts
+@@ -544,8 +544,8 @@
+ reg = <0x19>;
+ vdd-supply = <&ab8500_ldo_aux1_reg>; // 3V
+ vddio-supply = <&ab8500_ldo_aux2_reg>; // 1.8V
+- mount-matrix = "0", "-1", "0",
+- "1", "0", "0",
++ mount-matrix = "0", "1", "0",
++ "-1", "0", "0",
+ "0", "0", "1";
+ };
+ };
+diff --git a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts
+index fabc390ccb0cf..6c9e812ef03f4 100644
+--- a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts
++++ b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts
+@@ -502,8 +502,8 @@
+ accelerometer@18 {
+ compatible = "bosch,bma222e";
+ reg = <0x18>;
+- mount-matrix = "0", "1", "0",
+- "-1", "0", "0",
++ mount-matrix = "0", "-1", "0",
++ "1", "0", "0",
+ "0", "0", "1";
+ vddio-supply = <&ab8500_ldo_aux2_reg>; // 1.8V
+ vdd-supply = <&ab8500_ldo_aux1_reg>; // 3V
+diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
+index 264f3e9b5fce5..7fab746e0570e 100644
+--- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
++++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
+@@ -181,10 +181,6 @@
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ /* All direction control is used */
+- st,sig-dir-cmd;
+- st,sig-dir-dat0;
+- st,sig-dir-dat2;
+- st,sig-dir-dat31;
+ st,sig-pin-fbclk;
+ full-pwr-cycle;
+ vmmc-supply = <&ab8500_ldo_aux3_reg>;
+@@ -292,10 +288,10 @@
+ };
+
+ ab8500_ldo_aux2 {
+- /* Supplies the Cypress TMA140 touchscreen only with 3.3V */
++ /* Supplies the Cypress TMA140 touchscreen only with 3.0V */
+ regulator-name = "AUX2";
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
++ regulator-min-microvolt = <3000000>;
++ regulator-max-microvolt = <3000000>;
+ };
+
+ ab8500_ldo_aux3 {
+@@ -314,9 +310,9 @@
+
+ ab8500_ldo_aux5 {
+ regulator-name = "AUX5";
++ /* Intended for 1V8 for touchscreen but actually left unused */
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <2790000>;
+- regulator-always-on;
+ };
+
+ ab8500_ldo_aux6 {
+diff --git a/arch/arm/boot/dts/stm32f429-disco.dts b/arch/arm/boot/dts/stm32f429-disco.dts
+index 075ac57d0bf4a..6435e099c6326 100644
+--- a/arch/arm/boot/dts/stm32f429-disco.dts
++++ b/arch/arm/boot/dts/stm32f429-disco.dts
+@@ -192,7 +192,7 @@
+
+ display: display@1{
+ /* Connect panel-ilitek-9341 to ltdc */
+- compatible = "st,sf-tc240t-9370-t";
++ compatible = "st,sf-tc240t-9370-t", "ilitek,ili9341";
+ reg = <1>;
+ spi-3wire;
+ spi-max-frequency = <10000000>;
+diff --git a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
+index 1cf8a23c26448..7f40b34401a9d 100644
+--- a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
++++ b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
+@@ -284,6 +284,88 @@
+ slew-rate = <2>;
+ };
+ };
++
++ can1_pins_a: can1-0 {
++ pins1 {
++ pinmux = <STM32_PINMUX('A', 12, AF9)>; /* CAN1_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('A', 11, AF9)>; /* CAN1_RX */
++ bias-pull-up;
++ };
++ };
++
++ can1_pins_b: can1-1 {
++ pins1 {
++ pinmux = <STM32_PINMUX('B', 9, AF9)>; /* CAN1_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('B', 8, AF9)>; /* CAN1_RX */
++ bias-pull-up;
++ };
++ };
++
++ can1_pins_c: can1-2 {
++ pins1 {
++ pinmux = <STM32_PINMUX('D', 1, AF9)>; /* CAN1_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('D', 0, AF9)>; /* CAN1_RX */
++ bias-pull-up;
++
++ };
++ };
++
++ can1_pins_d: can1-3 {
++ pins1 {
++ pinmux = <STM32_PINMUX('H', 13, AF9)>; /* CAN1_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('H', 14, AF9)>; /* CAN1_RX */
++ bias-pull-up;
++
++ };
++ };
++
++ can2_pins_a: can2-0 {
++ pins1 {
++ pinmux = <STM32_PINMUX('B', 6, AF9)>; /* CAN2_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('B', 5, AF9)>; /* CAN2_RX */
++ bias-pull-up;
++ };
++ };
++
++ can2_pins_b: can2-1 {
++ pins1 {
++ pinmux = <STM32_PINMUX('B', 13, AF9)>; /* CAN2_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('B', 12, AF9)>; /* CAN2_RX */
++ bias-pull-up;
++ };
++ };
++
++ can3_pins_a: can3-0 {
++ pins1 {
++ pinmux = <STM32_PINMUX('A', 15, AF11)>; /* CAN3_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('A', 8, AF11)>; /* CAN3_RX */
++ bias-pull-up;
++ };
++ };
++
++ can3_pins_b: can3-1 {
++ pins1 {
++ pinmux = <STM32_PINMUX('B', 4, AF11)>; /* CAN3_TX */
++ };
++ pins2 {
++ pinmux = <STM32_PINMUX('B', 3, AF11)>; /* CAN3_RX */
++ bias-pull-up;
++ };
++ };
+ };
+ };
+ };
+diff --git a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi
+index 5b60ecbd718f0..d3553e0f0187e 100644
+--- a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi
++++ b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi
+@@ -1179,7 +1179,7 @@
+ };
+ };
+
+- sai2a_pins_c: sai2a-4 {
++ sai2a_pins_c: sai2a-2 {
+ pins {
+ pinmux = <STM32_PINMUX('D', 13, AF10)>, /* SAI2_SCK_A */
+ <STM32_PINMUX('D', 11, AF10)>, /* SAI2_SD_A */
+@@ -1190,7 +1190,7 @@
+ };
+ };
+
+- sai2a_sleep_pins_c: sai2a-5 {
++ sai2a_sleep_pins_c: sai2a-sleep-2 {
+ pins {
+ pinmux = <STM32_PINMUX('D', 13, ANALOG)>, /* SAI2_SCK_A */
+ <STM32_PINMUX('D', 11, ANALOG)>, /* SAI2_SD_A */
+@@ -1235,14 +1235,14 @@
+ };
+ };
+
+- sai2b_pins_c: sai2a-4 {
++ sai2b_pins_c: sai2b-2 {
+ pins1 {
+ pinmux = <STM32_PINMUX('F', 11, AF10)>; /* SAI2_SD_B */
+ bias-disable;
+ };
+ };
+
+- sai2b_sleep_pins_c: sai2a-sleep-5 {
++ sai2b_sleep_pins_c: sai2b-sleep-2 {
+ pins {
+ pinmux = <STM32_PINMUX('F', 11, ANALOG)>; /* SAI2_SD_B */
+ };
+diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi
+index bd289bf5d2690..e0d4833187988 100644
+--- a/arch/arm/boot/dts/stm32mp151.dtsi
++++ b/arch/arm/boot/dts/stm32mp151.dtsi
+@@ -553,7 +553,7 @@
+ compatible = "st,stm32-cec";
+ reg = <0x40016000 0x400>;
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+- clocks = <&rcc CEC_K>, <&clk_lse>;
++ clocks = <&rcc CEC_K>, <&rcc CEC>;
+ clock-names = "cec", "hdmi-cec";
+ status = "disabled";
+ };
+@@ -824,7 +824,7 @@
+ #sound-dai-cells = <0>;
+
+ compatible = "st,stm32-sai-sub-a";
+- reg = <0x4 0x1c>;
++ reg = <0x4 0x20>;
+ clocks = <&rcc SAI1_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 87 0x400 0x01>;
+@@ -834,7 +834,7 @@
+ sai1b: audio-controller@4400a024 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-b";
+- reg = <0x24 0x1c>;
++ reg = <0x24 0x20>;
+ clocks = <&rcc SAI1_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 88 0x400 0x01>;
+@@ -855,7 +855,7 @@
+ sai2a: audio-controller@4400b004 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-a";
+- reg = <0x4 0x1c>;
++ reg = <0x4 0x20>;
+ clocks = <&rcc SAI2_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 89 0x400 0x01>;
+@@ -865,7 +865,7 @@
+ sai2b: audio-controller@4400b024 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-b";
+- reg = <0x24 0x1c>;
++ reg = <0x24 0x20>;
+ clocks = <&rcc SAI2_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 90 0x400 0x01>;
+@@ -886,7 +886,7 @@
+ sai3a: audio-controller@4400c004 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-a";
+- reg = <0x04 0x1c>;
++ reg = <0x04 0x20>;
+ clocks = <&rcc SAI3_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 113 0x400 0x01>;
+@@ -896,7 +896,7 @@
+ sai3b: audio-controller@4400c024 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-b";
+- reg = <0x24 0x1c>;
++ reg = <0x24 0x20>;
+ clocks = <&rcc SAI3_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 114 0x400 0x01>;
+@@ -1271,7 +1271,7 @@
+ sai4a: audio-controller@50027004 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-a";
+- reg = <0x04 0x1c>;
++ reg = <0x04 0x20>;
+ clocks = <&rcc SAI4_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 99 0x400 0x01>;
+@@ -1281,7 +1281,7 @@
+ sai4b: audio-controller@50027024 {
+ #sound-dai-cells = <0>;
+ compatible = "st,stm32-sai-sub-b";
+- reg = <0x24 0x1c>;
++ reg = <0x24 0x20>;
+ clocks = <&rcc SAI4_K>;
+ clock-names = "sai_ck";
+ dmas = <&dmamux1 100 0x400 0x01>;
+@@ -1452,7 +1452,7 @@
+ usbh_ohci: usb@5800c000 {
+ compatible = "generic-ohci";
+ reg = <0x5800c000 0x1000>;
+- clocks = <&rcc USBH>;
++ clocks = <&usbphyc>, <&rcc USBH>;
+ resets = <&rcc USBH_R>;
+ interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+@@ -1461,7 +1461,7 @@
+ usbh_ehci: usb@5800d000 {
+ compatible = "generic-ehci";
+ reg = <0x5800d000 0x1000>;
+- clocks = <&rcc USBH>;
++ clocks = <&usbphyc>, <&rcc USBH>;
+ resets = <&rcc USBH_R>;
+ interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+ companion = <&usbh_ohci>;
+diff --git a/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts b/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts
+index 2e3c9fbb4eb36..275167f26fd9d 100644
+--- a/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts
++++ b/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts
+@@ -13,7 +13,6 @@
+ /dts-v1/;
+
+ #include "stm32mp157.dtsi"
+-#include "stm32mp15xc.dtsi"
+ #include "stm32mp15xx-dhcor-som.dtsi"
+ #include "stm32mp15xx-dhcor-avenger96.dtsi"
+
+diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
+index fbf3826933e4d..7c0aa59accc55 100644
+--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
++++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
+@@ -137,10 +137,13 @@
+
+ sound {
+ compatible = "audio-graph-card";
+- routing =
+- "MIC_IN", "Capture",
+- "Capture", "Mic Bias",
+- "Playback", "HP_OUT";
++ widgets = "Headphone", "Headphone Jack",
++ "Line", "Line In Jack",
++ "Microphone", "Microphone Jack";
++ routing = "Headphone Jack", "HP_OUT",
++ "LINE_IN", "Line In Jack",
++ "MIC_IN", "Microphone Jack",
++ "Microphone Jack", "Mic Bias";
+ dais = <&sai2a_port &sai2b_port>;
+ status = "okay";
+ };
+diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi
+index 6885948f3024e..5f65d96435f6e 100644
+--- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi
++++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi
+@@ -88,7 +88,7 @@
+
+ sound {
+ compatible = "audio-graph-card";
+- label = "STM32MP1-AV96-HDMI";
++ label = "STM32-AV96-HDMI";
+ dais = <&sai2a_port>;
+ status = "okay";
+ };
+@@ -100,7 +100,7 @@
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+
+- gpios = <&gpioz 3 GPIO_ACTIVE_HIGH>;
++ gpio = <&gpioz 3 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+ };
+@@ -141,6 +141,7 @@
+ compatible = "snps,dwmac-mdio";
+ reset-gpios = <&gpioz 2 GPIO_ACTIVE_LOW>;
+ reset-delay-us = <1000>;
++ reset-post-delay-us = <1000>;
+
+ phy0: ethernet-phy@7 {
+ reg = <7>;
+@@ -266,6 +267,12 @@
+ };
+ };
+ };
++
++ dh_mac_eeprom: eeprom@53 {
++ compatible = "atmel,24c02";
++ reg = <0x53>;
++ pagesize = <16>;
++ };
+ };
+
+ &ltdc {
+diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
+index 2b0ac605549d7..108d934a186b4 100644
+--- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
++++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
+@@ -171,12 +171,6 @@
+ status = "disabled";
+ };
+ };
+-
+- eeprom@53 {
+- compatible = "atmel,24c02";
+- reg = <0x53>;
+- pagesize = <16>;
+- };
+ };
+
+ &iwdg2 {
+@@ -202,7 +196,7 @@
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-rx-bus-width = <4>;
+- spi-max-frequency = <108000000>;
++ spi-max-frequency = <50000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ };
+diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi
+index 899bfe04aeb91..a76173e8a2a17 100644
+--- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi
++++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi
+@@ -249,7 +249,7 @@
+ stusb1600@28 {
+ compatible = "st,stusb1600";
+ reg = <0x28>;
+- interrupts = <11 IRQ_TYPE_EDGE_FALLING>;
++ interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpioi>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&stusb1600_pins_a>;
+@@ -438,7 +438,7 @@
+ i2s2_port: port {
+ i2s2_endpoint: endpoint {
+ remote-endpoint = <&sii9022_tx_endpoint>;
+- format = "i2s";
++ dai-format = "i2s";
+ mclk-fs = <256>;
+ };
+ };
+diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
+index 2beddbb3c5183..b3d1bdfb5118e 100644
+--- a/arch/arm/boot/dts/sun8i-a33.dtsi
++++ b/arch/arm/boot/dts/sun8i-a33.dtsi
+@@ -46,7 +46,7 @@
+ #include <dt-bindings/thermal/thermal.h>
+
+ / {
+- cpu0_opp_table: opp_table0 {
++ cpu0_opp_table: opp-table-cpu {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+@@ -164,7 +164,7 @@
+ io-channels = <&ths>;
+ };
+
+- mali_opp_table: gpu-opp-table {
++ mali_opp_table: opp-table-gpu {
+ compatible = "operating-points-v2";
+
+ opp-144000000 {
+diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi
+index ac97eac91349b..82fdb04122caa 100644
+--- a/arch/arm/boot/dts/sun8i-a83t.dtsi
++++ b/arch/arm/boot/dts/sun8i-a83t.dtsi
+@@ -200,7 +200,7 @@
+ status = "disabled";
+ };
+
+- cpu0_opp_table: opp_table0 {
++ cpu0_opp_table: opp-table-cluster0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+@@ -253,7 +253,7 @@
+ };
+ };
+
+- cpu1_opp_table: opp_table1 {
++ cpu1_opp_table: opp-table-cluster1 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+index f19ed981da9d9..3706216ffb40b 100644
+--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
++++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+@@ -169,7 +169,7 @@
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+- compatible = "mxicy,mx25l1606e", "winbond,w25q128";
++ compatible = "mxicy,mx25l1606e", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+ };
+diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts
+index 8e7dfcffe1fbe..355f7844fd55e 100644
+--- a/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts
++++ b/arch/arm/boot/dts/sun8i-h3-nanopi-duo2.dts
+@@ -57,7 +57,7 @@
+ regulator-ramp-delay = <50>; /* 4ms */
+
+ enable-active-high;
+- enable-gpio = <&r_pio 0 8 GPIO_ACTIVE_HIGH>; /* PL8 */
++ enable-gpios = <&r_pio 0 8 GPIO_ACTIVE_HIGH>; /* PL8 */
+ gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */
+ gpios-states = <0x1>;
+ states = <1100000 0>, <1300000 1>;
+diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi
+index 4e89701df91f8..ae4f933abb895 100644
+--- a/arch/arm/boot/dts/sun8i-h3.dtsi
++++ b/arch/arm/boot/dts/sun8i-h3.dtsi
+@@ -44,7 +44,7 @@
+ #include <dt-bindings/thermal/thermal.h>
+
+ / {
+- cpu0_opp_table: opp_table0 {
++ cpu0_opp_table: opp-table-cpu {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+@@ -112,7 +112,7 @@
+ };
+ };
+
+- gpu_opp_table: gpu-opp-table {
++ gpu_opp_table: opp-table-gpu {
+ compatible = "operating-points-v2";
+
+ opp-120000000 {
+diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi
+index b30bc1a25ebb9..084323d5c61cb 100644
+--- a/arch/arm/boot/dts/sun8i-v3s.dtsi
++++ b/arch/arm/boot/dts/sun8i-v3s.dtsi
+@@ -593,6 +593,17 @@
+ #size-cells = <0>;
+ };
+
++ gic: interrupt-controller@1c81000 {
++ compatible = "arm,gic-400";
++ reg = <0x01c81000 0x1000>,
++ <0x01c82000 0x2000>,
++ <0x01c84000 0x2000>,
++ <0x01c86000 0x2000>;
++ interrupt-controller;
++ #interrupt-cells = <3>;
++ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
++ };
++
+ csi1: camera@1cb4000 {
+ compatible = "allwinner,sun8i-v3s-csi";
+ reg = <0x01cb4000 0x3000>;
+@@ -604,16 +615,5 @@
+ resets = <&ccu RST_BUS_CSI>;
+ status = "disabled";
+ };
+-
+- gic: interrupt-controller@1c81000 {
+- compatible = "arm,gic-400";
+- reg = <0x01c81000 0x1000>,
+- <0x01c82000 0x2000>,
+- <0x01c84000 0x2000>,
+- <0x01c86000 0x2000>;
+- interrupt-controller;
+- #interrupt-cells = <3>;
+- interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+- };
+ };
+ };
+diff --git a/arch/arm/boot/dts/suniv-f1c100s.dtsi b/arch/arm/boot/dts/suniv-f1c100s.dtsi
+index 6100d3b75f613..def8301014487 100644
+--- a/arch/arm/boot/dts/suniv-f1c100s.dtsi
++++ b/arch/arm/boot/dts/suniv-f1c100s.dtsi
+@@ -104,8 +104,10 @@
+
+ wdt: watchdog@1c20ca0 {
+ compatible = "allwinner,suniv-f1c100s-wdt",
+- "allwinner,sun4i-a10-wdt";
++ "allwinner,sun6i-a31-wdt";
+ reg = <0x01c20ca0 0x20>;
++ interrupts = <16>;
++ clocks = <&osc32k>;
+ };
+
+ uart0: serial@1c25000 {
+diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts
+index 1d2aac2cb6d03..fdc1d64dfff9d 100644
+--- a/arch/arm/boot/dts/tegra124-nyan-big.dts
++++ b/arch/arm/boot/dts/tegra124-nyan-big.dts
+@@ -13,12 +13,15 @@
+ "google,nyan-big-rev1", "google,nyan-big-rev0",
+ "google,nyan-big", "google,nyan", "nvidia,tegra124";
+
+- panel: panel {
+- compatible = "auo,b133xtn01";
+-
+- power-supply = <&vdd_3v3_panel>;
+- backlight = <&backlight>;
+- ddc-i2c-bus = <&dpaux>;
++ host1x@50000000 {
++ dpaux@545c0000 {
++ aux-bus {
++ panel: panel {
++ compatible = "auo,b133xtn01";
++ backlight = <&backlight>;
++ };
++ };
++ };
+ };
+
+ mmc@700b0400 { /* SD Card on this bus */
+diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts
+index 677babde6460e..abdf4456826f8 100644
+--- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts
++++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts
+@@ -15,12 +15,15 @@
+ "google,nyan-blaze-rev0", "google,nyan-blaze",
+ "google,nyan", "nvidia,tegra124";
+
+- panel: panel {
+- compatible = "samsung,ltn140at29-301";
+-
+- power-supply = <&vdd_3v3_panel>;
+- backlight = <&backlight>;
+- ddc-i2c-bus = <&dpaux>;
++ host1x@50000000 {
++ dpaux@545c0000 {
++ aux-bus {
++ panel: panel {
++ compatible = "samsung,ltn140at29-301";
++ backlight = <&backlight>;
++ };
++ };
++ };
+ };
+
+ sound {
+diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts
+index e6b54ac1ebd1a..84e2d24065e9a 100644
+--- a/arch/arm/boot/dts/tegra124-venice2.dts
++++ b/arch/arm/boot/dts/tegra124-venice2.dts
+@@ -48,6 +48,13 @@
+ dpaux@545c0000 {
+ vdd-supply = <&vdd_3v3_panel>;
+ status = "okay";
++
++ aux-bus {
++ panel: panel {
++ compatible = "lg,lp129qe";
++ backlight = <&backlight>;
++ };
++ };
+ };
+ };
+
+@@ -1079,13 +1086,6 @@
+ };
+ };
+
+- panel: panel {
+- compatible = "lg,lp129qe";
+- power-supply = <&vdd_3v3_panel>;
+- backlight = <&backlight>;
+- ddc-i2c-bus = <&dpaux>;
+- };
+-
+ vdd_mux: regulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "+VDD_MUX";
+diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi
+index dd4d506683de7..7f14f0d005c3e 100644
+--- a/arch/arm/boot/dts/tegra20-tamonten.dtsi
++++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi
+@@ -183,8 +183,8 @@
+ };
+ conf_ata {
+ nvidia,pins = "ata", "atb", "atc", "atd", "ate",
+- "cdev1", "cdev2", "dap1", "dtb", "gma",
+- "gmb", "gmc", "gmd", "gme", "gpu7",
++ "cdev1", "cdev2", "dap1", "dtb", "dtf",
++ "gma", "gmb", "gmc", "gmd", "gme", "gpu7",
+ "gpv", "i2cp", "irrx", "irtx", "pta",
+ "rm", "slxa", "slxk", "spia", "spib",
+ "uac";
+@@ -203,7 +203,7 @@
+ };
+ conf_crtp {
+ nvidia,pins = "crtp", "dap2", "dap3", "dap4",
+- "dtc", "dte", "dtf", "gpu", "sdio1",
++ "dtc", "dte", "gpu", "sdio1",
+ "slxc", "slxd", "spdi", "spdo", "spig",
+ "uda";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
+index e81e5937a60ae..03301ddb3403a 100644
+--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
++++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
+@@ -597,8 +597,8 @@
+ compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+ status = "disabled";
+ reg = <0x65a00000 0xcd00>;
+- interrupt-names = "host", "peripheral";
+- interrupts = <0 134 4>, <0 135 4>;
++ interrupt-names = "dwc_usb3";
++ interrupts = <0 134 4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>;
+ clock-names = "ref", "bus_early", "suspend";
+@@ -693,8 +693,8 @@
+ compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+ status = "disabled";
+ reg = <0x65c00000 0xcd00>;
+- interrupt-names = "host", "peripheral";
+- interrupts = <0 137 4>, <0 138 4>;
++ interrupt-names = "dwc_usb3";
++ interrupts = <0 137 4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>;
+ clock-names = "ref", "bus_early", "suspend";
+diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+index 3b88209bacea2..ff1f9a1bcfcfc 100644
+--- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
++++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+@@ -132,6 +132,7 @@
+ reg = <0x2c0f0000 0x1000>;
+ interrupts = <0 84 4>;
+ cache-level = <2>;
++ cache-unified;
+ };
+
+ pmu {
+diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
+index 043ddd70372f0..36d5299b2baa8 100644
+--- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
++++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
+@@ -343,7 +343,7 @@
+ };
+
+ &i2c2 {
+- tca9548@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ pinctrl-0 = <&pinctrl_i2c_mux_reset>;
+ pinctrl-names = "default";
+diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+index de79dcfd32e62..ba2001f373158 100644
+--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
++++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+@@ -340,7 +340,7 @@
+ };
+
+ &i2c2 {
+- tca9548@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ pinctrl-0 = <&pinctrl_i2c_mux_reset>;
+ pinctrl-names = "default";
+diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig
+index 383c632eba7bd..1e244a9287902 100644
+--- a/arch/arm/configs/bcm2835_defconfig
++++ b/arch/arm/configs/bcm2835_defconfig
+@@ -108,6 +108,7 @@ CONFIG_MEDIA_SUPPORT=y
+ CONFIG_MEDIA_CAMERA_SUPPORT=y
+ CONFIG_DRM=y
+ CONFIG_DRM_VC4=y
++CONFIG_FB=y
+ CONFIG_FB_SIMPLE=y
+ CONFIG_FRAMEBUFFER_CONSOLE=y
+ CONFIG_SOUND=y
+diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig
+index 502a9d870ca44..45769d0ddd4ef 100644
+--- a/arch/arm/configs/cm_x300_defconfig
++++ b/arch/arm/configs/cm_x300_defconfig
+@@ -146,7 +146,6 @@ CONFIG_NFS_V3_ACL=y
+ CONFIG_NFS_V4=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_PARTITION_ADVANCED=y
+ CONFIG_NLS_CODEPAGE_437=m
+ CONFIG_NLS_ISO8859_1=m
+diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
+index a49e699e52de3..ec84d80096b1c 100644
+--- a/arch/arm/configs/ezx_defconfig
++++ b/arch/arm/configs/ezx_defconfig
+@@ -314,7 +314,6 @@ CONFIG_NFSD_V3_ACL=y
+ CONFIG_SMB_FS=m
+ CONFIG_CIFS=m
+ CONFIG_CIFS_STATS=y
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
+index 118c4c927f264..6db871d4e0775 100644
+--- a/arch/arm/configs/imote2_defconfig
++++ b/arch/arm/configs/imote2_defconfig
+@@ -288,7 +288,6 @@ CONFIG_NFSD_V3_ACL=y
+ CONFIG_SMB_FS=m
+ CONFIG_CIFS=m
+ CONFIG_CIFS_STATS=y
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
+index 80a3ae02d7594..e883cb6dc6459 100644
+--- a/arch/arm/configs/multi_v5_defconfig
++++ b/arch/arm/configs/multi_v5_defconfig
+@@ -188,6 +188,7 @@ CONFIG_REGULATOR=y
+ CONFIG_REGULATOR_FIXED_VOLTAGE=y
+ CONFIG_MEDIA_SUPPORT=y
+ CONFIG_MEDIA_CAMERA_SUPPORT=y
++CONFIG_MEDIA_PLATFORM_SUPPORT=y
+ CONFIG_V4L_PLATFORM_DRIVERS=y
+ CONFIG_VIDEO_ASPEED=m
+ CONFIG_VIDEO_ATMEL_ISI=m
+@@ -195,6 +196,7 @@ CONFIG_DRM=y
+ CONFIG_DRM_ATMEL_HLCDC=m
+ CONFIG_DRM_PANEL_SIMPLE=y
+ CONFIG_DRM_ASPEED_GFX=m
++CONFIG_FB=y
+ CONFIG_FB_IMX=y
+ CONFIG_FB_ATMEL=y
+ CONFIG_BACKLIGHT_ATMEL_LCDC=y
+diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
+index 33572998dbbe0..2eafcf1c5d89c 100644
+--- a/arch/arm/configs/multi_v7_defconfig
++++ b/arch/arm/configs/multi_v7_defconfig
+@@ -696,7 +696,6 @@ CONFIG_DRM_IMX_LDB=m
+ CONFIG_DRM_IMX_HDMI=m
+ CONFIG_DRM_ATMEL_HLCDC=m
+ CONFIG_DRM_RCAR_DU=m
+-CONFIG_DRM_RCAR_LVDS=y
+ CONFIG_DRM_SUN4I=m
+ CONFIG_DRM_MSM=m
+ CONFIG_DRM_FSL_DCU=m
+diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
+index ca32446b187f5..f53086ddc48b0 100644
+--- a/arch/arm/configs/mxs_defconfig
++++ b/arch/arm/configs/mxs_defconfig
+@@ -93,6 +93,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
+ CONFIG_DRM=y
+ CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
+ CONFIG_DRM_MXSFB=y
++CONFIG_FB=y
+ CONFIG_FB_MODE_HELPERS=y
+ CONFIG_LCD_CLASS_DEVICE=y
+ CONFIG_BACKLIGHT_CLASS_DEVICE=y
+diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
+index 23595fc5a29a9..907d6512821ad 100644
+--- a/arch/arm/configs/nhk8815_defconfig
++++ b/arch/arm/configs/nhk8815_defconfig
+@@ -127,7 +127,6 @@ CONFIG_NFS_FS=y
+ CONFIG_NFS_V3_ACL=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ASCII=y
+ CONFIG_NLS_ISO8859_1=y
+diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
+index 58f4834289e63..dedaaae3d0d8a 100644
+--- a/arch/arm/configs/pxa_defconfig
++++ b/arch/arm/configs/pxa_defconfig
+@@ -699,7 +699,6 @@ CONFIG_NFSD_V3_ACL=y
+ CONFIG_NFSD_V4=y
+ CONFIG_CIFS=m
+ CONFIG_CIFS_STATS=y
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_DEFAULT="utf8"
+diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig
+index 3b206a31902ff..065553326b391 100644
+--- a/arch/arm/configs/spear13xx_defconfig
++++ b/arch/arm/configs/spear13xx_defconfig
+@@ -61,7 +61,6 @@ CONFIG_SERIAL_AMBA_PL011=y
+ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+ # CONFIG_HW_RANDOM is not set
+ CONFIG_RAW_DRIVER=y
+-CONFIG_MAX_RAW_DEVS=8192
+ CONFIG_I2C=y
+ CONFIG_I2C_DESIGNWARE_PLATFORM=y
+ CONFIG_SPI=y
+diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig
+index fc5f71c765edc..afca722d6605c 100644
+--- a/arch/arm/configs/spear3xx_defconfig
++++ b/arch/arm/configs/spear3xx_defconfig
+@@ -41,7 +41,6 @@ CONFIG_SERIAL_AMBA_PL011=y
+ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+ # CONFIG_HW_RANDOM is not set
+ CONFIG_RAW_DRIVER=y
+-CONFIG_MAX_RAW_DEVS=8192
+ CONFIG_I2C=y
+ CONFIG_I2C_DESIGNWARE_PLATFORM=y
+ CONFIG_SPI=y
+diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig
+index 52a56b8ce6a71..bc32c02cb86b1 100644
+--- a/arch/arm/configs/spear6xx_defconfig
++++ b/arch/arm/configs/spear6xx_defconfig
+@@ -36,7 +36,6 @@ CONFIG_INPUT_FF_MEMLESS=y
+ CONFIG_SERIAL_AMBA_PL011=y
+ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+ CONFIG_RAW_DRIVER=y
+-CONFIG_MAX_RAW_DEVS=8192
+ CONFIG_I2C=y
+ CONFIG_I2C_DESIGNWARE_PLATFORM=y
+ CONFIG_SPI=y
+diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
+index 2b575792363e5..149a5bd6b88c1 100644
+--- a/arch/arm/crypto/Kconfig
++++ b/arch/arm/crypto/Kconfig
+@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
+ using optimized ARM assembler and NEON, when available.
+
+ config CRYPTO_BLAKE2S_ARM
+- tristate "BLAKE2s digest algorithm (ARM)"
++ bool "BLAKE2s digest algorithm (ARM)"
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ help
+ BLAKE2s digest algorithm optimized with ARM scalar instructions. This
+@@ -102,6 +102,8 @@ config CRYPTO_AES_ARM_BS
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_AES
++ select CRYPTO_AES
++ select CRYPTO_CBC
+ select CRYPTO_SIMD
+ help
+ Use a faster and more secure NEON based implementation of AES in CBC,
+diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
+index eafa898ba6a73..971e74546fb1b 100644
+--- a/arch/arm/crypto/Makefile
++++ b/arch/arm/crypto/Makefile
+@@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+ obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
+ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
+ obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
+ obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+ obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
+@@ -31,7 +31,7 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+ sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
+ sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+ sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
+-blake2s-arm-y := blake2s-core.o blake2s-glue.o
++libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
+ blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
+ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+ sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
+diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S
+index 86345751bbf3a..df40e46601f10 100644
+--- a/arch/arm/crypto/blake2s-core.S
++++ b/arch/arm/crypto/blake2s-core.S
+@@ -167,8 +167,8 @@
+ .endm
+
+ //
+-// void blake2s_compress_arch(struct blake2s_state *state,
+-// const u8 *block, size_t nblocks, u32 inc);
++// void blake2s_compress(struct blake2s_state *state,
++// const u8 *block, size_t nblocks, u32 inc);
+ //
+ // Only the first three fields of struct blake2s_state are used:
+ // u32 h[8]; (inout)
+@@ -176,7 +176,7 @@
+ // u32 f[2]; (in)
+ //
+ .align 5
+-ENTRY(blake2s_compress_arch)
++ENTRY(blake2s_compress)
+ push {r0-r2,r4-r11,lr} // keep this an even number
+
+ .Lnext_block:
+@@ -303,4 +303,4 @@ ENTRY(blake2s_compress_arch)
+ str r3, [r12], #4
+ bne 1b
+ b .Lcopy_block_done
+-ENDPROC(blake2s_compress_arch)
++ENDPROC(blake2s_compress)
+diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c
+index f2cc1e5fc9ec1..0238a70d9581e 100644
+--- a/arch/arm/crypto/blake2s-glue.c
++++ b/arch/arm/crypto/blake2s-glue.c
+@@ -1,78 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * BLAKE2s digest algorithm, ARM scalar implementation
+- *
+- * Copyright 2020 Google LLC
+- */
+
+ #include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+ #include <linux/module.h>
+
+ /* defined in blake2s-core.S */
+-EXPORT_SYMBOL(blake2s_compress_arch);
+-
+-static int crypto_blake2s_update_arm(struct shash_desc *desc,
+- const u8 *in, unsigned int inlen)
+-{
+- return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch);
+-}
+-
+-static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
+-{
+- return crypto_blake2s_final(desc, out, blake2s_compress_arch);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size) \
+- { \
+- .base.cra_name = name, \
+- .base.cra_driver_name = driver_name, \
+- .base.cra_priority = 200, \
+- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+- .base.cra_module = THIS_MODULE, \
+- .digestsize = digest_size, \
+- .setkey = crypto_blake2s_setkey, \
+- .init = crypto_blake2s_init, \
+- .update = crypto_blake2s_update_arm, \
+- .final = crypto_blake2s_final_arm, \
+- .descsize = sizeof(struct blake2s_state), \
+- }
+-
+-static struct shash_alg blake2s_arm_algs[] = {
+- BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_arm_mod_init(void)
+-{
+- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+- crypto_register_shashes(blake2s_arm_algs,
+- ARRAY_SIZE(blake2s_arm_algs)) : 0;
+-}
+-
+-static void __exit blake2s_arm_mod_exit(void)
+-{
+- if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+- crypto_unregister_shashes(blake2s_arm_algs,
+- ARRAY_SIZE(blake2s_arm_algs));
+-}
+-
+-module_init(blake2s_arm_mod_init);
+-module_exit(blake2s_arm_mod_exit);
+-
+-MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
+-MODULE_LICENSE("GPL");
+-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-arm");
++EXPORT_SYMBOL(blake2s_compress);
+diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
+index 413abfb42989e..f82a819eb0dbb 100644
+--- a/arch/arm/include/asm/arch_gicv3.h
++++ b/arch/arm/include/asm/arch_gicv3.h
+@@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \
+ return read_sysreg(a32); \
+ } \
+
++CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
+ CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
+ CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
+ CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
+@@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
+
+ /* Low-level accessors */
+
+-static inline void gic_write_eoir(u32 irq)
+-{
+- write_sysreg(irq, ICC_EOIR1);
+- isb();
+-}
+-
+ static inline void gic_write_dir(u32 val)
+ {
+ write_sysreg(val, ICC_DIR);
+diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
+index e2b1fd558bf3c..5d52da168ab3a 100644
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -107,6 +107,16 @@
+ .endm
+ #endif
+
++#if __LINUX_ARM_ARCH__ < 7
++ .macro dsb, args
++ mcr p15, 0, r0, c7, c10, 4
++ .endm
++
++ .macro isb, args
++ mcr p15, 0, r0, c7, c5, 4
++ .endm
++#endif
++
+ .macro asm_trace_hardirqs_off, save=1
+ #if defined(CONFIG_TRACE_IRQFLAGS)
+ .if \save
+@@ -259,6 +269,7 @@
+ */
+ #define ALT_UP(instr...) \
+ .pushsection ".alt.smp.init", "a" ;\
++ .align 2 ;\
+ .long 9998b - . ;\
+ 9997: instr ;\
+ .if . - 9997b == 2 ;\
+@@ -270,6 +281,7 @@
+ .popsection
+ #define ALT_UP_B(label) \
+ .pushsection ".alt.smp.init", "a" ;\
++ .align 2 ;\
+ .long 9998b - . ;\
+ W(b) . + (label - 9998b) ;\
+ .popsection
+@@ -314,6 +326,23 @@
+ #endif
+ .endm
+
++/*
++ * Raw SMP data memory barrier
++ */
++ .macro __smp_dmb mode
++#if __LINUX_ARM_ARCH__ >= 7
++ .ifeqs "\mode","arm"
++ dmb ish
++ .else
++ W(dmb) ish
++ .endif
++#elif __LINUX_ARM_ARCH__ == 6
++ mcr p15, 0, r0, c7, c10, 5 @ dmb
++#else
++ .error "Incompatible SMP platform"
++#endif
++ .endm
++
+ #if defined(CONFIG_CPU_V7M)
+ /*
+ * setmode is used to assert to be in svc mode during boot. For v7-M
+diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h
+index 97a312ba08401..fe385551edeca 100644
+--- a/arch/arm/include/asm/bugs.h
++++ b/arch/arm/include/asm/bugs.h
+@@ -1,7 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+ /*
+- * arch/arm/include/asm/bugs.h
+- *
+ * Copyright (C) 1995-2003 Russell King
+ */
+ #ifndef __ASM_BUGS_H
+@@ -10,10 +8,8 @@
+ extern void check_writebuffer_bugs(void);
+
+ #ifdef CONFIG_MMU
+-extern void check_bugs(void);
+ extern void check_other_bugs(void);
+ #else
+-#define check_bugs() do { } while (0)
+ #define check_other_bugs() do { } while (0)
+ #endif
+
+diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
+index a81dda65c5762..45180a2cc47cb 100644
+--- a/arch/arm/include/asm/dma.h
++++ b/arch/arm/include/asm/dma.h
+@@ -10,7 +10,7 @@
+ #else
+ #define MAX_DMA_ADDRESS ({ \
+ extern phys_addr_t arm_dma_zone_size; \
+- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
++ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
+ (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
+ #endif
+
+diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
+index a6f3b179e8a94..27218eabbf9a0 100644
+--- a/arch/arm/include/asm/efi.h
++++ b/arch/arm/include/asm/efi.h
+@@ -17,7 +17,6 @@
+
+ #ifdef CONFIG_EFI
+ void efi_init(void);
+-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+
+ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
+index dfc6bfa430121..24486dad9e19b 100644
+--- a/arch/arm/include/asm/entry-macro-multi.S
++++ b/arch/arm/include/asm/entry-macro-multi.S
+@@ -13,28 +13,4 @@
+ @
+ badrne lr, 1b
+ bne asm_do_IRQ
+-
+-#ifdef CONFIG_SMP
+- /*
+- * XXX
+- *
+- * this macro assumes that irqstat (r2) and base (r6) are
+- * preserved from get_irqnr_and_base above
+- */
+- ALT_SMP(test_for_ipi r0, r2, r6, lr)
+- ALT_UP_B(9997f)
+- movne r1, sp
+- badrne lr, 1b
+- bne do_IPI
+-#endif
+-9997:
+- .endm
+-
+- .macro arch_irq_handler, symbol_name
+- .align 5
+- .global \symbol_name
+-\symbol_name:
+- mov r8, lr
+- arch_irq_handler_default
+- ret r8
+ .endm
+diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
+index f74944c6fe8d3..79d246ac93ab6 100644
+--- a/arch/arm/include/asm/io.h
++++ b/arch/arm/include/asm/io.h
+@@ -436,6 +436,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
+ #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+ extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
++extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
++ unsigned long flags);
++#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+ #endif
+
+ /*
+diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
+index 92282558caf7c..2b8970d8e5a2f 100644
+--- a/arch/arm/include/asm/mach/map.h
++++ b/arch/arm/include/asm/mach/map.h
+@@ -27,6 +27,7 @@ enum {
+ MT_HIGH_VECTORS,
+ MT_MEMORY_RWX,
+ MT_MEMORY_RW,
++ MT_MEMORY_RO,
+ MT_ROM,
+ MT_MEMORY_RWX_NONCACHED,
+ MT_MEMORY_RW_DTCM,
+diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
+index fe87397c3d8c6..bdbc1e590891e 100644
+--- a/arch/arm/include/asm/perf_event.h
++++ b/arch/arm/include/asm/perf_event.h
+@@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
+
+ #define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->ARM_pc = (__ip); \
+- (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \
++ frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \
+ (regs)->ARM_sp = current_stack_pointer; \
+ (regs)->ARM_cpsr = SVC_MODE; \
+ }
+diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
+index d16aba48fa0a4..090011394477f 100644
+--- a/arch/arm/include/asm/pgtable-nommu.h
++++ b/arch/arm/include/asm/pgtable-nommu.h
+@@ -44,12 +44,6 @@
+
+ typedef pte_t *pte_addr_t;
+
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-#define ZERO_PAGE(vaddr) (virt_to_page(0))
+-
+ /*
+ * Mark the prot value as uncacheable and unbufferable.
+ */
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index cd1f84bb40aea..a25c4303fc0e6 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -10,6 +10,15 @@
+ #include <linux/const.h>
+ #include <asm/proc-fns.h>
+
++#ifndef __ASSEMBLY__
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern struct page *empty_zero_page;
++#define ZERO_PAGE(vaddr) (empty_zero_page)
++#endif
++
+ #ifndef CONFIG_MMU
+
+ #include <asm-generic/pgtable-nopud.h>
+@@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ #define __S111 __PAGE_SHARED_EXEC
+
+ #ifndef __ASSEMBLY__
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern struct page *empty_zero_page;
+-#define ZERO_PAGE(vaddr) (empty_zero_page)
+-
+
+ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
+index 9e6b972863077..8aeff55aebfaf 100644
+--- a/arch/arm/include/asm/processor.h
++++ b/arch/arm/include/asm/processor.h
+@@ -96,6 +96,7 @@ unsigned long get_wchan(struct task_struct *p);
+ #define __ALT_SMP_ASM(smp, up) \
+ "9998: " smp "\n" \
+ " .pushsection \".alt.smp.init\", \"a\"\n" \
++ " .align 2\n" \
+ " .long 9998b - .\n" \
+ " " up "\n" \
+ " .popsection\n"
+diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
+index 93051e2f402c8..1408a6a15d0e0 100644
+--- a/arch/arm/include/asm/ptrace.h
++++ b/arch/arm/include/asm/ptrace.h
+@@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+ ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \
+ })
+
++
++/*
++ * Update ITSTATE after normal execution of an IT block instruction.
++ *
++ * The 8 IT state bits are split into two parts in CPSR:
++ * ITSTATE<1:0> are in CPSR<26:25>
++ * ITSTATE<7:2> are in CPSR<15:10>
++ */
++static inline unsigned long it_advance(unsigned long cpsr)
++{
++ if ((cpsr & 0x06000400) == 0) {
++ /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
++ cpsr &= ~PSR_IT_MASK;
++ } else {
++ /* We need to shift left ITSTATE<4:0> */
++ const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
++ unsigned long it = cpsr & mask;
++ it <<= 1;
++ it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
++ it &= mask;
++ cpsr &= ~mask;
++ cpsr |= it;
++ }
++ return cpsr;
++}
++
+ #endif /* __ASSEMBLY__ */
+ #endif
+diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
+index 5d508f5d56c49..fc11ddf13b8fd 100644
+--- a/arch/arm/include/asm/smp.h
++++ b/arch/arm/include/asm/smp.h
+@@ -24,11 +24,6 @@ struct seq_file;
+ */
+ extern void show_ipi_list(struct seq_file *, int);
+
+-/*
+- * Called from assembly code, this handles an IPI.
+- */
+-asmlinkage void do_IPI(int ipinr, struct pt_regs *regs);
+-
+ /*
+ * Called from C code, this handles an IPI.
+ */
+diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h
+new file mode 100644
+index 0000000000000..85f9e538fb325
+--- /dev/null
++++ b/arch/arm/include/asm/spectre.h
+@@ -0,0 +1,38 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#ifndef __ASM_SPECTRE_H
++#define __ASM_SPECTRE_H
++
++enum {
++ SPECTRE_UNAFFECTED,
++ SPECTRE_MITIGATED,
++ SPECTRE_VULNERABLE,
++};
++
++enum {
++ __SPECTRE_V2_METHOD_BPIALL,
++ __SPECTRE_V2_METHOD_ICIALLU,
++ __SPECTRE_V2_METHOD_SMC,
++ __SPECTRE_V2_METHOD_HVC,
++ __SPECTRE_V2_METHOD_LOOP8,
++};
++
++enum {
++ SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL),
++ SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU),
++ SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC),
++ SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC),
++ SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8),
++};
++
++#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
++void spectre_v2_update_state(unsigned int state, unsigned int methods);
++#else
++static inline void spectre_v2_update_state(unsigned int state,
++ unsigned int methods)
++{}
++#endif
++
++int spectre_bhb_update_vectors(unsigned int method);
++
++#endif
+diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
+index 6f5d627c44a3c..f46b3c570f92e 100644
+--- a/arch/arm/include/asm/sync_bitops.h
++++ b/arch/arm/include/asm/sync_bitops.h
+@@ -14,14 +14,35 @@
+ * ops which are SMP safe even on a UP kernel.
+ */
+
++/*
++ * Unordered
++ */
++
+ #define sync_set_bit(nr, p) _set_bit(nr, p)
+ #define sync_clear_bit(nr, p) _clear_bit(nr, p)
+ #define sync_change_bit(nr, p) _change_bit(nr, p)
+-#define sync_test_and_set_bit(nr, p) _test_and_set_bit(nr, p)
+-#define sync_test_and_clear_bit(nr, p) _test_and_clear_bit(nr, p)
+-#define sync_test_and_change_bit(nr, p) _test_and_change_bit(nr, p)
+ #define sync_test_bit(nr, addr) test_bit(nr, addr)
+-#define arch_sync_cmpxchg arch_cmpxchg
+
++/*
++ * Fully ordered
++ */
++
++int _sync_test_and_set_bit(int nr, volatile unsigned long * p);
++#define sync_test_and_set_bit(nr, p) _sync_test_and_set_bit(nr, p)
++
++int _sync_test_and_clear_bit(int nr, volatile unsigned long * p);
++#define sync_test_and_clear_bit(nr, p) _sync_test_and_clear_bit(nr, p)
++
++int _sync_test_and_change_bit(int nr, volatile unsigned long * p);
++#define sync_test_and_change_bit(nr, p) _sync_test_and_change_bit(nr, p)
++
++#define arch_sync_cmpxchg(ptr, old, new) \
++({ \
++ __typeof__(*(ptr)) __ret; \
++ __smp_mb__before_atomic(); \
++ __ret = arch_cmpxchg_relaxed((ptr), (old), (new)); \
++ __smp_mb__after_atomic(); \
++ __ret; \
++})
+
+ #endif
+diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
+index 24c19d63ff0a1..95bf70ebd878e 100644
+--- a/arch/arm/include/asm/syscall.h
++++ b/arch/arm/include/asm/syscall.h
+@@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
+ if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
+ return task_thread_info(task)->abi_syscall;
+
++ if (task_thread_info(task)->abi_syscall == -1)
++ return -1;
++
+ return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
+ }
+
+diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
+index 9a18da3e10cc3..b682189a2b5df 100644
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -129,15 +129,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #define TIF_NEED_RESCHED 1 /* rescheduling necessary */
+ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+ #define TIF_UPROBE 3 /* breakpointed or singlestepping */
+-#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
+-#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
+-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
+-#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
++#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
+
+ #define TIF_USING_IWMMXT 17
+ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */
+-#define TIF_RESTORE_SIGMASK 20
++#define TIF_RESTORE_SIGMASK 19
++#define TIF_SYSCALL_TRACE 20 /* syscall trace active */
++#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */
++#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */
++#define TIF_SECCOMP 23 /* seccomp syscall filtering active */
++
+
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
+index 7c3b3671d6c25..6d1337c169cd3 100644
+--- a/arch/arm/include/asm/timex.h
++++ b/arch/arm/include/asm/timex.h
+@@ -11,5 +11,6 @@
+
+ typedef unsigned long cycles_t;
+ #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
++#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
+
+ #endif
+diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
+index 36fbc33292526..32dbfd81f42a4 100644
+--- a/arch/arm/include/asm/uaccess.h
++++ b/arch/arm/include/asm/uaccess.h
+@@ -11,6 +11,7 @@
+ #include <linux/string.h>
+ #include <asm/memory.h>
+ #include <asm/domain.h>
++#include <asm/unaligned.h>
+ #include <asm/unified.h>
+ #include <asm/compiler.h>
+
+@@ -497,7 +498,10 @@ do { \
+ } \
+ default: __err = __get_user_bad(); break; \
+ } \
+- *(type *)(dst) = __val; \
++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \
++ put_unaligned(__val, (type *)(dst)); \
++ else \
++ *(type *)(dst) = __val; /* aligned by caller */ \
+ if (__err) \
+ goto err_label; \
+ } while (0)
+@@ -507,7 +511,9 @@ do { \
+ const type *__pk_ptr = (dst); \
+ unsigned long __dst = (unsigned long)__pk_ptr; \
+ int __err = 0; \
+- type __val = *(type *)src; \
++ type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
++ ? get_unaligned((type *)(src)) \
++ : *(type *)(src); /* aligned by caller */ \
+ switch (sizeof(type)) { \
+ case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \
+ case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \
+diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h
+index 4a91428c324db..fad45c884e988 100644
+--- a/arch/arm/include/asm/vmlinux.lds.h
++++ b/arch/arm/include/asm/vmlinux.lds.h
+@@ -26,6 +26,19 @@
+ #define ARM_MMU_DISCARD(x) x
+ #endif
+
++/*
++ * ld.lld does not support NOCROSSREFS:
++ * https://github.com/ClangBuiltLinux/linux/issues/1609
++ */
++#ifdef CONFIG_LD_IS_LLD
++#define NOCROSSREFS
++#endif
++
++/* Set start/end symbol names to the LMA for the section */
++#define ARM_LMA(sym, section) \
++ sym##_start = LOADADDR(section); \
++ sym##_end = LOADADDR(section) + SIZEOF(section)
++
+ #define PROC_INFO \
+ . = ALIGN(4); \
+ __proc_info_begin = .; \
+@@ -110,19 +123,31 @@
+ * only thing that matters is their relative offsets
+ */
+ #define ARM_VECTORS \
+- __vectors_start = .; \
+- .vectors 0xffff0000 : AT(__vectors_start) { \
+- *(.vectors) \
++ __vectors_lma = .; \
++ OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \
++ .vectors { \
++ *(.vectors) \
++ } \
++ .vectors.bhb.loop8 { \
++ *(.vectors.bhb.loop8) \
++ } \
++ .vectors.bhb.bpiall { \
++ *(.vectors.bhb.bpiall) \
++ } \
+ } \
+- . = __vectors_start + SIZEOF(.vectors); \
+- __vectors_end = .; \
++ ARM_LMA(__vectors, .vectors); \
++ ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \
++ ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \
++ . = __vectors_lma + SIZEOF(.vectors) + \
++ SIZEOF(.vectors.bhb.loop8) + \
++ SIZEOF(.vectors.bhb.bpiall); \
+ \
+- __stubs_start = .; \
+- .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \
++ __stubs_lma = .; \
++ .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \
+ *(.stubs) \
+ } \
+- . = __stubs_start + SIZEOF(.stubs); \
+- __stubs_end = .; \
++ ARM_LMA(__stubs, .stubs); \
++ . = __stubs_lma + SIZEOF(.stubs); \
+ \
+ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
+
+diff --git a/arch/arm/include/debug/imx-uart.h b/arch/arm/include/debug/imx-uart.h
+index c8eb83d4b8964..3edbb3c5b42bf 100644
+--- a/arch/arm/include/debug/imx-uart.h
++++ b/arch/arm/include/debug/imx-uart.h
+@@ -11,13 +11,6 @@
+ #define IMX1_UART_BASE_ADDR(n) IMX1_UART##n##_BASE_ADDR
+ #define IMX1_UART_BASE(n) IMX1_UART_BASE_ADDR(n)
+
+-#define IMX21_UART1_BASE_ADDR 0x1000a000
+-#define IMX21_UART2_BASE_ADDR 0x1000b000
+-#define IMX21_UART3_BASE_ADDR 0x1000c000
+-#define IMX21_UART4_BASE_ADDR 0x1000d000
+-#define IMX21_UART_BASE_ADDR(n) IMX21_UART##n##_BASE_ADDR
+-#define IMX21_UART_BASE(n) IMX21_UART_BASE_ADDR(n)
+-
+ #define IMX25_UART1_BASE_ADDR 0x43f90000
+ #define IMX25_UART2_BASE_ADDR 0x43f94000
+ #define IMX25_UART3_BASE_ADDR 0x5000c000
+@@ -26,6 +19,13 @@
+ #define IMX25_UART_BASE_ADDR(n) IMX25_UART##n##_BASE_ADDR
+ #define IMX25_UART_BASE(n) IMX25_UART_BASE_ADDR(n)
+
++#define IMX27_UART1_BASE_ADDR 0x1000a000
++#define IMX27_UART2_BASE_ADDR 0x1000b000
++#define IMX27_UART3_BASE_ADDR 0x1000c000
++#define IMX27_UART4_BASE_ADDR 0x1000d000
++#define IMX27_UART_BASE_ADDR(n) IMX27_UART##n##_BASE_ADDR
++#define IMX27_UART_BASE(n) IMX27_UART_BASE_ADDR(n)
++
+ #define IMX31_UART1_BASE_ADDR 0x43f90000
+ #define IMX31_UART2_BASE_ADDR 0x43f94000
+ #define IMX31_UART3_BASE_ADDR 0x5000c000
+@@ -112,10 +112,10 @@
+
+ #ifdef CONFIG_DEBUG_IMX1_UART
+ #define UART_PADDR IMX_DEBUG_UART_BASE(IMX1)
+-#elif defined(CONFIG_DEBUG_IMX21_IMX27_UART)
+-#define UART_PADDR IMX_DEBUG_UART_BASE(IMX21)
+ #elif defined(CONFIG_DEBUG_IMX25_UART)
+ #define UART_PADDR IMX_DEBUG_UART_BASE(IMX25)
++#elif defined(CONFIG_DEBUG_IMX27_UART)
++#define UART_PADDR IMX_DEBUG_UART_BASE(IMX27)
+ #elif defined(CONFIG_DEBUG_IMX31_UART)
+ #define UART_PADDR IMX_DEBUG_UART_BASE(IMX31)
+ #elif defined(CONFIG_DEBUG_IMX35_UART)
+diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
+index ae295a3bcfefd..6ef3b535b7bf7 100644
+--- a/arch/arm/kernel/Makefile
++++ b/arch/arm/kernel/Makefile
+@@ -106,4 +106,6 @@ endif
+
+ obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o
+
++obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o
++
+ extra-y := $(head-y) vmlinux.lds
+diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
+index 14c8dbbb7d2df..087bce6ec8e9b 100644
+--- a/arch/arm/kernel/bugs.c
++++ b/arch/arm/kernel/bugs.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <linux/init.h>
++#include <linux/cpu.h>
+ #include <asm/bugs.h>
+ #include <asm/proc-fns.h>
+
+@@ -11,7 +12,7 @@ void check_other_bugs(void)
+ #endif
+ }
+
+-void __init check_bugs(void)
++void __init arch_cpu_finalize_init(void)
+ {
+ check_writebuffer_bugs();
+ check_other_bugs();
+diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
+index 241b73d64df73..68261a83b7ad8 100644
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -597,11 +597,9 @@ call_fpe:
+ tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
+ reteq lr
+ and r8, r0, #0x00000f00 @ mask out CP number
+- THUMB( lsr r8, r8, #8 )
+ mov r7, #1
+- add r6, r10, #TI_USED_CP
+- ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[]
+- THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[]
++ add r6, r10, r8, lsr #8 @ add used_cp[] array offset first
++ strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[]
+ #ifdef CONFIG_IWMMXT
+ @ Test if we need to give access to iWMMXt coprocessors
+ ldr r5, [r10, #TI_FLAGS]
+@@ -610,7 +608,7 @@ call_fpe:
+ bcs iwmmxt_task_enable
+ #endif
+ ARM( add pc, pc, r8, lsr #6 )
+- THUMB( lsl r8, r8, #2 )
++ THUMB( lsr r8, r8, #6 )
+ THUMB( add pc, r8 )
+ nop
+
+@@ -1002,12 +1000,11 @@ vector_\name:
+ sub lr, lr, #\correction
+ .endif
+
+- @
+- @ Save r0, lr_<exception> (parent PC) and spsr_<exception>
+- @ (parent CPSR)
+- @
++ @ Save r0, lr_<exception> (parent PC)
+ stmia sp, {r0, lr} @ save r0, lr
+- mrs lr, spsr
++
++ @ Save spsr_<exception> (parent CPSR)
++2: mrs lr, spsr
+ str lr, [sp, #8] @ save spsr
+
+ @
+@@ -1028,6 +1025,44 @@ vector_\name:
+ movs pc, lr @ branch to handler in SVC mode
+ ENDPROC(vector_\name)
+
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++ .subsection 1
++ .align 5
++vector_bhb_loop8_\name:
++ .if \correction
++ sub lr, lr, #\correction
++ .endif
++
++ @ Save r0, lr_<exception> (parent PC)
++ stmia sp, {r0, lr}
++
++ @ bhb workaround
++ mov r0, #8
++3: W(b) . + 4
++ subs r0, r0, #1
++ bne 3b
++ dsb
++ isb
++ b 2b
++ENDPROC(vector_bhb_loop8_\name)
++
++vector_bhb_bpiall_\name:
++ .if \correction
++ sub lr, lr, #\correction
++ .endif
++
++ @ Save r0, lr_<exception> (parent PC)
++ stmia sp, {r0, lr}
++
++ @ bhb workaround
++ mcr p15, 0, r0, c7, c5, 6 @ BPIALL
++ @ isb not needed due to "movs pc, lr" in the vector stub
++ @ which gives a "context synchronisation".
++ b 2b
++ENDPROC(vector_bhb_bpiall_\name)
++ .previous
++#endif
++
+ .align 2
+ @ handler addresses follow this label
+ 1:
+@@ -1036,6 +1071,10 @@ ENDPROC(vector_\name)
+ .section .stubs, "ax", %progbits
+ @ This must be the first word
+ .word vector_swi
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++ .word vector_bhb_loop8_swi
++ .word vector_bhb_bpiall_swi
++#endif
+
+ vector_rst:
+ ARM( swi SYS_ERROR0 )
+@@ -1150,8 +1189,10 @@ vector_addrexcptn:
+ * FIQ "NMI" handler
+ *-----------------------------------------------------------------------------
+ * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86
+- * systems.
++ * systems. This must be the last vector stub, so lets place it in its own
++ * subsection.
+ */
++ .subsection 2
+ vector_stub fiq, FIQ_MODE, 4
+
+ .long __fiq_usr @ 0 (USR_26 / USR_32)
+@@ -1184,6 +1225,30 @@ vector_addrexcptn:
+ W(b) vector_irq
+ W(b) vector_fiq
+
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++ .section .vectors.bhb.loop8, "ax", %progbits
++.L__vectors_bhb_loop8_start:
++ W(b) vector_rst
++ W(b) vector_bhb_loop8_und
++ W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004
++ W(b) vector_bhb_loop8_pabt
++ W(b) vector_bhb_loop8_dabt
++ W(b) vector_addrexcptn
++ W(b) vector_bhb_loop8_irq
++ W(b) vector_bhb_loop8_fiq
++
++ .section .vectors.bhb.bpiall, "ax", %progbits
++.L__vectors_bhb_bpiall_start:
++ W(b) vector_rst
++ W(b) vector_bhb_bpiall_und
++ W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008
++ W(b) vector_bhb_bpiall_pabt
++ W(b) vector_bhb_bpiall_dabt
++ W(b) vector_addrexcptn
++ W(b) vector_bhb_bpiall_irq
++ W(b) vector_bhb_bpiall_fiq
++#endif
++
+ .data
+ .align 2
+
+diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
+index d9c99db50243f..e7bfdd10bbcd3 100644
+--- a/arch/arm/kernel/entry-common.S
++++ b/arch/arm/kernel/entry-common.S
+@@ -101,6 +101,7 @@ slow_work_pending:
+ cmp r0, #0
+ beq no_work_pending
+ movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
++ str scno, [tsk, #TI_ABI_SYSCALL] @ make sure tracers see update
+ ldmia sp, {r0 - r6} @ have to reload r0 - r6
+ b local_restart @ ... and off we go
+ ENDPROC(ret_fast_syscall)
+@@ -153,6 +154,29 @@ ENDPROC(ret_from_fork)
+ *-----------------------------------------------------------------------------
+ */
+
++ .align 5
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++ENTRY(vector_bhb_loop8_swi)
++ sub sp, sp, #PT_REGS_SIZE
++ stmia sp, {r0 - r12}
++ mov r8, #8
++1: b 2f
++2: subs r8, r8, #1
++ bne 1b
++ dsb
++ isb
++ b 3f
++ENDPROC(vector_bhb_loop8_swi)
++
++ .align 5
++ENTRY(vector_bhb_bpiall_swi)
++ sub sp, sp, #PT_REGS_SIZE
++ stmia sp, {r0 - r12}
++ mcr p15, 0, r8, c7, c5, 6 @ BPIALL
++ isb
++ b 3f
++ENDPROC(vector_bhb_bpiall_swi)
++#endif
+ .align 5
+ ENTRY(vector_swi)
+ #ifdef CONFIG_CPU_V7M
+@@ -160,6 +184,7 @@ ENTRY(vector_swi)
+ #else
+ sub sp, sp, #PT_REGS_SIZE
+ stmia sp, {r0 - r12} @ Calling r0 - r12
++3:
+ ARM( add r8, sp, #S_PC )
+ ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
+ THUMB( mov r8, sp )
+diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
+index a74289ebc8036..5f1b1ce10473a 100644
+--- a/arch/arm/kernel/entry-ftrace.S
++++ b/arch/arm/kernel/entry-ftrace.S
+@@ -22,10 +22,7 @@
+ * mcount can be thought of as a function called in the middle of a subroutine
+ * call. As such, it needs to be transparent for both the caller and the
+ * callee: the original lr needs to be restored when leaving mcount, and no
+- * registers should be clobbered. (In the __gnu_mcount_nc implementation, we
+- * clobber the ip register. This is OK because the ARM calling convention
+- * allows it to be clobbered in subroutines and doesn't use it to hold
+- * parameters.)
++ * registers should be clobbered.
+ *
+ * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}"
+ * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c).
+@@ -70,26 +67,25 @@
+
+ .macro __ftrace_regs_caller
+
+- sub sp, sp, #8 @ space for PC and CPSR OLD_R0,
++ str lr, [sp, #-8]! @ store LR as PC and make space for CPSR/OLD_R0,
+ @ OLD_R0 will overwrite previous LR
+
+- add ip, sp, #12 @ move in IP the value of SP as it was
+- @ before the push {lr} of the mcount mechanism
++ ldr lr, [sp, #8] @ get previous LR
+
+- str lr, [sp, #0] @ store LR instead of PC
++ str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR
+
+- ldr lr, [sp, #8] @ get previous LR
++ str lr, [sp, #-4]! @ store previous LR as LR
+
+- str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR
++ add lr, sp, #16 @ move in LR the value of SP as it was
++ @ before the push {lr} of the mcount mechanism
+
+- stmdb sp!, {ip, lr}
+- stmdb sp!, {r0-r11, lr}
++ push {r0-r11, ip, lr}
+
+ @ stack content at this point:
+ @ 0 4 48 52 56 60 64 68 72
+- @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 |
++ @ R0 | R1 | ... | IP | SP + 4 | previous LR | LR | PSR | OLD_R0 |
+
+- mov r3, sp @ struct pt_regs*
++ mov r3, sp @ struct pt_regs*
+
+ ldr r2, =function_trace_op
+ ldr r2, [r2] @ pointer to the current
+@@ -112,11 +108,9 @@ ftrace_graph_regs_call:
+ #endif
+
+ @ pop saved regs
+- ldmia sp!, {r0-r12} @ restore r0 through r12
+- ldr ip, [sp, #8] @ restore PC
+- ldr lr, [sp, #4] @ restore LR
+- ldr sp, [sp, #0] @ restore SP
+- mov pc, ip @ return
++ pop {r0-r11, ip, lr} @ restore r0 through r12
++ ldr lr, [sp], #4 @ restore LR
++ ldr pc, [sp], #12
+ .endm
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+@@ -132,11 +126,9 @@ ftrace_graph_regs_call:
+ bl prepare_ftrace_return
+
+ @ pop registers saved in ftrace_regs_caller
+- ldmia sp!, {r0-r12} @ restore r0 through r12
+- ldr ip, [sp, #8] @ restore PC
+- ldr lr, [sp, #4] @ restore LR
+- ldr sp, [sp, #0] @ restore SP
+- mov pc, ip @ return
++ pop {r0-r11, ip, lr} @ restore r0 through r12
++ ldr lr, [sp], #4 @ restore LR
++ ldr pc, [sp], #12
+
+ .endm
+ #endif
+@@ -202,16 +194,17 @@ ftrace_graph_call\suffix:
+ .endm
+
+ .macro mcount_exit
+- ldmia sp!, {r0-r3, ip, lr}
+- ret ip
++ ldmia sp!, {r0-r3}
++ ldr lr, [sp, #4]
++ ldr pc, [sp], #8
+ .endm
+
+ ENTRY(__gnu_mcount_nc)
+ UNWIND(.fnstart)
+ #ifdef CONFIG_DYNAMIC_FTRACE
+- mov ip, lr
+- ldmia sp!, {lr}
+- ret ip
++ push {lr}
++ ldr lr, [sp, #4]
++ ldr pc, [sp], #8
+ #else
+ __mcount
+ #endif
+diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
+index 0fc814bbc34b1..8796a69c78e00 100644
+--- a/arch/arm/kernel/head-nommu.S
++++ b/arch/arm/kernel/head-nommu.S
+@@ -114,6 +114,7 @@ ENTRY(secondary_startup)
+ add r12, r12, r10
+ ret r12
+ 1: bl __after_proc_init
++ ldr r7, __secondary_data @ reload r7
+ ldr sp, [r7, #12] @ set up the stack pointer
+ mov fp, #0
+ b secondary_start_kernel
+diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
+index 7bd30c0a4280d..22f937e6f3ffb 100644
+--- a/arch/arm/kernel/kgdb.c
++++ b/arch/arm/kernel/kgdb.c
+@@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
+ return 0;
+ }
+
+-static struct undef_hook kgdb_brkpt_hook = {
++static struct undef_hook kgdb_brkpt_arm_hook = {
+ .instr_mask = 0xffffffff,
+ .instr_val = KGDB_BREAKINST,
+- .cpsr_mask = MODE_MASK,
++ .cpsr_mask = PSR_T_BIT | MODE_MASK,
+ .cpsr_val = SVC_MODE,
+ .fn = kgdb_brk_fn
+ };
+
+-static struct undef_hook kgdb_compiled_brkpt_hook = {
++static struct undef_hook kgdb_brkpt_thumb_hook = {
++ .instr_mask = 0xffff,
++ .instr_val = KGDB_BREAKINST & 0xffff,
++ .cpsr_mask = PSR_T_BIT | MODE_MASK,
++ .cpsr_val = PSR_T_BIT | SVC_MODE,
++ .fn = kgdb_brk_fn
++};
++
++static struct undef_hook kgdb_compiled_brkpt_arm_hook = {
+ .instr_mask = 0xffffffff,
+ .instr_val = KGDB_COMPILED_BREAK,
+- .cpsr_mask = MODE_MASK,
++ .cpsr_mask = PSR_T_BIT | MODE_MASK,
+ .cpsr_val = SVC_MODE,
+ .fn = kgdb_compiled_brk_fn
+ };
+
++static struct undef_hook kgdb_compiled_brkpt_thumb_hook = {
++ .instr_mask = 0xffff,
++ .instr_val = KGDB_COMPILED_BREAK & 0xffff,
++ .cpsr_mask = PSR_T_BIT | MODE_MASK,
++ .cpsr_val = PSR_T_BIT | SVC_MODE,
++ .fn = kgdb_compiled_brk_fn
++};
++
+ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+ {
+ struct pt_regs *regs = args->regs;
+@@ -210,8 +226,10 @@ int kgdb_arch_init(void)
+ if (ret != 0)
+ return ret;
+
+- register_undef_hook(&kgdb_brkpt_hook);
+- register_undef_hook(&kgdb_compiled_brkpt_hook);
++ register_undef_hook(&kgdb_brkpt_arm_hook);
++ register_undef_hook(&kgdb_brkpt_thumb_hook);
++ register_undef_hook(&kgdb_compiled_brkpt_arm_hook);
++ register_undef_hook(&kgdb_compiled_brkpt_thumb_hook);
+
+ return 0;
+ }
+@@ -224,8 +242,10 @@ int kgdb_arch_init(void)
+ */
+ void kgdb_arch_exit(void)
+ {
+- unregister_undef_hook(&kgdb_brkpt_hook);
+- unregister_undef_hook(&kgdb_compiled_brkpt_hook);
++ unregister_undef_hook(&kgdb_brkpt_arm_hook);
++ unregister_undef_hook(&kgdb_brkpt_thumb_hook);
++ unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook);
++ unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook);
+ unregister_die_notifier(&kgdb_notifier);
+ }
+
+diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
+index 1fc309b41f944..8d809724cde52 100644
+--- a/arch/arm/kernel/module-plts.c
++++ b/arch/arm/kernel/module-plts.c
+@@ -256,7 +256,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ /* sort by type and symbol index */
+ sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
+
+- if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
++ if (!module_init_layout_section(secstrings + dstsec->sh_name))
+ core_plts += count_plts(syms, dstsec->sh_addr, rels,
+ numrels, s->sh_info);
+ else
+diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
+index 3b69a76d341e7..1626dfc6f6ce6 100644
+--- a/arch/arm/kernel/perf_callchain.c
++++ b/arch/arm/kernel/perf_callchain.c
+@@ -62,9 +62,10 @@ user_backtrace(struct frame_tail __user *tail,
+ void
+ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct frame_tail __user *tail;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -98,9 +99,10 @@ callchain_trace(struct stackframe *fr,
+ void
+ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct stackframe fr;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -111,18 +113,21 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
+
+ unsigned long perf_instruction_pointer(struct pt_regs *regs)
+ {
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+- return perf_guest_cbs->get_guest_ip();
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
++ if (guest_cbs && guest_cbs->is_in_guest())
++ return guest_cbs->get_guest_ip();
+
+ return instruction_pointer(regs);
+ }
+
+ unsigned long perf_misc_flags(struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ int misc = 0;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+- if (perf_guest_cbs->is_user_mode())
++ if (guest_cbs && guest_cbs->is_in_guest()) {
++ if (guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
+index 43b963ea4a0e2..71c98ca3a455a 100644
+--- a/arch/arm/kernel/ptrace.c
++++ b/arch/arm/kernel/ptrace.c
+@@ -786,8 +786,9 @@ long arch_ptrace(struct task_struct *child, long request,
+ break;
+
+ case PTRACE_SET_SYSCALL:
+- task_thread_info(child)->abi_syscall = data &
+- __NR_SYSCALL_MASK;
++ if (data != -1)
++ data &= __NR_SYSCALL_MASK;
++ task_thread_info(child)->abi_syscall = data;
+ ret = 0;
+ break;
+
+diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
+index a41e27ace391f..539897ac28284 100644
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18);
+ static_assert(offsetof(siginfo_t, si_pkey) == 0x14);
+ static_assert(offsetof(siginfo_t, si_perf_data) == 0x10);
+ static_assert(offsetof(siginfo_t, si_perf_type) == 0x14);
++static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18);
+ static_assert(offsetof(siginfo_t, si_band) == 0x0c);
+ static_assert(offsetof(siginfo_t, si_fd) == 0x10);
+ static_assert(offsetof(siginfo_t, si_call_addr) == 0x0c);
+diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
+index 842427ff2b3cb..23d369ab7e037 100644
+--- a/arch/arm/kernel/smp.c
++++ b/arch/arm/kernel/smp.c
+@@ -622,11 +622,6 @@ static void ipi_complete(unsigned int cpu)
+ /*
+ * Main handler for inter-processor interrupts
+ */
+-asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
+-{
+- handle_IPI(ipinr, regs);
+-}
+-
+ static void do_handle_IPI(int ipinr)
+ {
+ unsigned int cpu = smp_processor_id();
+diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c
+new file mode 100644
+index 0000000000000..0dcefc36fb7a0
+--- /dev/null
++++ b/arch/arm/kernel/spectre.c
+@@ -0,0 +1,71 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include <linux/bpf.h>
++#include <linux/cpu.h>
++#include <linux/device.h>
++
++#include <asm/spectre.h>
++
++static bool _unprivileged_ebpf_enabled(void)
++{
++#ifdef CONFIG_BPF_SYSCALL
++ return !sysctl_unprivileged_bpf_disabled;
++#else
++ return false;
++#endif
++}
++
++ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
++}
++
++static unsigned int spectre_v2_state;
++static unsigned int spectre_v2_methods;
++
++void spectre_v2_update_state(unsigned int state, unsigned int method)
++{
++ if (state > spectre_v2_state)
++ spectre_v2_state = state;
++ spectre_v2_methods |= method;
++}
++
++ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ const char *method;
++
++ if (spectre_v2_state == SPECTRE_UNAFFECTED)
++ return sprintf(buf, "%s\n", "Not affected");
++
++ if (spectre_v2_state != SPECTRE_MITIGATED)
++ return sprintf(buf, "%s\n", "Vulnerable");
++
++ if (_unprivileged_ebpf_enabled())
++ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
++
++ switch (spectre_v2_methods) {
++ case SPECTRE_V2_METHOD_BPIALL:
++ method = "Branch predictor hardening";
++ break;
++
++ case SPECTRE_V2_METHOD_ICIALLU:
++ method = "I-cache invalidation";
++ break;
++
++ case SPECTRE_V2_METHOD_SMC:
++ case SPECTRE_V2_METHOD_HVC:
++ method = "Firmware call";
++ break;
++
++ case SPECTRE_V2_METHOD_LOOP8:
++ method = "History overwrite";
++ break;
++
++ default:
++ method = "Multiple mitigations";
++ break;
++ }
++
++ return sprintf(buf, "Mitigation: %s\n", method);
++}
+diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
+index 76ea4178a55cb..8247749998259 100644
+--- a/arch/arm/kernel/stacktrace.c
++++ b/arch/arm/kernel/stacktrace.c
+@@ -53,18 +53,17 @@ int notrace unwind_frame(struct stackframe *frame)
+ return -EINVAL;
+
+ frame->sp = frame->fp;
+- frame->fp = *(unsigned long *)(fp);
+- frame->pc = frame->lr;
+- frame->lr = *(unsigned long *)(fp + 4);
++ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
++ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4));
+ #else
+ /* check current frame pointer is within bounds */
+ if (fp < low + 12 || fp > high - 4)
+ return -EINVAL;
+
+ /* restore the registers from the stack frame */
+- frame->fp = *(unsigned long *)(fp - 12);
+- frame->sp = *(unsigned long *)(fp - 8);
+- frame->pc = *(unsigned long *)(fp - 4);
++ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12));
++ frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8));
++ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4));
+ #endif
+
+ return 0;
+diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
+index 6166ba38bf994..b74bfcf94fb1a 100644
+--- a/arch/arm/kernel/swp_emulate.c
++++ b/arch/arm/kernel/swp_emulate.c
+@@ -195,7 +195,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
+ destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
+
+ /* Check access in reasonable access range for both SWP and SWPB */
+- if (!access_ok((address & ~3), 4)) {
++ if (!access_ok((void __user *)(address & ~3), 4)) {
+ pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+ (void *)address);
+ res = -EFAULT;
+diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
+index 195dff58bafc7..91e757bb054e6 100644
+--- a/arch/arm/kernel/traps.c
++++ b/arch/arm/kernel/traps.c
+@@ -30,6 +30,7 @@
+ #include <linux/atomic.h>
+ #include <asm/cacheflush.h>
+ #include <asm/exception.h>
++#include <asm/spectre.h>
+ #include <asm/unistd.h>
+ #include <asm/traps.h>
+ #include <asm/ptrace.h>
+@@ -333,7 +334,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+ if (panic_on_oops)
+ panic("Fatal exception");
+ if (signr)
+- do_exit(signr);
++ make_task_dead(signr);
+ }
+
+ /*
+@@ -574,7 +575,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags)
+ if (end < start || flags)
+ return -EINVAL;
+
+- if (!access_ok(start, end - start))
++ if (!access_ok((void __user *)start, end - start))
+ return -EFAULT;
+
+ return __do_cache_op(start, end);
+@@ -787,10 +788,59 @@ static inline void __init kuser_init(void *vectors)
+ }
+ #endif
+
++#ifndef CONFIG_CPU_V7M
++static void copy_from_lma(void *vma, void *lma_start, void *lma_end)
++{
++ memcpy(vma, lma_start, lma_end - lma_start);
++}
++
++static void flush_vectors(void *vma, size_t offset, size_t size)
++{
++ unsigned long start = (unsigned long)vma + offset;
++ unsigned long end = start + size;
++
++ flush_icache_range(start, end);
++}
++
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++int spectre_bhb_update_vectors(unsigned int method)
++{
++ extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[];
++ extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[];
++ void *vec_start, *vec_end;
++
++ if (system_state > SYSTEM_SCHEDULING) {
++ pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n",
++ smp_processor_id());
++ return SPECTRE_VULNERABLE;
++ }
++
++ switch (method) {
++ case SPECTRE_V2_METHOD_LOOP8:
++ vec_start = __vectors_bhb_loop8_start;
++ vec_end = __vectors_bhb_loop8_end;
++ break;
++
++ case SPECTRE_V2_METHOD_BPIALL:
++ vec_start = __vectors_bhb_bpiall_start;
++ vec_end = __vectors_bhb_bpiall_end;
++ break;
++
++ default:
++ pr_err("CPU%u: unknown Spectre BHB state %d\n",
++ smp_processor_id(), method);
++ return SPECTRE_VULNERABLE;
++ }
++
++ copy_from_lma(vectors_page, vec_start, vec_end);
++ flush_vectors(vectors_page, 0, vec_end - vec_start);
++
++ return SPECTRE_MITIGATED;
++}
++#endif
++
+ void __init early_trap_init(void *vectors_base)
+ {
+-#ifndef CONFIG_CPU_V7M
+- unsigned long vectors = (unsigned long)vectors_base;
+ extern char __stubs_start[], __stubs_end[];
+ extern char __vectors_start[], __vectors_end[];
+ unsigned i;
+@@ -811,17 +861,20 @@ void __init early_trap_init(void *vectors_base)
+ * into the vector page, mapped at 0xffff0000, and ensure these
+ * are visible to the instruction stream.
+ */
+- memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
+- memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
++ copy_from_lma(vectors_base, __vectors_start, __vectors_end);
++ copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end);
+
+ kuser_init(vectors_base);
+
+- flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
++ flush_vectors(vectors_base, 0, PAGE_SIZE * 2);
++}
+ #else /* ifndef CONFIG_CPU_V7M */
++void __init early_trap_init(void *vectors_base)
++{
+ /*
+ * on V7-M there is no need to copy the vector table to a dedicated
+ * memory area. The address is configurable and so a table in the kernel
+ * image can be used.
+ */
+-#endif
+ }
++#endif
+diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
+index 59fdf257bf8be..d91ed8e4310c2 100644
+--- a/arch/arm/kernel/unwind.c
++++ b/arch/arm/kernel/unwind.c
+@@ -301,6 +301,29 @@ static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl,
+ return URC_OK;
+ }
+
++static unsigned long unwind_decode_uleb128(struct unwind_ctrl_block *ctrl)
++{
++ unsigned long bytes = 0;
++ unsigned long insn;
++ unsigned long result = 0;
++
++ /*
++ * unwind_get_byte() will advance `ctrl` one instruction at a time, so
++ * loop until we get an instruction byte where bit 7 is not set.
++ *
++ * Note: This decodes a maximum of 4 bytes to output 28 bits data where
++ * max is 0xfffffff: that will cover a vsp increment of 1073742336, hence
++ * it is sufficient for unwinding the stack.
++ */
++ do {
++ insn = unwind_get_byte(ctrl);
++ result |= (insn & 0x7f) << (bytes * 7);
++ bytes++;
++ } while (!!(insn & 0x80) && (bytes != sizeof(result)));
++
++ return result;
++}
++
+ /*
+ * Execute the current unwind instruction.
+ */
+@@ -354,7 +377,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
+ if (ret)
+ goto error;
+ } else if (insn == 0xb2) {
+- unsigned long uleb128 = unwind_get_byte(ctrl);
++ unsigned long uleb128 = unwind_decode_uleb128(ctrl);
+
+ ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
+ } else {
+diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
+index 95bd359912889..f069d1b2318e6 100644
+--- a/arch/arm/lib/bitops.h
++++ b/arch/arm/lib/bitops.h
+@@ -28,7 +28,7 @@ UNWIND( .fnend )
+ ENDPROC(\name )
+ .endm
+
+- .macro testop, name, instr, store
++ .macro __testop, name, instr, store, barrier
+ ENTRY( \name )
+ UNWIND( .fnstart )
+ ands ip, r1, #3
+@@ -38,7 +38,7 @@ UNWIND( .fnstart )
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+ mov r3, r2, lsl r3 @ create mask
+- smp_dmb
++ \barrier
+ #if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+@@ -50,13 +50,21 @@ UNWIND( .fnstart )
+ strex ip, r2, [r1]
+ cmp ip, #0
+ bne 1b
+- smp_dmb
++ \barrier
+ cmp r0, #0
+ movne r0, #1
+ 2: bx lr
+ UNWIND( .fnend )
+ ENDPROC(\name )
+ .endm
++
++ .macro testop, name, instr, store
++ __testop \name, \instr, \store, smp_dmb
++ .endm
++
++ .macro sync_testop, name, instr, store
++ __testop \name, \instr, \store, __smp_dmb
++ .endm
+ #else
+ .macro bitop, name, instr
+ ENTRY( \name )
+diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
+index b5e8b9ae4c7d4..7fd3600db8efd 100644
+--- a/arch/arm/lib/findbit.S
++++ b/arch/arm/lib/findbit.S
+@@ -40,8 +40,8 @@ ENDPROC(_find_first_zero_bit_le)
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ ENTRY(_find_next_zero_bit_le)
+- teq r1, #0
+- beq 3b
++ cmp r2, r1
++ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+@@ -81,8 +81,8 @@ ENDPROC(_find_first_bit_le)
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ ENTRY(_find_next_bit_le)
+- teq r1, #0
+- beq 3b
++ cmp r2, r1
++ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+@@ -115,8 +115,8 @@ ENTRY(_find_first_zero_bit_be)
+ ENDPROC(_find_first_zero_bit_be)
+
+ ENTRY(_find_next_zero_bit_be)
+- teq r1, #0
+- beq 3b
++ cmp r2, r1
++ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+@@ -149,8 +149,8 @@ ENTRY(_find_first_bit_be)
+ ENDPROC(_find_first_bit_be)
+
+ ENTRY(_find_next_bit_be)
+- teq r1, #0
+- beq 3b
++ cmp r2, r1
++ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
+index 4ebecc67e6e04..f13fe9bc2399a 100644
+--- a/arch/arm/lib/testchangebit.S
++++ b/arch/arm/lib/testchangebit.S
+@@ -10,3 +10,7 @@
+ .text
+
+ testop _test_and_change_bit, eor, str
++
++#if __LINUX_ARM_ARCH__ >= 6
++sync_testop _sync_test_and_change_bit, eor, str
++#endif
+diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
+index 009afa0f5b4a7..4d2c5ca620ebf 100644
+--- a/arch/arm/lib/testclearbit.S
++++ b/arch/arm/lib/testclearbit.S
+@@ -10,3 +10,7 @@
+ .text
+
+ testop _test_and_clear_bit, bicne, strne
++
++#if __LINUX_ARM_ARCH__ >= 6
++sync_testop _sync_test_and_clear_bit, bicne, strne
++#endif
+diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
+index f3192e55acc87..649dbab65d8d0 100644
+--- a/arch/arm/lib/testsetbit.S
++++ b/arch/arm/lib/testsetbit.S
+@@ -10,3 +10,7 @@
+ .text
+
+ testop _test_and_set_bit, orreq, streq
++
++#if __LINUX_ARM_ARCH__ >= 6
++sync_testop _sync_test_and_set_bit, orreq, streq
++#endif
+diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
+index 106f83a5ea6d2..35e03f6a62127 100644
+--- a/arch/arm/lib/uaccess_with_memcpy.c
++++ b/arch/arm/lib/uaccess_with_memcpy.c
+@@ -121,7 +121,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
+ tocopy = n;
+
+ ua_flags = uaccess_save_and_enable();
+- memcpy((void *)to, from, tocopy);
++ __memcpy((void *)to, from, tocopy);
+ uaccess_restore(ua_flags);
+ to += tocopy;
+ from += tocopy;
+@@ -188,7 +188,7 @@ __clear_user_memset(void __user *addr, unsigned long n)
+ tocopy = n;
+
+ ua_flags = uaccess_save_and_enable();
+- memset((void *)addr, 0, tocopy);
++ __memset((void *)addr, 0, tocopy);
+ uaccess_restore(ua_flags);
+ addr += tocopy;
+ n -= tocopy;
+diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
+index b99dd8e1c93f1..7ba6cf8261626 100644
+--- a/arch/arm/lib/xor-neon.c
++++ b/arch/arm/lib/xor-neon.c
+@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL");
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
++ *
++ * #warning This code requires at least version 4.6 of GCC
+ */
+-#warning This code requires at least version 4.6 of GCC
+ #endif
+
+ #pragma GCC diagnostic ignored "-Wunused-variable"
+diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
+index 8711d6824c1fa..c8cc993ca8ca1 100644
+--- a/arch/arm/mach-at91/pm.c
++++ b/arch/arm/mach-at91/pm.c
+@@ -146,7 +146,7 @@ static const struct wakeup_source_info ws_info[] = {
+
+ static const struct of_device_id sama5d2_ws_ids[] = {
+ { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] },
+- { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] },
++ { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] },
+ { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] },
+ { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] },
+ { .compatible = "usb-ohci", .data = &ws_info[2] },
+@@ -157,24 +157,24 @@ static const struct of_device_id sama5d2_ws_ids[] = {
+ };
+
+ static const struct of_device_id sam9x60_ws_ids[] = {
+- { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] },
++ { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] },
+ { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] },
+ { .compatible = "usb-ohci", .data = &ws_info[2] },
+ { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] },
+ { .compatible = "usb-ehci", .data = &ws_info[2] },
+- { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] },
++ { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] },
+ { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] },
+ { /* sentinel */ }
+ };
+
+ static const struct of_device_id sama7g5_ws_ids[] = {
+- { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] },
++ { .compatible = "microchip,sama7g5-rtc", .data = &ws_info[1] },
+ { .compatible = "microchip,sama7g5-ohci", .data = &ws_info[2] },
+ { .compatible = "usb-ohci", .data = &ws_info[2] },
+ { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] },
+ { .compatible = "usb-ehci", .data = &ws_info[2] },
+ { .compatible = "microchip,sama7g5-sdhci", .data = &ws_info[3] },
+- { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] },
++ { .compatible = "microchip,sama7g5-rtt", .data = &ws_info[4] },
+ { /* sentinel */ }
+ };
+
+@@ -350,9 +350,41 @@ extern u32 at91_pm_suspend_in_sram_sz;
+
+ static int at91_suspend_finish(unsigned long val)
+ {
++ unsigned char modified_gray_code[] = {
++ 0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05, 0x0c, 0x0d,
++ 0x0e, 0x0f, 0x0a, 0x0b, 0x08, 0x09, 0x18, 0x19, 0x1a, 0x1b,
++ 0x1e, 0x1f, 0x1c, 0x1d, 0x14, 0x15, 0x16, 0x17, 0x12, 0x13,
++ 0x10, 0x11,
++ };
++ unsigned int tmp, index;
+ int i;
+
+ if (soc_pm.data.mode == AT91_PM_BACKUP && soc_pm.data.ramc_phy) {
++ /*
++ * Bootloader will perform DDR recalibration and will try to
++ * restore the ZQ0SR0 with the value saved here. But the
++ * calibration is buggy and restoring some values from ZQ0SR0
++ * is forbidden and risky thus we need to provide processed
++ * values for these (modified gray code values).
++ */
++ tmp = readl(soc_pm.data.ramc_phy + DDR3PHY_ZQ0SR0);
++
++ /* Store pull-down output impedance select. */
++ index = (tmp >> DDR3PHY_ZQ0SR0_PDO_OFF) & 0x1f;
++ soc_pm.bu->ddr_phy_calibration[0] = modified_gray_code[index];
++
++ /* Store pull-up output impedance select. */
++ index = (tmp >> DDR3PHY_ZQ0SR0_PUO_OFF) & 0x1f;
++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
++
++ /* Store pull-down on-die termination impedance select. */
++ index = (tmp >> DDR3PHY_ZQ0SR0_PDODT_OFF) & 0x1f;
++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
++
++ /* Store pull-up on-die termination impedance select. */
++ index = (tmp >> DDR3PHY_ZQ0SRO_PUODT_OFF) & 0x1f;
++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
++
+ /*
+ * The 1st 8 words of memory might get corrupted in the process
+ * of DDR PHY recalibration; it is saved here in securam and it
+@@ -841,10 +873,6 @@ static int __init at91_pm_backup_init(void)
+ of_scan_flat_dt(at91_pm_backup_scan_memcs, &located);
+ if (!located)
+ goto securam_fail;
+-
+- /* DDR3PHY_ZQ0SR0 */
+- soc_pm.bu->ddr_phy_calibration[0] = readl(soc_pm.data.ramc_phy +
+- 0x188);
+ }
+
+ return 0;
+diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
+index fdb4f63ecde4b..2f0a370a13096 100644
+--- a/arch/arm/mach-at91/pm_suspend.S
++++ b/arch/arm/mach-at91/pm_suspend.S
+@@ -169,12 +169,23 @@ sr_ena_2:
+ cmp tmp1, #UDDRC_STAT_SELFREF_TYPE_SW
+ bne sr_ena_2
+
+- /* Put DDR PHY's DLL in bypass mode for non-backup modes. */
++ /* Disable DX DLLs for non-backup modes. */
+ cmp r7, #AT91_PM_BACKUP
+ beq sr_ena_3
+- ldr tmp1, [r3, #DDR3PHY_PIR]
+- orr tmp1, tmp1, #DDR3PHY_PIR_DLLBYP
+- str tmp1, [r3, #DDR3PHY_PIR]
++
++ /* Do not soft reset the AC DLL. */
++ ldr tmp1, [r3, DDR3PHY_ACDLLCR]
++ bic tmp1, tmp1, DDR3PHY_ACDLLCR_DLLSRST
++ str tmp1, [r3, DDR3PHY_ACDLLCR]
++
++ /* Disable DX DLLs. */
++ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR]
++ orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS
++ str tmp1, [r3, #DDR3PHY_DX0DLLCR]
++
++ ldr tmp1, [r3, #DDR3PHY_DX1DLLCR]
++ orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS
++ str tmp1, [r3, #DDR3PHY_DX1DLLCR]
+
+ sr_ena_3:
+ /* Power down DDR PHY data receivers. */
+@@ -221,10 +232,14 @@ sr_ena_3:
+ bic tmp1, tmp1, #DDR3PHY_DSGCR_ODTPDD_ODT0
+ str tmp1, [r3, #DDR3PHY_DSGCR]
+
+- /* Take DDR PHY's DLL out of bypass mode. */
+- ldr tmp1, [r3, #DDR3PHY_PIR]
+- bic tmp1, tmp1, #DDR3PHY_PIR_DLLBYP
+- str tmp1, [r3, #DDR3PHY_PIR]
++ /* Enable DX DLLs. */
++ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR]
++ bic tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS
++ str tmp1, [r3, #DDR3PHY_DX0DLLCR]
++
++ ldr tmp1, [r3, #DDR3PHY_DX1DLLCR]
++ bic tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS
++ str tmp1, [r3, #DDR3PHY_DX1DLLCR]
+
+ /* Enable quasi-dynamic programming. */
+ mov tmp1, #0
+diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c
+index 512943eae30a5..2e203626eda52 100644
+--- a/arch/arm/mach-axxia/platsmp.c
++++ b/arch/arm/mach-axxia/platsmp.c
+@@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
+ return -ENOENT;
+
+ syscon = of_iomap(syscon_np, 0);
++ of_node_put(syscon_np);
+ if (!syscon)
+ return -ENOMEM;
+
+diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
+index 43829e49ad93f..347bfb7f03e2c 100644
+--- a/arch/arm/mach-bcm/bcm_kona_smc.c
++++ b/arch/arm/mach-bcm/bcm_kona_smc.c
+@@ -52,6 +52,7 @@ int __init bcm_kona_smc_init(void)
+ return -ENODEV;
+
+ prop_val = of_get_address(node, 0, &prop_size, NULL);
++ of_node_put(node);
+ if (!prop_val)
+ return -EINVAL;
+
+diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
+index e4f4b20b83a2d..3fc4ec830e3a3 100644
+--- a/arch/arm/mach-cns3xxx/core.c
++++ b/arch/arm/mach-cns3xxx/core.c
+@@ -372,6 +372,7 @@ static void __init cns3xxx_init(void)
+ /* De-Asscer SATA Reset */
+ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA));
+ }
++ of_node_put(dn);
+
+ dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci");
+ if (of_device_is_available(dn)) {
+@@ -385,6 +386,7 @@ static void __init cns3xxx_init(void)
+ cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO));
+ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO));
+ }
++ of_node_put(dn);
+
+ pm_power_off = cns3xxx_power_off;
+
+diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
+index 428012687a802..7f7f6bae21c2d 100644
+--- a/arch/arm/mach-davinci/board-da850-evm.c
++++ b/arch/arm/mach-davinci/board-da850-evm.c
+@@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void)
+ int ret;
+ u32 val;
+ struct davinci_soc_info *soc_info = &davinci_soc_info;
+- u8 rmii_en = soc_info->emac_pdata->rmii_en;
++ u8 rmii_en;
+
+ if (!machine_is_davinci_da850_evm())
+ return 0;
+
++ rmii_en = soc_info->emac_pdata->rmii_en;
++
+ cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG);
+
+ val = __raw_readl(cfg_chip3_base);
+diff --git a/arch/arm/mach-ep93xx/timer-ep93xx.c b/arch/arm/mach-ep93xx/timer-ep93xx.c
+index dd4b164d18317..a9efa7bc2fa12 100644
+--- a/arch/arm/mach-ep93xx/timer-ep93xx.c
++++ b/arch/arm/mach-ep93xx/timer-ep93xx.c
+@@ -9,6 +9,7 @@
+ #include <linux/io.h>
+ #include <asm/mach/time.h>
+ #include "soc.h"
++#include "platform.h"
+
+ /*************************************************************************
+ * Timer handling for EP93xx
+@@ -60,7 +61,7 @@ static u64 notrace ep93xx_read_sched_clock(void)
+ return ret;
+ }
+
+-u64 ep93xx_clocksource_read(struct clocksource *c)
++static u64 ep93xx_clocksource_read(struct clocksource *c)
+ {
+ u64 ret;
+
+diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
+index 5a48abac6af49..4b554cc8fa58a 100644
+--- a/arch/arm/mach-exynos/Kconfig
++++ b/arch/arm/mach-exynos/Kconfig
+@@ -18,7 +18,6 @@ menuconfig ARCH_EXYNOS
+ select EXYNOS_PMU
+ select EXYNOS_SROM
+ select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
+- select GPIOLIB
+ select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5
+ select HAVE_ARM_SCU if SMP
+ select HAVE_S3C2410_I2C if I2C
+diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
+index 8b48326be9fd5..51a247ca4da8c 100644
+--- a/arch/arm/mach-exynos/exynos.c
++++ b/arch/arm/mach-exynos/exynos.c
+@@ -149,6 +149,7 @@ static void exynos_map_pmu(void)
+ np = of_find_matching_node(NULL, exynos_dt_pmu_match);
+ if (np)
+ pmu_base_addr = of_iomap(np, 0);
++ of_node_put(np);
+ }
+
+ static void __init exynos_init_irq(void)
+diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c
+index a56cc64deeb8f..9ce93e0b6cdc3 100644
+--- a/arch/arm/mach-hisi/platsmp.c
++++ b/arch/arm/mach-hisi/platsmp.c
+@@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus)
+ }
+ ctrl_base = of_iomap(np, 0);
+ if (!ctrl_base) {
++ of_node_put(np);
+ pr_err("failed to map address\n");
+ return;
+ }
+ if (of_property_read_u32(np, "smp-offset", &offset) < 0) {
++ of_node_put(np);
+ pr_err("failed to find smp-offset property\n");
+ return;
+ }
+ ctrl_base += offset;
++ of_node_put(np);
+ }
+ }
+
+@@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle)
+ if (WARN_ON(!node))
+ return -1;
+ ctrl_base = of_iomap(node, 0);
++ of_node_put(node);
+
+ /* set the secondary core boot from DDR */
+ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL);
+diff --git a/arch/arm/mach-imx/cpu-imx25.c b/arch/arm/mach-imx/cpu-imx25.c
+index b2e1963f473de..2ee2d2813d577 100644
+--- a/arch/arm/mach-imx/cpu-imx25.c
++++ b/arch/arm/mach-imx/cpu-imx25.c
+@@ -23,6 +23,7 @@ static int mx25_read_cpu_rev(void)
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx25-iim");
+ iim_base = of_iomap(np, 0);
++ of_node_put(np);
+ BUG_ON(!iim_base);
+ rev = readl(iim_base + MXC_IIMSREV);
+ iounmap(iim_base);
+diff --git a/arch/arm/mach-imx/cpu-imx27.c b/arch/arm/mach-imx/cpu-imx27.c
+index bf70e13bbe9ee..1d28939083683 100644
+--- a/arch/arm/mach-imx/cpu-imx27.c
++++ b/arch/arm/mach-imx/cpu-imx27.c
+@@ -28,6 +28,7 @@ static int mx27_read_cpu_rev(void)
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx27-ccm");
+ ccm_base = of_iomap(np, 0);
++ of_node_put(np);
+ BUG_ON(!ccm_base);
+ /*
+ * now we have access to the IO registers. As we need
+diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c
+index b9c24b851d1ab..35c544924e509 100644
+--- a/arch/arm/mach-imx/cpu-imx31.c
++++ b/arch/arm/mach-imx/cpu-imx31.c
+@@ -39,6 +39,7 @@ static int mx31_read_cpu_rev(void)
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx31-iim");
+ iim_base = of_iomap(np, 0);
++ of_node_put(np);
+ BUG_ON(!iim_base);
+
+ /* read SREV register from IIM module */
+diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c
+index 80e7d8ab9f1bb..1fe75b39c2d99 100644
+--- a/arch/arm/mach-imx/cpu-imx35.c
++++ b/arch/arm/mach-imx/cpu-imx35.c
+@@ -21,6 +21,7 @@ static int mx35_read_cpu_rev(void)
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx35-iim");
+ iim_base = of_iomap(np, 0);
++ of_node_put(np);
+ BUG_ON(!iim_base);
+
+ rev = imx_readl(iim_base + MXC_IIMSREV);
+diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c
+index ad56263778f93..a67c89bf155dd 100644
+--- a/arch/arm/mach-imx/cpu-imx5.c
++++ b/arch/arm/mach-imx/cpu-imx5.c
+@@ -28,6 +28,7 @@ static u32 imx5_read_srev_reg(const char *compat)
+
+ np = of_find_compatible_node(NULL, NULL, compat);
+ iim_base = of_iomap(np, 0);
++ of_node_put(np);
+ WARN_ON(!iim_base);
+
+ srev = readl(iim_base + IIM_SREV) & 0xff;
+diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
+index af12668d0bf51..b9efe9da06e0b 100644
+--- a/arch/arm/mach-imx/mmdc.c
++++ b/arch/arm/mach-imx/mmdc.c
+@@ -99,6 +99,7 @@ struct mmdc_pmu {
+ cpumask_t cpu;
+ struct hrtimer hrtimer;
+ unsigned int active_events;
++ int id;
+ struct device *dev;
+ struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+ struct hlist_node node;
+@@ -433,8 +434,6 @@ static enum hrtimer_restart mmdc_pmu_timer_handler(struct hrtimer *hrtimer)
+ static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc,
+ void __iomem *mmdc_base, struct device *dev)
+ {
+- int mmdc_num;
+-
+ *pmu_mmdc = (struct mmdc_pmu) {
+ .pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+@@ -452,15 +451,16 @@ static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc,
+ .active_events = 0,
+ };
+
+- mmdc_num = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
++ pmu_mmdc->id = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
+
+- return mmdc_num;
++ return pmu_mmdc->id;
+ }
+
+ static int imx_mmdc_remove(struct platform_device *pdev)
+ {
+ struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
+
++ ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
+ cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
+ perf_pmu_unregister(&pmu_mmdc->pmu);
+ iounmap(pmu_mmdc->mmdc_base);
+@@ -474,7 +474,6 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
+ {
+ struct mmdc_pmu *pmu_mmdc;
+ char *name;
+- int mmdc_num;
+ int ret;
+ const struct of_device_id *of_id =
+ of_match_device(imx_mmdc_dt_ids, &pdev->dev);
+@@ -497,14 +496,14 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
+ cpuhp_mmdc_state = ret;
+ }
+
+- mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
+- pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
+- if (mmdc_num == 0)
+- name = "mmdc";
+- else
+- name = devm_kasprintf(&pdev->dev,
+- GFP_KERNEL, "mmdc%d", mmdc_num);
++ ret = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
++ if (ret < 0)
++ goto pmu_free;
+
++ name = devm_kasprintf(&pdev->dev,
++ GFP_KERNEL, "mmdc%d", ret);
++
++ pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
+ pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data;
+
+ hrtimer_init(&pmu_mmdc->hrtimer, CLOCK_MONOTONIC,
+@@ -525,6 +524,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
+
+ pmu_register_err:
+ pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
++ ida_simple_remove(&mmdc_ida, pmu_mmdc->id);
+ cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
+ hrtimer_cancel(&pmu_mmdc->hrtimer);
+ pmu_free:
+diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S
+index 8e6766d4621eb..341e5d9a6616d 100644
+--- a/arch/arm/mach-iop32x/include/mach/entry-macro.S
++++ b/arch/arm/mach-iop32x/include/mach/entry-macro.S
+@@ -20,7 +20,7 @@
+ mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC
+ cmp \irqstat, #0
+ clzne \irqnr, \irqstat
+- rsbne \irqnr, \irqnr, #31
++ rsbne \irqnr, \irqnr, #32
+ .endm
+
+ .macro arch_ret_to_user, tmp1, tmp2
+diff --git a/arch/arm/mach-iop32x/include/mach/irqs.h b/arch/arm/mach-iop32x/include/mach/irqs.h
+index c4e78df428e86..e09ae5f48aec5 100644
+--- a/arch/arm/mach-iop32x/include/mach/irqs.h
++++ b/arch/arm/mach-iop32x/include/mach/irqs.h
+@@ -9,6 +9,6 @@
+ #ifndef __IRQS_H
+ #define __IRQS_H
+
+-#define NR_IRQS 32
++#define NR_IRQS 33
+
+ #endif
+diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c
+index 2d48bf1398c10..d1e8824cbd824 100644
+--- a/arch/arm/mach-iop32x/irq.c
++++ b/arch/arm/mach-iop32x/irq.c
+@@ -32,14 +32,14 @@ static void intstr_write(u32 val)
+ static void
+ iop32x_irq_mask(struct irq_data *d)
+ {
+- iop32x_mask &= ~(1 << d->irq);
++ iop32x_mask &= ~(1 << (d->irq - 1));
+ intctl_write(iop32x_mask);
+ }
+
+ static void
+ iop32x_irq_unmask(struct irq_data *d)
+ {
+- iop32x_mask |= 1 << d->irq;
++ iop32x_mask |= 1 << (d->irq - 1);
+ intctl_write(iop32x_mask);
+ }
+
+@@ -65,7 +65,7 @@ void __init iop32x_init_irq(void)
+ machine_is_em7210())
+ *IOP3XX_PCIIRSR = 0x0f;
+
+- for (i = 0; i < NR_IRQS; i++) {
++ for (i = 1; i < NR_IRQS; i++) {
+ irq_set_chip_and_handler(i, &ext_chip, handle_level_irq);
+ irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE);
+ }
+diff --git a/arch/arm/mach-iop32x/irqs.h b/arch/arm/mach-iop32x/irqs.h
+index 69858e4e905d1..e1dfc8b4e7d7e 100644
+--- a/arch/arm/mach-iop32x/irqs.h
++++ b/arch/arm/mach-iop32x/irqs.h
+@@ -7,36 +7,40 @@
+ #ifndef __IOP32X_IRQS_H
+ #define __IOP32X_IRQS_H
+
++/* Interrupts in Linux start at 1, hardware starts at 0 */
++
++#define IOP_IRQ(x) ((x) + 1)
++
+ /*
+ * IOP80321 chipset interrupts
+ */
+-#define IRQ_IOP32X_DMA0_EOT 0
+-#define IRQ_IOP32X_DMA0_EOC 1
+-#define IRQ_IOP32X_DMA1_EOT 2
+-#define IRQ_IOP32X_DMA1_EOC 3
+-#define IRQ_IOP32X_AA_EOT 6
+-#define IRQ_IOP32X_AA_EOC 7
+-#define IRQ_IOP32X_CORE_PMON 8
+-#define IRQ_IOP32X_TIMER0 9
+-#define IRQ_IOP32X_TIMER1 10
+-#define IRQ_IOP32X_I2C_0 11
+-#define IRQ_IOP32X_I2C_1 12
+-#define IRQ_IOP32X_MESSAGING 13
+-#define IRQ_IOP32X_ATU_BIST 14
+-#define IRQ_IOP32X_PERFMON 15
+-#define IRQ_IOP32X_CORE_PMU 16
+-#define IRQ_IOP32X_BIU_ERR 17
+-#define IRQ_IOP32X_ATU_ERR 18
+-#define IRQ_IOP32X_MCU_ERR 19
+-#define IRQ_IOP32X_DMA0_ERR 20
+-#define IRQ_IOP32X_DMA1_ERR 21
+-#define IRQ_IOP32X_AA_ERR 23
+-#define IRQ_IOP32X_MSG_ERR 24
+-#define IRQ_IOP32X_SSP 25
+-#define IRQ_IOP32X_XINT0 27
+-#define IRQ_IOP32X_XINT1 28
+-#define IRQ_IOP32X_XINT2 29
+-#define IRQ_IOP32X_XINT3 30
+-#define IRQ_IOP32X_HPI 31
++#define IRQ_IOP32X_DMA0_EOT IOP_IRQ(0)
++#define IRQ_IOP32X_DMA0_EOC IOP_IRQ(1)
++#define IRQ_IOP32X_DMA1_EOT IOP_IRQ(2)
++#define IRQ_IOP32X_DMA1_EOC IOP_IRQ(3)
++#define IRQ_IOP32X_AA_EOT IOP_IRQ(6)
++#define IRQ_IOP32X_AA_EOC IOP_IRQ(7)
++#define IRQ_IOP32X_CORE_PMON IOP_IRQ(8)
++#define IRQ_IOP32X_TIMER0 IOP_IRQ(9)
++#define IRQ_IOP32X_TIMER1 IOP_IRQ(10)
++#define IRQ_IOP32X_I2C_0 IOP_IRQ(11)
++#define IRQ_IOP32X_I2C_1 IOP_IRQ(12)
++#define IRQ_IOP32X_MESSAGING IOP_IRQ(13)
++#define IRQ_IOP32X_ATU_BIST IOP_IRQ(14)
++#define IRQ_IOP32X_PERFMON IOP_IRQ(15)
++#define IRQ_IOP32X_CORE_PMU IOP_IRQ(16)
++#define IRQ_IOP32X_BIU_ERR IOP_IRQ(17)
++#define IRQ_IOP32X_ATU_ERR IOP_IRQ(18)
++#define IRQ_IOP32X_MCU_ERR IOP_IRQ(19)
++#define IRQ_IOP32X_DMA0_ERR IOP_IRQ(20)
++#define IRQ_IOP32X_DMA1_ERR IOP_IRQ(21)
++#define IRQ_IOP32X_AA_ERR IOP_IRQ(23)
++#define IRQ_IOP32X_MSG_ERR IOP_IRQ(24)
++#define IRQ_IOP32X_SSP IOP_IRQ(25)
++#define IRQ_IOP32X_XINT0 IOP_IRQ(27)
++#define IRQ_IOP32X_XINT1 IOP_IRQ(28)
++#define IRQ_IOP32X_XINT2 IOP_IRQ(29)
++#define IRQ_IOP32X_XINT3 IOP_IRQ(30)
++#define IRQ_IOP32X_HPI IOP_IRQ(31)
+
+ #endif
+diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig
+index 9e0f592d87d8e..35a3430c7942d 100644
+--- a/arch/arm/mach-mediatek/Kconfig
++++ b/arch/arm/mach-mediatek/Kconfig
+@@ -30,6 +30,7 @@ config MACH_MT7623
+ config MACH_MT7629
+ bool "MediaTek MT7629 SoCs support"
+ default ARCH_MEDIATEK
++ select HAVE_ARM_ARCH_TIMER
+
+ config MACH_MT8127
+ bool "MediaTek MT8127 SoCs support"
+diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c
+index 4b8ad728bb42a..32ac60b89fdcc 100644
+--- a/arch/arm/mach-meson/platsmp.c
++++ b/arch/arm/mach-meson/platsmp.c
+@@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
+ }
+
+ sram_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!sram_base) {
+ pr_err("Couldn't map SRAM registers\n");
+ return;
+@@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
+ }
+
+ scu_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!scu_base) {
+ pr_err("Couldn't map SCU registers\n");
+ return;
+diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c
+index 6794e2db1ad5f..ecc46c31004f6 100644
+--- a/arch/arm/mach-mmp/sram.c
++++ b/arch/arm/mach-mmp/sram.c
+@@ -72,6 +72,8 @@ static int sram_probe(struct platform_device *pdev)
+ if (!info)
+ return -ENOMEM;
+
++ platform_set_drvdata(pdev, info);
++
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (res == NULL) {
+ dev_err(&pdev->dev, "no memory resource defined\n");
+@@ -107,8 +109,6 @@ static int sram_probe(struct platform_device *pdev)
+ list_add(&info->node, &sram_bank_list);
+ mutex_unlock(&sram_lock);
+
+- platform_set_drvdata(pdev, info);
+-
+ dev_info(&pdev->dev, "initialized\n");
+ return 0;
+
+@@ -127,17 +127,19 @@ static int sram_remove(struct platform_device *pdev)
+ struct sram_bank_info *info;
+
+ info = platform_get_drvdata(pdev);
+- if (info == NULL)
+- return -ENODEV;
+
+- mutex_lock(&sram_lock);
+- list_del(&info->node);
+- mutex_unlock(&sram_lock);
++ if (info->sram_size) {
++ mutex_lock(&sram_lock);
++ list_del(&info->node);
++ mutex_unlock(&sram_lock);
++
++ gen_pool_destroy(info->gpool);
++ iounmap(info->sram_virt);
++ kfree(info->pool_name);
++ }
+
+- gen_pool_destroy(info->gpool);
+- iounmap(info->sram_virt);
+- kfree(info->pool_name);
+ kfree(info);
++
+ return 0;
+ }
+
+diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
+index 41b2e8abc9e69..708816caf859c 100644
+--- a/arch/arm/mach-mmp/time.c
++++ b/arch/arm/mach-mmp/time.c
+@@ -43,18 +43,21 @@
+ static void __iomem *mmp_timer_base = TIMERS_VIRT_BASE;
+
+ /*
+- * FIXME: the timer needs some delay to stablize the counter capture
++ * Read the timer through the CVWR register. Delay is required after requesting
++ * a read. The CR register cannot be directly read due to metastability issues
++ * documented in the PXA168 software manual.
+ */
+ static inline uint32_t timer_read(void)
+ {
+- int delay = 100;
++ uint32_t val;
++ int delay = 3;
+
+ __raw_writel(1, mmp_timer_base + TMR_CVWR(1));
+
+ while (delay--)
+- cpu_relax();
++ val = __raw_readl(mmp_timer_base + TMR_CVWR(1));
+
+- return __raw_readl(mmp_timer_base + TMR_CVWR(1));
++ return val;
+ }
+
+ static u64 notrace mmp_read_sched_clock(void)
+diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig
+index cd300eeedc206..0bf4d312bcfd9 100644
+--- a/arch/arm/mach-mstar/Kconfig
++++ b/arch/arm/mach-mstar/Kconfig
+@@ -3,6 +3,7 @@ menuconfig ARCH_MSTARV7
+ depends on ARCH_MULTI_V7
+ select ARM_GIC
+ select ARM_HEAVY_MB
++ select HAVE_ARM_ARCH_TIMER
+ select MST_IRQ
+ select MSTAR_MSC313_MPLL
+ help
+diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
+index 25c9d184fa4c6..1c57ac4016493 100644
+--- a/arch/arm/mach-mxs/mach-mxs.c
++++ b/arch/arm/mach-mxs/mach-mxs.c
+@@ -393,8 +393,10 @@ static void __init mxs_machine_init(void)
+
+ root = of_find_node_by_path("/");
+ ret = of_property_read_string(root, "model", &soc_dev_attr->machine);
+- if (ret)
++ if (ret) {
++ kfree(soc_dev_attr);
+ return;
++ }
+
+ soc_dev_attr->family = "Freescale MXS Family";
+ soc_dev_attr->soc_id = mxs_get_soc_id();
+diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
+index 9d4a0ab50a468..d63d5eb8d8fdf 100644
+--- a/arch/arm/mach-omap1/clock.c
++++ b/arch/arm/mach-omap1/clock.c
+@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock);
+ unsigned long omap1_uart_recalc(struct clk *clk)
+ {
+ unsigned int val = __raw_readl(clk->enable_reg);
+- return val & clk->enable_bit ? 48000000 : 12000000;
++ return val & 1 << clk->enable_bit ? 48000000 : 12000000;
+ }
+
+ unsigned long omap1_sossi_recalc(struct clk *clk)
+diff --git a/arch/arm/mach-omap1/timer.c b/arch/arm/mach-omap1/timer.c
+index 0411d5508d637..7046d7fa7a0aa 100644
+--- a/arch/arm/mach-omap1/timer.c
++++ b/arch/arm/mach-omap1/timer.c
+@@ -165,7 +165,7 @@ err_free_pdata:
+ kfree(pdata);
+
+ err_free_pdev:
+- platform_device_unregister(pdev);
++ platform_device_put(pdev);
+
+ return ret;
+ }
+diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
+index 1610c567a6a3a..10d2f078e4a8e 100644
+--- a/arch/arm/mach-omap2/board-generic.c
++++ b/arch/arm/mach-omap2/board-generic.c
+@@ -13,6 +13,7 @@
+ #include <linux/of_platform.h>
+ #include <linux/irqdomain.h>
+ #include <linux/clocksource.h>
++#include <linux/clockchips.h>
+
+ #include <asm/setup.h>
+ #include <asm/mach/arch.h>
+diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
+index 6daaa645ae5d9..8d829f3dafe76 100644
+--- a/arch/arm/mach-omap2/display.c
++++ b/arch/arm/mach-omap2/display.c
+@@ -211,6 +211,7 @@ static int __init omapdss_init_fbdev(void)
+ node = of_find_node_by_name(NULL, "omap4_padconf_global");
+ if (node)
+ omap4_dsi_mux_syscon = syscon_node_to_regmap(node);
++ of_node_put(node);
+
+ return 0;
+ }
+@@ -259,13 +260,15 @@ static int __init omapdss_init_of(void)
+
+ if (!pdev) {
+ pr_err("Unable to find DSS platform device\n");
++ of_node_put(node);
+ return -ENODEV;
+ }
+
+ r = of_platform_populate(node, NULL, NULL, &pdev->dev);
++ put_device(&pdev->dev);
++ of_node_put(node);
+ if (r) {
+ pr_err("Unable to populate DSS submodule devices\n");
+- put_device(&pdev->dev);
+ return r;
+ }
+
+diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
+index 5c3845730dbf5..0b80f8bcd3047 100644
+--- a/arch/arm/mach-omap2/omap4-common.c
++++ b/arch/arm/mach-omap2/omap4-common.c
+@@ -314,10 +314,12 @@ void __init omap_gic_of_init(void)
+
+ np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic");
+ gic_dist_base_addr = of_iomap(np, 0);
++ of_node_put(np);
+ WARN_ON(!gic_dist_base_addr);
+
+ np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer");
+ twd_base = of_iomap(np, 0);
++ of_node_put(np);
+ WARN_ON(!twd_base);
+
+ skip_errata_init:
+diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
+index 0c2936c7a3799..a5e9cffcac10c 100644
+--- a/arch/arm/mach-omap2/omap_hwmod.c
++++ b/arch/arm/mach-omap2/omap_hwmod.c
+@@ -752,8 +752,10 @@ static int __init _init_clkctrl_providers(void)
+
+ for_each_matching_node(np, ti_clkctrl_match_table) {
+ ret = _setup_clkctrl_provider(np);
+- if (ret)
++ if (ret) {
++ of_node_put(np);
+ break;
++ }
+ }
+
+ return ret;
+diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
+index 765809b214e71..bf50acd6b8a3d 100644
+--- a/arch/arm/mach-omap2/pdata-quirks.c
++++ b/arch/arm/mach-omap2/pdata-quirks.c
+@@ -587,6 +587,8 @@ pdata_quirks_init_clocks(const struct of_device_id *omap_dt_match_table)
+
+ of_platform_populate(np, omap_dt_match_table,
+ omap_auxdata_lookup, NULL);
++
++ of_node_put(np);
+ }
+ }
+
+diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
+index 0a5b87e2a4b07..37d23ae2e9dbe 100644
+--- a/arch/arm/mach-omap2/powerdomain.c
++++ b/arch/arm/mach-omap2/powerdomain.c
+@@ -174,7 +174,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag)
+ break;
+ case PWRDM_STATE_PREV:
+ prev = pwrdm_read_prev_pwrst(pwrdm);
+- if (pwrdm->state != prev)
++ if (prev >= 0 && pwrdm->state != prev)
+ pwrdm->state_counter[prev]++;
+ if (prev == PWRDM_POWER_RET)
+ _update_logic_membank_counters(pwrdm);
+diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c
+index 1b442b1285693..63e73e9b82bc6 100644
+--- a/arch/arm/mach-omap2/prm3xxx.c
++++ b/arch/arm/mach-omap2/prm3xxx.c
+@@ -708,6 +708,7 @@ static int omap3xxx_prm_late_init(void)
+ }
+
+ irq_num = of_irq_get(np, 0);
++ of_node_put(np);
+ if (irq_num == -EPROBE_DEFER)
+ return irq_num;
+
+diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
+index 620ba69c8f114..5677c4a08f376 100644
+--- a/arch/arm/mach-omap2/timer.c
++++ b/arch/arm/mach-omap2/timer.c
+@@ -76,6 +76,7 @@ static void __init realtime_counter_init(void)
+ }
+
+ rate = clk_get_rate(sys_clk);
++ clk_put(sys_clk);
+
+ if (soc_is_dra7xx()) {
+ /*
+diff --git a/arch/arm/mach-orion5x/board-dt.c b/arch/arm/mach-orion5x/board-dt.c
+index 3d36f1d951964..3f651df3a71cf 100644
+--- a/arch/arm/mach-orion5x/board-dt.c
++++ b/arch/arm/mach-orion5x/board-dt.c
+@@ -63,6 +63,9 @@ static void __init orion5x_dt_init(void)
+ if (of_machine_is_compatible("maxtor,shared-storage-2"))
+ mss2_init();
+
++ if (of_machine_is_compatible("lacie,d2-network"))
++ d2net_init();
++
+ of_platform_default_populate(NULL, orion5x_auxdata_lookup, NULL);
+ }
+
+diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
+index eb96009e21c4c..b9cfdb4564568 100644
+--- a/arch/arm/mach-orion5x/common.h
++++ b/arch/arm/mach-orion5x/common.h
+@@ -75,6 +75,12 @@ extern void mss2_init(void);
+ static inline void mss2_init(void) {}
+ #endif
+
++#ifdef CONFIG_MACH_D2NET_DT
++void d2net_init(void);
++#else
++static inline void d2net_init(void) {}
++#endif
++
+ /*****************************************************************************
+ * Helpers to access Orion registers
+ ****************************************************************************/
+diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
+index 2e35354b61f56..167e871f059ef 100644
+--- a/arch/arm/mach-pxa/cm-x300.c
++++ b/arch/arm/mach-pxa/cm-x300.c
+@@ -354,13 +354,13 @@ static struct platform_device cm_x300_spi_gpio = {
+ static struct gpiod_lookup_table cm_x300_spi_gpiod_table = {
+ .dev_id = "spi_gpio",
+ .table = {
+- GPIO_LOOKUP("gpio-pxa", GPIO_LCD_SCL,
++ GPIO_LOOKUP("pca9555.1", GPIO_LCD_SCL - GPIO_LCD_BASE,
+ "sck", GPIO_ACTIVE_HIGH),
+- GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DIN,
++ GPIO_LOOKUP("pca9555.1", GPIO_LCD_DIN - GPIO_LCD_BASE,
+ "mosi", GPIO_ACTIVE_HIGH),
+- GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DOUT,
++ GPIO_LOOKUP("pca9555.1", GPIO_LCD_DOUT - GPIO_LCD_BASE,
+ "miso", GPIO_ACTIVE_HIGH),
+- GPIO_LOOKUP("gpio-pxa", GPIO_LCD_CS,
++ GPIO_LOOKUP("pca9555.1", GPIO_LCD_CS - GPIO_LCD_BASE,
+ "cs", GPIO_ACTIVE_HIGH),
+ { },
+ },
+diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
+index cd9fa465b9b2a..9aee8e0f2bb1d 100644
+--- a/arch/arm/mach-pxa/magician.c
++++ b/arch/arm/mach-pxa/magician.c
+@@ -681,7 +681,7 @@ static struct platform_device bq24022 = {
+ static struct gpiod_lookup_table bq24022_gpiod_table = {
+ .dev_id = "gpio-regulator",
+ .table = {
+- GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2,
++ GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_BQ24022_ISET2 - MAGICIAN_EGPIO_BASE,
+ NULL, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN,
+ "enable", GPIO_ACTIVE_LOW),
+diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
+index 83cfbb882a2d4..7f6bd7f069e49 100644
+--- a/arch/arm/mach-pxa/sharpsl_pm.c
++++ b/arch/arm/mach-pxa/sharpsl_pm.c
+@@ -220,8 +220,6 @@ void sharpsl_battery_kick(void)
+ {
+ schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(125));
+ }
+-EXPORT_SYMBOL(sharpsl_battery_kick);
+-
+
+ static void sharpsl_battery_thread(struct work_struct *private_)
+ {
+diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
+index 371008e9bb029..264de0bc97d68 100644
+--- a/arch/arm/mach-pxa/spitz.c
++++ b/arch/arm/mach-pxa/spitz.c
+@@ -9,7 +9,6 @@
+ */
+
+ #include <linux/kernel.h>
+-#include <linux/module.h> /* symbol_get ; symbol_put */
+ #include <linux/platform_device.h>
+ #include <linux/delay.h>
+ #include <linux/gpio_keys.h>
+@@ -514,17 +513,6 @@ static struct pxa2xx_spi_chip spitz_ads7846_chip = {
+ .gpio_cs = SPITZ_GPIO_ADS7846_CS,
+ };
+
+-static void spitz_bl_kick_battery(void)
+-{
+- void (*kick_batt)(void);
+-
+- kick_batt = symbol_get(sharpsl_battery_kick);
+- if (kick_batt) {
+- kick_batt();
+- symbol_put(sharpsl_battery_kick);
+- }
+-}
+-
+ static struct gpiod_lookup_table spitz_lcdcon_gpio_table = {
+ .dev_id = "spi2.1",
+ .table = {
+@@ -552,7 +540,7 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = {
+ .max_intensity = 0x2f,
+ .default_intensity = 0x1f,
+ .limit_mask = 0x0b,
+- .kick_battery = spitz_bl_kick_battery,
++ .kick_battery = sharpsl_battery_kick,
+ };
+
+ static struct pxa2xx_spi_chip spitz_lcdcon_chip = {
+diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
+index 431709725d02b..ded5e343e1984 100644
+--- a/arch/arm/mach-pxa/tosa.c
++++ b/arch/arm/mach-pxa/tosa.c
+@@ -296,9 +296,9 @@ static struct gpiod_lookup_table tosa_mci_gpio_table = {
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT,
+ "cd", GPIO_ACTIVE_LOW),
+- GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP,
++ GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_SD_WP - TOSA_SCOOP_GPIO_BASE,
+ "wp", GPIO_ACTIVE_LOW),
+- GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON,
++ GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_PWR_ON - TOSA_SCOOP_GPIO_BASE,
+ "power", GPIO_ACTIVE_HIGH),
+ { },
+ },
+diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c
+index 3edc5f614eefc..c1c2f041ad3b1 100644
+--- a/arch/arm/mach-s3c/irq-s3c24xx.c
++++ b/arch/arm/mach-s3c/irq-s3c24xx.c
+@@ -361,11 +361,25 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc,
+ static asmlinkage void __exception_irq_entry s3c24xx_handle_irq(struct pt_regs *regs)
+ {
+ do {
+- if (likely(s3c_intc[0]))
+- if (s3c24xx_handle_intc(s3c_intc[0], regs, 0))
+- continue;
++ /*
++ * For platform based machines, neither ERR nor NULL can happen here.
++ * The s3c24xx_handle_irq() will be set as IRQ handler iff this succeeds:
++ *
++ * s3c_intc[0] = s3c24xx_init_intc()
++ *
++ * If this fails, the next calls to s3c24xx_init_intc() won't be executed.
++ *
++ * For DT machine, s3c_init_intc_of() could set the IRQ handler without
++ * setting s3c_intc[0] only if it was called with num_ctrl=0. There is no
++ * such code path, so again the s3c_intc[0] will have a valid pointer if
++ * set_handle_irq() is called.
++ *
++ * Therefore in s3c24xx_handle_irq(), the s3c_intc[0] is always something.
++ */
++ if (s3c24xx_handle_intc(s3c_intc[0], regs, 0))
++ continue;
+
+- if (s3c_intc[2])
++ if (!IS_ERR_OR_NULL(s3c_intc[2]))
+ if (s3c24xx_handle_intc(s3c_intc[2], regs, 64))
+ continue;
+
+diff --git a/arch/arm/mach-s3c/mach-jive.c b/arch/arm/mach-s3c/mach-jive.c
+index 0785638a9069b..7d15b84ae217e 100644
+--- a/arch/arm/mach-s3c/mach-jive.c
++++ b/arch/arm/mach-s3c/mach-jive.c
+@@ -236,11 +236,11 @@ static int __init jive_mtdset(char *options)
+ unsigned long set;
+
+ if (options == NULL || options[0] == '\0')
+- return 0;
++ return 1;
+
+ if (kstrtoul(options, 10, &set)) {
+ printk(KERN_ERR "failed to parse mtdset=%s\n", options);
+- return 0;
++ return 1;
+ }
+
+ switch (set) {
+@@ -255,7 +255,7 @@ static int __init jive_mtdset(char *options)
+ "using default.", set);
+ }
+
+- return 0;
++ return 1;
+ }
+
+ /* parse the mtdset= option given to the kernel command line */
+diff --git a/arch/arm/mach-s3c/s3c64xx.c b/arch/arm/mach-s3c/s3c64xx.c
+index 4dfb648142f2a..17f0065031490 100644
+--- a/arch/arm/mach-s3c/s3c64xx.c
++++ b/arch/arm/mach-s3c/s3c64xx.c
+@@ -173,7 +173,8 @@ static struct samsung_pwm_variant s3c64xx_pwm_variant = {
+ .tclk_mask = (1 << 7) | (1 << 6) | (1 << 5),
+ };
+
+-void __init s3c64xx_set_timer_source(unsigned int event, unsigned int source)
++void __init s3c64xx_set_timer_source(enum s3c64xx_timer_mode event,
++ enum s3c64xx_timer_mode source)
+ {
+ s3c64xx_pwm_variant.output_mask = BIT(SAMSUNG_PWM_NUM) - 1;
+ s3c64xx_pwm_variant.output_mask &= ~(BIT(event) | BIT(source));
+diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
+index 1dbe98948ce30..9627c4cf3e41d 100644
+--- a/arch/arm/mach-sa1100/jornada720_ssp.c
++++ b/arch/arm/mach-sa1100/jornada720_ssp.c
+@@ -1,5 +1,5 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+-/**
++/*
+ * arch/arm/mac-sa1100/jornada720_ssp.c
+ *
+ * Copyright (C) 2006/2007 Kristoffer Ericson <Kristoffer.Ericson@gmail.com>
+@@ -26,6 +26,7 @@ static unsigned long jornada_ssp_flags;
+
+ /**
+ * jornada_ssp_reverse - reverses input byte
++ * @byte: input byte to reverse
+ *
+ * we need to reverse all data we receive from the mcu due to its physical location
+ * returns : 01110111 -> 11101110
+@@ -46,6 +47,7 @@ EXPORT_SYMBOL(jornada_ssp_reverse);
+
+ /**
+ * jornada_ssp_byte - waits for ready ssp bus and sends byte
++ * @byte: input byte to transmit
+ *
+ * waits for fifo buffer to clear and then transmits, if it doesn't then we will
+ * timeout after <timeout> rounds. Needs mcu running before its called.
+@@ -77,6 +79,7 @@ EXPORT_SYMBOL(jornada_ssp_byte);
+
+ /**
+ * jornada_ssp_inout - decide if input is command or trading byte
++ * @byte: input byte to send (may be %TXDUMMY)
+ *
+ * returns : (jornada_ssp_byte(byte)) on success
+ * : %-ETIMEDOUT on timeout failure
+diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
+index ee949255ced3f..ba44cec5e59ac 100644
+--- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
++++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
+@@ -125,6 +125,7 @@ remove:
+
+ list_for_each_entry_safe(pos, tmp, &quirk_list, list) {
+ list_del(&pos->list);
++ of_node_put(pos->np);
+ kfree(pos);
+ }
+
+@@ -154,8 +155,10 @@ static int __init rcar_gen2_regulator_quirk(void)
+ return -ENODEV;
+
+ for_each_matching_node_and_match(np, rcar_gen2_quirk_match, &id) {
+- if (!of_device_is_available(np))
++ if (!of_device_is_available(np)) {
++ of_node_put(np);
+ break;
++ }
+
+ ret = of_property_read_u32(np, "reg", &addr);
+ if (ret) /* Skip invalid entry and continue */
+@@ -164,6 +167,7 @@ static int __init rcar_gen2_regulator_quirk(void)
+ quirk = kzalloc(sizeof(*quirk), GFP_KERNEL);
+ if (!quirk) {
+ ret = -ENOMEM;
++ of_node_put(np);
+ goto err_mem;
+ }
+
+@@ -171,11 +175,12 @@ static int __init rcar_gen2_regulator_quirk(void)
+ memcpy(&quirk->i2c_msg, id->data, sizeof(quirk->i2c_msg));
+
+ quirk->id = id;
+- quirk->np = np;
++ quirk->np = of_node_get(np);
+ quirk->i2c_msg.addr = addr;
+
+ ret = of_irq_parse_one(np, 0, argsa);
+ if (ret) { /* Skip invalid entry and continue */
++ of_node_put(np);
+ kfree(quirk);
+ continue;
+ }
+@@ -222,6 +227,7 @@ err_free:
+ err_mem:
+ list_for_each_entry_safe(pos, tmp, &quirk_list, list) {
+ list_del(&pos->list);
++ of_node_put(pos->np);
+ kfree(pos);
+ }
+
+diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig
+index 43ddec677c0b3..594edf9bbea44 100644
+--- a/arch/arm/mach-socfpga/Kconfig
++++ b/arch/arm/mach-socfpga/Kconfig
+@@ -2,6 +2,7 @@
+ menuconfig ARCH_INTEL_SOCFPGA
+ bool "Altera SOCFPGA family"
+ depends on ARCH_MULTI_V7
++ select ARCH_HAS_RESET_CONTROLLER
+ select ARCH_SUPPORTS_BIG_ENDIAN
+ select ARM_AMBA
+ select ARM_GIC
+@@ -18,6 +19,7 @@ menuconfig ARCH_INTEL_SOCFPGA
+ select PL310_ERRATA_727915
+ select PL310_ERRATA_753970 if PL310
+ select PL310_ERRATA_769419
++ select RESET_CONTROLLER
+
+ if ARCH_INTEL_SOCFPGA
+ config SOCFPGA_SUSPEND
+diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
+index fc2608b18a0d0..18f01190dcfd4 100644
+--- a/arch/arm/mach-socfpga/core.h
++++ b/arch/arm/mach-socfpga/core.h
+@@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr;
+ u32 socfpga_sdram_self_refresh(u32 sdr_base);
+ extern unsigned int socfpga_sdram_self_refresh_sz;
+
+-extern char secondary_trampoline, secondary_trampoline_end;
++extern char secondary_trampoline[], secondary_trampoline_end[];
+
+ extern unsigned long socfpga_cpu1start_addr;
+
+diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
+index fbb80b883e5dd..201191cf68f32 100644
+--- a/arch/arm/mach-socfpga/platsmp.c
++++ b/arch/arm/mach-socfpga/platsmp.c
+@@ -20,14 +20,14 @@
+
+ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
+ {
+- int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
++ int trampoline_size = secondary_trampoline_end - secondary_trampoline;
+
+ if (socfpga_cpu1start_addr) {
+ /* This will put CPU #1 into reset. */
+ writel(RSTMGR_MPUMODRST_CPU1,
+ rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST);
+
+- memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
++ memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
+
+ writel(__pa_symbol(secondary_startup),
+ sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));
+@@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
+
+ static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle)
+ {
+- int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
++ int trampoline_size = secondary_trampoline_end - secondary_trampoline;
+
+ if (socfpga_cpu1start_addr) {
+ writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr +
+ SOCFPGA_A10_RSTMGR_MODMPURST);
+- memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
++ memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
+
+ writel(__pa_symbol(secondary_startup),
+ sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
+diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
+index a0554d7d04f7c..e1adc098f89ac 100644
+--- a/arch/arm/mach-vexpress/dcscb.c
++++ b/arch/arm/mach-vexpress/dcscb.c
+@@ -144,6 +144,7 @@ static int __init dcscb_init(void)
+ if (!node)
+ return -ENODEV;
+ dcscb_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!dcscb_base)
+ return -EADDRNOTAVAIL;
+ cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
+index 1da11bdb1dfbd..1c6500c4e6a17 100644
+--- a/arch/arm/mach-vexpress/spc.c
++++ b/arch/arm/mach-vexpress/spc.c
+@@ -580,7 +580,7 @@ static int __init ve_spc_clk_init(void)
+ }
+
+ cluster = topology_physical_package_id(cpu_dev->id);
+- if (init_opp_table[cluster])
++ if (cluster < 0 || init_opp_table[cluster])
+ continue;
+
+ if (ve_init_opp_table(cpu_dev))
+diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
+index e1ca6a5732d27..15e8a321a713b 100644
+--- a/arch/arm/mach-zynq/common.c
++++ b/arch/arm/mach-zynq/common.c
+@@ -77,6 +77,7 @@ static int __init zynq_get_revision(void)
+ }
+
+ zynq_devcfg_base = of_iomap(np, 0);
++ of_node_put(np);
+ if (!zynq_devcfg_base) {
+ pr_err("%s: Unable to map I/O memory\n", __func__);
+ return -1;
+diff --git a/arch/arm/mach-zynq/slcr.c b/arch/arm/mach-zynq/slcr.c
+index 37707614885a5..9765b3f4c2fc5 100644
+--- a/arch/arm/mach-zynq/slcr.c
++++ b/arch/arm/mach-zynq/slcr.c
+@@ -213,6 +213,7 @@ int __init zynq_early_slcr_init(void)
+ zynq_slcr_regmap = syscon_regmap_lookup_by_compatible("xlnx,zynq-slcr");
+ if (IS_ERR(zynq_slcr_regmap)) {
+ pr_err("%s: failed to find zynq-slcr\n", __func__);
++ of_node_put(np);
+ return -ENODEV;
+ }
+
+diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
+index 8355c38958942..f43cdc1cfbaed 100644
+--- a/arch/arm/mm/Kconfig
++++ b/arch/arm/mm/Kconfig
+@@ -750,7 +750,7 @@ config CPU_BIG_ENDIAN
+ config CPU_ENDIAN_BE8
+ bool
+ depends on CPU_BIG_ENDIAN
+- default CPU_V6 || CPU_V6K || CPU_V7
++ default CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M
+ help
+ Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors.
+
+@@ -830,6 +830,7 @@ config CPU_BPREDICT_DISABLE
+
+ config CPU_SPECTRE
+ bool
++ select GENERIC_CPU_VULNERABILITIES
+
+ config HARDEN_BRANCH_PREDICTOR
+ bool "Harden the branch predictor against aliasing attacks" if EXPERT
+@@ -850,6 +851,16 @@ config HARDEN_BRANCH_PREDICTOR
+
+ If unsure, say Y.
+
++config HARDEN_BRANCH_HISTORY
++ bool "Harden Spectre style attacks against branch history" if EXPERT
++ depends on CPU_SPECTRE
++ default y
++ help
++ Speculation attacks against some high-performance processors can
++ make use of branch history to influence future speculation. When
++ taking an exception, a sequence of branches overwrites the branch
++ history, or branch history is invalidated.
++
+ config TLS_REG_EMUL
+ bool
+ select NEED_KUSER_HELPERS
+diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
+index ea81e89e77400..bcefe3f51744c 100644
+--- a/arch/arm/mm/alignment.c
++++ b/arch/arm/mm/alignment.c
+@@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ if (type == TYPE_LDST)
+ do_alignment_finish_ldst(addr, instr, regs, offset);
+
++ if (thumb_mode(regs))
++ regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
++
+ return 0;
+
+ bad_or_fault:
+diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
+index fb688003d156e..712da6a81b23f 100644
+--- a/arch/arm/mm/dump.c
++++ b/arch/arm/mm/dump.c
+@@ -346,7 +346,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+ addr = start + i * PMD_SIZE;
+ domain = get_domain_name(pmd);
+ if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd))
+- note_page(st, addr, 3, pmd_val(*pmd), domain);
++ note_page(st, addr, 4, pmd_val(*pmd), domain);
+ else
+ walk_pte(st, pmd, addr, domain);
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index efa4020250315..af5177801fb10 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -125,7 +125,7 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+ show_pte(KERN_ALERT, mm, addr);
+ die("Oops", regs, fsr);
+ bust_spinlocks(0);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ /*
+diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
+index 80fb5a4a5c050..2660bdfcad4d0 100644
+--- a/arch/arm/mm/ioremap.c
++++ b/arch/arm/mm/ioremap.c
+@@ -479,3 +479,11 @@ void __init early_ioremap_init(void)
+ {
+ early_ioremap_setup();
+ }
++
++bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
++ unsigned long flags)
++{
++ unsigned long pfn = PHYS_PFN(offset);
++
++ return memblock_is_map_memory(pfn);
++}
+diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
+index 9c348042a7244..948ada4a2938c 100644
+--- a/arch/arm/mm/kasan_init.c
++++ b/arch/arm/mm/kasan_init.c
+@@ -226,7 +226,7 @@ void __init kasan_init(void)
+ BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) !=
+ pgd_index(KASAN_SHADOW_END));
+ memcpy(tmp_pmd_table,
+- pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)),
++ (void*)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)),
+ sizeof(tmp_pmd_table));
+ set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)],
+ __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
+@@ -264,12 +264,17 @@ void __init kasan_init(void)
+
+ /*
+ * 1. The module global variables are in MODULES_VADDR ~ MODULES_END,
+- * so we need to map this area.
++ * so we need to map this area if CONFIG_KASAN_VMALLOC=n. With
++ * VMALLOC support KASAN will manage this region dynamically,
++ * refer to kasan_populate_vmalloc() and ARM's implementation of
++ * module_alloc().
+ * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR
+ * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't
+ * use kasan_populate_zero_shadow.
+ */
+- create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE));
++ if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && IS_ENABLED(CONFIG_MODULES))
++ create_mapping((void *)MODULES_VADDR, (void *)(MODULES_END));
++ create_mapping((void *)PKMAP_BASE, (void *)(PKMAP_BASE + PMD_SIZE));
+
+ /*
+ * KAsan may reuse the contents of kasan_early_shadow_pte directly, so
+diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
+index a4e0060051070..83a91e0ab8480 100644
+--- a/arch/arm/mm/mmu.c
++++ b/arch/arm/mm/mmu.c
+@@ -212,12 +212,14 @@ early_param("ecc", early_ecc);
+ static int __init early_cachepolicy(char *p)
+ {
+ pr_warn("cachepolicy kernel parameter not supported without cp15\n");
++ return 0;
+ }
+ early_param("cachepolicy", early_cachepolicy);
+
+ static int __init noalign_setup(char *__unused)
+ {
+ pr_warn("noalign kernel parameter not supported without cp15\n");
++ return 1;
+ }
+ __setup("noalign", noalign_setup);
+
+@@ -294,6 +296,17 @@ static struct mem_type mem_types[] __ro_after_init = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+ .domain = DOMAIN_KERNEL,
+ },
++ [MT_MEMORY_RO] = {
++ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
++ L_PTE_XN | L_PTE_RDONLY,
++ .prot_l1 = PMD_TYPE_TABLE,
++#ifdef CONFIG_ARM_LPAE
++ .prot_sect = PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2,
++#else
++ .prot_sect = PMD_TYPE_SECT,
++#endif
++ .domain = DOMAIN_KERNEL,
++ },
+ [MT_ROM] = {
+ .prot_sect = PMD_TYPE_SECT,
+ .domain = DOMAIN_KERNEL,
+@@ -390,9 +403,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
+ BUG_ON(idx >= __end_of_fixed_addresses);
+
+- /* we only support device mappings until pgprot_kernel has been set */
++ /* We support only device mappings before pgprot_kernel is set. */
+ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
+- pgprot_val(pgprot_kernel) == 0))
++ pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0))
+ return;
+
+ if (pgprot_val(prot))
+@@ -487,6 +500,7 @@ static void __init build_mem_type_table(void)
+
+ /* Also setup NX memory mapping */
+ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
++ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
+ }
+ if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+ /*
+@@ -566,6 +580,7 @@ static void __init build_mem_type_table(void)
+ mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
++ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ #endif
+
+ /*
+@@ -585,6 +600,8 @@ static void __init build_mem_type_table(void)
+ mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
++ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
++ mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
+@@ -645,6 +662,8 @@ static void __init build_mem_type_table(void)
+ mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
+ mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
++ mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd;
++ mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
+ mem_types[MT_ROM].prot_sect |= cp->pmd;
+@@ -1358,7 +1377,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
+ map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
+ map.virtual = FDT_FIXED_BASE;
+ map.length = FDT_FIXED_SIZE;
+- map.type = MT_ROM;
++ map.type = MT_MEMORY_RO;
+ create_mapping(&map);
+ }
+
+diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
+index 2658f52903da6..80613674deb5b 100644
+--- a/arch/arm/mm/nommu.c
++++ b/arch/arm/mm/nommu.c
+@@ -26,6 +26,13 @@
+
+ unsigned long vectors_base;
+
++/*
++ * empty_zero_page is a special page that is used for
++ * zero-initialized data and COW.
++ */
++struct page *empty_zero_page;
++EXPORT_SYMBOL(empty_zero_page);
++
+ #ifdef CONFIG_ARM_MPU
+ struct mpu_rgn_info mpu_rgn_info;
+ #endif
+@@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void)
+ */
+ void __init paging_init(const struct machine_desc *mdesc)
+ {
++ void *zero_page;
++
+ early_trap_init((void *)vectors_base);
+ mpu_setup();
++
++ /* allocate the zero page. */
++ zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
++ if (!zero_page)
++ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
++ __func__, PAGE_SIZE, PAGE_SIZE);
++
+ bootmem_init();
++
++ empty_zero_page = virt_to_page(zero_page);
++ flush_dcache_page(empty_zero_page);
+ }
+
+ /*
+diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
+index 114c05ab4dd91..8bc7a2d6d6c7f 100644
+--- a/arch/arm/mm/proc-v7-bugs.c
++++ b/arch/arm/mm/proc-v7-bugs.c
+@@ -6,8 +6,35 @@
+ #include <asm/cp15.h>
+ #include <asm/cputype.h>
+ #include <asm/proc-fns.h>
++#include <asm/spectre.h>
+ #include <asm/system_misc.h>
+
++#ifdef CONFIG_ARM_PSCI
++static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
++{
++ struct arm_smccc_res res;
++
++ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
++ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
++
++ switch ((int)res.a0) {
++ case SMCCC_RET_SUCCESS:
++ return SPECTRE_MITIGATED;
++
++ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
++ return SPECTRE_UNAFFECTED;
++
++ default:
++ return SPECTRE_VULNERABLE;
++ }
++}
++#else
++static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void)
++{
++ return SPECTRE_VULNERABLE;
++}
++#endif
++
+ #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+ DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+
+@@ -36,13 +63,60 @@ static void __maybe_unused call_hvc_arch_workaround_1(void)
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+ }
+
+-static void cpu_v7_spectre_init(void)
++static unsigned int spectre_v2_install_workaround(unsigned int method)
+ {
+ const char *spectre_v2_method = NULL;
+ int cpu = smp_processor_id();
+
+ if (per_cpu(harden_branch_predictor_fn, cpu))
+- return;
++ return SPECTRE_MITIGATED;
++
++ switch (method) {
++ case SPECTRE_V2_METHOD_BPIALL:
++ per_cpu(harden_branch_predictor_fn, cpu) =
++ harden_branch_predictor_bpiall;
++ spectre_v2_method = "BPIALL";
++ break;
++
++ case SPECTRE_V2_METHOD_ICIALLU:
++ per_cpu(harden_branch_predictor_fn, cpu) =
++ harden_branch_predictor_iciallu;
++ spectre_v2_method = "ICIALLU";
++ break;
++
++ case SPECTRE_V2_METHOD_HVC:
++ per_cpu(harden_branch_predictor_fn, cpu) =
++ call_hvc_arch_workaround_1;
++ cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
++ spectre_v2_method = "hypervisor";
++ break;
++
++ case SPECTRE_V2_METHOD_SMC:
++ per_cpu(harden_branch_predictor_fn, cpu) =
++ call_smc_arch_workaround_1;
++ cpu_do_switch_mm = cpu_v7_smc_switch_mm;
++ spectre_v2_method = "firmware";
++ break;
++ }
++
++ if (spectre_v2_method)
++ pr_info("CPU%u: Spectre v2: using %s workaround\n",
++ smp_processor_id(), spectre_v2_method);
++
++ return SPECTRE_MITIGATED;
++}
++#else
++static unsigned int spectre_v2_install_workaround(unsigned int method)
++{
++ pr_info_once("Spectre V2: workarounds disabled by configuration\n");
++
++ return SPECTRE_VULNERABLE;
++}
++#endif
++
++static void cpu_v7_spectre_v2_init(void)
++{
++ unsigned int state, method = 0;
+
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A8:
+@@ -51,69 +125,133 @@ static void cpu_v7_spectre_init(void)
+ case ARM_CPU_PART_CORTEX_A17:
+ case ARM_CPU_PART_CORTEX_A73:
+ case ARM_CPU_PART_CORTEX_A75:
+- per_cpu(harden_branch_predictor_fn, cpu) =
+- harden_branch_predictor_bpiall;
+- spectre_v2_method = "BPIALL";
++ state = SPECTRE_MITIGATED;
++ method = SPECTRE_V2_METHOD_BPIALL;
+ break;
+
+ case ARM_CPU_PART_CORTEX_A15:
+ case ARM_CPU_PART_BRAHMA_B15:
+- per_cpu(harden_branch_predictor_fn, cpu) =
+- harden_branch_predictor_iciallu;
+- spectre_v2_method = "ICIALLU";
++ state = SPECTRE_MITIGATED;
++ method = SPECTRE_V2_METHOD_ICIALLU;
+ break;
+
+-#ifdef CONFIG_ARM_PSCI
+ case ARM_CPU_PART_BRAHMA_B53:
+ /* Requires no workaround */
++ state = SPECTRE_UNAFFECTED;
+ break;
++
+ default:
+ /* Other ARM CPUs require no workaround */
+- if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
++ if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) {
++ state = SPECTRE_UNAFFECTED;
+ break;
++ }
++
+ fallthrough;
+- /* Cortex A57/A72 require firmware workaround */
+- case ARM_CPU_PART_CORTEX_A57:
+- case ARM_CPU_PART_CORTEX_A72: {
+- struct arm_smccc_res res;
+
+- arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+- if ((int)res.a0 != 0)
+- return;
++ /* Cortex A57/A72 require firmware workaround */
++ case ARM_CPU_PART_CORTEX_A57:
++ case ARM_CPU_PART_CORTEX_A72:
++ state = spectre_v2_get_cpu_fw_mitigation_state();
++ if (state != SPECTRE_MITIGATED)
++ break;
+
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+- per_cpu(harden_branch_predictor_fn, cpu) =
+- call_hvc_arch_workaround_1;
+- cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
+- spectre_v2_method = "hypervisor";
++ method = SPECTRE_V2_METHOD_HVC;
+ break;
+
+ case SMCCC_CONDUIT_SMC:
+- per_cpu(harden_branch_predictor_fn, cpu) =
+- call_smc_arch_workaround_1;
+- cpu_do_switch_mm = cpu_v7_smc_switch_mm;
+- spectre_v2_method = "firmware";
++ method = SPECTRE_V2_METHOD_SMC;
+ break;
+
+ default:
++ state = SPECTRE_VULNERABLE;
+ break;
+ }
+ }
+-#endif
++
++ if (state == SPECTRE_MITIGATED)
++ state = spectre_v2_install_workaround(method);
++
++ spectre_v2_update_state(state, method);
++}
++
++#ifdef CONFIG_HARDEN_BRANCH_HISTORY
++static int spectre_bhb_method;
++
++static const char *spectre_bhb_method_name(int method)
++{
++ switch (method) {
++ case SPECTRE_V2_METHOD_LOOP8:
++ return "loop";
++
++ case SPECTRE_V2_METHOD_BPIALL:
++ return "BPIALL";
++
++ default:
++ return "unknown";
+ }
++}
+
+- if (spectre_v2_method)
+- pr_info("CPU%u: Spectre v2: using %s workaround\n",
+- smp_processor_id(), spectre_v2_method);
++static int spectre_bhb_install_workaround(int method)
++{
++ if (spectre_bhb_method != method) {
++ if (spectre_bhb_method) {
++ pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n",
++ smp_processor_id());
++
++ return SPECTRE_VULNERABLE;
++ }
++
++ if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE)
++ return SPECTRE_VULNERABLE;
++
++ spectre_bhb_method = method;
++
++ pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n",
++ smp_processor_id(), spectre_bhb_method_name(method));
++ }
++
++ return SPECTRE_MITIGATED;
+ }
+ #else
+-static void cpu_v7_spectre_init(void)
++static int spectre_bhb_install_workaround(int method)
+ {
++ return SPECTRE_VULNERABLE;
+ }
+ #endif
+
++static void cpu_v7_spectre_bhb_init(void)
++{
++ unsigned int state, method = 0;
++
++ switch (read_cpuid_part()) {
++ case ARM_CPU_PART_CORTEX_A15:
++ case ARM_CPU_PART_BRAHMA_B15:
++ case ARM_CPU_PART_CORTEX_A57:
++ case ARM_CPU_PART_CORTEX_A72:
++ state = SPECTRE_MITIGATED;
++ method = SPECTRE_V2_METHOD_LOOP8;
++ break;
++
++ case ARM_CPU_PART_CORTEX_A73:
++ case ARM_CPU_PART_CORTEX_A75:
++ state = SPECTRE_MITIGATED;
++ method = SPECTRE_V2_METHOD_BPIALL;
++ break;
++
++ default:
++ state = SPECTRE_UNAFFECTED;
++ break;
++ }
++
++ if (state == SPECTRE_MITIGATED)
++ state = spectre_bhb_install_workaround(method);
++
++ spectre_v2_update_state(state, method);
++}
++
+ static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
+ u32 mask, const char *msg)
+ {
+@@ -142,16 +280,18 @@ static bool check_spectre_auxcr(bool *warned, u32 bit)
+ void cpu_v7_ca8_ibe(void)
+ {
+ if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
+- cpu_v7_spectre_init();
++ cpu_v7_spectre_v2_init();
+ }
+
+ void cpu_v7_ca15_ibe(void)
+ {
+ if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
+- cpu_v7_spectre_init();
++ cpu_v7_spectre_v2_init();
++ cpu_v7_spectre_bhb_init();
+ }
+
+ void cpu_v7_bugs_init(void)
+ {
+- cpu_v7_spectre_init();
++ cpu_v7_spectre_v2_init();
++ cpu_v7_spectre_bhb_init();
+ }
+diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile
+index 303400fa2cdf7..2aec85ab1e8b9 100644
+--- a/arch/arm/nwfpe/Makefile
++++ b/arch/arm/nwfpe/Makefile
+@@ -11,3 +11,9 @@ nwfpe-y += fpa11.o fpa11_cpdo.o fpa11_cpdt.o \
+ entry.o
+
+ nwfpe-$(CONFIG_FPE_NWFPE_XP) += extended_cpdo.o
++
++# Try really hard to avoid generating calls to __aeabi_uldivmod() from
++# float64_rem() due to loop elision.
++ifdef CONFIG_CC_IS_CLANG
++CFLAGS_softfloat.o += -mllvm -replexitval=never
++endif
+diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h
+index 9731735989921..facc889d05eee 100644
+--- a/arch/arm/probes/decode.h
++++ b/arch/arm/probes/decode.h
+@@ -14,6 +14,7 @@
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+ #include <asm/probes.h>
++#include <asm/ptrace.h>
+ #include <asm/kprobes.h>
+
+ void __init arm_probes_decode_init(void);
+@@ -35,31 +36,6 @@ void __init find_str_pc_offset(void);
+ #endif
+
+
+-/*
+- * Update ITSTATE after normal execution of an IT block instruction.
+- *
+- * The 8 IT state bits are split into two parts in CPSR:
+- * ITSTATE<1:0> are in CPSR<26:25>
+- * ITSTATE<7:2> are in CPSR<15:10>
+- */
+-static inline unsigned long it_advance(unsigned long cpsr)
+- {
+- if ((cpsr & 0x06000400) == 0) {
+- /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+- cpsr &= ~PSR_IT_MASK;
+- } else {
+- /* We need to shift left ITSTATE<4:0> */
+- const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
+- unsigned long it = cpsr & mask;
+- it <<= 1;
+- it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
+- it &= mask;
+- cpsr &= ~mask;
+- cpsr |= it;
+- }
+- return cpsr;
+-}
+-
+ static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
+ {
+ long cpsr = regs->ARM_cpsr;
+diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
+index 14db56f49f0a3..6159010dac4a6 100644
+--- a/arch/arm/probes/kprobes/Makefile
++++ b/arch/arm/probes/kprobes/Makefile
+@@ -1,4 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
++KASAN_SANITIZE_actions-common.o := n
++KASAN_SANITIZE_actions-arm.o := n
++KASAN_SANITIZE_actions-thumb.o := n
+ obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
+ obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
+ test-kprobes-objs := test-core.o
+diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c
+index 4d720990cf2a3..eba7ac4725c02 100644
+--- a/arch/arm/probes/kprobes/checkers-common.c
++++ b/arch/arm/probes/kprobes/checkers-common.c
+@@ -40,7 +40,7 @@ enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn,
+ * Different from other insn uses imm8, the real addressing offset of
+ * STRD in T32 encoding should be imm8 * 4. See ARMARM description.
+ */
+-enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
++static enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
+ struct arch_probes_insn *asi,
+ const struct decode_header *h)
+ {
+diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
+index 9d8634e2f12f7..3bd017f6e256a 100644
+--- a/arch/arm/probes/kprobes/core.c
++++ b/arch/arm/probes/kprobes/core.c
+@@ -11,6 +11,8 @@
+ * Copyright (C) 2007 Marvell Ltd.
+ */
+
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/kernel.h>
+ #include <linux/kprobes.h>
+ #include <linux/module.h>
+@@ -231,7 +233,7 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+ * kprobe, and that level is reserved for user kprobe handlers, so we can't
+ * risk encountering a new kprobe in an interrupt handler.
+ */
+-void __kprobes kprobe_handler(struct pt_regs *regs)
++static void __kprobes kprobe_handler(struct pt_regs *regs)
+ {
+ struct kprobe *p, *cur;
+ struct kprobe_ctlblk *kcb;
+@@ -278,7 +280,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
+ break;
+ case KPROBE_REENTER:
+ /* A nested probe was hit in FIQ, it is a BUG */
+- pr_warn("Unrecoverable kprobe detected.\n");
++ pr_warn("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ fallthrough;
+ default:
+diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
+index c78180172120f..e20304f1d8bc9 100644
+--- a/arch/arm/probes/kprobes/opt-arm.c
++++ b/arch/arm/probes/kprobes/opt-arm.c
+@@ -145,8 +145,6 @@ __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+ }
+ }
+
+-extern void kprobe_handler(struct pt_regs *regs);
+-
+ static void
+ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+ {
+diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
+index c562832b86272..171c7076b89f4 100644
+--- a/arch/arm/probes/kprobes/test-core.c
++++ b/arch/arm/probes/kprobes/test-core.c
+@@ -720,7 +720,7 @@ static const char coverage_register_lookup[16] = {
+ [REG_TYPE_NOSPPCX] = COVERAGE_ANY_REG | COVERAGE_SP,
+ };
+
+-unsigned coverage_start_registers(const struct decode_header *h)
++static unsigned coverage_start_registers(const struct decode_header *h)
+ {
+ unsigned regs = 0;
+ int i;
+diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h
+index f1d5583e7bbbb..7054d9fae2ea0 100644
+--- a/arch/arm/probes/kprobes/test-core.h
++++ b/arch/arm/probes/kprobes/test-core.h
+@@ -454,3 +454,7 @@ void kprobe_thumb32_test_cases(void);
+ #else
+ void kprobe_arm_test_cases(void);
+ #endif
++
++void __kprobes_test_case_start(void);
++void __kprobes_test_case_end_16(void);
++void __kprobes_test_case_end_32(void);
+diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
+index 84a1cea1f43b9..309648c17f486 100644
+--- a/arch/arm/xen/p2m.c
++++ b/arch/arm/xen/p2m.c
+@@ -63,11 +63,12 @@ out:
+
+ unsigned long __pfn_to_mfn(unsigned long pfn)
+ {
+- struct rb_node *n = phys_to_mach.rb_node;
++ struct rb_node *n;
+ struct xen_p2m_entry *entry;
+ unsigned long irqflags;
+
+ read_lock_irqsave(&p2m_lock, irqflags);
++ n = phys_to_mach.rb_node;
+ while (n) {
+ entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
+ if (entry->pfn <= pfn &&
+@@ -152,10 +153,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn,
+ int rc;
+ unsigned long irqflags;
+ struct xen_p2m_entry *p2m_entry;
+- struct rb_node *n = phys_to_mach.rb_node;
++ struct rb_node *n;
+
+ if (mfn == INVALID_P2M_ENTRY) {
+ write_lock_irqsave(&p2m_lock, irqflags);
++ n = phys_to_mach.rb_node;
+ while (n) {
+ p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
+ if (p2m_entry->pfn <= pfn &&
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index fee914c716aa2..5ab4b0520eabb 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -154,7 +154,6 @@ config ARM64
+ select HAVE_ARCH_KGDB
+ select HAVE_ARCH_MMAP_RND_BITS
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+- select HAVE_ARCH_PFN_VALID
+ select HAVE_ARCH_PREL32_RELOCATIONS
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ select HAVE_ARCH_SECCOMP_FILTER
+@@ -221,6 +220,7 @@ config ARM64
+ select THREAD_INFO_IN_TASK
+ select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
+ select TRACE_IRQFLAGS_SUPPORT
++ select TRACE_IRQFLAGS_NMI_SUPPORT
+ help
+ ARM 64-bit (AArch64) Linux support.
+
+@@ -487,6 +487,22 @@ config ARM64_ERRATUM_834220
+
+ If unsure, say Y.
+
++config ARM64_ERRATUM_1742098
++ bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
++ depends on COMPAT
++ default y
++ help
++ This option removes the AES hwcap for aarch32 user-space to
++ workaround erratum 1742098 on Cortex-A57 and Cortex-A72.
++
++ Affected parts may corrupt the AES state if an interrupt is
++ taken between a pair of AES instructions. These instructions
++ are only present if the cryptography extensions are present.
++ All software should have a fallback implementation for CPUs
++ that don't implement the cryptography extensions.
++
++ If unsure, say Y.
++
+ config ARM64_ERRATUM_845719
+ bool "Cortex-A53: 845719: a load might read incorrect data"
+ depends on COMPAT
+@@ -596,6 +612,23 @@ config ARM64_ERRATUM_1530923
+ config ARM64_WORKAROUND_REPEAT_TLBI
+ bool
+
++config ARM64_ERRATUM_2441007
++ bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
++ default y
++ select ARM64_WORKAROUND_REPEAT_TLBI
++ help
++ This option adds a workaround for ARM Cortex-A55 erratum #2441007.
++
++ Under very rare circumstances, affected Cortex-A55 CPUs
++ may not handle a race between a break-before-make sequence on one
++ CPU, and another CPU accessing the same page. This could allow a
++ store to a page that has been unmapped.
++
++ Work around this by adding the affected CPUs to the list that needs
++ TLB sequences to be done twice.
++
++ If unsure, say Y.
++
+ config ARM64_ERRATUM_1286807
+ bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
+ default y
+@@ -666,6 +699,155 @@ config ARM64_ERRATUM_1508412
+
+ If unsure, say Y.
+
++config ARM64_ERRATUM_2441009
++ bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
++ default y
++ select ARM64_WORKAROUND_REPEAT_TLBI
++ help
++ This option adds a workaround for ARM Cortex-A510 erratum #2441009.
++
++ Under very rare circumstances, affected Cortex-A510 CPUs
++ may not handle a race between a break-before-make sequence on one
++ CPU, and another CPU accessing the same page. This could allow a
++ store to a page that has been unmapped.
++
++ Work around this by adding the affected CPUs to the list that needs
++ TLB sequences to be done twice.
++
++ If unsure, say Y.
++
++config ARM64_ERRATUM_2457168
++ bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly"
++ depends on ARM64_AMU_EXTN
++ default y
++ help
++ This option adds the workaround for ARM Cortex-A510 erratum 2457168.
++
++ The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate
++ as the system counter. On affected Cortex-A510 cores AMEVCNTR01 increments
++ incorrectly giving a significantly higher output value.
++
++ Work around this problem by returning 0 when reading the affected counter in
++ key locations that results in disabling all users of this counter. This effect
++ is the same to firmware disabling affected counters.
++
++ If unsure, say Y.
++
++config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++ bool
++
++config ARM64_ERRATUM_2119858
++ bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
++ default y
++ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
++ depends on CORESIGHT_TRBE
++ select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++ help
++ This option adds the workaround for ARM Cortex-A710 erratum 2119858.
++
++ Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
++ data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
++ the event of a WRAP event.
++
++ Work around the issue by always making sure we move the TRBPTR_EL1 by
++ 256 bytes before enabling the buffer and filling the first 256 bytes of
++ the buffer with ETM ignore packets upon disabling.
++
++ If unsure, say Y.
++
++config ARM64_ERRATUM_2139208
++ bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode"
++ default y
++ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
++ depends on CORESIGHT_TRBE
++ select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++ help
++ This option adds the workaround for ARM Neoverse-N2 erratum 2139208.
++
++ Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace
++ data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
++ the event of a WRAP event.
++
++ Work around the issue by always making sure we move the TRBPTR_EL1 by
++ 256 bytes before enabling the buffer and filling the first 256 bytes of
++ the buffer with ETM ignore packets upon disabling.
++
++ If unsure, say Y.
++
++config ARM64_WORKAROUND_TSB_FLUSH_FAILURE
++ bool
++
++config ARM64_ERRATUM_2054223
++ bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace"
++ default y
++ select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
++ help
++ Enable workaround for ARM Cortex-A710 erratum 2054223
++
++ Affected cores may fail to flush the trace data on a TSB instruction, when
++ the PE is in trace prohibited state. This will cause losing a few bytes
++ of the trace cached.
++
++ Workaround is to issue two TSB consecutively on affected cores.
++
++ If unsure, say Y.
++
++config ARM64_ERRATUM_2067961
++ bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace"
++ default y
++ select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
++ help
++ Enable workaround for ARM Neoverse-N2 erratum 2067961
++
++ Affected cores may fail to flush the trace data on a TSB instruction, when
++ the PE is in trace prohibited state. This will cause losing a few bytes
++ of the trace cached.
++
++ Workaround is to issue two TSB consecutively on affected cores.
++
++ If unsure, say Y.
++
++config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
++ bool
++
++config ARM64_ERRATUM_2253138
++ bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range"
++ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
++ depends on CORESIGHT_TRBE
++ default y
++ select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
++ help
++ This option adds the workaround for ARM Neoverse-N2 erratum 2253138.
++
++ Affected Neoverse-N2 cores might write to an out-of-range address, not reserved
++ for TRBE. Under some conditions, the TRBE might generate a write to the next
++ virtually addressed page following the last page of the TRBE address space
++ (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
++
++ Work around this in the driver by always making sure that there is a
++ page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
++
++ If unsure, say Y.
++
++config ARM64_ERRATUM_2224489
++ bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
++ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
++ depends on CORESIGHT_TRBE
++ default y
++ select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
++ help
++ This option adds the workaround for ARM Cortex-A710 erratum 2224489.
++
++ Affected Cortex-A710 cores might write to an out-of-range address, not reserved
++ for TRBE. Under some conditions, the TRBE might generate a write to the next
++ virtually addressed page following the last page of the TRBE address space
++ (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
++
++ Work around this in the driver by always making sure that there is a
++ page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
++
++ If unsure, say Y.
++
+ config CAVIUM_ERRATUM_22375
+ bool "Cavium erratum 22375, 24313"
+ default y
+@@ -1053,9 +1235,6 @@ config HW_PERF_EVENTS
+ def_bool y
+ depends on ARM_PMU
+
+-config ARCH_HAS_FILTER_PGPROT
+- def_bool y
+-
+ # Supported by clang >= 7.0
+ config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+@@ -1184,6 +1363,15 @@ config UNMAP_KERNEL_AT_EL0
+
+ If unsure, say Y.
+
++config MITIGATE_SPECTRE_BRANCH_HISTORY
++ bool "Mitigate Spectre style attacks against branch history" if EXPERT
++ default y
++ help
++ Speculation attacks against some high-performance processors can
++ make use of branch history to influence future speculation.
++ When taking an exception from user-space, a sequence of branches
++ or a firmware call overwrites the branch history.
++
+ config RODATA_FULL_DEFAULT_ENABLED
+ bool "Apply r/o permissions of VM areas also to their linear aliases"
+ default y
+@@ -1264,7 +1452,8 @@ config KUSER_HELPERS
+
+ config COMPAT_VDSO
+ bool "Enable vDSO for 32-bit applications"
+- depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != ""
++ depends on !CPU_BIG_ENDIAN
++ depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != ""
+ select GENERIC_COMPAT_VDSO
+ default y
+ help
+@@ -1602,6 +1791,8 @@ config ARM64_BTI_KERNEL
+ depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
+ depends on !CC_IS_GCC || GCC_VERSION >= 100100
++ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671
++ depends on !CC_IS_GCC
+ # https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9
+ depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
+ depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
+index b0ce18d4cc98c..d7772a4c34fe7 100644
+--- a/arch/arm64/Kconfig.platforms
++++ b/arch/arm64/Kconfig.platforms
+@@ -259,6 +259,7 @@ config ARCH_INTEL_SOCFPGA
+
+ config ARCH_SYNQUACER
+ bool "Socionext SynQuacer SoC Family"
++ select IRQ_FASTEOI_HIERARCHY_HANDLERS
+
+ config ARCH_TEGRA
+ bool "NVIDIA Tegra SoC Family"
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi
+index cc321c04f1219..f6d7d7f7fdabe 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi
++++ b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi
+@@ -343,19 +343,19 @@
+ };
+
+ thermal-zones {
+- cpu-thermal-zone {
++ cpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 0>;
+ };
+
+- ddr-thermal-zone {
++ ddr-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 2>;
+ };
+
+- gpu-thermal-zone {
++ gpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 1>;
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi
+index 578c37490d901..e39db51eb4489 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi
++++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi
+@@ -4,7 +4,7 @@
+ */
+
+ / {
+- cpu0_opp_table: opp_table0 {
++ cpu0_opp_table: opp-table-cpu {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
+index 097a5511523ad..09eee653d5caa 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
++++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
+@@ -40,7 +40,7 @@
+ leds {
+ compatible = "gpio-leds";
+
+- status {
++ led-0 {
+ label = "orangepi:green:status";
+ gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */
+ };
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi
+index b2657201957eb..1afad8b437d72 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi
++++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi
+@@ -2,7 +2,7 @@
+ // Copyright (C) 2020 Chen-Yu Tsai <wens@csie.org>
+
+ / {
+- cpu_opp_table: cpu-opp-table {
++ cpu_opp_table: opp-table-cpu {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts
+index d13980ed7a79a..7ec5ac850a0dc 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts
++++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts
+@@ -69,7 +69,7 @@
+ pinctrl-0 = <&emac_rgmii_pins>;
+ phy-supply = <&reg_gmac_3v3>;
+ phy-handle = <&ext_rgmii_phy>;
+- phy-mode = "rgmii";
++ phy-mode = "rgmii-id";
+ status = "okay";
+ };
+
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+index 578a63dedf466..9988e87ea7b3d 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
++++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+@@ -217,7 +217,7 @@
+ };
+ };
+
+- gpu_thermal {
++ gpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&ths 1>;
+diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi
+index 8c6e8536b69fa..0baf0f8e4d272 100644
+--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi
++++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi
+@@ -3,7 +3,7 @@
+ // Copyright (C) 2020 Clément Péron <peron.clem@gmail.com>
+
+ / {
+- cpu_opp_table: cpu-opp-table {
++ cpu_opp_table: opp-table-cpu {
+ compatible = "allwinner,sun50i-h6-operating-points";
+ nvmem-cells = <&cpu_speed_grade>;
+ opp-shared;
+diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+index d301ac0d406bf..3ec301bd08a91 100644
+--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
++++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+@@ -594,7 +594,7 @@
+ };
+
+ qspi: spi@ff8d2000 {
+- compatible = "cdns,qspi-nor";
++ compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xff8d2000 0x100>,
+diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+index 46e558ab7729b..f0e8af12442a4 100644
+--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
++++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+@@ -129,7 +129,7 @@
+ status = "okay";
+ clock-frequency = <100000>;
+ i2c-sda-falling-time-ns = <890>; /* hcnt */
+- i2c-sdl-falling-time-ns = <890>; /* lcnt */
++ i2c-scl-falling-time-ns = <890>; /* lcnt */
+
+ adc@14 {
+ compatible = "lltc,ltc2497";
+diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+index f9b4a39683cf4..92ac3c86ebd56 100644
+--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
++++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+@@ -162,7 +162,7 @@
+ status = "okay";
+ clock-frequency = <100000>;
+ i2c-sda-falling-time-ns = <890>; /* hcnt */
+- i2c-sdl-falling-time-ns = <890>; /* lcnt */
++ i2c-scl-falling-time-ns = <890>; /* lcnt */
+
+ adc@14 {
+ compatible = "lltc,ltc2497";
+diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+index 3f5254eeb47b1..db5a1f4653135 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+@@ -152,7 +152,7 @@
+ scpi_clocks: clocks {
+ compatible = "arm,scpi-clocks";
+
+- scpi_dvfs: clock-controller {
++ scpi_dvfs: clocks-0 {
+ compatible = "arm,scpi-dvfs-clocks";
+ #clock-cells = <1>;
+ clock-indices = <0>;
+@@ -161,7 +161,7 @@
+ };
+
+ scpi_sensors: sensors {
+- compatible = "amlogic,meson-gxbb-scpi-sensors";
++ compatible = "amlogic,meson-gxbb-scpi-sensors", "arm,scpi-sensors";
+ #thermal-sensor-cells = <1>;
+ };
+ };
+@@ -1885,7 +1885,7 @@
+ sd_emmc_b: sd@5000 {
+ compatible = "amlogic,meson-axg-mmc";
+ reg = <0x0 0x5000 0x0 0x800>;
+- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ clocks = <&clkc CLKID_SD_EMMC_B>,
+ <&clkc CLKID_SD_EMMC_B_CLK0>,
+@@ -1897,7 +1897,7 @@
+ sd_emmc_c: mmc@7000 {
+ compatible = "amlogic,meson-axg-mmc";
+ reg = <0x0 0x7000 0x0 0x800>;
+- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ clocks = <&clkc CLKID_SD_EMMC_C>,
+ <&clkc CLKID_SD_EMMC_C_CLK0>,
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+index 00c6f53290d43..369334076467a 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+@@ -58,7 +58,7 @@
+ secure-monitor = <&sm>;
+ };
+
+- gpu_opp_table: gpu-opp-table {
++ gpu_opp_table: opp-table-gpu {
+ compatible = "operating-points-v2";
+
+ opp-124999998 {
+@@ -107,6 +107,12 @@
+ no-map;
+ };
+
++ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
++ secmon_reserved_bl32: secmon@5300000 {
++ reg = <0x0 0x05300000 0x0 0x2000000>;
++ no-map;
++ };
++
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+@@ -1604,10 +1610,9 @@
+
+ dmc: bus@38000 {
+ compatible = "simple-bus";
+- reg = <0x0 0x38000 0x0 0x400>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+- ranges = <0x0 0x0 0x0 0x38000 0x0 0x400>;
++ ranges = <0x0 0x0 0x0 0x38000 0x0 0x2000>;
+
+ canvas: video-lut@48 {
+ compatible = "amlogic,canvas";
+@@ -1727,7 +1732,7 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+- internal_ephy: ethernet_phy@8 {
++ internal_ephy: ethernet-phy@8 {
+ compatible = "ethernet-phy-id0180.3301",
+ "ethernet-phy-ieee802.3-c22";
+ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
+@@ -2324,7 +2329,7 @@
+ sd_emmc_a: sd@ffe03000 {
+ compatible = "amlogic,meson-axg-mmc";
+ reg = <0x0 0xffe03000 0x0 0x800>;
+- interrupts = <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ clocks = <&clkc CLKID_SD_EMMC_A>,
+ <&clkc CLKID_SD_EMMC_A_CLK0>,
+@@ -2336,7 +2341,7 @@
+ sd_emmc_b: sd@ffe05000 {
+ compatible = "amlogic,meson-axg-mmc";
+ reg = <0x0 0xffe05000 0x0 0x800>;
+- interrupts = <GIC_SPI 190 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ clocks = <&clkc CLKID_SD_EMMC_B>,
+ <&clkc CLKID_SD_EMMC_B_CLK0>,
+@@ -2348,7 +2353,7 @@
+ sd_emmc_c: mmc@ffe07000 {
+ compatible = "amlogic,meson-axg-mmc";
+ reg = <0x0 0xffe07000 0x0 0x800>;
+- interrupts = <GIC_SPI 191 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ clocks = <&clkc CLKID_SD_EMMC_C>,
+ <&clkc CLKID_SD_EMMC_C_CLK0>,
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+index 81269ccc24968..4fb31c2ba31c4 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+@@ -139,7 +139,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&dc_in>;
++ pwm-supply = <&dc_in>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -157,14 +157,6 @@
+ regulator-always-on;
+ };
+
+- reserved-memory {
+- /* TEE Reserved Memory */
+- bl32_reserved: bl32@5000000 {
+- reg = <0x0 0x05300000 0x0 0x2000000>;
+- no-map;
+- };
+- };
+-
+ sdio_pwrseq: sdio-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
+index a26bfe72550fe..4b5d11e56364d 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts
+@@ -139,7 +139,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
+index 579f3d02d613e..b4e86196e3468 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts
+@@ -139,7 +139,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&dc_in>;
++ pwm-supply = <&dc_in>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
+index fb0ab27d1f642..6eaceb717d617 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi
+@@ -57,26 +57,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <666666666>;
+- opp-microvolt = <731000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <731000>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
+index d61f43052a344..8e9ad1e51d665 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
+@@ -11,26 +11,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <667000000>;
+- opp-microvolt = <731000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <761000>;
+@@ -71,26 +51,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <667000000>;
+- opp-microvolt = <731000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <731000>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi
+index f42cf4b8af2d4..16dd409051b40 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi
+@@ -18,7 +18,7 @@
+ regulator-min-microvolt = <690000>;
+ regulator-max-microvolt = <1050000>;
+
+- vin-supply = <&dc_in>;
++ pwm-supply = <&dc_in>;
+
+ pwms = <&pwm_ab 0 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -37,7 +37,7 @@
+ regulator-min-microvolt = <690000>;
+ regulator-max-microvolt = <1050000>;
+
+- vin-supply = <&vsys_3v3>;
++ pwm-supply = <&vsys_3v3>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
+index 344573e157a7b..d33e54b5e1969 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
+@@ -17,7 +17,7 @@
+ rtc1 = &vrtc;
+ };
+
+- dioo2133: audio-amplifier-0 {
++ dio2133: audio-amplifier-0 {
+ compatible = "simple-audio-amplifier";
+ enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>;
+ VCC-supply = <&vcc_5v>;
+@@ -130,7 +130,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_ab 0 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -149,7 +149,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -217,7 +217,7 @@
+ audio-widgets = "Line", "Lineout";
+ audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>,
+ <&tdmin_b>, <&tdmin_c>, <&tdmin_lb>,
+- <&dioo2133>;
++ <&dio2133>;
+ audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1",
+ "TDMOUT_B IN 1", "FRDDR_B OUT 1",
+ "TDMOUT_B IN 2", "FRDDR_C OUT 1",
+@@ -607,7 +607,7 @@
+ pinctrl-0 = <&nor_pins>;
+ pinctrl-names = "default";
+
+- mx25u64: spi-flash@0 {
++ mx25u64: flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "mxicy,mx25u6435f", "jedec,spi-nor";
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
+index 1e5d0ee5d541b..44c23c984034c 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
+@@ -11,26 +11,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <731000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <667000000>;
+- opp-microvolt = <731000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <731000>;
+@@ -76,26 +56,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <751000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <751000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <751000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <667000000>;
+- opp-microvolt = <751000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <771000>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
+index feb0885047400..b40d2c1002c92 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi
+@@ -96,7 +96,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_ab 0 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -115,7 +115,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
+index 2d7032f41e4b5..772c220c8f496 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-gx-libretech-pc.dtsi
+@@ -17,7 +17,7 @@
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+
+- update-button {
++ button-update {
+ label = "update";
+ linux,code = <KEY_VENDOR>;
+ press-threshold-microvolt = <1300000>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+index 6b457b2c30a4b..32cc9fab4490f 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+@@ -49,6 +49,12 @@
+ no-map;
+ };
+
++ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
++ secmon_reserved_bl32: secmon@5300000 {
++ reg = <0x0 0x05300000 0x0 0x2000000>;
++ no-map;
++ };
++
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+@@ -226,7 +232,7 @@
+ reg = <0x14 0x10>;
+ };
+
+- eth_mac: eth_mac@34 {
++ eth_mac: eth-mac@34 {
+ reg = <0x34 0x10>;
+ };
+
+@@ -243,7 +249,7 @@
+ scpi_clocks: clocks {
+ compatible = "arm,scpi-clocks";
+
+- scpi_dvfs: scpi_clocks@0 {
++ scpi_dvfs: clocks-0 {
+ compatible = "arm,scpi-dvfs-clocks";
+ #clock-cells = <1>;
+ clock-indices = <0>;
+@@ -525,7 +531,7 @@
+ #size-cells = <2>;
+ ranges = <0x0 0x0 0x0 0xc8834000 0x0 0x2000>;
+
+- hwrng: rng {
++ hwrng: rng@0 {
+ compatible = "amlogic,meson-rng";
+ reg = <0x0 0x0 0x0 0x4>;
+ };
+@@ -596,21 +602,21 @@
+ sd_emmc_a: mmc@70000 {
+ compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
+ reg = <0x0 0x70000 0x0 0x800>;
+- interrupts = <GIC_SPI 216 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ sd_emmc_b: mmc@72000 {
+ compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
+ reg = <0x0 0x72000 0x0 0x800>;
+- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ sd_emmc_c: mmc@74000 {
+ compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc";
+ reg = <0x0 0x74000 0x0 0x800>;
+- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+ };
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-kii-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-kii-pro.dts
+index e8394a8269ee1..802faf7e4e3cb 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-kii-pro.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-kii-pro.dts
+@@ -16,7 +16,7 @@
+
+ leds {
+ compatible = "gpio-leds";
+- status {
++ led {
+ gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_LOW>;
+ default-state = "off";
+ color = <LED_COLOR_ID_RED>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
+index a350fee1264d7..a4d34398da358 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi
+@@ -6,6 +6,7 @@
+ */
+
+ #include "meson-gxbb.dtsi"
++#include <dt-bindings/gpio/gpio.h>
+
+ / {
+ aliases {
+@@ -64,6 +65,7 @@
+ regulator-name = "VDDIO_AO18";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
++ regulator-always-on;
+ };
+
+ vcc_3v3: regulator-vcc_3v3 {
+@@ -161,6 +163,7 @@
+ status = "okay";
+ pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>;
+ pinctrl-names = "default";
++ hdmi-supply = <&vddio_ao18>;
+ };
+
+ &hdmi_tx_tmds_port {
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-phicomm-n1.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-phicomm-n1.dts
+index 9ef210f17b4aa..393d3cb33b9ee 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-phicomm-n1.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-phicomm-n1.dts
+@@ -18,7 +18,7 @@
+ leds {
+ compatible = "gpio-leds";
+
+- status {
++ led {
+ label = "n1:white:status";
+ gpios = <&gpio_ao GPIOAO_9 GPIO_ACTIVE_HIGH>;
+ default-state = "on";
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-sml5442tw.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-sml5442tw.dts
+index b331a013572f3..c490dbbf063bf 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-sml5442tw.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-sml5442tw.dts
+@@ -79,6 +79,5 @@
+ enable-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>;
+ max-speed = <2000000>;
+ clocks = <&wifi32k>;
+- clock-names = "lpo";
+ };
+ };
+diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+index c3ac531c4f84a..3500229350522 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+@@ -759,7 +759,7 @@
+ };
+ };
+
+- eth-phy-mux {
++ eth-phy-mux@55c {
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
+index effaa138b5f98..38ebe98ba9c6b 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
+@@ -17,13 +17,13 @@
+ compatible = "bananapi,bpi-m5", "amlogic,sm1";
+ model = "Banana Pi BPI-M5";
+
+- adc_keys {
++ adc-keys {
+ compatible = "adc-keys";
+ io-channels = <&saradc 2>;
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+
+- key {
++ button-sw3 {
+ label = "SW3";
+ linux,code = <BTN_3>;
+ press-threshold-microvolt = <1700000>;
+@@ -123,7 +123,7 @@
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+
+- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
++ enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>;
+ enable-active-high;
+ regulator-always-on;
+
+@@ -173,7 +173,7 @@
+ regulator-min-microvolt = <690000>;
+ regulator-max-microvolt = <1050000>;
+
+- vin-supply = <&dc_in>;
++ pwm-supply = <&dc_in>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -437,6 +437,7 @@
+ "",
+ "eMMC_RST#", /* BOOT_12 */
+ "eMMC_DS", /* BOOT_13 */
++ "", "",
+ /* GPIOC */
+ "SD_D0_B", /* GPIOC_0 */
+ "SD_D1_B", /* GPIOC_1 */
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
+index f2c0981435944..9c0b544e22098 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts
+@@ -24,7 +24,7 @@
+ regulator-min-microvolt = <690000>;
+ regulator-max-microvolt = <1050000>;
+
+- vin-supply = <&vsys_3v3>;
++ pwm-supply = <&vsys_3v3>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
+index f3f953225bf5b..15fece2e63205 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-hc4.dts
+@@ -76,9 +76,17 @@
+ };
+
+ &cpu_thermal {
++ trips {
++ cpu_active: cpu-active {
++ temperature = <60000>; /* millicelsius */
++ hysteresis = <2000>; /* millicelsius */
++ type = "active";
++ };
++ };
++
+ cooling-maps {
+ map {
+- trip = <&cpu_passive>;
++ trip = <&cpu_active>;
+ cooling-device = <&fan0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
+index fd0ad85c165ba..76ad052fbf0c9 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
+@@ -48,7 +48,7 @@
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc_5v>;
+
+- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
++ enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>;
+ enable-active-high;
+ regulator-always-on;
+
+@@ -116,7 +116,7 @@
+ regulator-min-microvolt = <721000>;
+ regulator-max-microvolt = <1022000>;
+
+- vin-supply = <&main_12v>;
++ pwm-supply = <&main_12v>;
+
+ pwms = <&pwm_AO_cd 1 1250 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -263,6 +263,10 @@
+ reg = <0>;
+ max-speed = <1000>;
+
++ reset-assert-us = <10000>;
++ reset-deassert-us = <80000>;
++ reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
++
+ interrupt-parent = <&gpio_intc>;
+ /* MAC_INTR on GPIOZ_14 */
+ interrupts = <26 IRQ_TYPE_LEVEL_LOW>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+index 2194a778973f1..a5d79f2f7c196 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+@@ -185,7 +185,7 @@
+ regulator-min-microvolt = <690000>;
+ regulator-max-microvolt = <1050000>;
+
+- vin-supply = <&dc_in>;
++ pwm-supply = <&dc_in>;
+
+ pwms = <&pwm_AO_cd 1 1500 0>;
+ pwm-dutycycle-range = <100 0>;
+@@ -203,14 +203,6 @@
+ regulator-always-on;
+ };
+
+- reserved-memory {
+- /* TEE Reserved Memory */
+- bl32_reserved: bl32@5000000 {
+- reg = <0x0 0x05300000 0x0 0x2000000>;
+- no-map;
+- };
+- };
+-
+ sdio_pwrseq: sdio-pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
+diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
+index 3d8b1f4f2001b..78bdbd2ccc9de 100644
+--- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
++++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
+@@ -95,26 +95,6 @@
+ compatible = "operating-points-v2";
+ opp-shared;
+
+- opp-100000000 {
+- opp-hz = /bits/ 64 <100000000>;
+- opp-microvolt = <730000>;
+- };
+-
+- opp-250000000 {
+- opp-hz = /bits/ 64 <250000000>;
+- opp-microvolt = <730000>;
+- };
+-
+- opp-500000000 {
+- opp-hz = /bits/ 64 <500000000>;
+- opp-microvolt = <730000>;
+- };
+-
+- opp-667000000 {
+- opp-hz = /bits/ 64 <666666666>;
+- opp-microvolt = <750000>;
+- };
+-
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <770000>;
+diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi
+index 6288e104a0893..a00b0f14c222f 100644
+--- a/arch/arm64/boot/dts/arm/juno-base.dtsi
++++ b/arch/arm64/boot/dts/arm/juno-base.dtsi
+@@ -26,7 +26,8 @@
+ compatible = "arm,mhu", "arm,primecell";
+ reg = <0x0 0x2b1f0000 0x0 0x1000>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
++ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ #mbox-cells = <1>;
+ clocks = <&soc_refclk100mhz>;
+ clock-names = "apb_pclk";
+@@ -543,8 +544,7 @@
+ <0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>,
+ <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>;
+ /* Standard AXI Translation entries as programmed by EDK2 */
+- dma-ranges = <0x02000000 0x0 0x2c1c0000 0x0 0x2c1c0000 0x0 0x00040000>,
+- <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>,
++ dma-ranges = <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>,
+ <0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 7>;
+@@ -597,12 +597,26 @@
+ polling-delay = <1000>;
+ polling-delay-passive = <100>;
+ thermal-sensors = <&scpi_sensors0 0>;
++ trips {
++ pmic_crit0: trip0 {
++ temperature = <90000>;
++ hysteresis = <2000>;
++ type = "critical";
++ };
++ };
+ };
+
+ soc {
+ polling-delay = <1000>;
+ polling-delay-passive = <100>;
+ thermal-sensors = <&scpi_sensors0 3>;
++ trips {
++ soc_crit0: trip0 {
++ temperature = <80000>;
++ hysteresis = <2000>;
++ type = "critical";
++ };
++ };
+ };
+
+ big_cluster_thermal_zone: big-cluster {
+diff --git a/arch/arm64/boot/dts/broadcom/Makefile b/arch/arm64/boot/dts/broadcom/Makefile
+index 11eae3e3a9447..bce0a12554539 100644
+--- a/arch/arm64/boot/dts/broadcom/Makefile
++++ b/arch/arm64/boot/dts/broadcom/Makefile
+@@ -6,6 +6,6 @@ dtb-$(CONFIG_ARCH_BCM2835) += bcm2711-rpi-400.dtb \
+ bcm2837-rpi-3-b-plus.dtb \
+ bcm2837-rpi-cm3-io3.dtb
+
+-subdir-y += bcm4908
++subdir-y += bcmbca
+ subdir-y += northstar2
+ subdir-y += stingray
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/Makefile b/arch/arm64/boot/dts/broadcom/bcm4908/Makefile
+deleted file mode 100644
+index cc75854519ac3..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/Makefile
++++ /dev/null
+@@ -1,4 +0,0 @@
+-# SPDX-License-Identifier: GPL-2.0
+-dtb-$(CONFIG_ARCH_BCM4908) += bcm4906-netgear-r8000p.dtb
+-dtb-$(CONFIG_ARCH_BCM4908) += bcm4906-tplink-archer-c2300-v1.dtb
+-dtb-$(CONFIG_ARCH_BCM4908) += bcm4908-asus-gt-ac5300.dtb
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-netgear-r8000p.dts b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-netgear-r8000p.dts
+deleted file mode 100644
+index 2dd028438c22c..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-netgear-r8000p.dts
++++ /dev/null
+@@ -1,157 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+-
+-#include <dt-bindings/gpio/gpio.h>
+-#include <dt-bindings/input/input.h>
+-#include <dt-bindings/leds/common.h>
+-
+-#include "bcm4906.dtsi"
+-
+-/ {
+- compatible = "netgear,r8000p", "brcm,bcm4906", "brcm,bcm4908";
+- model = "Netgear R8000P";
+-
+- memory@0 {
+- device_type = "memory";
+- reg = <0x00 0x00 0x00 0x20000000>;
+- };
+-
+- leds {
+- compatible = "gpio-leds";
+-
+- led-power-white {
+- function = LED_FUNCTION_POWER;
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-power-amber {
+- function = LED_FUNCTION_POWER;
+- color = <LED_COLOR_ID_AMBER>;
+- gpios = <&gpio0 9 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-wps {
+- function = LED_FUNCTION_WPS;
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-2ghz {
+- function = "2ghz";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-5ghz-1 {
+- function = "5ghz-1";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 15 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-5ghz-2 {
+- function = "5ghz-2";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 16 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-usb2 {
+- function = "usb2";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 17 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-usb3 {
+- function = "usb3";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-wifi {
+- function = "wifi";
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 56 GPIO_ACTIVE_LOW>;
+- };
+- };
+-};
+-
+-&enet {
+- nvmem-cells = <&base_mac_addr>;
+- nvmem-cell-names = "mac-address";
+-};
+-
+-&usb_phy {
+- brcm,ioc = <1>;
+- status = "okay";
+-};
+-
+-&ehci {
+- status = "okay";
+-};
+-
+-&ohci {
+- status = "okay";
+-};
+-
+-&xhci {
+- status = "okay";
+-};
+-
+-&ports {
+- port@0 {
+- label = "lan4";
+- };
+-
+- port@1 {
+- label = "lan3";
+- };
+-
+- port@2 {
+- label = "lan2";
+- };
+-
+- port@3 {
+- label = "lan1";
+- };
+-
+- port@7 {
+- reg = <7>;
+- phy-mode = "internal";
+- phy-handle = <&phy12>;
+- label = "wan";
+- };
+-};
+-
+-&nandcs {
+- nand-ecc-strength = <4>;
+- nand-ecc-step-size = <512>;
+- nand-on-flash-bbt;
+-
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- partitions {
+- compatible = "fixed-partitions";
+- #address-cells = <1>;
+- #size-cells = <1>;
+-
+- partition@0 {
+- compatible = "nvmem-cells";
+- label = "cferom";
+- reg = <0x0 0x100000>;
+-
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0 0x0 0x100000>;
+-
+- base_mac_addr: mac@106a0 {
+- reg = <0x106a0 0x6>;
+- };
+- };
+-
+- partition@100000 {
+- compatible = "brcm,bcm4908-firmware";
+- label = "firmware";
+- reg = <0x100000 0x4400000>;
+- };
+- };
+-};
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-tplink-archer-c2300-v1.dts b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-tplink-archer-c2300-v1.dts
+deleted file mode 100644
+index b63eefab48bd5..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906-tplink-archer-c2300-v1.dts
++++ /dev/null
+@@ -1,182 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+-
+-#include <dt-bindings/gpio/gpio.h>
+-#include <dt-bindings/input/input.h>
+-#include <dt-bindings/leds/common.h>
+-
+-#include "bcm4906.dtsi"
+-
+-/ {
+- compatible = "tplink,archer-c2300-v1", "brcm,bcm4906", "brcm,bcm4908";
+- model = "TP-Link Archer C2300 V1";
+-
+- memory@0 {
+- device_type = "memory";
+- reg = <0x00 0x00 0x00 0x20000000>;
+- };
+-
+- leds {
+- compatible = "gpio-leds";
+-
+- led-power {
+- function = LED_FUNCTION_POWER;
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-2ghz {
+- function = "2ghz";
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 2 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-5ghz {
+- function = "5ghz";
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 3 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-wan-amber {
+- function = LED_FUNCTION_WAN;
+- color = <LED_COLOR_ID_AMBER>;
+- gpios = <&gpio0 4 GPIO_ACTIVE_HIGH>;
+- };
+-
+- led-wan-blue {
+- function = LED_FUNCTION_WAN;
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-lan {
+- function = LED_FUNCTION_LAN;
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 12 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-wps {
+- function = LED_FUNCTION_WPS;
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-usb2 {
+- function = "usb2";
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 15 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-usb3 {
+- function = "usbd3";
+- color = <LED_COLOR_ID_BLUE>;
+- gpios = <&gpio0 17 GPIO_ACTIVE_LOW>;
+- };
+-
+- led-brightness {
+- function = LED_FUNCTION_BACKLIGHT;
+- color = <LED_COLOR_ID_WHITE>;
+- gpios = <&gpio0 19 GPIO_ACTIVE_LOW>;
+- };
+- };
+-
+- gpio-keys-polled {
+- compatible = "gpio-keys-polled";
+- poll-interval = <100>;
+-
+- brightness {
+- label = "LEDs";
+- linux,code = <KEY_BRIGHTNESS_ZERO>;
+- gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
+- };
+-
+- wps {
+- label = "WPS";
+- linux,code = <KEY_WPS_BUTTON>;
+- gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+- };
+-
+- wifi {
+- label = "WiFi";
+- linux,code = <KEY_RFKILL>;
+- gpios = <&gpio0 22 GPIO_ACTIVE_LOW>;
+- };
+-
+- restart {
+- label = "Reset";
+- linux,code = <KEY_RESTART>;
+- gpios = <&gpio0 23 GPIO_ACTIVE_LOW>;
+- };
+- };
+-};
+-
+-&usb_phy {
+- brcm,ioc = <1>;
+- status = "okay";
+-};
+-
+-&ehci {
+- status = "okay";
+-};
+-
+-&ohci {
+- status = "okay";
+-};
+-
+-&xhci {
+- status = "okay";
+-};
+-
+-&ports {
+- port@0 {
+- label = "lan4";
+- };
+-
+- port@1 {
+- label = "lan3";
+- };
+-
+- port@2 {
+- label = "lan2";
+- };
+-
+- port@3 {
+- label = "lan1";
+- };
+-
+- port@7 {
+- reg = <7>;
+- phy-mode = "internal";
+- phy-handle = <&phy12>;
+- label = "wan";
+- };
+-};
+-
+-&nandcs {
+- nand-ecc-strength = <4>;
+- nand-ecc-step-size = <512>;
+- nand-on-flash-bbt;
+-
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- partitions {
+- compatible = "brcm,bcm4908-partitions";
+- #address-cells = <1>;
+- #size-cells = <1>;
+-
+- partition@0 {
+- label = "cferom";
+- reg = <0x0 0x100000>;
+- };
+-
+- partition@100000 {
+- compatible = "brcm,bcm4908-firmware";
+- reg = <0x100000 0x3900000>;
+- };
+-
+- partition@5800000 {
+- compatible = "brcm,bcm4908-firmware";
+- reg = <0x3a00000 0x3900000>;
+- };
+- };
+-};
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi
+deleted file mode 100644
+index 66023d5535247..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi
++++ /dev/null
+@@ -1,18 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+-
+-#include "bcm4908.dtsi"
+-
+-/ {
+- cpus {
+- /delete-node/ cpu@2;
+-
+- /delete-node/ cpu@3;
+- };
+-
+- pmu {
+- compatible = "arm,cortex-a53-pmu";
+- interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-affinity = <&cpu0>, <&cpu1>;
+- };
+-};
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908-asus-gt-ac5300.dts b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908-asus-gt-ac5300.dts
+deleted file mode 100644
+index 169fbb7cfd342..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908-asus-gt-ac5300.dts
++++ /dev/null
+@@ -1,159 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+-
+-#include <dt-bindings/gpio/gpio.h>
+-#include <dt-bindings/input/input.h>
+-
+-#include "bcm4908.dtsi"
+-
+-/ {
+- compatible = "asus,gt-ac5300", "brcm,bcm4908";
+- model = "Asus GT-AC5300";
+-
+- memory@0 {
+- device_type = "memory";
+- reg = <0x00 0x00 0x00 0x40000000>;
+- };
+-
+- gpio-keys-polled {
+- compatible = "gpio-keys-polled";
+- poll-interval = <100>;
+-
+- wifi {
+- label = "WiFi";
+- linux,code = <KEY_RFKILL>;
+- gpios = <&gpio0 28 GPIO_ACTIVE_LOW>;
+- };
+-
+- wps {
+- label = "WPS";
+- linux,code = <KEY_WPS_BUTTON>;
+- gpios = <&gpio0 29 GPIO_ACTIVE_LOW>;
+- };
+-
+- restart {
+- label = "Reset";
+- linux,code = <KEY_RESTART>;
+- gpios = <&gpio0 30 GPIO_ACTIVE_LOW>;
+- };
+-
+- brightness {
+- label = "LEDs";
+- linux,code = <KEY_BRIGHTNESS_ZERO>;
+- gpios = <&gpio0 31 GPIO_ACTIVE_LOW>;
+- };
+- };
+-};
+-
+-&enet {
+- nvmem-cells = <&base_mac_addr>;
+- nvmem-cell-names = "mac-address";
+-};
+-
+-&usb_phy {
+- brcm,ioc = <1>;
+- status = "okay";
+-};
+-
+-&ehci {
+- status = "okay";
+-};
+-
+-&ohci {
+- status = "okay";
+-};
+-
+-&xhci {
+- status = "okay";
+-};
+-
+-&ports {
+- port@0 {
+- label = "lan2";
+- };
+-
+- port@1 {
+- label = "lan1";
+- };
+-
+- port@2 {
+- label = "lan6";
+- };
+-
+- port@3 {
+- label = "lan5";
+- };
+-
+- /* External BCM53134S switch */
+- port@7 {
+- label = "sw";
+- reg = <7>;
+- phy-mode = "rgmii";
+-
+- fixed-link {
+- speed = <1000>;
+- full-duplex;
+- };
+- };
+-};
+-
+-&mdio {
+- /* lan8 */
+- ethernet-phy@0 {
+- reg = <0>;
+- };
+-
+- /* lan7 */
+- ethernet-phy@1 {
+- reg = <1>;
+- };
+-
+- /* lan4 */
+- ethernet-phy@2 {
+- reg = <2>;
+- };
+-
+- /* lan3 */
+- ethernet-phy@3 {
+- reg = <3>;
+- };
+-};
+-
+-&nandcs {
+- nand-ecc-strength = <4>;
+- nand-ecc-step-size = <512>;
+- nand-on-flash-bbt;
+- brcm,nand-has-wp;
+-
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- partitions {
+- compatible = "brcm,bcm4908-partitions";
+- #address-cells = <1>;
+- #size-cells = <1>;
+-
+- partition@0 {
+- compatible = "nvmem-cells";
+- label = "cferom";
+- reg = <0x0 0x100000>;
+-
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0 0x0 0x100000>;
+-
+- base_mac_addr: mac@106a0 {
+- reg = <0x106a0 0x6>;
+- };
+- };
+-
+- partition@100000 {
+- compatible = "brcm,bcm4908-firmware";
+- reg = <0x100000 0x5700000>;
+- };
+-
+- partition@5800000 {
+- compatible = "brcm,bcm4908-firmware";
+- reg = <0x5800000 0x5700000>;
+- };
+- };
+-};
+diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi
+deleted file mode 100644
+index a5a64d17d9ea6..0000000000000
+--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi
++++ /dev/null
+@@ -1,337 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
+-
+-#include <dt-bindings/interrupt-controller/irq.h>
+-#include <dt-bindings/interrupt-controller/arm-gic.h>
+-#include <dt-bindings/phy/phy.h>
+-#include <dt-bindings/soc/bcm-pmb.h>
+-
+-/dts-v1/;
+-
+-/ {
+- interrupt-parent = <&gic>;
+-
+- #address-cells = <2>;
+- #size-cells = <2>;
+-
+- aliases {
+- serial0 = &uart0;
+- };
+-
+- chosen {
+- stdout-path = "serial0:115200n8";
+- };
+-
+- cpus {
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- cpu0: cpu@0 {
+- device_type = "cpu";
+- compatible = "brcm,brahma-b53";
+- reg = <0x0>;
+- next-level-cache = <&l2>;
+- };
+-
+- cpu1: cpu@1 {
+- device_type = "cpu";
+- compatible = "brcm,brahma-b53";
+- reg = <0x1>;
+- enable-method = "spin-table";
+- cpu-release-addr = <0x0 0xfff8>;
+- next-level-cache = <&l2>;
+- };
+-
+- cpu2: cpu@2 {
+- device_type = "cpu";
+- compatible = "brcm,brahma-b53";
+- reg = <0x2>;
+- enable-method = "spin-table";
+- cpu-release-addr = <0x0 0xfff8>;
+- next-level-cache = <&l2>;
+- };
+-
+- cpu3: cpu@3 {
+- device_type = "cpu";
+- compatible = "brcm,brahma-b53";
+- reg = <0x3>;
+- enable-method = "spin-table";
+- cpu-release-addr = <0x0 0xfff8>;
+- next-level-cache = <&l2>;
+- };
+-
+- l2: l2-cache0 {
+- compatible = "cache";
+- };
+- };
+-
+- axi@81000000 {
+- compatible = "simple-bus";
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0x00 0x00 0x81000000 0x4000>;
+-
+- gic: interrupt-controller@1000 {
+- compatible = "arm,gic-400";
+- #interrupt-cells = <3>;
+- #address-cells = <0>;
+- interrupt-controller;
+- reg = <0x1000 0x1000>,
+- <0x2000 0x2000>;
+- };
+- };
+-
+- timer {
+- compatible = "arm,armv8-timer";
+- interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+- <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+- <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+- <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+- };
+-
+- pmu {
+- compatible = "arm,cortex-a53-pmu";
+- interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
+- };
+-
+- clocks {
+- periph_clk: periph_clk {
+- compatible = "fixed-clock";
+- #clock-cells = <0>;
+- clock-frequency = <50000000>;
+- clock-output-names = "periph";
+- };
+- };
+-
+- soc {
+- compatible = "simple-bus";
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0x00 0x00 0x80000000 0x281000>;
+-
+- enet: ethernet@2000 {
+- compatible = "brcm,bcm4908-enet";
+- reg = <0x2000 0x1000>;
+-
+- interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "rx", "tx";
+- };
+-
+- usb_phy: usb-phy@c200 {
+- compatible = "brcm,bcm4908-usb-phy";
+- reg = <0xc200 0x100>;
+- reg-names = "ctrl";
+- power-domains = <&pmb BCM_PMB_HOST_USB>;
+- dr_mode = "host";
+- brcm,has-xhci;
+- brcm,has-eohci;
+- #phy-cells = <1>;
+- status = "disabled";
+- };
+-
+- ehci: usb@c300 {
+- compatible = "generic-ehci";
+- reg = <0xc300 0x100>;
+- interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+- phys = <&usb_phy PHY_TYPE_USB2>;
+- status = "disabled";
+- };
+-
+- ohci: usb@c400 {
+- compatible = "generic-ohci";
+- reg = <0xc400 0x100>;
+- interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+- phys = <&usb_phy PHY_TYPE_USB2>;
+- status = "disabled";
+- };
+-
+- xhci: usb@d000 {
+- compatible = "generic-xhci";
+- reg = <0xd000 0x8c8>;
+- interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+- phys = <&usb_phy PHY_TYPE_USB3>;
+- status = "disabled";
+- };
+-
+- bus@80000 {
+- compatible = "simple-bus";
+- #size-cells = <1>;
+- #address-cells = <1>;
+- ranges = <0 0x80000 0x50000>;
+-
+- ethernet-switch@0 {
+- compatible = "brcm,bcm4908-switch";
+- reg = <0x0 0x40000>,
+- <0x40000 0x110>,
+- <0x40340 0x30>,
+- <0x40380 0x30>,
+- <0x40600 0x34>,
+- <0x40800 0x208>;
+- reg-names = "core", "reg", "intrl2_0",
+- "intrl2_1", "fcb", "acb";
+- interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+- <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
+- brcm,num-gphy = <5>;
+- brcm,num-rgmii-ports = <2>;
+-
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- ports: ports {
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- port@0 {
+- reg = <0>;
+- phy-mode = "internal";
+- phy-handle = <&phy8>;
+- };
+-
+- port@1 {
+- reg = <1>;
+- phy-mode = "internal";
+- phy-handle = <&phy9>;
+- };
+-
+- port@2 {
+- reg = <2>;
+- phy-mode = "internal";
+- phy-handle = <&phy10>;
+- };
+-
+- port@3 {
+- reg = <3>;
+- phy-mode = "internal";
+- phy-handle = <&phy11>;
+- };
+-
+- port@8 {
+- reg = <8>;
+- phy-mode = "internal";
+- ethernet = <&enet>;
+-
+- fixed-link {
+- speed = <1000>;
+- full-duplex;
+- };
+- };
+- };
+- };
+-
+- mdio: mdio@405c0 {
+- compatible = "brcm,unimac-mdio";
+- reg = <0x405c0 0x8>;
+- reg-names = "mdio";
+- #size-cells = <0>;
+- #address-cells = <1>;
+-
+- phy8: ethernet-phy@8 {
+- reg = <8>;
+- };
+-
+- phy9: ethernet-phy@9 {
+- reg = <9>;
+- };
+-
+- phy10: ethernet-phy@a {
+- reg = <10>;
+- };
+-
+- phy11: ethernet-phy@b {
+- reg = <11>;
+- };
+-
+- phy12: ethernet-phy@c {
+- reg = <12>;
+- };
+- };
+- };
+-
+- procmon: syscon@280000 {
+- compatible = "simple-bus";
+- reg = <0x280000 0x1000>;
+- ranges;
+-
+- #address-cells = <1>;
+- #size-cells = <1>;
+-
+- pmb: power-controller@2800c0 {
+- compatible = "brcm,bcm4908-pmb";
+- reg = <0x2800c0 0x40>;
+- #power-domain-cells = <1>;
+- };
+- };
+- };
+-
+- bus@ff800000 {
+- compatible = "simple-bus";
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0x00 0x00 0xff800000 0x3000>;
+-
+- timer: timer@400 {
+- compatible = "brcm,bcm6328-timer", "syscon";
+- reg = <0x400 0x3c>;
+- };
+-
+- gpio0: gpio-controller@500 {
+- compatible = "brcm,bcm6345-gpio";
+- reg-names = "dirout", "dat";
+- reg = <0x500 0x28>, <0x528 0x28>;
+-
+- #gpio-cells = <2>;
+- gpio-controller;
+- };
+-
+- uart0: serial@640 {
+- compatible = "brcm,bcm6345-uart";
+- reg = <0x640 0x18>;
+- interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+- clocks = <&periph_clk>;
+- clock-names = "periph";
+- status = "okay";
+- };
+-
+- nand@1800 {
+- #address-cells = <1>;
+- #size-cells = <0>;
+- compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+- reg = <0x1800 0x600>, <0x2000 0x10>;
+- reg-names = "nand", "nand-int-base";
+- interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+- interrupt-names = "nand";
+- status = "okay";
+-
+- nandcs: nand@0 {
+- compatible = "brcm,nandcs";
+- reg = <0>;
+- };
+- };
+-
+- misc@2600 {
+- compatible = "brcm,misc", "simple-mfd";
+- reg = <0x2600 0xe4>;
+-
+- #address-cells = <1>;
+- #size-cells = <1>;
+- ranges = <0x00 0x2600 0xe4>;
+-
+- reset-controller@2644 {
+- compatible = "brcm,bcm4908-misc-pcie-reset";
+- reg = <0x44 0x04>;
+- #reset-cells = <1>;
+- };
+- };
+-
+- reboot {
+- compatible = "syscon-reboot";
+- regmap = <&timer>;
+- offset = <0x34>;
+- mask = <1>;
+- };
+- };
+-};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/Makefile b/arch/arm64/boot/dts/broadcom/bcmbca/Makefile
+new file mode 100644
+index 0000000000000..dc68357849a9b
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/Makefile
+@@ -0,0 +1,10 @@
++# SPDX-License-Identifier: GPL-2.0
++dtb-$(CONFIG_ARCH_BCMBCA) += \
++ bcm4906-netgear-r8000p.dtb \
++ bcm4906-tplink-archer-c2300-v1.dtb \
++ bcm4908-asus-gt-ac5300.dtb \
++ bcm4908-netgear-raxe500.dtb \
++ bcm4912-asus-gt-ax6000.dtb \
++ bcm94912.dtb \
++ bcm963158.dtb \
++ bcm96858.dtb
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-netgear-r8000p.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-netgear-r8000p.dts
+new file mode 100644
+index 0000000000000..2dd028438c22c
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-netgear-r8000p.dts
+@@ -0,0 +1,157 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/input/input.h>
++#include <dt-bindings/leds/common.h>
++
++#include "bcm4906.dtsi"
++
++/ {
++ compatible = "netgear,r8000p", "brcm,bcm4906", "brcm,bcm4908";
++ model = "Netgear R8000P";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00 0x00 0x00 0x20000000>;
++ };
++
++ leds {
++ compatible = "gpio-leds";
++
++ led-power-white {
++ function = LED_FUNCTION_POWER;
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
++ };
++
++ led-power-amber {
++ function = LED_FUNCTION_POWER;
++ color = <LED_COLOR_ID_AMBER>;
++ gpios = <&gpio0 9 GPIO_ACTIVE_LOW>;
++ };
++
++ led-wps {
++ function = LED_FUNCTION_WPS;
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
++ };
++
++ led-2ghz {
++ function = "2ghz";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
++ };
++
++ led-5ghz-1 {
++ function = "5ghz-1";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 15 GPIO_ACTIVE_LOW>;
++ };
++
++ led-5ghz-2 {
++ function = "5ghz-2";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 16 GPIO_ACTIVE_LOW>;
++ };
++
++ led-usb2 {
++ function = "usb2";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 17 GPIO_ACTIVE_LOW>;
++ };
++
++ led-usb3 {
++ function = "usb3";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
++ };
++
++ led-wifi {
++ function = "wifi";
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 56 GPIO_ACTIVE_LOW>;
++ };
++ };
++};
++
++&enet {
++ nvmem-cells = <&base_mac_addr>;
++ nvmem-cell-names = "mac-address";
++};
++
++&usb_phy {
++ brcm,ioc = <1>;
++ status = "okay";
++};
++
++&ehci {
++ status = "okay";
++};
++
++&ohci {
++ status = "okay";
++};
++
++&xhci {
++ status = "okay";
++};
++
++&ports {
++ port@0 {
++ label = "lan4";
++ };
++
++ port@1 {
++ label = "lan3";
++ };
++
++ port@2 {
++ label = "lan2";
++ };
++
++ port@3 {
++ label = "lan1";
++ };
++
++ port@7 {
++ reg = <7>;
++ phy-mode = "internal";
++ phy-handle = <&phy12>;
++ label = "wan";
++ };
++};
++
++&nandcs {
++ nand-ecc-strength = <4>;
++ nand-ecc-step-size = <512>;
++ nand-on-flash-bbt;
++
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ partitions {
++ compatible = "fixed-partitions";
++ #address-cells = <1>;
++ #size-cells = <1>;
++
++ partition@0 {
++ compatible = "nvmem-cells";
++ label = "cferom";
++ reg = <0x0 0x100000>;
++
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0 0x0 0x100000>;
++
++ base_mac_addr: mac@106a0 {
++ reg = <0x106a0 0x6>;
++ };
++ };
++
++ partition@100000 {
++ compatible = "brcm,bcm4908-firmware";
++ label = "firmware";
++ reg = <0x100000 0x4400000>;
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-tplink-archer-c2300-v1.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-tplink-archer-c2300-v1.dts
+new file mode 100644
+index 0000000000000..b63eefab48bd5
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-tplink-archer-c2300-v1.dts
+@@ -0,0 +1,182 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/input/input.h>
++#include <dt-bindings/leds/common.h>
++
++#include "bcm4906.dtsi"
++
++/ {
++ compatible = "tplink,archer-c2300-v1", "brcm,bcm4906", "brcm,bcm4908";
++ model = "TP-Link Archer C2300 V1";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00 0x00 0x00 0x20000000>;
++ };
++
++ leds {
++ compatible = "gpio-leds";
++
++ led-power {
++ function = LED_FUNCTION_POWER;
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
++ };
++
++ led-2ghz {
++ function = "2ghz";
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 2 GPIO_ACTIVE_LOW>;
++ };
++
++ led-5ghz {
++ function = "5ghz";
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 3 GPIO_ACTIVE_LOW>;
++ };
++
++ led-wan-amber {
++ function = LED_FUNCTION_WAN;
++ color = <LED_COLOR_ID_AMBER>;
++ gpios = <&gpio0 4 GPIO_ACTIVE_HIGH>;
++ };
++
++ led-wan-blue {
++ function = LED_FUNCTION_WAN;
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
++ };
++
++ led-lan {
++ function = LED_FUNCTION_LAN;
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 12 GPIO_ACTIVE_LOW>;
++ };
++
++ led-wps {
++ function = LED_FUNCTION_WPS;
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
++ };
++
++ led-usb2 {
++ function = "usb2";
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 15 GPIO_ACTIVE_LOW>;
++ };
++
++ led-usb3 {
++ function = "usbd3";
++ color = <LED_COLOR_ID_BLUE>;
++ gpios = <&gpio0 17 GPIO_ACTIVE_LOW>;
++ };
++
++ led-brightness {
++ function = LED_FUNCTION_BACKLIGHT;
++ color = <LED_COLOR_ID_WHITE>;
++ gpios = <&gpio0 19 GPIO_ACTIVE_LOW>;
++ };
++ };
++
++ gpio-keys-polled {
++ compatible = "gpio-keys-polled";
++ poll-interval = <100>;
++
++ brightness {
++ label = "LEDs";
++ linux,code = <KEY_BRIGHTNESS_ZERO>;
++ gpios = <&gpio0 18 GPIO_ACTIVE_LOW>;
++ };
++
++ wps {
++ label = "WPS";
++ linux,code = <KEY_WPS_BUTTON>;
++ gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
++ };
++
++ wifi {
++ label = "WiFi";
++ linux,code = <KEY_RFKILL>;
++ gpios = <&gpio0 22 GPIO_ACTIVE_LOW>;
++ };
++
++ restart {
++ label = "Reset";
++ linux,code = <KEY_RESTART>;
++ gpios = <&gpio0 23 GPIO_ACTIVE_LOW>;
++ };
++ };
++};
++
++&usb_phy {
++ brcm,ioc = <1>;
++ status = "okay";
++};
++
++&ehci {
++ status = "okay";
++};
++
++&ohci {
++ status = "okay";
++};
++
++&xhci {
++ status = "okay";
++};
++
++&ports {
++ port@0 {
++ label = "lan4";
++ };
++
++ port@1 {
++ label = "lan3";
++ };
++
++ port@2 {
++ label = "lan2";
++ };
++
++ port@3 {
++ label = "lan1";
++ };
++
++ port@7 {
++ reg = <7>;
++ phy-mode = "internal";
++ phy-handle = <&phy12>;
++ label = "wan";
++ };
++};
++
++&nandcs {
++ nand-ecc-strength = <4>;
++ nand-ecc-step-size = <512>;
++ nand-on-flash-bbt;
++
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ partitions {
++ compatible = "brcm,bcm4908-partitions";
++ #address-cells = <1>;
++ #size-cells = <1>;
++
++ partition@0 {
++ label = "cferom";
++ reg = <0x0 0x100000>;
++ };
++
++ partition@100000 {
++ compatible = "brcm,bcm4908-firmware";
++ reg = <0x100000 0x3900000>;
++ };
++
++ partition@5800000 {
++ compatible = "brcm,bcm4908-firmware";
++ reg = <0x3a00000 0x3900000>;
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906.dtsi
+new file mode 100644
+index 0000000000000..d084c33d5ca82
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4906.dtsi
+@@ -0,0 +1,26 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include "bcm4908.dtsi"
++
++/ {
++ cpus {
++ /delete-node/ cpu@2;
++
++ /delete-node/ cpu@3;
++ };
++
++ timer {
++ compatible = "arm,armv8-timer";
++ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
++ };
++
++ pmu {
++ compatible = "arm,cortex-a53-pmu";
++ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-affinity = <&cpu0>, <&cpu1>;
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
+new file mode 100644
+index 0000000000000..169fbb7cfd342
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
+@@ -0,0 +1,159 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/input/input.h>
++
++#include "bcm4908.dtsi"
++
++/ {
++ compatible = "asus,gt-ac5300", "brcm,bcm4908";
++ model = "Asus GT-AC5300";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00 0x00 0x00 0x40000000>;
++ };
++
++ gpio-keys-polled {
++ compatible = "gpio-keys-polled";
++ poll-interval = <100>;
++
++ wifi {
++ label = "WiFi";
++ linux,code = <KEY_RFKILL>;
++ gpios = <&gpio0 28 GPIO_ACTIVE_LOW>;
++ };
++
++ wps {
++ label = "WPS";
++ linux,code = <KEY_WPS_BUTTON>;
++ gpios = <&gpio0 29 GPIO_ACTIVE_LOW>;
++ };
++
++ restart {
++ label = "Reset";
++ linux,code = <KEY_RESTART>;
++ gpios = <&gpio0 30 GPIO_ACTIVE_LOW>;
++ };
++
++ brightness {
++ label = "LEDs";
++ linux,code = <KEY_BRIGHTNESS_ZERO>;
++ gpios = <&gpio0 31 GPIO_ACTIVE_LOW>;
++ };
++ };
++};
++
++&enet {
++ nvmem-cells = <&base_mac_addr>;
++ nvmem-cell-names = "mac-address";
++};
++
++&usb_phy {
++ brcm,ioc = <1>;
++ status = "okay";
++};
++
++&ehci {
++ status = "okay";
++};
++
++&ohci {
++ status = "okay";
++};
++
++&xhci {
++ status = "okay";
++};
++
++&ports {
++ port@0 {
++ label = "lan2";
++ };
++
++ port@1 {
++ label = "lan1";
++ };
++
++ port@2 {
++ label = "lan6";
++ };
++
++ port@3 {
++ label = "lan5";
++ };
++
++ /* External BCM53134S switch */
++ port@7 {
++ label = "sw";
++ reg = <7>;
++ phy-mode = "rgmii";
++
++ fixed-link {
++ speed = <1000>;
++ full-duplex;
++ };
++ };
++};
++
++&mdio {
++ /* lan8 */
++ ethernet-phy@0 {
++ reg = <0>;
++ };
++
++ /* lan7 */
++ ethernet-phy@1 {
++ reg = <1>;
++ };
++
++ /* lan4 */
++ ethernet-phy@2 {
++ reg = <2>;
++ };
++
++ /* lan3 */
++ ethernet-phy@3 {
++ reg = <3>;
++ };
++};
++
++&nandcs {
++ nand-ecc-strength = <4>;
++ nand-ecc-step-size = <512>;
++ nand-on-flash-bbt;
++ brcm,nand-has-wp;
++
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ partitions {
++ compatible = "brcm,bcm4908-partitions";
++ #address-cells = <1>;
++ #size-cells = <1>;
++
++ partition@0 {
++ compatible = "nvmem-cells";
++ label = "cferom";
++ reg = <0x0 0x100000>;
++
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0 0x0 0x100000>;
++
++ base_mac_addr: mac@106a0 {
++ reg = <0x106a0 0x6>;
++ };
++ };
++
++ partition@100000 {
++ compatible = "brcm,bcm4908-firmware";
++ reg = <0x100000 0x5700000>;
++ };
++
++ partition@5800000 {
++ compatible = "brcm,bcm4908-firmware";
++ reg = <0x5800000 0x5700000>;
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-netgear-raxe500.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-netgear-raxe500.dts
+new file mode 100644
+index 0000000000000..3c2cf2d238b6f
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-netgear-raxe500.dts
+@@ -0,0 +1,50 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include "bcm4908.dtsi"
++
++/ {
++ compatible = "netgear,raxe500", "brcm,bcm4908";
++ model = "Netgear RAXE500";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00 0x00 0x00 0x40000000>;
++ };
++};
++
++&ehci {
++ status = "okay";
++};
++
++&ohci {
++ status = "okay";
++};
++
++&xhci {
++ status = "okay";
++};
++
++&ports {
++ port@0 {
++ label = "lan4";
++ };
++
++ port@1 {
++ label = "lan3";
++ };
++
++ port@2 {
++ label = "lan2";
++ };
++
++ port@3 {
++ label = "lan1";
++ };
++
++ port@7 {
++ reg = <7>;
++ phy-mode = "internal";
++ phy-handle = <&phy12>;
++ label = "wan";
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
+new file mode 100644
+index 0000000000000..b7db95ce0bbf2
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
+@@ -0,0 +1,339 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++#include <dt-bindings/interrupt-controller/irq.h>
++#include <dt-bindings/interrupt-controller/arm-gic.h>
++#include <dt-bindings/phy/phy.h>
++#include <dt-bindings/soc/bcm-pmb.h>
++
++/dts-v1/;
++
++/ {
++ interrupt-parent = <&gic>;
++
++ #address-cells = <2>;
++ #size-cells = <2>;
++
++ aliases {
++ serial0 = &uart0;
++ };
++
++ chosen {
++ stdout-path = "serial0:115200n8";
++ };
++
++ cpus {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ cpu0: cpu@0 {
++ device_type = "cpu";
++ compatible = "brcm,brahma-b53";
++ reg = <0x0>;
++ enable-method = "spin-table";
++ cpu-release-addr = <0x0 0xfff8>;
++ next-level-cache = <&l2>;
++ };
++
++ cpu1: cpu@1 {
++ device_type = "cpu";
++ compatible = "brcm,brahma-b53";
++ reg = <0x1>;
++ enable-method = "spin-table";
++ cpu-release-addr = <0x0 0xfff8>;
++ next-level-cache = <&l2>;
++ };
++
++ cpu2: cpu@2 {
++ device_type = "cpu";
++ compatible = "brcm,brahma-b53";
++ reg = <0x2>;
++ enable-method = "spin-table";
++ cpu-release-addr = <0x0 0xfff8>;
++ next-level-cache = <&l2>;
++ };
++
++ cpu3: cpu@3 {
++ device_type = "cpu";
++ compatible = "brcm,brahma-b53";
++ reg = <0x3>;
++ enable-method = "spin-table";
++ cpu-release-addr = <0x0 0xfff8>;
++ next-level-cache = <&l2>;
++ };
++
++ l2: l2-cache0 {
++ compatible = "cache";
++ };
++ };
++
++ axi@81000000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x00 0x00 0x81000000 0x4000>;
++
++ gic: interrupt-controller@1000 {
++ compatible = "arm,gic-400";
++ #interrupt-cells = <3>;
++ #address-cells = <0>;
++ interrupt-controller;
++ reg = <0x1000 0x1000>,
++ <0x2000 0x2000>;
++ };
++ };
++
++ timer {
++ compatible = "arm,armv8-timer";
++ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
++ };
++
++ pmu {
++ compatible = "arm,cortex-a53-pmu";
++ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
++ };
++
++ clocks {
++ periph_clk: periph_clk {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ clock-frequency = <50000000>;
++ clock-output-names = "periph";
++ };
++ };
++
++ soc {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x00 0x00 0x80000000 0x281000>;
++
++ enet: ethernet@2000 {
++ compatible = "brcm,bcm4908-enet";
++ reg = <0x2000 0x1000>;
++
++ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-names = "rx", "tx";
++ };
++
++ usb_phy: usb-phy@c200 {
++ compatible = "brcm,bcm4908-usb-phy";
++ reg = <0xc200 0x100>;
++ reg-names = "ctrl";
++ power-domains = <&pmb BCM_PMB_HOST_USB>;
++ dr_mode = "host";
++ brcm,has-xhci;
++ brcm,has-eohci;
++ #phy-cells = <1>;
++ status = "disabled";
++ };
++
++ ehci: usb@c300 {
++ compatible = "generic-ehci";
++ reg = <0xc300 0x100>;
++ interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
++ phys = <&usb_phy PHY_TYPE_USB2>;
++ status = "disabled";
++ };
++
++ ohci: usb@c400 {
++ compatible = "generic-ohci";
++ reg = <0xc400 0x100>;
++ interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
++ phys = <&usb_phy PHY_TYPE_USB2>;
++ status = "disabled";
++ };
++
++ xhci: usb@d000 {
++ compatible = "generic-xhci";
++ reg = <0xd000 0x8c8>;
++ interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
++ phys = <&usb_phy PHY_TYPE_USB3>;
++ status = "disabled";
++ };
++
++ bus@80000 {
++ compatible = "simple-bus";
++ #size-cells = <1>;
++ #address-cells = <1>;
++ ranges = <0 0x80000 0x50000>;
++
++ ethernet-switch@0 {
++ compatible = "brcm,bcm4908-switch";
++ reg = <0x0 0x40000>,
++ <0x40000 0x110>,
++ <0x40340 0x30>,
++ <0x40380 0x30>,
++ <0x40600 0x34>,
++ <0x40800 0x208>;
++ reg-names = "core", "reg", "intrl2_0",
++ "intrl2_1", "fcb", "acb";
++ interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
++ brcm,num-gphy = <5>;
++ brcm,num-rgmii-ports = <2>;
++
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ ports: ports {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ port@0 {
++ reg = <0>;
++ phy-mode = "internal";
++ phy-handle = <&phy8>;
++ };
++
++ port@1 {
++ reg = <1>;
++ phy-mode = "internal";
++ phy-handle = <&phy9>;
++ };
++
++ port@2 {
++ reg = <2>;
++ phy-mode = "internal";
++ phy-handle = <&phy10>;
++ };
++
++ port@3 {
++ reg = <3>;
++ phy-mode = "internal";
++ phy-handle = <&phy11>;
++ };
++
++ port@8 {
++ reg = <8>;
++ phy-mode = "internal";
++ ethernet = <&enet>;
++
++ fixed-link {
++ speed = <1000>;
++ full-duplex;
++ };
++ };
++ };
++ };
++
++ mdio: mdio@405c0 {
++ compatible = "brcm,unimac-mdio";
++ reg = <0x405c0 0x8>;
++ reg-names = "mdio";
++ #size-cells = <0>;
++ #address-cells = <1>;
++
++ phy8: ethernet-phy@8 {
++ reg = <8>;
++ };
++
++ phy9: ethernet-phy@9 {
++ reg = <9>;
++ };
++
++ phy10: ethernet-phy@a {
++ reg = <10>;
++ };
++
++ phy11: ethernet-phy@b {
++ reg = <11>;
++ };
++
++ phy12: ethernet-phy@c {
++ reg = <12>;
++ };
++ };
++ };
++
++ procmon: bus@280000 {
++ compatible = "simple-bus";
++ reg = <0x280000 0x1000>;
++ ranges;
++
++ #address-cells = <1>;
++ #size-cells = <1>;
++
++ pmb: power-controller@2800c0 {
++ compatible = "brcm,bcm4908-pmb";
++ reg = <0x2800c0 0x40>;
++ #power-domain-cells = <1>;
++ };
++ };
++ };
++
++ bus@ff800000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x00 0x00 0xff800000 0x3000>;
++
++ twd: timer-mfd@400 {
++ compatible = "brcm,bcm4908-twd", "simple-mfd", "syscon";
++ reg = <0x400 0x4c>;
++ };
++
++ gpio0: gpio-controller@500 {
++ compatible = "brcm,bcm6345-gpio";
++ reg-names = "dirout", "dat";
++ reg = <0x500 0x28>, <0x528 0x28>;
++
++ #gpio-cells = <2>;
++ gpio-controller;
++ };
++
++ uart0: serial@640 {
++ compatible = "brcm,bcm6345-uart";
++ reg = <0x640 0x18>;
++ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
++ clocks = <&periph_clk>;
++ clock-names = "refclk";
++ status = "okay";
++ };
++
++ nand@1800 {
++ #address-cells = <1>;
++ #size-cells = <0>;
++ compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
++ reg = <0x1800 0x600>, <0x2000 0x10>;
++ reg-names = "nand", "nand-int-base";
++ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-names = "nand_ctlrdy";
++ status = "okay";
++
++ nandcs: nand@0 {
++ compatible = "brcm,nandcs";
++ reg = <0>;
++ };
++ };
++
++ misc@2600 {
++ compatible = "brcm,misc", "simple-mfd";
++ reg = <0x2600 0xe4>;
++
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x00 0x2600 0xe4>;
++
++ reset-controller@2644 {
++ compatible = "brcm,bcm4908-misc-pcie-reset";
++ reg = <0x44 0x04>;
++ #reset-cells = <1>;
++ };
++ };
++ };
++
++ reboot {
++ compatible = "syscon-reboot";
++ regmap = <&twd>;
++ offset = <0x34>;
++ mask = <1>;
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912-asus-gt-ax6000.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912-asus-gt-ax6000.dts
+new file mode 100644
+index 0000000000000..ed554666e95ea
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912-asus-gt-ax6000.dts
+@@ -0,0 +1,19 @@
++// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
++
++/dts-v1/;
++
++#include "bcm4912.dtsi"
++
++/ {
++ compatible = "asus,gt-ax6000", "brcm,bcm4912", "brcm,bcmbca";
++ model = "Asus GT-AX6000";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00 0x00 0x00 0x40000000>;
++ };
++};
++
++&uart0 {
++ status = "okay";
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912.dtsi
+new file mode 100644
+index 0000000000000..3d016c2ce6759
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4912.dtsi
+@@ -0,0 +1,128 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++#include <dt-bindings/interrupt-controller/irq.h>
++#include <dt-bindings/interrupt-controller/arm-gic.h>
++
++/ {
++ compatible = "brcm,bcm4912", "brcm,bcmbca";
++ #address-cells = <2>;
++ #size-cells = <2>;
++
++ interrupt-parent = <&gic>;
++
++ cpus {
++ #address-cells = <2>;
++ #size-cells = <0>;
++
++ B53_0: cpu@0 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x0>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_1: cpu@1 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x1>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_2: cpu@2 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x2>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_3: cpu@3 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x3>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ L2_0: l2-cache0 {
++ compatible = "cache";
++ };
++ };
++
++ timer {
++ compatible = "arm,armv8-timer";
++ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
++ };
++
++ pmu: pmu {
++ compatible = "arm,cortex-a53-pmu";
++ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-affinity = <&B53_0>, <&B53_1>,
++ <&B53_2>, <&B53_3>;
++ };
++
++ clocks: clocks {
++ periph_clk: periph-clk {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ clock-frequency = <200000000>;
++ };
++ uart_clk: uart-clk {
++ compatible = "fixed-factor-clock";
++ #clock-cells = <0>;
++ clocks = <&periph_clk>;
++ clock-div = <4>;
++ clock-mult = <1>;
++ };
++ };
++
++ psci {
++ compatible = "arm,psci-0.2";
++ method = "smc";
++ };
++
++ axi@81000000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0x81000000 0x8000>;
++
++ gic: interrupt-controller@1000 {
++ compatible = "arm,gic-400";
++ #interrupt-cells = <3>;
++ interrupt-controller;
++ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
++ reg = <0x1000 0x1000>,
++ <0x2000 0x2000>,
++ <0x4000 0x2000>,
++ <0x6000 0x2000>;
++ };
++ };
++
++ bus@ff800000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0xff800000 0x800000>;
++
++ uart0: serial@12000 {
++ compatible = "arm,pl011", "arm,primecell";
++ reg = <0x12000 0x1000>;
++ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
++ clocks = <&uart_clk>, <&uart_clk>;
++ clock-names = "uartclk", "apb_pclk";
++ status = "disabled";
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm63158.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm63158.dtsi
+new file mode 100644
+index 0000000000000..13629702f70b8
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm63158.dtsi
+@@ -0,0 +1,128 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++#include <dt-bindings/interrupt-controller/irq.h>
++#include <dt-bindings/interrupt-controller/arm-gic.h>
++
++/ {
++ compatible = "brcm,bcm63158", "brcm,bcmbca";
++ #address-cells = <2>;
++ #size-cells = <2>;
++
++ interrupt-parent = <&gic>;
++
++ cpus {
++ #address-cells = <2>;
++ #size-cells = <0>;
++
++ B53_0: cpu@0 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x0>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_1: cpu@1 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x1>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_2: cpu@2 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x2>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_3: cpu@3 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x3>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ L2_0: l2-cache0 {
++ compatible = "cache";
++ };
++ };
++
++ timer {
++ compatible = "arm,armv8-timer";
++ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
++ };
++
++ pmu: pmu {
++ compatible = "arm,cortex-a53-pmu";
++ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-affinity = <&B53_0>, <&B53_1>,
++ <&B53_2>, <&B53_3>;
++ };
++
++ clocks: clocks {
++ periph_clk: periph-clk {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ clock-frequency = <200000000>;
++ };
++ uart_clk: uart-clk {
++ compatible = "fixed-factor-clock";
++ #clock-cells = <0>;
++ clocks = <&periph_clk>;
++ clock-div = <4>;
++ clock-mult = <1>;
++ };
++ };
++
++ psci {
++ compatible = "arm,psci-0.2";
++ method = "smc";
++ };
++
++ axi@81000000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0x81000000 0x8000>;
++
++ gic: interrupt-controller@1000 {
++ compatible = "arm,gic-400";
++ #interrupt-cells = <3>;
++ interrupt-controller;
++ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
++ reg = <0x1000 0x1000>,
++ <0x2000 0x2000>,
++ <0x4000 0x2000>,
++ <0x6000 0x2000>;
++ };
++ };
++
++ bus@ff800000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0xff800000 0x800000>;
++
++ uart0: serial@12000 {
++ compatible = "arm,pl011", "arm,primecell";
++ reg = <0x12000 0x1000>;
++ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
++ clocks = <&uart_clk>, <&uart_clk>;
++ clock-names = "uartclk", "apb_pclk";
++ status = "disabled";
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm6858.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm6858.dtsi
+new file mode 100644
+index 0000000000000..29a880c6c8588
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm6858.dtsi
+@@ -0,0 +1,121 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++#include <dt-bindings/interrupt-controller/irq.h>
++#include <dt-bindings/interrupt-controller/arm-gic.h>
++
++/ {
++ compatible = "brcm,bcm6858", "brcm,bcmbca";
++ #address-cells = <2>;
++ #size-cells = <2>;
++
++ interrupt-parent = <&gic>;
++
++ cpus {
++ #address-cells = <2>;
++ #size-cells = <0>;
++
++ B53_0: cpu@0 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x0>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_1: cpu@1 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x1>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_2: cpu@2 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x2>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++
++ B53_3: cpu@3 {
++ compatible = "brcm,brahma-b53";
++ device_type = "cpu";
++ reg = <0x0 0x3>;
++ next-level-cache = <&L2_0>;
++ enable-method = "psci";
++ };
++ L2_0: l2-cache0 {
++ compatible = "cache";
++ };
++ };
++
++ timer {
++ compatible = "arm,armv8-timer";
++ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
++ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
++ };
++
++ pmu: pmu {
++ compatible = "arm,armv8-pmuv3";
++ interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-affinity = <&B53_0>, <&B53_1>,
++ <&B53_2>, <&B53_3>;
++ };
++
++ clocks: clocks {
++ periph_clk:periph-clk {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ clock-frequency = <200000000>;
++ };
++ };
++
++ psci {
++ compatible = "arm,psci-0.2";
++ method = "smc";
++ };
++
++ axi@81000000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0x81000000 0x8000>;
++
++ gic: interrupt-controller@1000 {
++ compatible = "arm,gic-400";
++ #interrupt-cells = <3>;
++ interrupt-controller;
++ reg = <0x1000 0x1000>, /* GICD */
++ <0x2000 0x2000>, /* GICC */
++ <0x4000 0x2000>, /* GICH */
++ <0x6000 0x2000>; /* GICV */
++ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) |
++ IRQ_TYPE_LEVEL_HIGH)>;
++ };
++ };
++
++ bus@ff800000 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0xff800000 0x62000>;
++
++ uart0: serial@640 {
++ compatible = "brcm,bcm6345-uart";
++ reg = <0x640 0x18>;
++ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
++ clocks = <&periph_clk>;
++ clock-names = "refclk";
++ status = "disabled";
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm94912.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm94912.dts
+new file mode 100644
+index 0000000000000..a3623e6f6919c
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm94912.dts
+@@ -0,0 +1,30 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++/dts-v1/;
++
++#include "bcm4912.dtsi"
++
++/ {
++ model = "Broadcom BCM94912 Reference Board";
++ compatible = "brcm,bcm94912", "brcm,bcm4912", "brcm,bcmbca";
++
++ aliases {
++ serial0 = &uart0;
++ };
++
++ chosen {
++ stdout-path = "serial0:115200n8";
++ };
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x0 0x0 0x0 0x08000000>;
++ };
++};
++
++&uart0 {
++ status = "okay";
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm963158.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm963158.dts
+new file mode 100644
+index 0000000000000..eba07e0b1ca6f
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm963158.dts
+@@ -0,0 +1,30 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++/dts-v1/;
++
++#include "bcm63158.dtsi"
++
++/ {
++ model = "Broadcom BCM963158 Reference Board";
++ compatible = "brcm,bcm963158", "brcm,bcm63158", "brcm,bcmbca";
++
++ aliases {
++ serial0 = &uart0;
++ };
++
++ chosen {
++ stdout-path = "serial0:115200n8";
++ };
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x0 0x0 0x0 0x08000000>;
++ };
++};
++
++&uart0 {
++ status = "okay";
++};
+diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm96858.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm96858.dts
+new file mode 100644
+index 0000000000000..0cbf582f5d545
+--- /dev/null
++++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm96858.dts
+@@ -0,0 +1,30 @@
++// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
++/*
++ * Copyright 2022 Broadcom Ltd.
++ */
++
++/dts-v1/;
++
++#include "bcm6858.dtsi"
++
++/ {
++ model = "Broadcom BCM96858 Reference Board";
++ compatible = "brcm,bcm96858", "brcm,bcm6858", "brcm,bcmbca";
++
++ aliases {
++ serial0 = &uart0;
++ };
++
++ chosen {
++ stdout-path = "serial0:115200n8";
++ };
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x0 0x0 0x0 0x08000000>;
++ };
++};
++
++&uart0 {
++ status = "okay";
++};
+diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts
+index ec19fbf928a14..12a4b1c03390c 100644
+--- a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts
++++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts
+@@ -111,8 +111,8 @@
+ compatible = "silabs,si3226x";
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+- spi-cpha = <1>;
+- spi-cpol = <1>;
++ spi-cpha;
++ spi-cpol;
+ pl022,hierarchy = <0>;
+ pl022,interface = <0>;
+ pl022,slave-tx-disable = <0>;
+@@ -135,8 +135,8 @@
+ at25,byte-len = <0x8000>;
+ at25,addr-mode = <2>;
+ at25,page-size = <64>;
+- spi-cpha = <1>;
+- spi-cpol = <1>;
++ spi-cpha;
++ spi-cpol;
+ pl022,hierarchy = <0>;
+ pl022,interface = <0>;
+ pl022,slave-tx-disable = <0>;
+diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
+index 2cfeaf3b0a876..8c218689fef70 100644
+--- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
++++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
+@@ -687,7 +687,7 @@
+ };
+ };
+
+- sata: ahci@663f2000 {
++ sata: sata@663f2000 {
+ compatible = "brcm,iproc-ahci", "generic-ahci";
+ reg = <0x663f2000 0x1000>;
+ dma-coherent;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
+index e22c5e77fecdc..9615f3b9ee608 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts
+@@ -110,7 +110,7 @@
+ &i2c0 {
+ status = "okay";
+
+- pca9547@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts
+index 79f155dedb2d0..e662677a6e28f 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts
+@@ -15,6 +15,7 @@
+ compatible = "fsl,ls1012a-rdb", "fsl,ls1012a";
+
+ aliases {
++ serial0 = &duart0;
+ mmc0 = &esdhc0;
+ mmc1 = &esdhc1;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
+index bfd14b64567e4..2f92e62ecafe9 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
+@@ -272,11 +272,6 @@
+ vcc-supply = <&sb_3v3>;
+ };
+
+- rtc@51 {
+- compatible = "nxp,pcf2129";
+- reg = <0x51>;
+- };
+-
+ eeprom@56 {
+ compatible = "atmel,24c512";
+ reg = <0x56>;
+@@ -318,6 +313,15 @@
+
+ };
+
++&i2c1 {
++ status = "okay";
++
++ rtc@51 {
++ compatible = "nxp,pcf2129";
++ reg = <0x51>;
++ };
++};
++
+ &enetc_port1 {
+ phy-handle = <&qds_phy1>;
+ phy-connection-type = "rgmii-id";
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
+index fea167d222cfe..14856bc79b221 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts
+@@ -70,7 +70,7 @@
+ &i2c0 {
+ status = "okay";
+
+- pca9547@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+index 01b01e3204118..35d1939e690b0 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+@@ -536,9 +536,9 @@
+ clock-names = "i2c";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(1)>;
+- dmas = <&edma0 1 39>,
+- <&edma0 1 38>;
+- dma-names = "tx", "rx";
++ dmas = <&edma0 1 38>,
++ <&edma0 1 39>;
++ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
+index eec62c63dafe2..9ee9928f71b49 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts
+@@ -76,7 +76,7 @@
+ &i2c0 {
+ status = "okay";
+
+- pca9547@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+index 687fea6d8afa4..4e7bd04d97984 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+@@ -499,9 +499,9 @@
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(2)>;
+- dmas = <&edma0 1 39>,
+- <&edma0 1 38>;
+- dma-names = "tx", "rx";
++ dmas = <&edma0 1 38>,
++ <&edma0 1 39>;
++ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
+index 41d8b15f25a54..aa52ff73ff9e0 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts
+@@ -53,7 +53,7 @@
+ &i2c0 {
+ status = "okay";
+
+- i2c-switch@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
+index 1bfbce69cc8b7..ee8e932628d17 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts
+@@ -136,7 +136,7 @@
+ &i2c0 {
+ status = "okay";
+
+- i2c-switch@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
+index 3063851c2fb91..a9c6682a3955e 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts
+@@ -38,7 +38,6 @@
+ powerdn {
+ label = "External Power Down";
+ gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
+- interrupts = <&gpio1 17 IRQ_TYPE_EDGE_FALLING>;
+ linux,code = <KEY_POWER>;
+ };
+
+@@ -46,7 +45,6 @@
+ admin {
+ label = "ADMIN button";
+ gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>;
+- interrupts = <&gpio3 8 IRQ_TYPE_EDGE_RISING>;
+ linux,code = <KEY_WPS_BUTTON>;
+ };
+ };
+@@ -247,7 +245,7 @@
+ &i2c3 {
+ status = "okay";
+
+- i2c-switch@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9540";
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+index f85e437f80b73..63441028622a6 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+@@ -758,6 +758,9 @@
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(1)>;
+ status = "disabled";
+ };
+
+@@ -767,6 +770,9 @@
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(1)>;
+ status = "disabled";
+ };
+
+@@ -847,7 +853,7 @@
+ };
+
+ cluster1_core0_watchdog: wdt@c000000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc000000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -857,7 +863,7 @@
+ };
+
+ cluster1_core1_watchdog: wdt@c010000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc010000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -867,7 +873,7 @@
+ };
+
+ cluster1_core2_watchdog: wdt@c020000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc020000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -877,7 +883,7 @@
+ };
+
+ cluster1_core3_watchdog: wdt@c030000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc030000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -887,7 +893,7 @@
+ };
+
+ cluster2_core0_watchdog: wdt@c100000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc100000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -897,7 +903,7 @@
+ };
+
+ cluster2_core1_watchdog: wdt@c110000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc110000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -907,7 +913,7 @@
+ };
+
+ cluster2_core2_watchdog: wdt@c120000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc120000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+@@ -917,7 +923,7 @@
+ };
+
+ cluster2_core3_watchdog: wdt@c130000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc130000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>,
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
+index 10d2fe0919651..8d96d18c3697a 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi
+@@ -44,7 +44,7 @@
+
+ &i2c0 {
+ status = "okay";
+- pca9547@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
+index 4b71c4fcb35f6..787e408da0024 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi
+@@ -44,7 +44,7 @@
+
+ &i2c0 {
+ status = "okay";
+- pca9547@75 {
++ i2c-mux@75 {
+ compatible = "nxp,pca9547";
+ reg = <0x75>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+index 801ba9612d361..12e59777363fe 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+@@ -387,7 +387,7 @@
+ };
+
+ cluster1_core0_watchdog: wdt@c000000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc000000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -397,7 +397,7 @@
+ };
+
+ cluster1_core1_watchdog: wdt@c010000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc010000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -407,7 +407,7 @@
+ };
+
+ cluster2_core0_watchdog: wdt@c100000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc100000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -417,7 +417,7 @@
+ };
+
+ cluster2_core1_watchdog: wdt@c110000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc110000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -427,7 +427,7 @@
+ };
+
+ cluster3_core0_watchdog: wdt@c200000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc200000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -437,7 +437,7 @@
+ };
+
+ cluster3_core1_watchdog: wdt@c210000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc210000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -447,7 +447,7 @@
+ };
+
+ cluster4_core0_watchdog: wdt@c300000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc300000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -457,7 +457,7 @@
+ };
+
+ cluster4_core1_watchdog: wdt@c310000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xc310000 0x0 0x1000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>,
+@@ -525,6 +525,9 @@
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(2)>;
+ status = "disabled";
+ };
+
+@@ -534,6 +537,9 @@
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(2)>;
+ status = "disabled";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
+index afb455210bd07..d32a52ab00a42 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi
+@@ -54,7 +54,7 @@
+ &i2c0 {
+ status = "okay";
+
+- i2c-switch@77 {
++ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+index c4b1a59ba424b..1bc7f538f6905 100644
+--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
++++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+@@ -719,7 +719,7 @@
+ clock-names = "i2c";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>;
+- scl-gpio = <&gpio2 15 GPIO_ACTIVE_HIGH>;
++ scl-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>;
+ status = "disabled";
+ };
+
+@@ -768,7 +768,7 @@
+ clock-names = "i2c";
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(16)>;
+- scl-gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>;
++ scl-gpios = <&gpio2 16 GPIO_ACTIVE_HIGH>;
+ status = "disabled";
+ };
+
+@@ -1369,6 +1369,9 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+ little-endian;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(2)>;
+ status = "disabled";
+ };
+
+@@ -1379,6 +1382,9 @@
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
++ clock-frequency = <2500000>;
++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
++ QORIQ_CLK_PLL_DIV(2)>;
+ status = "disabled";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+index a79f42a9618ec..639220dbff008 100644
+--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+@@ -38,9 +38,9 @@ conn_subsys: bus@5b000000 {
+ interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x5b010000 0x10000>;
+ clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
+- <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+- <&sdhc0_lpcg IMX_LPCG_CLK_0>;
+- clock-names = "ipg", "per", "ahb";
++ <&sdhc0_lpcg IMX_LPCG_CLK_0>,
++ <&sdhc0_lpcg IMX_LPCG_CLK_5>;
++ clock-names = "ipg", "ahb", "per";
+ power-domains = <&pd IMX_SC_R_SDHC_0>;
+ status = "disabled";
+ };
+@@ -49,9 +49,9 @@ conn_subsys: bus@5b000000 {
+ interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x5b020000 0x10000>;
+ clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
+- <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+- <&sdhc1_lpcg IMX_LPCG_CLK_0>;
+- clock-names = "ipg", "per", "ahb";
++ <&sdhc1_lpcg IMX_LPCG_CLK_0>,
++ <&sdhc1_lpcg IMX_LPCG_CLK_5>;
++ clock-names = "ipg", "ahb", "per";
+ power-domains = <&pd IMX_SC_R_SDHC_1>;
+ fsl,tuning-start-tap = <20>;
+ fsl,tuning-step= <2>;
+@@ -62,9 +62,9 @@ conn_subsys: bus@5b000000 {
+ interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x5b030000 0x10000>;
+ clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
+- <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+- <&sdhc2_lpcg IMX_LPCG_CLK_0>;
+- clock-names = "ipg", "per", "ahb";
++ <&sdhc2_lpcg IMX_LPCG_CLK_0>,
++ <&sdhc2_lpcg IMX_LPCG_CLK_5>;
++ clock-names = "ipg", "ahb", "per";
+ power-domains = <&pd IMX_SC_R_SDHC_2>;
+ status = "disabled";
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+index 960a802b8b6ee..c33892711138f 100644
+--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+@@ -26,6 +26,8 @@ dma_subsys: bus@5a000000 {
+ clocks = <&uart0_lpcg IMX_LPCG_CLK_4>,
+ <&uart0_lpcg IMX_LPCG_CLK_0>;
+ clock-names = "ipg", "baud";
++ assigned-clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>;
++ assigned-clock-rates = <80000000>;
+ power-domains = <&pd IMX_SC_R_UART_0>;
+ status = "disabled";
+ };
+@@ -36,6 +38,8 @@ dma_subsys: bus@5a000000 {
+ clocks = <&uart1_lpcg IMX_LPCG_CLK_4>,
+ <&uart1_lpcg IMX_LPCG_CLK_0>;
+ clock-names = "ipg", "baud";
++ assigned-clocks = <&clk IMX_SC_R_UART_1 IMX_SC_PM_CLK_PER>;
++ assigned-clock-rates = <80000000>;
+ power-domains = <&pd IMX_SC_R_UART_1>;
+ status = "disabled";
+ };
+@@ -46,6 +50,8 @@ dma_subsys: bus@5a000000 {
+ clocks = <&uart2_lpcg IMX_LPCG_CLK_4>,
+ <&uart2_lpcg IMX_LPCG_CLK_0>;
+ clock-names = "ipg", "baud";
++ assigned-clocks = <&clk IMX_SC_R_UART_2 IMX_SC_PM_CLK_PER>;
++ assigned-clock-rates = <80000000>;
+ power-domains = <&pd IMX_SC_R_UART_2>;
+ status = "disabled";
+ };
+@@ -56,6 +62,8 @@ dma_subsys: bus@5a000000 {
+ clocks = <&uart3_lpcg IMX_LPCG_CLK_4>,
+ <&uart3_lpcg IMX_LPCG_CLK_0>;
+ clock-names = "ipg", "baud";
++ assigned-clocks = <&clk IMX_SC_R_UART_3 IMX_SC_PM_CLK_PER>;
++ assigned-clock-rates = <80000000>;
+ power-domains = <&pd IMX_SC_R_UART_3>;
+ status = "disabled";
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
+index 6f5e63696ec0a..bb18354c10f08 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi
+@@ -70,7 +70,7 @@
+ &ecspi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_espi2>;
+- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
++ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ eeprom@0 {
+@@ -166,6 +166,7 @@
+ pinctrl-0 = <&pinctrl_uart3>;
+ assigned-clocks = <&clk IMX8MM_CLK_UART3>;
+ assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_80M>;
++ uart-has-rtscts;
+ status = "okay";
+ };
+
+@@ -185,7 +186,7 @@
+ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82
+ MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82
+ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82
+- MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41
++ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41
+ >;
+ };
+
+@@ -236,6 +237,8 @@
+ fsl,pins = <
+ MX8MM_IOMUXC_ECSPI1_SCLK_UART3_DCE_RX 0x40
+ MX8MM_IOMUXC_ECSPI1_MOSI_UART3_DCE_TX 0x40
++ MX8MM_IOMUXC_ECSPI1_MISO_UART3_DCE_CTS_B 0x40
++ MX8MM_IOMUXC_ECSPI1_SS0_UART3_DCE_RTS_B 0x40
+ >;
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
+index e033d0257b5a1..ff5324e94ee82 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
+@@ -136,7 +136,7 @@
+ rohm,reset-snvs-powered;
+
+ #clock-cells = <0>;
+- clocks = <&osc_32k 0>;
++ clocks = <&osc_32k>;
+ clock-output-names = "clk-32k-out";
+
+ regulators {
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
+index e99e7644ff392..49d7470812eef 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
+@@ -123,8 +123,8 @@
+
+ ethphy: ethernet-phy@0 {
+ reg = <0>;
+- reset-assert-us = <100>;
+- reset-deassert-us = <100>;
++ reset-assert-us = <1>;
++ reset-deassert-us = <15000>;
+ reset-gpios = <&gpio4 27 GPIO_ACTIVE_LOW>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
+index 74c09891600f2..0e8f0d7161ad0 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts
+@@ -214,7 +214,7 @@
+ pinctrl-0 = <&pinctrl_i2c3>;
+ status = "okay";
+
+- i2cmux@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9540";
+ reg = <0x70>;
+ #address-cells = <1>;
+@@ -247,7 +247,7 @@
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+ clocks = <&clk IMX8MM_CLK_SAI1_ROOT>;
+- clock-names = "mclk1";
++ clock-names = "mclk";
+ wlf,shared-lrclk;
+ #sound-dai-cells = <0>;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
+index a003e6af33533..56271abfb7e09 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
++++ b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
+@@ -601,7 +601,7 @@
+ #define MX8MM_IOMUXC_UART1_RXD_GPIO5_IO22 0x234 0x49C 0x000 0x5 0x0
+ #define MX8MM_IOMUXC_UART1_RXD_TPSMP_HDATA24 0x234 0x49C 0x000 0x7 0x0
+ #define MX8MM_IOMUXC_UART1_TXD_UART1_DCE_TX 0x238 0x4A0 0x000 0x0 0x0
+-#define MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x0
++#define MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x1
+ #define MX8MM_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x238 0x4A0 0x000 0x1 0x0
+ #define MX8MM_IOMUXC_UART1_TXD_GPIO5_IO23 0x238 0x4A0 0x000 0x5 0x0
+ #define MX8MM_IOMUXC_UART1_TXD_TPSMP_HDATA25 0x238 0x4A0 0x000 0x7 0x0
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
+index 1dc9d187601c5..a0bd540f27d3d 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
+@@ -89,12 +89,12 @@
+ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <125>;
+- touchscreen-size-x = /bits/ 16 <4008>;
++ touchscreen-size-x = <4008>;
+ ti,y-min = /bits/ 16 <282>;
+- touchscreen-size-y = /bits/ 16 <3864>;
++ touchscreen-size-y = <3864>;
+ ti,x-plate-ohms = /bits/ 16 <180>;
+- touchscreen-max-pressure = /bits/ 16 <255>;
+- touchscreen-average-samples = /bits/ 16 <10>;
++ touchscreen-max-pressure = <255>;
++ touchscreen-average-samples = <10>;
+ ti,debounce-tol = /bits/ 16 <3>;
+ ti,debounce-rep = /bits/ 16 <1>;
+ ti,settle-delay-usec = /bits/ 16 <150>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
+index 8e4a0ce99790b..7ea909a4c1d5e 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
+@@ -103,12 +103,14 @@
+
+ &usbotg1 {
+ dr_mode = "otg";
++ over-current-active-low;
+ vbus-supply = <&reg_usb_otg1_vbus>;
+ status = "okay";
+ };
+
+ &usbotg2 {
+ dr_mode = "host";
++ disable-over-current;
+ status = "okay";
+ };
+
+@@ -166,7 +168,7 @@
+ fsl,pins = <
+ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
+- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
++ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
+ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
+ >;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
+index b7c91bdc21dd9..806ee21651d1f 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
+@@ -139,12 +139,14 @@
+
+ &usbotg1 {
+ dr_mode = "otg";
++ over-current-active-low;
+ vbus-supply = <&reg_usb_otg1_vbus>;
+ status = "okay";
+ };
+
+ &usbotg2 {
+ dr_mode = "host";
++ disable-over-current;
+ vbus-supply = <&reg_usb_otg2_vbus>;
+ status = "okay";
+ };
+@@ -231,7 +233,7 @@
+ fsl,pins = <
+ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
+- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
++ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
+ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
+ >;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+index d2ffd62a3bd46..942fed2eed643 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+@@ -166,12 +166,14 @@
+
+ &usbotg1 {
+ dr_mode = "otg";
++ over-current-active-low;
+ vbus-supply = <&reg_usb_otg1_vbus>;
+ status = "okay";
+ };
+
+ &usbotg2 {
+ dr_mode = "host";
++ disable-over-current;
+ vbus-supply = <&reg_usb_otg2_vbus>;
+ status = "okay";
+ };
+@@ -280,7 +282,7 @@
+ fsl,pins = <
+ MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
+- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
++ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
+ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
+ >;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
+index bafd5c8ea4e28..f7e41e5c2c7bc 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts
+@@ -675,6 +675,7 @@
+ &usbotg2 {
+ dr_mode = "host";
+ vbus-supply = <&reg_usb2_vbus>;
++ over-current-active-low;
+ status = "okay";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+index 2f632e8ca3880..2a67122c5624c 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+@@ -530,7 +530,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- imx8mm_uid: unique-id@410 {
++ imx8mm_uid: unique-id@4 {
+ reg = <0x4 0x8>;
+ };
+
+@@ -1014,10 +1014,10 @@
+ clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>;
+ };
+
+- gpmi: nand-controller@33002000{
++ gpmi: nand-controller@33002000 {
+ compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand";
+ #address-cells = <1>;
+- #size-cells = <1>;
++ #size-cells = <0>;
+ reg = <0x33002000 0x2000>, <0x33004000 0x4000>;
+ reg-names = "gpmi-nand", "bch";
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
+index 376ca8ff72133..4fc22448e411f 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
+@@ -81,7 +81,7 @@
+ &ecspi2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_espi2>;
+- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
++ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ eeprom@0 {
+@@ -176,6 +176,7 @@
+ pinctrl-0 = <&pinctrl_uart3>;
+ assigned-clocks = <&clk IMX8MN_CLK_UART3>;
+ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_80M>;
++ uart-has-rtscts;
+ status = "okay";
+ };
+
+@@ -202,7 +203,7 @@
+ MX8MN_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82
+ MX8MN_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82
+ MX8MN_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82
+- MX8MN_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41
++ MX8MN_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41
+ >;
+ };
+
+@@ -259,6 +260,8 @@
+ fsl,pins = <
+ MX8MN_IOMUXC_ECSPI1_SCLK_UART3_DCE_RX 0x40
+ MX8MN_IOMUXC_ECSPI1_MOSI_UART3_DCE_TX 0x40
++ MX8MN_IOMUXC_ECSPI1_MISO_UART3_DCE_CTS_B 0x40
++ MX8MN_IOMUXC_ECSPI1_SS0_UART3_DCE_RTS_B 0x40
+ >;
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+index 7dfee715a2c4d..d8ce217c60166 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+@@ -59,6 +59,10 @@
+ interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+ rohm,reset-snvs-powered;
+
++ #clock-cells = <0>;
++ clocks = <&osc_32k 0>;
++ clock-output-names = "clk-32k-out";
++
+ regulators {
+ buck1_reg: BUCK1 {
+ regulator-name = "buck1";
+diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+index b16c7caf34c11..faafefe562e4b 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+@@ -70,12 +70,12 @@
+ pendown-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+
+ ti,x-min = /bits/ 16 <125>;
+- touchscreen-size-x = /bits/ 16 <4008>;
++ touchscreen-size-x = <4008>;
+ ti,y-min = /bits/ 16 <282>;
+- touchscreen-size-y = /bits/ 16 <3864>;
++ touchscreen-size-y = <3864>;
+ ti,x-plate-ohms = /bits/ 16 <180>;
+- touchscreen-max-pressure = /bits/ 16 <255>;
+- touchscreen-average-samples = /bits/ 16 <10>;
++ touchscreen-max-pressure = <255>;
++ touchscreen-average-samples = <10>;
+ ti,debounce-tol = /bits/ 16 <3>;
+ ti,debounce-rep = /bits/ 16 <1>;
+ ti,settle-delay-usec = /bits/ 16 <150>;
+@@ -98,11 +98,17 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+- ethphy: ethernet-phy@4 {
++ ethphy: ethernet-phy@4 { /* AR8033 or ADIN1300 */
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <4>;
+ reset-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <10000>;
++ /*
++ * Deassert delay:
++ * ADIN1300 requires 5ms.
++ * AR8033 requires 1ms.
++ */
++ reset-deassert-us = <20000>;
+ };
+ };
+ };
+@@ -345,7 +351,7 @@
+ MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91
+ MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91
+ MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f
+- MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19
++ MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159
+ >;
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+index da6c942fb7f9d..16a5efba17f39 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+@@ -263,8 +263,9 @@
+ ranges;
+
+ sai2: sai@30020000 {
+- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
++ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
+ reg = <0x30020000 0x10000>;
++ #sound-dai-cells = <0>;
+ interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MN_CLK_SAI2_IPG>,
+ <&clk IMX8MN_CLK_DUMMY>,
+@@ -277,8 +278,9 @@
+ };
+
+ sai3: sai@30030000 {
+- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
++ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
+ reg = <0x30030000 0x10000>;
++ #sound-dai-cells = <0>;
+ interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MN_CLK_SAI3_IPG>,
+ <&clk IMX8MN_CLK_DUMMY>,
+@@ -291,8 +293,9 @@
+ };
+
+ sai5: sai@30050000 {
+- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
++ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
+ reg = <0x30050000 0x10000>;
++ #sound-dai-cells = <0>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MN_CLK_SAI5_IPG>,
+ <&clk IMX8MN_CLK_DUMMY>,
+@@ -307,8 +310,9 @@
+ };
+
+ sai6: sai@30060000 {
+- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
++ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
+ reg = <0x30060000 0x10000>;
++ #sound-dai-cells = <0>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MN_CLK_SAI6_IPG>,
+ <&clk IMX8MN_CLK_DUMMY>,
+@@ -364,8 +368,9 @@
+ };
+
+ sai7: sai@300b0000 {
+- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
++ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
+ reg = <0x300b0000 0x10000>;
++ #sound-dai-cells = <0>;
+ interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk IMX8MN_CLK_SAI7_IPG>,
+ <&clk IMX8MN_CLK_DUMMY>,
+@@ -533,7 +538,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- imx8mn_uid: unique-id@410 {
++ imx8mn_uid: unique-id@4 {
+ reg = <0x4 0x8>;
+ };
+
+@@ -998,7 +1003,7 @@
+ gpmi: nand-controller@33002000 {
+ compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand";
+ #address-cells = <1>;
+- #size-cells = <1>;
++ #size-cells = <0>;
+ reg = <0x33002000 0x2000>, <0x33004000 0x4000>;
+ reg-names = "gpmi-nand", "bch";
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
+index 7b99fad6e4d6e..5c9fb39dd99e5 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
+@@ -285,21 +285,21 @@
+ &iomuxc {
+ pinctrl_eqos: eqosgrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
+- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
+- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
+- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
+- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
+- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
+- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
+- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
+- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
+- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
+- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
+- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
+- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
+- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
+- MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x19
++ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
++ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
++ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
++ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
++ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
++ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
++ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
++ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
++ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
++ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
++ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
++ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
++ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
++ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
++ MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x10
+ >;
+ };
+
+@@ -351,21 +351,21 @@
+
+ pinctrl_gpio_led: gpioledgrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19
++ MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140
+ >;
+ };
+
+ pinctrl_i2c1: i2c1grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3
+- MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3
++ MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2
++ MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2
+ >;
+ };
+
+ pinctrl_i2c3: i2c3grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3
+- MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3
++ MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2
++ MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2
+ >;
+ };
+
+@@ -377,20 +377,20 @@
+
+ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41
++ MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
+ >;
+ };
+
+ pinctrl_uart2: uart2grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49
+- MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49
++ MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140
++ MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140
+ >;
+ };
+
+ pinctrl_usb1_vbus: usb1grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x19
++ MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x10
+ >;
+ };
+
+@@ -402,7 +402,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+
+@@ -414,7 +414,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+
+@@ -426,7 +426,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
+index 984a6b9ded8d7..6aa720bafe289 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
+@@ -116,48 +116,48 @@
+ &iomuxc {
+ pinctrl_eqos: eqosgrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
+- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
+- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
+- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
+- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
+- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
+- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
+- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
+- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
+- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
+- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
+- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
+- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
+- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
++ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
++ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
++ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
++ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
++ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
++ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
++ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
++ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
++ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
++ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
++ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
++ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
++ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
++ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
+ MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10
+ >;
+ };
+
+ pinctrl_i2c2: i2c2grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3
+- MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3
++ MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2
++ MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2
+ >;
+ };
+
+ pinctrl_i2c2_gpio: i2c2gpiogrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e3
+- MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e3
++ MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e2
++ MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e2
+ >;
+ };
+
+ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
+ fsl,pins = <
+- MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41
++ MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
+ >;
+ };
+
+ pinctrl_uart1: uart1grp {
+ fsl,pins = <
+- MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x49
+- MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x49
++ MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40
++ MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40
+ >;
+ };
+
+@@ -175,7 +175,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+
+@@ -187,7 +187,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+
+@@ -199,7 +199,7 @@
+ MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6
+ MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6
+ MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6
+- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
++ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
+ >;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
+index fc178eebf8aa4..8e189d8997941 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
+@@ -98,7 +98,6 @@
+
+ regulators {
+ buck1: BUCK1 {
+- regulator-compatible = "BUCK1";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <2187500>;
+ regulator-boot-on;
+@@ -107,7 +106,6 @@
+ };
+
+ buck2: BUCK2 {
+- regulator-compatible = "BUCK2";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <2187500>;
+ regulator-boot-on;
+@@ -116,7 +114,6 @@
+ };
+
+ buck4: BUCK4 {
+- regulator-compatible = "BUCK4";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <3400000>;
+ regulator-boot-on;
+@@ -124,7 +121,6 @@
+ };
+
+ buck5: BUCK5 {
+- regulator-compatible = "BUCK5";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <3400000>;
+ regulator-boot-on;
+@@ -132,7 +128,6 @@
+ };
+
+ buck6: BUCK6 {
+- regulator-compatible = "BUCK6";
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <3400000>;
+ regulator-boot-on;
+@@ -140,7 +135,6 @@
+ };
+
+ ldo1: LDO1 {
+- regulator-compatible = "LDO1";
+ regulator-min-microvolt = <1600000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+@@ -148,7 +142,6 @@
+ };
+
+ ldo2: LDO2 {
+- regulator-compatible = "LDO2";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1150000>;
+ regulator-boot-on;
+@@ -156,7 +149,6 @@
+ };
+
+ ldo3: LDO3 {
+- regulator-compatible = "LDO3";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+@@ -164,7 +156,6 @@
+ };
+
+ ldo4: LDO4 {
+- regulator-compatible = "LDO4";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+@@ -172,7 +163,6 @@
+ };
+
+ ldo5: LDO5 {
+- regulator-compatible = "LDO5";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+index 9b07b26230a11..ab670b5d641b1 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+@@ -358,7 +358,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- imx8mp_uid: unique-id@420 {
++ imx8mp_uid: unique-id@8 {
+ reg = <0x8 0x8>;
+ };
+
+@@ -912,7 +912,7 @@
+ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&usb3_phy0>, <&usb3_phy0>;
+ phy-names = "usb2-phy", "usb3-phy";
+- snps,dis-u2-freeclk-exists-quirk;
++ snps,gfladj-refclk-lpm-sel-quirk;
+ };
+
+ };
+@@ -953,7 +953,7 @@
+ interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&usb3_phy1>, <&usb3_phy1>;
+ phy-names = "usb2-phy", "usb3-phy";
+- snps,dis-u2-freeclk-exists-quirk;
++ snps,gfladj-refclk-lpm-sel-quirk;
+ };
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
+index 460ef0d86540a..c86cd20d4e709 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
+@@ -967,6 +967,7 @@
+ interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_gauge>;
++ power-supplies = <&bq25895>;
+ maxim,over-heat-temp = <700>;
+ maxim,over-volt = <4500>;
+ maxim,rsns-microohm = <5000>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
+index f70fb32b96b0c..cf14ab5f7404c 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts
+@@ -133,7 +133,7 @@
+ pinctrl-0 = <&pinctrl_i2c1>;
+ status = "okay";
+
+- i2cmux@70 {
++ i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c1_pca9546>;
+@@ -216,7 +216,7 @@
+ pinctrl-0 = <&pinctrl_i2c4>;
+ status = "okay";
+
+- pca9546: i2cmux@70 {
++ pca9546: i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ reg = <0x70>;
+ #address-cells = <1>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
+index 5d5aa6537225f..6e6182709d220 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts
+@@ -339,7 +339,7 @@
+ bus-width = <4>;
+ non-removable;
+ no-sd;
+- no-emmc;
++ no-mmc;
+ status = "okay";
+
+ brcmf: wifi@1 {
+@@ -359,7 +359,7 @@
+ cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>;
+ bus-width = <4>;
+ no-sdio;
+- no-emmc;
++ no-mmc;
+ disable-wp;
+ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+index 4066b16126552..2a698c5b87bcd 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+@@ -524,11 +524,9 @@
+ <&clk IMX8MQ_VIDEO_PLL1>,
+ <&clk IMX8MQ_VIDEO_PLL1_OUT>;
+ assigned-clock-rates = <0>, <0>, <0>, <594000000>;
+- interconnects = <&noc IMX8MQ_ICM_LCDIF &noc IMX8MQ_ICS_DRAM>;
+- interconnect-names = "dram";
+ status = "disabled";
+
+- port@0 {
++ port {
+ lcdif_mipi_dsi: endpoint {
+ remote-endpoint = <&mipi_dsi_lcdif_in>;
+ };
+@@ -559,7 +557,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- imx8mq_uid: soc-uid@410 {
++ imx8mq_uid: soc-uid@4 {
+ reg = <0x4 0x8>;
+ };
+
+@@ -1125,8 +1123,8 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+- port@0 {
+- reg = <0>;
++ port@1 {
++ reg = <1>;
+
+ csi1_mipi_ep: endpoint {
+ remote-endpoint = <&csi1_ep>;
+@@ -1177,8 +1175,8 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+- port@0 {
+- reg = <0>;
++ port@1 {
++ reg = <1>;
+
+ csi2_mipi_ep: endpoint {
+ remote-endpoint = <&csi2_ep>;
+diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+index ce9d3f0b98fc0..607cd6b4e9721 100644
+--- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
++++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+@@ -82,8 +82,8 @@
+ pinctrl-0 = <&pinctrl_usdhc2>;
+ bus-width = <4>;
+ vmmc-supply = <&reg_usdhc2_vmmc>;
+- cd-gpios = <&lsio_gpio4 22 GPIO_ACTIVE_LOW>;
+- wp-gpios = <&lsio_gpio4 21 GPIO_ACTIVE_HIGH>;
++ cd-gpios = <&lsio_gpio5 22 GPIO_ACTIVE_LOW>;
++ wp-gpios = <&lsio_gpio5 21 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
+index aebbe2b84aa13..a143f38bc78bd 100644
+--- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
+@@ -155,7 +155,7 @@
+ };
+
+ clk: clock-controller {
+- compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
++ compatible = "fsl,imx8qm-clk", "fsl,scu-clk";
+ #clock-cells = <2>;
+ };
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
+index 863232a47004c..4497763d57ccf 100644
+--- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
++++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts
+@@ -61,7 +61,7 @@
+ pinctrl-0 = <&pinctrl_lpi2c1 &pinctrl_ioexp_rst>;
+ status = "okay";
+
+- i2c-switch@71 {
++ i2c-mux@71 {
+ compatible = "nxp,pca9646", "nxp,pca9546";
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+index 2d5c1a348716a..6eabec2602e23 100644
+--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
++++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+@@ -1087,7 +1087,7 @@
+ };
+
+ watchdog0: watchdog@e8a06000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xe8a06000 0x0 0x1000>;
+ interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&crg_ctrl HI3660_OSC32K>,
+@@ -1096,7 +1096,7 @@
+ };
+
+ watchdog1: watchdog@e8a07000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xe8a07000 0x0 0x1000>;
+ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&crg_ctrl HI3660_OSC32K>,
+diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+index dde9371dc5451..e4860b8a638ec 100644
+--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
++++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+@@ -840,7 +840,7 @@
+ };
+
+ watchdog0: watchdog@f8005000 {
+- compatible = "arm,sp805-wdt", "arm,primecell";
++ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0 0xf8005000 0x0 0x1000>;
+ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ao_ctrl HI6220_WDT0_PCLK>,
+diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
+index 163f33b46e4f7..f4270cf189962 100644
+--- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
++++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi
+@@ -502,7 +502,7 @@
+ };
+
+ usb0: usb@ffb00000 {
+- compatible = "snps,dwc2";
++ compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2";
+ reg = <0xffb00000 0x40000>;
+ interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&usbphy0>;
+@@ -515,7 +515,7 @@
+ };
+
+ usb1: usb@ffb40000 {
+- compatible = "snps,dwc2";
++ compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2";
+ reg = <0xffb40000 0x40000>;
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&usbphy0>;
+@@ -628,7 +628,7 @@
+ };
+
+ qspi: spi@ff8d2000 {
+- compatible = "cdns,qspi-nor";
++ compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xff8d2000 0x100>,
+diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts
+index c5eb3604dd5b7..119db6b541b7b 100644
+--- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts
++++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts
+@@ -71,10 +71,6 @@
+
+ &spi0 {
+ flash@0 {
+- spi-max-frequency = <108000000>;
+- spi-rx-bus-width = <4>;
+- spi-tx-bus-width = <4>;
+-
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+@@ -112,7 +108,6 @@
+
+ &usb3 {
+ usb-phy = <&usb3_phy>;
+- status = "disabled";
+ };
+
+ &mdio {
+diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+index 04da07ae44208..b276dd77df83c 100644
+--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
++++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+@@ -18,6 +18,7 @@
+
+ aliases {
+ spi0 = &spi0;
++ ethernet0 = &eth0;
+ ethernet1 = &eth1;
+ mmc0 = &sdhci0;
+ mmc1 = &sdhci1;
+@@ -124,9 +125,12 @@
+ /delete-property/ mrvl,i2c-fast-mode;
+ status = "okay";
+
++ /* MCP7940MT-I/MNY RTC */
+ rtc@6f {
+ compatible = "microchip,mcp7940x";
+ reg = <0x6f>;
++ interrupt-parent = <&gpiosb>;
++ interrupts = <5 0>; /* GPIO2_5 */
+ };
+ };
+
+@@ -138,7 +142,9 @@
+ /*
+ * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property
+ * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and
+- * 2 size cells and also expects that the second range starts at 16 MB offset. If these
++ * 2 size cells and also expects that the second range starts at 16 MB offset. Also it
++ * expects that first range uses same address for PCI (child) and CPU (parent) cells (so
++ * no remapping) and that this address is the lowest from all specified ranges. If these
+ * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address
+ * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window
+ * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB.
+@@ -147,6 +153,9 @@
+ * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7
+ * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf
+ * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33
++ * Bug related to requirement of same child and parent addresses for first range is fixed
++ * in U-Boot version 2022.04 by following commit:
++ * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17
+ */
+ #address-cells = <3>;
+ #size-cells = <2>;
+diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+index 9acc5d2b5a002..0adc194e46d15 100644
+--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
++++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+@@ -497,7 +497,7 @@
+ * (totaling 127 MiB) for MEM.
+ */
+ ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */
+- 0x81000000 0 0xefff0000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */
++ 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &pcie_intc 0>,
+ <0 0 0 2 &pcie_intc 1>,
+diff --git a/arch/arm64/boot/dts/marvell/cn9130.dtsi b/arch/arm64/boot/dts/marvell/cn9130.dtsi
+index a2b7e5ec979d3..327b04134134f 100644
+--- a/arch/arm64/boot/dts/marvell/cn9130.dtsi
++++ b/arch/arm64/boot/dts/marvell/cn9130.dtsi
+@@ -11,6 +11,13 @@
+ model = "Marvell Armada CN9130 SoC";
+ compatible = "marvell,cn9130", "marvell,armada-ap807-quad",
+ "marvell,armada-ap807";
++
++ aliases {
++ gpio1 = &cp0_gpio1;
++ gpio2 = &cp0_gpio2;
++ spi1 = &cp0_spi0;
++ spi2 = &cp0_spi1;
++ };
+ };
+
+ /*
+@@ -35,3 +42,11 @@
+ #undef CP11X_PCIE0_BASE
+ #undef CP11X_PCIE1_BASE
+ #undef CP11X_PCIE2_BASE
++
++&cp0_gpio1 {
++ status = "okay";
++};
++
++&cp0_gpio2 {
++ status = "okay";
++};
+diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
+index 7d369fdd3117f..9d20cabf4f699 100644
+--- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
++++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
+@@ -26,14 +26,14 @@
+ stdout-path = "serial0:921600n8";
+ };
+
+- cpus_fixed_vproc0: fixedregulator@0 {
++ cpus_fixed_vproc0: regulator-vproc-buck0 {
+ compatible = "regulator-fixed";
+ regulator-name = "vproc_buck0";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ };
+
+- cpus_fixed_vproc1: fixedregulator@1 {
++ cpus_fixed_vproc1: regulator-vproc-buck1 {
+ compatible = "regulator-fixed";
+ regulator-name = "vproc_buck1";
+ regulator-min-microvolt = <1000000>;
+@@ -50,7 +50,7 @@
+ id-gpio = <&pio 14 GPIO_ACTIVE_HIGH>;
+ };
+
+- usb_p0_vbus: regulator@2 {
++ usb_p0_vbus: regulator-usb-p0-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "p0_vbus";
+ regulator-min-microvolt = <5000000>;
+@@ -59,7 +59,7 @@
+ enable-active-high;
+ };
+
+- usb_p1_vbus: regulator@3 {
++ usb_p1_vbus: regulator-usb-p1-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "p1_vbus";
+ regulator-min-microvolt = <5000000>;
+@@ -68,7 +68,7 @@
+ enable-active-high;
+ };
+
+- usb_p2_vbus: regulator@4 {
++ usb_p2_vbus: regulator-usb-p2-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "p2_vbus";
+ regulator-min-microvolt = <5000000>;
+@@ -77,7 +77,7 @@
+ enable-active-high;
+ };
+
+- usb_p3_vbus: regulator@5 {
++ usb_p3_vbus: regulator-usb-p3-vbus {
+ compatible = "regulator-fixed";
+ regulator-name = "p3_vbus";
+ regulator-min-microvolt = <5000000>;
+diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
+index a9cca9c146fdc..993a03d7fff14 100644
+--- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
+@@ -160,70 +160,70 @@
+ #clock-cells = <0>;
+ };
+
+- clk26m: oscillator@0 {
++ clk26m: oscillator-26m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ clock-output-names = "clk26m";
+ };
+
+- clk32k: oscillator@1 {
++ clk32k: oscillator-32k {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "clk32k";
+ };
+
+- clkfpc: oscillator@2 {
++ clkfpc: oscillator-50m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "clkfpc";
+ };
+
+- clkaud_ext_i_0: oscillator@3 {
++ clkaud_ext_i_0: oscillator-aud0 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <6500000>;
+ clock-output-names = "clkaud_ext_i_0";
+ };
+
+- clkaud_ext_i_1: oscillator@4 {
++ clkaud_ext_i_1: oscillator-aud1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <196608000>;
+ clock-output-names = "clkaud_ext_i_1";
+ };
+
+- clkaud_ext_i_2: oscillator@5 {
++ clkaud_ext_i_2: oscillator-aud2 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <180633600>;
+ clock-output-names = "clkaud_ext_i_2";
+ };
+
+- clki2si0_mck_i: oscillator@6 {
++ clki2si0_mck_i: oscillator-i2s0 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <30000000>;
+ clock-output-names = "clki2si0_mck_i";
+ };
+
+- clki2si1_mck_i: oscillator@7 {
++ clki2si1_mck_i: oscillator-i2s1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <30000000>;
+ clock-output-names = "clki2si1_mck_i";
+ };
+
+- clki2si2_mck_i: oscillator@8 {
++ clki2si2_mck_i: oscillator-i2s2 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <30000000>;
+ clock-output-names = "clki2si2_mck_i";
+ };
+
+- clktdmin_mclk_i: oscillator@9 {
++ clktdmin_mclk_i: oscillator-mclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <30000000>;
+@@ -266,7 +266,7 @@
+ reg = <0 0x10005000 0 0x1000>;
+ };
+
+- pio: pinctrl@10005000 {
++ pio: pinctrl@1000b000 {
+ compatible = "mediatek,mt2712-pinctrl";
+ reg = <0 0x1000b000 0 0x1000>;
+ mediatek,pctl-regmap = <&syscfg_pctl_a>;
+diff --git a/arch/arm64/boot/dts/mediatek/mt6779.dtsi b/arch/arm64/boot/dts/mediatek/mt6779.dtsi
+index 9bdf5145966c5..dde9ce137b4f1 100644
+--- a/arch/arm64/boot/dts/mediatek/mt6779.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt6779.dtsi
+@@ -88,14 +88,14 @@
+ interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_LOW 0>;
+ };
+
+- clk26m: oscillator@0 {
++ clk26m: oscillator-26m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ clock-output-names = "clk26m";
+ };
+
+- clk32k: oscillator@1 {
++ clk32k: oscillator-32k {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+@@ -117,7 +117,7 @@
+ compatible = "simple-bus";
+ ranges;
+
+- gic: interrupt-controller@0c000000 {
++ gic: interrupt-controller@c000000 {
+ compatible = "arm,gic-v3";
+ #interrupt-cells = <4>;
+ interrupt-parent = <&gic>;
+@@ -138,7 +138,7 @@
+
+ };
+
+- sysirq: intpol-controller@0c53a650 {
++ sysirq: intpol-controller@c53a650 {
+ compatible = "mediatek,mt6779-sysirq",
+ "mediatek,mt6577-sysirq";
+ interrupt-controller;
+diff --git a/arch/arm64/boot/dts/mediatek/mt6797.dtsi b/arch/arm64/boot/dts/mediatek/mt6797.dtsi
+index 15616231022a2..c3677d77e0a45 100644
+--- a/arch/arm64/boot/dts/mediatek/mt6797.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt6797.dtsi
+@@ -95,7 +95,7 @@
+ };
+ };
+
+- clk26m: oscillator@0 {
++ clk26m: oscillator-26m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+index 2f77dc40b9b82..6b99d903b4791 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
++++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+@@ -49,7 +49,7 @@
+ wps {
+ label = "wps";
+ linux,code = <KEY_WPS_BUTTON>;
+- gpios = <&pio 102 GPIO_ACTIVE_HIGH>;
++ gpios = <&pio 102 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+index 890a942ec6082..a4c48b2abd209 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+@@ -428,6 +428,7 @@
+ pwm: pwm@11006000 {
+ compatible = "mediatek,mt7622-pwm";
+ reg = <0 0x11006000 0 0x1000>;
++ #pwm-cells = <2>;
+ interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&topckgen CLK_TOP_PWM_SEL>,
+ <&pericfg CLK_PERI_PWM_PD>,
+diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+index 8e9cf36a9a41a..6529962edd4e9 100644
+--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+@@ -281,6 +281,10 @@
+ };
+ };
+
++&gic {
++ mediatek,broken-save-restore-fw;
++};
++
+ &gpu {
+ mali-supply = <&mt6358_vgpu_reg>;
+ sram-supply = <&mt6358_vsram_gpu_reg>;
+diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+index 409cf827970cf..81fde34ffd52a 100644
+--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+@@ -299,6 +299,15 @@
+ method = "smc";
+ };
+
++ clk13m: fixed-factor-clock-13m {
++ compatible = "fixed-factor-clock";
++ #clock-cells = <0>;
++ clocks = <&clk26m>;
++ clock-div = <2>;
++ clock-mult = <1>;
++ clock-output-names = "clk13m";
++ };
++
+ clk26m: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+@@ -610,8 +619,7 @@
+ "mediatek,mt6765-timer";
+ reg = <0 0x10017000 0 0x1000>;
+ interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH>;
+- clocks = <&topckgen CLK_TOP_CLK13M>;
+- clock-names = "clk13m";
++ clocks = <&clk13m>;
+ };
+
+ iommu: iommu@10205000 {
+@@ -1212,7 +1220,7 @@
+ <GIC_SPI 278 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-names = "job", "mmu", "gpu";
+
+- clocks = <&topckgen CLK_TOP_MFGPLL_CK>;
++ clocks = <&mfgcfg CLK_MFG_BG3D>;
+
+ power-domains =
+ <&spm MT8183_POWER_DOMAIN_MFG_CORE0>,
+diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+index 9757138a8bbd8..72f444405ebfe 100644
+--- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+@@ -39,9 +39,10 @@
+ reg = <0x000>;
+ enable-method = "psci";
+ clock-frequency = <1701000000>;
+- cpu-idle-states = <&cpuoff_l &clusteroff_l>;
++ cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>;
+ next-level-cache = <&l2_0>;
+- capacity-dmips-mhz = <530>;
++ performance-domains = <&performance 0>;
++ capacity-dmips-mhz = <427>;
+ };
+
+ cpu1: cpu@100 {
+@@ -50,9 +51,10 @@
+ reg = <0x100>;
+ enable-method = "psci";
+ clock-frequency = <1701000000>;
+- cpu-idle-states = <&cpuoff_l &clusteroff_l>;
++ cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>;
+ next-level-cache = <&l2_0>;
+- capacity-dmips-mhz = <530>;
++ performance-domains = <&performance 0>;
++ capacity-dmips-mhz = <427>;
+ };
+
+ cpu2: cpu@200 {
+@@ -61,9 +63,10 @@
+ reg = <0x200>;
+ enable-method = "psci";
+ clock-frequency = <1701000000>;
+- cpu-idle-states = <&cpuoff_l &clusteroff_l>;
++ cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>;
+ next-level-cache = <&l2_0>;
+- capacity-dmips-mhz = <530>;
++ performance-domains = <&performance 0>;
++ capacity-dmips-mhz = <427>;
+ };
+
+ cpu3: cpu@300 {
+@@ -72,9 +75,10 @@
+ reg = <0x300>;
+ enable-method = "psci";
+ clock-frequency = <1701000000>;
+- cpu-idle-states = <&cpuoff_l &clusteroff_l>;
++ cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>;
+ next-level-cache = <&l2_0>;
+- capacity-dmips-mhz = <530>;
++ performance-domains = <&performance 0>;
++ capacity-dmips-mhz = <427>;
+ };
+
+ cpu4: cpu@400 {
+@@ -83,8 +87,9 @@
+ reg = <0x400>;
+ enable-method = "psci";
+ clock-frequency = <2171000000>;
+- cpu-idle-states = <&cpuoff_b &clusteroff_b>;
++ cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>;
+ next-level-cache = <&l2_1>;
++ performance-domains = <&performance 1>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+@@ -94,8 +99,9 @@
+ reg = <0x500>;
+ enable-method = "psci";
+ clock-frequency = <2171000000>;
+- cpu-idle-states = <&cpuoff_b &clusteroff_b>;
++ cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>;
+ next-level-cache = <&l2_1>;
++ performance-domains = <&performance 1>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+@@ -105,8 +111,9 @@
+ reg = <0x600>;
+ enable-method = "psci";
+ clock-frequency = <2171000000>;
+- cpu-idle-states = <&cpuoff_b &clusteroff_b>;
++ cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>;
+ next-level-cache = <&l2_1>;
++ performance-domains = <&performance 1>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+@@ -116,8 +123,9 @@
+ reg = <0x700>;
+ enable-method = "psci";
+ clock-frequency = <2171000000>;
+- cpu-idle-states = <&cpuoff_b &clusteroff_b>;
++ cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>;
+ next-level-cache = <&l2_1>;
++ performance-domains = <&performance 1>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+@@ -135,19 +143,16 @@
+ core3 {
+ cpu = <&cpu3>;
+ };
+- };
+-
+- cluster1 {
+- core0 {
++ core4 {
+ cpu = <&cpu4>;
+ };
+- core1 {
++ core5 {
+ cpu = <&cpu5>;
+ };
+- core2 {
++ core6 {
+ cpu = <&cpu6>;
+ };
+- core3 {
++ core7 {
+ cpu = <&cpu7>;
+ };
+ };
+@@ -168,8 +173,8 @@
+ };
+
+ idle-states {
+- entry-method = "arm,psci";
+- cpuoff_l: cpuoff_l {
++ entry-method = "psci";
++ cpu_sleep_l: cpu-sleep-l {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x00010001>;
+ local-timer-stop;
+@@ -177,7 +182,7 @@
+ exit-latency-us = <140>;
+ min-residency-us = <780>;
+ };
+- cpuoff_b: cpuoff_b {
++ cpu_sleep_b: cpu-sleep-b {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x00010001>;
+ local-timer-stop;
+@@ -185,7 +190,7 @@
+ exit-latency-us = <145>;
+ min-residency-us = <720>;
+ };
+- clusteroff_l: clusteroff_l {
++ cluster_sleep_l: cluster-sleep-l {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x01010002>;
+ local-timer-stop;
+@@ -193,7 +198,7 @@
+ exit-latency-us = <155>;
+ min-residency-us = <860>;
+ };
+- clusteroff_b: clusteroff_b {
++ cluster_sleep_b: cluster-sleep-b {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x01010002>;
+ local-timer-stop;
+@@ -237,6 +242,12 @@
+ compatible = "simple-bus";
+ ranges;
+
++ performance: performance-controller@11bc10 {
++ compatible = "mediatek,cpufreq-hw";
++ reg = <0 0x0011bc10 0 0x120>, <0 0x0011bd30 0 0x120>;
++ #performance-domain-cells = <1>;
++ };
++
+ gic: interrupt-controller@c000000 {
+ compatible = "arm,gic-v3";
+ #interrupt-cells = <4>;
+@@ -433,7 +444,7 @@
+ clock-names = "spi", "sf", "axi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+- status = "disable";
++ status = "disabled";
+ };
+
+ i2c3: i2c3@11cb0000 {
+diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
+index fcddec14738d8..54514d62398f2 100644
+--- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
++++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
+@@ -17,7 +17,7 @@
+ };
+
+ firmware {
+- optee: optee@4fd00000 {
++ optee: optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+@@ -210,7 +210,7 @@
+ };
+ };
+
+- i2c0_pins_a: i2c0@0 {
++ i2c0_pins_a: i2c0 {
+ pins1 {
+ pinmux = <MT8516_PIN_58_SDA0__FUNC_SDA0_0>,
+ <MT8516_PIN_59_SCL0__FUNC_SCL0_0>;
+@@ -218,7 +218,7 @@
+ };
+ };
+
+- i2c2_pins_a: i2c2@0 {
++ i2c2_pins_a: i2c2 {
+ pins1 {
+ pinmux = <MT8516_PIN_60_SDA2__FUNC_SDA2_0>,
+ <MT8516_PIN_61_SCL2__FUNC_SCL2_0>;
+diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi
+index 787ebcec121d6..a6405059636c3 100644
+--- a/arch/arm64/boot/dts/microchip/sparx5.dtsi
++++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi
+@@ -61,7 +61,7 @@
+ interrupt-affinity = <&cpu0>, <&cpu1>;
+ };
+
+- psci {
++ psci: psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi
+index 9d1a082de3e29..32bb76b3202a0 100644
+--- a/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi
++++ b/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi
+@@ -6,6 +6,18 @@
+ /dts-v1/;
+ #include "sparx5.dtsi"
+
++&psci {
++ status = "disabled";
++};
++
++&cpu0 {
++ enable-method = "spin-table";
++};
++
++&cpu1 {
++ enable-method = "spin-table";
++};
++
+ &uart0 {
+ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+index e94f8add1a400..5b0bc9aa1a426 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+@@ -1079,7 +1079,7 @@
+
+ ccplex@e000000 {
+ compatible = "nvidia,tegra186-ccplex-cluster";
+- reg = <0x0 0x0e000000 0x0 0x3fffff>;
++ reg = <0x0 0x0e000000 0x0 0x400000>;
+
+ nvidia,bpmp = <&bpmp>;
+ };
+@@ -1583,6 +1583,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x0 0x30000000 0x50000>;
++ no-memory-wc;
+
+ cpu_bpmp_tx: sram@4e000 {
+ reg = <0x4e000 0x1000>;
+@@ -1635,7 +1636,7 @@
+ iommus = <&smmu TEGRA186_SID_BPMP>;
+ mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB
+ TEGRA_HSP_DB_MASTER_BPMP>;
+- shmem = <&cpu_bpmp_tx &cpu_bpmp_rx>;
++ shmem = <&cpu_bpmp_tx>, <&cpu_bpmp_rx>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+index c4058ee36fecb..1a444705517f3 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+@@ -75,7 +75,7 @@
+
+ /* SDMMC1 (SD/MMC) */
+ mmc@3400000 {
+- cd-gpios = <&gpio TEGRA194_MAIN_GPIO(A, 0) GPIO_ACTIVE_LOW>;
++ cd-gpios = <&gpio TEGRA194_MAIN_GPIO(G, 7) GPIO_ACTIVE_LOW>;
+ };
+
+ /* SDMMC4 (eMMC) */
+diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+index c8250a3f7891f..ca71b71d801a6 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+@@ -818,9 +818,8 @@
+ <&bpmp TEGRA194_CLK_HDA2CODEC_2X>;
+ clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+ resets = <&bpmp TEGRA194_RESET_HDA>,
+- <&bpmp TEGRA194_RESET_HDA2HDMICODEC>,
+- <&bpmp TEGRA194_RESET_HDA2CODEC_2X>;
+- reset-names = "hda", "hda2hdmi", "hda2codec_2x";
++ <&bpmp TEGRA194_RESET_HDA2HDMICODEC>;
++ reset-names = "hda", "hda2hdmi";
+ power-domains = <&bpmp TEGRA194_POWER_DOMAIN_DISP>;
+ interconnects = <&mc TEGRA194_MEMORY_CLIENT_HDAR &emc>,
+ <&mc TEGRA194_MEMORY_CLIENT_HDAW &emc>;
+@@ -2250,6 +2249,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x0 0x40000000 0x50000>;
++ no-memory-wc;
+
+ cpu_bpmp_tx: sram@4e000 {
+ reg = <0x4e000 0x1000>;
+@@ -2268,7 +2268,7 @@
+ compatible = "nvidia,tegra186-bpmp";
+ mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB
+ TEGRA_HSP_DB_MASTER_BPMP>;
+- shmem = <&cpu_bpmp_tx &cpu_bpmp_rx>;
++ shmem = <&cpu_bpmp_tx>, <&cpu_bpmp_rx>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
+index 26b3f98a211c2..f88dc820389b2 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi
+@@ -1355,8 +1355,9 @@
+ <&tegra_car TEGRA210_CLK_DFLL_REF>,
+ <&tegra_car TEGRA210_CLK_I2C5>;
+ clock-names = "soc", "ref", "i2c";
+- resets = <&tegra_car TEGRA210_RST_DFLL_DVCO>;
+- reset-names = "dvco";
++ resets = <&tegra_car TEGRA210_RST_DFLL_DVCO>,
++ <&tegra_car 155>;
++ reset-names = "dvco", "dfll";
+ #clock-cells = <0>;
+ clock-output-names = "dfllCPU_out";
+ status = "disabled";
+diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+index f0efb3a628040..2b47845722206 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+@@ -122,21 +122,22 @@
+ };
+ };
+
+- sysram@40000000 {
++ sram@40000000 {
+ compatible = "nvidia,tegra234-sysram", "mmio-sram";
+- reg = <0x0 0x40000000 0x0 0x50000>;
++ reg = <0x0 0x40000000 0x0 0x80000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+- ranges = <0x0 0x0 0x40000000 0x50000>;
++ ranges = <0x0 0x0 0x40000000 0x80000>;
++ no-memory-wc;
+
+- cpu_bpmp_tx: shmem@4e000 {
+- reg = <0x4e000 0x1000>;
++ cpu_bpmp_tx: sram@70000 {
++ reg = <0x70000 0x1000>;
+ label = "cpu-bpmp-tx";
+ pool;
+ };
+
+- cpu_bpmp_rx: shmem@4f000 {
+- reg = <0x4f000 0x1000>;
++ cpu_bpmp_rx: sram@71000 {
++ reg = <0x71000 0x1000>;
+ label = "cpu-bpmp-rx";
+ pool;
+ };
+@@ -146,7 +147,7 @@
+ compatible = "nvidia,tegra234-bpmp", "nvidia,tegra186-bpmp";
+ mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB
+ TEGRA_HSP_DB_MASTER_BPMP>;
+- shmem = <&cpu_bpmp_tx &cpu_bpmp_rx>;
++ shmem = <&cpu_bpmp_tx>, <&cpu_bpmp_rx>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
+index f3c0dbfd0a232..ad4c2ccec63ee 100644
+--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
++++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts
+@@ -5,9 +5,847 @@
+
+ /dts-v1/;
+
+-#include "apq8016-sbc.dtsi"
++#include "msm8916-pm8916.dtsi"
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/input/input.h>
++#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
++#include <dt-bindings/pinctrl/qcom,pmic-mpp.h>
++#include <dt-bindings/sound/apq8016-lpass.h>
+
+ / {
+ model = "Qualcomm Technologies, Inc. APQ 8016 SBC";
+ compatible = "qcom,apq8016-sbc", "qcom,apq8016";
++
++ aliases {
++ serial0 = &blsp1_uart2;
++ serial1 = &blsp1_uart1;
++ usid0 = &pm8916_0;
++ i2c0 = &blsp_i2c2;
++ i2c1 = &blsp_i2c6;
++ i2c3 = &blsp_i2c4;
++ spi0 = &blsp_spi5;
++ spi1 = &blsp_spi3;
++ };
++
++ chosen {
++ stdout-path = "serial0";
++ };
++
++ camera_vdddo_1v8: camera-vdddo-1v8 {
++ compatible = "regulator-fixed";
++ regulator-name = "camera_vdddo";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ regulator-always-on;
++ };
++
++ camera_vdda_2v8: camera-vdda-2v8 {
++ compatible = "regulator-fixed";
++ regulator-name = "camera_vdda";
++ regulator-min-microvolt = <2800000>;
++ regulator-max-microvolt = <2800000>;
++ regulator-always-on;
++ };
++
++ camera_vddd_1v5: camera-vddd-1v5 {
++ compatible = "regulator-fixed";
++ regulator-name = "camera_vddd";
++ regulator-min-microvolt = <1500000>;
++ regulator-max-microvolt = <1500000>;
++ regulator-always-on;
++ };
++
++ reserved-memory {
++ ramoops@bff00000 {
++ compatible = "ramoops";
++ reg = <0x0 0xbff00000 0x0 0x100000>;
++
++ record-size = <0x20000>;
++ console-size = <0x20000>;
++ ftrace-size = <0x20000>;
++ };
++ };
++
++ usb2513 {
++ compatible = "smsc,usb3503";
++ reset-gpios = <&pm8916_gpios 3 GPIO_ACTIVE_LOW>;
++ initial-mode = <1>;
++ };
++
++ usb_id: usb-id {
++ compatible = "linux,extcon-usb-gpio";
++ id-gpio = <&msmgpio 121 GPIO_ACTIVE_HIGH>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&usb_id_default>;
++ };
++
++ hdmi-out {
++ compatible = "hdmi-connector";
++ type = "a";
++
++ port {
++ hdmi_con: endpoint {
++ remote-endpoint = <&adv7533_out>;
++ };
++ };
++ };
++
++ gpio-keys {
++ compatible = "gpio-keys";
++ #address-cells = <1>;
++ #size-cells = <0>;
++ autorepeat;
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&msm_key_volp_n_default>;
++
++ button@0 {
++ label = "Volume Up";
++ linux,code = <KEY_VOLUMEUP>;
++ gpios = <&msmgpio 107 GPIO_ACTIVE_LOW>;
++ };
++ };
++
++ leds {
++ pinctrl-names = "default";
++ pinctrl-0 = <&msmgpio_leds>,
++ <&pm8916_gpios_leds>,
++ <&pm8916_mpps_leds>;
++
++ compatible = "gpio-leds";
++
++ led@1 {
++ label = "apq8016-sbc:green:user1";
++ gpios = <&msmgpio 21 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "heartbeat";
++ default-state = "off";
++ };
++
++ led@2 {
++ label = "apq8016-sbc:green:user2";
++ gpios = <&msmgpio 120 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "mmc0";
++ default-state = "off";
++ };
++
++ led@3 {
++ label = "apq8016-sbc:green:user3";
++ gpios = <&pm8916_gpios 1 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "mmc1";
++ default-state = "off";
++ };
++
++ led@4 {
++ label = "apq8016-sbc:green:user4";
++ gpios = <&pm8916_gpios 2 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "none";
++ panic-indicator;
++ default-state = "off";
++ };
++
++ led@5 {
++ label = "apq8016-sbc:yellow:wlan";
++ gpios = <&pm8916_mpps 2 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "phy0tx";
++ default-state = "off";
++ };
++
++ led@6 {
++ label = "apq8016-sbc:blue:bt";
++ gpios = <&pm8916_mpps 3 GPIO_ACTIVE_HIGH>;
++ linux,default-trigger = "bluetooth-power";
++ default-state = "off";
++ };
++ };
++};
++
++&blsp_dma {
++ status = "okay";
++};
++
++&blsp_i2c2 {
++ /* On Low speed expansion */
++ status = "okay";
++ label = "LS-I2C0";
++};
++
++&blsp_i2c4 {
++ /* On High speed expansion */
++ status = "okay";
++ label = "HS-I2C2";
++
++ adv_bridge: bridge@39 {
++ status = "okay";
++
++ compatible = "adi,adv7533";
++ reg = <0x39>;
++
++ interrupt-parent = <&msmgpio>;
++ interrupts = <31 IRQ_TYPE_EDGE_FALLING>;
++
++ adi,dsi-lanes = <4>;
++ clocks = <&rpmcc RPM_SMD_BB_CLK2>;
++ clock-names = "cec";
++
++ pd-gpios = <&msmgpio 32 GPIO_ACTIVE_HIGH>;
++
++ avdd-supply = <&pm8916_l6>;
++ v1p2-supply = <&pm8916_l6>;
++ v3p3-supply = <&pm8916_l17>;
++
++ pinctrl-names = "default","sleep";
++ pinctrl-0 = <&adv7533_int_active &adv7533_switch_active>;
++ pinctrl-1 = <&adv7533_int_suspend &adv7533_switch_suspend>;
++ #sound-dai-cells = <1>;
++
++ ports {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ port@0 {
++ reg = <0>;
++ adv7533_in: endpoint {
++ remote-endpoint = <&dsi0_out>;
++ };
++ };
++
++ port@1 {
++ reg = <1>;
++ adv7533_out: endpoint {
++ remote-endpoint = <&hdmi_con>;
++ };
++ };
++ };
++ };
++};
++
++&blsp_i2c6 {
++ /* On Low speed expansion */
++ status = "okay";
++ label = "LS-I2C1";
++};
++
++&blsp_spi3 {
++ /* On High speed expansion */
++ status = "okay";
++ label = "HS-SPI1";
++};
++
++&blsp_spi5 {
++ /* On Low speed expansion */
++ status = "okay";
++ label = "LS-SPI0";
++};
++
++&blsp1_uart1 {
++ status = "okay";
++ label = "LS-UART0";
++};
++
++&blsp1_uart2 {
++ status = "okay";
++ label = "LS-UART1";
++};
++
++&camss {
++ status = "okay";
++ ports {
++ port@0 {
++ reg = <0>;
++ csiphy0_ep: endpoint {
++ clock-lanes = <1>;
++ data-lanes = <0 2>;
++ remote-endpoint = <&ov5640_ep>;
++ status = "okay";
++ };
++ };
++ };
++};
++
++&cci {
++ status = "okay";
++};
++
++&cci_i2c0 {
++ camera_rear@3b {
++ compatible = "ovti,ov5640";
++ reg = <0x3b>;
++
++ enable-gpios = <&msmgpio 34 GPIO_ACTIVE_HIGH>;
++ reset-gpios = <&msmgpio 35 GPIO_ACTIVE_LOW>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&camera_rear_default>;
++
++ clocks = <&gcc GCC_CAMSS_MCLK0_CLK>;
++ clock-names = "xclk";
++ clock-frequency = <23880000>;
++
++ DOVDD-supply = <&camera_vdddo_1v8>;
++ AVDD-supply = <&camera_vdda_2v8>;
++ DVDD-supply = <&camera_vddd_1v5>;
++
++ /* No camera mezzanine by default */
++ status = "disabled";
++
++ port {
++ ov5640_ep: endpoint {
++ clock-lanes = <1>;
++ data-lanes = <0 2>;
++ remote-endpoint = <&csiphy0_ep>;
++ };
++ };
++ };
++};
++
++&dsi0_out {
++ data-lanes = <0 1 2 3>;
++ remote-endpoint = <&adv7533_in>;
++};
++
++&lpass {
++ status = "okay";
++};
++
++&mdss {
++ status = "okay";
++};
++
++&mpss {
++ status = "okay";
++
++ firmware-name = "qcom/apq8016/mba.mbn", "qcom/apq8016/modem.mbn";
++};
++
++&pm8916_resin {
++ status = "okay";
++ linux,code = <KEY_VOLUMEDOWN>;
++};
++
++&pronto {
++ status = "okay";
++
++ firmware-name = "qcom/apq8016/wcnss.mbn";
++};
++
++&sdhc_1 {
++ status = "okay";
++
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&sdc1_clk_on &sdc1_cmd_on &sdc1_data_on>;
++ pinctrl-1 = <&sdc1_clk_off &sdc1_cmd_off &sdc1_data_off>;
++};
++
++&sdhc_2 {
++ status = "okay";
++
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&sdc2_clk_on &sdc2_cmd_on &sdc2_data_on &sdc2_cd_on>;
++ pinctrl-1 = <&sdc2_clk_off &sdc2_cmd_off &sdc2_data_off &sdc2_cd_off>;
++
++ cd-gpios = <&msmgpio 38 GPIO_ACTIVE_LOW>;
++};
++
++&sound {
++ status = "okay";
++
++ pinctrl-0 = <&cdc_pdm_lines_act &ext_sec_tlmm_lines_act &ext_mclk_tlmm_lines_act>;
++ pinctrl-1 = <&cdc_pdm_lines_sus &ext_sec_tlmm_lines_sus &ext_mclk_tlmm_lines_sus>;
++ pinctrl-names = "default", "sleep";
++ qcom,model = "DB410c";
++ qcom,audio-routing =
++ "AMIC2", "MIC BIAS Internal2",
++ "AMIC3", "MIC BIAS External1";
++
++ external-dai-link@0 {
++ link-name = "ADV7533";
++ cpu {
++ sound-dai = <&lpass MI2S_QUATERNARY>;
++ };
++ codec {
++ sound-dai = <&adv_bridge 0>;
++ };
++ };
++
++ internal-codec-playback-dai-link@0 {
++ link-name = "WCD";
++ cpu {
++ sound-dai = <&lpass MI2S_PRIMARY>;
++ };
++ codec {
++ sound-dai = <&lpass_codec 0>, <&wcd_codec 0>;
++ };
++ };
++
++ internal-codec-capture-dai-link@0 {
++ link-name = "WCD-Capture";
++ cpu {
++ sound-dai = <&lpass MI2S_TERTIARY>;
++ };
++ codec {
++ sound-dai = <&lpass_codec 1>, <&wcd_codec 1>;
++ };
++ };
++};
++
++&usb {
++ status = "okay";
++ extcon = <&usb_id>, <&usb_id>;
++
++ pinctrl-names = "default", "device";
++ pinctrl-0 = <&usb_sw_sel_pm &usb_hub_reset_pm>;
++ pinctrl-1 = <&usb_sw_sel_pm_device &usb_hub_reset_pm_device>;
++};
++
++&usb_hs_phy {
++ extcon = <&usb_id>;
++};
++
++&wcd_codec {
++ clocks = <&gcc GCC_CODEC_DIGCODEC_CLK>;
++ clock-names = "mclk";
++ qcom,mbhc-vthreshold-low = <75 150 237 450 500>;
++ qcom,mbhc-vthreshold-high = <75 150 237 450 500>;
++};
++
++&wcnss_ctrl {
++ firmware-name = "qcom/apq8016/WCNSS_qcom_wlan_nv_sbc.bin";
++};
++
++/* Enable CoreSight */
++&cti0 { status = "okay"; };
++&cti1 { status = "okay"; };
++&cti12 { status = "okay"; };
++&cti13 { status = "okay"; };
++&cti14 { status = "okay"; };
++&cti15 { status = "okay"; };
++&debug0 { status = "okay"; };
++&debug1 { status = "okay"; };
++&debug2 { status = "okay"; };
++&debug3 { status = "okay"; };
++&etf { status = "okay"; };
++&etm0 { status = "okay"; };
++&etm1 { status = "okay"; };
++&etm2 { status = "okay"; };
++&etm3 { status = "okay"; };
++&etr { status = "okay"; };
++&funnel0 { status = "okay"; };
++&funnel1 { status = "okay"; };
++&replicator { status = "okay"; };
++&stm { status = "okay"; };
++&tpiu { status = "okay"; };
++
++&smd_rpm_regulators {
++ vdd_l1_l2_l3-supply = <&pm8916_s3>;
++ vdd_l4_l5_l6-supply = <&pm8916_s4>;
++ vdd_l7-supply = <&pm8916_s4>;
++
++ s3 {
++ regulator-min-microvolt = <1250000>;
++ regulator-max-microvolt = <1350000>;
++ };
++
++ s4 {
++ regulator-min-microvolt = <1850000>;
++ regulator-max-microvolt = <2150000>;
++
++ regulator-always-on;
++ regulator-boot-on;
++ };
++
++ l1 {
++ regulator-min-microvolt = <1225000>;
++ regulator-max-microvolt = <1225000>;
++ };
++
++ l2 {
++ regulator-min-microvolt = <1200000>;
++ regulator-max-microvolt = <1200000>;
++ };
++
++ l4 {
++ regulator-min-microvolt = <2050000>;
++ regulator-max-microvolt = <2050000>;
++ };
++
++ l5 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++
++ l6 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++
++ l7 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++
++ l8 {
++ regulator-min-microvolt = <2900000>;
++ regulator-max-microvolt = <2900000>;
++ };
++
++ l9 {
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
++ };
++
++ l10 {
++ regulator-min-microvolt = <2800000>;
++ regulator-max-microvolt = <2800000>;
++ };
++
++ l11 {
++ regulator-min-microvolt = <2950000>;
++ regulator-max-microvolt = <2950000>;
++ regulator-allow-set-load;
++ regulator-system-load = <200000>;
++ };
++
++ l12 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <2950000>;
++ };
++
++ l13 {
++ regulator-min-microvolt = <3075000>;
++ regulator-max-microvolt = <3075000>;
++ };
++
++ l14 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <3300000>;
++ };
++
++ /*
++ * The 96Boards specification expects a 1.8V power rail on the low-speed
++ * expansion connector that is able to provide at least 0.18W / 100 mA.
++ * L15/L16 are connected in parallel to provide 55 mA each. A minimum load
++ * must be specified to ensure the regulators are not put in LPM where they
++ * would only provide 5 mA.
++ */
++ l15 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ regulator-system-load = <50000>;
++ regulator-allow-set-load;
++ regulator-always-on;
++ };
++
++ l16 {
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ regulator-system-load = <50000>;
++ regulator-allow-set-load;
++ regulator-always-on;
++ };
++
++ l17 {
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
++ };
++
++ l18 {
++ regulator-min-microvolt = <2700000>;
++ regulator-max-microvolt = <2700000>;
++ };
++};
++
++/*
++ * 2mA drive strength is not enough when connecting multiple
++ * I2C devices with different pull up resistors.
++ */
++&i2c2_default {
++ drive-strength = <16>;
++};
++
++&i2c4_default {
++ drive-strength = <16>;
++};
++
++&i2c6_default {
++ drive-strength = <16>;
++};
++
++/*
++ * GPIO name legend: proper name = the GPIO line is used as GPIO
++ * NC = not connected (pin out but not routed from the chip to
++ * anything the board)
++ * "[PER]" = pin is muxed for [peripheral] (not GPIO)
++ * LSEC = Low Speed External Connector
++ * HSEC = High Speed External Connector
++ *
++ * Line names are taken from the schematic "DragonBoard410c"
++ * dated monday, august 31, 2015. Page 5 in particular.
++ *
++ * For the lines routed to the external connectors the
++ * lines are named after the 96Boards CE Specification 1.0,
++ * Appendix "Expansion Connector Signal Description".
++ *
++ * When the 96Board naming of a line and the schematic name of
++ * the same line are in conflict, the 96Board specification
++ * takes precedence, which means that the external UART on the
++ * LSEC is named UART0 while the schematic and SoC names this
++ * UART3. This is only for the informational lines i.e. "[FOO]",
++ * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
++ * ones actually used for GPIO.
++ */
++
++&msmgpio {
++ gpio-line-names =
++ "[UART0_TX]", /* GPIO_0, LSEC pin 5 */
++ "[UART0_RX]", /* GPIO_1, LSEC pin 7 */
++ "[UART0_CTS_N]", /* GPIO_2, LSEC pin 3 */
++ "[UART0_RTS_N]", /* GPIO_3, LSEC pin 9 */
++ "[UART1_TX]", /* GPIO_4, LSEC pin 11 */
++ "[UART1_RX]", /* GPIO_5, LSEC pin 13 */
++ "[I2C0_SDA]", /* GPIO_8, LSEC pin 17 */
++ "[I2C0_SCL]", /* GPIO_7, LSEC pin 15 */
++ "[SPI1_DOUT]", /* SPI1_MOSI, HSEC pin 1 */
++ "[SPI1_DIN]", /* SPI1_MISO, HSEC pin 11 */
++ "[SPI1_CS]", /* SPI1_CS_N, HSEC pin 7 */
++ "[SPI1_SCLK]", /* SPI1_CLK, HSEC pin 9 */
++ "GPIO-B", /* LS_EXP_GPIO_B, LSEC pin 24 */
++ "GPIO-C", /* LS_EXP_GPIO_C, LSEC pin 25 */
++ "[I2C3_SDA]", /* HSEC pin 38 */
++ "[I2C3_SCL]", /* HSEC pin 36 */
++ "[SPI0_MOSI]", /* LSEC pin 14 */
++ "[SPI0_MISO]", /* LSEC pin 10 */
++ "[SPI0_CS_N]", /* LSEC pin 12 */
++ "[SPI0_CLK]", /* LSEC pin 8 */
++ "HDMI_HPD_N", /* GPIO 20 */
++ "USR_LED_1_CTRL",
++ "[I2C1_SDA]", /* GPIO_22, LSEC pin 21 */
++ "[I2C1_SCL]", /* GPIO_23, LSEC pin 19 */
++ "GPIO-G", /* LS_EXP_GPIO_G, LSEC pin 29 */
++ "GPIO-H", /* LS_EXP_GPIO_H, LSEC pin 30 */
++ "[CSI0_MCLK]", /* HSEC pin 15 */
++ "[CSI1_MCLK]", /* HSEC pin 17 */
++ "GPIO-K", /* LS_EXP_GPIO_K, LSEC pin 33 */
++ "[I2C2_SDA]", /* HSEC pin 34 */
++ "[I2C2_SCL]", /* HSEC pin 32 */
++ "DSI2HDMI_INT_N",
++ "DSI_SW_SEL_APQ",
++ "GPIO-L", /* LS_EXP_GPIO_L, LSEC pin 34 */
++ "GPIO-J", /* LS_EXP_GPIO_J, LSEC pin 32 */
++ "GPIO-I", /* LS_EXP_GPIO_I, LSEC pin 31 */
++ "GPIO-A", /* LS_EXP_GPIO_A, LSEC pin 23 */
++ "FORCED_USB_BOOT",
++ "SD_CARD_DET_N",
++ "[WCSS_BT_SSBI]",
++ "[WCSS_WLAN_DATA_2]", /* GPIO 40 */
++ "[WCSS_WLAN_DATA_1]",
++ "[WCSS_WLAN_DATA_0]",
++ "[WCSS_WLAN_SET]",
++ "[WCSS_WLAN_CLK]",
++ "[WCSS_FM_SSBI]",
++ "[WCSS_FM_SDI]",
++ "[WCSS_BT_DAT_CTL]",
++ "[WCSS_BT_DAT_STB]",
++ "NC",
++ "NC", /* GPIO 50 */
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC", /* GPIO 60 */
++ "NC",
++ "NC",
++ "[CDC_PDM0_CLK]",
++ "[CDC_PDM0_SYNC]",
++ "[CDC_PDM0_TX0]",
++ "[CDC_PDM0_RX0]",
++ "[CDC_PDM0_RX1]",
++ "[CDC_PDM0_RX2]",
++ "GPIO-D", /* LS_EXP_GPIO_D, LSEC pin 26 */
++ "NC", /* GPIO 70 */
++ "NC",
++ "NC",
++ "NC",
++ "NC", /* GPIO 74 */
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "BOOT_CONFIG_0", /* GPIO 80 */
++ "BOOT_CONFIG_1",
++ "BOOT_CONFIG_2",
++ "BOOT_CONFIG_3",
++ "NC",
++ "NC",
++ "BOOT_CONFIG_5",
++ "NC",
++ "NC",
++ "NC",
++ "NC", /* GPIO 90 */
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC", /* GPIO 100 */
++ "NC",
++ "NC",
++ "NC",
++ "SSBI_GPS",
++ "NC",
++ "NC",
++ "KEY_VOLP_N",
++ "NC",
++ "NC",
++ "[LS_EXP_MI2S_WS]", /* GPIO 110 */
++ "NC",
++ "NC",
++ "[LS_EXP_MI2S_SCK]",
++ "[LS_EXP_MI2S_DATA0]",
++ "GPIO-E", /* LS_EXP_GPIO_E, LSEC pin 27 */
++ "NC",
++ "[DSI2HDMI_MI2S_WS]",
++ "[DSI2HDMI_MI2S_SCK]",
++ "[DSI2HDMI_MI2S_DATA0]",
++ "USR_LED_2_CTRL", /* GPIO 120 */
++ "SB_HS_ID";
++
++ msmgpio_leds: msmgpio-leds {
++ pins = "gpio21", "gpio120";
++ function = "gpio";
++
++ output-low;
++ };
++
++ usb_id_default: usb-id-default {
++ pins = "gpio121";
++ function = "gpio";
++
++ drive-strength = <8>;
++ input-enable;
++ bias-pull-up;
++ };
++
++ adv7533_int_active: adv533-int-active {
++ pins = "gpio31";
++ function = "gpio";
++
++ drive-strength = <16>;
++ bias-disable;
++ };
++
++ adv7533_int_suspend: adv7533-int-suspend {
++ pins = "gpio31";
++ function = "gpio";
++
++ drive-strength = <2>;
++ bias-disable;
++ };
++
++ adv7533_switch_active: adv7533-switch-active {
++ pins = "gpio32";
++ function = "gpio";
++
++ drive-strength = <16>;
++ bias-disable;
++ };
++
++ adv7533_switch_suspend: adv7533-switch-suspend {
++ pins = "gpio32";
++ function = "gpio";
++
++ drive-strength = <2>;
++ bias-disable;
++ };
++
++ msm_key_volp_n_default: msm-key-volp-n-default {
++ pins = "gpio107";
++ function = "gpio";
++
++ drive-strength = <8>;
++ input-enable;
++ bias-pull-up;
++ };
++};
++
++&pm8916_gpios {
++ gpio-line-names =
++ "USR_LED_3_CTRL",
++ "USR_LED_4_CTRL",
++ "USB_HUB_RESET_N_PM",
++ "USB_SW_SEL_PM";
++
++ usb_hub_reset_pm: usb-hub-reset-pm {
++ pins = "gpio3";
++ function = PMIC_GPIO_FUNC_NORMAL;
++
++ input-disable;
++ output-high;
++ };
++
++ usb_hub_reset_pm_device: usb-hub-reset-pm-device {
++ pins = "gpio3";
++ function = PMIC_GPIO_FUNC_NORMAL;
++
++ output-low;
++ };
++
++ usb_sw_sel_pm: usb-sw-sel-pm {
++ pins = "gpio4";
++ function = PMIC_GPIO_FUNC_NORMAL;
++
++ power-source = <PM8916_GPIO_VPH>;
++ input-disable;
++ output-high;
++ };
++
++ usb_sw_sel_pm_device: usb-sw-sel-pm-device {
++ pins = "gpio4";
++ function = PMIC_GPIO_FUNC_NORMAL;
++
++ power-source = <PM8916_GPIO_VPH>;
++ input-disable;
++ output-low;
++ };
++
++ pm8916_gpios_leds: pm8916-gpios-leds {
++ pins = "gpio1", "gpio2";
++ function = PMIC_GPIO_FUNC_NORMAL;
++
++ output-low;
++ };
++};
++
++&pm8916_mpps {
++ gpio-line-names =
++ "VDD_PX_BIAS",
++ "WLAN_LED_CTRL",
++ "BT_LED_CTRL",
++ "GPIO-F"; /* LS_EXP_GPIO_F, LSEC pin 28 */
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&ls_exp_gpio_f>;
++
++ ls_exp_gpio_f: pm8916-mpp4-state {
++ pins = "mpp4";
++ function = "digital";
++
++ output-low;
++ power-source = <PM8916_MPP_L5>; // 1.8V
++ };
++
++ pm8916_mpps_leds: pm8916-mpps-state {
++ pins = "mpp2", "mpp3";
++ function = "digital";
++
++ output-low;
++ };
+ };
+diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
+deleted file mode 100644
+index f8d8f3e3664ec..0000000000000
+--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
++++ /dev/null
+@@ -1,826 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+- */
+-
+-#include "msm8916-pm8916.dtsi"
+-#include <dt-bindings/gpio/gpio.h>
+-#include <dt-bindings/input/input.h>
+-#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+-#include <dt-bindings/pinctrl/qcom,pmic-mpp.h>
+-#include <dt-bindings/sound/apq8016-lpass.h>
+-
+-/ {
+- aliases {
+- serial0 = &blsp1_uart2;
+- serial1 = &blsp1_uart1;
+- usid0 = &pm8916_0;
+- i2c0 = &blsp_i2c2;
+- i2c1 = &blsp_i2c6;
+- i2c3 = &blsp_i2c4;
+- spi0 = &blsp_spi5;
+- spi1 = &blsp_spi3;
+- };
+-
+- chosen {
+- stdout-path = "serial0";
+- };
+-
+- camera_vdddo_1v8: camera-vdddo-1v8 {
+- compatible = "regulator-fixed";
+- regulator-name = "camera_vdddo";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- regulator-always-on;
+- };
+-
+- camera_vdda_2v8: camera-vdda-2v8 {
+- compatible = "regulator-fixed";
+- regulator-name = "camera_vdda";
+- regulator-min-microvolt = <2800000>;
+- regulator-max-microvolt = <2800000>;
+- regulator-always-on;
+- };
+-
+- camera_vddd_1v5: camera-vddd-1v5 {
+- compatible = "regulator-fixed";
+- regulator-name = "camera_vddd";
+- regulator-min-microvolt = <1500000>;
+- regulator-max-microvolt = <1500000>;
+- regulator-always-on;
+- };
+-
+- reserved-memory {
+- ramoops@bff00000 {
+- compatible = "ramoops";
+- reg = <0x0 0xbff00000 0x0 0x100000>;
+-
+- record-size = <0x20000>;
+- console-size = <0x20000>;
+- ftrace-size = <0x20000>;
+- };
+- };
+-
+- usb2513 {
+- compatible = "smsc,usb3503";
+- reset-gpios = <&pm8916_gpios 3 GPIO_ACTIVE_LOW>;
+- initial-mode = <1>;
+- };
+-
+- usb_id: usb-id {
+- compatible = "linux,extcon-usb-gpio";
+- id-gpio = <&msmgpio 121 GPIO_ACTIVE_HIGH>;
+- pinctrl-names = "default";
+- pinctrl-0 = <&usb_id_default>;
+- };
+-
+- hdmi-out {
+- compatible = "hdmi-connector";
+- type = "a";
+-
+- port {
+- hdmi_con: endpoint {
+- remote-endpoint = <&adv7533_out>;
+- };
+- };
+- };
+-
+- gpio-keys {
+- compatible = "gpio-keys";
+- #address-cells = <1>;
+- #size-cells = <0>;
+- autorepeat;
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&msm_key_volp_n_default>;
+-
+- button@0 {
+- label = "Volume Up";
+- linux,code = <KEY_VOLUMEUP>;
+- gpios = <&msmgpio 107 GPIO_ACTIVE_LOW>;
+- };
+- };
+-
+- leds {
+- pinctrl-names = "default";
+- pinctrl-0 = <&msmgpio_leds>,
+- <&pm8916_gpios_leds>,
+- <&pm8916_mpps_leds>;
+-
+- compatible = "gpio-leds";
+-
+- led@1 {
+- label = "apq8016-sbc:green:user1";
+- gpios = <&msmgpio 21 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "heartbeat";
+- default-state = "off";
+- };
+-
+- led@2 {
+- label = "apq8016-sbc:green:user2";
+- gpios = <&msmgpio 120 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "mmc0";
+- default-state = "off";
+- };
+-
+- led@3 {
+- label = "apq8016-sbc:green:user3";
+- gpios = <&pm8916_gpios 1 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "mmc1";
+- default-state = "off";
+- };
+-
+- led@4 {
+- label = "apq8016-sbc:green:user4";
+- gpios = <&pm8916_gpios 2 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "none";
+- panic-indicator;
+- default-state = "off";
+- };
+-
+- led@5 {
+- label = "apq8016-sbc:yellow:wlan";
+- gpios = <&pm8916_mpps 2 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "phy0tx";
+- default-state = "off";
+- };
+-
+- led@6 {
+- label = "apq8016-sbc:blue:bt";
+- gpios = <&pm8916_mpps 3 GPIO_ACTIVE_HIGH>;
+- linux,default-trigger = "bluetooth-power";
+- default-state = "off";
+- };
+- };
+-};
+-
+-&blsp_dma {
+- status = "okay";
+-};
+-
+-&blsp_i2c2 {
+- /* On Low speed expansion */
+- status = "okay";
+- label = "LS-I2C0";
+-};
+-
+-&blsp_i2c4 {
+- /* On High speed expansion */
+- status = "okay";
+- label = "HS-I2C2";
+-
+- adv_bridge: bridge@39 {
+- status = "okay";
+-
+- compatible = "adi,adv7533";
+- reg = <0x39>;
+-
+- interrupt-parent = <&msmgpio>;
+- interrupts = <31 IRQ_TYPE_EDGE_FALLING>;
+-
+- adi,dsi-lanes = <4>;
+- clocks = <&rpmcc RPM_SMD_BB_CLK2>;
+- clock-names = "cec";
+-
+- pd-gpios = <&msmgpio 32 GPIO_ACTIVE_HIGH>;
+-
+- avdd-supply = <&pm8916_l6>;
+- v1p2-supply = <&pm8916_l6>;
+- v3p3-supply = <&pm8916_l17>;
+-
+- pinctrl-names = "default","sleep";
+- pinctrl-0 = <&adv7533_int_active &adv7533_switch_active>;
+- pinctrl-1 = <&adv7533_int_suspend &adv7533_switch_suspend>;
+- #sound-dai-cells = <1>;
+-
+- ports {
+- #address-cells = <1>;
+- #size-cells = <0>;
+-
+- port@0 {
+- reg = <0>;
+- adv7533_in: endpoint {
+- remote-endpoint = <&dsi0_out>;
+- };
+- };
+-
+- port@1 {
+- reg = <1>;
+- adv7533_out: endpoint {
+- remote-endpoint = <&hdmi_con>;
+- };
+- };
+- };
+- };
+-};
+-
+-&blsp_i2c6 {
+- /* On Low speed expansion */
+- status = "okay";
+- label = "LS-I2C1";
+-};
+-
+-&blsp_spi3 {
+- /* On High speed expansion */
+- status = "okay";
+- label = "HS-SPI1";
+-};
+-
+-&blsp_spi5 {
+- /* On Low speed expansion */
+- status = "okay";
+- label = "LS-SPI0";
+-};
+-
+-&blsp1_uart1 {
+- status = "okay";
+- label = "LS-UART0";
+-};
+-
+-&blsp1_uart2 {
+- status = "okay";
+- label = "LS-UART1";
+-};
+-
+-&camss {
+- status = "okay";
+- ports {
+- port@0 {
+- reg = <0>;
+- csiphy0_ep: endpoint {
+- clock-lanes = <1>;
+- data-lanes = <0 2>;
+- remote-endpoint = <&ov5640_ep>;
+- status = "okay";
+- };
+- };
+- };
+-};
+-
+-&cci {
+- status = "okay";
+-};
+-
+-&cci_i2c0 {
+- camera_rear@3b {
+- compatible = "ovti,ov5640";
+- reg = <0x3b>;
+-
+- enable-gpios = <&msmgpio 34 GPIO_ACTIVE_HIGH>;
+- reset-gpios = <&msmgpio 35 GPIO_ACTIVE_LOW>;
+- pinctrl-names = "default";
+- pinctrl-0 = <&camera_rear_default>;
+-
+- clocks = <&gcc GCC_CAMSS_MCLK0_CLK>;
+- clock-names = "xclk";
+- clock-frequency = <23880000>;
+-
+- vdddo-supply = <&camera_vdddo_1v8>;
+- vdda-supply = <&camera_vdda_2v8>;
+- vddd-supply = <&camera_vddd_1v5>;
+-
+- /* No camera mezzanine by default */
+- status = "disabled";
+-
+- port {
+- ov5640_ep: endpoint {
+- clock-lanes = <1>;
+- data-lanes = <0 2>;
+- remote-endpoint = <&csiphy0_ep>;
+- };
+- };
+- };
+-};
+-
+-&dsi0_out {
+- data-lanes = <0 1 2 3>;
+- remote-endpoint = <&adv7533_in>;
+-};
+-
+-&lpass {
+- status = "okay";
+-};
+-
+-&mdss {
+- status = "okay";
+-};
+-
+-&pm8916_resin {
+- status = "okay";
+- linux,code = <KEY_VOLUMEDOWN>;
+-};
+-
+-&pronto {
+- status = "okay";
+-};
+-
+-&sdhc_1 {
+- status = "okay";
+-
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&sdc1_clk_on &sdc1_cmd_on &sdc1_data_on>;
+- pinctrl-1 = <&sdc1_clk_off &sdc1_cmd_off &sdc1_data_off>;
+-};
+-
+-&sdhc_2 {
+- status = "okay";
+-
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&sdc2_clk_on &sdc2_cmd_on &sdc2_data_on &sdc2_cd_on>;
+- pinctrl-1 = <&sdc2_clk_off &sdc2_cmd_off &sdc2_data_off &sdc2_cd_off>;
+-
+- cd-gpios = <&msmgpio 38 GPIO_ACTIVE_LOW>;
+-};
+-
+-&sound {
+- status = "okay";
+-
+- pinctrl-0 = <&cdc_pdm_lines_act &ext_sec_tlmm_lines_act &ext_mclk_tlmm_lines_act>;
+- pinctrl-1 = <&cdc_pdm_lines_sus &ext_sec_tlmm_lines_sus &ext_mclk_tlmm_lines_sus>;
+- pinctrl-names = "default", "sleep";
+- qcom,model = "DB410c";
+- qcom,audio-routing =
+- "AMIC2", "MIC BIAS Internal2",
+- "AMIC3", "MIC BIAS External1";
+-
+- external-dai-link@0 {
+- link-name = "ADV7533";
+- cpu {
+- sound-dai = <&lpass MI2S_QUATERNARY>;
+- };
+- codec {
+- sound-dai = <&adv_bridge 0>;
+- };
+- };
+-
+- internal-codec-playback-dai-link@0 {
+- link-name = "WCD";
+- cpu {
+- sound-dai = <&lpass MI2S_PRIMARY>;
+- };
+- codec {
+- sound-dai = <&lpass_codec 0>, <&wcd_codec 0>;
+- };
+- };
+-
+- internal-codec-capture-dai-link@0 {
+- link-name = "WCD-Capture";
+- cpu {
+- sound-dai = <&lpass MI2S_TERTIARY>;
+- };
+- codec {
+- sound-dai = <&lpass_codec 1>, <&wcd_codec 1>;
+- };
+- };
+-};
+-
+-&usb {
+- status = "okay";
+- extcon = <&usb_id>, <&usb_id>;
+-
+- pinctrl-names = "default", "device";
+- pinctrl-0 = <&usb_sw_sel_pm &usb_hub_reset_pm>;
+- pinctrl-1 = <&usb_sw_sel_pm_device &usb_hub_reset_pm_device>;
+-};
+-
+-&usb_hs_phy {
+- extcon = <&usb_id>;
+-};
+-
+-&wcd_codec {
+- clocks = <&gcc GCC_CODEC_DIGCODEC_CLK>;
+- clock-names = "mclk";
+- qcom,mbhc-vthreshold-low = <75 150 237 450 500>;
+- qcom,mbhc-vthreshold-high = <75 150 237 450 500>;
+-};
+-
+-/* Enable CoreSight */
+-&cti0 { status = "okay"; };
+-&cti1 { status = "okay"; };
+-&cti12 { status = "okay"; };
+-&cti13 { status = "okay"; };
+-&cti14 { status = "okay"; };
+-&cti15 { status = "okay"; };
+-&debug0 { status = "okay"; };
+-&debug1 { status = "okay"; };
+-&debug2 { status = "okay"; };
+-&debug3 { status = "okay"; };
+-&etf { status = "okay"; };
+-&etm0 { status = "okay"; };
+-&etm1 { status = "okay"; };
+-&etm2 { status = "okay"; };
+-&etm3 { status = "okay"; };
+-&etr { status = "okay"; };
+-&funnel0 { status = "okay"; };
+-&funnel1 { status = "okay"; };
+-&replicator { status = "okay"; };
+-&stm { status = "okay"; };
+-&tpiu { status = "okay"; };
+-
+-&smd_rpm_regulators {
+- vdd_l1_l2_l3-supply = <&pm8916_s3>;
+- vdd_l4_l5_l6-supply = <&pm8916_s4>;
+- vdd_l7-supply = <&pm8916_s4>;
+-
+- s3 {
+- regulator-min-microvolt = <375000>;
+- regulator-max-microvolt = <1562000>;
+- };
+-
+- s4 {
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+-
+- regulator-always-on;
+- regulator-boot-on;
+- };
+-
+- l1 {
+- regulator-min-microvolt = <375000>;
+- regulator-max-microvolt = <1525000>;
+- };
+-
+- l2 {
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <1200000>;
+- };
+-
+- l4 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l5 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l6 {
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+-
+- l7 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l8 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l9 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l10 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l11 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- regulator-allow-set-load;
+- regulator-system-load = <200000>;
+- };
+-
+- l12 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l13 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l14 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- /**
+- * 1.8v required on LS expansion
+- * for mezzanine boards
+- */
+- l15 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- regulator-always-on;
+- };
+-
+- l16 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-
+- l17 {
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
+- };
+-
+- l18 {
+- regulator-min-microvolt = <1750000>;
+- regulator-max-microvolt = <3337000>;
+- };
+-};
+-
+-/*
+- * 2mA drive strength is not enough when connecting multiple
+- * I2C devices with different pull up resistors.
+- */
+-&i2c2_default {
+- drive-strength = <16>;
+-};
+-
+-&i2c4_default {
+- drive-strength = <16>;
+-};
+-
+-&i2c6_default {
+- drive-strength = <16>;
+-};
+-
+-/*
+- * GPIO name legend: proper name = the GPIO line is used as GPIO
+- * NC = not connected (pin out but not routed from the chip to
+- * anything the board)
+- * "[PER]" = pin is muxed for [peripheral] (not GPIO)
+- * LSEC = Low Speed External Connector
+- * HSEC = High Speed External Connector
+- *
+- * Line names are taken from the schematic "DragonBoard410c"
+- * dated monday, august 31, 2015. Page 5 in particular.
+- *
+- * For the lines routed to the external connectors the
+- * lines are named after the 96Boards CE Specification 1.0,
+- * Appendix "Expansion Connector Signal Description".
+- *
+- * When the 96Board naming of a line and the schematic name of
+- * the same line are in conflict, the 96Board specification
+- * takes precedence, which means that the external UART on the
+- * LSEC is named UART0 while the schematic and SoC names this
+- * UART3. This is only for the informational lines i.e. "[FOO]",
+- * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
+- * ones actually used for GPIO.
+- */
+-
+-&msmgpio {
+- gpio-line-names =
+- "[UART0_TX]", /* GPIO_0, LSEC pin 5 */
+- "[UART0_RX]", /* GPIO_1, LSEC pin 7 */
+- "[UART0_CTS_N]", /* GPIO_2, LSEC pin 3 */
+- "[UART0_RTS_N]", /* GPIO_3, LSEC pin 9 */
+- "[UART1_TX]", /* GPIO_4, LSEC pin 11 */
+- "[UART1_RX]", /* GPIO_5, LSEC pin 13 */
+- "[I2C0_SDA]", /* GPIO_8, LSEC pin 17 */
+- "[I2C0_SCL]", /* GPIO_7, LSEC pin 15 */
+- "[SPI1_DOUT]", /* SPI1_MOSI, HSEC pin 1 */
+- "[SPI1_DIN]", /* SPI1_MISO, HSEC pin 11 */
+- "[SPI1_CS]", /* SPI1_CS_N, HSEC pin 7 */
+- "[SPI1_SCLK]", /* SPI1_CLK, HSEC pin 9 */
+- "GPIO-B", /* LS_EXP_GPIO_B, LSEC pin 24 */
+- "GPIO-C", /* LS_EXP_GPIO_C, LSEC pin 25 */
+- "[I2C3_SDA]", /* HSEC pin 38 */
+- "[I2C3_SCL]", /* HSEC pin 36 */
+- "[SPI0_MOSI]", /* LSEC pin 14 */
+- "[SPI0_MISO]", /* LSEC pin 10 */
+- "[SPI0_CS_N]", /* LSEC pin 12 */
+- "[SPI0_CLK]", /* LSEC pin 8 */
+- "HDMI_HPD_N", /* GPIO 20 */
+- "USR_LED_1_CTRL",
+- "[I2C1_SDA]", /* GPIO_22, LSEC pin 21 */
+- "[I2C1_SCL]", /* GPIO_23, LSEC pin 19 */
+- "GPIO-G", /* LS_EXP_GPIO_G, LSEC pin 29 */
+- "GPIO-H", /* LS_EXP_GPIO_H, LSEC pin 30 */
+- "[CSI0_MCLK]", /* HSEC pin 15 */
+- "[CSI1_MCLK]", /* HSEC pin 17 */
+- "GPIO-K", /* LS_EXP_GPIO_K, LSEC pin 33 */
+- "[I2C2_SDA]", /* HSEC pin 34 */
+- "[I2C2_SCL]", /* HSEC pin 32 */
+- "DSI2HDMI_INT_N",
+- "DSI_SW_SEL_APQ",
+- "GPIO-L", /* LS_EXP_GPIO_L, LSEC pin 34 */
+- "GPIO-J", /* LS_EXP_GPIO_J, LSEC pin 32 */
+- "GPIO-I", /* LS_EXP_GPIO_I, LSEC pin 31 */
+- "GPIO-A", /* LS_EXP_GPIO_A, LSEC pin 23 */
+- "FORCED_USB_BOOT",
+- "SD_CARD_DET_N",
+- "[WCSS_BT_SSBI]",
+- "[WCSS_WLAN_DATA_2]", /* GPIO 40 */
+- "[WCSS_WLAN_DATA_1]",
+- "[WCSS_WLAN_DATA_0]",
+- "[WCSS_WLAN_SET]",
+- "[WCSS_WLAN_CLK]",
+- "[WCSS_FM_SSBI]",
+- "[WCSS_FM_SDI]",
+- "[WCSS_BT_DAT_CTL]",
+- "[WCSS_BT_DAT_STB]",
+- "NC",
+- "NC", /* GPIO 50 */
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC", /* GPIO 60 */
+- "NC",
+- "NC",
+- "[CDC_PDM0_CLK]",
+- "[CDC_PDM0_SYNC]",
+- "[CDC_PDM0_TX0]",
+- "[CDC_PDM0_RX0]",
+- "[CDC_PDM0_RX1]",
+- "[CDC_PDM0_RX2]",
+- "GPIO-D", /* LS_EXP_GPIO_D, LSEC pin 26 */
+- "NC", /* GPIO 70 */
+- "NC",
+- "NC",
+- "NC",
+- "NC", /* GPIO 74 */
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "BOOT_CONFIG_0", /* GPIO 80 */
+- "BOOT_CONFIG_1",
+- "BOOT_CONFIG_2",
+- "BOOT_CONFIG_3",
+- "NC",
+- "NC",
+- "BOOT_CONFIG_5",
+- "NC",
+- "NC",
+- "NC",
+- "NC", /* GPIO 90 */
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC", /* GPIO 100 */
+- "NC",
+- "NC",
+- "NC",
+- "SSBI_GPS",
+- "NC",
+- "NC",
+- "KEY_VOLP_N",
+- "NC",
+- "NC",
+- "[LS_EXP_MI2S_WS]", /* GPIO 110 */
+- "NC",
+- "NC",
+- "[LS_EXP_MI2S_SCK]",
+- "[LS_EXP_MI2S_DATA0]",
+- "GPIO-E", /* LS_EXP_GPIO_E, LSEC pin 27 */
+- "NC",
+- "[DSI2HDMI_MI2S_WS]",
+- "[DSI2HDMI_MI2S_SCK]",
+- "[DSI2HDMI_MI2S_DATA0]",
+- "USR_LED_2_CTRL", /* GPIO 120 */
+- "SB_HS_ID";
+-
+- msmgpio_leds: msmgpio-leds {
+- pins = "gpio21", "gpio120";
+- function = "gpio";
+-
+- output-low;
+- };
+-
+- usb_id_default: usb-id-default {
+- pins = "gpio121";
+- function = "gpio";
+-
+- drive-strength = <8>;
+- input-enable;
+- bias-pull-up;
+- };
+-
+- adv7533_int_active: adv533-int-active {
+- pins = "gpio31";
+- function = "gpio";
+-
+- drive-strength = <16>;
+- bias-disable;
+- };
+-
+- adv7533_int_suspend: adv7533-int-suspend {
+- pins = "gpio31";
+- function = "gpio";
+-
+- drive-strength = <2>;
+- bias-disable;
+- };
+-
+- adv7533_switch_active: adv7533-switch-active {
+- pins = "gpio32";
+- function = "gpio";
+-
+- drive-strength = <16>;
+- bias-disable;
+- };
+-
+- adv7533_switch_suspend: adv7533-switch-suspend {
+- pins = "gpio32";
+- function = "gpio";
+-
+- drive-strength = <2>;
+- bias-disable;
+- };
+-
+- msm_key_volp_n_default: msm-key-volp-n-default {
+- pins = "gpio107";
+- function = "gpio";
+-
+- drive-strength = <8>;
+- input-enable;
+- bias-pull-up;
+- };
+-};
+-
+-&pm8916_gpios {
+- gpio-line-names =
+- "USR_LED_3_CTRL",
+- "USR_LED_4_CTRL",
+- "USB_HUB_RESET_N_PM",
+- "USB_SW_SEL_PM";
+-
+- usb_hub_reset_pm: usb-hub-reset-pm {
+- pins = "gpio3";
+- function = PMIC_GPIO_FUNC_NORMAL;
+-
+- input-disable;
+- output-high;
+- };
+-
+- usb_hub_reset_pm_device: usb-hub-reset-pm-device {
+- pins = "gpio3";
+- function = PMIC_GPIO_FUNC_NORMAL;
+-
+- output-low;
+- };
+-
+- usb_sw_sel_pm: usb-sw-sel-pm {
+- pins = "gpio4";
+- function = PMIC_GPIO_FUNC_NORMAL;
+-
+- power-source = <PM8916_GPIO_VPH>;
+- input-disable;
+- output-high;
+- };
+-
+- usb_sw_sel_pm_device: usb-sw-sel-pm-device {
+- pins = "gpio4";
+- function = PMIC_GPIO_FUNC_NORMAL;
+-
+- power-source = <PM8916_GPIO_VPH>;
+- input-disable;
+- output-low;
+- };
+-
+- pm8916_gpios_leds: pm8916-gpios-leds {
+- pins = "gpio1", "gpio2";
+- function = PMIC_GPIO_FUNC_NORMAL;
+-
+- output-low;
+- };
+-};
+-
+-&pm8916_mpps {
+- gpio-line-names =
+- "VDD_PX_BIAS",
+- "WLAN_LED_CTRL",
+- "BT_LED_CTRL",
+- "GPIO-F"; /* LS_EXP_GPIO_F, LSEC pin 28 */
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&ls_exp_gpio_f>;
+-
+- ls_exp_gpio_f: pm8916-mpp4 {
+- pins = "mpp4";
+- function = "digital";
+-
+- output-low;
+- power-source = <PM8916_MPP_L5>; // 1.8V
+- };
+-
+- pm8916_mpps_leds: pm8916-mpps-leds {
+- pins = "mpp2", "mpp3";
+- function = "digital";
+-
+- output-low;
+- };
+-};
+diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts
+index 757afa27424dd..d01a512634cfe 100644
+--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts
++++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts
+@@ -5,9 +5,1077 @@
+
+ /dts-v1/;
+
+-#include "apq8096-db820c.dtsi"
++#include "msm8996.dtsi"
++#include "pm8994.dtsi"
++#include "pmi8994.dtsi"
++#include <dt-bindings/input/input.h>
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
++#include <dt-bindings/sound/qcom,q6afe.h>
++#include <dt-bindings/sound/qcom,q6asm.h>
++
++/*
++ * GPIO name legend: proper name = the GPIO line is used as GPIO
++ * NC = not connected (pin out but not routed from the chip to
++ * anything the board)
++ * "[PER]" = pin is muxed for [peripheral] (not GPIO)
++ * LSEC = Low Speed External Connector
++ * P HSEC = Primary High Speed External Connector
++ * S HSEC = Secondary High Speed External Connector
++ * J14 = Camera Connector
++ * TP = Test Points
++ *
++ * Line names are taken from the schematic "DragonBoard 820c",
++ * drawing no: LM25-P2751-1
++ *
++ * For the lines routed to the external connectors the
++ * lines are named after the 96Boards CE Specification 1.0,
++ * Appendix "Expansion Connector Signal Description".
++ *
++ * When the 96Board naming of a line and the schematic name of
++ * the same line are in conflict, the 96Board specification
++ * takes precedence, which means that the external UART on the
++ * LSEC is named UART0 while the schematic and SoC names this
++ * UART3. This is only for the informational lines i.e. "[FOO]",
++ * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
++ * ones actually used for GPIO.
++ */
+
+ / {
+ model = "Qualcomm Technologies, Inc. DB820c";
+ compatible = "arrow,apq8096-db820c", "qcom,apq8096-sbc", "qcom,apq8096";
++
++ aliases {
++ serial0 = &blsp2_uart2;
++ serial1 = &blsp2_uart3;
++ serial2 = &blsp1_uart2;
++ i2c0 = &blsp1_i2c3;
++ i2c1 = &blsp2_i2c1;
++ i2c2 = &blsp2_i2c1;
++ spi0 = &blsp1_spi1;
++ spi1 = &blsp2_spi6;
++ };
++
++ chosen {
++ stdout-path = "serial0:115200n8";
++ };
++
++ clocks {
++ compatible = "simple-bus";
++ divclk4: divclk4 {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ clock-frequency = <32768>;
++ clock-output-names = "divclk4";
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&divclk4_pin_a>;
++ };
++
++ div1_mclk: divclk1 {
++ compatible = "gpio-gate-clock";
++ pinctrl-0 = <&audio_mclk>;
++ pinctrl-names = "default";
++ clocks = <&rpmcc RPM_SMD_DIV_CLK1>;
++ #clock-cells = <0>;
++ enable-gpios = <&pm8994_gpios 15 0>;
++ };
++ };
++
++ gpio_keys {
++ compatible = "gpio-keys";
++ #address-cells = <1>;
++ #size-cells = <0>;
++ autorepeat;
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&volume_up_gpio>;
++
++ button@0 {
++ label = "Volume Up";
++ linux,code = <KEY_VOLUMEUP>;
++ gpios = <&pm8994_gpios 2 GPIO_ACTIVE_LOW>;
++ };
++ };
++
++ usb2_id: usb2-id {
++ compatible = "linux,extcon-usb-gpio";
++ id-gpio = <&pmi8994_gpios 6 GPIO_ACTIVE_HIGH>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&usb2_vbus_det_gpio>;
++ };
++
++ usb3_id: usb3-id {
++ compatible = "linux,extcon-usb-gpio";
++ id-gpio = <&pm8994_gpios 22 GPIO_ACTIVE_HIGH>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&usb3_vbus_det_gpio>;
++ };
++
++ vph_pwr: vph-pwr-regulator {
++ compatible = "regulator-fixed";
++ regulator-name = "vph_pwr";
++ regulator-always-on;
++ regulator-boot-on;
++
++ regulator-min-microvolt = <3700000>;
++ regulator-max-microvolt = <3700000>;
++ };
++
++ wlan_en: wlan-en-1-8v {
++ pinctrl-names = "default";
++ pinctrl-0 = <&wlan_en_gpios>;
++ compatible = "regulator-fixed";
++ regulator-name = "wlan-en-regulator";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++
++ gpio = <&pm8994_gpios 8 0>;
++
++ /* WLAN card specific delay */
++ startup-delay-us = <70000>;
++ enable-active-high;
++ };
++};
++
++&blsp1_i2c3 {
++ /* On Low speed expansion */
++ label = "LS-I2C0";
++ status = "okay";
++};
++
++&blsp1_spi1 {
++ /* On Low speed expansion */
++ label = "LS-SPI0";
++ status = "okay";
++};
++
++&blsp1_uart2 {
++ label = "BT-UART";
++ status = "okay";
++
++ bluetooth {
++ compatible = "qcom,qca6174-bt";
++
++ /* bt_disable_n gpio */
++ enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
++
++ clocks = <&divclk4>;
++ };
++};
++
++&adsp_pil {
++ status = "okay";
++};
++
++&blsp2_i2c1 {
++ /* On High speed expansion */
++ label = "HS-I2C2";
++ status = "okay";
++};
++
++&blsp2_i2c1 {
++ /* On Low speed expansion */
++ label = "LS-I2C1";
++ status = "okay";
++};
++
++&blsp2_spi6 {
++ /* On High speed expansion */
++ label = "HS-SPI1";
++ status = "okay";
++};
++
++&blsp2_uart2 {
++ label = "LS-UART1";
++ status = "okay";
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&blsp2_uart2_2pins_default>;
++ pinctrl-1 = <&blsp2_uart2_2pins_sleep>;
++};
++
++&blsp2_uart3 {
++ label = "LS-UART0";
++ status = "disabled";
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&blsp2_uart3_4pins_default>;
++ pinctrl-1 = <&blsp2_uart3_4pins_sleep>;
++};
++
++&camss {
++ vdda-supply = <&vreg_l2a_1p25>;
++};
++
++&gpu {
++ status = "okay";
++};
++
++&hdmi {
++ status = "okay";
++
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&hdmi_hpd_active &hdmi_ddc_active>;
++ pinctrl-1 = <&hdmi_hpd_suspend &hdmi_ddc_suspend>;
++
++ core-vdda-supply = <&vreg_l12a_1p8>;
++ core-vcc-supply = <&vreg_s4a_1p8>;
++};
++
++&hdmi_phy {
++ status = "okay";
++
++ vddio-supply = <&vreg_l12a_1p8>;
++ vcca-supply = <&vreg_l28a_0p925>;
++ #phy-cells = <0>;
++};
++
++&hsusb_phy1 {
++ status = "okay";
++
++ vdda-pll-supply = <&vreg_l12a_1p8>;
++ vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
++};
++
++&hsusb_phy2 {
++ status = "okay";
++
++ vdda-pll-supply = <&vreg_l12a_1p8>;
++ vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
++};
++
++&mdp {
++ status = "okay";
++};
++
++&mdss {
++ status = "okay";
++};
++
++&mmcc {
++ vdd-gfx-supply = <&vdd_gfx>;
++};
++
++&pm8994_resin {
++ status = "okay";
++ linux,code = <KEY_VOLUMEDOWN>;
++};
++
++&tlmm {
++ gpio-line-names =
++ "[SPI0_DOUT]", /* GPIO_0, BLSP1_SPI_MOSI, LSEC pin 14 */
++ "[SPI0_DIN]", /* GPIO_1, BLSP1_SPI_MISO, LSEC pin 10 */
++ "[SPI0_CS]", /* GPIO_2, BLSP1_SPI_CS_N, LSEC pin 12 */
++ "[SPI0_SCLK]", /* GPIO_3, BLSP1_SPI_CLK, LSEC pin 8 */
++ "[UART1_TxD]", /* GPIO_4, BLSP8_UART_TX, LSEC pin 11 */
++ "[UART1_RxD]", /* GPIO_5, BLSP8_UART_RX, LSEC pin 13 */
++ "[I2C1_SDA]", /* GPIO_6, BLSP8_I2C_SDA, LSEC pin 21 */
++ "[I2C1_SCL]", /* GPIO_7, BLSP8_I2C_SCL, LSEC pin 19 */
++ "GPIO-H", /* GPIO_8, LCD0_RESET_N, LSEC pin 30 */
++ "TP93", /* GPIO_9 */
++ "GPIO-G", /* GPIO_10, MDP_VSYNC_P, LSEC pin 29 */
++ "[MDP_VSYNC_S]", /* GPIO_11, S HSEC pin 55 */
++ "NC", /* GPIO_12 */
++ "[CSI0_MCLK]", /* GPIO_13, CAM_MCLK0, P HSEC pin 15 */
++ "[CAM_MCLK1]", /* GPIO_14, J14 pin 11 */
++ "[CSI1_MCLK]", /* GPIO_15, CAM_MCLK2, P HSEC pin 17 */
++ "TP99", /* GPIO_16 */
++ "[I2C2_SDA]", /* GPIO_17, CCI_I2C_SDA0, P HSEC pin 34 */
++ "[I2C2_SCL]", /* GPIO_18, CCI_I2C_SCL0, P HSEC pin 32 */
++ "[CCI_I2C_SDA1]", /* GPIO_19, S HSEC pin 38 */
++ "[CCI_I2C_SCL1]", /* GPIO_20, S HSEC pin 36 */
++ "FLASH_STROBE_EN", /* GPIO_21, S HSEC pin 5 */
++ "FLASH_STROBE_TRIG", /* GPIO_22, S HSEC pin 1 */
++ "GPIO-K", /* GPIO_23, CAM2_RST_N, LSEC pin 33 */
++ "GPIO-D", /* GPIO_24, LSEC pin 26 */
++ "GPIO-I", /* GPIO_25, CAM0_RST_N, LSEC pin 31 */
++ "GPIO-J", /* GPIO_26, CAM0_STANDBY_N, LSEC pin 32 */
++ "BLSP6_I2C_SDA", /* GPIO_27 */
++ "BLSP6_I2C_SCL", /* GPIO_28 */
++ "GPIO-B", /* GPIO_29, TS0_RESET_N, LSEC pin 24 */
++ "GPIO30", /* GPIO_30, S HSEC pin 4 */
++ "HDMI_CEC", /* GPIO_31 */
++ "HDMI_DDC_CLOCK", /* GPIO_32 */
++ "HDMI_DDC_DATA", /* GPIO_33 */
++ "HDMI_HOT_PLUG_DETECT", /* GPIO_34 */
++ "PCIE0_RST_N", /* GPIO_35 */
++ "PCIE0_CLKREQ_N", /* GPIO_36 */
++ "PCIE0_WAKE", /* GPIO_37 */
++ "SD_CARD_DET_N", /* GPIO_38 */
++ "TSIF1_SYNC", /* GPIO_39, S HSEC pin 48 */
++ "W_DISABLE_N", /* GPIO_40 */
++ "[BLSP9_UART_TX]", /* GPIO_41 */
++ "[BLSP9_UART_RX]", /* GPIO_42 */
++ "[BLSP2_UART_CTS_N]", /* GPIO_43 */
++ "[BLSP2_UART_RFR_N]", /* GPIO_44 */
++ "[BLSP3_UART_TX]", /* GPIO_45 */
++ "[BLSP3_UART_RX]", /* GPIO_46 */
++ "[I2C0_SDA]", /* GPIO_47, LS_I2C0_SDA, LSEC pin 17 */
++ "[I2C0_SCL]", /* GPIO_48, LS_I2C0_SCL, LSEC pin 15 */
++ "[UART0_TxD]", /* GPIO_49, BLSP9_UART_TX, LSEC pin 5 */
++ "[UART0_RxD]", /* GPIO_50, BLSP9_UART_RX, LSEC pin 7 */
++ "[UART0_CTS]", /* GPIO_51, BLSP9_UART_CTS_N, LSEC pin 3 */
++ "[UART0_RTS]", /* GPIO_52, BLSP9_UART_RFR_N, LSEC pin 9 */
++ "[CODEC_INT1_N]", /* GPIO_53 */
++ "[CODEC_INT2_N]", /* GPIO_54 */
++ "[BLSP7_I2C_SDA]", /* GPIO_55 */
++ "[BLSP7_I2C_SCL]", /* GPIO_56 */
++ "MI2S_MCLK", /* GPIO_57, S HSEC pin 3 */
++ "[PCM_CLK]", /* GPIO_58, QUA_MI2S_SCK, LSEC pin 18 */
++ "[PCM_FS]", /* GPIO_59, QUA_MI2S_WS, LSEC pin 16 */
++ "[PCM_DO]", /* GPIO_60, QUA_MI2S_DATA0, LSEC pin 20 */
++ "[PCM_DI]", /* GPIO_61, QUA_MI2S_DATA1, LSEC pin 22 */
++ "GPIO-E", /* GPIO_62, LSEC pin 27 */
++ "TP87", /* GPIO_63 */
++ "[CODEC_RST_N]", /* GPIO_64 */
++ "[PCM1_CLK]", /* GPIO_65 */
++ "[PCM1_SYNC]", /* GPIO_66 */
++ "[PCM1_DIN]", /* GPIO_67 */
++ "[PCM1_DOUT]", /* GPIO_68 */
++ "AUDIO_REF_CLK", /* GPIO_69 */
++ "SLIMBUS_CLK", /* GPIO_70 */
++ "SLIMBUS_DATA0", /* GPIO_71 */
++ "SLIMBUS_DATA1", /* GPIO_72 */
++ "NC", /* GPIO_73 */
++ "NC", /* GPIO_74 */
++ "NC", /* GPIO_75 */
++ "NC", /* GPIO_76 */
++ "TP94", /* GPIO_77 */
++ "NC", /* GPIO_78 */
++ "TP95", /* GPIO_79 */
++ "GPIO-A", /* GPIO_80, MEMS_RESET_N, LSEC pin 23 */
++ "TP88", /* GPIO_81 */
++ "TP89", /* GPIO_82 */
++ "TP90", /* GPIO_83 */
++ "TP91", /* GPIO_84 */
++ "[SD_DAT0]", /* GPIO_85, BLSP12_SPI_MOSI, P HSEC pin 1 */
++ "[SD_CMD]", /* GPIO_86, BLSP12_SPI_MISO, P HSEC pin 11 */
++ "[SD_DAT3]", /* GPIO_87, BLSP12_SPI_CS_N, P HSEC pin 7 */
++ "[SD_SCLK]", /* GPIO_88, BLSP12_SPI_CLK, P HSEC pin 9 */
++ "TSIF1_CLK", /* GPIO_89, S HSEC pin 42 */
++ "TSIF1_EN", /* GPIO_90, S HSEC pin 46 */
++ "TSIF1_DATA", /* GPIO_91, S HSEC pin 44 */
++ "NC", /* GPIO_92 */
++ "TSIF2_CLK", /* GPIO_93, S HSEC pin 52 */
++ "TSIF2_EN", /* GPIO_94, S HSEC pin 56 */
++ "TSIF2_DATA", /* GPIO_95, S HSEC pin 54 */
++ "TSIF2_SYNC", /* GPIO_96, S HSEC pin 58 */
++ "NC", /* GPIO_97 */
++ "CAM1_STANDBY_N", /* GPIO_98 */
++ "NC", /* GPIO_99 */
++ "NC", /* GPIO_100 */
++ "[LCD1_RESET_N]", /* GPIO_101, S HSEC pin 51 */
++ "BOOT_CONFIG1", /* GPIO_102 */
++ "USB_HUB_RESET", /* GPIO_103 */
++ "CAM1_RST_N", /* GPIO_104 */
++ "NC", /* GPIO_105 */
++ "NC", /* GPIO_106 */
++ "NC", /* GPIO_107 */
++ "NC", /* GPIO_108 */
++ "NC", /* GPIO_109 */
++ "NC", /* GPIO_110 */
++ "NC", /* GPIO_111 */
++ "NC", /* GPIO_112 */
++ "PMI8994_BUA", /* GPIO_113 */
++ "PCIE2_RST_N", /* GPIO_114 */
++ "PCIE2_CLKREQ_N", /* GPIO_115 */
++ "PCIE2_WAKE", /* GPIO_116 */
++ "SSC_IRQ_0", /* GPIO_117 */
++ "SSC_IRQ_1", /* GPIO_118 */
++ "SSC_IRQ_2", /* GPIO_119 */
++ "NC", /* GPIO_120 */
++ "GPIO121", /* GPIO_121, S HSEC pin 2 */
++ "NC", /* GPIO_122 */
++ "SSC_IRQ_6", /* GPIO_123 */
++ "SSC_IRQ_7", /* GPIO_124 */
++ "GPIO-C", /* GPIO_125, TS_INT0, LSEC pin 25 */
++ "BOOT_CONFIG5", /* GPIO_126 */
++ "NC", /* GPIO_127 */
++ "NC", /* GPIO_128 */
++ "BOOT_CONFIG7", /* GPIO_129 */
++ "PCIE1_RST_N", /* GPIO_130 */
++ "PCIE1_CLKREQ_N", /* GPIO_131 */
++ "PCIE1_WAKE", /* GPIO_132 */
++ "GPIO-L", /* GPIO_133, CAM2_STANDBY_N, LSEC pin 34 */
++ "NC", /* GPIO_134 */
++ "NC", /* GPIO_135 */
++ "BOOT_CONFIG8", /* GPIO_136 */
++ "NC", /* GPIO_137 */
++ "NC", /* GPIO_138 */
++ "GPS_SSBI2", /* GPIO_139 */
++ "GPS_SSBI1", /* GPIO_140 */
++ "NC", /* GPIO_141 */
++ "NC", /* GPIO_142 */
++ "NC", /* GPIO_143 */
++ "BOOT_CONFIG6", /* GPIO_144 */
++ "NC", /* GPIO_145 */
++ "NC", /* GPIO_146 */
++ "NC", /* GPIO_147 */
++ "NC", /* GPIO_148 */
++ "NC"; /* GPIO_149 */
++
++ sdc2_cd_on: sdc2_cd_on {
++ mux {
++ pins = "gpio38";
++ function = "gpio";
++ };
++
++ config {
++ pins = "gpio38";
++ bias-pull-up; /* pull up */
++ drive-strength = <16>; /* 16 MA */
++ };
++ };
++
++ sdc2_cd_off: sdc2_cd_off {
++ mux {
++ pins = "gpio38";
++ function = "gpio";
++ };
++
++ config {
++ pins = "gpio38";
++ bias-pull-up; /* pull up */
++ drive-strength = <2>; /* 2 MA */
++ };
++ };
++
++ hdmi_hpd_active: hdmi_hpd_active {
++ mux {
++ pins = "gpio34";
++ function = "hdmi_hot";
++ };
++
++ config {
++ pins = "gpio34";
++ bias-pull-down;
++ drive-strength = <16>;
++ };
++ };
++
++ hdmi_hpd_suspend: hdmi_hpd_suspend {
++ mux {
++ pins = "gpio34";
++ function = "hdmi_hot";
++ };
++
++ config {
++ pins = "gpio34";
++ bias-pull-down;
++ drive-strength = <2>;
++ };
++ };
++
++ hdmi_ddc_active: hdmi_ddc_active {
++ mux {
++ pins = "gpio32", "gpio33";
++ function = "hdmi_ddc";
++ };
++
++ config {
++ pins = "gpio32", "gpio33";
++ drive-strength = <2>;
++ bias-pull-up;
++ };
++ };
++
++ hdmi_ddc_suspend: hdmi_ddc_suspend {
++ mux {
++ pins = "gpio32", "gpio33";
++ function = "hdmi_ddc";
++ };
++
++ config {
++ pins = "gpio32", "gpio33";
++ drive-strength = <2>;
++ bias-pull-down;
++ };
++ };
++};
++
++&pcie0 {
++ status = "okay";
++ perst-gpio = <&tlmm 35 GPIO_ACTIVE_LOW>;
++ vddpe-3v3-supply = <&wlan_en>;
++ vdda-supply = <&vreg_l28a_0p925>;
++};
++
++&pcie1 {
++ status = "okay";
++ perst-gpio = <&tlmm 130 GPIO_ACTIVE_LOW>;
++ vdda-supply = <&vreg_l28a_0p925>;
++};
++
++&pcie2 {
++ status = "okay";
++ perst-gpio = <&tlmm 114 GPIO_ACTIVE_LOW>;
++ vdda-supply = <&vreg_l28a_0p925>;
++};
++
++&pcie_phy {
++ status = "okay";
++
++ vdda-phy-supply = <&vreg_l28a_0p925>;
++ vdda-pll-supply = <&vreg_l12a_1p8>;
++};
++
++&pm8994_gpios {
++ gpio-line-names =
++ "NC",
++ "KEY_VOLP_N",
++ "NC",
++ "BL1_PWM",
++ "GPIO-F", /* BL0_PWM, LSEC pin 28 */
++ "BL1_EN",
++ "NC",
++ "WLAN_EN",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "DIVCLK1",
++ "DIVCLK2",
++ "DIVCLK3",
++ "DIVCLK4",
++ "BT_EN",
++ "PMIC_SLB",
++ "PMIC_BUA",
++ "USB_VBUS_DET";
++
++ pinctrl-names = "default";
++ pinctrl-0 = <&ls_exp_gpio_f &bt_en_gpios>;
++
++ ls_exp_gpio_f: pm8994_gpio5 {
++ pinconf {
++ pins = "gpio5";
++ output-low;
++ power-source = <2>; // PM8994_GPIO_S4, 1.8V
++ };
++ };
++
++ bt_en_gpios: bt_en_gpios {
++ pinconf {
++ pins = "gpio19";
++ function = PMIC_GPIO_FUNC_NORMAL;
++ output-low;
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
++ bias-pull-down;
++ };
++ };
++
++ wlan_en_gpios: wlan_en_gpios {
++ pinconf {
++ pins = "gpio8";
++ function = PMIC_GPIO_FUNC_NORMAL;
++ output-low;
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
++ bias-pull-down;
++ };
++ };
++
++ audio_mclk: clk_div1 {
++ pinconf {
++ pins = "gpio15";
++ function = "func1";
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ };
++ };
++
++ volume_up_gpio: pm8996_gpio2 {
++ pinconf {
++ pins = "gpio2";
++ function = "normal";
++ input-enable;
++ drive-push-pull;
++ bias-pull-up;
++ qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ };
++ };
++
++ divclk4_pin_a: divclk4 {
++ pinconf {
++ pins = "gpio18";
++ function = PMIC_GPIO_FUNC_FUNC2;
++
++ bias-disable;
++ power-source = <PM8994_GPIO_S4>;
++ };
++ };
++
++ usb3_vbus_det_gpio: pm8996_gpio22 {
++ pinconf {
++ pins = "gpio22";
++ function = PMIC_GPIO_FUNC_NORMAL;
++ input-enable;
++ bias-pull-down;
++ qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ };
++ };
++};
++
++&pm8994_mpps {
++ gpio-line-names =
++ "VDDPX_BIAS",
++ "WIFI_LED",
++ "NC",
++ "BT_LED",
++ "PM_MPP05",
++ "PM_MPP06",
++ "PM_MPP07",
++ "NC";
++};
++
++&pm8994_spmi_regulators {
++ qcom,saw-reg = <&saw3>;
++ s9 {
++ qcom,saw-slave;
++ };
++ s10 {
++ qcom,saw-slave;
++ };
++ s11 {
++ qcom,saw-leader;
++ regulator-always-on;
++ regulator-min-microvolt = <980000>;
++ regulator-max-microvolt = <980000>;
++ };
++};
++
++&pmi8994_gpios {
++ gpio-line-names =
++ "NC",
++ "SPKR_AMP_EN1",
++ "SPKR_AMP_EN2",
++ "TP61",
++ "NC",
++ "USB2_VBUS_DET",
++ "NC",
++ "NC",
++ "NC",
++ "NC";
++
++ usb2_vbus_det_gpio: pmi8996_gpio6 {
++ pinconf {
++ pins = "gpio6";
++ function = PMIC_GPIO_FUNC_NORMAL;
++ input-enable;
++ bias-pull-down;
++ qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
++ power-source = <PM8994_GPIO_S4>; // 1.8V
++ };
++ };
++};
++
++&pmi8994_spmi_regulators {
++ vdd_gfx: s2@1700 {
++ reg = <0x1700 0x100>;
++ regulator-name = "VDD_GFX";
++ regulator-min-microvolt = <980000>;
++ regulator-max-microvolt = <980000>;
++ };
++};
++
++&rpm_requests {
++ pm8994-regulators {
++ compatible = "qcom,rpm-pm8994-regulators";
++
++ vdd_s1-supply = <&vph_pwr>;
++ vdd_s2-supply = <&vph_pwr>;
++ vdd_s3-supply = <&vph_pwr>;
++ vdd_s4-supply = <&vph_pwr>;
++ vdd_s5-supply = <&vph_pwr>;
++ vdd_s6-supply = <&vph_pwr>;
++ vdd_s7-supply = <&vph_pwr>;
++ vdd_s8-supply = <&vph_pwr>;
++ vdd_s9-supply = <&vph_pwr>;
++ vdd_s10-supply = <&vph_pwr>;
++ vdd_s11-supply = <&vph_pwr>;
++ vdd_s12-supply = <&vph_pwr>;
++ vdd_l1-supply = <&vreg_s1b_1p025>;
++ vdd_l2_l26_l28-supply = <&vreg_s3a_1p3>;
++ vdd_l3_l11-supply = <&vreg_s3a_1p3>;
++ vdd_l4_l27_l31-supply = <&vreg_s3a_1p3>;
++ vdd_l5_l7-supply = <&vreg_s5a_2p15>;
++ vdd_l6_l12_l32-supply = <&vreg_s5a_2p15>;
++ vdd_l8_l16_l30-supply = <&vph_pwr>;
++ vdd_l9_l10_l18_l22-supply = <&vph_pwr_bbyp>;
++ vdd_l13_l19_l23_l24-supply = <&vph_pwr_bbyp>;
++ vdd_l14_l15-supply = <&vreg_s5a_2p15>;
++ vdd_l17_l29-supply = <&vph_pwr_bbyp>;
++ vdd_l20_l21-supply = <&vph_pwr_bbyp>;
++ vdd_l25-supply = <&vreg_s3a_1p3>;
++ vdd_lvs1_lvs2-supply = <&vreg_s4a_1p8>;
++
++ vreg_s3a_1p3: s3 {
++ regulator-name = "vreg_s3a_1p3";
++ regulator-min-microvolt = <1300000>;
++ regulator-max-microvolt = <1300000>;
++ };
++
++ /**
++ * 1.8v required on LS expansion
++ * for mezzanine boards
++ */
++ vreg_s4a_1p8: s4 {
++ regulator-name = "vreg_s4a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ regulator-always-on;
++ };
++ vreg_s5a_2p15: s5 {
++ regulator-name = "vreg_s5a_2p15";
++ regulator-min-microvolt = <2150000>;
++ regulator-max-microvolt = <2150000>;
++ };
++ vreg_s7a_1p0: s7 {
++ regulator-name = "vreg_s7a_1p0";
++ regulator-min-microvolt = <800000>;
++ regulator-max-microvolt = <800000>;
++ };
++
++ vreg_l1a_1p0: l1 {
++ regulator-name = "vreg_l1a_1p0";
++ regulator-min-microvolt = <1000000>;
++ regulator-max-microvolt = <1000000>;
++ };
++ vreg_l2a_1p25: l2 {
++ regulator-name = "vreg_l2a_1p25";
++ regulator-min-microvolt = <1250000>;
++ regulator-max-microvolt = <1250000>;
++ };
++ vreg_l3a_0p875: l3 {
++ regulator-name = "vreg_l3a_0p875";
++ regulator-min-microvolt = <850000>;
++ regulator-max-microvolt = <850000>;
++ };
++ vreg_l4a_1p225: l4 {
++ regulator-name = "vreg_l4a_1p225";
++ regulator-min-microvolt = <1225000>;
++ regulator-max-microvolt = <1225000>;
++ };
++ vreg_l6a_1p2: l6 {
++ regulator-name = "vreg_l6a_1p2";
++ regulator-min-microvolt = <1200000>;
++ regulator-max-microvolt = <1200000>;
++ };
++ vreg_l8a_1p8: l8 {
++ regulator-name = "vreg_l8a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l9a_1p8: l9 {
++ regulator-name = "vreg_l9a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l10a_1p8: l10 {
++ regulator-name = "vreg_l10a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l11a_1p15: l11 {
++ regulator-name = "vreg_l11a_1p15";
++ regulator-min-microvolt = <1150000>;
++ regulator-max-microvolt = <1150000>;
++ };
++ vreg_l12a_1p8: l12 {
++ regulator-name = "vreg_l12a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l13a_2p95: l13 {
++ regulator-name = "vreg_l13a_2p95";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <2950000>;
++ };
++ vreg_l14a_1p8: l14 {
++ regulator-name = "vreg_l14a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l15a_1p8: l15 {
++ regulator-name = "vreg_l15a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l16a_2p7: l16 {
++ regulator-name = "vreg_l16a_2p7";
++ regulator-min-microvolt = <2700000>;
++ regulator-max-microvolt = <2700000>;
++ };
++ vreg_l17a_2p8: l17 {
++ regulator-name = "vreg_l17a_2p8";
++ regulator-min-microvolt = <2500000>;
++ regulator-max-microvolt = <2500000>;
++ };
++ vreg_l18a_2p85: l18 {
++ regulator-name = "vreg_l18a_2p85";
++ regulator-min-microvolt = <2700000>;
++ regulator-max-microvolt = <2900000>;
++ };
++ vreg_l19a_2p8: l19 {
++ regulator-name = "vreg_l19a_2p8";
++ regulator-min-microvolt = <3000000>;
++ regulator-max-microvolt = <3000000>;
++ };
++ vreg_l20a_2p95: l20 {
++ regulator-name = "vreg_l20a_2p95";
++ regulator-min-microvolt = <2950000>;
++ regulator-max-microvolt = <2950000>;
++ regulator-allow-set-load;
++ };
++ vreg_l21a_2p95: l21 {
++ regulator-name = "vreg_l21a_2p95";
++ regulator-min-microvolt = <2950000>;
++ regulator-max-microvolt = <2950000>;
++ regulator-allow-set-load;
++ regulator-system-load = <200000>;
++ };
++ vreg_l22a_3p0: l22 {
++ regulator-name = "vreg_l22a_3p0";
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
++ };
++ vreg_l23a_2p8: l23 {
++ regulator-name = "vreg_l23a_2p8";
++ regulator-min-microvolt = <2800000>;
++ regulator-max-microvolt = <2800000>;
++ };
++ vreg_l24a_3p075: l24 {
++ regulator-name = "vreg_l24a_3p075";
++ regulator-min-microvolt = <3075000>;
++ regulator-max-microvolt = <3075000>;
++ };
++ vreg_l25a_1p2: l25 {
++ regulator-name = "vreg_l25a_1p2";
++ regulator-min-microvolt = <1200000>;
++ regulator-max-microvolt = <1200000>;
++ regulator-allow-set-load;
++ };
++ vreg_l26a_0p8: l27 {
++ regulator-name = "vreg_l26a_0p8";
++ regulator-min-microvolt = <1000000>;
++ regulator-max-microvolt = <1000000>;
++ };
++ vreg_l28a_0p925: l28 {
++ regulator-name = "vreg_l28a_0p925";
++ regulator-min-microvolt = <925000>;
++ regulator-max-microvolt = <925000>;
++ regulator-allow-set-load;
++ };
++ vreg_l29a_2p8: l29 {
++ regulator-name = "vreg_l29a_2p8";
++ regulator-min-microvolt = <2800000>;
++ regulator-max-microvolt = <2800000>;
++ };
++ vreg_l30a_1p8: l30 {
++ regulator-name = "vreg_l30a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++ vreg_l32a_1p8: l32 {
++ regulator-name = "vreg_l32a_1p8";
++ regulator-min-microvolt = <1800000>;
++ regulator-max-microvolt = <1800000>;
++ };
++
++ vreg_lvs1a_1p8: lvs1 {
++ regulator-name = "vreg_lvs1a_1p8";
++ };
++
++ vreg_lvs2a_1p8: lvs2 {
++ regulator-name = "vreg_lvs2a_1p8";
++ };
++ };
++
++ pmi8994-regulators {
++ compatible = "qcom,rpm-pmi8994-regulators";
++
++ vdd_s1-supply = <&vph_pwr>;
++ vdd_s2-supply = <&vph_pwr>;
++ vdd_s3-supply = <&vph_pwr>;
++ vdd_bst_byp-supply = <&vph_pwr>;
++
++ vph_pwr_bbyp: boost-bypass {
++ regulator-name = "vph_pwr_bbyp";
++ regulator-min-microvolt = <3300000>;
++ regulator-max-microvolt = <3300000>;
++ };
++
++ vreg_s1b_1p025: s1 {
++ regulator-name = "vreg_s1b_1p025";
++ regulator-min-microvolt = <1025000>;
++ regulator-max-microvolt = <1025000>;
++ };
++ };
++};
++
++&sdhc2 {
++ /* External SD card */
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&sdc2_state_on &sdc2_cd_on>;
++ pinctrl-1 = <&sdc2_state_off &sdc2_cd_off>;
++ cd-gpios = <&tlmm 38 GPIO_ACTIVE_LOW>;
++ vmmc-supply = <&vreg_l21a_2p95>;
++ vqmmc-supply = <&vreg_l13a_2p95>;
++ status = "okay";
++};
++
++&q6asmdai {
++ dai@0 {
++ reg = <0>;
++ };
++
++ dai@1 {
++ reg = <1>;
++ };
++
++ dai@2 {
++ reg = <2>;
++ };
++};
++
++&sound {
++ compatible = "qcom,apq8096-sndcard";
++ model = "DB820c";
++ audio-routing = "RX_BIAS", "MCLK",
++ "MM_DL1", "MultiMedia1 Playback",
++ "MM_DL2", "MultiMedia2 Playback",
++ "MultiMedia3 Capture", "MM_UL3";
++
++ mm1-dai-link {
++ link-name = "MultiMedia1";
++ cpu {
++ sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA1>;
++ };
++ };
++
++ mm2-dai-link {
++ link-name = "MultiMedia2";
++ cpu {
++ sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA2>;
++ };
++ };
++
++ mm3-dai-link {
++ link-name = "MultiMedia3";
++ cpu {
++ sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA3>;
++ };
++ };
++
++ hdmi-dai-link {
++ link-name = "HDMI";
++ cpu {
++ sound-dai = <&q6afedai HDMI_RX>;
++ };
++
++ platform {
++ sound-dai = <&q6routing>;
++ };
++
++ codec {
++ sound-dai = <&hdmi 0>;
++ };
++ };
++
++ slim-dai-link {
++ link-name = "SLIM Playback";
++ cpu {
++ sound-dai = <&q6afedai SLIMBUS_6_RX>;
++ };
++
++ platform {
++ sound-dai = <&q6routing>;
++ };
++
++ codec {
++ sound-dai = <&wcd9335 6>;
++ };
++ };
++
++ slimcap-dai-link {
++ link-name = "SLIM Capture";
++ cpu {
++ sound-dai = <&q6afedai SLIMBUS_0_TX>;
++ };
++
++ platform {
++ sound-dai = <&q6routing>;
++ };
++
++ codec {
++ sound-dai = <&wcd9335 1>;
++ };
++ };
++};
++
++&ufsphy {
++ status = "okay";
++
++ vdda-phy-supply = <&vreg_l28a_0p925>;
++ vdda-pll-supply = <&vreg_l12a_1p8>;
++ vddp-ref-clk-supply = <&vreg_l25a_1p2>;
++};
++
++&ufshc {
++ status = "okay";
++
++ vcc-supply = <&vreg_l20a_2p95>;
++ vccq-supply = <&vreg_l25a_1p2>;
++ vccq2-supply = <&vreg_s4a_1p8>;
++
++ vcc-max-microamp = <600000>;
++ vccq-max-microamp = <450000>;
++ vccq2-max-microamp = <450000>;
++};
++
++&usb2 {
++ status = "okay";
++ extcon = <&usb2_id>;
++
++ dwc3@7600000 {
++ extcon = <&usb2_id>;
++ dr_mode = "otg";
++ maximum-speed = "high-speed";
++ };
++};
++
++&usb3 {
++ status = "okay";
++ extcon = <&usb3_id>;
++
++ dwc3@6a00000 {
++ extcon = <&usb3_id>;
++ dr_mode = "otg";
++ };
++};
++
++&usb3phy {
++ status = "okay";
++
++ vdda-phy-supply = <&vreg_l28a_0p925>;
++ vdda-pll-supply = <&vreg_l12a_1p8>;
++
++};
++
++&venus {
++ status = "okay";
++};
++
++&wcd9335 {
++ clock-names = "mclk", "slimbus";
++ clocks = <&div1_mclk>,
++ <&rpmcc RPM_SMD_BB_CLK1>;
++
++ vdd-buck-supply = <&vreg_s4a_1p8>;
++ vdd-buck-sido-supply = <&vreg_s4a_1p8>;
++ vdd-tx-supply = <&vreg_s4a_1p8>;
++ vdd-rx-supply = <&vreg_s4a_1p8>;
++ vdd-io-supply = <&vreg_s4a_1p8>;
+ };
+diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+deleted file mode 100644
+index 51e17094d7b18..0000000000000
+--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
++++ /dev/null
+@@ -1,1105 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+- */
+-
+-#include "msm8996.dtsi"
+-#include "pm8994.dtsi"
+-#include "pmi8994.dtsi"
+-#include <dt-bindings/input/input.h>
+-#include <dt-bindings/gpio/gpio.h>
+-#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+-#include <dt-bindings/sound/qcom,q6afe.h>
+-#include <dt-bindings/sound/qcom,q6asm.h>
+-
+-/*
+- * GPIO name legend: proper name = the GPIO line is used as GPIO
+- * NC = not connected (pin out but not routed from the chip to
+- * anything the board)
+- * "[PER]" = pin is muxed for [peripheral] (not GPIO)
+- * LSEC = Low Speed External Connector
+- * P HSEC = Primary High Speed External Connector
+- * S HSEC = Secondary High Speed External Connector
+- * J14 = Camera Connector
+- * TP = Test Points
+- *
+- * Line names are taken from the schematic "DragonBoard 820c",
+- * drawing no: LM25-P2751-1
+- *
+- * For the lines routed to the external connectors the
+- * lines are named after the 96Boards CE Specification 1.0,
+- * Appendix "Expansion Connector Signal Description".
+- *
+- * When the 96Board naming of a line and the schematic name of
+- * the same line are in conflict, the 96Board specification
+- * takes precedence, which means that the external UART on the
+- * LSEC is named UART0 while the schematic and SoC names this
+- * UART3. This is only for the informational lines i.e. "[FOO]",
+- * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
+- * ones actually used for GPIO.
+- */
+-
+-/ {
+- aliases {
+- serial0 = &blsp2_uart2;
+- serial1 = &blsp2_uart3;
+- serial2 = &blsp1_uart2;
+- i2c0 = &blsp1_i2c3;
+- i2c1 = &blsp2_i2c1;
+- i2c2 = &blsp2_i2c1;
+- spi0 = &blsp1_spi1;
+- spi1 = &blsp2_spi6;
+- };
+-
+- chosen {
+- stdout-path = "serial0:115200n8";
+- };
+-
+- clocks {
+- compatible = "simple-bus";
+- divclk4: divclk4 {
+- compatible = "fixed-clock";
+- #clock-cells = <0>;
+- clock-frequency = <32768>;
+- clock-output-names = "divclk4";
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&divclk4_pin_a>;
+- };
+-
+- div1_mclk: divclk1 {
+- compatible = "gpio-gate-clock";
+- pinctrl-0 = <&audio_mclk>;
+- pinctrl-names = "default";
+- clocks = <&rpmcc RPM_SMD_DIV_CLK1>;
+- #clock-cells = <0>;
+- enable-gpios = <&pm8994_gpios 15 0>;
+- };
+- };
+-
+- gpio_keys {
+- compatible = "gpio-keys";
+- #address-cells = <1>;
+- #size-cells = <0>;
+- autorepeat;
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&volume_up_gpio>;
+-
+- button@0 {
+- label = "Volume Up";
+- linux,code = <KEY_VOLUMEUP>;
+- gpios = <&pm8994_gpios 2 GPIO_ACTIVE_LOW>;
+- };
+- };
+-
+- usb2_id: usb2-id {
+- compatible = "linux,extcon-usb-gpio";
+- id-gpio = <&pmi8994_gpios 6 GPIO_ACTIVE_HIGH>;
+- pinctrl-names = "default";
+- pinctrl-0 = <&usb2_vbus_det_gpio>;
+- };
+-
+- usb3_id: usb3-id {
+- compatible = "linux,extcon-usb-gpio";
+- id-gpio = <&pm8994_gpios 22 GPIO_ACTIVE_HIGH>;
+- pinctrl-names = "default";
+- pinctrl-0 = <&usb3_vbus_det_gpio>;
+- };
+-
+- vph_pwr: vph-pwr-regulator {
+- compatible = "regulator-fixed";
+- regulator-name = "vph_pwr";
+- regulator-always-on;
+- regulator-boot-on;
+-
+- regulator-min-microvolt = <3700000>;
+- regulator-max-microvolt = <3700000>;
+- };
+-
+- wlan_en: wlan-en-1-8v {
+- pinctrl-names = "default";
+- pinctrl-0 = <&wlan_en_gpios>;
+- compatible = "regulator-fixed";
+- regulator-name = "wlan-en-regulator";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+-
+- gpio = <&pm8994_gpios 8 0>;
+-
+- /* WLAN card specific delay */
+- startup-delay-us = <70000>;
+- enable-active-high;
+- };
+-};
+-
+-&blsp1_i2c3 {
+- /* On Low speed expansion */
+- label = "LS-I2C0";
+- status = "okay";
+-};
+-
+-&blsp1_spi1 {
+- /* On Low speed expansion */
+- label = "LS-SPI0";
+- status = "okay";
+-};
+-
+-&blsp1_uart2 {
+- label = "BT-UART";
+- status = "okay";
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&blsp1_uart2_default>;
+- pinctrl-1 = <&blsp1_uart2_sleep>;
+-
+- bluetooth {
+- compatible = "qcom,qca6174-bt";
+-
+- /* bt_disable_n gpio */
+- enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+-
+- clocks = <&divclk4>;
+- };
+-};
+-
+-&adsp_pil {
+- status = "okay";
+-};
+-
+-&blsp2_i2c1 {
+- /* On High speed expansion */
+- label = "HS-I2C2";
+- status = "okay";
+-};
+-
+-&blsp2_i2c1 {
+- /* On Low speed expansion */
+- label = "LS-I2C1";
+- status = "okay";
+-};
+-
+-&blsp2_spi6 {
+- /* On High speed expansion */
+- label = "HS-SPI1";
+- status = "okay";
+-};
+-
+-&blsp2_uart2 {
+- label = "LS-UART1";
+- status = "okay";
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&blsp2_uart2_2pins_default>;
+- pinctrl-1 = <&blsp2_uart2_2pins_sleep>;
+-};
+-
+-&blsp2_uart3 {
+- label = "LS-UART0";
+- status = "disabled";
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&blsp2_uart3_4pins_default>;
+- pinctrl-1 = <&blsp2_uart3_4pins_sleep>;
+-};
+-
+-&camss {
+- vdda-supply = <&vreg_l2a_1p25>;
+-};
+-
+-&gpu {
+- status = "okay";
+-};
+-
+-&hdmi {
+- status = "okay";
+-
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&hdmi_hpd_active &hdmi_ddc_active>;
+- pinctrl-1 = <&hdmi_hpd_suspend &hdmi_ddc_suspend>;
+-
+- core-vdda-supply = <&vreg_l12a_1p8>;
+- core-vcc-supply = <&vreg_s4a_1p8>;
+-};
+-
+-&hdmi_phy {
+- status = "okay";
+-
+- vddio-supply = <&vreg_l12a_1p8>;
+- vcca-supply = <&vreg_l28a_0p925>;
+- #phy-cells = <0>;
+-};
+-
+-&hsusb_phy1 {
+- status = "okay";
+-
+- vdda-pll-supply = <&vreg_l12a_1p8>;
+- vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
+-};
+-
+-&hsusb_phy2 {
+- status = "okay";
+-
+- vdda-pll-supply = <&vreg_l12a_1p8>;
+- vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
+-};
+-
+-&mdp {
+- status = "okay";
+-};
+-
+-&mdss {
+- status = "okay";
+-};
+-
+-&mmcc {
+- vdd-gfx-supply = <&vdd_gfx>;
+-};
+-
+-&pm8994_resin {
+- status = "okay";
+- linux,code = <KEY_VOLUMEDOWN>;
+-};
+-
+-&tlmm {
+- gpio-line-names =
+- "[SPI0_DOUT]", /* GPIO_0, BLSP1_SPI_MOSI, LSEC pin 14 */
+- "[SPI0_DIN]", /* GPIO_1, BLSP1_SPI_MISO, LSEC pin 10 */
+- "[SPI0_CS]", /* GPIO_2, BLSP1_SPI_CS_N, LSEC pin 12 */
+- "[SPI0_SCLK]", /* GPIO_3, BLSP1_SPI_CLK, LSEC pin 8 */
+- "[UART1_TxD]", /* GPIO_4, BLSP8_UART_TX, LSEC pin 11 */
+- "[UART1_RxD]", /* GPIO_5, BLSP8_UART_RX, LSEC pin 13 */
+- "[I2C1_SDA]", /* GPIO_6, BLSP8_I2C_SDA, LSEC pin 21 */
+- "[I2C1_SCL]", /* GPIO_7, BLSP8_I2C_SCL, LSEC pin 19 */
+- "GPIO-H", /* GPIO_8, LCD0_RESET_N, LSEC pin 30 */
+- "TP93", /* GPIO_9 */
+- "GPIO-G", /* GPIO_10, MDP_VSYNC_P, LSEC pin 29 */
+- "[MDP_VSYNC_S]", /* GPIO_11, S HSEC pin 55 */
+- "NC", /* GPIO_12 */
+- "[CSI0_MCLK]", /* GPIO_13, CAM_MCLK0, P HSEC pin 15 */
+- "[CAM_MCLK1]", /* GPIO_14, J14 pin 11 */
+- "[CSI1_MCLK]", /* GPIO_15, CAM_MCLK2, P HSEC pin 17 */
+- "TP99", /* GPIO_16 */
+- "[I2C2_SDA]", /* GPIO_17, CCI_I2C_SDA0, P HSEC pin 34 */
+- "[I2C2_SCL]", /* GPIO_18, CCI_I2C_SCL0, P HSEC pin 32 */
+- "[CCI_I2C_SDA1]", /* GPIO_19, S HSEC pin 38 */
+- "[CCI_I2C_SCL1]", /* GPIO_20, S HSEC pin 36 */
+- "FLASH_STROBE_EN", /* GPIO_21, S HSEC pin 5 */
+- "FLASH_STROBE_TRIG", /* GPIO_22, S HSEC pin 1 */
+- "GPIO-K", /* GPIO_23, CAM2_RST_N, LSEC pin 33 */
+- "GPIO-D", /* GPIO_24, LSEC pin 26 */
+- "GPIO-I", /* GPIO_25, CAM0_RST_N, LSEC pin 31 */
+- "GPIO-J", /* GPIO_26, CAM0_STANDBY_N, LSEC pin 32 */
+- "BLSP6_I2C_SDA", /* GPIO_27 */
+- "BLSP6_I2C_SCL", /* GPIO_28 */
+- "GPIO-B", /* GPIO_29, TS0_RESET_N, LSEC pin 24 */
+- "GPIO30", /* GPIO_30, S HSEC pin 4 */
+- "HDMI_CEC", /* GPIO_31 */
+- "HDMI_DDC_CLOCK", /* GPIO_32 */
+- "HDMI_DDC_DATA", /* GPIO_33 */
+- "HDMI_HOT_PLUG_DETECT", /* GPIO_34 */
+- "PCIE0_RST_N", /* GPIO_35 */
+- "PCIE0_CLKREQ_N", /* GPIO_36 */
+- "PCIE0_WAKE", /* GPIO_37 */
+- "SD_CARD_DET_N", /* GPIO_38 */
+- "TSIF1_SYNC", /* GPIO_39, S HSEC pin 48 */
+- "W_DISABLE_N", /* GPIO_40 */
+- "[BLSP9_UART_TX]", /* GPIO_41 */
+- "[BLSP9_UART_RX]", /* GPIO_42 */
+- "[BLSP2_UART_CTS_N]", /* GPIO_43 */
+- "[BLSP2_UART_RFR_N]", /* GPIO_44 */
+- "[BLSP3_UART_TX]", /* GPIO_45 */
+- "[BLSP3_UART_RX]", /* GPIO_46 */
+- "[I2C0_SDA]", /* GPIO_47, LS_I2C0_SDA, LSEC pin 17 */
+- "[I2C0_SCL]", /* GPIO_48, LS_I2C0_SCL, LSEC pin 15 */
+- "[UART0_TxD]", /* GPIO_49, BLSP9_UART_TX, LSEC pin 5 */
+- "[UART0_RxD]", /* GPIO_50, BLSP9_UART_RX, LSEC pin 7 */
+- "[UART0_CTS]", /* GPIO_51, BLSP9_UART_CTS_N, LSEC pin 3 */
+- "[UART0_RTS]", /* GPIO_52, BLSP9_UART_RFR_N, LSEC pin 9 */
+- "[CODEC_INT1_N]", /* GPIO_53 */
+- "[CODEC_INT2_N]", /* GPIO_54 */
+- "[BLSP7_I2C_SDA]", /* GPIO_55 */
+- "[BLSP7_I2C_SCL]", /* GPIO_56 */
+- "MI2S_MCLK", /* GPIO_57, S HSEC pin 3 */
+- "[PCM_CLK]", /* GPIO_58, QUA_MI2S_SCK, LSEC pin 18 */
+- "[PCM_FS]", /* GPIO_59, QUA_MI2S_WS, LSEC pin 16 */
+- "[PCM_DO]", /* GPIO_60, QUA_MI2S_DATA0, LSEC pin 20 */
+- "[PCM_DI]", /* GPIO_61, QUA_MI2S_DATA1, LSEC pin 22 */
+- "GPIO-E", /* GPIO_62, LSEC pin 27 */
+- "TP87", /* GPIO_63 */
+- "[CODEC_RST_N]", /* GPIO_64 */
+- "[PCM1_CLK]", /* GPIO_65 */
+- "[PCM1_SYNC]", /* GPIO_66 */
+- "[PCM1_DIN]", /* GPIO_67 */
+- "[PCM1_DOUT]", /* GPIO_68 */
+- "AUDIO_REF_CLK", /* GPIO_69 */
+- "SLIMBUS_CLK", /* GPIO_70 */
+- "SLIMBUS_DATA0", /* GPIO_71 */
+- "SLIMBUS_DATA1", /* GPIO_72 */
+- "NC", /* GPIO_73 */
+- "NC", /* GPIO_74 */
+- "NC", /* GPIO_75 */
+- "NC", /* GPIO_76 */
+- "TP94", /* GPIO_77 */
+- "NC", /* GPIO_78 */
+- "TP95", /* GPIO_79 */
+- "GPIO-A", /* GPIO_80, MEMS_RESET_N, LSEC pin 23 */
+- "TP88", /* GPIO_81 */
+- "TP89", /* GPIO_82 */
+- "TP90", /* GPIO_83 */
+- "TP91", /* GPIO_84 */
+- "[SD_DAT0]", /* GPIO_85, BLSP12_SPI_MOSI, P HSEC pin 1 */
+- "[SD_CMD]", /* GPIO_86, BLSP12_SPI_MISO, P HSEC pin 11 */
+- "[SD_DAT3]", /* GPIO_87, BLSP12_SPI_CS_N, P HSEC pin 7 */
+- "[SD_SCLK]", /* GPIO_88, BLSP12_SPI_CLK, P HSEC pin 9 */
+- "TSIF1_CLK", /* GPIO_89, S HSEC pin 42 */
+- "TSIF1_EN", /* GPIO_90, S HSEC pin 46 */
+- "TSIF1_DATA", /* GPIO_91, S HSEC pin 44 */
+- "NC", /* GPIO_92 */
+- "TSIF2_CLK", /* GPIO_93, S HSEC pin 52 */
+- "TSIF2_EN", /* GPIO_94, S HSEC pin 56 */
+- "TSIF2_DATA", /* GPIO_95, S HSEC pin 54 */
+- "TSIF2_SYNC", /* GPIO_96, S HSEC pin 58 */
+- "NC", /* GPIO_97 */
+- "CAM1_STANDBY_N", /* GPIO_98 */
+- "NC", /* GPIO_99 */
+- "NC", /* GPIO_100 */
+- "[LCD1_RESET_N]", /* GPIO_101, S HSEC pin 51 */
+- "BOOT_CONFIG1", /* GPIO_102 */
+- "USB_HUB_RESET", /* GPIO_103 */
+- "CAM1_RST_N", /* GPIO_104 */
+- "NC", /* GPIO_105 */
+- "NC", /* GPIO_106 */
+- "NC", /* GPIO_107 */
+- "NC", /* GPIO_108 */
+- "NC", /* GPIO_109 */
+- "NC", /* GPIO_110 */
+- "NC", /* GPIO_111 */
+- "NC", /* GPIO_112 */
+- "PMI8994_BUA", /* GPIO_113 */
+- "PCIE2_RST_N", /* GPIO_114 */
+- "PCIE2_CLKREQ_N", /* GPIO_115 */
+- "PCIE2_WAKE", /* GPIO_116 */
+- "SSC_IRQ_0", /* GPIO_117 */
+- "SSC_IRQ_1", /* GPIO_118 */
+- "SSC_IRQ_2", /* GPIO_119 */
+- "NC", /* GPIO_120 */
+- "GPIO121", /* GPIO_121, S HSEC pin 2 */
+- "NC", /* GPIO_122 */
+- "SSC_IRQ_6", /* GPIO_123 */
+- "SSC_IRQ_7", /* GPIO_124 */
+- "GPIO-C", /* GPIO_125, TS_INT0, LSEC pin 25 */
+- "BOOT_CONFIG5", /* GPIO_126 */
+- "NC", /* GPIO_127 */
+- "NC", /* GPIO_128 */
+- "BOOT_CONFIG7", /* GPIO_129 */
+- "PCIE1_RST_N", /* GPIO_130 */
+- "PCIE1_CLKREQ_N", /* GPIO_131 */
+- "PCIE1_WAKE", /* GPIO_132 */
+- "GPIO-L", /* GPIO_133, CAM2_STANDBY_N, LSEC pin 34 */
+- "NC", /* GPIO_134 */
+- "NC", /* GPIO_135 */
+- "BOOT_CONFIG8", /* GPIO_136 */
+- "NC", /* GPIO_137 */
+- "NC", /* GPIO_138 */
+- "GPS_SSBI2", /* GPIO_139 */
+- "GPS_SSBI1", /* GPIO_140 */
+- "NC", /* GPIO_141 */
+- "NC", /* GPIO_142 */
+- "NC", /* GPIO_143 */
+- "BOOT_CONFIG6", /* GPIO_144 */
+- "NC", /* GPIO_145 */
+- "NC", /* GPIO_146 */
+- "NC", /* GPIO_147 */
+- "NC", /* GPIO_148 */
+- "NC"; /* GPIO_149 */
+-
+- sdc2_cd_on: sdc2_cd_on {
+- mux {
+- pins = "gpio38";
+- function = "gpio";
+- };
+-
+- config {
+- pins = "gpio38";
+- bias-pull-up; /* pull up */
+- drive-strength = <16>; /* 16 MA */
+- };
+- };
+-
+- sdc2_cd_off: sdc2_cd_off {
+- mux {
+- pins = "gpio38";
+- function = "gpio";
+- };
+-
+- config {
+- pins = "gpio38";
+- bias-pull-up; /* pull up */
+- drive-strength = <2>; /* 2 MA */
+- };
+- };
+-
+- blsp1_uart2_default: blsp1_uart2_default {
+- mux {
+- pins = "gpio41", "gpio42", "gpio43", "gpio44";
+- function = "blsp_uart2";
+- };
+-
+- config {
+- pins = "gpio41", "gpio42", "gpio43", "gpio44";
+- drive-strength = <16>;
+- bias-disable;
+- };
+- };
+-
+- blsp1_uart2_sleep: blsp1_uart2_sleep {
+- mux {
+- pins = "gpio41", "gpio42", "gpio43", "gpio44";
+- function = "gpio";
+- };
+-
+- config {
+- pins = "gpio41", "gpio42", "gpio43", "gpio44";
+- drive-strength = <2>;
+- bias-disable;
+- };
+- };
+-
+- hdmi_hpd_active: hdmi_hpd_active {
+- mux {
+- pins = "gpio34";
+- function = "hdmi_hot";
+- };
+-
+- config {
+- pins = "gpio34";
+- bias-pull-down;
+- drive-strength = <16>;
+- };
+- };
+-
+- hdmi_hpd_suspend: hdmi_hpd_suspend {
+- mux {
+- pins = "gpio34";
+- function = "hdmi_hot";
+- };
+-
+- config {
+- pins = "gpio34";
+- bias-pull-down;
+- drive-strength = <2>;
+- };
+- };
+-
+- hdmi_ddc_active: hdmi_ddc_active {
+- mux {
+- pins = "gpio32", "gpio33";
+- function = "hdmi_ddc";
+- };
+-
+- config {
+- pins = "gpio32", "gpio33";
+- drive-strength = <2>;
+- bias-pull-up;
+- };
+- };
+-
+- hdmi_ddc_suspend: hdmi_ddc_suspend {
+- mux {
+- pins = "gpio32", "gpio33";
+- function = "hdmi_ddc";
+- };
+-
+- config {
+- pins = "gpio32", "gpio33";
+- drive-strength = <2>;
+- bias-pull-down;
+- };
+- };
+-};
+-
+-&pcie0 {
+- status = "okay";
+- perst-gpio = <&tlmm 35 GPIO_ACTIVE_LOW>;
+- vddpe-3v3-supply = <&wlan_en>;
+- vdda-supply = <&vreg_l28a_0p925>;
+-};
+-
+-&pcie1 {
+- status = "okay";
+- perst-gpio = <&tlmm 130 GPIO_ACTIVE_LOW>;
+- vdda-supply = <&vreg_l28a_0p925>;
+-};
+-
+-&pcie2 {
+- status = "okay";
+- perst-gpio = <&tlmm 114 GPIO_ACTIVE_LOW>;
+- vdda-supply = <&vreg_l28a_0p925>;
+-};
+-
+-&pcie_phy {
+- status = "okay";
+-
+- vdda-phy-supply = <&vreg_l28a_0p925>;
+- vdda-pll-supply = <&vreg_l12a_1p8>;
+-};
+-
+-&pm8994_gpios {
+- gpio-line-names =
+- "NC",
+- "KEY_VOLP_N",
+- "NC",
+- "BL1_PWM",
+- "GPIO-F", /* BL0_PWM, LSEC pin 28 */
+- "BL1_EN",
+- "NC",
+- "WLAN_EN",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "NC",
+- "DIVCLK1",
+- "DIVCLK2",
+- "DIVCLK3",
+- "DIVCLK4",
+- "BT_EN",
+- "PMIC_SLB",
+- "PMIC_BUA",
+- "USB_VBUS_DET";
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&ls_exp_gpio_f &bt_en_gpios>;
+-
+- ls_exp_gpio_f: pm8994_gpio5 {
+- pinconf {
+- pins = "gpio5";
+- output-low;
+- power-source = <2>; // PM8994_GPIO_S4, 1.8V
+- };
+- };
+-
+- bt_en_gpios: bt_en_gpios {
+- pinconf {
+- pins = "gpio19";
+- function = PMIC_GPIO_FUNC_NORMAL;
+- output-low;
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+- bias-pull-down;
+- };
+- };
+-
+- wlan_en_gpios: wlan_en_gpios {
+- pinconf {
+- pins = "gpio8";
+- function = PMIC_GPIO_FUNC_NORMAL;
+- output-low;
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+- bias-pull-down;
+- };
+- };
+-
+- audio_mclk: clk_div1 {
+- pinconf {
+- pins = "gpio15";
+- function = "func1";
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- };
+- };
+-
+- volume_up_gpio: pm8996_gpio2 {
+- pinconf {
+- pins = "gpio2";
+- function = "normal";
+- input-enable;
+- drive-push-pull;
+- bias-pull-up;
+- qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- };
+- };
+-
+- divclk4_pin_a: divclk4 {
+- pinconf {
+- pins = "gpio18";
+- function = PMIC_GPIO_FUNC_FUNC2;
+-
+- bias-disable;
+- power-source = <PM8994_GPIO_S4>;
+- };
+- };
+-
+- usb3_vbus_det_gpio: pm8996_gpio22 {
+- pinconf {
+- pins = "gpio22";
+- function = PMIC_GPIO_FUNC_NORMAL;
+- input-enable;
+- bias-pull-down;
+- qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- };
+- };
+-};
+-
+-&pm8994_mpps {
+- gpio-line-names =
+- "VDDPX_BIAS",
+- "WIFI_LED",
+- "NC",
+- "BT_LED",
+- "PM_MPP05",
+- "PM_MPP06",
+- "PM_MPP07",
+- "NC";
+-};
+-
+-&pm8994_spmi_regulators {
+- qcom,saw-reg = <&saw3>;
+- s9 {
+- qcom,saw-slave;
+- };
+- s10 {
+- qcom,saw-slave;
+- };
+- s11 {
+- qcom,saw-leader;
+- regulator-always-on;
+- regulator-min-microvolt = <980000>;
+- regulator-max-microvolt = <980000>;
+- };
+-};
+-
+-&pmi8994_gpios {
+- gpio-line-names =
+- "NC",
+- "SPKR_AMP_EN1",
+- "SPKR_AMP_EN2",
+- "TP61",
+- "NC",
+- "USB2_VBUS_DET",
+- "NC",
+- "NC",
+- "NC",
+- "NC";
+-
+- usb2_vbus_det_gpio: pmi8996_gpio6 {
+- pinconf {
+- pins = "gpio6";
+- function = PMIC_GPIO_FUNC_NORMAL;
+- input-enable;
+- bias-pull-down;
+- qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
+- power-source = <PM8994_GPIO_S4>; // 1.8V
+- };
+- };
+-};
+-
+-&pmi8994_spmi_regulators {
+- vdd_gfx: s2@1700 {
+- reg = <0x1700 0x100>;
+- regulator-name = "VDD_GFX";
+- regulator-min-microvolt = <980000>;
+- regulator-max-microvolt = <980000>;
+- };
+-};
+-
+-&rpm_requests {
+- pm8994-regulators {
+- compatible = "qcom,rpm-pm8994-regulators";
+-
+- vdd_s1-supply = <&vph_pwr>;
+- vdd_s2-supply = <&vph_pwr>;
+- vdd_s3-supply = <&vph_pwr>;
+- vdd_s4-supply = <&vph_pwr>;
+- vdd_s5-supply = <&vph_pwr>;
+- vdd_s6-supply = <&vph_pwr>;
+- vdd_s7-supply = <&vph_pwr>;
+- vdd_s8-supply = <&vph_pwr>;
+- vdd_s9-supply = <&vph_pwr>;
+- vdd_s10-supply = <&vph_pwr>;
+- vdd_s11-supply = <&vph_pwr>;
+- vdd_s12-supply = <&vph_pwr>;
+- vdd_l1-supply = <&vreg_s1b_1p025>;
+- vdd_l2_l26_l28-supply = <&vreg_s3a_1p3>;
+- vdd_l3_l11-supply = <&vreg_s3a_1p3>;
+- vdd_l4_l27_l31-supply = <&vreg_s3a_1p3>;
+- vdd_l5_l7-supply = <&vreg_s5a_2p15>;
+- vdd_l6_l12_l32-supply = <&vreg_s5a_2p15>;
+- vdd_l8_l16_l30-supply = <&vph_pwr>;
+- vdd_l9_l10_l18_l22-supply = <&vph_pwr_bbyp>;
+- vdd_l13_l19_l23_l24-supply = <&vph_pwr_bbyp>;
+- vdd_l14_l15-supply = <&vreg_s5a_2p15>;
+- vdd_l17_l29-supply = <&vph_pwr_bbyp>;
+- vdd_l20_l21-supply = <&vph_pwr_bbyp>;
+- vdd_l25-supply = <&vreg_s3a_1p3>;
+- vdd_lvs1_lvs2-supply = <&vreg_s4a_1p8>;
+-
+- vreg_s3a_1p3: s3 {
+- regulator-name = "vreg_s3a_1p3";
+- regulator-min-microvolt = <1300000>;
+- regulator-max-microvolt = <1300000>;
+- };
+-
+- /**
+- * 1.8v required on LS expansion
+- * for mezzanine boards
+- */
+- vreg_s4a_1p8: s4 {
+- regulator-name = "vreg_s4a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- regulator-always-on;
+- };
+- vreg_s5a_2p15: s5 {
+- regulator-name = "vreg_s5a_2p15";
+- regulator-min-microvolt = <2150000>;
+- regulator-max-microvolt = <2150000>;
+- };
+- vreg_s7a_1p0: s7 {
+- regulator-name = "vreg_s7a_1p0";
+- regulator-min-microvolt = <800000>;
+- regulator-max-microvolt = <800000>;
+- };
+-
+- vreg_l1a_1p0: l1 {
+- regulator-name = "vreg_l1a_1p0";
+- regulator-min-microvolt = <1000000>;
+- regulator-max-microvolt = <1000000>;
+- };
+- vreg_l2a_1p25: l2 {
+- regulator-name = "vreg_l2a_1p25";
+- regulator-min-microvolt = <1250000>;
+- regulator-max-microvolt = <1250000>;
+- };
+- vreg_l3a_0p875: l3 {
+- regulator-name = "vreg_l3a_0p875";
+- regulator-min-microvolt = <850000>;
+- regulator-max-microvolt = <850000>;
+- };
+- vreg_l4a_1p225: l4 {
+- regulator-name = "vreg_l4a_1p225";
+- regulator-min-microvolt = <1225000>;
+- regulator-max-microvolt = <1225000>;
+- };
+- vreg_l6a_1p2: l6 {
+- regulator-name = "vreg_l6a_1p2";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <1200000>;
+- };
+- vreg_l8a_1p8: l8 {
+- regulator-name = "vreg_l8a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l9a_1p8: l9 {
+- regulator-name = "vreg_l9a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l10a_1p8: l10 {
+- regulator-name = "vreg_l10a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l11a_1p15: l11 {
+- regulator-name = "vreg_l11a_1p15";
+- regulator-min-microvolt = <1150000>;
+- regulator-max-microvolt = <1150000>;
+- };
+- vreg_l12a_1p8: l12 {
+- regulator-name = "vreg_l12a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l13a_2p95: l13 {
+- regulator-name = "vreg_l13a_2p95";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <2950000>;
+- };
+- vreg_l14a_1p8: l14 {
+- regulator-name = "vreg_l14a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l15a_1p8: l15 {
+- regulator-name = "vreg_l15a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l16a_2p7: l16 {
+- regulator-name = "vreg_l16a_2p7";
+- regulator-min-microvolt = <2700000>;
+- regulator-max-microvolt = <2700000>;
+- };
+- vreg_l17a_2p8: l17 {
+- regulator-name = "vreg_l17a_2p8";
+- regulator-min-microvolt = <2500000>;
+- regulator-max-microvolt = <2500000>;
+- };
+- vreg_l18a_2p85: l18 {
+- regulator-name = "vreg_l18a_2p85";
+- regulator-min-microvolt = <2700000>;
+- regulator-max-microvolt = <2900000>;
+- };
+- vreg_l19a_2p8: l19 {
+- regulator-name = "vreg_l19a_2p8";
+- regulator-min-microvolt = <3000000>;
+- regulator-max-microvolt = <3000000>;
+- };
+- vreg_l20a_2p95: l20 {
+- regulator-name = "vreg_l20a_2p95";
+- regulator-min-microvolt = <2950000>;
+- regulator-max-microvolt = <2950000>;
+- regulator-allow-set-load;
+- };
+- vreg_l21a_2p95: l21 {
+- regulator-name = "vreg_l21a_2p95";
+- regulator-min-microvolt = <2950000>;
+- regulator-max-microvolt = <2950000>;
+- regulator-allow-set-load;
+- regulator-system-load = <200000>;
+- };
+- vreg_l22a_3p0: l22 {
+- regulator-name = "vreg_l22a_3p0";
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
+- };
+- vreg_l23a_2p8: l23 {
+- regulator-name = "vreg_l23a_2p8";
+- regulator-min-microvolt = <2800000>;
+- regulator-max-microvolt = <2800000>;
+- };
+- vreg_l24a_3p075: l24 {
+- regulator-name = "vreg_l24a_3p075";
+- regulator-min-microvolt = <3075000>;
+- regulator-max-microvolt = <3075000>;
+- };
+- vreg_l25a_1p2: l25 {
+- regulator-name = "vreg_l25a_1p2";
+- regulator-min-microvolt = <1200000>;
+- regulator-max-microvolt = <1200000>;
+- regulator-allow-set-load;
+- };
+- vreg_l26a_0p8: l27 {
+- regulator-name = "vreg_l26a_0p8";
+- regulator-min-microvolt = <1000000>;
+- regulator-max-microvolt = <1000000>;
+- };
+- vreg_l28a_0p925: l28 {
+- regulator-name = "vreg_l28a_0p925";
+- regulator-min-microvolt = <925000>;
+- regulator-max-microvolt = <925000>;
+- regulator-allow-set-load;
+- };
+- vreg_l29a_2p8: l29 {
+- regulator-name = "vreg_l29a_2p8";
+- regulator-min-microvolt = <2800000>;
+- regulator-max-microvolt = <2800000>;
+- };
+- vreg_l30a_1p8: l30 {
+- regulator-name = "vreg_l30a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+- vreg_l32a_1p8: l32 {
+- regulator-name = "vreg_l32a_1p8";
+- regulator-min-microvolt = <1800000>;
+- regulator-max-microvolt = <1800000>;
+- };
+-
+- vreg_lvs1a_1p8: lvs1 {
+- regulator-name = "vreg_lvs1a_1p8";
+- };
+-
+- vreg_lvs2a_1p8: lvs2 {
+- regulator-name = "vreg_lvs2a_1p8";
+- };
+- };
+-
+- pmi8994-regulators {
+- compatible = "qcom,rpm-pmi8994-regulators";
+-
+- vdd_s1-supply = <&vph_pwr>;
+- vdd_s2-supply = <&vph_pwr>;
+- vdd_s3-supply = <&vph_pwr>;
+- vdd_bst_byp-supply = <&vph_pwr>;
+-
+- vph_pwr_bbyp: boost-bypass {
+- regulator-name = "vph_pwr_bbyp";
+- regulator-min-microvolt = <3300000>;
+- regulator-max-microvolt = <3300000>;
+- };
+-
+- vreg_s1b_1p025: s1 {
+- regulator-name = "vreg_s1b_1p025";
+- regulator-min-microvolt = <1025000>;
+- regulator-max-microvolt = <1025000>;
+- };
+- };
+-};
+-
+-&sdhc2 {
+- /* External SD card */
+- pinctrl-names = "default", "sleep";
+- pinctrl-0 = <&sdc2_state_on &sdc2_cd_on>;
+- pinctrl-1 = <&sdc2_state_off &sdc2_cd_off>;
+- cd-gpios = <&tlmm 38 GPIO_ACTIVE_LOW>;
+- vmmc-supply = <&vreg_l21a_2p95>;
+- vqmmc-supply = <&vreg_l13a_2p95>;
+- status = "okay";
+-};
+-
+-&q6asmdai {
+- dai@0 {
+- reg = <0>;
+- };
+-
+- dai@1 {
+- reg = <1>;
+- };
+-
+- dai@2 {
+- reg = <2>;
+- };
+-};
+-
+-&sound {
+- compatible = "qcom,apq8096-sndcard";
+- model = "DB820c";
+- audio-routing = "RX_BIAS", "MCLK",
+- "MM_DL1", "MultiMedia1 Playback",
+- "MM_DL2", "MultiMedia2 Playback",
+- "MultiMedia3 Capture", "MM_UL3";
+-
+- mm1-dai-link {
+- link-name = "MultiMedia1";
+- cpu {
+- sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA1>;
+- };
+- };
+-
+- mm2-dai-link {
+- link-name = "MultiMedia2";
+- cpu {
+- sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA2>;
+- };
+- };
+-
+- mm3-dai-link {
+- link-name = "MultiMedia3";
+- cpu {
+- sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA3>;
+- };
+- };
+-
+- hdmi-dai-link {
+- link-name = "HDMI";
+- cpu {
+- sound-dai = <&q6afedai HDMI_RX>;
+- };
+-
+- platform {
+- sound-dai = <&q6routing>;
+- };
+-
+- codec {
+- sound-dai = <&hdmi 0>;
+- };
+- };
+-
+- slim-dai-link {
+- link-name = "SLIM Playback";
+- cpu {
+- sound-dai = <&q6afedai SLIMBUS_6_RX>;
+- };
+-
+- platform {
+- sound-dai = <&q6routing>;
+- };
+-
+- codec {
+- sound-dai = <&wcd9335 6>;
+- };
+- };
+-
+- slimcap-dai-link {
+- link-name = "SLIM Capture";
+- cpu {
+- sound-dai = <&q6afedai SLIMBUS_0_TX>;
+- };
+-
+- platform {
+- sound-dai = <&q6routing>;
+- };
+-
+- codec {
+- sound-dai = <&wcd9335 1>;
+- };
+- };
+-};
+-
+-&ufsphy {
+- status = "okay";
+-
+- vdda-phy-supply = <&vreg_l28a_0p925>;
+- vdda-pll-supply = <&vreg_l12a_1p8>;
+- vddp-ref-clk-supply = <&vreg_l25a_1p2>;
+-};
+-
+-&ufshc {
+- status = "okay";
+-
+- vcc-supply = <&vreg_l20a_2p95>;
+- vccq-supply = <&vreg_l25a_1p2>;
+- vccq2-supply = <&vreg_s4a_1p8>;
+-
+- vcc-max-microamp = <600000>;
+- vccq-max-microamp = <450000>;
+- vccq2-max-microamp = <450000>;
+-};
+-
+-&usb2 {
+- status = "okay";
+- extcon = <&usb2_id>;
+-
+- dwc3@7600000 {
+- extcon = <&usb2_id>;
+- dr_mode = "otg";
+- maximum-speed = "high-speed";
+- };
+-};
+-
+-&usb3 {
+- status = "okay";
+- extcon = <&usb3_id>;
+-
+- dwc3@6a00000 {
+- extcon = <&usb3_id>;
+- dr_mode = "otg";
+- };
+-};
+-
+-&usb3phy {
+- status = "okay";
+-
+- vdda-phy-supply = <&vreg_l28a_0p925>;
+- vdda-pll-supply = <&vreg_l12a_1p8>;
+-
+-};
+-
+-&venus {
+- status = "okay";
+-};
+-
+-&wcd9335 {
+- clock-names = "mclk", "slimbus";
+- clocks = <&div1_mclk>,
+- <&rpmcc RPM_SMD_BB_CLK1>;
+-
+- vdd-buck-supply = <&vreg_s4a_1p8>;
+- vdd-buck-sido-supply = <&vreg_s4a_1p8>;
+- vdd-tx-supply = <&vreg_s4a_1p8>;
+- vdd-rx-supply = <&vreg_s4a_1p8>;
+- vdd-io-supply = <&vreg_s4a_1p8>;
+-};
+diff --git a/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts b/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts
+index a57c60070cdc2..f0a98ab1616ab 100644
+--- a/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts
++++ b/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts
+@@ -26,7 +26,7 @@
+
+ v1p05: v1p05-regulator {
+ compatible = "regulator-fixed";
+- reglator-name = "v1p05";
++ regulator-name = "v1p05";
+ regulator-always-on;
+ regulator-boot-on;
+
+@@ -38,7 +38,7 @@
+
+ v12_poe: v12-poe-regulator {
+ compatible = "regulator-fixed";
+- reglator-name = "v12_poe";
++ regulator-name = "v12_poe";
+ regulator-always-on;
+ regulator-boot-on;
+
+diff --git a/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts b/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts
+index 5aec183087128..5310259d03dc5 100644
+--- a/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts
++++ b/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts
+@@ -37,6 +37,8 @@
+
+ &spi_0 {
+ cs-select = <0>;
++ pinctrl-0 = <&spi_0_pins>;
++ pinctrl-names = "default";
+ status = "okay";
+
+ m25p80@0 {
+diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+index d2fe58e0eb7aa..3ca198f866c3c 100644
+--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
++++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+@@ -200,7 +200,7 @@
+ clock-names = "bam_clk";
+ #dma-cells = <1>;
+ qcom,ee = <1>;
+- qcom,controlled-remotely = <1>;
++ qcom,controlled-remotely;
+ qcom,config-pipe-trust-reg = <0>;
+ };
+
+@@ -221,7 +221,7 @@
+ interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+- gpio-ranges = <&tlmm 0 80>;
++ gpio-ranges = <&tlmm 0 0 80>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+@@ -401,7 +401,7 @@
+ reset-names = "phy",
+ "common";
+
+- pcie_phy0: lane@84200 {
++ pcie_phy0: phy@84200 {
+ reg = <0x0 0x84200 0x0 0x16c>, /* Serdes Tx */
+ <0x0 0x84400 0x0 0x200>, /* Serdes Rx */
+ <0x0 0x84800 0x0 0x4f4>; /* PCS: Lane0, COM, PCIE */
+@@ -433,10 +433,8 @@
+ phys = <&pcie_phy0>;
+ phy-names = "pciephy";
+
+- ranges = <0x81000000 0 0x20200000 0 0x20200000
+- 0 0x10000>, /* downstream I/O */
+- <0x82000000 0 0x20220000 0 0x20220000
+- 0 0xfde0000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x0 0x20200000 0x0 0x10000>,
++ <0x82000000 0x0 0x20220000 0x0 0x20220000 0x0 0xfde0000>;
+
+ interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
+index cc08dc4eb56a5..68698cdf56c46 100644
+--- a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
++++ b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
+@@ -60,11 +60,11 @@
+ perst-gpio = <&tlmm 58 0x1>;
+ };
+
+-&pcie_phy0 {
++&pcie_qmp0 {
+ status = "okay";
+ };
+
+-&pcie_phy1 {
++&pcie_qmp1 {
+ status = "okay";
+ };
+
+diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+index db333001df4d6..17eeff106bab7 100644
+--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi
++++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+@@ -13,7 +13,7 @@
+ clocks {
+ sleep_clk: sleep_clk {
+ compatible = "fixed-clock";
+- clock-frequency = <32000>;
++ clock-frequency = <32768>;
+ #clock-cells = <0>;
+ };
+
+@@ -106,7 +106,7 @@
+ reset-names = "phy","common";
+ status = "disabled";
+
+- usb1_ssphy: lane@58200 {
++ usb1_ssphy: phy@58200 {
+ reg = <0x00058200 0x130>, /* Tx */
+ <0x00058400 0x200>, /* Rx */
+ <0x00058800 0x1f8>, /* PCS */
+@@ -114,7 +114,7 @@
+ #phy-cells = <0>;
+ clocks = <&gcc GCC_USB1_PIPE_CLK>;
+ clock-names = "pipe0";
+- clock-output-names = "gcc_usb1_pipe_clk_src";
++ clock-output-names = "usb3phy_1_cc_pipe_clk";
+ };
+ };
+
+@@ -149,7 +149,7 @@
+ reset-names = "phy","common";
+ status = "disabled";
+
+- usb0_ssphy: lane@78200 {
++ usb0_ssphy: phy@78200 {
+ reg = <0x00078200 0x130>, /* Tx */
+ <0x00078400 0x200>, /* Rx */
+ <0x00078800 0x1f8>, /* PCS */
+@@ -157,7 +157,7 @@
+ #phy-cells = <0>;
+ clocks = <&gcc GCC_USB0_PIPE_CLK>;
+ clock-names = "pipe0";
+- clock-output-names = "gcc_usb0_pipe_clk_src";
++ clock-output-names = "usb3phy_0_cc_pipe_clk";
+ };
+ };
+
+@@ -174,34 +174,61 @@
+ status = "disabled";
+ };
+
+- pcie_phy0: phy@86000 {
+- compatible = "qcom,ipq8074-qmp-pcie-phy";
+- reg = <0x00086000 0x1000>;
+- #phy-cells = <0>;
+- clocks = <&gcc GCC_PCIE0_PIPE_CLK>;
+- clock-names = "pipe_clk";
+- clock-output-names = "pcie20_phy0_pipe_clk";
++ pcie_qmp0: phy@84000 {
++ compatible = "qcom,ipq8074-qmp-gen3-pcie-phy";
++ reg = <0x00084000 0x1bc>;
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges;
+
++ clocks = <&gcc GCC_PCIE0_AUX_CLK>,
++ <&gcc GCC_PCIE0_AHB_CLK>;
++ clock-names = "aux", "cfg_ahb";
+ resets = <&gcc GCC_PCIE0_PHY_BCR>,
+ <&gcc GCC_PCIE0PHY_PHY_BCR>;
+ reset-names = "phy",
+ "common";
+ status = "disabled";
++
++ pcie_phy0: phy@84200 {
++ reg = <0x84200 0x16c>,
++ <0x84400 0x200>,
++ <0x84800 0x1f0>,
++ <0x84c00 0xf4>;
++ #phy-cells = <0>;
++ #clock-cells = <0>;
++ clocks = <&gcc GCC_PCIE0_PIPE_CLK>;
++ clock-names = "pipe0";
++ clock-output-names = "pcie20_phy0_pipe_clk";
++ };
+ };
+
+- pcie_phy1: phy@8e000 {
++ pcie_qmp1: phy@8e000 {
+ compatible = "qcom,ipq8074-qmp-pcie-phy";
+- reg = <0x0008e000 0x1000>;
+- #phy-cells = <0>;
+- clocks = <&gcc GCC_PCIE1_PIPE_CLK>;
+- clock-names = "pipe_clk";
+- clock-output-names = "pcie20_phy1_pipe_clk";
++ reg = <0x0008e000 0x1c4>;
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges;
+
++ clocks = <&gcc GCC_PCIE1_AUX_CLK>,
++ <&gcc GCC_PCIE1_AHB_CLK>;
++ clock-names = "aux", "cfg_ahb";
+ resets = <&gcc GCC_PCIE1_PHY_BCR>,
+ <&gcc GCC_PCIE1PHY_PHY_BCR>;
+ reset-names = "phy",
+ "common";
+ status = "disabled";
++
++ pcie_phy1: phy@8e200 {
++ reg = <0x8e200 0x130>,
++ <0x8e400 0x200>,
++ <0x8e800 0x1f8>;
++ #phy-cells = <0>;
++ #clock-cells = <0>;
++ clocks = <&gcc GCC_PCIE1_PIPE_CLK>;
++ clock-names = "pipe0";
++ clock-output-names = "pcie20_phy1_pipe_clk";
++ };
+ };
+
+ prng: rng@e3000 {
+@@ -220,7 +247,7 @@
+ clock-names = "bam_clk";
+ #dma-cells = <1>;
+ qcom,ee = <1>;
+- qcom,controlled-remotely = <1>;
++ qcom,controlled-remotely;
+ status = "disabled";
+ };
+
+@@ -437,7 +464,7 @@
+ status = "disabled";
+ };
+
+- qpic_nand: nand@79b0000 {
++ qpic_nand: nand-controller@79b0000 {
+ compatible = "qcom,ipq8074-nand";
+ reg = <0x079b0000 0x10000>;
+ #address-cells = <1>;
+@@ -634,10 +661,8 @@
+ phys = <&pcie_phy1>;
+ phy-names = "pciephy";
+
+- ranges = <0x81000000 0 0x10200000 0x10200000
+- 0 0x100000 /* downstream I/O */
+- 0x82000000 0 0x10300000 0x10300000
+- 0 0xd00000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x10200000 0x0 0x10000>, /* I/O */
++ <0x82000000 0x0 0x10220000 0x10220000 0x0 0xfde0000>; /* MEM */
+
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -680,26 +705,26 @@
+ };
+
+ pcie0: pci@20000000 {
+- compatible = "qcom,pcie-ipq8074";
++ compatible = "qcom,pcie-ipq8074-gen3";
+ reg = <0x20000000 0xf1d>,
+ <0x20000f20 0xa8>,
+- <0x00080000 0x2000>,
++ <0x20001000 0x1000>,
++ <0x00080000 0x4000>,
+ <0x20100000 0x1000>;
+- reg-names = "dbi", "elbi", "parf", "config";
++ reg-names = "dbi", "elbi", "atu", "parf", "config";
+ device_type = "pci";
+ linux,pci-domain = <0>;
+ bus-range = <0x00 0xff>;
+ num-lanes = <1>;
++ max-link-speed = <3>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ phys = <&pcie_phy0>;
+ phy-names = "pciephy";
+
+- ranges = <0x81000000 0 0x20200000 0x20200000
+- 0 0x100000 /* downstream I/O */
+- 0x82000000 0 0x20300000 0x20300000
+- 0 0xd00000>; /* non-prefetchable memory */
++ ranges = <0x81000000 0x0 0x00000000 0x20200000 0x0 0x10000>, /* I/O */
++ <0x82000000 0x0 0x20220000 0x20220000 0x0 0xfde0000>; /* MEM */
+
+ interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -717,28 +742,30 @@
+ clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>,
+ <&gcc GCC_PCIE0_AXI_M_CLK>,
+ <&gcc GCC_PCIE0_AXI_S_CLK>,
+- <&gcc GCC_PCIE0_AHB_CLK>,
+- <&gcc GCC_PCIE0_AUX_CLK>;
+-
++ <&gcc GCC_PCIE0_AXI_S_BRIDGE_CLK>,
++ <&gcc GCC_PCIE0_RCHNG_CLK>;
+ clock-names = "iface",
+ "axi_m",
+ "axi_s",
+- "ahb",
+- "aux";
++ "axi_bridge",
++ "rchng";
++
+ resets = <&gcc GCC_PCIE0_PIPE_ARES>,
+ <&gcc GCC_PCIE0_SLEEP_ARES>,
+ <&gcc GCC_PCIE0_CORE_STICKY_ARES>,
+ <&gcc GCC_PCIE0_AXI_MASTER_ARES>,
+ <&gcc GCC_PCIE0_AXI_SLAVE_ARES>,
+ <&gcc GCC_PCIE0_AHB_ARES>,
+- <&gcc GCC_PCIE0_AXI_MASTER_STICKY_ARES>;
++ <&gcc GCC_PCIE0_AXI_MASTER_STICKY_ARES>,
++ <&gcc GCC_PCIE0_AXI_SLAVE_STICKY_ARES>;
+ reset-names = "pipe",
+ "sleep",
+ "sticky",
+ "axi_m",
+ "axi_s",
+ "ahb",
+- "axi_m_sticky";
++ "axi_m_sticky",
++ "axi_s_sticky";
+ status = "disabled";
+ };
+ };
+diff --git a/arch/arm64/boot/dts/qcom/msm8916-mtp.dts b/arch/arm64/boot/dts/qcom/msm8916-mtp.dts
+index d66c155387850..7c0ceb3cff45e 100644
+--- a/arch/arm64/boot/dts/qcom/msm8916-mtp.dts
++++ b/arch/arm64/boot/dts/qcom/msm8916-mtp.dts
+@@ -5,9 +5,22 @@
+
+ /dts-v1/;
+
+-#include "msm8916-mtp.dtsi"
++#include "msm8916-pm8916.dtsi"
+
+ / {
+ model = "Qualcomm Technologies, Inc. MSM 8916 MTP";
+ compatible = "qcom,msm8916-mtp", "qcom,msm8916-mtp/1", "qcom,msm8916";
++
++ aliases {
++ serial0 = &blsp1_uart2;
++ usid0 = &pm8916_0;
++ };
++
++ chosen {
++ stdout-path = "serial0";
++ };
++};
++
++&blsp1_uart2 {
++ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi
+deleted file mode 100644
+index 1bd05046cdeba..0000000000000
+--- a/arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi
++++ /dev/null
+@@ -1,21 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+- */
+-
+-#include "msm8916-pm8916.dtsi"
+-
+-/ {
+- aliases {
+- serial0 = &blsp1_uart2;
+- usid0 = &pm8916_0;
+- };
+-
+- chosen {
+- stdout-path = "serial0";
+- };
+-};
+-
+-&blsp1_uart2 {
+- status = "okay";
+-};
+diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
+index 3f85e34a8ce6f..fcc9f757c9e14 100644
+--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
+@@ -19,8 +19,8 @@
+ #size-cells = <2>;
+
+ aliases {
+- sdhc1 = &sdhc_1; /* SDC1 eMMC slot */
+- sdhc2 = &sdhc_2; /* SDC2 SD card slot */
++ mmc0 = &sdhc_1; /* SDC1 eMMC slot */
++ mmc1 = &sdhc_2; /* SDC2 SD card slot */
+ };
+
+ chosen { };
+@@ -445,7 +445,7 @@
+ };
+ };
+
+- rpm_msg_ram: memory@60000 {
++ rpm_msg_ram: sram@60000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x00060000 0x8000>;
+ };
+@@ -1064,7 +1064,7 @@
+ };
+ };
+
+- camss: camss@1b00000 {
++ camss: camss@1b0ac00 {
+ compatible = "qcom,msm8916-camss";
+ reg = <0x01b0ac00 0x200>,
+ <0x01b00030 0x4>,
+@@ -1307,7 +1307,7 @@
+ };
+
+ mpss: remoteproc@4080000 {
+- compatible = "qcom,msm8916-mss-pil", "qcom,q6v5-pil";
++ compatible = "qcom,msm8916-mss-pil";
+ reg = <0x04080000 0x100>,
+ <0x04020000 0x040>;
+
+@@ -1384,11 +1384,17 @@
+ lpass: audio-controller@7708000 {
+ status = "disabled";
+ compatible = "qcom,lpass-cpu-apq8016";
++
++ /*
++ * Note: Unlike the name would suggest, the SEC_I2S_CLK
++ * is actually only used by Tertiary MI2S while
++ * Primary/Secondary MI2S both use the PRI_I2S_CLK.
++ */
+ clocks = <&gcc GCC_ULTAUDIO_AHBFABRIC_IXFABRIC_CLK>,
+ <&gcc GCC_ULTAUDIO_PCNOC_MPORT_CLK>,
+ <&gcc GCC_ULTAUDIO_PCNOC_SWAY_CLK>,
+ <&gcc GCC_ULTAUDIO_LPAIF_PRI_I2S_CLK>,
+- <&gcc GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK>,
++ <&gcc GCC_ULTAUDIO_LPAIF_PRI_I2S_CLK>,
+ <&gcc GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK>,
+ <&gcc GCC_ULTAUDIO_LPAIF_AUX_I2S_CLK>;
+
+@@ -1741,8 +1747,8 @@
+ <&rpmpd MSM8916_VDDMX>;
+ power-domain-names = "cx", "mx";
+
+- qcom,state = <&wcnss_smp2p_out 0>;
+- qcom,state-names = "stop";
++ qcom,smem-states = <&wcnss_smp2p_out 0>;
++ qcom,smem-state-names = "stop";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&wcnss_pin_a>;
+@@ -1765,7 +1771,7 @@
+
+ label = "pronto";
+
+- wcnss {
++ wcnss_ctrl: wcnss {
+ compatible = "qcom,wcnss";
+ qcom,smd-channels = "WCNSS_CTRL";
+
+diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
+index 1ccca83292ac9..60fcb024c8879 100644
+--- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
++++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts
+@@ -1,7 +1,8 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /* Copyright (c) 2015, LGE Inc. All rights reserved.
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+- * Copyright (c) 2021, Petr Vorel <petr.vorel@gmail.com>
++ * Copyright (c) 2021-2022, Petr Vorel <petr.vorel@gmail.com>
++ * Copyright (c) 2022, Dominik Kobinski <dominikkobinski314@gmail.com>
+ */
+
+ /dts-v1/;
+@@ -13,6 +14,9 @@
+ /* cont_splash_mem has different memory mapping */
+ /delete-node/ &cont_splash_mem;
+
++/* disabled on downstream, conflicts with cont_splash_mem */
++/delete-node/ &dfps_data_mem;
++
+ / {
+ model = "LG Nexus 5X";
+ compatible = "lg,bullhead", "qcom,msm8992";
+@@ -47,7 +51,17 @@
+ };
+
+ cont_splash_mem: memory@3400000 {
+- reg = <0 0x03400000 0 0x1200000>;
++ reg = <0 0x03400000 0 0xc00000>;
++ no-map;
++ };
++
++ reserved@5000000 {
++ reg = <0x0 0x05000000 0x0 0x1a00000>;
++ no-map;
++ };
++
++ reserved@6c00000 {
++ reg = <0x0 0x06c00000 0x0 0x400000>;
+ no-map;
+ };
+ };
+@@ -74,7 +88,7 @@
+ vdd_l17_29-supply = <&vph_pwr>;
+ vdd_l20_21-supply = <&vph_pwr>;
+ vdd_l25-supply = <&pm8994_s5>;
+- vdd_lvs1_2 = <&pm8994_s4>;
++ vdd_lvs1_2-supply = <&pm8994_s4>;
+
+ /* S1, S2, S6 and S12 are managed by RPMPD */
+
+diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
+index 357d55496e750..d08659c606b9a 100644
+--- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
++++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts
+@@ -11,6 +11,12 @@
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/input/gpio-keys.h>
+
++/delete-node/ &adsp_mem;
++/delete-node/ &audio_mem;
++/delete-node/ &mpss_mem;
++/delete-node/ &peripheral_region;
++/delete-node/ &rmtfs_mem;
++
+ / {
+ model = "Xiaomi Mi 4C";
+ compatible = "xiaomi,libra", "qcom,msm8992";
+@@ -60,24 +66,66 @@
+ #size-cells = <2>;
+ ranges;
+
+- /* This is for getting crash logs using Android downstream kernels */
+- ramoops@dfc00000 {
+- compatible = "ramoops";
+- reg = <0x0 0xdfc00000 0x0 0x40000>;
+- console-size = <0x10000>;
+- record-size = <0x10000>;
+- ftrace-size = <0x10000>;
+- pmsg-size = <0x20000>;
++ memory_hole: hole@6400000 {
++ reg = <0 0x06400000 0 0x600000>;
++ no-map;
++ };
++
++ memory_hole2: hole2@6c00000 {
++ reg = <0 0x06c00000 0 0x2400000>;
++ no-map;
++ };
++
++ mpss_mem: mpss@9000000 {
++ reg = <0 0x09000000 0 0x5a00000>;
++ no-map;
++ };
++
++ tzapp: tzapp@ea00000 {
++ reg = <0 0x0ea00000 0 0x1900000>;
++ no-map;
+ };
+
+- modem_region: modem_region@9000000 {
+- reg = <0x0 0x9000000 0x0 0x5a00000>;
++ mdm_rfsa_mem: mdm-rfsa@ca0b0000 {
++ reg = <0 0xca0b0000 0 0x10000>;
+ no-map;
+ };
+
+- tzapp: modem_region@ea00000 {
+- reg = <0x0 0xea00000 0x0 0x1900000>;
++ rmtfs_mem: rmtfs@ca100000 {
++ compatible = "qcom,rmtfs-mem";
++ reg = <0 0xca100000 0 0x180000>;
+ no-map;
++
++ qcom,client-id = <1>;
++ };
++
++ audio_mem: audio@cb400000 {
++ reg = <0 0xcb000000 0 0x400000>;
++ no-mem;
++ };
++
++ qseecom_mem: qseecom@cb400000 {
++ reg = <0 0xcb400000 0 0x1c00000>;
++ no-mem;
++ };
++
++ adsp_rfsa_mem: adsp-rfsa@cd000000 {
++ reg = <0 0xcd000000 0 0x10000>;
++ no-map;
++ };
++
++ sensor_rfsa_mem: sensor-rfsa@cd010000 {
++ reg = <0 0xcd010000 0 0x10000>;
++ no-map;
++ };
++
++ ramoops@dfc00000 {
++ compatible = "ramoops";
++ reg = <0 0xdfc00000 0 0x40000>;
++ console-size = <0x10000>;
++ record-size = <0x10000>;
++ ftrace-size = <0x10000>;
++ pmsg-size = <0x20000>;
+ };
+ };
+ };
+@@ -120,9 +168,21 @@
+ status = "okay";
+ };
+
+-&peripheral_region {
+- reg = <0x0 0x7400000 0x0 0x1c00000>;
+- no-map;
++&pm8994_spmi_regulators {
++ VDD_APC0: s8 {
++ regulator-min-microvolt = <680000>;
++ regulator-max-microvolt = <1180000>;
++ regulator-always-on;
++ regulator-boot-on;
++ };
++
++ /* APC1 is 3-phase, but quoting downstream, s11 is "the gang leader" */
++ VDD_APC1: s11 {
++ regulator-min-microvolt = <700000>;
++ regulator-max-microvolt = <1225000>;
++ regulator-always-on;
++ regulator-boot-on;
++ };
+ };
+
+ &rpm_requests {
+@@ -142,7 +202,7 @@
+ vdd_l17_29-supply = <&vph_pwr>;
+ vdd_l20_21-supply = <&vph_pwr>;
+ vdd_l25-supply = <&pm8994_s5>;
+- vdd_lvs1_2 = <&pm8994_s4>;
++ vdd_lvs1_2-supply = <&pm8994_s4>;
+
+ /* S1, S2, S6 and S12 are managed by RPMPD */
+
+diff --git a/arch/arm64/boot/dts/qcom/msm8992.dtsi b/arch/arm64/boot/dts/qcom/msm8992.dtsi
+index 58fe58cc77036..765e1f1989b58 100644
+--- a/arch/arm64/boot/dts/qcom/msm8992.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8992.dtsi
+@@ -14,10 +14,6 @@
+ compatible = "qcom,rpmcc-msm8992";
+ };
+
+-&tcsr_mutex {
+- compatible = "qcom,sfpb-mutex";
+-};
+-
+ &timer {
+ interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+diff --git a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
+index 3a3790a52a2ce..e2d08915ec426 100644
+--- a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
+@@ -540,8 +540,7 @@
+ };
+
+ &pmi8994_spmi_regulators {
+- vdd_gfx: s2@1700 {
+- reg = <0x1700 0x100>;
++ vdd_gfx: s2 {
+ regulator-min-microvolt = <980000>;
+ regulator-max-microvolt = <980000>;
+ };
+diff --git a/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi b/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
+index 48de66bf19c4c..55198190bbeaa 100644
+--- a/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8994-sony-xperia-kitakami.dtsi
+@@ -183,8 +183,7 @@
+ * power domain.. which still isn't enough and forces us to bind
+ * OXILI_CX and OXILI_GX together!
+ */
+- vdd_gfx: s2@1700 {
+- reg = <0x1700 0x100>;
++ vdd_gfx: s2 {
+ regulator-name = "VDD_GFX";
+ regulator-min-microvolt = <980000>;
+ regulator-max-microvolt = <980000>;
+diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi
+index 986fe60dec5fb..4447ed146b3ac 100644
+--- a/arch/arm64/boot/dts/qcom/msm8994.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi
+@@ -93,7 +93,7 @@
+ CPU6: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+- reg = <0x0 0x101>;
++ reg = <0x0 0x102>;
+ enable-method = "psci";
+ next-level-cache = <&L2_1>;
+ };
+@@ -101,7 +101,7 @@
+ CPU7: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+- reg = <0x0 0x101>;
++ reg = <0x0 0x103>;
+ enable-method = "psci";
+ next-level-cache = <&L2_1>;
+ };
+@@ -183,8 +183,8 @@
+ no-map;
+ };
+
+- cont_splash_mem: memory@3800000 {
+- reg = <0 0x03800000 0 0x2400000>;
++ cont_splash_mem: memory@3401000 {
++ reg = <0 0x03401000 0 0x2200000>;
+ no-map;
+ };
+
+@@ -498,7 +498,7 @@
+ #dma-cells = <1>;
+ qcom,ee = <0>;
+ qcom,controlled-remotely;
+- num-channels = <18>;
++ num-channels = <24>;
+ qcom,num-ees = <4>;
+ };
+
+@@ -634,7 +634,7 @@
+ #dma-cells = <1>;
+ qcom,ee = <0>;
+ qcom,controlled-remotely;
+- num-channels = <18>;
++ num-channels = <24>;
+ qcom,num-ees = <4>;
+ };
+
+@@ -715,7 +715,7 @@
+ reg = <0xfc400000 0x2000>;
+ };
+
+- rpm_msg_ram: memory@fc428000 {
++ rpm_msg_ram: sram@fc428000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0xfc428000 0x4000>;
+ };
+@@ -725,7 +725,7 @@
+ reg = <0xfc4ab000 0x4>;
+ };
+
+- spmi_bus: spmi@fc4c0000 {
++ spmi_bus: spmi@fc4cf000 {
+ compatible = "qcom,spmi-pmic-arb";
+ reg = <0xfc4cf000 0x1000>,
+ <0xfc4cb000 0x1000>,
+diff --git a/arch/arm64/boot/dts/qcom/msm8996-mtp.dts b/arch/arm64/boot/dts/qcom/msm8996-mtp.dts
+index 45ed594c1b9c2..7d9fc35bc7a06 100644
+--- a/arch/arm64/boot/dts/qcom/msm8996-mtp.dts
++++ b/arch/arm64/boot/dts/qcom/msm8996-mtp.dts
+@@ -5,9 +5,31 @@
+
+ /dts-v1/;
+
+-#include "msm8996-mtp.dtsi"
++#include "msm8996.dtsi"
+
+ / {
+ model = "Qualcomm Technologies, Inc. MSM 8996 MTP";
+ compatible = "qcom,msm8996-mtp";
++
++ aliases {
++ serial0 = &blsp2_uart2;
++ };
++
++ chosen {
++ stdout-path = "serial0";
++ };
++
++ soc {
++ serial@75b0000 {
++ status = "okay";
++ };
++ };
++};
++
++&hdmi {
++ status = "okay";
++};
++
++&hdmi_phy {
++ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi
+deleted file mode 100644
+index ac43a91f11048..0000000000000
+--- a/arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi
++++ /dev/null
+@@ -1,30 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+- */
+-
+-#include "msm8996.dtsi"
+-
+-/ {
+- aliases {
+- serial0 = &blsp2_uart2;
+- };
+-
+- chosen {
+- stdout-path = "serial0";
+- };
+-
+- soc {
+- serial@75b0000 {
+- status = "okay";
+- };
+- };
+-};
+-
+-&hdmi {
+- status = "okay";
+-};
+-
+-&hdmi_phy {
+- status = "okay";
+-};
+diff --git a/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi b/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi
+index 507396c4d23b6..e85f7cf4a56ce 100644
+--- a/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi
+@@ -620,6 +620,7 @@
+ &pmi8994_wled {
+ status = "okay";
+ default-brightness = <512>;
++ qcom,num-strings = <3>;
+ };
+
+ &rpm_requests {
+@@ -938,10 +939,6 @@
+ };
+ };
+
+-/*
+- * For reasons that are currently unknown (but probably related to fusb301), USB takes about
+- * 6 minutes to wake up (nothing interesting in kernel logs), but then it works as it should.
+- */
+ &usb3 {
+ status = "okay";
+ qcom,select-utmi-as-pipe-clk;
+@@ -950,6 +947,7 @@
+ &usb3_dwc3 {
+ extcon = <&usb3_id>;
+ dr_mode = "peripheral";
++ maximum-speed = "high-speed";
+ phys = <&hsusb_phy1>;
+ phy-names = "usb2-phy";
+ snps,hird-threshold = /bits/ 8 <0>;
+diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
+index 52df22ab3f6ae..8a7c651785073 100644
+--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
+@@ -142,82 +142,92 @@
+ /* Nominal fmax for now */
+ opp-307200000 {
+ opp-hz = /bits/ 64 <307200000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-422400000 {
+ opp-hz = /bits/ 64 <422400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-480000000 {
+ opp-hz = /bits/ 64 <480000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-556800000 {
+ opp-hz = /bits/ 64 <556800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-652800000 {
+ opp-hz = /bits/ 64 <652800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-729600000 {
+ opp-hz = /bits/ 64 <729600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-844800000 {
+ opp-hz = /bits/ 64 <844800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-960000000 {
+ opp-hz = /bits/ 64 <960000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1036800000 {
+ opp-hz = /bits/ 64 <1036800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1113600000 {
+ opp-hz = /bits/ 64 <1113600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1190400000 {
+ opp-hz = /bits/ 64 <1190400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1228800000 {
+ opp-hz = /bits/ 64 <1228800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1324800000 {
+ opp-hz = /bits/ 64 <1324800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x5>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1363200000 {
++ opp-hz = /bits/ 64 <1363200000>;
++ opp-supported-hw = <0x2>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1401600000 {
+ opp-hz = /bits/ 64 <1401600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x5>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1478400000 {
+ opp-hz = /bits/ 64 <1478400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1497600000 {
++ opp-hz = /bits/ 64 <1497600000>;
++ opp-supported-hw = <0x04>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1593600000 {
+ opp-hz = /bits/ 64 <1593600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
+ clock-latency-ns = <200000>;
+ };
+ };
+@@ -230,127 +240,137 @@
+ /* Nominal fmax for now */
+ opp-307200000 {
+ opp-hz = /bits/ 64 <307200000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-403200000 {
+ opp-hz = /bits/ 64 <403200000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-480000000 {
+ opp-hz = /bits/ 64 <480000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-556800000 {
+ opp-hz = /bits/ 64 <556800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-652800000 {
+ opp-hz = /bits/ 64 <652800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-729600000 {
+ opp-hz = /bits/ 64 <729600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-806400000 {
+ opp-hz = /bits/ 64 <806400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-883200000 {
+ opp-hz = /bits/ 64 <883200000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-940800000 {
+ opp-hz = /bits/ 64 <940800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1036800000 {
+ opp-hz = /bits/ 64 <1036800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1113600000 {
+ opp-hz = /bits/ 64 <1113600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1190400000 {
+ opp-hz = /bits/ 64 <1190400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1248000000 {
+ opp-hz = /bits/ 64 <1248000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1324800000 {
+ opp-hz = /bits/ 64 <1324800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1401600000 {
+ opp-hz = /bits/ 64 <1401600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1478400000 {
+ opp-hz = /bits/ 64 <1478400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1555200000 {
+ opp-hz = /bits/ 64 <1555200000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1632000000 {
+ opp-hz = /bits/ 64 <1632000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1708800000 {
+ opp-hz = /bits/ 64 <1708800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1785600000 {
+ opp-hz = /bits/ 64 <1785600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x7>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1804800000 {
++ opp-hz = /bits/ 64 <1804800000>;
++ opp-supported-hw = <0x6>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1824000000 {
+ opp-hz = /bits/ 64 <1824000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1900800000 {
++ opp-hz = /bits/ 64 <1900800000>;
++ opp-supported-hw = <0x4>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1920000000 {
+ opp-hz = /bits/ 64 <1920000000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
+ clock-latency-ns = <200000>;
+ };
+ opp-1996800000 {
+ opp-hz = /bits/ 64 <1996800000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
+ clock-latency-ns = <200000>;
+ };
+ opp-2073600000 {
+ opp-hz = /bits/ 64 <2073600000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
+ clock-latency-ns = <200000>;
+ };
+ opp-2150400000 {
+ opp-hz = /bits/ 64 <2150400000>;
+- opp-supported-hw = <0x77>;
++ opp-supported-hw = <0x1>;
+ clock-latency-ns = <200000>;
+ };
+ };
+@@ -598,7 +618,7 @@
+ reset-names = "phy", "common", "cfg";
+ status = "disabled";
+
+- pciephy_0: lane@35000 {
++ pciephy_0: phy@35000 {
+ reg = <0x00035000 0x130>,
+ <0x00035200 0x200>,
+ <0x00035400 0x1dc>;
+@@ -611,7 +631,7 @@
+ reset-names = "lane0";
+ };
+
+- pciephy_1: lane@36000 {
++ pciephy_1: phy@36000 {
+ reg = <0x00036000 0x130>,
+ <0x00036200 0x200>,
+ <0x00036400 0x1dc>;
+@@ -624,7 +644,7 @@
+ reset-names = "lane1";
+ };
+
+- pciephy_2: lane@37000 {
++ pciephy_2: phy@37000 {
+ reg = <0x00037000 0x130>,
+ <0x00037200 0x200>,
+ <0x00037400 0x1dc>;
+@@ -638,7 +658,7 @@
+ };
+ };
+
+- rpm_msg_ram: memory@68000 {
++ rpm_msg_ram: sram@68000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x00068000 0x6000>;
+ };
+@@ -965,9 +985,6 @@
+ nvmem-cells = <&speedbin_efuse>;
+ nvmem-cell-names = "speed_bin";
+
+- qcom,gpu-quirk-two-pass-use-wfi;
+- qcom,gpu-quirk-fault-detect-mask;
+-
+ operating-points-v2 = <&gpu_opp_table>;
+
+ status = "disabled";
+@@ -978,17 +995,17 @@
+ compatible ="operating-points-v2";
+
+ /*
+- * 624Mhz and 560Mhz are only available on speed
+- * bin (1 << 0). All the rest are available on
+- * all bins of the hardware
++ * 624Mhz is only available on speed bins 0 and 3.
++ * 560Mhz is only available on speed bins 0, 2 and 3.
++ * All the rest are available on all bins of the hardware.
+ */
+ opp-624000000 {
+ opp-hz = /bits/ 64 <624000000>;
+- opp-supported-hw = <0x01>;
++ opp-supported-hw = <0x09>;
+ };
+ opp-560000000 {
+ opp-hz = /bits/ 64 <560000000>;
+- opp-supported-hw = <0x01>;
++ opp-supported-hw = <0x0d>;
+ };
+ opp-510000000 {
+ opp-hz = /bits/ 64 <510000000>;
+@@ -1211,6 +1228,20 @@
+ };
+ };
+
++ blsp1_uart2_default: blsp1-uart2-default {
++ pins = "gpio41", "gpio42", "gpio43", "gpio44";
++ function = "blsp_uart2";
++ drive-strength = <16>;
++ bias-disable;
++ };
++
++ blsp1_uart2_sleep: blsp1-uart2-sleep {
++ pins = "gpio41", "gpio42", "gpio43", "gpio44";
++ function = "gpio";
++ drive-strength = <2>;
++ bias-disable;
++ };
++
+ blsp1_i2c3_default: blsp1-i2c2-default {
+ pins = "gpio47", "gpio48";
+ function = "blsp_i2c3";
+@@ -1538,8 +1569,8 @@
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+- ranges = <0x01000000 0x0 0x0c200000 0x0c200000 0x0 0x100000>,
+- <0x02000000 0x0 0x0c300000 0x0c300000 0x0 0xd00000>;
++ ranges = <0x01000000 0x0 0x00000000 0x0c200000 0x0 0x100000>,
++ <0x02000000 0x0 0x0c300000 0x0c300000 0x0 0xd00000>;
+
+ device_type = "pci";
+
+@@ -1592,8 +1623,8 @@
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+- ranges = <0x01000000 0x0 0x0d200000 0x0d200000 0x0 0x100000>,
+- <0x02000000 0x0 0x0d300000 0x0d300000 0x0 0xd00000>;
++ ranges = <0x01000000 0x0 0x00000000 0x0d200000 0x0 0x100000>,
++ <0x02000000 0x0 0x0d300000 0x0d300000 0x0 0xd00000>;
+
+ device_type = "pci";
+
+@@ -1643,8 +1674,8 @@
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+- ranges = <0x01000000 0x0 0x0e200000 0x0e200000 0x0 0x100000>,
+- <0x02000000 0x0 0x0e300000 0x0e300000 0x0 0x1d00000>;
++ ranges = <0x01000000 0x0 0x00000000 0x0e200000 0x0 0x100000>,
++ <0x02000000 0x0 0x0e300000 0x0e300000 0x0 0x1d00000>;
+
+ device_type = "pci";
+
+@@ -1746,7 +1777,7 @@
+ reset-names = "ufsphy";
+ status = "disabled";
+
+- ufsphy_lane: lanes@627400 {
++ ufsphy_lane: phy@627400 {
+ reg = <0x627400 0x12c>,
+ <0x627600 0x200>,
+ <0x627c00 0x1b4>;
+@@ -1754,7 +1785,7 @@
+ };
+ };
+
+- camss: camss@a00000 {
++ camss: camss@a34000 {
+ compatible = "qcom,msm8996-camss";
+ reg = <0x00a34000 0x1000>,
+ <0x00a00030 0x4>,
+@@ -2578,8 +2609,11 @@
+ interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>;
+ phys = <&hsusb_phy1>, <&ssusb_phy_0>;
+ phy-names = "usb2-phy", "usb3-phy";
++ snps,hird-threshold = /bits/ 8 <0>;
+ snps,dis_u2_susphy_quirk;
+ snps,dis_enblslpm_quirk;
++ snps,is-utmi-l1-suspend;
++ tx-fifo-resize;
+ };
+ };
+
+@@ -2601,7 +2635,7 @@
+ reset-names = "phy", "common";
+ status = "disabled";
+
+- ssusb_phy_0: lane@7410200 {
++ ssusb_phy_0: phy@7410200 {
+ reg = <0x07410200 0x200>,
+ <0x07410400 0x130>,
+ <0x07410600 0x1a8>;
+@@ -2704,6 +2738,9 @@
+ clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>,
+ <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
++ pinctrl-names = "default", "sleep";
++ pinctrl-0 = <&blsp1_uart2_default>;
++ pinctrl-1 = <&blsp1_uart2_sleep>;
+ dmas = <&blsp1_dma 2>, <&blsp1_dma 3>;
+ dma-names = "tx", "rx";
+ status = "disabled";
+@@ -2865,6 +2902,9 @@
+ #size-cells = <1>;
+ ranges;
+
++ interrupts = <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-names = "hs_phy_irq";
++
+ clocks = <&gcc GCC_PERIPH_NOC_USB20_AHB_CLK>,
+ <&gcc GCC_USB20_MASTER_CLK>,
+ <&gcc GCC_USB20_MOCK_UTMI_CLK>,
+diff --git a/arch/arm64/boot/dts/qcom/msm8996pro.dtsi b/arch/arm64/boot/dts/qcom/msm8996pro.dtsi
+new file mode 100644
+index 0000000000000..63e1b4ec7a360
+--- /dev/null
++++ b/arch/arm64/boot/dts/qcom/msm8996pro.dtsi
+@@ -0,0 +1,266 @@
++// SPDX-License-Identifier: BSD-3-Clause
++/*
++ * Copyright (c) 2022, Linaro Limited
++ */
++
++#include "msm8996.dtsi"
++
++/ {
++ /delete-node/ opp-table-cluster0;
++ /delete-node/ opp-table-cluster1;
++
++ /*
++ * On MSM8996 Pro the cpufreq driver shifts speed bins into the high
++ * nibble of supported hw, so speed bin 0 becomes 0x10, speed bin 1
++ * becomes 0x20, speed 2 becomes 0x40.
++ */
++
++ cluster0_opp: opp-table-cluster0 {
++ compatible = "operating-points-v2-kryo-cpu";
++ nvmem-cells = <&speedbin_efuse>;
++ opp-shared;
++
++ opp-307200000 {
++ opp-hz = /bits/ 64 <307200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-384000000 {
++ opp-hz = /bits/ 64 <384000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-460800000 {
++ opp-hz = /bits/ 64 <460800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-537600000 {
++ opp-hz = /bits/ 64 <537600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-614400000 {
++ opp-hz = /bits/ 64 <614400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-691200000 {
++ opp-hz = /bits/ 64 <691200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-768000000 {
++ opp-hz = /bits/ 64 <768000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-844800000 {
++ opp-hz = /bits/ 64 <844800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-902400000 {
++ opp-hz = /bits/ 64 <902400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-979200000 {
++ opp-hz = /bits/ 64 <979200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1056000000 {
++ opp-hz = /bits/ 64 <1056000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1132800000 {
++ opp-hz = /bits/ 64 <1132800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1209600000 {
++ opp-hz = /bits/ 64 <1209600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1286400000 {
++ opp-hz = /bits/ 64 <1286400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1363200000 {
++ opp-hz = /bits/ 64 <1363200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1440000000 {
++ opp-hz = /bits/ 64 <1440000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1516800000 {
++ opp-hz = /bits/ 64 <1516800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1593600000 {
++ opp-hz = /bits/ 64 <1593600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1996800000 {
++ opp-hz = /bits/ 64 <1996800000>;
++ opp-supported-hw = <0x20>;
++ clock-latency-ns = <200000>;
++ };
++ opp-2188800000 {
++ opp-hz = /bits/ 64 <2188800000>;
++ opp-supported-hw = <0x10>;
++ clock-latency-ns = <200000>;
++ };
++ };
++
++ cluster1_opp: opp-table-cluster1 {
++ compatible = "operating-points-v2-kryo-cpu";
++ nvmem-cells = <&speedbin_efuse>;
++ opp-shared;
++
++ opp-307200000 {
++ opp-hz = /bits/ 64 <307200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-384000000 {
++ opp-hz = /bits/ 64 <384000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-460800000 {
++ opp-hz = /bits/ 64 <460800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-537600000 {
++ opp-hz = /bits/ 64 <537600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-614400000 {
++ opp-hz = /bits/ 64 <614400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-691200000 {
++ opp-hz = /bits/ 64 <691200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-748800000 {
++ opp-hz = /bits/ 64 <748800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-825600000 {
++ opp-hz = /bits/ 64 <825600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-902400000 {
++ opp-hz = /bits/ 64 <902400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-979200000 {
++ opp-hz = /bits/ 64 <979200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1056000000 {
++ opp-hz = /bits/ 64 <1056000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1132800000 {
++ opp-hz = /bits/ 64 <1132800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1209600000 {
++ opp-hz = /bits/ 64 <1209600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1286400000 {
++ opp-hz = /bits/ 64 <1286400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1363200000 {
++ opp-hz = /bits/ 64 <1363200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1440000000 {
++ opp-hz = /bits/ 64 <1440000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1516800000 {
++ opp-hz = /bits/ 64 <1516800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1593600000 {
++ opp-hz = /bits/ 64 <1593600000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1670400000 {
++ opp-hz = /bits/ 64 <1670400000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1747200000 {
++ opp-hz = /bits/ 64 <1747200000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1824000000 {
++ opp-hz = /bits/ 64 <1824000000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1900800000 {
++ opp-hz = /bits/ 64 <1900800000>;
++ opp-supported-hw = <0x70>;
++ clock-latency-ns = <200000>;
++ };
++ opp-1977600000 {
++ opp-hz = /bits/ 64 <1977600000>;
++ opp-supported-hw = <0x30>;
++ clock-latency-ns = <200000>;
++ };
++ opp-2054400000 {
++ opp-hz = /bits/ 64 <2054400000>;
++ opp-supported-hw = <0x30>;
++ clock-latency-ns = <200000>;
++ };
++ opp-2150400000 {
++ opp-hz = /bits/ 64 <2150400000>;
++ opp-supported-hw = <0x30>;
++ clock-latency-ns = <200000>;
++ };
++ opp-2246400000 {
++ opp-hz = /bits/ 64 <2246400000>;
++ opp-supported-hw = <0x10>;
++ clock-latency-ns = <200000>;
++ };
++ opp-2342400000 {
++ opp-hz = /bits/ 64 <2342400000>;
++ opp-supported-hw = <0x10>;
++ clock-latency-ns = <200000>;
++ };
++ };
++};
+diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
+index 34039b5c80175..b7d72b0d579e4 100644
+--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
++++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
+@@ -308,38 +308,42 @@
+ LITTLE_CPU_SLEEP_0: cpu-sleep-0-0 {
+ compatible = "arm,idle-state";
+ idle-state-name = "little-retention";
++ /* CPU Retention (C2D), L2 Active */
+ arm,psci-suspend-param = <0x00000002>;
+ entry-latency-us = <81>;
+ exit-latency-us = <86>;
+- min-residency-us = <200>;
++ min-residency-us = <504>;
+ };
+
+ LITTLE_CPU_SLEEP_1: cpu-sleep-0-1 {
+ compatible = "arm,idle-state";
+ idle-state-name = "little-power-collapse";
++ /* CPU + L2 Power Collapse (C3, D4) */
+ arm,psci-suspend-param = <0x40000003>;
+- entry-latency-us = <273>;
+- exit-latency-us = <612>;
+- min-residency-us = <1000>;
++ entry-latency-us = <814>;
++ exit-latency-us = <4562>;
++ min-residency-us = <9183>;
+ local-timer-stop;
+ };
+
+ BIG_CPU_SLEEP_0: cpu-sleep-1-0 {
+ compatible = "arm,idle-state";
+ idle-state-name = "big-retention";
++ /* CPU Retention (C2D), L2 Active */
+ arm,psci-suspend-param = <0x00000002>;
+ entry-latency-us = <79>;
+ exit-latency-us = <82>;
+- min-residency-us = <200>;
++ min-residency-us = <1302>;
+ };
+
+ BIG_CPU_SLEEP_1: cpu-sleep-1-1 {
+ compatible = "arm,idle-state";
+ idle-state-name = "big-power-collapse";
++ /* CPU + L2 Power Collapse (C3, D4) */
+ arm,psci-suspend-param = <0x40000003>;
+- entry-latency-us = <336>;
+- exit-latency-us = <525>;
+- min-residency-us = <1000>;
++ entry-latency-us = <724>;
++ exit-latency-us = <2027>;
++ min-residency-us = <9419>;
+ local-timer-stop;
+ };
+ };
+@@ -857,7 +861,7 @@
+ reg = <0x00100000 0xb0000>;
+ };
+
+- rpm_msg_ram: memory@778000 {
++ rpm_msg_ram: sram@778000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x00778000 0x7000>;
+ };
+@@ -947,7 +951,7 @@
+ phy-names = "pciephy";
+ status = "disabled";
+
+- ranges = <0x01000000 0x0 0x1b200000 0x1b200000 0x0 0x100000>,
++ ranges = <0x01000000 0x0 0x00000000 0x1b200000 0x0 0x100000>,
+ <0x02000000 0x0 0x1b300000 0x1b300000 0x0 0xd00000>;
+
+ #interrupt-cells = <1>;
+@@ -990,7 +994,7 @@
+ vdda-phy-supply = <&vreg_l1a_0p875>;
+ vdda-pll-supply = <&vreg_l2a_1p2>;
+
+- pciephy: lane@1c06800 {
++ pciephy: phy@1c06800 {
+ reg = <0x01c06200 0x128>, <0x01c06400 0x1fc>, <0x01c06800 0x20c>;
+ #phy-cells = <0>;
+
+@@ -1062,7 +1066,7 @@
+ reset-names = "ufsphy";
+ resets = <&ufshc 0>;
+
+- ufsphy_lanes: lanes@1da7400 {
++ ufsphy_lanes: phy@1da7400 {
+ reg = <0x01da7400 0x128>,
+ <0x01da7600 0x1fc>,
+ <0x01da7c00 0x1dc>,
+@@ -1469,7 +1473,7 @@
+ compatible = "arm,coresight-stm", "arm,primecell";
+ reg = <0x06002000 0x1000>,
+ <0x16280000 0x180000>;
+- reg-names = "stm-base", "stm-data-base";
++ reg-names = "stm-base", "stm-stimulus-base";
+ status = "disabled";
+
+ clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>;
+@@ -1995,7 +1999,7 @@
+ <&gcc GCC_USB3PHY_PHY_BCR>;
+ reset-names = "phy", "common";
+
+- usb1_ssphy: lane@c010200 {
++ usb1_ssphy: phy@c010200 {
+ reg = <0xc010200 0x128>,
+ <0xc010400 0x200>,
+ <0xc010c00 0x20c>,
+diff --git a/arch/arm64/boot/dts/qcom/pm660.dtsi b/arch/arm64/boot/dts/qcom/pm660.dtsi
+index e847d7209afc6..affc736d154ad 100644
+--- a/arch/arm64/boot/dts/qcom/pm660.dtsi
++++ b/arch/arm64/boot/dts/qcom/pm660.dtsi
+@@ -152,7 +152,7 @@
+ qcom,pre-scaling = <1 3>;
+ };
+
+- vcoin: vcoin@83 {
++ vcoin: vcoin@85 {
+ reg = <ADC5_VCOIN>;
+ qcom,decimation = <1024>;
+ qcom,pre-scaling = <1 3>;
+diff --git a/arch/arm64/boot/dts/qcom/pm660l.dtsi b/arch/arm64/boot/dts/qcom/pm660l.dtsi
+index 05086cbe573be..902e15d05a95b 100644
+--- a/arch/arm64/boot/dts/qcom/pm660l.dtsi
++++ b/arch/arm64/boot/dts/qcom/pm660l.dtsi
+@@ -67,9 +67,10 @@
+
+ pm660l_wled: leds@d800 {
+ compatible = "qcom,pm660l-wled";
+- reg = <0xd800 0xd900>;
+- interrupts = <0x3 0xd8 0x1 IRQ_TYPE_EDGE_RISING>;
+- interrupt-names = "ovp";
++ reg = <0xd800>, <0xd900>;
++ interrupts = <0x3 0xd8 0x1 IRQ_TYPE_EDGE_RISING>,
++ <0x3 0xd8 0x2 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "ovp", "short";
+ label = "backlight";
+
+ qcom,switching-freq = <800>;
+diff --git a/arch/arm64/boot/dts/qcom/pm8916.dtsi b/arch/arm64/boot/dts/qcom/pm8916.dtsi
+index f931cb0de231f..42180f1b5dbbb 100644
+--- a/arch/arm64/boot/dts/qcom/pm8916.dtsi
++++ b/arch/arm64/boot/dts/qcom/pm8916.dtsi
+@@ -86,7 +86,6 @@
+ rtc@6000 {
+ compatible = "qcom,pm8941-rtc";
+ reg = <0x6000>;
+- reg-names = "rtc", "alarm";
+ interrupts = <0x0 0x61 0x1 IRQ_TYPE_EDGE_RISING>;
+ };
+
+diff --git a/arch/arm64/boot/dts/qcom/pmi8994.dtsi b/arch/arm64/boot/dts/qcom/pmi8994.dtsi
+index b4ac900ab115f..38cf0f14e8798 100644
+--- a/arch/arm64/boot/dts/qcom/pmi8994.dtsi
++++ b/arch/arm64/boot/dts/qcom/pmi8994.dtsi
+@@ -35,14 +35,12 @@
+
+ pmi8994_wled: wled@d800 {
+ compatible = "qcom,pmi8994-wled";
+- reg = <0xd800 0xd900>;
+- interrupts = <3 0xd8 0x02 IRQ_TYPE_EDGE_RISING>;
+- interrupt-names = "short";
+- qcom,num-strings = <3>;
+- /* Yes, all four strings *have to* be defined or things won't work. */
+- qcom,enabled-strings = <0 1 2 3>;
++ reg = <0xd800>, <0xd900>;
++ interrupts = <0x3 0xd8 0x1 IRQ_TYPE_EDGE_RISING>,
++ <0x3 0xd8 0x2 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "ovp", "short";
+ qcom,cabc;
+- qcom,eternal-pfet;
++ qcom,external-pfet;
+ status = "disabled";
+ };
+ };
+diff --git a/arch/arm64/boot/dts/qcom/pmi8998.dtsi b/arch/arm64/boot/dts/qcom/pmi8998.dtsi
+index d230c510d4b7d..ef29e80c442c7 100644
+--- a/arch/arm64/boot/dts/qcom/pmi8998.dtsi
++++ b/arch/arm64/boot/dts/qcom/pmi8998.dtsi
+@@ -41,5 +41,17 @@
+ interrupt-names = "sc-err", "ocp";
+ };
+ };
++
++ pmi8998_wled: leds@d800 {
++ compatible = "qcom,pmi8998-wled";
++ reg = <0xd800>, <0xd900>;
++ interrupts = <0x3 0xd8 0x1 IRQ_TYPE_EDGE_RISING>,
++ <0x3 0xd8 0x2 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "ovp", "short";
++ label = "backlight";
++
++ status = "disabled";
++ };
++
+ };
+ };
+diff --git a/arch/arm64/boot/dts/qcom/pmk8350.dtsi b/arch/arm64/boot/dts/qcom/pmk8350.dtsi
+index 04fc2632a0b20..9e99fcf269dfd 100644
+--- a/arch/arm64/boot/dts/qcom/pmk8350.dtsi
++++ b/arch/arm64/boot/dts/qcom/pmk8350.dtsi
+@@ -16,8 +16,9 @@
+ #size-cells = <0>;
+
+ pmk8350_pon: pon@1300 {
+- compatible = "qcom,pm8998-pon";
+- reg = <0x1300>;
++ compatible = "qcom,pmk8350-pon";
++ reg = <0x1300>, <0x800>;
++ reg-names = "hlos", "pbs";
+
+ pwrkey {
+ compatible = "qcom,pmk8350-pwrkey";
+@@ -44,7 +45,7 @@
+ };
+
+ pmk8350_adc_tm: adc-tm@3400 {
+- compatible = "qcom,adc-tm7";
++ compatible = "qcom,spmi-adc-tm5-gen2";
+ reg = <0x3400>;
+ interrupts = <0x0 0x34 0x0 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "threshold";
+diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi
+index 339790ba585de..fd0d634a373fc 100644
+--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
++++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
+@@ -318,7 +318,7 @@
+ status = "disabled";
+ };
+
+- rpm_msg_ram: memory@60000 {
++ rpm_msg_ram: sram@60000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x00060000 0x6000>;
+ };
+@@ -548,7 +548,7 @@
+ compatible = "snps,dwc3";
+ reg = <0x07580000 0xcd00>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+- phys = <&usb2_phy_sec>, <&usb3_phy>;
++ phys = <&usb2_phy_prim>, <&usb3_phy>;
+ phy-names = "usb2-phy", "usb3-phy";
+ snps,has-lpm-erratum;
+ snps,hird-threshold = /bits/ 8 <0x10>;
+@@ -577,7 +577,7 @@
+ compatible = "snps,dwc3";
+ reg = <0x078c0000 0xcc00>;
+ interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+- phys = <&usb2_phy_prim>;
++ phys = <&usb2_phy_sec>;
+ phy-names = "usb2-phy";
+ snps,has-lpm-erratum;
+ snps,hird-threshold = /bits/ 8 <0x10>;
+@@ -775,7 +775,7 @@
+
+ clocks = <&gcc GCC_PCIE_0_PIPE_CLK>;
+ resets = <&gcc GCC_PCIEPHY_0_PHY_BCR>,
+- <&gcc 21>;
++ <&gcc GCC_PCIE_0_PIPE_ARES>;
+ reset-names = "phy", "pipe";
+
+ clock-output-names = "pcie_0_pipe_clk";
+@@ -1305,12 +1305,12 @@
+ <&gcc GCC_PCIE_0_SLV_AXI_CLK>;
+ clock-names = "iface", "aux", "master_bus", "slave_bus";
+
+- resets = <&gcc 18>,
+- <&gcc 17>,
+- <&gcc 15>,
+- <&gcc 19>,
++ resets = <&gcc GCC_PCIE_0_AXI_MASTER_ARES>,
++ <&gcc GCC_PCIE_0_AXI_SLAVE_ARES>,
++ <&gcc GCC_PCIE_0_AXI_MASTER_STICKY_ARES>,
++ <&gcc GCC_PCIE_0_CORE_STICKY_ARES>,
+ <&gcc GCC_PCIE_0_BCR>,
+- <&gcc 16>;
++ <&gcc GCC_PCIE_0_AHB_ARES>;
+ reset-names = "axi_m",
+ "axi_s",
+ "axi_m_sticky",
+diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+index 28d5b5528516b..d3449cb52defe 100644
+--- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
++++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+@@ -27,7 +27,7 @@
+ };
+
+ /* Fixed crystal oscillator dedicated to MCP2518FD */
+- clk40M: can_clock {
++ clk40M: can-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <40000000>;
+@@ -113,7 +113,7 @@
+ };
+ };
+
+- pm8150l-thermal {
++ pm8150l-pcb-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&pm8150l_adc_tm 1>;
+diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
+index 5ae2ddc65f7e4..56a789a5789e6 100644
+--- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
++++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
+@@ -43,7 +43,6 @@
+
+ regulator-always-on;
+ regulator-boot-on;
+- regulator-allow-set-load;
+
+ vin-supply = <&vreg_3p3>;
+ };
+@@ -114,6 +113,9 @@
+ regulator-max-microvolt = <880000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7a_1p8: ldo7 {
+@@ -129,6 +131,9 @@
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l11a_0p8: ldo11 {
+@@ -235,6 +240,9 @@
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7c_1p8: ldo7 {
+@@ -250,6 +258,9 @@
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l10c_3p3: ldo10 {
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
+index d8ed1d7b4ec76..4b306a59d9bec 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
+@@ -16,3 +16,11 @@
+ &cpu6_opp12 {
+ opp-peak-kBps = <8532000 23347200>;
+ };
++
++&cpu6_opp13 {
++ opp-peak-kBps = <8532000 23347200>;
++};
++
++&cpu6_opp14 {
++ opp-peak-kBps = <8532000 23347200>;
++};
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi
+index a758e4d226122..81098aa9687ba 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi
+@@ -33,7 +33,7 @@ ap_h1_spi: &spi0 {};
+ polling-delay = <0>;
+
+ thermal-sensors = <&pm6150_adc_tm 1>;
+- sustainable-power = <814>;
++ sustainable-power = <965>;
+
+ trips {
+ skin_temp_alert0: trip-point0 {
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor-limozeen-nots-r4.dts b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor-limozeen-nots-r4.dts
+index 6ebde0828550c..8a98a6f849c4f 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor-limozeen-nots-r4.dts
++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor-limozeen-nots-r4.dts
+@@ -26,7 +26,7 @@
+ interrupt-parent = <&tlmm>;
+ interrupts = <58 IRQ_TYPE_EDGE_FALLING>;
+
+- vcc-supply = <&pp3300_fp_tp>;
++ vdd-supply = <&pp3300_fp_tp>;
+ hid-descr-addr = <0x20>;
+
+ wakeup-source;
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi
+index a246dbd74cc11..b7b5264888b7c 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi
+@@ -44,7 +44,7 @@ ap_h1_spi: &spi0 {};
+ };
+
+ &cpu6_thermal {
+- sustainable-power = <948>;
++ sustainable-power = <1124>;
+ };
+
+ &cpu7_alert0 {
+@@ -56,7 +56,7 @@ ap_h1_spi: &spi0 {};
+ };
+
+ &cpu7_thermal {
+- sustainable-power = <948>;
++ sustainable-power = <1124>;
+ };
+
+ &cpu8_alert0 {
+@@ -68,7 +68,7 @@ ap_h1_spi: &spi0 {};
+ };
+
+ &cpu8_thermal {
+- sustainable-power = <948>;
++ sustainable-power = <1124>;
+ };
+
+ &cpu9_alert0 {
+@@ -80,7 +80,7 @@ ap_h1_spi: &spi0 {};
+ };
+
+ &cpu9_thermal {
+- sustainable-power = <948>;
++ sustainable-power = <1124>;
+ };
+
+ &gpio_keys {
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+index 70c88c37de321..a9d36ac6cb90e 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+@@ -42,6 +42,7 @@
+ */
+
+ /delete-node/ &hyp_mem;
++/delete-node/ &ipa_fw_mem;
+ /delete-node/ &xbl_mem;
+ /delete-node/ &aop_mem;
+ /delete-node/ &sec_apps_mem;
+diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
+index c8921e2d6480f..12816d60e2494 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
+@@ -137,8 +137,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
+@@ -162,8 +162,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ next-level-cache = <&L2_100>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -184,8 +184,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ next-level-cache = <&L2_200>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -206,8 +206,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ next-level-cache = <&L2_300>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -228,8 +228,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ next-level-cache = <&L2_400>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -250,8 +250,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1024>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <415>;
++ dynamic-power-coefficient = <137>;
+ next-level-cache = <&L2_500>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -272,8 +272,8 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1740>;
+- dynamic-power-coefficient = <405>;
++ capacity-dmips-mhz = <1024>;
++ dynamic-power-coefficient = <480>;
+ next-level-cache = <&L2_600>;
+ operating-points-v2 = <&cpu6_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -294,8 +294,8 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <1740>;
+- dynamic-power-coefficient = <405>;
++ capacity-dmips-mhz = <1024>;
++ dynamic-power-coefficient = <480>;
+ next-level-cache = <&L2_700>;
+ operating-points-v2 = <&cpu6_opp_table>;
+ interconnects = <&gem_noc MASTER_APPSS_PROC 3 &mc_virt SLAVE_EBI1 3>,
+@@ -1460,6 +1460,8 @@
+ "imem",
+ "config";
+
++ qcom,qmp = <&aoss_qmp>;
++
+ qcom,smem-states = <&ipa_smp2p_out 0>,
+ <&ipa_smp2p_out 1>;
+ qcom,smem-state-names = "ipa-clock-enabled-valid",
+@@ -3239,8 +3241,8 @@
+ interrupts-extended = <&pdc 1 IRQ_TYPE_LEVEL_HIGH>;
+ qcom,ee = <0>;
+ qcom,channel = <0>;
+- #address-cells = <1>;
+- #size-cells = <1>;
++ #address-cells = <2>;
++ #size-cells = <0>;
+ interrupt-controller;
+ #interrupt-cells = <4>;
+ cell-index = <0>;
+@@ -3616,7 +3618,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 1>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu0_alert0: trip-point0 {
+@@ -3665,7 +3667,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 2>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu1_alert0: trip-point0 {
+@@ -3714,7 +3716,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 3>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu2_alert0: trip-point0 {
+@@ -3763,7 +3765,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 4>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu3_alert0: trip-point0 {
+@@ -3812,7 +3814,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 5>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu4_alert0: trip-point0 {
+@@ -3861,7 +3863,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 6>;
+- sustainable-power = <768>;
++ sustainable-power = <1052>;
+
+ trips {
+ cpu5_alert0: trip-point0 {
+@@ -3910,7 +3912,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 9>;
+- sustainable-power = <1202>;
++ sustainable-power = <1425>;
+
+ trips {
+ cpu6_alert0: trip-point0 {
+@@ -3951,7 +3953,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 10>;
+- sustainable-power = <1202>;
++ sustainable-power = <1425>;
+
+ trips {
+ cpu7_alert0: trip-point0 {
+@@ -3992,7 +3994,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 11>;
+- sustainable-power = <1202>;
++ sustainable-power = <1425>;
+
+ trips {
+ cpu8_alert0: trip-point0 {
+@@ -4033,7 +4035,7 @@
+ polling-delay = <0>;
+
+ thermal-sensors = <&tsens0 12>;
+- sustainable-power = <1202>;
++ sustainable-power = <1425>;
+
+ trips {
+ cpu9_alert0: trip-point0 {
+diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
+index fd78f16181ddd..fb6473a0aa4b3 100644
+--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
+@@ -429,7 +429,7 @@
+ <&rpmhcc RPMH_CXO_CLK_A>, <&sleep_clk>,
+ <0>, <0>, <0>, <0>, <0>, <0>;
+ clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk",
+- "pcie_0_pipe_clk", "pcie_1_pipe-clk",
++ "pcie_0_pipe_clk", "pcie_1_pipe_clk",
+ "ufs_phy_rx_symbol_0_clk", "ufs_phy_rx_symbol_1_clk",
+ "ufs_phy_tx_symbol_0_clk",
+ "usb3_phy_wrapper_gcc_usb30_pipe_clk";
+@@ -615,6 +615,8 @@
+ interconnect-names = "memory",
+ "config";
+
++ qcom,qmp = <&aoss_qmp>;
++
+ qcom,smem-states = <&ipa_smp2p_out 0>,
+ <&ipa_smp2p_out 1>;
+ qcom,smem-state-names = "ipa-clock-enabled-valid",
+@@ -1258,15 +1260,11 @@
+ dp_phy: dp-phy@88ea200 {
+ reg = <0 0x088ea200 0 0x200>,
+ <0 0x088ea400 0 0x200>,
+- <0 0x088eac00 0 0x400>,
++ <0 0x088eaa00 0 0x200>,
+ <0 0x088ea600 0 0x200>,
+- <0 0x088ea800 0 0x200>,
+- <0 0x088eaa00 0 0x100>;
++ <0 0x088ea800 0 0x200>;
+ #phy-cells = <0>;
+ #clock-cells = <1>;
+- clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
+- clock-names = "pipe0";
+- clock-output-names = "usb3_phy_pipe_clk_src";
+ };
+ };
+
+@@ -1496,8 +1494,8 @@
+ interrupts-extended = <&pdc 1 IRQ_TYPE_LEVEL_HIGH>;
+ qcom,ee = <0>;
+ qcom,channel = <0>;
+- #address-cells = <1>;
+- #size-cells = <1>;
++ #address-cells = <2>;
++ #size-cells = <0>;
+ interrupt-controller;
+ #interrupt-cells = <4>;
+ };
+diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi
+index 9c7f87e42fccd..e00c0577cef70 100644
+--- a/arch/arm64/boot/dts/qcom/sdm630.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi
+@@ -8,6 +8,7 @@
+ #include <dt-bindings/clock/qcom,gpucc-sdm660.h>
+ #include <dt-bindings/clock/qcom,mmcc-sdm660.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
++#include <dt-bindings/interconnect/qcom,sdm660.h>
+ #include <dt-bindings/power/qcom-rpmpd.h>
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+@@ -541,7 +542,7 @@
+ <&sleep_clk>;
+ };
+
+- rpm_msg_ram: memory@778000 {
++ rpm_msg_ram: sram@778000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x00778000 0x7000>;
+ };
+@@ -767,7 +768,7 @@
+ pins = "gpio17", "gpio18", "gpio19";
+ function = "gpio";
+ drive-strength = <2>;
+- bias-no-pull;
++ bias-disable;
+ };
+ };
+
+@@ -1041,11 +1042,13 @@
+ nvmem-cells = <&gpu_speed_bin>;
+ nvmem-cell-names = "speed_bin";
+
+- interconnects = <&gnoc 1 &bimc 5>;
++ interconnects = <&bimc MASTER_OXILI &bimc SLAVE_EBI>;
+ interconnect-names = "gfx-mem";
+
+ operating-points-v2 = <&gpu_sdm630_opp_table>;
+
++ status = "disabled";
++
+ gpu_sdm630_opp_table: opp-table {
+ compatible = "operating-points-v2";
+ opp-775000000 {
+@@ -1251,7 +1254,7 @@
+ #phy-cells = <0>;
+
+ clocks = <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>,
+- <&gcc GCC_RX1_USB2_CLKREF_CLK>;
++ <&gcc GCC_RX0_USB2_CLKREF_CLK>;
+ clock-names = "cfg_ahb", "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>;
+@@ -1831,7 +1834,7 @@
+ };
+ };
+
+- camss: camss@ca00000 {
++ camss: camss@ca00020 {
+ compatible = "qcom,sdm660-camss";
+ reg = <0x0c824000 0x1000>,
+ <0x0ca00120 0x4>,
+diff --git a/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts b/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts
+index bba1c2bce2131..0afe9eee025e1 100644
+--- a/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts
++++ b/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts
+@@ -18,7 +18,7 @@
+ };
+
+ &sdc2_state_on {
+- pinconf-clk {
++ clk {
+ drive-strength = <14>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+index dfd1b42c07fd5..3566db1d7357e 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+@@ -1299,7 +1299,7 @@ ap_ts_i2c: &i2c14 {
+ config {
+ pins = "gpio126";
+ function = "gpio";
+- bias-no-pull;
++ bias-disable;
+ drive-strength = <2>;
+ output-low;
+ };
+@@ -1309,7 +1309,7 @@ ap_ts_i2c: &i2c14 {
+ config {
+ pins = "gpio126";
+ function = "gpio";
+- bias-no-pull;
++ bias-disable;
+ drive-strength = <2>;
+ output-high;
+ };
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+index 2d5533dd4ec2d..5ce270f0b2ec1 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+@@ -896,7 +896,7 @@
+ };
+
+ wcd_intr_default: wcd_intr_default {
+- pins = <54>;
++ pins = "gpio54";
+ function = "gpio";
+
+ input-enable;
+@@ -1045,7 +1045,10 @@
+
+ /* PINCTRL - additions to nodes defined in sdm845.dtsi */
+ &qup_spi2_default {
+- drive-strength = <16>;
++ pinconf {
++ pins = "gpio27", "gpio28", "gpio29", "gpio30";
++ drive-strength = <16>;
++ };
+ };
+
+ &qup_uart3_default{
+diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
+index c60c8c640e17f..736951fabb7a9 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
++++ b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts
+@@ -221,7 +221,7 @@
+ panel@0 {
+ compatible = "tianma,fhd-video";
+ reg = <0>;
+- vddi0-supply = <&vreg_l14a_1p8>;
++ vddio-supply = <&vreg_l14a_1p8>;
+ vddpos-supply = <&lab>;
+ vddneg-supply = <&ibb>;
+
+diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
+index b3b9119261844..6a0e30cbf88f1 100644
+--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
++++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
+@@ -196,8 +196,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <607>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <611>;
++ dynamic-power-coefficient = <154>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -221,8 +221,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <607>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <611>;
++ dynamic-power-coefficient = <154>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -243,8 +243,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <607>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <611>;
++ dynamic-power-coefficient = <154>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -265,8 +265,8 @@
+ cpu-idle-states = <&LITTLE_CPU_SLEEP_0
+ &LITTLE_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- capacity-dmips-mhz = <607>;
+- dynamic-power-coefficient = <100>;
++ capacity-dmips-mhz = <611>;
++ dynamic-power-coefficient = <154>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -288,7 +288,7 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- dynamic-power-coefficient = <396>;
++ dynamic-power-coefficient = <442>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ operating-points-v2 = <&cpu4_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -310,7 +310,7 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- dynamic-power-coefficient = <396>;
++ dynamic-power-coefficient = <442>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ operating-points-v2 = <&cpu4_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -332,7 +332,7 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- dynamic-power-coefficient = <396>;
++ dynamic-power-coefficient = <442>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ operating-points-v2 = <&cpu4_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -354,7 +354,7 @@
+ cpu-idle-states = <&BIG_CPU_SLEEP_0
+ &BIG_CPU_SLEEP_1
+ &CLUSTER_SLEEP_0>;
+- dynamic-power-coefficient = <396>;
++ dynamic-power-coefficient = <442>;
+ qcom,freq-domain = <&cpufreq_hw 1>;
+ operating-points-v2 = <&cpu4_opp_table>;
+ interconnects = <&gladiator_noc MASTER_APPSS_PROC 3 &mem_noc SLAVE_EBI1 3>,
+@@ -1074,6 +1074,7 @@
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
++ power-domains = <&rpmhpd SDM845_CX>;
+ };
+
+ qfprom@784000 {
+@@ -1988,8 +1989,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x01000000 0x0 0x60200000 0 0x60200000 0x0 0x100000>,
+- <0x02000000 0x0 0x60300000 0 0x60300000 0x0 0xd00000>;
++ ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>,
++ <0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0xd00000>;
+
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -2064,7 +2065,7 @@
+
+ status = "disabled";
+
+- pcie0_lane: lanes@1c06200 {
++ pcie0_lane: phy@1c06200 {
+ reg = <0 0x01c06200 0 0x128>,
+ <0 0x01c06400 0 0x1fc>,
+ <0 0x01c06800 0 0x218>,
+@@ -2093,7 +2094,7 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x01000000 0x0 0x40200000 0x0 0x40200000 0x0 0x100000>,
++ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
+ <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
+
+ interrupts = <GIC_SPI 307 IRQ_TYPE_EDGE_RISING>;
+@@ -2174,7 +2175,7 @@
+
+ status = "disabled";
+
+- pcie1_lane: lanes@1c06200 {
++ pcie1_lane: phy@1c06200 {
+ reg = <0 0x01c0a800 0 0x800>,
+ <0 0x01c0a800 0 0x800>,
+ <0 0x01c0b800 0 0x400>;
+@@ -2282,7 +2283,7 @@
+ <0 0>,
+ <0 0>,
+ <0 0>,
+- <0 300000000>;
++ <75000000 300000000>;
+
+ status = "disabled";
+ };
+@@ -2302,7 +2303,7 @@
+ reset-names = "ufsphy";
+ status = "disabled";
+
+- ufs_mem_phy_lanes: lanes@1d87400 {
++ ufs_mem_phy_lanes: phy@1d87400 {
+ reg = <0 0x01d87400 0 0x108>,
+ <0 0x01d87600 0 0x1e0>,
+ <0 0x01d87c00 0 0x1dc>,
+@@ -2316,11 +2317,11 @@
+ compatible = "qcom,bam-v1.7.0";
+ reg = <0 0x01dc4000 0 0x24000>;
+ interrupts = <GIC_SPI 272 IRQ_TYPE_LEVEL_HIGH>;
+- clocks = <&rpmhcc 15>;
++ clocks = <&rpmhcc RPMH_CE_CLK>;
+ clock-names = "bam_clk";
+ #dma-cells = <1>;
+ qcom,ee = <0>;
+- qcom,controlled-remotely = <1>;
++ qcom,controlled-remotely;
+ iommus = <&apps_smmu 0x704 0x1>,
+ <&apps_smmu 0x706 0x1>,
+ <&apps_smmu 0x714 0x1>,
+@@ -2331,8 +2332,8 @@
+ compatible = "qcom,crypto-v5.4";
+ reg = <0 0x01dfa000 0 0x6000>;
+ clocks = <&gcc GCC_CE1_AHB_CLK>,
+- <&gcc GCC_CE1_AHB_CLK>,
+- <&rpmhcc 15>;
++ <&gcc GCC_CE1_AXI_CLK>,
++ <&rpmhcc RPMH_CE_CLK>;
+ clock-names = "iface", "bus", "core";
+ dmas = <&cryptobam 6>, <&cryptobam 7>;
+ dma-names = "rx", "tx";
+@@ -3608,10 +3609,10 @@
+ #clock-cells = <0>;
+ clock-frequency = <9600000>;
+ clock-output-names = "mclk";
+- qcom,micbias1-millivolt = <1800>;
+- qcom,micbias2-millivolt = <1800>;
+- qcom,micbias3-millivolt = <1800>;
+- qcom,micbias4-millivolt = <1800>;
++ qcom,micbias1-microvolt = <1800000>;
++ qcom,micbias2-microvolt = <1800000>;
++ qcom,micbias3-microvolt = <1800000>;
++ qcom,micbias4-microvolt = <1800000>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -3699,7 +3700,7 @@
+ <&gcc GCC_USB3_PHY_PRIM_BCR>;
+ reset-names = "phy", "common";
+
+- usb_1_ssphy: lanes@88e9200 {
++ usb_1_ssphy: phy@88e9200 {
+ reg = <0 0x088e9200 0 0x128>,
+ <0 0x088e9400 0 0x200>,
+ <0 0x088e9c00 0 0x218>,
+@@ -3732,7 +3733,7 @@
+ <&gcc GCC_USB3_PHY_SEC_BCR>;
+ reset-names = "phy", "common";
+
+- usb_2_ssphy: lane@88eb200 {
++ usb_2_ssphy: phy@88eb200 {
+ reg = <0 0x088eb200 0 0x128>,
+ <0 0x088eb400 0 0x1fc>,
+ <0 0x088eb800 0 0x218>,
+@@ -3921,7 +3922,7 @@
+ #reset-cells = <1>;
+ };
+
+- camss: camss@a00000 {
++ camss: camss@acb3000 {
+ compatible = "qcom,sdm845-camss";
+
+ reg = <0 0xacb3000 0 0x1000>,
+@@ -4147,7 +4148,7 @@
+
+ power-domains = <&dispcc MDSS_GDSC>;
+
+- clocks = <&gcc GCC_DISP_AHB_CLK>,
++ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&dispcc DISP_CC_MDSS_MDP_CLK>;
+ clock-names = "iface", "core";
+
+diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+index 2ba23aa582a18..834fb463f99ec 100644
+--- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
++++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts
+@@ -475,8 +475,10 @@
+ };
+
+ &qup_i2c12_default {
+- drive-strength = <2>;
+- bias-disable;
++ pinmux {
++ drive-strength = <2>;
++ bias-disable;
++ };
+ };
+
+ &qup_uart6_default {
+@@ -518,6 +520,10 @@
+ dai@1 {
+ reg = <1>;
+ };
++
++ dai@2 {
++ reg = <2>;
++ };
+ };
+
+ &sound {
+@@ -530,6 +536,7 @@
+ "SpkrLeft IN", "SPK1 OUT",
+ "SpkrRight IN", "SPK2 OUT",
+ "MM_DL1", "MultiMedia1 Playback",
++ "MM_DL3", "MultiMedia3 Playback",
+ "MultiMedia2 Capture", "MM_UL2";
+
+ mm1-dai-link {
+@@ -546,6 +553,13 @@
+ };
+ };
+
++ mm3-dai-link {
++ link-name = "MultiMedia3";
++ cpu {
++ sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA3>;
++ };
++ };
++
+ slim-dai-link {
+ link-name = "SLIM Playback";
+ cpu {
+@@ -575,6 +589,21 @@
+ sound-dai = <&wcd9340 1>;
+ };
+ };
++
++ slim-wcd-dai-link {
++ link-name = "SLIM WCD Playback";
++ cpu {
++ sound-dai = <&q6afedai SLIMBUS_1_RX>;
++ };
++
++ platform {
++ sound-dai = <&q6routing>;
++ };
++
++ codec {
++ sound-dai = <&wcd9340 2>;
++ };
++ };
+ };
+
+ &tlmm {
+diff --git a/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts b/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts
+index 58b6b2742d3f9..47f8e5397ebba 100644
+--- a/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts
++++ b/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts
+@@ -88,11 +88,19 @@
+ status = "okay";
+ };
+
+-&sdc2_state_off {
++&sdc2_off_state {
+ sd-cd {
+ pins = "gpio98";
++ drive-strength = <2>;
+ bias-disable;
++ };
++};
++
++&sdc2_on_state {
++ sd-cd {
++ pins = "gpio98";
+ drive-strength = <2>;
++ bias-pull-up;
+ };
+ };
+
+@@ -102,32 +110,6 @@
+
+ &tlmm {
+ gpio-reserved-ranges = <22 2>, <28 6>;
+-
+- sdc2_state_on: sdc2-on {
+- clk {
+- pins = "sdc2_clk";
+- bias-disable;
+- drive-strength = <16>;
+- };
+-
+- cmd {
+- pins = "sdc2_cmd";
+- bias-pull-up;
+- drive-strength = <10>;
+- };
+-
+- data {
+- pins = "sdc2_data";
+- bias-pull-up;
+- drive-strength = <10>;
+- };
+-
+- sd-cd {
+- pins = "gpio98";
+- bias-pull-up;
+- drive-strength = <2>;
+- };
+- };
+ };
+
+ &usb3 {
+diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi
+index 2b37ce6a9f9c5..2e4fe2bc1e0a8 100644
+--- a/arch/arm64/boot/dts/qcom/sm6125.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi
+@@ -336,23 +336,43 @@
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+- sdc2_state_off: sdc2-off {
++ sdc2_off_state: sdc2-off-state {
+ clk {
+ pins = "sdc2_clk";
+- bias-disable;
+ drive-strength = <2>;
++ bias-disable;
+ };
+
+ cmd {
+ pins = "sdc2_cmd";
++ drive-strength = <2>;
+ bias-pull-up;
++ };
++
++ data {
++ pins = "sdc2_data";
+ drive-strength = <2>;
++ bias-pull-up;
++ };
++ };
++
++ sdc2_on_state: sdc2-on-state {
++ clk {
++ pins = "sdc2_clk";
++ drive-strength = <16>;
++ bias-disable;
++ };
++
++ cmd {
++ pins = "sdc2_cmd";
++ drive-strength = <10>;
++ bias-pull-up;
+ };
+
+ data {
+ pins = "sdc2_data";
++ drive-strength = <10>;
+ bias-pull-up;
+- drive-strength = <2>;
+ };
+ };
+ };
+@@ -372,15 +392,15 @@
+ reg = <0x01613000 0x180>;
+ #phy-cells = <0>;
+
+- clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+- <&gcc GCC_AHB2PHY_USB_CLK>;
+- clock-names = "ref", "cfg_ahb";
++ clocks = <&gcc GCC_AHB2PHY_USB_CLK>,
++ <&rpmcc RPM_SMD_XO_CLK_SRC>;
++ clock-names = "cfg_ahb", "ref";
+
+ resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>;
+ status = "disabled";
+ };
+
+- rpm_msg_ram: memory@45f0000 {
++ rpm_msg_ram: sram@45f0000 {
+ compatible = "qcom,rpm-msg-ram";
+ reg = <0x045f0000 0x7000>;
+ };
+@@ -388,7 +408,7 @@
+ sdhc_1: sdhci@4744000 {
+ compatible = "qcom,sm6125-sdhci", "qcom,sdhci-msm-v5";
+ reg = <0x04744000 0x1000>, <0x04745000 0x1000>;
+- reg-names = "hc", "core";
++ reg-names = "hc", "cqhci";
+
+ interrupts = <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>;
+@@ -417,8 +437,8 @@
+ <&xo_board>;
+ clock-names = "iface", "core", "xo";
+
+- pinctrl-0 = <&sdc2_state_on>;
+- pinctrl-1 = <&sdc2_state_off>;
++ pinctrl-0 = <&sdc2_on_state>;
++ pinctrl-1 = <&sdc2_off_state>;
+ pinctrl-names = "default", "sleep";
+
+ bus-width = <4>;
+diff --git a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi
+index 014fe3a315489..04c71f74ab72d 100644
+--- a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi
+@@ -33,9 +33,10 @@
+ framebuffer: framebuffer@9c000000 {
+ compatible = "simple-framebuffer";
+ reg = <0 0x9c000000 0 0x2300000>;
+- width = <1644>;
+- height = <3840>;
+- stride = <(1644 * 4)>;
++ /* Griffin BL initializes in 2.5k mode, not 4k */
++ width = <1096>;
++ height = <2560>;
++ stride = <(1096 * 4)>;
+ format = "a8r8g8b8";
+ /*
+ * That's (going to be) a lot of clocks, but it's necessary due
+@@ -348,6 +349,9 @@
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7c_3p0: ldo7 {
+@@ -367,6 +371,9 @@
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l10c_3p3: ldo10 {
+diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
+index ef0232c2cf45b..e8cb20c4cbf22 100644
+--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
+@@ -1131,7 +1131,7 @@
+ clocks = <&gcc GCC_QUPV3_WRAP0_S7_CLK>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&qup_i2c7_default>;
+- interrupts = <GIC_SPI 607 IRQ_TYPE_LEVEL_HIGH>;
++ interrupts = <GIC_SPI 608 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+@@ -1692,12 +1692,12 @@
+ reset-names = "ufsphy";
+ status = "disabled";
+
+- ufs_mem_phy_lanes: lanes@1d87400 {
+- reg = <0 0x01d87400 0 0x108>,
+- <0 0x01d87600 0 0x1e0>,
+- <0 0x01d87c00 0 0x1dc>,
+- <0 0x01d87800 0 0x108>,
+- <0 0x01d87a00 0 0x1e0>;
++ ufs_mem_phy_lanes: phy@1d87400 {
++ reg = <0 0x01d87400 0 0x16c>,
++ <0 0x01d87600 0 0x200>,
++ <0 0x01d87c00 0 0x200>,
++ <0 0x01d87800 0 0x16c>,
++ <0 0x01d87a00 0 0x200>;
+ #phy-cells = <0>;
+ };
+ };
+@@ -3010,7 +3010,7 @@
+ <&gcc GCC_USB3_PHY_PRIM_BCR>;
+ reset-names = "phy", "common";
+
+- usb_1_ssphy: lanes@88e9200 {
++ usb_1_ssphy: phy@88e9200 {
+ reg = <0 0x088e9200 0 0x200>,
+ <0 0x088e9400 0 0x200>,
+ <0 0x088e9c00 0 0x218>,
+@@ -3043,7 +3043,7 @@
+ <&gcc GCC_USB3_PHY_SEC_BCR>;
+ reset-names = "phy", "common";
+
+- usb_2_ssphy: lane@88eb200 {
++ usb_2_ssphy: phy@88eb200 {
+ reg = <0 0x088eb200 0 0x200>,
+ <0 0x088eb400 0 0x200>,
+ <0 0x088eb800 0 0x800>,
+@@ -3434,9 +3434,9 @@
+ qcom,tcs-offset = <0xd00>;
+ qcom,drv-id = <2>;
+ qcom,tcs-config = <ACTIVE_TCS 2>,
+- <SLEEP_TCS 1>,
+- <WAKE_TCS 1>,
+- <CONTROL_TCS 0>;
++ <SLEEP_TCS 3>,
++ <WAKE_TCS 3>,
++ <CONTROL_TCS 1>;
+
+ rpmhcc: clock-controller {
+ compatible = "qcom,sm8150-rpmh-clk";
+diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx203.dts b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx203.dts
+index 79afeb07f4a24..792911af1637b 100644
+--- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx203.dts
++++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx203.dts
+@@ -13,3 +13,236 @@
+ };
+
+ /delete-node/ &vreg_l7f_1p8;
++
++&pm8009_gpios {
++ gpio-line-names = "NC", /* GPIO_1 */
++ "CAM_PWR_LD_EN",
++ "WIDEC_PWR_EN",
++ "NC";
++};
++
++&pm8150_gpios {
++ gpio-line-names = "VOL_DOWN_N", /* GPIO_1 */
++ "OPTION_2",
++ "NC",
++ "PM_SLP_CLK_IN",
++ "OPTION_1",
++ "NC",
++ "NC",
++ "SP_ARI_PWR_ALARM",
++ "NC",
++ "NC"; /* GPIO_10 */
++};
++
++&pm8150b_gpios {
++ gpio-line-names = "SNAPSHOT_N", /* GPIO_1 */
++ "FOCUS_N",
++ "NC",
++ "NC",
++ "RF_LCD_ID_EN",
++ "NC",
++ "NC",
++ "LCD_ID",
++ "NC",
++ "WLC_EN_N", /* GPIO_10 */
++ "NC",
++ "RF_ID";
++};
++
++&pm8150l_gpios {
++ gpio-line-names = "NC", /* GPIO_1 */
++ "PM3003A_EN",
++ "NC",
++ "NC",
++ "NC",
++ "AUX2_THERM",
++ "BB_HP_EN",
++ "FP_LDO_EN",
++ "PMX_RESET_N",
++ "AUX3_THERM", /* GPIO_10 */
++ "DTV_PWR_EN",
++ "PM3003A_MODE";
++};
++
++&tlmm {
++ gpio-line-names = "AP_CTI_IN", /* GPIO_0 */
++ "MDM2AP_ERR_FATAL",
++ "AP_CTI_OUT",
++ "MDM2AP_STATUS",
++ "NFC_I2C_SDA",
++ "NFC_I2C_SCL",
++ "NFC_EN",
++ "NFC_CLK_REQ",
++ "NFC_ESE_PWR_REQ",
++ "DVDT_WRT_DET_AND",
++ "SPK_AMP_RESET_N", /* GPIO_10 */
++ "SPK_AMP_INT_N",
++ "APPS_I2C_1_SDA",
++ "APPS_I2C_1_SCL",
++ "NC",
++ "TX_GTR_THRES_IN",
++ "HST_BT_UART_CTS",
++ "HST_BT_UART_RFR",
++ "HST_BT_UART_TX",
++ "HST_BT_UART_RX",
++ "HST_WLAN_EN", /* GPIO_20 */
++ "HST_BT_EN",
++ "RGBC_IR_PWR_EN",
++ "FP_INT_N",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NFC_ESE_SPI_MISO",
++ "NFC_ESE_SPI_MOSI",
++ "NFC_ESE_SPI_SCLK", /* GPIO_30 */
++ "NFC_ESE_SPI_CS_N",
++ "WCD_RST_N",
++ "NC",
++ "SDM_DEBUG_UART_TX",
++ "SDM_DEBUG_UART_RX",
++ "TS_I2C_SDA",
++ "TS_I2C_SCL",
++ "TS_INT_N",
++ "FP_SPI_MISO", /* GPIO_40 */
++ "FP_SPI_MOSI",
++ "FP_SPI_SCLK",
++ "FP_SPI_CS_N",
++ "APPS_I2C_0_SDA",
++ "APPS_I2C_0_SCL",
++ "DISP_ERR_FG",
++ "UIM2_DETECT_EN",
++ "NC",
++ "NC",
++ "NC", /* GPIO_50 */
++ "NC",
++ "MDM_UART_CTS",
++ "MDM_UART_RFR",
++ "MDM_UART_TX",
++ "MDM_UART_RX",
++ "AP2MDM_STATUS",
++ "AP2MDM_ERR_FATAL",
++ "MDM_IPC_HS_UART_TX",
++ "MDM_IPC_HS_UART_RX",
++ "NC", /* GPIO_60 */
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "USB_CC_DIR",
++ "DISP_VSYNC",
++ "NC",
++ "NC",
++ "CAM_PWR_B_CS",
++ "NC", /* GPIO_70 */
++ "CAM_PWR_A_CS",
++ "SBU_SW_SEL",
++ "SBU_SW_OE",
++ "FP_RESET_N",
++ "FP_RESET_N",
++ "DISP_RESET_N",
++ "DEBUG_GPIO0",
++ "TRAY_DET",
++ "CAM2_RST_N",
++ "PCIE0_RST_N",
++ "PCIE0_CLK_REQ_N", /* GPIO_80 */
++ "PCIE0_WAKE_N",
++ "DVDT_ENABLE",
++ "DVDT_WRT_DET_OR",
++ "NC",
++ "PCIE2_RST_N",
++ "PCIE2_CLK_REQ_N",
++ "PCIE2_WAKE_N",
++ "MDM_VFR_IRQ0",
++ "MDM_VFR_IRQ1",
++ "SW_SERVICE", /* GPIO_90 */
++ "CAM_SOF",
++ "CAM1_RST_N",
++ "CAM0_RST_N",
++ "CAM0_MCLK",
++ "CAM1_MCLK",
++ "CAM2_MCLK",
++ "CAM3_MCLK",
++ "CAM4_MCLK",
++ "TOF_RST_N",
++ "NC", /* GPIO_100 */
++ "CCI0_I2C_SDA",
++ "CCI0_I2C_SCL",
++ "CCI1_I2C_SDA",
++ "CCI1_I2C_SCL_",
++ "CCI2_I2C_SDA",
++ "CCI2_I2C_SCL",
++ "CCI3_I2C_SDA",
++ "CCI3_I2C_SCL",
++ "CAM3_RST_N",
++ "NFC_DWL_REQ", /* GPIO_110 */
++ "NFC_IRQ",
++ "XVS",
++ "NC",
++ "RF_ID_EXTENSION",
++ "SPK_AMP_I2C_SDA",
++ "SPK_AMP_I2C_SCL",
++ "NC",
++ "NC",
++ "WLC_I2C_SDA",
++ "WLC_I2C_SCL", /* GPIO_120 */
++ "ACC_COVER_OPEN",
++ "ALS_PROX_INT_N",
++ "ACCEL_INT",
++ "WLAN_SW_CTRL",
++ "CAMSENSOR_I2C_SDA",
++ "CAMSENSOR_I2C_SCL",
++ "UDON_SWITCH_SEL",
++ "WDOG_DISABLE",
++ "BAROMETER_INT",
++ "NC", /* GPIO_130 */
++ "NC",
++ "FORCED_USB_BOOT",
++ "NC",
++ "NC",
++ "WLC_INT_N",
++ "NC",
++ "NC",
++ "RGBC_IR_INT",
++ "NC",
++ "NC", /* GPIO_140 */
++ "NC",
++ "BT_SLIMBUS_CLK",
++ "BT_SLIMBUS_DATA",
++ "HW_ID_0",
++ "HW_ID_1",
++ "WCD_SWR_TX_CLK",
++ "WCD_SWR_TX_DATA0",
++ "WCD_SWR_TX_DATA1",
++ "WCD_SWR_RX_CLK",
++ "WCD_SWR_RX_DATA0", /* GPIO_150 */
++ "WCD_SWR_RX_DATA1",
++ "SDM_DMIC_CLK1",
++ "SDM_DMIC_DATA1",
++ "SDM_DMIC_CLK2",
++ "SDM_DMIC_DATA2",
++ "SPK_AMP_I2S_CLK",
++ "SPK_AMP_I2S_WS",
++ "SPK_AMP_I2S_ASP_DIN",
++ "SPK_AMP_I2S_ASP_DOUT",
++ "COMPASS_I2C_SDA", /* GPIO_160 */
++ "COMPASS_I2C_SCL",
++ "NC",
++ "NC",
++ "SSC_SPI_1_MISO",
++ "SSC_SPI_1_MOSI",
++ "SSC_SPI_1_CLK",
++ "SSC_SPI_1_CS_N",
++ "NC",
++ "NC",
++ "SSC_SENSOR_I2C_SDA", /* GPIO_170 */
++ "SSC_SENSOR_I2C_SCL",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "HST_BLE_SNS_UART6_TX",
++ "HST_BLE_SNS_UART6_RX",
++ "HST_WLAN_UART_TX",
++ "HST_WLAN_UART_RX";
++};
+diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx206.dts b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx206.dts
+index 16c96e8385348..b0f4ecc911144 100644
+--- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx206.dts
++++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo-pdx206.dts
+@@ -19,6 +19,8 @@
+ };
+
+ &gpio_keys {
++ pinctrl-0 = <&focus_n &snapshot_n &vol_down_n &g_assist_n>;
++
+ g-assist-key {
+ label = "Google Assistant Key";
+ linux,code = <KEY_LEFTMETA>;
+@@ -29,6 +31,247 @@
+ };
+ };
+
++&pm8009_gpios {
++ gpio-line-names = "NC", /* GPIO_1 */
++ "NC",
++ "WIDEC_PWR_EN",
++ "NC";
++};
++
++&pm8150_gpios {
++ gpio-line-names = "VOL_DOWN_N", /* GPIO_1 */
++ "OPTION_2",
++ "NC",
++ "PM_SLP_CLK_IN",
++ "OPTION_1",
++ "G_ASSIST_N",
++ "NC",
++ "SP_ARI_PWR_ALARM",
++ "NC",
++ "NC"; /* GPIO_10 */
++
++ g_assist_n: g-assist-n-state {
++ pins = "gpio6";
++ function = "normal";
++ power-source = <1>;
++ bias-pull-up;
++ input-enable;
++ };
++};
++
++&pm8150b_gpios {
++ gpio-line-names = "SNAPSHOT_N", /* GPIO_1 */
++ "FOCUS_N",
++ "NC",
++ "NC",
++ "RF_LCD_ID_EN",
++ "NC",
++ "NC",
++ "LCD_ID",
++ "NC",
++ "NC", /* GPIO_10 */
++ "NC",
++ "RF_ID";
++};
++
++&pm8150l_gpios {
++ gpio-line-names = "NC", /* GPIO_1 */
++ "PM3003A_EN",
++ "NC",
++ "NC",
++ "NC",
++ "AUX2_THERM",
++ "BB_HP_EN",
++ "FP_LDO_EN",
++ "PMX_RESET_N",
++ "NC", /* GPIO_10 */
++ "NC",
++ "PM3003A_MODE";
++};
++
++&tlmm {
++ gpio-line-names = "AP_CTI_IN", /* GPIO_0 */
++ "MDM2AP_ERR_FATAL",
++ "AP_CTI_OUT",
++ "MDM2AP_STATUS",
++ "NFC_I2C_SDA",
++ "NFC_I2C_SCL",
++ "NFC_EN",
++ "NFC_CLK_REQ",
++ "NFC_ESE_PWR_REQ",
++ "DVDT_WRT_DET_AND",
++ "SPK_AMP_RESET_N", /* GPIO_10 */
++ "SPK_AMP_INT_N",
++ "APPS_I2C_1_SDA",
++ "APPS_I2C_1_SCL",
++ "NC",
++ "TX_GTR_THRES_IN",
++ "HST_BT_UART_CTS",
++ "HST_BT_UART_RFR",
++ "HST_BT_UART_TX",
++ "HST_BT_UART_RX",
++ "HST_WLAN_EN", /* GPIO_20 */
++ "HST_BT_EN",
++ "RGBC_IR_PWR_EN",
++ "FP_INT_N",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NFC_ESE_SPI_MISO",
++ "NFC_ESE_SPI_MOSI",
++ "NFC_ESE_SPI_SCLK", /* GPIO_30 */
++ "NFC_ESE_SPI_CS_N",
++ "WCD_RST_N",
++ "NC",
++ "SDM_DEBUG_UART_TX",
++ "SDM_DEBUG_UART_RX",
++ "TS_I2C_SDA",
++ "TS_I2C_SCL",
++ "TS_INT_N",
++ "FP_SPI_MISO", /* GPIO_40 */
++ "FP_SPI_MOSI",
++ "FP_SPI_SCLK",
++ "FP_SPI_CS_N",
++ "APPS_I2C_0_SDA",
++ "APPS_I2C_0_SCL",
++ "DISP_ERR_FG",
++ "UIM2_DETECT_EN",
++ "NC",
++ "NC",
++ "NC", /* GPIO_50 */
++ "NC",
++ "MDM_UART_CTS",
++ "MDM_UART_RFR",
++ "MDM_UART_TX",
++ "MDM_UART_RX",
++ "AP2MDM_STATUS",
++ "AP2MDM_ERR_FATAL",
++ "MDM_IPC_HS_UART_TX",
++ "MDM_IPC_HS_UART_RX",
++ "NC", /* GPIO_60 */
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "USB_CC_DIR",
++ "DISP_VSYNC",
++ "NC",
++ "NC",
++ "CAM_PWR_B_CS",
++ "NC", /* GPIO_70 */
++ "FRONTC_PWR_EN",
++ "SBU_SW_SEL",
++ "SBU_SW_OE",
++ "FP_RESET_N",
++ "FP_RESET_N",
++ "DISP_RESET_N",
++ "DEBUG_GPIO0",
++ "TRAY_DET",
++ "CAM2_RST_N",
++ "PCIE0_RST_N",
++ "PCIE0_CLK_REQ_N", /* GPIO_80 */
++ "PCIE0_WAKE_N",
++ "DVDT_ENABLE",
++ "DVDT_WRT_DET_OR",
++ "NC",
++ "PCIE2_RST_N",
++ "PCIE2_CLK_REQ_N",
++ "PCIE2_WAKE_N",
++ "MDM_VFR_IRQ0",
++ "MDM_VFR_IRQ1",
++ "SW_SERVICE", /* GPIO_90 */
++ "CAM_SOF",
++ "CAM1_RST_N",
++ "CAM0_RST_N",
++ "CAM0_MCLK",
++ "CAM1_MCLK",
++ "CAM2_MCLK",
++ "CAM3_MCLK",
++ "NC",
++ "NC",
++ "NC", /* GPIO_100 */
++ "CCI0_I2C_SDA",
++ "CCI0_I2C_SCL",
++ "CCI1_I2C_SDA",
++ "CCI1_I2C_SCL_",
++ "CCI2_I2C_SDA",
++ "CCI2_I2C_SCL",
++ "CCI3_I2C_SDA",
++ "CCI3_I2C_SCL",
++ "CAM3_RST_N",
++ "NFC_DWL_REQ", /* GPIO_110 */
++ "NFC_IRQ",
++ "XVS",
++ "NC",
++ "RF_ID_EXTENSION",
++ "SPK_AMP_I2C_SDA",
++ "SPK_AMP_I2C_SCL",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "ACC_COVER_OPEN",
++ "ALS_PROX_INT_N",
++ "ACCEL_INT",
++ "WLAN_SW_CTRL",
++ "CAMSENSOR_I2C_SDA",
++ "CAMSENSOR_I2C_SCL",
++ "UDON_SWITCH_SEL",
++ "WDOG_DISABLE",
++ "BAROMETER_INT",
++ "NC", /* GPIO_130 */
++ "NC",
++ "FORCED_USB_BOOT",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "RGBC_IR_INT",
++ "NC",
++ "NC", /* GPIO_140 */
++ "NC",
++ "BT_SLIMBUS_CLK",
++ "BT_SLIMBUS_DATA",
++ "HW_ID_0",
++ "HW_ID_1",
++ "WCD_SWR_TX_CLK",
++ "WCD_SWR_TX_DATA0",
++ "WCD_SWR_TX_DATA1",
++ "WCD_SWR_RX_CLK",
++ "WCD_SWR_RX_DATA0", /* GPIO_150 */
++ "WCD_SWR_RX_DATA1",
++ "SDM_DMIC_CLK1",
++ "SDM_DMIC_DATA1",
++ "SDM_DMIC_CLK2",
++ "SDM_DMIC_DATA2",
++ "SPK_AMP_I2S_CLK",
++ "SPK_AMP_I2S_WS",
++ "SPK_AMP_I2S_ASP_DIN",
++ "SPK_AMP_I2S_ASP_DOUT",
++ "COMPASS_I2C_SDA", /* GPIO_160 */
++ "COMPASS_I2C_SCL",
++ "NC",
++ "NC",
++ "SSC_SPI_1_MISO",
++ "SSC_SPI_1_MOSI",
++ "SSC_SPI_1_CLK",
++ "SSC_SPI_1_CS_N",
++ "NC",
++ "NC",
++ "SSC_SENSOR_I2C_SDA", /* GPIO_170 */
++ "SSC_SENSOR_I2C_SCL",
++ "NC",
++ "NC",
++ "NC",
++ "NC",
++ "HST_BLE_SNS_UART6_TX",
++ "HST_BLE_SNS_UART6_RX",
++ "HST_WLAN_UART_TX",
++ "HST_WLAN_UART_RX";
++};
++
+ &vreg_l2f_1p3 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi
+index d63f7a9bc4e9a..e622cbe167b0d 100644
+--- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi
+@@ -26,9 +26,10 @@
+ framebuffer: framebuffer@9c000000 {
+ compatible = "simple-framebuffer";
+ reg = <0 0x9c000000 0 0x2300000>;
+- width = <1644>;
+- height = <3840>;
+- stride = <(1644 * 4)>;
++ /* pdx203 BL initializes in 2.5k mode, not 4k */
++ width = <1096>;
++ height = <2560>;
++ stride = <(1096 * 4)>;
+ format = "a8r8g8b8";
+ /*
+ * That's a lot of clocks, but it's necessary due
+@@ -50,12 +51,26 @@
+ gpio_keys: gpio-keys {
+ compatible = "gpio-keys";
+
+- /*
+- * Camera focus (light press) and camera snapshot (full press)
+- * seem not to work properly.. Adding the former one stalls the CPU
+- * and the latter kills the volume down key for whatever reason. In any
+- * case, they are both on &pm8150b_gpios: camera focus(2), camera snapshot(1).
+- */
++ pinctrl-0 = <&focus_n &snapshot_n &vol_down_n>;
++ pinctrl-names = "default";
++
++ key-camera-focus {
++ label = "Camera Focus";
++ linux,code = <KEY_CAMERA_FOCUS>;
++ gpios = <&pm8150b_gpios 2 GPIO_ACTIVE_LOW>;
++ debounce-interval = <15>;
++ linux,can-disable;
++ wakeup-source;
++ };
++
++ key-camera-snapshot {
++ label = "Camera Snapshot";
++ linux,code = <KEY_CAMERA>;
++ gpios = <&pm8150b_gpios 1 GPIO_ACTIVE_LOW>;
++ debounce-interval = <15>;
++ linux,can-disable;
++ wakeup-source;
++ };
+
+ vol-down {
+ label = "Volume Down";
+@@ -317,6 +332,9 @@
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7c_2p85: ldo7 {
+@@ -339,6 +357,9 @@
+ regulator-max-microvolt = <2960000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l10c_3p3: ldo10 {
+@@ -511,6 +532,34 @@
+ vdda-pll-supply = <&vreg_l9a_1p2>;
+ };
+
++&pm8150_gpios {
++ vol_down_n: vol-down-n-state {
++ pins = "gpio1";
++ function = "normal";
++ power-source = <0>;
++ bias-pull-up;
++ input-enable;
++ };
++};
++
++&pm8150b_gpios {
++ snapshot_n: snapshot-n-state {
++ pins = "gpio1";
++ function = "normal";
++ power-source = <0>;
++ bias-pull-up;
++ input-enable;
++ };
++
++ focus_n: focus-n-state {
++ pins = "gpio2";
++ function = "normal";
++ power-source = <0>;
++ bias-pull-up;
++ input-enable;
++ };
++};
++
+ &pon_pwrkey {
+ status = "okay";
+ };
+@@ -585,7 +634,7 @@
+ pins = "gpio39";
+ function = "gpio";
+ drive-strength = <2>;
+- bias-disabled;
++ bias-disable;
+ input-enable;
+ };
+
+diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
+index d12e4cbfc8527..5d6551e1fcd8d 100644
+--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
+@@ -97,7 +97,7 @@
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ capacity-dmips-mhz = <448>;
+- dynamic-power-coefficient = <205>;
++ dynamic-power-coefficient = <105>;
+ next-level-cache = <&L2_0>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ #cooling-cells = <2>;
+@@ -116,7 +116,7 @@
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ capacity-dmips-mhz = <448>;
+- dynamic-power-coefficient = <205>;
++ dynamic-power-coefficient = <105>;
+ next-level-cache = <&L2_100>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ #cooling-cells = <2>;
+@@ -132,7 +132,7 @@
+ reg = <0x0 0x200>;
+ enable-method = "psci";
+ capacity-dmips-mhz = <448>;
+- dynamic-power-coefficient = <205>;
++ dynamic-power-coefficient = <105>;
+ next-level-cache = <&L2_200>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ #cooling-cells = <2>;
+@@ -148,7 +148,7 @@
+ reg = <0x0 0x300>;
+ enable-method = "psci";
+ capacity-dmips-mhz = <448>;
+- dynamic-power-coefficient = <205>;
++ dynamic-power-coefficient = <105>;
+ next-level-cache = <&L2_300>;
+ qcom,freq-domain = <&cpufreq_hw 0>;
+ #cooling-cells = <2>;
+@@ -1393,8 +1393,8 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x01000000 0x0 0x60200000 0 0x60200000 0x0 0x100000>,
+- <0x02000000 0x0 0x60300000 0 0x60300000 0x0 0x3d00000>;
++ ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>,
++ <0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>;
+
+ interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+@@ -1434,11 +1434,12 @@
+ phys = <&pcie0_lane>;
+ phy-names = "pciephy";
+
+- perst-gpio = <&tlmm 79 GPIO_ACTIVE_LOW>;
+- enable-gpio = <&tlmm 81 GPIO_ACTIVE_HIGH>;
++ perst-gpios = <&tlmm 79 GPIO_ACTIVE_LOW>;
++ wake-gpios = <&tlmm 81 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie0_default_state>;
++ dma-coherent;
+
+ status = "disabled";
+ };
+@@ -1463,7 +1464,7 @@
+
+ status = "disabled";
+
+- pcie0_lane: lanes@1c06200 {
++ pcie0_lane: phy@1c06200 {
+ reg = <0 0x1c06200 0 0x170>, /* tx */
+ <0 0x1c06400 0 0x200>, /* rx */
+ <0 0x1c06800 0 0x1f0>, /* pcs */
+@@ -1472,6 +1473,8 @@
+ clock-names = "pipe0";
+
+ #phy-cells = <0>;
++
++ #clock-cells = <0>;
+ clock-output-names = "pcie_0_pipe_clk";
+ };
+ };
+@@ -1492,10 +1495,10 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x01000000 0x0 0x40200000 0x0 0x40200000 0x0 0x100000>,
++ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
+ <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
+
+- interrupts = <GIC_SPI 306 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+@@ -1538,11 +1541,12 @@
+ phys = <&pcie1_lane>;
+ phy-names = "pciephy";
+
+- perst-gpio = <&tlmm 82 GPIO_ACTIVE_LOW>;
+- enable-gpio = <&tlmm 84 GPIO_ACTIVE_HIGH>;
++ perst-gpios = <&tlmm 82 GPIO_ACTIVE_LOW>;
++ wake-gpios = <&tlmm 84 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie1_default_state>;
++ dma-coherent;
+
+ status = "disabled";
+ };
+@@ -1567,7 +1571,7 @@
+
+ status = "disabled";
+
+- pcie1_lane: lanes@1c0e200 {
++ pcie1_lane: phy@1c0e200 {
+ reg = <0 0x1c0e200 0 0x170>, /* tx0 */
+ <0 0x1c0e400 0 0x200>, /* rx0 */
+ <0 0x1c0ea00 0 0x1f0>, /* pcs */
+@@ -1578,6 +1582,8 @@
+ clock-names = "pipe0";
+
+ #phy-cells = <0>;
++
++ #clock-cells = <0>;
+ clock-output-names = "pcie_1_pipe_clk";
+ };
+ };
+@@ -1598,10 +1604,10 @@
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+- ranges = <0x01000000 0x0 0x64200000 0x0 0x64200000 0x0 0x100000>,
++ ranges = <0x01000000 0x0 0x00000000 0x0 0x64200000 0x0 0x100000>,
+ <0x02000000 0x0 0x64300000 0x0 0x64300000 0x0 0x3d00000>;
+
+- interrupts = <GIC_SPI 236 IRQ_TYPE_EDGE_RISING>;
++ interrupts = <GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+@@ -1644,11 +1650,12 @@
+ phys = <&pcie2_lane>;
+ phy-names = "pciephy";
+
+- perst-gpio = <&tlmm 85 GPIO_ACTIVE_LOW>;
+- enable-gpio = <&tlmm 87 GPIO_ACTIVE_HIGH>;
++ perst-gpios = <&tlmm 85 GPIO_ACTIVE_LOW>;
++ wake-gpios = <&tlmm 87 GPIO_ACTIVE_HIGH>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pcie2_default_state>;
++ dma-coherent;
+
+ status = "disabled";
+ };
+@@ -1673,7 +1680,7 @@
+
+ status = "disabled";
+
+- pcie2_lane: lanes@1c16200 {
++ pcie2_lane: phy@1c16200 {
+ reg = <0 0x1c16200 0 0x170>, /* tx0 */
+ <0 0x1c16400 0 0x200>, /* rx0 */
+ <0 0x1c16a00 0 0x1f0>, /* pcs */
+@@ -1684,6 +1691,8 @@
+ clock-names = "pipe0";
+
+ #phy-cells = <0>;
++
++ #clock-cells = <0>;
+ clock-output-names = "pcie_2_pipe_clk";
+ };
+ };
+@@ -1750,12 +1759,12 @@
+ reset-names = "ufsphy";
+ status = "disabled";
+
+- ufs_mem_phy_lanes: lanes@1d87400 {
+- reg = <0 0x01d87400 0 0x108>,
+- <0 0x01d87600 0 0x1e0>,
+- <0 0x01d87c00 0 0x1dc>,
+- <0 0x01d87800 0 0x108>,
+- <0 0x01d87a00 0 0x1e0>;
++ ufs_mem_phy_lanes: phy@1d87400 {
++ reg = <0 0x01d87400 0 0x16c>,
++ <0 0x01d87600 0 0x200>,
++ <0 0x01d87c00 0 0x200>,
++ <0 0x01d87800 0 0x16c>,
++ <0 0x01d87a00 0 0x200>;
+ #phy-cells = <0>;
+ };
+ };
+@@ -1927,7 +1936,7 @@
+ pins = "gpio7";
+ function = "dmic1_data";
+ drive-strength = <2>;
+- pull-down;
++ bias-pull-down;
+ input-enable;
+ };
+ };
+@@ -2300,15 +2309,11 @@
+ dp_phy: dp-phy@88ea200 {
+ reg = <0 0x088ea200 0 0x200>,
+ <0 0x088ea400 0 0x200>,
+- <0 0x088eac00 0 0x400>,
++ <0 0x088eaa00 0 0x200>,
+ <0 0x088ea600 0 0x200>,
+- <0 0x088ea800 0 0x200>,
+- <0 0x088eaa00 0 0x100>;
++ <0 0x088ea800 0 0x200>;
+ #phy-cells = <0>;
+ #clock-cells = <1>;
+- clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
+- clock-names = "pipe0";
+- clock-output-names = "usb3_phy_pipe_clk_src";
+ };
+ };
+
+@@ -2330,7 +2335,7 @@
+ <&gcc GCC_USB3_PHY_SEC_BCR>;
+ reset-names = "phy", "common";
+
+- usb_2_ssphy: lanes@88eb200 {
++ usb_2_ssphy: phy@88eb200 {
+ reg = <0 0x088eb200 0 0x200>,
+ <0 0x088eb400 0 0x200>,
+ <0 0x088eb800 0 0x800>;
+diff --git a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts
+index 56093e260ddfd..9ea0d7233add0 100644
+--- a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts
++++ b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts
+@@ -108,6 +108,9 @@
+ regulator-max-microvolt = <888000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l6b_1p2: ldo6 {
+@@ -116,6 +119,9 @@
+ regulator-max-microvolt = <1208000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l7b_2p96: ldo7 {
+@@ -124,6 +130,9 @@
+ regulator-max-microvolt = <2504000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+
+ vreg_l9b_1p2: ldo9 {
+@@ -132,6 +141,9 @@
+ regulator-max-microvolt = <1200000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ regulator-allow-set-load;
++ regulator-allowed-modes =
++ <RPMH_REGULATOR_MODE_LPM
++ RPMH_REGULATOR_MODE_HPM>;
+ };
+ };
+
+diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
+index e91cd8a5e5356..b0ba63b5869d2 100644
+--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
++++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
+@@ -35,6 +35,24 @@
+ clock-frequency = <32000>;
+ #clock-cells = <0>;
+ };
++
++ ufs_phy_rx_symbol_0_clk: ufs-phy-rx-symbol-0 {
++ compatible = "fixed-clock";
++ clock-frequency = <1000>;
++ #clock-cells = <0>;
++ };
++
++ ufs_phy_rx_symbol_1_clk: ufs-phy-rx-symbol-1 {
++ compatible = "fixed-clock";
++ clock-frequency = <1000>;
++ #clock-cells = <0>;
++ };
++
++ ufs_phy_tx_symbol_0_clk: ufs-phy-tx-symbol-0 {
++ compatible = "fixed-clock";
++ clock-frequency = <1000>;
++ #clock-cells = <0>;
++ };
+ };
+
+ cpus {
+@@ -43,7 +61,7 @@
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a55";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ next-level-cache = <&L2_0>;
+@@ -60,7 +78,7 @@
+
+ CPU1: cpu@100 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a55";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ next-level-cache = <&L2_100>;
+@@ -74,7 +92,7 @@
+
+ CPU2: cpu@200 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a55";
+ reg = <0x0 0x200>;
+ enable-method = "psci";
+ next-level-cache = <&L2_200>;
+@@ -88,7 +106,7 @@
+
+ CPU3: cpu@300 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a55";
+ reg = <0x0 0x300>;
+ enable-method = "psci";
+ next-level-cache = <&L2_300>;
+@@ -102,7 +120,7 @@
+
+ CPU4: cpu@400 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a78";
+ reg = <0x0 0x400>;
+ enable-method = "psci";
+ next-level-cache = <&L2_400>;
+@@ -116,7 +134,7 @@
+
+ CPU5: cpu@500 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a78";
+ reg = <0x0 0x500>;
+ enable-method = "psci";
+ next-level-cache = <&L2_500>;
+@@ -131,7 +149,7 @@
+
+ CPU6: cpu@600 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-a78";
+ reg = <0x0 0x600>;
+ enable-method = "psci";
+ next-level-cache = <&L2_600>;
+@@ -145,7 +163,7 @@
+
+ CPU7: cpu@700 {
+ device_type = "cpu";
+- compatible = "qcom,kryo685";
++ compatible = "arm,cortex-x1";
+ reg = <0x0 0x700>;
+ enable-method = "psci";
+ next-level-cache = <&L2_700>;
+@@ -443,8 +461,30 @@
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+- clock-names = "bi_tcxo", "sleep_clk";
+- clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>;
++ clock-names = "bi_tcxo",
++ "sleep_clk",
++ "pcie_0_pipe_clk",
++ "pcie_1_pipe_clk",
++ "ufs_card_rx_symbol_0_clk",
++ "ufs_card_rx_symbol_1_clk",
++ "ufs_card_tx_symbol_0_clk",
++ "ufs_phy_rx_symbol_0_clk",
++ "ufs_phy_rx_symbol_1_clk",
++ "ufs_phy_tx_symbol_0_clk",
++ "usb3_phy_wrapper_gcc_usb30_pipe_clk",
++ "usb3_uni_phy_sec_gcc_usb30_pipe_clk";
++ clocks = <&rpmhcc RPMH_CXO_CLK>,
++ <&sleep_clk>,
++ <0>,
++ <0>,
++ <0>,
++ <0>,
++ <0>,
++ <&ufs_phy_rx_symbol_0_clk>,
++ <&ufs_phy_rx_symbol_1_clk>,
++ <&ufs_phy_tx_symbol_0_clk>,
++ <0>,
++ <0>;
+ };
+
+ ipcc: mailbox@408000 {
+@@ -696,6 +736,8 @@
+ interconnect-names = "memory",
+ "config";
+
++ qcom,qmp = <&aoss_qmp>;
++
+ qcom,smem-states = <&ipa_smp2p_out 0>,
+ <&ipa_smp2p_out 1>;
+ qcom,smem-state-names = "ipa-clock-enabled-valid",
+@@ -939,7 +981,7 @@
+ qcom,tcs-offset = <0xd00>;
+ qcom,drv-id = <2>;
+ qcom,tcs-config = <ACTIVE_TCS 2>, <SLEEP_TCS 3>,
+- <WAKE_TCS 3>, <CONTROL_TCS 1>;
++ <WAKE_TCS 3>, <CONTROL_TCS 0>;
+
+ rpmhcc: clock-controller {
+ compatible = "qcom,sm8350-rpmh-clk";
+@@ -1010,6 +1052,13 @@
+ <0 0x18593000 0 0x1000>;
+ reg-names = "freq-domain0", "freq-domain1", "freq-domain2";
+
++ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
++ <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
++ interrupt-names = "dcvsh-irq-0",
++ "dcvsh-irq-1",
++ "dcvsh-irq-2";
++
+ clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_GPLL0>;
+ clock-names = "xo", "alternate";
+
+@@ -1060,14 +1109,14 @@
+ <75000000 300000000>,
+ <0 0>,
+ <0 0>,
+- <75000000 300000000>,
+- <75000000 300000000>;
++ <0 0>,
++ <0 0>;
+ status = "disabled";
+ };
+
+ ufs_mem_phy: phy@1d87000 {
+ compatible = "qcom,sm8350-qmp-ufs-phy";
+- reg = <0 0x01d87000 0 0xe10>;
++ reg = <0 0x01d87000 0 0x1c4>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ #clock-cells = <1>;
+@@ -1081,12 +1130,12 @@
+ reset-names = "ufsphy";
+ status = "disabled";
+
+- ufs_mem_phy_lanes: lanes@1d87400 {
+- reg = <0 0x01d87400 0 0x108>,
+- <0 0x01d87600 0 0x1e0>,
+- <0 0x01d87c00 0 0x1dc>,
+- <0 0x01d87800 0 0x108>,
+- <0 0x01d87a00 0 0x1e0>;
++ ufs_mem_phy_lanes: phy@1d87400 {
++ reg = <0 0x01d87400 0 0x188>,
++ <0 0x01d87600 0 0x200>,
++ <0 0x01d87c00 0 0x200>,
++ <0 0x01d87800 0 0x188>,
++ <0 0x01d87a00 0 0x200>;
+ #phy-cells = <0>;
+ #clock-cells = <0>;
+ };
+@@ -2185,7 +2234,7 @@
+ };
+ };
+
+- camera-thermal-bottom {
++ cam-thermal-bottom {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+
+diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi
+index 2692cc64bff61..f1ab4943c295c 100644
+--- a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi
++++ b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi
+@@ -146,7 +146,7 @@
+ };
+ };
+
+- reg_audio: regulator_audio {
++ reg_audio: regulator-audio {
+ compatible = "regulator-fixed";
+ regulator-name = "audio-1.8V";
+ regulator-min-microvolt = <1800000>;
+@@ -174,7 +174,7 @@
+ vin-supply = <&reg_lcd>;
+ };
+
+- reg_cam0: regulator_camera {
++ reg_cam0: regulator-cam0 {
+ compatible = "regulator-fixed";
+ regulator-name = "reg_cam0";
+ regulator-min-microvolt = <1800000>;
+@@ -183,7 +183,7 @@
+ enable-active-high;
+ };
+
+- reg_cam1: regulator_camera {
++ reg_cam1: regulator-cam1 {
+ compatible = "regulator-fixed";
+ regulator-name = "reg_cam1";
+ regulator-min-microvolt = <1800000>;
+@@ -432,20 +432,6 @@
+ };
+ };
+
+- /* 0 - lcd_reset */
+- /* 1 - lcd_pwr */
+- /* 2 - lcd_select */
+- /* 3 - backlight-enable */
+- /* 4 - Touch_shdwn */
+- /* 5 - LCD_H_pol */
+- /* 6 - lcd_V_pol */
+- gpio_exp1: gpio@20 {
+- compatible = "onnn,pca9654";
+- reg = <0x20>;
+- gpio-controller;
+- #gpio-cells = <2>;
+- };
+-
+ touchscreen@26 {
+ compatible = "ilitek,ili2117";
+ reg = <0x26>;
+@@ -477,6 +463,16 @@
+ };
+ };
+ };
++
++ gpio_exp1: gpio@70 {
++ compatible = "nxp,pca9538";
++ reg = <0x70>;
++ gpio-controller;
++ #gpio-cells = <2>;
++ gpio-line-names = "lcd_reset", "lcd_pwr", "lcd_select",
++ "backlight-enable", "Touch_shdwn",
++ "LCD_H_pol", "lcd_V_pol";
++ };
+ };
+
+ &lvds0 {
+diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi
+index 090dc9c4f57b5..937d17a426b66 100644
+--- a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi
++++ b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi
+@@ -50,6 +50,7 @@
+ &avb {
+ pinctrl-0 = <&avb_pins>;
+ pinctrl-names = "default";
++ phy-mode = "rgmii-rxid";
+ phy-handle = <&phy0>;
+ rx-internal-delay-ps = <1800>;
+ tx-internal-delay-ps = <2000>;
+diff --git a/arch/arm64/boot/dts/renesas/cat875.dtsi b/arch/arm64/boot/dts/renesas/cat875.dtsi
+index 801ea54b027c4..20f8adc635e72 100644
+--- a/arch/arm64/boot/dts/renesas/cat875.dtsi
++++ b/arch/arm64/boot/dts/renesas/cat875.dtsi
+@@ -18,6 +18,7 @@
+ pinctrl-names = "default";
+ renesas,no-ether-link;
+ phy-handle = <&phy0>;
++ phy-mode = "rgmii-id";
+ status = "okay";
+
+ phy0: ethernet-phy@0 {
+diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+index 6f4fffacfca21..e70aa5a087402 100644
+--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+@@ -2784,7 +2784,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2799,7 +2799,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -2814,7 +2814,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+index 0f7bdfc90a0dc..6c5694fa66900 100644
+--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+@@ -2629,7 +2629,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2644,7 +2644,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -2659,7 +2659,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+index d597772c4c37e..50189209b6605 100644
+--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+@@ -49,17 +49,14 @@
+ opp-shared;
+ opp-800000000 {
+ opp-hz = /bits/ 64 <800000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ };
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ };
+ opp-1200000000 {
+ opp-hz = /bits/ 64 <1200000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ opp-suspend;
+ };
+@@ -1953,7 +1950,7 @@
+ cpu-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+- thermal-sensors = <&thermal 0>;
++ thermal-sensors = <&thermal>;
+ sustainable-power = <717>;
+
+ cooling-maps {
+diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+index 379a1300272ba..62209ab6deb9a 100644
+--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+@@ -2904,7 +2904,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2919,7 +2919,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -2934,7 +2934,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+index 1768a3e6bb8da..193d81be40fc4 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+@@ -3375,7 +3375,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -3390,7 +3390,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -3405,7 +3405,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+index 2bd8169735d35..b526e4f0ee6a8 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+@@ -2972,7 +2972,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2987,7 +2987,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -3002,7 +3002,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+index 041473aa5cd09..21fc95397c3c2 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+@@ -2719,7 +2719,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2734,7 +2734,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -2749,7 +2749,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+index 08df75606430b..f9679a4dd85fa 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+@@ -2784,7 +2784,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -2799,7 +2799,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -2814,7 +2814,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+index 6347d15e66b64..21fe602bd25af 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+@@ -1580,7 +1580,7 @@
+ };
+
+ thermal-zones {
+- thermal-sensor-1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -1599,7 +1599,7 @@
+ };
+ };
+
+- thermal-sensor-2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+index 0ea300a8147d0..adcb03fa23148 100644
+--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+@@ -60,17 +60,14 @@
+ opp-shared;
+ opp-800000000 {
+ opp-hz = /bits/ 64 <800000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ };
+ opp-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ };
+ opp-1200000000 {
+ opp-hz = /bits/ 64 <1200000000>;
+- opp-microvolt = <820000>;
+ clock-latency-ns = <300000>;
+ opp-suspend;
+ };
+@@ -2102,7 +2099,7 @@
+ cpu-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <0>;
+- thermal-sensors = <&thermal 0>;
++ thermal-sensors = <&thermal>;
+ sustainable-power = <717>;
+
+ cooling-maps {
+diff --git a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
+index 631d520cebee5..26899fb768a73 100644
+--- a/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
++++ b/arch/arm64/boot/dts/renesas/r8a779a0.dtsi
+@@ -1149,7 +1149,7 @@
+ };
+
+ thermal-zones {
+- sensor_thermal1: sensor-thermal1 {
++ sensor1_thermal: sensor1-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 0>;
+@@ -1163,7 +1163,7 @@
+ };
+ };
+
+- sensor_thermal2: sensor-thermal2 {
++ sensor2_thermal: sensor2-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 1>;
+@@ -1177,7 +1177,7 @@
+ };
+ };
+
+- sensor_thermal3: sensor-thermal3 {
++ sensor3_thermal: sensor3-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 2>;
+@@ -1191,7 +1191,7 @@
+ };
+ };
+
+- sensor_thermal4: sensor-thermal4 {
++ sensor4_thermal: sensor4-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 3>;
+@@ -1205,7 +1205,7 @@
+ };
+ };
+
+- sensor_thermal5: sensor-thermal5 {
++ sensor5_thermal: sensor5-thermal {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&tsc 4>;
+diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
+index 61bd4df09df0d..26cb5f14f9c7a 100644
+--- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
++++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
+@@ -270,7 +270,7 @@
+ };
+
+ scif1_pins: scif1 {
+- groups = "scif1_data_b", "scif1_ctrl";
++ groups = "scif1_data_b";
+ function = "scif1";
+ };
+
+@@ -330,7 +330,6 @@
+ &scif1 {
+ pinctrl-0 = <&scif1_pins>;
+ pinctrl-names = "default";
+- uart-has-rtscts;
+
+ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi
+index 7249871530ab9..5eecbefa8a336 100644
+--- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi
++++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi
+@@ -2,8 +2,8 @@
+ /*
+ * Copyright (c) 2020 Fuzhou Rockchip Electronics Co., Ltd
+ * Copyright (c) 2020 Engicam srl
+- * Copyright (c) 2020 Amarula Solutons
+- * Copyright (c) 2020 Amarula Solutons(India)
++ * Copyright (c) 2020 Amarula Solutions
++ * Copyright (c) 2020 Amarula Solutions(India)
+ */
+
+ #include <dt-bindings/gpio/gpio.h>
+diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
+index 248ebb61aa790..5200d0bbd9e9c 100644
+--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
++++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
+@@ -711,7 +711,7 @@
+ clock-names = "pclk", "timer";
+ };
+
+- dmac: dmac@ff240000 {
++ dmac: dma-controller@ff240000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x0 0xff240000 0x0 0x4000>;
+ interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+index 665b2e69455dd..7ea48167747c6 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+@@ -19,7 +19,7 @@
+ stdout-path = "serial2:1500000n8";
+ };
+
+- ir_rx {
++ ir-receiver {
+ compatible = "gpio-ir-receiver";
+ gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>;
+ pinctrl-names = "default";
+@@ -97,7 +97,7 @@
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ regulator-boot-on;
+- vim-supply = <&vcc_io>;
++ vin-supply = <&vcc_io>;
+ };
+
+ vdd_core: vdd-core {
+diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+index aa22a0c222655..5d5d9574088ca 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+@@ -96,7 +96,6 @@
+ linux,default-trigger = "heartbeat";
+ gpios = <&rk805 1 GPIO_ACTIVE_LOW>;
+ default-state = "on";
+- mode = <0x23>;
+ };
+
+ user_led: led-1 {
+@@ -104,7 +103,6 @@
+ linux,default-trigger = "mmc1";
+ gpios = <&rk805 0 GPIO_ACTIVE_LOW>;
+ default-state = "off";
+- mode = <0x05>;
+ };
+ };
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+index 8c821acb21ffb..3cbe83e6fb9a4 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+@@ -489,7 +489,7 @@
+ status = "disabled";
+ };
+
+- dmac: dmac@ff1f0000 {
++ dmac: dma-controller@ff1f0000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x0 0xff1f0000 0x0 0x4000>;
+ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+@@ -599,7 +599,7 @@
+
+ gpu: gpu@ff300000 {
+ compatible = "rockchip,rk3328-mali", "arm,mali-450";
+- reg = <0x0 0xff300000 0x0 0x40000>;
++ reg = <0x0 0xff300000 0x0 0x30000>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>,
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts
+index c4dd2a6b48368..f81ce3240342c 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts
+@@ -770,8 +770,8 @@
+ sd-uhs-sdr104;
+
+ /* Power supply */
+- vqmmc-supply = &vcc1v8_s3; /* IO line */
+- vmmc-supply = &vcc_sdio; /* card's power */
++ vqmmc-supply = <&vcc1v8_s3>; /* IO line */
++ vmmc-supply = <&vcc_sdio>; /* card's power */
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+index e6c1c94c8d69c..07737b65d7a3d 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+@@ -87,3 +87,8 @@
+ };
+ };
+ };
++
++&wlan_host_wake_l {
++ /* Kevin has an external pull up, but Bob does not. */
++ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>;
++};
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+index 1384dabbdf406..739937f70f8d0 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi
+@@ -237,6 +237,14 @@
+ &edp {
+ status = "okay";
+
++ /*
++ * eDP PHY/clk don't sync reliably at anything other than 24 MHz. Only
++ * set this here, because rk3399-gru.dtsi ensures we can generate this
++ * off GPLL=600MHz, whereas some other RK3399 boards may not.
++ */
++ assigned-clocks = <&cru PCLK_EDP>;
++ assigned-clock-rates = <24000000>;
++
+ ports {
+ edp_out: port@1 {
+ reg = <1>;
+@@ -395,6 +403,7 @@ ap_i2c_tp: &i2c5 {
+ };
+
+ wlan_host_wake_l: wlan-host-wake-l {
++ /* Kevin has an external pull up, but Bob does not */
+ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+index c1bcc8ca3769d..2f8e117109699 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+@@ -286,7 +286,7 @@
+
+ sound: sound {
+ compatible = "rockchip,rk3399-gru-sound";
+- rockchip,cpu = <&i2s0 &i2s2>;
++ rockchip,cpu = <&i2s0 &spdif>;
+ };
+ };
+
+@@ -437,10 +437,6 @@ ap_i2c_audio: &i2c8 {
+ status = "okay";
+ };
+
+-&i2s2 {
+- status = "okay";
+-};
+-
+ &io_domains {
+ status = "okay";
+
+@@ -537,6 +533,17 @@ ap_i2c_audio: &i2c8 {
+ vqmmc-supply = <&ppvar_sd_card_io>;
+ };
+
++&spdif {
++ status = "okay";
++
++ /*
++ * SPDIF is routed internally to DP; we either don't use these pins, or
++ * mux them to something else.
++ */
++ /delete-property/ pinctrl-0;
++ /delete-property/ pinctrl-names;
++};
++
+ &spi1 {
+ status = "okay";
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi
+index d5c7648c841dc..f1fcc6b5b402c 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi
+@@ -705,7 +705,6 @@
+ &sdhci {
+ bus-width = <8>;
+ mmc-hs400-1_8v;
+- mmc-hs400-enhanced-strobe;
+ non-removable;
+ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
+index 738cfd21df3ef..354f54767bad8 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
+@@ -269,6 +269,7 @@
+ clock-output-names = "xin32k", "rk808-clkout2";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_int_l>;
++ rockchip,system-power-controller;
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts
+index 7c93f840bc64f..e890166e7fd43 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts
+@@ -55,7 +55,7 @@
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+- vim-supply = <&vcc3v3_sys>;
++ vin-supply = <&vcc3v3_sys>;
+ };
+
+ vcc3v3_sys: vcc3v3-sys {
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
+index 2b5f001ff4a61..9e5d07f5712e6 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
+@@ -385,10 +385,6 @@
+ };
+ };
+
+-&cdn_dp {
+- status = "okay";
+-};
+-
+ &cpu_b0 {
+ cpu-supply = <&vdd_cpu_b>;
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+index 292bb7e80cf35..f07f4b8231f91 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+@@ -207,7 +207,7 @@
+ cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>;
+ disable-wp;
+- max-frequency = <150000000>;
++ max-frequency = <40000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
+ vmmc-supply = <&vcc3v3_baseboard>;
+@@ -232,6 +232,7 @@
+
+ &usbdrd_dwc3_0 {
+ dr_mode = "otg";
++ extcon = <&extcon_usb3>;
+ status = "okay";
+ };
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+index fb67db4619ea0..7b27079fd6116 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+@@ -25,6 +25,13 @@
+ };
+ };
+
++ extcon_usb3: extcon-usb3 {
++ compatible = "linux,extcon-usb-gpio";
++ id-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>;
++ pinctrl-names = "default";
++ pinctrl-0 = <&usb3_id>;
++ };
++
+ clkin_gmac: external-gmac-clock {
+ compatible = "fixed-clock";
+ clock-frequency = <125000000>;
+@@ -55,7 +62,6 @@
+ vcc5v0_host: vcc5v0-host-regulator {
+ compatible = "regulator-fixed";
+ gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>;
+- enable-active-low;
+ pinctrl-names = "default";
+ pinctrl-0 = <&vcc5v0_host_en>;
+ regulator-name = "vcc5v0_host";
+@@ -422,9 +428,22 @@
+ <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
++
++ usb3 {
++ usb3_id: usb3-id {
++ rockchip,pins =
++ <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
++ };
++ };
+ };
+
+ &sdhci {
++ /*
++ * Signal integrity isn't great at 200MHz but 100MHz has proven stable
++ * enough.
++ */
++ max-frequency = <100000000>;
++
+ bus-width = <8>;
+ mmc-hs400-1_8v;
+ mmc-hs400-enhanced-strobe;
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+index b28888ea9262e..8b70e831aff23 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+@@ -446,7 +446,6 @@
+ &i2s1 {
+ rockchip,playback-channels = <2>;
+ rockchip,capture-channels = <2>;
+- status = "okay";
+ };
+
+ &i2s2 {
+@@ -457,7 +456,7 @@
+ status = "okay";
+
+ bt656-supply = <&vcc_3v0>;
+- audio-supply = <&vcc_3v0>;
++ audio-supply = <&vcc1v8_codec>;
+ sdmmc-supply = <&vcc_sdio>;
+ gpio1830-supply = <&vcc_3v0>;
+ };
+@@ -596,9 +595,9 @@
+ };
+
+ &sdhci {
++ max-frequency = <150000000>;
+ bus-width = <8>;
+- mmc-hs400-1_8v;
+- mmc-hs400-enhanced-strobe;
++ mmc-hs200-1_8v;
+ non-removable;
+ status = "okay";
+ };
+diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+index 3871c7fd83b00..4255e2d7a72fc 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+@@ -1477,6 +1477,7 @@
+ reg = <0xf780 0x24>;
+ clocks = <&sdhci>;
+ clock-names = "emmcclk";
++ drive-impedance-ohm = <50>;
+ #phy-cells = <0>;
+ status = "disabled";
+ };
+@@ -1487,7 +1488,6 @@
+ clock-names = "refclk";
+ #phy-cells = <1>;
+ resets = <&cru SRST_PCIEPHY>;
+- drive-impedance-ohm = <50>;
+ reset-names = "phy";
+ status = "disabled";
+ };
+@@ -1802,10 +1802,10 @@
+ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&cru PCLK_HDMI_CTRL>,
+ <&cru SCLK_HDMI_SFR>,
+- <&cru PLL_VPLL>,
++ <&cru SCLK_HDMI_CEC>,
+ <&cru PCLK_VIO_GRF>,
+- <&cru SCLK_HDMI_CEC>;
+- clock-names = "iahb", "isfr", "vpll", "grf", "cec";
++ <&cru PLL_VPLL>;
++ clock-names = "iahb", "isfr", "cec", "grf", "vpll";
+ power-domains = <&power RK3399_PD_HDCP>;
+ reg-io-width = <4>;
+ rockchip,grf = <&grf>;
+diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+index be97da1322580..ba75adedbf79b 100644
+--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
++++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+@@ -599,8 +599,8 @@
+ compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+ status = "disabled";
+ reg = <0x65a00000 0xcd00>;
+- interrupt-names = "host", "peripheral";
+- interrupts = <0 134 4>, <0 135 4>;
++ interrupt-names = "dwc_usb3";
++ interrupts = <0 134 4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>;
+ clock-names = "ref", "bus_early", "suspend";
+@@ -701,8 +701,8 @@
+ compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+ status = "disabled";
+ reg = <0x65c00000 0xcd00>;
+- interrupt-names = "host", "peripheral";
+- interrupts = <0 137 4>, <0 138 4>;
++ interrupt-names = "dwc_usb3";
++ interrupts = <0 137 4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>;
+ clock-names = "ref", "bus_early", "suspend";
+diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+index 42d1d219a3fd2..d195b97ab2eef 100644
+--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+@@ -59,7 +59,10 @@
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */
+- <0x00 0x01840000 0x00 0xC0000>; /* GICR */
++ <0x00 0x01840000 0x00 0xC0000>, /* GICR */
++ <0x01 0x00000000 0x00 0x2000>, /* GICC */
++ <0x01 0x00010000 0x00 0x1000>, /* GICH */
++ <0x01 0x00020000 0x00 0x2000>; /* GICV */
+ /*
+ * vcpumntirq:
+ * virtual CPU interface maintenance interrupt
+@@ -453,13 +456,11 @@
+ clock-names = "clk_ahb", "clk_xin";
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
+- mmc-hs400-1_8v;
+ ti,trm-icp = <0x2>;
+ ti,otap-del-sel-legacy = <0x0>;
+ ti,otap-del-sel-mmc-hs = <0x0>;
+ ti,otap-del-sel-ddr52 = <0x6>;
+ ti,otap-del-sel-hs200 = <0x7>;
+- ti,otap-del-sel-hs400 = <0x4>;
+ };
+
+ sdhci1: mmc@fa00000 {
+diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+index 59cc58f7d0c87..93e684bbd66cd 100644
+--- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+@@ -10,7 +10,6 @@
+ compatible = "ti,am64-uart", "ti,am654-uart";
+ reg = <0x00 0x04a00000 0x00 0x100>;
+ interrupts = <GIC_SPI 185 IRQ_TYPE_LEVEL_HIGH>;
+- clock-frequency = <48000000>;
+ current-speed = <115200>;
+ power-domains = <&k3_pds 149 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 149 0>;
+@@ -21,7 +20,6 @@
+ compatible = "ti,am64-uart", "ti,am654-uart";
+ reg = <0x00 0x04a10000 0x00 0x100>;
+ interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
+- clock-frequency = <48000000>;
+ current-speed = <115200>;
+ power-domains = <&k3_pds 160 TI_SCI_PD_EXCLUSIVE>;
+ clocks = <&k3_clks 160 0>;
+diff --git a/arch/arm64/boot/dts/ti/k3-am64.dtsi b/arch/arm64/boot/dts/ti/k3-am64.dtsi
+index de6805b0c72c1..e589c58f60885 100644
+--- a/arch/arm64/boot/dts/ti/k3-am64.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am64.dtsi
+@@ -85,6 +85,7 @@
+ <0x00 0x68000000 0x00 0x68000000 0x00 0x08000000>, /* PCIe DAT0 */
+ <0x00 0x70000000 0x00 0x70000000 0x00 0x00200000>, /* OC SRAM */
+ <0x00 0x78000000 0x00 0x78000000 0x00 0x00800000>, /* Main R5FSS */
++ <0x01 0x00000000 0x01 0x00000000 0x00 0x00310000>, /* A53 PERIPHBASE */
+ <0x06 0x00000000 0x06 0x00000000 0x01 0x00000000>, /* PCIe DAT1 */
+ <0x05 0x00000000 0x05 0x00000000 0x01 0x00000000>, /* FSS0 DAT3 */
+
+diff --git a/arch/arm64/boot/dts/ti/k3-am642.dtsi b/arch/arm64/boot/dts/ti/k3-am642.dtsi
+index e2b397c884018..8a76f4821b11b 100644
+--- a/arch/arm64/boot/dts/ti/k3-am642.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am642.dtsi
+@@ -60,6 +60,6 @@
+ cache-level = <2>;
+ cache-size = <0x40000>;
+ cache-line-size = <64>;
+- cache-sets = <512>;
++ cache-sets = <256>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+index ba4e5d3e1ed7a..4f232f575ab2a 100644
+--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+@@ -35,7 +35,10 @@
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */
+- <0x00 0x01880000 0x00 0x90000>; /* GICR */
++ <0x00 0x01880000 0x00 0x90000>, /* GICR */
++ <0x00 0x6f000000 0x00 0x2000>, /* GICC */
++ <0x00 0x6f010000 0x00 0x1000>, /* GICH */
++ <0x00 0x6f020000 0x00 0x2000>; /* GICV */
+ /*
+ * vcpumntirq:
+ * virtual CPU interface maintenance interrupt
+@@ -117,7 +120,6 @@
+ dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>,
+ <&main_udmap 0x4001>;
+ dma-names = "tx", "rx1", "rx2";
+- dma-coherent;
+
+ rng: rng@4e10000 {
+ compatible = "inside-secure,safexcel-eip76";
+diff --git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi
+index a9fc1af03f27f..1607db9b32dd2 100644
+--- a/arch/arm64/boot/dts/ti/k3-am65.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am65.dtsi
+@@ -84,6 +84,7 @@
+ <0x00 0x46000000 0x00 0x46000000 0x00 0x00200000>,
+ <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>,
+ <0x00 0x50000000 0x00 0x50000000 0x00 0x8000000>,
++ <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A53 PERIPHBASE */
+ <0x00 0x70000000 0x00 0x70000000 0x00 0x200000>,
+ <0x05 0x00000000 0x05 0x00000000 0x01 0x0000000>,
+ <0x07 0x00000000 0x07 0x00000000 0x01 0x0000000>;
+diff --git a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
+index d14f3c18b65fc..ee244df75eaea 100644
+--- a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
++++ b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts
+@@ -77,28 +77,28 @@
+ };
+ };
+
+-&wkup_pmx0 {
++&wkup_pmx2 {
+ mcu_cpsw_pins_default: mcu-cpsw-pins-default {
+ pinctrl-single,pins = <
+- J721E_WKUP_IOPAD(0x0068, PIN_OUTPUT, 0) /* MCU_RGMII1_TX_CTL */
+- J721E_WKUP_IOPAD(0x006c, PIN_INPUT, 0) /* MCU_RGMII1_RX_CTL */
+- J721E_WKUP_IOPAD(0x0070, PIN_OUTPUT, 0) /* MCU_RGMII1_TD3 */
+- J721E_WKUP_IOPAD(0x0074, PIN_OUTPUT, 0) /* MCU_RGMII1_TD2 */
+- J721E_WKUP_IOPAD(0x0078, PIN_OUTPUT, 0) /* MCU_RGMII1_TD1 */
+- J721E_WKUP_IOPAD(0x007c, PIN_OUTPUT, 0) /* MCU_RGMII1_TD0 */
+- J721E_WKUP_IOPAD(0x0088, PIN_INPUT, 0) /* MCU_RGMII1_RD3 */
+- J721E_WKUP_IOPAD(0x008c, PIN_INPUT, 0) /* MCU_RGMII1_RD2 */
+- J721E_WKUP_IOPAD(0x0090, PIN_INPUT, 0) /* MCU_RGMII1_RD1 */
+- J721E_WKUP_IOPAD(0x0094, PIN_INPUT, 0) /* MCU_RGMII1_RD0 */
+- J721E_WKUP_IOPAD(0x0080, PIN_OUTPUT, 0) /* MCU_RGMII1_TXC */
+- J721E_WKUP_IOPAD(0x0084, PIN_INPUT, 0) /* MCU_RGMII1_RXC */
++ J721E_WKUP_IOPAD(0x0000, PIN_OUTPUT, 0) /* MCU_RGMII1_TX_CTL */
++ J721E_WKUP_IOPAD(0x0004, PIN_INPUT, 0) /* MCU_RGMII1_RX_CTL */
++ J721E_WKUP_IOPAD(0x0008, PIN_OUTPUT, 0) /* MCU_RGMII1_TD3 */
++ J721E_WKUP_IOPAD(0x000c, PIN_OUTPUT, 0) /* MCU_RGMII1_TD2 */
++ J721E_WKUP_IOPAD(0x0010, PIN_OUTPUT, 0) /* MCU_RGMII1_TD1 */
++ J721E_WKUP_IOPAD(0x0014, PIN_OUTPUT, 0) /* MCU_RGMII1_TD0 */
++ J721E_WKUP_IOPAD(0x0020, PIN_INPUT, 0) /* MCU_RGMII1_RD3 */
++ J721E_WKUP_IOPAD(0x0024, PIN_INPUT, 0) /* MCU_RGMII1_RD2 */
++ J721E_WKUP_IOPAD(0x0028, PIN_INPUT, 0) /* MCU_RGMII1_RD1 */
++ J721E_WKUP_IOPAD(0x002c, PIN_INPUT, 0) /* MCU_RGMII1_RD0 */
++ J721E_WKUP_IOPAD(0x0018, PIN_OUTPUT, 0) /* MCU_RGMII1_TXC */
++ J721E_WKUP_IOPAD(0x001c, PIN_INPUT, 0) /* MCU_RGMII1_RXC */
+ >;
+ };
+
+ mcu_mdio_pins_default: mcu-mdio1-pins-default {
+ pinctrl-single,pins = <
+- J721E_WKUP_IOPAD(0x009c, PIN_OUTPUT, 0) /* (L1) MCU_MDIO0_MDC */
+- J721E_WKUP_IOPAD(0x0098, PIN_INPUT, 0) /* (L4) MCU_MDIO0_MDIO */
++ J721E_WKUP_IOPAD(0x0034, PIN_OUTPUT, 0) /* (L1) MCU_MDIO0_MDC */
++ J721E_WKUP_IOPAD(0x0030, PIN_INPUT, 0) /* (L4) MCU_MDIO0_MDIO */
+ >;
+ };
+ };
+@@ -131,15 +131,17 @@
+ >;
+ };
+
+- main_usbss0_pins_default: main-usbss0-pins-default {
++ vdd_sd_dv_pins_default: vdd-sd-dv-pins-default {
+ pinctrl-single,pins = <
+- J721E_IOPAD(0x120, PIN_OUTPUT, 0) /* (T4) USB0_DRVVBUS */
++ J721E_IOPAD(0xd0, PIN_OUTPUT, 7) /* (T5) SPI0_D1.GPIO0_55 */
+ >;
+ };
++};
+
+- vdd_sd_dv_pins_default: vdd-sd-dv-pins-default {
++&main_pmx1 {
++ main_usbss0_pins_default: main-usbss0-pins-default {
+ pinctrl-single,pins = <
+- J721E_IOPAD(0xd0, PIN_OUTPUT, 7) /* (T5) SPI0_D1.GPIO0_55 */
++ J721E_IOPAD(0x04, PIN_OUTPUT, 0) /* (T4) USB0_DRVVBUS */
+ >;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+index e8a41d09b45f2..b1df17525dea5 100644
+--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+@@ -32,7 +32,7 @@
+ #size-cells = <1>;
+ ranges = <0x00 0x00 0x00100000 0x1c000>;
+
+- serdes_ln_ctrl: serdes-ln-ctrl@4080 {
++ serdes_ln_ctrl: mux-controller@4080 {
+ compatible = "mmio-mux";
+ #mux-control-cells = <1>;
+ mux-reg-masks = <0x4080 0x3>, <0x4084 0x3>, /* SERDES0 lane0/1 select */
+@@ -54,7 +54,10 @@
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */
+- <0x00 0x01900000 0x00 0x100000>; /* GICR */
++ <0x00 0x01900000 0x00 0x100000>, /* GICR */
++ <0x00 0x6f000000 0x00 0x2000>, /* GICC */
++ <0x00 0x6f010000 0x00 0x1000>, /* GICH */
++ <0x00 0x6f020000 0x00 0x2000>; /* GICV */
+
+ /* vcpumntirq: virtual CPU interface maintenance interrupt */
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+@@ -292,7 +295,16 @@
+ main_pmx0: pinctrl@11c000 {
+ compatible = "pinctrl-single";
+ /* Proxy 0 addressing */
+- reg = <0x00 0x11c000 0x00 0x2b4>;
++ reg = <0x00 0x11c000 0x00 0x10c>;
++ #pinctrl-cells = <1>;
++ pinctrl-single,register-width = <32>;
++ pinctrl-single,function-mask = <0xffffffff>;
++ };
++
++ main_pmx1: pinctrl@11c11c {
++ compatible = "pinctrl-single";
++ /* Proxy 0 addressing */
++ reg = <0x00 0x11c11c 0x00 0xc>;
+ #pinctrl-cells = <1>;
+ pinctrl-single,register-width = <32>;
+ pinctrl-single,function-mask = <0xffffffff>;
+@@ -606,10 +618,10 @@
+ clock-names = "fck";
+ #address-cells = <3>;
+ #size-cells = <2>;
+- bus-range = <0x0 0xf>;
++ bus-range = <0x0 0xff>;
+ cdns,no-bar-match-nbits = <64>;
+- vendor-id = /bits/ 16 <0x104c>;
+- device-id = /bits/ 16 <0xb00f>;
++ vendor-id = <0x104c>;
++ device-id = <0xb00f>;
+ msi-map = <0x0 &gic_its 0x0 0x10000>;
+ dma-coherent;
+ ranges = <0x01000000 0x0 0x18001000 0x00 0x18001000 0x0 0x0010000>,
+diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+index 1044ec6c4b0d4..8185c1627c6f1 100644
+--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+@@ -56,7 +56,34 @@
+ wkup_pmx0: pinctrl@4301c000 {
+ compatible = "pinctrl-single";
+ /* Proxy 0 addressing */
+- reg = <0x00 0x4301c000 0x00 0x178>;
++ reg = <0x00 0x4301c000 0x00 0x34>;
++ #pinctrl-cells = <1>;
++ pinctrl-single,register-width = <32>;
++ pinctrl-single,function-mask = <0xffffffff>;
++ };
++
++ wkup_pmx1: pinctrl@0x4301c038 {
++ compatible = "pinctrl-single";
++ /* Proxy 0 addressing */
++ reg = <0x00 0x4301c038 0x00 0x8>;
++ #pinctrl-cells = <1>;
++ pinctrl-single,register-width = <32>;
++ pinctrl-single,function-mask = <0xffffffff>;
++ };
++
++ wkup_pmx2: pinctrl@0x4301c068 {
++ compatible = "pinctrl-single";
++ /* Proxy 0 addressing */
++ reg = <0x00 0x4301c068 0x00 0xec>;
++ #pinctrl-cells = <1>;
++ pinctrl-single,register-width = <32>;
++ pinctrl-single,function-mask = <0xffffffff>;
++ };
++
++ wkup_pmx3: pinctrl@0x4301c174 {
++ compatible = "pinctrl-single";
++ /* Proxy 0 addressing */
++ reg = <0x00 0x4301c174 0x00 0x20>;
+ #pinctrl-cells = <1>;
+ pinctrl-single,register-width = <32>;
+ pinctrl-single,function-mask = <0xffffffff>;
+diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
+index b7005b8031495..afe99f3920ccd 100644
+--- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
+@@ -60,7 +60,7 @@
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+- d-cache-sets = <128>;
++ d-cache-sets = <256>;
+ next-level-cache = <&L2_0>;
+ };
+
+@@ -74,7 +74,7 @@
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+- d-cache-sets = <128>;
++ d-cache-sets = <256>;
+ next-level-cache = <&L2_0>;
+ };
+ };
+@@ -84,7 +84,7 @@
+ cache-level = <2>;
+ cache-size = <0x100000>;
+ cache-line-size = <64>;
+- cache-sets = <2048>;
++ cache-sets = <1024>;
+ next-level-cache = <&msmc_l3>;
+ };
+
+@@ -127,6 +127,7 @@
+ <0x00 0x00a40000 0x00 0x00a40000 0x00 0x00000800>, /* timesync router */
+ <0x00 0x01000000 0x00 0x01000000 0x00 0x0d000000>, /* Most peripherals */
+ <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>, /* MAIN NAVSS */
++ <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */
+ <0x00 0x70000000 0x00 0x70000000 0x00 0x00800000>, /* MSMC RAM */
+ <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */
+ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */
+diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+index cf3482376c1e6..d662eeb7d80a7 100644
+--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+@@ -42,7 +42,7 @@
+ #size-cells = <1>;
+ ranges = <0x0 0x0 0x00100000 0x1c000>;
+
+- serdes_ln_ctrl: mux@4080 {
++ serdes_ln_ctrl: mux-controller@4080 {
+ compatible = "mmio-mux";
+ reg = <0x00004080 0x50>;
+ #mux-control-cells = <1>;
+@@ -76,7 +76,10 @@
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */
+- <0x00 0x01900000 0x00 0x100000>; /* GICR */
++ <0x00 0x01900000 0x00 0x100000>, /* GICR */
++ <0x00 0x6f000000 0x00 0x2000>, /* GICC */
++ <0x00 0x6f010000 0x00 0x1000>, /* GICH */
++ <0x00 0x6f020000 0x00 0x2000>; /* GICV */
+
+ /* vcpumntirq: virtual CPU interface maintenance interrupt */
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+@@ -333,7 +336,6 @@
+ dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>,
+ <&main_udmap 0x4001>;
+ dma-names = "tx", "rx1", "rx2";
+- dma-coherent;
+
+ rng: rng@4e10000 {
+ compatible = "inside-secure,safexcel-eip76";
+@@ -610,7 +612,7 @@
+ clock-names = "fck";
+ #address-cells = <3>;
+ #size-cells = <2>;
+- bus-range = <0x0 0xf>;
++ bus-range = <0x0 0xff>;
+ vendor-id = <0x104c>;
+ device-id = <0xb00d>;
+ msi-map = <0x0 &gic_its 0x0 0x10000>;
+@@ -636,7 +638,7 @@
+ clocks = <&k3_clks 239 1>;
+ clock-names = "fck";
+ max-functions = /bits/ 8 <6>;
+- max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>;
++ max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>;
+ dma-coherent;
+ };
+
+@@ -658,7 +660,7 @@
+ clock-names = "fck";
+ #address-cells = <3>;
+ #size-cells = <2>;
+- bus-range = <0x0 0xf>;
++ bus-range = <0x0 0xff>;
+ vendor-id = <0x104c>;
+ device-id = <0xb00d>;
+ msi-map = <0x0 &gic_its 0x10000 0x10000>;
+@@ -684,7 +686,7 @@
+ clocks = <&k3_clks 240 1>;
+ clock-names = "fck";
+ max-functions = /bits/ 8 <6>;
+- max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>;
++ max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>;
+ dma-coherent;
+ };
+
+@@ -706,7 +708,7 @@
+ clock-names = "fck";
+ #address-cells = <3>;
+ #size-cells = <2>;
+- bus-range = <0x0 0xf>;
++ bus-range = <0x0 0xff>;
+ vendor-id = <0x104c>;
+ device-id = <0xb00d>;
+ msi-map = <0x0 &gic_its 0x20000 0x10000>;
+@@ -732,7 +734,7 @@
+ clocks = <&k3_clks 241 1>;
+ clock-names = "fck";
+ max-functions = /bits/ 8 <6>;
+- max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>;
++ max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>;
+ dma-coherent;
+ };
+
+@@ -754,7 +756,7 @@
+ clock-names = "fck";
+ #address-cells = <3>;
+ #size-cells = <2>;
+- bus-range = <0x0 0xf>;
++ bus-range = <0x0 0xff>;
+ vendor-id = <0x104c>;
+ device-id = <0xb00d>;
+ msi-map = <0x0 &gic_its 0x30000 0x10000>;
+@@ -780,7 +782,7 @@
+ clocks = <&k3_clks 242 1>;
+ clock-names = "fck";
+ max-functions = /bits/ 8 <6>;
+- max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>;
++ max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>;
+ dma-coherent;
+ #address-cells = <2>;
+ #size-cells = <2>;
+@@ -1049,7 +1051,6 @@
+ ti,itap-del-sel-mmc-hs = <0xa>;
+ ti,itap-del-sel-ddr52 = <0x3>;
+ ti,trm-icp = <0x8>;
+- ti,strobe-sel = <0x77>;
+ dma-coherent;
+ };
+
+diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
+index f0587fde147e6..2cd8883de5b53 100644
+--- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi
+@@ -61,7 +61,7 @@
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+- d-cache-sets = <128>;
++ d-cache-sets = <256>;
+ next-level-cache = <&L2_0>;
+ };
+
+@@ -75,7 +75,7 @@
+ i-cache-sets = <256>;
+ d-cache-size = <0x8000>;
+ d-cache-line-size = <64>;
+- d-cache-sets = <128>;
++ d-cache-sets = <256>;
+ next-level-cache = <&L2_0>;
+ };
+ };
+@@ -85,7 +85,7 @@
+ cache-level = <2>;
+ cache-size = <0x100000>;
+ cache-line-size = <64>;
+- cache-sets = <2048>;
++ cache-sets = <1024>;
+ next-level-cache = <&msmc_l3>;
+ };
+
+@@ -136,6 +136,7 @@
+ <0x00 0x0e000000 0x00 0x0e000000 0x00 0x01800000>, /* PCIe Core*/
+ <0x00 0x10000000 0x00 0x10000000 0x00 0x10000000>, /* PCIe DAT */
+ <0x00 0x64800000 0x00 0x64800000 0x00 0x00800000>, /* C71 */
++ <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */
+ <0x44 0x00000000 0x44 0x00000000 0x00 0x08000000>, /* PCIe2 DAT */
+ <0x44 0x10000000 0x44 0x10000000 0x00 0x08000000>, /* PCIe3 DAT */
+ <0x4d 0x80800000 0x4d 0x80800000 0x00 0x00800000>, /* C66_0 */
+diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
+index 4a86efa32d687..f7124e15f0ff6 100644
+--- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
++++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts
+@@ -131,7 +131,7 @@
+ reg = <0>;
+
+ partition@0 {
+- label = "data";
++ label = "spi0-data";
+ reg = <0x0 0x100000>;
+ };
+ };
+@@ -149,7 +149,7 @@
+ reg = <0>;
+
+ partition@0 {
+- label = "data";
++ label = "spi1-data";
+ reg = <0x0 0x84000>;
+ };
+ };
+diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+index 28dccb891a535..8278876ad33fa 100644
+--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
++++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+@@ -792,7 +792,7 @@
+ };
+
+ uart0: serial@ff000000 {
+- compatible = "cdns,uart-r1p12", "xlnx,xuartps";
++ compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12";
+ status = "disabled";
+ interrupt-parent = <&gic>;
+ interrupts = <0 21 4>;
+@@ -802,7 +802,7 @@
+ };
+
+ uart1: serial@ff010000 {
+- compatible = "cdns,uart-r1p12", "xlnx,xuartps";
++ compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12";
+ status = "disabled";
+ interrupt-parent = <&gic>;
+ interrupts = <0 22 4>;
+diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
+index 545197bc05013..4972a81d40d60 100644
+--- a/arch/arm64/configs/defconfig
++++ b/arch/arm64/configs/defconfig
+@@ -921,7 +921,7 @@ CONFIG_DMADEVICES=y
+ CONFIG_DMA_BCM2835=y
+ CONFIG_DMA_SUN6I=m
+ CONFIG_FSL_EDMA=y
+-CONFIG_IMX_SDMA=y
++CONFIG_IMX_SDMA=m
+ CONFIG_K3_DMA=y
+ CONFIG_MV_XOR=y
+ CONFIG_MV_XOR_V2=y
+diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
+index 55f19450091b2..1a5406e599bab 100644
+--- a/arch/arm64/crypto/Kconfig
++++ b/arch/arm64/crypto/Kconfig
+@@ -59,6 +59,7 @@ config CRYPTO_GHASH_ARM64_CE
+ select CRYPTO_HASH
+ select CRYPTO_GF128MUL
+ select CRYPTO_LIB_AES
++ select CRYPTO_AEAD
+
+ config CRYPTO_CRCT10DIF_ARM64_CE
+ tristate "CRCT10DIF digest algorithm using PMULL instructions"
+diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
+index 9c3d86e397bf3..1fae18ba11ed1 100644
+--- a/arch/arm64/crypto/poly1305-glue.c
++++ b/arch/arm64/crypto/poly1305-glue.c
+@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ {
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+- poly1305_init_arch(dctx, src);
++ poly1305_init_arm64(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
+index 4ad22c3135dbb..5a0f792492af0 100644
+--- a/arch/arm64/include/asm/arch_gicv3.h
++++ b/arch/arm64/include/asm/arch_gicv3.h
+@@ -26,12 +26,6 @@
+ * sets the GP register's most significant bits to 0 with an explicit cast.
+ */
+
+-static inline void gic_write_eoir(u32 irq)
+-{
+- write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+- isb();
+-}
+-
+ static __always_inline void gic_write_dir(u32 irq)
+ {
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
+index bfa58409a4d4d..448a575db8e8e 100644
+--- a/arch/arm64/include/asm/assembler.h
++++ b/arch/arm64/include/asm/assembler.h
+@@ -107,6 +107,13 @@
+ hint #20
+ .endm
+
++/*
++ * Clear Branch History instruction
++ */
++ .macro clearbhb
++ hint #22
++ .endm
++
+ /*
+ * Speculation barrier
+ */
+@@ -830,4 +837,50 @@ alternative_endif
+
+ #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
+
++ .macro __mitigate_spectre_bhb_loop tmp
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++alternative_cb spectre_bhb_patch_loop_iter
++ mov \tmp, #32 // Patched to correct the immediate
++alternative_cb_end
++.Lspectre_bhb_loop\@:
++ b . + 4
++ subs \tmp, \tmp, #1
++ b.ne .Lspectre_bhb_loop\@
++ sb
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++ .endm
++
++ .macro mitigate_spectre_bhb_loop tmp
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++alternative_cb spectre_bhb_patch_loop_mitigation_enable
++ b .L_spectre_bhb_loop_done\@ // Patched to NOP
++alternative_cb_end
++ __mitigate_spectre_bhb_loop \tmp
++.L_spectre_bhb_loop_done\@:
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++ .endm
++
++ /* Save/restores x0-x3 to the stack */
++ .macro __mitigate_spectre_bhb_fw
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++ stp x0, x1, [sp, #-16]!
++ stp x2, x3, [sp, #-16]!
++ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
++alternative_cb smccc_patch_fw_mitigation_conduit
++ nop // Patched to SMC/HVC #0
++alternative_cb_end
++ ldp x2, x3, [sp], #16
++ ldp x0, x1, [sp], #16
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++ .endm
++
++ .macro mitigate_spectre_bhb_clear_insn
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++alternative_cb spectre_bhb_patch_clearbhb
++ /* Patched to NOP when not supported */
++ clearbhb
++ isb
++alternative_cb_end
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++ .endm
+ #endif /* __ASM_ASSEMBLER_H */
+diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
+index 13869b76b58cd..abd302e521c06 100644
+--- a/arch/arm64/include/asm/atomic_ll_sc.h
++++ b/arch/arm64/include/asm/atomic_ll_sc.h
+@@ -12,19 +12,6 @@
+
+ #include <linux/stringify.h>
+
+-#ifdef CONFIG_ARM64_LSE_ATOMICS
+-#define __LL_SC_FALLBACK(asm_ops) \
+-" b 3f\n" \
+-" .subsection 1\n" \
+-"3:\n" \
+-asm_ops "\n" \
+-" b 4f\n" \
+-" .previous\n" \
+-"4:\n"
+-#else
+-#define __LL_SC_FALLBACK(asm_ops) asm_ops
+-#endif
+-
+ #ifndef CONFIG_CC_HAS_K_CONSTRAINT
+ #define K
+ #endif
+@@ -43,12 +30,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v) \
+ int result; \
+ \
+ asm volatile("// atomic_" #op "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %2\n" \
+-"1: ldxr %w0, %2\n" \
+-" " #asm_op " %w0, %w0, %w3\n" \
+-" stxr %w1, %w0, %2\n" \
+-" cbnz %w1, 1b\n") \
++ " prfm pstl1strm, %2\n" \
++ "1: ldxr %w0, %2\n" \
++ " " #asm_op " %w0, %w0, %w3\n" \
++ " stxr %w1, %w0, %2\n" \
++ " cbnz %w1, 1b\n" \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i)); \
+ }
+@@ -61,13 +47,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
+ int result; \
+ \
+ asm volatile("// atomic_" #op "_return" #name "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %2\n" \
+-"1: ld" #acq "xr %w0, %2\n" \
+-" " #asm_op " %w0, %w0, %w3\n" \
+-" st" #rel "xr %w1, %w0, %2\n" \
+-" cbnz %w1, 1b\n" \
+-" " #mb ) \
++ " prfm pstl1strm, %2\n" \
++ "1: ld" #acq "xr %w0, %2\n" \
++ " " #asm_op " %w0, %w0, %w3\n" \
++ " st" #rel "xr %w1, %w0, %2\n" \
++ " cbnz %w1, 1b\n" \
++ " " #mb \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+@@ -83,13 +68,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
+ int val, result; \
+ \
+ asm volatile("// atomic_fetch_" #op #name "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %3\n" \
+-"1: ld" #acq "xr %w0, %3\n" \
+-" " #asm_op " %w1, %w0, %w4\n" \
+-" st" #rel "xr %w2, %w1, %3\n" \
+-" cbnz %w2, 1b\n" \
+-" " #mb ) \
++ " prfm pstl1strm, %3\n" \
++ "1: ld" #acq "xr %w0, %3\n" \
++ " " #asm_op " %w1, %w0, %w4\n" \
++ " st" #rel "xr %w2, %w1, %3\n" \
++ " cbnz %w2, 1b\n" \
++ " " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+@@ -142,12 +126,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_" #op "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %2\n" \
+-"1: ldxr %0, %2\n" \
+-" " #asm_op " %0, %0, %3\n" \
+-" stxr %w1, %0, %2\n" \
+-" cbnz %w1, 1b") \
++ " prfm pstl1strm, %2\n" \
++ "1: ldxr %0, %2\n" \
++ " " #asm_op " %0, %0, %3\n" \
++ " stxr %w1, %0, %2\n" \
++ " cbnz %w1, 1b" \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i)); \
+ }
+@@ -160,13 +143,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_" #op "_return" #name "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %2\n" \
+-"1: ld" #acq "xr %0, %2\n" \
+-" " #asm_op " %0, %0, %3\n" \
+-" st" #rel "xr %w1, %0, %2\n" \
+-" cbnz %w1, 1b\n" \
+-" " #mb ) \
++ " prfm pstl1strm, %2\n" \
++ "1: ld" #acq "xr %0, %2\n" \
++ " " #asm_op " %0, %0, %3\n" \
++ " st" #rel "xr %w1, %0, %2\n" \
++ " cbnz %w1, 1b\n" \
++ " " #mb \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+@@ -176,19 +158,18 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
+
+ #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
+ static inline long \
+-__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
++__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
+ { \
+ s64 result, val; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_fetch_" #op #name "\n" \
+- __LL_SC_FALLBACK( \
+-" prfm pstl1strm, %3\n" \
+-"1: ld" #acq "xr %0, %3\n" \
+-" " #asm_op " %1, %0, %4\n" \
+-" st" #rel "xr %w2, %1, %3\n" \
+-" cbnz %w2, 1b\n" \
+-" " #mb ) \
++ " prfm pstl1strm, %3\n" \
++ "1: ld" #acq "xr %0, %3\n" \
++ " " #asm_op " %1, %0, %4\n" \
++ " st" #rel "xr %w2, %1, %3\n" \
++ " cbnz %w2, 1b\n" \
++ " " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+@@ -240,15 +221,14 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
+ unsigned long tmp;
+
+ asm volatile("// atomic64_dec_if_positive\n"
+- __LL_SC_FALLBACK(
+-" prfm pstl1strm, %2\n"
+-"1: ldxr %0, %2\n"
+-" subs %0, %0, #1\n"
+-" b.lt 2f\n"
+-" stlxr %w1, %0, %2\n"
+-" cbnz %w1, 1b\n"
+-" dmb ish\n"
+-"2:")
++ " prfm pstl1strm, %2\n"
++ "1: ldxr %0, %2\n"
++ " subs %0, %0, #1\n"
++ " b.lt 2f\n"
++ " stlxr %w1, %0, %2\n"
++ " cbnz %w1, 1b\n"
++ " dmb ish\n"
++ "2:"
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
+ :
+ : "cc", "memory");
+@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
+ old = (u##sz)old; \
+ \
+ asm volatile( \
+- __LL_SC_FALLBACK( \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
+@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
+ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
+ " " #mb "\n" \
+- "2:") \
++ "2:" \
+ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
+ [v] "+Q" (*(u##sz *)ptr) \
+ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \
+@@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \
+ unsigned long tmp, ret; \
+ \
+ asm volatile("// __cmpxchg_double" #name "\n" \
+- __LL_SC_FALLBACK( \
+ " prfm pstl1strm, %2\n" \
+ "1: ldxp %0, %1, %2\n" \
+ " eor %0, %0, %3\n" \
+@@ -336,8 +314,8 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \
+ " st" #rel "xp %w0, %5, %6, %2\n" \
+ " cbnz %w0, 1b\n" \
+ " " #mb "\n" \
+- "2:") \
+- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
++ "2:" \
++ : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \
+ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
+ : cl); \
+ \
+diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
+index da3280f639cd7..28e96118c1e5a 100644
+--- a/arch/arm64/include/asm/atomic_lse.h
++++ b/arch/arm64/include/asm/atomic_lse.h
+@@ -11,11 +11,11 @@
+ #define __ASM_ATOMIC_LSE_H
+
+ #define ATOMIC_OP(op, asm_op) \
+-static inline void __lse_atomic_##op(int i, atomic_t *v) \
++static inline void __lse_atomic_##op(int i, atomic_t *v) \
+ { \
+ asm volatile( \
+ __LSE_PREAMBLE \
+-" " #asm_op " %w[i], %[v]\n" \
++ " " #asm_op " %w[i], %[v]\n" \
+ : [i] "+r" (i), [v] "+Q" (v->counter) \
+ : "r" (v)); \
+ }
+@@ -32,7 +32,7 @@ static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \
+ { \
+ asm volatile( \
+ __LSE_PREAMBLE \
+-" " #asm_op #mb " %w[i], %w[i], %[v]" \
++ " " #asm_op #mb " %w[i], %w[i], %[v]" \
+ : [i] "+r" (i), [v] "+Q" (v->counter) \
+ : "r" (v) \
+ : cl); \
+@@ -130,7 +130,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
+ " add %w[i], %w[i], %w[tmp]" \
+ : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
+ : "r" (v) \
+- : cl); \
++ : cl); \
+ \
+ return i; \
+ }
+@@ -168,7 +168,7 @@ static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \
+ { \
+ asm volatile( \
+ __LSE_PREAMBLE \
+-" " #asm_op " %[i], %[v]\n" \
++ " " #asm_op " %[i], %[v]\n" \
+ : [i] "+r" (i), [v] "+Q" (v->counter) \
+ : "r" (v)); \
+ }
+@@ -185,7 +185,7 @@ static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
+ { \
+ asm volatile( \
+ __LSE_PREAMBLE \
+-" " #asm_op #mb " %[i], %[i], %[v]" \
++ " " #asm_op #mb " %[i], %[i], %[v]" \
+ : [i] "+r" (i), [v] "+Q" (v->counter) \
+ : "r" (v) \
+ : cl); \
+@@ -272,7 +272,7 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+ }
+
+ #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
+-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
++static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
+ { \
+ unsigned long tmp; \
+ \
+@@ -403,7 +403,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \
+ " eor %[old2], %[old2], %[oldval2]\n" \
+ " orr %[old1], %[old1], %[old2]" \
+ : [old1] "+&r" (x0), [old2] "+&r" (x1), \
+- [v] "+Q" (*(unsigned long *)ptr) \
++ [v] "+Q" (*(__uint128_t *)ptr) \
+ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
+ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
+ : cl); \
+diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
+index 451e11e5fd23b..1c5a005984582 100644
+--- a/arch/arm64/include/asm/barrier.h
++++ b/arch/arm64/include/asm/barrier.h
+@@ -23,7 +23,7 @@
+ #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+
+ #define psb_csync() asm volatile("hint #17" : : : "memory")
+-#define tsb_csync() asm volatile("hint #18" : : : "memory")
++#define __tsb_csync() asm volatile("hint #18" : : : "memory")
+ #define csdb() asm volatile("hint #20" : : : "memory")
+
+ #ifdef CONFIG_ARM64_PSEUDO_NMI
+@@ -46,6 +46,20 @@
+ #define dma_rmb() dmb(oshld)
+ #define dma_wmb() dmb(oshst)
+
++
++#define tsb_csync() \
++ do { \
++ /* \
++ * CPUs affected by Arm Erratum 2054223 or 2067961 needs \
++ * another TSB to ensure the trace is flushed. The barriers \
++ * don't have to be strictly back to back, as long as the \
++ * CPU is in trace prohibited state. \
++ */ \
++ if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \
++ __tsb_csync(); \
++ __tsb_csync(); \
++ } while (0)
++
+ /*
+ * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
+ * and 0 otherwise.
+diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
+index 0f6d16faa5402..a58e366f0b074 100644
+--- a/arch/arm64/include/asm/cpu.h
++++ b/arch/arm64/include/asm/cpu.h
+@@ -51,6 +51,7 @@ struct cpuinfo_arm64 {
+ u64 reg_id_aa64dfr1;
+ u64 reg_id_aa64isar0;
+ u64 reg_id_aa64isar1;
++ u64 reg_id_aa64isar2;
+ u64 reg_id_aa64mmfr0;
+ u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index ef6be92b1921a..a77b5f49b3a6c 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -637,6 +637,35 @@ static inline bool cpu_supports_mixed_endian_el0(void)
+ return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+ }
+
++
++static inline bool supports_csv2p3(int scope)
++{
++ u64 pfr0;
++ u8 csv2_val;
++
++ if (scope == SCOPE_LOCAL_CPU)
++ pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
++ else
++ pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
++
++ csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
++ ID_AA64PFR0_CSV2_SHIFT);
++ return csv2_val == 3;
++}
++
++static inline bool supports_clearbhb(int scope)
++{
++ u64 isar2;
++
++ if (scope == SCOPE_LOCAL_CPU)
++ isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
++ else
++ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
++
++ return cpuid_feature_extract_unsigned_field(isar2,
++ ID_AA64ISAR2_CLEARBHB_SHIFT);
++}
++
+ const struct cpumask *system_32bit_el0_cpumask(void);
+ DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
+diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
+index 6231e1f0abe7e..9cf5d9551e991 100644
+--- a/arch/arm64/include/asm/cputype.h
++++ b/arch/arm64/include/asm/cputype.h
+@@ -41,7 +41,7 @@
+ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
+
+ #define MIDR_CPU_MODEL(imp, partnum) \
+- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
++ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ (0xf << MIDR_ARCHITECTURE_SHIFT) | \
+ ((partnum) << MIDR_PARTNUM_SHIFT))
+
+@@ -60,6 +60,7 @@
+ #define ARM_CPU_IMP_FUJITSU 0x46
+ #define ARM_CPU_IMP_HISI 0x48
+ #define ARM_CPU_IMP_APPLE 0x61
++#define ARM_CPU_IMP_AMPERE 0xC0
+
+ #define ARM_CPU_PART_AEM_V8 0xD0F
+ #define ARM_CPU_PART_FOUNDATION 0xD00
+@@ -73,6 +74,15 @@
+ #define ARM_CPU_PART_CORTEX_A76 0xD0B
+ #define ARM_CPU_PART_NEOVERSE_N1 0xD0C
+ #define ARM_CPU_PART_CORTEX_A77 0xD0D
++#define ARM_CPU_PART_NEOVERSE_V1 0xD40
++#define ARM_CPU_PART_CORTEX_A78 0xD41
++#define ARM_CPU_PART_CORTEX_A78AE 0xD42
++#define ARM_CPU_PART_CORTEX_X1 0xD44
++#define ARM_CPU_PART_CORTEX_A510 0xD46
++#define ARM_CPU_PART_CORTEX_A710 0xD47
++#define ARM_CPU_PART_CORTEX_X2 0xD48
++#define ARM_CPU_PART_NEOVERSE_N2 0xD49
++#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+
+ #define APM_CPU_PART_POTENZA 0x000
+
+@@ -103,6 +113,8 @@
+ #define APPLE_CPU_PART_M1_ICESTORM 0x022
+ #define APPLE_CPU_PART_M1_FIRESTORM 0x023
+
++#define AMPERE_CPU_PART_AMPERE1 0xAC3
++
+ #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+ #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+ #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
+@@ -113,6 +125,15 @@
+ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
+ #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
+ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
++#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
++#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
++#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
++#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
++#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
++#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
++#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
++#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
++#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+ #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+@@ -133,6 +154,7 @@
+ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+ #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
+ #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
++#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+
+ /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
+ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
+diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
+index 657c921fd784a..8de1a840ad974 100644
+--- a/arch/arm64/include/asm/debug-monitors.h
++++ b/arch/arm64/include/asm/debug-monitors.h
+@@ -76,7 +76,7 @@ struct task_struct;
+
+ struct step_hook {
+ struct list_head node;
+- int (*fn)(struct pt_regs *regs, unsigned int esr);
++ int (*fn)(struct pt_regs *regs, unsigned long esr);
+ };
+
+ void register_user_step_hook(struct step_hook *hook);
+@@ -87,7 +87,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);
+
+ struct break_hook {
+ struct list_head node;
+- int (*fn)(struct pt_regs *regs, unsigned int esr);
++ int (*fn)(struct pt_regs *regs, unsigned long esr);
+ u16 imm;
+ u16 mask; /* These bits are ignored when comparing with imm */
+ };
+@@ -116,6 +116,7 @@ void user_regs_reset_single_step(struct user_pt_regs *regs,
+ void kernel_enable_single_step(struct pt_regs *regs);
+ void kernel_disable_single_step(void);
+ int kernel_active_single_step(void);
++void kernel_rewind_single_step(struct pt_regs *regs);
+
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int reinstall_suspended_bps(struct pt_regs *regs);
+diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
+index d3e1825337be3..53cbbb96f7ebf 100644
+--- a/arch/arm64/include/asm/efi.h
++++ b/arch/arm64/include/asm/efi.h
+@@ -14,7 +14,6 @@
+
+ #ifdef CONFIG_EFI
+ extern void efi_init(void);
+-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+ #else
+ #define efi_init()
+ #endif
+@@ -26,6 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+ ({ \
+ efi_virtmap_load(); \
+ __efi_fpsimd_begin(); \
++ raw_spin_lock(&efi_rt_lock); \
+ })
+
+ #define arch_efi_call_virt(p, f, args...) \
+@@ -37,10 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+ #define arch_efi_call_virt_teardown() \
+ ({ \
++ raw_spin_unlock(&efi_rt_lock); \
+ __efi_fpsimd_end(); \
+ efi_virtmap_unload(); \
+ })
+
++extern raw_spinlock_t efi_rt_lock;
+ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+
+ #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
+index 3198acb2aad8c..7f3c87f7a0cec 100644
+--- a/arch/arm64/include/asm/el2_setup.h
++++ b/arch/arm64/include/asm/el2_setup.h
+@@ -106,7 +106,7 @@
+ msr_s SYS_ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
+- tbz x0, #0, 1f // and check that it sticks
++ tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
+ msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
+ .Lskip_gicv3_\@:
+ .endm
+diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
+index 29f97eb3dad41..9f91c8906edd9 100644
+--- a/arch/arm64/include/asm/esr.h
++++ b/arch/arm64/include/asm/esr.h
+@@ -68,6 +68,7 @@
+ #define ESR_ELx_EC_MAX (0x3F)
+
+ #define ESR_ELx_EC_SHIFT (26)
++#define ESR_ELx_EC_WIDTH (6)
+ #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+ #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+@@ -323,14 +324,14 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/types.h>
+
+-static inline bool esr_is_data_abort(u32 esr)
++static inline bool esr_is_data_abort(unsigned long esr)
+ {
+- const u32 ec = ESR_ELx_EC(esr);
++ const unsigned long ec = ESR_ELx_EC(esr);
+
+ return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
+ }
+
+-const char *esr_get_class_string(u32 esr);
++const char *esr_get_class_string(unsigned long esr);
+ #endif /* __ASSEMBLY */
+
+ #endif /* __ASM_ESR_H */
+diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
+index 339477dca5513..0e6535aa78c2f 100644
+--- a/arch/arm64/include/asm/exception.h
++++ b/arch/arm64/include/asm/exception.h
+@@ -19,9 +19,9 @@
+ #define __exception_irq_entry __kprobes
+ #endif
+
+-static inline u32 disr_to_esr(u64 disr)
++static inline unsigned long disr_to_esr(u64 disr)
+ {
+- unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
++ unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+
+ if ((disr & DISR_EL1_IDS) == 0)
+ esr |= (disr & DISR_EL1_ESR_MASK);
+@@ -57,23 +57,23 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+ void (*func)(struct pt_regs *));
+ asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
+
+-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
++void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
+ void do_undefinstr(struct pt_regs *regs);
+ void do_bti(struct pt_regs *regs);
+-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
++void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
+ struct pt_regs *regs);
+-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
+-void do_sve_acc(unsigned int esr, struct pt_regs *regs);
+-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
+-void do_sysinstr(unsigned int esr, struct pt_regs *regs);
+-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
+-void do_cp15instr(unsigned int esr, struct pt_regs *regs);
++void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
++void do_sve_acc(unsigned long esr, struct pt_regs *regs);
++void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
++void do_sysinstr(unsigned long esr, struct pt_regs *regs);
++void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
++void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
++void do_cp15instr(unsigned long esr, struct pt_regs *regs);
+ void do_el0_svc(struct pt_regs *regs);
+ void do_el0_svc_compat(struct pt_regs *regs);
+-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
+-void do_serror(struct pt_regs *regs, unsigned int esr);
++void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
++void do_serror(struct pt_regs *regs, unsigned long esr);
+ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+
+-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
++void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
+ #endif /* __ASM_EXCEPTION_H */
+diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
+index b15eb4a3e6b20..840a35ed92ec8 100644
+--- a/arch/arm64/include/asm/extable.h
++++ b/arch/arm64/include/asm/extable.h
+@@ -22,15 +22,6 @@ struct exception_table_entry
+
+ #define ARCH_HAS_RELATIVE_EXTABLE
+
+-static inline bool in_bpf_jit(struct pt_regs *regs)
+-{
+- if (!IS_ENABLED(CONFIG_BPF_JIT))
+- return false;
+-
+- return regs->pc >= BPF_JIT_REGION_START &&
+- regs->pc < BPF_JIT_REGION_END;
+-}
+-
+ #ifdef CONFIG_BPF_JIT
+ int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
+index 4335800201c97..daff882883f92 100644
+--- a/arch/arm64/include/asm/fixmap.h
++++ b/arch/arm64/include/asm/fixmap.h
+@@ -62,9 +62,11 @@ enum fixed_addresses {
+ #endif /* CONFIG_ACPI_APEI_GHES */
+
+ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
++ FIX_ENTRY_TRAMP_TEXT3,
++ FIX_ENTRY_TRAMP_TEXT2,
++ FIX_ENTRY_TRAMP_TEXT1,
+ FIX_ENTRY_TRAMP_DATA,
+- FIX_ENTRY_TRAMP_TEXT,
+-#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
++#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
+ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+ __end_of_permanent_fixed_addresses,
+
+diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
+index 8c129db8232a6..f68fbb2074730 100644
+--- a/arch/arm64/include/asm/hwcap.h
++++ b/arch/arm64/include/asm/hwcap.h
+@@ -105,6 +105,9 @@
+ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
+ #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
+ #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE)
++#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV)
++#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
++#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
+
+ /*
+ * This yields a mask that user programs can use to figure out what
+diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
+index 6b776c8667b20..b02f0c328c8e4 100644
+--- a/arch/arm64/include/asm/insn.h
++++ b/arch/arm64/include/asm/insn.h
+@@ -65,6 +65,7 @@ enum aarch64_insn_hint_cr_op {
+ AARCH64_INSN_HINT_PSB = 0x11 << 5,
+ AARCH64_INSN_HINT_TSB = 0x12 << 5,
+ AARCH64_INSN_HINT_CSDB = 0x14 << 5,
++ AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5,
+
+ AARCH64_INSN_HINT_BTI = 0x20 << 5,
+ AARCH64_INSN_HINT_BTIC = 0x22 << 5,
+diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
+index 7fd836bea7eb4..3995652daf81a 100644
+--- a/arch/arm64/include/asm/io.h
++++ b/arch/arm64/include/asm/io.h
+@@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
+ extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
++extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
++ unsigned long flags);
++#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
++
+ #endif /* __ASM_IO_H */
+diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
+index 96dc0f7da258d..a971d462f531c 100644
+--- a/arch/arm64/include/asm/kernel-pgtable.h
++++ b/arch/arm64/include/asm/kernel-pgtable.h
+@@ -103,8 +103,8 @@
+ /*
+ * Initial memory map attributes.
+ */
+-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
++#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
++#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
+
+ #if ARM64_KERNEL_USES_PMD_MAPS
+ #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
+index 327120c0089fe..f67a561e0935e 100644
+--- a/arch/arm64/include/asm/kvm_arm.h
++++ b/arch/arm64/include/asm/kvm_arm.h
+@@ -91,7 +91,7 @@
+ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+
+ /* TCR_EL2 Registers bits */
+-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
++#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
+ #define TCR_EL2_TBI (1 << 20)
+ #define TCR_EL2_PS_SHIFT 16
+ #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
+@@ -276,7 +276,7 @@
+ #define CPTR_EL2_TFP_SHIFT 10
+
+ /* Hyp Coprocessor Trap Register */
+-#define CPTR_EL2_TCPAC (1 << 31)
++#define CPTR_EL2_TCPAC (1U << 31)
+ #define CPTR_EL2_TAM (1 << 30)
+ #define CPTR_EL2_TTA (1 << 20)
+ #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
+diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
+index fd418955e31e6..64f8a90d33277 100644
+--- a/arch/arm64/include/asm/kvm_emulate.h
++++ b/arch/arm64/include/asm/kvm_emulate.h
+@@ -366,8 +366,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+
+ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+ {
+- if (kvm_vcpu_abt_iss1tw(vcpu))
+- return true;
++ if (kvm_vcpu_abt_iss1tw(vcpu)) {
++ /*
++ * Only a permission fault on a S1PTW should be
++ * considered as a write. Otherwise, page tables baked
++ * in a read-only memslot will result in an exception
++ * being delivered in the guest.
++ *
++ * The drawback is that we end-up faulting twice if the
++ * guest is using any of HW AF/DB: a translation fault
++ * to map the page containing the PT (read only at
++ * first), then a permission fault to allow the flags
++ * to be set.
++ */
++ switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
++ case ESR_ELx_FSC_PERM:
++ return true;
++ default:
++ return false;
++ }
++ }
+
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
+index f8be56d5342ba..1713630bf8f5a 100644
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -711,6 +711,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
+ ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
+ }
+
++static inline bool kvm_system_needs_idmapped_vectors(void)
++{
++ return cpus_have_const_cap(ARM64_SPECTRE_V3A);
++}
++
+ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
+
+ static inline void kvm_arch_hardware_unsetup(void) {}
+@@ -790,6 +795,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
+ #define kvm_vcpu_has_pmu(vcpu) \
+ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+
++#define kvm_supports_32bit_el0() \
++ (system_supports_32bit_el0() && \
++ !static_branch_unlikely(&arm64_mismatched_32bit_el0))
++
+ int kvm_trng_call(struct kvm_vcpu *vcpu);
+ #ifdef CONFIG_KVM
+ extern phys_addr_t hyp_mem_base;
+diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
+index f1745a8434144..05886322c300c 100644
+--- a/arch/arm64/include/asm/memory.h
++++ b/arch/arm64/include/asm/memory.h
+@@ -44,11 +44,8 @@
+ #define _PAGE_OFFSET(va) (-(UL(1) << (va)))
+ #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
+ #define KIMAGE_VADDR (MODULES_END)
+-#define BPF_JIT_REGION_START (_PAGE_END(VA_BITS_MIN))
+-#define BPF_JIT_REGION_SIZE (SZ_128M)
+-#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+ #define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
+-#define MODULES_VADDR (BPF_JIT_REGION_END)
++#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
+ #define MODULES_VSIZE (SZ_128M)
+ #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
+ #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
+diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
+index a11ccadd47d29..094701ec5500b 100644
+--- a/arch/arm64/include/asm/module.lds.h
++++ b/arch/arm64/include/asm/module.lds.h
+@@ -1,8 +1,8 @@
+ SECTIONS {
+ #ifdef CONFIG_ARM64_MODULE_PLTS
+- .plt 0 (NOLOAD) : { BYTE(0) }
+- .init.plt 0 (NOLOAD) : { BYTE(0) }
+- .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
++ .plt 0 : { BYTE(0) }
++ .init.plt 0 : { BYTE(0) }
++ .text.ftrace_trampoline 0 : { BYTE(0) }
+ #endif
+
+ #ifdef CONFIG_KASAN_SW_TAGS
+diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
+index 22420e1f8c037..592aabb25b0e7 100644
+--- a/arch/arm64/include/asm/mte-kasan.h
++++ b/arch/arm64/include/asm/mte-kasan.h
+@@ -5,6 +5,7 @@
+ #ifndef __ASM_MTE_KASAN_H
+ #define __ASM_MTE_KASAN_H
+
++#include <asm/compiler.h>
+ #include <asm/mte-def.h>
+
+ #ifndef __ASSEMBLY__
+@@ -84,10 +85,12 @@ static inline void __dc_gzva(u64 p)
+ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+ bool init)
+ {
+- u64 curr, mask, dczid_bs, end1, end2, end3;
++ u64 curr, mask, dczid, dczid_bs, dczid_dzp, end1, end2, end3;
+
+ /* Read DC G(Z)VA block size from the system register. */
+- dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
++ dczid = read_cpuid(DCZID_EL0);
++ dczid_bs = 4ul << (dczid & 0xf);
++ dczid_dzp = (dczid >> 4) & 1;
+
+ curr = (u64)__tag_set(addr, tag);
+ mask = dczid_bs - 1;
+@@ -106,7 +109,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+ */
+ #define SET_MEMTAG_RANGE(stg_post, dc_gva) \
+ do { \
+- if (size >= 2 * dczid_bs) { \
++ if (!dczid_dzp && size >= 2 * dczid_bs) {\
+ do { \
+ curr = stg_post(curr); \
+ } while (curr < end1); \
+diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
+index 02511650cffe5..3e368ca66623b 100644
+--- a/arch/arm64/include/asm/mte.h
++++ b/arch/arm64/include/asm/mte.h
+@@ -40,7 +40,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte);
+ void mte_copy_page_tags(void *kto, const void *kfrom);
+ void mte_thread_init_user(void);
+ void mte_thread_switch(struct task_struct *next);
++void mte_cpu_setup(void);
+ void mte_suspend_enter(void);
++void mte_suspend_exit(void);
+ long set_mte_ctrl(struct task_struct *task, unsigned long arg);
+ long get_mte_ctrl(struct task_struct *task);
+ int mte_ptrace_copy_tags(struct task_struct *child, long request,
+@@ -69,6 +71,9 @@ static inline void mte_thread_switch(struct task_struct *next)
+ static inline void mte_suspend_enter(void)
+ {
+ }
++static inline void mte_suspend_exit(void)
++{
++}
+ static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+ {
+ return 0;
+diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
+index f98c91bbd7c17..993a27ea6f543 100644
+--- a/arch/arm64/include/asm/page.h
++++ b/arch/arm64/include/asm/page.h
+@@ -41,7 +41,6 @@ void tag_clear_highpage(struct page *to);
+
+ typedef struct page *pgtable_t;
+
+-int pfn_valid(unsigned long pfn);
+ int pfn_is_map_memory(unsigned long pfn);
+
+ #include <asm/memory.h>
+diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
+index 8433a2058eb15..237224484d0f6 100644
+--- a/arch/arm64/include/asm/pgalloc.h
++++ b/arch/arm64/include/asm/pgalloc.h
+@@ -76,7 +76,7 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
+ static inline void
+ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+ {
+- VM_BUG_ON(mm != &init_mm);
++ VM_BUG_ON(mm && mm != &init_mm);
+ __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
+ }
+
+diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
+index 40085e53f573d..66671ff051835 100644
+--- a/arch/arm64/include/asm/pgtable-hwdef.h
++++ b/arch/arm64/include/asm/pgtable-hwdef.h
+@@ -273,6 +273,8 @@
+ #define TCR_NFD1 (UL(1) << 54)
+ #define TCR_E0PD0 (UL(1) << 55)
+ #define TCR_E0PD1 (UL(1) << 56)
++#define TCR_TCMA0 (UL(1) << 57)
++#define TCR_TCMA1 (UL(1) << 58)
+
+ /*
+ * TTBR.
+diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
+index 7032f04c8ac6e..b1e1b74d993c3 100644
+--- a/arch/arm64/include/asm/pgtable-prot.h
++++ b/arch/arm64/include/asm/pgtable-prot.h
+@@ -92,7 +92,7 @@ extern bool arm64_use_ng_mappings;
+ #define __P001 PAGE_READONLY
+ #define __P010 PAGE_READONLY
+ #define __P011 PAGE_READONLY
+-#define __P100 PAGE_EXECONLY
++#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
+ #define __P101 PAGE_READONLY_EXEC
+ #define __P110 PAGE_READONLY_EXEC
+ #define __P111 PAGE_READONLY_EXEC
+@@ -101,7 +101,7 @@ extern bool arm64_use_ng_mappings;
+ #define __S001 PAGE_READONLY
+ #define __S010 PAGE_SHARED
+ #define __S011 PAGE_SHARED
+-#define __S100 PAGE_EXECONLY
++#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */
+ #define __S101 PAGE_READONLY_EXEC
+ #define __S110 PAGE_SHARED_EXEC
+ #define __S111 PAGE_SHARED_EXEC
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index dfa76afa0ccff..ed57717cd0040 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -67,9 +67,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+ * page table entry, taking care of 52-bit addresses.
+ */
+ #ifdef CONFIG_ARM64_PA_BITS_52
+-#define __pte_to_phys(pte) \
+- ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
+-#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
++static inline phys_addr_t __pte_to_phys(pte_t pte)
++{
++ return (pte_val(pte) & PTE_ADDR_LOW) |
++ ((pte_val(pte) & PTE_ADDR_HIGH) << 36);
++}
++static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
++{
++ return (phys | (phys >> 36)) & PTE_ADDR_MASK;
++}
+ #else
+ #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
+ #define __phys_to_pte_val(phys) (phys)
+@@ -529,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ PMD_TYPE_TABLE)
+ #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_SECT)
+-#define pmd_leaf(pmd) pmd_sect(pmd)
++#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
+ #define pmd_bad(pmd) (!pmd_table(pmd))
+
+ #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+@@ -619,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ #define pud_none(pud) (!pud_val(pud))
+ #define pud_bad(pud) (!pud_table(pud))
+ #define pud_present(pud) pte_present(pud_pte(pud))
+-#define pud_leaf(pud) pud_sect(pud)
++#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
+ #define pud_valid(pud) pte_valid(pud_pte(pud))
+
+ static inline void set_pud(pud_t *pudp, pud_t pud)
+@@ -1011,18 +1017,6 @@ static inline bool arch_wants_old_prefaulted_pte(void)
+ }
+ #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
+
+-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+-{
+- if (cpus_have_const_cap(ARM64_HAS_EPAN))
+- return prot;
+-
+- if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
+- return prot;
+-
+- return PAGE_READONLY_EXEC;
+-}
+-
+-
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* __ASM_PGTABLE_H */
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index ee2bdc1b9f5bb..7364530de0a77 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -204,8 +204,9 @@ void tls_preserve_current_state(void);
+
+ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
+ {
++ s32 previous_syscall = regs->syscallno;
+ memset(regs, 0, sizeof(*regs));
+- forget_syscall(regs);
++ regs->syscallno = previous_syscall;
+ regs->pc = pc;
+
+ if (system_uses_irq_prio_masking())
+@@ -239,13 +240,13 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
+ }
+ #endif
+
+-static inline bool is_ttbr0_addr(unsigned long addr)
++static __always_inline bool is_ttbr0_addr(unsigned long addr)
+ {
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+ }
+
+-static inline bool is_ttbr1_addr(unsigned long addr)
++static __always_inline bool is_ttbr1_addr(unsigned long addr)
+ {
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+@@ -335,12 +336,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
+ * of header definitions for the use of task_stack_page.
+ */
+
+-#define current_top_of_stack() \
+-({ \
+- struct stack_info _info; \
+- BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \
+- _info.high; \
+-})
++/*
++ * The top of the current task's task stack
++ */
++#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE)
+ #define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL))
+
+ #endif /* __ASSEMBLY__ */
+diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
+index 1bce62fa908a3..56f7b1d4d54b9 100644
+--- a/arch/arm64/include/asm/rwonce.h
++++ b/arch/arm64/include/asm/rwonce.h
+@@ -5,7 +5,7 @@
+ #ifndef __ASM_RWONCE_H
+ #define __ASM_RWONCE_H
+
+-#ifdef CONFIG_LTO
++#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__)
+
+ #include <linux/compiler_types.h>
+ #include <asm/alternative-macros.h>
+@@ -66,7 +66,7 @@
+ })
+
+ #endif /* !BUILD_VDSO */
+-#endif /* CONFIG_LTO */
++#endif /* CONFIG_LTO && !__ASSEMBLY__ */
+
+ #include <asm-generic/rwonce.h>
+
+diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
+index 8297bccf07845..5cd4d09bc69d7 100644
+--- a/arch/arm64/include/asm/scs.h
++++ b/arch/arm64/include/asm/scs.h
+@@ -9,15 +9,16 @@
+ #ifdef CONFIG_SHADOW_CALL_STACK
+ scs_sp .req x18
+
+- .macro scs_load tsk
+- ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
++ .macro scs_load_current
++ get_current_task scs_sp
++ ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP]
+ .endm
+
+ .macro scs_save tsk
+ str scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .endm
+ #else
+- .macro scs_load tsk
++ .macro scs_load_current
+ .endm
+
+ .macro scs_save tsk
+diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
+index 7bea1d705dd64..6bb0258fb4aa1 100644
+--- a/arch/arm64/include/asm/sdei.h
++++ b/arch/arm64/include/asm/sdei.h
+@@ -17,6 +17,9 @@
+
+ #include <asm/virt.h>
+
++DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
++DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
++
+ extern unsigned long sdei_exit_mode;
+
+ /* Software Delegated Exception entry point from firmware*/
+@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
+ unsigned long pc,
+ unsigned long pstate);
+
++/* Abort a running handler. Context is discarded. */
++void __sdei_handler_abort(void);
++
+ /*
+ * The above entry point does the minimum to call C code. This function does
+ * anything else, before calling the driver.
+diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
+index e4ad9db53af1d..552891e626e53 100644
+--- a/arch/arm64/include/asm/sections.h
++++ b/arch/arm64/include/asm/sections.h
+@@ -22,4 +22,9 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
+ extern char __mmuoff_data_start[], __mmuoff_data_end[];
+ extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
+
++static inline size_t entry_tramp_text_size(void)
++{
++ return __entry_tramp_text_end - __entry_tramp_text_start;
++}
++
+ #endif /* __ASM_SECTIONS_H */
+diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
+index f62ca39da6c5a..aa3d3607d5c8d 100644
+--- a/arch/arm64/include/asm/spectre.h
++++ b/arch/arm64/include/asm/spectre.h
+@@ -67,7 +67,8 @@ struct bp_hardening_data {
+
+ DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+-static inline void arm64_apply_bp_hardening(void)
++/* Called during entry so must be __always_inline */
++static __always_inline void arm64_apply_bp_hardening(void)
+ {
+ struct bp_hardening_data *d;
+
+@@ -93,5 +94,9 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
+
+ enum mitigation_state arm64_get_meltdown_state(void);
+
++enum mitigation_state arm64_get_spectre_bhb_state(void);
++bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
++u8 spectre_bhb_loop_affected(int scope);
++void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+ #endif /* __ASSEMBLY__ */
+ #endif /* __ASM_SPECTRE_H */
+diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
+index b383b4802a7bd..d30217c21eff7 100644
+--- a/arch/arm64/include/asm/syscall_wrapper.h
++++ b/arch/arm64/include/asm/syscall_wrapper.h
+@@ -8,7 +8,7 @@
+ #ifndef __ASM_SYSCALL_WRAPPER_H
+ #define __ASM_SYSCALL_WRAPPER_H
+
+-struct pt_regs;
++#include <asm/ptrace.h>
+
+ #define SC_ARM64_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
+index b268082d67edd..543eb08fa8e5f 100644
+--- a/arch/arm64/include/asm/sysreg.h
++++ b/arch/arm64/include/asm/sysreg.h
+@@ -109,8 +109,14 @@
+ #define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+
+ #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
++#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
++#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
+ #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
++#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
++#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
+ #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
++#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
++#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+
+ /*
+ * System registers, organised loosely by encoding but grouped together
+@@ -180,6 +186,7 @@
+
+ #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
+ #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
++#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2)
+
+ #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
+ #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
+@@ -764,6 +771,21 @@
+ #define ID_AA64ISAR1_GPI_NI 0x0
+ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1
+
++/* id_aa64isar2 */
++#define ID_AA64ISAR2_CLEARBHB_SHIFT 28
++#define ID_AA64ISAR2_RPRES_SHIFT 4
++#define ID_AA64ISAR2_WFXT_SHIFT 0
++
++#define ID_AA64ISAR2_RPRES_8BIT 0x0
++#define ID_AA64ISAR2_RPRES_12BIT 0x1
++/*
++ * Value 0x1 has been removed from the architecture, and is
++ * reserved, but has not yet been removed from the ARM ARM
++ * as of ARM DDI 0487G.b.
++ */
++#define ID_AA64ISAR2_WFXT_NI 0x0
++#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2
++
+ /* id_aa64pfr0 */
+ #define ID_AA64PFR0_CSV3_SHIFT 60
+ #define ID_AA64PFR0_CSV2_SHIFT 56
+@@ -881,6 +903,8 @@
+ #endif
+
+ /* id_aa64mmfr1 */
++#define ID_AA64MMFR1_ECBHB_SHIFT 60
++#define ID_AA64MMFR1_AFP_SHIFT 44
+ #define ID_AA64MMFR1_ETS_SHIFT 36
+ #define ID_AA64MMFR1_TWED_SHIFT 32
+ #define ID_AA64MMFR1_XNX_SHIFT 28
+@@ -1076,10 +1100,6 @@
+ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
+ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+
+-/* TCR EL1 Bit Definitions */
+-#define SYS_TCR_EL1_TCMA1 (BIT(58))
+-#define SYS_TCR_EL1_TCMA0 (BIT(57))
+-
+ /* GCR_EL1 Definitions */
+ #define SYS_GCR_EL1_RRND (BIT(16))
+ #define SYS_GCR_EL1_EXCL_MASK 0xffffUL
+diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
+index 305a7157c6a6a..0eb7709422e29 100644
+--- a/arch/arm64/include/asm/system_misc.h
++++ b/arch/arm64/include/asm/system_misc.h
+@@ -23,9 +23,9 @@ void die(const char *msg, struct pt_regs *regs, int err);
+ struct siginfo;
+ void arm64_notify_die(const char *str, struct pt_regs *regs,
+ int signo, int sicode, unsigned long far,
+- int err);
++ unsigned long err);
+
+-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
++void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
+ struct pt_regs *),
+ int sig, int code, const char *name);
+
+diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
+index 54f32a0675dff..6e5826470bea6 100644
+--- a/arch/arm64/include/asm/traps.h
++++ b/arch/arm64/include/asm/traps.h
+@@ -24,7 +24,7 @@ struct undef_hook {
+
+ void register_undef_hook(struct undef_hook *hook);
+ void unregister_undef_hook(struct undef_hook *hook);
+-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
++void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
+ void arm64_notify_segfault(unsigned long addr);
+ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+ void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
+@@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr)
+ * errors share the same encoding as an all-zeros encoding from a CPU that
+ * doesn't support RAS.
+ */
+-static inline bool arm64_is_ras_serror(u32 esr)
++static inline bool arm64_is_ras_serror(unsigned long esr)
+ {
+ WARN_ON(preemptible());
+
+@@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr)
+ * We treat them as Uncontainable.
+ * Non-RAS SError's are reported as Uncontained/Uncategorized.
+ */
+-static inline u32 arm64_ras_serror_get_severity(u32 esr)
++static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
+ {
+- u32 aet = esr & ESR_ELx_AET;
++ unsigned long aet = esr & ESR_ELx_AET;
+
+ if (!arm64_is_ras_serror(esr)) {
+ /* Not a RAS error, we can't interpret the ESR. */
+@@ -98,6 +98,6 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr)
+ return aet;
+ }
+
+-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
+-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
++bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
++void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
+ #endif
+diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
+index 190b494e22ab9..0fd6056ba412b 100644
+--- a/arch/arm64/include/asm/uaccess.h
++++ b/arch/arm64/include/asm/uaccess.h
+@@ -292,12 +292,22 @@ do { \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ } while (0)
+
++/*
++ * We must not call into the scheduler between uaccess_ttbr0_enable() and
++ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
++ * we must evaluate these outside of the critical section.
++ */
+ #define __raw_get_user(x, ptr, err) \
+ do { \
++ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
++ __typeof__(x) __rgu_val; \
+ __chk_user_ptr(ptr); \
++ \
+ uaccess_ttbr0_enable(); \
+- __raw_get_mem("ldtr", x, ptr, err); \
++ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \
+ uaccess_ttbr0_disable(); \
++ \
++ (x) = __rgu_val; \
+ } while (0)
+
+ #define __get_user_error(x, ptr, err) \
+@@ -321,14 +331,22 @@ do { \
+
+ #define get_user __get_user
+
++/*
++ * We must not call into the scheduler between __uaccess_enable_tco_async() and
++ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
++ * functions, we must evaluate these outside of the critical section.
++ */
+ #define __get_kernel_nofault(dst, src, type, err_label) \
+ do { \
++ __typeof__(dst) __gkn_dst = (dst); \
++ __typeof__(src) __gkn_src = (src); \
+ int __gkn_err = 0; \
+ \
+ __uaccess_enable_tco_async(); \
+- __raw_get_mem("ldr", *((type *)(dst)), \
+- (__force type *)(src), __gkn_err); \
++ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
++ (__force type *)(__gkn_src), __gkn_err); \
+ __uaccess_disable_tco_async(); \
++ \
+ if (unlikely(__gkn_err)) \
+ goto err_label; \
+ } while (0)
+@@ -367,11 +385,19 @@ do { \
+ } \
+ } while (0)
+
++/*
++ * We must not call into the scheduler between uaccess_ttbr0_enable() and
++ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
++ * we must evaluate these outside of the critical section.
++ */
+ #define __raw_put_user(x, ptr, err) \
+ do { \
+- __chk_user_ptr(ptr); \
++ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
++ __typeof__(*(ptr)) __rpu_val = (x); \
++ __chk_user_ptr(__rpu_ptr); \
++ \
+ uaccess_ttbr0_enable(); \
+- __raw_put_mem("sttr", x, ptr, err); \
++ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \
+ uaccess_ttbr0_disable(); \
+ } while (0)
+
+@@ -396,14 +422,22 @@ do { \
+
+ #define put_user __put_user
+
++/*
++ * We must not call into the scheduler between __uaccess_enable_tco_async() and
++ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
++ * functions, we must evaluate these outside of the critical section.
++ */
+ #define __put_kernel_nofault(dst, src, type, err_label) \
+ do { \
++ __typeof__(dst) __pkn_dst = (dst); \
++ __typeof__(src) __pkn_src = (src); \
+ int __pkn_err = 0; \
+ \
+ __uaccess_enable_tco_async(); \
+- __raw_put_mem("str", *((type *)(src)), \
+- (__force type *)(dst), __pkn_err); \
++ __raw_put_mem("str", *((type *)(__pkn_src)), \
++ (__force type *)(__pkn_dst), __pkn_err); \
+ __uaccess_disable_tco_async(); \
++ \
+ if (unlikely(__pkn_err)) \
+ goto err_label; \
+ } while(0)
+diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
+new file mode 100644
+index 0000000000000..bc9a2145f4194
+--- /dev/null
++++ b/arch/arm64/include/asm/vectors.h
+@@ -0,0 +1,73 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2022 ARM Ltd.
++ */
++#ifndef __ASM_VECTORS_H
++#define __ASM_VECTORS_H
++
++#include <linux/bug.h>
++#include <linux/percpu.h>
++
++#include <asm/fixmap.h>
++
++extern char vectors[];
++extern char tramp_vectors[];
++extern char __bp_harden_el1_vectors[];
++
++/*
++ * Note: the order of this enum corresponds to two arrays in entry.S:
++ * tramp_vecs and __bp_harden_el1_vectors. By default the canonical
++ * 'full fat' vectors are used directly.
++ */
++enum arm64_bp_harden_el1_vectors {
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++ /*
++ * Perform the BHB loop mitigation, before branching to the canonical
++ * vectors.
++ */
++ EL1_VECTOR_BHB_LOOP,
++
++ /*
++ * Make the SMC call for firmware mitigation, before branching to the
++ * canonical vectors.
++ */
++ EL1_VECTOR_BHB_FW,
++
++ /*
++ * Use the ClearBHB instruction, before branching to the canonical
++ * vectors.
++ */
++ EL1_VECTOR_BHB_CLEAR_INSN,
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++
++ /*
++ * Remap the kernel before branching to the canonical vectors.
++ */
++ EL1_VECTOR_KPTI,
++};
++
++#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++#define EL1_VECTOR_BHB_LOOP -1
++#define EL1_VECTOR_BHB_FW -1
++#define EL1_VECTOR_BHB_CLEAR_INSN -1
++#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++
++/* The vectors to use on return from EL0. e.g. to remap the kernel */
++DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
++
++#ifndef CONFIG_UNMAP_KERNEL_AT_EL0
++#define TRAMP_VALIAS 0ul
++#endif
++
++static inline const char *
++arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
++{
++ if (arm64_kernel_unmapped_at_el0())
++ return (char *)(TRAMP_VALIAS + SZ_2K * slot);
++
++ WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
++
++ return __bp_harden_el1_vectors + SZ_2K * slot;
++}
++
++#endif /* __ASM_VECTORS_H */
+diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
+index b8f41aa234ee1..f03731847d9df 100644
+--- a/arch/arm64/include/uapi/asm/hwcap.h
++++ b/arch/arm64/include/uapi/asm/hwcap.h
+@@ -75,5 +75,8 @@
+ #define HWCAP2_RNG (1 << 16)
+ #define HWCAP2_BTI (1 << 17)
+ #define HWCAP2_MTE (1 << 18)
++#define HWCAP2_ECV (1 << 19)
++#define HWCAP2_AFP (1 << 20)
++#define HWCAP2_RPRES (1 << 21)
+
+ #endif /* _UAPI__ASM_HWCAP_H */
+diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
+index b3edde68bc3e0..323e251ed37bc 100644
+--- a/arch/arm64/include/uapi/asm/kvm.h
++++ b/arch/arm64/include/uapi/asm/kvm.h
+@@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
+ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
+ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
+
++#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3)
++#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0
++#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1
++#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2
++
+ /* SVE registers */
+ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
+
+diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
+index 3f1490bfb938a..749e31475e413 100644
+--- a/arch/arm64/kernel/Makefile
++++ b/arch/arm64/kernel/Makefile
+@@ -74,6 +74,10 @@ obj-$(CONFIG_ARM64_MTE) += mte.o
+ obj-y += vdso-wrap.o
+ obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
+
++# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
++$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
++$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
++
+ obj-y += probes/
+ head-y := head.o
+ extra-y += $(head-y) vmlinux.lds
+diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
+index 3fb79b76e9d96..7bbf5104b7b7b 100644
+--- a/arch/arm64/kernel/alternative.c
++++ b/arch/arm64/kernel/alternative.c
+@@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature)
+ /*
+ * Check if the target PC is within an alternative block.
+ */
+-static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
++static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+ {
+ unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
+ return !(pc >= replptr && pc <= (replptr + alt->alt_len));
+@@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+
+ #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
+
+-static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
++static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
+ {
+ u32 insn;
+
+@@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
+ return insn;
+ }
+
+-static void patch_alternative(struct alt_instr *alt,
++static noinstr void patch_alternative(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+ {
+ __le32 *replptr;
+diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
+index 0e86e8b9ceddf..c5da9d1e954a0 100644
+--- a/arch/arm64/kernel/armv8_deprecated.c
++++ b/arch/arm64/kernel/armv8_deprecated.c
+@@ -59,6 +59,7 @@ struct insn_emulation {
+ static LIST_HEAD(insn_emulation);
+ static int nr_insn_emulated __initdata;
+ static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
++static DEFINE_MUTEX(insn_emulation_mutex);
+
+ static void register_emulation_hooks(struct insn_emulation_ops *ops)
+ {
+@@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
+ loff_t *ppos)
+ {
+ int ret = 0;
+- struct insn_emulation *insn = (struct insn_emulation *) table->data;
++ struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
+ enum insn_emulation_mode prev_mode = insn->current_mode;
+
+- table->data = &insn->current_mode;
++ mutex_lock(&insn_emulation_mutex);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (ret || !write || prev_mode == insn->current_mode)
+@@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
+ update_insn_emulation_mode(insn, INSN_UNDEF);
+ }
+ ret:
+- table->data = insn;
++ mutex_unlock(&insn_emulation_mutex);
+ return ret;
+ }
+
+@@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void)
+ sysctl->maxlen = sizeof(int);
+
+ sysctl->procname = insn->ops->name;
+- sysctl->data = insn;
++ sysctl->data = &insn->current_mode;
+ sysctl->extra1 = &insn->min;
+ sysctl->extra2 = &insn->max;
+ sysctl->proc_handler = emulation_proc_handler;
+diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
+index 587543c6c51cb..97c42be71338a 100644
+--- a/arch/arm64/kernel/cacheinfo.c
++++ b/arch/arm64/kernel/cacheinfo.c
+@@ -45,7 +45,8 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
+
+ int init_cache_level(unsigned int cpu)
+ {
+- unsigned int ctype, level, leaves, fw_level;
++ unsigned int ctype, level, leaves;
++ int fw_level;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+ for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
+@@ -63,6 +64,9 @@ int init_cache_level(unsigned int cpu)
+ else
+ fw_level = acpi_find_last_cache_level(cpu);
+
++ if (fw_level < 0)
++ return fw_level;
++
+ if (level < fw_level) {
+ /*
+ * some external caches not specified in CLIDR_EL1
+diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
+index e2c20c036442f..bf69a20bc27f9 100644
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -209,6 +209,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
+ {
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+ },
++ {
++ /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
++ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
++ },
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_2441007
++ {
++ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
++ },
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_2441009
++ {
++ /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */
++ ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
++ },
+ #endif
+ {},
+ };
+@@ -340,6 +355,50 @@ static const struct midr_range erratum_1463225[] = {
+ };
+ #endif
+
++#ifdef CONFIG_ARM64_ERRATUM_1742098
++static struct midr_range broken_aarch32_aes[] = {
++ MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
++ {},
++};
++#endif
++
++#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
++#ifdef CONFIG_ARM64_ERRATUM_2139208
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_2119858
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
++#endif
++ {},
++};
++#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */
++
++#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
++static const struct midr_range tsb_flush_fail_cpus[] = {
++#ifdef CONFIG_ARM64_ERRATUM_2067961
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_2054223
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
++#endif
++ {},
++};
++#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */
++
++#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
++static struct midr_range trbe_write_out_of_range_cpus[] = {
++#ifdef CONFIG_ARM64_ERRATUM_2253138
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_2224489
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
++#endif
++ {},
++};
++#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
++
+ const struct arm64_cpu_capabilities arm64_errata[] = {
+ #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
+ {
+@@ -425,7 +484,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
+ #endif
+ #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+ {
+- .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
++ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009",
+ .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = cpucap_multi_entry_cap_matches,
+@@ -464,6 +523,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
+ .matches = has_spectre_v4,
+ .cpu_enable = spectre_v4_enable_mitigation,
+ },
++ {
++ .desc = "Spectre-BHB",
++ .capability = ARM64_SPECTRE_BHB,
++ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
++ .matches = is_spectre_bhb_affected,
++ .cpu_enable = spectre_bhb_enable_mitigation,
++ },
+ #ifdef CONFIG_ARM64_ERRATUM_1418040
+ {
+ .desc = "ARM erratum 1418040",
+@@ -534,6 +600,52 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ },
+ #endif
++
++#ifdef CONFIG_ARM64_ERRATUM_2457168
++ {
++ .desc = "ARM erratum 2457168",
++ .capability = ARM64_WORKAROUND_2457168,
++ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
++ /* Cortex-A510 r0p0-r1p1 */
++ CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1)
++ },
++#endif
++#ifdef CONFIG_ARM64_ERRATUM_1742098
++ {
++ .desc = "ARM erratum 1742098",
++ .capability = ARM64_WORKAROUND_1742098,
++ CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
++ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
++ },
++#endif
++#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++ {
++ /*
++ * The erratum work around is handled within the TRBE
++ * driver and can be applied per-cpu. So, we can allow
++ * a late CPU to come online with this erratum.
++ */
++ .desc = "ARM erratum 2119858 or 2139208",
++ .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
++ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
++ CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus),
++ },
++#endif
++#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
++ {
++ .desc = "ARM erratum 2067961 or 2054223",
++ .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE,
++ ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus),
++ },
++#endif
++#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
++ {
++ .desc = "ARM erratum 2253138 or 2224489",
++ .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
++ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
++ CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
++ },
++#endif
+ {
+ }
+ };
+diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
+index 6ec7036ef7e18..d4ee345ff4296 100644
+--- a/arch/arm64/kernel/cpufeature.c
++++ b/arch/arm64/kernel/cpufeature.c
+@@ -73,10 +73,13 @@
+ #include <linux/mm.h>
+ #include <linux/cpu.h>
+ #include <linux/kasan.h>
++#include <linux/percpu.h>
++
+ #include <asm/cpu.h>
+ #include <asm/cpufeature.h>
+ #include <asm/cpu_ops.h>
+ #include <asm/fpsimd.h>
++#include <asm/hwcap.h>
+ #include <asm/insn.h>
+ #include <asm/kvm_host.h>
+ #include <asm/mmu_context.h>
+@@ -85,6 +88,7 @@
+ #include <asm/smp.h>
+ #include <asm/sysreg.h>
+ #include <asm/traps.h>
++#include <asm/vectors.h>
+ #include <asm/virt.h>
+
+ /* Kernel representation of AT_HWCAP and AT_HWCAP2 */
+@@ -110,6 +114,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+ bool arm64_use_ng_mappings = false;
+ EXPORT_SYMBOL(arm64_use_ng_mappings);
+
++DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
++
+ /*
+ * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
+ * support it?
+@@ -225,6 +231,12 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_END,
+ };
+
++static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
++ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
++ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
++ ARM64_FTR_END,
++};
++
+ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
+@@ -279,7 +291,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
+ };
+
+ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
++ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
+ /*
+@@ -325,6 +337,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+ };
+
+ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
++ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
+@@ -524,7 +537,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
+
+ static const struct arm64_ftr_bits ftr_id_dfr0[] = {
+ /* [31:28] TraceFilt */
+- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
++ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
+@@ -573,15 +586,19 @@ static const struct arm64_ftr_bits ftr_raz[] = {
+ ARM64_FTR_END,
+ };
+
+-#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \
++#define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) { \
+ .sys_id = id, \
+ .reg = &(struct arm64_ftr_reg){ \
+- .name = #id, \
++ .name = id_str, \
+ .override = (ovr), \
+ .ftr_bits = &((table)[0]), \
+ }}
+
+-#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override)
++#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) \
++ __ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr)
++
++#define ARM64_FTR_REG(id, table) \
++ __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
+
+ struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+ struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+@@ -633,6 +650,7 @@ static const struct __ftr_reg_entry {
+ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
+ &id_aa64isar1_override),
++ ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
+
+ /* Op1 = 0, CRn = 0, CRm = 7 */
+ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+@@ -929,6 +947,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
+ init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
++ init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
+@@ -1147,6 +1166,8 @@ void update_cpu_features(int cpu,
+ info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
+ info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
++ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
++ info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
+
+ /*
+ * Differing PARange support is fine as long as all peripherals and
+@@ -1268,6 +1289,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
+ read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
++ read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
+
+ read_sysreg_case(SYS_CNTFRQ_EL0);
+ read_sysreg_case(SYS_CTR_EL0);
+@@ -1575,6 +1597,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
+
+ int cpu = smp_processor_id();
+
++ if (__this_cpu_read(this_cpu_vector) == vectors) {
++ const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
++
++ __this_cpu_write(this_cpu_vector, v);
++ }
++
+ /*
+ * We don't need to rewrite the page-tables if either we've done
+ * it already or we have KASLR enabled and therefore have not
+@@ -1709,7 +1737,10 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
+ pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
+ smp_processor_id());
+ cpumask_set_cpu(smp_processor_id(), &amu_cpus);
+- update_freq_counters_refs();
++
++ /* 0 reference values signal broken/disabled counters */
++ if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168))
++ update_freq_counters_refs();
+ }
+ }
+
+@@ -1872,7 +1903,8 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
+ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+ {
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
+- isb();
++
++ mte_cpu_setup();
+
+ /*
+ * Clear the tags in the zero page. This needs to be done via the
+@@ -1885,6 +1917,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+ }
+ #endif /* CONFIG_ARM64_MTE */
+
++static void elf_hwcap_fixup(void)
++{
++#ifdef CONFIG_ARM64_ERRATUM_1742098
++ if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
++ compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
++#endif /* ARM64_ERRATUM_1742098 */
++}
++
+ #ifdef CONFIG_KVM
+ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
+ {
+@@ -2451,6 +2491,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
+ #ifdef CONFIG_ARM64_MTE
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ #endif /* CONFIG_ARM64_MTE */
++ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
++ HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
++ HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+ {},
+ };
+
+@@ -2909,8 +2952,10 @@ void __init setup_cpu_features(void)
+ setup_system_capabilities();
+ setup_elf_hwcaps(arm64_elf_hwcaps);
+
+- if (system_supports_32bit_el0())
++ if (system_supports_32bit_el0()) {
+ setup_elf_hwcaps(compat_elf_hwcaps);
++ elf_hwcap_fixup();
++ }
+
+ if (system_uses_ttbr0_pan())
+ pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+@@ -2962,6 +3007,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
+ cpu_active_mask);
+ get_cpu_device(lucky_winner)->offline_disabled = true;
+ setup_elf_hwcaps(compat_elf_hwcaps);
++ elf_hwcap_fixup();
+ pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
+ cpu, lucky_winner);
+ return 0;
+diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
+index 03991eeff6430..3006f43248084 100644
+--- a/arch/arm64/kernel/cpuidle.c
++++ b/arch/arm64/kernel/cpuidle.c
+@@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
+ struct acpi_lpi_state *lpi;
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+
++ if (unlikely(!pr || !pr->flags.has_lpi))
++ return -EINVAL;
++
+ /*
+ * If the PSCI cpu_suspend function hook has not been initialized
+ * idle states must not be enabled, so bail out
+@@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
+ if (!psci_ops.cpu_suspend)
+ return -EOPNOTSUPP;
+
+- if (unlikely(!pr || !pr->flags.has_lpi))
+- return -EINVAL;
+-
+ count = pr->power.count - 1;
+ if (count <= 0)
+ return -ENODEV;
+diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
+index 87731fea5e418..591c18a889a56 100644
+--- a/arch/arm64/kernel/cpuinfo.c
++++ b/arch/arm64/kernel/cpuinfo.c
+@@ -94,6 +94,9 @@ static const char *const hwcap_str[] = {
+ [KERNEL_HWCAP_RNG] = "rng",
+ [KERNEL_HWCAP_BTI] = "bti",
+ [KERNEL_HWCAP_MTE] = "mte",
++ [KERNEL_HWCAP_ECV] = "ecv",
++ [KERNEL_HWCAP_AFP] = "afp",
++ [KERNEL_HWCAP_RPRES] = "rpres",
+ };
+
+ #ifdef CONFIG_COMPAT
+@@ -390,6 +393,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
+ info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
+ info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
+ info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
++ info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+ info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
+diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
+index 4f3661eeb7ec6..732f0890416de 100644
+--- a/arch/arm64/kernel/debug-monitors.c
++++ b/arch/arm64/kernel/debug-monitors.c
+@@ -202,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook)
+ * So we call all the registered handlers, until the right handler is
+ * found which returns zero.
+ */
+-static int call_step_hook(struct pt_regs *regs, unsigned int esr)
++static int call_step_hook(struct pt_regs *regs, unsigned long esr)
+ {
+ struct step_hook *hook;
+ struct list_head *list;
+@@ -238,7 +238,7 @@ static void send_user_sigtrap(int si_code)
+ "User debug trap");
+ }
+
+-static int single_step_handler(unsigned long unused, unsigned int esr,
++static int single_step_handler(unsigned long unused, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ bool handler_found = false;
+@@ -299,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook)
+ unregister_debug_hook(&hook->node);
+ }
+
+-static int call_break_hook(struct pt_regs *regs, unsigned int esr)
++static int call_break_hook(struct pt_regs *regs, unsigned long esr)
+ {
+ struct break_hook *hook;
+ struct list_head *list;
+- int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
++ int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
+
+ list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
+
+@@ -312,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+ * entirely not preemptible, and we can use rcu list safely here.
+ */
+ list_for_each_entry_rcu(hook, list, node) {
+- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
++ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+
+ if ((comment & ~hook->mask) == hook->imm)
+ fn = hook->fn;
+@@ -322,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+ }
+ NOKPROBE_SYMBOL(call_break_hook);
+
+-static int brk_handler(unsigned long unused, unsigned int esr,
++static int brk_handler(unsigned long unused, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+@@ -438,6 +438,11 @@ int kernel_active_single_step(void)
+ }
+ NOKPROBE_SYMBOL(kernel_active_single_step);
+
++void kernel_rewind_single_step(struct pt_regs *regs)
++{
++ set_regs_spsr_ss(regs);
++}
++
+ /* ptrace API */
+ void user_enable_single_step(struct task_struct *task)
+ {
+diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
+index 75691a2641c1c..2d3c4b02393e4 100644
+--- a/arch/arm64/kernel/efi-rt-wrapper.S
++++ b/arch/arm64/kernel/efi-rt-wrapper.S
+@@ -4,6 +4,7 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/assembler.h>
+
+ SYM_FUNC_START(__efi_rt_asm_wrapper)
+ stp x29, x30, [sp, #-32]!
+@@ -16,6 +17,12 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
+ */
+ stp x1, x18, [sp, #16]
+
++ ldr_l x16, efi_rt_stack_top
++ mov sp, x16
++#ifdef CONFIG_SHADOW_CALL_STACK
++ str x18, [sp, #-16]!
++#endif
++
+ /*
+ * We are lucky enough that no EFI runtime services take more than
+ * 5 arguments, so all are passed in registers rather than via the
+@@ -29,6 +36,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
+ mov x4, x6
+ blr x8
+
++ mov sp, x29
+ ldp x1, x2, [sp, #16]
+ cmp x2, x18
+ ldp x29, x30, [sp], #32
+@@ -42,6 +50,10 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+- mov x18, x2
++#ifdef CONFIG_SHADOW_CALL_STACK
++ ldr_l x18, efi_rt_stack_top
++ ldr x18, [x18, #-16]
++#endif
++
+ b efi_handle_corrupted_x18 // tail call
+ SYM_FUNC_END(__efi_rt_asm_wrapper)
+diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
+index e1be6c429810d..9669f3fa2aefe 100644
+--- a/arch/arm64/kernel/efi.c
++++ b/arch/arm64/kernel/efi.c
+@@ -12,6 +12,14 @@
+
+ #include <asm/efi.h>
+
++static bool region_is_misaligned(const efi_memory_desc_t *md)
++{
++ if (PAGE_SIZE == EFI_PAGE_SIZE)
++ return false;
++ return !PAGE_ALIGNED(md->phys_addr) ||
++ !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
++}
++
+ /*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+@@ -25,14 +33,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return PROT_DEVICE_nGnRE;
+
+- if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
+- "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
++ if (region_is_misaligned(md)) {
++ static bool __initdata code_is_misaligned;
++
+ /*
+- * If the region is not aligned to the page size of the OS, we
+- * can not use strict permissions, since that would also affect
+- * the mapping attributes of the adjacent regions.
++ * Regions that are not aligned to the OS page size cannot be
++ * mapped with strict permissions, as those might interfere
++ * with the permissions that are needed by the adjacent
++ * region's mapping. However, if we haven't encountered any
++ * misaligned runtime code regions so far, we can safely use
++ * non-executable permissions for non-code regions.
+ */
+- return pgprot_val(PAGE_KERNEL_EXEC);
++ code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);
++
++ return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
++ : pgprot_val(PAGE_KERNEL);
++ }
+
+ /* R-- */
+ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+@@ -63,19 +79,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+ bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ md->type == EFI_RUNTIME_SERVICES_DATA);
+
+- if (!PAGE_ALIGNED(md->phys_addr) ||
+- !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
+- /*
+- * If the end address of this region is not aligned to page
+- * size, the mapping is rounded up, and may end up sharing a
+- * page frame with the next UEFI memory region. If we create
+- * a block entry now, we may need to split it again when mapping
+- * the next region, and support for that is going to be removed
+- * from the MMU routines. So avoid block mappings altogether in
+- * that case.
+- */
++ /*
++ * If this region is not aligned to the page size used by the OS, the
++ * mapping will be rounded outwards, and may end up sharing a page
++ * frame with an adjacent runtime memory region. Given that the page
++ * table descriptor covering the shared page will be rewritten when the
++ * adjacent region gets mapped, we must avoid block mappings here so we
++ * don't have to worry about splitting them when that happens.
++ */
++ if (region_is_misaligned(md))
+ page_mappings_only = true;
+- }
+
+ create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+@@ -102,6 +115,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
+ BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_DATA);
+
++ if (region_is_misaligned(md))
++ return 0;
++
+ /*
+ * Calling apply_to_page_range() is only safe on regions that are
+ * guaranteed to be mapped down to pages. Since we are only called
+@@ -128,3 +144,30 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
+ pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
+ return s;
+ }
++
++DEFINE_RAW_SPINLOCK(efi_rt_lock);
++
++asmlinkage u64 *efi_rt_stack_top __ro_after_init;
++
++/* EFI requires 8 KiB of stack space for runtime services */
++static_assert(THREAD_SIZE >= SZ_8K);
++
++static int __init arm64_efi_rt_init(void)
++{
++ void *p;
++
++ if (!efi_enabled(EFI_RUNTIME_SERVICES))
++ return 0;
++
++ p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
++ NUMA_NO_NODE, &&l);
++l: if (!p) {
++ pr_warn("Failed to allocate EFI runtime stack\n");
++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
++ return -ENOMEM;
++ }
++
++ efi_rt_stack_top = p + THREAD_SIZE;
++ return 0;
++}
++core_initcall(arm64_efi_rt_init);
+diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
+index 32f9796c4ffe7..fc91dad1579ab 100644
+--- a/arch/arm64/kernel/entry-common.c
++++ b/arch/arm64/kernel/entry-common.c
+@@ -72,7 +72,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
+ if (interrupts_enabled(regs)) {
+ if (regs->exit_rcu) {
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ rcu_irq_exit();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ return;
+@@ -117,7 +117,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
+ static __always_inline void __exit_to_user_mode(void)
+ {
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ user_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ }
+@@ -175,7 +175,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ }
+
+ rcu_nmi_exit();
+@@ -211,7 +211,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+
+ if (restore) {
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ }
+
+ rcu_nmi_exit();
+@@ -273,13 +273,13 @@ extern void (*handle_arch_irq)(struct pt_regs *);
+ extern void (*handle_arch_fiq)(struct pt_regs *);
+
+ static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
+- unsigned int esr)
++ unsigned long esr)
+ {
+ arm64_enter_nmi(regs);
+
+ console_verbose();
+
+- pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
++ pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n",
+ vector, smp_processor_id(), esr,
+ esr_get_class_string(esr));
+
+@@ -320,7 +320,8 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
+ }
+
+-static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
++static __always_inline bool
++cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+ {
+ if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
+ return false;
+@@ -795,7 +796,7 @@ UNHANDLED(el0t, 32, error)
+ #ifdef CONFIG_VMAP_STACK
+ asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+ {
+- unsigned int esr = read_sysreg(esr_el1);
++ unsigned long esr = read_sysreg(esr_el1);
+ unsigned long far = read_sysreg(far_el1);
+
+ arm64_enter_nmi(regs);
+diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
+index b3e4f9a088b1a..8cf970d219f5d 100644
+--- a/arch/arm64/kernel/entry-ftrace.S
++++ b/arch/arm64/kernel/entry-ftrace.S
+@@ -77,11 +77,17 @@
+ .endm
+
+ SYM_CODE_START(ftrace_regs_caller)
++#ifdef BTI_C
++ BTI_C
++#endif
+ ftrace_regs_entry 1
+ b ftrace_common
+ SYM_CODE_END(ftrace_regs_caller)
+
+ SYM_CODE_START(ftrace_caller)
++#ifdef BTI_C
++ BTI_C
++#endif
+ ftrace_regs_entry 0
+ b ftrace_common
+ SYM_CODE_END(ftrace_caller)
+diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
+index bc6d5a970a131..bdc5f744249bb 100644
+--- a/arch/arm64/kernel/entry.S
++++ b/arch/arm64/kernel/entry.S
+@@ -37,18 +37,21 @@
+
+ .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
+ .align 7
+-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
++.Lventry_start\@:
+ .if \el == 0
+-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
++ /*
++ * This must be the first instruction of the EL0 vector entries. It is
++ * skipped by the trampoline vectors, to trigger the cleanup.
++ */
++ b .Lskip_tramp_vectors_cleanup\@
+ .if \regsize == 64
+ mrs x30, tpidrro_el0
+ msr tpidrro_el0, xzr
+ .else
+ mov x30, xzr
+ .endif
+-alternative_else_nop_endif
++.Lskip_tramp_vectors_cleanup\@:
+ .endif
+-#endif
+
+ sub sp, sp, #PT_REGS_SIZE
+ #ifdef CONFIG_VMAP_STACK
+@@ -95,11 +98,15 @@ alternative_else_nop_endif
+ mrs x0, tpidrro_el0
+ #endif
+ b el\el\ht\()_\regsize\()_\label
++.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
+ .endm
+
+- .macro tramp_alias, dst, sym
++ .macro tramp_alias, dst, sym, tmp
+ mov_q \dst, TRAMP_VALIAS
+- add \dst, \dst, #(\sym - .entry.tramp.text)
++ adr_l \tmp, \sym
++ add \dst, \dst, \tmp
++ adr_l \tmp, .entry.tramp.text
++ sub \dst, \dst, \tmp
+ .endm
+
+ /*
+@@ -116,7 +123,7 @@ alternative_cb_end
+ tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+ mov w1, #\state
+-alternative_cb spectre_v4_patch_fw_mitigation_conduit
++alternative_cb smccc_patch_fw_mitigation_conduit
+ nop // Patched to SMC/HVC #0
+ alternative_cb_end
+ .L__asm_ssbd_skip\@:
+@@ -265,7 +272,7 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
+ alternative_else_nop_endif
+ 1:
+
+- scs_load tsk
++ scs_load_current
+ .else
+ add x21, sp, #PT_REGS_SIZE
+ get_current_task tsk
+@@ -413,21 +420,26 @@ alternative_else_nop_endif
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x28, x29, [sp, #16 * 14]
+- ldr lr, [sp, #S_LR]
+- add sp, sp, #PT_REGS_SIZE // restore sp
+
+ .if \el == 0
+-alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
++alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
++ ldr lr, [sp, #S_LR]
++ add sp, sp, #PT_REGS_SIZE // restore sp
++ eret
++alternative_else_nop_endif
+ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ bne 4f
+- msr far_el1, x30
+- tramp_alias x30, tramp_exit_native
++ msr far_el1, x29
++ tramp_alias x30, tramp_exit_native, x29
+ br x30
+ 4:
+- tramp_alias x30, tramp_exit_compat
++ tramp_alias x30, tramp_exit_compat, x29
+ br x30
+ #endif
+ .else
++ ldr lr, [sp, #S_LR]
++ add sp, sp, #PT_REGS_SIZE // restore sp
++
+ /* Ensure any device/NC reads complete */
+ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+
+@@ -594,12 +606,6 @@ SYM_CODE_END(ret_to_user)
+
+ .popsection // .entry.text
+
+-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+-/*
+- * Exception vectors trampoline.
+- */
+- .pushsection ".entry.tramp.text", "ax"
+-
+ // Move from tramp_pg_dir to swapper_pg_dir
+ .macro tramp_map_kernel, tmp
+ mrs \tmp, ttbr1_el1
+@@ -633,12 +639,47 @@ alternative_else_nop_endif
+ */
+ .endm
+
+- .macro tramp_ventry, regsize = 64
++ .macro tramp_data_page dst
++ adr_l \dst, .entry.tramp.text
++ sub \dst, \dst, PAGE_SIZE
++ .endm
++
++ .macro tramp_data_read_var dst, var
++#ifdef CONFIG_RANDOMIZE_BASE
++ tramp_data_page \dst
++ add \dst, \dst, #:lo12:__entry_tramp_data_\var
++ ldr \dst, [\dst]
++#else
++ ldr \dst, =\var
++#endif
++ .endm
++
++#define BHB_MITIGATION_NONE 0
++#define BHB_MITIGATION_LOOP 1
++#define BHB_MITIGATION_FW 2
++#define BHB_MITIGATION_INSN 3
++
++ .macro tramp_ventry, vector_start, regsize, kpti, bhb
+ .align 7
+ 1:
+ .if \regsize == 64
+ msr tpidrro_el0, x30 // Restored in kernel_ventry
+ .endif
++
++ .if \bhb == BHB_MITIGATION_LOOP
++ /*
++ * This sequence must appear before the first indirect branch. i.e. the
++ * ret out of tramp_ventry. It appears here because x30 is free.
++ */
++ __mitigate_spectre_bhb_loop x30
++ .endif // \bhb == BHB_MITIGATION_LOOP
++
++ .if \bhb == BHB_MITIGATION_INSN
++ clearbhb
++ isb
++ .endif // \bhb == BHB_MITIGATION_INSN
++
++ .if \kpti == 1
+ /*
+ * Defend against branch aliasing attacks by pushing a dummy
+ * entry onto the return stack and using a RET instruction to
+@@ -648,46 +689,75 @@ alternative_else_nop_endif
+ b .
+ 2:
+ tramp_map_kernel x30
+-#ifdef CONFIG_RANDOMIZE_BASE
+- adr x30, tramp_vectors + PAGE_SIZE
+ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
+- ldr x30, [x30]
+-#else
+- ldr x30, =vectors
+-#endif
++ tramp_data_read_var x30, vectors
+ alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
+- prfm plil1strm, [x30, #(1b - tramp_vectors)]
++ prfm plil1strm, [x30, #(1b - \vector_start)]
+ alternative_else_nop_endif
++
+ msr vbar_el1, x30
+- add x30, x30, #(1b - tramp_vectors)
+ isb
++ .else
++ ldr x30, =vectors
++ .endif // \kpti == 1
++
++ .if \bhb == BHB_MITIGATION_FW
++ /*
++ * The firmware sequence must appear before the first indirect branch.
++ * i.e. the ret out of tramp_ventry. But it also needs the stack to be
++ * mapped to save/restore the registers the SMC clobbers.
++ */
++ __mitigate_spectre_bhb_fw
++ .endif // \bhb == BHB_MITIGATION_FW
++
++ add x30, x30, #(1b - \vector_start + 4)
+ ret
++.org 1b + 128 // Did we overflow the ventry slot?
+ .endm
+
+ .macro tramp_exit, regsize = 64
+- adr x30, tramp_vectors
++ tramp_data_read_var x30, this_cpu_vector
++ get_this_cpu_offset x29
++ ldr x30, [x30, x29]
++
+ msr vbar_el1, x30
+- tramp_unmap_kernel x30
++ ldr lr, [sp, #S_LR]
++ tramp_unmap_kernel x29
+ .if \regsize == 64
+- mrs x30, far_el1
++ mrs x29, far_el1
+ .endif
++ add sp, sp, #PT_REGS_SIZE // restore sp
+ eret
+ sb
+ .endm
+
+- .align 11
+-SYM_CODE_START_NOALIGN(tramp_vectors)
++ .macro generate_tramp_vector, kpti, bhb
++.Lvector_start\@:
+ .space 0x400
+
+- tramp_ventry
+- tramp_ventry
+- tramp_ventry
+- tramp_ventry
++ .rept 4
++ tramp_ventry .Lvector_start\@, 64, \kpti, \bhb
++ .endr
++ .rept 4
++ tramp_ventry .Lvector_start\@, 32, \kpti, \bhb
++ .endr
++ .endm
+
+- tramp_ventry 32
+- tramp_ventry 32
+- tramp_ventry 32
+- tramp_ventry 32
++#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
++/*
++ * Exception vectors trampoline.
++ * The order must match __bp_harden_el1_vectors and the
++ * arm64_bp_harden_el1_vectors enum.
++ */
++ .pushsection ".entry.tramp.text", "ax"
++ .align 11
++SYM_CODE_START_NOALIGN(tramp_vectors)
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP
++ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW
++ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE
+ SYM_CODE_END(tramp_vectors)
+
+ SYM_CODE_START(tramp_exit_native)
+@@ -704,12 +774,56 @@ SYM_CODE_END(tramp_exit_compat)
+ .pushsection ".rodata", "a"
+ .align PAGE_SHIFT
+ SYM_DATA_START(__entry_tramp_data_start)
++__entry_tramp_data_vectors:
+ .quad vectors
++#ifdef CONFIG_ARM_SDE_INTERFACE
++__entry_tramp_data___sdei_asm_handler:
++ .quad __sdei_asm_handler
++#endif /* CONFIG_ARM_SDE_INTERFACE */
++__entry_tramp_data_this_cpu_vector:
++ .quad this_cpu_vector
+ SYM_DATA_END(__entry_tramp_data_start)
+ .popsection // .rodata
+ #endif /* CONFIG_RANDOMIZE_BASE */
+ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
++/*
++ * Exception vectors for spectre mitigations on entry from EL1 when
++ * kpti is not in use.
++ */
++ .macro generate_el1_vector, bhb
++.Lvector_start\@:
++ kernel_ventry 1, t, 64, sync // Synchronous EL1t
++ kernel_ventry 1, t, 64, irq // IRQ EL1t
++ kernel_ventry 1, t, 64, fiq // FIQ EL1t
++ kernel_ventry 1, t, 64, error // Error EL1t
++
++ kernel_ventry 1, h, 64, sync // Synchronous EL1h
++ kernel_ventry 1, h, 64, irq // IRQ EL1h
++ kernel_ventry 1, h, 64, fiq // FIQ EL1h
++ kernel_ventry 1, h, 64, error // Error EL1h
++
++ .rept 4
++ tramp_ventry .Lvector_start\@, 64, 0, \bhb
++ .endr
++ .rept 4
++ tramp_ventry .Lvector_start\@, 32, 0, \bhb
++ .endr
++ .endm
++
++/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */
++ .pushsection ".entry.text", "ax"
++ .align 11
++SYM_CODE_START(__bp_harden_el1_vectors)
++#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
++ generate_el1_vector bhb=BHB_MITIGATION_LOOP
++ generate_el1_vector bhb=BHB_MITIGATION_FW
++ generate_el1_vector bhb=BHB_MITIGATION_INSN
++#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
++SYM_CODE_END(__bp_harden_el1_vectors)
++ .popsection
++
++
+ /*
+ * Register switch for AArch64. The callee-saved registers need to be saved
+ * and restored. On entry:
+@@ -741,7 +855,7 @@ SYM_FUNC_START(cpu_switch_to)
+ msr sp_el0, x1
+ ptrauth_keys_install_kernel x1, x8, x9, x10
+ scs_save x0
+- scs_load x1
++ scs_load_current
+ ret
+ SYM_FUNC_END(cpu_switch_to)
+ NOKPROBE(cpu_switch_to)
+@@ -769,19 +883,19 @@ NOKPROBE(ret_from_fork)
+ */
+ SYM_FUNC_START(call_on_irq_stack)
+ #ifdef CONFIG_SHADOW_CALL_STACK
+- stp scs_sp, xzr, [sp, #-16]!
++ get_current_task x16
++ scs_save x16
+ ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
+ #endif
++
+ /* Create a frame record to save our LR and SP (implicit in FP) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ ldr_this_cpu x16, irq_stack_ptr, x17
+- mov x15, #IRQ_STACK_SIZE
+- add x16, x16, x15
+
+ /* Move to the new stack and call the function there */
+- mov sp, x16
++ add sp, x16, #IRQ_STACK_SIZE
+ blr x1
+
+ /*
+@@ -790,9 +904,7 @@ SYM_FUNC_START(call_on_irq_stack)
+ */
+ mov sp, x29
+ ldp x29, x30, [sp], #16
+-#ifdef CONFIG_SHADOW_CALL_STACK
+- ldp scs_sp, xzr, [sp], #16
+-#endif
++ scs_load_current
+ ret
+ SYM_FUNC_END(call_on_irq_stack)
+ NOKPROBE(call_on_irq_stack)
+@@ -835,14 +947,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
+ * Remember whether to unmap the kernel on exit.
+ */
+ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
+-
+-#ifdef CONFIG_RANDOMIZE_BASE
+- adr x4, tramp_vectors + PAGE_SIZE
+- add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
+- ldr x4, [x4]
+-#else
+- ldr x4, =__sdei_asm_handler
+-#endif
++ tramp_data_read_var x4, __sdei_asm_handler
+ br x4
+ SYM_CODE_END(__sdei_asm_entry_trampoline)
+ NOKPROBE(__sdei_asm_entry_trampoline)
+@@ -865,13 +970,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline)
+ NOKPROBE(__sdei_asm_exit_trampoline)
+ .ltorg
+ .popsection // .entry.tramp.text
+-#ifdef CONFIG_RANDOMIZE_BASE
+-.pushsection ".rodata", "a"
+-SYM_DATA_START(__sdei_asm_trampoline_next_handler)
+- .quad __sdei_asm_handler
+-SYM_DATA_END(__sdei_asm_trampoline_next_handler)
+-.popsection // .rodata
+-#endif /* CONFIG_RANDOMIZE_BASE */
+ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
+ /*
+@@ -907,9 +1005,13 @@ SYM_CODE_START(__sdei_asm_handler)
+
+ mov x19, x1
+
+-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
++ /* Store the registered-event for crash_smp_send_stop() */
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+-#endif
++ cbnz w4, 1f
++ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
++ b 2f
++1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
++2: str x19, [x5]
+
+ #ifdef CONFIG_VMAP_STACK
+ /*
+@@ -974,14 +1076,33 @@ SYM_CODE_START(__sdei_asm_handler)
+
+ ldr_l x2, sdei_exit_mode
+
++ /* Clear the registered-event seen by crash_smp_send_stop() */
++ ldrb w3, [x4, #SDEI_EVENT_PRIORITY]
++ cbnz w3, 1f
++ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
++ b 2f
++1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
++2: str xzr, [x5]
++
+ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
+ sdei_handler_exit exit_mode=x2
+ alternative_else_nop_endif
+
+ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+- tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
++ tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
+ br x5
+ #endif
+ SYM_CODE_END(__sdei_asm_handler)
+ NOKPROBE(__sdei_asm_handler)
++
++SYM_CODE_START(__sdei_handler_abort)
++ mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
++ adr x1, 1f
++ ldr_l x2, sdei_exit_mode
++ sdei_handler_exit exit_mode=x2
++ // exit the handler and jump to the next instruction.
++ // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
++1: ret
++SYM_CODE_END(__sdei_handler_abort)
++NOKPROBE(__sdei_handler_abort)
+ #endif /* CONFIG_ARM_SDE_INTERFACE */
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index ff4962750b3d0..7a3fcf21b18a7 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -930,7 +930,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
+ * would have disabled the SVE access trap for userspace during
+ * ret_to_user, making an SVE access trap impossible in that case.
+ */
+-void do_sve_acc(unsigned int esr, struct pt_regs *regs)
++void do_sve_acc(unsigned long esr, struct pt_regs *regs)
+ {
+ /* Even if we chose not to use SVE, the hardware could still trap: */
+ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
+@@ -972,7 +972,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+ /*
+ * Trapped FP/ASIMD access.
+ */
+-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
++void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
+ {
+ /* TODO: implement lazy context saving/restoring */
+ WARN_ON(1);
+@@ -981,7 +981,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+ /*
+ * Raise a SIGFPE for the current process.
+ */
+-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
++void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
+ {
+ unsigned int si_code = FPE_FLTUNK;
+
+diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
+index 7f467bd9db7a3..dba774f3b8d7c 100644
+--- a/arch/arm64/kernel/ftrace.c
++++ b/arch/arm64/kernel/ftrace.c
+@@ -78,47 +78,76 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+ }
+
+ /*
+- * Turn on the call to ftrace_caller() in instrumented function
++ * Find the address the callsite must branch to in order to reach '*addr'.
++ *
++ * Due to the limited range of 'BL' instructions, modules may be placed too far
++ * away to branch directly and must use a PLT.
++ *
++ * Returns true when '*addr' contains a reachable target address, or has been
++ * modified to contain a PLT address. Returns false otherwise.
+ */
+-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
++static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
++ struct module *mod,
++ unsigned long *addr)
+ {
+ unsigned long pc = rec->ip;
+- u32 old, new;
+- long offset = (long)pc - (long)addr;
++ long offset = (long)*addr - (long)pc;
++ struct plt_entry *plt;
+
+- if (offset < -SZ_128M || offset >= SZ_128M) {
+- struct module *mod;
+- struct plt_entry *plt;
++ /*
++ * When the target is within range of the 'BL' instruction, use 'addr'
++ * as-is and branch to that directly.
++ */
++ if (offset >= -SZ_128M && offset < SZ_128M)
++ return true;
+
+- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+- return -EINVAL;
++ /*
++ * When the target is outside of the range of a 'BL' instruction, we
++ * must use a PLT to reach it. We can only place PLTs for modules, and
++ * only when module PLT support is built-in.
++ */
++ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
++ return false;
+
+- /*
+- * On kernels that support module PLTs, the offset between the
+- * branch instruction and its target may legally exceed the
+- * range of an ordinary relative 'bl' opcode. In this case, we
+- * need to branch via a trampoline in the module.
+- *
+- * NOTE: __module_text_address() must be called with preemption
+- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+- * retains its validity throughout the remainder of this code.
+- */
++ /*
++ * 'mod' is only set at module load time, but if we end up
++ * dealing with an out-of-range condition, we can assume it
++ * is due to a module being loaded far away from the kernel.
++ *
++ * NOTE: __module_text_address() must be called with preemption
++ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
++ * retains its validity throughout the remainder of this code.
++ */
++ if (!mod) {
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
++ }
+
+- if (WARN_ON(!mod))
+- return -EINVAL;
+-
+- plt = get_ftrace_plt(mod, addr);
+- if (!plt) {
+- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+- return -EINVAL;
+- }
++ if (WARN_ON(!mod))
++ return false;
+
+- addr = (unsigned long)plt;
++ plt = get_ftrace_plt(mod, *addr);
++ if (!plt) {
++ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
++ return false;
+ }
+
++ *addr = (unsigned long)plt;
++ return true;
++}
++
++/*
++ * Turn on the call to ftrace_caller() in instrumented function
++ */
++int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
++{
++ unsigned long pc = rec->ip;
++ u32 old, new;
++
++ if (!ftrace_find_callable_addr(rec, NULL, &addr))
++ return -EINVAL;
++
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+@@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
++ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
++ return -EINVAL;
++ if (!ftrace_find_callable_addr(rec, NULL, &addr))
++ return -EINVAL;
++
+ old = aarch64_insn_gen_branch_imm(pc, old_addr,
+ AARCH64_INSN_BRANCH_LINK);
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+@@ -181,54 +215,30 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+ unsigned long pc = rec->ip;
+- bool validate = true;
+ u32 old = 0, new;
+- long offset = (long)pc - (long)addr;
+-
+- if (offset < -SZ_128M || offset >= SZ_128M) {
+- u32 replaced;
+-
+- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+- return -EINVAL;
+
+- /*
+- * 'mod' is only set at module load time, but if we end up
+- * dealing with an out-of-range condition, we can assume it
+- * is due to a module being loaded far away from the kernel.
+- */
+- if (!mod) {
+- preempt_disable();
+- mod = __module_text_address(pc);
+- preempt_enable();
+-
+- if (WARN_ON(!mod))
+- return -EINVAL;
+- }
+-
+- /*
+- * The instruction we are about to patch may be a branch and
+- * link instruction that was redirected via a PLT entry. In
+- * this case, the normal validation will fail, but we can at
+- * least check that we are dealing with a branch and link
+- * instruction that points into the right module.
+- */
+- if (aarch64_insn_read((void *)pc, &replaced))
+- return -EFAULT;
+-
+- if (!aarch64_insn_is_bl(replaced) ||
+- !within_module(pc + aarch64_get_branch_offset(replaced),
+- mod))
+- return -EINVAL;
++ new = aarch64_insn_gen_nop();
+
+- validate = false;
+- } else {
+- old = aarch64_insn_gen_branch_imm(pc, addr,
+- AARCH64_INSN_BRANCH_LINK);
++ /*
++ * When using mcount, callsites in modules may have been initialized to
++ * call an arbitrary module PLT (which redirects to the _mcount stub)
++ * rather than the ftrace PLT we'll use at runtime (which redirects to
++ * the ftrace trampoline). We can ignore the old PLT when initializing
++ * the callsite.
++ *
++ * Note: 'mod' is only set at module load time.
++ */
++ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
++ IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
++ return aarch64_insn_patch_text_nosync((void *)pc, new);
+ }
+
+- new = aarch64_insn_gen_nop();
++ if (!ftrace_find_callable_addr(rec, mod, &addr))
++ return -EINVAL;
+
+- return ftrace_modify_code(pc, old, new, validate);
++ old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
++
++ return ftrace_modify_code(pc, old, new, true);
+ }
+
+ void arch_ftrace_update_code(int command)
+diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
+index 17962452e31de..512a921edad59 100644
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
+ subs x1, x1, #64
+ b.ne 1b
+
+- mov x7, SWAPPER_MM_MMUFLAGS
++ mov_q x7, SWAPPER_MM_MMUFLAGS
+
+ /*
+ * Create the identity mapping.
+@@ -409,7 +409,7 @@ SYM_FUNC_END(__create_page_tables)
+ stp xzr, xzr, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+
+- scs_load \tsk
++ scs_load_current
+
+ adr_l \tmp1, __per_cpu_offset
+ ldr w\tmp2, [\tsk, #TSK_CPU]
+diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
+index 712e97c03e54c..2a7f21314cde6 100644
+--- a/arch/arm64/kernel/hw_breakpoint.c
++++ b/arch/arm64/kernel/hw_breakpoint.c
+@@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
+ /*
+ * Debug exception handlers.
+ */
+-static int breakpoint_handler(unsigned long unused, unsigned int esr,
++static int breakpoint_handler(unsigned long unused, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ int i, step = 0, *kernel_step;
+@@ -751,7 +751,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
+ return step;
+ }
+
+-static int watchpoint_handler(unsigned long addr, unsigned int esr,
++static int watchpoint_handler(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ int i, step = 0, *kernel_step, access, closest_match = 0;
+diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
+index c96a9a0043bf4..e03e60f9482b4 100644
+--- a/arch/arm64/kernel/image-vars.h
++++ b/arch/arm64/kernel/image-vars.h
+@@ -66,6 +66,10 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch);
+ KVM_NVHE_ALIAS(kvm_update_va_mask);
+ KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
+ KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
++KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
++KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
++KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
++KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
+
+ /* Global kernel state accessed by nVHE hyp code. */
+ KVM_NVHE_ALIAS(kvm_vgic_global_state);
+diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
+index 2aede780fb80c..4e1f983df3d1c 100644
+--- a/arch/arm64/kernel/kgdb.c
++++ b/arch/arm64/kernel/kgdb.c
+@@ -224,6 +224,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
+ */
+ if (!kernel_active_single_step())
+ kernel_enable_single_step(linux_regs);
++ else
++ kernel_rewind_single_step(linux_regs);
+ err = 0;
+ break;
+ default:
+@@ -232,14 +234,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
+ return err;
+ }
+
+-static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
++static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
+ {
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return DBG_HOOK_HANDLED;
+ }
+ NOKPROBE_SYMBOL(kgdb_brk_fn)
+
+-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
++static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
+ {
+ compiled_break = 1;
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+@@ -248,7 +250,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+ }
+ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
+
+-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
++static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
+ {
+ if (!kgdb_single_step)
+ return DBG_HOOK_ERROR;
+diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
+index 63634b4d72c15..59c648d518488 100644
+--- a/arch/arm64/kernel/machine_kexec_file.c
++++ b/arch/arm64/kernel/machine_kexec_file.c
+@@ -149,6 +149,7 @@ int load_other_segments(struct kimage *image,
+ initrd_len, cmdline, 0);
+ if (!dtb) {
+ pr_err("Preparing for new dtb failed\n");
++ ret = -EINVAL;
+ goto out_err;
+ }
+
+diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
+index e53493d8b208b..08fcbcb40d882 100644
+--- a/arch/arm64/kernel/module-plts.c
++++ b/arch/arm64/kernel/module-plts.c
+@@ -7,6 +7,7 @@
+ #include <linux/ftrace.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
++#include <linux/moduleloader.h>
+ #include <linux/sort.h>
+
+ static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
+@@ -342,7 +343,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ if (nents)
+ sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+- if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
++ if (!module_init_layout_section(secstrings + dstsec->sh_name))
+ core_plts += count_plts(syms, rels, numrels,
+ sechdrs[i].sh_info, dstsec);
+ else
+diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
+index b5ec010c481f3..309a27553c875 100644
+--- a/arch/arm64/kernel/module.c
++++ b/arch/arm64/kernel/module.c
+@@ -36,7 +36,7 @@ void *module_alloc(unsigned long size)
+ module_alloc_end = MODULES_END;
+
+ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+- module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
++ module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
+ if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+
+- if (p && (kasan_module_alloc(p, size) < 0)) {
++ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
+index e5e801bc53122..a3898bac5ae6f 100644
+--- a/arch/arm64/kernel/mte.c
++++ b/arch/arm64/kernel/mte.c
+@@ -53,7 +53,12 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+ * the new page->flags are visible before the tags were updated.
+ */
+ smp_wmb();
+- mte_clear_page_tags(page_address(page));
++ /*
++ * Test PG_mte_tagged again in case it was racing with another
++ * set_pte_at().
++ */
++ if (!test_and_set_bit(PG_mte_tagged, &page->flags))
++ mte_clear_page_tags(page_address(page));
+ }
+
+ void mte_sync_tags(pte_t old_pte, pte_t pte)
+@@ -69,10 +74,13 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
+
+ /* if PG_mte_tagged is set, tags have already been initialised */
+ for (i = 0; i < nr_pages; i++, page++) {
+- if (!test_and_set_bit(PG_mte_tagged, &page->flags))
++ if (!test_bit(PG_mte_tagged, &page->flags))
+ mte_sync_page_tags(page, old_pte, check_swap,
+ pte_is_tagged);
+ }
++
++ /* ensure the tags are visible before the PTE is set */
++ smp_wmb();
+ }
+
+ int memcmp_pages(struct page *page1, struct page *page2)
+@@ -210,6 +218,49 @@ void mte_thread_switch(struct task_struct *next)
+ mte_check_tfsr_el1();
+ }
+
++void mte_cpu_setup(void)
++{
++ u64 rgsr;
++
++ /*
++ * CnP must be enabled only after the MAIR_EL1 register has been set
++ * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may
++ * lead to the wrong memory type being used for a brief window during
++ * CPU power-up.
++ *
++ * CnP is not a boot feature so MTE gets enabled before CnP, but let's
++ * make sure that is the case.
++ */
++ BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
++ BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);
++
++ /* Normal Tagged memory type at the corresponding MAIR index */
++ sysreg_clear_set(mair_el1,
++ MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED),
++ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED,
++ MT_NORMAL_TAGGED));
++
++ write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1);
++
++ /*
++ * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
++ * RGSR_EL1.SEED must be non-zero for IRG to produce
++ * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
++ * must initialize it.
++ */
++ rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) <<
++ SYS_RGSR_EL1_SEED_SHIFT;
++ if (rgsr == 0)
++ rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT;
++ write_sysreg_s(rgsr, SYS_RGSR_EL1);
++
++ /* clear any pending tag check faults in TFSR*_EL1 */
++ write_sysreg_s(0, SYS_TFSR_EL1);
++ write_sysreg_s(0, SYS_TFSRE0_EL1);
++
++ local_flush_tlb_all();
++}
++
+ void mte_suspend_enter(void)
+ {
+ if (!system_supports_mte())
+@@ -226,6 +277,14 @@ void mte_suspend_enter(void)
+ mte_check_tfsr_el1();
+ }
+
++void mte_suspend_exit(void)
++{
++ if (!system_supports_mte())
++ return;
++
++ mte_cpu_setup();
++}
++
+ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+ {
+ u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
+index 75fed4460407d..57c7c211f8c71 100644
+--- a/arch/arm64/kernel/paravirt.c
++++ b/arch/arm64/kernel/paravirt.c
+@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
+ DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+ struct pv_time_stolen_time_region {
+- struct pvclock_vcpu_stolen_time *kaddr;
++ struct pvclock_vcpu_stolen_time __rcu *kaddr;
+ };
+
+ static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
+ /* return stolen time in ns by asking the hypervisor */
+ static u64 para_steal_clock(int cpu)
+ {
++ struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ struct pv_time_stolen_time_region *reg;
++ u64 ret = 0;
+
+ reg = per_cpu_ptr(&stolen_time_region, cpu);
+
+@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
+ * online notification callback runs. Until the callback
+ * has run we just return zero.
+ */
+- if (!reg->kaddr)
++ rcu_read_lock();
++ kaddr = rcu_dereference(reg->kaddr);
++ if (!kaddr) {
++ rcu_read_unlock();
+ return 0;
++ }
+
+- return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
++ ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
++ rcu_read_unlock();
++ return ret;
+ }
+
+ static int stolen_time_cpu_down_prepare(unsigned int cpu)
+ {
++ struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ struct pv_time_stolen_time_region *reg;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+ if (!reg->kaddr)
+ return 0;
+
+- memunmap(reg->kaddr);
+- memset(reg, 0, sizeof(*reg));
++ kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
++ synchronize_rcu();
++ memunmap(kaddr);
+
+ return 0;
+ }
+
+ static int stolen_time_cpu_online(unsigned int cpu)
+ {
++ struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ struct pv_time_stolen_time_region *reg;
+ struct arm_smccc_res res;
+
+@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
+ if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+ return -EINVAL;
+
+- reg->kaddr = memremap(res.a0,
++ kaddr = memremap(res.a0,
+ sizeof(struct pvclock_vcpu_stolen_time),
+ MEMREMAP_WB);
+
++ rcu_assign_pointer(reg->kaddr, kaddr);
++
+ if (!reg->kaddr) {
+ pr_warn("Failed to map stolen time data structure\n");
+ return -ENOMEM;
+ }
+
+- if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+- le32_to_cpu(reg->kaddr->attributes) != 0) {
++ if (le32_to_cpu(kaddr->revision) != 0 ||
++ le32_to_cpu(kaddr->attributes) != 0) {
+ pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+ return -ENXIO;
+ }
+diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
+index 771f543464e06..33e0fabc0b79b 100644
+--- a/arch/arm64/kernel/patching.c
++++ b/arch/arm64/kernel/patching.c
+@@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+ int i, ret = 0;
+ struct aarch64_insn_patch *pp = arg;
+
+- /* The first CPU becomes master */
+- if (atomic_inc_return(&pp->cpu_count) == 1) {
++ /* The last CPU becomes master */
++ if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) {
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
+diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
+index 4a72c27273097..86d9f20131723 100644
+--- a/arch/arm64/kernel/perf_callchain.c
++++ b/arch/arm64/kernel/perf_callchain.c
+@@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail,
+ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -147,9 +149,10 @@ static bool callchain_trace(void *data, unsigned long pc)
+ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct stackframe frame;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -160,18 +163,21 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+
+ unsigned long perf_instruction_pointer(struct pt_regs *regs)
+ {
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+- return perf_guest_cbs->get_guest_ip();
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
++ if (guest_cbs && guest_cbs->is_in_guest())
++ return guest_cbs->get_guest_ip();
+
+ return instruction_pointer(regs);
+ }
+
+ unsigned long perf_misc_flags(struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ int misc = 0;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+- if (perf_guest_cbs->is_user_mode())
++ if (guest_cbs && guest_cbs->is_in_guest()) {
++ if (guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
+index 6dbcc89f66627..2162b6fd7251d 100644
+--- a/arch/arm64/kernel/probes/kprobes.c
++++ b/arch/arm64/kernel/probes/kprobes.c
+@@ -7,6 +7,9 @@
+ * Copyright (C) 2013 Linaro Limited.
+ * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
+ */
++
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/extable.h>
+ #include <linux/kasan.h>
+ #include <linux/kernel.h>
+@@ -218,7 +221,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+- pr_warn("Unrecoverable kprobe detected.\n");
++ pr_warn("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ BUG();
+ break;
+@@ -332,7 +335,7 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
+ }
+
+ static int __kprobes
+-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
++kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
+ {
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long addr = instruction_pointer(regs);
+@@ -356,7 +359,7 @@ static struct break_hook kprobes_break_ss_hook = {
+ };
+
+ static int __kprobes
+-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
++kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+ {
+ kprobe_handler(regs);
+ return DBG_HOOK_HANDLED;
+diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
+index 9be668f3f0341..d49aef2657cdf 100644
+--- a/arch/arm64/kernel/probes/uprobes.c
++++ b/arch/arm64/kernel/probes/uprobes.c
+@@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
+ }
+
+ static int uprobe_breakpoint_handler(struct pt_regs *regs,
+- unsigned int esr)
++ unsigned long esr)
+ {
+ if (uprobe_pre_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
+@@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
+ }
+
+ static int uprobe_single_step_handler(struct pt_regs *regs,
+- unsigned int esr)
++ unsigned long esr)
+ {
+ struct uprobe_task *utask = current->utask;
+
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 40adb8cdbf5af..23efabcb00b85 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -439,34 +439,26 @@ static void entry_task_switch(struct task_struct *next)
+
+ /*
+ * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
+- * Assuming the virtual counter is enabled at the beginning of times:
+- *
+- * - disable access when switching from a 64bit task to a 32bit task
+- * - enable access when switching from a 32bit task to a 64bit task
++ * Ensure access is disabled when switching to a 32bit task, ensure
++ * access is enabled when switching to a 64bit task.
+ */
+-static void erratum_1418040_thread_switch(struct task_struct *prev,
+- struct task_struct *next)
++static void erratum_1418040_thread_switch(struct task_struct *next)
+ {
+- bool prev32, next32;
+- u64 val;
+-
+- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
+- return;
+-
+- prev32 = is_compat_thread(task_thread_info(prev));
+- next32 = is_compat_thread(task_thread_info(next));
+-
+- if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
++ if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
++ !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
+ return;
+
+- val = read_sysreg(cntkctl_el1);
+-
+- if (!next32)
+- val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
++ if (is_compat_thread(task_thread_info(next)))
++ sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
+ else
+- val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
++ sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
++}
+
+- write_sysreg(val, cntkctl_el1);
++static void erratum_1418040_new_exec(void)
++{
++ preempt_disable();
++ erratum_1418040_thread_switch(current);
++ preempt_enable();
+ }
+
+ /*
+@@ -501,7 +493,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
+ contextidr_thread_switch(next);
+ entry_task_switch(next);
+ ssbs_thread_switch(next);
+- erratum_1418040_thread_switch(prev, next);
++ erratum_1418040_thread_switch(next);
+ ptrauth_thread_switch_user(next);
+
+ /*
+@@ -613,6 +605,7 @@ void arch_setup_new_exec(void)
+ current->mm->context.flags = mmflags;
+ ptrauth_thread_init_user();
+ mte_thread_init_user();
++ erratum_1418040_new_exec();
+
+ if (task_spec_ssb_noexec(current)) {
+ arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
+index 902e4084c4775..428cfabd11c49 100644
+--- a/arch/arm64/kernel/proton-pack.c
++++ b/arch/arm64/kernel/proton-pack.c
+@@ -18,15 +18,18 @@
+ */
+
+ #include <linux/arm-smccc.h>
++#include <linux/bpf.h>
+ #include <linux/cpu.h>
+ #include <linux/device.h>
+ #include <linux/nospec.h>
+ #include <linux/prctl.h>
+ #include <linux/sched/task_stack.h>
+
++#include <asm/debug-monitors.h>
+ #include <asm/insn.h>
+ #include <asm/spectre.h>
+ #include <asm/traps.h>
++#include <asm/vectors.h>
+ #include <asm/virt.h>
+
+ /*
+@@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void)
+ return ret;
+ }
+
++static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
++{
++ switch (bhb_state) {
++ case SPECTRE_UNAFFECTED:
++ return "";
++ default:
++ case SPECTRE_VULNERABLE:
++ return ", but not BHB";
++ case SPECTRE_MITIGATED:
++ return ", BHB";
++ }
++}
++
++static bool _unprivileged_ebpf_enabled(void)
++{
++#ifdef CONFIG_BPF_SYSCALL
++ return !sysctl_unprivileged_bpf_disabled;
++#else
++ return false;
++#endif
++}
++
+ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
++ enum mitigation_state bhb_state = arm64_get_spectre_bhb_state();
++ const char *bhb_str = get_bhb_affected_string(bhb_state);
++ const char *v2_str = "Branch predictor hardening";
++
+ switch (spectre_v2_state) {
+ case SPECTRE_UNAFFECTED:
+- return sprintf(buf, "Not affected\n");
++ if (bhb_state == SPECTRE_UNAFFECTED)
++ return sprintf(buf, "Not affected\n");
++
++ /*
++ * Platforms affected by Spectre-BHB can't report
++ * "Not affected" for Spectre-v2.
++ */
++ v2_str = "CSV2";
++ fallthrough;
+ case SPECTRE_MITIGATED:
+- return sprintf(buf, "Mitigation: Branch predictor hardening\n");
++ if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled())
++ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
++
++ return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str);
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+@@ -193,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+ __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
+ }
+
+-static void call_smc_arch_workaround_1(void)
++/* Called during entry so must be noinstr */
++static noinstr void call_smc_arch_workaround_1(void)
+ {
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+ }
+
+-static void call_hvc_arch_workaround_1(void)
++/* Called during entry so must be noinstr */
++static noinstr void call_hvc_arch_workaround_1(void)
+ {
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+ }
+
+-static void qcom_link_stack_sanitisation(void)
++/* Called during entry so must be noinstr */
++static noinstr void qcom_link_stack_sanitisation(void)
+ {
+ u64 tmp;
+
+@@ -554,9 +597,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
+ * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
+ * to call into firmware to adjust the mitigation state.
+ */
+-void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt,
+- __le32 *origptr,
+- __le32 *updptr, int nr_inst)
++void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt,
++ __le32 *origptr,
++ __le32 *updptr, int nr_inst)
+ {
+ u32 insn;
+
+@@ -770,3 +813,351 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+ return -ENODEV;
+ }
+ }
++
++/*
++ * Spectre BHB.
++ *
++ * A CPU is either:
++ * - Mitigated by a branchy loop a CPU specific number of times, and listed
++ * in our "loop mitigated list".
++ * - Mitigated in software by the firmware Spectre v2 call.
++ * - Has the ClearBHB instruction to perform the mitigation.
++ * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no
++ * software mitigation in the vectors is needed.
++ * - Has CSV2.3, so is unaffected.
++ */
++static enum mitigation_state spectre_bhb_state;
++
++enum mitigation_state arm64_get_spectre_bhb_state(void)
++{
++ return spectre_bhb_state;
++}
++
++enum bhb_mitigation_bits {
++ BHB_LOOP,
++ BHB_FW,
++ BHB_HW,
++ BHB_INSN,
++};
++static unsigned long system_bhb_mitigations;
++
++/*
++ * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
++ * SCOPE_SYSTEM call will give the right answer.
++ */
++u8 spectre_bhb_loop_affected(int scope)
++{
++ u8 k = 0;
++ static u8 max_bhb_k;
++
++ if (scope == SCOPE_LOCAL_CPU) {
++ static const struct midr_range spectre_bhb_k32_list[] = {
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
++ {},
++ };
++ static const struct midr_range spectre_bhb_k24_list[] = {
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
++ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
++ {},
++ };
++ static const struct midr_range spectre_bhb_k11_list[] = {
++ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
++ {},
++ };
++ static const struct midr_range spectre_bhb_k8_list[] = {
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
++ {},
++ };
++
++ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
++ k = 32;
++ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
++ k = 24;
++ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
++ k = 11;
++ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
++ k = 8;
++
++ max_bhb_k = max(max_bhb_k, k);
++ } else {
++ k = max_bhb_k;
++ }
++
++ return k;
++}
++
++static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
++{
++ int ret;
++ struct arm_smccc_res res;
++
++ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
++ ARM_SMCCC_ARCH_WORKAROUND_3, &res);
++
++ ret = res.a0;
++ switch (ret) {
++ case SMCCC_RET_SUCCESS:
++ return SPECTRE_MITIGATED;
++ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
++ return SPECTRE_UNAFFECTED;
++ default:
++ fallthrough;
++ case SMCCC_RET_NOT_SUPPORTED:
++ return SPECTRE_VULNERABLE;
++ }
++}
++
++static bool is_spectre_bhb_fw_affected(int scope)
++{
++ static bool system_affected;
++ enum mitigation_state fw_state;
++ bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
++ static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
++ {},
++ };
++ bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
++ spectre_bhb_firmware_mitigated_list);
++
++ if (scope != SCOPE_LOCAL_CPU)
++ return system_affected;
++
++ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
++ if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
++ system_affected = true;
++ return true;
++ }
++
++ return false;
++}
++
++static bool supports_ecbhb(int scope)
++{
++ u64 mmfr1;
++
++ if (scope == SCOPE_LOCAL_CPU)
++ mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
++ else
++ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
++
++ return cpuid_feature_extract_unsigned_field(mmfr1,
++ ID_AA64MMFR1_ECBHB_SHIFT);
++}
++
++bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
++ int scope)
++{
++ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
++
++ if (supports_csv2p3(scope))
++ return false;
++
++ if (supports_clearbhb(scope))
++ return true;
++
++ if (spectre_bhb_loop_affected(scope))
++ return true;
++
++ if (is_spectre_bhb_fw_affected(scope))
++ return true;
++
++ return false;
++}
++
++static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
++{
++ const char *v = arm64_get_bp_hardening_vector(slot);
++
++ if (slot < 0)
++ return;
++
++ __this_cpu_write(this_cpu_vector, v);
++
++ /*
++ * When KPTI is in use, the vectors are switched when exiting to
++ * user-space.
++ */
++ if (arm64_kernel_unmapped_at_el0())
++ return;
++
++ write_sysreg(v, vbar_el1);
++ isb();
++}
++
++void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
++{
++ bp_hardening_cb_t cpu_cb;
++ enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
++ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
++
++ if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
++ return;
++
++ if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
++ /* No point mitigating Spectre-BHB alone. */
++ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
++ pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
++ } else if (cpu_mitigations_off()) {
++ pr_info_once("spectre-bhb mitigation disabled by command line option\n");
++ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
++ state = SPECTRE_MITIGATED;
++ set_bit(BHB_HW, &system_bhb_mitigations);
++ } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) {
++ /*
++ * Ensure KVM uses the indirect vector which will have ClearBHB
++ * added.
++ */
++ if (!data->slot)
++ data->slot = HYP_VECTOR_INDIRECT;
++
++ this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
++ state = SPECTRE_MITIGATED;
++ set_bit(BHB_INSN, &system_bhb_mitigations);
++ } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
++ /*
++ * Ensure KVM uses the indirect vector which will have the
++ * branchy-loop added. A57/A72-r0 will already have selected
++ * the spectre-indirect vector, which is sufficient for BHB
++ * too.
++ */
++ if (!data->slot)
++ data->slot = HYP_VECTOR_INDIRECT;
++
++ this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
++ state = SPECTRE_MITIGATED;
++ set_bit(BHB_LOOP, &system_bhb_mitigations);
++ } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
++ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
++ if (fw_state == SPECTRE_MITIGATED) {
++ /*
++ * Ensure KVM uses one of the spectre bp_hardening
++ * vectors. The indirect vector doesn't include the EL3
++ * call, so needs upgrading to
++ * HYP_VECTOR_SPECTRE_INDIRECT.
++ */
++ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
++ data->slot += 1;
++
++ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
++
++ /*
++ * The WA3 call in the vectors supersedes the WA1 call
++ * made during context-switch. Uninstall any firmware
++ * bp_hardening callback.
++ */
++ cpu_cb = spectre_v2_get_sw_mitigation_cb();
++ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
++ __this_cpu_write(bp_hardening_data.fn, NULL);
++
++ state = SPECTRE_MITIGATED;
++ set_bit(BHB_FW, &system_bhb_mitigations);
++ }
++ }
++
++ update_mitigation_state(&spectre_bhb_state, state);
++}
++
++/* Patched to NOP when enabled */
++void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
++ __le32 *origptr,
++ __le32 *updptr, int nr_inst)
++{
++ BUG_ON(nr_inst != 1);
++
++ if (test_bit(BHB_LOOP, &system_bhb_mitigations))
++ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
++}
++
++/* Patched to NOP when enabled */
++void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt,
++ __le32 *origptr,
++ __le32 *updptr, int nr_inst)
++{
++ BUG_ON(nr_inst != 1);
++
++ if (test_bit(BHB_FW, &system_bhb_mitigations))
++ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
++}
++
++/* Patched to correct the immediate */
++void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
++ __le32 *origptr, __le32 *updptr, int nr_inst)
++{
++ u8 rd;
++ u32 insn;
++ u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
++
++ BUG_ON(nr_inst != 1); /* MOV -> MOV */
++
++ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY))
++ return;
++
++ insn = le32_to_cpu(*origptr);
++ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
++ insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
++ AARCH64_INSN_VARIANT_64BIT,
++ AARCH64_INSN_MOVEWIDE_ZERO);
++ *updptr++ = cpu_to_le32(insn);
++}
++
++/* Patched to mov WA3 when supported */
++void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt,
++ __le32 *origptr, __le32 *updptr, int nr_inst)
++{
++ u8 rd;
++ u32 insn;
++
++ BUG_ON(nr_inst != 1); /* MOV -> MOV */
++
++ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
++ !test_bit(BHB_FW, &system_bhb_mitigations))
++ return;
++
++ insn = le32_to_cpu(*origptr);
++ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
++
++ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR,
++ AARCH64_INSN_VARIANT_32BIT,
++ AARCH64_INSN_REG_ZR, rd,
++ ARM_SMCCC_ARCH_WORKAROUND_3);
++ if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT))
++ return;
++
++ *updptr++ = cpu_to_le32(insn);
++}
++
++/* Patched to NOP when not supported */
++void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt,
++ __le32 *origptr, __le32 *updptr, int nr_inst)
++{
++ BUG_ON(nr_inst != 2);
++
++ if (test_bit(BHB_INSN, &system_bhb_mitigations))
++ return;
++
++ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
++ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
++}
++
++#ifdef CONFIG_BPF_SYSCALL
++#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n"
++void unpriv_ebpf_notify(int new_state)
++{
++ if (spectre_v2_state == SPECTRE_VULNERABLE ||
++ spectre_bhb_state != SPECTRE_MITIGATED)
++ return;
++
++ if (!new_state)
++ pr_err("WARNING: %s", EBPF_WARN);
++}
++#endif
+diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
+index 47f77d1234cb6..532611d07bdcb 100644
+--- a/arch/arm64/kernel/sdei.c
++++ b/arch/arm64/kernel/sdei.c
+@@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
+ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
+ #endif
+
++DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
++DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
++
+ static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
+ {
+ unsigned long *p;
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index c287b9407f287..b3e1beccf4588 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -577,10 +577,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
+ {
+ int err;
+
+- err = sigframe_alloc(user, &user->fpsimd_offset,
+- sizeof(struct fpsimd_context));
+- if (err)
+- return err;
++ if (system_supports_fpsimd()) {
++ err = sigframe_alloc(user, &user->fpsimd_offset,
++ sizeof(struct fpsimd_context));
++ if (err)
++ return err;
++ }
+
+ /* fault information, if valid */
+ if (add_all || current->thread.fault_code) {
+@@ -1010,6 +1012,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28);
+ static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
+ static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
+ static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
++static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
+ static_assert(offsetof(siginfo_t, si_band) == 0x10);
+ static_assert(offsetof(siginfo_t, si_fd) == 0x18);
+ static_assert(offsetof(siginfo_t, si_call_addr) == 0x10);
+diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
+index d984282b979f8..4700f8522d27b 100644
+--- a/arch/arm64/kernel/signal32.c
++++ b/arch/arm64/kernel/signal32.c
+@@ -487,6 +487,7 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
+ static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
+ static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
+ static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
++static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18);
+ static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
+ static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
+ static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c);
+diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
+index 6f6ff072acbde..bc29cc044a4d7 100644
+--- a/arch/arm64/kernel/smp.c
++++ b/arch/arm64/kernel/smp.c
+@@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void)
+ * Log the CPU info before it is marked online and might get read.
+ */
+ cpuinfo_store_cpu();
++ store_cpu_topology(cpu);
+
+ /*
+ * Enable GIC and timers.
+@@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void)
+
+ ipi_setup(cpu);
+
+- store_cpu_topology(cpu);
+ numa_add_cpu(cpu);
+
+ /*
+@@ -1073,10 +1073,8 @@ void crash_smp_send_stop(void)
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+- if (num_other_online_cpus() == 0) {
+- sdei_mask_local_cpu();
+- return;
+- }
++ if (num_other_online_cpus() == 0)
++ goto skip_ipi;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+@@ -1095,7 +1093,9 @@ void crash_smp_send_stop(void)
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
++skip_ipi:
+ sdei_mask_local_cpu();
++ sdei_handler_abort();
+ }
+
+ bool smp_crash_stop_failed(void)
+diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
+index 8982a2b78acfc..3b8dc538a4c42 100644
+--- a/arch/arm64/kernel/stacktrace.c
++++ b/arch/arm64/kernel/stacktrace.c
+@@ -33,7 +33,7 @@
+ */
+
+
+-void start_backtrace(struct stackframe *frame, unsigned long fp,
++notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
+ {
+ frame->fp = fp;
+@@ -55,6 +55,7 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
+ frame->prev_fp = 0;
+ frame->prev_type = STACK_TYPE_UNKNOWN;
+ }
++NOKPROBE_SYMBOL(start_backtrace);
+
+ /*
+ * Unwind from one frame record (A) to the next frame record (B).
+diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
+index 19ee7c33769d3..d473ec204fef7 100644
+--- a/arch/arm64/kernel/suspend.c
++++ b/arch/arm64/kernel/suspend.c
+@@ -43,6 +43,8 @@ void notrace __cpu_suspend_exit(void)
+ {
+ unsigned int cpu = smp_processor_id();
+
++ mte_suspend_exit();
++
+ /*
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
+index db5159a3055fc..b88a52f7188fc 100644
+--- a/arch/arm64/kernel/sys_compat.c
++++ b/arch/arm64/kernel/sys_compat.c
+@@ -114,6 +114,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
+ addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
+
+ arm64_notify_die("Oops - bad compat syscall(2)", regs,
+- SIGILL, ILL_ILLTRP, addr, scno);
++ SIGILL, ILL_ILLTRP, addr, 0);
+ return 0;
+ }
+diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
+index 4dd14a6620c17..877c68f472822 100644
+--- a/arch/arm64/kernel/topology.c
++++ b/arch/arm64/kernel/topology.c
+@@ -22,46 +22,6 @@
+ #include <asm/cputype.h>
+ #include <asm/topology.h>
+
+-void store_cpu_topology(unsigned int cpuid)
+-{
+- struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
+- u64 mpidr;
+-
+- if (cpuid_topo->package_id != -1)
+- goto topology_populated;
+-
+- mpidr = read_cpuid_mpidr();
+-
+- /* Uniprocessor systems can rely on default topology values */
+- if (mpidr & MPIDR_UP_BITMASK)
+- return;
+-
+- /*
+- * This would be the place to create cpu topology based on MPIDR.
+- *
+- * However, it cannot be trusted to depict the actual topology; some
+- * pieces of the architecture enforce an artificial cap on Aff0 values
+- * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
+- * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
+- * having absolutely no relationship to the actual underlying system
+- * topology, and cannot be reasonably used as core / package ID.
+- *
+- * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
+- * we still wouldn't be able to obtain a sane core ID. This means we
+- * need to entirely ignore MPIDR for any topology deduction.
+- */
+- cpuid_topo->thread_id = -1;
+- cpuid_topo->core_id = cpuid;
+- cpuid_topo->package_id = cpu_to_node(cpuid);
+-
+- pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
+- cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
+- cpuid_topo->thread_id, mpidr);
+-
+-topology_populated:
+- update_siblings_masks(cpuid);
+-}
+-
+ #ifdef CONFIG_ACPI
+ static bool __init acpi_cpu_is_threaded(int cpu)
+ {
+@@ -249,7 +209,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
+ for_each_cpu(cpu, cpus) {
+ if (!freq_counters_valid(cpu) ||
+ freq_inv_set_max_ratio(cpu,
+- cpufreq_get_hw_max_freq(cpu) * 1000,
++ cpufreq_get_hw_max_freq(cpu) * 1000ULL,
+ arch_timer_get_rate()))
+ return;
+ }
+@@ -308,12 +268,25 @@ core_initcall(init_amu_fie);
+
+ static void cpu_read_corecnt(void *val)
+ {
++ /*
++ * A value of 0 can be returned if the current CPU does not support AMUs
++ * or if the counter is disabled for this CPU. A return value of 0 at
++ * counter read is properly handled as an error case by the users of the
++ * counter.
++ */
+ *(u64 *)val = read_corecnt();
+ }
+
+ static void cpu_read_constcnt(void *val)
+ {
+- *(u64 *)val = read_constcnt();
++ /*
++ * Return 0 if the current CPU is affected by erratum 2457168. A value
++ * of 0 is also returned if the current CPU does not support AMUs or if
++ * the counter is disabled. A return value of 0 at counter read is
++ * properly handled as an error case by the users of the counter.
++ */
++ *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ?
++ 0UL : read_constcnt();
+ }
+
+ static inline
+@@ -340,7 +313,22 @@ int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+ */
+ bool cpc_ffh_supported(void)
+ {
+- return freq_counters_valid(get_cpu_with_amu_feat());
++ int cpu = get_cpu_with_amu_feat();
++
++ /*
++ * FFH is considered supported if there is at least one present CPU that
++ * supports AMUs. Using FFH to read core and reference counters for CPUs
++ * that do not support AMUs, have counters disabled or that are affected
++ * by errata, will result in a return value of 0.
++ *
++ * This is done to allow any enabled and valid counters to be read
++ * through FFH, knowing that potentially returning 0 as counter value is
++ * properly handled by the users of these counters.
++ */
++ if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
++ return false;
++
++ return true;
+ }
+
+ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index b03e383d944ab..21e69a991bc83 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -235,7 +235,7 @@ void die(const char *str, struct pt_regs *regs, int err)
+ raw_spin_unlock_irqrestore(&die_lock, flags);
+
+ if (ret != NOTIFY_STOP)
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ static void arm64_show_signal(int signo, const char *str)
+@@ -243,7 +243,7 @@ static void arm64_show_signal(int signo, const char *str)
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ struct task_struct *tsk = current;
+- unsigned int esr = tsk->thread.fault_code;
++ unsigned long esr = tsk->thread.fault_code;
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ /* Leave if the signal won't be shown */
+@@ -254,7 +254,7 @@ static void arm64_show_signal(int signo, const char *str)
+
+ pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
+ if (esr)
+- pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr);
++ pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr);
+
+ pr_cont("%s", str);
+ print_vma_addr(KERN_CONT " in ", regs->pc);
+@@ -288,7 +288,7 @@ void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
+
+ void arm64_notify_die(const char *str, struct pt_regs *regs,
+ int signo, int sicode, unsigned long far,
+- int err)
++ unsigned long err)
+ {
+ if (user_mode(regs)) {
+ WARN_ON(regs != current_pt_regs());
+@@ -440,7 +440,7 @@ exit:
+ return fn ? fn(regs, instr) : 1;
+ }
+
+-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err)
++void force_signal_inject(int signal, int code, unsigned long address, unsigned long err)
+ {
+ const char *desc;
+ struct pt_regs *regs = current_pt_regs();
+@@ -507,7 +507,7 @@ void do_bti(struct pt_regs *regs)
+ }
+ NOKPROBE_SYMBOL(do_bti);
+
+-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr)
++void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr)
+ {
+ /*
+ * Unexpected FPAC exception or pointer authentication failure in
+@@ -538,7 +538,7 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
+ uaccess_ttbr0_disable(); \
+ }
+
+-static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
++static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ unsigned long tagged_address, address;
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
+@@ -578,7 +578,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
+
+-static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
++static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
+@@ -597,7 +597,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
+
+-static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
++static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
+
+@@ -605,7 +605,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
+
+-static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
++static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
+
+@@ -613,7 +613,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
+
+-static void mrs_handler(unsigned int esr, struct pt_regs *regs)
++static void mrs_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ u32 sysreg, rt;
+
+@@ -624,15 +624,15 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs)
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ }
+
+-static void wfi_handler(unsigned int esr, struct pt_regs *regs)
++static void wfi_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
+
+ struct sys64_hook {
+- unsigned int esr_mask;
+- unsigned int esr_val;
+- void (*handler)(unsigned int esr, struct pt_regs *regs);
++ unsigned long esr_mask;
++ unsigned long esr_val;
++ void (*handler)(unsigned long esr, struct pt_regs *regs);
+ };
+
+ static const struct sys64_hook sys64_hooks[] = {
+@@ -675,7 +675,7 @@ static const struct sys64_hook sys64_hooks[] = {
+ };
+
+ #ifdef CONFIG_COMPAT
+-static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
++static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs)
+ {
+ int cond;
+
+@@ -695,7 +695,7 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
+ return aarch32_opcode_cond_checks[cond](regs->pstate);
+ }
+
+-static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
++static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
+
+@@ -712,7 +712,7 @@ static const struct sys64_hook cp15_32_hooks[] = {
+ {},
+ };
+
+-static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
++static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
+ {
+ int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
+ int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
+@@ -732,7 +732,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
+ {},
+ };
+
+-void do_cp15instr(unsigned int esr, struct pt_regs *regs)
++void do_cp15instr(unsigned long esr, struct pt_regs *regs)
+ {
+ const struct sys64_hook *hook, *hook_base;
+
+@@ -773,7 +773,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
+ NOKPROBE_SYMBOL(do_cp15instr);
+ #endif
+
+-void do_sysinstr(unsigned int esr, struct pt_regs *regs)
++void do_sysinstr(unsigned long esr, struct pt_regs *regs)
+ {
+ const struct sys64_hook *hook;
+
+@@ -837,7 +837,7 @@ static const char *esr_class_str[] = {
+ [ESR_ELx_EC_BRK64] = "BRK (AArch64)",
+ };
+
+-const char *esr_get_class_string(u32 esr)
++const char *esr_get_class_string(unsigned long esr)
+ {
+ return esr_class_str[ESR_ELx_EC(esr)];
+ }
+@@ -846,7 +846,7 @@ const char *esr_get_class_string(u32 esr)
+ * bad_el0_sync handles unexpected, but potentially recoverable synchronous
+ * exceptions taken from EL0.
+ */
+-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
++void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
+ {
+ unsigned long pc = instruction_pointer(regs);
+
+@@ -862,7 +862,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+ DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
+ __aligned(16);
+
+-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
++void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
+ {
+ unsigned long tsk_stk = (unsigned long)current->stack;
+ unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+@@ -871,7 +871,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
+ console_verbose();
+ pr_emerg("Insufficient stack space to handle exception!");
+
+- pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
++ pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr));
+ pr_emerg("FAR: 0x%016lx\n", far);
+
+ pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
+@@ -892,11 +892,11 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
+ }
+ #endif
+
+-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
++void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
+ {
+ console_verbose();
+
+- pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
++ pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
+ smp_processor_id(), esr, esr_get_class_string(esr));
+ if (regs)
+ __show_regs(regs);
+@@ -907,9 +907,9 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
+ unreachable();
+ }
+
+-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
++bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr)
+ {
+- u32 aet = arm64_ras_serror_get_severity(esr);
++ unsigned long aet = arm64_ras_serror_get_severity(esr);
+
+ switch (aet) {
+ case ESR_ELx_AET_CE: /* corrected error */
+@@ -939,7 +939,7 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+ }
+ }
+
+-void do_serror(struct pt_regs *regs, unsigned int esr)
++void do_serror(struct pt_regs *regs, unsigned long esr)
+ {
+ /* non-RAS errors are not containable */
+ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
+@@ -960,7 +960,7 @@ int is_valid_bugaddr(unsigned long addr)
+ return 1;
+ }
+
+-static int bug_handler(struct pt_regs *regs, unsigned int esr)
++static int bug_handler(struct pt_regs *regs, unsigned long esr)
+ {
+ switch (report_bug(regs->pc, regs)) {
+ case BUG_TRAP_TYPE_BUG:
+@@ -985,10 +985,10 @@ static struct break_hook bug_break_hook = {
+ .imm = BUG_BRK_IMM,
+ };
+
+-static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
++static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+ {
+ pr_err("%s generated an invalid instruction at %pS!\n",
+- in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching",
++ "Kernel text patching",
+ (void *)instruction_pointer(regs));
+
+ /* We cannot handle this */
+@@ -1007,7 +1007,7 @@ static struct break_hook fault_break_hook = {
+ #define KASAN_ESR_SIZE_MASK 0x0f
+ #define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
+
+-static int kasan_handler(struct pt_regs *regs, unsigned int esr)
++static int kasan_handler(struct pt_regs *regs, unsigned long esr)
+ {
+ bool recover = esr & KASAN_ESR_RECOVER;
+ bool write = esr & KASAN_ESR_WRITE;
+@@ -1050,11 +1050,11 @@ static struct break_hook kasan_break_hook = {
+ * Initial handler for AArch64 BRK exceptions
+ * This handler only used until debug_traps_init().
+ */
+-int __init early_brk64(unsigned long addr, unsigned int esr,
++int __init early_brk64(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ #ifdef CONFIG_KASAN_SW_TAGS
+- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
++ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+
+ if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
+diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
+index a61fc4f989b37..55dd15c9745da 100644
+--- a/arch/arm64/kernel/vdso.c
++++ b/arch/arm64/kernel/vdso.c
+@@ -314,7 +314,7 @@ static int aarch32_alloc_kuser_vdso_page(void)
+
+ memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
+ kuser_sz);
+- aarch32_vectors_page = virt_to_page(vdso_page);
++ aarch32_vectors_page = virt_to_page((void *)vdso_page);
+ return 0;
+ }
+
+diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
+index 945e6bb326e3e..b5d8f72e8b32e 100644
+--- a/arch/arm64/kernel/vdso/Makefile
++++ b/arch/arm64/kernel/vdso/Makefile
+@@ -48,9 +48,6 @@ GCOV_PROFILE := n
+ targets += vdso.lds
+ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+-# Force dependency (incbin is bad)
+-$(obj)/vdso.o : $(obj)/vdso.so
+-
+ # Link rule for the .so file, .lds has to be first
+ $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold_and_vdso_check)
+diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
+index 3dba0c4f8f42b..83e9399e38368 100644
+--- a/arch/arm64/kernel/vdso32/Makefile
++++ b/arch/arm64/kernel/vdso32/Makefile
+@@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile
+
+ # Same as cc-*option, but using CC_COMPAT instead of CC
+ ifeq ($(CONFIG_CC_IS_CLANG), y)
+-CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
+-
+ CC_COMPAT ?= $(CC)
+-CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS)
+-
+-ifneq ($(LLVM),)
+-LD_COMPAT ?= $(LD)
++CC_COMPAT += --target=arm-linux-gnueabi
+ else
+-LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
++CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
+ endif
++
++ifeq ($(CONFIG_LD_IS_LLD), y)
++LD_COMPAT ?= $(LD)
+ else
+-CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
+ LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
+ endif
+
+@@ -40,16 +37,13 @@ cc32-as-instr = $(call try-run,\
+ # As a result we set our own flags here.
+
+ # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
+-VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
++VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc
++VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null)
+ VDSO_CPPFLAGS += $(LINUXINCLUDE)
+
+ # Common C and assembly flags
+ # From top-level Makefile
+ VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+-ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
+-VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
+-endif
+-
+ VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
+ ifdef CONFIG_DEBUG_INFO
+ VDSO_CAFLAGS += -g
+@@ -150,9 +144,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
+ targets += vdso.lds
+ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+-# Force dependency (vdso.s includes vdso.so through incbin)
+-$(obj)/vdso.o: $(obj)/vdso.so
+-
+ include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
+index f6b1a88245db2..184abd7c4206e 100644
+--- a/arch/arm64/kernel/vmlinux.lds.S
++++ b/arch/arm64/kernel/vmlinux.lds.S
+@@ -330,7 +330,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+ #endif
+ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+-ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
++ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
+ "Entry trampoline text too big")
+ #endif
+ #ifdef CONFIG_KVM
+diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
+index fe102cd2e5183..3fe816c244cec 100644
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -712,8 +712,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
+ if (likely(!vcpu_mode_is_32bit(vcpu)))
+ return false;
+
+- return !system_supports_32bit_el0() ||
+- static_branch_unlikely(&arm64_mismatched_32bit_el0);
++ return !kvm_supports_32bit_el0();
+ }
+
+ /**
+@@ -755,6 +754,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
+ xfer_to_guest_mode_work_pending();
+ }
+
++/*
++ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
++ * the vCPU is running.
++ *
++ * This must be noinstr as instrumentation may make use of RCU, and this is not
++ * safe during the EQS.
++ */
++static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
++{
++ int ret;
++
++ guest_state_enter_irqoff();
++ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
++ guest_state_exit_irqoff();
++
++ return ret;
++}
++
+ /**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu: The VCPU pointer
+@@ -845,9 +862,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ * Enter the guest
+ */
+ trace_kvm_entry(*vcpu_pc(vcpu));
+- guest_enter_irqoff();
++ guest_timing_enter_irqoff();
+
+- ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
++ ret = kvm_arm_vcpu_enter_exit(vcpu);
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ vcpu->stat.exits++;
+@@ -882,26 +899,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ kvm_arch_vcpu_ctxsync_fp(vcpu);
+
+ /*
+- * We may have taken a host interrupt in HYP mode (ie
+- * while executing the guest). This interrupt is still
+- * pending, as we haven't serviced it yet!
++ * We must ensure that any pending interrupts are taken before
++ * we exit guest timing so that timer ticks are accounted as
++ * guest time. Transiently unmask interrupts so that any
++ * pending interrupts are taken.
+ *
+- * We're now back in SVC mode, with interrupts
+- * disabled. Enabling the interrupts now will have
+- * the effect of taking the interrupt again, in SVC
+- * mode this time.
++ * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
++ * context synchronization event) is necessary to ensure that
++ * pending interrupts are taken.
+ */
+ local_irq_enable();
++ isb();
++ local_irq_disable();
++
++ guest_timing_exit_irqoff();
++
++ local_irq_enable();
+
+- /*
+- * We do local_irq_enable() before calling guest_exit() so
+- * that if a timer interrupt hits while running the guest we
+- * account that tick as being spent in the guest. We enable
+- * preemption after calling guest_exit() so that if we get
+- * preempted we make sure ticks after that is not counted as
+- * guest time.
+- */
+- guest_exit();
+ trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+ /* Exit types that need handling before we can be preempted */
+@@ -1443,10 +1457,8 @@ static int kvm_init_vector_slots(void)
+ base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
+ kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
+
+- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+- return 0;
+-
+- if (!has_vhe()) {
++ if (kvm_system_needs_idmapped_vectors() &&
++ !is_protected_kvm_enabled()) {
+ err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
+ __BP_HARDEN_HYP_VECS_SZ, &base);
+ if (err)
+@@ -1971,31 +1983,50 @@ out_err:
+ return err;
+ }
+
+-static void _kvm_host_prot_finalize(void *discard)
++static void _kvm_host_prot_finalize(void *arg)
+ {
+- WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
++ int *err = arg;
++
++ if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
++ WRITE_ONCE(*err, -EINVAL);
+ }
+
+-static int finalize_hyp_mode(void)
++static int pkvm_drop_host_privileges(void)
+ {
+- if (!is_protected_kvm_enabled())
+- return 0;
+-
+- /*
+- * Exclude HYP BSS from kmemleak so that it doesn't get peeked
+- * at, which would end badly once the section is inaccessible.
+- * None of other sections should ever be introspected.
+- */
+- kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
++ int ret = 0;
+
+ /*
+ * Flip the static key upfront as that may no longer be possible
+ * once the host stage 2 is installed.
+ */
+ static_branch_enable(&kvm_protected_mode_initialized);
+- on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
+
+- return 0;
++ /*
++ * Fixup the boot mode so that we don't take spurious round
++ * trips via EL2 on cpu_resume. Flush to the PoC for a good
++ * measure, so that it can be observed by a CPU coming out of
++ * suspend with the MMU off.
++ */
++ __boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
++ dcache_clean_poc((unsigned long)__boot_cpu_mode,
++ (unsigned long)(__boot_cpu_mode + 2));
++
++ on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
++ return ret;
++}
++
++static int finalize_hyp_mode(void)
++{
++ if (!is_protected_kvm_enabled())
++ return 0;
++
++ /*
++ * Exclude HYP sections from kmemleak so that they don't get peeked
++ * at, which would end badly once inaccessible.
++ */
++ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
++ kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
++ return pkvm_drop_host_privileges();
+ }
+
+ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
+index 5ce26bedf23c0..94108e2e09179 100644
+--- a/arch/arm64/kvm/guest.c
++++ b/arch/arm64/kvm/guest.c
+@@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+ u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
+ switch (mode) {
+ case PSR_AA32_MODE_USR:
+- if (!system_supports_32bit_el0())
++ if (!kvm_supports_32bit_el0())
+ return -EINVAL;
+ break;
+ case PSR_AA32_MODE_FIQ:
+diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
+index 275a27368a04c..a5ab5215094ee 100644
+--- a/arch/arm64/kvm/handle_exit.c
++++ b/arch/arm64/kvm/handle_exit.c
+@@ -226,6 +226,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
+ {
+ struct kvm_run *run = vcpu->run;
+
++ if (ARM_SERROR_PENDING(exception_index)) {
++ /*
++ * The SError is handled by handle_exit_early(). If the guest
++ * survives it will re-execute the original instruction.
++ */
++ return 1;
++ }
++
+ exception_index = ARM_EXCEPTION_CODE(exception_index);
+
+ switch (exception_index) {
+diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
+index 0418399e0a201..aa06e28f2991f 100644
+--- a/arch/arm64/kvm/hyp/exception.c
++++ b/arch/arm64/kvm/hyp/exception.c
+@@ -13,6 +13,7 @@
+ #include <hyp/adjust_pc.h>
+ #include <linux/kvm_host.h>
+ #include <asm/kvm_emulate.h>
++#include <asm/kvm_mmu.h>
+
+ #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
+ #error Hypervisor code only!
+@@ -38,7 +39,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+
+ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+ {
+- write_sysreg_el1(val, SYS_SPSR);
++ if (has_vhe())
++ write_sysreg_el1(val, SYS_SPSR);
++ else
++ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+ }
+
+ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
+@@ -112,7 +116,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+- if (kvm_has_mte(vcpu->kvm))
++ if (kvm_has_mte(kern_hyp_va(vcpu->kvm)))
+ new |= PSR_TCO_BIT;
+
+ new |= (old & PSR_DIT_BIT);
+diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
+index 9aa9b73475c95..7839d075729b1 100644
+--- a/arch/arm64/kvm/hyp/hyp-entry.S
++++ b/arch/arm64/kvm/hyp/hyp-entry.S
+@@ -44,7 +44,7 @@
+ el1_sync: // Guest trapped into EL2
+
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
+ b.ne el1_trap
+@@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2
+ /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_2)
++ cbz w1, wa_epilogue
++
++ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \
++ ARM_SMCCC_ARCH_WORKAROUND_3)
+ cbnz w1, el1_trap
+
+ wa_epilogue:
+@@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector)
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
++ alternative_cb spectre_bhb_patch_wa3
++ /* Patched to mov WA3 when supported */
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
++ alternative_cb_end
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ add sp, sp, #(8 * 2)
+@@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector)
+ spectrev2_smccc_wa1_smc
+ .else
+ stp x0, x1, [sp, #-16]!
++ mitigate_spectre_bhb_loop x0
++ mitigate_spectre_bhb_clear_insn
+ .endif
+ .if \indirect != 0
+ alternative_cb kvm_patch_vector_branch
+diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
+index a0e78a6027be0..ecd41844eda09 100644
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -416,10 +416,17 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+ */
+ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
++ /*
++ * Save PSTATE early so that we can evaluate the vcpu mode
++ * early on.
++ */
++ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
++
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+- if (ARM_SERROR_PENDING(*exit_code)) {
++ if (ARM_SERROR_PENDING(*exit_code) &&
++ ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
+ u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+ /*
+diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+index de7e14c862e6c..7ecca8b078519 100644
+--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
++++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+ {
+ ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
+- ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
++ /*
++ * Guest PSTATE gets saved at guest fixup time in all
++ * cases. We still need to handle the nVHE host side here.
++ */
++ if (!has_vhe() && ctxt->__hyp_running_vcpu)
++ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
+index 8d741f71377f4..964c2134ea1e5 100644
+--- a/arch/arm64/kvm/hyp/nvhe/Makefile
++++ b/arch/arm64/kvm/hyp/nvhe/Makefile
+@@ -83,6 +83,10 @@ quiet_cmd_hypcopy = HYPCOPY $@
+ # Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
+ # This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
+ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
++# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
++# when profile optimization is applied. gen-hyprel does not support SHT_REL and
++# causes a build failure. Remove profile optimization flags.
++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
+
+ # KVM nVHE code is run at a different exception code with a different map, so
+ # compiler instrumentation that inserts callbacks or checks into the code may
+diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
+index 4b652ffb591d4..d310d2b2c8b40 100644
+--- a/arch/arm64/kvm/hyp/nvhe/host.S
++++ b/arch/arm64/kvm/hyp/nvhe/host.S
+@@ -115,7 +115,7 @@ SYM_FUNC_END(__hyp_do_panic)
+ .L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.ne __host_exit
+
+diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
+index 2fabeceb889a9..5146fb1705054 100644
+--- a/arch/arm64/kvm/hyp/nvhe/mm.c
++++ b/arch/arm64/kvm/hyp/nvhe/mm.c
+@@ -146,8 +146,10 @@ int hyp_map_vectors(void)
+ phys_addr_t phys;
+ void *bp_base;
+
+- if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
++ if (!kvm_system_needs_idmapped_vectors()) {
++ __hyp_bp_vect_base = __bp_harden_hyp_vecs;
+ return 0;
++ }
+
+ phys = __hyp_pa(__bp_harden_hyp_vecs);
+ bp_base = (void *)__pkvm_create_private_mapping(phys,
+diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
+index 57c27846320f4..58ad9c5ba3112 100644
+--- a/arch/arm64/kvm/hyp/nvhe/setup.c
++++ b/arch/arm64/kvm/hyp/nvhe/setup.c
+@@ -177,7 +177,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
+
+ phys = kvm_pte_to_phys(pte);
+ if (!addr_is_memory(phys))
+- return 0;
++ return -EINVAL;
+
+ /*
+ * Adjust the host stage-2 mappings to match the ownership attributes
+@@ -206,8 +206,18 @@ static int finalize_host_mappings(void)
+ .cb = finalize_host_mappings_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
++ int i, ret;
++
++ for (i = 0; i < hyp_memblock_nr; i++) {
++ struct memblock_region *reg = &hyp_memory[i];
++ u64 start = (u64)hyp_phys_to_virt(reg->base);
++
++ ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
++ if (ret)
++ return ret;
++ }
+
+- return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
++ return 0;
+ }
+
+ void __noreturn __pkvm_init_finalise(void)
+diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
+index a34b01cc8ab9e..4db5409f40c4f 100644
+--- a/arch/arm64/kvm/hyp/nvhe/switch.c
++++ b/arch/arm64/kvm/hyp/nvhe/switch.c
+@@ -279,5 +279,5 @@ void __noreturn hyp_panic(void)
+
+ asmlinkage void kvm_unexpected_el2_exception(void)
+ {
+- return __kvm_unexpected_el2_exception();
++ __kvm_unexpected_el2_exception();
+ }
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index f8ceebe4982eb..4c77ff556f0ae 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -921,13 +921,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ */
+ stage2_put_pte(ptep, mmu, addr, level, mm_ops);
+
+- if (need_flush) {
+- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+-
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
+- }
++ if (need_flush && mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+
+ if (childp)
+ mm_ops->put_page(childp);
+@@ -1089,15 +1085,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ struct kvm_pgtable *pgt = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+ kvm_pte_t pte = *ptep;
+- kvm_pte_t *pte_follow;
+
+ if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
+ return 0;
+
+- pte_follow = kvm_pte_follow(pte, mm_ops);
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
++ if (mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+ return 0;
+ }
+
+diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
+index ded2c66675f06..813e6e2178c16 100644
+--- a/arch/arm64/kvm/hyp/vhe/switch.c
++++ b/arch/arm64/kvm/hyp/vhe/switch.c
+@@ -10,6 +10,7 @@
+ #include <linux/kvm_host.h>
+ #include <linux/types.h>
+ #include <linux/jump_label.h>
++#include <linux/percpu.h>
+ #include <uapi/linux/psci.h>
+
+ #include <kvm/arm_psci.h>
+@@ -25,6 +26,7 @@
+ #include <asm/debug-monitors.h>
+ #include <asm/processor.h>
+ #include <asm/thread_info.h>
++#include <asm/vectors.h>
+
+ /* VHE specific context */
+ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
+@@ -68,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps);
+
+ static void __deactivate_traps(struct kvm_vcpu *vcpu)
+ {
+- extern char vectors[]; /* kernel exception vectors */
++ const char *host_vectors = vectors;
+
+ ___deactivate_traps(vcpu);
+
+@@ -82,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+- write_sysreg(vectors, vbar_el1);
++
++ if (!arm64_kernel_unmapped_at_el0())
++ host_vectors = __this_cpu_read(this_cpu_vector);
++ write_sysreg(host_vectors, vbar_el1);
+ }
+ NOKPROBE_SYMBOL(__deactivate_traps);
+
+@@ -215,5 +220,5 @@ void __noreturn hyp_panic(void)
+
+ asmlinkage void kvm_unexpected_el2_exception(void)
+ {
+- return __kvm_unexpected_el2_exception();
++ __kvm_unexpected_el2_exception();
+ }
+diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
+index 30da78f72b3b3..202b8c455724b 100644
+--- a/arch/arm64/kvm/hypercalls.c
++++ b/arch/arm64/kvm/hypercalls.c
+@@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+ break;
+ }
+ break;
++ case ARM_SMCCC_ARCH_WORKAROUND_3:
++ switch (arm64_get_spectre_bhb_state()) {
++ case SPECTRE_VULNERABLE:
++ break;
++ case SPECTRE_MITIGATED:
++ val[0] = SMCCC_RET_SUCCESS;
++ break;
++ case SPECTRE_UNAFFECTED:
++ val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
++ break;
++ }
++ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val[0] = SMCCC_RET_SUCCESS;
+ break;
+diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
+index 69bd1732a299f..38a8095744a07 100644
+--- a/arch/arm64/kvm/mmu.c
++++ b/arch/arm64/kvm/mmu.c
+@@ -468,14 +468,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+ CONFIG_PGTABLE_LEVELS),
+ .mm_ops = &kvm_user_mm_ops,
+ };
++ unsigned long flags;
+ kvm_pte_t pte = 0; /* Keep GCC quiet... */
+ u32 level = ~0;
+ int ret;
+
++ /*
++ * Disable IRQs so that we hazard against a concurrent
++ * teardown of the userspace page tables (which relies on
++ * IPI-ing threads).
++ */
++ local_irq_save(flags);
+ ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+- VM_BUG_ON(ret);
+- VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+- VM_BUG_ON(!(pte & PTE_VALID));
++ local_irq_restore(flags);
++
++ if (ret)
++ return ret;
++
++ /*
++ * Not seeing an error, but not updating level? Something went
++ * deeply wrong...
++ */
++ if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
++ return -EFAULT;
++
++ /* Oops, the userspace PTs are gone... Replay the fault */
++ if (!kvm_pte_valid(pte))
++ return -EAGAIN;
+
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+ }
+@@ -826,7 +845,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
+ *
+ * Returns the size of the mapping.
+ */
+-static unsigned long
++static long
+ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long hva, kvm_pfn_t *pfnp,
+ phys_addr_t *ipap)
+@@ -838,8 +857,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ * sure that the HVA and IPA are sufficiently aligned and that the
+ * block map is contained within the memslot.
+ */
+- if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+- get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
++ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
++ int sz = get_user_mapping_size(kvm, hva);
++
++ if (sz < 0)
++ return sz;
++
++ if (sz < PMD_SIZE)
++ return PAGE_SIZE;
++
+ /*
+ * The address we faulted on is backed by a transparent huge
+ * page. However, because we map the compound huge page and
+@@ -957,7 +983,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ kvm_pfn_t pfn;
+ bool logging_active = memslot_is_logging(memslot);
+ unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
+- unsigned long vma_pagesize, fault_granule;
++ long vma_pagesize, fault_granule;
+ enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
+ struct kvm_pgtable *pgt;
+
+@@ -971,6 +997,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ return -EFAULT;
+ }
+
++ /*
++ * Permission faults just need to update the existing leaf entry,
++ * and so normally don't require allocations from the memcache. The
++ * only exception to this is when dirty logging is enabled at runtime
++ * and a write fault needs to collapse a block entry into a table.
++ */
++ if (fault_status != FSC_PERM ||
++ (logging_active && write_fault)) {
++ ret = kvm_mmu_topup_memory_cache(memcache,
++ kvm_mmu_cache_min_pages(kvm));
++ if (ret)
++ return ret;
++ }
++
+ /*
+ * Let's check if we will get back a huge page backed by hugetlbfs, or
+ * get block mapping for device MMIO region.
+@@ -1025,36 +1065,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ fault_ipa &= ~(vma_pagesize - 1);
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+- mmap_read_unlock(current->mm);
+-
+- /*
+- * Permission faults just need to update the existing leaf entry,
+- * and so normally don't require allocations from the memcache. The
+- * only exception to this is when dirty logging is enabled at runtime
+- * and a write fault needs to collapse a block entry into a table.
+- */
+- if (fault_status != FSC_PERM || (logging_active && write_fault)) {
+- ret = kvm_mmu_topup_memory_cache(memcache,
+- kvm_mmu_cache_min_pages(kvm));
+- if (ret)
+- return ret;
+- }
+
+- mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ /*
+- * Ensure the read of mmu_notifier_seq happens before we call
+- * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
+- * the page we just got a reference to gets unmapped before we have a
+- * chance to grab the mmu_lock, which ensure that if the page gets
+- * unmapped afterwards, the call to kvm_unmap_gfn will take it away
+- * from us again properly. This smp_rmb() interacts with the smp_wmb()
+- * in kvm_mmu_notifier_invalidate_<page|range_end>.
++ * Read mmu_notifier_seq so that KVM can detect if the results of
++ * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
++ * acquiring kvm->mmu_lock.
+ *
+- * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
+- * used to avoid unnecessary overhead introduced to locate the memory
+- * slot because it's always fixed even @gfn is adjusted for huge pages.
++ * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
++ * with the smp_wmb() in kvm_dec_notifier_count().
+ */
+- smp_rmb();
++ mmu_seq = vcpu->kvm->mmu_notifier_seq;
++ mmap_read_unlock(current->mm);
+
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ write_fault, &writable, NULL);
+@@ -1104,6 +1125,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+ hva, &pfn,
+ &fault_ipa);
++
++ if (vma_pagesize < 0) {
++ ret = vma_pagesize;
++ goto out_unlock;
++ }
+ }
+
+ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
+index 2af3c37445e00..886048c083638 100644
+--- a/arch/arm64/kvm/pmu-emul.c
++++ b/arch/arm64/kvm/pmu-emul.c
+@@ -554,6 +554,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ }
+ }
++ kvm_vcpu_pmu_restore_guest(vcpu);
+ }
+
+ /**
+diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
+index 74c47d4202534..be03ea3e775a8 100644
+--- a/arch/arm64/kvm/psci.c
++++ b/arch/arm64/kvm/psci.c
+@@ -406,7 +406,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
+
+ int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
+ {
+- return 3; /* PSCI version and two workaround registers */
++ return 4; /* PSCI version and three workaround registers */
+ }
+
+ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+@@ -420,6 +420,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+ if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
+ return -EFAULT;
+
++ if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
++ return -EFAULT;
++
+ return 0;
+ }
+
+@@ -459,6 +462,17 @@ static int get_kernel_wa_level(u64 regid)
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ }
++ break;
++ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
++ switch (arm64_get_spectre_bhb_state()) {
++ case SPECTRE_VULNERABLE:
++ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
++ case SPECTRE_MITIGATED:
++ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
++ case SPECTRE_UNAFFECTED:
++ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
++ }
++ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ }
+
+ return -EINVAL;
+@@ -475,6 +489,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
++ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ break;
+ default:
+@@ -493,6 +508,8 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+ u64 val;
+ int wa_level;
+
++ if (KVM_REG_SIZE(reg->id) != sizeof(val))
++ return -ENOENT;
+ if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+@@ -520,6 +537,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+ }
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
++ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+ return -EINVAL;
+
+diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
+index 1d46e185f31e1..d00170d7ddf5e 100644
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -649,7 +649,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+ */
+ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
+ | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+- if (!system_supports_32bit_el0())
++ if (!kvm_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
+ __vcpu_sys_reg(vcpu, r->reg) = val;
+ }
+@@ -698,11 +698,10 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ val &= ~ARMV8_PMU_PMCR_MASK;
+ val |= p->regval & ARMV8_PMU_PMCR_MASK;
+- if (!system_supports_32bit_el0())
++ if (!kvm_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ kvm_pmu_handle_pmcr(vcpu, val);
+- kvm_vcpu_pmu_restore_guest(vcpu);
+ } else {
+ /* PMCR.P & PMCR.C are RAZ */
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0)
+@@ -1518,7 +1517,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
+ /* CRm=6 */
+ ID_SANITISED(ID_AA64ISAR0_EL1),
+ ID_SANITISED(ID_AA64ISAR1_EL1),
+- ID_UNALLOCATED(6,2),
++ ID_SANITISED(ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
+index 61728c543eb9c..1d534283378a3 100644
+--- a/arch/arm64/kvm/vgic/vgic-its.c
++++ b/arch/arm64/kvm/vgic/vgic-its.c
+@@ -2096,7 +2096,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
+
+ memset(entry, 0, esz);
+
+- while (len > 0) {
++ while (true) {
+ int next_offset;
+ size_t byte_offset;
+
+@@ -2109,6 +2109,9 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
+ return next_offset;
+
+ byte_offset = next_offset * esz;
++ if (byte_offset >= len)
++ break;
++
+ id += next_offset;
+ gpa += byte_offset;
+ len -= byte_offset;
+diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+index 5f9014ae595b7..508aee9f88535 100644
+--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
++++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+@@ -418,11 +418,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
+ vgic_mmio_read_pending, vgic_mmio_write_spending,
+- NULL, vgic_uaccess_write_spending, 1,
++ vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending,
+- NULL, vgic_uaccess_write_cpending, 1,
++ vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
+index 48c6067fc5ecb..55630ca2c325b 100644
+--- a/arch/arm64/kvm/vgic/vgic-mmio.c
++++ b/arch/arm64/kvm/vgic/vgic-mmio.c
+@@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+ return 0;
+ }
+
+-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+- gpa_t addr, unsigned int len)
++static unsigned long __read_pending(struct kvm_vcpu *vcpu,
++ gpa_t addr, unsigned int len,
++ bool is_user)
+ {
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+@@ -248,6 +249,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ IRQCHIP_STATE_PENDING,
+ &val);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
++ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
++ val = vgic_get_phys_line_level(irq);
+ } else {
+ val = irq_is_pending(irq);
+ }
+@@ -261,6 +264,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ return value;
+ }
+
++unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
++ gpa_t addr, unsigned int len)
++{
++ return __read_pending(vcpu, addr, len, false);
++}
++
++unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
++ gpa_t addr, unsigned int len)
++{
++ return __read_pending(vcpu, addr, len, true);
++}
++
+ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+ {
+ return (vgic_irq_is_sgi(irq->intid) &&
+diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h
+index fefcca2b14dc7..dcea440159855 100644
+--- a/arch/arm64/kvm/vgic/vgic-mmio.h
++++ b/arch/arm64/kvm/vgic/vgic-mmio.h
+@@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
++unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
++ gpa_t addr, unsigned int len);
++
+ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
+index 21a6207fb2eed..8eb70451323b6 100644
+--- a/arch/arm64/kvm/vgic/vgic-v3.c
++++ b/arch/arm64/kvm/vgic/vgic-v3.c
+@@ -347,26 +347,23 @@ retry:
+ * The deactivation of the doorbell interrupt will trigger the
+ * unmapping of the associated vPE.
+ */
+-static void unmap_all_vpes(struct vgic_dist *dist)
++static void unmap_all_vpes(struct kvm *kvm)
+ {
+- struct irq_desc *desc;
++ struct vgic_dist *dist = &kvm->arch.vgic;
+ int i;
+
+- for (i = 0; i < dist->its_vm.nr_vpes; i++) {
+- desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+- irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+- }
++ for (i = 0; i < dist->its_vm.nr_vpes; i++)
++ free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i));
+ }
+
+-static void map_all_vpes(struct vgic_dist *dist)
++static void map_all_vpes(struct kvm *kvm)
+ {
+- struct irq_desc *desc;
++ struct vgic_dist *dist = &kvm->arch.vgic;
+ int i;
+
+- for (i = 0; i < dist->its_vm.nr_vpes; i++) {
+- desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+- irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
+- }
++ for (i = 0; i < dist->its_vm.nr_vpes; i++)
++ WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i),
++ dist->its_vm.vpes[i]->irq));
+ }
+
+ /**
+@@ -391,7 +388,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
+ * and enabling of the doorbells have already been done.
+ */
+ if (kvm_vgic_global_state.has_gicv4_1) {
+- unmap_all_vpes(dist);
++ unmap_all_vpes(kvm);
+ vlpi_avail = true;
+ }
+
+@@ -441,7 +438,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
+
+ out:
+ if (vlpi_avail)
+- map_all_vpes(dist);
++ map_all_vpes(kvm);
+
+ return ret;
+ }
+diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
+index c1845d8f5f7e7..f507e3fcffce3 100644
+--- a/arch/arm64/kvm/vgic/vgic-v4.c
++++ b/arch/arm64/kvm/vgic/vgic-v4.c
+@@ -222,6 +222,11 @@ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
+ *val = !!(*ptr & mask);
+ }
+
++int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
++{
++ return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
++}
++
+ /**
+ * vgic_v4_init - Initialize the GICv4 data structures
+ * @kvm: Pointer to the VM being initialized
+@@ -282,8 +287,7 @@ int vgic_v4_init(struct kvm *kvm)
+ irq_flags &= ~IRQ_NOAUTOEN;
+ irq_set_status_flags(irq, irq_flags);
+
+- ret = request_irq(irq, vgic_v4_doorbell_handler,
+- 0, "vcpu", vcpu);
++ ret = vgic_v4_request_vpe_irq(vcpu, irq);
+ if (ret) {
+ kvm_err("failed to allocate vcpu IRQ%d\n", irq);
+ /*
+diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
+index 14a9218641f57..36021c31a706a 100644
+--- a/arch/arm64/kvm/vgic/vgic.h
++++ b/arch/arm64/kvm/vgic/vgic.h
+@@ -321,5 +321,6 @@ int vgic_v4_init(struct kvm *kvm);
+ void vgic_v4_teardown(struct kvm *kvm);
+ void vgic_v4_configure_vsgis(struct kvm *kvm);
+ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
++int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
+
+ #endif
+diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
+index b84b179edba3a..1fd5d790ab800 100644
+--- a/arch/arm64/lib/clear_page.S
++++ b/arch/arm64/lib/clear_page.S
+@@ -16,6 +16,7 @@
+ */
+ SYM_FUNC_START_PI(clear_page)
+ mrs x1, dczid_el0
++ tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
+ and w1, w1, #0xf
+ mov x2, #4
+ lsl x1, x2, x1
+@@ -25,5 +26,14 @@ SYM_FUNC_START_PI(clear_page)
+ tst x0, #(PAGE_SIZE - 1)
+ b.ne 1b
+ ret
++
++2: stnp xzr, xzr, [x0]
++ stnp xzr, xzr, [x0, #16]
++ stnp xzr, xzr, [x0, #32]
++ stnp xzr, xzr, [x0, #48]
++ add x0, x0, #64
++ tst x0, #(PAGE_SIZE - 1)
++ b.ne 2b
++ ret
+ SYM_FUNC_END_PI(clear_page)
+ EXPORT_SYMBOL(clear_page)
+diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
+index 78b87a64ca0a3..2432683e48a61 100644
+--- a/arch/arm64/lib/csum.c
++++ b/arch/arm64/lib/csum.c
+@@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
+ const u64 *ptr;
+ u64 data, sum64 = 0;
+
+- if (unlikely(len == 0))
++ if (unlikely(len <= 0))
+ return 0;
+
+ offset = (unsigned long)buff & 7;
+diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
+index e83643b3995f4..f531dcb95174a 100644
+--- a/arch/arm64/lib/mte.S
++++ b/arch/arm64/lib/mte.S
+@@ -43,17 +43,23 @@ SYM_FUNC_END(mte_clear_page_tags)
+ * x0 - address to the beginning of the page
+ */
+ SYM_FUNC_START(mte_zero_clear_page_tags)
++ and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
+ mrs x1, dczid_el0
++ tbnz x1, #4, 2f // Branch if DC GZVA is prohibited
+ and w1, w1, #0xf
+ mov x2, #4
+ lsl x1, x2, x1
+- and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
+
+ 1: dc gzva, x0
+ add x0, x0, x1
+ tst x0, #(PAGE_SIZE - 1)
+ b.ne 1b
+ ret
++
++2: stz2g x0, [x0], #(MTE_GRANULE_SIZE * 2)
++ tst x0, #(PAGE_SIZE - 1)
++ b.ne 2b
++ ret
+ SYM_FUNC_END(mte_zero_clear_page_tags)
+
+ /*
+diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
+index e42bcfcd37e6f..a4884b97e9a81 100644
+--- a/arch/arm64/lib/strncmp.S
++++ b/arch/arm64/lib/strncmp.S
+@@ -1,9 +1,9 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+ /*
+- * Copyright (c) 2013-2021, Arm Limited.
++ * Copyright (c) 2013-2022, Arm Limited.
+ *
+ * Adapted from the original at:
+- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
++ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S
+ */
+
+ #include <linux/linkage.h>
+@@ -11,14 +11,14 @@
+
+ /* Assumptions:
+ *
+- * ARMv8-a, AArch64
++ * ARMv8-a, AArch64.
++ * MTE compatible.
+ */
+
+ #define L(label) .L ## label
+
+ #define REP8_01 0x0101010101010101
+ #define REP8_7f 0x7f7f7f7f7f7f7f7f
+-#define REP8_80 0x8080808080808080
+
+ /* Parameters and result. */
+ #define src1 x0
+@@ -39,10 +39,24 @@
+ #define tmp3 x10
+ #define zeroones x11
+ #define pos x12
+-#define limit_wd x13
+-#define mask x14
+-#define endloop x15
++#define mask x13
++#define endloop x14
+ #define count mask
++#define offset pos
++#define neg_offset x15
++
++/* Define endian dependent shift operations.
++ On big-endian early bytes are at MSB and on little-endian LSB.
++ LS_FW means shifting towards early bytes.
++ LS_BK means shifting towards later bytes.
++ */
++#ifdef __AARCH64EB__
++#define LS_FW lsl
++#define LS_BK lsr
++#else
++#define LS_FW lsr
++#define LS_BK lsl
++#endif
+
+ SYM_FUNC_START_WEAK_PI(strncmp)
+ cbz limit, L(ret0)
+@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
+ and count, src1, #7
+ b.ne L(misaligned8)
+ cbnz count, L(mutual_align)
+- /* Calculate the number of full and partial words -1. */
+- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+@@ -64,56 +75,52 @@ L(loop_aligned):
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ L(start_realigned):
+- subs limit_wd, limit_wd, #1
++ subs limit, limit, #8
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
++ csinv endloop, diff, xzr, hi /* Last Dword or differences. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp endloop, #0, #0, eq
+ b.eq L(loop_aligned)
+ /* End of main loop */
+
+- /* Not reached the limit, must have found the end or a diff. */
+- tbz limit_wd, #63, L(not_limit)
+-
+- /* Limit % 8 == 0 => all bytes significant. */
+- ands limit, limit, #7
+- b.eq L(not_limit)
+-
+- lsl limit, limit, #3 /* Bits -> bytes. */
+- mov mask, #~0
+-#ifdef __AARCH64EB__
+- lsr mask, mask, limit
+-#else
+- lsl mask, mask, limit
+-#endif
+- bic data1, data1, mask
+- bic data2, data2, mask
+-
+- /* Make sure that the NUL byte is marked in the syndrome. */
+- orr has_nul, has_nul, mask
+-
+-L(not_limit):
++L(full_check):
++#ifndef __AARCH64EB__
+ orr syndrome, diff, has_nul
+-
+-#ifndef __AARCH64EB__
++ add limit, limit, 8 /* Rewind limit to before last subs. */
++L(syndrome_check):
++ /* Limit was reached. Check if the NUL byte or the difference
++ is before the limit. */
+ rev syndrome, syndrome
+ rev data1, data1
+- /* The MS-non-zero bit of the syndrome marks either the first bit
+- that is different, or the top bit of the first zero byte.
+- Shifting left now will bring the critical information into the
+- top bits. */
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
++ cmp limit, pos, lsr #3
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
++ csel result, result, xzr, hi
+ ret
+ #else
++ /* Not reached the limit, must have found the end or a diff. */
++ tbz limit, #63, L(not_limit)
++ add tmp1, limit, 8
++ cbz limit, L(not_limit)
++
++ lsl limit, tmp1, #3 /* Bits -> bytes. */
++ mov mask, #~0
++ lsr mask, mask, limit
++ bic data1, data1, mask
++ bic data2, data2, mask
++
++ /* Make sure that the NUL byte is marked in the syndrome. */
++ orr has_nul, has_nul, mask
++
++L(not_limit):
+ /* For big-endian we cannot use the trick with the syndrome value
+ as carry-propagation can corrupt the upper bits if the trailing
+ bytes in the string contain 0x01. */
+@@ -134,10 +141,11 @@ L(not_limit):
+ rev has_nul, has_nul
+ orr syndrome, diff, has_nul
+ clz pos, syndrome
+- /* The MS-non-zero bit of the syndrome marks either the first bit
+- that is different, or the top bit of the first zero byte.
++ /* The most-significant-non-zero bit of the syndrome marks either the
++ first bit that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
++L(end_quick):
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+@@ -159,22 +167,12 @@ L(mutual_align):
+ neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+-#ifdef __AARCH64EB__
+- /* Big-endian. Early bytes are at MSB. */
+- lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
+-#else
+- /* Little-endian. Early bytes are at LSB. */
+- lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
+-#endif
+- and tmp3, limit_wd, #7
+- lsr limit_wd, limit_wd, #3
+- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
+- add limit, limit, count
+- add tmp3, tmp3, count
++ LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
++ /* Adjust the limit and ensure it doesn't overflow. */
++ adds limit, limit, count
++ csinv limit, limit, xzr, lo
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+- add limit_wd, limit_wd, tmp3, lsr #3
+ b L(start_realigned)
+
+ .p2align 4
+@@ -197,13 +195,11 @@ L(done):
+ /* Align the SRC1 to a dword by doing a bytewise compare and then do
+ the dword loop. */
+ L(try_misaligned_words):
+- lsr limit_wd, limit, #3
+- cbz count, L(do_misaligned)
++ cbz count, L(src1_aligned)
+
+ neg count, count
+ and count, count, #7
+ sub limit, limit, count
+- lsr limit_wd, limit, #3
+
+ L(page_end_loop):
+ ldrb data1w, [src1], #1
+@@ -214,48 +210,100 @@ L(page_end_loop):
+ subs count, count, #1
+ b.hi L(page_end_loop)
+
+-L(do_misaligned):
+- /* Prepare ourselves for the next page crossing. Unlike the aligned
+- loop, we fetch 1 less dword because we risk crossing bounds on
+- SRC2. */
+- mov count, #8
+- subs limit_wd, limit_wd, #1
+- b.lo L(done_loop)
+-L(loop_misaligned):
+- and tmp2, src2, #0xff8
+- eor tmp2, tmp2, #0xff8
+- cbz tmp2, L(page_end_loop)
++ /* The following diagram explains the comparison of misaligned strings.
++ The bytes are shown in natural order. For little-endian, it is
++ reversed in the registers. The "x" bytes are before the string.
++ The "|" separates data that is loaded at one time.
++ src1 | a a a a a a a a | b b b c c c c c | . . .
++ src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
++
++ After shifting in each step, the data looks like this:
++ STEP_A STEP_B STEP_C
++ data1 a a a a a a a a b b b c c c c c b b b c c c c c
++ data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
+
++ The bytes with "0" are eliminated from the syndrome via mask.
++
++ Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
++ time from SRC2. The comparison happens in 3 steps. After each step
++ the loop can exit, or read from SRC1 or SRC2. */
++L(src1_aligned):
++ /* Calculate offset from 8 byte alignment to string start in bits. No
++ need to mask offset since shifts are ignoring upper bits. */
++ lsl offset, src2, #3
++ bic src2, src2, #0xf
++ mov mask, -1
++ neg neg_offset, offset
+ ldr data1, [src1], #8
+- ldr data2, [src2], #8
+- sub tmp1, data1, zeroones
+- orr tmp2, data1, #REP8_7f
+- eor diff, data1, data2 /* Non-zero if differences found. */
+- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+- ccmp diff, #0, #0, eq
+- b.ne L(not_limit)
+- subs limit_wd, limit_wd, #1
+- b.pl L(loop_misaligned)
++ ldp tmp1, tmp2, [src2], #16
++ LS_BK mask, mask, neg_offset
++ and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
++ /* Skip the first compare if data in tmp1 is irrelevant. */
++ tbnz offset, 6, L(misaligned_mid_loop)
+
+-L(done_loop):
+- /* We found a difference or a NULL before the limit was reached. */
+- and limit, limit, #7
+- cbz limit, L(not_limit)
+- /* Read the last word. */
+- sub src1, src1, 8
+- sub src2, src2, 8
+- ldr data1, [src1, limit]
+- ldr data2, [src2, limit]
+- sub tmp1, data1, zeroones
+- orr tmp2, data1, #REP8_7f
++L(loop_misaligned):
++ /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
++ LS_FW data2, tmp1, offset
++ LS_BK tmp1, tmp2, neg_offset
++ subs limit, limit, #8
++ orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
++ sub has_nul, data1, zeroones
+ eor diff, data1, data2 /* Non-zero if differences found. */
+- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+- ccmp diff, #0, #0, eq
+- b.ne L(not_limit)
++ orr tmp3, data1, #REP8_7f
++ csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
++ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
++ orr tmp3, endloop, has_nul
++ cbnz tmp3, L(full_check)
++
++ ldr data1, [src1], #8
++L(misaligned_mid_loop):
++ /* STEP_B: Compare first part of data1 to second part of tmp2. */
++ LS_FW data2, tmp2, offset
++#ifdef __AARCH64EB__
++ /* For big-endian we do a byte reverse to avoid carry-propagation
++ problem described above. This way we can reuse the has_nul in the
++ next step and also use syndrome value trick at the end. */
++ rev tmp3, data1
++ #define data1_fixed tmp3
++#else
++ #define data1_fixed data1
++#endif
++ sub has_nul, data1_fixed, zeroones
++ orr tmp3, data1_fixed, #REP8_7f
++ eor diff, data2, data1 /* Non-zero if differences found. */
++ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
++#ifdef __AARCH64EB__
++ rev has_nul, has_nul
++#endif
++ cmp limit, neg_offset, lsr #3
++ orr syndrome, diff, has_nul
++ bic syndrome, syndrome, mask /* Ignore later bytes. */
++ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
++ cbnz tmp3, L(syndrome_check)
++
++ /* STEP_C: Compare second part of data1 to first part of tmp1. */
++ ldp tmp1, tmp2, [src2], #16
++ cmp limit, #8
++ LS_BK data2, tmp1, neg_offset
++ eor diff, data2, data1 /* Non-zero if differences found. */
++ orr syndrome, diff, has_nul
++ and syndrome, syndrome, mask /* Ignore earlier bytes. */
++ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
++ cbnz tmp3, L(syndrome_check)
++
++ ldr data1, [src1], #8
++ sub limit, limit, #8
++ b L(loop_misaligned)
++
++#ifdef __AARCH64EB__
++L(syndrome_check):
++ clz pos, syndrome
++ cmp pos, limit, lsl #3
++ b.lo L(end_quick)
++#endif
+
+ L(ret0):
+ mov result, #0
+ ret
+-
+ SYM_FUNC_END_PI(strncmp)
+ EXPORT_SYMBOL_NOHWKASAN(strncmp)
+diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
+index 5051b3c1a4f12..79164e4390369 100644
+--- a/arch/arm64/mm/cache.S
++++ b/arch/arm64/mm/cache.S
+@@ -231,8 +231,6 @@ SYM_FUNC_END_PI(__dma_flush_area)
+ */
+ SYM_FUNC_START_PI(__dma_map_area)
+ add x1, x0, x1
+- cmp w2, #DMA_FROM_DEVICE
+- b.eq __dma_inv_area
+ b __dma_clean_area
+ SYM_FUNC_END_PI(__dma_map_area)
+
+diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
+index b5447e53cd73e..b44931deb227b 100644
+--- a/arch/arm64/mm/copypage.c
++++ b/arch/arm64/mm/copypage.c
+@@ -16,14 +16,15 @@
+
+ void copy_highpage(struct page *to, struct page *from)
+ {
+- struct page *kto = page_address(to);
+- struct page *kfrom = page_address(from);
++ void *kto = page_address(to);
++ void *kfrom = page_address(from);
+
+ copy_page(kto, kfrom);
+
++ page_kasan_tag_reset(to);
++
+ if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
+ set_bit(PG_mte_tagged, &to->flags);
+- page_kasan_tag_reset(to);
+ /*
+ * We need smp_wmb() in between setting the flags and clearing the
+ * tags because if another thread reads page->flags and builds a
+diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
+index aa0060178343a..60a8b6a8a42b5 100644
+--- a/arch/arm64/mm/extable.c
++++ b/arch/arm64/mm/extable.c
+@@ -9,14 +9,19 @@
+ int fixup_exception(struct pt_regs *regs)
+ {
+ const struct exception_table_entry *fixup;
++ unsigned long addr;
+
+- fixup = search_exception_tables(instruction_pointer(regs));
+- if (!fixup)
+- return 0;
++ addr = instruction_pointer(regs);
+
+- if (in_bpf_jit(regs))
++ /* Search the BPF tables first, these are formatted differently */
++ fixup = search_bpf_extables(addr);
++ if (fixup)
+ return arm64_bpf_fixup_exception(fixup, regs);
+
++ fixup = search_exception_tables(addr);
++ if (!fixup)
++ return 0;
++
+ regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
+ return 1;
+ }
+diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
+index 9ae24e3b72be1..6327620397142 100644
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -43,7 +43,7 @@
+ #include <asm/traps.h>
+
+ struct fault_info {
+- int (*fn)(unsigned long far, unsigned int esr,
++ int (*fn)(unsigned long far, unsigned long esr,
+ struct pt_regs *regs);
+ int sig;
+ int code;
+@@ -53,17 +53,17 @@ struct fault_info {
+ static const struct fault_info fault_info[];
+ static struct fault_info debug_fault_info[];
+
+-static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
++static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
+ {
+ return fault_info + (esr & ESR_ELx_FSC);
+ }
+
+-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
++static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
+ {
+ return debug_fault_info + DBG_ESR_EVT(esr);
+ }
+
+-static void data_abort_decode(unsigned int esr)
++static void data_abort_decode(unsigned long esr)
+ {
+ pr_alert("Data abort info:\n");
+
+@@ -85,11 +85,11 @@ static void data_abort_decode(unsigned int esr)
+ (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
+ }
+
+-static void mem_abort_decode(unsigned int esr)
++static void mem_abort_decode(unsigned long esr)
+ {
+ pr_alert("Mem abort info:\n");
+
+- pr_alert(" ESR = 0x%08x\n", esr);
++ pr_alert(" ESR = 0x%016lx\n", esr);
+ pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n",
+ ESR_ELx_EC(esr), esr_get_class_string(esr),
+ (esr & ESR_ELx_IL) ? 32 : 16);
+@@ -99,7 +99,7 @@ static void mem_abort_decode(unsigned int esr)
+ pr_alert(" EA = %lu, S1PTW = %lu\n",
+ (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
+ (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+- pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC),
++ pr_alert(" FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC),
+ esr_to_fault_info(esr)->name);
+
+ if (esr_is_data_abort(esr))
+@@ -229,20 +229,20 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
+ return 1;
+ }
+
+-static bool is_el1_instruction_abort(unsigned int esr)
++static bool is_el1_instruction_abort(unsigned long esr)
+ {
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+ }
+
+-static bool is_el1_data_abort(unsigned int esr)
++static bool is_el1_data_abort(unsigned long esr)
+ {
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
+ }
+
+-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
++static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+- unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
++ unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
+ return false;
+@@ -258,7 +258,7 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+ }
+
+ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+- unsigned int esr,
++ unsigned long esr,
+ struct pt_regs *regs)
+ {
+ unsigned long flags;
+@@ -290,7 +290,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+ }
+
+ static void die_kernel_fault(const char *msg, unsigned long addr,
+- unsigned int esr, struct pt_regs *regs)
++ unsigned long esr, struct pt_regs *regs)
+ {
+ bust_spinlocks(1);
+
+@@ -302,11 +302,11 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
+ show_pte(addr);
+ die("Oops", regs, esr);
+ bust_spinlocks(0);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ #ifdef CONFIG_KASAN_HW_TAGS
+-static void report_tag_fault(unsigned long addr, unsigned int esr,
++static void report_tag_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ /*
+@@ -318,11 +318,11 @@ static void report_tag_fault(unsigned long addr, unsigned int esr,
+ }
+ #else
+ /* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+-static inline void report_tag_fault(unsigned long addr, unsigned int esr,
++static inline void report_tag_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs) { }
+ #endif
+
+-static void do_tag_recovery(unsigned long addr, unsigned int esr,
++static void do_tag_recovery(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+
+@@ -337,9 +337,9 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr,
+ isb();
+ }
+
+-static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
++static bool is_el1_mte_sync_tag_check_fault(unsigned long esr)
+ {
+- unsigned int fsc = esr & ESR_ELx_FSC;
++ unsigned long fsc = esr & ESR_ELx_FSC;
+
+ if (!is_el1_data_abort(esr))
+ return false;
+@@ -350,7 +350,12 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+ return false;
+ }
+
+-static void __do_kernel_fault(unsigned long addr, unsigned int esr,
++static bool is_translation_fault(unsigned long esr)
++{
++ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
++}
++
++static void __do_kernel_fault(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ const char *msg;
+@@ -382,7 +387,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ } else if (addr < PAGE_SIZE) {
+ msg = "NULL pointer dereference";
+ } else {
+- if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
++ if (is_translation_fault(esr) &&
++ kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+ return;
+
+ msg = "paging request";
+@@ -391,7 +397,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ die_kernel_fault(msg, addr, esr, regs);
+ }
+
+-static void set_thread_esr(unsigned long address, unsigned int esr)
++static void set_thread_esr(unsigned long address, unsigned long esr)
+ {
+ current->thread.fault_address = address;
+
+@@ -439,7 +445,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
+ * exception level). Fail safe by not providing an ESR
+ * context record at all.
+ */
+- WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
++ WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr);
+ esr = 0;
+ break;
+ }
+@@ -448,7 +454,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
+ current->thread.fault_code = esr;
+ }
+
+-static void do_bad_area(unsigned long far, unsigned int esr,
++static void do_bad_area(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ unsigned long addr = untagged_addr(far);
+@@ -467,8 +473,8 @@ static void do_bad_area(unsigned long far, unsigned int esr,
+ }
+ }
+
+-#define VM_FAULT_BADMAP 0x010000
+-#define VM_FAULT_BADACCESS 0x020000
++#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
++#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
+ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
+ unsigned int mm_flags, unsigned long vm_flags,
+@@ -499,7 +505,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
+ return handle_mm_fault(vma, addr, mm_flags, regs);
+ }
+
+-static bool is_el0_instruction_abort(unsigned int esr)
++static bool is_el0_instruction_abort(unsigned long esr)
+ {
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+ }
+@@ -508,12 +514,12 @@ static bool is_el0_instruction_abort(unsigned int esr)
+ * Note: not valid for EL1 DC IVAC, but we never use that such that it
+ * should fault. EL0 cannot issue DC IVAC (undef).
+ */
+-static bool is_write_abort(unsigned int esr)
++static bool is_write_abort(unsigned long esr)
+ {
+ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+ }
+
+-static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
++static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ const struct fault_info *inf;
+@@ -671,7 +677,7 @@ no_context:
+ }
+
+ static int __kprobes do_translation_fault(unsigned long far,
+- unsigned int esr,
++ unsigned long esr,
+ struct pt_regs *regs)
+ {
+ unsigned long addr = untagged_addr(far);
+@@ -683,19 +689,19 @@ static int __kprobes do_translation_fault(unsigned long far,
+ return 0;
+ }
+
+-static int do_alignment_fault(unsigned long far, unsigned int esr,
++static int do_alignment_fault(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ do_bad_area(far, esr, regs);
+ return 0;
+ }
+
+-static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
++static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
+ {
+ return 1; /* "fault" */
+ }
+
+-static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
++static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
+ {
+ const struct fault_info *inf;
+ unsigned long siaddr;
+@@ -725,7 +731,7 @@ static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
+ return 0;
+ }
+
+-static int do_tag_check_fault(unsigned long far, unsigned int esr,
++static int do_tag_check_fault(unsigned long far, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ /*
+@@ -805,7 +811,7 @@ static const struct fault_info fault_info[] = {
+ { do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
+ };
+
+-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
++void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs)
+ {
+ const struct fault_info *inf = esr_to_fault_info(esr);
+ unsigned long addr = untagged_addr(far);
+@@ -828,14 +834,14 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
+ }
+ NOKPROBE_SYMBOL(do_mem_abort);
+
+-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
++void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
+ {
+ arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+ addr, esr);
+ }
+ NOKPROBE_SYMBOL(do_sp_pc_abort);
+
+-int __init early_brk64(unsigned long addr, unsigned int esr,
++int __init early_brk64(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs);
+
+ /*
+@@ -855,7 +861,7 @@ static struct fault_info __refdata debug_fault_info[] = {
+ };
+
+ void __init hook_debug_fault_code(int nr,
+- int (*fn)(unsigned long, unsigned int, struct pt_regs *),
++ int (*fn)(unsigned long, unsigned long, struct pt_regs *),
+ int sig, int code, const char *name)
+ {
+ BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
+@@ -888,7 +894,7 @@ static void debug_exception_exit(struct pt_regs *regs)
+ }
+ NOKPROBE_SYMBOL(debug_exception_exit);
+
+-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
++void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
+ struct pt_regs *regs)
+ {
+ const struct fault_info *inf = esr_to_debug_fault_info(esr);
+diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
+index 37a81754d9b61..3b269c7567984 100644
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
+ * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
+ * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
+ * otherwise it is empty.
++ *
++ * Memory reservation for crash kernel either done early or deferred
++ * depending on DMA memory zones configs (ZONE_DMA) --
++ *
++ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
++ * here instead of max_zone_phys(). This lets early reservation of
++ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
++ * Reserving memory early for crash kernel allows linear creation of block
++ * mappings (greater than page-granularity) for all the memory bank rangs.
++ * In this scheme a comparatively quicker boot is observed.
++ *
++ * If ZONE_DMA configs are defined, crash kernel memory reservation
++ * is delayed until DMA zone memory range size initilazation performed in
++ * zone_sizes_init(). The defer is necessary to steer clear of DMA zone
++ * memory range to avoid overlap allocation. So crash kernel memory boundaries
++ * are not known when mapping all bank memory ranges, which otherwise means
++ * not possible to exclude crash kernel range from creating block mappings
++ * so page-granularity mappings are created for the entire memory range.
++ * Hence a slightly slower boot is observed.
++ *
++ * Note: Page-granularity mapppings are necessary for crash kernel memory
++ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
+ */
+-phys_addr_t arm64_dma_phys_limit __ro_after_init;
++#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
++phys_addr_t __ro_after_init arm64_dma_phys_limit;
++#else
++phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
++#endif
+
+ #ifdef CONFIG_KEXEC_CORE
+ /*
+@@ -153,50 +179,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
+ if (!arm64_dma_phys_limit)
+ arm64_dma_phys_limit = dma32_phys_limit;
+ #endif
+- if (!arm64_dma_phys_limit)
+- arm64_dma_phys_limit = PHYS_MASK + 1;
+ max_zone_pfns[ZONE_NORMAL] = max;
+
+ free_area_init(max_zone_pfns);
+ }
+
+-int pfn_valid(unsigned long pfn)
+-{
+- phys_addr_t addr = PFN_PHYS(pfn);
+- struct mem_section *ms;
+-
+- /*
+- * Ensure the upper PAGE_SHIFT bits are clear in the
+- * pfn. Else it might lead to false positives when
+- * some of the upper bits are set, but the lower bits
+- * match a valid pfn.
+- */
+- if (PHYS_PFN(addr) != pfn)
+- return 0;
+-
+- if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+- return 0;
+-
+- ms = __pfn_to_section(pfn);
+- if (!valid_section(ms))
+- return 0;
+-
+- /*
+- * ZONE_DEVICE memory does not have the memblock entries.
+- * memblock_is_map_memory() check for ZONE_DEVICE based
+- * addresses will always fail. Even the normal hotplugged
+- * memory will never have MEMBLOCK_NOMAP flag set in their
+- * memblock entries. Skip memblock search for all non early
+- * memory sections covering all of hotplug memory including
+- * both normal and ZONE_DEVICE based.
+- */
+- if (!early_section(ms))
+- return pfn_section_valid(ms, pfn);
+-
+- return memblock_is_memory(addr);
+-}
+-EXPORT_SYMBOL(pfn_valid);
+-
+ int pfn_is_map_memory(unsigned long pfn)
+ {
+ phys_addr_t addr = PFN_PHYS(pfn);
+@@ -352,6 +339,9 @@ void __init arm64_memblock_init(void)
+
+ early_init_fdt_scan_reserved_mem();
+
++ if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
++ reserve_crashkernel();
++
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+ }
+
+@@ -398,7 +388,8 @@ void __init bootmem_init(void)
+ * request_standard_resources() depends on crashkernel's memory being
+ * reserved, so do it here.
+ */
+- reserve_crashkernel();
++ if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
++ reserve_crashkernel();
+
+ memblock_dump_all();
+ }
+diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
+index b7c81dacabf07..b21f91cd830db 100644
+--- a/arch/arm64/mm/ioremap.c
++++ b/arch/arm64/mm/ioremap.c
+@@ -99,3 +99,11 @@ void __init early_ioremap_init(void)
+ {
+ early_ioremap_setup();
+ }
++
++bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
++ unsigned long flags)
++{
++ unsigned long pfn = PHYS_PFN(offset);
++
++ return pfn_is_map_memory(pfn);
++}
+diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
+index a38f54cd638c2..77ada00280d93 100644
+--- a/arch/arm64/mm/mmap.c
++++ b/arch/arm64/mm/mmap.c
+@@ -7,8 +7,10 @@
+
+ #include <linux/io.h>
+ #include <linux/memblock.h>
++#include <linux/mm.h>
+ #include <linux/types.h>
+
++#include <asm/cpufeature.h>
+ #include <asm/page.h>
+
+ /*
+@@ -38,3 +40,18 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+ {
+ return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
+ }
++
++static int __init adjust_protection_map(void)
++{
++ /*
++ * With Enhanced PAN we can honour the execute-only permissions as
++ * there is no PAN override with such mappings.
++ */
++ if (cpus_have_const_cap(ARM64_HAS_EPAN)) {
++ protection_map[VM_EXEC] = PAGE_EXECONLY;
++ protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
++ }
++
++ return 0;
++}
++arch_initcall(adjust_protection_map);
+diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
+index cfd9deb347c38..6680689242df3 100644
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
+ static DEFINE_SPINLOCK(swapper_pgdir_lock);
++static DEFINE_MUTEX(fixmap_lock);
+
+ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+@@ -328,6 +329,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ }
+ BUG_ON(p4d_bad(p4d));
+
++ /*
++ * No need for locking during early boot. And it doesn't work as
++ * expected with KASLR enabled.
++ */
++ if (system_state != SYSTEM_BOOTING)
++ mutex_lock(&fixmap_lock);
+ pudp = pud_set_fixmap_offset(p4dp, addr);
+ do {
+ pud_t old_pud = READ_ONCE(*pudp);
+@@ -358,6 +365,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ } while (pudp++, addr = next, addr != end);
+
+ pud_clear_fixmap();
++ if (system_state != SYSTEM_BOOTING)
++ mutex_unlock(&fixmap_lock);
+ }
+
+ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+@@ -516,7 +525,7 @@ static void __init map_mem(pgd_t *pgdp)
+ */
+ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
+
+- if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
++ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+
+ /*
+@@ -527,6 +536,17 @@ static void __init map_mem(pgd_t *pgdp)
+ */
+ memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+
++#ifdef CONFIG_KEXEC_CORE
++ if (crash_mem_map) {
++ if (IS_ENABLED(CONFIG_ZONE_DMA) ||
++ IS_ENABLED(CONFIG_ZONE_DMA32))
++ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
++ else if (crashk_res.end)
++ memblock_mark_nomap(crashk_res.start,
++ resource_size(&crashk_res));
++ }
++#endif
++
+ /* map all the memory banks */
+ for_each_mem_range(i, &start, &end) {
+ if (start >= end)
+@@ -553,6 +573,25 @@ static void __init map_mem(pgd_t *pgdp)
+ __map_memblock(pgdp, kernel_start, kernel_end,
+ PAGE_KERNEL, NO_CONT_MAPPINGS);
+ memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
++
++ /*
++ * Use page-level mappings here so that we can shrink the region
++ * in page granularity and put back unused memory to buddy system
++ * through /sys/kernel/kexec_crash_size interface.
++ */
++#ifdef CONFIG_KEXEC_CORE
++ if (crash_mem_map &&
++ !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
++ if (crashk_res.end) {
++ __map_memblock(pgdp, crashk_res.start,
++ crashk_res.end + 1,
++ PAGE_KERNEL,
++ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
++ memblock_clear_nomap(crashk_res.start,
++ resource_size(&crashk_res));
++ }
++ }
++#endif
+ }
+
+ void mark_rodata_ro(void)
+@@ -616,6 +655,8 @@ early_param("rodata", parse_rodata);
+ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ static int __init map_entry_trampoline(void)
+ {
++ int i;
++
+ pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
+
+@@ -624,11 +665,15 @@ static int __init map_entry_trampoline(void)
+
+ /* Map only the text into the trampoline page table */
+ memset(tramp_pg_dir, 0, PGD_SIZE);
+- __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
+- prot, __pgd_pgtable_alloc, 0);
++ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
++ entry_tramp_text_size(), prot,
++ __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
+
+ /* Map both the text and data into the kernel page table */
+- __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
++ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
++ __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
++ pa_start + i * PAGE_SIZE, prot);
++
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern char __entry_tramp_data_start[];
+
+@@ -1499,6 +1544,11 @@ int arch_add_memory(int nid, u64 start, u64 size,
+ if (ret)
+ __remove_pgd_mapping(swapper_pg_dir,
+ __phys_to_virt(start), size);
++ else {
++ max_pfn = PFN_UP(start + size);
++ max_low_pfn = max_pfn;
++ }
++
+ return ret;
+ }
+
+diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
+index 7c4ef56265ee1..fd6cabc6d033a 100644
+--- a/arch/arm64/mm/mteswap.c
++++ b/arch/arm64/mm/mteswap.c
+@@ -62,7 +62,12 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
+ * the new page->flags are visible before the tags were updated.
+ */
+ smp_wmb();
+- mte_restore_page_tags(page_address(page), tags);
++ /*
++ * Test PG_mte_tagged again in case it was racing with another
++ * set_pte_at().
++ */
++ if (!test_and_set_bit(PG_mte_tagged, &page->flags))
++ mte_restore_page_tags(page_address(page), tags);
+
+ return true;
+ }
+diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
+index d35c90d2e47ad..1a9684b114745 100644
+--- a/arch/arm64/mm/proc.S
++++ b/arch/arm64/mm/proc.S
+@@ -46,18 +46,20 @@
+ #endif
+
+ #ifdef CONFIG_KASAN_HW_TAGS
+-#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+-#else
++#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
++#elif defined(CONFIG_ARM64_MTE)
+ /*
+ * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
+ * TBI being enabled at EL1.
+ */
+ #define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
++#else
++#define TCR_MTE_FLAGS 0
+ #endif
+
+ /*
+ * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
+- * changed during __cpu_setup to Normal Tagged if the system supports MTE.
++ * changed during mte_cpu_setup to Normal Tagged if the system supports MTE.
+ */
+ #define MAIR_EL1_SET \
+ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
+@@ -421,46 +423,8 @@ SYM_FUNC_START(__cpu_setup)
+ mov_q mair, MAIR_EL1_SET
+ mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
+ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+- TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
+-
+-#ifdef CONFIG_ARM64_MTE
+- /*
+- * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
+- * (ID_AA64PFR1_EL1[11:8] > 1).
+- */
+- mrs x10, ID_AA64PFR1_EL1
+- ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
+- cmp x10, #ID_AA64PFR1_MTE
+- b.lt 1f
+-
+- /* Normal Tagged memory type at the corresponding MAIR index */
+- mov x10, #MAIR_ATTR_NORMAL_TAGGED
+- bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8
++ TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
+
+- mov x10, #KERNEL_GCR_EL1
+- msr_s SYS_GCR_EL1, x10
+-
+- /*
+- * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+- * RGSR_EL1.SEED must be non-zero for IRG to produce
+- * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+- * must initialize it.
+- */
+- mrs x10, CNTVCT_EL0
+- ands x10, x10, #SYS_RGSR_EL1_SEED_MASK
+- csinc x10, x10, xzr, ne
+- lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
+- msr_s SYS_RGSR_EL1, x10
+-
+- /* clear any pending tag check faults in TFSR*_EL1 */
+- msr_s SYS_TFSR_EL1, xzr
+- msr_s SYS_TFSRE0_EL1, xzr
+-
+- /* set the TCR_EL1 bits */
+- mov_q x10, TCR_MTE_FLAGS
+- orr tcr, tcr, x10
+-1:
+-#endif
+ tcr_clear_errata_bits tcr, x9, x5
+
+ #ifdef CONFIG_ARM64_VA_BITS_52
+diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
+index 1c403536c9bb0..9bc4066c5bf33 100644
+--- a/arch/arm64/mm/ptdump.c
++++ b/arch/arm64/mm/ptdump.c
+@@ -41,8 +41,6 @@ static struct addr_marker address_markers[] = {
+ { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
+ { KASAN_SHADOW_END, "Kasan shadow end" },
+ #endif
+- { BPF_JIT_REGION_START, "BPF start" },
+- { BPF_JIT_REGION_END, "BPF end" },
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
+ { VMALLOC_START, "vmalloc() area" },
+diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
+index 803e7773fa869..4895b4d7e150f 100644
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -788,7 +788,10 @@ emit_cond_jmp:
+ u64 imm64;
+
+ imm64 = (u64)insn1.imm << 32 | (u32)imm;
+- emit_a64_mov_i64(dst, imm64, ctx);
++ if (bpf_pseudo_func(insn))
++ emit_addr_mov_i64(dst, imm64, ctx);
++ else
++ emit_a64_mov_i64(dst, imm64, ctx);
+
+ return 1;
+ }
+@@ -1042,15 +1045,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ goto out_off;
+ }
+
+- /* 1. Initial fake pass to compute ctx->idx. */
+-
+- /* Fake pass to fill in ctx->offset. */
+- if (build_body(&ctx, extra_pass)) {
++ /*
++ * 1. Initial fake pass to compute ctx->idx and ctx->offset.
++ *
++ * BPF line info needs ctx->offset[i] to be the offset of
++ * instruction[i] in jited image, so build prologue first.
++ */
++ if (build_prologue(&ctx, was_classic)) {
+ prog = orig_prog;
+ goto out_off;
+ }
+
+- if (build_prologue(&ctx, was_classic)) {
++ if (build_body(&ctx, extra_pass)) {
+ prog = orig_prog;
+ goto out_off;
+ }
+@@ -1110,6 +1116,7 @@ skip_init_ctx:
+ bpf_jit_binary_free(header);
+ prog->bpf_func = NULL;
+ prog->jited = 0;
++ prog->jited_len = 0;
+ goto out_off;
+ }
+ bpf_jit_binary_lock_ro(header);
+@@ -1123,6 +1130,11 @@ skip_init_ctx:
+ prog->jited_len = prog_size;
+
+ if (!prog->is_func || extra_pass) {
++ int i;
++
++ /* offset[prog->len] is the size of program */
++ for (i = 0; i <= prog->len; i++)
++ ctx.offset[i] *= AARCH64_INSN_SIZE;
+ bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
+ out_off:
+ kfree(ctx.offset);
+@@ -1138,15 +1150,12 @@ out:
+
+ u64 bpf_jit_alloc_exec_limit(void)
+ {
+- return BPF_JIT_REGION_SIZE;
++ return VMALLOC_END - VMALLOC_START;
+ }
+
+ void *bpf_jit_alloc_exec(unsigned long size)
+ {
+- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+- BPF_JIT_REGION_END, GFP_KERNEL,
+- PAGE_KERNEL, 0, NUMA_NO_NODE,
+- __builtin_return_address(0));
++ return vmalloc(size);
+ }
+
+ void bpf_jit_free_exec(void *addr)
+diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
+index 49305c2e6dfd3..fcaeec5a51258 100644
+--- a/arch/arm64/tools/cpucaps
++++ b/arch/arm64/tools/cpucaps
+@@ -42,6 +42,7 @@ MTE
+ SPECTRE_V2
+ SPECTRE_V3A
+ SPECTRE_V4
++SPECTRE_BHB
+ SSBS
+ SVE
+ UNMAP_KERNEL_AT_EL0
+@@ -53,6 +54,11 @@ WORKAROUND_1418040
+ WORKAROUND_1463225
+ WORKAROUND_1508412
+ WORKAROUND_1542419
++WORKAROUND_1742098
++WORKAROUND_2457168
++WORKAROUND_TRBE_OVERWRITE_FILL_MODE
++WORKAROUND_TSB_FLUSH_FAILURE
++WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+ WORKAROUND_CAVIUM_23154
+ WORKAROUND_CAVIUM_27456
+ WORKAROUND_CAVIUM_30115
+diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c
+index cb2a0d94a144d..2df115d0e2105 100644
+--- a/arch/csky/abiv1/alignment.c
++++ b/arch/csky/abiv1/alignment.c
+@@ -294,7 +294,7 @@ bad_area:
+ __func__, opcode, rz, rx, imm, addr);
+ show_regs(regs);
+ bust_spinlocks(0);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
+diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h
+index c40f06ee8d3ef..ac5a54f57d407 100644
+--- a/arch/csky/include/asm/uaccess.h
++++ b/arch/csky/include/asm/uaccess.h
+@@ -3,14 +3,13 @@
+ #ifndef __ASM_CSKY_UACCESS_H
+ #define __ASM_CSKY_UACCESS_H
+
+-#define user_addr_max() \
+- (uaccess_kernel() ? KERNEL_DS.seg : get_fs().seg)
++#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
+ static inline int __access_ok(unsigned long addr, unsigned long size)
+ {
+- unsigned long limit = current_thread_info()->addr_limit.seg;
++ unsigned long limit = user_addr_max();
+
+- return ((addr < limit) && ((addr + size) < limit));
++ return (size <= limit) && (addr <= (limit - size));
+ }
+ #define __access_ok __access_ok
+
+diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c
+index ab55e98ee8f62..75e1f9df5f604 100644
+--- a/arch/csky/kernel/perf_callchain.c
++++ b/arch/csky/kernel/perf_callchain.c
+@@ -49,7 +49,7 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+ {
+ struct stackframe buftail;
+ unsigned long lr = 0;
+- unsigned long *user_frame_tail = (unsigned long *)fp;
++ unsigned long __user *user_frame_tail = (unsigned long __user *)fp;
+
+ /* Check accessibility of one struct frame_tail beyond */
+ if (!access_ok(user_frame_tail, sizeof(buftail)))
+@@ -86,10 +86,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ unsigned long fp = 0;
+
+ /* C-SKY does not support virtualization. */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
++ if (guest_cbs && guest_cbs->is_in_guest())
+ return;
+
+ fp = regs->regs[4];
+@@ -110,10 +111,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct stackframe fr;
+
+ /* C-SKY does not support virtualization. */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ pr_warn("C-SKY does not support perf in guest mode!");
+ return;
+ }
+diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
+index 8fffa34d4e1c5..bd92ac376e157 100644
+--- a/arch/csky/kernel/probes/kprobes.c
++++ b/arch/csky/kernel/probes/kprobes.c
+@@ -1,5 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0+
+
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/kprobes.h>
+ #include <linux/extable.h>
+ #include <linux/slab.h>
+@@ -28,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv)
+ struct csky_insn_patch *param = priv;
+ unsigned int addr = (unsigned int)param->addr;
+
+- if (atomic_inc_return(&param->cpu_count) == 1) {
++ if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
+ *(u16 *) addr = cpu_to_le16(param->opcode);
+ dcache_wb_range(addr, addr + 2);
+ atomic_inc(&param->cpu_count);
+@@ -77,10 +79,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
+ {
+ unsigned long probe_addr = (unsigned long)p->addr;
+
+- if (probe_addr & 0x1) {
+- pr_warn("Address not aligned.\n");
+- return -EINVAL;
+- }
++ if (probe_addr & 0x1)
++ return -EILSEQ;
+
+ /* copy instruction */
+ p->opcode = le32_to_cpu(*p->addr);
+@@ -124,6 +124,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
+
+ void __kprobes arch_remove_kprobe(struct kprobe *p)
+ {
++ if (p->ainsn.api.insn) {
++ free_insn_slot(p->ainsn.api.insn, 0);
++ p->ainsn.api.insn = NULL;
++ }
+ }
+
+ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+@@ -225,7 +229,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+- pr_warn("Unrecoverable kprobe detected.\n");
++ pr_warn("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ BUG();
+ break;
+diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
+index c7b763d2f526e..8867ddf3e6c77 100644
+--- a/arch/csky/kernel/signal.c
++++ b/arch/csky/kernel/signal.c
+@@ -136,7 +136,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
+ static int
+ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+ {
+- struct rt_sigframe *frame;
++ struct rt_sigframe __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ksig, regs, sizeof(*frame));
+diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c
+index e5fbf8653a215..6e426fba01193 100644
+--- a/arch/csky/kernel/traps.c
++++ b/arch/csky/kernel/traps.c
+@@ -109,7 +109,7 @@ void die(struct pt_regs *regs, const char *str)
+ if (panic_on_oops)
+ panic("Fatal exception");
+ if (ret != NOTIFY_STOP)
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
+@@ -209,7 +209,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs)
+
+ asmlinkage void do_trap_fpe(struct pt_regs *regs)
+ {
+-#ifdef CONFIG_CPU_HAS_FP
++#ifdef CONFIG_CPU_HAS_FPU
+ return fpu_fpe(regs);
+ #else
+ do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->pc,
+@@ -219,7 +219,7 @@ asmlinkage void do_trap_fpe(struct pt_regs *regs)
+
+ asmlinkage void do_trap_priv(struct pt_regs *regs)
+ {
+-#ifdef CONFIG_CPU_HAS_FP
++#ifdef CONFIG_CPU_HAS_FPU
+ if (user_mode(regs) && fpu_libc_helper(regs))
+ return;
+ #endif
+diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
+index 466ad949818a6..7215a46b6b8eb 100644
+--- a/arch/csky/mm/fault.c
++++ b/arch/csky/mm/fault.c
+@@ -67,7 +67,7 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr)
+ pr_alert("Unable to handle kernel paging request at virtual "
+ "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc);
+ die(regs, "Oops");
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c
+index bdbe988d8dbcf..a92c39e03802e 100644
+--- a/arch/h8300/kernel/traps.c
++++ b/arch/h8300/kernel/traps.c
+@@ -17,6 +17,7 @@
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/sched/debug.h>
++#include <linux/sched/task.h>
+ #include <linux/mm_types.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+@@ -106,7 +107,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err)
+ dump(fp);
+
+ spin_unlock_irq(&die_lock);
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ static int kstack_depth_to_print = 24;
+diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c
+index d4bc9c16f2df9..b465441f490df 100644
+--- a/arch/h8300/mm/fault.c
++++ b/arch/h8300/mm/fault.c
+@@ -51,7 +51,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
+ printk(" at virtual address %08lx\n", address);
+ if (!user_mode(regs))
+ die("Oops", regs, error_code);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+
+ return 1;
+ }
+diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h
+deleted file mode 100644
+index ee6c61423a058..0000000000000
+--- a/arch/hexagon/include/asm/timer-regs.h
++++ /dev/null
+@@ -1,26 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-only */
+-/*
+- * Timer support for Hexagon
+- *
+- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+- */
+-
+-#ifndef _ASM_TIMER_REGS_H
+-#define _ASM_TIMER_REGS_H
+-
+-/* This stuff should go into a platform specific file */
+-#define TCX0_CLK_RATE 19200
+-#define TIMER_ENABLE 0
+-#define TIMER_CLR_ON_MATCH 1
+-
+-/*
+- * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
+- * release 1.1, and then it's "adjustable" and probably not defaulted.
+- */
+-#define RTOS_TIMER_INT 3
+-#ifdef CONFIG_HEXAGON_COMET
+-#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
+-#endif
+-#define SLEEP_CLK_RATE 32000
+-
+-#endif
+diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h
+index 8d4ec76fceb45..dfe69e118b2be 100644
+--- a/arch/hexagon/include/asm/timex.h
++++ b/arch/hexagon/include/asm/timex.h
+@@ -7,11 +7,10 @@
+ #define _ASM_TIMEX_H
+
+ #include <asm-generic/timex.h>
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
+
+ /* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
+-#define CLOCK_TICK_RATE TCX0_CLK_RATE
++#define CLOCK_TICK_RATE 19200
+
+ #define ARCH_HAS_READ_CURRENT_TIMER
+
+diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
+index ef5bfef8d490c..719ba3f3c45cd 100644
+--- a/arch/hexagon/include/asm/uaccess.h
++++ b/arch/hexagon/include/asm/uaccess.h
+@@ -25,17 +25,17 @@
+ * Returns true (nonzero) if the memory block *may* be valid, false (zero)
+ * if it is definitely invalid.
+ *
+- * User address space in Hexagon, like x86, goes to 0xbfffffff, so the
+- * simple MSB-based tests used by MIPS won't work. Some further
+- * optimization is probably possible here, but for now, keep it
+- * reasonably simple and not *too* slow. After all, we've got the
+- * MMU for backup.
+ */
++#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
++#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE)
+
+-#define __access_ok(addr, size) \
+- ((get_fs().seg == KERNEL_DS.seg) || \
+- (((unsigned long)addr < get_fs().seg) && \
+- (unsigned long)size < (get_fs().seg - (unsigned long)addr)))
++static inline int __access_ok(unsigned long addr, unsigned long size)
++{
++ unsigned long limit = TASK_SIZE;
++
++ return (size <= limit) && (addr <= (limit - size));
++}
++#define __access_ok __access_ok
+
+ /*
+ * When a kernel-mode page fault is taken, the faulting instruction
+diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
+index feffe527ac929..febc95714d756 100644
+--- a/arch/hexagon/kernel/time.c
++++ b/arch/hexagon/kernel/time.c
+@@ -17,9 +17,10 @@
+ #include <linux/of_irq.h>
+ #include <linux/module.h>
+
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
+
++#define TIMER_ENABLE BIT(0)
++
+ /*
+ * For the clocksource we need:
+ * pcycle frequency (600MHz)
+@@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz;
+ cycles_t thread_freq_mhz;
+ cycles_t sleep_clk_freq;
+
++/*
++ * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
++ * release 1.1, and then it's "adjustable" and probably not defaulted.
++ */
++#define RTOS_TIMER_INT 3
++#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
++
+ static struct resource rtos_timer_resources[] = {
+ {
+ .start = RTOS_TIMER_REGS_ADDR,
+@@ -80,7 +88,7 @@ static int set_next_event(unsigned long delta, struct clock_event_device *evt)
+ iowrite32(0, &rtos_timer->clear);
+
+ iowrite32(delta, &rtos_timer->match);
+- iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
++ iowrite32(TIMER_ENABLE, &rtos_timer->enable);
+ return 0;
+ }
+
+diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
+index edfc35dafeb19..1240f038cce02 100644
+--- a/arch/hexagon/kernel/traps.c
++++ b/arch/hexagon/kernel/traps.c
+@@ -214,7 +214,7 @@ int die(const char *str, struct pt_regs *regs, long err)
+ panic("Fatal exception");
+
+ oops_exit();
+- do_exit(err);
++ make_task_dead(err);
+ return 0;
+ }
+
+diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c
+index d35d69d6588c4..55f75392857b0 100644
+--- a/arch/hexagon/lib/io.c
++++ b/arch/hexagon/lib/io.c
+@@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len)
+ *dst++ = *src;
+
+ }
++EXPORT_SYMBOL(__raw_readsw);
+
+ /*
+ * __raw_writesw - read words a short at a time
+@@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len)
+
+
+ }
++EXPORT_SYMBOL(__raw_writesw);
+
+ /* Pretty sure len is pre-adjusted for the length of the access already */
+ void __raw_readsl(const void __iomem *addr, void *data, int len)
+@@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len)
+
+
+ }
++EXPORT_SYMBOL(__raw_readsl);
+
+ void __raw_writesl(void __iomem *addr, const void *data, int len)
+ {
+@@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len)
+
+
+ }
++EXPORT_SYMBOL(__raw_writesl);
+diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
+index 1e33666fa679b..89869aff8ca29 100644
+--- a/arch/ia64/Kconfig
++++ b/arch/ia64/Kconfig
+@@ -8,6 +8,7 @@ menu "Processor type and features"
+
+ config IA64
+ bool
++ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_DMA_MARK_CLEAN
+ select ARCH_HAS_STRNCPY_FROM_USER
+ select ARCH_HAS_STRNLEN_USER
+@@ -323,7 +324,7 @@ config ARCH_PROC_KCORE_TEXT
+ depends on PROC_KCORE
+
+ config IA64_MCA_RECOVERY
+- tristate "MCA recovery from errors other than TLB."
++ bool "MCA recovery from errors other than TLB."
+
+ config IA64_PALINFO
+ tristate "/proc/pal support"
+diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
+index 40ca23bd228d6..2ce008e2d1644 100644
+--- a/arch/ia64/Kconfig.debug
++++ b/arch/ia64/Kconfig.debug
+@@ -39,7 +39,7 @@ config DISABLE_VHPT
+
+ config IA64_DEBUG_CMPXCHG
+ bool "Turn on compare-and-exchange bug checking (slow!)"
+- depends on DEBUG_KERNEL
++ depends on DEBUG_KERNEL && PRINTK
+ help
+ Selecting this option turns on bug checking for the IA-64
+ compare-and-exchange instructions. This is slow! Itaniums
+diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h
+deleted file mode 100644
+index 0d6b9bded56c6..0000000000000
+--- a/arch/ia64/include/asm/bugs.h
++++ /dev/null
+@@ -1,20 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Needs:
+- * void check_bugs(void);
+- *
+- * Based on <asm-alpha/bugs.h>.
+- *
+- * Modified 1998, 1999, 2003
+- * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
+- */
+-#ifndef _ASM_IA64_BUGS_H
+-#define _ASM_IA64_BUGS_H
+-
+-#include <asm/processor.h>
+-
+-extern void check_bugs (void);
+-
+-#endif /* _ASM_IA64_BUGS_H */
+diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
+index 2d8bcdc27d7f8..05e7c9ad1a965 100644
+--- a/arch/ia64/include/asm/processor.h
++++ b/arch/ia64/include/asm/processor.h
+@@ -542,7 +542,7 @@ ia64_get_irr(unsigned int vector)
+ {
+ unsigned int reg = vector / 64;
+ unsigned int bit = vector % 64;
+- u64 irr;
++ unsigned long irr;
+
+ switch (reg) {
+ case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
+diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
+index 869a3ac6bf23a..7ccc077a60bed 100644
+--- a/arch/ia64/include/asm/timex.h
++++ b/arch/ia64/include/asm/timex.h
+@@ -39,6 +39,7 @@ get_cycles (void)
+ ret = ia64_getreg(_IA64_REG_AR_ITC);
+ return ret;
+ }
++#define get_cycles get_cycles
+
+ extern void ia64_cpu_local_tick (void);
+ extern unsigned long long ia64_native_sched_clock (void);
+diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
+index 35adcf89035ad..99300850abc19 100644
+--- a/arch/ia64/kernel/iosapic.c
++++ b/arch/ia64/kernel/iosapic.c
+@@ -834,7 +834,7 @@ iosapic_unregister_intr (unsigned int gsi)
+ if (iosapic_intr_info[irq].count == 0) {
+ #ifdef CONFIG_SMP
+ /* Clear affinity */
+- cpumask_setall(irq_get_affinity_mask(irq));
++ irq_data_update_affinity(irq_get_irq_data(irq), cpu_all_mask);
+ #endif
+ /* Clear the interrupt information */
+ iosapic_intr_info[irq].dest = 0;
+diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
+index ecef17c7c35b1..275b9ea58c643 100644
+--- a/arch/ia64/kernel/irq.c
++++ b/arch/ia64/kernel/irq.c
+@@ -57,8 +57,8 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+ {
+ if (irq < NR_IRQS) {
+- cpumask_copy(irq_get_affinity_mask(irq),
+- cpumask_of(cpu_logical_id(hwid)));
++ irq_data_update_affinity(irq_get_irq_data(irq),
++ cpumask_of(cpu_logical_id(hwid)));
+ irq_redir[irq] = (char) (redir & 0xff);
+ }
+ }
+diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
+index 441ed04b10378..d4048518a1d7d 100644
+--- a/arch/ia64/kernel/kprobes.c
++++ b/arch/ia64/kernel/kprobes.c
+@@ -398,7 +398,8 @@ static void kretprobe_trampoline(void)
+
+ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+ {
+- regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL);
++ regs->cr_iip = __kretprobe_trampoline_handler(regs,
++ dereference_function_descriptor(kretprobe_trampoline), NULL);
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+@@ -414,7 +415,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ ri->fp = NULL;
+
+ /* Replace the return addr with trampoline addr */
+- regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
++ regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline);
+ }
+
+ /* Check the instruction in the slot is break */
+@@ -902,14 +903,14 @@ static struct kprobe trampoline_p = {
+ int __init arch_init_kprobes(void)
+ {
+ trampoline_p.addr =
+- (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
++ dereference_function_descriptor(kretprobe_trampoline);
+ return register_kprobe(&trampoline_p);
+ }
+
+ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+ {
+ if (p->addr ==
+- (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip)
++ dereference_function_descriptor(kretprobe_trampoline))
+ return 1;
+
+ return 0;
+diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
+index 5bfc79be4cefe..23c203639a968 100644
+--- a/arch/ia64/kernel/mca_drv.c
++++ b/arch/ia64/kernel/mca_drv.c
+@@ -176,7 +176,7 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
+ spin_unlock(&mca_bh_lock);
+
+ /* This process is about to be killed itself */
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ /**
+diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
+index df5c28f252e3d..025e5133c860c 100644
+--- a/arch/ia64/kernel/msi_ia64.c
++++ b/arch/ia64/kernel/msi_ia64.c
+@@ -37,7 +37,7 @@ static int ia64_set_msi_irq_affinity(struct irq_data *idata,
+ msg.data = data;
+
+ pci_write_msi_msg(irq, &msg);
+- cpumask_copy(irq_data_get_affinity_mask(idata), cpumask_of(cpu));
++ irq_data_update_affinity(idata, cpumask_of(cpu));
+
+ return 0;
+ }
+@@ -132,7 +132,7 @@ static int dmar_msi_set_affinity(struct irq_data *data,
+ msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+
+ dmar_msi_write(irq, &msg);
+- cpumask_copy(irq_data_get_affinity_mask(data), mask);
++ irq_data_update_affinity(data, mask);
+
+ return 0;
+ }
+diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
+index a25ab9b37953e..bb99b543dc672 100644
+--- a/arch/ia64/kernel/salinfo.c
++++ b/arch/ia64/kernel/salinfo.c
+@@ -581,7 +581,7 @@ static int salinfo_cpu_pre_down(unsigned int cpu)
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+-static int proc_salinfo_show(struct seq_file *m, void *v)
++static int __maybe_unused proc_salinfo_show(struct seq_file *m, void *v)
+ {
+ unsigned long data = (unsigned long)v;
+ seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
+index 31fb84de2d214..041681e5de472 100644
+--- a/arch/ia64/kernel/setup.c
++++ b/arch/ia64/kernel/setup.c
+@@ -1070,8 +1070,7 @@ cpu_init (void)
+ }
+ }
+
+-void __init
+-check_bugs (void)
++void __init arch_cpu_finalize_init(void)
+ {
+ ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
+ (unsigned long) __end___mckinley_e9_bundles);
+diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
+index e13cb905930fb..753642366e12e 100644
+--- a/arch/ia64/kernel/traps.c
++++ b/arch/ia64/kernel/traps.c
+@@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *regs, long err)
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ return 0;
+ }
+
+diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
+index 42e025cfbd088..9817caba07026 100644
+--- a/arch/ia64/mm/contig.c
++++ b/arch/ia64/mm/contig.c
+@@ -77,7 +77,7 @@ skip:
+ return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+ }
+
+-static inline void
++static inline __init void
+ alloc_per_cpu_data(void)
+ {
+ size_t size = PERCPU_PAGE_SIZE * num_possible_cpus();
+diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
+index 02de2e70c5874..4796cccbf74f3 100644
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -259,7 +259,7 @@ retry:
+ regs = NULL;
+ bust_spinlocks(0);
+ if (regs)
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ return;
+
+ out_of_memory:
+diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
+index f993cb36c0626..921db957d2e67 100644
+--- a/arch/ia64/mm/hugetlbpage.c
++++ b/arch/ia64/mm/hugetlbpage.c
+@@ -58,7 +58,7 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
+
+ pgd = pgd_offset(mm, taddr);
+ if (pgd_present(*pgd)) {
+- p4d = p4d_offset(pgd, addr);
++ p4d = p4d_offset(pgd, taddr);
+ if (p4d_present(*p4d)) {
+ pud = pud_offset(p4d, taddr);
+ if (pud_present(*pud)) {
+diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
+index d6579ec3ea324..4c7b1f50e3b7d 100644
+--- a/arch/ia64/mm/numa.c
++++ b/arch/ia64/mm/numa.c
+@@ -75,5 +75,6 @@ int memory_add_physaddr_to_nid(u64 addr)
+ return 0;
+ return nid;
+ }
++EXPORT_SYMBOL(memory_add_physaddr_to_nid);
+ #endif
+ #endif
+diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
+index acb55a41260dd..2bcdd7d3a1ada 100644
+--- a/arch/ia64/pci/fixup.c
++++ b/arch/ia64/pci/fixup.c
+@@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev)
+ }
+ }
+ }
+-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
++DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S
+index 997b549330156..7d63e2f1555a0 100644
+--- a/arch/m68k/68000/entry.S
++++ b/arch/m68k/68000/entry.S
+@@ -45,6 +45,8 @@ do_trace:
+ jbsr syscall_trace_enter
+ RESTORE_SWITCH_STACK
+ addql #4,%sp
++ addql #1,%d0
++ jeq ret_from_exception
+ movel %sp@(PT_OFF_ORIG_D0),%d1
+ movel #-ENOSYS,%d0
+ cmpl #NR_syscalls,%d1
+diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
+index 0b50da08a9c56..810056d8ea678 100644
+--- a/arch/m68k/Kconfig
++++ b/arch/m68k/Kconfig
+@@ -4,6 +4,7 @@ config M68K
+ default y
+ select ARCH_32BIT_OFF_T
+ select ARCH_HAS_BINFMT_FLAT
++ select ARCH_HAS_CPU_FINALIZE_INIT if MMU
+ select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
+diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
+index 277d61a094637..29558055c71bf 100644
+--- a/arch/m68k/Kconfig.cpu
++++ b/arch/m68k/Kconfig.cpu
+@@ -338,7 +338,7 @@ comment "Processor Specific Options"
+
+ config M68KFPU_EMU
+ bool "Math emulation support"
+- depends on MMU
++ depends on M68KCLASSIC && FPU
+ help
+ At some point in the future, this will cause floating-point math
+ instructions to be emulated by the kernel on machines that lack a
+diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices
+index 6a87b4a5fcac2..e6e3efac18407 100644
+--- a/arch/m68k/Kconfig.devices
++++ b/arch/m68k/Kconfig.devices
+@@ -19,6 +19,7 @@ config HEARTBEAT
+ # We have a dedicated heartbeat LED. :-)
+ config PROC_HARDWARE
+ bool "/proc/hardware support"
++ depends on PROC_FS
+ help
+ Say Y here to support the /proc/hardware file, which gives you
+ access to information about the machine you're running on,
+diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
+index 36fa0c3ef1296..946853a08502e 100644
+--- a/arch/m68k/Kconfig.machine
++++ b/arch/m68k/Kconfig.machine
+@@ -203,6 +203,7 @@ config INIT_LCD
+ config MEMORY_RESERVE
+ int "Memory reservation (MiB)"
+ depends on (UCSIMM || UCDIMM)
++ default 0
+ help
+ Reserve certain memory regions on 68x328 based boards.
+
+@@ -334,6 +335,7 @@ comment "Machine Options"
+
+ config UBOOT
+ bool "Support for U-Boot command line parameters"
++ depends on COLDFIRE
+ help
+ If you say Y here kernel will try to collect command
+ line parameters from the initial u-boot stack.
+diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
+index 0386252e9d043..7dab46728aeda 100644
+--- a/arch/m68k/coldfire/device.c
++++ b/arch/m68k/coldfire/device.c
+@@ -480,7 +480,7 @@ static struct platform_device mcf_i2c5 = {
+ #endif /* MCFI2C_BASE5 */
+ #endif /* IS_ENABLED(CONFIG_I2C_IMX) */
+
+-#if IS_ENABLED(CONFIG_MCF_EDMA)
++#ifdef MCFEDMA_BASE
+
+ static const struct dma_slave_map mcf_edma_map[] = {
+ { "dreq0", "rx-tx", MCF_EDMA_FILTER_PARAM(0) },
+@@ -552,7 +552,7 @@ static struct platform_device mcf_edma = {
+ .platform_data = &mcf_edma_data,
+ }
+ };
+-#endif /* IS_ENABLED(CONFIG_MCF_EDMA) */
++#endif /* MCFEDMA_BASE */
+
+ #ifdef MCFSDHC_BASE
+ static struct mcf_esdhc_platform_data mcf_esdhc_data = {
+@@ -581,7 +581,7 @@ static struct platform_device mcf_esdhc = {
+ };
+ #endif /* MCFSDHC_BASE */
+
+-#if IS_ENABLED(CONFIG_CAN_FLEXCAN)
++#ifdef MCFFLEXCAN_SIZE
+
+ #include <linux/can/platform/flexcan.h>
+
+@@ -620,7 +620,7 @@ static struct platform_device mcf_flexcan0 = {
+ .resource = mcf5441x_flexcan0_resource,
+ .dev.platform_data = &mcf5441x_flexcan_info,
+ };
+-#endif /* IS_ENABLED(CONFIG_CAN_FLEXCAN) */
++#endif /* MCFFLEXCAN_SIZE */
+
+ static struct platform_device *mcf_devices[] __initdata = {
+ &mcf_uart,
+@@ -651,13 +651,13 @@ static struct platform_device *mcf_devices[] __initdata = {
+ &mcf_i2c5,
+ #endif
+ #endif
+-#if IS_ENABLED(CONFIG_MCF_EDMA)
++#ifdef MCFEDMA_BASE
+ &mcf_edma,
+ #endif
+ #ifdef MCFSDHC_BASE
+ &mcf_esdhc,
+ #endif
+-#if IS_ENABLED(CONFIG_CAN_FLEXCAN)
++#ifdef MCFFLEXCAN_SIZE
+ &mcf_flexcan0,
+ #endif
+ };
+diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S
+index 9f337c70243a3..35104c5417ff4 100644
+--- a/arch/m68k/coldfire/entry.S
++++ b/arch/m68k/coldfire/entry.S
+@@ -90,6 +90,8 @@ ENTRY(system_call)
+ jbsr syscall_trace_enter
+ RESTORE_SWITCH_STACK
+ addql #4,%sp
++ addql #1,%d0
++ jeq ret_from_exception
+ movel %d3,%a0
+ jbsr %a0@
+ movel %d0,%sp@(PT_OFF_D0) /* save the return value */
+diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S
+index 439395aa6fb42..081922c72daaa 100644
+--- a/arch/m68k/fpsp040/skeleton.S
++++ b/arch/m68k/fpsp040/skeleton.S
+@@ -499,13 +499,13 @@ in_ea:
+ dbf %d0,morein
+ rts
+
+- .section .fixup,#alloc,#execinstr
++ .section .fixup,"ax"
+ .even
+ 1:
+ jbsr fpsp040_die
+ jbra .Lnotkern
+
+- .section __ex_table,#alloc
++ .section __ex_table,"a"
+ .align 4
+
+ .long in_ea,1b
+diff --git a/arch/m68k/ifpsp060/os.S b/arch/m68k/ifpsp060/os.S
+index 7a0d6e4280665..89e2ec224ab6c 100644
+--- a/arch/m68k/ifpsp060/os.S
++++ b/arch/m68k/ifpsp060/os.S
+@@ -379,11 +379,11 @@ _060_real_access:
+
+
+ | Execption handling for movs access to illegal memory
+- .section .fixup,#alloc,#execinstr
++ .section .fixup,"ax"
+ .even
+ 1: moveq #-1,%d1
+ rts
+-.section __ex_table,#alloc
++.section __ex_table,"a"
+ .align 4
+ .long dmrbuae,1b
+ .long dmrwuae,1b
+diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h
+deleted file mode 100644
+index 745530651e0bf..0000000000000
+--- a/arch/m68k/include/asm/bugs.h
++++ /dev/null
+@@ -1,21 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * include/asm-m68k/bugs.h
+- *
+- * Copyright (C) 1994 Linus Torvalds
+- */
+-
+-/*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Needs:
+- * void check_bugs(void);
+- */
+-
+-#ifdef CONFIG_MMU
+-extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */
+-#else
+-static void check_bugs(void)
+-{
+-}
+-#endif
+diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
+index 87151d67d91e7..bce5ca56c3883 100644
+--- a/arch/m68k/include/asm/pgtable_no.h
++++ b/arch/m68k/include/asm/pgtable_no.h
+@@ -42,7 +42,8 @@ extern void paging_init(void);
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+-#define ZERO_PAGE(vaddr) (virt_to_page(0))
++extern void *empty_zero_page;
++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+ /*
+ * All 32bit addresses are effectively valid for vmalloc...
+diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h
+index 80eb2396d01eb..3ba40bc1dfaa9 100644
+--- a/arch/m68k/include/asm/raw_io.h
++++ b/arch/m68k/include/asm/raw_io.h
+@@ -80,14 +80,14 @@
+ ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; })
+
+ #define rom_out_8(addr, b) \
+- ({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \
++ (void)({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \
+ __w = ((*(__force volatile u8 *) ((_addr | 0x10000) + (__v<<1)))); })
+ #define rom_out_be16(addr, w) \
+- ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
++ (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \
+ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); })
+ #define rom_out_le16(addr, w) \
+- ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
++ (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \
+ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \
+ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); })
+
+diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h
+index 6a21d93582805..f4a7a340f4cae 100644
+--- a/arch/m68k/include/asm/timex.h
++++ b/arch/m68k/include/asm/timex.h
+@@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void)
+ {
+ if (mach_random_get_entropy)
+ return mach_random_get_entropy();
+- return 0;
++ return random_get_entropy_fallback();
+ }
+ #define random_get_entropy random_get_entropy
+
+diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
+index ba670523885c8..60b786eb2254e 100644
+--- a/arch/m68k/include/asm/uaccess.h
++++ b/arch/m68k/include/asm/uaccess.h
+@@ -12,14 +12,17 @@
+ #include <asm/extable.h>
+
+ /* We let the MMU do all checking */
+-static inline int access_ok(const void __user *addr,
++static inline int access_ok(const void __user *ptr,
+ unsigned long size)
+ {
+- /*
+- * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check
+- * for TASK_SIZE!
+- */
+- return 1;
++ unsigned long limit = TASK_SIZE;
++ unsigned long addr = (unsigned long)ptr;
++
++ if (IS_ENABLED(CONFIG_CPU_HAS_ADDRESS_SPACES) ||
++ !IS_ENABLED(CONFIG_MMU))
++ return 1;
++
++ return (size <= limit) && (addr <= (limit - size));
+ }
+
+ /*
+diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
+index 9434fca68de5d..9f3663facaa0e 100644
+--- a/arch/m68k/kernel/entry.S
++++ b/arch/m68k/kernel/entry.S
+@@ -184,9 +184,12 @@ do_trace_entry:
+ jbsr syscall_trace
+ RESTORE_SWITCH_STACK
+ addql #4,%sp
++ addql #1,%d0 | optimization for cmpil #-1,%d0
++ jeq ret_from_syscall
+ movel %sp@(PT_OFF_ORIG_D0),%d0
+ cmpl #NR_syscalls,%d0
+ jcs syscall
++ jra ret_from_syscall
+ badsys:
+ movel #-ENOSYS,%sp@(PT_OFF_D0)
+ jra ret_from_syscall
+diff --git a/arch/m68k/kernel/relocate_kernel.S b/arch/m68k/kernel/relocate_kernel.S
+index ab0f1e7d46535..f7667079e08e9 100644
+--- a/arch/m68k/kernel/relocate_kernel.S
++++ b/arch/m68k/kernel/relocate_kernel.S
+@@ -26,7 +26,7 @@ ENTRY(relocate_new_kernel)
+ lea %pc@(.Lcopy),%a4
+ 2: addl #0x00000000,%a4 /* virt_to_phys() */
+
+- .section ".m68k_fixup","aw"
++ .section .m68k_fixup,"aw"
+ .long M68K_FIXUP_MEMOFFSET, 2b+2
+ .previous
+
+@@ -49,7 +49,7 @@ ENTRY(relocate_new_kernel)
+ lea %pc@(.Lcont040),%a4
+ 5: addl #0x00000000,%a4 /* virt_to_phys() */
+
+- .section ".m68k_fixup","aw"
++ .section .m68k_fixup,"aw"
+ .long M68K_FIXUP_MEMOFFSET, 5b+2
+ .previous
+
+diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
+index 4b51bfd38e5f2..868641a536236 100644
+--- a/arch/m68k/kernel/setup_mm.c
++++ b/arch/m68k/kernel/setup_mm.c
+@@ -10,6 +10,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/cpu.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+ #include <linux/delay.h>
+@@ -87,15 +88,8 @@ void (*mach_sched_init) (void) __initdata = NULL;
+ void (*mach_init_IRQ) (void) __initdata = NULL;
+ void (*mach_get_model) (char *model);
+ void (*mach_get_hardware_list) (struct seq_file *m);
+-/* machine dependent timer functions */
+-int (*mach_hwclk) (int, struct rtc_time*);
+-EXPORT_SYMBOL(mach_hwclk);
+ unsigned int (*mach_get_ss)(void);
+-int (*mach_get_rtc_pll)(struct rtc_pll_info *);
+-int (*mach_set_rtc_pll)(struct rtc_pll_info *);
+ EXPORT_SYMBOL(mach_get_ss);
+-EXPORT_SYMBOL(mach_get_rtc_pll);
+-EXPORT_SYMBOL(mach_set_rtc_pll);
+ void (*mach_reset)( void );
+ void (*mach_halt)( void );
+ void (*mach_power_off)( void );
+@@ -519,7 +513,7 @@ static int __init proc_hardware_init(void)
+ module_init(proc_hardware_init);
+ #endif
+
+-void check_bugs(void)
++void __init arch_cpu_finalize_init(void)
+ {
+ #if defined(CONFIG_FPU) && !defined(CONFIG_M68KFPU_EMU)
+ if (m68k_fputype == 0) {
+diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
+index 5e4104f07a443..19eea73d3c170 100644
+--- a/arch/m68k/kernel/setup_no.c
++++ b/arch/m68k/kernel/setup_no.c
+@@ -50,7 +50,6 @@ char __initdata command_line[COMMAND_LINE_SIZE];
+
+ /* machine dependent timer functions */
+ void (*mach_sched_init)(void) __initdata = NULL;
+-int (*mach_hwclk) (int, struct rtc_time*);
+
+ /* machine dependent reboot functions */
+ void (*mach_reset)(void);
+diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
+index 338817d0cb3fb..6cc68f29ab13c 100644
+--- a/arch/m68k/kernel/signal.c
++++ b/arch/m68k/kernel/signal.c
+@@ -625,6 +625,7 @@ static inline void siginfo_build_tests(void)
+ /* _sigfault._perf */
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
++ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x18);
+
+ /* _sigpoll */
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c);
+@@ -857,11 +858,17 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *
+ }
+
+ static inline void __user *
+-get_sigframe(struct ksignal *ksig, size_t frame_size)
++get_sigframe(struct ksignal *ksig, struct pt_regs *tregs, size_t frame_size)
+ {
+ unsigned long usp = sigsp(rdusp(), ksig);
++ unsigned long gap = 0;
+
+- return (void __user *)((usp - frame_size) & -8UL);
++ if (CPU_IS_020_OR_030 && tregs->format == 0xb) {
++ /* USP is unreliable so use worst-case value */
++ gap = 256;
++ }
++
++ return (void __user *)((usp - gap - frame_size) & -8UL);
+ }
+
+ static int setup_frame(struct ksignal *ksig, sigset_t *set,
+@@ -879,7 +886,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
+ return -EFAULT;
+ }
+
+- frame = get_sigframe(ksig, sizeof(*frame) + fsize);
++ frame = get_sigframe(ksig, tregs, sizeof(*frame) + fsize);
+
+ if (fsize)
+ err |= copy_to_user (frame + 1, regs + 1, fsize);
+@@ -951,7 +958,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ return -EFAULT;
+ }
+
+- frame = get_sigframe(ksig, sizeof(*frame));
++ frame = get_sigframe(ksig, tregs, sizeof(*frame));
+
+ if (fsize)
+ err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
+diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
+index 340ffeea0a9dc..a97600b2af502 100644
+--- a/arch/m68k/kernel/time.c
++++ b/arch/m68k/kernel/time.c
+@@ -63,6 +63,15 @@ void timer_heartbeat(void)
+ #endif /* CONFIG_HEARTBEAT */
+
+ #ifdef CONFIG_M68KCLASSIC
++/* machine dependent timer functions */
++int (*mach_hwclk) (int, struct rtc_time*);
++EXPORT_SYMBOL(mach_hwclk);
++
++int (*mach_get_rtc_pll)(struct rtc_pll_info *);
++int (*mach_set_rtc_pll)(struct rtc_pll_info *);
++EXPORT_SYMBOL(mach_get_rtc_pll);
++EXPORT_SYMBOL(mach_set_rtc_pll);
++
+ #if !IS_BUILTIN(CONFIG_RTC_DRV_GENERIC)
+ void read_persistent_clock64(struct timespec64 *ts)
+ {
+diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
+index 9718ce94cc845..6f647742a6ca9 100644
+--- a/arch/m68k/kernel/traps.c
++++ b/arch/m68k/kernel/traps.c
+@@ -30,6 +30,7 @@
+ #include <linux/init.h>
+ #include <linux/ptrace.h>
+ #include <linux/kallsyms.h>
++#include <linux/extable.h>
+
+ #include <asm/setup.h>
+ #include <asm/fpu.h>
+@@ -544,7 +545,8 @@ static inline void bus_error030 (struct frame *fp)
+ errorcode |= 2;
+
+ if (mmusr & (MMU_I | MMU_WP)) {
+- if (ssw & 4) {
++ /* We might have an exception table for this PC */
++ if (ssw & 4 && !search_exception_tables(fp->ptregs.pc)) {
+ pr_err("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+ ssw & RW ? "read" : "write",
+ fp->un.fmtb.daddr,
+@@ -1131,7 +1133,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr)
+ pr_crit("%s: %08x\n", str, nr);
+ show_registers(fp);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ asmlinkage void set_esp0(unsigned long ssp)
+@@ -1145,7 +1147,7 @@ asmlinkage void set_esp0(unsigned long ssp)
+ */
+ asmlinkage void fpsp040_die(void)
+ {
+- force_sigsegv(SIGSEGV);
++ force_exit_sig(SIGSEGV);
+ }
+
+ #ifdef CONFIG_M68KFPU_EMU
+diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
+index ef46e77e97a5b..fcb3a0d8421c5 100644
+--- a/arch/m68k/mm/fault.c
++++ b/arch/m68k/mm/fault.c
+@@ -48,7 +48,7 @@ int send_fault_sig(struct pt_regs *regs)
+ pr_alert("Unable to handle kernel access");
+ pr_cont(" at virtual address %p\n", addr);
+ die_if_kernel("Oops", regs, 0 /*error_code*/);
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ return 1;
+diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
+index d2a8ef9f89787..3fe96979d2c62 100644
+--- a/arch/microblaze/include/asm/uaccess.h
++++ b/arch/microblaze/include/asm/uaccess.h
+@@ -39,24 +39,13 @@
+
+ # define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
+
+-static inline int access_ok(const void __user *addr, unsigned long size)
++static inline int __access_ok(unsigned long addr, unsigned long size)
+ {
+- if (!size)
+- goto ok;
++ unsigned long limit = user_addr_max();
+
+- if ((get_fs().seg < ((unsigned long)addr)) ||
+- (get_fs().seg < ((unsigned long)addr + size - 1))) {
+- pr_devel("ACCESS fail at 0x%08x (size 0x%x), seg 0x%08x\n",
+- (__force u32)addr, (u32)size,
+- (u32)get_fs().seg);
+- return 0;
+- }
+-ok:
+- pr_devel("ACCESS OK at 0x%08x (size 0x%x), seg 0x%08x\n",
+- (__force u32)addr, (u32)size,
+- (u32)get_fs().seg);
+- return 1;
++ return (size <= limit) && (addr <= (limit - size));
+ }
++#define access_ok(addr, size) __access_ok((unsigned long)addr, size)
+
+ # define __FIXUP_SECTION ".section .fixup,\"ax\"\n"
+ # define __EX_TABLE_SECTION ".section __ex_table,\"a\"\n"
+@@ -141,27 +130,27 @@ extern long __user_bad(void);
+
+ #define __get_user(x, ptr) \
+ ({ \
+- unsigned long __gu_val = 0; \
+ long __gu_err; \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+- __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \
++ __get_user_asm("lbu", (ptr), x, __gu_err); \
+ break; \
+ case 2: \
+- __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \
++ __get_user_asm("lhu", (ptr), x, __gu_err); \
+ break; \
+ case 4: \
+- __get_user_asm("lw", (ptr), __gu_val, __gu_err); \
++ __get_user_asm("lw", (ptr), x, __gu_err); \
+ break; \
+- case 8: \
+- __gu_err = __copy_from_user(&__gu_val, ptr, 8); \
+- if (__gu_err) \
+- __gu_err = -EFAULT; \
++ case 8: { \
++ __u64 __x = 0; \
++ __gu_err = raw_copy_from_user(&__x, ptr, 8) ? \
++ -EFAULT : 0; \
++ (x) = (typeof(x))(typeof((x) - (x)))__x; \
+ break; \
++ } \
+ default: \
+ /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\
+ } \
+- x = (__force __typeof__(*(ptr))) __gu_val; \
+ __gu_err; \
+ })
+
+diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
+index 908788497b287..fd153d5fab982 100644
+--- a/arch/microblaze/kernel/exceptions.c
++++ b/arch/microblaze/kernel/exceptions.c
+@@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs *fp, long err)
+ pr_warn("Oops: %s, sig: %ld\n", str, err);
+ show_regs(fp);
+ spin_unlock_irq(&die_lock);
+- /* do_exit() should take care of panic'ing from an interrupt
++ /* make_task_dead() should take care of panic'ing from an interrupt
+ * context so we don't handle it here
+ */
+- do_exit(err);
++ make_task_dead(err);
+ }
+
+ /* for user application debugging */
+diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
+index 584081df89c28..6e3f36c841e5d 100644
+--- a/arch/mips/Kbuild.platforms
++++ b/arch/mips/Kbuild.platforms
+@@ -38,4 +38,4 @@ platform-$(CONFIG_MACH_TX49XX) += txx9/
+ platform-$(CONFIG_MACH_VR41XX) += vr41xx/
+
+ # include the platform specific files
+-include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y))
++include $(patsubst %/, $(srctree)/arch/mips/%/Platform, $(platform-y))
+diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
+index 6b8f591c5054c..13b09c7516e91 100644
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -4,6 +4,7 @@ config MIPS
+ default y
+ select ARCH_32BIT_OFF_T if !64BIT
+ select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
++ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
+ select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_KCOV
+@@ -81,6 +82,7 @@ config MIPS
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NMI
++ select HAVE_PATA_PLATFORM
+ select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+@@ -332,6 +334,9 @@ config BCM63XX
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_HAS_EARLY_PRINTK
++ select SYS_HAS_CPU_BMIPS32_3300
++ select SYS_HAS_CPU_BMIPS4350
++ select SYS_HAS_CPU_BMIPS4380
+ select SWAP_IO_SPACE
+ select GPIOLIB
+ select MIPS_L1_CACHE_SHIFT_4
+@@ -1379,6 +1384,7 @@ config CPU_LOONGSON64
+ select MIPS_ASID_BITS_VARIABLE
+ select MIPS_PGD_C0_CONTEXT
+ select MIPS_L1_CACHE_SHIFT_6
++ select MIPS_FP_SUPPORT
+ select GPIOLIB
+ select SWIOTLB
+ select HAVE_KVM
+@@ -1989,6 +1995,10 @@ config SYS_HAS_CPU_MIPS64_R1
+ config SYS_HAS_CPU_MIPS64_R2
+ bool
+
++config SYS_HAS_CPU_MIPS64_R5
++ bool
++ select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT
++
+ config SYS_HAS_CPU_MIPS64_R6
+ bool
+ select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT
+@@ -2153,7 +2163,7 @@ config CPU_SUPPORTS_ADDRWINCFG
+ bool
+ config CPU_SUPPORTS_HUGEPAGES
+ bool
+- depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA))
++ depends on !(32BIT && (PHYS_ADDR_T_64BIT || EVA))
+ config MIPS_PGD_C0_CONTEXT
+ bool
+ depends on 64BIT
+@@ -3185,7 +3195,7 @@ config STACKTRACE_SUPPORT
+ config PGTABLE_LEVELS
+ int
+ default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
+- default 3 if 64BIT && !PAGE_SIZE_64KB
++ default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48)
+ default 2
+
+ config MIPS_AUTO_PFN_OFFSET
+diff --git a/arch/mips/Makefile b/arch/mips/Makefile
+index ea3cd080a1c7d..151e98698f763 100644
+--- a/arch/mips/Makefile
++++ b/arch/mips/Makefile
+@@ -254,7 +254,9 @@ endif
+ #
+ # Board-dependent options and extra files
+ #
++ifdef need-compiler
+ include $(srctree)/arch/mips/Kbuild.platforms
++endif
+
+ ifdef CONFIG_PHYSICAL_START
+ load-y = $(CONFIG_PHYSICAL_START)
+@@ -277,8 +279,8 @@ ifdef CONFIG_64BIT
+ endif
+ endif
+
+- ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
+- cflags-y += -msym32 -DKBUILD_64BIT_SYM32
++ ifeq ($(KBUILD_SYM32), y)
++ cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
+ else
+ ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
+ $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
+@@ -319,7 +321,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+
+ KBUILD_LDFLAGS += -m $(ld-emul)
+
+-ifdef CONFIG_MIPS
++ifdef need-compiler
+ CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
+ egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
+ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
+diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
+index 4ca2c28878e0f..e9ee9ab90a0c6 100644
+--- a/arch/mips/alchemy/common/dbdma.c
++++ b/arch/mips/alchemy/common/dbdma.c
+@@ -30,6 +30,7 @@
+ *
+ */
+
++#include <linux/dma-map-ops.h> /* for dma_default_coherent */
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/slab.h>
+@@ -623,17 +624,18 @@ u32 au1xxx_dbdma_put_source(u32 chanid, dma_addr_t buf, int nbytes, u32 flags)
+ dp->dscr_cmd0 &= ~DSCR_CMD0_IE;
+
+ /*
+- * There is an errata on the Au1200/Au1550 parts that could result
+- * in "stale" data being DMA'ed. It has to do with the snoop logic on
+- * the cache eviction buffer. DMA_NONCOHERENT is on by default for
+- * these parts. If it is fixed in the future, these dma_cache_inv will
+- * just be nothing more than empty macros. See io.h.
++ * There is an erratum on certain Au1200/Au1550 revisions that could
++ * result in "stale" data being DMA'ed. It has to do with the snoop
++ * logic on the cache eviction buffer. dma_default_coherent is set
++ * to false on these parts.
+ */
+- dma_cache_wback_inv((unsigned long)buf, nbytes);
++ if (!dma_default_coherent)
++ dma_cache_wback_inv(KSEG0ADDR(buf), nbytes);
+ dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */
+ wmb(); /* drain writebuffer */
+ dma_cache_wback_inv((unsigned long)dp, sizeof(*dp));
+ ctp->chan_ptr->ddma_dbell = 0;
++ wmb(); /* force doorbell write out to dma engine */
+
+ /* Get next descriptor pointer. */
+ ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
+@@ -685,17 +687,18 @@ u32 au1xxx_dbdma_put_dest(u32 chanid, dma_addr_t buf, int nbytes, u32 flags)
+ dp->dscr_source1, dp->dscr_dest0, dp->dscr_dest1);
+ #endif
+ /*
+- * There is an errata on the Au1200/Au1550 parts that could result in
+- * "stale" data being DMA'ed. It has to do with the snoop logic on the
+- * cache eviction buffer. DMA_NONCOHERENT is on by default for these
+- * parts. If it is fixed in the future, these dma_cache_inv will just
+- * be nothing more than empty macros. See io.h.
++ * There is an erratum on certain Au1200/Au1550 revisions that could
++ * result in "stale" data being DMA'ed. It has to do with the snoop
++ * logic on the cache eviction buffer. dma_default_coherent is set
++ * to false on these parts.
+ */
+- dma_cache_inv((unsigned long)buf, nbytes);
++ if (!dma_default_coherent)
++ dma_cache_inv(KSEG0ADDR(buf), nbytes);
+ dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */
+ wmb(); /* drain writebuffer */
+ dma_cache_wback_inv((unsigned long)dp, sizeof(*dp));
+ ctp->chan_ptr->ddma_dbell = 0;
++ wmb(); /* force doorbell write out to dma engine */
+
+ /* Get next descriptor pointer. */
+ ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
+diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c
+index 2c52ee27b4f25..50de86eb8784c 100644
+--- a/arch/mips/alchemy/devboards/db1000.c
++++ b/arch/mips/alchemy/devboards/db1000.c
+@@ -14,7 +14,6 @@
+ #include <linux/interrupt.h>
+ #include <linux/leds.h>
+ #include <linux/mmc/host.h>
+-#include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/pm.h>
+ #include <linux/spi/spi.h>
+@@ -167,12 +166,7 @@ static struct platform_device db1x00_audio_dev = {
+
+ static irqreturn_t db1100_mmc_cd(int irq, void *ptr)
+ {
+- void (*mmc_cd)(struct mmc_host *, unsigned long);
+- /* link against CONFIG_MMC=m */
+- mmc_cd = symbol_get(mmc_detect_change);
+- mmc_cd(ptr, msecs_to_jiffies(500));
+- symbol_put(mmc_detect_change);
+-
++ mmc_detect_change(ptr, msecs_to_jiffies(500));
+ return IRQ_HANDLED;
+ }
+
+diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
+index 1864eb935ca57..76080c71a2a7b 100644
+--- a/arch/mips/alchemy/devboards/db1200.c
++++ b/arch/mips/alchemy/devboards/db1200.c
+@@ -10,7 +10,6 @@
+ #include <linux/gpio.h>
+ #include <linux/i2c.h>
+ #include <linux/init.h>
+-#include <linux/module.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
+ #include <linux/leds.h>
+@@ -340,14 +339,7 @@ static irqreturn_t db1200_mmc_cd(int irq, void *ptr)
+
+ static irqreturn_t db1200_mmc_cdfn(int irq, void *ptr)
+ {
+- void (*mmc_cd)(struct mmc_host *, unsigned long);
+-
+- /* link against CONFIG_MMC=m */
+- mmc_cd = symbol_get(mmc_detect_change);
+- if (mmc_cd) {
+- mmc_cd(ptr, msecs_to_jiffies(200));
+- symbol_put(mmc_detect_change);
+- }
++ mmc_detect_change(ptr, msecs_to_jiffies(200));
+
+ msleep(100); /* debounce */
+ if (irq == DB1200_SD0_INSERT_INT)
+@@ -431,14 +423,7 @@ static irqreturn_t pb1200_mmc1_cd(int irq, void *ptr)
+
+ static irqreturn_t pb1200_mmc1_cdfn(int irq, void *ptr)
+ {
+- void (*mmc_cd)(struct mmc_host *, unsigned long);
+-
+- /* link against CONFIG_MMC=m */
+- mmc_cd = symbol_get(mmc_detect_change);
+- if (mmc_cd) {
+- mmc_cd(ptr, msecs_to_jiffies(200));
+- symbol_put(mmc_detect_change);
+- }
++ mmc_detect_change(ptr, msecs_to_jiffies(200));
+
+ msleep(100); /* debounce */
+ if (irq == PB1200_SD1_INSERT_INT)
+diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
+index cd72eaa1168f7..ca71e5ed51abd 100644
+--- a/arch/mips/alchemy/devboards/db1300.c
++++ b/arch/mips/alchemy/devboards/db1300.c
+@@ -17,7 +17,6 @@
+ #include <linux/interrupt.h>
+ #include <linux/ata_platform.h>
+ #include <linux/mmc/host.h>
+-#include <linux/module.h>
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/platnand.h>
+ #include <linux/platform_device.h>
+@@ -459,14 +458,7 @@ static irqreturn_t db1300_mmc_cd(int irq, void *ptr)
+
+ static irqreturn_t db1300_mmc_cdfn(int irq, void *ptr)
+ {
+- void (*mmc_cd)(struct mmc_host *, unsigned long);
+-
+- /* link against CONFIG_MMC=m. We can only be called once MMC core has
+- * initialized the controller, so symbol_get() should always succeed.
+- */
+- mmc_cd = symbol_get(mmc_detect_change);
+- mmc_cd(ptr, msecs_to_jiffies(200));
+- symbol_put(mmc_detect_change);
++ mmc_detect_change(ptr, msecs_to_jiffies(200));
+
+ msleep(100); /* debounce */
+ if (irq == DB1300_SD1_INSERT_INT)
+diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
+index 0a63721d0fbf3..5a33d6b48d779 100644
+--- a/arch/mips/bcm47xx/prom.c
++++ b/arch/mips/bcm47xx/prom.c
+@@ -86,7 +86,7 @@ static __init void prom_init_mem(void)
+ pr_debug("Assume 128MB RAM\n");
+ break;
+ }
+- if (!memcmp(prom_init, prom_init + mem, 32))
++ if (!memcmp((void *)prom_init, (void *)prom_init + mem, 32))
+ break;
+ }
+ lowmem = mem;
+@@ -159,7 +159,7 @@ void __init bcm47xx_prom_highmem_init(void)
+
+ off = EXTVBASE + __pa(off);
+ for (extmem = 128 << 20; extmem < 512 << 20; extmem <<= 1) {
+- if (!memcmp(prom_init, (void *)(off + extmem), 16))
++ if (!memcmp((void *)prom_init, (void *)(off + extmem), 16))
+ break;
+ }
+ extmem -= lowmem;
+diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
+index 5a3e325275d0d..86a6e25908664 100644
+--- a/arch/mips/bcm63xx/clk.c
++++ b/arch/mips/bcm63xx/clk.c
+@@ -361,6 +361,8 @@ static struct clk clk_periph = {
+ */
+ int clk_enable(struct clk *clk)
+ {
++ if (!clk)
++ return 0;
+ mutex_lock(&clocks_mutex);
+ clk_enable_unlocked(clk);
+ mutex_unlock(&clocks_mutex);
+@@ -381,6 +383,18 @@ void clk_disable(struct clk *clk)
+
+ EXPORT_SYMBOL(clk_disable);
+
++struct clk *clk_get_parent(struct clk *clk)
++{
++ return NULL;
++}
++EXPORT_SYMBOL(clk_get_parent);
++
++int clk_set_parent(struct clk *clk, struct clk *parent)
++{
++ return 0;
++}
++EXPORT_SYMBOL(clk_set_parent);
++
+ unsigned long clk_get_rate(struct clk *clk)
+ {
+ if (!clk)
+diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
+index 915ce4b189c15..76c5d8e4d6e2d 100644
+--- a/arch/mips/bmips/dma.c
++++ b/arch/mips/bmips/dma.c
+@@ -64,6 +64,8 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+ return dma_addr;
+ }
+
++bool bmips_rac_flush_disable;
++
+ void arch_sync_dma_for_cpu_all(void)
+ {
+ void __iomem *cbr = BMIPS_GET_CBR();
+@@ -74,6 +76,9 @@ void arch_sync_dma_for_cpu_all(void)
+ boot_cpu_type() != CPU_BMIPS4380)
+ return;
+
++ if (unlikely(bmips_rac_flush_disable))
++ return;
++
+ /* Flush stale data out of the readahead cache */
+ cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
+ __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
+diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
+index 31bcfa4e08b99..45c7cf582348e 100644
+--- a/arch/mips/bmips/setup.c
++++ b/arch/mips/bmips/setup.c
+@@ -34,6 +34,8 @@
+ #define REG_BCM6328_OTP ((void __iomem *)CKSEG1ADDR(0x1000062c))
+ #define BCM6328_TP1_DISABLED BIT(9)
+
++extern bool bmips_rac_flush_disable;
++
+ static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000;
+
+ struct bmips_quirk {
+@@ -103,6 +105,12 @@ static void bcm6358_quirks(void)
+ * disable SMP for now
+ */
+ bmips_smp_enabled = 0;
++
++ /*
++ * RAC flush causes kernel panics on BCM6358 when booting from TP1
++ * because the bootloader is not initializing it properly.
++ */
++ bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
+ }
+
+ static void bcm6368_quirks(void)
+diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
+index 3548b3b452699..705b9e7f8035a 100644
+--- a/arch/mips/boot/compressed/Makefile
++++ b/arch/mips/boot/compressed/Makefile
+@@ -56,6 +56,8 @@ $(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
+
+ vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
++vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o $(obj)/clz_ctz.o
++
+ extra-y += ashldi3.c
+ $(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
+ $(call if_changed,shipped)
+@@ -64,6 +66,10 @@ extra-y += bswapsi.c
+ $(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
+ $(call if_changed,shipped)
+
++extra-y += bswapdi.c
++$(obj)/bswapdi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
++ $(call if_changed,shipped)
++
+ targets := $(notdir $(vmlinuzobjs-y))
+
+ targets += vmlinux.bin
+diff --git a/arch/mips/boot/compressed/clz_ctz.c b/arch/mips/boot/compressed/clz_ctz.c
+new file mode 100644
+index 0000000000000..b4a1b6eb2f8ad
+--- /dev/null
++++ b/arch/mips/boot/compressed/clz_ctz.c
+@@ -0,0 +1,2 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include "../../../../lib/clz_ctz.c"
+diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
+index a688809beebca..74d49dc134384 100644
+--- a/arch/mips/boot/dts/ingenic/ci20.dts
++++ b/arch/mips/boot/dts/ingenic/ci20.dts
+@@ -99,7 +99,7 @@
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+
+- gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
++ gpio = <&gpf 15 GPIO_ACTIVE_LOW>;
+ enable-active-high;
+ };
+ };
+diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
+index 9e34f433b9b58..efbbddaf0fde5 100644
+--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
++++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
+@@ -450,7 +450,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+- eth0_addr: eth-mac-addr@0x22 {
++ eth0_addr: eth-mac-addr@22 {
+ reg = <0x22 0x6>;
+ };
+ };
+diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+index 1daa0c6b6f4ea..572a053e30ed5 100644
+--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
++++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+@@ -211,7 +211,7 @@ union cvmx_helper_link_info __cvmx_helper_board_link_get(int ipd_port)
+ {
+ union cvmx_helper_link_info result;
+
+- WARN(!octeon_is_simulation(),
++ WARN_ONCE(!octeon_is_simulation(),
+ "Using deprecated link status - please update your DT");
+
+ /* Unless we fix it later, all links are defaulted to down */
+diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
+index 6044ff4710022..a18ad2daf0052 100644
+--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
++++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c
+@@ -1100,7 +1100,7 @@ union cvmx_helper_link_info cvmx_helper_link_get(int ipd_port)
+ if (index == 0)
+ result = __cvmx_helper_rgmii_link_get(ipd_port);
+ else {
+- WARN(1, "Using deprecated link status - please update your DT");
++ WARN_ONCE(1, "Using deprecated link status - please update your DT");
+ result.s.full_duplex = 1;
+ result.s.link_up = 1;
+ result.s.speed = 1000;
+diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
+index be5d4afcd30f9..353dfeee0a6d3 100644
+--- a/arch/mips/cavium-octeon/octeon-irq.c
++++ b/arch/mips/cavium-octeon/octeon-irq.c
+@@ -127,6 +127,16 @@ static void octeon_irq_free_cd(struct irq_domain *d, unsigned int irq)
+ static int octeon_irq_force_ciu_mapping(struct irq_domain *domain,
+ int irq, int line, int bit)
+ {
++ struct device_node *of_node;
++ int ret;
++
++ of_node = irq_domain_get_of_node(domain);
++ if (!of_node)
++ return -EINVAL;
++ ret = irq_alloc_desc_at(irq, of_node_to_nid(of_node));
++ if (ret < 0)
++ return ret;
++
+ return irq_domain_associate(domain, irq, line << 6 | bit);
+ }
+
+diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
+index 0a515cde1c183..25860fba6218d 100644
+--- a/arch/mips/cavium-octeon/octeon-memcpy.S
++++ b/arch/mips/cavium-octeon/octeon-memcpy.S
+@@ -74,7 +74,7 @@
+ #define EXC(inst_reg,addr,handler) \
+ 9: inst_reg, addr; \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous
+
+ /*
+diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
+index d56e9b9d2e434..ce05c0dd3acd7 100644
+--- a/arch/mips/cavium-octeon/octeon-platform.c
++++ b/arch/mips/cavium-octeon/octeon-platform.c
+@@ -86,11 +86,12 @@ static void octeon2_usb_clocks_start(struct device *dev)
+ "refclk-frequency", &clock_rate);
+ if (i) {
+ dev_err(dev, "No UCTL \"refclk-frequency\"\n");
++ of_node_put(uctl_node);
+ goto exit;
+ }
+ i = of_property_read_string(uctl_node,
+ "refclk-type", &clock_type);
+-
++ of_node_put(uctl_node);
+ if (!i && strcmp("crystal", clock_type) == 0)
+ is_crystal_clock = true;
+ }
+@@ -328,6 +329,7 @@ static int __init octeon_ehci_device_init(void)
+
+ pd->dev.platform_data = &octeon_ehci_pdata;
+ octeon_ehci_hw_start(&pd->dev);
++ put_device(&pd->dev);
+
+ return ret;
+ }
+@@ -391,6 +393,7 @@ static int __init octeon_ohci_device_init(void)
+
+ pd->dev.platform_data = &octeon_ohci_pdata;
+ octeon_ohci_hw_start(&pd->dev);
++ put_device(&pd->dev);
+
+ return ret;
+ }
+diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c
+index 6e4d3619137af..4df919d26b082 100644
+--- a/arch/mips/cavium-octeon/octeon-usb.c
++++ b/arch/mips/cavium-octeon/octeon-usb.c
+@@ -537,6 +537,7 @@ static int __init dwc3_octeon_device_init(void)
+ devm_iounmap(&pdev->dev, base);
+ devm_release_mem_region(&pdev->dev, res->start,
+ resource_size(res));
++ put_device(&pdev->dev);
+ }
+ } while (node != NULL);
+
+diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
+index 85f1955b4b004..4a81297e21a72 100644
+--- a/arch/mips/configs/decstation_64_defconfig
++++ b/arch/mips/configs/decstation_64_defconfig
+@@ -53,8 +53,6 @@ CONFIG_IPV6_SUBTREES=y
+ CONFIG_NETWORK_SECMARK=y
+ CONFIG_IP_SCTP=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+-CONFIG_DECNET_ROUTER=y
+ # CONFIG_WIRELESS is not set
+ # CONFIG_UEVENT_HELPER is not set
+ # CONFIG_FW_LOADER is not set
+diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
+index 30a6eafdb1d01..fd35454bae4ce 100644
+--- a/arch/mips/configs/decstation_defconfig
++++ b/arch/mips/configs/decstation_defconfig
+@@ -49,8 +49,6 @@ CONFIG_IPV6_SUBTREES=y
+ CONFIG_NETWORK_SECMARK=y
+ CONFIG_IP_SCTP=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+-CONFIG_DECNET_ROUTER=y
+ # CONFIG_WIRELESS is not set
+ # CONFIG_UEVENT_HELPER is not set
+ # CONFIG_FW_LOADER is not set
+diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
+index e2b58dbf4aa9a..7ed8f4c7cbdd9 100644
+--- a/arch/mips/configs/decstation_r4k_defconfig
++++ b/arch/mips/configs/decstation_r4k_defconfig
+@@ -48,8 +48,6 @@ CONFIG_IPV6_SUBTREES=y
+ CONFIG_NETWORK_SECMARK=y
+ CONFIG_IP_SCTP=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+-CONFIG_DECNET_ROUTER=y
+ # CONFIG_WIRELESS is not set
+ # CONFIG_UEVENT_HELPER is not set
+ # CONFIG_FW_LOADER is not set
+diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
+index 5c24ac7fdf56d..ba47c5e929b7f 100644
+--- a/arch/mips/configs/fuloong2e_defconfig
++++ b/arch/mips/configs/fuloong2e_defconfig
+@@ -206,7 +206,6 @@ CONFIG_NFSD_V3_ACL=y
+ CONFIG_NFSD_V4=y
+ CONFIG_CIFS=m
+ CONFIG_CIFS_STATS2=y
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_CIFS_DEBUG2=y
+diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
+index 5cb91509bb7cf..fc2e9b50d2862 100644
+--- a/arch/mips/configs/gpr_defconfig
++++ b/arch/mips/configs/gpr_defconfig
+@@ -69,7 +69,6 @@ CONFIG_IP_NF_RAW=m
+ CONFIG_IP_NF_ARPTABLES=m
+ CONFIG_IP_NF_ARPFILTER=m
+ CONFIG_IP_NF_ARP_MANGLE=m
+-CONFIG_DECNET_NF_GRABULATOR=m
+ CONFIG_BRIDGE_NF_EBTABLES=m
+ CONFIG_BRIDGE_EBT_BROUTE=m
+ CONFIG_BRIDGE_EBT_T_FILTER=m
+@@ -99,7 +98,6 @@ CONFIG_ATM_MPOA=m
+ CONFIG_ATM_BR2684=m
+ CONFIG_BRIDGE=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+ CONFIG_LLC2=m
+ CONFIG_ATALK=m
+ CONFIG_DEV_APPLETALK=m
+diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
+index 614af02d83e6e..6fb9bc29f4a03 100644
+--- a/arch/mips/configs/malta_qemu_32r6_defconfig
++++ b/arch/mips/configs/malta_qemu_32r6_defconfig
+@@ -165,7 +165,6 @@ CONFIG_TMPFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
+index 9c051f8fd3300..eb72df528243a 100644
+--- a/arch/mips/configs/maltaaprp_defconfig
++++ b/arch/mips/configs/maltaaprp_defconfig
+@@ -166,7 +166,6 @@ CONFIG_TMPFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
+index 2e90d97551d6f..1fb40d310f49c 100644
+--- a/arch/mips/configs/maltasmvp_defconfig
++++ b/arch/mips/configs/maltasmvp_defconfig
+@@ -167,7 +167,6 @@ CONFIG_TMPFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
+index d1f7fdb27284b..75cb778c61496 100644
+--- a/arch/mips/configs/maltasmvp_eva_defconfig
++++ b/arch/mips/configs/maltasmvp_eva_defconfig
+@@ -169,7 +169,6 @@ CONFIG_TMPFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
+index 48e5bd4924522..7b4f247dc60cc 100644
+--- a/arch/mips/configs/maltaup_defconfig
++++ b/arch/mips/configs/maltaup_defconfig
+@@ -165,7 +165,6 @@ CONFIG_TMPFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+ CONFIG_NLS_CODEPAGE_437=m
+diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
+index 205d3b34528c3..42e75cb72e6be 100644
+--- a/arch/mips/configs/mtx1_defconfig
++++ b/arch/mips/configs/mtx1_defconfig
+@@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m
+ CONFIG_IP6_NF_TARGET_REJECT=m
+ CONFIG_IP6_NF_MANGLE=m
+ CONFIG_IP6_NF_RAW=m
+-CONFIG_DECNET_NF_GRABULATOR=m
+ CONFIG_BRIDGE_NF_EBTABLES=m
+ CONFIG_BRIDGE_EBT_BROUTE=m
+ CONFIG_BRIDGE_EBT_T_FILTER=m
+@@ -146,7 +145,6 @@ CONFIG_ATM_MPOA=m
+ CONFIG_ATM_BR2684=m
+ CONFIG_BRIDGE=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+ CONFIG_LLC2=m
+ CONFIG_ATALK=m
+ CONFIG_DEV_APPLETALK=m
+diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
+index 32c2906117232..3700c6ccd80ba 100644
+--- a/arch/mips/configs/nlm_xlp_defconfig
++++ b/arch/mips/configs/nlm_xlp_defconfig
+@@ -200,7 +200,6 @@ CONFIG_IP6_NF_TARGET_REJECT=m
+ CONFIG_IP6_NF_MANGLE=m
+ CONFIG_IP6_NF_RAW=m
+ CONFIG_IP6_NF_SECURITY=m
+-CONFIG_DECNET_NF_GRABULATOR=m
+ CONFIG_BRIDGE_NF_EBTABLES=m
+ CONFIG_BRIDGE_EBT_BROUTE=m
+ CONFIG_BRIDGE_EBT_T_FILTER=m
+@@ -234,7 +233,6 @@ CONFIG_ATM_BR2684=m
+ CONFIG_BRIDGE=m
+ CONFIG_VLAN_8021Q=m
+ CONFIG_VLAN_8021Q_GVRP=y
+-CONFIG_DECNET=m
+ CONFIG_LLC2=m
+ CONFIG_ATALK=m
+ CONFIG_DEV_APPLETALK=m
+diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
+index bf9b9244929ec..00e7264789a4c 100644
+--- a/arch/mips/configs/nlm_xlr_defconfig
++++ b/arch/mips/configs/nlm_xlr_defconfig
+@@ -198,7 +198,6 @@ CONFIG_IP6_NF_TARGET_REJECT=m
+ CONFIG_IP6_NF_MANGLE=m
+ CONFIG_IP6_NF_RAW=m
+ CONFIG_IP6_NF_SECURITY=m
+-CONFIG_DECNET_NF_GRABULATOR=m
+ CONFIG_BRIDGE_NF_EBTABLES=m
+ CONFIG_BRIDGE_EBT_BROUTE=m
+ CONFIG_BRIDGE_EBT_T_FILTER=m
+@@ -232,7 +231,6 @@ CONFIG_ATM_BR2684=m
+ CONFIG_BRIDGE=m
+ CONFIG_VLAN_8021Q=m
+ CONFIG_VLAN_8021Q_GVRP=y
+-CONFIG_DECNET=m
+ CONFIG_LLC2=m
+ CONFIG_ATALK=m
+ CONFIG_DEV_APPLETALK=m
+diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
+index 3dc2da2bee0df..aa02825b3ed87 100644
+--- a/arch/mips/configs/rm200_defconfig
++++ b/arch/mips/configs/rm200_defconfig
+@@ -116,7 +116,6 @@ CONFIG_IP6_NF_FILTER=m
+ CONFIG_IP6_NF_TARGET_REJECT=m
+ CONFIG_IP6_NF_MANGLE=m
+ CONFIG_IP6_NF_RAW=m
+-CONFIG_DECNET_NF_GRABULATOR=m
+ CONFIG_BRIDGE_NF_EBTABLES=m
+ CONFIG_BRIDGE_EBT_BROUTE=m
+ CONFIG_BRIDGE_EBT_T_FILTER=m
+@@ -137,7 +136,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
+ CONFIG_BRIDGE_EBT_SNAT=m
+ CONFIG_BRIDGE_EBT_LOG=m
+ CONFIG_BRIDGE=m
+-CONFIG_DECNET=m
+ CONFIG_NET_SCHED=y
+ CONFIG_NET_SCH_CBQ=m
+ CONFIG_NET_SCH_HTB=m
+diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
+index ea5b5a83f1e11..011d1d678840a 100644
+--- a/arch/mips/dec/int-handler.S
++++ b/arch/mips/dec/int-handler.S
+@@ -131,7 +131,7 @@
+ */
+ mfc0 t0,CP0_CAUSE # get pending interrupts
+ mfc0 t1,CP0_STATUS
+-#ifdef CONFIG_32BIT
++#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT)
+ lw t2,cpu_fpu_mask
+ #endif
+ andi t0,ST0_IM # CAUSE.CE may be non-zero!
+@@ -139,7 +139,7 @@
+
+ beqz t0,spurious
+
+-#ifdef CONFIG_32BIT
++#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT)
+ and t2,t0
+ bnez t2,fpu # handle FPU immediately
+ #endif
+@@ -280,7 +280,7 @@ handle_it:
+ j dec_irq_dispatch
+ nop
+
+-#ifdef CONFIG_32BIT
++#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT)
+ fpu:
+ lw t0,fpu_kstat_irq
+ nop
+diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile
+index d95016016b42b..2bad87551203b 100644
+--- a/arch/mips/dec/prom/Makefile
++++ b/arch/mips/dec/prom/Makefile
+@@ -6,4 +6,4 @@
+
+ lib-y += init.o memory.o cmdline.o identify.o console.o
+
+-lib-$(CONFIG_32BIT) += locore.o
++lib-$(CONFIG_CPU_R3000) += locore.o
+diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
+index eaad0ed4b523b..99b9b29750db3 100644
+--- a/arch/mips/dec/setup.c
++++ b/arch/mips/dec/setup.c
+@@ -746,7 +746,8 @@ void __init arch_init_irq(void)
+ dec_interrupt[DEC_IRQ_HALT] = -1;
+
+ /* Register board interrupts: FPU and cascade. */
+- if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) {
++ if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) &&
++ dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) {
+ struct irq_desc *desc_fpu;
+ int irq_fpu;
+
+diff --git a/arch/mips/fw/lib/cmdline.c b/arch/mips/fw/lib/cmdline.c
+index f24cbb4a39b50..892765b742bbc 100644
+--- a/arch/mips/fw/lib/cmdline.c
++++ b/arch/mips/fw/lib/cmdline.c
+@@ -53,7 +53,7 @@ char *fw_getenv(char *envname)
+ {
+ char *result = NULL;
+
+- if (_fw_envp != NULL) {
++ if (_fw_envp != NULL && fw_envp(0) != NULL) {
+ /*
+ * Return a pointer to the given environment variable.
+ * YAMON uses "name", "value" pairs, while U-Boot uses
+diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
+index a3aa22c77cadc..a07a5edbcda78 100644
+--- a/arch/mips/generic/yamon-dt.c
++++ b/arch/mips/generic/yamon-dt.c
+@@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array(
+ __init int yamon_dt_append_memory(void *fdt,
+ const struct yamon_mem_region *regions)
+ {
+- unsigned long phys_memsize, memsize;
++ unsigned long phys_memsize = 0, memsize;
+ __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
+ unsigned int mem_entries;
+ int i, err, mem_off;
+diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
+index 2f8ce94ebaafe..cc69f1deb1ca8 100644
+--- a/arch/mips/include/asm/asm.h
++++ b/arch/mips/include/asm/asm.h
+@@ -276,7 +276,7 @@ symbol = value
+
+ #define PTR_SCALESHIFT 2
+
+-#define PTR .word
++#define PTR_WD .word
+ #define PTRSIZE 4
+ #define PTRLOG 2
+ #endif
+@@ -301,7 +301,7 @@ symbol = value
+
+ #define PTR_SCALESHIFT 3
+
+-#define PTR .dword
++#define PTR_WD .dword
+ #define PTRSIZE 8
+ #define PTRLOG 3
+ #endif
+diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h
+index d72dc6e1cf3cd..8d4cf29861b87 100644
+--- a/arch/mips/include/asm/bugs.h
++++ b/arch/mips/include/asm/bugs.h
+@@ -1,17 +1,11 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+ * Copyright (C) 2007 Maciej W. Rozycki
+- *
+- * Needs:
+- * void check_bugs(void);
+ */
+ #ifndef _ASM_BUGS_H
+ #define _ASM_BUGS_H
+
+ #include <linux/bug.h>
+-#include <linux/delay.h>
+ #include <linux/smp.h>
+
+ #include <asm/cpu.h>
+@@ -30,17 +24,6 @@ static inline void check_bugs_early(void)
+ check_bugs64_early();
+ }
+
+-static inline void check_bugs(void)
+-{
+- unsigned int cpu = smp_processor_id();
+-
+- cpu_data[cpu].udelay_val = loops_per_jiffy;
+- check_bugs32();
+-
+- if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+- check_bugs64();
+-}
+-
+ static inline int r4k_daddiu_bug(void)
+ {
+ if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
+index 0b983800f48b7..66a8b293fd80b 100644
+--- a/arch/mips/include/asm/cmpxchg.h
++++ b/arch/mips/include/asm/cmpxchg.h
+@@ -249,6 +249,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
+ /* Load 64 bits from ptr */
+ " " __SYNC(full, loongson3_war) " \n"
+ "1: lld %L0, %3 # __cmpxchg64 \n"
++ " .set pop \n"
+ /*
+ * Split the 64 bit value we loaded into the 2 registers that hold the
+ * ret variable.
+@@ -276,12 +277,14 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
+ " or %L1, %L1, $at \n"
+ " .set at \n"
+ # endif
++ " .set push \n"
++ " .set " MIPS_ISA_ARCH_LEVEL " \n"
+ /* Attempt to store new at ptr */
+ " scd %L1, %2 \n"
+ /* If we failed, loop! */
+ "\t" __SC_BEQZ "%L1, 1b \n"
+- " .set pop \n"
+ "2: " __SYNC(full, loongson3_war) " \n"
++ " .set pop \n"
+ : "=&r"(ret),
+ "=&r"(tmp),
+ "=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr)
+diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
+index 3d71081afc55f..e69833213e792 100644
+--- a/arch/mips/include/asm/cpu-features.h
++++ b/arch/mips/include/asm/cpu-features.h
+@@ -124,7 +124,24 @@
+ #define cpu_has_tx39_cache __opt(MIPS_CPU_TX39_CACHE)
+ #endif
+ #ifndef cpu_has_octeon_cache
+-#define cpu_has_octeon_cache 0
++#define cpu_has_octeon_cache \
++({ \
++ int __res; \
++ \
++ switch (boot_cpu_type()) { \
++ case CPU_CAVIUM_OCTEON: \
++ case CPU_CAVIUM_OCTEON_PLUS: \
++ case CPU_CAVIUM_OCTEON2: \
++ case CPU_CAVIUM_OCTEON3: \
++ __res = 1; \
++ break; \
++ \
++ default: \
++ __res = 0; \
++ } \
++ \
++ __res; \
++})
+ #endif
+ /* Don't override `cpu_has_fpu' to 1 or the "nofpu" option won't work. */
+ #ifndef cpu_has_fpu
+@@ -351,7 +368,7 @@
+ ({ \
+ int __res; \
+ \
+- switch (current_cpu_type()) { \
++ switch (boot_cpu_type()) { \
+ case CPU_M14KC: \
+ case CPU_74K: \
+ case CPU_1074K: \
+diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h
+index 62c7dfb90e06c..908e96e3a3117 100644
+--- a/arch/mips/include/asm/dec/prom.h
++++ b/arch/mips/include/asm/dec/prom.h
+@@ -43,16 +43,11 @@
+ */
+ #define REX_PROM_MAGIC 0x30464354
+
+-#ifdef CONFIG_64BIT
+-
+-#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */
+-
+-#else /* !CONFIG_64BIT */
+-
+-#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC)
+-
+-#endif /* !CONFIG_64BIT */
+-
++/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. */
++static inline bool prom_is_rex(u32 magic)
++{
++ return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC;
++}
+
+ /*
+ * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and
+@@ -75,7 +70,7 @@
+ */
+ typedef struct {
+ int pagesize;
+- unsigned char bitmap[0];
++ unsigned char bitmap[];
+ } memmap;
+
+
+diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
+index b463f2aa5a613..db497a8167da2 100644
+--- a/arch/mips/include/asm/ftrace.h
++++ b/arch/mips/include/asm/ftrace.h
+@@ -32,7 +32,7 @@ do { \
+ ".previous\n" \
+ \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR) "\t1b, 3b\n\t" \
++ STR(PTR_WD) "\t1b, 3b\n\t" \
+ ".previous\n" \
+ \
+ : [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\
+@@ -54,7 +54,7 @@ do { \
+ ".previous\n" \
+ \
+ ".section\t__ex_table,\"a\"\n\t"\
+- STR(PTR) "\t1b, 3b\n\t" \
++ STR(PTR_WD) "\t1b, 3b\n\t" \
+ ".previous\n" \
+ \
+ : [tmp_err] "=r" (error) \
+diff --git a/arch/mips/include/asm/fw/fw.h b/arch/mips/include/asm/fw/fw.h
+index d0ef8b4892bbe..d0494ce4b3373 100644
+--- a/arch/mips/include/asm/fw/fw.h
++++ b/arch/mips/include/asm/fw/fw.h
+@@ -26,6 +26,6 @@ extern char *fw_getcmdline(void);
+ extern void fw_meminit(void);
+ extern char *fw_getenv(char *name);
+ extern unsigned long fw_getenvl(char *name);
+-extern void fw_init_early_console(char port);
++extern void fw_init_early_console(void);
+
+ #endif /* __ASM_FW_H_ */
+diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
+index 696f6b0093776..cfd9e15817774 100644
+--- a/arch/mips/include/asm/kvm_host.h
++++ b/arch/mips/include/asm/kvm_host.h
+@@ -318,7 +318,7 @@ struct kvm_vcpu_arch {
+ unsigned int aux_inuse;
+
+ /* COP0 State */
+- struct mips_coproc *cop0;
++ struct mips_coproc cop0;
+
+ /* Resume PC after MMIO completion */
+ unsigned long io_pc;
+@@ -699,7 +699,7 @@ static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+ static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+ {
+ return kvm_mips_guest_can_have_fpu(vcpu) &&
+- kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
++ kvm_read_c0_guest_config1(&vcpu->cop0) & MIPS_CONF1_FP;
+ }
+
+ static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+@@ -711,7 +711,7 @@ static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+ static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+ {
+ return kvm_mips_guest_can_have_msa(vcpu) &&
+- kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
++ kvm_read_c0_guest_config3(&vcpu->cop0) & MIPS_CONF3_MSA;
+ }
+
+ struct kvm_mips_callbacks {
+diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
+index ecda7295ddcd1..3fa6340903882 100644
+--- a/arch/mips/include/asm/local.h
++++ b/arch/mips/include/asm/local.h
+@@ -5,6 +5,7 @@
+ #include <linux/percpu.h>
+ #include <linux/bitops.h>
+ #include <linux/atomic.h>
++#include <asm/asm.h>
+ #include <asm/cmpxchg.h>
+ #include <asm/compiler.h>
+ #include <asm/war.h>
+@@ -39,7 +40,7 @@ static __inline__ long local_add_return(long i, local_t * l)
+ " .set arch=r4000 \n"
+ __SYNC(full, loongson3_war) " \n"
+ "1:" __LL "%1, %2 # local_add_return \n"
+- " addu %0, %1, %3 \n"
++ __stringify(LONG_ADDU) " %0, %1, %3 \n"
+ __SC "%0, %2 \n"
+ " beqzl %0, 1b \n"
+ " addu %0, %1, %3 \n"
+@@ -55,7 +56,7 @@ static __inline__ long local_add_return(long i, local_t * l)
+ " .set "MIPS_ISA_ARCH_LEVEL" \n"
+ __SYNC(full, loongson3_war) " \n"
+ "1:" __LL "%1, %2 # local_add_return \n"
+- " addu %0, %1, %3 \n"
++ __stringify(LONG_ADDU) " %0, %1, %3 \n"
+ __SC "%0, %2 \n"
+ " beqz %0, 1b \n"
+ " addu %0, %1, %3 \n"
+@@ -88,7 +89,7 @@ static __inline__ long local_sub_return(long i, local_t * l)
+ " .set arch=r4000 \n"
+ __SYNC(full, loongson3_war) " \n"
+ "1:" __LL "%1, %2 # local_sub_return \n"
+- " subu %0, %1, %3 \n"
++ __stringify(LONG_SUBU) " %0, %1, %3 \n"
+ __SC "%0, %2 \n"
+ " beqzl %0, 1b \n"
+ " subu %0, %1, %3 \n"
+@@ -104,7 +105,7 @@ static __inline__ long local_sub_return(long i, local_t * l)
+ " .set "MIPS_ISA_ARCH_LEVEL" \n"
+ __SYNC(full, loongson3_war) " \n"
+ "1:" __LL "%1, %2 # local_sub_return \n"
+- " subu %0, %1, %3 \n"
++ __stringify(LONG_SUBU) " %0, %1, %3 \n"
+ __SC "%0, %2 \n"
+ " beqz %0, 1b \n"
+ " subu %0, %1, %3 \n"
+diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
+index 58f829c9b6c70..79d6fd249583f 100644
+--- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
++++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h
+@@ -26,7 +26,6 @@
+ #define cpu_has_3k_cache 0
+ #define cpu_has_4k_cache 1
+ #define cpu_has_tx39_cache 0
+-#define cpu_has_fpu 1
+ #define cpu_has_nofpuex 0
+ #define cpu_has_32fpr 1
+ #define cpu_has_counter 1
+diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
+index 49a93e82c2528..2635b6ba1cb54 100644
+--- a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
++++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h
+@@ -29,7 +29,6 @@
+ #define cpu_has_3k_cache 0
+ #define cpu_has_4k_cache 1
+ #define cpu_has_tx39_cache 0
+-#define cpu_has_fpu 1
+ #define cpu_has_nofpuex 0
+ #define cpu_has_32fpr 1
+ #define cpu_has_counter 1
+diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+index 13373c5144f89..efb41b3519747 100644
+--- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
++++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+@@ -32,7 +32,7 @@
+ nop
+ /* Loongson-3A R2/R3 */
+ andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
+- slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
++ slti t0, t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+ bnez t0, 2f
+ nop
+ 1:
+@@ -63,7 +63,7 @@
+ nop
+ /* Loongson-3A R2/R3 */
+ andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
+- slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
++ slti t0, t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)
+ bnez t0, 2f
+ nop
+ 1:
+diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
+index 6bbf082dd149e..79d5bb0e06d63 100644
+--- a/arch/mips/include/asm/mach-ralink/mt7621.h
++++ b/arch/mips/include/asm/mach-ralink/mt7621.h
+@@ -7,10 +7,12 @@
+ #ifndef _MT7621_REGS_H_
+ #define _MT7621_REGS_H_
+
++#define IOMEM(x) ((void __iomem *)(KSEG1ADDR(x)))
++
+ #define MT7621_PALMBUS_BASE 0x1C000000
+ #define MT7621_PALMBUS_SIZE 0x03FFFFFF
+
+-#define MT7621_SYSC_BASE 0x1E000000
++#define MT7621_SYSC_BASE IOMEM(0x1E000000)
+
+ #define SYSC_REG_CHIP_NAME0 0x00
+ #define SYSC_REG_CHIP_NAME1 0x04
+diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h
+index 9a6eefd127571..3eb767c8a4eec 100644
+--- a/arch/mips/include/asm/mach-rc32434/pci.h
++++ b/arch/mips/include/asm/mach-rc32434/pci.h
+@@ -374,7 +374,7 @@ struct pci_msu {
+ PCI_CFG04_STAT_SSE | \
+ PCI_CFG04_STAT_PE)
+
+-#define KORINA_CNFG1 ((KORINA_STAT<<16)|KORINA_CMD)
++#define KORINA_CNFG1 (KORINA_STAT | KORINA_CMD)
+
+ #define KORINA_REVID 0
+ #define KORINA_CLASS_CODE 0
+diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
+index aeae2effa123d..23c67c0871b17 100644
+--- a/arch/mips/include/asm/mips-cm.h
++++ b/arch/mips/include/asm/mips-cm.h
+@@ -11,6 +11,7 @@
+ #ifndef __MIPS_ASM_MIPS_CM_H__
+ #define __MIPS_ASM_MIPS_CM_H__
+
++#include <linux/bitfield.h>
+ #include <linux/bitops.h>
+ #include <linux/errno.h>
+
+@@ -153,8 +154,8 @@ GCR_ACCESSOR_RO(32, 0x030, rev)
+ #define CM_GCR_REV_MINOR GENMASK(7, 0)
+
+ #define CM_ENCODE_REV(major, minor) \
+- (((major) << __ffs(CM_GCR_REV_MAJOR)) | \
+- ((minor) << __ffs(CM_GCR_REV_MINOR)))
++ (FIELD_PREP(CM_GCR_REV_MAJOR, major) | \
++ FIELD_PREP(CM_GCR_REV_MINOR, minor))
+
+ #define CM_REV_CM2 CM_ENCODE_REV(6, 0)
+ #define CM_REV_CM2_5 CM_ENCODE_REV(7, 0)
+@@ -362,10 +363,10 @@ static inline int mips_cm_revision(void)
+ static inline unsigned int mips_cm_max_vp_width(void)
+ {
+ extern int smp_num_siblings;
+- uint32_t cfg;
+
+ if (mips_cm_revision() >= CM_REV_CM3)
+- return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW;
++ return FIELD_GET(CM_GCR_SYS_CONFIG2_MAXVPW,
++ read_gcr_sys_config2());
+
+ if (mips_cm_present()) {
+ /*
+@@ -373,8 +374,7 @@ static inline unsigned int mips_cm_max_vp_width(void)
+ * number of VP(E)s, and if that ever changes then this will
+ * need revisiting.
+ */
+- cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE;
+- return (cfg >> __ffs(CM_GCR_Cx_CONFIG_PVPE)) + 1;
++ return FIELD_GET(CM_GCR_Cx_CONFIG_PVPE, read_gcr_cl_config()) + 1;
+ }
+
+ if (IS_ENABLED(CONFIG_SMP))
+diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+index 0e6bf220db618..6c61e0a639249 100644
+--- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h
++++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+@@ -318,7 +318,7 @@ enum cvmx_chip_types_enum {
+
+ /* Functions to return string based on type */
+ #define ENUM_BRD_TYPE_CASE(x) \
+- case x: return(#x + 16); /* Skip CVMX_BOARD_TYPE_ */
++ case x: return (&#x[16]); /* Skip CVMX_BOARD_TYPE_ */
+ static inline const char *cvmx_board_type_to_string(enum
+ cvmx_board_types_enum type)
+ {
+@@ -410,7 +410,7 @@ static inline const char *cvmx_board_type_to_string(enum
+ }
+
+ #define ENUM_CHIP_TYPE_CASE(x) \
+- case x: return(#x + 15); /* Skip CVMX_CHIP_TYPE */
++ case x: return (&#x[15]); /* Skip CVMX_CHIP_TYPE */
+ static inline const char *cvmx_chip_type_to_string(enum
+ cvmx_chip_types_enum type)
+ {
+diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
+index c7925d0e98746..867e9c3db76e9 100644
+--- a/arch/mips/include/asm/pgalloc.h
++++ b/arch/mips/include/asm/pgalloc.h
+@@ -15,6 +15,7 @@
+
+ #define __HAVE_ARCH_PMD_ALLOC_ONE
+ #define __HAVE_ARCH_PUD_ALLOC_ONE
++#define __HAVE_ARCH_PGD_FREE
+ #include <asm-generic/pgalloc.h>
+
+ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+@@ -48,6 +49,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ extern void pgd_init(unsigned long page);
+ extern pgd_t *pgd_alloc(struct mm_struct *mm);
+
++static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
++{
++ free_pages((unsigned long)pgd, PGD_ORDER);
++}
++
+ #define __pte_free_tlb(tlb,pte,address) \
+ do { \
+ pgtable_pte_page_dtor(pte); \
+diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
+index af3788589ee6d..431a1c9d53fc7 100644
+--- a/arch/mips/include/asm/r4kcache.h
++++ b/arch/mips/include/asm/r4kcache.h
+@@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr)
+ " j 2b \n" \
+ " .previous \n" \
+ " .section __ex_table,\"a\" \n" \
+- " "STR(PTR)" 1b, 3b \n" \
++ " "STR(PTR_WD)" 1b, 3b \n" \
+ " .previous" \
+ : "+r" (__err) \
+ : "i" (op), "r" (addr), "i" (-EFAULT)); \
+@@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr)
+ " j 2b \n" \
+ " .previous \n" \
+ " .section __ex_table,\"a\" \n" \
+- " "STR(PTR)" 1b, 3b \n" \
++ " "STR(PTR_WD)" 1b, 3b \n" \
+ " .previous" \
+ : "+r" (__err) \
+ : "i" (op), "r" (addr), "i" (-EFAULT)); \
+diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h
+index bb36a400203df..8c56b862fd9c2 100644
+--- a/arch/mips/include/asm/setup.h
++++ b/arch/mips/include/asm/setup.h
+@@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base,
+ unsigned int reg_shift, unsigned int timeout) {}
+ #endif
+
+-extern void set_handler(unsigned long offset, void *addr, unsigned long len);
++void set_handler(unsigned long offset, const void *addr, unsigned long len);
+ extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len);
+
+ typedef void (*vi_handler_t)(void);
+diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
+index 25fa651c937d5..ebdf4d910af2f 100644
+--- a/arch/mips/include/asm/syscall.h
++++ b/arch/mips/include/asm/syscall.h
+@@ -38,7 +38,7 @@ static inline bool mips_syscall_is_indirect(struct task_struct *task,
+ static inline long syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+ {
+- return current_thread_info()->syscall;
++ return task_thread_info(task)->syscall;
+ }
+
+ static inline void mips_syscall_update_nr(struct task_struct *task,
+diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
+index b05bb70a2e46f..2e107886f97ac 100644
+--- a/arch/mips/include/asm/timex.h
++++ b/arch/mips/include/asm/timex.h
+@@ -40,9 +40,9 @@
+ typedef unsigned int cycles_t;
+
+ /*
+- * On R4000/R4400 before version 5.0 an erratum exists such that if the
+- * cycle counter is read in the exact moment that it is matching the
+- * compare register, no interrupt will be generated.
++ * On R4000/R4400 an erratum exists such that if the cycle counter is
++ * read in the exact moment that it is matching the compare register,
++ * no interrupt will be generated.
+ *
+ * There is a suggested workaround and also the erratum can't strike if
+ * the compare interrupt isn't being used as the clock source device.
+@@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid)
+ if (!__builtin_constant_p(cpu_has_counter))
+ asm volatile("" : "=m" (cpu_data[0].options));
+ if (likely(cpu_has_counter &&
+- prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0))))
++ prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15))))
+ return 1;
+ else
+ return 0;
+@@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void)
+ else
+ return 0; /* no usable counter */
+ }
++#define get_cycles get_cycles
+
+ /*
+ * Like get_cycles - but where c0_count is not available we desperately
+ * use c0_random in an attempt to get at least a little bit of entropy.
+- *
+- * R6000 and R6000A neither have a count register nor a random register.
+- * That leaves no entropy source in the CPU itself.
+ */
+ static inline unsigned long random_get_entropy(void)
+ {
+- unsigned int prid = read_c0_prid();
+- unsigned int imp = prid & PRID_IMP_MASK;
++ unsigned int c0_random;
+
+- if (can_use_mips_counter(prid))
++ if (can_use_mips_counter(read_c0_prid()))
+ return read_c0_count();
+- else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
+- return read_c0_random();
++
++ if (cpu_has_3kex)
++ c0_random = (read_c0_random() >> 8) & 0x3f;
+ else
+- return 0; /* no usable register */
++ c0_random = read_c0_random() & 0x3f;
++ return (random_get_entropy_fallback() << 6) | (0x3f - c0_random);
+ }
+ #define random_get_entropy random_get_entropy
+
+diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h
+index 2022b18944b97..9af0f4d3d288c 100644
+--- a/arch/mips/include/asm/unaligned-emul.h
++++ b/arch/mips/include/asm/unaligned-emul.h
+@@ -20,8 +20,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -41,8 +41,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -74,10 +74,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -102,8 +102,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -125,8 +125,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -145,8 +145,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -178,10 +178,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -223,14 +223,14 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
+- STR(PTR)"\t5b, 11b\n\t" \
+- STR(PTR)"\t6b, 11b\n\t" \
+- STR(PTR)"\t7b, 11b\n\t" \
+- STR(PTR)"\t8b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t5b, 11b\n\t" \
++ STR(PTR_WD)"\t6b, 11b\n\t" \
++ STR(PTR_WD)"\t7b, 11b\n\t" \
++ STR(PTR_WD)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -255,8 +255,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT));\
+@@ -276,8 +276,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+@@ -296,8 +296,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+@@ -325,10 +325,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+@@ -365,14 +365,14 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
+- STR(PTR)"\t5b, 11b\n\t" \
+- STR(PTR)"\t6b, 11b\n\t" \
+- STR(PTR)"\t7b, 11b\n\t" \
+- STR(PTR)"\t8b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t5b, 11b\n\t" \
++ STR(PTR_WD)"\t6b, 11b\n\t" \
++ STR(PTR_WD)"\t7b, 11b\n\t" \
++ STR(PTR_WD)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+@@ -398,8 +398,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -419,8 +419,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -452,10 +452,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -481,8 +481,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -504,8 +504,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -524,8 +524,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -557,10 +557,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -602,14 +602,14 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
+- STR(PTR)"\t5b, 11b\n\t" \
+- STR(PTR)"\t6b, 11b\n\t" \
+- STR(PTR)"\t7b, 11b\n\t" \
+- STR(PTR)"\t8b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t5b, 11b\n\t" \
++ STR(PTR_WD)"\t6b, 11b\n\t" \
++ STR(PTR_WD)"\t7b, 11b\n\t" \
++ STR(PTR_WD)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (value), "=r" (res) \
+ : "r" (addr), "i" (-EFAULT)); \
+@@ -632,8 +632,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT));\
+@@ -653,8 +653,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+@@ -673,8 +673,8 @@ do { \
+ "j\t3b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 4b\n\t" \
+- STR(PTR)"\t2b, 4b\n\t" \
++ STR(PTR_WD)"\t1b, 4b\n\t" \
++ STR(PTR_WD)"\t2b, 4b\n\t" \
+ ".previous" \
+ : "=r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT)); \
+@@ -703,10 +703,10 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+@@ -743,14 +743,14 @@ do { \
+ "j\t10b\n\t" \
+ ".previous\n\t" \
+ ".section\t__ex_table,\"a\"\n\t" \
+- STR(PTR)"\t1b, 11b\n\t" \
+- STR(PTR)"\t2b, 11b\n\t" \
+- STR(PTR)"\t3b, 11b\n\t" \
+- STR(PTR)"\t4b, 11b\n\t" \
+- STR(PTR)"\t5b, 11b\n\t" \
+- STR(PTR)"\t6b, 11b\n\t" \
+- STR(PTR)"\t7b, 11b\n\t" \
+- STR(PTR)"\t8b, 11b\n\t" \
++ STR(PTR_WD)"\t1b, 11b\n\t" \
++ STR(PTR_WD)"\t2b, 11b\n\t" \
++ STR(PTR_WD)"\t3b, 11b\n\t" \
++ STR(PTR_WD)"\t4b, 11b\n\t" \
++ STR(PTR_WD)"\t5b, 11b\n\t" \
++ STR(PTR_WD)"\t6b, 11b\n\t" \
++ STR(PTR_WD)"\t7b, 11b\n\t" \
++ STR(PTR_WD)"\t8b, 11b\n\t" \
+ ".previous" \
+ : "=&r" (res) \
+ : "r" (value), "r" (addr), "i" (-EFAULT) \
+diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
+index 630fcb4cb30e7..8ebcc298bf759 100644
+--- a/arch/mips/kernel/cpu-probe.c
++++ b/arch/mips/kernel/cpu-probe.c
+@@ -1734,9 +1734,10 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c)
+
+ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
+ {
+- decode_configs(c);
++ c->cputype = CPU_LOONGSON64;
+
+ /* All Loongson processors covered here define ExcCode 16 as GSExc. */
++ decode_configs(c);
+ c->options |= MIPS_CPU_GSEXCEX;
+
+ switch (c->processor_id & PRID_IMP_MASK) {
+@@ -1746,7 +1747,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
+ case PRID_REV_LOONGSON2K_R1_1:
+ case PRID_REV_LOONGSON2K_R1_2:
+ case PRID_REV_LOONGSON2K_R1_3:
+- c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "Loongson-2K";
+ set_elf_platform(cpu, "gs264e");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+@@ -1759,14 +1759,12 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
+ switch (c->processor_id & PRID_REV_MASK) {
+ case PRID_REV_LOONGSON3A_R2_0:
+ case PRID_REV_LOONGSON3A_R2_1:
+- c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "ICT Loongson-3";
+ set_elf_platform(cpu, "loongson3a");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+ break;
+ case PRID_REV_LOONGSON3A_R3_0:
+ case PRID_REV_LOONGSON3A_R3_1:
+- c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "ICT Loongson-3";
+ set_elf_platform(cpu, "loongson3a");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+@@ -1786,7 +1784,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
+ c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */
+ break;
+ case PRID_IMP_LOONGSON_64G:
+- c->cputype = CPU_LOONGSON64;
+ __cpu_name[cpu] = "ICT Loongson-3";
+ set_elf_platform(cpu, "loongson3a");
+ set_isa(c, MIPS_CPU_ISA_M64R2);
+diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
+index 662c8db9f45ba..9f5b1247b4ba4 100644
+--- a/arch/mips/kernel/jump_label.c
++++ b/arch/mips/kernel/jump_label.c
+@@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e,
+ * The branch offset must fit in the instruction's 26
+ * bit field.
+ */
+- WARN_ON((offset >= BIT(25)) ||
++ WARN_ON((offset >= (long)BIT(25)) ||
+ (offset < -(long)BIT(25)));
+
+ insn.j_format.opcode = bc6_op;
+diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
+index 75bff0f773198..b0934a0d7aedd 100644
+--- a/arch/mips/kernel/kprobes.c
++++ b/arch/mips/kernel/kprobes.c
+@@ -11,6 +11,8 @@
+ * Copyright (C) IBM Corporation, 2002, 2004
+ */
+
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/kprobes.h>
+ #include <linux/preempt.h>
+ #include <linux/uaccess.h>
+@@ -80,8 +82,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
+ insn = p->addr[0];
+
+ if (insn_has_ll_or_sc(insn)) {
+- pr_notice("Kprobes for ll and sc instructions are not"
+- "supported\n");
++ pr_notice("Kprobes for ll and sc instructions are not supported\n");
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -219,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs,
+ return 0;
+
+ unaligned:
+- pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm);
++ pr_notice("Failed to emulate branch instruction because of unaligned epc - sending SIGBUS to %s.\n", current->comm);
+ force_sig(SIGBUS);
+ return -EFAULT;
+
+@@ -238,10 +239,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs,
+ regs->cp0_epc = (unsigned long)p->addr;
+ else if (insn_has_delayslot(p->opcode)) {
+ ret = evaluate_branch_instruction(p, regs, kcb);
+- if (ret < 0) {
+- pr_notice("Kprobes: Error in evaluating branch\n");
++ if (ret < 0)
+ return;
+- }
+ }
+ regs->cp0_epc = (unsigned long)&p->ainsn.insn[0];
+ }
+diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
+index 90f1c3df1f0e4..b4f7d950c8468 100644
+--- a/arch/mips/kernel/mips-cm.c
++++ b/arch/mips/kernel/mips-cm.c
+@@ -221,8 +221,7 @@ static void mips_cm_probe_l2sync(void)
+ phys_addr_t addr;
+
+ /* L2-only sync was introduced with CM major revision 6 */
+- major_rev = (read_gcr_rev() & CM_GCR_REV_MAJOR) >>
+- __ffs(CM_GCR_REV_MAJOR);
++ major_rev = FIELD_GET(CM_GCR_REV_MAJOR, read_gcr_rev());
+ if (major_rev < 6)
+ return;
+
+@@ -306,13 +305,13 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core,
+ preempt_disable();
+
+ if (cm_rev >= CM_REV_CM3) {
+- val = core << __ffs(CM3_GCR_Cx_OTHER_CORE);
+- val |= vp << __ffs(CM3_GCR_Cx_OTHER_VP);
++ val = FIELD_PREP(CM3_GCR_Cx_OTHER_CORE, core) |
++ FIELD_PREP(CM3_GCR_Cx_OTHER_VP, vp);
+
+ if (cm_rev >= CM_REV_CM3_5) {
+ val |= CM_GCR_Cx_OTHER_CLUSTER_EN;
+- val |= cluster << __ffs(CM_GCR_Cx_OTHER_CLUSTER);
+- val |= block << __ffs(CM_GCR_Cx_OTHER_BLOCK);
++ val |= FIELD_PREP(CM_GCR_Cx_OTHER_CLUSTER, cluster);
++ val |= FIELD_PREP(CM_GCR_Cx_OTHER_BLOCK, block);
+ } else {
+ WARN_ON(cluster != 0);
+ WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL);
+@@ -342,7 +341,7 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core,
+ spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core),
+ per_cpu(cm_core_lock_flags, curr_core));
+
+- val = core << __ffs(CM_GCR_Cx_OTHER_CORENUM);
++ val = FIELD_PREP(CM_GCR_Cx_OTHER_CORENUM, core);
+ }
+
+ write_gcr_cl_other(val);
+@@ -386,8 +385,8 @@ void mips_cm_error_report(void)
+ cm_other = read_gcr_error_mult();
+
+ if (revision < CM_REV_CM3) { /* CM2 */
+- cause = cm_error >> __ffs(CM_GCR_ERROR_CAUSE_ERRTYPE);
+- ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND);
++ cause = FIELD_GET(CM_GCR_ERROR_CAUSE_ERRTYPE, cm_error);
++ ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other);
+
+ if (!cause)
+ return;
+@@ -445,8 +444,8 @@ void mips_cm_error_report(void)
+ ulong core_id_bits, vp_id_bits, cmd_bits, cmd_group_bits;
+ ulong cm3_cca_bits, mcp_bits, cm3_tr_bits, sched_bit;
+
+- cause = cm_error >> __ffs64(CM3_GCR_ERROR_CAUSE_ERRTYPE);
+- ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND);
++ cause = FIELD_GET(CM3_GCR_ERROR_CAUSE_ERRTYPE, cm_error);
++ ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other);
+
+ if (!cause)
+ return;
+diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
+index 8d2535123f11c..d005be84c482b 100644
+--- a/arch/mips/kernel/mips-cpc.c
++++ b/arch/mips/kernel/mips-cpc.c
+@@ -27,6 +27,7 @@ phys_addr_t __weak mips_cpc_default_phys_base(void)
+ cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+ if (cpc_node) {
+ err = of_address_to_resource(cpc_node, 0, &res);
++ of_node_put(cpc_node);
+ if (!err)
+ return res.start;
+ }
+diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
+index a39ec755e4c24..750fe569862b6 100644
+--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
++++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
+@@ -1258,10 +1258,10 @@ fpu_emul:
+ " j 10b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1333,10 +1333,10 @@ fpu_emul:
+ " j 10b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1404,10 +1404,10 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1474,10 +1474,10 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1589,14 +1589,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
+- STR(PTR) " 5b,8b\n"
+- STR(PTR) " 6b,8b\n"
+- STR(PTR) " 7b,8b\n"
+- STR(PTR) " 0b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
++ STR(PTR_WD) " 5b,8b\n"
++ STR(PTR_WD) " 6b,8b\n"
++ STR(PTR_WD) " 7b,8b\n"
++ STR(PTR_WD) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1708,14 +1708,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
+- STR(PTR) " 5b,8b\n"
+- STR(PTR) " 6b,8b\n"
+- STR(PTR) " 7b,8b\n"
+- STR(PTR) " 0b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
++ STR(PTR_WD) " 5b,8b\n"
++ STR(PTR_WD) " 6b,8b\n"
++ STR(PTR_WD) " 7b,8b\n"
++ STR(PTR_WD) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1827,14 +1827,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
+- STR(PTR) " 5b,8b\n"
+- STR(PTR) " 6b,8b\n"
+- STR(PTR) " 7b,8b\n"
+- STR(PTR) " 0b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
++ STR(PTR_WD) " 5b,8b\n"
++ STR(PTR_WD) " 6b,8b\n"
++ STR(PTR_WD) " 7b,8b\n"
++ STR(PTR_WD) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1945,14 +1945,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,8b\n"
+- STR(PTR) " 2b,8b\n"
+- STR(PTR) " 3b,8b\n"
+- STR(PTR) " 4b,8b\n"
+- STR(PTR) " 5b,8b\n"
+- STR(PTR) " 6b,8b\n"
+- STR(PTR) " 7b,8b\n"
+- STR(PTR) " 0b,8b\n"
++ STR(PTR_WD) " 1b,8b\n"
++ STR(PTR_WD) " 2b,8b\n"
++ STR(PTR_WD) " 3b,8b\n"
++ STR(PTR_WD) " 4b,8b\n"
++ STR(PTR_WD) " 5b,8b\n"
++ STR(PTR_WD) " 6b,8b\n"
++ STR(PTR_WD) " 7b,8b\n"
++ STR(PTR_WD) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -2007,7 +2007,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,3b\n"
++ STR(PTR_WD) " 1b,3b\n"
+ ".previous\n"
+ : "=&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV)
+@@ -2065,7 +2065,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,3b\n"
++ STR(PTR_WD) " 1b,3b\n"
+ ".previous\n"
+ : "+&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV));
+@@ -2126,7 +2126,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,3b\n"
++ STR(PTR_WD) " 1b,3b\n"
+ ".previous\n"
+ : "=&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV)
+@@ -2189,7 +2189,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- STR(PTR) " 1b,3b\n"
++ STR(PTR_WD) " 1b,3b\n"
+ ".previous\n"
+ : "+&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV));
+diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
+index 4184d641f05e0..33a02f3814f58 100644
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -172,7 +172,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
+ {
+ unsigned long i = *pos;
+
+- return i < NR_CPUS ? (void *) (i + 1) : NULL;
++ return i < nr_cpu_ids ? (void *) (i + 1) : NULL;
+ }
+
+ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
+index 12e58053544fc..2748c55820c24 100644
+--- a/arch/mips/kernel/r2300_fpu.S
++++ b/arch/mips/kernel/r2300_fpu.S
+@@ -23,14 +23,14 @@
+ #define EX(a,b) \
+ 9: a,##b; \
+ .section __ex_table,"a"; \
+- PTR 9b,fault; \
++ PTR_WD 9b,fault; \
+ .previous
+
+ #define EX2(a,b) \
+ 9: a,##b; \
+ .section __ex_table,"a"; \
+- PTR 9b,bad_stack; \
+- PTR 9b+4,bad_stack; \
++ PTR_WD 9b,fault; \
++ PTR_WD 9b+4,fault; \
+ .previous
+
+ .set mips1
+diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
+index b91e911064756..2e687c60bc4f1 100644
+--- a/arch/mips/kernel/r4k_fpu.S
++++ b/arch/mips/kernel/r4k_fpu.S
+@@ -31,7 +31,7 @@
+ .ex\@: \insn \reg, \src
+ .set pop
+ .section __ex_table,"a"
+- PTR .ex\@, fault
++ PTR_WD .ex\@, fault
+ .previous
+ .endm
+
+diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
+index f3c908abdbb80..f5b2ef979b437 100644
+--- a/arch/mips/kernel/relocate_kernel.S
++++ b/arch/mips/kernel/relocate_kernel.S
+@@ -145,12 +145,11 @@ LEAF(kexec_smp_wait)
+ * kexec_args[0..3] are used to prepare register values.
+ */
+
+-kexec_args:
+- EXPORT(kexec_args)
+-arg0: PTR 0x0
+-arg1: PTR 0x0
+-arg2: PTR 0x0
+-arg3: PTR 0x0
++EXPORT(kexec_args)
++arg0: PTR_WD 0x0
++arg1: PTR_WD 0x0
++arg2: PTR_WD 0x0
++arg3: PTR_WD 0x0
+ .size kexec_args,PTRSIZE*4
+
+ #ifdef CONFIG_SMP
+@@ -159,31 +158,27 @@ arg3: PTR 0x0
+ * their registers a0-a3. secondary_kexec_args[0..3] are used
+ * to prepare register values.
+ */
+-secondary_kexec_args:
+- EXPORT(secondary_kexec_args)
+-s_arg0: PTR 0x0
+-s_arg1: PTR 0x0
+-s_arg2: PTR 0x0
+-s_arg3: PTR 0x0
++EXPORT(secondary_kexec_args)
++s_arg0: PTR_WD 0x0
++s_arg1: PTR_WD 0x0
++s_arg2: PTR_WD 0x0
++s_arg3: PTR_WD 0x0
+ .size secondary_kexec_args,PTRSIZE*4
+ kexec_flag:
+ LONG 0x1
+
+ #endif
+
+-kexec_start_address:
+- EXPORT(kexec_start_address)
+- PTR 0x0
++EXPORT(kexec_start_address)
++ PTR_WD 0x0
+ .size kexec_start_address, PTRSIZE
+
+-kexec_indirection_page:
+- EXPORT(kexec_indirection_page)
+- PTR 0
++EXPORT(kexec_indirection_page)
++ PTR_WD 0
+ .size kexec_indirection_page, PTRSIZE
+
+ relocate_new_kernel_end:
+
+-relocate_new_kernel_size:
+- EXPORT(relocate_new_kernel_size)
+- PTR relocate_new_kernel_end - relocate_new_kernel
++EXPORT(relocate_new_kernel_size)
++ PTR_WD relocate_new_kernel_end - relocate_new_kernel
+ .size relocate_new_kernel_size, PTRSIZE
+diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
+index b1b2e106f7118..9bfce5f75f601 100644
+--- a/arch/mips/kernel/scall32-o32.S
++++ b/arch/mips/kernel/scall32-o32.S
+@@ -72,10 +72,10 @@ loads_done:
+ .set pop
+
+ .section __ex_table,"a"
+- PTR load_a4, bad_stack_a4
+- PTR load_a5, bad_stack_a5
+- PTR load_a6, bad_stack_a6
+- PTR load_a7, bad_stack_a7
++ PTR_WD load_a4, bad_stack_a4
++ PTR_WD load_a5, bad_stack_a5
++ PTR_WD load_a6, bad_stack_a6
++ PTR_WD load_a7, bad_stack_a7
+ .previous
+
+ lw t0, TI_FLAGS($28) # syscall tracing enabled?
+@@ -216,7 +216,7 @@ einval: li v0, -ENOSYS
+ #endif /* CONFIG_MIPS_MT_FPAFF */
+
+ #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+-#define __SYSCALL(nr, entry) PTR entry
++#define __SYSCALL(nr, entry) PTR_WD entry
+ .align 2
+ .type sys_call_table, @object
+ EXPORT(sys_call_table)
+diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
+index f650c55a17dc5..97456b2ca7dc3 100644
+--- a/arch/mips/kernel/scall64-n32.S
++++ b/arch/mips/kernel/scall64-n32.S
+@@ -101,7 +101,7 @@ not_n32_scall:
+
+ END(handle_sysn32)
+
+-#define __SYSCALL(nr, entry) PTR entry
++#define __SYSCALL(nr, entry) PTR_WD entry
+ .type sysn32_call_table, @object
+ EXPORT(sysn32_call_table)
+ #include <asm/syscall_table_n32.h>
+diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
+index 5d7bfc65e4d0b..5f6ed4b4c3993 100644
+--- a/arch/mips/kernel/scall64-n64.S
++++ b/arch/mips/kernel/scall64-n64.S
+@@ -109,7 +109,7 @@ illegal_syscall:
+ j n64_syscall_exit
+ END(handle_sys64)
+
+-#define __SYSCALL(nr, entry) PTR entry
++#define __SYSCALL(nr, entry) PTR_WD entry
+ .align 3
+ .type sys_call_table, @object
+ EXPORT(sys_call_table)
+diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
+index cedc8bd888046..d3c2616cba226 100644
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp
+ loads_done:
+
+ .section __ex_table,"a"
+- PTR load_a4, bad_stack_a4
+- PTR load_a5, bad_stack_a5
+- PTR load_a6, bad_stack_a6
+- PTR load_a7, bad_stack_a7
++ PTR_WD load_a4, bad_stack_a4
++ PTR_WD load_a5, bad_stack_a5
++ PTR_WD load_a6, bad_stack_a6
++ PTR_WD load_a7, bad_stack_a7
+ .previous
+
+ li t1, _TIF_WORK_SYSCALL_ENTRY
+@@ -214,7 +214,7 @@ einval: li v0, -ENOSYS
+ END(sys32_syscall)
+
+ #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+-#define __SYSCALL(nr, entry) PTR entry
++#define __SYSCALL(nr, entry) PTR_WD entry
+ .align 3
+ .type sys32_call_table,@object
+ EXPORT(sys32_call_table)
+diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
+index f979adfd4fc20..145f905fb3623 100644
+--- a/arch/mips/kernel/setup.c
++++ b/arch/mips/kernel/setup.c
+@@ -11,6 +11,8 @@
+ * Copyright (C) 2000, 2001, 2002, 2007 Maciej W. Rozycki
+ */
+ #include <linux/init.h>
++#include <linux/cpu.h>
++#include <linux/delay.h>
+ #include <linux/ioport.h>
+ #include <linux/export.h>
+ #include <linux/screen_info.h>
+@@ -156,10 +158,6 @@ static unsigned long __init init_initrd(void)
+ pr_err("initrd start must be page aligned\n");
+ goto disable;
+ }
+- if (initrd_start < PAGE_OFFSET) {
+- pr_err("initrd start < PAGE_OFFSET\n");
+- goto disable;
+- }
+
+ /*
+ * Sanitize initrd addresses. For example firmware
+@@ -172,6 +170,11 @@ static unsigned long __init init_initrd(void)
+ initrd_end = (unsigned long)__va(end);
+ initrd_start = (unsigned long)__va(__pa(initrd_start));
+
++ if (initrd_start < PAGE_OFFSET) {
++ pr_err("initrd start < PAGE_OFFSET\n");
++ goto disable;
++ }
++
+ ROOT_DEV = Root_RAM0;
+ return PFN_UP(end);
+ disable:
+@@ -803,9 +806,20 @@ early_param("coherentio", setcoherentio);
+
+ static int __init setnocoherentio(char *str)
+ {
+- dma_default_coherent = true;
++ dma_default_coherent = false;
+ pr_info("Software DMA cache coherency (command line)\n");
+ return 0;
+ }
+ early_param("nocoherentio", setnocoherentio);
+ #endif
++
++void __init arch_cpu_finalize_init(void)
++{
++ unsigned int cpu = smp_processor_id();
++
++ cpu_data[cpu].udelay_val = loops_per_jiffy;
++ check_bugs32();
++
++ if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
++ check_bugs64();
++}
+diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
+index d542fb7af3ba2..1986d13094100 100644
+--- a/arch/mips/kernel/smp.c
++++ b/arch/mips/kernel/smp.c
+@@ -351,6 +351,9 @@ asmlinkage void start_secondary(void)
+ cpu = smp_processor_id();
+ cpu_data[cpu].udelay_val = loops_per_jiffy;
+
++ set_cpu_sibling_map(cpu);
++ set_cpu_core_map(cpu);
++
+ cpumask_set_cpu(cpu, &cpu_coherent_mask);
+ notify_cpu_starting(cpu);
+
+@@ -362,9 +365,6 @@ asmlinkage void start_secondary(void)
+ /* The CPU is running and counters synchronised, now mark it online */
+ set_cpu_online(cpu, true);
+
+- set_cpu_sibling_map(cpu);
+- set_cpu_core_map(cpu);
+-
+ calculate_cpu_foreign_map();
+
+ /*
+diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
+index 2afa3eef486a9..ae93a607ddf7e 100644
+--- a/arch/mips/kernel/syscall.c
++++ b/arch/mips/kernel/syscall.c
+@@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
+ " j 3b \n"
+ " .previous \n"
+ " .section __ex_table,\"a\" \n"
+- " "STR(PTR)" 1b, 4b \n"
+- " "STR(PTR)" 2b, 4b \n"
++ " "STR(PTR_WD)" 1b, 4b \n"
++ " "STR(PTR_WD)" 2b, 4b \n"
+ " .previous \n"
+ " .set pop \n"
+ : [old] "=&r" (old),
+@@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
+ " j 3b \n"
+ " .previous \n"
+ " .section __ex_table,\"a\" \n"
+- " "STR(PTR)" 1b, 5b \n"
+- " "STR(PTR)" 2b, 5b \n"
++ " "STR(PTR_WD)" 1b, 5b \n"
++ " "STR(PTR_WD)" 2b, 5b \n"
+ " .previous \n"
+ " .set pop \n"
+ : [old] "=&r" (old),
+@@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op)
+ {
+ return -ENOSYS;
+ }
+-
+-/*
+- * If we ever come here the user sp is bad. Zap the process right away.
+- * Due to the bad stack signaling wouldn't work.
+- */
+-asmlinkage void bad_stack(void)
+-{
+- do_exit(SIGSEGV);
+-}
+diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
+index caa01457dce60..ed339d7979f3f 100644
+--- a/arch/mips/kernel/time.c
++++ b/arch/mips/kernel/time.c
+@@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void)
+ case CPU_R4400MC:
+ /*
+ * The published errata for the R4400 up to 3.0 say the CPU
+- * has the mfc0 from count bug.
++ * has the mfc0 from count bug. This seems the last version
++ * produced.
+ */
+- if ((current_cpu_data.processor_id & 0xff) <= 0x30)
+- return 1;
+-
+- /*
+- * we assume newer revisions are ok
+- */
+- return 0;
++ return 1;
+ }
+
+ return 0;
+diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
+index 6f07362de5cec..afb2c955d99ef 100644
+--- a/arch/mips/kernel/traps.c
++++ b/arch/mips/kernel/traps.c
+@@ -416,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
+- do_exit(sig);
++ make_task_dead(sig);
+ }
+
+ extern struct exception_table_entry __start___dbe_table[];
+@@ -2085,19 +2085,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
+ * If no shadow set is selected then use the default handler
+ * that does normal register saving and standard interrupt exit
+ */
+- extern char except_vec_vi, except_vec_vi_lui;
+- extern char except_vec_vi_ori, except_vec_vi_end;
+- extern char rollback_except_vec_vi;
+- char *vec_start = using_rollback_handler() ?
+- &rollback_except_vec_vi : &except_vec_vi;
++ extern const u8 except_vec_vi[], except_vec_vi_lui[];
++ extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
++ extern const u8 rollback_except_vec_vi[];
++ const u8 *vec_start = using_rollback_handler() ?
++ rollback_except_vec_vi : except_vec_vi;
+ #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
+- const int lui_offset = &except_vec_vi_lui - vec_start + 2;
+- const int ori_offset = &except_vec_vi_ori - vec_start + 2;
++ const int lui_offset = except_vec_vi_lui - vec_start + 2;
++ const int ori_offset = except_vec_vi_ori - vec_start + 2;
+ #else
+- const int lui_offset = &except_vec_vi_lui - vec_start;
+- const int ori_offset = &except_vec_vi_ori - vec_start;
++ const int lui_offset = except_vec_vi_lui - vec_start;
++ const int ori_offset = except_vec_vi_ori - vec_start;
+ #endif
+- const int handler_len = &except_vec_vi_end - vec_start;
++ const int handler_len = except_vec_vi_end - vec_start;
+
+ if (handler_len > VECTORSPACING) {
+ /*
+@@ -2305,7 +2305,7 @@ void per_cpu_trap_init(bool is_boot_cpu)
+ }
+
+ /* Install CPU exception handler */
+-void set_handler(unsigned long offset, void *addr, unsigned long size)
++void set_handler(unsigned long offset, const void *addr, unsigned long size)
+ {
+ #ifdef CONFIG_CPU_MICROMIPS
+ memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size);
+diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
+index 3d0cf471f2fe1..b2cc2c2dd4bfc 100644
+--- a/arch/mips/kernel/vdso.c
++++ b/arch/mips/kernel/vdso.c
+@@ -159,7 +159,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ /* Map GIC user page. */
+ if (gic_size) {
+ gic_base = (unsigned long)mips_gic_base + MIPS_GIC_USER_OFS;
+- gic_pfn = virt_to_phys((void *)gic_base) >> PAGE_SHIFT;
++ gic_pfn = PFN_DOWN(__pa(gic_base));
+
+ ret = io_remap_pfn_range(vma, base, gic_pfn, gic_size,
+ pgprot_noncached(vma->vm_page_prot));
+diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
+index 1f98947fe715d..91d6a5360bb9c 100644
+--- a/arch/mips/kernel/vmlinux.lds.S
++++ b/arch/mips/kernel/vmlinux.lds.S
+@@ -15,6 +15,8 @@
+ #define EMITS_PT_NOTE
+ #endif
+
++#define RUNTIME_DISCARD_EXIT
++
+ #include <asm-generic/vmlinux.lds.h>
+
+ #undef mips
+diff --git a/arch/mips/kernel/vpe-cmp.c b/arch/mips/kernel/vpe-cmp.c
+index e673603e11e5d..92140edb3ce3e 100644
+--- a/arch/mips/kernel/vpe-cmp.c
++++ b/arch/mips/kernel/vpe-cmp.c
+@@ -75,7 +75,6 @@ ATTRIBUTE_GROUPS(vpe);
+
+ static void vpe_device_release(struct device *cd)
+ {
+- kfree(cd);
+ }
+
+ static struct class vpe_class = {
+@@ -157,6 +156,7 @@ out_dev:
+ device_del(&vpe_device);
+
+ out_class:
++ put_device(&vpe_device);
+ class_unregister(&vpe_class);
+
+ out_chrdev:
+@@ -169,7 +169,7 @@ void __exit vpe_module_exit(void)
+ {
+ struct vpe *v, *n;
+
+- device_del(&vpe_device);
++ device_unregister(&vpe_device);
+ class_unregister(&vpe_class);
+ unregister_chrdev(major, VPE_MODULE_NAME);
+
+diff --git a/arch/mips/kernel/vpe-mt.c b/arch/mips/kernel/vpe-mt.c
+index bad6b0891b2b5..84a82b551ec35 100644
+--- a/arch/mips/kernel/vpe-mt.c
++++ b/arch/mips/kernel/vpe-mt.c
+@@ -313,7 +313,6 @@ ATTRIBUTE_GROUPS(vpe);
+
+ static void vpe_device_release(struct device *cd)
+ {
+- kfree(cd);
+ }
+
+ static struct class vpe_class = {
+@@ -497,6 +496,7 @@ out_dev:
+ device_del(&vpe_device);
+
+ out_class:
++ put_device(&vpe_device);
+ class_unregister(&vpe_class);
+
+ out_chrdev:
+@@ -509,7 +509,7 @@ void __exit vpe_module_exit(void)
+ {
+ struct vpe *v, *n;
+
+- device_del(&vpe_device);
++ device_unregister(&vpe_device);
+ class_unregister(&vpe_class);
+ unregister_chrdev(major, VPE_MODULE_NAME);
+
+diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
+index 22e745e49b0ab..3e80b0b2deaab 100644
+--- a/arch/mips/kvm/emulate.c
++++ b/arch/mips/kvm/emulate.c
+@@ -312,7 +312,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
+ */
+ int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+
+ return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) ||
+ (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC);
+@@ -384,7 +384,7 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
+ */
+ static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ ktime_t expires, threshold;
+ u32 count, compare;
+ int running;
+@@ -444,7 +444,7 @@ static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+ */
+ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+
+ /* If count disabled just read static copy of count */
+ if (kvm_mips_count_disabled(vcpu))
+@@ -502,7 +502,7 @@ ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
+ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
+ ktime_t now, u32 count)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ u32 compare;
+ u64 delta;
+ ktime_t expire;
+@@ -603,7 +603,7 @@ resume:
+ */
+ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ ktime_t now;
+
+ /* Calculate bias */
+@@ -649,7 +649,7 @@ void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz)
+ */
+ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ int dc;
+ ktime_t now;
+ u32 count;
+@@ -696,7 +696,7 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
+ */
+ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ int dc;
+ u32 old_compare = kvm_read_c0_guest_compare(cop0);
+ s32 delta = compare - old_compare;
+@@ -779,7 +779,7 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
+ */
+ static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ u32 count;
+ ktime_t now;
+
+@@ -806,7 +806,7 @@ static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
+ */
+ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+
+ kvm_set_c0_guest_cause(cop0, CAUSEF_DC);
+ if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
+@@ -826,7 +826,7 @@ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
+ */
+ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ u32 count;
+
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_DC);
+@@ -852,7 +852,7 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
+ */
+ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ s64 changed = count_ctl ^ vcpu->arch.count_ctl;
+ s64 delta;
+ ktime_t expire, now;
+diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
+index 75c6f264c626c..6b15ac9786583 100644
+--- a/arch/mips/kvm/mips.c
++++ b/arch/mips/kvm/mips.c
+@@ -652,7 +652,7 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
+ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+ int ret;
+ s64 v;
+@@ -764,7 +764,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
+ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+ s64 v;
+ s64 vs[2];
+@@ -1104,7 +1104,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+ {
+ return kvm_mips_pending_timer(vcpu) ||
+- kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI;
++ kvm_read_c0_guest_cause(&vcpu->arch.cop0) & C_TI;
+ }
+
+ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
+@@ -1128,7 +1128,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
+ kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi);
+ kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo);
+
+- cop0 = vcpu->arch.cop0;
++ cop0 = &vcpu->arch.cop0;
+ kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n",
+ kvm_read_c0_guest_status(cop0),
+ kvm_read_c0_guest_cause(cop0));
+@@ -1250,7 +1250,7 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+
+ case EXCCODE_TLBS:
+ kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n",
+- cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
++ cause, kvm_read_c0_guest_status(&vcpu->arch.cop0), opc,
+ badvaddr);
+
+ ++vcpu->stat.tlbmiss_st_exits;
+@@ -1322,7 +1322,7 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+ kvm_get_badinstr(opc, vcpu, &inst);
+ kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
+ exccode, opc, inst, badvaddr,
+- kvm_read_c0_guest_status(vcpu->arch.cop0));
++ kvm_read_c0_guest_status(&vcpu->arch.cop0));
+ kvm_arch_vcpu_dump_regs(vcpu);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+@@ -1384,7 +1384,7 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+ /* Enable FPU for guest and restore context */
+ void kvm_own_fpu(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ unsigned int sr, cfg5;
+
+ preempt_disable();
+@@ -1428,7 +1428,7 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
+ /* Enable MSA for guest and restore context */
+ void kvm_own_msa(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ unsigned int sr, cfg5;
+
+ preempt_disable();
+diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c
+index 53f851a615542..3e6682018fbe6 100644
+--- a/arch/mips/kvm/stats.c
++++ b/arch/mips/kvm/stats.c
+@@ -54,9 +54,9 @@ void kvm_mips_dump_stats(struct kvm_vcpu *vcpu)
+ kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id);
+ for (i = 0; i < N_MIPS_COPROC_REGS; i++) {
+ for (j = 0; j < N_MIPS_COPROC_SEL; j++) {
+- if (vcpu->arch.cop0->stat[i][j])
++ if (vcpu->arch.cop0.stat[i][j])
+ kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j,
+- vcpu->arch.cop0->stat[i][j]);
++ vcpu->arch.cop0.stat[i][j]);
+ }
+ }
+ #endif
+diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
+index a8c7fd7bf6d26..136c3535a1cbb 100644
+--- a/arch/mips/kvm/trace.h
++++ b/arch/mips/kvm/trace.h
+@@ -322,11 +322,11 @@ TRACE_EVENT_FN(kvm_guest_mode_change,
+ ),
+
+ TP_fast_assign(
+- __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0);
++ __entry->epc = kvm_read_c0_guest_epc(&vcpu->arch.cop0);
+ __entry->pc = vcpu->arch.pc;
+- __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0);
+- __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0);
+- __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0);
++ __entry->badvaddr = kvm_read_c0_guest_badvaddr(&vcpu->arch.cop0);
++ __entry->status = kvm_read_c0_guest_status(&vcpu->arch.cop0);
++ __entry->cause = kvm_read_c0_guest_cause(&vcpu->arch.cop0);
+ ),
+
+ TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx",
+diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
+index 4adca5abbc72d..717f883333164 100644
+--- a/arch/mips/kvm/vz.c
++++ b/arch/mips/kvm/vz.c
+@@ -422,7 +422,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
+ */
+ static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ u32 cause, compare;
+
+ compare = kvm_read_sw_gc0_compare(cop0);
+@@ -517,7 +517,7 @@ static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu,
+ */
+ static void kvm_vz_save_timer(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ u32 gctl0, compare, cause;
+
+ gctl0 = read_c0_guestctl0();
+@@ -863,7 +863,7 @@ static unsigned long mips_process_maar(unsigned int op, unsigned long val)
+
+ static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+
+ val &= MIPS_MAARI_INDEX;
+ if (val == MIPS_MAARI_INDEX)
+@@ -876,7 +876,7 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
+ u32 *opc, u32 cause,
+ struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ enum emulation_result er = EMULATE_DONE;
+ u32 rt, rd, sel;
+ unsigned long curr_pc;
+@@ -1905,7 +1905,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 *v)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ unsigned int idx;
+
+ switch (reg->id) {
+@@ -2075,7 +2075,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
+ case KVM_REG_MIPS_CP0_MAARI:
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+- *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0);
++ *v = kvm_read_sw_gc0_maari(&vcpu->arch.cop0);
+ break;
+ #ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXT:
+@@ -2129,7 +2129,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 v)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ unsigned int idx;
+ int ret = 0;
+ unsigned int cur, change;
+@@ -2556,7 +2556,7 @@ static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu)
+
+ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ bool migrated, all;
+
+ /*
+@@ -2698,7 +2698,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+
+ static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+
+ if (current->flags & PF_VCPU)
+ kvm_vz_vcpu_save_wired(vcpu);
+@@ -3070,7 +3070,7 @@ static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu)
+
+ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
+ {
+- struct mips_coproc *cop0 = vcpu->arch.cop0;
++ struct mips_coproc *cop0 = &vcpu->arch.cop0;
+ unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */
+
+ /*
+diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
+index dd819e31fcbbf..2d5a0bcb0cec1 100644
+--- a/arch/mips/lantiq/clk.c
++++ b/arch/mips/lantiq/clk.c
+@@ -50,6 +50,7 @@ struct clk *clk_get_io(void)
+ {
+ return &cpu_clk_generic[2];
+ }
++EXPORT_SYMBOL_GPL(clk_get_io);
+
+ struct clk *clk_get_ppe(void)
+ {
+@@ -158,6 +159,18 @@ void clk_deactivate(struct clk *clk)
+ }
+ EXPORT_SYMBOL(clk_deactivate);
+
++struct clk *clk_get_parent(struct clk *clk)
++{
++ return NULL;
++}
++EXPORT_SYMBOL(clk_get_parent);
++
++int clk_set_parent(struct clk *clk, struct clk *parent)
++{
++ return 0;
++}
++EXPORT_SYMBOL(clk_set_parent);
++
+ static inline u32 get_counter_resolution(void)
+ {
+ u32 res;
+diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c
+index 42222f849bd25..446a2536999bf 100644
+--- a/arch/mips/lantiq/falcon/sysctrl.c
++++ b/arch/mips/lantiq/falcon/sysctrl.c
+@@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module,
+ {
+ struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL);
+
++ if (!clk)
++ return;
+ clk->cl.dev_id = dev;
+ clk->cl.con_id = NULL;
+ clk->cl.clk = clk;
+diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
+index 63dccb2ed08b2..53fcc672a2944 100644
+--- a/arch/mips/lantiq/xway/dma.c
++++ b/arch/mips/lantiq/xway/dma.c
+@@ -11,6 +11,7 @@
+ #include <linux/export.h>
+ #include <linux/spinlock.h>
+ #include <linux/clk.h>
++#include <linux/delay.h>
+ #include <linux/err.h>
+ #include <linux/of.h>
+
+@@ -30,6 +31,7 @@
+ #define LTQ_DMA_PCTRL 0x44
+ #define LTQ_DMA_IRNEN 0xf4
+
++#define DMA_ID_CHNR GENMASK(26, 20) /* channel number */
+ #define DMA_DESCPT BIT(3) /* descriptor complete irq */
+ #define DMA_TX BIT(8) /* TX channel direction */
+ #define DMA_CHAN_ON BIT(0) /* channel on / off bit */
+@@ -39,8 +41,11 @@
+ #define DMA_IRQ_ACK 0x7e /* IRQ status register */
+ #define DMA_POLL BIT(31) /* turn on channel polling */
+ #define DMA_CLK_DIV4 BIT(6) /* polling clock divider */
+-#define DMA_2W_BURST BIT(1) /* 2 word burst length */
+-#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */
++#define DMA_PCTRL_2W_BURST 0x1 /* 2 word burst length */
++#define DMA_PCTRL_4W_BURST 0x2 /* 4 word burst length */
++#define DMA_PCTRL_8W_BURST 0x3 /* 8 word burst length */
++#define DMA_TX_BURST_SHIFT 4 /* tx burst shift */
++#define DMA_RX_BURST_SHIFT 2 /* rx burst shift */
+ #define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */
+ #define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */
+
+@@ -191,7 +196,8 @@ ltq_dma_init_port(int p)
+ break;
+
+ case DMA_PORT_DEU:
+- ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
++ ltq_dma_w32((DMA_PCTRL_2W_BURST << DMA_TX_BURST_SHIFT) |
++ (DMA_PCTRL_2W_BURST << DMA_RX_BURST_SHIFT),
+ LTQ_DMA_PCTRL);
+ break;
+
+@@ -206,7 +212,7 @@ ltq_dma_init(struct platform_device *pdev)
+ {
+ struct clk *clk;
+ struct resource *res;
+- unsigned id;
++ unsigned int id, nchannels;
+ int i;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+@@ -222,21 +228,24 @@ ltq_dma_init(struct platform_device *pdev)
+ clk_enable(clk);
+ ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL);
+
++ usleep_range(1, 10);
++
+ /* disable all interrupts */
+ ltq_dma_w32(0, LTQ_DMA_IRNEN);
+
+ /* reset/configure each channel */
+- for (i = 0; i < DMA_MAX_CHANNEL; i++) {
++ id = ltq_dma_r32(LTQ_DMA_ID);
++ nchannels = ((id & DMA_ID_CHNR) >> 20);
++ for (i = 0; i < nchannels; i++) {
+ ltq_dma_w32(i, LTQ_DMA_CS);
+ ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL);
+ ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL);
+ ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+ }
+
+- id = ltq_dma_r32(LTQ_DMA_ID);
+ dev_info(&pdev->dev,
+ "Init done - hw rev: %X, ports: %d, channels: %d\n",
+- id & 0x1f, (id >> 16) & 0xf, id >> 20);
++ id & 0x1f, (id >> 16) & 0xf, nchannels);
+
+ return 0;
+ }
+diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c
+index 3d5683e75cf1e..200fe9ff641d6 100644
+--- a/arch/mips/lantiq/xway/gptu.c
++++ b/arch/mips/lantiq/xway/gptu.c
+@@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con,
+ {
+ struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL);
+
++ if (!clk)
++ return;
+ clk->cl.dev_id = dev_name(dev);
+ clk->cl.con_id = con;
+ clk->cl.clk = clk;
+diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
+index 917fac1636b71..084f6caba5f23 100644
+--- a/arch/mips/lantiq/xway/sysctrl.c
++++ b/arch/mips/lantiq/xway/sysctrl.c
+@@ -315,6 +315,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate,
+ {
+ struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL);
+
++ if (!clk)
++ return;
+ clk->cl.dev_id = dev;
+ clk->cl.con_id = con;
+ clk->cl.clk = clk;
+@@ -338,6 +340,8 @@ static void clkdev_add_cgu(const char *dev, const char *con,
+ {
+ struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL);
+
++ if (!clk)
++ return;
+ clk->cl.dev_id = dev;
+ clk->cl.con_id = con;
+ clk->cl.clk = clk;
+@@ -356,24 +360,28 @@ static void clkdev_add_pci(void)
+ struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL);
+
+ /* main pci clock */
+- clk->cl.dev_id = "17000000.pci";
+- clk->cl.con_id = NULL;
+- clk->cl.clk = clk;
+- clk->rate = CLOCK_33M;
+- clk->rates = valid_pci_rates;
+- clk->enable = pci_enable;
+- clk->disable = pmu_disable;
+- clk->module = 0;
+- clk->bits = PMU_PCI;
+- clkdev_add(&clk->cl);
++ if (clk) {
++ clk->cl.dev_id = "17000000.pci";
++ clk->cl.con_id = NULL;
++ clk->cl.clk = clk;
++ clk->rate = CLOCK_33M;
++ clk->rates = valid_pci_rates;
++ clk->enable = pci_enable;
++ clk->disable = pmu_disable;
++ clk->module = 0;
++ clk->bits = PMU_PCI;
++ clkdev_add(&clk->cl);
++ }
+
+ /* use internal/external bus clock */
+- clk_ext->cl.dev_id = "17000000.pci";
+- clk_ext->cl.con_id = "external";
+- clk_ext->cl.clk = clk_ext;
+- clk_ext->enable = pci_ext_enable;
+- clk_ext->disable = pci_ext_disable;
+- clkdev_add(&clk_ext->cl);
++ if (clk_ext) {
++ clk_ext->cl.dev_id = "17000000.pci";
++ clk_ext->cl.con_id = "external";
++ clk_ext->cl.clk = clk_ext;
++ clk_ext->enable = pci_ext_enable;
++ clk_ext->disable = pci_ext_disable;
++ clkdev_add(&clk_ext->cl);
++ }
+ }
+
+ /* xway socs can generate clocks on gpio pins */
+@@ -393,9 +401,15 @@ static void clkdev_add_clkout(void)
+ char *name;
+
+ name = kzalloc(sizeof("clkout0"), GFP_KERNEL);
++ if (!name)
++ continue;
+ sprintf(name, "clkout%d", i);
+
+ clk = kzalloc(sizeof(struct clk), GFP_KERNEL);
++ if (!clk) {
++ kfree(name);
++ continue;
++ }
+ clk->cl.dev_id = "1f103000.cgu";
+ clk->cl.con_id = name;
+ clk->cl.clk = clk;
+diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
+index a46db08071953..7767137c3e49a 100644
+--- a/arch/mips/lib/csum_partial.S
++++ b/arch/mips/lib/csum_partial.S
+@@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial)
+ .if \mode == LEGACY_MODE; \
+ 9: insn reg, addr; \
+ .section __ex_table,"a"; \
+- PTR 9b, .L_exc; \
++ PTR_WD 9b, .L_exc; \
+ .previous; \
+ /* This is enabled in EVA mode */ \
+ .else; \
+@@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial)
+ ((\to == USEROP) && (type == ST_INSN)); \
+ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \
+ .section __ex_table,"a"; \
+- PTR 9b, .L_exc; \
++ PTR_WD 9b, .L_exc; \
+ .previous; \
+ .else; \
+ /* EVA without exception */ \
+diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
+index 277c32296636d..18a43f2e29c81 100644
+--- a/arch/mips/lib/memcpy.S
++++ b/arch/mips/lib/memcpy.S
+@@ -116,7 +116,7 @@
+ .if \mode == LEGACY_MODE; \
+ 9: insn reg, addr; \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous; \
+ /* This is assembled in EVA mode */ \
+ .else; \
+@@ -125,7 +125,7 @@
+ ((\to == USEROP) && (type == ST_INSN)); \
+ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous; \
+ .else; \
+ /* \
+diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
+index b0baa3c79fad0..0b342bae9a98c 100644
+--- a/arch/mips/lib/memset.S
++++ b/arch/mips/lib/memset.S
+@@ -52,7 +52,7 @@
+ 9: ___BUILD_EVA_INSN(insn, reg, addr); \
+ .endif; \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous
+
+ .macro f_fill64 dst, offset, val, fixup, mode
+diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
+index 556acf684d7be..13aaa9927ad12 100644
+--- a/arch/mips/lib/strncpy_user.S
++++ b/arch/mips/lib/strncpy_user.S
+@@ -15,7 +15,7 @@
+ #define EX(insn,reg,addr,handler) \
+ 9: insn reg, addr; \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous
+
+ /*
+@@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm)
+ jr ra
+
+ .section __ex_table,"a"
+- PTR 1b, .Lfault
++ PTR_WD 1b, .Lfault
+ .previous
+
+ EXPORT_SYMBOL(__strncpy_from_user_asm)
+diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
+index 92b63f20ec05f..6de31b616f9c1 100644
+--- a/arch/mips/lib/strnlen_user.S
++++ b/arch/mips/lib/strnlen_user.S
+@@ -14,7 +14,7 @@
+ #define EX(insn,reg,addr,handler) \
+ 9: insn reg, addr; \
+ .section __ex_table,"a"; \
+- PTR 9b, handler; \
++ PTR_WD 9b, handler; \
+ .previous
+
+ /*
+diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c
+index 794c96c2a4cdd..311dc1580bbde 100644
+--- a/arch/mips/loongson32/common/platform.c
++++ b/arch/mips/loongson32/common/platform.c
+@@ -98,7 +98,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
+ if (plat_dat->bus_id) {
+ __raw_writel(__raw_readl(LS1X_MUX_CTRL0) | GMAC1_USE_UART1 |
+ GMAC1_USE_UART0, LS1X_MUX_CTRL0);
+- switch (plat_dat->interface) {
++ switch (plat_dat->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ val &= ~(GMAC1_USE_TXCLK | GMAC1_USE_PWM23);
+ break;
+@@ -107,12 +107,12 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
+ break;
+ default:
+ pr_err("unsupported mii mode %d\n",
+- plat_dat->interface);
++ plat_dat->phy_interface);
+ return -ENOTSUPP;
+ }
+ val &= ~GMAC1_SHUT;
+ } else {
+- switch (plat_dat->interface) {
++ switch (plat_dat->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ val &= ~(GMAC0_USE_TXCLK | GMAC0_USE_PWM01);
+ break;
+@@ -121,7 +121,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
+ break;
+ default:
+ pr_err("unsupported mii mode %d\n",
+- plat_dat->interface);
++ plat_dat->phy_interface);
+ return -ENOTSUPP;
+ }
+ val &= ~GMAC0_SHUT;
+@@ -131,7 +131,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
+ plat_dat = dev_get_platdata(&pdev->dev);
+
+ val &= ~PHY_INTF_SELI;
+- if (plat_dat->interface == PHY_INTERFACE_MODE_RMII)
++ if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII)
+ val |= 0x4 << PHY_INTF_SELI_SHIFT;
+ __raw_writel(val, LS1X_MUX_CTRL1);
+
+@@ -146,9 +146,9 @@ static struct plat_stmmacenet_data ls1x_eth0_pdata = {
+ .bus_id = 0,
+ .phy_addr = -1,
+ #if defined(CONFIG_LOONGSON1_LS1B)
+- .interface = PHY_INTERFACE_MODE_MII,
++ .phy_interface = PHY_INTERFACE_MODE_MII,
+ #elif defined(CONFIG_LOONGSON1_LS1C)
+- .interface = PHY_INTERFACE_MODE_RMII,
++ .phy_interface = PHY_INTERFACE_MODE_RMII,
+ #endif
+ .mdio_bus_data = &ls1x_mdio_bus_data,
+ .dma_cfg = &ls1x_eth_dma_cfg,
+@@ -186,7 +186,7 @@ struct platform_device ls1x_eth0_pdev = {
+ static struct plat_stmmacenet_data ls1x_eth1_pdata = {
+ .bus_id = 1,
+ .phy_addr = -1,
+- .interface = PHY_INTERFACE_MODE_MII,
++ .phy_interface = PHY_INTERFACE_MODE_MII,
+ .mdio_bus_data = &ls1x_mdio_bus_data,
+ .dma_cfg = &ls1x_eth_dma_cfg,
+ .has_gmac = 1,
+diff --git a/arch/mips/loongson32/ls1c/board.c b/arch/mips/loongson32/ls1c/board.c
+index e9de6da0ce51f..9dcfe9de55b0a 100644
+--- a/arch/mips/loongson32/ls1c/board.c
++++ b/arch/mips/loongson32/ls1c/board.c
+@@ -15,7 +15,6 @@ static struct platform_device *ls1c_platform_devices[] __initdata = {
+ static int __init ls1c_platform_init(void)
+ {
+ ls1x_serial_set_uartclk(&ls1x_uart_pdev);
+- ls1x_rtc_set_extclk(&ls1x_rtc_pdev);
+
+ return platform_add_devices(ls1c_platform_devices,
+ ARRAY_SIZE(ls1c_platform_devices));
+diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c
+index 758d5d26aaaa2..e420800043b08 100644
+--- a/arch/mips/loongson64/reset.c
++++ b/arch/mips/loongson64/reset.c
+@@ -16,6 +16,7 @@
+ #include <asm/bootinfo.h>
+ #include <asm/idle.h>
+ #include <asm/reboot.h>
++#include <asm/bug.h>
+
+ #include <loongson.h>
+ #include <boot_param.h>
+@@ -159,8 +160,17 @@ static int __init mips_reboot_setup(void)
+
+ #ifdef CONFIG_KEXEC
+ kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL);
++ if (WARN_ON(!kexec_argv))
++ return -ENOMEM;
++
+ kdump_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL);
++ if (WARN_ON(!kdump_argv))
++ return -ENOMEM;
++
+ kexec_envp = kmalloc(KEXEC_ENVP_SIZE, GFP_KERNEL);
++ if (WARN_ON(!kexec_envp))
++ return -ENOMEM;
++
+ fw_arg1 = KEXEC_ARGV_ADDR;
+ memcpy(kexec_envp, (void *)fw_arg2, KEXEC_ENVP_SIZE);
+
+diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c
+index 9a29e94d3db1d..3115d4de982c5 100644
+--- a/arch/mips/loongson64/vbios_quirk.c
++++ b/arch/mips/loongson64/vbios_quirk.c
+@@ -3,7 +3,7 @@
+ #include <linux/pci.h>
+ #include <loongson.h>
+
+-static void pci_fixup_radeon(struct pci_dev *pdev)
++static void pci_fixup_video(struct pci_dev *pdev)
+ {
+ struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
+
+@@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev)
+ res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
+ IORESOURCE_PCI_FIXED;
+
+- dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
+- PCI_ROM_RESOURCE, res);
++ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res);
+ }
+-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615,
+- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
++DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615,
++ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+diff --git a/arch/mips/mm/physaddr.c b/arch/mips/mm/physaddr.c
+index a1ced5e449511..f9b8c85e98433 100644
+--- a/arch/mips/mm/physaddr.c
++++ b/arch/mips/mm/physaddr.c
+@@ -5,6 +5,7 @@
+ #include <linux/mmdebug.h>
+ #include <linux/mm.h>
+
++#include <asm/addrspace.h>
+ #include <asm/sections.h>
+ #include <asm/io.h>
+ #include <asm/page.h>
+@@ -12,15 +13,6 @@
+
+ static inline bool __debug_virt_addr_valid(unsigned long x)
+ {
+- /* high_memory does not get immediately defined, and there
+- * are early callers of __pa() against PAGE_OFFSET
+- */
+- if (!high_memory && x >= PAGE_OFFSET)
+- return true;
+-
+- if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory)
+- return true;
+-
+ /*
+ * MAX_DMA_ADDRESS is a virtual address that may not correspond to an
+ * actual physical address. Enough code relies on
+@@ -30,7 +22,9 @@ static inline bool __debug_virt_addr_valid(unsigned long x)
+ if (x == MAX_DMA_ADDRESS)
+ return true;
+
+- return false;
++ return x >= PAGE_OFFSET && (KSEGX(x) < KSEG2 ||
++ IS_ENABLED(CONFIG_EVA) ||
++ !IS_ENABLED(CONFIG_HIGHMEM));
+ }
+
+ phys_addr_t __virt_to_phys(volatile const void *x)
+diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
+index 9adad24c2e65e..3471a089bc05f 100644
+--- a/arch/mips/mm/tlbex.c
++++ b/arch/mips/mm/tlbex.c
+@@ -634,7 +634,7 @@ static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
+ return;
+ }
+
+- if (cpu_has_rixi && !!_PAGE_NO_EXEC) {
++ if (cpu_has_rixi && _PAGE_NO_EXEC != 0) {
+ if (fill_includes_sw_bits) {
+ UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
+ } else {
+@@ -2167,16 +2167,14 @@ static void build_r4000_tlb_load_handler(void)
+ uasm_i_tlbr(&p);
+
+ switch (current_cpu_type()) {
+- default:
+- if (cpu_has_mips_r2_exec_hazard) {
+- uasm_i_ehb(&p);
+- fallthrough;
+-
+ case CPU_CAVIUM_OCTEON:
+ case CPU_CAVIUM_OCTEON_PLUS:
+ case CPU_CAVIUM_OCTEON2:
+- break;
+- }
++ break;
++ default:
++ if (cpu_has_mips_r2_exec_hazard)
++ uasm_i_ehb(&p);
++ break;
+ }
+
+ /* Examine entrylo 0 or 1 based on ptr. */
+@@ -2243,15 +2241,14 @@ static void build_r4000_tlb_load_handler(void)
+ uasm_i_tlbr(&p);
+
+ switch (current_cpu_type()) {
+- default:
+- if (cpu_has_mips_r2_exec_hazard) {
+- uasm_i_ehb(&p);
+-
+ case CPU_CAVIUM_OCTEON:
+ case CPU_CAVIUM_OCTEON_PLUS:
+ case CPU_CAVIUM_OCTEON2:
+- break;
+- }
++ break;
++ default:
++ if (cpu_has_mips_r2_exec_hazard)
++ uasm_i_ehb(&p);
++ break;
+ }
+
+ /* Examine entrylo 0 or 1 based on ptr. */
+@@ -2576,7 +2573,7 @@ static void check_pabits(void)
+ unsigned long entry;
+ unsigned pabits, fillbits;
+
+- if (!cpu_has_rixi || !_PAGE_NO_EXEC) {
++ if (!cpu_has_rixi || _PAGE_NO_EXEC == 0) {
+ /*
+ * We'll only be making use of the fact that we can rotate bits
+ * into the fill if the CPU supports RIXI, so don't bother
+diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
+index 25372e62783b5..3cd1b408fa1cb 100644
+--- a/arch/mips/pic32/pic32mzda/early_console.c
++++ b/arch/mips/pic32/pic32mzda/early_console.c
+@@ -27,7 +27,7 @@
+ #define U_BRG(x) (UART_BASE(x) + 0x40)
+
+ static void __iomem *uart_base;
+-static char console_port = -1;
++static int console_port = -1;
+
+ static int __init configure_uart_pins(int port)
+ {
+@@ -47,7 +47,7 @@ static int __init configure_uart_pins(int port)
+ return 0;
+ }
+
+-static void __init configure_uart(char port, int baud)
++static void __init configure_uart(int port, int baud)
+ {
+ u32 pbclk;
+
+@@ -60,7 +60,7 @@ static void __init configure_uart(char port, int baud)
+ uart_base + PIC32_SET(U_STA(port)));
+ }
+
+-static void __init setup_early_console(char port, int baud)
++static void __init setup_early_console(int port, int baud)
+ {
+ if (configure_uart_pins(port))
+ return;
+@@ -130,16 +130,15 @@ _out:
+ return baud;
+ }
+
+-void __init fw_init_early_console(char port)
++void __init fw_init_early_console(void)
+ {
+ char *arch_cmdline = pic32_getcmdline();
+- int baud = -1;
++ int baud, port;
+
+ uart_base = ioremap(PIC32_BASE_UART, 0xc00);
+
+ baud = get_baud_from_cmdline(arch_cmdline);
+- if (port == -1)
+- port = get_port_from_cmdline(arch_cmdline);
++ port = get_port_from_cmdline(arch_cmdline);
+
+ if (port == -1)
+ port = EARLY_CONSOLE_PORT;
+diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
+index 764f2d022fae4..429830afff54f 100644
+--- a/arch/mips/pic32/pic32mzda/init.c
++++ b/arch/mips/pic32/pic32mzda/init.c
+@@ -47,7 +47,7 @@ void __init plat_mem_setup(void)
+ strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+ #ifdef CONFIG_EARLY_PRINTK
+- fw_init_early_console(-1);
++ fw_init_early_console();
+ #endif
+ pic32_config_init();
+ }
+diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
+index bdf53807d7c2b..bea857c9da8b7 100644
+--- a/arch/mips/ralink/ill_acc.c
++++ b/arch/mips/ralink/ill_acc.c
+@@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void)
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ pr_err("%pOFn: failed to lookup pdev\n", np);
++ of_node_put(np);
+ return -EINVAL;
+ }
+
+diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
+index bd71f5b142383..0db23bcf2a970 100644
+--- a/arch/mips/ralink/mt7621.c
++++ b/arch/mips/ralink/mt7621.c
+@@ -20,31 +20,42 @@
+
+ #include "common.h"
+
+-static void *detect_magic __initdata = detect_memory_region;
++#define MT7621_MEM_TEST_PATTERN 0xaa5555aa
++
++static u32 detect_magic __initdata;
++static struct ralink_soc_info *soc_info_ptr;
+
+ phys_addr_t mips_cpc_default_phys_base(void)
+ {
+ panic("Cannot detect cpc address");
+ }
+
++static bool __init mt7621_addr_wraparound_test(phys_addr_t size)
++{
++ void *dm = (void *)KSEG1ADDR(&detect_magic);
++
++ if (CPHYSADDR(dm + size) >= MT7621_LOWMEM_MAX_SIZE)
++ return true;
++ __raw_writel(MT7621_MEM_TEST_PATTERN, dm);
++ if (__raw_readl(dm) != __raw_readl(dm + size))
++ return false;
++ __raw_writel(~MT7621_MEM_TEST_PATTERN, dm);
++ return __raw_readl(dm) == __raw_readl(dm + size);
++}
++
+ static void __init mt7621_memory_detect(void)
+ {
+- void *dm = &detect_magic;
+ phys_addr_t size;
+
+- for (size = 32 * SZ_1M; size < 256 * SZ_1M; size <<= 1) {
+- if (!__builtin_memcmp(dm, dm + size, sizeof(detect_magic)))
+- break;
++ for (size = 32 * SZ_1M; size <= 256 * SZ_1M; size <<= 1) {
++ if (mt7621_addr_wraparound_test(size)) {
++ memblock_add(MT7621_LOWMEM_BASE, size);
++ return;
++ }
+ }
+
+- if ((size == 256 * SZ_1M) &&
+- (CPHYSADDR(dm + size) < MT7621_LOWMEM_MAX_SIZE) &&
+- __builtin_memcmp(dm, dm + size, sizeof(detect_magic))) {
+- memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE);
+- memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE);
+- } else {
+- memblock_add(MT7621_LOWMEM_BASE, size);
+- }
++ memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE);
++ memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE);
+ }
+
+ void __init ralink_of_remap(void)
+@@ -56,41 +67,83 @@ void __init ralink_of_remap(void)
+ panic("Failed to remap core resources");
+ }
+
+-static void soc_dev_init(struct ralink_soc_info *soc_info, u32 rev)
++static unsigned int __init mt7621_get_soc_name0(void)
++{
++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_NAME0);
++}
++
++static unsigned int __init mt7621_get_soc_name1(void)
++{
++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_NAME1);
++}
++
++static bool __init mt7621_soc_valid(void)
++{
++ if (mt7621_get_soc_name0() == MT7621_CHIP_NAME0 &&
++ mt7621_get_soc_name1() == MT7621_CHIP_NAME1)
++ return true;
++ else
++ return false;
++}
++
++static const char __init *mt7621_get_soc_id(void)
++{
++ if (mt7621_soc_valid())
++ return "MT7621";
++ else
++ return "invalid";
++}
++
++static unsigned int __init mt7621_get_soc_rev(void)
++{
++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_REV);
++}
++
++static unsigned int __init mt7621_get_soc_ver(void)
++{
++ return (mt7621_get_soc_rev() >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK;
++}
++
++static unsigned int __init mt7621_get_soc_eco(void)
++{
++ return (mt7621_get_soc_rev() & CHIP_REV_ECO_MASK);
++}
++
++static const char __init *mt7621_get_soc_revision(void)
++{
++ if (mt7621_get_soc_rev() == 1 && mt7621_get_soc_eco() == 1)
++ return "E2";
++ else
++ return "E1";
++}
++
++static int __init mt7621_soc_dev_init(void)
+ {
+ struct soc_device *soc_dev;
+ struct soc_device_attribute *soc_dev_attr;
+
+ soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
+ if (!soc_dev_attr)
+- return;
++ return -ENOMEM;
+
+ soc_dev_attr->soc_id = "mt7621";
+ soc_dev_attr->family = "Ralink";
++ soc_dev_attr->revision = mt7621_get_soc_revision();
+
+- if (((rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK) == 1 &&
+- (rev & CHIP_REV_ECO_MASK) == 1)
+- soc_dev_attr->revision = "E2";
+- else
+- soc_dev_attr->revision = "E1";
+-
+- soc_dev_attr->data = soc_info;
++ soc_dev_attr->data = soc_info_ptr;
+
+ soc_dev = soc_device_register(soc_dev_attr);
+ if (IS_ERR(soc_dev)) {
+ kfree(soc_dev_attr);
+- return;
++ return PTR_ERR(soc_dev);
+ }
++
++ return 0;
+ }
++device_initcall(mt7621_soc_dev_init);
+
+ void __init prom_soc_init(struct ralink_soc_info *soc_info)
+ {
+- void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE);
+- unsigned char *name = NULL;
+- u32 n0;
+- u32 n1;
+- u32 rev;
+-
+ /* Early detection of CMP support */
+ mips_cm_probe();
+ mips_cpc_probe();
+@@ -113,27 +166,23 @@ void __init prom_soc_init(struct ralink_soc_info *soc_info)
+ __sync();
+ }
+
+- n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);
+- n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1);
+-
+- if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) {
+- name = "MT7621";
++ if (mt7621_soc_valid())
+ soc_info->compatible = "mediatek,mt7621-soc";
+- } else {
+- panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1);
+- }
++ else
++ panic("mt7621: unknown SoC, n0:%08x n1:%08x\n",
++ mt7621_get_soc_name0(),
++ mt7621_get_soc_name1());
+ ralink_soc = MT762X_SOC_MT7621AT;
+- rev = __raw_readl(sysc + SYSC_REG_CHIP_REV);
+
+ snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
+ "MediaTek %s ver:%u eco:%u",
+- name,
+- (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
+- (rev & CHIP_REV_ECO_MASK));
++ mt7621_get_soc_id(),
++ mt7621_get_soc_ver(),
++ mt7621_get_soc_eco());
+
+ soc_info->mem_detect = mt7621_memory_detect;
+
+- soc_dev_init(soc_info, rev);
++ soc_info_ptr = soc_info;
+
+ if (!register_cps_smp_ops())
+ return;
+diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
+index 04684990e28ef..b7f6f782d9a13 100644
+--- a/arch/mips/rb532/devices.c
++++ b/arch/mips/rb532/devices.c
+@@ -301,11 +301,9 @@ static int __init plat_setup_devices(void)
+ static int __init setup_kmac(char *s)
+ {
+ printk(KERN_INFO "korina mac = %s\n", s);
+- if (!mac_pton(s, korina_dev0_data.mac)) {
++ if (!mac_pton(s, korina_dev0_data.mac))
+ printk(KERN_ERR "Invalid mac\n");
+- return -EINVAL;
+- }
+- return 0;
++ return 1;
+ }
+
+ __setup("kmac=", setup_kmac);
+diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
+index 000ede156bdc0..5143d1cf8984c 100644
+--- a/arch/mips/sgi-ip27/ip27-xtalk.c
++++ b/arch/mips/sgi-ip27/ip27-xtalk.c
+@@ -27,15 +27,18 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
+ {
+ struct xtalk_bridge_platform_data *bd;
+ struct sgi_w1_platform_data *wd;
+- struct platform_device *pdev;
++ struct platform_device *pdev_wd;
++ struct platform_device *pdev_bd;
+ struct resource w1_res;
+ unsigned long offset;
+
+ offset = NODE_OFFSET(nasid);
+
+ wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+- if (!wd)
+- goto no_mem;
++ if (!wd) {
++ pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
++ return;
++ }
+
+ snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx",
+ offset + (widget << SWIN_SIZE_BITS));
+@@ -46,22 +49,35 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
+ w1_res.end = w1_res.start + 3;
+ w1_res.flags = IORESOURCE_MEM;
+
+- pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
+- if (!pdev) {
+- kfree(wd);
+- goto no_mem;
++ pdev_wd = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO);
++ if (!pdev_wd) {
++ pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
++ goto err_kfree_wd;
++ }
++ if (platform_device_add_resources(pdev_wd, &w1_res, 1)) {
++ pr_warn("xtalk:n%d/%x bridge failed to add platform resources.\n", nasid, widget);
++ goto err_put_pdev_wd;
++ }
++ if (platform_device_add_data(pdev_wd, wd, sizeof(*wd))) {
++ pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget);
++ goto err_put_pdev_wd;
++ }
++ if (platform_device_add(pdev_wd)) {
++ pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget);
++ goto err_put_pdev_wd;
+ }
+- platform_device_add_resources(pdev, &w1_res, 1);
+- platform_device_add_data(pdev, wd, sizeof(*wd));
+- platform_device_add(pdev);
++ /* platform_device_add_data() duplicates the data */
++ kfree(wd);
+
+ bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+- if (!bd)
+- goto no_mem;
+- pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO);
+- if (!pdev) {
+- kfree(bd);
+- goto no_mem;
++ if (!bd) {
++ pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
++ goto err_unregister_pdev_wd;
++ }
++ pdev_bd = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO);
++ if (!pdev_bd) {
++ pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
++ goto err_kfree_bd;
+ }
+
+
+@@ -82,13 +98,31 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
+ bd->io.flags = IORESOURCE_IO;
+ bd->io_offset = offset;
+
+- platform_device_add_data(pdev, bd, sizeof(*bd));
+- platform_device_add(pdev);
++ if (platform_device_add_data(pdev_bd, bd, sizeof(*bd))) {
++ pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget);
++ goto err_put_pdev_bd;
++ }
++ if (platform_device_add(pdev_bd)) {
++ pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget);
++ goto err_put_pdev_bd;
++ }
++ /* platform_device_add_data() duplicates the data */
++ kfree(bd);
+ pr_info("xtalk:n%d/%x bridge widget\n", nasid, widget);
+ return;
+
+-no_mem:
+- pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
++err_put_pdev_bd:
++ platform_device_put(pdev_bd);
++err_kfree_bd:
++ kfree(bd);
++err_unregister_pdev_wd:
++ platform_device_unregister(pdev_wd);
++ return;
++err_put_pdev_wd:
++ platform_device_put(pdev_wd);
++err_kfree_wd:
++ kfree(wd);
++ return;
+ }
+
+ static int probe_one_port(nasid_t nasid, int widget, int masterwid)
+diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
+index 240bb68ec2478..ff3ba7e778901 100644
+--- a/arch/mips/sni/time.c
++++ b/arch/mips/sni/time.c
+@@ -18,14 +18,14 @@ static int a20r_set_periodic(struct clock_event_device *evt)
+ {
+ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0x34;
+ wmb();
+- *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV;
++ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV & 0xff;
+ wmb();
+ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV >> 8;
+ wmb();
+
+ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0xb4;
+ wmb();
+- *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV;
++ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV & 0xff;
+ wmb();
+ *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV >> 8;
+ wmb();
+diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c
+index 7b7f25b4b057e..9240bcdbe74e4 100644
+--- a/arch/mips/vr41xx/common/icu.c
++++ b/arch/mips/vr41xx/common/icu.c
+@@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq)
+
+ printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
+
+- atomic_inc(&irq_err_count);
+-
+ return -1;
+ }
+
+diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
+index d4cbf069dc224..37a40981deb3b 100644
+--- a/arch/nds32/include/asm/uaccess.h
++++ b/arch/nds32/include/asm/uaccess.h
+@@ -70,9 +70,7 @@ static inline void set_fs(mm_segment_t fs)
+ * versions are void (ie, don't return a value as such).
+ */
+
+-#define get_user __get_user \
+-
+-#define __get_user(x, ptr) \
++#define get_user(x, ptr) \
+ ({ \
+ long __gu_err = 0; \
+ __get_user_check((x), (ptr), __gu_err); \
+@@ -85,6 +83,14 @@ static inline void set_fs(mm_segment_t fs)
+ (void)0; \
+ })
+
++#define __get_user(x, ptr) \
++({ \
++ long __gu_err = 0; \
++ const __typeof__(*(ptr)) __user *__p = (ptr); \
++ __get_user_err((x), __p, (__gu_err)); \
++ __gu_err; \
++})
++
+ #define __get_user_check(x, ptr, err) \
+ ({ \
+ const __typeof__(*(ptr)) __user *__p = (ptr); \
+@@ -165,12 +171,18 @@ do { \
+ : "r"(addr), "i"(-EFAULT) \
+ : "cc")
+
+-#define put_user __put_user \
++#define put_user(x, ptr) \
++({ \
++ long __pu_err = 0; \
++ __put_user_check((x), (ptr), __pu_err); \
++ __pu_err; \
++})
+
+ #define __put_user(x, ptr) \
+ ({ \
+ long __pu_err = 0; \
+- __put_user_err((x), (ptr), __pu_err); \
++ __typeof__(*(ptr)) __user *__p = (ptr); \
++ __put_user_err((x), __p, __pu_err); \
+ __pu_err; \
+ })
+
+diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
+index 9edd7ed7d7bf8..701c09a668de4 100644
+--- a/arch/nds32/kernel/fpu.c
++++ b/arch/nds32/kernel/fpu.c
+@@ -223,7 +223,7 @@ inline void handle_fpu_exception(struct pt_regs *regs)
+ }
+ } else if (fpcsr & FPCSR_mskRIT) {
+ if (!user_mode(regs))
+- do_exit(SIGILL);
++ make_task_dead(SIGILL);
+ si_signo = SIGILL;
+ }
+
+diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
+index 0ce6f9f307e6a..f387919607813 100644
+--- a/arch/nds32/kernel/perf_event_cpu.c
++++ b/arch/nds32/kernel/perf_event_cpu.c
+@@ -1363,6 +1363,7 @@ void
+ perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ unsigned long fp = 0;
+ unsigned long gp = 0;
+ unsigned long lp = 0;
+@@ -1371,7 +1372,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+
+ leaf_fp = 0;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -1479,9 +1480,10 @@ void
+ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct stackframe fr;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+@@ -1493,20 +1495,23 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+
+ unsigned long perf_instruction_pointer(struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
+ /* However, NDS32 does not support virtualization */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+- return perf_guest_cbs->get_guest_ip();
++ if (guest_cbs && guest_cbs->is_in_guest())
++ return guest_cbs->get_guest_ip();
+
+ return instruction_pointer(regs);
+ }
+
+ unsigned long perf_misc_flags(struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ int misc = 0;
+
+ /* However, NDS32 does not support virtualization */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+- if (perf_guest_cbs->is_user_mode())
++ if (guest_cbs && guest_cbs->is_in_guest()) {
++ if (guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
+index f06421c645aff..b90030e8e546f 100644
+--- a/arch/nds32/kernel/traps.c
++++ b/arch/nds32/kernel/traps.c
+@@ -141,7 +141,7 @@ void die(const char *str, struct pt_regs *regs, int err)
+
+ bust_spinlocks(0);
+ spin_unlock_irq(&die_lock);
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ EXPORT_SYMBOL(die);
+@@ -240,7 +240,7 @@ void unhandled_interruption(struct pt_regs *regs)
+ pr_emerg("unhandled_interruption\n");
+ show_regs(regs);
+ if (!user_mode(regs))
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ force_sig(SIGKILL);
+ }
+
+@@ -251,7 +251,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr,
+ addr, type);
+ show_regs(regs);
+ if (!user_mode(regs))
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ force_sig(SIGKILL);
+ }
+
+@@ -278,7 +278,7 @@ void do_revinsn(struct pt_regs *regs)
+ pr_emerg("Reserved Instruction\n");
+ show_regs(regs);
+ if (!user_mode(regs))
+- do_exit(SIGILL);
++ make_task_dead(SIGILL);
+ force_sig(SIGILL);
+ }
+
+diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile
+index 37dfc7e584bce..0b704c1f379f5 100644
+--- a/arch/nios2/boot/Makefile
++++ b/arch/nios2/boot/Makefile
+@@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
+ $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+
+-$(obj)/vmImage: $(obj)/vmlinux.gz
++$(obj)/vmImage: $(obj)/vmlinux.gz FORCE
+ $(call if_changed,uimage)
+ @$(kecho) 'Kernel: $@ is ready'
+
+diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
+index 56339bef3247d..0e7e5b0dd685c 100644
+--- a/arch/nios2/boot/dts/10m50_devboard.dts
++++ b/arch/nios2/boot/dts/10m50_devboard.dts
+@@ -97,7 +97,7 @@
+ rx-fifo-depth = <8192>;
+ tx-fifo-depth = <8192>;
+ address-bits = <48>;
+- max-frame-size = <1518>;
++ max-frame-size = <1500>;
+ local-mac-address = [00 00 00 00 00 00];
+ altr,has-supplementary-unicast;
+ altr,enable-sup-addr = <1>;
+diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts
+index d10fb81686c7e..3ee3169063797 100644
+--- a/arch/nios2/boot/dts/3c120_devboard.dts
++++ b/arch/nios2/boot/dts/3c120_devboard.dts
+@@ -106,7 +106,7 @@
+ interrupt-names = "rx_irq", "tx_irq";
+ rx-fifo-depth = <8192>;
+ tx-fifo-depth = <8192>;
+- max-frame-size = <1518>;
++ max-frame-size = <1500>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ phy-mode = "rgmii-id";
+ phy-handle = <&phy0>;
+diff --git a/arch/nios2/include/asm/entry.h b/arch/nios2/include/asm/entry.h
+index cf37f55efbc22..bafb7b2ca59fc 100644
+--- a/arch/nios2/include/asm/entry.h
++++ b/arch/nios2/include/asm/entry.h
+@@ -50,7 +50,8 @@
+ stw r13, PT_R13(sp)
+ stw r14, PT_R14(sp)
+ stw r15, PT_R15(sp)
+- stw r2, PT_ORIG_R2(sp)
++ movi r24, -1
++ stw r24, PT_ORIG_R2(sp)
+ stw r7, PT_ORIG_R7(sp)
+
+ stw ra, PT_RA(sp)
+diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h
+index 6424621448728..9da34c3022a27 100644
+--- a/arch/nios2/include/asm/ptrace.h
++++ b/arch/nios2/include/asm/ptrace.h
+@@ -74,6 +74,8 @@ extern void show_regs(struct pt_regs *);
+ ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE)\
+ - 1)
+
++#define force_successful_syscall_return() (current_pt_regs()->orig_r2 = -1)
++
+ int do_syscall_trace_enter(void);
+ void do_syscall_trace_exit(void);
+ #endif /* __ASSEMBLY__ */
+diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h
+index a769f871b28d9..40a1adc9bd03e 100644
+--- a/arch/nios2/include/asm/timex.h
++++ b/arch/nios2/include/asm/timex.h
+@@ -8,5 +8,8 @@
+ typedef unsigned long cycles_t;
+
+ extern cycles_t get_cycles(void);
++#define get_cycles get_cycles
++
++#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
+
+ #endif
+diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
+index ba9340e96fd4c..ca9285a915efa 100644
+--- a/arch/nios2/include/asm/uaccess.h
++++ b/arch/nios2/include/asm/uaccess.h
+@@ -88,6 +88,7 @@ extern __must_check long strnlen_user(const char __user *s, long n);
+ /* Optimized macros */
+ #define __get_user_asm(val, insn, addr, err) \
+ { \
++ unsigned long __gu_val; \
+ __asm__ __volatile__( \
+ " movi %0, %3\n" \
+ "1: " insn " %1, 0(%2)\n" \
+@@ -96,14 +97,20 @@ extern __must_check long strnlen_user(const char __user *s, long n);
+ " .section __ex_table,\"a\"\n" \
+ " .word 1b, 2b\n" \
+ " .previous" \
+- : "=&r" (err), "=r" (val) \
++ : "=&r" (err), "=r" (__gu_val) \
+ : "r" (addr), "i" (-EFAULT)); \
++ val = (__force __typeof__(*(addr)))__gu_val; \
+ }
+
+-#define __get_user_unknown(val, size, ptr, err) do { \
++extern void __get_user_unknown(void);
++
++#define __get_user_8(val, ptr, err) do { \
++ u64 __val = 0; \
+ err = 0; \
+- if (__copy_from_user(&(val), ptr, size)) { \
++ if (raw_copy_from_user(&(__val), ptr, sizeof(val))) { \
+ err = -EFAULT; \
++ } else { \
++ val = (typeof(val))(typeof((val) - (val)))__val; \
+ } \
+ } while (0)
+
+@@ -119,8 +126,11 @@ do { \
+ case 4: \
+ __get_user_asm(val, "ldw", ptr, err); \
+ break; \
++ case 8: \
++ __get_user_8(val, ptr, err); \
++ break; \
+ default: \
+- __get_user_unknown(val, size, ptr, err); \
++ __get_user_unknown(); \
+ break; \
+ } \
+ } while (0)
+@@ -129,9 +139,7 @@ do { \
+ ({ \
+ long __gu_err = -EFAULT; \
+ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+- unsigned long __gu_val = 0; \
+- __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
+- (x) = (__force __typeof__(x))__gu_val; \
++ __get_user_common(x, sizeof(*(ptr)), __gu_ptr, __gu_err); \
+ __gu_err; \
+ })
+
+@@ -139,11 +147,9 @@ do { \
+ ({ \
+ long __gu_err = -EFAULT; \
+ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+- unsigned long __gu_val = 0; \
+ if (access_ok( __gu_ptr, sizeof(*__gu_ptr))) \
+- __get_user_common(__gu_val, sizeof(*__gu_ptr), \
++ __get_user_common(x, sizeof(*__gu_ptr), \
+ __gu_ptr, __gu_err); \
+- (x) = (__force __typeof__(x))__gu_val; \
+ __gu_err; \
+ })
+
+diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
+index 0794cd7803dfe..99f0a65e62347 100644
+--- a/arch/nios2/kernel/entry.S
++++ b/arch/nios2/kernel/entry.S
+@@ -185,6 +185,7 @@ ENTRY(handle_system_call)
+ ldw r5, PT_R5(sp)
+
+ local_restart:
++ stw r2, PT_ORIG_R2(sp)
+ /* Check that the requested system call is within limits */
+ movui r1, __NR_syscalls
+ bgeu r2, r1, ret_invsyscall
+@@ -192,7 +193,6 @@ local_restart:
+ movhi r11, %hiadj(sys_call_table)
+ add r1, r1, r11
+ ldw r1, %lo(sys_call_table)(r1)
+- beq r1, r0, ret_invsyscall
+
+ /* Check if we are being traced */
+ GET_THREAD_INFO r11
+@@ -213,6 +213,9 @@ local_restart:
+ translate_rc_and_ret:
+ movi r1, 0
+ bge r2, zero, 3f
++ ldw r1, PT_ORIG_R2(sp)
++ addi r1, r1, 1
++ beq r1, zero, 3f
+ sub r2, zero, r2
+ movi r1, 1
+ 3:
+@@ -255,9 +258,9 @@ traced_system_call:
+ ldw r6, PT_R6(sp)
+ ldw r7, PT_R7(sp)
+
+- /* Fetch the syscall function, we don't need to check the boundaries
+- * since this is already done.
+- */
++ /* Fetch the syscall function. */
++ movui r1, __NR_syscalls
++ bgeu r2, r1, traced_invsyscall
+ slli r1, r2, 2
+ movhi r11,%hiadj(sys_call_table)
+ add r1, r1, r11
+@@ -276,6 +279,9 @@ traced_system_call:
+ translate_rc_and_ret2:
+ movi r1, 0
+ bge r2, zero, 4f
++ ldw r1, PT_ORIG_R2(sp)
++ addi r1, r1, 1
++ beq r1, zero, 4f
+ sub r2, zero, r2
+ movi r1, 1
+ 4:
+@@ -287,6 +293,11 @@ end_translate_rc_and_ret2:
+ RESTORE_SWITCH_STACK
+ br ret_from_exception
+
++ /* If the syscall number was invalid return ENOSYS */
++traced_invsyscall:
++ movi r2, -ENOSYS
++ br translate_rc_and_ret2
++
+ Luser_return:
+ GET_THREAD_INFO r11 /* get thread_info pointer */
+ ldw r10, TI_FLAGS(r11) /* get thread_info->flags */
+@@ -336,9 +347,6 @@ external_interrupt:
+ /* skip if no interrupt is pending */
+ beq r12, r0, ret_from_interrupt
+
+- movi r24, -1
+- stw r24, PT_ORIG_R2(sp)
+-
+ /*
+ * Process an external hardware interrupt.
+ */
+diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
+index 2009ae2d3c3bb..68d626c4f1ba7 100644
+--- a/arch/nios2/kernel/signal.c
++++ b/arch/nios2/kernel/signal.c
+@@ -36,10 +36,10 @@ struct rt_sigframe {
+
+ static inline int rt_restore_ucontext(struct pt_regs *regs,
+ struct switch_stack *sw,
+- struct ucontext *uc, int *pr2)
++ struct ucontext __user *uc, int *pr2)
+ {
+ int temp;
+- unsigned long *gregs = uc->uc_mcontext.gregs;
++ unsigned long __user *gregs = uc->uc_mcontext.gregs;
+ int err;
+
+ /* Always make any pending restarted system calls return -EINTR */
+@@ -102,10 +102,11 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw)
+ {
+ struct pt_regs *regs = (struct pt_regs *)(sw + 1);
+ /* Verify, can we follow the stack back */
+- struct rt_sigframe *frame = (struct rt_sigframe *) regs->sp;
++ struct rt_sigframe __user *frame;
+ sigset_t set;
+ int rval;
+
++ frame = (struct rt_sigframe __user *) regs->sp;
+ if (!access_ok(frame, sizeof(*frame)))
+ goto badframe;
+
+@@ -124,10 +125,10 @@ badframe:
+ return 0;
+ }
+
+-static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs)
++static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+- unsigned long *gregs = uc->uc_mcontext.gregs;
++ unsigned long __user *gregs = uc->uc_mcontext.gregs;
+ int err = 0;
+
+ err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
+@@ -162,8 +163,9 @@ static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs)
+ return err;
+ }
+
+-static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
+- size_t frame_size)
++static inline void __user *get_sigframe(struct ksignal *ksig,
++ struct pt_regs *regs,
++ size_t frame_size)
+ {
+ unsigned long usp;
+
+@@ -174,13 +176,13 @@ static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
+ usp = sigsp(usp, ksig);
+
+ /* Verify, is it 32 or 64 bit aligned */
+- return (void *)((usp - frame_size) & -8UL);
++ return (void __user *)((usp - frame_size) & -8UL);
+ }
+
+ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+ {
+- struct rt_sigframe *frame;
++ struct rt_sigframe __user *frame;
+ int err = 0;
+
+ frame = get_sigframe(ksig, regs, sizeof(*frame));
+@@ -240,7 +242,7 @@ static int do_signal(struct pt_regs *regs)
+ /*
+ * If we were from a system call, check for system call restarting...
+ */
+- if (regs->orig_r2 >= 0) {
++ if (regs->orig_r2 >= 0 && regs->r1) {
+ continue_addr = regs->ea;
+ restart_addr = continue_addr - 4;
+ retval = regs->r2;
+@@ -262,6 +264,7 @@ static int do_signal(struct pt_regs *regs)
+ regs->ea = restart_addr;
+ break;
+ }
++ regs->orig_r2 = -1;
+ }
+
+ if (get_signal(&ksig)) {
+diff --git a/arch/nios2/kernel/syscall_table.c b/arch/nios2/kernel/syscall_table.c
+index 6176d63023c1d..c2875a6dd5a4a 100644
+--- a/arch/nios2/kernel/syscall_table.c
++++ b/arch/nios2/kernel/syscall_table.c
+@@ -13,5 +13,6 @@
+ #define __SYSCALL(nr, call) [nr] = (call),
+
+ void *sys_call_table[__NR_syscalls] = {
++ [0 ... __NR_syscalls-1] = sys_ni_syscall,
+ #include <asm/unistd.h>
+ };
+diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
+index 596986a74a26d..85ac49d64cf73 100644
+--- a/arch/nios2/kernel/traps.c
++++ b/arch/nios2/kernel/traps.c
+@@ -37,10 +37,10 @@ void die(const char *str, struct pt_regs *regs, long err)
+ show_regs(regs);
+ spin_unlock_irq(&die_lock);
+ /*
+- * do_exit() should take care of panic'ing from an interrupt
++ * make_task_dead() should take care of panic'ing from an interrupt
+ * context so we don't handle it here
+ */
+- do_exit(err);
++ make_task_dead(err);
+ }
+
+ void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr)
+diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h
+index c298061c70a7e..8aa3e78181e9a 100644
+--- a/arch/openrisc/include/asm/io.h
++++ b/arch/openrisc/include/asm/io.h
+@@ -31,7 +31,7 @@
+ void __iomem *ioremap(phys_addr_t offset, unsigned long size);
+
+ #define iounmap iounmap
+-extern void iounmap(void __iomem *addr);
++extern void iounmap(volatile void __iomem *addr);
+
+ #include <asm-generic/io.h>
+
+diff --git a/arch/openrisc/include/asm/syscalls.h b/arch/openrisc/include/asm/syscalls.h
+index 3a7eeae6f56a8..aa1c7e98722e3 100644
+--- a/arch/openrisc/include/asm/syscalls.h
++++ b/arch/openrisc/include/asm/syscalls.h
+@@ -22,9 +22,11 @@ asmlinkage long sys_or1k_atomic(unsigned long type, unsigned long *v1,
+
+ asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid, int tls);
++asmlinkage long __sys_clone3(struct clone_args __user *uargs, size_t size);
+ asmlinkage long __sys_fork(void);
+
+ #define sys_clone __sys_clone
++#define sys_clone3 __sys_clone3
+ #define sys_fork __sys_fork
+
+ #endif /* __ASM_OPENRISC_SYSCALLS_H */
+diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h
+index d52b4e536e3f9..5487fa93dd9be 100644
+--- a/arch/openrisc/include/asm/timex.h
++++ b/arch/openrisc/include/asm/timex.h
+@@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void)
+ {
+ return mfspr(SPR_TTCR);
+ }
++#define get_cycles get_cycles
+
+ /* This isn't really used any more */
+ #define CLOCK_TICK_RATE 1000
+diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
+index 1b16d97e7da7f..a82b2caaa560d 100644
+--- a/arch/openrisc/kernel/dma.c
++++ b/arch/openrisc/kernel/dma.c
+@@ -33,7 +33,7 @@ page_set_nocache(pte_t *pte, unsigned long addr,
+ * Flush the page out of the TLB so that the new page flags get
+ * picked up next time there's an access
+ */
+- flush_tlb_page(NULL, addr);
++ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ /* Flush page out of dcache */
+ for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
+@@ -56,7 +56,7 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
+ * Flush the page out of the TLB so that the new page flags get
+ * picked up next time there's an access
+ */
+- flush_tlb_page(NULL, addr);
++ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ return 0;
+ }
+diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
+index edaa775a648e6..d32906e89aafd 100644
+--- a/arch/openrisc/kernel/entry.S
++++ b/arch/openrisc/kernel/entry.S
+@@ -173,7 +173,6 @@ handler: ;\
+ l.sw PT_GPR28(r1),r28 ;\
+ l.sw PT_GPR29(r1),r29 ;\
+ /* r30 already save */ ;\
+-/* l.sw PT_GPR30(r1),r30*/ ;\
+ l.sw PT_GPR31(r1),r31 ;\
+ TRACE_IRQS_OFF_ENTRY ;\
+ /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\
+@@ -211,9 +210,8 @@ handler: ;\
+ l.sw PT_GPR27(r1),r27 ;\
+ l.sw PT_GPR28(r1),r28 ;\
+ l.sw PT_GPR29(r1),r29 ;\
+- /* r31 already saved */ ;\
+- l.sw PT_GPR30(r1),r30 ;\
+-/* l.sw PT_GPR31(r1),r31 */ ;\
++ /* r30 already saved */ ;\
++ l.sw PT_GPR31(r1),r31 ;\
+ /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\
+ l.addi r30,r0,-1 ;\
+ l.sw PT_ORIG_GPR11(r1),r30 ;\
+@@ -1170,6 +1168,11 @@ ENTRY(__sys_clone)
+ l.j _fork_save_extra_regs_and_call
+ l.nop
+
++ENTRY(__sys_clone3)
++ l.movhi r29,hi(sys_clone3)
++ l.j _fork_save_extra_regs_and_call
++ l.ori r29,r29,lo(sys_clone3)
++
+ ENTRY(__sys_fork)
+ l.movhi r29,hi(sys_fork)
+ l.ori r29,r29,lo(sys_fork)
+diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
+index 15f1b38dfe03b..871f4c8588595 100644
+--- a/arch/openrisc/kernel/head.S
++++ b/arch/openrisc/kernel/head.S
+@@ -521,6 +521,15 @@ _start:
+ l.ori r3,r0,0x1
+ l.mtspr r0,r3,SPR_SR
+
++ /*
++ * Start the TTCR as early as possible, so that the RNG can make use of
++ * measurements of boot time from the earliest opportunity. Especially
++ * important is that the TTCR does not return zero by the time we reach
++ * rand_initialize().
++ */
++ l.movhi r3,hi(SPR_TTMR_CR)
++ l.mtspr r0,r3,SPR_TTMR
++
+ CLEAR_GPR(r1)
+ CLEAR_GPR(r2)
+ CLEAR_GPR(r3)
+diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
+index 415e209732a3d..ba78766cf00b5 100644
+--- a/arch/openrisc/kernel/smp.c
++++ b/arch/openrisc/kernel/smp.c
+@@ -272,7 +272,7 @@ static inline void ipi_flush_tlb_range(void *info)
+ local_flush_tlb_range(NULL, fd->addr1, fd->addr2);
+ }
+
+-static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start,
++static void smp_flush_tlb_range(const struct cpumask *cmask, unsigned long start,
+ unsigned long end)
+ {
+ unsigned int cpuid;
+@@ -320,7 +320,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+ void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+ {
+- smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end);
++ const struct cpumask *cmask = vma ? mm_cpumask(vma->vm_mm)
++ : cpu_online_mask;
++ smp_flush_tlb_range(cmask, start, end);
+ }
+
+ /* Instruction cache invalidate - performed on each cpu */
+diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
+index aa1e709405acd..9df1d85bfe1d1 100644
+--- a/arch/openrisc/kernel/traps.c
++++ b/arch/openrisc/kernel/traps.c
+@@ -212,7 +212,7 @@ void die(const char *str, struct pt_regs *regs, long err)
+ __asm__ __volatile__("l.nop 1");
+ do {} while (1);
+ #endif
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ /* This is normally the 'Oops' routine */
+diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
+index daae13a76743b..8ec0dafecf257 100644
+--- a/arch/openrisc/mm/ioremap.c
++++ b/arch/openrisc/mm/ioremap.c
+@@ -77,7 +77,7 @@ void __iomem *__ref ioremap(phys_addr_t addr, unsigned long size)
+ }
+ EXPORT_SYMBOL(ioremap);
+
+-void iounmap(void __iomem *addr)
++void iounmap(volatile void __iomem *addr)
+ {
+ /* If the page is from the fixmap pool then we just clear out
+ * the fixmap mapping.
+diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
+index 27a8b49af11fc..117b0f882750a 100644
+--- a/arch/parisc/Kconfig
++++ b/arch/parisc/Kconfig
+@@ -9,6 +9,7 @@ config PARISC
+ select ARCH_WANT_FRAME_POINTERS
+ select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_STRICT_KERNEL_RWX
++ select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_NO_SG_CHAIN
+ select ARCH_SUPPORTS_HUGETLBFS if PA20
+@@ -141,10 +142,10 @@ menu "Processor type and features"
+
+ choice
+ prompt "Processor type"
+- default PA7000
++ default PA7000 if "$(ARCH)" = "parisc"
+
+ config PA7000
+- bool "PA7000/PA7100"
++ bool "PA7000/PA7100" if "$(ARCH)" = "parisc"
+ help
+ This is the processor type of your CPU. This information is
+ used for optimizing purposes. In order to compile a kernel
+@@ -155,21 +156,21 @@ config PA7000
+ which is required on some machines.
+
+ config PA7100LC
+- bool "PA7100LC"
++ bool "PA7100LC" if "$(ARCH)" = "parisc"
+ help
+ Select this option for the PCX-L processor, as used in the
+ 712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748,
+ D200, D210, D300, D310 and E-class
+
+ config PA7200
+- bool "PA7200"
++ bool "PA7200" if "$(ARCH)" = "parisc"
+ help
+ Select this option for the PCX-T' processor, as used in the
+ C100, C110, J100, J110, J210XC, D250, D260, D350, D360,
+ K100, K200, K210, K220, K400, K410 and K420
+
+ config PA7300LC
+- bool "PA7300LC"
++ bool "PA7300LC" if "$(ARCH)" = "parisc"
+ help
+ Select this option for the PCX-L2 processor, as used in the
+ 744, A180, B132L, B160L, B180L, C132L, C160L, C180L,
+@@ -219,7 +220,8 @@ config MLONGCALLS
+ Enabling this option will probably slow down your kernel.
+
+ config 64BIT
+- bool "64-bit kernel"
++ def_bool y if "$(ARCH)" = "parisc64"
++ bool "64-bit kernel" if "$(ARCH)" = "parisc"
+ depends on PA8X00
+ help
+ Enable this if you want to support 64bit kernel on PA-RISC platform.
+diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
+index fcde3ffa02213..fadb098de1545 100644
+--- a/arch/parisc/Makefile
++++ b/arch/parisc/Makefile
+@@ -17,7 +17,12 @@
+ # Mike Shaver, Helge Deller and Martin K. Petersen
+ #
+
++ifdef CONFIG_PARISC_SELF_EXTRACT
++boot := arch/parisc/boot
++KBUILD_IMAGE := $(boot)/bzImage
++else
+ KBUILD_IMAGE := vmlinuz
++endif
+
+ NM = sh $(srctree)/arch/parisc/nm
+ CHECKFLAGS += -D__hppa__=1
+diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
+index a39250cb7dfcf..fd8c1ebd27470 100644
+--- a/arch/parisc/include/asm/assembly.h
++++ b/arch/parisc/include/asm/assembly.h
+@@ -72,10 +72,6 @@
+
+ #include <asm/asmregs.h>
+
+- sp = 30
+- gp = 27
+- ipsw = 22
+-
+ /*
+ * We provide two versions of each macro to convert from physical
+ * to virtual and vice versa. The "_r1" versions take one argument
+diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h
+deleted file mode 100644
+index 0a7f9db6bd1c7..0000000000000
+--- a/arch/parisc/include/asm/bugs.h
++++ /dev/null
+@@ -1,20 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * include/asm-parisc/bugs.h
+- *
+- * Copyright (C) 1999 Mike Shaver
+- */
+-
+-/*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Needs:
+- * void check_bugs(void);
+- */
+-
+-#include <asm/processor.h>
+-
+-static inline void check_bugs(void)
+-{
+-// identify_cpu(&boot_cpu_data);
+-}
+diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
+index eef0096db5f88..2f4c45f60ae1e 100644
+--- a/arch/parisc/include/asm/cacheflush.h
++++ b/arch/parisc/include/asm/cacheflush.h
+@@ -53,6 +53,11 @@ extern void flush_dcache_page(struct page *page);
+
+ #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
+ #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
++#define flush_dcache_mmap_lock_irqsave(mapping, flags) \
++ xa_lock_irqsave(&mapping->i_pages, flags)
++#define flush_dcache_mmap_unlock_irqrestore(mapping, flags) \
++ xa_unlock_irqrestore(&mapping->i_pages, flags)
++
+
+ #define flush_icache_page(vma,page) do { \
+ flush_kernel_dcache_page_addr(page_address(page)); \
+diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h
+index c4cd6360f9964..55d29c4f716e6 100644
+--- a/arch/parisc/include/asm/fb.h
++++ b/arch/parisc/include/asm/fb.h
+@@ -12,9 +12,13 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ }
+
++#if defined(CONFIG_FB_STI)
++int fb_is_primary_device(struct fb_info *info);
++#else
+ static inline int fb_is_primary_device(struct fb_info *info)
+ {
+ return 0;
+ }
++#endif
+
+ #endif /* _ASM_FB_H_ */
+diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
+index fceb9cf02fb3a..71aa0921d6c72 100644
+--- a/arch/parisc/include/asm/futex.h
++++ b/arch/parisc/include/asm/futex.h
+@@ -16,7 +16,7 @@ static inline void
+ _futex_spin_lock_irqsave(u32 __user *uaddr, unsigned long int *flags)
+ {
+ extern u32 lws_lock_start[];
+- long index = ((long)uaddr & 0x3f8) >> 1;
++ long index = ((long)uaddr & 0x7f8) >> 1;
+ arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
+ local_irq_save(*flags);
+ arch_spin_lock(s);
+@@ -26,7 +26,7 @@ static inline void
+ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
+ {
+ extern u32 lws_lock_start[];
+- long index = ((long)uaddr & 0x3f8) >> 1;
++ long index = ((long)uaddr & 0x7f8) >> 1;
+ arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
+ arch_spin_unlock(s);
+ local_irq_restore(*flags);
+diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h
+index 9d3d7737c58b1..a005ebc547793 100644
+--- a/arch/parisc/include/asm/hardware.h
++++ b/arch/parisc/include/asm/hardware.h
+@@ -10,12 +10,12 @@
+ #define SVERSION_ANY_ID PA_SVERSION_ANY_ID
+
+ struct hp_hardware {
+- unsigned short hw_type:5; /* HPHW_xxx */
+- unsigned short hversion;
+- unsigned long sversion:28;
+- unsigned short opt;
+- const char name[80]; /* The hardware description */
+-};
++ unsigned int hw_type:8; /* HPHW_xxx */
++ unsigned int hversion:12;
++ unsigned int sversion:12;
++ unsigned char opt;
++ unsigned char name[59]; /* The hardware description */
++} __packed;
+
+ struct parisc_device;
+
+diff --git a/arch/parisc/include/asm/led.h b/arch/parisc/include/asm/led.h
+index 6de13d08a3886..b70b9094fb7cd 100644
+--- a/arch/parisc/include/asm/led.h
++++ b/arch/parisc/include/asm/led.h
+@@ -11,8 +11,8 @@
+ #define LED1 0x02
+ #define LED0 0x01 /* bottom (or furthest left) LED */
+
+-#define LED_LAN_TX LED0 /* for LAN transmit activity */
+-#define LED_LAN_RCV LED1 /* for LAN receive activity */
++#define LED_LAN_RCV LED0 /* for LAN receive activity */
++#define LED_LAN_TX LED1 /* for LAN transmit activity */
+ #define LED_DISK_IO LED2 /* for disk activity */
+ #define LED_HEARTBEAT LED3 /* heartbeat */
+
+diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
+index b388d81765883..2f48e0a80d9c6 100644
+--- a/arch/parisc/include/asm/pdc.h
++++ b/arch/parisc/include/asm/pdc.h
+@@ -81,6 +81,7 @@ int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
+ int pdc_do_reset(void);
+ int pdc_soft_power_info(unsigned long *power_reg);
+ int pdc_soft_power_button(int sw_control);
++int pdc_soft_power_button_panic(int sw_control);
+ void pdc_io_reset(void);
+ void pdc_io_reset_devices(void);
+ int pdc_iodc_getc(void);
+diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
+index 7badd872f05ac..3e7cf882639fb 100644
+--- a/arch/parisc/include/asm/pgtable.h
++++ b/arch/parisc/include/asm/pgtable.h
+@@ -76,6 +76,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+ purge_tlb_end(flags);
+ }
+
++extern void __update_cache(pte_t pte);
++
+ /* Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+@@ -83,11 +85,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+ #define set_pte(pteptr, pteval) \
+ do { \
+ *(pteptr) = (pteval); \
+- barrier(); \
++ mb(); \
+ } while(0)
+
+ #define set_pte_at(mm, addr, pteptr, pteval) \
+ do { \
++ if (pte_present(pteval) && \
++ pte_user(pteval)) \
++ __update_cache(pteval); \
+ *(pteptr) = (pteval); \
+ purge_tlb_entries(mm, addr); \
+ } while (0)
+@@ -303,6 +308,7 @@ extern unsigned long *empty_zero_page;
+
+ #define pte_none(x) (pte_val(x) == 0)
+ #define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
++#define pte_user(x) (pte_val(x) & _PAGE_USER)
+ #define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0))
+
+ #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK)
+@@ -410,7 +416,7 @@ extern void paging_init (void);
+
+ #define PG_dcache_dirty PG_arch_1
+
+-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
++#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep)
+
+ /* Encode and de-code a swap entry */
+
+diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
+index 4b9e3d707571b..2b3010ade00e7 100644
+--- a/arch/parisc/include/asm/rt_sigframe.h
++++ b/arch/parisc/include/asm/rt_sigframe.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_PARISC_RT_SIGFRAME_H
+ #define _ASM_PARISC_RT_SIGFRAME_H
+
+-#define SIGRETURN_TRAMP 3
++#define SIGRETURN_TRAMP 4
+ #define SIGRESTARTBLOCK_TRAMP 5
+ #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
+
+diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
+index a303ae9a77f41..16ee41e77174f 100644
+--- a/arch/parisc/include/asm/special_insns.h
++++ b/arch/parisc/include/asm/special_insns.h
+@@ -2,28 +2,32 @@
+ #ifndef __PARISC_SPECIAL_INSNS_H
+ #define __PARISC_SPECIAL_INSNS_H
+
+-#define lpa(va) ({ \
+- unsigned long pa; \
+- __asm__ __volatile__( \
+- "copy %%r0,%0\n\t" \
+- "lpa %%r0(%1),%0" \
+- : "=r" (pa) \
+- : "r" (va) \
+- : "memory" \
+- ); \
+- pa; \
++#define lpa(va) ({ \
++ unsigned long pa; \
++ __asm__ __volatile__( \
++ "copy %%r0,%0\n" \
++ "8:\tlpa %%r0(%1),%0\n" \
++ "9:\n" \
++ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
++ : "=&r" (pa) \
++ : "r" (va) \
++ : "memory" \
++ ); \
++ pa; \
+ })
+
+-#define lpa_user(va) ({ \
+- unsigned long pa; \
+- __asm__ __volatile__( \
+- "copy %%r0,%0\n\t" \
+- "lpa %%r0(%%sr3,%1),%0" \
+- : "=r" (pa) \
+- : "r" (va) \
+- : "memory" \
+- ); \
+- pa; \
++#define lpa_user(va) ({ \
++ unsigned long pa; \
++ __asm__ __volatile__( \
++ "copy %%r0,%0\n" \
++ "8:\tlpa %%r0(%%sr3,%1),%0\n" \
++ "9:\n" \
++ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \
++ : "=&r" (pa) \
++ : "r" (va) \
++ : "memory" \
++ ); \
++ pa; \
+ })
+
+ #define mfctl(reg) ({ \
+diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h
+index 06b510f8172e3..b4622cb06a75e 100644
+--- a/arch/parisc/include/asm/timex.h
++++ b/arch/parisc/include/asm/timex.h
+@@ -13,9 +13,10 @@
+
+ typedef unsigned long cycles_t;
+
+-static inline cycles_t get_cycles (void)
++static inline cycles_t get_cycles(void)
+ {
+ return mfctl(16);
+ }
++#define get_cycles get_cycles
+
+ #endif
+diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
+index 8ecc1f0c0483d..d0e090a2c000d 100644
+--- a/arch/parisc/include/asm/traps.h
++++ b/arch/parisc/include/asm/traps.h
+@@ -17,6 +17,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err);
+ const char *trap_name(unsigned long code);
+ void do_page_fault(struct pt_regs *regs, unsigned long code,
+ unsigned long address);
++int handle_nadtlb_fault(struct pt_regs *regs);
+ #endif
+
+ #endif
+diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
+index 9e3c010c0f61e..5f1f3eea5aa5f 100644
+--- a/arch/parisc/include/uapi/asm/mman.h
++++ b/arch/parisc/include/uapi/asm/mman.h
+@@ -49,31 +49,30 @@
+ #define MADV_DONTFORK 10 /* don't inherit across fork */
+ #define MADV_DOFORK 11 /* do inherit across fork */
+
+-#define MADV_COLD 20 /* deactivate these pages */
+-#define MADV_PAGEOUT 21 /* reclaim these pages */
++#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
++#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
+
+-#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
+-#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
++#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
++#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
+
+-#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
+-#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
++#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
++ overrides the coredump filter bits */
++#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+
+-#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */
+-#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */
++#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
++#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
+-#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump,
+- overrides the coredump filter bits */
+-#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */
++#define MADV_COLD 20 /* deactivate these pages */
++#define MADV_PAGEOUT 21 /* reclaim these pages */
+
+-#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */
+-#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */
++#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
++#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
+
+ #define MADV_HWPOISON 100 /* poison a page for testing */
+ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
+
+ /* compatibility flags */
+ #define MAP_FILE 0
+-#define MAP_VARIABLE 0
+
+ #define PKEY_DISABLE_ACCESS 0x1
+ #define PKEY_DISABLE_WRITE 0x2
+diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh
+index 056d588befdd6..70d3cffb02515 100644
+--- a/arch/parisc/install.sh
++++ b/arch/parisc/install.sh
+@@ -39,6 +39,7 @@ verify "$3"
+ if [ -n "${INSTALLKERNEL}" ]; then
+ if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+ if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
++ if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi
+ fi
+
+ # Default install
+diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
+index 39e02227e2310..c473c2f395a0a 100644
+--- a/arch/parisc/kernel/cache.c
++++ b/arch/parisc/kernel/cache.c
+@@ -46,9 +46,6 @@ void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
+ */
+ DEFINE_SPINLOCK(pa_tlb_flush_lock);
+
+-/* Swapper page setup lock. */
+-DEFINE_SPINLOCK(pa_swapper_pg_lock);
+-
+ #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+ int pa_serialize_tlb_flushes __ro_after_init;
+ #endif
+@@ -83,9 +80,9 @@ EXPORT_SYMBOL(flush_cache_all_local);
+ #define pfn_va(pfn) __va(PFN_PHYS(pfn))
+
+ void
+-update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
++__update_cache(pte_t pte)
+ {
+- unsigned long pfn = pte_pfn(*ptep);
++ unsigned long pfn = pte_pfn(pte);
+ struct page *page;
+
+ /* We don't have pte special. As a result, we can be called with
+@@ -327,6 +324,7 @@ void flush_dcache_page(struct page *page)
+ struct vm_area_struct *mpnt;
+ unsigned long offset;
+ unsigned long addr, old_addr = 0;
++ unsigned long flags;
+ pgoff_t pgoff;
+
+ if (mapping && !mapping_mapped(mapping)) {
+@@ -346,7 +344,7 @@ void flush_dcache_page(struct page *page)
+ * declared as MAP_PRIVATE or MAP_SHARED), so we only need
+ * to flush one address here for them all to become coherent */
+
+- flush_dcache_mmap_lock(mapping);
++ flush_dcache_mmap_lock_irqsave(mapping, flags);
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
+ offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
+ addr = mpnt->vm_start + offset;
+@@ -369,7 +367,7 @@ void flush_dcache_page(struct page *page)
+ old_addr = addr;
+ }
+ }
+- flush_dcache_mmap_unlock(mapping);
++ flush_dcache_mmap_unlock_irqrestore(mapping, flags);
+ }
+ EXPORT_SYMBOL(flush_dcache_page);
+
+diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
+index 776d624a7207b..e7ee0c0c91d35 100644
+--- a/arch/parisc/kernel/drivers.c
++++ b/arch/parisc/kernel/drivers.c
+@@ -520,7 +520,6 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path)
+ dev->id.hversion_rev = iodc_data[1] & 0x0f;
+ dev->id.sversion = ((iodc_data[4] & 0x0f) << 16) |
+ (iodc_data[5] << 8) | iodc_data[6];
+- dev->hpa.name = parisc_pathname(dev);
+ dev->hpa.start = hpa;
+ /* This is awkward. The STI spec says that gfx devices may occupy
+ * 32MB or 64MB. Unfortunately, we don't know how to tell whether
+@@ -534,10 +533,10 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path)
+ dev->hpa.end = hpa + 0xfff;
+ }
+ dev->hpa.flags = IORESOURCE_MEM;
+- name = parisc_hardware_description(&dev->id);
+- if (name) {
+- strlcpy(dev->name, name, sizeof(dev->name));
+- }
++ dev->hpa.name = dev->name;
++ name = parisc_hardware_description(&dev->id) ? : "unknown";
++ snprintf(dev->name, sizeof(dev->name), "%s [%s]",
++ name, parisc_pathname(dev));
+
+ /* Silently fail things like mouse ports which are subsumed within
+ * the keyboard controller
+@@ -883,15 +882,13 @@ void __init walk_central_bus(void)
+ &root);
+ }
+
+-static void print_parisc_device(struct parisc_device *dev)
++static __init void print_parisc_device(struct parisc_device *dev)
+ {
+- char hw_path[64];
+- static int count;
++ static int count __initdata;
+
+- print_pa_hwpath(dev, hw_path);
+- pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
+- ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type,
+- dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
++ pr_info("%d. %s at %pap { type:%d, hv:%#x, sv:%#x, rev:%#x }",
++ ++count, dev->name, &(dev->hpa.start), dev->id.hw_type,
++ dev->id.hversion, dev->id.sversion, dev->id.hversion_rev);
+
+ if (dev->num_addrs) {
+ int k;
+@@ -1080,7 +1077,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
+
+
+
+-static int print_one_device(struct device * dev, void * data)
++static __init int print_one_device(struct device * dev, void * data)
+ {
+ struct parisc_device * pdev = to_parisc_device(dev);
+
+diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
+index 9f939afe6b88c..437c8d31f3907 100644
+--- a/arch/parisc/kernel/entry.S
++++ b/arch/parisc/kernel/entry.S
+@@ -1834,8 +1834,8 @@ syscall_restore:
+ LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
+
+ /* Are we being ptraced? */
+- ldw TASK_FLAGS(%r1),%r19
+- ldi _TIF_SYSCALL_TRACE_MASK,%r2
++ LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19
++ ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2
+ and,COND(=) %r19,%r2,%r0
+ b,n syscall_restore_rfi
+
+diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
+index 7034227dbdf32..5385e0fe98426 100644
+--- a/arch/parisc/kernel/firmware.c
++++ b/arch/parisc/kernel/firmware.c
+@@ -1158,15 +1158,18 @@ int __init pdc_soft_power_info(unsigned long *power_reg)
+ }
+
+ /*
+- * pdc_soft_power_button - Control the soft power button behaviour
+- * @sw_control: 0 for hardware control, 1 for software control
++ * pdc_soft_power_button{_panic} - Control the soft power button behaviour
++ * @sw_control: 0 for hardware control, 1 for software control
+ *
+ *
+ * This PDC function places the soft power button under software or
+ * hardware control.
+- * Under software control the OS may control to when to allow to shut
+- * down the system. Under hardware control pressing the power button
++ * Under software control the OS may control to when to allow to shut
++ * down the system. Under hardware control pressing the power button
+ * powers off the system immediately.
++ *
++ * The _panic version relies on spin_trylock to prevent deadlock
++ * on panic path.
+ */
+ int pdc_soft_power_button(int sw_control)
+ {
+@@ -1180,6 +1183,22 @@ int pdc_soft_power_button(int sw_control)
+ return retval;
+ }
+
++int pdc_soft_power_button_panic(int sw_control)
++{
++ int retval;
++ unsigned long flags;
++
++ if (!spin_trylock_irqsave(&pdc_lock, flags)) {
++ pr_emerg("Couldn't enable soft power button\n");
++ return -EBUSY; /* ignored by the panic notifier */
++ }
++
++ retval = mem_pdc_call(PDC_SOFT_POWER, PDC_SOFT_POWER_ENABLE, __pa(pdc_result), sw_control);
++ spin_unlock_irqrestore(&pdc_lock, flags);
++
++ return retval;
++}
++
+ /*
+ * pdc_io_reset - Hack to avoid overlapping range registers of Bridges devices.
+ * Primarily a problem on T600 (which parisc-linux doesn't support) but
+@@ -1230,7 +1249,7 @@ static char __attribute__((aligned(64))) iodc_dbuf[4096];
+ */
+ int pdc_iodc_print(const unsigned char *str, unsigned count)
+ {
+- unsigned int i;
++ unsigned int i, found = 0;
+ unsigned long flags;
+
+ for (i = 0; i < count;) {
+@@ -1239,6 +1258,7 @@ int pdc_iodc_print(const unsigned char *str, unsigned count)
+ iodc_dbuf[i+0] = '\r';
+ iodc_dbuf[i+1] = '\n';
+ i += 2;
++ found = 1;
+ goto print;
+ default:
+ iodc_dbuf[i] = str[i];
+@@ -1255,7 +1275,7 @@ print:
+ __pa(iodc_retbuf), 0, __pa(iodc_dbuf), i, 0);
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+- return i;
++ return i - found;
+ }
+
+ #if !defined(BOOTLOADER)
+diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
+index aa93d775c34db..598d0938449da 100644
+--- a/arch/parisc/kernel/head.S
++++ b/arch/parisc/kernel/head.S
+@@ -22,7 +22,7 @@
+ #include <linux/init.h>
+ #include <linux/pgtable.h>
+
+- .level PA_ASM_LEVEL
++ .level 1.1
+
+ __INITDATA
+ ENTRY(boot_args)
+@@ -69,6 +69,47 @@ $bss_loop:
+ stw,ma %arg2,4(%r1)
+ stw,ma %arg3,4(%r1)
+
++#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20)
++ /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU
++ * and halt kernel if we detect a PA1.x CPU. */
++ ldi 32,%r10
++ mtctl %r10,%cr11
++ .level 2.0
++ mfctl,w %cr11,%r10
++ .level 1.1
++ comib,<>,n 0,%r10,$cpu_ok
++
++ load32 PA(msg1),%arg0
++ ldi msg1_end-msg1,%arg1
++$iodc_panic:
++ copy %arg0, %r10
++ copy %arg1, %r11
++ load32 PA(init_stack),%sp
++#define MEM_CONS 0x3A0
++ ldw MEM_CONS+32(%r0),%arg0 // HPA
++ ldi ENTRY_IO_COUT,%arg1
++ ldw MEM_CONS+36(%r0),%arg2 // SPA
++ ldw MEM_CONS+8(%r0),%arg3 // layers
++ load32 PA(__bss_start),%r1
++ stw %r1,-52(%sp) // arg4
++ stw %r0,-56(%sp) // arg5
++ stw %r10,-60(%sp) // arg6 = ptr to text
++ stw %r11,-64(%sp) // arg7 = len
++ stw %r0,-68(%sp) // arg8
++ load32 PA(.iodc_panic_ret), %rp
++ ldw MEM_CONS+40(%r0),%r1 // ENTRY_IODC
++ bv,n (%r1)
++.iodc_panic_ret:
++ b . /* wait endless with ... */
++ or %r10,%r10,%r10 /* qemu idle sleep */
++msg1: .ascii "Can't boot kernel which was built for PA8x00 CPUs on this machine.\r\n"
++msg1_end:
++
++$cpu_ok:
++#endif
++
++ .level PA_ASM_LEVEL
++
+ /* Initialize startup VM. Just map first 16/32 MB of memory */
+ load32 PA(swapper_pg_dir),%r4
+ mtctl %r4,%cr24 /* Initialize kernel root pointer */
+diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
+index 0d46b19dc4d3d..e6cc38ef69458 100644
+--- a/arch/parisc/kernel/irq.c
++++ b/arch/parisc/kernel/irq.c
+@@ -333,7 +333,7 @@ unsigned long txn_affinity_addr(unsigned int irq, int cpu)
+ {
+ #ifdef CONFIG_SMP
+ struct irq_data *d = irq_get_irq_data(irq);
+- cpumask_copy(irq_data_get_affinity_mask(d), cpumask_of(cpu));
++ irq_data_update_affinity(d, cpumask_of(cpu));
+ #endif
+
+ return per_cpu(cpu_data, cpu).txn_addr;
+diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
+index 80a0ab372802d..e59574f65e641 100644
+--- a/arch/parisc/kernel/patch.c
++++ b/arch/parisc/kernel/patch.c
+@@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags,
+
+ *need_unmap = 1;
+ set_fixmap(fixmap, page_to_phys(page));
+- if (flags)
+- raw_spin_lock_irqsave(&patch_lock, *flags);
+- else
+- __acquire(&patch_lock);
++ raw_spin_lock_irqsave(&patch_lock, *flags);
+
+ return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+ }
+@@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
+ {
+ clear_fixmap(fixmap);
+
+- if (flags)
+- raw_spin_unlock_irqrestore(&patch_lock, *flags);
+- else
+- __release(&patch_lock);
++ raw_spin_unlock_irqrestore(&patch_lock, *flags);
+ }
+
+ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+@@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+ int mapped;
+
+ /* Make sure we don't have any aliases in cache */
+- flush_kernel_vmap_range(addr, len);
+- flush_icache_range(start, end);
++ flush_kernel_dcache_range_asm(start, end);
++ flush_kernel_icache_range_asm(start, end);
++ flush_tlb_kernel_range(start, end);
+
+ p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped);
+
+@@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+ * We're crossing a page boundary, so
+ * need to remap
+ */
+- flush_kernel_vmap_range((void *)fixmap,
+- (p-fixmap) * sizeof(*p));
++ flush_kernel_dcache_range_asm((unsigned long)fixmap,
++ (unsigned long)p);
++ flush_tlb_kernel_range((unsigned long)fixmap,
++ (unsigned long)p);
+ if (mapped)
+ patch_unmap(FIX_TEXT_POKE0, &flags);
+ p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags,
+@@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len)
+ }
+ }
+
+- flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p));
++ flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p);
++ flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p);
+ if (mapped)
+ patch_unmap(FIX_TEXT_POKE0, &flags);
+- flush_icache_range(start, end);
+ }
+
+ void __kprobes __patch_text(void *addr, u32 insn)
+diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
+index 36a57aa38e87e..3b0227b17c070 100644
+--- a/arch/parisc/kernel/pci-dma.c
++++ b/arch/parisc/kernel/pci-dma.c
+@@ -446,11 +446,27 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+ {
++ /*
++ * fdc: The data cache line is written back to memory, if and only if
++ * it is dirty, and then invalidated from the data cache.
++ */
+ flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
+ }
+
+ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+ {
+- flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
++ unsigned long addr = (unsigned long) phys_to_virt(paddr);
++
++ switch (dir) {
++ case DMA_TO_DEVICE:
++ case DMA_BIDIRECTIONAL:
++ flush_kernel_dcache_range(addr, size);
++ return;
++ case DMA_FROM_DEVICE:
++ purge_kernel_dcache_range_asm(addr, addr + size);
++ return;
++ default:
++ BUG();
++ }
+ }
+diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
+index 38ec4ae812396..4f36c16aec860 100644
+--- a/arch/parisc/kernel/process.c
++++ b/arch/parisc/kernel/process.c
+@@ -120,13 +120,18 @@ void machine_power_off(void)
+ /* It seems we have no way to power the system off via
+ * software. The user has to press the button himself. */
+
+- printk(KERN_EMERG "System shut down completed.\n"
+- "Please power this system off now.");
++ printk("Power off or press RETURN to reboot.\n");
+
+ /* prevent soft lockup/stalled CPU messages for endless loop. */
+ rcu_sysrq_start();
+ lockup_detector_soft_poweroff();
+- for (;;);
++ while (1) {
++ /* reboot if user presses RETURN key */
++ if (pdc_iodc_getc() == 13) {
++ printk("Rebooting...\n");
++ machine_restart(NULL);
++ }
++ }
+ }
+
+ void (*pm_power_off)(void);
+diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
+index 1b6129e7d776b..ccdbcfdfe4e21 100644
+--- a/arch/parisc/kernel/processor.c
++++ b/arch/parisc/kernel/processor.c
+@@ -372,10 +372,18 @@ int
+ show_cpuinfo (struct seq_file *m, void *v)
+ {
+ unsigned long cpu;
++ char cpu_name[60], *p;
++
++ /* strip PA path from CPU name to not confuse lscpu */
++ strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
++ p = strrchr(cpu_name, '[');
++ if (p)
++ *(--p) = 0;
+
+ for_each_online_cpu(cpu) {
+- const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu);
+ #ifdef CONFIG_SMP
++ const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu);
++
+ if (0 == cpuinfo->hpa)
+ continue;
+ #endif
+@@ -418,11 +426,9 @@ show_cpuinfo (struct seq_file *m, void *v)
+ }
+ seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities);
+
+- seq_printf(m, "model\t\t: %s\n"
+- "model name\t: %s\n",
++ seq_printf(m, "model\t\t: %s - %s\n",
+ boot_cpu_data.pdc.sys_model_name,
+- cpuinfo->dev ?
+- cpuinfo->dev->name : "Unknown");
++ cpu_name);
+
+ seq_printf(m, "hversion\t: 0x%08x\n"
+ "sversion\t: 0x%08x\n",
+diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
+index 65de6c4c9354d..b9398e805978d 100644
+--- a/arch/parisc/kernel/ptrace.c
++++ b/arch/parisc/kernel/ptrace.c
+@@ -127,6 +127,12 @@ long arch_ptrace(struct task_struct *child, long request,
+ unsigned long tmp;
+ long ret = -EIO;
+
++ unsigned long user_regs_struct_size = sizeof(struct user_regs_struct);
++#ifdef CONFIG_64BIT
++ if (is_compat_task())
++ user_regs_struct_size /= 2;
++#endif
++
+ switch (request) {
+
+ /* Read the word at location addr in the USER area. For ptraced
+@@ -182,14 +188,14 @@ long arch_ptrace(struct task_struct *child, long request,
+ return copy_regset_to_user(child,
+ task_user_regset_view(current),
+ REGSET_GENERAL,
+- 0, sizeof(struct user_regs_struct),
++ 0, user_regs_struct_size,
+ datap);
+
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child,
+ task_user_regset_view(current),
+ REGSET_GENERAL,
+- 0, sizeof(struct user_regs_struct),
++ 0, user_regs_struct_size,
+ datap);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state. */
+@@ -303,6 +309,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ }
+ }
+ break;
++ case PTRACE_GETREGS:
++ case PTRACE_SETREGS:
++ case PTRACE_GETFPREGS:
++ case PTRACE_SETFPREGS:
++ return arch_ptrace(child, request, addr, data);
+
+ default:
+ ret = compat_ptrace_request(child, request, addr, data);
+diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
+index 2b16d8d6598f1..c37010a135865 100644
+--- a/arch/parisc/kernel/real2.S
++++ b/arch/parisc/kernel/real2.S
+@@ -248,9 +248,6 @@ ENTRY_CFI(real64_call_asm)
+ /* save fn */
+ copy %arg2, %r31
+
+- /* set up the new ap */
+- ldo 64(%arg1), %r29
+-
+ /* load up the arg registers from the saved arg area */
+ /* 32-bit calling convention passes first 4 args in registers */
+ ldd 0*REG_SZ(%arg1), %arg0 /* note overwriting arg0 */
+@@ -262,7 +259,9 @@ ENTRY_CFI(real64_call_asm)
+ ldd 7*REG_SZ(%arg1), %r19
+ ldd 1*REG_SZ(%arg1), %arg1 /* do this one last! */
+
++ /* set up real-mode stack and real-mode ap */
+ tophys_r1 %sp
++ ldo -16(%sp), %r29 /* Reference param save area */
+
+ b,l rfi_virt2real,%r2
+ nop
+diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
+index cceb09855e03f..3fb86ee507dd5 100644
+--- a/arch/parisc/kernel/setup.c
++++ b/arch/parisc/kernel/setup.c
+@@ -150,6 +150,8 @@ void __init setup_arch(char **cmdline_p)
+ #ifdef CONFIG_PA11
+ dma_ops_init();
+ #endif
++
++ clear_sched_clock_stable();
+ }
+
+ /*
+diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
+index bbfe23c40c016..0fb06d87b3a5c 100644
+--- a/arch/parisc/kernel/signal.c
++++ b/arch/parisc/kernel/signal.c
+@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
+ already in userspace. The first words of tramp are used to
+ save the previous sigrestartblock trampoline that might be
+ on the stack. We start the sigreturn trampoline at
+- SIGRESTARTBLOCK_TRAMP. */
++ SIGRESTARTBLOCK_TRAMP+X. */
+ err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
+- err |= __put_user(INSN_BLE_SR2_R0,
+- &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+ err |= __put_user(INSN_LDI_R20,
++ &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
++ err |= __put_user(INSN_BLE_SR2_R0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
++ err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
+
+- start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+- end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
++ start = (unsigned long) &frame->tramp[0];
++ end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+ flush_user_dcache_range_asm(start, end);
+ flush_user_icache_range_asm(start, end);
+
+ /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
+- * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
++ * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+ * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
+ */
+ rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
+index a5bdbb5678b72..f166250f2d064 100644
+--- a/arch/parisc/kernel/signal32.h
++++ b/arch/parisc/kernel/signal32.h
+@@ -36,7 +36,7 @@ struct compat_regfile {
+ compat_int_t rf_sar;
+ };
+
+-#define COMPAT_SIGRETURN_TRAMP 3
++#define COMPAT_SIGRETURN_TRAMP 4
+ #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
+ #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
+ COMPAT_SIGRESTARTBLOCK_TRAMP)
+diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
+index 1405b603b91b6..cf92ece20b757 100644
+--- a/arch/parisc/kernel/smp.c
++++ b/arch/parisc/kernel/smp.c
+@@ -29,6 +29,7 @@
+ #include <linux/bitops.h>
+ #include <linux/ftrace.h>
+ #include <linux/cpu.h>
++#include <linux/kgdb.h>
+
+ #include <linux/atomic.h>
+ #include <asm/current.h>
+@@ -69,7 +70,10 @@ enum ipi_message_type {
+ IPI_CALL_FUNC,
+ IPI_CPU_START,
+ IPI_CPU_STOP,
+- IPI_CPU_TEST
++ IPI_CPU_TEST,
++#ifdef CONFIG_KGDB
++ IPI_ENTER_KGDB,
++#endif
+ };
+
+
+@@ -167,7 +171,12 @@ ipi_interrupt(int irq, void *dev_id)
+ case IPI_CPU_TEST:
+ smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu);
+ break;
+-
++#ifdef CONFIG_KGDB
++ case IPI_ENTER_KGDB:
++ smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu);
++ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
++ break;
++#endif
+ default:
+ printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n",
+ this_cpu, which);
+@@ -226,6 +235,12 @@ send_IPI_allbutself(enum ipi_message_type op)
+ }
+ }
+
++#ifdef CONFIG_KGDB
++void kgdb_roundup_cpus(void)
++{
++ send_IPI_allbutself(IPI_ENTER_KGDB);
++}
++#endif
+
+ inline void
+ smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); }
+diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
+index 5f12537318ab2..31950882e272f 100644
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -463,3 +463,30 @@ asmlinkage long parisc_inotify_init1(int flags)
+ flags = FIX_O_NONBLOCK(flags);
+ return sys_inotify_init1(flags);
+ }
++
++/*
++ * madvise() wrapper
++ *
++ * Up to kernel v6.1 parisc has different values than all other
++ * platforms for the MADV_xxx flags listed below.
++ * To keep binary compatibility with existing userspace programs
++ * translate the former values to the new values.
++ *
++ * XXX: Remove this wrapper in year 2025 (or later)
++ */
++
++asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior)
++{
++ switch (behavior) {
++ case 65: behavior = MADV_MERGEABLE; break;
++ case 66: behavior = MADV_UNMERGEABLE; break;
++ case 67: behavior = MADV_HUGEPAGE; break;
++ case 68: behavior = MADV_NOHUGEPAGE; break;
++ case 69: behavior = MADV_DONTDUMP; break;
++ case 70: behavior = MADV_DODUMP; break;
++ case 71: behavior = MADV_WIPEONFORK; break;
++ case 72: behavior = MADV_KEEPONFORK; break;
++ }
++
++ return sys_madvise(start, len_in, behavior);
++}
+diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
+index 3f24a0af1e047..9842dcb2041e5 100644
+--- a/arch/parisc/kernel/syscall.S
++++ b/arch/parisc/kernel/syscall.S
+@@ -478,7 +478,7 @@ lws_start:
+ extrd,u %r1,PSW_W_BIT,1,%r1
+ /* sp must be aligned on 4, so deposit the W bit setting into
+ * the bottom of sp temporarily */
+- or,ev %r1,%r30,%r30
++ or,od %r1,%r30,%r30
+
+ /* Clip LWS number to a 32-bit value for 32-bit processes */
+ depdi 0, 31, 32, %r20
+diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
+index bf751e0732b70..50c759f11c25d 100644
+--- a/arch/parisc/kernel/syscalls/syscall.tbl
++++ b/arch/parisc/kernel/syscalls/syscall.tbl
+@@ -131,7 +131,7 @@
+ 116 common sysinfo sys_sysinfo compat_sys_sysinfo
+ 117 common shutdown sys_shutdown
+ 118 common fsync sys_fsync
+-119 common madvise sys_madvise
++119 common madvise parisc_madvise
+ 120 common clone sys_clone_wrapper
+ 121 common setdomainname sys_setdomainname
+ 122 common sendfile sys_sendfile compat_sys_sendfile
+@@ -413,7 +413,7 @@
+ 412 32 utimensat_time64 sys_utimensat sys_utimensat
+ 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+ 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
++416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
+ 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+ 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
+ 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
+diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
+index 9fb1e794831b0..d8e59a1000ab7 100644
+--- a/arch/parisc/kernel/time.c
++++ b/arch/parisc/kernel/time.c
+@@ -249,30 +249,12 @@ void __init time_init(void)
+ static int __init init_cr16_clocksource(void)
+ {
+ /*
+- * The cr16 interval timers are not syncronized across CPUs on
+- * different sockets, so mark them unstable and lower rating on
+- * multi-socket SMP systems.
++ * The cr16 interval timers are not synchronized across CPUs.
+ */
+ if (num_online_cpus() > 1 && !running_on_qemu) {
+- int cpu;
+- unsigned long cpu0_loc;
+- cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
+-
+- for_each_online_cpu(cpu) {
+- if (cpu == 0)
+- continue;
+- if ((cpu0_loc != 0) &&
+- (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
+- continue;
+-
+- /* mark sched_clock unstable */
+- clear_sched_clock_stable();
+-
+- clocksource_cr16.name = "cr16_unstable";
+- clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+- clocksource_cr16.rating = 0;
+- break;
+- }
++ clocksource_cr16.name = "cr16_unstable";
++ clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
++ clocksource_cr16.rating = 0;
+ }
+
+ /* register at clocksource framework */
+diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
+index 747c328fb8862..dc1bc77b9fa69 100644
+--- a/arch/parisc/kernel/traps.c
++++ b/arch/parisc/kernel/traps.c
+@@ -268,7 +268,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
+ panic("Fatal exception");
+
+ oops_exit();
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ /* gdb uses break 4,8 */
+@@ -305,8 +305,8 @@ static void handle_break(struct pt_regs *regs)
+ #endif
+
+ #ifdef CONFIG_KGDB
+- if (unlikely(iir == PARISC_KGDB_COMPILED_BREAK_INSN ||
+- iir == PARISC_KGDB_BREAK_INSN)) {
++ if (unlikely((iir == PARISC_KGDB_COMPILED_BREAK_INSN ||
++ iir == PARISC_KGDB_BREAK_INSN)) && !user_mode(regs)) {
+ kgdb_handle_exception(9, SIGTRAP, 0, regs);
+ return;
+ }
+@@ -661,6 +661,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
+ by hand. Technically we need to emulate:
+ fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw
+ */
++ if (code == 17 && handle_nadtlb_fault(regs))
++ return;
+ fault_address = regs->ior;
+ fault_space = regs->isr;
+ break;
+@@ -729,6 +731,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
+ }
+ mmap_read_unlock(current->mm);
+ }
++ /* CPU could not fetch instruction, so clear stale IIR value. */
++ regs->iir = 0xbaadf00d;
+ fallthrough;
+ case 27:
+ /* Data memory protection ID trap */
+@@ -782,7 +786,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
+ * unless pagefault_disable() was called before.
+ */
+
+- if (fault_space == 0 && !faulthandler_disabled())
++ if (faulthandler_disabled() || fault_space == 0)
+ {
+ /* Clean up and return if in exception table. */
+ if (fixup_exception(regs))
+diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
+index 237d20dd5622d..cc6ed74960501 100644
+--- a/arch/parisc/kernel/unaligned.c
++++ b/arch/parisc/kernel/unaligned.c
+@@ -107,7 +107,7 @@
+ #define R1(i) (((i)>>21)&0x1f)
+ #define R2(i) (((i)>>16)&0x1f)
+ #define R3(i) ((i)&0x1f)
+-#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1))
++#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1))
+ #define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
+ #define IM5_2(i) IM((i)>>16,5)
+ #define IM5_3(i) IM((i),5)
+@@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+
+- return 0;
++ return ret;
+ }
+ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
+ {
+@@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
+ __asm__ __volatile__ (
+ " mtsp %4, %%sr1\n"
+ " zdep %2, 29, 2, %%r19\n"
+-" dep %%r0, 31, 2, %2\n"
++" dep %%r0, 31, 2, %3\n"
+ " mtsar %%r19\n"
+ " zvdepi -2, 32, %%r19\n"
+ "1: ldw 0(%%sr1,%3),%%r20\n"
+@@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
+ " andcm %%r21, %%r19, %%r21\n"
+ " or %1, %%r20, %1\n"
+ " or %2, %%r21, %2\n"
+-"3: stw %1,0(%%sr1,%1)\n"
++"3: stw %1,0(%%sr1,%3)\n"
+ "4: stw %%r1,4(%%sr1,%3)\n"
+ "5: stw %2,8(%%sr1,%3)\n"
+ " copy %%r0, %0\n"
+@@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs)
+ ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */
+ break;
+ }
+-#ifdef CONFIG_PA20
+ switch (regs->iir & OPCODE2_MASK)
+ {
+ case OPCODE_FLDD_L:
+@@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs)
+ flop=1;
+ ret = emulate_std(regs, R2(regs->iir),1);
+ break;
++#ifdef CONFIG_PA20
+ case OPCODE_LDD_L:
+ ret = emulate_ldd(regs, R2(regs->iir),0);
+ break;
+ case OPCODE_STD_L:
+ ret = emulate_std(regs, R2(regs->iir),0);
+ break;
+- }
+ #endif
++ }
+ switch (regs->iir & OPCODE3_MASK)
+ {
+ case OPCODE_FLDW_L:
+ flop=1;
+- ret = emulate_ldw(regs, R2(regs->iir),0);
++ ret = emulate_ldw(regs, R2(regs->iir), 1);
+ break;
+ case OPCODE_LDW_M:
+- ret = emulate_ldw(regs, R2(regs->iir),1);
++ ret = emulate_ldw(regs, R2(regs->iir), 0);
+ break;
+
+ case OPCODE_FSTW_L:
+diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
+index 87ae476d1c4f5..86a57fb0e6fae 100644
+--- a/arch/parisc/kernel/unwind.c
++++ b/arch/parisc/kernel/unwind.c
+@@ -21,6 +21,8 @@
+ #include <asm/ptrace.h>
+
+ #include <asm/unwind.h>
++#include <asm/switch_to.h>
++#include <asm/sections.h>
+
+ /* #define DEBUG 1 */
+ #ifdef DEBUG
+@@ -203,6 +205,11 @@ int __init unwind_init(void)
+ return 0;
+ }
+
++static bool pc_is_kernel_fn(unsigned long pc, void *fn)
++{
++ return (unsigned long)dereference_kernel_function_descriptor(fn) == pc;
++}
++
+ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int frame_size)
+ {
+ /*
+@@ -221,7 +228,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
+ extern void * const _call_on_stack;
+ #endif /* CONFIG_IRQSTACKS */
+
+- if (pc == (unsigned long) &handle_interruption) {
++ if (pc_is_kernel_fn(pc, handle_interruption)) {
+ struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN);
+ dbg("Unwinding through handle_interruption()\n");
+ info->prev_sp = regs->gr[30];
+@@ -229,13 +236,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
+ return 1;
+ }
+
+- if (pc == (unsigned long) &ret_from_kernel_thread ||
+- pc == (unsigned long) &syscall_exit) {
++ if (pc_is_kernel_fn(pc, ret_from_kernel_thread) ||
++ pc_is_kernel_fn(pc, syscall_exit)) {
+ info->prev_sp = info->prev_ip = 0;
+ return 1;
+ }
+
+- if (pc == (unsigned long) &intr_return) {
++ if (pc_is_kernel_fn(pc, intr_return)) {
+ struct pt_regs *regs;
+
+ dbg("Found intr_return()\n");
+@@ -246,20 +253,20 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
+ return 1;
+ }
+
+- if (pc == (unsigned long) &_switch_to_ret) {
++ if (pc_is_kernel_fn(pc, _switch_to) ||
++ pc_is_kernel_fn(pc, _switch_to_ret)) {
+ info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE;
+ info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET);
+ return 1;
+ }
+
+ #ifdef CONFIG_IRQSTACKS
+- if (pc == (unsigned long) &_call_on_stack) {
++ if (pc_is_kernel_fn(pc, _call_on_stack)) {
+ info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
+ info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
+ return 1;
+ }
+ #endif
+-
+ return 0;
+ }
+
+diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
+index 367f6397bda7a..8603850580857 100644
+--- a/arch/parisc/lib/iomap.c
++++ b/arch/parisc/lib/iomap.c
+@@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr)
+ return *((u64 *)addr);
+ }
+
++u64 ioread64_lo_hi(const void __iomem *addr)
++{
++ u32 low, high;
++
++ low = ioread32(addr);
++ high = ioread32(addr + sizeof(u32));
++
++ return low + ((u64)high << 32);
++}
++
+ u64 ioread64_hi_lo(const void __iomem *addr)
+ {
+ u32 low, high;
+@@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr)
+ }
+ }
+
++void iowrite64_lo_hi(u64 val, void __iomem *addr)
++{
++ iowrite32(val, addr);
++ iowrite32(val >> 32, addr + sizeof(u32));
++}
++
+ void iowrite64_hi_lo(u64 val, void __iomem *addr)
+ {
+ iowrite32(val >> 32, addr + sizeof(u32));
+@@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32);
+ EXPORT_SYMBOL(ioread32be);
+ EXPORT_SYMBOL(ioread64);
+ EXPORT_SYMBOL(ioread64be);
++EXPORT_SYMBOL(ioread64_lo_hi);
+ EXPORT_SYMBOL(ioread64_hi_lo);
+ EXPORT_SYMBOL(iowrite8);
+ EXPORT_SYMBOL(iowrite16);
+@@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32);
+ EXPORT_SYMBOL(iowrite32be);
+ EXPORT_SYMBOL(iowrite64);
+ EXPORT_SYMBOL(iowrite64be);
++EXPORT_SYMBOL(iowrite64_lo_hi);
+ EXPORT_SYMBOL(iowrite64_hi_lo);
+ EXPORT_SYMBOL(ioread8_rep);
+ EXPORT_SYMBOL(ioread16_rep);
+diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
+index 716960f5d92ea..5faa3cff47387 100644
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -424,3 +424,92 @@ no_context:
+ goto no_context;
+ pagefault_out_of_memory();
+ }
++
++/* Handle non-access data TLB miss faults.
++ *
++ * For probe instructions, accesses to userspace are considered allowed
++ * if they lie in a valid VMA and the access type matches. We are not
++ * allowed to handle MM faults here so there may be situations where an
++ * actual access would fail even though a probe was successful.
++ */
++int
++handle_nadtlb_fault(struct pt_regs *regs)
++{
++ unsigned long insn = regs->iir;
++ int breg, treg, xreg, val = 0;
++ struct vm_area_struct *vma, *prev_vma;
++ struct task_struct *tsk;
++ struct mm_struct *mm;
++ unsigned long address;
++ unsigned long acc_type;
++
++ switch (insn & 0x380) {
++ case 0x280:
++ /* FDC instruction */
++ fallthrough;
++ case 0x380:
++ /* PDC and FIC instructions */
++ if (printk_ratelimit()) {
++ pr_warn("BUG: nullifying cache flush/purge instruction\n");
++ show_regs(regs);
++ }
++ if (insn & 0x20) {
++ /* Base modification */
++ breg = (insn >> 21) & 0x1f;
++ xreg = (insn >> 16) & 0x1f;
++ if (breg && xreg)
++ regs->gr[breg] += regs->gr[xreg];
++ }
++ regs->gr[0] |= PSW_N;
++ return 1;
++
++ case 0x180:
++ /* PROBE instruction */
++ treg = insn & 0x1f;
++ if (regs->isr) {
++ tsk = current;
++ mm = tsk->mm;
++ if (mm) {
++ /* Search for VMA */
++ address = regs->ior;
++ mmap_read_lock(mm);
++ vma = find_vma_prev(mm, address, &prev_vma);
++ mmap_read_unlock(mm);
++
++ /*
++ * Check if access to the VMA is okay.
++ * We don't allow for stack expansion.
++ */
++ acc_type = (insn & 0x40) ? VM_WRITE : VM_READ;
++ if (vma
++ && address >= vma->vm_start
++ && (vma->vm_flags & acc_type) == acc_type)
++ val = 1;
++ }
++ }
++ if (treg)
++ regs->gr[treg] = val;
++ regs->gr[0] |= PSW_N;
++ return 1;
++
++ case 0x300:
++ /* LPA instruction */
++ if (insn & 0x20) {
++ /* Base modification */
++ breg = (insn >> 21) & 0x1f;
++ xreg = (insn >> 16) & 0x1f;
++ if (breg && xreg)
++ regs->gr[breg] += regs->gr[xreg];
++ }
++ treg = insn & 0x1f;
++ if (treg)
++ regs->gr[treg] = 0;
++ regs->gr[0] |= PSW_N;
++ return 1;
++
++ default:
++ break;
++ }
++
++ return 0;
++}
+diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
+index 24426a7e1a5e5..cc15d737fda64 100644
+--- a/arch/parisc/mm/fixmap.c
++++ b/arch/parisc/mm/fixmap.c
+@@ -20,12 +20,9 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+ pte_t *pte;
+
+ if (pmd_none(*pmd))
+- pmd = pmd_alloc(NULL, pud, vaddr);
+-
+- pte = pte_offset_kernel(pmd, vaddr);
+- if (pte_none(*pte))
+ pte = pte_alloc_kernel(pmd, vaddr);
+
++ pte = pte_offset_kernel(pmd, vaddr);
+ set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX));
+ flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+ }
+diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
+index 3f7d6d5b56ac8..e5c18313b5d4f 100644
+--- a/arch/parisc/mm/init.c
++++ b/arch/parisc/mm/init.c
+@@ -341,9 +341,9 @@ static void __init setup_bootmem(void)
+
+ static bool kernel_set_to_readonly;
+
+-static void __init map_pages(unsigned long start_vaddr,
+- unsigned long start_paddr, unsigned long size,
+- pgprot_t pgprot, int force)
++static void __ref map_pages(unsigned long start_vaddr,
++ unsigned long start_paddr, unsigned long size,
++ pgprot_t pgprot, int force)
+ {
+ pmd_t *pmd;
+ pte_t *pg_table;
+@@ -453,7 +453,7 @@ void __init set_kernel_text_rw(int enable_read_write)
+ flush_tlb_all();
+ }
+
+-void __ref free_initmem(void)
++void free_initmem(void)
+ {
+ unsigned long init_begin = (unsigned long)__init_begin;
+ unsigned long init_end = (unsigned long)__init_end;
+@@ -467,7 +467,6 @@ void __ref free_initmem(void)
+ /* The init text pages are marked R-X. We have to
+ * flush the icache and mark them RW-
+ *
+- * This is tricky, because map_pages is in the init section.
+ * Do a dummy remap of the data section first (the data
+ * section is already PAGE_KERNEL) to pull in the TLB entries
+ * for map_kernel */
+@@ -842,9 +841,9 @@ void flush_tlb_all(void)
+ {
+ int do_recycle;
+
+- __inc_irq_stat(irq_tlb_count);
+ do_recycle = 0;
+ spin_lock(&sid_lock);
++ __inc_irq_stat(irq_tlb_count);
+ if (dirty_space_ids > RECYCLE_THRESHOLD) {
+ BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */
+ get_dirty_sids(&recycle_ndirty,recycle_dirty_array);
+@@ -863,8 +862,8 @@ void flush_tlb_all(void)
+ #else
+ void flush_tlb_all(void)
+ {
+- __inc_irq_stat(irq_tlb_count);
+ spin_lock(&sid_lock);
++ __inc_irq_stat(irq_tlb_count);
+ flush_tlb_all_local(NULL);
+ recycle_sids();
+ spin_unlock(&sid_lock);
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index ba5b661893588..27222b75d2a4b 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -138,7 +138,7 @@ config PPC
+ select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+ select ARCH_HAS_SET_MEMORY
+- select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
++ select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
+ select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_UACCESS_FLUSHCACHE
+@@ -150,7 +150,7 @@ config PPC
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_STACKWALK
+ select ARCH_SUPPORTS_ATOMIC_RMW
+- select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64
++ select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+ select ARCH_USE_MEMTEST
+@@ -190,7 +190,7 @@ config PPC
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
+ select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
+ select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
+- select HAVE_ARCH_KFENCE if PPC32
++ select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x
+ select HAVE_ARCH_KGDB
+ select HAVE_ARCH_MMAP_RND_BITS
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+@@ -217,7 +217,6 @@ config PPC
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
+ select HAVE_IOREMAP_PROT
+- select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
+@@ -354,6 +353,10 @@ config ARCH_SUSPEND_NONZERO_CPU
+ def_bool y
+ depends on PPC_POWERNV || PPC_PSERIES
+
++config ARCH_HAS_ADD_PAGES
++ def_bool y
++ depends on ARCH_ENABLE_MEMORY_HOTPLUG
++
+ config PPC_DCR_NATIVE
+ bool
+
+@@ -768,7 +771,6 @@ config THREAD_SHIFT
+ range 13 15
+ default "15" if PPC_256K_PAGES
+ default "14" if PPC64
+- default "14" if KASAN
+ default "13"
+ help
+ Used to define the stack size. The default is almost always what you
+diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
+index 192f0ed0097ff..80ce54f59fae8 100644
+--- a/arch/powerpc/Kconfig.debug
++++ b/arch/powerpc/Kconfig.debug
+@@ -240,7 +240,7 @@ config PPC_EARLY_DEBUG_40x
+
+ config PPC_EARLY_DEBUG_CPM
+ bool "Early serial debugging for Freescale CPM-based serial ports"
+- depends on SERIAL_CPM
++ depends on SERIAL_CPM=y
+ help
+ Select this to enable early debugging for Freescale chips
+ using a CPM-based serial port. This assumes that the bootwrapper
+diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
+index aa6808e706470..3dc75040a7563 100644
+--- a/arch/powerpc/Makefile
++++ b/arch/powerpc/Makefile
+@@ -17,23 +17,6 @@ HAS_BIARCH := $(call cc-option-yn, -m32)
+ # Set default 32 bits cross compilers for vdso and boot wrapper
+ CROSS32_COMPILE ?=
+
+-ifeq ($(HAS_BIARCH),y)
+-ifeq ($(CROSS32_COMPILE),)
+-ifdef CONFIG_PPC32
+-# These options will be overridden by any -mcpu option that the CPU
+-# or platform code sets later on the command line, but they are needed
+-# to set a sane 32-bit cpu target for the 64-bit cross compiler which
+-# may default to the wrong ISA.
+-KBUILD_CFLAGS += -mcpu=powerpc
+-KBUILD_AFLAGS += -mcpu=powerpc
+-endif
+-endif
+-endif
+-
+-ifdef CONFIG_PPC_BOOK3S_32
+-KBUILD_CFLAGS += -mcpu=powerpc
+-endif
+-
+ # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+ # ppc64_defconfig because we have nothing better to go on.
+ uname := $(shell uname -m)
+@@ -109,7 +92,7 @@ aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+
+ ifeq ($(HAS_BIARCH),y)
+ KBUILD_CFLAGS += -m$(BITS)
+-KBUILD_AFLAGS += -m$(BITS) -Wl,-a$(BITS)
++KBUILD_AFLAGS += -m$(BITS)
+ KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
+ endif
+
+@@ -171,9 +154,9 @@ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
+ CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
+ else
+ CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
+-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
++CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
+ endif
+-else
++else ifdef CONFIG_PPC_BOOK3E_64
+ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
+ endif
+
+@@ -185,6 +168,7 @@ endif
+ endif
+
+ CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
++AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
+
+ # Altivec option not allowed with e500mc64 in GCC.
+ ifdef CONFIG_ALTIVEC
+@@ -195,14 +179,6 @@ endif
+ CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU)
+ CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU))
+
+-ifdef CONFIG_PPC32
+-ifdef CONFIG_PPC_E500MC
+-CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc)
+-else
+-CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc)
+-endif
+-endif
+-
+ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
+
+ KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr)
+@@ -468,3 +444,11 @@ checkbin:
+ echo -n '*** Please use a different binutils version.' ; \
+ false ; \
+ fi
++ @if test "x${CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT}" = "xy" -a \
++ "x${CONFIG_LD_IS_BFD}" = "xy" -a \
++ "${CONFIG_LD_VERSION}" = "23700" ; then \
++ echo -n '*** binutils 2.37 drops unused section symbols, which recordmcount ' ; \
++ echo 'is unable to handle.' ; \
++ echo '*** Please use a different binutils version.' ; \
++ false ; \
++ fi
+diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
+index 089ee3ea55c8a..1d51b9e21172c 100644
+--- a/arch/powerpc/boot/Makefile
++++ b/arch/powerpc/boot/Makefile
+@@ -34,6 +34,7 @@ endif
+
+ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
++ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
+ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
+ $(LINUXINCLUDE)
+
+diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
+index 1d83966f5ef64..e8f10a5996593 100644
+--- a/arch/powerpc/boot/crt0.S
++++ b/arch/powerpc/boot/crt0.S
+@@ -226,16 +226,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
+ #ifdef __powerpc64__
+
+ #define PROM_FRAME_SIZE 512
+-#define SAVE_GPR(n, base) std n,8*(n)(base)
+-#define REST_GPR(n, base) ld n,8*(n)(base)
+-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
+-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
++
++.macro OP_REGS op, width, start, end, base, offset
++ .Lreg=\start
++ .rept (\end - \start + 1)
++ \op .Lreg,\offset+\width*.Lreg(\base)
++ .Lreg=.Lreg+1
++ .endr
++.endm
++
++#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0
++#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0
++#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
++#define REST_GPR(n, base) REST_GPRS(n, n, base)
+
+ /* prom handles the jump into and return from firmware. The prom args pointer
+ is loaded in r3. */
+@@ -246,9 +249,7 @@ prom:
+ stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+ SAVE_GPR(2, r1)
+- SAVE_GPR(13, r1)
+- SAVE_8GPRS(14, r1)
+- SAVE_10GPRS(22, r1)
++ SAVE_GPRS(13, 31, r1)
+ mfcr r10
+ std r10,8*32(r1)
+ mfmsr r10
+@@ -283,9 +284,7 @@ prom:
+
+ /* Restore other registers */
+ REST_GPR(2, r1)
+- REST_GPR(13, r1)
+- REST_8GPRS(14, r1)
+- REST_10GPRS(22, r1)
++ REST_GPRS(13, 31, r1)
+ ld r10,8*32(r1)
+ mtcr r10
+
+diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts
+index 408b486b13dff..cd589539f313f 100644
+--- a/arch/powerpc/boot/dts/charon.dts
++++ b/arch/powerpc/boot/dts/charon.dts
+@@ -35,7 +35,7 @@
+ };
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x08000000>; // 128MB
+ };
+diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
+index 0e5e9d3acf79f..19a14e62e65f4 100644
+--- a/arch/powerpc/boot/dts/digsy_mtc.dts
++++ b/arch/powerpc/boot/dts/digsy_mtc.dts
+@@ -16,7 +16,7 @@
+ model = "intercontrol,digsy-mtc";
+ compatible = "intercontrol,digsy-mtc";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x02000000>; // 32MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
+new file mode 100644
+index 0000000000000..7e2a90cde72e5
+--- /dev/null
++++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
+@@ -0,0 +1,51 @@
++/*
++ * e500v1 Power ISA Device Tree Source (include)
++ *
++ * Copyright 2012 Freescale Semiconductor Inc.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions are met:
++ * * Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * * Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * * Neither the name of Freescale Semiconductor nor the
++ * names of its contributors may be used to endorse or promote products
++ * derived from this software without specific prior written permission.
++ *
++ *
++ * ALTERNATIVELY, this software may be distributed under the terms of the
++ * GNU General Public License ("GPL") as published by the Free Software
++ * Foundation, either version 2 of that License or (at your option) any
++ * later version.
++ *
++ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
++ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
++ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++/ {
++ cpus {
++ power-isa-version = "2.03";
++ power-isa-b; // Base
++ power-isa-e; // Embedded
++ power-isa-atb; // Alternate Time Base
++ power-isa-cs; // Cache Specification
++ power-isa-e.le; // Embedded.Little-Endian
++ power-isa-e.pm; // Embedded.Performance Monitor
++ power-isa-ecl; // Embedded Cache Locking
++ power-isa-mmc; // Memory Coherence
++ power-isa-sp; // Signal Processing Engine
++ power-isa-sp.fs; // SPE.Embedded Float Scalar Single
++ power-isa-sp.fv; // SPE.Embedded Float Vector
++ mmu-type = "power-embedded";
++ };
++};
+diff --git a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
+index 18a885130538a..e03ae130162ba 100644
+--- a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
++++ b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
+@@ -7,7 +7,7 @@
+
+ /dts-v1/;
+
+-/include/ "e500v2_power_isa.dtsi"
++/include/ "e500v1_power_isa.dtsi"
+
+ / {
+ model = "MPC8540ADS";
+diff --git a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
+index ac381e7b1c60e..a2a6c5cf852e9 100644
+--- a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
++++ b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
+@@ -7,7 +7,7 @@
+
+ /dts-v1/;
+
+-/include/ "e500v2_power_isa.dtsi"
++/include/ "e500v1_power_isa.dtsi"
+
+ / {
+ model = "MPC8541CDS";
+diff --git a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
+index 9f58db2a7e661..901b6ff06dfbb 100644
+--- a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
++++ b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
+@@ -7,7 +7,7 @@
+
+ /dts-v1/;
+
+-/include/ "e500v2_power_isa.dtsi"
++/include/ "e500v1_power_isa.dtsi"
+
+ / {
+ model = "MPC8555CDS";
+diff --git a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
+index a24722ccaebf1..c2f9aea78b29f 100644
+--- a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
++++ b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
+@@ -7,7 +7,7 @@
+
+ /dts-v1/;
+
+-/include/ "e500v2_power_isa.dtsi"
++/include/ "e500v1_power_isa.dtsi"
+
+ / {
+ model = "MPC8560ADS";
+diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
+new file mode 100644
+index 0000000000000..437dab3fc0176
+--- /dev/null
++++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
+@@ -0,0 +1,44 @@
++// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
++/*
++ * QorIQ FMan v3 10g port #2 device tree stub [ controller @ offset 0x400000 ]
++ *
++ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
++ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
++ */
++
++fman@400000 {
++ fman0_rx_0x08: port@88000 {
++ cell-index = <0x8>;
++ compatible = "fsl,fman-v3-port-rx";
++ reg = <0x88000 0x1000>;
++ fsl,fman-10g-port;
++ };
++
++ fman0_tx_0x28: port@a8000 {
++ cell-index = <0x28>;
++ compatible = "fsl,fman-v3-port-tx";
++ reg = <0xa8000 0x1000>;
++ fsl,fman-10g-port;
++ };
++
++ ethernet@e0000 {
++ cell-index = <0>;
++ compatible = "fsl,fman-memac";
++ reg = <0xe0000 0x1000>;
++ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;
++ ptp-timer = <&ptp_timer0>;
++ pcsphy-handle = <&pcsphy0>;
++ };
++
++ mdio@e1000 {
++ #address-cells = <1>;
++ #size-cells = <0>;
++ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
++ reg = <0xe1000 0x1000>;
++ fsl,erratum-a011043; /* must ignore read errors */
++
++ pcsphy0: ethernet-phy@0 {
++ reg = <0x0>;
++ };
++ };
++};
+diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
+new file mode 100644
+index 0000000000000..ad116b17850a8
+--- /dev/null
++++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
+@@ -0,0 +1,44 @@
++// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
++/*
++ * QorIQ FMan v3 10g port #3 device tree stub [ controller @ offset 0x400000 ]
++ *
++ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
++ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
++ */
++
++fman@400000 {
++ fman0_rx_0x09: port@89000 {
++ cell-index = <0x9>;
++ compatible = "fsl,fman-v3-port-rx";
++ reg = <0x89000 0x1000>;
++ fsl,fman-10g-port;
++ };
++
++ fman0_tx_0x29: port@a9000 {
++ cell-index = <0x29>;
++ compatible = "fsl,fman-v3-port-tx";
++ reg = <0xa9000 0x1000>;
++ fsl,fman-10g-port;
++ };
++
++ ethernet@e2000 {
++ cell-index = <1>;
++ compatible = "fsl,fman-memac";
++ reg = <0xe2000 0x1000>;
++ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;
++ ptp-timer = <&ptp_timer0>;
++ pcsphy-handle = <&pcsphy1>;
++ };
++
++ mdio@e3000 {
++ #address-cells = <1>;
++ #size-cells = <0>;
++ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
++ reg = <0xe3000 0x1000>;
++ fsl,erratum-a011043; /* must ignore read errors */
++
++ pcsphy1: ethernet-phy@0 {
++ reg = <0x0>;
++ };
++ };
++};
+diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
+index c90702b04a530..48e5cd61599c6 100644
+--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
++++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
+@@ -79,6 +79,7 @@ fman0: fman@400000 {
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfc000 0x1000>;
++ fsl,erratum-a009885;
+ };
+
+ xmdio0: mdio@fd000 {
+@@ -86,6 +87,7 @@ fman0: fman@400000 {
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfd000 0x1000>;
++ fsl,erratum-a009885;
+ };
+ };
+
+diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+new file mode 100644
+index 0000000000000..d4f5f159d6f23
+--- /dev/null
++++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+@@ -0,0 +1,29 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * T1040RDB-REV-A Device Tree Source
++ *
++ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
++ *
++ */
++
++#include "t1040rdb.dts"
++
++/ {
++ model = "fsl,T1040RDB-REV-A";
++};
++
++&seville_port0 {
++ label = "ETH5";
++};
++
++&seville_port2 {
++ label = "ETH7";
++};
++
++&seville_port4 {
++ label = "ETH9";
++};
++
++&seville_port6 {
++ label = "ETH11";
++};
+diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+index af0c8a6f56138..b6733e7e65805 100644
+--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
++++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+@@ -119,7 +119,7 @@
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_0>;
+ phy-mode = "qsgmii";
+- label = "ETH5";
++ label = "ETH3";
+ status = "okay";
+ };
+
+@@ -135,7 +135,7 @@
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_2>;
+ phy-mode = "qsgmii";
+- label = "ETH7";
++ label = "ETH5";
+ status = "okay";
+ };
+
+@@ -151,7 +151,7 @@
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_4>;
+ phy-mode = "qsgmii";
+- label = "ETH9";
++ label = "ETH7";
+ status = "okay";
+ };
+
+@@ -167,7 +167,7 @@
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_6>;
+ phy-mode = "qsgmii";
+- label = "ETH11";
++ label = "ETH9";
+ status = "okay";
+ };
+
+diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+index 099a598c74c00..bfe1ed5be3374 100644
+--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
++++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+@@ -139,12 +139,12 @@
+ fman@400000 {
+ ethernet@e6000 {
+ phy-handle = <&phy_rgmii_0>;
+- phy-connection-type = "rgmii";
++ phy-connection-type = "rgmii-id";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+- phy-connection-type = "rgmii";
++ phy-connection-type = "rgmii-id";
+ };
+
+ mdio0: mdio@fc000 {
+diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+index ecbb447920bc6..27714dc2f04a5 100644
+--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
++++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+@@ -609,8 +609,8 @@
+ /include/ "qoriq-bman1.dtsi"
+
+ /include/ "qoriq-fman3-0.dtsi"
+-/include/ "qoriq-fman3-0-1g-0.dtsi"
+-/include/ "qoriq-fman3-0-1g-1.dtsi"
++/include/ "qoriq-fman3-0-10g-2.dtsi"
++/include/ "qoriq-fman3-0-10g-3.dtsi"
+ /include/ "qoriq-fman3-0-1g-2.dtsi"
+ /include/ "qoriq-fman3-0-1g-3.dtsi"
+ /include/ "qoriq-fman3-0-1g-4.dtsi"
+@@ -659,3 +659,19 @@
+ interrupts = <16 2 1 9>;
+ };
+ };
++
++&fman0_rx_0x08 {
++ /delete-property/ fsl,fman-10g-port;
++};
++
++&fman0_tx_0x28 {
++ /delete-property/ fsl,fman-10g-port;
++};
++
++&fman0_rx_0x09 {
++ /delete-property/ fsl,fman-10g-port;
++};
++
++&fman0_tx_0x29 {
++ /delete-property/ fsl,fman-10g-port;
++};
+diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts
+index cb2782dd6132c..e7b194775d783 100644
+--- a/arch/powerpc/boot/dts/lite5200.dts
++++ b/arch/powerpc/boot/dts/lite5200.dts
+@@ -32,7 +32,7 @@
+ };
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x04000000>; // 64MB
+ };
+diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts
+index 2b86c81f90485..547cbe726ff23 100644
+--- a/arch/powerpc/boot/dts/lite5200b.dts
++++ b/arch/powerpc/boot/dts/lite5200b.dts
+@@ -31,7 +31,7 @@
+ led4 { gpios = <&gpio_simple 2 1>; };
+ };
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x10000000>; // 256MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
+index 61cae9dcddef4..f3188018faceb 100644
+--- a/arch/powerpc/boot/dts/media5200.dts
++++ b/arch/powerpc/boot/dts/media5200.dts
+@@ -32,7 +32,7 @@
+ };
+ };
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x08000000>; // 128MB RAM
+ };
+
+diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi
+index 648fe31795f49..8b796f3b11da7 100644
+--- a/arch/powerpc/boot/dts/mpc5200b.dtsi
++++ b/arch/powerpc/boot/dts/mpc5200b.dtsi
+@@ -33,7 +33,7 @@
+ };
+ };
+
+- memory: memory {
++ memory: memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x04000000>; // 64MB
+ };
+diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts
+index 24a46f65e5299..e0a8d3034417f 100644
+--- a/arch/powerpc/boot/dts/o2d.dts
++++ b/arch/powerpc/boot/dts/o2d.dts
+@@ -12,7 +12,7 @@
+ model = "ifm,o2d";
+ compatible = "ifm,o2d";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x08000000>; // 128MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi
+index 6661955a2be47..b55a9e5bd828c 100644
+--- a/arch/powerpc/boot/dts/o2d.dtsi
++++ b/arch/powerpc/boot/dts/o2d.dtsi
+@@ -19,7 +19,7 @@
+ model = "ifm,o2d";
+ compatible = "ifm,o2d";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x04000000>; // 64MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts
+index eeba7f5507d5d..c2eedbd1f5fcb 100644
+--- a/arch/powerpc/boot/dts/o2dnt2.dts
++++ b/arch/powerpc/boot/dts/o2dnt2.dts
+@@ -12,7 +12,7 @@
+ model = "ifm,o2dnt2";
+ compatible = "ifm,o2d";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x08000000>; // 128MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts
+index fd00396b0593e..e4c1bdd412716 100644
+--- a/arch/powerpc/boot/dts/o3dnt.dts
++++ b/arch/powerpc/boot/dts/o3dnt.dts
+@@ -12,7 +12,7 @@
+ model = "ifm,o3dnt";
+ compatible = "ifm,o2d";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x04000000>; // 64MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts
+index 780e13d99e7b8..1895bc95900cc 100644
+--- a/arch/powerpc/boot/dts/pcm032.dts
++++ b/arch/powerpc/boot/dts/pcm032.dts
+@@ -20,7 +20,7 @@
+ model = "phytec,pcm032";
+ compatible = "phytec,pcm032";
+
+- memory {
++ memory@0 {
+ reg = <0x00000000 0x08000000>; // 128MB
+ };
+
+diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts
+index 9ed0bc78967e1..5bb25a9e40a01 100644
+--- a/arch/powerpc/boot/dts/tqm5200.dts
++++ b/arch/powerpc/boot/dts/tqm5200.dts
+@@ -32,7 +32,7 @@
+ };
+ };
+
+- memory {
++ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x04000000>; // 64MB
+ };
+diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
+index 6697c5e6682f1..7f7e7add44e7b 100644
+--- a/arch/powerpc/configs/ppc6xx_defconfig
++++ b/arch/powerpc/configs/ppc6xx_defconfig
+@@ -243,8 +243,6 @@ CONFIG_ATM_LANE=m
+ CONFIG_ATM_BR2684=m
+ CONFIG_BRIDGE=m
+ CONFIG_VLAN_8021Q=m
+-CONFIG_DECNET=m
+-CONFIG_DECNET_ROUTER=y
+ CONFIG_ATALK=m
+ CONFIG_DEV_APPLETALK=m
+ CONFIG_IPDDP=m
+@@ -1022,7 +1020,6 @@ CONFIG_NFSD=m
+ CONFIG_NFSD_V3_ACL=y
+ CONFIG_NFSD_V4=y
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_CIFS_UPCALL=y
+ CONFIG_CIFS_XATTR=y
+ CONFIG_CIFS_POSIX=y
+diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
+index b183629f1bcfb..6011977d43c98 100644
+--- a/arch/powerpc/configs/pseries_defconfig
++++ b/arch/powerpc/configs/pseries_defconfig
+@@ -41,6 +41,7 @@ CONFIG_DTL=y
+ CONFIG_SCANLOG=m
+ CONFIG_PPC_SMLPAR=y
+ CONFIG_IBMEBUS=y
++CONFIG_LIBNVDIMM=m
+ CONFIG_PAPR_SCM=m
+ CONFIG_PPC_SVM=y
+ # CONFIG_PPC_PMAC is not set
+@@ -190,7 +191,6 @@ CONFIG_HVCS=m
+ CONFIG_VIRTIO_CONSOLE=m
+ CONFIG_IBM_BSR=m
+ CONFIG_RAW_DRIVER=y
+-CONFIG_MAX_RAW_DEVS=1024
+ CONFIG_I2C_CHARDEV=y
+ CONFIG_FB=y
+ CONFIG_FIRMWARE_EDID=y
+diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
+index 948d100a29343..fa6bc440cf4ac 100644
+--- a/arch/powerpc/crypto/md5-asm.S
++++ b/arch/powerpc/crypto/md5-asm.S
+@@ -38,15 +38,11 @@
+
+ #define INITIALIZE \
+ PPC_STLU r1,-INT_FRAME_SIZE(r1); \
+- SAVE_8GPRS(14, r1); /* push registers onto stack */ \
+- SAVE_4GPRS(22, r1); \
+- SAVE_GPR(26, r1)
++ SAVE_GPRS(14, 26, r1) /* push registers onto stack */
+
+ #define FINALIZE \
+- REST_8GPRS(14, r1); /* pop registers from stack */ \
+- REST_4GPRS(22, r1); \
+- REST_GPR(26, r1); \
+- addi r1,r1,INT_FRAME_SIZE;
++ REST_GPRS(14, 26, r1); /* pop registers from stack */ \
++ addi r1,r1,INT_FRAME_SIZE
+
+ #ifdef __BIG_ENDIAN__
+ #define LOAD_DATA(reg, off) \
+diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S
+index 23e248beff716..f0d5ed557ab14 100644
+--- a/arch/powerpc/crypto/sha1-powerpc-asm.S
++++ b/arch/powerpc/crypto/sha1-powerpc-asm.S
+@@ -125,8 +125,7 @@
+
+ _GLOBAL(powerpc_sha_transform)
+ PPC_STLU r1,-INT_FRAME_SIZE(r1)
+- SAVE_8GPRS(14, r1)
+- SAVE_10GPRS(22, r1)
++ SAVE_GPRS(14, 31, r1)
+
+ /* Load up A - E */
+ lwz RA(0),0(r3) /* A */
+@@ -184,7 +183,6 @@ _GLOBAL(powerpc_sha_transform)
+ stw RD(0),12(r3)
+ stw RE(0),16(r3)
+
+- REST_8GPRS(14, r1)
+- REST_10GPRS(22, r1)
++ REST_GPRS(14, 31, r1)
+ addi r1,r1,INT_FRAME_SIZE
+ blr
+diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
+index 9a53e29680f41..258174304904b 100644
+--- a/arch/powerpc/include/asm/archrandom.h
++++ b/arch/powerpc/include/asm/archrandom.h
+@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ #endif /* CONFIG_ARCH_RANDOM */
+
+ #ifdef CONFIG_PPC_POWERNV
+-int powernv_hwrng_present(void);
+ int powernv_get_random_long(unsigned long *v);
+-int powernv_get_random_real_mode(unsigned long *v);
+-#else
+-static inline int powernv_hwrng_present(void) { return 0; }
+-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+ #endif
+
+ #endif /* _ASM_POWERPC_ARCHRANDOM_H */
+diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+index f5be185cbdf8d..94ad7acfd0565 100644
+--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
++++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+@@ -143,6 +143,8 @@ static __always_inline void update_user_segments(u32 val)
+ update_user_segment(15, val);
+ }
+
++int __init find_free_bat(void);
++unsigned int bat_block_size(unsigned long base, unsigned long top);
+ #endif /* !__ASSEMBLY__ */
+
+ /* We happily ignore the smaller BATs on 601, we don't actually use
+diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
+index 609c80f671943..f8b94f78403f1 100644
+--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
+@@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte)
+ #ifndef __ASSEMBLY__
+
+ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
++void unmap_kernel_page(unsigned long va);
+
+ #endif /* !__ASSEMBLY__ */
+
+diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
+index 5d34a8646f081..6866d860d4f30 100644
+--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
++++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
+@@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p
+ return hash__map_kernel_page(ea, pa, prot);
+ }
+
++void unmap_kernel_page(unsigned long va);
++
+ static inline int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h
+new file mode 100644
+index 0000000000000..e8a7b4ffb58c2
+--- /dev/null
++++ b/arch/powerpc/include/asm/bpf_perf_event.h
+@@ -0,0 +1,9 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H
++#define _ASM_POWERPC_BPF_PERF_EVENT_H
++
++#include <asm/ptrace.h>
++
++typedef struct user_pt_regs bpf_user_pt_regs_t;
++
++#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */
+diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h
+deleted file mode 100644
+index 01b8f6ca4dbbc..0000000000000
+--- a/arch/powerpc/include/asm/bugs.h
++++ /dev/null
+@@ -1,15 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-or-later */
+-#ifndef _ASM_POWERPC_BUGS_H
+-#define _ASM_POWERPC_BUGS_H
+-
+-/*
+- */
+-
+-/*
+- * This file is included by 'init/main.c' to check for
+- * architecture-dependent bugs.
+- */
+-
+-static inline void check_bugs(void) { }
+-
+-#endif /* _ASM_POWERPC_BUGS_H */
+diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
+index 947b5b9c44241..a832aeafe5601 100644
+--- a/arch/powerpc/include/asm/fixmap.h
++++ b/arch/powerpc/include/asm/fixmap.h
+@@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx,
+ BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+ else if (WARN_ON(idx >= __end_of_fixed_addresses))
+ return;
+-
+- map_kernel_page(__fix_to_virt(idx), phys, flags);
++ if (pgprot_val(flags))
++ map_kernel_page(__fix_to_virt(idx), phys, flags);
++ else
++ unmap_kernel_page(__fix_to_virt(idx));
+ }
+
+ #define __early_set_fixmap __set_fixmap
+diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
+index debe8c4f70626..02d32d6422cd8 100644
+--- a/arch/powerpc/include/asm/ftrace.h
++++ b/arch/powerpc/include/asm/ftrace.h
+@@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
+ #endif /* PPC64_ELF_ABI_v1 */
+ #endif /* CONFIG_FTRACE_SYSCALLS */
+
+-#ifdef CONFIG_PPC64
++#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER)
+ #include <asm/paca.h>
+
+ static inline void this_cpu_disable_ftrace(void)
+@@ -120,11 +120,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void)
+ return get_paca()->ftrace_enabled;
+ }
+
++void ftrace_free_init_tramp(void);
+ #else /* CONFIG_PPC64 */
+ static inline void this_cpu_disable_ftrace(void) { }
+ static inline void this_cpu_enable_ftrace(void) { }
+ static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
+ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
++static inline void ftrace_free_init_tramp(void) { }
+ #endif /* CONFIG_PPC64 */
+ #endif /* !__ASSEMBLY__ */
+
+diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
+index 21cc571ea9c2d..5c98a950eca0d 100644
+--- a/arch/powerpc/include/asm/hw_irq.h
++++ b/arch/powerpc/include/asm/hw_irq.h
+@@ -224,6 +224,42 @@ static inline bool arch_irqs_disabled(void)
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+ }
+
++static inline void set_pmi_irq_pending(void)
++{
++ /*
++ * Invoked from PMU callback functions to set PMI bit in the paca.
++ * This has to be called with irq's disabled (via hard_irq_disable()).
++ */
++ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
++ WARN_ON_ONCE(mfmsr() & MSR_EE);
++
++ get_paca()->irq_happened |= PACA_IRQ_PMI;
++}
++
++static inline void clear_pmi_irq_pending(void)
++{
++ /*
++ * Invoked from PMU callback functions to clear the pending PMI bit
++ * in the paca.
++ */
++ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
++ WARN_ON_ONCE(mfmsr() & MSR_EE);
++
++ get_paca()->irq_happened &= ~PACA_IRQ_PMI;
++}
++
++static inline bool pmi_irq_pending(void)
++{
++ /*
++ * Invoked from PMU callback functions to check if there is a pending
++ * PMI bit in the paca.
++ */
++ if (get_paca()->irq_happened & PACA_IRQ_PMI)
++ return true;
++
++ return false;
++}
++
+ #ifdef CONFIG_PPC_BOOK3S
+ /*
+ * To support disabling and enabling of irq with PMI, set of
+@@ -408,6 +444,10 @@ static inline void do_hard_irq_enable(void)
+ BUILD_BUG();
+ }
+
++static inline void clear_pmi_irq_pending(void) { }
++static inline void set_pmi_irq_pending(void) { }
++static inline bool pmi_irq_pending(void) { return false; }
++
+ static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+ {
+ }
+diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
+index 4f897993b7107..699a88584ae16 100644
+--- a/arch/powerpc/include/asm/imc-pmu.h
++++ b/arch/powerpc/include/asm/imc-pmu.h
+@@ -137,7 +137,7 @@ struct imc_pmu {
+ * are inited.
+ */
+ struct imc_pmu_ref {
+- struct mutex lock;
++ spinlock_t lock;
+ unsigned int id;
+ int refc;
+ };
+diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
+index a1d238255f077..a07960066b5fa 100644
+--- a/arch/powerpc/include/asm/interrupt.h
++++ b/arch/powerpc/include/asm/interrupt.h
+@@ -567,7 +567,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+
+ /* hash_utils.c */
+-DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
++DECLARE_INTERRUPT_HANDLER(do_hash_fault);
+
+ /* fault.c */
+ DECLARE_INTERRUPT_HANDLER(do_page_fault);
+diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
+index f130783c8301d..a4fe1292909e6 100644
+--- a/arch/powerpc/include/asm/io.h
++++ b/arch/powerpc/include/asm/io.h
+@@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr)
+ */
+ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr)
+ {
+- __asm__ __volatile__("stbcix %0,0,%1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ stbcix %0,0,%1; \
++ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
+ }
+
+ static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr)
+ {
+- __asm__ __volatile__("sthcix %0,0,%1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ sthcix %0,0,%1; \
++ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
+ }
+
+ static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr)
+ {
+- __asm__ __volatile__("stwcix %0,0,%1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ stwcix %0,0,%1; \
++ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
+ }
+
+ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+ {
+- __asm__ __volatile__("stdcix %0,0,%1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ stdcix %0,0,%1; \
++ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
+ }
+
+@@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr)
+ static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
+ {
+ u8 ret;
+- __asm__ __volatile__("lbzcix %0,0, %1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ lbzcix %0,0, %1; \
++ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
+ }
+@@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
+ static inline u16 __raw_rm_readw(volatile void __iomem *paddr)
+ {
+ u16 ret;
+- __asm__ __volatile__("lhzcix %0,0, %1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ lhzcix %0,0, %1; \
++ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
+ }
+@@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr)
+ static inline u32 __raw_rm_readl(volatile void __iomem *paddr)
+ {
+ u32 ret;
+- __asm__ __volatile__("lwzcix %0,0, %1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ lwzcix %0,0, %1; \
++ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
+ }
+@@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr)
+ static inline u64 __raw_rm_readq(volatile void __iomem *paddr)
+ {
+ u64 ret;
+- __asm__ __volatile__("ldcix %0,0, %1"
++ __asm__ __volatile__(".machine push; \
++ .machine power6; \
++ ldcix %0,0, %1; \
++ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
+ }
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index 19b6942c6969a..eaf3a562bf1ed 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -39,7 +39,6 @@ struct kvm_nested_guest {
+ pgd_t *shadow_pgtable; /* our page table for this guest */
+ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
+ u64 process_table; /* process table entry for this guest */
+- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */
+ long refcnt; /* number of pointers to this struct */
+ struct mutex tlb_lock; /* serialize page faults and tlbies */
+ struct kvm_nested_guest *next;
+diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
+index 080a7feb77318..0d81a9bf37650 100644
+--- a/arch/powerpc/include/asm/kvm_host.h
++++ b/arch/powerpc/include/asm/kvm_host.h
+@@ -814,6 +814,7 @@ struct kvm_vcpu_arch {
+
+ /* For support of nested guests */
+ struct kvm_nested_guest *nested;
++ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
+ u32 nested_vcpu_id;
+ gpa_t nested_io_gpr;
+ #endif
+diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
+index c390ec377baed..1412e643122e4 100644
+--- a/arch/powerpc/include/asm/lppaca.h
++++ b/arch/powerpc/include/asm/lppaca.h
+@@ -45,6 +45,7 @@
+ #include <asm/types.h>
+ #include <asm/mmu.h>
+ #include <asm/firmware.h>
++#include <asm/paca.h>
+
+ /*
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+@@ -123,13 +124,23 @@ struct lppaca {
+ */
+ #define LPPACA_OLD_SHARED_PROC 2
+
+-static inline bool lppaca_shared_proc(struct lppaca *l)
++#ifdef CONFIG_PPC_PSERIES
++/*
++ * All CPUs should have the same shared proc value, so directly access the PACA
++ * to avoid false positives from DEBUG_PREEMPT.
++ */
++static inline bool lppaca_shared_proc(void)
+ {
++ struct lppaca *l = local_paca->lppaca_ptr;
++
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
+ return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
+ }
+
++#define get_lppaca() (get_paca()->lppaca_ptr)
++#endif
++
+ /*
+ * SLB shadow buffer structure as defined in the PAPR. The save_area
+ * contains adjacent ESID and VSID pairs for each shadowed SLB. The
+diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
+index f06ae00f2a65e..63ea4693ccea6 100644
+--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
+@@ -64,6 +64,7 @@ extern int icache_44x_need_flush;
+ #ifndef __ASSEMBLY__
+
+ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
++void unmap_kernel_page(unsigned long va);
+
+ #endif /* !__ASSEMBLY__ */
+
+@@ -193,10 +194,12 @@ static inline pte_t pte_wrprotect(pte_t pte)
+ }
+ #endif
+
++#ifndef pte_mkexec
+ static inline pte_t pte_mkexec(pte_t pte)
+ {
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+ }
++#endif
+
+ #define pmd_none(pmd) (!pmd_val(pmd))
+ #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
+@@ -306,30 +309,29 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ }
+
+ #define __HAVE_ARCH_PTEP_SET_WRPROTECT
++#ifndef ptep_set_wrprotect
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+ {
+- unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+- unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+-
+- pte_update(mm, addr, ptep, clr, set, 0);
++ pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+ }
++#endif
+
++#ifndef __ptep_set_access_flags
+ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+ pte_t *ptep, pte_t entry,
+ unsigned long address,
+ int psize)
+ {
+- pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0)))));
+- pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0)))));
+- unsigned long set = pte_val(entry) & pte_val(pte_set);
+- unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
++ unsigned long set = pte_val(entry) &
++ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+ int huge = psize > mmu_virtual_psize ? 1 : 0;
+
+- pte_update(vma->vm_mm, address, ptep, clr, set, huge);
++ pte_update(vma->vm_mm, address, ptep, 0, set, huge);
+
+ flush_tlb_page(vma, address);
+ }
++#endif
+
+ static inline int pte_young(pte_t pte)
+ {
+diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+index fcc48d590d888..1a89ebdc3acc9 100644
+--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
++++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+@@ -136,6 +136,28 @@ static inline pte_t pte_mkhuge(pte_t pte)
+
+ #define pte_mkhuge pte_mkhuge
+
++static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
++ unsigned long clr, unsigned long set, int huge);
++
++static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++{
++ pte_update(mm, addr, ptep, 0, _PAGE_RO, 0);
++}
++#define ptep_set_wrprotect ptep_set_wrprotect
++
++static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
++ pte_t entry, unsigned long address, int psize)
++{
++ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_EXEC);
++ unsigned long clr = ~pte_val(entry) & _PAGE_RO;
++ int huge = psize > mmu_virtual_psize ? 1 : 0;
++
++ pte_update(vma->vm_mm, address, ptep, clr, set, huge);
++
++ flush_tlb_page(vma, address);
++}
++#define __ptep_set_access_flags __ptep_set_access_flags
++
+ static inline unsigned long pgd_leaf_size(pgd_t pgd)
+ {
+ if (pgd_val(pgd) & _PMD_PAGE_8M)
+diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
+index d081704b13fb9..2225991c69b55 100644
+--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
+@@ -118,11 +118,6 @@ static inline pte_t pte_wrprotect(pte_t pte)
+ return __pte(pte_val(pte) & ~_PAGE_RW);
+ }
+
+-static inline pte_t pte_mkexec(pte_t pte)
+-{
+- return __pte(pte_val(pte) | _PAGE_EXEC);
+-}
+-
+ #define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
+ #define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
+
+@@ -313,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+ #define __swp_entry_to_pte(x) __pte((x).val)
+
+ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
++void unmap_kernel_page(unsigned long va);
+ extern int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
+index 813918f407653..f798640422c2d 100644
+--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
++++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
+@@ -48,7 +48,7 @@
+ #define _PAGE_WRITETHRU 0x800000 /* W: cache write-through */
+
+ /* "Higher level" linux bit combinations */
+-#define _PAGE_EXEC _PAGE_BAP_UX /* .. and was cache cleaned */
++#define _PAGE_EXEC (_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */
+ #define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+ #define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+ #define _PAGE_KERNEL_RO (_PAGE_BAP_SR)
+@@ -93,11 +93,11 @@
+ /* Permission masks used to generate the __P and __S table */
+ #define PAGE_NONE __pgprot(_PAGE_BASE)
+ #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+-#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
++#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_BAP_UX)
+ #define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+-#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
++#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX)
+ #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+-#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
++#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX)
+
+ #ifndef __ASSEMBLY__
+ static inline pte_t pte_mkprivileged(pte_t pte)
+@@ -113,6 +113,16 @@ static inline pte_t pte_mkuser(pte_t pte)
+ }
+
+ #define pte_mkuser pte_mkuser
++
++static inline pte_t pte_mkexec(pte_t pte)
++{
++ if (pte_val(pte) & _PAGE_BAP_UR)
++ return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX);
++ else
++ return __pte((pte_val(pte) & ~_PAGE_BAP_UX) | _PAGE_BAP_SX);
++}
++#define pte_mkexec pte_mkexec
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* __KERNEL__ */
+diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
+index dc05a862e72a1..02c089c5493ad 100644
+--- a/arch/powerpc/include/asm/paca.h
++++ b/arch/powerpc/include/asm/paca.h
+@@ -14,7 +14,6 @@
+
+ #include <linux/string.h>
+ #include <asm/types.h>
+-#include <asm/lppaca.h>
+ #include <asm/mmu.h>
+ #include <asm/page.h>
+ #ifdef CONFIG_PPC_BOOK3E
+@@ -46,14 +45,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
+ #define get_paca() local_paca
+ #endif
+
+-#ifdef CONFIG_PPC_PSERIES
+-#define get_lppaca() (get_paca()->lppaca_ptr)
+-#endif
+-
+ #define get_slb_shadow() (get_paca()->slb_shadow_ptr)
+
+ struct task_struct;
+ struct rtas_args;
++struct lppaca;
+
+ /*
+ * Defines the layout of the paca.
+@@ -263,7 +259,6 @@ struct paca_struct {
+ u64 l1d_flush_size;
+ #endif
+ #ifdef CONFIG_PPC_PSERIES
+- struct rtas_args *rtas_args_reentrant;
+ u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */
+ #endif /* CONFIG_PPC_PSERIES */
+
+diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
+index 254687258f42b..03ae544eb6cc4 100644
+--- a/arch/powerpc/include/asm/page.h
++++ b/arch/powerpc/include/asm/page.h
+@@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn)
+ #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+ #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+
+-#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
++#define virt_addr_valid(vaddr) ({ \
++ unsigned long _addr = (unsigned long)vaddr; \
++ _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \
++ pfn_valid(virt_to_pfn(_addr)); \
++})
+
+ /*
+ * On Book-E parts we need __va to parse the device tree and we can't
+@@ -212,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn)
+ #define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET)
+ #else
+ #ifdef CONFIG_PPC64
++
++#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x))
++
+ /*
+ * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
+ * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+@@ -219,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn)
+ */
+ #define __va(x) \
+ ({ \
+- VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \
++ VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \
+ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \
+ })
+
+ #define __pa(x) \
+ ({ \
+- VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \
++ VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \
+ (unsigned long)(x) & 0x0fffffffffffffffUL; \
+ })
+
+diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
+index bcb7b5f917be6..0a333ac716e0b 100644
+--- a/arch/powerpc/include/asm/paravirt.h
++++ b/arch/powerpc/include/asm/paravirt.h
+@@ -6,6 +6,7 @@
+ #include <asm/smp.h>
+ #ifdef CONFIG_PPC64
+ #include <asm/paca.h>
++#include <asm/lppaca.h>
+ #include <asm/hvcall.h>
+ #endif
+
+@@ -97,7 +98,23 @@ static inline bool vcpu_is_preempted(int cpu)
+
+ #ifdef CONFIG_PPC_SPLPAR
+ if (!is_kvm_guest()) {
+- int first_cpu = cpu_first_thread_sibling(smp_processor_id());
++ int first_cpu;
++
++ /*
++ * The result of vcpu_is_preempted() is used in a
++ * speculative way, and is always subject to invalidation
++ * by events internal and external to Linux. While we can
++ * be called in preemptable context (in the Linux sense),
++ * we're not accessing per-cpu resources in a way that can
++ * race destructively with Linux scheduler preemption and
++ * migration, and callers can tolerate the potential for
++ * error introduced by sampling the CPU index without
++ * pinning the task to it. So it is permissible to use
++ * raw_smp_processor_id() here to defeat the preempt debug
++ * warnings that can arise from using smp_processor_id()
++ * in arbitrary contexts.
++ */
++ first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
+
+ /*
+ * Preemption can only happen at core granularity. This CPU
+diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
+index 83e0f701ebc67..217d8fb246354 100644
+--- a/arch/powerpc/include/asm/plpar_wrappers.h
++++ b/arch/powerpc/include/asm/plpar_wrappers.h
+@@ -9,6 +9,7 @@
+
+ #include <asm/hvcall.h>
+ #include <asm/paca.h>
++#include <asm/lppaca.h>
+ #include <asm/page.h>
+
+ static inline long poll_pending(void)
+diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
+index baea657bc8687..536d997539bb7 100644
+--- a/arch/powerpc/include/asm/ppc-opcode.h
++++ b/arch/powerpc/include/asm/ppc-opcode.h
+@@ -249,6 +249,7 @@
+ #define PPC_INST_COPY 0x7c20060c
+ #define PPC_INST_DCBA 0x7c0005ec
+ #define PPC_INST_DCBA_MASK 0xfc0007fe
++#define PPC_INST_DSSALL 0x7e00066c
+ #define PPC_INST_ISEL 0x7c00001e
+ #define PPC_INST_ISEL_MASK 0xfc00003e
+ #define PPC_INST_LSWI 0x7c0004aa
+@@ -498,6 +499,7 @@
+ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+ #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
++#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+ #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+@@ -575,6 +577,7 @@
+ #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b))
+ #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
+ #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
++#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL)
+ #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
+ #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
+ #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
+diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
+index 1c538a9a11e09..f21e6bde17a1e 100644
+--- a/arch/powerpc/include/asm/ppc_asm.h
++++ b/arch/powerpc/include/asm/ppc_asm.h
+@@ -16,30 +16,41 @@
+
+ #define SZL (BITS_PER_LONG/8)
+
++/*
++ * This expands to a sequence of operations with reg incrementing from
++ * start to end inclusive, of this form:
++ *
++ * op reg, (offset + (width * reg))(base)
++ *
++ * Note that offset is not the offset of the first operation unless start
++ * is zero (or width is zero).
++ */
++.macro OP_REGS op, width, start, end, base, offset
++ .Lreg=\start
++ .rept (\end - \start + 1)
++ \op .Lreg, \offset + \width * .Lreg(\base)
++ .Lreg=.Lreg+1
++ .endr
++.endm
++
+ /*
+ * Macros for storing registers into and loading registers from
+ * exception frames.
+ */
+ #ifdef __powerpc64__
+-#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base)
+-#define REST_GPR(n, base) ld n,GPR0+8*(n)(base)
+-#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base)
+-#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base)
++#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0
++#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0
++#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base)
++#define REST_NVGPRS(base) REST_GPRS(14, 31, base)
+ #else
+-#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
+-#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
+-#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base)
+-#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base)
++#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0
++#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0
++#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base)
++#define REST_NVGPRS(base) REST_GPRS(13, 31, base)
+ #endif
+
+-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
+-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
++#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
++#define REST_GPR(n, base) REST_GPRS(n, n, base)
+
+ #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base)
+ #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
+diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
+index 9dc97d2f9d27e..a05b34cf5f408 100644
+--- a/arch/powerpc/include/asm/rtas.h
++++ b/arch/powerpc/include/asm/rtas.h
+@@ -240,7 +240,6 @@ extern struct rtas_t rtas;
+ extern int rtas_token(const char *service);
+ extern int rtas_service_present(const char *service);
+ extern int rtas_call(int token, int, int, int *, ...);
+-int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
+ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
+ int nret, ...);
+ extern void __noreturn rtas_restart(char *cmd);
+diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
+index 6e4af4492a144..e92d39c0cd1d9 100644
+--- a/arch/powerpc/include/asm/sections.h
++++ b/arch/powerpc/include/asm/sections.h
+@@ -6,22 +6,10 @@
+ #include <linux/elf.h>
+ #include <linux/uaccess.h>
+
+-#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+-
+ #include <asm-generic/sections.h>
+
+-extern bool init_mem_is_free;
+-
+-static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+-{
+- if (!init_mem_is_free)
+- return 0;
+-
+- return addr >= (unsigned long)__init_begin &&
+- addr < (unsigned long)__init_end;
+-}
+-
+ extern char __head_end[];
++extern char __srwx_boundary[];
+
+ #ifdef __powerpc64__
+
+diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
+index b040094f79202..7ebc807aa8cc8 100644
+--- a/arch/powerpc/include/asm/set_memory.h
++++ b/arch/powerpc/include/asm/set_memory.h
+@@ -6,6 +6,8 @@
+ #define SET_MEMORY_RW 1
+ #define SET_MEMORY_NX 2
+ #define SET_MEMORY_X 3
++#define SET_MEMORY_NP 4 /* Set memory non present */
++#define SET_MEMORY_P 5 /* Set memory present */
+
+ int change_memory_attr(unsigned long addr, int numpages, long action);
+
+@@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages)
+ return change_memory_attr(addr, numpages, SET_MEMORY_X);
+ }
+
+-int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot);
++static inline int set_memory_np(unsigned long addr, int numpages)
++{
++ return change_memory_attr(addr, numpages, SET_MEMORY_NP);
++}
++
++static inline int set_memory_p(unsigned long addr, int numpages)
++{
++ return change_memory_attr(addr, numpages, SET_MEMORY_P);
++}
+
+ #endif
+diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
+index 8985791a2ba57..3c037a12c84db 100644
+--- a/arch/powerpc/include/asm/simple_spinlock.h
++++ b/arch/powerpc/include/asm/simple_spinlock.h
+@@ -48,10 +48,11 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+ {
+ unsigned long tmp, token;
++ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ token = LOCK_TOKEN;
+ __asm__ __volatile__(
+-"1: lwarx %0,0,%2,1\n\
++"1: lwarx %0,0,%2,%[eh]\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n\
+ stwcx. %1,0,%2\n\
+@@ -59,7 +60,7 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+ PPC_ACQUIRE_BARRIER
+ "2:"
+ : "=&r" (tmp)
+- : "r" (token), "r" (&lock->slock)
++ : "r" (token), "r" (&lock->slock), [eh] "n" (eh)
+ : "cr0", "memory");
+
+ return tmp;
+@@ -177,9 +178,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
+ static inline long __arch_read_trylock(arch_rwlock_t *rw)
+ {
+ long tmp;
++ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ __asm__ __volatile__(
+-"1: lwarx %0,0,%1,1\n"
++"1: lwarx %0,0,%1,%[eh]\n"
+ __DO_SIGN_EXTEND
+ " addic. %0,%0,1\n\
+ ble- 2f\n"
+@@ -187,7 +189,7 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw)
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "2:" : "=&r" (tmp)
+- : "r" (&rw->lock)
++ : "r" (&rw->lock), [eh] "n" (eh)
+ : "cr0", "xer", "memory");
+
+ return tmp;
+@@ -200,17 +202,18 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw)
+ static inline long __arch_write_trylock(arch_rwlock_t *rw)
+ {
+ long tmp, token;
++ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ token = WRLOCK_TOKEN;
+ __asm__ __volatile__(
+-"1: lwarx %0,0,%2,1\n\
++"1: lwarx %0,0,%2,%[eh]\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n"
+ " stwcx. %1,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "2:" : "=&r" (tmp)
+- : "r" (token), "r" (&rw->lock)
++ : "r" (token), "r" (&rw->lock), [eh] "n" (eh)
+ : "cr0", "memory");
+
+ return tmp;
+diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
+index c60ebd04b2ed9..61b968d9fba7c 100644
+--- a/arch/powerpc/include/asm/syscall.h
++++ b/arch/powerpc/include/asm/syscall.h
+@@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
+ unsigned long val, mask = -1UL;
+ unsigned int n = 6;
+
+- if (is_32bit_task())
++ if (is_tsk_32bit_task(task))
+ mask = 0xffffffff;
+
+ while (n--) {
+@@ -115,7 +115,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
+
+ static inline int syscall_get_arch(struct task_struct *task)
+ {
+- if (is_32bit_task())
++ if (is_tsk_32bit_task(task))
+ return AUDIT_ARCH_PPC;
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return AUDIT_ARCH_PPC64LE;
+diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
+index 7ee66ae5444d1..0e85d7aa395d0 100644
+--- a/arch/powerpc/include/asm/syscalls.h
++++ b/arch/powerpc/include/asm/syscalls.h
+@@ -8,6 +8,18 @@
+ #include <linux/types.h>
+ #include <linux/compat.h>
+
++/*
++ * long long munging:
++ * The 32 bit ABI passes long longs in an odd even register pair.
++ * High and low parts are swapped depending on endian mode,
++ * so define a macro (similar to mips linux32) to handle that.
++ */
++#ifdef __LITTLE_ENDIAN__
++#define merge_64(low, high) (((u64)high << 32) | low)
++#else
++#define merge_64(high, low) (((u64)high << 32) | low)
++#endif
++
+ struct rtas_args;
+
+ asmlinkage long sys_mmap(unsigned long addr, size_t len,
+diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
+index b4ec6c7dd72ee..87013ac2a6401 100644
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -14,10 +14,16 @@
+
+ #ifdef __KERNEL__
+
+-#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
++#ifdef CONFIG_KASAN
++#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
++#else
++#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
++#endif
++
++#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT
+ #define THREAD_SHIFT PAGE_SHIFT
+ #else
+-#define THREAD_SHIFT CONFIG_THREAD_SHIFT
++#define THREAD_SHIFT MIN_THREAD_SHIFT
+ #endif
+
+ #define THREAD_SIZE (1 << THREAD_SHIFT)
+@@ -165,8 +171,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
+
+ #ifdef CONFIG_COMPAT
+ #define is_32bit_task() (test_thread_flag(TIF_32BIT))
++#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
+ #else
+ #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
++#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
+ #endif
+
+ #if defined(CONFIG_PPC64)
+diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
+index fa2e76e4093a3..14b4489de52c5 100644
+--- a/arch/powerpc/include/asm/timex.h
++++ b/arch/powerpc/include/asm/timex.h
+@@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void)
+ {
+ return mftb();
+ }
++#define get_cycles get_cycles
+
+ #endif /* __KERNEL__ */
+ #endif /* _ASM_POWERPC_TIMEX_H */
+diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
+index 22c79ab400060..b2680070d65d6 100644
+--- a/arch/powerpc/include/asm/uaccess.h
++++ b/arch/powerpc/include/asm/uaccess.h
+@@ -125,8 +125,11 @@ do { \
+ */
+ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \
+ __asm__ __volatile__( \
++ ".machine push\n" \
++ ".machine altivec\n" \
+ "1: lvx 0,0,%1 # get user\n" \
+ " stvx 0,0,%2 # put kernel\n" \
++ ".machine pop\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
+index 57573d9c1e091..56834a8a14654 100644
+--- a/arch/powerpc/include/asm/vas.h
++++ b/arch/powerpc/include/asm/vas.h
+@@ -112,7 +112,7 @@ static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+ struct vas_rx_win_attr {
+- void *rx_fifo;
++ u64 rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+
+diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
+index f3f4710d4ff52..99129b0cd8b8a 100644
+--- a/arch/powerpc/include/asm/word-at-a-time.h
++++ b/arch/powerpc/include/asm/word-at-a-time.h
+@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
+ return leading_zero_bits >> 3;
+ }
+
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
+deleted file mode 100644
+index 5e1e648aeec4c..0000000000000
+--- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h
++++ /dev/null
+@@ -1,9 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+-#define _UAPI__ASM_BPF_PERF_EVENT_H__
+-
+-#include <asm/ptrace.h>
+-
+-typedef struct user_pt_regs bpf_user_pt_regs_t;
+-
+-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
+diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
+index 7be36c1e1db6d..ed91d5b9ffc63 100644
+--- a/arch/powerpc/kernel/Makefile
++++ b/arch/powerpc/kernel/Makefile
+@@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC
+ CFLAGS_btext.o += -fPIC
+ endif
+
++CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+@@ -19,6 +20,7 @@ CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ CFLAGS_prom_init.o += -fno-stack-protector
+ CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_prom_init.o += -ffreestanding
++CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
+ ifdef CONFIG_FUNCTION_TRACER
+ # Do not trace early boot code
+@@ -196,3 +198,6 @@ clean-files := vmlinux.lds
+ # Force dependency (incbin is bad)
+ $(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg
+ $(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg
++
++# for cleaning
++subdir- += vdso32 vdso64
+diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
+index 803c2a45b22ac..1cffb5e7c38d6 100644
+--- a/arch/powerpc/kernel/btext.c
++++ b/arch/powerpc/kernel/btext.c
+@@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout)
+ rc = btext_initialize(np);
+ printk("result: %d\n", rc);
+ }
+- if (rc == 0)
++ if (rc == 0) {
++ of_node_put(np);
+ break;
++ }
+ }
+ return rc;
+ }
+diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
+index 038ce8d9061d1..8920862ffd791 100644
+--- a/arch/powerpc/kernel/dma-iommu.c
++++ b/arch/powerpc/kernel/dma-iommu.c
+@@ -144,7 +144,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
+ /* We support DMA to/from any memory page via the iommu */
+ int dma_iommu_dma_supported(struct device *dev, u64 mask)
+ {
+- struct iommu_table *tbl = get_iommu_table_base(dev);
++ struct iommu_table *tbl;
+
+ if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+ /*
+@@ -162,6 +162,8 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
+ return 1;
+ }
+
++ tbl = get_iommu_table_base(dev);
++
+ if (!tbl) {
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+ return 0;
+diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
+index 61fdd53cdd9af..c62dd98159653 100644
+--- a/arch/powerpc/kernel/entry_32.S
++++ b/arch/powerpc/kernel/entry_32.S
+@@ -90,8 +90,7 @@ transfer_to_syscall:
+ stw r12,8(r1)
+ stw r2,_TRAP(r1)
+ SAVE_GPR(0, r1)
+- SAVE_4GPRS(3, r1)
+- SAVE_2GPRS(7, r1)
++ SAVE_GPRS(3, 8, r1)
+ addi r2,r10,-THREAD
+ SAVE_NVGPRS(r1)
+
+@@ -139,7 +138,7 @@ syscall_exit_finish:
+ mtxer r5
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+- REST_8GPRS(4,r1)
++ REST_GPRS(4, 11, r1)
+ lwz r12,GPR12(r1)
+ b 1b
+
+@@ -232,9 +231,9 @@ fast_exception_return:
+ beq 3f /* if not, we've got problems */
+ #endif
+
+-2: REST_4GPRS(3, r11)
++2: REST_GPRS(3, 6, r11)
+ lwz r10,_CCR(r11)
+- REST_2GPRS(1, r11)
++ REST_GPRS(1, 2, r11)
+ mtcr r10
+ lwz r10,_LINK(r11)
+ mtlr r10
+@@ -298,16 +297,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ * the reliable stack unwinder later on. Clear it.
+ */
+ stw r0,8(r1)
+- REST_4GPRS(7, r1)
+- REST_2GPRS(11, r1)
++ REST_GPRS(7, 12, r1)
+
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
+
+- REST_4GPRS(2, r1)
+- REST_GPR(6, r1)
++ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+@@ -341,8 +338,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ lwz r6,_CCR(r1)
+ li r0,0
+
+- REST_4GPRS(7, r1)
+- REST_2GPRS(11, r1)
++ REST_GPRS(7, 12, r1)
+
+ mtlr r3
+ mtctr r4
+@@ -354,7 +350,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ */
+ stw r0,8(r1)
+
+- REST_4GPRS(2, r1)
++ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+@@ -430,8 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
+ bne interrupt_return; \
+ lwz r0,GPR0(r1); \
+ lwz r2,GPR2(r1); \
+- REST_4GPRS(3, r1); \
+- REST_2GPRS(7, r1); \
++ REST_GPRS(3, 8, r1); \
+ lwz r10,_XER(r1); \
+ lwz r11,_CTR(r1); \
+ mtspr SPRN_XER,r10; \
+diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
+index 70cff7b49e172..07a1448146e27 100644
+--- a/arch/powerpc/kernel/entry_64.S
++++ b/arch/powerpc/kernel/entry_64.S
+@@ -330,22 +330,22 @@ _GLOBAL(enter_rtas)
+ clrldi r4,r4,2 /* convert to realmode address */
+ mtlr r4
+
+- li r0,0
+- ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+- andc r0,r6,r0
+-
+- li r9,1
+- rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
+- ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
+- andc r6,r0,r9
+-
+ __enter_rtas:
+- sync /* disable interrupts so SRR0/1 */
+- mtmsrd r0 /* don't get trashed */
+-
+ LOAD_REG_ADDR(r4, rtas)
+ ld r5,RTASENTRY(r4) /* get the rtas->entry value */
+ ld r4,RTASBASE(r4) /* get the rtas->base value */
++
++ /*
++ * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we
++ * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in
++ * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
++ * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
++ * MSR[S] is set, it will remain when entering RTAS.
++ */
++ LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
++
++ li r0,0
++ mtmsrd r0,1 /* disable RI before using SRR0/1 */
+
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r6
+diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
+index 711c66b76df1a..67dc4e3179a02 100644
+--- a/arch/powerpc/kernel/exceptions-64e.S
++++ b/arch/powerpc/kernel/exceptions-64e.S
+@@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+
+ stdcx. r0,0,r1 /* to clear the reservation */
+
+- REST_4GPRS(2, r1)
+- REST_4GPRS(6, r1)
++ REST_GPRS(2, 9, r1)
+
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+@@ -375,9 +374,7 @@ ret_from_mc_except:
+ exc_##n##_common: \
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+- std r9,GPR9(r1); /* save r9 in stackframe */ \
++ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
+ std r10,_NIP(r1); /* save SRR0 to stackframe */ \
+ std r11,_MSR(r1); /* save SRR1 to stackframe */ \
+ beq 2f; /* if from kernel mode */ \
+@@ -1061,9 +1058,7 @@ bad_stack_book3e:
+ std r11,_ESR(r1)
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+- std r9,GPR9(r1); /* save r9 in stackframe */ \
++ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
+ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
+ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
+@@ -1077,8 +1072,7 @@ bad_stack_book3e:
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+- SAVE_10GPRS(14,r1)
+- SAVE_8GPRS(24,r1)
++ SAVE_GPRS(14, 31, r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
+index eaf1f72131a18..277eccf0f0868 100644
+--- a/arch/powerpc/kernel/exceptions-64s.S
++++ b/arch/powerpc/kernel/exceptions-64s.S
+@@ -574,8 +574,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+- SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+- SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
++ SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+@@ -693,8 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ mtlr r9
+ ld r9,_CCR(r1)
+ mtcr r9
+- REST_8GPRS(2, r1)
+- REST_4GPRS(10, r1)
++ REST_GPRS(2, 13, r1)
+ REST_GPR(0, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
+index b7ceb041743c9..d496dc5151aa1 100644
+--- a/arch/powerpc/kernel/fadump.c
++++ b/arch/powerpc/kernel/fadump.c
+@@ -642,6 +642,7 @@ int __init fadump_reserve_mem(void)
+ return ret;
+ error_out:
+ fw_dump.fadump_enabled = 0;
++ fw_dump.reserve_dump_area_size = 0;
+ return 0;
+ }
+
+@@ -861,7 +862,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
+ sizeof(struct fadump_memory_range));
+ return 0;
+ }
+-
+ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
+ {
+@@ -880,7 +880,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
+
+- if ((start + size) == base)
++ /*
++ * Boot memory area needs separate PT_LOAD segment(s) as it
++ * is moved to a different location at the time of crash.
++ * So, fold only if the region is not boot memory area.
++ */
++ if ((start + size) == base && start >= fw_dump.boot_mem_top)
+ is_adjacent = true;
+ }
+ if (!is_adjacent) {
+@@ -1641,6 +1646,14 @@ int __init setup_fadump(void)
+ else if (fw_dump.reserve_dump_area_size)
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+
++ /*
++ * In case of panic, fadump is triggered via ppc_panic_event()
++ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
++ * lets panic() function take crash friendly path before panic
++ * notifiers are invoked.
++ */
++ crash_kexec_post_notifiers = true;
++
+ return 1;
+ }
+ subsys_initcall(setup_fadump);
+diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
+index c7022c41cc314..20328f72f9f2b 100644
+--- a/arch/powerpc/kernel/firmware.c
++++ b/arch/powerpc/kernel/firmware.c
+@@ -31,11 +31,10 @@ int __init check_kvm_guest(void)
+ if (!hyper_node)
+ return 0;
+
+- if (!of_device_is_compatible(hyper_node, "linux,kvm"))
+- return 0;
+-
+- static_branch_enable(&kvm_guest);
++ if (of_device_is_compatible(hyper_node, "linux,kvm"))
++ static_branch_enable(&kvm_guest);
+
++ of_node_put(hyper_node);
+ return 0;
+ }
+ core_initcall(check_kvm_guest); // before kvm_guest_init()
+diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
+index 6b1ec9e3541b9..261c79bdbe53f 100644
+--- a/arch/powerpc/kernel/head_32.h
++++ b/arch/powerpc/kernel/head_32.h
+@@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
+ stw r10,8(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+- SAVE_4GPRS(3, r1)
+- SAVE_2GPRS(7, r1)
++ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+@@ -202,11 +201,11 @@ vmap_stack_overflow:
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, TASK_CPU - THREAD(r1)
+ slwi r1, r1, 3
+- addis r1, r1, emergency_ctx@ha
++ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
+ #else
+- lis r1, emergency_ctx@ha
++ lis r1, emergency_ctx-PAGE_OFFSET@ha
+ #endif
+- lwz r1, emergency_ctx@l(r1)
++ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+ prepare_transfer_to_handler
+diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
+index 7d72ee5ab387c..e783860bea838 100644
+--- a/arch/powerpc/kernel/head_40x.S
++++ b/arch/powerpc/kernel/head_40x.S
+@@ -27,6 +27,7 @@
+
+ #include <linux/init.h>
+ #include <linux/pgtable.h>
++#include <linux/sizes.h>
+ #include <asm/processor.h>
+ #include <asm/page.h>
+ #include <asm/mmu.h>
+@@ -650,7 +651,7 @@ start_here:
+ b . /* prevent prefetch past rfi */
+
+ /* Set up the initial MMU state so we can do the first level of
+- * kernel initialization. This maps the first 16 MBytes of memory 1:1
++ * kernel initialization. This maps the first 32 MBytes of memory 1:1
+ * virtual to physical and more importantly sets the cache mode.
+ */
+ initial_mmu:
+@@ -687,6 +688,12 @@ initial_mmu:
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
++ li r0,62 /* TLB slot 62 */
++ addis r4,r4,SZ_16M@h
++ addis r3,r3,SZ_16M@h
++ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
++ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
++
+ isync
+
+ /* Establish the exception vector base
+diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
+index 9bdb95f5694f7..0d073b9fd52c5 100644
+--- a/arch/powerpc/kernel/head_8xx.S
++++ b/arch/powerpc/kernel/head_8xx.S
+@@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb)
+ #ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
++ li r8, 0
+ #ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
+ #else
+@@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb)
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+- LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+
+ 2: ori r0, r6, MD_EVALID
++ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+- mtspr SPRN_MD_RPN, r8
++ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+-
+-4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
++4:
+ 2: ori r0, r6, MD_EVALID
++ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+- mtspr SPRN_MD_RPN, r8
++ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+@@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb)
+ #endif
+ #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+- mtspr SPRN_MI_CTR, r0
++ mtspr SPRN_MD_CTR, r0
+ #endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
+diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
+index e5503420b6c6d..bb6d5d0fc4ac8 100644
+--- a/arch/powerpc/kernel/head_booke.h
++++ b/arch/powerpc/kernel/head_booke.h
+@@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION
+ stw r10, 8(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+- SAVE_4GPRS(3, r1)
+- SAVE_2GPRS(7, r1)
++ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+@@ -465,12 +464,21 @@ label:
+ bl do_page_fault; \
+ b interrupt_return
+
++/*
++ * Instruction TLB Error interrupt handlers may call InstructionStorage
++ * directly without clearing ESR, so the ESR at this point may be left over
++ * from a prior interrupt.
++ *
++ * In any case, do_page_fault for BOOK3E does not use ESR and always expects
++ * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault
++ * handling.
++ */
+ #define INSTRUCTION_STORAGE_EXCEPTION \
+ START_EXCEPTION(InstructionStorage) \
+- NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \
+- mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
++ NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \
++ li r5,0; /* Store 0 in regs->esr (dsisr) */ \
+ stw r5,_ESR(r11); \
+- stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \
++ stw r12, _DEAR(r11); /* Set regs->dear (dar) to SRR0 */ \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
+diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
+index 1f835539fda42..77cd4c5a2d631 100644
+--- a/arch/powerpc/kernel/idle.c
++++ b/arch/powerpc/kernel/idle.c
+@@ -37,7 +37,7 @@ static int __init powersave_off(char *arg)
+ {
+ ppc_md.power_save = NULL;
+ cpuidle_disable = IDLE_POWERSAVE_OFF;
+- return 0;
++ return 1;
+ }
+ __setup("powersave=off", powersave_off);
+
+@@ -82,7 +82,7 @@ void power4_idle(void)
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+- asm volatile("DSSALL ; sync" ::: "memory");
++ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
+
+ power4_idle_nap();
+
+diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
+index 13cad9297d822..3c097356366b8 100644
+--- a/arch/powerpc/kernel/idle_6xx.S
++++ b/arch/powerpc/kernel/idle_6xx.S
+@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
+ END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+ mtspr SPRN_HID0,r4
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ sync
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
+diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
+index de10a26972581..e93f67c3af76b 100644
+--- a/arch/powerpc/kernel/interrupt.c
++++ b/arch/powerpc/kernel/interrupt.c
+@@ -53,16 +53,18 @@ static inline bool exit_must_hard_disable(void)
+ */
+ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+ {
++ bool must_hard_disable = (exit_must_hard_disable() || !restartable);
++
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+- if (exit_must_hard_disable() || !restartable)
++ if (must_hard_disable)
+ __hard_EE_RI_disable();
+
+ #ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+- if (exit_must_hard_disable() || !restartable) {
++ if (must_hard_disable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+@@ -148,7 +150,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
+ */
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+- current_thread_info()->flags |= _TIF_RESTOREALL;
++ set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+ /*
+ * If the system call was made with a transaction active, doom it and
+@@ -266,7 +268,7 @@ static void check_return_regs_valid(struct pt_regs *regs)
+ if (trap_is_scv(regs))
+ return;
+
+- trap = regs->trap;
++ trap = TRAP(regs);
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+@@ -529,7 +531,6 @@ void preempt_schedule_irq(void);
+
+ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+ {
+- unsigned long flags;
+ unsigned long ret = 0;
+ unsigned long kuap;
+ bool stack_store = current_thread_info()->flags &
+@@ -546,7 +547,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+
+ kuap = kuap_get_and_assert_locked();
+
+- local_irq_save(flags);
++ local_irq_disable();
+
+ if (!arch_irq_disabled_regs(regs)) {
+ /* Returning to a kernel context with local irqs enabled. */
+diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
+index ec950b08a8dcc..ff8c8c03f41ac 100644
+--- a/arch/powerpc/kernel/interrupt_64.S
++++ b/arch/powerpc/kernel/interrupt_64.S
+@@ -30,21 +30,25 @@ COMPAT_SYS_CALL_TABLE:
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
++ clrrdi r12,r12,2
+ 100: tdne r11,r12
+- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
++ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_SRR1
+ ld r12,_MSR(r1)
+ 100: tdne r11,r12
+- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
++ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
++ clrrdi r12,r12,2
+ 100: tdne r11,r12
+- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
++ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_HSRR1
+ ld r12,_MSR(r1)
+ 100: tdne r11,r12
+- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
++ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .endif
+ #endif
+ .endm
+@@ -162,10 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+- ld r2,GPR2(r1)
+- ld r3,GPR3(r1)
+- ld r13,GPR13(r1)
+- ld r1,GPR1(r1)
++ REST_GPRS(2, 3, r1)
++ REST_GPR(13, r1)
++ REST_GPR(1, r1)
+ RFSCV_TO_USER
+ b . /* prevent speculative execution */
+
+@@ -183,9 +186,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ mtctr r3
+ mtlr r4
+ mtspr SPRN_XER,r5
+- REST_10GPRS(2, r1)
+- REST_2GPRS(12, r1)
+- ld r1,GPR1(r1)
++ REST_GPRS(2, 13, r1)
++ REST_GPR(1, r1)
+ RFI_TO_USER
+ .Lsyscall_vectored_\name\()_rst_end:
+
+@@ -374,10 +376,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+- ld r2,GPR2(r1)
+- ld r3,GPR3(r1)
+- ld r13,GPR13(r1)
+- ld r1,GPR1(r1)
++ REST_GPRS(2, 3, r1)
++ REST_GPR(13, r1)
++ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+@@ -388,8 +389,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ mtctr r3
+ mtspr SPRN_XER,r4
+ ld r0,GPR0(r1)
+- REST_8GPRS(4, r1)
+- ld r12,GPR12(r1)
++ REST_GPRS(4, 12, r1)
+ b .Lsyscall_restore_regs_cont
+ .Lsyscall_rst_end:
+
+@@ -518,17 +518,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ ld r6,_XER(r1)
+ li r0,0
+
+- REST_4GPRS(7, r1)
+- REST_2GPRS(11, r1)
+- REST_GPR(13, r1)
++ REST_GPRS(7, 13, r1)
+
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
+
+- REST_4GPRS(2, r1)
+- REST_GPR(6, r1)
++ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+@@ -625,8 +622,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ ld r6,_CCR(r1)
+ li r0,0
+
+- REST_4GPRS(7, r1)
+- REST_2GPRS(11, r1)
++ REST_GPRS(7, 12, r1)
+
+ mtlr r3
+ mtctr r4
+@@ -638,7 +634,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ */
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
+
+- REST_4GPRS(2, r1)
++ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
+index 07093b7cdcb9a..b858f186e9a70 100644
+--- a/arch/powerpc/kernel/iommu.c
++++ b/arch/powerpc/kernel/iommu.c
+@@ -68,11 +68,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl)
+ static void iommu_debugfs_del(struct iommu_table *tbl)
+ {
+ char name[10];
+- struct dentry *liobn_entry;
+
+ sprintf(name, "%08lx", tbl->it_index);
+- liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
+- debugfs_remove(liobn_entry);
++ debugfs_lookup_and_remove(name, iommu_debugfs_dir);
+ }
+ #else
+ static void iommu_debugfs_add(struct iommu_table *tbl){}
+@@ -174,17 +172,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
+ return 0;
+ }
+
+-static struct notifier_block fail_iommu_bus_notifier = {
++/*
++ * PCI and VIO buses need separate notifier_block structs, since they're linked
++ * list nodes. Sharing a notifier_block would mean that any notifiers later
++ * registered for PCI buses would also get called by VIO buses and vice versa.
++ */
++static struct notifier_block fail_iommu_pci_bus_notifier = {
++ .notifier_call = fail_iommu_bus_notify
++};
++
++#ifdef CONFIG_IBMVIO
++static struct notifier_block fail_iommu_vio_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+ };
++#endif
+
+ static int __init fail_iommu_setup(void)
+ {
+ #ifdef CONFIG_PCI
+- bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
++ bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
+ #endif
+ #ifdef CONFIG_IBMVIO
+- bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
++ bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
+ #endif
+
+ return 0;
+@@ -776,6 +785,11 @@ bool iommu_table_in_use(struct iommu_table *tbl)
+ /* ignore reserved bit0 */
+ if (tbl->it_offset == 0)
+ start = 1;
++
++ /* Simple case with no reserved MMIO32 region */
++ if (!tbl->it_reserved_start && !tbl->it_reserved_end)
++ return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;
++
+ end = tbl->it_reserved_start - tbl->it_offset;
+ if (find_next_bit(tbl->it_map, end, start) != end)
+ return true;
+diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
+index 7a7cd6bda53ea..61552f57db0ba 100644
+--- a/arch/powerpc/kernel/kprobes.c
++++ b/arch/powerpc/kernel/kprobes.c
+@@ -140,7 +140,13 @@ int arch_prepare_kprobe(struct kprobe *p)
+ preempt_disable();
+ prev = get_kprobe(p->addr - 1);
+ preempt_enable_no_resched();
+- if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
++
++ /*
++ * When prev is a ftrace-based kprobe, we don't have an insn, and it
++ * doesn't probe for prefixed instruction.
++ */
++ if (prev && !kprobe_ftrace(prev) &&
++ ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
+ printk("Cannot register a kprobe on the second word of prefixed instruction\n");
+ ret = -EINVAL;
+ }
+diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
+index 617eba82531cb..6568823cf3063 100644
+--- a/arch/powerpc/kernel/kvm.c
++++ b/arch/powerpc/kernel/kvm.c
+@@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
+ on_each_cpu(kvm_map_magic_page, &features, 1);
+
+ /* Quick self-test to see if the mapping works */
+- if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
++ if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
++ sizeof(u32))) {
+ kvm_patching_worked = false;
+ return;
+ }
+diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
+index 225511d73bef5..f2e03ed423d0f 100644
+--- a/arch/powerpc/kernel/l2cr_6xx.S
++++ b/arch/powerpc/kernel/l2cr_6xx.S
+@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
+
+ /* Stop DST streams */
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ sync
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+@@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
+ isync
+
+ /* Stop DST streams */
+- DSSALL
++ PPC_DSSALL
+ sync
+
+ /* Get the current enable bit of the L3CR into r4 */
+@@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+ _GLOBAL(__flush_disable_L1)
+ /* Stop pending alitvec streams and memory accesses */
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
+index ed04a3ba66fe8..40a583e9d3c70 100644
+--- a/arch/powerpc/kernel/module.c
++++ b/arch/powerpc/kernel/module.c
+@@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr,
+ }
+
+ static __always_inline void *
+-__module_alloc(unsigned long size, unsigned long start, unsigned long end)
++__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
+ {
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
++ gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
+
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */
+- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
++ return __vmalloc_node_range(size, 1, start, end, gfp, prot,
+ VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ }
+@@ -114,13 +115,13 @@ void *module_alloc(unsigned long size)
+
+ /* First try within 32M limit from _etext to avoid branch trampolines */
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
+- ptr = __module_alloc(size, limit, MODULES_END);
++ ptr = __module_alloc(size, limit, MODULES_END, true);
+
+ if (!ptr)
+- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
++ ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
+
+ return ptr;
+ #else
+- return __module_alloc(size, VMALLOC_START, VMALLOC_END);
++ return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
+ #endif
+ }
+diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
+index 6baa676e7cb60..5d77d3f5fbb56 100644
+--- a/arch/powerpc/kernel/module_64.c
++++ b/arch/powerpc/kernel/module_64.c
+@@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
+ const char *name)
+ {
+ long reladdr;
++ func_desc_t desc;
++ int i;
+
+ if (is_mprofile_ftrace_call(name))
+ return create_ftrace_stub(entry, addr, me);
+
+- memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
++ for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) {
++ if (patch_instruction(&entry->jump[i],
++ ppc_inst(ppc64_stub_insns[i])))
++ return 0;
++ }
+
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+@@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+- entry->jump[0] |= PPC_HA(reladdr);
+- entry->jump[1] |= PPC_LO(reladdr);
+- entry->funcdata = func_desc(addr);
+- entry->magic = STUB_MAGIC;
++ if (patch_instruction(&entry->jump[0],
++ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
++ return 0;
++
++ if (patch_instruction(&entry->jump[1],
++ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
++ return 0;
++
++ // func_desc_t is 8 bytes if ABIv2, else 16 bytes
++ desc = func_desc(addr);
++ for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
++ if (patch_instruction(((u32 *)&entry->funcdata) + i,
++ ppc_inst(((u32 *)(&desc))[i])))
++ return 0;
++ }
++
++ if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC)))
++ return 0;
+
+ return 1;
+ }
+@@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
+ me->name, *instruction, instruction);
+ return 0;
+ }
++
+ /* ld r2,R2_STACK_OFFSET(r1) */
+- *instruction = PPC_INST_LD_TOC;
++ if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)))
++ return 0;
++
+ return 1;
+ }
+
+@@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
+ }
+
+ /* Only replace bits 2 through 26 */
+- *(uint32_t *)location
+- = (*(uint32_t *)location & ~0x03fffffc)
++ value = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
++
++ if (patch_instruction((u32 *)location, ppc_inst(value)))
++ return -EFAULT;
++
+ break;
+
+ case R_PPC64_REL64:
+diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
+index 19ea3312403ca..5c7f0b4b784b2 100644
+--- a/arch/powerpc/kernel/optprobes_head.S
++++ b/arch/powerpc/kernel/optprobes_head.S
+@@ -10,8 +10,8 @@
+ #include <asm/asm-offsets.h>
+
+ #ifdef CONFIG_PPC64
+-#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
+-#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
++#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
++#define REST_30GPRS(base) REST_GPRS(2, 31, base)
+ #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
+ #else
+ #define SAVE_30GPRS(base) stmw r2, GPR2(base)
+diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
+index 9bd30cac852bf..2de557663a96c 100644
+--- a/arch/powerpc/kernel/paca.c
++++ b/arch/powerpc/kernel/paca.c
+@@ -16,7 +16,6 @@
+ #include <asm/kexec.h>
+ #include <asm/svm.h>
+ #include <asm/ultravisor.h>
+-#include <asm/rtas.h>
+
+ #include "setup.h"
+
+@@ -172,30 +171,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
+
+ #endif /* CONFIG_PPC_BOOK3S_64 */
+
+-#ifdef CONFIG_PPC_PSERIES
+-/**
+- * new_rtas_args() - Allocates rtas args
+- * @cpu: CPU number
+- * @limit: Memory limit for this allocation
+- *
+- * Allocates a struct rtas_args and return it's pointer,
+- * if not in Hypervisor mode
+- *
+- * Return: Pointer to allocated rtas_args
+- * NULL if CPU in Hypervisor Mode
+- */
+-static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
+-{
+- limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);
+-
+- if (early_cpu_has_feature(CPU_FTR_HVMODE))
+- return NULL;
+-
+- return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
+- limit, cpu);
+-}
+-#endif /* CONFIG_PPC_PSERIES */
+-
+ /* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux.
+@@ -234,10 +209,6 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+ /* For now -- if we have threads this will be adjusted later */
+ new_paca->tcd_ptr = &new_paca->tcd;
+ #endif
+-
+-#ifdef CONFIG_PPC_PSERIES
+- new_paca->rtas_args_reentrant = NULL;
+-#endif
+ }
+
+ /* Put the paca pointer into r13 and SPRG_PACA */
+@@ -309,9 +280,6 @@ void __init allocate_paca(int cpu)
+ #endif
+ #ifdef CONFIG_PPC_BOOK3S_64
+ paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+-#endif
+-#ifdef CONFIG_PPC_PSERIES
+- paca->rtas_args_reentrant = new_rtas_args(cpu, limit);
+ #endif
+ paca_struct_size += sizeof(struct paca_struct);
+ }
+diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
+index c3573430919d2..1aabb82b5f375 100644
+--- a/arch/powerpc/kernel/pci-common.c
++++ b/arch/powerpc/kernel/pci-common.c
+@@ -67,23 +67,35 @@ void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+ pci_dma_ops = dma_ops;
+ }
+
+-/*
+- * This function should run under locking protection, specifically
+- * hose_spinlock.
+- */
+ static int get_phb_number(struct device_node *dn)
+ {
+ int ret, phb_id = -1;
+- u32 prop_32;
+ u64 prop;
+
+ /*
+ * Try fixed PHB numbering first, by checking archs and reading
+- * the respective device-tree properties. Firstly, try powernv by
+- * reading "ibm,opal-phbid", only present in OPAL environment.
++ * the respective device-tree properties. Firstly, try reading
++ * standard "linux,pci-domain", then try reading "ibm,opal-phbid"
++ * (only present in powernv OPAL environment), then try device-tree
++ * alias and as the last try to use lower bits of "reg" property.
+ */
+- ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
++ ret = of_get_pci_domain_nr(dn);
++ if (ret >= 0) {
++ prop = ret;
++ ret = 0;
++ }
++ if (ret)
++ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
++
+ if (ret) {
++ ret = of_alias_get_id(dn, "pci");
++ if (ret >= 0) {
++ prop = ret;
++ ret = 0;
++ }
++ }
++ if (ret) {
++ u32 prop_32;
+ ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+ prop = prop_32;
+ }
+@@ -91,18 +103,20 @@ static int get_phb_number(struct device_node *dn)
+ if (!ret)
+ phb_id = (int)(prop & (MAX_PHBS - 1));
+
++ spin_lock(&hose_spinlock);
++
+ /* We need to be sure to not use the same PHB number twice. */
+ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap))
+- return phb_id;
++ goto out_unlock;
+
+- /*
+- * If not pseries nor powernv, or if fixed PHB numbering tried to add
+- * the same PHB number twice, then fallback to dynamic PHB numbering.
+- */
++ /* If everything fails then fallback to dynamic PHB numbering. */
+ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
+ BUG_ON(phb_id >= MAX_PHBS);
+ set_bit(phb_id, phb_bitmap);
+
++out_unlock:
++ spin_unlock(&hose_spinlock);
++
+ return phb_id;
+ }
+
+@@ -113,10 +127,13 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
+ phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+- spin_lock(&hose_spinlock);
++
+ phb->global_number = get_phb_number(dev);
++
++ spin_lock(&hose_spinlock);
+ list_add_tail(&phb->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
++
+ phb->dn = dev;
+ phb->is_dynamic = slab_is_available();
+ #ifdef CONFIG_PPC64
+diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
+index 61571ae239530..335767cea1373 100644
+--- a/arch/powerpc/kernel/pci_dn.c
++++ b/arch/powerpc/kernel/pci_dn.c
+@@ -330,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+ INIT_LIST_HEAD(&pdn->list);
+ parent = of_get_parent(dn);
+ pdn->parent = parent ? PCI_DN(parent) : NULL;
++ of_node_put(parent);
+ if (pdn->parent)
+ list_add_tail(&pdn->list, &pdn->parent->child_list);
+
+diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
+index 2d4d21bb46a97..235ae24284519 100644
+--- a/arch/powerpc/kernel/ppc_save_regs.S
++++ b/arch/powerpc/kernel/ppc_save_regs.S
+@@ -21,60 +21,33 @@
+ * different ABIs, though).
+ */
+ _GLOBAL(ppc_save_regs)
+- PPC_STL r0,0*SZL(r3)
++ /* This allows stack frame accessor macros and offsets to be used */
++ subi r3,r3,STACK_FRAME_OVERHEAD
++ PPC_STL r0,GPR0(r3)
+ #ifdef CONFIG_PPC32
+- stmw r2, 2*SZL(r3)
++ stmw r2,GPR2(r3)
+ #else
+- PPC_STL r2,2*SZL(r3)
+- PPC_STL r3,3*SZL(r3)
+- PPC_STL r4,4*SZL(r3)
+- PPC_STL r5,5*SZL(r3)
+- PPC_STL r6,6*SZL(r3)
+- PPC_STL r7,7*SZL(r3)
+- PPC_STL r8,8*SZL(r3)
+- PPC_STL r9,9*SZL(r3)
+- PPC_STL r10,10*SZL(r3)
+- PPC_STL r11,11*SZL(r3)
+- PPC_STL r12,12*SZL(r3)
+- PPC_STL r13,13*SZL(r3)
+- PPC_STL r14,14*SZL(r3)
+- PPC_STL r15,15*SZL(r3)
+- PPC_STL r16,16*SZL(r3)
+- PPC_STL r17,17*SZL(r3)
+- PPC_STL r18,18*SZL(r3)
+- PPC_STL r19,19*SZL(r3)
+- PPC_STL r20,20*SZL(r3)
+- PPC_STL r21,21*SZL(r3)
+- PPC_STL r22,22*SZL(r3)
+- PPC_STL r23,23*SZL(r3)
+- PPC_STL r24,24*SZL(r3)
+- PPC_STL r25,25*SZL(r3)
+- PPC_STL r26,26*SZL(r3)
+- PPC_STL r27,27*SZL(r3)
+- PPC_STL r28,28*SZL(r3)
+- PPC_STL r29,29*SZL(r3)
+- PPC_STL r30,30*SZL(r3)
+- PPC_STL r31,31*SZL(r3)
++ SAVE_GPRS(2, 31, r3)
+ lbz r0,PACAIRQSOFTMASK(r13)
+- PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,SOFTE(r3)
+ #endif
+- /* go up one stack frame for SP */
+- PPC_LL r4,0(r1)
+- PPC_STL r4,1*SZL(r3)
++ /* store current SP */
++ PPC_STL r1,GPR1(r3)
+ /* get caller's LR */
++ PPC_LL r4,0(r1)
+ PPC_LL r0,LRSAVE(r4)
+- PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_LINK(r3)
+ mflr r0
+- PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_NIP(r3)
+ mfmsr r0
+- PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_MSR(r3)
+ mfctr r0
+- PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_CTR(r3)
+ mfxer r0
+- PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_XER(r3)
+ mfcr r0
+- PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_CCR(r3)
+ li r0,0
+- PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+- PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3)
++ PPC_STL r0,_TRAP(r3)
++ PPC_STL r0,ORIG_GPR3(r3)
+ blr
+diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
+index 50436b52c2133..c590e12199132 100644
+--- a/arch/powerpc/kernel/process.c
++++ b/arch/powerpc/kernel/process.c
+@@ -1818,7 +1818,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
+ tm_reclaim_current(0);
+ #endif
+
+- memset(regs->gpr, 0, sizeof(regs->gpr));
++ memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
+ regs->ctr = 0;
+ regs->link = 0;
+ regs->xer = 0;
+@@ -2124,12 +2124,12 @@ static unsigned long __get_wchan(struct task_struct *p)
+ return 0;
+
+ do {
+- sp = *(unsigned long *)sp;
++ sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
+ task_is_running(p))
+ return 0;
+ if (count > 0) {
+- ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
++ ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
+ if (!in_sched_functions(ip))
+ return ip;
+ }
+diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
+index 2e67588f6f6e6..86ffbabd26c6e 100644
+--- a/arch/powerpc/kernel/prom.c
++++ b/arch/powerpc/kernel/prom.c
+@@ -751,6 +751,13 @@ void __init early_init_devtree(void *params)
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+
++ /*
++ * As generic code authors expect to be able to use static keys
++ * in early_param() handlers, we initialize the static keys just
++ * before parsing early params (it's fine to call jump_label_init()
++ * more than once).
++ */
++ jump_label_init();
+ parse_early_param();
+
+ /* make sure we've parsed cmdline for mem= before this */
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index 18b04b08b9833..f845065c860e3 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
+
+ /* Check if the phy-handle property exists - bail if it does */
+ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
+- if (!rv)
++ if (rv <= 0)
+ return;
+
+ /*
+diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
+index b183ab9c5107c..dfa5f729f774d 100644
+--- a/arch/powerpc/kernel/prom_init_check.sh
++++ b/arch/powerpc/kernel/prom_init_check.sh
+@@ -13,7 +13,7 @@
+ # If you really need to reference something from prom_init.o add
+ # it to the list below:
+
+-grep "^CONFIG_KASAN=y$" .config >/dev/null
++grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null
+ if [ $? -eq 0 ]
+ then
+ MEM_FUNCS="__memcpy __memset"
+diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
+index 5dca19361316e..09c49632bfe59 100644
+--- a/arch/powerpc/kernel/ptrace/ptrace-fpu.c
++++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
+@@ -17,9 +17,13 @@ int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
+
+ #ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+- if (fpidx < (PT_FPSCR - PT_FPR0))
+- memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+- else
++ if (fpidx < (PT_FPSCR - PT_FPR0)) {
++ if (IS_ENABLED(CONFIG_PPC32))
++ // On 32-bit the index we are passed refers to 32-bit words
++ *data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
++ else
++ memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
++ } else
+ *data = child->thread.fp_state.fpscr;
+ #else
+ *data = 0;
+@@ -39,9 +43,13 @@ int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
+
+ #ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+- if (fpidx < (PT_FPSCR - PT_FPR0))
+- memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+- else
++ if (fpidx < (PT_FPSCR - PT_FPR0)) {
++ if (IS_ENABLED(CONFIG_PPC32))
++ // On 32-bit the index we are passed refers to 32-bit words
++ ((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
++ else
++ memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
++ } else
+ child->thread.fp_state.fpscr = data;
+ #endif
+
+diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
+index b8be1d6668b59..54dfa6a2aec8f 100644
+--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
++++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
+@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+ {
++ if (!target->thread.regs)
++ return -EINVAL;
++
+ return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
+ }
+
+@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+ {
++ if (!target->thread.regs)
++ return -EINVAL;
++
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+ }
+diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
+index 7c7093c17c45e..ff5e46dbf7c50 100644
+--- a/arch/powerpc/kernel/ptrace/ptrace.c
++++ b/arch/powerpc/kernel/ptrace/ptrace.c
+@@ -446,4 +446,7 @@ void __init pt_regs_check(void)
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
++
++ // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
++ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
+ }
+diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
+index 02d4719bf43a8..232e4549defe1 100644
+--- a/arch/powerpc/kernel/reloc_64.S
++++ b/arch/powerpc/kernel/reloc_64.S
+@@ -8,8 +8,10 @@
+ #include <asm/ppc_asm.h>
+
+ RELA = 7
+-RELACOUNT = 0x6ffffff9
++RELASZ = 8
++RELAENT = 9
+ R_PPC64_RELATIVE = 22
++R_PPC64_UADDR64 = 43
+
+ /*
+ * r3 = desired final address of kernel
+@@ -25,29 +27,38 @@ _GLOBAL(relocate)
+ add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */
+ ld r10,(p_st - 0b)(r12)
+ add r10,r10,r12 /* r10 has runtime addr of _stext */
++ ld r13,(p_sym - 0b)(r12)
++ add r13,r13,r12 /* r13 has runtime addr of .dynsym */
+
+ /*
+- * Scan the dynamic section for the RELA and RELACOUNT entries.
++ * Scan the dynamic section for the RELA, RELASZ and RELAENT entries.
+ */
+ li r7,0
+ li r8,0
+-1: ld r6,0(r11) /* get tag */
++.Ltags:
++ ld r6,0(r11) /* get tag */
+ cmpdi r6,0
+- beq 4f /* end of list */
++ beq .Lend_of_list /* end of list */
+ cmpdi r6,RELA
+ bne 2f
+ ld r7,8(r11) /* get RELA pointer in r7 */
+- b 3f
+-2: addis r6,r6,(-RELACOUNT)@ha
+- cmpdi r6,RELACOUNT@l
++ b 4f
++2: cmpdi r6,RELASZ
+ bne 3f
+- ld r8,8(r11) /* get RELACOUNT value in r8 */
+-3: addi r11,r11,16
+- b 1b
+-4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */
++ ld r8,8(r11) /* get RELASZ value in r8 */
++ b 4f
++3: cmpdi r6,RELAENT
++ bne 4f
++ ld r12,8(r11) /* get RELAENT value in r12 */
++4: addi r11,r11,16
++ b .Ltags
++.Lend_of_list:
++ cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */
+ cmpdi cr1,r8,0
+- beq 6f
+- beq cr1,6f
++ beq .Lout
++ beq cr1,.Lout
++ cmpdi r12,0
++ beq .Lout
+
+ /*
+ * Work out linktime address of _stext and hence the
+@@ -62,23 +73,39 @@ _GLOBAL(relocate)
+
+ /*
+ * Run through the list of relocations and process the
+- * R_PPC64_RELATIVE ones.
++ * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones.
+ */
++ divd r8,r8,r12 /* RELASZ / RELAENT */
+ mtctr r8
+-5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */
++.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */
+ cmpdi r0,R_PPC64_RELATIVE
+- bne 6f
++ bne .Luaddr64
+ ld r6,0(r9) /* reloc->r_offset */
+ ld r0,16(r9) /* reloc->r_addend */
++ b .Lstore
++.Luaddr64:
++ srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */
++ clrldi r0,r0,32
++ cmpdi r0,R_PPC64_UADDR64
++ bne .Lnext
++ ld r6,0(r9)
++ ld r0,16(r9)
++ mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */
++ add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */
++ ld r14,8(r14)
++ add r0,r0,r14
++.Lstore:
+ add r0,r0,r3
+ stdx r0,r7,r6
+- addi r9,r9,24
+- bdnz 5b
+-
+-6: blr
++.Lnext:
++ add r9,r9,r12
++ bdnz .Lrels
++.Lout:
++ blr
+
+ .balign 8
+ p_dyn: .8byte __dynamic_start - 0b
+ p_rela: .8byte __rela_dyn_start - 0b
++p_sym: .8byte __dynamic_symtab - 0b
+ p_st: .8byte _stext - 0b
+
+diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
+index ff80bbad22a58..a4cd2484dbca2 100644
+--- a/arch/powerpc/kernel/rtas.c
++++ b/arch/powerpc/kernel/rtas.c
+@@ -42,13 +42,21 @@
+ #include <asm/time.h>
+ #include <asm/mmu.h>
+ #include <asm/topology.h>
+-#include <asm/paca.h>
+
+ /* This is here deliberately so it's only used in this file */
+ void enter_rtas(unsigned long);
+
+ static inline void do_enter_rtas(unsigned long args)
+ {
++ unsigned long msr;
++
++ /*
++ * Make sure MSR[RI] is currently enabled as it will be forced later
++ * in enter_rtas.
++ */
++ msr = mfmsr();
++ BUG_ON(!(msr & MSR_RI));
++
+ enter_rtas(args);
+
+ srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
+@@ -407,7 +415,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
+ buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
+ }
+ if (buf)
+- memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
++ memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ }
+
+ return buf;
+@@ -780,6 +788,7 @@ void __noreturn rtas_halt(void)
+
+ /* Must be in the RMO region, so we place it here */
+ static char rtas_os_term_buf[2048];
++static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE;
+
+ void rtas_os_term(char *str)
+ {
+@@ -791,16 +800,20 @@ void rtas_os_term(char *str)
+ * this property may terminate the partition which we want to avoid
+ * since it interferes with panic_timeout.
+ */
+- if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
+- RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
++ if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE)
+ return;
+
+ snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
+
++ /*
++ * Keep calling as long as RTAS returns a "try again" status,
++ * but don't use rtas_busy_delay(), which potentially
++ * schedules.
++ */
+ do {
+- status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
++ status = rtas_call(ibm_os_term_token, 1, 1, NULL,
+ __pa(rtas_os_term_buf));
+- } while (rtas_busy_delay(status));
++ } while (rtas_busy_delay_time(status));
+
+ if (status != 0)
+ printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
+@@ -836,59 +849,6 @@ void rtas_activate_firmware(void)
+ pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
+ }
+
+-#ifdef CONFIG_PPC_PSERIES
+-/**
+- * rtas_call_reentrant() - Used for reentrant rtas calls
+- * @token: Token for desired reentrant RTAS call
+- * @nargs: Number of Input Parameters
+- * @nret: Number of Output Parameters
+- * @outputs: Array of outputs
+- * @...: Inputs for desired RTAS call
+- *
+- * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
+- * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
+- * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
+- * PACA one instead.
+- *
+- * Return: -1 on error,
+- * First output value of RTAS call if (nret > 0),
+- * 0 otherwise,
+- */
+-int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
+-{
+- va_list list;
+- struct rtas_args *args;
+- unsigned long flags;
+- int i, ret = 0;
+-
+- if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
+- return -1;
+-
+- local_irq_save(flags);
+- preempt_disable();
+-
+- /* We use the per-cpu (PACA) rtas args buffer */
+- args = local_paca->rtas_args_reentrant;
+-
+- va_start(list, outputs);
+- va_rtas_call_unlocked(args, token, nargs, nret, list);
+- va_end(list);
+-
+- if (nret > 1 && outputs)
+- for (i = 0; i < nret - 1; ++i)
+- outputs[i] = be32_to_cpu(args->rets[i + 1]);
+-
+- if (nret > 0)
+- ret = be32_to_cpu(args->rets[0]);
+-
+- local_irq_restore(flags);
+- preempt_enable();
+-
+- return ret;
+-}
+-
+-#endif /* CONFIG_PPC_PSERIES */
+-
+ /**
+ * Find a specific pseries error log in an RTAS extended event log.
+ * @log: RTAS error/event log
+@@ -974,7 +934,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = {
+ { "get-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,get-vpd", -1, 0, -1, 1, 2 },
+ { "ibm,lpar-perftools", -1, 2, 3, -1, -1 },
+- { "ibm,platform-dump", -1, 4, 5, -1, -1 },
++ { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */
+ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 },
+ { "ibm,scan-log-dump", -1, 0, 1, -1, -1 },
+ { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 },
+@@ -1023,6 +983,15 @@ static bool block_rtas_call(int token, int nargs,
+ size = 1;
+
+ end = base + size - 1;
++
++ /*
++ * Special case for ibm,platform-dump - NULL buffer
++ * address is used to indicate end of dump processing
++ */
++ if (!strcmp(f->name, "ibm,platform-dump") &&
++ base == 0)
++ return false;
++
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+@@ -1203,6 +1172,13 @@ void __init rtas_initialize(void)
+ no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
+ rtas.entry = no_entry ? rtas.base : entry;
+
++ /*
++ * Discover these now to avoid device tree lookups in the
++ * panic path.
++ */
++ if (of_property_read_bool(rtas.dev, "ibm,extended-os-term"))
++ ibm_os_term_token = rtas_token("ibm,os-term");
++
+ /* If RTAS was found, allocate the RMO buffer for it and look for
+ * the stop-self token if any
+ */
+@@ -1235,6 +1211,12 @@ int __init early_init_dt_scan_rtas(unsigned long node,
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+
++#ifdef CONFIG_PPC64
++ /* need this feature to decide the crashkernel offset */
++ if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
++ powerpc_firmware_features |= FW_FEATURE_LPAR;
++#endif
++
+ if (basep && entryp && sizep) {
+ rtas.base = *basep;
+ rtas.entry = *entryp;
+diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
+index a99179d835382..56bd0aa30f930 100644
+--- a/arch/powerpc/kernel/rtas_flash.c
++++ b/arch/powerpc/kernel/rtas_flash.c
+@@ -710,9 +710,9 @@ static int __init rtas_flash_init(void)
+ if (!rtas_validate_flash_data.buf)
+ return -ENOMEM;
+
+- flash_block_cache = kmem_cache_create("rtas_flash_cache",
+- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
+- NULL);
++ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
++ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
++ 0, 0, RTAS_BLK_SIZE, NULL);
+ if (!flash_block_cache) {
+ printk(KERN_ERR "%s: failed to create block cache\n",
+ __func__);
+diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
+index 15fb5ea1b9eaf..cd6fc64ad9ca6 100644
+--- a/arch/powerpc/kernel/security.c
++++ b/arch/powerpc/kernel/security.c
+@@ -363,26 +363,27 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
+
+ static int ssb_prctl_get(struct task_struct *task)
+ {
++ /*
++ * The STF_BARRIER feature is on by default, so if it's off that means
++ * firmware has explicitly said the CPU is not vulnerable via either
++ * the hypercall or device tree.
++ */
++ if (!security_ftr_enabled(SEC_FTR_STF_BARRIER))
++ return PR_SPEC_NOT_AFFECTED;
++
++ /*
++ * If the system's CPU has no known barrier (see setup_stf_barrier())
++ * then assume that the CPU is not vulnerable.
++ */
+ if (stf_enabled_flush_types == STF_BARRIER_NONE)
+- /*
+- * We don't have an explicit signal from firmware that we're
+- * vulnerable or not, we only have certain CPU revisions that
+- * are known to be vulnerable.
+- *
+- * We assume that if we're on another CPU, where the barrier is
+- * NONE, then we are not vulnerable.
+- */
+ return PR_SPEC_NOT_AFFECTED;
+- else
+- /*
+- * If we do have a barrier type then we are vulnerable. The
+- * barrier is not a global or per-process mitigation, so the
+- * only value we can report here is PR_SPEC_ENABLE, which
+- * appears as "vulnerable" in /proc.
+- */
+- return PR_SPEC_ENABLE;
+-
+- return -EINVAL;
++
++ /*
++ * Otherwise the CPU is vulnerable. The barrier is not a global or
++ * per-process mitigation, so the only value that can be reported here
++ * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc.
++ */
++ return PR_SPEC_ENABLE;
+ }
+
+ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
+index a0a78aba2083e..1ee4640a26413 100644
+--- a/arch/powerpc/kernel/secvar-sysfs.c
++++ b/arch/powerpc/kernel/secvar-sysfs.c
+@@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *format;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+- if (!of_device_is_available(node))
+- return -ENODEV;
++ if (!of_device_is_available(node)) {
++ rc = -ENODEV;
++ goto out;
++ }
+
+ rc = of_property_read_string(node, "format", &format);
+ if (rc)
+- return rc;
++ goto out;
+
+ rc = sprintf(buf, "%s\n", format);
+
++out:
+ of_node_put(node);
+
+ return rc;
+diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
+index 1f07317964e49..618aeccdf6918 100644
+--- a/arch/powerpc/kernel/signal.h
++++ b/arch/powerpc/kernel/signal.h
+@@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+
+ return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+ }
+-#define unsafe_get_user_sigset(dst, src, label) \
+- unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
++#define unsafe_get_user_sigset(dst, src, label) do { \
++ sigset_t *__dst = dst; \
++ const sigset_t __user *__src = src; \
++ int i; \
++ \
++ for (i = 0; i < _NSIG_WORDS; i++) \
++ unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \
++} while (0)
+
+ #ifdef CONFIG_VSX
+ extern unsigned long copy_vsx_to_user(void __user *to,
+diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
+index 0608581967f09..68ed8ecf64fcc 100644
+--- a/arch/powerpc/kernel/signal_32.c
++++ b/arch/powerpc/kernel/signal_32.c
+@@ -258,8 +258,9 @@ static void prepare_save_user_regs(int ctx_has_vsx_region)
+ #endif
+ }
+
+-static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+- struct mcontext __user *tm_frame, int ctx_has_vsx_region)
++static __always_inline int
++__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
++ struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+ {
+ unsigned long msr = regs->msr;
+
+@@ -358,8 +359,9 @@ static void prepare_save_tm_user_regs(void)
+ current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+ }
+
+-static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+- struct mcontext __user *tm_frame, unsigned long msr)
++static __always_inline int
++save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
++ struct mcontext __user *tm_frame, unsigned long msr)
+ {
+ /* Save both sets of general registers */
+ unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed);
+@@ -438,8 +440,9 @@ failed:
+ #else
+ static void prepare_save_tm_user_regs(void) { }
+
+-static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+- struct mcontext __user *tm_frame, unsigned long msr)
++static __always_inline int
++save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
++ struct mcontext __user *tm_frame, unsigned long msr)
+ {
+ return 0;
+ }
+@@ -1048,7 +1051,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ if (new_ctx == NULL)
+ return 0;
+ if (!access_ok(new_ctx, ctx_size) ||
+- fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
++ fault_in_readable((char __user *)new_ctx, ctx_size))
+ return -EFAULT;
+
+ /*
+@@ -1062,8 +1065,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ * or if another thread unmaps the region containing the context.
+ * We kill the task with a SIGSEGV in this situation.
+ */
+- if (do_setcontext(new_ctx, regs, 0))
+- do_exit(SIGSEGV);
++ if (do_setcontext(new_ctx, regs, 0)) {
++ force_exit_sig(SIGSEGV);
++ return -EFAULT;
++ }
+
+ set_thread_flag(TIF_RESTOREALL);
+ return 0;
+@@ -1237,7 +1242,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
+ #endif
+
+ if (!access_ok(ctx, sizeof(*ctx)) ||
+- fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
++ fault_in_readable((char __user *)ctx, sizeof(*ctx)))
+ return -EFAULT;
+
+ /*
+diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
+index 1831bba0582e1..d1e1fc0acbea3 100644
+--- a/arch/powerpc/kernel/signal_64.c
++++ b/arch/powerpc/kernel/signal_64.c
+@@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ if (new_ctx == NULL)
+ return 0;
+ if (!access_ok(new_ctx, ctx_size) ||
+- fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
++ fault_in_readable((char __user *)new_ctx, ctx_size))
+ return -EFAULT;
+
+ /*
+@@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ * We kill the task with a SIGSEGV in this situation.
+ */
+
+- if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
+- do_exit(SIGSEGV);
++ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
++ force_exit_sig(SIGSEGV);
++ return -EFAULT;
++ }
+ set_current_blocked(&set);
+
+ if (!user_read_access_begin(new_ctx, ctx_size))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+ user_read_access_end();
+- do_exit(SIGSEGV);
++ force_exit_sig(SIGSEGV);
++ return -EFAULT;
+ }
+ user_read_access_end();
+
+diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
+index 605bab448f847..fb95f92dcfac6 100644
+--- a/arch/powerpc/kernel/smp.c
++++ b/arch/powerpc/kernel/smp.c
+@@ -61,6 +61,7 @@
+ #include <asm/cpu_has_feature.h>
+ #include <asm/ftrace.h>
+ #include <asm/kup.h>
++#include <asm/fadump.h>
+
+ #ifdef DEBUG
+ #include <asm/udbg.h>
+@@ -620,6 +621,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+ }
+ #endif
+
++#ifdef CONFIG_NMI_IPI
++static void crash_stop_this_cpu(struct pt_regs *regs)
++#else
++static void crash_stop_this_cpu(void *dummy)
++#endif
++{
++ /*
++ * Just busy wait here and avoid marking CPU as offline to ensure
++ * register data is captured appropriately.
++ */
++ while (1)
++ cpu_relax();
++}
++
++void crash_smp_send_stop(void)
++{
++ static bool stopped = false;
++
++ /*
++ * In case of fadump, register data for all CPUs is captured by f/w
++ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
++ * this rtas call to avoid tricky post processing of those CPUs'
++ * backtraces.
++ */
++ if (should_fadump_crash())
++ return;
++
++ if (stopped)
++ return;
++
++ stopped = true;
++
++#ifdef CONFIG_NMI_IPI
++ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
++#else
++ smp_call_function(crash_stop_this_cpu, NULL, 0);
++#endif /* CONFIG_NMI_IPI */
++}
++
+ #ifdef CONFIG_NMI_IPI
+ static void nmi_stop_this_cpu(struct pt_regs *regs)
+ {
+@@ -1640,10 +1680,12 @@ void start_secondary(void *unused)
+ BUG();
+ }
+
++#ifdef CONFIG_PROFILING
+ int setup_profiling_timer(unsigned int multiplier)
+ {
+ return 0;
+ }
++#endif
+
+ static void fixup_topology(void)
+ {
+diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
+index f73f4d72fea43..e0cbd63007f21 100644
+--- a/arch/powerpc/kernel/swsusp_32.S
++++ b/arch/powerpc/kernel/swsusp_32.S
+@@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)
+ #ifdef CONFIG_ALTIVEC
+ /* Stop pending altivec streams and memory accesses */
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ #endif
+ sync
+diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
+index 6d3189830dd32..068a268a8013e 100644
+--- a/arch/powerpc/kernel/swsusp_asm64.S
++++ b/arch/powerpc/kernel/swsusp_asm64.S
+@@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
+ _GLOBAL(swsusp_arch_resume)
+ /* Stop pending altivec streams and memory accesses */
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ sync
+
+diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
+index 16ff0399a2574..719bfc6d1e3f5 100644
+--- a/arch/powerpc/kernel/sys_ppc32.c
++++ b/arch/powerpc/kernel/sys_ppc32.c
+@@ -56,18 +56,6 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
+ return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
+ }
+
+-/*
+- * long long munging:
+- * The 32 bit ABI passes long longs in an odd even register pair.
+- * High and low parts are swapped depending on endian mode,
+- * so define a macro (similar to mips linux32) to handle that.
+- */
+-#ifdef __LITTLE_ENDIAN__
+-#define merge_64(low, high) ((u64)high << 32) | low
+-#else
+-#define merge_64(high, low) ((u64)high << 32) | low
+-#endif
+-
+ compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 pos1, u32 pos2)
+ {
+@@ -94,7 +82,7 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
+ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+ u32 len1, u32 len2)
+ {
+- return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
++ return ksys_fallocate(fd, mode, merge_64(offset1, offset2),
+ merge_64(len1, len2));
+ }
+
+diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
+index 825931e400df7..e3edcf8f7cae5 100644
+--- a/arch/powerpc/kernel/syscalls.c
++++ b/arch/powerpc/kernel/syscalls.c
+@@ -99,8 +99,8 @@ long ppc64_personality(unsigned long personality)
+ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low)
+ {
+- return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
+- (u64)len_high << 32 | len_low, advice);
++ return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low),
++ merge_64(len_high, len_low), advice);
+ }
+
+ SYSCALL_DEFINE0(switch_endian)
+diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
+index cb3358886203e..6c1db3b6de2dc 100644
+--- a/arch/powerpc/kernel/systbl.S
++++ b/arch/powerpc/kernel/systbl.S
+@@ -18,6 +18,7 @@
+ .p2align 3
+ #define __SYSCALL(nr, entry) .8byte entry
+ #else
++ .p2align 2
+ #define __SYSCALL(nr, entry) .long entry
+ #endif
+
+diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
+index 934d8ae66cc63..4406d7a89558b 100644
+--- a/arch/powerpc/kernel/time.c
++++ b/arch/powerpc/kernel/time.c
+@@ -450,7 +450,7 @@ void vtime_flush(struct task_struct *tsk)
+ #define calc_cputime_factors()
+ #endif
+
+-void __delay(unsigned long loops)
++void __no_kcsan __delay(unsigned long loops)
+ {
+ unsigned long start;
+
+@@ -471,7 +471,7 @@ void __delay(unsigned long loops)
+ }
+ EXPORT_SYMBOL(__delay);
+
+-void udelay(unsigned long usecs)
++void __no_kcsan udelay(unsigned long usecs)
+ {
+ __delay(tb_ticks_per_usec * usecs);
+ }
+diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
+index 2b91f233b05d5..5a0f023a26e90 100644
+--- a/arch/powerpc/kernel/tm.S
++++ b/arch/powerpc/kernel/tm.S
+@@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim)
+
+ /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
+ SAVE_GPR(0, r7) /* user r0 */
+- SAVE_GPR(2, r7) /* user r2 */
+- SAVE_4GPRS(3, r7) /* user r3-r6 */
+- SAVE_GPR(8, r7) /* user r8 */
+- SAVE_GPR(9, r7) /* user r9 */
+- SAVE_GPR(10, r7) /* user r10 */
++ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
++ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
+ ld r3, GPR1(r1) /* user r1 */
+ ld r4, GPR7(r1) /* user r7 */
+ ld r5, GPR11(r1) /* user r11 */
+@@ -445,12 +442,9 @@ restore_gprs:
+ ld r6, THREAD_TM_PPR(r3)
+
+ REST_GPR(0, r7) /* GPR0 */
+- REST_2GPRS(2, r7) /* GPR2-3 */
+- REST_GPR(4, r7) /* GPR4 */
+- REST_4GPRS(8, r7) /* GPR8-11 */
+- REST_2GPRS(12, r7) /* GPR12-13 */
+-
+- REST_NVGPRS(r7) /* GPR14-31 */
++ REST_GPRS(2, 4, r7) /* GPR2-4 */
++ REST_GPRS(8, 12, r7) /* GPR8-12 */
++ REST_GPRS(14, 31, r7) /* GPR14-31 */
+
+ /* Load up PPR and DSCR here so we don't run with user values for long */
+ mtspr SPRN_DSCR, r5
+@@ -486,18 +480,24 @@ restore_gprs:
+ REST_GPR(6, r7)
+
+ /*
+- * Store r1 and r5 on the stack so that we can access them after we
+- * clear MSR RI.
++ * Store user r1 and r5 and r13 on the stack (in the unused save
++ * areas / compiler reserved areas), so that we can access them after
++ * we clear MSR RI.
+ */
+
+ REST_GPR(5, r7)
+ std r5, -8(r1)
+- ld r5, GPR1(r7)
++ ld r5, GPR13(r7)
+ std r5, -16(r1)
++ ld r5, GPR1(r7)
++ std r5, -24(r1)
+
+ REST_GPR(7, r7)
+
+- /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
++ /* Stash the stack pointer away for use after recheckpoint */
++ std r1, PACAR1(r13)
++
++ /* Clear MSR RI since we are about to clobber r13. EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+@@ -508,9 +508,9 @@ restore_gprs:
+ * until we turn MSR RI back on.
+ */
+
+- SET_SCRATCH0(r1)
+ ld r5, -8(r1)
+- ld r1, -16(r1)
++ ld r13, -16(r1)
++ ld r1, -24(r1)
+
+ /* Commit register state as checkpointed state: */
+ TRECHKPT
+@@ -526,9 +526,9 @@ restore_gprs:
+ */
+
+ GET_PACA(r13)
+- GET_SCRATCH0(r1)
++ ld r1, PACAR1(r13)
+
+- /* R1 is restored, so we are recoverable again. EE is still off */
++ /* R13, R1 is restored, so we are recoverable again. EE is still off */
+ li r4, MSR_RI
+ mtmsrd r4, 1
+
+diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
+index d89c5df4f2062..660040c2d7b54 100644
+--- a/arch/powerpc/kernel/trace/ftrace.c
++++ b/arch/powerpc/kernel/trace/ftrace.c
+@@ -336,9 +336,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+- if (!ftrace_tramps[i])
+- break;
+- else if (ftrace_tramps[i] == tramp)
++ if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* Is this a known plt tramp? */
+@@ -881,6 +879,17 @@ void arch_ftrace_update_code(int command)
+
+ extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
++void ftrace_free_init_tramp(void)
++{
++ int i;
++
++ for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
++ if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
++ ftrace_tramps[i] = 0;
++ return;
++ }
++}
++
+ int __init ftrace_dyn_arch_init(void)
+ {
+ int i;
+diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+index f9fd5f743eba3..d636fc755f608 100644
+--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
++++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+@@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller)
+
+ /* Save all gprs to pt_regs */
+ SAVE_GPR(0, r1)
+- SAVE_10GPRS(2, r1)
++ SAVE_GPRS(2, 11, r1)
+
+ /* Ok to continue? */
+ lbz r3, PACA_FTRACE_ENABLED(r13)
+ cmpdi r3, 0
+ beq ftrace_no_trace
+
+- SAVE_10GPRS(12, r1)
+- SAVE_10GPRS(22, r1)
++ SAVE_GPRS(12, 31, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+@@ -108,10 +107,8 @@ ftrace_regs_call:
+ #endif
+
+ /* Restore gprs */
+- REST_GPR(0,r1)
+- REST_10GPRS(2,r1)
+- REST_10GPRS(12,r1)
+- REST_10GPRS(22,r1)
++ REST_GPR(0, r1)
++ REST_GPRS(2, 31, r1)
+
+ /* Restore possibly modified LR */
+ ld r0, _LINK(r1)
+@@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller)
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+- SAVE_8GPRS(3, r1)
++ SAVE_GPRS(3, 10, r1)
+
+ lbz r3, PACA_FTRACE_ENABLED(r13)
+ cmpdi r3, 0
+@@ -194,7 +191,7 @@ ftrace_call:
+ mtctr r3
+
+ /* Restore gprs */
+- REST_8GPRS(3,r1)
++ REST_GPRS(3, 10, r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
+index 11741703d26e0..a08bb7cefdc54 100644
+--- a/arch/powerpc/kernel/traps.c
++++ b/arch/powerpc/kernel/traps.c
+@@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
+
+ if (panic_on_oops)
+ panic("Fatal exception");
+- do_exit(signr);
++ make_task_dead(signr);
+ }
+ NOKPROBE_SYMBOL(oops_end);
+
+@@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs)
+ void die_mce(const char *str, struct pt_regs *regs, long err)
+ {
+ /*
+- * The machine check wants to kill the interrupted context, but
+- * do_exit() checks for in_interrupt() and panics in that case, so
+- * exit the irq/nmi before calling die.
++ * The machine check wants to kill the interrupted context,
++ * but make_task_dead() checks for in_interrupt() and panics
++ * in that case, so exit the irq/nmi before calling die.
+ */
+ if (in_nmi())
+ nmi_exit();
+diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
+index 40bdefe9caa73..d4531902d8c67 100644
+--- a/arch/powerpc/kernel/vmlinux.lds.S
++++ b/arch/powerpc/kernel/vmlinux.lds.S
+@@ -8,6 +8,7 @@
+ #define BSS_FIRST_SECTIONS *(.bss.prominit)
+ #define EMITS_PT_NOTE
+ #define RO_EXCEPTION_TABLE_ALIGN 0
++#define RUNTIME_DISCARD_EXIT
+
+ #define SOFT_MASK_TABLE(align) \
+ . = ALIGN(align); \
+@@ -32,6 +33,10 @@
+
+ #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
+
++#if STRICT_ALIGN_SIZE < PAGE_SIZE
++#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT"
++#endif
++
+ ENTRY(_stext)
+
+ PHDRS {
+@@ -204,12 +209,16 @@ SECTIONS
+ }
+ #endif
+
++ /*
++ * Various code relies on __init_begin being at the strict RWX boundary.
++ */
++ . = ALIGN(STRICT_ALIGN_SIZE);
++ __srwx_boundary = .;
++ __init_begin = .;
++
+ /*
+ * Init sections discarded at runtime
+ */
+- . = ALIGN(STRICT_ALIGN_SIZE);
+- __init_begin = .;
+- . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+@@ -275,9 +284,7 @@ SECTIONS
+ . = ALIGN(8);
+ .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
+ {
+-#ifdef CONFIG_PPC32
+ __dynamic_symtab = .;
+-#endif
+ *(.dynsym)
+ }
+ .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
+@@ -401,9 +408,12 @@ SECTIONS
+ DISCARDS
+ /DISCARD/ : {
+ *(*.EMB.apuinfo)
+- *(.glink .iplt .plt .rela* .comment)
++ *(.glink .iplt .plt .comment)
+ *(.gnu.version*)
+ *(.gnu.attributes)
+ *(.eh_frame)
++#ifndef CONFIG_RELOCATABLE
++ *(.rela*)
++#endif
+ }
+ }
+diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
+index f9ea0e5357f92..ad94a2c6b7337 100644
+--- a/arch/powerpc/kernel/watchdog.c
++++ b/arch/powerpc/kernel/watchdog.c
+@@ -135,6 +135,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
+ {
+ cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+ cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
++ /*
++ * See wd_smp_clear_cpu_pending()
++ */
++ smp_mb();
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+ cpumask_andnot(&wd_smp_cpus_pending,
+@@ -187,6 +191,12 @@ static void watchdog_smp_panic(int cpu, u64 tb)
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutself_cpu_backtrace();
+
++ /*
++ * Force flush any remote buffers that might be stuck in IRQ context
++ * and therefore could not run their irq_work.
++ */
++ printk_trigger_flush();
++
+ if (hardlockup_panic)
+ nmi_panic(NULL, "Hard LOCKUP");
+
+@@ -215,13 +225,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+ wd_smp_unlock(&flags);
++ } else {
++ /*
++ * The last CPU to clear pending should have reset the
++ * watchdog so we generally should not find it empty
++ * here if our CPU was clear. However it could happen
++ * due to a rare race with another CPU taking the
++ * last CPU out of the mask concurrently.
++ *
++ * We can't add a warning for it. But just in case
++ * there is a problem with the watchdog that is causing
++ * the mask to not be reset, try to kick it along here.
++ */
++ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
++ goto none_pending;
+ }
+ return;
+ }
++
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
++
++ /*
++ * Order the store to clear pending with the load(s) to check all
++ * words in the pending mask to check they are all empty. This orders
++ * with the same barrier on another CPU. This prevents two CPUs
++ * clearing the last 2 pending bits, but neither seeing the other's
++ * store when checking if the mask is empty, and missing an empty
++ * mask, which ends with a false positive.
++ */
++ smp_mb();
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ unsigned long flags;
+
++none_pending:
++ /*
++ * Double check under lock because more than one CPU could see
++ * a clear mask with the lockless check after clearing their
++ * pending bits.
++ */
+ wd_smp_lock(&flags);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+@@ -312,8 +353,12 @@ void arch_touch_nmi_watchdog(void)
+ {
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
+ int cpu = smp_processor_id();
+- u64 tb = get_tb();
++ u64 tb;
+
++ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
++ return;
++
++ tb = get_tb();
+ if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+ wd_smp_clear_cpu_pending(cpu, tb);
+diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
+index 48525e8b57300..71b1bfdadd76a 100644
+--- a/arch/powerpc/kexec/core.c
++++ b/arch/powerpc/kexec/core.c
+@@ -147,11 +147,18 @@ void __init reserve_crashkernel(void)
+ if (!crashk_res.start) {
+ #ifdef CONFIG_PPC64
+ /*
+- * On 64bit we split the RMO in half but cap it at half of
+- * a small SLB (128MB) since the crash kernel needs to place
+- * itself and some stacks to be in the first segment.
++ * On the LPAR platform place the crash kernel to mid of
++ * RMA size (512MB or more) to ensure the crash kernel
++ * gets enough space to place itself and some stack to be
++ * in the first segment. At the same time normal kernel
++ * also get enough space to allocate memory for essential
++ * system resource in the first segment. Keep the crash
++ * kernel starts at 128MB offset on other platforms.
+ */
+- crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
++ if (firmware_has_feature(FW_FEATURE_LPAR))
++ crashk_res.start = ppc64_rma_size / 2;
++ else
++ crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+ #else
+ crashk_res.start = KDUMP_KERNELBASE;
+ #endif
+diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
+index e3ab9df6cf199..6cfcd20d46686 100644
+--- a/arch/powerpc/kvm/book3s_32_sr.S
++++ b/arch/powerpc/kvm/book3s_32_sr.S
+@@ -122,11 +122,27 @@
+
+ /* 0x0 - 0xb */
+
+- /* 'current->mm' needs to be in r4 */
+- tophys(r4, r2)
+- lwz r4, MM(r4)
+- tophys(r4, r4)
+- /* This only clobbers r0, r3, r4 and r5 */
++ /* switch_mmu_context() needs paging, let's enable it */
++ mfmsr r9
++ ori r11, r9, MSR_DR
++ mtmsr r11
++ sync
++
++ /* switch_mmu_context() clobbers r12, rescue it */
++ SAVE_GPR(12, r1)
++
++ /* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
++ lwz r4, MM(r2)
+ bl switch_mmu_context
+
++ /* restore r12 */
++ REST_GPR(12, r1)
++
++ /* Disable paging again */
++ mfmsr r9
++ li r6, MSR_DR
++ andc r9, r9, r6
++ mtmsr r9
++ sync
++
+ .endm
+diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
+index 983b8c18bc31e..a644003603da1 100644
+--- a/arch/powerpc/kvm/book3s_64_entry.S
++++ b/arch/powerpc/kvm/book3s_64_entry.S
+@@ -407,10 +407,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+ */
+ ld r10,HSTATE_SCRATCH0(r13)
+ cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+- beq machine_check_common
++ beq .Lcall_machine_check_common
+
+ cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+- beq system_reset_common
++ beq .Lcall_system_reset_common
+
+ b .
++
++.Lcall_machine_check_common:
++ b machine_check_common
++
++.Lcall_system_reset_common:
++ b system_reset_common
+ #endif
+diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
+index 6365087f31602..3cb2e05a7ee83 100644
+--- a/arch/powerpc/kvm/book3s_64_vio.c
++++ b/arch/powerpc/kvm/book3s_64_vio.c
+@@ -421,13 +421,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ tbl[idx % TCES_PER_PAGE] = tce;
+ }
+
+-static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
+- unsigned long entry)
++static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
++ struct iommu_table *tbl, unsigned long entry)
+ {
+- unsigned long hpa = 0;
+- enum dma_data_direction dir = DMA_NONE;
++ unsigned long i;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
++
++ for (i = 0; i < subpages; ++i) {
++ unsigned long hpa = 0;
++ enum dma_data_direction dir = DMA_NONE;
+
+- iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
++ }
+ }
+
+ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+@@ -486,6 +492,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+ break;
+ }
+
++ iommu_tce_kill(tbl, io_entry, subpages);
++
+ return ret;
+ }
+
+@@ -545,6 +553,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm,
+ break;
+ }
+
++ iommu_tce_kill(tbl, io_entry, subpages);
++
+ return ret;
+ }
+
+@@ -591,10 +601,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
+ entry, ua, dir);
+
+- iommu_tce_kill(stit->tbl, entry, 1);
+
+ if (ret != H_SUCCESS) {
+- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
++ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
+ goto unlock_exit;
+ }
+ }
+@@ -670,13 +679,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ */
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+- goto invalidate_exit;
++ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+- goto invalidate_exit;
++ goto unlock_exit;
+ }
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+@@ -685,19 +694,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ iommu_tce_direction(tce));
+
+ if (ret != H_SUCCESS) {
+- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
+- entry);
+- goto invalidate_exit;
++ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
++ entry + i);
++ goto unlock_exit;
+ }
+ }
+
+ kvmppc_tce_put(stt, entry + i, tce);
+ }
+
+-invalidate_exit:
+- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+- iommu_tce_kill(stit->tbl, entry, npages);
+-
+ unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+@@ -736,20 +741,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ continue;
+
+ if (ret == H_TOO_HARD)
+- goto invalidate_exit;
++ return ret;
+
+ WARN_ON_ONCE(1);
+- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
++ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
+ }
+ }
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+-invalidate_exit:
+- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+- iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
+-
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
+index 870b7f0c7ea56..fdeda6a9cff44 100644
+--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
++++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
+@@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl,
+ tbl->it_ops->tce_kill(tbl, entry, pages, true);
+ }
+
+-static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
+- unsigned long entry)
++static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
++ struct iommu_table *tbl, unsigned long entry)
+ {
+- unsigned long hpa = 0;
+- enum dma_data_direction dir = DMA_NONE;
++ unsigned long i;
++ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
++ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
++
++ for (i = 0; i < subpages; ++i) {
++ unsigned long hpa = 0;
++ enum dma_data_direction dir = DMA_NONE;
+
+- iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
++ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
++ }
+ }
+
+ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+@@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+ break;
+ }
+
++ iommu_tce_kill_rm(tbl, io_entry, subpages);
++
+ return ret;
+ }
+
+@@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
+ break;
+ }
+
++ iommu_tce_kill_rm(tbl, io_entry, subpages);
++
+ return ret;
+ }
+
+@@ -420,10 +430,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
+ stit->tbl, entry, ua, dir);
+
+- iommu_tce_kill_rm(stit->tbl, entry, 1);
+-
+ if (ret != H_SUCCESS) {
+- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
+ return ret;
+ }
+ }
+@@ -561,7 +569,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ ua = 0;
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+- goto invalidate_exit;
++ goto unlock_exit;
+ }
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+@@ -570,19 +578,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ iommu_tce_direction(tce));
+
+ if (ret != H_SUCCESS) {
+- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
+- entry);
+- goto invalidate_exit;
++ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
++ entry + i);
++ goto unlock_exit;
+ }
+ }
+
+ kvmppc_rm_tce_put(stt, entry + i, tce);
+ }
+
+-invalidate_exit:
+- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+- iommu_tce_kill_rm(stit->tbl, entry, npages);
+-
+ unlock_exit:
+ if (!prereg)
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+@@ -620,20 +624,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ continue;
+
+ if (ret == H_TOO_HARD)
+- goto invalidate_exit;
++ return ret;
+
+ WARN_ON_ONCE_RM(1);
+- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
++ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
+ }
+ }
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+-invalidate_exit:
+- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+- iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
+-
+ return ret;
+ }
+
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 2acb1c96cfafd..eba77096c4430 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1731,7 +1731,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
+
+ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_nested_guest *nested = vcpu->arch.nested;
+ int r;
+ int srcu_idx;
+
+@@ -1831,7 +1830,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+ * it into a HEAI.
+ */
+ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+- (nested->hfscr & (1UL << cause))) {
++ (vcpu->arch.nested_hfscr & (1UL << cause))) {
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+ /*
+@@ -3726,7 +3725,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+
+ kvmppc_set_host_core(pcpu);
+
+- guest_exit_irqoff();
++ context_tracking_guest_exit();
++ if (!vtime_accounting_enabled_this_cpu()) {
++ local_irq_enable();
++ /*
++ * Service IRQs here before vtime_account_guest_exit() so any
++ * ticks that occurred while running the guest are accounted to
++ * the guest. If vtime accounting is enabled, accounting uses
++ * TB rather than ticks, so it can be done without enabling
++ * interrupts here, which has the problem that it accounts
++ * interrupt processing overhead to the host.
++ */
++ local_irq_disable();
++ }
++ vtime_account_guest_exit();
+
+ local_irq_enable();
+
+@@ -4510,7 +4522,20 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
+
+ kvmppc_set_host_core(pcpu);
+
+- guest_exit_irqoff();
++ context_tracking_guest_exit();
++ if (!vtime_accounting_enabled_this_cpu()) {
++ local_irq_enable();
++ /*
++ * Service IRQs here before vtime_account_guest_exit() so any
++ * ticks that occurred while running the guest are accounted to
++ * the guest. If vtime accounting is enabled, accounting uses
++ * TB rather than ticks, so it can be done without enabling
++ * interrupts here, which has the problem that it accounts
++ * interrupt processing overhead to the host.
++ */
++ local_irq_disable();
++ }
++ vtime_account_guest_exit();
+
+ local_irq_enable();
+
+@@ -4835,8 +4860,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+
+ if (change == KVM_MR_CREATE) {
+- slot->arch.rmap = vzalloc(array_size(npages,
+- sizeof(*slot->arch.rmap)));
++ unsigned long size = array_size(npages, sizeof(*slot->arch.rmap));
++
++ if ((size >> PAGE_SHIFT) > totalram_pages())
++ return -ENOMEM;
++
++ slot->arch.rmap = vzalloc(size);
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ }
+@@ -5206,6 +5235,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+ lpcr &= LPCR_PECE | LPCR_LPES;
+ } else {
++ /*
++ * The L2 LPES mode will be set by the L0 according to whether
++ * or not it needs to take external interrupts in HV mode.
++ */
+ lpcr = 0;
+ }
+ lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+@@ -6072,8 +6105,11 @@ static int kvmppc_book3s_init_hv(void)
+ if (r)
+ return r;
+
+- if (kvmppc_radix_possible())
++ if (kvmppc_radix_possible()) {
+ r = kvmppc_radix_init();
++ if (r)
++ return r;
++ }
+
+ r = kvmppc_uvmem_init();
+ if (r < 0)
+diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
+index fcf4760a3a0ea..b148629b7f033 100644
+--- a/arch/powerpc/kvm/book3s_hv_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_builtin.c
+@@ -20,7 +20,7 @@
+ #include <asm/interrupt.h>
+ #include <asm/kvm_ppc.h>
+ #include <asm/kvm_book3s.h>
+-#include <asm/archrandom.h>
++#include <asm/machdep.h>
+ #include <asm/xics.h>
+ #include <asm/xive.h>
+ #include <asm/dbell.h>
+@@ -177,13 +177,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+ int kvmppc_hwrng_present(void)
+ {
+- return powernv_hwrng_present();
++ return ppc_md.get_random_seed != NULL;
+ }
+ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+ long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
+ {
+- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
++ if (ppc_md.get_random_seed &&
++ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
+ return H_SUCCESS;
+
+ return H_HARDWARE;
+@@ -695,6 +696,7 @@ static void flush_guest_tlb(struct kvm *kvm)
+ "r" (0) : "memory");
+ }
+ asm volatile("ptesync": : :"memory");
++ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+ } else {
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+@@ -705,7 +707,9 @@ static void flush_guest_tlb(struct kvm *kvm)
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ asm volatile("ptesync": : :"memory");
+- asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
++ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
++ if (cpu_has_feature(CPU_FTR_ARCH_300))
++ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ }
+ }
+
+diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
+index ed8a2c9f56299..ddea14e5cb5e4 100644
+--- a/arch/powerpc/kvm/book3s_hv_nested.c
++++ b/arch/powerpc/kvm/book3s_hv_nested.c
+@@ -261,8 +261,7 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+ /*
+ * Don't let L1 change LPCR bits for the L2 except these:
+ */
+- mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+- LPCR_LPES | LPCR_MER;
++ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER;
+
+ /*
+ * Additional filtering is required depending on hardware
+@@ -362,7 +361,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
+ /* set L1 state to L2 state */
+ vcpu->arch.nested = l2;
+ vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+- l2->hfscr = l2_hv.hfscr;
++ vcpu->arch.nested_hfscr = l2_hv.hfscr;
+ vcpu->arch.regs = l2_regs;
+
+ /* Guest must always run with ME enabled, HV disabled. */
+@@ -582,7 +581,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
+ if (eaddr & (0xFFFUL << 52))
+ return H_PARAMETER;
+
+- buf = kzalloc(n, GFP_KERNEL);
++ buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf)
+ return H_NO_MEM;
+
+diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
+index 961b3d70483ca..a0e0c28408c07 100644
+--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
++++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
+@@ -7,15 +7,6 @@
+ #include <asm/ppc-opcode.h>
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+-static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+-{
+- struct kvmppc_vcore *vc = vcpu->arch.vcore;
+- u64 tb = mftb() - vc->tb_offset_applied;
+-
+- vcpu->arch.cur_activity = next;
+- vcpu->arch.cur_tb_start = tb;
+-}
+-
+ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+ {
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+@@ -47,8 +38,8 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
+ curr->seqcount = seq + 2;
+ }
+
+-#define start_timing(vcpu, next) __start_timing(vcpu, next)
+-#define end_timing(vcpu) __start_timing(vcpu, NULL)
++#define start_timing(vcpu, next) __accumulate_time(vcpu, next)
++#define end_timing(vcpu) __accumulate_time(vcpu, NULL)
+ #define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+ #else
+ #define start_timing(vcpu, next) do {} while (0)
+diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
+index d4bca93b79f6d..6fa634599bc90 100644
+--- a/arch/powerpc/kvm/book3s_hv_ras.c
++++ b/arch/powerpc/kvm/book3s_hv_ras.c
+@@ -9,6 +9,7 @@
+ #include <linux/kvm.h>
+ #include <linux/kvm_host.h>
+ #include <linux/kernel.h>
++#include <asm/lppaca.h>
+ #include <asm/opal.h>
+ #include <asm/mce.h>
+ #include <asm/machdep.h>
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index eb776d0c5d8e9..81fc1e0ebe9a8 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -2005,7 +2005,7 @@ hcall_real_table:
+ .globl hcall_real_table_end
+ hcall_real_table_end:
+
+-_GLOBAL(kvmppc_h_set_xdabr)
++_GLOBAL_TOC(kvmppc_h_set_xdabr)
+ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
+ andi. r0, r5, DABRX_USER | DABRX_KERNEL
+ beq 6f
+@@ -2015,7 +2015,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
+ 6: li r3, H_PARAMETER
+ blr
+
+-_GLOBAL(kvmppc_h_set_dabr)
++_GLOBAL_TOC(kvmppc_h_set_dabr)
+ EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
+ li r5, DABRX_USER | DABRX_KERNEL
+ 3:
+@@ -2711,8 +2711,7 @@ kvmppc_bad_host_intr:
+ std r0, GPR0(r1)
+ std r9, GPR1(r1)
+ std r2, GPR2(r1)
+- SAVE_4GPRS(3, r1)
+- SAVE_2GPRS(7, r1)
++ SAVE_GPRS(3, 8, r1)
+ srdi r0, r12, 32
+ clrldi r12, r12, 32
+ std r0, _CCR(r1)
+@@ -2735,7 +2734,7 @@ kvmppc_bad_host_intr:
+ ld r9, HSTATE_SCRATCH2(r13)
+ ld r12, HSTATE_SCRATCH0(r13)
+ GET_SCRATCH0(r0)
+- SAVE_4GPRS(9, r1)
++ SAVE_GPRS(9, 12, r1)
+ std r0, GPR13(r1)
+ SAVE_NVGPRS(r1)
+ ld r5, HSTATE_CFAR(r13)
+diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
+index a7061ee3b1577..3d4ee75b0fb76 100644
+--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
++++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
+@@ -251,7 +251,7 @@ int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+- p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
++ p->pfns = vcalloc(slot->npages, sizeof(*p->pfns));
+ if (!p->pfns) {
+ kfree(p);
+ return -ENOMEM;
+@@ -360,13 +360,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+ static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+ {
+- struct kvmppc_uvmem_slot *p;
++ struct kvmppc_uvmem_slot *p = NULL, *iter;
+ bool ret = false;
+ unsigned long i;
+
+- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
+- if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
++ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
++ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
++ p = iter;
+ break;
++ }
+ if (!p)
+ return ret;
+ /*
+diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
+index 977801c83aff8..8c15c90dd3a97 100644
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -1042,7 +1042,21 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+ }
+
+ trace_kvm_exit(exit_nr, vcpu);
+- guest_exit_irqoff();
++
++ context_tracking_guest_exit();
++ if (!vtime_accounting_enabled_this_cpu()) {
++ local_irq_enable();
++ /*
++ * Service IRQs here before vtime_account_guest_exit() so any
++ * ticks that occurred while running the guest are accounted to
++ * the guest. If vtime accounting is enabled, accounting uses
++ * TB rather than ticks, so it can be done without enabling
++ * interrupts here, which has the problem that it accounts
++ * interrupt processing overhead to the host.
++ */
++ local_irq_disable();
++ }
++ vtime_account_guest_exit();
+
+ local_irq_enable();
+
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index b4e6f70b97b94..ee305455bd8db 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -1507,7 +1507,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
+ {
+ enum emulation_result emulated = EMULATE_DONE;
+
+- if (vcpu->arch.mmio_vsx_copy_nums > 2)
++ if (vcpu->arch.mmio_vmx_copy_nums > 2)
+ return EMULATE_FAIL;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+@@ -1604,7 +1604,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
+ unsigned int index = rs & KVM_MMIO_REG_MASK;
+ enum emulation_result emulated = EMULATE_DONE;
+
+- if (vcpu->arch.mmio_vsx_copy_nums > 2)
++ if (vcpu->arch.mmio_vmx_copy_nums > 2)
+ return EMULATE_FAIL;
+
+ vcpu->arch.io_gpr = rs;
+diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
+index 99a7c9132422c..54be64203b2ab 100644
+--- a/arch/powerpc/lib/Makefile
++++ b/arch/powerpc/lib/Makefile
+@@ -19,6 +19,9 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+ CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+ endif
+
++CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
++
+ obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o
+
+ ifndef CONFIG_KASAN
+diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
+index cda17bee5afea..c3e06922468b3 100644
+--- a/arch/powerpc/lib/feature-fixups.c
++++ b/arch/powerpc/lib/feature-fixups.c
+@@ -228,6 +228,7 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+
+ static bool stf_exit_reentrant = false;
+ static bool rfi_exit_reentrant = false;
++static DEFINE_MUTEX(exit_flush_lock);
+
+ static int __do_stf_barrier_fixups(void *data)
+ {
+@@ -253,6 +254,9 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
+ * low level interrupt exit code before patching. After the patching,
+ * if allowed, then flip the branch to allow fast exits.
+ */
++
++ // Prevent static key update races with do_rfi_flush_fixups()
++ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
+@@ -264,6 +268,8 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
++
++ mutex_unlock(&exit_flush_lock);
+ }
+
+ void do_uaccess_flush_fixups(enum l1d_flush_type types)
+@@ -486,6 +492,9 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
+ * without stop_machine, so this could be achieved with a broadcast
+ * IPI instead, but this matches the stf sequence.
+ */
++
++ // Prevent static key update races with do_stf_barrier_fixups()
++ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_rfi_flush_fixups, &types, NULL);
+@@ -497,6 +506,8 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
++
++ mutex_unlock(&exit_flush_lock);
+ }
+
+ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
+index d8d5f901cee1c..2d39b7c246e30 100644
+--- a/arch/powerpc/lib/sstep.c
++++ b/arch/powerpc/lib/sstep.c
+@@ -112,9 +112,9 @@ static nokprobe_inline long address_ok(struct pt_regs *regs,
+ {
+ if (!user_mode(regs))
+ return 1;
+- if (__access_ok(ea, nb))
++ if (access_ok((void __user *)ea, nb))
+ return 1;
+- if (__access_ok(ea, 1))
++ if (access_ok((void __user *)ea, 1))
+ /* Access overlaps the end of the user region */
+ regs->dar = TASK_SIZE_MAX - 1;
+ else
+@@ -1014,7 +1014,10 @@ NOKPROBE_SYMBOL(emulate_dcbz);
+
+ #define __put_user_asmx(x, addr, err, op, cr) \
+ __asm__ __volatile__( \
++ ".machine push\n" \
++ ".machine power8\n" \
+ "1: " op " %2,0,%3\n" \
++ ".machine pop\n" \
+ " mfcr %1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+@@ -1027,7 +1030,10 @@ NOKPROBE_SYMBOL(emulate_dcbz);
+
+ #define __get_user_asmx(x, addr, err, op) \
+ __asm__ __volatile__( \
++ ".machine push\n" \
++ ".machine power8\n" \
+ "1: "op" %1,0,%2\n" \
++ ".machine pop\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+@@ -3181,12 +3187,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
+ case BARRIER_EIEIO:
+ eieio();
+ break;
++#ifdef CONFIG_PPC64
+ case BARRIER_LWSYNC:
+ asm volatile("lwsync" : : : "memory");
+ break;
+ case BARRIER_PTESYNC:
+ asm volatile("ptesync" : : : "memory");
+ break;
++#endif
+ }
+ break;
+
+@@ -3304,7 +3312,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+ __put_user_asmx(op->val, ea, err, "stbcx.", cr);
+ break;
+ case 2:
+- __put_user_asmx(op->val, ea, err, "stbcx.", cr);
++ __put_user_asmx(op->val, ea, err, "sthcx.", cr);
+ break;
+ #endif
+ case 4:
+diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
+index 9ef941d958d80..5473f9d03df3a 100644
+--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
++++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
+@@ -37,7 +37,7 @@ _GLOBAL(exec_instr)
+ * The stack pointer (GPR1) and the thread pointer (GPR13) are not
+ * saved as these should not be modified anyway.
+ */
+- SAVE_2GPRS(2, r1)
++ SAVE_GPRS(2, 3, r1)
+ SAVE_NVGPRS(r1)
+
+ /*
+@@ -75,8 +75,7 @@ _GLOBAL(exec_instr)
+
+ /* Load GPRs from pt_regs */
+ REST_GPR(0, r31)
+- REST_10GPRS(2, r31)
+- REST_GPR(12, r31)
++ REST_GPRS(2, 12, r31)
+ REST_NVGPRS(r31)
+
+ /* Placeholder for the test instruction */
+@@ -99,8 +98,7 @@ _GLOBAL(exec_instr)
+ subi r3, r3, GPR0
+ SAVE_GPR(0, r3)
+ SAVE_GPR(2, r3)
+- SAVE_8GPRS(4, r3)
+- SAVE_GPR(12, r3)
++ SAVE_GPRS(4, 12, r3)
+ SAVE_NVGPRS(r3)
+
+ /* Save resulting LR to pt_regs */
+diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
+index 39b84e7452e1b..aa3bb8da1cb9b 100644
+--- a/arch/powerpc/math-emu/math_efp.c
++++ b/arch/powerpc/math-emu/math_efp.c
+@@ -17,6 +17,7 @@
+
+ #include <linux/types.h>
+ #include <linux/prctl.h>
++#include <linux/module.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/reg.h>
+diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
+index 27061583a0107..692c336e4f55b 100644
+--- a/arch/powerpc/mm/book3s32/mmu.c
++++ b/arch/powerpc/mm/book3s32/mmu.c
+@@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
+ return 0;
+ }
+
+-static int find_free_bat(void)
++int __init find_free_bat(void)
+ {
+ int b;
+ int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+@@ -100,7 +100,7 @@ static int find_free_bat(void)
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1.
+ */
+-static unsigned int block_size(unsigned long base, unsigned long top)
++unsigned int bat_block_size(unsigned long base, unsigned long top)
+ {
+ unsigned int max_size = SZ_256M;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
+@@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
+ int idx;
+
+ while ((idx = find_free_bat()) != -1 && base != top) {
+- unsigned int size = block_size(base, top);
++ unsigned int size = bat_block_size(base, top);
+
+ if (size < 128 << 10)
+ break;
+@@ -159,8 +159,11 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
+ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+ {
+ unsigned long done;
+- unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
++ unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET;
++ unsigned long size;
+
++ size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET);
++ setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X);
+
+ if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) {
+ pr_debug_once("Read-Write memory mapped without BATs\n");
+@@ -196,18 +199,17 @@ void mmu_mark_initmem_nx(void)
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+ unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+- unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
++ unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K);
+ unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+ unsigned long size;
+
+- for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+- size = block_size(base, top);
++ for (i = 0; i < nb - 1 && base < top;) {
++ size = bat_block_size(base, top);
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+ base += size;
+ }
+ if (base < top) {
+- size = block_size(base, top);
+- size = max(size, 128UL << 10);
++ size = bat_block_size(base, top);
+ if ((top - base) > size) {
+ size <<= 1;
+ if (strict_kernel_rwx_enabled() && base + size > border)
+@@ -247,10 +249,9 @@ void mmu_mark_rodata_ro(void)
+ }
+
+ /*
+- * Set up one of the I/D BAT (block address translation) register pairs.
++ * Set up one of the D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+- * On 603+, only set IBAT when _PAGE_EXEC is set
+ */
+ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+@@ -286,10 +287,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+ /* G bit must be zero in IBATs */
+ flags &= ~_PAGE_EXEC;
+ }
+- if (flags & _PAGE_EXEC)
+- bat[0] = bat[1];
+- else
+- bat[0].batu = bat[0].batl = 0;
+
+ bat_addrs[index].start = virt;
+ bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
+index c145776d3ae5e..7bfd88c4b5470 100644
+--- a/arch/powerpc/mm/book3s64/hash_utils.c
++++ b/arch/powerpc/mm/book3s64/hash_utils.c
+@@ -1522,8 +1522,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+ }
+ EXPORT_SYMBOL_GPL(hash_page);
+
+-DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
+-DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
++DEFINE_INTERRUPT_HANDLER(do_hash_fault)
+ {
+ unsigned long ea = regs->dar;
+ unsigned long dsisr = regs->dsisr;
+@@ -1582,35 +1581,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
+ }
+ }
+
+-/*
+- * The _RAW interrupt entry checks for the in_nmi() case before
+- * running the full handler.
+- */
+-DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
+-{
+- /*
+- * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+- * don't call hash_page, just fail the fault. This is required to
+- * prevent re-entrancy problems in the hash code, namely perf
+- * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+- * hash fault. See the comment in hash_preload().
+- *
+- * We come here as a result of a DSI at a point where we don't want
+- * to call hash_page, such as when we are accessing memory (possibly
+- * user memory) inside a PMU interrupt that occurred while interrupts
+- * were soft-disabled. We want to invoke the exception handler for
+- * the access, or panic if there isn't a handler.
+- */
+- if (unlikely(in_nmi())) {
+- do_bad_page_fault_segv(regs);
+- return 0;
+- }
+-
+- __do_hash_fault(regs);
+-
+- return 0;
+-}
+-
+ #ifdef CONFIG_PPC_MM_SLICES
+ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
+ {
+@@ -1677,26 +1647,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
+ #endif /* CONFIG_PPC_64K_PAGES */
+
+ /*
+- * __hash_page_* must run with interrupts off, as it sets the
+- * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
+- * time and may take a hash fault reading the user stack, see
+- * read_user_stack_slow() in the powerpc/perf code.
+- *
+- * If that takes a hash fault on the same page as we lock here, it
+- * will bail out when seeing H_PAGE_BUSY set, and retry the access
+- * leading to an infinite loop.
++ * __hash_page_* must run with interrupts off, including PMI interrupts
++ * off, as it sets the H_PAGE_BUSY bit.
+ *
+- * Disabling interrupts here does not prevent perf interrupts, but it
+- * will prevent them taking hash faults (see the NMI test in
+- * do_hash_page), then read_user_stack's copy_from_user_nofault will
+- * fail and perf will fall back to read_user_stack_slow(), which
+- * walks the Linux page tables.
++ * It's otherwise possible for perf interrupts to hit at any time and
++ * may take a hash fault reading the user stack, which could take a
++ * hash miss and deadlock on the same H_PAGE_BUSY bit.
+ *
+ * Interrupts must also be off for the duration of the
+ * mm_is_thread_local test and update, to prevent preempt running the
+ * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+ */
+- local_irq_save(flags);
++ powerpc_local_irq_pmu_save(flags);
+
+ /* Is that local to this CPU ? */
+ if (mm_is_thread_local(mm))
+@@ -1721,7 +1683,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
+ mm_ctx_user_psize(&mm->context),
+ pte_val(*ptep));
+
+- local_irq_restore(flags);
++ powerpc_local_irq_pmu_restore(flags);
+ }
+
+ /*
+diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
+index ae20add7954a0..832dfc59fc6c6 100644
+--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
++++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
+@@ -232,6 +232,14 @@ void radix__mark_rodata_ro(void)
+ end = (unsigned long)__init_begin;
+
+ radix__change_memory_range(start, end, _PAGE_WRITE);
++
++ for (start = PAGE_OFFSET; start < (unsigned long)_stext; start += PAGE_SIZE) {
++ end = start + PAGE_SIZE;
++ if (overlaps_interrupt_vector_text(start, end))
++ radix__change_memory_range(start, end, _PAGE_WRITE);
++ else
++ break;
++ }
+ }
+
+ void radix__mark_initmem_nx(void)
+@@ -260,8 +268,24 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
+ static unsigned long next_boundary(unsigned long addr, unsigned long end)
+ {
+ #ifdef CONFIG_STRICT_KERNEL_RWX
+- if (addr < __pa_symbol(__init_begin))
+- return __pa_symbol(__init_begin);
++ unsigned long stext_phys;
++
++ stext_phys = __pa_symbol(_stext);
++
++ // Relocatable kernel running at non-zero real address
++ if (stext_phys != 0) {
++ // The end of interrupts code at zero is a rodata boundary
++ unsigned long end_intr = __pa_symbol(__end_interrupts) - stext_phys;
++ if (addr < end_intr)
++ return end_intr;
++
++ // Start of relocated kernel text is a rodata boundary
++ if (addr < stext_phys)
++ return stext_phys;
++ }
++
++ if (addr < __pa_symbol(__srwx_boundary))
++ return __pa_symbol(__srwx_boundary);
+ #endif
+ return end;
+ }
+@@ -740,9 +764,9 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
+ }
+
+ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
+- unsigned long end)
++ unsigned long end, bool direct)
+ {
+- unsigned long next;
++ unsigned long next, pages = 0;
+ pte_t *pte;
+
+ pte = pte_start + pte_index(addr);
+@@ -764,13 +788,16 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
+ }
+
+ pte_clear(&init_mm, addr, pte);
++ pages++;
+ }
++ if (direct)
++ update_page_count(mmu_virtual_psize, -pages);
+ }
+
+ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+- unsigned long end)
++ unsigned long end, bool direct)
+ {
+- unsigned long next;
++ unsigned long next, pages = 0;
+ pte_t *pte_base;
+ pmd_t *pmd;
+
+@@ -788,19 +815,22 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
++ pages++;
+ continue;
+ }
+
+ pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+- remove_pte_table(pte_base, addr, next);
++ remove_pte_table(pte_base, addr, next, direct);
+ free_pte_table(pte_base, pmd);
+ }
++ if (direct)
++ update_page_count(MMU_PAGE_2M, -pages);
+ }
+
+ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
+- unsigned long end)
++ unsigned long end, bool direct)
+ {
+- unsigned long next;
++ unsigned long next, pages = 0;
+ pmd_t *pmd_base;
+ pud_t *pud;
+
+@@ -818,16 +848,20 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pud);
++ pages++;
+ continue;
+ }
+
+ pmd_base = pud_pgtable(*pud);
+- remove_pmd_table(pmd_base, addr, next);
++ remove_pmd_table(pmd_base, addr, next, direct);
+ free_pmd_table(pmd_base, pud);
+ }
++ if (direct)
++ update_page_count(MMU_PAGE_1G, -pages);
+ }
+
+-static void __meminit remove_pagetable(unsigned long start, unsigned long end)
++static void __meminit remove_pagetable(unsigned long start, unsigned long end,
++ bool direct)
+ {
+ unsigned long addr, next;
+ pud_t *pud_base;
+@@ -856,7 +890,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
+ }
+
+ pud_base = p4d_pgtable(*p4d);
+- remove_pud_table(pud_base, addr, next);
++ remove_pud_table(pud_base, addr, next, direct);
+ free_pud_table(pud_base, p4d);
+ }
+
+@@ -879,7 +913,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
+
+ int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
+ {
+- remove_pagetable(start, end);
++ remove_pagetable(start, end, true);
+ return 0;
+ }
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+@@ -915,7 +949,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+ {
+- remove_pagetable(start, start + page_size);
++ remove_pagetable(start, start + page_size, false);
+ }
+ #endif
+ #endif
+@@ -954,15 +988,6 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
+ pmd = *pmdp;
+ pmd_clear(pmdp);
+
+- /*
+- * pmdp collapse_flush need to ensure that there are no parallel gup
+- * walk after this call. This is needed so that we can have stable
+- * page ref count when collapsing a page. We don't allow a collapse page
+- * if we have gup taken on the page. We can ensure that by sending IPI
+- * because gup walk happens with IRQ disabled.
+- */
+- serialize_against_pte_lookup(vma->vm_mm);
+-
+ radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
+ return pmd;
+@@ -1030,8 +1055,8 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address, int psize)
+ {
+ struct mm_struct *mm = vma->vm_mm;
+- unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+- _PAGE_RW | _PAGE_EXEC);
++ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_SOFT_DIRTY |
++ _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+ unsigned long change = pte_val(entry) ^ pte_val(*ptep);
+ /*
+@@ -1093,7 +1118,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+
+ int pud_clear_huge(pud_t *pud)
+ {
+- if (pud_huge(*pud)) {
++ if (pud_is_leaf(*pud)) {
+ pud_clear(pud);
+ return 1;
+ }
+@@ -1140,7 +1165,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+
+ int pmd_clear_huge(pmd_t *pmd)
+ {
+- if (pmd_huge(*pmd)) {
++ if (pmd_is_leaf(*pmd)) {
+ pmd_clear(pmd);
+ return 1;
+ }
+diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
+index 7724af19ed7e6..6972fd5d423c0 100644
+--- a/arch/powerpc/mm/book3s64/radix_tlb.c
++++ b/arch/powerpc/mm/book3s64/radix_tlb.c
+@@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
+-static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+- unsigned long lpid,
+- unsigned long ric)
+-{
+- unsigned long rb, rs, prs, r;
+-
+- rb = PPC_BIT(53); /* IS = 1 */
+- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+- prs = 1; /* process scoped */
+- r = 1; /* radix format */
+-
+- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+- trace_tlbie(0, 0, rb, rs, ric, prs, r);
+-}
+ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+ {
+ unsigned long rb,rs,prs,r;
+@@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
+-static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+- unsigned long lpid,
+- unsigned long ap, unsigned long ric)
+-{
+- unsigned long rb, rs, prs, r;
+-
+- rb = va & ~(PPC_BITMASK(52, 63));
+- rb |= ap << PPC_BITLSHIFT(58);
+- rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+- prs = 1; /* process scoped */
+- r = 1; /* radix format */
+-
+- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+- trace_tlbie(0, 0, rb, rs, ric, prs, r);
+-}
+-
+ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+ {
+@@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+ }
+ }
+
+-static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+- unsigned long pid,
+- unsigned long lpid,
+- unsigned long ap)
+-{
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+- asm volatile("ptesync" : : : "memory");
+- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+- }
+-
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+- asm volatile("ptesync" : : : "memory");
+- __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+- }
+-}
+-
+ static inline void fixup_tlbie_pid(unsigned long pid)
+ {
+ /*
+@@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid)
+ }
+ }
+
+-static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+-{
+- /*
+- * We can use any address for the invalidation, pick one which is
+- * probably unused as an optimisation.
+- */
+- unsigned long va = ((1UL << 52) - 1);
+-
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+- asm volatile("ptesync" : : : "memory");
+- __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+- }
+-
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+- asm volatile("ptesync" : : : "memory");
+- __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+- RIC_FLUSH_TLB);
+- }
+-}
+-
+ static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap)
+ {
+@@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+-static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+- unsigned long ric)
+-{
+- asm volatile("ptesync" : : : "memory");
+-
+- /*
+- * Workaround the fact that the "ric" argument to __tlbie_pid
+- * must be a compile-time constraint to match the "i" constraint
+- * in the asm statement.
+- */
+- switch (ric) {
+- case RIC_FLUSH_TLB:
+- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+- fixup_tlbie_pid_lpid(pid, lpid);
+- break;
+- case RIC_FLUSH_PWC:
+- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+- break;
+- case RIC_FLUSH_ALL:
+- default:
+- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+- fixup_tlbie_pid_lpid(pid, lpid);
+- }
+- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+-}
+ struct tlbiel_pid {
+ unsigned long pid;
+ unsigned long ric;
+@@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ fixup_tlbie_va_range(addr - page_size, pid, ap);
+ }
+
+-static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+- unsigned long pid, unsigned long lpid,
+- unsigned long page_size,
+- unsigned long psize)
+-{
+- unsigned long addr;
+- unsigned long ap = mmu_get_ap(psize);
+-
+- for (addr = start; addr < end; addr += page_size)
+- __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+-
+- fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+-}
+-
+ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+ {
+@@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+-static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+- unsigned long pid, unsigned long lpid,
+- unsigned long page_size,
+- unsigned long psize, bool also_pwc)
+-{
+- asm volatile("ptesync" : : : "memory");
+- if (also_pwc)
+- __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+- __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+-}
+-
+ static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+@@ -1171,15 +1052,12 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+ }
+ }
+ } else {
+- bool hflush = false;
++ bool hflush;
+ unsigned long hstart, hend;
+
+- if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+- hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+- hend = end & PMD_MASK;
+- if (hstart < hend)
+- hflush = true;
+- }
++ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
++ hend = end & PMD_MASK;
++ hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;
+
+ if (type == FLUSH_TYPE_LOCAL) {
+ asm volatile("ptesync": : :"memory");
+@@ -1471,6 +1349,127 @@ void radix__flush_tlb_all(void)
+ }
+
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
++static __always_inline void __tlbie_pid_lpid(unsigned long pid,
++ unsigned long lpid,
++ unsigned long ric)
++{
++ unsigned long rb, rs, prs, r;
++
++ rb = PPC_BIT(53); /* IS = 1 */
++ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
++ prs = 1; /* process scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(0, 0, rb, rs, ric, prs, r);
++}
++
++static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
++ unsigned long lpid,
++ unsigned long ap, unsigned long ric)
++{
++ unsigned long rb, rs, prs, r;
++
++ rb = va & ~(PPC_BITMASK(52, 63));
++ rb |= ap << PPC_BITLSHIFT(58);
++ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
++ prs = 1; /* process scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(0, 0, rb, rs, ric, prs, r);
++}
++
++static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
++{
++ /*
++ * We can use any address for the invalidation, pick one which is
++ * probably unused as an optimisation.
++ */
++ unsigned long va = ((1UL << 52) - 1);
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync" : : : "memory");
++ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync" : : : "memory");
++ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
++ RIC_FLUSH_TLB);
++ }
++}
++
++static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
++ unsigned long ric)
++{
++ asm volatile("ptesync" : : : "memory");
++
++ /*
++ * Workaround the fact that the "ric" argument to __tlbie_pid
++	 * must be a compile-time constant to match the "i" constraint
++ * in the asm statement.
++ */
++ switch (ric) {
++ case RIC_FLUSH_TLB:
++ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
++ fixup_tlbie_pid_lpid(pid, lpid);
++ break;
++ case RIC_FLUSH_PWC:
++ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
++ break;
++ case RIC_FLUSH_ALL:
++ default:
++ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
++ fixup_tlbie_pid_lpid(pid, lpid);
++ }
++ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
++}
++
++static inline void fixup_tlbie_va_range_lpid(unsigned long va,
++ unsigned long pid,
++ unsigned long lpid,
++ unsigned long ap)
++{
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync" : : : "memory");
++ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync" : : : "memory");
++ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
++ }
++}
++
++static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long lpid,
++ unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ for (addr = start; addr < end; addr += page_size)
++ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
++
++ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
++}
++
++static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long lpid,
++ unsigned long page_size,
++ unsigned long psize, bool also_pwc)
++{
++ asm volatile("ptesync" : : : "memory");
++ if (also_pwc)
++ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
++ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
++ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
++}
++
+ /*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
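
The radix_tlb.c change above moves the process-scoped *_lpid tlbie helpers next to their only user, the H_RPT_INVALIDATE path under CONFIG_KVM_BOOK3S_HV_POSSIBLE. The RS packing those helpers use follows from the PPC_BIT macros: with IBM bit numbering (bit 0 is the MSB), PPC_BITLSHIFT(31) is 32 and ~PPC_BITMASK(0, 31) keeps the low 32 bits, so the PID lands in the upper half and the LPID in the lower half. A minimal standalone sketch of that packing (plain C, not kernel code; assumes 64-bit values):

	#include <stdint.h>

	/* Sketch: RS operand of a process-scoped tlbie, PID in the upper
	 * 32 bits and LPID in the lower 32 bits (IBM bit numbering). */
	static uint64_t tlbie_rs(uint64_t pid, uint64_t lpid)
	{
		return (pid << 32) | (lpid & 0xffffffffULL);
	}
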
+diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
+index f0037bcc47a0e..a4fd2901189c5 100644
+--- a/arch/powerpc/mm/book3s64/slb.c
++++ b/arch/powerpc/mm/book3s64/slb.c
+@@ -14,6 +14,7 @@
+ #include <asm/mmu.h>
+ #include <asm/mmu_context.h>
+ #include <asm/paca.h>
++#include <asm/lppaca.h>
+ #include <asm/ppc-opcode.h>
+ #include <asm/cputable.h>
+ #include <asm/cacheflush.h>
+diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
+index a8d0ce85d39ad..4a15172dfef29 100644
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -568,18 +568,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault);
+ static void __bad_page_fault(struct pt_regs *regs, int sig)
+ {
+ int is_write = page_fault_is_write(regs->dsisr);
++ const char *msg;
+
+ /* kernel has accessed a bad area */
+
++ if (regs->dar < PAGE_SIZE)
++ msg = "Kernel NULL pointer dereference";
++ else
++ msg = "Unable to handle kernel data access";
++
+ switch (TRAP(regs)) {
+ case INTERRUPT_DATA_STORAGE:
+- case INTERRUPT_DATA_SEGMENT:
+ case INTERRUPT_H_DATA_STORAGE:
+- pr_alert("BUG: %s on %s at 0x%08lx\n",
+- regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
+- "Unable to handle kernel data access",
++ pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
+ is_write ? "write" : "read", regs->dar);
+ break;
++ case INTERRUPT_DATA_SEGMENT:
++ pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
++ break;
+ case INTERRUPT_INST_STORAGE:
+ case INTERRUPT_INST_SEGMENT:
+ pr_alert("BUG: Unable to handle kernel instruction fetch%s",
+diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
+index 386be136026e8..db040f34c0046 100644
+--- a/arch/powerpc/mm/init_64.c
++++ b/arch/powerpc/mm/init_64.c
+@@ -188,7 +188,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star
+ unsigned long nr_pfn = page_size / sizeof(struct page);
+ unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+- if ((start_pfn + nr_pfn) > altmap->end_pfn)
++ if ((start_pfn + nr_pfn - 1) > altmap->end_pfn)
+ return true;
+
+ if (start_pfn < altmap->base_pfn)
+@@ -313,8 +313,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
+ start = ALIGN_DOWN(start, page_size);
+ if (altmap) {
+ alt_start = altmap->base_pfn;
+- alt_end = altmap->base_pfn + altmap->reserve +
+- altmap->free + altmap->alloc + altmap->align;
++ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+ }
+
+ pr_debug("vmemmap_free %lx...%lx\n", start, end);
+diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
+index bb1a5408b86b2..8636b17c6a20f 100644
+--- a/arch/powerpc/mm/kasan/Makefile
++++ b/arch/powerpc/mm/kasan/Makefile
+@@ -1,6 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+
+ KASAN_SANITIZE := n
++KCOV_INSTRUMENT := n
+
+ obj-$(CONFIG_PPC32) += kasan_init_32.o
+ obj-$(CONFIG_PPC_8xx) += 8xx.o
+diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
+index 202bd260a0095..450a67ef0bbe1 100644
+--- a/arch/powerpc/mm/kasan/book3s_32.c
++++ b/arch/powerpc/mm/kasan/book3s_32.c
+@@ -10,47 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
+ {
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+- unsigned long k_cur = k_start;
+- int k_size = k_end - k_start;
+- int k_size_base = 1 << (ffs(k_size) - 1);
++ unsigned long k_nobat = k_start;
++ unsigned long k_cur;
++ phys_addr_t phys;
+ int ret;
+- void *block;
+
+- block = memblock_alloc(k_size, k_size_base);
+-
+- if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
+- int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);
+-
+- setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
+- if (k_size_more >= SZ_128K)
+- setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
+- k_size_more, PAGE_KERNEL);
+- if (v_block_mapped(k_start))
+- k_cur = k_start + k_size_base;
+- if (v_block_mapped(k_start + k_size_base))
+- k_cur = k_start + k_size_base + k_size_more;
+-
+- update_bats();
++ while (k_nobat < k_end) {
++ unsigned int k_size = bat_block_size(k_nobat, k_end);
++ int idx = find_free_bat();
++
++ if (idx == -1)
++ break;
++ if (k_size < SZ_128K)
++ break;
++ phys = memblock_phys_alloc_range(k_size, k_size, 0,
++ MEMBLOCK_ALLOC_ANYWHERE);
++ if (!phys)
++ break;
++
++ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
++ k_nobat += k_size;
+ }
++ if (k_nobat != k_start)
++ update_bats();
+
+- if (!block)
+- block = memblock_alloc(k_size, PAGE_SIZE);
+- if (!block)
+- return -ENOMEM;
++ if (k_nobat < k_end) {
++ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
++ MEMBLOCK_ALLOC_ANYWHERE);
++ if (!phys)
++ return -ENOMEM;
++ }
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+- kasan_update_early_region(k_start, k_cur, __pte(0));
++ kasan_update_early_region(k_start, k_nobat, __pte(0));
+
+- for (; k_cur < k_end; k_cur += PAGE_SIZE) {
++ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+- void *va = block + k_cur - k_start;
+- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
++ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
++ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
++
+ return 0;
+ }
+diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
+index cf8770b1a692e..f3e4d069e0ba7 100644
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -83,13 +83,12 @@ void __init
+ kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
+ {
+ unsigned long k_cur;
+- phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+- if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
++ if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page)))
+ continue;
+
+ __set_pte_at(&init_mm, k_cur, ptep, pte, 0);
+diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
+index c3c4e31462eca..6902f453c7451 100644
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -20,8 +20,9 @@
+ #include <asm/machdep.h>
+ #include <asm/rtas.h>
+ #include <asm/kasan.h>
+-#include <asm/sparsemem.h>
+ #include <asm/svm.h>
++#include <asm/mmzone.h>
++#include <asm/ftrace.h>
+
+ #include <mm/mmu_decl.h>
+
+@@ -103,6 +104,37 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
+ vm_unmap_aliases();
+ }
+
++/*
++ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
++ * updating.
++ */
++static void update_end_of_memory_vars(u64 start, u64 size)
++{
++ unsigned long end_pfn = PFN_UP(start + size);
++
++ if (end_pfn > max_pfn) {
++ max_pfn = end_pfn;
++ max_low_pfn = end_pfn;
++ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
++ }
++}
++
++int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
++ struct mhp_params *params)
++{
++ int ret;
++
++ ret = __add_pages(nid, start_pfn, nr_pages, params);
++ if (ret)
++ return ret;
++
++ /* update max_pfn, max_low_pfn and high_memory */
++ update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
++ nr_pages << PAGE_SHIFT);
++
++ return ret;
++}
++
+ int __ref arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_params *params)
+ {
+@@ -113,7 +145,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
+ rc = arch_create_linear_mapping(nid, start, size, params);
+ if (rc)
+ return rc;
+- rc = __add_pages(nid, start_pfn, nr_pages, params);
++ rc = add_pages(nid, start_pfn, nr_pages, params);
+ if (rc)
+ arch_remove_linear_mapping(start, size);
+ return rc;
+@@ -314,6 +346,7 @@ void free_initmem(void)
+ mark_initmem_nx();
+ init_mem_is_free = true;
+ free_initmem_default(POISON_FREE_INITMEM);
++ ftrace_free_init_tramp();
+ }
+
+ /*
+diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
+index 74246536b8326..aca34d37b5197 100644
+--- a/arch/powerpc/mm/mmu_context.c
++++ b/arch/powerpc/mm/mmu_context.c
+@@ -81,7 +81,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ * context
+ */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+- asm volatile ("dssall");
++ asm volatile (PPC_DSSALL);
+
+ if (!new_on_cpu)
+ membarrier_arch_switch_mm(prev, next, tsk);
+diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
+index 0df9fe29dd567..5348e1f9eb940 100644
+--- a/arch/powerpc/mm/nohash/8xx.c
++++ b/arch/powerpc/mm/nohash/8xx.c
+@@ -183,8 +183,8 @@ void mmu_mark_initmem_nx(void)
+ unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
+- mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false);
+- mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
++ if (!debug_pagealloc_enabled_or_kfence())
++ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
+
+ mmu_pin_tlb(block_mapped_ram, false);
+ }
+diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
+index 77884e24281dd..3d845e001c874 100644
+--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
++++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
+@@ -95,8 +95,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ if (p4d_none(*p4dp)) {
+- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+- p4d_populate(&init_mm, p4dp, pmdp);
++ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
++ p4d_populate(&init_mm, p4dp, pudp);
+ }
+ pudp = pud_offset(p4dp, ea);
+ if (pud_none(*pudp)) {
+@@ -105,7 +105,7 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (!pmd_present(*pmdp)) {
+- ptep = early_alloc_pgtable(PAGE_SIZE);
++ ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
+index 4c74e8a5482bf..c555ad9fa00b1 100644
+--- a/arch/powerpc/mm/nohash/kaslr_booke.c
++++ b/arch/powerpc/mm/nohash/kaslr_booke.c
+@@ -18,7 +18,6 @@
+ #include <asm/prom.h>
+ #include <asm/kdump.h>
+ #include <mm/mmu_decl.h>
+-#include <generated/compile.h>
+ #include <generated/utsrelease.h>
+
+ struct regions {
+@@ -36,10 +35,6 @@ struct regions {
+ int reserved_mem_size_cells;
+ };
+
+-/* Simplified build-specific string for starting entropy. */
+-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+-
+ struct regions __initdata regions;
+
+ static __init void kaslr_get_cmdline(void *fdt)
+@@ -72,7 +67,8 @@ static unsigned long __init get_boot_seed(void *fdt)
+ {
+ unsigned long hash = 0;
+
+- hash = rotate_xor(hash, build_str, sizeof(build_str));
++ /* build-specific string for starting entropy. */
++ hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+ hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+ return hash;
+diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
+index bf24451f3e71f..9235e720e3572 100644
+--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
++++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
+@@ -222,7 +222,7 @@ tlb_miss_kernel_bolted:
+
+ tlb_miss_fault_bolted:
+ /* We need to check if it was an instruction miss */
+- andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
++ andi. r10,r11,_PAGE_BAP_UX|_PAGE_BAP_SX
+ bne itlb_miss_fault_bolted
+ dtlb_miss_fault_bolted:
+ tlb_epilog_bolted
+@@ -239,7 +239,7 @@ itlb_miss_fault_bolted:
+ srdi r15,r16,60 /* get region */
+ bne- itlb_miss_fault_bolted
+
+- li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
++ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+@@ -614,7 +614,7 @@ itlb_miss_fault_e6500:
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+- li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
++ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */
+ oris r11,r11,_PAGE_ACCESSED@h
+
+ cmpldi cr0,r15,0 /* Check for user region */
+@@ -734,7 +734,7 @@ normal_tlb_miss_done:
+
+ normal_tlb_miss_access_fault:
+ /* We need to check if it was an instruction miss */
+- andi. r10,r11,_PAGE_EXEC
++ andi. r10,r11,_PAGE_BAP_UX
+ bne 1f
+ ld r14,EX_TLB_DEAR(r12)
+ ld r15,EX_TLB_ESR(r12)
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index 6f14c8fb6359d..9c038c8cebebc 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -367,6 +367,7 @@ void update_numa_distance(struct device_node *node)
+ WARN(numa_distance_table[nid][nid] == -1,
+ "NUMA distance details for node %d not provided\n", nid);
+ }
++EXPORT_SYMBOL_GPL(update_numa_distance);
+
+ /*
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+@@ -376,9 +377,9 @@ static void initialize_form2_numa_distance_lookup_table(void)
+ {
+ int i, j;
+ struct device_node *root;
+- const __u8 *numa_dist_table;
++ const __u8 *form2_distances;
+ const __be32 *numa_lookup_index;
+- int numa_dist_table_length;
++ int form2_distances_length;
+ int max_numa_index, distance_index;
+
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+@@ -392,45 +393,41 @@ static void initialize_form2_numa_distance_lookup_table(void)
+ max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+ /* first element of the array is the size and is encode-int */
+- numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
+- numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
++ form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL);
++ form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1);
+ /* Skip the size which is encoded int */
+- numa_dist_table += sizeof(__be32);
++ form2_distances += sizeof(__be32);
+
+- pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
+- numa_dist_table_length, max_numa_index);
++ pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
++ form2_distances_length, max_numa_index);
+
+ for (i = 0; i < max_numa_index; i++)
+ /* +1 skip the max_numa_index in the property */
+ numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+
+- if (numa_dist_table_length != max_numa_index * max_numa_index) {
++ if (form2_distances_length != max_numa_index * max_numa_index) {
+ WARN(1, "Wrong NUMA distance information\n");
+- /* consider everybody else just remote. */
+- for (i = 0; i < max_numa_index; i++) {
+- for (j = 0; j < max_numa_index; j++) {
+- int nodeA = numa_id_index_table[i];
+- int nodeB = numa_id_index_table[j];
+-
+- if (nodeA == nodeB)
+- numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
+- else
+- numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
+- }
+- }
++ form2_distances = NULL; // don't use it
+ }
+-
+ distance_index = 0;
+ for (i = 0; i < max_numa_index; i++) {
+ for (j = 0; j < max_numa_index; j++) {
+ int nodeA = numa_id_index_table[i];
+ int nodeB = numa_id_index_table[j];
+-
+- numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
+- pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
++ int dist;
++
++ if (form2_distances)
++ dist = form2_distances[distance_index++];
++ else if (nodeA == nodeB)
++ dist = LOCAL_DISTANCE;
++ else
++ dist = REMOTE_DISTANCE;
++ numa_distance_table[nodeA][nodeB] = dist;
++ pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
+ }
+ }
++
+ of_node_put(root);
+ }
+
+@@ -960,7 +957,9 @@ static int __init parse_numa_properties(void)
+ of_node_put(cpu);
+ }
+
+- node_set_online(nid);
++ /* node_set_online() is an UB if 'nid' is negative */
++	/* node_set_online() is UB if 'nid' is negative */
++ node_set_online(nid);
+ }
+
+ get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
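
The numa.c rework above folds the malformed-table fallback into the single fill loop: when the ibm,numa-distance-table length does not match, form2_distances is set to NULL and local/remote defaults are used per node pair instead. A self-contained sketch of that per-entry selection (distance constants assumed to match include/linux/topology.h):

	#define LOCAL_DISTANCE	10
	#define REMOTE_DISTANCE	20

	/* Sketch: pick one distance entry, falling back to defaults when the
	 * firmware table was rejected (form2_distances == NULL). */
	static int pick_distance(const unsigned char *form2_distances, int idx,
				 int node_a, int node_b)
	{
		if (form2_distances)
			return form2_distances[idx];
		return node_a == node_b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
	}
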
+diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
+index edea388e9d3fb..85753e32a4de9 100644
+--- a/arch/powerpc/mm/pageattr.c
++++ b/arch/powerpc/mm/pageattr.c
+@@ -15,12 +15,14 @@
+ #include <asm/pgtable.h>
+
+
++static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
++ unsigned long old, unsigned long new)
++{
++ return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
++}
++
+ /*
+- * Updates the attributes of a page in three steps:
+- *
+- * 1. take the page_table_lock
+- * 2. install the new entry with the updated attributes
+- * 3. flush the TLB
++ * Updates the attributes of a page atomically.
+ *
+ * This sequence is safe against concurrent updates, and also allows updating the
+ * attributes of a page currently being executed or accessed.
+@@ -28,41 +30,39 @@
+ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+ {
+ long action = (long)data;
+- pte_t pte;
+
+- spin_lock(&init_mm.page_table_lock);
+-
+- pte = ptep_get(ptep);
+-
+- /* modify the PTE bits as desired, then apply */
++ /* modify the PTE bits as desired */
+ switch (action) {
+ case SET_MEMORY_RO:
+- pte = pte_wrprotect(pte);
++ /* Don't clear DIRTY bit */
++ pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
+ break;
+ case SET_MEMORY_RW:
+- pte = pte_mkwrite(pte_mkdirty(pte));
++ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
+ break;
+ case SET_MEMORY_NX:
+- pte = pte_exprotect(pte);
++ pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
+ break;
+ case SET_MEMORY_X:
+- pte = pte_mkexec(pte);
++ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
++ break;
++ case SET_MEMORY_NP:
++ pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
++ break;
++ case SET_MEMORY_P:
++ pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+- pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0);
+-
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+- spin_unlock(&init_mm.page_table_lock);
+-
+ return 0;
+ }
+
+@@ -96,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action)
+ return apply_to_existing_page_range(&init_mm, start, size,
+ change_page_attr, (void *)action);
+ }
+-
+-/*
+- * Set the attributes of a page:
+- *
+- * This function is used by PPC32 at the end of init to set final kernel memory
+- * protection. It includes changing the maping of the page it is executing from
+- * and data pages it is using.
+- */
+-static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
+-{
+- pgprot_t prot = __pgprot((unsigned long)data);
+-
+- spin_lock(&init_mm.page_table_lock);
+-
+- set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
+- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+-
+- spin_unlock(&init_mm.page_table_lock);
+-
+- return 0;
+-}
+-
+-int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
+-{
+- unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+- unsigned long sz = numpages * PAGE_SIZE;
+-
+- if (numpages <= 0)
+- return 0;
+-
+- return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
+- (void *)pgprot_val(prot));
+-}
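
The change_page_attr() rewrite above drops the read-modify-write under page_table_lock in favour of a single atomic pte_update(). The clear/set masks given to pte_update_delta() are built so that bits common to the old and new permission sets are never touched, which is how a concurrently set DIRTY bit survives SET_MEMORY_RO. The mask derivation in isolation (sketch):

	/* Sketch of the pte_update_delta() masks: clear only bits that are in
	 * 'old' but not in 'new', set only bits in 'new' but not in 'old'. */
	static void delta_masks(unsigned long old, unsigned long new,
				unsigned long *clr, unsigned long *set)
	{
		*clr = old & ~new;
		*set = new & ~old;
	}
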
+diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
+index cd16b407f47e1..9a93c1a5aa1d1 100644
+--- a/arch/powerpc/mm/pgtable.c
++++ b/arch/powerpc/mm/pgtable.c
+@@ -203,6 +203,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ __set_pte_at(mm, addr, ptep, pte, 0);
+ }
+
++void unmap_kernel_page(unsigned long va)
++{
++ pmd_t *pmdp = pmd_off_k(va);
++ pte_t *ptep = pte_offset_kernel(pmdp, va);
++
++ pte_clear(&init_mm, va, ptep);
++ flush_tlb_kernel_range(va, va + PAGE_SIZE);
++}
++
+ /*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
+index dcf5ecca19d99..502e3d3d1dbf7 100644
+--- a/arch/powerpc/mm/pgtable_32.c
++++ b/arch/powerpc/mm/pgtable_32.c
+@@ -138,10 +138,12 @@ void mark_initmem_nx(void)
+ unsigned long numpages = PFN_UP((unsigned long)_einittext) -
+ PFN_DOWN((unsigned long)_sinittext);
+
+- if (v_block_mapped((unsigned long)_sinittext))
+- mmu_mark_initmem_nx();
+- else
+- set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
++ mmu_mark_initmem_nx();
++
++ if (!v_block_mapped((unsigned long)_sinittext)) {
++ set_memory_nx((unsigned long)_sinittext, numpages);
++ set_memory_rw((unsigned long)_sinittext, numpages);
++ }
+ }
+
+ #ifdef CONFIG_STRICT_KERNEL_RWX
+@@ -155,25 +157,21 @@ void mark_rodata_ro(void)
+ return;
+ }
+
+- numpages = PFN_UP((unsigned long)_etext) -
+- PFN_DOWN((unsigned long)_stext);
+-
+- set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
+ /*
+- * mark .rodata as read only. Use __init_begin rather than __end_rodata
+- * to cover NOTES and EXCEPTION_TABLE.
++ * mark .text and .rodata as read only. Use __init_begin rather than
++ * __end_rodata to cover NOTES and EXCEPTION_TABLE.
+ */
+ numpages = PFN_UP((unsigned long)__init_begin) -
+- PFN_DOWN((unsigned long)__start_rodata);
++ PFN_DOWN((unsigned long)_stext);
+
+- set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
++ set_memory_ro((unsigned long)_stext, numpages);
+
+ // mark_initmem_nx() should have already run by now
+ ptdump_check_wx();
+ }
+ #endif
+
+-#ifdef CONFIG_DEBUG_PAGEALLOC
++#if defined(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && defined(CONFIG_DEBUG_PAGEALLOC)
+ void __kernel_map_pages(struct page *page, int numpages, int enable)
+ {
+ unsigned long addr = (unsigned long)page_address(page);
+@@ -182,8 +180,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
+ return;
+
+ if (enable)
+- set_memory_attr(addr, numpages, PAGE_KERNEL);
++ set_memory_p(addr, numpages);
+ else
+- set_memory_attr(addr, numpages, __pgprot(0));
++ set_memory_np(addr, numpages);
+ }
+ #endif /* CONFIG_DEBUG_PAGEALLOC */
+diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
+index 78c8cf01db5f9..175aabf101e87 100644
+--- a/arch/powerpc/mm/pgtable_64.c
++++ b/arch/powerpc/mm/pgtable_64.c
+@@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift);
+ struct page *p4d_page(p4d_t p4d)
+ {
+ if (p4d_is_leaf(p4d)) {
+- VM_WARN_ON(!p4d_huge(p4d));
++ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
++ VM_WARN_ON(!p4d_huge(p4d));
+ return pte_page(p4d_pte(p4d));
+ }
+ return virt_to_page(p4d_pgtable(p4d));
+@@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d)
+ struct page *pud_page(pud_t pud)
+ {
+ if (pud_is_leaf(pud)) {
+- VM_WARN_ON(!pud_huge(pud));
++ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
++ VM_WARN_ON(!pud_huge(pud));
+ return pte_page(pud_pte(pud));
+ }
+ return virt_to_page(pud_pgtable(pud));
+@@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud)
+ struct page *pmd_page(pmd_t pmd)
+ {
+ if (pmd_is_leaf(pmd)) {
+- VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
++ /*
++ * vmalloc_to_page may be called on any vmap address (not only
++ * vmalloc), and it uses pmd_page() etc., when huge vmap is
++ * enabled so these checks can't be used.
++ */
++ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
++ VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
+ return pte_page(pmd_pte(pmd));
+ }
+ return virt_to_page(pmd_page_vaddr(pmd));
+diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
+index bf251191e78d9..32bfb215c4858 100644
+--- a/arch/powerpc/mm/ptdump/ptdump.c
++++ b/arch/powerpc/mm/ptdump/ptdump.c
+@@ -183,7 +183,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
+ {
+ pte_t pte = __pte(st->current_flags);
+
+- if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
++ if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx)
+ return;
+
+ if (!pte_write(pte) || !pte_exec(pte))
+diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
+index 03607ab90c66f..f884760ca5cfe 100644
+--- a/arch/powerpc/mm/ptdump/shared.c
++++ b/arch/powerpc/mm/ptdump/shared.c
+@@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = {
+ .clear = " ",
+ }, {
+ .mask = _PAGE_RW,
+- .val = _PAGE_RW,
+- .set = "rw",
+- .clear = "r ",
++ .val = 0,
++ .set = "r ",
++ .clear = "rw",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
+index fcbf7a917c566..8acf8a611a265 100644
+--- a/arch/powerpc/net/bpf_jit_comp.c
++++ b/arch/powerpc/net/bpf_jit_comp.c
+@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
+ memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
+ }
+
+-/* Fix the branch target addresses for subprog calls */
+-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+- struct codegen_context *ctx, u32 *addrs)
++/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
++static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
++ struct codegen_context *ctx, u32 *addrs)
+ {
+ const struct bpf_insn *insn = fp->insnsi;
+ bool func_addr_fixed;
+ u64 func_addr;
+ u32 tmp_idx;
+- int i, ret;
++ int i, j, ret;
+
+ for (i = 0; i < fp->len; i++) {
+ /*
+@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+ * of the JITed sequence remains unchanged.
+ */
+ ctx->idx = tmp_idx;
++ } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
++ tmp_idx = ctx->idx;
++ ctx->idx = addrs[i] / 4;
++#ifdef CONFIG_PPC32
++ PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
++ PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
++ for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
++ EMIT(PPC_RAW_NOP());
++#else
++ func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
++ PPC_LI64(b2p[insn[i].dst_reg], func_addr);
++ /* overwrite rest with nops */
++ for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
++ EMIT(PPC_RAW_NOP());
++#endif
++ ctx->idx = tmp_idx;
++ i++;
+ }
+ }
+
+@@ -193,13 +210,13 @@ skip_init_ctx:
+ /*
+ * Do not touch the prologue and epilogue as they will remain
+ * unchanged. Only fix the branch target address for subprog
+- * calls in the body.
++ * calls in the body, and ldimm64 instructions.
+ *
+ * This does not change the offsets and lengths of the subprog
+ * call instruction sequences and hence, the size of the JITed
+ * image as well.
+ */
+- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
++ bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
+
+ /* There is no need to perform the usual passes. */
+ goto skip_codegen_passes;
+@@ -241,8 +258,8 @@ skip_codegen_passes:
+ fp->jited_len = alloclen;
+
+ bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+- bpf_jit_binary_lock_ro(bpf_hdr);
+ if (!fp->is_func || extra_pass) {
++ bpf_jit_binary_lock_ro(bpf_hdr);
+ bpf_prog_fill_jited_linfo(fp, addrs);
+ out_addrs:
+ kfree(addrs);
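
The extra-pass fixup above depends on ldimm64 always occupying a fixed-size slot (4 instructions on ppc32, 5 on ppc64), padded with nops by the emitters in the bpf_jit_comp32.c and bpf_jit_comp64.c hunks further down, so re-emitting the immediate cannot shift any later offsets. A self-contained sketch of the padding idea (the ppc nop encoding, ori r0,r0,0, is an assumption of this sketch):

	#include <stdint.h>

	#define SLOT_INSNS	5		/* 64-bit ldimm64 slot */
	#define PPC_NOP		0x60000000u	/* ori r0,r0,0 */

	/* Sketch: place a variable-length sequence in a fixed-size slot and
	 * pad with nops so later in-place patching keeps all offsets stable. */
	static int emit_fixed_slot(uint32_t *image, int idx,
				   const uint32_t *insns, int n)
	{
		int i;

		for (i = 0; i < n && i < SLOT_INSNS; i++)
			image[idx++] = insns[i];
		for (; i < SLOT_INSNS; i++)
			image[idx++] = PPC_NOP;
		return idx;
	}
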
+diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
+index 0da31d41d4131..bce5eda85170f 100644
+--- a/arch/powerpc/net/bpf_jit_comp32.c
++++ b/arch/powerpc/net/bpf_jit_comp32.c
+@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
+
+ if (image && rel < 0x2000000 && rel >= -0x2000000) {
+ PPC_BL_ABS(func);
++ EMIT(PPC_RAW_NOP());
++ EMIT(PPC_RAW_NOP());
++ EMIT(PPC_RAW_NOP());
+ } else {
+ /* Load function address into r0 */
+ EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+@@ -289,6 +292,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
+ bool func_addr_fixed;
+ u64 func_addr;
+ u32 true_cond;
++ u32 tmp_idx;
++ int j;
+
+ /*
+ * addrs[] maps a BPF bytecode address into a real offset from
+@@ -836,8 +841,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
+ * 16 byte instruction that uses two 'struct bpf_insn'
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
++ tmp_idx = ctx->idx;
+ PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+ PPC_LI32(dst_reg, (u32)insn[i].imm);
++ /* padding to allow full 4 instructions for later patching */
++ for (j = ctx->idx - tmp_idx; j < 4; j++)
++ EMIT(PPC_RAW_NOP());
+ /* Adjust for two bpf instructions */
+ addrs[++i] = ctx->idx * 4;
+ break;
+diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
+index 8b5157ccfebae..57e1b6680365c 100644
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -318,6 +318,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
+ u64 imm64;
+ u32 true_cond;
+ u32 tmp_idx;
++ int j;
+
+ /*
+ * addrs[] maps a BPF bytecode address into a real offset from
+@@ -632,17 +633,21 @@ bpf_alu32_trunc:
+ EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
+ break;
+ case 64:
+- /*
+- * Way easier and faster(?) to store the value
+- * into stack and then use ldbrx
+- *
+- * ctx->seen will be reliable in pass2, but
+- * the instructions generated will remain the
+- * same across all passes
+- */
++ /* Store the value to stack and then use byte-reverse loads */
+ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
+- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
++ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ } else {
++ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
++ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
++ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
++ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
++ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
++ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
++ }
+ break;
+ }
+ break;
+@@ -806,9 +811,13 @@ emit_clear:
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ imm64 = ((u64)(u32) insn[i].imm) |
+ (((u64)(u32) insn[i+1].imm) << 32);
++ tmp_idx = ctx->idx;
++ PPC_LI64(dst_reg, imm64);
++ /* padding to allow full 5 instructions for later patching */
++ for (j = ctx->idx - tmp_idx; j < 5; j++)
++ EMIT(PPC_RAW_NOP());
+ /* Adjust for two bpf instructions */
+ addrs[++i] = ctx->idx * 4;
+- PPC_LI64(dst_reg, imm64);
+ break;
+
+ /*
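
In the 64-bit BSWAP case above, CPUs without CPU_FTR_ARCH_206 (no ldbrx) store the value to the stack and reload it as two byte-reversed 32-bit words, which are then shifted and OR-ed together. The same composition in portable C (sketch; uses the GCC/Clang __builtin_bswap32):

	#include <stdint.h>

	/* Sketch: byte-reverse a 64-bit value by byte-reversing each 32-bit
	 * half and swapping the halves, as the lwbrx fallback does. */
	static uint64_t bswap64_by_halves(uint64_t v)
	{
		uint64_t hi = __builtin_bswap32((uint32_t)v);		/* low half -> high */
		uint64_t lo = __builtin_bswap32((uint32_t)(v >> 32));	/* high half -> low */

		return (hi << 32) | lo;
	}
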
+diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
+index 2f46e31c76129..4f53d0b97539b 100644
+--- a/arch/powerpc/perf/Makefile
++++ b/arch/powerpc/perf/Makefile
+@@ -3,11 +3,11 @@
+ obj-y += callchain.o callchain_$(BITS).o perf_regs.o
+ obj-$(CONFIG_COMPAT) += callchain_32.o
+
+-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
++obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
+ obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
+ power5+-pmu.o power6-pmu.o power7-pmu.o \
+ isa207-common.o power8-pmu.o power9-pmu.o \
+- generic-compat-pmu.o power10-pmu.o
++ generic-compat-pmu.o power10-pmu.o bhrb.o
+ obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
+
+ obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
+diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
+index 082f6d0308a47..8718289c051dd 100644
+--- a/arch/powerpc/perf/callchain.c
++++ b/arch/powerpc/perf/callchain.c
+@@ -61,6 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
+ next_sp = fp[0];
+
+ if (next_sp == sp + STACK_INT_FRAME_SIZE &&
++ validate_sp(sp, current, STACK_INT_FRAME_SIZE) &&
+ fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ /*
+ * This looks like an interrupt frame for an
+diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
+index d6fa6e25234f4..19a8d051ddf10 100644
+--- a/arch/powerpc/perf/callchain.h
++++ b/arch/powerpc/perf/callchain.h
+@@ -2,7 +2,6 @@
+ #ifndef _POWERPC_PERF_CALLCHAIN_H
+ #define _POWERPC_PERF_CALLCHAIN_H
+
+-int read_user_stack_slow(const void __user *ptr, void *buf, int nb);
+ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+@@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret,
+ size_t size)
+ {
+ unsigned long addr = (unsigned long)ptr;
+- int rc;
+
+ if (addr > TASK_SIZE - size || (addr & (size - 1)))
+ return -EFAULT;
+
+- rc = copy_from_user_nofault(ret, ptr, size);
+-
+- if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc)
+- return read_user_stack_slow(ptr, ret, size);
+-
+- return rc;
++ return copy_from_user_nofault(ret, ptr, size);
+ }
+
+ #endif /* _POWERPC_PERF_CALLCHAIN_H */
+diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
+index 8d0df4226328d..488e8a21a11ea 100644
+--- a/arch/powerpc/perf/callchain_64.c
++++ b/arch/powerpc/perf/callchain_64.c
+@@ -18,33 +18,6 @@
+
+ #include "callchain.h"
+
+-/*
+- * On 64-bit we don't want to invoke hash_page on user addresses from
+- * interrupt context, so if the access faults, we read the page tables
+- * to find which page (if any) is mapped and access it directly. Radix
+- * has no need for this so it doesn't use read_user_stack_slow.
+- */
+-int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
+-{
+-
+- unsigned long addr = (unsigned long) ptr;
+- unsigned long offset;
+- struct page *page;
+- void *kaddr;
+-
+- if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) {
+- kaddr = page_address(page);
+-
+- /* align address to page boundary */
+- offset = addr & ~PAGE_MASK;
+-
+- memcpy(buf, kaddr + offset, nb);
+- put_page(page);
+- return 0;
+- }
+- return -EFAULT;
+-}
+-
+ static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
+ {
+ return __read_user_stack(ptr, ret, sizeof(*ret));
+diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
+index 73e62e9b179bc..1078784b74c9b 100644
+--- a/arch/powerpc/perf/core-book3s.c
++++ b/arch/powerpc/perf/core-book3s.c
+@@ -857,6 +857,19 @@ static void write_pmc(int idx, unsigned long val)
+ }
+ }
+
++static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
++{
++ int i, idx;
++
++ for (i = 0; i < cpuhw->n_events; i++) {
++ idx = cpuhw->event[i]->hw.idx;
++ if ((idx) && ((int)read_pmc(idx) < 0))
++ return idx;
++ }
++
++ return 0;
++}
++
+ /* Called from sysrq_handle_showregs() */
+ void perf_event_print_debug(void)
+ {
+@@ -1281,11 +1294,13 @@ static void power_pmu_disable(struct pmu *pmu)
+
+ /*
+ * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
++ * Also clear PMXE to disable PMI's getting triggered in some
++ * corner cases during PMU disable.
+ */
+ val = mmcr0 = mfspr(SPRN_MMCR0);
+ val |= MMCR0_FC;
+ val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
+- MMCR0_FC56);
++ MMCR0_PMXE | MMCR0_FC56);
+ /* Set mmcr0 PMCCEXT for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCR0_PMCCEXT;
+@@ -1299,6 +1314,29 @@ static void power_pmu_disable(struct pmu *pmu)
+ mb();
+ isync();
+
++ /*
++ * Some corner cases could clear the PMU counter overflow
++ * while a masked PMI is pending. One such case is when
++ * a PMI happens during interrupt replay and perf counter
++ * values are cleared by PMU callbacks before replay.
++ *
++ * Disable the interrupt by clearing the paca bit for PMI
++ * since we are disabling the PMU now. Otherwise provide a
++	 * warning if a PMI is pending but no counter is found to
++	 * have overflowed.
++ *
++ * Since power_pmu_disable runs under local_irq_save, it
++ * could happen that code hits a PMC overflow without PMI
++ * pending in paca. Hence only clear PMI pending if it was
++ * set.
++ *
++ * If a PMI is pending, then MSR[EE] must be disabled (because
++	 * the masked PMI handler disables EE). So it is safe to
++ * call clear_pmi_irq_pending().
++ */
++ if (pmi_irq_pending())
++ clear_pmi_irq_pending();
++
+ val = mmcra = cpuhw->mmcr.mmcra;
+
+ /*
+@@ -1390,6 +1428,15 @@ static void power_pmu_enable(struct pmu *pmu)
+ * (possibly updated for removal of events).
+ */
+ if (!cpuhw->n_added) {
++ /*
++ * If there is any active event with an overflown PMC
++ * value, set back PACA_IRQ_PMI which would have been
++ * cleared in power_pmu_disable().
++ */
++ hard_irq_disable();
++ if (any_pmc_overflown(cpuhw))
++ set_pmi_irq_pending();
++
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ if (ppmu->flags & PPMU_ARCH_31)
+@@ -2337,6 +2384,14 @@ static void __perf_event_interrupt(struct pt_regs *regs)
+ break;
+ }
+ }
++
++ /*
++ * Clear PACA_IRQ_PMI in case it was set by
++ * set_pmi_irq_pending() when PMU was enabled
++ * after accounting for interrupts.
++ */
++ clear_pmi_irq_pending();
++
+ if (!active)
+ /* reset non active counters that have overflowed */
+ write_pmc(i + 1, 0);
+@@ -2356,6 +2411,13 @@ static void __perf_event_interrupt(struct pt_regs *regs)
+ }
+ }
+ }
++
++ /*
++	 * During system wide profiling, or while a specific CPU is monitored for an
++	 * event, some corner cases can cause a PMC to overflow in the idle path. This
++	 * triggers a PMI after waking up from idle. Since counter values are _not_
++	 * saved/restored in the idle path, it can lead to the "Can't find PMC" message below.
++ */
+ if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+ printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
+
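
The any_pmc_overflown() helper added above treats a counter as overflown when its 32-bit value has the sign bit set, the same convention the interrupt handler uses when scanning PMCs. Stated on its own (sketch):

	#include <stdbool.h>
	#include <stdint.h>

	/* Sketch: a PMC is "overflown" (a PMI is or will become pending)
	 * once bit 31 is set, i.e. the value is negative read as an s32. */
	static bool pmc_overflown(uint32_t pmc_val)
	{
		return (int32_t)pmc_val < 0;
	}
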
+diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
+index ee721f420a7ba..1a53ab08447cb 100644
+--- a/arch/powerpc/perf/core-fsl-emb.c
++++ b/arch/powerpc/perf/core-fsl-emb.c
+@@ -645,7 +645,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event;
+ unsigned long val;
+- int found = 0;
+
+ for (i = 0; i < ppmu->n_counter; ++i) {
+ event = cpuhw->event[i];
+@@ -654,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
+ if ((int)val < 0) {
+ if (event) {
+ /* event has overflowed */
+- found = 1;
+ record_and_restart(event, val, regs);
+ } else {
+ /*
+@@ -672,11 +670,13 @@ static void perf_event_interrupt(struct pt_regs *regs)
+ isync();
+ }
+
+-void hw_perf_event_setup(int cpu)
++static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
+ {
+ struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+ memset(cpuhw, 0, sizeof(*cpuhw));
++
++ return 0;
+ }
+
+ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
+@@ -689,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
+ pmu->name);
+
+ perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
++ cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
++ fsl_emb_pmu_prepare_cpu, NULL);
+
+ return 0;
+ }
+diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
+index 8965b4463d433..5e86371a20c78 100644
+--- a/arch/powerpc/perf/hv-gpci-requests.h
++++ b/arch/powerpc/perf/hv-gpci-requests.h
+@@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id)
+ )
+ #include I(REQUEST_END)
+
++#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+ /*
+ * Not available for counter_info_version >= 0x8, use
+ * run_instruction_cycles_by_partition(0x100) instead.
+@@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id)
+ __count(0x10, 8, cycles)
+ )
+ #include I(REQUEST_END)
++#endif
+
+ #define REQUEST_NAME system_performance_capabilities
+ #define REQUEST_NUM 0x40
+@@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged)
+ )
+ #include I(REQUEST_END)
+
++#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+ #define REQUEST_NAME processor_bus_utilization_abc_links
+ #define REQUEST_NUM 0x50
+ #define REQUEST_IDX_KIND "hw_chip_id=?"
+@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx)
+ __count(0x28, 8, instructions_completed)
+ )
+ #include I(REQUEST_END)
++#endif
+
+ /* Processor_core_power_mode (0x95) skipped, no counters */
+ /* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
+diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
+index c756228a081fb..28b770bbc10b4 100644
+--- a/arch/powerpc/perf/hv-gpci.c
++++ b/arch/powerpc/perf/hv-gpci.c
+@@ -72,7 +72,7 @@ static struct attribute_group format_group = {
+
+ static struct attribute_group event_group = {
+ .name = "events",
+- .attrs = hv_gpci_event_attrs,
++ /* .attrs is set in init */
+ };
+
+ #define HV_CAPS_ATTR(_name, _format) \
+@@ -330,6 +330,7 @@ static int hv_gpci_init(void)
+ int r;
+ unsigned long hret;
+ struct hv_perf_caps caps;
++ struct hv_gpci_request_buffer *arg;
+
+ hv_gpci_assert_offsets_correct();
+
+@@ -353,6 +354,36 @@ static int hv_gpci_init(void)
+ /* sampling not supported */
+ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
++ arg = (void *)get_cpu_var(hv_gpci_reqb);
++ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
++
++ /*
++ * hcall H_GET_PERF_COUNTER_INFO populates the output
++ * counter_info_version value based on the system hypervisor.
++	 * Pass counter request 0x10, which corresponds to request type
++	 * 'Dispatch_timebase_by_processor', to get the supported
++	 * counter_info_version.
++ */
++ arg->params.counter_request = cpu_to_be32(0x10);
++
++ r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
++ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
++ if (r) {
++ pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
++ arg->params.counter_info_version_out = 0x8;
++ }
++
++ /*
++	 * Use the counter_info_version_out value to select the
++	 * required hv-gpci event list.
++ */
++ if (arg->params.counter_info_version_out >= 0x8)
++ event_group.attrs = hv_gpci_event_attrs;
++ else
++ event_group.attrs = hv_gpci_event_attrs_v6;
++
++ put_cpu_var(hv_gpci_reqb);
++
+ r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
+ if (r)
+ return r;
+diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
+index 4d108262bed79..c72020912dea5 100644
+--- a/arch/powerpc/perf/hv-gpci.h
++++ b/arch/powerpc/perf/hv-gpci.h
+@@ -26,6 +26,7 @@ enum {
+ #define REQUEST_FILE "../hv-gpci-requests.h"
+ #define NAME_LOWER hv_gpci
+ #define NAME_UPPER HV_GPCI
++#define ENABLE_EVENTS_COUNTERINFO_V6
+ #include "req-gen/perf.h"
+ #undef REQUEST_FILE
+ #undef NAME_LOWER
+diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
+index e106909ff9c37..b8a100b9736c7 100644
+--- a/arch/powerpc/perf/imc-pmu.c
++++ b/arch/powerpc/perf/imc-pmu.c
+@@ -13,6 +13,7 @@
+ #include <asm/cputhreads.h>
+ #include <asm/smp.h>
+ #include <linux/string.h>
++#include <linux/spinlock.h>
+
+ /* Nest IMC data structures and variables */
+
+@@ -49,7 +50,7 @@ static int trace_imc_mem_size;
+ * core and trace-imc
+ */
+ static struct imc_pmu_ref imc_global_refc = {
+- .lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
++ .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock),
+ .id = 0,
+ .refc = 0,
+ };
+@@ -393,7 +394,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+ get_hard_smp_processor_id(cpu));
+ /*
+ * If this is the last cpu in this chip then, skip the reference
+- * count mutex lock and make the reference count on this chip zero.
++ * count lock and make the reference count on this chip zero.
+ */
+ ref = get_nest_pmu_ref(cpu);
+ if (!ref)
+@@ -455,15 +456,15 @@ static void nest_imc_counters_release(struct perf_event *event)
+ /*
+ * See if we need to disable the nest PMU.
+ * If no events are currently in use, then we have to take a
+- * mutex to ensure that we don't race with another task doing
++ * lock to ensure that we don't race with another task doing
+ * enable or disable the nest counters.
+ */
+ ref = get_nest_pmu_ref(event->cpu);
+ if (!ref)
+ return;
+
+- /* Take the mutex lock for this node and then decrement the reference count */
+- mutex_lock(&ref->lock);
++ /* Take the lock for this node and then decrement the reference count */
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ /*
+ * The scenario where this is true is, when perf session is
+@@ -475,7 +476,7 @@ static void nest_imc_counters_release(struct perf_event *event)
+ * an OPAL call to disable the engine in that node.
+ *
+ */
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ return;
+ }
+ ref->refc--;
+@@ -483,7 +484,7 @@ static void nest_imc_counters_release(struct perf_event *event)
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
+ return;
+ }
+@@ -491,7 +492,7 @@ static void nest_imc_counters_release(struct perf_event *event)
+ WARN(1, "nest-imc: Invalid event reference count\n");
+ ref->refc = 0;
+ }
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ }
+
+ static int nest_imc_event_init(struct perf_event *event)
+@@ -550,26 +551,25 @@ static int nest_imc_event_init(struct perf_event *event)
+
+ /*
+ * Get the imc_pmu_ref struct for this node.
+- * Take the mutex lock and then increment the count of nest pmu events
+- * inited.
++ * Take the lock and then increment the count of nest pmu events inited.
+ */
+ ref = get_nest_pmu_ref(event->cpu);
+ if (!ref)
+ return -EINVAL;
+
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("nest-imc: Unable to start the counters for node %d\n",
+ node_id);
+ return rc;
+ }
+ }
+ ++ref->refc;
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+
+ event->destroy = nest_imc_counters_release;
+ return 0;
+@@ -605,9 +605,8 @@ static int core_imc_mem_init(int cpu, int size)
+ return -ENOMEM;
+ mem_info->vbase = page_address(page);
+
+- /* Init the mutex */
+ core_imc_refc[core_id].id = core_id;
+- mutex_init(&core_imc_refc[core_id].lock);
++ spin_lock_init(&core_imc_refc[core_id].lock);
+
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+ __pa((void *)mem_info->vbase),
+@@ -696,9 +695,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
+ perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
+ } else {
+ /*
+- * If this is the last cpu in this core then, skip taking refernce
+- * count mutex lock for this core and directly zero "refc" for
+- * this core.
++ * If this is the last cpu in this core then skip taking reference
++ * count lock for this core and directly zero "refc" for this core.
+ */
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(cpu));
+@@ -713,11 +711,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
+ * last cpu in this core and core-imc event running
+ * in this cpu.
+ */
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_CORE)
+ imc_global_refc.refc--;
+
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+ }
+ return 0;
+ }
+@@ -732,7 +730,7 @@ static int core_imc_pmu_cpumask_init(void)
+
+ static void reset_global_refc(struct perf_event *event)
+ {
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ imc_global_refc.refc--;
+
+ /*
+@@ -744,7 +742,7 @@ static void reset_global_refc(struct perf_event *event)
+ imc_global_refc.refc = 0;
+ imc_global_refc.id = 0;
+ }
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+ }
+
+ static void core_imc_counters_release(struct perf_event *event)
+@@ -757,17 +755,17 @@ static void core_imc_counters_release(struct perf_event *event)
+ /*
+ * See if we need to disable the IMC PMU.
+ * If no events are currently in use, then we have to take a
+- * mutex to ensure that we don't race with another task doing
++ * lock to ensure that we don't race with another task doing
+ * enable or disable the core counters.
+ */
+ core_id = event->cpu / threads_per_core;
+
+- /* Take the mutex lock and decrement the refernce count for this core */
++ /* Take the lock and decrement the refernce count for this core */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return;
+
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ /*
+ * The scenario where this is true is, when perf session is
+@@ -779,7 +777,7 @@ static void core_imc_counters_release(struct perf_event *event)
+ * an OPAL call to disable the engine in that core.
+ *
+ */
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ return;
+ }
+ ref->refc--;
+@@ -787,7 +785,7 @@ static void core_imc_counters_release(struct perf_event *event)
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+@@ -795,7 +793,7 @@ static void core_imc_counters_release(struct perf_event *event)
+ WARN(1, "core-imc: Invalid event reference count\n");
+ ref->refc = 0;
+ }
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+
+ reset_global_refc(event);
+ }
+@@ -833,7 +831,6 @@ static int core_imc_event_init(struct perf_event *event)
+ if ((!pcmi->vbase))
+ return -ENODEV;
+
+- /* Get the core_imc mutex for this core */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+@@ -841,22 +838,22 @@ static int core_imc_event_init(struct perf_event *event)
+ /*
+ * Core pmu units are enabled only when it is used.
+ * See if this is triggered for the first time.
+- * If yes, take the mutex lock and enable the core counters.
++ * If yes, take the lock and enable the core counters.
+ * If not, just increment the count in core_imc_refc struct.
+ */
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("core-imc: Unable to start the counters for core %d\n",
+ core_id);
+ return rc;
+ }
+ }
+ ++ref->refc;
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+
+ /*
+ * Since the system can run either in accumulation or trace-mode
+@@ -867,7 +864,7 @@ static int core_imc_event_init(struct perf_event *event)
+ * to know whether any other trace/thread imc
+ * events are running.
+ */
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+ /*
+ * No other trace/thread imc events are running in
+@@ -876,10 +873,10 @@ static int core_imc_event_init(struct perf_event *event)
+ imc_global_refc.id = IMC_DOMAIN_CORE;
+ imc_global_refc.refc++;
+ } else {
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+
+ event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+ event->destroy = core_imc_counters_release;
+@@ -951,10 +948,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+ /* Reduce the refc if thread-imc event running on this cpu */
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+ imc_global_refc.refc--;
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+
+ return 0;
+ }
+@@ -994,7 +991,7 @@ static int thread_imc_event_init(struct perf_event *event)
+ if (!target)
+ return -EINVAL;
+
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ /*
+ * Check if any other trace/core imc events are running in the
+ * system, if not set the global id to thread-imc.
+@@ -1003,10 +1000,10 @@ static int thread_imc_event_init(struct perf_event *event)
+ imc_global_refc.id = IMC_DOMAIN_THREAD;
+ imc_global_refc.refc++;
+ } else {
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+
+ event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
+@@ -1128,25 +1125,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
+ /*
+ * imc pmus are enabled only when it is used.
+ * See if this is triggered for the first time.
+- * If yes, take the mutex lock and enable the counters.
++ * If yes, take the lock and enable the counters.
+ * If not, just increment the count in ref count struct.
+ */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("thread-imc: Unable to start the counter\
+ for core %d\n", core_id);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ return 0;
+ }
+
+@@ -1163,12 +1160,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
+ return;
+ }
+
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("thread-imc: Unable to stop the counters\
+ for core %d\n", core_id);
+ return;
+@@ -1176,7 +1173,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+
+ /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+@@ -1217,9 +1214,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
+ }
+ }
+
+- /* Init the mutex, if not already */
+ trace_imc_refc[core_id].id = core_id;
+- mutex_init(&trace_imc_refc[core_id].lock);
++ spin_lock_init(&trace_imc_refc[core_id].lock);
+
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+@@ -1239,10 +1235,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+ * Reduce the refc if any trace-imc event running
+ * on this cpu.
+ */
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+ imc_global_refc.refc--;
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+
+ return 0;
+ }
+@@ -1364,17 +1360,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
+ }
+
+ mtspr(SPRN_LDBAR, ldbar_value);
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ return 0;
+ }
+
+@@ -1407,19 +1403,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
+ return;
+ }
+
+- mutex_lock(&ref->lock);
++ spin_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+- mutex_unlock(&ref->lock);
++ spin_unlock(&ref->lock);
+
+ trace_imc_event_stop(event, flags);
+ }
+@@ -1441,7 +1437,7 @@ static int trace_imc_event_init(struct perf_event *event)
+ * no other thread is running any core/thread imc
+ * events
+ */
+- mutex_lock(&imc_global_refc.lock);
++ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
+ /*
+ * No core/thread imc events are running in the
+@@ -1450,14 +1446,18 @@ static int trace_imc_event_init(struct perf_event *event)
+ imc_global_refc.id = IMC_DOMAIN_TRACE;
+ imc_global_refc.refc++;
+ } else {
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+- mutex_unlock(&imc_global_refc.lock);
++ spin_unlock(&imc_global_refc.lock);
+
+ event->hw.idx = -1;
+
+- event->pmu->task_ctx_nr = perf_hw_context;
++ /*
++ * There can only be a single PMU for perf_hw_context events which is assigned to
++ * core PMU. Hence use "perf_sw_context" for trace_imc.
++ */
++ event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
+ return 0;
+ }
+@@ -1522,10 +1522,10 @@ static int init_nest_pmu_ref(void)
+ i = 0;
+ for_each_node(nid) {
+ /*
+- * Mutex lock to avoid races while tracking the number of
++ * Take the lock to avoid races while tracking the number of
+ * sessions using the chip's nest pmu units.
+ */
+- mutex_init(&nest_imc_refc[i].lock);
++ spin_lock_init(&nest_imc_refc[i].lock);
+
+ /*
+ * Loop to init the "id" with the node_id. Variable "i" initialized to
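
The imc-pmu.c hunks above swap the per-core and global reference-count mutexes for spinlocks because these paths can be reached with interrupts disabled (for instance from the pmu ->add/->del callbacks), where a sleeping lock cannot be used. The resulting take-lock / count / first-user-enables / last-user-disables shape, as an illustrative kernel-style sketch (the struct and function names here are invented, not the driver's own):

#include <linux/spinlock.h>

struct imc_ref {
	spinlock_t lock;	/* protects refc; usable in atomic context */
	int refc;
};

static int imc_ref_get(struct imc_ref *ref, int (*start_counters)(void))
{
	int rc = 0;

	spin_lock(&ref->lock);
	if (ref->refc == 0)
		rc = start_counters();	/* first user turns the engine on */
	if (!rc)
		ref->refc++;
	spin_unlock(&ref->lock);
	return rc;
}

static void imc_ref_put(struct imc_ref *ref, void (*stop_counters)(void))
{
	spin_lock(&ref->lock);
	if (ref->refc > 0 && --ref->refc == 0)
		stop_counters();	/* last user turns the engine off */
	spin_unlock(&ref->lock);
}
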
+diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
+index f92bf5f6b74f1..027a2add780e8 100644
+--- a/arch/powerpc/perf/isa207-common.c
++++ b/arch/powerpc/perf/isa207-common.c
+@@ -108,7 +108,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+ *mmcra |= MMCRA_SDAR_MODE_TLB;
+ }
+
+-static u64 p10_thresh_cmp_val(u64 value)
++static int p10_thresh_cmp_val(u64 value)
+ {
+ int exp = 0;
+ u64 result = value;
+@@ -139,7 +139,7 @@ static u64 p10_thresh_cmp_val(u64 value)
+ * exponent is also zero.
+ */
+ if (!(value & 0xC0) && exp)
+- result = 0;
++ result = -1;
+ else
+ result = (exp << 8) | value;
+ }
+@@ -187,7 +187,7 @@ static bool is_thresh_cmp_valid(u64 event)
+ unsigned int cmp, exp;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+- return p10_thresh_cmp_val(event) != 0;
++ return p10_thresh_cmp_val(event) >= 0;
+
+ /*
+ * Check the mantissa upper two bits are not zero, unless the
+@@ -456,12 +456,14 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp,
+ value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ mask |= p10_CNST_THRESH_CMP_MASK;
+ value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
+- }
++ } else if (event_is_threshold(event))
++ return -1;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
+ mask |= CNST_THRESH_MASK;
+ value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
+- }
++ } else if (event_is_threshold(event))
++ return -1;
+ } else {
+ /*
+ * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
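
The p10_thresh_cmp_val() change in isa207-common.c works because 0 is itself a representable threshold-compare encoding, so it cannot double as the failure indicator; switching to a signed return lets -1 carry the error while every non-negative encoding stays usable, and is_thresh_cmp_valid() tests ">= 0" instead of "!= 0". A reduced sketch of that sentinel choice (a hypothetical field encoder, not the ISA math):

/* Return the encoded field, or -1 if the value cannot be encoded. */
static int encode_field(unsigned long value)
{
	if (value > 0xffff)	/* assumed range limit for the sketch */
		return -1;
	return (int)value;	/* note: 0 is a perfectly valid encoding */
}

static bool field_is_valid(unsigned long value)
{
	return encode_field(value) >= 0;	/* "!= 0" would reject a valid 0 */
}
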
+diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
+index 93be7197d2502..564f14097f07b 100644
+--- a/arch/powerpc/perf/power10-events-list.h
++++ b/arch/powerpc/perf/power10-events-list.h
+@@ -9,10 +9,10 @@
+ /*
+ * Power10 event codes.
+ */
+-EVENT(PM_RUN_CYC, 0x600f4);
++EVENT(PM_CYC, 0x600f4);
+ EVENT(PM_DISP_STALL_CYC, 0x100f8);
+ EVENT(PM_EXEC_STALL, 0x30008);
+-EVENT(PM_RUN_INST_CMPL, 0x500fa);
++EVENT(PM_INST_CMPL, 0x500fa);
+ EVENT(PM_BR_CMPL, 0x4d05e);
+ EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+ EVENT(PM_BR_FIN, 0x2f04a);
+@@ -50,8 +50,8 @@ EVENT(PM_DTLB_MISS, 0x300fc);
+ /* ITLB Reloaded */
+ EVENT(PM_ITLB_MISS, 0x400fc);
+
+-EVENT(PM_RUN_CYC_ALT, 0x0001e);
+-EVENT(PM_RUN_INST_CMPL_ALT, 0x00002);
++EVENT(PM_CYC_ALT, 0x0001e);
++EVENT(PM_INST_CMPL_ALT, 0x00002);
+
+ /*
+ * Memory Access Events
+diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
+index f9d64c63bb4a7..07ca62d084d9d 100644
+--- a/arch/powerpc/perf/power10-pmu.c
++++ b/arch/powerpc/perf/power10-pmu.c
+@@ -91,8 +91,8 @@ extern u64 PERF_REG_EXTENDED_MASK;
+
+ /* Table of alternatives, sorted by column 0 */
+ static const unsigned int power10_event_alternatives[][MAX_ALT] = {
+- { PM_RUN_CYC_ALT, PM_RUN_CYC },
+- { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
++ { PM_INST_CMPL_ALT, PM_INST_CMPL },
++ { PM_CYC_ALT, PM_CYC },
+ };
+
+ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+@@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
+ return 0;
+ }
+
+-GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC);
+-GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL);
++GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
++GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+ GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
+ GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+ GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+@@ -148,8 +148,8 @@ CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+ CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+ static struct attribute *power10_events_attr_dd1[] = {
+- GENERIC_EVENT_PTR(PM_RUN_CYC),
+- GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
++ GENERIC_EVENT_PTR(PM_CYC),
++ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+@@ -173,8 +173,8 @@ static struct attribute *power10_events_attr_dd1[] = {
+ };
+
+ static struct attribute *power10_events_attr[] = {
+- GENERIC_EVENT_PTR(PM_RUN_CYC),
+- GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
++ GENERIC_EVENT_PTR(PM_CYC),
++ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_FIN),
+ GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+@@ -271,8 +271,8 @@ static const struct attribute_group *power10_pmu_attr_groups[] = {
+ };
+
+ static int power10_generic_events_dd1[] = {
+- [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
+- [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
++ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
++ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+@@ -280,8 +280,8 @@ static int power10_generic_events_dd1[] = {
+ };
+
+ static int power10_generic_events[] = {
+- [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC,
+- [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL,
++ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
++ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+@@ -548,6 +548,24 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+
+ #undef C
+
++/*
++ * Set the MMCR0[CC56RUN] bit to enable counting for
++ * PMC5 and PMC6 regardless of the state of CTRL[RUN],
++ * so that we can use counters 5 and 6 as PM_INST_CMPL and
++ * PM_CYC.
++ */
++static int power10_compute_mmcr(u64 event[], int n_ev,
++ unsigned int hwc[], struct mmcr_regs *mmcr,
++ struct perf_event *pevents[], u32 flags)
++{
++ int ret;
++
++ ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
++ if (!ret)
++ mmcr->mmcr0 |= MMCR0_C56RUN;
++ return ret;
++}
++
+ static struct power_pmu power10_pmu = {
+ .name = "POWER10",
+ .n_counter = MAX_PMU_COUNTERS,
+@@ -555,7 +573,7 @@ static struct power_pmu power10_pmu = {
+ .test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
+- .compute_mmcr = isa207_compute_mmcr,
++ .compute_mmcr = power10_compute_mmcr,
+ .config_bhrb = power10_config_bhrb,
+ .bhrb_filter_map = power10_bhrb_filter_map,
+ .get_constraint = isa207_get_constraint,
+diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
+index ff3382140d7e6..cbdd074ee2a70 100644
+--- a/arch/powerpc/perf/power9-pmu.c
++++ b/arch/powerpc/perf/power9-pmu.c
+@@ -133,11 +133,11 @@ int p9_dd22_bl_ev[] = {
+
+ /* Table of alternatives, sorted by column 0 */
+ static const unsigned int power9_event_alternatives[][MAX_ALT] = {
+- { PM_INST_DISP, PM_INST_DISP_ALT },
+- { PM_RUN_CYC_ALT, PM_RUN_CYC },
+- { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
+- { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_BR_2PATH, PM_BR_2PATH_ALT },
++ { PM_INST_DISP, PM_INST_DISP_ALT },
++ { PM_RUN_CYC_ALT, PM_RUN_CYC },
++ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
++ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
+ };
+
+ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
+index fa9bc804e67af..6b2a59fefffa7 100644
+--- a/arch/powerpc/perf/req-gen/perf.h
++++ b/arch/powerpc/perf/req-gen/perf.h
+@@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \
+ #define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+ r_fields
+
++/* Generate event list for platforms with counter_info_version 0x6 or below */
++static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
++#include REQUEST_FILE
++ NULL
++};
++
++/*
++ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
++ * events were deprecated for platform firmware that supports
++ * counter_info_version 0x8 or above.
++ * Those deprecated events are still part of platform firmware that
++ * support counter_info_version 0x6 and below. As per the getPerfCountInfo
++ * v1.018 documentation there is no counter_info_version 0x7.
++ * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of
++ * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms
++ * that supports counter_info_version 0x8 or above.
++ */
++#undef ENABLE_EVENTS_COUNTERINFO_V6
++
++/* Generate event list for platforms with counter_info_version 0x8 or above */
+ static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
+ #include REQUEST_FILE
+ NULL
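
req-gen/perf.h builds two event tables from a single list by expanding REQUEST_FILE twice: the first pass (hv_gpci_event_attrs_v6) still contains the events that only exist on counter_info_version 0x6 firmware, then ENABLE_EVENTS_COUNTERINFO_V6 is undefined and the second pass produces the trimmed table for 0x8 and above. The same include-twice idea in miniature, with invented file and event names:

/* events.def (no include guard on purpose; it is included more than once) */
EVENT(cycles)
EVENT(instructions)
#ifdef ENABLE_OLD_EVENTS
EVENT(legacy_stalls)		/* only meaningful on old firmware */
#endif

/* consumer file */
#define EVENT(n)	#n,

#define ENABLE_OLD_EVENTS
static const char *events_old_fw[] = {
#include "events.def"
	NULL,
};

#undef ENABLE_OLD_EVENTS
static const char *events_new_fw[] = {
#include "events.def"
	NULL,
};
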
+diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
+index b299e43f5ef94..823397c802def 100644
+--- a/arch/powerpc/platforms/44x/fsp2.c
++++ b/arch/powerpc/platforms/44x/fsp2.c
+@@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+ if (irq == NO_IRQ) {
+ pr_err("device tree node %pOFn is missing a interrupt",
+ np);
++ of_node_put(np);
+ return;
+ }
+
+@@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
+ if (rc) {
+ pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d",
+ np, rc);
++ of_node_put(np);
+ return;
+ }
+ }
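
Several hunks in this patch fix the same device-tree reference leak, here in fsp2.c and further down in axon_msi.c, cell/iommu.c, spufs/inode.c, hlwd-pic.c, opal-lpc.c, opal.c and ultravisor.c: the for_each_*_node() iterators take a reference on the node they hand back, so any early return out of the loop must drop it with of_node_put(). A kernel-style sketch of the fixed shape (the function name is invented):

#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/printk.h>

static void probe_nodes(const char *compat)
{
	struct device_node *np;
	unsigned int irq;

	for_each_compatible_node(np, NULL, compat) {
		irq = irq_of_parse_and_map(np, 0);
		if (!irq) {
			pr_err("%pOF: missing interrupt\n", np);
			of_node_put(np);	/* drop the iterator's reference */
			return;			/* leaving the loop early */
		}
		/* ... request_irq(), etc ... */
	}
}
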
+diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
+index ae8b812c92029..2481e78c04234 100644
+--- a/arch/powerpc/platforms/4xx/cpm.c
++++ b/arch/powerpc/platforms/4xx/cpm.c
+@@ -327,6 +327,6 @@ late_initcall(cpm_init);
+ static int __init cpm_powersave_off(char *arg)
+ {
+ cpm.powersave_off = 1;
+- return 0;
++ return 1;
+ }
+ __setup("powersave=off", cpm_powersave_off);
+diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
+index 30342b60aa63f..42c3d40355d90 100644
+--- a/arch/powerpc/platforms/512x/clock-commonclk.c
++++ b/arch/powerpc/platforms/512x/clock-commonclk.c
+@@ -984,7 +984,7 @@ static void mpc5121_clk_provide_migration_support(void)
+
+ #define NODE_PREP do { \
+ of_address_to_resource(np, 0, &res); \
+- snprintf(devname, sizeof(devname), "%08x.%s", res.start, np->name); \
++ snprintf(devname, sizeof(devname), "%pa.%s", &res.start, np->name); \
+ } while (0)
+
+ #define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
+diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+index b91ebebd9ff20..e0049b7df2125 100644
+--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
++++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+@@ -530,6 +530,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op)
+ err_bcom_rx_irq:
+ bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
+ err_bcom_rx:
++ free_irq(lpbfifo.irq, &lpbfifo);
+ err_irq:
+ iounmap(lpbfifo.regs);
+ lpbfifo.regs = NULL;
+diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+index b6133a237a709..6e18d07035680 100644
+--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
++++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+@@ -106,7 +106,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
+
+ goto next;
+ unreg:
+- platform_device_del(pdev);
++ platform_device_put(pdev);
+ err:
+ pr_err("%pOF: registration failed\n", np);
+ next:
+diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
+index 60e4e97a929db..260fbad7967b2 100644
+--- a/arch/powerpc/platforms/85xx/Makefile
++++ b/arch/powerpc/platforms/85xx/Makefile
+@@ -3,7 +3,9 @@
+ # Makefile for the PowerPC 85xx linux kernel.
+ #
+ obj-$(CONFIG_SMP) += smp.o
+-obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o
++ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
++obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
++endif
+
+ obj-y += common.o
+
+diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+index 7c0133f558d02..4a8af80011a6f 100644
+--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
++++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+@@ -17,6 +17,7 @@
+
+ static struct ccsr_guts __iomem *guts;
+
++#ifdef CONFIG_FSL_PMC
+ static void mpc85xx_irq_mask(int cpu)
+ {
+
+@@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu)
+ {
+
+ }
++#endif
+
+ static void mpc85xx_freeze_time_base(bool freeze)
+ {
+@@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = {
+
+ static const struct fsl_pm_ops mpc85xx_pm_ops = {
+ .freeze_time_base = mpc85xx_freeze_time_base,
++#ifdef CONFIG_FSL_PMC
+ .irq_mask = mpc85xx_irq_mask,
+ .irq_unmask = mpc85xx_irq_unmask,
+ .cpu_die = mpc85xx_cpu_die,
+ .cpu_up_prepare = mpc85xx_cpu_up_prepare,
++#endif
+ };
+
+ int __init mpc85xx_setup_pmc(void)
+@@ -94,9 +98,8 @@ int __init mpc85xx_setup_pmc(void)
+ pr_err("Could not map guts node address\n");
+ return -ENOMEM;
+ }
++ qoriq_pm_ops = &mpc85xx_pm_ops;
+ }
+
+- qoriq_pm_ops = &mpc85xx_pm_ops;
+-
+ return 0;
+ }
+diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
+index c6df294054fe9..d7081e9af65c7 100644
+--- a/arch/powerpc/platforms/85xx/smp.c
++++ b/arch/powerpc/platforms/85xx/smp.c
+@@ -40,7 +40,6 @@ struct epapr_spin_table {
+ u32 pir;
+ };
+
+-#ifdef CONFIG_HOTPLUG_CPU
+ static u64 timebase;
+ static int tb_req;
+ static int tb_valid;
+@@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void)
+ local_irq_restore(flags);
+ }
+
++#ifdef CONFIG_HOTPLUG_CPU
+ static void smp_85xx_cpu_offline_self(void)
+ {
+ unsigned int cpu = smp_processor_id();
+@@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu)
+ local_irq_save(flags);
+ hard_irq_disable();
+
+- if (qoriq_pm_ops)
++ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+ qoriq_pm_ops->cpu_up_prepare(cpu);
+
+ /* if cpu is not spinning, reset it */
+@@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr)
+ booting_thread_hwid = cpu_thread_in_core(nr);
+ primary = cpu_first_thread_sibling(nr);
+
+- if (qoriq_pm_ops)
++ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+ qoriq_pm_ops->cpu_up_prepare(nr);
+
+ /*
+@@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void)
+ smp_85xx_ops.probe = NULL;
+ }
+
+-#ifdef CONFIG_HOTPLUG_CPU
+ #ifdef CONFIG_FSL_CORENET_RCPM
++ /* Assign a value to qoriq_pm_ops on PPC_E500MC */
+ fsl_rcpm_init();
+-#endif
+-
+-#ifdef CONFIG_FSL_PMC
++#else
++ /* Assign a value to qoriq_pm_ops on !PPC_E500MC */
+ mpc85xx_setup_pmc();
+ #endif
+ if (qoriq_pm_ops) {
+ smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
+ smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
++#ifdef CONFIG_HOTPLUG_CPU
+ smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
+ smp_85xx_ops.cpu_die = qoriq_cpu_kill;
+- }
+ #endif
++ }
+ smp_ops = &smp_85xx_ops;
+
+ #ifdef CONFIG_KEXEC_CORE
+diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
+index c58b6f1c40e35..3ef5e9fd3a9b6 100644
+--- a/arch/powerpc/platforms/8xx/cpm1.c
++++ b/arch/powerpc/platforms/8xx/cpm1.c
+@@ -280,6 +280,7 @@ cpm_setbrg(uint brg, uint rate)
+ out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) |
+ CPM_BRG_EN | CPM_BRG_DIV16);
+ }
++EXPORT_SYMBOL(cpm_setbrg);
+
+ struct cpm_ioport16 {
+ __be16 dir, par, odr_sor, dat, intr;
+diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
+index f2ba837249d69..04a6abf14c295 100644
+--- a/arch/powerpc/platforms/8xx/pic.c
++++ b/arch/powerpc/platforms/8xx/pic.c
+@@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void)
+ if (mpc8xx_pic_host == NULL) {
+ printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
+ ret = -ENOMEM;
++ goto out;
+ }
+
+ ret = 0;
+diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
+index a208997ade88b..1b1e67ff9d211 100644
+--- a/arch/powerpc/platforms/Kconfig.cputype
++++ b/arch/powerpc/platforms/Kconfig.cputype
+@@ -111,6 +111,7 @@ config PPC_BOOK3S_64
+
+ config PPC_BOOK3E_64
+ bool "Embedded processors"
++ select PPC_FSL_BOOK3E
+ select PPC_FPU # Make it a choice ?
+ select PPC_SMP_MUXED_IPI
+ select PPC_DOORBELL
+@@ -136,9 +137,9 @@ config GENERIC_CPU
+ depends on PPC64 && CPU_LITTLE_ENDIAN
+ select ARCH_HAS_FAST_MULTIPLIER
+
+-config GENERIC_CPU
++config POWERPC_CPU
+ bool "Generic 32 bits powerpc"
+- depends on PPC32 && !PPC_8xx
++ depends on PPC32 && !PPC_8xx && !PPC_85xx
+
+ config CELL_CPU
+ bool "Cell Broadband Engine"
+@@ -169,11 +170,11 @@ config POWER9_CPU
+
+ config E5500_CPU
+ bool "Freescale e5500"
+- depends on E500
++ depends on PPC64 && E500
+
+ config E6500_CPU
+ bool "Freescale e6500"
+- depends on E500
++ depends on PPC64 && E500
+
+ config 860_CPU
+ bool "8xx family"
+@@ -192,11 +193,23 @@ config G4_CPU
+ depends on PPC_BOOK3S_32
+ select ALTIVEC
+
++config E500_CPU
++ bool "e500 (8540)"
++ depends on PPC_85xx && !PPC_E500MC
++
++config E500MC_CPU
++ bool "e500mc"
++ depends on PPC_85xx && PPC_E500MC
++
++config TOOLCHAIN_DEFAULT_CPU
++ bool "Rely on the toolchain's implicit default CPU"
++ depends on PPC32
++
+ endchoice
+
+ config TARGET_CPU_BOOL
+ bool
+- default !GENERIC_CPU
++ default !GENERIC_CPU && !TOOLCHAIN_DEFAULT_CPU
+
+ config TARGET_CPU
+ string
+@@ -211,6 +224,9 @@ config TARGET_CPU
+ default "e300c2" if E300C2_CPU
+ default "e300c3" if E300C3_CPU
+ default "G4" if G4_CPU
++ default "8540" if E500_CPU
++ default "e500mc" if E500MC_CPU
++ default "powerpc" if POWERPC_CPU
+
+ config PPC_BOOK3S
+ def_bool y
+@@ -287,7 +303,7 @@ config FSL_BOOKE
+ config PPC_FSL_BOOK3E
+ bool
+ select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
+- select FSL_EMB_PERFMON
++ imply FSL_EMB_PERFMON
+ select PPC_SMP_MUXED_IPI
+ select PPC_DOORBELL
+ default y if FSL_BOOKE
+diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
+index 30172e52e16b7..4d82c92ddd523 100644
+--- a/arch/powerpc/platforms/book3s/vas-api.c
++++ b/arch/powerpc/platforms/book3s/vas-api.c
+@@ -303,7 +303,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+ return -EINVAL;
+ }
+
+- if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) {
++ if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
+ pr_err("VAS API is not registered\n");
+ return -EACCES;
+ }
+@@ -373,7 +373,7 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+ return -EINVAL;
+ }
+
+- if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) {
++ if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+ pr_err("%s(): VAS API is not registered\n", __func__);
+ return -EACCES;
+ }
+diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
+index 82335e364c440..f630693c8de72 100644
+--- a/arch/powerpc/platforms/cell/axon_msi.c
++++ b/arch/powerpc/platforms/cell/axon_msi.c
+@@ -226,6 +226,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+ if (!prop) {
+ dev_dbg(&dev->dev,
+ "axon_msi: no msi-address-(32|64) properties found\n");
++ of_node_put(dn);
+ return -ENOENT;
+ }
+
+diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
+index fa08699aedeb8..d32f24de84798 100644
+--- a/arch/powerpc/platforms/cell/iommu.c
++++ b/arch/powerpc/platforms/cell/iommu.c
+@@ -977,6 +977,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
+ if (hbase < dbase || (hend > (dbase + dsize))) {
+ pr_debug("iommu: hash window doesn't fit in"
+ "real DMA window\n");
++ of_node_put(np);
+ return -1;
+ }
+ }
+diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
+index 5b9a7e9f144b3..dff8d5e7ab82b 100644
+--- a/arch/powerpc/platforms/cell/pervasive.c
++++ b/arch/powerpc/platforms/cell/pervasive.c
+@@ -78,6 +78,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs)
+ switch (regs->msr & SRR1_WAKEMASK) {
+ case SRR1_WAKEDEC:
+ set_dec(1);
++ break;
+ case SRR1_WAKEEE:
+ /*
+ * Handle these when interrupts get re-enabled and we take
+diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
+index bed05b644c2c5..ed37a93bf858a 100644
+--- a/arch/powerpc/platforms/cell/spufs/inode.c
++++ b/arch/powerpc/platforms/cell/spufs/inode.c
+@@ -659,6 +659,7 @@ spufs_init_isolated_loader(void)
+ return;
+
+ loader = of_get_property(dn, "loader", &size);
++ of_node_put(dn);
+ if (!loader)
+ return;
+
+diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+index 609bda2ad5dd2..4d9200bdba78c 100644
+--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
++++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+@@ -145,7 +145,7 @@ static struct irq_domain * __init flipper_pic_init(struct device_node *np)
+ }
+ io_base = ioremap(res.start, resource_size(&res));
+
+- pr_info("controller at 0x%08x mapped to 0x%p\n", res.start, io_base);
++ pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base);
+
+ __flipper_quiesce(io_base);
+
+diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+index 15396333a90bd..132e5c175e2d6 100644
+--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
++++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+@@ -171,7 +171,7 @@ static struct irq_domain *hlwd_pic_init(struct device_node *np)
+ return NULL;
+ }
+
+- pr_info("controller at 0x%08x mapped to 0x%p\n", res.start, io_base);
++ pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base);
+
+ __hlwd_quiesce(io_base);
+
+@@ -214,6 +214,7 @@ void hlwd_pic_probe(void)
+ irq_set_chained_handler(cascade_virq,
+ hlwd_pic_irq_cascade);
+ hlwd_irq_host = host;
++ of_node_put(np);
+ break;
+ }
+ }
+diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
+index a802ef957d63e..458a63a30e803 100644
+--- a/arch/powerpc/platforms/embedded6xx/wii.c
++++ b/arch/powerpc/platforms/embedded6xx/wii.c
+@@ -89,8 +89,8 @@ static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible)
+
+ hw_regs = ioremap(res.start, resource_size(&res));
+ if (hw_regs) {
+- pr_info("%s at 0x%08x mapped to 0x%p\n", name,
+- res.start, hw_regs);
++ pr_info("%s at 0x%pa mapped to 0x%p\n", name,
++ &res.start, hw_regs);
+ }
+
+ out_put:
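
The printk changes in clock-commonclk.c, flipper-pic.c, hlwd-pic.c and wii.c all correct the same format-string problem: res.start is a resource_size_t, which can be 64 bits wide, so %08x truncates it and triggers format warnings. The %pa specifier prints a phys_addr_t/resource_size_t at its native width and takes a pointer to the value. Minimal sketch:

#include <linux/ioport.h>
#include <linux/printk.h>

static void report_region(struct resource *res, void __iomem *base)
{
	/* note the &res->start: %pa dereferences a pointer to the value */
	pr_info("controller at 0x%pa mapped to 0x%p\n", &res->start, base);
}
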
+diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h
+new file mode 100644
+index 0000000000000..335417e95e66f
+--- /dev/null
++++ b/arch/powerpc/platforms/microwatt/microwatt.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _MICROWATT_H
++#define _MICROWATT_H
++
++void microwatt_rng_init(void);
++
++#endif /* _MICROWATT_H */
+diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
+index 3d8ee6eb7dada..8cb161533e6aa 100644
+--- a/arch/powerpc/platforms/microwatt/rng.c
++++ b/arch/powerpc/platforms/microwatt/rng.c
+@@ -11,6 +11,7 @@
+ #include <asm/archrandom.h>
+ #include <asm/cputable.h>
+ #include <asm/machdep.h>
++#include "microwatt.h"
+
+ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+@@ -29,7 +30,7 @@ int microwatt_get_random_darn(unsigned long *v)
+ return 1;
+ }
+
+-static __init int rng_init(void)
++void __init microwatt_rng_init(void)
+ {
+ unsigned long val;
+ int i;
+@@ -37,12 +38,7 @@ static __init int rng_init(void)
+ for (i = 0; i < 10; i++) {
+ if (microwatt_get_random_darn(&val)) {
+ ppc_md.get_random_seed = microwatt_get_random_darn;
+- return 0;
++ return;
+ }
+ }
+-
+- pr_warn("Unable to use DARN for get_random_seed()\n");
+-
+- return -EIO;
+ }
+-machine_subsys_initcall(, rng_init);
+diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
+index 0b02603bdb747..6b32539395a48 100644
+--- a/arch/powerpc/platforms/microwatt/setup.c
++++ b/arch/powerpc/platforms/microwatt/setup.c
+@@ -16,6 +16,8 @@
+ #include <asm/xics.h>
+ #include <asm/udbg.h>
+
++#include "microwatt.h"
++
+ static void __init microwatt_init_IRQ(void)
+ {
+ xics_init();
+@@ -32,10 +34,16 @@ static int __init microwatt_populate(void)
+ }
+ machine_arch_initcall(microwatt, microwatt_populate);
+
++static void __init microwatt_setup_arch(void)
++{
++ microwatt_rng_init();
++}
++
+ define_machine(microwatt) {
+ .name = "microwatt",
+ .probe = microwatt_probe,
+ .init_IRQ = microwatt_init_IRQ,
++ .setup_arch = microwatt_setup_arch,
+ .progress = udbg_progress,
+ .calibrate_decr = generic_calibrate_decr,
+ };
+diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
+index ced2254154860..b8ae56e9f4146 100644
+--- a/arch/powerpc/platforms/powermac/cache.S
++++ b/arch/powerpc/platforms/powermac/cache.S
+@@ -48,7 +48,7 @@ flush_disable_75x:
+
+ /* Stop DST streams */
+ BEGIN_FTR_SECTION
+- DSSALL
++ PPC_DSSALL
+ sync
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+@@ -197,7 +197,7 @@ flush_disable_745x:
+ isync
+
+ /* Stop prefetch streams */
+- DSSALL
++ PPC_DSSALL
+ sync
+
+ /* Disable L2 prefetching */
+diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
+index f77a59b5c2e1a..df89d916236d9 100644
+--- a/arch/powerpc/platforms/powermac/low_i2c.c
++++ b/arch/powerpc/platforms/powermac/low_i2c.c
+@@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host,
+ bus->close = kw_i2c_close;
+ bus->xfer = kw_i2c_xfer;
+ mutex_init(&bus->mutex);
++ lockdep_register_key(&bus->lock_key);
+ lockdep_set_class(&bus->mutex, &bus->lock_key);
+ if (controller == busnode)
+ bus->flags = pmac_i2c_multibus;
+@@ -810,6 +811,7 @@ static void __init pmu_i2c_probe(void)
+ bus->hostdata = bus + 1;
+ bus->xfer = pmu_i2c_xfer;
+ mutex_init(&bus->mutex);
++ lockdep_register_key(&bus->lock_key);
+ lockdep_set_class(&bus->mutex, &bus->lock_key);
+ bus->flags = pmac_i2c_multibus;
+ list_add(&bus->link, &pmac_i2c_busses);
+@@ -933,6 +935,7 @@ static void __init smu_i2c_probe(void)
+ bus->hostdata = bus + 1;
+ bus->xfer = smu_i2c_xfer;
+ mutex_init(&bus->mutex);
++ lockdep_register_key(&bus->lock_key);
+ lockdep_set_class(&bus->mutex, &bus->lock_key);
+ bus->flags = 0;
+ list_add(&bus->link, &pmac_i2c_busses);
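
The low_i2c.c hunks add lockdep_register_key() because each bus structure, and therefore its lock_class_key, is allocated dynamically; lockdep only accepts keys in static storage unless they have been registered first, and an unregistered dynamic key triggers the "non-static key" warning. The register-then-classify pairing, sketched with an invented structure:

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_bus {
	struct mutex lock;
	struct lock_class_key key;	/* lives in kmalloc'ed memory */
};

static struct my_bus *my_bus_alloc(void)
{
	struct my_bus *bus = kzalloc(sizeof(*bus), GFP_KERNEL);

	if (!bus)
		return NULL;
	mutex_init(&bus->lock);
	lockdep_register_key(&bus->key);	/* required for a dynamic key */
	lockdep_set_class(&bus->lock, &bus->key);
	return bus;
}

A matching lockdep_unregister_key() is needed if the structure is ever freed.
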
+diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
+index 9a360ced663b0..e23a51a05f99a 100644
+--- a/arch/powerpc/platforms/powernv/opal-fadump.c
++++ b/arch/powerpc/platforms/powernv/opal-fadump.c
+@@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+ addr = be64_to_cpu(addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+ opal_fdm_active = (void *)addr;
+- if (opal_fdm_active->registered_regions == 0)
++ if (be16_to_cpu(opal_fdm_active->registered_regions) == 0)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
+@@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf);
+ static void opal_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+ {
+- pr_debug("Boot memory regions count: %d\n", fdm->region_cnt);
++ pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt));
+
+ /*
+ * The destination address of the first boot memory region is the
+ * destination address of boot memory regions.
+ */
+- fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest;
++ fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest);
+ pr_debug("Destination address of boot memory regions: %#016llx\n",
+ fadump_conf->boot_mem_dest_addr);
+
+- fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr;
++ fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr);
+ }
+
+ /*
+@@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ fadump_conf->boot_memory_size = 0;
+
+ pr_debug("Boot memory regions:\n");
+- for (i = 0; i < fdm->region_cnt; i++) {
+- base = fdm->rgn[i].src;
+- size = fdm->rgn[i].size;
++ for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) {
++ base = be64_to_cpu(fdm->rgn[i].src);
++ size = be64_to_cpu(fdm->rgn[i].size);
+ pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+
+ fadump_conf->boot_mem_addr[i] = base;
+@@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+- fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest;
++ fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest);
+
+ /*
+ * Rarely, but it can so happen that system crashes before all
+@@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ * Hope the memory that could not be preserved only has pages
+ * that are usually filtered out while saving the vmcore.
+ */
+- if (fdm->region_cnt > fdm->registered_regions) {
++ if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) {
+ pr_warn("Not all memory regions were saved!!!\n");
+ pr_warn(" Unsaved memory regions:\n");
+- i = fdm->registered_regions;
+- while (i < fdm->region_cnt) {
++ i = be16_to_cpu(fdm->registered_regions);
++ while (i < be16_to_cpu(fdm->region_cnt)) {
+ pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
+- i, fdm->rgn[i].src, fdm->rgn[i].size);
++ i, be64_to_cpu(fdm->rgn[i].src),
++ be64_to_cpu(fdm->rgn[i].size));
+ i++;
+ }
+
+@@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ }
+
+ fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
+- fadump_conf->boot_mem_regs_cnt = fdm->region_cnt;
++ fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt);
+ opal_fadump_update_config(fadump_conf, fdm);
+ }
+
+@@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
+ {
+ fdm->version = OPAL_FADUMP_VERSION;
+- fdm->region_cnt = 0;
+- fdm->registered_regions = 0;
+- fdm->fadumphdr_addr = 0;
++ fdm->region_cnt = cpu_to_be16(0);
++ fdm->registered_regions = cpu_to_be16(0);
++ fdm->fadumphdr_addr = cpu_to_be64(0);
+ }
+
+ static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+ {
+ u64 addr = fadump_conf->reserve_dump_area_start;
++ u16 reg_cnt;
+ int i;
+
+ opal_fdm = __va(fadump_conf->kernel_metadata);
+ opal_fadump_init_metadata(opal_fdm);
+
+ /* Boot memory regions */
++ reg_cnt = be16_to_cpu(opal_fdm->region_cnt);
+ for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
+- opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i];
+- opal_fdm->rgn[i].dest = addr;
+- opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i];
++ opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]);
++ opal_fdm->rgn[i].dest = cpu_to_be64(addr);
++ opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]);
+
+- opal_fdm->region_cnt++;
++ reg_cnt++;
+ addr += fadump_conf->boot_mem_sz[i];
+ }
++ opal_fdm->region_cnt = cpu_to_be16(reg_cnt);
+
+ /*
+ * Kernel metadata is passed to f/w and retrieved in capture kerenl.
+ * So, use it to save fadump header address instead of calculating it.
+ */
+- opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest +
+- fadump_conf->boot_memory_size);
++ opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) +
++ fadump_conf->boot_memory_size);
+
+ opal_fadump_update_config(fadump_conf, opal_fdm);
+
+@@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void)
+ static int opal_fadump_register(struct fw_dump *fadump_conf)
+ {
+ s64 rc = OPAL_PARAMETER;
++ u16 registered_regs;
+ int i, err = -EIO;
+
+- for (i = 0; i < opal_fdm->region_cnt; i++) {
++ registered_regs = be16_to_cpu(opal_fdm->registered_regions);
++ for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) {
+ rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
+- opal_fdm->rgn[i].src,
+- opal_fdm->rgn[i].dest,
+- opal_fdm->rgn[i].size);
++ be64_to_cpu(opal_fdm->rgn[i].src),
++ be64_to_cpu(opal_fdm->rgn[i].dest),
++ be64_to_cpu(opal_fdm->rgn[i].size));
+ if (rc != OPAL_SUCCESS)
+ break;
+
+- opal_fdm->registered_regions++;
++ registered_regs++;
+ }
++ opal_fdm->registered_regions = cpu_to_be16(registered_regs);
+
+ switch (rc) {
+ case OPAL_SUCCESS:
+@@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf)
+ case OPAL_RESOURCE:
+ /* If MAX regions limit in f/w is hit, warn and proceed. */
+ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
+- (opal_fdm->region_cnt - opal_fdm->registered_regions));
++ (be16_to_cpu(opal_fdm->region_cnt) -
++ be16_to_cpu(opal_fdm->registered_regions)));
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+@@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf)
+ * If some regions were registered before OPAL_MPIPL_ADD_RANGE
+ * OPAL call failed, unregister all regions.
+ */
+- if ((err < 0) && (opal_fdm->registered_regions > 0))
++ if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0))
+ opal_fadump_unregister(fadump_conf);
+
+ return err;
+@@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf)
+ return -EIO;
+ }
+
+- opal_fdm->registered_regions = 0;
++ opal_fdm->registered_regions = cpu_to_be16(0);
+ fadump_conf->dump_registered = 0;
+ return 0;
+ }
+@@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf,
+ else
+ fdm_ptr = opal_fdm;
+
+- for (i = 0; i < fdm_ptr->region_cnt; i++) {
++ for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) {
+ /*
+ * Only regions that are registered for MPIPL
+ * would have dump data.
+ */
+ if ((fadump_conf->dump_active) &&
+- (i < fdm_ptr->registered_regions))
+- dumped_bytes = fdm_ptr->rgn[i].size;
++ (i < be16_to_cpu(fdm_ptr->registered_regions)))
++ dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size);
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+- fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest);
++ be64_to_cpu(fdm_ptr->rgn[i].src),
++ be64_to_cpu(fdm_ptr->rgn[i].dest));
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+- fdm_ptr->rgn[i].size, dumped_bytes);
++ be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes);
+ }
+
+ /* Dump is active. Show reserved area start address. */
+@@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+ {
+ const __be32 *prop;
+ unsigned long dn;
++ __be64 be_addr;
+ u64 addr = 0;
+ int i, len;
+ s64 ret;
+@@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+ if (!prop)
+ return;
+
+- ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
+- if ((ret != OPAL_SUCCESS) || !addr) {
++ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
++ if ((ret != OPAL_SUCCESS) || !be_addr) {
+ pr_err("Failed to get Kernel metadata (%lld)\n", ret);
+ return;
+ }
+
+- addr = be64_to_cpu(addr);
++ addr = be64_to_cpu(be_addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+
+ opal_fdm_active = __va(addr);
+@@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+ }
+
+ /* Kernel regions not registered with f/w for MPIPL */
+- if (opal_fdm_active->registered_regions == 0) {
++ if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) {
+ opal_fdm_active = NULL;
+ return;
+ }
+
+- ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+- if (addr) {
+- addr = be64_to_cpu(addr);
++ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr);
++ if (be_addr) {
++ addr = be64_to_cpu(be_addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opal_cpu_metadata = __va(addr);
+ }
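
The opal-fadump conversions exist because the metadata block is shared with OPAL firmware, which stores it big-endian, while the kernel may run little-endian; the fields become __be16/__be64 and every access goes through the byte-order helpers. The helpers in isolation, on an invented structure:

#include <linux/types.h>
#include <asm/byteorder.h>

struct fw_shared_rec {
	__be16 region_cnt;	/* always big-endian in memory */
	__be64 dest;
};

static void fw_shared_rec_bump(struct fw_shared_rec *r, u64 dest)
{
	u16 cnt = be16_to_cpu(r->region_cnt);	/* BE storage -> CPU order */

	r->dest = cpu_to_be64(dest);		/* CPU order -> BE storage */
	r->region_cnt = cpu_to_be16(cnt + 1);
}
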
+diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
+index f1e9ecf548c5d..3f715efb0aa6e 100644
+--- a/arch/powerpc/platforms/powernv/opal-fadump.h
++++ b/arch/powerpc/platforms/powernv/opal-fadump.h
+@@ -31,14 +31,14 @@
+ * OPAL FADump kernel metadata
+ *
+ * The address of this structure will be registered with f/w for retrieving
+- * and processing during crash dump.
++ * in the capture kernel to process the crash dump.
+ */
+ struct opal_fadump_mem_struct {
+ u8 version;
+ u8 reserved[3];
+- u16 region_cnt; /* number of regions */
+- u16 registered_regions; /* Regions registered for MPIPL */
+- u64 fadumphdr_addr;
++ __be16 region_cnt; /* number of regions */
++ __be16 registered_regions; /* Regions registered for MPIPL */
++ __be64 fadumphdr_addr;
+ struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS];
+ } __packed;
+
+@@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+ for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+ reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+ val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) :
+- reg_entry->reg_val);
++ (u64)(reg_entry->reg_val));
+ opal_fadump_set_regval_regnum(regs,
+ be32_to_cpu(reg_entry->reg_type),
+ be32_to_cpu(reg_entry->reg_num),
+diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
+index 1e5d51db40f84..5390c888db162 100644
+--- a/arch/powerpc/platforms/powernv/opal-lpc.c
++++ b/arch/powerpc/platforms/powernv/opal-lpc.c
+@@ -396,6 +396,7 @@ void __init opal_lpc_init(void)
+ if (!of_get_property(np, "primary", NULL))
+ continue;
+ opal_lpc_chip_id = of_get_ibm_chip_id(np);
++ of_node_put(np);
+ break;
+ }
+ if (opal_lpc_chip_id < 0)
+diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
+index a191f4c60ce71..113bdb151f687 100644
+--- a/arch/powerpc/platforms/powernv/opal-prd.c
++++ b/arch/powerpc/platforms/powernv/opal-prd.c
+@@ -369,6 +369,12 @@ static struct notifier_block opal_prd_event_nb = {
+ .priority = 0,
+ };
+
++static struct notifier_block opal_prd_event_nb2 = {
++ .notifier_call = opal_prd_msg_notifier,
++ .next = NULL,
++ .priority = 0,
++};
++
+ static int opal_prd_probe(struct platform_device *pdev)
+ {
+ int rc;
+@@ -390,9 +396,10 @@ static int opal_prd_probe(struct platform_device *pdev)
+ return rc;
+ }
+
+- rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb);
++ rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+ if (rc) {
+ pr_err("Couldn't register PRD2 event notifier\n");
++ opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+ return rc;
+ }
+
+@@ -401,6 +408,8 @@ static int opal_prd_probe(struct platform_device *pdev)
+ pr_err("failed to register miscdev\n");
+ opal_message_notifier_unregister(OPAL_MSG_PRD,
+ &opal_prd_event_nb);
++ opal_message_notifier_unregister(OPAL_MSG_PRD2,
++ &opal_prd_event_nb2);
+ return rc;
+ }
+
+@@ -411,6 +420,7 @@ static int opal_prd_remove(struct platform_device *pdev)
+ {
+ misc_deregister(&opal_prd_dev);
+ opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
++ opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+ return 0;
+ }
+
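
opal-prd.c gains a second notifier_block because one struct notifier_block can only be linked into one notifier chain at a time (its ->next field is the chain linkage), so reusing the same instance for both OPAL_MSG_PRD and OPAL_MSG_PRD2 corrupts the lists. One callback can still serve both registrations, as in this sketch with invented chain names:

#include <linux/notifier.h>

static BLOCKING_NOTIFIER_HEAD(chain_a);
static BLOCKING_NOTIFIER_HEAD(chain_b);

static int my_event_handler(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	return NOTIFY_OK;	/* shared handler body */
}

/* one notifier_block per chain; only the callback is shared */
static struct notifier_block my_nb_a = { .notifier_call = my_event_handler };
static struct notifier_block my_nb_b = { .notifier_call = my_event_handler };

static void register_both(void)
{
	blocking_notifier_chain_register(&chain_a, &my_nb_a);
	blocking_notifier_chain_register(&chain_b, &my_nb_b);
}
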
+diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
+index e9d18519e650b..5178ec6f3715c 100644
+--- a/arch/powerpc/platforms/powernv/opal.c
++++ b/arch/powerpc/platforms/powernv/opal.c
+@@ -892,6 +892,7 @@ static void opal_export_attrs(void)
+ kobj = kobject_create_and_add("exports", opal_kobj);
+ if (!kobj) {
+ pr_warn("kobject_create_and_add() of exports failed\n");
++ of_node_put(np);
+ return;
+ }
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 3dd35c327d1c5..624822a810193 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -1618,6 +1618,7 @@ found:
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
+ pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
+ pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
++ tbl->it_index = (phb->hose->global_number << 16) | pe->pe_number;
+ if (!iommu_init_table(tbl, phb->hose->node, 0, 0))
+ panic("Failed to initialize iommu table");
+
+@@ -1788,6 +1789,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+ res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
+ }
+
++ tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number;
+ if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+ rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+ else
+diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
+index 28aac933a4391..e3e52ff2cbf58 100644
+--- a/arch/powerpc/platforms/powernv/pci-sriov.c
++++ b/arch/powerpc/platforms/powernv/pci-sriov.c
+@@ -600,12 +600,12 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+ struct pnv_iov_data *iov;
+
+ iov = pnv_iov_get(pdev);
+- num_vfs = iov->num_vfs;
+- base_pe = iov->vf_pe_arr[0].pe_number;
+-
+ if (WARN_ON(!iov))
+ return;
+
++ num_vfs = iov->num_vfs;
++ base_pe = iov->vf_pe_arr[0].pe_number;
++
+ /* Release VF PEs */
+ pnv_ioda_release_vf_PE(pdev);
+
+diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
+index 11df4e16a1cc3..528946ee7a777 100644
+--- a/arch/powerpc/platforms/powernv/powernv.h
++++ b/arch/powerpc/platforms/powernv/powernv.h
+@@ -42,4 +42,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
+ u32 memcons_get_size(struct memcons *mc);
+ struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name);
+
++void pnv_rng_init(void);
++
+ #endif /* _POWERNV_H */
+diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
+index 72c25295c1c2b..5f81ff9b5265f 100644
+--- a/arch/powerpc/platforms/powernv/rng.c
++++ b/arch/powerpc/platforms/powernv/rng.c
+@@ -17,6 +17,7 @@
+ #include <asm/prom.h>
+ #include <asm/machdep.h>
+ #include <asm/smp.h>
++#include "powernv.h"
+
+ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+@@ -28,22 +29,16 @@ struct powernv_rng {
+
+ static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
+
+-
+-int powernv_hwrng_present(void)
+-{
+- struct powernv_rng *rng;
+-
+- rng = get_cpu_var(powernv_rng);
+- put_cpu_var(rng);
+- return rng != NULL;
+-}
+-
+ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+ {
+ unsigned long parity;
+
+ /* Calculate the parity of the value */
+- asm ("popcntd %0,%1" : "=r" (parity) : "r" (val));
++ asm (".machine push; \
++ .machine power7; \
++ popcntd %0,%1; \
++ .machine pop;"
++ : "=r" (parity) : "r" (val));
+
+ /* xor our value with the previous mask */
+ val ^= rng->mask;
+@@ -54,17 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+ return val;
+ }
+
+-int powernv_get_random_real_mode(unsigned long *v)
+-{
+- struct powernv_rng *rng;
+-
+- rng = raw_cpu_read(powernv_rng);
+-
+- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+-
+- return 1;
+-}
+-
+ static int powernv_get_random_darn(unsigned long *v)
+ {
+ unsigned long val;
+@@ -94,9 +78,6 @@ static int initialise_darn(void)
+ return 0;
+ }
+ }
+-
+- pr_warn("Unable to use DARN for get_random_seed()\n");
+-
+ return -EIO;
+ }
+
+@@ -104,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
+ {
+ struct powernv_rng *rng;
+
+- rng = get_cpu_var(powernv_rng);
+-
+- *v = rng_whiten(rng, in_be64(rng->regs));
+-
+- put_cpu_var(rng);
+-
++ if (mfmsr() & MSR_DR) {
++ rng = get_cpu_var(powernv_rng);
++ *v = rng_whiten(rng, in_be64(rng->regs));
++ put_cpu_var(rng);
++ } else {
++ rng = raw_cpu_read(powernv_rng);
++ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
++ }
+ return 1;
+ }
+ EXPORT_SYMBOL_GPL(powernv_get_random_long);
+@@ -159,32 +142,59 @@ static __init int rng_create(struct device_node *dn)
+
+ rng_init_per_cpu(rng, dn);
+
+- pr_info_once("Registering arch random hook.\n");
+-
+ ppc_md.get_random_seed = powernv_get_random_long;
+
+ return 0;
+ }
+
+-static __init int rng_init(void)
++static int __init pnv_get_random_long_early(unsigned long *v)
+ {
+ struct device_node *dn;
+- int rc;
+-
+- for_each_compatible_node(dn, NULL, "ibm,power-rng") {
+- rc = rng_create(dn);
+- if (rc) {
+- pr_err("Failed creating rng for %pOF (%d).\n",
+- dn, rc);
+- continue;
+- }
+
+- /* Create devices for hwrng driver */
+- of_platform_device_create(dn, NULL, NULL);
+- }
++ if (!slab_is_available())
++ return 0;
++
++ if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early,
++ NULL) != pnv_get_random_long_early)
++ return 0;
++
++ for_each_compatible_node(dn, NULL, "ibm,power-rng")
++ rng_create(dn);
+
+- initialise_darn();
++ if (!ppc_md.get_random_seed)
++ return 0;
++ return ppc_md.get_random_seed(v);
++}
++
++void __init pnv_rng_init(void)
++{
++ struct device_node *dn;
++
++ /* Prefer darn over the rest. */
++ if (!initialise_darn())
++ return;
++
++ dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng");
++ if (dn)
++ ppc_md.get_random_seed = pnv_get_random_long_early;
++
++ of_node_put(dn);
++}
++
++static int __init pnv_rng_late_init(void)
++{
++ struct device_node *dn;
++ unsigned long v;
++
++ /* In case it wasn't called during init for some other reason. */
++ if (ppc_md.get_random_seed == pnv_get_random_long_early)
++ pnv_get_random_long_early(&v);
++
++ if (ppc_md.get_random_seed == powernv_get_random_long) {
++ for_each_compatible_node(dn, NULL, "ibm,power-rng")
++ of_platform_device_create(dn, NULL, NULL);
++ }
+
+ return 0;
+ }
+-machine_subsys_initcall(powernv, rng_init);
++machine_subsys_initcall(powernv, pnv_rng_late_init);
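
The reworked powernv rng path installs pnv_get_random_long_early() as ppc_md.get_random_seed during setup_arch and lets the first caller do the actual device-tree probing: cmpxchg() atomically swaps the early hook out, so exactly one caller performs the initialization and any concurrent caller simply reports no entropy. The claim-then-initialize idiom, reduced to a sketch with invented names:

#include <linux/atomic.h>

static int real_get_seed(unsigned long *v)
{
	*v = 0;			/* stand-in for reading a hardware RNG */
	return 1;
}

static int early_get_seed(unsigned long *v);

static int (*get_seed)(unsigned long *v) = early_get_seed;

static int early_get_seed(unsigned long *v)
{
	/* only the caller that wins the cmpxchg performs the probe */
	if (cmpxchg(&get_seed, early_get_seed, NULL) != early_get_seed)
		return 0;

	/* ... probe and map the hardware here ... */

	get_seed = real_get_seed;	/* publish the real hook */
	return get_seed(v);
}

The late initcall kept at the end of rng.c covers the case where nothing asked for entropy during early boot, by invoking the early hook once itself.
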
+diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
+index a8db3f1530639..1b3c7e04a7af5 100644
+--- a/arch/powerpc/platforms/powernv/setup.c
++++ b/arch/powerpc/platforms/powernv/setup.c
+@@ -190,6 +190,8 @@ static void __init pnv_setup_arch(void)
+ pnv_check_guarded_cores();
+
+ /* XXX PMCS */
++
++ pnv_rng_init();
+ }
+
+ static void __init pnv_init(void)
+diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c
+index e4a00ad06f9d3..67c8c4b2d8b17 100644
+--- a/arch/powerpc/platforms/powernv/ultravisor.c
++++ b/arch/powerpc/platforms/powernv/ultravisor.c
+@@ -55,6 +55,7 @@ static int __init uv_init(void)
+ return -ENODEV;
+
+ uv_memcons = memcons_init(node, "memcons");
++ of_node_put(node);
+ if (!uv_memcons)
+ return -ENOENT;
+
+diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
+index a7aabc18039eb..c1bfad56447d4 100644
+--- a/arch/powerpc/platforms/powernv/vas-fault.c
++++ b/arch/powerpc/platforms/powernv/vas-fault.c
+@@ -216,7 +216,7 @@ int vas_setup_fault_window(struct vas_instance *vinst)
+ vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
+
+ attr.rx_fifo_size = vinst->fault_fifo_size;
+- attr.rx_fifo = vinst->fault_fifo;
++ attr.rx_fifo = __pa(vinst->fault_fifo);
+
+ /*
+ * Max creds is based on number of CRBs can fit in the FIFO.
+diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
+index 0f8d39fbf2b21..b664838008c12 100644
+--- a/arch/powerpc/platforms/powernv/vas-window.c
++++ b/arch/powerpc/platforms/powernv/vas-window.c
+@@ -404,7 +404,7 @@ static void init_winctx_regs(struct pnv_vas_window *window,
+ *
+ * See also: Design note in function header.
+ */
+- val = __pa(winctx->rx_fifo);
++ val = winctx->rx_fifo;
+ val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+@@ -739,7 +739,7 @@ static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
+ */
+ winctx->fifo_disable = true;
+ winctx->intr_disable = true;
+- winctx->rx_fifo = NULL;
++ winctx->rx_fifo = 0;
+ }
+
+ winctx->lnotify_lpid = rxattr->lnotify_lpid;
+@@ -1310,8 +1310,8 @@ int vas_win_close(struct vas_window *vwin)
+ /* if send window, drop reference to matching receive window */
+ if (window->tx_win) {
+ if (window->user_win) {
+- put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
++ put_vas_user_win_ref(&vwin->task_ref);
+ }
+ put_rx_win(window->rxwin);
+ }
+diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
+index 8bb08e395de05..08d9d3d5a22b0 100644
+--- a/arch/powerpc/platforms/powernv/vas.h
++++ b/arch/powerpc/platforms/powernv/vas.h
+@@ -376,7 +376,7 @@ struct pnv_vas_window {
+ * is a container for the register fields in the window context.
+ */
+ struct vas_winctx {
+- void *rx_fifo;
++ u64 rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+ int rsvd_txbuf_count;
+diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
+index 09fafcf2d3a06..f51fd4ac3f0b6 100644
+--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
++++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
+@@ -845,18 +845,8 @@ static int __init eeh_pseries_init(void)
+ return -EINVAL;
+ }
+
+- /* Initialize error log lock and size */
+- spin_lock_init(&slot_errbuf_lock);
+- eeh_error_buf_size = rtas_token("rtas-error-log-max");
+- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+- pr_info("%s: unknown EEH error log size\n",
+- __func__);
+- eeh_error_buf_size = 1024;
+- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+- pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
+- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+- }
++ /* Initialize error log size */
++ eeh_error_buf_size = rtas_get_error_log_max();
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index a52af8fbf5711..ec5d84b4958c5 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -85,19 +85,24 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+ const char *node_name)
+ {
+- struct iommu_table *tbl;
+-
+ if (!table_group)
+ return;
+
+- tbl = table_group->tables[0];
+ #ifdef CONFIG_IOMMU_API
+ if (table_group->group) {
+ iommu_group_put(table_group->group);
+ BUG_ON(table_group->group);
+ }
+ #endif
+- iommu_tce_table_put(tbl);
++
++ /* Default DMA window table is at index 0, while DDW at 1. SR-IOV
++ * adapters only have table on index 1.
++ */
++ if (table_group->tables[0])
++ iommu_tce_table_put(table_group->tables[0]);
++
++ if (table_group->tables[1])
++ iommu_tce_table_put(table_group->tables[1]);
+
+ kfree(table_group);
+ }
+@@ -306,13 +311,22 @@ static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+ {
+ u64 rc;
++ long rpages = npages;
++ unsigned long limit;
+
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
+ return tce_free_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift, npages);
+
+- rc = plpar_tce_stuff((u64)tbl->it_index,
+- (u64)tcenum << tbl->it_page_shift, 0, npages);
++ do {
++ limit = min_t(unsigned long, rpages, 512);
++
++ rc = plpar_tce_stuff((u64)tbl->it_index,
++ (u64)tcenum << tbl->it_page_shift, 0, limit);
++
++ rpages -= limit;
++ tcenum += limit;
++ } while (rpages > 0 && !rc);
+
+ if (rc && printk_ratelimit()) {
+ printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
+@@ -1092,15 +1106,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
+ phys_addr_t max_addr = memory_hotplug_max();
+ struct device_node *memory;
+
+- /*
+- * The "ibm,pmemory" can appear anywhere in the address space.
+- * Assuming it is still backed by page structs, set the upper limit
+- * for the huge DMA window as MAX_PHYSMEM_BITS.
+- */
+- if (of_find_node_by_type(NULL, "ibm,pmemory"))
+- return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ?
+- (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS);
+-
+ for_each_node_by_type(memory, "memory") {
+ unsigned long start, size;
+ int n_mem_addr_cells, n_mem_size_cells, len;
+@@ -1365,8 +1370,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+ len = order_base_2(query.largest_available_block << page_shift);
+ win_name = DMA64_PROPNAME;
+ } else {
+- direct_mapping = true;
+- win_name = DIRECT64_PROPNAME;
++ direct_mapping = !default_win_removed ||
++ (len == MAX_PHYSMEM_BITS) ||
++ (!pmem_present && (len == max_ram_len));
++ win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
+ }
+
+ ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
+diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
+index 3df6bdfea475a..d133597a84ca0 100644
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -638,16 +638,8 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
+
+ static int __init vcpudispatch_stats_procfs_init(void)
+ {
+- /*
+- * Avoid smp_processor_id while preemptible. All CPUs should have
+- * the same value for lppaca_shared_proc.
+- */
+- preempt_disable();
+- if (!lppaca_shared_proc(get_lppaca())) {
+- preempt_enable();
++ if (!lppaca_shared_proc())
+ return 0;
+- }
+- preempt_enable();
+
+ if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
+ &vcpudispatch_stats_proc_ops))
+diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
+index f71eac74ea92a..19503a8797823 100644
+--- a/arch/powerpc/platforms/pseries/lparcfg.c
++++ b/arch/powerpc/platforms/pseries/lparcfg.c
+@@ -205,7 +205,7 @@ static void parse_ppp_data(struct seq_file *m)
+ ppp_data.active_system_procs);
+
+ /* pool related entries are appropriate for shared configs */
+- if (lppaca_shared_proc(get_lppaca())) {
++ if (lppaca_shared_proc()) {
+ unsigned long pool_idle_time, pool_procs;
+
+ seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+@@ -529,7 +529,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n",
+- lppaca_shared_proc(get_lppaca()));
++ lppaca_shared_proc());
+
+ #ifdef CONFIG_PPC_BOOK3S_64
+ seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
+index e83e0891272d3..210a37a065fb7 100644
+--- a/arch/powerpc/platforms/pseries/mobility.c
++++ b/arch/powerpc/platforms/pseries/mobility.c
+@@ -63,6 +63,27 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
+
+ static int delete_dt_node(struct device_node *dn)
+ {
++ struct device_node *pdn;
++ bool is_platfac;
++
++ pdn = of_get_parent(dn);
++ is_platfac = of_node_is_type(dn, "ibm,platform-facilities") ||
++ of_node_is_type(pdn, "ibm,platform-facilities");
++ of_node_put(pdn);
++
++ /*
++ * The drivers that bind to nodes in the platform-facilities
++ * hierarchy don't support node removal, and the removal directive
++ * from firmware is always followed by an add of an equivalent
++ * node. The capability (e.g. RNG, encryption, compression)
++ * represented by the node is never interrupted by the migration.
++ * So ignore changes to this part of the tree.
++ */
++ if (is_platfac) {
++ pr_notice("ignoring remove operation for %pOFfp\n", dn);
++ return 0;
++ }
++
+ pr_debug("removing node %pOFfp\n", dn);
+ dlpar_detach_node(dn);
+ return 0;
+@@ -222,6 +243,19 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
+ if (!dn)
+ return -ENOENT;
+
++ /*
++ * Since delete_dt_node() ignores this node type, this is the
++ * necessary counterpart. We also know that a platform-facilities
++ * node returned from dlpar_configure_connector() has children
++ * attached, and dlpar_attach_node() only adds the parent, leaking
++ * the children. So ignore these on the add side for now.
++ */
++ if (of_node_is_type(dn, "ibm,platform-facilities")) {
++ pr_notice("ignoring add operation for %pOF\n", dn);
++ dlpar_free_cc_nodes(dn);
++ return 0;
++ }
++
+ rc = dlpar_attach_node(dn, parent_dn);
+ if (rc)
+ dlpar_free_cc_nodes(dn);
+diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
+index f48e87ac89c9b..3cfcc748052e9 100644
+--- a/arch/powerpc/platforms/pseries/papr_scm.c
++++ b/arch/powerpc/platforms/pseries/papr_scm.c
+@@ -1159,6 +1159,13 @@ static int papr_scm_probe(struct platform_device *pdev)
+ return -ENODEV;
+ }
+
++ /*
++ * open firmware platform device create won't update the NUMA
++ * distance table. For PAPR SCM devices we use numa_map_to_online_node()
++ * to find the nearest online NUMA node and that requires correct
++ * distance table information.
++ */
++ update_numa_distance(dn);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
+index 90c9d3531694b..4ba8245681192 100644
+--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
++++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
+@@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb)
+
+ pseries_msi_free_domains(phb);
+
++ /* Keep a reference so phb isn't freed yet */
++ get_device(&host_bridge->dev);
++
+ /* Remove the PCI bus and unregister the bridge device from sysfs */
+ phb->bus = NULL;
+ pci_remove_bus(b);
+@@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb)
+ * the pcibios_free_controller_deferred() callback;
+ * see pseries_root_bridge_prepare().
+ */
++ put_device(&host_bridge->dev);
+
+ return 0;
+ }
+diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
+index 3544778e06d01..2a97cc20fe8fe 100644
+--- a/arch/powerpc/platforms/pseries/pseries.h
++++ b/arch/powerpc/platforms/pseries/pseries.h
+@@ -115,4 +115,6 @@ extern u32 pseries_security_flavor;
+ void pseries_setup_security_mitigations(void);
+ void pseries_lpar_read_hblkrm_characteristics(void);
+
++void pseries_rng_init(void);
++
+ #endif /* _PSERIES_PSERIES_H */
+diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
+index 6268545947b83..6ddfdeaace9ef 100644
+--- a/arch/powerpc/platforms/pseries/rng.c
++++ b/arch/powerpc/platforms/pseries/rng.c
+@@ -10,6 +10,7 @@
+ #include <asm/archrandom.h>
+ #include <asm/machdep.h>
+ #include <asm/plpar_wrappers.h>
++#include "pseries.h"
+
+
+ static int pseries_get_random_long(unsigned long *v)
+@@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v)
+ return 0;
+ }
+
+-static __init int rng_init(void)
++void __init pseries_rng_init(void)
+ {
+ struct device_node *dn;
+
+ dn = of_find_compatible_node(NULL, NULL, "ibm,random");
+ if (!dn)
+- return -ENODEV;
+-
+- pr_info("Registering arch random hook.\n");
+-
++ return;
+ ppc_md.get_random_seed = pseries_get_random_long;
+-
+ of_node_put(dn);
+- return 0;
+ }
+-machine_subsys_initcall(pseries, rng_init);
+diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
+index f79126f16258a..d25053755c8b8 100644
+--- a/arch/powerpc/platforms/pseries/setup.c
++++ b/arch/powerpc/platforms/pseries/setup.c
+@@ -816,7 +816,7 @@ static void __init pSeries_setup_arch(void)
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ vpa_init(boot_cpuid);
+
+- if (lppaca_shared_proc(get_lppaca())) {
++ if (lppaca_shared_proc()) {
+ static_branch_enable(&shared_processor);
+ pv_spinlocks_init();
+ }
+@@ -840,6 +840,8 @@ static void __init pSeries_setup_arch(void)
+
+ if (swiotlb_force == SWIOTLB_FORCE)
+ ppc_swiotlb_enable = 1;
++
++ pseries_rng_init();
+ }
+
+ static void pseries_panic(char *str)
+diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
+index b043e3936d215..b54f6fc27896f 100644
+--- a/arch/powerpc/platforms/pseries/vas.c
++++ b/arch/powerpc/platforms/pseries/vas.c
+@@ -324,7 +324,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+ * So no unpacking needs to be done.
+ */
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+- VPHN_FLAG_VCPU, smp_processor_id());
++ VPHN_FLAG_VCPU, hard_smp_processor_id());
+ if (rc != H_SUCCESS) {
+ pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+ goto out;
+@@ -441,8 +441,8 @@ static int vas_deallocate_window(struct vas_window *vwin)
+ atomic_dec(&caps->used_lpar_creds);
+ mutex_unlock(&vas_pseries_mutex);
+
+- put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
++ put_vas_user_win_ref(&vwin->task_ref);
+
+ kfree(win);
+ return 0;
+diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
+index 348f595810523..d08239ae2bcd2 100644
+--- a/arch/powerpc/purgatory/Makefile
++++ b/arch/powerpc/purgatory/Makefile
+@@ -4,6 +4,11 @@ KASAN_SANITIZE := n
+
+ targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c
+
++# When profile-guided optimization is enabled, llvm emits two different
++# overlapping text sections, which is not supported by kexec. Remove profile
++# optimization flags.
++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
++
+ LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
+
+ $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE
+diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
+index 1d33b7a5ea832..dc774b204c061 100644
+--- a/arch/powerpc/sysdev/dart_iommu.c
++++ b/arch/powerpc/sysdev/dart_iommu.c
+@@ -404,9 +404,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
+ }
+
+ /* Initialize the DART HW */
+- if (dart_init(dn) != 0)
++ if (dart_init(dn) != 0) {
++ of_node_put(dn);
+ return;
+-
++ }
+ /*
+ * U4 supports a DART bypass, we use it for 64-bit capable devices to
+ * improve performance. However, that only works for devices connected
+@@ -419,6 +420,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
+
+ /* Setup pci_dma ops */
+ set_pci_dma_ops(&dma_iommu_ops);
++ of_node_put(dn);
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S
+index efeeb1b885a17..329b9c4ae5429 100644
+--- a/arch/powerpc/sysdev/dcr-low.S
++++ b/arch/powerpc/sysdev/dcr-low.S
+@@ -11,7 +11,7 @@
+ #include <asm/export.h>
+
+ #define DCR_ACCESS_PROLOG(table) \
+- cmpli cr0,r3,1024; \
++ cmplwi cr0,r3,1024; \
+ rlwinm r3,r3,4,18,27; \
+ lis r5,table@h; \
+ ori r5,r5,table@l; \
+diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
+index 8963eaffb1b7b..39186ad6b3c3a 100644
+--- a/arch/powerpc/sysdev/fsl_gtm.c
++++ b/arch/powerpc/sysdev/fsl_gtm.c
+@@ -86,7 +86,7 @@ static LIST_HEAD(gtms);
+ */
+ struct gtm_timer *gtm_get_timer16(void)
+ {
+- struct gtm *gtm = NULL;
++ struct gtm *gtm;
+ int i;
+
+ list_for_each_entry(gtm, &gtms, list_node) {
+@@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void)
+ spin_unlock_irq(&gtm->lock);
+ }
+
+- if (gtm)
++ if (!list_empty(&gtms))
+ return ERR_PTR(-EBUSY);
+ return ERR_PTR(-ENODEV);
+ }
+diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
+index e6b06c3f81973..c55ccec0a1690 100644
+--- a/arch/powerpc/sysdev/fsl_msi.c
++++ b/arch/powerpc/sysdev/fsl_msi.c
+@@ -211,8 +211,10 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+ dev_err(&pdev->dev,
+ "node %pOF has an invalid fsl,msi phandle %u\n",
+ hose->dn, np->phandle);
++ of_node_put(np);
+ return -EINVAL;
+ }
++ of_node_put(np);
+ }
+
+ for_each_pci_msi_entry(entry, pdev) {
+diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
+index b8f76f3fd9941..a14a88e5025e5 100644
+--- a/arch/powerpc/sysdev/fsl_pci.c
++++ b/arch/powerpc/sysdev/fsl_pci.c
+@@ -520,6 +520,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+ struct resource rsrc;
+ const int *bus_range;
+ u8 hdr_type, progif;
++ u32 class_code;
+ struct device_node *dev;
+ struct ccsr_pci __iomem *pci;
+ u16 temp;
+@@ -593,6 +594,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+ PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS;
+ if (fsl_pcie_check_link(hose))
+ hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
++ /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */
++ if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) {
++ early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code);
++ class_code &= 0xff;
++ class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
++ early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code);
++ }
+ } else {
+ /*
+ * Set PBFR(PCI Bus Function Register)[10] = 1 to
+diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
+index 1d7a412056959..5ffaa60f1fa09 100644
+--- a/arch/powerpc/sysdev/fsl_pci.h
++++ b/arch/powerpc/sysdev/fsl_pci.h
+@@ -18,6 +18,7 @@ struct platform_device;
+
+ #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */
+ #define PCIE_LTSSM_L0 0x16 /* L0 state */
++#define PCIE_FSL_CSR_CLASSCODE 0x474 /* FSL GPEX CSR */
+ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */
+ #define PCIE_IP_REV_3_0 0x02080300 /* PCIE IP block version Rev3.0 */
+ #define PIWAR_EN 0x80000000 /* Enable */
+diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
+index ff7906b48ca1e..1bfc9afa8a1a1 100644
+--- a/arch/powerpc/sysdev/fsl_rio.c
++++ b/arch/powerpc/sysdev/fsl_rio.c
+@@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev)
+ if (rc) {
+ dev_err(&dev->dev, "Can't get %pOF property 'reg'\n",
+ rmu_node);
++ of_node_put(rmu_node);
+ goto err_rmu;
+ }
++ of_node_put(rmu_node);
+ rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs));
+ if (!rmu_regs_win) {
+ dev_err(&dev->dev, "Unable to map rmu register window\n");
+diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
+index 042bb38fa5c24..a06297aa3f1be 100644
+--- a/arch/powerpc/sysdev/tsi108_pci.c
++++ b/arch/powerpc/sysdev/tsi108_pci.c
+@@ -216,9 +216,8 @@ int __init tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary)
+
+ (hose)->ops = &tsi108_direct_pci_ops;
+
+- printk(KERN_INFO "Found tsi108 PCI host bridge at 0x%08x. "
+- "Firmware bus number: %d->%d\n",
+- rsrc.start, hose->first_busno, hose->last_busno);
++ pr_info("Found tsi108 PCI host bridge at 0x%pa. Firmware bus number: %d->%d\n",
++ &rsrc.start, hose->first_busno, hose->last_busno);
+
+ /* Interpret the "ranges" property */
+ /* This also maps the I/O region and sets isa_io/mem_base */
+diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
+index 675d708863d57..db0452e7c3515 100644
+--- a/arch/powerpc/sysdev/xics/icp-opal.c
++++ b/arch/powerpc/sysdev/xics/icp-opal.c
+@@ -196,6 +196,7 @@ int icp_opal_init(void)
+
+ printk("XICS: Using OPAL ICP fallbacks\n");
+
++ of_node_put(np);
+ return 0;
+ }
+
+diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
+index b9da317b7a2d7..4533d4a46ece2 100644
+--- a/arch/powerpc/sysdev/xics/ics-rtas.c
++++ b/arch/powerpc/sysdev/xics/ics-rtas.c
+@@ -37,8 +37,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
+
+ server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+
+- call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
+- server, DEFAULT_PRIORITY);
++ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
++ DEFAULT_PRIORITY);
+ if (call_status != 0) {
+ printk(KERN_ERR
+ "%s: ibm_set_xive irq %u server %x returned %d\n",
+@@ -47,7 +47,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
+ }
+
+ /* Now unmask the interrupt (often a no-op) */
+- call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq);
++ call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+@@ -69,7 +69,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+ if (hw_irq == XICS_IPI)
+ return;
+
+- call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq);
++ call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+@@ -77,8 +77,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+ }
+
+ /* Have to set XIVE to 0xff to be able to remove a slot */
+- call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
+- xics_default_server, 0xff);
++ call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
++ xics_default_server, 0xff);
+ if (call_status != 0) {
+ printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+ __func__, hw_irq, call_status);
+@@ -109,7 +109,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
+ if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+ return -1;
+
+- status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq);
++ status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+ if (status) {
+ printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+@@ -127,8 +127,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
+ pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq,
+ hw_irq, irq_server);
+
+- status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL,
+- hw_irq, irq_server, xics_status[1]);
++ status = rtas_call(ibm_set_xive, 3, 1, NULL,
++ hw_irq, irq_server, xics_status[1]);
+
+ if (status) {
+ printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+@@ -159,7 +159,7 @@ static int ics_rtas_check(struct ics *ics, unsigned int hw_irq)
+ return -EINVAL;
+
+ /* Check if RTAS knows about this interrupt */
+- rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq);
++ rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+ if (rc)
+ return -ENXIO;
+
+@@ -175,7 +175,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+ {
+ int rc, status[2];
+
+- rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec);
++ rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+ if (rc)
+ return -1;
+ return status[0];
+diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
+index 97796c6b63f04..785c292d104b7 100644
+--- a/arch/powerpc/sysdev/xive/Kconfig
++++ b/arch/powerpc/sysdev/xive/Kconfig
+@@ -3,7 +3,6 @@ config PPC_XIVE
+ bool
+ select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
+- select IRQ_DOMAIN_NOMAP
+
+ config PPC_XIVE_NATIVE
+ bool
+diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
+index c5d75c02ad8b5..7b69299c29123 100644
+--- a/arch/powerpc/sysdev/xive/common.c
++++ b/arch/powerpc/sysdev/xive/common.c
+@@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_irq_domain_ops = {
+
+ static void __init xive_init_host(struct device_node *np)
+ {
+- xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
+- &xive_irq_domain_ops, NULL);
++ xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+ if (WARN_ON(xive_irq_domain == NULL))
+ return;
+ irq_set_default_host(xive_irq_domain);
+diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
+index f143b6f111ac0..43bd2579d942b 100644
+--- a/arch/powerpc/sysdev/xive/spapr.c
++++ b/arch/powerpc/sysdev/xive/spapr.c
+@@ -13,6 +13,7 @@
+ #include <linux/of.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
++#include <linux/bitmap.h>
+ #include <linux/cpumask.h>
+ #include <linux/mm.h>
+ #include <linux/delay.h>
+@@ -55,7 +56,7 @@ static int xive_irq_bitmap_add(int base, int count)
+ spin_lock_init(&xibm->lock);
+ xibm->base = base;
+ xibm->count = count;
+- xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
++ xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL);
+ if (!xibm->bitmap) {
+ kfree(xibm);
+ return -ENOMEM;
+@@ -67,6 +68,17 @@ static int xive_irq_bitmap_add(int base, int count)
+ return 0;
+ }
+
++static void xive_irq_bitmap_remove_all(void)
++{
++ struct xive_irq_bitmap *xibm, *tmp;
++
++ list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) {
++ list_del(&xibm->list);
++ bitmap_free(xibm->bitmap);
++ kfree(xibm);
++ }
++}
++
+ static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+ {
+ int irq;
+@@ -425,6 +437,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+
+ data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+ if (!data->trig_mmio) {
++ iounmap(data->eoi_mmio);
+ pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+ return -ENOMEM;
+ }
+@@ -653,6 +666,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private)
+ struct xive_irq_bitmap *xibm;
+ char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
++ if (!buf)
++ return -ENOMEM;
++
+ list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+ memset(buf, 0, PAGE_SIZE);
+ bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count);
+@@ -701,6 +717,7 @@ static bool xive_get_max_prio(u8 *max_prio)
+ }
+
+ reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
++ of_node_put(rootdn);
+ if (!reg) {
+ pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+ return false;
+@@ -800,7 +817,7 @@ bool __init xive_spapr_init(void)
+ u32 val;
+ u32 len;
+ const __be32 *reg;
+- int i;
++ int i, err;
+
+ if (xive_spapr_disabled())
+ return false;
+@@ -816,32 +833,35 @@ bool __init xive_spapr_init(void)
+ /* Resource 1 is the OS ring TIMA */
+ if (of_address_to_resource(np, 1, &r)) {
+ pr_err("Failed to get thread mgmnt area resource\n");
+- return false;
++ goto err_put;
+ }
+ tima = ioremap(r.start, resource_size(&r));
+ if (!tima) {
+ pr_err("Failed to map thread mgmnt area\n");
+- return false;
++ goto err_put;
+ }
+
+ if (!xive_get_max_prio(&max_prio))
+- return false;
++ goto err_unmap;
+
+ /* Feed the IRQ number allocator with the ranges given in the DT */
+ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+ if (!reg) {
+ pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+- return false;
++ goto err_unmap;
+ }
+
+ if (len % (2 * sizeof(u32)) != 0) {
+ pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+- return false;
++ goto err_unmap;
+ }
+
+- for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2)
+- xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+- be32_to_cpu(reg[1]));
++ for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) {
++ err = xive_irq_bitmap_add(be32_to_cpu(reg[0]),
++ be32_to_cpu(reg[1]));
++ if (err < 0)
++ goto err_mem_free;
++ }
+
+ /* Iterate the EQ sizes and pick one */
+ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+@@ -852,10 +872,19 @@ bool __init xive_spapr_init(void)
+
+ /* Initialize XIVE core with our backend */
+ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+- return false;
++ goto err_mem_free;
+
++ of_node_put(np);
+ pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+ return true;
++
++err_mem_free:
++ xive_irq_bitmap_remove_all();
++err_unmap:
++ iounmap(tima);
++err_put:
++ of_node_put(np);
++ return false;
+ }
+
+ machine_arch_initcall(pseries, xive_core_debug_init);
+diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
+index 014e00e74d2b6..63792af004170 100755
+--- a/arch/powerpc/tools/relocs_check.sh
++++ b/arch/powerpc/tools/relocs_check.sh
+@@ -39,6 +39,7 @@ $objdump -R "$vmlinux" |
+ # R_PPC_NONE
+ grep -F -w -v 'R_PPC64_RELATIVE
+ R_PPC64_NONE
++R_PPC64_UADDR64
+ R_PPC_ADDR16_LO
+ R_PPC_ADDR16_HI
+ R_PPC_ADDR16_HA
+@@ -54,9 +55,3 @@ fi
+ num_bad=$(echo "$bad_relocs" | wc -l)
+ echo "WARNING: $num_bad bad relocations"
+ echo "$bad_relocs"
+-
+-# If we see this type of relocation it's an idication that
+-# we /may/ be using an old version of binutils.
+-if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then
+- echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel"
+-fi
+diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
+index dd8241c009e53..8b5277c3b1476 100644
+--- a/arch/powerpc/xmon/xmon.c
++++ b/arch/powerpc/xmon/xmon.c
+@@ -59,6 +59,7 @@
+ #ifdef CONFIG_PPC64
+ #include <asm/hvcall.h>
+ #include <asm/paca.h>
++#include <asm/lppaca.h>
+ #endif
+
+ #include "nonstdio.h"
+@@ -1528,9 +1529,9 @@ bpt_cmds(void)
+ cmd = inchar();
+
+ switch (cmd) {
+- static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+- int mode;
+- case 'd': /* bd - hardware data breakpoint */
++ case 'd': { /* bd - hardware data breakpoint */
++ static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
++ int mode;
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+@@ -1563,6 +1564,7 @@ bpt_cmds(void)
+
+ force_enable_xmon();
+ break;
++ }
+
+ case 'i': /* bi - hardware instr breakpoint */
+ if (xmon_is_ro) {
+@@ -3264,8 +3266,7 @@ static void show_task(struct task_struct *volatile tsk)
+ * appropriate for calling from xmon. This could be moved
+ * to a common, generic, routine used by both.
+ */
+- state = (p_state == 0) ? 'R' :
+- (p_state < 0) ? 'U' :
++ state = (p_state == TASK_RUNNING) ? 'R' :
+ (p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (p_state & TASK_STOPPED) ? 'T' :
+ (p_state & TASK_TRACED) ? 'C' :
+diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
+index f076cee11af69..8dd7f01ee031d 100644
+--- a/arch/riscv/Kconfig
++++ b/arch/riscv/Kconfig
+@@ -23,6 +23,7 @@ config RISCV
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_MMIOWB
++ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_SET_DIRECT_MAP if MMU
+ select ARCH_HAS_SET_MEMORY if MMU
+@@ -46,7 +47,7 @@ config RISCV
+ select CLINT_TIMER if !MMU
+ select COMMON_CLK
+ select EDAC_SUPPORT
+- select GENERIC_ARCH_TOPOLOGY if SMP
++ select GENERIC_ARCH_TOPOLOGY
+ select GENERIC_ATOMIC64 if !64BIT
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_EARLY_IOREMAP
+@@ -158,10 +159,9 @@ config PA_BITS
+
+ config PAGE_OFFSET
+ hex
+- default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
++ default 0xC0000000 if 32BIT
+ default 0x80000000 if 64BIT && !MMU
+- default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
+- default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
++ default 0xffffffe000000000 if 64BIT
+
+ config KASAN_SHADOW_OFFSET
+ hex
+@@ -270,24 +270,6 @@ config MODULE_SECTIONS
+ bool
+ select HAVE_MOD_ARCH_SPECIFIC
+
+-choice
+- prompt "Maximum Physical Memory"
+- default MAXPHYSMEM_1GB if 32BIT
+- default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
+- default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
+-
+- config MAXPHYSMEM_1GB
+- depends on 32BIT
+- bool "1GiB"
+- config MAXPHYSMEM_2GB
+- depends on 64BIT && CMODEL_MEDLOW
+- bool "2GiB"
+- config MAXPHYSMEM_128GB
+- depends on 64BIT && CMODEL_MEDANY
+- bool "128GiB"
+-endchoice
+-
+-
+ config SMP
+ bool "Symmetric Multi-Processing"
+ help
+@@ -380,6 +362,28 @@ config RISCV_BASE_PMU
+
+ endmenu
+
++config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
++ def_bool y
++ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
++ depends on AS_IS_GNU && AS_VERSION >= 23800
++ help
++ Newer binutils versions default to ISA spec version 20191213 which
++ moves some instructions from the I extension to the Zicsr and Zifencei
++ extensions.
++
++config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
++ def_bool y
++ depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
++ # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
++ depends on CC_IS_CLANG && CLANG_VERSION < 170000
++ help
++ Certain versions of clang do not support zicsr and zifencei via -march
++ but newer versions of binutils require it for the reasons noted in the
++ help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This
++ option causes an older ISA spec compatible with these older versions
++ of clang to be passed to GAS, which has the same result as passing zicsr
++ and zifencei to -march.
++
+ config FPU
+ bool "FPU support"
+ default y
+diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
+index b44d6ecdb46e5..0aacd7052585b 100644
+--- a/arch/riscv/Kconfig.erratas
++++ b/arch/riscv/Kconfig.erratas
+@@ -2,6 +2,7 @@ menu "CPU errata selection"
+
+ config RISCV_ERRATA_ALTERNATIVE
+ bool "RISC-V alternative scheme"
++ depends on !XIP_KERNEL
+ default y
+ help
+ This Kconfig allows the kernel to automatically patch the
+diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
+index 30676ebb16ebd..46a534f047931 100644
+--- a/arch/riscv/Kconfig.socs
++++ b/arch/riscv/Kconfig.socs
+@@ -14,8 +14,8 @@ config SOC_SIFIVE
+ select CLK_SIFIVE
+ select CLK_SIFIVE_PRCI
+ select SIFIVE_PLIC
+- select RISCV_ERRATA_ALTERNATIVE
+- select ERRATA_SIFIVE
++ select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL
++ select ERRATA_SIFIVE if !XIP_KERNEL
+ help
+ This enables support for SiFive SoC platform hardware.
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 0eb4568fbd290..0f17c6b6b7294 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -13,7 +13,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+- CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++ifeq ($(CONFIG_RISCV_ISA_C),y)
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
++endif
+ endif
+
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
+@@ -39,6 +43,7 @@ else
+ endif
+
+ ifeq ($(CONFIG_LD_IS_LLD),y)
++ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0)
+ KBUILD_CFLAGS += -mno-relax
+ KBUILD_AFLAGS += -mno-relax
+ ifndef CONFIG_AS_IS_LLVM
+@@ -46,12 +51,21 @@ ifndef CONFIG_AS_IS_LLVM
+ KBUILD_AFLAGS += -Wa,-mno-relax
+ endif
+ endif
++endif
+
+ # ISA string setting
+ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
+ riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
+ riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
+ riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
++
++ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
++KBUILD_CFLAGS += -Wa,-misa-spec=2.2
++KBUILD_AFLAGS += -Wa,-misa-spec=2.2
++else
++riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei
++endif
++
+ KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
+ KBUILD_AFLAGS += -march=$(riscv-march-y)
+
+@@ -68,7 +82,11 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
+ KBUILD_CFLAGS += -fno-omit-frame-pointer
+ endif
+
++# Avoid generating .eh_frame sections.
++KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
++
+ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
++KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
+
+ # GCC versions that support the "-mstrict-align" option default to allowing
+ # unaligned accesses. While unaligned accesses are explicitly allowed in the
+@@ -108,11 +126,13 @@ PHONY += vdso_install
+ vdso_install:
+ $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+
++ifeq ($(KBUILD_EXTMOD),)
+ ifeq ($(CONFIG_MMU),y)
+ prepare: vdso_prepare
+ vdso_prepare: prepare0
+ $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
+ endif
++endif
+
+ ifneq ($(CONFIG_XIP_KERNEL),y)
+ ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
+diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
+index 5e8ca81424821..fa9162e3afa3f 100644
+--- a/arch/riscv/boot/dts/canaan/k210.dtsi
++++ b/arch/riscv/boot/dts/canaan/k210.dtsi
+@@ -65,6 +65,18 @@
+ compatible = "riscv,cpu-intc";
+ };
+ };
++
++ cpu-map {
++ cluster0 {
++ core0 {
++ cpu = <&cpu0>;
++ };
++
++ core1 {
++ cpu = <&cpu1>;
++ };
++ };
++ };
+ };
+
+ sram: memory@80000000 {
+@@ -113,7 +125,8 @@
+ compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
+ reg = <0xC000000 0x4000000>;
+ interrupt-controller;
+- interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>;
++ interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>,
++ <&cpu1_intc 11>, <&cpu1_intc 9>;
+ riscv,ndev = <65>;
+ };
+
+diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+index 0bcaf35045e79..82e7f8069ae77 100644
+--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
++++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+@@ -203,6 +203,8 @@
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
++ spi-tx-bus-width = <4>;
++ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+index ac8a03f5867ad..8d335233853a7 100644
+--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
++++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+@@ -205,6 +205,8 @@
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
++ spi-tx-bus-width = <4>;
++ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+index 623998194bc18..6703cfc055887 100644
+--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
++++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+@@ -213,6 +213,8 @@
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
++ spi-tx-bus-width = <4>;
++ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+index cf605ba0d67e4..ac0b56f7d2c9f 100644
+--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
++++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+@@ -178,6 +178,8 @@
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
++ spi-tx-bus-width = <4>;
++ spi-rx-bus-width = <4>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+index b254c60589a1c..cce5eca31f257 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+@@ -12,7 +12,7 @@
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Microchip PolarFire-SoC Icicle Kit";
+- compatible = "microchip,mpfs-icicle-kit";
++ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
+
+ aliases {
+ ethernet0 = &emac1;
+@@ -56,8 +56,17 @@
+ status = "okay";
+ };
+
+-&sdcard {
++&mmc {
+ status = "okay";
++
++ bus-width = <4>;
++ disable-wp;
++ cap-sd-highspeed;
++ card-detect-delay = <200>;
++ sd-uhs-sdr12;
++ sd-uhs-sdr25;
++ sd-uhs-sdr50;
++ sd-uhs-sdr104;
+ };
+
+ &emac0 {
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+index 9d2fbbc1f7778..4ef4bcb748729 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+@@ -6,11 +6,8 @@
+ / {
+ #address-cells = <2>;
+ #size-cells = <2>;
+- model = "Microchip MPFS Icicle Kit";
+- compatible = "microchip,mpfs-icicle-kit";
+-
+- chosen {
+- };
++ model = "Microchip PolarFire SoC";
++ compatible = "microchip,mpfs";
+
+ cpus {
+ #address-cells = <1>;
+@@ -262,39 +259,14 @@
+ status = "disabled";
+ };
+
+- emmc: mmc@20008000 {
++ /* Common node entry for emmc/sd */
++ mmc: mmc@20008000 {
+ compatible = "cdns,sd4hc";
+ reg = <0x0 0x20008000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <88 89>;
+ pinctrl-names = "default";
+ clocks = <&clkcfg 6>;
+- bus-width = <4>;
+- cap-mmc-highspeed;
+- mmc-ddr-3_3v;
+- max-frequency = <200000000>;
+- non-removable;
+- no-sd;
+- no-sdio;
+- voltage-ranges = <3300 3300>;
+- status = "disabled";
+- };
+-
+- sdcard: sdhc@20008000 {
+- compatible = "cdns,sd4hc";
+- reg = <0x0 0x20008000 0x0 0x1000>;
+- interrupt-parent = <&plic>;
+- interrupts = <88>;
+- pinctrl-names = "default";
+- clocks = <&clkcfg 6>;
+- bus-width = <4>;
+- disable-wp;
+- cap-sd-highspeed;
+- card-detect-delay = <200>;
+- sd-uhs-sdr12;
+- sd-uhs-sdr25;
+- sd-uhs-sdr50;
+- sd-uhs-sdr104;
+ max-frequency = <200000000>;
+ status = "disabled";
+ };
+diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+index 7db8610534834..64c06c9b41dc8 100644
+--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
++++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+@@ -166,7 +166,7 @@
+ clocks = <&prci PRCI_CLK_TLCLK>;
+ status = "disabled";
+ };
+- dma: dma@3000000 {
++ dma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic0>;
+diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+index abbb960f90a00..f72bb158a7ab3 100644
+--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
++++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+@@ -134,6 +134,30 @@
+ interrupt-controller;
+ };
+ };
++
++ cpu-map {
++ cluster0 {
++ core0 {
++ cpu = <&cpu0>;
++ };
++
++ core1 {
++ cpu = <&cpu1>;
++ };
++
++ core2 {
++ cpu = <&cpu2>;
++ };
++
++ core3 {
++ cpu = <&cpu3>;
++ };
++
++ core4 {
++ cpu = <&cpu4>;
++ };
++ };
++ };
+ };
+ soc {
+ #address-cells = <2>;
+@@ -304,7 +328,7 @@
+ bus-range = <0x0 0xff>;
+ ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */
+ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */
+- <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */
++ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x10000000>, /* mem */
+ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */
+ num-lanes = <0x8>;
+ interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>;
+diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+index 60846e88ae4b1..2f4d677c9c4ff 100644
+--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
++++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+@@ -3,6 +3,8 @@
+
+ #include "fu540-c000.dtsi"
+ #include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/leds/common.h>
++#include <dt-bindings/pwm/pwm.h>
+
+ /* Clock frequency (in Hz) of the PCB crystal for rtcclk */
+ #define RTCCLK_FREQ 1000000
+@@ -46,6 +48,42 @@
+ compatible = "gpio-restart";
+ gpios = <&gpio 10 GPIO_ACTIVE_LOW>;
+ };
++
++ led-controller {
++ compatible = "pwm-leds";
++
++ led-d1 {
++ pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>;
++ active-low;
++ color = <LED_COLOR_ID_GREEN>;
++ max-brightness = <255>;
++ label = "d1";
++ };
++
++ led-d2 {
++ pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>;
++ active-low;
++ color = <LED_COLOR_ID_GREEN>;
++ max-brightness = <255>;
++ label = "d2";
++ };
++
++ led-d3 {
++ pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>;
++ active-low;
++ color = <LED_COLOR_ID_GREEN>;
++ max-brightness = <255>;
++ label = "d3";
++ };
++
++ led-d4 {
++ pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>;
++ active-low;
++ color = <LED_COLOR_ID_GREEN>;
++ max-brightness = <255>;
++ label = "d4";
++ };
++ };
+ };
+
+ &uart0 {
+@@ -80,6 +118,7 @@
+ spi-max-frequency = <20000000>;
+ voltage-ranges = <3300 3300>;
+ disable-wp;
++ gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+index 2e4ea84f27e77..b40990210fb50 100644
+--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
++++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+@@ -2,6 +2,7 @@
+ /* Copyright (c) 2020 SiFive, Inc */
+
+ #include "fu740-c000.dtsi"
++#include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ /* Clock frequency (in Hz) of the PCB crystal for rtcclk */
+@@ -228,6 +229,7 @@
+ spi-max-frequency = <20000000>;
+ voltage-ranges = <3300 3300>;
+ disable-wp;
++ gpios = <&gpio 15 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
+index 4ebc80315f013..f2a2f9c9ed49c 100644
+--- a/arch/riscv/configs/defconfig
++++ b/arch/riscv/configs/defconfig
+@@ -72,9 +72,11 @@ CONFIG_GPIOLIB=y
+ CONFIG_GPIO_SIFIVE=y
+ # CONFIG_PTP_1588_CLOCK is not set
+ CONFIG_POWER_RESET=y
+-CONFIG_DRM=y
+-CONFIG_DRM_RADEON=y
+-CONFIG_DRM_VIRTIO_GPU=y
++CONFIG_DRM=m
++CONFIG_DRM_RADEON=m
++CONFIG_DRM_NOUVEAU=m
++CONFIG_DRM_VIRTIO_GPU=m
++CONFIG_FB=y
+ CONFIG_FRAMEBUFFER_CONSOLE=y
+ CONFIG_USB=y
+ CONFIG_USB_XHCI_HCD=y
+diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
+index b16a2a12c82a8..3b9f83221f9c2 100644
+--- a/arch/riscv/configs/nommu_k210_defconfig
++++ b/arch/riscv/configs/nommu_k210_defconfig
+@@ -29,8 +29,6 @@ CONFIG_EMBEDDED=y
+ CONFIG_SLOB=y
+ # CONFIG_MMU is not set
+ CONFIG_SOC_CANAAN=y
+-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
+-CONFIG_MAXPHYSMEM_2GB=y
+ CONFIG_SMP=y
+ CONFIG_NR_CPUS=2
+ CONFIG_CMDLINE="earlycon console=ttySIF0"
+diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
+index 61f887f654199..15d1fd0a70184 100644
+--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
++++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
+@@ -21,11 +21,9 @@ CONFIG_EMBEDDED=y
+ CONFIG_SLOB=y
+ # CONFIG_MMU is not set
+ CONFIG_SOC_CANAAN=y
+-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
+-CONFIG_MAXPHYSMEM_2GB=y
+ CONFIG_SMP=y
+ CONFIG_NR_CPUS=2
+-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
++CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
+ CONFIG_CMDLINE_FORCE=y
+ # CONFIG_SECCOMP is not set
+ # CONFIG_STACKPROTECTOR is not set
+diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
+index e046a0babde43..f224be697785f 100644
+--- a/arch/riscv/configs/nommu_virt_defconfig
++++ b/arch/riscv/configs/nommu_virt_defconfig
+@@ -27,7 +27,6 @@ CONFIG_SLOB=y
+ # CONFIG_SLAB_MERGE_DEFAULT is not set
+ # CONFIG_MMU is not set
+ CONFIG_SOC_VIRT=y
+-CONFIG_MAXPHYSMEM_2GB=y
+ CONFIG_SMP=y
+ CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
+ CONFIG_CMDLINE_FORCE=y
+diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
+index 434ef5b645998..cdd113e7a2912 100644
+--- a/arch/riscv/configs/rv32_defconfig
++++ b/arch/riscv/configs/rv32_defconfig
+@@ -71,6 +71,7 @@ CONFIG_POWER_RESET=y
+ CONFIG_DRM=y
+ CONFIG_DRM_RADEON=y
+ CONFIG_DRM_VIRTIO_GPU=y
++CONFIG_FB=y
+ CONFIG_FRAMEBUFFER_CONSOLE=y
+ CONFIG_USB=y
+ CONFIG_USB_XHCI_HCD=y
+diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
+index 67406c3763890..0377ce0fcc726 100644
+--- a/arch/riscv/include/asm/alternative-macros.h
++++ b/arch/riscv/include/asm/alternative-macros.h
+@@ -23,9 +23,9 @@
+ 888 :
+ \new_c
+ 889 :
+- .previous
+ .org . - (889b - 888b) + (887b - 886b)
+ .org . - (887b - 886b) + (889b - 888b)
++ .previous
+ .endif
+ .endm
+
+@@ -60,9 +60,9 @@
+ "888 :\n" \
+ new_c "\n" \
+ "889 :\n" \
+- ".previous\n" \
+ ".org . - (887b - 886b) + (889b - 888b)\n" \
+ ".org . - (889b - 888b) + (887b - 886b)\n" \
++ ".previous\n" \
+ ".endif\n"
+
+ #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \
+diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
+index 618d7c5af1a2d..e15a1c9f1cf88 100644
+--- a/arch/riscv/include/asm/asm.h
++++ b/arch/riscv/include/asm/asm.h
+@@ -23,6 +23,7 @@
+ #define REG_L __REG_SEL(ld, lw)
+ #define REG_S __REG_SEL(sd, sw)
+ #define REG_SC __REG_SEL(sc.d, sc.w)
++#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq)
+ #define REG_ASM __REG_SEL(.dword, .word)
+ #define SZREG __REG_SEL(8, 4)
+ #define LGREG __REG_SEL(3, 2)
+diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
+index 49b398fe99f1b..1bb8662875dda 100644
+--- a/arch/riscv/include/asm/efi.h
++++ b/arch/riscv/include/asm/efi.h
+@@ -10,10 +10,10 @@
+ #include <asm/mmu_context.h>
+ #include <asm/ptrace.h>
+ #include <asm/tlbflush.h>
++#include <asm/pgalloc.h>
+
+ #ifdef CONFIG_EFI
+ extern void efi_init(void);
+-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+ #else
+ #define efi_init()
+ #endif
+@@ -21,7 +21,10 @@ extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+-#define arch_efi_call_virt_setup() efi_virtmap_load()
++#define arch_efi_call_virt_setup() ({ \
++ sync_kernel_mappings(efi_mm.pgd); \
++ efi_virtmap_load(); \
++ })
+ #define arch_efi_call_virt_teardown() efi_virtmap_unload()
+
+ #define arch_efi_call_virt(p, f, args...) p->f(args)
+diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
+index 54cbf07fb4e96..8839cd2b28d14 100644
+--- a/arch/riscv/include/asm/fixmap.h
++++ b/arch/riscv/include/asm/fixmap.h
+@@ -22,6 +22,14 @@
+ */
+ enum fixed_addresses {
+ FIX_HOLE,
++ /*
++ * The fdt fixmap mapping must be PMD aligned and will be mapped
++ * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit.
++ */
++ FIX_FDT_END,
++ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
++
++ /* Below fixmaps will be mapped using fixmap_pte */
+ FIX_PTE,
+ FIX_PMD,
+ FIX_TEXT_POKE1,
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index 04dad33800418..d47d87c2d7e3d 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ * return address (original pc + 4)
+ *
++ *<ftrace enable>:
++ * 0: auipc t0/ra, 0x?
++ * 4: jalr t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK (0xfffff000)
+ #define AUIPC_PAD (0x00001000)
+ #define JALR_SHIFT 20
+-#define JALR_BASIC (0x000080e7)
+-#define AUIPC_BASIC (0x00000097)
++#define JALR_RA (0x000080e7)
++#define AUIPC_RA (0x00000097)
++#define JALR_T0 (0x000282e7)
++#define AUIPC_T0 (0x00000297)
+ #define NOP4 (0x00000013)
+
+-#define make_call(caller, callee, call) \
++#define to_jalr_t0(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset) \
++ ((offset & JALR_SIGN_MASK) ? \
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call) \
+ do { \
+- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
+- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_t0(offset); \
++ call[1] = to_jalr_t0(offset); \
+ } while (0)
+
+-#define to_jalr_insn(offset) \
+- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+
+-#define to_auipc_insn(offset) \
++#define to_auipc_ra(offset) \
+ ((offset & JALR_SIGN_MASK) ? \
+- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \
+- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call) \
++do { \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_ra(offset); \
++ call[1] = to_jalr_ra(offset); \
++} while (0)
+
+ /*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+@@ -83,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+ #define ftrace_init_nop ftrace_init_nop
+ #endif
+
+-#endif
++#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ #endif /* _ASM_RISCV_FTRACE_H */
+diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
+index a5c2ca1d1cd8b..ec19d6afc8965 100644
+--- a/arch/riscv/include/asm/hugetlb.h
++++ b/arch/riscv/include/asm/hugetlb.h
+@@ -5,4 +5,10 @@
+ #include <asm-generic/hugetlb.h>
+ #include <asm/page.h>
+
++static inline void arch_clear_hugepage_flags(struct page *page)
++{
++ clear_bit(PG_dcache_clean, &page->flags);
++}
++#define arch_clear_hugepage_flags arch_clear_hugepage_flags
++
+ #endif /* _ASM_RISCV_HUGETLB_H */
+diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
+index 69605a4742706..92080a2279372 100644
+--- a/arch/riscv/include/asm/io.h
++++ b/arch/riscv/include/asm/io.h
+@@ -101,9 +101,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr))
+ __io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr))
+ __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
+ __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
+-#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
+-#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
+-#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)
++#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count)
++#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count)
++#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count)
+
+ __io_writes_outs(writes, u8, b, __io_bw(), __io_aw())
+ __io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
+@@ -115,22 +115,22 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
+ __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw())
+ __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
+ __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
+-#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count)
+-#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count)
+-#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)
++#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count)
++#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count)
++#define outsl(addr, buffer, count) __outsl(PCI_IOBASE + (addr), buffer, count)
+
+ #ifdef CONFIG_64BIT
+ __io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr))
+ #define readsq(addr, buffer, count) __readsq(addr, buffer, count)
+
+ __io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr))
+-#define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count)
++#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count)
+
+ __io_writes_outs(writes, u64, q, __io_bw(), __io_aw())
+ #define writesq(addr, buffer, count) __writesq(addr, buffer, count)
+
+ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw())
+-#define outsq(addr, buffer, count) __outsq((void __iomem *)addr, buffer, count)
++#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count)
+ #endif
+
+ #include <asm-generic/io.h>
+diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h
+index d6c277992f76a..b53891964ae03 100644
+--- a/arch/riscv/include/asm/irq_work.h
++++ b/arch/riscv/include/asm/irq_work.h
+@@ -4,7 +4,7 @@
+
+ static inline bool arch_irq_work_has_interrupt(void)
+ {
+- return true;
++ return IS_ENABLED(CONFIG_SMP);
+ }
+ extern void arch_irq_work_raise(void);
+ #endif /* _ASM_RISCV_IRQ_WORK_H */
+diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h
+index 38af2ec7b9bf9..729991e8f7825 100644
+--- a/arch/riscv/include/asm/jump_label.h
++++ b/arch/riscv/include/asm/jump_label.h
+@@ -18,6 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key,
+ bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+@@ -39,6 +40,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
+ bool branch)
+ {
+ asm_volatile_goto(
++ " .align 2 \n\t"
+ " .option push \n\t"
+ " .option norelax \n\t"
+ " .option norvc \n\t"
+diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
+index aff6c33ab0c08..4c58ee7f95ecf 100644
+--- a/arch/riscv/include/asm/mmio.h
++++ b/arch/riscv/include/asm/mmio.h
+@@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
+ * Relaxed I/O memory access primitives. These follow the Device memory
+ * ordering rules but do not guarantee any ordering relative to Normal memory
+ * accesses. These are defined to order the indicated access (either a read or
+- * write) with all other I/O memory accesses. Since the platform specification
+- * defines that all I/O regions are strongly ordered on channel 2, no explicit
+- * fences are required to enforce this ordering.
++ * write) with all other I/O memory accesses to the same peripheral. Since the
++ * platform specification defines that all I/O regions are strongly ordered on
++ * channel 0, no explicit fences are required to enforce this ordering.
+ */
+ /* FIXME: These are now the same as asm-generic */
+ #define __io_rbr() do {} while (0)
+@@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
+ #endif
+
+ /*
+- * I/O memory access primitives. Reads are ordered relative to any
+- * following Normal memory access. Writes are ordered relative to any prior
+- * Normal memory access. The memory barriers here are necessary as RISC-V
++ * I/O memory access primitives. Reads are ordered relative to any following
++ * Normal memory read and delay() loop. Writes are ordered relative to any
++ * prior Normal memory write. The memory barriers here are necessary as RISC-V
+ * doesn't define any ordering between the memory space and the I/O space.
+ */
+ #define __io_br() do {} while (0)
+-#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory")
+-#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory")
++#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
++#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
+ #define __io_aw() mmiowb_set_pending()
+
+ #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
+diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h
+index 4254ff2ff0494..1075beae1ac64 100644
+--- a/arch/riscv/include/asm/module.lds.h
++++ b/arch/riscv/include/asm/module.lds.h
+@@ -2,8 +2,8 @@
+ /* Copyright (C) 2017 Andes Technology Corporation */
+ #ifdef CONFIG_MODULE_SECTIONS
+ SECTIONS {
+- .plt (NOLOAD) : { BYTE(0) }
+- .got (NOLOAD) : { BYTE(0) }
+- .got.plt (NOLOAD) : { BYTE(0) }
++ .plt : { BYTE(0) }
++ .got : { BYTE(0) }
++ .got.plt : { BYTE(0) }
+ }
+ #endif
+diff --git a/arch/riscv/include/asm/parse_asm.h b/arch/riscv/include/asm/parse_asm.h
+index f36368de839f5..3cd00332d70f5 100644
+--- a/arch/riscv/include/asm/parse_asm.h
++++ b/arch/riscv/include/asm/parse_asm.h
+@@ -3,6 +3,9 @@
+ * Copyright (C) 2020 SiFive
+ */
+
++#ifndef _ASM_RISCV_INSN_H
++#define _ASM_RISCV_INSN_H
++
+ #include <linux/bits.h>
+
+ /* The bit field of immediate value in I-type instruction */
+@@ -217,3 +220,5 @@ static inline bool is_ ## INSN_NAME ## _insn(long insn) \
+ (RVC_X(x_, RVC_B_IMM_5_OPOFF, RVC_B_IMM_5_MASK) << RVC_B_IMM_5_OFF) | \
+ (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \
+ (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); })
++
++#endif /* _ASM_RISCV_INSN_H */
+diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
+index 9a7d7346001ee..98d9de07cba17 100644
+--- a/arch/riscv/include/asm/patch.h
++++ b/arch/riscv/include/asm/patch.h
+@@ -9,4 +9,6 @@
+ int patch_text_nosync(void *addr, const void *insns, size_t len);
+ int patch_text(void *addr, u32 insn);
+
++extern int riscv_patch_in_stop_machine;
++
+ #endif /* _ASM_RISCV_PATCH_H */
+diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
+index 0af6933a7100d..98e0403324823 100644
+--- a/arch/riscv/include/asm/pgalloc.h
++++ b/arch/riscv/include/asm/pgalloc.h
+@@ -38,6 +38,13 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ }
+ #endif /* __PAGETABLE_PMD_FOLDED */
+
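++/* Copy the kernel portion of init_mm's PGD entries into a newly allocated PGD. */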
++static inline void sync_kernel_mappings(pgd_t *pgd)
++{
++ memcpy(pgd + USER_PTRS_PER_PGD,
++ init_mm.pgd + USER_PTRS_PER_PGD,
++ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++}
++
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ pgd_t *pgd;
+@@ -46,9 +53,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ if (likely(pgd != NULL)) {
+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ /* Copy kernel mappings */
+- memcpy(pgd + USER_PTRS_PER_PGD,
+- init_mm.pgd + USER_PTRS_PER_PGD,
+- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++ sync_kernel_mappings(pgd);
+ }
+ return pgd;
+ }
+diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
+index 39b550310ec64..397cb945b16eb 100644
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -66,9 +66,13 @@
+
+ #define FIXADDR_TOP PCI_IO_START
+ #ifdef CONFIG_64BIT
+-#define FIXADDR_SIZE PMD_SIZE
++#define MAX_FDT_SIZE PMD_SIZE
++#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
++#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE)
+ #else
+-#define FIXADDR_SIZE PGDIR_SIZE
++#define MAX_FDT_SIZE PGDIR_SIZE
++#define FIX_FDT_SIZE MAX_FDT_SIZE
++#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE)
+ #endif
+ #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
+index a7d2811f35365..62d0e6e61da83 100644
+--- a/arch/riscv/include/asm/smp.h
++++ b/arch/riscv/include/asm/smp.h
+@@ -43,7 +43,6 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
+ void arch_send_call_function_single_ipi(int cpu);
+
+ int riscv_hartid_to_cpuid(int hartid);
+-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
+
+ /* Set custom IPI operations */
+ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
+@@ -85,13 +84,6 @@ static inline unsigned long cpuid_to_hartid_map(int cpu)
+ return boot_cpu_hartid;
+ }
+
+-static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
+- struct cpumask *out)
+-{
+- cpumask_clear(out);
+- cpumask_set_cpu(boot_cpu_hartid, out);
+-}
+-
+ static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
+ {
+ }
+@@ -102,6 +94,8 @@ static inline void riscv_clear_ipi(void)
+
+ #endif /* CONFIG_SMP */
+
++void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
++
+ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
+ bool cpu_has_hotplug(unsigned int cpu);
+ #else
+diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
+index 60da0dcacf145..42d97043e5376 100644
+--- a/arch/riscv/include/asm/thread_info.h
++++ b/arch/riscv/include/asm/thread_info.h
+@@ -11,11 +11,17 @@
+ #include <asm/page.h>
+ #include <linux/const.h>
+
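++/* KASAN instrumentation increases stack usage, so allocate kernel stacks one order larger. */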
++#ifdef CONFIG_KASAN
++#define KASAN_STACK_ORDER 1
++#else
++#define KASAN_STACK_ORDER 0
++#endif
++
+ /* thread information allocation */
+ #ifdef CONFIG_64BIT
+-#define THREAD_SIZE_ORDER (2)
++#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
+ #else
+-#define THREAD_SIZE_ORDER (1)
++#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER)
+ #endif
+ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+@@ -36,6 +42,9 @@
+
+ #ifndef __ASSEMBLY__
+
++extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
++extern unsigned long spin_shadow_stack;
++
+ #include <asm/processor.h>
+ #include <asm/csr.h>
+
+diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
+index 507cae273bc62..d6a7428f6248d 100644
+--- a/arch/riscv/include/asm/timex.h
++++ b/arch/riscv/include/asm/timex.h
+@@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void)
+ static inline unsigned long random_get_entropy(void)
+ {
+ if (unlikely(clint_time_val == NULL))
+- return 0;
++ return random_get_entropy_fallback();
+ return get_cycles();
+ }
+ #define random_get_entropy() random_get_entropy()
+diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
+index 801019381dea3..a09196f8de688 100644
+--- a/arch/riscv/include/asm/tlbflush.h
++++ b/arch/riscv/include/asm/tlbflush.h
+@@ -12,6 +12,8 @@
+ #include <asm/errata_list.h>
+
+ #ifdef CONFIG_MMU
++extern unsigned long asid_mask;
++
+ static inline void local_flush_tlb_all(void)
+ {
+ __asm__ __volatile__ ("sfence.vma" : : : "memory");
+diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
+index f314ff44c48d1..d4d628af21a45 100644
+--- a/arch/riscv/include/asm/uaccess.h
++++ b/arch/riscv/include/asm/uaccess.h
+@@ -216,7 +216,7 @@ do { \
+ might_fault(); \
+ access_ok(__p, sizeof(*__p)) ? \
+ __get_user((x), __p) : \
+- ((x) = 0, -EFAULT); \
++ ((x) = (__force __typeof__(x))0, -EFAULT); \
+ })
+
+ #define __put_user_asm(insn, x, ptr, err) \
+diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
+index 6c316093a1e59..977ee6181dabf 100644
+--- a/arch/riscv/include/asm/unistd.h
++++ b/arch/riscv/include/asm/unistd.h
+@@ -9,7 +9,6 @@
+ */
+
+ #define __ARCH_WANT_SYS_CLONE
+-#define __ARCH_WANT_MEMFD_SECRET
+
+ #include <uapi/asm/unistd.h>
+
+diff --git a/arch/riscv/include/uapi/asm/setup.h b/arch/riscv/include/uapi/asm/setup.h
+new file mode 100644
+index 0000000000000..66b13a5228808
+--- /dev/null
++++ b/arch/riscv/include/uapi/asm/setup.h
+@@ -0,0 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
++
++#ifndef _UAPI_ASM_RISCV_SETUP_H
++#define _UAPI_ASM_RISCV_SETUP_H
++
++#define COMMAND_LINE_SIZE 1024
++
++#endif /* _UAPI_ASM_RISCV_SETUP_H */
+diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
+index 8062996c2dfd0..d95fbf5846b0b 100644
+--- a/arch/riscv/include/uapi/asm/unistd.h
++++ b/arch/riscv/include/uapi/asm/unistd.h
+@@ -21,6 +21,7 @@
+ #endif /* __LP64__ */
+
+ #define __ARCH_WANT_SYS_CLONE3
++#define __ARCH_WANT_MEMFD_SECRET
+
+ #include <asm-generic/unistd.h>
+
+diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
+index 3397ddac1a30c..16308ef1e5787 100644
+--- a/arch/riscv/kernel/Makefile
++++ b/arch/riscv/kernel/Makefile
+@@ -50,6 +50,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
+ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
+ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
+
++obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
++
+ obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
+ obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+ obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
+diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
+index df84e0c13db18..66ddfba1cfbef 100644
+--- a/arch/riscv/kernel/cpu-hotplug.c
++++ b/arch/riscv/kernel/cpu-hotplug.c
+@@ -12,6 +12,7 @@
+ #include <linux/sched/hotplug.h>
+ #include <asm/irq.h>
+ #include <asm/cpu_ops.h>
++#include <asm/numa.h>
+ #include <asm/sbi.h>
+
+ void cpu_stop(void);
+@@ -46,6 +47,7 @@ int __cpu_disable(void)
+ return ret;
+
+ remove_cpu_topology(cpu);
++ numa_remove_cpu(cpu);
+ set_cpu_online(cpu, false);
+ irq_migrate_all_off_this_cpu();
+
+diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S
+index 7832fb763abac..b2a1908c0463e 100644
+--- a/arch/riscv/kernel/crash_save_regs.S
++++ b/arch/riscv/kernel/crash_save_regs.S
+@@ -44,7 +44,7 @@ SYM_CODE_START(riscv_crash_save_regs)
+ REG_S t6, PT_T6(a0) /* x31 */
+
+ csrr t1, CSR_STATUS
+- csrr t2, CSR_EPC
++ auipc t2, 0x0
+ csrr t3, CSR_TVAL
+ csrr t4, CSR_CAUSE
+
+diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
+index 0241592982314..1aa540350abd3 100644
+--- a/arch/riscv/kernel/efi.c
++++ b/arch/riscv/kernel/efi.c
+@@ -65,7 +65,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
+
+ if (md->attribute & EFI_MEMORY_RO) {
+ val = pte_val(pte) & ~_PAGE_WRITE;
+- val = pte_val(pte) | _PAGE_READ;
++ val |= _PAGE_READ;
+ pte = __pte(val);
+ }
+ if (md->attribute & EFI_MEMORY_XP) {
+diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
+index 98f502654edd3..5ca2860cc06cd 100644
+--- a/arch/riscv/kernel/entry.S
++++ b/arch/riscv/kernel/entry.S
+@@ -108,7 +108,7 @@ _save_context:
+ .option pop
+
+ #ifdef CONFIG_TRACE_IRQFLAGS
+- call trace_hardirqs_off
++ call __trace_hardirqs_off
+ #endif
+
+ #ifdef CONFIG_CONTEXT_TRACKING
+@@ -144,7 +144,7 @@ skip_context_tracking:
+ li t0, EXC_BREAKPOINT
+ beq s4, t0, 1f
+ #ifdef CONFIG_TRACE_IRQFLAGS
+- call trace_hardirqs_on
++ call __trace_hardirqs_on
+ #endif
+ csrs CSR_STATUS, SR_IE
+
+@@ -235,7 +235,7 @@ ret_from_exception:
+ REG_L s0, PT_STATUS(sp)
+ csrc CSR_STATUS, SR_IE
+ #ifdef CONFIG_TRACE_IRQFLAGS
+- call trace_hardirqs_off
++ call __trace_hardirqs_off
+ #endif
+ #ifdef CONFIG_RISCV_M_MODE
+ /* the MPP value is too large to be used as an immediate arg for addi */
+@@ -271,10 +271,10 @@ restore_all:
+ REG_L s1, PT_STATUS(sp)
+ andi t0, s1, SR_PIE
+ beqz t0, 1f
+- call trace_hardirqs_on
++ call __trace_hardirqs_on
+ j 2f
+ 1:
+- call trace_hardirqs_off
++ call __trace_hardirqs_off
+ 2:
+ #endif
+ REG_L a0, PT_STATUS(sp)
+@@ -387,6 +387,19 @@ handle_syscall_trace_exit:
+
+ #ifdef CONFIG_VMAP_STACK
+ handle_kernel_stack_overflow:
++ /*
++ * Takes the pseudo-spinlock for the shadow stack, in case multiple
++ * harts are concurrently overflowing their kernel stacks. We could
++ * store any value here, but since we're overflowing the kernel stack
++ * already, we only have SP to use as a scratch register. So we just
++ * swap in the address of the spinlock, as that's definitely non-zero.
++ *
++ * Pairs with a store_release in handle_bad_stack().
++ */
++1: la sp, spin_shadow_stack
++ REG_AMOSWAP_AQ sp, sp, (sp)
++ bnez sp, 1b
++
+ la sp, shadow_stack
+ addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
+
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 7f1e5203de886..1bf92cfa6764e 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -15,11 +15,21 @@
+ int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+ {
+ mutex_lock(&text_mutex);
++
++ /*
++ * The code sequences we use for ftrace can't be patched while the
++ * kernel is running, so we need to use stop_machine() to modify them
++ * for now. This doesn't play nicely with text_mutex, so we use this
++ * flag to elide the check.
++ */
++ riscv_patch_in_stop_machine = true;
++
+ return 0;
+ }
+
+ int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+ {
++ riscv_patch_in_stop_machine = false;
+ mutex_unlock(&text_mutex);
+ return 0;
+ }
+@@ -57,12 +67,15 @@ static int ftrace_check_current_call(unsigned long hook_pos,
+ }
+
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+- bool enable)
++ bool enable, bool ra)
+ {
+ unsigned int call[2];
+ unsigned int nops[2] = {NOP4, NOP4};
+
+- make_call(hook_pos, target, call);
++ if (ra)
++ make_call_ra(hook_pos, target, call);
++ else
++ make_call_t0(hook_pos, target, call);
+
+ /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+ if (patch_text_nosync
+@@ -72,42 +85,13 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+ return 0;
+ }
+
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S ra, -SZREG(sp)
+- * 1: auipc ra, 0x?
+- * 2: jalr -?(ra)
+- * 3: REG_L ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE 16
+-#define FUNC_ENTRY_JMP 4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- unsigned int call[4] = {INSN0, 0, 0, INSN3};
+- unsigned long target = addr;
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned int call[2];
+
+- call[1] = to_auipc_insn((unsigned int)(target - caller));
+- call[2] = to_jalr_insn((unsigned int)(target - caller));
++ make_call_t0(rec->ip, addr, call);
+
+- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+@@ -116,15 +100,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++ unsigned int nops[2] = {NOP4, NOP4};
+
+- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+ }
+
+-
+ /*
+ * This is called early on, and isn't wrapped by
+ * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -136,9 +119,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ {
+ int out;
+
+- ftrace_arch_code_modify_prepare();
++ mutex_lock(&text_mutex);
+ out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+- ftrace_arch_code_modify_post_process();
++ mutex_unlock(&text_mutex);
+
+ return out;
+ }
+@@ -146,10 +129,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+ int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ if (!ret) {
+ ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ }
+
+ return ret;
+@@ -166,16 +149,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+ {
+ unsigned int call[2];
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned long caller = rec->ip;
+ int ret;
+
+- make_call(caller, old_addr, call);
++ make_call_t0(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
+
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call(caller, addr, true);
++ return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+
+@@ -210,12 +193,12 @@ int ftrace_enable_ftrace_graph_caller(void)
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ }
+
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -223,12 +206,12 @@ int ftrace_disable_ftrace_graph_caller(void)
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
+index 52c5ff9804c55..4c3c7592b6fc8 100644
+--- a/arch/riscv/kernel/head.S
++++ b/arch/riscv/kernel/head.S
+@@ -301,6 +301,7 @@ clear_bss_done:
+ REG_S a0, (a2)
+
+ /* Initialize page tables and relocate to virtual addresses */
++ la tp, init_task
+ la sp, init_thread_union + THREAD_SIZE
+ XIP_FIXUP_OFFSET sp
+ #ifdef CONFIG_BUILTIN_DTB
+diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
+index a80b52a74f58c..059c5e216ae75 100644
+--- a/arch/riscv/kernel/kexec_relocate.S
++++ b/arch/riscv/kernel/kexec_relocate.S
+@@ -159,25 +159,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
+ * s0: (const) Phys address to jump to
+ * s1: (const) Phys address of the FDT image
+ * s2: (const) The hartid of the current hart
+- * s3: (const) kernel_map.va_pa_offset, used when switching MMU off
+ */
+ mv s0, a1
+ mv s1, a2
+ mv s2, a3
+- mv s3, a4
+
+ /* Disable / cleanup interrupts */
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
+
+- /* Switch to physical addressing */
+- la s4, 1f
+- sub s4, s4, s3
+- csrw CSR_STVEC, s4
+- csrw CSR_SATP, zero
+-
+-.align 2
+-1:
+ /* Pass the arguments to the next kernel / Cleanup*/
+ mv a0, s2
+ mv a1, s1
+@@ -214,7 +204,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
+ csrw CSR_SCAUSE, zero
+ csrw CSR_SSCRATCH, zero
+
+- jalr zero, a2, 0
++ /*
++ * Switch to physical addressing
++ * This will also trigger a jump to CSR_STVEC
++ * which in this case is the address of the new
++ * kernel.
++ */
++ csrw CSR_STVEC, a2
++ csrw CSR_SATP, zero
++
+ SYM_CODE_END(riscv_kexec_norelocate)
+
+ .section ".rodata"
+diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
+index e6eca271a4d60..db41c676e5a26 100644
+--- a/arch/riscv/kernel/machine_kexec.c
++++ b/arch/riscv/kernel/machine_kexec.c
+@@ -15,6 +15,8 @@
+ #include <linux/compiler.h> /* For unreachable() */
+ #include <linux/cpu.h> /* For cpu_down() */
+ #include <linux/reboot.h>
++#include <linux/interrupt.h>
++#include <linux/irq.h>
+
+ /*
+ * kexec_image_info - Print received image details
+@@ -65,7 +67,9 @@ machine_kexec_prepare(struct kimage *image)
+ if (image->segment[i].memsz <= sizeof(fdt))
+ continue;
+
+- if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
++ if (image->file_mode)
++ memcpy(&fdt, image->segment[i].buf, sizeof(fdt));
++ else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
+ continue;
+
+ if (fdt_check_header(&fdt))
+@@ -136,19 +140,70 @@ void machine_shutdown(void)
+ #endif
+ }
+
++/* Override the weak function in kernel/panic.c */
++void crash_smp_send_stop(void)
++{
++ static int cpus_stopped;
++
++ /*
++ * This function can be called twice in the panic path, but obviously
++ * we execute this only once.
++ */
++ if (cpus_stopped)
++ return;
++
++ smp_send_stop();
++ cpus_stopped = 1;
++}
++
++static void machine_kexec_mask_interrupts(void)
++{
++ unsigned int i;
++ struct irq_desc *desc;
++
++ for_each_irq_desc(i, desc) {
++ struct irq_chip *chip;
++ int ret;
++
++ chip = irq_desc_get_chip(desc);
++ if (!chip)
++ continue;
++
++ /*
++ * First try to remove the active state. If this
++ * fails, try to EOI the interrupt.
++ */
++ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
++
++ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
++ chip->irq_eoi)
++ chip->irq_eoi(&desc->irq_data);
++
++ if (chip->irq_mask)
++ chip->irq_mask(&desc->irq_data);
++
++ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
++ chip->irq_disable(&desc->irq_data);
++ }
++}
++
+ /*
+ * machine_crash_shutdown - Prepare to kexec after a kernel crash
+ *
+ * This function is called by crash_kexec just before machine_kexec
+- * below and its goal is similar to machine_shutdown, but in case of
+- * a kernel crash. Since we don't handle such cases yet, this function
+- * is empty.
++ * and its goal is to shut down non-crashing cpus and save registers.
+ */
+ void
+ machine_crash_shutdown(struct pt_regs *regs)
+ {
++ local_irq_disable();
++
++ /* shutdown non-crashing cpus */
++ crash_smp_send_stop();
++
+ crash_save_cpu(regs, smp_processor_id());
+- machine_shutdown();
++ machine_kexec_mask_interrupts();
++
+ pr_info("Starting crashdump kernel...\n");
+ }
+
+@@ -169,7 +224,8 @@ machine_kexec(struct kimage *image)
+ struct kimage_arch *internal = &image->arch;
+ unsigned long jump_addr = (unsigned long) image->start;
+ unsigned long first_ind_entry = (unsigned long) &image->head;
+- unsigned long this_hart_id = raw_smp_processor_id();
++ unsigned long this_cpu_id = __smp_processor_id();
++ unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
+ unsigned long fdt_addr = internal->fdt_addr;
+ void *control_code_buffer = page_address(image->control_code_page);
+ riscv_kexec_method kexec_method = NULL;
+diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
+index d171eca623b6f..125de818d1bab 100644
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+
+ .text
+
+-#define FENTRY_RA_OFFSET 12
+-#define ABI_SIZE_ON_STACK 72
++#define FENTRY_RA_OFFSET 8
++#define ABI_SIZE_ON_STACK 80
+ #define ABI_A0 0
+ #define ABI_A1 8
+ #define ABI_A2 16
+@@ -23,10 +23,10 @@
+ #define ABI_A5 40
+ #define ABI_A6 48
+ #define ABI_A7 56
+-#define ABI_RA 64
++#define ABI_T0 64
++#define ABI_RA 72
+
+ .macro SAVE_ABI
+- addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
++ REG_S t0, ABI_T0(sp)
+ REG_S ra, ABI_RA(sp)
+ .endm
+
+@@ -49,24 +50,18 @@
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
++ REG_L t0, ABI_T0(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+- addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+- REG_S x1, PT_EPC(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_L x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
++ REG_S t0, PT_EPC(sp)
+ REG_S x1, PT_RA(sp)
+- REG_L x1, PT_EPC(sp)
+-
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+@@ -100,15 +95,11 @@
+ .endm
+
+ .macro RESTORE_ALL
++ REG_L t0, PT_EPC(sp)
+ REG_L x1, PT_RA(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_S x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
+- REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+- REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+@@ -137,17 +128,16 @@
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ ENTRY(ftrace_caller)
+ SAVE_ABI
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, ABI_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi a0, sp, ABI_SIZE_ON_STACK
+- REG_L a1, ABI_RA(sp)
++ addi a0, sp, ABI_RA
++ REG_L a1, ABI_T0(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+ call ftrace_stub
+ #endif
+ RESTORE_ABI
+- ret
++ jr t0
+ ENDPROC(ftrace_caller)
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+ SAVE_ALL
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, PT_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+
+ RESTORE_ALL
+- ret
++ jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
+index 68a9e3d1fe16a..4a48287513c37 100644
+--- a/arch/riscv/kernel/module.c
++++ b/arch/riscv/kernel/module.c
+@@ -13,6 +13,19 @@
+ #include <linux/pgtable.h>
+ #include <asm/sections.h>
+
++/*
++ * The auipc+jalr instruction pair can reach any PC-relative offset
++ * in the range [-2^31 - 2^11, 2^31 - 2^11)
++ */
++static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
++{
++#ifdef CONFIG_32BIT
++ return true;
++#else
++ return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11));
++#endif
++}
++
+ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
+ {
+ if (v != (u32)v) {
+@@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
+ ptrdiff_t offset = (void *)v - (void *)location;
+ s32 hi20;
+
+- if (offset != (s32)offset) {
++ if (!riscv_insn_valid_32bit_offset(offset)) {
+ pr_err(
+ "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+ me->name, (long long)v, location);
+@@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
+ Elf_Addr v)
+ {
+ ptrdiff_t offset = (void *)v - (void *)location;
+- s32 fill_v = offset;
+ u32 hi20, lo12;
+
+- if (offset != fill_v) {
++ if (!riscv_insn_valid_32bit_offset(offset)) {
+ /* Only emit the plt entry if offset over 32-bit range */
+ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
+ offset = module_emit_plt_entry(me, v);
+@@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
+ Elf_Addr v)
+ {
+ ptrdiff_t offset = (void *)v - (void *)location;
+- s32 fill_v = offset;
+ u32 hi20, lo12;
+
+- if (offset != fill_v) {
++ if (!riscv_insn_valid_32bit_offset(offset)) {
+ pr_err(
+ "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+ me->name, (long long)v, location);
+diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
+index 0b552873a5778..e099961453cca 100644
+--- a/arch/riscv/kernel/patch.c
++++ b/arch/riscv/kernel/patch.c
+@@ -11,6 +11,7 @@
+ #include <asm/kprobes.h>
+ #include <asm/cacheflush.h>
+ #include <asm/fixmap.h>
++#include <asm/ftrace.h>
+ #include <asm/patch.h>
+
+ struct patch_insn {
+@@ -19,6 +20,8 @@ struct patch_insn {
+ atomic_t cpu_count;
+ };
+
++int riscv_patch_in_stop_machine = false;
++
+ #ifdef CONFIG_MMU
+ /*
+ * The fix_to_virt(, idx) needs a const value (not a dynamic variable of
+@@ -59,8 +62,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
+ * Before reaching here, it was expected to lock the text_mutex
+ * already, so we don't need to give another lock here and could
+ * ensure that it was safe between each cores.
++ *
++ * We're currently using stop_machine() for ftrace & kprobes, and while
++ * that ensures text_mutex is held before installing the mappings, it
++ * does not ensure text_mutex is held by the calling thread. That's
++ * safe but triggers a lockdep failure, so just elide it for that
++ * specific case.
+ */
+- lockdep_assert_held(&text_mutex);
++ if (!riscv_patch_in_stop_machine)
++ lockdep_assert_held(&text_mutex);
+
+ if (across_pages)
+ patch_map(addr + len, FIX_TEXT_POKE1);
+@@ -104,7 +114,7 @@ static int patch_text_cb(void *data)
+ struct patch_insn *patch = data;
+ int ret = 0;
+
+- if (atomic_inc_return(&patch->cpu_count) == 1) {
++ if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
+ ret =
+ patch_text_nosync(patch->addr, &patch->insn,
+ GET_INSN_LENGTH(patch->insn));
+@@ -121,13 +131,25 @@ NOKPROBE_SYMBOL(patch_text_cb);
+
+ int patch_text(void *addr, u32 insn)
+ {
++ int ret;
+ struct patch_insn patch = {
+ .addr = addr,
+ .insn = insn,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+- return stop_machine_cpuslocked(patch_text_cb,
+- &patch, cpu_online_mask);
++ /*
++ * kprobes takes text_mutex before calling patch_text(), but since we then
++ * call stop_machine(), the lockdep assertion in patch_insn_write()
++ * gets confused by the context in which the lock is taken.
++ * Instead, ensure the lock is held before calling stop_machine(), and
++ * set riscv_patch_in_stop_machine to skip the check in
++ * patch_insn_write().
++ */
++ lockdep_assert_held(&text_mutex);
++ riscv_patch_in_stop_machine = true;
++ ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask);
++ riscv_patch_in_stop_machine = false;
++ return ret;
+ }
+ NOKPROBE_SYMBOL(patch_text);
+diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
+index 0bb1854dce833..357f985041cb9 100644
+--- a/arch/riscv/kernel/perf_callchain.c
++++ b/arch/riscv/kernel/perf_callchain.c
+@@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+ {
+ struct stackframe buftail;
+ unsigned long ra = 0;
+- unsigned long *user_frame_tail =
+- (unsigned long *)(fp - sizeof(struct stackframe));
++ unsigned long __user *user_frame_tail =
++ (unsigned long __user *)(fp - sizeof(struct stackframe));
+
+ /* Check accessibility of one struct frame_tail beyond */
+ if (!access_ok(user_frame_tail, sizeof(buftail)))
+@@ -56,10 +56,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ unsigned long fp = 0;
+
+ /* RISC-V does not support perf in guest mode. */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
++ if (guest_cbs && guest_cbs->is_in_guest())
+ return;
+
+ fp = regs->s0;
+@@ -72,14 +73,16 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+
+ static bool fill_callchain(void *entry, unsigned long pc)
+ {
+- return perf_callchain_store(entry, pc);
++ return perf_callchain_store(entry, pc) == 0;
+ }
+
+ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
+ /* RISC-V does not support perf in guest mode. */
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ pr_warn("RISC-V does not support perf in guest mode!");
+ return;
+ }
+diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
+index 00088dc6da4b6..7548b1d62509c 100644
+--- a/arch/riscv/kernel/probes/kprobes.c
++++ b/arch/riscv/kernel/probes/kprobes.c
+@@ -1,5 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0+
+
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/kprobes.h>
+ #include <linux/extable.h>
+ #include <linux/slab.h>
+@@ -46,18 +48,35 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+ post_kprobe_handler(p, kcb, regs);
+ }
+
+-int __kprobes arch_prepare_kprobe(struct kprobe *p)
++static bool __kprobes arch_check_kprobe(struct kprobe *p)
+ {
+- unsigned long probe_addr = (unsigned long)p->addr;
++ unsigned long tmp = (unsigned long)p->addr - p->offset;
++ unsigned long addr = (unsigned long)p->addr;
+
+- if (probe_addr & 0x1) {
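++ /*
++ * Walk forward from the start of the symbol; the probe address is only
++ * accepted if it lands on an instruction boundary.
++ */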
+- pr_warn("Address not aligned.\n");
++ while (tmp <= addr) {
++ if (tmp == addr)
++ return true;
+
+- return -EINVAL;
++ tmp += GET_INSN_LENGTH(*(u16 *)tmp);
+ }
+
++ return false;
++}
++
++int __kprobes arch_prepare_kprobe(struct kprobe *p)
++{
++ u16 *insn = (u16 *)p->addr;
++
++ if ((unsigned long)insn & 0x1)
++ return -EILSEQ;
++
++ if (!arch_check_kprobe(p))
++ return -EILSEQ;
++
+ /* copy instruction */
+- p->opcode = *p->addr;
++ p->opcode = (kprobe_opcode_t)(*insn++);
++ if (GET_INSN_LENGTH(p->opcode) == 4)
++ p->opcode |= (kprobe_opcode_t)(*insn) << 16;
+
+ /* decode instruction */
+ switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) {
+@@ -191,7 +210,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+- pr_warn("Unrecoverable kprobe detected.\n");
++ pr_warn("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ BUG();
+ break;
+diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
+index d73e96f6ed7c5..a20568bd1f1a8 100644
+--- a/arch/riscv/kernel/probes/simulate-insn.c
++++ b/arch/riscv/kernel/probes/simulate-insn.c
+@@ -71,11 +71,11 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg
+ u32 rd_index = (opcode >> 7) & 0x1f;
+ u32 rs1_index = (opcode >> 15) & 0x1f;
+
+- ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
++ ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
+ if (!ret)
+ return ret;
+
+- ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
++ ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
+ if (!ret)
+ return ret;
+
+diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
+index cb6ff7dccb92e..de8474146a9b6 100644
+--- a/arch/riscv/kernel/probes/simulate-insn.h
++++ b/arch/riscv/kernel/probes/simulate-insn.h
+@@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f);
+ } while (0)
+
+ __RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001);
+-__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002);
++__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002);
+ __RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001);
+-__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002);
++__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002);
+ __RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001);
+ __RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001);
+ __RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002);
+diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
+index 7a057b5f0adc7..194f166b2cc40 100644
+--- a/arch/riscv/kernel/probes/uprobes.c
++++ b/arch/riscv/kernel/probes/uprobes.c
+@@ -59,8 +59,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+
+ instruction_pointer_set(regs, utask->xol_vaddr);
+
+- regs->status &= ~SR_SPIE;
+-
+ return 0;
+ }
+
+@@ -69,11 +67,10 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+ struct uprobe_task *utask = current->utask;
+
+ WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR);
++ current->thread.bad_cause = utask->autask.saved_cause;
+
+ instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size);
+
+- regs->status |= SR_SPIE;
+-
+ return 0;
+ }
+
+@@ -106,13 +103,12 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+ {
+ struct uprobe_task *utask = current->utask;
+
++ current->thread.bad_cause = utask->autask.saved_cause;
+ /*
+ * Task has received a fatal signal, so reset back to probed
+ * address.
+ */
+ instruction_pointer_set(regs, utask->vaddr);
+-
+- regs->status &= ~SR_SPIE;
+ }
+
+ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
+index 03ac3aa611f59..bda3bc2947186 100644
+--- a/arch/riscv/kernel/process.c
++++ b/arch/riscv/kernel/process.c
+@@ -124,6 +124,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
+ {
+ struct pt_regs *childregs = task_pt_regs(p);
+
++ memset(&p->thread.s, 0, sizeof(p->thread.s));
++
+ /* p->thread holds context to be restored by __switch_to() */
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ /* Kernel thread */
+diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
+index ee5878d968cc1..9c842c41684ac 100644
+--- a/arch/riscv/kernel/reset.c
++++ b/arch/riscv/kernel/reset.c
+@@ -12,7 +12,7 @@ static void default_power_off(void)
+ wait_for_interrupt();
+ }
+
+-void (*pm_power_off)(void) = default_power_off;
++void (*pm_power_off)(void) = NULL;
+ EXPORT_SYMBOL(pm_power_off);
+
+ void machine_restart(char *cmd)
+@@ -23,10 +23,16 @@ void machine_restart(char *cmd)
+
+ void machine_halt(void)
+ {
+- pm_power_off();
++ if (pm_power_off != NULL)
++ pm_power_off();
++ else
++ default_power_off();
+ }
+
+ void machine_power_off(void)
+ {
+- pm_power_off();
++ if (pm_power_off != NULL)
++ pm_power_off();
++ else
++ default_power_off();
+ }
+diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
+index b9620e5f00baf..8cc147491c675 100644
+--- a/arch/riscv/kernel/setup.c
++++ b/arch/riscv/kernel/setup.c
+@@ -59,6 +59,16 @@ atomic_t hart_lottery __section(".sdata")
+ unsigned long boot_cpu_hartid;
+ static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
++void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
++{
++ int cpu;
++
++ cpumask_clear(out);
++ for_each_cpu(cpu, in)
++ cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
++}
++EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
++
+ /*
+ * Place kernel memory regions on the resource tree so that
+ * kexec-tools can retrieve them from /proc/iomem. While there
+@@ -189,7 +199,7 @@ static void __init init_resources(void)
+ res = &mem_res[res_idx--];
+
+ res->name = "Reserved";
+- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
++ res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
+ res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
+
+@@ -214,7 +224,7 @@ static void __init init_resources(void)
+
+ if (unlikely(memblock_is_nomap(region))) {
+ res->name = "Reserved";
+- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
++ res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+@@ -250,10 +260,10 @@ static void __init parse_dtb(void)
+ pr_info("Machine model: %s\n", name);
+ dump_stack_set_arch_desc("%s (DT)", name);
+ }
+- return;
++ } else {
++ pr_err("No DTB passed to the kernel\n");
+ }
+
+- pr_err("No DTB passed to the kernel\n");
+ #ifdef CONFIG_CMDLINE_FORCE
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ pr_info("Forcing kernel command line to: %s\n", boot_command_line);
+@@ -276,10 +286,7 @@ void __init setup_arch(char **cmdline_p)
+ #if IS_ENABLED(CONFIG_BUILTIN_DTB)
+ unflatten_and_copy_device_tree();
+ #else
+- if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
+- unflatten_device_tree();
+- else
+- pr_err("No DTB found in kernel mappings\n");
++ unflatten_device_tree();
+ #endif
+ misc_mem_init();
+
+@@ -320,10 +327,11 @@ subsys_initcall(topology_init);
+
+ void free_initmem(void)
+ {
+- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+- set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
+- IS_ENABLED(CONFIG_64BIT) ?
+- set_memory_rw : set_memory_rw_nx);
++ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
++ set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
++ if (IS_ENABLED(CONFIG_64BIT))
++ set_kernel_memory(__init_begin, __init_end, set_memory_nx);
++ }
+
+ free_initmem_default(POISON_FREE_INITMEM);
+ }
+diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
+index c2d5ecbe55264..8892569aad23b 100644
+--- a/arch/riscv/kernel/signal.c
++++ b/arch/riscv/kernel/signal.c
+@@ -16,6 +16,7 @@
+ #include <asm/vdso.h>
+ #include <asm/switch_to.h>
+ #include <asm/csr.h>
++#include <asm/cacheflush.h>
+
+ extern u32 __user_rt_sigreturn[2];
+
+@@ -121,6 +122,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
++ regs->cause = -1UL;
++
+ return regs->a0;
+
+ badframe:
+@@ -176,6 +179,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ {
+ struct rt_sigframe __user *frame;
+ long err = 0;
++ unsigned long __maybe_unused addr;
+
+ frame = get_sigframe(ksig, regs, sizeof(*frame));
+ if (!access_ok(frame, sizeof(*frame)))
+@@ -204,7 +208,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
+ sizeof(frame->sigreturn_code)))
+ return -EFAULT;
+- regs->ra = (unsigned long)&frame->sigreturn_code;
++
++ addr = (unsigned long)&frame->sigreturn_code;
++ /* Make sure the two instructions are pushed to icache. */
++ flush_icache_range(addr, addr + sizeof(frame->sigreturn_code));
++
++ regs->ra = addr;
+ #endif /* CONFIG_MMU */
+
+ /*
+diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
+index 921d9d7df4001..d0147294691d9 100644
+--- a/arch/riscv/kernel/smp.c
++++ b/arch/riscv/kernel/smp.c
+@@ -59,16 +59,6 @@ int riscv_hartid_to_cpuid(int hartid)
+ return -ENOENT;
+ }
+
+-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
+-{
+- int cpu;
+-
+- cpumask_clear(out);
+- for_each_cpu(cpu, in)
+- cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
+-}
+-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
+-
+ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+ {
+ return phys_id == cpuid_to_hartid_map(cpu);
+diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
+index bd82375db51a6..0f323e935dd89 100644
+--- a/arch/riscv/kernel/smpboot.c
++++ b/arch/riscv/kernel/smpboot.c
+@@ -53,6 +53,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ unsigned int curr_cpuid;
+
+ curr_cpuid = smp_processor_id();
++ store_cpu_topology(curr_cpuid);
+ numa_store_cpu_info(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
+
+@@ -165,9 +166,9 @@ asmlinkage __visible void smp_callin(void)
+ mmgrab(mm);
+ current->active_mm = mm;
+
++ store_cpu_topology(curr_cpuid);
+ notify_cpu_starting(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
+- update_siblings_masks(curr_cpuid);
+ set_cpu_online(curr_cpuid, 1);
+
+ /*
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 315db3d0229bf..894ae66421a76 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -22,15 +22,17 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ bool (*fn)(void *, unsigned long), void *arg)
+ {
+ unsigned long fp, sp, pc;
++ int level = 0;
+
+ if (regs) {
+ fp = frame_pointer(regs);
+ sp = user_stack_pointer(regs);
+ pc = instruction_pointer(regs);
+ } else if (task == NULL || task == current) {
+- fp = (unsigned long)__builtin_frame_address(1);
+- sp = (unsigned long)__builtin_frame_address(0);
+- pc = (unsigned long)__builtin_return_address(0);
++ fp = (unsigned long)__builtin_frame_address(0);
++ sp = sp_in_global;
++ pc = (unsigned long)walk_stackframe;
++ level = -1;
+ } else {
+ /* task blocked in __switch_to */
+ fp = task->thread.s[0];
+@@ -42,7 +44,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ unsigned long low, high;
+ struct stackframe *frame;
+
+- if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
++ if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
+ break;
+
+ /* Validate frame pointer */
+@@ -59,7 +61,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ } else {
+ fp = frame->fp;
+ pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+- (unsigned long *)(fp - 8));
++ &frame->ra);
+ }
+
+ }
+@@ -92,7 +94,7 @@ void notrace walk_stackframe(struct task_struct *task,
+ while (!kstack_end(ksp)) {
+ if (__kernel_text_address(pc) && unlikely(!fn(arg, pc)))
+ break;
+- pc = (*ksp++) - 0x4;
++ pc = READ_ONCE_NOCHECK(*ksp++) - 0x4;
+ }
+ }
+
+diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
+index 12f8a7fce78b1..bb402685057a2 100644
+--- a/arch/riscv/kernel/sys_riscv.c
++++ b/arch/riscv/kernel/sys_riscv.c
+@@ -18,10 +18,6 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
+ if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
+ return -EINVAL;
+
+- if ((prot & PROT_WRITE) && (prot & PROT_EXEC))
+- if (unlikely(!(prot & PROT_READ)))
+- return -EINVAL;
+-
+ return ksys_mmap_pgoff(addr, len, prot, flags, fd,
+ offset >> (PAGE_SHIFT - page_shift_offset));
+ }
+diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
+index 8217b0f67c6cb..1cf21db4fcc77 100644
+--- a/arch/riscv/kernel/time.c
++++ b/arch/riscv/kernel/time.c
+@@ -5,6 +5,7 @@
+ */
+
+ #include <linux/of_clk.h>
++#include <linux/clockchips.h>
+ #include <linux/clocksource.h>
+ #include <linux/delay.h>
+ #include <asm/sbi.h>
+@@ -29,6 +30,8 @@ void __init time_init(void)
+
+ of_clk_init(NULL);
+ timer_probe();
++
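++ /*
++ * Register the hrtimer-based tick broadcast device so timer events can
++ * still be delivered when a CPU's local clock event device is stopped.
++ */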
++ tick_setup_hrtimer_broadcast();
+ }
+
+ void clocksource_arch_init(struct clocksource *cs)
+diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
+new file mode 100644
+index 0000000000000..095ac976d7da1
+--- /dev/null
++++ b/arch/riscv/kernel/trace_irq.c
+@@ -0,0 +1,27 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
++ */
++
++#include <linux/irqflags.h>
++#include <linux/kprobes.h>
++#include "trace_irq.h"
++
++/*
++ * trace_hardirqs_on/off require the caller to set up the frame pointer
++ * properly. Otherwise, CALLER_ADDR1 might trigger a paging exception in
++ * the kernel. Here we add one extra level so they can be safely called by
++ * low-level entry code in which $fp is used for other purposes.
++ */
++
++void __trace_hardirqs_on(void)
++{
++ trace_hardirqs_on();
++}
++NOKPROBE_SYMBOL(__trace_hardirqs_on);
++
++void __trace_hardirqs_off(void)
++{
++ trace_hardirqs_off();
++}
++NOKPROBE_SYMBOL(__trace_hardirqs_off);
+diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
+new file mode 100644
+index 0000000000000..99fe67377e5ed
+--- /dev/null
++++ b/arch/riscv/kernel/trace_irq.h
+@@ -0,0 +1,11 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
++ */
++#ifndef __TRACE_IRQ_H
++#define __TRACE_IRQ_H
++
++void __trace_hardirqs_on(void);
++void __trace_hardirqs_off(void);
++
++#endif /* __TRACE_IRQ_H */
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index 0daaa3e4630d4..4f38b3c47e6d5 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -16,12 +16,14 @@
+ #include <linux/mm.h>
+ #include <linux/module.h>
+ #include <linux/irq.h>
++#include <linux/kexec.h>
+
+ #include <asm/asm-prototypes.h>
+ #include <asm/bug.h>
++#include <asm/csr.h>
+ #include <asm/processor.h>
+ #include <asm/ptrace.h>
+-#include <asm/csr.h>
++#include <asm/thread_info.h>
+
+ int show_unhandled_signals = 1;
+
+@@ -31,22 +33,29 @@ void die(struct pt_regs *regs, const char *str)
+ {
+ static int die_counter;
+ int ret;
++ long cause;
++ unsigned long flags;
+
+ oops_enter();
+
+- spin_lock_irq(&die_lock);
++ spin_lock_irqsave(&die_lock, flags);
+ console_verbose();
+ bust_spinlocks(1);
+
+ pr_emerg("%s [#%d]\n", str, ++die_counter);
+ print_modules();
+- show_regs(regs);
++ if (regs)
++ show_regs(regs);
+
+- ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV);
++ cause = regs ? regs->cause : -1;
++ ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV);
++
++ if (kexec_should_crash(current))
++ crash_kexec(regs);
+
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+- spin_unlock_irq(&die_lock);
++ spin_unlock_irqrestore(&die_lock, flags);
+ oops_exit();
+
+ if (in_interrupt())
+@@ -54,7 +63,7 @@ void die(struct pt_regs *regs, const char *str)
+ if (panic_on_oops)
+ panic("Fatal exception");
+ if (ret != NOTIFY_STOP)
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
+@@ -206,18 +215,36 @@ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
+ * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used
+ * to get per-cpu overflow stack(get_overflow_stack).
+ */
+-long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)];
++long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16);
+ asmlinkage unsigned long get_overflow_stack(void)
+ {
+ return (unsigned long)this_cpu_ptr(overflow_stack) +
+ OVERFLOW_STACK_SIZE;
+ }
+
++/*
++ * A pseudo spinlock to protect the shadow stack from being used by multiple
++ * harts concurrently. This isn't a real spinlock because the lock side must
++ * be taken without a valid stack and with only a single register; it's only
++ * taken while in the process of panicking anyway, so the performance and
++ * error checking a proper spinlock gives us don't matter.
++ */
++unsigned long spin_shadow_stack;
++
+ asmlinkage void handle_bad_stack(struct pt_regs *regs)
+ {
+ unsigned long tsk_stk = (unsigned long)current->stack;
+ unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+
++ /*
++ * We're done with the shadow stack by this point, as we're on the
++ * overflow stack. Tell any other concurrent overflowing harts that
++ * they can proceed with panicking by releasing the pseudo-spinlock.
++ *
++ * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
++ */
++ smp_store_release(&spin_shadow_stack, 0);
++
+ console_verbose();
+
+ pr_emerg("Insufficient stack space to handle exception!\n");
+diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
+index f2e065671e4d5..06e6b27f3bcc9 100644
+--- a/arch/riscv/kernel/vdso/Makefile
++++ b/arch/riscv/kernel/vdso/Makefile
+@@ -17,6 +17,7 @@ vdso-syms += flush_icache
+ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+
+ ccflags-y := -fno-stack-protector
++ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+ ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
+@@ -28,9 +29,12 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+ obj-y += vdso.o
+ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
++ifneq ($(filter vgettimeofday, $(vdso-syms)),)
++CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY
++endif
+
+ # Disable -pg to prevent insert call site
+-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
++CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+
+ # Disable profiling and instrumentation for VDSO code
+ GCOV_PROFILE := n
+diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
+index e9111f700af08..3729cb28aac8d 100644
+--- a/arch/riscv/kernel/vdso/vdso.lds.S
++++ b/arch/riscv/kernel/vdso/vdso.lds.S
+@@ -65,9 +65,11 @@ VERSION
+ LINUX_4.15 {
+ global:
+ __vdso_rt_sigreturn;
++#ifdef HAS_VGETTIMEOFDAY
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
+ __vdso_clock_getres;
++#endif
+ __vdso_getcpu;
+ __vdso_flush_icache;
+ local: *;
+diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
+index 07d1d2152ba5c..e0609e1f0864d 100644
+--- a/arch/riscv/lib/memmove.S
++++ b/arch/riscv/lib/memmove.S
+@@ -1,64 +1,316 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
++ */
+
+ #include <linux/linkage.h>
+ #include <asm/asm.h>
+
+-ENTRY(__memmove)
+-WEAK(memmove)
+- move t0, a0
+- move t1, a1
+-
+- beq a0, a1, exit_memcpy
+- beqz a2, exit_memcpy
+- srli t2, a2, 0x2
+-
+- slt t3, a0, a1
+- beqz t3, do_reverse
+-
+- andi a2, a2, 0x3
+- li t4, 1
+- beqz t2, byte_copy
+-
+-word_copy:
+- lw t3, 0(a1)
+- addi t2, t2, -1
+- addi a1, a1, 4
+- sw t3, 0(a0)
+- addi a0, a0, 4
+- bnez t2, word_copy
+- beqz a2, exit_memcpy
+- j byte_copy
+-
+-do_reverse:
+- add a0, a0, a2
+- add a1, a1, a2
+- andi a2, a2, 0x3
+- li t4, -1
+- beqz t2, reverse_byte_copy
+-
+-reverse_word_copy:
+- addi a1, a1, -4
+- addi t2, t2, -1
+- lw t3, 0(a1)
+- addi a0, a0, -4
+- sw t3, 0(a0)
+- bnez t2, reverse_word_copy
+- beqz a2, exit_memcpy
+-
+-reverse_byte_copy:
+- addi a0, a0, -1
+- addi a1, a1, -1
++SYM_FUNC_START(__memmove)
++SYM_FUNC_START_WEAK(memmove)
++ /*
++ * Returns
++ * a0 - dest
++ *
++ * Parameters
++ * a0 - Inclusive first byte of dest
++ * a1 - Inclusive first byte of src
++ * a2 - Length of copy n
++ *
++ * Because the return matches the parameter register a0,
++ * we will not clobber or modify that register.
++ *
++ * Note: This currently only works on little-endian.
++ * To port to big-endian, reverse the direction of shifts
++ * in the 2 misaligned fixup copy loops.
++ */
+
++ /* Return if nothing to do */
++ beq a0, a1, return_from_memmove
++ beqz a2, return_from_memmove
++
++ /*
++ * Register Uses
++ * Forward Copy: a1 - Index counter of src
++ * Reverse Copy: a4 - Index counter of src
++ * Forward Copy: t3 - Index counter of dest
++ * Reverse Copy: t4 - Index counter of dest
++ * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
++ * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
++ * Both Copy Modes: t0 - Link / Temporary for load-store
++ * Both Copy Modes: t1 - Temporary for load-store
++ * Both Copy Modes: t2 - Temporary for load-store
++ * Both Copy Modes: a5 - dest to src alignment offset
++ * Both Copy Modes: a6 - Shift amount
++ * Both Copy Modes: a7 - Inverse Shift amount
++ * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
++ */
++
++ /*
++ * Solve for some register values now.
++ * Byte copy does not need t5 or t6.
++ */
++ mv t3, a0
++ add t4, a0, a2
++ add a4, a1, a2
++
++ /*
++ * Byte copy if copying less than (2 * SZREG) bytes. This can
++ * cause problems with the bulk copy implementation and is
++ * small enough not to bother.
++ */
++ andi t0, a2, -(2 * SZREG)
++ beqz t0, byte_copy
++
++ /*
++ * Now solve for t5 and t6.
++ */
++ andi t5, t3, -SZREG
++ andi t6, t4, -SZREG
++ /*
++ * If dest (register t3), rounded down to the nearest naturally
++ * aligned SZREG address, does not equal dest, then add SZREG
++ * to find the low-bound of SZREG alignment in the dest memory
++ * region. Note that this could overshoot the dest memory
++ * region if n is less than SZREG. This is one reason why
++ * we always byte copy if n is less than SZREG.
++ * Otherwise, dest is already naturally aligned to SZREG.
++ */
++ beq t5, t3, 1f
++ addi t5, t5, SZREG
++ 1:
++
++ /*
++ * If the dest and src are co-aligned to SZREG, then there is
++ * no need for the rigmarole of a full misaligned fixup copy.
++ * Instead, do a simpler co-aligned copy.
++ */
++ xor t0, a0, a1
++ andi t1, t0, (SZREG - 1)
++ beqz t1, coaligned_copy
++ /* Fall through to misaligned fixup copy */
++
++misaligned_fixup_copy:
++ bltu a1, a0, misaligned_fixup_copy_reverse
++
++misaligned_fixup_copy_forward:
++ jal t0, byte_copy_until_aligned_forward
++
++ andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
++ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
++ sub a5, a1, t3 /* Find the difference between src and dest */
++ andi a1, a1, -SZREG /* Align the src pointer */
++ addi a2, t6, SZREG /* The other breakpoint for the unrolled loop */
++
++ /*
++ * Compute The Inverse Shift
++ * a7 = XLEN - a6 = XLEN + -a6
++ * 2s complement negation to find the negative: -a6 = ~a6 + 1
++ * Add that to XLEN. XLEN = SZREG * 8.
++ */
++ not a7, a6
++ addi a7, a7, (SZREG * 8 + 1)
++
++ /*
++ * Fix Misalignment Copy Loop - Forward
++ * load_val0 = load_ptr[0];
++ * do {
++ * load_val1 = load_ptr[1];
++ * store_ptr += 2;
++ * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
++ *
++ * if (store_ptr == {a2})
++ * break;
++ *
++ * load_val0 = load_ptr[2];
++ * load_ptr += 2;
++ * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
++ *
++ * } while (store_ptr != store_ptr_end);
++ * store_ptr = store_ptr_end;
++ */
++
++ REG_L t0, (0 * SZREG)(a1)
++ 1:
++ REG_L t1, (1 * SZREG)(a1)
++ addi t3, t3, (2 * SZREG)
++ srl t0, t0, a6
++ sll t2, t1, a7
++ or t2, t0, t2
++ REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
++
++ beq t3, a2, 2f
++
++ REG_L t0, (2 * SZREG)(a1)
++ addi a1, a1, (2 * SZREG)
++ srl t1, t1, a6
++ sll t2, t0, a7
++ or t2, t1, t2
++ REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
++
++ bne t3, t6, 1b
++ 2:
++ mv t3, t6 /* Fix the dest pointer in case the loop was broken */
++
++ add a1, t3, a5 /* Restore the src pointer */
++ j byte_copy_forward /* Copy any remaining bytes */
++
++misaligned_fixup_copy_reverse:
++ jal t0, byte_copy_until_aligned_reverse
++
++ andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
++ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
++ sub a5, a4, t4 /* Find the difference between src and dest */
++ andi a4, a4, -SZREG /* Align the src pointer */
++ addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */
++
++ /*
++ * Compute The Inverse Shift
++ * a7 = XLEN - a6 = XLEN + -a6
++ * 2s complement negation to find the negative: -a6 = ~a6 + 1
++ * Add that to XLEN. XLEN = SZREG * 8.
++ */
++ not a7, a6
++ addi a7, a7, (SZREG * 8 + 1)
++
++ /*
++ * Fix Misalignment Copy Loop - Reverse
++ * load_val1 = load_ptr[0];
++ * do {
++ * load_val0 = load_ptr[-1];
++ * store_ptr -= 2;
++ * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
++ *
++ * if (store_ptr == {a2})
++ * break;
++ *
++ * load_val1 = load_ptr[-2];
++ * load_ptr -= 2;
++ * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
++ *
++ * } while (store_ptr != store_ptr_end);
++ * store_ptr = store_ptr_end;
++ */
++
++ REG_L t1, ( 0 * SZREG)(a4)
++ 1:
++ REG_L t0, (-1 * SZREG)(a4)
++ addi t4, t4, (-2 * SZREG)
++ sll t1, t1, a7
++ srl t2, t0, a6
++ or t2, t1, t2
++ REG_S t2, ( 1 * SZREG)(t4)
++
++ beq t4, a2, 2f
++
++ REG_L t1, (-2 * SZREG)(a4)
++ addi a4, a4, (-2 * SZREG)
++ sll t0, t0, a7
++ srl t2, t1, a6
++ or t2, t0, t2
++ REG_S t2, ( 0 * SZREG)(t4)
++
++ bne t4, t5, 1b
++ 2:
++ mv t4, t5 /* Fix the dest pointer in case the loop was broken */
++
++ add a4, t4, a5 /* Restore the src pointer */
++ j byte_copy_reverse /* Copy any remaining bytes */
++
++/*
++ * Simple copy loops for SZREG co-aligned memory locations.
++ * These also make calls to do byte copies for any unaligned
++ * data at their terminations.
++ */
++coaligned_copy:
++ bltu a1, a0, coaligned_copy_reverse
++
++coaligned_copy_forward:
++ jal t0, byte_copy_until_aligned_forward
++
++ 1:
++ REG_L t1, ( 0 * SZREG)(a1)
++ addi a1, a1, SZREG
++ addi t3, t3, SZREG
++ REG_S t1, (-1 * SZREG)(t3)
++ bne t3, t6, 1b
++
++ j byte_copy_forward /* Copy any remaining bytes */
++
++coaligned_copy_reverse:
++ jal t0, byte_copy_until_aligned_reverse
++
++ 1:
++ REG_L t1, (-1 * SZREG)(a4)
++ addi a4, a4, -SZREG
++ addi t4, t4, -SZREG
++ REG_S t1, ( 0 * SZREG)(t4)
++ bne t4, t5, 1b
++
++ j byte_copy_reverse /* Copy any remaining bytes */
++
++/*
++ * These are basically sub-functions within the function. They
++ * are used to byte copy until the dest pointer is in alignment.
++ * At which point, a bulk copy method can be used by the
++ * calling code. These work on the same registers as the bulk
++ * copy loops. Therefore, the register values can be picked
++ * up from where they were left and we avoid code duplication
++ * without any overhead except the call in and return jumps.
++ */
++byte_copy_until_aligned_forward:
++ beq t3, t5, 2f
++ 1:
++ lb t1, 0(a1)
++ addi a1, a1, 1
++ addi t3, t3, 1
++ sb t1, -1(t3)
++ bne t3, t5, 1b
++ 2:
++ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
++
++byte_copy_until_aligned_reverse:
++ beq t4, t6, 2f
++ 1:
++ lb t1, -1(a4)
++ addi a4, a4, -1
++ addi t4, t4, -1
++ sb t1, 0(t4)
++ bne t4, t6, 1b
++ 2:
++ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
++
++/*
++ * Simple byte copy loops.
++ * These will byte copy until they reach the end of data to copy.
++ * At that point, they will call to return from memmove.
++ */
+ byte_copy:
+- lb t3, 0(a1)
+- addi a2, a2, -1
+- sb t3, 0(a0)
+- add a1, a1, t4
+- add a0, a0, t4
+- bnez a2, byte_copy
+-
+-exit_memcpy:
+- move a0, t0
+- move a1, t1
+- ret
+-END(__memmove)
++ bltu a1, a0, byte_copy_reverse
++
++byte_copy_forward:
++ beq t3, t4, 2f
++ 1:
++ lb t1, 0(a1)
++ addi a1, a1, 1
++ addi t3, t3, 1
++ sb t1, -1(t3)
++ bne t3, t4, 1b
++ 2:
++ ret
++
++byte_copy_reverse:
++ beq t4, t3, 2f
++ 1:
++ lb t1, -1(a4)
++ addi a4, a4, -1
++ addi t4, t4, -1
++ sb t1, 0(t4)
++ bne t4, t3, 1b
++ 2:
++
++return_from_memmove:
++ ret
++
++SYM_FUNC_END(memmove)
++SYM_FUNC_END(__memmove)
+diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
+index 63bc691cff91b..4fe436a0eec2c 100644
+--- a/arch/riscv/lib/uaccess.S
++++ b/arch/riscv/lib/uaccess.S
+@@ -19,8 +19,11 @@ ENTRY(__asm_copy_from_user)
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+
+- /* Save for return value */
+- mv t5, a2
++ /*
++ * Save the terminal address which will be used to compute the number
++ * of bytes copied in case of a fixup exception.
++ */
++ add t5, a0, a2
+
+ /*
+ * Register allocation for code below:
+@@ -173,6 +176,13 @@ ENTRY(__asm_copy_from_user)
+ csrc CSR_STATUS, t6
+ li a0, 0
+ ret
++
++ /* Exception fixup code */
++10:
++ /* Disable access to user memory */
++ csrc CSR_STATUS, t6
++ sub a0, t5, a0
++ ret
+ ENDPROC(__asm_copy_to_user)
+ ENDPROC(__asm_copy_from_user)
+ EXPORT_SYMBOL(__asm_copy_to_user)
+@@ -218,19 +228,12 @@ ENTRY(__clear_user)
+ addi a0, a0, 1
+ bltu a0, a3, 5b
+ j 3b
+-ENDPROC(__clear_user)
+-EXPORT_SYMBOL(__clear_user)
+
+- .section .fixup,"ax"
+- .balign 4
+- /* Fixup code for __copy_user(10) and __clear_user(11) */
+-10:
+- /* Disable access to user memory */
+- csrs CSR_STATUS, t6
+- mv a0, t5
+- ret
++ /* Exception fixup code */
+ 11:
+- csrs CSR_STATUS, t6
+- mv a0, a1
++ /* Disable access to user memory */
++ csrc CSR_STATUS, t6
++ sub a0, a3, a0
+ ret
+- .previous
++ENDPROC(__clear_user)
++EXPORT_SYMBOL(__clear_user)
+diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
+index 7ebaef10ea1b6..ac7a25298a04a 100644
+--- a/arch/riscv/mm/Makefile
++++ b/arch/riscv/mm/Makefile
+@@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o
+ ifdef CONFIG_KASAN
+ KASAN_SANITIZE_kasan_init.o := n
+ KASAN_SANITIZE_init.o := n
++ifdef CONFIG_DEBUG_VIRTUAL
++KASAN_SANITIZE_physaddr.o := n
++endif
+ endif
+
+ obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
+diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
+index 89f81067e09ed..2ae1201cff886 100644
+--- a/arch/riscv/mm/cacheflush.c
++++ b/arch/riscv/mm/cacheflush.c
+@@ -85,7 +85,9 @@ void flush_icache_pte(pte_t pte)
+ {
+ struct page *page = pte_page(pte);
+
+- if (!test_and_set_bit(PG_dcache_clean, &page->flags))
++ if (!test_bit(PG_dcache_clean, &page->flags)) {
+ flush_icache_all();
++ set_bit(PG_dcache_clean, &page->flags);
++ }
+ }
+ #endif /* CONFIG_MMU */
+diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
+index ee3459cb6750b..64bfb4575f3e6 100644
+--- a/arch/riscv/mm/context.c
++++ b/arch/riscv/mm/context.c
+@@ -22,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
+
+ static unsigned long asid_bits;
+ static unsigned long num_asids;
+-static unsigned long asid_mask;
++unsigned long asid_mask;
+
+ static atomic_long_t current_version;
+
+@@ -205,12 +205,24 @@ static void set_mm_noasid(struct mm_struct *mm)
+ local_flush_tlb_all();
+ }
+
+-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
++static inline void set_mm(struct mm_struct *prev,
++ struct mm_struct *next, unsigned int cpu)
+ {
+- if (static_branch_unlikely(&use_asid_allocator))
+- set_mm_asid(mm, cpu);
+- else
+- set_mm_noasid(mm);
++ /*
++ * The mm_cpumask indicates which harts' TLBs contain the virtual
++ * address mapping of the mm. Compared to noasid, using asid
++ * can't guarantee that stale TLB entries are invalidated because
++ * the asid mechanism wouldn't flush TLB for every switch_mm for
++ * performance. So when using asid, keep all CPUs footmarks in
++ * cpumask() until mm reset.
++ */
++ cpumask_set_cpu(cpu, mm_cpumask(next));
++ if (static_branch_unlikely(&use_asid_allocator)) {
++ set_mm_asid(next, cpu);
++ } else {
++ cpumask_clear_cpu(cpu, mm_cpumask(prev));
++ set_mm_noasid(next);
++ }
+ }
+
+ static int __init asids_init(void)
+@@ -262,7 +274,8 @@ static int __init asids_init(void)
+ }
+ early_initcall(asids_init);
+ #else
+-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
++static inline void set_mm(struct mm_struct *prev,
++ struct mm_struct *next, unsigned int cpu)
+ {
+ /* Nothing to do here when there is no MMU */
+ }
+@@ -315,10 +328,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ */
+ cpu = smp_processor_id();
+
+- cpumask_clear_cpu(cpu, mm_cpumask(prev));
+- cpumask_set_cpu(cpu, mm_cpumask(next));
+-
+- set_mm(next, cpu);
++ set_mm(prev, next, cpu);
+
+ flush_icache_deferred(next, cpu);
+ }
+diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
+index aa08dd2f8faec..884a3c76573cf 100644
+--- a/arch/riscv/mm/fault.c
++++ b/arch/riscv/mm/fault.c
+@@ -31,7 +31,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
+
+ bust_spinlocks(0);
+ die(regs, "Oops");
+- do_exit(SIGKILL);
++ make_task_dead(SIGKILL);
+ }
+
+ static inline void no_context(struct pt_regs *regs, unsigned long addr)
+@@ -188,7 +188,8 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+ }
+ break;
+ case EXC_LOAD_PAGE_FAULT:
+- if (!(vma->vm_flags & VM_READ)) {
++ /* Write implies read */
++ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) {
+ return true;
+ }
+ break;
+@@ -270,10 +271,12 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+- if (!user_mode(regs) && addr < TASK_SIZE &&
+- unlikely(!(regs->status & SR_SUM)))
+- die_kernel_fault("access to user memory without uaccess routines",
+- addr, regs);
++ if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) {
++ if (fixup_exception(regs))
++ return;
++
++ die_kernel_fault("access to user memory without uaccess routines", addr, regs);
++ }
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
+index c0cddf0fc22db..d7115acab3501 100644
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -49,7 +49,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+ EXPORT_SYMBOL(empty_zero_page);
+
+ extern char _start[];
+-#define DTB_EARLY_BASE_VA PGDIR_SIZE
+ void *_dtb_early_va __initdata;
+ uintptr_t _dtb_early_pa __initdata;
+
+@@ -100,6 +99,10 @@ static void __init print_vm_layout(void)
+ (unsigned long)VMEMMAP_END);
+ print_mlm("vmalloc", (unsigned long)VMALLOC_START,
+ (unsigned long)VMALLOC_END);
++#ifdef CONFIG_64BIT
++ print_mlm("modules", (unsigned long)MODULES_VADDR,
++ (unsigned long)MODULES_END);
++#endif
+ print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
+ (unsigned long)high_memory);
+ #ifdef CONFIG_64BIT
+@@ -187,10 +190,10 @@ static void __init setup_bootmem(void)
+
+
+ phys_ram_end = memblock_end_of_DRAM();
+-#ifndef CONFIG_64BIT
+ #ifndef CONFIG_XIP_KERNEL
+ phys_ram_base = memblock_start_of_DRAM();
+ #endif
++#ifndef CONFIG_64BIT
+ /*
+ * memblock allocator is not aware of the fact that last 4K bytes of
+ * the addressable memory can not be mapped because of IS_ERR_VALUE
+@@ -212,6 +215,14 @@ static void __init setup_bootmem(void)
+ set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
+
+ reserve_initrd_mem();
++
++ /*
++ * No allocation should be done before reserving the memory as defined
++ * in the device tree, otherwise the allocation could end up in a
++ * reserved region.
++ */
++ early_init_fdt_scan_reserved_mem();
++
+ /*
+ * If DTB is built in, no need to reserve its memblock.
+ * Otherwise, do reserve it but avoid using
+@@ -221,11 +232,9 @@ static void __init setup_bootmem(void)
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+
+- early_init_fdt_scan_reserved_mem();
+ dma_contiguous_reserve(dma32_phys_limit);
+ if (IS_ENABLED(CONFIG_64BIT))
+ hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+- memblock_allow_resize();
+ }
+
+ #ifdef CONFIG_MMU
+@@ -245,9 +254,9 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+ static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+-static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+
+ #ifdef CONFIG_XIP_KERNEL
++#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
+ #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
+ #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
+ #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
+@@ -451,6 +460,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
+ }
+
+ #ifdef CONFIG_XIP_KERNEL
++#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
+ /* called from head.S with MMU off */
+ asmlinkage void __init __copy_data(void)
+ {
+@@ -558,24 +568,27 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+-static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
++static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
++ uintptr_t dtb_pa)
+ {
+ #ifndef CONFIG_BUILTIN_DTB
+ uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+- IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+- PGDIR_SIZE,
+- IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
++ /* Make sure the fdt fixmap address is always aligned on PMD size */
++ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
+
+- if (IS_ENABLED(CONFIG_64BIT)) {
+- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
++ /* In 32-bit only, the fdt lies in its own PGD */
++ if (!IS_ENABLED(CONFIG_64BIT)) {
++ create_pgd_mapping(early_pg_dir, fix_fdt_va,
++ pa, MAX_FDT_SIZE, PAGE_KERNEL);
++ } else {
++ create_pmd_mapping(fixmap_pmd, fix_fdt_va,
+ pa, PMD_SIZE, PAGE_KERNEL);
+- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
++ create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ }
+
+- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
++ dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
+ #else
+ /*
+ * For 64-bit kernel, __va can't be used since it would return a linear
+@@ -663,7 +676,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+ create_kernel_page_table(early_pg_dir, true);
+
+ /* Setup early mapping for FDT early scan */
+- create_fdt_early_page_table(early_pg_dir, dtb_pa);
++ create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa);
+
+ /*
+ * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
+@@ -700,6 +713,7 @@ static void __init setup_vm_final(void)
+ {
+ uintptr_t va, map_size;
+ phys_addr_t pa, start, end;
++ unsigned long idx __maybe_unused;
+ u64 i;
+
+ /**
+@@ -713,6 +727,16 @@ static void __init setup_vm_final(void)
+ pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+ #endif
+ /* Setup swapper PGD for fixmap */
++#if !defined(CONFIG_64BIT)
++ /*
++ * In 32-bit, the device tree lies in a pgd entry, so it must be copied
++ * directly in swapper_pg_dir in addition to the pgd entry that points
++ * to fixmap_pte.
++ */
++ idx = pgd_index(__fix_to_virt(FIX_FDT));
++
++ set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
++#endif
+ create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+ __pa_symbol(fixmap_pgd_next),
+ PGDIR_SIZE, PAGE_TABLE);
+@@ -813,13 +837,22 @@ static void __init reserve_crashkernel(void)
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32 (hugepage size)
++ *
++ * Try to alloc from 32bit addressible physical memory so that
++ * swiotlb can work on the crash kernel.
+ */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+- search_start, search_end);
++ search_start,
++ min(search_end, (unsigned long)(SZ_4G - 1)));
+ if (crash_base == 0) {
+- pr_warn("crashkernel: couldn't allocate %lldKB\n",
+- crash_size >> 10);
+- return;
++ /* Try again without restricting region to 32bit addressible memory */
++ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
++ search_start, search_end);
++ if (crash_base == 0) {
++ pr_warn("crashkernel: couldn't allocate %lldKB\n",
++ crash_size >> 10);
++ return;
++ }
+ }
+
+ pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
+@@ -834,6 +867,9 @@ void __init paging_init(void)
+ {
+ setup_bootmem();
+ setup_vm_final();
++
++ /* Depend on that Linear Mapping is ready */
++ memblock_allow_resize();
+ }
+
+ void __init misc_mem_init(void)
+diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
+index 54294f83513d1..e26e367a3d9ef 100644
+--- a/arch/riscv/mm/kasan_init.c
++++ b/arch/riscv/mm/kasan_init.c
+@@ -22,8 +22,7 @@ asmlinkage void __init kasan_early_init(void)
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+- mk_pte(virt_to_page(kasan_early_shadow_page),
+- PAGE_KERNEL));
++ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
+index 5e49e4b4a4ccc..ea3d61de065b3 100644
+--- a/arch/riscv/mm/pageattr.c
++++ b/arch/riscv/mm/pageattr.c
+@@ -118,10 +118,10 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+ if (!numpages)
+ return 0;
+
+- mmap_read_lock(&init_mm);
++ mmap_write_lock(&init_mm);
+ ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ &masks);
+- mmap_read_unlock(&init_mm);
++ mmap_write_unlock(&init_mm);
+
+ flush_tlb_kernel_range(start, end);
+
+@@ -217,18 +217,26 @@ bool kernel_page_present(struct page *page)
+ pgd = pgd_offset_k(addr);
+ if (!pgd_present(*pgd))
+ return false;
++ if (pgd_leaf(*pgd))
++ return true;
+
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return false;
++ if (p4d_leaf(*p4d))
++ return true;
+
+ pud = pud_offset(p4d, addr);
+ if (!pud_present(*pud))
+ return false;
++ if (pud_leaf(*pud))
++ return true;
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ return false;
++ if (pmd_leaf(*pmd))
++ return true;
+
+ pte = pte_offset_kernel(pmd, addr);
+ return pte_present(*pte);
+diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
+index 64f8201237c24..39d18fc07b9c6 100644
+--- a/arch/riscv/mm/tlbflush.c
++++ b/arch/riscv/mm/tlbflush.c
+@@ -43,7 +43,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
+ /* check if the tlbflush needs to be sent to other CPUs */
+ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+ if (static_branch_unlikely(&use_asid_allocator)) {
+- unsigned long asid = atomic_long_read(&mm->context.id);
++ unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
+
+ if (broadcast) {
+ riscv_cpuid_to_hartid_mask(cmask, &hmask);
+diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
+index 75c1e99968675..ef336fe160044 100644
+--- a/arch/riscv/net/bpf_jit.h
++++ b/arch/riscv/net/bpf_jit.h
+@@ -69,6 +69,7 @@ struct rv_jit_context {
+ struct bpf_prog *prog;
+ u16 *insns; /* RV insns */
+ int ninsns;
++ int prologue_len;
+ int epilogue_offset;
+ int *offset; /* BPF to RV */
+ unsigned long flags;
+@@ -214,8 +215,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
+ int from, to;
+
+ off++; /* BPF branch is from PC+1, RV is from PC */
+- from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+- to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
++ from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len;
++ to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len;
+ return ninsns_rvoff(to - from);
+ }
+
+diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
+index 3af4131c22c7a..2e3f1a626a3af 100644
+--- a/arch/riscv/net/bpf_jit_comp64.c
++++ b/arch/riscv/net/bpf_jit_comp64.c
+@@ -120,6 +120,25 @@ static bool in_auipc_jalr_range(s64 val)
+ val < ((1L << 31) - (1L << 11));
+ }
+
++/* Emit fixed-length instructions for address */
++static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
++{
++ u64 ip = (u64)(ctx->insns + ctx->ninsns);
++ s64 off = addr - ip;
++ s64 upper = (off + (1 << 11)) >> 12;
++ s64 lower = off & 0xfff;
++
++ if (extra_pass && !in_auipc_jalr_range(off)) {
++ pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
++ return -ERANGE;
++ }
++
++ emit(rv_auipc(rd, upper), ctx);
++ emit(rv_addi(rd, rd, lower), ctx);
++ return 0;
++}
++
++/* Emit variable-length instructions for 32-bit and 64-bit imm */
+ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
+ {
+ /* Note that the immediate from the add is sign-extended,
+@@ -887,7 +906,15 @@ out_be:
+ u64 imm64;
+
+ imm64 = (u64)insn1.imm << 32 | (u32)imm;
+- emit_imm(rd, imm64, ctx);
++ if (bpf_pseudo_func(insn)) {
++ /* fixed-length insns for extra jit pass */
++ ret = emit_addr(rd, imm64, extra_pass, ctx);
++ if (ret)
++ return ret;
++ } else {
++ emit_imm(rd, imm64, ctx);
++ }
++
+ return 1;
+ }
+
+diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
+index 753d85bdfad07..b95c60f663d44 100644
+--- a/arch/riscv/net/bpf_jit_core.c
++++ b/arch/riscv/net/bpf_jit_core.c
+@@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ prog = orig_prog;
+ goto out_offset;
+ }
++
++ if (build_body(ctx, extra_pass, NULL)) {
++ prog = orig_prog;
++ goto out_offset;
++ }
++
+ for (i = 0; i < prog->len; i++) {
+ prev_ninsns += 32;
+ ctx->offset[i] = prev_ninsns;
+@@ -91,11 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ for (i = 0; i < NR_JIT_ITERATIONS; i++) {
+ pass++;
+ ctx->ninsns = 0;
++
++ bpf_jit_build_prologue(ctx);
++ ctx->prologue_len = ctx->ninsns;
++
+ if (build_body(ctx, extra_pass, ctx->offset)) {
+ prog = orig_prog;
+ goto out_offset;
+ }
+- bpf_jit_build_prologue(ctx);
++
+ ctx->epilogue_offset = ctx->ninsns;
+ bpf_jit_build_epilogue(ctx);
+
+@@ -154,6 +164,9 @@ skip_init_ctx:
+
+ if (!prog->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(jit_data->header);
++ for (i = 0; i < prog->len; i++)
++ ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
++ bpf_prog_fill_jited_linfo(prog, ctx->offset);
+ out_offset:
+ kfree(ctx->offset);
+ kfree(jit_data);
+diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
+index b86de61b8caa2..e402fa964f235 100644
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES
+ config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+- default 0x18000000000000
++ default 0x1C000000000000
+
+ config S390
+ def_bool y
+@@ -516,7 +516,6 @@ config KEXEC
+ config KEXEC_FILE
+ bool "kexec file based system call"
+ select KEXEC_CORE
+- select BUILD_BIN2C
+ depends on CRYPTO
+ depends on CRYPTO_SHA256
+ depends on CRYPTO_SHA256_S390
+diff --git a/arch/s390/Makefile b/arch/s390/Makefile
+index 450b351dfa8ef..dc840ba0b016a 100644
+--- a/arch/s390/Makefile
++++ b/arch/s390/Makefile
+@@ -29,9 +29,20 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbac
+ KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
+ KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
+ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
++KBUILD_CFLAGS_DECOMPRESSOR += -fPIE
+ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
+ KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
+ KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
++
++ifdef CONFIG_CC_IS_GCC
++ ifeq ($(call cc-ifversion, -ge, 1200, y), y)
++ ifeq ($(call cc-ifversion, -lt, 1300, y), y)
++ KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
++ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
++ endif
++ endif
++endif
++
+ UTS_MACHINE := s390x
+ STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
+ CHECKFLAGS += -D__s390__ -D__s390x__
+@@ -79,10 +90,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
+ KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+
+ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
+-ifeq ($(call cc-option,-mstack-size=8192),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
+-endif
++ CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
++ ifeq ($(call cc-option,-mstack-size=8192),)
++ CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
++ endif
++ export CC_FLAGS_CHECK_STACK
++ cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
+ endif
+
+ ifdef CONFIG_EXPOLINE
+diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
+index e27c2140d6206..623f6775d01d7 100644
+--- a/arch/s390/boot/compressed/decompressor.c
++++ b/arch/s390/boot/compressed/decompressor.c
+@@ -80,6 +80,6 @@ void *decompress_kernel(void)
+ void *output = (void *)decompress_offset;
+
+ __decompress(_compressed_start, _compressed_end - _compressed_start,
+- NULL, NULL, output, 0, NULL, error);
++ NULL, NULL, output, vmlinux.image_size, NULL, error);
+ return output;
+ }
+diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h
+index a59f75c5b0490..f75cc31a77dd9 100644
+--- a/arch/s390/boot/compressed/decompressor.h
++++ b/arch/s390/boot/compressed/decompressor.h
+@@ -24,6 +24,7 @@ struct vmlinux_info {
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
++ unsigned long amode31_size;
+ };
+
+ /* Symbols defined by linker scripts */
+diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
+index 918e05137d4c6..1686a852534fc 100644
+--- a/arch/s390/boot/compressed/vmlinux.lds.S
++++ b/arch/s390/boot/compressed/vmlinux.lds.S
+@@ -93,8 +93,17 @@ SECTIONS
+ _compressed_start = .;
+ *(.vmlinux.bin.compressed)
+ _compressed_end = .;
+- FILL(0xff);
+- . = ALIGN(4096);
++ }
++
++#define SB_TRAILER_SIZE 32
++ /* Trailer needed for Secure Boot */
++ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
++ . = ALIGN(4096) - SB_TRAILER_SIZE;
++ .sb.trailer : {
++ QUAD(0)
++ QUAD(0)
++ QUAD(0)
++ QUAD(0x000000207a49504c)
+ }
+ _end = .;
+
+diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
+index 9b14045065b6e..74b5cd2648622 100644
+--- a/arch/s390/boot/ipl_report.c
++++ b/arch/s390/boot/ipl_report.c
+@@ -57,11 +57,19 @@ repeat:
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
+ intersects(initrd_data.start, initrd_data.size, safe_addr, size))
+ safe_addr = initrd_data.start + initrd_data.size;
++ if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
++ safe_addr = (unsigned long)comps + comps->len;
++ goto repeat;
++ }
+ for_each_rb_entry(comp, comps)
+ if (intersects(safe_addr, size, comp->addr, comp->len)) {
+ safe_addr = comp->addr + comp->len;
+ goto repeat;
+ }
++ if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
++ safe_addr = (unsigned long)certs + certs->len;
++ goto repeat;
++ }
+ for_each_rb_entry(cert, certs)
+ if (intersects(safe_addr, size, cert->addr, cert->len)) {
+ safe_addr = cert->addr + cert->len;
+diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
+index 2f949cd9076b8..17a32707d17e0 100644
+--- a/arch/s390/boot/mem_detect.c
++++ b/arch/s390/boot/mem_detect.c
+@@ -165,7 +165,7 @@ static void search_mem_end(void)
+
+ unsigned long detect_memory(void)
+ {
+- unsigned long max_physmem_end;
++ unsigned long max_physmem_end = 0;
+
+ sclp_early_get_memsize(&max_physmem_end);
+
+diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
+index 6dc8d0a538640..1aa11a8f57dd8 100644
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -15,6 +15,7 @@
+ #include "uv.h"
+
+ unsigned long __bootdata_preserved(__kaslr_offset);
++unsigned long __bootdata(__amode31_base);
+ unsigned long __bootdata_preserved(VMALLOC_START);
+ unsigned long __bootdata_preserved(VMALLOC_END);
+ struct page *__bootdata_preserved(vmemmap);
+@@ -148,82 +149,56 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
+
+ static void setup_kernel_memory_layout(void)
+ {
+- bool vmalloc_size_verified = false;
+- unsigned long vmemmap_off;
+- unsigned long vspace_left;
++ unsigned long vmemmap_start;
+ unsigned long rte_size;
+ unsigned long pages;
+- unsigned long vmax;
+
+ pages = ident_map_size / PAGE_SIZE;
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
+
+ /* choose kernel address space layout: 4 or 3 levels. */
+- vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
++ vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
+ if (IS_ENABLED(CONFIG_KASAN) ||
+ vmalloc_size > _REGION2_SIZE ||
+- vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
+- vmax = _REGION1_SIZE;
+- else
+- vmax = _REGION2_SIZE;
+-
+- /* keep vmemmap_off aligned to a top level region table entry */
+- rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
+- MODULES_END = vmax;
+- if (is_prot_virt_host()) {
+- /*
+- * forcing modules and vmalloc area under the ultravisor
+- * secure storage limit, so that any vmalloc allocation
+- * we do could be used to back secure guest storage.
+- */
+- adjust_to_uv_max(&MODULES_END);
+- }
+-
+-#ifdef CONFIG_KASAN
+- if (MODULES_END < vmax) {
+- /* force vmalloc and modules below kasan shadow */
+- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
++ vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
++ _REGION2_SIZE) {
++ MODULES_END = _REGION1_SIZE;
++ rte_size = _REGION2_SIZE;
+ } else {
+- /*
+- * leave vmalloc and modules above kasan shadow but make
+- * sure they don't overlap with it
+- */
+- vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
+- vmalloc_size_verified = true;
+- vspace_left = KASAN_SHADOW_START;
++ MODULES_END = _REGION2_SIZE;
++ rte_size = _REGION3_SIZE;
+ }
++ /*
++ * forcing modules and vmalloc area under the ultravisor
++ * secure storage limit, so that any vmalloc allocation
++ * we do could be used to back secure guest storage.
++ */
++ adjust_to_uv_max(&MODULES_END);
++#ifdef CONFIG_KASAN
++ /* force vmalloc and modules below kasan shadow */
++ MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ #endif
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
+
+- if (vmalloc_size_verified) {
+- VMALLOC_START = VMALLOC_END - vmalloc_size;
+- } else {
+- vmemmap_off = round_up(ident_map_size, rte_size);
++ /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
++ vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
++ VMALLOC_START = VMALLOC_END - vmalloc_size;
+
+- if (vmemmap_off + vmemmap_size > VMALLOC_END ||
+- vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
+- /*
+- * allow vmalloc area to occupy up to 1/2 of
+- * the rest virtual space left.
+- */
+- vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
+- }
+- VMALLOC_START = VMALLOC_END - vmalloc_size;
+- vspace_left = VMALLOC_START;
+- }
+-
+- pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
++ /* split remaining virtual space between 1:1 mapping & vmemmap array */
++ pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ pages = SECTION_ALIGN_UP(pages);
+- vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
+- /* keep vmemmap left most starting from a fresh region table entry */
+- vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
+- /* take care that identity map is lower then vmemmap */
+- ident_map_size = min(ident_map_size, vmemmap_off);
++ /* keep vmemmap_start aligned to a top level region table entry */
++ vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
++ /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
++ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
++ /* make sure identity map doesn't overlay with vmemmap */
++ ident_map_size = min(ident_map_size, vmemmap_start);
+ vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+- VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
+- vmemmap = (struct page *)vmemmap_off;
++ /* make sure vmemmap doesn't overlay with vmalloc area */
++ VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
++ vmemmap = (struct page *)vmemmap_start;
+ }
+
+ /*
+@@ -259,6 +234,12 @@ static void offset_vmlinux_info(unsigned long offset)
+ vmlinux.dynsym_start += offset;
+ }
+
++static unsigned long reserve_amode31(unsigned long safe_addr)
++{
++ __amode31_base = PAGE_ALIGN(safe_addr);
++ return safe_addr + vmlinux.amode31_size;
++}
++
+ void startup_kernel(void)
+ {
+ unsigned long random_lma;
+@@ -273,6 +254,7 @@ void startup_kernel(void)
+ setup_lpp();
+ store_ipl_parmblock();
+ safe_addr = mem_safe_offset();
++ safe_addr = reserve_amode31(safe_addr);
+ safe_addr = read_ipl_report(safe_addr);
+ uv_query_info();
+ rescue_initrd(safe_addr);
+diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
+index 54c7536f2482d..1023e9d43d443 100644
+--- a/arch/s390/crypto/aes_s390.c
++++ b/arch/s390/crypto/aes_s390.c
+@@ -701,7 +701,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
+ unsigned int nbytes)
+ {
+ gw->walk_bytes_remain -= nbytes;
+- scatterwalk_unmap(&gw->walk);
++ scatterwalk_unmap(gw->walk_ptr);
+ scatterwalk_advance(&gw->walk, nbytes);
+ scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
+ gw->walk_ptr = NULL;
+@@ -776,7 +776,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
+ goto out;
+ }
+
+- scatterwalk_unmap(&gw->walk);
++ scatterwalk_unmap(gw->walk_ptr);
+ gw->walk_ptr = NULL;
+
+ gw->ptr = gw->buf;
+diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
+index 56007c763902a..1f2d40993c4d2 100644
+--- a/arch/s390/crypto/arch_random.c
++++ b/arch/s390/crypto/arch_random.c
+@@ -4,232 +4,15 @@
+ *
+ * Copyright IBM Corp. 2017, 2020
+ * Author(s): Harald Freudenberger
+- *
+- * The s390_arch_random_generate() function may be called from random.c
+- * in interrupt context. So this implementation does the best to be very
+- * fast. There is a buffer of random data which is asynchronously checked
+- * and filled by a workqueue thread.
+- * If there are enough bytes in the buffer the s390_arch_random_generate()
+- * just delivers these bytes. Otherwise false is returned until the
+- * worker thread refills the buffer.
+- * The worker fills the rng buffer by pulling fresh entropy from the
+- * high quality (but slow) true hardware random generator. This entropy
+- * is then spread over the buffer with an pseudo random generator PRNG.
+- * As the arch_get_random_seed_long() fetches 8 bytes and the calling
+- * function add_interrupt_randomness() counts this as 1 bit entropy the
+- * distribution needs to make sure there is in fact 1 bit entropy contained
+- * in 8 bytes of the buffer. The current values pull 32 byte entropy
+- * and scatter this into a 2048 byte buffer. So 8 byte in the buffer
+- * will contain 1 bit of entropy.
+- * The worker thread is rescheduled based on the charge level of the
+- * buffer but at least with 500 ms delay to avoid too much CPU consumption.
+- * So the max. amount of rng data delivered via arch_get_random_seed is
+- * limited to 4k bytes per second.
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/atomic.h>
+ #include <linux/random.h>
+-#include <linux/slab.h>
+ #include <linux/static_key.h>
+-#include <linux/workqueue.h>
+-#include <linux/moduleparam.h>
+ #include <asm/cpacf.h>
+
+ DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
+
+ atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
+ EXPORT_SYMBOL(s390_arch_random_counter);
+-
+-#define ARCH_REFILL_TICKS (HZ/2)
+-#define ARCH_PRNG_SEED_SIZE 32
+-#define ARCH_RNG_BUF_SIZE 2048
+-
+-static DEFINE_SPINLOCK(arch_rng_lock);
+-static u8 *arch_rng_buf;
+-static unsigned int arch_rng_buf_idx;
+-
+-static void arch_rng_refill_buffer(struct work_struct *);
+-static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
+-
+-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
+-{
+- /* max hunk is ARCH_RNG_BUF_SIZE */
+- if (nbytes > ARCH_RNG_BUF_SIZE)
+- return false;
+-
+- /* lock rng buffer */
+- if (!spin_trylock(&arch_rng_lock))
+- return false;
+-
+- /* try to resolve the requested amount of bytes from the buffer */
+- arch_rng_buf_idx -= nbytes;
+- if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
+- memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
+- atomic64_add(nbytes, &s390_arch_random_counter);
+- spin_unlock(&arch_rng_lock);
+- return true;
+- }
+-
+- /* not enough bytes in rng buffer, refill is done asynchronously */
+- spin_unlock(&arch_rng_lock);
+-
+- return false;
+-}
+-EXPORT_SYMBOL(s390_arch_random_generate);
+-
+-static void arch_rng_refill_buffer(struct work_struct *unused)
+-{
+- unsigned int delay = ARCH_REFILL_TICKS;
+-
+- spin_lock(&arch_rng_lock);
+- if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
+- /* buffer is exhausted and needs refill */
+- u8 seed[ARCH_PRNG_SEED_SIZE];
+- u8 prng_wa[240];
+- /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
+- cpacf_trng(NULL, 0, seed, sizeof(seed));
+- /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
+- memset(prng_wa, 0, sizeof(prng_wa));
+- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+- &prng_wa, NULL, 0, seed, sizeof(seed));
+- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+- &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
+- arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
+- }
+- delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
+- spin_unlock(&arch_rng_lock);
+-
+- /* kick next check */
+- queue_delayed_work(system_long_wq, &arch_rng_work, delay);
+-}
+-
+-/*
+- * Here follows the implementation of s390_arch_get_random_long().
+- *
+- * The random longs to be pulled by arch_get_random_long() are
+- * prepared in an 4K buffer which is filled from the NIST 800-90
+- * compliant s390 drbg. By default the random long buffer is refilled
+- * 256 times before the drbg itself needs a reseed. The reseed of the
+- * drbg is done with 32 bytes fetched from the high quality (but slow)
+- * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256
+- * bits of entropy are spread over 256 * 4KB = 1MB serving 131072
+- * arch_get_random_long() invocations before reseeded.
+- *
+- * How often the 4K random long buffer is refilled with the drbg
+- * before the drbg is reseeded can be adjusted. There is a module
+- * parameter 's390_arch_rnd_long_drbg_reseed' accessible via
+- * /sys/module/arch_random/parameters/rndlong_drbg_reseed
+- * or as kernel command line parameter
+- * arch_random.rndlong_drbg_reseed=<value>
+- * This parameter tells how often the drbg fills the 4K buffer before
+- * it is re-seeded by fresh entropy from the trng.
+- * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64
+- * KB with 32 bytes of fresh entropy pulled from the trng. So a value
+- * of 16 would result in 256 bits entropy per 64 KB.
+- * A value of 256 results in 1MB of drbg output before a reseed of the
+- * drbg is done. So this would spread the 256 bits of entropy among 1MB.
+- * Setting this parameter to 0 forces the reseed to take place every
+- * time the 4K buffer is depleted, so the entropy rises to 256 bits
+- * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With
+- * setting this parameter to negative values all this effort is
+- * disabled, arch_get_random long() returns false and thus indicating
+- * that the arch_get_random_long() feature is disabled at all.
+- */
+-
+-static unsigned long rndlong_buf[512];
+-static DEFINE_SPINLOCK(rndlong_lock);
+-static int rndlong_buf_index;
+-
+-static int rndlong_drbg_reseed = 256;
+-module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600);
+-MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed");
+-
+-static inline void refill_rndlong_buf(void)
+-{
+- static u8 prng_ws[240];
+- static int drbg_counter;
+-
+- if (--drbg_counter < 0) {
+- /* need to re-seed the drbg */
+- u8 seed[32];
+-
+- /* fetch seed from trng */
+- cpacf_trng(NULL, 0, seed, sizeof(seed));
+- /* seed drbg */
+- memset(prng_ws, 0, sizeof(prng_ws));
+- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+- &prng_ws, NULL, 0, seed, sizeof(seed));
+- /* re-init counter for drbg */
+- drbg_counter = rndlong_drbg_reseed;
+- }
+-
+- /* fill the arch_get_random_long buffer from drbg */
+- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws,
+- (u8 *) rndlong_buf, sizeof(rndlong_buf),
+- NULL, 0);
+-}
+-
+-bool s390_arch_get_random_long(unsigned long *v)
+-{
+- bool rc = false;
+- unsigned long flags;
+-
+- /* arch_get_random_long() disabled ? */
+- if (rndlong_drbg_reseed < 0)
+- return false;
+-
+- /* try to lock the random long lock */
+- if (!spin_trylock_irqsave(&rndlong_lock, flags))
+- return false;
+-
+- if (--rndlong_buf_index >= 0) {
+- /* deliver next long value from the buffer */
+- *v = rndlong_buf[rndlong_buf_index];
+- rc = true;
+- goto out;
+- }
+-
+- /* buffer is depleted and needs refill */
+- if (in_interrupt()) {
+- /* delay refill in interrupt context to next caller */
+- rndlong_buf_index = 0;
+- goto out;
+- }
+-
+- /* refill random long buffer */
+- refill_rndlong_buf();
+- rndlong_buf_index = ARRAY_SIZE(rndlong_buf);
+-
+- /* and provide one random long */
+- *v = rndlong_buf[--rndlong_buf_index];
+- rc = true;
+-
+-out:
+- spin_unlock_irqrestore(&rndlong_lock, flags);
+- return rc;
+-}
+-EXPORT_SYMBOL(s390_arch_get_random_long);
+-
+-static int __init s390_arch_random_init(void)
+-{
+- /* all the needed PRNO subfunctions available ? */
+- if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
+- cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
+-
+- /* alloc arch random working buffer */
+- arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
+- if (!arch_rng_buf)
+- return -ENOMEM;
+-
+- /* kick worker queue job to fill the random buffer */
+- queue_delayed_work(system_long_wq,
+- &arch_rng_work, ARCH_REFILL_TICKS);
+-
+- /* enable arch random to the outside world */
+- static_branch_enable(&s390_arch_random_available);
+- }
+-
+- return 0;
+-}
+-arch_initcall(s390_arch_random_init);
+diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
+index a279b7d23a5e2..621322eb0e681 100644
+--- a/arch/s390/crypto/paes_s390.c
++++ b/arch/s390/crypto/paes_s390.c
+@@ -35,7 +35,7 @@
+ * and padding is also possible, the limits need to be generous.
+ */
+ #define PAES_MIN_KEYSIZE 16
+-#define PAES_MAX_KEYSIZE 320
++#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
+
+ static u8 *ctrblk;
+ static DEFINE_MUTEX(ctrblk_lock);
+diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
+index f0bc4dc3e9bf0..6511d15ace45e 100644
+--- a/arch/s390/hypfs/hypfs_diag.c
++++ b/arch/s390/hypfs/hypfs_diag.c
+@@ -437,7 +437,7 @@ __init int hypfs_diag_init(void)
+ int rc;
+
+ if (diag204_probe()) {
+- pr_err("The hardware system does not support hypfs\n");
++ pr_info("The hardware system does not support hypfs\n");
+ return -ENODATA;
+ }
+
+diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
+index 33f973ff97442..e8f15dbb89d02 100644
+--- a/arch/s390/hypfs/hypfs_vm.c
++++ b/arch/s390/hypfs/hypfs_vm.c
+@@ -20,6 +20,7 @@
+
+ static char local_guest[] = " ";
+ static char all_guests[] = "* ";
++static char *all_groups = all_guests;
+ static char *guest_query;
+
+ struct diag2fc_data {
+@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr)
+
+ memcpy(parm_list.userid, query, NAME_LEN);
+ ASCEBC(parm_list.userid, NAME_LEN);
+- parm_list.addr = (unsigned long) addr ;
++ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
++ ASCEBC(parm_list.aci_grp, NAME_LEN);
++ parm_list.addr = (unsigned long)addr;
+ parm_list.size = size;
+ parm_list.fmt = 0x02;
+- memset(parm_list.aci_grp, 0x40, NAME_LEN);
+ rc = -1;
+
+ diag_stat_inc(DIAG_STAT_X2FC);
+diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
+index 5c97f48cea91d..ee919bfc81867 100644
+--- a/arch/s390/hypfs/inode.c
++++ b/arch/s390/hypfs/inode.c
+@@ -496,9 +496,9 @@ fail_hypfs_sprp_exit:
+ hypfs_vm_exit();
+ fail_hypfs_diag_exit:
+ hypfs_diag_exit();
++ pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+ fail_dbfs_exit:
+ hypfs_dbfs_exit();
+- pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+ return rc;
+ }
+ device_initcall(hypfs_init)
+diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
+index 3afbee21dc1f1..859e6d87b108b 100644
+--- a/arch/s390/include/asm/ap.h
++++ b/arch/s390/include/asm/ap.h
+@@ -236,7 +236,10 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
+ union {
+ unsigned long value;
+ struct ap_qirq_ctrl qirqctrl;
+- struct ap_queue_status status;
++ struct {
++ u32 _pad;
++ struct ap_queue_status status;
++ };
+ } reg1;
+ void *reg2 = ind;
+
+@@ -250,7 +253,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ : [reg1] "+&d" (reg1)
+ : [reg0] "d" (reg0), [reg2] "d" (reg2)
+- : "cc", "0", "1", "2");
++ : "cc", "memory", "0", "1", "2");
+
+ return reg1.status;
+ }
+@@ -287,7 +290,10 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
+ unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
+ union {
+ unsigned long value;
+- struct ap_queue_status status;
++ struct {
++ u32 _pad;
++ struct ap_queue_status status;
++ };
+ } reg1;
+ unsigned long reg2;
+
+diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
+index 5dc712fde3c7f..4120c428dc378 100644
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+@@ -14,18 +14,15 @@
+ #ifdef CONFIG_ARCH_RANDOM
+
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
++#include <asm/cpacf.h>
+
+ DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
+ extern atomic64_t s390_arch_random_counter;
+
+-bool s390_arch_get_random_long(unsigned long *v);
+-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
+-
+ static inline bool __must_check arch_get_random_long(unsigned long *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available))
+- return s390_arch_get_random_long(v);
+ return false;
+ }
+
+@@ -36,16 +33,22 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
+
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
+- return s390_arch_random_generate((u8 *)v, sizeof(*v));
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
++ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
++ atomic64_add(sizeof(*v), &s390_arch_random_counter);
++ return true;
+ }
+ return false;
+ }
+
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
+- return s390_arch_random_generate((u8 *)v, sizeof(*v));
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
++ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
++ atomic64_add(sizeof(*v), &s390_arch_random_counter);
++ return true;
+ }
+ return false;
+ }
+diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
+index 1effac6a01520..1c4f585dd39b6 100644
+--- a/arch/s390/include/asm/cio.h
++++ b/arch/s390/include/asm/cio.h
+@@ -369,7 +369,7 @@ void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
+ struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
+
+ /* Function from drivers/s390/cio/chsc.c */
+-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
++int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta);
+ int chsc_sstpi(void *page, void *result, size_t size);
+ int chsc_stzi(void *page, void *result, size_t size);
+ int chsc_sgib(u32 origin);
+diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
+index 0d90cbeb89b43..a0914bc6c9bdd 100644
+--- a/arch/s390/include/asm/cpu_mf.h
++++ b/arch/s390/include/asm/cpu_mf.h
+@@ -128,19 +128,21 @@ struct hws_combined_entry {
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+ } __packed;
+
+-struct hws_trailer_entry {
+- union {
+- struct {
+- unsigned int f:1; /* 0 - Block Full Indicator */
+- unsigned int a:1; /* 1 - Alert request control */
+- unsigned int t:1; /* 2 - Timestamp format */
+- unsigned int :29; /* 3 - 31: Reserved */
+- unsigned int bsdes:16; /* 32-47: size of basic SDE */
+- unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+- };
+- unsigned long long flags; /* 0 - 63: All indicators */
++union hws_trailer_header {
++ struct {
++ unsigned int f:1; /* 0 - Block Full Indicator */
++ unsigned int a:1; /* 1 - Alert request control */
++ unsigned int t:1; /* 2 - Timestamp format */
++ unsigned int :29; /* 3 - 31: Reserved */
++ unsigned int bsdes:16; /* 32-47: size of basic SDE */
++ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
++ unsigned long long overflow; /* 64 - Overflow Count */
+ };
+- unsigned long long overflow; /* 64 - sample Overflow count */
++ __uint128_t val;
++};
++
++struct hws_trailer_entry {
++ union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+@@ -287,14 +289,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+ }
+
+-#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
+-#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+-
+ /* Return TOD timestamp contained in an trailer entry */
+ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+ {
+ /* TOD in STCKE format */
+- if (te->t)
++ if (te->header.t)
+ return *((unsigned long long *) &te->timestamp[1]);
+
+ /* TOD in STCK format */
+diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
+index 04dc65f8901dc..80b93c06a2bbe 100644
+--- a/arch/s390/include/asm/ctl_reg.h
++++ b/arch/s390/include/asm/ctl_reg.h
+@@ -72,8 +72,17 @@ static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+ __ctl_load(reg, cr, cr);
+ }
+
+-void smp_ctl_set_bit(int cr, int bit);
+-void smp_ctl_clear_bit(int cr, int bit);
++void smp_ctl_set_clear_bit(int cr, int bit, bool set);
++
++static inline void ctl_set_bit(int cr, int bit)
++{
++ smp_ctl_set_clear_bit(cr, bit, true);
++}
++
++static inline void ctl_clear_bit(int cr, int bit)
++{
++ smp_ctl_set_clear_bit(cr, bit, false);
++}
+
+ union ctlreg0 {
+ unsigned long val;
+@@ -128,8 +137,5 @@ union ctlreg15 {
+ };
+ };
+
+-#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+-#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
+-
+ #endif /* __ASSEMBLY__ */
+ #endif /* __ASM_CTL_REG_H */
+diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
+index 19a55e1e3a0c5..5fc91a90657e7 100644
+--- a/arch/s390/include/asm/debug.h
++++ b/arch/s390/include/asm/debug.h
+@@ -4,8 +4,8 @@
+ *
+ * Copyright IBM Corp. 1999, 2020
+ */
+-#ifndef DEBUG_H
+-#define DEBUG_H
++#ifndef _ASM_S390_DEBUG_H
++#define _ASM_S390_DEBUG_H
+
+ #include <linux/string.h>
+ #include <linux/spinlock.h>
+@@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
+
+ #endif /* MODULE */
+
+-#endif /* DEBUG_H */
++#endif /* _ASM_S390_DEBUG_H */
+diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
+index 16dc57dd90b30..8511f0e59290f 100644
+--- a/arch/s390/include/asm/extable.h
++++ b/arch/s390/include/asm/extable.h
+@@ -69,8 +69,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ {
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+- a->handler = b->handler + delta;
+- b->handler = tmp.handler - delta;
++ a->handler = b->handler;
++ if (a->handler)
++ a->handler += delta;
++ b->handler = tmp.handler;
++ if (b->handler)
++ b->handler -= delta;
+ }
++#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+ #endif
+diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
+index c22debfcebf12..bf15767b729f9 100644
+--- a/arch/s390/include/asm/futex.h
++++ b/arch/s390/include/asm/futex.h
+@@ -16,7 +16,8 @@
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4: sacf 768\n" \
+- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
++ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
++ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
+index 40264f60b0da9..f4073106e1f39 100644
+--- a/arch/s390/include/asm/gmap.h
++++ b/arch/s390/include/asm/gmap.h
+@@ -148,4 +148,6 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr);
+ int gmap_mark_unmergeable(void);
+ void s390_reset_acc(struct mm_struct *mm);
++void s390_unlist_old_asce(struct gmap *gmap);
++int s390_replace_asce(struct gmap *gmap);
+ #endif /* _ASM_S390_GMAP_H */
+diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
+index 60f9241e5e4a6..d3642fb634bd9 100644
+--- a/arch/s390/include/asm/hugetlb.h
++++ b/arch/s390/include/asm/hugetlb.h
+@@ -28,9 +28,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+ {
+- if (len & ~HPAGE_MASK)
++ struct hstate *h = hstate_file(file);
++
++ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+- if (addr & ~HPAGE_MASK)
++ if (addr & ~huge_page_mask(h))
+ return -EINVAL;
+ return 0;
+ }
+diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
+index ea398a05f6432..63098df81c9f2 100644
+--- a/arch/s390/include/asm/kexec.h
++++ b/arch/s390/include/asm/kexec.h
+@@ -9,6 +9,8 @@
+ #ifndef _S390_KEXEC_H
+ #define _S390_KEXEC_H
+
++#include <linux/module.h>
++
+ #include <asm/processor.h>
+ #include <asm/page.h>
+ #include <asm/setup.h>
+@@ -74,7 +76,21 @@ void *kexec_file_add_components(struct kimage *image,
+ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr);
+
++#define ARCH_HAS_KIMAGE_ARCH
++
++struct kimage_arch {
++ void *ipl_buf;
++};
++
+ extern const struct kexec_file_ops s390_kexec_image_ops;
+ extern const struct kexec_file_ops s390_kexec_elf_ops;
+
++#ifdef CONFIG_KEXEC_FILE
++struct purgatory_info;
++int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
++ Elf_Shdr *section,
++ const Elf_Shdr *relsec,
++ const Elf_Shdr *symtab);
++#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++#endif
+ #endif /*_S390_KEXEC_H */
+diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
+index 3c89279d2a4b1..147a8d547ef9e 100644
+--- a/arch/s390/include/asm/os_info.h
++++ b/arch/s390/include/asm/os_info.h
+@@ -39,7 +39,7 @@ u32 os_info_csum(struct os_info *os_info);
+
+ #ifdef CONFIG_CRASH_DUMP
+ void *os_info_old_entry(int nr, unsigned long *size);
+-int copy_oldmem_kernel(void *dst, void *src, size_t count);
++int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+ #else
+ static inline void *os_info_old_entry(int nr, unsigned long *size)
+ {
+diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
+index e4dc64cc9c555..287bb88f76986 100644
+--- a/arch/s390/include/asm/pci_io.h
++++ b/arch/s390/include/asm/pci_io.h
+@@ -14,12 +14,13 @@
+
+ /* I/O Map */
+ #define ZPCI_IOMAP_SHIFT 48
+-#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
++#define ZPCI_IOMAP_ADDR_SHIFT 62
++#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT)
+ #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+ #define ZPCI_IOMAP_MAX_ENTRIES \
+- ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
++ (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
+ #define ZPCI_IOMAP_ADDR_IDX_MASK \
+- (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
++ ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
+
+ struct zpci_iomap_entry {
+ u32 fh;
+diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
+index cb5fc06904354..081837b391e35 100644
+--- a/arch/s390/include/asm/percpu.h
++++ b/arch/s390/include/asm/percpu.h
+@@ -31,7 +31,7 @@
+ pcp_op_T__ *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+- prev__ = *ptr__; \
++ prev__ = READ_ONCE(*ptr__); \
+ do { \
+ old__ = prev__; \
+ new__ = old__ op (val); \
+diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
+index d9d5350cc3ec3..bf15da0fedbca 100644
+--- a/arch/s390/include/asm/preempt.h
++++ b/arch/s390/include/asm/preempt.h
+@@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void)
+
+ static inline void __preempt_count_add(int val)
+ {
+- if (__builtin_constant_p(val) && (val >= -128) && (val <= 127))
+- __atomic_add_const(val, &S390_lowcore.preempt_count);
+- else
+- __atomic_add(val, &S390_lowcore.preempt_count);
++ /*
++ * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
++ * enabled, gcc 12 fails to handle __builtin_constant_p().
++ */
++ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
++ if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
++ __atomic_add_const(val, &S390_lowcore.preempt_count);
++ return;
++ }
++ }
++ __atomic_add(val, &S390_lowcore.preempt_count);
+ }
+
+ static inline void __preempt_count_sub(int val)
+diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
+index 879b8e3f609cd..d7ca76bb2720f 100644
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -318,14 +318,21 @@ extern void (*s390_base_pgm_handler_fn)(void);
+
+ #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
+
+-extern int memcpy_real(void *, void *, size_t);
++extern int memcpy_real(void *, unsigned long, size_t);
+ extern void memcpy_absolute(void *, void *, size_t);
+
+-#define mem_assign_absolute(dest, val) do { \
+- __typeof__(dest) __tmp = (val); \
+- \
+- BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
+- memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
++#define put_abs_lowcore(member, x) do { \
++ unsigned long __abs_address = offsetof(struct lowcore, member); \
++ __typeof__(((struct lowcore *)0)->member) __tmp = (x); \
++ \
++ memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
++} while (0)
++
++#define get_abs_lowcore(x, member) do { \
++ unsigned long __abs_address = offsetof(struct lowcore, member); \
++ __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
++ \
++ memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
+ } while (0)
+
+ extern int s390_isolate_bp(void);
+diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
+index 50d9b04ecbd14..bc50ee0e91ff1 100644
+--- a/arch/s390/include/asm/timex.h
++++ b/arch/s390/include/asm/timex.h
+@@ -201,6 +201,7 @@ static inline cycles_t get_cycles(void)
+ {
+ return (cycles_t) get_tod_clock() >> 2;
+ }
++#define get_cycles get_cycles
+
+ int get_phys_clock(unsigned long *clock);
+ void init_cpu_timer(void);
+diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
+index ce550d06abc36..3379694e9a42f 100644
+--- a/arch/s390/include/asm/uaccess.h
++++ b/arch/s390/include/asm/uaccess.h
+@@ -245,7 +245,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
+ return __clear_user(to, n);
+ }
+
+-int copy_to_user_real(void __user *dest, void *src, unsigned long count);
++int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
+ void *s390_kernel_write(void *dst, const void *src, size_t size);
+
+ #define HAVE_GET_KERNEL_NOFAULT
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index b57da93385888..9242d7ad71e79 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -128,6 +128,8 @@ int main(void)
+ OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
+ /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+ OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
++ OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info);
++ OFFSET(__LC_OS_INFO, lowcore, os_info);
+ /* hardware defined lowcore locations 0x1000 - 0x18ff */
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
+ OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
+diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
+index d72a6df058d79..8722bd07c6079 100644
+--- a/arch/s390/kernel/crash_dump.c
++++ b/arch/s390/kernel/crash_dump.c
+@@ -44,7 +44,7 @@ struct save_area {
+ u64 fprs[16];
+ u32 fpc;
+ u32 prefix;
+- u64 todpreg;
++ u32 todpreg;
+ u64 timer;
+ u64 todcmp;
+ u64 vxrs_low[16];
+@@ -132,28 +132,27 @@ static inline void *load_real_addr(void *addr)
+ /*
+ * Copy memory of the old, dumped system to a kernel space virtual address
+ */
+-int copy_oldmem_kernel(void *dst, void *src, size_t count)
++int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+ {
+- unsigned long from, len;
++ unsigned long len;
+ void *ra;
+ int rc;
+
+ while (count) {
+- from = __pa(src);
+- if (!oldmem_data.start && from < sclp.hsa_size) {
++ if (!oldmem_data.start && src < sclp.hsa_size) {
+ /* Copy from zfcp/nvme dump HSA area */
+- len = min(count, sclp.hsa_size - from);
+- rc = memcpy_hsa_kernel(dst, from, len);
++ len = min(count, sclp.hsa_size - src);
++ rc = memcpy_hsa_kernel(dst, src, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+- if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
+- from -= oldmem_data.start;
+- len = min(count, oldmem_data.size - from);
+- } else if (oldmem_data.start && from < oldmem_data.size) {
+- len = min(count, oldmem_data.size - from);
+- from += oldmem_data.start;
++ if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
++ src -= oldmem_data.start;
++ len = min(count, oldmem_data.size - src);
++ } else if (oldmem_data.start && src < oldmem_data.size) {
++ len = min(count, oldmem_data.size - src);
++ src += oldmem_data.start;
+ } else {
+ len = count;
+ }
+@@ -163,7 +162,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
+ } else {
+ ra = dst;
+ }
+- if (memcpy_real(ra, (void *) from, len))
++ if (memcpy_real(ra, src, len))
+ return -EFAULT;
+ }
+ dst += len;
+@@ -176,31 +175,30 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
+ /*
+ * Copy memory of the old, dumped system to a user space virtual address
+ */
+-static int copy_oldmem_user(void __user *dst, void *src, size_t count)
++static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+ {
+- unsigned long from, len;
++ unsigned long len;
+ int rc;
+
+ while (count) {
+- from = __pa(src);
+- if (!oldmem_data.start && from < sclp.hsa_size) {
++ if (!oldmem_data.start && src < sclp.hsa_size) {
+ /* Copy from zfcp/nvme dump HSA area */
+- len = min(count, sclp.hsa_size - from);
+- rc = memcpy_hsa_user(dst, from, len);
++ len = min(count, sclp.hsa_size - src);
++ rc = memcpy_hsa_user(dst, src, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+- if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) {
+- from -= oldmem_data.size;
+- len = min(count, oldmem_data.size - from);
+- } else if (oldmem_data.start && from < oldmem_data.size) {
+- len = min(count, oldmem_data.size - from);
+- from += oldmem_data.start;
++ if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
++ src -= oldmem_data.start;
++ len = min(count, oldmem_data.size - src);
++ } else if (oldmem_data.start && src < oldmem_data.size) {
++ len = min(count, oldmem_data.size - src);
++ src += oldmem_data.start;
+ } else {
+ len = count;
+ }
+- rc = copy_to_user_real(dst, (void *) from, count);
++ rc = copy_to_user_real(dst, src, len);
+ if (rc)
+ return rc;
+ }
+@@ -217,12 +215,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count)
+ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+ {
+- void *src;
++ unsigned long src;
+ int rc;
+
+ if (!csize)
+ return 0;
+- src = (void *) (pfn << PAGE_SHIFT) + offset;
++ src = pfn_to_phys(pfn) + offset;
+ if (userbuf)
+ rc = copy_oldmem_user((void __force __user *) buf, src, csize);
+ else
+@@ -429,10 +427,10 @@ static void *nt_prpsinfo(void *ptr)
+ static void *get_vmcoreinfo_old(unsigned long *size)
+ {
+ char nt_name[11], *vmcoreinfo;
++ unsigned long addr;
+ Elf64_Nhdr note;
+- void *addr;
+
+- if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
++ if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr)))
+ return NULL;
+ memset(nt_name, 0, sizeof(nt_name));
+ if (copy_oldmem_kernel(&note, addr, sizeof(note)))
+diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
+index db1bc00229caf..272ef8597e208 100644
+--- a/arch/s390/kernel/dumpstack.c
++++ b/arch/s390/kernel/dumpstack.c
+@@ -224,5 +224,5 @@ void die(struct pt_regs *regs, const char *str)
+ if (panic_on_oops)
+ panic("Fatal exception: panic_on_oops");
+ oops_exit();
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+index 4c9b967290ae0..d530eb4dc413f 100644
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -248,6 +248,10 @@ ENTRY(sie64a)
+ BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ .Lsie_entry:
+ sie 0(%r14)
++# Let the next instruction be NOP to avoid triggering a machine check
++# and handling it in a guest as a result of the instruction execution.
++ nopr 7
++.Lsie_leave:
+ BPOFF
+ BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ .Lsie_skip:
+@@ -536,7 +540,7 @@ ENTRY(mcck_int_handler)
+ jno .Lmcck_panic
+ #if IS_ENABLED(CONFIG_KVM)
+ OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f
+- OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f
++ OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+ j 5f
+ 4: CHKSTG .Lmcck_panic
+diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
+index 7f2696e8d511e..6083090be1f46 100644
+--- a/arch/s390/kernel/entry.h
++++ b/arch/s390/kernel/entry.h
+@@ -70,5 +70,6 @@ extern struct exception_table_entry _stop_amode31_ex_table[];
+ #define __amode31_data __section(".amode31.data")
+ #define __amode31_ref __section(".amode31.refs")
+ extern long _start_amode31_refs[], _end_amode31_refs[];
++extern unsigned long __amode31_base;
+
+ #endif /* _ENTRY_H */
+diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
+index 1d94ffdf347bb..5d0c45c13b5fa 100644
+--- a/arch/s390/kernel/ftrace.c
++++ b/arch/s390/kernel/ftrace.c
+@@ -80,17 +80,6 @@ asm(
+
+ #ifdef CONFIG_MODULES
+ static char *ftrace_plt;
+-
+-asm(
+- " .data\n"
+- "ftrace_plt_template:\n"
+- " basr %r1,%r0\n"
+- " lg %r1,0f-.(%r1)\n"
+- " br %r1\n"
+- "0: .quad ftrace_caller\n"
+- "ftrace_plt_template_end:\n"
+- " .previous\n"
+-);
+ #endif /* CONFIG_MODULES */
+
+ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
+@@ -116,7 +105,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
+
+ bool ftrace_need_init_nop(void)
+ {
+- return ftrace_shared_hotpatch_trampoline(NULL);
++ return true;
+ }
+
+ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+@@ -175,28 +164,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ return 0;
+ }
+
+-static void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+-{
+- /* brcl 0,0 */
+- insn->opc = 0xc004;
+- insn->disp = 0;
+-}
+-
+-static void ftrace_generate_call_insn(struct ftrace_insn *insn,
+- unsigned long ip)
+-{
+- unsigned long target;
+-
+- /* brasl r0,ftrace_caller */
+- target = FTRACE_ADDR;
+-#ifdef CONFIG_MODULES
+- if (is_module_addr((void *)ip))
+- target = (unsigned long)ftrace_plt;
+-#endif /* CONFIG_MODULES */
+- insn->opc = 0xc005;
+- insn->disp = (target - ip) / 2;
+-}
+-
+ static void brcl_disable(void *brcl)
+ {
+ u8 op = 0x04; /* set mask field to zero */
+@@ -207,23 +174,7 @@ static void brcl_disable(void *brcl)
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- struct ftrace_insn orig, new, old;
+-
+- if (ftrace_shared_hotpatch_trampoline(NULL)) {
+- brcl_disable((void *)rec->ip);
+- return 0;
+- }
+-
+- if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
+- return -EFAULT;
+- /* Replace ftrace call with a nop. */
+- ftrace_generate_call_insn(&orig, rec->ip);
+- ftrace_generate_nop_insn(&new);
+-
+- /* Verify that the to be replaced code matches what we expect. */
+- if (memcmp(&orig, &old, sizeof(old)))
+- return -EINVAL;
+- s390_kernel_write((void *) rec->ip, &new, sizeof(new));
++ brcl_disable((void *)rec->ip);
+ return 0;
+ }
+
+@@ -236,23 +187,7 @@ static void brcl_enable(void *brcl)
+
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- struct ftrace_insn orig, new, old;
+-
+- if (ftrace_shared_hotpatch_trampoline(NULL)) {
+- brcl_enable((void *)rec->ip);
+- return 0;
+- }
+-
+- if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
+- return -EFAULT;
+- /* Replace nop with an ftrace call. */
+- ftrace_generate_nop_insn(&orig);
+- ftrace_generate_call_insn(&new, rec->ip);
+-
+- /* Verify that the to be replaced code matches what we expect. */
+- if (memcmp(&orig, &old, sizeof(old)))
+- return -EINVAL;
+- s390_kernel_write((void *) rec->ip, &new, sizeof(new));
++ brcl_enable((void *)rec->ip);
+ return 0;
+ }
+
+@@ -269,10 +204,7 @@ int __init ftrace_dyn_arch_init(void)
+
+ void arch_ftrace_update_code(int command)
+ {
+- if (ftrace_shared_hotpatch_trampoline(NULL))
+- ftrace_modify_all_code(command);
+- else
+- ftrace_run_stop_machine(command);
++ ftrace_modify_all_code(command);
+ }
+
+ static void __ftrace_sync(void *dummy)
+@@ -281,10 +213,8 @@ static void __ftrace_sync(void *dummy)
+
+ int ftrace_arch_code_modify_post_process(void)
+ {
+- if (ftrace_shared_hotpatch_trampoline(NULL)) {
+- /* Send SIGP to the other CPUs, so they see the new code. */
+- smp_call_function(__ftrace_sync, NULL, 1);
+- }
++ /* Send SIGP to the other CPUs, so they see the new code. */
++ smp_call_function(__ftrace_sync, NULL, 1);
+ return 0;
+ }
+
+@@ -299,10 +229,6 @@ static int __init ftrace_plt_init(void)
+ panic("cannot allocate ftrace plt\n");
+
+ start = ftrace_shared_hotpatch_trampoline(&end);
+- if (!start) {
+- start = ftrace_plt_template;
+- end = ftrace_plt_template_end;
+- }
+ memcpy(ftrace_plt, start, end - start);
+ set_memory_ro((unsigned long)ftrace_plt, 1);
+ return 0;
+diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
+index 4bf1ee293f2b3..a0da049e73609 100644
+--- a/arch/s390/kernel/idle.c
++++ b/arch/s390/kernel/idle.c
+@@ -44,7 +44,7 @@ void account_idle_time_irq(void)
+ S390_lowcore.last_update_timer = idle->timer_idle_exit;
+ }
+
+-void arch_cpu_idle(void)
++void noinstr arch_cpu_idle(void)
+ {
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long idle_time;
+diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
+index e2cc35775b996..834b1ec5dd7a0 100644
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -502,6 +502,8 @@ static struct attribute_group ipl_ccw_attr_group_lpar = {
+
+ static struct attribute *ipl_unknown_attrs[] = {
+ &sys_ipl_type_attr.attr,
++ &sys_ipl_secure_attr.attr,
++ &sys_ipl_has_secure_attr.attr,
+ NULL,
+ };
+
+@@ -1646,8 +1648,8 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
+
+ csum = (__force unsigned int)
+ csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+- mem_assign_absolute(S390_lowcore.ipib, ipib);
+- mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
++ put_abs_lowcore(ipib, ipib);
++ put_abs_lowcore(ipib_checksum, csum);
+ dump_run(trigger);
+ }
+
+@@ -2156,7 +2158,7 @@ void *ipl_report_finish(struct ipl_report *report)
+
+ buf = vzalloc(report->size);
+ if (!buf)
+- return ERR_PTR(-ENOMEM);
++ goto out;
+ ptr = buf;
+
+ memcpy(ptr, report->ipib, report->ipib->hdr.len);
+@@ -2195,6 +2197,7 @@ void *ipl_report_finish(struct ipl_report *report)
+ }
+
+ BUG_ON(ptr > buf + report->size);
++out:
+ return buf;
+ }
+
+diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
+index 3a3145c4a3ba4..be5d432b902e0 100644
+--- a/arch/s390/kernel/irq.c
++++ b/arch/s390/kernel/irq.c
+@@ -138,7 +138,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ int from_idle;
+
+- irq_enter();
++ irq_enter_rcu();
+
+ if (user_mode(regs))
+ update_timer_sys();
+@@ -155,7 +155,8 @@ void noinstr do_io_irq(struct pt_regs *regs)
+ do_irq_async(regs, IO_INTERRUPT);
+ } while (MACHINE_IS_LPAR && irq_pending(regs));
+
+- irq_exit();
++ irq_exit_rcu();
++
+ set_irq_regs(old_regs);
+ irqentry_exit(regs, state);
+
+@@ -169,7 +170,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ int from_idle;
+
+- irq_enter();
++ irq_enter_rcu();
+
+ if (user_mode(regs))
+ update_timer_sys();
+@@ -184,7 +185,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
+
+ do_irq_async(regs, EXT_INTERRUPT);
+
+- irq_exit();
++ irq_exit_rcu();
+ set_irq_regs(old_regs);
+ irqentry_exit(regs, state);
+
+diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
+index 52d056a5f89fc..fbc0bf417ec66 100644
+--- a/arch/s390/kernel/kprobes.c
++++ b/arch/s390/kernel/kprobes.c
+@@ -7,6 +7,8 @@
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/moduleloader.h>
+ #include <linux/kprobes.h>
+ #include <linux/ptrace.h>
+@@ -231,6 +233,7 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
+ {
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
++ kcb->prev_kprobe.kp = NULL;
+ }
+ NOKPROBE_SYMBOL(pop_kprobe);
+
+@@ -259,7 +262,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
+ * is a BUG. The code path resides in the .kprobes.text
+ * section and is executed with interrupts disabled.
+ */
+- pr_err("Invalid kprobe detected.\n");
++ pr_err("Failed to recover from reentered kprobes.\n");
+ dump_kprobe(p);
+ BUG();
+ }
+@@ -392,12 +395,11 @@ static int post_kprobe_handler(struct pt_regs *regs)
+ if (!p)
+ return 0;
+
++ resume_execution(p, regs);
+ if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+-
+- resume_execution(p, regs);
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+
+diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
+index 0505e55a62979..4b95684fbe46e 100644
+--- a/arch/s390/kernel/machine_kexec.c
++++ b/arch/s390/kernel/machine_kexec.c
+@@ -227,7 +227,7 @@ void arch_crash_save_vmcoreinfo(void)
+ vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
+ vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
++ put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
+ }
+
+ void machine_shutdown(void)
+diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
+index f9e4baa64b675..c7fd818512890 100644
+--- a/arch/s390/kernel/machine_kexec_file.c
++++ b/arch/s390/kernel/machine_kexec_file.c
+@@ -12,6 +12,7 @@
+ #include <linux/kexec.h>
+ #include <linux/module_signature.h>
+ #include <linux/verification.h>
++#include <linux/vmalloc.h>
+ #include <asm/boot_data.h>
+ #include <asm/ipl.h>
+ #include <asm/setup.h>
+@@ -28,6 +29,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+ const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+ struct module_signature *ms;
+ unsigned long sig_len;
++ int ret;
+
+ /* Skip signature verification when not secure IPLed. */
+ if (!ipl_secure_flag)
+@@ -62,11 +64,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+ return -EBADMSG;
+ }
+
+- return verify_pkcs7_signature(kernel, kernel_len,
+- kernel + kernel_len, sig_len,
+- VERIFY_USE_PLATFORM_KEYRING,
+- VERIFYING_MODULE_SIGNATURE,
+- NULL, NULL);
++ ret = verify_pkcs7_signature(kernel, kernel_len,
++ kernel + kernel_len, sig_len,
++ VERIFY_USE_SECONDARY_KEYRING,
++ VERIFYING_MODULE_SIGNATURE,
++ NULL, NULL);
++ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
++ ret = verify_pkcs7_signature(kernel, kernel_len,
++ kernel + kernel_len, sig_len,
++ VERIFY_USE_PLATFORM_KEYRING,
++ VERIFYING_MODULE_SIGNATURE,
++ NULL, NULL);
++ return ret;
+ }
+ #endif /* CONFIG_KEXEC_SIG */
+
+@@ -170,13 +179,12 @@ static int kexec_file_add_ipl_report(struct kimage *image,
+ struct kexec_buf buf;
+ unsigned long addr;
+ void *ptr, *end;
++ int ret;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+- if (image->type == KEXEC_TYPE_CRASH)
+- buf.mem += crashk_res.start;
+
+ ptr = (void *)ipl_cert_list_addr;
+ end = ptr + ipl_cert_list_size;
+@@ -199,9 +207,13 @@ static int kexec_file_add_ipl_report(struct kimage *image,
+ ptr += len;
+ }
+
++ ret = -ENOMEM;
+ buf.buffer = ipl_report_finish(data->report);
++ if (!buf.buffer)
++ goto out;
+ buf.bufsz = data->report->size;
+ buf.memsz = buf.bufsz;
++ image->arch.ipl_buf = buf.buffer;
+
+ data->memsz += buf.memsz;
+
+@@ -209,7 +221,12 @@ static int kexec_file_add_ipl_report(struct kimage *image,
+ data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
+ *lc_ipl_parmblock_ptr = (__u32)buf.mem;
+
+- return kexec_add_buffer(&buf);
++ if (image->type == KEXEC_TYPE_CRASH)
++ buf.mem += crashk_res.start;
++
++ ret = kexec_add_buffer(&buf);
++out:
++ return ret;
+ }
+
+ void *kexec_file_add_components(struct kimage *image,
+@@ -269,6 +286,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ {
+ Elf_Rela *relas;
+ int i, r_type;
++ int ret;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+@@ -303,7 +321,15 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+- arch_kexec_do_relocs(r_type, loc, val, addr);
++
++ if (r_type == R_390_PLT32DBL)
++ r_type = R_390_PC32DBL;
++
++ ret = arch_kexec_do_relocs(r_type, loc, val, addr);
++ if (ret) {
++ pr_err("Unknown rela relocation: %d\n", r_type);
++ return -ENOEXEC;
++ }
+ }
+ return 0;
+ }
+@@ -321,3 +347,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+
+ return kexec_image_probe_default(image, buf, buf_len);
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++ vfree(image->arch.ipl_buf);
++ image->arch.ipl_buf = NULL;
++
++ return kexec_image_post_load_cleanup_default(image);
++}
+diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
+index b01ba460b7cad..b032e556eeb71 100644
+--- a/arch/s390/kernel/module.c
++++ b/arch/s390/kernel/module.c
+@@ -33,18 +33,19 @@
+ #define DEBUGP(fmt , ...)
+ #endif
+
+-#define PLT_ENTRY_SIZE 20
++#define PLT_ENTRY_SIZE 22
+
+ void *module_alloc(unsigned long size)
+ {
++ gfp_t gfp_mask = GFP_KERNEL;
+ void *p;
+
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+ return NULL;
+ p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
++ gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
+ __builtin_return_address(0));
+- if (p && (kasan_module_alloc(p, size) < 0)) {
++ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+@@ -340,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
+ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
+ if (info->plt_initialized == 0) {
+- unsigned int insn[5];
+- unsigned int *ip = me->core_layout.base +
+- me->arch.plt_offset +
+- info->plt_offset;
+-
+- insn[0] = 0x0d10e310; /* basr 1,0 */
+- insn[1] = 0x100a0004; /* lg 1,10(1) */
++ unsigned char insn[PLT_ENTRY_SIZE];
++ char *plt_base;
++ char *ip;
++
++ plt_base = me->core_layout.base + me->arch.plt_offset;
++ ip = plt_base + info->plt_offset;
++ *(int *)insn = 0x0d10e310; /* basr 1,0 */
++ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
+- unsigned int *ij;
+- ij = me->core_layout.base +
+- me->arch.plt_offset +
+- me->arch.plt_size - PLT_ENTRY_SIZE;
+- insn[2] = 0xa7f40000 + /* j __jump_r1 */
+- (unsigned int)(u16)
+- (((unsigned long) ij - 8 -
+- (unsigned long) ip) / 2);
++ char *jump_r1;
++
++ jump_r1 = plt_base + me->arch.plt_size -
++ PLT_ENTRY_SIZE;
++ /* brcl 0xf,__jump_r1 */
++ *(short *)&insn[8] = 0xc0f4;
++ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
+ } else {
+- insn[2] = 0x07f10000; /* br %r1 */
++ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
+ }
+- insn[3] = (unsigned int) (val >> 32);
+- insn[4] = (unsigned int) val;
++ *(long *)&insn[14] = val;
+
+ write(ip, insn, sizeof(insn));
+ info->plt_initialized = 1;
+diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
+index 20f8e1868853f..d4f071e73a0a6 100644
+--- a/arch/s390/kernel/nmi.c
++++ b/arch/s390/kernel/nmi.c
+@@ -62,7 +62,7 @@ static inline unsigned long nmi_get_mcesa_size(void)
+ * The structure is required for machine check happening early in
+ * the boot process.
+ */
+-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
++static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
+
+ void __init nmi_alloc_boot_cpu(struct lowcore *lc)
+ {
+@@ -175,7 +175,7 @@ void __s390_handle_mcck(void)
+ "malfunction (code 0x%016lx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+ }
+
+@@ -273,7 +273,14 @@ static int notrace s390_validate_registers(union mci mci, int umode)
+ /* Validate vector registers */
+ union ctlreg0 cr0;
+
+- if (!mci.vr) {
++ /*
++ * The vector validity must only be checked if not running a
++ * KVM guest. For KVM guests the machine check is forwarded by
++ * KVM and it is the responsibility of the guest to take
++ * appropriate actions. The host vector or FPU values have been
++ * saved by KVM and will be restored by KVM.
++ */
++ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
+ /*
+ * Vector registers can't be restored. If the kernel
+ * currently uses vector registers the system is
+@@ -316,11 +323,21 @@ static int notrace s390_validate_registers(union mci mci, int umode)
+ if (cr2.gse) {
+ if (!mci.gs) {
+ /*
+- * Guarded storage register can't be restored and
+- * the current processes uses guarded storage.
+- * It has to be terminated.
++ * 2 cases:
++ * - machine check in kernel or userspace
++ * - machine check while running SIE (KVM guest)
++ * For kernel or userspace the userspace values of
++ * guarded storage control can not be recreated, the
++ * process must be terminated.
++ * For SIE the guest values of guarded storage can not
++ * be recreated. This is either due to a bug or due to
++ * GS being disabled in the guest. The guest will be
++ * notified by KVM code and the guests machine check
++ * handling must take care of this. The host values
++ * are saved by KVM and are not affected.
+ */
+- kill_task = 1;
++ if (!test_cpu_flag(CIF_MCCK_GUEST))
++ kill_task = 1;
+ } else {
+ load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
+ }
+diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
+index 4bef35b79b938..1acc2e05d70f0 100644
+--- a/arch/s390/kernel/os_info.c
++++ b/arch/s390/kernel/os_info.c
+@@ -15,6 +15,7 @@
+ #include <asm/checksum.h>
+ #include <asm/lowcore.h>
+ #include <asm/os_info.h>
++#include <asm/asm-offsets.h>
+
+ /*
+ * OS info structure has to be page aligned
+@@ -45,7 +46,7 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
+ */
+ void os_info_entry_add(int nr, void *ptr, u64 size)
+ {
+- os_info.entry[nr].addr = (u64)(unsigned long)ptr;
++ os_info.entry[nr].addr = __pa(ptr);
+ os_info.entry[nr].size = size;
+ os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+@@ -62,7 +63,7 @@ void __init os_info_init(void)
+ os_info.version_minor = OS_INFO_VERSION_MINOR;
+ os_info.magic = OS_INFO_MAGIC;
+ os_info.csum = os_info_csum(&os_info);
+- mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
++ put_abs_lowcore(os_info, __pa(ptr));
+ }
+
+ #ifdef CONFIG_CRASH_DUMP
+@@ -90,7 +91,7 @@ static void os_info_old_alloc(int nr, int align)
+ goto fail;
+ }
+ buf_align = PTR_ALIGN(buf, align);
+- if (copy_oldmem_kernel(buf_align, (void *) addr, size)) {
++ if (copy_oldmem_kernel(buf_align, addr, size)) {
+ msg = "copy failed";
+ goto fail_free;
+ }
+@@ -123,15 +124,14 @@ static void os_info_old_init(void)
+ return;
+ if (!oldmem_data.start)
+ goto fail;
+- if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr)))
++ if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
+ goto fail;
+ if (addr == 0 || addr % PAGE_SIZE)
+ goto fail;
+ os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+ if (!os_info_old)
+ goto fail;
+- if (copy_oldmem_kernel(os_info_old, (void *) addr,
+- sizeof(*os_info_old)))
++ if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old)))
+ goto fail_free;
+ if (os_info_old->magic != OS_INFO_MAGIC)
+ goto fail_free;
+diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
+index 4a99154fe6514..d2a2a18b55808 100644
+--- a/arch/s390/kernel/perf_cpum_cf.c
++++ b/arch/s390/kernel/perf_cpum_cf.c
+@@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+ return err;
+ }
+
++/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
++ * attribute::type values:
++ * - PERF_TYPE_HARDWARE:
++ * - pmu->type:
++ * Handle both types of invocation identically. They address the same hardware.
++ * The result is different when event modifiers exclude_kernel and/or
++ * exclude_user are also set.
++ */
++static int cpumf_pmu_event_type(struct perf_event *event)
++{
++ u64 ev = event->attr.config;
++
++ if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
++ cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
++ cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
++ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
++ return PERF_TYPE_HARDWARE;
++ return PERF_TYPE_RAW;
++}
++
+ static int cpumf_pmu_event_init(struct perf_event *event)
+ {
+ unsigned int type = event->attr.type;
+@@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event)
+ err = __hw_perf_event_init(event, type);
+ else if (event->pmu->type == type)
+ /* Registered as unknown PMU */
+- err = __hw_perf_event_init(event, PERF_TYPE_RAW);
++ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
+ else
+ return -ENOENT;
+
+@@ -687,8 +707,10 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
+ false);
+ if (cfdiag_diffctr(cpuhw, event->hw.config_base))
+ cfdiag_push_sample(event, cpuhw);
+- } else
++ } else if (cpuhw->flags & PMU_F_RESERVED) {
++ /* Only update when PMU not hotplugged off */
+ hw_perf_event_update(event);
++ }
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+ }
+diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
+index db62def4ef28e..4e6fadaeaa1a6 100644
+--- a/arch/s390/kernel/perf_cpum_sf.c
++++ b/arch/s390/kernel/perf_cpum_sf.c
+@@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
+
+ static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+ {
+- unsigned long sdb, *trailer;
++ struct hws_trailer_entry *te;
++ unsigned long sdb;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+- trailer = trailer_entry_ptr(sdb);
+- *trailer = SDB_TE_ALERT_REQ_MASK;
++ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
++ te->header.a = 1;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = sdb;
+@@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ "%s: Found unknown"
+ " sampling data entry: te->f %i"
+ " basic.def %#4x (%p)\n", __func__,
+- te->f, sample->def, sample);
++ te->header.f, sample->def, sample);
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+@@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+- if (!te->f)
++ if (!te->header.f)
+ break;
+ }
+
+@@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ }
+ }
+
++static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
++{
++ asm volatile(
++ " cdsg %[old],%[new],%[ptr]\n"
++ : [old] "+d" (old), [ptr] "+QS" (*ptr)
++ : [new] "d" (new)
++ : "memory", "cc");
++ return old;
++}
++
+ /* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+@@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ */
+ static void hw_perf_event_update(struct perf_event *event, int flush_all)
+ {
++ unsigned long long event_overflow, sampl_overflow, num_sdb;
++ union hws_trailer_header old, prev, new;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ /*
+@@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+- if (!te->f) {
++ if (!te->header.f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+- if (te->overflow)
++ if (te->header.overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+- sampl_overflow += te->overflow;
++ sampl_overflow += te->header.overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+ "overflow %llu timestamp %#llx\n",
+- __func__, (unsigned long)sdbt, te->overflow,
+- (te->f) ? trailer_timestamp(te) : 0ULL);
++ __func__, (unsigned long)sdbt, te->header.overflow,
++ (te->header.f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+@@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
++ /* READ_ONCE() 16 byte header */
++ prev.val = __cdsg(&te->header.val, 0, 0);
+ do {
+- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+- te_flags |= SDB_TE_ALERT_REQ_MASK;
+- } while (!cmpxchg_double(&te->flags, &te->overflow,
+- te->flags, te->overflow,
+- te_flags, 0ULL));
++ old.val = prev.val;
++ new.val = prev.val;
++ new.f = 0;
++ new.a = 1;
++ new.overflow = 0;
++ prev.val = __cdsg(&te->header.val, old.val, new.val);
++ } while (prev.val != old.val);
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+@@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle)
+ range_scan = AUX_SDB_NUM_ALERT(aux);
+ for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
++ if (!te->header.f)
+ break;
+ }
+ /* i is num of SDBs which are full */
+@@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle)
+
+ /* Remove alert indicators in the buffer */
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+- te->flags &= ~SDB_TE_ALERT_REQ_MASK;
++ te->header.a = 0;
+
+ debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+ __func__, i, range_scan, aux->head);
+@@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle,
+ idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
+- SDB_TE_ALERT_REQ_MASK);
+- te->overflow = 0;
++ te->header.f = 0;
++ te->header.a = 0;
++ te->header.overflow = 0;
+ }
+ /* Save the position of empty SDBs */
+ aux->empty_mark = aux->head + range - 1;
+@@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
+ /* Set alert indicator */
+ aux->alert_mark = aux->head + range/2 - 1;
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
++ te->header.a = 1;
+
+ /* Reset hardware buffer head */
+ head = AUX_SDB_INDEX(aux, aux->head);
+@@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle,
+ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+ unsigned long long *overflow)
+ {
+- unsigned long long orig_overflow, orig_flags, new_flags;
++ union hws_trailer_header old, prev, new;
+ struct hws_trailer_entry *te;
+
+ te = aux_sdb_trailer(aux, alert_index);
++ /* READ_ONCE() 16 byte header */
++ prev.val = __cdsg(&te->header.val, 0, 0);
+ do {
+- orig_flags = te->flags;
+- *overflow = orig_overflow = te->overflow;
+- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
++ old.val = prev.val;
++ new.val = prev.val;
++ *overflow = old.overflow;
++ if (old.f) {
+ /*
+ * SDB is already set by hardware.
+ * Abort and try to set somewhere
+@@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+ */
+ return false;
+ }
+- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
+- } while (!cmpxchg_double(&te->flags, &te->overflow,
+- orig_flags, orig_overflow,
+- new_flags, 0ULL));
++ new.a = 1;
++ new.overflow = 0;
++ prev.val = __cdsg(&te->header.val, old.val, new.val);
++ } while (prev.val != old.val);
+ return true;
+ }
+
+@@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+ unsigned long long *overflow)
+ {
+- unsigned long long orig_overflow, orig_flags, new_flags;
+ unsigned long i, range_scan, idx, idx_old;
++ union hws_trailer_header old, prev, new;
++ unsigned long long orig_overflow;
+ struct hws_trailer_entry *te;
+
+ debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+@@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+ idx_old = idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
++ /* READ_ONCE() 16 byte header */
++ prev.val = __cdsg(&te->header.val, 0, 0);
+ do {
+- orig_flags = te->flags;
+- orig_overflow = te->overflow;
+- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
++ old.val = prev.val;
++ new.val = prev.val;
++ orig_overflow = old.overflow;
++ new.f = 0;
++ new.overflow = 0;
+ if (idx == aux->alert_mark)
+- new_flags |= SDB_TE_ALERT_REQ_MASK;
++ new.a = 1;
+ else
+- new_flags &= ~SDB_TE_ALERT_REQ_MASK;
+- } while (!cmpxchg_double(&te->flags, &te->overflow,
+- orig_flags, orig_overflow,
+- new_flags, 0ULL));
++ new.a = 0;
++ prev.val = __cdsg(&te->header.val, old.val, new.val);
++ } while (prev.val != old.val);
+ *overflow += orig_overflow;
+ }
+
+diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
+index ea7729bebaa07..a7f8db73984b0 100644
+--- a/arch/s390/kernel/perf_event.c
++++ b/arch/s390/kernel/perf_event.c
+@@ -30,7 +30,7 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+ if (!stack)
+ return NULL;
+
+- return (struct kvm_s390_sie_block *) stack->empty1[0];
++ return (struct kvm_s390_sie_block *)stack->empty1[1];
+ }
+
+ static bool is_in_guest(struct pt_regs *regs)
+diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
+index 350e94d0cac23..d015cb1027fa1 100644
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+
+ memcpy(dst, src, arch_task_struct_size);
+ dst->thread.fpu.regs = dst->thread.fpu.fprs;
++
++ /*
++ * Don't transfer over the runtime instrumentation or the guarded
++ * storage control block pointers. These fields are cleared here instead
++ * of in copy_thread() to avoid premature freeing of associated memory
++ * on fork() failure. Wait to clear the RI flag because ->stack still
++ * refers to the source thread.
++ */
++ dst->thread.ri_cb = NULL;
++ dst->thread.gs_cb = NULL;
++ dst->thread.gs_bc_cb = NULL;
++
+ return 0;
+ }
+
+@@ -149,13 +161,11 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+ frame->childregs.flags = 0;
+ if (new_stackp)
+ frame->childregs.gprs[15] = new_stackp;
+-
+- /* Don't copy runtime instrumentation info */
+- p->thread.ri_cb = NULL;
++ /*
++ * Clear the runtime instrumentation flag after the above childregs
++ * copy. The CB pointer was already cleared in arch_dup_task_struct().
++ */
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+- /* Don't copy guarded storage control block */
+- p->thread.gs_cb = NULL;
+- p->thread.gs_bc_cb = NULL;
+
+ /* Set a new TLS ? */
+ if (clone_flags & CLONE_SETTLS) {
+diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
+index 0ea3d02b378de..516c21baf3ad3 100644
+--- a/arch/s390/kernel/ptrace.c
++++ b/arch/s390/kernel/ptrace.c
+@@ -481,9 +481,7 @@ long arch_ptrace(struct task_struct *child, long request,
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+- put_user(child->thread.last_break,
+- (unsigned long __user *) data);
+- return 0;
++ return put_user(child->thread.last_break, (unsigned long __user *)data);
+ case PTRACE_ENABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+@@ -837,9 +835,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+- put_user(child->thread.last_break,
+- (unsigned int __user *) data);
+- return 0;
++ return put_user(child->thread.last_break, (unsigned int __user *)data);
+ }
+ return compat_ptrace_request(child, request, addr, data);
+ }
+diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
+index 67e5fff96ee06..4dfe37b068898 100644
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -95,10 +95,10 @@ EXPORT_SYMBOL(console_irq);
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .amode31 section.
+ */
+-unsigned long __amode31_ref __samode31 = __pa(&_samode31);
+-unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
+-unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
+-unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
++unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
++unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
++unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
++unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+ struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
+ struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
+
+@@ -149,6 +149,7 @@ struct mem_detect_info __bootdata(mem_detect);
+ struct initrd_data __bootdata(initrd_data);
+
+ unsigned long __bootdata_preserved(__kaslr_offset);
++unsigned long __bootdata(__amode31_base);
+ unsigned int __bootdata_preserved(zlib_dfltcc_support);
+ EXPORT_SYMBOL(zlib_dfltcc_support);
+ u64 __bootdata_preserved(stfle_fac_list[16]);
+@@ -478,11 +479,12 @@ static void __init setup_lowcore_dat_off(void)
+ lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+
+ /* Setup absolute zero lowcore */
+- mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
+- mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
+- mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
+- mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
+- mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
++ put_abs_lowcore(restart_stack, lc->restart_stack);
++ put_abs_lowcore(restart_fn, lc->restart_fn);
++ put_abs_lowcore(restart_data, lc->restart_data);
++ put_abs_lowcore(restart_source, lc->restart_source);
++ put_abs_lowcore(restart_psw, lc->restart_psw);
++ put_abs_lowcore(mcesad, lc->mcesad);
+
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
+@@ -499,6 +501,7 @@ static void __init setup_lowcore_dat_off(void)
+ static void __init setup_lowcore_dat_on(void)
+ {
+ struct lowcore *lc = lowcore_ptr[0];
++ int cr;
+
+ __ctl_clear_bit(0, 28);
+ S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
+@@ -507,10 +510,10 @@ static void __init setup_lowcore_dat_on(void)
+ S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
+ __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
+ __ctl_set_bit(0, 28);
+- mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
+- mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
+- memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
+- sizeof(S390_lowcore.cregs_save_area));
++ put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
++ put_abs_lowcore(program_new_psw, lc->program_new_psw);
++ for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
++ put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+ }
+
+ static struct resource code_resource = {
+@@ -633,14 +636,6 @@ static struct notifier_block kdump_mem_nb = {
+
+ #endif
+
+-/*
+- * Make sure that the area above identity mapping is protected
+- */
+-static void __init reserve_above_ident_map(void)
+-{
+- memblock_reserve(ident_map_size, ULONG_MAX);
+-}
+-
+ /*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+@@ -804,12 +799,12 @@ static void __init check_initrd(void)
+ */
+ static void __init reserve_kernel(void)
+ {
+- unsigned long start_pfn = PFN_UP(__pa(_end));
+-
+ memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+- memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
+- memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+- - (unsigned long)_stext);
++ memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
++ memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
++ memblock_reserve(__amode31_base, __eamode31 - __samode31);
++ memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
++ memblock_reserve(__pa(_stext), _end - _stext);
+ }
+
+ static void __init setup_memory(void)
+@@ -824,27 +819,18 @@ static void __init setup_memory(void)
+ storage_key_init_range(start, end);
+
+ psw_set_key(PAGE_DEFAULT_KEY);
+-
+- /* Only cosmetics */
+- memblock_enforce_memory_limit(memblock_end_of_DRAM());
+ }
+
+ static void __init relocate_amode31_section(void)
+ {
+- unsigned long amode31_addr, amode31_size;
+- long amode31_offset;
++ unsigned long amode31_size = __eamode31 - __samode31;
++ long amode31_offset = __amode31_base - __samode31;
+ long *ptr;
+
+- /* Allocate a new AMODE31 capable memory region */
+- amode31_size = __eamode31 - __samode31;
+ pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
+- amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
+- if (!amode31_addr)
+- panic("Failed to allocate memory for AMODE31 section\n");
+- amode31_offset = amode31_addr - __samode31;
+
+ /* Move original AMODE31 section to the new one */
+- memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
++ memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
+ /* Zero out the old AMODE31 section to catch invalid accesses within it */
+ memset((void *)__samode31, 0, amode31_size);
+
+@@ -891,6 +877,11 @@ static void __init setup_randomness(void)
+ if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+ memblock_free((unsigned long) vmms, PAGE_SIZE);
++
++#ifdef CONFIG_ARCH_RANDOM
++ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
++ static_branch_enable(&s390_arch_random_available);
++#endif
+ }
+
+ /*
+@@ -1005,11 +996,11 @@ void __init setup_arch(char **cmdline_p)
+ setup_control_program_code();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+- reserve_above_ident_map();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_certificate_list();
+ reserve_mem_detect_info();
++ memblock_set_current_limit(ident_map_size);
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index 1a04e5bdf6555..35af70ed58fc7 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -328,10 +328,17 @@ static void pcpu_delegate(struct pcpu *pcpu,
+ /* Stop target cpu (if func returns this stops the current cpu). */
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ /* Restart func on the target cpu and stop the current cpu. */
+- mem_assign_absolute(lc->restart_stack, stack);
+- mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+- mem_assign_absolute(lc->restart_data, (unsigned long) data);
+- mem_assign_absolute(lc->restart_source, source_cpu);
++ if (lc) {
++ lc->restart_stack = stack;
++ lc->restart_fn = (unsigned long)func;
++ lc->restart_data = (unsigned long)data;
++ lc->restart_source = source_cpu;
++ } else {
++ put_abs_lowcore(restart_stack, stack);
++ put_abs_lowcore(restart_fn, (unsigned long)func);
++ put_abs_lowcore(restart_data, (unsigned long)data);
++ put_abs_lowcore(restart_source, source_cpu);
++ }
+ __bpon();
+ asm volatile(
+ "0: sigp 0,%0,%2 # sigp restart to target cpu\n"
+@@ -572,39 +579,27 @@ static void smp_ctl_bit_callback(void *info)
+ }
+
+ static DEFINE_SPINLOCK(ctl_lock);
+-static unsigned long ctlreg;
+
+-/*
+- * Set a bit in a control register of all cpus
+- */
+-void smp_ctl_set_bit(int cr, int bit)
++void smp_ctl_set_clear_bit(int cr, int bit, bool set)
+ {
+- struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+-
+- spin_lock(&ctl_lock);
+- memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
+- __set_bit(bit, &ctlreg);
+- memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
+- spin_unlock(&ctl_lock);
+- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+-}
+-EXPORT_SYMBOL(smp_ctl_set_bit);
+-
+-/*
+- * Clear a bit in a control register of all cpus
+- */
+-void smp_ctl_clear_bit(int cr, int bit)
+-{
+- struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
++ struct ec_creg_mask_parms parms = { .cr = cr, };
++ u64 ctlreg;
+
++ if (set) {
++ parms.orval = 1UL << bit;
++ parms.andval = -1UL;
++ } else {
++ parms.orval = 0;
++ parms.andval = ~(1UL << bit);
++ }
+ spin_lock(&ctl_lock);
+- memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
+- __clear_bit(bit, &ctlreg);
+- memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
++ get_abs_lowcore(ctlreg, cregs_save_area[cr]);
++ ctlreg = (ctlreg & parms.andval) | parms.orval;
++ put_abs_lowcore(cregs_save_area[cr], ctlreg);
+ spin_unlock(&ctl_lock);
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+ }
+-EXPORT_SYMBOL(smp_ctl_clear_bit);
++EXPORT_SYMBOL(smp_ctl_set_clear_bit);
+
+ #ifdef CONFIG_CRASH_DUMP
+
+@@ -675,7 +670,7 @@ static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
+ void *regs = (void *) page;
+
+ if (is_boot_cpu)
+- copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
++ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
+ else
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
+ save_area_add_regs(sa, regs);
+diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
+index 4d141e2c132e5..2ea7f208f0e73 100644
+--- a/arch/s390/kernel/sthyi.c
++++ b/arch/s390/kernel/sthyi.c
+@@ -459,9 +459,9 @@ static int sthyi_update_cache(u64 *rc)
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+- * if available. The return value is the condition code that would be
+- * returned, the rc parameter is the return code which is passed in
+- * register R2 + 1.
++ * if available. The return value is either a negative error value or
++ * the condition code that would be returned, the rc parameter is the
++ * return code which is passed in register R2 + 1.
+ */
+ int sthyi_fill(void *dst, u64 *rc)
+ {
+diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
+index 326cb8f75f58e..f0a1484ee00b0 100644
+--- a/arch/s390/kernel/time.c
++++ b/arch/s390/kernel/time.c
+@@ -364,7 +364,7 @@ static inline int check_sync_clock(void)
+ * Apply clock delta to the global data structures.
+ * This is called once on the CPU that performed the clock sync.
+ */
+-static void clock_sync_global(unsigned long delta)
++static void clock_sync_global(long delta)
+ {
+ unsigned long now, adj;
+ struct ptff_qto qto;
+@@ -400,7 +400,7 @@ static void clock_sync_global(unsigned long delta)
+ * Apply clock delta to the per-CPU data structures of this CPU.
+ * This is called for each online CPU after the call to clock_sync_global.
+ */
+-static void clock_sync_local(unsigned long delta)
++static void clock_sync_local(long delta)
+ {
+ /* Add the delta to the clock comparator. */
+ if (S390_lowcore.clock_comparator != clock_comparator_max) {
+@@ -424,7 +424,7 @@ static void __init time_init_wq(void)
+ struct clock_sync_data {
+ atomic_t cpus;
+ int in_sync;
+- unsigned long clock_delta;
++ long clock_delta;
+ };
+
+ /*
+@@ -544,7 +544,7 @@ static int stpinfo_valid(void)
+ static int stp_sync_clock(void *data)
+ {
+ struct clock_sync_data *sync = data;
+- u64 clock_delta, flags;
++ long clock_delta, flags;
+ static int first;
+ int rc;
+
+diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
+index 58f8291950cbf..1f4f37a26c26d 100644
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -96,7 +96,7 @@ out:
+ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
+ {
+ static cpumask_t mask;
+- int i;
++ unsigned int max_cpu;
+
+ cpumask_clear(&mask);
+ if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
+@@ -105,9 +105,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
+ if (topology_mode != TOPOLOGY_MODE_HW)
+ goto out;
+ cpu -= cpu % (smp_cpu_mtid + 1);
+- for (i = 0; i <= smp_cpu_mtid; i++) {
+- if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
+- cpumask_set_cpu(cpu + i, &mask);
++ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
++ for (; cpu <= max_cpu; cpu++) {
++ if (cpumask_test_cpu(cpu, &cpu_setup_mask))
++ cpumask_set_cpu(cpu, &mask);
+ }
+ out:
+ cpumask_copy(dst, &mask);
+@@ -124,25 +125,26 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
+ unsigned int core;
+
+ for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
+- unsigned int rcore;
+- int lcpu, i;
++ unsigned int max_cpu, rcore;
++ int cpu;
+
+ rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+- lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+- if (lcpu < 0)
++ cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
++ if (cpu < 0)
+ continue;
+- for (i = 0; i <= smp_cpu_mtid; i++) {
+- topo = &cpu_topology[lcpu + i];
++ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
++ for (; cpu <= max_cpu; cpu++) {
++ topo = &cpu_topology[cpu];
+ topo->drawer_id = drawer->id;
+ topo->book_id = book->id;
+ topo->socket_id = socket->id;
+ topo->core_id = rcore;
+- topo->thread_id = lcpu + i;
++ topo->thread_id = cpu;
+ topo->dedicated = tl_core->d;
+- cpumask_set_cpu(lcpu + i, &drawer->mask);
+- cpumask_set_cpu(lcpu + i, &book->mask);
+- cpumask_set_cpu(lcpu + i, &socket->mask);
+- smp_cpu_set_polarization(lcpu + i, tl_core->pp);
++ cpumask_set_cpu(cpu, &drawer->mask);
++ cpumask_set_cpu(cpu, &book->mask);
++ cpumask_set_cpu(cpu, &socket->mask);
++ smp_cpu_set_polarization(cpu, tl_core->pp);
+ }
+ }
+ }
+diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
+index bcefc2173de45..4044826d72ae5 100644
+--- a/arch/s390/kernel/traps.c
++++ b/arch/s390/kernel/traps.c
+@@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs)
+ {
+ if (user_mode(regs)) {
+ report_user_fault(regs, SIGSEGV, 0);
+- do_exit(SIGSEGV);
++ force_exit_sig(SIGSEGV);
+ } else
+ die(regs, "Unknown program exception");
+ }
+@@ -142,10 +142,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
+ do_trap(regs, SIGFPE, si_code, "floating point exception");
+ }
+
+-static void translation_exception(struct pt_regs *regs)
++static void translation_specification_exception(struct pt_regs *regs)
+ {
+ /* May never happen. */
+- panic("Translation exception");
++ panic("Translation-Specification Exception");
+ }
+
+ static void illegal_op(struct pt_regs *regs)
+@@ -374,7 +374,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
+ [0x0f] = hfp_divide_exception,
+ [0x10] = do_dat_exception,
+ [0x11] = do_dat_exception,
+- [0x12] = translation_exception,
++ [0x12] = translation_specification_exception,
+ [0x13] = special_op_exception,
+ [0x14] = default_trap_handler,
+ [0x15] = operand_exception,
+diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
+index 5a656c7b7a67a..f95ccbd396925 100644
+--- a/arch/s390/kernel/uv.c
++++ b/arch/s390/kernel/uv.c
+@@ -212,7 +212,7 @@ again:
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+- vma = find_vma(gmap->mm, uaddr);
++ vma = vma_lookup(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
+index e3e6ac5686df5..245bddfe9bc0e 100644
+--- a/arch/s390/kernel/vdso32/Makefile
++++ b/arch/s390/kernel/vdso32/Makefile
+@@ -22,7 +22,7 @@ KBUILD_AFLAGS_32 += -m31 -s
+ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+ KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+
+-LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \
++LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+ --hash-style=both --build-id=sha1 -melf_s390 -T
+
+ $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
+index 6568de2367010..1605ba45ac4c0 100644
+--- a/arch/s390/kernel/vdso64/Makefile
++++ b/arch/s390/kernel/vdso64/Makefile
+@@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
+ include $(srctree)/lib/vdso/Makefile
+ obj-vdso64 = vdso_user_wrapper.o note.o
+ obj-cvdso64 = vdso64_generic.o getcpu.o
+-CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+-CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
++VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
++CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
++CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+ # Build rules
+
+@@ -24,8 +25,8 @@ KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+ KBUILD_AFLAGS_64 += -m64 -s
+
+ KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+-KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
+-ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
++KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
++ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+ --hash-style=both --build-id=sha1 -T
+
+ $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
+index 63bdb9e1bfc13..853b80770c6df 100644
+--- a/arch/s390/kernel/vmlinux.lds.S
++++ b/arch/s390/kernel/vmlinux.lds.S
+@@ -17,6 +17,8 @@
+ /* Handle ro_after_init data on our own. */
+ #define RO_AFTER_INIT_DATA
+
++#define RUNTIME_DISCARD_EXIT
++
+ #define EMITS_PT_NOTE
+
+ #include <asm-generic/vmlinux.lds.h>
+@@ -80,6 +82,7 @@ SECTIONS
+ _end_amode31_refs = .;
+ }
+
++ . = ALIGN(PAGE_SIZE);
+ _edata = .; /* End of data section */
+
+ /* will be freed after init */
+@@ -132,6 +135,7 @@ SECTIONS
+ /*
+ * Table with the patch locations to undo expolines
+ */
++ . = ALIGN(4);
+ .nospec_call_table : {
+ __nospec_call_start = . ;
+ *(.s390_indirect*)
+@@ -193,6 +197,7 @@ SECTIONS
+
+ BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+
++ . = ALIGN(PAGE_SIZE);
+ _end = . ;
+
+ /*
+@@ -212,6 +217,7 @@ SECTIONS
+ QUAD(__dynsym_start) /* dynsym_start */
+ QUAD(__rela_dyn_start) /* rela_dyn_start */
+ QUAD(__rela_dyn_end) /* rela_dyn_end */
++ QUAD(_eamode31 - _samode31) /* amode31_size */
+ } :NONE
+
+ /* Debugging sections. */
+@@ -223,5 +229,6 @@ SECTIONS
+ DISCARDS
+ /DISCARD/ : {
+ *(.eh_frame)
++ *(.interp)
+ }
+ }
+diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
+index 807fa9da1e721..3c65b8258ae67 100644
+--- a/arch/s390/kvm/diag.c
++++ b/arch/s390/kvm/diag.c
+@@ -166,6 +166,7 @@ static int diag9c_forwarding_overrun(void)
+ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_vcpu *tcpu;
++ int tcpu_cpu;
+ int tid;
+
+ tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+@@ -181,14 +182,15 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+ goto no_yield;
+
+ /* target guest VCPU already running */
+- if (READ_ONCE(tcpu->cpu) >= 0) {
++ tcpu_cpu = READ_ONCE(tcpu->cpu);
++ if (tcpu_cpu >= 0) {
+ if (!diag9c_forwarding_hz || diag9c_forwarding_overrun())
+ goto no_yield;
+
+ /* target host CPU already running */
+- if (!vcpu_is_preempted(tcpu->cpu))
++ if (!vcpu_is_preempted(tcpu_cpu))
+ goto no_yield;
+- smp_yield_cpu(tcpu->cpu);
++ smp_yield_cpu(tcpu_cpu);
+ VCPU_EVENT(vcpu, 5,
+ "diag time slice end directed to %d: yield forwarded",
+ tid);
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index 2bd8f854f1b41..458b42b50b8cb 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu)
+ * handle_external_interrupt - used for external interruption interceptions
+ * @vcpu: virtual cpu
+ *
+- * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+- * the new PSW does not have external interrupts disabled. In the first case,
+- * we've got to deliver the interrupt manually, and in the second case, we
+- * drop to userspace to handle the situation there.
++ * This interception occurs if:
++ * - the CPUSTAT_EXT_INT bit was already set when the external interrupt
++ * occurred. In this case, the interrupt needs to be injected manually to
++ * preserve interrupt priority.
++ * - the external new PSW has external interrupts enabled, which will cause an
++ * interruption loop. We drop to userspace in this case.
++ *
++ * The latter case can be detected by inspecting the external mask bit in the
++ * external new psw.
++ *
++ * Under PV, only the latter case can occur, since interrupt priorities are
++ * handled in the ultravisor.
+ */
+ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+ {
+@@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+
+ vcpu->stat.exit_external_interrupt++;
+
+- rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+- if (rc)
+- return rc;
+- /* We can not handle clock comparator or timer interrupt with bad PSW */
++ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
++ newpsw = vcpu->arch.sie_block->gpsw;
++ } else {
++ rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
++ if (rc)
++ return rc;
++ }
++
++ /*
++ * Clock comparator or timer interrupt with external interrupt enabled
++ * will cause interrupt loop. Drop to userspace.
++ */
+ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+ (newpsw.mask & PSW_MASK_EXT))
+ return -EOPNOTSUPP;
+@@ -373,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
+ */
+ int handle_sthyi(struct kvm_vcpu *vcpu)
+ {
+- int reg1, reg2, r = 0;
+- u64 code, addr, cc = 0, rc = 0;
++ int reg1, reg2, cc = 0, r = 0;
++ u64 code, addr, rc = 0;
+ struct sthyi_sctns *sctns = NULL;
+
+ if (!test_kvm_facility(vcpu->kvm, 74))
+@@ -405,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
+ return -ENOMEM;
+
+ cc = sthyi_fill(sctns, &rc);
+-
++ if (cc < 0) {
++ free_page((unsigned long)sctns);
++ return cc;
++ }
+ out:
+ if (!cc) {
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+@@ -523,12 +542,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+
+ static int handle_pv_notification(struct kvm_vcpu *vcpu)
+ {
++ int ret;
++
+ if (vcpu->arch.sie_block->ipa == 0xb210)
+ return handle_pv_spx(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb220)
+ return handle_pv_sclp(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ return handle_pv_uvc(vcpu);
++ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
++ /*
++ * Besides external call, other SIGP orders also cause a
++ * 108 (pv notify) intercept. In contrast to external call,
++ * these orders need to be emulated and hence the appropriate
++ * place to handle them is in handle_instruction().
++ * So first try kvm_s390_handle_sigp_pei() and if that isn't
++ * successful, go on with handle_instruction().
++ */
++ ret = kvm_s390_handle_sigp_pei(vcpu);
++ if (!ret)
++ return ret;
++ }
+
+ return handle_instruction(vcpu);
+ }
+diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
+index 2245f4b8d3629..ca7d09f098092 100644
+--- a/arch/s390/kvm/interrupt.c
++++ b/arch/s390/kvm/interrupt.c
+@@ -81,8 +81,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+- union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
++ union esca_sigp_ctrl new_val = {0}, old_val;
+
++ old_val = READ_ONCE(*sigp_ctrl);
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+@@ -93,8 +94,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+- union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
++ union bsca_sigp_ctrl new_val = {0}, old_val;
+
++ old_val = READ_ONCE(*sigp_ctrl);
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+@@ -124,16 +126,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+- union esca_sigp_ctrl old = *sigp_ctrl;
++ union esca_sigp_ctrl old;
+
++ old = READ_ONCE(*sigp_ctrl);
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+- union bsca_sigp_ctrl old = *sigp_ctrl;
++ union bsca_sigp_ctrl old;
+
++ old = READ_ONCE(*sigp_ctrl);
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ }
+@@ -2115,6 +2119,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
+ return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+ }
+
++int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
++{
++ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
++
++ return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
++}
++
+ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
+index 1c97493d21e10..eb97db59b2365 100644
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -1117,6 +1117,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
+ return 0;
+ }
+
++static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
++
+ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+ struct kvm_s390_vm_tod_clock gtod;
+@@ -1126,7 +1128,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+
+ if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
+ return -EINVAL;
+- kvm_s390_set_tod_clock(kvm, &gtod);
++ __kvm_s390_set_tod_clock(kvm, &gtod);
+
+ VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+ gtod.epoch_idx, gtod.tod);
+@@ -1157,7 +1159,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+ sizeof(gtod.tod)))
+ return -EFAULT;
+
+- kvm_s390_set_tod_clock(kvm, &gtod);
++ __kvm_s390_set_tod_clock(kvm, &gtod);
+ VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
+ return 0;
+ }
+@@ -1169,6 +1171,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+ if (attr->flags)
+ return -EINVAL;
+
++ mutex_lock(&kvm->lock);
++ /*
++ * For protected guests, the TOD is managed by the ultravisor, so trying
++ * to change it will never bring the expected results.
++ */
++ if (kvm_s390_pv_is_protected(kvm)) {
++ ret = -EOPNOTSUPP;
++ goto out_unlock;
++ }
++
+ switch (attr->attr) {
+ case KVM_S390_VM_TOD_EXT:
+ ret = kvm_s390_set_tod_ext(kvm, attr);
+@@ -1183,6 +1195,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+ ret = -ENXIO;
+ break;
+ }
++
++out_unlock:
++ mutex_unlock(&kvm->lock);
+ return ret;
+ }
+
+@@ -2015,6 +2030,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+ ms = slots->memslots + slotidx;
+ ofs = 0;
+ }
++
++ if (cur_gfn < ms->base_gfn)
++ ofs = 0;
++
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+ while ((slotidx > 0) && (ofs >= ms->npages)) {
+ slotidx--;
+@@ -3447,7 +3466,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+ {
+ /* do not poll with more than halt_poll_max_steal percent of steal time */
+ if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+- halt_poll_max_steal) {
++ READ_ONCE(halt_poll_max_steal)) {
+ vcpu->stat.halt_no_poll_steal++;
+ return true;
+ }
+@@ -3913,14 +3932,12 @@ retry:
+ return 0;
+ }
+
+-void kvm_s390_set_tod_clock(struct kvm *kvm,
+- const struct kvm_s390_vm_tod_clock *gtod)
++static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+ {
+ struct kvm_vcpu *vcpu;
+ union tod_clock clk;
+ int i;
+
+- mutex_lock(&kvm->lock);
+ preempt_disable();
+
+ store_tod_clock_ext(&clk);
+@@ -3941,7 +3958,15 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
+
+ kvm_s390_vcpu_unblock_all(kvm);
+ preempt_enable();
++}
++
++int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
++{
++ if (!mutex_trylock(&kvm->lock))
++ return 0;
++ __kvm_s390_set_tod_clock(kvm, gtod);
+ mutex_unlock(&kvm->lock);
++ return 1;
+ }
+
+ /**
+@@ -4642,10 +4667,15 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+ }
+ }
+
+- /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
++ /*
++ * Set the VCPU to STOPPED and THEN clear the interrupt flag,
++ * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
++ * have been fully processed. This will ensure that the VCPU
++ * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
++ */
++ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
+ kvm_s390_clear_stop_irq(vcpu);
+
+- kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
+ __disable_ibs_on_vcpu(vcpu);
+
+ for (i = 0; i < online_vcpus; i++) {
+@@ -4703,6 +4733,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
++ if (!kvm_s390_pv_cpu_is_protected(vcpu))
++ return -EINVAL;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+@@ -5038,6 +5070,23 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
++
++ if (!kvm->arch.migration_mode)
++ return 0;
++
++ /*
++ * Turn off migration mode when:
++ * - userspace creates a new memslot with dirty logging off,
++ * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
++ * dirty logging is turned off.
++ * Migration mode expects dirty page logging being enabled to store
++ * its dirty bitmap.
++ */
++ if (change != KVM_MR_DELETE &&
++ !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
++ WARN(kvm_s390_vm_stop_migration(kvm),
++ "Failed to stop migration mode");
++
+ return 0;
+ }
+
+diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
+index 52bc8fbaa60ac..a2fde6d69057b 100644
+--- a/arch/s390/kvm/kvm-s390.h
++++ b/arch/s390/kvm/kvm-s390.h
+@@ -326,8 +326,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+
+ /* implemented in kvm-s390.c */
+-void kvm_s390_set_tod_clock(struct kvm *kvm,
+- const struct kvm_s390_vm_tod_clock *gtod);
++int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+@@ -418,6 +417,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm);
+ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
+ extern struct kvm_device_ops kvm_flic_ops;
+ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
++int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
+ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+ int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+ void __user *buf, int len);
+diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
+index 53da4ceb16a3a..6a765fe22eafc 100644
+--- a/arch/s390/kvm/priv.c
++++ b/arch/s390/kvm/priv.c
+@@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+- kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
++ /*
++ * To set the TOD clock the kvm lock must be taken, but the vcpu lock
++ * is already held in handle_set_clock. The usual lock order is the
++ * opposite. As SCK is deprecated and should not be used in several
++ * cases, for example when the multiple epoch facility or TOD clock
++ * steering facility is installed (see Principles of Operation), a
++ * slow path can be used. If the lock can not be taken via try_lock,
++ * the instruction will be retried via -EAGAIN at a later point in
++ * time.
++ */
++ if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) {
++ kvm_s390_retry_instr(vcpu);
++ return -EAGAIN;
++ }
+
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+@@ -397,6 +410,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
+ mmap_read_unlock(current->mm);
+ if (rc == -EFAULT)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
++ if (rc == -EAGAIN)
++ continue;
+ if (rc < 0)
+ return rc;
+ start += PAGE_SIZE;
+diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
+index c8841f476e913..b906658ffc2ed 100644
+--- a/arch/s390/kvm/pv.c
++++ b/arch/s390/kvm/pv.c
+@@ -16,18 +16,17 @@
+
+ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+ {
+- int cc = 0;
++ int cc;
+
+- if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+- cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+- UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
++ if (!kvm_s390_pv_cpu_get_handle(vcpu))
++ return 0;
++
++ cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
++
++ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
++ vcpu->vcpu_id, *rc, *rrc);
++ WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
+
+- KVM_UV_EVENT(vcpu->kvm, 3,
+- "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+- vcpu->vcpu_id, *rc, *rrc);
+- WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
+- *rc, *rrc);
+- }
+ /* Intended memory leak for something that should never happen. */
+ if (!cc)
+ free_pages(vcpu->arch.pv.stor_base,
+@@ -169,10 +168,13 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+ atomic_set(&kvm->mm->context.is_protected, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+- /* Inteded memory leak on "impossible" error */
+- if (!cc)
++ /* Intended memory leak on "impossible" error */
++ if (!cc) {
+ kvm_s390_pv_dealloc_vm(kvm);
+- return cc ? -EIO : 0;
++ return 0;
++ }
++ s390_replace_asce(kvm->arch.gmap);
++ return -EIO;
+ }
+
+ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+@@ -196,7 +198,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+- cc = uv_call(0, (u64)&uvcb);
++ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
+index 683036c1c92a8..52800279686c0 100644
+--- a/arch/s390/kvm/sigp.c
++++ b/arch/s390/kvm/sigp.c
+@@ -288,6 +288,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
++ /*
++ * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
++ * are processed asynchronously. Until the affected VCPU finishes
++ * its work and calls back into KVM to clear the (RESTART or STOP)
++ * interrupt, we need to return any new non-reset orders "busy".
++ *
++ * This is important because a single VCPU could issue:
++ * 1) SIGP STOP $DESTINATION
++ * 2) SIGP SENSE $DESTINATION
++ *
++ * If the SIGP SENSE would not be rejected as "busy", it could
++ * return an incorrect answer as to whether the VCPU is STOPPED
++ * or OPERATING.
++ */
++ if (order_code != SIGP_INITIAL_CPU_RESET &&
++ order_code != SIGP_CPU_RESET) {
++ /*
++ * Lockless check. Both SIGP STOP and SIGP (RE)START
++ * properly synchronize everything while processing
++ * their orders, while the guest cannot observe a
++ * difference when issuing other orders from two
++ * different VCPUs.
++ */
++ if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
++ kvm_s390_is_restart_irq_pending(dst_vcpu))
++ return SIGP_CC_BUSY;
++ }
++
+ switch (order_code) {
+ case SIGP_SENSE:
+ vcpu->stat.instruction_sigp_sense++;
+@@ -464,9 +492,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+ struct kvm_vcpu *dest_vcpu;
+ u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+
+- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+-
+ if (order_code == SIGP_EXTERNAL_CALL) {
++ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
++
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+ BUG_ON(dest_vcpu == NULL);
+
+diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
+index acda4b6fc8518..e07bc0d3df6ff 100644
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -169,7 +169,8 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ sizeof(struct kvm_s390_apcb0)))
+ return -EFAULT;
+
+- bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
++ bitmap_and(apcb_s, apcb_s, apcb_h,
++ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
+
+ return 0;
+ }
+@@ -191,7 +192,8 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ sizeof(struct kvm_s390_apcb1)))
+ return -EFAULT;
+
+- bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
++ bitmap_and(apcb_s, apcb_s, apcb_h,
++ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
+
+ return 0;
+ }
+@@ -538,8 +540,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
+ scb_s->eca |= scb_o->eca & ECA_CEI;
+ /* Epoch Extension */
+- if (test_kvm_facility(vcpu->kvm, 139))
++ if (test_kvm_facility(vcpu->kvm, 139)) {
+ scb_s->ecd |= scb_o->ecd & ECD_MEF;
++ scb_s->epdx = scb_o->epdx;
++ }
+
+ /* etoken */
+ if (test_kvm_facility(vcpu->kvm, 156))
+diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
+index ecf327d743a03..c0635cf787e31 100644
+--- a/arch/s390/lib/test_unwind.c
++++ b/arch/s390/lib/test_unwind.c
+@@ -171,10 +171,11 @@ static noinline int unwindme_func4(struct unwindme *u)
+ }
+
+ /*
+- * trigger specification exception
++ * Trigger operation exception; use insn notation to bypass
++ * llvm's integrated assembler sanity checks.
+ */
+ asm volatile(
+- " mvcl %%r1,%%r1\n"
++ " .insn e,0x0000\n" /* illegal opcode */
+ "0: nopr %%r7\n"
+ EX_TABLE(0b, 0b)
+ :);
+diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
+index a596e69d3c474..25be1424d393b 100644
+--- a/arch/s390/lib/uaccess.c
++++ b/arch/s390/lib/uaccess.c
+@@ -212,7 +212,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
+ asm volatile(
+ " llilh 0,%[spec]\n"
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+- " jz 4f\n"
++ "6: jz 4f\n"
+ "1: algr %0,%2\n"
+ " slgr %1,%2\n"
+ " j 0b\n"
+@@ -222,12 +222,12 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
+ " clgr %0,%3\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+- " slgr %0,%3\n"
++ "7: slgr %0,%3\n"
+ " j 5f\n"
+ "4: slgr %0,%0\n"
+ "5:\n"
+- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+- : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
++ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
++ : "+&a" (size), "+&a" (to), "+a" (tmp1), "=&a" (tmp2)
+ : "a" (empty_zero_page), [spec] "K" (0x81UL)
+ : "cc", "memory", "0");
+ return size;
+diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
+index 5060956b8e7d6..1bc42ce265990 100644
+--- a/arch/s390/mm/extmem.c
++++ b/arch/s390/mm/extmem.c
+@@ -289,15 +289,17 @@ segment_overlaps_others (struct dcss_segment *seg)
+
+ /*
+ * real segment loading function, called from segment_load
++ * Must return either an error code < 0, or the segment type code >= 0
+ */
+ static int
+ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
+ {
+ unsigned long start_addr, end_addr, dummy;
+ struct dcss_segment *seg;
+- int rc, diag_cc;
++ int rc, diag_cc, segtype;
+
+ start_addr = end_addr = 0;
++ segtype = -1;
+ seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
+ if (seg == NULL) {
+ rc = -ENOMEM;
+@@ -326,9 +328,9 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
+ seg->res_name[8] = '\0';
+ strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
+ seg->res->name = seg->res_name;
+- rc = seg->vm_segtype;
+- if (rc == SEG_TYPE_SC ||
+- ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
++ segtype = seg->vm_segtype;
++ if (segtype == SEG_TYPE_SC ||
++ ((segtype == SEG_TYPE_SR || segtype == SEG_TYPE_ER) && !do_nonshared))
+ seg->res->flags |= IORESOURCE_READONLY;
+
+ /* Check for overlapping resources before adding the mapping. */
+@@ -386,7 +388,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
+ out_free:
+ kfree(seg);
+ out:
+- return rc;
++ return rc < 0 ? rc : segtype;
+ }
+
+ /*
+diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
+index 212632d57db9c..c930dff312df3 100644
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -397,7 +397,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
+ flags = FAULT_FLAG_DEFAULT;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+- if (access == VM_WRITE || is_write)
++ if (is_write)
++ access = VM_WRITE;
++ if (access == VM_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+ mmap_read_lock(mm);
+
+diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
+index 4d3b33ce81c62..a2c872de29a66 100644
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault);
+ */
+ void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+ {
++ struct vm_area_struct *vma;
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+@@ -681,11 +682,17 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+ gaddr >> PMD_SHIFT);
+ if (vmaddr) {
+ vmaddr |= gaddr & ~PMD_MASK;
++
++ vma = vma_lookup(gmap->mm, vmaddr);
++ if (!vma || is_vm_hugetlb_page(vma))
++ return;
++
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+- if (likely(ptep))
++ if (likely(ptep)) {
+ ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+- pte_unmap_unlock(ptep, ptl);
++ pte_unmap_unlock(ptep, ptl);
++ }
+ }
+ }
+ EXPORT_SYMBOL_GPL(__gmap_zap);
+@@ -2594,6 +2601,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
+ return 0;
+ }
+
++/*
++ * Give a chance to schedule after setting a key to 256 pages.
++ * We only hold the mm lock, which is a rwsem and the kvm srcu.
++ * Both can sleep.
++ */
++static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
++ unsigned long next, struct mm_walk *walk)
++{
++ cond_resched();
++ return 0;
++}
++
+ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+ unsigned long hmask, unsigned long next,
+ struct mm_walk *walk)
+@@ -2616,12 +2635,14 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+ end = start + HPAGE_SIZE - 1;
+ __storage_key_init_range(start, end);
+ set_bit(PG_arch_1, &page->flags);
++ cond_resched();
+ return 0;
+ }
+
+ static const struct mm_walk_ops enable_skey_walk_ops = {
+ .hugetlb_entry = __s390_enable_skey_hugetlb,
+ .pte_entry = __s390_enable_skey_pte,
++ .pmd_entry = __s390_enable_skey_pmd,
+ };
+
+ int s390_enable_skey(void)
+@@ -2705,3 +2726,90 @@ void s390_reset_acc(struct mm_struct *mm)
+ mmput(mm);
+ }
+ EXPORT_SYMBOL_GPL(s390_reset_acc);
++
++/**
++ * s390_unlist_old_asce - Remove the topmost level of page tables from the
++ * list of page tables of the gmap.
++ * @gmap: the gmap whose table is to be removed
++ *
++ * On s390x, KVM keeps a list of all pages containing the page tables of the
++ * gmap (the CRST list). This list is used at tear down time to free all
++ * pages that are now not needed anymore.
++ *
++ * This function removes the topmost page of the tree (the one pointed to by
++ * the ASCE) from the CRST list.
++ *
++ * This means that it will not be freed when the VM is torn down, and needs
++ * to be handled separately by the caller, unless a leak is actually
++ * intended. Notice that this function will only remove the page from the
++ * list, the page will still be used as a top level page table (and ASCE).
++ */
++void s390_unlist_old_asce(struct gmap *gmap)
++{
++ struct page *old;
++
++ old = virt_to_page(gmap->table);
++ spin_lock(&gmap->guest_table_lock);
++ list_del(&old->lru);
++ /*
++ * Sometimes the topmost page might need to be "removed" multiple
++ * times, for example if the VM is rebooted into secure mode several
++ * times concurrently, or if s390_replace_asce fails after calling
++ * s390_remove_old_asce and is attempted again later. In that case
++ * the old asce has been removed from the list, and therefore it
++ * will not be freed when the VM terminates, but the ASCE is still
++ * in use and still pointed to.
++ * A subsequent call to replace_asce will follow the pointer and try
++ * to remove the same page from the list again.
++ * Therefore it's necessary that the page of the ASCE has valid
++ * pointers, so list_del can work (and do nothing) without
++ * dereferencing stale or invalid pointers.
++ */
++ INIT_LIST_HEAD(&old->lru);
++ spin_unlock(&gmap->guest_table_lock);
++}
++EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
++
++/**
++ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
++ * @gmap: the gmap whose ASCE needs to be replaced
++ *
++ * If the allocation of the new top level page table fails, the ASCE is not
++ * replaced.
++ * In any case, the old ASCE is always removed from the gmap CRST list.
++ * Therefore the caller has to make sure to save a pointer to it
++ * beforehand, unless a leak is actually intended.
++ */
++int s390_replace_asce(struct gmap *gmap)
++{
++ unsigned long asce;
++ struct page *page;
++ void *table;
++
++ s390_unlist_old_asce(gmap);
++
++ page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
++ if (!page)
++ return -ENOMEM;
++ page->index = 0;
++ table = page_to_virt(page);
++ memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
++
++ /*
++ * The caller has to deal with the old ASCE, but here we make sure
++ * the new one is properly added to the CRST list, so that
++ * it will be freed when the VM is torn down.
++ */
++ spin_lock(&gmap->guest_table_lock);
++ list_add(&page->lru, &gmap->crst_list);
++ spin_unlock(&gmap->guest_table_lock);
++
++ /* Set new table origin while preserving existing ASCE control bits */
++ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
++ WRITE_ONCE(gmap->asce, asce);
++ WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
++ WRITE_ONCE(gmap->table, table);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(s390_replace_asce);
+diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
+index 9663ce3625bcd..2ed198b4f7d02 100644
+--- a/arch/s390/mm/maccess.c
++++ b/arch/s390/mm/maccess.c
+@@ -123,7 +123,7 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
+ /*
+ * Copy memory in real mode (kernel to kernel)
+ */
+-int memcpy_real(void *dest, void *src, size_t count)
++int memcpy_real(void *dest, unsigned long src, size_t count)
+ {
+ unsigned long _dest = (unsigned long)dest;
+ unsigned long _src = (unsigned long)src;
+@@ -175,7 +175,7 @@ void memcpy_absolute(void *dest, void *src, size_t count)
+ /*
+ * Copy memory from kernel (real) to user (virtual)
+ */
+-int copy_to_user_real(void __user *dest, void *src, unsigned long count)
++int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
+ {
+ int offs = 0, size, rc;
+ char *buf;
+diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
+index 781965f7210eb..91e478e09b54b 100644
+--- a/arch/s390/mm/pgalloc.c
++++ b/arch/s390/mm/pgalloc.c
+@@ -244,13 +244,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
+ /* Free 2K page table fragment of a 4K page */
+ bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.lock);
+- mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
++ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask >>= 24;
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.lock);
++ mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
++ mask >>= 24;
+ if (mask != 0)
+ return;
+ } else {
+diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
+index 034721a68d8fd..df0adb7e2fe8e 100644
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+ }
+
+ #ifdef CONFIG_PGSTE
+-static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
++static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
+ {
++ struct vm_area_struct *vma;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+- pmd_t *pmd;
++
++ /* We need a valid VMA, otherwise this is clearly a fault. */
++ vma = vma_lookup(mm, addr);
++ if (!vma)
++ return -EFAULT;
+
+ pgd = pgd_offset(mm, addr);
+- p4d = p4d_alloc(mm, pgd, addr);
+- if (!p4d)
+- return NULL;
+- pud = pud_alloc(mm, p4d, addr);
+- if (!pud)
+- return NULL;
+- pmd = pmd_alloc(mm, pud, addr);
+- return pmd;
++ if (!pgd_present(*pgd))
++ return -ENOENT;
++
++ p4d = p4d_offset(pgd, addr);
++ if (!p4d_present(*p4d))
++ return -ENOENT;
++
++ pud = pud_offset(p4d, addr);
++ if (!pud_present(*pud))
++ return -ENOENT;
++
++ /* Large PUDs are not supported yet. */
++ if (pud_large(*pud))
++ return -EFAULT;
++
++ *pmdp = pmd_offset(pud, addr);
++ return 0;
+ }
+ #endif
+
+@@ -734,7 +748,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+ ptev = pte_val(*ptep);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
++ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+ }
+@@ -778,8 +792,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+- pmdp = pmd_alloc_map(mm, addr);
+- if (unlikely(!pmdp))
++ if (pmd_lookup(mm, addr, &pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+@@ -881,8 +894,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
+ pte_t *ptep;
+ int cc = 0;
+
+- pmdp = pmd_alloc_map(mm, addr);
+- if (unlikely(!pmdp))
++ if (pmd_lookup(mm, addr, &pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+@@ -935,15 +947,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+- pmdp = pmd_alloc_map(mm, addr);
+- if (unlikely(!pmdp))
++ /*
++ * If we don't have a PTE table and if there is no huge page mapped,
++ * the storage key is 0.
++ */
++ *key = 0;
++
++ switch (pmd_lookup(mm, addr, &pmdp)) {
++ case -ENOENT:
++ return 0;
++ case 0:
++ break;
++ default:
+ return -EFAULT;
++ }
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+- /* Not yet mapped memory has a zero key */
+ spin_unlock(ptl);
+- *key = 0;
+ return 0;
+ }
+
+@@ -988,6 +1009,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
+ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste)
+ {
++ struct vm_area_struct *vma;
+ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+@@ -997,6 +1019,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ WARN_ON_ONCE(orc > ESSA_MAX);
+ if (unlikely(orc > ESSA_MAX))
+ return -EINVAL;
++
++ vma = vma_lookup(mm, hva);
++ if (!vma || is_vm_hugetlb_page(vma))
++ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+@@ -1089,10 +1115,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
+ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ unsigned long bits, unsigned long value)
+ {
++ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pgste_t new;
+ pte_t *ptep;
+
++ vma = vma_lookup(mm, hva);
++ if (!vma || is_vm_hugetlb_page(vma))
++ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+@@ -1117,9 +1147,13 @@ EXPORT_SYMBOL(set_pgste_bits);
+ */
+ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+ {
++ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
++ vma = vma_lookup(mm, hva);
++ if (!vma || is_vm_hugetlb_page(vma))
++ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
+index 2b1c6d916cf9c..39912629b0619 100644
+--- a/arch/s390/mm/vmem.c
++++ b/arch/s390/mm/vmem.c
+@@ -297,7 +297,7 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
+ if (end > VMALLOC_START)
+ return;
+ #ifdef CONFIG_KASAN
+- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
++ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return;
+ #endif
+ pmd = pmd_offset(pud, start);
+@@ -372,7 +372,7 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+ if (end > VMALLOC_START)
+ return;
+ #ifdef CONFIG_KASAN
+- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
++ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return;
+ #endif
+
+@@ -426,7 +426,7 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+ if (end > VMALLOC_START)
+ return;
+ #ifdef CONFIG_KASAN
+- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
++ if (start < KASAN_SHADOW_END && end > KASAN_SHADOW_START)
+ return;
+ #endif
+
+diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
+index b833155ce8381..56c4cecdbbf9e 100644
+--- a/arch/s390/pci/pci.c
++++ b/arch/s390/pci/pci.c
+@@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
+ list_for_each_entry(tmp, &zpci_list, entry) {
+ if (tmp->fid == fid) {
+ zdev = tmp;
++ zpci_zdev_get(zdev);
+ break;
+ }
+ }
+@@ -502,8 +503,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+ return r;
+ }
+
+-int zpci_setup_bus_resources(struct zpci_dev *zdev,
+- struct list_head *resources)
++int zpci_setup_bus_resources(struct zpci_dev *zdev)
+ {
+ unsigned long addr, size, flags;
+ struct resource *res;
+@@ -539,7 +539,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ return -ENOMEM;
+ }
+ zdev->bars[i].res = res;
+- pci_add_resource(resources, res);
+ }
+ zdev->has_resources = 1;
+
+@@ -548,17 +547,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
+
+ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+ {
++ struct resource *res;
+ int i;
+
++ pci_lock_rescan_remove();
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+- if (!zdev->bars[i].size || !zdev->bars[i].res)
++ res = zdev->bars[i].res;
++ if (!res)
+ continue;
+
++ release_resource(res);
++ pci_bus_remove_resource(zdev->zbus->bus, res);
+ zpci_free_iomap(zdev, zdev->bars[i].map_idx);
+- release_resource(zdev->bars[i].res);
+- kfree(zdev->bars[i].res);
++ zdev->bars[i].res = NULL;
++ kfree(res);
+ }
+ zdev->has_resources = 0;
++ pci_unlock_rescan_remove();
+ }
+
+ int pcibios_add_device(struct pci_dev *pdev)
+diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
+index 5d77acbd1c872..cc7e5b22ccfb3 100644
+--- a/arch/s390/pci/pci_bus.c
++++ b/arch/s390/pci/pci_bus.c
+@@ -41,9 +41,7 @@ static int zpci_nb_devices;
+ */
+ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
+ {
+- struct resource_entry *window, *n;
+- struct resource *res;
+- int rc;
++ int rc, i;
+
+ if (!zdev_enabled(zdev)) {
+ rc = zpci_enable_device(zdev);
+@@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
+ }
+
+ if (!zdev->has_resources) {
+- zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
+- resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
+- res = window->res;
+- pci_bus_add_resource(zdev->zbus->bus, res, 0);
++ zpci_setup_bus_resources(zdev);
++ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
++ if (zdev->bars[i].res)
++ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
+ }
+ }
+
+diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
+index e359d2686178b..c5aa9a2e5e3e5 100644
+--- a/arch/s390/pci/pci_bus.h
++++ b/arch/s390/pci/pci_bus.h
+@@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
+ void zpci_release_device(struct kref *kref);
+ static inline void zpci_zdev_put(struct zpci_dev *zdev)
+ {
+- kref_put(&zdev->kref, zpci_release_device);
++ if (zdev)
++ kref_put(&zdev->kref, zpci_release_device);
+ }
+
+ static inline void zpci_zdev_get(struct zpci_dev *zdev)
+@@ -29,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
+
+ int zpci_alloc_domain(int domain);
+ void zpci_free_domain(int domain);
+-int zpci_setup_bus_resources(struct zpci_dev *zdev,
+- struct list_head *resources);
++int zpci_setup_bus_resources(struct zpci_dev *zdev);
+
+ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
+ unsigned int devfn)
+diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
+index be077b39da336..5011d27461fd3 100644
+--- a/arch/s390/pci/pci_clp.c
++++ b/arch/s390/pci/pci_clp.c
+@@ -22,6 +22,8 @@
+ #include <asm/clp.h>
+ #include <uapi/asm/clp.h>
+
++#include "pci_bus.h"
++
+ bool zpci_unique_uid;
+
+ void update_uid_checking(bool new)
+@@ -403,8 +405,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+- if (!zdev)
+- zpci_create_device(entry->fid, entry->fh, entry->config_state);
++ if (zdev) {
++ zpci_zdev_put(zdev);
++ return;
++ }
++ zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ }
+
+ int clp_scan_pci_devices(void)
+diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
+index 5b8d647523f96..6d57625b8ed99 100644
+--- a/arch/s390/pci/pci_event.c
++++ b/arch/s390/pci/pci_event.c
+@@ -62,10 +62,12 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+ if (!pdev)
+- return;
++ goto no_pdev;
+
+ pdev->error_state = pci_channel_io_perm_failure;
+ pci_dev_put(pdev);
++no_pdev:
++ zpci_zdev_put(zdev);
+ }
+
+ void zpci_event_error(void *data)
+@@ -94,6 +96,7 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
+ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+ {
+ struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
++ bool existing_zdev = !!zdev;
+ enum zpci_state state;
+
+ zpci_err("avail CCDF:\n");
+@@ -156,6 +159,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+ default:
+ break;
+ }
++ if (existing_zdev)
++ zpci_zdev_put(zdev);
+ }
+
+ void zpci_event_availability(void *data)
+diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
+index c5b35ea129cfa..b94163ee5632c 100644
+--- a/arch/s390/pci/pci_mmio.c
++++ b/arch/s390/pci/pci_mmio.c
+@@ -63,7 +63,7 @@ static inline int __pcistg_mio_inuser(
+ asm volatile (
+ " sacf 256\n"
+ "0: llgc %[tmp],0(%[src])\n"
+- " sllg %[val],%[val],8\n"
++ "4: sllg %[val],%[val],8\n"
+ " aghi %[src],1\n"
+ " ogr %[val],%[tmp]\n"
+ " brctg %[cnt],0b\n"
+@@ -71,7 +71,7 @@ static inline int __pcistg_mio_inuser(
+ "2: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "3: sacf 768\n"
+- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
++ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ :
+ [src] "+a" (src), [cnt] "+d" (cnt),
+ [val] "+d" (val), [tmp] "=d" (tmp),
+@@ -214,10 +214,10 @@ static inline int __pcilg_mio_inuser(
+ "2: ahi %[shift],-8\n"
+ " srlg %[tmp],%[val],0(%[shift])\n"
+ "3: stc %[tmp],0(%[dst])\n"
+- " aghi %[dst],1\n"
++ "5: aghi %[dst],1\n"
+ " brctg %[cnt],2b\n"
+ "4: sacf 768\n"
+- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
++ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+ :
+ [ioaddr_len] "+&d" (ioaddr_len.pair),
+ [cc] "+d" (cc), [val] "=d" (val),
+diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
+index 360ada80d20c3..d22ec8acb13c5 100644
+--- a/arch/s390/purgatory/Makefile
++++ b/arch/s390/purgatory/Makefile
+@@ -26,6 +26,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
+ KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
+ KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+ KBUILD_CFLAGS += -fno-stack-protector
++KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+ KBUILD_CFLAGS += $(CLANG_FLAGS)
+ KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+ KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
+index 6904f4bdbf004..101b95f26a91c 100644
+--- a/arch/sh/Kconfig
++++ b/arch/sh/Kconfig
+@@ -7,6 +7,7 @@ config SUPERH
+ select ARCH_HAVE_CUSTOM_GPIO_H
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
+ select ARCH_HAS_BINFMT_FLAT if !MMU
++ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_PTE_SPECIAL
+diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
+index 958f790273ab9..c449e7c1b20ff 100644
+--- a/arch/sh/Kconfig.debug
++++ b/arch/sh/Kconfig.debug
+@@ -15,7 +15,7 @@ config SH_STANDARD_BIOS
+
+ config STACK_DEBUG
+ bool "Check for stack overflows"
+- depends on DEBUG_KERNEL
++ depends on DEBUG_KERNEL && PRINTK
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit. Saying Y here will add overhead to
+@@ -54,6 +54,7 @@ config DUMP_CODE
+
+ config DWARF_UNWINDER
+ bool "Enable the DWARF unwinder for stacktraces"
++ depends on DEBUG_KERNEL
+ select FRAME_POINTER
+ default n
+ help
+diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
+index bac8a058ebd7c..05bd42dde107b 100644
+--- a/arch/sh/boards/mach-ap325rxa/setup.c
++++ b/arch/sh/boards/mach-ap325rxa/setup.c
+@@ -530,7 +530,7 @@ static int __init ap325rxa_devices_setup(void)
+ device_initialize(&ap325rxa_ceu_device.dev);
+ dma_declare_coherent_memory(&ap325rxa_ceu_device.dev,
+ ceu_dma_membase, ceu_dma_membase,
+- ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+
+ platform_device_add(&ap325rxa_ceu_device);
+
+diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
+index bab91a99124e1..9730a992dab33 100644
+--- a/arch/sh/boards/mach-ecovec24/setup.c
++++ b/arch/sh/boards/mach-ecovec24/setup.c
+@@ -1454,15 +1454,13 @@ static int __init arch_setup(void)
+ device_initialize(&ecovec_ceu_devices[0]->dev);
+ dma_declare_coherent_memory(&ecovec_ceu_devices[0]->dev,
+ ceu0_dma_membase, ceu0_dma_membase,
+- ceu0_dma_membase +
+- CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+ platform_device_add(ecovec_ceu_devices[0]);
+
+ device_initialize(&ecovec_ceu_devices[1]->dev);
+ dma_declare_coherent_memory(&ecovec_ceu_devices[1]->dev,
+ ceu1_dma_membase, ceu1_dma_membase,
+- ceu1_dma_membase +
+- CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+ platform_device_add(ecovec_ceu_devices[1]);
+
+ gpiod_add_lookup_table(&cn12_power_gpiod_table);
+diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
+index eeb5ce341efdd..4a1caa3e7cf5a 100644
+--- a/arch/sh/boards/mach-kfr2r09/setup.c
++++ b/arch/sh/boards/mach-kfr2r09/setup.c
+@@ -603,7 +603,7 @@ static int __init kfr2r09_devices_setup(void)
+ device_initialize(&kfr2r09_ceu_device.dev);
+ dma_declare_coherent_memory(&kfr2r09_ceu_device.dev,
+ ceu_dma_membase, ceu_dma_membase,
+- ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+
+ platform_device_add(&kfr2r09_ceu_device);
+
+diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
+index 6703a2122c0d6..bd4ccd9f8dd06 100644
+--- a/arch/sh/boards/mach-migor/setup.c
++++ b/arch/sh/boards/mach-migor/setup.c
+@@ -604,7 +604,7 @@ static int __init migor_devices_setup(void)
+ device_initialize(&migor_ceu_device.dev);
+ dma_declare_coherent_memory(&migor_ceu_device.dev,
+ ceu_dma_membase, ceu_dma_membase,
+- ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+
+ platform_device_add(&migor_ceu_device);
+
+diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
+index 8d6541ba01865..edc7712e4a804 100644
+--- a/arch/sh/boards/mach-se/7724/setup.c
++++ b/arch/sh/boards/mach-se/7724/setup.c
+@@ -940,15 +940,13 @@ static int __init devices_setup(void)
+ device_initialize(&ms7724se_ceu_devices[0]->dev);
+ dma_declare_coherent_memory(&ms7724se_ceu_devices[0]->dev,
+ ceu0_dma_membase, ceu0_dma_membase,
+- ceu0_dma_membase +
+- CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+ platform_device_add(ms7724se_ceu_devices[0]);
+
+ device_initialize(&ms7724se_ceu_devices[1]->dev);
+ dma_declare_coherent_memory(&ms7724se_ceu_devices[1]->dev,
+ ceu1_dma_membase, ceu1_dma_membase,
+- ceu1_dma_membase +
+- CEU_BUFFER_MEMORY_SIZE - 1);
++ CEU_BUFFER_MEMORY_SIZE);
+ platform_device_add(ms7724se_ceu_devices[1]);
+
+ return platform_add_devices(ms7724se_devices,
+diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
+index ba887f1351be6..cd5c58916c65a 100644
+--- a/arch/sh/configs/titan_defconfig
++++ b/arch/sh/configs/titan_defconfig
+@@ -242,7 +242,6 @@ CONFIG_NFSD=y
+ CONFIG_NFSD_V3=y
+ CONFIG_SMB_FS=m
+ CONFIG_CIFS=m
+-CONFIG_CIFS_WEAK_PW_HASH=y
+ CONFIG_PARTITION_ADVANCED=y
+ CONFIG_NLS_CODEPAGE_437=m
+ CONFIG_NLS_ASCII=m
+diff --git a/arch/sh/drivers/dma/dma-sh.c b/arch/sh/drivers/dma/dma-sh.c
+index 96c626c2cd0a4..306fba1564e5e 100644
+--- a/arch/sh/drivers/dma/dma-sh.c
++++ b/arch/sh/drivers/dma/dma-sh.c
+@@ -18,6 +18,18 @@
+ #include <cpu/dma-register.h>
+ #include <cpu/dma.h>
+
++/*
++ * Some of the SoCs feature two DMAC modules. In such a case, the channels are
++ * distributed equally among them.
++ */
++#ifdef SH_DMAC_BASE1
++#define SH_DMAC_NR_MD_CH (CONFIG_NR_ONCHIP_DMA_CHANNELS / 2)
++#else
++#define SH_DMAC_NR_MD_CH CONFIG_NR_ONCHIP_DMA_CHANNELS
++#endif
++
++#define SH_DMAC_CH_SZ 0x10
++
+ /*
+ * Define the default configuration for dual address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+@@ -29,7 +41,7 @@ static unsigned long dma_find_base(unsigned int chan)
+ unsigned long base = SH_DMAC_BASE0;
+
+ #ifdef SH_DMAC_BASE1
+- if (chan >= 6)
++ if (chan >= SH_DMAC_NR_MD_CH)
+ base = SH_DMAC_BASE1;
+ #endif
+
+@@ -40,13 +52,13 @@ static unsigned long dma_base_addr(unsigned int chan)
+ {
+ unsigned long base = dma_find_base(chan);
+
+- /* Normalize offset calculation */
+- if (chan >= 9)
+- chan -= 6;
+- if (chan >= 4)
+- base += 0x10;
++ chan = (chan % SH_DMAC_NR_MD_CH) * SH_DMAC_CH_SZ;
++
++ /* DMAOR is placed inside the channel register space. Step over it. */
++ if (chan >= DMAOR)
++ base += SH_DMAC_CH_SZ;
+
+- return base + (chan * 0x10);
++ return base + chan;
+ }
+
+ #ifdef CONFIG_SH_DMA_IRQ_MULTI
+@@ -250,12 +262,11 @@ static int sh_dmac_get_dma_residue(struct dma_channel *chan)
+ #define NR_DMAOR 1
+ #endif
+
+-/*
+- * DMAOR bases are broken out amongst channel groups. DMAOR0 manages
+- * channels 0 - 5, DMAOR1 6 - 11 (optional).
+- */
+-#define dmaor_read_reg(n) __raw_readw(dma_find_base((n)*6))
+-#define dmaor_write_reg(n, data) __raw_writew(data, dma_find_base(n)*6)
++#define dmaor_read_reg(n) __raw_readw(dma_find_base((n) * \
++ SH_DMAC_NR_MD_CH) + DMAOR)
++#define dmaor_write_reg(n, data) __raw_writew(data, \
++ dma_find_base((n) * \
++ SH_DMAC_NR_MD_CH) + DMAOR)
+
+ static inline int dmaor_reset(int no)
+ {
+diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h
+deleted file mode 100644
+index fe52abb69cea3..0000000000000
+--- a/arch/sh/include/asm/bugs.h
++++ /dev/null
+@@ -1,74 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef __ASM_SH_BUGS_H
+-#define __ASM_SH_BUGS_H
+-
+-/*
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Needs:
+- * void check_bugs(void);
+- */
+-
+-/*
+- * I don't know of any Super-H bugs yet.
+- */
+-
+-#include <asm/processor.h>
+-
+-extern void select_idle_routine(void);
+-
+-static void __init check_bugs(void)
+-{
+- extern unsigned long loops_per_jiffy;
+- char *p = &init_utsname()->machine[2]; /* "sh" */
+-
+- select_idle_routine();
+-
+- current_cpu_data.loops_per_jiffy = loops_per_jiffy;
+-
+- switch (current_cpu_data.family) {
+- case CPU_FAMILY_SH2:
+- *p++ = '2';
+- break;
+- case CPU_FAMILY_SH2A:
+- *p++ = '2';
+- *p++ = 'a';
+- break;
+- case CPU_FAMILY_SH3:
+- *p++ = '3';
+- break;
+- case CPU_FAMILY_SH4:
+- *p++ = '4';
+- break;
+- case CPU_FAMILY_SH4A:
+- *p++ = '4';
+- *p++ = 'a';
+- break;
+- case CPU_FAMILY_SH4AL_DSP:
+- *p++ = '4';
+- *p++ = 'a';
+- *p++ = 'l';
+- *p++ = '-';
+- *p++ = 'd';
+- *p++ = 's';
+- *p++ = 'p';
+- break;
+- case CPU_FAMILY_UNKNOWN:
+- /*
+- * Specifically use CPU_FAMILY_UNKNOWN rather than
+- * default:, so we're able to have the compiler whine
+- * about unhandled enumerations.
+- */
+- break;
+- }
+-
+- printk("CPU: %s\n", get_cpu_subtype(&current_cpu_data));
+-
+-#ifndef __LITTLE_ENDIAN__
+- /* 'eb' means 'Endian Big' */
+- *p++ = 'e';
+- *p++ = 'b';
+-#endif
+- *p = '\0';
+-}
+-#endif /* __ASM_SH_BUGS_H */
+diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
+index cf9a3ec32406f..fba90e670ed41 100644
+--- a/arch/sh/include/asm/io.h
++++ b/arch/sh/include/asm/io.h
+@@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+ #endif /* CONFIG_HAVE_IOREMAP_PROT */
+
+ #else /* CONFIG_MMU */
+-#define iounmap(addr) do { } while (0)
+-#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset))
++static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
++{
++ return (void __iomem *)(unsigned long)offset;
++}
++
++static inline void iounmap(volatile void __iomem *addr) { }
+ #endif /* CONFIG_MMU */
+
+ #define ioremap_uc ioremap
+diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
+index 3820d698846e0..97af2d9b02693 100644
+--- a/arch/sh/include/asm/processor.h
++++ b/arch/sh/include/asm/processor.h
+@@ -167,6 +167,8 @@ extern unsigned int instruction_size(unsigned int insn);
+ #define instruction_size(insn) (2)
+ #endif
+
++void select_idle_routine(void);
++
+ #endif /* __ASSEMBLY__ */
+
+ #include <asm/processor_32.h>
+diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
+index aa92cc933889d..6c7966e627758 100644
+--- a/arch/sh/include/asm/processor_32.h
++++ b/arch/sh/include/asm/processor_32.h
+@@ -50,6 +50,7 @@
+ #define SR_FD 0x00008000
+ #define SR_MD 0x40000000
+
++#define SR_USER_MASK 0x00000303 // M, Q, S, T bits
+ /*
+ * DSP structure and data
+ */
+diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h
+index 8edb824049b9e..0cb0ca149ac34 100644
+--- a/arch/sh/include/asm/sections.h
++++ b/arch/sh/include/asm/sections.h
+@@ -4,7 +4,7 @@
+
+ #include <asm-generic/sections.h>
+
+-extern long __machvec_start, __machvec_end;
++extern char __machvec_start[], __machvec_end[];
+ extern char __uncached_start, __uncached_end;
+ extern char __start_eh_frame[], __stop_eh_frame[];
+
+diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h
+index cbc7cf8c97ce6..2d2423478b71d 100644
+--- a/arch/sh/include/asm/sfp-machine.h
++++ b/arch/sh/include/asm/sfp-machine.h
+@@ -13,6 +13,14 @@
+ #ifndef _SFP_MACHINE_H
+ #define _SFP_MACHINE_H
+
++#ifdef __BIG_ENDIAN__
++#define __BYTE_ORDER __BIG_ENDIAN
++#define __LITTLE_ENDIAN 0
++#else
++#define __BYTE_ORDER __LITTLE_ENDIAN
++#define __BIG_ENDIAN 0
++#endif
++
+ #define _FP_W_TYPE_SIZE 32
+ #define _FP_W_TYPE unsigned long
+ #define _FP_WS_TYPE signed long
+diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c
+index ae354a2931e7e..fd6db0ab19288 100644
+--- a/arch/sh/kernel/cpu/fpu.c
++++ b/arch/sh/kernel/cpu/fpu.c
+@@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs)
+ }
+
+ if (!tsk_used_math(tsk)) {
+- local_irq_enable();
++ int ret;
+ /*
+ * does a slab alloc which can sleep
+ */
+- if (init_fpu(tsk)) {
++ local_irq_enable();
++ ret = init_fpu(tsk);
++ local_irq_disable();
++ if (ret) {
+ /*
+ * ran out of memory!
+ */
+- do_group_exit(SIGKILL);
++ force_sig(SIGKILL);
+ return;
+ }
+- local_irq_disable();
+ }
+
+ grab_fpu(regs);
+diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
+index d342ea08843f6..70a07f4f2142f 100644
+--- a/arch/sh/kernel/cpu/sh2/probe.c
++++ b/arch/sh/kernel/cpu/sh2/probe.c
+@@ -21,7 +21,7 @@ static int __init scan_cache(unsigned long node, const char *uname,
+ if (!of_flat_dt_is_compatible(node, "jcore,cache"))
+ return 0;
+
+- j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node);
++ j2_ccr_base = ioremap(of_flat_dt_translate_address(node), 4);
+
+ return 1;
+ }
+diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
+index d432164b23b7c..c31ec0fea3003 100644
+--- a/arch/sh/kernel/cpu/sh4/sq.c
++++ b/arch/sh/kernel/cpu/sh4/sq.c
+@@ -381,7 +381,7 @@ static int __init sq_api_init(void)
+ if (unlikely(!sq_cache))
+ return ret;
+
+- sq_bitmap = kzalloc(size, GFP_KERNEL);
++ sq_bitmap = kcalloc(size, sizeof(long), GFP_KERNEL);
+ if (unlikely(!sq_bitmap))
+ goto out;
+
+diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+index f8a2bec0f260b..1261dc7b84e8b 100644
+--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
++++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+@@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus)
+ BUILD_BUG_ON(SMP_MSG_NR >= 8);
+
+ for (i = 0; i < SMP_MSG_NR; i++)
+- request_irq(104 + i, ipi_interrupt_handler,
+- IRQF_PERCPU, "IPI", (void *)(long)i);
++ if (request_irq(104 + i, ipi_interrupt_handler,
++ IRQF_PERCPU, "IPI", (void *)(long)i))
++ pr_err("Failed to request irq %d\n", i);
+
+ for (i = 0; i < max_cpus; i++)
+ set_cpu_present(i, true);
+diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
+index 4adbd4ade3194..b603b7968b388 100644
+--- a/arch/sh/kernel/head_32.S
++++ b/arch/sh/kernel/head_32.S
+@@ -64,7 +64,7 @@ ENTRY(_stext)
+ ldc r0, r6_bank
+ #endif
+
+-#ifdef CONFIG_OF_FLATTREE
++#ifdef CONFIG_OF_EARLY_FLATTREE
+ mov r4, r12 ! Store device tree blob pointer in r12
+ #endif
+
+@@ -315,7 +315,7 @@ ENTRY(_stext)
+ 10:
+ #endif
+
+-#ifdef CONFIG_OF_FLATTREE
++#ifdef CONFIG_OF_EARLY_FLATTREE
+ mov.l 8f, r0 ! Make flat device tree available early.
+ jsr @r0
+ mov r12, r4
+@@ -346,7 +346,7 @@ ENTRY(stack_start)
+ 5: .long start_kernel
+ 6: .long cpu_init
+ 7: .long init_thread_union
+-#if defined(CONFIG_OF_FLATTREE)
++#if defined(CONFIG_OF_EARLY_FLATTREE)
+ 8: .long sh_fdt_init
+ #endif
+
+diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
+index f59814983bd59..a80b2a5b25c7f 100644
+--- a/arch/sh/kernel/idle.c
++++ b/arch/sh/kernel/idle.c
+@@ -14,6 +14,7 @@
+ #include <linux/irqflags.h>
+ #include <linux/smp.h>
+ #include <linux/atomic.h>
++#include <asm/processor.h>
+ #include <asm/smp.h>
+ #include <asm/bl_bit.h>
+
+diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
+index d606679a211e1..57efaf5b82ae0 100644
+--- a/arch/sh/kernel/machvec.c
++++ b/arch/sh/kernel/machvec.c
+@@ -20,8 +20,8 @@
+ #define MV_NAME_SIZE 32
+
+ #define for_each_mv(mv) \
+- for ((mv) = (struct sh_machine_vector *)&__machvec_start; \
+- (mv) && (unsigned long)(mv) < (unsigned long)&__machvec_end; \
++ for ((mv) = (struct sh_machine_vector *)__machvec_start; \
++ (mv) && (unsigned long)(mv) < (unsigned long)__machvec_end; \
+ (mv)++)
+
+ static struct sh_machine_vector * __init get_mv_byname(const char *name)
+@@ -87,8 +87,8 @@ void __init sh_mv_setup(void)
+ if (!machvec_selected) {
+ unsigned long machvec_size;
+
+- machvec_size = ((unsigned long)&__machvec_end -
+- (unsigned long)&__machvec_start);
++ machvec_size = ((unsigned long)__machvec_end -
++ (unsigned long)__machvec_start);
+
+ /*
+ * Sanity check for machvec section alignment. Ensure
+@@ -102,7 +102,7 @@ void __init sh_mv_setup(void)
+ * vector (usually the only one) from .machvec.init.
+ */
+ if (machvec_size >= sizeof(struct sh_machine_vector))
+- sh_mv = *(struct sh_machine_vector *)&__machvec_start;
++ sh_mv = *(struct sh_machine_vector *)__machvec_start;
+ }
+
+ pr_notice("Booting machvec: %s\n", get_system_type());
+diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c
+index 11777867c6f5f..a212b645b4cf8 100644
+--- a/arch/sh/kernel/nmi_debug.c
++++ b/arch/sh/kernel/nmi_debug.c
+@@ -49,7 +49,7 @@ static int __init nmi_debug_setup(char *str)
+ register_die_notifier(&nmi_debug_nb);
+
+ if (*str != '=')
+- return 0;
++ return 1;
+
+ for (p = str + 1; *p; p = sep + 1) {
+ sep = strchr(p, ',');
+@@ -70,6 +70,6 @@ static int __init nmi_debug_setup(char *str)
+ break;
+ }
+
+- return 0;
++ return 1;
+ }
+ __setup("nmi_debug", nmi_debug_setup);
+diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
+index 1fcb6659822a3..cf7c0f72f2935 100644
+--- a/arch/sh/kernel/setup.c
++++ b/arch/sh/kernel/setup.c
+@@ -43,6 +43,7 @@
+ #include <asm/smp.h>
+ #include <asm/mmu_context.h>
+ #include <asm/mmzone.h>
++#include <asm/processor.h>
+ #include <asm/sparsemem.h>
+ #include <asm/platform_early.h>
+
+@@ -244,7 +245,7 @@ void __init __weak plat_early_device_setup(void)
+ {
+ }
+
+-#ifdef CONFIG_OF_FLATTREE
++#ifdef CONFIG_OF_EARLY_FLATTREE
+ void __ref sh_fdt_init(phys_addr_t dt_phys)
+ {
+ static int done = 0;
+@@ -326,7 +327,7 @@ void __init setup_arch(char **cmdline_p)
+ /* Let earlyprintk output early console messages */
+ sh_early_platform_driver_probe("earlyprintk", 1, 1);
+
+-#ifdef CONFIG_OF_FLATTREE
++#ifdef CONFIG_OF_EARLY_FLATTREE
+ #ifdef CONFIG_USE_BUILTIN_DTB
+ unflatten_and_copy_device_tree();
+ #else
+@@ -354,3 +355,57 @@ int test_mode_pin(int pin)
+ {
+ return sh_mv.mv_mode_pins() & pin;
+ }
++
++void __init arch_cpu_finalize_init(void)
++{
++ char *p = &init_utsname()->machine[2]; /* "sh" */
++
++ select_idle_routine();
++
++ current_cpu_data.loops_per_jiffy = loops_per_jiffy;
++
++ switch (current_cpu_data.family) {
++ case CPU_FAMILY_SH2:
++ *p++ = '2';
++ break;
++ case CPU_FAMILY_SH2A:
++ *p++ = '2';
++ *p++ = 'a';
++ break;
++ case CPU_FAMILY_SH3:
++ *p++ = '3';
++ break;
++ case CPU_FAMILY_SH4:
++ *p++ = '4';
++ break;
++ case CPU_FAMILY_SH4A:
++ *p++ = '4';
++ *p++ = 'a';
++ break;
++ case CPU_FAMILY_SH4AL_DSP:
++ *p++ = '4';
++ *p++ = 'a';
++ *p++ = 'l';
++ *p++ = '-';
++ *p++ = 'd';
++ *p++ = 's';
++ *p++ = 'p';
++ break;
++ case CPU_FAMILY_UNKNOWN:
++ /*
++ * Specifically use CPU_FAMILY_UNKNOWN rather than
++ * default:, so we're able to have the compiler whine
++ * about unhandled enumerations.
++ */
++ break;
++ }
++
++ pr_info("CPU: %s\n", get_cpu_subtype(&current_cpu_data));
++
++#ifndef __LITTLE_ENDIAN__
++ /* 'eb' means 'Endian Big' */
++ *p++ = 'e';
++ *p++ = 'b';
++#endif
++ *p = '\0';
++}
+diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
+index dd3092911efad..dc13702003f0f 100644
+--- a/arch/sh/kernel/signal_32.c
++++ b/arch/sh/kernel/signal_32.c
+@@ -115,6 +115,7 @@ static int
+ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p)
+ {
+ unsigned int err = 0;
++ unsigned int sr = regs->sr & ~SR_USER_MASK;
+
+ #define COPY(x) err |= __get_user(regs->x, &sc->sc_##x)
+ COPY(regs[1]);
+@@ -130,6 +131,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *r0_p
+ COPY(sr); COPY(pc);
+ #undef COPY
+
++ regs->sr = (regs->sr & SR_USER_MASK) | sr;
++
+ #ifdef CONFIG_SH_FPU
+ if (boot_cpu_data.flags & CPU_HAS_FPU) {
+ int owned_fp;
+diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
+index e76b221570999..361b764700b74 100644
+--- a/arch/sh/kernel/traps.c
++++ b/arch/sh/kernel/traps.c
+@@ -57,7 +57,7 @@ void die(const char *str, struct pt_regs *regs, long err)
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+- do_exit(SIGSEGV);
++ make_task_dead(SIGSEGV);
+ }
+
+ void die_if_kernel(const char *str, struct pt_regs *regs, long err)
+diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
+index 3161b9ccd2a57..b6276a3521d73 100644
+--- a/arch/sh/kernel/vmlinux.lds.S
++++ b/arch/sh/kernel/vmlinux.lds.S
+@@ -4,6 +4,7 @@
+ * Written by Niibe Yutaka and Paul Mundt
+ */
+ OUTPUT_ARCH(sh)
++#define RUNTIME_DISCARD_EXIT
+ #include <asm/thread_info.h>
+ #include <asm/cache.h>
+ #include <asm/vmlinux.lds.h>
+diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
+index e8be0eca0444a..615ba932c398e 100644
+--- a/arch/sh/math-emu/math.c
++++ b/arch/sh/math-emu/math.c
+@@ -467,109 +467,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg
+ return id_sys(fregs, regs, code);
+ }
+
+-/**
+- * denormal_to_double - Given denormalized float number,
+- * store double float
+- *
+- * @fpu: Pointer to sh_fpu_soft structure
+- * @n: Index to FP register
+- */
+-static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n)
+-{
+- unsigned long du, dl;
+- unsigned long x = fpu->fpul;
+- int exp = 1023 - 126;
+-
+- if (x != 0 && (x & 0x7f800000) == 0) {
+- du = (x & 0x80000000);
+- while ((x & 0x00800000) == 0) {
+- x <<= 1;
+- exp--;
+- }
+- x &= 0x007fffff;
+- du |= (exp << 20) | (x >> 3);
+- dl = x << 29;
+-
+- fpu->fp_regs[n] = du;
+- fpu->fp_regs[n+1] = dl;
+- }
+-}
+-
+-/**
+- * ieee_fpe_handler - Handle denormalized number exception
+- *
+- * @regs: Pointer to register structure
+- *
+- * Returns 1 when it's handled (should not cause exception).
+- */
+-static int ieee_fpe_handler(struct pt_regs *regs)
+-{
+- unsigned short insn = *(unsigned short *)regs->pc;
+- unsigned short finsn;
+- unsigned long nextpc;
+- int nib[4] = {
+- (insn >> 12) & 0xf,
+- (insn >> 8) & 0xf,
+- (insn >> 4) & 0xf,
+- insn & 0xf};
+-
+- if (nib[0] == 0xb ||
+- (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
+- regs->pr = regs->pc + 4;
+-
+- if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
+- nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
+- if (regs->sr & 1)
+- nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
+- else
+- nextpc = regs->pc + 4;
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
+- if (regs->sr & 1)
+- nextpc = regs->pc + 4;
+- else
+- nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else if (nib[0] == 0x4 && nib[3] == 0xb &&
+- (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
+- nextpc = regs->regs[nib[1]];
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
+- (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
+- nextpc = regs->pc + 4 + regs->regs[nib[1]];
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else if (insn == 0x000b) { /* rts */
+- nextpc = regs->pr;
+- finsn = *(unsigned short *) (regs->pc + 2);
+- } else {
+- nextpc = regs->pc + 2;
+- finsn = insn;
+- }
+-
+- if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
+- struct task_struct *tsk = current;
+-
+- if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) {
+- /* FPU error */
+- denormal_to_double (&tsk->thread.xstate->softfpu,
+- (finsn >> 8) & 0xf);
+- tsk->thread.xstate->softfpu.fpscr &=
+- ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
+- task_thread_info(tsk)->status |= TS_USEDFPU;
+- } else {
+- force_sig_fault(SIGFPE, FPE_FLTINV,
+- (void __user *)regs->pc);
+- }
+-
+- regs->pc = nextpc;
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+ /**
+ * fpu_init - Initialize FPU registers
+ * @fpu: Pointer to software emulated FPU registers.
+diff --git a/arch/sh/math-emu/sfp-util.h b/arch/sh/math-emu/sfp-util.h
+index 784f541344f36..bda50762b3d33 100644
+--- a/arch/sh/math-emu/sfp-util.h
++++ b/arch/sh/math-emu/sfp-util.h
+@@ -67,7 +67,3 @@
+ } while (0)
+
+ #define abort() return 0
+-
+-#define __BYTE_ORDER __LITTLE_ENDIAN
+-
+-
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index b120ed947f50b..1176f0de6a0f4 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -52,6 +52,7 @@ config SPARC
+ config SPARC32
+ def_bool !64BIT
+ select ARCH_32BIT_OFF_T
++ select ARCH_HAS_CPU_FINALIZE_INIT if !SMP
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select GENERIC_ATOMIC64
+ select CLZ_TAB
+@@ -286,7 +287,7 @@ config FORCE_MAX_ZONEORDER
+ This config option is actually maximum order plus one. For example,
+ a value of 13 means that the largest free memory block is 2^12 pages.
+
+-if SPARC64
++if SPARC64 || COMPILE_TEST
+ source "kernel/power/Kconfig"
+ endif
+
+diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
+index 849236d4eca48..45e5c76d449ea 100644
+--- a/arch/sparc/boot/Makefile
++++ b/arch/sparc/boot/Makefile
+@@ -22,7 +22,7 @@ ifeq ($(CONFIG_SPARC64),y)
+
+ # Actual linking
+
+-$(obj)/zImage: $(obj)/image
++$(obj)/zImage: $(obj)/image FORCE
+ $(call if_changed,gzip)
+ @echo ' kernel: $@ is ready'
+
+@@ -31,7 +31,7 @@ $(obj)/vmlinux.aout: vmlinux FORCE
+ @echo ' kernel: $@ is ready'
+ else
+
+-$(obj)/zImage: $(obj)/image
++$(obj)/zImage: $(obj)/image FORCE
+ $(call if_changed,strip)
+ @echo ' kernel: $@ is ready'
+
+@@ -44,7 +44,7 @@ OBJCOPYFLAGS_image.bin := -S -O binary -R .note -R .comment
+ $(obj)/image.bin: $(obj)/image FORCE
+ $(call if_changed,objcopy)
+
+-$(obj)/image.gz: $(obj)/image.bin
++$(obj)/image.gz: $(obj)/image.bin FORCE
+ $(call if_changed,gzip)
+
+ UIMAGE_LOADADDR = $(CONFIG_UBOOT_LOAD_ADDR)
+@@ -56,7 +56,7 @@ quiet_cmd_uimage.o = UIMAGE.O $@
+ -r -b binary $@ -o $@.o
+
+ targets += uImage
+-$(obj)/uImage: $(obj)/image.gz
++$(obj)/uImage: $(obj)/image.gz FORCE
+ $(call if_changed,uimage)
+ $(call if_changed,uimage.o)
+ @echo ' Image $@ is ready'
+diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h
+deleted file mode 100644
+index 02fa369b9c21f..0000000000000
+--- a/arch/sparc/include/asm/bugs.h
++++ /dev/null
+@@ -1,18 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/* include/asm/bugs.h: Sparc probes for various bugs.
+- *
+- * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
+- */
+-
+-#ifdef CONFIG_SPARC32
+-#include <asm/cpudata.h>
+-#endif
+-
+-extern unsigned long loops_per_jiffy;
+-
+-static void __init check_bugs(void)
+-{
+-#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP)
+- cpu_data(0).udelay_val = loops_per_jiffy;
+-#endif
+-}
+diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h
+index 542915b462097..f86326a6f89e0 100644
+--- a/arch/sparc/include/asm/timex_32.h
++++ b/arch/sparc/include/asm/timex_32.h
+@@ -9,8 +9,6 @@
+
+ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+
+-/* XXX Maybe do something better at some point... -DaveM */
+-typedef unsigned long cycles_t;
+-#define get_cycles() (0)
++#include <asm-generic/timex.h>
+
+ #endif
+diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
+index c8e0dd99f3700..c9d1ba4f311b9 100644
+--- a/arch/sparc/kernel/setup_32.c
++++ b/arch/sparc/kernel/setup_32.c
+@@ -412,3 +412,10 @@ static int __init topology_init(void)
+ }
+
+ subsys_initcall(topology_init);
++
++#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP)
++void __init arch_cpu_finalize_init(void)
++{
++ cpu_data(0).udelay_val = loops_per_jiffy;
++}
++#endif
+diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
+index 6cc124a3bb98a..90ff7ff94ea7f 100644
+--- a/arch/sparc/kernel/signal32.c
++++ b/arch/sparc/kernel/signal32.c
+@@ -780,5 +780,6 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
+ static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
+ static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
+ static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
++static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18);
+ static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
+ static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
+diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
+index 02f3ad55dfe31..74f80443b195f 100644
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -65,7 +65,7 @@ struct rt_signal_frame {
+ */
+ static inline bool invalid_frame_pointer(void __user *fp, int fplen)
+ {
+- if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
++ if ((((unsigned long) fp) & 15) || !access_ok(fp, fplen))
+ return true;
+
+ return false;
+@@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs,
+ get_sigframe(ksig, regs, sigframe_size);
+
+ if (invalid_frame_pointer(sf, sigframe_size)) {
+- do_exit(SIGILL);
++ force_exit_sig(SIGILL);
+ return -EINVAL;
+ }
+
+@@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
+ sf = (struct rt_signal_frame __user *)
+ get_sigframe(ksig, regs, sigframe_size);
+ if (invalid_frame_pointer(sf, sigframe_size)) {
+- do_exit(SIGILL);
++ force_exit_sig(SIGILL);
+ return -EINVAL;
+ }
+
+diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
+index 2a78d2af12655..6eeb766987d1a 100644
+--- a/arch/sparc/kernel/signal_64.c
++++ b/arch/sparc/kernel/signal_64.c
+@@ -590,5 +590,6 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28);
+ static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
+ static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
+ static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
++static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
+ static_assert(offsetof(siginfo_t, si_band) == 0x10);
+ static_assert(offsetof(siginfo_t, si_fd) == 0x14);
+diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
+index 5630e5a395e0d..179aabfa712ea 100644
+--- a/arch/sparc/kernel/traps_32.c
++++ b/arch/sparc/kernel/traps_32.c
+@@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
+ }
+ printk("Instruction DUMP:");
+ instruction_dump ((unsigned long *) regs->pc);
+- if(regs->psr & PSR_PS)
+- do_exit(SIGKILL);
+- do_exit(SIGSEGV);
++ make_task_dead((regs->psr & PSR_PS) ? SIGKILL : SIGSEGV);
+ }
+
+ void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
+diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
+index 6863025ed56d2..21077821f4272 100644
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -2559,9 +2559,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
+ }
+ if (panic_on_oops)
+ panic("Fatal exception");
+- if (regs->tstate & TSTATE_PRIV)
+- do_exit(SIGKILL);
+- do_exit(SIGSEGV);
++ make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV);
+ }
+ EXPORT_SYMBOL(die_if_kernel);
+
+diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
+index 69a6ba6e92937..8f20862ccc83e 100644
+--- a/arch/sparc/kernel/windows.c
++++ b/arch/sparc/kernel/windows.c
+@@ -121,8 +121,10 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
+
+ if ((sp & 7) ||
+ copy_to_user((char __user *) sp, &tp->reg_window[window],
+- sizeof(struct reg_window32)))
+- do_exit(SIGILL);
++ sizeof(struct reg_window32))) {
++ force_exit_sig(SIGILL);
++ return;
++ }
+ }
+ tp->w_saved = 0;
+ }
+diff --git a/arch/um/.gitignore b/arch/um/.gitignore
+index 6323e5571887e..d69ea5b562cee 100644
+--- a/arch/um/.gitignore
++++ b/arch/um/.gitignore
+@@ -2,3 +2,4 @@
+ kernel/config.c
+ kernel/config.tmp
+ kernel/vmlinux.lds
++kernel/capflags.c
+diff --git a/arch/um/Kconfig b/arch/um/Kconfig
+index c18b45f75d41f..b0584453d2a0b 100644
+--- a/arch/um/Kconfig
++++ b/arch/um/Kconfig
+@@ -6,6 +6,7 @@ config UML
+ bool
+ default y
+ select ARCH_EPHEMERAL_INODES
++ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_STRNCPY_FROM_USER
+ select ARCH_HAS_STRNLEN_USER
+diff --git a/arch/um/Makefile b/arch/um/Makefile
+index f2fe63bfd819f..3dbd0e3b660ea 100644
+--- a/arch/um/Makefile
++++ b/arch/um/Makefile
+@@ -132,15 +132,23 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT)
+ # The wrappers will select whether using "malloc" or the kernel allocator.
+ LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
+
++# Avoid binutils 2.39+ warnings by marking the stack non-executable and
++# ignoring warnings for the kallsyms sections.
++LDFLAGS_EXECSTACK = -z noexecstack
++ifeq ($(CONFIG_LD_IS_BFD),y)
++LDFLAGS_EXECSTACK += $(call ld-option,--no-warn-rwx-segments)
++endif
++
+ LD_FLAGS_CMDLINE = $(foreach opt,$(KBUILD_LDFLAGS),-Wl,$(opt))
+
+ # Used by link-vmlinux.sh which has special support for um link
+ export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
++export LDFLAGS_vmlinux := $(LDFLAGS_EXECSTACK)
+
+ # When cleaning we don't include .config, so we don't include
+ # TT or skas makefiles and don't clean skas_ptregs.h.
+ CLEAN_FILES += linux x.i gmon.out
+-MRPROPER_FILES += arch/$(SUBARCH)/include/generated
++MRPROPER_FILES += $(HOST_DIR)/include/generated
+
+ archclean:
+ @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
+diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
+index fb51bd206dbed..4d7f99a02c1eb 100644
+--- a/arch/um/configs/i386_defconfig
++++ b/arch/um/configs/i386_defconfig
+@@ -35,6 +35,7 @@ CONFIG_TTY_CHAN=y
+ CONFIG_XTERM_CHAN=y
+ CONFIG_CON_CHAN="pts"
+ CONFIG_SSL_CHAN="pts"
++CONFIG_SOUND=m
+ CONFIG_UML_SOUND=m
+ CONFIG_DEVTMPFS=y
+ CONFIG_DEVTMPFS_MOUNT=y
+diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
+index 477b873174243..4bdd83008f623 100644
+--- a/arch/um/configs/x86_64_defconfig
++++ b/arch/um/configs/x86_64_defconfig
+@@ -33,6 +33,7 @@ CONFIG_TTY_CHAN=y
+ CONFIG_XTERM_CHAN=y
+ CONFIG_CON_CHAN="pts"
+ CONFIG_SSL_CHAN="pts"
++CONFIG_SOUND=m
+ CONFIG_UML_SOUND=m
+ CONFIG_DEVTMPFS=y
+ CONFIG_DEVTMPFS_MOUNT=y
+diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
+index f145842c40b94..3dd74d369f995 100644
+--- a/arch/um/drivers/Kconfig
++++ b/arch/um/drivers/Kconfig
+@@ -104,24 +104,14 @@ config SSL_CHAN
+
+ config UML_SOUND
+ tristate "Sound support"
++ depends on SOUND
++ select SOUND_OSS_CORE
+ help
+ This option enables UML sound support. If enabled, it will pull in
+- soundcore and the UML hostaudio relay, which acts as a intermediary
++ the UML hostaudio relay, which acts as an intermediary
+ between the host's dsp and mixer devices and the UML sound system.
+ It is safe to say 'Y' here.
+
+-config SOUND
+- tristate
+- default UML_SOUND
+-
+-config SOUND_OSS_CORE
+- bool
+- default UML_SOUND
+-
+-config HOSTAUDIO
+- tristate
+- default UML_SOUND
+-
+ endmenu
+
+ menu "UML Network Devices"
+diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
+index 803666e85414a..dc0e6fe77de10 100644
+--- a/arch/um/drivers/Makefile
++++ b/arch/um/drivers/Makefile
+@@ -16,7 +16,8 @@ mconsole-objs := mconsole_kern.o mconsole_user.o
+ hostaudio-objs := hostaudio_kern.o
+ ubd-objs := ubd_kern.o ubd_user.o
+ port-objs := port_kern.o port_user.o
+-harddog-objs := harddog_kern.o harddog_user.o
++harddog-objs := harddog_kern.o
++harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
+ rtc-objs := rtc_kern.o rtc_user.o
+
+ LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
+@@ -53,13 +54,14 @@ obj-$(CONFIG_UML_NET) += net.o
+ obj-$(CONFIG_MCONSOLE) += mconsole.o
+ obj-$(CONFIG_MMAPPER) += mmapper_kern.o
+ obj-$(CONFIG_BLK_DEV_UBD) += ubd.o
+-obj-$(CONFIG_HOSTAUDIO) += hostaudio.o
++obj-$(CONFIG_UML_SOUND) += hostaudio.o
+ obj-$(CONFIG_NULL_CHAN) += null.o
+ obj-$(CONFIG_PORT_CHAN) += port.o
+ obj-$(CONFIG_PTY_CHAN) += pty.o
+ obj-$(CONFIG_TTY_CHAN) += tty.o
+ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
+ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
++obj-y += $(harddog-builtin-y) $(harddog-builtin-m)
+ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
+ obj-$(CONFIG_UML_RANDOM) += random.o
+ obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
+diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
+index 62997055c4547..26a702a065154 100644
+--- a/arch/um/drivers/chan_kern.c
++++ b/arch/um/drivers/chan_kern.c
+@@ -133,7 +133,7 @@ static void line_timer_cb(struct work_struct *work)
+ struct line *line = container_of(work, struct line, task.work);
+
+ if (!line->throttled)
+- chan_interrupt(line, line->driver->read_irq);
++ chan_interrupt(line, line->read_irq);
+ }
+
+ int enable_chan(struct line *line)
+@@ -195,9 +195,9 @@ void free_irqs(void)
+ chan = list_entry(ele, struct chan, free_list);
+
+ if (chan->input && chan->enabled)
+- um_free_irq(chan->line->driver->read_irq, chan);
++ um_free_irq(chan->line->read_irq, chan);
+ if (chan->output && chan->enabled)
+- um_free_irq(chan->line->driver->write_irq, chan);
++ um_free_irq(chan->line->write_irq, chan);
+ chan->enabled = 0;
+ }
+ }
+@@ -215,9 +215,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
+ spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+ } else {
+ if (chan->input && chan->enabled)
+- um_free_irq(chan->line->driver->read_irq, chan);
++ um_free_irq(chan->line->read_irq, chan);
+ if (chan->output && chan->enabled)
+- um_free_irq(chan->line->driver->write_irq, chan);
++ um_free_irq(chan->line->write_irq, chan);
+ chan->enabled = 0;
+ }
+ if (chan->ops->close != NULL)
+diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
+index 6040817c036f3..25727ed648b72 100644
+--- a/arch/um/drivers/chan_user.c
++++ b/arch/um/drivers/chan_user.c
+@@ -220,7 +220,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
+ unsigned long *stack_out)
+ {
+ struct winch_data data;
+- int fds[2], n, err;
++ int fds[2], n, err, pid;
+ char c;
+
+ err = os_pipe(fds, 1, 1);
+@@ -238,8 +238,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
+ * problem with /dev/net/tun, which if held open by this
+ * thread, prevents the TUN/TAP device from being reused.
+ */
+- err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out);
+- if (err < 0) {
++ pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out);
++ if (pid < 0) {
++ err = pid;
+ printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n",
+ -err);
+ goto out_close;
+@@ -263,7 +264,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
+ goto out_close;
+ }
+
+- return err;
++ return pid;
+
+ out_close:
+ close(fds[1]);
+diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h
+new file mode 100644
+index 0000000000000..6d9ea60e7133e
+--- /dev/null
++++ b/arch/um/drivers/harddog.h
+@@ -0,0 +1,9 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef UM_WATCHDOG_H
++#define UM_WATCHDOG_H
++
++int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
++void stop_watchdog(int in_fd, int out_fd);
++int ping_watchdog(int fd);
++
++#endif /* UM_WATCHDOG_H */
+diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
+index e6d4f43deba82..60d1c6cab8a95 100644
+--- a/arch/um/drivers/harddog_kern.c
++++ b/arch/um/drivers/harddog_kern.c
+@@ -47,6 +47,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/uaccess.h>
+ #include "mconsole.h"
++#include "harddog.h"
+
+ MODULE_LICENSE("GPL");
+
+@@ -60,8 +61,6 @@ static int harddog_out_fd = -1;
+ * Allow only one person to hold it open
+ */
+
+-extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
+-
+ static int harddog_open(struct inode *inode, struct file *file)
+ {
+ int err = -EBUSY;
+@@ -92,8 +91,6 @@ err:
+ return err;
+ }
+
+-extern void stop_watchdog(int in_fd, int out_fd);
+-
+ static int harddog_release(struct inode *inode, struct file *file)
+ {
+ /*
+@@ -112,8 +109,6 @@ static int harddog_release(struct inode *inode, struct file *file)
+ return 0;
+ }
+
+-extern int ping_watchdog(int fd);
+-
+ static ssize_t harddog_write(struct file *file, const char __user *data, size_t len,
+ loff_t *ppos)
+ {
+diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
+index 070468d22e394..9ed89304975ed 100644
+--- a/arch/um/drivers/harddog_user.c
++++ b/arch/um/drivers/harddog_user.c
+@@ -7,6 +7,7 @@
+ #include <unistd.h>
+ #include <errno.h>
+ #include <os.h>
++#include "harddog.h"
+
+ struct dog_data {
+ int stdin_fd;
+diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c
+new file mode 100644
+index 0000000000000..c74d4b815d143
+--- /dev/null
++++ b/arch/um/drivers/harddog_user_exp.c
+@@ -0,0 +1,9 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/export.h>
++#include "harddog.h"
++
++#if IS_MODULE(CONFIG_UML_WATCHDOG)
++EXPORT_SYMBOL(start_watchdog);
++EXPORT_SYMBOL(stop_watchdog);
++EXPORT_SYMBOL(ping_watchdog);
++#endif
+diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
+index 8febf95da96e1..02b0befd67632 100644
+--- a/arch/um/drivers/line.c
++++ b/arch/um/drivers/line.c
+@@ -139,7 +139,7 @@ static int flush_buffer(struct line *line)
+ count = line->buffer + LINE_BUFSIZE - line->head;
+
+ n = write_chan(line->chan_out, line->head, count,
+- line->driver->write_irq);
++ line->write_irq);
+ if (n < 0)
+ return n;
+ if (n == count) {
+@@ -156,7 +156,7 @@ static int flush_buffer(struct line *line)
+
+ count = line->tail - line->head;
+ n = write_chan(line->chan_out, line->head, count,
+- line->driver->write_irq);
++ line->write_irq);
+
+ if (n < 0)
+ return n;
+@@ -195,7 +195,7 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
+ ret = buffer_data(line, buf, len);
+ else {
+ n = write_chan(line->chan_out, buf, len,
+- line->driver->write_irq);
++ line->write_irq);
+ if (n < 0) {
+ ret = n;
+ goto out_up;
+@@ -215,7 +215,7 @@ void line_throttle(struct tty_struct *tty)
+ {
+ struct line *line = tty->driver_data;
+
+- deactivate_chan(line->chan_in, line->driver->read_irq);
++ deactivate_chan(line->chan_in, line->read_irq);
+ line->throttled = 1;
+ }
+
+@@ -224,7 +224,7 @@ void line_unthrottle(struct tty_struct *tty)
+ struct line *line = tty->driver_data;
+
+ line->throttled = 0;
+- chan_interrupt(line, line->driver->read_irq);
++ chan_interrupt(line, line->read_irq);
+ }
+
+ static irqreturn_t line_write_interrupt(int irq, void *data)
+@@ -260,19 +260,23 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
+ int err;
+
+ if (input) {
+- err = um_request_irq(driver->read_irq, fd, IRQ_READ,
+- line_interrupt, IRQF_SHARED,
++ err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ,
++ line_interrupt, 0,
+ driver->read_irq_name, data);
+ if (err < 0)
+ return err;
++
++ line->read_irq = err;
+ }
+
+ if (output) {
+- err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+- line_write_interrupt, IRQF_SHARED,
++ err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE,
++ line_write_interrupt, 0,
+ driver->write_irq_name, data);
+ if (err < 0)
+ return err;
++
++ line->write_irq = err;
+ }
+
+ return 0;
+diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
+index bdb16b96e76fd..f15be75a3bf3b 100644
+--- a/arch/um/drivers/line.h
++++ b/arch/um/drivers/line.h
+@@ -23,9 +23,7 @@ struct line_driver {
+ const short minor_start;
+ const short type;
+ const short subtype;
+- const int read_irq;
+ const char *read_irq_name;
+- const int write_irq;
+ const char *write_irq_name;
+ struct mc_device mc;
+ struct tty_driver *driver;
+@@ -35,6 +33,8 @@ struct line {
+ struct tty_port port;
+ int valid;
+
++ int read_irq, write_irq;
++
+ char *init_str;
+ struct list_head chan_list;
+ struct chan *chan_in, *chan_out;
+diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
+index 6ead1e2404576..8ca67a6926830 100644
+--- a/arch/um/drivers/mconsole_kern.c
++++ b/arch/um/drivers/mconsole_kern.c
+@@ -224,7 +224,7 @@ void mconsole_go(struct mc_request *req)
+
+ void mconsole_stop(struct mc_request *req)
+ {
+- deactivate_fd(req->originating_fd, MCONSOLE_IRQ);
++ block_signals();
+ os_set_fd_block(req->originating_fd, 1);
+ mconsole_reply(req, "stopped", 0, 0);
+ for (;;) {
+@@ -247,6 +247,7 @@ void mconsole_stop(struct mc_request *req)
+ }
+ os_set_fd_block(req->originating_fd, 0);
+ mconsole_reply(req, "", 0, 0);
++ unblock_signals();
+ }
+
+ static DEFINE_SPINLOCK(mc_devices_lock);
+diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
+index 433a3f8f2ef3e..32b3341fe9707 100644
+--- a/arch/um/drivers/random.c
++++ b/arch/um/drivers/random.c
+@@ -28,7 +28,7 @@
+ * protects against a module being loaded twice at the same time.
+ */
+ static int random_fd = -1;
+-static struct hwrng hwrng = { 0, };
++static struct hwrng hwrng;
+ static DECLARE_COMPLETION(have_data);
+
+ static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block)
+diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
+index 41eae2e8fb652..8514966778d53 100644
+--- a/arch/um/drivers/ssl.c
++++ b/arch/um/drivers/ssl.c
+@@ -47,9 +47,7 @@ static struct line_driver driver = {
+ .minor_start = 64,
+ .type = TTY_DRIVER_TYPE_SERIAL,
+ .subtype = 0,
+- .read_irq = SSL_IRQ,
+ .read_irq_name = "ssl",
+- .write_irq = SSL_WRITE_IRQ,
+ .write_irq_name = "ssl-write",
+ .mc = {
+ .list = LIST_HEAD_INIT(driver.mc.list),
+diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
+index e8b762f4d8c25..489d5a746ed33 100644
+--- a/arch/um/drivers/stdio_console.c
++++ b/arch/um/drivers/stdio_console.c
+@@ -53,9 +53,7 @@ static struct line_driver driver = {
+ .minor_start = 0,
+ .type = TTY_DRIVER_TYPE_CONSOLE,
+ .subtype = SYSTEM_TYPE_CONSOLE,
+- .read_irq = CONSOLE_IRQ,
+ .read_irq_name = "console",
+- .write_irq = CONSOLE_WRITE_IRQ,
+ .write_irq_name = "console-write",
+ .mc = {
+ .list = LIST_HEAD_INIT(driver.mc.list),
+diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
+index cd9dc0556e913..fefd343412c79 100644
+--- a/arch/um/drivers/ubd_kern.c
++++ b/arch/um/drivers/ubd_kern.c
+@@ -27,6 +27,7 @@
+ #include <linux/blk-mq.h>
+ #include <linux/ata.h>
+ #include <linux/hdreg.h>
++#include <linux/major.h>
+ #include <linux/cdrom.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
+index cde6db184c26b..45a4bcd27a39b 100644
+--- a/arch/um/drivers/vector_kern.c
++++ b/arch/um/drivers/vector_kern.c
+@@ -770,6 +770,7 @@ static int vector_config(char *str, char **error_out)
+
+ if (parsed == NULL) {
+ *error_out = "vector_config failed to parse parameters";
++ kfree(params);
+ return -EINVAL;
+ }
+
+diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
+index c080666330234..d762d726b66cf 100644
+--- a/arch/um/drivers/virt-pci.c
++++ b/arch/um/drivers/virt-pci.c
+@@ -131,8 +131,11 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
+ out ? 1 : 0,
+ posted ? cmd : HANDLE_NO_FREE(cmd),
+ GFP_ATOMIC);
+- if (ret)
++ if (ret) {
++ if (posted)
++ kfree(cmd);
+ goto out;
++ }
+
+ if (posted) {
+ virtqueue_kick(dev->cmd_vq);
+@@ -181,15 +184,15 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
+ /* buf->data is maximum size - we may only use parts of it */
+ struct um_pci_message_buffer *buf;
+ u8 *data;
+- unsigned long ret = ~0ULL;
++ unsigned long ret = ULONG_MAX;
+
+ if (!dev)
+- return ~0ULL;
++ return ULONG_MAX;
+
+ buf = get_cpu_var(um_pci_msg_bufs);
+ data = buf->data;
+
+- memset(data, 0xff, sizeof(data));
++ memset(buf->data, 0xff, sizeof(buf->data));
+
+ switch (size) {
+ case 1:
+@@ -304,7 +307,7 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
+ /* buf->data is maximum size - we may only use parts of it */
+ struct um_pci_message_buffer *buf;
+ u8 *data;
+- unsigned long ret = ~0ULL;
++ unsigned long ret = ULONG_MAX;
+
+ buf = get_cpu_var(um_pci_msg_bufs);
+ data = buf->data;
+@@ -615,22 +618,33 @@ static void um_pci_virtio_remove(struct virtio_device *vdev)
+ struct um_pci_device *dev = vdev->priv;
+ int i;
+
+- /* Stop all virtqueues */
+- vdev->config->reset(vdev);
+- vdev->config->del_vqs(vdev);
+-
+ device_set_wakeup_enable(&vdev->dev, false);
+
+ mutex_lock(&um_pci_mtx);
+ for (i = 0; i < MAX_DEVICES; i++) {
+ if (um_pci_devices[i].dev != dev)
+ continue;
++
+ um_pci_devices[i].dev = NULL;
+ irq_free_desc(dev->irq);
++
++ break;
+ }
+ mutex_unlock(&um_pci_mtx);
+
+- um_pci_rescan();
++ if (i < MAX_DEVICES) {
++ struct pci_dev *pci_dev;
++
++ pci_dev = pci_get_slot(bridge->bus, i);
++ if (pci_dev)
++ pci_stop_and_remove_bus_device_locked(pci_dev);
++ }
++
++ /* Stop all virtqueues */
++ virtio_reset_device(vdev);
++ dev->cmd_vq = NULL;
++ dev->irq_vq = NULL;
++ vdev->config->del_vqs(vdev);
+
+ kfree(dev);
+ }
+diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
+index d51e445df7976..204e9dfbff1a0 100644
+--- a/arch/um/drivers/virtio_uml.c
++++ b/arch/um/drivers/virtio_uml.c
+@@ -21,6 +21,7 @@
+ * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
+ */
+ #include <linux/module.h>
++#include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <linux/slab.h>
+ #include <linux/virtio.h>
+@@ -49,6 +50,7 @@ struct virtio_uml_platform_data {
+ struct virtio_uml_device {
+ struct virtio_device vdev;
+ struct platform_device *pdev;
++ struct virtio_uml_platform_data *pdata;
+
+ spinlock_t sock_lock;
+ int sock, req_fd, irq;
+@@ -61,6 +63,7 @@ struct virtio_uml_device {
+
+ u8 config_changed_irq:1;
+ uint64_t vq_irq_vq_map;
++ int recv_rc;
+ };
+
+ struct virtio_uml_vq_info {
+@@ -146,14 +149,6 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+
+ rc = vhost_user_recv_header(fd, msg);
+
+- if (rc == -ECONNRESET && vu_dev->registered) {
+- struct virtio_uml_platform_data *pdata;
+-
+- pdata = vu_dev->pdev->dev.platform_data;
+-
+- virtio_break_device(&vu_dev->vdev);
+- schedule_work(&pdata->conn_broken_wk);
+- }
+ if (rc)
+ return rc;
+ size = msg->header.size;
+@@ -162,6 +157,22 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+ return full_read(fd, &msg->payload, size, false);
+ }
+
++static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
++ int rc)
++{
++ struct virtio_uml_platform_data *pdata = vu_dev->pdata;
++
++ if (rc != -ECONNRESET)
++ return;
++
++ if (!vu_dev->registered)
++ return;
++
++ vu_dev->registered = 0;
++
++ schedule_work(&pdata->conn_broken_wk);
++}
++
+ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
+ struct vhost_user_msg *msg,
+ size_t max_payload_size)
+@@ -169,8 +180,10 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
+ int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
+ max_payload_size, true);
+
+- if (rc)
++ if (rc) {
++ vhost_user_check_reset(vu_dev, rc);
+ return rc;
++ }
+
+ if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
+ return -EPROTO;
+@@ -367,6 +380,7 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
+ sizeof(msg.msg.payload) +
+ sizeof(msg.extra_payload));
+
++ vu_dev->recv_rc = rc;
+ if (rc)
+ return IRQ_NONE;
+
+@@ -410,7 +424,9 @@ static irqreturn_t vu_req_interrupt(int irq, void *data)
+ if (!um_irq_timetravel_handler_used())
+ ret = vu_req_read_message(vu_dev, NULL);
+
+- if (vu_dev->vq_irq_vq_map) {
++ if (vu_dev->recv_rc) {
++ vhost_user_check_reset(vu_dev, vu_dev->recv_rc);
++ } else if (vu_dev->vq_irq_vq_map) {
+ struct virtqueue *vq;
+
+ virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+@@ -1090,6 +1106,8 @@ static void virtio_uml_release_dev(struct device *d)
+ container_of(d, struct virtio_device, dev);
+ struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
++ time_travel_propagate_time();
++
+ /* might not have been opened due to not negotiating the feature */
+ if (vu_dev->req_fd >= 0) {
+ um_free_irq(vu_dev->irq, vu_dev);
+@@ -1113,21 +1131,72 @@ void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+ no_vq_suspend ? "dis" : "en");
+ }
+
++static void vu_of_conn_broken(struct work_struct *wk)
++{
++ struct virtio_uml_platform_data *pdata;
++ struct virtio_uml_device *vu_dev;
++
++ pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
++
++ vu_dev = platform_get_drvdata(pdata->pdev);
++
++ virtio_break_device(&vu_dev->vdev);
++
++ /*
++ * We can't remove the device from the devicetree so the only thing we
++ * can do is warn.
++ */
++ WARN_ON(1);
++}
++
+ /* Platform device */
+
++static struct virtio_uml_platform_data *
++virtio_uml_create_pdata(struct platform_device *pdev)
++{
++ struct device_node *np = pdev->dev.of_node;
++ struct virtio_uml_platform_data *pdata;
++ int ret;
++
++ if (!np)
++ return ERR_PTR(-EINVAL);
++
++ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
++ if (!pdata)
++ return ERR_PTR(-ENOMEM);
++
++ INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken);
++ pdata->pdev = pdev;
++
++ ret = of_property_read_string(np, "socket-path", &pdata->socket_path);
++ if (ret)
++ return ERR_PTR(ret);
++
++ ret = of_property_read_u32(np, "virtio-device-id",
++ &pdata->virtio_device_id);
++ if (ret)
++ return ERR_PTR(ret);
++
++ return pdata;
++}
++
+ static int virtio_uml_probe(struct platform_device *pdev)
+ {
+ struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+ struct virtio_uml_device *vu_dev;
+ int rc;
+
+- if (!pdata)
+- return -EINVAL;
++ if (!pdata) {
++ pdata = virtio_uml_create_pdata(pdev);
++ if (IS_ERR(pdata))
++ return PTR_ERR(pdata);
++ }
+
+ vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
+ if (!vu_dev)
+ return -ENOMEM;
+
++ vu_dev->pdata = pdata;
+ vu_dev->vdev.dev.parent = &pdev->dev;
+ vu_dev->vdev.dev.release = virtio_uml_release_dev;
+ vu_dev->vdev.config = &virtio_uml_config_ops;
+@@ -1136,6 +1205,8 @@ static int virtio_uml_probe(struct platform_device *pdev)
+ vu_dev->pdev = pdev;
+ vu_dev->req_fd = -1;
+
++ time_travel_propagate_time();
++
+ do {
+ rc = os_connect_socket(pdata->socket_path);
+ } while (rc == -EINTR);
+@@ -1201,8 +1272,14 @@ static int vu_unregister_cmdline_device(struct device *dev, void *data)
+ static void vu_conn_broken(struct work_struct *wk)
+ {
+ struct virtio_uml_platform_data *pdata;
++ struct virtio_uml_device *vu_dev;
+
+ pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
++
++ vu_dev = platform_get_drvdata(pdata->pdev);
++
++ virtio_break_device(&vu_dev->vdev);
++
+ vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
+ }
+
+diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
+index e5a7b552bb384..a8c763c296b48 100644
+--- a/arch/um/include/asm/Kbuild
++++ b/arch/um/include/asm/Kbuild
+@@ -4,6 +4,7 @@ generic-y += bug.h
+ generic-y += compat.h
+ generic-y += current.h
+ generic-y += device.h
++generic-y += dma-mapping.h
+ generic-y += emergency-restart.h
+ generic-y += exec.h
+ generic-y += extable.h
+diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h
+new file mode 100644
+index 0000000000000..2f24cb96391d7
+--- /dev/null
++++ b/arch/um/include/asm/archrandom.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __ASM_UM_ARCHRANDOM_H__
++#define __ASM_UM_ARCHRANDOM_H__
++
++#include <linux/types.h>
++
++/* This is from <os.h>, but better not to #include that in a global header here. */
++ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
++
++static inline bool __must_check arch_get_random_long(unsigned long *v)
++{
++ return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
++}
++
++static inline bool __must_check arch_get_random_int(unsigned int *v)
++{
++ return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
++}
++
++static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
++{
++ return false;
++}
++
++static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
++{
++ return false;
++}
++
++#endif
+diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h
+deleted file mode 100644
+index 4473942a08397..0000000000000
+--- a/arch/um/include/asm/bugs.h
++++ /dev/null
+@@ -1,7 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef __UM_BUGS_H
+-#define __UM_BUGS_H
+-
+-void check_bugs(void);
+-
+-#endif
+diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h
+index 56fc2b8f2dd01..e79b2ab6f40c8 100644
+--- a/arch/um/include/asm/delay.h
++++ b/arch/um/include/asm/delay.h
+@@ -14,7 +14,7 @@ static inline void um_ndelay(unsigned long nsecs)
+ ndelay(nsecs);
+ }
+ #undef ndelay
+-#define ndelay um_ndelay
++#define ndelay(n) um_ndelay(n)
+
+ static inline void um_udelay(unsigned long usecs)
+ {
+@@ -26,5 +26,5 @@ static inline void um_udelay(unsigned long usecs)
+ udelay(usecs);
+ }
+ #undef udelay
+-#define udelay um_udelay
++#define udelay(n) um_udelay(n)
+ #endif /* __UM_DELAY_H */
+diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
+index e187c789369d3..749dfe8512e84 100644
+--- a/arch/um/include/asm/irq.h
++++ b/arch/um/include/asm/irq.h
+@@ -4,19 +4,15 @@
+
+ #define TIMER_IRQ 0
+ #define UMN_IRQ 1
+-#define CONSOLE_IRQ 2
+-#define CONSOLE_WRITE_IRQ 3
+-#define UBD_IRQ 4
+-#define UM_ETH_IRQ 5
+-#define SSL_IRQ 6
+-#define SSL_WRITE_IRQ 7
+-#define ACCEPT_IRQ 8
+-#define MCONSOLE_IRQ 9
+-#define WINCH_IRQ 10
+-#define SIGIO_WRITE_IRQ 11
+-#define TELNETD_IRQ 12
+-#define XTERM_IRQ 13
+-#define RANDOM_IRQ 14
++#define UBD_IRQ 2
++#define UM_ETH_IRQ 3
++#define ACCEPT_IRQ 4
++#define MCONSOLE_IRQ 5
++#define WINCH_IRQ 6
++#define SIGIO_WRITE_IRQ 7
++#define TELNETD_IRQ 8
++#define XTERM_IRQ 9
++#define RANDOM_IRQ 10
+
+ #ifdef CONFIG_UML_NET_VECTOR
+
+diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
+index dab5744e9253d..1e69ef5bc35e0 100644
+--- a/arch/um/include/asm/irqflags.h
++++ b/arch/um/include/asm/irqflags.h
+@@ -3,7 +3,7 @@
+ #define __UM_IRQFLAGS_H
+
+ extern int signals_enabled;
+-int set_signals(int enable);
++int um_set_signals(int enable);
+ void block_signals(void);
+ void unblock_signals(void);
+
+@@ -16,7 +16,7 @@ static inline unsigned long arch_local_save_flags(void)
+ #define arch_local_irq_restore arch_local_irq_restore
+ static inline void arch_local_irq_restore(unsigned long flags)
+ {
+- set_signals(flags);
++ um_set_signals(flags);
+ }
+
+ #define arch_local_irq_enable arch_local_irq_enable
+diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
+index 3b1cb8b3b1864..e610e932cfe1e 100644
+--- a/arch/um/include/asm/thread_info.h
++++ b/arch/um/include/asm/thread_info.h
+@@ -64,6 +64,7 @@ static inline struct thread_info *current_thread_info(void)
+ #define TIF_RESTORE_SIGMASK 7
+ #define TIF_NOTIFY_RESUME 8
+ #define TIF_SECCOMP 9 /* secure computing */
++#define TIF_SINGLESTEP 10 /* single stepping userspace */
+
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+@@ -72,5 +73,6 @@ static inline struct thread_info *current_thread_info(void)
+ #define _TIF_MEMDIE (1 << TIF_MEMDIE)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+
+ #endif
+diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h
+index e392a9a5bc9bd..9f27176adb26d 100644
+--- a/arch/um/include/asm/timex.h
++++ b/arch/um/include/asm/timex.h
+@@ -2,13 +2,8 @@
+ #ifndef __UM_TIMEX_H
+ #define __UM_TIMEX_H
+
+-typedef unsigned long cycles_t;
+-
+-static inline cycles_t get_cycles (void)
+-{
+- return 0;
+-}
+-
+ #define CLOCK_TICK_RATE (HZ)
+
++#include <asm-generic/timex.h>
++
+ #endif
+diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
+index f512704a9ec7b..647fae200c5d3 100644
+--- a/arch/um/include/asm/xor.h
++++ b/arch/um/include/asm/xor.h
+@@ -4,8 +4,10 @@
+
+ #ifdef CONFIG_64BIT
+ #undef CONFIG_X86_32
++#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64))
+ #else
+ #define CONFIG_X86_32 1
++#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs))
+ #endif
+
+ #include <asm/cpufeature.h>
+@@ -16,7 +18,7 @@
+ #undef XOR_SELECT_TEMPLATE
+ /* pick an arbitrary one - measuring isn't possible with inf-cpu */
+ #define XOR_SELECT_TEMPLATE(x) \
+- (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
++ (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x)
+ #endif
+
+ #endif
+diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
+index bdb2869b72b31..8863319039f3d 100644
+--- a/arch/um/include/shared/longjmp.h
++++ b/arch/um/include/shared/longjmp.h
+@@ -18,7 +18,7 @@ extern void longjmp(jmp_buf, int);
+ enable = *(volatile int *)&signals_enabled; \
+ n = setjmp(*buf); \
+ if(n != 0) \
+- set_signals_trace(enable); \
++ um_set_signals_trace(enable); \
+ n; })
+
+ #endif
+diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
+index 96d400387c93e..90e9c9f86f15c 100644
+--- a/arch/um/include/shared/os.h
++++ b/arch/um/include/shared/os.h
+@@ -11,6 +11,12 @@
+ #include <irq_user.h>
+ #include <longjmp.h>
+ #include <mm_id.h>
++/* This is to get size_t */
++#ifndef __UM_HOST__
++#include <linux/types.h>
++#else
++#include <sys/types.h>
++#endif
+
+ #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
+
+@@ -238,8 +244,8 @@ extern void send_sigio_to_self(void);
+ extern int change_sig(int signal, int on);
+ extern void block_signals(void);
+ extern void unblock_signals(void);
+-extern int set_signals(int enable);
+-extern int set_signals_trace(int enable);
++extern int um_set_signals(int enable);
++extern int um_set_signals_trace(int enable);
+ extern int os_is_signal_stack(void);
+ extern void deliver_alarm(void);
+ extern void register_pm_wake_signal(void);
+@@ -252,6 +258,7 @@ extern void stack_protections(unsigned long address);
+ extern int raw(int fd);
+ extern void setup_machinename(char *machine_out);
+ extern void setup_hostinfo(char *buf, int len);
++extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
+ extern void os_dump_core(void) __attribute__ ((noreturn));
+ extern void um_early_printk(const char *s, unsigned int n);
+ extern void os_fix_helper_signals(void);
+diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
+index 0c50fa6e8a55b..fbb709a222839 100644
+--- a/arch/um/include/shared/registers.h
++++ b/arch/um/include/shared/registers.h
+@@ -16,8 +16,8 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs);
+ extern int save_fpx_registers(int pid, unsigned long *fp_regs);
+ extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
+ extern int save_registers(int pid, struct uml_pt_regs *regs);
+-extern int restore_registers(int pid, struct uml_pt_regs *regs);
+-extern int init_registers(int pid);
++extern int restore_pid_registers(int pid, struct uml_pt_regs *regs);
++extern int init_pid_registers(int pid);
+ extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
+ extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
+ extern int get_fp_registers(int pid, unsigned long *regs);
+diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
+index 4d84981003419..335dcb2d63e78 100644
+--- a/arch/um/kernel/exec.c
++++ b/arch/um/kernel/exec.c
+@@ -42,7 +42,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
+ {
+ PT_REGS_IP(regs) = eip;
+ PT_REGS_SP(regs) = esp;
+- current->ptrace &= ~PT_DTRACE;
++ clear_thread_flag(TIF_SINGLESTEP);
+ #ifdef SUBARCH_EXECVE1
+ SUBARCH_EXECVE1(regs->regs);
+ #endif
+diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
+index b1e5634398d09..3a85bde3e1734 100644
+--- a/arch/um/kernel/ksyms.c
++++ b/arch/um/kernel/ksyms.c
+@@ -6,7 +6,7 @@
+ #include <linux/module.h>
+ #include <os.h>
+
+-EXPORT_SYMBOL(set_signals);
++EXPORT_SYMBOL(um_set_signals);
+ EXPORT_SYMBOL(signals_enabled);
+
+ EXPORT_SYMBOL(os_stat_fd);
+diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+index 457a38db368b7..b3fbfca494006 100644
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -339,7 +339,7 @@ int singlestepping(void * t)
+ {
+ struct task_struct *task = t ? t : current;
+
+- if (!(task->ptrace & PT_DTRACE))
++ if (!test_thread_flag(TIF_SINGLESTEP))
+ return 0;
+
+ if (task->thread.singlestep_syscall)
+diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
+index b425f47bddbb3..d37802ced5636 100644
+--- a/arch/um/kernel/ptrace.c
++++ b/arch/um/kernel/ptrace.c
+@@ -12,7 +12,7 @@
+
+ void user_enable_single_step(struct task_struct *child)
+ {
+- child->ptrace |= PT_DTRACE;
++ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -22,7 +22,7 @@ void user_enable_single_step(struct task_struct *child)
+
+ void user_disable_single_step(struct task_struct *child)
+ {
+- child->ptrace &= ~PT_DTRACE;
++ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code)
+ }
+
+ /*
+- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
++ * XXX Check TIF_SINGLESTEP for singlestepping check and
+ * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
+ */
+ int syscall_trace_enter(struct pt_regs *regs)
+@@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs *regs)
+ audit_syscall_exit(regs);
+
+ /* Fake a debug trap */
+- if (ptraced & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ send_sigtrap(&regs->regs, 0);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
+index 88cd9b5c1b744..ae4658f576ab7 100644
+--- a/arch/um/kernel/signal.c
++++ b/arch/um/kernel/signal.c
+@@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+ unsigned long sp;
+ int err;
+
+- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
++ if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
+ singlestep = 1;
+
+ /* Did we come from a system call? */
+@@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs)
+ * on the host. The tracing thread will check this flag and
+ * PTRACE_SYSCALL if necessary.
+ */
+- if (current->ptrace & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ current->thread.singlestep_syscall =
+ is_syscall(PT_REGS_IP(&current->thread.regs));
+
+diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
+index 3198c47673879..c32efb09db214 100644
+--- a/arch/um/kernel/trap.c
++++ b/arch/um/kernel/trap.c
+@@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip)
+
+ void fatal_sigsegv(void)
+ {
+- force_sigsegv(SIGSEGV);
++ force_fatal_sig(SIGSEGV);
+ do_signal(&current->thread.regs);
+ /*
+ * This is to tell gcc that we're not returning - do_signal
+diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
+index a149a5e9a16a1..748595b054c44 100644
+--- a/arch/um/kernel/um_arch.c
++++ b/arch/um/kernel/um_arch.c
+@@ -3,6 +3,7 @@
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
++#include <linux/cpu.h>
+ #include <linux/delay.h>
+ #include <linux/init.h>
+ #include <linux/mm.h>
+@@ -16,6 +17,7 @@
+ #include <linux/sched/task.h>
+ #include <linux/kmsg_dump.h>
+ #include <linux/suspend.h>
++#include <linux/random.h>
+
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+@@ -30,7 +32,7 @@
+ #include <os.h>
+
+ #define DEFAULT_COMMAND_LINE_ROOT "root=98:0"
+-#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty"
++#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty0"
+
+ /* Changed in add_arg and setup_arch, which run before SMP is started */
+ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 };
+@@ -93,7 +95,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
+
+ static void *c_start(struct seq_file *m, loff_t *pos)
+ {
+- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
++ return *pos < nr_cpu_ids ? cpu_data + *pos : NULL;
+ }
+
+ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+@@ -404,6 +406,8 @@ int __init __weak read_initrd(void)
+
+ void __init setup_arch(char **cmdline_p)
+ {
++ u8 rng_seed[32];
++
+ stack_protections((unsigned long) &init_thread_info);
+ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
+ mem_total_pages(physmem_size, iomem_size, highmem);
+@@ -413,14 +417,27 @@ void __init setup_arch(char **cmdline_p)
+ strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+ setup_hostinfo(host_info, sizeof host_info);
++
++ if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
++ add_bootloader_randomness(rng_seed, sizeof(rng_seed));
++ memzero_explicit(rng_seed, sizeof(rng_seed));
++ }
+ }
+
+-void __init check_bugs(void)
++void __init arch_cpu_finalize_init(void)
+ {
+ arch_check_bugs();
+ os_check_bugs();
+ }
+
++void apply_retpolines(s32 *start, s32 *end)
++{
++}
++
++void apply_returns(s32 *start, s32 *end)
++{
++}
++
+ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+ {
+ }
+diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S
+index 16e49bfa2b426..53d719c04ba94 100644
+--- a/arch/um/kernel/vmlinux.lds.S
++++ b/arch/um/kernel/vmlinux.lds.S
+@@ -1,4 +1,4 @@
+-
++#define RUNTIME_DISCARD_EXIT
+ KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
+
+ #ifdef CONFIG_LD_SCRIPT_STATIC
+diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
+index 2d9270508e156..b123955be7acc 100644
+--- a/arch/um/os-Linux/registers.c
++++ b/arch/um/os-Linux/registers.c
+@@ -21,7 +21,7 @@ int save_registers(int pid, struct uml_pt_regs *regs)
+ return 0;
+ }
+
+-int restore_registers(int pid, struct uml_pt_regs *regs)
++int restore_pid_registers(int pid, struct uml_pt_regs *regs)
+ {
+ int err;
+
+@@ -36,7 +36,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs)
+ static unsigned long exec_regs[MAX_REG_NR];
+ static unsigned long exec_fp_regs[FP_SIZE];
+
+-int init_registers(int pid)
++int init_pid_registers(int pid)
+ {
+ int err;
+
+diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
+index 6597ea1986ffa..9e71794839e87 100644
+--- a/arch/um/os-Linux/sigio.c
++++ b/arch/um/os-Linux/sigio.c
+@@ -132,7 +132,7 @@ static void update_thread(void)
+ int n;
+ char c;
+
+- flags = set_signals_trace(0);
++ flags = um_set_signals_trace(0);
+ CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
+ if (n != sizeof(c)) {
+ printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
+@@ -147,7 +147,7 @@ static void update_thread(void)
+ goto fail;
+ }
+
+- set_signals_trace(flags);
++ um_set_signals_trace(flags);
+ return;
+ fail:
+ /* Critical section start */
+@@ -161,7 +161,7 @@ static void update_thread(void)
+ close(write_sigio_fds[0]);
+ close(write_sigio_fds[1]);
+ /* Critical section end */
+- set_signals_trace(flags);
++ um_set_signals_trace(flags);
+ }
+
+ int __add_sigio_fd(int fd)
+diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
+index 6cf098c23a394..24a403a70a020 100644
+--- a/arch/um/os-Linux/signal.c
++++ b/arch/um/os-Linux/signal.c
+@@ -94,7 +94,7 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
+
+ sig_handler_common(sig, si, mc);
+
+- set_signals_trace(enabled);
++ um_set_signals_trace(enabled);
+ }
+
+ static void timer_real_alarm_handler(mcontext_t *mc)
+@@ -126,7 +126,7 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+
+ signals_active &= ~SIGALRM_MASK;
+
+- set_signals_trace(enabled);
++ um_set_signals_trace(enabled);
+ }
+
+ void deliver_alarm(void) {
+@@ -348,7 +348,7 @@ void unblock_signals(void)
+ }
+ }
+
+-int set_signals(int enable)
++int um_set_signals(int enable)
+ {
+ int ret;
+ if (signals_enabled == enable)
+@@ -362,7 +362,7 @@ int set_signals(int enable)
+ return ret;
+ }
+
+-int set_signals_trace(int enable)
++int um_set_signals_trace(int enable)
+ {
+ int ret;
+ if (signals_enabled == enable)
+diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
+index 87d3129e7362e..0df2ebcc97c0d 100644
+--- a/arch/um/os-Linux/skas/process.c
++++ b/arch/um/os-Linux/skas/process.c
+@@ -5,6 +5,7 @@
+ */
+
+ #include <stdlib.h>
++#include <stdbool.h>
+ #include <unistd.h>
+ #include <sched.h>
+ #include <errno.h>
+@@ -707,10 +708,24 @@ void halt_skas(void)
+ UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
+ }
+
++static bool noreboot;
++
++static int __init noreboot_cmd_param(char *str, int *add)
++{
++ noreboot = true;
++ return 0;
++}
++
++__uml_setup("noreboot", noreboot_cmd_param,
++"noreboot\n"
++" Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n"
++" This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n"
++" crashes in CI\n");
++
+ void reboot_skas(void)
+ {
+ block_signals_trace();
+- UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT);
++ UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT);
+ }
+
+ void __switch_mm(struct mm_id *mm_idp)
+diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
+index 8a72c99994eb1..e3ee4db58b40d 100644
+--- a/arch/um/os-Linux/start_up.c
++++ b/arch/um/os-Linux/start_up.c
+@@ -368,7 +368,7 @@ void __init os_early_checks(void)
+ check_tmpexec();
+
+ pid = start_ptraced_child();
+- if (init_registers(pid))
++ if (init_pid_registers(pid))
+ fatal("Failed to initialize default registers");
+ stop_ptraced_child(pid, 1, 1);
+ }
+diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
+index 41297ec404bf9..fc0f2a9dee5af 100644
+--- a/arch/um/os-Linux/util.c
++++ b/arch/um/os-Linux/util.c
+@@ -14,6 +14,7 @@
+ #include <sys/wait.h>
+ #include <sys/mman.h>
+ #include <sys/utsname.h>
++#include <sys/random.h>
+ #include <init.h>
+ #include <os.h>
+
+@@ -96,6 +97,11 @@ static inline void __attribute__ ((noreturn)) uml_abort(void)
+ exit(127);
+ }
+
++ssize_t os_getrandom(void *buf, size_t len, unsigned int flags)
++{
++ return getrandom(buf, len, flags);
++}
++
+ /*
+ * UML helper threads must not handle SIGWINCH/INT/TERM
+ */
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index d9830e7e1060f..cfb1edd25437d 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -68,6 +68,7 @@ config X86
+ select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_CACHE_LINE_SIZE
++ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_DEBUG_VIRTUAL
+ select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
+@@ -260,6 +261,7 @@ config X86
+ select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
+ select TRACE_IRQFLAGS_SUPPORT
++ select TRACE_IRQFLAGS_NMI_SUPPORT
+ select USER_STACKTRACE_SUPPORT
+ select VIRT_TO_BUS
+ select HAVE_ARCH_KCSAN if X86_64
+@@ -459,15 +461,6 @@ config GOLDFISH
+ def_bool y
+ depends on X86_GOLDFISH
+
+-config RETPOLINE
+- bool "Avoid speculative indirect branches in kernel"
+- default y
+- help
+- Compile kernel with the retpoline compiler options to guard against
+- kernel-to-user data leaks by avoiding speculative indirect
+- branches. Requires a compiler with -mindirect-branch=thunk-extern
+- support for full protection. The kernel may run slower.
+-
+ config X86_CPU_RESCTRL
+ bool "x86 CPU resource control support"
+ depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
+@@ -1256,7 +1249,8 @@ config TOSHIBA
+
+ config I8K
+ tristate "Dell i8k legacy laptop support"
+- select HWMON
++ depends on HWMON
++ depends on PROC_FS
+ select SENSORS_DELL_SMM
+ help
+ This option enables legacy /proc/i8k userspace interface in hwmon
+@@ -1310,7 +1304,7 @@ config MICROCODE
+
+ config MICROCODE_INTEL
+ bool "Intel microcode loading support"
+- depends on MICROCODE
++ depends on CPU_SUP_INTEL && MICROCODE
+ default MICROCODE
+ help
+ This options enables microcode patch loading support for Intel
+@@ -1322,22 +1316,21 @@ config MICROCODE_INTEL
+
+ config MICROCODE_AMD
+ bool "AMD microcode loading support"
+- depends on MICROCODE
++ depends on CPU_SUP_AMD && MICROCODE
+ help
+ If you select this option, microcode patch loading support for AMD
+ processors will be enabled.
+
+-config MICROCODE_OLD_INTERFACE
+- bool "Ancient loading interface (DEPRECATED)"
++config MICROCODE_LATE_LOADING
++ bool "Late microcode loading (DANGEROUS)"
+ default n
+ depends on MICROCODE
+ help
+- DO NOT USE THIS! This is the ancient /dev/cpu/microcode interface
+- which was used by userspace tools like iucode_tool and microcode.ctl.
+- It is inadequate because it runs too late to be able to properly
+- load microcode on a machine and it needs special tools. Instead, you
+- should've switched to the early loading method with the initrd or
+- builtin microcode by now: Documentation/x86/microcode.rst
++ Loading microcode late, when the system is up and executing instructions
++ is a tricky business and should be avoided if possible. Just the sequence
++ of synchronizing all cores and SMT threads is one fragile dance which does
++ not guarantee that cores might not softlock after the loading. Therefore,
++ use this at your own risk. Late loading taints the kernel too.
+
+ config X86_MSR
+ tristate "/dev/cpu/*/msr - Model-specific register support"
+@@ -1518,6 +1511,7 @@ config AMD_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ select INSTRUCTION_DECODER
+ select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
++ select ARCH_HAS_CC_PLATFORM
+ help
+ Say yes to enable support for the encryption of system memory.
+ This requires an AMD processor that supports Secure Memory
+@@ -1917,6 +1911,7 @@ config EFI
+ depends on ACPI
+ select UCS2_STRING
+ select EFI_RUNTIME_WRAPPERS
++ select ARCH_USE_MEMREMAP_PROT
+ help
+ This enables the kernel to use EFI runtime services that are
+ available (such as the EFI variable services).
+@@ -1931,7 +1926,6 @@ config EFI
+ config EFI_STUB
+ bool "EFI stub support"
+ depends on EFI && !X86_USE_3DNOW
+- depends on $(cc-option,-mabi=ms) || X86_32
+ select RELOCATABLE
+ help
+ This kernel feature allows a bzImage to be loaded directly
+@@ -2392,6 +2386,114 @@ source "kernel/livepatch/Kconfig"
+
+ endmenu
+
++config CC_HAS_SLS
++ def_bool $(cc-option,-mharden-sls=all)
++
++config CC_HAS_RETURN_THUNK
++ def_bool $(cc-option,-mfunction-return=thunk-extern)
++
++menuconfig SPECULATION_MITIGATIONS
++ bool "Mitigations for speculative execution vulnerabilities"
++ default y
++ help
++ Say Y here to enable options which enable mitigations for
++ speculative execution hardware vulnerabilities.
++
++ If you say N, all mitigations will be disabled. You really
++ should know what you are doing to say so.
++
++if SPECULATION_MITIGATIONS
++
++config PAGE_TABLE_ISOLATION
++ bool "Remove the kernel mapping in user mode"
++ default y
++ depends on (X86_64 || X86_PAE)
++ help
++ This feature reduces the number of hardware side channels by
++ ensuring that the majority of kernel addresses are not mapped
++ into userspace.
++
++ See Documentation/x86/pti.rst for more details.
++
++config RETPOLINE
++ bool "Avoid speculative indirect branches in kernel"
++ default y
++ help
++ Compile kernel with the retpoline compiler options to guard against
++ kernel-to-user data leaks by avoiding speculative indirect
++ branches. Requires a compiler with -mindirect-branch=thunk-extern
++ support for full protection. The kernel may run slower.
++
++config RETHUNK
++ bool "Enable return-thunks"
++ depends on RETPOLINE && CC_HAS_RETURN_THUNK
++ default y if X86_64
++ help
++ Compile the kernel with the return-thunks compiler option to guard
++ against kernel-to-user data leaks by avoiding return speculation.
++ Requires a compiler with -mfunction-return=thunk-extern
++ support for full protection. The kernel may run slower.
++
++config CPU_UNRET_ENTRY
++ bool "Enable UNRET on kernel entry"
++ depends on CPU_SUP_AMD && RETHUNK && X86_64
++ default y
++ help
++ Compile the kernel with support for the retbleed=unret mitigation.
++
++config CPU_IBPB_ENTRY
++ bool "Enable IBPB on kernel entry"
++ depends on CPU_SUP_AMD && X86_64
++ default y
++ help
++ Compile the kernel with support for the retbleed=ibpb mitigation.
++
++config CPU_IBRS_ENTRY
++ bool "Enable IBRS on kernel entry"
++ depends on CPU_SUP_INTEL && X86_64
++ default y
++ help
++ Compile the kernel with support for the spectre_v2=ibrs mitigation.
++ This mitigates both spectre_v2 and retbleed at great cost to
++ performance.
++
++config CPU_SRSO
++ bool "Mitigate speculative RAS overflow on AMD"
++ depends on CPU_SUP_AMD && X86_64 && RETHUNK
++ default y
++ help
++ Enable the SRSO mitigation needed on AMD Zen1-4 machines.
++
++config SLS
++ bool "Mitigate Straight-Line-Speculation"
++ depends on CC_HAS_SLS && X86_64
++ default n
++ help
++ Compile the kernel with straight-line-speculation options to guard
++ against straight line speculation. The kernel image might be slightly
++ larger.
++
++config GDS_FORCE_MITIGATION
++ bool "Force GDS Mitigation"
++ depends on CPU_SUP_INTEL
++ default n
++ help
++ Gather Data Sampling (GDS) is a hardware vulnerability which allows
++ unprivileged speculative access to data which was previously stored in
++ vector registers.
++
++ This option is equivalent to setting gather_data_sampling=force on the
++ command line. The microcode mitigation is used if present, otherwise
++ AVX is disabled as a mitigation. On affected systems that are missing
++ the microcode any userspace code that unconditionally uses AVX will
++ break with this option set.
++
++ Setting this option on systems not vulnerable to GDS has no effect.
++
++ If in doubt, say N.
++
++endif
++
+ config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
+@@ -2795,6 +2897,11 @@ config IA32_AOUT
+ config X86_X32
+ bool "x32 ABI for 64-bit mode"
+ depends on X86_64
++ # llvm-objcopy does not convert x86_64 .note.gnu.property or
++ # compressed debug sections to x86_x32 properly:
++ # https://github.com/ClangBuiltLinux/linux/issues/514
++ # https://github.com/ClangBuiltLinux/linux/issues/1141
++ depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm)
+ help
+ Include code to run binaries for the x32 native 32-bit ABI
+ for 64-bit processors. An x32 process gets access to the
+diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
+index d3a6f74a94bdf..d4d6db4dde220 100644
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -1,8 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0
+
+-config TRACE_IRQFLAGS_NMI_SUPPORT
+- def_bool y
+-
+ config EARLY_PRINTK_USB
+ bool
+
+diff --git a/arch/x86/Makefile b/arch/x86/Makefile
+index 7488cfbbd2f60..9c09bbd390cec 100644
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -24,7 +24,7 @@ endif
+
+ # How to compile the 16-bit code. Note we always compile for -march=i386;
+ # that way we can complain to the user if the CPU is insufficient.
+-REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
++REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
+ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
+@@ -179,6 +179,10 @@ ifdef CONFIG_RETPOLINE
+ endif
+ endif
+
++ifdef CONFIG_SLS
++ KBUILD_CFLAGS += -mharden-sls=all
++endif
++
+ KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ ifdef CONFIG_LTO_CLANG
+diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
+index b5aecb524a8aa..ffec8bb01ba8c 100644
+--- a/arch/x86/boot/Makefile
++++ b/arch/x86/boot/Makefile
+@@ -103,7 +103,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+ AFLAGS_header.o += -I$(objtree)/$(obj)
+ $(obj)/header.o: $(obj)/zoffset.h
+
+-LDFLAGS_setup.elf := -m elf_i386 -T
++LDFLAGS_setup.elf := -m elf_i386 -z noexecstack -T
+ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
+ $(call if_changed,ld)
+
+diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
+index 5521ea12f44e0..aa9b964575843 100644
+--- a/arch/x86/boot/bioscall.S
++++ b/arch/x86/boot/bioscall.S
+@@ -32,7 +32,7 @@ intcall:
+ movw %dx, %si
+ movw %sp, %di
+ movw $11, %cx
+- rep; movsd
++ rep; movsl
+
+ /* Pop full state from the stack */
+ popal
+@@ -67,7 +67,7 @@ intcall:
+ jz 4f
+ movw %sp, %si
+ movw $11, %cx
+- rep; movsd
++ rep; movsl
+ 4: addw $44, %sp
+
+ /* Restore state and return */
+diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
+index 34c9dbb6a47d6..686a9d75a0e41 100644
+--- a/arch/x86/boot/boot.h
++++ b/arch/x86/boot/boot.h
+@@ -110,66 +110,78 @@ typedef unsigned int addr_t;
+
+ static inline u8 rdfs8(addr_t addr)
+ {
++ u8 *ptr = (u8 *)absolute_pointer(addr);
+ u8 v;
+- asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
++ asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
+ return v;
+ }
+ static inline u16 rdfs16(addr_t addr)
+ {
++ u16 *ptr = (u16 *)absolute_pointer(addr);
+ u16 v;
+- asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
++ asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+ }
+ static inline u32 rdfs32(addr_t addr)
+ {
++ u32 *ptr = (u32 *)absolute_pointer(addr);
+ u32 v;
+- asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
++ asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+ }
+
+ static inline void wrfs8(u8 v, addr_t addr)
+ {
+- asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
++ u8 *ptr = (u8 *)absolute_pointer(addr);
++ asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
+ }
+ static inline void wrfs16(u16 v, addr_t addr)
+ {
+- asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
++ u16 *ptr = (u16 *)absolute_pointer(addr);
++ asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+ }
+ static inline void wrfs32(u32 v, addr_t addr)
+ {
+- asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
++ u32 *ptr = (u32 *)absolute_pointer(addr);
++ asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+ }
+
+ static inline u8 rdgs8(addr_t addr)
+ {
++ u8 *ptr = (u8 *)absolute_pointer(addr);
+ u8 v;
+- asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr));
++ asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
+ return v;
+ }
+ static inline u16 rdgs16(addr_t addr)
+ {
++ u16 *ptr = (u16 *)absolute_pointer(addr);
+ u16 v;
+- asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr));
++ asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+ }
+ static inline u32 rdgs32(addr_t addr)
+ {
++ u32 *ptr = (u32 *)absolute_pointer(addr);
+ u32 v;
+- asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr));
++ asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
+ return v;
+ }
+
+ static inline void wrgs8(u8 v, addr_t addr)
+ {
+- asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v));
++ u8 *ptr = (u8 *)absolute_pointer(addr);
++ asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
+ }
+ static inline void wrgs16(u16 v, addr_t addr)
+ {
+- asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v));
++ u16 *ptr = (u16 *)absolute_pointer(addr);
++ asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
+ }
+ static inline void wrgs32(u32 v, addr_t addr)
+ {
+- asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v));
++ u32 *ptr = (u32 *)absolute_pointer(addr);
++ asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
+ }
+
+ /* Note: these only return true/false, not a signed return value! */
+diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
+index 431bf7f846c3c..15c5ae62a0e94 100644
+--- a/arch/x86/boot/compressed/Makefile
++++ b/arch/x86/boot/compressed/Makefile
+@@ -28,7 +28,11 @@ KCOV_INSTRUMENT := n
+ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
+
+-KBUILD_CFLAGS := -m$(BITS) -O2
++# CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in
++# case of cross compiling, as it has the '--target=' flag, which is needed to
++# avoid errors with '-march=i386', and future flags may depend on the target to
++# be valid.
++KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
+ KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+ KBUILD_CFLAGS += -Wundef
+ KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+@@ -47,7 +51,6 @@ KBUILD_CFLAGS += -D__DISABLE_EXPORTS
+ # Disable relocation relaxation in case the link is not PIE.
+ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
+ KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
+-KBUILD_CFLAGS += $(CLANG_FLAGS)
+
+ # sev.c indirectly inludes inat-table.h which is generated during
+ # compilation and stored in $(objtree). Add the directory to the includes so
+@@ -66,6 +69,10 @@ LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
+ ifdef CONFIG_LD_ORPHAN_WARN
+ LDFLAGS_vmlinux += --orphan-handling=warn
+ endif
++LDFLAGS_vmlinux += -z noexecstack
++ifeq ($(CONFIG_LD_IS_BFD),y)
++LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
++endif
+ LDFLAGS_vmlinux += -T
+
+ hostprogs := mkpiggy
+diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
+index 8bb92e9f4e973..70052779b235c 100644
+--- a/arch/x86/boot/compressed/efi_thunk_64.S
++++ b/arch/x86/boot/compressed/efi_thunk_64.S
+@@ -93,7 +93,7 @@ SYM_FUNC_START(__efi64_thunk)
+
+ pop %rbx
+ pop %rbp
+- ret
++ RET
+ SYM_FUNC_END(__efi64_thunk)
+
+ .code32
+diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
+index 572c535cf45bc..c3d427c817c73 100644
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -468,11 +468,25 @@ SYM_CODE_START(startup_64)
+ /* Save the trampoline address in RCX */
+ movq %rax, %rcx
+
++ /* Set up 32-bit addressable stack */
++ leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
++
++ /*
++ * Preserve live 64-bit registers on the stack: this is necessary
++ * because the architecture does not guarantee that GPRs will retain
++ * their full 64-bit values across a 32-bit mode switch.
++ */
++ pushq %rbp
++ pushq %rbx
++ pushq %rsi
++
+ /*
+- * Load the address of trampoline_return() into RDI.
+- * It will be used by the trampoline to return to the main code.
++ * Push the 64-bit address of trampoline_return() onto the new stack.
++ * It will be used by the trampoline to return to the main code. Due to
++ * the 32-bit mode switch, it cannot be kept it in a register either.
+ */
+ leaq trampoline_return(%rip), %rdi
++ pushq %rdi
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+@@ -480,6 +494,11 @@ SYM_CODE_START(startup_64)
+ pushq %rax
+ lretq
+ trampoline_return:
++ /* Restore live 64-bit registers */
++ popq %rsi
++ popq %rbx
++ popq %rbp
++
+ /* Restore the stack, the 32-bit trampoline uses its own stack */
+ leaq rva(boot_stack_end)(%rbx), %rsp
+
+@@ -600,7 +619,7 @@ SYM_FUNC_END(.Lrelocated)
+ /*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+- * RDI contains the return address (might be above 4G).
++ * Return address is at the top of the stack (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non zero RDX means trampoline needs to enable 5-level paging.
+ */
+@@ -610,9 +629,6 @@ SYM_CODE_START(trampoline_32bit_src)
+ movl %eax, %ds
+ movl %eax, %ss
+
+- /* Set up new stack */
+- leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+-
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+@@ -672,7 +688,7 @@ SYM_CODE_END(trampoline_32bit_src)
+ .code64
+ SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
+ /* Return from the trampoline */
+- jmp *%rdi
++ retq
+ SYM_FUNC_END(.Lpaging_enabled)
+
+ /*
+@@ -813,7 +829,7 @@ SYM_FUNC_START(efi32_pe_entry)
+ 2: popl %edi // restore callee-save registers
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(efi32_pe_entry)
+
+ .section ".rodata"
+@@ -868,7 +884,7 @@ SYM_FUNC_START(startup32_set_idt_entry)
+
+ pop %ecx
+ pop %ebx
+- ret
++ RET
+ SYM_FUNC_END(startup32_set_idt_entry)
+ #endif
+
+@@ -884,7 +900,7 @@ SYM_FUNC_START(startup32_load_idt)
+ movl %eax, rva(boot32_idt_desc+2)(%ebp)
+ lidt rva(boot32_idt_desc)(%ebp)
+ #endif
+- ret
++ RET
+ SYM_FUNC_END(startup32_load_idt)
+
+ /*
+@@ -954,7 +970,7 @@ SYM_FUNC_START(startup32_check_sev_cbit)
+ popl %ebx
+ popl %eax
+ #endif
+- ret
++ RET
+ SYM_FUNC_END(startup32_check_sev_cbit)
+
+ /*
+diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
+index c1e81a848b2a5..a63424d13627b 100644
+--- a/arch/x86/boot/compressed/mem_encrypt.S
++++ b/arch/x86/boot/compressed/mem_encrypt.S
+@@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit)
+
+ #endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+- ret
++ RET
+ SYM_FUNC_END(get_sev_encryption_bit)
+
+ /**
+@@ -92,7 +92,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid)
+ /* All good - return success */
+ xorl %eax, %eax
+ 1:
+- ret
++ RET
+ 2:
+ movl $-1, %eax
+ jmp 1b
+@@ -221,7 +221,7 @@ SYM_FUNC_START(set_sev_encryption_mask)
+ #endif
+
+ xor %rax, %rax
+- ret
++ RET
+ SYM_FUNC_END(set_sev_encryption_mask)
+
+ .data
+diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
+index e3add857c2c9d..c421af5a3cdce 100644
+--- a/arch/x86/boot/main.c
++++ b/arch/x86/boot/main.c
+@@ -33,7 +33,7 @@ static void copy_boot_params(void)
+ u16 cl_offset;
+ };
+ const struct old_cmdline * const oldcmd =
+- (const struct old_cmdline *)OLD_CL_ADDRESS;
++ absolute_pointer(OLD_CL_ADDRESS);
+
+ BUILD_BUG_ON(sizeof(boot_params) != 4096);
+ memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
+diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
+index e81885384f604..99398cbdae434 100644
+--- a/arch/x86/configs/i386_defconfig
++++ b/arch/x86/configs/i386_defconfig
+@@ -262,3 +262,4 @@ CONFIG_BLK_DEV_IO_TRACE=y
+ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+ CONFIG_EARLY_PRINTK_DBGP=y
+ CONFIG_DEBUG_BOOT_PARAMS=y
++CONFIG_KALLSYMS_ALL=y
+diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
+index e8a7a0af2bdaa..d7298b104a456 100644
+--- a/arch/x86/configs/x86_64_defconfig
++++ b/arch/x86/configs/x86_64_defconfig
+@@ -258,3 +258,4 @@ CONFIG_BLK_DEV_IO_TRACE=y
+ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+ CONFIG_EARLY_PRINTK_DBGP=y
+ CONFIG_DEBUG_BOOT_PARAMS=y
++CONFIG_KALLSYMS_ALL=y
+diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
+index f307c93fc90a7..fce05e9df56db 100644
+--- a/arch/x86/crypto/Makefile
++++ b/arch/x86/crypto/Makefile
+@@ -61,8 +61,8 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
+ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+ sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
+
+-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
+-blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
++libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+
+ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
+ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
+diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
+index 51d46d93efbcc..b48ddebb47489 100644
+--- a/arch/x86/crypto/aegis128-aesni-asm.S
++++ b/arch/x86/crypto/aegis128-aesni-asm.S
+@@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial)
+ pxor T0, MSG
+
+ .Lld_partial_8:
+- ret
++ RET
+ SYM_FUNC_END(__load_partial)
+
+ /*
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial)
+ mov %r10b, (%r9)
+
+ .Lst_partial_1:
+- ret
++ RET
+ SYM_FUNC_END(__store_partial)
+
+ /*
+@@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_init)
+ movdqu STATE4, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_init)
+
+ /*
+@@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_1:
+ movdqu STATE4, 0x00(STATEP)
+@@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_2:
+ movdqu STATE3, 0x00(STATEP)
+@@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_3:
+ movdqu STATE2, 0x00(STATEP)
+@@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out_4:
+ movdqu STATE1, 0x00(STATEP)
+@@ -373,11 +373,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lad_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_ad)
+
+ .macro encrypt_block a s0 s1 s2 s3 s4 i
+@@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_1:
+ movdqu STATE3, 0x00(STATEP)
+@@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_2:
+ movdqu STATE2, 0x00(STATEP)
+@@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_3:
+ movdqu STATE1, 0x00(STATEP)
+@@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out_4:
+ movdqu STATE0, 0x00(STATEP)
+@@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Lenc_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc)
+
+ /*
+@@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
+
+ .macro decrypt_block a s0 s1 s2 s3 s4 i
+@@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_1:
+ movdqu STATE3, 0x00(STATEP)
+@@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_2:
+ movdqu STATE2, 0x00(STATEP)
+@@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_3:
+ movdqu STATE1, 0x00(STATEP)
+@@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out_4:
+ movdqu STATE0, 0x00(STATEP)
+@@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+- ret
++ RET
+
+ .Ldec_out:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec)
+
+ /*
+@@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
+
+ /*
+@@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
+ movdqu MSG, (%rsi)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(crypto_aegis128_aesni_final)
+diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+index 3f0fc7dd87d77..c799838242a69 100644
+--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
++++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+@@ -525,7 +525,7 @@ ddq_add_8:
+ /* return updated IV */
+ vpshufb xbyteswap, xcounter, xcounter
+ vmovdqu xcounter, (p_iv)
+- ret
++ RET
+ .endm
+
+ /*
+diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
+index 4e3972570916e..363699dd72206 100644
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -1594,7 +1594,7 @@ SYM_FUNC_START(aesni_gcm_dec)
+ GCM_ENC_DEC dec
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec)
+
+
+@@ -1683,7 +1683,7 @@ SYM_FUNC_START(aesni_gcm_enc)
+
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc)
+
+ /*****************************************************************************
+@@ -1701,7 +1701,7 @@ SYM_FUNC_START(aesni_gcm_init)
+ FUNC_SAVE
+ GCM_INIT %arg3, %arg4,%arg5, %arg6
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init)
+
+ /*****************************************************************************
+@@ -1716,7 +1716,7 @@ SYM_FUNC_START(aesni_gcm_enc_update)
+ FUNC_SAVE
+ GCM_ENC_DEC enc
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update)
+
+ /*****************************************************************************
+@@ -1731,7 +1731,7 @@ SYM_FUNC_START(aesni_gcm_dec_update)
+ FUNC_SAVE
+ GCM_ENC_DEC dec
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update)
+
+ /*****************************************************************************
+@@ -1746,7 +1746,7 @@ SYM_FUNC_START(aesni_gcm_finalize)
+ FUNC_SAVE
+ GCM_COMPLETE %arg3 %arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize)
+
+ #endif
+@@ -1762,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_256a)
+ SYM_FUNC_END_ALIAS(_key_expansion_128)
+
+@@ -1787,7 +1787,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a)
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 0x10(TKEYP)
+ add $0x20, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_192a)
+
+ SYM_FUNC_START_LOCAL(_key_expansion_192b)
+@@ -1806,7 +1806,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b)
+
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_192b)
+
+ SYM_FUNC_START_LOCAL(_key_expansion_256b)
+@@ -1818,7 +1818,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b)
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (TKEYP)
+ add $0x10, TKEYP
+- ret
++ RET
+ SYM_FUNC_END(_key_expansion_256b)
+
+ /*
+@@ -1933,7 +1933,7 @@ SYM_FUNC_START(aesni_set_key)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_set_key)
+
+ /*
+@@ -1957,7 +1957,7 @@ SYM_FUNC_START(aesni_enc)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_enc)
+
+ /*
+@@ -2014,7 +2014,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1)
+ aesenc KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesenclast KEY, STATE
+- ret
++ RET
+ SYM_FUNC_END(_aesni_enc1)
+
+ /*
+@@ -2122,7 +2122,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4)
+ aesenclast KEY, STATE2
+ aesenclast KEY, STATE3
+ aesenclast KEY, STATE4
+- ret
++ RET
+ SYM_FUNC_END(_aesni_enc4)
+
+ /*
+@@ -2147,7 +2147,7 @@ SYM_FUNC_START(aesni_dec)
+ popl KEYP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_dec)
+
+ /*
+@@ -2204,7 +2204,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1)
+ aesdec KEY, STATE
+ movaps 0x70(TKEYP), KEY
+ aesdeclast KEY, STATE
+- ret
++ RET
+ SYM_FUNC_END(_aesni_dec1)
+
+ /*
+@@ -2312,7 +2312,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4)
+ aesdeclast KEY, STATE2
+ aesdeclast KEY, STATE3
+ aesdeclast KEY, STATE4
+- ret
++ RET
+ SYM_FUNC_END(_aesni_dec4)
+
+ /*
+@@ -2372,7 +2372,7 @@ SYM_FUNC_START(aesni_ecb_enc)
+ popl LEN
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ecb_enc)
+
+ /*
+@@ -2433,7 +2433,7 @@ SYM_FUNC_START(aesni_ecb_dec)
+ popl LEN
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ecb_dec)
+
+ /*
+@@ -2477,7 +2477,7 @@ SYM_FUNC_START(aesni_cbc_enc)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cbc_enc)
+
+ /*
+@@ -2570,7 +2570,7 @@ SYM_FUNC_START(aesni_cbc_dec)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cbc_dec)
+
+ /*
+@@ -2627,7 +2627,7 @@ SYM_FUNC_START(aesni_cts_cbc_enc)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cts_cbc_enc)
+
+ /*
+@@ -2688,7 +2688,7 @@ SYM_FUNC_START(aesni_cts_cbc_dec)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_cts_cbc_dec)
+
+ .pushsection .rodata
+@@ -2725,7 +2725,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init)
+ mov $1, TCTR_LOW
+ movq TCTR_LOW, INC
+ movq CTR, TCTR_LOW
+- ret
++ RET
+ SYM_FUNC_END(_aesni_inc_init)
+
+ /*
+@@ -2753,7 +2753,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc)
+ .Linc_low:
+ movaps CTR, IV
+ pshufb BSWAP_MASK, IV
+- ret
++ RET
+ SYM_FUNC_END(_aesni_inc)
+
+ /*
+@@ -2816,7 +2816,7 @@ SYM_FUNC_START(aesni_ctr_enc)
+ movups IV, (IVP)
+ .Lctr_enc_just_ret:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(aesni_ctr_enc)
+
+ #endif
+@@ -2932,7 +2932,7 @@ SYM_FUNC_START(aesni_xts_encrypt)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+
+ .Lxts_enc_1x:
+ add $64, LEN
+@@ -3092,7 +3092,7 @@ SYM_FUNC_START(aesni_xts_decrypt)
+ popl IVP
+ #endif
+ FRAME_END
+- ret
++ RET
+
+ .Lxts_dec_1x:
+ add $64, LEN
+diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+index 98e3552b6e039..0852ab573fd30 100644
+--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+@@ -1767,7 +1767,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen2)
+
+ ###############################################################################
+@@ -1788,15 +1788,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
+
+ ###############################################################################
+@@ -1817,15 +1817,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
+
+ ###############################################################################
+@@ -1846,15 +1846,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
+
+ ###############################################################################
+@@ -2735,7 +2735,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen4)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_init_avx_gen4)
+
+ ###############################################################################
+@@ -2756,15 +2756,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
+
+ ###############################################################################
+@@ -2785,15 +2785,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
+
+ ###############################################################################
+@@ -2814,13 +2814,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_128_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ key_256_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
+ FUNC_RESTORE
+- ret
++ RET
+ SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
+diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
+index 0fc961bef299c..41901ba9d3a2c 100644
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -866,7 +866,7 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
+ req = &subreq;
+
+ err = skcipher_walk_virt(&walk, req, false);
+- if (err)
++ if (!walk.nbytes)
+ return err;
+ } else {
+ tail = 0;
+@@ -1107,7 +1107,7 @@ static struct aead_alg aesni_aeads[] = { {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx),
+- .cra_alignmask = AESNI_ALIGN - 1,
++ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ }, {
+@@ -1124,7 +1124,7 @@ static struct aead_alg aesni_aeads[] = { {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
+- .cra_alignmask = AESNI_ALIGN - 1,
++ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ } };
+diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
+index 2ca79974f8198..b50b35ff1fdba 100644
+--- a/arch/x86/crypto/blake2s-core.S
++++ b/arch/x86/crypto/blake2s-core.S
+@@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3)
+ movdqu %xmm1,0x10(%rdi)
+ movdqu %xmm14,0x20(%rdi)
+ .Lendofloop:
+- ret
++ RET
+ SYM_FUNC_END(blake2s_compress_ssse3)
+
+ #ifdef CONFIG_AS_AVX512
+@@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512)
+ vmovdqu %xmm1,0x10(%rdi)
+ vmovdqu %xmm4,0x20(%rdi)
+ vzeroupper
+- retq
++ RET
+ SYM_FUNC_END(blake2s_compress_avx512)
+ #endif /* CONFIG_AS_AVX512 */
+diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
+index a40365ab301ee..aaba212305288 100644
+--- a/arch/x86/crypto/blake2s-glue.c
++++ b/arch/x86/crypto/blake2s-glue.c
+@@ -4,8 +4,6 @@
+ */
+
+ #include <crypto/internal/blake2s.h>
+-#include <crypto/internal/simd.h>
+-#include <crypto/internal/hash.h>
+
+ #include <linux/types.h>
+ #include <linux/jump_label.h>
+@@ -28,14 +26,13 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+ static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+ static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+-void blake2s_compress_arch(struct blake2s_state *state,
+- const u8 *block, size_t nblocks,
+- const u32 inc)
++void blake2s_compress(struct blake2s_state *state, const u8 *block,
++ size_t nblocks, const u32 inc)
+ {
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
+
+- if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
++ if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
+ blake2s_compress_generic(state, block, nblocks, inc);
+ return;
+ }
+@@ -56,49 +53,12 @@ void blake2s_compress_arch(struct blake2s_state *state,
+ block += blocks * BLAKE2S_BLOCK_SIZE;
+ } while (nblocks);
+ }
+-EXPORT_SYMBOL(blake2s_compress_arch);
+-
+-static int crypto_blake2s_update_x86(struct shash_desc *desc,
+- const u8 *in, unsigned int inlen)
+-{
+- return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch);
+-}
+-
+-static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
+-{
+- return crypto_blake2s_final(desc, out, blake2s_compress_arch);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size) \
+- { \
+- .base.cra_name = name, \
+- .base.cra_driver_name = driver_name, \
+- .base.cra_priority = 200, \
+- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+- .base.cra_module = THIS_MODULE, \
+- .digestsize = digest_size, \
+- .setkey = crypto_blake2s_setkey, \
+- .init = crypto_blake2s_init, \
+- .update = crypto_blake2s_update_x86, \
+- .final = crypto_blake2s_final_x86, \
+- .descsize = sizeof(struct blake2s_state), \
+- }
+-
+-static struct shash_alg blake2s_algs[] = {
+- BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
+-};
++EXPORT_SYMBOL(blake2s_compress);
+
+ static int __init blake2s_mod_init(void)
+ {
+- if (!boot_cpu_has(X86_FEATURE_SSSE3))
+- return 0;
+-
+- static_branch_enable(&blake2s_use_ssse3);
++ if (boot_cpu_has(X86_FEATURE_SSSE3))
++ static_branch_enable(&blake2s_use_ssse3);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+@@ -109,26 +69,9 @@ static int __init blake2s_mod_init(void)
+ XFEATURE_MASK_AVX512, NULL))
+ static_branch_enable(&blake2s_use_avx512);
+
+- return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+- crypto_register_shashes(blake2s_algs,
+- ARRAY_SIZE(blake2s_algs)) : 0;
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+- if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+- crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
++ return 0;
+ }
+
+ module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+ MODULE_LICENSE("GPL v2");
+diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
+index 4222ac6d65848..802d715826891 100644
+--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
++++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
+@@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk)
+ jnz .L__enc_xor;
+
+ write_block();
+- ret;
++ RET;
+ .L__enc_xor:
+ xor_block();
+- ret;
++ RET;
+ SYM_FUNC_END(__blowfish_enc_blk)
+
+ SYM_FUNC_START(blowfish_dec_blk)
+@@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk)
+
+ movq %r11, %r12;
+
+- ret;
++ RET;
+ SYM_FUNC_END(blowfish_dec_blk)
+
+ /**********************************************************************
+@@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way)
+
+ popq %rbx;
+ popq %r12;
+- ret;
++ RET;
+
+ .L__enc_xor4:
+ xor_block4();
+
+ popq %rbx;
+ popq %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(__blowfish_enc_blk_4way)
+
+ SYM_FUNC_START(blowfish_dec_blk_4way)
+@@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way)
+ popq %rbx;
+ popq %r12;
+
+- ret;
++ RET;
+ SYM_FUNC_END(blowfish_dec_blk_4way)
+diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+index e2a0e0f4bf9d8..2e1658ddbe1a9 100644
+--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+@@ -192,7 +192,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
+ roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
+ %rcx, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+
+ .align 8
+@@ -200,7 +200,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a
+ roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
+ %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
+ %rax, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+
+ /*
+@@ -778,7 +778,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
+ %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Lenc_max32:
+@@ -865,7 +865,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
+ %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Ldec_max32:
+@@ -906,7 +906,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_enc_16way)
+
+ SYM_FUNC_START(camellia_ecb_dec_16way)
+@@ -936,7 +936,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_dec_16way)
+
+ SYM_FUNC_START(camellia_cbc_dec_16way)
+@@ -987,5 +987,5 @@ SYM_FUNC_START(camellia_cbc_dec_16way)
+ %xmm8, %rsi);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_cbc_dec_16way)
+diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+index 706f70829a07e..0e4e9abbf4de3 100644
+--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+@@ -226,7 +226,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
+ roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
+ %rcx, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+
+ .align 8
+@@ -234,7 +234,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a
+ roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
+ %rax, (%r9));
+- ret;
++ RET;
+ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+
+ /*
+@@ -814,7 +814,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
+ %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Lenc_max32:
+@@ -901,7 +901,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
+ %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
+
+ FRAME_END
+- ret;
++ RET;
+
+ .align 8
+ .Ldec_max32:
+@@ -946,7 +946,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_enc_32way)
+
+ SYM_FUNC_START(camellia_ecb_dec_32way)
+@@ -980,7 +980,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_ecb_dec_32way)
+
+ SYM_FUNC_START(camellia_cbc_dec_32way)
+@@ -1047,5 +1047,5 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
+
+ addq $(16 * 32), %rsp;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_cbc_dec_32way)
+diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
+index 1372e64088507..347c059f59403 100644
+--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
++++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
+@@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk)
+ enc_outunpack(mov, RT1);
+
+ movq RR12, %r12;
+- ret;
++ RET;
+
+ .L__enc_xor:
+ enc_outunpack(xor, RT1);
+
+ movq RR12, %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(__camellia_enc_blk)
+
+ SYM_FUNC_START(camellia_dec_blk)
+@@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk)
+ dec_outunpack();
+
+ movq RR12, %r12;
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_dec_blk)
+
+ /**********************************************************************
+@@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
+
+ movq RR12, %r12;
+ popq %rbx;
+- ret;
++ RET;
+
+ .L__enc2_xor:
+ enc_outunpack2(xor, RT2);
+
+ movq RR12, %r12;
+ popq %rbx;
+- ret;
++ RET;
+ SYM_FUNC_END(__camellia_enc_blk_2way)
+
+ SYM_FUNC_START(camellia_dec_blk_2way)
+@@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way)
+
+ movq RR12, %r12;
+ movq RXOR, %rbx;
+- ret;
++ RET;
+ SYM_FUNC_END(camellia_dec_blk_2way)
+diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+index 8a6181b08b590..b258af420c92c 100644
+--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+@@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast5_enc_blk16)
+
+ .align 16
+@@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
+
+- ret;
++ RET;
+
+ .L__skip_dec:
+ vpsrldq $4, RKR, RKR;
+@@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ecb_enc_16way)
+
+ SYM_FUNC_START(cast5_ecb_dec_16way)
+@@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ecb_dec_16way)
+
+ SYM_FUNC_START(cast5_cbc_dec_16way)
+@@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_cbc_dec_16way)
+
+ SYM_FUNC_START(cast5_ctr_16way)
+@@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast5_ctr_16way)
+diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+index fbddcecc3e3fc..82b716fd5dbac 100644
+--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+@@ -289,7 +289,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast6_enc_blk8)
+
+ .align 8
+@@ -336,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__cast6_dec_blk8)
+
+ SYM_FUNC_START(cast6_ecb_enc_8way)
+@@ -359,7 +359,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_ecb_enc_8way)
+
+ SYM_FUNC_START(cast6_ecb_dec_8way)
+@@ -382,7 +382,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way)
+
+ popq %r15;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_ecb_dec_8way)
+
+ SYM_FUNC_START(cast6_cbc_dec_8way)
+@@ -408,5 +408,5 @@ SYM_FUNC_START(cast6_cbc_dec_8way)
+ popq %r15;
+ popq %r12;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(cast6_cbc_dec_8way)
+diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
+index ee9a40ab41093..f3d8fc0182493 100644
+--- a/arch/x86/crypto/chacha-avx2-x86_64.S
++++ b/arch/x86/crypto/chacha-avx2-x86_64.S
+@@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2)
+
+ .Ldone2:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart2:
+ # xor remaining bytes from partial register into output
+@@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2)
+
+ .Ldone4:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+@@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2)
+ .Ldone8:
+ vzeroupper
+ lea -8(%r10),%rsp
+- ret
++ RET
+
+ .Lxorpart8:
+ # xor remaining bytes from partial register into output
+diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S
+index bb193fde123a0..259383e1ad440 100644
+--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
++++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
+@@ -166,13 +166,13 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl)
+
+ .Ldone2:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart2:
+ # xor remaining bytes from partial register into output
+ mov %rcx,%rax
+ and $0xf,%rcx
+- jz .Ldone8
++ jz .Ldone2
+ mov %rax,%r9
+ and $~0xf,%r9
+
+@@ -432,13 +432,13 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl)
+
+ .Ldone4:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+ mov %rcx,%rax
+ and $0xf,%rcx
+- jz .Ldone8
++ jz .Ldone4
+ mov %rax,%r9
+ and $~0xf,%r9
+
+@@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512vl)
+
+ .Ldone8:
+ vzeroupper
+- ret
++ RET
+
+ .Lxorpart8:
+ # xor remaining bytes from partial register into output
+diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
+index ca1788bfee162..7111949cd5b99 100644
+--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
++++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
+@@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute)
+ sub $2,%r8d
+ jnz .Ldoubleround
+
+- ret
++ RET
+ SYM_FUNC_END(chacha_permute)
+
+ SYM_FUNC_START(chacha_block_xor_ssse3)
+@@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3)
+
+ .Ldone:
+ FRAME_END
+- ret
++ RET
+
+ .Lxorpart:
+ # xor remaining bytes from partial register into output
+@@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3)
+ movdqu %xmm3,0x10(%rsi)
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(hchacha_block_ssse3)
+
+ SYM_FUNC_START(chacha_4block_xor_ssse3)
+@@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3)
+
+ .Ldone4:
+ lea -8(%r10),%rsp
+- ret
++ RET
+
+ .Lxorpart4:
+ # xor remaining bytes from partial register into output
+diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
+index 6e7d4c4d32081..c392a6edbfff6 100644
+--- a/arch/x86/crypto/crc32-pclmul_asm.S
++++ b/arch/x86/crypto/crc32-pclmul_asm.S
+@@ -236,5 +236,5 @@ fold_64:
+ pxor %xmm2, %xmm1
+ pextrd $0x01, %xmm1, %eax
+
+- ret
++ RET
+ SYM_FUNC_END(crc32_pclmul_le_16)
+diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+index ac1f303eed0f4..80c0d22fc42c6 100644
+--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+@@ -306,7 +306,7 @@ do_return:
+ popq %rsi
+ popq %rdi
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(crc_pcl)
+
+ .section .rodata, "a", @progbits
+diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
+index b2533d63030e5..721474abfb719 100644
+--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
++++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
+@@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl)
+ # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
+
+ pextrw $0, %xmm0, %eax
+- ret
++ RET
+
+ .align 16
+ .Lless_than_256_bytes:
+diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
+index fac0fdc3f25da..f4c760f4cade6 100644
+--- a/arch/x86/crypto/des3_ede-asm_64.S
++++ b/arch/x86/crypto/des3_ede-asm_64.S
+@@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
+ popq %r12;
+ popq %rbx;
+
+- ret;
++ RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
+
+ /***********************************************************************
+@@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
+ popq %r12;
+ popq %rbx;
+
+- ret;
++ RET;
+ SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
+
+ .section .rodata, "a", @progbits
+diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+index 99ac25e18e098..2bf8718999209 100644
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
+ psrlq $1, T2
+ pxor T2, T1
+ pxor T1, DATA
+- ret
++ RET
+ SYM_FUNC_END(__clmul_gf128mul_ble)
+
+ /* void clmul_ghash_mul(char *dst, const u128 *shash) */
+@@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul)
+ pshufb BSWAP, DATA
+ movups DATA, (%rdi)
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(clmul_ghash_mul)
+
+ /*
+@@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update)
+ movups DATA, (%rdi)
+ .Lupdate_just_ret:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(clmul_ghash_update)
+diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+index 1f1a95f3dd0ca..c0ab0ff4af655 100644
+--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+@@ -19,6 +19,7 @@
+ #include <crypto/internal/simd.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/simd.h>
++#include <asm/unaligned.h>
+
+ #define GHASH_BLOCK_SIZE 16
+ #define GHASH_DIGEST_SIZE 16
+@@ -54,15 +55,14 @@ static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+ {
+ struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+- be128 *x = (be128 *)key;
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE)
+ return -EINVAL;
+
+ /* perform multiplication by 'x' in GF(2^128) */
+- a = be64_to_cpu(x->a);
+- b = be64_to_cpu(x->b);
++ a = get_unaligned_be64(key);
++ b = get_unaligned_be64(key + 8);
+
+ ctx->shash.a = (b << 1) | (a >> 63);
+ ctx->shash.b = (a << 1) | (b >> 63);
+diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
+index b22c7b9362726..6a0b15e7196a8 100644
+--- a/arch/x86/crypto/nh-avx2-x86_64.S
++++ b/arch/x86/crypto/nh-avx2-x86_64.S
+@@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2)
+ vpaddq T1, T0, T0
+ vpaddq T4, T0, T0
+ vmovdqu T0, (HASH)
+- ret
++ RET
+ SYM_FUNC_END(nh_avx2)
+diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
+index d7ae22dd66839..34c567bbcb4fa 100644
+--- a/arch/x86/crypto/nh-sse2-x86_64.S
++++ b/arch/x86/crypto/nh-sse2-x86_64.S
+@@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2)
+ paddq PASS2_SUMS, T1
+ movdqu T0, 0x00(HASH)
+ movdqu T1, 0x10(HASH)
+- ret
++ RET
+ SYM_FUNC_END(nh_sse2)
+diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+index 71fae5a09e56d..2077ce7a56479 100644
+--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+@@ -297,7 +297,7 @@ ___
+ $code.=<<___;
+ mov \$1,%eax
+ .Lno_key:
+- ret
++ RET
+ ___
+ &end_function("poly1305_init_x86_64");
+
+@@ -373,7 +373,7 @@ $code.=<<___;
+ .cfi_adjust_cfa_offset -48
+ .Lno_data:
+ .Lblocks_epilogue:
+- ret
++ RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_x86_64");
+@@ -399,7 +399,7 @@ $code.=<<___;
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ ___
+ &end_function("poly1305_emit_x86_64");
+ if ($avx) {
+@@ -429,7 +429,7 @@ ___
+ &poly1305_iteration();
+ $code.=<<___;
+ pop $ctx
+- ret
++ RET
+ .size __poly1305_block,.-__poly1305_block
+
+ .type __poly1305_init_avx,\@abi-omnipotent
+@@ -594,7 +594,7 @@ __poly1305_init_avx:
+
+ lea -48-64($ctx),$ctx # size [de-]optimization
+ pop %rbp
+- ret
++ RET
+ .size __poly1305_init_avx,.-__poly1305_init_avx
+ ___
+
+@@ -747,7 +747,7 @@ $code.=<<___;
+ .cfi_restore %rbp
+ .Lno_data_avx:
+ .Lblocks_avx_epilogue:
+- ret
++ RET
+ .cfi_endproc
+
+ .align 32
+@@ -1452,7 +1452,7 @@ $code.=<<___ if (!$win64);
+ ___
+ $code.=<<___;
+ vzeroupper
+- ret
++ RET
+ .cfi_endproc
+ ___
+ &end_function("poly1305_blocks_avx");
+@@ -1508,7 +1508,7 @@ $code.=<<___;
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ ___
+ &end_function("poly1305_emit_avx");
+
+@@ -1675,7 +1675,7 @@ $code.=<<___;
+ .cfi_restore %rbp
+ .Lno_data_avx2$suffix:
+ .Lblocks_avx2_epilogue$suffix:
+- ret
++ RET
+ .cfi_endproc
+
+ .align 32
+@@ -2201,7 +2201,7 @@ $code.=<<___ if (!$win64);
+ ___
+ $code.=<<___;
+ vzeroupper
+- ret
++ RET
+ .cfi_endproc
+ ___
+ if($avx > 2 && $avx512) {
+@@ -2792,7 +2792,7 @@ $code.=<<___ if (!$win64);
+ .cfi_def_cfa_register %rsp
+ ___
+ $code.=<<___;
+- ret
++ RET
+ .cfi_endproc
+ ___
+
+@@ -2893,7 +2893,7 @@ $code.=<<___ if ($flavour =~ /elf32/);
+ ___
+ $code.=<<___;
+ mov \$1,%eax
+- ret
++ RET
+ .size poly1305_init_base2_44,.-poly1305_init_base2_44
+ ___
+ {
+@@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52:
+ jnz .Lblocks_vpmadd52_4x
+
+ .Lno_data_vpmadd52:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+ ___
+ }
+@@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x:
+ vzeroall
+
+ .Lno_data_vpmadd52_4x:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+ ___
+ }
+@@ -3824,7 +3824,7 @@ $code.=<<___;
+ vzeroall
+
+ .Lno_data_vpmadd52_8x:
+- ret
++ RET
+ .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+ ___
+ }
+@@ -3861,7 +3861,7 @@ poly1305_emit_base2_44:
+ mov %rax,0($mac) # write result
+ mov %rcx,8($mac)
+
+- ret
++ RET
+ .size poly1305_emit_base2_44,.-poly1305_emit_base2_44
+ ___
+ } } }
+@@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad:
+
+ .Ldone_enc:
+ mov $otp,%rax
+- ret
++ RET
+ .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+ .globl xor128_decrypt_n_pad
+@@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad:
+
+ .Ldone_dec:
+ mov $otp,%rax
+- ret
++ RET
+ .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+ ___
+ }
+@@ -4109,7 +4109,7 @@ avx_handler:
+ pop %rbx
+ pop %rdi
+ pop %rsi
+- ret
++ RET
+ .size avx_handler,.-avx_handler
+
+ .section .pdata
+diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+index b7ee24df7fbae..82f2313f512b8 100644
+--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+@@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
+ write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk8_avx)
+
+ .align 8
+@@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
+ write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_dec_blk8_avx)
+
+ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+@@ -673,7 +673,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+ store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_enc_8way_avx)
+
+ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+@@ -691,7 +691,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+ store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_dec_8way_avx)
+
+ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+@@ -709,5 +709,5 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+ store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_cbc_dec_8way_avx)
+diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
+index 9161b6e441f31..8ea34c9b93160 100644
+--- a/arch/x86/crypto/serpent-avx2-asm_64.S
++++ b/arch/x86/crypto/serpent-avx2-asm_64.S
+@@ -601,7 +601,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
+ write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk16)
+
+ .align 8
+@@ -655,7 +655,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
+ write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_dec_blk16)
+
+ SYM_FUNC_START(serpent_ecb_enc_16way)
+@@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_enc_16way)
+
+ SYM_FUNC_START(serpent_ecb_dec_16way)
+@@ -699,7 +699,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_ecb_dec_16way)
+
+ SYM_FUNC_START(serpent_cbc_dec_16way)
+@@ -722,5 +722,5 @@ SYM_FUNC_START(serpent_cbc_dec_16way)
+ vzeroupper;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_cbc_dec_16way)
+diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+index 6379b99cb722e..8ccb03ad7cef5 100644
+--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
++++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+@@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way)
+
+ write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+
+- ret;
++ RET;
+
+ .L__enc_xor4:
+ xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk_4way)
+
+ SYM_FUNC_START(serpent_dec_blk_4way)
+@@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way)
+ movl arg_dst(%esp), %eax;
+ write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
+
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_dec_blk_4way)
+diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+index efb6dc17dc907..e0998a011d1dd 100644
+--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
++++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+@@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way)
+ write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+
+ .L__enc_xor8:
+ xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__serpent_enc_blk_8way)
+
+ SYM_FUNC_START(serpent_dec_blk_8way)
+@@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way)
+ write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(serpent_dec_blk_8way)
+diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+index 5eed620f46765..a96b2fd26dab4 100644
+--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
++++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+@@ -674,7 +674,7 @@ _loop3:
+ pop %r12
+ pop %rbx
+
+- ret
++ RET
+
+ SYM_FUNC_END(\name)
+ .endm
+diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
+index 5d8415f482bd7..2f94ec0e763bf 100644
+--- a/arch/x86/crypto/sha1_ni_asm.S
++++ b/arch/x86/crypto/sha1_ni_asm.S
+@@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform)
+ mov %rbp, %rsp
+ pop %rbp
+
+- ret
++ RET
+ SYM_FUNC_END(sha1_ni_transform)
+
+ .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
+index d25668d2a1e92..263f916362e02 100644
+--- a/arch/x86/crypto/sha1_ssse3_asm.S
++++ b/arch/x86/crypto/sha1_ssse3_asm.S
+@@ -99,7 +99,7 @@
+ pop %rbp
+ pop %r12
+ pop %rbx
+- ret
++ RET
+
+ SYM_FUNC_END(\name)
+ .endm
+diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
+index 4739cd31b9db1..3baa1ec390974 100644
+--- a/arch/x86/crypto/sha256-avx-asm.S
++++ b/arch/x86/crypto/sha256-avx-asm.S
+@@ -458,7 +458,7 @@ done_hash:
+ popq %r13
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_avx)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
+index 4087f7432a7e8..9bcdbc47b8b4b 100644
+--- a/arch/x86/crypto/sha256-avx2-asm.S
++++ b/arch/x86/crypto/sha256-avx2-asm.S
+@@ -710,7 +710,7 @@ done_hash:
+ popq %r13
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_rorx)
+
+ .section .rodata.cst512.K256, "aM", @progbits, 512
+diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
+index ddfa863b4ee33..c4a5db612c327 100644
+--- a/arch/x86/crypto/sha256-ssse3-asm.S
++++ b/arch/x86/crypto/sha256-ssse3-asm.S
+@@ -472,7 +472,7 @@ done_hash:
+ popq %r12
+ popq %rbx
+
+- ret
++ RET
+ SYM_FUNC_END(sha256_transform_ssse3)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
+index 7abade04a3a38..94d50dd27cb53 100644
+--- a/arch/x86/crypto/sha256_ni_asm.S
++++ b/arch/x86/crypto/sha256_ni_asm.S
+@@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform)
+
+ .Ldone_hash:
+
+- ret
++ RET
+ SYM_FUNC_END(sha256_ni_transform)
+
+ .section .rodata.cst256.K256, "aM", @progbits, 256
+diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
+index 3d8f0fd4eea87..1fefe6dd3a9e2 100644
+--- a/arch/x86/crypto/sha512-avx-asm.S
++++ b/arch/x86/crypto/sha512-avx-asm.S
+@@ -361,7 +361,7 @@ updateblock:
+ pop %rbx
+
+ nowork:
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_avx)
+
+ ########################################################################
+diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
+index 072cb0f0deae3..5cdaab7d69015 100644
+--- a/arch/x86/crypto/sha512-avx2-asm.S
++++ b/arch/x86/crypto/sha512-avx2-asm.S
+@@ -679,7 +679,7 @@ done_hash:
+ pop %r12
+ pop %rbx
+
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_rorx)
+
+ ########################################################################
+diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
+index bd51c9070bedc..b84c22e06c5f7 100644
+--- a/arch/x86/crypto/sha512-ssse3-asm.S
++++ b/arch/x86/crypto/sha512-ssse3-asm.S
+@@ -363,7 +363,7 @@ updateblock:
+ pop %rbx
+
+ nowork:
+- ret
++ RET
+ SYM_FUNC_END(sha512_transform_ssse3)
+
+ ########################################################################
+diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+index 1cc72b4804fab..4767ab61ff489 100644
+--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
++++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+@@ -246,7 +246,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4)
+ .Lblk4_store_output_done:
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx_crypt4)
+
+ .align 8
+@@ -356,7 +356,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(__sm4_crypt_blk8)
+
+ /*
+@@ -412,7 +412,7 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8)
+ .Lblk8_store_output_done:
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx_crypt8)
+
+ /*
+@@ -487,7 +487,7 @@ SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
+
+ /*
+@@ -537,7 +537,7 @@ SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
+
+ /*
+@@ -590,5 +590,5 @@ SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
+diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+index 9c5d3f3ad45a9..4732fe8bb65b6 100644
+--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
++++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+@@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(__sm4_crypt_blk16)
+
+ #define inc_le128(x, minus_one, tmp) \
+@@ -387,7 +387,7 @@ SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
+
+ /*
+@@ -441,7 +441,7 @@ SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
+
+ /*
+@@ -497,5 +497,5 @@ SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
+
+ vzeroall;
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
+diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+index 37e63b3c664eb..31f9b2ec3857d 100644
+--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+@@ -267,7 +267,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
+ outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_enc_blk8)
+
+ .align 8
+@@ -307,7 +307,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
+ outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_dec_blk8)
+
+ SYM_FUNC_START(twofish_ecb_enc_8way)
+@@ -327,7 +327,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way)
+ store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_ecb_enc_8way)
+
+ SYM_FUNC_START(twofish_ecb_dec_8way)
+@@ -347,7 +347,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way)
+ store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_ecb_dec_8way)
+
+ SYM_FUNC_START(twofish_cbc_dec_8way)
+@@ -372,5 +372,5 @@ SYM_FUNC_START(twofish_cbc_dec_8way)
+ popq %r12;
+
+ FRAME_END
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_cbc_dec_8way)
+diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
+index a6f09e4f2e463..3abcad6618840 100644
+--- a/arch/x86/crypto/twofish-i586-asm_32.S
++++ b/arch/x86/crypto/twofish-i586-asm_32.S
+@@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk)
+ pop %ebx
+ pop %ebp
+ mov $1, %eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_enc_blk)
+
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk)
+ pop %ebx
+ pop %ebp
+ mov $1, %eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_dec_blk)
+diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+index bca4cea757ce2..d2288bf38a8a5 100644
+--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+@@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+
+ .L__enc_xor3:
+ outunpack_enc3(xor);
+@@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+ SYM_FUNC_END(__twofish_enc_blk_3way)
+
+ SYM_FUNC_START(twofish_dec_blk_3way)
+@@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way)
+ popq %rbx;
+ popq %r12;
+ popq %r13;
+- ret;
++ RET;
+ SYM_FUNC_END(twofish_dec_blk_3way)
+diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
+index d2e56232494a8..775af290cd196 100644
+--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
++++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
+@@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk)
+
+ popq R1
+ movl $1,%eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_enc_blk)
+
+ SYM_FUNC_START(twofish_dec_blk)
+@@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk)
+
+ popq R1
+ movl $1,%eax
+- ret
++ RET
+ SYM_FUNC_END(twofish_dec_blk)
+diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
+index 7fec5dcf64386..ca2fe186994b0 100644
+--- a/arch/x86/entry/Makefile
++++ b/arch/x86/entry/Makefile
+@@ -11,12 +11,13 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
+
+ CFLAGS_common.o += -fno-stack-protector
+
+-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
++obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
+ obj-y += common.o
+
+ obj-y += vdso/
+ obj-y += vsyscall/
+
++obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o
+ obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
+ obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
+
+diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
+index a4c061fb7c6ea..b00a3a95fbfab 100644
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -7,6 +7,8 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/processor-flags.h>
+ #include <asm/ptrace-abi.h>
++#include <asm/msr.h>
++#include <asm/nospec-branch.h>
+
+ /*
+
+@@ -119,27 +121,19 @@ For 32-bit we have the following conventions - kernel is built with
+ CLEAR_REGS
+ .endm
+
+-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
++.macro POP_REGS pop_rdi=1
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+- .if \skip_r11rcx
+- popq %rsi
+- .else
+ popq %r11
+- .endif
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+- .if \skip_r11rcx
+- popq %rsi
+- .else
+ popq %rcx
+- .endif
+ popq %rdx
+ popq %rsi
+ .if \pop_rdi
+@@ -289,6 +283,66 @@ For 32-bit we have the following conventions - kernel is built with
+
+ #endif
+
++/*
++ * IBRS kernel mitigation for Spectre_v2.
++ *
++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
++ * the regs it uses (AX, CX, DX). Must be called before the first RET
++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
++ *
++ * The optional argument is used to save/restore the current value,
++ * which is used on the paranoid paths.
++ *
++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
++ */
++.macro IBRS_ENTER save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++ rdmsr
++ shl $32, %rdx
++ or %rdx, %rax
++ mov %rax, \save_reg
++ test $SPEC_CTRL_IBRS, %eax
++ jz .Ldo_wrmsr_\@
++ lfence
++ jmp .Lend_\@
++.Ldo_wrmsr_\@:
++.endif
++
++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++ movl %edx, %eax
++ shr $32, %rdx
++ wrmsr
++.Lend_\@:
++#endif
++.endm
++
++/*
++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
++ * regs. Must be called after the last RET.
++ */
++.macro IBRS_EXIT save_reg
++#ifdef CONFIG_CPU_IBRS_ENTRY
++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
++ movl $MSR_IA32_SPEC_CTRL, %ecx
++
++.ifnb \save_reg
++ mov \save_reg, %rdx
++.else
++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
++ andl $(~SPEC_CTRL_IBRS), %edx
++.endif
++
++ movl %edx, %eax
++ shr $32, %rdx
++ wrmsr
++.Lend_\@:
++#endif
++.endm
++
+ /*
+ * Mitigate Spectre v1 for conditional swapgs code paths.
+ *
+diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
+new file mode 100644
+index 0000000000000..bfb7bcb362bcf
+--- /dev/null
++++ b/arch/x86/entry/entry.S
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Common place for both 32- and 64-bit entry routines.
++ */
++
++#include <linux/linkage.h>
++#include <asm/export.h>
++#include <asm/msr-index.h>
++
++.pushsection .noinstr.text, "ax"
++
++SYM_FUNC_START(entry_ibpb)
++ movl $MSR_IA32_PRED_CMD, %ecx
++ movl $PRED_CMD_IBPB, %eax
++ xorl %edx, %edx
++ wrmsr
++ RET
++SYM_FUNC_END(entry_ibpb)
++/* For KVM */
++EXPORT_SYMBOL_GPL(entry_ibpb);
++
++.popsection
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index ccb9d32768f31..e309e71560389 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -268,19 +268,16 @@
+ 1: popl %ds
+ 2: popl %es
+ 3: popl %fs
+- addl $(4 + \pop), %esp /* pop the unused "gs" slot */
++4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */
+ IRET_FRAME
+-.pushsection .fixup, "ax"
+-4: movl $0, (%esp)
+- jmp 1b
+-5: movl $0, (%esp)
+- jmp 2b
+-6: movl $0, (%esp)
+- jmp 3b
+-.popsection
+- _ASM_EXTABLE(1b, 4b)
+- _ASM_EXTABLE(2b, 5b)
+- _ASM_EXTABLE(3b, 6b)
++
++ /*
++ * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is
++ * ASM the registers are known and we can trivially hard-code them.
++ */
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS)
++ _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES)
++ _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS)
+ .endm
+
+ .macro RESTORE_ALL_NMI cr3_reg:req pop=0
+@@ -701,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
+ movl %ebx, PER_CPU_VAR(__stack_chk_guard)
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+@@ -710,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+
+ /* Restore flags or the incoming task to restore AC state. */
+ popfl
+@@ -740,7 +735,7 @@ SYM_FUNC_START(schedule_tail_wrapper)
+ popl %eax
+
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(schedule_tail_wrapper)
+ .popsection
+
+@@ -925,10 +920,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
+ sti
+ sysexit
+
+-.pushsection .fixup, "ax"
+-2: movl $0, PT_FS(%esp)
+- jmp 1b
+-.popsection
++2: movl $0, PT_FS(%esp)
++ jmp 1b
+ _ASM_EXTABLE(1b, 2b)
+
+ .Lsysenter_fix_flags:
+@@ -996,8 +989,7 @@ restore_all_switch_stack:
+ */
+ iret
+
+-.section .fixup, "ax"
+-SYM_CODE_START(asm_iret_error)
++.Lasm_iret_error:
+ pushl $0 # no error code
+ pushl $iret_error
+
+@@ -1014,9 +1006,8 @@ SYM_CODE_START(asm_iret_error)
+ #endif
+
+ jmp handle_exception
+-SYM_CODE_END(asm_iret_error)
+-.previous
+- _ASM_EXTABLE(.Lirq_return, asm_iret_error)
++
++ _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error)
+ SYM_FUNC_END(entry_INT80_32)
+
+ .macro FIXUP_ESPFIX_STACK
+@@ -1248,14 +1239,14 @@ SYM_CODE_START(asm_exc_nmi)
+ SYM_CODE_END(asm_exc_nmi)
+
+ .pushsection .text, "ax"
+-SYM_CODE_START(rewind_stack_do_exit)
++SYM_CODE_START(rewind_stack_and_make_dead)
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
+ leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+
+- call do_exit
++ call make_task_dead
+ 1: jmp 1b
+-SYM_CODE_END(rewind_stack_do_exit)
++SYM_CODE_END(rewind_stack_and_make_dead)
+ .popsection
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index e38a4cf795d96..9f1333a9ee41d 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -85,7 +85,7 @@
+ */
+
+ SYM_CODE_START(entry_SYSCALL_64)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+
+ swapgs
+ /* tss.sp2 is scratch space. */
+@@ -110,6 +110,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
+ movq %rsp, %rdi
+ /* Sign extend the lower 32bit as syscall numbers are treated as int */
+ movslq %eax, %rsi
++
++ /* clobbers %rax, make sure it is after saving the syscall nr */
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ call do_syscall_64 /* returns with IRQs disabled */
+
+ /*
+@@ -189,8 +194,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
+ * perf profiles. Nothing jumps here.
+ */
+ syscall_return_via_sysret:
+- /* rcx and r11 are already restored (see code above) */
+- POP_REGS pop_rdi=0 skip_r11rcx=1
++ IBRS_EXIT
++ POP_REGS pop_rdi=0
+
+ /*
+ * Now all regs are restored except RSP and RDI.
+@@ -243,7 +248,6 @@ SYM_FUNC_START(__switch_to_asm)
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+@@ -252,7 +256,6 @@ SYM_FUNC_START(__switch_to_asm)
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+-#endif
+
+ /* restore callee-saved registers */
+ popq %r15
+@@ -315,6 +318,14 @@ SYM_CODE_END(ret_from_fork)
+ #endif
+ .endm
+
++SYM_CODE_START_LOCAL(xen_error_entry)
++ UNWIND_HINT_FUNC
++ PUSH_AND_CLEAR_REGS save_ret=1
++ ENCODE_FRAME_POINTER 8
++ UNTRAIN_RET
++ RET
++SYM_CODE_END(xen_error_entry)
++
+ /**
+ * idtentry_body - Macro to emit code calling the C function
+ * @cfunc: C function to be called
+@@ -322,7 +333,18 @@ SYM_CODE_END(ret_from_fork)
+ */
+ .macro idtentry_body cfunc has_error_code:req
+
+- call error_entry
++ /*
++ * Call error_entry() and switch to the task stack if from userspace.
++ *
++ * When in XENPV, it is already in the task stack, and it can't fault
++ * for native_iret() nor native_load_gs_index() since XENPV uses its
++ * own pvops for IRET and load_gs_index(). And it doesn't need to
++ * switch the CR3. So it can skip invoking error_entry().
++ */
++ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
++ "call xen_error_entry", X86_FEATURE_XENPV
++
++ ENCODE_FRAME_POINTER
+ UNWIND_HINT_REGS
+
+ movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
+@@ -351,6 +373,7 @@ SYM_CODE_END(ret_from_fork)
+ SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+ ASM_CLAC
++ cld
+
+ .if \has_error_code == 0
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+@@ -418,6 +441,7 @@ SYM_CODE_END(\asmsym)
+ SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
++ cld
+
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+
+@@ -473,6 +497,7 @@ SYM_CODE_END(\asmsym)
+ SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
++ cld
+
+ /*
+ * If the entry is from userspace, switch stacks and treat it as
+@@ -499,6 +524,7 @@ SYM_CODE_START(\asmsym)
+ call vc_switch_off_ist
+ movq %rax, %rsp /* Switch to new stack */
+
++ ENCODE_FRAME_POINTER
+ UNWIND_HINT_REGS
+
+ /* Update pt_regs */
+@@ -534,6 +560,7 @@ SYM_CODE_END(\asmsym)
+ SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS offset=8
+ ASM_CLAC
++ cld
+
+ /* paranoid_entry returns GS information for paranoid_exit in EBX. */
+ call paranoid_entry
+@@ -567,6 +594,7 @@ __irqentry_text_end:
+
+ SYM_CODE_START_LOCAL(common_interrupt_return)
+ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
++ IBRS_EXIT
+ #ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testb $3, CS(%rsp)
+@@ -574,6 +602,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ ud2
+ 1:
+ #endif
++#ifdef CONFIG_XEN_PV
++ ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
++#endif
++
+ POP_REGS pop_rdi=0
+
+ /*
+@@ -670,6 +702,7 @@ native_irq_return_ldt:
+ pushq %rdi /* Stash user RDI */
+ swapgs /* to kernel GS */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
++ UNTRAIN_RET
+
+ movq PER_CPU_VAR(espfix_waddr), %rdi
+ movq %rax, (0*8)(%rdi) /* user RAX */
+@@ -734,7 +767,7 @@ SYM_FUNC_START(asm_load_gs_index)
+ 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
+ swapgs
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(asm_load_gs_index)
+ EXPORT_SYMBOL(asm_load_gs_index)
+
+@@ -841,10 +874,12 @@ SYM_CODE_END(xen_failsafe_callback)
+ * 1 -> no SWAPGS on exit
+ *
+ * Y GSBASE value at entry, must be restored in paranoid_exit
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+ */
+ SYM_CODE_START_LOCAL(paranoid_entry)
+ UNWIND_HINT_FUNC
+- cld
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+
+@@ -885,11 +920,12 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ * is needed here.
+ */
+ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+- ret
++ jmp .Lparanoid_gsbase_done
+
+ .Lparanoid_entry_checkgs:
+ /* EBX = 1 -> kernel GSBASE active, no restore required */
+ movl $1, %ebx
++
+ /*
+ * The kernel-enforced convention is a negative GSBASE indicates
+ * a kernel value. No SWAPGS needed on entry and exit.
+@@ -897,22 +933,23 @@ SYM_CODE_START_LOCAL(paranoid_entry)
+ movl $MSR_GS_BASE, %ecx
+ rdmsr
+ testl %edx, %edx
+- jns .Lparanoid_entry_swapgs
+- ret
++ js .Lparanoid_kernel_gsbase
+
+-.Lparanoid_entry_swapgs:
++ /* EBX = 0 -> SWAPGS required on exit */
++ xorl %ebx, %ebx
+ swapgs
++.Lparanoid_kernel_gsbase:
++ FENCE_SWAPGS_KERNEL_ENTRY
++.Lparanoid_gsbase_done:
+
+ /*
+- * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+- * unconditional CR3 write, even in the PTI case. So do an lfence
+- * to prevent GS speculation, regardless of whether PTI is enabled.
++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
++ * CR3 above, keep the old value in a callee saved register.
+ */
+- FENCE_SWAPGS_KERNEL_ENTRY
++ IBRS_ENTER save_reg=%r15
++ UNTRAIN_RET
+
+- /* EBX = 0 -> SWAPGS required on exit */
+- xorl %ebx, %ebx
+- ret
++ RET
+ SYM_CODE_END(paranoid_entry)
+
+ /*
+@@ -933,9 +970,19 @@ SYM_CODE_END(paranoid_entry)
+ * 1 -> no SWAPGS on exit
+ *
+ * Y User space GSBASE, must be restored unconditionally
++ *
++ * R14 - old CR3
++ * R15 - old SPEC_CTRL
+ */
+ SYM_CODE_START_LOCAL(paranoid_exit)
+ UNWIND_HINT_REGS
++
++ /*
++ * Must restore IBRS state before both CR3 and %GS since we need access
++ * to the per-CPU x86_spec_ctrl_shadow variable.
++ */
++ IBRS_EXIT save_reg=%r15
++
+ /*
+ * The order of operations is important. RESTORE_CR3 requires
+ * kernel GSBASE.
+@@ -964,13 +1011,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
+ SYM_CODE_END(paranoid_exit)
+
+ /*
+- * Save all registers in pt_regs, and switch GS if needed.
++ * Switch GS and CR3 if needed.
+ */
+ SYM_CODE_START_LOCAL(error_entry)
+ UNWIND_HINT_FUNC
+- cld
++
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
++
+ testb $3, CS+8(%rsp)
+ jz .Lerror_kernelspace
+
+@@ -982,21 +1030,15 @@ SYM_CODE_START_LOCAL(error_entry)
+ FENCE_SWAPGS_USER_ENTRY
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ IBRS_ENTER
++ UNTRAIN_RET
+
++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
+ .Lerror_entry_from_usermode_after_swapgs:
++
+ /* Put us onto the real thread stack. */
+- popq %r12 /* save return addr in %12 */
+- movq %rsp, %rdi /* arg0 = pt_regs pointer */
+ call sync_regs
+- movq %rax, %rsp /* switch stack */
+- ENCODE_FRAME_POINTER
+- pushq %r12
+- ret
+-
+-.Lerror_entry_done_lfence:
+- FENCE_SWAPGS_KERNEL_ENTRY
+-.Lerror_entry_done:
+- ret
++ RET
+
+ /*
+ * There are two places in the kernel that can potentially fault with
+@@ -1020,8 +1062,16 @@ SYM_CODE_START_LOCAL(error_entry)
+ * .Lgs_change's error handler with kernel gsbase.
+ */
+ SWAPGS
+- FENCE_SWAPGS_USER_ENTRY
+- jmp .Lerror_entry_done
++
++ /*
++ * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
++ * kernel or user gsbase.
++ */
++.Lerror_entry_done_lfence:
++ FENCE_SWAPGS_KERNEL_ENTRY
++ leaq 8(%rsp), %rax /* return pt_regs pointer */
++ ANNOTATE_UNRET_END
++ RET
+
+ .Lbstep_iret:
+ /* Fix truncated RIP */
+@@ -1036,14 +1086,16 @@ SYM_CODE_START_LOCAL(error_entry)
+ SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
++ IBRS_ENTER
++ UNTRAIN_RET
+
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+ * as if we faulted immediately after IRET.
+ */
+- mov %rsp, %rdi
++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
+ call fixup_bad_iret
+- mov %rax, %rsp
++ mov %rax, %rdi
+ jmp .Lerror_entry_from_usermode_after_swapgs
+ SYM_CODE_END(error_entry)
+
+@@ -1105,6 +1157,7 @@ SYM_CODE_START(asm_exc_nmi)
+ */
+
+ ASM_CLAC
++ cld
+
+ /* Use %rdx as our temp variable throughout */
+ pushq %rdx
+@@ -1124,7 +1177,6 @@ SYM_CODE_START(asm_exc_nmi)
+ */
+
+ swapgs
+- cld
+ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
+ movq %rsp, %rdx
+@@ -1140,6 +1192,9 @@ SYM_CODE_START(asm_exc_nmi)
+ PUSH_AND_CLEAR_REGS rdx=(%rdx)
+ ENCODE_FRAME_POINTER
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ /*
+ * At this point we no longer need to worry about stack damage
+ * due to nesting -- we're on the normal thread stack and we're
+@@ -1362,6 +1417,9 @@ end_repeat_nmi:
+ movq $-1, %rsi
+ call exc_nmi
+
++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
++ IBRS_EXIT save_reg=%r15
++
+ /* Always restore stashed CR3 value (see paranoid_entry) */
+ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
+@@ -1429,7 +1487,7 @@ SYM_CODE_END(ignore_sysret)
+ #endif
+
+ .pushsection .text, "ax"
+-SYM_CODE_START(rewind_stack_do_exit)
++SYM_CODE_START(rewind_stack_and_make_dead)
+ UNWIND_HINT_FUNC
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+@@ -1438,6 +1496,6 @@ SYM_CODE_START(rewind_stack_do_exit)
+ leaq -PTREGS_SIZE(%rax), %rsp
+ UNWIND_HINT_REGS
+
+- call do_exit
+-SYM_CODE_END(rewind_stack_do_exit)
++ call make_task_dead
++SYM_CODE_END(rewind_stack_and_make_dead)
+ .popsection
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 0051cf5c792d1..4d637a965efbe 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -4,7 +4,6 @@
+ *
+ * Copyright 2000-2002 Andi Kleen, SuSE Labs.
+ */
+-#include "calling.h"
+ #include <asm/asm-offsets.h>
+ #include <asm/current.h>
+ #include <asm/errno.h>
+@@ -14,9 +13,12 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/nospec-branch.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
++#include "calling.h"
++
+ .section .entry.text, "ax"
+
+ /*
+@@ -47,7 +49,7 @@
+ * 0(%ebp) arg6
+ */
+ SYM_CODE_START(entry_SYSENTER_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /* Interrupts are off on entry. */
+ SWAPGS
+
+@@ -112,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+
+ cld
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ /*
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
+ * ourselves. To save a few cycles, we can check whether
+@@ -197,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
+ * 0(%esp) arg6
+ */
+ SYM_CODE_START(entry_SYSCALL_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /* Interrupts are off on entry. */
+ swapgs
+
+@@ -252,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
+
+ UNWIND_HINT_REGS
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ movq %rsp, %rdi
+ call do_fast_syscall_32
+ /* XEN PV guests always use IRET path */
+@@ -266,6 +274,8 @@ sysret32_from_system_call:
+ */
+ STACKLEAK_ERASE
+
++ IBRS_EXIT
++
+ movq RBX(%rsp), %rbx /* pt_regs->rbx */
+ movq RBP(%rsp), %rbp /* pt_regs->rbp */
+ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
+@@ -339,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
+ * ebp arg6
+ */
+ SYM_CODE_START(entry_INT80_compat)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ /*
+ * Interrupts are off on entry.
+ */
+@@ -409,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat)
+
+ cld
+
++ IBRS_ENTER
++ UNTRAIN_RET
++
+ movq %rsp, %rdi
+ call do_int80_syscall_32
+ jmp swapgs_restore_regs_and_return_to_usermode
+diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
+index f1f96d4d8cd60..ff6e7003da974 100644
+--- a/arch/x86/entry/thunk_32.S
++++ b/arch/x86/entry/thunk_32.S
+@@ -24,15 +24,13 @@ SYM_CODE_START_NOALIGN(\name)
+ popl %edx
+ popl %ecx
+ popl %eax
+- ret
++ RET
+ _ASM_NOKPROBE(\name)
+ SYM_CODE_END(\name)
+ .endm
+
+-#ifdef CONFIG_PREEMPTION
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
+-#endif
+
+diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
+index 496b11ec469de..f38b07d2768bb 100644
+--- a/arch/x86/entry/thunk_64.S
++++ b/arch/x86/entry/thunk_64.S
+@@ -31,14 +31,11 @@ SYM_FUNC_END(\name)
+ _ASM_NOKPROBE(\name)
+ .endm
+
+-#ifdef CONFIG_PREEMPTION
+ THUNK preempt_schedule_thunk, preempt_schedule
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+ EXPORT_SYMBOL(preempt_schedule_thunk)
+ EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
+-#endif
+
+-#ifdef CONFIG_PREEMPTION
+ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
+ popq %r11
+ popq %r10
+@@ -50,7 +47,6 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
+ popq %rsi
+ popq %rdi
+ popq %rbp
+- ret
++ RET
+ _ASM_NOKPROBE(__thunk_restore)
+ SYM_CODE_END(__thunk_restore)
+-#endif
+diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
+index a2dddcc189f69..c277c63195ce8 100644
+--- a/arch/x86/entry/vdso/Makefile
++++ b/arch/x86/entry/vdso/Makefile
+@@ -92,6 +92,7 @@ endif
+ endif
+
+ $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
+
+ #
+ # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+@@ -178,7 +179,7 @@ quiet_cmd_vdso = VDSO $@
+ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+ VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
+- $(call ld-option, --eh-frame-hdr) -Bsymbolic
++ $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack
+ GCOV_PROFILE := n
+
+ quiet_cmd_vdso_and_check = VDSO $@
+diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
+index 4bf48462fca7a..e8c60ae7a7c83 100644
+--- a/arch/x86/entry/vdso/vdso.lds.S
++++ b/arch/x86/entry/vdso/vdso.lds.S
+@@ -27,7 +27,9 @@ VERSION {
+ __vdso_time;
+ clock_getres;
+ __vdso_clock_getres;
++#ifdef CONFIG_X86_SGX
+ __vdso_sgx_enter_enclave;
++#endif
+ local: *;
+ };
+ }
+diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
+index 6ddd7a937b3e3..d33c6513fd2cb 100644
+--- a/arch/x86/entry/vdso/vdso32/system_call.S
++++ b/arch/x86/entry/vdso/vdso32/system_call.S
+@@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL)
+ popl %ecx
+ CFI_RESTORE ecx
+ CFI_ADJUST_CFA_OFFSET -4
+- ret
++ RET
+ CFI_ENDPROC
+
+ .size __kernel_vsyscall,.-__kernel_vsyscall
+diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
+index 235a5794296ac..a380f7ecdd544 100644
+--- a/arch/x86/entry/vdso/vma.c
++++ b/arch/x86/entry/vdso/vma.c
+@@ -322,8 +322,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
+
+ /* Round the lowest possible end address up to a PMD boundary. */
+ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+- if (end >= TASK_SIZE_MAX)
+- end = TASK_SIZE_MAX;
++ if (end >= DEFAULT_MAP_WINDOW)
++ end = DEFAULT_MAP_WINDOW;
+ end -= len;
+
+ if (end > start) {
+@@ -438,7 +438,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+ static __init int vdso_setup(char *s)
+ {
+ vdso64_enabled = simple_strtoul(s, NULL, 0);
+- return 0;
++ return 1;
+ }
+ __setup("vdso=", vdso_setup);
+
+diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S
+index 99dafac992e2c..d77d278ee9dd6 100644
+--- a/arch/x86/entry/vdso/vsgx.S
++++ b/arch/x86/entry/vdso/vsgx.S
+@@ -81,7 +81,7 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
+ pop %rbx
+ leave
+ .cfi_def_cfa %rsp, 8
+- ret
++ RET
+
+ /* The out-of-line code runs with the pre-leave stack frame. */
+ .cfi_def_cfa %rbp, 16
+diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
+index 1b40b92970831..fd2ee9408e914 100644
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -226,7 +226,8 @@ bool emulate_vsyscall(unsigned long error_code,
+ if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
+ warn_bad_vsyscall(KERN_DEBUG, regs,
+ "seccomp tried to change syscall nr or ip");
+- do_exit(SIGSYS);
++ force_exit_sig(SIGSYS);
++ return true;
+ }
+ regs->orig_ax = -1;
+ if (tmp)
+diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+index 2e203f3a25a7b..ef2dd18272431 100644
+--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+@@ -20,16 +20,19 @@ __vsyscall_page:
+ mov $__NR_gettimeofday, %rax
+ syscall
+ ret
++ int3
+
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+ ret
++ int3
+
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+ ret
++ int3
+
+ .balign 4096, 0xcc
+
+diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
+index 9687a8aef01c5..4ebedc7e1188b 100644
+--- a/arch/x86/events/amd/core.c
++++ b/arch/x86/events/amd/core.c
+@@ -364,7 +364,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+- return -ENOENT;
++ return forward_event_to_ibs(event);
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+@@ -976,7 +976,7 @@ static int __init amd_core_pmu_init(void)
+ * numbered counter following it.
+ */
+ for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+- even_ctr_mask |= 1 << i;
++ even_ctr_mask |= BIT_ULL(i);
+
+ pair_constraint = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
+diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
+index 9739019d4b67a..b605e08f9a8ef 100644
+--- a/arch/x86/events/amd/ibs.c
++++ b/arch/x86/events/amd/ibs.c
+@@ -194,7 +194,7 @@ static struct perf_ibs *get_ibs_pmu(int type)
+ }
+
+ /*
+- * Use IBS for precise event sampling:
++ * core pmu config -> IBS config
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+@@ -203,25 +203,9 @@ static struct perf_ibs *get_ibs_pmu(int type)
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+- *
+- * The rip of IBS samples has skid 0. Thus, IBS supports precise
+- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+- * rip is invalid when IBS was not able to record the rip correctly.
+- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+- *
+ */
+-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
++static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
+ {
+- switch (event->attr.precise_ip) {
+- case 0:
+- return -ENOENT;
+- case 1:
+- case 2:
+- break;
+- default:
+- return -EOPNOTSUPP;
+- }
+-
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+@@ -247,22 +231,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+ return -EOPNOTSUPP;
+ }
+
++/*
++ * The rip of IBS samples has skid 0. Thus, IBS supports precise
++ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
++ * rip is invalid when IBS was not able to record the rip correctly.
++ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
++ */
++int forward_event_to_ibs(struct perf_event *event)
++{
++ u64 config = 0;
++
++ if (!event->attr.precise_ip || event->attr.precise_ip > 2)
++ return -EOPNOTSUPP;
++
++ if (!core_pmu_ibs_config(event, &config)) {
++ event->attr.type = perf_ibs_op.pmu.type;
++ event->attr.config = config;
++ }
++ return -ENOENT;
++}
++
+ static int perf_ibs_init(struct perf_event *event)
+ {
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+- int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+- if (perf_ibs) {
+- config = event->attr.config;
+- } else {
+- perf_ibs = &perf_ibs_op;
+- ret = perf_ibs_precise_event(event, &config);
+- if (ret)
+- return ret;
+- }
++ if (!perf_ibs)
++ return -ENOENT;
++
++ config = event->attr.config;
+
+ if (event->pmu != &perf_ibs->pmu)
+ return -ENOENT;
+@@ -304,6 +303,16 @@ static int perf_ibs_init(struct perf_event *event)
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
++ /*
++ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
++ * recorded as part of interrupt regs. Thus we need to use rip from
++ * interrupt regs while unwinding call stack. Setting _EARLY flag
++ * makes sure we unwind call-stack before perf sample rip is set to
++ * IbsOpRip.
++ */
++ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
++ event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
++
+ return 0;
+ }
+
+@@ -687,6 +696,14 @@ fail:
+ data.raw = &raw;
+ }
+
++ /*
++ * rip recorded by IbsOpRip will not be consistent with rsp and rbp
++ * recorded as part of interrupt regs. Thus we need to use rip from
++ * interrupt regs while unwinding call stack.
++ */
++ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
++ data.callchain = perf_callchain(event, iregs);
++
+ throttle = perf_event_overflow(event, &data, &regs);
+ out:
+ if (throttle) {
+@@ -759,9 +776,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+ return ret;
+ }
+
+-static __init void perf_event_ibs_init(void)
++static __init int perf_event_ibs_init(void)
+ {
+ struct attribute **attr = ibs_op_format_attrs;
++ int ret;
+
+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+@@ -773,7 +791,9 @@ static __init void perf_event_ibs_init(void)
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
+ perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
+
+- perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
++ ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
++ if (ret)
++ return ret;
+
+ if (ibs_caps & IBS_CAPS_OPCNT) {
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+@@ -786,15 +806,35 @@ static __init void perf_event_ibs_init(void)
+ perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ }
+
+- perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
++ ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
++ if (ret)
++ goto err_op;
++
++ ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
++ if (ret)
++ goto err_nmi;
+
+- register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+ pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
++ return 0;
++
++err_nmi:
++ perf_pmu_unregister(&perf_ibs_op.pmu);
++ free_percpu(perf_ibs_op.pcpu);
++ perf_ibs_op.pcpu = NULL;
++err_op:
++ perf_pmu_unregister(&perf_ibs_fetch.pmu);
++ free_percpu(perf_ibs_fetch.pcpu);
++ perf_ibs_fetch.pcpu = NULL;
++
++ return ret;
+ }
+
+ #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+-static __init void perf_event_ibs_init(void) { }
++static __init int perf_event_ibs_init(void)
++{
++ return 0;
++}
+
+ #endif
+
+@@ -1064,9 +1104,7 @@ static __init int amd_ibs_init(void)
+ x86_pmu_amd_ibs_starting_cpu,
+ x86_pmu_amd_ibs_dying_cpu);
+
+- perf_event_ibs_init();
+-
+- return 0;
++ return perf_event_ibs_init();
+ }
+
+ /* Since we need the pci subsystem to init ibs we can't do this earlier: */
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 6dfa8ddaa60f7..81d5e0a1f48cd 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2762,10 +2762,11 @@ static bool perf_hw_regs(struct pt_regs *regs)
+ void
+ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct unwind_state state;
+ unsigned long addr;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+@@ -2865,10 +2866,11 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
+ void
+ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ struct stack_frame frame;
+ const struct stack_frame __user *fp;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
++ if (guest_cbs && guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+@@ -2945,18 +2947,21 @@ static unsigned long code_segment_base(struct pt_regs *regs)
+
+ unsigned long perf_instruction_pointer(struct pt_regs *regs)
+ {
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+- return perf_guest_cbs->get_guest_ip();
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
++
++ if (guest_cbs && guest_cbs->is_in_guest())
++ return guest_cbs->get_guest_ip();
+
+ return regs->ip + code_segment_base(regs);
+ }
+
+ unsigned long perf_misc_flags(struct pt_regs *regs)
+ {
++ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+ int misc = 0;
+
+- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+- if (perf_guest_cbs->is_user_mode())
++ if (guest_cbs && guest_cbs->is_in_guest()) {
++ if (guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 9a044438072ba..b70e1522a27ac 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+
+ static struct event_constraint intel_icl_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+- FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
++ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */
++ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+@@ -254,7 +255,7 @@ static struct event_constraint intel_icl_event_constraints[] = {
+ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
+- INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
++ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
+ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
+@@ -280,7 +281,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+- INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
++ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+@@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
+
+ static struct event_constraint intel_spr_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+- FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
++ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+@@ -2787,6 +2788,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
+ {
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ struct perf_guest_info_callbacks *guest_cbs;
+ int bit;
+ int handled = 0;
+ u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
+@@ -2853,9 +2855,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
+ */
+ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
+ handled++;
+- if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
+- perf_guest_cbs->handle_intel_pt_intr))
+- perf_guest_cbs->handle_intel_pt_intr();
++
++ guest_cbs = perf_get_guest_cbs();
++ if (unlikely(guest_cbs && guest_cbs->is_in_guest() &&
++ guest_cbs->handle_intel_pt_intr))
++ guest_cbs->handle_intel_pt_intr();
+ else
+ intel_pt_interrupt();
+ }
+@@ -2998,8 +3002,10 @@ intel_vlbr_constraints(struct perf_event *event)
+ {
+ struct event_constraint *c = &vlbr_constraint;
+
+- if (unlikely(constraint_match(c, event->hw.config)))
++ if (unlikely(constraint_match(c, event->hw.config))) {
++ event->hw.flags |= c->flags;
+ return c;
++ }
+
+ return NULL;
+ }
+@@ -4648,6 +4654,19 @@ static __initconst const struct x86_pmu intel_pmu = {
+ .lbr_read = intel_pmu_lbr_read_64,
+ .lbr_save = intel_pmu_lbr_save,
+ .lbr_restore = intel_pmu_lbr_restore,
++
++ /*
++ * SMM has access to all 4 rings and while traditionally SMM code only
++ * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
++ *
++ * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
++ * between SMM or not, this results in what should be pure userspace
++ * counters including SMM data.
++ *
++ * This is a clear privilege issue, therefore globally disable
++ * counting SMM by default.
++ */
++ .attr_freeze_on_smi = 1,
+ };
+
+ static __init void intel_clovertown_quirk(void)
+@@ -4694,6 +4713,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
++ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
+@@ -5447,7 +5467,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
+ /* Disabled fixed counters which are not in CPUID */
+ c->idxmsk64 &= intel_ctrl;
+
+- if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
++ /*
++ * Don't extend the pseudo-encoding to the
++ * generic counters
++ */
++ if (!use_fixed_pseudo_encoding(c->code))
+ c->idxmsk64 |= (1ULL << num_counters) - 1;
+ }
+ c->idxmsk64 &=
+@@ -6085,6 +6109,7 @@ __init int intel_pmu_init(void)
+ break;
+
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
++ case INTEL_FAM6_EMERALDRAPIDS_X:
+ pmem = true;
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+@@ -6181,6 +6206,19 @@ __init int intel_pmu_init(void)
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
++
++ /*
++ * Quirk: For some Alder Lake machine, when all E-cores are disabled in
++ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
++ * the X86_FEATURE_HYBRID_CPU is still set. The above codes will
++ * mistakenly add extra counters for P-cores. Correct the number of
++ * counters here.
++ */
++ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
++ pmu->num_counters = x86_pmu.num_counters;
++ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
++ }
++
+ pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+ pmu->unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index 8647713276a73..21a9cb48daf5d 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -236,6 +236,7 @@ static u64 load_latency_data(u64 status)
+ static u64 store_latency_data(u64 status)
+ {
+ union intel_x86_pebs_dse dse;
++ union perf_mem_data_src src;
+ u64 val;
+
+ dse.val = status;
+@@ -263,7 +264,14 @@ static u64 store_latency_data(u64 status)
+
+ val |= P(BLK, NA);
+
+- return val;
++ /*
++ * the pebs_data_source table is only for loads
++ * so override the mem_op to say STORE instead
++ */
++ src.val = val;
++ src.mem_op = P(OP,STORE);
++
++ return src.val;
+ }
+
+ struct pebs_record_core {
+@@ -923,12 +931,18 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
+ };
+
+ struct event_constraint intel_icl_pebs_event_constraints[] = {
+- INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
++ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */
++ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
+
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
+- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
+
+@@ -943,14 +957,19 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
+ };
+
+ struct event_constraint intel_spr_pebs_event_constraints[] = {
+- INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
++ INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
+
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
+ INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
+- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
+- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
+
+diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
+index 9e6d6eaeb4cb6..b3f92255cbd2d 100644
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -1114,6 +1114,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+
+ if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
+ reg->config = mask;
++
++ /*
++ * The Arch LBR HW can retrieve the common branch types
++ * from the LBR_INFO. It doesn't require the high overhead
++ * SW disassemble.
++ * Enable the branch type by default for the Arch LBR.
++ */
++ reg->reg |= X86_BR_TYPE_SAVE;
+ return 0;
+ }
+
+@@ -1734,6 +1742,9 @@ static bool is_arch_lbr_xsave_available(void)
+ * Check the LBR state with the corresponding software structure.
+ * Disable LBR XSAVES support if the size doesn't match.
+ */
++ if (xfeature_size(XFEATURE_LBR) == 0)
++ return false;
++
+ if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
+ return false;
+
+@@ -1836,7 +1847,7 @@ void __init intel_pmu_arch_lbr_init(void)
+ return;
+
+ clear_arch_lbr:
+- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
++ setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
+ }
+
+ /**
+diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
+index 7f406c14715fd..d0295240c78a8 100644
+--- a/arch/x86/events/intel/pt.c
++++ b/arch/x86/events/intel/pt.c
+@@ -13,6 +13,8 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+ #include <linux/types.h>
++#include <linux/bits.h>
++#include <linux/limits.h>
+ #include <linux/slab.h>
+ #include <linux/device.h>
+
+@@ -472,7 +474,7 @@ static u64 pt_config_filters(struct perf_event *event)
+ pt->filters.filter[range].msr_b = filter->msr_b;
+ }
+
+- rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
++ rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
+ }
+
+ return rtit_ctl;
+@@ -897,8 +899,9 @@ static void pt_handle_status(struct pt *pt)
+ * means we are already losing data; need to let the decoder
+ * know.
+ */
+- if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+- buf->output_off == pt_buffer_region_size(buf)) {
++ if (!buf->single &&
++ (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
++ buf->output_off == pt_buffer_region_size(buf))) {
+ perf_aux_output_flag(&pt->handle,
+ PERF_AUX_FLAG_TRUNCATED);
+ advance++;
+@@ -1244,6 +1247,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+ if (1 << order != nr_pages)
+ goto out;
+
++ /*
++ * Some processors cannot always support single range for more than
++ * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might
++ * also be affected, so for now rather than trying to keep track of
++ * which ones, just disable it for all.
++ */
++ if (nr_pages > 1)
++ goto out;
++
+ buf->single = true;
+ buf->nr_pages = nr_pages;
+ ret = 0;
+@@ -1347,10 +1359,36 @@ static void pt_addr_filters_fini(struct perf_event *event)
+ event->hw.addr_filters = NULL;
+ }
+
+-static inline bool valid_kernel_ip(unsigned long ip)
++#ifdef CONFIG_X86_64
++static u64 canonical_address(u64 vaddr, u8 vaddr_bits)
++{
++ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
++}
++
++static u64 is_canonical_address(u64 vaddr, u8 vaddr_bits)
++{
++ return canonical_address(vaddr, vaddr_bits) == vaddr;
++}
++
++/* Clamp to a canonical address greater-than-or-equal-to the address given */
++static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
++{
++ return is_canonical_address(vaddr, vaddr_bits) ?
++ vaddr :
++ -BIT_ULL(vaddr_bits - 1);
++}
++
++/* Clamp to a canonical address less-than-or-equal-to the address given */
++static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
+ {
+- return virt_addr_valid(ip) && kernel_ip(ip);
++ return is_canonical_address(vaddr, vaddr_bits) ?
++ vaddr :
++ BIT_ULL(vaddr_bits - 1) - 1;
+ }
++#else
++#define clamp_to_ge_canonical_addr(x, y) (x)
++#define clamp_to_le_canonical_addr(x, y) (x)
++#endif
+
+ static int pt_event_addr_filters_validate(struct list_head *filters)
+ {
+@@ -1366,14 +1404,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
+ filter->action == PERF_ADDR_FILTER_ACTION_START)
+ return -EOPNOTSUPP;
+
+- if (!filter->path.dentry) {
+- if (!valid_kernel_ip(filter->offset))
+- return -EINVAL;
+-
+- if (!valid_kernel_ip(filter->offset + filter->size))
+- return -EINVAL;
+- }
+-
+ if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
+ return -EOPNOTSUPP;
+ }
+@@ -1397,9 +1427,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
+ if (filter->path.dentry && !fr[range].start) {
+ msr_a = msr_b = 0;
+ } else {
+- /* apply the offset */
+- msr_a = fr[range].start;
+- msr_b = msr_a + fr[range].size - 1;
++ unsigned long n = fr[range].size - 1;
++ unsigned long a = fr[range].start;
++ unsigned long b;
++
++ if (a > ULONG_MAX - n)
++ b = ULONG_MAX;
++ else
++ b = a + n;
++ /*
++ * Apply the offset. 64-bit addresses written to the
++ * MSRs must be canonical, but the range can encompass
++ * non-canonical addresses. Since software cannot
++ * execute at non-canonical addresses, adjusting to
++ * canonical addresses does not affect the result of the
++ * address filter.
++ */
++ msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
++ msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
++ if (msr_b < msr_a)
++ msr_a = msr_b = 0;
+ }
+
+ filters->filter[range].msr_a = msr_a;
+diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
+index c72e368dd1641..7e16c590f2593 100644
+--- a/arch/x86/events/intel/uncore.c
++++ b/arch/x86/events/intel/uncore.c
+@@ -1829,6 +1829,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
++ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
+ {},
+ };
+diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
+index b9687980aab6d..d6f7c6c1a930a 100644
+--- a/arch/x86/events/intel/uncore.h
++++ b/arch/x86/events/intel/uncore.h
+@@ -2,6 +2,7 @@
+ #include <linux/slab.h>
+ #include <linux/pci.h>
+ #include <asm/apicdef.h>
++#include <asm/intel-family.h>
+ #include <linux/io-64-nonatomic-lo-hi.h>
+
+ #include <linux/perf_event.h>
+diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
+index 7280c8a3c8310..6d735611c281c 100644
+--- a/arch/x86/events/intel/uncore_discovery.h
++++ b/arch/x86/events/intel/uncore_discovery.h
+@@ -30,7 +30,7 @@
+
+
+ #define uncore_discovery_invalid_unit(unit) \
+- (!unit.table1 || !unit.ctl || !unit.table3 || \
++ (!unit.table1 || !unit.ctl || \
+ unit.table1 == -1ULL || unit.ctl == -1ULL || \
+ unit.table3 == -1ULL)
+
+diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
+index 0f63706cdadfc..912fb3821a6bb 100644
+--- a/arch/x86/events/intel/uncore_snb.c
++++ b/arch/x86/events/intel/uncore_snb.c
+@@ -788,6 +788,22 @@ int snb_pci2phy_map_init(int devid)
+ return 0;
+ }
+
++static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
++{
++ struct hw_perf_event *hwc = &event->hw;
++
++ /*
++ * SNB IMC counters are 32-bit and are laid out back to back
++ * in MMIO space. Therefore we must use a 32-bit accessor function
++ * using readq() from uncore_mmio_read_counter() causes problems
++ * because it is reading 64-bit at a time. This is okay for the
++ * uncore_perf_event_update() function because it drops the upper
++ * 32-bits but not okay for plain uncore_read_counter() as invoked
++ * in uncore_pmu_event_start().
++ */
++ return (u64)readl(box->io_addr + hwc->event_base);
++}
++
+ static struct pmu snb_uncore_imc_pmu = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = snb_uncore_imc_event_init,
+@@ -807,7 +823,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
+ .disable_event = snb_uncore_imc_disable_event,
+ .enable_event = snb_uncore_imc_enable_event,
+ .hw_config = snb_uncore_imc_hw_config,
+- .read_counter = uncore_mmio_read_counter,
++ .read_counter = snb_uncore_imc_read_counter,
+ };
+
+ static struct intel_uncore_type snb_uncore_imc = {
+@@ -1407,6 +1423,7 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+ /* MCHBAR is disabled */
+ if (!(mch_bar & BIT(0))) {
+ pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
++ pci_dev_put(pdev);
+ return;
+ }
+ mch_bar &= ~BIT(0);
+@@ -1420,6 +1437,8 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+ box->io_addr = ioremap(addr, type->mmio_map_size);
+ if (!box->io_addr)
+ pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
++
++ pci_dev_put(pdev);
+ }
+
+ static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
+diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
+index 5ddc0f30db6fc..9b5859812f4fb 100644
+--- a/arch/x86/events/intel/uncore_snbep.c
++++ b/arch/x86/events/intel/uncore_snbep.c
+@@ -452,7 +452,7 @@
+ #define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0
+
+ /* ICX IMC */
+-#define ICX_NUMBER_IMC_CHN 2
++#define ICX_NUMBER_IMC_CHN 3
+ #define ICX_IMC_MEM_STRIDE 0x4
+
+ /* SPR */
+@@ -2891,6 +2891,7 @@ static bool hswep_has_limit_sbox(unsigned int device)
+ return false;
+
+ pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4);
++ pci_dev_put(dev);
+ if (!hswep_get_chop(capid4))
+ return true;
+
+@@ -3608,6 +3609,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
++ /* Any of the CHA events may be filtered by Thread/Core-ID.*/
++ if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN)
++ idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID;
+
+ for (er = skx_uncore_cha_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+@@ -3675,6 +3679,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xd4, 0xc),
++ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
+ EVENT_CONSTRAINT_END
+ };
+
+@@ -3799,6 +3804,21 @@ static const struct attribute_group *skx_iio_attr_update[] = {
+ NULL,
+ };
+
++static void pmu_clear_mapping_attr(const struct attribute_group **groups,
++ struct attribute_group *ag)
++{
++ int i;
++
++ for (i = 0; groups[i]; i++) {
++ if (groups[i] == ag) {
++ for (i++; groups[i]; i++)
++ groups[i - 1] = groups[i];
++ groups[i - 1] = NULL;
++ break;
++ }
++ }
++}
++
+ static int
+ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
+ {
+@@ -3847,7 +3867,7 @@ clear_attrs:
+ clear_topology:
+ kfree(type->topology);
+ clear_attr_update:
+- type->attr_update = NULL;
++ pmu_clear_mapping_attr(type->attr_update, ag);
+ return ret;
+ }
+
+@@ -4488,6 +4508,8 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map
+ type->topology = NULL;
+ }
+
++ pci_dev_put(dev);
++
+ return ret;
+ }
+
+@@ -4525,6 +4547,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
+ pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
+ }
+
++static struct event_constraint snr_uncore_iio_constraints[] = {
++ UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
++ UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
++ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
++ EVENT_CONSTRAINT_END
++};
++
+ static struct intel_uncore_type snr_uncore_iio = {
+ .name = "iio",
+ .num_counters = 4,
+@@ -4536,6 +4565,7 @@ static struct intel_uncore_type snr_uncore_iio = {
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL,
+ .msr_offset = SNR_IIO_MSR_OFFSET,
++ .constraints = snr_uncore_iio_constraints,
+ .ops = &ivbep_uncore_msr_ops,
+ .format_group = &snr_uncore_iio_format_group,
+ .attr_update = snr_iio_attr_update,
+@@ -4845,6 +4875,8 @@ static int snr_uncore_mmio_map(struct intel_uncore_box *box,
+
+ addr += box_ctl;
+
++ pci_dev_put(pdev);
++
+ box->io_addr = ioremap(addr, type->mmio_map_size);
+ if (!box->io_addr) {
+ pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
+@@ -5076,8 +5108,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
++ UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
++ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
+ EVENT_CONSTRAINT_END
+ };
+
+@@ -5125,6 +5159,11 @@ static int icx_iio_get_topology(struct intel_uncore_type *type)
+
+ static int icx_iio_set_mapping(struct intel_uncore_type *type)
+ {
++ /* Detect ICX-D system. This case is not supported */
++ if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) {
++ pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group);
++ return -EPERM;
++ }
+ return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+ }
+
+@@ -5463,12 +5502,12 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = {
+ static struct intel_uncore_type icx_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+- .num_boxes = 8,
++ .num_boxes = 12,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+- .event_descs = hswep_uncore_imc_events,
++ .event_descs = snr_uncore_imc_events,
+ .perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
+ .event_ctl = SNR_IMC_MMIO_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+@@ -5647,6 +5686,7 @@ static struct intel_uncore_type spr_uncore_chabox = {
+ .event_mask = SPR_CHA_PMON_EVENT_MASK,
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .num_shared_regs = 1,
++ .constraints = skx_uncore_chabox_constraints,
+ .ops = &spr_uncore_chabox_ops,
+ .format_group = &spr_uncore_chabox_format_group,
+ .attr_update = uncore_alias_groups,
+@@ -5658,6 +5698,7 @@ static struct intel_uncore_type spr_uncore_iio = {
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .format_group = &snr_uncore_iio_format_group,
+ .attr_update = uncore_alias_groups,
++ .constraints = icx_uncore_iio_constraints,
+ };
+
+ static struct attribute *spr_uncore_raw_formats_attr[] = {
+@@ -5686,9 +5727,16 @@ static struct intel_uncore_type spr_uncore_irp = {
+
+ };
+
++static struct event_constraint spr_uncore_m2pcie_constraints[] = {
++ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
++ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
++ EVENT_CONSTRAINT_END
++};
++
+ static struct intel_uncore_type spr_uncore_m2pcie = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "m2pcie",
++ .constraints = spr_uncore_m2pcie_constraints,
+ };
+
+ static struct intel_uncore_type spr_uncore_pcu = {
+@@ -5765,6 +5813,7 @@ static struct intel_uncore_type spr_uncore_upi = {
+ static struct intel_uncore_type spr_uncore_m3upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m3upi",
++ .constraints = icx_uncore_m3upi_constraints,
+ };
+
+ static struct intel_uncore_type spr_uncore_mdf = {
+@@ -5773,6 +5822,7 @@ static struct intel_uncore_type spr_uncore_mdf = {
+ };
+
+ #define UNCORE_SPR_NUM_UNCORE_TYPES 12
++#define UNCORE_SPR_CHA 0
+ #define UNCORE_SPR_IIO 1
+ #define UNCORE_SPR_IMC 6
+
+@@ -6015,12 +6065,32 @@ static int uncore_type_max_boxes(struct intel_uncore_type **types,
+ return max + 1;
+ }
+
++#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE
++
+ void spr_uncore_cpu_init(void)
+ {
++ struct intel_uncore_type *type;
++ u64 num_cbo;
++
+ uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
+ UNCORE_SPR_MSR_EXTRA_UNCORES,
+ spr_msr_uncores);
+
++ type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA);
++ if (type) {
++ /*
++ * The value from the discovery table (stored in the type->num_boxes
++ * of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a
++ * firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it.
++ */
++ rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo);
++ /*
++ * The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact
++ * the EMR XCC. Don't let the value from the MSR replace the existing value.
++ */
++ if (num_cbo)
++ type->num_boxes = num_cbo;
++ }
+ spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
+ }
+
+diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
+index 96c775abe31ff..d23b5523cdd3b 100644
+--- a/arch/x86/events/msr.c
++++ b/arch/x86/events/msr.c
+@@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data)
+ case INTEL_FAM6_BROADWELL_G:
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
++ case INTEL_FAM6_EMERALDRAPIDS_X:
+
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_D:
+diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
+index 85feafacc445d..840ee43e3e464 100644
+--- a/arch/x86/events/rapl.c
++++ b/arch/x86/events/rapl.c
+@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
+ * - perf_msr_probe(PERF_RAPL_MAX)
+ * - want to use same event codes across both architectures
+ */
+-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
+- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
++static struct perf_msr amd_rapl_msrs[] = {
++ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
++ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
++ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
++ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
++ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
+ };
+
+-
+ static int rapl_cpu_offline(unsigned int cpu)
+ {
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+@@ -801,6 +804,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
++ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl),
++ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
+diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
+index 949d845c922b4..3e9acdaeed1ec 100644
+--- a/arch/x86/events/zhaoxin/core.c
++++ b/arch/x86/events/zhaoxin/core.c
+@@ -541,7 +541,13 @@ __init int zhaoxin_pmu_init(void)
+
+ switch (boot_cpu_data.x86) {
+ case 0x06:
+- if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
++ /*
++ * Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS.
++ * Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D]
++ * ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3
++ */
++ if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) ||
++ boot_cpu_data.x86_model == 0x19) {
+
+ x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
+
+diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
+index 708a2712a516d..95f98af74fdca 100644
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
+ static int hv_cpu_init(unsigned int cpu)
+ {
+ union hv_vp_assist_msr_contents msr = { 0 };
+- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
++ struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
+ int ret;
+
+ ret = hv_common_cpu_init(cpu);
+@@ -55,34 +55,32 @@ static int hv_cpu_init(unsigned int cpu)
+ if (!hv_vp_assist_page)
+ return 0;
+
+- if (!*hvp) {
+- if (hv_root_partition) {
+- /*
+- * For root partition we get the hypervisor provided VP assist
+- * page, instead of allocating a new page.
+- */
+- rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+- *hvp = memremap(msr.pfn <<
+- HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
+- PAGE_SIZE, MEMREMAP_WB);
+- } else {
+- /*
+- * The VP assist page is an "overlay" page (see Hyper-V TLFS's
+- * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
+- * out to make sure we always write the EOI MSR in
+- * hv_apic_eoi_write() *after* the EOI optimization is disabled
+- * in hv_cpu_die(), otherwise a CPU may not be stopped in the
+- * case of CPU offlining and the VM will hang.
+- */
++ if (hv_root_partition) {
++ /*
++ * For root partition we get the hypervisor provided VP assist
++ * page, instead of allocating a new page.
++ */
++ rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
++ *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
++ PAGE_SIZE, MEMREMAP_WB);
++ } else {
++ /*
++ * The VP assist page is an "overlay" page (see Hyper-V TLFS's
++ * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
++ * out to make sure we always write the EOI MSR in
++ * hv_apic_eoi_write() *after* the EOI optimization is disabled
++ * in hv_cpu_die(), otherwise a CPU may not be stopped in the
++ * case of CPU offlining and the VM will hang.
++ */
++ if (!*hvp)
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+- if (*hvp)
+- msr.pfn = vmalloc_to_pfn(*hvp);
+- }
+- WARN_ON(!(*hvp));
+- if (*hvp) {
+- msr.enable = 1;
+- wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+- }
++ if (*hvp)
++ msr.pfn = vmalloc_to_pfn(*hvp);
++
++ }
++ if (!WARN_ON(!(*hvp))) {
++ msr.enable = 1;
++ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+ }
+
+ return 0;
+@@ -139,7 +137,6 @@ void set_hv_tscchange_cb(void (*cb)(void))
+ struct hv_reenlightenment_control re_ctrl = {
+ .vector = HYPERV_REENLIGHTENMENT_VECTOR,
+ .enabled = 1,
+- .target_vp = hv_vp_index[smp_processor_id()]
+ };
+ struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+@@ -148,13 +145,20 @@ void set_hv_tscchange_cb(void (*cb)(void))
+ return;
+ }
+
++ if (!hv_vp_index)
++ return;
++
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
+ wmb();
+
++ re_ctrl.target_vp = hv_vp_index[get_cpu()];
++
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
++
++ put_cpu();
+ }
+ EXPORT_SYMBOL_GPL(set_hv_tscchange_cb);
+
+@@ -342,20 +346,13 @@ static void __init hv_get_partition_id(void)
+ */
+ void __init hyperv_init(void)
+ {
+- u64 guest_id, required_msrs;
++ u64 guest_id;
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+ int cpuhp;
+
+ if (x86_hyper_type != X86_HYPER_MS_HYPERV)
+ return;
+
+- /* Absolutely required MSRs */
+- required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
+- HV_MSR_VP_INDEX_AVAILABLE;
+-
+- if ((ms_hyperv.features & required_msrs) != required_msrs)
+- return;
+-
+ if (hv_common_init())
+ return;
+
+@@ -472,8 +469,6 @@ void hyperv_cleanup(void)
+ {
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+
+- unregister_syscore_ops(&hv_syscore_ops);
+-
+ /* Reset our OS id */
+ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+
+diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
+index bd13736d0c054..0ad2378fe6ad7 100644
+--- a/arch/x86/hyperv/mmu.c
++++ b/arch/x86/hyperv/mmu.c
+@@ -68,15 +68,6 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
+
+ local_irq_save(flags);
+
+- /*
+- * Only check the mask _after_ interrupt has been disabled to avoid the
+- * mask changing under our feet.
+- */
+- if (cpumask_empty(cpus)) {
+- local_irq_restore(flags);
+- return;
+- }
+-
+ flush_pcpu = (struct hv_tlb_flush **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+
+@@ -115,7 +106,9 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
+ * must. We will also check all VP numbers when walking the
+ * supplied CPU set to remain correct in all cases.
+ */
+- if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
++ cpu = cpumask_last(cpus);
++
++ if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
+ goto do_ex_hypercall;
+
+ for_each_cpu(cpu, cpus) {
+@@ -131,6 +124,12 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
+ __set_bit(vcpu, (unsigned long *)
+ &flush->processor_mask);
+ }
++
++ /* nothing to flush if 'processor_mask' ends up being empty */
++ if (!flush->processor_mask) {
++ local_irq_restore(flags);
++ return;
++ }
+ }
+
+ /*
+diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
+index 1b07fb102c4ed..07949102a08d0 100644
+--- a/arch/x86/include/asm/GEN-for-each-reg.h
++++ b/arch/x86/include/asm/GEN-for-each-reg.h
+@@ -1,11 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are in machine order; things rely on that.
++ */
+ #ifdef CONFIG_64BIT
+ GEN(rax)
+-GEN(rbx)
+ GEN(rcx)
+ GEN(rdx)
++GEN(rbx)
++GEN(rsp)
++GEN(rbp)
+ GEN(rsi)
+ GEN(rdi)
+-GEN(rbp)
+ GEN(r8)
+ GEN(r9)
+ GEN(r10)
+@@ -16,10 +21,11 @@ GEN(r14)
+ GEN(r15)
+ #else
+ GEN(eax)
+-GEN(ebx)
+ GEN(ecx)
+ GEN(edx)
++GEN(ebx)
++GEN(esp)
++GEN(ebp)
+ GEN(esi)
+ GEN(edi)
+-GEN(ebp)
+ #endif
+diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
+index 9aff97f0de7fd..d937c55e717e6 100644
+--- a/arch/x86/include/asm/acenv.h
++++ b/arch/x86/include/asm/acenv.h
+@@ -13,7 +13,19 @@
+
+ /* Asm macros */
+
+-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
++/*
++ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
++ * It is required to prevent data loss.
++ *
++ * While running inside virtual machine, the kernel can bypass cache flushing.
++ * Changing sleep state in a virtual machine doesn't affect the host system
++ * sleep state and cannot lead to data loss.
++ */
++#define ACPI_FLUSH_CPU_CACHE() \
++do { \
++ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \
++ wbinvd(); \
++} while (0)
+
+ int __acpi_acquire_global_lock(unsigned int *lock);
+ int __acpi_release_global_lock(unsigned int *lock);
+diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
+index a3c2315aca121..a364971967c40 100644
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -75,6 +75,8 @@ extern int alternatives_patched;
+
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
++extern void apply_retpolines(s32 *start, s32 *end);
++extern void apply_returns(s32 *start, s32 *end);
+
+ struct module;
+
+diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
+index 4cb726c71ed8c..8f80de627c60a 100644
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -17,21 +17,3 @@
+ extern void cmpxchg8b_emu(void);
+ #endif
+
+-#ifdef CONFIG_RETPOLINE
+-
+-#undef GEN
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+-#undef GEN
+-#define GEN(reg) \
+- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
+-#include <asm/GEN-for-each-reg.h>
+-
+-#endif /* CONFIG_RETPOLINE */
+diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
+index 3ad3da9a7d974..6dd47c9ec788a 100644
+--- a/arch/x86/include/asm/asm.h
++++ b/arch/x86/include/asm/asm.h
+@@ -122,28 +122,19 @@
+
+ #ifdef __KERNEL__
+
++# include <asm/extable_fixup_types.h>
++
+ /* Exception table entry */
+ #ifdef __ASSEMBLY__
+-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
++
++# define _ASM_EXTABLE_TYPE(from, to, type) \
+ .pushsection "__ex_table","a" ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+- .long (handler) - . ; \
++ .long type ; \
+ .popsection
+
+-# define _ASM_EXTABLE(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+-
+-# define _ASM_EXTABLE_UA(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+-
+-# define _ASM_EXTABLE_CPY(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+-
+-# define _ASM_EXTABLE_FAULT(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+-
+ # ifdef CONFIG_KPROBES
+ # define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist","aw" ; \
+@@ -155,26 +146,51 @@
+ # endif
+
+ #else /* ! __ASSEMBLY__ */
+-# define _EXPAND_EXTABLE_HANDLE(x) #x
+-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
++
++# define DEFINE_EXTABLE_TYPE_REG \
++ ".macro extable_type_reg type:req reg:req\n" \
++ ".set .Lfound, 0\n" \
++ ".set .Lregnr, 0\n" \
++ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \
++ ".ifc \\reg, %%\\rs\n" \
++ ".set .Lfound, .Lfound+1\n" \
++ ".long \\type + (.Lregnr << 8)\n" \
++ ".endif\n" \
++ ".set .Lregnr, .Lregnr+1\n" \
++ ".endr\n" \
++ ".set .Lregnr, 0\n" \
++ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \
++ ".ifc \\reg, %%\\rs\n" \
++ ".set .Lfound, .Lfound+1\n" \
++ ".long \\type + (.Lregnr << 8)\n" \
++ ".endif\n" \
++ ".set .Lregnr, .Lregnr+1\n" \
++ ".endr\n" \
++ ".if (.Lfound != 1)\n" \
++ ".error \"extable_type_reg: bad register argument\"\n" \
++ ".endif\n" \
++ ".endm\n"
++
++# define UNDEFINE_EXTABLE_TYPE_REG \
++ ".purgem extable_type_reg\n"
++
++# define _ASM_EXTABLE_TYPE(from, to, type) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+- " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
++ " .long " __stringify(type) " \n" \
+ " .popsection\n"
+
+-# define _ASM_EXTABLE(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+-
+-# define _ASM_EXTABLE_UA(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+-
+-# define _ASM_EXTABLE_CPY(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+-
+-# define _ASM_EXTABLE_FAULT(from, to) \
+- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
++# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \
++ " .pushsection \"__ex_table\",\"a\"\n" \
++ " .balign 4\n" \
++ " .long (" #from ") - .\n" \
++ " .long (" #to ") - .\n" \
++ DEFINE_EXTABLE_TYPE_REG \
++ "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\
++ UNDEFINE_EXTABLE_TYPE_REG \
++ " .popsection\n"
+
+ /* For C file, we already have NOKPROBE_SYMBOL macro */
+
+@@ -188,6 +204,17 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
+ #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+ #endif /* __ASSEMBLY__ */
+
+-#endif /* __KERNEL__ */
++#define _ASM_EXTABLE(from, to) \
++ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
+
++#define _ASM_EXTABLE_UA(from, to) \
++ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
++
++#define _ASM_EXTABLE_CPY(from, to) \
++ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY)
++
++#define _ASM_EXTABLE_FAULT(from, to) \
++ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
++
++#endif /* __KERNEL__ */
+ #endif /* _ASM_X86_ASM_H */
+diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
+index 84b87538a15de..66570e95af398 100644
+--- a/arch/x86/include/asm/bug.h
++++ b/arch/x86/include/asm/bug.h
+@@ -22,7 +22,7 @@
+
+ #ifdef CONFIG_DEBUG_BUGVERBOSE
+
+-#define _BUG_FLAGS(ins, flags) \
++#define _BUG_FLAGS(ins, flags, extra) \
+ do { \
+ asm_inline volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"aw\"\n" \
+@@ -31,7 +31,8 @@ do { \
+ "\t.word %c1" "\t# bug_entry::line\n" \
+ "\t.word %c2" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c3\n" \
+- ".popsection" \
++ ".popsection\n" \
++ extra \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
+@@ -39,14 +40,15 @@ do { \
+
+ #else /* !CONFIG_DEBUG_BUGVERBOSE */
+
+-#define _BUG_FLAGS(ins, flags) \
++#define _BUG_FLAGS(ins, flags, extra) \
+ do { \
+ asm_inline volatile("1:\t" ins "\n" \
+ ".pushsection __bug_table,\"aw\"\n" \
+ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
+ "\t.word %c0" "\t# bug_entry::flags\n" \
+ "\t.org 2b+%c1\n" \
+- ".popsection" \
++ ".popsection\n" \
++ extra \
+ : : "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
+ } while (0)
+@@ -55,7 +57,7 @@ do { \
+
+ #else
+
+-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
++#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
+
+ #endif /* CONFIG_GENERIC_BUG */
+
+@@ -63,8 +65,8 @@ do { \
+ #define BUG() \
+ do { \
+ instrumentation_begin(); \
+- _BUG_FLAGS(ASM_UD2, 0); \
+- unreachable(); \
++ _BUG_FLAGS(ASM_UD2, 0, ""); \
++ __builtin_unreachable(); \
+ } while (0)
+
+ /*
+@@ -75,9 +77,9 @@ do { \
+ */
+ #define __WARN_FLAGS(flags) \
+ do { \
++ __auto_type __flags = BUGFLAG_WARNING|(flags); \
+ instrumentation_begin(); \
+- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
+- annotate_reachable(); \
++ _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \
+ instrumentation_end(); \
+ } while (0)
+
+diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
+index 92ae283899409..f25ca2d709d40 100644
+--- a/arch/x86/include/asm/bugs.h
++++ b/arch/x86/include/asm/bugs.h
+@@ -4,8 +4,6 @@
+
+ #include <asm/processor.h>
+
+-extern void check_bugs(void);
+-
+ #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
+ int ppro_with_ram_bug(void);
+ #else
+diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
+index 7516e4199b3c6..20fd0acd7d800 100644
+--- a/arch/x86/include/asm/compat.h
++++ b/arch/x86/include/asm/compat.h
+@@ -28,15 +28,13 @@ typedef u16 compat_ipc_pid_t;
+ typedef __kernel_fsid_t compat_fsid_t;
+
+ struct compat_stat {
+- compat_dev_t st_dev;
+- u16 __pad1;
++ u32 st_dev;
+ compat_ino_t st_ino;
+ compat_mode_t st_mode;
+ compat_nlink_t st_nlink;
+ __compat_uid_t st_uid;
+ __compat_gid_t st_gid;
+- compat_dev_t st_rdev;
+- u16 __pad2;
++ u32 st_rdev;
+ u32 st_size;
+ u32 st_blksize;
+ u32 st_blocks;
+diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
+index 3d52b094850a9..75efc4c6f0766 100644
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -10,6 +10,12 @@
+
+ #ifdef CONFIG_X86_64
+
++#ifdef CONFIG_AMD_MEM_ENCRYPT
++#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ
++#else
++#define VC_EXCEPTION_STKSZ 0
++#endif
++
+ /* Macro to enforce the same ordering and stack sizes */
+ #define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
+ char DF_stack_guard[guardsize]; \
+@@ -28,7 +34,7 @@
+
+ /* The exception stacks' physical storage. No guard pages required */
+ struct exception_stacks {
+- ESTACKS_MEMBERS(0, 0)
++ ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ)
+ };
+
+ /* The effective cpu entry area mapping with guard pages. */
+@@ -137,7 +143,7 @@ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+ extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+-static inline struct entry_stack *cpu_entry_stack(int cpu)
++static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
+ {
+ return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+ }
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 16a51e7288d58..cc3f62f5d5515 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -32,6 +32,7 @@ enum cpuid_leafs
+ CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
+ CPUID_8000_001F_EAX,
++ CPUID_8000_0021_EAX,
+ };
+
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -51,7 +52,7 @@ extern const char * const x86_power_flags[32];
+ extern const char * const x86_bug_flags[NBUGINTS*32];
+
+ #define test_cpu_cap(c, bit) \
+- test_bit(bit, (unsigned long *)((c)->x86_capability))
++ arch_test_bit(bit, (unsigned long *)((c)->x86_capability))
+
+ /*
+ * There are 32 bits/features in each mask word. The high bits
+@@ -91,8 +92,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ REQUIRED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+
+ #define DISABLED_MASK_BIT_SET(feature_bit) \
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
+@@ -115,8 +117,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ DISABLED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index d0ce5cfd3ac14..d6089072ee41f 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -13,8 +13,8 @@
+ /*
+ * Defines x86 CPU feature bits
+ */
+-#define NCAPINTS 20 /* N 32-bit words worth of info */
+-#define NBUGINTS 1 /* N 32-bit bug flags */
++#define NCAPINTS 21 /* N 32-bit words worth of info */
++#define NBUGINTS 2 /* N 32-bit bug flags */
+
+ /*
+ * Note: If the comment begins with a quoted string, that string is used
+@@ -203,8 +203,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ /* FREE! ( 7*32+10) */
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+@@ -294,6 +294,21 @@
+ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
+ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
++#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
++
++
++#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
++
++#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
++#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
++#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+@@ -313,6 +328,7 @@
+ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
+ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+@@ -400,6 +416,10 @@
+ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+
++#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
++#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
++#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
++
+ /*
+ * BUG word(s)
+ */
+@@ -436,5 +456,14 @@
+ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
++#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
++#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
+
++/* BUG word 2 */
++#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
++#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
+index cfdf307ddc012..9ed8343c9b3cb 100644
+--- a/arch/x86/include/asm/debugreg.h
++++ b/arch/x86/include/asm/debugreg.h
+@@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno)
+ asm("mov %%db6, %0" :"=r" (val));
+ break;
+ case 7:
+- asm("mov %%db7, %0" :"=r" (val));
++ /*
++ * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them
++ * with other code.
++ *
++ * This is needed because a DR7 access can cause a #VC exception
++ * when running under SEV-ES. Taking a #VC exception is not a
++ * safe thing to do just anywhere in the entry code and
++ * re-ordering might place the access into an unsafe location.
++ *
++ * This happened in the NMI handler, where the DR7 read was
++ * re-ordered to happen before the call to sev_es_ist_enter(),
++ * causing stack recursion.
++ */
++ asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER);
+ break;
+ default:
+ BUG();
+@@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
+ asm("mov %0, %%db6" ::"r" (value));
+ break;
+ case 7:
+- asm("mov %0, %%db7" ::"r" (value));
++ /*
++ * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them
++ * with other code.
++ *
++ * While is didn't happen with a DR7 write (see the DR7 read
++ * comment above which explains where it happened), add the
++ * __FORCE_ORDER here too to avoid similar problems in the
++ * future.
++ */
++ asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER);
+ break;
+ default:
+ BUG();
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 8f28fafa98b32..99a12012c66ee 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -56,6 +56,25 @@
+ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+ #endif
+
++#ifdef CONFIG_RETPOLINE
++# define DISABLE_RETPOLINE 0
++#else
++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
++#ifdef CONFIG_RETHUNK
++# define DISABLE_RETHUNK 0
++#else
++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
++#endif
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++# define DISABLE_UNRET 0
++#else
++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
++#endif
++
+ /* Force disable because it's broken beyond repair */
+ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+
+@@ -79,7 +98,7 @@
+ #define DISABLED_MASK8 0
+ #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+ #define DISABLED_MASK10 0
+-#define DISABLED_MASK11 0
++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+ #define DISABLED_MASK12 0
+ #define DISABLED_MASK13 0
+ #define DISABLED_MASK14 0
+@@ -89,6 +108,7 @@
+ #define DISABLED_MASK17 0
+ #define DISABLED_MASK18 0
+ #define DISABLED_MASK19 0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
++#define DISABLED_MASK20 0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
+index 4d0b126835b8a..63158fd558567 100644
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -197,8 +197,6 @@ static inline bool efi_runtime_supported(void)
+
+ extern void parse_efi_setup(u64 phys_addr, u32 data_len);
+
+-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+-
+ extern void efi_thunk_runtime_setup(void);
+ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
+index 43184640b579a..a12fdf01dc260 100644
+--- a/arch/x86/include/asm/entry-common.h
++++ b/arch/x86/include/asm/entry-common.h
+@@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ static __always_inline void arch_exit_to_user_mode(void)
+ {
+ mds_user_clear_cpu_buffers();
++ amd_clear_divider();
+ }
+ #define arch_exit_to_user_mode arch_exit_to_user_mode
+
+diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
+index 1f0cbc52937ca..155c991ba95e2 100644
+--- a/arch/x86/include/asm/extable.h
++++ b/arch/x86/include/asm/extable.h
+@@ -1,12 +1,18 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ #ifndef _ASM_X86_EXTABLE_H
+ #define _ASM_X86_EXTABLE_H
++
++#include <asm/extable_fixup_types.h>
++
+ /*
+- * The exception table consists of triples of addresses relative to the
+- * exception table entry itself. The first address is of an instruction
+- * that is allowed to fault, the second is the target at which the program
+- * should continue. The third is a handler function to deal with the fault
+- * caused by the instruction in the first field.
++ * The exception table consists of two addresses relative to the
++ * exception table entry itself and a type selector field.
++ *
++ * The first address is of an instruction that is allowed to fault, the
++ * second is the target at which the program should continue.
++ *
++ * The type entry is used by fixup_exception() to select the handler to
++ * deal with the fault caused by the instruction in the first field.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+@@ -15,7 +21,7 @@
+ */
+
+ struct exception_table_entry {
+- int insn, fixup, handler;
++ int insn, fixup, data;
+ };
+ struct pt_regs;
+
+@@ -25,21 +31,27 @@ struct pt_regs;
+ do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+- (a)->handler = (b)->handler + (delta); \
+- (b)->handler = (tmp).handler - (delta); \
++ (a)->data = (b)->data; \
++ (b)->data = (tmp).data; \
+ } while (0)
+
+-enum handler_type {
+- EX_HANDLER_NONE,
+- EX_HANDLER_FAULT,
+- EX_HANDLER_UACCESS,
+- EX_HANDLER_OTHER
+-};
+-
+ extern int fixup_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr);
+ extern int fixup_bug(struct pt_regs *regs, int trapnr);
+-extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
++extern int ex_get_fixup_type(unsigned long ip);
+ extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
+
++#ifdef CONFIG_X86_MCE
++extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
++#else
++static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { }
++#endif
++
++#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64)
++bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs);
++#else
++static inline bool ex_handler_bpf(const struct exception_table_entry *x,
++ struct pt_regs *regs) { return false; }
++#endif
++
+ #endif
+diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
+new file mode 100644
+index 0000000000000..b3b785b9bb14c
+--- /dev/null
++++ b/arch/x86/include/asm/extable_fixup_types.h
+@@ -0,0 +1,58 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H
++#define _ASM_X86_EXTABLE_FIXUP_TYPES_H
++
++/*
++ * Our IMM is signed, as such it must live at the top end of the word. Also,
++ * since C99 hex constants are of ambigious type, force cast the mask to 'int'
++ * so that FIELD_GET() will DTRT and sign extend the value when it extracts it.
++ */
++#define EX_DATA_TYPE_MASK ((int)0x000000FF)
++#define EX_DATA_REG_MASK ((int)0x00000F00)
++#define EX_DATA_FLAG_MASK ((int)0x0000F000)
++#define EX_DATA_IMM_MASK ((int)0xFFFF0000)
++
++#define EX_DATA_REG_SHIFT 8
++#define EX_DATA_FLAG_SHIFT 12
++#define EX_DATA_IMM_SHIFT 16
++
++#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT)
++#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT)
++#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT)
++
++/* segment regs */
++#define EX_REG_DS EX_DATA_REG(8)
++#define EX_REG_ES EX_DATA_REG(9)
++#define EX_REG_FS EX_DATA_REG(10)
++#define EX_REG_GS EX_DATA_REG(11)
++
++/* flags */
++#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1)
++#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2)
++#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3)
++
++/* types */
++#define EX_TYPE_NONE 0
++#define EX_TYPE_DEFAULT 1
++#define EX_TYPE_FAULT 2
++#define EX_TYPE_UACCESS 3
++#define EX_TYPE_COPY 4
++#define EX_TYPE_CLEAR_FS 5
++#define EX_TYPE_FPU_RESTORE 6
++#define EX_TYPE_BPF 7
++#define EX_TYPE_WRMSR 8
++#define EX_TYPE_RDMSR 9
++#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */
++#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */
++#define EX_TYPE_WRMSR_IN_MCE 12
++#define EX_TYPE_RDMSR_IN_MCE 13
++#define EX_TYPE_DEFAULT_MCE_SAFE 14
++#define EX_TYPE_FAULT_MCE_SAFE 15
++
++#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */
++#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0))
++
++#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */
++#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT))
++
++#endif
+diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
+index 5a18694a89b24..6a6f741edda36 100644
+--- a/arch/x86/include/asm/fpu/internal.h
++++ b/arch/x86/include/asm/fpu/internal.h
+@@ -43,7 +43,7 @@ extern void fpu_flush_thread(void);
+ extern void fpu__init_cpu(void);
+ extern void fpu__init_system_xstate(void);
+ extern void fpu__init_cpu_xstate(void);
+-extern void fpu__init_system(struct cpuinfo_x86 *c);
++extern void fpu__init_system(void);
+ extern void fpu__init_check_bugs(void);
+ extern void fpu__resume_cpu(void);
+
+@@ -126,7 +126,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu);
+ #define kernel_insn(insn, output, input...) \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \
+ : output : input)
+
+ static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx)
+@@ -253,7 +253,7 @@ static inline void fxsave(struct fxregs_state *fx)
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ "3:\n" \
+- _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
++ _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ : \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+@@ -416,8 +416,7 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+- * Either one of these invalidation functions is enough. Invalidate
+- * a resource you control: CPU if using the CPU for something else
++ * Invalidate a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
+ */
+diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
+index 109dfcc75299d..d91df71f60fb1 100644
+--- a/arch/x86/include/asm/fpu/xstate.h
++++ b/arch/x86/include/asm/fpu/xstate.h
+@@ -136,8 +136,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
+
+ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+ int xfeature_size(int xfeature_nr);
+-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
++int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru);
++int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
+
+ void xsaves(struct xregs_state *xsave, u64 mask);
+ void xrstors(struct xregs_state *xsave, u64 mask);
+diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
+index f9c00110a69ad..99d345b686fa2 100644
+--- a/arch/x86/include/asm/futex.h
++++ b/arch/x86/include/asm/futex.h
+@@ -17,13 +17,9 @@ do { \
+ int oldval = 0, ret; \
+ asm volatile("1:\t" insn "\n" \
+ "2:\n" \
+- "\t.section .fixup,\"ax\"\n" \
+- "3:\tmov\t%3, %1\n" \
+- "\tjmp\t2b\n" \
+- "\t.previous\n" \
+- _ASM_EXTABLE_UA(1b, 3b) \
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \
+ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
+- : "i" (-EFAULT), "0" (oparg), "1" (0)); \
++ : "0" (oparg), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+@@ -39,15 +35,11 @@ do { \
+ "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
+ "\tjnz\t2b\n" \
+ "4:\n" \
+- "\t.section .fixup,\"ax\"\n" \
+- "5:\tmov\t%5, %1\n" \
+- "\tjmp\t4b\n" \
+- "\t.previous\n" \
+- _ASM_EXTABLE_UA(1b, 5b) \
+- _ASM_EXTABLE_UA(3b, 5b) \
++ _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \
++ _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \
+ : "=&a" (oldval), "=&r" (ret), \
+ "+m" (*uaddr), "=&r" (tem) \
+- : "r" (oparg), "i" (-EFAULT), "1" (0)); \
++ : "r" (oparg), "1" (0)); \
+ if (ret) \
+ goto label; \
+ *oval = oldval; \
+@@ -95,15 +87,11 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ if (!user_access_begin(uaddr, sizeof(u32)))
+ return -EFAULT;
+ asm volatile("\n"
+- "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
++ "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
+ "2:\n"
+- "\t.section .fixup, \"ax\"\n"
+- "3:\tmov %3, %0\n"
+- "\tjmp 2b\n"
+- "\t.previous\n"
+- _ASM_EXTABLE_UA(1b, 3b)
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+- : "i" (-EFAULT), "r" (newval), "1" (oldval)
++ : "r" (newval), "1" (oldval)
+ : "memory"
+ );
+ user_access_end();
+diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
+index 2322d6bd58833..b54b3e18d94ba 100644
+--- a/arch/x86/include/asm/hyperv-tlfs.h
++++ b/arch/x86/include/asm/hyperv-tlfs.h
+@@ -529,7 +529,7 @@ struct hv_enlightened_vmcs {
+ u64 guest_rip;
+
+ u32 hv_clean_fields;
+- u32 hv_padding_32;
++ u32 padding32_1;
+ u32 hv_synthetic_controls;
+ struct {
+ u32 nested_flush_hypercall:1;
+@@ -537,7 +537,7 @@ struct hv_enlightened_vmcs {
+ u32 reserved:30;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+-
++ u32 padding32_2;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 padding64_4[4];
+diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
+index 91d7182ad2d6e..3df123f437c96 100644
+--- a/arch/x86/include/asm/insn-eval.h
++++ b/arch/x86/include/asm/insn-eval.h
+@@ -15,12 +15,15 @@
+ #define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf)
+ #define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4))
+
++int pt_regs_offset(struct pt_regs *regs, int regno);
++
+ bool insn_has_rep_prefix(struct insn *insn);
+ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
+ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
+ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
+ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
+ int insn_get_code_seg_params(struct pt_regs *regs);
++int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
+ int insn_fetch_from_user(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
+ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index 27158436f322d..d975c60f863a2 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -105,10 +105,24 @@
+
+ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
+
++#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
++
++#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
++#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
++
+ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
+ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
++#define INTEL_FAM6_ALDERLAKE_N 0xBE
++
++#define INTEL_FAM6_RAPTORLAKE 0xB7
++#define INTEL_FAM6_RAPTORLAKE_P 0xBA
++#define INTEL_FAM6_RAPTORLAKE_S 0xBF
++
++#define INTEL_FAM6_LUNARLAKE_M 0xBD
+
+-/* "Small Core" Processors (Atom) */
++#define INTEL_FAM6_ARROWLAKE 0xC6
++
++/* "Small Core" Processors (Atom/E-Core) */
+
+ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
+ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+@@ -135,6 +149,10 @@
+ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
+ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
+
++#define INTEL_FAM6_SIERRAFOREST_X 0xAF
++
++#define INTEL_FAM6_GRANDRIDGE 0xB6
++
+ /* Xeon Phi */
+
+ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
+diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
+index bf1ed2ddc74bd..7a983119bc403 100644
+--- a/arch/x86/include/asm/iommu.h
++++ b/arch/x86/include/asm/iommu.h
+@@ -17,8 +17,10 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+ {
+ u64 start = rmrr->base_address;
+ u64 end = rmrr->end_address + 1;
++ int entry_type;
+
+- if (e820__mapped_all(start, end, E820_TYPE_RESERVED))
++ entry_type = e820__get_entry_type(start, end);
++ if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS)
+ return 0;
+
+ pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
+diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
+index 562854c608082..e087cd7837c31 100644
+--- a/arch/x86/include/asm/irq_stack.h
++++ b/arch/x86/include/asm/irq_stack.h
+@@ -77,11 +77,11 @@
+ * Function calls can clobber anything except the callee-saved
+ * registers. Tell the compiler.
+ */
+-#define call_on_irqstack(func, asm_call, argconstr...) \
++#define call_on_stack(stack, func, asm_call, argconstr...) \
+ { \
+ register void *tos asm("r11"); \
+ \
+- tos = ((void *)__this_cpu_read(hardirq_stack_ptr)); \
++ tos = ((void *)(stack)); \
+ \
+ asm_inline volatile( \
+ "movq %%rsp, (%[tos]) \n" \
+@@ -98,6 +98,26 @@
+ ); \
+ }
+
++#define ASM_CALL_ARG0 \
++ "call %P[__func] \n" \
++ ASM_REACHABLE
++
++#define ASM_CALL_ARG1 \
++ "movq %[arg1], %%rdi \n" \
++ ASM_CALL_ARG0
++
++#define ASM_CALL_ARG2 \
++ "movq %[arg2], %%rsi \n" \
++ ASM_CALL_ARG1
++
++#define ASM_CALL_ARG3 \
++ "movq %[arg3], %%rdx \n" \
++ ASM_CALL_ARG2
++
++#define call_on_irqstack(func, asm_call, argconstr...) \
++ call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
++ func, asm_call, argconstr)
++
+ /* Macros to assert type correctness for run_*_on_irqstack macros */
+ #define assert_function_type(func, proto) \
+ static_assert(__builtin_types_compatible_p(typeof(&func), proto))
+@@ -147,8 +167,7 @@
+ */
+ #define ASM_CALL_SYSVEC \
+ "call irq_enter_rcu \n" \
+- "movq %[arg1], %%rdi \n" \
+- "call %P[__func] \n" \
++ ASM_CALL_ARG1 \
+ "call irq_exit_rcu \n"
+
+ #define SYSVEC_CONSTRAINTS , [arg1] "r" (regs)
+@@ -168,12 +187,10 @@
+ */
+ #define ASM_CALL_IRQ \
+ "call irq_enter_rcu \n" \
+- "movq %[arg1], %%rdi \n" \
+- "movl %[arg2], %%esi \n" \
+- "call %P[__func] \n" \
++ ASM_CALL_ARG2 \
+ "call irq_exit_rcu \n"
+
+-#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" (vector)
++#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector)
+
+ #define run_irq_on_irqstack_cond(func, regs, vector) \
+ { \
+@@ -185,9 +202,6 @@
+ IRQ_CONSTRAINTS, regs, vector); \
+ }
+
+-#define ASM_CALL_SOFTIRQ \
+- "call %P[__func] \n"
+-
+ /*
+ * Macro to invoke __do_softirq on the irq stack. This is only called from
+ * task context when bottom halves are about to be reenabled and soft
+@@ -197,7 +211,7 @@
+ #define do_softirq_own_stack() \
+ { \
+ __this_cpu_write(hardirq_stack_inuse, true); \
+- call_on_irqstack(__do_softirq, ASM_CALL_SOFTIRQ); \
++ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
+ }
+
+diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
+index 0a6e34b070175..c7c924e15011d 100644
+--- a/arch/x86/include/asm/kexec.h
++++ b/arch/x86/include/asm/kexec.h
+@@ -186,6 +186,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+ extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+ #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
++#ifdef CONFIG_KEXEC_FILE
++struct purgatory_info;
++int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
++ Elf_Shdr *section,
++ const Elf_Shdr *relsec,
++ const Elf_Shdr *symtab);
++#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++#endif
+ #endif
+
+ typedef void crash_vmclear_fn(void);
+diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
+index cefe1d81e2e8b..4bdcb91478a51 100644
+--- a/arch/x86/include/asm/kvm-x86-ops.h
++++ b/arch/x86/include/asm/kvm-x86-ops.h
+@@ -34,6 +34,7 @@ KVM_X86_OP(get_segment)
+ KVM_X86_OP(get_cpl)
+ KVM_X86_OP(set_segment)
+ KVM_X86_OP_NULL(get_cs_db_l_bits)
++KVM_X86_OP(is_valid_cr0)
+ KVM_X86_OP(set_cr0)
+ KVM_X86_OP(is_valid_cr4)
+ KVM_X86_OP(set_cr4)
+@@ -47,6 +48,7 @@ KVM_X86_OP(set_dr7)
+ KVM_X86_OP(cache_reg)
+ KVM_X86_OP(get_rflags)
+ KVM_X86_OP(set_rflags)
++KVM_X86_OP(get_if_flag)
+ KVM_X86_OP(tlb_flush_all)
+ KVM_X86_OP(tlb_flush_current)
+ KVM_X86_OP_NULL(tlb_remote_flush)
+@@ -114,6 +116,7 @@ KVM_X86_OP(enable_smi_window)
+ KVM_X86_OP_NULL(mem_enc_op)
+ KVM_X86_OP_NULL(mem_enc_reg_region)
+ KVM_X86_OP_NULL(mem_enc_unreg_region)
++KVM_X86_OP_NULL(guest_memory_reclaimed)
+ KVM_X86_OP(get_msr_feature)
+ KVM_X86_OP(can_emulate_instruction)
+ KVM_X86_OP(apic_init_signal_blocked)
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 13f64654dfff8..08cfc26ee7c67 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -98,7 +98,7 @@
+ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+ #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+ #define KVM_REQ_TLB_FLUSH_GUEST \
+- KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
++ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+ #define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+ #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+ #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+@@ -364,6 +364,7 @@ union kvm_mmu_extended_role {
+ unsigned int cr4_smap:1;
+ unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
++ unsigned int efer_lma:1;
+ };
+ };
+
+@@ -497,6 +498,7 @@ struct kvm_pmu {
+ unsigned nr_arch_fixed_counters;
+ unsigned available_event_types;
+ u64 fixed_ctr_ctrl;
++ u64 fixed_ctr_ctrl_mask;
+ u64 global_ctrl;
+ u64 global_status;
+ u64 global_ovf_ctrl;
+@@ -504,6 +506,7 @@ struct kvm_pmu {
+ u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
+ u64 reserved_bits;
++ u64 raw_event_mask;
+ u8 version;
+ struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+ struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
+@@ -640,6 +643,7 @@ struct kvm_vcpu_arch {
+ u64 ia32_misc_enable_msr;
+ u64 smbase;
+ u64 smi_count;
++ bool at_instruction_boundary;
+ bool tpr_access_reporting;
+ bool xsaves_enabled;
+ u64 ia32_xss;
+@@ -751,7 +755,7 @@ struct kvm_vcpu_arch {
+ u8 preempted;
+ u64 msr_val;
+ u64 last_steal;
+- struct gfn_to_pfn_cache cache;
++ struct gfn_to_hva_cache cache;
+ } st;
+
+ u64 l1_tsc_offset;
+@@ -1269,6 +1273,8 @@ struct kvm_vcpu_stat {
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
++ u64 preemption_reported;
++ u64 preemption_other;
+ u64 guest_mode;
+ };
+
+@@ -1327,8 +1333,9 @@ struct kvm_x86_ops {
+ void (*set_segment)(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg);
+ void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
++ bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+- bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
++ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+@@ -1340,6 +1347,7 @@ struct kvm_x86_ops {
+ void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+ unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
+ void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
++ bool (*get_if_flag)(struct kvm_vcpu *vcpu);
+
+ void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
+@@ -1469,6 +1477,7 @@ struct kvm_x86_ops {
+ int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
++ void (*guest_memory_reclaimed)(struct kvm *kvm);
+
+ int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+@@ -1485,6 +1494,7 @@ struct kvm_x86_ops {
+ };
+
+ struct kvm_x86_nested_ops {
++ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ void (*triple_fault)(struct kvm_vcpu *vcpu);
+@@ -1507,6 +1517,7 @@ struct kvm_x86_init_ops {
+ int (*disabled_by_bios)(void);
+ int (*check_processor_compatibility)(void);
+ int (*hardware_setup)(void);
++ bool (*intel_pt_intr_in_guest)(void);
+
+ struct kvm_x86_ops *runtime_ops;
+ };
+@@ -1554,8 +1565,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+ return -ENOTSUPP;
+ }
+
+-int kvm_mmu_module_init(void);
+-void kvm_mmu_module_exit(void);
++void __init kvm_mmu_x86_module_init(void);
++int kvm_mmu_vendor_module_init(void);
++void kvm_mmu_vendor_module_exit(void);
+
+ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
+ int kvm_mmu_create(struct kvm_vcpu *vcpu);
+diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
+index 365111789cc68..5000cf59bdf5b 100644
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -18,6 +18,28 @@
+ #define __ALIGN_STR __stringify(__ALIGN)
+ #endif
+
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define RET jmp __x86_return_thunk
++#else /* CONFIG_RETPOLINE */
++#ifdef CONFIG_SLS
++#define RET ret; int3
++#else
++#define RET ret
++#endif
++#endif /* CONFIG_RETPOLINE */
++
++#else /* __ASSEMBLY__ */
++
++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
++#define ASM_RET "jmp __x86_return_thunk\n\t"
++#else /* CONFIG_RETPOLINE */
++#ifdef CONFIG_SLS
++#define ASM_RET "ret; int3\n\t"
++#else
++#define ASM_RET "ret\n\t"
++#endif
++#endif /* CONFIG_RETPOLINE */
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_LINKAGE_H */
+diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
+index 9c80c68d75b54..2356fdddd3e61 100644
+--- a/arch/x86/include/asm/mem_encrypt.h
++++ b/arch/x86/include/asm/mem_encrypt.h
+@@ -13,6 +13,7 @@
+ #ifndef __ASSEMBLY__
+
+ #include <linux/init.h>
++#include <linux/cc_platform.h>
+
+ #include <asm/bootparam.h>
+
+@@ -46,14 +47,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+
+ void __init mem_encrypt_free_decrypted_mem(void);
+
+-/* Architecture __weak replacement functions */
+-void __init mem_encrypt_init(void);
+-
+ void __init sev_es_init_vc_handling(void);
+ bool sme_active(void);
+ bool sev_active(void);
+ bool sev_es_active(void);
+
++void __init mem_encrypt_init(void);
++
+ #define __bss_decrypted __section(".bss..decrypted")
+
+ #else /* !CONFIG_AMD_MEM_ENCRYPT */
+@@ -86,6 +86,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
+
+ static inline void mem_encrypt_free_decrypted_mem(void) { }
+
++static inline void mem_encrypt_init(void) { }
++
+ #define __bss_decrypted
+
+ #endif /* CONFIG_AMD_MEM_ENCRYPT */
+diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
+index ab45a220fac47..4ca377efc9869 100644
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -5,10 +5,12 @@
+ #include <asm/cpu.h>
+ #include <linux/earlycpio.h>
+ #include <linux/initrd.h>
++#include <asm/microcode_amd.h>
+
+ struct ucode_patch {
+ struct list_head plist;
+ void *data; /* Intel uses only this one */
++ unsigned int size;
+ u32 patch_id;
+ u16 equiv_cpu;
+ };
+@@ -129,13 +131,15 @@ static inline unsigned int x86_cpuid_family(void)
+ #ifdef CONFIG_MICROCODE
+ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+-void reload_early_microcode(void);
++void reload_early_microcode(unsigned int cpu);
+ extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+ extern bool initrd_gone;
++void microcode_bsp_resume(void);
+ #else
+ static inline void __init load_ucode_bsp(void) { }
+ static inline void load_ucode_ap(void) { }
+-static inline void reload_early_microcode(void) { }
++static inline void reload_early_microcode(unsigned int cpu) { }
++static inline void microcode_bsp_resume(void) { }
+ static inline bool
+ get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
+ #endif
+diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
+index 7063b5a43220a..403a8e76b310c 100644
+--- a/arch/x86/include/asm/microcode_amd.h
++++ b/arch/x86/include/asm/microcode_amd.h
+@@ -47,12 +47,14 @@ struct microcode_amd {
+ extern void __init load_ucode_amd_bsp(unsigned int family);
+ extern void load_ucode_amd_ap(unsigned int family);
+ extern int __init save_microcode_in_initrd_amd(unsigned int family);
+-void reload_ucode_amd(void);
++void reload_ucode_amd(unsigned int cpu);
++extern void amd_check_microcode(void);
+ #else
+ static inline void __init load_ucode_amd_bsp(unsigned int family) {}
+ static inline void load_ucode_amd_ap(unsigned int family) {}
+ static inline int __init
+ save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
+-static inline void reload_ucode_amd(void) {}
++static inline void reload_ucode_amd(unsigned int cpu) {}
++static inline void amd_check_microcode(void) {}
+ #endif
+ #endif /* _ASM_X86_MICROCODE_AMD_H */
+diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
+index adccbc209169a..c2b9ab94408e6 100644
+--- a/arch/x86/include/asm/mshyperv.h
++++ b/arch/x86/include/asm/mshyperv.h
+@@ -176,13 +176,6 @@ bool hv_vcpu_is_preempted(int vcpu);
+ static inline void hv_apic_init(void) {}
+ #endif
+
+-static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
+- struct msi_desc *msi_desc)
+-{
+- msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
+- msi_entry->data.as_uint32 = msi_desc->msg.data;
+-}
+-
+ struct irq_domain *hv_create_pci_msi_domain(void);
+
+ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
+diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
+index b85147d75626e..d71c7e8b738d2 100644
+--- a/arch/x86/include/asm/msi.h
++++ b/arch/x86/include/asm/msi.h
+@@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ /* Structs and defines for the X86 specific MSI message format */
+
+ typedef struct x86_msi_data {
+- u32 vector : 8,
+- delivery_mode : 3,
+- dest_mode_logical : 1,
+- reserved : 2,
+- active_low : 1,
+- is_level : 1;
+-
+- u32 dmar_subhandle;
++ union {
++ struct {
++ u32 vector : 8,
++ delivery_mode : 3,
++ dest_mode_logical : 1,
++ reserved : 2,
++ active_low : 1,
++ is_level : 1;
++ };
++ u32 dmar_subhandle;
++ };
+ } __attribute__ ((packed)) arch_msi_msg_data_t;
+ #define arch_msi_msg_data x86_msi_data
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index a7c413432b33d..91d8322af4139 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -51,9 +51,16 @@
+ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
++
++/* A mask for bits which the kernel toggles when controlling mitigations */
++#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
++ | SPEC_CTRL_RRSBA_DIS_S)
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
++#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */
+
+ #define MSR_PPIN_CTL 0x0000004e
+ #define MSR_PPIN 0x0000004f
+@@ -91,6 +98,7 @@
+ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+ #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
+ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
+ #define ARCH_CAP_SSB_NO BIT(4) /*
+ * Not susceptible to Speculative Store Bypass
+@@ -114,6 +122,50 @@
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
++#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
++ * Not susceptible to SBDR and SSDP
++ * variants of Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_FBSDP_NO BIT(14) /*
++ * Not susceptible to FBSDP variant of
++ * Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_PSDP_NO BIT(15) /*
++ * Not susceptible to PSDP variant of
++ * Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_FB_CLEAR BIT(17) /*
++ * VERW clears CPU fill buffer
++ * even on MDS_NO CPUs.
++ */
++#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
++ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
++ * bit available to control VERW
++ * behavior.
++ */
++#define ARCH_CAP_RRSBA BIT(19) /*
++ * Indicates RET may use predictors
++ * other than the RSB. With eIBRS
++ * enabled predictions in kernel mode
++ * are restricted to targets in
++ * kernel.
++ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
++#define ARCH_CAP_GDS_CTRL BIT(25) /*
++ * CPU is vulnerable to Gather
++ * Data Sampling (GDS) and
++ * has controls for mitigation.
++ */
++#define ARCH_CAP_GDS_NO BIT(26) /*
++ * CPU is not vulnerable to Gather
++ * Data Sampling (GDS).
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+@@ -128,9 +180,12 @@
+ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
+-/* SRBDS support */
+ #define MSR_IA32_MCU_OPT_CTRL 0x00000123
+-#define RNGDS_MITG_DIS BIT(0)
++#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
++#define RTM_ALLOW BIT(1) /* TSX development mode */
++#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
++#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */
++#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */
+
+ #define MSR_IA32_SYSENTER_CS 0x00000174
+ #define MSR_IA32_SYSENTER_ESP 0x00000175
+@@ -456,6 +511,12 @@
+ #define MSR_AMD64_CPUID_FN_1 0xc0011004
+ #define MSR_AMD64_LS_CFG 0xc0011020
+ #define MSR_AMD64_DC_CFG 0xc0011022
++
++#define MSR_AMD64_DE_CFG 0xc0011029
++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
++#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
++
+ #define MSR_AMD64_BU_CFG2 0xc001102a
+ #define MSR_AMD64_IBSFETCHCTL 0xc0011030
+ #define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+@@ -489,6 +550,9 @@
+ /* Fam 17h MSRs */
+ #define MSR_F17H_IRPERF 0xc00000e9
+
++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+@@ -530,9 +594,6 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT 20
+ #define MSR_FAM10H_NODE_ID 0xc001100c
+-#define MSR_F10H_DECFG 0xc0011029
+-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index a3f87f1015d3d..d42e6c6b47b1e 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr)
+
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
+
+ return EAX_EDX_VAL(val, low, high);
+@@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
+ {
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
+ }
+
+@@ -137,17 +137,11 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
+ {
+ DECLARE_ARGS(val, low, high);
+
+- asm volatile("2: rdmsr ; xor %[err],%[err]\n"
+- "1:\n\t"
+- ".section .fixup,\"ax\"\n\t"
+- "3: mov %[fault],%[err]\n\t"
+- "xorl %%eax, %%eax\n\t"
+- "xorl %%edx, %%edx\n\t"
+- "jmp 1b\n\t"
+- ".previous\n\t"
+- _ASM_EXTABLE(2b, 3b)
++ asm volatile("1: rdmsr ; xor %[err],%[err]\n"
++ "2:\n\t"
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
+ : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+- : "c" (msr), [fault] "i" (-EIO));
++ : "c" (msr));
+ if (tracepoint_enabled(read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
+ return EAX_EDX_VAL(val, low, high);
+@@ -169,15 +163,11 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high)
+ {
+ int err;
+
+- asm volatile("2: wrmsr ; xor %[err],%[err]\n"
+- "1:\n\t"
+- ".section .fixup,\"ax\"\n\t"
+- "3: mov %[fault],%[err] ; jmp 1b\n\t"
+- ".previous\n\t"
+- _ASM_EXTABLE(2b, 3b)
++ asm volatile("1: wrmsr ; xor %[err],%[err]\n"
++ "2:\n\t"
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
+ : [err] "=a" (err)
+- : "c" (msr), "0" (low), "d" (high),
+- [fault] "i" (-EIO)
++ : "c" (msr), "0" (low), "d" (high)
+ : "memory");
+ if (tracepoint_enabled(write_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), err);
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index ec2d5c8c66947..940c15ee5650f 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -5,11 +5,15 @@
+
+ #include <linux/static_key.h>
+ #include <linux/objtool.h>
++#include <linux/linkage.h>
+
+ #include <asm/alternative.h>
+ #include <asm/cpufeatures.h>
+ #include <asm/msr-index.h>
+ #include <asm/unwind_hints.h>
++#include <asm/percpu.h>
++
++#define RETPOLINE_THUNK_SIZE 32
+
+ /*
+ * Fill the CPU return stack buffer.
+@@ -31,32 +35,57 @@
+ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+
+ /*
++ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
++ */
++#define __FILL_RETURN_SLOT \
++ ANNOTATE_INTRA_FUNCTION_CALL; \
++ call 772f; \
++ int3; \
++772:
++
++/*
++ * Stuff the entire RSB.
++ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+- mov $(nr/2), reg; \
+-771: \
+- ANNOTATE_INTRA_FUNCTION_CALL; \
+- call 772f; \
+-773: /* speculation trap */ \
+- UNWIND_HINT_EMPTY; \
+- pause; \
+- lfence; \
+- jmp 773b; \
+-772: \
+- ANNOTATE_INTRA_FUNCTION_CALL; \
+- call 774f; \
+-775: /* speculation trap */ \
+- UNWIND_HINT_EMPTY; \
+- pause; \
+- lfence; \
+- jmp 775b; \
+-774: \
+- add $(BITS_PER_LONG/8) * 2, sp; \
+- dec reg; \
+- jnz 771b;
++#ifdef CONFIG_X86_64
++#define __FILL_RETURN_BUFFER(reg, nr) \
++ mov $(nr/2), reg; \
++771: \
++ __FILL_RETURN_SLOT \
++ __FILL_RETURN_SLOT \
++ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
++ dec reg; \
++ jnz 771b; \
++ /* barrier for jnz misprediction */ \
++ lfence;
++#else
++/*
++ * i386 doesn't unconditionally have LFENCE, as such it can't
++ * do a loop.
++ */
++#define __FILL_RETURN_BUFFER(reg, nr) \
++ .rept nr; \
++ __FILL_RETURN_SLOT; \
++ .endr; \
++ add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
++#endif
++
++/*
++ * Stuff a single RSB slot.
++ *
++ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
++ * forced to retire before letting a RET instruction execute.
++ *
++ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
++ * before this point.
++ */
++#define __FILL_ONE_RETURN \
++ __FILL_RETURN_SLOT \
++ add $(BITS_PER_LONG/8), %_ASM_SP; \
++ lfence;
+
+ #ifdef __ASSEMBLY__
+
+@@ -72,6 +101,23 @@
+ .popsection
+ .endm
+
++/*
++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
++ * vs RETBleed validation.
++ */
++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
++
++/*
++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
++ * eventually turn into it's own annotation.
++ */
++.macro ANNOTATE_UNRET_END
++#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
++ ANNOTATE_RETPOLINE_SAFE
++ nop
++#endif
++.endm
++
+ /*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+@@ -81,7 +127,7 @@
+ #ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+ jmp *%\reg
+ #endif
+@@ -91,7 +137,7 @@
+ #ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ #else
+ call *%\reg
+ #endif
+@@ -101,11 +147,38 @@
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+-#ifdef CONFIG_RETPOLINE
+- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
++ __stringify(__FILL_ONE_RETURN), \ftr2
++
+ .Lskip_rsb_\@:
++.endm
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++#define CALL_UNTRAIN_RET "call entry_untrain_ret"
++#else
++#define CALL_UNTRAIN_RET ""
++#endif
++
++/*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
++ * return thunk isn't mapped into the userspace tables (then again, AMD
++ * typically has NO_MELTDOWN).
++ *
++ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
++ * entry_ibpb() will clobber AX, CX, DX.
++ *
++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
++ * where we have a stack but before any RET instruction.
++ */
++.macro UNTRAIN_RET
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
++ defined(CONFIG_CPU_SRSO)
++ ANNOTATE_UNRET_END
++ ALTERNATIVE_2 "", \
++ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ #endif
+ .endm
+
+@@ -117,7 +190,34 @@
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
++#ifdef CONFIG_RETHUNK
++extern void __x86_return_thunk(void);
++#else
++static inline void __x86_return_thunk(void) {}
++#endif
++
++extern void retbleed_return_thunk(void);
++extern void srso_return_thunk(void);
++extern void srso_alias_return_thunk(void);
++
++extern void retbleed_untrain_ret(void);
++extern void srso_untrain_ret(void);
++extern void srso_alias_untrain_ret(void);
++
++extern void entry_untrain_ret(void);
++extern void entry_ibpb(void);
++
+ #ifdef CONFIG_RETPOLINE
++
++typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
++
++#define GEN(reg) \
++ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
++#include <asm/GEN-for-each-reg.h>
++#undef GEN
++
++extern retpoline_thunk_t __x86_indirect_thunk_array[];
++
+ #ifdef CONFIG_X86_64
+
+ /*
+@@ -133,7 +233,7 @@
+ "lfence;\n" \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+- X86_FEATURE_RETPOLINE_AMD)
++ X86_FEATURE_RETPOLINE_LFENCE)
+
+ # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+@@ -163,7 +263,7 @@
+ "lfence;\n" \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
+- X86_FEATURE_RETPOLINE_AMD)
++ X86_FEATURE_RETPOLINE_LFENCE)
+
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+ #endif
+@@ -175,9 +275,12 @@
+ /* The Spectre V2 mitigation variants */
+ enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+- SPECTRE_V2_RETPOLINE_GENERIC,
+- SPECTRE_V2_RETPOLINE_AMD,
+- SPECTRE_V2_IBRS_ENHANCED,
++ SPECTRE_V2_RETPOLINE,
++ SPECTRE_V2_LFENCE,
++ SPECTRE_V2_EIBRS,
++ SPECTRE_V2_EIBRS_RETPOLINE,
++ SPECTRE_V2_EIBRS_LFENCE,
++ SPECTRE_V2_IBRS,
+ };
+
+ /* The indirect branch speculation control variants */
+@@ -211,15 +314,18 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+ : "memory");
+ }
+
++extern u64 x86_pred_cmd;
++
+ static inline void indirect_branch_prediction_barrier(void)
+ {
+- u64 val = PRED_CMD_IBPB;
+-
+- alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
++ alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
+ }
+
+ /* The Intel SPEC CTRL MSR base value cache */
+ extern u64 x86_spec_ctrl_base;
++DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
++extern void update_spec_ctrl_cond(u64 val);
++extern u64 spec_ctrl_current(void);
+
+ /*
+ * With retpoline, we must use IBRS to restrict branch prediction
+@@ -229,18 +335,18 @@ extern u64 x86_spec_ctrl_base;
+ */
+ #define firmware_restrict_branch_speculation_start() \
+ do { \
+- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
+- \
+ preempt_disable(); \
+- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
++ spec_ctrl_current() | SPEC_CTRL_IBRS, \
+ X86_FEATURE_USE_IBRS_FW); \
++ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
++ X86_FEATURE_USE_IBPB_FW); \
+ } while (0)
+
+ #define firmware_restrict_branch_speculation_end() \
+ do { \
+- u64 val = x86_spec_ctrl_base; \
+- \
+- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
++ spec_ctrl_current(), \
+ X86_FEATURE_USE_IBRS_FW); \
+ preempt_enable(); \
+ } while (0)
+@@ -254,6 +360,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
++DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
++
+ #include <asm/segment.h>
+
+ /**
+@@ -303,63 +411,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
+
+ #endif /* __ASSEMBLY__ */
+
+-/*
+- * Below is used in the eBPF JIT compiler and emits the byte sequence
+- * for the following assembly:
+- *
+- * With retpolines configured:
+- *
+- * callq do_rop
+- * spec_trap:
+- * pause
+- * lfence
+- * jmp spec_trap
+- * do_rop:
+- * mov %rcx,(%rsp) for x86_64
+- * mov %edx,(%esp) for x86_32
+- * retq
+- *
+- * Without retpolines configured:
+- *
+- * jmp *%rcx for x86_64
+- * jmp *%edx for x86_32
+- */
+-#ifdef CONFIG_RETPOLINE
+-# ifdef CONFIG_X86_64
+-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
+-# define RETPOLINE_RCX_BPF_JIT() \
+-do { \
+- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+- /* spec_trap: */ \
+- EMIT2(0xF3, 0x90); /* pause */ \
+- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+- /* do_rop: */ \
+- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
+- EMIT1(0xC3); /* retq */ \
+-} while (0)
+-# else /* !CONFIG_X86_64 */
+-# define RETPOLINE_EDX_BPF_JIT() \
+-do { \
+- EMIT1_off32(0xE8, 7); /* call do_rop */ \
+- /* spec_trap: */ \
+- EMIT2(0xF3, 0x90); /* pause */ \
+- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+- /* do_rop: */ \
+- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
+- EMIT1(0xC3); /* ret */ \
+-} while (0)
+-# endif
+-#else /* !CONFIG_RETPOLINE */
+-# ifdef CONFIG_X86_64
+-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
+-# define RETPOLINE_RCX_BPF_JIT() \
+- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
+-# else /* !CONFIG_X86_64 */
+-# define RETPOLINE_EDX_BPF_JIT() \
+- EMIT2(0xFF, 0xE2) /* jmp *%edx */
+-# endif
+-#endif
+-
+ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
+diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
+index 4bde0dc66100c..56891399fa2a6 100644
+--- a/arch/x86/include/asm/page_64.h
++++ b/arch/x86/include/asm/page_64.h
+@@ -15,7 +15,7 @@ extern unsigned long page_offset_base;
+ extern unsigned long vmalloc_base;
+ extern unsigned long vmemmap_base;
+
+-static inline unsigned long __phys_addr_nodebug(unsigned long x)
++static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
+ {
+ unsigned long y = x - __START_KERNEL_map;
+
+diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
+index a8d4ad8565681..e9e2c3ba59239 100644
+--- a/arch/x86/include/asm/page_64_types.h
++++ b/arch/x86/include/asm/page_64_types.h
+@@ -15,7 +15,7 @@
+ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
+ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+-#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
++#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER)
+ #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
+ #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index da3a1ac82be58..4d8b2731f4f85 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -665,7 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
+ "call " #func ";" \
+ PV_RESTORE_ALL_CALLER_REGS \
+ FRAME_END \
+- "ret;" \
++ ASM_RET \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
+ ".popsection")
+
+diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
+index 8fc1b5003713f..0e4efcde07831 100644
+--- a/arch/x86/include/asm/perf_event.h
++++ b/arch/x86/include/asm/perf_event.h
+@@ -241,6 +241,11 @@ struct x86_pmu_capability {
+ #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
+ #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+
++static inline bool use_fixed_pseudo_encoding(u64 code)
++{
++ return !(code & 0xff);
++}
++
+ /*
+ * We model BTS tracing as another fixed-mode PMC.
+ *
+@@ -422,8 +427,10 @@ struct pebs_xmm {
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ extern u32 get_ibs_caps(void);
++extern int forward_event_to_ibs(struct perf_event *event);
+ #else
+ static inline u32 get_ibs_caps(void) { return 0; }
++static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; }
+ #endif
+
+ #ifdef CONFIG_PERF_EVENTS
+diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
+index 56d0399a0cd16..dd520b44e89cc 100644
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -235,8 +235,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
+
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
+-#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+-#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
++#define __swp_entry_to_pte(x) (__pte((x).val))
++#define __swp_entry_to_pmd(x) (__pmd((x).val))
+
+ extern int kern_addr_valid(unsigned long addr);
+ extern void cleanup_highmap(void);
+diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
+index 40497a9020c6e..28e59576c75be 100644
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -123,11 +123,12 @@
+ * instance, and is *not* included in this mask since
+ * pte_modify() does modify it.
+ */
+-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
+- _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
+- _PAGE_UFFD_WP)
+-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
++#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
++ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\
++ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
++ _PAGE_UFFD_WP)
++#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
++#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
+
+ /*
+ * The cache modes defined here are used to translate between pure SW usage
+diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h
+index ccc539faa5bbc..4d8b9448fe8d2 100644
+--- a/arch/x86/include/asm/pkru.h
++++ b/arch/x86/include/asm/pkru.h
+@@ -4,8 +4,8 @@
+
+ #include <asm/fpu/xstate.h>
+
+-#define PKRU_AD_BIT 0x1
+-#define PKRU_WD_BIT 0x2
++#define PKRU_AD_BIT 0x1u
++#define PKRU_WD_BIT 0x2u
+ #define PKRU_BITS_PER_PKEY 2
+
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 9ad2acaaae9b8..bbbf27cfe7015 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -518,6 +518,7 @@ struct thread_struct {
+ */
+ unsigned long iopl_emul;
+
++ unsigned int iopl_warn:1;
+ unsigned int sig_on_uaccess_err:1;
+
+ /*
+@@ -802,9 +803,13 @@ extern u16 get_llc_id(unsigned int cpu);
+ #ifdef CONFIG_CPU_SUP_AMD
+ extern u32 amd_get_nodes_per_socket(void);
+ extern u32 amd_get_highest_perf(void);
++extern bool cpu_has_ibpb_brtype_microcode(void);
++extern void amd_clear_divider(void);
+ #else
+ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
+ static inline u32 amd_get_highest_perf(void) { return 0; }
++static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
++static inline void amd_clear_divider(void) { }
+ #endif
+
+ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
+@@ -833,8 +838,9 @@ bool xen_set_default_idle(void);
+ #define xen_set_default_idle 0
+ #endif
+
+-void stop_this_cpu(void *dummy);
+-void microcode_check(void);
++void __noreturn stop_this_cpu(void *dummy);
++void microcode_check(struct cpuinfo_x86 *prev_info);
++void store_cpu_caps(struct cpuinfo_x86 *info);
+
+ enum l1tf_mitigations {
+ L1TF_MITIGATION_OFF,
+@@ -853,4 +859,6 @@ enum mds_mitigations {
+ MDS_MITIGATION_VMWERV,
+ };
+
++extern bool gds_ucode_mitigated(void);
++
+ #endif /* _ASM_X86_PROCESSOR_H */
+diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
+index 159622ee06748..1474cf96251dd 100644
+--- a/arch/x86/include/asm/qspinlock_paravirt.h
++++ b/arch/x86/include/asm/qspinlock_paravirt.h
+@@ -48,7 +48,7 @@ asm (".pushsection .text;"
+ "jne .slowpath;"
+ "pop %rdx;"
+ FRAME_END
+- "ret;"
++ ASM_RET
+ ".slowpath: "
+ "push %rsi;"
+ "movzbl %al,%esi;"
+@@ -56,7 +56,7 @@ asm (".pushsection .text;"
+ "pop %rsi;"
+ "pop %rdx;"
+ FRAME_END
+- "ret;"
++ ASM_RET
+ ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+ ".popsection");
+
+diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
+index 5db5d083c8732..331474b150f16 100644
+--- a/arch/x86/include/asm/realmode.h
++++ b/arch/x86/include/asm/realmode.h
+@@ -89,6 +89,7 @@ static inline void set_real_mode_mem(phys_addr_t mem)
+ }
+
+ void reserve_real_mode(void);
++void load_trampoline_pgtable(void);
+
+ #endif /* __ASSEMBLY__ */
+
+diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
+index 04c17be9b5fda..bc5b4d788c08d 100644
+--- a/arch/x86/include/asm/reboot.h
++++ b/arch/x86/include/asm/reboot.h
+@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
+ #define MRR_BIOS 0
+ #define MRR_APM 1
+
++void cpu_emergency_disable_virtualization(void);
++
+ typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
+ void nmi_panic_self_stop(struct pt_regs *regs);
+ void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
+index b2d504f119370..9bf60a8b9e9c2 100644
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -102,6 +102,7 @@
+ #define REQUIRED_MASK17 0
+ #define REQUIRED_MASK18 0
+ #define REQUIRED_MASK19 0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
++#define REQUIRED_MASK20 0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
+index d60ed0668a593..b9ccdf5ea98ba 100644
+--- a/arch/x86/include/asm/resctrl.h
++++ b/arch/x86/include/asm/resctrl.h
+@@ -51,7 +51,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+ * simple as possible.
+ * Must be called with preemption disabled.
+ */
+-static void __resctrl_sched_in(void)
++static inline void __resctrl_sched_in(struct task_struct *tsk)
+ {
+ struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 closid = state->default_closid;
+@@ -63,13 +63,13 @@ static void __resctrl_sched_in(void)
+ * Else use the closid/rmid assigned to this cpu.
+ */
+ if (static_branch_likely(&rdt_alloc_enable_key)) {
+- tmp = READ_ONCE(current->closid);
++ tmp = READ_ONCE(tsk->closid);
+ if (tmp)
+ closid = tmp;
+ }
+
+ if (static_branch_likely(&rdt_mon_enable_key)) {
+- tmp = READ_ONCE(current->rmid);
++ tmp = READ_ONCE(tsk->rmid);
+ if (tmp)
+ rmid = tmp;
+ }
+@@ -81,17 +81,17 @@ static void __resctrl_sched_in(void)
+ }
+ }
+
+-static inline void resctrl_sched_in(void)
++static inline void resctrl_sched_in(struct task_struct *tsk)
+ {
+ if (static_branch_likely(&rdt_enable_key))
+- __resctrl_sched_in();
++ __resctrl_sched_in(tsk);
+ }
+
+ void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
+ #else
+
+-static inline void resctrl_sched_in(void) {}
++static inline void resctrl_sched_in(struct task_struct *tsk) {}
+ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
+
+ #endif /* CONFIG_X86_CPU_RESCTRL */
+diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
+index 72044026eb3c2..8dd8e8ec9fa55 100644
+--- a/arch/x86/include/asm/segment.h
++++ b/arch/x86/include/asm/segment.h
+@@ -339,7 +339,7 @@ static inline void __loadsegment_fs(unsigned short value)
+ "1: movw %0, %%fs \n"
+ "2: \n"
+
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS)
+
+ : : "rm" (value) : "memory");
+ }
+diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
+index 5b1ed650b1248..84eab27248754 100644
+--- a/arch/x86/include/asm/sigframe.h
++++ b/arch/x86/include/asm/sigframe.h
+@@ -85,6 +85,4 @@ struct rt_sigframe_x32 {
+
+ #endif /* CONFIG_X86_64 */
+
+-void __init init_sigframe_size(void);
+-
+ #endif /* _ASM_X86_SIGFRAME_H */
+diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
+index f248eb2ac2d4a..3881b5333eb81 100644
+--- a/arch/x86/include/asm/stacktrace.h
++++ b/arch/x86/include/asm/stacktrace.h
+@@ -38,6 +38,16 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
+ bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info);
+
++static __always_inline
++bool get_stack_guard_info(unsigned long *stack, struct stack_info *info)
++{
++ /* make sure it's not in the stack proper */
++ if (get_stack_info_noinstr(stack, current, info))
++ return false;
++ /* but if it is in the page below it, we hit a guard */
++ return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info);
++}
++
+ const char *stack_type_name(enum stack_type type);
+
+ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
+diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
+index cbb67b6030f97..491aadfac6117 100644
+--- a/arch/x86/include/asm/static_call.h
++++ b/arch/x86/include/asm/static_call.h
+@@ -21,6 +21,16 @@
+ * relative displacement across sections.
+ */
+
++/*
++ * The trampoline is 8 bytes and of the general form:
++ *
++ * jmp.d32 \func
++ * ud1 %esp, %ecx
++ *
++ * That trailing #UD provides both a speculation stop and serves as a unique
++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
++ * and __static_call_fixup().
++ */
+ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+@@ -34,8 +44,13 @@
+ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
++#ifdef CONFIG_RETHUNK
++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
++#else
+ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
++#endif
+
+
+ #define ARCH_ADD_TRAMP_KEY(name) \
+@@ -44,4 +59,6 @@
+ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \
+ ".popsection \n")
+
++extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
++
+ #endif /* _ASM_STATIC_CALL_H */
+diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
+index 7b132d0312ebf..a800abb1a9925 100644
+--- a/arch/x86/include/asm/suspend_32.h
++++ b/arch/x86/include/asm/suspend_32.h
+@@ -19,7 +19,6 @@ struct saved_context {
+ u16 gs;
+ unsigned long cr0, cr2, cr3, cr4;
+ u64 misc_enable;
+- bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
+ struct desc_ptr gdt_desc;
+ struct desc_ptr idt;
+@@ -28,6 +27,7 @@ struct saved_context {
+ unsigned long tr;
+ unsigned long safety;
+ unsigned long return_address;
++ bool misc_enable_saved;
+ } __attribute__((packed));
+
+ /* routines for saving/restoring kernel state */
+diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
+index 35bb35d28733e..54df06687d834 100644
+--- a/arch/x86/include/asm/suspend_64.h
++++ b/arch/x86/include/asm/suspend_64.h
+@@ -14,9 +14,13 @@
+ * Image of the saved processor state, used by the low level ACPI suspend to
+ * RAM code and by the low level hibernation code.
+ *
+- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
+- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
+- * still work as required.
++ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
++ * and make sure that __save/__restore_processor_state(), defined in
++ * arch/x86/power/cpu.c, still work as required.
++ *
++ * Because the structure is packed, make sure to avoid unaligned members. For
++ * optimisation purposes but also because tools like kmemleak only search for
++ * pointers that are aligned.
+ */
+ struct saved_context {
+ struct pt_regs regs;
+@@ -36,7 +40,6 @@ struct saved_context {
+
+ unsigned long cr0, cr2, cr3, cr4;
+ u64 misc_enable;
+- bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
+ unsigned long efer;
+ u16 gdt_pad; /* Unused */
+@@ -48,6 +51,7 @@ struct saved_context {
+ unsigned long tr;
+ unsigned long safety;
+ unsigned long return_address;
++ bool misc_enable_saved;
+ } __attribute__((packed));
+
+ #define loaddebug(thread,register) \
+diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
+index 6a2827d0681fc..e8ab7c1f1080a 100644
+--- a/arch/x86/include/asm/syscall_wrapper.h
++++ b/arch/x86/include/asm/syscall_wrapper.h
+@@ -6,7 +6,7 @@
+ #ifndef _ASM_X86_SYSCALL_WRAPPER_H
+ #define _ASM_X86_SYSCALL_WRAPPER_H
+
+-struct pt_regs;
++#include <asm/ptrace.h>
+
+ extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
+ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
+index a4a8b1b16c0c1..956e4145311b1 100644
+--- a/arch/x86/include/asm/timex.h
++++ b/arch/x86/include/asm/timex.h
+@@ -5,6 +5,15 @@
+ #include <asm/processor.h>
+ #include <asm/tsc.h>
+
++static inline unsigned long random_get_entropy(void)
++{
++ if (!IS_ENABLED(CONFIG_X86_TSC) &&
++ !cpu_feature_enabled(X86_FEATURE_TSC))
++ return random_get_entropy_fallback();
++ return rdtsc();
++}
++#define random_get_entropy random_get_entropy
++
+ /* Assume we use the PIT time source for the clock tick */
+ #define CLOCK_TICK_RATE PIT_TICK_RATE
+
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index 9239399e54914..55160445ea78b 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -218,7 +218,7 @@ static inline void arch_set_max_freq_ratio(bool turbo_disabled)
+ }
+ #endif
+
+-#ifdef CONFIG_ACPI_CPPC_LIB
++#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
+ void init_freq_invariance_cppc(void);
+ #define init_freq_invariance_cppc init_freq_invariance_cppc
+ #endif
+diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
+index 7f7200021bd13..1cdd7e8bcba78 100644
+--- a/arch/x86/include/asm/traps.h
++++ b/arch/x86/include/asm/traps.h
+@@ -13,7 +13,7 @@
+ #ifdef CONFIG_X86_64
+ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
+ asmlinkage __visible notrace
+-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
++struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
+ void __init trap_init(void);
+ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
+ #endif
+@@ -40,9 +40,9 @@ void math_emulate(struct math_emu_info *);
+ bool fault_in_kernel_space(unsigned long address);
+
+ #ifdef CONFIG_VMAP_STACK
+-void __noreturn handle_stack_overflow(const char *message,
+- struct pt_regs *regs,
+- unsigned long fault_address);
++void __noreturn handle_stack_overflow(struct pt_regs *regs,
++ unsigned long fault_address,
++ struct stack_info *info);
+ #endif
+
+ #endif /* _ASM_X86_TRAPS_H */
+diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
+index 01a300a9700b9..fbdc3d9514943 100644
+--- a/arch/x86/include/asm/tsc.h
++++ b/arch/x86/include/asm/tsc.h
+@@ -20,13 +20,12 @@ extern void disable_TSC(void);
+
+ static inline cycles_t get_cycles(void)
+ {
+-#ifndef CONFIG_X86_TSC
+- if (!boot_cpu_has(X86_FEATURE_TSC))
++ if (!IS_ENABLED(CONFIG_X86_TSC) &&
++ !cpu_feature_enabled(X86_FEATURE_TSC))
+ return 0;
+-#endif
+-
+ return rdtsc();
+ }
++#define get_cycles get_cycles
+
+ extern struct system_counterval_t convert_art_to_tsc(u64 art);
+ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
+diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
+index 5c95d242f38d7..ab5e577373093 100644
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -314,11 +314,12 @@ do { \
+ do { \
+ __chk_user_ptr(ptr); \
+ switch (size) { \
+- unsigned char x_u8__; \
+- case 1: \
++ case 1: { \
++ unsigned char x_u8__; \
+ __get_user_asm(x_u8__, ptr, "b", "=q", label); \
+ (x) = x_u8__; \
+ break; \
++ } \
+ case 2: \
+ __get_user_asm(x, ptr, "w", "=r", label); \
+ break; \
+@@ -413,6 +414,103 @@ do { \
+
+ #endif // CONFIG_CC_ASM_GOTO_OUTPUT
+
++#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm_volatile_goto("\n" \
++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
++ _ASM_EXTABLE_UA(1b, %l[label]) \
++ : CC_OUT(z) (success), \
++ [ptr] "+m" (*_ptr), \
++ [old] "+a" (__old) \
++ : [new] ltype (__new) \
++ : "memory" \
++ : label); \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++
++#ifdef CONFIG_X86_32
++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm_volatile_goto("\n" \
++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
++ _ASM_EXTABLE_UA(1b, %l[label]) \
++ : CC_OUT(z) (success), \
++ "+A" (__old), \
++ [ptr] "+m" (*_ptr) \
++ : "b" ((u32)__new), \
++ "c" ((u32)((u64)__new >> 32)) \
++ : "memory" \
++ : label); \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++#endif // CONFIG_X86_32
++#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
++ int __err = 0; \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm volatile("\n" \
++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
++ CC_SET(z) \
++ "2:\n" \
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
++ %[errout]) \
++ : CC_OUT(z) (success), \
++ [errout] "+r" (__err), \
++ [ptr] "+m" (*_ptr), \
++ [old] "+a" (__old) \
++ : [new] ltype (__new) \
++ : "memory"); \
++ if (unlikely(__err)) \
++ goto label; \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++
++#ifdef CONFIG_X86_32
++/*
++ * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error.
++ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are
++ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses
++ * both ESI and EDI for the memory operand, compilation will fail if the error
++ * is an input+output as there will be no register available for input.
++ */
++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
++ int __result; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm volatile("\n" \
++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
++ "mov $0, %%ecx\n\t" \
++ "setz %%cl\n" \
++ "2:\n" \
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \
++ : [result]"=c" (__result), \
++ "+A" (__old), \
++ [ptr] "+m" (*_ptr) \
++ : "b" ((u32)__new), \
++ "c" ((u32)((u64)__new >> 32)) \
++ : "memory", "cc"); \
++ if (unlikely(__result < 0)) \
++ goto label; \
++ if (unlikely(!__result)) \
++ *_old = __old; \
++ likely(__result); })
++#endif // CONFIG_X86_32
++#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++
+ /* FIXME: this hack is definitely wrong -AK */
+ struct __large_struct { unsigned long buf[100]; };
+ #define __m(x) (*(struct __large_struct __user *)(x))
+@@ -505,6 +603,51 @@ do { \
+ } while (0)
+ #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
++extern void __try_cmpxchg_user_wrong_size(void);
++
++#ifndef CONFIG_X86_32
++#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \
++ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label)
++#endif
++
++/*
++ * Force the pointer to u<size> to match the size expected by the asm helper.
++ * clang/LLVM compiles all cases and only discards the unused paths after
++ * processing errors, which breaks i386 if the pointer is an 8-byte value.
++ */
++#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
++ bool __ret; \
++ __chk_user_ptr(_ptr); \
++ switch (sizeof(*(_ptr))) { \
++ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \
++ (__force u8 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \
++ (__force u16 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \
++ (__force u32 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\
++ (_nval), _label); \
++ break; \
++ default: __try_cmpxchg_user_wrong_size(); \
++ } \
++ __ret; })
++
++/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */
++#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
++ int __ret = -EFAULT; \
++ __uaccess_begin_nospec(); \
++ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \
++_label: \
++ __uaccess_end(); \
++ __ret; \
++ })
++
+ /*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
+index 8e574c0afef80..56664b31b6dad 100644
+--- a/arch/x86/include/asm/unwind_hints.h
++++ b/arch/x86/include/asm/unwind_hints.h
+@@ -8,7 +8,11 @@
+ #ifdef __ASSEMBLY__
+
+ .macro UNWIND_HINT_EMPTY
+- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
++.endm
++
++.macro UNWIND_HINT_ENTRY
++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
+ .endm
+
+ .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+@@ -52,6 +56,14 @@
+ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
+ .endm
+
++.macro UNWIND_HINT_SAVE
++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
++.endm
++
++.macro UNWIND_HINT_RESTORE
++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
++.endm
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_X86_UNWIND_HINTS_H */
+diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
+index 8757078d4442a..6c2e3ff3cb28f 100644
+--- a/arch/x86/include/asm/virtext.h
++++ b/arch/x86/include/asm/virtext.h
+@@ -101,12 +101,6 @@ static inline int cpu_has_svm(const char **msg)
+ return 0;
+ }
+
+- if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
+- if (msg)
+- *msg = "can't execute cpuid_8000000a";
+- return 0;
+- }
+-
+ if (!boot_cpu_has(X86_FEATURE_SVM)) {
+ if (msg)
+ *msg = "svm not available";
+@@ -126,7 +120,21 @@ static inline void cpu_svm_disable(void)
+
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+ rdmsrl(MSR_EFER, efer);
+- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
++ if (efer & EFER_SVME) {
++ /*
++ * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
++ * aren't blocked, e.g. if a fatal error occurred between CLGI
++ * and STGI. Note, STGI may #UD if SVM is disabled from NMI
++ * context between reading EFER and executing STGI. In that
++ * case, GIF must already be set, otherwise the NMI would have
++ * been blocked, so just eat the fault.
++ */
++ asm_volatile_goto("1: stgi\n\t"
++ _ASM_EXTABLE(1b, %l[fault])
++ ::: "memory" : fault);
++fault:
++ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
++ }
+ }
+
+ /** Makes sure SVM is disabled, if it is supported on the CPU
+diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
+index ff4b52e37e60d..5adab895127e1 100644
+--- a/arch/x86/include/asm/xen/hypervisor.h
++++ b/arch/x86/include/asm/xen/hypervisor.h
+@@ -62,4 +62,9 @@ void xen_arch_register_cpu(int num);
+ void xen_arch_unregister_cpu(int num);
+ #endif
+
++#ifdef CONFIG_PVH
++void __init xen_pvh_init(struct boot_params *boot_params);
++void __init mem_map_via_hcall(struct boot_params *boot_params_p);
++#endif
++
+ #endif /* _ASM_X86_XEN_HYPERVISOR_H */
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index 8f4e8fa6ed759..2ff3e600f4269 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_REMOVE_ftrace.o = -pg
+ CFLAGS_REMOVE_early_printk.o = -pg
+ CFLAGS_REMOVE_head64.o = -pg
+ CFLAGS_REMOVE_sev.o = -pg
++CFLAGS_REMOVE_cc_platform.o = -pg
+ endif
+
+ KASAN_SANITIZE_head$(BITS).o := n
+@@ -29,6 +30,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
+ KASAN_SANITIZE_stacktrace.o := n
+ KASAN_SANITIZE_paravirt.o := n
+ KASAN_SANITIZE_sev.o := n
++KASAN_SANITIZE_cc_platform.o := n
+
+ # With some compiler versions the generated code results in boot hangs, caused
+ # by several compilation units. To be safe, disable all instrumentation.
+@@ -47,6 +49,7 @@ endif
+ KCOV_INSTRUMENT := n
+
+ CFLAGS_head$(BITS).o += -fno-stack-protector
++CFLAGS_cc_platform.o += -fno-stack-protector
+
+ CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
+
+@@ -147,6 +150,9 @@ obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
+
+ obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
++
++obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
++
+ ###
+ # 64 bit specific files
+ ifeq ($(CONFIG_X86_64),y)
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index 14bcd59bcdee2..94ac7402c1ac2 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -1319,6 +1319,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d)
+ return 0;
+ }
+
++static int __init disable_acpi_xsdt(const struct dmi_system_id *d)
++{
++ if (!acpi_force) {
++ pr_notice("%s detected: force use of acpi=rsdt\n", d->ident);
++ acpi_gbl_do_not_use_xsdt = TRUE;
++ } else {
++ pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n");
++ }
++ return 0;
++}
++
+ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
+ {
+ if (!acpi_force) {
+@@ -1442,6 +1453,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
++ /*
++ * Boxes that need ACPI XSDT use disabled due to corrupted tables
++ */
++ {
++ .callback = disable_acpi_xsdt,
++ .ident = "Advantech DAC-BJ01",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"),
++ DMI_MATCH(DMI_BIOS_VERSION, "V1.12"),
++ DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"),
++ },
++ },
+ {}
+ };
+
+diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
+index 7de599eba7f04..7945eae5b315f 100644
+--- a/arch/x86/kernel/acpi/cstate.c
++++ b/arch/x86/kernel/acpi/cstate.c
+@@ -79,6 +79,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+ */
+ flags->bm_control = 0;
+ }
++ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) {
++ /*
++ * For all AMD Zen or newer CPUs that support C3, caches
++ * should not be flushed by software while entering C3
++ * type state. Set bm->check to 1 so that kernel doesn't
++ * need to execute cache flush operation.
++ */
++ flags->bm_check = 1;
++ /*
++ * In current AMD C state implementation ARB_DIS is no longer
++ * used. So set bm_control to zero to indicate ARB_DIS is not
++ * required while entering C3 type state.
++ */
++ flags->bm_control = 0;
++ }
+ }
+ EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
+
+diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
+index daf88f8143c5f..cf69081073b54 100644
+--- a/arch/x86/kernel/acpi/wakeup_32.S
++++ b/arch/x86/kernel/acpi/wakeup_32.S
+@@ -60,7 +60,7 @@ save_registers:
+ popl saved_context_eflags
+
+ movl $ret_point, saved_eip
+- ret
++ RET
+
+
+ restore_registers:
+@@ -70,7 +70,7 @@ restore_registers:
+ movl saved_context_edi, %edi
+ pushl saved_context_eflags
+ popfl
+- ret
++ RET
+
+ SYM_CODE_START(do_suspend_lowlevel)
+ call save_processor_state
+@@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel)
+ ret_point:
+ call restore_registers
+ call restore_processor_state
+- ret
++ RET
+ SYM_CODE_END(do_suspend_lowlevel)
+
+ .data
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
+index e9da3dc712541..43dd7f281a216 100644
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -29,6 +29,7 @@
+ #include <asm/io.h>
+ #include <asm/fixmap.h>
+ #include <asm/paravirt.h>
++#include <asm/asm-prototypes.h>
+
+ int __read_mostly alternatives_patched;
+
+@@ -113,6 +114,8 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
+ }
+ }
+
++extern s32 __retpoline_sites[], __retpoline_sites_end[];
++extern s32 __return_sites[], __return_sites_end[];
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void text_poke_early(void *addr, const void *opcode, size_t len);
+@@ -221,7 +224,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
++static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+ {
+ struct insn insn;
+ int i = 0;
+@@ -239,11 +242,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
+ * optimized.
+ */
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+- i += optimize_nops_range(instr, a->instrlen, i);
++ i += optimize_nops_range(instr, len, i);
+ else
+ i += insn.length;
+
+- if (i >= a->instrlen)
++ if (i >= len)
+ return;
+ }
+ }
+@@ -331,10 +334,254 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
+ text_poke_early(instr, insn_buff, insn_buff_sz);
+
+ next:
+- optimize_nops(a, instr);
++ optimize_nops(instr, a->instrlen);
+ }
+ }
+
++#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
++
++/*
++ * CALL/JMP *%\reg
++ */
++static int emit_indirect(int op, int reg, u8 *bytes)
++{
++ int i = 0;
++ u8 modrm;
++
++ switch (op) {
++ case CALL_INSN_OPCODE:
++ modrm = 0x10; /* Reg = 2; CALL r/m */
++ break;
++
++ case JMP32_INSN_OPCODE:
++ modrm = 0x20; /* Reg = 4; JMP r/m */
++ break;
++
++ default:
++ WARN_ON_ONCE(1);
++ return -1;
++ }
++
++ if (reg >= 8) {
++ bytes[i++] = 0x41; /* REX.B prefix */
++ reg -= 8;
++ }
++
++ modrm |= 0xc0; /* Mod = 3 */
++ modrm += reg;
++
++ bytes[i++] = 0xff; /* opcode */
++ bytes[i++] = modrm;
++
++ return i;
++}
++
++/*
++ * Rewrite the compiler generated retpoline thunk calls.
++ *
++ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
++ * indirect instructions, avoiding the extra indirection.
++ *
++ * For example, convert:
++ *
++ * CALL __x86_indirect_thunk_\reg
++ *
++ * into:
++ *
++ * CALL *%\reg
++ *
++ * It also tries to inline spectre_v2=retpoline,amd when size permits.
++ */
++static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
++{
++ retpoline_thunk_t *target;
++ int reg, ret, i = 0;
++ u8 op, cc;
++
++ target = addr + insn->length + insn->immediate.value;
++ reg = target - __x86_indirect_thunk_array;
++
++ if (WARN_ON_ONCE(reg & ~0xf))
++ return -1;
++
++ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
++ BUG_ON(reg == 4);
++
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
++ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
++ return -1;
++
++ op = insn->opcode.bytes[0];
++
++ /*
++ * Convert:
++ *
++ * Jcc.d32 __x86_indirect_thunk_\reg
++ *
++ * into:
++ *
++ * Jncc.d8 1f
++ * [ LFENCE ]
++ * JMP *%\reg
++ * [ NOP ]
++ * 1:
++ */
++ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
++ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
++ cc = insn->opcode.bytes[1] & 0xf;
++ cc ^= 1; /* invert condition */
++
++ bytes[i++] = 0x70 + cc; /* Jcc.d8 */
++ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
++
++ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
++ op = JMP32_INSN_OPCODE;
++ }
++
++ /*
++ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
++ */
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++ bytes[i++] = 0x0f;
++ bytes[i++] = 0xae;
++ bytes[i++] = 0xe8; /* LFENCE */
++ }
++
++ ret = emit_indirect(op, reg, bytes + i);
++ if (ret < 0)
++ return ret;
++ i += ret;
++
++ for (; i < insn->length;)
++ bytes[i++] = BYTES_NOP1;
++
++ return i;
++}
++
++/*
++ * Generated by 'objtool --retpoline'.
++ */
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
++{
++ s32 *s;
++
++ for (s = start; s < end; s++) {
++ void *addr = (void *)s + *s;
++ struct insn insn;
++ int len, ret;
++ u8 bytes[16];
++ u8 op1, op2;
++
++ ret = insn_decode_kernel(&insn, addr);
++ if (WARN_ON_ONCE(ret < 0))
++ continue;
++
++ op1 = insn.opcode.bytes[0];
++ op2 = insn.opcode.bytes[1];
++
++ switch (op1) {
++ case CALL_INSN_OPCODE:
++ case JMP32_INSN_OPCODE:
++ break;
++
++ case 0x0f: /* escape */
++ if (op2 >= 0x80 && op2 <= 0x8f)
++ break;
++ fallthrough;
++ default:
++ WARN_ON_ONCE(1);
++ continue;
++ }
++
++ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
++ addr, addr, insn.length,
++ addr + insn.length + insn.immediate.value);
++
++ len = patch_retpoline(addr, &insn, bytes);
++ if (len == insn.length) {
++ optimize_nops(bytes, len);
++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
++ text_poke_early(addr, bytes, len);
++ }
++ }
++}
++
++#ifdef CONFIG_RETHUNK
++/*
++ * Rewrite the compiler generated return thunk tail-calls.
++ *
++ * For example, convert:
++ *
++ * JMP __x86_return_thunk
++ *
++ * into:
++ *
++ * RET
++ */
++static int patch_return(void *addr, struct insn *insn, u8 *bytes)
++{
++ int i = 0;
++
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ return -1;
++
++ bytes[i++] = RET_INSN_OPCODE;
++
++ for (; i < insn->length;)
++ bytes[i++] = INT3_INSN_OPCODE;
++
++ return i;
++}
++
++void __init_or_module noinline apply_returns(s32 *start, s32 *end)
++{
++ s32 *s;
++
++ for (s = start; s < end; s++) {
++ void *dest = NULL, *addr = (void *)s + *s;
++ struct insn insn;
++ int len, ret;
++ u8 bytes[16];
++ u8 op;
++
++ ret = insn_decode_kernel(&insn, addr);
++ if (WARN_ON_ONCE(ret < 0))
++ continue;
++
++ op = insn.opcode.bytes[0];
++ if (op == JMP32_INSN_OPCODE)
++ dest = addr + insn.length + insn.immediate.value;
++
++ if (__static_call_fixup(addr, op, dest) ||
++ WARN_ONCE(dest != &__x86_return_thunk,
++ "missing return thunk: %pS-%pS: %*ph",
++ addr, dest, 5, addr))
++ continue;
++
++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
++ addr, addr, insn.length,
++ addr + insn.length + insn.immediate.value);
++
++ len = patch_return(addr, &insn, bytes);
++ if (len == insn.length) {
++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
++ text_poke_early(addr, bytes, len);
++ }
++ }
++}
++#else
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
++#endif /* CONFIG_RETHUNK */
++
++#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
++
++void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
++
++#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
++
+ #ifdef CONFIG_SMP
+ static void alternatives_smp_lock(const s32 *start, const s32 *end,
+ u8 *text, u8 *text_end)
+@@ -537,7 +784,7 @@ asm (
+ " .type int3_magic, @function\n"
+ "int3_magic:\n"
+ " movl $1, (%" _ASM_ARG1 ")\n"
+-" ret\n"
++ ASM_RET
+ " .size int3_magic, .-int3_magic\n"
+ " .popsection\n"
+ );
+@@ -642,6 +889,13 @@ void __init alternative_instructions(void)
+ */
+ apply_paravirt(__parainstructions, __parainstructions_end);
+
++ /*
++ * Rewrite the retpolines, must be done before alternatives since
++ * those can rewrite the retpoline thunks.
++ */
++ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
++ apply_returns(__return_sites, __return_sites_end);
++
+ /*
+ * Then patch alternatives, such that those paravirt calls that are in
+ * alternatives can be overwritten by their immediate fragments.
+@@ -930,10 +1184,13 @@ void text_poke_sync(void)
+ }
+
+ struct text_poke_loc {
+- s32 rel_addr; /* addr := _stext + rel_addr */
+- s32 rel32;
++ /* addr := _stext + rel_addr */
++ s32 rel_addr;
++ s32 disp;
++ u8 len;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
++ /* see text_poke_bp_batch() */
+ u8 old;
+ };
+
+@@ -943,21 +1200,23 @@ struct bp_patching_desc {
+ atomic_t refs;
+ };
+
+-static struct bp_patching_desc *bp_desc;
++static struct bp_patching_desc bp_desc;
+
+ static __always_inline
+-struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
++struct bp_patching_desc *try_get_desc(void)
+ {
+- struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */
++ struct bp_patching_desc *desc = &bp_desc;
+
+- if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
++ if (!arch_atomic_inc_not_zero(&desc->refs))
+ return NULL;
+
+ return desc;
+ }
+
+-static __always_inline void put_desc(struct bp_patching_desc *desc)
++static __always_inline void put_desc(void)
+ {
++ struct bp_patching_desc *desc = &bp_desc;
++
+ smp_mb__before_atomic();
+ arch_atomic_dec(&desc->refs);
+ }
+@@ -982,7 +1241,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
+ {
+ struct bp_patching_desc *desc;
+ struct text_poke_loc *tp;
+- int len, ret = 0;
++ int ret = 0;
+ void *ip;
+
+ if (user_mode(regs))
+@@ -990,15 +1249,15 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
+
+ /*
+ * Having observed our INT3 instruction, we now must observe
+- * bp_desc:
++ * bp_desc with non-zero refcount:
+ *
+- * bp_desc = desc INT3
++ * bp_desc.refs = 1 INT3
+ * WMB RMB
+- * write INT3 if (desc)
++ * write INT3 if (bp_desc.refs != 0)
+ */
+ smp_rmb();
+
+- desc = try_get_desc(&bp_desc);
++ desc = try_get_desc();
+ if (!desc)
+ return 0;
+
+@@ -1022,8 +1281,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
+ goto out_put;
+ }
+
+- len = text_opcode_size(tp->opcode);
+- ip += len;
++ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+@@ -1038,12 +1296,12 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
+ break;
+
+ case CALL_INSN_OPCODE:
+- int3_emulate_call(regs, (long)ip + tp->rel32);
++ int3_emulate_call(regs, (long)ip + tp->disp);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+- int3_emulate_jmp(regs, (long)ip + tp->rel32);
++ int3_emulate_jmp(regs, (long)ip + tp->disp);
+ break;
+
+ default:
+@@ -1053,7 +1311,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
+ ret = 1;
+
+ out_put:
+- put_desc(desc);
++ put_desc();
+ return ret;
+ }
+
+@@ -1084,18 +1342,20 @@ static int tp_vec_nr;
+ */
+ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+ {
+- struct bp_patching_desc desc = {
+- .vec = tp,
+- .nr_entries = nr_entries,
+- .refs = ATOMIC_INIT(1),
+- };
+ unsigned char int3 = INT3_INSN_OPCODE;
+ unsigned int i;
+ int do_sync;
+
+ lockdep_assert_held(&text_mutex);
+
+- smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
++ bp_desc.vec = tp;
++ bp_desc.nr_entries = nr_entries;
++
++ /*
++ * Corresponds to the implicit memory barrier in try_get_desc() to
++ * ensure reading a non-zero refcount provides up to date bp_desc data.
++ */
++ atomic_set_release(&bp_desc.refs, 1);
+
+ /*
+ * Corresponding read barrier in int3 notifier for making sure the
+@@ -1118,7 +1378,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
+ */
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
+- int len = text_opcode_size(tp[i].opcode);
++ int len = tp[i].len;
+
+ if (len - INT3_INSN_SIZE > 0) {
+ memcpy(old + INT3_INSN_SIZE,
+@@ -1183,32 +1443,46 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
+ text_poke_sync();
+
+ /*
+- * Remove and synchronize_rcu(), except we have a very primitive
+- * refcount based completion.
++ * Remove and wait for refs to be zero.
+ */
+- WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
+- if (!atomic_dec_and_test(&desc.refs))
+- atomic_cond_read_acquire(&desc.refs, !VAL);
++ if (!atomic_dec_and_test(&bp_desc.refs))
++ atomic_cond_read_acquire(&bp_desc.refs, !VAL);
+ }
+
+ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
+ {
+ struct insn insn;
+- int ret;
++ int ret, i;
+
+ memcpy((void *)tp->text, opcode, len);
+ if (!emulate)
+ emulate = opcode;
+
+ ret = insn_decode_kernel(&insn, emulate);
+-
+ BUG_ON(ret < 0);
+- BUG_ON(len != insn.length);
+
+ tp->rel_addr = addr - (void *)_stext;
++ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
++ switch (tp->opcode) {
++ case RET_INSN_OPCODE:
++ case JMP32_INSN_OPCODE:
++ case JMP8_INSN_OPCODE:
++ /*
++ * Control flow instructions without implied execution of the
++ * next instruction can be padded with INT3.
++ */
++ for (i = insn.length; i < len; i++)
++ BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
++ break;
++
++ default:
++ BUG_ON(len != insn.length);
++ };
++
++
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ case RET_INSN_OPCODE:
+@@ -1217,7 +1491,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+- tp->rel32 = insn.immediate.value;
++ tp->disp = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+@@ -1225,13 +1499,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+- tp->rel32 = 0;
++ tp->disp = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+- tp->rel32 = 0;
++ tp->disp = 0;
+ break;
+
+ default: /* unknown instruction */
+diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
+index b70344bf66008..4df7d694369a5 100644
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -170,7 +170,7 @@ static __init int setup_apicpmtimer(char *s)
+ {
+ apic_calibrate_pmtmr = 1;
+ notsc_setup(NULL);
+- return 0;
++ return 1;
+ }
+ __setup("apicpmtimer", setup_apicpmtimer);
+ #endif
+@@ -412,10 +412,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
+ if (vector && !eilvt_entry_is_changeable(vector, new))
+ /* may not change if vectors are different */
+ return rsvd;
+- rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
+- } while (rsvd != new);
++ } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
+
+- rsvd &= ~APIC_EILVT_MASKED;
++ rsvd = new & ~APIC_EILVT_MASKED;
+ if (rsvd && rsvd != vector)
+ pr_info("LVT offset %d assigned for vector 0x%02x\n",
+ offset, rsvd);
+diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
+index c1bb384935b05..bb71b628edcb4 100644
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -2479,17 +2479,21 @@ static int io_apic_get_redir_entries(int ioapic)
+
+ unsigned int arch_dynirq_lower_bound(unsigned int from)
+ {
++ unsigned int ret;
++
+ /*
+ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
+ * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
+ */
+- if (!ioapic_initialized)
+- return gsi_top;
++ ret = ioapic_dynirq_base ? : gsi_top;
++
+ /*
+- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
+- * updated. So simply return @from if ioapic_dynirq_base == 0.
++ * For DT enabled machines ioapic_dynirq_base is irrelevant and
++ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
++ * 0 is an invalid interrupt number for dynamic allocations. Return
++ * @from instead.
+ */
+- return ioapic_dynirq_base ? : from;
++ return ret ? : from;
+ }
+
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
+index 6bde05a86b4ed..896bc41cb2ba7 100644
+--- a/arch/x86/kernel/apic/x2apic_phys.c
++++ b/arch/x86/kernel/apic/x2apic_phys.c
+@@ -97,7 +97,10 @@ static void init_x2apic_ldr(void)
+
+ static int x2apic_phys_probe(void)
+ {
+- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
++ if (!x2apic_mode)
++ return 0;
++
++ if (x2apic_phys || x2apic_fadt_phys())
+ return 1;
+
+ return apic == &apic_x2apic_phys;
+diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
+index f5a48e66e4f54..a6e9c2794ef56 100644
+--- a/arch/x86/kernel/apic/x2apic_uv_x.c
++++ b/arch/x86/kernel/apic/x2apic_uv_x.c
+@@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void)
+ int mmr_shift;
+ char *state;
+
+- /* Different returns from different UV BIOS versions */
++ /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */
++ if (!is_uv(UV2|UV3|UV4)) {
++ mark_tsc_async_resets("UV5+");
++ return;
++ }
++
++ /* UV2,3,4, UV BIOS TSC sync state available */
+ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
+ mmr_shift =
+ is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
+diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
+index 241dda687eb9f..06978a1194f24 100644
+--- a/arch/x86/kernel/apm_32.c
++++ b/arch/x86/kernel/apm_32.c
+@@ -237,12 +237,6 @@
+ extern int (*console_blank_hook)(int);
+ #endif
+
+-/*
+- * The apm_bios device is one of the misc char devices.
+- * This is its minor number.
+- */
+-#define APM_MINOR_DEV 134
+-
+ /*
+ * Various options can be changed at boot time as follows:
+ * (We allow underscores for compatibility with the modules code)
+diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
+new file mode 100644
+index 0000000000000..03bb2f343ddb7
+--- /dev/null
++++ b/arch/x86/kernel/cc_platform.c
+@@ -0,0 +1,69 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Confidential Computing Platform Capability checks
++ *
++ * Copyright (C) 2021 Advanced Micro Devices, Inc.
++ *
++ * Author: Tom Lendacky <thomas.lendacky@amd.com>
++ */
++
++#include <linux/export.h>
++#include <linux/cc_platform.h>
++#include <linux/mem_encrypt.h>
++
++#include <asm/processor.h>
++
++static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
++{
++#ifdef CONFIG_INTEL_TDX_GUEST
++ return false;
++#else
++ return false;
++#endif
++}
++
++/*
++ * SME and SEV are very similar but they are not the same, so there are
++ * times that the kernel will need to distinguish between SME and SEV. The
++ * cc_platform_has() function is used for this. When a distinction isn't
++ * needed, the CC_ATTR_MEM_ENCRYPT attribute can be used.
++ *
++ * The trampoline code is a good example for this requirement. Before
++ * paging is activated, SME will access all memory as decrypted, but SEV
++ * will access all memory as encrypted. So, when APs are being brought
++ * up under SME the trampoline area cannot be encrypted, whereas under SEV
++ * the trampoline area must be encrypted.
++ */
++static bool amd_cc_platform_has(enum cc_attr attr)
++{
++#ifdef CONFIG_AMD_MEM_ENCRYPT
++ switch (attr) {
++ case CC_ATTR_MEM_ENCRYPT:
++ return sme_me_mask;
++
++ case CC_ATTR_HOST_MEM_ENCRYPT:
++ return sme_me_mask && !(sev_status & MSR_AMD64_SEV_ENABLED);
++
++ case CC_ATTR_GUEST_MEM_ENCRYPT:
++ return sev_status & MSR_AMD64_SEV_ENABLED;
++
++ case CC_ATTR_GUEST_STATE_ENCRYPT:
++ return sev_status & MSR_AMD64_SEV_ES_ENABLED;
++
++ default:
++ return false;
++ }
++#else
++ return false;
++#endif
++}
++
++
++bool cc_platform_has(enum cc_attr attr)
++{
++ if (sme_me_mask)
++ return amd_cc_platform_has(attr);
++
++ return false;
++}
++EXPORT_SYMBOL_GPL(cc_platform_has);
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 2131af9f2fa23..0a0230bd5089a 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -27,11 +27,6 @@
+
+ #include "cpu.h"
+
+-static const int amd_erratum_383[];
+-static const int amd_erratum_400[];
+-static const int amd_erratum_1054[];
+-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
+-
+ /*
+ * nodes_per_socket: Stores the number of nodes per socket.
+ * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
+@@ -39,6 +34,83 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
+ */
+ static u32 nodes_per_socket = 1;
+
++/*
++ * AMD errata checking
++ *
++ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
++ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
++ * have an OSVW id assigned, which it takes as first argument. Both take a
++ * variable number of family-specific model-stepping ranges created by
++ * AMD_MODEL_RANGE().
++ *
++ * Example:
++ *
++ * const int amd_erratum_319[] =
++ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
++ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
++ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
++ */
++
++#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
++#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
++#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
++ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
++#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
++#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
++#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
++
++static const int amd_erratum_400[] =
++ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
++ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
++
++static const int amd_erratum_383[] =
++ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
++
++/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
++static const int amd_erratum_1054[] =
++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
++
++static const int amd_zenbleed[] =
++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
++ AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
++ AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
++ AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
++
++static const int amd_div0[] =
++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
++ AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
++
++static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
++{
++ int osvw_id = *erratum++;
++ u32 range;
++ u32 ms;
++
++ if (osvw_id >= 0 && osvw_id < 65536 &&
++ cpu_has(cpu, X86_FEATURE_OSVW)) {
++ u64 osvw_len;
++
++ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
++ if (osvw_id < osvw_len) {
++ u64 osvw_bits;
++
++ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
++ osvw_bits);
++ return osvw_bits & (1ULL << (osvw_id & 0x3f));
++ }
++ }
++
++ /* OSVW unavailable or ID unknown, match family-model-stepping range */
++ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
++ while ((range = *erratum++))
++ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
++ (ms >= AMD_MODEL_RANGE_START(range)) &&
++ (ms <= AMD_MODEL_RANGE_END(range)))
++ return true;
++
++ return false;
++}
++
+ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+ {
+ u32 gprs[8] = { 0 };
+@@ -794,8 +866,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
+ set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
+ }
+
+-#define MSR_AMD64_DE_CFG 0xC0011029
+-
+ static void init_amd_ln(struct cpuinfo_x86 *c)
+ {
+ /*
+@@ -886,6 +956,37 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
+ clear_rdrand_cpuid_bit(c);
+ }
+
++void init_spectral_chicken(struct cpuinfo_x86 *c)
++{
++#ifdef CONFIG_CPU_UNRET_ENTRY
++ u64 value;
++
++ /*
++ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
++ *
++ * This suppresses speculation from the middle of a basic block, i.e. it
++ * suppresses non-branch predictions.
++ *
++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
++ */
++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
++ }
++ }
++#endif
++ /*
++ * Work around Erratum 1386. The XSAVES instruction malfunctions in
++ * certain circumstances on Zen1/2 uarch, and not all parts have had
++ * updated microcode at the time of writing (March 2023).
++ *
++ * Affected parts all have no supervisor XSAVE states, meaning that
++ * the XSAVEC instruction (which works fine) is equivalent.
++ */
++ clear_cpu_cap(c, X86_FEATURE_XSAVES);
++}
++
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+ {
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+@@ -894,12 +995,62 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
+ node_reclaim_distance = 32;
+ #endif
+
+- /*
+- * Fix erratum 1076: CPB feature bit not being set in CPUID.
+- * Always set it, except when running under a hypervisor.
+- */
+- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
+- set_cpu_cap(c, X86_FEATURE_CPB);
++ /* Fix up CPUID bits, but only if not virtualised. */
++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
++
++ /* Erratum 1076: CPB feature bit not being set in CPUID. */
++ if (!cpu_has(c, X86_FEATURE_CPB))
++ set_cpu_cap(c, X86_FEATURE_CPB);
++
++ /*
++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
++ * Branch Type Confusion, but predate the allocation of the
++ * BTC_NO bit.
++ */
++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
++ set_cpu_cap(c, X86_FEATURE_BTC_NO);
++ }
++}
++
++static bool cpu_has_zenbleed_microcode(void)
++{
++ u32 good_rev = 0;
++
++ switch (boot_cpu_data.x86_model) {
++ case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
++ case 0x60 ... 0x67: good_rev = 0x0860010b; break;
++ case 0x68 ... 0x6f: good_rev = 0x08608105; break;
++ case 0x70 ... 0x7f: good_rev = 0x08701032; break;
++ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
++
++ default:
++ return false;
++ break;
++ }
++
++ if (boot_cpu_data.microcode < good_rev)
++ return false;
++
++ return true;
++}
++
++static void zenbleed_check(struct cpuinfo_x86 *c)
++{
++ if (!cpu_has_amd_erratum(c, amd_zenbleed))
++ return;
++
++ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
++ return;
++
++ if (!cpu_has(c, X86_FEATURE_AVX))
++ return;
++
++ if (!cpu_has_zenbleed_microcode()) {
++ pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
++ msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
++ } else {
++ msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
++ }
+ }
+
+ static void init_amd(struct cpuinfo_x86 *c)
+@@ -931,7 +1082,8 @@ static void init_amd(struct cpuinfo_x86 *c)
+ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
+ case 0x16: init_amd_jg(c); break;
+- case 0x17: fallthrough;
++ case 0x17: init_spectral_chicken(c);
++ fallthrough;
+ case 0x19: init_amd_zn(c); break;
+ }
+
+@@ -958,8 +1110,8 @@ static void init_amd(struct cpuinfo_x86 *c)
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+- msr_set_bit(MSR_F10H_DECFG,
+- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++ msr_set_bit(MSR_AMD64_DE_CFG,
++ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
+
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+@@ -989,6 +1141,15 @@ static void init_amd(struct cpuinfo_x86 *c)
+ if (cpu_has(c, X86_FEATURE_IRPERF) &&
+ !cpu_has_amd_erratum(c, amd_erratum_1054))
+ msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
++
++ check_null_seg_clears_base(c);
++
++ zenbleed_check(c);
++
++ if (cpu_has_amd_erratum(c, amd_div0)) {
++ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
++ setup_force_cpu_bug(X86_BUG_DIV0);
++ }
+ }
+
+ #ifdef CONFIG_X86_32
+@@ -1084,73 +1245,6 @@ static const struct cpu_dev amd_cpu_dev = {
+
+ cpu_dev_register(amd_cpu_dev);
+
+-/*
+- * AMD errata checking
+- *
+- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+- * have an OSVW id assigned, which it takes as first argument. Both take a
+- * variable number of family-specific model-stepping ranges created by
+- * AMD_MODEL_RANGE().
+- *
+- * Example:
+- *
+- * const int amd_erratum_319[] =
+- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+- */
+-
+-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+-
+-static const int amd_erratum_400[] =
+- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+-
+-static const int amd_erratum_383[] =
+- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+-
+-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+-static const int amd_erratum_1054[] =
+- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+-
+-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+-{
+- int osvw_id = *erratum++;
+- u32 range;
+- u32 ms;
+-
+- if (osvw_id >= 0 && osvw_id < 65536 &&
+- cpu_has(cpu, X86_FEATURE_OSVW)) {
+- u64 osvw_len;
+-
+- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+- if (osvw_id < osvw_len) {
+- u64 osvw_bits;
+-
+- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+- osvw_bits);
+- return osvw_bits & (1ULL << (osvw_id & 0x3f));
+- }
+- }
+-
+- /* OSVW unavailable or ID unknown, match family-model-stepping range */
+- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+- while ((range = *erratum++))
+- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+- (ms >= AMD_MODEL_RANGE_START(range)) &&
+- (ms <= AMD_MODEL_RANGE_END(range)))
+- return true;
+-
+- return false;
+-}
+-
+ void set_dr_addr_mask(unsigned long mask, int dr)
+ {
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
+@@ -1185,3 +1279,45 @@ u32 amd_get_highest_perf(void)
+ return 255;
+ }
+ EXPORT_SYMBOL_GPL(amd_get_highest_perf);
++
++bool cpu_has_ibpb_brtype_microcode(void)
++{
++ switch (boot_cpu_data.x86) {
++ /* Zen1/2 IBPB flushes branch type predictions too. */
++ case 0x17:
++ return boot_cpu_has(X86_FEATURE_AMD_IBPB);
++ case 0x19:
++ /* Poke the MSR bit on Zen3/4 to check its presence. */
++ if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_SBPB);
++ return true;
++ } else {
++ return false;
++ }
++ default:
++ return false;
++ }
++}
++
++static void zenbleed_check_cpu(void *unused)
++{
++ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
++
++ zenbleed_check(c);
++}
++
++void amd_check_microcode(void)
++{
++ on_each_cpu(zenbleed_check_cpu, NULL, 1);
++}
++
++/*
++ * Issue a DIV 0/1 insn to clear any division data from previous DIV
++ * operations.
++ */
++void noinstr amd_clear_divider(void)
++{
++ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
++ :: "a" (0), "d" (0), "r" (1));
++}
++EXPORT_SYMBOL_GPL(amd_clear_divider);
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index ecfca3bbcd968..0d2c5fe841414 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -9,13 +9,13 @@
+ * - Andrew D. Balsa (code cleanup).
+ */
+ #include <linux/init.h>
+-#include <linux/utsname.h>
+ #include <linux/cpu.h>
+ #include <linux/module.h>
+ #include <linux/nospec.h>
+ #include <linux/prctl.h>
+ #include <linux/sched/smt.h>
+ #include <linux/pgtable.h>
++#include <linux/bpf.h>
+
+ #include <asm/spec-ctrl.h>
+ #include <asm/cmdline.h>
+@@ -26,8 +26,6 @@
+ #include <asm/msr.h>
+ #include <asm/vmx.h>
+ #include <asm/paravirt.h>
+-#include <asm/alternative.h>
+-#include <asm/set_memory.h>
+ #include <asm/intel-family.h>
+ #include <asm/e820/api.h>
+ #include <asm/hypervisor.h>
+@@ -37,24 +35,66 @@
+
+ static void __init spectre_v1_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
++static void __init retbleed_select_mitigation(void);
++static void __init spectre_v2_user_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
+-static void __init mds_print_mitigation(void);
++static void __init md_clear_update_mitigation(void);
++static void __init md_clear_select_mitigation(void);
+ static void __init taa_select_mitigation(void);
++static void __init mmio_select_mitigation(void);
+ static void __init srbds_select_mitigation(void);
+ static void __init l1d_flush_select_mitigation(void);
++static void __init gds_select_mitigation(void);
++static void __init srso_select_mitigation(void);
+
+-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
++/* The base value of the SPEC_CTRL MSR without task-specific bits set */
+ u64 x86_spec_ctrl_base;
+ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/* The current value of the SPEC_CTRL MSR with task-specific bits set */
++DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
++
++u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
++EXPORT_SYMBOL_GPL(x86_pred_cmd);
++
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+
++void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
++
++/* Update SPEC_CTRL MSR and its cached copy unconditionally */
++static void update_spec_ctrl(u64 val)
++{
++ this_cpu_write(x86_spec_ctrl_current, val);
++ wrmsrl(MSR_IA32_SPEC_CTRL, val);
++}
++
+ /*
+- * The vendor and possibly platform specific bits which can be modified in
+- * x86_spec_ctrl_base.
++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
+ */
+-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
++void update_spec_ctrl_cond(u64 val)
++{
++ if (this_cpu_read(x86_spec_ctrl_current) == val)
++ return;
++
++ this_cpu_write(x86_spec_ctrl_current, val);
++
++ /*
++ * When KERNEL_IBRS this MSR is written on return-to-user, unless
++ * forced the update can be delayed until that time.
++ */
++ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
++ wrmsrl(MSR_IA32_SPEC_CTRL, val);
++}
++
++u64 spec_ctrl_current(void)
++{
++ return this_cpu_read(x86_spec_ctrl_current);
++}
++EXPORT_SYMBOL_GPL(spec_ctrl_current);
+
+ /*
+ * AMD specific MSR info for Speculative Store Bypass control.
+@@ -84,108 +124,68 @@ EXPORT_SYMBOL_GPL(mds_idle_clear);
+ */
+ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+
+-void __init check_bugs(void)
+-{
+- identify_boot_cpu();
+-
+- /*
+- * identify_boot_cpu() initialized SMT support information, let the
+- * core code know.
+- */
+- cpu_smt_check_topology();
+-
+- if (!IS_ENABLED(CONFIG_SMP)) {
+- pr_info("CPU: ");
+- print_cpu_info(&boot_cpu_data);
+- }
++/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
++DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
++EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+
++void __init cpu_select_mitigations(void)
++{
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
++ if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+- /* Allow STIBP in MSR_SPEC_CTRL if supported */
+- if (boot_cpu_has(X86_FEATURE_STIBP))
+- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
++ /*
++ * Previously running kernel (kexec), may have some controls
++ * turned ON. Clear them and let the mitigations setup below
++ * rediscover them based on configuration.
++ */
++ x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
++ }
+
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
+ spectre_v2_select_mitigation();
+- ssb_select_mitigation();
+- l1tf_select_mitigation();
+- mds_select_mitigation();
+- taa_select_mitigation();
+- srbds_select_mitigation();
+- l1d_flush_select_mitigation();
+-
+ /*
+- * As MDS and TAA mitigations are inter-related, print MDS
+- * mitigation until after TAA mitigation selection is done.
++ * retbleed_select_mitigation() relies on the state set by
++ * spectre_v2_select_mitigation(); specifically it wants to know about
++ * spectre_v2=ibrs.
+ */
+- mds_print_mitigation();
+-
+- arch_smt_update();
+-
+-#ifdef CONFIG_X86_32
++ retbleed_select_mitigation();
+ /*
+- * Check whether we are able to run this kernel safely on SMP.
+- *
+- * - i386 is no longer supported.
+- * - In order to run on anything without a TSC, we need to be
+- * compiled for a i486.
++ * spectre_v2_user_select_mitigation() relies on the state set by
++ * retbleed_select_mitigation(); specifically the STIBP selection is
++ * forced for UNRET or IBPB.
+ */
+- if (boot_cpu_data.x86 < 4)
+- panic("Kernel requires i486+ for 'invlpg' and other features");
+-
+- init_utsname()->machine[1] =
+- '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+- alternative_instructions();
+-
+- fpu__init_check_bugs();
+-#else /* CONFIG_X86_64 */
+- alternative_instructions();
++ spectre_v2_user_select_mitigation();
++ ssb_select_mitigation();
++ l1tf_select_mitigation();
++ md_clear_select_mitigation();
++ srbds_select_mitigation();
++ l1d_flush_select_mitigation();
+
+ /*
+- * Make sure the first 2MB area is not mapped by huge pages
+- * There are typically fixed size MTRRs in there and overlapping
+- * MTRRs into large pages causes slow downs.
+- *
+- * Right now we don't do that with gbpages because there seems
+- * very little benefit for that case.
++ * srso_select_mitigation() depends and must run after
++ * retbleed_select_mitigation().
+ */
+- if (!direct_gbpages)
+- set_memory_4k((unsigned long)__va(0), 1);
+-#endif
++ srso_select_mitigation();
++ gds_select_mitigation();
+ }
+
++/*
++ * NOTE: For VMX, this function is not called in the vmexit path.
++ * It uses vmx_spec_ctrl_restore_host() instead.
++ */
+ void
+ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+ {
+- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ struct thread_info *ti = current_thread_info();
+
+- /* Is MSR_SPEC_CTRL implemented ? */
+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+- /*
+- * Restrict guest_spec_ctrl to supported values. Clear the
+- * modifiable bits in the host base value and or the
+- * modifiable bits from the guest value.
+- */
+- guestval = hostval & ~x86_spec_ctrl_mask;
+- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+-
+- /* SSBD controlled in MSR_SPEC_CTRL */
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+- static_cpu_has(X86_FEATURE_AMD_SSBD))
+- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+-
+- /* Conditional STIBP enabled? */
+- if (static_branch_unlikely(&switch_to_cond_stibp))
+- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+-
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+@@ -266,14 +266,6 @@ static void __init mds_select_mitigation(void)
+ }
+ }
+
+-static void __init mds_print_mitigation(void)
+-{
+- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
+- return;
+-
+- pr_info("%s\n", mds_strings[mds_mitigation]);
+-}
+-
+ static int __init mds_cmdline(char *str)
+ {
+ if (!boot_cpu_has_bug(X86_BUG_MDS))
+@@ -328,7 +320,7 @@ static void __init taa_select_mitigation(void)
+ /* TSX previously disabled by tsx=off */
+ if (!boot_cpu_has(X86_FEATURE_RTM)) {
+ taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
+- goto out;
++ return;
+ }
+
+ if (cpu_mitigations_off()) {
+@@ -342,7 +334,7 @@ static void __init taa_select_mitigation(void)
+ */
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ mds_mitigation == MDS_MITIGATION_OFF)
+- goto out;
++ return;
+
+ if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ taa_mitigation = TAA_MITIGATION_VERW;
+@@ -374,18 +366,6 @@ static void __init taa_select_mitigation(void)
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+-
+- /*
+- * Update MDS mitigation, if necessary, as the mds_user_clear is
+- * now enabled for TAA mitigation.
+- */
+- if (mds_mitigation == MDS_MITIGATION_OFF &&
+- boot_cpu_has_bug(X86_BUG_MDS)) {
+- mds_mitigation = MDS_MITIGATION_FULL;
+- mds_select_mitigation();
+- }
+-out:
+- pr_info("%s\n", taa_strings[taa_mitigation]);
+ }
+
+ static int __init tsx_async_abort_parse_cmdline(char *str)
+@@ -409,6 +389,154 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
+ }
+ early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+
++#undef pr_fmt
++#define pr_fmt(fmt) "MMIO Stale Data: " fmt
++
++enum mmio_mitigations {
++ MMIO_MITIGATION_OFF,
++ MMIO_MITIGATION_UCODE_NEEDED,
++ MMIO_MITIGATION_VERW,
++};
++
++/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
++static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
++static bool mmio_nosmt __ro_after_init = false;
++
++static const char * const mmio_strings[] = {
++ [MMIO_MITIGATION_OFF] = "Vulnerable",
++ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
++ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
++};
++
++static void __init mmio_select_mitigation(void)
++{
++ u64 ia32_cap;
++
++ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
++ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
++ cpu_mitigations_off()) {
++ mmio_mitigation = MMIO_MITIGATION_OFF;
++ return;
++ }
++
++ if (mmio_mitigation == MMIO_MITIGATION_OFF)
++ return;
++
++ ia32_cap = x86_read_arch_cap_msr();
++
++ /*
++ * Enable CPU buffer clear mitigation for host and VMM, if also affected
++ * by MDS or TAA. Otherwise, enable mitigation for VMM only.
++ */
++ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
++ boot_cpu_has(X86_FEATURE_RTM)))
++ static_branch_enable(&mds_user_clear);
++ else
++ static_branch_enable(&mmio_stale_data_clear);
++
++ /*
++ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
++ * be propagated to uncore buffers, clearing the Fill buffers on idle
++ * is required irrespective of SMT state.
++ */
++ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
++ static_branch_enable(&mds_idle_clear);
++
++ /*
++ * Check if the system has the right microcode.
++ *
++ * CPU Fill buffer clear mitigation is enumerated by either an explicit
++ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
++ * affected systems.
++ */
++ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
++ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
++ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
++ !(ia32_cap & ARCH_CAP_MDS_NO)))
++ mmio_mitigation = MMIO_MITIGATION_VERW;
++ else
++ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
++
++ if (mmio_nosmt || cpu_mitigations_auto_nosmt())
++ cpu_smt_disable(false);
++}
++
++static int __init mmio_stale_data_parse_cmdline(char *str)
++{
++ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
++ return 0;
++
++ if (!str)
++ return -EINVAL;
++
++ if (!strcmp(str, "off")) {
++ mmio_mitigation = MMIO_MITIGATION_OFF;
++ } else if (!strcmp(str, "full")) {
++ mmio_mitigation = MMIO_MITIGATION_VERW;
++ } else if (!strcmp(str, "full,nosmt")) {
++ mmio_mitigation = MMIO_MITIGATION_VERW;
++ mmio_nosmt = true;
++ }
++
++ return 0;
++}
++early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
++
++#undef pr_fmt
++#define pr_fmt(fmt) "" fmt
++
++static void __init md_clear_update_mitigation(void)
++{
++ if (cpu_mitigations_off())
++ return;
++
++ if (!static_key_enabled(&mds_user_clear))
++ goto out;
++
++ /*
++ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
++ * mitigation, if necessary.
++ */
++ if (mds_mitigation == MDS_MITIGATION_OFF &&
++ boot_cpu_has_bug(X86_BUG_MDS)) {
++ mds_mitigation = MDS_MITIGATION_FULL;
++ mds_select_mitigation();
++ }
++ if (taa_mitigation == TAA_MITIGATION_OFF &&
++ boot_cpu_has_bug(X86_BUG_TAA)) {
++ taa_mitigation = TAA_MITIGATION_VERW;
++ taa_select_mitigation();
++ }
++ if (mmio_mitigation == MMIO_MITIGATION_OFF &&
++ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
++ mmio_mitigation = MMIO_MITIGATION_VERW;
++ mmio_select_mitigation();
++ }
++out:
++ if (boot_cpu_has_bug(X86_BUG_MDS))
++ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
++ if (boot_cpu_has_bug(X86_BUG_TAA))
++ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
++ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
++ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
++ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ pr_info("MMIO Stale Data: Unknown: No mitigations\n");
++}
++
++static void __init md_clear_select_mitigation(void)
++{
++ mds_select_mitigation();
++ taa_select_mitigation();
++ mmio_select_mitigation();
++
++ /*
++ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
++ * and print their mitigation after MDS, TAA and MMIO Stale Data
++ * mitigation selection is done.
++ */
++ md_clear_update_mitigation();
++}
++
+ #undef pr_fmt
+ #define pr_fmt(fmt) "SRBDS: " fmt
+
+@@ -470,11 +598,13 @@ static void __init srbds_select_mitigation(void)
+ return;
+
+ /*
+- * Check to see if this is one of the MDS_NO systems supporting
+- * TSX that are only exposed to SRBDS when TSX is enabled.
++ * Check to see if this is one of the MDS_NO systems supporting TSX that
++ * are only exposed to SRBDS when TSX is enabled or when CPU is affected
++ * by Processor MMIO Stale Data vulnerability.
+ */
+ ia32_cap = x86_read_arch_cap_msr();
+- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
++ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
++ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
+ else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
+@@ -528,6 +658,149 @@ static int __init l1d_flush_parse_cmdline(char *str)
+ }
+ early_param("l1d_flush", l1d_flush_parse_cmdline);
+
++#undef pr_fmt
++#define pr_fmt(fmt) "GDS: " fmt
++
++enum gds_mitigations {
++ GDS_MITIGATION_OFF,
++ GDS_MITIGATION_UCODE_NEEDED,
++ GDS_MITIGATION_FORCE,
++ GDS_MITIGATION_FULL,
++ GDS_MITIGATION_FULL_LOCKED,
++ GDS_MITIGATION_HYPERVISOR,
++};
++
++#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
++static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
++#else
++static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
++#endif
++
++static const char * const gds_strings[] = {
++ [GDS_MITIGATION_OFF] = "Vulnerable",
++ [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
++ [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode",
++ [GDS_MITIGATION_FULL] = "Mitigation: Microcode",
++ [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)",
++ [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
++};
++
++bool gds_ucode_mitigated(void)
++{
++ return (gds_mitigation == GDS_MITIGATION_FULL ||
++ gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
++}
++EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
++
++void update_gds_msr(void)
++{
++ u64 mcu_ctrl_after;
++ u64 mcu_ctrl;
++
++ switch (gds_mitigation) {
++ case GDS_MITIGATION_OFF:
++ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
++ mcu_ctrl |= GDS_MITG_DIS;
++ break;
++ case GDS_MITIGATION_FULL_LOCKED:
++ /*
++ * The LOCKED state comes from the boot CPU. APs might not have
++ * the same state. Make sure the mitigation is enabled on all
++ * CPUs.
++ */
++ case GDS_MITIGATION_FULL:
++ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
++ mcu_ctrl &= ~GDS_MITG_DIS;
++ break;
++ case GDS_MITIGATION_FORCE:
++ case GDS_MITIGATION_UCODE_NEEDED:
++ case GDS_MITIGATION_HYPERVISOR:
++ return;
++ };
++
++ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
++
++ /*
++ * Check to make sure that the WRMSR value was not ignored. Writes to
++ * GDS_MITG_DIS will be ignored if this processor is locked but the boot
++ * processor was not.
++ */
++ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
++ WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
++}
++
++static void __init gds_select_mitigation(void)
++{
++ u64 mcu_ctrl;
++
++ if (!boot_cpu_has_bug(X86_BUG_GDS))
++ return;
++
++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
++ gds_mitigation = GDS_MITIGATION_HYPERVISOR;
++ goto out;
++ }
++
++ if (cpu_mitigations_off())
++ gds_mitigation = GDS_MITIGATION_OFF;
++ /* Will verify below that mitigation _can_ be disabled */
++
++ /* No microcode */
++ if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
++ if (gds_mitigation == GDS_MITIGATION_FORCE) {
++ /*
++ * This only needs to be done on the boot CPU so do it
++ * here rather than in update_gds_msr()
++ */
++ setup_clear_cpu_cap(X86_FEATURE_AVX);
++ pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
++ } else {
++ gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
++ }
++ goto out;
++ }
++
++ /* Microcode has mitigation, use it */
++ if (gds_mitigation == GDS_MITIGATION_FORCE)
++ gds_mitigation = GDS_MITIGATION_FULL;
++
++ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
++ if (mcu_ctrl & GDS_MITG_LOCKED) {
++ if (gds_mitigation == GDS_MITIGATION_OFF)
++ pr_warn("Mitigation locked. Disable failed.\n");
++
++ /*
++ * The mitigation is selected from the boot CPU. All other CPUs
++ * _should_ have the same state. If the boot CPU isn't locked
++ * but others are then update_gds_msr() will WARN() of the state
++ * mismatch. If the boot CPU is locked update_gds_msr() will
++ * ensure the other CPUs have the mitigation enabled.
++ */
++ gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
++ }
++
++ update_gds_msr();
++out:
++ pr_info("%s\n", gds_strings[gds_mitigation]);
++}
++
++static int __init gds_parse_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ if (!boot_cpu_has_bug(X86_BUG_GDS))
++ return 0;
++
++ if (!strcmp(str, "off"))
++ gds_mitigation = GDS_MITIGATION_OFF;
++ else if (!strcmp(str, "force"))
++ gds_mitigation = GDS_MITIGATION_FORCE;
++
++ return 0;
++}
++early_param("gather_data_sampling", gds_parse_cmdline);
++
+ #undef pr_fmt
+ #define pr_fmt(fmt) "Spectre V1 : " fmt
+
+@@ -608,22 +881,193 @@ static void __init spectre_v1_select_mitigation(void)
+ }
+ }
+
+- pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+-}
+-
+-static int __init nospectre_v1_cmdline(char *str)
+-{
+- spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+- return 0;
++ pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
++}
++
++static int __init nospectre_v1_cmdline(char *str)
++{
++ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
++ return 0;
++}
++early_param("nospectre_v1", nospectre_v1_cmdline);
++
++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
++ SPECTRE_V2_NONE;
++
++#undef pr_fmt
++#define pr_fmt(fmt) "RETBleed: " fmt
++
++enum retbleed_mitigation {
++ RETBLEED_MITIGATION_NONE,
++ RETBLEED_MITIGATION_UNRET,
++ RETBLEED_MITIGATION_IBPB,
++ RETBLEED_MITIGATION_IBRS,
++ RETBLEED_MITIGATION_EIBRS,
++};
++
++enum retbleed_mitigation_cmd {
++ RETBLEED_CMD_OFF,
++ RETBLEED_CMD_AUTO,
++ RETBLEED_CMD_UNRET,
++ RETBLEED_CMD_IBPB,
++};
++
++const char * const retbleed_strings[] = {
++ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
++};
++
++static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
++ RETBLEED_MITIGATION_NONE;
++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
++ RETBLEED_CMD_AUTO;
++
++static int __ro_after_init retbleed_nosmt = false;
++
++static int __init retbleed_parse_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ while (str) {
++ char *next = strchr(str, ',');
++ if (next) {
++ *next = 0;
++ next++;
++ }
++
++ if (!strcmp(str, "off")) {
++ retbleed_cmd = RETBLEED_CMD_OFF;
++ } else if (!strcmp(str, "auto")) {
++ retbleed_cmd = RETBLEED_CMD_AUTO;
++ } else if (!strcmp(str, "unret")) {
++ retbleed_cmd = RETBLEED_CMD_UNRET;
++ } else if (!strcmp(str, "ibpb")) {
++ retbleed_cmd = RETBLEED_CMD_IBPB;
++ } else if (!strcmp(str, "nosmt")) {
++ retbleed_nosmt = true;
++ } else {
++ pr_err("Ignoring unknown retbleed option (%s).", str);
++ }
++
++ str = next;
++ }
++
++ return 0;
++}
++early_param("retbleed", retbleed_parse_cmdline);
++
++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
++
++static void __init retbleed_select_mitigation(void)
++{
++ bool mitigate_smt = false;
++
++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
++ return;
++
++ switch (retbleed_cmd) {
++ case RETBLEED_CMD_OFF:
++ return;
++
++ case RETBLEED_CMD_UNRET:
++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
++ goto do_cmd_auto;
++ }
++ break;
++
++ case RETBLEED_CMD_IBPB:
++ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
++ pr_err("WARNING: CPU does not support IBPB.\n");
++ goto do_cmd_auto;
++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
++ goto do_cmd_auto;
++ }
++ break;
++
++do_cmd_auto:
++ case RETBLEED_CMD_AUTO:
++ default:
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
++ }
++
++ /*
++ * The Intel mitigation (IBRS or eIBRS) was already selected in
++ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
++ * be set accordingly below.
++ */
++
++ break;
++ }
++
++ switch (retbleed_mitigation) {
++ case RETBLEED_MITIGATION_UNRET:
++ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++ setup_force_cpu_cap(X86_FEATURE_UNRET);
++
++ if (IS_ENABLED(CONFIG_RETHUNK))
++ x86_return_thunk = retbleed_return_thunk;
++
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++ pr_err(RETBLEED_UNTRAIN_MSG);
++
++ mitigate_smt = true;
++ break;
++
++ case RETBLEED_MITIGATION_IBPB:
++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++ mitigate_smt = true;
++ break;
++
++ default:
++ break;
++ }
++
++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
++ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
++ cpu_smt_disable(false);
++
++ /*
++ * Let IBRS trump all on Intel without affecting the effects of the
++ * retbleed= cmdline option.
++ */
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++ switch (spectre_v2_enabled) {
++ case SPECTRE_V2_IBRS:
++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
++ break;
++ case SPECTRE_V2_EIBRS:
++ case SPECTRE_V2_EIBRS_RETPOLINE:
++ case SPECTRE_V2_EIBRS_LFENCE:
++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
++ break;
++ default:
++ pr_err(RETBLEED_INTEL_MSG);
++ }
++ }
++
++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+ }
+-early_param("nospectre_v1", nospectre_v1_cmdline);
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "Spectre V2 : " fmt
+
+-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+- SPECTRE_V2_NONE;
+-
+ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+ static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
+@@ -650,6 +1094,33 @@ static inline const char *spectre_v2_module_string(void)
+ static inline const char *spectre_v2_module_string(void) { return ""; }
+ #endif
+
++#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
++#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
++#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
++#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
++
++#ifdef CONFIG_BPF_SYSCALL
++void unpriv_ebpf_notify(int new_state)
++{
++ if (new_state)
++ return;
++
++ /* Unprivileged eBPF is enabled */
++
++ switch (spectre_v2_enabled) {
++ case SPECTRE_V2_EIBRS:
++ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
++ break;
++ case SPECTRE_V2_EIBRS_LFENCE:
++ if (sched_smt_active())
++ pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
++ break;
++ default:
++ break;
++ }
++}
++#endif
++
+ static inline bool match_option(const char *arg, int arglen, const char *opt)
+ {
+ int len = strlen(opt);
+@@ -664,7 +1135,11 @@ enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+- SPECTRE_V2_CMD_RETPOLINE_AMD,
++ SPECTRE_V2_CMD_RETPOLINE_LFENCE,
++ SPECTRE_V2_CMD_EIBRS,
++ SPECTRE_V2_CMD_EIBRS_RETPOLINE,
++ SPECTRE_V2_CMD_EIBRS_LFENCE,
++ SPECTRE_V2_CMD_IBRS,
+ };
+
+ enum spectre_v2_user_cmd {
+@@ -705,13 +1180,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
+ pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+ }
+
++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
++
+ static enum spectre_v2_user_cmd __init
+-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_parse_user_cmdline(void)
+ {
+ char arg[20];
+ int ret, i;
+
+- switch (v2_cmd) {
++ switch (spectre_v2_cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return SPECTRE_V2_USER_CMD_NONE;
+ case SPECTRE_V2_CMD_FORCE:
+@@ -737,8 +1214,20 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+ return SPECTRE_V2_USER_CMD_AUTO;
+ }
+
++static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
++{
++ return mode == SPECTRE_V2_EIBRS ||
++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++ mode == SPECTRE_V2_EIBRS_LFENCE;
++}
++
++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
++{
++ return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
++}
++
+ static void __init
+-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
++spectre_v2_user_select_mitigation(void)
+ {
+ enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+ bool smt_possible = IS_ENABLED(CONFIG_SMP);
+@@ -751,7 +1240,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ smt_possible = false;
+
+- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
++ cmd = spectre_v2_parse_user_cmdline();
+ switch (cmd) {
+ case SPECTRE_V2_USER_CMD_NONE:
+ goto set_mode;
+@@ -799,12 +1288,19 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+ }
+
+ /*
+- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
+- * required.
++ * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
++ * is not required.
++ *
++ * Enhanced IBRS also protects against cross-thread branch target
++ * injection in user-mode as the IBRS bit remains always set which
++ * implicitly enables cross-thread protections. However, in legacy IBRS
++ * mode, the IBRS bit is set only on kernel entry and cleared on return
++ * to userspace. This disables the implicit cross-thread protection,
++ * so allow for STIBP to be selected in that case.
+ */
+ if (!boot_cpu_has(X86_FEATURE_STIBP) ||
+ !smt_possible ||
+- spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ return;
+
+ /*
+@@ -816,6 +1312,14 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
++ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
++ if (mode != SPECTRE_V2_USER_STRICT &&
++ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
++ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
++ }
++
+ spectre_v2_user_stibp = mode;
+
+ set_mode:
+@@ -824,9 +1328,12 @@ set_mode:
+
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
++ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
++ [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
++ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
++ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
+ };
+
+ static const struct {
+@@ -837,9 +1344,14 @@ static const struct {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
++ { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
++ { "eibrs", SPECTRE_V2_CMD_EIBRS, false },
++ { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
++ { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
++ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
+ };
+
+ static void __init spec_v2_print_cond(const char *reason, bool secure)
+@@ -875,17 +1387,54 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+- cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+- cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
++ cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
++ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
++ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
++ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+- pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if ((cmd == SPECTRE_V2_CMD_EIBRS ||
++ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
++ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
++ !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
++ pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
++ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
++ !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
++ pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
++ mitigation_options[i].option);
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
++ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+- if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
+- boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
++ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
++ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+@@ -894,6 +1443,79 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ return cmd;
+ }
+
++static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
++{
++ if (!IS_ENABLED(CONFIG_RETPOLINE)) {
++ pr_err("Kernel not compiled with retpoline; no mitigation available!");
++ return SPECTRE_V2_NONE;
++ }
++
++ return SPECTRE_V2_RETPOLINE;
++}
++
++/* Disable in-kernel use of non-RSB RET predictors */
++static void __init spec_ctrl_disable_kernel_rrsba(void)
++{
++ u64 ia32_cap;
++
++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
++ return;
++
++ ia32_cap = x86_read_arch_cap_msr();
++
++ if (ia32_cap & ARCH_CAP_RRSBA) {
++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
++ update_spec_ctrl(x86_spec_ctrl_base);
++ }
++}
++
++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
++{
++ /*
++ * Similar to context switches, there are two types of RSB attacks
++ * after VM exit:
++ *
++ * 1) RSB underflow
++ *
++ * 2) Poisoned RSB entry
++ *
++ * When retpoline is enabled, both are mitigated by filling/clearing
++ * the RSB.
++ *
++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++ * prediction isolation protections, RSB still needs to be cleared
++ * because of #2. Note that SMEP provides no protection here, unlike
++ * user-space-poisoned RSB entries.
++ *
++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
++ * bug is present then a LITE version of RSB protection is required,
++ * just a single call needs to retire before a RET is executed.
++ */
++ switch (mode) {
++ case SPECTRE_V2_NONE:
++ return;
++
++ case SPECTRE_V2_EIBRS_LFENCE:
++ case SPECTRE_V2_EIBRS:
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
++ }
++ return;
++
++ case SPECTRE_V2_EIBRS_RETPOLINE:
++ case SPECTRE_V2_RETPOLINE:
++ case SPECTRE_V2_LFENCE:
++ case SPECTRE_V2_IBRS:
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
++ return;
++ }
++
++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
++ dump_stack();
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -914,86 +1536,172 @@ static void __init spectre_v2_select_mitigation(void)
+ case SPECTRE_V2_CMD_FORCE:
+ case SPECTRE_V2_CMD_AUTO:
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+- mode = SPECTRE_V2_IBRS_ENHANCED;
+- /* Force it so VMEXIT will restore correctly */
+- x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+- goto specv2_set_mode;
++ mode = SPECTRE_V2_EIBRS;
++ break;
++ }
++
++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
++ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++ retbleed_cmd != RETBLEED_CMD_OFF &&
++ boot_cpu_has(X86_FEATURE_IBRS) &&
++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
++ mode = SPECTRE_V2_IBRS;
++ break;
+ }
+- if (IS_ENABLED(CONFIG_RETPOLINE))
+- goto retpoline_auto;
++
++ mode = spectre_v2_select_retpoline();
+ break;
+- case SPECTRE_V2_CMD_RETPOLINE_AMD:
+- if (IS_ENABLED(CONFIG_RETPOLINE))
+- goto retpoline_amd;
++
++ case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
++ pr_err(SPECTRE_V2_LFENCE_MSG);
++ mode = SPECTRE_V2_LFENCE;
+ break;
++
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+- if (IS_ENABLED(CONFIG_RETPOLINE))
+- goto retpoline_generic;
++ mode = SPECTRE_V2_RETPOLINE;
+ break;
++
+ case SPECTRE_V2_CMD_RETPOLINE:
+- if (IS_ENABLED(CONFIG_RETPOLINE))
+- goto retpoline_auto;
++ mode = spectre_v2_select_retpoline();
++ break;
++
++ case SPECTRE_V2_CMD_IBRS:
++ mode = SPECTRE_V2_IBRS;
++ break;
++
++ case SPECTRE_V2_CMD_EIBRS:
++ mode = SPECTRE_V2_EIBRS;
++ break;
++
++ case SPECTRE_V2_CMD_EIBRS_LFENCE:
++ mode = SPECTRE_V2_EIBRS_LFENCE;
++ break;
++
++ case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
++ mode = SPECTRE_V2_EIBRS_RETPOLINE;
+ break;
+ }
+- pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
+- return;
+
+-retpoline_auto:
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+- retpoline_amd:
+- if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+- pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
+- goto retpoline_generic;
+- }
+- mode = SPECTRE_V2_RETPOLINE_AMD;
+- setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+- setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+- } else {
+- retpoline_generic:
+- mode = SPECTRE_V2_RETPOLINE_GENERIC;
++ if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
++ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
++
++ if (spectre_v2_in_ibrs_mode(mode)) {
++ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
++ update_spec_ctrl(x86_spec_ctrl_base);
++ }
++
++ switch (mode) {
++ case SPECTRE_V2_NONE:
++ case SPECTRE_V2_EIBRS:
++ break;
++
++ case SPECTRE_V2_IBRS:
++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
++ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
++ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
++ break;
++
++ case SPECTRE_V2_LFENCE:
++ case SPECTRE_V2_EIBRS_LFENCE:
++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
++ fallthrough;
++
++ case SPECTRE_V2_RETPOLINE:
++ case SPECTRE_V2_EIBRS_RETPOLINE:
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++ break;
+ }
+
+-specv2_set_mode:
++ /*
++ * Disable alternate RSB predictions in kernel when indirect CALLs and
++ * JMPs gets protection against BHI and Intramode-BTI, but RET
++ * prediction from a non-RSB predictor is still a risk.
++ */
++ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
++ mode == SPECTRE_V2_RETPOLINE)
++ spec_ctrl_disable_kernel_rrsba();
++
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+- * If spectre v2 protection has been enabled, unconditionally fill
+- * RSB during a context switch; this protects against two independent
+- * issues:
++ * If Spectre v2 protection has been enabled, fill the RSB during a
++ * context switch. In general there are two types of RSB attacks
++ * across context switches, for which the CALLs/RETs may be unbalanced.
++ *
++ * 1) RSB underflow
++ *
++ * Some Intel parts have "bottomless RSB". When the RSB is empty,
++ * speculated return targets may come from the branch predictor,
++ * which could have a user-poisoned BTB or BHB entry.
++ *
++ * AMD has it even worse: *all* returns are speculated from the BTB,
++ * regardless of the state of the RSB.
++ *
++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
++ * scenario is mitigated by the IBRS branch prediction isolation
++ * properties, so the RSB buffer filling wouldn't be necessary to
++ * protect against this type of attack.
++ *
++ * The "user -> user" attack scenario is mitigated by RSB filling.
++ *
++ * 2) Poisoned RSB entry
++ *
++ * If the 'next' in-kernel return stack is shorter than 'prev',
++ * 'next' could be tricked into speculating with a user-poisoned RSB
++ * entry.
++ *
++ * The "user -> kernel" attack scenario is mitigated by SMEP and
++ * eIBRS.
++ *
++ * The "user -> user" scenario, also known as SpectreBHB, requires
++ * RSB clearing.
++ *
++ * So to mitigate all cases, unconditionally fill RSB on context
++ * switches.
+ *
+- * - RSB underflow (and switch to BTB) on Skylake+
+- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
++ * FIXME: Is this pointless for retbleed-affected AMD?
+ */
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
++
+ /*
+- * Retpoline means the kernel is safe because it has no indirect
+- * branches. Enhanced IBRS protects firmware too, so, enable restricted
+- * speculation around firmware calls only when Enhanced IBRS isn't
+- * supported.
++ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
++ * and Enhanced IBRS protect firmware too, so enable IBRS around
++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
++ * enabled.
+ *
+ * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
+ * the user might select retpoline on the kernel command line and if
+ * the CPU supports Enhanced IBRS, kernel might un-intentionally not
+ * enable IBRS around firmware calls.
+ */
+- if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++ boot_cpu_has(X86_FEATURE_IBPB) &&
++ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
++
++ if (retbleed_cmd != RETBLEED_CMD_IBPB) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
++ pr_info("Enabling Speculation Barrier for firmware calls\n");
++ }
++
++ } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
+
+ /* Set up IBPB and STIBP depending on the general spectre V2 command */
+- spectre_v2_user_select_mitigation(cmd);
++ spectre_v2_cmd = cmd;
+ }
+
+ static void update_stibp_msr(void * __unused)
+ {
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
++ update_spec_ctrl(val);
+ }
+
+ /* Update x86_spec_ctrl_base in case SMT state changed. */
+@@ -1028,6 +1736,8 @@ static void update_indir_branch_cond(void)
+ /* Update the static key controlling the MDS CPU buffer clear in idle */
+ static void update_mds_branch_idle(void)
+ {
++ u64 ia32_cap = x86_read_arch_cap_msr();
++
+ /*
+ * Enable the idle clearing if SMT is active on CPUs which are
+ * affected only by MSBDS and not any other MDS variant.
+@@ -1039,19 +1749,26 @@ static void update_mds_branch_idle(void)
+ if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
+ return;
+
+- if (sched_smt_active())
++ if (sched_smt_active()) {
+ static_branch_enable(&mds_idle_clear);
+- else
++ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
++ (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ static_branch_disable(&mds_idle_clear);
++ }
+ }
+
+ #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+ #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
++#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+
+ void cpu_bugs_smt_update(void)
+ {
+ mutex_lock(&spec_ctrl_mutex);
+
++ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
++ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
++ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
++
+ switch (spectre_v2_user_stibp) {
+ case SPECTRE_V2_USER_NONE:
+ break;
+@@ -1087,6 +1804,16 @@ void cpu_bugs_smt_update(void)
+ break;
+ }
+
++ switch (mmio_mitigation) {
++ case MMIO_MITIGATION_VERW:
++ case MMIO_MITIGATION_UCODE_NEEDED:
++ if (sched_smt_active())
++ pr_warn_once(MMIO_MSG_SMT);
++ break;
++ case MMIO_MITIGATION_OFF:
++ break;
++ }
++
+ mutex_unlock(&spec_ctrl_mutex);
+ }
+
+@@ -1190,16 +1917,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
+ break;
+ }
+
+- /*
+- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+- * bit in the mask to allow guests to use the mitigation even in the
+- * case where the host does not enable it.
+- */
+- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+- }
+-
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+@@ -1217,7 +1934,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
+ x86_amd_ssb_disable();
+ } else {
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ update_spec_ctrl(x86_spec_ctrl_base);
+ }
+ }
+
+@@ -1364,6 +2081,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
+ if (ctrl == PR_SPEC_FORCE_DISABLE)
+ task_set_spec_ib_force_disable(task);
+ task_update_spec_tif(task);
++ if (task == current)
++ indirect_branch_prediction_barrier();
+ break;
+ default:
+ return -ERANGE;
+@@ -1468,7 +2187,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+ void x86_spec_ctrl_setup_ap(void)
+ {
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++ update_spec_ctrl(x86_spec_ctrl_base);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+@@ -1599,6 +2318,170 @@ static int __init l1tf_cmdline(char *str)
+ }
+ early_param("l1tf", l1tf_cmdline);
+
++#undef pr_fmt
++#define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt
++
++enum srso_mitigation {
++ SRSO_MITIGATION_NONE,
++ SRSO_MITIGATION_MICROCODE,
++ SRSO_MITIGATION_SAFE_RET,
++ SRSO_MITIGATION_IBPB,
++ SRSO_MITIGATION_IBPB_ON_VMEXIT,
++};
++
++enum srso_mitigation_cmd {
++ SRSO_CMD_OFF,
++ SRSO_CMD_MICROCODE,
++ SRSO_CMD_SAFE_RET,
++ SRSO_CMD_IBPB,
++ SRSO_CMD_IBPB_ON_VMEXIT,
++};
++
++static const char * const srso_strings[] = {
++ [SRSO_MITIGATION_NONE] = "Vulnerable",
++ [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode",
++ [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET",
++ [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB",
++ [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
++};
++
++static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
++static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
++
++static int __init srso_parse_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ if (!strcmp(str, "off"))
++ srso_cmd = SRSO_CMD_OFF;
++ else if (!strcmp(str, "microcode"))
++ srso_cmd = SRSO_CMD_MICROCODE;
++ else if (!strcmp(str, "safe-ret"))
++ srso_cmd = SRSO_CMD_SAFE_RET;
++ else if (!strcmp(str, "ibpb"))
++ srso_cmd = SRSO_CMD_IBPB;
++ else if (!strcmp(str, "ibpb-vmexit"))
++ srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
++ else
++ pr_err("Ignoring unknown SRSO option (%s).", str);
++
++ return 0;
++}
++early_param("spec_rstack_overflow", srso_parse_cmdline);
++
++#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options."
++
++static void __init srso_select_mitigation(void)
++{
++ bool has_microcode;
++
++ if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
++ goto pred_cmd;
++
++ /*
++ * The first check is for the kernel running as a guest in order
++ * for guests to verify whether IBPB is a viable mitigation.
++ */
++ has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
++ if (!has_microcode) {
++ pr_warn("IBPB-extending microcode not applied!\n");
++ pr_warn(SRSO_NOTICE);
++ } else {
++ /*
++ * Enable the synthetic (even if in a real CPUID leaf)
++ * flags for guests.
++ */
++ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
++
++ /*
++ * Zen1/2 with SMT off aren't vulnerable after the right
++ * IBPB microcode has been applied.
++ */
++ if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
++ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
++ return;
++ }
++ }
++
++ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
++ if (has_microcode) {
++ pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
++ srso_mitigation = SRSO_MITIGATION_IBPB;
++ goto pred_cmd;
++ }
++ }
++
++ switch (srso_cmd) {
++ case SRSO_CMD_OFF:
++ return;
++
++ case SRSO_CMD_MICROCODE:
++ if (has_microcode) {
++ srso_mitigation = SRSO_MITIGATION_MICROCODE;
++ pr_warn(SRSO_NOTICE);
++ }
++ break;
++
++ case SRSO_CMD_SAFE_RET:
++ if (IS_ENABLED(CONFIG_CPU_SRSO)) {
++ /*
++ * Enable the return thunk for generated code
++ * like ftrace, static_call, etc.
++ */
++ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
++ setup_force_cpu_cap(X86_FEATURE_UNRET);
++
++ if (boot_cpu_data.x86 == 0x19) {
++ setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
++ x86_return_thunk = srso_alias_return_thunk;
++ } else {
++ setup_force_cpu_cap(X86_FEATURE_SRSO);
++ x86_return_thunk = srso_return_thunk;
++ }
++ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
++ goto pred_cmd;
++ }
++ break;
++
++ case SRSO_CMD_IBPB:
++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
++ if (has_microcode) {
++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
++ srso_mitigation = SRSO_MITIGATION_IBPB;
++ }
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
++ goto pred_cmd;
++ }
++ break;
++
++ case SRSO_CMD_IBPB_ON_VMEXIT:
++ if (IS_ENABLED(CONFIG_CPU_SRSO)) {
++ if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
++ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
++ srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
++ }
++ } else {
++ pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
++ goto pred_cmd;
++ }
++ break;
++
++ default:
++ break;
++ }
++
++ pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
++
++pred_cmd:
++ if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
++ boot_cpu_has(X86_FEATURE_SBPB))
++ x86_pred_cmd = PRED_CMD_SBPB;
++}
++
+ #undef pr_fmt
+ #define pr_fmt(fmt) fmt
+
+@@ -1689,9 +2572,26 @@ static ssize_t tsx_async_abort_show_state(char *buf)
+ sched_smt_active() ? "vulnerable" : "disabled");
+ }
+
++static ssize_t mmio_stale_data_show_state(char *buf)
++{
++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ return sysfs_emit(buf, "Unknown: No mitigations\n");
++
++ if (mmio_mitigation == MMIO_MITIGATION_OFF)
++ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
++
++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
++ return sysfs_emit(buf, "%s; SMT Host state unknown\n",
++ mmio_strings[mmio_mitigation]);
++ }
++
++ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],
++ sched_smt_active() ? "vulnerable" : "disabled");
++}
++
+ static char *stibp_state(void)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
++ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ return "";
+
+ switch (spectre_v2_user_stibp) {
+@@ -1721,11 +2621,80 @@ static char *ibpb_state(void)
+ return "";
+ }
+
++static char *pbrsb_eibrs_state(void)
++{
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
++ return ", PBRSB-eIBRS: SW sequence";
++ else
++ return ", PBRSB-eIBRS: Vulnerable";
++ } else {
++ return ", PBRSB-eIBRS: Not affected";
++ }
++}
++
++static ssize_t spectre_v2_show_state(char *buf)
++{
++ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
++ return sprintf(buf, "Vulnerable: LFENCE\n");
++
++ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
++ return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
++
++ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
++ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
++ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
++
++ return sprintf(buf, "%s%s%s%s%s%s%s\n",
++ spectre_v2_strings[spectre_v2_enabled],
++ ibpb_state(),
++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
++ stibp_state(),
++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++ pbrsb_eibrs_state(),
++ spectre_v2_module_string());
++}
++
+ static ssize_t srbds_show_state(char *buf)
+ {
+ return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+ }
+
++static ssize_t retbleed_show_state(char *buf)
++{
++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
++ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
++ return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
++
++ return sprintf(buf, "%s; SMT %s\n",
++ retbleed_strings[retbleed_mitigation],
++ !sched_smt_active() ? "disabled" :
++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
++ "enabled with STIBP protection" : "vulnerable");
++ }
++
++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
++}
++
++static ssize_t gds_show_state(char *buf)
++{
++ return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
++}
++
++static ssize_t srso_show_state(char *buf)
++{
++ if (boot_cpu_has(X86_FEATURE_SRSO_NO))
++ return sysfs_emit(buf, "Mitigation: SMT disabled\n");
++
++ return sysfs_emit(buf, "%s%s\n",
++ srso_strings[srso_mitigation],
++ (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
++}
++
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
+ {
+@@ -1746,12 +2715,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
+ return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+
+ case X86_BUG_SPECTRE_V2:
+- return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+- ibpb_state(),
+- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+- stibp_state(),
+- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+- spectre_v2_module_string());
++ return spectre_v2_show_state(buf);
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+@@ -1773,6 +2737,19 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
+ case X86_BUG_SRBDS:
+ return srbds_show_state(buf);
+
++ case X86_BUG_MMIO_STALE_DATA:
++ case X86_BUG_MMIO_UNKNOWN:
++ return mmio_stale_data_show_state(buf);
++
++ case X86_BUG_RETBLEED:
++ return retbleed_show_state(buf);
++
++ case X86_BUG_GDS:
++ return gds_show_state(buf);
++
++ case X86_BUG_SRSO:
++ return srso_show_state(buf);
++
+ default:
+ break;
+ }
+@@ -1824,4 +2801,27 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
+ {
+ return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
+ }
++
++ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
++ else
++ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
++}
++
++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
++}
++
++ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
++}
++
++ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
++}
+ #endif
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index b3410f1ac2175..3151c08bb54a5 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -18,11 +18,15 @@
+ #include <linux/init.h>
+ #include <linux/kprobes.h>
+ #include <linux/kgdb.h>
++#include <linux/mem_encrypt.h>
+ #include <linux/smp.h>
++#include <linux/cpu.h>
+ #include <linux/io.h>
+ #include <linux/syscore_ops.h>
+ #include <linux/pgtable.h>
++#include <linux/utsname.h>
+
++#include <asm/alternative.h>
+ #include <asm/cmdline.h>
+ #include <asm/stackprotector.h>
+ #include <asm/perf_event.h>
+@@ -58,7 +62,7 @@
+ #include <asm/intel-family.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/uv/uv.h>
+-#include <asm/sigframe.h>
++#include <asm/set_memory.h>
+
+ #include "cpu.h"
+
+@@ -964,6 +968,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ if (c->extended_cpuid_level >= 0x8000001f)
+ c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
+
++ if (c->extended_cpuid_level >= 0x80000021)
++ c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
++
+ init_scattered_cpuid_features(c);
+ init_speculation_control(c);
+
+@@ -1027,6 +1034,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #define NO_SWAPGS BIT(6)
+ #define NO_ITLB_MULTIHIT BIT(7)
+ #define NO_SPECTRE_V2 BIT(8)
++#define NO_MMIO BIT(9)
++#define NO_EIBRS_PBRSB BIT(10)
+
+ #define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1047,6 +1056,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
+
+ /* Intel Family 6 */
++ VULNWL_INTEL(TIGERLAKE, NO_MMIO),
++ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
++ VULNWL_INTEL(ALDERLAKE, NO_MMIO),
++ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
++
+ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+@@ -1065,9 +1079,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+
+- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1077,42 +1091,89 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ * good enough for our purposes.
+ */
+
+- VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /* AMD Family 0xf - 0x12 */
+- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+
+ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+
+ /* Zhaoxin Family 7 */
+- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
+- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ {}
+ };
+
++#define VULNBL(vendor, family, model, blacklist) \
++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
++
+ #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, steppings, \
+ X86_FEATURE_ANY, issues)
+
++#define VULNBL_AMD(family, blacklist) \
++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
++
++#define VULNBL_HYGON(family, blacklist) \
++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
++
+ #define SRBDS BIT(0)
++/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
++#define MMIO BIT(1)
++/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
++#define MMIO_SBDS BIT(2)
++/* CPU is affected by RETbleed, speculating where you would not expect it */
++#define RETBLEED BIT(3)
++/* CPU is affected by SMT (cross-thread) return predictions */
++#define SMT_RSB BIT(4)
++/* CPU is affected by SRSO */
++#define SRSO BIT(5)
++/* CPU is affected by GDS */
++#define GDS BIT(6)
+
+ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
+ VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
++ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
+- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS),
++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
++ VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
++ VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
++
++ VULNBL_AMD(0x15, RETBLEED),
++ VULNBL_AMD(0x16, RETBLEED),
++ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
++ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
++ VULNBL_AMD(0x19, SRSO),
+ {}
+ };
+
+@@ -1133,6 +1194,13 @@ u64 x86_read_arch_cap_msr(void)
+ return ia32_cap;
+ }
+
++static bool arch_cap_mmio_immune(u64 ia32_cap)
++{
++ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
++ ia32_cap & ARCH_CAP_PSDP_NO &&
++ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
++}
++
+ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ {
+ u64 ia32_cap = x86_read_arch_cap_msr();
+@@ -1186,12 +1254,61 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ /*
+ * SRBDS affects CPUs which support RDRAND or RDSEED and are listed
+ * in the vulnerability blacklist.
++ *
++ * Some of the implications and mitigation of Shared Buffers Data
++ * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
++ * SRBDS.
+ */
+ if ((cpu_has(c, X86_FEATURE_RDRAND) ||
+ cpu_has(c, X86_FEATURE_RDSEED)) &&
+- cpu_matches(cpu_vuln_blacklist, SRBDS))
++ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
+ setup_force_cpu_bug(X86_BUG_SRBDS);
+
++ /*
++ * Processor MMIO Stale Data bug enumeration
++ *
++ * Affected CPU list is generally enough to enumerate the vulnerability,
++ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
++ * not want the guest to enumerate the bug.
++ *
++ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
++ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
++ */
++ if (!arch_cap_mmio_immune(ia32_cap)) {
++ if (cpu_matches(cpu_vuln_blacklist, MMIO))
++ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
++ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
++ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
++ }
++
++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
++ setup_force_cpu_bug(X86_BUG_RETBLEED);
++ }
++
++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
++ !(ia32_cap & ARCH_CAP_PBRSB_NO))
++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
++
++ if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
++ setup_force_cpu_bug(X86_BUG_SMT_RSB);
++
++ /*
++ * Check if CPU is vulnerable to GDS. If running in a virtual machine on
++ * an affected processor, the VMM may have disabled the use of GATHER by
++ * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
++ * which means that AVX will be disabled.
++ */
++ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
++ boot_cpu_has(X86_FEATURE_AVX))
++ setup_force_cpu_bug(X86_BUG_GDS);
++
++ if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
++ if (cpu_matches(cpu_vuln_blacklist, SRSO))
++ setup_force_cpu_bug(X86_BUG_SRSO);
++ }
++
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
+
+@@ -1333,10 +1450,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+
+ sld_setup(c);
+
+- fpu__init_system(c);
+-
+- init_sigframe_size();
+-
+ #ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+@@ -1396,9 +1509,8 @@ void __init early_cpu_init(void)
+ early_identify_cpu(&boot_cpu_data);
+ }
+
+-static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
++static bool detect_null_seg_behavior(void)
+ {
+-#ifdef CONFIG_X86_64
+ /*
+ * Empirically, writing zero to a segment selector on AMD does
+ * not clear the base, whereas writing zero to a segment
+@@ -1419,10 +1531,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+ wrmsrl(MSR_FS_BASE, 1);
+ loadsegment(fs, 0);
+ rdmsrl(MSR_FS_BASE, tmp);
+- if (tmp != 0)
+- set_cpu_bug(c, X86_BUG_NULL_SEG);
+ wrmsrl(MSR_FS_BASE, old_base);
+-#endif
++ return tmp == 0;
++}
++
++void check_null_seg_clears_base(struct cpuinfo_x86 *c)
++{
++ /* BUG_NULL_SEG is only relevant with 64bit userspace */
++ if (!IS_ENABLED(CONFIG_X86_64))
++ return;
++
++ /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */
++ if (c->extended_cpuid_level >= 0x80000021 &&
++ cpuid_eax(0x80000021) & BIT(6))
++ return;
++
++ /*
++ * CPUID bit above wasn't set. If this kernel is still running
++ * as a HV guest, then the HV has decided not to advertize
++ * that CPUID bit for whatever reason. For example, one
++ * member of the migration pool might be vulnerable. Which
++ * means, the bug is present: set the BUG flag and return.
++ */
++ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
++ set_cpu_bug(c, X86_BUG_NULL_SEG);
++ return;
++ }
++
++ /*
++ * Zen2 CPUs also have this behaviour, but no CPUID bit.
++ * 0x18 is the respective family for Hygon.
++ */
++ if ((c->x86 == 0x17 || c->x86 == 0x18) &&
++ detect_null_seg_behavior())
++ return;
++
++ /* All the remaining ones are affected */
++ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ }
+
+ static void generic_identify(struct cpuinfo_x86 *c)
+@@ -1458,8 +1603,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
+
+ get_model_name(c); /* Default name */
+
+- detect_null_seg_behavior(c);
+-
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+@@ -1684,6 +1827,10 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
+ validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
+ update_srbds_msr();
++ if (boot_cpu_has_bug(X86_BUG_GDS))
++ update_gds_msr();
++
++ tsx_ap_init();
+ }
+
+ static __init int setup_noclflush(char *arg)
+@@ -2009,8 +2156,6 @@ void cpu_init(void)
+
+ doublefault_init_cpu_tss();
+
+- fpu__init_cpu();
+-
+ if (is_uv_system())
+ uv_cpu_init();
+
+@@ -2026,38 +2171,58 @@ void cpu_init_secondary(void)
+ */
+ cpu_init_exception_handling();
+ cpu_init();
++ fpu__init_cpu();
+ }
+ #endif
+
+-/*
++#ifdef CONFIG_MICROCODE_LATE_LOADING
++/**
++ * store_cpu_caps() - Store a snapshot of CPU capabilities
++ * @curr_info: Pointer where to store it
++ *
++ * Returns: None
++ */
++void store_cpu_caps(struct cpuinfo_x86 *curr_info)
++{
++ /* Reload CPUID max function as it might've changed. */
++ curr_info->cpuid_level = cpuid_eax(0);
++
++ /* Copy all capability leafs and pick up the synthetic ones. */
++ memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability,
++ sizeof(curr_info->x86_capability));
++
++ /* Get the hardware CPUID leafs */
++ get_cpu_cap(curr_info);
++}
++
++/**
++ * microcode_check() - Check if any CPU capabilities changed after an update.
++ * @prev_info: CPU capabilities stored before an update.
++ *
+ * The microcode loader calls this upon late microcode load to recheck features,
+ * only when microcode has been updated. Caller holds microcode_mutex and CPU
+ * hotplug lock.
++ *
++ * Return: None
+ */
+-void microcode_check(void)
++void microcode_check(struct cpuinfo_x86 *prev_info)
+ {
+- struct cpuinfo_x86 info;
++ struct cpuinfo_x86 curr_info;
+
+ perf_check_microcode();
+
+- /* Reload CPUID max function as it might've changed. */
+- info.cpuid_level = cpuid_eax(0);
+-
+- /*
+- * Copy all capability leafs to pick up the synthetic ones so that
+- * memcmp() below doesn't fail on that. The ones coming from CPUID will
+- * get overwritten in get_cpu_cap().
+- */
+- memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
++ amd_check_microcode();
+
+- get_cpu_cap(&info);
++ store_cpu_caps(&curr_info);
+
+- if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
++ if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
++ sizeof(prev_info->x86_capability)))
+ return;
+
+ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+ }
++#endif
+
+ /*
+ * Invoked from core CPU hotplug code after hotplug operations
+@@ -2069,3 +2234,69 @@ void arch_smt_update(void)
+ /* Check whether IPI broadcasting can be enabled */
+ apic_smt_update();
+ }
++
++void __init arch_cpu_finalize_init(void)
++{
++ identify_boot_cpu();
++
++ /*
++ * identify_boot_cpu() initialized SMT support information, let the
++ * core code know.
++ */
++ cpu_smt_check_topology();
++
++ if (!IS_ENABLED(CONFIG_SMP)) {
++ pr_info("CPU: ");
++ print_cpu_info(&boot_cpu_data);
++ }
++
++ cpu_select_mitigations();
++
++ arch_smt_update();
++
++ if (IS_ENABLED(CONFIG_X86_32)) {
++ /*
++ * Check whether this is a real i386 which is not longer
++ * supported and fixup the utsname.
++ */
++ if (boot_cpu_data.x86 < 4)
++ panic("Kernel requires i486+ for 'invlpg' and other features");
++
++ init_utsname()->machine[1] =
++ '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
++ }
++
++ /*
++ * Must be before alternatives because it might set or clear
++ * feature bits.
++ */
++ fpu__init_system();
++ fpu__init_cpu();
++
++ alternative_instructions();
++
++ if (IS_ENABLED(CONFIG_X86_64)) {
++ /*
++ * Make sure the first 2MB area is not mapped by huge pages
++ * There are typically fixed size MTRRs in there and overlapping
++ * MTRRs into large pages causes slow downs.
++ *
++ * Right now we don't do that with gbpages because there seems
++ * very little benefit for that case.
++ */
++ if (!direct_gbpages)
++ set_memory_4k((unsigned long)__va(0), 1);
++ } else {
++ fpu__init_check_bugs();
++ }
++
++ /*
++ * This needs to be called before any devices perform DMA
++ * operations that might use the SWIOTLB bounce buffers. It will
++ * mark the bounce buffers as decrypted so that their usage will
++ * not cause "plain-text" data to be decrypted when accessed. It
++ * must be called after late_time_init() so that Hyper-V x86/x64
++ * hypercalls work when the SWIOTLB bounce buffers are decrypted.
++ */
++ mem_encrypt_init();
++}
+diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
+index 95521302630d4..d9aeb335002dd 100644
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -55,13 +55,14 @@ enum tsx_ctrl_states {
+ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
+
+ extern void __init tsx_init(void);
+-extern void tsx_enable(void);
+-extern void tsx_disable(void);
+-extern void tsx_clear_cpuid(void);
++void tsx_ap_init(void);
+ #else
+ static inline void tsx_init(void) { }
++static inline void tsx_ap_init(void) { }
+ #endif /* CONFIG_CPU_SUP_INTEL */
+
++extern void init_spectral_chicken(struct cpuinfo_x86 *c);
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+@@ -75,11 +76,14 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
+ extern int detect_extended_topology(struct cpuinfo_x86 *c);
+ extern int detect_ht_early(struct cpuinfo_x86 *c);
+ extern void detect_ht(struct cpuinfo_x86 *c);
++extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
+
+ unsigned int aperfmperf_get_khz(int cpu);
++void cpu_select_mitigations(void);
+
+ extern void x86_spec_ctrl_setup_ap(void);
+ extern void update_srbds_msr(void);
++extern void update_gds_msr(void);
+
+ extern u64 x86_read_arch_cap_msr(void);
+
+diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
+index da696eb4821a0..e77032c5f85cc 100644
+--- a/arch/x86/kernel/cpu/feat_ctl.c
++++ b/arch/x86/kernel/cpu/feat_ctl.c
+@@ -1,11 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <linux/tboot.h>
+
++#include <asm/cpu.h>
+ #include <asm/cpufeature.h>
+ #include <asm/msr-index.h>
+ #include <asm/processor.h>
+ #include <asm/vmx.h>
+-#include "cpu.h"
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "x86/cpu: " fmt
+diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
+index 6d50136f7ab98..c393b8773ace6 100644
+--- a/arch/x86/kernel/cpu/hygon.c
++++ b/arch/x86/kernel/cpu/hygon.c
+@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c)
+ /* get apicid instead of initial apic id from cpuid */
+ c->apicid = hard_smp_processor_id();
+
++ /*
++ * XXX someone from Hygon needs to confirm this DTRT
++ *
++ init_spectral_chicken(c);
++ */
++
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+@@ -320,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+- msr_set_bit(MSR_F10H_DECFG,
+- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++ msr_set_bit(MSR_AMD64_DE_CFG,
++ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
+
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+@@ -335,6 +341,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
+ /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
+ if (!cpu_has(c, X86_FEATURE_XENPV))
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
++
++ check_null_seg_clears_base(c);
+ }
+
+ static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 8321c43554a1d..ae7d4c85f4f43 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -91,7 +91,7 @@ static bool ring3mwait_disabled __read_mostly;
+ static int __init ring3mwait_disable(char *__unused)
+ {
+ ring3mwait_disabled = true;
+- return 0;
++ return 1;
+ }
+ __setup("ring3mwait=disable", ring3mwait_disable);
+
+@@ -717,13 +717,6 @@ static void init_intel(struct cpuinfo_x86 *c)
+
+ init_intel_misc_features(c);
+
+- if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+- tsx_enable();
+- else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+- tsx_disable();
+- else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
+- tsx_clear_cpuid();
+-
+ split_lock_init();
+ bus_lock_init();
+
+@@ -1152,22 +1145,23 @@ static void bus_lock_init(void)
+ {
+ u64 val;
+
+- /*
+- * Warn and fatal are handled by #AC for split lock if #AC for
+- * split lock is supported.
+- */
+- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) ||
+- (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+- (sld_state == sld_warn || sld_state == sld_fatal)) ||
+- sld_state == sld_off)
++ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ return;
+
+- /*
+- * Enable #DB for bus lock. All bus locks are handled in #DB except
+- * split locks are handled in #AC in the fatal case.
+- */
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+- val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
++
++ if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
++ (sld_state == sld_warn || sld_state == sld_fatal)) ||
++ sld_state == sld_off) {
++ /*
++ * Warn and fatal are handled by #AC for split lock if #AC for
++ * split lock is supported.
++ */
++ val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
++ } else {
++ val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
++ }
++
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+ }
+
+diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
+index 08831acc1d036..d4e75be64a4c5 100644
+--- a/arch/x86/kernel/cpu/mce/amd.c
++++ b/arch/x86/kernel/cpu/mce/amd.c
+@@ -210,10 +210,10 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
+ * A list of the banks enabled on each logical CPU. Controls which respective
+ * descriptors to initialize later in mce_threshold_create_device().
+ */
+-static DEFINE_PER_CPU(unsigned int, bank_map);
++static DEFINE_PER_CPU(u64, bank_map);
+
+ /* Map of banks that have more than MCA_MISC0 available. */
+-static DEFINE_PER_CPU(u32, smca_misc_banks_map);
++static DEFINE_PER_CPU(u64, smca_misc_banks_map);
+
+ static void amd_threshold_interrupt(void);
+ static void amd_deferred_error_interrupt(void);
+@@ -242,7 +242,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
+ return;
+
+ if (low & MASK_BLKPTR_LO)
+- per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
++ per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
+
+ }
+
+@@ -400,7 +400,7 @@ static void threshold_restart_bank(void *_tr)
+ u32 hi, lo;
+
+ /* sysfs write might race against an offline operation */
+- if (this_cpu_read(threshold_banks))
++ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
+ return;
+
+ rdmsr(tr->b->address, lo, hi);
+@@ -505,7 +505,7 @@ static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+- if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
++ if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
+ return 0;
+
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+@@ -526,7 +526,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+ /* Fall back to method we used for older processors: */
+ switch (block) {
+ case 0:
+- addr = msr_ops.misc(bank);
++ addr = mca_msr_reg(bank, MCA_MISC);
+ break;
+ case 1:
+ offset = ((low & MASK_BLKPTR_LO) >> 21);
+@@ -549,7 +549,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+ int new;
+
+ if (!block)
+- per_cpu(bank_map, cpu) |= (1 << bank);
++ per_cpu(bank_map, cpu) |= BIT_ULL(bank);
+
+ memset(&b, 0, sizeof(b));
+ b.cpu = cpu;
+@@ -965,6 +965,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
+ return status & MCI_STATUS_DEFERRED;
+ }
+
++static bool _log_error_deferred(unsigned int bank, u32 misc)
++{
++ if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
++ mca_msr_reg(bank, MCA_ADDR), misc))
++ return false;
++
++ /*
++ * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
++ * Return true here to avoid accessing these registers.
++ */
++ if (!mce_flags.smca)
++ return true;
++
++ /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
++ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
++ return true;
++}
++
+ /*
+ * We have three scenarios for checking for Deferred errors:
+ *
+@@ -976,20 +994,9 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
+ */
+ static void log_error_deferred(unsigned int bank)
+ {
+- bool defrd;
+-
+- defrd = _log_error_bank(bank, msr_ops.status(bank),
+- msr_ops.addr(bank), 0);
+-
+- if (!mce_flags.smca)
++ if (_log_error_deferred(bank, 0))
+ return;
+
+- /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
+- if (defrd) {
+- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+- return;
+- }
+-
+ /*
+ * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
+ * for a valid error.
+@@ -1009,7 +1016,7 @@ static void amd_deferred_error_interrupt(void)
+
+ static void log_error_thresholding(unsigned int bank, u64 misc)
+ {
+- _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
++ _log_error_deferred(bank, misc);
+ }
+
+ static void log_and_reset_block(struct threshold_block *block)
+@@ -1054,7 +1061,7 @@ static void amd_threshold_interrupt(void)
+ return;
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+- if (!(per_cpu(bank_map, cpu) & (1 << bank)))
++ if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
+ continue;
+
+ first_block = bp[bank]->blocks;
+@@ -1397,7 +1404,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
+ }
+ }
+
+- err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
++ err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
+ if (err)
+ goto out_kobj;
+
+@@ -1470,10 +1477,23 @@ out_free:
+ kfree(bank);
+ }
+
++static void __threshold_remove_device(struct threshold_bank **bp)
++{
++ unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
++
++ for (bank = 0; bank < numbanks; bank++) {
++ if (!bp[bank])
++ continue;
++
++ threshold_remove_bank(bp[bank]);
++ bp[bank] = NULL;
++ }
++ kfree(bp);
++}
++
+ int mce_threshold_remove_device(unsigned int cpu)
+ {
+ struct threshold_bank **bp = this_cpu_read(threshold_banks);
+- unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+
+ if (!bp)
+ return 0;
+@@ -1484,13 +1504,7 @@ int mce_threshold_remove_device(unsigned int cpu)
+ */
+ this_cpu_write(threshold_banks, NULL);
+
+- for (bank = 0; bank < numbanks; bank++) {
+- if (bp[bank]) {
+- threshold_remove_bank(bp[bank]);
+- bp[bank] = NULL;
+- }
+- }
+- kfree(bp);
++ __threshold_remove_device(bp);
+ return 0;
+ }
+
+@@ -1524,18 +1538,17 @@ int mce_threshold_create_device(unsigned int cpu)
+ return -ENOMEM;
+
+ for (bank = 0; bank < numbanks; ++bank) {
+- if (!(this_cpu_read(bank_map) & (1 << bank)))
++ if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
+ continue;
+ err = threshold_create_bank(bp, cpu, bank);
+- if (err)
+- goto out_err;
++ if (err) {
++ __threshold_remove_device(bp);
++ return err;
++ }
+ }
+ this_cpu_write(threshold_banks, bp);
+
+ if (thresholding_irq_en)
+ mce_threshold_vector = amd_threshold_interrupt;
+ return 0;
+-out_err:
+- mce_threshold_remove_device(cpu);
+- return err;
+ }
+diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
+index 0e3ae64d3b76b..b08b90cdc2a3e 100644
+--- a/arch/x86/kernel/cpu/mce/apei.c
++++ b/arch/x86/kernel/cpu/mce/apei.c
+@@ -29,15 +29,26 @@
+ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
+ {
+ struct mce m;
++ int lsb;
+
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return;
+
++ /*
++ * Even if the ->validation_bits are set for address mask,
++ * to be extra safe, check and reject an error radius '0',
++ * and fall back to the default page size.
++ */
++ if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
++ lsb = find_first_bit((void *)&mem_err->physical_addr_mask, PAGE_SHIFT);
++ else
++ lsb = PAGE_SHIFT;
++
+ mce_setup(&m);
+ m.bank = -1;
+ /* Fake a memory read error with unknown channel */
+ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+- m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT;
++ m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
+
+ if (severity >= GHES_SEV_RECOVERABLE)
+ m.status |= MCI_STATUS_UC;
+diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
+index 193204aee8801..a0727723676b4 100644
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -176,53 +176,27 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
+ }
+ EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+
+-static inline u32 ctl_reg(int bank)
++u32 mca_msr_reg(int bank, enum mca_msr reg)
+ {
+- return MSR_IA32_MCx_CTL(bank);
+-}
+-
+-static inline u32 status_reg(int bank)
+-{
+- return MSR_IA32_MCx_STATUS(bank);
+-}
+-
+-static inline u32 addr_reg(int bank)
+-{
+- return MSR_IA32_MCx_ADDR(bank);
+-}
+-
+-static inline u32 misc_reg(int bank)
+-{
+- return MSR_IA32_MCx_MISC(bank);
+-}
+-
+-static inline u32 smca_ctl_reg(int bank)
+-{
+- return MSR_AMD64_SMCA_MCx_CTL(bank);
+-}
+-
+-static inline u32 smca_status_reg(int bank)
+-{
+- return MSR_AMD64_SMCA_MCx_STATUS(bank);
+-}
++ if (mce_flags.smca) {
++ switch (reg) {
++ case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
++ case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
++ case MCA_MISC: return MSR_AMD64_SMCA_MCx_MISC(bank);
++ case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
++ }
++ }
+
+-static inline u32 smca_addr_reg(int bank)
+-{
+- return MSR_AMD64_SMCA_MCx_ADDR(bank);
+-}
++ switch (reg) {
++ case MCA_CTL: return MSR_IA32_MCx_CTL(bank);
++ case MCA_ADDR: return MSR_IA32_MCx_ADDR(bank);
++ case MCA_MISC: return MSR_IA32_MCx_MISC(bank);
++ case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
++ }
+
+-static inline u32 smca_misc_reg(int bank)
+-{
+- return MSR_AMD64_SMCA_MCx_MISC(bank);
++ return 0;
+ }
+
+-struct mca_msr_regs msr_ops = {
+- .ctl = ctl_reg,
+- .status = status_reg,
+- .addr = addr_reg,
+- .misc = misc_reg
+-};
+-
+ static void __print_mce(struct mce *m)
+ {
+ pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
+@@ -295,11 +269,17 @@ static void wait_for_panic(void)
+ panic("Panicing machine check CPU died");
+ }
+
+-static void mce_panic(const char *msg, struct mce *final, char *exp)
++static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
+ {
+- int apei_err = 0;
+ struct llist_node *pending;
+ struct mce_evt_llist *l;
++ int apei_err = 0;
++
++ /*
++ * Allow instrumentation around external facilities usage. Not that it
++ * matters a whole lot since the machine is going to panic anyway.
++ */
++ instrumentation_begin();
+
+ if (!fake_panic) {
+ /*
+@@ -314,7 +294,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
+ } else {
+ /* Don't log too much for fake panic */
+ if (atomic_inc_return(&mce_fake_panicked) > 1)
+- return;
++ goto out;
+ }
+ pending = mce_gen_pool_prepare_records();
+ /* First print corrected ones that are still unlogged */
+@@ -352,6 +332,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
+ panic(msg);
+ } else
+ pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
++
++out:
++ instrumentation_end();
+ }
+
+ /* Support code for software error injection */
+@@ -362,24 +345,27 @@ static int msr_to_offset(u32 msr)
+
+ if (msr == mca_cfg.rip_msr)
+ return offsetof(struct mce, ip);
+- if (msr == msr_ops.status(bank))
++ if (msr == mca_msr_reg(bank, MCA_STATUS))
+ return offsetof(struct mce, status);
+- if (msr == msr_ops.addr(bank))
++ if (msr == mca_msr_reg(bank, MCA_ADDR))
+ return offsetof(struct mce, addr);
+- if (msr == msr_ops.misc(bank))
++ if (msr == mca_msr_reg(bank, MCA_MISC))
+ return offsetof(struct mce, misc);
+ if (msr == MSR_IA32_MCG_STATUS)
+ return offsetof(struct mce, mcgstatus);
+ return -1;
+ }
+
+-__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
+ {
+- pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+- (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
++ if (wrmsr) {
++ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
++ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
++ regs->ip, (void *)regs->ip);
++ } else {
++ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
++ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
++ }
+
+ show_stack_regs(regs);
+
+@@ -387,8 +373,6 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+
+ while (true)
+ cpu_relax();
+-
+- return true;
+ }
+
+ /* MSR access wrappers used for error injection */
+@@ -420,32 +404,13 @@ static noinstr u64 mce_rdmsrl(u32 msr)
+ */
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
+
+
+ return EAX_EDX_VAL(val, low, high);
+ }
+
+-__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
+-{
+- pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+- (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+- regs->ip, (void *)regs->ip);
+-
+- show_stack_regs(regs);
+-
+- panic("MCA architectural violation!\n");
+-
+- while (true)
+- cpu_relax();
+-
+- return true;
+-}
+-
+ static noinstr void mce_wrmsrl(u32 msr, u64 v)
+ {
+ u32 low, high;
+@@ -470,7 +435,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
+ /* See comment in mce_rdmsrl() */
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
++ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
+ }
+
+@@ -682,13 +647,13 @@ static struct notifier_block mce_default_nb = {
+ /*
+ * Read ADDR and MISC registers.
+ */
+-static void mce_read_aux(struct mce *m, int i)
++static noinstr void mce_read_aux(struct mce *m, int i)
+ {
+ if (m->status & MCI_STATUS_MISCV)
+- m->misc = mce_rdmsrl(msr_ops.misc(i));
++ m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
+
+ if (m->status & MCI_STATUS_ADDRV) {
+- m->addr = mce_rdmsrl(msr_ops.addr(i));
++ m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
+
+ /*
+ * Mask the reported address by the reported granularity.
+@@ -758,7 +723,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+ m.bank = i;
+
+ barrier();
+- m.status = mce_rdmsrl(msr_ops.status(i));
++ m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+
+ /* If this entry is not valid, ignore it */
+ if (!(m.status & MCI_STATUS_VAL))
+@@ -826,7 +791,7 @@ clear_it:
+ /*
+ * Clear state for this bank.
+ */
+- mce_wrmsrl(msr_ops.status(i), 0);
++ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ }
+
+ /*
+@@ -851,7 +816,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+ int i;
+
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+- m->status = mce_rdmsrl(msr_ops.status(i));
++ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+@@ -1072,10 +1037,13 @@ static int mce_start(int *no_way_out)
+ * Synchronize between CPUs after main scanning loop.
+ * This invokes the bulk of the Monarch processing.
+ */
+-static int mce_end(int order)
++static noinstr int mce_end(int order)
+ {
+- int ret = -1;
+ u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
++ int ret = -1;
++
++ /* Allow instrumentation around external facilities. */
++ instrumentation_begin();
+
+ if (!timeout)
+ goto reset;
+@@ -1119,7 +1087,8 @@ static int mce_end(int order)
+ /*
+ * Don't reset anything. That's done by the Monarch.
+ */
+- return 0;
++ ret = 0;
++ goto out;
+ }
+
+ /*
+@@ -1135,6 +1104,10 @@ reset:
+ * Let others run again.
+ */
+ atomic_set(&mce_executing, 0);
++
++out:
++ instrumentation_end();
++
+ return ret;
+ }
+
+@@ -1144,7 +1117,7 @@ static void mce_clear_state(unsigned long *toclear)
+
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+ if (test_bit(i, toclear))
+- mce_wrmsrl(msr_ops.status(i), 0);
++ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ }
+ }
+
+@@ -1203,7 +1176,7 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
+ m->addr = 0;
+ m->bank = i;
+
+- m->status = mce_rdmsrl(msr_ops.status(i));
++ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+@@ -1280,10 +1253,12 @@ static void kill_me_maybe(struct callback_head *cb)
+
+ /*
+ * -EHWPOISON from memory_failure() means that it already sent SIGBUS
+- * to the current process with the proper error info, so no need to
+- * send SIGBUS here again.
++ * to the current process with the proper error info,
++ * -EOPNOTSUPP means hwpoison_filter() filtered the error event,
++ *
++ * In both cases, no further processing is required.
+ */
+- if (ret == -EHWPOISON)
++ if (ret == -EHWPOISON || ret == -EOPNOTSUPP)
+ return;
+
+ if (p->mce_vaddr != (void __user *)-1l) {
+@@ -1454,6 +1429,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
+ if (worst != MCE_AR_SEVERITY && !kill_current_task)
+ goto out;
+
++ /*
++ * Enable instrumentation around the external facilities like
++ * task_work_add() (via queue_task_work()), fixup_exception() etc.
++ * For now, that is. Fixing this properly would need a lot more involved
++ * reorganization.
++ */
++ instrumentation_begin();
++
+ /* Fault was in user mode and we need to take some action */
+ if ((m.cs & 3) == 3) {
+ /* If this triggers there is no way to recover. Die hard. */
+@@ -1479,6 +1462,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, msg, kill_current_task);
+ }
++
++ instrumentation_end();
++
+ out:
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ }
+@@ -1687,8 +1673,8 @@ static void __mcheck_cpu_init_clear_banks(void)
+
+ if (!b->init)
+ continue;
+- wrmsrl(msr_ops.ctl(i), b->ctl);
+- wrmsrl(msr_ops.status(i), 0);
++ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
++ wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ }
+ }
+
+@@ -1714,7 +1700,7 @@ static void __mcheck_cpu_check_banks(void)
+ if (!b->init)
+ continue;
+
+- rdmsrl(msr_ops.ctl(i), msrval);
++ rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
+ b->init = !!msrval;
+ }
+ }
+@@ -1871,13 +1857,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
+ mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
+ mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
+ mce_flags.amd_threshold = 1;
+-
+- if (mce_flags.smca) {
+- msr_ops.ctl = smca_ctl_reg;
+- msr_ops.status = smca_status_reg;
+- msr_ops.addr = smca_addr_reg;
+- msr_ops.misc = smca_misc_reg;
+- }
+ }
+ }
+
+@@ -2253,7 +2232,7 @@ static void mce_disable_error_reporting(void)
+ struct mce_bank *b = &mce_banks[i];
+
+ if (b->init)
+- wrmsrl(msr_ops.ctl(i), 0);
++ wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
+ }
+ return;
+ }
+@@ -2323,6 +2302,7 @@ static void mce_restart(void)
+ {
+ mce_timer_delete_all();
+ on_each_cpu(mce_cpu_restart, NULL, 1);
++ mce_schedule_work();
+ }
+
+ /* Toggle features for corrected errors */
+@@ -2605,7 +2585,7 @@ static void mce_reenable_cpu(void)
+ struct mce_bank *b = &mce_banks[i];
+
+ if (b->init)
+- wrmsrl(msr_ops.ctl(i), b->ctl);
++ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+ }
+ }
+
+diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
+index 0bfc14041bbb4..b63b548497c14 100644
+--- a/arch/x86/kernel/cpu/mce/inject.c
++++ b/arch/x86/kernel/cpu/mce/inject.c
+@@ -350,7 +350,7 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
+ char buf[MAX_FLAG_OPT_SIZE], *__buf;
+ int err;
+
+- if (cnt > MAX_FLAG_OPT_SIZE)
++ if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
+index acfd5d9f93c68..baafbb37be678 100644
+--- a/arch/x86/kernel/cpu/mce/intel.c
++++ b/arch/x86/kernel/cpu/mce/intel.c
+@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ICELAKE_X:
++ case INTEL_FAM6_ICELAKE_D:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
+@@ -547,12 +548,13 @@ bool intel_filter_mce(struct mce *m)
+ {
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+- /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
++ /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
+ if ((c->x86 == 6) &&
+ ((c->x86_model == INTEL_FAM6_HASWELL) ||
+ (c->x86_model == INTEL_FAM6_HASWELL_L) ||
+ (c->x86_model == INTEL_FAM6_BROADWELL) ||
+- (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
++ (c->x86_model == INTEL_FAM6_HASWELL_G) ||
++ (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
+ (m->bank == 0) &&
+ ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
+ return true;
+diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
+index 88dcc79cfb07d..760b57814760a 100644
+--- a/arch/x86/kernel/cpu/mce/internal.h
++++ b/arch/x86/kernel/cpu/mce/internal.h
+@@ -168,14 +168,14 @@ struct mce_vendor_flags {
+
+ extern struct mce_vendor_flags mce_flags;
+
+-struct mca_msr_regs {
+- u32 (*ctl) (int bank);
+- u32 (*status) (int bank);
+- u32 (*addr) (int bank);
+- u32 (*misc) (int bank);
++enum mca_msr {
++ MCA_CTL,
++ MCA_STATUS,
++ MCA_ADDR,
++ MCA_MISC,
+ };
+
+-extern struct mca_msr_regs msr_ops;
++u32 mca_msr_reg(int bank, enum mca_msr reg);
+
+ /* Decide whether to add MCE record to MCE event pool or filter it out. */
+ extern bool filter_mce(struct mce *m);
+@@ -186,14 +186,4 @@ extern bool amd_filter_mce(struct mce *m);
+ static inline bool amd_filter_mce(struct mce *m) { return false; };
+ #endif
+
+-__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr);
+-
+-__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr);
+-
+ #endif /* __X86_MCE_INTERNAL_H__ */
+diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
+index 17e6314431169..d9b77a74f8d2e 100644
+--- a/arch/x86/kernel/cpu/mce/severity.c
++++ b/arch/x86/kernel/cpu/mce/severity.c
+@@ -265,25 +265,26 @@ static bool is_copy_from_user(struct pt_regs *regs)
+ */
+ static int error_context(struct mce *m, struct pt_regs *regs)
+ {
+- enum handler_type t;
+-
+ if ((m->cs & 3) == 3)
+ return IN_USER;
+ if (!mc_recoverable(m->mcgstatus))
+ return IN_KERNEL;
+
+- t = ex_get_fault_handler_type(m->ip);
+- if (t == EX_HANDLER_FAULT) {
+- m->kflags |= MCE_IN_KERNEL_RECOV;
+- return IN_KERNEL_RECOV;
+- }
+- if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
+- m->kflags |= MCE_IN_KERNEL_RECOV;
++ switch (ex_get_fixup_type(m->ip)) {
++ case EX_TYPE_UACCESS:
++ case EX_TYPE_COPY:
++ if (!regs || !is_copy_from_user(regs))
++ return IN_KERNEL;
+ m->kflags |= MCE_IN_KERNEL_COPYIN;
++ fallthrough;
++ case EX_TYPE_FAULT:
++ case EX_TYPE_FAULT_MCE_SAFE:
++ case EX_TYPE_DEFAULT_MCE_SAFE:
++ m->kflags |= MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
++ default:
++ return IN_KERNEL;
+ }
+-
+- return IN_KERNEL;
+ }
+
+ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
+index 3d4a48336084f..6a95a52d08daa 100644
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -55,7 +55,9 @@ struct cont_desc {
+ };
+
+ static u32 ucode_new_rev;
+-static u8 amd_ucode_patch[PATCH_MAX_SIZE];
++
++/* One blob per node. */
++static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
+
+ /*
+ * Microcode patch container file is prepended to the initrd in cpio
+@@ -428,7 +430,7 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
+ patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
+ #else
+ new_rev = &ucode_new_rev;
+- patch = &amd_ucode_patch;
++ patch = &amd_ucode_patch[0];
+ #endif
+
+ desc.cpuid_1_eax = cpuid_1_eax;
+@@ -440,7 +442,13 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
+ return ret;
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+- if (rev >= mc->hdr.patch_id)
++
++ /*
++ * Allow application of the same revision to pick up SMT-specific
++ * changes even if the revision of the other SMT thread is already
++ * up-to-date.
++ */
++ if (rev > mc->hdr.patch_id)
+ return ret;
+
+ if (!__apply_microcode_amd(mc)) {
+@@ -522,8 +530,12 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+- /* Check whether we have saved a new patch already: */
+- if (*new_rev && rev < mc->hdr.patch_id) {
++ /*
++ * Check whether a new patch has been saved already. Also, allow application of
++ * the same revision in order to pick up SMT-thread-specific configuration even
++ * if the sibling SMT thread already has an up-to-date revision.
++ */
++ if (*new_rev && rev <= mc->hdr.patch_id) {
+ if (!__apply_microcode_amd(mc)) {
+ *new_rev = mc->hdr.patch_id;
+ return;
+@@ -537,8 +549,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
+ apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
+ }
+
+-static enum ucode_state
+-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
++static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+
+ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
+ {
+@@ -556,19 +567,19 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
+ if (!desc.mc)
+ return -EINVAL;
+
+- ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
++ ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+ if (ret > UCODE_UPDATED)
+ return -EINVAL;
+
+ return 0;
+ }
+
+-void reload_ucode_amd(void)
++void reload_ucode_amd(unsigned int cpu)
+ {
+- struct microcode_amd *mc;
+ u32 rev, dummy __always_unused;
++ struct microcode_amd *mc;
+
+- mc = (struct microcode_amd *)amd_ucode_patch;
++ mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
+
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+@@ -688,7 +699,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+ /* need to apply patch? */
+- if (rev >= mc_amd->hdr.patch_id) {
++ if (rev > mc_amd->hdr.patch_id) {
+ ret = UCODE_OK;
+ goto out;
+ }
+@@ -782,6 +793,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
+ kfree(patch);
+ return -EINVAL;
+ }
++ patch->size = *patch_size;
+
+ mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+ proc_id = mc_hdr->processor_rev_id;
+@@ -833,9 +845,10 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
+ return UCODE_OK;
+ }
+
+-static enum ucode_state
+-load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
++static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+ {
++ struct cpuinfo_x86 *c;
++ unsigned int nid, cpu;
+ struct ucode_patch *p;
+ enum ucode_state ret;
+
+@@ -848,22 +861,22 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
+ return ret;
+ }
+
+- p = find_patch(0);
+- if (!p) {
+- return ret;
+- } else {
+- if (boot_cpu_data.microcode >= p->patch_id)
+- return ret;
++ for_each_node(nid) {
++ cpu = cpumask_first(cpumask_of_node(nid));
++ c = &cpu_data(cpu);
+
+- ret = UCODE_NEW;
+- }
++ p = find_patch(cpu);
++ if (!p)
++ continue;
+
+- /* save BSP's matching patch for early load */
+- if (!save)
+- return ret;
++ if (c->microcode >= p->patch_id)
++ continue;
++
++ ret = UCODE_NEW;
+
+- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
++ memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
++ memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
++ }
+
+ return ret;
+ }
+@@ -889,12 +902,11 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+ {
+ char fw_name[36] = "amd-ucode/microcode_amd.bin";
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+- bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
+ enum ucode_state ret = UCODE_NFOUND;
+ const struct firmware *fw;
+
+ /* reload ucode container only on the boot cpu */
+- if (!refresh_fw || !bsp)
++ if (!refresh_fw)
+ return UCODE_OK;
+
+ if (c->x86 >= 0x15)
+@@ -909,7 +921,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+ if (!verify_container(fw->data, fw->size, false))
+ goto fw_release;
+
+- ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
++ ret = load_microcode_amd(c->x86, fw->data, fw->size);
+
+ fw_release:
+ release_firmware(fw);
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index efb69be41ab18..d2f00d77e9adf 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -315,7 +315,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
+ #endif
+ }
+
+-void reload_early_microcode(void)
++void reload_early_microcode(unsigned int cpu)
+ {
+ int vendor, family;
+
+@@ -329,7 +329,7 @@ void reload_early_microcode(void)
+ break;
+ case X86_VENDOR_AMD:
+ if (family >= 0x10)
+- reload_ucode_amd();
++ reload_ucode_amd(cpu);
+ break;
+ default:
+ break;
+@@ -390,101 +390,10 @@ static int apply_microcode_on_target(int cpu)
+ return ret;
+ }
+
+-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+-static int do_microcode_update(const void __user *buf, size_t size)
+-{
+- int error = 0;
+- int cpu;
+-
+- for_each_online_cpu(cpu) {
+- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+- enum ucode_state ustate;
+-
+- if (!uci->valid)
+- continue;
+-
+- ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+- if (ustate == UCODE_ERROR) {
+- error = -1;
+- break;
+- } else if (ustate == UCODE_NEW) {
+- apply_microcode_on_target(cpu);
+- }
+- }
+-
+- return error;
+-}
+-
+-static int microcode_open(struct inode *inode, struct file *file)
+-{
+- return capable(CAP_SYS_RAWIO) ? stream_open(inode, file) : -EPERM;
+-}
+-
+-static ssize_t microcode_write(struct file *file, const char __user *buf,
+- size_t len, loff_t *ppos)
+-{
+- ssize_t ret = -EINVAL;
+- unsigned long nr_pages = totalram_pages();
+-
+- if ((len >> PAGE_SHIFT) > nr_pages) {
+- pr_err("too much data (max %ld pages)\n", nr_pages);
+- return ret;
+- }
+-
+- cpus_read_lock();
+- mutex_lock(&microcode_mutex);
+-
+- if (do_microcode_update(buf, len) == 0)
+- ret = (ssize_t)len;
+-
+- if (ret > 0)
+- perf_check_microcode();
+-
+- mutex_unlock(&microcode_mutex);
+- cpus_read_unlock();
+-
+- return ret;
+-}
+-
+-static const struct file_operations microcode_fops = {
+- .owner = THIS_MODULE,
+- .write = microcode_write,
+- .open = microcode_open,
+- .llseek = no_llseek,
+-};
+-
+-static struct miscdevice microcode_dev = {
+- .minor = MICROCODE_MINOR,
+- .name = "microcode",
+- .nodename = "cpu/microcode",
+- .fops = &microcode_fops,
+-};
+-
+-static int __init microcode_dev_init(void)
+-{
+- int error;
+-
+- error = misc_register(&microcode_dev);
+- if (error) {
+- pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
+- return error;
+- }
+-
+- return 0;
+-}
+-
+-static void __exit microcode_dev_exit(void)
+-{
+- misc_deregister(&microcode_dev);
+-}
+-#else
+-#define microcode_dev_init() 0
+-#define microcode_dev_exit() do { } while (0)
+-#endif
+-
+ /* fake device for request_firmware */
+ static struct platform_device *microcode_pdev;
+
++#ifdef CONFIG_MICROCODE_LATE_LOADING
+ /*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+@@ -599,16 +508,27 @@ wait_for_siblings:
+ */
+ static int microcode_reload_late(void)
+ {
+- int ret;
++ int old = boot_cpu_data.microcode, ret;
++ struct cpuinfo_x86 prev_info;
+
+ atomic_set(&late_cpus_in, 0);
+ atomic_set(&late_cpus_out, 0);
+
+- ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+- if (ret == 0)
+- microcode_check();
++ /*
++ * Take a snapshot before the microcode update in order to compare and
++ * check whether any bits changed after an update.
++ */
++ store_cpu_caps(&prev_info);
+
+- pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
++ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
++ if (!ret) {
++ pr_info("Reload succeeded, microcode revision: 0x%x -> 0x%x\n",
++ old, boot_cpu_data.microcode);
++ microcode_check(&prev_info);
++ } else {
++ pr_info("Reload failed, current microcode revision: 0x%x\n",
++ boot_cpu_data.microcode);
++ }
+
+ return ret;
+ }
+@@ -652,6 +572,9 @@ put:
+ return ret;
+ }
+
++static DEVICE_ATTR_WO(reload);
++#endif
++
+ static ssize_t version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+@@ -668,7 +591,6 @@ static ssize_t pf_show(struct device *dev,
+ return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
+ }
+
+-static DEVICE_ATTR_WO(reload);
+ static DEVICE_ATTR(version, 0444, version_show, NULL);
+ static DEVICE_ATTR(processor_flags, 0444, pf_show, NULL);
+
+@@ -775,9 +697,9 @@ static struct subsys_interface mc_cpu_interface = {
+ };
+
+ /**
+- * mc_bp_resume - Update boot CPU microcode during resume.
++ * microcode_bsp_resume - Update boot CPU microcode during resume.
+ */
+-static void mc_bp_resume(void)
++void microcode_bsp_resume(void)
+ {
+ int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+@@ -785,11 +707,11 @@ static void mc_bp_resume(void)
+ if (uci->valid && uci->mc)
+ microcode_ops->apply_microcode(cpu);
+ else if (!uci->mc)
+- reload_early_microcode();
++ reload_early_microcode(cpu);
+ }
+
+ static struct syscore_ops mc_syscore_ops = {
+- .resume = mc_bp_resume,
++ .resume = microcode_bsp_resume,
+ };
+
+ static int mc_cpu_starting(unsigned int cpu)
+@@ -821,7 +743,9 @@ static int mc_cpu_down_prep(unsigned int cpu)
+ }
+
+ static struct attribute *cpu_root_microcode_attrs[] = {
++#ifdef CONFIG_MICROCODE_LATE_LOADING
+ &dev_attr_reload.attr,
++#endif
+ NULL
+ };
+
+@@ -873,10 +797,6 @@ static int __init microcode_init(void)
+ goto out_driver;
+ }
+
+- error = microcode_dev_init();
+- if (error)
+- goto out_ucode_group;
+-
+ register_syscore_ops(&mc_syscore_ops);
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+@@ -887,10 +807,6 @@ static int __init microcode_init(void)
+
+ return 0;
+
+- out_ucode_group:
+- sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+- &cpu_root_microcode_group);
+-
+ out_driver:
+ cpus_read_lock();
+ mutex_lock(&microcode_mutex);
+diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
+index 7e8e07bddd5fe..1ba590e6ef7bb 100644
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -659,7 +659,6 @@ void load_ucode_intel_ap(void)
+ else
+ iup = &intel_ucode_patch;
+
+-reget:
+ if (!*iup) {
+ patch = __load_ucode_intel(&uci);
+ if (!patch)
+@@ -670,12 +669,7 @@ reget:
+
+ uci.mc = *iup;
+
+- if (apply_microcode_early(&uci, true)) {
+- /* Mixed-silicon system? Try to refetch the proper patch: */
+- *iup = NULL;
+-
+- goto reget;
+- }
++ apply_microcode_early(&uci, true);
+ }
+
+ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci)
+diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
+index e095c28d27ae8..8d3c649a1769b 100644
+--- a/arch/x86/kernel/cpu/mshyperv.c
++++ b/arch/x86/kernel/cpu/mshyperv.c
+@@ -79,7 +79,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
+ inc_irq_stat(hyperv_stimer0_count);
+ if (hv_stimer0_handler)
+ hv_stimer0_handler();
+- add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
++ add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+@@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void)
+ cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
+ &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
+
+- if (eax >= HYPERV_CPUID_MIN &&
+- eax <= HYPERV_CPUID_MAX &&
+- !memcmp("Microsoft Hv", hyp_signature, 12))
+- return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
++ if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
++ memcmp("Microsoft Hv", hyp_signature, 12))
++ return 0;
+
+- return 0;
++ /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
++ eax = cpuid_eax(HYPERV_CPUID_FEATURES);
++ if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
++ pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
++ return 0;
++ }
++ if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
++ pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
++ return 0;
++ }
++
++ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+ }
+
+ static unsigned char hv_get_nmi_reason(void)
+@@ -279,12 +289,16 @@ static void __init ms_hyperv_init_platform(void)
+ * To mirror what Windows does we should extract CPU management
+ * features and use the ReservedIdentityBit to detect if Linux is the
+ * root partition. But that requires negotiating CPU management
+- * interface (a process to be finalized).
++ * interface (a process to be finalized). For now, use the privilege
++ * flag as the indicator for running as root.
+ *
+- * For now, use the privilege flag as the indicator for running as
+- * root.
++ * Hyper-V should never specify running as root and as a Confidential
++ * VM. But to protect against a compromised/malicious Hyper-V trying
++ * to exploit root behavior to expose Confidential VM memory, ignore
++ * the root partition setting if also a Confidential VM.
+ */
+- if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
++ if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
++ !(ms_hyperv.priv_high & HV_ISOLATION)) {
+ hv_root_partition = true;
+ pr_info("Hyper-V: running as root partition\n");
+ }
+diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
+index bb1c3f5f60c81..a5c51a14fbce8 100644
+--- a/arch/x86/kernel/cpu/resctrl/core.c
++++ b/arch/x86/kernel/cpu/resctrl/core.c
+@@ -66,9 +66,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
+ .rid = RDT_RESOURCE_L3,
+ .name = "L3",
+ .cache_level = 3,
+- .cache = {
+- .min_cbm_bits = 1,
+- },
+ .domains = domain_init(RDT_RESOURCE_L3),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+@@ -83,9 +80,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
+ .rid = RDT_RESOURCE_L2,
+ .name = "L2",
+ .cache_level = 2,
+- .cache = {
+- .min_cbm_bits = 1,
+- },
+ .domains = domain_init(RDT_RESOURCE_L2),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+@@ -877,6 +871,7 @@ static __init void rdt_init_res_defs_intel(void)
+ r->cache.arch_has_sparse_bitmaps = false;
+ r->cache.arch_has_empty_bitmaps = false;
+ r->cache.arch_has_per_cpu_cfg = false;
++ r->cache.min_cbm_bits = 1;
+ } else if (r->rid == RDT_RESOURCE_MBA) {
+ hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
+ hw_res->msr_update = mba_wrmsr_intel;
+@@ -897,6 +892,7 @@ static __init void rdt_init_res_defs_amd(void)
+ r->cache.arch_has_sparse_bitmaps = true;
+ r->cache.arch_has_empty_bitmaps = true;
+ r->cache.arch_has_per_cpu_cfg = true;
++ r->cache.min_cbm_bits = 0;
+ } else if (r->rid == RDT_RESOURCE_MBA) {
+ hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
+ hw_res->msr_update = mba_wrmsr_amd;
+diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+index 87666275eed92..000e1467b4cde 100644
+--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
++++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+@@ -353,7 +353,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ {
+ struct resctrl_schema *s;
+ struct rdtgroup *rdtgrp;
+- struct rdt_domain *dom;
+ struct rdt_resource *r;
+ char *tok, *resname;
+ int ret = 0;
+@@ -382,10 +381,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ goto out;
+ }
+
+- list_for_each_entry(s, &resctrl_schema_all, list) {
+- list_for_each_entry(dom, &s->res->domains, list)
+- memset(dom->staged_config, 0, sizeof(dom->staged_config));
+- }
++ rdt_staged_configs_clear();
+
+ while ((tok = strsep(&buf, "\n")) != NULL) {
+ resname = strim(strsep(&tok, ":"));
+@@ -422,6 +418,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ }
+
+ out:
++ rdt_staged_configs_clear();
+ rdtgroup_kn_unlock(of->kn);
+ cpus_read_unlock();
+ return ret ?: nbytes;
+diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
+index 1d647188a43bf..218d88800565a 100644
+--- a/arch/x86/kernel/cpu/resctrl/internal.h
++++ b/arch/x86/kernel/cpu/resctrl/internal.h
+@@ -550,5 +550,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+ void __check_limbo(struct rdt_domain *d, bool force_free);
+ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+ void __init thread_throttle_mode_init(void);
++void rdt_staged_configs_clear(void);
+
+ #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
+diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+index db813f819ad6c..4d8398986f784 100644
+--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
++++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+@@ -420,6 +420,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
+ struct pseudo_lock_region *plr = rdtgrp->plr;
+ u32 rmid_p, closid_p;
+ unsigned long i;
++ u64 saved_msr;
+ #ifdef CONFIG_KASAN
+ /*
+ * The registers used for local register variables are also used
+@@ -463,6 +464,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
+ * the buffer and evict pseudo-locked memory read earlier from the
+ * cache.
+ */
++ saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL);
+ __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ closid_p = this_cpu_read(pqr_state.cur_closid);
+ rmid_p = this_cpu_read(pqr_state.cur_rmid);
+@@ -514,7 +516,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
+ __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
+
+ /* Re-enable the hardware prefetcher(s) */
+- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
++ wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr);
+ local_irq_enable();
+
+ plr->thread_done = 1;
+@@ -871,6 +873,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+ static int measure_cycles_lat_fn(void *_plr)
+ {
+ struct pseudo_lock_region *plr = _plr;
++ u32 saved_low, saved_high;
+ unsigned long i;
+ u64 start, end;
+ void *mem_r;
+@@ -879,6 +882,7 @@ static int measure_cycles_lat_fn(void *_plr)
+ /*
+ * Disable hardware prefetchers.
+ */
++ rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ mem_r = READ_ONCE(plr->kmem);
+ /*
+@@ -895,7 +899,7 @@ static int measure_cycles_lat_fn(void *_plr)
+ end = rdtsc_ordered();
+ trace_pseudo_lock_mem_latency((u32)(end - start));
+ }
+- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
++ wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
+ local_irq_enable();
+ plr->thread_done = 1;
+ wake_up_interruptible(&plr->lock_thread_wq);
+@@ -940,6 +944,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+ struct perf_event *miss_event, *hit_event;
+ int hit_pmcnum, miss_pmcnum;
++ u32 saved_low, saved_high;
+ unsigned int line_size;
+ unsigned int size;
+ unsigned long i;
+@@ -973,6 +978,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ /*
+ * Disable hardware prefetchers.
+ */
++ rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+ /* Initialize rest of local variables */
+@@ -1031,7 +1037,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ */
+ rmb();
+ /* Re-enable hardware prefetchers */
+- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
++ wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high);
+ local_irq_enable();
+ out_hit:
+ perf_event_release_kernel(hit_event);
+diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+index b57b3db9a6a78..2ec16477eb3e1 100644
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...)
+ va_end(ap);
+ }
+
++void rdt_staged_configs_clear(void)
++{
++ struct rdt_resource *r;
++ struct rdt_domain *dom;
++
++ lockdep_assert_held(&rdtgroup_mutex);
++
++ for_each_alloc_capable_rdt_resource(r) {
++ list_for_each_entry(dom, &r->domains, list)
++ memset(dom->staged_config, 0, sizeof(dom->staged_config));
++ }
++}
++
+ /*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+@@ -314,7 +327,7 @@ static void update_cpu_closid_rmid(void *info)
+ * executing task might have its own closid selected. Just reuse
+ * the context switch code.
+ */
+- resctrl_sched_in();
++ resctrl_sched_in(current);
+ }
+
+ /*
+@@ -535,7 +548,7 @@ static void _update_task_closid_rmid(void *task)
+ * Otherwise, the MSR is updated when the task is scheduled in.
+ */
+ if (task == current)
+- resctrl_sched_in();
++ resctrl_sched_in(task);
+ }
+
+ static void update_task_closid_rmid(struct task_struct *t)
+@@ -580,8 +593,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
+ /*
+ * Ensure the task's closid and rmid are written before determining if
+ * the task is current that will decide if it will be interrupted.
++ * This pairs with the full barrier between the rq->curr update and
++ * resctrl_sched_in() during context switch.
+ */
+- barrier();
++ smp_mb();
+
+ /*
+ * By now, the task's closid and rmid are set. If the task is current
+@@ -716,11 +731,15 @@ unlock:
+ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
+ {
+ struct task_struct *p, *t;
++ pid_t pid;
+
+ rcu_read_lock();
+ for_each_process_thread(p, t) {
+- if (is_closid_match(t, r) || is_rmid_match(t, r))
+- seq_printf(s, "%d\n", t->pid);
++ if (is_closid_match(t, r) || is_rmid_match(t, r)) {
++ pid = task_pid_vnr(t);
++ if (pid)
++ seq_printf(s, "%d\n", pid);
++ }
+ }
+ rcu_read_unlock();
+ }
+@@ -2363,6 +2382,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
+ WRITE_ONCE(t->closid, to->closid);
+ WRITE_ONCE(t->rmid, to->mon.rmid);
+
++ /*
++ * Order the closid/rmid stores above before the loads
++ * in task_curr(). This pairs with the full barrier
++ * between the rq->curr update and resctrl_sched_in()
++ * during context switch.
++ */
++ smp_mb();
++
+ /*
+ * If the task is on a CPU, set the CPU in the mask.
+ * The detection is inaccurate as tasks might move or
+@@ -2803,7 +2830,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+ {
+ struct resctrl_schema *s;
+ struct rdt_resource *r;
+- int ret;
++ int ret = 0;
++
++ rdt_staged_configs_clear();
+
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+@@ -2812,20 +2841,22 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+ } else {
+ ret = rdtgroup_init_cat(s, rdtgrp->closid);
+ if (ret < 0)
+- return ret;
++ goto out;
+ }
+
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Failed to initialize allocations\n");
+- return ret;
++ goto out;
+ }
+
+ }
+
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
+- return 0;
++out:
++ rdt_staged_configs_clear();
++ return ret;
+ }
+
+ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index 21d1f062895a8..06bfef1c4175e 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -26,6 +26,7 @@ struct cpuid_bit {
+ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
+index 001808e3901cc..fa5777af8da1a 100644
+--- a/arch/x86/kernel/cpu/sgx/encl.c
++++ b/arch/x86/kernel/cpu/sgx/encl.c
+@@ -12,6 +12,116 @@
+ #include "encls.h"
+ #include "sgx.h"
+
++#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
++/*
++ * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
++ * determine the page index associated with the first PCMD entry
++ * within a PCMD page.
++ */
++#define PCMD_FIRST_MASK GENMASK(4, 0)
++
++/**
++ * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
++ * a PCMD page is in process of being reclaimed.
++ * @encl: Enclave to which PCMD page belongs
++ * @start_addr: Address of enclave page using first entry within the PCMD page
++ *
++ * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
++ * stored. The PCMD data of a reclaimed enclave page contains enough
++ * information for the processor to verify the page at the time
++ * it is loaded back into the Enclave Page Cache (EPC).
++ *
++ * The backing storage to which enclave pages are reclaimed is laid out as
++ * follows:
++ * Encrypted enclave pages:SECS page:PCMD pages
++ *
++ * Each PCMD page contains the PCMD metadata of
++ * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
++ *
++ * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
++ * process of getting data (and thus soon being non-empty). (b) is tested with
++ * a check if an enclave page sharing the PCMD page is in the process of being
++ * reclaimed.
++ *
++ * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
++ * intends to reclaim that enclave page - it means that the PCMD page
++ * associated with that enclave page is about to get some data and thus
++ * even if the PCMD page is empty, it should not be truncated.
++ *
++ * Context: Enclave mutex (&sgx_encl->lock) must be held.
++ * Return: 1 if the reclaimer is about to write to the PCMD page
++ * 0 if the reclaimer has no intention to write to the PCMD page
++ */
++static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
++ unsigned long start_addr)
++{
++ int reclaimed = 0;
++ int i;
++
++ /*
++ * PCMD_FIRST_MASK is based on number of PCMD entries within
++ * PCMD page being 32.
++ */
++ BUILD_BUG_ON(PCMDS_PER_PAGE != 32);
++
++ for (i = 0; i < PCMDS_PER_PAGE; i++) {
++ struct sgx_encl_page *entry;
++ unsigned long addr;
++
++ addr = start_addr + i * PAGE_SIZE;
++
++ /*
++ * Stop when reaching the SECS page - it does not
++ * have a page_array entry and its reclaim is
++ * started and completed with enclave mutex held so
++ * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
++ * flag.
++ */
++ if (addr == encl->base + encl->size)
++ break;
++
++ entry = xa_load(&encl->page_array, PFN_DOWN(addr));
++ if (!entry)
++ continue;
++
++ /*
++ * VA page slot ID uses same bit as the flag so it is important
++ * to ensure that the page is not already in backing store.
++ */
++ if (entry->epc_page &&
++ (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
++ reclaimed = 1;
++ break;
++ }
++ }
++
++ return reclaimed;
++}
++
++/*
++ * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
++ * follow right after the EPC data in the backing storage. In addition to the
++ * visible enclave pages, there's one extra page slot for SECS, before PCMD
++ * structs.
++ */
++static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
++ unsigned long page_index)
++{
++ pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
++
++ return epc_end_off + page_index * sizeof(struct sgx_pcmd);
++}
++
++/*
++ * Free a page from the backing storage in the given page index.
++ */
++static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
++{
++ struct inode *inode = file_inode(encl->backing);
++
++ shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
++}
++
+ /*
+ * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
+ * Pages" in the SDM.
+@@ -22,9 +132,12 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
+ {
+ unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
+ struct sgx_encl *encl = encl_page->encl;
++ pgoff_t page_index, page_pcmd_off;
++ unsigned long pcmd_first_page;
+ struct sgx_pageinfo pginfo;
+ struct sgx_backing b;
+- pgoff_t page_index;
++ bool pcmd_page_empty;
++ u8 *pcmd_page;
+ int ret;
+
+ if (secs_page)
+@@ -32,14 +145,21 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
+ else
+ page_index = PFN_DOWN(encl->size);
+
+- ret = sgx_encl_get_backing(encl, page_index, &b);
++ /*
++ * Address of enclave page using the first entry within the PCMD page.
++ */
++ pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;
++
++ page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
++
++ ret = sgx_encl_lookup_backing(encl, page_index, &b);
+ if (ret)
+ return ret;
+
+ pginfo.addr = encl_page->desc & PAGE_MASK;
+ pginfo.contents = (unsigned long)kmap_atomic(b.contents);
+- pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
+- b.pcmd_offset;
++ pcmd_page = kmap_atomic(b.pcmd);
++ pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
+
+ if (secs_page)
+ pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
+@@ -55,10 +175,32 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
+ ret = -EFAULT;
+ }
+
+- kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
++ memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
++ set_page_dirty(b.pcmd);
++
++ /*
++ * The area for the PCMD in the page was zeroed above. Check if the
++ * whole page is now empty meaning that all PCMD's have been zeroed:
++ */
++ pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
++
++ kunmap_atomic(pcmd_page);
+ kunmap_atomic((void *)(unsigned long)pginfo.contents);
+
+- sgx_encl_put_backing(&b, false);
++ get_page(b.pcmd);
++ sgx_encl_put_backing(&b);
++
++ sgx_encl_truncate_backing_page(encl, page_index);
++
++ if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
++ sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
++ pcmd_page = kmap_atomic(b.pcmd);
++ if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
++ pr_warn("PCMD page not empty after truncate.\n");
++ kunmap_atomic(pcmd_page);
++ }
++
++ put_page(b.pcmd);
+
+ return ret;
+ }
+@@ -391,11 +533,15 @@ const struct vm_operations_struct sgx_vm_ops = {
+ void sgx_encl_release(struct kref *ref)
+ {
+ struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
++ unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
+ struct sgx_va_page *va_page;
+ struct sgx_encl_page *entry;
+- unsigned long index;
++ unsigned long count = 0;
+
+- xa_for_each(&encl->page_array, index, entry) {
++ XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
++
++ xas_lock(&xas);
++ xas_for_each(&xas, entry, max_page_index) {
+ if (entry->epc_page) {
+ /*
+ * The page and its radix tree entry cannot be freed
+@@ -410,7 +556,20 @@ void sgx_encl_release(struct kref *ref)
+ }
+
+ kfree(entry);
++ /*
++ * Invoke scheduler on every XA_CHECK_SCHED iteration
++ * to prevent soft lockups.
++ */
++ if (!(++count % XA_CHECK_SCHED)) {
++ xas_pause(&xas);
++ xas_unlock(&xas);
++
++ cond_resched();
++
++ xas_lock(&xas);
++ }
+ }
++ xas_unlock(&xas);
+
+ xa_destroy(&encl->page_array);
+
+@@ -574,10 +733,10 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
+ * 0 on success,
+ * -errno otherwise.
+ */
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
++static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+ struct sgx_backing *backing)
+ {
+- pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5);
++ pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+ struct page *contents;
+ struct page *pcmd;
+
+@@ -585,7 +744,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+ if (IS_ERR(contents))
+ return PTR_ERR(contents);
+
+- pcmd = sgx_encl_get_backing_page(encl, pcmd_index);
++ pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
+ if (IS_ERR(pcmd)) {
+ put_page(contents);
+ return PTR_ERR(pcmd);
+@@ -594,25 +753,118 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+ backing->page_index = page_index;
+ backing->contents = contents;
+ backing->pcmd = pcmd;
+- backing->pcmd_offset =
+- (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) *
+- sizeof(struct sgx_pcmd);
++ backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
+
+ return 0;
+ }
+
++/*
++ * When called from ksgxd, returns the mem_cgroup of a struct mm stored
++ * in the enclave's mm_list. When not called from ksgxd, just returns
++ * the mem_cgroup of the current task.
++ */
++static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
++{
++ struct mem_cgroup *memcg = NULL;
++ struct sgx_encl_mm *encl_mm;
++ int idx;
++
++ /*
++ * If called from normal task context, return the mem_cgroup
++ * of the current task's mm. The remainder of the handling is for
++ * ksgxd.
++ */
++ if (!current_is_ksgxd())
++ return get_mem_cgroup_from_mm(current->mm);
++
++ /*
++ * Search the enclave's mm_list to find an mm associated with
++ * this enclave to charge the allocation to.
++ */
++ idx = srcu_read_lock(&encl->srcu);
++
++ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
++ if (!mmget_not_zero(encl_mm->mm))
++ continue;
++
++ memcg = get_mem_cgroup_from_mm(encl_mm->mm);
++
++ mmput_async(encl_mm->mm);
++
++ break;
++ }
++
++ srcu_read_unlock(&encl->srcu, idx);
++
++ /*
++ * In the rare case that there isn't an mm associated with
++ * the enclave, set memcg to the current active mem_cgroup.
++ * This will be the root mem_cgroup if there is no active
++ * mem_cgroup.
++ */
++ if (!memcg)
++ return get_mem_cgroup_from_mm(NULL);
++
++ return memcg;
++}
++
+ /**
+- * sgx_encl_put_backing() - Unpin the backing storage
++ * sgx_encl_alloc_backing() - allocate a new backing storage page
++ * @encl: an enclave pointer
++ * @page_index: enclave page index
+ * @backing: data for accessing backing storage for the page
+- * @do_write: mark pages dirty
++ *
++ * When called from ksgxd, sets the active memcg from one of the
++ * mms in the enclave's mm_list prior to any backing page allocation,
++ * in order to ensure that shmem page allocations are charged to the
++ * enclave.
++ *
++ * Return:
++ * 0 on success,
++ * -errno otherwise.
+ */
+-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write)
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing)
+ {
+- if (do_write) {
+- set_page_dirty(backing->pcmd);
+- set_page_dirty(backing->contents);
+- }
++ struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
++ struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
++ int ret;
++
++ ret = sgx_encl_get_backing(encl, page_index, backing);
++
++ set_active_memcg(memcg);
++ mem_cgroup_put(encl_memcg);
+
++ return ret;
++}
++
++/**
++ * sgx_encl_lookup_backing() - retrieve an existing backing storage page
++ * @encl: an enclave pointer
++ * @page_index: enclave page index
++ * @backing: data for accessing backing storage for the page
++ *
++ * Retrieve a backing page for loading data back into an EPC page with ELDU.
++ * It is the caller's responsibility to ensure that it is appropriate to use
++ * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
++ * not used correctly, this will cause an allocation which is not accounted for.
++ *
++ * Return:
++ * 0 on success,
++ * -errno otherwise.
++ */
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing)
++{
++ return sgx_encl_get_backing(encl, page_index, backing);
++}
++
++/**
++ * sgx_encl_put_backing() - Unpin the backing storage
++ * @backing: data for accessing backing storage for the page
++ */
++void sgx_encl_put_backing(struct sgx_backing *backing)
++{
+ put_page(backing->pcmd);
+ put_page(backing->contents);
+ }
+diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
+index fec43ca65065b..332ef3568267e 100644
+--- a/arch/x86/kernel/cpu/sgx/encl.h
++++ b/arch/x86/kernel/cpu/sgx/encl.h
+@@ -103,11 +103,14 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
+ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+ unsigned long end, unsigned long vm_flags);
+
++bool current_is_ksgxd(void);
+ void sgx_encl_release(struct kref *ref);
+ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+- struct sgx_backing *backing);
+-void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing);
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing);
++void sgx_encl_put_backing(struct sgx_backing *backing);
+ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
+ struct sgx_encl_page *page);
+
+diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
+index 83df20e3e6333..217777c029eea 100644
+--- a/arch/x86/kernel/cpu/sgx/ioctl.c
++++ b/arch/x86/kernel/cpu/sgx/ioctl.c
+@@ -372,6 +372,29 @@ err_out_free:
+ return ret;
+ }
+
++/*
++ * Ensure user provided offset and length values are valid for
++ * an enclave.
++ */
++static int sgx_validate_offset_length(struct sgx_encl *encl,
++ unsigned long offset,
++ unsigned long length)
++{
++ if (!IS_ALIGNED(offset, PAGE_SIZE))
++ return -EINVAL;
++
++ if (!length || !IS_ALIGNED(length, PAGE_SIZE))
++ return -EINVAL;
++
++ if (offset + length < offset)
++ return -EINVAL;
++
++ if (offset + length - PAGE_SIZE >= encl->size)
++ return -EINVAL;
++
++ return 0;
++}
++
+ /**
+ * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
+ * @encl: an enclave pointer
+@@ -425,14 +448,10 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
+ if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
+ return -EFAULT;
+
+- if (!IS_ALIGNED(add_arg.offset, PAGE_SIZE) ||
+- !IS_ALIGNED(add_arg.src, PAGE_SIZE))
+- return -EINVAL;
+-
+- if (!add_arg.length || add_arg.length & (PAGE_SIZE - 1))
++ if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
+ return -EINVAL;
+
+- if (add_arg.offset + add_arg.length - PAGE_SIZE >= encl->size)
++ if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
+ return -EINVAL;
+
+ if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
+diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
+index 63d3de02bbccb..4ea48acf55faa 100644
+--- a/arch/x86/kernel/cpu/sgx/main.c
++++ b/arch/x86/kernel/cpu/sgx/main.c
+@@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
+ static LIST_HEAD(sgx_active_page_list);
+ static DEFINE_SPINLOCK(sgx_reclaimer_lock);
+
+-/* The free page list lock protected variables prepend the lock. */
+-static unsigned long sgx_nr_free_pages;
++static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
+
+ /* Nodes with one or more EPC sections. */
+ static nodemask_t sgx_numa_mask;
+@@ -47,9 +46,13 @@ static LIST_HEAD(sgx_dirty_page_list);
+ * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
+ * from the input list, and made available for the page allocator. SECS pages
+ * prepending their children in the input list are left intact.
++ *
++ * Return 0 when sanitization was successful or kthread was stopped, and the
++ * number of unsanitized pages otherwise.
+ */
+-static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
++static unsigned long __sgx_sanitize_pages(struct list_head *dirty_page_list)
+ {
++ unsigned long left_dirty = 0;
+ struct sgx_epc_page *page;
+ LIST_HEAD(dirty);
+ int ret;
+@@ -57,7 +60,7 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
+ /* dirty_page_list is thread-local, no need for a lock: */
+ while (!list_empty(dirty_page_list)) {
+ if (kthread_should_stop())
+- return;
++ return 0;
+
+ page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
+
+@@ -72,12 +75,14 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
+ } else {
+ /* The page is not yet clean - move to the dirty list. */
+ list_move_tail(&page->list, &dirty);
++ left_dirty++;
+ }
+
+ cond_resched();
+ }
+
+ list_splice(&dirty, dirty_page_list);
++ return left_dirty;
+ }
+
+ static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
+@@ -171,6 +176,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
+ backing->pcmd_offset;
+
+ ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
++ set_page_dirty(backing->pcmd);
++ set_page_dirty(backing->contents);
+
+ kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
+ backing->pcmd_offset));
+@@ -288,9 +295,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
+ sgx_encl_ewb(epc_page, backing);
+ encl_page->epc_page = NULL;
+ encl->secs_child_cnt--;
++ sgx_encl_put_backing(backing);
+
+ if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
+- ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
++ ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size),
+ &secs_backing);
+ if (ret)
+ goto out;
+@@ -300,7 +308,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
+ sgx_encl_free_epc_page(encl->secs.epc_page);
+ encl->secs.epc_page = NULL;
+
+- sgx_encl_put_backing(&secs_backing, true);
++ sgx_encl_put_backing(&secs_backing);
+ }
+
+ out:
+@@ -361,11 +369,14 @@ static void sgx_reclaim_pages(void)
+ goto skip;
+
+ page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
+- ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
+- if (ret)
+- goto skip;
+
+ mutex_lock(&encl_page->encl->lock);
++ ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]);
++ if (ret) {
++ mutex_unlock(&encl_page->encl->lock);
++ goto skip;
++ }
++
+ encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
+ mutex_unlock(&encl_page->encl->lock);
+ continue;
+@@ -393,7 +404,6 @@ skip:
+
+ encl_page = epc_page->owner;
+ sgx_reclaimer_write(epc_page, &backing[i]);
+- sgx_encl_put_backing(&backing[i], true);
+
+ kref_put(&encl_page->encl->refcount, sgx_encl_release);
+ epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
+@@ -403,14 +413,15 @@ skip:
+
+ spin_lock(&node->lock);
+ list_add_tail(&epc_page->list, &node->free_page_list);
+- sgx_nr_free_pages++;
+ spin_unlock(&node->lock);
++ atomic_long_inc(&sgx_nr_free_pages);
+ }
+ }
+
+ static bool sgx_should_reclaim(unsigned long watermark)
+ {
+- return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
++ return atomic_long_read(&sgx_nr_free_pages) < watermark &&
++ !list_empty(&sgx_active_page_list);
+ }
+
+ static int ksgxd(void *p)
+@@ -422,10 +433,7 @@ static int ksgxd(void *p)
+ * required for SECS pages, whose child pages blocked EREMOVE.
+ */
+ __sgx_sanitize_pages(&sgx_dirty_page_list);
+- __sgx_sanitize_pages(&sgx_dirty_page_list);
+-
+- /* sanity check: */
+- WARN_ON(!list_empty(&sgx_dirty_page_list));
++ WARN_ON(__sgx_sanitize_pages(&sgx_dirty_page_list));
+
+ while (!kthread_should_stop()) {
+ if (try_to_freeze())
+@@ -457,6 +465,11 @@ static bool __init sgx_page_reclaimer_init(void)
+ return true;
+ }
+
++bool current_is_ksgxd(void)
++{
++ return current == ksgxd_tsk;
++}
++
+ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
+ {
+ struct sgx_numa_node *node = &sgx_numa_nodes[nid];
+@@ -471,9 +484,9 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
+
+ page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
+ list_del_init(&page->list);
+- sgx_nr_free_pages--;
+
+ spin_unlock(&node->lock);
++ atomic_long_dec(&sgx_nr_free_pages);
+
+ return page;
+ }
+@@ -625,9 +638,9 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
+ spin_lock(&node->lock);
+
+ list_add_tail(&page->list, &node->free_page_list);
+- sgx_nr_free_pages++;
+
+ spin_unlock(&node->lock);
++ atomic_long_inc(&sgx_nr_free_pages);
+ }
+
+ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
+diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
+index 64511c4a52001..1550910201238 100644
+--- a/arch/x86/kernel/cpu/sgx/virt.c
++++ b/arch/x86/kernel/cpu/sgx/virt.c
+@@ -167,6 +167,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
+ continue;
+
+ xa_erase(&vepc->page_array, index);
++ cond_resched();
+ }
+
+ /*
+@@ -185,6 +186,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
+ list_add_tail(&epc_page->list, &secs_pages);
+
+ xa_erase(&vepc->page_array, index);
++ cond_resched();
+ }
+
+ /*
+@@ -206,6 +208,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
+
+ if (sgx_vepc_free_page(epc_page))
+ list_add_tail(&epc_page->list, &secs_pages);
++ cond_resched();
+ }
+
+ if (!list_empty(&secs_pages))
+diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
+index 132a2de44d2fe..0270925fe013b 100644
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -79,7 +79,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
+ * initial apic id, which also represents 32-bit extended x2apic id.
+ */
+ c->initial_apicid = edx;
+- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
++ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
+ #endif
+ return 0;
+ }
+@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+ unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
+ unsigned int core_select_mask, core_level_siblings;
+ unsigned int die_select_mask, die_level_siblings;
++ unsigned int pkg_mask_width;
+ bool die_level_present = false;
+ int leaf;
+
+@@ -108,13 +109,14 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+ */
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ c->initial_apicid = edx;
+- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
++ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
++ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
+ core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+- die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
++ pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+
+ sub_index = 1;
+- do {
++ while (true) {
+ cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
+
+ /*
+@@ -132,10 +134,15 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ }
+
++ if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE)
++ pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
++ else
++ break;
++
+ sub_index++;
+- } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
++ }
+
+- core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
++ core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width;
+ die_select_mask = (~(-1 << die_plus_mask_width)) >>
+ core_plus_mask_width;
+
+@@ -148,7 +155,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
+ }
+
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+- die_plus_mask_width);
++ pkg_mask_width);
+ /*
+ * Reinit the apicid, now that we have extended initial_apicid.
+ */
+diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
+index 9c7a5f0492929..8009c8346d8f8 100644
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -19,7 +19,7 @@
+
+ enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
+
+-void tsx_disable(void)
++static void tsx_disable(void)
+ {
+ u64 tsx;
+
+@@ -39,7 +39,7 @@ void tsx_disable(void)
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ }
+
+-void tsx_enable(void)
++static void tsx_enable(void)
+ {
+ u64 tsx;
+
+@@ -58,24 +58,6 @@ void tsx_enable(void)
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ }
+
+-static bool __init tsx_ctrl_is_supported(void)
+-{
+- u64 ia32_cap = x86_read_arch_cap_msr();
+-
+- /*
+- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+- *
+- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+- * tsx= cmdline requests will do nothing on CPUs without
+- * MSR_IA32_TSX_CTRL support.
+- */
+- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+-}
+-
+ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+ {
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+@@ -84,7 +66,45 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+ return TSX_CTRL_ENABLE;
+ }
+
+-void tsx_clear_cpuid(void)
++/*
++ * Disabling TSX is not a trivial business.
++ *
++ * First of all, there's a CPUID bit: X86_FEATURE_RTM_ALWAYS_ABORT
++ * which says that TSX is practically disabled (all transactions are
++ * aborted by default). When that bit is set, the kernel unconditionally
++ * disables TSX.
++ *
++ * In order to do that, however, it needs to dance a bit:
++ *
++ * 1. The first method to disable it is through MSR_TSX_FORCE_ABORT and
++ * the MSR is present only when *two* CPUID bits are set:
++ *
++ * - X86_FEATURE_RTM_ALWAYS_ABORT
++ * - X86_FEATURE_TSX_FORCE_ABORT
++ *
++ * 2. The second method is for CPUs which do not have the above-mentioned
++ * MSR: those use a different MSR - MSR_IA32_TSX_CTRL and disable TSX
++ * through that one. Those CPUs can also have the initially mentioned
++ * CPUID bit X86_FEATURE_RTM_ALWAYS_ABORT set and for those the same strategy
++ * applies: TSX gets disabled unconditionally.
++ *
++ * When either of the two methods are present, the kernel disables TSX and
++ * clears the respective RTM and HLE feature flags.
++ *
++ * An additional twist in the whole thing presents late microcode loading
++ * which, when done, may cause for the X86_FEATURE_RTM_ALWAYS_ABORT CPUID
++ * bit to be set after the update.
++ *
++ * A subsequent hotplug operation on any logical CPU except the BSP will
++ * cause for the supported CPUID feature bits to get re-detected and, if
++ * RTM and HLE get cleared all of a sudden, but, userspace did consult
++ * them before the update, then funny explosions will happen. Long story
++ * short: the kernel doesn't modify CPUID feature bits after booting.
++ *
++ * That's why, this function's call in init_intel() doesn't clear the
++ * feature flags.
++ */
++static void tsx_clear_cpuid(void)
+ {
+ u64 msr;
+
+@@ -97,6 +117,40 @@ void tsx_clear_cpuid(void)
+ rdmsrl(MSR_TSX_FORCE_ABORT, msr);
+ msr |= MSR_TFA_TSX_CPUID_CLEAR;
+ wrmsrl(MSR_TSX_FORCE_ABORT, msr);
++ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
++ rdmsrl(MSR_IA32_TSX_CTRL, msr);
++ msr |= TSX_CTRL_CPUID_CLEAR;
++ wrmsrl(MSR_IA32_TSX_CTRL, msr);
++ }
++}
++
++/*
++ * Disable TSX development mode
++ *
++ * When the microcode released in Feb 2022 is applied, TSX will be disabled by
++ * default on some processors. MSR 0x122 (TSX_CTRL) and MSR 0x123
++ * (IA32_MCU_OPT_CTRL) can be used to re-enable TSX for development, doing so is
++ * not recommended for production deployments. In particular, applying MD_CLEAR
++ * flows for mitigation of the Intel TSX Asynchronous Abort (TAA) transient
++ * execution attack may not be effective on these processors when Intel TSX is
++ * enabled with updated microcode.
++ */
++static void tsx_dev_mode_disable(void)
++{
++ u64 mcu_opt_ctrl;
++
++ /* Check if RTM_ALLOW exists */
++ if (!boot_cpu_has_bug(X86_BUG_TAA) ||
++ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) ||
++ !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
++ return;
++
++ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
++
++ if (mcu_opt_ctrl & RTM_ALLOW) {
++ mcu_opt_ctrl &= ~RTM_ALLOW;
++ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl);
++ setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
+ }
+ }
+
+@@ -105,14 +159,14 @@ void __init tsx_init(void)
+ char arg[5] = {};
+ int ret;
+
++ tsx_dev_mode_disable();
++
+ /*
+- * Hardware will always abort a TSX transaction if both CPUID bits
+- * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is
+- * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them
+- * here.
++ * Hardware will always abort a TSX transaction when the CPUID bit
++ * RTM_ALWAYS_ABORT is set. In this case, it is better not to enumerate
++ * CPUID.RTM and CPUID.HLE bits. Clear them here.
+ */
+- if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) &&
+- boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
++ if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
+ tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT;
+ tsx_clear_cpuid();
+ setup_clear_cpu_cap(X86_FEATURE_RTM);
+@@ -120,7 +174,20 @@ void __init tsx_init(void)
+ return;
+ }
+
+- if (!tsx_ctrl_is_supported()) {
++ /*
++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
++ *
++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
++ * tsx= cmdline requests will do nothing on CPUs without
++ * MSR_IA32_TSX_CTRL support.
++ */
++ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) {
++ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
++ } else {
+ tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
+ return;
+ }
+@@ -175,3 +242,16 @@ void __init tsx_init(void)
+ setup_force_cpu_cap(X86_FEATURE_HLE);
+ }
+ }
++
++void tsx_ap_init(void)
++{
++ tsx_dev_mode_disable();
++
++ if (tsx_ctrl_state == TSX_CTRL_ENABLE)
++ tsx_enable();
++ else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
++ tsx_disable();
++ else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
++ /* See comment over that function for more details. */
++ tsx_clear_cpuid();
++}
+diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
+index e8326a8d1c5dc..97b9212a6aabf 100644
+--- a/arch/x86/kernel/crash.c
++++ b/arch/x86/kernel/crash.c
+@@ -37,7 +37,6 @@
+ #include <linux/kdebug.h>
+ #include <asm/cpu.h>
+ #include <asm/reboot.h>
+-#include <asm/virtext.h>
+ #include <asm/intel_pt.h>
+ #include <asm/crash.h>
+ #include <asm/cmdline.h>
+@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
+- /* Disable VMX or SVM if needed.
+- *
+- * We need to disable virtualization on all CPUs.
+- * Having VMX or SVM enabled on any CPU may break rebooting
+- * after the kdump kernel has finished its task.
+- */
+- cpu_emergency_vmxoff();
+- cpu_emergency_svm_disable();
+-
+ /*
+ * Disable Intel PT to stop its logging
+ */
+@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
+- /* Booting kdump kernel with VMX or SVM enabled won't work,
+- * because (among other limitations) we can't disable paging
+- * with the virt flags.
+- */
+- cpu_emergency_vmxoff();
+- cpu_emergency_svm_disable();
++ cpu_emergency_disable_virtualization();
+
+ /*
+ * Disable Intel PT to stop its logging
+@@ -401,10 +386,8 @@ int crash_load_segments(struct kimage *image)
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+- if (ret) {
+- vfree((void *)image->elf_headers);
++ if (ret)
+ return ret;
+- }
+ image->elf_load_addr = kbuf.mem;
+ pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.bufsz);
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
+index ea4fe192189d5..92b33c7eaf3f9 100644
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ printk("%sCall Trace:\n", log_lvl);
+
+ unwind_start(&state, task, regs, stack);
+- stack = stack ? : get_stack_pointer(task, regs);
+ regs = unwind_get_entry_regs(&state, &partial);
+
+ /*
+@@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ * - hardirq stack
+ * - entry stack
+ */
+- for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
++ for (stack = stack ?: get_stack_pointer(task, regs);
++ stack;
++ stack = stack_info.next_sp) {
+ const char *stack_name;
+
++ stack = PTR_ALIGN(stack, sizeof(long));
++
+ if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+@@ -351,7 +354,7 @@ unsigned long oops_begin(void)
+ }
+ NOKPROBE_SYMBOL(oops_begin);
+
+-void __noreturn rewind_stack_do_exit(int signr);
++void __noreturn rewind_stack_and_make_dead(int signr);
+
+ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+ {
+@@ -386,7 +389,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+ * reuse the task stack and that existing poisons are invalid.
+ */
+ kasan_unpoison_task_stack(current);
+- rewind_stack_do_exit(signr);
++ rewind_stack_and_make_dead(signr);
+ }
+ NOKPROBE_SYMBOL(oops_end);
+
+diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
+index 5601b95944fae..6c5defd6569a3 100644
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -32,9 +32,15 @@ const char *stack_type_name(enum stack_type type)
+ {
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
+
++ if (type == STACK_TYPE_TASK)
++ return "TASK";
++
+ if (type == STACK_TYPE_IRQ)
+ return "IRQ";
+
++ if (type == STACK_TYPE_SOFTIRQ)
++ return "SOFTIRQ";
++
+ if (type == STACK_TYPE_ENTRY) {
+ /*
+ * On 64-bit, we have a generic entry stack that we
+diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
+index bc0657f0deedf..f267205f2d5a4 100644
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt);
+ */
+ void __init e820__reserve_setup_data(void)
+ {
++ struct setup_indirect *indirect;
+ struct setup_data *data;
+- u64 pa_data;
++ u64 pa_data, pa_next;
++ u32 len;
+
+ pa_data = boot_params.hdr.setup_data;
+ if (!pa_data)
+@@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void)
+
+ while (pa_data) {
+ data = early_memremap(pa_data, sizeof(*data));
++ if (!data) {
++ pr_warn("e820: failed to memremap setup_data entry\n");
++ return;
++ }
++
++ len = sizeof(*data);
++ pa_next = data->next;
++
+ e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+ /*
+@@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void)
+ sizeof(*data) + data->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+- e820__range_update(((struct setup_indirect *)data->data)->addr,
+- ((struct setup_indirect *)data->data)->len,
+- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+- e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
+- ((struct setup_indirect *)data->data)->len,
+- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
++ if (data->type == SETUP_INDIRECT) {
++ len += data->len;
++ early_memunmap(data, sizeof(*data));
++ data = early_memremap(pa_data, len);
++ if (!data) {
++ pr_warn("e820: failed to memremap indirect setup_data\n");
++ return;
++ }
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT) {
++ e820__range_update(indirect->addr, indirect->len,
++ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
++ e820__range_update_kexec(indirect->addr, indirect->len,
++ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
++ }
+ }
+
+- pa_data = data->next;
+- early_memunmap(data, sizeof(*data));
++ pa_data = pa_next;
++ early_memunmap(data, len);
+ }
+
+ e820__update_table(e820_table);
+diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
+index 391a4e2b86049..8690fab95ae4b 100644
+--- a/arch/x86/kernel/early-quirks.c
++++ b/arch/x86/kernel/early-quirks.c
+@@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = {
+ .stolen_size = gen9_stolen_size,
+ };
+
++/* Intel integrated GPUs for which we need to reserve "stolen memory" */
+ static const struct pci_device_id intel_early_ids[] __initconst = {
+ INTEL_I830_IDS(&i830_early_ops),
+ INTEL_I845G_IDS(&i845_early_ops),
+@@ -591,6 +592,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func)
+ u16 device;
+ int i;
+
++ /*
++ * Reserve "stolen memory" for an integrated GPU. If we've already
++ * found one, there's nothing to do for other (discrete) GPUs.
++ */
++ if (resource_size(&intel_graphics_stolen_res))
++ return;
++
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
+
+ for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
+@@ -703,7 +711,7 @@ static struct chipset early_qrk[] __initdata = {
+ { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
+ PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+ { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
+- QFLAG_APPLY_ONCE, intel_graphics_quirks },
++ 0, intel_graphics_quirks },
+ /*
+ * HPET on the current version of the Baytrail platform has accuracy
+ * problems: it will halt in deep idle state - so we disable it.
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index 7ada7bd03a327..3ad1bf5de7373 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -25,17 +25,7 @@
+ */
+ union fpregs_state init_fpstate __ro_after_init;
+
+-/*
+- * Track whether the kernel is using the FPU state
+- * currently.
+- *
+- * This flag is used:
+- *
+- * - by IRQ context code to potentially use the FPU
+- * if it's unused.
+- *
+- * - to debug kernel_fpu_begin()/end() correctness
+- */
++/* Track in-kernel FPU usage */
+ static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+ /*
+@@ -43,42 +33,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
+ */
+ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+-static bool kernel_fpu_disabled(void)
+-{
+- return this_cpu_read(in_kernel_fpu);
+-}
+-
+-static bool interrupted_kernel_fpu_idle(void)
+-{
+- return !kernel_fpu_disabled();
+-}
+-
+-/*
+- * Were we in user mode (or vm86 mode) when we were
+- * interrupted?
+- *
+- * Doing kernel_fpu_begin/end() is ok if we are running
+- * in an interrupt context from user mode - we'll just
+- * save the FPU state as required.
+- */
+-static bool interrupted_user_mode(void)
+-{
+- struct pt_regs *regs = get_irq_regs();
+- return regs && user_mode(regs);
+-}
+-
+ /*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+- *
+- * It's always ok in process context (ie "not interrupt")
+- * but it is sometimes ok even from an irq.
+ */
+ bool irq_fpu_usable(void)
+ {
+- return !in_interrupt() ||
+- interrupted_user_mode() ||
+- interrupted_kernel_fpu_idle();
++ if (WARN_ON_ONCE(in_nmi()))
++ return false;
++
++ /* In kernel FPU usage already active? */
++ if (this_cpu_read(in_kernel_fpu))
++ return false;
++
++ /*
++ * When not in NMI or hard interrupt context, FPU can be used in:
++ *
++ * - Task context except from within fpregs_lock()'ed critical
++ * regions.
++ *
++ * - Soft interrupt processing context which cannot happen
++ * while in a fpregs_lock()'ed critical region.
++ */
++ if (!in_hardirq())
++ return true;
++
++ /*
++ * In hard interrupt context it's safe when soft interrupts
++ * are enabled, which means the interrupt did not hit in
++ * a fpregs_lock()'ed critical region.
++ */
++ return !softirq_count();
+ }
+ EXPORT_SYMBOL(irq_fpu_usable);
+
+@@ -345,7 +330,7 @@ static void fpu_reset_fpstate(void)
+ struct fpu *fpu = &current->thread.fpu;
+
+ fpregs_lock();
+- fpu__drop(fpu);
++ __fpu_invalidate_fpregs_state(fpu);
+ /*
+ * This does not change the actual hardware registers. It just
+ * resets the memory image and sets TIF_NEED_FPU_LOAD so a
+diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
+index 64e29927cc32f..ddf65f1927e12 100644
+--- a/arch/x86/kernel/fpu/init.c
++++ b/arch/x86/kernel/fpu/init.c
+@@ -49,7 +49,7 @@ void fpu__init_cpu(void)
+ fpu__init_cpu_xstate();
+ }
+
+-static bool fpu__probe_without_cpuid(void)
++static bool __init fpu__probe_without_cpuid(void)
+ {
+ unsigned long cr0;
+ u16 fsw, fcw;
+@@ -67,7 +67,7 @@ static bool fpu__probe_without_cpuid(void)
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+ }
+
+-static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
++static void __init fpu__init_system_early_generic(void)
+ {
+ if (!boot_cpu_has(X86_FEATURE_CPUID) &&
+ !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+@@ -138,9 +138,6 @@ static void __init fpu__init_system_generic(void)
+ unsigned int fpu_kernel_xstate_size __ro_after_init;
+ EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
+
+-/* Get alignment of the TYPE. */
+-#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
+-
+ /*
+ * Enforce that 'MEMBER' is the last field of 'TYPE'.
+ *
+@@ -148,8 +145,8 @@ EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
+ * because that's how C aligns structs.
+ */
+ #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+- BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
+- TYPE_ALIGN(TYPE)))
++ BUILD_BUG_ON(sizeof(TYPE) != \
++ ALIGN(offsetofend(TYPE, MEMBER), _Alignof(TYPE)))
+
+ /*
+ * We append the 'struct fpu' to the task_struct:
+@@ -229,9 +226,9 @@ static void __init fpu__init_system_ctx_switch(void)
+ * Called on the boot CPU once per system bootup, to set up the initial
+ * FPU state that is later cloned into all processes:
+ */
+-void __init fpu__init_system(struct cpuinfo_x86 *c)
++void __init fpu__init_system(void)
+ {
+- fpu__init_system_early_generic(c);
++ fpu__init_system_early_generic();
+
+ /*
+ * The FPU has to be operational for some of the
+diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
+index 66ed317ebc0d3..bd243ae57680e 100644
+--- a/arch/x86/kernel/fpu/regset.c
++++ b/arch/x86/kernel/fpu/regset.c
+@@ -87,11 +87,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+ const void *kbuf, const void __user *ubuf)
+ {
+ struct fpu *fpu = &target->thread.fpu;
+- struct user32_fxsr_struct newstate;
++ struct fxregs_state newstate;
+ int ret;
+
+- BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
+-
+ if (!cpu_feature_enabled(X86_FEATURE_FXSR))
+ return -ENODEV;
+
+@@ -112,9 +110,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+ /* Copy the state */
+ memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate));
+
+- /* Clear xmm8..15 */
++ /* Clear xmm8..15 for 32-bit callers */
+ BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16);
+- memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16);
++ if (in_ia32_syscall())
++ memset(&fpu->state.fxsave.xmm_space[8*4], 0, 8 * 16);
+
+ /* Mark FP and SSE as in use when XSAVE is enabled */
+ if (use_xsave())
+@@ -164,7 +163,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
+ }
+
+ fpu_force_restore(fpu);
+- ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf);
++ ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf, &target->thread.pkru);
+
+ out:
+ vfree(tmpbuf);
+diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
+index 831b25c5e7058..7f76cb099e66a 100644
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -205,7 +205,7 @@ retry:
+ fpregs_unlock();
+
+ if (ret) {
+- if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
++ if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
+ goto retry;
+ return -EFAULT;
+ }
+@@ -278,10 +278,9 @@ retry:
+ if (ret != -EFAULT)
+ return -EINVAL;
+
+- ret = fault_in_pages_readable(buf, size);
+- if (!ret)
++ if (!fault_in_readable(buf, size))
+ goto retry;
+- return ret;
++ return -EFAULT;
+ }
+
+ /*
+@@ -371,7 +370,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
+ fpregs_unlock();
+
+ if (use_xsave() && !fx_only) {
+- ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx);
++ ret = copy_sigframe_from_user_to_xstate(tsk, buf_fx);
+ if (ret)
+ return ret;
+ } else {
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index c8def1b7f8fba..81891f0fff6f6 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -809,6 +809,13 @@ void __init fpu__init_system_xstate(void)
+ goto out_disable;
+ }
+
++ /*
++ * CPU capabilities initialization runs before FPU init. So
++ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
++ * functional, set the feature bit so depending code works.
++ */
++ setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
++
+ print_xstate_offset_size();
+ pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
+ xfeatures_mask_all,
+@@ -1091,8 +1098,31 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
+ }
+
+
++/**
++ * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
++ * @fpstate: The fpstate buffer to copy to
++ * @kbuf: The UABI format buffer, if it comes from the kernel
++ * @ubuf: The UABI format buffer, if it comes from userspace
++ * @pkru: The location to write the PKRU value to
++ *
++ * Converts from the UABI format into the kernel internal hardware
++ * dependent format.
++ *
++ * This function ultimately has two different callers with distinct PKRU
++ * behavior.
++ * 1. When called from sigreturn the PKRU register will be restored from
++ * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
++ * @fpstate is sufficient to cover this case, but the caller will also
++ * pass a pointer to the thread_struct's pkru field in @pkru and updating
++ * it is harmless.
++ * 2. When called from ptrace the PKRU register will be restored from the
++ * thread_struct's pkru field. A pointer to that is passed in @pkru.
++ * The kernel will restore it manually, so the XRSTOR behavior that resets
++ * the PKRU register to the hardware init value (0) if the corresponding
++ * xfeatures bit is not set is emulated here.
++ */
+ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
+- const void __user *ubuf)
++ const void __user *ubuf, u32 *pkru)
+ {
+ unsigned int offset, size;
+ struct xstate_header hdr;
+@@ -1140,6 +1170,14 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
+ }
+ }
+
++ if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
++ struct pkru_state *xpkru;
++
++ xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
++ *pkru = xpkru->pkru;
++ } else
++ *pkru = 0;
++
+ /*
+ * The state that came in from userspace was user-state only.
+ * Mask all the user states out of 'xfeatures':
+@@ -1159,9 +1197,9 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
+ * format and copy to the target thread. This is called from
+ * xstateregs_set().
+ */
+-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
++int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru)
+ {
+- return copy_uabi_to_xstate(xsave, kbuf, NULL);
++ return copy_uabi_to_xstate(xsave, kbuf, NULL, pkru);
+ }
+
+ /*
+@@ -1169,10 +1207,10 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
+ * XSAVE[S] format and copy to the target thread. This is called from the
+ * sigreturn() and rt_sigreturn() system calls.
+ */
+-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave,
++int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
+ const void __user *ubuf)
+ {
+- return copy_uabi_to_xstate(xsave, NULL, ubuf);
++ return copy_uabi_to_xstate(&tsk->thread.fpu.state.xsave, NULL, ubuf, &tsk->thread.pkru);
+ }
+
+ static bool validate_xsaves_xrstors(u64 mask)
+diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
+index 1b3ce3b4a2a2f..4017da3a4c701 100644
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -93,6 +93,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code)
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
++ ftrace_expected = old_code;
+ WARN_ON(1);
+ return -EINVAL;
+ }
+@@ -218,7 +219,9 @@ void ftrace_replace_code(int enable)
+
+ ret = ftrace_verify_code(rec->ip, old);
+ if (ret) {
++ ftrace_expected = old;
+ ftrace_bug(ret, rec);
++ ftrace_expected = NULL;
+ return;
+ }
+ }
+@@ -308,7 +311,7 @@ union ftrace_op_code_union {
+ } __attribute__((packed));
+ };
+
+-#define RET_SIZE 1
++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
+
+ static unsigned long
+ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+@@ -321,12 +324,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+ unsigned long offset;
+ unsigned long npages;
+ unsigned long size;
+- unsigned long retq;
+ unsigned long *ptr;
+ void *trampoline;
+ void *ip;
+ /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
+ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
++ unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
+ union ftrace_op_code_union op_ptr;
+ int ret;
+
+@@ -366,10 +369,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
+ ip = trampoline + size;
+
+ /* The trampoline ends with ret(q) */
+- retq = (unsigned long)ftrace_stub;
+- ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
+- if (WARN_ON(ret < 0))
+- goto fail;
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE);
++ else
++ memcpy(ip, retq, sizeof(retq));
+
+ /* No need to test direct calls on created trampolines */
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
+index e405fe1a8bf41..a0ed0e4a2c0cd 100644
+--- a/arch/x86/kernel/ftrace_32.S
++++ b/arch/x86/kernel/ftrace_32.S
+@@ -19,7 +19,7 @@
+ #endif
+
+ SYM_FUNC_START(__fentry__)
+- ret
++ RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+
+@@ -84,7 +84,7 @@ ftrace_graph_call:
+
+ /* This is weak to keep gas from relaxing the jumps */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+- ret
++ RET
+ SYM_CODE_END(ftrace_caller)
+
+ SYM_CODE_START(ftrace_regs_caller)
+@@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller)
+ popl %edx
+ popl %ecx
+ popl %eax
+- ret
++ RET
+ SYM_CODE_END(ftrace_graph_caller)
+
+ .globl return_to_handler
+diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
+index 7c273846c6877..6cc14a835991d 100644
+--- a/arch/x86/kernel/ftrace_64.S
++++ b/arch/x86/kernel/ftrace_64.S
+@@ -132,7 +132,7 @@
+ #ifdef CONFIG_DYNAMIC_FTRACE
+
+ SYM_FUNC_START(__fentry__)
+- retq
++ RET
+ SYM_FUNC_END(__fentry__)
+ EXPORT_SYMBOL(__fentry__)
+
+@@ -181,11 +181,10 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
+
+ /*
+ * This is weak to keep gas from relaxing the jumps.
+- * It is also used to copy the retq for trampolines.
+ */
+ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+ UNWIND_HINT_FUNC
+- retq
++ RET
+ SYM_FUNC_END(ftrace_epilogue)
+
+ SYM_FUNC_START(ftrace_regs_caller)
+@@ -299,7 +298,7 @@ fgraph_trace:
+ #endif
+
+ SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
+- retq
++ RET
+
+ trace:
+ /* save_mcount_regs fills in first two parameters */
+@@ -331,11 +330,11 @@ SYM_FUNC_START(ftrace_graph_caller)
+
+ restore_mcount_regs
+
+- retq
++ RET
+ SYM_FUNC_END(ftrace_graph_caller)
+
+ SYM_FUNC_START(return_to_handler)
+- subq $24, %rsp
++ subq $16, %rsp
+
+ /* Save the return values */
+ movq %rax, (%rsp)
+@@ -347,7 +346,19 @@ SYM_FUNC_START(return_to_handler)
+ movq %rax, %rdi
+ movq 8(%rsp), %rdx
+ movq (%rsp), %rax
+- addq $24, %rsp
+- JMP_NOSPEC rdi
++
++ addq $16, %rsp
++ /*
++ * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
++ * since IBT would demand that contain ENDBR, which simply isn't so for
++ * return addresses. Use a retpoline here to keep the RSB balanced.
++ */
++ ANNOTATE_INTRA_FUNCTION_CALL
++ call .Ldo_rop
++ int3
++.Ldo_rop:
++ mov %rdi, (%rsp)
++ UNWIND_HINT_FUNC
++ RET
+ SYM_FUNC_END(return_to_handler)
+ #endif
+diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
+index de01903c37355..5036104d54707 100644
+--- a/arch/x86/kernel/head64.c
++++ b/arch/x86/kernel/head64.c
+@@ -418,6 +418,8 @@ static void __init clear_bss(void)
+ {
+ memset(__bss_start, 0,
+ (unsigned long) __bss_stop - (unsigned long) __bss_start);
++ memset(__brk_base, 0,
++ (unsigned long) __brk_limit - (unsigned long) __brk_base);
+ }
+
+ static unsigned long get_cmd_line_ptr(void)
+diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
+index d8c64dab0efe0..9b7acc9c7874c 100644
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -23,6 +23,7 @@
+ #include <asm/cpufeatures.h>
+ #include <asm/percpu.h>
+ #include <asm/nops.h>
++#include <asm/nospec-branch.h>
+ #include <asm/bootparam.h>
+ #include <asm/export.h>
+ #include <asm/pgtable_32.h>
+@@ -340,7 +341,7 @@ SYM_FUNC_END(startup_32_smp)
+ __INIT
+ setup_once:
+ andl $0,setup_once_ref /* Once is enough, thanks */
+- ret
++ RET
+
+ SYM_FUNC_START(early_idt_handler_array)
+ # 36(%esp) %eflags
+diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
+index d8b3ebd2bb85f..81f1ae278718e 100644
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -312,6 +312,8 @@ SYM_CODE_END(start_cpu0)
+ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
++ ANNOTATE_UNRET_END
++
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+@@ -369,6 +371,7 @@ SYM_CODE_START(early_idt_handler_array)
+ SYM_CODE_END(early_idt_handler_array)
+
+ SYM_CODE_START_LOCAL(early_idt_handler_common)
++ ANNOTATE_UNRET_END
+ /*
+ * The stack is the hardware frame, an error code or zero, and the
+ * vector number.
+@@ -415,6 +418,8 @@ SYM_CODE_END(early_idt_handler_common)
+ SYM_CODE_START_NOALIGN(vc_no_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
++ ANNOTATE_UNRET_END
++
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
+index 882213df37130..71f336425e58a 100644
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -1435,8 +1435,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
+ hpet_rtc_timer_reinit();
+ memset(&curr_time, 0, sizeof(struct rtc_time));
+
+- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
+- mc146818_get_time(&curr_time);
++ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
++ if (unlikely(mc146818_get_time(&curr_time) < 0)) {
++ pr_err_ratelimited("unable to read current time from RTC\n");
++ return IRQ_HANDLED;
++ }
++ }
+
+ if (hpet_rtc_flags & RTC_UIE &&
+ curr_time.tm_sec != hpet_prev_update_sec) {
+diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
+index 15aefa3f3e18e..f91e5e31aa4f0 100644
+--- a/arch/x86/kernel/i8259.c
++++ b/arch/x86/kernel/i8259.c
+@@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq)
+ disable_irq_nosync(irq);
+ io_apic_irqs &= ~(1<<irq);
+ irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
++ irq_set_status_flags(irq, IRQ_LEVEL);
+ enable_irq(irq);
+ lapic_assign_legacy_vector(irq, true);
+ }
+diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
+index e28f6a5d14f1b..766ffe3ba3137 100644
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -291,8 +291,10 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
+ {
+ if (handler)
+ kvm_posted_intr_wakeup_handler = handler;
+- else
++ else {
+ kvm_posted_intr_wakeup_handler = dummy_handler;
++ synchronize_rcu();
++ }
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
+
+diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
+index 8ef35063964b1..b8db1022aa6ca 100644
+--- a/arch/x86/kernel/irqflags.S
++++ b/arch/x86/kernel/irqflags.S
+@@ -10,6 +10,6 @@
+ SYM_FUNC_START(native_save_fl)
+ pushf
+ pop %_ASM_AX
+- ret
++ RET
+ SYM_FUNC_END(native_save_fl)
+ EXPORT_SYMBOL(native_save_fl)
+diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
+index beb1bada1b0ab..c683666876f1c 100644
+--- a/arch/x86/kernel/irqinit.c
++++ b/arch/x86/kernel/irqinit.c
+@@ -65,8 +65,10 @@ void __init init_ISA_irqs(void)
+
+ legacy_pic->init(0);
+
+- for (i = 0; i < nr_legacy_irqs(); i++)
++ for (i = 0; i < nr_legacy_irqs(); i++) {
+ irq_set_chip_and_handler(i, chip, handle_level_irq);
++ irq_set_status_flags(i, IRQ_LEVEL);
++ }
+ }
+
+ void __init init_IRQ(void)
+diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
+index 64b6da95af984..e2e89bebcbc32 100644
+--- a/arch/x86/kernel/kdebugfs.c
++++ b/arch/x86/kernel/kdebugfs.c
+@@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no,
+
+ static int __init create_setup_data_nodes(struct dentry *parent)
+ {
++ struct setup_indirect *indirect;
+ struct setup_data_node *node;
+ struct setup_data *data;
+- int error;
++ u64 pa_data, pa_next;
+ struct dentry *d;
+- u64 pa_data;
++ int error;
++ u32 len;
+ int no = 0;
+
+ d = debugfs_create_dir("setup_data", parent);
+@@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent)
+ error = -ENOMEM;
+ goto err_dir;
+ }
+-
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+- node->paddr = ((struct setup_indirect *)data->data)->addr;
+- node->type = ((struct setup_indirect *)data->data)->type;
+- node->len = ((struct setup_indirect *)data->data)->len;
++ pa_next = data->next;
++
++ if (data->type == SETUP_INDIRECT) {
++ len = sizeof(*data) + data->len;
++ memunmap(data);
++ data = memremap(pa_data, len, MEMREMAP_WB);
++ if (!data) {
++ kfree(node);
++ error = -ENOMEM;
++ goto err_dir;
++ }
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT) {
++ node->paddr = indirect->addr;
++ node->type = indirect->type;
++ node->len = indirect->len;
++ } else {
++ node->paddr = pa_data;
++ node->type = data->type;
++ node->len = data->len;
++ }
+ } else {
+ node->paddr = pa_data;
+ node->type = data->type;
+@@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
+ }
+
+ create_setup_data_node(d, no, node);
+- pa_data = data->next;
++ pa_data = pa_next;
+
+ memunmap(data);
+ no++;
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index b6e046e4b2895..c4b618d0b16a0 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -37,6 +37,7 @@
+ #include <linux/extable.h>
+ #include <linux/kdebug.h>
+ #include <linux/kallsyms.h>
++#include <linux/kgdb.h>
+ #include <linux/ftrace.h>
+ #include <linux/kasan.h>
+ #include <linux/moduleloader.h>
+@@ -289,12 +290,15 @@ static int can_probe(unsigned long paddr)
+ if (ret < 0)
+ return 0;
+
++#ifdef CONFIG_KGDB
+ /*
+- * Another debugging subsystem might insert this breakpoint.
+- * In that case, we can't recover it.
++ * If there is a dynamically installed kgdb sw breakpoint,
++ * this function should not be probed.
+ */
+- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
++ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
++ kgdb_has_hit_break(addr))
+ return 0;
++#endif
+ addr += insn.length;
+ }
+
+@@ -495,7 +499,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
+ match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ if (p->ainsn.jcc.type >= 0xe)
+- match = match && (regs->flags & X86_EFLAGS_ZF);
++ match = match || (regs->flags & X86_EFLAGS_ZF);
+ }
+ __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
+ }
+@@ -816,16 +820,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+ static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+ {
+- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+- kcb->kprobe_status = KPROBE_HIT_SSDONE;
+- cur->post_handler(cur, regs, 0);
+- }
+-
+ /* Restore back the original saved kprobes variables and continue. */
+- if (kcb->kprobe_status == KPROBE_REENTER)
++ if (kcb->kprobe_status == KPROBE_REENTER) {
++ /* This will restore both kcb and current_kprobe */
+ restore_previous_kprobe(kcb);
+- else
++ } else {
++ /*
++ * Always update the kcb status because
++ * reset_curent_kprobe() doesn't update kcb.
++ */
++ kcb->kprobe_status = KPROBE_HIT_SSDONE;
++ if (cur->post_handler)
++ cur->post_handler(cur, regs, 0);
+ reset_current_kprobe();
++ }
+ }
+ NOKPROBE_SYMBOL(kprobe_post_process);
+
+@@ -1044,7 +1052,7 @@ asm(
+ RESTORE_REGS_STRING
+ " popfl\n"
+ #endif
+- " ret\n"
++ ASM_RET
+ ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+ );
+ NOKPROBE_SYMBOL(kretprobe_trampoline);
+diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
+index 71425ebba98a1..98d0e2012e1f3 100644
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -15,6 +15,7 @@
+ #include <linux/extable.h>
+ #include <linux/kdebug.h>
+ #include <linux/kallsyms.h>
++#include <linux/kgdb.h>
+ #include <linux/ftrace.h>
+ #include <linux/objtool.h>
+ #include <linux/pgtable.h>
+@@ -45,8 +46,8 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+ /* This function only handles jump-optimized kprobe */
+ if (kp && kprobe_optimized(kp)) {
+ op = container_of(kp, struct optimized_kprobe, kp);
+- /* If op->list is not empty, op is under optimizing */
+- if (list_empty(&op->list))
++ /* If op is optimized or under unoptimizing */
++ if (list_empty(&op->list) || optprobe_queued_unopt(op))
+ goto found;
+ }
+ }
+@@ -272,19 +273,6 @@ static int insn_is_indirect_jump(struct insn *insn)
+ return ret;
+ }
+
+-static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+-{
+- unsigned char ops;
+-
+- for (; addr < eaddr; addr++) {
+- if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+- ops != INT3_INSN_OPCODE)
+- return false;
+- }
+-
+- return true;
+-}
+-
+ /* Decode whole function to ensure any instructions don't jump into target */
+ static int can_optimize(unsigned long paddr)
+ {
+@@ -327,15 +315,15 @@ static int can_optimize(unsigned long paddr)
+ ret = insn_decode_kernel(&insn, (void *)recovered_insn);
+ if (ret < 0)
+ return 0;
+-
++#ifdef CONFIG_KGDB
+ /*
+- * In the case of detecting unknown breakpoint, this could be
+- * a padding INT3 between functions. Let's check that all the
+- * rest of the bytes are also INT3.
++ * If there is a dynamically installed kgdb sw breakpoint,
++ * this function should not be probed.
+ */
+- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
+- return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+-
++ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
++ kgdb_has_hit_break(addr))
++ return 0;
++#endif
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+@@ -358,7 +346,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+
+ for (i = 1; i < op->optinsn.size; i++) {
+ p = get_kprobe(op->kp.addr + i);
+- if (p && !kprobe_disabled(p))
++ if (p && !kprobe_disarmed(p))
+ return -EEXIST;
+ }
+
+diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
+index d0a19121c6a4f..257892fcefa79 100644
+--- a/arch/x86/kernel/ksysfs.c
++++ b/arch/x86/kernel/ksysfs.c
+@@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
+
+ static int __init get_setup_data_size(int nr, size_t *size)
+ {
+- int i = 0;
++ u64 pa_data = boot_params.hdr.setup_data, pa_next;
++ struct setup_indirect *indirect;
+ struct setup_data *data;
+- u64 pa_data = boot_params.hdr.setup_data;
++ int i = 0;
++ u32 len;
+
+ while (pa_data) {
+ data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
++ pa_next = data->next;
++
+ if (nr == i) {
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+- *size = ((struct setup_indirect *)data->data)->len;
+- else
++ if (data->type == SETUP_INDIRECT) {
++ len = sizeof(*data) + data->len;
++ memunmap(data);
++ data = memremap(pa_data, len, MEMREMAP_WB);
++ if (!data)
++ return -ENOMEM;
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT)
++ *size = indirect->len;
++ else
++ *size = data->len;
++ } else {
+ *size = data->len;
++ }
+
+ memunmap(data);
+ return 0;
+ }
+
+- pa_data = data->next;
++ pa_data = pa_next;
+ memunmap(data);
+ i++;
+ }
+@@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size)
+ static ssize_t type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+ {
++ struct setup_indirect *indirect;
++ struct setup_data *data;
+ int nr, ret;
+ u64 paddr;
+- struct setup_data *data;
++ u32 len;
+
+ ret = kobj_to_setup_data_nr(kobj, &nr);
+ if (ret)
+@@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj,
+ if (!data)
+ return -ENOMEM;
+
+- if (data->type == SETUP_INDIRECT)
+- ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
+- else
++ if (data->type == SETUP_INDIRECT) {
++ len = sizeof(*data) + data->len;
++ memunmap(data);
++ data = memremap(paddr, len, MEMREMAP_WB);
++ if (!data)
++ return -ENOMEM;
++
++ indirect = (struct setup_indirect *)data->data;
++
++ ret = sprintf(buf, "0x%x\n", indirect->type);
++ } else {
+ ret = sprintf(buf, "0x%x\n", data->type);
++ }
++
+ memunmap(data);
+ return ret;
+ }
+@@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp,
+ char *buf,
+ loff_t off, size_t count)
+ {
++ struct setup_indirect *indirect;
++ struct setup_data *data;
+ int nr, ret = 0;
+ u64 paddr, len;
+- struct setup_data *data;
+ void *p;
+
+ ret = kobj_to_setup_data_nr(kobj, &nr);
+@@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp,
+ if (!data)
+ return -ENOMEM;
+
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+- paddr = ((struct setup_indirect *)data->data)->addr;
+- len = ((struct setup_indirect *)data->data)->len;
++ if (data->type == SETUP_INDIRECT) {
++ len = sizeof(*data) + data->len;
++ memunmap(data);
++ data = memremap(paddr, len, MEMREMAP_WB);
++ if (!data)
++ return -ENOMEM;
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT) {
++ paddr = indirect->addr;
++ len = indirect->len;
++ } else {
++ /*
++ * Even though this is technically undefined, return
++ * the data as though it is a normal setup_data struct.
++ * This will at least allow it to be inspected.
++ */
++ paddr += sizeof(*data);
++ len = data->len;
++ }
+ } else {
+ paddr += sizeof(*data);
+ len = data->len;
+diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
+index b656456c3a944..eba6485a59a39 100644
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -66,6 +66,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align
+ DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
+ static int has_steal_clock = 0;
+
++static int has_guest_poll = 0;
+ /*
+ * No need for any "IO delay" on KVM
+ */
+@@ -187,7 +188,7 @@ void kvm_async_pf_task_wake(u32 token)
+ {
+ u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+ struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+- struct kvm_task_sleep_node *n;
++ struct kvm_task_sleep_node *n, *dummy = NULL;
+
+ if (token == ~0) {
+ apf_task_wake_all();
+@@ -199,28 +200,41 @@ again:
+ n = _find_apf_task(b, token);
+ if (!n) {
+ /*
+- * async PF was not yet handled.
+- * Add dummy entry for the token.
++ * Async #PF not yet handled, add a dummy entry for the token.
++ * Allocating the token must be down outside of the raw lock
++ * as the allocator is preemptible on PREEMPT_RT kernels.
+ */
+- n = kzalloc(sizeof(*n), GFP_ATOMIC);
+- if (!n) {
++ if (!dummy) {
++ raw_spin_unlock(&b->lock);
++ dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);
++
+ /*
+- * Allocation failed! Busy wait while other cpu
+- * handles async PF.
++ * Continue looping on allocation failure, eventually
++ * the async #PF will be handled and allocating a new
++ * node will be unnecessary.
++ */
++ if (!dummy)
++ cpu_relax();
++
++ /*
++ * Recheck for async #PF completion before enqueueing
++ * the dummy token to avoid duplicate list entries.
+ */
+- raw_spin_unlock(&b->lock);
+- cpu_relax();
+ goto again;
+ }
+- n->token = token;
+- n->cpu = smp_processor_id();
+- init_swait_queue_head(&n->wq);
+- hlist_add_head(&n->link, &b->list);
++ dummy->token = token;
++ dummy->cpu = smp_processor_id();
++ init_swait_queue_head(&dummy->wq);
++ hlist_add_head(&dummy->link, &b->list);
++ dummy = NULL;
+ } else {
+ apf_task_wake_one(n);
+ }
+ raw_spin_unlock(&b->lock);
+- return;
++
++ /* A dummy token might be allocated and ultimately not used. */
++ if (dummy)
++ kfree(dummy);
+ }
+ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
+
+@@ -457,19 +471,22 @@ static bool pv_tlb_flush_supported(void)
+ {
+ return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
++ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
++ (num_possible_cpus() != 1));
+ }
+
+ static bool pv_ipi_supported(void)
+ {
+- return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
++ return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
++ (num_possible_cpus() != 1));
+ }
+
+ static bool pv_sched_yield_supported(void)
+ {
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
++ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
++ (num_possible_cpus() != 1));
+ }
+
+ #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
+@@ -507,7 +524,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
+ } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
+ ipi_bitmap <<= min - apic_id;
+ min = apic_id;
+- } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
++ } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+ max = apic_id < max ? max : apic_id;
+ } else {
+ ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+@@ -647,14 +664,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
+
+ static int kvm_suspend(void)
+ {
++ u64 val = 0;
++
+ kvm_guest_cpu_offline(false);
+
++#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
++ if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
++ rdmsrl(MSR_KVM_POLL_CONTROL, val);
++ has_guest_poll = !(val & 1);
++#endif
+ return 0;
+ }
+
+ static void kvm_resume(void)
+ {
+ kvm_cpu_online(raw_smp_processor_id());
++
++#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
++ if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
++ wrmsrl(MSR_KVM_POLL_CONTROL, 0);
++#endif
+ }
+
+ static struct syscore_ops kvm_syscore_ops = {
+@@ -919,7 +948,7 @@ asm(
+ "movq __per_cpu_offset(,%rdi,8), %rax;"
+ "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+ "setne %al;"
+-"ret;"
++ASM_RET
+ ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
+ ".popsection");
+
+diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
+index 131f30fdcfbdc..dc8b17568784f 100644
+--- a/arch/x86/kernel/machine_kexec_64.c
++++ b/arch/x86/kernel/machine_kexec_64.c
+@@ -373,9 +373,6 @@ void machine_kexec(struct kimage *image)
+ #ifdef CONFIG_KEXEC_FILE
+ void *arch_kexec_kernel_image_load(struct kimage *image)
+ {
+- vfree(image->elf_headers);
+- image->elf_headers = NULL;
+-
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+@@ -511,6 +508,15 @@ overflow:
+ (int)ELF64_R_TYPE(rel[i].r_info), value);
+ return -ENOEXEC;
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++ vfree(image->elf_headers);
++ image->elf_headers = NULL;
++ image->elf_headers_sz = 0;
++
++ return kexec_image_post_load_cleanup_default(image);
++}
+ #endif /* CONFIG_KEXEC_FILE */
+
+ static int
+diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
+index 5e9a34b5bd741..06b53ea940bf6 100644
+--- a/arch/x86/kernel/module.c
++++ b/arch/x86/kernel/module.c
+@@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void)
+
+ void *module_alloc(unsigned long size)
+ {
++ gfp_t gfp_mask = GFP_KERNEL;
+ void *p;
+
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+@@ -74,10 +75,10 @@ void *module_alloc(unsigned long size)
+
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ MODULES_VADDR + get_module_load_offset(),
+- MODULES_END, GFP_KERNEL,
+- PAGE_KERNEL, 0, NUMA_NO_NODE,
++ MODULES_END, gfp_mask,
++ PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
+ __builtin_return_address(0));
+- if (p && (kasan_module_alloc(p, size) < 0)) {
++ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+@@ -251,7 +252,8 @@ int module_finalize(const Elf_Ehdr *hdr,
+ struct module *me)
+ {
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+- *para = NULL, *orc = NULL, *orc_ip = NULL;
++ *para = NULL, *orc = NULL, *orc_ip = NULL,
++ *retpolines = NULL, *returns = NULL;
+ char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+@@ -267,8 +269,28 @@ int module_finalize(const Elf_Ehdr *hdr,
+ orc = s;
+ if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
+ orc_ip = s;
++ if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
++ retpolines = s;
++ if (!strcmp(".return_sites", secstrings + s->sh_name))
++ returns = s;
+ }
+
++ /*
++ * See alternative_instructions() for the ordering rules between the
++ * various patching types.
++ */
++ if (para) {
++ void *pseg = (void *)para->sh_addr;
++ apply_paravirt(pseg, pseg + para->sh_size);
++ }
++ if (retpolines) {
++ void *rseg = (void *)retpolines->sh_addr;
++ apply_retpolines(rseg, rseg + retpolines->sh_size);
++ }
++ if (returns) {
++ void *rseg = (void *)returns->sh_addr;
++ apply_returns(rseg, rseg + returns->sh_size);
++ }
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+@@ -282,11 +304,6 @@ int module_finalize(const Elf_Ehdr *hdr,
+ tseg, tseg + text->sh_size);
+ }
+
+- if (para) {
+- void *pseg = (void *)para->sh_addr;
+- apply_paravirt(pseg, pseg + para->sh_size);
+- }
+-
+ /* make jump label nops */
+ jump_label_apply_nops(me);
+
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index 04cafc057bed4..f1cdb8891ad41 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -41,7 +41,7 @@ extern void _paravirt_nop(void);
+ asm (".pushsection .entry.text, \"ax\"\n"
+ ".global _paravirt_nop\n"
+ "_paravirt_nop:\n\t"
+- "ret\n\t"
++ ASM_RET
+ ".size _paravirt_nop, . - _paravirt_nop\n\t"
+ ".type _paravirt_nop, @function\n\t"
+ ".popsection");
+diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
+index 6b07faaa15798..23154d24b1173 100644
+--- a/arch/x86/kernel/pmem.c
++++ b/arch/x86/kernel/pmem.c
+@@ -27,6 +27,11 @@ static __init int register_e820_pmem(void)
+ * simply here to trigger the module to load on demand.
+ */
+ pdev = platform_device_alloc("e820_pmem", -1);
+- return platform_device_add(pdev);
++
++ rc = platform_device_add(pdev);
++ if (rc)
++ platform_device_put(pdev);
++
++ return rc;
+ }
+ device_initcall(register_e820_pmem);
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index 1d9463e3096b6..e6b28c689e9a9 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) fork_frame;
+ p->thread.io_bitmap = NULL;
++ p->thread.iopl_warn = 0;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+ #ifdef CONFIG_X86_64
+@@ -583,7 +584,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
+ }
+
+ if (updmsr)
+- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
++ update_spec_ctrl_cond(msr);
+ }
+
+ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
+@@ -730,7 +731,7 @@ bool xen_set_default_idle(void)
+ }
+ #endif
+
+-void stop_this_cpu(void *dummy)
++void __noreturn stop_this_cpu(void *dummy)
+ {
+ local_irq_disable();
+ /*
+@@ -804,6 +805,10 @@ static void amd_e400_idle(void)
+ */
+ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+ {
++ /* User has disallowed the use of MWAIT. Fallback to HALT */
++ if (boot_option_idle_override == IDLE_NOMWAIT)
++ return 0;
++
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+@@ -912,9 +917,8 @@ static int __init idle_setup(char *str)
+ } else if (!strcmp(str, "nomwait")) {
+ /*
+ * If the boot option of "idle=nomwait" is added,
+- * it means that mwait will be disabled for CPU C2/C3
+- * states. In such case it won't touch the variable
+- * of boot_option_idle_override.
++ * it means that mwait will be disabled for CPU C1/C2/C3
++ * states.
+ */
+ boot_option_idle_override = IDLE_NOMWAIT;
+ } else
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 4f2f54e1281c3..d4a130337e931 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -216,7 +216,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ switch_fpu_finish(next_fpu);
+
+ /* Load the Intel cache allocation PQR MSR. */
+- resctrl_sched_in();
++ resctrl_sched_in(next_p);
+
+ return prev_p;
+ }
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index ec0d836a13b12..b8fe38cd121df 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ }
+
+ /* Load the Intel cache allocation PQR MSR. */
+- resctrl_sched_in();
++ resctrl_sched_in(next_p);
+
+ return prev_p;
+ }
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
+index 4c208ea3bd9f3..033d9c6a94689 100644
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
+ },
+ [REGSET_FP] = {
+ .core_note_type = NT_PRFPREG,
+- .n = sizeof(struct user_i387_struct) / sizeof(long),
++ .n = sizeof(struct fxregs_state) / sizeof(long),
+ .size = sizeof(long), .align = sizeof(long),
+ .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
+ },
+@@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = {
+ },
+ [REGSET_XFP] = {
+ .core_note_type = NT_PRXFPREG,
+- .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
++ .n = sizeof(struct fxregs_state) / sizeof(u32),
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
+ },
+diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
+index 0a40df66a40de..deedd77c7593f 100644
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type)
+ spin_unlock(&rtc_lock);
+
+ /*
+- * Switch back to the initial page table.
++ * Switch to the trampoline page table.
+ */
+-#ifdef CONFIG_X86_32
+- load_cr3(initial_page_table);
+-#else
+- write_cr3(real_mode_header->trampoline_pgd);
+-
+- /* Exiting long mode will fail if CR4.PCIDE is set. */
+- if (boot_cpu_has(X86_FEATURE_PCID))
+- cr4_clear_bits(X86_CR4_PCIDE);
+-#endif
++ load_trampoline_pgtable();
+
+ /* Jump to the identity-mapped low memory code */
+ #ifdef CONFIG_X86_32
+@@ -536,33 +528,29 @@ static inline void kb_wait(void)
+ }
+ }
+
+-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
+-{
+- cpu_emergency_vmxoff();
+-}
++static inline void nmi_shootdown_cpus_on_restart(void);
+
+-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
+-static void emergency_vmx_disable_all(void)
++static void emergency_reboot_disable_virtualization(void)
+ {
+ /* Just make sure we won't change CPUs while doing this */
+ local_irq_disable();
+
+ /*
+- * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
+- * the machine, because the CPU blocks INIT when it's in VMX root.
++ * Disable virtualization on all CPUs before rebooting to avoid hanging
++ * the system, as VMX and SVM block INIT when running in the host.
+ *
+ * We can't take any locks and we may be on an inconsistent state, so
+- * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
++ * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
+ *
+- * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
+- * doesn't prevent a different CPU from being in VMX root operation.
++ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
++ * other CPUs may have virtualization enabled.
+ */
+- if (cpu_has_vmx()) {
+- /* Safely force _this_ CPU out of VMX root operation. */
+- __cpu_emergency_vmxoff();
++ if (cpu_has_vmx() || cpu_has_svm(NULL)) {
++ /* Safely force _this_ CPU out of VMX/SVM operation. */
++ cpu_emergency_disable_virtualization();
+
+- /* Halt and exit VMX root operation on the other CPUs. */
+- nmi_shootdown_cpus(vmxoff_nmi);
++ /* Disable VMX/SVM and halt on other CPUs. */
++ nmi_shootdown_cpus_on_restart();
+ }
+ }
+
+@@ -598,7 +586,7 @@ static void native_machine_emergency_restart(void)
+ unsigned short mode;
+
+ if (reboot_emergency)
+- emergency_vmx_disable_all();
++ emergency_reboot_disable_virtualization();
+
+ tboot_shutdown(TB_SHUTDOWN_REBOOT);
+
+@@ -803,6 +791,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
+ /* This is the CPU performing the emergency shutdown work. */
+ int crashing_cpu = -1;
+
++/*
++ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
++ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
++ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
++ */
++void cpu_emergency_disable_virtualization(void)
++{
++ cpu_emergency_vmxoff();
++ cpu_emergency_svm_disable();
++}
++
+ #if defined(CONFIG_SMP)
+
+ static nmi_shootdown_cb shootdown_callback;
+@@ -825,7 +824,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
+ return NMI_HANDLED;
+ local_irq_disable();
+
+- shootdown_callback(cpu, regs);
++ if (shootdown_callback)
++ shootdown_callback(cpu, regs);
++
++ /*
++ * Prepare the CPU for reboot _after_ invoking the callback so that the
++ * callback can safely use virtualization instructions, e.g. VMCLEAR.
++ */
++ cpu_emergency_disable_virtualization();
+
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+@@ -836,18 +842,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
+ return NMI_HANDLED;
+ }
+
+-/*
+- * Halt all other CPUs, calling the specified function on each of them
++/**
++ * nmi_shootdown_cpus - Stop other CPUs via NMI
++ * @callback: Optional callback to be invoked from the NMI handler
++ *
++ * The NMI handler on the remote CPUs invokes @callback, if not
++ * NULL, first and then disables virtualization to ensure that
++ * INIT is recognized during reboot.
+ *
+- * This function can be used to halt all other CPUs on crash
+- * or emergency reboot time. The function passed as parameter
+- * will be called inside a NMI handler on all CPUs.
++ * nmi_shootdown_cpus() can only be invoked once. After the first
++ * invocation all other CPUs are stuck in crash_nmi_callback() and
++ * cannot respond to a second NMI.
+ */
+ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+ {
+ unsigned long msecs;
++
+ local_irq_disable();
+
++ /*
++ * Avoid certain doom if a shootdown already occurred; re-registering
++ * the NMI handler will cause list corruption, modifying the callback
++ * will do who knows what, etc...
++ */
++ if (WARN_ON_ONCE(crash_ipi_issued))
++ return;
++
+ /* Make a note of crashing cpu. Will be used in NMI callback. */
+ crashing_cpu = safe_smp_processor_id();
+
+@@ -875,7 +895,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+ msecs--;
+ }
+
+- /* Leave the nmi callback set */
++ /*
++ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
++ * the callback could result in a NULL pointer dereference if a CPU
++ * (finally) responds after the timeout expires.
++ */
++}
++
++static inline void nmi_shootdown_cpus_on_restart(void)
++{
++ if (!crash_ipi_issued)
++ nmi_shootdown_cpus(NULL);
+ }
+
+ /*
+@@ -905,6 +935,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+ /* No other CPUs to shoot down */
+ }
+
++static inline void nmi_shootdown_cpus_on_restart(void) { }
++
+ void run_crash_ipi_callback(struct pt_regs *regs)
+ {
+ }
+diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
+index f469153eca8ab..c7c4b1917336d 100644
+--- a/arch/x86/kernel/relocate_kernel_32.S
++++ b/arch/x86/kernel/relocate_kernel_32.S
+@@ -7,10 +7,12 @@
+ #include <linux/linkage.h>
+ #include <asm/page_types.h>
+ #include <asm/kexec.h>
++#include <asm/nospec-branch.h>
+ #include <asm/processor-flags.h>
+
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+ */
+
+ #define PTR(x) (x << 2)
+@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ movl %edi, %eax
+ addl $(identity_mapped - relocate_kernel), %eax
+ pushl %eax
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ 1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+ addl $PAGE_SIZE, %esp
+ 2:
++ ANNOTATE_RETPOLINE_SAFE
+ call *%edx
+
+ /* get the re-entry point of the peer system */
+@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+ popl %edi
+ popl %esi
+ popl %ebx
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ popl %edi
+ popl %ebx
+ popl %ebp
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
+diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
+index c53271aebb64d..8a9cea950e398 100644
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -13,7 +13,8 @@
+ #include <asm/unwind_hints.h>
+
+ /*
+- * Must be relocatable PIC code callable as a C function
++ * Must be relocatable PIC code callable as a C function, in particular
++ * there must be a plain RET and not jump to return thunk.
+ */
+
+ #define PTR(x) (x << 3)
+@@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
+ /* jump to identity mapped page */
+ addq $(identity_mapped - relocate_kernel), %r8
+ pushq %r8
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(relocate_kernel)
+
+ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+@@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+ xorl %r14d, %r14d
+ xorl %r15d, %r15d
+
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+
+ 1:
+ popq %rdx
+@@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(identity_mapped)
+
+ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+@@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
+ popq %r12
+ popq %rbp
+ popq %rbx
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(virtual_mapped)
+
+ /* Do the copies */
+@@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+ 3:
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_CODE_END(swap_pages)
+
+ .globl kexec_control_code_size
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index 40ed44ead0631..8e56c4de00b9c 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -368,21 +368,41 @@ static void __init parse_setup_data(void)
+
+ static void __init memblock_x86_reserve_range_setup_data(void)
+ {
++ struct setup_indirect *indirect;
+ struct setup_data *data;
+- u64 pa_data;
++ u64 pa_data, pa_next;
++ u32 len;
+
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_memremap(pa_data, sizeof(*data));
++ if (!data) {
++ pr_warn("setup: failed to memremap setup_data entry\n");
++ return;
++ }
++
++ len = sizeof(*data);
++ pa_next = data->next;
++
+ memblock_reserve(pa_data, sizeof(*data) + data->len);
+
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+- memblock_reserve(((struct setup_indirect *)data->data)->addr,
+- ((struct setup_indirect *)data->data)->len);
++ if (data->type == SETUP_INDIRECT) {
++ len += data->len;
++ early_memunmap(data, sizeof(*data));
++ data = early_memremap(pa_data, len);
++ if (!data) {
++ pr_warn("setup: failed to memremap indirect setup_data\n");
++ return;
++ }
+
+- pa_data = data->next;
+- early_memunmap(data, sizeof(*data));
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT)
++ memblock_reserve(indirect->addr, indirect->len);
++ }
++
++ pa_data = pa_next;
++ early_memunmap(data, len);
+ }
+ }
+
+@@ -713,9 +733,6 @@ static void __init early_reserve_memory(void)
+
+ early_reserve_initrd();
+
+- if (efi_enabled(EFI_BOOT))
+- efi_memblock_x86_reserve_range();
+-
+ memblock_x86_reserve_range_setup_data();
+
+ reserve_ibft_region();
+@@ -890,6 +907,9 @@ void __init setup_arch(char **cmdline_p)
+
+ parse_early_param();
+
++ if (efi_enabled(EFI_BOOT))
++ efi_memblock_x86_reserve_range();
++
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Memory used by the kernel cannot be hot-removed because Linux
+diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
+index a6895e440bc35..a0064cf77e562 100644
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -46,16 +46,6 @@ static struct ghcb __initdata *boot_ghcb;
+ struct sev_es_runtime_data {
+ struct ghcb ghcb_page;
+
+- /* Physical storage for the per-CPU IST stack of the #VC handler */
+- char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+-
+- /*
+- * Physical storage for the per-CPU fall-back stack of the #VC handler.
+- * The fall-back stack is used when it is not safe to switch back to the
+- * interrupted stack in the #VC entry code.
+- */
+- char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+-
+ /*
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+ * It is needed when an NMI happens while the #VC handler uses the real
+@@ -99,27 +89,6 @@ DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+ /* Needed in vc_early_forward_exception */
+ void do_early_exception(struct pt_regs *regs, int trapnr);
+
+-static void __init setup_vc_stacks(int cpu)
+-{
+- struct sev_es_runtime_data *data;
+- struct cpu_entry_area *cea;
+- unsigned long vaddr;
+- phys_addr_t pa;
+-
+- data = per_cpu(runtime_data, cpu);
+- cea = get_cpu_entry_area(cpu);
+-
+- /* Map #VC IST stack */
+- vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
+- pa = __pa(data->ist_stack);
+- cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+-
+- /* Map VC fall-back stack */
+- vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
+- pa = __pa(data->fallback_stack);
+- cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+-}
+-
+ static __always_inline bool on_vc_stack(struct pt_regs *regs)
+ {
+ unsigned long sp = regs->sp;
+@@ -325,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ char *dst, char *buf, size_t size)
+ {
+ unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+- char __user *target = (char __user *)dst;
+- u64 d8;
+- u32 d4;
+- u16 d2;
+- u8 d1;
+
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+@@ -351,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+- case 1:
++ case 1: {
++ u8 d1;
++ u8 __user *target = (u8 __user *)dst;
++
+ memcpy(&d1, buf, 1);
+ if (__put_user(d1, target))
+ goto fault;
+ break;
+- case 2:
++ }
++ case 2: {
++ u16 d2;
++ u16 __user *target = (u16 __user *)dst;
++
+ memcpy(&d2, buf, 2);
+ if (__put_user(d2, target))
+ goto fault;
+ break;
+- case 4:
++ }
++ case 4: {
++ u32 d4;
++ u32 __user *target = (u32 __user *)dst;
++
+ memcpy(&d4, buf, 4);
+ if (__put_user(d4, target))
+ goto fault;
+ break;
+- case 8:
++ }
++ case 8: {
++ u64 d8;
++ u64 __user *target = (u64 __user *)dst;
++
+ memcpy(&d8, buf, 8);
+ if (__put_user(d8, target))
+ goto fault;
+ break;
++ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+@@ -393,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ char *src, char *buf, size_t size)
+ {
+ unsigned long error_code = X86_PF_PROT;
+- char __user *s = (char __user *)src;
+- u64 d8;
+- u32 d4;
+- u16 d2;
+- u8 d1;
+
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+@@ -419,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+- case 1:
++ case 1: {
++ u8 d1;
++ u8 __user *s = (u8 __user *)src;
++
+ if (__get_user(d1, s))
+ goto fault;
+ memcpy(buf, &d1, 1);
+ break;
+- case 2:
++ }
++ case 2: {
++ u16 d2;
++ u16 __user *s = (u16 __user *)src;
++
+ if (__get_user(d2, s))
+ goto fault;
+ memcpy(buf, &d2, 2);
+ break;
+- case 4:
++ }
++ case 4: {
++ u32 d4;
++ u32 __user *s = (u32 __user *)src;
++
+ if (__get_user(d4, s))
+ goto fault;
+ memcpy(buf, &d4, 4);
+ break;
+- case 8:
++ }
++ case 8: {
++ u64 d8;
++ u64 __user *s = (u64 __user *)src;
+ if (__get_user(d8, s))
+ goto fault;
+ memcpy(buf, &d8, 8);
+ break;
++ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+@@ -787,7 +777,6 @@ void __init sev_es_init_vc_handling(void)
+ for_each_possible_cpu(cpu) {
+ alloc_runtime_data(cpu);
+ init_ghcb(cpu);
+- setup_vc_stacks(cpu);
+ }
+
+ sev_es_setup_play_dead();
+diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S
+index ee04941a6546a..3355e27c69ebf 100644
+--- a/arch/x86/kernel/sev_verify_cbit.S
++++ b/arch/x86/kernel/sev_verify_cbit.S
+@@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit)
+ #endif
+ /* Return page-table pointer */
+ movq %rdi, %rax
+- ret
++ RET
+ SYM_FUNC_END(sev_verify_cbit)
+diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
+index f4d21e4700835..bf10340a9b71d 100644
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -722,7 +722,7 @@ badframe:
+ /* max_frame_size tells userspace the worst case signal stack size. */
+ static unsigned long __ro_after_init max_frame_size;
+
+-void __init init_sigframe_size(void)
++static int __init init_sigframe_size(void)
+ {
+ max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
+
+@@ -732,7 +732,9 @@ void __init init_sigframe_size(void)
+ max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
+
+ pr_info("max sigframe size: %lu\n", max_frame_size);
++ return 0;
+ }
++early_initcall(init_sigframe_size);
+
+ unsigned long get_sigframe_size(void)
+ {
+diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
+index b52407c56000e..879ef8c72f5c0 100644
+--- a/arch/x86/kernel/signal_compat.c
++++ b/arch/x86/kernel/signal_compat.c
+@@ -149,8 +149,10 @@ static inline void signal_compat_build_tests(void)
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
++ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x24);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
++ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_flags) != 0x18);
+
+ CHECK_CSI_OFFSET(_sigpoll);
+ CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
+diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
+index 06db901fabe8e..375b33ecafa27 100644
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -32,7 +32,7 @@
+ #include <asm/mce.h>
+ #include <asm/trace/irq_vectors.h>
+ #include <asm/kexec.h>
+-#include <asm/virtext.h>
++#include <asm/reboot.h>
+
+ /*
+ * Some notes on x86 processor bugs affecting SMP operation:
+@@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+ if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+ return NMI_HANDLED;
+
+- cpu_emergency_vmxoff();
++ cpu_emergency_disable_virtualization();
+ stop_this_cpu(NULL);
+
+ return NMI_HANDLED;
+@@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
+ {
+ ack_APIC_irq();
+- cpu_emergency_vmxoff();
++ cpu_emergency_disable_virtualization();
+ stop_this_cpu(NULL);
+ }
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 85f6e242b6b45..714f66aa03388 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -105,6 +105,17 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+ EXPORT_PER_CPU_SYMBOL(cpu_info);
+
++struct mwait_cpu_dead {
++ unsigned int control;
++ unsigned int status;
++};
++
++/*
++ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
++ * that it's unlikely to be touched by other CPUs.
++ */
++static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
++
+ /* Logical package management. We might want to allocate that dynamically */
+ unsigned int __max_logical_packages __read_mostly;
+ EXPORT_SYMBOL(__max_logical_packages);
+@@ -1685,10 +1696,10 @@ EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
+ */
+ static inline void mwait_play_dead(void)
+ {
++ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int highest_cstate = 0;
+ unsigned int highest_subcstate = 0;
+- void *mwait_ptr;
+ int i;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+@@ -1723,13 +1734,6 @@ static inline void mwait_play_dead(void)
+ (highest_subcstate - 1);
+ }
+
+- /*
+- * This should be a memory location in a cache line which is
+- * unlikely to be touched by other processors. The actual
+- * content is immaterial as it is not actually modified in any way.
+- */
+- mwait_ptr = &current_thread_info()->flags;
+-
+ wbinvd();
+
+ while (1) {
+@@ -1741,9 +1745,9 @@ static inline void mwait_play_dead(void)
+ * case where we return around the loop.
+ */
+ mb();
+- clflush(mwait_ptr);
++ clflush(md);
+ mb();
+- __monitor(mwait_ptr, 0, 0);
++ __monitor(md, 0, 0);
+ mb();
+ __mwait(eax, 0);
+
+diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
+index ea028e736831a..b48b659ccf6fb 100644
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -12,12 +12,21 @@ enum insn_type {
+ };
+
+ /*
+- * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
+- * The REX.W cancels the effect of any data16.
++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
++ * that there is no false-positive trampoline identification while also being a
++ * speculation stop.
+ */
+-static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
+
+-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
++/*
++ * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
++ */
++static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
++
++static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
++
++static void __ref __static_call_transform(void *insn, enum insn_type type,
++ void *func, bool modinit)
+ {
+ const void *emulate = NULL;
+ int size = CALL_INSN_SIZE;
+@@ -42,15 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
+ break;
+
+ case RET:
+- code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+- size = RET_INSN_SIZE;
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
++ else
++ code = &retinsn;
+ break;
+ }
+
+ if (memcmp(insn, code, size) == 0)
+ return;
+
+- if (unlikely(system_state == SYSTEM_BOOTING))
++ if (system_state == SYSTEM_BOOTING || modinit)
+ return text_poke_early(insn, code, size);
+
+ text_poke_bp(insn, code, size, emulate);
+@@ -98,14 +109,55 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+
+ if (tramp) {
+ __static_call_validate(tramp, true);
+- __static_call_transform(tramp, __sc_insn(!func, true), func);
++ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
+ }
+
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
+ __static_call_validate(site, tail);
+- __static_call_transform(site, __sc_insn(!func, tail), func);
++ __static_call_transform(site, __sc_insn(!func, tail), func, false);
+ }
+
+ mutex_unlock(&text_mutex);
+ }
+ EXPORT_SYMBOL_GPL(arch_static_call_transform);
++
++#ifdef CONFIG_RETHUNK
++/*
++ * This is called by apply_returns() to fix up static call trampolines,
++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
++ * having a return trampoline.
++ *
++ * The problem is that static_call() is available before determining
++ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
++ *
++ * This means that __static_call_transform() above can have overwritten the
++ * return trampoline and we now need to fix things up to be consistent.
++ */
++bool __static_call_fixup(void *tramp, u8 op, void *dest)
++{
++ unsigned long addr = (unsigned long)tramp;
++ /*
++ * Not all .return_sites are a static_call trampoline (most are not).
++ * Check if the 3 bytes after the return are still kernel text, if not,
++ * then this definitely is not a trampoline and we need not worry
++ * further.
++ *
++ * This avoids the memcmp() below tripping over pagefaults etc..
++ */
++ if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) &&
++ !kernel_text_address(addr + 7))
++ return false;
++
++ if (memcmp(tramp+5, tramp_ud, 3)) {
++ /* Not a trampoline site, not our problem. */
++ return false;
++ }
++
++ mutex_lock(&text_mutex);
++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
++ __static_call_transform(tramp, RET, NULL, true);
++ mutex_unlock(&text_mutex);
++
++ return true;
++}
++#endif
+diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
+index 0f3c307b37b3a..8e2b2552b5eea 100644
+--- a/arch/x86/kernel/step.c
++++ b/arch/x86/kernel/step.c
+@@ -180,8 +180,7 @@ void set_task_blockstep(struct task_struct *task, bool on)
+ *
+ * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+ * task is current or it can't be running, otherwise we can race
+- * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+- * PTRACE_KILL is not safe.
++ * with __switch_to_xtra(). We rely on ptrace_freeze_traced().
+ */
+ local_irq_disable();
+ debugctl = get_debugctlmsr();
+diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
+index 660b78827638f..8cc653ffdccd7 100644
+--- a/arch/x86/kernel/sys_x86_64.c
++++ b/arch/x86/kernel/sys_x86_64.c
+@@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str)
+ if (*str == 0)
+ return 1;
+
+- if (*str == '=')
+- str++;
+-
+ if (!strcmp(str, "32"))
+ va_align.flags = ALIGN_VA_32;
+ else if (!strcmp(str, "64"))
+@@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str)
+ else if (!strcmp(str, "on"))
+ va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+ else
+- return 0;
++ pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
+
+ return 1;
+ }
+-__setup("align_va_addr", control_va_addr_alignment);
++__setup("align_va_addr=", control_va_addr_alignment);
+
+ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index a58800973aed3..ca47080e37741 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -313,17 +313,19 @@ out:
+ }
+
+ #ifdef CONFIG_VMAP_STACK
+-__visible void __noreturn handle_stack_overflow(const char *message,
+- struct pt_regs *regs,
+- unsigned long fault_address)
++__visible void __noreturn handle_stack_overflow(struct pt_regs *regs,
++ unsigned long fault_address,
++ struct stack_info *info)
+ {
+- printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+- (void *)fault_address, current->stack,
+- (char *)current->stack + THREAD_SIZE - 1);
+- die(message, regs, 0);
++ const char *name = stack_type_name(info->type);
++
++ printk(KERN_EMERG "BUG: %s stack guard page was hit at %p (stack is %p..%p)\n",
++ name, (void *)fault_address, info->begin, info->end);
++
++ die("stack guard page", regs, 0);
+
+ /* Be absolutely certain we don't return. */
+- panic("%s", message);
++ panic("%s stack guard hit", name);
+ }
+ #endif
+
+@@ -353,6 +355,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
+
+ #ifdef CONFIG_VMAP_STACK
+ unsigned long address = read_cr2();
++ struct stack_info info;
+ #endif
+
+ #ifdef CONFIG_X86_ESPFIX64
+@@ -455,10 +458,8 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
+ * stack even if the actual trigger for the double fault was
+ * something else.
+ */
+- if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) {
+- handle_stack_overflow("kernel stack overflow (double-fault)",
+- regs, address);
+- }
++ if (get_stack_guard_info((void *)address, &info))
++ handle_stack_overflow(regs, address, &info);
+ #endif
+
+ pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+@@ -528,6 +529,36 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
+
+ #define GPFSTR "general protection fault"
+
++static bool fixup_iopl_exception(struct pt_regs *regs)
++{
++ struct thread_struct *t = &current->thread;
++ unsigned char byte;
++ unsigned long ip;
++
++ if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
++ return false;
++
++ if (insn_get_effective_ip(regs, &ip))
++ return false;
++
++ if (get_user(byte, (const char __user *)ip))
++ return false;
++
++ if (byte != 0xfa && byte != 0xfb)
++ return false;
++
++ if (!t->iopl_warn && printk_ratelimit()) {
++ pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
++ current->comm, task_pid_nr(current), ip);
++ print_vma_addr(KERN_CONT " in ", ip);
++ pr_cont("\n");
++ t->iopl_warn = 1;
++ }
++
++ regs->ip += 1;
++ return true;
++}
++
+ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
+ {
+ char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
+@@ -553,6 +584,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
+ tsk = current;
+
+ if (user_mode(regs)) {
++ if (fixup_iopl_exception(regs))
++ goto exit;
++
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_nr = X86_TRAP_GP;
+
+@@ -625,6 +659,7 @@ static bool do_int3(struct pt_regs *regs)
+
+ return res == NOTIFY_STOP;
+ }
++NOKPROBE_SYMBOL(do_int3);
+
+ static void do_int3_user(struct pt_regs *regs)
+ {
+@@ -709,7 +744,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
+ stack = (unsigned long *)sp;
+
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+- info.type >= STACK_TYPE_EXCEPTION_LAST)
++ info.type > STACK_TYPE_EXCEPTION_LAST)
+ sp = __this_cpu_ist_top_va(VC2);
+
+ sync:
+@@ -727,14 +762,10 @@ sync:
+ }
+ #endif
+
+-struct bad_iret_stack {
+- void *error_entry_ret;
+- struct pt_regs regs;
+-};
+-
+-asmlinkage __visible noinstr
+-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
++asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
+ {
++ struct pt_regs tmp, *new_stack;
++
+ /*
+ * This is called from entry_64.S early in handling a fault
+ * caused by a bad iret to user mode. To handle the fault
+@@ -743,19 +774,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+ * just below the IRET frame) and we want to pretend that the
+ * exception came from the IRET target.
+ */
+- struct bad_iret_stack tmp, *new_stack =
+- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
++ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
+
+ /* Copy the IRET target to the temporary storage. */
+- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
++ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
+
+ /* Copy the remainder of the stack from the current stack. */
+- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
++ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
+
+ /* Update the entry stack */
+ __memcpy(new_stack, &tmp, sizeof(tmp));
+
+- BUG_ON(!user_mode(&new_stack->regs));
++ BUG_ON(!user_mode(new_stack));
+ return new_stack;
+ }
+ #endif
+diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
+index 2e076a459a0c0..a698196377be9 100644
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason)
+
+ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
++static void __init tsc_disable_clocksource_watchdog(void)
++{
++ clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
++ clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
++}
++
+ static void __init check_system_tsc_reliable(void)
+ {
+ #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+@@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void)
+ #endif
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+ tsc_clocksource_reliable = 1;
++
++ /*
++ * Disable the clocksource watchdog when the system has:
++ * - TSC running at constant frequency
++ * - TSC which does not stop in C-States
++ * - the TSC_ADJUST register which allows to detect even minimal
++ * modifications
++ * - not more than two sockets. As the number of sockets cannot be
++ * evaluated at the early boot stage where this has to be
++ * invoked, check the number of online memory nodes as a
++ * fallback solution which is an reasonable estimate.
++ */
++ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
++ boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
++ boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
++ nr_online_nodes <= 2)
++ tsc_disable_clocksource_watchdog();
+ }
+
+ /*
+@@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void)
+ if (tsc_unstable)
+ goto unreg;
+
+- if (tsc_clocksource_reliable || no_tsc_watchdog)
+- clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+-
+ if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+ clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
+
+@@ -1527,7 +1547,7 @@ void __init tsc_init(void)
+ }
+
+ if (tsc_clocksource_reliable || no_tsc_watchdog)
+- clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
++ tsc_disable_clocksource_watchdog();
+
+ clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
+ detect_art();
+diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
+index 50a4515fe0ad1..9452dc9664b51 100644
+--- a/arch/x86/kernel/tsc_sync.c
++++ b/arch/x86/kernel/tsc_sync.c
+@@ -30,6 +30,7 @@ struct tsc_adjust {
+ };
+
+ static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
++static struct timer_list tsc_sync_check_timer;
+
+ /*
+ * TSC's on different sockets may be reset asynchronously.
+@@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume)
+ }
+ }
+
++/*
++ * Normally the tsc_sync will be checked every time system enters idle
++ * state, but there is still caveat that a system won't enter idle,
++ * either because it's too busy or configured purposely to not enter
++ * idle.
++ *
++ * So setup a periodic timer (every 10 minutes) to make sure the check
++ * is always on.
++ */
++
++#define SYNC_CHECK_INTERVAL (HZ * 600)
++
++static void tsc_sync_check_timer_fn(struct timer_list *unused)
++{
++ int next_cpu;
++
++ tsc_verify_tsc_adjust(false);
++
++ /* Run the check for all onlined CPUs in turn */
++ next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
++ if (next_cpu >= nr_cpu_ids)
++ next_cpu = cpumask_first(cpu_online_mask);
++
++ tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
++ add_timer_on(&tsc_sync_check_timer, next_cpu);
++}
++
++static int __init start_sync_check_timer(void)
++{
++ if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
++ return 0;
++
++ timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
++ tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
++ add_timer(&tsc_sync_check_timer);
++
++ return 0;
++}
++late_initcall(start_sync_check_timer);
++
+ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
+ unsigned int cpu, bool bootcpu)
+ {
+diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
+index a1202536fc57c..8488966da5f19 100644
+--- a/arch/x86/kernel/unwind_orc.c
++++ b/arch/x86/kernel/unwind_orc.c
+@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip);
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+ {
+ struct ftrace_ops *ops;
+- unsigned long caller;
++ unsigned long tramp_addr, offset;
+
+ ops = ftrace_ops_trampoline(ip);
+ if (!ops)
+ return NULL;
+
++ /* Set tramp_addr to the start of the code copied by the trampoline */
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+- caller = (unsigned long)ftrace_regs_call;
++ tramp_addr = (unsigned long)ftrace_regs_caller;
+ else
+- caller = (unsigned long)ftrace_call;
++ tramp_addr = (unsigned long)ftrace_caller;
++
++ /* Now place tramp_addr to the location within the trampoline ip is at */
++ offset = ip - ops->trampoline;
++ tramp_addr += offset;
+
+ /* Prevent unlikely recursion */
+- if (ip == caller)
++ if (ip == tramp_addr)
+ return NULL;
+
+- return orc_find(caller);
++ return orc_find(tramp_addr);
+ }
+ #else
+ static struct orc_entry *orc_ftrace_find(unsigned long ip)
+@@ -695,7 +700,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ /* Otherwise, skip ahead to the user-specified starting frame: */
+ while (!unwind_done(state) &&
+ (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+- state->sp < (unsigned long)first_frame))
++ state->sp <= (unsigned long)first_frame))
+ unwind_next_frame(state);
+
+ return;
+diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
+index b63cf8f7745ee..6c07f6daaa227 100644
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -722,8 +722,9 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+ switch (opc1) {
+ case 0xeb: /* jmp 8 */
+ case 0xe9: /* jmp 32 */
+- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
+ break;
++ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
++ goto setup;
+
+ case 0xe8: /* call relative */
+ branch_clear_offset(auprobe, insn);
+@@ -753,6 +754,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+ return -ENOTSUPP;
+ }
+
++setup:
+ auprobe->branch.opc1 = opc1;
+ auprobe->branch.ilen = insn->length;
+ auprobe->branch.offs = insn->immediate.value;
+diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
+index 641f0fe1e5b4a..1258a5872d128 100644
+--- a/arch/x86/kernel/verify_cpu.S
++++ b/arch/x86/kernel/verify_cpu.S
+@@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu)
+ .Lverify_cpu_no_longmode:
+ popf # Restore caller passed flags
+ movl $1,%eax
+- ret
++ RET
+ .Lverify_cpu_sse_ok:
+ popf # Restore caller passed flags
+ xorl %eax, %eax
+- ret
++ RET
+ SYM_FUNC_END(verify_cpu)
+diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
+index e5a7a10a0164d..17d58740891e2 100644
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -142,6 +142,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
+
+ user_access_end();
+
++exit_vm86:
+ preempt_disable();
+ tsk->thread.sp0 = vm86->saved_sp0;
+ tsk->thread.sysenter_cs = __KERNEL_CS;
+@@ -161,7 +162,8 @@ Efault_end:
+ user_access_end();
+ Efault:
+ pr_alert("could not access userspace vm86 info\n");
+- do_exit(SIGSEGV);
++ force_exit_sig(SIGSEGV);
++ goto exit_vm86;
+ }
+
+ static int do_vm86_irq_handling(int subfunction, int irqnumber);
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index efd9e9ea17f25..ca1a7595edac8 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -133,7 +133,20 @@ SECTIONS
+ LOCK_TEXT
+ KPROBES_TEXT
+ ALIGN_ENTRY_TEXT_BEGIN
++#ifdef CONFIG_CPU_SRSO
++ *(.text..__x86.rethunk_untrain)
++#endif
++
+ ENTRY_TEXT
++
++#ifdef CONFIG_CPU_SRSO
++ /*
++ * See the comment above srso_alias_untrain_ret()'s
++ * definition.
++ */
++ . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
++ *(.text..__x86.rethunk_safe)
++#endif
+ ALIGN_ENTRY_TEXT_END
+ SOFTIRQENTRY_TEXT
+ STATIC_CALL_TEXT
+@@ -142,13 +155,15 @@ SECTIONS
+
+ #ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+- *(.text.__x86.indirect_thunk)
++ *(.text..__x86.indirect_thunk)
++ *(.text..__x86.return_thunk)
+ __indirect_thunk_end = .;
+ #endif
+ } :text =0xcccc
+
+ /* End of text section, which should occupy whole number of pages */
+ _etext = .;
++
+ . = ALIGN(PAGE_SIZE);
+
+ X86_ALIGN_RODATA_BEGIN
+@@ -272,6 +287,27 @@ SECTIONS
+ __parainstructions_end = .;
+ }
+
++#ifdef CONFIG_RETPOLINE
++ /*
++ * List of instructions that call/jmp/jcc to retpoline thunks
++ * __x86_indirect_thunk_*(). These instructions can be patched along
++ * with alternatives, after which the section can be freed.
++ */
++ . = ALIGN(8);
++ .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
++ __retpoline_sites = .;
++ *(.retpoline_sites)
++ __retpoline_sites_end = .;
++ }
++
++ . = ALIGN(8);
++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
++ __return_sites = .;
++ *(.return_sites)
++ __return_sites_end = .;
++ }
++#endif
++
+ /*
+ * struct alt_inst entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+@@ -475,6 +511,27 @@ INIT_PER_CPU(irq_stack_backing_store);
+ "fixed_percpu_data is not at start of per-cpu area");
+ #endif
+
++#ifdef CONFIG_RETHUNK
++. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
++. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
++#endif
++
++#ifdef CONFIG_CPU_SRSO
++/*
++ * GNU ld cannot do XOR until 2.41.
++ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
++ *
++ * LLVM lld cannot do XOR until lld-17.
++ * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb
++ *
++ * Instead do: (A | B) - (A & B) in order to compute the XOR
++ * of the two function addresses:
++ */
++. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
++ (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
++ "SRSO function pair won't alias");
++#endif
++
+ #endif /* CONFIG_X86_64 */
+
+ #ifdef CONFIG_KEXEC_CORE
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index 8b395821cb8d0..d3e3b16ea9cf3 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -32,8 +32,8 @@ static int __init iommu_init_noop(void) { return 0; }
+ static void iommu_shutdown_noop(void) { }
+ bool __init bool_x86_init_noop(void) { return false; }
+ void x86_op_int_noop(int cpu) { }
+-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+-static __init void get_rtc_noop(struct timespec64 *now) { }
++static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
++static void get_rtc_noop(struct timespec64 *now) { }
+
+ static __initconst const struct of_device_id of_cmos_match[] = {
+ { .compatible = "motorola,mc146818" },
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 751aa85a30012..b939b94d931f7 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -232,6 +232,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
+ return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
+ }
+
++static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
++ int nent)
++{
++ int r;
++
++ r = kvm_check_cpuid(e2, nent);
++ if (r)
++ return r;
++
++ kvfree(vcpu->arch.cpuid_entries);
++ vcpu->arch.cpuid_entries = e2;
++ vcpu->arch.cpuid_nent = nent;
++
++ kvm_update_cpuid_runtime(vcpu);
++ kvm_vcpu_after_set_cpuid(vcpu);
++
++ return 0;
++}
++
+ /* when an old userspace process fills a new kernel module */
+ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid *cpuid,
+@@ -268,18 +287,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+ e2[i].padding[2] = 0;
+ }
+
+- r = kvm_check_cpuid(e2, cpuid->nent);
+- if (r) {
++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
++ if (r)
+ kvfree(e2);
+- goto out_free_cpuid;
+- }
+-
+- kvfree(vcpu->arch.cpuid_entries);
+- vcpu->arch.cpuid_entries = e2;
+- vcpu->arch.cpuid_nent = cpuid->nent;
+-
+- kvm_update_cpuid_runtime(vcpu);
+- kvm_vcpu_after_set_cpuid(vcpu);
+
+ out_free_cpuid:
+ kvfree(e);
+@@ -303,20 +313,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
+ return PTR_ERR(e2);
+ }
+
+- r = kvm_check_cpuid(e2, cpuid->nent);
+- if (r) {
++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
++ if (r)
+ kvfree(e2);
+- return r;
+- }
+-
+- kvfree(vcpu->arch.cpuid_entries);
+- vcpu->arch.cpuid_entries = e2;
+- vcpu->arch.cpuid_nent = cpuid->nent;
+
+- kvm_update_cpuid_runtime(vcpu);
+- kvm_vcpu_after_set_cpuid(vcpu);
+-
+- return 0;
++ return r;
+ }
+
+ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
+@@ -420,12 +421,13 @@ void kvm_set_cpu_caps(void)
+ );
+
+ kvm_cpu_cap_mask(CPUID_7_0_EBX,
+- F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+- F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
+- );
++ F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
++ F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
++ F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
++ F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
++ F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
++ F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
++ F(AVX512VL));
+
+ kvm_cpu_cap_mask(CPUID_7_ECX,
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+@@ -542,6 +544,9 @@ void kvm_set_cpu_caps(void)
+ F(PMM) | F(PMM_EN)
+ );
+
++ if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
++ kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
++
+ /*
+ * Hide RDTSCP and RDPID if either feature is reported as supported but
+ * probing MSR_TSC_AUX failed. This is purely a sanity check and
+@@ -565,16 +570,22 @@ struct kvm_cpuid_array {
+ int nent;
+ };
+
++static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
++{
++ if (array->nent >= array->maxnent)
++ return NULL;
++
++ return &array->entries[array->nent++];
++}
++
+ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
+ u32 function, u32 index)
+ {
+- struct kvm_cpuid_entry2 *entry;
++ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
+
+- if (array->nent >= array->maxnent)
++ if (!entry)
+ return NULL;
+
+- entry = &array->entries[array->nent++];
+-
+ entry->function = function;
+ entry->index = index;
+ entry->flags = 0;
+@@ -716,13 +727,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ entry->edx = 0;
+ }
+ break;
+- case 9:
+- break;
+ case 0xa: { /* Architectural Performance Monitoring */
+ struct x86_pmu_capability cap;
+ union cpuid10_eax eax;
+ union cpuid10_edx edx;
+
++ if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
++ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
++ break;
++ }
++
+ perf_get_x86_pmu_capability(&cap);
+
+ /*
+@@ -750,22 +764,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ entry->edx = edx.full;
+ break;
+ }
+- /*
+- * Per Intel's SDM, the 0x1f is a superset of 0xb,
+- * thus they can be handled by common code.
+- */
+ case 0x1f:
+ case 0xb:
+ /*
+- * Populate entries until the level type (ECX[15:8]) of the
+- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
+- * the starting entry, filled by the primary do_host_cpuid().
++ * No topology; a valid topology is indicated by the presence
++ * of subleaf 1.
+ */
+- for (i = 1; entry->ecx & 0xff00; ++i) {
+- entry = do_host_cpuid(array, function, i);
+- if (!entry)
+- goto out;
+- }
++ entry->eax = entry->ebx = entry->ecx = 0;
+ break;
+ case 0xd:
+ entry->eax &= supported_xcr0;
+@@ -897,11 +902,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ entry->eax = min(entry->eax, 0x8000001f);
+ break;
+ case 0x80000001:
++ entry->ebx &= ~GENMASK(27, 16);
+ cpuid_entry_override(entry, CPUID_8000_0001_EDX);
+ cpuid_entry_override(entry, CPUID_8000_0001_ECX);
+ break;
+ case 0x80000006:
+- /* L2 cache and TLB: pass through host info. */
++ /* Drop reserved bits, pass host L2 cache and TLB info. */
++ entry->edx &= ~GENMASK(17, 16);
+ break;
+ case 0x80000007: /* Advanced power management */
+ /* invariant TSC is CPUID.80000007H:EDX[8] */
+@@ -931,6 +938,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ g_phys_as = phys_as;
+
+ entry->eax = g_phys_as | (virt_as << 8);
++ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
+ entry->edx = 0;
+ cpuid_entry_override(entry, CPUID_8000_0008_EBX);
+ break;
+@@ -950,14 +958,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ entry->ecx = entry->edx = 0;
+ break;
+ case 0x8000001a:
++ entry->eax &= GENMASK(2, 0);
++ entry->ebx = entry->ecx = entry->edx = 0;
++ break;
+ case 0x8000001e:
++ /* Do not return host topology information. */
++ entry->eax = entry->ebx = entry->ecx = 0;
++ entry->edx = 0; /* reserved */
+ break;
+ case 0x8000001F:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ } else {
+ cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+-
++ /* Clear NumVMPL since KVM does not support VMPL. */
++ entry->ebx &= ~GENMASK(31, 12);
+ /*
+ * Enumerate '0' for "PA bits reduction", the adjusted
+ * MAXPHYADDR is enumerated directly (see 0x80000008).
+diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
+index 54a83a7445384..f33c804a922ac 100644
+--- a/arch/x86/kvm/debugfs.c
++++ b/arch/x86/kvm/debugfs.c
+@@ -95,6 +95,9 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
+ unsigned int *log[KVM_NR_PAGE_SIZES], *cur;
+ int i, j, k, l, ret;
+
++ if (!kvm_memslots_have_rmaps(kvm))
++ return 0;
++
+ ret = -ENOMEM;
+ memset(log, 0, sizeof(log));
+ for (i = 0; i < KVM_NR_PAGE_SIZES; i++) {
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 9a144ca8e1460..cb96e4354f317 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -187,9 +187,6 @@
+ #define X8(x...) X4(x), X4(x)
+ #define X16(x...) X8(x), X8(x)
+
+-#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
+-#define FASTOP_SIZE 8
+-
+ struct opcode {
+ u64 flags : 56;
+ u64 intercept : 8;
+@@ -303,9 +300,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
++ *
++ * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
++ * and 1 for the straight line speculation INT3, leaves 7 bytes for the
++ * body of the function. Currently none is larger than 4.
+ */
+ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
+
++#define FASTOP_SIZE 16
++
+ #define __FOP_FUNC(name) \
+ ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ ".type " name ", @function \n\t" \
+@@ -315,19 +318,21 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
+ __FOP_FUNC(#name)
+
+ #define __FOP_RET(name) \
+- "ret \n\t" \
++ ASM_RET \
+ ".size " name ", .-" name "\n\t"
+
+ #define FOP_RET(name) \
+ __FOP_RET(#name)
+
+-#define FOP_START(op) \
++#define __FOP_START(op, align) \
+ extern void em_##op(struct fastop *fake); \
+ asm(".pushsection .text, \"ax\" \n\t" \
+ ".global em_" #op " \n\t" \
+- ".align " __stringify(FASTOP_SIZE) " \n\t" \
++ ".align " __stringify(align) " \n\t" \
+ "em_" #op ":\n\t"
+
++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
++
+ #define FOP_END \
+ ".popsection")
+
+@@ -427,18 +432,29 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
+ FOP_END
+
+ /* Special case for SETcc - 1 instruction per cc */
++
++/*
++ * Depending on .config the SETcc functions look like:
++ *
++ * SETcc %al [3 bytes]
++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
++ * INT3 [1 byte; CONFIG_SLS]
++ */
++#define SETCC_ALIGN 16
++
+ #define FOP_SETCC(op) \
+- ".align 4 \n\t" \
++ ".align " __stringify(SETCC_ALIGN) " \n\t" \
+ ".type " #op ", @function \n\t" \
+ #op ": \n\t" \
+ #op " %al \n\t" \
+- __FOP_RET(#op)
++ __FOP_RET(#op) \
++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
+
+ asm(".pushsection .fixup, \"ax\"\n"
+- "kvm_fastop_exception: xor %esi, %esi; ret\n"
++ "kvm_fastop_exception: xor %esi, %esi; " ASM_RET
+ ".popsection");
+
+-FOP_START(setcc)
++__FOP_START(setcc, SETCC_ALIGN)
+ FOP_SETCC(seto)
+ FOP_SETCC(setno)
+ FOP_SETCC(setc)
+@@ -779,8 +795,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
+ ctxt->mode, linear);
+ }
+
+-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+- enum x86emul_mode mode)
++static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
+ {
+ ulong linear;
+ int rc;
+@@ -790,41 +805,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+
+ if (ctxt->op_bytes != sizeof(unsigned long))
+ addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
++ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
+ if (rc == X86EMUL_CONTINUE)
+ ctxt->_eip = addr.ea;
+ return rc;
+ }
+
++static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
++{
++ u64 efer;
++ struct desc_struct cs;
++ u16 selector;
++ u32 base3;
++
++ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
++
++ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
++ /* Real mode. cpu must not have long mode active */
++ if (efer & EFER_LMA)
++ return X86EMUL_UNHANDLEABLE;
++ ctxt->mode = X86EMUL_MODE_REAL;
++ return X86EMUL_CONTINUE;
++ }
++
++ if (ctxt->eflags & X86_EFLAGS_VM) {
++ /* Protected/VM86 mode. cpu must not have long mode active */
++ if (efer & EFER_LMA)
++ return X86EMUL_UNHANDLEABLE;
++ ctxt->mode = X86EMUL_MODE_VM86;
++ return X86EMUL_CONTINUE;
++ }
++
++ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
++ return X86EMUL_UNHANDLEABLE;
++
++ if (efer & EFER_LMA) {
++ if (cs.l) {
++ /* Proper long mode */
++ ctxt->mode = X86EMUL_MODE_PROT64;
++ } else if (cs.d) {
++ /* 32 bit compatibility mode*/
++ ctxt->mode = X86EMUL_MODE_PROT32;
++ } else {
++ ctxt->mode = X86EMUL_MODE_PROT16;
++ }
++ } else {
++ /* Legacy 32 bit / 16 bit mode */
++ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
++ }
++
++ return X86EMUL_CONTINUE;
++}
++
+ static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+ {
+- return assign_eip(ctxt, dst, ctxt->mode);
++ return assign_eip(ctxt, dst);
+ }
+
+-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+- const struct desc_struct *cs_desc)
++static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
+ {
+- enum x86emul_mode mode = ctxt->mode;
+- int rc;
++ int rc = emulator_recalc_and_set_mode(ctxt);
+
+-#ifdef CONFIG_X86_64
+- if (ctxt->mode >= X86EMUL_MODE_PROT16) {
+- if (cs_desc->l) {
+- u64 efer = 0;
++ if (rc != X86EMUL_CONTINUE)
++ return rc;
+
+- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+- if (efer & EFER_LMA)
+- mode = X86EMUL_MODE_PROT64;
+- } else
+- mode = X86EMUL_MODE_PROT32; /* temporary value */
+- }
+-#endif
+- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+- rc = assign_eip(ctxt, dst, mode);
+- if (rc == X86EMUL_CONTINUE)
+- ctxt->mode = mode;
+- return rc;
++ return assign_eip(ctxt, dst);
+ }
+
+ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+@@ -1053,7 +1098,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
+ {
+ u8 rc;
+- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
++ void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
+
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ asm("push %[flags]; popf; " CALL_NOSPEC
+@@ -1614,11 +1659,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ goto exception;
+ }
+
+- if (!seg_desc.p) {
+- err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
+- goto exception;
+- }
+-
+ dpl = seg_desc.dpl;
+
+ switch (seg) {
+@@ -1658,12 +1698,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ case VCPU_SREG_TR:
+ if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
+ goto exception;
+- old_desc = seg_desc;
+- seg_desc.type |= 2; /* busy */
+- ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
+- sizeof(seg_desc), &ctxt->exception);
+- if (ret != X86EMUL_CONTINUE)
+- return ret;
+ break;
+ case VCPU_SREG_LDTR:
+ if (seg_desc.s || seg_desc.type != 2)
+@@ -1682,6 +1716,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ break;
+ }
+
++ if (!seg_desc.p) {
++ err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
++ goto exception;
++ }
++
+ if (seg_desc.s) {
+ /* mark segment as accessed */
+ if (!(seg_desc.type & 1)) {
+@@ -1696,8 +1735,17 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ if (ret != X86EMUL_CONTINUE)
+ return ret;
+ if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+- ((u64)base3 << 32), ctxt))
+- return emulate_gp(ctxt, 0);
++ ((u64)base3 << 32), ctxt))
++ return emulate_gp(ctxt, err_code);
++ }
++
++ if (seg == VCPU_SREG_TR) {
++ old_desc = seg_desc;
++ seg_desc.type |= 2; /* busy */
++ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
++ sizeof(seg_desc), &ctxt->exception);
++ if (ret != X86EMUL_CONTINUE)
++ return ret;
+ }
+ load:
+ ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+@@ -1917,7 +1965,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+- if (ctxt->modrm_reg == VCPU_SREG_SS)
++ if (seg == VCPU_SREG_SS)
+ ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+ if (ctxt->op_bytes > 2)
+ rsp_increment(ctxt, ctxt->op_bytes - 2);
+@@ -2134,7 +2182,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
++ rc = assign_eip_far(ctxt, ctxt->src.val);
+ /* Error handling is not implemented. */
+ if (rc != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+@@ -2215,7 +2263,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
+ &new_desc);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+- rc = assign_eip_far(ctxt, eip, &new_desc);
++ rc = assign_eip_far(ctxt, eip);
+ /* Error handling is not implemented. */
+ if (rc != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+@@ -2598,7 +2646,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+ * those side effects need to be explicitly handled for both success
+ * and shutdown.
+ */
+- return X86EMUL_CONTINUE;
++ return emulator_recalc_and_set_mode(ctxt);
+
+ emulate_shutdown:
+ ctxt->ops->triple_fault(ctxt);
+@@ -2842,6 +2890,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
+ ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
+
+ ctxt->_eip = rdx;
++ ctxt->mode = usermode;
+ *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
+
+ return X86EMUL_CONTINUE;
+@@ -3438,7 +3487,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
++ rc = assign_eip_far(ctxt, ctxt->src.val);
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
+
+@@ -3510,8 +3559,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
+ {
+ u64 tsc_aux = 0;
+
+- if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
++ if (!ctxt->ops->guest_has_rdpid(ctxt))
+ return emulate_ud(ctxt);
++
++ ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
+ ctxt->dst.val = tsc_aux;
+ return X86EMUL_CONTINUE;
+ }
+@@ -3578,11 +3629,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
+
+ static int em_cr_write(struct x86_emulate_ctxt *ctxt)
+ {
+- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
++ int cr_num = ctxt->modrm_reg;
++ int r;
++
++ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
+ return emulate_gp(ctxt, 0);
+
+ /* Disable writeback. */
+ ctxt->dst.type = OP_NONE;
++
++ if (cr_num == 0) {
++ /*
++ * CR0 write might have updated CR0.PE and/or CR0.PG
++ * which can affect the cpu's execution mode.
++ */
++ r = emulator_recalc_and_set_mode(ctxt);
++ if (r != X86EMUL_CONTINUE)
++ return r;
++ }
++
+ return X86EMUL_CONTINUE;
+ }
+
+@@ -4101,6 +4166,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
+ {
+ u32 eax, ecx, edx;
+
++ if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
++ return emulate_ud(ctxt);
++
+ eax = reg_read(ctxt, VCPU_REGS_RAX);
+ edx = reg_read(ctxt, VCPU_REGS_RDX);
+ ecx = reg_read(ctxt, VCPU_REGS_RCX);
+diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
+index d5124b520f761..a067c7ce8e19c 100644
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -236,7 +236,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
+ struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
+ int ret;
+
+- if (!synic->active && !host)
++ if (!synic->active && (!host || data))
+ return 1;
+
+ trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
+@@ -282,6 +282,9 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
+ case HV_X64_MSR_EOM: {
+ int i;
+
++ if (!synic->active)
++ break;
++
+ for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
+ kvm_hv_notify_acked_sint(vcpu, i);
+ break;
+@@ -446,6 +449,9 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
+ struct kvm_lapic_irq irq;
+ int ret, vector;
+
++ if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
++ return -EINVAL;
++
+ if (sint >= ARRAY_SIZE(synic->sint))
+ return -EINVAL;
+
+@@ -658,7 +664,7 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
+
+- if (!synic->active && !host)
++ if (!synic->active && (!host || config))
+ return 1;
+
+ if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode &&
+@@ -687,7 +693,7 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
+ struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
+ struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
+
+- if (!synic->active && !host)
++ if (!synic->active && (!host || count))
+ return 1;
+
+ trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
+@@ -1749,7 +1755,7 @@ struct kvm_hv_hcall {
+ sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
+ };
+
+-static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
++static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
+ {
+ int i;
+ gpa_t gpa;
+@@ -1765,7 +1771,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+ int sparse_banks_len;
+ bool all_cpus;
+
+- if (!ex) {
++ if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
++ hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {
+ if (hc->fast) {
+ flush.address_space = hc->ingpa;
+ flush.flags = hc->outgpa;
+@@ -1819,7 +1826,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+
+ if (!all_cpus) {
+ if (hc->fast) {
+- if (sparse_banks_len > HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
++ /* XMM0 is already consumed, each XMM holds two sparse banks. */
++ if (sparse_banks_len > 2 * (HV_HYPERCALL_MAX_XMM_REGISTERS - 1))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ for (i = 0; i < sparse_banks_len; i += 2) {
+ sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]);
+@@ -1838,16 +1846,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+
+ cpumask_clear(&hv_vcpu->tlb_flush);
+
+- vcpu_mask = all_cpus ? NULL :
+- sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+- vp_bitmap, vcpu_bitmap);
+-
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+ * analyze it here, flush TLB regardless of the specified address space.
+ */
+- kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
+- NULL, vcpu_mask, &hv_vcpu->tlb_flush);
++ if (all_cpus) {
++ kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
++ } else {
++ vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
++ vp_bitmap, vcpu_bitmap);
++
++ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
++ NULL, vcpu_mask, &hv_vcpu->tlb_flush);
++ }
+
+ ret_success:
+ /* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
+@@ -1874,7 +1885,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
+ }
+ }
+
+-static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
++static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
+ {
+ struct kvm *kvm = vcpu->kvm;
+ struct hv_send_ipi_ex send_ipi_ex;
+@@ -1887,8 +1898,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+ int sparse_banks_len;
+ u32 vector;
+ bool all_cpus;
++ int i;
+
+- if (!ex) {
++ if (hc->code == HVCALL_SEND_IPI) {
+ if (!hc->fast) {
+ if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
+ sizeof(send_ipi))))
+@@ -1907,9 +1919,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+
+ trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
+ } else {
+- if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
+- sizeof(send_ipi_ex))))
+- return HV_STATUS_INVALID_HYPERCALL_INPUT;
++ if (!hc->fast) {
++ if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
++ sizeof(send_ipi_ex))))
++ return HV_STATUS_INVALID_HYPERCALL_INPUT;
++ } else {
++ send_ipi_ex.vector = (u32)hc->ingpa;
++ send_ipi_ex.vp_set.format = hc->outgpa;
++ send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
++ }
+
+ trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
+ send_ipi_ex.vp_set.format,
+@@ -1917,23 +1935,40 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
+
+ vector = send_ipi_ex.vector;
+ valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
+- sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+- sizeof(sparse_banks[0]);
++ sparse_banks_len = bitmap_weight(&valid_bank_mask, 64);
+
+ all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
+
++ if (all_cpus)
++ goto check_and_send_ipi;
++
+ if (!sparse_banks_len)
+ goto ret_success;
+
+- if (!all_cpus &&
+- kvm_read_guest(kvm,
+- hc->ingpa + offsetof(struct hv_send_ipi_ex,
+- vp_set.bank_contents),
+- sparse_banks,
+- sparse_banks_len))
+- return HV_STATUS_INVALID_HYPERCALL_INPUT;
++ if (!hc->fast) {
++ if (kvm_read_guest(kvm,
++ hc->ingpa + offsetof(struct hv_send_ipi_ex,
++ vp_set.bank_contents),
++ sparse_banks,
++ sparse_banks_len * sizeof(sparse_banks[0])))
++ return HV_STATUS_INVALID_HYPERCALL_INPUT;
++ } else {
++ /*
++ * The lower half of XMM0 is already consumed, each XMM holds
++ * two sparse banks.
++ */
++ if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1))
++ return HV_STATUS_INVALID_HYPERCALL_INPUT;
++ for (i = 0; i < sparse_banks_len; i++) {
++ if (i % 2)
++ sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]);
++ else
++ sparse_banks[i] = sse128_hi(hc->xmm[i / 2]);
++ }
++ }
+ }
+
++check_and_send_ipi:
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+@@ -2022,7 +2057,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
+ {
+ bool longmode;
+
+- longmode = is_64_bit_mode(vcpu);
++ longmode = is_64_bit_hypercall(vcpu);
+ if (longmode)
+ kvm_rax_write(vcpu, result);
+ else {
+@@ -2092,6 +2127,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
++ case HVCALL_SEND_IPI_EX:
+ return true;
+ }
+
+@@ -2171,7 +2207,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+ }
+
+ #ifdef CONFIG_X86_64
+- if (is_64_bit_mode(vcpu)) {
++ if (is_64_bit_hypercall(vcpu)) {
+ hc.param = kvm_rcx_read(vcpu);
+ hc.ingpa = kvm_rdx_read(vcpu);
+ hc.outgpa = kvm_r8_read(vcpu);
+@@ -2243,46 +2279,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+ kvm_hv_hypercall_complete_userspace;
+ return 0;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+- if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
+- ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+- break;
+- }
+- ret = kvm_hv_flush_tlb(vcpu, &hc, false);
+- break;
+- case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+- if (unlikely(hc.rep)) {
+- ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+- break;
+- }
+- ret = kvm_hv_flush_tlb(vcpu, &hc, false);
+- break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+ if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+- ret = kvm_hv_flush_tlb(vcpu, &hc, true);
++ ret = kvm_hv_flush_tlb(vcpu, &hc);
+ break;
++ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+ if (unlikely(hc.rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+- ret = kvm_hv_flush_tlb(vcpu, &hc, true);
++ ret = kvm_hv_flush_tlb(vcpu, &hc);
+ break;
+ case HVCALL_SEND_IPI:
+- if (unlikely(hc.rep)) {
+- ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+- break;
+- }
+- ret = kvm_hv_send_ipi(vcpu, &hc, false);
+- break;
+ case HVCALL_SEND_IPI_EX:
+- if (unlikely(hc.fast || hc.rep)) {
++ if (unlikely(hc.rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+- ret = kvm_hv_send_ipi(vcpu, &hc, true);
++ ret = kvm_hv_send_ipi(vcpu, &hc);
+ break;
+ case HVCALL_POST_DEBUG_DATA:
+ case HVCALL_RETRIEVE_DEBUG_DATA:
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 8c065da73f8e5..4e0f52660842b 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+ {
+ ioapic->rtc_status.pending_eoi = 0;
+- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
++ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+ }
+
+ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
+index bbd4a5d18b5dc..f1b2b2a6ff4db 100644
+--- a/arch/x86/kvm/ioapic.h
++++ b/arch/x86/kvm/ioapic.h
+@@ -39,13 +39,13 @@ struct kvm_vcpu;
+
+ struct dest_map {
+ /* vcpu bitmap where IRQ has been sent */
+- DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
++ DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+
+ /*
+ * Vector sent to a given vcpu, only valid when
+ * the vcpu's bit in map is set
+ */
+- u8 vectors[KVM_MAX_VCPU_ID + 1];
++ u8 vectors[KVM_MAX_VCPU_ID];
+ };
+
+
+@@ -81,7 +81,6 @@ struct kvm_ioapic {
+ unsigned long irq_states[IOAPIC_NUM_PINS];
+ struct kvm_io_device dev;
+ struct kvm *kvm;
+- void (*ack_notifier)(void *opaque, int irq);
+ spinlock_t lock;
+ struct rtc_status rtc_status;
+ struct delayed_work eoi_inject;
+diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
+index 650642b18d151..c2d7cfe82d004 100644
+--- a/arch/x86/kvm/irq.h
++++ b/arch/x86/kvm/irq.h
+@@ -56,7 +56,6 @@ struct kvm_pic {
+ struct kvm_io_device dev_master;
+ struct kvm_io_device dev_slave;
+ struct kvm_io_device dev_elcr;
+- void (*ack_notifier)(void *opaque, int irq);
+ unsigned long irq_states[PIC_NUM_PINS];
+ };
+
+diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
+index 68b420289d7ed..fb09cd22cb7f5 100644
+--- a/arch/x86/kvm/kvm_emulate.h
++++ b/arch/x86/kvm/kvm_emulate.h
+@@ -226,6 +226,7 @@ struct x86_emulate_ops {
+ bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
++ bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
+
+ void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+
+diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
+index d6ac32f3f650c..40fc1879a6970 100644
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -113,7 +113,8 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
+
+ static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+ {
+- return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
++ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
++ (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
+ }
+
+ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+@@ -676,38 +677,32 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
+ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
+ {
+ u8 val;
+- if (pv_eoi_get_user(vcpu, &val) < 0) {
+- printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
+- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
++ if (pv_eoi_get_user(vcpu, &val) < 0)
+ return false;
+- }
++
+ return val & KVM_PV_EOI_ENABLED;
+ }
+
+ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
+ {
+- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
+- printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
+- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
++ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
+ return;
+- }
++
+ __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+ }
+
+ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+ {
+- if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
+- printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
+- (unsigned long long)vcpu->arch.pv_eoi.msr_val);
++ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
+ return;
+- }
++
+ __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+ }
+
+ static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
+ {
+ int highest_irr;
+- if (apic->vcpu->arch.apicv_active)
++ if (kvm_x86_ops.sync_pir_to_irr)
+ highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
+ else
+ highest_irr = apic_find_highest_irr(apic);
+@@ -993,6 +988,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ *r = -1;
+
+ if (irq->shorthand == APIC_DEST_SELF) {
++ if (KVM_BUG_ON(!src, kvm)) {
++ *r = 0;
++ return true;
++ }
+ *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+ return true;
+ }
+@@ -1296,6 +1295,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+
+ kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
+ }
++EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
+
+ static u32 apic_get_tmcct(struct kvm_lapic *apic)
+ {
+@@ -1507,6 +1507,7 @@ static void cancel_apic_timer(struct kvm_lapic *apic)
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
++ atomic_set(&apic->lapic_timer.pending, 0);
+ }
+
+ static void apic_update_lvtt(struct kvm_lapic *apic)
+@@ -2127,11 +2128,14 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
+ break;
+
+ case APIC_SELF_IPI:
+- if (apic_x2apic_mode(apic)) {
+- kvm_lapic_reg_write(apic, APIC_ICR,
+- APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
+- } else
++ /*
++ * Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
++ * the vector, everything else is reserved.
++ */
++ if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
+ ret = 1;
++ else
++ kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
+ break;
+ default:
+ ret = 1;
+@@ -2248,10 +2252,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
+
+ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
+ {
+- struct kvm_lapic *apic = vcpu->arch.apic;
+-
+- apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
+- | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
++ apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
+ }
+
+ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
+@@ -2315,6 +2316,7 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
+ apic->irr_pending = (apic_search_irr(apic) != -1);
+ apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+ }
++ apic->highest_isr_cache = -1;
+ }
+ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
+
+@@ -2367,7 +2369,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
+ kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+ }
+ kvm_apic_update_apicv(vcpu);
+- apic->highest_isr_cache = -1;
+ update_divide_count(apic);
+ atomic_set(&apic->lapic_timer.pending, 0);
+
+@@ -2629,7 +2630,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
+ kvm_apic_set_version(vcpu);
+
+ apic_update_ppr(apic);
+- hrtimer_cancel(&apic->lapic_timer.timer);
++ cancel_apic_timer(apic);
+ apic->lapic_timer.expired_tscdeadline = 0;
+ apic_update_lvtt(apic);
+ apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
+@@ -2637,7 +2638,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
+ __start_apic_timer(apic, APIC_TMCCT);
+ kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
+ kvm_apic_update_apicv(vcpu);
+- apic->highest_isr_cache = -1;
+ if (vcpu->arch.apicv_active) {
+ static_call(kvm_x86_apicv_post_state_restore)(vcpu);
+ static_call(kvm_x86_hwapic_irr_update)(vcpu,
+@@ -2801,6 +2801,10 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+ /* if this is ICR write vector before command */
+ if (reg == APIC_ICR)
+ kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
++ else if (data >> 32)
++ /* Bits 63:32 are reserved in all other registers. */
++ return 1;
++
+ return kvm_lapic_reg_write(apic, reg, (u32)data);
+ }
+
+@@ -2835,6 +2839,10 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
+ /* if this is ICR write vector before command */
+ if (reg == APIC_ICR)
+ kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
++ else if (data >> 32)
++ /* Bits 63:32 are reserved in all other registers. */
++ return 1;
++
+ return kvm_lapic_reg_write(apic, reg, (u32)data);
+ }
+
+diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
+index e9688a9f7b579..7bb165c232334 100644
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -49,6 +49,7 @@
+ X86_CR4_LA57)
+
+ #define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
++#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
+
+ static __always_inline u64 rsvd_bits(int s, int e)
+ {
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 0cc58901bf7a7..4724289c8a7f8 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1071,20 +1071,6 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu)
+ return kvm_mmu_memory_cache_nr_free_objects(mc);
+ }
+
+-static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+-{
+- struct kvm_memory_slot *slot;
+- struct kvm_mmu_page *sp;
+- struct kvm_rmap_head *rmap_head;
+-
+- sp = sptep_to_sp(spte);
+- kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+- rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
+- return pte_list_add(vcpu, spte, rmap_head);
+-}
+-
+-
+ static void rmap_remove(struct kvm *kvm, u64 *spte)
+ {
+ struct kvm_memslots *slots;
+@@ -1097,9 +1083,9 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
+ gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
+
+ /*
+- * Unlike rmap_add and rmap_recycle, rmap_remove does not run in the
+- * context of a vCPU so have to determine which memslots to use based
+- * on context information in sp->role.
++ * Unlike rmap_add, rmap_remove does not run in the context of a vCPU
++ * so we have to determine which memslots to use based on context
++ * information in sp->role.
+ */
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+
+@@ -1592,7 +1578,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+ flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
+
+ if (is_tdp_mmu_enabled(kvm))
+- flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
++ flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+
+ return flush;
+ }
+@@ -1639,19 +1625,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+
+ #define RMAP_RECYCLE_THRESHOLD 1000
+
+-static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
++static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+ {
+ struct kvm_memory_slot *slot;
+- struct kvm_rmap_head *rmap_head;
+ struct kvm_mmu_page *sp;
++ struct kvm_rmap_head *rmap_head;
++ int rmap_count;
+
+ sp = sptep_to_sp(spte);
++ kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
++ rmap_count = pte_list_add(vcpu, spte, rmap_head);
+
+- kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0));
+- kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
+- KVM_PAGES_PER_HPAGE(sp->role.level));
++ if (rmap_count > RMAP_RECYCLE_THRESHOLD) {
++ kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0));
++ kvm_flush_remote_tlbs_with_address(
++ vcpu->kvm, sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
++ }
+ }
+
+ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+@@ -2188,10 +2179,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
+ iterator->shadow_addr = root;
+ iterator->level = vcpu->arch.mmu->shadow_root_level;
+
+- if (iterator->level == PT64_ROOT_4LEVEL &&
++ if (iterator->level >= PT64_ROOT_4LEVEL &&
+ vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+ !vcpu->arch.mmu->direct_map)
+- --iterator->level;
++ iterator->level = PT32E_ROOT_LEVEL;
+
+ if (iterator->level == PT32E_ROOT_LEVEL) {
+ /*
+@@ -2366,6 +2357,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
+ {
+ bool list_unstable;
+
++ lockdep_assert_held_write(&kvm->mmu_lock);
+ trace_kvm_mmu_prepare_zap_page(sp);
+ ++kvm->stat.mmu_shadow_zapped;
+ *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
+@@ -2718,7 +2710,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+ bool host_writable)
+ {
+ int was_rmapped = 0;
+- int rmap_count;
+ int set_spte_ret;
+ int ret = RET_PF_FIXED;
+ bool flush = false;
+@@ -2778,9 +2769,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+
+ if (!was_rmapped) {
+ kvm_update_page_stats(vcpu->kvm, level, 1);
+- rmap_count = rmap_add(vcpu, sptep, gfn);
+- if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+- rmap_recycle(vcpu, sptep, gfn);
++ rmap_add(vcpu, sptep, gfn);
+ }
+
+ return ret;
+@@ -3314,6 +3303,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+ return;
+
+ sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
++ if (WARN_ON(!sp))
++ return;
+
+ if (is_tdp_mmu_page(sp))
+ kvm_tdp_mmu_put_root(kvm, sp, false);
+@@ -3579,7 +3570,7 @@ set_root_pgd:
+ out_unlock:
+ write_unlock(&vcpu->kvm->mmu_lock);
+
+- return 0;
++ return r;
+ }
+
+ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
+@@ -3889,12 +3880,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
+ walk_shadow_page_lockless_end(vcpu);
+ }
+
++static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
++{
++ /* make sure the token value is not 0 */
++ u32 id = vcpu->arch.apf.id;
++
++ if (id << 12 == 0)
++ vcpu->arch.apf.id = 1;
++
++ return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
++}
++
+ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ gfn_t gfn)
+ {
+ struct kvm_arch_async_pf arch;
+
+- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
++ arch.token = alloc_apf_token(vcpu);
+ arch.gfn = gfn;
+ arch.direct_map = vcpu->arch.mmu->direct_map;
+ arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
+@@ -3956,6 +3958,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+
+ *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+ write, writable, hva);
++ return false;
+
+ out_retry:
+ *r = RET_PF_RETRY;
+@@ -4005,16 +4008,17 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+
+ if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
+ goto out_unlock;
+- r = make_mmu_pages_available(vcpu);
+- if (r)
+- goto out_unlock;
+
+- if (is_tdp_mmu_fault)
++ if (is_tdp_mmu_fault) {
+ r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable, max_level,
+ pfn, prefault);
+- else
++ } else {
++ r = make_mmu_pages_available(vcpu);
++ if (r)
++ goto out_unlock;
+ r = __direct_map(vcpu, gpa, error_code, map_writable, max_level, pfn,
+ prefault, is_tdp);
++ }
+
+ out_unlock:
+ if (is_tdp_mmu_fault)
+@@ -4679,6 +4683,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
+ /* PKEY and LA57 are active iff long mode is active. */
+ ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
+ ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
++ ext.efer_lma = ____is_efer_lma(regs);
+ }
+
+ ext.valid = 1;
+@@ -4851,7 +4856,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
+ struct kvm_mmu *context = &vcpu->arch.guest_mmu;
+ struct kvm_mmu_role_regs regs = {
+ .cr0 = cr0,
+- .cr4 = cr4,
++ .cr4 = cr4 & ~X86_CR4_PKE,
+ .efer = efer,
+ };
+ union kvm_mmu_role new_role;
+@@ -4915,7 +4920,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ context->direct_map = false;
+
+ update_permission_bitmask(context, true);
+- update_pkru_bitmask(context);
++ context->pkru_mask = 0;
+ reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+ reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
+ }
+@@ -5368,7 +5373,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+
+ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+ {
+- kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
++ kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
+ ++vcpu->stat.invlpg;
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
+@@ -5381,14 +5386,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
+ uint i;
+
+ if (pcid == kvm_get_active_pcid(vcpu)) {
+- mmu->invlpg(vcpu, gva, mmu->root_hpa);
++ if (mmu->invlpg)
++ mmu->invlpg(vcpu, gva, mmu->root_hpa);
+ tlb_flush = true;
+ }
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+ pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
+- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
++ if (mmu->invlpg)
++ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ tlb_flush = true;
+ }
+ }
+@@ -5473,8 +5480,8 @@ slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot,
+ }
+
+ static __always_inline bool
+-slot_handle_leaf(struct kvm *kvm, const struct kvm_memory_slot *memslot,
+- slot_level_handler fn, bool flush_on_yield)
++slot_handle_level_4k(struct kvm *kvm, const struct kvm_memory_slot *memslot,
++ slot_level_handler fn, bool flush_on_yield)
+ {
+ return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
+ PG_LEVEL_4K, flush_on_yield);
+@@ -5575,6 +5582,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
+ {
+ struct kvm_mmu_page *sp, *node;
+ int nr_zapped, batch = 0;
++ bool unstable;
+
+ restart:
+ list_for_each_entry_safe_reverse(sp, node,
+@@ -5606,11 +5614,12 @@ restart:
+ goto restart;
+ }
+
+- if (__kvm_mmu_prepare_zap_page(kvm, sp,
+- &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
+- batch += nr_zapped;
++ unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
++ &kvm->arch.zapped_obsolete_pages, &nr_zapped);
++ batch += nr_zapped;
++
++ if (unstable)
+ goto restart;
+- }
+ }
+
+ /*
+@@ -5758,13 +5767,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start,
+ gfn_end, flush);
+- if (flush)
+- kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
+- gfn_end - gfn_start);
+ }
+
+ if (flush)
+- kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
++ kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
++ gfn_end - gfn_start);
+
+ kvm_dec_notifier_count(kvm, gfn_start, gfn_end);
+
+@@ -5856,21 +5863,21 @@ restart:
+ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
+ {
+- bool flush = false;
+-
+ if (kvm_memslots_have_rmaps(kvm)) {
+ write_lock(&kvm->mmu_lock);
+- flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
+- if (flush)
++ /*
++ * Zap only 4k SPTEs since the legacy MMU only supports dirty
++ * logging at a 4k granularity and never creates collapsible
++ * 2m SPTEs during dirty logging.
++ */
++ if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
+ kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+ write_unlock(&kvm->mmu_lock);
+ }
+
+ if (is_tdp_mmu_enabled(kvm)) {
+ read_lock(&kvm->mmu_lock);
+- flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
+- if (flush)
+- kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
++ kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
+ read_unlock(&kvm->mmu_lock);
+ }
+ }
+@@ -5897,8 +5904,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+
+ if (kvm_memslots_have_rmaps(kvm)) {
+ write_lock(&kvm->mmu_lock);
+- flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty,
+- false);
++ /*
++ * Clear dirty bits only on 4k SPTEs since the legacy MMU only
++ * support dirty logging at a 4k granularity.
++ */
++ flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+ write_unlock(&kvm->mmu_lock);
+ }
+
+@@ -6091,12 +6101,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+ return 0;
+ }
+
+-int kvm_mmu_module_init(void)
++/*
++ * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
++ * its default value of -1 is technically undefined behavior for a boolean.
++ */
++void __init kvm_mmu_x86_module_init(void)
+ {
+- int ret = -ENOMEM;
+-
+ if (nx_huge_pages == -1)
+ __set_nx_huge_pages(get_nx_auto_mode());
++}
++
++/*
++ * The bulk of the MMU initialization is deferred until the vendor module is
++ * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need
++ * to be reset when a potentially different vendor module is loaded.
++ */
++int kvm_mmu_vendor_module_init(void)
++{
++ int ret = -ENOMEM;
+
+ /*
+ * MMU roles use union aliasing which is, generally speaking, an
+@@ -6168,7 +6190,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
+ mmu_free_memory_caches(vcpu);
+ }
+
+-void kvm_mmu_module_exit(void)
++void kvm_mmu_vendor_module_exit(void)
+ {
+ mmu_destroy_caches();
+ percpu_counter_destroy(&kvm_total_used_mmu_pages);
+diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
+index 21427e84a82ef..630ae70bb6bd3 100644
+--- a/arch/x86/kvm/mmu/page_track.c
++++ b/arch/x86/kvm/mmu/page_track.c
+@@ -36,8 +36,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+ slot->arch.gfn_track[i] =
+- kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
+- GFP_KERNEL_ACCOUNT);
++ __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
++ GFP_KERNEL_ACCOUNT);
+ if (!slot->arch.gfn_track[i])
+ goto track_free;
+ }
+diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
+index 913d52a7923e6..a1811f51eda92 100644
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -34,9 +34,8 @@
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
+ #ifdef CONFIG_X86_64
+ #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
+- #define CMPXCHG cmpxchg
++ #define CMPXCHG "cmpxchgq"
+ #else
+- #define CMPXCHG cmpxchg64
+ #define PT_MAX_FULL_LEVELS 2
+ #endif
+ #elif PTTYPE == 32
+@@ -52,7 +51,7 @@
+ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
+ #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
+- #define CMPXCHG cmpxchg
++ #define CMPXCHG "cmpxchgl"
+ #elif PTTYPE == PTTYPE_EPT
+ #define pt_element_t u64
+ #define guest_walker guest_walkerEPT
+@@ -65,7 +64,9 @@
+ #define PT_GUEST_DIRTY_SHIFT 9
+ #define PT_GUEST_ACCESSED_SHIFT 8
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
+- #define CMPXCHG cmpxchg64
++ #ifdef CONFIG_X86_64
++ #define CMPXCHG "cmpxchgq"
++ #endif
+ #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
+ #else
+ #error Invalid PTTYPE value
+@@ -147,43 +148,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ pt_element_t __user *ptep_user, unsigned index,
+ pt_element_t orig_pte, pt_element_t new_pte)
+ {
+- int npages;
+- pt_element_t ret;
+- pt_element_t *table;
+- struct page *page;
+-
+- npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
+- if (likely(npages == 1)) {
+- table = kmap_atomic(page);
+- ret = CMPXCHG(&table[index], orig_pte, new_pte);
+- kunmap_atomic(table);
+-
+- kvm_release_page_dirty(page);
+- } else {
+- struct vm_area_struct *vma;
+- unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
+- unsigned long pfn;
+- unsigned long paddr;
+-
+- mmap_read_lock(current->mm);
+- vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
+- if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
+- mmap_read_unlock(current->mm);
+- return -EFAULT;
+- }
+- pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+- paddr = pfn << PAGE_SHIFT;
+- table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
+- if (!table) {
+- mmap_read_unlock(current->mm);
+- return -EFAULT;
+- }
+- ret = CMPXCHG(&table[index], orig_pte, new_pte);
+- memunmap(table);
+- mmap_read_unlock(current->mm);
+- }
++ int r = -EFAULT;
++
++ if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
++ return -EFAULT;
++
++#ifdef CMPXCHG
++ asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
++ "mov $0, %[r]\n"
++ "setnz %b[r]\n"
++ "2:"
++ _ASM_EXTABLE_UA(1b, 2b)
++ : [ptr] "+m" (*ptep_user),
++ [old] "+a" (orig_pte),
++ [r] "+q" (r)
++ : [new] "r" (new_pte)
++ : "memory");
++#else
++ asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
++ "movl $0, %[r]\n"
++ "jz 2f\n"
++ "incl %[r]\n"
++ "2:"
++ _ASM_EXTABLE_UA(1b, 2b)
++ : [ptr] "+m" (*ptep_user),
++ [old] "+A" (orig_pte),
++ [r] "+rm" (r)
++ : [new_lo] "b" ((u32)new_pte),
++ [new_hi] "c" ((u32)(new_pte >> 32))
++ : "memory");
++#endif
+
+- return (ret != orig_pte);
++ user_access_end();
++ return r;
+ }
+
+ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
+diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
+index eb7b227fc6cfe..31d6456d8ac33 100644
+--- a/arch/x86/kvm/mmu/spte.h
++++ b/arch/x86/kvm/mmu/spte.h
+@@ -310,12 +310,7 @@ static inline bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check,
+ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
+ u64 spte, int level)
+ {
+- /*
+- * Use a bitwise-OR instead of a logical-OR to aggregate the reserved
+- * bits and EPT's invalid memtype/XWR checks to avoid an extra Jcc
+- * (this is extremely unlikely to be short-circuited as true).
+- */
+- return __is_bad_mt_xwr(rsvd_check, spte) |
++ return __is_bad_mt_xwr(rsvd_check, spte) ||
+ __is_rsvd_bits_set(rsvd_check, spte, level);
+ }
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index b3ed302c1a359..caa96c270b954 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
+ */
+ void tdp_iter_restart(struct tdp_iter *iter)
+ {
++ iter->yielded = false;
+ iter->yielded_gfn = iter->next_last_level_gfn;
+ iter->level = iter->root_level;
+
+@@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter)
+ */
+ void tdp_iter_next(struct tdp_iter *iter)
+ {
++ if (iter->yielded) {
++ tdp_iter_restart(iter);
++ return;
++ }
++
+ if (try_step_down(iter))
+ return;
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index b1748b988d3ae..e19cabbcb65c8 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -45,6 +45,12 @@ struct tdp_iter {
+ * iterator walks off the end of the paging structure.
+ */
+ bool valid;
++ /*
++ * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
++ * which case tdp_iter_next() needs to restart the walk at the root
++ * level instead of advancing to the next entry.
++ */
++ bool yielded;
+ };
+
+ /*
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 64ccfc1fa5535..7a64fb2380448 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -10,7 +10,7 @@
+ #include <asm/cmpxchg.h>
+ #include <trace/events/kvm.h>
+
+-static bool __read_mostly tdp_mmu_enabled = true;
++static bool __read_mostly tdp_mmu_enabled = false;
+ module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
+
+ /* Initializes the TDP MMU for the VM, if enabled. */
+@@ -99,15 +99,18 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
+ }
+
+ /*
+- * Finds the next valid root after root (or the first valid root if root
+- * is NULL), takes a reference on it, and returns that next root. If root
+- * is not NULL, this thread should have already taken a reference on it, and
+- * that reference will be dropped. If no valid root is found, this
+- * function will return NULL.
++ * Returns the next root after @prev_root (or the first root if @prev_root is
++ * NULL). A reference to the returned root is acquired, and the reference to
++ * @prev_root is released (the caller obviously must hold a reference to
++ * @prev_root if it's non-NULL).
++ *
++ * If @only_valid is true, invalid roots are skipped.
++ *
++ * Returns NULL if the end of tdp_mmu_roots was reached.
+ */
+ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ struct kvm_mmu_page *prev_root,
+- bool shared)
++ bool shared, bool only_valid)
+ {
+ struct kvm_mmu_page *next_root;
+
+@@ -121,9 +124,14 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ typeof(*next_root), link);
+
+- while (next_root && !kvm_tdp_mmu_get_root(kvm, next_root))
++ while (next_root) {
++ if ((!only_valid || !next_root->role.invalid) &&
++ kvm_tdp_mmu_get_root(kvm, next_root))
++ break;
++
+ next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ &next_root->link, typeof(*next_root), link);
++ }
+
+ rcu_read_unlock();
+
+@@ -143,13 +151,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ * mode. In the unlikely event that this thread must free a root, the lock
+ * will be temporarily dropped and reacquired in write mode.
+ */
+-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
+- for (_root = tdp_mmu_next_root(_kvm, NULL, _shared); \
+- _root; \
+- _root = tdp_mmu_next_root(_kvm, _root, _shared)) \
+- if (kvm_mmu_page_as_id(_root) != _as_id) { \
++#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\
++ for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \
++ _root; \
++ _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \
++ if (kvm_mmu_page_as_id(_root) != _as_id) { \
+ } else
+
++#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
++ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
++
++#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
++ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, false)
++
+ #define for_each_tdp_mmu_root(_kvm, _root, _as_id) \
+ list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link, \
+ lockdep_is_held_type(&kvm->mmu_lock, 0) || \
+@@ -199,7 +213,10 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
+
+ role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
+
+- /* Check for an existing root before allocating a new one. */
++ /*
++ * Check for an existing root before allocating a new one. Note, the
++ * role check prevents consuming an invalid root.
++ */
+ for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
+ if (root->role.word == role.word &&
+ kvm_tdp_mmu_get_root(kvm, root))
+@@ -316,9 +333,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
+ int level = sp->role.level;
+ gfn_t base_gfn = sp->gfn;
+- u64 old_child_spte;
+- u64 *sptep;
+- gfn_t gfn;
+ int i;
+
+ trace_kvm_mmu_prepare_zap_page(sp);
+@@ -326,8 +340,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
+ tdp_mmu_unlink_page(kvm, sp, shared);
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+- sptep = rcu_dereference(pt) + i;
+- gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
++ u64 *sptep = rcu_dereference(pt) + i;
++ gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
++ u64 old_child_spte;
+
+ if (shared) {
+ /*
+@@ -373,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
+ shared);
+ }
+
+- kvm_flush_remote_tlbs_with_address(kvm, gfn,
++ kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
+ KVM_PAGES_PER_HPAGE(level + 1));
+
+ call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
+@@ -503,6 +518,8 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
+ struct tdp_iter *iter,
+ u64 new_spte)
+ {
++ WARN_ON_ONCE(iter->yielded);
++
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+ /*
+@@ -613,6 +630,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ u64 new_spte, bool record_acc_track,
+ bool record_dirty_log)
+ {
++ WARN_ON_ONCE(iter->yielded);
++
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ /*
+@@ -678,18 +697,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+ * If this function should yield and flush is set, it will perform a remote
+ * TLB flush before yielding.
+ *
+- * If this function yields, it will also reset the tdp_iter's walk over the
+- * paging structure and the calling function should skip to the next
+- * iteration to allow the iterator to continue its traversal from the
+- * paging structure root.
++ * If this function yields, iter->yielded is set and the caller must skip to
++ * the next iteration, where tdp_iter_next() will reset the tdp_iter's walk
++ * over the paging structures to allow the iterator to continue its traversal
++ * from the paging structure root.
+ *
+- * Return true if this function yielded and the iterator's traversal was reset.
+- * Return false if a yield was not needed.
++ * Returns true if this function yielded.
+ */
+-static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
+- struct tdp_iter *iter, bool flush,
+- bool shared)
++static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
++ struct tdp_iter *iter,
++ bool flush, bool shared)
+ {
++ WARN_ON(iter->yielded);
++
+ /* Ensure forward progress has been made before yielding. */
+ if (iter->next_last_level_gfn == iter->yielded_gfn)
+ return false;
+@@ -709,12 +729,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
+
+ WARN_ON(iter->gfn > iter->next_last_level_gfn);
+
+- tdp_iter_restart(iter);
+-
+- return true;
++ iter->yielded = true;
+ }
+
+- return false;
++ return iter->yielded;
+ }
+
+ /*
+@@ -1080,13 +1098,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
+ bool flush)
+ {
+- struct kvm_mmu_page *root;
+-
+- for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
+- flush |= zap_gfn_range(kvm, root, range->start, range->end,
+- range->may_block, flush, false);
+-
+- return flush;
++ return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start,
++ range->end, range->may_block, flush);
+ }
+
+ typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
+@@ -1270,7 +1283,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+- for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
++ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages, min_level);
+
+@@ -1298,6 +1311,9 @@ retry:
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ continue;
+
++ if (!is_shadow_present_pte(iter.old_spte))
++ continue;
++
+ if (spte_ad_need_write_protect(iter.old_spte)) {
+ if (is_writable_pte(iter.old_spte))
+ new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
+@@ -1341,7 +1357,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+- for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
++ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages);
+
+@@ -1415,10 +1431,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ * Clear leaf entries which could be replaced by large mappings, for
+ * GFNs within the slot.
+ */
+-static bool zap_collapsible_spte_range(struct kvm *kvm,
++static void zap_collapsible_spte_range(struct kvm *kvm,
+ struct kvm_mmu_page *root,
+- const struct kvm_memory_slot *slot,
+- bool flush)
++ const struct kvm_memory_slot *slot)
+ {
+ gfn_t start = slot->base_gfn;
+ gfn_t end = start + slot->npages;
+@@ -1429,10 +1444,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+ retry:
+- if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
+- flush = false;
++ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ continue;
+- }
+
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ !is_last_spte(iter.old_spte, iter.level))
+@@ -1444,6 +1457,7 @@ retry:
+ pfn, PG_LEVEL_NUM))
+ continue;
+
++ /* Note, a successful atomic zap also does a remote TLB flush. */
+ if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
+ /*
+ * The iter must explicitly re-read the SPTE because
+@@ -1452,30 +1466,24 @@ retry:
+ iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+ goto retry;
+ }
+- flush = true;
+ }
+
+ rcu_read_unlock();
+-
+- return flush;
+ }
+
+ /*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+- const struct kvm_memory_slot *slot,
+- bool flush)
++void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
++ const struct kvm_memory_slot *slot)
+ {
+ struct kvm_mmu_page *root;
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+- for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+- flush = zap_collapsible_spte_range(kvm, root, slot, flush);
+-
+- return flush;
++ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
++ zap_collapsible_spte_range(kvm, root, slot);
+ }
+
+ /*
+@@ -1500,12 +1508,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
+- if (!is_writable_pte(iter.old_spte))
+- break;
+-
+ new_spte = iter.old_spte &
+ ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
+
++ if (new_spte == iter.old_spte)
++ break;
++
+ tdp_mmu_set_spte(kvm, &iter, new_spte);
+ spte_set = true;
+ }
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
+index 358f447d40120..39468b637d2e4 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.h
++++ b/arch/x86/kvm/mmu/tdp_mmu.h
+@@ -10,9 +10,6 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
+ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm *kvm,
+ struct kvm_mmu_page *root)
+ {
+- if (root->role.invalid)
+- return false;
+-
+ return refcount_inc_not_zero(&root->tdp_mmu_root_count);
+ }
+
+@@ -66,9 +63,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn, unsigned long mask,
+ bool wrprot);
+-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+- const struct kvm_memory_slot *slot,
+- bool flush);
++void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
++ const struct kvm_memory_slot *slot);
+
+ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index 0772bad9165c5..62333f9756a36 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -95,9 +95,8 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+ }
+
+ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
+- unsigned config, bool exclude_user,
+- bool exclude_kernel, bool intr,
+- bool in_tx, bool in_tx_cp)
++ u64 config, bool exclude_user,
++ bool exclude_kernel, bool intr)
+ {
+ struct perf_event *event;
+ struct perf_event_attr attr = {
+@@ -113,16 +112,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
+
+ attr.sample_period = get_sample_period(pmc, pmc->counter);
+
+- if (in_tx)
+- attr.config |= HSW_IN_TX;
+- if (in_tx_cp) {
++ if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
++ guest_cpuid_is_intel(pmc->vcpu)) {
+ /*
+ * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
+ * period. Just clear the sample period so at least
+ * allocating the counter doesn't fail.
+ */
+ attr.sample_period = 0;
+- attr.config |= HSW_IN_TX_CHECKPOINTED;
+ }
+
+ event = perf_event_create_kernel_counter(&attr, -1, current,
+@@ -173,11 +170,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
+
+ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+ {
+- unsigned config, type = PERF_TYPE_RAW;
+- u8 event_select, unit_mask;
++ u64 config;
++ u32 type = PERF_TYPE_RAW;
+ struct kvm *kvm = pmc->vcpu->kvm;
+ struct kvm_pmu_event_filter *filter;
+ int i;
++ struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
+ bool allow_event = true;
+
+ if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+@@ -206,23 +204,18 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+ if (!allow_event)
+ return;
+
+- event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+- unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+-
+ if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
+ ARCH_PERFMON_EVENTSEL_INV |
+ ARCH_PERFMON_EVENTSEL_CMASK |
+ HSW_IN_TX |
+ HSW_IN_TX_CHECKPOINTED))) {
+- config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
+- event_select,
+- unit_mask);
++ config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
+ if (config != PERF_COUNT_HW_MAX)
+ type = PERF_TYPE_HARDWARE;
+ }
+
+ if (type == PERF_TYPE_RAW)
+- config = eventsel & X86_RAW_EVENT_MASK;
++ config = eventsel & pmu->raw_event_mask;
+
+ if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+ return;
+@@ -233,9 +226,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+ pmc_reprogram_counter(pmc, type, config,
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+- eventsel & ARCH_PERFMON_EVENTSEL_INT,
+- (eventsel & HSW_IN_TX),
+- (eventsel & HSW_IN_TX_CHECKPOINTED));
++ eventsel & ARCH_PERFMON_EVENTSEL_INT);
+ }
+ EXPORT_SYMBOL_GPL(reprogram_gp_counter);
+
+@@ -271,7 +262,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
+ kvm_x86_ops.pmu_ops->find_fixed_event(idx),
+ !(en_field & 0x2), /* exclude user */
+ !(en_field & 0x1), /* exclude kernel */
+- pmi, false, false);
++ pmi);
+ }
+ EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
+
+diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
+index 0e4f2b1fa9fbd..c206decb39fab 100644
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -24,8 +24,7 @@ struct kvm_event_hw_type_mapping {
+ };
+
+ struct kvm_pmu_ops {
+- unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select,
+- u8 unit_mask);
++ unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc);
+ unsigned (*find_fixed_event)(int idx);
+ bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
+ struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
+@@ -142,6 +141,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+ return sample_period;
+ }
+
++static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
++{
++ if (!pmc->perf_event || pmc->is_paused)
++ return;
++
++ perf_event_period(pmc->perf_event,
++ get_sample_period(pmc, pmc->counter));
++}
++
+ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
+ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
+ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
+diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
+index a19d473d01847..7eeade35a425b 100644
+--- a/arch/x86/kvm/reverse_cpuid.h
++++ b/arch/x86/kvm/reverse_cpuid.h
+@@ -48,6 +48,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
+ [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
+ [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
+ [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
++ [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
+ };
+
+ /*
+diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
+index 8052d92069e01..b595a33860d70 100644
+--- a/arch/x86/kvm/svm/avic.c
++++ b/arch/x86/kvm/svm/avic.c
+@@ -318,20 +318,24 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
+ trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
+
+ switch (id) {
++ case AVIC_IPI_FAILURE_INVALID_TARGET:
+ case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+ /*
+- * AVIC hardware handles the generation of
+- * IPIs when the specified Message Type is Fixed
+- * (also known as fixed delivery mode) and
+- * the Trigger Mode is edge-triggered. The hardware
+- * also supports self and broadcast delivery modes
+- * specified via the Destination Shorthand(DSH)
+- * field of the ICRL. Logical and physical APIC ID
+- * formats are supported. All other IPI types cause
+- * a #VMEXIT, which needs to emulated.
++ * Emulate IPIs that are not handled by AVIC hardware, which
++ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
++ * if _any_ targets are invalid, e.g. if the logical mode mask
++ * is a superset of running vCPUs.
++ *
++ * The exit is a trap, e.g. ICR holds the correct value and RIP
++ * has been advanced, KVM is responsible only for emulating the
++ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
++ * in which case KVM needs to emulate the ICR write as well in
++ * order to clear the BUSY flag.
+ */
+- kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+- kvm_lapic_reg_write(apic, APIC_ICR, icrl);
++ if (icrl & APIC_ICR_BUSY)
++ kvm_apic_write_nodecode(vcpu, APIC_ICR);
++ else
++ kvm_apic_send_ipi(apic, icrl, icrh);
+ break;
+ case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
+ /*
+@@ -341,10 +345,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
+ */
+ avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
+ break;
+- case AVIC_IPI_FAILURE_INVALID_TARGET:
+- WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
+- index, vcpu->vcpu_id, icrh, icrl);
+- break;
+ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+ WARN_ONCE(1, "Invalid backing page\n");
+ break;
+@@ -801,7 +801,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+ {
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+- int idx, ret = -EINVAL;
++ int idx, ret = 0;
+
+ if (!kvm_arch_has_assigned_device(kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP))
+@@ -812,7 +812,13 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+- WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
++
++ if (guest_irq >= irq_rt->nr_rt_entries ||
++ hlist_empty(&irq_rt->map[guest_irq])) {
++ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
++ guest_irq, irq_rt->nr_rt_entries);
++ goto out;
++ }
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ struct vcpu_data vcpu_info;
+@@ -943,15 +949,10 @@ out:
+ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+ u64 entry;
+- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
+ int h_physical_id = kvm_cpu_get_apicid(cpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+- /*
+- * Since the host physical APIC id is 8 bits,
+- * we can support host APIC ID upto 255.
+- */
+- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
++ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ return;
+
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+@@ -988,16 +989,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
+ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
++ int cpu = get_cpu();
+
++ WARN_ON(cpu != vcpu->cpu);
+ svm->avic_is_running = is_run;
+
+- if (!kvm_vcpu_apicv_active(vcpu))
+- return;
+-
+- if (is_run)
+- avic_vcpu_load(vcpu, vcpu->cpu);
+- else
+- avic_vcpu_put(vcpu);
++ if (kvm_vcpu_apicv_active(vcpu)) {
++ if (is_run)
++ avic_vcpu_load(vcpu, cpu);
++ else
++ avic_vcpu_put(vcpu);
++ }
++ put_cpu();
+ }
+
+ void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
+index 510b833cbd399..e0b4f88b04b3e 100644
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -275,7 +275,8 @@ static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
+ return false;
+ }
+
+- if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
++ /* Note, SVM doesn't have any additional restrictions on CR4. */
++ if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
+ return false;
+
+ return true;
+@@ -750,9 +751,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
+ struct kvm_host_map map;
+ int rc;
+
+- /* Triple faults in L2 should never escape. */
+- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+-
+ rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+@@ -921,6 +919,9 @@ void svm_free_nested(struct vcpu_svm *svm)
+ if (!svm->nested.initialized)
+ return;
+
++ if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
++ svm_switch_vmcb(svm, &svm->vmcb01);
++
+ svm_vcpu_free_msrpm(svm->nested.msrpm);
+ svm->nested.msrpm = NULL;
+
+@@ -939,12 +940,9 @@ void svm_free_nested(struct vcpu_svm *svm)
+ svm->nested.initialized = false;
+ }
+
+-/*
+- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+- */
+-void svm_leave_nested(struct vcpu_svm *svm)
++void svm_leave_nested(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_vcpu *vcpu = &svm->vcpu;
++ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_guest_mode(vcpu)) {
+ svm->nested.nested_run_pending = 0;
+@@ -1313,7 +1311,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+ return -EINVAL;
+
+ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+ return 0;
+ }
+@@ -1357,18 +1355,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+ !nested_vmcb_valid_sregs(vcpu, save))
+ goto out_free;
+
+- /*
+- * While the nested guest CR3 is already checked and set by
+- * KVM_SET_SREGS, it was set when nested state was yet loaded,
+- * thus MMU might not be initialized correctly.
+- * Set it again to fix this.
+- */
+-
+- ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+- nested_npt_enabled(svm), false);
+- if (WARN_ON_ONCE(ret))
+- goto out_free;
+-
+
+ /*
+ * All checks done, we can enter guest mode. Userspace provides
+@@ -1378,7 +1364,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+ */
+
+ if (is_guest_mode(vcpu))
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ else
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+@@ -1394,6 +1380,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ nested_vmcb02_prepare_control(svm);
++
++ /*
++ * While the nested guest CR3 is already checked and set by
++ * KVM_SET_SREGS, it was set when nested state was yet loaded,
++ * thus MMU might not be initialized correctly.
++ * Set it again to fix this.
++ */
++
++ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
++ nested_npt_enabled(svm), false);
++ if (WARN_ON_ONCE(ret))
++ goto out_free;
++
++
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+ ret = 0;
+ out_free:
+@@ -1432,6 +1432,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+ }
+
+ struct kvm_x86_nested_ops svm_nested_ops = {
++ .leave_nested = svm_leave_nested,
+ .check_events = svm_check_nested_events,
+ .triple_fault = nested_svm_triple_fault,
+ .get_nested_state_pages = svm_get_nested_state_pages,
+diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
+index fdf587f19c5fb..d35c94e13afb0 100644
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -44,6 +44,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
+ [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+ };
+
++/* duplicated from amd_f17h_perfmon_event_map. */
++static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
++ [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
++ [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
++ [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES },
++ [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES },
++ [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
++ [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
++ [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
++ [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
++};
++
++/* amd_pmc_perf_hw_id depends on these being the same size */
++static_assert(ARRAY_SIZE(amd_event_mapping) ==
++ ARRAY_SIZE(amd_f17h_event_mapping));
++
+ static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+ {
+ struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+@@ -134,21 +150,27 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
+ return &pmu->gp_counters[msr_to_index(msr)];
+ }
+
+-static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
+- u8 event_select,
+- u8 unit_mask)
++static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
+ {
++ struct kvm_event_hw_type_mapping *event_mapping;
++ u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
++ u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+ int i;
+
++ if (guest_cpuid_family(pmc->vcpu) >= 0x17)
++ event_mapping = amd_f17h_event_mapping;
++ else
++ event_mapping = amd_event_mapping;
++
+ for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
+- if (amd_event_mapping[i].eventsel == event_select
+- && amd_event_mapping[i].unit_mask == unit_mask)
++ if (event_mapping[i].eventsel == event_select
++ && event_mapping[i].unit_mask == unit_mask)
+ break;
+
+ if (i == ARRAY_SIZE(amd_event_mapping))
+ return PERF_COUNT_HW_MAX;
+
+- return amd_event_mapping[i].event_type;
++ return event_mapping[i].event_type;
+ }
+
+ /* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
+@@ -256,17 +278,16 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+ if (pmc) {
+ pmc->counter += data - pmc_read_counter(pmc);
++ pmc_update_sample_period(pmc);
+ return 0;
+ }
+ /* MSR_EVNTSELn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
+ if (pmc) {
+- if (data == pmc->eventsel)
+- return 0;
+- if (!(data & pmu->reserved_bits)) {
++ data &= ~pmu->reserved_bits;
++ if (data != pmc->eventsel)
+ reprogram_gp_counter(pmc, data);
+- return 0;
+- }
++ return 0;
+ }
+
+ return 1;
+@@ -282,7 +303,8 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
+ pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+
+ pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+- pmu->reserved_bits = 0xffffffff00200000ull;
++ pmu->reserved_bits = 0xfffffff000280000ull;
++ pmu->raw_event_mask = AMD64_RAW_EVENT_MASK;
+ pmu->version = 1;
+ /* not applicable to AMD; but clean them to prevent any fall out */
+ pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+@@ -320,7 +342,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
+ }
+
+ struct kvm_pmu_ops amd_pmu_ops = {
+- .find_arch_event = amd_find_arch_event,
++ .pmc_perf_hw_id = amd_pmc_perf_hw_id,
+ .find_fixed_event = amd_find_fixed_event,
+ .pmc_is_enabled = amd_pmc_is_enabled,
+ .pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 7e34d7163adab..93d73b55ae3e6 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -676,7 +676,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
+
+- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return -ENOMEM;
+
+@@ -796,7 +796,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+ if (!IS_ALIGNED(dst_paddr, 16) ||
+ !IS_ALIGNED(paddr, 16) ||
+ !IS_ALIGNED(size, 16)) {
+- tpage = (void *)alloc_page(GFP_KERNEL);
++ tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!tpage)
+ return -ENOMEM;
+
+@@ -832,7 +832,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
+- src_tpage = alloc_page(GFP_KERNEL);
++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!src_tpage)
+ return -ENOMEM;
+
+@@ -853,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+- dst_tpage = alloc_page(GFP_KERNEL);
++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+@@ -1082,7 +1082,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
+
+- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return -ENOMEM;
+
+@@ -1164,7 +1164,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ return -EINVAL;
+
+ /* allocate the memory to hold the session data blob */
+- session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT);
++ session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
+ if (!session_data)
+ return -ENOMEM;
+
+@@ -1277,7 +1277,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+
+ /* Check if we are crossing the page boundary */
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+- if ((params.guest_len + offset > PAGE_SIZE))
++ if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE)
+ return -EINVAL;
+
+ /* Pin guest memory */
+@@ -1288,11 +1288,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+
+ /* allocate memory for header and transport buffer */
+ ret = -ENOMEM;
+- hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
++ hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ if (!hdr)
+ goto e_unpin;
+
+- trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
++ trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ if (!trans_data)
+ goto e_free_hdr;
+
+@@ -1457,7 +1457,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+
+ /* Check if we are crossing the page boundary */
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+- if ((params.guest_len + offset > PAGE_SIZE))
++ if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE)
+ return -EINVAL;
+
+ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+@@ -1787,7 +1787,12 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
+ mutex_unlock(&source_kvm->lock);
+ mutex_lock(&kvm->lock);
+
+- if (sev_guest(kvm)) {
++ /*
++ * Disallow out-of-band SEV/SEV-ES init if the target is already an
++ * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
++ * created after SEV/SEV-ES initialization, e.g. to init intercepts.
++ */
++ if (sev_guest(kvm) || kvm->created_vcpus) {
+ ret = -EINVAL;
+ goto e_mirror_unlock;
+ }
+@@ -1800,6 +1805,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
+ mirror_sev->fd = source_sev.fd;
+ mirror_sev->es_active = source_sev.es_active;
+ mirror_sev->handle = source_sev.handle;
++ INIT_LIST_HEAD(&mirror_sev->regions_list);
+ /*
+ * Do not copy ap_jump_table. Since the mirror does not share the same
+ * KVM contexts as the original, and they may have different
+@@ -1984,11 +1990,14 @@ static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+ unsigned long len)
+ {
+ /*
+- * If hardware enforced cache coherency for encrypted mappings of the
+- * same physical page is supported, nothing to do.
++ * If CPU enforced cache coherency for encrypted mappings of the
++ * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
++ * flush is still needed in order to work properly with DMA devices.
+ */
+- if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
++ if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
++ clflush_cache_range(va, PAGE_SIZE);
+ return;
++ }
+
+ /*
+ * If the VM Page Flush MSR is supported, use it to flush the page
+@@ -2028,6 +2037,14 @@ static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+ wbinvd_on_all_cpus();
+ }
+
++void sev_guest_memory_reclaimed(struct kvm *kvm)
++{
++ if (!sev_guest(kvm))
++ return;
++
++ wbinvd_on_all_cpus();
++}
++
+ void sev_free_vcpu(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm;
+@@ -2311,7 +2328,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ }
+
+ #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
+-static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
++static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ {
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct ghcb *ghcb = svm->ghcb;
+@@ -2322,14 +2339,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+ if (!scratch_gpa_beg) {
+ pr_err("vmgexit: scratch gpa not provided\n");
+- return false;
++ return -EINVAL;
+ }
+
+ scratch_gpa_end = scratch_gpa_beg + len;
+ if (scratch_gpa_end < scratch_gpa_beg) {
+ pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
+ len, scratch_gpa_beg);
+- return false;
++ return -EINVAL;
+ }
+
+ if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+@@ -2347,7 +2364,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ scratch_gpa_end > ghcb_scratch_end) {
+ pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
+ scratch_gpa_beg, scratch_gpa_end);
+- return false;
++ return -EINVAL;
+ }
+
+ scratch_va = (void *)svm->ghcb;
+@@ -2360,18 +2377,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ if (len > GHCB_SCRATCH_AREA_LIMIT) {
+ pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
+ len, GHCB_SCRATCH_AREA_LIMIT);
+- return false;
++ return -EINVAL;
+ }
+ scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
+ if (!scratch_va)
+- return false;
++ return -ENOMEM;
+
+ if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
+ /* Unable to copy scratch area from guest */
+ pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
+
+ kfree(scratch_va);
+- return false;
++ return -EFAULT;
+ }
+
+ /*
+@@ -2387,7 +2404,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+ svm->ghcb_sa = scratch_va;
+ svm->ghcb_sa_len = len;
+
+- return true;
++ return 0;
+ }
+
+ static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+@@ -2526,10 +2543,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+- ret = -EINVAL;
+ switch (exit_code) {
+ case SVM_VMGEXIT_MMIO_READ:
+- if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
++ ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
++ if (ret)
+ break;
+
+ ret = kvm_sev_es_mmio_read(vcpu,
+@@ -2538,7 +2555,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ svm->ghcb_sa);
+ break;
+ case SVM_VMGEXIT_MMIO_WRITE:
+- if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
++ ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
++ if (ret)
+ break;
+
+ ret = kvm_sev_es_mmio_write(vcpu,
+@@ -2581,6 +2599,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+ vcpu_unimpl(vcpu,
+ "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+ control->exit_info_1, control->exit_info_2);
++ ret = -EINVAL;
+ break;
+ default:
+ ret = svm_invoke_exit_handler(vcpu, exit_code);
+@@ -2593,6 +2612,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+ {
+ int count;
+ int bytes;
++ int r;
+
+ if (svm->vmcb->control.exit_info_2 > INT_MAX)
+ return -EINVAL;
+@@ -2601,8 +2621,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+ if (unlikely(check_mul_overflow(count, size, &bytes)))
+ return -EINVAL;
+
+- if (!setup_vmgexit_scratch(svm, in, bytes))
+- return -EINVAL;
++ r = setup_vmgexit_scratch(svm, in, bytes);
++ if (r)
++ return r;
+
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+ }
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 989685098b3ea..8e9a6c41f9eea 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -281,7 +281,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+
+ if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+ if (!(efer & EFER_SVME)) {
+- svm_leave_nested(svm);
++ svm_leave_nested(vcpu);
+ svm_set_gif(svm, true);
+ /* #GP intercept is still needed for vmware backdoor */
+ if (!enable_vmware_backdoor)
+@@ -303,7 +303,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ return ret;
+ }
+
+- if (svm_gp_erratum_intercept)
++ /*
++ * Never intercept #GP for SEV guests, KVM can't
++ * decrypt guest memory to workaround the erratum.
++ */
++ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
+ set_exception_intercept(svm, GP_VECTOR);
+ }
+ }
+@@ -313,12 +317,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ return 0;
+ }
+
+-static int is_external_interrupt(u32 info)
+-{
+- info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
+- return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
+-}
+-
+ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+@@ -390,6 +388,10 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
+ */
+ (void)skip_emulated_instruction(vcpu);
+ rip = kvm_rip_read(vcpu);
++
++ if (boot_cpu_has(X86_FEATURE_NRIPS))
++ svm->vmcb->control.next_rip = rip;
++
+ svm->int3_rip = rip + svm->vmcb->save.cs.base;
+ svm->int3_injected = rip - old_rip;
+ }
+@@ -463,11 +465,24 @@ static int has_svm(void)
+ return 1;
+ }
+
++void __svm_write_tsc_multiplier(u64 multiplier)
++{
++ preempt_disable();
++
++ if (multiplier == __this_cpu_read(current_tsc_ratio))
++ goto out;
++
++ wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
++ __this_cpu_write(current_tsc_ratio, multiplier);
++out:
++ preempt_enable();
++}
++
+ static void svm_hardware_disable(void)
+ {
+ /* Make sure we clean up behind us */
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
+- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
++ __svm_write_tsc_multiplier(TSC_RATIO_DEFAULT);
+
+ cpu_svm_disable();
+
+@@ -509,8 +524,11 @@ static int svm_hardware_enable(void)
+ wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
+
+ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
+- __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
++ /*
++ * Set the default value, even if we don't use TSC scaling
++ * to avoid having stale value in the msr
++ */
++ __svm_write_tsc_multiplier(TSC_RATIO_DEFAULT);
+ }
+
+
+@@ -1123,9 +1141,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+
+ static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+ {
+- wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
++ __svm_write_tsc_multiplier(multiplier);
+ }
+
++
+ /* Evaluate instruction intercepts that depend on guest CPUID features. */
+ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+ struct vcpu_svm *svm)
+@@ -1176,9 +1195,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
+ * Guest access to VMware backdoor ports could legitimately
+ * trigger #GP because of TSS I/O permission bitmap.
+ * We intercept those #GP and allow access to them anyway
+- * as VMware does.
++ * as VMware does. Don't intercept #GP for SEV guests as KVM can't
++ * decrypt guest memory to decode the faulting instruction.
+ */
+- if (enable_vmware_backdoor)
++ if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+ set_exception_intercept(svm, GP_VECTOR);
+
+ svm_set_intercept(svm, INTERCEPT_INTR);
+@@ -1418,6 +1438,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
+ */
+ svm_clear_current_vmcb(svm->vmcb);
+
++ svm_leave_nested(vcpu);
+ svm_free_nested(svm);
+
+ sev_free_vcpu(vcpu);
+@@ -1431,6 +1452,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+
++ amd_clear_divider();
++
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_unmap_ghcb(svm);
+
+@@ -1447,13 +1470,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
+ vmsave(__sme_page_pa(sd->save_area));
+ }
+
+- if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+- u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+- if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
+- __this_cpu_write(current_tsc_ratio, tsc_ratio);
+- wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
+- }
+- }
++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
++ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+
+ if (likely(tsc_aux_uret_slot >= 0))
+ kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
+@@ -1473,7 +1491,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+- indirect_branch_prediction_barrier();
++
++ if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
++ indirect_branch_prediction_barrier();
+ }
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_vcpu_load(vcpu, cpu);
+@@ -1517,6 +1537,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ to_svm(vcpu)->vmcb->save.rflags = rflags;
+ }
+
++static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
++{
++ struct vmcb *vmcb = to_svm(vcpu)->vmcb;
++
++ return sev_es_guest(vcpu->kvm)
++ ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
++ : kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
++}
++
+ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+ {
+ switch (reg) {
+@@ -1709,10 +1738,16 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+ vmcb_mark_dirty(svm->vmcb, VMCB_DT);
+ }
+
++static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++ return true;
++}
++
+ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 hcr0 = cr0;
++ bool old_paging = is_paging(vcpu);
+
+ #ifdef CONFIG_X86_64
+ if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
+@@ -1729,8 +1764,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ #endif
+ vcpu->arch.cr0 = cr0;
+
+- if (!npt_enabled)
++ if (!npt_enabled) {
+ hcr0 |= X86_CR0_PG | X86_CR0_WP;
++ if (old_paging != is_paging(vcpu))
++ svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
++ }
+
+ /*
+ * re-enable caching here because the QEMU bios
+@@ -1774,8 +1812,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ svm_flush_tlb(vcpu);
+
+ vcpu->arch.cr4 = cr4;
+- if (!npt_enabled)
++ if (!npt_enabled) {
+ cr4 |= X86_CR4_PAE;
++
++ if (!is_paging(vcpu))
++ cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
++ }
+ cr4 |= host_cr4_mce;
+ to_svm(vcpu)->vmcb->save.cr4 = cr4;
+ vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+@@ -2224,10 +2266,6 @@ static int gp_interception(struct kvm_vcpu *vcpu)
+ if (error_code)
+ goto reinject;
+
+- /* All SVM instructions expect page aligned RAX */
+- if (svm->vmcb->save.rax & ~PAGE_MASK)
+- goto reinject;
+-
+ /* Decode the instruction for usage later */
+ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
+ goto reinject;
+@@ -2245,8 +2283,13 @@ static int gp_interception(struct kvm_vcpu *vcpu)
+ if (!is_guest_mode(vcpu))
+ return kvm_emulate_instruction(vcpu,
+ EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
+- } else
++ } else {
++ /* All SVM instructions expect page aligned RAX */
++ if (svm->vmcb->save.rax & ~PAGE_MASK)
++ goto reinject;
++
+ return emulate_svm_instr(vcpu, opcode);
++ }
+
+ reinject:
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+@@ -2639,9 +2682,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+ msr->data = 0;
+
+ switch (msr->index) {
+- case MSR_F10H_DECFG:
+- if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+- msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
++ case MSR_AMD64_DE_CFG:
++ if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
++ msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
+ break;
+ case MSR_IA32_PERF_CAPABILITIES:
+ return 0;
+@@ -2750,7 +2793,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ msr_info->data = 0x1E;
+ }
+ break;
+- case MSR_F10H_DECFG:
++ case MSR_AMD64_DE_CFG:
+ msr_info->data = svm->msr_decfg;
+ break;
+ default:
+@@ -2950,7 +2993,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ case MSR_VM_IGNNE:
+ vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ break;
+- case MSR_F10H_DECFG: {
++ case MSR_AMD64_DE_CFG: {
+ struct kvm_msr_entry msr_entry;
+
+ msr_entry.index = msr->index;
+@@ -3332,15 +3375,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
+ return 0;
+ }
+
+- if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
+- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
+- exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
+- printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
+- "exit_code 0x%x\n",
+- __func__, svm->vmcb->control.exit_int_info,
+- exit_code);
+-
+ if (exit_fastpath != EXIT_FASTPATH_NONE)
+ return 1;
+
+@@ -3394,8 +3428,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+- BUG_ON(!(gif_set(svm)));
+-
+ trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
+ ++vcpu->stat.irq_injections;
+
+@@ -3485,14 +3517,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
+ if (!gif_set(svm))
+ return true;
+
+- if (sev_es_guest(vcpu->kvm)) {
+- /*
+- * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
+- * bit to determine the state of the IF flag.
+- */
+- if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+- return true;
+- } else if (is_guest_mode(vcpu)) {
++ if (is_guest_mode(vcpu)) {
+ /* As long as interrupts are being delivered... */
+ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
+ ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
+@@ -3503,7 +3528,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
+ if (nested_exit_on_intr(svm))
+ return false;
+ } else {
+- if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
++ if (!svm_get_if_flag(vcpu))
+ return true;
+ }
+
+@@ -3666,6 +3691,18 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
+ vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
+ type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
+
++ /*
++ * If NextRIP isn't enabled, KVM must manually advance RIP prior to
++ * injecting the soft exception/interrupt. That advancement needs to
++ * be unwound if vectoring didn't complete. Note, the new event may
++ * not be the injected event, e.g. if KVM injected an INTn, the INTn
++ * hit a #NP in the guest, and the #NP encountered a #PF, the #NP will
++ * be the reported vectored event, but RIP still needs to be unwound.
++ */
++ if (int3_injected && type == SVM_EXITINTINFO_TYPE_EXEPT &&
++ kvm_is_linear_rip(vcpu, svm->int3_rip))
++ kvm_rip_write(vcpu, kvm_rip_read(vcpu) - int3_injected);
++
+ switch (type) {
+ case SVM_EXITINTINFO_TYPE_NMI:
+ vcpu->arch.nmi_injected = true;
+@@ -3679,16 +3716,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
+
+ /*
+ * In case of software exceptions, do not reinject the vector,
+- * but re-execute the instruction instead. Rewind RIP first
+- * if we emulated INT3 before.
++ * but re-execute the instruction instead.
+ */
+- if (kvm_exception_is_soft(vector)) {
+- if (vector == BP_VECTOR && int3_injected &&
+- kvm_is_linear_rip(vcpu, svm->int3_rip))
+- kvm_rip_write(vcpu,
+- kvm_rip_read(vcpu) - int3_injected);
++ if (kvm_exception_is_soft(vector))
+ break;
+- }
++
+ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
+ u32 err = svm->vmcb->control.exit_int_info_err;
+ kvm_requeue_exception_e(vcpu, vector, err);
+@@ -3717,8 +3749,14 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
+
+ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ {
+- if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+- to_svm(vcpu)->vmcb->control.exit_info_1)
++ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
++
++ /*
++ * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
++ * can't read guest memory (dereference memslots) to decode the WRMSR.
++ */
++ if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
++ nrips && control->next_rip)
+ return handle_fastpath_set_msr_irqoff(vcpu);
+
+ return EXIT_FASTPATH_NONE;
+@@ -4247,6 +4285,8 @@ out:
+
+ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+ {
++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+@@ -4376,10 +4416,17 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+ * Enter the nested guest now
+ */
+
++ vmcb_mark_all_dirty(svm->vmcb01.ptr);
++
+ vmcb12 = map.hva;
+ nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+
++ if (ret)
++ goto unmap_save;
++
++ svm->nested.nested_run_pending = 1;
++
+ unmap_save:
+ kvm_vcpu_unmap(vcpu, &map_save, true);
+ unmap_map:
+@@ -4405,8 +4452,13 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
+ bool smep, smap, is_user;
+ unsigned long cr4;
+
++ /* Emulation is always possible when KVM has access to all guest state. */
++ if (!sev_guest(vcpu->kvm))
++ return true;
++
+ /*
+- * When the guest is an SEV-ES guest, emulation is not possible.
++ * Emulation is impossible for SEV-ES guests as KVM doesn't have access
++ * to guest register state.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return false;
+@@ -4454,23 +4506,27 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
+ if (likely(!insn || insn_len))
+ return true;
+
+- /*
+- * If RIP is invalid, go ahead with emulation which will cause an
+- * internal error exit.
+- */
+- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+- return true;
+-
+ cr4 = kvm_read_cr4(vcpu);
+ smep = cr4 & X86_CR4_SMEP;
+ smap = cr4 & X86_CR4_SMAP;
+ is_user = svm_get_cpl(vcpu) == 3;
+ if (smap && (!smep || is_user)) {
+- if (!sev_guest(vcpu->kvm))
+- return true;
+-
+ pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
+- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
++
++ /*
++ * If the fault occurred in userspace, arbitrarily inject #GP
++ * to avoid killing the guest and to hopefully avoid confusing
++ * the guest kernel too much, e.g. injecting #PF would not be
++ * coherent with respect to the guest's page tables. Request
++ * triple fault if the fault occurred in the kernel as there's
++ * no fault that KVM can inject without confusing the guest.
++ * In practice, the triple fault is moot as no sane SEV kernel
++ * will execute from user memory while also running with SMAP=1.
++ */
++ if (is_user)
++ kvm_inject_gp(vcpu, 0);
++ else
++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ }
+
+ return false;
+@@ -4549,6 +4605,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .set_segment = svm_set_segment,
+ .get_cpl = svm_get_cpl,
+ .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
++ .is_valid_cr0 = svm_is_valid_cr0,
+ .set_cr0 = svm_set_cr0,
+ .is_valid_cr4 = svm_is_valid_cr4,
+ .set_cr4 = svm_set_cr4,
+@@ -4562,6 +4619,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .cache_reg = svm_cache_reg,
+ .get_rflags = svm_get_rflags,
+ .set_rflags = svm_set_rflags,
++ .get_if_flag = svm_get_if_flag,
+
+ .tlb_flush_all = svm_flush_tlb,
+ .tlb_flush_current = svm_flush_tlb,
+@@ -4592,7 +4650,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .load_eoi_exitmap = svm_load_eoi_exitmap,
+ .hwapic_irr_update = svm_hwapic_irr_update,
+ .hwapic_isr_update = svm_hwapic_isr_update,
+- .sync_pir_to_irr = kvm_lapic_find_highest_irr,
+ .apicv_post_state_restore = avic_post_state_restore,
+
+ .set_tss_addr = svm_set_tss_addr,
+@@ -4635,6 +4692,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+ .mem_enc_op = svm_mem_enc_op,
+ .mem_enc_reg_region = svm_register_enc_region,
+ .mem_enc_unreg_region = svm_unregister_enc_region,
++ .guest_memory_reclaimed = sev_guest_memory_reclaimed,
+
+ .vm_copy_enc_context_from = svm_vm_copy_asid_from,
+
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index 5d30db599e10d..1d9b1a9e4398f 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -22,6 +22,8 @@
+ #include <asm/svm.h>
+ #include <asm/sev-common.h>
+
++#include "kvm_cache_regs.h"
++
+ #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+ #define IOPM_SIZE PAGE_SIZE * 3
+@@ -461,7 +463,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
+
+ int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+ u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
+-void svm_leave_nested(struct vcpu_svm *svm);
++void svm_leave_nested(struct kvm_vcpu *vcpu);
+ void svm_free_nested(struct vcpu_svm *svm);
+ int svm_allocate_nested(struct vcpu_svm *svm);
+ int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+@@ -485,6 +487,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ int nested_svm_exit_special(struct vcpu_svm *svm);
+ void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ struct vmcb_control_area *control);
++void __svm_write_tsc_multiplier(u64 multiplier);
+ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
+ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
+ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
+@@ -497,7 +500,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
+ #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+ #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
++#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+ #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+ #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+ #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+@@ -553,6 +556,8 @@ int svm_register_enc_region(struct kvm *kvm,
+ int svm_unregister_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range);
+ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
++void sev_guest_memory_reclaimed(struct kvm *kvm);
++
+ void pre_sev_run(struct vcpu_svm *svm, int cpu);
+ void __init sev_set_cpu_caps(void);
+ void __init sev_hardware_setup(void);
+diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
+index 98aa981c04ec5..8cdc62c74a964 100644
+--- a/arch/x86/kvm/svm/svm_onhyperv.c
++++ b/arch/x86/kvm/svm/svm_onhyperv.c
+@@ -4,7 +4,6 @@
+ */
+
+ #include <linux/kvm_host.h>
+-#include "kvm_cache_regs.h"
+
+ #include <asm/mshyperv.h>
+
+diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
+index c53b8bf8d0138..3a0c3814a3770 100644
+--- a/arch/x86/kvm/svm/svm_onhyperv.h
++++ b/arch/x86/kvm/svm/svm_onhyperv.h
+@@ -48,7 +48,7 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+ hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+ }
+
+-static inline void svm_hv_hardware_setup(void)
++static inline __init void svm_hv_hardware_setup(void)
+ {
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+@@ -112,7 +112,7 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+ {
+ }
+
+-static inline void svm_hv_hardware_setup(void)
++static inline __init void svm_hv_hardware_setup(void)
+ {
+ }
+
+diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
+index 4fa17df123cd6..f960608555226 100644
+--- a/arch/x86/kvm/svm/vmenter.S
++++ b/arch/x86/kvm/svm/vmenter.S
+@@ -110,6 +110,18 @@ SYM_FUNC_START(__svm_vcpu_run)
+ mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+
++ /*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++ * untrained as soon as we exit the VM and are back to the
++ * kernel. This should be done before re-enabling interrupts
++ * because interrupt handlers won't sanitize 'ret' if the return is
++ * from the kernel.
++ */
++ UNTRAIN_RET
++
++ /* SRSO */
++ ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT
++
+ /*
+ * Clear all general purpose registers except RSP and RAX to prevent
+ * speculative use of the guest's values, even those that are reloaded
+@@ -148,7 +160,7 @@ SYM_FUNC_START(__svm_vcpu_run)
+ pop %edi
+ #endif
+ pop %_ASM_BP
+- ret
++ RET
+
+ 3: cmpb $0, kvm_rebooting
+ jne 2b
+@@ -190,6 +202,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ #endif
+
++ /*
++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
++ * untrained as soon as we exit the VM and are back to the
++ * kernel. This should be done before re-enabling interrupts
++ * because interrupt handlers won't sanitize RET if the return is
++ * from the kernel.
++ */
++ UNTRAIN_RET
++
+ pop %_ASM_BX
+
+ #ifdef CONFIG_X86_64
+@@ -202,7 +223,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
+ pop %edi
+ #endif
+ pop %_ASM_BP
+- ret
++ RET
+
+ 3: cmpb $0, kvm_rebooting
+ jne 2b
+diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
+index 03ebe368333ef..c41506ed8c7dd 100644
+--- a/arch/x86/kvm/trace.h
++++ b/arch/x86/kvm/trace.h
+@@ -355,25 +355,29 @@ TRACE_EVENT(kvm_inj_virq,
+ * Tracepoint for kvm interrupt injection:
+ */
+ TRACE_EVENT(kvm_inj_exception,
+- TP_PROTO(unsigned exception, bool has_error, unsigned error_code),
+- TP_ARGS(exception, has_error, error_code),
++ TP_PROTO(unsigned exception, bool has_error, unsigned error_code,
++ bool reinjected),
++ TP_ARGS(exception, has_error, error_code, reinjected),
+
+ TP_STRUCT__entry(
+ __field( u8, exception )
+ __field( u8, has_error )
+ __field( u32, error_code )
++ __field( bool, reinjected )
+ ),
+
+ TP_fast_assign(
+ __entry->exception = exception;
+ __entry->has_error = has_error;
+ __entry->error_code = error_code;
++ __entry->reinjected = reinjected;
+ ),
+
+- TP_printk("%s (0x%x)",
++ TP_printk("%s (0x%x)%s",
+ __print_symbolic(__entry->exception, kvm_trace_sym_exc),
+ /* FIXME: don't print error_code if not present */
+- __entry->has_error ? __entry->error_code : 0)
++ __entry->has_error ? __entry->error_code : 0,
++ __entry->reinjected ? " [reinjected]" : "")
+ );
+
+ /*
+diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
+index ba6f99f584ac3..a7ed30d5647af 100644
+--- a/arch/x86/kvm/vmx/evmcs.c
++++ b/arch/x86/kvm/vmx/evmcs.c
+@@ -362,6 +362,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ break;
++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ break;
+diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
+index 152ab0aa82cf6..57451cf622d3e 100644
+--- a/arch/x86/kvm/vmx/evmcs.h
++++ b/arch/x86/kvm/vmx/evmcs.h
+@@ -59,7 +59,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
+ SECONDARY_EXEC_SHADOW_VMCS | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+-#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
++#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
++ (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
++ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+ #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
+
+@@ -160,16 +162,6 @@ static inline u16 evmcs_read16(unsigned long field)
+ return *(u16 *)((char *)current_evmcs + offset);
+ }
+
+-static inline void evmcs_touch_msr_bitmap(void)
+-{
+- if (unlikely(!current_evmcs))
+- return;
+-
+- if (current_evmcs->hv_enlightenments_control.msr_bitmap)
+- current_evmcs->hv_clean_fields &=
+- ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+-}
+-
+ static inline void evmcs_load(u64 phys_addr)
+ {
+ struct hv_vp_assist_page *vp_ap =
+@@ -190,7 +182,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }
+ static inline u32 evmcs_read32(unsigned long field) { return 0; }
+ static inline u16 evmcs_read16(unsigned long field) { return 0; }
+ static inline void evmcs_load(u64 phys_addr) {}
+-static inline void evmcs_touch_msr_bitmap(void) {}
+ #endif /* IS_ENABLED(CONFIG_HYPERV) */
+
+ #define EVMPTR_INVALID (-1ULL)
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index eedcebf580041..e4e4c1d3aa179 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -523,29 +523,6 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
+ return 0;
+ }
+
+-/*
+- * Check if MSR is intercepted for L01 MSR bitmap.
+- */
+-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+-{
+- unsigned long *msr_bitmap;
+- int f = sizeof(unsigned long);
+-
+- if (!cpu_has_vmx_msr_bitmap())
+- return true;
+-
+- msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+-
+- if (msr <= 0x1fff) {
+- return !!test_bit(msr, msr_bitmap + 0x800 / f);
+- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+- msr &= 0x1fff;
+- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+- }
+-
+- return true;
+-}
+-
+ /*
+ * If a msr is allowed by L0, we should check whether it is allowed by L1.
+ * The corresponding bit will be cleared unless both of L0 and L1 allow it.
+@@ -599,6 +576,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+ }
+ }
+
++#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
++static inline \
++void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
++ unsigned long *msr_bitmap_l1, \
++ unsigned long *msr_bitmap_l0, u32 msr) \
++{ \
++ if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
++ vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
++ vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
++ else \
++ vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
++}
++BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
++BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
++
++static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
++ unsigned long *msr_bitmap_l1,
++ unsigned long *msr_bitmap_l0,
++ u32 msr, int types)
++{
++ if (types & MSR_TYPE_R)
++ nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
++ msr_bitmap_l0, msr);
++ if (types & MSR_TYPE_W)
++ nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
++ msr_bitmap_l0, msr);
++}
++
+ /*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+@@ -606,10 +611,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+ {
++ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int msr;
+ unsigned long *msr_bitmap_l1;
+- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+- struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
++ unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
++ struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
+
+ /* Nothing to do if the MSR bitmap is not in use. */
+ if (!cpu_has_vmx_msr_bitmap() ||
+@@ -660,44 +666,27 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ }
+ }
+
+- /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
++ /*
++ * Always check vmcs01's bitmap to honor userspace MSR filters and any
++ * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
++ */
+ #ifdef CONFIG_X86_64
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_FS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_FS_BASE, MSR_TYPE_RW);
+
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_GS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_GS_BASE, MSR_TYPE_RW);
+
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ #endif
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
+
+- /*
+- * Checking the L0->L1 bitmap is trying to verify two things:
+- *
+- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+- * ensures that we do not accidentally generate an L02 MSR bitmap
+- * from the L12 MSR bitmap that is too permissive.
+- * 2. That L1 or L2s have actually used the MSR. This avoids
+- * unnecessarily merging of the bitmap if the MSR is unused. This
+- * works properly because we only update the L01 MSR bitmap lazily.
+- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+- * updated to reflect this when L1 (or its L2s) actually write to
+- * the MSR.
+- */
+- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
+- nested_vmx_disable_intercept_for_msr(
+- msr_bitmap_l1, msr_bitmap_l0,
+- MSR_IA32_SPEC_CTRL,
+- MSR_TYPE_R | MSR_TYPE_W);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_PRED_CMD, MSR_TYPE_W);
+
+- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
+- nested_vmx_disable_intercept_for_msr(
+- msr_bitmap_l1, msr_bitmap_l0,
+- MSR_IA32_PRED_CMD,
+- MSR_TYPE_W);
+-
+- kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
++ kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
+
+ return true;
+ }
+@@ -1191,29 +1180,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
+ WARN_ON(!enable_vpid);
+
+ /*
+- * If VPID is enabled and used by vmc12, but L2 does not have a unique
+- * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
+- * a VPID for L2, flush the current context as the effective ASID is
+- * common to both L1 and L2.
+- *
+- * Defer the flush so that it runs after vmcs02.EPTP has been set by
+- * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
+- * redundant flushes further down the nested pipeline.
+- *
+- * If a TLB flush isn't required due to any of the above, and vpid12 is
+- * changing then the new "virtual" VPID (vpid12) will reuse the same
+- * "real" VPID (vpid02), and so needs to be flushed. There's no direct
+- * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
+- * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
+- * guest-physical mappings, so there is no need to sync the nEPT MMU.
++ * VPID is enabled and in use by vmcs12. If vpid12 is changing, then
++ * emulate a guest TLB flush as KVM does not track vpid12 history nor
++ * is the VPID incorporated into the MMU context. I.e. KVM must assume
++ * that the new vpid12 has never been used and thus represents a new
++ * guest ASID that cannot have entries in the TLB.
+ */
+- if (!nested_has_guest_tlb_tag(vcpu)) {
+- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+- } else if (is_vmenter &&
+- vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
++ if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+ vmx->nested.last_vpid = vmcs12->virtual_processor_id;
+- vpid_sync_context(nested_get_vpid02(vcpu));
++ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
++ return;
+ }
++
++ /*
++ * If VPID is enabled, used by vmc12, and vpid12 is not changing but
++ * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
++ * KVM was unable to allocate a VPID for L2, flush the current context
++ * as the effective ASID is common to both L1 and L2.
++ */
++ if (!nested_has_guest_tlb_tag(vcpu))
++ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+
+ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
+@@ -1231,7 +1217,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+ /* reserved */
+ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+- u64 vmx_basic = vmx->nested.msrs.basic;
++ u64 vmx_basic = vmcs_config.nested.basic;
+
+ if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+ return -EINVAL;
+@@ -1254,36 +1240,42 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+ return 0;
+ }
+
+-static int
+-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
++ u32 **low, u32 **high)
+ {
+- u64 supported;
+- u32 *lowp, *highp;
+-
+ switch (msr_index) {
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+- lowp = &vmx->nested.msrs.pinbased_ctls_low;
+- highp = &vmx->nested.msrs.pinbased_ctls_high;
++ *low = &msrs->pinbased_ctls_low;
++ *high = &msrs->pinbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+- lowp = &vmx->nested.msrs.procbased_ctls_low;
+- highp = &vmx->nested.msrs.procbased_ctls_high;
++ *low = &msrs->procbased_ctls_low;
++ *high = &msrs->procbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+- lowp = &vmx->nested.msrs.exit_ctls_low;
+- highp = &vmx->nested.msrs.exit_ctls_high;
++ *low = &msrs->exit_ctls_low;
++ *high = &msrs->exit_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+- lowp = &vmx->nested.msrs.entry_ctls_low;
+- highp = &vmx->nested.msrs.entry_ctls_high;
++ *low = &msrs->entry_ctls_low;
++ *high = &msrs->entry_ctls_high;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+- lowp = &vmx->nested.msrs.secondary_ctls_low;
+- highp = &vmx->nested.msrs.secondary_ctls_high;
++ *low = &msrs->secondary_ctls_low;
++ *high = &msrs->secondary_ctls_high;
+ break;
+ default:
+ BUG();
+ }
++}
++
++static int
++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++ u32 *lowp, *highp;
++ u64 supported;
++
++ vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
+
+ supported = vmx_control_msr(*lowp, *highp);
+
+@@ -1295,6 +1287,7 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+ if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+ return -EINVAL;
+
++ vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
+ *lowp = data;
+ *highp = data >> 32;
+ return 0;
+@@ -1308,10 +1301,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+ BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+ /* reserved */
+ GENMASK_ULL(13, 9) | BIT_ULL(31);
+- u64 vmx_misc;
+-
+- vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+- vmx->nested.msrs.misc_high);
++ u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
++ vmcs_config.nested.misc_high);
+
+ if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+ return -EINVAL;
+@@ -1339,10 +1330,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+
+ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+ {
+- u64 vmx_ept_vpid_cap;
+-
+- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+- vmx->nested.msrs.vpid_caps);
++ u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
++ vmcs_config.nested.vpid_caps);
+
+ /* Every bit is either reserved or a feature bit. */
+ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+@@ -1353,20 +1342,21 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+ return 0;
+ }
+
+-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
+ {
+- u64 *msr;
+-
+ switch (msr_index) {
+ case MSR_IA32_VMX_CR0_FIXED0:
+- msr = &vmx->nested.msrs.cr0_fixed0;
+- break;
++ return &msrs->cr0_fixed0;
+ case MSR_IA32_VMX_CR4_FIXED0:
+- msr = &vmx->nested.msrs.cr4_fixed0;
+- break;
++ return &msrs->cr4_fixed0;
+ default:
+ BUG();
+ }
++}
++
++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++ const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
+
+ /*
+ * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+@@ -1375,7 +1365,7 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+ if (!is_bitwise_subset(data, *msr, -1ULL))
+ return -EINVAL;
+
+- *msr = data;
++ *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
+ return 0;
+ }
+
+@@ -1436,7 +1426,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+ vmx->nested.msrs.vmcs_enum = data;
+ return 0;
+ case MSR_IA32_VMX_VMFUNC:
+- if (data & ~vmx->nested.msrs.vmfunc_controls)
++ if (data & ~vmcs_config.nested.vmfunc_controls)
+ return -EINVAL;
+ vmx->nested.msrs.vmfunc_controls = data;
+ return 0;
+@@ -2283,7 +2273,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_ENABLE_VMFUNC |
+- SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_DESC);
+
+ if (nested_cpu_has(vmcs12,
+@@ -2324,9 +2313,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
+ * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
+ * on the related bits (if supported by the CPU) in the hope that
+ * we can avoid VMWrites during vmx_set_efer().
++ *
++ * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
++ * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
++ * do the same for L2.
+ */
+ exec_control = __vm_entry_controls_get(vmcs01);
+- exec_control |= vmcs12->vm_entry_controls;
++ exec_control |= (vmcs12->vm_entry_controls &
++ ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
+ exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
+ if (cpu_has_load_ia32_efer()) {
+ if (guest_efer & EFER_LMA)
+@@ -2622,9 +2616,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
+ WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+- vmcs12->guest_ia32_perf_global_ctrl)))
++ vmcs12->guest_ia32_perf_global_ctrl))) {
++ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return -EINVAL;
++ }
+
+ kvm_rsp_write(vcpu, vmcs12->guest_rsp);
+ kvm_rip_write(vcpu, vmcs12->guest_rip);
+@@ -2865,6 +2862,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
+ return 0;
+ }
+
++static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
++ struct vmcs12 *vmcs12)
++{
++#ifdef CONFIG_X86_64
++ if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
++ !!(vcpu->arch.efer & EFER_LMA)))
++ return -EINVAL;
++#endif
++ return 0;
++}
++
+ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+ {
+@@ -2889,18 +2897,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
+ return -EINVAL;
+
+ #ifdef CONFIG_X86_64
+- ia32e = !!(vcpu->arch.efer & EFER_LMA);
++ ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
+ #else
+ ia32e = false;
+ #endif
+
+ if (ia32e) {
+- if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
+- CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
++ if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+ return -EINVAL;
+ } else {
+- if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
+- CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
++ if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+ CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
+ CC((vmcs12->host_rip) >> 32))
+ return -EINVAL;
+@@ -2985,7 +2991,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ enum vm_entry_failure_code *entry_failure_code)
+ {
+- bool ia32e;
++ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
+
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+
+@@ -3011,6 +3017,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ vmcs12->guest_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
++ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
++ return -EINVAL;
++
++ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
++ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
++ return -EINVAL;
++
+ /*
+ * If the load IA32_EFER VM-entry control is 1, the following checks
+ * are performed on the field for the IA32_EFER MSR:
+@@ -3022,7 +3035,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ */
+ if (to_vmx(vcpu)->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
+- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+ if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
+ CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
+ CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
+@@ -3080,7 +3092,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
+ }
+
+ vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+- vmx->loaded_vmcs->launched);
++ __vmx_vcpu_run_flags(vmx));
+
+ if (vmx->msr_autoload.host.nr)
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+@@ -3360,18 +3372,19 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ };
+ u32 failed_index;
+
+- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+- kvm_vcpu_flush_tlb_current(vcpu);
++ kvm_service_local_tlb_flush_requests(vcpu);
+
+ evaluate_pending_interrupts = exec_controls_get(vmx) &
+ (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
+
+- if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
++ if (!vmx->nested.nested_run_pending ||
++ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+ vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ if (kvm_mpx_supported() &&
+- !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
++ (!vmx->nested.nested_run_pending ||
++ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
+ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+
+ /*
+@@ -3570,6 +3583,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+ if (nested_vmx_check_controls(vcpu, vmcs12))
+ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
++ if (nested_vmx_check_address_space_size(vcpu, vmcs12))
++ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
++
+ if (nested_vmx_check_host_state(vcpu, vmcs12))
+ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
+@@ -3679,12 +3695,34 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+ }
+
+ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+- struct vmcs12 *vmcs12)
++ struct vmcs12 *vmcs12,
++ u32 vm_exit_reason, u32 exit_intr_info)
+ {
+ u32 idt_vectoring;
+ unsigned int nr;
+
+- if (vcpu->arch.exception.injected) {
++ /*
++ * Per the SDM, VM-Exits due to double and triple faults are never
++ * considered to occur during event delivery, even if the double/triple
++ * fault is the result of an escalating vectoring issue.
++ *
++ * Note, the SDM qualifies the double fault behavior with "The original
++ * event results in a double-fault exception". It's unclear why the
++ * qualification exists since exits due to double fault can occur only
++ * while vectoring a different exception (injected events are never
++ * subject to interception), i.e. there's _always_ an original event.
++ *
++ * The SDM also uses NMI as a confusing example for the "original event
++ * causes the VM exit directly" clause. NMI isn't special in any way,
++ * the same rule applies to all events that cause an exit directly.
++ * NMI is an odd choice for the example because NMIs can only occur on
++ * instruction boundaries, i.e. they _can't_ occur during vectoring.
++ */
++ if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT ||
++ ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI &&
++ is_double_fault(exit_intr_info))) {
++ vmcs12->idt_vectoring_info_field = 0;
++ } else if (vcpu->arch.exception.injected) {
+ nr = vcpu->arch.exception.nr;
+ idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+@@ -3717,6 +3755,8 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+ idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+ vmcs12->idt_vectoring_info_field = idt_vectoring;
++ } else {
++ vmcs12->idt_vectoring_info_field = 0;
+ }
+ }
+
+@@ -3792,7 +3832,16 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
+ u32 intr_info = nr | INTR_INFO_VALID_MASK;
+
+ if (vcpu->arch.exception.has_error_code) {
+- vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
++ /*
++ * Intel CPUs do not generate error codes with bits 31:16 set,
++ * and more importantly VMX disallows setting bits 31:16 in the
++ * injected error code for VM-Entry. Drop the bits to mimic
++ * hardware and avoid inducing failure on nested VM-Entry if L1
++ * chooses to inject the exception back to L2. AMD CPUs _do_
++ * generate "full" 32-bit error codes, so KVM allows userspace
++ * to inject exception error codes with bits 31:16 set.
++ */
++ vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code;
+ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+ }
+
+@@ -4186,12 +4235,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ if (to_vmx(vcpu)->exit_reason.enclave_mode)
+ vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
+ vmcs12->exit_qualification = exit_qualification;
+- vmcs12->vm_exit_intr_info = exit_intr_info;
+-
+- vmcs12->idt_vectoring_info_field = 0;
+- vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+- vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+
++ /*
++ * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched
++ * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other
++ * exit info fields are unmodified.
++ */
+ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+ vmcs12->launch_state = 1;
+
+@@ -4203,7 +4252,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ * Transfer the event that L0 or L1 may wanted to inject into
+ * L2 to IDT_VECTORING_INFO_FIELD.
+ */
+- vmcs12_save_pending_event(vcpu, vmcs12);
++ vmcs12_save_pending_event(vcpu, vmcs12,
++ vm_exit_reason, exit_intr_info);
++
++ vmcs12->vm_exit_intr_info = exit_intr_info;
++ vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
++ vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+
+ /*
+ * According to spec, there's no need to store the guest's
+@@ -4217,14 +4271,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ nested_vmx_abort(vcpu,
+ VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ }
+-
+- /*
+- * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+- * preserved above and would only end up incorrectly in L1.
+- */
+- vcpu->arch.nmi_injected = false;
+- kvm_clear_exception_queue(vcpu);
+- kvm_clear_interrupt_queue(vcpu);
+ }
+
+ /*
+@@ -4296,7 +4342,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+ vcpu->arch.pat = vmcs12->host_ia32_pat;
+ }
+- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
++ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
+ WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl));
+
+@@ -4502,9 +4549,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ /* trying to cancel vmlaunch/vmresume is a bug */
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
+- /* Similarly, triple faults in L2 should never escape. */
+- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+-
+ if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+ /*
+ * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+@@ -4515,9 +4559,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ (void)nested_get_evmcs_page(vcpu);
+ }
+
+- /* Service the TLB flush request for L2 before switching to L1. */
+- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+- kvm_vcpu_flush_tlb_current(vcpu);
++ /* Service pending TLB flush requests for L2 before switching to L1. */
++ kvm_service_local_tlb_flush_requests(vcpu);
+
+ /*
+ * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
+@@ -4567,8 +4610,30 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ WARN_ON_ONCE(nested_early_check);
+ }
+
++ /*
++ * Drop events/exceptions that were queued for re-injection to L2
++ * (picked up via vmx_complete_interrupts()), as well as exceptions
++ * that were pending for L2. Note, this must NOT be hoisted above
++ * prepare_vmcs12(), events/exceptions queued for re-injection need to
++ * be captured in vmcs12 (see vmcs12_save_pending_event()).
++ */
++ vcpu->arch.nmi_injected = false;
++ kvm_clear_exception_queue(vcpu);
++ kvm_clear_interrupt_queue(vcpu);
++
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+
++ /*
++ * If IBRS is advertised to the vCPU, KVM must flush the indirect
++ * branch predictors when transitioning from L2 to L1, as L1 expects
++ * hardware (KVM in this case) to provide separate predictor modes.
++ * Bare metal isolates VMX root (host) from VMX non-root (guest), but
++ * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
++ * separate modes for L2 vs L1.
++ */
++ if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
++ indirect_branch_prediction_barrier();
++
+ /* Update any VMCS fields that might have changed while L2 ran */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+@@ -4603,6 +4668,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+ }
+
++ if (vmx->nested.update_vmcs01_apicv_status) {
++ vmx->nested.update_vmcs01_apicv_status = false;
++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++ }
++
+ if ((vm_exit_reason != -1) &&
+ (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
+@@ -4917,20 +4987,36 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+
+ /*
+- * The Intel VMX Instruction Reference lists a bunch of bits that are
+- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
+- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
+- * Otherwise, we should fail with #UD. But most faulting conditions
+- * have already been checked by hardware, prior to the VM-exit for
+- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
+- * that bit set to 1 in non-root mode.
++ * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter
++ * the guest and so cannot rely on hardware to perform the check,
++ * which has higher priority than VM-Exit (see Intel SDM's pseudocode
++ * for VMXON).
++ *
++ * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
++ * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't
++ * force any of the relevant guest state. For a restricted guest, KVM
++ * does force CR0.PE=1, but only to also force VM86 in order to emulate
++ * Real Mode, and so there's no need to check CR0.PE manually.
+ */
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+- /* CPL=0 must be checked manually. */
++ /*
++ * The CPL is checked for "not in VMX operation" and for "in VMX root",
++ * and has higher priority than the VM-Fail due to being post-VMXON,
++ * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root,
++ * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
++ * from L2 to L1, i.e. there's no need to check for the vCPU being in
++ * VMX non-root.
++ *
++ * Forwarding the VM-Exit unconditionally, i.e. without performing the
++ * #UD checks (see above), is functionally ok because KVM doesn't allow
++ * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
++ * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
++ * missed by hardware due to shadowing CR0 and/or CR4.
++ */
+ if (vmx_get_cpl(vcpu)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+@@ -4939,6 +5025,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+ if (vmx->nested.vmxon)
+ return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
+
++ /*
++ * Invalid CR0/CR4 generates #GP. These checks are performed if and
++ * only if the vCPU isn't already in VMX operation, i.e. effectively
++ * have lower priority than the VM-Fail above.
++ */
++ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
++ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
++ kvm_inject_gp(vcpu, 0);
++ return 1;
++ }
++
+ if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+ != VMXON_NEEDED_FEATURES) {
+ kvm_inject_gp(vcpu, 0);
+@@ -6218,9 +6315,6 @@ out:
+ return kvm_state.size;
+ }
+
+-/*
+- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+- */
+ void vmx_leave_nested(struct kvm_vcpu *vcpu)
+ {
+ if (is_guest_mode(vcpu)) {
+@@ -6589,7 +6683,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ SECONDARY_EXEC_ENABLE_INVPCID |
+ SECONDARY_EXEC_RDSEED_EXITING |
+ SECONDARY_EXEC_XSAVES |
+- SECONDARY_EXEC_TSC_SCALING;
++ SECONDARY_EXEC_TSC_SCALING |
++ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
+ /*
+ * We can emulate "VMCS shadowing," even if the hardware
+@@ -6697,6 +6792,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
+
++ if (vmx_umip_emulated())
++ msrs->cr4_fixed1 |= X86_CR4_UMIP;
++
+ msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
+ }
+
+@@ -6750,6 +6848,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
+ }
+
+ struct kvm_x86_nested_ops vmx_nested_ops = {
++ .leave_nested = vmx_leave_nested,
+ .check_events = vmx_check_nested_events,
+ .hv_timer_pending = nested_vmx_preemption_timer_pending,
+ .triple_fault = nested_vmx_triple_fault,
+diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
+index b69a80f43b37e..2d0ac8a86d4a4 100644
+--- a/arch/x86/kvm/vmx/nested.h
++++ b/arch/x86/kvm/vmx/nested.h
+@@ -280,7 +280,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
+
+- return fixed_bits_valid(val, fixed0, fixed1);
++ return fixed_bits_valid(val, fixed0, fixed1) &&
++ __kvm_is_valid_cr4(vcpu, val);
+ }
+
+ /* No difference in the restrictions on guest and host CR4 in VMX operation. */
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 10cc4f65c4efd..e624a39365ecb 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -68,10 +68,11 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+ reprogram_counter(pmu, bit);
+ }
+
+-static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
+- u8 event_select,
+- u8 unit_mask)
++static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
+ {
++ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
++ u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
++ u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
+@@ -103,6 +104,9 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
+ {
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
++ if (!intel_pmu_has_perf_global_ctrl(pmu))
++ return true;
++
+ return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+ }
+
+@@ -218,7 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+- ret = pmu->version > 1;
++ return intel_pmu_has_perf_global_ctrl(pmu);
+ break;
+ default:
+ ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
+@@ -395,12 +399,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ struct kvm_pmc *pmc;
+ u32 msr = msr_info->index;
+ u64 data = msr_info->data;
++ u64 reserved_bits;
+
+ switch (msr) {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ if (pmu->fixed_ctr_ctrl == data)
+ return 0;
+- if (!(data & 0xfffffffffffff444ull)) {
++ if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+ reprogram_fixed_counters(pmu, data);
+ return 0;
+ }
+@@ -437,20 +442,20 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ !(msr & MSR_PMC_FULL_WIDTH_BIT))
+ data = (s64)(s32)data;
+ pmc->counter += data - pmc_read_counter(pmc);
+- if (pmc->perf_event && !pmc->is_paused)
+- perf_event_period(pmc->perf_event,
+- get_sample_period(pmc, data));
++ pmc_update_sample_period(pmc);
+ return 0;
+ } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ pmc->counter += data - pmc_read_counter(pmc);
+- if (pmc->perf_event && !pmc->is_paused)
+- perf_event_period(pmc->perf_event,
+- get_sample_period(pmc, data));
++ pmc_update_sample_period(pmc);
+ return 0;
+ } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
+ if (data == pmc->eventsel)
+ return 0;
+- if (!(data & pmu->reserved_bits)) {
++ reserved_bits = pmu->reserved_bits;
++ if ((pmc->idx == 2) &&
++ (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
++ reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
++ if (!(data & reserved_bits)) {
+ reprogram_gp_counter(pmc, data);
+ return 0;
+ }
+@@ -470,6 +475,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ struct kvm_cpuid_entry2 *entry;
+ union cpuid10_eax eax;
+ union cpuid10_edx edx;
++ int i;
+
+ pmu->nr_arch_gp_counters = 0;
+ pmu->nr_arch_fixed_counters = 0;
+@@ -477,6 +483,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+ pmu->version = 0;
+ pmu->reserved_bits = 0xffffffff00200000ull;
++ pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++ pmu->global_ctrl_mask = ~0ull;
++ pmu->global_ovf_ctrl_mask = ~0ull;
++ pmu->fixed_ctr_ctrl_mask = ~0ull;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ if (!entry)
+@@ -510,6 +520,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ ((u64)1 << edx.split.bit_width_fixed) - 1;
+ }
+
++ for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
++ pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
+ pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
+ (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
+ pmu->global_ctrl_mask = ~pmu->global_ctrl;
+@@ -523,8 +535,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ entry = kvm_find_cpuid_entry(vcpu, 7, 0);
+ if (entry &&
+ (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
+- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
+- pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
++ (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
++ pmu->reserved_bits ^= HSW_IN_TX;
++ pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
++ }
+
+ bitmap_set(pmu->all_valid_pmc_idx,
+ 0, pmu->nr_arch_gp_counters);
+@@ -706,7 +720,7 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
+ }
+
+ struct kvm_pmu_ops intel_pmu_ops = {
+- .find_arch_event = intel_find_arch_event,
++ .pmc_perf_hw_id = intel_pmc_perf_hw_id,
+ .find_fixed_event = intel_find_fixed_event,
+ .pmc_is_enabled = intel_pmc_is_enabled,
+ .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
+diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
+index 5f81ef092bd43..46fb83d6a286e 100644
+--- a/arch/x86/kvm/vmx/posted_intr.c
++++ b/arch/x86/kvm/vmx/posted_intr.c
+@@ -5,6 +5,7 @@
+ #include <asm/cpu.h>
+
+ #include "lapic.h"
++#include "irq.h"
+ #include "posted_intr.h"
+ #include "trace.h"
+ #include "vmx.h"
+@@ -14,7 +15,7 @@
+ * can find which vCPU should be waken up.
+ */
+ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
++static DEFINE_PER_CPU(raw_spinlock_t, blocked_vcpu_on_cpu_lock);
+
+ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+ {
+@@ -50,7 +51,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+
+ /* The full case. */
+ do {
+- old.control = new.control = pi_desc->control;
++ old.control = new.control = READ_ONCE(pi_desc->control);
+
+ dest = cpu_physical_id(cpu);
+
+@@ -77,13 +78,18 @@ after_clear_sn:
+ pi_set_on(pi_desc);
+ }
+
++static bool vmx_can_use_vtd_pi(struct kvm *kvm)
++{
++ return irqchip_in_kernel(kvm) && enable_apicv &&
++ kvm_arch_has_assigned_device(kvm) &&
++ irq_remapping_cap(IRQ_POSTING_CAP);
++}
++
+ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+ {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP) ||
+- !kvm_vcpu_apicv_active(vcpu))
++ if (!vmx_can_use_vtd_pi(vcpu->kvm))
+ return;
+
+ /* Set SN when the vCPU is preempted */
+@@ -98,7 +104,7 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
+ unsigned int dest;
+
+ do {
+- old.control = new.control = pi_desc->control;
++ old.control = new.control = READ_ONCE(pi_desc->control);
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+@@ -115,9 +121,9 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++ raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++ raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+ }
+@@ -141,24 +147,23 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
+ struct pi_desc old, new;
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP) ||
+- !kvm_vcpu_apicv_active(vcpu))
++ if (!vmx_can_use_vtd_pi(vcpu->kvm) ||
++ vmx_interrupt_blocked(vcpu))
+ return 0;
+
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++ raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
++ raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
+
+ do {
+- old.control = new.control = pi_desc->control;
++ old.control = new.control = READ_ONCE(pi_desc->control);
+
+ WARN((pi_desc->sn == 1),
+ "Warning: SN field of posted-interrupts "
+@@ -211,7 +216,7 @@ void pi_wakeup_handler(void)
+ struct kvm_vcpu *vcpu;
+ int cpu = smp_processor_id();
+
+- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
++ raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+ blocked_vcpu_list) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+@@ -219,13 +224,13 @@ void pi_wakeup_handler(void)
+ if (pi_test_on(pi_desc) == 1)
+ kvm_vcpu_kick(vcpu);
+ }
+- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
++ raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ }
+
+ void __init pi_init_cpu(int cpu)
+ {
+ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+- spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
++ raw_spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ }
+
+ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
+@@ -270,9 +275,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
+ struct vcpu_data vcpu_info;
+ int idx, ret = 0;
+
+- if (!kvm_arch_has_assigned_device(kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP) ||
+- !kvm_vcpu_apicv_active(kvm->vcpus[0]))
++ if (!vmx_can_use_vtd_pi(kvm))
+ return 0;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
+new file mode 100644
+index 0000000000000..edc3f16cc1896
+--- /dev/null
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -0,0 +1,8 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __KVM_X86_VMX_RUN_FLAGS_H
++#define __KVM_X86_VMX_RUN_FLAGS_H
++
++#define VMX_RUN_VMRESUME (1 << 0)
++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
++
++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
+index 6693ebdc07701..b8cf9a59c145e 100644
+--- a/arch/x86/kvm/vmx/sgx.c
++++ b/arch/x86/kvm/vmx/sgx.c
+@@ -188,8 +188,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
+ /* Enforce CPUID restriction on max enclave size. */
+ max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
+ sgx_12_0->edx;
+- if (size >= BIT_ULL(max_size_log2))
++ if (size >= BIT_ULL(max_size_log2)) {
+ kvm_inject_gp(vcpu, 0);
++ return 1;
++ }
+
+ /*
+ * sgx_virt_ecreate() returns:
+diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
+index 6e5de2e2b0da6..4de2a6e3b1900 100644
+--- a/arch/x86/kvm/vmx/vmcs.h
++++ b/arch/x86/kvm/vmx/vmcs.h
+@@ -104,6 +104,11 @@ static inline bool is_breakpoint(u32 intr_info)
+ return is_exception_n(intr_info, BP_VECTOR);
+ }
+
++static inline bool is_double_fault(u32 intr_info)
++{
++ return is_exception_n(intr_info, DF_VECTOR);
++}
++
+ static inline bool is_page_fault(u32 intr_info)
+ {
+ return is_exception_n(intr_info, PF_VECTOR);
+diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
+index 3a6461694fc25..982138bebb70f 100644
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -5,6 +5,7 @@
+ #include <asm/kvm_vcpu_regs.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/segment.h>
++#include "run_flags.h"
+
+ #define WORD_SIZE (BITS_PER_LONG / 8)
+
+@@ -30,73 +31,12 @@
+
+ .section .noinstr.text, "ax"
+
+-/**
+- * vmx_vmenter - VM-Enter the current loaded VMCS
+- *
+- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
+- *
+- * Returns:
+- * %RFLAGS.CF is set on VM-Fail Invalid
+- * %RFLAGS.ZF is set on VM-Fail Valid
+- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
+- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
+- * to vmx_vmexit.
+- */
+-SYM_FUNC_START_LOCAL(vmx_vmenter)
+- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
+- je 2f
+-
+-1: vmresume
+- ret
+-
+-2: vmlaunch
+- ret
+-
+-3: cmpb $0, kvm_rebooting
+- je 4f
+- ret
+-4: ud2
+-
+- _ASM_EXTABLE(1b, 3b)
+- _ASM_EXTABLE(2b, 3b)
+-
+-SYM_FUNC_END(vmx_vmenter)
+-
+-/**
+- * vmx_vmexit - Handle a VMX VM-Exit
+- *
+- * Returns:
+- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
+- *
+- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
+- * here after hardware loads the host's state, i.e. this is the destination
+- * referred to by VMCS.HOST_RIP.
+- */
+-SYM_FUNC_START(vmx_vmexit)
+-#ifdef CONFIG_RETPOLINE
+- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+- /* Preserve guest's RAX, it's used to stuff the RSB. */
+- push %_ASM_AX
+-
+- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+-
+- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+- or $1, %_ASM_AX
+-
+- pop %_ASM_AX
+-.Lvmexit_skip_rsb:
+-#endif
+- ret
+-SYM_FUNC_END(vmx_vmexit)
+-
+ /**
+ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
++ * @vmx: struct vcpu_vmx *
+ * @regs: unsigned long * (to guest registers)
+- * @launched: %true if the VMCS has been launched
++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
+ *
+ * Returns:
+ * 0 on VM-Exit, 1 on VM-Fail
+@@ -115,24 +55,29 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ #endif
+ push %_ASM_BX
+
++ /* Save @vmx for SPEC_CTRL handling */
++ push %_ASM_ARG1
++
++ /* Save @flags for SPEC_CTRL handling */
++ push %_ASM_ARG3
++
+ /*
+ * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+ * @regs is needed after VM-Exit to save the guest's register values.
+ */
+ push %_ASM_ARG2
+
+- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
++ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
+ mov %_ASM_ARG3B, %bl
+
+- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
+- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
++ lea (%_ASM_SP), %_ASM_ARG2
+ call vmx_update_host_rsp
+
+ /* Load @regs to RAX. */
+ mov (%_ASM_SP), %_ASM_AX
+
+ /* Check if vmlaunch or vmresume is needed */
+- testb %bl, %bl
++ testb $VMX_RUN_VMRESUME, %bl
+
+ /* Load guest registers. Don't clobber flags. */
+ mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -154,11 +99,36 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Load guest RAX. This kills the @regs pointer! */
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+- /* Enter guest mode */
+- call vmx_vmenter
++ /* Check EFLAGS.ZF from 'testb' above */
++ jz .Lvmlaunch
++
++ /*
++ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
++ * So this isn't a typical function and objtool needs to be told to
++ * save the unwind state here and restore it below.
++ */
++ UNWIND_HINT_SAVE
++
++/*
++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
++ * the 'vmx_vmexit' label below.
++ */
++.Lvmresume:
++ vmresume
++ jmp .Lvmfail
++
++.Lvmlaunch:
++ vmlaunch
++ jmp .Lvmfail
++
++ _ASM_EXTABLE(.Lvmresume, .Lfixup)
++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
+
+- /* Jump on VM-Fail. */
+- jbe 2f
++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
++
++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
++ UNWIND_HINT_RESTORE
+
+ /* Temporarily save guest's RAX. */
+ push %_ASM_AX
+@@ -185,21 +155,23 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov %r15, VCPU_R15(%_ASM_AX)
+ #endif
+
+- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+- xor %eax, %eax
++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
++ xor %ebx, %ebx
+
++.Lclear_regs:
+ /*
+- * Clear all general purpose registers except RSP and RAX to prevent
++ * Clear all general purpose registers except RSP and RBX to prevent
+ * speculative use of the guest's values, even those that are reloaded
+ * via the stack. In theory, an L1 cache miss when restoring registers
+ * could lead to speculative execution with the guest's values.
+ * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+ * free. RSP and RAX are exempt as RSP is restored by hardware during
+- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
++ * value.
+ */
+-1: xor %ecx, %ecx
++ xor %eax, %eax
++ xor %ecx, %ecx
+ xor %edx, %edx
+- xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %esi, %esi
+ xor %edi, %edi
+@@ -216,8 +188,32 @@ SYM_FUNC_START(__vmx_vcpu_run)
+
+ /* "POP" @regs. */
+ add $WORD_SIZE, %_ASM_SP
+- pop %_ASM_BX
+
++ /*
++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
++ * the first unbalanced RET after vmexit!
++ *
++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
++ * entries and (in some cases) RSB underflow.
++ *
++ * eIBRS has its own protection against poisoned RSB, so it doesn't
++ * need the RSB filling sequence. But it does need to be enabled, and a
++ * single call to retire, before the first unbalanced RET.
++ */
++
++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
++ X86_FEATURE_RSB_VMEXIT_LITE
++
++
++ pop %_ASM_ARG2 /* @flags */
++ pop %_ASM_ARG1 /* @vmx */
++
++ call vmx_spec_ctrl_restore_host
++
++ /* Put return value in AX */
++ mov %_ASM_BX, %_ASM_AX
++
++ pop %_ASM_BX
+ #ifdef CONFIG_X86_64
+ pop %r12
+ pop %r13
+@@ -228,11 +224,17 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ pop %edi
+ #endif
+ pop %_ASM_BP
+- ret
++ RET
++
++.Lfixup:
++ cmpb $0, kvm_rebooting
++ jne .Lvmfail
++ ud2
++.Lvmfail:
++ /* VM-Fail: set return value to 1 */
++ mov $1, %_ASM_BX
++ jmp .Lclear_regs
+
+- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
+-2: mov $1, %eax
+- jmp 1b
+ SYM_FUNC_END(__vmx_vcpu_run)
+
+
+@@ -293,7 +295,7 @@ SYM_FUNC_START(vmread_error_trampoline)
+ pop %_ASM_AX
+ pop %_ASM_BP
+
+- ret
++ RET
+ SYM_FUNC_END(vmread_error_trampoline)
+
+ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+@@ -326,5 +328,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+- ret
++ RET
+ SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 7d595effb66f0..89744ee06101a 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -226,6 +226,9 @@ static const struct {
+ #define L1D_CACHE_ORDER 4
+ static void *vmx_l1d_flush_pages;
+
++/* Control for disabling CPU Fill buffer clear */
++static bool __read_mostly vmx_fb_clear_ctrl_available;
++
+ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
+ {
+ struct page *page;
+@@ -357,6 +360,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
+ return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
+ }
+
++static void vmx_setup_fb_clear_ctrl(void)
++{
++ u64 msr;
++
++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
++ !boot_cpu_has_bug(X86_BUG_MDS) &&
++ !boot_cpu_has_bug(X86_BUG_TAA)) {
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
++ if (msr & ARCH_CAP_FB_CLEAR_CTRL)
++ vmx_fb_clear_ctrl_available = true;
++ }
++}
++
++static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
++{
++ u64 msr;
++
++ if (!vmx->disable_fb_clear)
++ return;
++
++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
++ msr |= FB_CLEAR_DIS;
++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
++ /* Cache the MSR value to avoid reading it later */
++ vmx->msr_ia32_mcu_opt_ctrl = msr;
++}
++
++static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
++{
++ if (!vmx->disable_fb_clear)
++ return;
++
++ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
++}
++
++static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
++{
++ vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
++
++ /*
++ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
++ * at VMEntry. Skip the MSR read/write when a guest has no use case to
++ * execute VERW.
++ */
++ if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
++ ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
++ (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
++ (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
++ (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
++ (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
++ vmx->disable_fb_clear = false;
++}
++
+ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+ .set = vmentry_l1d_flush_set,
+ .get = vmentry_l1d_flush_get,
+@@ -769,24 +826,30 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
+ /*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
++static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
+ {
+- unsigned long *msr_bitmap;
+- int f = sizeof(unsigned long);
+-
+- if (!cpu_has_vmx_msr_bitmap())
++ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
+ return true;
+
+- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
++ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
++}
+
+- if (msr <= 0x1fff) {
+- return !!test_bit(msr, msr_bitmap + 0x800 / f);
+- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+- msr &= 0x1fff;
+- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+- }
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
++{
++ unsigned int flags = 0;
+
+- return true;
++ if (vmx->loaded_vmcs->launched)
++ flags |= VMX_RUN_VMRESUME;
++
++ /*
++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
++ * to change it directly without causing a vmexit. In that case read
++ * it after vmexit and store it in vmx->spec_ctrl.
++ */
++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
++ flags |= VMX_RUN_SAVE_SPEC_CTRL;
++
++ return flags;
+ }
+
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+@@ -1269,8 +1332,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
+
+ /*
+ * No indirect branch prediction barrier needed when switching
+- * the active VMCS within a guest, e.g. on nested VM-Enter.
+- * The L1 VMM can protect itself with retpolines, IBPB or IBRS.
++ * the active VMCS within a vCPU, unless IBRS is advertised to
++ * the vCPU. To minimize the number of IBPBs executed, KVM
++ * performs IBPB on nested VM-Exit (a single nested transition
++ * may switch the active VMCS multiple times).
+ */
+ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
+ indirect_branch_prediction_barrier();
+@@ -1351,6 +1416,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long old_rflags;
+
++ /*
++ * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
++ * is an unrestricted guest in order to mark L2 as needing emulation
++ * if L1 runs L2 as a restricted guest.
++ */
+ if (is_unrestricted_guest(vcpu)) {
+ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
+ vmx->rflags = rflags;
+@@ -1370,6 +1440,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ vmx->emulation_required = vmx_emulation_required(vcpu);
+ }
+
++static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
++{
++ return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
++}
++
+ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
+ {
+ u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+@@ -1608,7 +1683,17 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
+ kvm_deliver_exception_payload(vcpu);
+
+ if (has_error_code) {
+- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
++ /*
++ * Despite the error code being architecturally defined as 32
++ * bits, and the VMCS field being 32 bits, Intel CPUs and thus
++ * VMX don't actually support setting bits 31:16. Hardware
++ * will (should) never provide a bogus error code, but AMD CPUs
++ * do generate error codes with bits 31:16 set, and so KVM's
++ * ABI lets userspace shove in arbitrary 32-bit values. Drop
++ * the upper bits to avoid VM-Fail, losing information that
++ * doesn't really exist is preferable to killing the VM.
++ */
++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)error_code);
+ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+ }
+
+@@ -2234,6 +2319,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ ret = kvm_set_msr_common(vcpu, msr_info);
+ }
+
++ /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
++ if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
++ vmx_update_fb_clear_dis(vcpu, vmx);
++
+ return ret;
+ }
+
+@@ -2655,15 +2744,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+ if (!loaded_vmcs->msr_bitmap)
+ goto out_vmcs;
+ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+-
+- if (IS_ENABLED(CONFIG_HYPERV) &&
+- static_branch_unlikely(&enable_evmcs) &&
+- (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+- struct hv_enlightened_vmcs *evmcs =
+- (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+-
+- evmcs->hv_enlightenments_control.msr_bitmap = 1;
+- }
+ }
+
+ memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+@@ -2814,6 +2894,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+
++ /*
++ * KVM should never use VM86 to virtualize Real Mode when L2 is active,
++ * as using VM86 is unnecessary if unrestricted guest is enabled, and
++ * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
++ * should VM-Fail and KVM should reject userspace attempts to stuff
++ * CR0.PG=0 when L2 is active.
++ */
++ WARN_ON_ONCE(is_guest_mode(vcpu));
++
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+@@ -2927,6 +3016,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
+ }
+ }
+
++static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
++{
++ if (is_guest_mode(vcpu))
++ return nested_get_vpid02(vcpu);
++ return to_vmx(vcpu)->vpid;
++}
++
+ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+@@ -2939,31 +3035,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
+ if (enable_ept)
+ ept_sync_context(construct_eptp(vcpu, root_hpa,
+ mmu->shadow_root_level));
+- else if (!is_guest_mode(vcpu))
+- vpid_sync_context(to_vmx(vcpu)->vpid);
+ else
+- vpid_sync_context(nested_get_vpid02(vcpu));
++ vpid_sync_context(vmx_get_current_vpid(vcpu));
+ }
+
+ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+ {
+ /*
+- * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
++ * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
+ * vmx_flush_tlb_guest() for an explanation of why this is ok.
+ */
+- vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
++ vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
+ }
+
+ static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
+ {
+ /*
+- * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
+- * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit
+- * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
++ * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
++ * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are
++ * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+ * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
+ * i.e. no explicit INVVPID is necessary.
+ */
+- vpid_sync_context(to_vmx(vcpu)->vpid);
++ vpid_sync_context(vmx_get_current_vpid(vcpu));
+ }
+
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
+@@ -2999,6 +3093,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
+ #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
+ CPU_BASED_CR3_STORE_EXITING)
+
++static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++ if (is_guest_mode(vcpu))
++ return nested_guest_cr0_valid(vcpu, cr0);
++
++ if (to_vmx(vcpu)->nested.vmxon)
++ return nested_host_cr0_valid(vcpu, cr0);
++
++ return true;
++}
++
+ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -3008,7 +3113,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
+
+ hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
+- if (is_unrestricted_guest(vcpu))
++ if (enable_unrestricted_guest)
+ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+ else {
+ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
+@@ -3036,7 +3141,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ }
+ #endif
+
+- if (enable_ept && !is_unrestricted_guest(vcpu)) {
++ if (enable_ept && !enable_unrestricted_guest) {
+ /*
+ * Ensure KVM has an up-to-date snapshot of the guest's CR3. If
+ * the below code _enables_ CR3 exiting, vmx_cache_reg() will
+@@ -3135,8 +3240,8 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+ /*
+ * We operate under the default treatment of SMM, so VMX cannot be
+- * enabled under SMM. Note, whether or not VMXE is allowed at all is
+- * handled by kvm_is_valid_cr4().
++ * enabled under SMM. Note, whether or not VMXE is allowed at all,
++ * i.e. is a reserved bit, is handled by common x86 code.
+ */
+ if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
+ return false;
+@@ -3159,7 +3264,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ unsigned long hw_cr4;
+
+ hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
+- if (is_unrestricted_guest(vcpu))
++ if (enable_unrestricted_guest)
+ hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
+ else if (vmx->rmode.vm86_active)
+ hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
+@@ -3179,7 +3284,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ vcpu->arch.cr4 = cr4;
+ kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
+
+- if (!is_unrestricted_guest(vcpu)) {
++ if (!enable_unrestricted_guest) {
+ if (enable_ept) {
+ if (!is_paging(vcpu)) {
+ hw_cr4 &= ~X86_CR4_PAE;
+@@ -3274,18 +3379,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
+ {
+ u32 ar;
+
+- if (var->unusable || !var->present)
+- ar = 1 << 16;
+- else {
+- ar = var->type & 15;
+- ar |= (var->s & 1) << 4;
+- ar |= (var->dpl & 3) << 5;
+- ar |= (var->present & 1) << 7;
+- ar |= (var->avl & 1) << 12;
+- ar |= (var->l & 1) << 13;
+- ar |= (var->db & 1) << 14;
+- ar |= (var->g & 1) << 15;
+- }
++ ar = var->type & 15;
++ ar |= (var->s & 1) << 4;
++ ar |= (var->dpl & 3) << 5;
++ ar |= (var->present & 1) << 7;
++ ar |= (var->avl & 1) << 12;
++ ar |= (var->l & 1) << 13;
++ ar |= (var->db & 1) << 14;
++ ar |= (var->g & 1) << 15;
++ ar |= (var->unusable || !var->present) << 16;
+
+ return ar;
+ }
+@@ -3695,44 +3797,20 @@ void free_vpid(int vpid)
+ spin_unlock(&vmx_vpid_lock);
+ }
+
+-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __clear_bit(msr, msr_bitmap + 0x000 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+-}
+-
+-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
+ {
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __clear_bit(msr, msr_bitmap + 0x800 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+-}
+-
+-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __set_bit(msr, msr_bitmap + 0x000 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+-}
+-
+-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
++ /*
++ * When KVM is a nested hypervisor on top of Hyper-V and uses the
++ * 'Enlightened MSR Bitmap' feature, L0 needs to know that the MSR
++ * bitmap has changed.
++ */
++ if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs)) {
++ struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
+
+- if (msr <= 0x1fff)
+- __set_bit(msr, msr_bitmap + 0x800 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++ if (evmcs->hv_enlightenments_control.msr_bitmap)
++ evmcs->hv_clean_fields &=
++ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
++ }
+ }
+
+ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+@@ -3743,8 +3821,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+- if (static_branch_unlikely(&enable_evmcs))
+- evmcs_touch_msr_bitmap();
++ vmx_msr_bitmap_l01_changed(vmx);
+
+ /*
+ * Mark the desired intercept state in shadow bitmap, this is needed
+@@ -3788,8 +3865,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+- if (static_branch_unlikely(&enable_evmcs))
+- evmcs_touch_msr_bitmap();
++ vmx_msr_bitmap_l01_changed(vmx);
+
+ /*
+ * Mark the desired intercept state in shadow bitmap, this is needed
+@@ -4012,8 +4088,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+ if (pi_test_and_set_on(&vmx->pi_desc))
+ return 0;
+
+- if (vcpu != kvm_get_running_vcpu() &&
+- !kvm_vcpu_trigger_posted_interrupt(vcpu, false))
++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+@@ -4140,6 +4215,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
++ if (is_guest_mode(vcpu)) {
++ vmx->nested.update_vmcs01_apicv_status = true;
++ return;
++ }
++
+ pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
+ if (cpu_has_secondary_exec_ctrls()) {
+ if (kvm_vcpu_apicv_active(vcpu))
+@@ -4487,6 +4567,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+
+ vpid_sync_context(vmx->vpid);
++
++ vmx_update_fb_clear_dis(vcpu, vmx);
+ }
+
+ static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
+@@ -4833,8 +4915,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
+ dr6 = vmx_get_exit_qual(vcpu);
+ if (!(vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
++ /*
++ * If the #DB was due to ICEBP, a.k.a. INT1, skip the
++ * instruction. ICEBP generates a trap-like #DB, but
++ * despite its interception control being tied to #DB,
++ * is an instruction intercept, i.e. the VM-Exit occurs
++ * on the ICEBP itself. Note, skipping ICEBP also
++ * clears STI and MOVSS blocking.
++ *
++ * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
++ * if single-step is enabled in RFLAGS and STI or MOVSS
++ * blocking is active, as the CPU doesn't set the bit
++ * on VM-Exit due to #DB interception. VM-Entry has a
++ * consistency check that a single-step #DB is pending
++ * in this scenario as the previous instruction cannot
++ * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
++ * don't modify RFLAGS), therefore the one instruction
++ * delay when activating single-step breakpoints must
++ * have already expired. Note, the CPU sets/clears BS
++ * as appropriate for all other VM-Exits types.
++ */
+ if (is_icebp(intr_info))
+ WARN_ON(!skip_emulated_instruction(vcpu));
++ else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
++ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
++ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
++ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
++ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
+
+ kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
+ return 1;
+@@ -4940,18 +5047,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
+ val = (val & ~vmcs12->cr0_guest_host_mask) |
+ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
+
+- if (!nested_guest_cr0_valid(vcpu, val))
+- return 1;
+-
+ if (kvm_set_cr0(vcpu, val))
+ return 1;
+ vmcs_writel(CR0_READ_SHADOW, orig_val);
+ return 0;
+ } else {
+- if (to_vmx(vcpu)->nested.vmxon &&
+- !nested_host_cr0_valid(vcpu, val))
+- return 1;
+-
+ return kvm_set_cr0(vcpu, val);
+ }
+ }
+@@ -5907,18 +6007,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
+ vmx_flush_pml_buffer(vcpu);
+
+ /*
+- * We should never reach this point with a pending nested VM-Enter, and
+- * more specifically emulation of L2 due to invalid guest state (see
+- * below) should never happen as that means we incorrectly allowed a
+- * nested VM-Enter with an invalid vmcs12.
++ * KVM should never reach this point with a pending nested VM-Enter.
++ * More specifically, short-circuiting VM-Entry to emulate L2 due to
++ * invalid guest state should never happen as that means KVM knowingly
++ * allowed a nested VM-Enter with an invalid vmcs12. More below.
+ */
+ if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
+ return -EIO;
+
+- /* If guest state is invalid, start emulating */
+- if (vmx->emulation_required)
+- return handle_invalid_guest_state(vcpu);
+-
+ if (is_guest_mode(vcpu)) {
+ /*
+ * PML is never enabled when running L2, bail immediately if a
+@@ -5940,10 +6036,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
+ */
+ nested_mark_vmcs12_pages_dirty(vcpu);
+
++ /*
++ * Synthesize a triple fault if L2 state is invalid. In normal
++ * operation, nested VM-Enter rejects any attempt to enter L2
++ * with invalid state. However, those checks are skipped if
++ * state is being stuffed via RSM or KVM_SET_NESTED_STATE. If
++ * L2 state is invalid, it means either L1 modified SMRAM state
++ * or userspace provided bad state. Synthesize TRIPLE_FAULT as
++ * doing so is architecturally allowed in the RSM case, and is
++ * the least awful solution for the userspace case without
++ * risking false positives.
++ */
++ if (vmx->emulation_required) {
++ nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
++ return 1;
++ }
++
+ if (nested_vmx_reflect_vmexit(vcpu))
+ return 1;
+ }
+
++ /* If guest state is invalid, start emulating. L2 is handled above. */
++ if (vmx->emulation_required)
++ return handle_invalid_guest_state(vcpu);
++
+ if (exit_reason.failed_vmentry) {
+ dump_vmcs(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+@@ -6288,9 +6404,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int max_irr;
+- bool max_irr_updated;
++ bool got_posted_interrupt;
+
+- if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
++ if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
+ return -EIO;
+
+ if (pi_test_on(&vmx->pi_desc)) {
+@@ -6300,22 +6416,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+ * But on x86 this is just a compiler barrier anyway.
+ */
+ smp_mb__after_atomic();
+- max_irr_updated =
++ got_posted_interrupt =
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
+-
+- /*
+- * If we are running L2 and L1 has a new pending interrupt
+- * which can be injected, this may cause a vmexit or it may
+- * be injected into L2. Either way, this interrupt will be
+- * processed via KVM_REQ_EVENT, not RVI, because we do not use
+- * virtual interrupt delivery to inject L1 interrupts into L2.
+- */
+- if (is_guest_mode(vcpu) && max_irr_updated)
+- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ } else {
+ max_irr = kvm_lapic_find_highest_irr(vcpu);
++ got_posted_interrupt = false;
+ }
+- vmx_hwapic_irr_update(vcpu, max_irr);
++
++ /*
++ * Newly recognized interrupts are injected via either virtual interrupt
++ * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
++ * disabled in two cases:
++ *
++ * 1) If L2 is running and the vCPU has a new pending interrupt. If L1
++ * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
++ * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
++ * into L2, but KVM doesn't use virtual interrupt delivery to inject
++ * interrupts into L2, and so KVM_REQ_EVENT is again needed.
++ *
++ * 2) If APICv is disabled for this vCPU, assigned devices may still
++ * attempt to post interrupts. The posted interrupt vector will cause
++ * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
++ */
++ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
++ vmx_set_rvi(max_irr);
++ else if (got_posted_interrupt)
++ kvm_make_request(KVM_REQ_EVENT, vcpu);
++
+ return max_irr;
+ }
+
+@@ -6375,6 +6502,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ return;
+
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+@@ -6576,6 +6704,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
+ }
+ }
+
++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
++ unsigned int flags)
++{
++ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
++
++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
++ return;
++
++ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
++
++ /*
++ * If the guest/host SPEC_CTRL values differ, restore the host value.
++ *
++ * For legacy IBRS, the IBRS bit always needs to be written after
++ * transitioning from a less privileged predictor mode, regardless of
++ * whether the guest/host values differ.
++ */
++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
++ vmx->spec_ctrl != hostval)
++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
++
++ barrier_nospec();
++}
++
+ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ {
+ switch (to_vmx(vcpu)->exit_reason.basic) {
+@@ -6589,7 +6742,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+ }
+
+ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+- struct vcpu_vmx *vmx)
++ struct vcpu_vmx *vmx,
++ unsigned long flags)
+ {
+ kvm_guest_enter_irqoff();
+
+@@ -6598,15 +6752,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ vmx_l1d_flush(vcpu);
+ else if (static_branch_unlikely(&mds_user_clear))
+ mds_clear_cpu_buffers();
++ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
++ kvm_arch_has_assigned_device(vcpu->kvm))
++ mds_clear_cpu_buffers();
++
++ vmx_disable_fb_clear(vmx);
+
+ if (vcpu->arch.cr2 != native_read_cr2())
+ native_write_cr2(vcpu->arch.cr2);
+
+ vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+- vmx->loaded_vmcs->launched);
++ flags);
+
+ vcpu->arch.cr2 = native_read_cr2();
+
++ vmx_enable_fb_clear(vmx);
++
+ kvm_guest_exit_irqoff();
+ }
+
+@@ -6626,9 +6787,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ * consistency check VM-Exit due to invalid guest state and bail.
+ */
+ if (unlikely(vmx->emulation_required)) {
+-
+- /* We don't emulate invalid state of a nested guest */
+- vmx->fail = is_guest_mode(vcpu);
++ vmx->fail = 0;
+
+ vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
+ vmx->exit_reason.failed_vmentry = 1;
+@@ -6703,27 +6862,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
+
+ /* The actual VMENTER/EXIT is in the .noinstr.text section. */
+- vmx_vcpu_enter_exit(vcpu, vmx);
+-
+- /*
+- * We do not use IBRS in the kernel. If this vCPU has used the
+- * SPEC_CTRL MSR it may have left it on; save the value and
+- * turn it off. This is much more efficient than blindly adding
+- * it to the atomic save/restore list. Especially as the former
+- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+- *
+- * For non-nested case:
+- * If the L01 MSR bitmap does not intercept the MSR, then we need to
+- * save it.
+- *
+- * For nested case:
+- * If the L02 MSR bitmap does not intercept the MSR, then we need to
+- * save it.
+- */
+- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+-
+- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
+
+ /* All fields are clean at this point */
+ if (static_branch_unlikely(&enable_evmcs)) {
+@@ -6853,6 +6992,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+ if (err < 0)
+ goto free_pml;
+
++ /*
++ * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
++ * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
++ * feature only for vmcs01, KVM currently isn't equipped to realize any
++ * performance benefits from enabling it for vmcs02.
++ */
++ if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
++ (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
++ struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
++
++ evmcs->hv_enlightenments_control.msr_bitmap = 1;
++ }
++
+ /* The MSR bitmap starts with all ones */
+ bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+ bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+@@ -7359,6 +7511,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ break;
+
++ case x86_intercept_pause:
++ /*
++ * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides
++ * with vanilla NOPs in the emulator. Apply the interception
++ * check only to actual PAUSE instructions. Don't check
++ * PAUSE-loop-exiting, software can't expect a given PAUSE to
++ * exit, i.e. KVM is within its rights to allow L2 to execute
++ * the PAUSE.
++ */
++ if ((info->rep_prefix != REPE_PREFIX) ||
++ !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
++ return X86EMUL_CONTINUE;
++
++ break;
++
+ /* TODO: check more intercepts... */
+ default:
+ break;
+@@ -7463,17 +7630,11 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
+ if (pi_pre_block(vcpu))
+ return 1;
+
+- if (kvm_lapic_hv_timer_in_use(vcpu))
+- kvm_lapic_switch_to_sw_timer(vcpu);
+-
+ return 0;
+ }
+
+ static void vmx_post_block(struct kvm_vcpu *vcpu)
+ {
+- if (kvm_x86_ops.set_hv_timer)
+- kvm_lapic_switch_to_hv_timer(vcpu);
+-
+ pi_post_block(vcpu);
+ }
+
+@@ -7524,6 +7685,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+ if (ret)
+ return ret;
+
++ vmx->nested.nested_run_pending = 1;
+ vmx->nested.smm.guest_mode = false;
+ }
+ return 0;
+@@ -7551,6 +7713,8 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
+
+ static void hardware_unsetup(void)
+ {
++ kvm_set_posted_intr_wakeup_handler(NULL);
++
+ if (nested)
+ nested_vmx_hardware_unsetup();
+
+@@ -7593,6 +7757,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
+ .set_segment = vmx_set_segment,
+ .get_cpl = vmx_get_cpl,
+ .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
++ .is_valid_cr0 = vmx_is_valid_cr0,
+ .set_cr0 = vmx_set_cr0,
+ .is_valid_cr4 = vmx_is_valid_cr4,
+ .set_cr4 = vmx_set_cr4,
+@@ -7606,6 +7771,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
+ .cache_reg = vmx_cache_reg,
+ .get_rflags = vmx_get_rflags,
+ .set_rflags = vmx_set_rflags,
++ .get_if_flag = vmx_get_if_flag,
+
+ .tlb_flush_all = vmx_flush_tlb_all,
+ .tlb_flush_current = vmx_flush_tlb_current,
+@@ -7781,6 +7947,11 @@ static __init int hardware_setup(void)
+ if (!cpu_has_virtual_nmis())
+ enable_vnmi = 0;
+
++#ifdef CONFIG_X86_SGX_KVM
++ if (!cpu_has_vmx_encls_vmexit())
++ enable_sgx = false;
++#endif
++
+ /*
+ * set_apic_access_page_addr() is used to reload apic access
+ * page upon invalidation. No need to do anything if not
+@@ -7809,10 +7980,10 @@ static __init int hardware_setup(void)
+ ple_window_shrink = 0;
+ }
+
+- if (!cpu_has_vmx_apicv()) {
++ if (!cpu_has_vmx_apicv())
+ enable_apicv = 0;
++ if (!enable_apicv)
+ vmx_x86_ops.sync_pir_to_irr = NULL;
+- }
+
+ if (cpu_has_vmx_tsc_scaling()) {
+ kvm_has_tsc_control = true;
+@@ -7879,8 +8050,6 @@ static __init int hardware_setup(void)
+ vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
+ }
+
+- kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
+-
+ kvm_mce_cap_supported |= MCG_LMCE_P;
+
+ if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
+@@ -7904,6 +8073,9 @@ static __init int hardware_setup(void)
+ r = alloc_kvm_area();
+ if (r)
+ nested_vmx_hardware_unsetup();
++
++ kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
++
+ return r;
+ }
+
+@@ -7912,6 +8084,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = {
+ .disabled_by_bios = vmx_disabled_by_bios,
+ .check_processor_compatibility = vmx_check_processor_compat,
+ .hardware_setup = hardware_setup,
++ .intel_pt_intr_in_guest = vmx_pt_mode_is_host_guest,
+
+ .runtime_ops = &vmx_x86_ops,
+ };
+@@ -8020,6 +8193,8 @@ static int __init vmx_init(void)
+ return r;
+ }
+
++ vmx_setup_fb_clear_ctrl();
++
+ for_each_possible_cpu(cpu) {
+ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 592217fd7d920..20f1213a93685 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -13,6 +13,7 @@
+ #include "vmcs.h"
+ #include "vmx_ops.h"
+ #include "cpuid.h"
++#include "run_flags.h"
+
+ #define MSR_TYPE_R 1
+ #define MSR_TYPE_W 2
+@@ -91,6 +92,18 @@ union vmx_exit_reason {
+ u32 full;
+ };
+
++static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
++{
++ /*
++ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
++ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
++ * greater than zero. However, KVM only exposes and emulates the MSR
++ * to/for the guest if the guest PMU supports at least "Architectural
++ * Performance Monitoring Version 2".
++ */
++ return pmu->version > 1;
++}
++
+ #define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
+ #define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
+
+@@ -164,6 +177,7 @@ struct nested_vmx {
+ bool change_vmcs01_virtual_apic_mode;
+ bool reload_vmcs01_apic_access_page;
+ bool update_vmcs01_cpu_dirty_logging;
++ bool update_vmcs01_apicv_status;
+
+ /*
+ * Enlightened VMCS has been enabled. It does not mean that L1 has to
+@@ -325,6 +339,8 @@ struct vcpu_vmx {
+ u64 msr_ia32_feature_control_valid_bits;
+ /* SGX Launch Control public key hash */
+ u64 msr_ia32_sgxlepubkeyhash[4];
++ u64 msr_ia32_mcu_opt_ctrl;
++ bool disable_fb_clear;
+
+ struct pt_desc pt_desc;
+ struct lbr_desc lbr_desc;
+@@ -379,7 +395,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
+ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
++ unsigned int flags);
+ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
+
+@@ -400,6 +419,69 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
+
+ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+
++static inline bool vmx_test_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ return test_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ return test_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++ return true;
++}
++
++static inline bool vmx_test_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ return test_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ return test_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++ return true;
++}
++
++static inline void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __clear_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++}
++
++static inline void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __clear_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++}
++
++static inline void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __set_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++}
++
++static inline void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __set_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++}
++
++
+ static inline u8 vmx_get_rvi(void)
+ {
+ return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index bfe0de3008a60..a26200c3e82b5 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -184,6 +184,10 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
+ int __read_mostly pi_inject_timer = -1;
+ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
++/* Enable/disable SMT_RSB bug mitigation */
++bool __read_mostly mitigate_smt_rsb;
++module_param(mitigate_smt_rsb, bool, 0444);
++
+ /*
+ * Restoring the host value for MSRs that are only consumed when running in
+ * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
+@@ -277,6 +281,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ STATS_DESC_COUNTER(VCPU, nested_run),
+ STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
+ STATS_DESC_COUNTER(VCPU, directed_yield_successful),
++ STATS_DESC_COUNTER(VCPU, preemption_reported),
++ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ };
+
+@@ -523,6 +529,7 @@ static int exception_class(int vector)
+ #define EXCPT_TRAP 1
+ #define EXCPT_ABORT 2
+ #define EXCPT_INTERRUPT 3
++#define EXCPT_DB 4
+
+ static int exception_type(int vector)
+ {
+@@ -533,8 +540,14 @@ static int exception_type(int vector)
+
+ mask = 1 << vector;
+
+- /* #DB is trap, as instruction watchpoints are handled elsewhere */
+- if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
++ /*
++ * #DBs can be trap-like or fault-like, the caller must check other CPU
++ * state, e.g. DR6, to determine whether a #DB is a trap or fault.
++ */
++ if (mask & (1 << DB_VECTOR))
++ return EXCPT_DB;
++
++ if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR)))
+ return EXCPT_TRAP;
+
+ if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
+@@ -599,6 +612,12 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
+ }
+ EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+
++/* Forcibly leave the nested mode in cases like a vCPU reset */
++static void kvm_leave_nested(struct kvm_vcpu *vcpu)
++{
++ kvm_x86_ops.nested_ops->leave_nested(vcpu);
++}
++
+ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+ unsigned nr, bool has_error, u32 error_code,
+ bool has_payload, unsigned long payload, bool reinject)
+@@ -848,6 +867,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
+
+ memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
++ kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
+ vcpu->arch.pdptrs_from_userspace = false;
+
+ out:
+@@ -856,6 +876,22 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(load_pdptrs);
+
++static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++#ifdef CONFIG_X86_64
++ if (cr0 & 0xffffffff00000000UL)
++ return false;
++#endif
++
++ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
++ return false;
++
++ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
++ return false;
++
++ return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
++}
++
+ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+ {
+ if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+@@ -878,20 +914,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ unsigned long old_cr0 = kvm_read_cr0(vcpu);
+ unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
+
+- cr0 |= X86_CR0_ET;
+-
+-#ifdef CONFIG_X86_64
+- if (cr0 & 0xffffffff00000000UL)
++ if (!kvm_is_valid_cr0(vcpu, cr0))
+ return 1;
+-#endif
+
+- cr0 &= ~CR0_RESERVED_BITS;
+-
+- if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+- return 1;
++ cr0 |= X86_CR0_ET;
+
+- if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+- return 1;
++ /* Write to CR0 reserved bits are ignored, even on Intel. */
++ cr0 &= ~CR0_RESERVED_BITS;
+
+ #ifdef CONFIG_X86_64
+ if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
+@@ -1018,6 +1047,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+
+ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
+ {
++ /* Note, #UD due to CR4.OSXSAVE=0 has priority over the intercept. */
+ if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
+ __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
+ kvm_inject_gp(vcpu, 0);
+@@ -1028,7 +1058,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
+ }
+ EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
+
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+ if (cr4 & cr4_reserved_bits)
+ return false;
+@@ -1036,9 +1066,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
+ return false;
+
+- return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
++ return true;
++}
++EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
++
++static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++{
++ return __kvm_is_valid_cr4(vcpu, cr4) &&
++ static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
+ }
+-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
+
+ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+ {
+@@ -1091,6 +1127,18 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
+ unsigned long roots_to_free = 0;
+ int i;
+
++ /*
++ * MOV CR3 and INVPCID are usually not intercepted when using TDP, but
++ * this is reachable when running EPT=1 and unrestricted_guest=0, and
++ * also via the emulator. KVM's TDP page tables are not in the scope of
++ * the invalidation, but the guest's TLB entries need to be flushed as
++ * the CPU may have cached entries in its TLB for the target PCID.
++ */
++ if (unlikely(tdp_enabled)) {
++ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
++ return;
++ }
++
+ /*
+ * If neither the current CR3 nor any of the prev_roots use the given
+ * PCID, then nothing needs to be done here because a resync will
+@@ -1311,27 +1359,17 @@ static const u32 msrs_to_save_all[] = {
+ MSR_IA32_UMWAIT_CONTROL,
+
+ MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+- MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
++ MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
+ MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+ MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+ MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+ MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+ MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+- MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
+- MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
+- MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
+- MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
+- MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
+ MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+ MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+- MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
+- MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
+- MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
+- MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
+- MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+
+ MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
+ MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+@@ -1435,7 +1473,7 @@ static const u32 msr_based_features_all[] = {
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
+- MSR_F10H_DECFG,
++ MSR_AMD64_DE_CFG,
+ MSR_IA32_UCODE_REV,
+ MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_PERF_CAPABILITIES,
+@@ -1444,12 +1482,32 @@ static const u32 msr_based_features_all[] = {
+ static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+ static unsigned int num_msr_based_features;
+
++/*
++ * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
++ * does not yet virtualize. These include:
++ * 10 - MISC_PACKAGE_CTRLS
++ * 11 - ENERGY_FILTERING_CTL
++ * 12 - DOITM
++ * 18 - FB_CLEAR_CTRL
++ * 21 - XAPIC_DISABLE_STATUS
++ * 23 - OVERCLOCKING_STATUS
++ */
++
++#define KVM_SUPPORTED_ARCH_CAP \
++ (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
++ ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
++ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
++ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
++ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
++
+ static u64 kvm_get_arch_capabilities(void)
+ {
+ u64 data = 0;
+
+- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
++ data &= KVM_SUPPORTED_ARCH_CAP;
++ }
+
+ /*
+ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+@@ -1497,6 +1555,9 @@ static u64 kvm_get_arch_capabilities(void)
+ */
+ }
+
++ if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
++ data |= ARCH_CAP_GDS_NO;
++
+ return data;
+ }
+
+@@ -1592,8 +1653,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ return r;
+ }
+
+- /* Update reserved bits */
+- if ((efer ^ old_efer) & EFER_NX)
++ if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
+ kvm_mmu_reset_context(vcpu);
+
+ return 0;
+@@ -3079,17 +3139,20 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ /* only 0 or all 1s can be written to IA32_MCi_CTL
+ * some Linux kernels though clear bit 10 in bank 4 to
+ * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+- * this to avoid an uncatched #GP in the guest
++ * this to avoid an uncatched #GP in the guest.
++ *
++ * UNIXWARE clears bit 0 of MC1_CTL to ignore
++ * correctable, single-bit ECC data errors.
+ */
+ if ((offset & 0x3) == 0 &&
+- data != 0 && (data | (1 << 10)) != ~(u64)0)
+- return -1;
++ data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
++ return 1;
+
+ /* MCi_STATUS */
+ if (!msr_info->host_initiated &&
+ (offset & 0x3) == 1 && data != 0) {
+ if (!can_set_mci_status(vcpu))
+- return -1;
++ return 1;
+ }
+
+ vcpu->arch.mce_banks[offset] = data;
+@@ -3193,10 +3256,37 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
+ static_call(kvm_x86_tlb_flush_guest)(vcpu);
+ }
+
++
++static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
++{
++ ++vcpu->stat.tlb_flush;
++ static_call(kvm_x86_tlb_flush_current)(vcpu);
++}
++
++/*
++ * Service "local" TLB flush requests, which are specific to the current MMU
++ * context. In addition to the generic event handling in vcpu_enter_guest(),
++ * TLB flushes that are targeted at an MMU context also need to be serviced
++ * prior to nested VM-Enter/VM-Exit.
++ */
++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
++{
++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
++ kvm_vcpu_flush_tlb_current(vcpu);
++
++ if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
++ kvm_vcpu_flush_tlb_guest(vcpu);
++}
++EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
++
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_host_map map;
+- struct kvm_steal_time *st;
++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++ struct kvm_steal_time __user *st;
++ struct kvm_memslots *slots;
++ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
++ u64 steal;
++ u32 version;
+
+ if (kvm_xen_msr_enabled(vcpu->kvm)) {
+ kvm_xen_runstate_set_running(vcpu);
+@@ -3206,47 +3296,85 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+- /* -EAGAIN is returned in atomic context so we can just return. */
+- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+- &map, &vcpu->arch.st.cache, false))
++ if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
+ return;
+
+- st = map.hva +
+- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++ slots = kvm_memslots(vcpu->kvm);
++
++ if (unlikely(slots->generation != ghc->generation ||
++ gpa != ghc->gpa ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
++ /* We rely on the fact that it fits in a single page. */
++ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
++
++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)
++ return;
++ }
+
++ st = (struct kvm_steal_time __user *)ghc->hva;
+ /*
+ * Doing a TLB flush here, on the guest's behalf, can avoid
+ * expensive IPIs.
+ */
+ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+- u8 st_preempted = xchg(&st->preempted, 0);
++ u8 st_preempted = 0;
++ int err = -EFAULT;
++
++ if (!user_access_begin(st, sizeof(*st)))
++ return;
++
++ asm volatile("1: xchgb %0, %2\n"
++ "xor %1, %1\n"
++ "2:\n"
++ _ASM_EXTABLE_UA(1b, 2b)
++ : "+q" (st_preempted),
++ "+&r" (err),
++ "+m" (st->preempted));
++ if (err)
++ goto out;
++
++ user_access_end();
++
++ vcpu->arch.st.preempted = 0;
+
+ trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+ st_preempted & KVM_VCPU_FLUSH_TLB);
+ if (st_preempted & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb_guest(vcpu);
++
++ if (!user_access_begin(st, sizeof(*st)))
++ goto dirty;
+ } else {
+- st->preempted = 0;
+- }
++ if (!user_access_begin(st, sizeof(*st)))
++ return;
+
+- vcpu->arch.st.preempted = 0;
++ unsafe_put_user(0, &st->preempted, out);
++ vcpu->arch.st.preempted = 0;
++ }
+
+- if (st->version & 1)
+- st->version += 1; /* first time write, random junk */
++ unsafe_get_user(version, &st->version, out);
++ if (version & 1)
++ version += 1; /* first time write, random junk */
+
+- st->version += 1;
++ version += 1;
++ unsafe_put_user(version, &st->version, out);
+
+ smp_wmb();
+
+- st->steal += current->sched_info.run_delay -
++ unsafe_get_user(steal, &st->steal, out);
++ steal += current->sched_info.run_delay -
+ vcpu->arch.st.last_steal;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
++ unsafe_put_user(steal, &st->steal, out);
+
+- smp_wmb();
+-
+- st->version += 1;
++ version += 1;
++ unsafe_put_user(version, &st->version, out);
+
+- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
++ out:
++ user_access_end();
++ dirty:
++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -3282,7 +3410,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+
+ if (!msr_info->host_initiated)
+ return 1;
+- if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
++ if (kvm_get_msr_feature(&msr_ent))
+ return 1;
+ if (data & ~msr_ent.data)
+ return 1;
+@@ -3376,6 +3504,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ if (data & ~supported_xss)
+ return 1;
+ vcpu->arch.ia32_xss = data;
++ kvm_update_cpuid_runtime(vcpu);
+ break;
+ case MSR_SMI_COUNT:
+ if (!msr_info->host_initiated)
+@@ -4051,10 +4180,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+ r = KVM_CLOCK_TSC_STABLE;
+ break;
+ case KVM_CAP_X86_DISABLE_EXITS:
+- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
+- KVM_X86_DISABLE_EXITS_CSTATE;
+- if(kvm_can_mwait_in_guest())
+- r |= KVM_X86_DISABLE_EXITS_MWAIT;
++ r = KVM_X86_DISABLE_EXITS_PAUSE;
++
++ if (!mitigate_smt_rsb) {
++ r |= KVM_X86_DISABLE_EXITS_HLT |
++ KVM_X86_DISABLE_EXITS_CSTATE;
++
++ if (kvm_can_mwait_in_guest())
++ r |= KVM_X86_DISABLE_EXITS_MWAIT;
++ }
+ break;
+ case KVM_CAP_X86_SMM:
+ /* SMBASE is usually relocated above 1M on modern chipsets,
+@@ -4285,44 +4419,70 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_host_map map;
+- struct kvm_steal_time *st;
++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++ struct kvm_steal_time __user *st;
++ struct kvm_memslots *slots;
++ static const u8 preempted = KVM_VCPU_PREEMPTED;
++ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
++
++ /*
++ * The vCPU can be marked preempted if and only if the VM-Exit was on
++ * an instruction boundary and will not trigger guest emulation of any
++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
++ * when this is true, for example allowing the vCPU to be marked
++ * preempted if and only if the VM-Exit was due to a host interrupt.
++ */
++ if (!vcpu->arch.at_instruction_boundary) {
++ vcpu->stat.preemption_other++;
++ return;
++ }
+
++ vcpu->stat.preemption_reported++;
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+ if (vcpu->arch.st.preempted)
+ return;
+
+- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+- &vcpu->arch.st.cache, true))
++ /* This happens on process exit */
++ if (unlikely(current->mm != vcpu->kvm->mm))
++ return;
++
++ slots = kvm_memslots(vcpu->kvm);
++
++ if (unlikely(slots->generation != ghc->generation ||
++ gpa != ghc->gpa ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+ return;
+
+- st = map.hva +
+- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++ st = (struct kvm_steal_time __user *)ghc->hva;
++ BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+
+- st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
++ if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
++ vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+ int idx;
+
+- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
+- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
++ if (vcpu->preempted) {
++ if (!vcpu->arch.guest_state_protected)
++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+
+- /*
+- * Take the srcu lock as memslots will be accessed to check the gfn
+- * cache generation against the memslots generation.
+- */
+- idx = srcu_read_lock(&vcpu->kvm->srcu);
+- if (kvm_xen_msr_enabled(vcpu->kvm))
+- kvm_xen_runstate_set_preempted(vcpu);
+- else
+- kvm_steal_time_set_preempted(vcpu);
+- srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ /*
++ * Take the srcu lock as memslots will be accessed to check the gfn
++ * cache generation against the memslots generation.
++ */
++ idx = srcu_read_lock(&vcpu->kvm->srcu);
++ if (kvm_xen_msr_enabled(vcpu->kvm))
++ kvm_xen_runstate_set_preempted(vcpu);
++ else
++ kvm_steal_time_set_preempted(vcpu);
++ srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ }
+
+ static_call(kvm_x86_vcpu_put)(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
+@@ -4331,8 +4491,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
+ struct kvm_lapic_state *s)
+ {
+- if (vcpu->arch.apicv_active)
+- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+
+ return kvm_apic_get_state(vcpu, s);
+ }
+@@ -4642,8 +4801,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
+ vcpu->arch.apic->sipi_vector = events->sipi_vector;
+
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
++ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
++ kvm_leave_nested(vcpu);
+ kvm_smm_changed(vcpu, events->smi.smm);
++ }
+
+ vcpu->arch.smi_pending = events->smi.pending;
+
+@@ -4672,12 +4833,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
+ {
+ unsigned long val;
+
++ memset(dbgregs, 0, sizeof(*dbgregs));
+ memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
+ kvm_get_dr(vcpu, 6, &val);
+ dbgregs->dr6 = val;
+ dbgregs->dr7 = vcpu->arch.dr7;
+- dbgregs->flags = 0;
+- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
+ }
+
+ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+@@ -5606,15 +5766,26 @@ split_irqchip_unlock:
+ if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+ break;
+
+- if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+- kvm_can_mwait_in_guest())
+- kvm->arch.mwait_in_guest = true;
+- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
+- kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+ kvm->arch.pause_in_guest = true;
+- if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+- kvm->arch.cstate_in_guest = true;
++
++#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \
++ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests."
++
++ if (!mitigate_smt_rsb) {
++ if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() &&
++ (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
++ pr_warn_once(SMT_RSB_MSG);
++
++ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
++ kvm_can_mwait_in_guest())
++ kvm->arch.mwait_in_guest = true;
++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
++ kvm->arch.hlt_in_guest = true;
++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
++ kvm->arch.cstate_in_guest = true;
++ }
++
+ r = 0;
+ break;
+ case KVM_CAP_MSR_PLATFORM_INFO:
+@@ -5626,6 +5797,11 @@ split_irqchip_unlock:
+ r = 0;
+ break;
+ case KVM_CAP_X86_USER_SPACE_MSR:
++ r = -EINVAL;
++ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
++ KVM_MSR_EXIT_REASON_UNKNOWN |
++ KVM_MSR_EXIT_REASON_FILTER))
++ break;
+ kvm->arch.user_space_msr_mask = cap->args[0];
+ r = 0;
+ break;
+@@ -5746,23 +5922,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+ return 0;
+ }
+
+-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
++static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
++ struct kvm_msr_filter *filter)
+ {
+- struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_x86_msr_filter *new_filter, *old_filter;
+- struct kvm_msr_filter filter;
+ bool default_allow;
+ bool empty = true;
+ int r = 0;
+ u32 i;
+
+- if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+- return -EFAULT;
++ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
++ return -EINVAL;
+
+- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
+- empty &= !filter.ranges[i].nmsrs;
++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
++ empty &= !filter->ranges[i].nmsrs;
+
+- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
++ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
+ if (empty && !default_allow)
+ return -EINVAL;
+
+@@ -5770,8 +5945,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+ if (!new_filter)
+ return -ENOMEM;
+
+- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
++ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
+ if (r) {
+ kvm_free_msr_filter(new_filter);
+ return r;
+@@ -5794,6 +5969,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+ return 0;
+ }
+
++#ifdef CONFIG_KVM_COMPAT
++/* for KVM_X86_SET_MSR_FILTER */
++struct kvm_msr_filter_range_compat {
++ __u32 flags;
++ __u32 nmsrs;
++ __u32 base;
++ __u32 bitmap;
++};
++
++struct kvm_msr_filter_compat {
++ __u32 flags;
++ struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES];
++};
++
++#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat)
++
++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
++ unsigned long arg)
++{
++ void __user *argp = (void __user *)arg;
++ struct kvm *kvm = filp->private_data;
++ long r = -ENOTTY;
++
++ switch (ioctl) {
++ case KVM_X86_SET_MSR_FILTER_COMPAT: {
++ struct kvm_msr_filter __user *user_msr_filter = argp;
++ struct kvm_msr_filter_compat filter_compat;
++ struct kvm_msr_filter filter;
++ int i;
++
++ if (copy_from_user(&filter_compat, user_msr_filter,
++ sizeof(filter_compat)))
++ return -EFAULT;
++
++ filter.flags = filter_compat.flags;
++ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
++ struct kvm_msr_filter_range_compat *cr;
++
++ cr = &filter_compat.ranges[i];
++ filter.ranges[i] = (struct kvm_msr_filter_range) {
++ .flags = cr->flags,
++ .nmsrs = cr->nmsrs,
++ .base = cr->base,
++ .bitmap = (__u8 *)(ulong)cr->bitmap,
++ };
++ }
++
++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
++ break;
++ }
++ }
++
++ return r;
++}
++#endif
++
+ #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+ static int kvm_arch_suspend_notifier(struct kvm *kvm)
+ {
+@@ -6168,9 +6399,16 @@ set_pit2_out:
+ case KVM_SET_PMU_EVENT_FILTER:
+ r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
+ break;
+- case KVM_X86_SET_MSR_FILTER:
+- r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
++ case KVM_X86_SET_MSR_FILTER: {
++ struct kvm_msr_filter __user *user_msr_filter = argp;
++ struct kvm_msr_filter filter;
++
++ if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
++ return -EFAULT;
++
++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
+ break;
++ }
+ default:
+ r = -ENOTTY;
+ }
+@@ -6238,12 +6476,12 @@ static void kvm_init_msr_list(void)
+ intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
+ continue;
+ break;
+- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
++ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 7:
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+ min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+ continue;
+ break;
+- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
++ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 7:
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+ min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+ continue;
+@@ -6803,15 +7041,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+ exception, &write_emultor);
+ }
+
+-#define CMPXCHG_TYPE(t, ptr, old, new) \
+- (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
+-
+-#ifdef CONFIG_X86_64
+-# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
+-#else
+-# define CMPXCHG64(ptr, old, new) \
+- (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
+-#endif
++#define emulator_try_cmpxchg_user(t, ptr, old, new) \
++ (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t))
+
+ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
+@@ -6820,12 +7051,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned int bytes,
+ struct x86_exception *exception)
+ {
+- struct kvm_host_map map;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ u64 page_line_mask;
++ unsigned long hva;
+ gpa_t gpa;
+- char *kaddr;
+- bool exchanged;
++ int r;
+
+ /* guests cmpxchg8b have to be emulated atomically */
+ if (bytes > 8 || (bytes & (bytes - 1)))
+@@ -6849,31 +7079,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+ if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
+ goto emul_write;
+
+- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
++ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa));
++ if (kvm_is_error_hva(hva))
+ goto emul_write;
+
+- kaddr = map.hva + offset_in_page(gpa);
++ hva += offset_in_page(gpa);
+
+ switch (bytes) {
+ case 1:
+- exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u8, hva, old, new);
+ break;
+ case 2:
+- exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u16, hva, old, new);
+ break;
+ case 4:
+- exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u32, hva, old, new);
+ break;
+ case 8:
+- exchanged = CMPXCHG64(kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u64, hva, old, new);
+ break;
+ default:
+ BUG();
+ }
+
+- kvm_vcpu_unmap(vcpu, &map, true);
+-
+- if (!exchanged)
++ if (r < 0)
++ goto emul_write;
++ if (r)
+ return X86EMUL_CMPXCHG_FAILED;
+
+ kvm_page_track_write(vcpu, gpa, new, bytes);
+@@ -6948,7 +7179,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
+ {
+ if (vcpu->arch.pio.count) {
+- /* Complete previous iteration. */
++ /*
++ * Complete a previous iteration that required userspace I/O.
++ * Note, @count isn't guaranteed to match pio.count as userspace
++ * can modify ECX before rerunning the vCPU. Ignore any such
++ * shenanigans as KVM doesn't support modifying the rep count,
++ * and the emulator ensures @count doesn't overflow the buffer.
++ */
+ } else {
+ int r = __emulator_pio_in(vcpu, size, port, count);
+ if (!r)
+@@ -6957,7 +7194,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ /* Results already available, fall through. */
+ }
+
+- WARN_ON(count != vcpu->arch.pio.count);
+ complete_emulator_pio_in(vcpu, val);
+ return 1;
+ }
+@@ -7300,6 +7536,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
+ return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
+ }
+
++static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
++{
++ return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
++}
++
+ static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+ {
+ return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
+@@ -7382,6 +7623,7 @@ static const struct x86_emulate_ops emulate_ops = {
+ .guest_has_long_mode = emulator_guest_has_long_mode,
+ .guest_has_movbe = emulator_guest_has_movbe,
+ .guest_has_fxsr = emulator_guest_has_fxsr,
++ .guest_has_rdpid = emulator_guest_has_rdpid,
+ .set_nmi_mask = emulator_set_nmi_mask,
+ .get_hflags = emulator_get_hflags,
+ .exiting_smm = emulator_exiting_smm,
+@@ -7747,7 +7989,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ }
+ EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
+-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
++static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r)
+ {
+ if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
+ (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
+@@ -7816,25 +8058,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
+ }
+
+ /*
+- * Decode to be emulated instruction. Return EMULATION_OK if success.
++ * Decode an instruction for emulation. The caller is responsible for handling
++ * code breakpoints. Note, manually detecting code breakpoints is unnecessary
++ * (and wrong) when emulating on an intercepted fault-like exception[*], as
++ * code breakpoints have higher priority and thus have already been checked by
++ * hardware.
++ *
++ * [*] Except #MC, which is higher priority, but KVM should never emulate in
++ * response to a machine check.
+ */
+ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
+ void *insn, int insn_len)
+ {
+- int r = EMULATION_OK;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
++ int r;
+
+ init_emulate_ctxt(vcpu);
+
+- /*
+- * We will reenter on the same instruction since we do not set
+- * complete_userspace_io. This does not handle watchpoints yet,
+- * those would be handled in the emulate_ops.
+- */
+- if (!(emulation_type & EMULTYPE_SKIP) &&
+- kvm_vcpu_check_breakpoint(vcpu, &r))
+- return r;
+-
+ r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
+
+ trace_kvm_emulate_insn_start(vcpu);
+@@ -7867,6 +8107,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ if (!(emulation_type & EMULTYPE_NO_DECODE)) {
+ kvm_clear_exception_queue(vcpu);
+
++ /*
++ * Return immediately if RIP hits a code breakpoint; such #DBs
++ * are fault-like and are higher priority than any faults on
++ * the code fetch itself.
++ */
++ if (!(emulation_type & EMULTYPE_SKIP) &&
++ kvm_vcpu_check_code_breakpoint(vcpu, &r))
++ return r;
++
+ r = x86_decode_emulated_instruction(vcpu, emulation_type,
+ insn, insn_len);
+ if (r != EMULATION_OK) {
+@@ -7879,7 +8128,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ write_fault_to_spt,
+ emulation_type))
+ return 1;
+- if (ctxt->have_exception) {
++
++ if (ctxt->have_exception &&
++ !(emulation_type & EMULTYPE_SKIP)) {
+ /*
+ * #UD should result in just EMULATION_FAILED, and trap-like
+ * exception should not be encountered during decode.
+@@ -7905,7 +8156,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ * updating interruptibility state and injecting single-step #DBs.
+ */
+ if (emulation_type & EMULTYPE_SKIP) {
+- kvm_rip_write(vcpu, ctxt->_eip);
++ if (ctxt->mode != X86EMUL_MODE_PROT64)
++ ctxt->eip = (u32)ctxt->_eip;
++ else
++ ctxt->eip = ctxt->_eip;
++
++ kvm_rip_write(vcpu, ctxt->eip);
+ if (ctxt->eflags & X86_EFLAGS_RF)
+ kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
+ return 1;
+@@ -7969,6 +8225,9 @@ restart:
+ writeback = false;
+ r = 0;
+ vcpu->arch.complete_userspace_io = complete_emulated_mmio;
++ } else if (vcpu->arch.complete_userspace_io) {
++ writeback = false;
++ r = 0;
+ } else if (r == EMULATION_RESTART)
+ goto restart;
+ else
+@@ -7978,6 +8237,12 @@ restart:
+ unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
+ toggle_interruptibility(vcpu, ctxt->interruptibility);
+ vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
++
++ /*
++ * Note, EXCPT_DB is assumed to be fault-like as the emulator
++ * only supports code breakpoints and general detect #DB, both
++ * of which are fault-like.
++ */
+ if (!ctxt->have_exception ||
+ exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
+ kvm_rip_write(vcpu, ctxt->eip);
+@@ -8340,7 +8605,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
+ .is_in_guest = kvm_is_in_guest,
+ .is_user_mode = kvm_is_user_mode,
+ .get_guest_ip = kvm_get_guest_ip,
+- .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
++ .handle_intel_pt_intr = NULL,
+ };
+
+ #ifdef CONFIG_X86_64
+@@ -8455,14 +8720,12 @@ int kvm_arch_init(void *opaque)
+ }
+ kvm_nr_uret_msrs = 0;
+
+- r = kvm_mmu_module_init();
++ r = kvm_mmu_vendor_module_init();
+ if (r)
+ goto out_free_percpu;
+
+ kvm_timer_init();
+
+- perf_register_guest_info_callbacks(&kvm_guest_cbs);
+-
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
+ host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
+@@ -8496,7 +8759,6 @@ void kvm_arch_exit(void)
+ clear_hv_tscchange_cb();
+ #endif
+ kvm_lapic_exit();
+- perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
+
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
+@@ -8508,7 +8770,7 @@ void kvm_arch_exit(void)
+ cancel_work_sync(&pvclock_gtod_work);
+ #endif
+ kvm_x86_ops.hardware_enable = NULL;
+- kvm_mmu_module_exit();
++ kvm_mmu_vendor_module_exit();
+ free_percpu(user_return_msrs);
+ kmem_cache_destroy(x86_emulator_cache);
+ kmem_cache_destroy(x86_fpu_cache);
+@@ -8567,6 +8829,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
+ if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
+ return -KVM_EOPNOTSUPP;
+
++ /*
++ * When the TSC is in permanent catchup mode, guests won't be able to
++ * use the pvclock_read_retry loop to get a consistent view of pvclock.
++ */
++ if (vcpu->arch.tsc_always_catchup)
++ return -KVM_EOPNOTSUPP;
++
+ if (!kvm_get_walltime_and_clockread(&ts, &cycle))
+ return -KVM_EOPNOTSUPP;
+
+@@ -8592,15 +8861,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
+ */
+ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
+ {
+- struct kvm_lapic_irq lapic_irq;
+-
+- lapic_irq.shorthand = APIC_DEST_NOSHORT;
+- lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
+- lapic_irq.level = 0;
+- lapic_irq.dest_id = apicid;
+- lapic_irq.msi_redir_hint = false;
++ /*
++ * All other fields are unused for APIC_DM_REMRD, but may be consumed by
++ * common code, e.g. for tracing. Defer initialization to the compiler.
++ */
++ struct kvm_lapic_irq lapic_irq = {
++ .delivery_mode = APIC_DM_REMRD,
++ .dest_mode = APIC_DEST_PHYSICAL,
++ .shorthand = APIC_DEST_NOSHORT,
++ .dest_id = apicid,
++ };
+
+- lapic_irq.delivery_mode = APIC_DM_REMRD;
+ kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
+ }
+
+@@ -8686,7 +8957,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+
+ trace_kvm_hypercall(nr, a0, a1, a2, a3);
+
+- op_64_bit = is_64_bit_mode(vcpu);
++ op_64_bit = is_64_bit_hypercall(vcpu);
+ if (!op_64_bit) {
+ nr &= 0xFFFFFFFF;
+ a0 &= 0xFFFFFFFF;
+@@ -8790,14 +9061,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_run *kvm_run = vcpu->run;
+
+- /*
+- * if_flag is obsolete and useless, so do not bother
+- * setting it for SEV-ES guests. Userspace can just
+- * use kvm_run->ready_for_interrupt_injection.
+- */
+- kvm_run->if_flag = !vcpu->arch.guest_state_protected
+- && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+-
++ kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
+ kvm_run->cr8 = kvm_get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+@@ -8855,6 +9119,11 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
+
+ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+ {
++ trace_kvm_inj_exception(vcpu->arch.exception.nr,
++ vcpu->arch.exception.has_error_code,
++ vcpu->arch.exception.error_code,
++ vcpu->arch.exception.injected);
++
+ if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+ vcpu->arch.exception.error_code = false;
+ static_call(kvm_x86_queue_exception)(vcpu);
+@@ -8912,13 +9181,16 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
+
+ /* try to inject new event if pending */
+ if (vcpu->arch.exception.pending) {
+- trace_kvm_inj_exception(vcpu->arch.exception.nr,
+- vcpu->arch.exception.has_error_code,
+- vcpu->arch.exception.error_code);
+-
+- vcpu->arch.exception.pending = false;
+- vcpu->arch.exception.injected = true;
+-
++ /*
++ * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS
++ * value pushed on the stack. Trap-like exceptions and all #DBs
++ * leave RF as-is (KVM follows Intel's behavior in this regard;
++ * AMD states that code breakpoint #DBs explicitly clear RF=0).
++ *
++ * Note, most versions of Intel's SDM and AMD's APM incorrectly
++ * describe the behavior of General Detect #DBs, which are
++ * fault-like. They do _not_ set RF, a la code breakpoints.
++ */
+ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+ __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+ X86_EFLAGS_RF);
+@@ -8932,6 +9204,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
+ }
+
+ kvm_inject_exception(vcpu);
++
++ vcpu->arch.exception.pending = false;
++ vcpu->arch.exception.injected = true;
++
+ can_inject = false;
+ }
+
+@@ -9359,8 +9635,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
+ if (irqchip_split(vcpu->kvm))
+ kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
+ else {
+- if (vcpu->arch.apicv_active)
+- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+ if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+ }
+@@ -9378,12 +9653,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+ if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+ return;
+
+- if (to_hv_vcpu(vcpu))
++ if (to_hv_vcpu(vcpu)) {
+ bitmap_or((ulong *)eoi_exit_bitmap,
+ vcpu->arch.ioapic_handled_vectors,
+ to_hv_synic(vcpu)->vec_bitmap, 256);
++ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++ return;
++ }
+
+- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++ static_call(kvm_x86_load_eoi_exitmap)(
++ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
+ }
+
+ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+@@ -9400,6 +9679,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+ }
+
++void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
++{
++ static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
++}
++
+ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+ {
+ if (!lapic_in_kernel(vcpu))
+@@ -9475,10 +9759,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ /* Flushing all ASIDs flushes the current ASID... */
+ kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+- if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+- kvm_vcpu_flush_tlb_current(vcpu);
+- if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+- kvm_vcpu_flush_tlb_guest(vcpu);
++ kvm_service_local_tlb_flush_requests(vcpu);
+
+ if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
+@@ -9629,10 +9910,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+
+ /*
+ * This handles the case where a posted interrupt was
+- * notified with kvm_vcpu_kick.
++ * notified with kvm_vcpu_kick. Assigned devices can
++ * use the POSTED_INTR_VECTOR even if APICv is disabled,
++ * so do it even if APICv is disabled on this vCPU.
+ */
+- if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++ if (kvm_lapic_enabled(vcpu))
++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+
+ if (kvm_vcpu_exit_request(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+@@ -9668,13 +9951,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
+ break;
+
+- if (vcpu->arch.apicv_active)
+- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
++ if (kvm_lapic_enabled(vcpu))
++ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+
+ if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+ exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
+ break;
+ }
++
++ /* Note, VM-Exits that go down the "slow" path are accounted below. */
++ ++vcpu->stat.exits;
+ }
+
+ /*
+@@ -9772,12 +10058,28 @@ out:
+
+ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+ {
++ bool hv_timer;
++
+ if (!kvm_arch_vcpu_runnable(vcpu) &&
+ (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
++ /*
++ * Switch to the software timer before halt-polling/blocking as
++ * the guest's timer may be a break event for the vCPU, and the
++ * hypervisor timer runs only when the CPU is in guest mode.
++ * Switch before halt-polling so that KVM recognizes an expired
++ * timer before blocking.
++ */
++ hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
++ if (hv_timer)
++ kvm_lapic_switch_to_sw_timer(vcpu);
++
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
++ if (hv_timer)
++ kvm_lapic_switch_to_hv_timer(vcpu);
++
+ if (kvm_x86_ops.post_block)
+ static_call(kvm_x86_post_block)(vcpu);
+
+@@ -9823,6 +10125,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
+ vcpu->arch.l1tf_flush_l1d = true;
+
+ for (;;) {
++ /*
++ * If another guest vCPU requests a PV TLB flush in the middle
++ * of instruction emulation, the rest of the emulation could
++ * use a stale page translation. Assume that any code after
++ * this point can start executing an instruction.
++ */
++ vcpu->arch.at_instruction_boundary = false;
+ if (kvm_vcpu_running(vcpu)) {
+ r = vcpu_enter_guest(vcpu);
+ } else {
+@@ -10009,6 +10318,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ r = -EINTR;
+ goto out;
+ }
++ /*
++ * It should be impossible for the hypervisor timer to be in
++ * use before KVM has ever run the vCPU.
++ */
++ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
+ kvm_vcpu_block(vcpu);
+ if (kvm_apic_accept_events(vcpu) < 0) {
+ r = 0;
+@@ -10341,7 +10655,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+ return false;
+ }
+
+- return kvm_is_valid_cr4(vcpu, sregs->cr4);
++ return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
++ kvm_is_valid_cr0(vcpu, sregs->cr0);
+ }
+
+ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
+@@ -10707,8 +11022,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ if (r < 0)
+ goto fail_mmu_destroy;
+- if (kvm_apicv_activated(vcpu->kvm))
++
++ /*
++ * Defer evaluating inhibits until the vCPU is first run, as
++ * this vCPU will not get notified of any changes until this
++ * vCPU is visible to other vCPUs (marked online and added to
++ * the set of vCPUs). Opportunistically mark APICv active as
++ * VMX in particular is highly unlikely to have inhibits.
++ * Ignore the current per-VM APICv state so that vCPU creation
++ * is guaranteed to run with a deterministic value; the request
++ * will ensure the vCPU gets the correct state before VM-Entry.
++ */
++ if (enable_apicv) {
+ vcpu->arch.apicv_active = true;
++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++ }
+ } else
+ static_branch_inc(&kvm_has_noapic_vcpu);
+
+@@ -10817,11 +11145,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+
+ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+ {
+- struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+ int idx;
+
+- kvm_release_pfn(cache->pfn, cache->dirty, cache);
+-
+ kvmclock_reset(vcpu);
+
+ static_call(kvm_x86_vcpu_free)(vcpu);
+@@ -10850,8 +11175,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ unsigned long new_cr0;
+ u32 eax, dummy;
+
++ /*
++ * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's
++ * possible to INIT the vCPU while L2 is active. Force the vCPU back
++ * into L1 as EFER.SVME is cleared on INIT (along with all other EFER
++ * bits), i.e. virtualization is disabled.
++ */
++ if (is_guest_mode(vcpu))
++ kvm_leave_nested(vcpu);
++
+ kvm_lapic_reset(vcpu, init_event);
+
++ WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
+ vcpu->arch.hflags = 0;
+
+ vcpu->arch.smi_pending = 0;
+@@ -10908,7 +11243,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+
+ vcpu->arch.msr_misc_features_enables = 0;
+
+- vcpu->arch.xcr0 = XFEATURE_MASK_FP;
++ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
++ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+ }
+
+ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+@@ -10927,8 +11263,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ eax = 0x600;
+ kvm_rdx_write(vcpu, eax);
+
+- vcpu->arch.ia32_xss = 0;
+-
+ static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
+
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+@@ -11104,6 +11438,10 @@ int kvm_arch_hardware_setup(void *opaque)
+ memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
+ kvm_ops_static_call_update();
+
++ if (ops->intel_pt_intr_in_guest && ops->intel_pt_intr_in_guest())
++ kvm_guest_cbs.handle_intel_pt_intr = kvm_handle_intel_pt_intr;
++ perf_register_guest_info_callbacks(&kvm_guest_cbs);
++
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ supported_xss = 0;
+
+@@ -11131,6 +11469,9 @@ int kvm_arch_hardware_setup(void *opaque)
+
+ void kvm_arch_hardware_unsetup(void)
+ {
++ perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
++ kvm_guest_cbs.handle_intel_pt_intr = NULL;
++
+ static_call(kvm_x86_hardware_unsetup)();
+ }
+
+@@ -11420,7 +11761,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
+ if (slot->arch.rmap[i])
+ continue;
+
+- slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
++ slot->arch.rmap[i] = __vcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
+ if (!slot->arch.rmap[i]) {
+ memslot_rmap_free(slot);
+ return -ENOMEM;
+@@ -11501,7 +11842,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+
+ lpages = __kvm_mmu_slot_lpages(slot, npages, level);
+
+- linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
++ linfo = __vcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
+ if (!linfo)
+ goto out_free;
+
+@@ -12045,9 +12386,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+-bool kvm_arch_has_assigned_device(struct kvm *kvm)
++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+- return atomic_read(&kvm->arch.assigned_device_count);
++ return arch_atomic_read(&kvm->arch.assigned_device_count);
+ }
+ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
+@@ -12509,3 +12850,20 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
++
++static int __init kvm_x86_init(void)
++{
++ kvm_mmu_x86_module_init();
++ mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible();
++ return 0;
++}
++module_init(kvm_x86_init);
++
++static void __exit kvm_x86_exit(void)
++{
++ /*
++ * If module_init() is implemented, module_exit() must also be
++ * implemented to allow module unload.
++ */
++}
++module_exit(kvm_x86_exit);
+diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
+index 7d66d63dc55a6..cd0c93ec72fad 100644
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -26,7 +26,7 @@ static __always_inline void kvm_guest_enter_irqoff(void)
+ */
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+
+ guest_enter_irqoff();
+@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
+
+ #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
+
++void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
+ int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+
+ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
+@@ -153,12 +154,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
+ {
+ int cs_db, cs_l;
+
++ WARN_ON_ONCE(vcpu->arch.guest_state_protected);
++
+ if (!is_long_mode(vcpu))
+ return false;
+ static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
+ return cs_l;
+ }
+
++static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
++{
++ /*
++ * If running with protected guest state, the CS register is not
++ * accessible. The hypercall register values will have had to be
++ * provided in 64-bit mode, so assume the guest is in 64-bit.
++ */
++ return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
++}
++
+ static inline bool x86_exception_has_error_code(unsigned int vector)
+ {
+ static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+@@ -173,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
+ return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
+ }
+
+-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+-{
+- ++vcpu->stat.tlb_flush;
+- static_call(kvm_x86_tlb_flush_current)(vcpu);
+-}
+-
+ static inline int is_pae(struct kvm_vcpu *vcpu)
+ {
+ return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
+@@ -441,7 +448,7 @@ static inline void kvm_machine_check(void)
+ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
+ int kvm_spec_ctrl_test_value(u64 value);
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+ struct x86_exception *e);
+ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
+diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
+index 8f62baebd0286..ab9f88de6deb9 100644
+--- a/arch/x86/kvm/xen.c
++++ b/arch/x86/kvm/xen.c
+@@ -93,32 +93,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+ {
+ struct kvm_vcpu_xen *vx = &v->arch.xen;
++ struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
++ struct kvm_memslots *slots = kvm_memslots(v->kvm);
++ bool atomic = (state == RUNSTATE_runnable);
+ uint64_t state_entry_time;
+- unsigned int offset;
++ int __user *user_state;
++ uint64_t __user *user_times;
+
+ kvm_xen_update_runstate(v, state);
+
+ if (!vx->runstate_set)
+ return;
+
+- BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
++ if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
++ kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
++ return;
++
++ /* We made sure it fits in a single page */
++ BUG_ON(!ghc->memslot);
++
++ if (atomic)
++ pagefault_disable();
+
+- offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+-#ifdef CONFIG_X86_64
+ /*
+- * The only difference is alignment of uint64_t in 32-bit.
+- * So the first field 'state' is accessed directly using
+- * offsetof() (where its offset happens to be zero), while the
+- * remaining fields which are all uint64_t, start at 'offset'
+- * which we tweak here by adding 4.
++ * The only difference between 32-bit and 64-bit versions of the
++ * runstate struct is the alignment of uint64_t in 32-bit, which
++ * means that the 64-bit version has an additional 4 bytes of
++ * padding after the first field 'state'.
++ *
++ * So we use 'int __user *user_state' to point to the state field,
++ * and 'uint64_t __user *user_times' for runstate_entry_time. So
++ * the actual array of time[] in each state starts at user_times[1].
+ */
++ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
++ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
++ user_state = (int __user *)ghc->hva;
++
++ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
++
++ user_times = (uint64_t __user *)(ghc->hva +
++ offsetof(struct compat_vcpu_runstate_info,
++ state_entry_time));
++#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+ if (v->kvm->arch.xen.long_mode)
+- offset = offsetof(struct vcpu_runstate_info, state_entry_time);
++ user_times = (uint64_t __user *)(ghc->hva +
++ offsetof(struct vcpu_runstate_info,
++ state_entry_time));
+ #endif
+ /*
+ * First write the updated state_entry_time at the appropriate
+@@ -132,10 +157,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ sizeof(state_entry_time));
+
+- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+- &state_entry_time, offset,
+- sizeof(state_entry_time)))
+- return;
++ if (__put_user(state_entry_time, user_times))
++ goto out;
+ smp_wmb();
+
+ /*
+@@ -149,11 +172,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ sizeof(vx->current_runstate));
+
+- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+- &vx->current_runstate,
+- offsetof(struct vcpu_runstate_info, state),
+- sizeof(vx->current_runstate)))
+- return;
++ if (__put_user(vx->current_runstate, user_state))
++ goto out;
+
+ /*
+ * Write the actual runstate times immediately after the
+@@ -168,24 +188,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(vx->runstate_times));
+
+- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+- &vx->runstate_times[0],
+- offset + sizeof(u64),
+- sizeof(vx->runstate_times)))
+- return;
+-
++ if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
++ goto out;
+ smp_wmb();
+
+ /*
+ * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+ * runstate_entry_time field.
+ */
+-
+ state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+- &state_entry_time, offset,
+- sizeof(state_entry_time)))
+- return;
++ __put_user(state_entry_time, user_times);
++ smp_wmb();
++
++ out:
++ mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
++
++ if (atomic)
++ pagefault_enable();
+ }
+
+ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+@@ -299,7 +318,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
+ break;
+
+ case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
++ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
+ r = 0;
+ break;
+
+@@ -337,6 +356,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
+ break;
+ }
+
++ /* It must fit within a single page */
++ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
++ r = -EINVAL;
++ break;
++ }
++
+ r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_info_cache,
+ data->u.gpa,
+@@ -354,6 +379,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
+ break;
+ }
+
++ /* It must fit within a single page */
++ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
++ r = -EINVAL;
++ break;
++ }
++
+ r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_time_info_cache,
+ data->u.gpa,
+@@ -375,6 +406,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
+ break;
+ }
+
++ /* It must fit within a single page */
++ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
++ r = -EINVAL;
++ break;
++ }
++
+ r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache,
+ data->u.gpa,
+@@ -698,7 +735,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
+ kvm_hv_hypercall_enabled(vcpu))
+ return kvm_hv_hypercall(vcpu);
+
+- longmode = is_64_bit_mode(vcpu);
++ longmode = is_64_bit_hypercall(vcpu);
+ if (!longmode) {
+ params[0] = (u32)kvm_rbx_read(vcpu);
+ params[1] = (u32)kvm_rcx_read(vcpu);
+diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
+index cc0cf5f37450b..a7693a286e401 100644
+--- a/arch/x86/kvm/xen.h
++++ b/arch/x86/kvm/xen.h
+@@ -97,8 +97,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+ * behalf of the vCPU. Only if the VMM does actually block
+ * does it need to enter RUNSTATE_blocked.
+ */
+- if (vcpu->preempted)
+- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
++ if (WARN_ON_ONCE(!vcpu->preempted))
++ return;
++
++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+ }
+
+ /* 32-bit compatibility definitions, also used natively in 32-bit build */
+diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
+index 16bc9130e7a5e..e768815e58ae4 100644
+--- a/arch/x86/lib/atomic64_386_32.S
++++ b/arch/x86/lib/atomic64_386_32.S
+@@ -9,81 +9,83 @@
+ #include <asm/alternative.h>
+
+ /* if you want SMP support, implement these with real spinlocks */
+-.macro LOCK reg
++.macro IRQ_SAVE reg
+ pushfl
+ cli
+ .endm
+
+-.macro UNLOCK reg
++.macro IRQ_RESTORE reg
+ popfl
+ .endm
+
+-#define BEGIN(op) \
++#define BEGIN_IRQ_SAVE(op) \
+ .macro endp; \
+ SYM_FUNC_END(atomic64_##op##_386); \
+ .purgem endp; \
+ .endm; \
+ SYM_FUNC_START(atomic64_##op##_386); \
+- LOCK v;
++ IRQ_SAVE v;
+
+ #define ENDP endp
+
+-#define RET \
+- UNLOCK v; \
+- ret
+-
+-#define RET_ENDP \
+- RET; \
+- ENDP
++#define RET_IRQ_RESTORE \
++ IRQ_RESTORE v; \
++ RET
+
+ #define v %ecx
+-BEGIN(read)
++BEGIN_IRQ_SAVE(read)
+ movl (v), %eax
+ movl 4(v), %edx
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(set)
++BEGIN_IRQ_SAVE(set)
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(xchg)
++BEGIN_IRQ_SAVE(xchg)
+ movl (v), %eax
+ movl 4(v), %edx
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(add)
++BEGIN_IRQ_SAVE(add)
+ addl %eax, (v)
+ adcl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(add_return)
++BEGIN_IRQ_SAVE(add_return)
+ addl (v), %eax
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(sub)
++BEGIN_IRQ_SAVE(sub)
+ subl %eax, (v)
+ sbbl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %ecx
+-BEGIN(sub_return)
++BEGIN_IRQ_SAVE(sub_return)
+ negl %edx
+ negl %eax
+ sbbl $0, %edx
+@@ -91,47 +93,52 @@ BEGIN(sub_return)
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc)
++BEGIN_IRQ_SAVE(inc)
+ addl $1, (v)
+ adcl $0, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc_return)
++BEGIN_IRQ_SAVE(inc_return)
+ movl (v), %eax
+ movl 4(v), %edx
+ addl $1, %eax
+ adcl $0, %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec)
++BEGIN_IRQ_SAVE(dec)
+ subl $1, (v)
+ sbbl $0, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec_return)
++BEGIN_IRQ_SAVE(dec_return)
+ movl (v), %eax
+ movl 4(v), %edx
+ subl $1, %eax
+ sbbl $0, %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(add_unless)
++BEGIN_IRQ_SAVE(add_unless)
+ addl %eax, %ecx
+ adcl %edx, %edi
+ addl (v), %eax
+@@ -143,7 +150,7 @@ BEGIN(add_unless)
+ movl %edx, 4(v)
+ movl $1, %eax
+ 2:
+- RET
++ RET_IRQ_RESTORE
+ 3:
+ cmpl %edx, %edi
+ jne 1b
+@@ -153,7 +160,7 @@ ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(inc_not_zero)
++BEGIN_IRQ_SAVE(inc_not_zero)
+ movl (v), %eax
+ movl 4(v), %edx
+ testl %eax, %eax
+@@ -165,7 +172,7 @@ BEGIN(inc_not_zero)
+ movl %edx, 4(v)
+ movl $1, %eax
+ 2:
+- RET
++ RET_IRQ_RESTORE
+ 3:
+ testl %edx, %edx
+ jne 1b
+@@ -174,7 +181,7 @@ ENDP
+ #undef v
+
+ #define v %esi
+-BEGIN(dec_if_positive)
++BEGIN_IRQ_SAVE(dec_if_positive)
+ movl (v), %eax
+ movl 4(v), %edx
+ subl $1, %eax
+@@ -183,5 +190,6 @@ BEGIN(dec_if_positive)
+ movl %eax, (v)
+ movl %edx, 4(v)
+ 1:
+-RET_ENDP
++ RET_IRQ_RESTORE
++ENDP
+ #undef v
+diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
+index ce6935690766f..90afb488b396a 100644
+--- a/arch/x86/lib/atomic64_cx8_32.S
++++ b/arch/x86/lib/atomic64_cx8_32.S
+@@ -18,7 +18,7 @@
+
+ SYM_FUNC_START(atomic64_read_cx8)
+ read64 %ecx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_read_cx8)
+
+ SYM_FUNC_START(atomic64_set_cx8)
+@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8)
+ cmpxchg8b (%esi)
+ jne 1b
+
+- ret
++ RET
+ SYM_FUNC_END(atomic64_set_cx8)
+
+ SYM_FUNC_START(atomic64_xchg_cx8)
+@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8)
+ cmpxchg8b (%esi)
+ jne 1b
+
+- ret
++ RET
+ SYM_FUNC_END(atomic64_xchg_cx8)
+
+ .macro addsub_return func ins insc
+@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
+ popl %esi
+ popl %ebx
+ popl %ebp
+- ret
++ RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+
+@@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_\func\()_return_cx8)
+ .endm
+
+@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_cx8)
+ movl %ebx, %eax
+ movl %ecx, %edx
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_dec_if_positive_cx8)
+
+ SYM_FUNC_START(atomic64_add_unless_cx8)
+@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8)
+ addl $8, %esp
+ popl %ebx
+ popl %ebp
+- ret
++ RET
+ 4:
+ cmpl %edx, 4(%esp)
+ jne 2b
+@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8)
+ movl $1, %eax
+ 3:
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(atomic64_inc_not_zero_cx8)
+diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
+index 4304320e51f4d..929ad1747dea0 100644
+--- a/arch/x86/lib/checksum_32.S
++++ b/arch/x86/lib/checksum_32.S
+@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial)
+ 8:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+ SYM_FUNC_END(csum_partial)
+
+ #else
+@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial)
+ 90:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+ SYM_FUNC_END(csum_partial)
+
+ #endif
+@@ -371,7 +371,7 @@ EXC( movb %cl, (%edi) )
+ popl %esi
+ popl %edi
+ popl %ecx # equivalent to addl $4,%esp
+- ret
++ RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+
+ #else
+@@ -447,7 +447,7 @@ EXC( movb %dl, (%edi) )
+ popl %esi
+ popl %edi
+ popl %ebx
+- ret
++ RET
+ SYM_FUNC_END(csum_partial_copy_generic)
+
+ #undef ROUND
+diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
+index c4c7dd115953c..fe59b8ac4fccd 100644
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep)
+ movl $4096/8,%ecx
+ xorl %eax,%eax
+ rep stosq
+- ret
++ RET
+ SYM_FUNC_END(clear_page_rep)
+ EXPORT_SYMBOL_GPL(clear_page_rep)
+
+@@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig)
+ leaq 64(%rdi),%rdi
+ jnz .Lloop
+ nop
+- ret
++ RET
+ SYM_FUNC_END(clear_page_orig)
+ EXPORT_SYMBOL_GPL(clear_page_orig)
+
+@@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms)
+ movl $4096,%ecx
+ xorl %eax,%eax
+ rep stosb
+- ret
++ RET
+ SYM_FUNC_END(clear_page_erms)
+ EXPORT_SYMBOL_GPL(clear_page_erms)
+diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
+index 3542502faa3b7..33c70c0160ea0 100644
+--- a/arch/x86/lib/cmpxchg16b_emu.S
++++ b/arch/x86/lib/cmpxchg16b_emu.S
+@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
+
+ popfq
+ mov $1, %al
+- ret
++ RET
+
+ .Lnot_same:
+ popfq
+ xor %al,%al
+- ret
++ RET
+
+ SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
+index ca01ed6029f4f..6a912d58fecc3 100644
+--- a/arch/x86/lib/cmpxchg8b_emu.S
++++ b/arch/x86/lib/cmpxchg8b_emu.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+ movl %ecx, 4(%esi)
+
+ popfl
+- ret
++ RET
+
+ .Lnot_same:
+ movl (%esi), %eax
+@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
+ movl 4(%esi), %edx
+
+ popfl
+- ret
++ RET
+
+ SYM_FUNC_END(cmpxchg8b_emu)
+ EXPORT_SYMBOL(cmpxchg8b_emu)
+diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
+index e5f77e2930349..2c623a2bbd26e 100644
+--- a/arch/x86/lib/copy_mc_64.S
++++ b/arch/x86/lib/copy_mc_64.S
+@@ -77,7 +77,7 @@ SYM_FUNC_START(copy_mc_fragile)
+ .L_done_memcpy_trap:
+ xorl %eax, %eax
+ .L_done:
+- ret
++ RET
+ SYM_FUNC_END(copy_mc_fragile)
+
+ .section .fixup, "ax"
+@@ -132,7 +132,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string)
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+- ret
++ RET
+ SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+@@ -145,7 +145,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string)
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+- ret
++ RET
+
+ .previous
+
+diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
+index db4b4f9197c7d..30ea644bf446d 100644
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page)
+ ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+ movl $4096/8, %ecx
+ rep movsq
+- ret
++ RET
+ SYM_FUNC_END(copy_page)
+ EXPORT_SYMBOL(copy_page)
+
+@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
+ movq (%rsp), %rbx
+ movq 1*8(%rsp), %r12
+ addq $2*8, %rsp
+- ret
++ RET
+ SYM_FUNC_END(copy_page_regs)
+diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
+index 57b79c577496d..84cee84fc658a 100644
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolled)
+ jnz 21b
+ 23: xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 30: shll $6,%ecx
+@@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string)
+ movsb
+ xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 11: leal (%rdx,%rcx,8),%ecx
+@@ -207,7 +207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
+ movsb
+ xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ .section .fixup,"ax"
+ 12: movl %ecx,%edx /* ecx is zerorest also */
+@@ -239,7 +239,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
+ 1: rep movsb
+ 2: mov %ecx,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ /*
+ * Return zero to pretend that this copy succeeded. This
+@@ -250,7 +250,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
+ */
+ 3: xorl %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+
+ _ASM_EXTABLE_CPY(1b, 2b)
+ SYM_CODE_END(.Lcopy_user_handle_tail)
+@@ -361,7 +361,7 @@ SYM_FUNC_START(__copy_user_nocache)
+ xorl %eax,%eax
+ ASM_CLAC
+ sfence
+- ret
++ RET
+
+ .section .fixup,"ax"
+ .L_fixup_4x8b_copy:
+diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
+index 1fbd8ee9642d1..d9e16a2cf2856 100644
+--- a/arch/x86/lib/csum-copy_64.S
++++ b/arch/x86/lib/csum-copy_64.S
+@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
+ movq 3*8(%rsp), %r13
+ movq 4*8(%rsp), %r15
+ addq $5*8, %rsp
+- ret
++ RET
+ .Lshort:
+ movl %ecx, %r10d
+ jmp .L1
+diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
+index 65d15df6212d6..0e65d00e2339f 100644
+--- a/arch/x86/lib/delay.c
++++ b/arch/x86/lib/delay.c
+@@ -54,8 +54,8 @@ static void delay_loop(u64 __loops)
+ " jnz 2b \n"
+ "3: dec %0 \n"
+
+- : /* we don't need output */
+- :"a" (loops)
++ : "+a" (loops)
++ :
+ );
+ }
+
+diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
+index be5b5fb1598bd..520897061ee09 100644
+--- a/arch/x86/lib/error-inject.c
++++ b/arch/x86/lib/error-inject.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+
++#include <linux/linkage.h>
+ #include <linux/error-injection.h>
+ #include <linux/kprobes.h>
+
+@@ -10,7 +11,7 @@ asm(
+ ".type just_return_func, @function\n"
+ ".globl just_return_func\n"
+ "just_return_func:\n"
+- " ret\n"
++ ASM_RET
+ ".size just_return_func, .-just_return_func\n"
+ );
+
+diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
+index fa1bc2104b326..b70d98d79a9da 100644
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1)
+ 1: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_1)
+ EXPORT_SYMBOL(__get_user_1)
+
+@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2)
+ 2: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_2)
+ EXPORT_SYMBOL(__get_user_2)
+
+@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4)
+ 3: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_4)
+ EXPORT_SYMBOL(__get_user_4)
+
+@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8)
+ 4: movq (%_ASM_AX),%rdx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ #else
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
+@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8)
+ 5: movl 4(%_ASM_AX),%ecx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ #endif
+ SYM_FUNC_END(__get_user_8)
+ EXPORT_SYMBOL(__get_user_8)
+@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
+ 6: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_1)
+ EXPORT_SYMBOL(__get_user_nocheck_1)
+
+@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
+ 7: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_2)
+ EXPORT_SYMBOL(__get_user_nocheck_2)
+
+@@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
+ 8: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_4)
+ EXPORT_SYMBOL(__get_user_nocheck_4)
+
+@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8)
+ #endif
+ xor %eax,%eax
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__get_user_nocheck_8)
+ EXPORT_SYMBOL(__get_user_nocheck_8)
+
+@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
+ bad_get_user:
+ xor %edx,%edx
+ mov $(-EFAULT),%_ASM_AX
+- ret
++ RET
+ SYM_CODE_END(.Lbad_get_user_clac)
+
+ #ifdef CONFIG_X86_32
+@@ -179,7 +179,7 @@ bad_get_user_8:
+ xor %edx,%edx
+ xor %ecx,%ecx
+ mov $(-EFAULT),%_ASM_AX
+- ret
++ RET
+ SYM_CODE_END(.Lbad_get_user_8_clac)
+ #endif
+
+diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
+index dbf8cc97b7f53..12c16c6aa44a3 100644
+--- a/arch/x86/lib/hweight.S
++++ b/arch/x86/lib/hweight.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32)
+ imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
+ shrl $24, %eax # w = w_tmp >> 24
+ __ASM_SIZE(pop,) %__ASM_REG(dx)
+- ret
++ RET
+ SYM_FUNC_END(__sw_hweight32)
+ EXPORT_SYMBOL(__sw_hweight32)
+
+@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64)
+
+ popq %rdx
+ popq %rdi
+- ret
++ RET
+ #else /* CONFIG_X86_32 */
+ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+ pushl %ecx
+@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64)
+ addl %ecx, %eax # result
+
+ popl %ecx
+- ret
++ RET
+ #endif
+ SYM_FUNC_END(__sw_hweight64)
+ EXPORT_SYMBOL(__sw_hweight64)
+diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
+index a1d24fdc07cf0..c8a962c2e653d 100644
+--- a/arch/x86/lib/insn-eval.c
++++ b/arch/x86/lib/insn-eval.c
+@@ -412,32 +412,44 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
+ #endif /* CONFIG_X86_64 */
+ }
+
+-static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
+- enum reg_type type)
++static const int pt_regoff[] = {
++ offsetof(struct pt_regs, ax),
++ offsetof(struct pt_regs, cx),
++ offsetof(struct pt_regs, dx),
++ offsetof(struct pt_regs, bx),
++ offsetof(struct pt_regs, sp),
++ offsetof(struct pt_regs, bp),
++ offsetof(struct pt_regs, si),
++ offsetof(struct pt_regs, di),
++#ifdef CONFIG_X86_64
++ offsetof(struct pt_regs, r8),
++ offsetof(struct pt_regs, r9),
++ offsetof(struct pt_regs, r10),
++ offsetof(struct pt_regs, r11),
++ offsetof(struct pt_regs, r12),
++ offsetof(struct pt_regs, r13),
++ offsetof(struct pt_regs, r14),
++ offsetof(struct pt_regs, r15),
++#else
++ offsetof(struct pt_regs, ds),
++ offsetof(struct pt_regs, es),
++ offsetof(struct pt_regs, fs),
++ offsetof(struct pt_regs, gs),
++#endif
++};
++
++int pt_regs_offset(struct pt_regs *regs, int regno)
++{
++ if ((unsigned)regno < ARRAY_SIZE(pt_regoff))
++ return pt_regoff[regno];
++ return -EDOM;
++}
++
++static int get_regno(struct insn *insn, enum reg_type type)
+ {
++ int nr_registers = ARRAY_SIZE(pt_regoff);
+ int regno = 0;
+
+- static const int regoff[] = {
+- offsetof(struct pt_regs, ax),
+- offsetof(struct pt_regs, cx),
+- offsetof(struct pt_regs, dx),
+- offsetof(struct pt_regs, bx),
+- offsetof(struct pt_regs, sp),
+- offsetof(struct pt_regs, bp),
+- offsetof(struct pt_regs, si),
+- offsetof(struct pt_regs, di),
+-#ifdef CONFIG_X86_64
+- offsetof(struct pt_regs, r8),
+- offsetof(struct pt_regs, r9),
+- offsetof(struct pt_regs, r10),
+- offsetof(struct pt_regs, r11),
+- offsetof(struct pt_regs, r12),
+- offsetof(struct pt_regs, r13),
+- offsetof(struct pt_regs, r14),
+- offsetof(struct pt_regs, r15),
+-#endif
+- };
+- int nr_registers = ARRAY_SIZE(regoff);
+ /*
+ * Don't possibly decode a 32-bit instructions as
+ * reading a 64-bit-only register.
+@@ -505,7 +517,18 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
+ WARN_ONCE(1, "decoded an instruction with an invalid register");
+ return -EINVAL;
+ }
+- return regoff[regno];
++ return regno;
++}
++
++static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
++ enum reg_type type)
++{
++ int regno = get_regno(insn, type);
++
++ if (regno < 0)
++ return regno;
++
++ return pt_regs_offset(regs, regno);
+ }
+
+ /**
+@@ -1417,7 +1440,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
+ }
+ }
+
+-static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
++int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
+ {
+ unsigned long seg_base = 0;
+
+diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
+index c565def611e24..55e371cc69fd5 100644
+--- a/arch/x86/lib/insn.c
++++ b/arch/x86/lib/insn.c
+@@ -13,6 +13,7 @@
+ #endif
+ #include <asm/inat.h> /*__ignore_sync_check__ */
+ #include <asm/insn.h> /* __ignore_sync_check__ */
++#include <asm/unaligned.h> /* __ignore_sync_check__ */
+
+ #include <linux/errno.h>
+ #include <linux/kconfig.h>
+@@ -37,10 +38,10 @@
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+ #define __get_next(t, insn) \
+- ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
++ ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); })
+
+ #define __peek_nbyte_next(t, insn, n) \
+- ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); })
++ ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); })
+
+ #define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
+index cb5a1964506b1..6ff2f56cb0f71 100644
+--- a/arch/x86/lib/iomap_copy_64.S
++++ b/arch/x86/lib/iomap_copy_64.S
+@@ -10,6 +10,6 @@
+ */
+ SYM_FUNC_START(__iowrite32_copy)
+ movl %edx,%ecx
+- rep movsd
+- ret
++ rep movsl
++ RET
+ SYM_FUNC_END(__iowrite32_copy)
+diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
+index 1cc9da6e29c79..59cf2343f3d90 100644
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+ rep movsq
+ movl %edx, %ecx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy_erms)
+
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq %r9, 1*8(%rdi)
+ movq %r10, -2*8(%rdi, %rdx)
+ movq %r11, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_16bytes:
+ cmpl $8, %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq -1*8(%rsi, %rdx), %r9
+ movq %r8, 0*8(%rdi)
+ movq %r9, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_8bytes:
+ cmpl $4, %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movl -4(%rsi, %rdx), %r8d
+ movl %ecx, (%rdi)
+ movl %r8d, -4(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_3bytes:
+ subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movb %cl, (%rdi)
+
+ .Lend:
+- retq
++ RET
+ SYM_FUNC_END(memcpy_orig)
+
+ .popsection
+diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
+index 64801010d312d..4b8ee3a2fcc37 100644
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove)
+ /* FSRM implies ERMS => no length checks, do the copy directly */
+ .Lmemmove_begin_forward:
+ ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
+- ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
+
+ /*
+ * movsq instruction have many startup latency
+@@ -205,7 +205,12 @@ SYM_FUNC_START(__memmove)
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+ 13:
+- retq
++ RET
++
++.Lmemmove_erms:
++ movq %rdx, %rcx
++ rep movsb
++ RET
+ SYM_FUNC_END(__memmove)
+ SYM_FUNC_END_ALIAS(memmove)
+ EXPORT_SYMBOL(__memmove)
+diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
+index 9827ae267f96e..d624f2bc42f16 100644
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+ movl %edx,%ecx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+ movq %rdx,%rcx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(memset_erms)
+
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+
+ .Lende:
+ movq %r10,%rax
+- ret
++ RET
+
+ .Lbad_alignment:
+ cmpq $7,%rdx
+diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
+index a2b9caa5274c8..ebd259f314963 100644
+--- a/arch/x86/lib/msr-reg.S
++++ b/arch/x86/lib/msr-reg.S
+@@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+ movl %edi, 28(%r10)
+ popq %r12
+ popq %rbx
+- ret
++ RET
+ 3:
+ movl $-EIO, %r11d
+ jmp 2b
+@@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs)
+ popl %esi
+ popl %ebp
+ popl %ebx
+- ret
++ RET
+ 3:
+ movl $-EIO, 4(%esp)
+ jmp 2b
+diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
+index 0ea344c5ea439..ecb2049c1273f 100644
+--- a/arch/x86/lib/putuser.S
++++ b/arch/x86/lib/putuser.S
+@@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
+ 1: movb %al,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_1)
+ EXPORT_SYMBOL(__put_user_1)
+ EXPORT_SYMBOL(__put_user_nocheck_1)
+@@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
+ 2: movw %ax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_2)
+ EXPORT_SYMBOL(__put_user_2)
+ EXPORT_SYMBOL(__put_user_nocheck_2)
+@@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
+ 3: movl %eax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+- ret
++ RET
+ SYM_FUNC_END(__put_user_4)
+ EXPORT_SYMBOL(__put_user_4)
+ EXPORT_SYMBOL(__put_user_nocheck_4)
+diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
+index ec9922cba30a4..6f5321b36dbb1 100644
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -9,8 +9,9 @@
+ #include <asm/nospec-branch.h>
+ #include <asm/unwind_hints.h>
+ #include <asm/frame.h>
++#include <asm/nops.h>
+
+- .section .text.__x86.indirect_thunk
++ .section .text..__x86.indirect_thunk
+
+ .macro RETPOLINE reg
+ ANNOTATE_INTRA_FUNCTION_CALL
+@@ -23,50 +24,18 @@
+ .Ldo_rop_\@:
+ mov %\reg, (%_ASM_SP)
+ UNWIND_HINT_FUNC
+- ret
++ RET
+ .endm
+
+ .macro THUNK reg
+
+- .align 32
+-
+-SYM_FUNC_START(__x86_indirect_thunk_\reg)
+-
+- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+-
+-SYM_FUNC_END(__x86_indirect_thunk_\reg)
+-
+-.endm
+-
+-/*
+- * This generates .altinstr_replacement symbols for use by objtool. They,
+- * however, must not actually live in .altinstr_replacement since that will be
+- * discarded after init, but module alternatives will also reference these
+- * symbols.
+- *
+- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+- */
+-.macro ALT_THUNK reg
+-
+- .align 1
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+- ANNOTATE_RETPOLINE_SAFE
+-1: call *%\reg
+-2: .skip 5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+-
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
+-
+-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+- ANNOTATE_RETPOLINE_SAFE
+-1: jmp *%\reg
+-2: .skip 5-(2b-1b), 0x90
+-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
++ .align RETPOLINE_THUNK_SIZE
++SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
++ UNWIND_HINT_EMPTY
+
+-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
+
+ .endm
+
+@@ -85,22 +54,214 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
+ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+ #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+
+-#undef GEN
++ .align RETPOLINE_THUNK_SIZE
++SYM_CODE_START(__x86_indirect_thunk_array)
++
+ #define GEN(reg) THUNK reg
+ #include <asm/GEN-for-each-reg.h>
+-
+ #undef GEN
++
++ .align RETPOLINE_THUNK_SIZE
++SYM_CODE_END(__x86_indirect_thunk_array)
++
+ #define GEN(reg) EXPORT_THUNK(reg)
+ #include <asm/GEN-for-each-reg.h>
+-
+ #undef GEN
+-#define GEN(reg) ALT_THUNK reg
+-#include <asm/GEN-for-each-reg.h>
+
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
++/*
++ * This function name is magical and is used by -mfunction-return=thunk-extern
++ * for the compiler to generate JMPs to it.
++ */
++#ifdef CONFIG_RETHUNK
+
+-#undef GEN
+-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+-#include <asm/GEN-for-each-reg.h>
++/*
++ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
++ * special addresses:
++ *
++ * - srso_alias_untrain_ret() is 2M aligned
++ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
++ * and 20 in its virtual address are set (while those bits in the
++ * srso_alias_untrain_ret() function are cleared).
++ *
++ * This guarantees that those two addresses will alias in the branch
++ * target buffer of Zen3/4 generations, leading to any potential
++ * poisoned entries at that BTB slot to get evicted.
++ *
++ * As a result, srso_alias_safe_ret() becomes a safe return.
++ */
++#ifdef CONFIG_CPU_SRSO
++ .section .text..__x86.rethunk_untrain
++
++SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
++ UNWIND_HINT_FUNC
++ ASM_NOP2
++ lfence
++ jmp srso_alias_return_thunk
++SYM_FUNC_END(srso_alias_untrain_ret)
++__EXPORT_THUNK(srso_alias_untrain_ret)
++
++ .section .text..__x86.rethunk_safe
++#else
++/* dummy definition for alternatives */
++SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
++SYM_FUNC_END(srso_alias_untrain_ret)
++#endif
++
++SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
++ lea 8(%_ASM_SP), %_ASM_SP
++ UNWIND_HINT_FUNC
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
++SYM_FUNC_END(srso_alias_safe_ret)
++
++ .section .text..__x86.return_thunk
++
++SYM_CODE_START(srso_alias_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ call srso_alias_safe_ret
++ ud2
++SYM_CODE_END(srso_alias_return_thunk)
++
++/*
++ * Some generic notes on the untraining sequences:
++ *
++ * They are interchangeable when it comes to flushing potentially wrong
++ * RET predictions from the BTB.
++ *
++ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
++ * Retbleed sequence because the return sequence done there
++ * (srso_safe_ret()) is longer and the return sequence must fully nest
++ * (end before) the untraining sequence. Therefore, the untraining
++ * sequence must fully overlap the return sequence.
++ *
++ * Regarding alignment - the instructions which need to be untrained,
++ * must all start at a cacheline boundary for Zen1/2 generations. That
++ * is, instruction sequences starting at srso_safe_ret() and
++ * the respective instruction sequences at retbleed_return_thunk()
++ * must start at a cacheline boundary.
++ */
++
++/*
++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
++ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
++ * alignment within the BTB.
++ * 2) The instruction at retbleed_untrain_ret must contain, and not
++ * end with, the 0xc3 byte of the RET.
++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
++ * from re-poisioning the BTB prediction.
++ */
++ .align 64
++ .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
++SYM_FUNC_START_NOALIGN(retbleed_untrain_ret);
++
++ /*
++ * As executed from retbleed_untrain_ret, this is:
++ *
++ * TEST $0xcc, %bl
++ * LFENCE
++ * JMP retbleed_return_thunk
++ *
++ * Executing the TEST instruction has a side effect of evicting any BTB
++ * prediction (potentially attacker controlled) attached to the RET, as
++ * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
++ */
++ .byte 0xf6
++
++ /*
++ * As executed from retbleed_return_thunk, this is a plain RET.
++ *
++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
++ *
++ * We subsequently jump backwards and architecturally execute the RET.
++ * This creates a correct BTB prediction (type=ret), but in the
++ * meantime we suffer Straight Line Speculation (because the type was
++ * no branch) which is halted by the INT3.
++ *
++ * With SMT enabled and STIBP active, a sibling thread cannot poison
++ * RET's prediction to a type of its choice, but can evict the
++ * prediction due to competitive sharing. If the prediction is
++ * evicted, retbleed_return_thunk will suffer Straight Line Speculation
++ * which will be contained safely by the INT3.
++ */
++SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
++ ret
++ int3
++SYM_CODE_END(retbleed_return_thunk)
++
++ /*
++ * Ensure the TEST decoding / BTB invalidation is complete.
++ */
++ lfence
++
++ /*
++ * Jump back and execute the RET in the middle of the TEST instruction.
++ * INT3 is for SLS protection.
++ */
++ jmp retbleed_return_thunk
++ int3
++SYM_FUNC_END(retbleed_untrain_ret)
++__EXPORT_THUNK(retbleed_untrain_ret)
++
++/*
++ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
++ * above. On kernel entry, srso_untrain_ret() is executed which is a
++ *
++ * movabs $0xccccc30824648d48,%rax
++ *
++ * and when the return thunk executes the inner label srso_safe_ret()
++ * later, it is a stack manipulation and a RET which is mispredicted and
++ * thus a "safe" one to use.
++ */
++ .align 64
++ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
++SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
++ .byte 0x48, 0xb8
++
++/*
++ * This forces the function return instruction to speculate into a trap
++ * (UD2 in srso_return_thunk() below). This RET will then mispredict
++ * and execution will continue at the return site read from the top of
++ * the stack.
++ */
++SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
++ lea 8(%_ASM_SP), %_ASM_SP
++ ret
++ int3
++ int3
++ /* end of movabs */
++ lfence
++ call srso_safe_ret
++ ud2
++SYM_CODE_END(srso_safe_ret)
++SYM_FUNC_END(srso_untrain_ret)
++__EXPORT_THUNK(srso_untrain_ret)
++
++SYM_CODE_START(srso_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ call srso_safe_ret
++ ud2
++SYM_CODE_END(srso_return_thunk)
++
++SYM_FUNC_START(entry_untrain_ret)
++ ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
++ "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
++ "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
++SYM_FUNC_END(entry_untrain_ret)
++__EXPORT_THUNK(entry_untrain_ret)
++
++SYM_CODE_START(__x86_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
++SYM_CODE_END(__x86_return_thunk)
++EXPORT_SYMBOL(__x86_return_thunk)
++
++#endif /* CONFIG_RETHUNK */
+diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
+index 508c81e97ab10..f1c0befb62df5 100644
+--- a/arch/x86/lib/usercopy_64.c
++++ b/arch/x86/lib/usercopy_64.c
+@@ -121,7 +121,7 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
+
+ /* cache copy and flush to align dest */
+ if (!IS_ALIGNED(dest, 8)) {
+- unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
++ size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
+
+ memcpy((void *) dest, (void *) source, len);
+ clean_cache_range((void *) dest, len);
+diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S
+index 951da2ad54bbf..8c270ab415bee 100644
+--- a/arch/x86/math-emu/div_Xsig.S
++++ b/arch/x86/math-emu/div_Xsig.S
+@@ -341,7 +341,7 @@ L_exit:
+ popl %esi
+
+ leave
+- ret
++ RET
+
+
+ #ifdef PARANOID
+diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S
+index d047d1816abe9..637439bfefa47 100644
+--- a/arch/x86/math-emu/div_small.S
++++ b/arch/x86/math-emu/div_small.S
+@@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small)
+ popl %esi
+
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_div_small)
+diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S
+index 4afc7b1fa6e95..54a031b661421 100644
+--- a/arch/x86/math-emu/mul_Xsig.S
++++ b/arch/x86/math-emu/mul_Xsig.S
+@@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul32_Xsig)
+
+
+@@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul64_Xsig)
+
+
+@@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig)
+
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(mul_Xsig_Xsig)
+diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S
+index 702315eecb860..35fd723fc0df8 100644
+--- a/arch/x86/math-emu/polynom_Xsig.S
++++ b/arch/x86/math-emu/polynom_Xsig.S
+@@ -133,5 +133,5 @@ L_accum_done:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(polynomial_Xsig)
+diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S
+index cad1d60b1e844..594936eeed67a 100644
+--- a/arch/x86/math-emu/reg_norm.S
++++ b/arch/x86/math-emu/reg_norm.S
+@@ -72,7 +72,7 @@ L_exit_valid:
+ L_exit:
+ popl %ebx
+ leave
+- ret
++ RET
+
+
+ L_zero:
+@@ -138,7 +138,7 @@ L_exit_nuo_valid:
+
+ popl %ebx
+ leave
+- ret
++ RET
+
+ L_exit_nuo_zero:
+ movl TAG_Zero,%eax
+@@ -146,5 +146,5 @@ L_exit_nuo_zero:
+
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_normalize_nuo)
+diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
+index 4a9fc3cc5a4d4..0bb2a092161af 100644
+--- a/arch/x86/math-emu/reg_round.S
++++ b/arch/x86/math-emu/reg_round.S
+@@ -437,7 +437,7 @@ fpu_Arith_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+
+
+ /*
+diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S
+index 9c9e2c810afe8..07247287a3af7 100644
+--- a/arch/x86/math-emu/reg_u_add.S
++++ b/arch/x86/math-emu/reg_u_add.S
+@@ -164,6 +164,6 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+ SYM_FUNC_END(FPU_u_add)
+diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S
+index e2fb5c2644c55..b5a41e2fc484c 100644
+--- a/arch/x86/math-emu/reg_u_div.S
++++ b/arch/x86/math-emu/reg_u_div.S
+@@ -468,7 +468,7 @@ L_exit:
+ popl %esi
+
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+
+ SYM_FUNC_END(FPU_u_div)
+diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S
+index 0c779c87ac5b3..e2588b24b8c2c 100644
+--- a/arch/x86/math-emu/reg_u_mul.S
++++ b/arch/x86/math-emu/reg_u_mul.S
+@@ -144,7 +144,7 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ #endif /* PARANOID */
+
+ SYM_FUNC_END(FPU_u_mul)
+diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S
+index e9bb7c248649f..4c900c29e4ff2 100644
+--- a/arch/x86/math-emu/reg_u_sub.S
++++ b/arch/x86/math-emu/reg_u_sub.S
+@@ -270,5 +270,5 @@ L_exit:
+ popl %edi
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_u_sub)
+diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S
+index d9d7de8dbd7b6..126c40473badb 100644
+--- a/arch/x86/math-emu/round_Xsig.S
++++ b/arch/x86/math-emu/round_Xsig.S
+@@ -78,7 +78,7 @@ L_exit:
+ popl %esi
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(round_Xsig)
+
+
+@@ -138,5 +138,5 @@ L_n_exit:
+ popl %esi
+ popl %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(norm_Xsig)
+diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S
+index 726af985f7582..f726bf6f6396e 100644
+--- a/arch/x86/math-emu/shr_Xsig.S
++++ b/arch/x86/math-emu/shr_Xsig.S
+@@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_31:
+ cmpl $64,%ecx
+@@ -61,7 +61,7 @@ L_more_than_31:
+ movl $0,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_63:
+ cmpl $96,%ecx
+@@ -76,7 +76,7 @@ L_more_than_63:
+ movl %edx,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_95:
+ xorl %eax,%eax
+@@ -85,5 +85,5 @@ L_more_than_95:
+ movl %eax,8(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(shr_Xsig)
+diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S
+index 4fc89174caf0c..f608a28a4c43a 100644
+--- a/arch/x86/math-emu/wm_shrx.S
++++ b/arch/x86/math-emu/wm_shrx.S
+@@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_31:
+ cmpl $64,%ecx
+@@ -70,7 +70,7 @@ L_more_than_31:
+ movl $0,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_63:
+ cmpl $96,%ecx
+@@ -84,7 +84,7 @@ L_more_than_63:
+ movl %edx,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+
+ L_more_than_95:
+ xorl %eax,%eax
+@@ -92,7 +92,7 @@ L_more_than_95:
+ movl %eax,4(%esi)
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_shrx)
+
+
+@@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs)
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ /* Shift by [0..31] bits */
+ Ls_less_than_32:
+@@ -163,7 +163,7 @@ Ls_less_than_32:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ /* Shift by [64..95] bits */
+ Ls_more_than_63:
+@@ -189,7 +189,7 @@ Ls_more_than_63:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+
+ Ls_more_than_95:
+ /* Shift by [96..inf) bits */
+@@ -203,5 +203,5 @@ Ls_more_than_95:
+ popl %ebx
+ popl %esi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(FPU_shrxs)
+diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
+index f5e1e60c9095f..6c2f1b76a0b61 100644
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -110,6 +110,13 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
+ cea_map_stack(NMI);
+ cea_map_stack(DB);
+ cea_map_stack(MCE);
++
++ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
++ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
++ cea_map_stack(VC);
++ cea_map_stack(VC2);
++ }
++ }
+ }
+ #else
+ static inline void percpu_setup_exception_stacks(unsigned int cpu)
+diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
+index e1664e9f969c3..a9c7efd4b7946 100644
+--- a/arch/x86/mm/extable.c
++++ b/arch/x86/mm/extable.c
+@@ -2,48 +2,50 @@
+ #include <linux/extable.h>
+ #include <linux/uaccess.h>
+ #include <linux/sched/debug.h>
++#include <linux/bitfield.h>
+ #include <xen/xen.h>
+
+ #include <asm/fpu/internal.h>
+ #include <asm/sev.h>
+ #include <asm/traps.h>
+ #include <asm/kdebug.h>
++#include <asm/insn-eval.h>
+
+-typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+- struct pt_regs *, int, unsigned long,
+- unsigned long);
++static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
++{
++ int reg_offset = pt_regs_offset(regs, nr);
++ static unsigned long __dummy;
++
++ if (WARN_ON_ONCE(reg_offset < 0))
++ return &__dummy;
++
++ return (unsigned long *)((unsigned long)regs + reg_offset);
++}
+
+ static inline unsigned long
+ ex_fixup_addr(const struct exception_table_entry *x)
+ {
+ return (unsigned long)&x->fixup + x->fixup;
+ }
+-static inline ex_handler_t
+-ex_fixup_handler(const struct exception_table_entry *x)
+-{
+- return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+-}
+
+-__visible bool ex_handler_default(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_default(const struct exception_table_entry *e,
++ struct pt_regs *regs)
+ {
+- regs->ip = ex_fixup_addr(fixup);
++ if (e->data & EX_FLAG_CLEAR_AX)
++ regs->ax = 0;
++ if (e->data & EX_FLAG_CLEAR_DX)
++ regs->dx = 0;
++
++ regs->ip = ex_fixup_addr(e);
+ return true;
+ }
+-EXPORT_SYMBOL(ex_handler_default);
+
+-__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_fault(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
+ {
+- regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+- return true;
++ return ex_handler_default(fixup, regs);
+ }
+-EXPORT_SYMBOL_GPL(ex_handler_fault);
+
+ /*
+ * Handler for when we fail to restore a task's FPU state. We should never get
+@@ -55,10 +57,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+-__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
++ struct pt_regs *regs)
+ {
+ regs->ip = ex_fixup_addr(fixup);
+
+@@ -68,98 +68,77 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate());
+ return true;
+ }
+-EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
+-__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
+ {
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+- regs->ip = ex_fixup_addr(fixup);
+- return true;
++ return ex_handler_default(fixup, regs);
+ }
+-EXPORT_SYMBOL(ex_handler_uaccess);
+
+-__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_copy(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int trapnr)
+ {
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+- regs->ip = ex_fixup_addr(fixup);
+- regs->ax = trapnr;
+- return true;
++ return ex_handler_fault(fixup, regs, trapnr);
+ }
+-EXPORT_SYMBOL(ex_handler_copy);
+
+-__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_msr(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, bool wrmsr, bool safe, int reg)
+ {
+- if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+- (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
++ if (__ONCE_LITE_IF(!safe && wrmsr)) {
++ pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
++ (unsigned int)regs->cx, (unsigned int)regs->dx,
++ (unsigned int)regs->ax, regs->ip, (void *)regs->ip);
+ show_stack_regs(regs);
++ }
+
+- /* Pretend that the read succeeded and returned 0. */
+- regs->ip = ex_fixup_addr(fixup);
+- regs->ax = 0;
+- regs->dx = 0;
+- return true;
+-}
+-EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+-
+-__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
+-{
+- if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+- (unsigned int)regs->cx, (unsigned int)regs->dx,
+- (unsigned int)regs->ax, regs->ip, (void *)regs->ip))
++ if (__ONCE_LITE_IF(!safe && !wrmsr)) {
++ pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
++ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+ show_stack_regs(regs);
++ }
+
+- /* Pretend that the write succeeded. */
+- regs->ip = ex_fixup_addr(fixup);
+- return true;
++ if (!wrmsr) {
++ /* Pretend that the read succeeded and returned 0. */
++ regs->ax = 0;
++ regs->dx = 0;
++ }
++
++ if (safe)
++ *pt_regs_nr(regs, reg) = -EIO;
++
++ return ex_handler_default(fixup, regs);
+ }
+-EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
+-__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code,
+- unsigned long fault_addr)
++static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
++ struct pt_regs *regs)
+ {
+ if (static_cpu_has(X86_BUG_NULL_SEG))
+ asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
+ asm volatile ("mov %0, %%fs" : : "rm" (0));
+- return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
++ return ex_handler_default(fixup, regs);
+ }
+-EXPORT_SYMBOL(ex_handler_clear_fs);
+
+-enum handler_type ex_get_fault_handler_type(unsigned long ip)
++static bool ex_handler_imm_reg(const struct exception_table_entry *fixup,
++ struct pt_regs *regs, int reg, int imm)
+ {
+- const struct exception_table_entry *e;
+- ex_handler_t handler;
++ *pt_regs_nr(regs, reg) = (long)imm;
++ return ex_handler_default(fixup, regs);
++}
+
+- e = search_exception_tables(ip);
+- if (!e)
+- return EX_HANDLER_NONE;
+- handler = ex_fixup_handler(e);
+- if (handler == ex_handler_fault)
+- return EX_HANDLER_FAULT;
+- else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
+- return EX_HANDLER_UACCESS;
+- else
+- return EX_HANDLER_OTHER;
++int ex_get_fixup_type(unsigned long ip)
++{
++ const struct exception_table_entry *e = search_exception_tables(ip);
++
++ return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE;
+ }
+
+ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
+ unsigned long fault_addr)
+ {
+ const struct exception_table_entry *e;
+- ex_handler_t handler;
++ int type, reg, imm;
+
+ #ifdef CONFIG_PNPBIOS
+ if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
+@@ -179,8 +158,48 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
+ if (!e)
+ return 0;
+
+- handler = ex_fixup_handler(e);
+- return handler(e, regs, trapnr, error_code, fault_addr);
++ type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
++ reg = FIELD_GET(EX_DATA_REG_MASK, e->data);
++ imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
++
++ switch (type) {
++ case EX_TYPE_DEFAULT:
++ case EX_TYPE_DEFAULT_MCE_SAFE:
++ return ex_handler_default(e, regs);
++ case EX_TYPE_FAULT:
++ case EX_TYPE_FAULT_MCE_SAFE:
++ return ex_handler_fault(e, regs, trapnr);
++ case EX_TYPE_UACCESS:
++ return ex_handler_uaccess(e, regs, trapnr);
++ case EX_TYPE_COPY:
++ return ex_handler_copy(e, regs, trapnr);
++ case EX_TYPE_CLEAR_FS:
++ return ex_handler_clear_fs(e, regs);
++ case EX_TYPE_FPU_RESTORE:
++ return ex_handler_fprestore(e, regs);
++ case EX_TYPE_BPF:
++ return ex_handler_bpf(e, regs);
++ case EX_TYPE_WRMSR:
++ return ex_handler_msr(e, regs, true, false, reg);
++ case EX_TYPE_RDMSR:
++ return ex_handler_msr(e, regs, false, false, reg);
++ case EX_TYPE_WRMSR_SAFE:
++ return ex_handler_msr(e, regs, true, true, reg);
++ case EX_TYPE_RDMSR_SAFE:
++ return ex_handler_msr(e, regs, false, true, reg);
++ case EX_TYPE_WRMSR_IN_MCE:
++ ex_handler_msr_mce(regs, true);
++ break;
++ case EX_TYPE_RDMSR_IN_MCE:
++ ex_handler_msr_mce(regs, false);
++ break;
++ case EX_TYPE_POP_REG:
++ regs->sp += sizeof(long);
++ fallthrough;
++ case EX_TYPE_IMM_REG:
++ return ex_handler_imm_reg(e, regs, reg, imm);
++ }
++ BUG();
+ }
+
+ extern unsigned int early_recursion_flag;
+diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
+index 84a2c8c4af735..4bfed53e210ec 100644
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -32,6 +32,7 @@
+ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
+ #include <asm/kvm_para.h> /* kvm_handle_async_pf */
+ #include <asm/vdso.h> /* fixup_vdso_exception() */
++#include <asm/irq_stack.h>
+
+ #define CREATE_TRACE_POINTS
+ #include <asm/trace/exceptions.h>
+@@ -631,6 +632,9 @@ static noinline void
+ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+ {
++#ifdef CONFIG_VMAP_STACK
++ struct stack_info info;
++#endif
+ unsigned long flags;
+ int sig;
+
+@@ -649,9 +653,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ * that we're in vmalloc space to avoid this.
+ */
+ if (is_vmalloc_addr((void *)address) &&
+- (((unsigned long)current->stack - 1 - address < PAGE_SIZE) ||
+- address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) {
+- unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
++ get_stack_guard_info((void *)address, &info)) {
+ /*
+ * We're likely to be running with very little stack space
+ * left. It's plausible that we'd hit this condition but
+@@ -662,13 +664,11 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ * and then double-fault, though, because we're likely to
+ * break the console driver and lose most of the stack dump.
+ */
+- asm volatile ("movq %[stack], %%rsp\n\t"
+- "call handle_stack_overflow\n\t"
+- "1: jmp 1b"
+- : ASM_CALL_CONSTRAINT
+- : "D" ("kernel stack overflow (page fault)"),
+- "S" (regs), "d" (address),
+- [stack] "rm" (stack));
++ call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*),
++ handle_stack_overflow,
++ ASM_CALL_ARG3,
++ , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info));
++
+ unreachable();
+ }
+ #endif
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 23a14d82e7838..56d5ab70bfa1c 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -9,6 +9,7 @@
+ #include <linux/sched/task.h>
+
+ #include <asm/set_memory.h>
++#include <asm/cpu_device_id.h>
+ #include <asm/e820/api.h>
+ #include <asm/init.h>
+ #include <asm/page.h>
+@@ -26,6 +27,7 @@
+ #include <asm/pti.h>
+ #include <asm/text-patching.h>
+ #include <asm/memtype.h>
++#include <asm/paravirt.h>
+
+ /*
+ * We need to define the tracepoints somewhere, and tlb.c
+@@ -78,10 +80,20 @@ static uint8_t __pte2cachemode_tbl[8] = {
+ [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
+ };
+
+-/* Check that the write-protect PAT entry is set for write-protect */
++/*
++ * Check that the write-protect PAT entry is set for write-protect.
++ * To do this without making assumptions how PAT has been set up (Xen has
++ * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
++ * mode via the __cachemode2pte_tbl[] into protection bits (those protection
++ * bits will select a cache mode of WP or better), and then translate the
++ * protection bits back into the cache mode using __pte2cm_idx() and the
++ * __pte2cachemode_tbl[] array. This will return the really used cache mode.
++ */
+ bool x86_has_pat_wp(void)
+ {
+- return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP;
++ uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
++
++ return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
+ }
+
+ enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+@@ -251,6 +263,24 @@ static void __init probe_page_size_mask(void)
+ }
+ }
+
++#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \
++ .family = 6, \
++ .model = _model, \
++ }
++/*
++ * INVLPG may not properly flush Global entries
++ * on these CPUs when PCIDs are enabled.
++ */
++static const struct x86_cpu_id invlpg_miss_ids[] = {
++ INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
++ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
++ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
++ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
++ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
++ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
++ {}
++};
++
+ static void setup_pcid(void)
+ {
+ if (!IS_ENABLED(CONFIG_X86_64))
+@@ -259,6 +289,12 @@ static void setup_pcid(void)
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return;
+
++ if (x86_match_cpu(invlpg_miss_ids)) {
++ pr_info("Incomplete global flushes, disabling PCID");
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++ return;
++ }
++
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() -- the
+@@ -787,9 +823,12 @@ void __init poking_init(void)
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+- poking_mm = copy_init_mm();
++ poking_mm = mm_alloc();
+ BUG_ON(!poking_mm);
+
++ /* Xen PV guests need the PGD to be pinned. */
++ paravirt_arch_dup_mmap(NULL, poking_mm);
++
+ /*
+ * Randomize the poking address, but make sure that the following page
+ * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
+index 36098226a9573..200ad5ceeb43f 100644
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -646,7 +646,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
+ pages++;
+ spin_lock(&init_mm.page_table_lock);
+
+- prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE);
++ prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
+
+ set_pte_init((pte_t *)pud,
+ pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+@@ -902,6 +902,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end
+
+ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+ {
++ const unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
++
+ vmemmap_flush_unused_pmd();
+
+ /*
+@@ -914,8 +916,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
+ * Mark with PAGE_UNUSED the unused parts of the new memmap range
+ */
+ if (!IS_ALIGNED(start, PMD_SIZE))
+- memset((void *)start, PAGE_UNUSED,
+- start - ALIGN_DOWN(start, PMD_SIZE));
++ memset((void *)page, PAGE_UNUSED, start - page);
+
+ /*
+ * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
+index 60ade7dd71bd9..5dfa40279f0fd 100644
+--- a/arch/x86/mm/ioremap.c
++++ b/arch/x86/mm/ioremap.c
+@@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size,
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+- phys_addr &= PHYSICAL_PAGE_MASK;
++ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
++ /*
++ * Mask out any bits not part of the actual physical
++ * address, like memory encryption bits.
++ */
++ phys_addr &= PHYSICAL_PAGE_MASK;
++
+ retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
+ pcm, &new_pcm);
+ if (retval) {
+@@ -614,6 +620,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr,
+ static bool memremap_is_setup_data(resource_size_t phys_addr,
+ unsigned long size)
+ {
++ struct setup_indirect *indirect;
+ struct setup_data *data;
+ u64 paddr, paddr_next;
+
+@@ -626,6 +633,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
+
+ data = memremap(paddr, sizeof(*data),
+ MEMREMAP_WB | MEMREMAP_DEC);
++ if (!data) {
++ pr_warn("failed to memremap setup_data entry\n");
++ return false;
++ }
+
+ paddr_next = data->next;
+ len = data->len;
+@@ -635,10 +646,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
+ return true;
+ }
+
+- if (data->type == SETUP_INDIRECT &&
+- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+- paddr = ((struct setup_indirect *)data->data)->addr;
+- len = ((struct setup_indirect *)data->data)->len;
++ if (data->type == SETUP_INDIRECT) {
++ memunmap(data);
++ data = memremap(paddr, sizeof(*data) + len,
++ MEMREMAP_WB | MEMREMAP_DEC);
++ if (!data) {
++ pr_warn("failed to memremap indirect setup_data\n");
++ return false;
++ }
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT) {
++ paddr = indirect->addr;
++ len = indirect->len;
++ }
+ }
+
+ memunmap(data);
+@@ -659,22 +681,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
+ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
+ unsigned long size)
+ {
++ struct setup_indirect *indirect;
+ struct setup_data *data;
+ u64 paddr, paddr_next;
+
+ paddr = boot_params.hdr.setup_data;
+ while (paddr) {
+- unsigned int len;
++ unsigned int len, size;
+
+ if (phys_addr == paddr)
+ return true;
+
+ data = early_memremap_decrypted(paddr, sizeof(*data));
++ if (!data) {
++ pr_warn("failed to early memremap setup_data entry\n");
++ return false;
++ }
++
++ size = sizeof(*data);
+
+ paddr_next = data->next;
+ len = data->len;
+
+- early_memunmap(data, sizeof(*data));
++ if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
++ early_memunmap(data, sizeof(*data));
++ return true;
++ }
++
++ if (data->type == SETUP_INDIRECT) {
++ size += len;
++ early_memunmap(data, sizeof(*data));
++ data = early_memremap_decrypted(paddr, size);
++ if (!data) {
++ pr_warn("failed to early memremap indirect setup_data\n");
++ return false;
++ }
++
++ indirect = (struct setup_indirect *)data->data;
++
++ if (indirect->type != SETUP_INDIRECT) {
++ paddr = indirect->addr;
++ len = indirect->len;
++ }
++ }
++
++ early_memunmap(data, size);
+
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+ return true;
+diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
+index 557f0fe25dff4..37db264866b64 100644
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void)
+ set_p4d(p4d_tramp,
+ __p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
+
+- set_pgd(&trampoline_pgd_entry,
+- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
++ trampoline_pgd_entry =
++ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp));
+ } else {
+- set_pgd(&trampoline_pgd_entry,
+- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
++ trampoline_pgd_entry =
++ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
+ }
+ }
+diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
+index ff08dc4636347..e29b1418d00c7 100644
+--- a/arch/x86/mm/mem_encrypt.c
++++ b/arch/x86/mm/mem_encrypt.c
+@@ -20,6 +20,7 @@
+ #include <linux/bitops.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/virtio_config.h>
++#include <linux/cc_platform.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/fixmap.h>
+diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
+index 17d292b7072f1..9de3d900bc927 100644
+--- a/arch/x86/mm/mem_encrypt_boot.S
++++ b/arch/x86/mm/mem_encrypt_boot.S
+@@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute)
+ movq %rbp, %rsp /* Restore original stack pointer */
+ pop %rbp
+
++ /* Offset to __x86_return_thunk would be wrong here */
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ SYM_FUNC_END(sme_encrypt_execute)
+
+ SYM_FUNC_START(__enc_copy)
+@@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy)
+ pop %r12
+ pop %r15
+
++ /* Offset to __x86_return_thunk would be wrong here */
++ ANNOTATE_UNRET_SAFE
+ ret
++ int3
+ .L__enc_copy_end:
+ SYM_FUNC_END(__enc_copy)
+diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
+index 470b202084306..c7e9fb1d830d2 100644
+--- a/arch/x86/mm/mem_encrypt_identity.c
++++ b/arch/x86/mm/mem_encrypt_identity.c
+@@ -27,6 +27,15 @@
+ #undef CONFIG_PARAVIRT_XXL
+ #undef CONFIG_PARAVIRT_SPINLOCKS
+
++/*
++ * This code runs before CPU feature bits are set. By default, the
++ * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if
++ * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5
++ * is provided to handle this situation and, instead, use a variable that
++ * has been set by the early boot code.
++ */
++#define USE_EARLY_PGTABLE_L5
++
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/mem_encrypt.h>
+@@ -576,7 +585,8 @@ void __init sme_enable(struct boot_params *bp)
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+
+- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
++ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
++ return;
+
+ if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+ sme_me_mask = me_mask;
+diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
+index 1e9b93b088dbf..e360c6892a584 100644
+--- a/arch/x86/mm/numa.c
++++ b/arch/x86/mm/numa.c
+@@ -860,7 +860,7 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable)
+ return;
+ }
+ mask = node_to_cpumask_map[node];
+- if (!mask) {
++ if (!cpumask_available(mask)) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return;
+@@ -906,7 +906,7 @@ const struct cpumask *cpumask_of_node(int node)
+ dump_stack();
+ return cpu_none_mask;
+ }
+- if (node_to_cpumask_map[node] == NULL) {
++ if (!cpumask_available(node_to_cpumask_map[node])) {
+ printk(KERN_WARNING
+ "cpumask_of_node(%d): no node_to_cpumask_map!\n",
+ node);
+diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
+index 4ba2a3ee4bce1..d5ef64ddd35e9 100644
+--- a/arch/x86/mm/pat/memtype.c
++++ b/arch/x86/mm/pat/memtype.c
+@@ -101,7 +101,7 @@ int pat_debug_enable;
+ static int __init pat_debug_setup(char *str)
+ {
+ pat_debug_enable = 1;
+- return 0;
++ return 1;
+ }
+ __setup("debugpat", pat_debug_setup);
+
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 59ba2968af1b3..511172d70825c 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -854,13 +854,11 @@ done:
+ nr_invalidate);
+ }
+
+-static bool tlb_is_not_lazy(int cpu)
++static bool tlb_is_not_lazy(int cpu, void *data)
+ {
+ return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
+ }
+
+-static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+-
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
+ EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);
+
+@@ -889,36 +887,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
+ * up on the new contents of what used to be page tables, while
+ * doing a speculative memory access.
+ */
+- if (info->freed_tables) {
++ if (info->freed_tables)
+ on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
+- } else {
+- /*
+- * Although we could have used on_each_cpu_cond_mask(),
+- * open-coding it has performance advantages, as it eliminates
+- * the need for indirect calls or retpolines. In addition, it
+- * allows to use a designated cpumask for evaluating the
+- * condition, instead of allocating one.
+- *
+- * This code works under the assumption that there are no nested
+- * TLB flushes, an assumption that is already made in
+- * flush_tlb_mm_range().
+- *
+- * cond_cpumask is logically a stack-local variable, but it is
+- * more efficient to have it off the stack and not to allocate
+- * it on demand. Preemption is disabled and this code is
+- * non-reentrant.
+- */
+- struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
+- int cpu;
+-
+- cpumask_clear(cond_cpumask);
+-
+- for_each_cpu(cpu, cpumask) {
+- if (tlb_is_not_lazy(cpu))
+- __cpumask_set_cpu(cpu, cond_cpumask);
+- }
+- on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
+- }
++ else
++ on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
++ (void *)info, 1, cpumask);
+ }
+
+ void flush_tlb_multi(const struct cpumask *cpumask,
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 9ea57389c554b..dccaab2113f93 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -15,7 +15,6 @@
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/text-patching.h>
+-#include <asm/asm-prototypes.h>
+
+ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+ {
+@@ -225,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size)
+
+ struct jit_context {
+ int cleanup_addr; /* Epilogue code offset */
++
++ /*
++ * Program specific offsets of labels in the code; these rely on the
++ * JIT doing at least 2 passes, recording the position on the first
++ * pass, only to generate the correct offset on the second pass.
++ */
++ int tail_call_direct_label;
++ int tail_call_indirect_label;
+ };
+
+ /* Maximum number of bytes emitted while JITing one eBPF insn */
+@@ -380,20 +387,38 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+ }
+
+-static int get_pop_bytes(bool *callee_regs_used)
++#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
++
++static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
+ {
+- int bytes = 0;
++ u8 *prog = *pprog;
+
+- if (callee_regs_used[3])
+- bytes += 2;
+- if (callee_regs_used[2])
+- bytes += 2;
+- if (callee_regs_used[1])
+- bytes += 2;
+- if (callee_regs_used[0])
+- bytes += 1;
++#ifdef CONFIG_RETPOLINE
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
++ EMIT_LFENCE();
++ EMIT2(0xFF, 0xE0 + reg);
++ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
++ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
++ } else
++#endif
++ EMIT2(0xFF, 0xE0 + reg);
++
++ *pprog = prog;
++}
++
++static void emit_return(u8 **pprog, u8 *ip)
++{
++ u8 *prog = *pprog;
++
++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
++ emit_jump(&prog, &__x86_return_thunk, ip);
++ } else {
++ EMIT1(0xC3); /* ret */
++ if (IS_ENABLED(CONFIG_SLS))
++ EMIT1(0xCC); /* int3 */
++ }
+
+- return bytes;
++ *pprog = prog;
+ }
+
+ /*
+@@ -411,29 +436,12 @@ static int get_pop_bytes(bool *callee_regs_used)
+ * out:
+ */
+ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+- u32 stack_depth)
++ u32 stack_depth, u8 *ip,
++ struct jit_context *ctx)
+ {
+ int tcc_off = -4 - round_up(stack_depth, 8);
+- u8 *prog = *pprog;
+- int pop_bytes = 0;
+- int off1 = 42;
+- int off2 = 31;
+- int off3 = 9;
+-
+- /* count the additional bytes used for popping callee regs from stack
+- * that need to be taken into account for each of the offsets that
+- * are used for bailing out of the tail call
+- */
+- pop_bytes = get_pop_bytes(callee_regs_used);
+- off1 += pop_bytes;
+- off2 += pop_bytes;
+- off3 += pop_bytes;
+-
+- if (stack_depth) {
+- off1 += 7;
+- off2 += 7;
+- off3 += 7;
+- }
++ u8 *prog = *pprog, *start = *pprog;
++ int offset;
+
+ /*
+ * rdi - pointer to ctx
+@@ -448,8 +456,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
+ offsetof(struct bpf_array, map.max_entries));
+-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
+- EMIT2(X86_JBE, OFFSET1); /* jbe out */
++
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JBE, offset); /* jbe out */
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -457,8 +466,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
+- EMIT2(X86_JA, OFFSET2); /* ja out */
++
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JA, offset); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+
+@@ -471,12 +481,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ * goto out;
+ */
+ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
+-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
+- EMIT2(X86_JE, OFFSET3); /* je out */
+
+- *pprog = prog;
+- pop_callee_regs(pprog, callee_regs_used);
+- prog = *pprog;
++ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
++ EMIT2(X86_JE, offset); /* je out */
++
++ pop_callee_regs(&prog, callee_regs_used);
+
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+@@ -493,41 +502,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ * rdi == ctx (1st arg)
+ * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
+ */
+- RETPOLINE_RCX_BPF_JIT();
++ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
+
+ /* out: */
++ ctx->tail_call_indirect_label = prog - start;
+ *pprog = prog;
+ }
+
+ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+- u8 **pprog, int addr, u8 *image,
+- bool *callee_regs_used, u32 stack_depth)
++ u8 **pprog, u8 *ip,
++ bool *callee_regs_used, u32 stack_depth,
++ struct jit_context *ctx)
+ {
+ int tcc_off = -4 - round_up(stack_depth, 8);
+- u8 *prog = *pprog;
+- int pop_bytes = 0;
+- int off1 = 20;
+- int poke_off;
+-
+- /* count the additional bytes used for popping callee regs to stack
+- * that need to be taken into account for jump offset that is used for
+- * bailing out from of the tail call when limit is reached
+- */
+- pop_bytes = get_pop_bytes(callee_regs_used);
+- off1 += pop_bytes;
+-
+- /*
+- * total bytes for:
+- * - nop5/ jmpq $off
+- * - pop callee regs
+- * - sub rsp, $val if depth > 0
+- * - pop rax
+- */
+- poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+- if (stack_depth) {
+- poke_off += 7;
+- off1 += 7;
+- }
++ u8 *prog = *pprog, *start = *pprog;
++ int offset;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+@@ -535,28 +524,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+ */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+- EMIT2(X86_JA, off1); /* ja out */
++
++ offset = ctx->tail_call_direct_label - (prog + 2 - start);
++ EMIT2(X86_JA, offset); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+
+- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
++ poke->tailcall_bypass = ip + (prog - start);
+ poke->adj_off = X86_TAIL_CALL_OFFSET;
+- poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
++ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
+ poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
+
+ emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+ poke->tailcall_bypass);
+
+- *pprog = prog;
+- pop_callee_regs(pprog, callee_regs_used);
+- prog = *pprog;
++ pop_callee_regs(&prog, callee_regs_used);
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
+
+ memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
++
+ /* out: */
++ ctx->tail_call_direct_label = prog - start;
+
+ *pprog = prog;
+ }
+@@ -721,6 +712,20 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
+ *pprog = prog;
+ }
+
++/*
++ * Similar version of maybe_emit_mod() for a single register
++ */
++static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64)
++{
++ u8 *prog = *pprog;
++
++ if (is64)
++ EMIT1(add_1mod(0x48, reg));
++ else if (is_ereg(reg))
++ EMIT1(add_1mod(0x40, reg));
++ *pprog = prog;
++}
++
+ /* LDX: dst_reg = *(u8*)(src_reg + off) */
+ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+ {
+@@ -827,9 +832,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
+ return 0;
+ }
+
+-static bool ex_handler_bpf(const struct exception_table_entry *x,
+- struct pt_regs *regs, int trapnr,
+- unsigned long error_code, unsigned long fault_addr)
++bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
+ {
+ u32 reg = x->fixup >> 8;
+
+@@ -951,10 +954,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ /* neg dst */
+ case BPF_ALU | BPF_NEG:
+ case BPF_ALU64 | BPF_NEG:
+- if (BPF_CLASS(insn->code) == BPF_ALU64)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_ALU64);
+ EMIT2(0xF7, add_1reg(0xD8, dst_reg));
+ break;
+
+@@ -968,10 +969,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+- if (BPF_CLASS(insn->code) == BPF_ALU64)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_ALU64);
+
+ /*
+ * b3 holds 'normal' opcode, b2 short form only valid
+@@ -1112,10 +1111,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+- if (BPF_CLASS(insn->code) == BPF_ALU64)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_ALU64);
+
+ b3 = simple_alu_opcodes[BPF_OP(insn->code)];
+ if (imm32 == 1)
+@@ -1146,10 +1143,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ }
+
+ /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
+- if (BPF_CLASS(insn->code) == BPF_ALU64)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_ALU64);
+
+ b3 = simple_alu_opcodes[BPF_OP(insn->code)];
+ EMIT2(0xD3, add_1reg(b3, dst_reg));
+@@ -1222,8 +1217,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+- /* Emit 'lfence' */
+- EMIT3(0x0F, 0xAE, 0xE8);
++ EMIT_LFENCE();
+ break;
+
+ /* ST: *(u8*)(dst_reg + off) = imm */
+@@ -1274,19 +1268,54 @@ st: if (is_imm8(insn->off))
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+- /* test src_reg, src_reg */
+- maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */
+- EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg));
+- /* jne start_of_ldx */
+- EMIT2(X86_JNE, 0);
++ /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM
++ * add abs(insn->off) to the limit to make sure that negative
++ * offset won't be an issue.
++ * insn->off is s16, so it won't affect valid pointers.
++ */
++ u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off);
++ u8 *end_of_jmp1, *end_of_jmp2;
++
++ /* Conservatively check that src_reg + insn->off is a kernel address:
++ * 1. src_reg + insn->off >= limit
++ * 2. src_reg + insn->off doesn't become small positive.
++ * Cannot do src_reg + insn->off >= limit in one branch,
++ * since it needs two spare registers, but JIT has only one.
++ */
++
++ /* movabsq r11, limit */
++ EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
++ EMIT((u32)limit, 4);
++ EMIT(limit >> 32, 4);
++ /* cmp src_reg, r11 */
++ maybe_emit_mod(&prog, src_reg, AUX_REG, true);
++ EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
++ /* if unsigned '<' goto end_of_jmp2 */
++ EMIT2(X86_JB, 0);
++ end_of_jmp1 = prog;
++
++ /* mov r11, src_reg */
++ emit_mov_reg(&prog, true, AUX_REG, src_reg);
++ /* add r11, insn->off */
++ maybe_emit_1mod(&prog, AUX_REG, true);
++ EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
++ /* jmp if not carry to start_of_ldx
++ * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr
++ * that has to be rejected.
++ */
++ EMIT2(0x73 /* JNC */, 0);
++ end_of_jmp2 = prog;
++
+ /* xor dst_reg, dst_reg */
+ emit_mov_imm32(&prog, false, dst_reg, 0);
+ /* jmp byte_after_ldx */
+ EMIT2(0xEB, 0);
+
+- /* populate jmp_offset for JNE above */
+- temp[4] = prog - temp - 5 /* sizeof(test + jne) */;
++ /* populate jmp_offset for JB above to jump to xor dst_reg */
++ end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1;
++ /* populate jmp_offset for JNC above to jump to start_of_ldx */
+ start_of_ldx = prog;
++ end_of_jmp2[-1] = start_of_ldx - end_of_jmp2;
+ }
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+@@ -1313,12 +1342,7 @@ st: if (is_imm8(insn->off))
+ }
+ ex->insn = delta;
+
+- delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+- if (!is_simm32(delta)) {
+- pr_err("extable->handler doesn't fit into 32-bit\n");
+- return -EFAULT;
+- }
+- ex->handler = delta;
++ ex->data = EX_TYPE_BPF;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+@@ -1332,7 +1356,7 @@ st: if (is_imm8(insn->off))
+ * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * of 4 bytes will be ignored and rbx will be zero inited.
+ */
+- ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
++ ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
+ }
+ break;
+
+@@ -1399,8 +1423,9 @@ st: if (is_imm8(insn->off))
+ case BPF_JMP | BPF_CALL:
+ func = (u8 *) __bpf_call_base + imm32;
+ if (tail_call_reachable) {
++ /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+ EMIT3_off32(0x48, 0x8B, 0x85,
+- -(bpf_prog->aux->stack_depth + 8));
++ -round_up(bpf_prog->aux->stack_depth, 8) - 8);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+ return -EINVAL;
+ } else {
+@@ -1412,13 +1437,16 @@ st: if (is_imm8(insn->off))
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+- &prog, addrs[i], image,
++ &prog, image + addrs[i - 1],
+ callee_regs_used,
+- bpf_prog->aux->stack_depth);
++ bpf_prog->aux->stack_depth,
++ ctx);
+ else
+ emit_bpf_tail_call_indirect(&prog,
+ callee_regs_used,
+- bpf_prog->aux->stack_depth);
++ bpf_prog->aux->stack_depth,
++ image + addrs[i - 1],
++ ctx);
+ break;
+
+ /* cond jump */
+@@ -1459,10 +1487,8 @@ st: if (is_imm8(insn->off))
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ /* test dst_reg, imm32 */
+- if (BPF_CLASS(insn->code) == BPF_JMP)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_JMP);
+ EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
+ goto emit_cond_jmp;
+
+@@ -1495,10 +1521,8 @@ st: if (is_imm8(insn->off))
+ }
+
+ /* cmp dst_reg, imm8/32 */
+- if (BPF_CLASS(insn->code) == BPF_JMP)
+- EMIT1(add_1mod(0x48, dst_reg));
+- else if (is_ereg(dst_reg))
+- EMIT1(add_1mod(0x40, dst_reg));
++ maybe_emit_1mod(&prog, dst_reg,
++ BPF_CLASS(insn->code) == BPF_JMP);
+
+ if (is_imm8(imm32))
+ EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
+@@ -1665,7 +1689,7 @@ emit_jmp:
+ ctx->cleanup_addr = proglen;
+ pop_callee_regs(&prog, callee_regs_used);
+ EMIT1(0xC9); /* leave */
+- EMIT1(0xC3); /* ret */
++ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
+ break;
+
+ default:
+@@ -2111,7 +2135,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+- EMIT1(0xC3); /* ret */
++ emit_return(&prog, prog);
+ /* Make sure the trampoline generation logic doesn't overflow */
+ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+ ret = -EFAULT;
+@@ -2124,24 +2148,6 @@ cleanup:
+ return ret;
+ }
+
+-static int emit_fallback_jump(u8 **pprog)
+-{
+- u8 *prog = *pprog;
+- int err = 0;
+-
+-#ifdef CONFIG_RETPOLINE
+- /* Note that this assumes the the compiler uses external
+- * thunks for indirect calls. Both clang and GCC use the same
+- * naming convention for external thunks.
+- */
+- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+-#else
+- EMIT2(0xFF, 0xE2); /* jmp rdx */
+-#endif
+- *pprog = prog;
+- return err;
+-}
+-
+ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+ {
+ u8 *jg_reloc, *prog = *pprog;
+@@ -2163,9 +2169,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+ if (err)
+ return err;
+
+- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
+- if (err)
+- return err;
++ emit_indirect_jump(&prog, 2 /* rdx */, prog);
+
+ *pprog = prog;
+ return 0;
+diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
+index 3bfda5f502cb8..da9b7cfa46329 100644
+--- a/arch/x86/net/bpf_jit_comp32.c
++++ b/arch/x86/net/bpf_jit_comp32.c
+@@ -15,6 +15,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/set_memory.h>
+ #include <asm/nospec-branch.h>
++#include <asm/asm-prototypes.h>
+ #include <linux/bpf.h>
+
+ /*
+@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
+ *pprog = prog;
+ }
+
++static int emit_jmp_edx(u8 **pprog, u8 *ip)
++{
++ u8 *prog = *pprog;
++ int cnt = 0;
++
++#ifdef CONFIG_RETPOLINE
++ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
++#else
++ EMIT2(0xFF, 0xE2);
++#endif
++ *pprog = prog;
++
++ return cnt;
++}
++
+ /*
+ * Generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
+ * goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+-static void emit_bpf_tail_call(u8 **pprog)
++static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
+ {
+ u8 *prog = *pprog;
+ int cnt = 0;
+@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
+ * eax == ctx (1st arg)
+ * edx == prog->bpf_func + prologue_size
+ */
+- RETPOLINE_EDX_BPF_JIT();
++ cnt += emit_jmp_edx(&prog, ip + cnt);
+
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+@@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL:
+- emit_bpf_tail_call(&prog);
++ emit_bpf_tail_call(&prog, image + addrs[i - 1]);
+ break;
+
+ /* cond jump */
+diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
+index 2edd86649468f..bf5161dcf89e7 100644
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -7,6 +7,7 @@
+ #include <linux/dmi.h>
+ #include <linux/pci.h>
+ #include <linux/vgaarb.h>
++#include <asm/amd_nb.h>
+ #include <asm/hpet.h>
+ #include <asm/pci_x86.h>
+
+@@ -353,8 +354,8 @@ static void pci_fixup_video(struct pci_dev *pdev)
+ }
+ }
+ }
+-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
++DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
++ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+
+
+ static const struct dmi_system_id msi_k8t_dmi_table[] = {
+@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
+ #endif
++
++#ifdef CONFIG_AMD_NB
++
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L
++
++static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
++{
++ u32 data;
++
++ if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
++ data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
++ if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
++ pci_err(dev, "Failed to write data 0x%x\n", data);
++ } else {
++ pci_err(dev, "Failed to read data\n");
++ }
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
++#endif
+diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
+index 97b63e35e1528..21c4bc41741fe 100644
+--- a/arch/x86/pci/irq.c
++++ b/arch/x86/pci/irq.c
+@@ -253,6 +253,15 @@ static void write_pc_conf_nybble(u8 base, u8 index, u8 val)
+ pc_conf_set(reg, x);
+ }
+
++/*
++ * FinALi pirq rules are as follows:
++ *
++ * - bit 0 selects between INTx Routing Table Mapping Registers,
++ *
++ * - bit 3 selects the nibble within the INTx Routing Table Mapping Register,
++ *
++ * - bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register.
++ */
+ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
+ int pirq)
+ {
+@@ -260,11 +269,13 @@ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev,
+ 0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15
+ };
+ unsigned long flags;
++ u8 index;
+ u8 x;
+
++ index = (pirq & 1) << 1 | (pirq & 8) >> 3;
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
+- x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)];
++ x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index)];
+ pc_conf_set(PC_CONF_FINALI_LOCK, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return x;
+@@ -278,13 +289,15 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
+ };
+ u8 val = irqmap[irq];
+ unsigned long flags;
++ u8 index;
+
+ if (!val)
+ return 0;
+
++ index = (pirq & 1) << 1 | (pirq & 8) >> 3;
+ raw_spin_lock_irqsave(&pc_conf_lock, flags);
+ pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY);
+- write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val);
++ write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index, val);
+ pc_conf_set(PC_CONF_FINALI_LOCK, 0);
+ raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
+ return 1;
+@@ -293,7 +306,7 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev,
+ static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
+ {
+- u8 mask = ~(1u << (pirq - 1));
++ u8 mask = ~((pirq & 0xf0u) >> 4);
+ unsigned long flags;
+ u8 trig;
+
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 5debe4ac6f819..f153e9ab8c966 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -472,7 +472,6 @@ static __init void xen_setup_pci_msi(void)
+ xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
+ }
+ xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
+- pci_msi_ignore_mask = 1;
+ } else if (xen_hvm_domain()) {
+ xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
+ xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
+@@ -486,6 +485,11 @@ static __init void xen_setup_pci_msi(void)
+ * in allocating the native domain and never use it.
+ */
+ x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
++ /*
++ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
++ * controlled by the hypervisor.
++ */
++ pci_msi_ignore_mask = 1;
+ }
+
+ #else /* CONFIG_PCI_MSI */
+diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
+index 09ec84f6ef517..f3cfdb1c9a359 100644
+--- a/arch/x86/platform/efi/efi_stub_32.S
++++ b/arch/x86/platform/efi/efi_stub_32.S
+@@ -56,5 +56,5 @@ SYM_FUNC_START(efi_call_svam)
+
+ movl 16(%esp), %ebx
+ leave
+- ret
++ RET
+ SYM_FUNC_END(efi_call_svam)
+diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
+index 90380a17ab238..2206b8bc47b8a 100644
+--- a/arch/x86/platform/efi/efi_stub_64.S
++++ b/arch/x86/platform/efi/efi_stub_64.S
+@@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call)
+ mov %rsi, %rcx
+ CALL_NOSPEC rdi
+ leave
+- ret
++ RET
+ SYM_FUNC_END(__efi_call)
+diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
+index fd3dd1708eba5..a7ffe30e86143 100644
+--- a/arch/x86/platform/efi/efi_thunk_64.S
++++ b/arch/x86/platform/efi/efi_thunk_64.S
+@@ -22,6 +22,7 @@
+ #include <linux/linkage.h>
+ #include <asm/page_types.h>
+ #include <asm/segment.h>
++#include <asm/nospec-branch.h>
+
+ .text
+ .code64
+@@ -63,7 +64,9 @@ SYM_CODE_START(__efi64_thunk)
+ 1: movq 24(%rsp), %rsp
+ pop %rbx
+ pop %rbp
+- retq
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
+
+ .code32
+ 2: pushl $__KERNEL_CS
+diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
+index b15ebfe40a73e..b0b848d6933af 100644
+--- a/arch/x86/platform/efi/quirks.c
++++ b/arch/x86/platform/efi/quirks.c
+@@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+ return;
+ }
+
+- new = early_memremap(data.phys_map, data.size);
++ new = early_memremap_prot(data.phys_map, data.size,
++ pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL)));
+ if (!new) {
+ pr_err("Failed to map new boot services memmap\n");
+ return;
+diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
+index f03a6883dcc6d..89f25af4b3c33 100644
+--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
++++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
+@@ -80,7 +80,7 @@ static void send_ebook_state(void)
+ return;
+ }
+
+- if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
++ if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state)
+ return; /* Nothing new to report. */
+
+ input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
+diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S
+index 75f4faff84682..3a5abffe5660d 100644
+--- a/arch/x86/platform/olpc/xo1-wakeup.S
++++ b/arch/x86/platform/olpc/xo1-wakeup.S
+@@ -77,7 +77,7 @@ save_registers:
+ pushfl
+ popl saved_context_eflags
+
+- ret
++ RET
+
+ restore_registers:
+ movl saved_context_ebp, %ebp
+@@ -88,7 +88,7 @@ restore_registers:
+ pushl saved_context_eflags
+ popfl
+
+- ret
++ RET
+
+ SYM_CODE_START(do_olpc_suspend_lowlevel)
+ call save_processor_state
+@@ -109,7 +109,7 @@ ret_point:
+
+ call restore_registers
+ call restore_processor_state
+- ret
++ RET
+ SYM_CODE_END(do_olpc_suspend_lowlevel)
+
+ .data
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index 6665f88020983..f5133d620d4ef 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -25,6 +25,7 @@
+ #include <asm/cpu.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cpu_device_id.h>
++#include <asm/microcode.h>
+
+ #ifdef CONFIG_X86_32
+ __visible unsigned long saved_context_ebx;
+@@ -40,7 +41,8 @@ static void msr_save_context(struct saved_context *ctxt)
+ struct saved_msr *end = msr + ctxt->saved_msrs.num;
+
+ while (msr < end) {
+- msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
++ if (msr->valid)
++ rdmsrl(msr->info.msr_no, msr->info.reg.q);
+ msr++;
+ }
+ }
+@@ -261,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
+ x86_platform.restore_sched_clock_state();
+ mtrr_bp_restore();
+ perf_restore_debug_store();
+- msr_restore_context(ctxt);
+
+ c = &cpu_data(smp_processor_id());
+ if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
+ init_ia32_feat_ctl(c);
++
++ microcode_bsp_resume();
++
++ /*
++ * This needs to happen after the microcode has been updated upon resume
++ * because some of the MSRs are "emulated" in microcode.
++ */
++ msr_restore_context(ctxt);
+ }
+
+ /* Needed by apm.c */
+@@ -424,8 +433,10 @@ static int msr_build_context(const u32 *msr_id, const int num)
+ }
+
+ for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
++ u64 dummy;
++
+ msr_array[i].info.msr_no = msr_id[j];
+- msr_array[i].valid = false;
++ msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy);
+ msr_array[i].info.reg.q = 0;
+ }
+ saved_msrs->num = total_num;
+@@ -500,10 +511,32 @@ static int pm_cpu_check(const struct x86_cpu_id *c)
+ return ret;
+ }
+
++static void pm_save_spec_msr(void)
++{
++ struct msr_enumeration {
++ u32 msr_no;
++ u32 feature;
++ } msr_enum[] = {
++ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL },
++ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL },
++ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT },
++ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL },
++ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD },
++ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC },
++ };
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
++ if (boot_cpu_has(msr_enum[i].feature))
++ msr_build_context(&msr_enum[i].msr_no, 1);
++ }
++}
++
+ static int pm_check_save_msr(void)
+ {
+ dmi_check_system(msr_save_dmi_table);
+ pm_cpu_check(msr_save_cpu_table);
++ pm_save_spec_msr();
+
+ return 0;
+ }
+diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
+index 8786653ad3c06..5606a15cf9a17 100644
+--- a/arch/x86/power/hibernate_asm_32.S
++++ b/arch/x86/power/hibernate_asm_32.S
+@@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+
+ SYM_CODE_START(restore_image)
+@@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movl %eax, in_suspend
+
+- ret
++ RET
+ SYM_FUNC_END(restore_registers)
+diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
+index d9bed596d849c..0a0539e1cc814 100644
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -66,7 +66,7 @@ SYM_FUNC_START(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
+- ret
++ RET
+ SYM_FUNC_END(restore_registers)
+
+ SYM_FUNC_START(swsusp_arch_suspend)
+@@ -96,7 +96,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
+ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(swsusp_arch_suspend)
+
+ SYM_FUNC_START(restore_image)
+diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
+index 95ea17a9d20cb..dc0b91c1db04b 100644
+--- a/arch/x86/purgatory/Makefile
++++ b/arch/x86/purgatory/Makefile
+@@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+
+ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
++# When profile-guided optimization is enabled, llvm emits two different
++# overlapping text sections, which is not supported by kexec. Remove profile
++# optimization flags.
++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
++
+ # When linking purgatory.ro with -r unresolved symbols are not checked,
+ # also link a purgatory.chk binary without -r to check for unresolved symbols.
+ PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib
+@@ -64,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+ CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+ CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
+-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
++asflags-remove-y += -g -Wa,-gdwarf-2
+
+ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
+index 31b5856010cba..1d20ed4b28729 100644
+--- a/arch/x86/realmode/init.c
++++ b/arch/x86/realmode/init.c
+@@ -17,6 +17,32 @@ u32 *trampoline_cr4_features;
+ /* Hold the pgd entry used on booting additional CPUs */
+ pgd_t trampoline_pgd_entry;
+
++void load_trampoline_pgtable(void)
++{
++#ifdef CONFIG_X86_32
++ load_cr3(initial_page_table);
++#else
++ /*
++ * This function is called before exiting to real-mode and that will
++ * fail with CR4.PCIDE still set.
++ */
++ if (boot_cpu_has(X86_FEATURE_PCID))
++ cr4_clear_bits(X86_CR4_PCIDE);
++
++ write_cr3(real_mode_header->trampoline_pgd);
++#endif
++
++ /*
++ * The CR3 write above will not flush global TLB entries.
++ * Stale, global entries from previous page tables may still be
++ * present. Flush those stale entries.
++ *
++ * This ensures that memory accessed while running with
++ * trampoline_pgd is *actually* mapped into trampoline_pgd.
++ */
++ __flush_tlb_all();
++}
++
+ void __init reserve_real_mode(void)
+ {
+ phys_addr_t mem;
+@@ -72,6 +98,7 @@ static void __init setup_real_mode(void)
+ #ifdef CONFIG_X86_64
+ u64 *trampoline_pgd;
+ u64 efer;
++ int i;
+ #endif
+
+ base = (unsigned char *)real_mode_header;
+@@ -128,8 +155,17 @@ static void __init setup_real_mode(void)
+ trampoline_header->flags = 0;
+
+ trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
++
++ /* Map the real mode stub as virtual == physical */
+ trampoline_pgd[0] = trampoline_pgd_entry.pgd;
+- trampoline_pgd[511] = init_top_pgt[511].pgd;
++
++ /*
++ * Include the entirety of the kernel mapping into the trampoline
++ * PGD. This way, all mappings present in the normal kernel page
++ * tables are usable while running on trampoline_pgd.
++ */
++ for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
++ trampoline_pgd[i] = init_top_pgt[i].pgd;
+ #endif
+
+ sme_sev_setup_real_mode(trampoline_header);
+diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
+index 5ccb18290d717..a8591ec8ae682 100644
+--- a/arch/x86/um/Makefile
++++ b/arch/x86/um/Makefile
+@@ -28,7 +28,8 @@ else
+
+ obj-y += syscalls_64.o vdso/
+
+-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
++subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o
++subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o
+
+ endif
+
+diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
+index 13f118dec74f8..aed782ab77213 100644
+--- a/arch/x86/um/checksum_32.S
++++ b/arch/x86/um/checksum_32.S
+@@ -110,7 +110,7 @@ csum_partial:
+ 7:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+
+ #else
+
+@@ -208,7 +208,7 @@ csum_partial:
+ 80:
+ popl %ebx
+ popl %esi
+- ret
++ RET
+
+ #endif
+ EXPORT_SYMBOL(csum_partial)
+diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
+index 3ee234b6234dd..255a44dd415a9 100644
+--- a/arch/x86/um/ldt.c
++++ b/arch/x86/um/ldt.c
+@@ -23,9 +23,11 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func,
+ {
+ long res;
+ void *stub_addr;
++
++ BUILD_BUG_ON(sizeof(*desc) % sizeof(long));
++
+ res = syscall_stub_data(mm_idp, (unsigned long *)desc,
+- (sizeof(*desc) + sizeof(long) - 1) &
+- ~(sizeof(long) - 1),
++ sizeof(*desc) / sizeof(long),
+ addr, &stub_addr);
+ if (!res) {
+ unsigned long args[] = { func,
+diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S
+index 62eaf8c80e041..2d991ddbcca57 100644
+--- a/arch/x86/um/setjmp_32.S
++++ b/arch/x86/um/setjmp_32.S
+@@ -34,7 +34,7 @@ kernel_setjmp:
+ movl %esi,12(%edx)
+ movl %edi,16(%edx)
+ movl %ecx,20(%edx) # Return address
+- ret
++ RET
+
+ .size kernel_setjmp,.-kernel_setjmp
+
+diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S
+index 1b5d40d4ff46d..b46acb6a8ebd8 100644
+--- a/arch/x86/um/setjmp_64.S
++++ b/arch/x86/um/setjmp_64.S
+@@ -33,7 +33,7 @@ kernel_setjmp:
+ movq %r14,40(%rdi)
+ movq %r15,48(%rdi)
+ movq %rsi,56(%rdi) # Return address
+- ret
++ RET
+
+ .size kernel_setjmp,.-kernel_setjmp
+
+diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
+index 68fd2cf526fd7..f6e9f84397e79 100644
+--- a/arch/x86/um/shared/sysdep/syscalls_32.h
++++ b/arch/x86/um/shared/sysdep/syscalls_32.h
+@@ -6,10 +6,9 @@
+ #include <asm/unistd.h>
+ #include <sysdep/ptrace.h>
+
+-typedef long syscall_handler_t(struct pt_regs);
++typedef long syscall_handler_t(struct syscall_args);
+
+ extern syscall_handler_t *sys_call_table[];
+
+ #define EXECUTE_SYSCALL(syscall, regs) \
+- ((long (*)(struct syscall_args)) \
+- (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
++ ((*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
+diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h
+index 8a7d5e1da98e5..1e6875b4ffd83 100644
+--- a/arch/x86/um/shared/sysdep/syscalls_64.h
++++ b/arch/x86/um/shared/sysdep/syscalls_64.h
+@@ -10,13 +10,12 @@
+ #include <linux/msg.h>
+ #include <linux/shm.h>
+
+-typedef long syscall_handler_t(void);
++typedef long syscall_handler_t(long, long, long, long, long, long);
+
+ extern syscall_handler_t *sys_call_table[];
+
+ #define EXECUTE_SYSCALL(syscall, regs) \
+- (((long (*)(long, long, long, long, long, long)) \
+- (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
++ (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
+ UPT_SYSCALL_ARG2(&regs->regs), \
+ UPT_SYSCALL_ARG3(&regs->regs), \
+ UPT_SYSCALL_ARG4(&regs->regs), \
+diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
+index 58f51667e2e4b..8249685b40960 100644
+--- a/arch/x86/um/syscalls_64.c
++++ b/arch/x86/um/syscalls_64.c
+@@ -11,6 +11,7 @@
+ #include <linux/uaccess.h>
+ #include <asm/prctl.h> /* XXX This should get the constants from libc */
+ #include <os.h>
++#include <registers.h>
+
+ long arch_prctl(struct task_struct *task, int option,
+ unsigned long __user *arg2)
+@@ -35,7 +36,7 @@ long arch_prctl(struct task_struct *task, int option,
+ switch (option) {
+ case ARCH_SET_FS:
+ case ARCH_SET_GS:
+- ret = restore_registers(pid, &current->thread.regs.regs);
++ ret = restore_pid_registers(pid, &current->thread.regs.regs);
+ if (ret)
+ return ret;
+ break;
+diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
+index ac8eee093f9cd..66162eafd8e8f 100644
+--- a/arch/x86/um/tls_32.c
++++ b/arch/x86/um/tls_32.c
+@@ -65,9 +65,6 @@ static int get_free_idx(struct task_struct* task)
+ struct thread_struct *t = &task->thread;
+ int idx;
+
+- if (!t->arch.tls_array)
+- return GDT_ENTRY_TLS_MIN;
+-
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (!t->arch.tls_array[idx].present)
+ return idx + GDT_ENTRY_TLS_MIN;
+@@ -240,9 +237,6 @@ static int get_tls_entry(struct task_struct *task, struct user_desc *info,
+ {
+ struct thread_struct *t = &task->thread;
+
+- if (!t->arch.tls_array)
+- goto clear;
+-
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
+index 5943387e3f357..5ca366e15c767 100644
+--- a/arch/x86/um/vdso/Makefile
++++ b/arch/x86/um/vdso/Makefile
+@@ -62,7 +62,7 @@ quiet_cmd_vdso = VDSO $@
+ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
+ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
+
+-VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
++VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack
+ GCOV_PROFILE := n
+
+ #
+diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
+index 2112b8d146688..ff0f3b4b6c45e 100644
+--- a/arch/x86/um/vdso/um_vdso.c
++++ b/arch/x86/um/vdso/um_vdso.c
+@@ -17,8 +17,10 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
+ {
+ long ret;
+
+- asm("syscall" : "=a" (ret) :
+- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
++ asm("syscall"
++ : "=a" (ret)
++ : "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
++ : "rcx", "r11", "memory");
+
+ return ret;
+ }
+@@ -29,8 +31,10 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+ {
+ long ret;
+
+- asm("syscall" : "=a" (ret) :
+- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
++ asm("syscall"
++ : "=a" (ret)
++ : "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
++ : "rcx", "r11", "memory");
+
+ return ret;
+ }
+diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
+index 4953260e281c3..40b5779fce21c 100644
+--- a/arch/x86/xen/Makefile
++++ b/arch/x86/xen/Makefile
+@@ -45,7 +45,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+
+ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
+
+-obj-$(CONFIG_XEN_PV_DOM0) += vga.o
++obj-$(CONFIG_XEN_DOM0) += vga.o
+
+ obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
+
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index a7b7d674f5005..998db0257e2ad 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -759,6 +759,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
+ {
+ static DEFINE_SPINLOCK(lock);
+ static struct trap_info traps[257];
++ static const struct trap_info zero = { };
+ unsigned out;
+
+ trace_xen_cpu_load_idt(desc);
+@@ -768,7 +769,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
+ memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
+
+ out = xen_convert_trap_info(desc, traps, false);
+- memset(&traps[out], 0, sizeof(traps[0]));
++ traps[out] = zero;
+
+ xen_mc_flush();
+ if (HYPERVISOR_set_trap_table(traps))
+@@ -1352,7 +1353,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
+
+ x86_platform.set_legacy_features =
+ xen_dom0_set_legacy_features;
+- xen_init_vga(info, xen_start_info->console.dom0.info_size);
++ xen_init_vga(info, xen_start_info->console.dom0.info_size,
++ &boot_params.screen_info);
+ xen_start_info->console.domU.mfn = 0;
+ xen_start_info->console.domU.evtchn = 0;
+
+@@ -1364,10 +1366,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
+
+ xen_acpi_sleep_register();
+
+- /* Avoid searching for BIOS MP tables */
+- x86_init.mpparse.find_smp_config = x86_init_noop;
+- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+-
+ xen_boot_params_init_edd();
+
+ #ifdef CONFIG_ACPI
+diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
+index bcae606bbc5cf..ada3868c02c23 100644
+--- a/arch/x86/xen/enlighten_pvh.c
++++ b/arch/x86/xen/enlighten_pvh.c
+@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params)
+ x86_init.oem.banner = xen_banner;
+
+ xen_efi_init(boot_params);
++
++ if (xen_initial_domain()) {
++ struct xen_platform_op op = {
++ .cmd = XENPF_get_dom0_console,
++ };
++ int ret = HYPERVISOR_platform_op(&op);
++
++ if (ret > 0)
++ xen_init_vga(&op.u.dom0_console,
++ min(ret * sizeof(char),
++ sizeof(op.u.dom0_console)),
++ &boot_params->screen_info);
++ }
+ }
+
+ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
+diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
+index e13b0b49fcdfc..d7249f4c90f1b 100644
+--- a/arch/x86/xen/pmu.c
++++ b/arch/x86/xen/pmu.c
+@@ -512,10 +512,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+ return ret;
+ }
+
+-bool is_xen_pmu(int cpu)
+-{
+- return (get_xenpmu_data() != NULL);
+-}
++bool is_xen_pmu;
+
+ void xen_pmu_init(int cpu)
+ {
+@@ -526,7 +523,7 @@ void xen_pmu_init(int cpu)
+
+ BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+- if (xen_hvm_domain())
++ if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
+ return;
+
+ xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+@@ -547,7 +544,8 @@ void xen_pmu_init(int cpu)
+ per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
+ per_cpu(xenpmu_shared, cpu).flags = 0;
+
+- if (cpu == 0) {
++ if (!is_xen_pmu) {
++ is_xen_pmu = true;
+ perf_register_guest_info_callbacks(&xen_guest_cbs);
+ xen_pmu_arch_init();
+ }
+diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
+index 0e83a160589bc..65c58894fc79f 100644
+--- a/arch/x86/xen/pmu.h
++++ b/arch/x86/xen/pmu.h
+@@ -4,6 +4,8 @@
+
+ #include <xen/interface/xenpmu.h>
+
++extern bool is_xen_pmu;
++
+ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+ #ifdef CONFIG_XEN_HAVE_VPMU
+ void xen_pmu_init(int cpu);
+@@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu);
+ static inline void xen_pmu_init(int cpu) {}
+ static inline void xen_pmu_finish(int cpu) {}
+ #endif
+-bool is_xen_pmu(int cpu);
+ bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
+ bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
+ int pmu_apic_update(uint32_t reg);
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index 8bfc103301077..1f80dd3a2dd4a 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -922,7 +922,7 @@ void xen_enable_sysenter(void)
+ if (!boot_cpu_has(sysenter_feature))
+ return;
+
+- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
+ if(ret != 0)
+ setup_clear_cpu_cap(sysenter_feature);
+ }
+@@ -931,7 +931,7 @@ void xen_enable_syscall(void)
+ {
+ int ret;
+
+- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
+ if (ret != 0) {
+ printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
+ /* Pretty fatal; 64-bit userspace has no other
+@@ -940,7 +940,7 @@ void xen_enable_syscall(void)
+
+ if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
+ ret = register_callback(CALLBACKTYPE_syscall32,
+- xen_syscall32_target);
++ xen_entry_SYSCALL_compat);
+ if (ret != 0)
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+ }
+diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
+index c1b2f764b29a2..cdec892b28e2e 100644
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -32,30 +32,30 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
+
+ void xen_smp_intr_free(unsigned int cpu)
+ {
++ kfree(per_cpu(xen_resched_irq, cpu).name);
++ per_cpu(xen_resched_irq, cpu).name = NULL;
+ if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
+ per_cpu(xen_resched_irq, cpu).irq = -1;
+- kfree(per_cpu(xen_resched_irq, cpu).name);
+- per_cpu(xen_resched_irq, cpu).name = NULL;
+ }
++ kfree(per_cpu(xen_callfunc_irq, cpu).name);
++ per_cpu(xen_callfunc_irq, cpu).name = NULL;
+ if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
+ per_cpu(xen_callfunc_irq, cpu).irq = -1;
+- kfree(per_cpu(xen_callfunc_irq, cpu).name);
+- per_cpu(xen_callfunc_irq, cpu).name = NULL;
+ }
++ kfree(per_cpu(xen_debug_irq, cpu).name);
++ per_cpu(xen_debug_irq, cpu).name = NULL;
+ if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
+ per_cpu(xen_debug_irq, cpu).irq = -1;
+- kfree(per_cpu(xen_debug_irq, cpu).name);
+- per_cpu(xen_debug_irq, cpu).name = NULL;
+ }
++ kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
++ per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
+ if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
+ NULL);
+ per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
+- kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
+- per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
+ }
+ }
+
+@@ -65,6 +65,7 @@ int xen_smp_intr_init(unsigned int cpu)
+ char *resched_name, *callfunc_name, *debug_name;
+
+ resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
++ per_cpu(xen_resched_irq, cpu).name = resched_name;
+ rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
+ cpu,
+ xen_reschedule_interrupt,
+@@ -74,9 +75,9 @@ int xen_smp_intr_init(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_resched_irq, cpu).irq = rc;
+- per_cpu(xen_resched_irq, cpu).name = resched_name;
+
+ callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
++ per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
+ cpu,
+ xen_call_function_interrupt,
+@@ -86,10 +87,10 @@ int xen_smp_intr_init(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_callfunc_irq, cpu).irq = rc;
+- per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
+
+ if (!xen_fifo_events) {
+ debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
++ per_cpu(xen_debug_irq, cpu).name = debug_name;
+ rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
+ xen_debug_interrupt,
+ IRQF_PERCPU | IRQF_NOBALANCING,
+@@ -97,10 +98,10 @@ int xen_smp_intr_init(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_debug_irq, cpu).irq = rc;
+- per_cpu(xen_debug_irq, cpu).name = debug_name;
+ }
+
+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
++ per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+ cpu,
+ xen_call_function_single_interrupt,
+@@ -110,7 +111,6 @@ int xen_smp_intr_init(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
+- per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
+
+ return 0;
+
+diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
+index 6ff3c887e0b99..b70afdff419ca 100644
+--- a/arch/x86/xen/smp_hvm.c
++++ b/arch/x86/xen/smp_hvm.c
+@@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
+ */
+ xen_vcpu_setup(0);
+
++ /*
++ * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS.
++ * Refer to comments in xen_hvm_init_time_ops().
++ */
++ xen_hvm_init_time_ops();
++
+ /*
+ * The alternative logic (which patches the unlock/lock) runs before
+ * the smp bootup up code is activated. Hence we need to set this up
+diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
+index 7ed56c6075b0c..a1f974309b1cf 100644
+--- a/arch/x86/xen/smp_pv.c
++++ b/arch/x86/xen/smp_pv.c
+@@ -30,6 +30,7 @@
+ #include <asm/desc.h>
+ #include <asm/cpu.h>
+ #include <asm/io_apic.h>
++#include <asm/fpu/internal.h>
+
+ #include <xen/interface/xen.h>
+ #include <xen/interface/vcpu.h>
+@@ -63,6 +64,7 @@ static void cpu_bringup(void)
+
+ cr4_init();
+ cpu_init();
++ fpu__init_cpu();
+ touch_softlockup_watchdog();
+
+ /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+@@ -97,18 +99,18 @@ asmlinkage __visible void cpu_bringup_and_idle(void)
+
+ void xen_smp_intr_free_pv(unsigned int cpu)
+ {
++ kfree(per_cpu(xen_irq_work, cpu).name);
++ per_cpu(xen_irq_work, cpu).name = NULL;
+ if (per_cpu(xen_irq_work, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
+ per_cpu(xen_irq_work, cpu).irq = -1;
+- kfree(per_cpu(xen_irq_work, cpu).name);
+- per_cpu(xen_irq_work, cpu).name = NULL;
+ }
+
++ kfree(per_cpu(xen_pmu_irq, cpu).name);
++ per_cpu(xen_pmu_irq, cpu).name = NULL;
+ if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+ per_cpu(xen_pmu_irq, cpu).irq = -1;
+- kfree(per_cpu(xen_pmu_irq, cpu).name);
+- per_cpu(xen_pmu_irq, cpu).name = NULL;
+ }
+ }
+
+@@ -118,6 +120,7 @@ int xen_smp_intr_init_pv(unsigned int cpu)
+ char *callfunc_name, *pmu_name;
+
+ callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
++ per_cpu(xen_irq_work, cpu).name = callfunc_name;
+ rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
+ cpu,
+ xen_irq_work_interrupt,
+@@ -127,10 +130,10 @@ int xen_smp_intr_init_pv(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_irq_work, cpu).irq = rc;
+- per_cpu(xen_irq_work, cpu).name = callfunc_name;
+
+- if (is_xen_pmu(cpu)) {
++ if (is_xen_pmu) {
+ pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
++ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+ rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+ xen_pmu_irq_handler,
+ IRQF_PERCPU|IRQF_NOBALANCING,
+@@ -138,7 +141,6 @@ int xen_smp_intr_init_pv(unsigned int cpu)
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_pmu_irq, cpu).irq = rc;
+- per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+ }
+
+ return 0;
+@@ -148,28 +150,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
+ return rc;
+ }
+
+-static void __init xen_fill_possible_map(void)
+-{
+- int i, rc;
+-
+- if (xen_initial_domain())
+- return;
+-
+- for (i = 0; i < nr_cpu_ids; i++) {
+- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
+- if (rc >= 0) {
+- num_processors++;
+- set_cpu_possible(i, true);
+- }
+- }
+-}
+-
+-static void __init xen_filter_cpu_maps(void)
++static void __init _get_smp_config(unsigned int early)
+ {
+ int i, rc;
+ unsigned int subtract = 0;
+
+- if (!xen_initial_domain())
++ if (early)
+ return;
+
+ num_processors = 0;
+@@ -210,7 +196,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
+ * sure the old memory can be recycled. */
+ make_lowmem_page_readwrite(xen_initial_gdt);
+
+- xen_filter_cpu_maps();
+ xen_setup_vcpu_info_placement();
+
+ /*
+@@ -486,5 +471,8 @@ static const struct smp_ops xen_smp_ops __initconst = {
+ void __init xen_smp_init(void)
+ {
+ smp_ops = xen_smp_ops;
+- xen_fill_possible_map();
++
++ /* Avoid searching for BIOS MP tables */
++ x86_init.mpparse.find_smp_config = x86_init_noop;
++ x86_init.mpparse.get_smp_config = _get_smp_config;
+ }
+diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
+index 043c73dfd2c98..5c6fc16e4b925 100644
+--- a/arch/x86/xen/spinlock.c
++++ b/arch/x86/xen/spinlock.c
+@@ -75,6 +75,7 @@ void xen_init_lock_cpu(int cpu)
+ cpu, per_cpu(lock_kicker_irq, cpu));
+
+ name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
++ per_cpu(irq_name, cpu) = name;
+ irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+ cpu,
+ dummy_handler,
+@@ -85,7 +86,6 @@ void xen_init_lock_cpu(int cpu)
+ if (irq >= 0) {
+ disable_irq(irq); /* make sure it's never delivered */
+ per_cpu(lock_kicker_irq, cpu) = irq;
+- per_cpu(irq_name, cpu) = name;
+ }
+
+ printk("cpu %d spinlock event irq %d\n", cpu, irq);
+@@ -98,6 +98,8 @@ void xen_uninit_lock_cpu(int cpu)
+ if (!xen_pvspin)
+ return;
+
++ kfree(per_cpu(irq_name, cpu));
++ per_cpu(irq_name, cpu) = NULL;
+ /*
+ * When booting the kernel with 'mitigations=auto,nosmt', the secondary
+ * CPUs are not activated, and lock_kicker_irq is not initialized.
+@@ -108,8 +110,6 @@ void xen_uninit_lock_cpu(int cpu)
+
+ unbind_from_irqhandler(irq, NULL);
+ per_cpu(lock_kicker_irq, cpu) = -1;
+- kfree(per_cpu(irq_name, cpu));
+- per_cpu(irq_name, cpu) = NULL;
+ }
+
+ PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
+diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
+index d9c945ee11008..9ef0a5cca96ee 100644
+--- a/arch/x86/xen/time.c
++++ b/arch/x86/xen/time.c
+@@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
+
+ void __init xen_hvm_init_time_ops(void)
+ {
++ static bool hvm_time_initialized;
++
++ if (hvm_time_initialized)
++ return;
++
+ /*
+ * vector callback is needed otherwise we cannot receive interrupts
+ * on cpu > 0 and at this point we don't know how many cpus are
+@@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void)
+ return;
+
+ if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
+- pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
++ pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
++ return;
++ }
++
++ /*
++ * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
++ * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
++ * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
++ * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
++ *
++ * The xen_hvm_init_time_ops() should be called again later after
++ * __this_cpu_read(xen_vcpu) is available.
++ */
++ if (!__this_cpu_read(xen_vcpu)) {
++ pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
++ xen_vcpu_nr(0));
+ return;
+ }
+
+@@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void)
+ x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
+
+ x86_platform.set_wallclock = xen_set_wallclock;
++
++ hvm_time_initialized = true;
+ }
+ #endif
+
+diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
+index e336f223f7f47..93697109592c3 100644
+--- a/arch/x86/xen/vga.c
++++ b/arch/x86/xen/vga.c
+@@ -9,10 +9,9 @@
+
+ #include "xen-ops.h"
+
+-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
++void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size,
++ struct screen_info *screen_info)
+ {
+- struct screen_info *screen_info = &boot_params.screen_info;
+-
+ /* This is drawn from a dump from vgacon:startup in
+ * standard Linux. */
+ screen_info->orig_video_mode = 3;
+diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
+index 1e626444712be..1b757a1ee1bb6 100644
+--- a/arch/x86/xen/xen-asm.S
++++ b/arch/x86/xen/xen-asm.S
+@@ -20,6 +20,7 @@
+
+ #include <linux/init.h>
+ #include <linux/linkage.h>
++#include <../entry/calling.h>
+
+ /*
+ * Enable events. This clears the event mask and tests the pending
+@@ -44,7 +45,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
+ call check_events
+ 1:
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_irq_enable_direct)
+
+
+@@ -54,7 +55,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
+ */
+ SYM_FUNC_START(xen_irq_disable_direct)
+ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+- ret
++ RET
+ SYM_FUNC_END(xen_irq_disable_direct)
+
+ /*
+@@ -70,7 +71,7 @@ SYM_FUNC_START(xen_save_fl_direct)
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ setz %ah
+ addb %ah, %ah
+- ret
++ RET
+ SYM_FUNC_END(xen_save_fl_direct)
+
+ /*
+@@ -99,7 +100,7 @@ SYM_FUNC_START(check_events)
+ pop %rcx
+ pop %rax
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(check_events)
+
+ SYM_FUNC_START(xen_read_cr2)
+@@ -107,19 +108,19 @@ SYM_FUNC_START(xen_read_cr2)
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_read_cr2);
+
+ SYM_FUNC_START(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+- ret
++ RET
+ SYM_FUNC_END(xen_read_cr2_direct);
+
+ .macro xen_pv_trap name
+ SYM_CODE_START(xen_\name)
+- UNWIND_HINT_EMPTY
++ UNWIND_HINT_ENTRY
+ pop %rcx
+ pop %r11
+ jmp \name
+@@ -191,6 +192,25 @@ SYM_CODE_START(xen_iret)
+ jmp hypercall_iret
+ SYM_CODE_END(xen_iret)
+
++/*
++ * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
++ * also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode()
++ * in XEN pv would cause %rsp to move up to the top of the kernel stack and
++ * leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI
++ * interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET
++ * frame at the same address is useless.
++ */
++SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
++ UNWIND_HINT_REGS
++ POP_REGS
++
++ /* stackleak_erase() can work safely on the kernel stack. */
++ STACKLEAK_ERASE_NOCLOBBER
++
++ addq $8, %rsp /* skip regs->orig_ax */
++ jmp xen_iret
++SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
++
+ /*
+ * Xen handles syscall callbacks much like ordinary exceptions, which
+ * means we have:
+@@ -207,8 +227,8 @@ SYM_CODE_END(xen_iret)
+ */
+
+ /* Normal 64-bit system call target */
+-SYM_CODE_START(xen_syscall_target)
+- UNWIND_HINT_EMPTY
++SYM_CODE_START(xen_entry_SYSCALL_64)
++ UNWIND_HINT_ENTRY
+ popq %rcx
+ popq %r11
+
+@@ -221,13 +241,13 @@ SYM_CODE_START(xen_syscall_target)
+ movq $__USER_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_64_after_hwframe
+-SYM_CODE_END(xen_syscall_target)
++SYM_CODE_END(xen_entry_SYSCALL_64)
+
+ #ifdef CONFIG_IA32_EMULATION
+
+ /* 32-bit compat syscall target */
+-SYM_CODE_START(xen_syscall32_target)
+- UNWIND_HINT_EMPTY
++SYM_CODE_START(xen_entry_SYSCALL_compat)
++ UNWIND_HINT_ENTRY
+ popq %rcx
+ popq %r11
+
+@@ -240,11 +260,11 @@ SYM_CODE_START(xen_syscall32_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_compat_after_hwframe
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+
+ /* 32-bit compat sysenter target */
+-SYM_CODE_START(xen_sysenter_target)
+- UNWIND_HINT_EMPTY
++SYM_CODE_START(xen_entry_SYSENTER_compat)
++ UNWIND_HINT_ENTRY
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+ * that we don't need to guard against single step exceptions here.
+@@ -261,18 +281,18 @@ SYM_CODE_START(xen_sysenter_target)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSENTER_compat_after_hwframe
+-SYM_CODE_END(xen_sysenter_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
+
+ #else /* !CONFIG_IA32_EMULATION */
+
+-SYM_CODE_START(xen_syscall32_target)
+-SYM_CODE_START(xen_sysenter_target)
+- UNWIND_HINT_EMPTY
++SYM_CODE_START(xen_entry_SYSCALL_compat)
++SYM_CODE_START(xen_entry_SYSENTER_compat)
++ UNWIND_HINT_ENTRY
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
+ mov $-ENOSYS, %rax
+ pushq $0
+ jmp hypercall_iret
+-SYM_CODE_END(xen_sysenter_target)
+-SYM_CODE_END(xen_syscall32_target)
++SYM_CODE_END(xen_entry_SYSENTER_compat)
++SYM_CODE_END(xen_entry_SYSCALL_compat)
+
+ #endif /* CONFIG_IA32_EMULATION */
+diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
+index cb6538ae2fe07..2a3ef5fcba34b 100644
+--- a/arch/x86/xen/xen-head.S
++++ b/arch/x86/xen/xen-head.S
+@@ -69,8 +69,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
+ SYM_CODE_START(hypercall_page)
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_FUNC
+- .skip 31, 0x90
++ ANNOTATE_UNRET_SAFE
+ ret
++ .skip 31, 0xcc
+ .endr
+
+ #define HYPERCALL(n) \
+diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
+index 8bc8b72a205d4..71f31032c635f 100644
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -10,10 +10,10 @@
+ /* These are code, but not functions. Defined in entry.S */
+ extern const char xen_failsafe_callback[];
+
+-void xen_sysenter_target(void);
++void xen_entry_SYSENTER_compat(void);
+ #ifdef CONFIG_X86_64
+-void xen_syscall_target(void);
+-void xen_syscall32_target(void);
++void xen_entry_SYSCALL_64(void);
++void xen_entry_SYSCALL_compat(void);
+ #endif
+
+ extern void *xen_initial_gdt;
+@@ -110,11 +110,12 @@ static inline void xen_uninit_lock_cpu(int cpu)
+
+ struct dom0_vga_console_info;
+
+-#ifdef CONFIG_XEN_PV_DOM0
+-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
++#ifdef CONFIG_XEN_DOM0
++void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size,
++ struct screen_info *);
+ #else
+ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
+- size_t size)
++ size_t size, struct screen_info *si)
+ {
+ }
+ #endif
+diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
+index 9bf8bad1dd18a..c33932568aa73 100644
+--- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
++++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi
+@@ -8,19 +8,19 @@
+ reg = <0x00000000 0x08000000>;
+ bank-width = <2>;
+ device-width = <2>;
+- partition@0x0 {
++ partition@0 {
+ label = "data";
+ reg = <0x00000000 0x06000000>;
+ };
+- partition@0x6000000 {
++ partition@6000000 {
+ label = "boot loader area";
+ reg = <0x06000000 0x00800000>;
+ };
+- partition@0x6800000 {
++ partition@6800000 {
+ label = "kernel image";
+ reg = <0x06800000 0x017e0000>;
+ };
+- partition@0x7fe0000 {
++ partition@7fe0000 {
+ label = "boot environment";
+ reg = <0x07fe0000 0x00020000>;
+ };
+diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
+index 40c2f81f7cb66..7bde2ab2d6fb5 100644
+--- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
++++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi
+@@ -8,19 +8,19 @@
+ reg = <0x08000000 0x01000000>;
+ bank-width = <2>;
+ device-width = <2>;
+- partition@0x0 {
++ partition@0 {
+ label = "boot loader area";
+ reg = <0x00000000 0x00400000>;
+ };
+- partition@0x400000 {
++ partition@400000 {
+ label = "kernel image";
+ reg = <0x00400000 0x00600000>;
+ };
+- partition@0xa00000 {
++ partition@a00000 {
+ label = "data";
+ reg = <0x00a00000 0x005e0000>;
+ };
+- partition@0xfe0000 {
++ partition@fe0000 {
+ label = "boot environment";
+ reg = <0x00fe0000 0x00020000>;
+ };
+diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
+index fb8d3a9f33c23..0655b868749a4 100644
+--- a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
++++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi
+@@ -8,11 +8,11 @@
+ reg = <0x08000000 0x00400000>;
+ bank-width = <2>;
+ device-width = <2>;
+- partition@0x0 {
++ partition@0 {
+ label = "boot loader area";
+ reg = <0x00000000 0x003f0000>;
+ };
+- partition@0x3f0000 {
++ partition@3f0000 {
+ label = "boot environment";
+ reg = <0x003f0000 0x00010000>;
+ };
+diff --git a/arch/xtensa/include/asm/bugs.h b/arch/xtensa/include/asm/bugs.h
+deleted file mode 100644
+index 69b29d1982494..0000000000000
+--- a/arch/xtensa/include/asm/bugs.h
++++ /dev/null
+@@ -1,18 +0,0 @@
+-/*
+- * include/asm-xtensa/bugs.h
+- *
+- * This is included by init/main.c to check for architecture-dependent bugs.
+- *
+- * Xtensa processors don't have any bugs. :)
+- *
+- * This file is subject to the terms and conditions of the GNU General
+- * Public License. See the file "COPYING" in the main directory of
+- * this archive for more details.
+- */
+-
+-#ifndef _XTENSA_BUGS_H
+-#define _XTENSA_BUGS_H
+-
+-static void check_bugs(void) { }
+-
+-#endif /* _XTENSA_BUGS_H */
+diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h
+index 5590b0f688376..a4e40166ff4bb 100644
+--- a/arch/xtensa/include/asm/core.h
++++ b/arch/xtensa/include/asm/core.h
+@@ -26,4 +26,13 @@
+ #define XCHAL_SPANNING_WAY 0
+ #endif
+
++#ifndef XCHAL_HW_MIN_VERSION
++#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR)
++#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR * 100 + \
++ XCHAL_HW_MIN_VERSION_MINOR)
++#else
++#define XCHAL_HW_MIN_VERSION 0
++#endif
++#endif
++
+ #endif
+diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
+index bd5aeb7955675..a63eca1266577 100644
+--- a/arch/xtensa/include/asm/pgtable.h
++++ b/arch/xtensa/include/asm/pgtable.h
+@@ -411,6 +411,10 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
+
+ typedef pte_t *pte_addr_t;
+
++void update_mmu_tlb(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep);
++#define __HAVE_ARCH_UPDATE_MMU_TLB
++
+ #endif /* !defined (__ASSEMBLY__) */
+
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
+index 7f63aca6a0d34..9dd4efe1bf0bd 100644
+--- a/arch/xtensa/include/asm/processor.h
++++ b/arch/xtensa/include/asm/processor.h
+@@ -226,8 +226,8 @@ extern unsigned long get_wchan(struct task_struct *p);
+
+ #define xtensa_set_sr(x, sr) \
+ ({ \
+- unsigned int v = (unsigned int)(x); \
+- __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \
++ __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \
++ "a"((unsigned int)(x))); \
+ })
+
+ #define xtensa_get_sr(sr) \
+diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h
+index 233ec75e60c69..3f2462f2d0270 100644
+--- a/arch/xtensa/include/asm/timex.h
++++ b/arch/xtensa/include/asm/timex.h
+@@ -29,10 +29,6 @@
+
+ extern unsigned long ccount_freq;
+
+-typedef unsigned long long cycles_t;
+-
+-#define get_cycles() (0)
+-
+ void local_timer_setup(unsigned cpu);
+
+ /*
+@@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare)
+ xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER);
+ }
+
++#include <asm-generic/timex.h>
++
+ #endif /* _XTENSA_TIMEX_H */
+diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
+index 45cc0ae0af6f9..c7b9f12896f20 100644
+--- a/arch/xtensa/kernel/coprocessor.S
++++ b/arch/xtensa/kernel/coprocessor.S
+@@ -29,7 +29,7 @@
+ .if XTENSA_HAVE_COPROCESSOR(x); \
+ .align 4; \
+ .Lsave_cp_regs_cp##x: \
+- xchal_cp##x##_store a2 a4 a5 a6 a7; \
++ xchal_cp##x##_store a2 a3 a4 a5 a6; \
+ jx a0; \
+ .endif
+
+@@ -46,7 +46,7 @@
+ .if XTENSA_HAVE_COPROCESSOR(x); \
+ .align 4; \
+ .Lload_cp_regs_cp##x: \
+- xchal_cp##x##_load a2 a4 a5 a6 a7; \
++ xchal_cp##x##_load a2 a3 a4 a5 a6; \
+ jx a0; \
+ .endif
+
+diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c
+index 61cf6497a646b..ad1841cecdfb7 100644
+--- a/arch/xtensa/kernel/jump_label.c
++++ b/arch/xtensa/kernel/jump_label.c
+@@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data)
+ {
+ struct patch *patch = data;
+
+- if (atomic_inc_return(&patch->cpu_count) == 1) {
++ if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
+ local_patch_text(patch->addr, patch->data, patch->sz);
+ atomic_inc(&patch->cpu_count);
+ } else {
+@@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz)
+ .data = data,
+ };
+ stop_machine_cpuslocked(patch_text_stop_machine,
+- &patch, NULL);
++ &patch, cpu_online_mask);
+ } else {
+ unsigned long flags;
+
+diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
+index a0d05c8598d0f..183618090d05b 100644
+--- a/arch/xtensa/kernel/perf_event.c
++++ b/arch/xtensa/kernel/perf_event.c
+@@ -13,17 +13,26 @@
+ #include <linux/perf_event.h>
+ #include <linux/platform_device.h>
+
++#include <asm/core.h>
+ #include <asm/processor.h>
+ #include <asm/stacktrace.h>
+
++#define XTENSA_HWVERSION_RG_2015_0 260000
++
++#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
++#define XTENSA_PMU_ERI_BASE 0x00101000
++#else
++#define XTENSA_PMU_ERI_BASE 0x00001000
++#endif
++
+ /* Global control/status for all perf counters */
+-#define XTENSA_PMU_PMG 0x1000
++#define XTENSA_PMU_PMG XTENSA_PMU_ERI_BASE
+ /* Perf counter values */
+-#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
++#define XTENSA_PMU_PM(i) (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
+ /* Perf counter control registers */
+-#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
++#define XTENSA_PMU_PMCTRL(i) (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
+ /* Perf counter status registers */
+-#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)
++#define XTENSA_PMU_PMSTAT(i) (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
+
+ #define XTENSA_PMU_PMG_PMEN 0x1
+
+diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
+index bb3f4797d212b..db6cdea471d83 100644
+--- a/arch/xtensa/kernel/ptrace.c
++++ b/arch/xtensa/kernel/ptrace.c
+@@ -226,12 +226,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+
+ void user_enable_single_step(struct task_struct *child)
+ {
+- child->ptrace |= PT_SINGLESTEP;
++ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+
+ void user_disable_single_step(struct task_struct *child)
+ {
+- child->ptrace &= ~PT_SINGLESTEP;
++ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+
+ /*
+diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
+index c4d77dbfb61af..f2b00f43cf236 100644
+--- a/arch/xtensa/kernel/signal.c
++++ b/arch/xtensa/kernel/signal.c
+@@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *regs)
+ /* Set up the stack frame */
+ ret = setup_frame(&ksig, sigmask_to_save(), regs);
+ signal_setup_done(ret, &ksig, 0);
+- if (current->ptrace & PT_SINGLESTEP)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ task_pt_regs(current)->icountlevel = 1;
+
+ return;
+@@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *regs)
+ /* If there's no signal to deliver, we just restore the saved mask. */
+ restore_saved_sigmask();
+
+- if (current->ptrace & PT_SINGLESTEP)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ task_pt_regs(current)->icountlevel = 1;
+ return;
+ }
+diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
+index e8ceb15286081..16b8a6273772c 100644
+--- a/arch/xtensa/kernel/time.c
++++ b/arch/xtensa/kernel/time.c
+@@ -154,6 +154,7 @@ static void __init calibrate_ccount(void)
+ cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu");
+ if (cpu) {
+ clk = of_clk_get(cpu, 0);
++ of_node_put(cpu);
+ if (!IS_ERR(clk)) {
+ ccount_freq = clk_get_rate(clk);
+ return;
+diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
+index 874b6efc6fb31..5624a64ff7e93 100644
+--- a/arch/xtensa/kernel/traps.c
++++ b/arch/xtensa/kernel/traps.c
+@@ -510,7 +510,7 @@ static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
+
+ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+ {
+- size_t len;
++ size_t len, off = 0;
+
+ if (!sp)
+ sp = stack_pointer(task);
+@@ -519,9 +519,17 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+ kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+
+ printk("%sStack:\n", loglvl);
+- print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
+- STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
+- sp, len, false);
++ while (off < len) {
++ u8 line[STACK_DUMP_LINE_SIZE];
++ size_t line_len = len - off > STACK_DUMP_LINE_SIZE ?
++ STACK_DUMP_LINE_SIZE : len - off;
++
++ __memcpy(line, (u8 *)sp + off, line_len);
++ print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
++ STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
++ line, line_len, false);
++ off += STACK_DUMP_LINE_SIZE;
++ }
+ show_trace(task, sp, loglvl);
+ }
+
+@@ -552,5 +560,5 @@ void die(const char * str, struct pt_regs * regs, long err)
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+- do_exit(err);
++ make_task_dead(err);
+ }
+diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
+index f436cf2efd8b7..27a477dae2322 100644
+--- a/arch/xtensa/mm/tlb.c
++++ b/arch/xtensa/mm/tlb.c
+@@ -162,6 +162,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ }
+ }
+
++void update_mmu_tlb(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep)
++{
++ local_flush_tlb_page(vma, address);
++}
++
+ #ifdef CONFIG_DEBUG_TLB_SANITY
+
+ static unsigned get_pte_for_vaddr(unsigned vaddr)
+diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
+index 81d7c7e8f7e96..10b79d3c74e07 100644
+--- a/arch/xtensa/platforms/iss/console.c
++++ b/arch/xtensa/platforms/iss/console.c
+@@ -36,24 +36,19 @@ static void rs_poll(struct timer_list *);
+ static struct tty_driver *serial_driver;
+ static struct tty_port serial_port;
+ static DEFINE_TIMER(serial_timer, rs_poll);
+-static DEFINE_SPINLOCK(timer_lock);
+
+ static int rs_open(struct tty_struct *tty, struct file * filp)
+ {
+- spin_lock_bh(&timer_lock);
+ if (tty->count == 1)
+ mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
+- spin_unlock_bh(&timer_lock);
+
+ return 0;
+ }
+
+ static void rs_close(struct tty_struct *tty, struct file * filp)
+ {
+- spin_lock_bh(&timer_lock);
+ if (tty->count == 1)
+ del_timer_sync(&serial_timer);
+- spin_unlock_bh(&timer_lock);
+ }
+
+
+@@ -73,8 +68,6 @@ static void rs_poll(struct timer_list *unused)
+ int rd = 1;
+ unsigned char c;
+
+- spin_lock(&timer_lock);
+-
+ while (simc_poll(0)) {
+ rd = simc_read(0, &c, 1);
+ if (rd <= 0)
+@@ -87,7 +80,6 @@ static void rs_poll(struct timer_list *unused)
+ tty_flip_buffer_push(port);
+ if (rd)
+ mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
+- spin_unlock(&timer_lock);
+ }
+
+
+diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
+index 4986226a5ab26..1270de83435eb 100644
+--- a/arch/xtensa/platforms/iss/network.c
++++ b/arch/xtensa/platforms/iss/network.c
+@@ -231,7 +231,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init)
+
+ init += sizeof(TRANSPORT_TUNTAP_NAME) - 1;
+ if (*init == ',') {
+- rem = split_if_spec(init + 1, &mac_str, &dev_name);
++ rem = split_if_spec(init + 1, &mac_str, &dev_name, NULL);
+ if (rem != NULL) {
+ pr_err("%s: extra garbage on specification : '%s'\n",
+ dev->name, rem);
+@@ -502,16 +502,24 @@ static const struct net_device_ops iss_netdev_ops = {
+ .ndo_set_rx_mode = iss_net_set_multicast_list,
+ };
+
+-static int iss_net_configure(int index, char *init)
++static void iss_net_pdev_release(struct device *dev)
++{
++ struct platform_device *pdev = to_platform_device(dev);
++ struct iss_net_private *lp =
++ container_of(pdev, struct iss_net_private, pdev);
++
++ free_netdev(lp->dev);
++}
++
++static void iss_net_configure(int index, char *init)
+ {
+ struct net_device *dev;
+ struct iss_net_private *lp;
+- int err;
+
+ dev = alloc_etherdev(sizeof(*lp));
+ if (dev == NULL) {
+ pr_err("eth_configure: failed to allocate device\n");
+- return 1;
++ return;
+ }
+
+ /* Initialize private element. */
+@@ -540,7 +548,7 @@ static int iss_net_configure(int index, char *init)
+ if (!tuntap_probe(lp, index, init)) {
+ pr_err("%s: invalid arguments. Skipping device!\n",
+ dev->name);
+- goto errout;
++ goto err_free_netdev;
+ }
+
+ pr_info("Netdevice %d (%pM)\n", index, dev->dev_addr);
+@@ -548,7 +556,8 @@ static int iss_net_configure(int index, char *init)
+ /* sysfs register */
+
+ if (!driver_registered) {
+- platform_driver_register(&iss_net_driver);
++ if (platform_driver_register(&iss_net_driver))
++ goto err_free_netdev;
+ driver_registered = 1;
+ }
+
+@@ -558,7 +567,9 @@ static int iss_net_configure(int index, char *init)
+
+ lp->pdev.id = index;
+ lp->pdev.name = DRIVER_NAME;
+- platform_device_register(&lp->pdev);
++ lp->pdev.dev.release = iss_net_pdev_release;
++ if (platform_device_register(&lp->pdev))
++ goto err_free_netdev;
+ SET_NETDEV_DEV(dev, &lp->pdev.dev);
+
+ dev->netdev_ops = &iss_netdev_ops;
+@@ -567,23 +578,20 @@ static int iss_net_configure(int index, char *init)
+ dev->irq = -1;
+
+ rtnl_lock();
+- err = register_netdevice(dev);
+- rtnl_unlock();
+-
+- if (err) {
++ if (register_netdevice(dev)) {
++ rtnl_unlock();
+ pr_err("%s: error registering net device!\n", dev->name);
+- /* XXX: should we call ->remove() here? */
+- free_netdev(dev);
+- return 1;
++ platform_device_unregister(&lp->pdev);
++ return;
+ }
++ rtnl_unlock();
+
+ timer_setup(&lp->tl, iss_net_user_timer_expire, 0);
+
+- return 0;
++ return;
+
+-errout:
+- /* FIXME: unregister; free, etc.. */
+- return -EIO;
++err_free_netdev:
++ free_netdev(dev);
+ }
+
+ /* ------------------------------------------------------------------------- */
+diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
+index 3cdfa00738e07..edb27649851f9 100644
+--- a/arch/xtensa/platforms/iss/simdisk.c
++++ b/arch/xtensa/platforms/iss/simdisk.c
+@@ -212,12 +212,18 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
+ struct simdisk *dev = PDE_DATA(file_inode(file));
+ const char *s = dev->filename;
+ if (s) {
+- ssize_t n = simple_read_from_buffer(buf, size, ppos,
+- s, strlen(s));
+- if (n < 0)
+- return n;
+- buf += n;
+- size -= n;
++ ssize_t len = strlen(s);
++ char *temp = kmalloc(len + 2, GFP_KERNEL);
++
++ if (!temp)
++ return -ENOMEM;
++
++ len = scnprintf(temp, len + 2, "%s\n", s);
++ len = simple_read_from_buffer(buf, size, ppos,
++ temp, len);
++
++ kfree(temp);
++ return len;
+ }
+ return simple_read_from_buffer(buf, size, ppos, "\n", 1);
+ }
+diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
+index 538e6748e85a7..c79c1d09ea863 100644
+--- a/arch/xtensa/platforms/xtfpga/setup.c
++++ b/arch/xtensa/platforms/xtfpga/setup.c
+@@ -133,6 +133,7 @@ static int __init machine_setup(void)
+
+ if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc")))
+ update_local_mac(eth);
++ of_node_put(eth);
+ return 0;
+ }
+ arch_initcall(machine_setup);
+diff --git a/block/Makefile b/block/Makefile
+index 41aa1ba69c900..74df168729ecb 100644
+--- a/block/Makefile
++++ b/block/Makefile
+@@ -3,7 +3,7 @@
+ # Makefile for the kernel block layer
+ #
+
+-obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
++obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
+ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
+ blk-exec.o blk-merge.o blk-timeout.o \
+ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
+diff --git a/block/bdev.c b/block/bdev.c
+index 485a258b0ab37..b8599a4088843 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -184,14 +184,13 @@ int sb_min_blocksize(struct super_block *sb, int size)
+
+ EXPORT_SYMBOL(sb_min_blocksize);
+
+-int __sync_blockdev(struct block_device *bdev, int wait)
++int sync_blockdev_nowait(struct block_device *bdev)
+ {
+ if (!bdev)
+ return 0;
+- if (!wait)
+- return filemap_flush(bdev->bd_inode->i_mapping);
+- return filemap_write_and_wait(bdev->bd_inode->i_mapping);
++ return filemap_flush(bdev->bd_inode->i_mapping);
+ }
++EXPORT_SYMBOL_GPL(sync_blockdev_nowait);
+
+ /*
+ * Write out and wait upon all the dirty data associated with a block
+@@ -199,7 +198,9 @@ int __sync_blockdev(struct block_device *bdev, int wait)
+ */
+ int sync_blockdev(struct block_device *bdev)
+ {
+- return __sync_blockdev(bdev, 1);
++ if (!bdev)
++ return 0;
++ return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+ }
+ EXPORT_SYMBOL(sync_blockdev);
+
+@@ -834,7 +835,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+ * used in blkdev_get/put().
+ */
+ if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+- (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
++ (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
+ bdev->bd_write_holder = true;
+ unblock_events = false;
+ }
+@@ -1016,7 +1017,7 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+ }
+ EXPORT_SYMBOL(__invalidate_device);
+
+-void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
++void sync_bdevs(bool wait)
+ {
+ struct inode *inode, *old_inode = NULL;
+
+@@ -1047,8 +1048,19 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+ bdev = I_BDEV(inode);
+
+ mutex_lock(&bdev->bd_disk->open_mutex);
+- if (bdev->bd_openers)
+- func(bdev, arg);
++ if (!bdev->bd_openers) {
++ ; /* skip */
++ } else if (wait) {
++ /*
++ * We keep the error status of individual mapping so
++ * that applications can catch the writeback error using
++ * fsync(2). See filemap_fdatawait_keep_errors() for
++ * details.
++ */
++ filemap_fdatawait_keep_errors(inode->i_mapping);
++ } else {
++ filemap_fdatawrite(inode->i_mapping);
++ }
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+
+ spin_lock(&blockdev_superblock->s_inode_list_lock);
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index 85b8e1c3a762d..53e275e377a73 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -555,6 +555,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
+ */
+ bfqg->bfqd = bfqd;
+ bfqg->active_entities = 0;
++ bfqg->online = true;
+ bfqg->rq_pos_tree = RB_ROOT;
+ }
+
+@@ -583,28 +584,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
+ entity->sched_data = &parent->sched_data;
+ }
+
+-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
+- struct blkcg *blkcg)
++static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
+ {
+- struct blkcg_gq *blkg;
+-
+- blkg = blkg_lookup(blkcg, bfqd->queue);
+- if (likely(blkg))
+- return blkg_to_bfqg(blkg);
+- return NULL;
+-}
+-
+-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+- struct blkcg *blkcg)
+-{
+- struct bfq_group *bfqg, *parent;
++ struct bfq_group *parent;
+ struct bfq_entity *entity;
+
+- bfqg = bfq_lookup_bfqg(bfqd, blkcg);
+-
+- if (unlikely(!bfqg))
+- return NULL;
+-
+ /*
+ * Update chain of bfq_groups as we might be handling a leaf group
+ * which, along with some of its relatives, has not been hooked yet
+@@ -621,8 +605,28 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+ bfq_group_set_parent(curr_bfqg, parent);
+ }
+ }
++}
+
+- return bfqg;
++struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
++{
++ struct blkcg_gq *blkg = bio->bi_blkg;
++ struct bfq_group *bfqg;
++
++ while (blkg) {
++ if (!blkg->online) {
++ blkg = blkg->parent;
++ continue;
++ }
++ bfqg = blkg_to_bfqg(blkg);
++ if (bfqg->online) {
++ bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
++ return bfqg;
++ }
++ blkg = blkg->parent;
++ }
++ bio_associate_blkg_from_css(bio,
++ &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
++ return bfqd->root_group;
+ }
+
+ /**
+@@ -644,6 +648,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
++ /*
++ * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group
++ * until elevator exit.
++ */
++ if (bfqq == &bfqd->oom_bfqq)
++ return;
+ /*
+ * Get extra reference to prevent bfqq from being freed in
+ * next possible expire or deactivate.
+@@ -698,38 +708,58 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * Move bic to blkcg, assuming that bfqd->lock is held; which makes
+ * sure that the reference to cgroup is valid across the call (see
+ * comments in bfq_bic_update_cgroup on this issue)
+- *
+- * NOTE: an alternative approach might have been to store the current
+- * cgroup in bfqq and getting a reference to it, reducing the lookup
+- * time here, at the price of slightly more complex code.
+ */
+-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+- struct bfq_io_cq *bic,
+- struct blkcg *blkcg)
++static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
++ struct bfq_io_cq *bic,
++ struct bfq_group *bfqg)
+ {
+- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
+- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
+- struct bfq_group *bfqg;
++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
+ struct bfq_entity *entity;
+
+- bfqg = bfq_find_set_group(bfqd, blkcg);
+-
+- if (unlikely(!bfqg))
+- bfqg = bfqd->root_group;
+-
+ if (async_bfqq) {
+ entity = &async_bfqq->entity;
+
+ if (entity->sched_data != &bfqg->sched_data) {
+- bic_set_bfqq(bic, NULL, 0);
++ bic_set_bfqq(bic, NULL, false);
+ bfq_release_process_ref(bfqd, async_bfqq);
+ }
+ }
+
+ if (sync_bfqq) {
+- entity = &sync_bfqq->entity;
+- if (entity->sched_data != &bfqg->sched_data)
+- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
++ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
++ /* We are the only user of this bfqq, just move it */
++ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
++ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
++ } else {
++ struct bfq_queue *bfqq;
++
++ /*
++ * The queue was merged to a different queue. Check
++ * that the merge chain still belongs to the same
++ * cgroup.
++ */
++ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
++ if (bfqq->entity.sched_data !=
++ &bfqg->sched_data)
++ break;
++ if (bfqq) {
++ /*
++ * Some queue changed cgroup so the merge is
++ * not valid anymore. We cannot easily just
++ * cancel the merge (by clearing new_bfqq) as
++ * there may be other processes using this
++ * queue and holding refs to all queues below
++ * sync_bfqq->new_bfqq. Similarly if the merge
++ * already happened, we need to detach from
++ * bfqq now so that we cannot merge bio to a
++ * request from the old cgroup.
++ */
++ bfq_put_cooperator(sync_bfqq);
++ bic_set_bfqq(bic, NULL, true);
++ bfq_release_process_ref(bfqd, sync_bfqq);
++ }
++ }
+ }
+
+ return bfqg;
+@@ -738,20 +768,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ {
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+- struct bfq_group *bfqg = NULL;
++ struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
+ uint64_t serial_nr;
+
+- rcu_read_lock();
+- serial_nr = __bio_blkcg(bio)->css.serial_nr;
++ serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
+
+ /*
+ * Check whether blkcg has changed. The condition may trigger
+ * spuriously on a newly created cic but there's no harm.
+ */
+ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
+- goto out;
++ return;
+
+- bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
++ /*
++ * New cgroup for this process. Make sure it is linked to bfq internal
++ * cgroup hierarchy.
++ */
++ bfq_link_bfqg(bfqd, bfqg);
++ __bfq_bic_change_cgroup(bfqd, bic, bfqg);
+ /*
+ * Update blkg_path for bfq_log_* functions. We cache this
+ * path, and update it here, for the following
+@@ -804,8 +838,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ */
+ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
+ bic->blkcg_serial_nr = serial_nr;
+-out:
+- rcu_read_unlock();
+ }
+
+ /**
+@@ -933,6 +965,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
+
+ put_async_queues:
+ bfq_put_async_queues(bfqd, bfqg);
++ bfqg->online = false;
+
+ spin_unlock_irqrestore(&bfqd->lock, flags);
+ /*
+@@ -1422,7 +1455,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
+ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+ }
+
+-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
++struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+ {
+ return bfqd->root_group;
+ }
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 480e1a1348596..f54554906451e 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -386,6 +386,12 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+
+ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
+ {
++ struct bfq_queue *old_bfqq = bic->bfqq[is_sync];
++
++ /* Clear bic pointer if bfqq is detached from this bic */
++ if (old_bfqq && old_bfqq->bic == bic)
++ old_bfqq->bic = NULL;
++
+ /*
+ * If bfqq != NULL, then a non-stable queue merge between
+ * bic->bfqq and bfqq is happening here. This causes troubles
+@@ -461,6 +467,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+ */
+ void bfq_schedule_dispatch(struct bfq_data *bfqd)
+ {
++ lockdep_assert_held(&bfqd->lock);
++
+ if (bfqd->queued != 0) {
+ bfq_log(bfqd, "schedule dispatch");
+ blk_mq_run_hw_queues(bfqd->queue, true);
+@@ -2022,9 +2030,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (!bfqd->last_completed_rq_bfqq ||
+ bfqd->last_completed_rq_bfqq == bfqq ||
+ bfq_bfqq_has_short_ttime(bfqq) ||
+- bfqq->dispatched > 0 ||
+- now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
+- bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
++ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
+ return;
+
+ if (bfqd->last_completed_rq_bfqq !=
+@@ -2084,7 +2090,7 @@ static void bfq_add_request(struct request *rq)
+ bfqq->queued[rq_is_sync(rq)]++;
+ bfqd->queued++;
+
+- if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
++ if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
+ bfq_check_waker(bfqd, bfqq, now_ns);
+
+ /*
+@@ -2337,10 +2343,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
+
+ spin_lock_irq(&bfqd->lock);
+
+- if (bic)
++ if (bic) {
++ /*
++ * Make sure cgroup info is uptodate for current process before
++ * considering the merge.
++ */
++ bfq_bic_update_cgroup(bic, bio);
++
+ bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
+- else
++ } else {
+ bfqd->bio_bfqq = NULL;
++ }
+ bfqd->bio_bic = bic;
+
+ ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
+@@ -2370,8 +2383,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
+ return ELEVATOR_NO_MERGE;
+ }
+
+-static struct bfq_queue *bfq_init_rq(struct request *rq);
+-
+ static void bfq_request_merged(struct request_queue *q, struct request *req,
+ enum elv_merge type)
+ {
+@@ -2380,7 +2391,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
+ blk_rq_pos(req) <
+ blk_rq_pos(container_of(rb_prev(&req->rb_node),
+ struct request, rb_node))) {
+- struct bfq_queue *bfqq = bfq_init_rq(req);
++ struct bfq_queue *bfqq = RQ_BFQQ(req);
+ struct bfq_data *bfqd;
+ struct request *prev, *next_rq;
+
+@@ -2432,8 +2443,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
+ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
+ struct request *next)
+ {
+- struct bfq_queue *bfqq = bfq_init_rq(rq),
+- *next_bfqq = bfq_init_rq(next);
++ struct bfq_queue *bfqq = RQ_BFQQ(rq),
++ *next_bfqq = RQ_BFQQ(next);
+
+ if (!bfqq)
+ goto remove;
+@@ -2638,6 +2649,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+ if (process_refs == 0 || new_process_refs == 0)
+ return NULL;
+
++ /*
++ * Make sure merged queues belong to the same parent. Parents could
++ * have changed since the time we decided the two queues are suitable
++ * for merging.
++ */
++ if (new_bfqq->entity.parent != bfqq->entity.parent)
++ return NULL;
++
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+ new_bfqq->pid);
+
+@@ -2662,6 +2681,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+ * are likely to increase the throughput.
+ */
+ bfqq->new_bfqq = new_bfqq;
++ /*
++ * The above assignment schedules the following redirections:
++ * each time some I/O for bfqq arrives, the process that
++ * generated that I/O is disassociated from bfqq and
++ * associated with new_bfqq. Here we increases new_bfqq->ref
++ * in advance, adding the number of processes that are
++ * expected to be associated with new_bfqq as they happen to
++ * issue I/O.
++ */
+ new_bfqq->ref += process_refs;
+ return new_bfqq;
+ }
+@@ -2724,6 +2752,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_queue *in_service_bfqq, *new_bfqq;
+
++ /* if a merge has already been setup, then proceed with that first */
++ if (bfqq->new_bfqq)
++ return bfqq->new_bfqq;
++
+ /*
+ * Check delayed stable merge for rotational or non-queueing
+ * devs. For this branch to be executed, bfqq must not be
+@@ -2762,9 +2794,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ struct bfq_queue *new_bfqq =
+ bfq_setup_merge(bfqq, stable_merge_bfqq);
+
+- bic->stably_merged = true;
+- if (new_bfqq && new_bfqq->bic)
+- new_bfqq->bic->stably_merged = true;
++ if (new_bfqq) {
++ bic->stably_merged = true;
++ if (new_bfqq->bic)
++ new_bfqq->bic->stably_merged =
++ true;
++ }
+ return new_bfqq;
+ } else
+ return NULL;
+@@ -2825,9 +2860,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (bfq_too_late_for_merging(bfqq))
+ return NULL;
+
+- if (bfqq->new_bfqq)
+- return bfqq->new_bfqq;
+-
+ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
+ return NULL;
+
+@@ -3014,7 +3046,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ /*
+ * Merge queues (that is, let bic redirect its requests to new_bfqq)
+ */
+- bic_set_bfqq(bic, new_bfqq, 1);
++ bic_set_bfqq(bic, new_bfqq, true);
+ bfq_mark_bfqq_coop(new_bfqq);
+ /*
+ * new_bfqq now belongs to at least two bics (it is a shared queue):
+@@ -5061,7 +5093,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+ struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+ struct request *rq;
+ struct bfq_queue *in_serv_queue;
+- bool waiting_rq, idle_timer_disabled;
++ bool waiting_rq, idle_timer_disabled = false;
+
+ spin_lock_irq(&bfqd->lock);
+
+@@ -5069,14 +5101,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
+ waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
+
+ rq = __bfq_dispatch_request(hctx);
+-
+- idle_timer_disabled =
+- waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
++ if (in_serv_queue == bfqd->in_service_queue) {
++ idle_timer_disabled =
++ waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
++ }
+
+ spin_unlock_irq(&bfqd->lock);
+-
+- bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue,
+- idle_timer_disabled);
++ bfq_update_dispatch_stats(hctx->queue, rq,
++ idle_timer_disabled ? in_serv_queue : NULL,
++ idle_timer_disabled);
+
+ return rq;
+ }
+@@ -5173,7 +5206,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
+ bfq_put_queue(bfqq);
+ }
+
+-static void bfq_put_cooperator(struct bfq_queue *bfqq)
++void bfq_put_cooperator(struct bfq_queue *bfqq)
+ {
+ struct bfq_queue *__bfqq, *next;
+
+@@ -5218,9 +5251,8 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
+ unsigned long flags;
+
+ spin_lock_irqsave(&bfqd->lock, flags);
+- bfqq->bic = NULL;
+- bfq_exit_bfqq(bfqd, bfqq);
+ bic_set_bfqq(bic, NULL, is_sync);
++ bfq_exit_bfqq(bfqd, bfqq);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
+ }
+ }
+@@ -5327,9 +5359,11 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+
+ bfqq = bic_to_bfqq(bic, false);
+ if (bfqq) {
+- bfq_release_process_ref(bfqd, bfqq);
+- bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true);
++ struct bfq_queue *old_bfqq = bfqq;
++
++ bfqq = bfq_get_queue(bfqd, bio, false, bic, true);
+ bic_set_bfqq(bic, bfqq, false);
++ bfq_release_process_ref(bfqd, old_bfqq);
+ }
+
+ bfqq = bic_to_bfqq(bic, true);
+@@ -5579,14 +5613,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq;
+ struct bfq_group *bfqg;
+
+- rcu_read_lock();
+-
+- bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+- if (!bfqg) {
+- bfqq = &bfqd->oom_bfqq;
+- goto out;
+- }
+-
++ bfqg = bfq_bio_bfqg(bfqd, bio);
+ if (!is_sync) {
+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+ ioprio);
+@@ -5632,8 +5659,6 @@ out:
+
+ if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
+ bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
+-
+- rcu_read_unlock();
+ return bfqq;
+ }
+
+@@ -5964,6 +5989,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
+ unsigned int cmd_flags) {}
+ #endif /* CONFIG_BFQ_CGROUP_DEBUG */
+
++static struct bfq_queue *bfq_init_rq(struct request *rq);
++
+ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+ bool at_head)
+ {
+@@ -5979,60 +6006,16 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+ bfqg_stats_update_legacy_io(q, rq);
+ #endif
+ spin_lock_irq(&bfqd->lock);
++ bfqq = bfq_init_rq(rq);
+ if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
+ spin_unlock_irq(&bfqd->lock);
+ blk_mq_free_requests(&free);
+ return;
+ }
+
+- spin_unlock_irq(&bfqd->lock);
+-
+ trace_block_rq_insert(rq);
+
+- spin_lock_irq(&bfqd->lock);
+- bfqq = bfq_init_rq(rq);
+-
+- /*
+- * Reqs with at_head or passthrough flags set are to be put
+- * directly into dispatch list. Additional case for putting rq
+- * directly into the dispatch queue: the only active
+- * bfq_queues are bfqq and either its waker bfq_queue or one
+- * of its woken bfq_queues. The rationale behind this
+- * additional condition is as follows:
+- * - consider a bfq_queue, say Q1, detected as a waker of
+- * another bfq_queue, say Q2
+- * - by definition of a waker, Q1 blocks the I/O of Q2, i.e.,
+- * some I/O of Q1 needs to be completed for new I/O of Q2
+- * to arrive. A notable example of waker is journald
+- * - so, Q1 and Q2 are in any respect the queues of two
+- * cooperating processes (or of two cooperating sets of
+- * processes): the goal of Q1's I/O is doing what needs to
+- * be done so that new Q2's I/O can finally be
+- * issued. Therefore, if the service of Q1's I/O is delayed,
+- * then Q2's I/O is delayed too. Conversely, if Q2's I/O is
+- * delayed, the goal of Q1's I/O is hindered.
+- * - as a consequence, if some I/O of Q1/Q2 arrives while
+- * Q2/Q1 is the only queue in service, there is absolutely
+- * no point in delaying the service of such an I/O. The
+- * only possible result is a throughput loss
+- * - so, when the above condition holds, the best option is to
+- * have the new I/O dispatched as soon as possible
+- * - the most effective and efficient way to attain the above
+- * goal is to put the new I/O directly in the dispatch
+- * list
+- * - as an additional restriction, Q1 and Q2 must be the only
+- * busy queues for this commit to put the I/O of Q2/Q1 in
+- * the dispatch list. This is necessary, because, if also
+- * other queues are waiting for service, then putting new
+- * I/O directly in the dispatch list may evidently cause a
+- * violation of service guarantees for the other queues
+- */
+- if (!bfqq ||
+- (bfqq != bfqd->in_service_queue &&
+- bfqd->in_service_queue != NULL &&
+- bfq_tot_busy_queues(bfqd) == 1 + bfq_bfqq_busy(bfqq) &&
+- (bfqq->waker_bfqq == bfqd->in_service_queue ||
+- bfqd->in_service_queue->waker_bfqq == bfqq)) || at_head) {
++ if (!bfqq || at_head) {
+ if (at_head)
+ list_add(&rq->queuelist, &bfqd->dispatch);
+ else
+@@ -6059,7 +6042,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+ * merge).
+ */
+ cmd_flags = rq->cmd_flags;
+-
+ spin_unlock_irq(&bfqd->lock);
+
+ bfq_update_insert_stats(q, bfqq, idle_timer_disabled,
+@@ -6453,6 +6435,7 @@ static void bfq_finish_requeue_request(struct request *rq)
+ bfq_completed_request(bfqq, bfqd);
+ }
+ bfq_finish_requeue_request_body(bfqq);
++ RQ_BIC(rq)->requests--;
+ spin_unlock_irqrestore(&bfqd->lock, flags);
+
+ /*
+@@ -6494,7 +6477,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+ return bfqq;
+ }
+
+- bic_set_bfqq(bic, NULL, 1);
++ bic_set_bfqq(bic, NULL, true);
+
+ bfq_put_cooperator(bfqq);
+
+@@ -6654,6 +6637,12 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
+ bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
+ true, is_sync,
+ NULL);
++ if (unlikely(bfqq == &bfqd->oom_bfqq))
++ bfqq_already_existing = true;
++ } else
++ bfqq_already_existing = true;
++
++ if (!bfqq_already_existing) {
+ bfqq->waker_bfqq = old_bfqq->waker_bfqq;
+ bfqq->tentative_waker_bfqq = NULL;
+
+@@ -6667,13 +6656,13 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
+ if (bfqq->waker_bfqq)
+ hlist_add_head(&bfqq->woken_list_node,
+ &bfqq->waker_bfqq->woken_list);
+- } else
+- bfqq_already_existing = true;
++ }
+ }
+ }
+
+ bfqq->allocated++;
+ bfqq->ref++;
++ bic->requests++;
+ bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
+ rq, bfqq, bfqq->ref);
+
+@@ -6770,8 +6759,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_bfqq_expire(bfqd, bfqq, true, reason);
+
+ schedule_dispatch:
+- spin_unlock_irqrestore(&bfqd->lock, flags);
+ bfq_schedule_dispatch(bfqd);
++ spin_unlock_irqrestore(&bfqd->lock, flags);
+ }
+
+ /*
+@@ -6920,6 +6909,8 @@ static void bfq_exit_queue(struct elevator_queue *e)
+ spin_unlock_irq(&bfqd->lock);
+ #endif
+
++ wbt_enable_default(bfqd->queue);
++
+ kfree(bfqd);
+ }
+
+diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
+index a73488eec8a47..2bd696aaf02cc 100644
+--- a/block/bfq-iosched.h
++++ b/block/bfq-iosched.h
+@@ -466,6 +466,7 @@ struct bfq_io_cq {
+ struct bfq_queue *stable_merge_bfqq;
+
+ bool stably_merged; /* non splittable if true */
++ unsigned int requests; /* Number of requests this process has in flight */
+ };
+
+ /**
+@@ -925,6 +926,8 @@ struct bfq_group {
+
+ /* reference counter (see comments in bfq_bic_update_cgroup) */
+ int ref;
++ /* Is bfq_group still online? */
++ bool online;
+
+ struct bfq_entity entity;
+ struct bfq_sched_data sched_data;
+@@ -976,6 +979,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
+ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bool compensate, enum bfqq_expiration reason);
+ void bfq_put_queue(struct bfq_queue *bfqq);
++void bfq_put_cooperator(struct bfq_queue *bfqq);
+ void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
+ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+ void bfq_schedule_dispatch(struct bfq_data *bfqd);
+@@ -1003,8 +1007,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
+ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
+ void bfq_end_wr_async(struct bfq_data *bfqd);
+-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
+- struct blkcg *blkcg);
++struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
+ struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
+ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
+diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
+index b74cc0da118ec..709b901de3ca9 100644
+--- a/block/bfq-wf2q.c
++++ b/block/bfq-wf2q.c
+@@ -519,7 +519,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio)
+ static unsigned short bfq_weight_to_ioprio(int weight)
+ {
+ return max_t(int, 0,
+- IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
++ IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
+ }
+
+ static void bfq_get_entity(struct bfq_entity *entity)
+diff --git a/block/bio-integrity.c b/block/bio-integrity.c
+index 6b47cddbbca17..4f34ac27c47dd 100644
+--- a/block/bio-integrity.c
++++ b/block/bio-integrity.c
+@@ -373,7 +373,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
+
+- bip->bip_iter.bi_sector += bytes_done >> 9;
++ bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
+ bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
+ }
+
+@@ -417,6 +417,7 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+
+ bip->bip_vcnt = bip_src->bip_vcnt;
+ bip->bip_iter = bip_src->bip_iter;
++ bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
+
+ return 0;
+ }
+diff --git a/block/bio.c b/block/bio.c
+index a6fb6a0b42955..ba9120d4fe499 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -567,7 +567,8 @@ void bio_truncate(struct bio *bio, unsigned new_size)
+ offset = new_size - done;
+ else
+ offset = 0;
+- zero_user(bv.bv_page, offset, bv.bv_len - offset);
++ zero_user(bv.bv_page, bv.bv_offset + offset,
++ bv.bv_len - offset);
+ truncated = true;
+ }
+ done += bv.bv_len;
+@@ -664,6 +665,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
+ bio_alloc_cache_prune(cache, -1U);
+ }
+ free_percpu(bs->cache);
++ bs->cache = NULL;
+ }
+
+ /**
+@@ -908,7 +910,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
+ int bio_add_zone_append_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+ {
+- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
++ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ bool same_page = false;
+
+ if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
+@@ -1052,7 +1054,7 @@ static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+
+ static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
+ {
+- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
++ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ struct iov_iter i = *iter;
+
+ iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
+@@ -1069,6 +1071,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off)
+ put_page(pages[i]);
+ }
+
++static int bio_iov_add_page(struct bio *bio, struct page *page,
++ unsigned int len, unsigned int offset)
++{
++ bool same_page = false;
++
++ if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
++ if (WARN_ON_ONCE(bio_full(bio, len)))
++ return -EINVAL;
++ __bio_add_page(bio, page, len, offset);
++ return 0;
++ }
++
++ if (same_page)
++ put_page(page);
++ return 0;
++}
++
++static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
++ unsigned int len, unsigned int offset)
++{
++ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
++ bool same_page = false;
++
++ if (bio_add_hw_page(q, bio, page, len, offset,
++ queue_max_zone_append_sectors(q), &same_page) != len)
++ return -EINVAL;
++ if (same_page)
++ put_page(page);
++ return 0;
++}
++
+ #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
+
+ /**
+@@ -1087,61 +1120,11 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+ struct page **pages = (struct page **)bv;
+- bool same_page = false;
+- ssize_t size, left;
+- unsigned len, i;
+- size_t offset;
+-
+- /*
+- * Move page array up in the allocated memory for the bio vecs as far as
+- * possible so that we can start filling biovecs from the beginning
+- * without overwriting the temporary page array.
+- */
+- BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
+- pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
+-
+- size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+- if (unlikely(size <= 0))
+- return size ? size : -EFAULT;
+-
+- for (left = size, i = 0; left > 0; left -= len, i++) {
+- struct page *page = pages[i];
+-
+- len = min_t(size_t, PAGE_SIZE - offset, left);
+-
+- if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+- if (same_page)
+- put_page(page);
+- } else {
+- if (WARN_ON_ONCE(bio_full(bio, len))) {
+- bio_put_pages(pages + i, left, offset);
+- return -EINVAL;
+- }
+- __bio_add_page(bio, page, len, offset);
+- }
+- offset = 0;
+- }
+-
+- iov_iter_advance(iter, size);
+- return 0;
+-}
+-
+-static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+-{
+- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+- unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+- unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
+- struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+- struct page **pages = (struct page **)bv;
+ ssize_t size, left;
+ unsigned len, i;
+ size_t offset;
+ int ret = 0;
+
+- if (WARN_ON_ONCE(!max_append_sectors))
+- return 0;
+-
+ /*
+ * Move page array up in the allocated memory for the bio vecs as far as
+ * possible so that we can start filling biovecs from the beginning
+@@ -1156,17 +1139,18 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
+
+ for (left = size, i = 0; left > 0; left -= len, i++) {
+ struct page *page = pages[i];
+- bool same_page = false;
+
+ len = min_t(size_t, PAGE_SIZE - offset, left);
+- if (bio_add_hw_page(q, bio, page, len, offset,
+- max_append_sectors, &same_page) != len) {
++ if (bio_op(bio) == REQ_OP_ZONE_APPEND)
++ ret = bio_iov_add_zone_append_page(bio, page, len,
++ offset);
++ else
++ ret = bio_iov_add_page(bio, page, len, offset);
++
++ if (ret) {
+ bio_put_pages(pages + i, left, offset);
+- ret = -EINVAL;
+ break;
+ }
+- if (same_page)
+- put_page(page);
+ offset = 0;
+ }
+
+@@ -1208,10 +1192,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ }
+
+ do {
+- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+- ret = __bio_iov_append_get_pages(bio, iter);
+- else
+- ret = __bio_iov_iter_get_pages(bio, iter);
++ ret = __bio_iov_iter_get_pages(bio, iter);
+ } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
+
+ /* don't account direct I/O as memory stall */
+@@ -1288,10 +1269,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+ struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+ unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+- void *src_buf;
++ void *src_buf = bvec_kmap_local(&src_bv);
++ void *dst_buf = bvec_kmap_local(&dst_bv);
++
++ memcpy(dst_buf, src_buf, bytes);
+
+- src_buf = bvec_kmap_local(&src_bv);
+- memcpy_to_bvec(&dst_bv, src_buf);
++ kunmap_local(dst_buf);
+ kunmap_local(src_buf);
+
+ bio_advance_iter_single(src, src_iter, bytes);
+@@ -1466,11 +1449,10 @@ again:
+ if (!bio_integrity_endio(bio))
+ return;
+
+- if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
+- rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
++ rq_qos_done_bio(bio);
+
+ if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+- trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
++ trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
+ bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+ }
+
+@@ -1551,7 +1533,7 @@ EXPORT_SYMBOL(bio_split);
+ void bio_trim(struct bio *bio, sector_t offset, sector_t size)
+ {
+ if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
+- offset + size > bio->bi_iter.bi_size))
++ offset + size > bio_sectors(bio)))
+ return;
+
+ size <<= 9;
+diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
+index 9a1c5839dd469..3ee4c1217b636 100644
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -633,6 +633,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+
+ q = bdev->bd_disk->queue;
+
++ /*
++ * blkcg_deactivate_policy() requires queue to be frozen, we can grab
++ * q_usage_counter to prevent concurrent with blkcg_deactivate_policy().
++ */
++ ret = blk_queue_enter(q, 0);
++ if (ret)
++ goto fail;
++
+ rcu_read_lock();
+ spin_lock_irq(&q->queue_lock);
+
+@@ -667,13 +675,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+ new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
+ if (unlikely(!new_blkg)) {
+ ret = -ENOMEM;
+- goto fail;
++ goto fail_exit_queue;
+ }
+
+ if (radix_tree_preload(GFP_KERNEL)) {
+ blkg_free(new_blkg);
+ ret = -ENOMEM;
+- goto fail;
++ goto fail_exit_queue;
+ }
+
+ rcu_read_lock();
+@@ -702,6 +710,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+ goto success;
+ }
+ success:
++ blk_queue_exit(q);
+ ctx->bdev = bdev;
+ ctx->blkg = blkg;
+ ctx->body = input;
+@@ -712,6 +721,8 @@ fail_preloaded:
+ fail_unlock:
+ spin_unlock_irq(&q->queue_lock);
+ rcu_read_unlock();
++fail_exit_queue:
++ blk_queue_exit(q);
+ fail:
+ blkdev_put_no_open(bdev);
+ /*
+@@ -844,11 +855,11 @@ static void blkcg_fill_root_iostats(void)
+ blk_queue_root_blkg(bdev->bd_disk->queue);
+ struct blkg_iostat tmp;
+ int cpu;
++ unsigned long flags;
+
+ memset(&tmp, 0, sizeof(tmp));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *cpu_dkstats;
+- unsigned long flags;
+
+ cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
+ tmp.ios[BLKG_IOSTAT_READ] +=
+@@ -864,11 +875,11 @@ static void blkcg_fill_root_iostats(void)
+ cpu_dkstats->sectors[STAT_WRITE] << 9;
+ tmp.bytes[BLKG_IOSTAT_DISCARD] +=
+ cpu_dkstats->sectors[STAT_DISCARD] << 9;
+-
+- flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+- blkg_iostat_set(&blkg->iostat.cur, &tmp);
+- u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+ }
++
++ flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
++ blkg_iostat_set(&blkg->iostat.cur, &tmp);
++ u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
+ }
+ }
+
+@@ -1349,6 +1360,10 @@ retry:
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_init_fn(blkg->pd[pol->plid]);
+
++ if (pol->pd_online_fn)
++ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
++ pol->pd_online_fn(blkg->pd[pol->plid]);
++
+ __set_bit(pol->plid, q->blkcg_pols);
+ ret = 0;
+
+@@ -1875,12 +1890,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
+ */
+ void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+ {
+- if (src->bi_blkg) {
+- if (dst->bi_blkg)
+- blkg_put(dst->bi_blkg);
+- blkg_get(src->bi_blkg);
+- dst->bi_blkg = src->bi_blkg;
+- }
++ if (src->bi_blkg)
++ bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css);
+ }
+ EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 4d8f5fe915887..0c4a4e42ad870 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -49,6 +49,7 @@
+ #include "blk-mq.h"
+ #include "blk-mq-sched.h"
+ #include "blk-pm.h"
++#include "blk-rq-qos.h"
+
+ struct dentry *blk_debugfs_root;
+
+@@ -350,13 +351,6 @@ void blk_queue_start_drain(struct request_queue *q)
+ wake_up_all(&q->mq_freeze_wq);
+ }
+
+-void blk_set_queue_dying(struct request_queue *q)
+-{
+- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+- blk_queue_start_drain(q);
+-}
+-EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+-
+ /**
+ * blk_cleanup_queue - shutdown a request queue
+ * @q: request queue to shutdown
+@@ -374,7 +368,8 @@ void blk_cleanup_queue(struct request_queue *q)
+ WARN_ON_ONCE(blk_queue_registered(q));
+
+ /* mark @q DYING, no new request or merges will be allowed afterwards */
+- blk_set_queue_dying(q);
++ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
++ blk_queue_start_drain(q);
+
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+@@ -386,11 +381,16 @@ void blk_cleanup_queue(struct request_queue *q)
+ */
+ blk_freeze_queue(q);
+
++ /* cleanup rq qos structures for queue without disk */
++ rq_qos_exit(q);
++
+ blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
+
+ blk_sync_queue(q);
+- if (queue_is_mq(q))
++ if (queue_is_mq(q)) {
++ blk_mq_cancel_work_sync(q);
+ blk_mq_exit_queue(q);
++ }
+
+ /*
+ * In theory, request pool of sched_tags belongs to request queue.
+@@ -447,7 +447,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
+
+ while (!blk_try_enter_queue(q, pm)) {
+ if (flags & BLK_MQ_REQ_NOWAIT)
+- return -EBUSY;
++ return -EAGAIN;
+
+ /*
+ * read pair of barrier in blk_freeze_queue_start(), we need to
+@@ -478,7 +478,7 @@ static inline int bio_queue_enter(struct bio *bio)
+ if (test_bit(GD_DEAD, &disk->state))
+ goto dead;
+ bio_wouldblock_error(bio);
+- return -EBUSY;
++ return -EAGAIN;
+ }
+
+ /*
+@@ -698,14 +698,10 @@ static inline bool should_fail_request(struct block_device *part,
+ static inline bool bio_check_ro(struct bio *bio)
+ {
+ if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
+- char b[BDEVNAME_SIZE];
+-
+ if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
+ return false;
+-
+- WARN_ONCE(1,
+- "Trying to write to read-only block-device %s (partno %d)\n",
+- bio_devname(bio, b), bio->bi_bdev->bd_partno);
++ pr_warn("Trying to write to read-only block-device %pg\n",
++ bio->bi_bdev);
+ /* Older lvm-tools actually trigger this */
+ return false;
+ }
+@@ -887,10 +883,8 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
+ if (unlikely(!current->io_context))
+ create_task_io_context(current, GFP_ATOMIC, q->node);
+
+- if (blk_throtl_bio(bio)) {
+- blkcg_bio_issue_init(bio);
++ if (blk_throtl_bio(bio))
+ return false;
+- }
+
+ blk_cgroup_bio_start(bio);
+ blkcg_bio_issue_init(bio);
+@@ -1293,21 +1287,33 @@ void blk_account_io_start(struct request *rq)
+ }
+
+ static unsigned long __part_start_io_acct(struct block_device *part,
+- unsigned int sectors, unsigned int op)
++ unsigned int sectors, unsigned int op,
++ unsigned long start_time)
+ {
+ const int sgrp = op_stat_group(op);
+- unsigned long now = READ_ONCE(jiffies);
+
+ part_stat_lock();
+- update_io_ticks(part, now, false);
++ update_io_ticks(part, start_time, false);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
+ part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ part_stat_unlock();
+
+- return now;
++ return start_time;
+ }
+
++/**
++ * bio_start_io_acct_time - start I/O accounting for bio based drivers
++ * @bio: bio to start account for
++ * @start_time: start time that should be passed back to bio_end_io_acct().
++ */
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
++{
++ __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++ bio_op(bio), start_time);
++}
++EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
++
+ /**
+ * bio_start_io_acct - start I/O accounting for bio based drivers
+ * @bio: bio to start account for
+@@ -1316,14 +1322,15 @@ static unsigned long __part_start_io_acct(struct block_device *part,
+ */
+ unsigned long bio_start_io_acct(struct bio *bio)
+ {
+- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
++ return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
++ bio_op(bio), jiffies);
+ }
+ EXPORT_SYMBOL_GPL(bio_start_io_acct);
+
+ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op)
+ {
+- return __part_start_io_acct(disk->part0, sectors, op);
++ return __part_start_io_acct(disk->part0, sectors, op, jiffies);
+ }
+ EXPORT_SYMBOL(disk_start_io_acct);
+
+@@ -1414,6 +1421,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
+ req->q->integrity.profile->complete_fn(req, nr_bytes);
+ #endif
+
++ /*
++ * Upper layers may call blk_crypto_evict_key() anytime after the last
++ * bio_endio(). Therefore, the keyslot must be released before that.
++ */
++ if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
++ __blk_crypto_rq_put_keyslot(req);
++
+ if (unlikely(error && !blk_rq_is_passthrough(req) &&
+ !(req->rq_flags & RQF_QUIET)))
+ print_req_error(req, error, __func__);
+diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
+index 0d36aae538d7b..8e08345576203 100644
+--- a/block/blk-crypto-internal.h
++++ b/block/blk-crypto-internal.h
+@@ -60,6 +60,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
+ return rq->crypt_ctx;
+ }
+
++static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
++{
++ return rq->crypt_keyslot;
++}
++
+ #else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+ static inline bool bio_crypt_rq_ctx_compatible(struct request *rq,
+@@ -93,6 +98,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
+ return false;
+ }
+
++static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
++{
++ return false;
++}
++
+ #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+ void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
+@@ -127,14 +137,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
+ return true;
+ }
+
+-blk_status_t __blk_crypto_init_request(struct request *rq);
+-static inline blk_status_t blk_crypto_init_request(struct request *rq)
++blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
++static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
+ {
+ if (blk_crypto_rq_is_encrypted(rq))
+- return __blk_crypto_init_request(rq);
++ return __blk_crypto_rq_get_keyslot(rq);
+ return BLK_STS_OK;
+ }
+
++void __blk_crypto_rq_put_keyslot(struct request *rq);
++static inline void blk_crypto_rq_put_keyslot(struct request *rq)
++{
++ if (blk_crypto_rq_has_keyslot(rq))
++ __blk_crypto_rq_put_keyslot(rq);
++}
++
+ void __blk_crypto_free_request(struct request *rq);
+ static inline void blk_crypto_free_request(struct request *rq)
+ {
+@@ -173,7 +190,7 @@ static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
+ {
+
+ if (blk_crypto_rq_is_encrypted(rq))
+- return blk_crypto_init_request(rq);
++ return blk_crypto_rq_get_keyslot(rq);
+ return BLK_STS_OK;
+ }
+
+diff --git a/block/blk-crypto.c b/block/blk-crypto.c
+index 103c2e2d50d67..5029a50807d5d 100644
+--- a/block/blk-crypto.c
++++ b/block/blk-crypto.c
+@@ -13,6 +13,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/keyslot-manager.h>
+ #include <linux/module.h>
++#include <linux/ratelimit.h>
+ #include <linux/slab.h>
+
+ #include "blk-crypto-internal.h"
+@@ -216,26 +217,26 @@ static bool bio_crypt_check_alignment(struct bio *bio)
+ return true;
+ }
+
+-blk_status_t __blk_crypto_init_request(struct request *rq)
++blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
+ {
+ return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key,
+ &rq->crypt_keyslot);
+ }
+
+-/**
+- * __blk_crypto_free_request - Uninitialize the crypto fields of a request.
+- *
+- * @rq: The request whose crypto fields to uninitialize.
+- *
+- * Completely uninitializes the crypto fields of a request. If a keyslot has
+- * been programmed into some inline encryption hardware, that keyslot is
+- * released. The rq->crypt_ctx is also freed.
+- */
+-void __blk_crypto_free_request(struct request *rq)
++void __blk_crypto_rq_put_keyslot(struct request *rq)
+ {
+ blk_ksm_put_slot(rq->crypt_keyslot);
++ rq->crypt_keyslot = NULL;
++}
++
++void __blk_crypto_free_request(struct request *rq)
++{
++ /* The keyslot, if one was needed, should have been released earlier. */
++ if (WARN_ON_ONCE(rq->crypt_keyslot))
++ __blk_crypto_rq_put_keyslot(rq);
++
+ mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
+- blk_crypto_rq_set_defaults(rq);
++ rq->crypt_ctx = NULL;
+ }
+
+ /**
+@@ -384,29 +385,39 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
+ }
+
+ /**
+- * blk_crypto_evict_key() - Evict a key from any inline encryption hardware
+- * it may have been programmed into
+- * @q: The request queue who's associated inline encryption hardware this key
+- * might have been programmed into
+- * @key: The key to evict
++ * blk_crypto_evict_key() - Evict a blk_crypto_key from a request_queue
++ * @q: a request_queue on which I/O using the key may have been done
++ * @key: the key to evict
+ *
+- * Upper layers (filesystems) must call this function to ensure that a key is
+- * evicted from any hardware that it might have been programmed into. The key
+- * must not be in use by any in-flight IO when this function is called.
++ * For a given request_queue, this function removes the given blk_crypto_key
++ * from the keyslot management structures and evicts it from any underlying
++ * hardware keyslot(s) or blk-crypto-fallback keyslot it may have been
++ * programmed into.
+ *
+- * Return: 0 on success or if key is not present in the q's ksm, -err on error.
++ * Upper layers must call this before freeing the blk_crypto_key. It must be
++ * called for every request_queue the key may have been used on. The key must
++ * no longer be in use by any I/O when this function is called.
++ *
++ * Context: May sleep.
+ */
+-int blk_crypto_evict_key(struct request_queue *q,
+- const struct blk_crypto_key *key)
++void blk_crypto_evict_key(struct request_queue *q,
++ const struct blk_crypto_key *key)
+ {
+- if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
+- return blk_ksm_evict_key(q->ksm, key);
++ int err;
+
++ if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
++ err = blk_ksm_evict_key(q->ksm, key);
++ else
++ err = blk_crypto_fallback_evict_key(key);
+ /*
+- * If the request queue's associated inline encryption hardware didn't
+- * have support for the key, then the key might have been programmed
+- * into the fallback keyslot manager, so try to evict from there.
++ * An error can only occur here if the key failed to be evicted from a
++ * keyslot (due to a hardware or driver issue) or is allegedly still in
++ * use by I/O (due to a kernel bug). Even in these cases, the key is
++ * still unlinked from the keyslot management structures, and the caller
++ * is allowed and expected to free it right away. There's nothing
++ * callers can do to handle errors, so just log them and return void.
+ */
+- return blk_crypto_fallback_evict_key(key);
++ if (err)
++ pr_warn_ratelimited("error %d evicting key\n", err);
+ }
+ EXPORT_SYMBOL_GPL(blk_crypto_evict_key);
+diff --git a/block/blk-flush.c b/block/blk-flush.c
+index 4201728bf3a5a..94a86acbb7f67 100644
+--- a/block/blk-flush.c
++++ b/block/blk-flush.c
+@@ -235,8 +235,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
+ * avoiding use-after-free.
+ */
+ WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
+- if (fq->rq_status != BLK_STS_OK)
++ if (fq->rq_status != BLK_STS_OK) {
+ error = fq->rq_status;
++ fq->rq_status = BLK_STS_OK;
++ }
+
+ if (!q->elevator) {
+ flush_rq->tag = BLK_MQ_NO_TAG;
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index 57299f860d41e..90c05971f71e0 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -265,6 +265,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
+ INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
+ INIT_HLIST_HEAD(&ioc->icq_list);
+ INIT_WORK(&ioc->release_work, ioc_release_fn);
++ ioc->ioprio = IOPRIO_DEFAULT;
+
+ /*
+ * Try to install. ioc shouldn't be installed if someone else
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index b3880e4ba22a1..f95feabb3ca88 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -232,7 +232,9 @@ enum {
+
+ /* 1/64k is granular enough and can easily be handled w/ u32 */
+ WEIGHT_ONE = 1 << 16,
++};
+
++enum {
+ /*
+ * As vtime is used to calculate the cost of each IO, it needs to
+ * be fairly high precision. For example, it should be able to
+@@ -256,6 +258,11 @@ enum {
+ VRATE_MIN = VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
+ VRATE_CLAMP_ADJ_PCT = 4,
+
++ /* switch iff the conditions are met for longer than this */
++ AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
++};
++
++enum {
+ /* if IOs end up waiting for requests, issue less */
+ RQ_WAIT_BUSY_PCT = 5,
+
+@@ -294,9 +301,6 @@ enum {
+ /* don't let cmds which take a very long time pin lagging for too long */
+ MAX_LAGGING_PERIODS = 10,
+
+- /* switch iff the conditions are met for longer than this */
+- AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
+-
+ /*
+ * Count IO size in 4k pages. The 12bit shift helps keeping
+ * size-proportional components of cost calculation in closer
+@@ -870,9 +874,14 @@ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops,
+
+ *page = *seqio = *randio = 0;
+
+- if (bps)
+- *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC,
+- DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE));
++ if (bps) {
++ u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE);
++
++ if (bps_pages)
++ *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages);
++ else
++ *page = 1;
++ }
+
+ if (seqiops) {
+ v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops);
+@@ -2311,11 +2320,28 @@ static void ioc_timer_fn(struct timer_list *timer)
+ hwm = current_hweight_max(iocg);
+ new_hwi = hweight_after_donation(iocg, old_hwi, hwm,
+ usage, &now);
+- if (new_hwi < hwm) {
++ /*
++ * Donation calculation assumes hweight_after_donation
++ * to be positive, a condition that a donor w/ hwa < 2
++ * can't meet. Don't bother with donation if hwa is
++ * below 2. It's not gonna make a meaningful difference
++ * anyway.
++ */
++ if (new_hwi < hwm && hwa >= 2) {
+ iocg->hweight_donating = hwa;
+ iocg->hweight_after_donation = new_hwi;
+ list_add(&iocg->surplus_list, &surpluses);
+- } else {
++ } else if (!iocg->abs_vdebt) {
++ /*
++ * @iocg doesn't have enough to donate. Reset
++ * its inuse to active.
++ *
++ * Don't reset debtors as their inuse's are
++ * owned by debt handling. This shouldn't affect
++ * donation calculuation in any meaningful way
++ * as @iocg doesn't have a meaningful amount of
++ * share anyway.
++ */
+ TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
+ iocg->inuse, iocg->active,
+ iocg->hweight_inuse, new_hwi);
+@@ -2422,6 +2448,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
+ u32 hwi, adj_step;
+ s64 margin;
+ u64 cost, new_inuse;
++ unsigned long flags;
+
+ current_hweight(iocg, NULL, &hwi);
+ old_hwi = hwi;
+@@ -2440,11 +2467,11 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
+ iocg->inuse == iocg->active)
+ return cost;
+
+- spin_lock_irq(&ioc->lock);
++ spin_lock_irqsave(&ioc->lock, flags);
+
+ /* we own inuse only when @iocg is in the normal active state */
+ if (iocg->abs_vdebt || list_empty(&iocg->active_list)) {
+- spin_unlock_irq(&ioc->lock);
++ spin_unlock_irqrestore(&ioc->lock, flags);
+ return cost;
+ }
+
+@@ -2465,7 +2492,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
+ } while (time_after64(vtime + cost, now->vnow) &&
+ iocg->inuse != iocg->active);
+
+- spin_unlock_irq(&ioc->lock);
++ spin_unlock_irqrestore(&ioc->lock, flags);
+
+ TRACE_IOCG_PATH(inuse_adjust, iocg, now,
+ old_inuse, iocg->inuse, old_hwi, hwi);
+@@ -2876,15 +2903,21 @@ static int blk_iocost_init(struct request_queue *q)
+ * called before policy activation completion, can't assume that the
+ * target bio has an iocg associated and need to test for NULL iocg.
+ */
+- rq_qos_add(q, rqos);
++ ret = rq_qos_add(q, rqos);
++ if (ret)
++ goto err_free_ioc;
++
+ ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+- if (ret) {
+- rq_qos_del(q, rqos);
+- free_percpu(ioc->pcpu_stat);
+- kfree(ioc);
+- return ret;
+- }
++ if (ret)
++ goto err_del_qos;
+ return 0;
++
++err_del_qos:
++ rq_qos_del(q, rqos);
++err_free_ioc:
++ free_percpu(ioc->pcpu_stat);
++ kfree(ioc);
++ return ret;
+ }
+
+ static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index c0545f9da549c..bdef8395af6e7 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -86,7 +86,17 @@ struct iolatency_grp;
+ struct blk_iolatency {
+ struct rq_qos rqos;
+ struct timer_list timer;
+- atomic_t enabled;
++
++ /*
++ * ->enabled is the master enable switch gating the throttling logic and
++ * inflight tracking. The number of cgroups which have iolat enabled is
++	 * tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly
++	 * from ->enable_work with the request_queue frozen. For details, see
++ * blkiolatency_enable_work_fn().
++ */
++ bool enabled;
++ atomic_t enable_cnt;
++ struct work_struct enable_work;
+ };
+
+ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
+@@ -94,11 +104,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
+ return container_of(rqos, struct blk_iolatency, rqos);
+ }
+
+-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
+-{
+- return atomic_read(&blkiolat->enabled) > 0;
+-}
+-
+ struct child_latency_info {
+ spinlock_t lock;
+
+@@ -463,7 +468,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ bool issue_as_root = bio_issue_as_root_blkg(bio);
+
+- if (!blk_iolatency_enabled(blkiolat))
++ if (!blkiolat->enabled)
+ return;
+
+ while (blkg && blkg->parent) {
+@@ -593,19 +598,17 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
+ u64 window_start;
+ u64 now;
+ bool issue_as_root = bio_issue_as_root_blkg(bio);
+- bool enabled = false;
+ int inflight = 0;
+
+ blkg = bio->bi_blkg;
+- if (!blkg || !bio_flagged(bio, BIO_TRACKED))
++ if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
+ return;
+
+ iolat = blkg_to_lat(bio->bi_blkg);
+ if (!iolat)
+ return;
+
+- enabled = blk_iolatency_enabled(iolat->blkiolat);
+- if (!enabled)
++ if (!iolat->blkiolat->enabled)
+ return;
+
+ now = ktime_to_ns(ktime_get());
+@@ -644,6 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
+ struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+
+ del_timer_sync(&blkiolat->timer);
++ flush_work(&blkiolat->enable_work);
+ blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
+ kfree(blkiolat);
+ }
+@@ -715,6 +719,44 @@ next:
+ rcu_read_unlock();
+ }
+
++/**
++ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
++ * @work: enable_work of the blk_iolatency of interest
++ *
++ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
++ * is relatively expensive as it involves walking up the hierarchy twice for
++ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
++ * want to disable the in-flight tracking.
++ *
++ * We have to make sure that the counting is balanced - we don't want to leak
++ * the in-flight counts by disabling accounting in the completion path while IOs
++ * are in flight. This is achieved by ensuring that no IO is in flight by
++ * freezing the queue while flipping ->enabled. As this requires a sleepable
++ * context, ->enabled flipping is punted to this work function.
++ */
++static void blkiolatency_enable_work_fn(struct work_struct *work)
++{
++ struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
++ enable_work);
++ bool enabled;
++
++ /*
++ * There can only be one instance of this function running for @blkiolat
++ * and it's guaranteed to be executed at least once after the latest
++	 * ->enable_cnt modification. Acting on the latest ->enable_cnt is
++ * sufficient.
++ *
++ * Also, we know @blkiolat is safe to access as ->enable_work is flushed
++ * in blkcg_iolatency_exit().
++ */
++ enabled = atomic_read(&blkiolat->enable_cnt);
++ if (enabled != blkiolat->enabled) {
++ blk_mq_freeze_queue(blkiolat->rqos.q);
++ blkiolat->enabled = enabled;
++ blk_mq_unfreeze_queue(blkiolat->rqos.q);
++ }
++}
++
+ int blk_iolatency_init(struct request_queue *q)
+ {
+ struct blk_iolatency *blkiolat;
+@@ -730,27 +772,29 @@ int blk_iolatency_init(struct request_queue *q)
+ rqos->ops = &blkcg_iolatency_ops;
+ rqos->q = q;
+
+- rq_qos_add(q, rqos);
+-
++ ret = rq_qos_add(q, rqos);
++ if (ret)
++ goto err_free;
+ ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+- if (ret) {
+- rq_qos_del(q, rqos);
+- kfree(blkiolat);
+- return ret;
+- }
++ if (ret)
++ goto err_qos_del;
+
+ timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
++ INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
+
+ return 0;
++
++err_qos_del:
++ rq_qos_del(q, rqos);
++err_free:
++ kfree(blkiolat);
++ return ret;
+ }
+
+-/*
+- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
+- * return 0.
+- */
+-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
++static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+ {
+ struct iolatency_grp *iolat = blkg_to_lat(blkg);
++ struct blk_iolatency *blkiolat = iolat->blkiolat;
+ u64 oldval = iolat->min_lat_nsec;
+
+ iolat->min_lat_nsec = val;
+@@ -758,13 +802,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+ iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
+ BLKIOLATENCY_MAX_WIN_SIZE);
+
+- if (!oldval && val)
+- return 1;
++ if (!oldval && val) {
++ if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
++ schedule_work(&blkiolat->enable_work);
++ }
+ if (oldval && !val) {
+ blkcg_clear_delay(blkg);
+- return -1;
++ if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
++ schedule_work(&blkiolat->enable_work);
+ }
+- return 0;
+ }
+
+ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
+@@ -796,7 +842,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
+ u64 lat_val = 0;
+ u64 oldval;
+ int ret;
+- int enable = 0;
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
+ if (ret)
+@@ -831,41 +876,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
+ blkg = ctx.blkg;
+ oldval = iolat->min_lat_nsec;
+
+- enable = iolatency_set_min_lat_nsec(blkg, lat_val);
+- if (enable) {
+- if (!blk_get_queue(blkg->q)) {
+- ret = -ENODEV;
+- goto out;
+- }
+-
+- blkg_get(blkg);
+- }
+-
+- if (oldval != iolat->min_lat_nsec) {
++ iolatency_set_min_lat_nsec(blkg, lat_val);
++ if (oldval != iolat->min_lat_nsec)
+ iolatency_clear_scaling(blkg);
+- }
+-
+ ret = 0;
+ out:
+ blkg_conf_finish(&ctx);
+- if (ret == 0 && enable) {
+- struct iolatency_grp *tmp = blkg_to_lat(blkg);
+- struct blk_iolatency *blkiolat = tmp->blkiolat;
+-
+- blk_mq_freeze_queue(blkg->q);
+-
+- if (enable == 1)
+- atomic_inc(&blkiolat->enabled);
+- else if (enable == -1)
+- atomic_dec(&blkiolat->enabled);
+- else
+- WARN_ON_ONCE(1);
+-
+- blk_mq_unfreeze_queue(blkg->q);
+-
+- blkg_put(blkg);
+- blk_put_queue(blkg->q);
+- }
+ return ret ?: nbytes;
+ }
+
+@@ -1006,14 +1022,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
+ {
+ struct iolatency_grp *iolat = pd_to_lat(pd);
+ struct blkcg_gq *blkg = lat_to_blkg(iolat);
+- struct blk_iolatency *blkiolat = iolat->blkiolat;
+- int ret;
+
+- ret = iolatency_set_min_lat_nsec(blkg, 0);
+- if (ret == 1)
+- atomic_inc(&blkiolat->enabled);
+- if (ret == -1)
+- atomic_dec(&blkiolat->enabled);
++ iolatency_set_min_lat_nsec(blkg, 0);
+ iolatency_clear_scaling(blkg);
+ }
+
+diff --git a/block/blk-map.c b/block/blk-map.c
+index 4526adde01564..c7f71d83eff18 100644
+--- a/block/blk-map.c
++++ b/block/blk-map.c
+@@ -446,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
+ if (bytes > len)
+ bytes = len;
+
+- page = alloc_page(GFP_NOIO | gfp_mask);
++ page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
+ if (!page)
+ goto cleanup;
+
+diff --git a/block/blk-merge.c b/block/blk-merge.c
+index 7a5c81c02c800..1affc5fd35f0c 100644
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -7,6 +7,7 @@
+ #include <linux/bio.h>
+ #include <linux/blkdev.h>
+ #include <linux/scatterlist.h>
++#include <linux/blk-cgroup.h>
+
+ #include <trace/events/block.h>
+
+@@ -278,6 +279,16 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
+ *segs = nsegs;
+ return NULL;
+ split:
++ /*
++ * We can't sanely support splitting for a REQ_NOWAIT bio. End it
++ * with EAGAIN if splitting is required and return an error pointer.
++ */
++ if (bio->bi_opf & REQ_NOWAIT) {
++ bio->bi_status = BLK_STS_AGAIN;
++ bio_endio(bio);
++ return ERR_PTR(-EAGAIN);
++ }
++
+ *segs = nsegs;
+
+ /*
+@@ -337,11 +348,13 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
+ break;
+ }
+ split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
++ if (IS_ERR(split))
++ *bio = split = NULL;
+ break;
+ }
+
+ if (split) {
+- /* there isn't chance to merge the splitted bio */
++		/* there is no chance to merge the split bio */
+ split->bi_opf |= REQ_NOMERGE;
+
+ bio_chain(split, *bio);
+@@ -561,6 +574,9 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
+ static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
+ unsigned int nr_phys_segs)
+ {
++ if (!blk_cgroup_mergeable(req, bio))
++ goto no_merge;
++
+ if (blk_integrity_merge_bio(req->q, req, bio) == false)
+ goto no_merge;
+
+@@ -657,6 +673,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
+ if (total_phys_segments > blk_rq_get_max_segments(req))
+ return 0;
+
++ if (!blk_cgroup_mergeable(req, next->bio))
++ return 0;
++
+ if (blk_integrity_merge_rq(q, req, next) == false)
+ return 0;
+
+@@ -799,6 +818,8 @@ static struct request *attempt_merge(struct request_queue *q,
+ if (!blk_discard_mergable(req))
+ elv_merge_requests(q, req, next);
+
++ blk_crypto_rq_put_keyslot(next);
++
+ /*
+ * 'next' is going away, so update stats accordingly
+ */
+@@ -863,6 +884,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
+ if (rq->rq_disk != bio->bi_bdev->bd_disk)
+ return false;
+
++ /* don't merge across cgroup boundaries */
++ if (!blk_cgroup_mergeable(rq, bio))
++ return false;
++
+ /* only merge integrity protected bio into ditto rq */
+ if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
+ return false;
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 3b38d15723de1..7023257a133df 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -879,6 +879,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
+ char name[20];
+ int i;
+
++ if (!q->debugfs_dir)
++ return;
++
+ snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
+ hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
+
+diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
+index 0f006cabfd914..ff1021dbb0d22 100644
+--- a/block/blk-mq-sched.c
++++ b/block/blk-mq-sched.c
+@@ -45,8 +45,7 @@ void blk_mq_sched_assign_ioc(struct request *rq)
+ }
+
+ /*
+- * Mark a hardware queue as needing a restart. For shared queues, maintain
+- * a count of how many hardware queues are marked for restart.
++ * Mark a hardware queue as needing a restart.
+ */
+ void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+ {
+@@ -110,7 +109,7 @@ dispatch:
+ /*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
++ * restart queue if .get_budget() fails to get the budget.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
+@@ -208,11 +207,18 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+
+ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+ {
++ unsigned long end = jiffies + HZ;
+ int ret;
+
+ do {
+ ret = __blk_mq_do_dispatch_sched(hctx);
+- } while (ret == 1);
++ if (ret != 1)
++ break;
++ if (need_resched() || time_is_before_jiffies(end)) {
++ blk_mq_delay_run_hw_queue(hctx, 0);
++ break;
++ }
++ } while (1);
+
+ return ret;
+ }
+@@ -231,7 +237,7 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
+ /*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
++ * restart queue if .get_budget() fails to get the budget.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
+diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
+index 253c857cba47c..7074ce8d2d03f 100644
+--- a/block/blk-mq-sysfs.c
++++ b/block/blk-mq-sysfs.c
+@@ -187,7 +187,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
+ {
+ struct request_queue *q = hctx->queue;
+ struct blk_mq_ctx *ctx;
+- int i, ret;
++ int i, j, ret;
+
+ if (!hctx->nr_ctx)
+ return 0;
+@@ -199,9 +199,16 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
+ hctx_for_each_ctx(hctx, ctx, i) {
+ ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
+ if (ret)
+- break;
++ goto out;
+ }
+
++ return 0;
++out:
++ hctx_for_each_ctx(hctx, ctx, j) {
++ if (j < i)
++ kobject_del(&ctx->kobj);
++ }
++ kobject_del(&hctx->kobj);
+ return ret;
+ }
+
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index 652a31fc3bb38..bbbbcd2c19418 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -457,7 +457,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ * allocator for this for the rare use case of a command tied to
+ * a specific queue.
+ */
+- if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
++ if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)) ||
++ WARN_ON_ONCE(!(flags & BLK_MQ_REQ_RESERVED)))
+ return ERR_PTR(-EINVAL);
+
+ if (hctx_idx >= q->nr_hw_queues)
+@@ -476,6 +477,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ if (!blk_mq_hw_queue_mapped(data.hctx))
+ goto out_queue_exit;
+ cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
++ if (cpu >= nr_cpu_ids)
++ goto out_queue_exit;
+ data.ctx = __blk_mq_get_ctx(q, cpu);
+
+ if (!q->elevator)
+@@ -763,7 +766,6 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
+ /* this request will be re-inserted to io scheduler queue */
+ blk_mq_sched_requeue_request(rq);
+
+- BUG_ON(!list_empty(&rq->queuelist));
+ blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
+ }
+ EXPORT_SYMBOL(blk_mq_requeue_request);
+@@ -1399,7 +1401,8 @@ out:
+ /* If we didn't flush the entire list, we could have told the driver
+ * there was more coming, but that turned out to be a lie.
+ */
+- if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
++ if ((!list_empty(list) || errors || needs_resource ||
++ ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
+ q->mq_ops->commit_rqs(hctx);
+ /*
+ * Any items that need requeuing? Stuff them into hctx->dispatch,
+@@ -1643,8 +1646,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q)
+ */
+ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
+ {
+- struct blk_mq_hw_ctx *hctx;
+-
++ struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+ /*
+ * If the IO scheduler does not respect hardware queues when
+ * dispatching, we just don't bother with multiple HW queues and
+@@ -1652,8 +1654,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
+ * just causes lock contention inside the scheduler and pointless cache
+ * bouncing.
+ */
+- hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
+- raw_smp_processor_id());
++ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
++
+ if (!blk_mq_hctx_stopped(hctx))
+ return hctx;
+ return NULL;
+@@ -2111,6 +2113,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+ list_del_init(&rq->queuelist);
+ ret = blk_mq_request_issue_directly(rq, list_empty(list));
+ if (ret != BLK_STS_OK) {
++ errors++;
+ if (ret == BLK_STS_RESOURCE ||
+ ret == BLK_STS_DEV_RESOURCE) {
+ blk_mq_request_bypass_insert(rq, false,
+@@ -2118,7 +2121,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
+ break;
+ }
+ blk_mq_end_request(rq, ret);
+- errors++;
+ } else
+ queued++;
+ }
+@@ -2148,14 +2150,14 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+ }
+
+ /*
+- * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
++ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+ {
+ if (plug->multiple_queues)
+- return BLK_MAX_REQUEST_COUNT * 4;
++ return BLK_MAX_REQUEST_COUNT * 2;
+ return BLK_MAX_REQUEST_COUNT;
+ }
+
+@@ -2192,6 +2194,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
+
+ blk_queue_bounce(q, &bio);
+ __blk_queue_split(&bio, &nr_segs);
++ if (!bio)
++ goto queue_exit;
+
+ if (!bio_integrity_prep(bio))
+ goto queue_exit;
+@@ -2224,7 +2228,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
+
+ blk_mq_bio_to_request(rq, bio, nr_segs);
+
+- ret = blk_crypto_init_request(rq);
++ ret = blk_crypto_rq_get_keyslot(rq);
+ if (ret != BLK_STS_OK) {
+ bio->bi_status = ret;
+ bio_endio(bio);
+@@ -4019,6 +4023,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq)
+ }
+ EXPORT_SYMBOL(blk_mq_rq_cpu);
+
++void blk_mq_cancel_work_sync(struct request_queue *q)
++{
++ if (queue_is_mq(q)) {
++ struct blk_mq_hw_ctx *hctx;
++ int i;
++
++ cancel_delayed_work_sync(&q->requeue_work);
++
++ queue_for_each_hw_ctx(q, hctx, i)
++ cancel_delayed_work_sync(&hctx->run_work);
++ }
++}
++
+ static int __init blk_mq_init(void)
+ {
+ int i;
+diff --git a/block/blk-mq.h b/block/blk-mq.h
+index d08779f77a265..7cdca23b6263d 100644
+--- a/block/blk-mq.h
++++ b/block/blk-mq.h
+@@ -129,6 +129,8 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
+ extern void blk_mq_sysfs_unregister(struct request_queue *q);
+ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
+
++void blk_mq_cancel_work_sync(struct request_queue *q);
++
+ void blk_mq_release(struct request_queue *q);
+
+ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
+diff --git a/block/blk-pm.c b/block/blk-pm.c
+index 17bd020268d42..2dad62cc15727 100644
+--- a/block/blk-pm.c
++++ b/block/blk-pm.c
+@@ -163,27 +163,19 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
+ /**
+ * blk_post_runtime_resume - Post runtime resume processing
+ * @q: the queue of the device
+- * @err: return value of the device's runtime_resume function
+ *
+ * Description:
+- * Update the queue's runtime status according to the return value of the
+- * device's runtime_resume function. If the resume was successful, call
+- * blk_set_runtime_active() to do the real work of restarting the queue.
++ * For historical reasons, this routine merely calls blk_set_runtime_active()
++ * to do the real work of restarting the queue. It does this regardless of
++ * whether the device's runtime-resume succeeded; even if it failed the
++ * driver or error handler will need to communicate with the device.
+ *
+ * This function should be called near the end of the device's
+ * runtime_resume callback.
+ */
+-void blk_post_runtime_resume(struct request_queue *q, int err)
++void blk_post_runtime_resume(struct request_queue *q)
+ {
+- if (!q->dev)
+- return;
+- if (!err) {
+- blk_set_runtime_active(q);
+- } else {
+- spin_lock_irq(&q->queue_lock);
+- q->rpm_status = RPM_SUSPENDED;
+- spin_unlock_irq(&q->queue_lock);
+- }
++ blk_set_runtime_active(q);
+ }
+ EXPORT_SYMBOL(blk_post_runtime_resume);
+
+@@ -201,7 +193,7 @@ EXPORT_SYMBOL(blk_post_runtime_resume);
+ * runtime PM status and re-enable peeking requests from the queue. It
+ * should be called before first request is added to the queue.
+ *
+- * This function is also called by blk_post_runtime_resume() for successful
++ * This function is also called by blk_post_runtime_resume() for
+ * runtime resumes. It does everything necessary to restart the queue.
+ */
+ void blk_set_runtime_active(struct request_queue *q)
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index f000f83e0621c..1655f76b6a1b6 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+ init_waitqueue_head(&rq_wait->wait);
+ }
+
+-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+ {
+ /*
+ * No IO can be in-flight when adding rqos, so freeze queue, which
+@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+ blk_mq_freeze_queue(q);
+
+ spin_lock_irq(&q->queue_lock);
++ if (rq_qos_id(q, rqos->id))
++ goto ebusy;
+ rqos->next = q->rq_qos;
+ q->rq_qos = rqos;
+ spin_unlock_irq(&q->queue_lock);
+@@ -106,6 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+
+ if (rqos->ops->debugfs_attrs)
+ blk_mq_debugfs_register_rqos(rqos);
++
++ return 0;
++ebusy:
++ spin_unlock_irq(&q->queue_lock);
++ blk_mq_unfreeze_queue(q);
++ return -EBUSY;
++
+ }
+
+ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+@@ -177,21 +186,22 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
+ __rq_qos_requeue(q->rq_qos, rq);
+ }
+
+-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
++static inline void rq_qos_done_bio(struct bio *bio)
+ {
+- if (q->rq_qos)
+- __rq_qos_done_bio(q->rq_qos, bio);
++ if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
++ bio_flagged(bio, BIO_QOS_MERGED))) {
++ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
++ if (q->rq_qos)
++ __rq_qos_done_bio(q->rq_qos, bio);
++ }
+ }
+
+ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
+ {
+- /*
+- * BIO_TRACKED lets controllers know that a bio went through the
+- * normal rq_qos path.
+- */
+- bio_set_flag(bio, BIO_TRACKED);
+- if (q->rq_qos)
++ if (q->rq_qos) {
++ bio_set_flag(bio, BIO_QOS_THROTTLED);
+ __rq_qos_throttle(q->rq_qos, bio);
++ }
+ }
+
+ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
+@@ -204,8 +214,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
+ static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+- if (q->rq_qos)
++ if (q->rq_qos) {
++ bio_set_flag(bio, BIO_QOS_MERGED);
+ __rq_qos_merge(q->rq_qos, rq, bio);
++ }
+ }
+
+ static inline void rq_qos_queue_depth_changed(struct request_queue *q)
+diff --git a/block/blk-settings.c b/block/blk-settings.c
+index b880c70e22e4e..73a80895e3ae1 100644
+--- a/block/blk-settings.c
++++ b/block/blk-settings.c
+@@ -875,6 +875,7 @@ static bool disk_has_partitions(struct gendisk *disk)
+ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
+ {
+ struct request_queue *q = disk->queue;
++ unsigned int old_model = q->limits.zoned;
+
+ switch (model) {
+ case BLK_ZONED_HM:
+@@ -912,7 +913,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
+ */
+ blk_queue_zone_write_granularity(q,
+ queue_logical_block_size(q));
+- } else {
++ } else if (old_model != BLK_ZONED_NONE) {
+ blk_queue_clear_zone_settings(q);
+ }
+ }
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index 614d9d47de36b..00021f0123701 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -805,16 +805,6 @@ static void blk_release_queue(struct kobject *kobj)
+
+ blk_free_queue_stats(q->stats);
+
+- if (queue_is_mq(q)) {
+- struct blk_mq_hw_ctx *hctx;
+- int i;
+-
+- cancel_delayed_work_sync(&q->requeue_work);
+-
+- queue_for_each_hw_ctx(q, hctx, i)
+- cancel_delayed_work_sync(&hctx->run_work);
+- }
+-
+ blk_exit_queue(q);
+
+ blk_queue_free_zone_bitmaps(q);
+@@ -964,15 +954,17 @@ void blk_unregister_queue(struct gendisk *disk)
+ */
+ if (queue_is_mq(q))
+ blk_mq_unregister_dev(disk_to_dev(disk), q);
+-
+- kobject_uevent(&q->kobj, KOBJ_REMOVE);
+- kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(disk_to_dev(disk));
+
+ mutex_lock(&q->sysfs_lock);
+ if (q->elevator)
+ elv_unregister_queue(q);
+ mutex_unlock(&q->sysfs_lock);
++
++ /* Now that we've deleted all child objects, we can delete the queue. */
++ kobject_uevent(&q->kobj, KOBJ_REMOVE);
++ kobject_del(&q->kobj);
++
+ mutex_unlock(&q->sysfs_dir_lock);
+
+ kobject_put(&disk_to_dev(disk)->kobj);
+diff --git a/block/blk-throttle.c b/block/blk-throttle.c
+index 7c4e7993ba970..68cf8dbb4c67a 100644
+--- a/block/blk-throttle.c
++++ b/block/blk-throttle.c
+@@ -950,7 +950,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
+ u64 bps_limit, unsigned long *wait)
+ {
+ bool rw = bio_data_dir(bio);
+- u64 bytes_allowed, extra_bytes, tmp;
++ u64 bytes_allowed, extra_bytes;
+ unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+ unsigned int bio_size = throtl_bio_data_size(bio);
+
+@@ -967,10 +967,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
+ jiffy_elapsed_rnd = tg->td->throtl_slice;
+
+ jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
+-
+- tmp = bps_limit * jiffy_elapsed_rnd;
+- do_div(tmp, HZ);
+- bytes_allowed = tmp;
++ bytes_allowed = mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed_rnd,
++ (u64)HZ);
+
+ if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
+ if (wait)
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 874c1c37bf0c6..e91d334b2788c 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -357,6 +357,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
+ unsigned int inflight = wbt_inflight(rwb);
+ int status;
+
++ if (!rwb->rqos.q->disk)
++ return;
++
+ status = latency_exceeded(rwb, cb->stat);
+
+ trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
+@@ -817,6 +820,7 @@ int wbt_init(struct request_queue *q)
+ {
+ struct rq_wb *rwb;
+ int i;
++ int ret;
+
+ rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
+ if (!rwb)
+@@ -837,19 +841,26 @@ int wbt_init(struct request_queue *q)
+ rwb->last_comp = rwb->last_issue = jiffies;
+ rwb->win_nsec = RWB_WINDOW_NSEC;
+ rwb->enable_state = WBT_STATE_ON_DEFAULT;
+- rwb->wc = 1;
++ rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags);
+ rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
++ rwb->min_lat_nsec = wbt_default_latency_nsec(q);
++
++ wbt_queue_depth_changed(&rwb->rqos);
+
+ /*
+ * Assign rwb and add the stats callback.
+ */
+- rq_qos_add(q, &rwb->rqos);
++ ret = rq_qos_add(q, &rwb->rqos);
++ if (ret)
++ goto err_free;
++
+ blk_stat_add_callback(q, rwb->cb);
+
+- rwb->min_lat_nsec = wbt_default_latency_nsec(q);
++ return 0;
+
+- wbt_queue_depth_changed(&rwb->rqos);
+- wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
++err_free:
++ blk_stat_free_callback(rwb->cb);
++ kfree(rwb);
++ return ret;
+
+- return 0;
+ }
+diff --git a/block/blk-zoned.c b/block/blk-zoned.c
+index 1d0c76c18fc52..774ecc598bee2 100644
+--- a/block/blk-zoned.c
++++ b/block/blk-zoned.c
+@@ -429,9 +429,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ op = REQ_OP_ZONE_RESET;
+
+ /* Invalidate the page cache, including dirty pages. */
++ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+ if (ret)
+- return ret;
++ goto fail;
+ break;
+ case BLKOPENZONE:
+ op = REQ_OP_ZONE_OPEN;
+@@ -449,15 +450,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+ GFP_KERNEL);
+
+- /*
+- * Invalidate the page cache again for zone reset: writes can only be
+- * direct for zoned devices so concurrent writes would not add any page
+- * to the page cache after/during reset. The page cache may be filled
+- * again due to concurrent reads though and dropping the pages for
+- * these is fine.
+- */
+- if (!ret && cmd == BLKRESETZONE)
+- ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
++fail:
++ if (cmd == BLKRESETZONE)
++ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+
+ return ret;
+ }
+diff --git a/block/blk.h b/block/blk.h
+index 6c3c00a8fe19d..aab72194d2266 100644
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -184,6 +184,12 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
+ void blk_account_io_start(struct request *req);
+ void blk_account_io_done(struct request *req, u64 now);
+
++/*
++ * Plug flush limits
++ */
++#define BLK_MAX_REQUEST_COUNT 32
++#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
++
+ /*
+ * Internal elevator interface
+ */
+diff --git a/block/disk-events.c b/block/disk-events.c
+index 8d5496e7592a5..c3488409dd32f 100644
+--- a/block/disk-events.c
++++ b/block/disk-events.c
+@@ -307,6 +307,7 @@ bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
+ return false;
+
++ inc_diskseq(disk);
+ if (__invalidate_device(disk->part0, true))
+ pr_warn("VFS: busy inodes on changed media %s\n",
+ disk->disk_name);
+diff --git a/block/elevator.c b/block/elevator.c
+index ff45d8388f487..1b5e57f6115f3 100644
+--- a/block/elevator.c
++++ b/block/elevator.c
+@@ -523,8 +523,6 @@ void elv_unregister_queue(struct request_queue *q)
+ kobject_del(&e->kobj);
+
+ e->registered = 0;
+- /* Re-enable throttling in case elevator disabled it */
+- wbt_enable_default(q);
+ }
+ }
+
+@@ -694,12 +692,18 @@ void elevator_init_mq(struct request_queue *q)
+ if (!e)
+ return;
+
++ /*
++	 * We are called before the disk is added, when there is no FS I/O,
++	 * so freezing the queue plus canceling dispatch work is enough to
++	 * drain any dispatch activities originating from passthrough
++	 * requests; there is then no need to quiesce the queue, which could
++	 * add long boot latency, especially when many disks are involved.
++ */
+ blk_mq_freeze_queue(q);
+- blk_mq_quiesce_queue(q);
++ blk_mq_cancel_work_sync(q);
+
+ err = blk_mq_init_sched(q, e);
+
+- blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
+
+ if (err) {
+diff --git a/block/fops.c b/block/fops.c
+index 1e970c247e0eb..6c265a1bcf1b1 100644
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -243,6 +243,24 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ bio_endio(bio);
+ break;
+ }
++ if (iocb->ki_flags & IOCB_NOWAIT) {
++ /*
++ * This is nonblocking IO, and we need to allocate
++ * another bio if we have data left to map. As we
++ * cannot guarantee that one of the sub bios will not
++ * fail getting issued FOR NOWAIT and as error results
++ * are coalesced across all of them, be safe and ask for
++ * a retry of this from blocking context.
++ */
++ if (unlikely(iov_iter_count(iter))) {
++ bio_release_pages(bio, false);
++ bio_clear_flag(bio, BIO_REFFED);
++ bio_put(bio);
++ blk_finish_plug(&plug);
++ return -EAGAIN;
++ }
++ bio->bi_opf |= REQ_NOWAIT;
++ }
+
+ if (is_read) {
+ bio->bi_opf = REQ_OP_READ;
+@@ -252,9 +270,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ bio->bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(bio->bi_iter.bi_size);
+ }
+- if (iocb->ki_flags & IOCB_NOWAIT)
+- bio->bi_opf |= REQ_NOWAIT;
+-
+ dio->size += bio->bi_iter.bi_size;
+ pos += bio->bi_iter.bi_size;
+
+diff --git a/block/genhd.c b/block/genhd.c
+index ab12ae6e636e8..6123f13e148e0 100644
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -19,6 +19,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/slab.h>
+ #include <linux/kmod.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/idr.h>
+ #include <linux/log2.h>
+@@ -323,7 +324,7 @@ int blk_alloc_ext_minor(void)
+ {
+ int idx;
+
+- idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
++ idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
+ if (idx == -ENOSPC)
+ return -EBUSY;
+ return idx;
+@@ -420,6 +421,8 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
+ DISK_MAX_PARTS);
+ disk->minors = DISK_MAX_PARTS;
+ }
++ if (disk->first_minor + disk->minors > MINORMASK + 1)
++ return -EINVAL;
+ } else {
+ if (WARN_ON(disk->minors))
+ return -EINVAL;
+@@ -432,10 +435,6 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
+ disk->flags |= GENHD_FL_EXT_DEVT;
+ }
+
+- ret = disk_alloc_events(disk);
+- if (ret)
+- goto out_free_ext_minor;
+-
+ /* delay uevents, until we scanned partition table */
+ dev_set_uevent_suppress(ddev, 1);
+
+@@ -446,7 +445,12 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
+ ddev->devt = MKDEV(disk->major, disk->first_minor);
+ ret = device_add(ddev);
+ if (ret)
+- goto out_disk_release_events;
++ goto out_free_ext_minor;
++
++ ret = disk_alloc_events(disk);
++ if (ret)
++ goto out_device_del;
++
+ if (!sysfs_deprecated) {
+ ret = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+@@ -467,11 +471,15 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
+
+ disk->part0->bd_holder_dir =
+ kobject_create_and_add("holders", &ddev->kobj);
+- if (!disk->part0->bd_holder_dir)
++ if (!disk->part0->bd_holder_dir) {
++ ret = -ENOMEM;
+ goto out_del_integrity;
++ }
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
+- if (!disk->slave_dir)
++ if (!disk->slave_dir) {
++ ret = -ENOMEM;
+ goto out_put_holder_dir;
++ }
+
+ ret = bd_register_pending_holders(disk);
+ if (ret < 0)
+@@ -487,7 +495,7 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
+ * and don't bother scanning for partitions either.
+ */
+ disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+- disk->flags |= GENHD_FL_NO_PART_SCAN;
++ disk->flags |= GENHD_FL_NO_PART;
+ } else {
+ ret = bdi_register(disk->bdi, "%u:%u",
+ disk->major, disk->first_minor);
+@@ -519,8 +527,10 @@ out_unregister_bdi:
+ bdi_unregister(disk->bdi);
+ out_unregister_queue:
+ blk_unregister_queue(disk);
++ rq_qos_exit(disk->queue);
+ out_put_slave_dir:
+ kobject_put(disk->slave_dir);
++ disk->slave_dir = NULL;
+ out_put_holder_dir:
+ kobject_put(disk->part0->bd_holder_dir);
+ out_del_integrity:
+@@ -530,8 +540,6 @@ out_del_block_link:
+ sysfs_remove_link(block_depr, dev_name(ddev));
+ out_device_del:
+ device_del(ddev);
+-out_disk_release_events:
+- disk_release_events(disk);
+ out_free_ext_minor:
+ if (disk->major == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(disk->first_minor);
+@@ -539,6 +547,20 @@ out_free_ext_minor:
+ }
+ EXPORT_SYMBOL(device_add_disk);
+
++/**
++ * blk_mark_disk_dead - mark a disk as dead
++ * @disk: disk to mark as dead
++ *
++ * Mark the disk as dead (e.g. surprise removed) and don't accept any new I/O
++ * to this disk.
++ */
++void blk_mark_disk_dead(struct gendisk *disk)
++{
++ set_bit(GD_DEAD, &disk->state);
++ blk_queue_start_drain(disk->queue);
++}
++EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
++
+ /**
+ * del_gendisk - remove the gendisk
+ * @disk: the struct gendisk to remove
+@@ -603,6 +625,7 @@ void del_gendisk(struct gendisk *disk)
+
+ kobject_put(disk->part0->bd_holder_dir);
+ kobject_put(disk->slave_dir);
++ disk->slave_dir = NULL;
+
+ part_stat_set_all(disk->part0, 0);
+ disk->part0->bd_stamp = 0;
+@@ -1082,6 +1105,8 @@ static void disk_release(struct device *dev)
+ might_sleep();
+ WARN_ON_ONCE(disk_live(disk));
+
++ blk_mq_cancel_work_sync(disk->queue);
++
+ disk_release_events(disk);
+ kfree(disk->random);
+ xa_destroy(&disk->part_tbl);
+diff --git a/block/holder.c b/block/holder.c
+index 9dc084182337f..27cddce1b4461 100644
+--- a/block/holder.c
++++ b/block/holder.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ #include <linux/genhd.h>
++#include <linux/slab.h>
+
+ struct bd_holder_disk {
+ struct list_head list;
+diff --git a/block/ioctl.c b/block/ioctl.c
+index eb0491e90b9a0..8f39e413f12a3 100644
+--- a/block/ioctl.c
++++ b/block/ioctl.c
+@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
+ struct blkpg_partition p;
+ long long start, length;
+
++ if (disk->flags & GENHD_FL_NO_PART)
++ return -EINVAL;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
+@@ -113,6 +115,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
+ uint64_t range[2];
+ uint64_t start, len;
+ struct request_queue *q = bdev_get_queue(bdev);
++ struct inode *inode = bdev->bd_inode;
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+@@ -135,12 +138,17 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
+ if (start + len > i_size_read(bdev->bd_inode))
+ return -EINVAL;
+
++ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+ if (err)
+- return err;
++ goto fail;
++
++ err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
++ GFP_KERNEL, flags);
+
+- return blkdev_issue_discard(bdev, start >> 9, len >> 9,
+- GFP_KERNEL, flags);
++fail:
++ filemap_invalidate_unlock(inode->i_mapping);
++ return err;
+ }
+
+ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
+@@ -148,6 +156,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
+ {
+ uint64_t range[2];
+ uint64_t start, end, len;
++ struct inode *inode = bdev->bd_inode;
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+@@ -170,12 +179,17 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
+ return -EINVAL;
+
+ /* Invalidate the page cache, including dirty pages */
++ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, end);
+ if (err)
+- return err;
++ goto fail;
+
+- return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
+- BLKDEV_ZERO_NOUNMAP);
++ err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
++ BLKDEV_ZERO_NOUNMAP);
++
++fail:
++ filemap_invalidate_unlock(inode->i_mapping);
++ return err;
+ }
+
+ static int put_ushort(unsigned short __user *argp, unsigned short val)
+@@ -633,7 +647,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+ (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
+ case BLKGETSIZE:
+ size = i_size_read(bdev->bd_inode);
+- if ((size >> 9) > ~0UL)
++ if ((size >> 9) > ~(compat_ulong_t)0)
+ return -EFBIG;
+ return compat_put_ulong(argp, size >> 9);
+
+diff --git a/block/ioprio.c b/block/ioprio.c
+index 0e4ff245f2bf2..6c830154856fc 100644
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio)
+
+ switch (class) {
+ case IOPRIO_CLASS_RT:
+- if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
++ /*
++ * Originally this only checked for CAP_SYS_ADMIN,
++ * which was implicitly allowed for pid 0 by security
++ * modules such as SELinux. Make sure we check
++ * CAP_SYS_ADMIN first to avoid a denial/avc for
++ * possibly missing CAP_SYS_NICE permission.
++ */
++ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
+ return -EPERM;
+ fallthrough;
+ /* rt has prio field too */
+@@ -182,9 +189,9 @@ out:
+ int ioprio_best(unsigned short aprio, unsigned short bprio)
+ {
+ if (!ioprio_valid(aprio))
+- aprio = IOPRIO_DEFAULT;
++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+ if (!ioprio_valid(bprio))
+- bprio = IOPRIO_DEFAULT;
++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+
+ return min(aprio, bprio);
+ }
+@@ -213,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+ pgrp = task_pgrp(current);
+ else
+ pgrp = find_vpid(who);
++ read_lock(&tasklist_lock);
+ do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
+ tmpio = get_task_ioprio(p);
+ if (tmpio < 0)
+@@ -222,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+ else
+ ret = ioprio_best(ret, tmpio);
+ } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
++ read_unlock(&tasklist_lock);
++
+ break;
+ case IOPRIO_WHO_USER:
+ uid = make_kuid(current_user_ns(), who);
+diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c
+index 2c4a55bea6ca1..2a7a36551cfae 100644
+--- a/block/keyslot-manager.c
++++ b/block/keyslot-manager.c
+@@ -343,25 +343,16 @@ bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
+ return true;
+ }
+
+-/**
+- * blk_ksm_evict_key() - Evict a key from the lower layer device.
+- * @ksm: The keyslot manager to evict from
+- * @key: The key to evict
+- *
+- * Find the keyslot that the specified key was programmed into, and evict that
+- * slot from the lower layer device. The slot must not be in use by any
+- * in-flight IO when this function is called.
+- *
+- * Context: Process context. Takes and releases ksm->lock.
+- * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
+- * if the keyslot is still in use, or another -errno value on other
+- * error.
++/*
++ * This is an internal function that evicts a key from an inline encryption
++ * device that can be either a real device or the blk-crypto-fallback "device".
++ * It is used only by blk_crypto_evict_key(); see that function for details.
+ */
+ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+ const struct blk_crypto_key *key)
+ {
+ struct blk_ksm_keyslot *slot;
+- int err = 0;
++ int err;
+
+ if (blk_ksm_is_passthrough(ksm)) {
+ if (ksm->ksm_ll_ops.keyslot_evict) {
+@@ -375,22 +366,30 @@ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+
+ blk_ksm_hw_enter(ksm);
+ slot = blk_ksm_find_keyslot(ksm, key);
+- if (!slot)
+- goto out_unlock;
++ if (!slot) {
++ /*
++ * Not an error, since a key not in use by I/O is not guaranteed
++ * to be in a keyslot. There can be more keys than keyslots.
++ */
++ err = 0;
++ goto out;
++ }
+
+ if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
++ /* BUG: key is still in use by I/O */
+ err = -EBUSY;
+- goto out_unlock;
++ goto out_remove;
+ }
+ err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
+ blk_ksm_get_slot_idx(slot));
+- if (err)
+- goto out_unlock;
+-
++out_remove:
++ /*
++ * Callers free the key even on error, so unlink the key from the hash
++ * table and clear slot->key even on error.
++ */
+ hlist_del(&slot->hash_node);
+ slot->key = NULL;
+- err = 0;
+-out_unlock:
++out:
+ blk_ksm_hw_exit(ksm);
+ return err;
+ }
+diff --git a/block/mq-deadline.c b/block/mq-deadline.c
+index 7f3c3932b723e..aaef5088a3baf 100644
+--- a/block/mq-deadline.c
++++ b/block/mq-deadline.c
+@@ -153,6 +153,20 @@ static u8 dd_rq_ioclass(struct request *rq)
+ return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
+ }
+
++/*
++ * get the request before `rq' in sector-sorted order
++ */
++static inline struct request *
++deadline_earlier_request(struct request *rq)
++{
++ struct rb_node *node = rb_prev(&rq->rb_node);
++
++ if (node)
++ return rb_entry_rq(node);
++
++ return NULL;
++}
++
+ /*
+ * get the request after `rq' in sector-sorted order
+ */
+@@ -288,6 +302,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
+ return 0;
+ }
+
++/*
++ * Check if rq has a sequential request preceding it.
++ */
++static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
++{
++ struct request *prev = deadline_earlier_request(rq);
++
++ if (!prev)
++ return false;
++
++ return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
++}
++
++/*
++ * Skip all write requests that are sequential from @rq, even if we cross
++ * a zone boundary.
++ */
++static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
++ struct request *rq)
++{
++ sector_t pos = blk_rq_pos(rq);
++ sector_t skipped_sectors = 0;
++
++ while (rq) {
++ if (blk_rq_pos(rq) != pos + skipped_sectors)
++ break;
++ skipped_sectors += blk_rq_sectors(rq);
++ rq = deadline_latter_request(rq);
++ }
++
++ return rq;
++}
++
+ /*
+ * For the specified data direction, return the next request to
+ * dispatch using arrival ordered lists.
+@@ -308,11 +355,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
+
+ /*
+ * Look for a write request that can be dispatched, that is one with
+- * an unlocked target zone.
++ * an unlocked target zone. For some HDDs, breaking a sequential
++ * write stream can lead to lower throughput, so make sure to preserve
++ * sequential write streams, even if that stream crosses into the next
++ * zones and these zones are unlocked.
+ */
+ spin_lock_irqsave(&dd->zone_lock, flags);
+ list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
+- if (blk_req_can_dispatch_to_zone(rq))
++ if (blk_req_can_dispatch_to_zone(rq) &&
++ (blk_queue_nonrot(rq->q) ||
++ !deadline_is_seq_write(dd, rq)))
+ goto out;
+ }
+ rq = NULL;
+@@ -342,13 +394,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
+
+ /*
+ * Look for a write request that can be dispatched, that is one with
+- * an unlocked target zone.
++ * an unlocked target zone. For some HDDs, breaking a sequential
++ * write stream can lead to lower throughput, so make sure to preserve
++ * sequential write streams, even if that stream crosses into the next
++ * zones and these zones are unlocked.
+ */
+ spin_lock_irqsave(&dd->zone_lock, flags);
+ while (rq) {
+ if (blk_req_can_dispatch_to_zone(rq))
+ break;
+- rq = deadline_latter_request(rq);
++ if (blk_queue_nonrot(rq->q))
++ rq = deadline_latter_request(rq);
++ else
++ rq = deadline_skip_seq_writes(dd, rq);
+ }
+ spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+@@ -733,6 +791,18 @@ static void dd_prepare_request(struct request *rq)
+ rq->elv.priv[0] = NULL;
+ }
+
++static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
++{
++ struct deadline_data *dd = hctx->queue->elevator->elevator_data;
++ enum dd_prio p;
++
++ for (p = 0; p <= DD_PRIO_MAX; p++)
++ if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
++ return true;
++
++ return false;
++}
++
+ /*
+ * Callback from inside blk_mq_free_request().
+ *
+@@ -755,7 +825,6 @@ static void dd_finish_request(struct request *rq)
+ struct deadline_data *dd = q->elevator->elevator_data;
+ const u8 ioprio_class = dd_rq_ioclass(rq);
+ const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+- struct dd_per_prio *per_prio = &dd->per_prio[prio];
+
+ /*
+ * The block layer core may call dd_finish_request() without having
+@@ -771,9 +840,10 @@ static void dd_finish_request(struct request *rq)
+
+ spin_lock_irqsave(&dd->zone_lock, flags);
+ blk_req_zone_write_unlock(rq);
+- if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
+- blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
+ spin_unlock_irqrestore(&dd->zone_lock, flags);
++
++ if (dd_has_write_work(rq->mq_hctx))
++ blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
+ }
+ }
+
+@@ -811,7 +881,7 @@ SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
+ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
+ SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
+ SHOW_INT(deadline_front_merges_show, dd->front_merges);
+-SHOW_INT(deadline_async_depth_show, dd->front_merges);
++SHOW_INT(deadline_async_depth_show, dd->async_depth);
+ SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
+ #undef SHOW_INT
+ #undef SHOW_JIFFIES
+@@ -840,7 +910,7 @@ STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX)
+ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
+ STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
+ STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
+-STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
++STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
+ STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
+ #undef STORE_FUNCTION
+ #undef STORE_INT
+diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
+index 5c8624e26a54c..5069210954129 100644
+--- a/block/partitions/amiga.c
++++ b/block/partitions/amiga.c
+@@ -11,10 +11,18 @@
+ #define pr_fmt(fmt) fmt
+
+ #include <linux/types.h>
++#include <linux/mm_types.h>
++#include <linux/overflow.h>
+ #include <linux/affs_hardblocks.h>
+
+ #include "check.h"
+
++/* magic offsets in partition DosEnvVec */
++#define NR_HD 3
++#define NR_SECT 5
++#define LO_CYL 9
++#define HI_CYL 10
++
+ static __inline__ u32
+ checksum_block(__be32 *m, int size)
+ {
+@@ -31,8 +39,12 @@ int amiga_partition(struct parsed_partitions *state)
+ unsigned char *data;
+ struct RigidDiskBlock *rdb;
+ struct PartitionBlock *pb;
+- int start_sect, nr_sects, blk, part, res = 0;
+- int blksize = 1; /* Multiplier for disk block size */
++ u64 start_sect, nr_sects;
++ sector_t blk, end_sect;
++ u32 cylblk; /* rdb_CylBlocks = nr_heads*sect_per_track */
++ u32 nr_hd, nr_sect, lo_cyl, hi_cyl;
++ int part, res = 0;
++ unsigned int blksize = 1; /* Multiplier for disk block size */
+ int slot = 1;
+
+ for (blk = 0; ; blk++, put_dev_sector(sect)) {
+@@ -40,7 +52,7 @@ int amiga_partition(struct parsed_partitions *state)
+ goto rdb_done;
+ data = read_part_sector(state, blk, &sect);
+ if (!data) {
+- pr_err("Dev %s: unable to read RDB block %d\n",
++ pr_err("Dev %s: unable to read RDB block %llu\n",
+ state->disk->disk_name, blk);
+ res = -1;
+ goto rdb_done;
+@@ -57,12 +69,12 @@ int amiga_partition(struct parsed_partitions *state)
+ *(__be32 *)(data+0xdc) = 0;
+ if (checksum_block((__be32 *)data,
+ be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
+- pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n",
++ pr_err("Trashed word at 0xd0 in block %llu ignored in checksum calculation\n",
+ blk);
+ break;
+ }
+
+- pr_err("Dev %s: RDB in block %d has bad checksum\n",
++ pr_err("Dev %s: RDB in block %llu has bad checksum\n",
+ state->disk->disk_name, blk);
+ }
+
+@@ -78,11 +90,16 @@ int amiga_partition(struct parsed_partitions *state)
+ }
+ blk = be32_to_cpu(rdb->rdb_PartitionList);
+ put_dev_sector(sect);
+- for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
+- blk *= blksize; /* Read in terms partition table understands */
++		/* Read in terms the partition table understands */
++ /* Read in terms partition table understands */
++ if (check_mul_overflow(blk, (sector_t) blksize, &blk)) {
++ pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n",
++ state->disk->disk_name, blk, part);
++ break;
++ }
+ data = read_part_sector(state, blk, &sect);
+ if (!data) {
+- pr_err("Dev %s: unable to read partition block %d\n",
++ pr_err("Dev %s: unable to read partition block %llu\n",
+ state->disk->disk_name, blk);
+ res = -1;
+ goto rdb_done;
+@@ -94,19 +111,70 @@ int amiga_partition(struct parsed_partitions *state)
+ if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
+ continue;
+
+- /* Tell Kernel about it */
++ /* RDB gives us more than enough rope to hang ourselves with,
++ * many times over (2^128 bytes if all fields max out).
++ * Some careful checks are in order, so check for potential
++ * overflows.
++ * We are multiplying four 32 bit numbers to one sector_t!
++ */
++
++ nr_hd = be32_to_cpu(pb->pb_Environment[NR_HD]);
++ nr_sect = be32_to_cpu(pb->pb_Environment[NR_SECT]);
++
++ /* CylBlocks is total number of blocks per cylinder */
++ if (check_mul_overflow(nr_hd, nr_sect, &cylblk)) {
++ pr_err("Dev %s: heads*sects %u overflows u32, skipping partition!\n",
++ state->disk->disk_name, cylblk);
++ continue;
++ }
++
++ /* check for consistency with RDB defined CylBlocks */
++ if (cylblk > be32_to_cpu(rdb->rdb_CylBlocks)) {
++ pr_warn("Dev %s: cylblk %u > rdb_CylBlocks %u!\n",
++ state->disk->disk_name, cylblk,
++ be32_to_cpu(rdb->rdb_CylBlocks));
++ }
++
++ /* RDB allows for variable logical block size -
++ * normalize to 512 byte blocks and check result.
++ */
++
++ if (check_mul_overflow(cylblk, blksize, &cylblk)) {
++ pr_err("Dev %s: partition %u bytes per cyl. overflows u32, skipping partition!\n",
++ state->disk->disk_name, part);
++ continue;
++ }
++
++ /* Calculate partition start and end. Limit of 32 bit on cylblk
++ * guarantees no overflow occurs if LBD support is enabled.
++ */
++
++ lo_cyl = be32_to_cpu(pb->pb_Environment[LO_CYL]);
++ start_sect = ((u64) lo_cyl * cylblk);
++
++ hi_cyl = be32_to_cpu(pb->pb_Environment[HI_CYL]);
++ nr_sects = (((u64) hi_cyl - lo_cyl + 1) * cylblk);
+
+- nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
+- be32_to_cpu(pb->pb_Environment[9])) *
+- be32_to_cpu(pb->pb_Environment[3]) *
+- be32_to_cpu(pb->pb_Environment[5]) *
+- blksize;
+ if (!nr_sects)
+ continue;
+- start_sect = be32_to_cpu(pb->pb_Environment[9]) *
+- be32_to_cpu(pb->pb_Environment[3]) *
+- be32_to_cpu(pb->pb_Environment[5]) *
+- blksize;
++
++ /* Warn user if partition end overflows u32 (AmigaDOS limit) */
++
++ if ((start_sect + nr_sects) > UINT_MAX) {
++ pr_warn("Dev %s: partition %u (%llu-%llu) needs 64 bit device support!\n",
++ state->disk->disk_name, part,
++ start_sect, start_sect + nr_sects);
++ }
++
++ if (check_add_overflow(start_sect, nr_sects, &end_sect)) {
++ pr_err("Dev %s: partition %u (%llu-%llu) needs LBD device support, skipping partition!\n",
++ state->disk->disk_name, part,
++ start_sect, end_sect);
++ continue;
++ }
++
++ /* Tell Kernel about it */
++
+ put_partition(state,slot++,start_sect,nr_sects);
+ {
+ /* Be even more informative to aid mounting */
+diff --git a/block/partitions/core.c b/block/partitions/core.c
+index 7bea19dd9458f..1ead8c0015616 100644
+--- a/block/partitions/core.c
++++ b/block/partitions/core.c
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2020 Christoph Hellwig
+ */
+ #include <linux/fs.h>
++#include <linux/major.h>
+ #include <linux/slab.h>
+ #include <linux/ctype.h>
+ #include <linux/genhd.h>
+@@ -525,18 +526,15 @@ out_unlock:
+
+ static bool disk_unlock_native_capacity(struct gendisk *disk)
+ {
+- const struct block_device_operations *bdops = disk->fops;
+-
+- if (bdops->unlock_native_capacity &&
+- !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
+- printk(KERN_CONT "enabling native capacity\n");
+- bdops->unlock_native_capacity(disk);
+- disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+- return true;
+- } else {
++ if (!disk->fops->unlock_native_capacity ||
++ test_and_set_bit(GD_NATIVE_CAPACITY, &disk->state)) {
+ printk(KERN_CONT "truncated\n");
+ return false;
+ }
++
++ printk(KERN_CONT "enabling native capacity\n");
++ disk->fops->unlock_native_capacity(disk);
++ return true;
+ }
+
+ void blk_drop_partitions(struct gendisk *disk)
+diff --git a/block/sed-opal.c b/block/sed-opal.c
+index daafadbb88cae..0ac5a4f3f2261 100644
+--- a/block/sed-opal.c
++++ b/block/sed-opal.c
+@@ -88,8 +88,8 @@ struct opal_dev {
+ u64 lowest_lba;
+
+ size_t pos;
+- u8 cmd[IO_BUFFER_LENGTH];
+- u8 resp[IO_BUFFER_LENGTH];
++ u8 *cmd;
++ u8 *resp;
+
+ struct parsed_resp parsed;
+ size_t prev_d_len;
+@@ -2134,6 +2134,8 @@ void free_opal_dev(struct opal_dev *dev)
+ return;
+
+ clean_opal_dev(dev);
++ kfree(dev->resp);
++ kfree(dev->cmd);
+ kfree(dev);
+ }
+ EXPORT_SYMBOL(free_opal_dev);
+@@ -2146,17 +2148,39 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
+ if (!dev)
+ return NULL;
+
++ /*
++ * Presumably DMA-able buffers must be cache-aligned. Kmalloc makes
++ * sure the allocated buffer is DMA-safe in that regard.
++ */
++ dev->cmd = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL);
++ if (!dev->cmd)
++ goto err_free_dev;
++
++ dev->resp = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL);
++ if (!dev->resp)
++ goto err_free_cmd;
++
+ INIT_LIST_HEAD(&dev->unlk_lst);
+ mutex_init(&dev->dev_lock);
+ dev->data = data;
+ dev->send_recv = send_recv;
+ if (check_opal_support(dev) != 0) {
+ pr_debug("Opal is not supported on this device\n");
+- kfree(dev);
+- return NULL;
++ goto err_free_resp;
+ }
+
+ return dev;
++
++err_free_resp:
++ kfree(dev->resp);
++
++err_free_cmd:
++ kfree(dev->cmd);
++
++err_free_dev:
++ kfree(dev);
++
++ return NULL;
+ }
+ EXPORT_SYMBOL(init_opal_dev);
+
+diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c
+index 344892337be07..d5961aa3d3380 100644
+--- a/certs/blacklist_hashes.c
++++ b/certs/blacklist_hashes.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include "blacklist.h"
+
+-const char __initdata *const blacklist_hashes[] = {
++const char __initconst *const blacklist_hashes[] = {
+ #include CONFIG_SYSTEM_BLACKLIST_HASH_LIST
+ , NULL
+ };
+diff --git a/crypto/Kconfig b/crypto/Kconfig
+index 536df4b6b825c..db260ccfba51b 100644
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig"
+ #
+ menuconfig CRYPTO
+ tristate "Cryptographic API"
++ select LIB_MEMNEQ
+ help
+ This option provides the core Cryptographic API.
+
+@@ -233,12 +234,12 @@ config CRYPTO_DH
+
+ config CRYPTO_ECC
+ tristate
++ select CRYPTO_RNG_DEFAULT
+
+ config CRYPTO_ECDH
+ tristate "ECDH algorithm"
+ select CRYPTO_ECC
+ select CRYPTO_KPP
+- select CRYPTO_RNG_DEFAULT
+ help
+ Generic implementation of the ECDH algorithm
+
+@@ -683,26 +684,8 @@ config CRYPTO_BLAKE2B
+
+ See https://blake2.net for further information.
+
+-config CRYPTO_BLAKE2S
+- tristate "BLAKE2s digest algorithm"
+- select CRYPTO_LIB_BLAKE2S_GENERIC
+- select CRYPTO_HASH
+- help
+- Implementation of cryptographic hash function BLAKE2s
+- optimized for 8-32bit platforms and can produce digests of any size
+- between 1 to 32. The keyed hash is also implemented.
+-
+- This module provides the following algorithms:
+-
+- - blake2s-128
+- - blake2s-160
+- - blake2s-224
+- - blake2s-256
+-
+- See https://blake2.net for further information.
+-
+ config CRYPTO_BLAKE2S_X86
+- tristate "BLAKE2s digest algorithm (x86 accelerated version)"
++ bool "BLAKE2s digest algorithm (x86 accelerated version)"
+ depends on X86 && 64BIT
+ select CRYPTO_LIB_BLAKE2S_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+@@ -1919,7 +1902,6 @@ config CRYPTO_STATS
+ config CRYPTO_HASH_INFO
+ bool
+
+-source "lib/crypto/Kconfig"
+ source "drivers/crypto/Kconfig"
+ source "crypto/asymmetric_keys/Kconfig"
+ source "certs/Kconfig"
+diff --git a/crypto/Makefile b/crypto/Makefile
+index c633f15a04813..429591ffeb5da 100644
+--- a/crypto/Makefile
++++ b/crypto/Makefile
+@@ -4,7 +4,7 @@
+ #
+
+ obj-$(CONFIG_CRYPTO) += crypto.o
+-crypto-y := api.o cipher.o compress.o memneq.o
++crypto-y := api.o cipher.o compress.o
+
+ obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
+ obj-$(CONFIG_CRYPTO_FIPS) += fips.o
+@@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
+ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
+ CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
+ obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
+ obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
+ obj-$(CONFIG_CRYPTO_ECB) += ecb.o
+ obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+diff --git a/crypto/akcipher.c b/crypto/akcipher.c
+index f866085c8a4a3..ab975a420e1e9 100644
+--- a/crypto/akcipher.c
++++ b/crypto/akcipher.c
+@@ -120,6 +120,12 @@ static int akcipher_default_op(struct akcipher_request *req)
+ return -ENOSYS;
+ }
+
++static int akcipher_default_set_key(struct crypto_akcipher *tfm,
++ const void *key, unsigned int keylen)
++{
++ return -ENOSYS;
++}
++
+ int crypto_register_akcipher(struct akcipher_alg *alg)
+ {
+ struct crypto_alg *base = &alg->base;
+@@ -132,6 +138,8 @@ int crypto_register_akcipher(struct akcipher_alg *alg)
+ alg->encrypt = akcipher_default_op;
+ if (!alg->decrypt)
+ alg->decrypt = akcipher_default_op;
++ if (!alg->set_priv_key)
++ alg->set_priv_key = akcipher_default_set_key;
+
+ akcipher_prepare_alg(alg);
+ return crypto_register_alg(base);
+diff --git a/crypto/algapi.c b/crypto/algapi.c
+index 43f999dba4dc0..c390a79c5a669 100644
+--- a/crypto/algapi.c
++++ b/crypto/algapi.c
+@@ -16,6 +16,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
++#include <linux/workqueue.h>
+
+ #include "internal.h"
+
+@@ -68,15 +69,26 @@ static void crypto_free_instance(struct crypto_instance *inst)
+ inst->alg.cra_type->free(inst);
+ }
+
+-static void crypto_destroy_instance(struct crypto_alg *alg)
++static void crypto_destroy_instance_workfn(struct work_struct *w)
+ {
+- struct crypto_instance *inst = (void *)alg;
++ struct crypto_instance *inst = container_of(w, struct crypto_instance,
++ free_work);
+ struct crypto_template *tmpl = inst->tmpl;
+
+ crypto_free_instance(inst);
+ crypto_tmpl_put(tmpl);
+ }
+
++static void crypto_destroy_instance(struct crypto_alg *alg)
++{
++ struct crypto_instance *inst = container_of(alg,
++ struct crypto_instance,
++ alg);
++
++ INIT_WORK(&inst->free_work, crypto_destroy_instance_workfn);
++ schedule_work(&inst->free_work);
++}
++
+ /*
+ * This function adds a spawn to the list secondary_spawns which
+ * will be used at the end of crypto_remove_spawns to unregister
+@@ -456,7 +468,9 @@ void crypto_unregister_alg(struct crypto_alg *alg)
+ if (WARN(ret, "Algorithm %s is not registered", alg->cra_driver_name))
+ return;
+
+- BUG_ON(refcount_read(&alg->cra_refcnt) != 1);
++ if (WARN_ON(refcount_read(&alg->cra_refcnt) != 1))
++ return;
++
+ if (alg->cra_destroy)
+ alg->cra_destroy(alg);
+
+@@ -918,6 +932,9 @@ EXPORT_SYMBOL_GPL(crypto_enqueue_request);
+ void crypto_enqueue_request_head(struct crypto_queue *queue,
+ struct crypto_async_request *request)
+ {
++ if (unlikely(queue->qlen >= queue->max_qlen))
++ queue->backlog = queue->backlog->prev;
++
+ queue->qlen++;
+ list_add(&request->list, &queue->list);
+ }
+@@ -1277,3 +1294,4 @@ module_exit(crypto_algapi_exit);
+
+ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("Cryptographic algorithms API");
++MODULE_SOFTDEP("pre: cryptomgr");
+diff --git a/crypto/api.c b/crypto/api.c
+index c4eda56cff891..5ffcd3ab4a753 100644
+--- a/crypto/api.c
++++ b/crypto/api.c
+@@ -603,4 +603,3 @@ EXPORT_SYMBOL_GPL(crypto_req_done);
+
+ MODULE_DESCRIPTION("Cryptographic core API");
+ MODULE_LICENSE("GPL");
+-MODULE_SOFTDEP("pre: cryptomgr");
+diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
+index 0b4d07aa88111..df279538cead3 100644
+--- a/crypto/asymmetric_keys/pkcs7_verify.c
++++ b/crypto/asymmetric_keys/pkcs7_verify.c
+@@ -79,16 +79,16 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
+ }
+
+ if (sinfo->msgdigest_len != sig->digest_size) {
+- pr_debug("Sig %u: Invalid digest size (%u)\n",
+- sinfo->index, sinfo->msgdigest_len);
++ pr_warn("Sig %u: Invalid digest size (%u)\n",
++ sinfo->index, sinfo->msgdigest_len);
+ ret = -EBADMSG;
+ goto error;
+ }
+
+ if (memcmp(sig->digest, sinfo->msgdigest,
+ sinfo->msgdigest_len) != 0) {
+- pr_debug("Sig %u: Message digest doesn't match\n",
+- sinfo->index);
++ pr_warn("Sig %u: Message digest doesn't match\n",
++ sinfo->index);
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+@@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7,
+ pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
+ sinfo->index, certix);
+
+- if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
+- pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
+- sinfo->index);
+- continue;
+- }
+-
+ sinfo->signer = x509;
+ return 0;
+ }
+@@ -487,7 +481,7 @@ int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
+ const void *data, size_t datalen)
+ {
+ if (pkcs7->data) {
+- pr_debug("Data already supplied\n");
++ pr_warn("Data already supplied\n");
+ return -EINVAL;
+ }
+ pkcs7->data = data;
+diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
+index 4fefb219bfdc8..50c933f86b218 100644
+--- a/crypto/asymmetric_keys/public_key.c
++++ b/crypto/asymmetric_keys/public_key.c
+@@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3)
+ }
+
+ /*
+- * Determine the crypto algorithm name.
++ * Given a public_key, and an encoding and hash_algo to be used for signing
++ * and/or verification with that key, determine the name of the corresponding
++ * akcipher algorithm. Also check that encoding and hash_algo are allowed.
+ */
+-static
+-int software_key_determine_akcipher(const char *encoding,
+- const char *hash_algo,
+- const struct public_key *pkey,
+- char alg_name[CRYPTO_MAX_ALG_NAME])
++static int
++software_key_determine_akcipher(const struct public_key *pkey,
++ const char *encoding, const char *hash_algo,
++ char alg_name[CRYPTO_MAX_ALG_NAME])
+ {
+ int n;
+
+- if (strcmp(encoding, "pkcs1") == 0) {
+- /* The data wangled by the RSA algorithm is typically padded
+- * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
+- * sec 8.2].
++ if (!encoding)
++ return -EINVAL;
++
++ if (strcmp(pkey->pkey_algo, "rsa") == 0) {
++ /*
++ * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2].
++ */
++ if (strcmp(encoding, "pkcs1") == 0) {
++ if (!hash_algo)
++ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
++ "pkcs1pad(%s)",
++ pkey->pkey_algo);
++ else
++ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
++ "pkcs1pad(%s,%s)",
++ pkey->pkey_algo, hash_algo);
++ return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
++ }
++ if (strcmp(encoding, "raw") != 0)
++ return -EINVAL;
++ /*
++ * Raw RSA cannot differentiate between different hash
++ * algorithms.
++ */
++ if (hash_algo)
++ return -EINVAL;
++ } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
++ if (strcmp(encoding, "x962") != 0)
++ return -EINVAL;
++ /*
++ * ECDSA signatures are taken over a raw hash, so they don't
++ * differentiate between different hash algorithms. That means
++ * that the verifier should hard-code a specific hash algorithm.
++ * Unfortunately, in practice ECDSA is used with multiple SHAs,
++ * so we have to allow all of them and not just one.
+ */
+ if (!hash_algo)
+- n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+- "pkcs1pad(%s)",
+- pkey->pkey_algo);
+- else
+- n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+- "pkcs1pad(%s,%s)",
+- pkey->pkey_algo, hash_algo);
+- return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+- }
+-
+- if (strcmp(encoding, "raw") == 0 ||
+- strcmp(encoding, "x962") == 0) {
+- strcpy(alg_name, pkey->pkey_algo);
+- return 0;
++ return -EINVAL;
++ if (strcmp(hash_algo, "sha1") != 0 &&
++ strcmp(hash_algo, "sha224") != 0 &&
++ strcmp(hash_algo, "sha256") != 0 &&
++ strcmp(hash_algo, "sha384") != 0 &&
++ strcmp(hash_algo, "sha512") != 0)
++ return -EINVAL;
++ } else if (strcmp(pkey->pkey_algo, "sm2") == 0) {
++ if (strcmp(encoding, "raw") != 0)
++ return -EINVAL;
++ if (!hash_algo)
++ return -EINVAL;
++ if (strcmp(hash_algo, "sm3") != 0)
++ return -EINVAL;
++ } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) {
++ if (strcmp(encoding, "raw") != 0)
++ return -EINVAL;
++ if (!hash_algo)
++ return -EINVAL;
++ if (strcmp(hash_algo, "streebog256") != 0 &&
++ strcmp(hash_algo, "streebog512") != 0)
++ return -EINVAL;
++ } else {
++ /* Unknown public key algorithm */
++ return -ENOPKG;
+ }
+-
+- return -ENOPKG;
++ if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0)
++ return -EINVAL;
++ return 0;
+ }
+
+ static u8 *pkey_pack_u32(u8 *dst, u32 val)
+@@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params,
+ u8 *key, *ptr;
+ int ret, len;
+
+- ret = software_key_determine_akcipher(params->encoding,
+- params->hash_algo,
+- pkey, alg_name);
++ ret = software_key_determine_akcipher(pkey, params->encoding,
++ params->hash_algo, alg_name);
+ if (ret < 0)
+ return ret;
+
+@@ -143,8 +186,28 @@ static int software_key_query(const struct kernel_pkey_params *params,
+
+ len = crypto_akcipher_maxsize(tfm);
+ info->key_size = len * 8;
+- info->max_data_size = len;
+- info->max_sig_size = len;
++
++ if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
++ /*
++ * ECDSA key sizes are much smaller than RSA, and thus could
++ * operate on (hashed) inputs that are larger than key size.
++ * For example SHA384-hashed input used with secp256r1
++ * based keys. Set max_data_size to be at least as large as
++ * the largest supported hash size (SHA512)
++ */
++ info->max_data_size = 64;
++
++ /*
++ * Verify takes ECDSA-Sig (described in RFC 5480) as input,
++ * which is actually 2 'key_size'-bit integers encoded in
++ * ASN.1. Account for the ASN.1 encoding overhead here.
++ */
++ info->max_sig_size = 2 * (len + 3) + 2;
++ } else {
++ info->max_data_size = len;
++ info->max_sig_size = len;
++ }
++
+ info->max_enc_size = len;
+ info->max_dec_size = len;
+ info->supported_ops = (KEYCTL_SUPPORTS_ENCRYPT |
+@@ -179,9 +242,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
+
+ pr_devel("==>%s()\n", __func__);
+
+- ret = software_key_determine_akcipher(params->encoding,
+- params->hash_algo,
+- pkey, alg_name);
++ ret = software_key_determine_akcipher(pkey, params->encoding,
++ params->hash_algo, alg_name);
+ if (ret < 0)
+ return ret;
+
+@@ -262,6 +324,10 @@ static int cert_sig_digest_update(const struct public_key_signature *sig,
+
+ BUG_ON(!sig->data);
+
++ /* SM2 signatures always use the SM3 hash algorithm */
++ if (!sig->hash_algo || strcmp(sig->hash_algo, "sm3") != 0)
++ return -EINVAL;
++
+ ret = sm2_compute_z_digest(tfm_pkey, SM2_DEFAULT_USERID,
+ SM2_DEFAULT_USERID_LEN, dgst);
+ if (ret)
+@@ -314,9 +380,10 @@ int public_key_verify_signature(const struct public_key *pkey,
+ struct crypto_wait cwait;
+ struct crypto_akcipher *tfm;
+ struct akcipher_request *req;
+- struct scatterlist src_sg[2];
++ struct scatterlist src_sg;
+ char alg_name[CRYPTO_MAX_ALG_NAME];
+- char *key, *ptr;
++ char *buf, *ptr;
++ size_t buf_len;
+ int ret;
+
+ pr_devel("==>%s()\n", __func__);
+@@ -325,9 +392,23 @@ int public_key_verify_signature(const struct public_key *pkey,
+ BUG_ON(!sig);
+ BUG_ON(!sig->s);
+
+- ret = software_key_determine_akcipher(sig->encoding,
+- sig->hash_algo,
+- pkey, alg_name);
++ /*
++ * If the signature specifies a public key algorithm, it *must* match
++ * the key's actual public key algorithm.
++ *
++ * Small exception: ECDSA signatures don't specify the curve, but ECDSA
++ * keys do. So the strings can mismatch slightly in that case:
++ * "ecdsa-nist-*" for the key, but "ecdsa" for the signature.
++ */
++ if (sig->pkey_algo) {
++ if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 &&
++ (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 ||
++ strcmp(sig->pkey_algo, "ecdsa") != 0))
++ return -EKEYREJECTED;
++ }
++
++ ret = software_key_determine_akcipher(pkey, sig->encoding,
++ sig->hash_algo, alg_name);
+ if (ret < 0)
+ return ret;
+
+@@ -340,35 +421,37 @@ int public_key_verify_signature(const struct public_key *pkey,
+ if (!req)
+ goto error_free_tfm;
+
+- key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
+- GFP_KERNEL);
+- if (!key)
++ buf_len = max_t(size_t, pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
++ sig->s_size + sig->digest_size);
++
++ buf = kmalloc(buf_len, GFP_KERNEL);
++ if (!buf)
+ goto error_free_req;
+
+- memcpy(key, pkey->key, pkey->keylen);
+- ptr = key + pkey->keylen;
++ memcpy(buf, pkey->key, pkey->keylen);
++ ptr = buf + pkey->keylen;
+ ptr = pkey_pack_u32(ptr, pkey->algo);
+ ptr = pkey_pack_u32(ptr, pkey->paramlen);
+ memcpy(ptr, pkey->params, pkey->paramlen);
+
+ if (pkey->key_is_private)
+- ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen);
++ ret = crypto_akcipher_set_priv_key(tfm, buf, pkey->keylen);
+ else
+- ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen);
++ ret = crypto_akcipher_set_pub_key(tfm, buf, pkey->keylen);
+ if (ret)
+- goto error_free_key;
++ goto error_free_buf;
+
+- if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
+- sig->data_size) {
++ if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) {
+ ret = cert_sig_digest_update(sig, tfm);
+ if (ret)
+- goto error_free_key;
++ goto error_free_buf;
+ }
+
+- sg_init_table(src_sg, 2);
+- sg_set_buf(&src_sg[0], sig->s, sig->s_size);
+- sg_set_buf(&src_sg[1], sig->digest, sig->digest_size);
+- akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size,
++ memcpy(buf, sig->s, sig->s_size);
++ memcpy(buf + sig->s_size, sig->digest, sig->digest_size);
++
++ sg_init_one(&src_sg, buf, sig->s_size + sig->digest_size);
++ akcipher_request_set_crypt(req, &src_sg, NULL, sig->s_size,
+ sig->digest_size);
+ crypto_init_wait(&cwait);
+ akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+@@ -376,8 +459,8 @@ int public_key_verify_signature(const struct public_key *pkey,
+ crypto_req_done, &cwait);
+ ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
+
+-error_free_key:
+- kfree(key);
++error_free_buf:
++ kfree(buf);
+ error_free_req:
+ akcipher_request_free(req);
+ error_free_tfm:
+diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
+index 7553ab18db898..22beaf2213a22 100644
+--- a/crypto/asymmetric_keys/verify_pefile.c
++++ b/crypto/asymmetric_keys/verify_pefile.c
+@@ -74,7 +74,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
+ break;
+
+ default:
+- pr_debug("Unknown PEOPT magic = %04hx\n", pe32->magic);
++ pr_warn("Unknown PEOPT magic = %04hx\n", pe32->magic);
+ return -ELIBBAD;
+ }
+
+@@ -95,7 +95,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
+ ctx->certs_size = ddir->certs.size;
+
+ if (!ddir->certs.virtual_address || !ddir->certs.size) {
+- pr_debug("Unsigned PE binary\n");
++ pr_warn("Unsigned PE binary\n");
+ return -ENODATA;
+ }
+
+@@ -127,7 +127,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
+ unsigned len;
+
+ if (ctx->sig_len < sizeof(wrapper)) {
+- pr_debug("Signature wrapper too short\n");
++ pr_warn("Signature wrapper too short\n");
+ return -ELIBBAD;
+ }
+
+@@ -135,19 +135,23 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
+ pr_debug("sig wrapper = { %x, %x, %x }\n",
+ wrapper.length, wrapper.revision, wrapper.cert_type);
+
+- /* Both pesign and sbsign round up the length of certificate table
+- * (in optional header data directories) to 8 byte alignment.
++ /* sbsign rounds up the length of certificate table (in optional
++ * header data directories) to 8 byte alignment. However, the PE
++ * specification states that while entries are 8-byte aligned, this is
++ * not included in their length, and as a result, pesign has not
++ * rounded up since 0.110.
+ */
+- if (round_up(wrapper.length, 8) != ctx->sig_len) {
+- pr_debug("Signature wrapper len wrong\n");
++ if (wrapper.length > ctx->sig_len) {
++ pr_warn("Signature wrapper bigger than sig len (%x > %x)\n",
++ ctx->sig_len, wrapper.length);
+ return -ELIBBAD;
+ }
+ if (wrapper.revision != WIN_CERT_REVISION_2_0) {
+- pr_debug("Signature is not revision 2.0\n");
++ pr_warn("Signature is not revision 2.0\n");
+ return -ENOTSUPP;
+ }
+ if (wrapper.cert_type != WIN_CERT_TYPE_PKCS_SIGNED_DATA) {
+- pr_debug("Signature certificate type is not PKCS\n");
++ pr_warn("Signature certificate type is not PKCS\n");
+ return -ENOTSUPP;
+ }
+
+@@ -160,7 +164,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf,
+ ctx->sig_offset += sizeof(wrapper);
+ ctx->sig_len -= sizeof(wrapper);
+ if (ctx->sig_len < 4) {
+- pr_debug("Signature data missing\n");
++ pr_warn("Signature data missing\n");
+ return -EKEYREJECTED;
+ }
+
+@@ -194,7 +198,7 @@ check_len:
+ return 0;
+ }
+ not_pkcs7:
+- pr_debug("Signature data not PKCS#7\n");
++ pr_warn("Signature data not PKCS#7\n");
+ return -ELIBBAD;
+ }
+
+@@ -337,8 +341,8 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
+ digest_size = crypto_shash_digestsize(tfm);
+
+ if (digest_size != ctx->digest_len) {
+- pr_debug("Digest size mismatch (%zx != %x)\n",
+- digest_size, ctx->digest_len);
++ pr_warn("Digest size mismatch (%zx != %x)\n",
++ digest_size, ctx->digest_len);
+ ret = -EBADMSG;
+ goto error_no_desc;
+ }
+@@ -369,7 +373,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
+ * PKCS#7 certificate.
+ */
+ if (memcmp(digest, ctx->digest, ctx->digest_len) != 0) {
+- pr_debug("Digest mismatch\n");
++ pr_warn("Digest mismatch\n");
+ ret = -EKEYREJECTED;
+ } else {
+ pr_debug("The digests match!\n");
+diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
+index 3d45161b271a4..99fe28663f948 100644
+--- a/crypto/asymmetric_keys/x509_public_key.c
++++ b/crypto/asymmetric_keys/x509_public_key.c
+@@ -128,11 +128,10 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
+ goto out;
+ }
+
+- ret = -EKEYREJECTED;
+- if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 &&
+- (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 ||
+- strcmp(cert->sig->pkey_algo, "ecdsa") != 0))
++ if (cert->unsupported_sig) {
++ ret = 0;
+ goto out;
++ }
+
+ ret = public_key_verify_signature(cert->pub, cert->sig);
+ if (ret < 0) {
+diff --git a/crypto/authenc.c b/crypto/authenc.c
+index 670bf1a01d00e..17f674a7cdff5 100644
+--- a/crypto/authenc.c
++++ b/crypto/authenc.c
+@@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req,
+ dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen);
+
+ skcipher_request_set_tfm(skreq, ctx->enc);
+- skcipher_request_set_callback(skreq, aead_request_flags(req),
++ skcipher_request_set_callback(skreq, flags,
+ req->base.complete, req->base.data);
+ skcipher_request_set_crypt(skreq, src, dst,
+ req->cryptlen - authsize, req->iv);
+diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
+deleted file mode 100644
+index 72fe480f9bd67..0000000000000
+--- a/crypto/blake2s_generic.c
++++ /dev/null
+@@ -1,75 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * shash interface to the generic implementation of BLAKE2s
+- *
+- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-
+-static int crypto_blake2s_update_generic(struct shash_desc *desc,
+- const u8 *in, unsigned int inlen)
+-{
+- return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic);
+-}
+-
+-static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
+-{
+- return crypto_blake2s_final(desc, out, blake2s_compress_generic);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size) \
+- { \
+- .base.cra_name = name, \
+- .base.cra_driver_name = driver_name, \
+- .base.cra_priority = 100, \
+- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+- .base.cra_module = THIS_MODULE, \
+- .digestsize = digest_size, \
+- .setkey = crypto_blake2s_setkey, \
+- .init = crypto_blake2s_init, \
+- .update = crypto_blake2s_update_generic, \
+- .final = crypto_blake2s_final_generic, \
+- .descsize = sizeof(struct blake2s_state), \
+- }
+-
+-static struct shash_alg blake2s_algs[] = {
+- BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
+- BLAKE2S_128_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
+- BLAKE2S_160_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
+- BLAKE2S_224_HASH_SIZE),
+- BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
+- BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_mod_init(void)
+-{
+- return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+- crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-subsys_initcall(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+-
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-generic");
+-MODULE_LICENSE("GPL v2");
+diff --git a/crypto/cryptd.c b/crypto/cryptd.c
+index a1bea0f4baa88..ca3a40fc7da91 100644
+--- a/crypto/cryptd.c
++++ b/crypto/cryptd.c
+@@ -39,6 +39,10 @@ struct cryptd_cpu_queue {
+ };
+
+ struct cryptd_queue {
++ /*
++ * Protected by disabling BH to allow enqueueing from softinterrupt and
++ * dequeuing from kworker (cryptd_queue_worker()).
++ */
+ struct cryptd_cpu_queue __percpu *cpu_queue;
+ };
+
+@@ -64,11 +68,12 @@ struct aead_instance_ctx {
+
+ struct cryptd_skcipher_ctx {
+ refcount_t refcnt;
+- struct crypto_sync_skcipher *child;
++ struct crypto_skcipher *child;
+ };
+
+ struct cryptd_skcipher_request_ctx {
+ crypto_completion_t complete;
++ struct skcipher_request req;
+ };
+
+ struct cryptd_hash_ctx {
+@@ -125,28 +130,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue)
+ static int cryptd_enqueue_request(struct cryptd_queue *queue,
+ struct crypto_async_request *request)
+ {
+- int cpu, err;
++ int err;
+ struct cryptd_cpu_queue *cpu_queue;
+ refcount_t *refcnt;
+
+- cpu = get_cpu();
++ local_bh_disable();
+ cpu_queue = this_cpu_ptr(queue->cpu_queue);
+ err = crypto_enqueue_request(&cpu_queue->queue, request);
+
+ refcnt = crypto_tfm_ctx(request->tfm);
+
+ if (err == -ENOSPC)
+- goto out_put_cpu;
++ goto out;
+
+- queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
++ queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work);
+
+ if (!refcount_read(refcnt))
+- goto out_put_cpu;
++ goto out;
+
+ refcount_inc(refcnt);
+
+-out_put_cpu:
+- put_cpu();
++out:
++ local_bh_enable();
+
+ return err;
+ }
+@@ -162,15 +167,10 @@ static void cryptd_queue_worker(struct work_struct *work)
+ cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
+ /*
+ * Only handle one request at a time to avoid hogging crypto workqueue.
+- * preempt_disable/enable is used to prevent being preempted by
+- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+- * cryptd_enqueue_request() being accessed from software interrupts.
+ */
+ local_bh_disable();
+- preempt_disable();
+ backlog = crypto_get_backlog(&cpu_queue->queue);
+ req = crypto_dequeue_request(&cpu_queue->queue);
+- preempt_enable();
+ local_bh_enable();
+
+ if (!req)
+@@ -228,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
+ const u8 *key, unsigned int keylen)
+ {
+ struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
+- struct crypto_sync_skcipher *child = ctx->child;
++ struct crypto_skcipher *child = ctx->child;
+
+- crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+- crypto_sync_skcipher_set_flags(child,
+- crypto_skcipher_get_flags(parent) &
+- CRYPTO_TFM_REQ_MASK);
+- return crypto_sync_skcipher_setkey(child, key, keylen);
++ crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
++ crypto_skcipher_set_flags(child,
++ crypto_skcipher_get_flags(parent) &
++ CRYPTO_TFM_REQ_MASK);
++ return crypto_skcipher_setkey(child, key, keylen);
+ }
+
+ static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
+@@ -259,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
+ struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+- struct crypto_sync_skcipher *child = ctx->child;
+- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
++ struct skcipher_request *subreq = &rctx->req;
++ struct crypto_skcipher *child = ctx->child;
+
+ if (unlikely(err == -EINPROGRESS))
+ goto out;
+
+- skcipher_request_set_sync_tfm(subreq, child);
++ skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+@@ -287,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
+ struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+- struct crypto_sync_skcipher *child = ctx->child;
+- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
++ struct skcipher_request *subreq = &rctx->req;
++ struct crypto_skcipher *child = ctx->child;
+
+ if (unlikely(err == -EINPROGRESS))
+ goto out;
+
+- skcipher_request_set_sync_tfm(subreq, child);
++ skcipher_request_set_tfm(subreq, child);
+ skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+@@ -344,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
+ if (IS_ERR(cipher))
+ return PTR_ERR(cipher);
+
+- ctx->child = (struct crypto_sync_skcipher *)cipher;
++ ctx->child = cipher;
+ crypto_skcipher_set_reqsize(
+- tfm, sizeof(struct cryptd_skcipher_request_ctx));
++ tfm, sizeof(struct cryptd_skcipher_request_ctx) +
++ crypto_skcipher_reqsize(cipher));
+ return 0;
+ }
+
+@@ -354,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
+ {
+ struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+- crypto_free_sync_skcipher(ctx->child);
++ crypto_free_skcipher(ctx->child);
+ }
+
+ static void cryptd_skcipher_free(struct skcipher_instance *inst)
+@@ -932,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
+ {
+ struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+- return &ctx->child->base;
++ return ctx->child;
+ }
+ EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
+
+diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
+index cff21f4e03e32..34effd4826c03 100644
+--- a/crypto/crypto_engine.c
++++ b/crypto/crypto_engine.c
+@@ -53,7 +53,8 @@ static void crypto_finalize_request(struct crypto_engine *engine,
+ dev_err(engine->dev, "failed to unprepare request\n");
+ }
+ }
+- req->complete(req, err);
++ lockdep_assert_in_softirq();
++ crypto_request_complete(req, err);
+
+ kthread_queue_work(engine->kworker, &engine->pump_requests);
+ }
+@@ -128,9 +129,6 @@ start_request:
+ if (!engine->retry_support)
+ engine->cur_req = async_req;
+
+- if (backlog)
+- backlog->complete(backlog, -EINPROGRESS);
+-
+ if (engine->busy)
+ was_busy = true;
+ else
+@@ -213,9 +211,12 @@ req_err_1:
+ }
+
+ req_err_2:
+- async_req->complete(async_req, ret);
++ crypto_request_complete(async_req, ret);
+
+ retry:
++ if (backlog)
++ crypto_request_complete(backlog, -EINPROGRESS);
++
+ /* If retry mechanism is supported, send new requests to engine */
+ if (engine->retry_support) {
+ spin_lock_irqsave(&engine->queue_lock, flags);
+diff --git a/crypto/drbg.c b/crypto/drbg.c
+index ea85d4a0fe9e9..44b0a7f624021 100644
+--- a/crypto/drbg.c
++++ b/crypto/drbg.c
+@@ -1036,17 +1036,38 @@ static const struct drbg_state_ops drbg_hash_ops = {
+ ******************************************************************/
+
+ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed,
+- int reseed)
++ int reseed, enum drbg_seed_state new_seed_state)
+ {
+ int ret = drbg->d_ops->update(drbg, seed, reseed);
+
+ if (ret)
+ return ret;
+
+- drbg->seeded = true;
++ drbg->seeded = new_seed_state;
+ /* 10.1.1.2 / 10.1.1.3 step 5 */
+ drbg->reseed_ctr = 1;
+
++ switch (drbg->seeded) {
++ case DRBG_SEED_STATE_UNSEEDED:
++ /* Impossible, but handle it to silence compiler warnings. */
++ fallthrough;
++ case DRBG_SEED_STATE_PARTIAL:
++ /*
++ * Require frequent reseeds until the seed source is
++ * fully initialized.
++ */
++ drbg->reseed_threshold = 50;
++ break;
++
++ case DRBG_SEED_STATE_FULL:
++ /*
++ * Seed source has become fully initialized, frequent
++ * reseeds no longer required.
++ */
++ drbg->reseed_threshold = drbg_max_requests(drbg);
++ break;
++ }
++
+ return ret;
+ }
+
+@@ -1066,12 +1087,10 @@ static inline int drbg_get_random_bytes(struct drbg_state *drbg,
+ return 0;
+ }
+
+-static void drbg_async_seed(struct work_struct *work)
++static int drbg_seed_from_random(struct drbg_state *drbg)
+ {
+ struct drbg_string data;
+ LIST_HEAD(seedlist);
+- struct drbg_state *drbg = container_of(work, struct drbg_state,
+- seed_work);
+ unsigned int entropylen = drbg_sec_strength(drbg->core->flags);
+ unsigned char entropy[32];
+ int ret;
+@@ -1082,26 +1101,15 @@ static void drbg_async_seed(struct work_struct *work)
+ drbg_string_fill(&data, entropy, entropylen);
+ list_add_tail(&data.list, &seedlist);
+
+- mutex_lock(&drbg->drbg_mutex);
+-
+ ret = drbg_get_random_bytes(drbg, entropy, entropylen);
+ if (ret)
+- goto unlock;
+-
+- /* Set seeded to false so that if __drbg_seed fails the
+- * next generate call will trigger a reseed.
+- */
+- drbg->seeded = false;
+-
+- __drbg_seed(drbg, &seedlist, true);
+-
+- if (drbg->seeded)
+- drbg->reseed_threshold = drbg_max_requests(drbg);
++ goto out;
+
+-unlock:
+- mutex_unlock(&drbg->drbg_mutex);
++ ret = __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL);
+
++out:
+ memzero_explicit(entropy, entropylen);
++ return ret;
+ }
+
+ /*
+@@ -1123,6 +1131,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
+ unsigned int entropylen = drbg_sec_strength(drbg->core->flags);
+ struct drbg_string data1;
+ LIST_HEAD(seedlist);
++ enum drbg_seed_state new_seed_state = DRBG_SEED_STATE_FULL;
+
+ /* 9.1 / 9.2 / 9.3.1 step 3 */
+ if (pers && pers->len > (drbg_max_addtl(drbg))) {
+@@ -1150,6 +1159,9 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
+ BUG_ON((entropylen * 2) > sizeof(entropy));
+
+ /* Get seed from in-kernel /dev/urandom */
++ if (!rng_is_initialized())
++ new_seed_state = DRBG_SEED_STATE_PARTIAL;
++
+ ret = drbg_get_random_bytes(drbg, entropy, entropylen);
+ if (ret)
+ goto out;
+@@ -1206,7 +1218,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
+ memset(drbg->C, 0, drbg_statelen(drbg));
+ }
+
+- ret = __drbg_seed(drbg, &seedlist, reseed);
++ ret = __drbg_seed(drbg, &seedlist, reseed, new_seed_state);
+
+ out:
+ memzero_explicit(entropy, entropylen * 2);
+@@ -1386,19 +1398,25 @@ static int drbg_generate(struct drbg_state *drbg,
+ * here. The spec is a bit convoluted here, we make it simpler.
+ */
+ if (drbg->reseed_threshold < drbg->reseed_ctr)
+- drbg->seeded = false;
++ drbg->seeded = DRBG_SEED_STATE_UNSEEDED;
+
+- if (drbg->pr || !drbg->seeded) {
++ if (drbg->pr || drbg->seeded == DRBG_SEED_STATE_UNSEEDED) {
+ pr_devel("DRBG: reseeding before generation (prediction "
+ "resistance: %s, state %s)\n",
+ drbg->pr ? "true" : "false",
+- drbg->seeded ? "seeded" : "unseeded");
++ (drbg->seeded == DRBG_SEED_STATE_FULL ?
++ "seeded" : "unseeded"));
+ /* 9.3.1 steps 7.1 through 7.3 */
+ len = drbg_seed(drbg, addtl, true);
+ if (len)
+ goto err;
+ /* 9.3.1 step 7.4 */
+ addtl = NULL;
++ } else if (rng_is_initialized() &&
++ drbg->seeded == DRBG_SEED_STATE_PARTIAL) {
++ len = drbg_seed_from_random(drbg);
++ if (len)
++ goto err;
+ }
+
+ if (addtl && 0 < addtl->len)
+@@ -1491,51 +1509,23 @@ static int drbg_generate_long(struct drbg_state *drbg,
+ return 0;
+ }
+
+-static void drbg_schedule_async_seed(struct random_ready_callback *rdy)
+-{
+- struct drbg_state *drbg = container_of(rdy, struct drbg_state,
+- random_ready);
+-
+- schedule_work(&drbg->seed_work);
+-}
+-
+ static int drbg_prepare_hrng(struct drbg_state *drbg)
+ {
+- int err;
+-
+ /* We do not need an HRNG in test mode. */
+ if (list_empty(&drbg->test_data.list))
+ return 0;
+
+ drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0);
++ if (IS_ERR(drbg->jent)) {
++ const int err = PTR_ERR(drbg->jent);
+
+- INIT_WORK(&drbg->seed_work, drbg_async_seed);
+-
+- drbg->random_ready.owner = THIS_MODULE;
+- drbg->random_ready.func = drbg_schedule_async_seed;
+-
+- err = add_random_ready_callback(&drbg->random_ready);
+-
+- switch (err) {
+- case 0:
+- break;
+-
+- case -EALREADY:
+- err = 0;
+- fallthrough;
+-
+- default:
+- drbg->random_ready.func = NULL;
+- return err;
++ drbg->jent = NULL;
++ if (fips_enabled)
++ return err;
++ pr_info("DRBG: Continuing without Jitter RNG\n");
+ }
+
+- /*
+- * Require frequent reseeds until the seed source is fully
+- * initialized.
+- */
+- drbg->reseed_threshold = 50;
+-
+- return err;
++ return 0;
+ }
+
+ /*
+@@ -1578,7 +1568,7 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
+ if (!drbg->core) {
+ drbg->core = &drbg_cores[coreref];
+ drbg->pr = pr;
+- drbg->seeded = false;
++ drbg->seeded = DRBG_SEED_STATE_UNSEEDED;
+ drbg->reseed_threshold = drbg_max_requests(drbg);
+
+ ret = drbg_alloc_state(drbg);
+@@ -1589,14 +1579,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
+ if (ret)
+ goto free_everything;
+
+- if (IS_ERR(drbg->jent)) {
+- ret = PTR_ERR(drbg->jent);
+- drbg->jent = NULL;
+- if (fips_enabled || ret != -ENOENT)
+- goto free_everything;
+- pr_info("DRBG: Continuing without Jitter RNG\n");
+- }
+-
+ reseed = false;
+ }
+
+@@ -1629,11 +1611,6 @@ free_everything:
+ */
+ static int drbg_uninstantiate(struct drbg_state *drbg)
+ {
+- if (drbg->random_ready.func) {
+- del_random_ready_callback(&drbg->random_ready);
+- cancel_work_sync(&drbg->seed_work);
+- }
+-
+ if (!IS_ERR_OR_NULL(drbg->jent))
+ crypto_free_rng(drbg->jent);
+ drbg->jent = NULL;
+diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c
+index 6a3fd09057d0c..f7ed430206720 100644
+--- a/crypto/ecrdsa.c
++++ b/crypto/ecrdsa.c
+@@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req)
+
+ /* Step 1: verify that 0 < r < q, 0 < s < q */
+ if (vli_is_zero(r, ndigits) ||
+- vli_cmp(r, ctx->curve->n, ndigits) == 1 ||
++ vli_cmp(r, ctx->curve->n, ndigits) >= 0 ||
+ vli_is_zero(s, ndigits) ||
+- vli_cmp(s, ctx->curve->n, ndigits) == 1)
++ vli_cmp(s, ctx->curve->n, ndigits) >= 0)
+ return -EKEYREJECTED;
+
+ /* Step 2: calculate hash (h) of the message (passed as input) */
+ /* Step 3: calculate e = h \mod q */
+ vli_from_le64(e, digest, ndigits);
+- if (vli_cmp(e, ctx->curve->n, ndigits) == 1)
++ if (vli_cmp(e, ctx->curve->n, ndigits) >= 0)
+ vli_sub(e, e, ctx->curve->n, ndigits);
+ if (vli_is_zero(e, ndigits))
+ e[0] = 1;
+@@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req)
+ /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */
+ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key,
+ ctx->curve);
+- if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1)
++ if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0)
+ vli_sub(cc.x, cc.x, ctx->curve->n, ndigits);
+
+ /* Step 7: if R == r signature is valid */
+diff --git a/crypto/essiv.c b/crypto/essiv.c
+index 8bcc5bdcb2a95..3505b071e6471 100644
+--- a/crypto/essiv.c
++++ b/crypto/essiv.c
+@@ -171,7 +171,12 @@ static void essiv_aead_done(struct crypto_async_request *areq, int err)
+ struct aead_request *req = areq->data;
+ struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
+
++ if (err == -EINPROGRESS)
++ goto out;
++
+ kfree(rctx->assoc);
++
++out:
+ aead_request_complete(req, err);
+ }
+
+@@ -247,7 +252,7 @@ static int essiv_aead_crypt(struct aead_request *req, bool enc)
+ err = enc ? crypto_aead_encrypt(subreq) :
+ crypto_aead_decrypt(subreq);
+
+- if (rctx->assoc && err != -EINPROGRESS)
++ if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY)
+ kfree(rctx->assoc);
+ return err;
+ }
+diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
+index a11b3208760f3..f6d3a84e3c214 100644
+--- a/crypto/jitterentropy.c
++++ b/crypto/jitterentropy.c
+@@ -265,7 +265,6 @@ static int jent_stuck(struct rand_data *ec, __u64 current_delta)
+ {
+ __u64 delta2 = jent_delta(ec->last_delta, current_delta);
+ __u64 delta3 = jent_delta(ec->last_delta2, delta2);
+- unsigned int delta_masked = current_delta & JENT_APT_WORD_MASK;
+
+ ec->last_delta = current_delta;
+ ec->last_delta2 = delta2;
+@@ -274,7 +273,7 @@ static int jent_stuck(struct rand_data *ec, __u64 current_delta)
+ * Insert the result of the comparison of two back-to-back time
+ * deltas.
+ */
+- jent_apt_insert(ec, delta_masked);
++ jent_apt_insert(ec, current_delta);
+
+ if (!current_delta || !delta2 || !delta3) {
+ /* RCT with a stuck bit */
+diff --git a/crypto/memneq.c b/crypto/memneq.c
+deleted file mode 100644
+index afed1bd16aee0..0000000000000
+--- a/crypto/memneq.c
++++ /dev/null
+@@ -1,168 +0,0 @@
+-/*
+- * Constant-time equality testing of memory regions.
+- *
+- * Authors:
+- *
+- * James Yonan <james@openvpn.net>
+- * Daniel Borkmann <dborkman@redhat.com>
+- *
+- * This file is provided under a dual BSD/GPLv2 license. When using or
+- * redistributing this file, you may do so under either license.
+- *
+- * GPL LICENSE SUMMARY
+- *
+- * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of version 2 of the GNU General Public License as
+- * published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope that it will be useful, but
+- * WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+- * The full GNU General Public License is included in this distribution
+- * in the file called LICENSE.GPL.
+- *
+- * BSD LICENSE
+- *
+- * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- *
+- * * Redistributions of source code must retain the above copyright
+- * notice, this list of conditions and the following disclaimer.
+- * * Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in
+- * the documentation and/or other materials provided with the
+- * distribution.
+- * * Neither the name of OpenVPN Technologies nor the names of its
+- * contributors may be used to endorse or promote products derived
+- * from this software without specific prior written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- */
+-
+-#include <crypto/algapi.h>
+-
+-#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
+-
+-/* Generic path for arbitrary size */
+-static inline unsigned long
+-__crypto_memneq_generic(const void *a, const void *b, size_t size)
+-{
+- unsigned long neq = 0;
+-
+-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+- while (size >= sizeof(unsigned long)) {
+- neq |= *(unsigned long *)a ^ *(unsigned long *)b;
+- OPTIMIZER_HIDE_VAR(neq);
+- a += sizeof(unsigned long);
+- b += sizeof(unsigned long);
+- size -= sizeof(unsigned long);
+- }
+-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+- while (size > 0) {
+- neq |= *(unsigned char *)a ^ *(unsigned char *)b;
+- OPTIMIZER_HIDE_VAR(neq);
+- a += 1;
+- b += 1;
+- size -= 1;
+- }
+- return neq;
+-}
+-
+-/* Loop-free fast-path for frequently used 16-byte size */
+-static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
+-{
+- unsigned long neq = 0;
+-
+-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+- if (sizeof(unsigned long) == 8) {
+- neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
+- OPTIMIZER_HIDE_VAR(neq);
+- } else if (sizeof(unsigned int) == 4) {
+- neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
+- OPTIMIZER_HIDE_VAR(neq);
+- } else
+-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+- {
+- neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
+- OPTIMIZER_HIDE_VAR(neq);
+- neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
+- OPTIMIZER_HIDE_VAR(neq);
+- }
+-
+- return neq;
+-}
+-
+-/* Compare two areas of memory without leaking timing information,
+- * and with special optimizations for common sizes. Users should
+- * not call this function directly, but should instead use
+- * crypto_memneq defined in crypto/algapi.h.
+- */
+-noinline unsigned long __crypto_memneq(const void *a, const void *b,
+- size_t size)
+-{
+- switch (size) {
+- case 16:
+- return __crypto_memneq_16(a, b);
+- default:
+- return __crypto_memneq_generic(a, b, size);
+- }
+-}
+-EXPORT_SYMBOL(__crypto_memneq);
+-
+-#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
+diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
+index d569c7ed6c800..9d10b846ccf73 100644
+--- a/crypto/pcrypt.c
++++ b/crypto/pcrypt.c
+@@ -78,12 +78,14 @@ static void pcrypt_aead_enc(struct padata_priv *padata)
+ {
+ struct pcrypt_request *preq = pcrypt_padata_request(padata);
+ struct aead_request *req = pcrypt_request_ctx(preq);
++ int ret;
+
+- padata->info = crypto_aead_encrypt(req);
++ ret = crypto_aead_encrypt(req);
+
+- if (padata->info == -EINPROGRESS)
++ if (ret == -EINPROGRESS)
+ return;
+
++ padata->info = ret;
+ padata_do_serial(padata);
+ }
+
+@@ -123,12 +125,14 @@ static void pcrypt_aead_dec(struct padata_priv *padata)
+ {
+ struct pcrypt_request *preq = pcrypt_padata_request(padata);
+ struct aead_request *req = pcrypt_request_ctx(preq);
++ int ret;
+
+- padata->info = crypto_aead_decrypt(req);
++ ret = crypto_aead_decrypt(req);
+
+- if (padata->info == -EINPROGRESS)
++ if (ret == -EINPROGRESS)
+ return;
+
++ padata->info = ret;
+ padata_do_serial(padata);
+ }
+
+diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
+index 8ac3e73e8ea65..e2f4ccbd71dd8 100644
+--- a/crypto/rsa-pkcs1pad.c
++++ b/crypto/rsa-pkcs1pad.c
+@@ -214,16 +214,14 @@ static void pkcs1pad_encrypt_sign_complete_cb(
+ struct crypto_async_request *child_async_req, int err)
+ {
+ struct akcipher_request *req = child_async_req->data;
+- struct crypto_async_request async_req;
+
+ if (err == -EINPROGRESS)
+- return;
++ goto out;
++
++ err = pkcs1pad_encrypt_sign_complete(req, err);
+
+- async_req.data = req->base.data;
+- async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
+- async_req.flags = child_async_req->flags;
+- req->base.complete(&async_req,
+- pkcs1pad_encrypt_sign_complete(req, err));
++out:
++ akcipher_request_complete(req, err);
+ }
+
+ static int pkcs1pad_encrypt(struct akcipher_request *req)
+@@ -332,15 +330,14 @@ static void pkcs1pad_decrypt_complete_cb(
+ struct crypto_async_request *child_async_req, int err)
+ {
+ struct akcipher_request *req = child_async_req->data;
+- struct crypto_async_request async_req;
+
+ if (err == -EINPROGRESS)
+- return;
++ goto out;
+
+- async_req.data = req->base.data;
+- async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
+- async_req.flags = child_async_req->flags;
+- req->base.complete(&async_req, pkcs1pad_decrypt_complete(req, err));
++ err = pkcs1pad_decrypt_complete(req, err);
++
++out:
++ akcipher_request_complete(req, err);
+ }
+
+ static int pkcs1pad_decrypt(struct akcipher_request *req)
+@@ -476,6 +473,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
+ pos++;
+
+ if (digest_info) {
++ if (digest_info->size > dst_len - pos)
++ goto done;
+ if (crypto_memneq(out_buf + pos, digest_info->data,
+ digest_info->size))
+ goto done;
+@@ -495,7 +494,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
+ sg_nents_for_len(req->src,
+ req->src_len + req->dst_len),
+ req_ctx->out_buf + ctx->key_size,
+- req->dst_len, ctx->key_size);
++ req->dst_len, req->src_len);
+ /* Do the actual verification step. */
+ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos,
+ req->dst_len) != 0)
+@@ -510,15 +509,14 @@ static void pkcs1pad_verify_complete_cb(
+ struct crypto_async_request *child_async_req, int err)
+ {
+ struct akcipher_request *req = child_async_req->data;
+- struct crypto_async_request async_req;
+
+ if (err == -EINPROGRESS)
+- return;
++ goto out;
+
+- async_req.data = req->base.data;
+- async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req));
+- async_req.flags = child_async_req->flags;
+- req->base.complete(&async_req, pkcs1pad_verify_complete(req, err));
++ err = pkcs1pad_verify_complete(req, err);
++
++out:
++ akcipher_request_complete(req, err);
+ }
+
+ /*
+@@ -538,7 +536,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
+
+ if (WARN_ON(req->dst) ||
+ WARN_ON(!req->dst_len) ||
+- !ctx->key_size || req->src_len < ctx->key_size)
++ !ctx->key_size || req->src_len != ctx->key_size)
+ return -EINVAL;
+
+ req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
+@@ -576,6 +574,10 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm)
+ return PTR_ERR(child_tfm);
+
+ ctx->child = child_tfm;
++
++ akcipher_set_reqsize(tfm, sizeof(struct pkcs1pad_request) +
++ crypto_akcipher_reqsize(child_tfm));
++
+ return 0;
+ }
+
+@@ -621,6 +623,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
+
+ rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
+
++ if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
++ err = -EINVAL;
++ goto err_free_inst;
++ }
++
+ err = -ENAMETOOLONG;
+ hash_name = crypto_attr_alg_name(tb[2]);
+ if (IS_ERR(hash_name)) {
+@@ -666,7 +673,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
+ inst->alg.set_pub_key = pkcs1pad_set_pub_key;
+ inst->alg.set_priv_key = pkcs1pad_set_priv_key;
+ inst->alg.max_size = pkcs1pad_get_max_size;
+- inst->alg.reqsize = sizeof(struct pkcs1pad_request) + rsa_alg->reqsize;
+
+ inst->free = pkcs1pad_free;
+
+diff --git a/crypto/seqiv.c b/crypto/seqiv.c
+index 0899d527c2845..b1bcfe537daf1 100644
+--- a/crypto/seqiv.c
++++ b/crypto/seqiv.c
+@@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
+ struct aead_request *subreq = aead_request_ctx(req);
+ struct crypto_aead *geniv;
+
+- if (err == -EINPROGRESS)
++ if (err == -EINPROGRESS || err == -EBUSY)
+ return;
+
+ if (err)
+diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
+index 82b0400985a51..4ada7e7493904 100644
+--- a/crypto/tcrypt.c
++++ b/crypto/tcrypt.c
+@@ -1295,15 +1295,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
+ goto out_free_tfm;
+ }
+
+-
+- for (i = 0; i < num_mb; ++i)
+- if (testmgr_alloc_buf(data[i].xbuf)) {
+- while (i--)
+- testmgr_free_buf(data[i].xbuf);
+- goto out_free_tfm;
+- }
+-
+-
+ for (i = 0; i < num_mb; ++i) {
+ data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!data[i].req) {
+@@ -1333,7 +1324,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
+
+ if (bs > XBUFSIZE * PAGE_SIZE) {
+ pr_err("template (%u) too big for buffer (%lu)\n",
+- *b_size, XBUFSIZE * PAGE_SIZE);
++ bs, XBUFSIZE * PAGE_SIZE);
+ goto out;
+ }
+
+@@ -1386,8 +1377,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
+ memset(cur->xbuf[p], 0xff, k);
+
+ skcipher_request_set_crypt(cur->req, cur->sg,
+- cur->sg, *b_size,
+- iv);
++ cur->sg, bs, iv);
+ }
+
+ if (secs) {
+@@ -1864,10 +1854,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+ ret += tcrypt_test("rmd160");
+ break;
+
+- case 41:
+- ret += tcrypt_test("blake2s-256");
+- break;
+-
+ case 42:
+ ret += tcrypt_test("blake2b-512");
+ break;
+@@ -2435,10 +2421,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+ test_hash_speed("rmd160", sec, generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ fallthrough;
+- case 316:
+- test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
+- if (mode > 300 && mode < 400) break;
+- fallthrough;
+ case 317:
+ test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+@@ -2547,10 +2529,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+ test_ahash_speed("rmd160", sec, generic_hash_speed_template);
+ if (mode > 400 && mode < 500) break;
+ fallthrough;
+- case 416:
+- test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
+- if (mode > 400 && mode < 500) break;
+- fallthrough;
+ case 417:
+ test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
+ if (mode > 400 && mode < 500) break;
+diff --git a/crypto/testmgr.c b/crypto/testmgr.c
+index 70f69f0910c9e..163a1283a866a 100644
+--- a/crypto/testmgr.c
++++ b/crypto/testmgr.c
+@@ -4329,30 +4329,6 @@ static const struct alg_test_desc alg_test_descs[] = {
+ .suite = {
+ .hash = __VECS(blake2b_512_tv_template)
+ }
+- }, {
+- .alg = "blake2s-128",
+- .test = alg_test_hash,
+- .suite = {
+- .hash = __VECS(blakes2s_128_tv_template)
+- }
+- }, {
+- .alg = "blake2s-160",
+- .test = alg_test_hash,
+- .suite = {
+- .hash = __VECS(blakes2s_160_tv_template)
+- }
+- }, {
+- .alg = "blake2s-224",
+- .test = alg_test_hash,
+- .suite = {
+- .hash = __VECS(blakes2s_224_tv_template)
+- }
+- }, {
+- .alg = "blake2s-256",
+- .test = alg_test_hash,
+- .suite = {
+- .hash = __VECS(blakes2s_256_tv_template)
+- }
+ }, {
+ .alg = "cbc(aes)",
+ .test = alg_test_skcipher,
+diff --git a/crypto/testmgr.h b/crypto/testmgr.h
+index e6fca34b5b257..2be20a590a606 100644
+--- a/crypto/testmgr.h
++++ b/crypto/testmgr.h
+@@ -32583,221 +32583,4 @@ static const struct hash_testvec blake2b_512_tv_template[] = {{
+ 0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, },
+ }};
+
+-static const struct hash_testvec blakes2s_128_tv_template[] = {{
+- .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
+- 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
+-}, {
+- .plaintext = blake2_ordered_sequence,
+- .psize = 64,
+- .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
+- 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 1,
+- .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
+- 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 7,
+- .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
+- 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 15,
+- .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
+- 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 247,
+- .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
+- 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 256,
+- .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
+- 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
+-}};
+-
+-static const struct hash_testvec blakes2s_160_tv_template[] = {{
+- .plaintext = blake2_ordered_sequence,
+- .psize = 7,
+- .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
+- 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
+- 0xe3, 0xf2, 0x84, 0xff, },
+-}, {
+- .plaintext = blake2_ordered_sequence,
+- .psize = 256,
+- .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
+- 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
+- 0x9b, 0x2d, 0x35, 0x05, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
+- 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
+- 0x79, 0x65, 0x32, 0x93, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 1,
+- .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
+- 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
+- 0xa2, 0x3a, 0x56, 0x9c, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 15,
+- .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
+- 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
+- 0x83, 0x39, 0x0f, 0x30, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 64,
+- .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
+- 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
+- 0xac, 0xa6, 0x81, 0x63, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 247,
+- .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
+- 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
+- 0x0a, 0xf6, 0x73, 0xe8, },
+-}};
+-
+-static const struct hash_testvec blakes2s_224_tv_template[] = {{
+- .plaintext = blake2_ordered_sequence,
+- .psize = 1,
+- .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
+- 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
+- 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
+- 0x48, 0x21, 0x97, 0xbb, },
+-}, {
+- .plaintext = blake2_ordered_sequence,
+- .psize = 247,
+- .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
+- 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
+- 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
+- 0x2b, 0xa4, 0xd5, 0xf6, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
+- 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
+- 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
+- 0xa7, 0x19, 0xfc, 0xb8, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 7,
+- .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
+- 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
+- 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
+- 0x7b, 0x45, 0xfe, 0x05, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 15,
+- .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
+- 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
+- 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
+- 0x25, 0xab, 0xc5, 0x02, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 64,
+- .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
+- 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
+- 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
+- 0x6a, 0x31, 0x83, 0xb5, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 256,
+- .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
+- 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
+- 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
+- 0xb3, 0xd7, 0xec, 0xcc, },
+-}};
+-
+-static const struct hash_testvec blakes2s_256_tv_template[] = {{
+- .plaintext = blake2_ordered_sequence,
+- .psize = 15,
+- .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+- 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+- 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+- 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+- 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+- 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+- 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 1,
+- .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
+- 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
+- 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
+- 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 7,
+- .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
+- 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
+- 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
+- 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
+-}, {
+- .ksize = 32,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 64,
+- .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+- 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+- 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+- 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
+-}, {
+- .ksize = 1,
+- .key = "B",
+- .plaintext = blake2_ordered_sequence,
+- .psize = 247,
+- .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
+- 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
+- 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
+- 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
+-}, {
+- .ksize = 16,
+- .key = blake2_ordered_sequence,
+- .plaintext = blake2_ordered_sequence,
+- .psize = 256,
+- .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
+- 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
+- 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
+- 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
+-}};
+-
+ #endif /* _CRYPTO_TESTMGR_H */
+diff --git a/crypto/xts.c b/crypto/xts.c
+index 6c12f30dbdd6d..de6cbcf69bbd6 100644
+--- a/crypto/xts.c
++++ b/crypto/xts.c
+@@ -203,12 +203,12 @@ static void xts_encrypt_done(struct crypto_async_request *areq, int err)
+ if (!err) {
+ struct xts_request_ctx *rctx = skcipher_request_ctx(req);
+
+- rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
++ rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+ err = xts_xor_tweak_post(req, true);
+
+ if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+ err = xts_cts_final(req, crypto_skcipher_encrypt);
+- if (err == -EINPROGRESS)
++ if (err == -EINPROGRESS || err == -EBUSY)
+ return;
+ }
+ }
+@@ -223,12 +223,12 @@ static void xts_decrypt_done(struct crypto_async_request *areq, int err)
+ if (!err) {
+ struct xts_request_ctx *rctx = skcipher_request_ctx(req);
+
+- rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
++ rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+ err = xts_xor_tweak_post(req, false);
+
+ if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+ err = xts_cts_final(req, crypto_skcipher_decrypt);
+- if (err == -EINPROGRESS)
++ if (err == -EINPROGRESS || err == -EBUSY)
+ return;
+ }
+ }
+@@ -466,3 +466,4 @@ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("XTS block cipher mode");
+ MODULE_ALIAS_CRYPTO("xts");
+ MODULE_IMPORT_NS(CRYPTO_INTERNAL);
++MODULE_SOFTDEP("pre: ecb");
+diff --git a/drivers/Makefile b/drivers/Makefile
+index be5d40ae14882..a110338c860c7 100644
+--- a/drivers/Makefile
++++ b/drivers/Makefile
+@@ -41,8 +41,7 @@ obj-$(CONFIG_DMADEVICES) += dma/
+ # SOC specific infrastructure drivers.
+ obj-y += soc/
+
+-obj-$(CONFIG_VIRTIO) += virtio/
+-obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/
++obj-y += virtio/
+ obj-$(CONFIG_VDPA) += vdpa/
+ obj-$(CONFIG_XEN) += xen/
+
+diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c
+index d726537fa16ce..7b2016534162c 100644
+--- a/drivers/accessibility/speakup/main.c
++++ b/drivers/accessibility/speakup/main.c
+@@ -1778,7 +1778,7 @@ static void speakup_con_update(struct vc_data *vc)
+ {
+ unsigned long flags;
+
+- if (!speakup_console[vc->vc_num] || spk_parked)
++ if (!speakup_console[vc->vc_num] || spk_parked || !synth)
+ return;
+ if (!spin_trylock_irqsave(&speakup_info.spinlock, flags))
+ /* Speakup output, discard */
+diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c
+index 580ec796816bc..78ca4987e619e 100644
+--- a/drivers/accessibility/speakup/speakup_dectlk.c
++++ b/drivers/accessibility/speakup/speakup_dectlk.c
+@@ -44,6 +44,7 @@ static struct var_t vars[] = {
+ { CAPS_START, .u.s = {"[:dv ap 160] " } },
+ { CAPS_STOP, .u.s = {"[:dv ap 100 ] " } },
+ { RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } },
++ { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } },
+ { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
+ { VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } },
+ { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } },
+diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c
+index 0d1f397cd8961..07373b3debd1e 100644
+--- a/drivers/accessibility/speakup/spk_ttyio.c
++++ b/drivers/accessibility/speakup/spk_ttyio.c
+@@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty,
+ }
+
+ if (!ldisc_data->buf_free)
+- /* ttyio_in will tty_schedule_flip */
++ /* ttyio_in will tty_flip_buffer_push */
+ return 0;
+
+ /* Make sure the consumer has read buf before we have seen
+@@ -312,7 +312,7 @@ static unsigned char ttyio_in(struct spk_synth *in_synth, int timeout)
+ mb();
+ ldisc_data->buf_free = true;
+ /* Let TTY push more characters */
+- tty_schedule_flip(tty->port);
++ tty_flip_buffer_push(tty->port);
+
+ return rv;
+ }
+@@ -354,6 +354,9 @@ void spk_ttyio_release(struct spk_synth *in_synth)
+ {
+ struct tty_struct *tty = in_synth->dev;
+
++ if (tty == NULL)
++ return;
++
+ tty_lock(tty);
+
+ if (tty->ops->close)
+diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
+index b0cb662233f1a..81aff651a0d49 100644
+--- a/drivers/acpi/ac.c
++++ b/drivers/acpi/ac.c
+@@ -61,6 +61,7 @@ static SIMPLE_DEV_PM_OPS(acpi_ac_pm, NULL, acpi_ac_resume);
+
+ static int ac_sleep_before_get_state_ms;
+ static int ac_check_pmic = 1;
++static int ac_only;
+
+ static struct acpi_driver acpi_ac_driver = {
+ .name = "ac",
+@@ -93,6 +94,11 @@ static int acpi_ac_get_state(struct acpi_ac *ac)
+ if (!ac)
+ return -EINVAL;
+
++ if (ac_only) {
++ ac->state = 1;
++ return 0;
++ }
++
+ status = acpi_evaluate_integer(ac->device->handle, "_PSR", NULL,
+ &ac->state);
+ if (ACPI_FAILURE(status)) {
+@@ -200,6 +206,12 @@ static int __init ac_do_not_check_pmic_quirk(const struct dmi_system_id *d)
+ return 0;
+ }
+
++static int __init ac_only_quirk(const struct dmi_system_id *d)
++{
++ ac_only = 1;
++ return 0;
++}
++
+ /* Please keep this list alphabetically sorted */
+ static const struct dmi_system_id ac_dmi_table[] __initconst = {
+ {
+@@ -209,6 +221,13 @@ static const struct dmi_system_id ac_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"),
+ },
+ },
++ {
++ /* Kodlix GK45 returning incorrect state */
++ .callback = ac_only_quirk,
++ .matches = {
++ DMI_MATCH(DMI_PRODUCT_NAME, "GK45"),
++ },
++ },
+ {
+ /* Lenovo Ideapad Miix 320, AXP288 PMIC, separate fuel-gauge */
+ .callback = ac_do_not_check_pmic_quirk,
+diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
+index 72f1fb77abcd0..e648158368a7d 100644
+--- a/drivers/acpi/acpi_extlog.c
++++ b/drivers/acpi/acpi_extlog.c
+@@ -12,6 +12,7 @@
+ #include <linux/ratelimit.h>
+ #include <linux/edac.h>
+ #include <linux/ras.h>
++#include <acpi/ghes.h>
+ #include <asm/cpu.h>
+ #include <asm/mce.h>
+
+@@ -138,8 +139,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
+ int cpu = mce->extcpu;
+ struct acpi_hest_generic_status *estatus, *tmp;
+ struct acpi_hest_generic_data *gdata;
+- const guid_t *fru_id = &guid_null;
+- char *fru_text = "";
++ const guid_t *fru_id;
++ char *fru_text;
+ guid_t *sec_type;
+ static u32 err_seq;
+
+@@ -160,17 +161,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
+
+ /* log event via trace */
+ err_seq++;
+- gdata = (struct acpi_hest_generic_data *)(tmp + 1);
+- if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+- fru_id = (guid_t *)gdata->fru_id;
+- if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+- fru_text = gdata->fru_text;
+- sec_type = (guid_t *)gdata->section_type;
+- if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+- struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+- if (gdata->error_data_length >= sizeof(*mem))
+- trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+- (u8)gdata->error_severity);
++ apei_estatus_for_each_section(tmp, gdata) {
++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
++ fru_id = (guid_t *)gdata->fru_id;
++ else
++ fru_id = &guid_null;
++ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
++ fru_text = gdata->fru_text;
++ else
++ fru_text = "";
++ sec_type = (guid_t *)gdata->section_type;
++ if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
++ struct cper_sec_mem_err *mem = (void *)(gdata + 1);
++
++ if (gdata->error_data_length >= sizeof(*mem))
++ trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
++ (u8)gdata->error_severity);
++ }
+ }
+
+ out:
+diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c
+index 6922a44b3ce70..a2056c4c8cb70 100644
+--- a/drivers/acpi/acpi_fpdt.c
++++ b/drivers/acpi/acpi_fpdt.c
+@@ -143,6 +143,23 @@ static const struct attribute_group boot_attr_group = {
+
+ static struct kobject *fpdt_kobj;
+
++#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT
++#include <linux/processor.h>
++static bool fpdt_address_valid(u64 address)
++{
++ /*
++ * On some systems the table contains invalid addresses
++ * with unsuppored high address bits set, check for this.
++ */
++ return !(address >> boot_cpu_data.x86_phys_bits);
++}
++#else
++static bool fpdt_address_valid(u64 address)
++{
++ return true;
++}
++#endif
++
+ static int fpdt_process_subtable(u64 address, u32 subtable_type)
+ {
+ struct fpdt_subtable_header *subtable_header;
+@@ -151,6 +168,11 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type)
+ u32 length, offset;
+ int result;
+
++ if (!fpdt_address_valid(address)) {
++ pr_info(FW_BUG "invalid physical address: 0x%llx!\n", address);
++ return -EINVAL;
++ }
++
+ subtable_header = acpi_os_map_memory(address, sizeof(*subtable_header));
+ if (!subtable_header)
+ return -ENOMEM;
+diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
+index 30b1f511c2af0..f609f9d62efdd 100644
+--- a/drivers/acpi/acpi_lpss.c
++++ b/drivers/acpi/acpi_lpss.c
+@@ -403,6 +403,9 @@ static int register_device_clock(struct acpi_device *adev,
+ if (!lpss_clk_dev)
+ lpt_register_clock_device();
+
++ if (IS_ERR(lpss_clk_dev))
++ return PTR_ERR(lpss_clk_dev);
++
+ clk_data = platform_get_drvdata(lpss_clk_dev);
+ if (!clk_data)
+ return -ENODEV;
+diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
+index 42ede059728ce..2b18b51f6351e 100644
+--- a/drivers/acpi/acpi_video.c
++++ b/drivers/acpi/acpi_video.c
+@@ -73,6 +73,7 @@ module_param(device_id_scheme, bool, 0444);
+ static int only_lcd = -1;
+ module_param(only_lcd, int, 0444);
+
++static bool may_report_brightness_keys;
+ static int register_count;
+ static DEFINE_MUTEX(register_count_mutex);
+ static DEFINE_MUTEX(video_list_lock);
+@@ -495,6 +496,22 @@ static const struct dmi_system_id video_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE R830"),
+ },
+ },
++ {
++ .callback = video_disable_backlight_sysfs_if,
++ .ident = "Toshiba Satellite Z830",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE Z830"),
++ },
++ },
++ {
++ .callback = video_disable_backlight_sysfs_if,
++ .ident = "Toshiba Portege Z830",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE Z830"),
++ },
++ },
+ /*
+ * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set
+ * but the IDs actually follow the Device ID Scheme.
+@@ -1222,6 +1239,9 @@ acpi_video_bus_get_one_device(struct acpi_device *device,
+ acpi_video_device_bind(video, data);
+ acpi_video_device_find_cap(data);
+
++ if (data->cap._BCM && data->cap._BCL)
++ may_report_brightness_keys = true;
++
+ mutex_lock(&video->device_list_lock);
+ list_add_tail(&data->entry, &video->video_device_list);
+ mutex_unlock(&video->device_list_lock);
+@@ -1689,6 +1709,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data)
+ break;
+ }
+
++ if (keycode)
++ may_report_brightness_keys = true;
++
+ acpi_notifier_call_chain(device, event, 0);
+
+ if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) {
+@@ -2251,6 +2274,7 @@ void acpi_video_unregister(void)
+ if (register_count) {
+ acpi_bus_unregister_driver(&acpi_video_bus);
+ register_count = 0;
++ may_report_brightness_keys = false;
+ }
+ mutex_unlock(&register_count_mutex);
+ }
+@@ -2272,13 +2296,7 @@ void acpi_video_unregister_backlight(void)
+
+ bool acpi_video_handles_brightness_key_presses(void)
+ {
+- bool have_video_busses;
+-
+- mutex_lock(&video_list_lock);
+- have_video_busses = !list_empty(&video_bus_head);
+- mutex_unlock(&video_list_lock);
+-
+- return have_video_busses &&
++ return may_report_brightness_keys &&
+ (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS);
+ }
+ EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses);
+diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
+index 59700433a96e5..f919811156b1f 100644
+--- a/drivers/acpi/acpica/Makefile
++++ b/drivers/acpi/acpica/Makefile
+@@ -3,7 +3,7 @@
+ # Makefile for ACPICA Core interpreter
+ #
+
+-ccflags-y := -Os -D_LINUX -DBUILDING_ACPICA
++ccflags-y := -D_LINUX -DBUILDING_ACPICA
+ ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
+
+ # use acpi.o to put all files here into acpi.o modparam namespace
+diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
+index d41b810e367c4..4366d36ef1198 100644
+--- a/drivers/acpi/acpica/acglobal.h
++++ b/drivers/acpi/acpica/acglobal.h
+@@ -226,6 +226,8 @@ extern struct acpi_bit_register_info
+ acpi_gbl_bit_register_info[ACPI_NUM_BITREG];
+ ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a);
+ ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b);
++ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a_s0);
++ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b_s0);
+
+ /*****************************************************************************
+ *
+diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
+index 810de0b4c1256..9c3ad33e926a6 100644
+--- a/drivers/acpi/acpica/achware.h
++++ b/drivers/acpi/acpica/achware.h
+@@ -101,8 +101,6 @@ acpi_status
+ acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
+ acpi_event_status *event_status);
+
+-acpi_status acpi_hw_disable_all_gpes(void);
+-
+ acpi_status acpi_hw_enable_all_runtime_gpes(void);
+
+ acpi_status acpi_hw_enable_all_wakeup_gpes(void);
+diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
+index 3615e1a6efd8a..b91155ea9c343 100644
+--- a/drivers/acpi/acpica/dbnames.c
++++ b/drivers/acpi/acpica/dbnames.c
+@@ -652,6 +652,9 @@ acpi_status acpi_db_display_objects(char *obj_type_arg, char *display_count_arg)
+ object_info =
+ ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_object_info));
+
++ if (!object_info)
++ return (AE_NO_MEMORY);
++
+ /* Walk the namespace from the root */
+
+ (void)acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
+diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
+index 8e011e59b9b48..ee1832ba39a24 100644
+--- a/drivers/acpi/acpica/dsmethod.c
++++ b/drivers/acpi/acpica/dsmethod.c
+@@ -517,7 +517,7 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
+ info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
+ if (!info) {
+ status = AE_NO_MEMORY;
+- goto cleanup;
++ goto pop_walk_state;
+ }
+
+ info->parameters = &this_walk_state->operands[0];
+@@ -529,7 +529,7 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
+
+ ACPI_FREE(info);
+ if (ACPI_FAILURE(status)) {
+- goto cleanup;
++ goto pop_walk_state;
+ }
+
+ next_walk_state->method_nesting_depth =
+@@ -575,6 +575,12 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread,
+
+ return_ACPI_STATUS(status);
+
++pop_walk_state:
++
++ /* On error, pop the walk state to be deleted from thread */
++
++ acpi_ds_pop_walk_state(thread);
++
+ cleanup:
+
+ /* On error, we must terminate the method properly */
+diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
+index fbe2ba05c82a6..1c862940cc5b2 100644
+--- a/drivers/acpi/acpica/dswstate.c
++++ b/drivers/acpi/acpica/dswstate.c
+@@ -576,9 +576,14 @@ acpi_ds_init_aml_walk(struct acpi_walk_state *walk_state,
+ ACPI_FUNCTION_TRACE(ds_init_aml_walk);
+
+ walk_state->parser_state.aml =
+- walk_state->parser_state.aml_start = aml_start;
+- walk_state->parser_state.aml_end =
+- walk_state->parser_state.pkg_end = aml_start + aml_length;
++ walk_state->parser_state.aml_start =
++ walk_state->parser_state.aml_end =
++ walk_state->parser_state.pkg_end = aml_start;
++ /* Avoid undefined behavior: applying zero offset to null pointer */
++ if (aml_length != 0) {
++ walk_state->parser_state.aml_end += aml_length;
++ walk_state->parser_state.pkg_end += aml_length;
++ }
+
+ /* The next_op of the next_walk will be the beginning of the method */
+
+diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
+index 06f3c9df1e22d..8618500f23b39 100644
+--- a/drivers/acpi/acpica/exfield.c
++++ b/drivers/acpi/acpica/exfield.c
+@@ -330,12 +330,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
+ obj_desc->field.base_byte_offset,
+ source_desc->buffer.pointer, data_length);
+
+- if ((obj_desc->field.region_obj->region.address ==
+- PCC_MASTER_SUBSPACE
+- && MASTER_SUBSPACE_COMMAND(obj_desc->field.
+- base_byte_offset))
+- || GENERIC_SUBSPACE_COMMAND(obj_desc->field.
+- base_byte_offset)) {
++ if (MASTER_SUBSPACE_COMMAND(obj_desc->field.base_byte_offset)) {
+
+ /* Perform the write */
+
+diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
+index b639e930d6429..44b7c350ed5ca 100644
+--- a/drivers/acpi/acpica/exoparg1.c
++++ b/drivers/acpi/acpica/exoparg1.c
+@@ -1007,7 +1007,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
+ (walk_state, return_desc,
+ &temp_desc);
+ if (ACPI_FAILURE(status)) {
+- goto cleanup;
++ return_ACPI_STATUS
++ (status);
+ }
+
+ return_desc = temp_desc;
+diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c
+index 803402aefaeb6..7ee2939c08cd4 100644
+--- a/drivers/acpi/acpica/hwesleep.c
++++ b/drivers/acpi/acpica/hwesleep.c
+@@ -104,7 +104,9 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state)
+
+ /* Flush caches, as per ACPI specification */
+
+- ACPI_FLUSH_CPU_CACHE();
++ if (sleep_state < ACPI_STATE_S4) {
++ ACPI_FLUSH_CPU_CACHE();
++ }
+
+ status = acpi_os_enter_sleep(sleep_state, sleep_control, 0);
+ if (status == AE_CTRL_TERMINATE) {
+@@ -147,17 +149,13 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state)
+
+ acpi_status acpi_hw_extended_wake_prep(u8 sleep_state)
+ {
+- acpi_status status;
+ u8 sleep_type_value;
+
+ ACPI_FUNCTION_TRACE(hw_extended_wake_prep);
+
+- status = acpi_get_sleep_type_data(ACPI_STATE_S0,
+- &acpi_gbl_sleep_type_a,
+- &acpi_gbl_sleep_type_b);
+- if (ACPI_SUCCESS(status)) {
++ if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) {
+ sleep_type_value =
+- ((acpi_gbl_sleep_type_a << ACPI_X_SLEEP_TYPE_POSITION) &
++ ((acpi_gbl_sleep_type_a_s0 << ACPI_X_SLEEP_TYPE_POSITION) &
+ ACPI_X_SLEEP_TYPE_MASK);
+
+ (void)acpi_write((u64)(sleep_type_value | ACPI_X_SLEEP_ENABLE),
+diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
+index 14baa13bf8482..5efa3d8e483e0 100644
+--- a/drivers/acpi/acpica/hwsleep.c
++++ b/drivers/acpi/acpica/hwsleep.c
+@@ -110,7 +110,9 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state)
+
+ /* Flush caches, as per ACPI specification */
+
+- ACPI_FLUSH_CPU_CACHE();
++ if (sleep_state < ACPI_STATE_S4) {
++ ACPI_FLUSH_CPU_CACHE();
++ }
+
+ status = acpi_os_enter_sleep(sleep_state, pm1a_control, pm1b_control);
+ if (status == AE_CTRL_TERMINATE) {
+@@ -179,7 +181,7 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state)
+
+ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state)
+ {
+- acpi_status status;
++ acpi_status status = AE_OK;
+ struct acpi_bit_register_info *sleep_type_reg_info;
+ struct acpi_bit_register_info *sleep_enable_reg_info;
+ u32 pm1a_control;
+@@ -192,10 +194,7 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state)
+ * This is unclear from the ACPI Spec, but it is required
+ * by some machines.
+ */
+- status = acpi_get_sleep_type_data(ACPI_STATE_S0,
+- &acpi_gbl_sleep_type_a,
+- &acpi_gbl_sleep_type_b);
+- if (ACPI_SUCCESS(status)) {
++ if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) {
+ sleep_type_reg_info =
+ acpi_hw_get_bit_register_info(ACPI_BITREG_SLEEP_TYPE);
+ sleep_enable_reg_info =
+@@ -216,9 +215,9 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state)
+
+ /* Insert the SLP_TYP bits */
+
+- pm1a_control |= (acpi_gbl_sleep_type_a <<
++ pm1a_control |= (acpi_gbl_sleep_type_a_s0 <<
+ sleep_type_reg_info->bit_position);
+- pm1b_control |= (acpi_gbl_sleep_type_b <<
++ pm1b_control |= (acpi_gbl_sleep_type_b_s0 <<
+ sleep_type_reg_info->bit_position);
+
+ /* Write the control registers and ignore any errors */
+diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
+index e15badf4077aa..c6716f90e013a 100644
+--- a/drivers/acpi/acpica/hwvalid.c
++++ b/drivers/acpi/acpica/hwvalid.c
+@@ -23,8 +23,8 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width);
+ *
+ * The table is used to implement the Microsoft port access rules that
+ * first appeared in Windows XP. Some ports are always illegal, and some
+- * ports are only illegal if the BIOS calls _OSI with a win_XP string or
+- * later (meaning that the BIOS itelf is post-XP.)
++ * ports are only illegal if the BIOS calls _OSI with nothing newer than
++ * the specific _OSI strings.
+ *
+ * This provides ACPICA with the desired port protections and
+ * Microsoft compatibility.
+@@ -145,7 +145,8 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
+
+ /* Port illegality may depend on the _OSI calls made by the BIOS */
+
+- if (acpi_gbl_osi_data >= port_info->osi_dependency) {
++ if (port_info->osi_dependency == ACPI_ALWAYS_ILLEGAL ||
++ acpi_gbl_osi_data == port_info->osi_dependency) {
+ ACPI_DEBUG_PRINT((ACPI_DB_VALUES,
+ "Denied AML access to port 0x%8.8X%8.8X/%X (%s 0x%.4X-0x%.4X)\n",
+ ACPI_FORMAT_UINT64(address),
+diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c
+index 89b12afed564e..ba77598ee43e8 100644
+--- a/drivers/acpi/acpica/hwxfsleep.c
++++ b/drivers/acpi/acpica/hwxfsleep.c
+@@ -162,8 +162,6 @@ acpi_status acpi_enter_sleep_state_s4bios(void)
+ return_ACPI_STATUS(status);
+ }
+
+- ACPI_FLUSH_CPU_CACHE();
+-
+ status = acpi_hw_write_port(acpi_gbl_FADT.smi_command,
+ (u32)acpi_gbl_FADT.s4_bios_request, 8);
+ if (ACPI_FAILURE(status)) {
+@@ -217,6 +215,13 @@ acpi_status acpi_enter_sleep_state_prep(u8 sleep_state)
+ return_ACPI_STATUS(status);
+ }
+
++ status = acpi_get_sleep_type_data(ACPI_STATE_S0,
++ &acpi_gbl_sleep_type_a_s0,
++ &acpi_gbl_sleep_type_b_s0);
++ if (ACPI_FAILURE(status)) {
++ acpi_gbl_sleep_type_a_s0 = ACPI_SLEEP_TYPE_INVALID;
++ }
++
+ /* Execute the _PTS method (Prepare To Sleep) */
+
+ arg_list.count = 1;
+diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
+index 499067daa22c6..1b8677f2ced37 100644
+--- a/drivers/acpi/acpica/nsrepair.c
++++ b/drivers/acpi/acpica/nsrepair.c
+@@ -181,8 +181,9 @@ acpi_ns_simple_repair(struct acpi_evaluate_info *info,
+ * Try to fix if there was no return object. Warning if failed to fix.
+ */
+ if (!return_object) {
+- if (expected_btypes && (!(expected_btypes & ACPI_RTYPE_NONE))) {
+- if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
++ if (expected_btypes) {
++ if (!(expected_btypes & ACPI_RTYPE_NONE) &&
++ package_index != ACPI_NOT_PACKAGE_ELEMENT) {
+ ACPI_WARN_PREDEFINED((AE_INFO,
+ info->full_pathname,
+ ACPI_WARN_ALWAYS,
+@@ -196,14 +197,15 @@ acpi_ns_simple_repair(struct acpi_evaluate_info *info,
+ if (ACPI_SUCCESS(status)) {
+ return (AE_OK); /* Repair was successful */
+ }
+- } else {
++ }
++
++ if (expected_btypes != ACPI_RTYPE_NONE) {
+ ACPI_WARN_PREDEFINED((AE_INFO,
+ info->full_pathname,
+ ACPI_WARN_ALWAYS,
+ "Missing expected return value"));
++ return (AE_AML_NO_RETURN_VALUE);
+ }
+-
+- return (AE_AML_NO_RETURN_VALUE);
+ }
+ }
+
+diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
+index 915c2433463d7..e7c30ce06e189 100644
+--- a/drivers/acpi/acpica/nswalk.c
++++ b/drivers/acpi/acpica/nswalk.c
+@@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type,
+
+ if (start_node == ACPI_ROOT_OBJECT) {
+ start_node = acpi_gbl_root_node;
++ if (!start_node) {
++ return_ACPI_STATUS(AE_NO_NAMESPACE);
++ }
+ }
+
+ /* Null child means "get first node" */
+diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
+index d9877153f4001..fdd503bb69c47 100644
+--- a/drivers/acpi/acpica/utcopy.c
++++ b/drivers/acpi/acpica/utcopy.c
+@@ -916,13 +916,6 @@ acpi_ut_copy_ipackage_to_ipackage(union acpi_operand_object *source_obj,
+ status = acpi_ut_walk_package_tree(source_obj, dest_obj,
+ acpi_ut_copy_ielement_to_ielement,
+ walk_state);
+- if (ACPI_FAILURE(status)) {
+-
+- /* On failure, delete the destination package object */
+-
+- acpi_ut_remove_reference(dest_obj);
+- }
+-
+ return_ACPI_STATUS(status);
+ }
+
+diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c
+index e5ba9795ec696..8d7736d2d2699 100644
+--- a/drivers/acpi/acpica/utdelete.c
++++ b/drivers/acpi/acpica/utdelete.c
+@@ -422,6 +422,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
+ ACPI_WARNING((AE_INFO,
+ "Obj %p, Reference Count is already zero, cannot decrement\n",
+ object));
++ return;
+ }
+
+ ACPI_DEBUG_PRINT_RAW((ACPI_DB_ALLOCATIONS,
+diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
+index 19e50fcbf4d6f..45973aa6e06d4 100644
+--- a/drivers/acpi/apei/bert.c
++++ b/drivers/acpi/apei/bert.c
+@@ -30,14 +30,25 @@
+ #undef pr_fmt
+ #define pr_fmt(fmt) "BERT: " fmt
+
++#define ACPI_BERT_PRINT_MAX_RECORDS 5
++#define ACPI_BERT_PRINT_MAX_LEN 1024
++
+ static int bert_disable;
+
++/*
++ * Print "all" the error records in the BERT table, but avoid huge spam to
++ * the console if the BIOS included oversize records, or too many records.
++ * Skipping some records here does not lose anything because the full
++ * data is available to user tools in:
++ * /sys/firmware/acpi/tables/data/BERT
++ */
+ static void __init bert_print_all(struct acpi_bert_region *region,
+ unsigned int region_len)
+ {
+ struct acpi_hest_generic_status *estatus =
+ (struct acpi_hest_generic_status *)region;
+ int remain = region_len;
++ int printed = 0, skipped = 0;
+ u32 estatus_len;
+
+ while (remain >= sizeof(struct acpi_bert_region)) {
+@@ -45,21 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ if (remain < estatus_len) {
+ pr_err(FW_BUG "Truncated status block (length: %u).\n",
+ estatus_len);
+- return;
++ break;
+ }
+
+ /* No more error records. */
+ if (!estatus->block_status)
+- return;
++ break;
+
+ if (cper_estatus_check(estatus)) {
+ pr_err(FW_BUG "Invalid error record.\n");
+- return;
++ break;
+ }
+
+- pr_info_once("Error records from previous boot:\n");
+-
+- cper_estatus_print(KERN_INFO HW_ERR, estatus);
++ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
++ printed < ACPI_BERT_PRINT_MAX_RECORDS) {
++ pr_info_once("Error records from previous boot:\n");
++ cper_estatus_print(KERN_INFO HW_ERR, estatus);
++ printed++;
++ } else {
++ skipped++;
++ }
+
+ /*
+ * Because the boot error source is "one-time polled" type,
+@@ -71,13 +87,16 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ estatus = (void *)estatus + estatus_len;
+ remain -= estatus_len;
+ }
++
++ if (skipped)
++ pr_info(HW_ERR "Skipped %d error records\n", skipped);
+ }
+
+ static int __init setup_bert_disable(char *str)
+ {
+ bert_disable = 1;
+
+- return 0;
++ return 1;
+ }
+ __setup("bert_disable", setup_bert_disable);
+
+diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
+index 2882450c443ed..2e0ab898cce3b 100644
+--- a/drivers/acpi/apei/einj.c
++++ b/drivers/acpi/apei/einj.c
+@@ -544,6 +544,8 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+ != REGION_INTERSECTS) &&
+ (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
++ != REGION_INTERSECTS) &&
++ (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED)
+ != REGION_INTERSECTS)))
+ return -EINVAL;
+
+diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
+index 242f3c2d55330..698d67cee0527 100644
+--- a/drivers/acpi/apei/erst.c
++++ b/drivers/acpi/apei/erst.c
+@@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear);
+ static int __init setup_erst_disable(char *str)
+ {
+ erst_disable = 1;
+- return 0;
++ return 1;
+ }
+
+ __setup("erst_disable", setup_erst_disable);
+diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
+index 0c8330ed1ffd5..8678e162181f4 100644
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -163,7 +163,7 @@ static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
+ clear_fixmap(fixmap_idx);
+ }
+
+-int ghes_estatus_pool_init(int num_ghes)
++int ghes_estatus_pool_init(unsigned int num_ghes)
+ {
+ unsigned long addr, len;
+ int rc;
+@@ -985,7 +985,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
+ ghes_estatus_cache_add(generic, estatus);
+ }
+
+- if (task_work_pending && current->mm != &init_mm) {
++ if (task_work_pending && current->mm) {
+ estatus_node->task_work.func = ghes_kick_task_work;
+ estatus_node->task_work_cpu = smp_processor_id();
+ ret = task_work_add(current, &estatus_node->task_work,
+@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
+ .remove = ghes_remove,
+ };
+
+-static int __init ghes_init(void)
++void __init ghes_init(void)
+ {
+ int rc;
+
++ sdei_init();
++
+ if (acpi_disabled)
+- return -ENODEV;
++ return;
+
+ switch (hest_disable) {
+ case HEST_NOT_FOUND:
+- return -ENODEV;
++ return;
+ case HEST_DISABLED:
+ pr_info(GHES_PFX "HEST is not enabled!\n");
+- return -EINVAL;
++ return;
+ default:
+ break;
+ }
+
+ if (ghes_disable) {
+ pr_info(GHES_PFX "GHES is not enabled!\n");
+- return -EINVAL;
++ return;
+ }
+
+ ghes_nmi_init_cxt();
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+- goto err;
++ return;
+
+ rc = apei_osc_setup();
+ if (rc == 0 && osc_sb_apei_support_acked)
+@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+ else
+ pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+-
+- return 0;
+-err:
+- return rc;
+ }
+-device_initcall(ghes_init);
+diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
+index 277f00b288d14..317bba602ad54 100644
+--- a/drivers/acpi/apei/hest.c
++++ b/drivers/acpi/apei/hest.c
+@@ -223,7 +223,7 @@ err:
+ static int __init setup_hest_disable(char *str)
+ {
+ hest_disable = HEST_DISABLED;
+- return 0;
++ return 1;
+ }
+
+ __setup("hest_disable", setup_hest_disable);
+diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
+index 3b23fb775ac45..f2f8f05662deb 100644
+--- a/drivers/acpi/arm64/iort.c
++++ b/drivers/acpi/arm64/iort.c
+@@ -1361,9 +1361,17 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
+ res[0].start = pmcg->page0_base_address;
+ res[0].end = pmcg->page0_base_address + SZ_4K - 1;
+ res[0].flags = IORESOURCE_MEM;
+- res[1].start = pmcg->page1_base_address;
+- res[1].end = pmcg->page1_base_address + SZ_4K - 1;
+- res[1].flags = IORESOURCE_MEM;
++ /*
++ * The initial version in DEN0049C lacked a way to describe register
++ * page 1, which makes it broken for most PMCG implementations; in
++ * that case, just let the driver fail gracefully if it expects to
++ * find a second memory resource.
++ */
++ if (node->revision > 0) {
++ res[1].start = pmcg->page1_base_address;
++ res[1].end = pmcg->page1_base_address + SZ_4K - 1;
++ res[1].flags = IORESOURCE_MEM;
++ }
+
+ if (pmcg->overflow_gsiv)
+ acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow",
+diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
+index dae91f906cea9..c7569151fd02a 100644
+--- a/drivers/acpi/battery.c
++++ b/drivers/acpi/battery.c
+@@ -53,12 +53,17 @@ static int battery_bix_broken_package;
+ static int battery_notification_delay_ms;
+ static int battery_ac_is_broken;
+ static int battery_check_pmic = 1;
++static int battery_quirk_notcharging;
+ static unsigned int cache_time = 1000;
+ module_param(cache_time, uint, 0644);
+ MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
+
+ static const struct acpi_device_id battery_device_ids[] = {
+ {"PNP0C0A", 0},
++
++ /* Microsoft Surface Go 3 */
++ {"MSHW0146", 0},
++
+ {"", 0},
+ };
+
+@@ -169,7 +174,7 @@ static int acpi_battery_is_charged(struct acpi_battery *battery)
+ return 1;
+
+ /* fallback to using design values for broken batteries */
+- if (battery->design_capacity == battery->capacity_now)
++ if (battery->design_capacity <= battery->capacity_now)
+ return 1;
+
+ /* we don't do any sort of metric based on percentages */
+@@ -217,6 +222,8 @@ static int acpi_battery_get_property(struct power_supply *psy,
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ else if (acpi_battery_is_charged(battery))
+ val->intval = POWER_SUPPLY_STATUS_FULL;
++ else if (battery_quirk_notcharging)
++ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ else
+ val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+ break;
+@@ -442,7 +449,7 @@ static int extract_package(struct acpi_battery *battery,
+
+ if (element->type == ACPI_TYPE_STRING ||
+ element->type == ACPI_TYPE_BUFFER)
+- strncpy(ptr, element->string.pointer, 32);
++ strscpy(ptr, element->string.pointer, 32);
+ else if (element->type == ACPI_TYPE_INTEGER) {
+ strncpy(ptr, (u8 *)&element->integer.value,
+ sizeof(u64));
+@@ -1111,6 +1118,12 @@ battery_do_not_check_pmic_quirk(const struct dmi_system_id *d)
+ return 0;
+ }
+
++static int __init battery_quirk_not_charging(const struct dmi_system_id *d)
++{
++ battery_quirk_notcharging = 1;
++ return 0;
++}
++
+ static const struct dmi_system_id bat_dmi_table[] __initconst = {
+ {
+ /* NEC LZ750/LS */
+@@ -1155,6 +1168,27 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+ },
+ },
++ {
++ /*
++ * On Lenovo ThinkPads the BIOS specification defines
++ * a state when the bits for charging and discharging
++ * are both set to 0. That state is "Not Charging".
++ */
++ .callback = battery_quirk_not_charging,
++ .ident = "Lenovo ThinkPad",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"),
++ },
++ },
++ {
++ /* Microsoft Surface Go 3 */
++ .callback = battery_notification_delay_quirk,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
++ },
++ },
+ {},
+ };
+
+diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
+index fa923a9292244..7774b603a7962 100644
+--- a/drivers/acpi/bus.c
++++ b/drivers/acpi/bus.c
+@@ -98,8 +98,8 @@ int acpi_bus_get_status(struct acpi_device *device)
+ acpi_status status;
+ unsigned long long sta;
+
+- if (acpi_device_always_present(device)) {
+- acpi_set_device_status(device, ACPI_STA_DEFAULT);
++ if (acpi_device_override_status(device, &sta)) {
++ acpi_set_device_status(device, sta);
+ return 0;
+ }
+
+@@ -332,21 +332,32 @@ static void acpi_bus_osc_negotiate_platform_control(void)
+ if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return;
+
+- kfree(context.ret.pointer);
++ capbuf_ret = context.ret.pointer;
++ if (context.ret.length <= OSC_SUPPORT_DWORD) {
++ kfree(context.ret.pointer);
++ return;
++ }
+
+- /* Now run _OSC again with query flag clear */
++ /*
++ * Now run _OSC again with query flag clear and with the caps
++ * supported by both the OS and the platform.
++ */
+ capbuf[OSC_QUERY_DWORD] = 0;
++ capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
++ kfree(context.ret.pointer);
+
+ if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return;
+
+ capbuf_ret = context.ret.pointer;
+- osc_sb_apei_support_acked =
+- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
+- osc_pc_lpi_support_confirmed =
+- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
+- osc_sb_native_usb4_support_confirmed =
+- capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
++ if (context.ret.length > OSC_SUPPORT_DWORD) {
++ osc_sb_apei_support_acked =
++ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
++ osc_pc_lpi_support_confirmed =
++ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
++ osc_sb_native_usb4_support_confirmed =
++ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
++ }
+
+ kfree(context.ret.pointer);
+ }
+@@ -1329,6 +1340,9 @@ static int __init acpi_init(void)
+
+ pci_mmcfg_late_init();
+ acpi_iort_init();
++ acpi_viot_early_init();
++ acpi_hest_init();
++ ghes_init();
+ acpi_scan_init();
+ acpi_ec_init();
+ acpi_debugfs_init();
+diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
+index bd482108310cf..7cc9183c8dc8e 100644
+--- a/drivers/acpi/cppc_acpi.c
++++ b/drivers/acpi/cppc_acpi.c
+@@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_PLATFORM_COMM)
+
++/* Check if a CPC register is in SystemMemory */
++#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
++ (cpc)->cpc_entry.reg.space_id == \
++ ACPI_ADR_SPACE_SYSTEM_MEMORY)
++
++/* Check if a CPC register is in SystemIo */
++#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
++ (cpc)->cpc_entry.reg.space_id == \
++ ACPI_ADR_SPACE_SYSTEM_IO)
++
+ /* Evaluates to True if reg is a NULL register descriptor */
+ #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \
+ (reg)->address == 0 && \
+@@ -411,7 +421,7 @@ bool acpi_cpc_valid(void)
+ struct cpc_desc *cpc_ptr;
+ int cpu;
+
+- for_each_possible_cpu(cpu) {
++ for_each_present_cpu(cpu) {
+ cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+ if (!cpc_ptr)
+ return false;
+@@ -587,33 +597,6 @@ static int pcc_data_alloc(int pcc_ss_id)
+ return 0;
+ }
+
+-/* Check if CPPC revision + num_ent combination is supported */
+-static bool is_cppc_supported(int revision, int num_ent)
+-{
+- int expected_num_ent;
+-
+- switch (revision) {
+- case CPPC_V2_REV:
+- expected_num_ent = CPPC_V2_NUM_ENT;
+- break;
+- case CPPC_V3_REV:
+- expected_num_ent = CPPC_V3_NUM_ENT;
+- break;
+- default:
+- pr_debug("Firmware exports unsupported CPPC revision: %d\n",
+- revision);
+- return false;
+- }
+-
+- if (expected_num_ent != num_ent) {
+- pr_debug("Firmware exports %d entries. Expected: %d for CPPC rev:%d\n",
+- num_ent, expected_num_ent, revision);
+- return false;
+- }
+-
+- return true;
+-}
+-
+ /*
+ * An example CPC table looks like the following.
+ *
+@@ -703,12 +686,16 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+ cpc_obj = &out_obj->package.elements[0];
+ if (cpc_obj->type == ACPI_TYPE_INTEGER) {
+ num_ent = cpc_obj->integer.value;
++ if (num_ent <= 1) {
++ pr_debug("Unexpected _CPC NumEntries value (%d) for CPU:%d\n",
++ num_ent, pr->id);
++ goto out_free;
++ }
+ } else {
+ pr_debug("Unexpected entry type(%d) for NumEntries\n",
+ cpc_obj->type);
+ goto out_free;
+ }
+- cpc_ptr->num_entries = num_ent;
+
+ /* Second entry should be revision. */
+ cpc_obj = &out_obj->package.elements[1];
+@@ -719,10 +706,32 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+ cpc_obj->type);
+ goto out_free;
+ }
+- cpc_ptr->version = cpc_rev;
+
+- if (!is_cppc_supported(cpc_rev, num_ent))
++ if (cpc_rev < CPPC_V2_REV) {
++ pr_debug("Unsupported _CPC Revision (%d) for CPU:%d\n", cpc_rev,
++ pr->id);
++ goto out_free;
++ }
++
++ /*
++ * Disregard _CPC if the number of entries in the return package is not
++ * as expected, but support future revisions being proper supersets of
++ * the v3 and only causing more entries to be returned by _CPC.
++ */
++ if ((cpc_rev == CPPC_V2_REV && num_ent != CPPC_V2_NUM_ENT) ||
++ (cpc_rev == CPPC_V3_REV && num_ent != CPPC_V3_NUM_ENT) ||
++ (cpc_rev > CPPC_V3_REV && num_ent <= CPPC_V3_NUM_ENT)) {
++ pr_debug("Unexpected number of _CPC return package entries (%d) for CPU:%d\n",
++ num_ent, pr->id);
+ goto out_free;
++ }
++ if (cpc_rev > CPPC_V3_REV) {
++ num_ent = CPPC_V3_NUM_ENT;
++ cpc_rev = CPPC_V3_REV;
++ }
++
++ cpc_ptr->num_entries = num_ent;
++ cpc_ptr->version = cpc_rev;
+
+ /* Iterate through remaining entries in _CPC */
+ for (i = 2; i < num_ent; i++) {
+@@ -1011,7 +1020,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
+ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
+ {
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
+- struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
++ struct cpc_register_resource *reg;
++
++ if (!cpc_desc) {
++ pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
++ return -ENODEV;
++ }
++
++ reg = &cpc_desc->cpc_regs[reg_idx];
+
+ if (CPC_IN_PCC(reg)) {
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+@@ -1366,6 +1382,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf);
+ * transition latency for performance change requests. The closest we have
+ * is the timing information from the PCCT tables which provides the info
+ * on the number and frequency of PCC commands the platform can handle.
++ *
++ * If desired_reg is in the SystemMemory or SystemIo ACPI address space,
++ * then assume there is no latency.
+ */
+ unsigned int cppc_get_transition_latency(int cpu_num)
+ {
+@@ -1391,7 +1410,9 @@ unsigned int cppc_get_transition_latency(int cpu_num)
+ return CPUFREQ_ETERNAL;
+
+ desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+- if (!CPC_IN_PCC(desired_reg))
++ if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg))
++ return 0;
++ else if (!CPC_IN_PCC(desired_reg))
+ return CPUFREQ_ETERNAL;
+
+ if (pcc_ss_id < 0)
+diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
+index e629e891d1bb3..472418a0e0cab 100644
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -166,6 +166,7 @@ struct acpi_ec_query {
+ struct transaction transaction;
+ struct work_struct work;
+ struct acpi_ec_query_handler *handler;
++ struct acpi_ec *ec;
+ };
+
+ static int acpi_ec_query(struct acpi_ec *ec, u8 *data);
+@@ -182,7 +183,6 @@ static struct workqueue_struct *ec_wq;
+ static struct workqueue_struct *ec_query_wq;
+
+ static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */
+-static int EC_FLAGS_IGNORE_DSDT_GPE; /* Needs ECDT GPE as correction setting */
+ static int EC_FLAGS_TRUST_DSDT_GPE; /* Needs DSDT GPE as correction setting */
+ static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
+
+@@ -452,6 +452,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec)
+ ec_dbg_evt("Command(%s) submitted/blocked",
+ acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
+ ec->nr_pending_queries++;
++ ec->events_in_progress++;
+ queue_work(ec_wq, &ec->work);
+ }
+ }
+@@ -518,7 +519,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
+ #ifdef CONFIG_PM_SLEEP
+ static void __acpi_ec_flush_work(void)
+ {
+- drain_workqueue(ec_wq); /* flush ec->work */
++ flush_workqueue(ec_wq); /* flush ec->work */
+ flush_workqueue(ec_query_wq); /* flush queries */
+ }
+
+@@ -1100,10 +1101,11 @@ static void acpi_ec_remove_query_handlers(struct acpi_ec *ec,
+ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
+ {
+ acpi_ec_remove_query_handlers(ec, false, query_bit);
++ flush_workqueue(ec_query_wq);
+ }
+ EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
+
+-static struct acpi_ec_query *acpi_ec_create_query(u8 *pval)
++static struct acpi_ec_query *acpi_ec_create_query(struct acpi_ec *ec, u8 *pval)
+ {
+ struct acpi_ec_query *q;
+ struct transaction *t;
+@@ -1111,11 +1113,13 @@ static struct acpi_ec_query *acpi_ec_create_query(u8 *pval)
+ q = kzalloc(sizeof (struct acpi_ec_query), GFP_KERNEL);
+ if (!q)
+ return NULL;
++
+ INIT_WORK(&q->work, acpi_ec_event_processor);
+ t = &q->transaction;
+ t->command = ACPI_EC_COMMAND_QUERY;
+ t->rdata = pval;
+ t->rlen = 1;
++ q->ec = ec;
+ return q;
+ }
+
+@@ -1132,13 +1136,21 @@ static void acpi_ec_event_processor(struct work_struct *work)
+ {
+ struct acpi_ec_query *q = container_of(work, struct acpi_ec_query, work);
+ struct acpi_ec_query_handler *handler = q->handler;
++ struct acpi_ec *ec = q->ec;
+
+ ec_dbg_evt("Query(0x%02x) started", handler->query_bit);
++
+ if (handler->func)
+ handler->func(handler->data);
+ else if (handler->handle)
+ acpi_evaluate_object(handler->handle, NULL, NULL, NULL);
++
+ ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit);
++
++ spin_lock_irq(&ec->lock);
++ ec->queries_in_progress--;
++ spin_unlock_irq(&ec->lock);
++
+ acpi_ec_delete_query(q);
+ }
+
+@@ -1148,7 +1160,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data)
+ int result;
+ struct acpi_ec_query *q;
+
+- q = acpi_ec_create_query(&value);
++ q = acpi_ec_create_query(ec, &value);
+ if (!q)
+ return -ENOMEM;
+
+@@ -1170,19 +1182,20 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data)
+ }
+
+ /*
+- * It is reported that _Qxx are evaluated in a parallel way on
+- * Windows:
++ * It is reported that _Qxx are evaluated in a parallel way on Windows:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=94411
+ *
+- * Put this log entry before schedule_work() in order to make
+- * it appearing before any other log entries occurred during the
+- * work queue execution.
++ * Put this log entry before queue_work() to make it appear in the log
++ * before any other messages emitted during workqueue handling.
+ */
+ ec_dbg_evt("Query(0x%02x) scheduled", value);
+- if (!queue_work(ec_query_wq, &q->work)) {
+- ec_dbg_evt("Query(0x%02x) overlapped", value);
+- result = -EBUSY;
+- }
++
++ spin_lock_irq(&ec->lock);
++
++ ec->queries_in_progress++;
++ queue_work(ec_query_wq, &q->work);
++
++ spin_unlock_irq(&ec->lock);
+
+ err_exit:
+ if (result)
+@@ -1240,6 +1253,10 @@ static void acpi_ec_event_handler(struct work_struct *work)
+ ec_dbg_evt("Event stopped");
+
+ acpi_ec_check_event(ec);
++
++ spin_lock_irqsave(&ec->lock, flags);
++ ec->events_in_progress--;
++ spin_unlock_irqrestore(&ec->lock, flags);
+ }
+
+ static void acpi_ec_handle_interrupt(struct acpi_ec *ec)
+@@ -1375,24 +1392,16 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
+ if (ec->data_addr == 0 || ec->command_addr == 0)
+ return AE_OK;
+
+- if (boot_ec && boot_ec_is_ecdt && EC_FLAGS_IGNORE_DSDT_GPE) {
+- /*
+- * Always inherit the GPE number setting from the ECDT
+- * EC.
+- */
+- ec->gpe = boot_ec->gpe;
+- } else {
+- /* Get GPE bit assignment (EC events). */
+- /* TODO: Add support for _GPE returning a package */
+- status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp);
+- if (ACPI_SUCCESS(status))
+- ec->gpe = tmp;
++ /* Get GPE bit assignment (EC events). */
++ /* TODO: Add support for _GPE returning a package */
++ status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp);
++ if (ACPI_SUCCESS(status))
++ ec->gpe = tmp;
++ /*
++ * Errors are non-fatal, allowing for ACPI Reduced Hardware
++ * platforms which use GpioInt instead of GPE.
++ */
+
+- /*
+- * Errors are non-fatal, allowing for ACPI Reduced Hardware
+- * platforms which use GpioInt instead of GPE.
+- */
+- }
+ /* Use the global lock for all EC transactions? */
+ tmp = 0;
+ acpi_evaluate_integer(handle, "_GLK", NULL, &tmp);
+@@ -1830,60 +1839,12 @@ static int ec_honor_dsdt_gpe(const struct dmi_system_id *id)
+ return 0;
+ }
+
+-/*
+- * Some DSDTs contain wrong GPE setting.
+- * Asus FX502VD/VE, GL702VMK, X550VXK, X580VD
+- * https://bugzilla.kernel.org/show_bug.cgi?id=195651
+- */
+-static int ec_honor_ecdt_gpe(const struct dmi_system_id *id)
+-{
+- pr_debug("Detected system needing ignore DSDT GPE setting.\n");
+- EC_FLAGS_IGNORE_DSDT_GPE = 1;
+- return 0;
+-}
+-
+ static const struct dmi_system_id ec_dmi_table[] __initconst = {
+ {
+ ec_correct_ecdt, "MSI MS-171F", {
+ DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MS-171F"),}, NULL},
+ {
+- ec_honor_ecdt_gpe, "ASUS FX502VD", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "FX502VD"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUS FX502VE", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "FX502VE"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUS GL702VMK", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "GL702VMK"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BA", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X505BA"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BP", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X505BP"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BA", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X542BA"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BP", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X542BP"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUS X550VXK", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X550VXK"),}, NULL},
+- {
+- ec_honor_ecdt_gpe, "ASUS X580VD", {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X580VD"),}, NULL},
+- {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=209989 */
+ ec_honor_dsdt_gpe, "HP Pavilion Gaming Laptop 15-cx0xxx", {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+@@ -2021,6 +1982,7 @@ void acpi_ec_set_gpe_wake_mask(u8 action)
+
+ bool acpi_ec_dispatch_gpe(void)
+ {
++ bool work_in_progress;
+ u32 ret;
+
+ if (!first_ec)
+@@ -2041,8 +2003,19 @@ bool acpi_ec_dispatch_gpe(void)
+ if (ret == ACPI_INTERRUPT_HANDLED)
+ pm_pr_dbg("ACPI EC GPE dispatched\n");
+
+- /* Flush the event and query workqueues. */
+- acpi_ec_flush_work();
++ /* Drain EC work. */
++ do {
++ acpi_ec_flush_work();
++
++ pm_pr_dbg("ACPI EC work flushed\n");
++
++ spin_lock_irq(&first_ec->lock);
++
++ work_in_progress = first_ec->events_in_progress +
++ first_ec->queries_in_progress > 0;
++
++ spin_unlock_irq(&first_ec->lock);
++ } while (work_in_progress && !pm_wakeup_pending());
+
+ return false;
+ }
+@@ -2138,13 +2111,6 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = {
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Thinkpad X1 Carbon 6th"),
+ },
+ },
+- {
+- .ident = "ThinkPad X1 Carbon 6th",
+- .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Carbon 6th"),
+- },
+- },
+ {
+ .ident = "ThinkPad X1 Yoga 3rd",
+ .matches = {
+diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
+index d91b560e88674..54b2be94d23dc 100644
+--- a/drivers/acpi/internal.h
++++ b/drivers/acpi/internal.h
+@@ -183,6 +183,8 @@ struct acpi_ec {
+ struct work_struct work;
+ unsigned long timestamp;
+ unsigned long nr_pending_queries;
++ unsigned int events_in_progress;
++ unsigned int queries_in_progress;
+ bool busy_polling;
+ unsigned int polling_guard;
+ };
+diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
+index 7dd80acf92c78..2575d6c51f898 100644
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -3676,8 +3676,8 @@ void acpi_nfit_shutdown(void *data)
+
+ mutex_lock(&acpi_desc->init_mutex);
+ set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
+- cancel_delayed_work_sync(&acpi_desc->dwork);
+ mutex_unlock(&acpi_desc->init_mutex);
++ cancel_delayed_work_sync(&acpi_desc->dwork);
+
+ /*
+ * Bounce the nvdimm bus lock to make sure any in-flight
+diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
+index c3d783aca196f..b42653707fdcd 100644
+--- a/drivers/acpi/numa/hmat.c
++++ b/drivers/acpi/numa/hmat.c
+@@ -563,17 +563,26 @@ static int initiator_cmp(void *priv, const struct list_head *a,
+ {
+ struct memory_initiator *ia;
+ struct memory_initiator *ib;
+- unsigned long *p_nodes = priv;
+
+ ia = list_entry(a, struct memory_initiator, node);
+ ib = list_entry(b, struct memory_initiator, node);
+
+- set_bit(ia->processor_pxm, p_nodes);
+- set_bit(ib->processor_pxm, p_nodes);
+-
+ return ia->processor_pxm - ib->processor_pxm;
+ }
+
++static int initiators_to_nodemask(unsigned long *p_nodes)
++{
++ struct memory_initiator *initiator;
++
++ if (list_empty(&initiators))
++ return -ENXIO;
++
++ list_for_each_entry(initiator, &initiators, node)
++ set_bit(initiator->processor_pxm, p_nodes);
++
++ return 0;
++}
++
+ static void hmat_register_target_initiators(struct memory_target *target)
+ {
+ static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
+@@ -610,7 +619,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
+ * initiators.
+ */
+ bitmap_zero(p_nodes, MAX_NUMNODES);
+- list_sort(p_nodes, &initiators, initiator_cmp);
++ list_sort(NULL, &initiators, initiator_cmp);
++ if (initiators_to_nodemask(p_nodes) < 0)
++ return;
++
+ if (!access0done) {
+ for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
+ loc = localities_types[i];
+@@ -644,8 +656,9 @@ static void hmat_register_target_initiators(struct memory_target *target)
+
+ /* Access 1 ignores Generic Initiators */
+ bitmap_zero(p_nodes, MAX_NUMNODES);
+- list_sort(p_nodes, &initiators, initiator_cmp);
+- best = 0;
++ if (initiators_to_nodemask(p_nodes) < 0)
++ return;
++
+ for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
+ loc = localities_types[i];
+ if (!loc)
+diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
+index 53cab975f612c..63b98eae5e75e 100644
+--- a/drivers/acpi/pci_mcfg.c
++++ b/drivers/acpi/pci_mcfg.c
+@@ -41,6 +41,8 @@ struct mcfg_fixup {
+ static struct mcfg_fixup mcfg_quirks[] = {
+ /* { OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, ops, cfgres }, */
+
++#ifdef CONFIG_ARM64
++
+ #define AL_ECAM(table_id, rev, seg, ops) \
+ { "AMAZON", table_id, rev, seg, MCFG_BUS_ANY, ops }
+
+@@ -169,6 +171,7 @@ static struct mcfg_fixup mcfg_quirks[] = {
+ ALTRA_ECAM_QUIRK(1, 13),
+ ALTRA_ECAM_QUIRK(1, 14),
+ ALTRA_ECAM_QUIRK(1, 15),
++#endif /* ARM64 */
+ };
+
+ static char mcfg_oem_id[ACPI_OEM_ID_SIZE];
+diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
+index d7deedf3548e0..223aa010dd8da 100644
+--- a/drivers/acpi/pci_root.c
++++ b/drivers/acpi/pci_root.c
+@@ -22,8 +22,6 @@
+ #include <linux/slab.h>
+ #include <linux/dmi.h>
+ #include <linux/platform_data/x86/apple.h>
+-#include <acpi/apei.h> /* for acpi_hest_init() */
+-
+ #include "internal.h"
+
+ #define ACPI_PCI_ROOT_CLASS "pci_bridge"
+@@ -938,7 +936,6 @@ out_release_info:
+
+ void __init acpi_pci_root_init(void)
+ {
+- acpi_hest_init();
+ if (acpi_pci_disabled)
+ return;
+
+diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c
+index a371f273f99dd..9cde299eba880 100644
+--- a/drivers/acpi/pmic/intel_pmic.c
++++ b/drivers/acpi/pmic/intel_pmic.c
+@@ -211,31 +211,36 @@ static acpi_status intel_pmic_regs_handler(u32 function,
+ void *handler_context, void *region_context)
+ {
+ struct intel_pmic_opregion *opregion = region_context;
+- int result = 0;
++ int result = -EINVAL;
++
++ if (function == ACPI_WRITE) {
++ switch (address) {
++ case 0:
++ return AE_OK;
++ case 1:
++ opregion->ctx.addr |= (*value64 & 0xff) << 8;
++ return AE_OK;
++ case 2:
++ opregion->ctx.addr |= *value64 & 0xff;
++ return AE_OK;
++ case 3:
++ opregion->ctx.val = *value64 & 0xff;
++ return AE_OK;
++ case 4:
++ if (*value64) {
++ result = regmap_write(opregion->regmap, opregion->ctx.addr,
++ opregion->ctx.val);
++ } else {
++ result = regmap_read(opregion->regmap, opregion->ctx.addr,
++ &opregion->ctx.val);
++ }
++ opregion->ctx.addr = 0;
++ }
++ }
+
+- switch (address) {
+- case 0:
+- return AE_OK;
+- case 1:
+- opregion->ctx.addr |= (*value64 & 0xff) << 8;
+- return AE_OK;
+- case 2:
+- opregion->ctx.addr |= *value64 & 0xff;
++ if (function == ACPI_READ && address == 3) {
++ *value64 = opregion->ctx.val;
+ return AE_OK;
+- case 3:
+- opregion->ctx.val = *value64 & 0xff;
+- return AE_OK;
+- case 4:
+- if (*value64) {
+- result = regmap_write(opregion->regmap, opregion->ctx.addr,
+- opregion->ctx.val);
+- } else {
+- result = regmap_read(opregion->regmap, opregion->ctx.addr,
+- &opregion->ctx.val);
+- if (result == 0)
+- *value64 = opregion->ctx.val;
+- }
+- memset(&opregion->ctx, 0x00, sizeof(opregion->ctx));
+ }
+
+ if (result < 0) {
+diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
+index f0ed4414edb1f..c95eedd58f5bf 100644
+--- a/drivers/acpi/power.c
++++ b/drivers/acpi/power.c
+@@ -52,7 +52,6 @@ struct acpi_power_resource {
+ u32 order;
+ unsigned int ref_count;
+ u8 state;
+- bool wakeup_enabled;
+ struct mutex resource_lock;
+ struct list_head dependents;
+ };
+@@ -615,20 +614,19 @@ int acpi_power_wakeup_list_init(struct list_head *list, int *system_level_p)
+
+ list_for_each_entry(entry, list, node) {
+ struct acpi_power_resource *resource = entry->resource;
+- int result;
+ u8 state;
+
+ mutex_lock(&resource->resource_lock);
+
+- result = acpi_power_get_state(resource, &state);
+- if (result) {
+- mutex_unlock(&resource->resource_lock);
+- return result;
+- }
+- if (state == ACPI_POWER_RESOURCE_STATE_ON) {
+- resource->ref_count++;
+- resource->wakeup_enabled = true;
+- }
++ /*
++ * Make sure that the power resource state and its reference
++ * counter value are consistent with each other.
++ */
++ if (!resource->ref_count &&
++ !acpi_power_get_state(resource, &state) &&
++ state == ACPI_POWER_RESOURCE_STATE_ON)
++ __acpi_power_off(resource);
++
+ if (system_level > resource->system_level)
+ system_level = resource->system_level;
+
+@@ -711,7 +709,6 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
+ */
+ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state)
+ {
+- struct acpi_power_resource_entry *entry;
+ int err = 0;
+
+ if (!dev || !dev->wakeup.flags.valid)
+@@ -722,26 +719,13 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state)
+ if (dev->wakeup.prepare_count++)
+ goto out;
+
+- list_for_each_entry(entry, &dev->wakeup.resources, node) {
+- struct acpi_power_resource *resource = entry->resource;
+-
+- mutex_lock(&resource->resource_lock);
+-
+- if (!resource->wakeup_enabled) {
+- err = acpi_power_on_unlocked(resource);
+- if (!err)
+- resource->wakeup_enabled = true;
+- }
+-
+- mutex_unlock(&resource->resource_lock);
+-
+- if (err) {
+- dev_err(&dev->dev,
+- "Cannot turn wakeup power resources on\n");
+- dev->wakeup.flags.valid = 0;
+- goto out;
+- }
++ err = acpi_power_on_list(&dev->wakeup.resources);
++ if (err) {
++ dev_err(&dev->dev, "Cannot turn on wakeup power resources\n");
++ dev->wakeup.flags.valid = 0;
++ goto out;
+ }
++
+ /*
+ * Passing 3 as the third argument below means the device may be
+ * put into arbitrary power state afterward.
+@@ -771,39 +755,31 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
+
+ mutex_lock(&acpi_device_lock);
+
+- if (--dev->wakeup.prepare_count > 0)
++ /* Do nothing if wakeup power has not been enabled for this device. */
++ if (dev->wakeup.prepare_count <= 0)
+ goto out;
+
+- /*
+- * Executing the code below even if prepare_count is already zero when
+- * the function is called may be useful, for example for initialisation.
+- */
+- if (dev->wakeup.prepare_count < 0)
+- dev->wakeup.prepare_count = 0;
++ if (--dev->wakeup.prepare_count > 0)
++ goto out;
+
+ err = acpi_device_sleep_wake(dev, 0, 0, 0);
+ if (err)
+ goto out;
+
++ /*
++ * All of the power resources in the list need to be turned off even if
++ * there are errors.
++ */
+ list_for_each_entry(entry, &dev->wakeup.resources, node) {
+- struct acpi_power_resource *resource = entry->resource;
+-
+- mutex_lock(&resource->resource_lock);
+-
+- if (resource->wakeup_enabled) {
+- err = acpi_power_off_unlocked(resource);
+- if (!err)
+- resource->wakeup_enabled = false;
+- }
+-
+- mutex_unlock(&resource->resource_lock);
++ int ret;
+
+- if (err) {
+- dev_err(&dev->dev,
+- "Cannot turn wakeup power resources off\n");
+- dev->wakeup.flags.valid = 0;
+- break;
+- }
++ ret = acpi_power_off(entry->resource);
++ if (ret && !err)
++ err = ret;
++ }
++ if (err) {
++ dev_err(&dev->dev, "Cannot turn off wakeup power resources\n");
++ dev->wakeup.flags.valid = 0;
+ }
+
+ out:
+diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
+index 89c22bc550570..09c0af8a46f0a 100644
+--- a/drivers/acpi/prmt.c
++++ b/drivers/acpi/prmt.c
+@@ -219,6 +219,11 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
+ efi_status_t status;
+ struct prm_context_buffer context;
+
++ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
++ pr_err_ratelimited("PRM: EFI runtime services no longer available\n");
++ return AE_NO_HANDLER;
++ }
++
+ /*
+ * The returned acpi_status will always be AE_OK. Error values will be
+ * saved in the first byte of the PRM message buffer to be used by ASL.
+@@ -308,6 +313,11 @@ void __init init_prmt(void)
+
+ pr_info("PRM: found %u modules\n", mc);
+
++ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
++ pr_err("PRM: EFI runtime services unavailable\n");
++ return;
++ }
++
+ status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
+ ACPI_ADR_SPACE_PLATFORM_RT,
+ &acpi_platformrt_space_handler,
+diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
+index f37fba9e5ba0b..e9116db1e3527 100644
+--- a/drivers/acpi/processor_idle.c
++++ b/drivers/acpi/processor_idle.c
+@@ -531,10 +531,27 @@ static void wait_for_freeze(void)
+ /* No delay is needed if we are in guest */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
++ /*
++ * Modern (>=Nehalem) Intel systems use ACPI via intel_idle,
++ * not this code. Assume that any Intel systems using this
++ * are ancient and may need the dummy wait. This also assumes
++ * that the motivating chipset issue was Intel-only.
++ */
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
++ return;
+ #endif
+- /* Dummy wait op - must do something useless after P_LVL2 read
+- because chipsets cannot guarantee that STPCLK# signal
+- gets asserted in time to freeze execution properly. */
++ /*
++ * Dummy wait op - must do something useless after P_LVL2 read
++ * because chipsets cannot guarantee that STPCLK# signal gets
++ * asserted in time to freeze execution properly
++ *
++ * This workaround has been in place since the original ACPI
++ * implementation was merged, circa 2002.
++ *
++ * If a profile is pointing to this instruction, please first
++ * consider moving your system to a more modern idle
++ * mechanism.
++ */
+ inl(acpi_gbl_FADT.xpm_timer_block.address);
+ }
+
+@@ -604,7 +621,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
+ * @cx: Target state context
+ * @index: index of target state
+ */
+-static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
++static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv,
+ struct acpi_processor *pr,
+ struct acpi_processor_cx *cx,
+ int index)
+@@ -661,7 +678,7 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
+ return index;
+ }
+
+-static int acpi_idle_enter(struct cpuidle_device *dev,
++static int __cpuidle acpi_idle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+ {
+ struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
+@@ -690,7 +707,7 @@ static int acpi_idle_enter(struct cpuidle_device *dev,
+ return index;
+ }
+
+-static int acpi_idle_enter_s2idle(struct cpuidle_device *dev,
++static int __cpuidle acpi_idle_enter_s2idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+ {
+ struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
+@@ -789,9 +806,11 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
+ state->enter = acpi_idle_enter;
+
+ state->flags = 0;
+- if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2) {
++ if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ||
++ cx->type == ACPI_STATE_C3) {
+ state->enter_dead = acpi_idle_play_dead;
+- drv->safe_state_index = count;
++ if (cx->type != ACPI_STATE_C3)
++ drv->safe_state_index = count;
+ }
+ /*
+ * Halt-induced C1 is not good for ->enter_s2idle, because it
+@@ -1075,6 +1094,11 @@ static int flatten_lpi_states(struct acpi_processor *pr,
+ return 0;
+ }
+
++int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
++{
++ return -EOPNOTSUPP;
++}
++
+ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
+ {
+ int ret, i;
+@@ -1083,6 +1107,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
+ struct acpi_device *d = NULL;
+ struct acpi_lpi_states_array info[2], *tmp, *prev, *curr;
+
++ /* make sure our architecture has support */
++ ret = acpi_processor_ffh_lpi_probe(pr->id);
++ if (ret == -EOPNOTSUPP)
++ return ret;
++
+ if (!osc_pc_lpi_support_confirmed)
+ return -EOPNOTSUPP;
+
+@@ -1134,11 +1163,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
+ return 0;
+ }
+
+-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
+-{
+- return -ENODEV;
+-}
+-
+ int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
+ {
+ return -ENODEV;
+diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
+index 8c3f82c9fff35..18fb04523f93b 100644
+--- a/drivers/acpi/processor_pdc.c
++++ b/drivers/acpi/processor_pdc.c
+@@ -14,6 +14,8 @@
+ #include <linux/acpi.h>
+ #include <acpi/processor.h>
+
++#include <xen/xen.h>
++
+ #include "internal.h"
+
+ static bool __init processor_physically_present(acpi_handle handle)
+@@ -47,6 +49,15 @@ static bool __init processor_physically_present(acpi_handle handle)
+ return false;
+ }
+
++ if (xen_initial_domain())
++ /*
++ * When running as a Xen dom0 the number of processors Linux
++ * sees can be different from the real number of processors on
++ * the system, and we still need to execute _PDC for all of
++ * them.
++ */
++ return xen_processor_present(acpi_id);
++
+ type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
+ cpuid = acpi_get_cpuid(handle, type, acpi_id);
+
+diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
+index 757a98f6d7a24..1696700fd2fb5 100644
+--- a/drivers/acpi/processor_perflib.c
++++ b/drivers/acpi/processor_perflib.c
+@@ -53,6 +53,8 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
+ {
+ acpi_status status = 0;
+ unsigned long long ppc = 0;
++ s32 qos_value;
++ int index;
+ int ret;
+
+ if (!pr)
+@@ -72,17 +74,30 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
+ }
+ }
+
++ index = ppc;
++
++ if (pr->performance_platform_limit == index ||
++ ppc >= pr->performance->state_count)
++ return 0;
++
+ pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
+- (int)ppc, ppc ? "" : "not");
++ index, index ? "is" : "is not");
+
+- pr->performance_platform_limit = (int)ppc;
++ pr->performance_platform_limit = index;
+
+- if (ppc >= pr->performance->state_count ||
+- unlikely(!freq_qos_request_active(&pr->perflib_req)))
++ if (unlikely(!freq_qos_request_active(&pr->perflib_req)))
+ return 0;
+
+- ret = freq_qos_update_request(&pr->perflib_req,
+- pr->performance->states[ppc].core_frequency * 1000);
++ /*
++ * If _PPC returns 0, it means that all of the available states can be
++ * used ("no limit").
++ */
++ if (index == 0)
++ qos_value = FREQ_QOS_MAX_DEFAULT_VALUE;
++ else
++ qos_value = pr->performance->states[index].core_frequency * 1000;
++
++ ret = freq_qos_update_request(&pr->perflib_req, qos_value);
+ if (ret < 0) {
+ pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n",
+ pr->id, ret);
+@@ -165,9 +180,16 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy)
+ if (!pr)
+ continue;
+
++ /*
++ * Reset performance_platform_limit in case there is a stale
++ * value in it, so as to make it match the "no limit" QoS value
++ * below.
++ */
++ pr->performance_platform_limit = 0;
++
+ ret = freq_qos_add_request(&policy->constraints,
+- &pr->perflib_req,
+- FREQ_QOS_MAX, INT_MAX);
++ &pr->perflib_req, FREQ_QOS_MAX,
++ FREQ_QOS_MAX_DEFAULT_VALUE);
+ if (ret < 0)
+ pr_err("Failed to add freq constraint for CPU%d (%d)\n",
+ cpu, ret);
+diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
+index a3d34e3f9f94b..921a0b5a58e58 100644
+--- a/drivers/acpi/processor_thermal.c
++++ b/drivers/acpi/processor_thermal.c
+@@ -144,7 +144,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
+ unsigned int cpu;
+
+ for_each_cpu(cpu, policy->related_cpus) {
+- struct acpi_processor *pr = per_cpu(processors, policy->cpu);
++ struct acpi_processor *pr = per_cpu(processors, cpu);
+
+ if (pr)
+ freq_qos_remove_request(&pr->thermal_req);
+diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
+index e312ebaed8db4..488915328646e 100644
+--- a/drivers/acpi/property.c
++++ b/drivers/acpi/property.c
+@@ -155,10 +155,10 @@ static bool acpi_nondev_subnode_ok(acpi_handle scope,
+ return acpi_nondev_subnode_data_ok(handle, link, list, parent);
+ }
+
+-static int acpi_add_nondev_subnodes(acpi_handle scope,
+- const union acpi_object *links,
+- struct list_head *list,
+- struct fwnode_handle *parent)
++static bool acpi_add_nondev_subnodes(acpi_handle scope,
++ const union acpi_object *links,
++ struct list_head *list,
++ struct fwnode_handle *parent)
+ {
+ bool ret = false;
+ int i;
+@@ -433,6 +433,16 @@ void acpi_init_properties(struct acpi_device *adev)
+ acpi_extract_apple_properties(adev);
+ }
+
++static void acpi_free_device_properties(struct list_head *list)
++{
++ struct acpi_device_properties *props, *tmp;
++
++ list_for_each_entry_safe(props, tmp, list, list) {
++ list_del(&props->list);
++ kfree(props);
++ }
++}
++
+ static void acpi_destroy_nondev_subnodes(struct list_head *list)
+ {
+ struct acpi_data_node *dn, *next;
+@@ -445,22 +455,18 @@ static void acpi_destroy_nondev_subnodes(struct list_head *list)
+ wait_for_completion(&dn->kobj_done);
+ list_del(&dn->sibling);
+ ACPI_FREE((void *)dn->data.pointer);
++ acpi_free_device_properties(&dn->data.properties);
+ kfree(dn);
+ }
+ }
+
+ void acpi_free_properties(struct acpi_device *adev)
+ {
+- struct acpi_device_properties *props, *tmp;
+-
+ acpi_destroy_nondev_subnodes(&adev->data.subnodes);
+ ACPI_FREE((void *)adev->data.pointer);
+ adev->data.of_compatible = NULL;
+ adev->data.pointer = NULL;
+- list_for_each_entry_safe(props, tmp, &adev->data.properties, list) {
+- list_del(&props->list);
+- kfree(props);
+- }
++ acpi_free_device_properties(&adev->data.properties);
+ }
+
+ /**
+@@ -685,7 +691,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
+ */
+ if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) {
+ if (index)
+- return -EINVAL;
++ return -ENOENT;
+
+ ret = acpi_bus_get_device(obj->reference.handle, &device);
+ if (ret)
+@@ -1090,15 +1096,10 @@ struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
+ /* All data nodes have parent pointer so just return that */
+ return to_acpi_data_node(fwnode)->parent;
+ } else if (is_acpi_device_node(fwnode)) {
+- acpi_handle handle, parent_handle;
++ struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
+
+- handle = to_acpi_device_node(fwnode)->handle;
+- if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) {
+- struct acpi_device *adev;
+-
+- if (!acpi_bus_get_device(parent_handle, &adev))
+- return acpi_fwnode_handle(adev);
+- }
++ if (dev)
++ return acpi_fwnode_handle(to_acpi_device(dev));
+ }
+
+ return NULL;
+diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
+index ee78a210c6068..b0c7ae50a8d79 100644
+--- a/drivers/acpi/resource.c
++++ b/drivers/acpi/resource.c
+@@ -16,6 +16,7 @@
+ #include <linux/ioport.h>
+ #include <linux/slab.h>
+ #include <linux/irq.h>
++#include <linux/dmi.h>
+
+ #ifdef CONFIG_X86
+ #define valid_IRQ(i) (((i) != 0) && ((i) != 2))
+@@ -380,9 +381,157 @@ unsigned int acpi_dev_get_irq_type(int triggering, int polarity)
+ }
+ EXPORT_SYMBOL_GPL(acpi_dev_get_irq_type);
+
++static const struct dmi_system_id medion_laptop[] = {
++ {
++ .ident = "MEDION P15651",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
++ DMI_MATCH(DMI_BOARD_NAME, "M15T"),
++ },
++ },
++ {
++ .ident = "MEDION S17405",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
++ DMI_MATCH(DMI_BOARD_NAME, "M17T"),
++ },
++ },
++ {
++ .ident = "MEDION S17413",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
++ DMI_MATCH(DMI_BOARD_NAME, "M1xA"),
++ },
++ },
++ { }
++};
++
++static const struct dmi_system_id asus_laptop[] = {
++ {
++ .ident = "Asus Vivobook K3402ZA",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_BOARD_NAME, "K3402ZA"),
++ },
++ },
++ {
++ .ident = "Asus Vivobook K3502ZA",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_BOARD_NAME, "K3502ZA"),
++ },
++ },
++ { }
++};
++
++static const struct dmi_system_id lenovo_laptop[] = {
++ {
++ .ident = "LENOVO IdeaPad Flex 5 14ALC7",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "82R9"),
++ },
++ },
++ {
++ .ident = "LENOVO IdeaPad Flex 5 16ALC7",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "82RA"),
++ },
++ },
++ { }
++};
++
++static const struct dmi_system_id tongfang_gm_rg[] = {
++ {
++ .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
++ },
++ },
++ { }
++};
++
++static const struct dmi_system_id maingear_laptop[] = {
++ {
++ .ident = "MAINGEAR Vector Pro 2 15",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
++ }
++ },
++ {
++ .ident = "MAINGEAR Vector Pro 2 17",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
++ },
++ },
++ { }
++};
++
++static const struct dmi_system_id lg_laptop[] = {
++ {
++ .ident = "LG Electronics 17U70P",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
++ DMI_MATCH(DMI_BOARD_NAME, "17U70P"),
++ },
++ },
++ { }
++};
++
++struct irq_override_cmp {
++ const struct dmi_system_id *system;
++ unsigned char irq;
++ unsigned char triggering;
++ unsigned char polarity;
++ unsigned char shareable;
++ bool override;
++};
++
++static const struct irq_override_cmp override_table[] = {
++ { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
++ { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
++ { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
++ { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
++ { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
++ { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
++ { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
++};
++
++static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
++ u8 shareable)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(override_table); i++) {
++ const struct irq_override_cmp *entry = &override_table[i];
++
++ if (dmi_check_system(entry->system) &&
++ entry->irq == gsi &&
++ entry->triggering == triggering &&
++ entry->polarity == polarity &&
++ entry->shareable == shareable)
++ return entry->override;
++ }
++
++#ifdef CONFIG_X86
++ /*
++ * IRQ override isn't needed on modern AMD Zen systems and
++ * this override breaks active low IRQs on AMD Ryzen 6000 and
++ * newer systems. Skip it.
++ */
++ if (boot_cpu_has(X86_FEATURE_ZEN))
++ return false;
++#endif
++
++ return true;
++}
++
+ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
+ u8 triggering, u8 polarity, u8 shareable,
+- bool legacy)
++ bool check_override)
+ {
+ int irq, p, t;
+
+@@ -401,7 +550,9 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
+ * using extended IRQ descriptors we take the IRQ configuration
+ * from _CRS directly.
+ */
+- if (legacy && !acpi_get_override_irq(gsi, &t, &p)) {
++ if (check_override &&
++ acpi_dev_irq_override(gsi, triggering, polarity, shareable) &&
++ !acpi_get_override_irq(gsi, &t, &p)) {
+ u8 trig = t ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+ u8 pol = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+
+@@ -656,6 +807,23 @@ int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list)
+ }
+ EXPORT_SYMBOL_GPL(acpi_dev_get_dma_resources);
+
++/**
++ * acpi_dev_get_memory_resources - Get current memory resources of a device.
++ * @adev: ACPI device node to get the resources for.
++ * @list: Head of the resultant list of resources (must be empty).
++ *
++ * This is a helper function that locates all memory type resources of @adev
++ * with acpi_dev_get_resources().
++ *
++ * The number of resources in the output list is returned on success, an error
++ * code reflecting the error condition is returned otherwise.
++ */
++int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list)
++{
++ return acpi_dev_get_resources(adev, list, is_memory, NULL);
++}
++EXPORT_SYMBOL_GPL(acpi_dev_get_memory_resources);
++
+ /**
+ * acpi_dev_filter_resource_type - Filter ACPI resource according to resource
+ * types
+diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
+index 5b54c80b9d32a..ae74720888dbf 100644
+--- a/drivers/acpi/scan.c
++++ b/drivers/acpi/scan.c
+@@ -793,6 +793,7 @@ static bool acpi_info_matches_ids(struct acpi_device_info *info,
+ static const char * const acpi_ignore_dep_ids[] = {
+ "PNP0D80", /* Windows-compatible System Power Management Controller */
+ "INT33BD", /* Intel Baytrail Mailbox Device */
++ "LATT2021", /* Lattice FW Update Client Driver */
+ NULL
+ };
+
+@@ -1690,6 +1691,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
+ {
+ struct list_head resource_list;
+ bool is_serial_bus_slave = false;
++ static const struct acpi_device_id ignore_serial_bus_ids[] = {
+ /*
+ * These devices have multiple I2cSerialBus resources and an i2c-client
+ * must be instantiated for each, each with its own i2c_device_id.
+@@ -1698,11 +1700,18 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
+ * drivers/platform/x86/i2c-multi-instantiate.c driver, which knows
+ * which i2c_device_id to use for each resource.
+ */
+- static const struct acpi_device_id i2c_multi_instantiate_ids[] = {
+ {"BSG1160", },
+ {"BSG2150", },
+ {"INT33FE", },
+ {"INT3515", },
++ /*
++ * HIDs of devices with a UartSerialBusV2 resource for which userspace
++ * expects a regular tty cdev to be created (instead of the in-kernel
++ * serdev) and which have a kernel driver that expects a platform_dev
++ * such as the rfkill-gpio driver.
++ */
++ {"BCM4752", },
++ {"LNV4752", },
+ {}
+ };
+
+@@ -1716,8 +1725,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
+ fwnode_property_present(&device->fwnode, "baud")))
+ return true;
+
+- /* Instantiate a pdev for the i2c-multi-instantiate drv to bind to */
+- if (!acpi_match_device_ids(device, i2c_multi_instantiate_ids))
++ if (!acpi_match_device_ids(device, ignore_serial_bus_ids))
+ return false;
+
+ INIT_LIST_HEAD(&resource_list);
+diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
+index 3023224515abe..b277e25b276ce 100644
+--- a/drivers/acpi/sleep.c
++++ b/drivers/acpi/sleep.c
+@@ -361,6 +361,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "80E3"),
+ },
+ },
++ {
++ .callback = init_nvs_save_s3,
++ .ident = "Lenovo G40-45",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "80E1"),
++ },
++ },
+ /*
+ * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
+ * the Low Power S0 Idle firmware interface (see
+@@ -374,6 +382,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"),
+ },
+ },
++ /*
++ * ASUS B1400CEAE hangs on resume from suspend (see
++ * https://bugzilla.kernel.org/show_bug.cgi?id=215742).
++ */
++ {
++ .callback = init_default_s3,
++ .ident = "ASUS B1400CEAE",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"),
++ },
++ },
+ {},
+ };
+
+@@ -615,11 +635,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
+ }
+
+ /*
+- * Disable and clear GPE status before interrupt is enabled. Some GPEs
+- * (like wakeup GPE) haven't handler, this can avoid such GPE misfire.
+- * acpi_leave_sleep_state will reenable specific GPEs later
++ * Disable all GPE and clear their status bits before interrupts are
++ * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can
++ * prevent them from producing spurious interrupts.
++ *
++ * acpi_leave_sleep_state() will reenable specific GPEs later.
++ *
++ * Because this code runs on one CPU with disabled interrupts (all of
++ * the other CPUs are offline at this time), it need not acquire any
++ * sleeping locks which may trigger an implicit preemption point even
++ * if there is no contention, so avoid doing that by using a low-level
++ * library routine here.
+ */
+- acpi_disable_all_gpes();
++ acpi_hw_disable_all_gpes();
+ /* Allow EC transactions to happen. */
+ acpi_ec_unblock_transactions();
+
+@@ -767,6 +795,7 @@ bool acpi_s2idle_wake(void)
+ return true;
+ }
+
++ pm_wakeup_clear(acpi_sci_irq);
+ rearm_wake_irq(acpi_sci_irq);
+ }
+
+diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
+index 00c0ebaab29f7..6e23b76aef5dc 100644
+--- a/drivers/acpi/sysfs.c
++++ b/drivers/acpi/sysfs.c
+@@ -415,19 +415,30 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj,
+ loff_t offset, size_t count)
+ {
+ struct acpi_data_attr *data_attr;
+- void *base;
+- ssize_t rc;
++ void __iomem *base;
++ ssize_t size;
+
+ data_attr = container_of(bin_attr, struct acpi_data_attr, attr);
++ size = data_attr->attr.size;
++
++ if (offset < 0)
++ return -EINVAL;
++
++ if (offset >= size)
++ return 0;
+
+- base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size);
++ if (count > size - offset)
++ count = size - offset;
++
++ base = acpi_os_map_iomem(data_attr->addr, size);
+ if (!base)
+ return -ENOMEM;
+- rc = memory_read_from_buffer(buf, count, &offset, base,
+- data_attr->attr.size);
+- acpi_os_unmap_memory(base, data_attr->attr.size);
+
+- return rc;
++ memcpy_fromio(buf, base + offset, count);
++
++ acpi_os_unmap_iomem(base, size);
++
++ return count;
+ }
+
+ static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr)
+diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
+index 95105db642b98..809e12b941235 100644
+--- a/drivers/acpi/thermal.c
++++ b/drivers/acpi/thermal.c
+@@ -59,10 +59,6 @@ static int tzp;
+ module_param(tzp, int, 0444);
+ MODULE_PARM_DESC(tzp, "Thermal zone polling frequency, in 1/10 seconds.");
+
+-static int nocrt;
+-module_param(nocrt, int, 0);
+-MODULE_PARM_DESC(nocrt, "Set to take no action upon ACPI thermal zone critical trips points.");
+-
+ static int off;
+ module_param(off, int, 0);
+ MODULE_PARM_DESC(off, "Set to disable ACPI thermal support.");
+@@ -1098,8 +1094,6 @@ static int acpi_thermal_resume(struct device *dev)
+ return -EINVAL;
+
+ for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+- if (!(&tz->trips.active[i]))
+- break;
+ if (!tz->trips.active[i].flags.valid)
+ break;
+ tz->trips.active[i].flags.enabled = 1;
+@@ -1134,7 +1128,7 @@ static int thermal_nocrt(const struct dmi_system_id *d) {
+
+ pr_notice("%s detected: disabling all critical thermal trip point actions.\n",
+ d->ident);
+- nocrt = 1;
++ crt = -1;
+ return 0;
+ }
+ static int thermal_tzp(const struct dmi_system_id *d) {
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 33474fd969913..038542b3a80a7 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -313,7 +313,7 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ .ident = "Lenovo Ideapad Z570",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "102434U"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Ideapad Z570"),
+ },
+ },
+ {
+@@ -409,7 +409,161 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "GA503"),
+ },
+ },
+-
++ /*
++ * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a
++ * working native and video interface. However the default detection
++ * mechanism first registers the video interface before unregistering
++ * it again and switching to the native interface during boot. This
++ * results in a dangling SBIOS request for backlight change for some
++ * reason, causing the backlight to switch to ~2% once per boot on the
++ * first power cord connect or disconnect event. Setting the native
++ * interface explicitly circumvents this buggy behaviour, by avoiding
++ * the unregistering process.
++ */
++ {
++ .callback = video_detect_force_native,
++ .ident = "Clevo NL5xRU",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "Clevo NL5xRU",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "Clevo NL5xRU",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "Clevo NL5xNU",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ },
++ },
++ /*
++ * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
++ * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
++ * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
++ * above.
++ */
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5PU1G",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF4NU1F",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF4NU1F",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5NU1G",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5NU1G",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5LUXG",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
++ },
++ },
++ /*
++ * More Tongfang devices with the same issue as the Clevo NL5xRU and
++ * NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description above.
++ */
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GKxNRxx",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "GKxNRxx"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GKxNRxx",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "POLARIS1501A1650TI"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GKxNRxx",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "POLARIS1501A2060"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GKxNRxx",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "POLARIS1701A1650TI"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GKxNRxx",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "POLARIS1701A2060"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GMxNGxx",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "GMxNGxx"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GMxZGxx",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "GMxZGxx"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang GMxRGxx",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
++ },
++ },
+ /*
+ * Desktops which falsely report a backlight and which our heuristics
+ * for this do not catch.
+diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c
+index d2256326c73ae..fe4b66dae01b5 100644
+--- a/drivers/acpi/viot.c
++++ b/drivers/acpi/viot.c
+@@ -248,6 +248,26 @@ err_free:
+ return ret;
+ }
+
++/**
++ * acpi_viot_early_init - Test the presence of VIOT and enable ACS
++ *
++ * If the VIOT does exist, ACS must be enabled. This cannot be
++ * done in acpi_viot_init() which is called after the bus scan
++ */
++void __init acpi_viot_early_init(void)
++{
++#ifdef CONFIG_PCI
++ acpi_status status;
++ struct acpi_table_header *hdr;
++
++ status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr);
++ if (ACPI_FAILURE(status))
++ return;
++ pci_request_acs();
++ acpi_put_table(hdr);
++#endif
++}
++
+ /**
+ * acpi_viot_init - Parse the VIOT table
+ *
+@@ -309,6 +329,7 @@ static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data)
+ {
+ u32 epid;
+ struct viot_endpoint *ep;
++ struct device *aliased_dev = data;
+ u32 domain_nr = pci_domain_nr(pdev->bus);
+
+ list_for_each_entry(ep, &viot_pci_ranges, list) {
+@@ -319,13 +340,7 @@ static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data)
+ epid = ((domain_nr - ep->segment_start) << 16) +
+ dev_id - ep->bdf_start + ep->endpoint_id;
+
+- /*
+- * If we found a PCI range managed by the viommu, we're
+- * the one that has to request ACS.
+- */
+- pci_request_acs();
+-
+- return viot_dev_iommu_init(&pdev->dev, ep->viommu,
++ return viot_dev_iommu_init(aliased_dev, ep->viommu,
+ epid);
+ }
+ }
+@@ -359,7 +374,7 @@ int viot_iommu_configure(struct device *dev)
+ {
+ if (dev_is_pci(dev))
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+- viot_pci_dev_iommu_init, NULL);
++ viot_pci_dev_iommu_init, dev);
+ else if (dev_is_platform(dev))
+ return viot_mmio_dev_iommu_init(to_platform_device(dev));
+ return -ENODEV;
+diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
+index 1c48358b43ba3..946e0160ad3bf 100644
+--- a/drivers/acpi/x86/s2idle.c
++++ b/drivers/acpi/x86/s2idle.c
+@@ -86,6 +86,8 @@ struct lpi_device_constraint_amd {
+ int min_dstate;
+ };
+
++static LIST_HEAD(lps0_s2idle_devops_head);
++
+ static struct lpi_constraints *lpi_constraints_table;
+ static int lpi_constraints_table_size;
+ static int rev_id;
+@@ -119,17 +121,16 @@ static void lpi_device_get_constraints_amd(void)
+ acpi_handle_debug(lps0_device_handle,
+ "LPI: constraints list begin:\n");
+
+- for (j = 0; j < package->package.count; ++j) {
++ for (j = 0; j < package->package.count; j++) {
+ union acpi_object *info_obj = &package->package.elements[j];
+ struct lpi_device_constraint_amd dev_info = {};
+ struct lpi_constraints *list;
+ acpi_status status;
+
+- for (k = 0; k < info_obj->package.count; ++k) {
+- union acpi_object *obj = &info_obj->package.elements[k];
++ list = &lpi_constraints_table[lpi_constraints_table_size];
+
+- list = &lpi_constraints_table[lpi_constraints_table_size];
+- list->min_dstate = -1;
++ for (k = 0; k < info_obj->package.count; k++) {
++ union acpi_object *obj = &info_obj->package.elements[k];
+
+ switch (k) {
+ case 0:
+@@ -145,27 +146,21 @@ static void lpi_device_get_constraints_amd(void)
+ dev_info.min_dstate = obj->integer.value;
+ break;
+ }
++ }
+
+- if (!dev_info.enabled || !dev_info.name ||
+- !dev_info.min_dstate)
+- continue;
++ if (!dev_info.enabled || !dev_info.name ||
++ !dev_info.min_dstate)
++ continue;
+
+- status = acpi_get_handle(NULL, dev_info.name,
+- &list->handle);
+- if (ACPI_FAILURE(status))
+- continue;
++ status = acpi_get_handle(NULL, dev_info.name, &list->handle);
++ if (ACPI_FAILURE(status))
++ continue;
+
+- acpi_handle_debug(lps0_device_handle,
+- "Name:%s\n", dev_info.name);
++ acpi_handle_debug(lps0_device_handle,
++ "Name:%s\n", dev_info.name);
+
+- list->min_dstate = dev_info.min_dstate;
++ list->min_dstate = dev_info.min_dstate;
+
+- if (list->min_dstate < 0) {
+- acpi_handle_debug(lps0_device_handle,
+- "Incomplete constraint defined\n");
+- continue;
+- }
+- }
+ lpi_constraints_table_size++;
+ }
+ }
+@@ -210,7 +205,7 @@ static void lpi_device_get_constraints(void)
+ if (!package)
+ continue;
+
+- for (j = 0; j < package->package.count; ++j) {
++ for (j = 0; j < package->package.count; j++) {
+ union acpi_object *element =
+ &(package->package.elements[j]);
+
+@@ -242,7 +237,7 @@ static void lpi_device_get_constraints(void)
+
+ constraint->min_dstate = -1;
+
+- for (j = 0; j < package_count; ++j) {
++ for (j = 0; j < package_count; j++) {
+ union acpi_object *info_obj = &info.package[j];
+ union acpi_object *cnstr_pkg;
+ union acpi_object *obj;
+@@ -378,16 +373,13 @@ static int lps0_device_attach(struct acpi_device *adev,
+ * AMDI0006:
+ * - should use rev_id 0x0
+ * - function mask = 0x3: Should use Microsoft method
+- * AMDI0007:
+- * - Should use rev_id 0x2
+- * - Should only use AMD method
+ */
+ const char *hid = acpi_device_hid(adev);
+- rev_id = strcmp(hid, "AMDI0007") ? 0 : 2;
++ rev_id = 0;
+ lps0_dsm_func_mask = validate_dsm(adev->handle,
+ ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
+ lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
+- ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
++ ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+ &lps0_dsm_guid_microsoft);
+ if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
+ !strcmp(hid, "AMD0005") ||
+@@ -395,9 +387,6 @@ static int lps0_device_attach(struct acpi_device *adev,
+ lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
+ acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
+ ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+- } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) {
+- lps0_dsm_func_mask_microsoft = -EINVAL;
+- acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
+ }
+ } else {
+ rev_id = 1;
+@@ -424,15 +413,11 @@ static int lps0_device_attach(struct acpi_device *adev,
+ mem_sleep_current = PM_SUSPEND_TO_IDLE;
+
+ /*
+- * Some Intel based LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U don't
+- * use intel-hid or intel-vbtn but require the EC GPE to be enabled while
+- * suspended for certain wakeup devices to work, so mark it as wakeup-capable.
+- *
+- * Only enable on !AMD as enabling this universally causes problems for a number
+- * of AMD based systems.
++ * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the
++ * EC GPE to be enabled while suspended for certain wakeup devices to
++ * work, so mark it as wakeup-capable.
+ */
+- if (!acpi_s2idle_vendor_amd())
+- acpi_ec_mark_gpe_for_wake();
++ acpi_ec_mark_gpe_for_wake();
+
+ return 0;
+ }
+@@ -444,6 +429,8 @@ static struct acpi_scan_handler lps0_handler = {
+
+ int acpi_s2idle_prepare_late(void)
+ {
++ struct acpi_s2idle_dev_ops *handler;
++
+ if (!lps0_device_handle || sleep_no_lps0)
+ return 0;
+
+@@ -474,14 +461,26 @@ int acpi_s2idle_prepare_late(void)
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
+ lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+ }
++
++ list_for_each_entry(handler, &lps0_s2idle_devops_head, list_node) {
++ if (handler->prepare)
++ handler->prepare();
++ }
++
+ return 0;
+ }
+
+ void acpi_s2idle_restore_early(void)
+ {
++ struct acpi_s2idle_dev_ops *handler;
++
+ if (!lps0_device_handle || sleep_no_lps0)
+ return;
+
++ list_for_each_entry(handler, &lps0_s2idle_devops_head, list_node)
++ if (handler->restore)
++ handler->restore();
++
+ /* Modern standby exit */
+ if (lps0_dsm_func_mask_microsoft > 0)
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
+@@ -524,4 +523,28 @@ void acpi_s2idle_setup(void)
+ s2idle_set_ops(&acpi_s2idle_ops_lps0);
+ }
+
++int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
++{
++ if (!lps0_device_handle || sleep_no_lps0)
++ return -ENODEV;
++
++ lock_system_sleep();
++ list_add(&arg->list_node, &lps0_s2idle_devops_head);
++ unlock_system_sleep();
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(acpi_register_lps0_dev);
++
++void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg)
++{
++ if (!lps0_device_handle || sleep_no_lps0)
++ return;
++
++ lock_system_sleep();
++ list_del(&arg->list_node);
++ unlock_system_sleep();
++}
++EXPORT_SYMBOL_GPL(acpi_unregister_lps0_dev);
++
+ #endif /* CONFIG_SUSPEND */
+diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
+index f22f23933063b..f1dd086d0b87d 100644
+--- a/drivers/acpi/x86/utils.c
++++ b/drivers/acpi/x86/utils.c
+@@ -22,58 +22,71 @@
+ * Some BIOS-es (temporarily) hide specific APCI devices to work around Windows
+ * driver bugs. We use DMI matching to match known cases of this.
+ *
+- * We work around this by always reporting ACPI_STA_DEFAULT for these
+- * devices. Note this MUST only be done for devices where this is safe.
++ * Likewise, some not-actually-present devices are sometimes reported
++ * as present, which may cause issues.
+ *
+- * This forcing of devices to be present is limited to specific CPU (SoC)
+- * models both to avoid potentially causing trouble on other models and
+- * because some HIDs are re-used on different SoCs for completely
+- * different devices.
++ * We work around this by using the below quirk list to override the status
++ * reported by the _STA method with a fixed value (ACPI_STA_DEFAULT or 0).
++ * Note this MUST only be done for devices where this is safe.
++ *
++ * This status overriding is limited to specific CPU (SoC) models both to
++ * avoid potentially causing trouble on other models and because some HIDs
++ * are re-used on different SoCs for completely different devices.
+ */
+-struct always_present_id {
++struct override_status_id {
+ struct acpi_device_id hid[2];
+ struct x86_cpu_id cpu_ids[2];
+ struct dmi_system_id dmi_ids[2]; /* Optional */
+ const char *uid;
++ const char *path;
++ unsigned long long status;
+ };
+
+-#define X86_MATCH(model) X86_MATCH_INTEL_FAM6_MODEL(model, NULL)
+-
+-#define ENTRY(hid, uid, cpu_models, dmi...) { \
++#define ENTRY(status, hid, uid, path, cpu_model, dmi...) { \
+ { { hid, }, {} }, \
+- { cpu_models, {} }, \
++ { X86_MATCH_INTEL_FAM6_MODEL(cpu_model, NULL), {} }, \
+ { { .matches = dmi }, {} }, \
+ uid, \
++ path, \
++ status, \
+ }
+
+-static const struct always_present_id always_present_ids[] = {
++#define PRESENT_ENTRY_HID(hid, uid, cpu_model, dmi...) \
++ ENTRY(ACPI_STA_DEFAULT, hid, uid, NULL, cpu_model, dmi)
++
++#define NOT_PRESENT_ENTRY_HID(hid, uid, cpu_model, dmi...) \
++ ENTRY(0, hid, uid, NULL, cpu_model, dmi)
++
++#define PRESENT_ENTRY_PATH(path, cpu_model, dmi...) \
++ ENTRY(ACPI_STA_DEFAULT, "", NULL, path, cpu_model, dmi)
++
++#define NOT_PRESENT_ENTRY_PATH(path, cpu_model, dmi...) \
++ ENTRY(0, "", NULL, path, cpu_model, dmi)
++
++static const struct override_status_id override_status_ids[] = {
+ /*
+ * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
+ * but Linux uses a separate PWM driver, harmless if not used.
+ */
+- ENTRY("80860F09", "1", X86_MATCH(ATOM_SILVERMONT), {}),
+- ENTRY("80862288", "1", X86_MATCH(ATOM_AIRMONT), {}),
++ PRESENT_ENTRY_HID("80860F09", "1", ATOM_SILVERMONT, {}),
++ PRESENT_ENTRY_HID("80862288", "1", ATOM_AIRMONT, {}),
+
+- /* Lenovo Yoga Book uses PWM2 for keyboard backlight control */
+- ENTRY("80862289", "2", X86_MATCH(ATOM_AIRMONT), {
+- DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"),
+- }),
+ /*
+ * The INT0002 device is necessary to clear wakeup interrupt sources
+ * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
+ */
+- ENTRY("INT0002", "1", X86_MATCH(ATOM_AIRMONT), {}),
++ PRESENT_ENTRY_HID("INT0002", "1", ATOM_AIRMONT, {}),
+ /*
+ * On the Dell Venue 11 Pro 7130 and 7139, the DSDT hides
+ * the touchscreen ACPI device until a certain time
+ * after _SB.PCI0.GFX0.LCD.LCD1._ON gets called has passed
+ * *and* _STA has been called at least 3 times since.
+ */
+- ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), {
++ PRESENT_ENTRY_HID("SYNA7500", "1", HASWELL_L, {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"),
+ }),
+- ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), {
++ PRESENT_ENTRY_HID("SYNA7500", "1", HASWELL_L, {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7139"),
+ }),
+@@ -81,54 +94,83 @@ static const struct always_present_id always_present_ids[] = {
+ /*
+ * The GPD win BIOS dated 20170221 has disabled the accelerometer, the
+ * drivers sometimes cause crashes under Windows and this is how the
+- * manufacturer has solved this :| Note that the the DMI data is less
+- * generic then it seems, a board_vendor of "AMI Corporation" is quite
+- * rare and a board_name of "Default String" also is rare.
++ * manufacturer has solved this :| The DMI match may not seem unique,
++ * but it is. In the 67000+ DMI decode dumps from linux-hardware.org
++ * only 116 have board_vendor set to "AMI Corporation" and of those 116
++ * only the GPD win and pocket entries' board_name is "Default string".
+ *
+ * Unfortunately the GPD pocket also uses these strings and its BIOS
+ * was copy-pasted from the GPD win, so it has a disabled KIOX000A
+ * node which we should not enable, thus we also check the BIOS date.
+ */
+- ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), {
++ PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, {
+ DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+ DMI_MATCH(DMI_BIOS_DATE, "02/21/2017")
+ }),
+- ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), {
++ PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, {
+ DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+ DMI_MATCH(DMI_BIOS_DATE, "03/20/2017")
+ }),
+- ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), {
++ PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, {
+ DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "Default string"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+ DMI_MATCH(DMI_BIOS_DATE, "05/25/2017")
+ }),
++
++ /*
++ * The GPD win/pocket have a PCI wifi card, but its DSDT has the SDIO
++ * mmc controller enabled and that has a child device whose _PS3
++ * method sets a GPIO, causing the PCI wifi card to turn off.
++ * See above remark about uniqueness of the DMI match.
++ */
++ NOT_PRESENT_ENTRY_PATH("\\_SB_.PCI0.SDHB.BRC1", ATOM_AIRMONT, {
++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"),
++ DMI_EXACT_MATCH(DMI_BOARD_SERIAL, "Default string"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"),
++ }),
+ };
+
+-bool acpi_device_always_present(struct acpi_device *adev)
++bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status)
+ {
+ bool ret = false;
+ unsigned int i;
+
+- for (i = 0; i < ARRAY_SIZE(always_present_ids); i++) {
+- if (acpi_match_device_ids(adev, always_present_ids[i].hid))
++ for (i = 0; i < ARRAY_SIZE(override_status_ids); i++) {
++ if (!x86_match_cpu(override_status_ids[i].cpu_ids))
+ continue;
+
+- if (!adev->pnp.unique_id ||
+- strcmp(adev->pnp.unique_id, always_present_ids[i].uid))
++ if (override_status_ids[i].dmi_ids[0].matches[0].slot &&
++ !dmi_check_system(override_status_ids[i].dmi_ids))
+ continue;
+
+- if (!x86_match_cpu(always_present_ids[i].cpu_ids))
+- continue;
++ if (override_status_ids[i].path) {
++ struct acpi_buffer path = { ACPI_ALLOCATE_BUFFER, NULL };
++ bool match;
+
+- if (always_present_ids[i].dmi_ids[0].matches[0].slot &&
+- !dmi_check_system(always_present_ids[i].dmi_ids))
+- continue;
++ if (acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &path))
++ continue;
+
++ match = strcmp((char *)path.pointer, override_status_ids[i].path) == 0;
++ kfree(path.pointer);
++
++ if (!match)
++ continue;
++ } else {
++ if (acpi_match_device_ids(adev, override_status_ids[i].hid))
++ continue;
++
++ if (!adev->pnp.unique_id ||
++ strcmp(adev->pnp.unique_id, override_status_ids[i].uid))
++ continue;
++ }
++
++ *status = override_status_ids[i].status;
+ ret = true;
+ break;
+ }
+@@ -149,10 +191,22 @@ bool acpi_device_always_present(struct acpi_device *adev)
+ * a hardcoded allowlist for D3 support, which was used for these platforms.
+ *
+ * This allows quirking on Linux in a similar fashion.
++ *
++ * Cezanne systems shouldn't *normally* need this as the BIOS includes
++ * StorageD3Enable. But for two reasons we have added it.
++ * 1) The BIOS on a number of Dell systems has ambiguity
++ * between the same value used for _ADR on ACPI nodes GPP1.DEV0 and GPP1.NVME.
++ * GPP1.NVME is needed to get StorageD3Enable node set properly.
++ * https://bugzilla.kernel.org/show_bug.cgi?id=216440
++ * https://bugzilla.kernel.org/show_bug.cgi?id=216773
++ * https://bugzilla.kernel.org/show_bug.cgi?id=217003
++ * 2) On at least one HP system StorageD3Enable is missing on the second NVME
++ *    disk in the system.
+ */
+ static const struct x86_cpu_id storage_d3_cpu_ids[] = {
+ X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */
+ X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */
++ X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */
+ {}
+ };
+
+diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
+index 962041148482c..1af5ff9231eb0 100644
+--- a/drivers/amba/bus.c
++++ b/drivers/amba/bus.c
+@@ -366,6 +366,7 @@ static void amba_device_release(struct device *dev)
+ {
+ struct amba_device *d = to_amba_device(dev);
+
++ of_node_put(d->dev.of_node);
+ if (d->res.parent)
+ release_resource(&d->res);
+ kfree(d);
+@@ -377,9 +378,6 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
+ void __iomem *tmp;
+ int i, ret;
+
+- WARN_ON(dev->irq[0] == (unsigned int)-1);
+- WARN_ON(dev->irq[1] == (unsigned int)-1);
+-
+ ret = request_resource(parent, &dev->res);
+ if (ret)
+ goto err_out;
+diff --git a/drivers/android/binder.c b/drivers/android/binder.c
+index 9edacc8b97688..cbbed43baf056 100644
+--- a/drivers/android/binder.c
++++ b/drivers/android/binder.c
+@@ -170,8 +170,32 @@ static inline void binder_stats_created(enum binder_stat_types type)
+ atomic_inc(&binder_stats.obj_created[type]);
+ }
+
+-struct binder_transaction_log binder_transaction_log;
+-struct binder_transaction_log binder_transaction_log_failed;
++struct binder_transaction_log_entry {
++ int debug_id;
++ int debug_id_done;
++ int call_type;
++ int from_proc;
++ int from_thread;
++ int target_handle;
++ int to_proc;
++ int to_thread;
++ int to_node;
++ int data_size;
++ int offsets_size;
++ int return_error_line;
++ uint32_t return_error;
++ uint32_t return_error_param;
++ char context_name[BINDERFS_MAX_NAME + 1];
++};
++
++struct binder_transaction_log {
++ atomic_t cur;
++ bool full;
++ struct binder_transaction_log_entry entry[32];
++};
++
++static struct binder_transaction_log binder_transaction_log;
++static struct binder_transaction_log binder_transaction_log_failed;
+
+ static struct binder_transaction_log_entry *binder_transaction_log_add(
+ struct binder_transaction_log *log)
+@@ -1334,6 +1358,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc,
+ }
+ ret = binder_inc_ref_olocked(ref, strong, target_list);
+ *rdata = ref->data;
++ if (ret && ref == new_ref) {
++ /*
++ * Cleanup the failed reference here as the target
++ * could now be dead and have already released its
++ * references by now. Calling on the new reference
++ * with strong=0 and a tmp_refs will not decrement
++ * the node. The new_ref gets kfree'd below.
++ */
++ binder_cleanup_ref_olocked(new_ref);
++ ref = NULL;
++ }
++
+ binder_proc_unlock(proc);
+ if (new_ref && ref != new_ref)
+ /*
+@@ -1608,15 +1644,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t,
+ /**
+ * binder_get_object() - gets object and checks for valid metadata
+ * @proc: binder_proc owning the buffer
++ * @u: sender's user pointer to base of buffer
+ * @buffer: binder_buffer that we're parsing.
+ * @offset: offset in the @buffer at which to validate an object.
+ * @object: struct binder_object to read into
+ *
+- * Return: If there's a valid metadata object at @offset in @buffer, the
++ * Copy the binder object at the given offset into @object. If @u is
++ * provided then the copy is from the sender's buffer. If not, then
++ * it is copied from the target's @buffer.
++ *
++ * Return: If there's a valid metadata object at @offset, the
+ * size of that object. Otherwise, it returns zero. The object
+ * is read into the struct binder_object pointed to by @object.
+ */
+ static size_t binder_get_object(struct binder_proc *proc,
++ const void __user *u,
+ struct binder_buffer *buffer,
+ unsigned long offset,
+ struct binder_object *object)
+@@ -1626,10 +1668,16 @@ static size_t binder_get_object(struct binder_proc *proc,
+ size_t object_size = 0;
+
+ read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
+- if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+- binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
+- offset, read_size))
++ if (offset > buffer->data_size || read_size < sizeof(*hdr))
+ return 0;
++ if (u) {
++ if (copy_from_user(object, u + offset, read_size))
++ return 0;
++ } else {
++ if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
++ offset, read_size))
++ return 0;
++ }
+
+ /* Ok, now see if we read a complete object. */
+ hdr = &object->hdr;
+@@ -1702,7 +1750,7 @@ static struct binder_buffer_object *binder_validate_ptr(
+ b, buffer_offset,
+ sizeof(object_offset)))
+ return NULL;
+- object_size = binder_get_object(proc, b, object_offset, object);
++ object_size = binder_get_object(proc, NULL, b, object_offset, object);
+ if (!object_size || object->hdr.type != BINDER_TYPE_PTR)
+ return NULL;
+ if (object_offsetp)
+@@ -1767,7 +1815,8 @@ static bool binder_validate_fixup(struct binder_proc *proc,
+ unsigned long buffer_offset;
+ struct binder_object last_object;
+ struct binder_buffer_object *last_bbo;
+- size_t object_size = binder_get_object(proc, b, last_obj_offset,
++ size_t object_size = binder_get_object(proc, NULL, b,
++ last_obj_offset,
+ &last_object);
+ if (object_size != sizeof(*last_bbo))
+ return false;
+@@ -1854,24 +1903,23 @@ static void binder_deferred_fd_close(int fd)
+ static void binder_transaction_buffer_release(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+- binder_size_t failed_at,
++ binder_size_t off_end_offset,
+ bool is_failure)
+ {
+ int debug_id = buffer->debug_id;
+- binder_size_t off_start_offset, buffer_offset, off_end_offset;
++ binder_size_t off_start_offset, buffer_offset;
+
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "%d buffer release %d, size %zd-%zd, failed at %llx\n",
+ proc->pid, buffer->debug_id,
+ buffer->data_size, buffer->offsets_size,
+- (unsigned long long)failed_at);
++ (unsigned long long)off_end_offset);
+
+ if (buffer->target_node)
+ binder_dec_node(buffer->target_node, 1, 0);
+
+ off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
+- off_end_offset = is_failure ? failed_at :
+- off_start_offset + buffer->offsets_size;
++
+ for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
+ buffer_offset += sizeof(binder_size_t)) {
+ struct binder_object_header *hdr;
+@@ -1882,7 +1930,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
+ if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
+ buffer, buffer_offset,
+ sizeof(object_offset)))
+- object_size = binder_get_object(proc, buffer,
++ object_size = binder_get_object(proc, NULL, buffer,
+ object_offset, &object);
+ if (object_size == 0) {
+ pr_err("transaction release %d bad object at offset %lld, size %zd\n",
+@@ -1956,9 +2004,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
+ binder_size_t fd_buf_size;
+ binder_size_t num_valid;
+
+- if (proc->tsk != current->group_leader) {
++ if (is_failure) {
+ /*
+- * Nothing to do if running in sender context
+ * The fd fixups have not been applied so no
+ * fds need to be closed.
+ */
+@@ -2032,6 +2079,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
+ }
+ }
+
++/* Clean up all the objects in the buffer */
++static inline void binder_release_entire_buffer(struct binder_proc *proc,
++ struct binder_thread *thread,
++ struct binder_buffer *buffer,
++ bool is_failure)
++{
++ binder_size_t off_end_offset;
++
++ off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
++ off_end_offset += buffer->offsets_size;
++
++ binder_transaction_buffer_release(proc, thread, buffer,
++ off_end_offset, is_failure);
++}
++
+ static int binder_translate_binder(struct flat_binder_object *fp,
+ struct binder_transaction *t,
+ struct binder_thread *thread)
+@@ -2056,7 +2118,7 @@ static int binder_translate_binder(struct flat_binder_object *fp,
+ ret = -EINVAL;
+ goto done;
+ }
+- if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
++ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
+ ret = -EPERM;
+ goto done;
+ }
+@@ -2102,7 +2164,7 @@ static int binder_translate_handle(struct flat_binder_object *fp,
+ proc->pid, thread->pid, fp->handle);
+ return -EINVAL;
+ }
+- if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
++ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
+ ret = -EPERM;
+ goto done;
+ }
+@@ -2190,7 +2252,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset,
+ ret = -EBADF;
+ goto err_fget;
+ }
+- ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file);
++ ret = security_binder_transfer_file(proc->cred, target_proc->cred, file);
+ if (ret < 0) {
+ ret = -EPERM;
+ goto err_security;
+@@ -2221,16 +2283,266 @@ err_fd_not_accepted:
+ return ret;
+ }
+
+-static int binder_translate_fd_array(struct binder_fd_array_object *fda,
++/**
++ * struct binder_ptr_fixup - data to be fixed-up in target buffer
++ * @offset offset in target buffer to fixup
++ * @skip_size bytes to skip in copy (fixup will be written later)
++ * @fixup_data data to write at fixup offset
++ * @node list node
++ *
++ * This is used for the pointer fixup list (pf) which is created and consumed
++ * during binder_transaction() and is only accessed locally. No
++ * locking is necessary.
++ *
++ * The list is ordered by @offset.
++ */
++struct binder_ptr_fixup {
++ binder_size_t offset;
++ size_t skip_size;
++ binder_uintptr_t fixup_data;
++ struct list_head node;
++};
++
++/**
++ * struct binder_sg_copy - scatter-gather data to be copied
++ * @offset:       offset in target buffer
++ * @sender_uaddr: user address in source buffer
++ * @length:       bytes to copy
++ * @node:         list node
++ *
++ * This is used for the sg copy list (sgc) which is created and consumed
++ * during binder_transaction() and is only accessed locally. No
++ * locking is necessary.
++ *
++ * The list is ordered by @offset.
++ */
++struct binder_sg_copy {
++ binder_size_t offset;
++ const void __user *sender_uaddr;
++ size_t length;
++ struct list_head node;
++};
++
++/**
++ * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data
++ * @alloc: binder_alloc associated with @buffer
++ * @buffer: binder buffer in target process
++ * @sgc_head: list_head of scatter-gather copy list
++ * @pf_head: list_head of pointer fixup list
++ *
++ * Processes all elements of @sgc_head, applying fixups from @pf_head
++ * and copying the scatter-gather data from the source process' user
++ * buffer to the target's buffer. It is expected that the list creation
++ * and processing all occurs during binder_transaction() so these lists
++ * are only accessed in local context.
++ *
++ * Return: 0=success, else -errno
++ */
++static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
++ struct binder_buffer *buffer,
++ struct list_head *sgc_head,
++ struct list_head *pf_head)
++{
++ int ret = 0;
++ struct binder_sg_copy *sgc, *tmpsgc;
++ struct binder_ptr_fixup *tmppf;
++ struct binder_ptr_fixup *pf =
++ list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
++ node);
++
++ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
++ size_t bytes_copied = 0;
++
++ while (bytes_copied < sgc->length) {
++ size_t copy_size;
++ size_t bytes_left = sgc->length - bytes_copied;
++ size_t offset = sgc->offset + bytes_copied;
++
++ /*
++ * We copy up to the fixup (pointed to by pf)
++ */
++ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset)
++ : bytes_left;
++ if (!ret && copy_size)
++ ret = binder_alloc_copy_user_to_buffer(
++ alloc, buffer,
++ offset,
++ sgc->sender_uaddr + bytes_copied,
++ copy_size);
++ bytes_copied += copy_size;
++ if (copy_size != bytes_left) {
++ BUG_ON(!pf);
++ /* we stopped at a fixup offset */
++ if (pf->skip_size) {
++ /*
++ * we are just skipping. This is for
++ * BINDER_TYPE_FDA where the translated
++ * fds will be fixed up when we get
++ * to target context.
++ */
++ bytes_copied += pf->skip_size;
++ } else {
++ /* apply the fixup indicated by pf */
++ if (!ret)
++ ret = binder_alloc_copy_to_buffer(
++ alloc, buffer,
++ pf->offset,
++ &pf->fixup_data,
++ sizeof(pf->fixup_data));
++ bytes_copied += sizeof(pf->fixup_data);
++ }
++ list_del(&pf->node);
++ kfree(pf);
++ pf = list_first_entry_or_null(pf_head,
++ struct binder_ptr_fixup, node);
++ }
++ }
++ list_del(&sgc->node);
++ kfree(sgc);
++ }
++ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
++ BUG_ON(pf->skip_size == 0);
++ list_del(&pf->node);
++ kfree(pf);
++ }
++ BUG_ON(!list_empty(sgc_head));
++
++ return ret > 0 ? -EINVAL : ret;
++}
++
++/**
++ * binder_cleanup_deferred_txn_lists() - free specified lists
++ * @sgc_head: list_head of scatter-gather copy list
++ * @pf_head: list_head of pointer fixup list
++ *
++ * Called to clean up @sgc_head and @pf_head if there is an
++ * error.
++ */
++static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head,
++ struct list_head *pf_head)
++{
++ struct binder_sg_copy *sgc, *tmpsgc;
++ struct binder_ptr_fixup *pf, *tmppf;
++
++ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
++ list_del(&sgc->node);
++ kfree(sgc);
++ }
++ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
++ list_del(&pf->node);
++ kfree(pf);
++ }
++}
++
++/**
++ * binder_defer_copy() - queue a scatter-gather buffer for copy
++ * @sgc_head: list_head of scatter-gather copy list
++ * @offset: binder buffer offset in target process
++ * @sender_uaddr: user address in source process
++ * @length: bytes to copy
++ *
++ * Specify a scatter-gather block to be copied. The actual copy must
++ * be deferred until all the needed fixups are identified and queued.
++ * Then the copy and fixups are done together so un-translated values
++ * from the source are never visible in the target buffer.
++ *
++ * We are guaranteed that repeated calls to this function will have
++ * monotonically increasing @offset values so the list will naturally
++ * be ordered.
++ *
++ * Return: 0=success, else -errno
++ */
++static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset,
++ const void __user *sender_uaddr, size_t length)
++{
++ struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL);
++
++ if (!bc)
++ return -ENOMEM;
++
++ bc->offset = offset;
++ bc->sender_uaddr = sender_uaddr;
++ bc->length = length;
++ INIT_LIST_HEAD(&bc->node);
++
++ /*
++ * We are guaranteed that the deferred copies are in-order
++ * so just add to the tail.
++ */
++ list_add_tail(&bc->node, sgc_head);
++
++ return 0;
++}
++
++/**
++ * binder_add_fixup() - queue a fixup to be applied to sg copy
++ * @pf_head: list_head of binder ptr fixup list
++ * @offset: binder buffer offset in target process
++ * @fixup: bytes to be copied for fixup
++ * @skip_size: bytes to skip when copying (fixup will be applied later)
++ *
++ * Add the specified fixup to a list ordered by @offset. When copying
++ * the scatter-gather buffers, the fixup will be copied instead of
++ * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup
++ * will be applied later (in target process context), so we just skip
++ * the bytes specified by @skip_size. If @skip_size is 0, we copy the
++ * value in @fixup.
++ *
++ * This function is called *mostly* in @offset order, but there are
++ * exceptions. Since out-of-order inserts are relatively uncommon,
++ * we insert the new element by searching backward from the tail of
++ * the list.
++ *
++ * Return: 0=success, else -errno
++ */
++static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset,
++ binder_uintptr_t fixup, size_t skip_size)
++{
++ struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL);
++ struct binder_ptr_fixup *tmppf;
++
++ if (!pf)
++ return -ENOMEM;
++
++ pf->offset = offset;
++ pf->fixup_data = fixup;
++ pf->skip_size = skip_size;
++ INIT_LIST_HEAD(&pf->node);
++
++ /* Fixups are *mostly* added in-order, but there are some
++ * exceptions. Look backwards through list for insertion point.
++ */
++ list_for_each_entry_reverse(tmppf, pf_head, node) {
++ if (tmppf->offset < pf->offset) {
++ list_add(&pf->node, &tmppf->node);
++ return 0;
++ }
++ }
++ /*
++ * if we get here, then the new offset is the lowest so
++ * insert at the head
++ */
++ list_add(&pf->node, pf_head);
++ return 0;
++}
++
++static int binder_translate_fd_array(struct list_head *pf_head,
++ struct binder_fd_array_object *fda,
++ const void __user *sender_ubuffer,
+ struct binder_buffer_object *parent,
++ struct binder_buffer_object *sender_uparent,
+ struct binder_transaction *t,
+ struct binder_thread *thread,
+ struct binder_transaction *in_reply_to)
+ {
+ binder_size_t fdi, fd_buf_size;
+ binder_size_t fda_offset;
++ const void __user *sender_ufda_base;
+ struct binder_proc *proc = thread->proc;
+- struct binder_proc *target_proc = t->to_proc;
++ int ret;
++
++ if (fda->num_fds == 0)
++ return 0;
+
+ fd_buf_size = sizeof(u32) * fda->num_fds;
+ if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
+@@ -2254,29 +2566,36 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
+ */
+ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) +
+ fda->parent_offset;
+- if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) {
++ sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer +
++ fda->parent_offset;
++
++ if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) ||
++ !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) {
+ binder_user_error("%d:%d parent offset not aligned correctly.\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
++ ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32));
++ if (ret)
++ return ret;
++
+ for (fdi = 0; fdi < fda->num_fds; fdi++) {
+ u32 fd;
+- int ret;
+ binder_size_t offset = fda_offset + fdi * sizeof(fd);
++ binder_size_t sender_uoffset = fdi * sizeof(fd);
+
+- ret = binder_alloc_copy_from_buffer(&target_proc->alloc,
+- &fd, t->buffer,
+- offset, sizeof(fd));
++ ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd));
+ if (!ret)
+ ret = binder_translate_fd(fd, offset, t, thread,
+ in_reply_to);
+- if (ret < 0)
+- return ret;
++ if (ret)
++ return ret > 0 ? -EINVAL : ret;
+ }
+ return 0;
+ }
+
+-static int binder_fixup_parent(struct binder_transaction *t,
++static int binder_fixup_parent(struct list_head *pf_head,
++ struct binder_transaction *t,
+ struct binder_thread *thread,
+ struct binder_buffer_object *bp,
+ binder_size_t off_start_offset,
+@@ -2322,14 +2641,7 @@ static int binder_fixup_parent(struct binder_transaction *t,
+ }
+ buffer_offset = bp->parent_offset +
+ (uintptr_t)parent->buffer - (uintptr_t)b->user_data;
+- if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset,
+- &bp->buffer, sizeof(bp->buffer))) {
+- binder_user_error("%d:%d got transaction with invalid parent offset\n",
+- proc->pid, thread->pid);
+- return -EINVAL;
+- }
+-
+- return 0;
++ return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0);
+ }
+
+ /**
+@@ -2456,6 +2768,7 @@ static void binder_transaction(struct binder_proc *proc,
+ binder_size_t off_start_offset, off_end_offset;
+ binder_size_t off_min;
+ binder_size_t sg_buf_offset, sg_buf_end_offset;
++ binder_size_t user_offset = 0;
+ struct binder_proc *target_proc = NULL;
+ struct binder_thread *target_thread = NULL;
+ struct binder_node *target_node = NULL;
+@@ -2470,6 +2783,12 @@ static void binder_transaction(struct binder_proc *proc,
+ int t_debug_id = atomic_inc_return(&binder_last_id);
+ char *secctx = NULL;
+ u32 secctx_sz = 0;
++ struct list_head sgc_head;
++ struct list_head pf_head;
++ const void __user *user_buffer = (const void __user *)
++ (uintptr_t)tr->data.ptr.buffer;
++ INIT_LIST_HEAD(&sgc_head);
++ INIT_LIST_HEAD(&pf_head);
+
+ e = binder_transaction_log_add(&binder_transaction_log);
+ e->debug_id = t_debug_id;
+@@ -2595,8 +2914,8 @@ static void binder_transaction(struct binder_proc *proc,
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
+- if (security_binder_transaction(proc->tsk,
+- target_proc->tsk) < 0) {
++ if (security_binder_transaction(proc->cred,
++ target_proc->cred) < 0) {
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPERM;
+ return_error_line = __LINE__;
+@@ -2722,16 +3041,7 @@ static void binder_transaction(struct binder_proc *proc,
+ u32 secid;
+ size_t added_size;
+
+- /*
+- * Arguably this should be the task's subjective LSM secid but
+- * we can't reliably access the subjective creds of a task
+- * other than our own so we must use the objective creds, which
+- * are safe to access. The downside is that if a task is
+- * temporarily overriding it's creds it will not be reflected
+- * here; however, it isn't clear that binder would handle that
+- * case well anyway.
+- */
+- security_task_getsecid_obj(proc->tsk, &secid);
++ security_cred_getsecid(proc->cred, &secid);
+ ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
+ if (ret) {
+ return_error = BR_FAILED_REPLY;
+@@ -2790,19 +3100,6 @@ static void binder_transaction(struct binder_proc *proc,
+ t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF);
+ trace_binder_transaction_alloc_buf(t->buffer);
+
+- if (binder_alloc_copy_user_to_buffer(
+- &target_proc->alloc,
+- t->buffer, 0,
+- (const void __user *)
+- (uintptr_t)tr->data.ptr.buffer,
+- tr->data_size)) {
+- binder_user_error("%d:%d got transaction with invalid data ptr\n",
+- proc->pid, thread->pid);
+- return_error = BR_FAILED_REPLY;
+- return_error_param = -EFAULT;
+- return_error_line = __LINE__;
+- goto err_copy_data_failed;
+- }
+ if (binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer,
+@@ -2847,6 +3144,7 @@ static void binder_transaction(struct binder_proc *proc,
+ size_t object_size;
+ struct binder_object object;
+ binder_size_t object_offset;
++ binder_size_t copy_size;
+
+ if (binder_alloc_copy_from_buffer(&target_proc->alloc,
+ &object_offset,
+@@ -2858,8 +3156,27 @@ static void binder_transaction(struct binder_proc *proc,
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+- object_size = binder_get_object(target_proc, t->buffer,
+- object_offset, &object);
++
++ /*
++ * Copy the source user buffer up to the next object
++ * that will be processed.
++ */
++ copy_size = object_offset - user_offset;
++ if (copy_size && (user_offset > object_offset ||
++ binder_alloc_copy_user_to_buffer(
++ &target_proc->alloc,
++ t->buffer, user_offset,
++ user_buffer + user_offset,
++ copy_size))) {
++ binder_user_error("%d:%d got transaction with invalid data ptr\n",
++ proc->pid, thread->pid);
++ return_error = BR_FAILED_REPLY;
++ return_error_param = -EFAULT;
++ return_error_line = __LINE__;
++ goto err_copy_data_failed;
++ }
++ object_size = binder_get_object(target_proc, user_buffer,
++ t->buffer, object_offset, &object);
+ if (object_size == 0 || object_offset < off_min) {
+ binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
+ proc->pid, thread->pid,
+@@ -2871,6 +3188,11 @@ static void binder_transaction(struct binder_proc *proc,
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
++ /*
++ * Set offset to the next buffer fragment to be
++ * copied
++ */
++ user_offset = object_offset + object_size;
+
+ hdr = &object.hdr;
+ off_min = object_offset + object_size;
+@@ -2933,6 +3255,8 @@ static void binder_transaction(struct binder_proc *proc,
+ case BINDER_TYPE_FDA: {
+ struct binder_object ptr_object;
+ binder_size_t parent_offset;
++ struct binder_object user_object;
++ size_t user_parent_size;
+ struct binder_fd_array_object *fda =
+ to_binder_fd_array_object(hdr);
+ size_t num_valid = (buffer_offset - off_start_offset) /
+@@ -2964,11 +3288,35 @@ static void binder_transaction(struct binder_proc *proc,
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+- ret = binder_translate_fd_array(fda, parent, t, thread,
+- in_reply_to);
+- if (ret < 0) {
++ /*
++ * We need to read the user version of the parent
++ * object to get the original user offset
++ */
++ user_parent_size =
++ binder_get_object(proc, user_buffer, t->buffer,
++ parent_offset, &user_object);
++ if (user_parent_size != sizeof(user_object.bbo)) {
++ binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n",
++ proc->pid, thread->pid,
++ user_parent_size,
++ sizeof(user_object.bbo));
+ return_error = BR_FAILED_REPLY;
+- return_error_param = ret;
++ return_error_param = -EINVAL;
++ return_error_line = __LINE__;
++ goto err_bad_parent;
++ }
++ ret = binder_translate_fd_array(&pf_head, fda,
++ user_buffer, parent,
++ &user_object.bbo, t,
++ thread, in_reply_to);
++ if (!ret)
++ ret = binder_alloc_copy_to_buffer(&target_proc->alloc,
++ t->buffer,
++ object_offset,
++ fda, sizeof(*fda));
++ if (ret) {
++ return_error = BR_FAILED_REPLY;
++ return_error_param = ret > 0 ? -EINVAL : ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+@@ -2990,19 +3338,14 @@ static void binder_transaction(struct binder_proc *proc,
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+- if (binder_alloc_copy_user_to_buffer(
+- &target_proc->alloc,
+- t->buffer,
+- sg_buf_offset,
+- (const void __user *)
+- (uintptr_t)bp->buffer,
+- bp->length)) {
+- binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+- proc->pid, thread->pid);
+- return_error_param = -EFAULT;
++ ret = binder_defer_copy(&sgc_head, sg_buf_offset,
++ (const void __user *)(uintptr_t)bp->buffer,
++ bp->length);
++ if (ret) {
+ return_error = BR_FAILED_REPLY;
++ return_error_param = ret;
+ return_error_line = __LINE__;
+- goto err_copy_data_failed;
++ goto err_translate_failed;
+ }
+ /* Fixup buffer pointer to target proc address space */
+ bp->buffer = (uintptr_t)
+@@ -3011,7 +3354,8 @@ static void binder_transaction(struct binder_proc *proc,
+
+ num_valid = (buffer_offset - off_start_offset) /
+ sizeof(binder_size_t);
+- ret = binder_fixup_parent(t, thread, bp,
++ ret = binder_fixup_parent(&pf_head, t,
++ thread, bp,
+ off_start_offset,
+ num_valid,
+ last_fixup_obj_off,
+@@ -3038,6 +3382,30 @@ static void binder_transaction(struct binder_proc *proc,
+ goto err_bad_object_type;
+ }
+ }
++ /* Done processing objects, copy the rest of the buffer */
++ if (binder_alloc_copy_user_to_buffer(
++ &target_proc->alloc,
++ t->buffer, user_offset,
++ user_buffer + user_offset,
++ tr->data_size - user_offset)) {
++ binder_user_error("%d:%d got transaction with invalid data ptr\n",
++ proc->pid, thread->pid);
++ return_error = BR_FAILED_REPLY;
++ return_error_param = -EFAULT;
++ return_error_line = __LINE__;
++ goto err_copy_data_failed;
++ }
++
++ ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer,
++ &sgc_head, &pf_head);
++ if (ret) {
++ binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
++ proc->pid, thread->pid);
++ return_error = BR_FAILED_REPLY;
++ return_error_param = ret;
++ return_error_line = __LINE__;
++ goto err_copy_data_failed;
++ }
+ if (t->buffer->oneway_spam_suspect)
+ tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+ else
+@@ -3111,6 +3479,7 @@ err_bad_object_type:
+ err_bad_offset:
+ err_bad_parent:
+ err_copy_data_failed:
++ binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head);
+ binder_free_txn_fixups(t);
+ trace_binder_transaction_failed_buffer_release(t->buffer);
+ binder_transaction_buffer_release(target_proc, NULL, t->buffer,
+@@ -3185,6 +3554,7 @@ err_invalid_target_handle:
+ * binder_free_buf() - free the specified buffer
+ * @proc: binder proc that owns buffer
+ * @buffer: buffer to be freed
++ * @is_failure: failed to send transaction
+ *
+ * If buffer for an async transaction, enqueue the next async
+ * transaction from the node.
+@@ -3194,7 +3564,7 @@ err_invalid_target_handle:
+ static void
+ binder_free_buf(struct binder_proc *proc,
+ struct binder_thread *thread,
+- struct binder_buffer *buffer)
++ struct binder_buffer *buffer, bool is_failure)
+ {
+ binder_inner_proc_lock(proc);
+ if (buffer->transaction) {
+@@ -3222,7 +3592,7 @@ binder_free_buf(struct binder_proc *proc,
+ binder_node_inner_unlock(buf_node);
+ }
+ trace_binder_transaction_buffer_release(buffer);
+- binder_transaction_buffer_release(proc, thread, buffer, 0, false);
++ binder_release_entire_buffer(proc, thread, buffer, is_failure);
+ binder_alloc_free_buf(&proc->alloc, buffer);
+ }
+
+@@ -3424,7 +3794,7 @@ static int binder_thread_write(struct binder_proc *proc,
+ proc->pid, thread->pid, (u64)data_ptr,
+ buffer->debug_id,
+ buffer->transaction ? "active" : "finished");
+- binder_free_buf(proc, thread, buffer);
++ binder_free_buf(proc, thread, buffer, false);
+ break;
+ }
+
+@@ -4117,7 +4487,7 @@ retry:
+ buffer->transaction = NULL;
+ binder_cleanup_transaction(t, "fd fixups failed",
+ BR_FAILED_REPLY);
+- binder_free_buf(proc, thread, buffer);
++ binder_free_buf(proc, thread, buffer, true);
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n",
+ proc->pid, thread->pid,
+@@ -4353,6 +4723,7 @@ static void binder_free_proc(struct binder_proc *proc)
+ }
+ binder_alloc_deferred_release(&proc->alloc);
+ put_task_struct(proc->tsk);
++ put_cred(proc->cred);
+ binder_stats_deleted(BINDER_STAT_PROC);
+ kfree(proc);
+ }
+@@ -4430,23 +4801,20 @@ static int binder_thread_release(struct binder_proc *proc,
+ __release(&t->lock);
+
+ /*
+- * If this thread used poll, make sure we remove the waitqueue
+- * from any epoll data structures holding it with POLLFREE.
+- * waitqueue_active() is safe to use here because we're holding
+- * the inner lock.
++ * If this thread used poll, make sure we remove the waitqueue from any
++ * poll data structures holding it.
+ */
+- if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
+- waitqueue_active(&thread->wait)) {
+- wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE);
+- }
++ if (thread->looper & BINDER_LOOPER_STATE_POLL)
++ wake_up_pollfree(&thread->wait);
+
+ binder_inner_proc_unlock(thread->proc);
+
+ /*
+- * This is needed to avoid races between wake_up_poll() above and
+- * and ep_remove_waitqueue() called for other reasons (eg the epoll file
+- * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+- * lock, so we can be sure it's done after calling synchronize_rcu().
++ * This is needed to avoid races between wake_up_pollfree() above and
++ * someone else removing the last entry from the queue for other reasons
++ * (e.g. ep_remove_wait_queue() being called due to an epoll file
++ * descriptor being closed). Such other users hold an RCU read lock, so
++ * we can be sure they're done after we call synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
+@@ -4564,7 +4932,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp,
+ ret = -EBUSY;
+ goto out;
+ }
+- ret = security_binder_set_context_mgr(proc->tsk);
++ ret = security_binder_set_context_mgr(proc->cred);
+ if (ret < 0)
+ goto out;
+ if (uid_valid(context->binder_context_mgr_uid)) {
+@@ -5055,6 +5423,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
+ spin_lock_init(&proc->outer_lock);
+ get_task_struct(current->group_leader);
+ proc->tsk = current->group_leader;
++ proc->cred = get_cred(filp->f_cred);
+ INIT_LIST_HEAD(&proc->todo);
+ init_waitqueue_head(&proc->freeze_wait);
+ proc->default_priority = task_nice(current);
+@@ -5765,8 +6134,7 @@ static void print_binder_proc_stats(struct seq_file *m,
+ print_binder_stats(m, " ", &proc->stats);
+ }
+
+-
+-int binder_state_show(struct seq_file *m, void *unused)
++static int state_show(struct seq_file *m, void *unused)
+ {
+ struct binder_proc *proc;
+ struct binder_node *node;
+@@ -5805,7 +6173,7 @@ int binder_state_show(struct seq_file *m, void *unused)
+ return 0;
+ }
+
+-int binder_stats_show(struct seq_file *m, void *unused)
++static int stats_show(struct seq_file *m, void *unused)
+ {
+ struct binder_proc *proc;
+
+@@ -5821,7 +6189,7 @@ int binder_stats_show(struct seq_file *m, void *unused)
+ return 0;
+ }
+
+-int binder_transactions_show(struct seq_file *m, void *unused)
++static int transactions_show(struct seq_file *m, void *unused)
+ {
+ struct binder_proc *proc;
+
+@@ -5877,7 +6245,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
+ "\n" : " (incomplete)\n");
+ }
+
+-int binder_transaction_log_show(struct seq_file *m, void *unused)
++static int transaction_log_show(struct seq_file *m, void *unused)
+ {
+ struct binder_transaction_log *log = m->private;
+ unsigned int log_cur = atomic_read(&log->cur);
+@@ -5909,6 +6277,45 @@ const struct file_operations binder_fops = {
+ .release = binder_release,
+ };
+
++DEFINE_SHOW_ATTRIBUTE(state);
++DEFINE_SHOW_ATTRIBUTE(stats);
++DEFINE_SHOW_ATTRIBUTE(transactions);
++DEFINE_SHOW_ATTRIBUTE(transaction_log);
++
++const struct binder_debugfs_entry binder_debugfs_entries[] = {
++ {
++ .name = "state",
++ .mode = 0444,
++ .fops = &state_fops,
++ .data = NULL,
++ },
++ {
++ .name = "stats",
++ .mode = 0444,
++ .fops = &stats_fops,
++ .data = NULL,
++ },
++ {
++ .name = "transactions",
++ .mode = 0444,
++ .fops = &transactions_fops,
++ .data = NULL,
++ },
++ {
++ .name = "transaction_log",
++ .mode = 0444,
++ .fops = &transaction_log_fops,
++ .data = &binder_transaction_log,
++ },
++ {
++ .name = "failed_transaction_log",
++ .mode = 0444,
++ .fops = &transaction_log_fops,
++ .data = &binder_transaction_log_failed,
++ },
++ {} /* terminator */
++};
++
+ static int __init init_binder_device(const char *name)
+ {
+ int ret;
+@@ -5954,36 +6361,18 @@ static int __init binder_init(void)
+ atomic_set(&binder_transaction_log_failed.cur, ~0U);
+
+ binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);
+- if (binder_debugfs_dir_entry_root)
++ if (binder_debugfs_dir_entry_root) {
++ const struct binder_debugfs_entry *db_entry;
++
++ binder_for_each_debugfs_entry(db_entry)
++ debugfs_create_file(db_entry->name,
++ db_entry->mode,
++ binder_debugfs_dir_entry_root,
++ db_entry->data,
++ db_entry->fops);
++
+ binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
+ binder_debugfs_dir_entry_root);
+-
+- if (binder_debugfs_dir_entry_root) {
+- debugfs_create_file("state",
+- 0444,
+- binder_debugfs_dir_entry_root,
+- NULL,
+- &binder_state_fops);
+- debugfs_create_file("stats",
+- 0444,
+- binder_debugfs_dir_entry_root,
+- NULL,
+- &binder_stats_fops);
+- debugfs_create_file("transactions",
+- 0444,
+- binder_debugfs_dir_entry_root,
+- NULL,
+- &binder_transactions_fops);
+- debugfs_create_file("transaction_log",
+- 0444,
+- binder_debugfs_dir_entry_root,
+- &binder_transaction_log,
+- &binder_transaction_log_fops);
+- debugfs_create_file("failed_transaction_log",
+- 0444,
+- binder_debugfs_dir_entry_root,
+- &binder_transaction_log_failed,
+- &binder_transaction_log_fops);
+ }
+
+ if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) &&
+@@ -6023,6 +6412,7 @@ err_init_binder_device_failed:
+
+ err_alloc_device_names_failed:
+ debugfs_remove_recursive(binder_debugfs_dir_entry_root);
++ binder_alloc_shrinker_exit();
+
+ return ret;
+ }
+diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
+index 340515f54498c..54cee2b31c8e5 100644
+--- a/drivers/android/binder_alloc.c
++++ b/drivers/android/binder_alloc.c
+@@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
+ mm = alloc->vma_vm_mm;
+
+ if (mm) {
+- mmap_read_lock(mm);
++ mmap_write_lock(mm);
+ vma = alloc->vma;
+ }
+
+@@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
+ trace_binder_alloc_page_end(alloc, index);
+ }
+ if (mm) {
+- mmap_read_unlock(mm);
++ mmap_write_unlock(mm);
+ mmput(mm);
+ }
+ return 0;
+@@ -303,39 +303,24 @@ err_page_ptr_cleared:
+ }
+ err_no_vma:
+ if (mm) {
+- mmap_read_unlock(mm);
++ mmap_write_unlock(mm);
+ mmput(mm);
+ }
+ return vma ? -ENOMEM : -ESRCH;
+ }
+
+-
+ static inline void binder_alloc_set_vma(struct binder_alloc *alloc,
+ struct vm_area_struct *vma)
+ {
+- if (vma)
+- alloc->vma_vm_mm = vma->vm_mm;
+- /*
+- * If we see alloc->vma is not NULL, buffer data structures set up
+- * completely. Look at smp_rmb side binder_alloc_get_vma.
+- * We also want to guarantee new alloc->vma_vm_mm is always visible
+- * if alloc->vma is set.
+- */
+- smp_wmb();
+- alloc->vma = vma;
++ /* pairs with smp_load_acquire in binder_alloc_get_vma() */
++ smp_store_release(&alloc->vma, vma);
+ }
+
+ static inline struct vm_area_struct *binder_alloc_get_vma(
+ struct binder_alloc *alloc)
+ {
+- struct vm_area_struct *vma = NULL;
+-
+- if (alloc->vma) {
+- /* Look at description in binder_alloc_set_vma */
+- smp_rmb();
+- vma = alloc->vma;
+- }
+- return vma;
++ /* pairs with smp_store_release in binder_alloc_set_vma() */
++ return smp_load_acquire(&alloc->vma);
+ }
+
+ static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+@@ -398,6 +383,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
+ size_t size, data_offsets_size;
+ int ret;
+
++ /* Check binder_alloc is fully initialized */
+ if (!binder_alloc_get_vma(alloc)) {
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%d: binder_alloc_buf, no vma\n",
+@@ -671,7 +657,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
+ BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
+
+ if (buffer->async_transaction) {
+- alloc->free_async_space += size + sizeof(struct binder_buffer);
++ alloc->free_async_space += buffer_size + sizeof(struct binder_buffer);
+
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
+ "%d: binder_free_buf size %zd async free %zd\n",
+@@ -754,6 +740,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+ const char *failure_string;
+ struct binder_buffer *buffer;
+
++ if (unlikely(vma->vm_mm != alloc->vma_vm_mm)) {
++ ret = -EINVAL;
++ failure_string = "invalid vma->vm_mm";
++ goto err_invalid_mm;
++ }
++
+ mutex_lock(&binder_alloc_mmap_lock);
+ if (alloc->buffer_size) {
+ ret = -EBUSY;
+@@ -787,8 +779,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+ buffer->free = 1;
+ binder_insert_free_buffer(alloc, buffer);
+ alloc->free_async_space = alloc->buffer_size / 2;
++
++ /* Signal binder_alloc is fully initialized */
+ binder_alloc_set_vma(alloc, vma);
+- mmgrab(alloc->vma_vm_mm);
+
+ return 0;
+
+@@ -801,6 +794,7 @@ err_alloc_pages_failed:
+ alloc->buffer_size = 0;
+ err_already_mapped:
+ mutex_unlock(&binder_alloc_mmap_lock);
++err_invalid_mm:
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%s: %d %lx-%lx %s failed %d\n", __func__,
+ alloc->pid, vma->vm_start, vma->vm_end,
+@@ -1079,6 +1073,8 @@ static struct shrinker binder_shrinker = {
+ void binder_alloc_init(struct binder_alloc *alloc)
+ {
+ alloc->pid = current->group_leader->pid;
++ alloc->vma_vm_mm = current->mm;
++ mmgrab(alloc->vma_vm_mm);
+ mutex_init(&alloc->mutex);
+ INIT_LIST_HEAD(&alloc->buffers);
+ }
+@@ -1095,6 +1091,12 @@ int binder_alloc_shrinker_init(void)
+ return ret;
+ }
+
++void binder_alloc_shrinker_exit(void)
++{
++ unregister_shrinker(&binder_shrinker);
++ list_lru_destroy(&binder_alloc_lru);
++}
++
+ /**
+ * check_buffer() - verify that buffer/offset is safe to access
+ * @alloc: binder_alloc for this proc
+diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
+index 7dea57a84c79b..399f2b269f2c5 100644
+--- a/drivers/android/binder_alloc.h
++++ b/drivers/android/binder_alloc.h
+@@ -131,6 +131,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+ int pid);
+ extern void binder_alloc_init(struct binder_alloc *alloc);
+ extern int binder_alloc_shrinker_init(void);
++extern void binder_alloc_shrinker_exit(void);
+ extern void binder_alloc_vma_close(struct binder_alloc *alloc);
+ extern struct binder_buffer *
+ binder_alloc_prepare_to_free(struct binder_alloc *alloc,
+diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
+index 402c4d4362a83..1ade9799c8d58 100644
+--- a/drivers/android/binder_internal.h
++++ b/drivers/android/binder_internal.h
+@@ -107,41 +107,19 @@ static inline int __init init_binderfs(void)
+ }
+ #endif
+
+-int binder_stats_show(struct seq_file *m, void *unused);
+-DEFINE_SHOW_ATTRIBUTE(binder_stats);
+-
+-int binder_state_show(struct seq_file *m, void *unused);
+-DEFINE_SHOW_ATTRIBUTE(binder_state);
+-
+-int binder_transactions_show(struct seq_file *m, void *unused);
+-DEFINE_SHOW_ATTRIBUTE(binder_transactions);
+-
+-int binder_transaction_log_show(struct seq_file *m, void *unused);
+-DEFINE_SHOW_ATTRIBUTE(binder_transaction_log);
+-
+-struct binder_transaction_log_entry {
+- int debug_id;
+- int debug_id_done;
+- int call_type;
+- int from_proc;
+- int from_thread;
+- int target_handle;
+- int to_proc;
+- int to_thread;
+- int to_node;
+- int data_size;
+- int offsets_size;
+- int return_error_line;
+- uint32_t return_error;
+- uint32_t return_error_param;
+- char context_name[BINDERFS_MAX_NAME + 1];
++struct binder_debugfs_entry {
++ const char *name;
++ umode_t mode;
++ const struct file_operations *fops;
++ void *data;
+ };
+
+-struct binder_transaction_log {
+- atomic_t cur;
+- bool full;
+- struct binder_transaction_log_entry entry[32];
+-};
++extern const struct binder_debugfs_entry binder_debugfs_entries[];
++
++#define binder_for_each_debugfs_entry(entry) \
++ for ((entry) = binder_debugfs_entries; \
++ (entry)->name; \
++ (entry)++)
+
+ enum binder_stat_types {
+ BINDER_STAT_PROC,
+@@ -364,6 +342,9 @@ struct binder_ref {
+ * (invariant after initialized)
+ * @tsk task_struct for group_leader of process
+ * (invariant after initialized)
++ * @cred struct cred associated with the `struct file`
++ * in binder_open()
++ * (invariant after initialized)
+ * @deferred_work_node: element for binder_deferred_list
+ * (protected by binder_deferred_lock)
+ * @deferred_work: bitmap of deferred work to perform
+@@ -426,6 +407,7 @@ struct binder_proc {
+ struct list_head waiting_threads;
+ int pid;
+ struct task_struct *tsk;
++ const struct cred *cred;
+ struct hlist_node deferred_work_node;
+ int deferred_work;
+ int outstanding_txns;
+@@ -571,6 +553,4 @@ struct binder_object {
+ };
+ };
+
+-extern struct binder_transaction_log binder_transaction_log;
+-extern struct binder_transaction_log binder_transaction_log_failed;
+ #endif /* _LINUX_BINDER_INTERNAL_H */
+diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
+index e3605cdd43357..6d717ed76766e 100644
+--- a/drivers/android/binderfs.c
++++ b/drivers/android/binderfs.c
+@@ -621,6 +621,7 @@ static int init_binder_features(struct super_block *sb)
+ static int init_binder_logs(struct super_block *sb)
+ {
+ struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
++ const struct binder_debugfs_entry *db_entry;
+ struct binderfs_info *info;
+ int ret = 0;
+
+@@ -631,43 +632,15 @@ static int init_binder_logs(struct super_block *sb)
+ goto out;
+ }
+
+- dentry = binderfs_create_file(binder_logs_root_dir, "stats",
+- &binder_stats_fops, NULL);
+- if (IS_ERR(dentry)) {
+- ret = PTR_ERR(dentry);
+- goto out;
+- }
+-
+- dentry = binderfs_create_file(binder_logs_root_dir, "state",
+- &binder_state_fops, NULL);
+- if (IS_ERR(dentry)) {
+- ret = PTR_ERR(dentry);
+- goto out;
+- }
+-
+- dentry = binderfs_create_file(binder_logs_root_dir, "transactions",
+- &binder_transactions_fops, NULL);
+- if (IS_ERR(dentry)) {
+- ret = PTR_ERR(dentry);
+- goto out;
+- }
+-
+- dentry = binderfs_create_file(binder_logs_root_dir,
+- "transaction_log",
+- &binder_transaction_log_fops,
+- &binder_transaction_log);
+- if (IS_ERR(dentry)) {
+- ret = PTR_ERR(dentry);
+- goto out;
+- }
+-
+- dentry = binderfs_create_file(binder_logs_root_dir,
+- "failed_transaction_log",
+- &binder_transaction_log_fops,
+- &binder_transaction_log_failed);
+- if (IS_ERR(dentry)) {
+- ret = PTR_ERR(dentry);
+- goto out;
++ binder_for_each_debugfs_entry(db_entry) {
++ dentry = binderfs_create_file(binder_logs_root_dir,
++ db_entry->name,
++ db_entry->fops,
++ db_entry->data);
++ if (IS_ERR(dentry)) {
++ ret = PTR_ERR(dentry);
++ goto out;
++ }
+ }
+
+ proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc");
+diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
+index 2a04e8abd3977..26e0eb537b4f5 100644
+--- a/drivers/ata/acard-ahci.c
++++ b/drivers/ata/acard-ahci.c
+@@ -267,7 +267,7 @@ static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc)
+ if (qc->tf.protocol == ATA_PROT_PIO && qc->dma_dir == DMA_FROM_DEVICE &&
+ !(qc->flags & ATA_QCFLAG_FAILED)) {
+ ata_tf_from_fis(rx_fis + RX_FIS_PIO_SETUP, &qc->result_tf);
+- qc->result_tf.command = (rx_fis + RX_FIS_PIO_SETUP)[15];
++ qc->result_tf.status = (rx_fis + RX_FIS_PIO_SETUP)[15];
+ } else
+ ata_tf_from_fis(rx_fis + RX_FIS_D2H_REG, &qc->result_tf);
+
+diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
+index 186cbf90c8ead..149ee16fd0225 100644
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -83,6 +83,7 @@ enum board_ids {
+ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
+ static void ahci_remove_one(struct pci_dev *dev);
+ static void ahci_shutdown_one(struct pci_dev *dev);
++static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hpriv);
+ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
+ unsigned long deadline);
+ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
+@@ -442,6 +443,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
+ /* AMD */
+ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
+ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
++ { PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */
+ /* AMD is using RAID class only for ahci controllers */
+ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
+@@ -667,6 +669,25 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
+ ahci_save_initial_config(&pdev->dev, hpriv);
+ }
+
++static int ahci_pci_reset_controller(struct ata_host *host)
++{
++ struct pci_dev *pdev = to_pci_dev(host->dev);
++ struct ahci_host_priv *hpriv = host->private_data;
++ int rc;
++
++ rc = ahci_reset_controller(host);
++ if (rc)
++ return rc;
++
++ /*
++ * If platform firmware failed to enable ports, try to enable
++ * them here.
++ */
++ ahci_intel_pcs_quirk(pdev, hpriv);
++
++ return 0;
++}
++
+ static void ahci_pci_init_controller(struct ata_host *host)
+ {
+ struct ahci_host_priv *hpriv = host->private_data;
+@@ -734,7 +755,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
+
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+ rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context),
+@@ -805,7 +826,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
+
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+ rc = sata_link_hardreset(link, timing, deadline, &online,
+@@ -868,7 +889,7 @@ static int ahci_pci_device_runtime_resume(struct device *dev)
+ struct ata_host *host = pci_get_drvdata(pdev);
+ int rc;
+
+- rc = ahci_reset_controller(host);
++ rc = ahci_pci_reset_controller(host);
+ if (rc)
+ return rc;
+ ahci_pci_init_controller(host);
+@@ -903,7 +924,7 @@ static int ahci_pci_device_resume(struct device *dev)
+ ahci_mcp89_apple_enable(pdev);
+
+ if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
+- rc = ahci_reset_controller(host);
++ rc = ahci_pci_reset_controller(host);
+ if (rc)
+ return rc;
+
+@@ -1788,12 +1809,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ /* save initial config */
+ ahci_pci_save_initial_config(pdev, hpriv);
+
+- /*
+- * If platform firmware failed to enable ports, try to enable
+- * them here.
+- */
+- ahci_intel_pcs_quirk(pdev, hpriv);
+-
+ /* prepare host */
+ if (hpriv->cap & HOST_CAP_NCQ) {
+ pi.flags |= ATA_FLAG_NCQ;
+@@ -1903,7 +1918,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ if (rc)
+ return rc;
+
+- rc = ahci_reset_controller(host);
++ rc = ahci_pci_reset_controller(host);
+ if (rc)
+ return rc;
+
+diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
+index 2e89499bd9c3d..dcc2d92cf6b62 100644
+--- a/drivers/ata/ahci.h
++++ b/drivers/ata/ahci.h
+@@ -24,6 +24,7 @@
+ #include <linux/libata.h>
+ #include <linux/phy/phy.h>
+ #include <linux/regulator/consumer.h>
++#include <linux/bits.h>
+
+ /* Enclosure Management Control */
+ #define EM_CTRL_MSG_TYPE 0x000f0000
+@@ -54,12 +55,12 @@ enum {
+ AHCI_PORT_PRIV_FBS_DMA_SZ = AHCI_CMD_SLOT_SZ +
+ AHCI_CMD_TBL_AR_SZ +
+ (AHCI_RX_FIS_SZ * 16),
+- AHCI_IRQ_ON_SG = (1 << 31),
+- AHCI_CMD_ATAPI = (1 << 5),
+- AHCI_CMD_WRITE = (1 << 6),
+- AHCI_CMD_PREFETCH = (1 << 7),
+- AHCI_CMD_RESET = (1 << 8),
+- AHCI_CMD_CLR_BUSY = (1 << 10),
++ AHCI_IRQ_ON_SG = BIT(31),
++ AHCI_CMD_ATAPI = BIT(5),
++ AHCI_CMD_WRITE = BIT(6),
++ AHCI_CMD_PREFETCH = BIT(7),
++ AHCI_CMD_RESET = BIT(8),
++ AHCI_CMD_CLR_BUSY = BIT(10),
+
+ RX_FIS_PIO_SETUP = 0x20, /* offset of PIO Setup FIS data */
+ RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */
+@@ -77,37 +78,37 @@ enum {
+ HOST_CAP2 = 0x24, /* host capabilities, extended */
+
+ /* HOST_CTL bits */
+- HOST_RESET = (1 << 0), /* reset controller; self-clear */
+- HOST_IRQ_EN = (1 << 1), /* global IRQ enable */
+- HOST_MRSM = (1 << 2), /* MSI Revert to Single Message */
+- HOST_AHCI_EN = (1 << 31), /* AHCI enabled */
++ HOST_RESET = BIT(0), /* reset controller; self-clear */
++ HOST_IRQ_EN = BIT(1), /* global IRQ enable */
++ HOST_MRSM = BIT(2), /* MSI Revert to Single Message */
++ HOST_AHCI_EN = BIT(31), /* AHCI enabled */
+
+ /* HOST_CAP bits */
+- HOST_CAP_SXS = (1 << 5), /* Supports External SATA */
+- HOST_CAP_EMS = (1 << 6), /* Enclosure Management support */
+- HOST_CAP_CCC = (1 << 7), /* Command Completion Coalescing */
+- HOST_CAP_PART = (1 << 13), /* Partial state capable */
+- HOST_CAP_SSC = (1 << 14), /* Slumber state capable */
+- HOST_CAP_PIO_MULTI = (1 << 15), /* PIO multiple DRQ support */
+- HOST_CAP_FBS = (1 << 16), /* FIS-based switching support */
+- HOST_CAP_PMP = (1 << 17), /* Port Multiplier support */
+- HOST_CAP_ONLY = (1 << 18), /* Supports AHCI mode only */
+- HOST_CAP_CLO = (1 << 24), /* Command List Override support */
+- HOST_CAP_LED = (1 << 25), /* Supports activity LED */
+- HOST_CAP_ALPM = (1 << 26), /* Aggressive Link PM support */
+- HOST_CAP_SSS = (1 << 27), /* Staggered Spin-up */
+- HOST_CAP_MPS = (1 << 28), /* Mechanical presence switch */
+- HOST_CAP_SNTF = (1 << 29), /* SNotification register */
+- HOST_CAP_NCQ = (1 << 30), /* Native Command Queueing */
+- HOST_CAP_64 = (1 << 31), /* PCI DAC (64-bit DMA) support */
++ HOST_CAP_SXS = BIT(5), /* Supports External SATA */
++ HOST_CAP_EMS = BIT(6), /* Enclosure Management support */
++ HOST_CAP_CCC = BIT(7), /* Command Completion Coalescing */
++ HOST_CAP_PART = BIT(13), /* Partial state capable */
++ HOST_CAP_SSC = BIT(14), /* Slumber state capable */
++ HOST_CAP_PIO_MULTI = BIT(15), /* PIO multiple DRQ support */
++ HOST_CAP_FBS = BIT(16), /* FIS-based switching support */
++ HOST_CAP_PMP = BIT(17), /* Port Multiplier support */
++ HOST_CAP_ONLY = BIT(18), /* Supports AHCI mode only */
++ HOST_CAP_CLO = BIT(24), /* Command List Override support */
++ HOST_CAP_LED = BIT(25), /* Supports activity LED */
++ HOST_CAP_ALPM = BIT(26), /* Aggressive Link PM support */
++ HOST_CAP_SSS = BIT(27), /* Staggered Spin-up */
++ HOST_CAP_MPS = BIT(28), /* Mechanical presence switch */
++ HOST_CAP_SNTF = BIT(29), /* SNotification register */
++ HOST_CAP_NCQ = BIT(30), /* Native Command Queueing */
++ HOST_CAP_64 = BIT(31), /* PCI DAC (64-bit DMA) support */
+
+ /* HOST_CAP2 bits */
+- HOST_CAP2_BOH = (1 << 0), /* BIOS/OS handoff supported */
+- HOST_CAP2_NVMHCI = (1 << 1), /* NVMHCI supported */
+- HOST_CAP2_APST = (1 << 2), /* Automatic partial to slumber */
+- HOST_CAP2_SDS = (1 << 3), /* Support device sleep */
+- HOST_CAP2_SADM = (1 << 4), /* Support aggressive DevSlp */
+- HOST_CAP2_DESO = (1 << 5), /* DevSlp from slumber only */
++ HOST_CAP2_BOH = BIT(0), /* BIOS/OS handoff supported */
++ HOST_CAP2_NVMHCI = BIT(1), /* NVMHCI supported */
++ HOST_CAP2_APST = BIT(2), /* Automatic partial to slumber */
++ HOST_CAP2_SDS = BIT(3), /* Support device sleep */
++ HOST_CAP2_SADM = BIT(4), /* Support aggressive DevSlp */
++ HOST_CAP2_DESO = BIT(5), /* DevSlp from slumber only */
+
+ /* registers for each SATA port */
+ PORT_LST_ADDR = 0x00, /* command list DMA addr */
+@@ -129,24 +130,24 @@ enum {
+ PORT_DEVSLP = 0x44, /* device sleep */
+
+ /* PORT_IRQ_{STAT,MASK} bits */
+- PORT_IRQ_COLD_PRES = (1 << 31), /* cold presence detect */
+- PORT_IRQ_TF_ERR = (1 << 30), /* task file error */
+- PORT_IRQ_HBUS_ERR = (1 << 29), /* host bus fatal error */
+- PORT_IRQ_HBUS_DATA_ERR = (1 << 28), /* host bus data error */
+- PORT_IRQ_IF_ERR = (1 << 27), /* interface fatal error */
+- PORT_IRQ_IF_NONFATAL = (1 << 26), /* interface non-fatal error */
+- PORT_IRQ_OVERFLOW = (1 << 24), /* xfer exhausted available S/G */
+- PORT_IRQ_BAD_PMP = (1 << 23), /* incorrect port multiplier */
+-
+- PORT_IRQ_PHYRDY = (1 << 22), /* PhyRdy changed */
+- PORT_IRQ_DEV_ILCK = (1 << 7), /* device interlock */
+- PORT_IRQ_CONNECT = (1 << 6), /* port connect change status */
+- PORT_IRQ_SG_DONE = (1 << 5), /* descriptor processed */
+- PORT_IRQ_UNK_FIS = (1 << 4), /* unknown FIS rx'd */
+- PORT_IRQ_SDB_FIS = (1 << 3), /* Set Device Bits FIS rx'd */
+- PORT_IRQ_DMAS_FIS = (1 << 2), /* DMA Setup FIS rx'd */
+- PORT_IRQ_PIOS_FIS = (1 << 1), /* PIO Setup FIS rx'd */
+- PORT_IRQ_D2H_REG_FIS = (1 << 0), /* D2H Register FIS rx'd */
++ PORT_IRQ_COLD_PRES = BIT(31), /* cold presence detect */
++ PORT_IRQ_TF_ERR = BIT(30), /* task file error */
++ PORT_IRQ_HBUS_ERR = BIT(29), /* host bus fatal error */
++ PORT_IRQ_HBUS_DATA_ERR = BIT(28), /* host bus data error */
++ PORT_IRQ_IF_ERR = BIT(27), /* interface fatal error */
++ PORT_IRQ_IF_NONFATAL = BIT(26), /* interface non-fatal error */
++ PORT_IRQ_OVERFLOW = BIT(24), /* xfer exhausted available S/G */
++ PORT_IRQ_BAD_PMP = BIT(23), /* incorrect port multiplier */
++
++ PORT_IRQ_PHYRDY = BIT(22), /* PhyRdy changed */
++ PORT_IRQ_DEV_ILCK = BIT(7), /* device interlock */
++ PORT_IRQ_CONNECT = BIT(6), /* port connect change status */
++ PORT_IRQ_SG_DONE = BIT(5), /* descriptor processed */
++ PORT_IRQ_UNK_FIS = BIT(4), /* unknown FIS rx'd */
++ PORT_IRQ_SDB_FIS = BIT(3), /* Set Device Bits FIS rx'd */
++ PORT_IRQ_DMAS_FIS = BIT(2), /* DMA Setup FIS rx'd */
++ PORT_IRQ_PIOS_FIS = BIT(1), /* PIO Setup FIS rx'd */
++ PORT_IRQ_D2H_REG_FIS = BIT(0), /* D2H Register FIS rx'd */
+
+ PORT_IRQ_FREEZE = PORT_IRQ_HBUS_ERR |
+ PORT_IRQ_IF_ERR |
+@@ -162,34 +163,34 @@ enum {
+ PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS,
+
+ /* PORT_CMD bits */
+- PORT_CMD_ASP = (1 << 27), /* Aggressive Slumber/Partial */
+- PORT_CMD_ALPE = (1 << 26), /* Aggressive Link PM enable */
+- PORT_CMD_ATAPI = (1 << 24), /* Device is ATAPI */
+- PORT_CMD_FBSCP = (1 << 22), /* FBS Capable Port */
+- PORT_CMD_ESP = (1 << 21), /* External Sata Port */
+- PORT_CMD_HPCP = (1 << 18), /* HotPlug Capable Port */
+- PORT_CMD_PMP = (1 << 17), /* PMP attached */
+- PORT_CMD_LIST_ON = (1 << 15), /* cmd list DMA engine running */
+- PORT_CMD_FIS_ON = (1 << 14), /* FIS DMA engine running */
+- PORT_CMD_FIS_RX = (1 << 4), /* Enable FIS receive DMA engine */
+- PORT_CMD_CLO = (1 << 3), /* Command list override */
+- PORT_CMD_POWER_ON = (1 << 2), /* Power up device */
+- PORT_CMD_SPIN_UP = (1 << 1), /* Spin up device */
+- PORT_CMD_START = (1 << 0), /* Enable port DMA engine */
+-
+- PORT_CMD_ICC_MASK = (0xf << 28), /* i/f ICC state mask */
+- PORT_CMD_ICC_ACTIVE = (0x1 << 28), /* Put i/f in active state */
+- PORT_CMD_ICC_PARTIAL = (0x2 << 28), /* Put i/f in partial state */
+- PORT_CMD_ICC_SLUMBER = (0x6 << 28), /* Put i/f in slumber state */
++ PORT_CMD_ASP = BIT(27), /* Aggressive Slumber/Partial */
++ PORT_CMD_ALPE = BIT(26), /* Aggressive Link PM enable */
++ PORT_CMD_ATAPI = BIT(24), /* Device is ATAPI */
++ PORT_CMD_FBSCP = BIT(22), /* FBS Capable Port */
++ PORT_CMD_ESP = BIT(21), /* External Sata Port */
++ PORT_CMD_HPCP = BIT(18), /* HotPlug Capable Port */
++ PORT_CMD_PMP = BIT(17), /* PMP attached */
++ PORT_CMD_LIST_ON = BIT(15), /* cmd list DMA engine running */
++ PORT_CMD_FIS_ON = BIT(14), /* FIS DMA engine running */
++ PORT_CMD_FIS_RX = BIT(4), /* Enable FIS receive DMA engine */
++ PORT_CMD_CLO = BIT(3), /* Command list override */
++ PORT_CMD_POWER_ON = BIT(2), /* Power up device */
++ PORT_CMD_SPIN_UP = BIT(1), /* Spin up device */
++ PORT_CMD_START = BIT(0), /* Enable port DMA engine */
++
++ PORT_CMD_ICC_MASK = (0xfu << 28), /* i/f ICC state mask */
++ PORT_CMD_ICC_ACTIVE = (0x1u << 28), /* Put i/f in active state */
++ PORT_CMD_ICC_PARTIAL = (0x2u << 28), /* Put i/f in partial state */
++ PORT_CMD_ICC_SLUMBER = (0x6u << 28), /* Put i/f in slumber state */
+
+ /* PORT_FBS bits */
+ PORT_FBS_DWE_OFFSET = 16, /* FBS device with error offset */
+ PORT_FBS_ADO_OFFSET = 12, /* FBS active dev optimization offset */
+ PORT_FBS_DEV_OFFSET = 8, /* FBS device to issue offset */
+ PORT_FBS_DEV_MASK = (0xf << PORT_FBS_DEV_OFFSET), /* FBS.DEV */
+- PORT_FBS_SDE = (1 << 2), /* FBS single device error */
+- PORT_FBS_DEC = (1 << 1), /* FBS device error clear */
+- PORT_FBS_EN = (1 << 0), /* Enable FBS */
++ PORT_FBS_SDE = BIT(2), /* FBS single device error */
++ PORT_FBS_DEC = BIT(1), /* FBS device error clear */
++ PORT_FBS_EN = BIT(0), /* Enable FBS */
+
+ /* PORT_DEVSLP bits */
+ PORT_DEVSLP_DM_OFFSET = 25, /* DITO multiplier offset */
+@@ -197,52 +198,52 @@ enum {
+ PORT_DEVSLP_DITO_OFFSET = 15, /* DITO offset */
+ PORT_DEVSLP_MDAT_OFFSET = 10, /* Minimum assertion time */
+ PORT_DEVSLP_DETO_OFFSET = 2, /* DevSlp exit timeout */
+- PORT_DEVSLP_DSP = (1 << 1), /* DevSlp present */
+- PORT_DEVSLP_ADSE = (1 << 0), /* Aggressive DevSlp enable */
++ PORT_DEVSLP_DSP = BIT(1), /* DevSlp present */
++ PORT_DEVSLP_ADSE = BIT(0), /* Aggressive DevSlp enable */
+
+ /* hpriv->flags bits */
+
+ #define AHCI_HFLAGS(flags) .private_data = (void *)(flags)
+
+- AHCI_HFLAG_NO_NCQ = (1 << 0),
+- AHCI_HFLAG_IGN_IRQ_IF_ERR = (1 << 1), /* ignore IRQ_IF_ERR */
+- AHCI_HFLAG_IGN_SERR_INTERNAL = (1 << 2), /* ignore SERR_INTERNAL */
+- AHCI_HFLAG_32BIT_ONLY = (1 << 3), /* force 32bit */
+- AHCI_HFLAG_MV_PATA = (1 << 4), /* PATA port */
+- AHCI_HFLAG_NO_MSI = (1 << 5), /* no PCI MSI */
+- AHCI_HFLAG_NO_PMP = (1 << 6), /* no PMP */
+- AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */
+- AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */
+- AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */
+- AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
+- link offline */
+- AHCI_HFLAG_NO_SNTF = (1 << 12), /* no sntf */
+- AHCI_HFLAG_NO_FPDMA_AA = (1 << 13), /* no FPDMA AA */
+- AHCI_HFLAG_YES_FBS = (1 << 14), /* force FBS cap on */
+- AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on
+- port start (wait until
+- error-handling stage) */
+- AHCI_HFLAG_NO_DEVSLP = (1 << 17), /* no device sleep */
+- AHCI_HFLAG_NO_FBS = (1 << 18), /* no FBS */
++ AHCI_HFLAG_NO_NCQ = BIT(0),
++ AHCI_HFLAG_IGN_IRQ_IF_ERR = BIT(1), /* ignore IRQ_IF_ERR */
++ AHCI_HFLAG_IGN_SERR_INTERNAL = BIT(2), /* ignore SERR_INTERNAL */
++ AHCI_HFLAG_32BIT_ONLY = BIT(3), /* force 32bit */
++ AHCI_HFLAG_MV_PATA = BIT(4), /* PATA port */
++ AHCI_HFLAG_NO_MSI = BIT(5), /* no PCI MSI */
++ AHCI_HFLAG_NO_PMP = BIT(6), /* no PMP */
++ AHCI_HFLAG_SECT255 = BIT(8), /* max 255 sectors */
++ AHCI_HFLAG_YES_NCQ = BIT(9), /* force NCQ cap on */
++ AHCI_HFLAG_NO_SUSPEND = BIT(10), /* don't suspend */
++ AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = BIT(11), /* treat SRST timeout as
++ link offline */
++ AHCI_HFLAG_NO_SNTF = BIT(12), /* no sntf */
++ AHCI_HFLAG_NO_FPDMA_AA = BIT(13), /* no FPDMA AA */
++ AHCI_HFLAG_YES_FBS = BIT(14), /* force FBS cap on */
++ AHCI_HFLAG_DELAY_ENGINE = BIT(15), /* do not start engine on
++ port start (wait until
++ error-handling stage) */
++ AHCI_HFLAG_NO_DEVSLP = BIT(17), /* no device sleep */
++ AHCI_HFLAG_NO_FBS = BIT(18), /* no FBS */
+
+ #ifdef CONFIG_PCI_MSI
+- AHCI_HFLAG_MULTI_MSI = (1 << 20), /* per-port MSI(-X) */
++ AHCI_HFLAG_MULTI_MSI = BIT(20), /* per-port MSI(-X) */
+ #else
+ /* compile out MSI infrastructure */
+ AHCI_HFLAG_MULTI_MSI = 0,
+ #endif
+- AHCI_HFLAG_WAKE_BEFORE_STOP = (1 << 22), /* wake before DMA stop */
+- AHCI_HFLAG_YES_ALPM = (1 << 23), /* force ALPM cap on */
+- AHCI_HFLAG_NO_WRITE_TO_RO = (1 << 24), /* don't write to read
+- only registers */
+- AHCI_HFLAG_IS_MOBILE = (1 << 25), /* mobile chipset, use
+- SATA_MOBILE_LPM_POLICY
+- as default lpm_policy */
+- AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during
+- suspend/resume */
+- AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP
+- from phy_power_on() */
+- AHCI_HFLAG_NO_SXS = (1 << 28), /* SXS not supported */
++ AHCI_HFLAG_WAKE_BEFORE_STOP = BIT(22), /* wake before DMA stop */
++ AHCI_HFLAG_YES_ALPM = BIT(23), /* force ALPM cap on */
++ AHCI_HFLAG_NO_WRITE_TO_RO = BIT(24), /* don't write to read
++ only registers */
++ AHCI_HFLAG_IS_MOBILE = BIT(25), /* mobile chipset, use
++ SATA_MOBILE_LPM_POLICY
++ as default lpm_policy */
++ AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during
++ suspend/resume */
++ AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = BIT(27), /* ignore -EOPNOTSUPP
++ from phy_power_on() */
++ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */
+
+ /* ap->flags bits */
+
+@@ -254,26 +255,26 @@ enum {
+ PCS_7 = 0x94, /* 7+ port PCS (Denverton) */
+
+ /* em constants */
+- EM_MAX_SLOTS = 8,
++ EM_MAX_SLOTS = SATA_PMP_MAX_PORTS,
+ EM_MAX_RETRY = 5,
+
+ /* em_ctl bits */
+- EM_CTL_RST = (1 << 9), /* Reset */
+- EM_CTL_TM = (1 << 8), /* Transmit Message */
+- EM_CTL_MR = (1 << 0), /* Message Received */
+- EM_CTL_ALHD = (1 << 26), /* Activity LED */
+- EM_CTL_XMT = (1 << 25), /* Transmit Only */
+- EM_CTL_SMB = (1 << 24), /* Single Message Buffer */
+- EM_CTL_SGPIO = (1 << 19), /* SGPIO messages supported */
+- EM_CTL_SES = (1 << 18), /* SES-2 messages supported */
+- EM_CTL_SAFTE = (1 << 17), /* SAF-TE messages supported */
+- EM_CTL_LED = (1 << 16), /* LED messages supported */
++ EM_CTL_RST = BIT(9), /* Reset */
++ EM_CTL_TM = BIT(8), /* Transmit Message */
++ EM_CTL_MR = BIT(0), /* Message Received */
++ EM_CTL_ALHD = BIT(26), /* Activity LED */
++ EM_CTL_XMT = BIT(25), /* Transmit Only */
++ EM_CTL_SMB = BIT(24), /* Single Message Buffer */
++ EM_CTL_SGPIO = BIT(19), /* SGPIO messages supported */
++ EM_CTL_SES = BIT(18), /* SES-2 messages supported */
++ EM_CTL_SAFTE = BIT(17), /* SAF-TE messages supported */
++ EM_CTL_LED = BIT(16), /* LED messages supported */
+
+ /* em message type */
+- EM_MSG_TYPE_LED = (1 << 0), /* LED */
+- EM_MSG_TYPE_SAFTE = (1 << 1), /* SAF-TE */
+- EM_MSG_TYPE_SES2 = (1 << 2), /* SES-2 */
+- EM_MSG_TYPE_SGPIO = (1 << 3), /* SGPIO */
++ EM_MSG_TYPE_LED = BIT(0), /* LED */
++ EM_MSG_TYPE_SAFTE = BIT(1), /* SAF-TE */
++ EM_MSG_TYPE_SES2 = BIT(2), /* SES-2 */
++ EM_MSG_TYPE_SGPIO = BIT(3), /* SGPIO */
+ };
+
+ struct ahci_cmd_hdr {
+diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
+index 388baf528fa81..189f75d537414 100644
+--- a/drivers/ata/ahci_imx.c
++++ b/drivers/ata/ahci_imx.c
+@@ -1230,4 +1230,4 @@ module_platform_driver(imx_ahci_driver);
+ MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver");
+ MODULE_AUTHOR("Richard Zhu <Hong-Xing.Zhu@freescale.com>");
+ MODULE_LICENSE("GPL");
+-MODULE_ALIAS("ahci:imx");
++MODULE_ALIAS("platform:" DRV_NAME);
+diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
+index 5b46fc9aeb4a0..e5ac3d1c214c0 100644
+--- a/drivers/ata/ahci_qoriq.c
++++ b/drivers/ata/ahci_qoriq.c
+@@ -125,7 +125,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
+
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+ rc = sata_link_hardreset(link, timing, deadline, &online,
+diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
+index dffc432b9d54a..292099410cf68 100644
+--- a/drivers/ata/ahci_xgene.c
++++ b/drivers/ata/ahci_xgene.c
+@@ -365,7 +365,7 @@ static int xgene_ahci_do_hardreset(struct ata_link *link,
+ do {
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+ rc = sata_link_hardreset(link, timing, deadline, online,
+ ahci_check_ready);
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index 5b3fa2cbe7223..192115a45dd78 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1552,7 +1552,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
+
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+ rc = sata_link_hardreset(link, timing, deadline, online,
+@@ -2038,7 +2038,7 @@ static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc)
+ if (qc->tf.protocol == ATA_PROT_PIO && qc->dma_dir == DMA_FROM_DEVICE &&
+ !(qc->flags & ATA_QCFLAG_FAILED)) {
+ ata_tf_from_fis(rx_fis + RX_FIS_PIO_SETUP, &qc->result_tf);
+- qc->result_tf.command = (rx_fis + RX_FIS_PIO_SETUP)[15];
++ qc->result_tf.status = (rx_fis + RX_FIS_PIO_SETUP)[15];
+ } else
+ ata_tf_from_fis(rx_fis + RX_FIS_D2H_REG, &qc->result_tf);
+
+@@ -2305,6 +2305,18 @@ int ahci_port_resume(struct ata_port *ap)
+ EXPORT_SYMBOL_GPL(ahci_port_resume);
+
+ #ifdef CONFIG_PM
++static void ahci_handle_s2idle(struct ata_port *ap)
++{
++ void __iomem *port_mmio = ahci_port_base(ap);
++ u32 devslp;
++
++ if (pm_suspend_via_firmware())
++ return;
++ devslp = readl(port_mmio + PORT_DEVSLP);
++ if ((devslp & PORT_DEVSLP_ADSE))
++ ata_msleep(ap, devslp_idle_timeout);
++}
++
+ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
+ {
+ const char *emsg = NULL;
+@@ -2318,6 +2330,9 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
+ ata_port_freeze(ap);
+ }
+
++ if (acpi_storage_d3(ap->host->dev))
++ ahci_handle_s2idle(ap);
++
+ ahci_rpm_put_port(ap);
+ return rc;
+ }
+diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
+index 0910441321f72..64d6da0a53035 100644
+--- a/drivers/ata/libahci_platform.c
++++ b/drivers/ata/libahci_platform.c
+@@ -451,14 +451,24 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
+ }
+ }
+
+- hpriv->nports = child_nodes = of_get_child_count(dev->of_node);
++ /*
++ * Too many sub-nodes most likely means having something wrong with
++ * the firmware.
++ */
++ child_nodes = of_get_child_count(dev->of_node);
++ if (child_nodes > AHCI_MAX_PORTS) {
++ rc = -EINVAL;
++ goto err_out;
++ }
+
+ /*
+ * If no sub-node was found, we still need to set nports to
+ * one in order to be able to use the
+ * ahci_platform_[en|dis]able_[phys|regulators] functions.
+ */
+- if (!child_nodes)
++ if (child_nodes)
++ hpriv->nports = child_nodes;
++ else
+ hpriv->nports = 1;
+
+ hpriv->phys = devm_kcalloc(dev, hpriv->nports, sizeof(*hpriv->phys), GFP_KERNEL);
+diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
+index 7a7d6642edcc5..d15f3e908ea4a 100644
+--- a/drivers/ata/libata-acpi.c
++++ b/drivers/ata/libata-acpi.c
+@@ -554,13 +554,13 @@ static void ata_acpi_gtf_to_tf(struct ata_device *dev,
+
+ tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+ tf->protocol = ATA_PROT_NODATA;
+- tf->feature = gtf->tf[0]; /* 0x1f1 */
++ tf->error = gtf->tf[0]; /* 0x1f1 */
+ tf->nsect = gtf->tf[1]; /* 0x1f2 */
+ tf->lbal = gtf->tf[2]; /* 0x1f3 */
+ tf->lbam = gtf->tf[3]; /* 0x1f4 */
+ tf->lbah = gtf->tf[4]; /* 0x1f5 */
+ tf->device = gtf->tf[5]; /* 0x1f6 */
+- tf->command = gtf->tf[6]; /* 0x1f7 */
++ tf->status = gtf->tf[6]; /* 0x1f7 */
+ }
+
+ static int ata_acpi_filter_tf(struct ata_device *dev,
+@@ -650,9 +650,7 @@ static int ata_acpi_run_tf(struct ata_device *dev,
+ struct ata_taskfile *pptf = NULL;
+ struct ata_taskfile tf, ptf, rtf;
+ unsigned int err_mask;
+- const char *level;
+ const char *descr;
+- char msg[60];
+ int rc;
+
+ if ((gtf->tf[0] == 0) && (gtf->tf[1] == 0) && (gtf->tf[2] == 0)
+@@ -666,6 +664,10 @@ static int ata_acpi_run_tf(struct ata_device *dev,
+ pptf = &ptf;
+ }
+
++ descr = ata_get_cmd_descript(tf.command);
++ if (!descr)
++ descr = "unknown";
++
+ if (!ata_acpi_filter_tf(dev, &tf, pptf)) {
+ rtf = tf;
+ err_mask = ata_exec_internal(dev, &rtf, NULL,
+@@ -673,40 +675,42 @@ static int ata_acpi_run_tf(struct ata_device *dev,
+
+ switch (err_mask) {
+ case 0:
+- level = KERN_DEBUG;
+- snprintf(msg, sizeof(msg), "succeeded");
++ ata_dev_dbg(dev,
++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
++ "(%s) succeeded\n",
++ tf.command, tf.feature, tf.nsect, tf.lbal,
++ tf.lbam, tf.lbah, tf.device, descr);
+ rc = 1;
+ break;
+
+ case AC_ERR_DEV:
+- level = KERN_INFO;
+- snprintf(msg, sizeof(msg),
+- "rejected by device (Stat=0x%02x Err=0x%02x)",
+- rtf.command, rtf.feature);
++ ata_dev_info(dev,
++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
++ "(%s) rejected by device (Stat=0x%02x Err=0x%02x)",
++ tf.command, tf.feature, tf.nsect, tf.lbal,
++ tf.lbam, tf.lbah, tf.device, descr,
++ rtf.status, rtf.error);
+ rc = 0;
+ break;
+
+ default:
+- level = KERN_ERR;
+- snprintf(msg, sizeof(msg),
+- "failed (Emask=0x%x Stat=0x%02x Err=0x%02x)",
+- err_mask, rtf.command, rtf.feature);
++ ata_dev_err(dev,
++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
++ "(%s) failed (Emask=0x%x Stat=0x%02x Err=0x%02x)",
++ tf.command, tf.feature, tf.nsect, tf.lbal,
++ tf.lbam, tf.lbah, tf.device, descr,
++ err_mask, rtf.status, rtf.error);
+ rc = -EIO;
+ break;
+ }
+ } else {
+- level = KERN_INFO;
+- snprintf(msg, sizeof(msg), "filtered out");
++ ata_dev_info(dev,
++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x"
++ "(%s) filtered out\n",
++ tf.command, tf.feature, tf.nsect, tf.lbal,
++ tf.lbam, tf.lbah, tf.device, descr);
+ rc = 0;
+ }
+- descr = ata_get_cmd_descript(tf.command);
+-
+- ata_dev_printk(dev, level,
+- "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n",
+- tf.command, tf.feature, tf.nsect, tf.lbal,
+- tf.lbam, tf.lbah, tf.device,
+- (descr ? descr : "unknown"), msg);
+-
+ return rc;
+ }
+
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index eed65311b5d1d..025260b80a94c 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -1185,7 +1185,7 @@ static int ata_read_native_max_address(struct ata_device *dev, u64 *max_sectors)
+ ata_dev_warn(dev,
+ "failed to read native max address (err_mask=0x%x)\n",
+ err_mask);
+- if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED))
++ if (err_mask == AC_ERR_DEV && (tf.error & ATA_ABORTED))
+ return -EACCES;
+ return -EIO;
+ }
+@@ -1249,7 +1249,7 @@ static int ata_set_max_sectors(struct ata_device *dev, u64 new_sectors)
+ "failed to set max address (err_mask=0x%x)\n",
+ err_mask);
+ if (err_mask == AC_ERR_DEV &&
+- (tf.feature & (ATA_ABORTED | ATA_IDNF)))
++ (tf.error & (ATA_ABORTED | ATA_IDNF)))
+ return -EACCES;
+ return -EIO;
+ }
+@@ -1616,7 +1616,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
+
+ /* perform minimal error analysis */
+ if (qc->flags & ATA_QCFLAG_FAILED) {
+- if (qc->result_tf.command & (ATA_ERR | ATA_DF))
++ if (qc->result_tf.status & (ATA_ERR | ATA_DF))
+ qc->err_mask |= AC_ERR_DEV;
+
+ if (!qc->err_mask)
+@@ -1625,7 +1625,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
+ if (qc->err_mask & ~AC_ERR_OTHER)
+ qc->err_mask &= ~AC_ERR_OTHER;
+ } else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) {
+- qc->result_tf.command |= ATA_SENSE;
++ qc->result_tf.status |= ATA_SENSE;
+ }
+
+ /* finish up */
+@@ -1848,7 +1848,7 @@ retry:
+ return 0;
+ }
+
+- if ((err_mask == AC_ERR_DEV) && (tf.feature & ATA_ABORTED)) {
++ if ((err_mask == AC_ERR_DEV) && (tf.error & ATA_ABORTED)) {
+ /* Device or controller might have reported
+ * the wrong device class. Give a shot at the
+ * other IDENTIFY if the current one is
+@@ -2007,7 +2007,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
+
+ retry:
+ ata_tf_init(dev, &tf);
+- if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) &&
++ if (ata_dma_enabled(dev) && ata_id_has_read_log_dma_ext(dev->id) &&
+ !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) {
+ tf.command = ATA_CMD_READ_LOG_DMA_EXT;
+ tf.protocol = ATA_PROT_DMA;
+@@ -2031,8 +2031,9 @@ retry:
+ dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
+ goto retry;
+ }
+- ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n",
+- (unsigned int)page, err_mask);
++ ata_dev_err(dev,
++ "Read log 0x%02x page 0x%02x failed, Emask 0x%x\n",
++ (unsigned int)log, (unsigned int)page, err_mask);
+ }
+
+ return err_mask;
+@@ -2166,6 +2167,9 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev)
+ struct ata_port *ap = dev->link->ap;
+ unsigned int err_mask;
+
++ if (!ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
++ return;
++
+ err_mask = ata_read_log_page(dev,
+ ATA_LOG_IDENTIFY_DEVICE,
+ ATA_LOG_SATA_SETTINGS,
+@@ -2442,7 +2446,8 @@ static void ata_dev_config_devslp(struct ata_device *dev)
+ * Check device sleep capability. Get DevSlp timing variables
+ * from SATA Settings page of Identify Device Data Log.
+ */
+- if (!ata_id_has_devslp(dev->id))
++ if (!ata_id_has_devslp(dev->id) ||
++ !ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
+ return;
+
+ err_mask = ata_read_log_page(dev,
+@@ -3071,7 +3076,7 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
+ */
+ if (spd > 1)
+ mask &= (1 << (spd - 1)) - 1;
+- else
++ else if (link->sata_spd)
+ return -EINVAL;
+
+ /* were we already at the bottom? */
+@@ -3851,6 +3856,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
+ { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA },
+ /* Odd clown on sil3726/4726 PMPs */
+ { "Config Disk", NULL, ATA_HORKAGE_DISABLE },
++ /* Similar story with ASMedia 1092 */
++ { "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE },
+
+ /* Weird ATAPI devices */
+ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 },
+@@ -3954,6 +3961,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
+ { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER },
+ { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER },
+
++ /* These specific Pioneer models have LPM issues */
++ { "PIONEER BD-RW BDR-207M", NULL, ATA_HORKAGE_NOLPM },
++ { "PIONEER BD-RW BDR-205", NULL, ATA_HORKAGE_NOLPM },
++
+ /* Crucial BX100 SSD 500GB has broken LPM support */
+ { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM },
+
+@@ -3992,6 +4003,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
++ { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
++ ATA_HORKAGE_NO_DMA_LOG |
++ ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+@@ -4007,6 +4021,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
+
+ /* devices that don't properly handle TRIM commands */
+ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, },
++ { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, },
+
+ /*
+ * As defined, the DRAT (Deterministic Read After Trim) and RZAT
+@@ -4356,7 +4371,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev,
+ /* A clean abort indicates an original or just out of spec drive
+ and we should continue as we issue the setup based on the
+ drive reported working geometry */
+- if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED))
++ if (err_mask == AC_ERR_DEV && (tf.error & ATA_ABORTED))
+ err_mask = 0;
+
+ DPRINTK("EXIT, err_mask=%x\n", err_mask);
+@@ -5489,7 +5504,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev,
+ const struct ata_port_info * const * ppi,
+ int n_ports)
+ {
+- const struct ata_port_info *pi;
++ const struct ata_port_info *pi = &ata_dummy_port_info;
+ struct ata_host *host;
+ int i, j;
+
+@@ -5497,7 +5512,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev,
+ if (!host)
+ return NULL;
+
+- for (i = 0, j = 0, pi = NULL; i < host->n_ports; i++) {
++ for (i = 0, j = 0; i < host->n_ports; i++) {
+ struct ata_port *ap = host->ports[i];
+
+ if (ppi[j])
+@@ -6482,67 +6497,6 @@ const struct ata_port_info ata_dummy_port_info = {
+ };
+ EXPORT_SYMBOL_GPL(ata_dummy_port_info);
+
+-/*
+- * Utility print functions
+- */
+-void ata_port_printk(const struct ata_port *ap, const char *level,
+- const char *fmt, ...)
+-{
+- struct va_format vaf;
+- va_list args;
+-
+- va_start(args, fmt);
+-
+- vaf.fmt = fmt;
+- vaf.va = &args;
+-
+- printk("%sata%u: %pV", level, ap->print_id, &vaf);
+-
+- va_end(args);
+-}
+-EXPORT_SYMBOL(ata_port_printk);
+-
+-void ata_link_printk(const struct ata_link *link, const char *level,
+- const char *fmt, ...)
+-{
+- struct va_format vaf;
+- va_list args;
+-
+- va_start(args, fmt);
+-
+- vaf.fmt = fmt;
+- vaf.va = &args;
+-
+- if (sata_pmp_attached(link->ap) || link->ap->slave_link)
+- printk("%sata%u.%02u: %pV",
+- level, link->ap->print_id, link->pmp, &vaf);
+- else
+- printk("%sata%u: %pV",
+- level, link->ap->print_id, &vaf);
+-
+- va_end(args);
+-}
+-EXPORT_SYMBOL(ata_link_printk);
+-
+-void ata_dev_printk(const struct ata_device *dev, const char *level,
+- const char *fmt, ...)
+-{
+- struct va_format vaf;
+- va_list args;
+-
+- va_start(args, fmt);
+-
+- vaf.fmt = fmt;
+- vaf.va = &args;
+-
+- printk("%sata%u.%02u: %pV",
+- level, dev->link->ap->print_id, dev->link->pmp + dev->devno,
+- &vaf);
+-
+- va_end(args);
+-}
+-EXPORT_SYMBOL(ata_dev_printk);
+-
+ void ata_print_version(const struct device *dev, const char *version)
+ {
+ dev_printk(KERN_DEBUG, dev, "version %s\n", version);
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index bf9c4b6c5c3d4..8350abc172908 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -93,6 +93,12 @@ static const unsigned long ata_eh_identify_timeouts[] = {
+ ULONG_MAX,
+ };
+
++static const unsigned long ata_eh_revalidate_timeouts[] = {
++ 15000, /* Some drives are slow to read log pages when waking-up */
++ 15000, /* combined time till here is enough even for media access */
++ ULONG_MAX,
++};
++
+ static const unsigned long ata_eh_flush_timeouts[] = {
+ 15000, /* be generous with flush */
+ 15000, /* ditto */
+@@ -129,6 +135,8 @@ static const struct ata_eh_cmd_timeout_ent
+ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
+ { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
+ .timeouts = ata_eh_identify_timeouts, },
++ { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
++ .timeouts = ata_eh_revalidate_timeouts, },
+ { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
+ .timeouts = ata_eh_other_timeouts, },
+ { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
+@@ -1378,7 +1386,7 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
+
+ err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
+ if (err_mask == AC_ERR_DEV)
+- *r_sense_key = tf.feature >> 4;
++ *r_sense_key = tf.error >> 4;
+ return err_mask;
+ }
+
+@@ -1423,12 +1431,12 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc,
+
+ err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+ /* Ignore err_mask; ATA_ERR might be set */
+- if (tf.command & ATA_SENSE) {
++ if (tf.status & ATA_SENSE) {
+ ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
+ qc->flags |= ATA_QCFLAG_SENSE_VALID;
+ } else {
+ ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
+- tf.command, err_mask);
++ tf.status, err_mask);
+ }
+ }
+
+@@ -1553,7 +1561,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
+ const struct ata_taskfile *tf)
+ {
+ unsigned int tmp, action = 0;
+- u8 stat = tf->command, err = tf->feature;
++ u8 stat = tf->status, err = tf->error;
+
+ if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
+ qc->err_mask |= AC_ERR_HSM;
+@@ -1590,7 +1598,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
+ if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
+ tmp = atapi_eh_request_sense(qc->dev,
+ qc->scsicmd->sense_buffer,
+- qc->result_tf.feature >> 4);
++ qc->result_tf.error >> 4);
+ if (!tmp)
+ qc->flags |= ATA_QCFLAG_SENSE_VALID;
+ else
+@@ -2122,6 +2130,7 @@ const char *ata_get_cmd_descript(u8 command)
+ { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
+ { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
+ { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
++ { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" },
+ { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
+ { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
+ { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
+@@ -2363,7 +2372,7 @@ static void ata_eh_link_report(struct ata_link *link)
+ cmd->hob_feature, cmd->hob_nsect,
+ cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
+ cmd->device, qc->tag, data_buf, cdb_buf,
+- res->command, res->feature, res->nsect,
++ res->status, res->error, res->nsect,
+ res->lbal, res->lbam, res->lbah,
+ res->hob_feature, res->hob_nsect,
+ res->hob_lbal, res->hob_lbam, res->hob_lbah,
+@@ -2371,28 +2380,28 @@ static void ata_eh_link_report(struct ata_link *link)
+ qc->err_mask & AC_ERR_NCQ ? " <F>" : "");
+
+ #ifdef CONFIG_ATA_VERBOSE_ERROR
+- if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
+- ATA_SENSE | ATA_ERR)) {
+- if (res->command & ATA_BUSY)
++ if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
++ ATA_SENSE | ATA_ERR)) {
++ if (res->status & ATA_BUSY)
+ ata_dev_err(qc->dev, "status: { Busy }\n");
+ else
+ ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
+- res->command & ATA_DRDY ? "DRDY " : "",
+- res->command & ATA_DF ? "DF " : "",
+- res->command & ATA_DRQ ? "DRQ " : "",
+- res->command & ATA_SENSE ? "SENSE " : "",
+- res->command & ATA_ERR ? "ERR " : "");
++ res->status & ATA_DRDY ? "DRDY " : "",
++ res->status & ATA_DF ? "DF " : "",
++ res->status & ATA_DRQ ? "DRQ " : "",
++ res->status & ATA_SENSE ? "SENSE " : "",
++ res->status & ATA_ERR ? "ERR " : "");
+ }
+
+ if (cmd->command != ATA_CMD_PACKET &&
+- (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF |
+- ATA_IDNF | ATA_ABORTED)))
++ (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF |
++ ATA_ABORTED)))
+ ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
+- res->feature & ATA_ICRC ? "ICRC " : "",
+- res->feature & ATA_UNC ? "UNC " : "",
+- res->feature & ATA_AMNF ? "AMNF " : "",
+- res->feature & ATA_IDNF ? "IDNF " : "",
+- res->feature & ATA_ABORTED ? "ABRT " : "");
++ res->error & ATA_ICRC ? "ICRC " : "",
++ res->error & ATA_UNC ? "UNC " : "",
++ res->error & ATA_AMNF ? "AMNF " : "",
++ res->error & ATA_IDNF ? "IDNF " : "",
++ res->error & ATA_ABORTED ? "ABRT " : "");
+ #endif
+ }
+ }
+diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
+index 8f3ff830ab0c6..b5aa525d87603 100644
+--- a/drivers/ata/libata-sata.c
++++ b/drivers/ata/libata-sata.c
+@@ -191,8 +191,8 @@ EXPORT_SYMBOL_GPL(ata_tf_to_fis);
+
+ void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf)
+ {
+- tf->command = fis[2]; /* status */
+- tf->feature = fis[3]; /* error */
++ tf->status = fis[2];
++ tf->error = fis[3];
+
+ tf->lbal = fis[4];
+ tf->lbam = fis[5];
+@@ -1402,8 +1402,8 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
+
+ *tag = buf[0] & 0x1f;
+
+- tf->command = buf[2];
+- tf->feature = buf[3];
++ tf->status = buf[2];
++ tf->error = buf[3];
+ tf->lbal = buf[4];
+ tf->lbam = buf[5];
+ tf->lbah = buf[6];
+@@ -1413,7 +1413,8 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
+ tf->hob_lbah = buf[10];
+ tf->nsect = buf[12];
+ tf->hob_nsect = buf[13];
+- if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
++ if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id) &&
++ (tf->status & ATA_SENSE))
+ tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
+
+ return 0;
+@@ -1477,8 +1478,12 @@ void ata_eh_analyze_ncq_error(struct ata_link *link)
+ memcpy(&qc->result_tf, &tf, sizeof(tf));
+ qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
+ qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
+- if (dev->class == ATA_DEV_ZAC &&
+- ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
++
++ /*
++ * If the device supports NCQ autosense, ata_eh_read_log_10h() will have
++ * stored the sense data in qc->result_tf.auxiliary.
++ */
++ if (qc->result_tf.auxiliary) {
+ char sense_key, asc, ascq;
+
+ sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index 1fb4611f7eeb9..fd9c768f31efe 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -671,7 +671,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
+ */
+ static void ata_dump_status(unsigned id, struct ata_taskfile *tf)
+ {
+- u8 stat = tf->command, err = tf->feature;
++ u8 stat = tf->status, err = tf->error;
+
+ pr_warn("ata%u: status=0x%02x { ", id, stat);
+ if (stat & ATA_BUSY) {
+@@ -867,8 +867,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
+ * onto sense key, asc & ascq.
+ */
+ if (qc->err_mask ||
+- tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
+- ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature,
++ tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
++ ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
+ &sense_key, &asc, &ascq, verbose);
+ ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
+ } else {
+@@ -897,13 +897,13 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
+ * Copy registers into sense buffer.
+ */
+ desc[2] = 0x00;
+- desc[3] = tf->feature; /* == error reg */
++ desc[3] = tf->error;
+ desc[5] = tf->nsect;
+ desc[7] = tf->lbal;
+ desc[9] = tf->lbam;
+ desc[11] = tf->lbah;
+ desc[12] = tf->device;
+- desc[13] = tf->command; /* == status reg */
++ desc[13] = tf->status;
+
+ /*
+ * Fill in Extend bit, and the high order bytes
+@@ -918,8 +918,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
+ }
+ } else {
+ /* Fixed sense format */
+- desc[0] = tf->feature;
+- desc[1] = tf->command; /* status */
++ desc[0] = tf->error;
++ desc[1] = tf->status;
+ desc[2] = tf->device;
+ desc[3] = tf->nsect;
+ desc[7] = 0;
+@@ -968,14 +968,14 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
+ * onto sense key, asc & ascq.
+ */
+ if (qc->err_mask ||
+- tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
+- ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature,
++ tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
++ ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
+ &sense_key, &asc, &ascq, verbose);
+ ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq);
+ } else {
+ /* Could not decode error */
+ ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n",
+- tf->command, qc->err_mask);
++ tf->status, qc->err_mask);
+ ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0);
+ return;
+ }
+@@ -2490,7 +2490,7 @@ static void atapi_request_sense(struct ata_queued_cmd *qc)
+
+ /* fill these in, for the case where they are -not- overwritten */
+ cmd->sense_buffer[0] = 0x70;
+- cmd->sense_buffer[2] = qc->tf.feature >> 4;
++ cmd->sense_buffer[2] = qc->tf.error >> 4;
+
+ ata_qc_reinit(qc);
+
+@@ -2698,18 +2698,36 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
+ return 0;
+ }
+
+-static struct ata_device *ata_find_dev(struct ata_port *ap, int devno)
++static struct ata_device *ata_find_dev(struct ata_port *ap, unsigned int devno)
+ {
+- if (!sata_pmp_attached(ap)) {
+- if (likely(devno >= 0 &&
+- devno < ata_link_max_devices(&ap->link)))
++ /*
++ * For the non-PMP case, ata_link_max_devices() returns 1 (SATA case),
++ * or 2 (IDE master + slave case). However, the former case includes
++ * libsas hosted devices which are numbered per scsi host, leading
++ * to devno potentially being larger than 0 but with each struct
++ * ata_device having its own struct ata_port and struct ata_link.
++ * To accommodate these, ignore devno and always use device number 0.
++ */
++ if (likely(!sata_pmp_attached(ap))) {
++ int link_max_devices = ata_link_max_devices(&ap->link);
++
++ if (link_max_devices == 1)
++ return &ap->link.device[0];
++
++ if (devno < link_max_devices)
+ return &ap->link.device[devno];
+- } else {
+- if (likely(devno >= 0 &&
+- devno < ap->nr_pmp_links))
+- return &ap->pmp_link[devno].device[0];
++
++ return NULL;
+ }
+
++ /*
++ * For PMP-attached devices, the device number corresponds to C
++ * (channel) of SCSI [H:C:I:L], indicating the port pmp link
++ * for the device.
++ */
++ if (devno < ap->nr_pmp_links)
++ return &ap->pmp_link[devno].device[0];
++
+ return NULL;
+ }
+
+@@ -2826,8 +2844,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
+ goto invalid_fld;
+ }
+
+- if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0)
+- tf->protocol = ATA_PROT_NCQ_NODATA;
++ if ((cdb[2 + cdb_offset] & 0x3) == 0) {
++ /*
++ * When T_LENGTH is zero (No data is transferred), dir should
++ * be DMA_NONE.
++ */
++ if (scmd->sc_data_direction != DMA_NONE) {
++ fp = 2 + cdb_offset;
++ goto invalid_fld;
++ }
++
++ if (ata_is_ncq(tf->protocol))
++ tf->protocol = ATA_PROT_NCQ_NODATA;
++ }
+
+ /* enable LBA */
+ tf->flags |= ATA_TFLAG_LBA;
+@@ -3248,6 +3277,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
+ case REPORT_LUNS:
+ case REQUEST_SENSE:
+ case SYNCHRONIZE_CACHE:
++ case SYNCHRONIZE_CACHE_16:
+ case REZERO_UNIT:
+ case SEEK_6:
+ case SEEK_10:
+@@ -3914,6 +3944,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
+ return ata_scsi_write_same_xlat;
+
+ case SYNCHRONIZE_CACHE:
++ case SYNCHRONIZE_CACHE_16:
+ if (ata_try_flush_cache(dev))
+ return ata_scsi_flush_xlat;
+ break;
+@@ -3975,44 +4006,51 @@ void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd)
+
+ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
+ {
++ struct ata_port *ap = dev->link->ap;
+ u8 scsi_op = scmd->cmnd[0];
+ ata_xlat_func_t xlat_func;
+- int rc = 0;
++
++ /*
++ * scsi_queue_rq() will defer commands if scsi_host_in_recovery().
++ * However, this check is done without holding the ap->lock (a libata
++ * specific lock), so we can have received an error irq since then,
++ * therefore we must check if EH is pending, while holding ap->lock.
++ */
++ if (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS))
++ return SCSI_MLQUEUE_DEVICE_BUSY;
++
++ if (unlikely(!scmd->cmd_len))
++ goto bad_cdb_len;
+
+ if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) {
+- if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len))
++ if (unlikely(scmd->cmd_len > dev->cdb_len))
+ goto bad_cdb_len;
+
+ xlat_func = ata_get_xlat_func(dev, scsi_op);
+- } else {
+- if (unlikely(!scmd->cmd_len))
+- goto bad_cdb_len;
++ } else if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
++ /* relay SCSI command to ATAPI device */
++ int len = COMMAND_SIZE(scsi_op);
+
+- xlat_func = NULL;
+- if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
+- /* relay SCSI command to ATAPI device */
+- int len = COMMAND_SIZE(scsi_op);
+- if (unlikely(len > scmd->cmd_len ||
+- len > dev->cdb_len ||
+- scmd->cmd_len > ATAPI_CDB_LEN))
+- goto bad_cdb_len;
++ if (unlikely(len > scmd->cmd_len ||
++ len > dev->cdb_len ||
++ scmd->cmd_len > ATAPI_CDB_LEN))
++ goto bad_cdb_len;
+
+- xlat_func = atapi_xlat;
+- } else {
+- /* ATA_16 passthru, treat as an ATA command */
+- if (unlikely(scmd->cmd_len > 16))
+- goto bad_cdb_len;
++ xlat_func = atapi_xlat;
++ } else {
++ /* ATA_16 passthru, treat as an ATA command */
++ if (unlikely(scmd->cmd_len > 16))
++ goto bad_cdb_len;
+
+- xlat_func = ata_get_xlat_func(dev, scsi_op);
+- }
++ xlat_func = ata_get_xlat_func(dev, scsi_op);
+ }
+
+ if (xlat_func)
+- rc = ata_scsi_translate(dev, scmd, xlat_func);
+- else
+- ata_scsi_simulate(dev, scmd);
++ return ata_scsi_translate(dev, scmd, xlat_func);
+
+- return rc;
++ ata_scsi_simulate(dev, scmd);
++
++ return 0;
+
+ bad_cdb_len:
+ DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n",
+@@ -4159,6 +4197,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
+ * turning this into a no-op.
+ */
+ case SYNCHRONIZE_CACHE:
++ case SYNCHRONIZE_CACHE_16:
+ fallthrough;
+
+ /* no-op's, complete with success */
+diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
+index b71ea4a680b01..8409e53b7b7a0 100644
+--- a/drivers/ata/libata-sff.c
++++ b/drivers/ata/libata-sff.c
+@@ -457,8 +457,8 @@ void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+- tf->command = ata_sff_check_status(ap);
+- tf->feature = ioread8(ioaddr->error_addr);
++ tf->status = ata_sff_check_status(ap);
++ tf->error = ioread8(ioaddr->error_addr);
+ tf->nsect = ioread8(ioaddr->nsect_addr);
+ tf->lbal = ioread8(ioaddr->lbal_addr);
+ tf->lbam = ioread8(ioaddr->lbam_addr);
+@@ -1837,7 +1837,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
+ memset(&tf, 0, sizeof(tf));
+
+ ap->ops->sff_tf_read(ap, &tf);
+- err = tf.feature;
++ err = tf.error;
+ if (r_err)
+ *r_err = err;
+
+diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
+index 34bb4608bdc67..60f22e1a4943f 100644
+--- a/drivers/ata/libata-transport.c
++++ b/drivers/ata/libata-transport.c
+@@ -196,7 +196,7 @@ static struct {
+ { XFER_PIO_0, "XFER_PIO_0" },
+ { XFER_PIO_SLOW, "XFER_PIO_SLOW" }
+ };
+-ata_bitfield_name_match(xfer,ata_xfer_names)
++ata_bitfield_name_search(xfer, ata_xfer_names)
+
+ /*
+ * ATA Port attributes
+@@ -301,7 +301,9 @@ int ata_tport_add(struct device *parent,
+ pm_runtime_enable(dev);
+ pm_runtime_forbid(dev);
+
+- transport_add_device(dev);
++ error = transport_add_device(dev);
++ if (error)
++ goto tport_transport_add_err;
+ transport_configure_device(dev);
+
+ error = ata_tlink_add(&ap->link);
+@@ -312,12 +314,12 @@ int ata_tport_add(struct device *parent,
+
+ tport_link_err:
+ transport_remove_device(dev);
++ tport_transport_add_err:
+ device_del(dev);
+
+ tport_err:
+ transport_destroy_device(dev);
+ put_device(dev);
+- ata_host_put(ap->host);
+ return error;
+ }
+
+@@ -426,7 +428,9 @@ int ata_tlink_add(struct ata_link *link)
+ goto tlink_err;
+ }
+
+- transport_add_device(dev);
++ error = transport_add_device(dev);
++ if (error)
++ goto tlink_transport_err;
+ transport_configure_device(dev);
+
+ ata_for_each_dev(ata_dev, link, ALL) {
+@@ -441,6 +445,7 @@ int ata_tlink_add(struct ata_link *link)
+ ata_tdev_delete(ata_dev);
+ }
+ transport_remove_device(dev);
++ tlink_transport_err:
+ device_del(dev);
+ tlink_err:
+ transport_destroy_device(dev);
+@@ -678,7 +683,13 @@ static int ata_tdev_add(struct ata_device *ata_dev)
+ return error;
+ }
+
+- transport_add_device(dev);
++ error = transport_add_device(dev);
++ if (error) {
++ device_del(dev);
++ ata_tdev_free(ata_dev);
++ return error;
++ }
++
+ transport_configure_device(dev);
+ return 0;
+ }
+diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
+index 63f39440a9b42..4ba02f082f962 100644
+--- a/drivers/ata/pata_arasan_cf.c
++++ b/drivers/ata/pata_arasan_cf.c
+@@ -528,7 +528,8 @@ static void data_xfer(struct work_struct *work)
+ /* dma_request_channel may sleep, so calling from process context */
+ acdev->dma_chan = dma_request_chan(acdev->host->dev, "data");
+ if (IS_ERR(acdev->dma_chan)) {
+- dev_err(acdev->host->dev, "Unable to get dma_chan\n");
++ dev_err_probe(acdev->host->dev, PTR_ERR(acdev->dma_chan),
++ "Unable to get dma_chan\n");
+ acdev->dma_chan = NULL;
+ goto chan_request_fail;
+ }
+diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c
+index 46208ececbb6a..3fc26026014e2 100644
+--- a/drivers/ata/pata_ep93xx.c
++++ b/drivers/ata/pata_ep93xx.c
+@@ -416,8 +416,8 @@ static void ep93xx_pata_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ep93xx_pata_data *drv_data = ap->host->private_data;
+
+- tf->command = ep93xx_pata_check_status(ap);
+- tf->feature = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_FEATURE);
++ tf->status = ep93xx_pata_check_status(ap);
++ tf->error = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_FEATURE);
+ tf->nsect = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_NSECT);
+ tf->lbal = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_LBAL);
+ tf->lbam = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_LBAM);
+diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c
+index 121635aa8c00c..a7745a2be9056 100644
+--- a/drivers/ata/pata_falcon.c
++++ b/drivers/ata/pata_falcon.c
+@@ -123,8 +123,8 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
+ struct resource *base_res, *ctl_res, *irq_res;
+ struct ata_host *host;
+ struct ata_port *ap;
+- void __iomem *base;
+- int irq = 0;
++ void __iomem *base, *ctl_base;
++ int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
+
+ dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
+
+@@ -165,26 +165,34 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
+ ap->pio_mask = ATA_PIO4;
+ ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
+
+- base = (void __iomem *)base_mem_res->start;
+ /* N.B. this assumes data_addr will be used for word-sized I/O only */
+- ap->ioaddr.data_addr = base + 0 + 0 * 4;
+- ap->ioaddr.error_addr = base + 1 + 1 * 4;
+- ap->ioaddr.feature_addr = base + 1 + 1 * 4;
+- ap->ioaddr.nsect_addr = base + 1 + 2 * 4;
+- ap->ioaddr.lbal_addr = base + 1 + 3 * 4;
+- ap->ioaddr.lbam_addr = base + 1 + 4 * 4;
+- ap->ioaddr.lbah_addr = base + 1 + 5 * 4;
+- ap->ioaddr.device_addr = base + 1 + 6 * 4;
+- ap->ioaddr.status_addr = base + 1 + 7 * 4;
+- ap->ioaddr.command_addr = base + 1 + 7 * 4;
+-
+- base = (void __iomem *)ctl_mem_res->start;
+- ap->ioaddr.altstatus_addr = base + 1;
+- ap->ioaddr.ctl_addr = base + 1;
+-
+- ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
+- (unsigned long)base_mem_res->start,
+- (unsigned long)ctl_mem_res->start);
++ ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
++
++ if (base_res) { /* only Q40 has IO resources */
++ io_offset = 0x10000;
++ reg_shift = 0;
++ base = (void __iomem *)base_res->start;
++ ctl_base = (void __iomem *)ctl_res->start;
++ } else {
++ base = (void __iomem *)base_mem_res->start;
++ ctl_base = (void __iomem *)ctl_mem_res->start;
++ }
++
++ ap->ioaddr.error_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.nsect_addr = base + io_offset + (2 << reg_shift);
++ ap->ioaddr.lbal_addr = base + io_offset + (3 << reg_shift);
++ ap->ioaddr.lbam_addr = base + io_offset + (4 << reg_shift);
++ ap->ioaddr.lbah_addr = base + io_offset + (5 << reg_shift);
++ ap->ioaddr.device_addr = base + io_offset + (6 << reg_shift);
++ ap->ioaddr.status_addr = base + io_offset + (7 << reg_shift);
++ ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
++
++ ap->ioaddr.altstatus_addr = ctl_base + io_offset;
++ ap->ioaddr.ctl_addr = ctl_base + io_offset;
++
++ ata_port_desc(ap, "cmd %px ctl %px data %px",
++ base, ctl_base, ap->ioaddr.data_addr);
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (irq_res && irq_res->start > 0) {
+diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c
+index 34cb104f6b43e..bc30e2f305beb 100644
+--- a/drivers/ata/pata_ftide010.c
++++ b/drivers/ata/pata_ftide010.c
+@@ -570,6 +570,7 @@ static struct platform_driver pata_ftide010_driver = {
+ };
+ module_platform_driver(pata_ftide010_driver);
+
++MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
+diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
+index f242157bc81bb..9d371859e81ed 100644
+--- a/drivers/ata/pata_hpt37x.c
++++ b/drivers/ata/pata_hpt37x.c
+@@ -919,6 +919,20 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+ irqmask &= ~0x10;
+ pci_write_config_byte(dev, 0x5a, irqmask);
+
++ /*
++ * HPT371 chips physically have only one channel, the secondary one,
++ * but the primary channel registers do exist! Go figure...
++ * So, we manually disable the non-existing channel here
++ * (if the BIOS hasn't done this already).
++ */
++ if (dev->device == PCI_DEVICE_ID_TTI_HPT371) {
++ u8 mcr1;
++
++ pci_read_config_byte(dev, 0x50, &mcr1);
++ mcr1 &= ~0x04;
++ pci_write_config_byte(dev, 0x50, mcr1);
++ }
++
+ /*
+ * default to pci clock. make sure MA15/16 are set to output
+ * to prevent drives having problems with 40-pin cables. Needed
+@@ -950,14 +964,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+
+ if ((freq >> 12) != 0xABCDE) {
+ int i;
+- u8 sr;
++ u16 sr;
+ u32 total = 0;
+
+ pr_warn("BIOS has not set timing clocks\n");
+
+ /* This is the process the HPT371 BIOS is reported to use */
+ for (i = 0; i < 128; i++) {
+- pci_read_config_byte(dev, 0x78, &sr);
++ pci_read_config_word(dev, 0x78, &sr);
+ total += sr & 0x1FF;
+ udelay(15);
+ }
+diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
+index 99c63087c8ae9..17b557c91e1c7 100644
+--- a/drivers/ata/pata_ixp4xx_cf.c
++++ b/drivers/ata/pata_ixp4xx_cf.c
+@@ -114,7 +114,7 @@ static void ixp4xx_set_piomode(struct ata_port *ap, struct ata_device *adev)
+ {
+ struct ixp4xx_pata *ixpp = ap->host->private_data;
+
+- ata_dev_printk(adev, KERN_INFO, "configured for PIO%d 8bit\n",
++ ata_dev_info(adev, "configured for PIO%d 8bit\n",
+ adev->pio_mode - XFER_PIO_0);
+ ixp4xx_set_8bit_timing(ixpp, adev->pio_mode);
+ }
+@@ -132,8 +132,8 @@ static unsigned int ixp4xx_mmio_data_xfer(struct ata_queued_cmd *qc,
+ struct ixp4xx_pata *ixpp = ap->host->private_data;
+ unsigned long flags;
+
+- ata_dev_printk(adev, KERN_DEBUG, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE",
+- buflen);
++ ata_dev_dbg(adev, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE",
++ buflen);
+ spin_lock_irqsave(ap->lock, flags);
+
+ /* set the expansion bus in 16bit mode and restore
+diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
+index 0a8bf09a5c19e..03c580625c2cc 100644
+--- a/drivers/ata/pata_legacy.c
++++ b/drivers/ata/pata_legacy.c
+@@ -315,9 +315,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev)
+ outb(inb(0x1F4) & 0x07, 0x1F4);
+
+ rt = inb(0x1F3);
+- rt &= 0x07 << (3 * adev->devno);
++ rt &= ~(0x07 << (3 * !adev->devno));
+ if (pio)
+- rt |= (1 + 3 * pio) << (3 * adev->devno);
++ rt |= (1 + 3 * pio) << (3 * !adev->devno);
++ outb(rt, 0x1F3);
+
+ udelay(100);
+ outb(inb(0x1F2) | 0x01, 0x1F2);
+diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
+index 361597d14c569..d45a75bfc0169 100644
+--- a/drivers/ata/pata_marvell.c
++++ b/drivers/ata/pata_marvell.c
+@@ -83,6 +83,8 @@ static int marvell_cable_detect(struct ata_port *ap)
+ switch(ap->port_no)
+ {
+ case 0:
++ if (!ap->ioaddr.bmdma_addr)
++ return ATA_CBL_PATA_UNK;
+ if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1)
+ return ATA_CBL_PATA40;
+ return ATA_CBL_PATA80;
+diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
+index f4949e704356e..602472d4e693e 100644
+--- a/drivers/ata/pata_ns87415.c
++++ b/drivers/ata/pata_ns87415.c
+@@ -260,12 +260,12 @@ static u8 ns87560_check_status(struct ata_port *ap)
+ * LOCKING:
+ * Inherited from caller.
+ */
+-void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
++static void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+- tf->command = ns87560_check_status(ap);
+- tf->feature = ioread8(ioaddr->error_addr);
++ tf->status = ns87560_check_status(ap);
++ tf->error = ioread8(ioaddr->error_addr);
+ tf->nsect = ioread8(ioaddr->nsect_addr);
+ tf->lbal = ioread8(ioaddr->lbal_addr);
+ tf->lbam = ioread8(ioaddr->lbam_addr);
+diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
+index b5a3f710d76de..6c9f2efcedc11 100644
+--- a/drivers/ata/pata_octeon_cf.c
++++ b/drivers/ata/pata_octeon_cf.c
+@@ -386,7 +386,7 @@ static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf)
+ void __iomem *base = ap->ioaddr.data_addr;
+
+ blob = __raw_readw(base + 0xc);
+- tf->feature = blob >> 8;
++ tf->error = blob >> 8;
+
+ blob = __raw_readw(base + 2);
+ tf->nsect = blob & 0xff;
+@@ -398,7 +398,7 @@ static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf)
+
+ blob = __raw_readw(base + 6);
+ tf->device = blob & 0xff;
+- tf->command = blob >> 8;
++ tf->status = blob >> 8;
+
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ if (likely(ap->ioaddr.ctl_addr)) {
+@@ -888,12 +888,14 @@ static int octeon_cf_probe(struct platform_device *pdev)
+ int i;
+ res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0);
+ if (!res_dma) {
++ put_device(&dma_dev->dev);
+ of_node_put(dma_node);
+ return -EINVAL;
+ }
+ cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start,
+ resource_size(res_dma));
+ if (!cf_port->dma_base) {
++ put_device(&dma_dev->dev);
+ of_node_put(dma_node);
+ return -EINVAL;
+ }
+@@ -903,6 +905,7 @@ static int octeon_cf_probe(struct platform_device *pdev)
+ irq = i;
+ irq_handler = octeon_cf_interrupt;
+ }
++ put_device(&dma_dev->dev);
+ }
+ of_node_put(dma_node);
+ }
+diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
+index 3da0e8e302861..149d771c61d67 100644
+--- a/drivers/ata/pata_samsung_cf.c
++++ b/drivers/ata/pata_samsung_cf.c
+@@ -213,7 +213,7 @@ static void pata_s3c_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+- tf->feature = ata_inb(ap->host, ioaddr->error_addr);
++ tf->error = ata_inb(ap->host, ioaddr->error_addr);
+ tf->nsect = ata_inb(ap->host, ioaddr->nsect_addr);
+ tf->lbal = ata_inb(ap->host, ioaddr->lbal_addr);
+ tf->lbam = ata_inb(ap->host, ioaddr->lbam_addr);
+diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
+index 338c2e50f7591..29e2b0dfba309 100644
+--- a/drivers/ata/sata_dwc_460ex.c
++++ b/drivers/ata/sata_dwc_460ex.c
+@@ -145,7 +145,11 @@ struct sata_dwc_device {
+ #endif
+ };
+
+-#define SATA_DWC_QCMD_MAX 32
++/*
++ * Allow one extra special slot for commands and DMA management
++ * to account for libata internal commands.
++ */
++#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1)
+
+ struct sata_dwc_device_port {
+ struct sata_dwc_device *hsdev;
+diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
+index e5838b23c9e0a..3b31a4f596d86 100644
+--- a/drivers/ata/sata_fsl.c
++++ b/drivers/ata/sata_fsl.c
+@@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host)
+ return 0;
+ }
+
++static void sata_fsl_host_stop(struct ata_host *host)
++{
++ struct sata_fsl_host_priv *host_priv = host->private_data;
++
++ iounmap(host_priv->hcr_base);
++ kfree(host_priv);
++}
++
+ /*
+ * scsi mid-layer and libata interface structures
+ */
+@@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = {
+ .port_start = sata_fsl_port_start,
+ .port_stop = sata_fsl_port_stop,
+
++ .host_stop = sata_fsl_host_stop,
++
+ .pmp_attach = sata_fsl_pmp_attach,
+ .pmp_detach = sata_fsl_pmp_detach,
+ };
+@@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev)
+ host_priv->ssr_base = ssr_base;
+ host_priv->csr_base = csr_base;
+
+- irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
+- if (!irq) {
+- dev_err(&ofdev->dev, "invalid irq from platform\n");
++ irq = platform_get_irq(ofdev, 0);
++ if (irq < 0) {
++ retval = irq;
+ goto error_exit_with_cleanup;
+ }
+ host_priv->irq = irq;
+@@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev)
+
+ ata_host_detach(host);
+
+- irq_dispose_mapping(host_priv->irq);
+- iounmap(host_priv->hcr_base);
+- kfree(host_priv);
+-
+ return 0;
+ }
+
+diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c
+index f793564f3d787..6fd54e968d10a 100644
+--- a/drivers/ata/sata_gemini.c
++++ b/drivers/ata/sata_gemini.c
+@@ -435,6 +435,7 @@ static struct platform_driver gemini_sata_driver = {
+ };
+ module_platform_driver(gemini_sata_driver);
+
++MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
+diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
+index 8440203e835ed..f9bb3be4b939e 100644
+--- a/drivers/ata/sata_highbank.c
++++ b/drivers/ata/sata_highbank.c
+@@ -400,7 +400,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
+
+ /* clear D2H reception area to properly wait for D2H FIS */
+ ata_tf_init(link->device, &tf);
+- tf.command = ATA_BUSY;
++ tf.status = ATA_BUSY;
+ ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+
+ do {
+diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
+index e517bd8822a5f..659f1a903298f 100644
+--- a/drivers/ata/sata_inic162x.c
++++ b/drivers/ata/sata_inic162x.c
+@@ -559,13 +559,13 @@ static void inic_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ void __iomem *port_base = inic_port_base(ap);
+
+- tf->feature = readb(port_base + PORT_TF_FEATURE);
++ tf->error = readb(port_base + PORT_TF_FEATURE);
+ tf->nsect = readb(port_base + PORT_TF_NSECT);
+ tf->lbal = readb(port_base + PORT_TF_LBAL);
+ tf->lbam = readb(port_base + PORT_TF_LBAM);
+ tf->lbah = readb(port_base + PORT_TF_LBAH);
+ tf->device = readb(port_base + PORT_TF_DEVICE);
+- tf->command = readb(port_base + PORT_TF_COMMAND);
++ tf->status = readb(port_base + PORT_TF_COMMAND);
+ }
+
+ static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc)
+@@ -582,11 +582,11 @@ static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc)
+ */
+ inic_tf_read(qc->ap, &tf);
+
+- if (!(tf.command & ATA_ERR))
++ if (!(tf.status & ATA_ERR))
+ return false;
+
+- rtf->command = tf.command;
+- rtf->feature = tf.feature;
++ rtf->status = tf.status;
++ rtf->error = tf.error;
+ return true;
+ }
+
+diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
+index 44b0ed8f6bb8a..9759e24f718fc 100644
+--- a/drivers/ata/sata_rcar.c
++++ b/drivers/ata/sata_rcar.c
+@@ -417,8 +417,8 @@ static void sata_rcar_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+
+- tf->command = sata_rcar_check_status(ap);
+- tf->feature = ioread32(ioaddr->error_addr);
++ tf->status = sata_rcar_check_status(ap);
++ tf->error = ioread32(ioaddr->error_addr);
+ tf->nsect = ioread32(ioaddr->nsect_addr);
+ tf->lbal = ioread32(ioaddr->lbal_addr);
+ tf->lbam = ioread32(ioaddr->lbam_addr);
+diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
+index f8552559db7f5..2e3418a82b445 100644
+--- a/drivers/ata/sata_svw.c
++++ b/drivers/ata/sata_svw.c
+@@ -194,24 +194,24 @@ static void k2_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
+ static void k2_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+- u16 nsect, lbal, lbam, lbah, feature;
++ u16 nsect, lbal, lbam, lbah, error;
+
+- tf->command = k2_stat_check_status(ap);
++ tf->status = k2_stat_check_status(ap);
+ tf->device = readw(ioaddr->device_addr);
+- feature = readw(ioaddr->error_addr);
++ error = readw(ioaddr->error_addr);
+ nsect = readw(ioaddr->nsect_addr);
+ lbal = readw(ioaddr->lbal_addr);
+ lbam = readw(ioaddr->lbam_addr);
+ lbah = readw(ioaddr->lbah_addr);
+
+- tf->feature = feature;
++ tf->error = error;
+ tf->nsect = nsect;
+ tf->lbal = lbal;
+ tf->lbam = lbam;
+ tf->lbah = lbah;
+
+ if (tf->flags & ATA_TFLAG_LBA48) {
+- tf->hob_feature = feature >> 8;
++ tf->hob_feature = error >> 8;
+ tf->hob_nsect = nsect >> 8;
+ tf->hob_lbal = lbal >> 8;
+ tf->hob_lbam = lbam >> 8;
+diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
+index 8fa952cb9f7f4..87e4ed66b3064 100644
+--- a/drivers/ata/sata_vsc.c
++++ b/drivers/ata/sata_vsc.c
+@@ -183,24 +183,24 @@ static void vsc_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf)
+ static void vsc_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+ {
+ struct ata_ioports *ioaddr = &ap->ioaddr;
+- u16 nsect, lbal, lbam, lbah, feature;
++ u16 nsect, lbal, lbam, lbah, error;
+
+- tf->command = ata_sff_check_status(ap);
++ tf->status = ata_sff_check_status(ap);
+ tf->device = readw(ioaddr->device_addr);
+- feature = readw(ioaddr->error_addr);
++ error = readw(ioaddr->error_addr);
+ nsect = readw(ioaddr->nsect_addr);
+ lbal = readw(ioaddr->lbal_addr);
+ lbam = readw(ioaddr->lbam_addr);
+ lbah = readw(ioaddr->lbah_addr);
+
+- tf->feature = feature;
++ tf->error = error;
+ tf->nsect = nsect;
+ tf->lbal = lbal;
+ tf->lbam = lbam;
+ tf->lbah = lbah;
+
+ if (tf->flags & ATA_TFLAG_LBA48) {
+- tf->hob_feature = feature >> 8;
++ tf->hob_feature = error >> 8;
+ tf->hob_nsect = nsect >> 8;
+ tf->hob_lbal = lbal >> 8;
+ tf->hob_lbam = lbam >> 8;
+diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
+index 422753d52244b..a31ffe16e626f 100644
+--- a/drivers/atm/eni.c
++++ b/drivers/atm/eni.c
+@@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
+ skb_data3 = skb->data[3];
+ paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
+ DMA_TO_DEVICE);
++ if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
++ return enq_next;
+ ENI_PRV_PADDR(skb) = paddr;
+ /* prepare DMA queue entries */
+ j = 0;
+diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
+index 3bc3c314a467b..4f67404fe64c7 100644
+--- a/drivers/atm/firestream.c
++++ b/drivers/atm/firestream.c
+@@ -1676,6 +1676,8 @@ static int fs_init(struct fs_dev *dev)
+ dev->hw_base = pci_resource_start(pci_dev, 0);
+
+ dev->base = ioremap(dev->hw_base, 0x1000);
++ if (!dev->base)
++ return 1;
+
+ reset_chip (dev);
+
+diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
+index 81ce81a75fc67..49cb4537344aa 100644
+--- a/drivers/atm/idt77252.c
++++ b/drivers/atm/idt77252.c
+@@ -2909,6 +2909,7 @@ close_card_oam(struct idt77252_dev *card)
+
+ recycle_rx_pool_skb(card, &vc->rcv.rx_pool);
+ }
++ kfree(vc);
+ }
+ }
+ }
+@@ -2952,6 +2953,15 @@ open_card_ubr0(struct idt77252_dev *card)
+ return 0;
+ }
+
++static void
++close_card_ubr0(struct idt77252_dev *card)
++{
++ struct vc_map *vc = card->vcs[0];
++
++ free_scq(card, vc->scq);
++ kfree(vc);
++}
++
+ static int
+ idt77252_dev_open(struct idt77252_dev *card)
+ {
+@@ -3001,6 +3011,7 @@ static void idt77252_dev_close(struct atm_dev *dev)
+ struct idt77252_dev *card = dev->dev_data;
+ u32 conf;
+
++ close_card_ubr0(card);
+ close_card_oam(card);
+
+ conf = SAR_CFG_RXPTH | /* enable receive path */
+@@ -3752,6 +3763,7 @@ static void __exit idt77252_exit(void)
+ card = idt77252_chain;
+ dev = card->atmdev;
+ idt77252_chain = card->next;
++ del_timer_sync(&card->tst_timer);
+
+ if (dev->phy->stop)
+ dev->phy->stop(dev);
+diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
+index 304accde365c8..6c010d4efa4ae 100644
+--- a/drivers/auxdisplay/charlcd.c
++++ b/drivers/auxdisplay/charlcd.c
+@@ -578,6 +578,9 @@ static int charlcd_init(struct charlcd *lcd)
+ * Since charlcd_init_display() needs to write data, we have to
+ * enable mark the LCD initialized just before.
+ */
++ if (WARN_ON(!lcd->ops->init_display))
++ return -EINVAL;
++
+ ret = lcd->ops->init_display(lcd);
+ if (ret)
+ return ret;
+diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
+index 8b2a0eb3f32a4..d56a5d508ccd7 100644
+--- a/drivers/auxdisplay/hd44780.c
++++ b/drivers/auxdisplay/hd44780.c
+@@ -322,8 +322,10 @@ fail1:
+ static int hd44780_remove(struct platform_device *pdev)
+ {
+ struct charlcd *lcd = platform_get_drvdata(pdev);
++ struct hd44780_common *hdc = lcd->drvdata;
+
+ charlcd_unregister(lcd);
++ kfree(hdc->hd44780);
+ kfree(lcd->drvdata);
+
+ kfree(lcd);
+diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
+index 1e69cc6d21a0d..ed58083499907 100644
+--- a/drivers/auxdisplay/ht16k33.c
++++ b/drivers/auxdisplay/ht16k33.c
+@@ -219,6 +219,15 @@ static const struct backlight_ops ht16k33_bl_ops = {
+ .check_fb = ht16k33_bl_check_fb,
+ };
+
++/*
++ * Blank events will be passed to the actual device handling the backlight when
++ * we return zero here.
++ */
++static int ht16k33_blank(int blank, struct fb_info *info)
++{
++ return 0;
++}
++
+ static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
+ {
+ struct ht16k33_priv *priv = info->par;
+@@ -231,6 +240,7 @@ static const struct fb_ops ht16k33_fb_ops = {
+ .owner = THIS_MODULE,
+ .fb_read = fb_sys_read,
+ .fb_write = fb_sys_write,
++ .fb_blank = ht16k33_blank,
+ .fb_fillrect = sys_fillrect,
+ .fb_copyarea = sys_copyarea,
+ .fb_imageblit = sys_imageblit,
+@@ -413,6 +423,33 @@ static int ht16k33_probe(struct i2c_client *client,
+ if (err)
+ return err;
+
++ /* Backlight */
++ memset(&bl_props, 0, sizeof(struct backlight_properties));
++ bl_props.type = BACKLIGHT_RAW;
++ bl_props.max_brightness = MAX_BRIGHTNESS;
++
++ bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl",
++ &client->dev, priv,
++ &ht16k33_bl_ops, &bl_props);
++ if (IS_ERR(bl)) {
++ dev_err(&client->dev, "failed to register backlight\n");
++ return PTR_ERR(bl);
++ }
++
++ err = of_property_read_u32(node, "default-brightness-level",
++ &dft_brightness);
++ if (err) {
++ dft_brightness = MAX_BRIGHTNESS;
++ } else if (dft_brightness > MAX_BRIGHTNESS) {
++ dev_warn(&client->dev,
++ "invalid default brightness level: %u, using %u\n",
++ dft_brightness, MAX_BRIGHTNESS);
++ dft_brightness = MAX_BRIGHTNESS;
++ }
++
++ bl->props.brightness = dft_brightness;
++ ht16k33_bl_update_status(bl);
++
+ /* Framebuffer (2 bytes per column) */
+ BUILD_BUG_ON(PAGE_SIZE < HT16K33_FB_SIZE);
+ fbdev->buffer = (unsigned char *) get_zeroed_page(GFP_KERNEL);
+@@ -445,6 +482,7 @@ static int ht16k33_probe(struct i2c_client *client,
+ fbdev->info->screen_size = HT16K33_FB_SIZE;
+ fbdev->info->fix = ht16k33_fb_fix;
+ fbdev->info->var = ht16k33_fb_var;
++ fbdev->info->bl_dev = bl;
+ fbdev->info->pseudo_palette = NULL;
+ fbdev->info->flags = FBINFO_FLAG_DEFAULT;
+ fbdev->info->par = priv;
+@@ -460,34 +498,6 @@ static int ht16k33_probe(struct i2c_client *client,
+ goto err_fbdev_unregister;
+ }
+
+- /* Backlight */
+- memset(&bl_props, 0, sizeof(struct backlight_properties));
+- bl_props.type = BACKLIGHT_RAW;
+- bl_props.max_brightness = MAX_BRIGHTNESS;
+-
+- bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl",
+- &client->dev, priv,
+- &ht16k33_bl_ops, &bl_props);
+- if (IS_ERR(bl)) {
+- dev_err(&client->dev, "failed to register backlight\n");
+- err = PTR_ERR(bl);
+- goto err_fbdev_unregister;
+- }
+-
+- err = of_property_read_u32(node, "default-brightness-level",
+- &dft_brightness);
+- if (err) {
+- dft_brightness = MAX_BRIGHTNESS;
+- } else if (dft_brightness > MAX_BRIGHTNESS) {
+- dev_warn(&client->dev,
+- "invalid default brightness level: %u, using %u\n",
+- dft_brightness, MAX_BRIGHTNESS);
+- dft_brightness = MAX_BRIGHTNESS;
+- }
+-
+- bl->props.brightness = dft_brightness;
+- ht16k33_bl_update_status(bl);
+-
+ ht16k33_fb_queue(priv);
+ return 0;
+
+diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
+index 1cce409ce5cac..e33ce0151cdfd 100644
+--- a/drivers/auxdisplay/img-ascii-lcd.c
++++ b/drivers/auxdisplay/img-ascii-lcd.c
+@@ -280,6 +280,16 @@ static int img_ascii_lcd_display(struct img_ascii_lcd_ctx *ctx,
+ if (msg[count - 1] == '\n')
+ count--;
+
++ if (!count) {
++ /* clear the LCD */
++ devm_kfree(&ctx->pdev->dev, ctx->message);
++ ctx->message = NULL;
++ ctx->message_len = 0;
++ memset(ctx->curr, ' ', ctx->cfg->num_chars);
++ ctx->cfg->update(ctx);
++ return 0;
++ }
++
+ new_msg = devm_kmalloc(&ctx->pdev->dev, count + 1, GFP_KERNEL);
+ if (!new_msg)
+ return -ENOMEM;
+diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c
+index 38ba08628ccb3..2578b2d454397 100644
+--- a/drivers/auxdisplay/lcd2s.c
++++ b/drivers/auxdisplay/lcd2s.c
+@@ -238,7 +238,7 @@ static int lcd2s_redefine_char(struct charlcd *lcd, char *esc)
+ if (buf[1] > 7)
+ return 1;
+
+- i = 0;
++ i = 2;
+ shift = 0;
+ value = 0;
+ while (*esc && i < LCD2S_CHARACTER_SIZE + 2) {
+@@ -298,6 +298,10 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
+ I2C_FUNC_SMBUS_WRITE_BLOCK_DATA))
+ return -EIO;
+
++ lcd2s = devm_kzalloc(&i2c->dev, sizeof(*lcd2s), GFP_KERNEL);
++ if (!lcd2s)
++ return -ENOMEM;
++
+ /* Test, if the display is responding */
+ err = lcd2s_i2c_smbus_write_byte(i2c, LCD2S_CMD_DISPLAY_OFF);
+ if (err < 0)
+@@ -307,12 +311,6 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
+ if (!lcd)
+ return -ENOMEM;
+
+- lcd2s = kzalloc(sizeof(struct lcd2s_data), GFP_KERNEL);
+- if (!lcd2s) {
+- err = -ENOMEM;
+- goto fail1;
+- }
+-
+ lcd->drvdata = lcd2s;
+ lcd2s->i2c = i2c;
+ lcd2s->charlcd = lcd;
+@@ -321,26 +319,24 @@ static int lcd2s_i2c_probe(struct i2c_client *i2c,
+ err = device_property_read_u32(&i2c->dev, "display-height-chars",
+ &lcd->height);
+ if (err)
+- goto fail2;
++ goto fail1;
+
+ err = device_property_read_u32(&i2c->dev, "display-width-chars",
+ &lcd->width);
+ if (err)
+- goto fail2;
++ goto fail1;
+
+ lcd->ops = &lcd2s_ops;
+
+ err = charlcd_register(lcd2s->charlcd);
+ if (err)
+- goto fail2;
++ goto fail1;
+
+ i2c_set_clientdata(i2c, lcd2s);
+ return 0;
+
+-fail2:
+- kfree(lcd2s);
+ fail1:
+- kfree(lcd);
++ charlcd_free(lcd2s->charlcd);
+ return err;
+ }
+
+@@ -349,7 +345,7 @@ static int lcd2s_i2c_remove(struct i2c_client *i2c)
+ struct lcd2s_data *lcd2s = i2c_get_clientdata(i2c);
+
+ charlcd_unregister(lcd2s->charlcd);
+- kfree(lcd2s->charlcd);
++ charlcd_free(lcd2s->charlcd);
+ return 0;
+ }
+
+diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
+index 43407665918f3..31bd6f4e5dc47 100644
+--- a/drivers/base/arch_topology.c
++++ b/drivers/base/arch_topology.c
+@@ -609,7 +609,7 @@ void update_siblings_masks(unsigned int cpuid)
+ for_each_online_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+- if (cpuid_topo->llc_id == cpu_topo->llc_id) {
++ if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+ cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
+ cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
+ }
+@@ -690,4 +690,23 @@ void __init init_cpu_topology(void)
+ else if (of_have_populated_dt() && parse_dt_topology())
+ reset_cpu_topology();
+ }
++
++void store_cpu_topology(unsigned int cpuid)
++{
++ struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
++
++ if (cpuid_topo->package_id != -1)
++ goto topology_populated;
++
++ cpuid_topo->thread_id = -1;
++ cpuid_topo->core_id = cpuid;
++ cpuid_topo->package_id = cpu_to_node(cpuid);
++
++ pr_debug("CPU%u: package %d core %d thread %d\n",
++ cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
++ cpuid_topo->thread_id);
++
++topology_populated:
++ update_siblings_masks(cpuid);
++}
+ #endif
+diff --git a/drivers/base/bus.c b/drivers/base/bus.c
+index bdc98c5713d5e..d171535fc18f5 100644
+--- a/drivers/base/bus.c
++++ b/drivers/base/bus.c
+@@ -617,7 +617,7 @@ int bus_add_driver(struct device_driver *drv)
+ if (drv->bus->p->drivers_autoprobe) {
+ error = driver_attach(drv);
+ if (error)
+- goto out_unregister;
++ goto out_del_list;
+ }
+ module_add_driver(drv->owner, drv);
+
+@@ -644,6 +644,8 @@ int bus_add_driver(struct device_driver *drv)
+
+ return 0;
+
++out_del_list:
++ klist_del(&priv->knode_bus);
+ out_unregister:
+ kobject_put(&priv->kobj);
+ /* drv->p is freed in driver_release() */
+diff --git a/drivers/base/class.c b/drivers/base/class.c
+index 7476f393df977..0e44a68e90a02 100644
+--- a/drivers/base/class.c
++++ b/drivers/base/class.c
+@@ -192,6 +192,11 @@ int __class_register(struct class *cls, struct lock_class_key *key)
+ }
+ error = class_add_groups(class_get(cls), cls->class_groups);
+ class_put(cls);
++ if (error) {
++ kobject_del(&cp->subsys.kobj);
++ kfree_const(cp->subsys.kobj.name);
++ kfree(cp);
++ }
+ return error;
+ }
+ EXPORT_SYMBOL_GPL(__class_register);
+diff --git a/drivers/base/component.c b/drivers/base/component.c
+index 5e79299f6c3ff..058f1a2cb2a9a 100644
+--- a/drivers/base/component.c
++++ b/drivers/base/component.c
+@@ -130,7 +130,7 @@ static void component_master_debugfs_add(struct master *m)
+
+ static void component_master_debugfs_del(struct master *m)
+ {
+- debugfs_remove(debugfs_lookup(dev_name(m->parent), component_debugfs_dir));
++ debugfs_lookup_and_remove(dev_name(m->parent), component_debugfs_dir);
+ }
+
+ #else
+@@ -246,7 +246,7 @@ static int try_to_bring_up_master(struct master *master,
+ return 0;
+ }
+
+- if (!devres_open_group(master->parent, NULL, GFP_KERNEL))
++ if (!devres_open_group(master->parent, master, GFP_KERNEL))
+ return -ENOMEM;
+
+ /* Found all components */
+@@ -258,6 +258,7 @@ static int try_to_bring_up_master(struct master *master,
+ return ret;
+ }
+
++ devres_close_group(master->parent, NULL);
+ master->bound = true;
+ return 1;
+ }
+@@ -282,7 +283,7 @@ static void take_down_master(struct master *master)
+ {
+ if (master->bound) {
+ master->ops->unbind(master->parent);
+- devres_release_group(master->parent, NULL);
++ devres_release_group(master->parent, master);
+ master->bound = false;
+ }
+ }
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index 249da496581a0..adf003a7e8d6a 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -485,8 +485,8 @@ static void device_link_release_fn(struct work_struct *work)
+ /* Ensure that all references to the link object have been dropped. */
+ device_link_synchronize_removal();
+
+- while (refcount_dec_not_one(&link->rpm_active))
+- pm_runtime_put(link->supplier);
++ pm_runtime_release_supplier(link);
++ pm_request_idle(link->supplier);
+
+ put_device(link->consumer);
+ put_device(link->supplier);
+@@ -821,9 +821,7 @@ struct device_link *device_link_add(struct device *consumer,
+ dev_bus_name(supplier), dev_name(supplier),
+ dev_bus_name(consumer), dev_name(consumer));
+ if (device_register(&link->link_dev)) {
+- put_device(consumer);
+- put_device(supplier);
+- kfree(link);
++ put_device(&link->link_dev);
+ link = NULL;
+ goto out;
+ }
+@@ -3330,7 +3328,7 @@ int device_add(struct device *dev)
+ /* we require the name to be set before, and pass NULL */
+ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
+ if (error) {
+- glue_dir = get_glue_dir(dev);
++ glue_dir = kobj;
+ goto Error;
+ }
+
+@@ -3430,6 +3428,7 @@ done:
+ device_pm_remove(dev);
+ dpm_sysfs_remove(dev);
+ DPMError:
++ dev->driver = NULL;
+ bus_remove_device(dev);
+ BusError:
+ device_remove_attrs(dev);
+diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
+index 5fc258073bc75..46430cf2401e7 100644
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -487,7 +487,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
+ bool cpu_is_hotpluggable(unsigned int cpu)
+ {
+ struct device *dev = get_cpu_device(cpu);
+- return dev && container_of(dev, struct cpu, dev)->hotpluggable;
++ return dev && container_of(dev, struct cpu, dev)->hotpluggable
++ && tick_nohz_cpu_hotpluggable(cpu);
+ }
+ EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
+
+@@ -564,6 +565,30 @@ ssize_t __weak cpu_show_srbds(struct device *dev,
+ return sysfs_emit(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_retbleed(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_gds(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -573,6 +598,10 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
++static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
++static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
++static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+@@ -584,6 +613,10 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_tsx_async_abort.attr,
+ &dev_attr_itlb_multihit.attr,
+ &dev_attr_srbds.attr,
++ &dev_attr_mmio_stale_data.attr,
++ &dev_attr_retbleed.attr,
++ &dev_attr_gather_data_sampling.attr,
++ &dev_attr_spec_rstack_overflow.attr,
+ NULL
+ };
+
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c
+index 68ea1f949daa9..ab0b2eb5fa07f 100644
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -257,7 +257,6 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs);
+
+ int driver_deferred_probe_timeout;
+ EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout);
+-static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue);
+
+ static int __init deferred_probe_timeout_setup(char *str)
+ {
+@@ -296,6 +295,7 @@ int driver_deferred_probe_check_state(struct device *dev)
+
+ return -EPROBE_DEFER;
+ }
++EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state);
+
+ static void deferred_probe_timeout_work_func(struct work_struct *work)
+ {
+@@ -311,7 +311,6 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
+ list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe)
+ dev_info(p->device, "deferred probe pending\n");
+ mutex_unlock(&deferred_probe_mutex);
+- wake_up_all(&probe_timeout_waitqueue);
+ }
+ static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
+
+@@ -353,7 +352,7 @@ late_initcall(deferred_probe_initcall);
+
+ static void __exit deferred_probe_exit(void)
+ {
+- debugfs_remove_recursive(debugfs_lookup("devices_deferred", NULL));
++ debugfs_lookup_and_remove("devices_deferred", NULL);
+ }
+ __exitcall(deferred_probe_exit);
+
+@@ -629,6 +628,9 @@ re_probe:
+ drv->remove(dev);
+
+ devres_release_all(dev);
++ arch_teardown_dma_ops(dev);
++ kfree(dev->dma_range_map);
++ dev->dma_range_map = NULL;
+ driver_sysfs_remove(dev);
+ dev->driver = NULL;
+ dev_set_drvdata(dev, NULL);
+@@ -688,7 +690,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv)
+ calltime = ktime_get();
+ ret = really_probe(dev, drv);
+ rettime = ktime_get();
+- pr_debug("probe of %s returned %d after %lld usecs\n",
++ /*
++ * Don't change this to pr_debug() because that requires
++ * CONFIG_DYNAMIC_DEBUG and we want a simple 'initcall_debug' on the
++ * kernel commandline to print this all the time at the debug level.
++ */
++ printk(KERN_DEBUG "probe of %s returned %d after %lld usecs\n",
+ dev_name(dev), ret, ktime_us_delta(rettime, calltime));
+ return ret;
+ }
+@@ -715,9 +722,6 @@ int driver_probe_done(void)
+ */
+ void wait_for_device_probe(void)
+ {
+- /* wait for probe timeout */
+- wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout);
+-
+ /* wait for the deferred probe workqueue to finish */
+ flush_work(&deferred_probe_work);
+
+@@ -806,7 +810,7 @@ static int __init save_async_options(char *buf)
+ pr_warn("Too long list of driver names for 'driver_async_probe'!\n");
+
+ strlcpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN);
+- return 0;
++ return 1;
+ }
+ __setup("driver_async_probe=", save_async_options);
+
+@@ -878,6 +882,11 @@ static int __device_attach_driver(struct device_driver *drv, void *_data)
+ dev_dbg(dev, "Device match requests probe deferral\n");
+ dev->can_match = true;
+ driver_deferred_probe_add(dev);
++ /*
++ * Device can't match with a driver right now, so don't attempt
++ * to match or bind with other drivers on the bus.
++ */
++ return ret;
+ } else if (ret < 0) {
+ dev_dbg(dev, "Bus failed to match device: %d\n", ret);
+ return ret;
+@@ -940,6 +949,7 @@ out_unlock:
+ static int __device_attach(struct device *dev, bool allow_async)
+ {
+ int ret = 0;
++ bool async = false;
+
+ device_lock(dev);
+ if (dev->p->dead) {
+@@ -978,7 +988,7 @@ static int __device_attach(struct device *dev, bool allow_async)
+ */
+ dev_dbg(dev, "scheduling asynchronous probe\n");
+ get_device(dev);
+- async_schedule_dev(__device_attach_async_helper, dev);
++ async = true;
+ } else {
+ pm_request_idle(dev);
+ }
+@@ -988,6 +998,8 @@ static int __device_attach(struct device *dev, bool allow_async)
+ }
+ out_unlock:
+ device_unlock(dev);
++ if (async)
++ async_schedule_dev(__device_attach_async_helper, dev);
+ return ret;
+ }
+
+@@ -1092,6 +1104,7 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie)
+ static int __driver_attach(struct device *dev, void *data)
+ {
+ struct device_driver *drv = data;
++ bool async = false;
+ int ret;
+
+ /*
+@@ -1112,9 +1125,18 @@ static int __driver_attach(struct device *dev, void *data)
+ dev_dbg(dev, "Device match requests probe deferral\n");
+ dev->can_match = true;
+ driver_deferred_probe_add(dev);
++ /*
++ * Driver could not match with device, but may match with
++ * another device on the bus.
++ */
++ return 0;
+ } else if (ret < 0) {
+ dev_dbg(dev, "Bus failed to match device: %d\n", ret);
+- return ret;
++ /*
++ * Driver could not match with device, but may match with
++ * another device on the bus.
++ */
++ return 0;
+ } /* ret > 0 means positive match */
+
+ if (driver_allows_async_probing(drv)) {
+@@ -1130,9 +1152,11 @@ static int __driver_attach(struct device *dev, void *data)
+ if (!dev->driver) {
+ get_device(dev);
+ dev->p->async_driver = drv;
+- async_schedule_dev(__driver_attach_async_helper, dev);
++ async = true;
+ }
+ device_unlock(dev);
++ if (async)
++ async_schedule_dev(__driver_attach_async_helper, dev);
+ return 0;
+ }
+
+@@ -1208,6 +1232,8 @@ static void __device_release_driver(struct device *dev, struct device *parent)
+
+ devres_release_all(dev);
+ arch_teardown_dma_ops(dev);
++ kfree(dev->dma_range_map);
++ dev->dma_range_map = NULL;
+ dev->driver = NULL;
+ dev_set_drvdata(dev, NULL);
+ if (dev->pm_domain && dev->pm_domain->dismiss)
+diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
+index 8be352ab4ddbf..fa13ad49d2116 100644
+--- a/drivers/base/devtmpfs.c
++++ b/drivers/base/devtmpfs.c
+@@ -59,8 +59,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla
+ const char *dev_name, void *data)
+ {
+ struct super_block *s = mnt->mnt_sb;
++ int err;
++
+ atomic_inc(&s->s_active);
+ down_write(&s->s_umount);
++ err = reconfigure_single(s, flags, data);
++ if (err < 0) {
++ deactivate_locked_super(s);
++ return ERR_PTR(err);
++ }
+ return dget(s->s_root);
+ }
+
+diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
+index bdbedc6660a87..04ede46f75123 100644
+--- a/drivers/base/firmware_loader/main.c
++++ b/drivers/base/firmware_loader/main.c
+@@ -100,12 +100,15 @@ static struct firmware_cache fw_cache;
+ extern struct builtin_fw __start_builtin_fw[];
+ extern struct builtin_fw __end_builtin_fw[];
+
+-static void fw_copy_to_prealloc_buf(struct firmware *fw,
++static bool fw_copy_to_prealloc_buf(struct firmware *fw,
+ void *buf, size_t size)
+ {
+- if (!buf || size < fw->size)
+- return;
++ if (!buf)
++ return true;
++ if (size < fw->size)
++ return false;
+ memcpy(buf, fw->data, fw->size);
++ return true;
+ }
+
+ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name,
+@@ -117,9 +120,7 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name,
+ if (strcmp(name, b_fw->name) == 0) {
+ fw->size = b_fw->size;
+ fw->data = b_fw->data;
+- fw_copy_to_prealloc_buf(fw, buf, size);
+-
+- return true;
++ return fw_copy_to_prealloc_buf(fw, buf, size);
+ }
+ }
+
+@@ -794,6 +795,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
+ size_t offset, u32 opt_flags)
+ {
+ struct firmware *fw = NULL;
++ struct cred *kern_cred = NULL;
++ const struct cred *old_cred;
+ bool nondirect = false;
+ int ret;
+
+@@ -810,6 +813,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
+ if (ret <= 0) /* error or already assigned */
+ goto out;
+
++ /*
++ * We are about to try to access the firmware file. Because we may have been
++ * called by a driver when serving an unrelated request from userland, we use
++ * the kernel credentials to read the file.
++ */
++ kern_cred = prepare_kernel_cred(NULL);
++ if (!kern_cred) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ old_cred = override_creds(kern_cred);
++
+ ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL);
+
+ /* Only full reads can support decompression, platform, and sysfs. */
+@@ -835,6 +850,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
+ } else
+ ret = assign_fw(fw, device);
+
++ revert_creds(old_cred);
++ put_cred(kern_cred);
++
+ out:
+ if (ret < 0) {
+ fw_abort_batch_reqs(fw);
+diff --git a/drivers/base/init.c b/drivers/base/init.c
+index a9f57c22fb9e2..dab8aa5d28889 100644
+--- a/drivers/base/init.c
++++ b/drivers/base/init.c
+@@ -8,6 +8,7 @@
+ #include <linux/init.h>
+ #include <linux/memory.h>
+ #include <linux/of.h>
++#include <linux/backing-dev.h>
+
+ #include "base.h"
+
+@@ -20,6 +21,7 @@
+ void __init driver_init(void)
+ {
+ /* These are the core pieces */
++ bdi_init(&noop_backing_dev_info);
+ devtmpfs_init();
+ devices_init();
+ buses_init();
+diff --git a/drivers/base/memory.c b/drivers/base/memory.c
+index 365cd4a7f2397..c778d1df74557 100644
+--- a/drivers/base/memory.c
++++ b/drivers/base/memory.c
+@@ -555,6 +555,8 @@ static ssize_t hard_offline_page_store(struct device *dev,
+ return -EINVAL;
+ pfn >>= PAGE_SHIFT;
+ ret = memory_failure(pfn, 0);
++ if (ret == -EOPNOTSUPP)
++ ret = 0;
+ return ret ? ret : count;
+ }
+
+@@ -634,10 +636,9 @@ int register_memory(struct memory_block *memory)
+ }
+ ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
+ GFP_KERNEL));
+- if (ret) {
+- put_device(&memory->dev);
++ if (ret)
+ device_unregister(&memory->dev);
+- }
++
+ return ret;
+ }
+
+@@ -663,14 +664,16 @@ static int init_memory_block(unsigned long block_id, unsigned long state,
+ mem->nr_vmemmap_pages = nr_vmemmap_pages;
+ INIT_LIST_HEAD(&mem->group_next);
+
++ ret = register_memory(mem);
++ if (ret)
++ return ret;
++
+ if (group) {
+ mem->group = group;
+ list_add(&mem->group_next, &group->memory_blocks);
+ }
+
+- ret = register_memory(mem);
+-
+- return ret;
++ return 0;
+ }
+
+ static int add_memory_block(unsigned long base_section_nr)
+diff --git a/drivers/base/node.c b/drivers/base/node.c
+index c56d34f8158f7..5366d1b5359c8 100644
+--- a/drivers/base/node.c
++++ b/drivers/base/node.c
+@@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
+ return n;
+ }
+
+-static BIN_ATTR_RO(cpumap, 0);
++static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);
+
+ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+@@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+ return n;
+ }
+
+-static BIN_ATTR_RO(cpulist, 0);
++static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
+
+ /**
+ * struct node_access_nodes - Access class device to hold user visible
+@@ -679,6 +679,7 @@ static int register_node(struct node *node, int num)
+ */
+ void unregister_node(struct node *node)
+ {
++ compaction_unregister_node(node);
+ hugetlb_unregister_node(node); /* no-op, if memoryless node */
+ node_remove_accesses(node);
+ node_remove_caches(node);
+diff --git a/drivers/base/platform.c b/drivers/base/platform.c
+index 652531f67135a..ac5cf1a8d79ab 100644
+--- a/drivers/base/platform.c
++++ b/drivers/base/platform.c
+@@ -1427,7 +1427,9 @@ static void platform_remove(struct device *_dev)
+ struct platform_driver *drv = to_platform_driver(_dev->driver);
+ struct platform_device *dev = to_platform_device(_dev);
+
+- if (drv->remove) {
++ if (drv->remove_new) {
++ drv->remove_new(dev);
++ } else if (drv->remove) {
+ int ret = drv->remove(dev);
+
+ if (ret)
+diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
+index 5db704f02e712..6ffee01e174da 100644
+--- a/drivers/base/power/domain.c
++++ b/drivers/base/power/domain.c
+@@ -217,10 +217,10 @@ static void genpd_debug_add(struct generic_pm_domain *genpd);
+
+ static void genpd_debug_remove(struct generic_pm_domain *genpd)
+ {
+- struct dentry *d;
++ if (!genpd_debugfs_dir)
++ return;
+
+- d = debugfs_lookup(genpd->name, genpd_debugfs_dir);
+- debugfs_remove(d);
++ debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir);
+ }
+
+ static void genpd_update_accounting(struct generic_pm_domain *genpd)
+@@ -1978,6 +1978,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
+ genpd->device_count = 0;
+ genpd->max_off_time_ns = -1;
+ genpd->max_off_time_changed = true;
++ genpd->next_wakeup = KTIME_MAX;
+ genpd->provider = NULL;
+ genpd->has_provider = false;
+ genpd->accounting_time = ktime_get();
+@@ -2058,9 +2059,9 @@ static int genpd_remove(struct generic_pm_domain *genpd)
+ kfree(link);
+ }
+
+- genpd_debug_remove(genpd);
+ list_del(&genpd->gpd_list_node);
+ genpd_unlock(genpd);
++ genpd_debug_remove(genpd);
+ cancel_work_sync(&genpd->power_off_work);
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+@@ -2859,10 +2860,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
+
+ err = of_property_read_u32(state_node, "min-residency-us", &residency);
+ if (!err)
+- genpd_state->residency_ns = 1000 * residency;
++ genpd_state->residency_ns = 1000LL * residency;
+
+- genpd_state->power_on_latency_ns = 1000 * exit_latency;
+- genpd_state->power_off_latency_ns = 1000 * entry_latency;
++ genpd_state->power_on_latency_ns = 1000LL * exit_latency;
++ genpd_state->power_off_latency_ns = 1000LL * entry_latency;
+ genpd_state->fwnode = &state_node->fwnode;
+
+ return 0;
+@@ -2885,6 +2886,10 @@ static int genpd_iterate_idle_states(struct device_node *dn,
+ np = it.node;
+ if (!of_match_node(idle_state_match, np))
+ continue;
++
++ if (!of_device_is_available(np))
++ continue;
++
+ if (states) {
+ ret = genpd_parse_state(&states[i], np);
+ if (ret) {
+diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
+index cbea78e79f3df..8c4819fe73d4c 100644
+--- a/drivers/base/power/main.c
++++ b/drivers/base/power/main.c
+@@ -711,6 +711,7 @@ static void dpm_noirq_resume_devices(pm_message_t state)
+ dev = to_device(dpm_noirq_list.next);
+ get_device(dev);
+ list_move_tail(&dev->power.entry, &dpm_late_early_list);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ if (!is_async(dev)) {
+@@ -725,8 +726,9 @@ static void dpm_noirq_resume_devices(pm_message_t state)
+ }
+ }
+
+- mutex_lock(&dpm_list_mtx);
+ put_device(dev);
++
++ mutex_lock(&dpm_list_mtx);
+ }
+ mutex_unlock(&dpm_list_mtx);
+ async_synchronize_full();
+@@ -852,6 +854,7 @@ void dpm_resume_early(pm_message_t state)
+ dev = to_device(dpm_late_early_list.next);
+ get_device(dev);
+ list_move_tail(&dev->power.entry, &dpm_suspended_list);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ if (!is_async(dev)) {
+@@ -865,8 +868,10 @@ void dpm_resume_early(pm_message_t state)
+ pm_dev_err(dev, state, " early", error);
+ }
+ }
+- mutex_lock(&dpm_list_mtx);
++
+ put_device(dev);
++
++ mutex_lock(&dpm_list_mtx);
+ }
+ mutex_unlock(&dpm_list_mtx);
+ async_synchronize_full();
+@@ -1029,7 +1034,12 @@ void dpm_resume(pm_message_t state)
+ }
+ if (!list_empty(&dev->power.entry))
+ list_move_tail(&dev->power.entry, &dpm_prepared_list);
++
++ mutex_unlock(&dpm_list_mtx);
++
+ put_device(dev);
++
++ mutex_lock(&dpm_list_mtx);
+ }
+ mutex_unlock(&dpm_list_mtx);
+ async_synchronize_full();
+@@ -1051,7 +1061,7 @@ static void device_complete(struct device *dev, pm_message_t state)
+ const char *info = NULL;
+
+ if (dev->power.syscore)
+- return;
++ goto out;
+
+ device_lock(dev);
+
+@@ -1081,6 +1091,7 @@ static void device_complete(struct device *dev, pm_message_t state)
+
+ device_unlock(dev);
+
++out:
+ pm_runtime_put(dev);
+ }
+
+@@ -1106,14 +1117,16 @@ void dpm_complete(pm_message_t state)
+ get_device(dev);
+ dev->power.is_prepared = false;
+ list_move(&dev->power.entry, &list);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ trace_device_pm_callback_start(dev, "", state.event);
+ device_complete(dev, state);
+ trace_device_pm_callback_end(dev, 0);
+
+- mutex_lock(&dpm_list_mtx);
+ put_device(dev);
++
++ mutex_lock(&dpm_list_mtx);
+ }
+ list_splice(&list, &dpm_list);
+ mutex_unlock(&dpm_list_mtx);
+@@ -1298,17 +1311,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
+ error = device_suspend_noirq(dev);
+
+ mutex_lock(&dpm_list_mtx);
++
+ if (error) {
+ pm_dev_err(dev, state, " noirq", error);
+ dpm_save_failed_dev(dev_name(dev));
+- put_device(dev);
+- break;
+- }
+- if (!list_empty(&dev->power.entry))
++ } else if (!list_empty(&dev->power.entry)) {
+ list_move(&dev->power.entry, &dpm_noirq_list);
++ }
++
++ mutex_unlock(&dpm_list_mtx);
++
+ put_device(dev);
+
+- if (async_error)
++ mutex_lock(&dpm_list_mtx);
++
++ if (error || async_error)
+ break;
+ }
+ mutex_unlock(&dpm_list_mtx);
+@@ -1475,23 +1492,28 @@ int dpm_suspend_late(pm_message_t state)
+ struct device *dev = to_device(dpm_suspended_list.prev);
+
+ get_device(dev);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ error = device_suspend_late(dev);
+
+ mutex_lock(&dpm_list_mtx);
++
+ if (!list_empty(&dev->power.entry))
+ list_move(&dev->power.entry, &dpm_late_early_list);
+
+ if (error) {
+ pm_dev_err(dev, state, " late", error);
+ dpm_save_failed_dev(dev_name(dev));
+- put_device(dev);
+- break;
+ }
++
++ mutex_unlock(&dpm_list_mtx);
++
+ put_device(dev);
+
+- if (async_error)
++ mutex_lock(&dpm_list_mtx);
++
++ if (error || async_error)
+ break;
+ }
+ mutex_unlock(&dpm_list_mtx);
+@@ -1751,21 +1773,27 @@ int dpm_suspend(pm_message_t state)
+ struct device *dev = to_device(dpm_prepared_list.prev);
+
+ get_device(dev);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ error = device_suspend(dev);
+
+ mutex_lock(&dpm_list_mtx);
++
+ if (error) {
+ pm_dev_err(dev, state, "", error);
+ dpm_save_failed_dev(dev_name(dev));
+- put_device(dev);
+- break;
+- }
+- if (!list_empty(&dev->power.entry))
++ } else if (!list_empty(&dev->power.entry)) {
+ list_move(&dev->power.entry, &dpm_suspended_list);
++ }
++
++ mutex_unlock(&dpm_list_mtx);
++
+ put_device(dev);
+- if (async_error)
++
++ mutex_lock(&dpm_list_mtx);
++
++ if (error || async_error)
+ break;
+ }
+ mutex_unlock(&dpm_list_mtx);
+@@ -1794,9 +1822,6 @@ static int device_prepare(struct device *dev, pm_message_t state)
+ int (*callback)(struct device *) = NULL;
+ int ret = 0;
+
+- if (dev->power.syscore)
+- return 0;
+-
+ /*
+ * If a device's parent goes into runtime suspend at the wrong time,
+ * it won't be possible to resume the device. To prevent this we
+@@ -1805,6 +1830,9 @@ static int device_prepare(struct device *dev, pm_message_t state)
+ */
+ pm_runtime_get_noresume(dev);
+
++ if (dev->power.syscore)
++ return 0;
++
+ device_lock(dev);
+
+ dev->power.wakeup_path = false;
+@@ -1878,10 +1906,11 @@ int dpm_prepare(pm_message_t state)
+ device_block_probing();
+
+ mutex_lock(&dpm_list_mtx);
+- while (!list_empty(&dpm_list)) {
++ while (!list_empty(&dpm_list) && !error) {
+ struct device *dev = to_device(dpm_list.next);
+
+ get_device(dev);
++
+ mutex_unlock(&dpm_list_mtx);
+
+ trace_device_pm_callback_start(dev, "", state.event);
+@@ -1889,21 +1918,23 @@ int dpm_prepare(pm_message_t state)
+ trace_device_pm_callback_end(dev, error);
+
+ mutex_lock(&dpm_list_mtx);
+- if (error) {
+- if (error == -EAGAIN) {
+- put_device(dev);
+- error = 0;
+- continue;
+- }
++
++ if (!error) {
++ dev->power.is_prepared = true;
++ if (!list_empty(&dev->power.entry))
++ list_move_tail(&dev->power.entry, &dpm_prepared_list);
++ } else if (error == -EAGAIN) {
++ error = 0;
++ } else {
+ dev_info(dev, "not prepared for power transition: code %d\n",
+ error);
+- put_device(dev);
+- break;
+ }
+- dev->power.is_prepared = true;
+- if (!list_empty(&dev->power.entry))
+- list_move_tail(&dev->power.entry, &dpm_prepared_list);
++
++ mutex_unlock(&dpm_list_mtx);
++
+ put_device(dev);
++
++ mutex_lock(&dpm_list_mtx);
+ }
+ mutex_unlock(&dpm_list_mtx);
+ trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
+@@ -1991,7 +2022,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops)
+
+ void device_pm_check_callbacks(struct device *dev)
+ {
+- spin_lock_irq(&dev->power.lock);
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev->power.lock, flags);
+ dev->power.no_pm_callbacks =
+ (!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
+ !dev->bus->suspend && !dev->bus->resume)) &&
+@@ -2000,7 +2033,7 @@ void device_pm_check_callbacks(struct device *dev)
+ (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
+ (!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
+ !dev->driver->suspend && !dev->driver->resume));
+- spin_unlock_irq(&dev->power.lock);
++ spin_unlock_irqrestore(&dev->power.lock, flags);
+ }
+
+ bool dev_pm_skip_suspend(struct device *dev)
+diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
+index 54292cdd7808b..922ed457db191 100644
+--- a/drivers/base/power/power.h
++++ b/drivers/base/power/power.h
+@@ -25,8 +25,11 @@ extern u64 pm_runtime_active_time(struct device *dev);
+
+ #define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0)
+ #define WAKE_IRQ_DEDICATED_MANAGED BIT(1)
++#define WAKE_IRQ_DEDICATED_REVERSE BIT(2)
+ #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \
+- WAKE_IRQ_DEDICATED_MANAGED)
++ WAKE_IRQ_DEDICATED_MANAGED | \
++ WAKE_IRQ_DEDICATED_REVERSE)
++#define WAKE_IRQ_DEDICATED_ENABLED BIT(3)
+
+ struct wake_irq {
+ struct device *dev;
+@@ -39,7 +42,8 @@ extern void dev_pm_arm_wake_irq(struct wake_irq *wirq);
+ extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq);
+ extern void dev_pm_enable_wake_irq_check(struct device *dev,
+ bool can_change_status);
+-extern void dev_pm_disable_wake_irq_check(struct device *dev);
++extern void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable);
++extern void dev_pm_enable_wake_irq_complete(struct device *dev);
+
+ #ifdef CONFIG_PM_SLEEP
+
+diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
+index ec94049442b99..5824d41a0b745 100644
+--- a/drivers/base/power/runtime.c
++++ b/drivers/base/power/runtime.c
+@@ -305,16 +305,34 @@ static int rpm_get_suppliers(struct device *dev)
+ return 0;
+ }
+
++/**
++ * pm_runtime_release_supplier - Drop references to device link's supplier.
++ * @link: Target device link.
++ *
++ * Drop all runtime PM references associated with @link to its supplier device.
++ */
++void pm_runtime_release_supplier(struct device_link *link)
++{
++ struct device *supplier = link->supplier;
++
++ /*
++ * The additional power.usage_count check is a safety net in case
++ * the rpm_active refcount becomes saturated, in which case
++ * refcount_dec_not_one() would return true forever, but it is not
++ * strictly necessary.
++ */
++ while (refcount_dec_not_one(&link->rpm_active) &&
++ atomic_read(&supplier->power.usage_count) > 0)
++ pm_runtime_put_noidle(supplier);
++}
++
+ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
+ {
+ struct device_link *link;
+
+ list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+ device_links_read_lock_held()) {
+-
+- while (refcount_dec_not_one(&link->rpm_active))
+- pm_runtime_put_noidle(link->supplier);
+-
++ pm_runtime_release_supplier(link);
+ if (try_to_suspend)
+ pm_request_idle(link->supplier);
+ }
+@@ -466,7 +484,17 @@ static int rpm_idle(struct device *dev, int rpmflags)
+
+ dev->power.idle_notification = true;
+
+- retval = __rpm_callback(callback, dev);
++ if (dev->power.irq_safe)
++ spin_unlock(&dev->power.lock);
++ else
++ spin_unlock_irq(&dev->power.lock);
++
++ retval = callback(dev);
++
++ if (dev->power.irq_safe)
++ spin_lock(&dev->power.lock);
++ else
++ spin_lock_irq(&dev->power.lock);
+
+ dev->power.idle_notification = false;
+ wake_up_all(&dev->power.wait_queue);
+@@ -645,6 +673,8 @@ static int rpm_suspend(struct device *dev, int rpmflags)
+ if (retval)
+ goto fail;
+
++ dev_pm_enable_wake_irq_complete(dev);
++
+ no_callback:
+ __update_runtime_status(dev, RPM_SUSPENDED);
+ pm_runtime_deactivate_timer(dev);
+@@ -690,7 +720,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
+ return retval;
+
+ fail:
+- dev_pm_disable_wake_irq_check(dev);
++ dev_pm_disable_wake_irq_check(dev, true);
+ __update_runtime_status(dev, RPM_ACTIVE);
+ dev->power.deferred_resume = false;
+ wake_up_all(&dev->power.wait_queue);
+@@ -873,7 +903,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
+
+ callback = RPM_GET_CALLBACK(dev, runtime_resume);
+
+- dev_pm_disable_wake_irq_check(dev);
++ dev_pm_disable_wake_irq_check(dev, false);
+ retval = rpm_callback(callback, dev);
+ if (retval) {
+ __update_runtime_status(dev, RPM_SUSPENDED);
+@@ -1770,9 +1800,8 @@ void pm_runtime_drop_link(struct device_link *link)
+ return;
+
+ pm_runtime_drop_link_count(link->consumer);
+-
+- while (refcount_dec_not_one(&link->rpm_active))
+- pm_runtime_put(link->supplier);
++ pm_runtime_release_supplier(link);
++ pm_request_idle(link->supplier);
+ }
+
+ static bool pm_runtime_need_not_resume(struct device *dev)
+diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
+index 94665037f4a35..72b7a92337b18 100644
+--- a/drivers/base/power/trace.c
++++ b/drivers/base/power/trace.c
+@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
+ struct rtc_time time;
+ unsigned int val;
+
+- mc146818_get_time(&time);
++ if (mc146818_get_time(&time) < 0) {
++ pr_err("Unable to read current time from RTC\n");
++ return 0;
++ }
++
+ pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
+ val = time.tm_year; /* 100 years */
+ if (val > 100)
+diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
+index b91a3a9bf9f6d..6f2cdd8643afa 100644
+--- a/drivers/base/power/wakeirq.c
++++ b/drivers/base/power/wakeirq.c
+@@ -142,24 +142,7 @@ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq)
+ return IRQ_HANDLED;
+ }
+
+-/**
+- * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
+- * @dev: Device entry
+- * @irq: Device wake-up interrupt
+- *
+- * Unless your hardware has separate wake-up interrupts in addition
+- * to the device IO interrupts, you don't need this.
+- *
+- * Sets up a threaded interrupt handler for a device that has
+- * a dedicated wake-up interrupt in addition to the device IO
+- * interrupt.
+- *
+- * The interrupt starts disabled, and needs to be managed for
+- * the device by the bus code or the device driver using
+- * dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq()
+- * functions.
+- */
+-int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
++static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag)
+ {
+ struct wake_irq *wirq;
+ int err;
+@@ -197,7 +180,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
+ if (err)
+ goto err_free_irq;
+
+- wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED;
++ wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED | flag;
+
+ return err;
+
+@@ -210,8 +193,57 @@ err_free:
+
+ return err;
+ }
++
++
++/**
++ * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
++ * @dev: Device entry
++ * @irq: Device wake-up interrupt
++ *
++ * Unless your hardware has separate wake-up interrupts in addition
++ * to the device IO interrupts, you don't need this.
++ *
++ * Sets up a threaded interrupt handler for a device that has
++ * a dedicated wake-up interrupt in addition to the device IO
++ * interrupt.
++ *
++ * The interrupt starts disabled, and needs to be managed for
++ * the device by the bus code or the device driver using
++ * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
++ * functions.
++ */
++int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
++{
++ return __dev_pm_set_dedicated_wake_irq(dev, irq, 0);
++}
+ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
+
++/**
++ * dev_pm_set_dedicated_wake_irq_reverse - Request a dedicated wake-up interrupt
++ * with reverse enable ordering
++ * @dev: Device entry
++ * @irq: Device wake-up interrupt
++ *
++ * Unless your hardware has separate wake-up interrupts in addition
++ * to the device IO interrupts, you don't need this.
++ *
++ * Sets up a threaded interrupt handler for a device that has a dedicated
++ * wake-up interrupt in addition to the device IO interrupt. It sets
++ * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend()
++ * to enable dedicated wake-up interrupt after running the runtime suspend
++ * callback for @dev.
++ *
++ * The interrupt starts disabled, and needs to be managed for
++ * the device by the bus code or the device driver using
++ * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
++ * functions.
++ */
++int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
++{
++ return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE);
++}
++EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
++
+ /**
+ * dev_pm_enable_wake_irq - Enable device wake-up interrupt
+ * @dev: Device
+@@ -282,25 +314,56 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
+ return;
+
+ enable:
+- enable_irq(wirq->irq);
++ if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) {
++ enable_irq(wirq->irq);
++ wirq->status |= WAKE_IRQ_DEDICATED_ENABLED;
++ }
+ }
+
+ /**
+ * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt
+ * @dev: Device
++ * @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE
+ *
+ * Disables wake-up interrupt conditionally based on status.
+ * Should be only called from rpm_suspend() and rpm_resume() path.
+ */
+-void dev_pm_disable_wake_irq_check(struct device *dev)
++void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
+ {
+ struct wake_irq *wirq = dev->power.wakeirq;
+
+ if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
+ return;
+
+- if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
++ if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
++ return;
++
++ if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) {
++ wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED;
+ disable_irq_nosync(wirq->irq);
++ }
++}
++
++/**
++ * dev_pm_enable_wake_irq_complete - enable wake IRQ not enabled before
++ * @dev: Device using the wake IRQ
++ *
++ * Enable wake IRQ conditionally based on status, mainly used if want to
++ * enable wake IRQ after running ->runtime_suspend() which depends on
++ * WAKE_IRQ_DEDICATED_REVERSE.
++ *
++ * Should be only called from rpm_suspend() path.
++ */
++void dev_pm_enable_wake_irq_complete(struct device *dev)
++{
++ struct wake_irq *wirq = dev->power.wakeirq;
++
++ if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
++ return;
++
++ if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED &&
++ wirq->status & WAKE_IRQ_DEDICATED_REVERSE)
++ enable_irq(wirq->irq);
+ }
+
+ /**
+@@ -317,7 +380,7 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
+
+ if (device_may_wakeup(wirq->dev)) {
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+- !pm_runtime_status_suspended(wirq->dev))
++ !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
+ enable_irq(wirq->irq);
+
+ enable_irq_wake(wirq->irq);
+@@ -340,7 +403,7 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
+ disable_irq_wake(wirq->irq);
+
+ if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
+- !pm_runtime_status_suspended(wirq->dev))
++ !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
+ disable_irq_nosync(wirq->irq);
+ }
+ }
+diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
+index 99bda0da23a82..8666590201c9a 100644
+--- a/drivers/base/power/wakeup.c
++++ b/drivers/base/power/wakeup.c
+@@ -34,7 +34,8 @@ suspend_state_t pm_suspend_target_state;
+ bool events_check_enabled __read_mostly;
+
+ /* First wakeup IRQ seen by the kernel in the last cycle. */
+-unsigned int pm_wakeup_irq __read_mostly;
++static unsigned int wakeup_irq[2] __read_mostly;
++static DEFINE_RAW_SPINLOCK(wakeup_irq_lock);
+
+ /* If greater than 0 and the system is suspending, terminate the suspend. */
+ static atomic_t pm_abort_suspend __read_mostly;
+@@ -942,19 +943,45 @@ void pm_system_cancel_wakeup(void)
+ atomic_dec_if_positive(&pm_abort_suspend);
+ }
+
+-void pm_wakeup_clear(bool reset)
++void pm_wakeup_clear(unsigned int irq_number)
+ {
+- pm_wakeup_irq = 0;
+- if (reset)
++ raw_spin_lock_irq(&wakeup_irq_lock);
++
++ if (irq_number && wakeup_irq[0] == irq_number)
++ wakeup_irq[0] = wakeup_irq[1];
++ else
++ wakeup_irq[0] = 0;
++
++ wakeup_irq[1] = 0;
++
++ raw_spin_unlock_irq(&wakeup_irq_lock);
++
++ if (!irq_number)
+ atomic_set(&pm_abort_suspend, 0);
+ }
+
+ void pm_system_irq_wakeup(unsigned int irq_number)
+ {
+- if (pm_wakeup_irq == 0) {
+- pm_wakeup_irq = irq_number;
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&wakeup_irq_lock, flags);
++
++ if (wakeup_irq[0] == 0)
++ wakeup_irq[0] = irq_number;
++ else if (wakeup_irq[1] == 0)
++ wakeup_irq[1] = irq_number;
++ else
++ irq_number = 0;
++
++ raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags);
++
++ if (irq_number)
+ pm_system_wakeup();
+- }
++}
++
++unsigned int pm_wakeup_irq(void)
++{
++ return wakeup_irq[0];
+ }
+
+ /**
+diff --git a/drivers/base/property.c b/drivers/base/property.c
+index 453918eb7390c..17a648d643566 100644
+--- a/drivers/base/property.c
++++ b/drivers/base/property.c
+@@ -48,12 +48,14 @@ bool fwnode_property_present(const struct fwnode_handle *fwnode,
+ {
+ bool ret;
+
++ if (IS_ERR_OR_NULL(fwnode))
++ return false;
++
+ ret = fwnode_call_bool_op(fwnode, property_present, propname);
+- if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
+- !IS_ERR_OR_NULL(fwnode->secondary))
+- ret = fwnode_call_bool_op(fwnode->secondary, property_present,
+- propname);
+- return ret;
++ if (ret)
++ return ret;
++
++ return fwnode_call_bool_op(fwnode->secondary, property_present, propname);
+ }
+ EXPORT_SYMBOL_GPL(fwnode_property_present);
+
+@@ -233,15 +235,16 @@ static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode,
+ {
+ int ret;
+
++ if (IS_ERR_OR_NULL(fwnode))
++ return -EINVAL;
++
+ ret = fwnode_call_int_op(fwnode, property_read_int_array, propname,
+ elem_size, val, nval);
+- if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+- !IS_ERR_OR_NULL(fwnode->secondary))
+- ret = fwnode_call_int_op(
+- fwnode->secondary, property_read_int_array, propname,
+- elem_size, val, nval);
++ if (ret != -EINVAL)
++ return ret;
+
+- return ret;
++ return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname,
++ elem_size, val, nval);
+ }
+
+ /**
+@@ -372,14 +375,16 @@ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode,
+ {
+ int ret;
+
++ if (IS_ERR_OR_NULL(fwnode))
++ return -EINVAL;
++
+ ret = fwnode_call_int_op(fwnode, property_read_string_array, propname,
+ val, nval);
+- if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+- !IS_ERR_OR_NULL(fwnode->secondary))
+- ret = fwnode_call_int_op(fwnode->secondary,
+- property_read_string_array, propname,
+- val, nval);
+- return ret;
++ if (ret != -EINVAL)
++ return ret;
++
++ return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname,
++ val, nval);
+ }
+ EXPORT_SYMBOL_GPL(fwnode_property_read_string_array);
+
+@@ -479,7 +484,20 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode,
+ unsigned int nargs, unsigned int index,
+ struct fwnode_reference_args *args)
+ {
+- return fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop,
++ int ret;
++
++ if (IS_ERR_OR_NULL(fwnode))
++ return -ENOENT;
++
++ ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop,
++ nargs, index, args);
++ if (ret == 0)
++ return ret;
++
++ if (IS_ERR_OR_NULL(fwnode->secondary))
++ return ret;
++
++ return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop,
+ nargs, index, args);
+ }
+ EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args);
+@@ -675,12 +693,13 @@ EXPORT_SYMBOL_GPL(fwnode_count_parents);
+ struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode,
+ unsigned int depth)
+ {
+- unsigned int i;
+-
+ fwnode_handle_get(fwnode);
+
+- for (i = 0; i < depth && fwnode; i++)
++ do {
++ if (depth-- == 0)
++ break;
+ fwnode = fwnode_get_next_parent(fwnode);
++ } while (fwnode);
+
+ return fwnode;
+ }
+@@ -699,17 +718,17 @@ EXPORT_SYMBOL_GPL(fwnode_get_nth_parent);
+ bool fwnode_is_ancestor_of(struct fwnode_handle *test_ancestor,
+ struct fwnode_handle *test_child)
+ {
+- if (!test_ancestor)
++ if (IS_ERR_OR_NULL(test_ancestor))
+ return false;
+
+ fwnode_handle_get(test_child);
+- while (test_child) {
++ do {
+ if (test_child == test_ancestor) {
+ fwnode_handle_put(test_child);
+ return true;
+ }
+ test_child = fwnode_get_next_parent(test_child);
+- }
++ } while (test_child);
+ return false;
+ }
+
+@@ -738,7 +757,7 @@ fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode,
+ {
+ struct fwnode_handle *next_child = child;
+
+- if (!fwnode)
++ if (IS_ERR_OR_NULL(fwnode))
+ return NULL;
+
+ do {
+@@ -762,16 +781,16 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev,
+ const struct fwnode_handle *fwnode = dev_fwnode(dev);
+ struct fwnode_handle *next;
+
++ if (IS_ERR_OR_NULL(fwnode))
++ return NULL;
++
+ /* Try to find a child in primary fwnode */
+ next = fwnode_get_next_child_node(fwnode, child);
+ if (next)
+ return next;
+
+ /* When no more children in primary, continue with secondary */
+- if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
+- next = fwnode_get_next_child_node(fwnode->secondary, child);
+-
+- return next;
++ return fwnode_get_next_child_node(fwnode->secondary, child);
+ }
+ EXPORT_SYMBOL_GPL(device_get_next_child_node);
+
+@@ -838,6 +857,9 @@ EXPORT_SYMBOL_GPL(fwnode_handle_put);
+ */
+ bool fwnode_device_is_available(const struct fwnode_handle *fwnode)
+ {
++ if (IS_ERR_OR_NULL(fwnode))
++ return false;
++
+ if (!fwnode_has_op(fwnode, device_is_available))
+ return true;
+
+@@ -1033,25 +1055,31 @@ struct fwnode_handle *
+ fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
+ struct fwnode_handle *prev)
+ {
++ struct fwnode_handle *ep, *port_parent = NULL;
+ const struct fwnode_handle *parent;
+- struct fwnode_handle *ep;
+
+ /*
+ * If this function is in a loop and the previous iteration returned
+ * an endpoint from fwnode->secondary, then we need to use the secondary
+ * as parent rather than @fwnode.
+ */
+- if (prev)
+- parent = fwnode_graph_get_port_parent(prev);
+- else
++ if (prev) {
++ port_parent = fwnode_graph_get_port_parent(prev);
++ parent = port_parent;
++ } else {
+ parent = fwnode;
++ }
++ if (IS_ERR_OR_NULL(parent))
++ return NULL;
+
+ ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev);
++ if (ep)
++ goto out_put_port_parent;
+
+- if (IS_ERR_OR_NULL(ep) &&
+- !IS_ERR_OR_NULL(parent) && !IS_ERR_OR_NULL(parent->secondary))
+- ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL);
++ ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL);
+
++out_put_port_parent:
++ fwnode_handle_put(port_parent);
+ return ep;
+ }
+ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
+@@ -1269,8 +1297,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id,
+
+ fwnode_graph_for_each_endpoint(fwnode, ep) {
+ node = fwnode_graph_get_remote_port_parent(ep);
+- if (!fwnode_device_is_available(node))
++ if (!fwnode_device_is_available(node)) {
++ fwnode_handle_put(node);
+ continue;
++ }
+
+ ret = match(node, con_id, data);
+ fwnode_handle_put(node);
+diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
+index fabf87058d80b..ae6b8788d5f3f 100644
+--- a/drivers/base/regmap/regcache-rbtree.c
++++ b/drivers/base/regmap/regcache-rbtree.c
+@@ -277,7 +277,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
+
+ blk = krealloc(rbnode->block,
+ blklen * map->cache_word_size,
+- GFP_KERNEL);
++ map->alloc_flags);
+ if (!blk)
+ return -ENOMEM;
+
+@@ -286,7 +286,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
+ if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) {
+ present = krealloc(rbnode->cache_present,
+ BITS_TO_LONGS(blklen) * sizeof(*present),
+- GFP_KERNEL);
++ map->alloc_flags);
+ if (!present)
+ return -ENOMEM;
+
+@@ -320,7 +320,7 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
+ const struct regmap_range *range;
+ int i;
+
+- rbnode = kzalloc(sizeof(*rbnode), GFP_KERNEL);
++ rbnode = kzalloc(sizeof(*rbnode), map->alloc_flags);
+ if (!rbnode)
+ return NULL;
+
+@@ -346,13 +346,13 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
+ }
+
+ rbnode->block = kmalloc_array(rbnode->blklen, map->cache_word_size,
+- GFP_KERNEL);
++ map->alloc_flags);
+ if (!rbnode->block)
+ goto err_free;
+
+ rbnode->cache_present = kcalloc(BITS_TO_LONGS(rbnode->blklen),
+ sizeof(*rbnode->cache_present),
+- GFP_KERNEL);
++ map->alloc_flags);
+ if (!rbnode->cache_present)
+ goto err_free_block;
+
+diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
+index f2469d3435ca3..0b517a83c4493 100644
+--- a/drivers/base/regmap/regcache.c
++++ b/drivers/base/regmap/regcache.c
+@@ -343,6 +343,9 @@ int regcache_sync(struct regmap *map)
+ const char *name;
+ bool bypass;
+
++ if (WARN_ON(map->cache_type == REGCACHE_NONE))
++ return -EINVAL;
++
+ BUG_ON(!map->cache_ops);
+
+ map->lock(map->lock_arg);
+@@ -412,6 +415,9 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
+ const char *name;
+ bool bypass;
+
++ if (WARN_ON(map->cache_type == REGCACHE_NONE))
++ return -EINVAL;
++
+ BUG_ON(!map->cache_ops);
+
+ map->lock(map->lock_arg);
+diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
+index 980e5ce6a3a35..3ec611dc0c09f 100644
+--- a/drivers/base/regmap/regmap-i2c.c
++++ b/drivers/base/regmap/regmap-i2c.c
+@@ -242,8 +242,8 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg,
+ static const struct regmap_bus regmap_i2c_smbus_i2c_block = {
+ .write = regmap_i2c_smbus_i2c_write,
+ .read = regmap_i2c_smbus_i2c_read,
+- .max_raw_read = I2C_SMBUS_BLOCK_MAX,
+- .max_raw_write = I2C_SMBUS_BLOCK_MAX,
++ .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1,
++ .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1,
+ };
+
+ static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data,
+@@ -299,8 +299,8 @@ static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg,
+ static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = {
+ .write = regmap_i2c_smbus_i2c_write_reg16,
+ .read = regmap_i2c_smbus_i2c_read_reg16,
+- .max_raw_read = I2C_SMBUS_BLOCK_MAX,
+- .max_raw_write = I2C_SMBUS_BLOCK_MAX,
++ .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2,
++ .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2,
+ };
+
+ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
+diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
+index d2656581a6085..3aac960ae30ab 100644
+--- a/drivers/base/regmap/regmap-irq.c
++++ b/drivers/base/regmap/regmap-irq.c
+@@ -189,11 +189,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
+ ret = regmap_write(map, reg, d->mask_buf[i]);
+ if (d->chip->clear_ack) {
+ if (d->chip->ack_invert && !ret)
+- ret = regmap_write(map, reg,
+- d->mask_buf[i]);
++ ret = regmap_write(map, reg, UINT_MAX);
+ else if (!ret)
+- ret = regmap_write(map, reg,
+- ~d->mask_buf[i]);
++ ret = regmap_write(map, reg, 0);
+ }
+ if (ret != 0)
+ dev_err(d->map->dev, "Failed to ack 0x%x: %d\n",
+@@ -254,6 +252,7 @@ static void regmap_irq_enable(struct irq_data *data)
+ struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
+ struct regmap *map = d->map;
+ const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq);
++ unsigned int reg = irq_data->reg_offset / map->reg_stride;
+ unsigned int mask, type;
+
+ type = irq_data->type.type_falling_val | irq_data->type.type_rising_val;
+@@ -270,14 +269,14 @@ static void regmap_irq_enable(struct irq_data *data)
+ * at the corresponding offset in regmap_irq_set_type().
+ */
+ if (d->chip->type_in_mask && type)
+- mask = d->type_buf[irq_data->reg_offset / map->reg_stride];
++ mask = d->type_buf[reg] & irq_data->mask;
+ else
+ mask = irq_data->mask;
+
+ if (d->chip->clear_on_unmask)
+ d->clear_status = true;
+
+- d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask;
++ d->mask_buf[reg] &= ~mask;
+ }
+
+ static void regmap_irq_disable(struct irq_data *data)
+@@ -388,6 +387,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data,
+ subreg = &chip->sub_reg_offsets[b];
+ for (i = 0; i < subreg->num_regs; i++) {
+ unsigned int offset = subreg->offset[i];
++ unsigned int index = offset / map->reg_stride;
+
+ if (chip->not_fixed_stride)
+ ret = regmap_read(map,
+@@ -396,7 +396,7 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data,
+ else
+ ret = regmap_read(map,
+ chip->status_base + offset,
+- &data->status_buf[offset]);
++ &data->status_buf[index]);
+
+ if (ret)
+ break;
+@@ -556,11 +556,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
+ data->status_buf[i]);
+ if (chip->clear_ack) {
+ if (chip->ack_invert && !ret)
+- ret = regmap_write(map, reg,
+- data->status_buf[i]);
++ ret = regmap_write(map, reg, UINT_MAX);
+ else if (!ret)
+- ret = regmap_write(map, reg,
+- ~data->status_buf[i]);
++ ret = regmap_write(map, reg, 0);
+ }
+ if (ret != 0)
+ dev_err(map->dev, "Failed to ack 0x%x: %d\n",
+@@ -817,13 +815,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
+ d->status_buf[i] & d->mask_buf[i]);
+ if (chip->clear_ack) {
+ if (chip->ack_invert && !ret)
+- ret = regmap_write(map, reg,
+- (d->status_buf[i] &
+- d->mask_buf[i]));
++ ret = regmap_write(map, reg, UINT_MAX);
+ else if (!ret)
+- ret = regmap_write(map, reg,
+- ~(d->status_buf[i] &
+- d->mask_buf[i]));
++ ret = regmap_write(map, reg, 0);
+ }
+ if (ret != 0) {
+ dev_err(map->dev, "Failed to ack 0x%x: %d\n",
+diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
+index 21a0c2562ec06..f7811641ed5ae 100644
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -647,6 +647,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
+ if (ret)
+ return ret;
+
++ regmap_debugfs_exit(map);
+ regmap_debugfs_init(map);
+
+ /* Add a devres resource for dev_get_regmap() */
+diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
+index c46f6a8e14d23..3ba1232ce8451 100644
+--- a/drivers/base/swnode.c
++++ b/drivers/base/swnode.c
+@@ -535,7 +535,7 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode,
+ return -ENOENT;
+
+ if (nargs_prop) {
+- error = property_entry_read_int_array(swnode->node->properties,
++ error = property_entry_read_int_array(ref->node->properties,
+ nargs_prop, sizeof(u32),
+ &nargs_prop_val, 1);
+ if (error)
+diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c
+index 3bb7beb127a96..88336f093decd 100644
+--- a/drivers/base/test/test_async_driver_probe.c
++++ b/drivers/base/test/test_async_driver_probe.c
+@@ -84,7 +84,7 @@ test_platform_device_register_node(char *name, int id, int nid)
+
+ pdev = platform_device_alloc(name, id);
+ if (!pdev)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ if (nid != NUMA_NO_NODE)
+ set_dev_node(&pdev->dev, nid);
+@@ -146,7 +146,7 @@ static int __init test_async_probe_init(void)
+ calltime = ktime_get();
+ for_each_online_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+- pdev = &sync_dev[sync_id];
++ pdev = &async_dev[async_id];
+
+ *pdev = test_platform_device_register_node("test_async_driver",
+ async_id,
+diff --git a/drivers/base/topology.c b/drivers/base/topology.c
+index 43c0940643f5d..5df6d861bc21b 100644
+--- a/drivers/base/topology.c
++++ b/drivers/base/topology.c
+@@ -52,39 +52,39 @@ define_id_show_func(core_id);
+ static DEVICE_ATTR_RO(core_id);
+
+ define_siblings_read_func(thread_siblings, sibling_cpumask);
+-static BIN_ATTR_RO(thread_siblings, 0);
+-static BIN_ATTR_RO(thread_siblings_list, 0);
++static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES);
+
+ define_siblings_read_func(core_cpus, sibling_cpumask);
+-static BIN_ATTR_RO(core_cpus, 0);
+-static BIN_ATTR_RO(core_cpus_list, 0);
++static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES);
+
+ define_siblings_read_func(core_siblings, core_cpumask);
+-static BIN_ATTR_RO(core_siblings, 0);
+-static BIN_ATTR_RO(core_siblings_list, 0);
++static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES);
+
+ define_siblings_read_func(die_cpus, die_cpumask);
+-static BIN_ATTR_RO(die_cpus, 0);
+-static BIN_ATTR_RO(die_cpus_list, 0);
++static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES);
+
+ define_siblings_read_func(package_cpus, core_cpumask);
+-static BIN_ATTR_RO(package_cpus, 0);
+-static BIN_ATTR_RO(package_cpus_list, 0);
++static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES);
+
+ #ifdef CONFIG_SCHED_BOOK
+ define_id_show_func(book_id);
+ static DEVICE_ATTR_RO(book_id);
+ define_siblings_read_func(book_siblings, book_cpumask);
+-static BIN_ATTR_RO(book_siblings, 0);
+-static BIN_ATTR_RO(book_siblings_list, 0);
++static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+
+ #ifdef CONFIG_SCHED_DRAWER
+ define_id_show_func(drawer_id);
+ static DEVICE_ATTR_RO(drawer_id);
+ define_siblings_read_func(drawer_siblings, drawer_cpumask);
+-static BIN_ATTR_RO(drawer_siblings, 0);
+-static BIN_ATTR_RO(drawer_siblings_list, 0);
++static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+
+ static struct bin_attribute *bin_attrs[] = {
+diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c
+index ccc86206e5087..09ee2a1e35bbd 100644
+--- a/drivers/base/transport_class.c
++++ b/drivers/base/transport_class.c
+@@ -155,12 +155,27 @@ static int transport_add_class_device(struct attribute_container *cont,
+ struct device *dev,
+ struct device *classdev)
+ {
++ struct transport_class *tclass = class_to_transport_class(cont->class);
+ int error = attribute_container_add_class_device(classdev);
+ struct transport_container *tcont =
+ attribute_container_to_transport_container(cont);
+
+- if (!error && tcont->statistics)
++ if (error)
++ goto err_remove;
++
++ if (tcont->statistics) {
+ error = sysfs_create_group(&classdev->kobj, tcont->statistics);
++ if (error)
++ goto err_del;
++ }
++
++ return 0;
++
++err_del:
++ attribute_container_class_device_del(classdev);
++err_remove:
++ if (tclass->remove)
++ tclass->remove(tcont, dev, classdev);
+
+ return error;
+ }
+diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
+index ab3e37aa1830c..fb12311b87ff9 100644
+--- a/drivers/block/Kconfig
++++ b/drivers/block/Kconfig
+@@ -33,6 +33,22 @@ config BLK_DEV_FD
+ To compile this driver as a module, choose M here: the
+ module will be called floppy.
+
++config BLK_DEV_FD_RAWCMD
++ bool "Support for raw floppy disk commands (DEPRECATED)"
++ depends on BLK_DEV_FD
++ help
++ If you want to use actual physical floppies and expect to do
++ special low-level hardware accesses to them (access and use
++ non-standard formats, for example), then enable this.
++
++ Note that the code enabled by this option is rarely used and
++ might be unstable or insecure, and distros should not enable it.
++
++ Note: FDRAWCMD is deprecated and will be removed from the kernel
++ in the near future.
++
++ If unsure, say N.
++
+ config AMIGA_FLOPPY
+ tristate "Amiga floppy support"
+ depends on AMIGA
+@@ -255,15 +271,6 @@ config BLK_DEV_NBD
+
+ If unsure, say N.
+
+-config BLK_DEV_SX8
+- tristate "Promise SATA SX8 support"
+- depends on PCI
+- help
+- Saying Y or M here will enable support for the
+- Promise SATA SX8 controllers.
+-
+- Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
+-
+ config BLK_DEV_RAM
+ tristate "RAM block device support"
+ help
+@@ -394,6 +401,7 @@ config XEN_BLKDEV_BACKEND
+ config VIRTIO_BLK
+ tristate "Virtio block driver"
+ depends on VIRTIO
++ select SG_POOL
+ help
+ This is the virtual block driver for virtio. It can be used with
+ QEMU based VMMs (like KVM or Xen). Say Y or M.
+diff --git a/drivers/block/Makefile b/drivers/block/Makefile
+index bc68817ef4966..91220b251b467 100644
+--- a/drivers/block/Makefile
++++ b/drivers/block/Makefile
+@@ -27,8 +27,6 @@ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+
+-obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
+-
+ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
+ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
+index 8b1714021498c..1ed557cb5ed23 100644
+--- a/drivers/block/amiflop.c
++++ b/drivers/block/amiflop.c
+@@ -61,6 +61,7 @@
+ #include <linux/hdreg.h>
+ #include <linux/delay.h>
+ #include <linux/init.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/fs.h>
+ #include <linux/blk-mq.h>
+diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
+index a093644ac39fb..82faaa4581579 100644
+--- a/drivers/block/ataflop.c
++++ b/drivers/block/ataflop.c
+@@ -68,6 +68,7 @@
+ #include <linux/delay.h>
+ #include <linux/init.h>
+ #include <linux/blk-mq.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/completion.h>
+ #include <linux/wait.h>
+@@ -298,6 +299,7 @@ static struct atari_floppy_struct {
+ disk change detection) */
+ int flags; /* flags */
+ struct gendisk *disk[NUM_DISK_MINORS];
++ bool registered[NUM_DISK_MINORS];
+ int ref;
+ int type;
+ struct blk_mq_tag_set tag_set;
+@@ -456,10 +458,20 @@ static DEFINE_TIMER(fd_timer, check_change);
+
+ static void fd_end_request_cur(blk_status_t err)
+ {
++ DPRINT(("fd_end_request_cur(), bytes %d of %d\n",
++ blk_rq_cur_bytes(fd_request),
++ blk_rq_bytes(fd_request)));
++
+ if (!blk_update_request(fd_request, err,
+ blk_rq_cur_bytes(fd_request))) {
++ DPRINT(("calling __blk_mq_end_request()\n"));
+ __blk_mq_end_request(fd_request, err);
+ fd_request = NULL;
++ } else {
++ /* requeue rest of request */
++ DPRINT(("calling blk_mq_requeue_request()\n"));
++ blk_mq_requeue_request(fd_request, true);
++ fd_request = NULL;
+ }
+ }
+
+@@ -653,9 +665,6 @@ static inline void copy_buffer(void *from, void *to)
+ *p2++ = *p1++;
+ }
+
+-
+-
+-
+ /* General Interrupt Handling */
+
+ static void (*FloppyIRQHandler)( int status ) = NULL;
+@@ -700,12 +709,21 @@ static void fd_error( void )
+ if (fd_request->error_count >= MAX_ERRORS) {
+ printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
+ fd_end_request_cur(BLK_STS_IOERR);
++ finish_fdc();
++ return;
+ }
+ else if (fd_request->error_count == RECALIBRATE_ERRORS) {
+ printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
+ if (SelectedDrive != -1)
+ SUD.track = -1;
+ }
++ /* need to re-run request to recalibrate */
++ atari_disable_irq( IRQ_MFP_FDC );
++
++ setup_req_params( SelectedDrive );
++ do_fd_action( SelectedDrive );
++
++ atari_enable_irq( IRQ_MFP_FDC );
+ }
+
+
+@@ -732,8 +750,10 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
+ if (type) {
+ type--;
+ if (type >= NUM_DISK_MINORS ||
+- minor2disktype[type].drive_types > DriveType)
++ minor2disktype[type].drive_types > DriveType) {
++ finish_fdc();
+ return -EINVAL;
++ }
+ }
+
+ q = unit[drive].disk[type]->queue;
+@@ -751,6 +771,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
+ }
+
+ if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) {
++ finish_fdc();
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -791,6 +812,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
+
+ wait_for_completion(&format_wait);
+
++ finish_fdc();
+ ret = FormatError ? -EIO : 0;
+ out:
+ blk_mq_unquiesce_queue(q);
+@@ -825,6 +847,7 @@ static void do_fd_action( int drive )
+ else {
+ /* all sectors finished */
+ fd_end_request_cur(BLK_STS_OK);
++ finish_fdc();
+ return;
+ }
+ }
+@@ -1229,6 +1252,7 @@ static void fd_rwsec_done1(int status)
+ else {
+ /* all sectors finished */
+ fd_end_request_cur(BLK_STS_OK);
++ finish_fdc();
+ }
+ return;
+
+@@ -1350,7 +1374,7 @@ static void fd_times_out(struct timer_list *unused)
+
+ static void finish_fdc( void )
+ {
+- if (!NeedSeek) {
++ if (!NeedSeek || !stdma_is_locked_by(floppy_irq)) {
+ finish_fdc_done( 0 );
+ }
+ else {
+@@ -1385,7 +1409,8 @@ static void finish_fdc_done( int dummy )
+ start_motor_off_timer();
+
+ local_irq_save(flags);
+- stdma_release();
++ if (stdma_is_locked_by(floppy_irq))
++ stdma_release();
+ local_irq_restore(flags);
+
+ DPRINT(("finish_fdc() finished\n"));
+@@ -1475,15 +1500,6 @@ static void setup_req_params( int drive )
+ ReqTrack, ReqSector, (unsigned long)ReqData ));
+ }
+
+-static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx)
+-{
+- spin_lock_irq(&ataflop_lock);
+- atari_disable_irq(IRQ_MFP_FDC);
+- finish_fdc();
+- atari_enable_irq(IRQ_MFP_FDC);
+- spin_unlock_irq(&ataflop_lock);
+-}
+-
+ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+ {
+@@ -1491,6 +1507,10 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ int drive = floppy - unit;
+ int type = floppy->type;
+
++ DPRINT(("Queue request: drive %d type %d sectors %d of %d last %d\n",
++ drive, type, blk_rq_cur_sectors(bd->rq),
++ blk_rq_sectors(bd->rq), bd->last));
++
+ spin_lock_irq(&ataflop_lock);
+ if (fd_request) {
+ spin_unlock_irq(&ataflop_lock);
+@@ -1511,6 +1531,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ /* drive not connected */
+ printk(KERN_ERR "Unknown Device: fd%d\n", drive );
+ fd_end_request_cur(BLK_STS_IOERR);
++ stdma_release();
+ goto out;
+ }
+
+@@ -1527,11 +1548,13 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ if (--type >= NUM_DISK_MINORS) {
+ printk(KERN_WARNING "fd%d: invalid disk format", drive );
+ fd_end_request_cur(BLK_STS_IOERR);
++ stdma_release();
+ goto out;
+ }
+ if (minor2disktype[type].drive_types > DriveType) {
+ printk(KERN_WARNING "fd%d: unsupported disk format", drive );
+ fd_end_request_cur(BLK_STS_IOERR);
++ stdma_release();
+ goto out;
+ }
+ type = minor2disktype[type].index;
+@@ -1550,8 +1573,6 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ setup_req_params( drive );
+ do_fd_action( drive );
+
+- if (bd->last)
+- finish_fdc();
+ atari_enable_irq( IRQ_MFP_FDC );
+
+ out:
+@@ -1634,6 +1655,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+ /* what if type > 0 here? Overwrite specified entry ? */
+ if (type) {
+ /* refuse to re-set a predefined type for now */
++ finish_fdc();
+ return -EINVAL;
+ }
+
+@@ -1701,8 +1723,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+
+ /* sanity check */
+ if (setprm.track != dtp->blocks/dtp->spt/2 ||
+- setprm.head != 2)
++ setprm.head != 2) {
++ finish_fdc();
+ return -EINVAL;
++ }
+
+ UDT = dtp;
+ set_capacity(disk, UDT->blocks);
+@@ -1962,7 +1986,6 @@ static const struct block_device_operations floppy_fops = {
+
+ static const struct blk_mq_ops ataflop_mq_ops = {
+ .queue_rq = ataflop_queue_rq,
+- .commit_rqs = ataflop_commit_rqs,
+ };
+
+ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
+@@ -1986,8 +2009,6 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
+ return 0;
+ }
+
+-static DEFINE_MUTEX(ataflop_probe_lock);
+-
+ static void ataflop_probe(dev_t dev)
+ {
+ int drive = MINOR(dev) & 3;
+@@ -1998,12 +2019,46 @@ static void ataflop_probe(dev_t dev)
+
+ if (drive >= FD_MAX_UNITS || type >= NUM_DISK_MINORS)
+ return;
+- mutex_lock(&ataflop_probe_lock);
+ if (!unit[drive].disk[type]) {
+- if (ataflop_alloc_disk(drive, type) == 0)
++ if (ataflop_alloc_disk(drive, type) == 0) {
+ add_disk(unit[drive].disk[type]);
++ unit[drive].registered[type] = true;
++ }
++ }
++}
++
++static void atari_floppy_cleanup(void)
++{
++ int i;
++ int type;
++
++ for (i = 0; i < FD_MAX_UNITS; i++) {
++ for (type = 0; type < NUM_DISK_MINORS; type++) {
++ if (!unit[i].disk[type])
++ continue;
++ del_gendisk(unit[i].disk[type]);
++ blk_cleanup_queue(unit[i].disk[type]->queue);
++ put_disk(unit[i].disk[type]);
++ }
++ blk_mq_free_tag_set(&unit[i].tag_set);
++ }
++
++ del_timer_sync(&fd_timer);
++ atari_stram_free(DMABuffer);
++}
++
++static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs)
++{
++ int type;
++
++ for (type = 0; type < NUM_DISK_MINORS; type++) {
++ if (!fs->disk[type])
++ continue;
++ if (fs->registered[type])
++ del_gendisk(fs->disk[type]);
++ blk_cleanup_disk(fs->disk[type]);
+ }
+- mutex_unlock(&ataflop_probe_lock);
++ blk_mq_free_tag_set(&fs->tag_set);
+ }
+
+ static int __init atari_floppy_init (void)
+@@ -2015,11 +2070,6 @@ static int __init atari_floppy_init (void)
+ /* Amiga, Mac, ... don't have Atari-compatible floppy :-) */
+ return -ENODEV;
+
+- mutex_lock(&ataflop_probe_lock);
+- ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe);
+- if (ret)
+- goto out_unlock;
+-
+ for (i = 0; i < FD_MAX_UNITS; i++) {
+ memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set));
+ unit[i].tag_set.ops = &ataflop_mq_ops;
+@@ -2065,6 +2115,7 @@ static int __init atari_floppy_init (void)
+ unit[i].track = -1;
+ unit[i].flags = 0;
+ add_disk(unit[i].disk[0]);
++ unit[i].registered[0] = true;
+ }
+
+ printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n",
+@@ -2072,18 +2123,17 @@ static int __init atari_floppy_init (void)
+ UseTrackbuffer ? "" : "no ");
+ config_types();
+
+- return 0;
++ ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe);
++ if (ret) {
++ printk(KERN_ERR "atari_floppy_init: cannot register block device\n");
++ atari_floppy_cleanup();
++ }
++ return ret;
+
+ err:
+- while (--i >= 0) {
+- blk_cleanup_queue(unit[i].disk[0]->queue);
+- put_disk(unit[i].disk[0]);
+- blk_mq_free_tag_set(&unit[i].tag_set);
+- }
++ while (--i >= 0)
++ atari_cleanup_floppy_disk(&unit[i]);
+
+- unregister_blkdev(FLOPPY_MAJOR, "fd");
+-out_unlock:
+- mutex_unlock(&ataflop_probe_lock);
+ return ret;
+ }
+
+@@ -2128,22 +2178,8 @@ __setup("floppy=", atari_floppy_setup);
+
+ static void __exit atari_floppy_exit(void)
+ {
+- int i, type;
+-
+- for (i = 0; i < FD_MAX_UNITS; i++) {
+- for (type = 0; type < NUM_DISK_MINORS; type++) {
+- if (!unit[i].disk[type])
+- continue;
+- del_gendisk(unit[i].disk[type]);
+- blk_cleanup_queue(unit[i].disk[type]->queue);
+- put_disk(unit[i].disk[type]);
+- }
+- blk_mq_free_tag_set(&unit[i].tag_set);
+- }
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+-
+- del_timer_sync(&fd_timer);
+- atari_stram_free( DMABuffer );
++ atari_floppy_cleanup();
+ }
+
+ module_init(atari_floppy_init)
+diff --git a/drivers/block/brd.c b/drivers/block/brd.c
+index 530b312402031..76ce6f766d55e 100644
+--- a/drivers/block/brd.c
++++ b/drivers/block/brd.c
+@@ -78,11 +78,9 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
+ }
+
+ /*
+- * Look up and return a brd's page for a given sector.
+- * If one does not exist, allocate an empty page, and insert that. Then
+- * return it.
++ * Insert a new page for a given sector, if one does not already exist.
+ */
+-static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
++static int brd_insert_page(struct brd_device *brd, sector_t sector)
+ {
+ pgoff_t idx;
+ struct page *page;
+@@ -90,7 +88,7 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
+
+ page = brd_lookup_page(brd, sector);
+ if (page)
+- return page;
++ return 0;
+
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+@@ -99,11 +97,11 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
+ gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
+ page = alloc_page(gfp_flags);
+ if (!page)
+- return NULL;
++ return -ENOMEM;
+
+ if (radix_tree_preload(GFP_NOIO)) {
+ __free_page(page);
+- return NULL;
++ return -ENOMEM;
+ }
+
+ spin_lock(&brd->brd_lock);
+@@ -120,8 +118,7 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
+ spin_unlock(&brd->brd_lock);
+
+ radix_tree_preload_end();
+-
+- return page;
++ return 0;
+ }
+
+ /*
+@@ -174,16 +171,17 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+ {
+ unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+ size_t copy;
++ int ret;
+
+ copy = min_t(size_t, n, PAGE_SIZE - offset);
+- if (!brd_insert_page(brd, sector))
+- return -ENOSPC;
++ ret = brd_insert_page(brd, sector);
++ if (ret)
++ return ret;
+ if (copy < n) {
+ sector += copy >> SECTOR_SHIFT;
+- if (!brd_insert_page(brd, sector))
+- return -ENOSPC;
++ ret = brd_insert_page(brd, sector);
+ }
+- return 0;
++ return ret;
+ }
+
+ /*
+@@ -372,6 +370,7 @@ static int brd_alloc(int i)
+ struct brd_device *brd;
+ struct gendisk *disk;
+ char buf[DISK_NAME_LEN];
++ int err = -ENOMEM;
+
+ mutex_lock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list) {
+@@ -422,16 +421,21 @@ static int brd_alloc(int i)
+ /* Tell the block layer that this is not a rotational device */
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+- add_disk(disk);
++ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
++ err = add_disk(disk);
++ if (err)
++ goto out_cleanup_disk;
+
+ return 0;
+
++out_cleanup_disk:
++ blk_cleanup_disk(disk);
+ out_free_dev:
+ mutex_lock(&brd_devices_mutex);
+ list_del(&brd->brd_list);
+ mutex_unlock(&brd_devices_mutex);
+ kfree(brd);
+- return -ENOMEM;
++ return err;
+ }
+
+ static void brd_probe(dev_t dev)
+diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
+index 5d9181382ce19..0a5766a2f1618 100644
+--- a/drivers/block/drbd/drbd_int.h
++++ b/drivers/block/drbd/drbd_int.h
+@@ -1642,22 +1642,22 @@ struct sib_info {
+ };
+ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
+
+-extern void notify_resource_state(struct sk_buff *,
++extern int notify_resource_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource *,
+ struct resource_info *,
+ enum drbd_notification_type);
+-extern void notify_device_state(struct sk_buff *,
++extern int notify_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_device *,
+ struct device_info *,
+ enum drbd_notification_type);
+-extern void notify_connection_state(struct sk_buff *,
++extern int notify_connection_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection *,
+ struct connection_info *,
+ enum drbd_notification_type);
+-extern void notify_peer_device_state(struct sk_buff *,
++extern int notify_peer_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device *,
+ struct peer_device_info *,
+diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
+index 55234a558e98b..eaf20a3324018 100644
+--- a/drivers/block/drbd/drbd_main.c
++++ b/drivers/block/drbd/drbd_main.c
+@@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
+ unsigned int set_size)
+ {
+ struct drbd_request *r;
+- struct drbd_request *req = NULL;
++ struct drbd_request *req = NULL, *tmp = NULL;
+ int expect_epoch = 0;
+ int expect_size = 0;
+
+@@ -225,8 +225,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
+ * to catch requests being barrier-acked "unexpectedly".
+ * It usually should find the same req again, or some READ preceding it. */
+ list_for_each_entry(req, &connection->transfer_log, tl_requests)
+- if (req->epoch == expect_epoch)
++ if (req->epoch == expect_epoch) {
++ tmp = req;
+ break;
++ }
++ req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
+ list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
+ if (req->epoch != expect_epoch)
+ break;
+@@ -2241,7 +2244,8 @@ void drbd_destroy_device(struct kref *kref)
+ kref_put(&peer_device->connection->kref, drbd_destroy_connection);
+ kfree(peer_device);
+ }
+- memset(device, 0xfd, sizeof(*device));
++ if (device->submit.wq)
++ destroy_workqueue(device->submit.wq);
+ kfree(device);
+ kref_put(&resource->kref, drbd_destroy_resource);
+ }
+@@ -2333,7 +2337,6 @@ void drbd_destroy_resource(struct kref *kref)
+ idr_destroy(&resource->devices);
+ free_cpumask_var(resource->cpu_mask);
+ kfree(resource->name);
+- memset(resource, 0xf2, sizeof(*resource));
+ kfree(resource);
+ }
+
+@@ -2674,7 +2677,6 @@ void drbd_destroy_connection(struct kref *kref)
+ drbd_free_socket(&connection->data);
+ kfree(connection->int_dig_in);
+ kfree(connection->int_dig_vv);
+- memset(connection, 0xfc, sizeof(*connection));
+ kfree(connection);
+ kref_put(&resource->kref, drbd_destroy_resource);
+ }
+@@ -2696,7 +2698,7 @@ static int init_submitter(struct drbd_device *device)
+ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
+ {
+ struct drbd_resource *resource = adm_ctx->resource;
+- struct drbd_connection *connection;
++ struct drbd_connection *connection, *n;
+ struct drbd_device *device;
+ struct drbd_peer_device *peer_device, *tmp_peer_device;
+ struct gendisk *disk;
+@@ -2737,6 +2739,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
+ sprintf(disk->disk_name, "drbd%d", minor);
+ disk->private_data = device;
+
++ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
+ blk_queue_write_cache(disk->queue, true, true);
+ /* Setting the max_hw_sectors to an odd value of 8kibyte here
+ This triggers a max_bio_size message upon first attach or connect */
+@@ -2791,10 +2794,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
+
+ if (init_submitter(device)) {
+ err = ERR_NOMEM;
+- goto out_idr_remove_vol;
++ goto out_idr_remove_from_resource;
+ }
+
+- add_disk(disk);
++ err = add_disk(disk);
++ if (err)
++ goto out_destroy_workqueue;
+
+ /* inherit the connection state */
+ device->state.conn = first_connection(resource)->cstate;
+@@ -2808,10 +2813,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
+ drbd_debugfs_device_add(device);
+ return NO_ERROR;
+
+-out_idr_remove_vol:
+- idr_remove(&connection->peer_devices, vnr);
++out_destroy_workqueue:
++ destroy_workqueue(device->submit.wq);
+ out_idr_remove_from_resource:
+- for_each_connection(connection, resource) {
++ for_each_connection_safe(connection, n, resource) {
+ peer_device = idr_remove(&connection->peer_devices, vnr);
+ if (peer_device)
+ kref_put(&connection->kref, drbd_destroy_connection);
+@@ -3603,9 +3608,8 @@ const char *cmdname(enum drbd_packet cmd)
+ * when we want to support more than
+ * one PRO_VERSION */
+ static const char *cmdnames[] = {
++
+ [P_DATA] = "Data",
+- [P_WSAME] = "WriteSame",
+- [P_TRIM] = "Trim",
+ [P_DATA_REPLY] = "DataReply",
+ [P_RS_DATA_REPLY] = "RSDataReply",
+ [P_BARRIER] = "Barrier",
+@@ -3616,7 +3620,6 @@ const char *cmdname(enum drbd_packet cmd)
+ [P_DATA_REQUEST] = "DataRequest",
+ [P_RS_DATA_REQUEST] = "RSDataRequest",
+ [P_SYNC_PARAM] = "SyncParam",
+- [P_SYNC_PARAM89] = "SyncParam89",
+ [P_PROTOCOL] = "ReportProtocol",
+ [P_UUIDS] = "ReportUUIDs",
+ [P_SIZES] = "ReportSizes",
+@@ -3624,6 +3627,7 @@ const char *cmdname(enum drbd_packet cmd)
+ [P_SYNC_UUID] = "ReportSyncUUID",
+ [P_AUTH_CHALLENGE] = "AuthChallenge",
+ [P_AUTH_RESPONSE] = "AuthResponse",
++ [P_STATE_CHG_REQ] = "StateChgRequest",
+ [P_PING] = "Ping",
+ [P_PING_ACK] = "PingAck",
+ [P_RECV_ACK] = "RecvAck",
+@@ -3634,23 +3638,25 @@ const char *cmdname(enum drbd_packet cmd)
+ [P_NEG_DREPLY] = "NegDReply",
+ [P_NEG_RS_DREPLY] = "NegRSDReply",
+ [P_BARRIER_ACK] = "BarrierAck",
+- [P_STATE_CHG_REQ] = "StateChgRequest",
+ [P_STATE_CHG_REPLY] = "StateChgReply",
+ [P_OV_REQUEST] = "OVRequest",
+ [P_OV_REPLY] = "OVReply",
+ [P_OV_RESULT] = "OVResult",
+ [P_CSUM_RS_REQUEST] = "CsumRSRequest",
+ [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
++ [P_SYNC_PARAM89] = "SyncParam89",
+ [P_COMPRESSED_BITMAP] = "CBitmap",
+ [P_DELAY_PROBE] = "DelayProbe",
+ [P_OUT_OF_SYNC] = "OutOfSync",
+- [P_RETRY_WRITE] = "RetryWrite",
+ [P_RS_CANCEL] = "RSCancel",
+ [P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
+ [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
+ [P_PROTOCOL_UPDATE] = "protocol_update",
++ [P_TRIM] = "Trim",
+ [P_RS_THIN_REQ] = "rs_thin_req",
+ [P_RS_DEALLOCATED] = "rs_deallocated",
++ [P_WSAME] = "WriteSame",
++ [P_ZEROES] = "Zeroes",
+
+ /* enum drbd_packet, but not commands - obsoleted flags:
+ * P_MAY_IGNORE
+diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
+index 44ccf8b4f4b29..69184cf17b6ad 100644
+--- a/drivers/block/drbd/drbd_nl.c
++++ b/drivers/block/drbd/drbd_nl.c
+@@ -4617,7 +4617,7 @@ static int nla_put_notification_header(struct sk_buff *msg,
+ return drbd_notification_header_to_skb(msg, &nh, true);
+ }
+
+-void notify_resource_state(struct sk_buff *skb,
++int notify_resource_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_resource *resource,
+ struct resource_info *resource_info,
+@@ -4659,16 +4659,17 @@ void notify_resource_state(struct sk_buff *skb,
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+- return;
++ return 0;
+
+ nla_put_failure:
+ nlmsg_free(skb);
+ failed:
+ drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
++ return err;
+ }
+
+-void notify_device_state(struct sk_buff *skb,
++int notify_device_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_device *device,
+ struct device_info *device_info,
+@@ -4708,16 +4709,17 @@ void notify_device_state(struct sk_buff *skb,
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+- return;
++ return 0;
+
+ nla_put_failure:
+ nlmsg_free(skb);
+ failed:
+ drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
++ return err;
+ }
+
+-void notify_connection_state(struct sk_buff *skb,
++int notify_connection_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_connection *connection,
+ struct connection_info *connection_info,
+@@ -4757,16 +4759,17 @@ void notify_connection_state(struct sk_buff *skb,
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+- return;
++ return 0;
+
+ nla_put_failure:
+ nlmsg_free(skb);
+ failed:
+ drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
++ return err;
+ }
+
+-void notify_peer_device_state(struct sk_buff *skb,
++int notify_peer_device_state(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_peer_device *peer_device,
+ struct peer_device_info *peer_device_info,
+@@ -4807,13 +4810,14 @@ void notify_peer_device_state(struct sk_buff *skb,
+ if (err && err != -ESRCH)
+ goto failed;
+ }
+- return;
++ return 0;
+
+ nla_put_failure:
+ nlmsg_free(skb);
+ failed:
+ drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
+ err, seq);
++ return err;
+ }
+
+ void notify_helper(enum drbd_notification_type type,
+@@ -4864,7 +4868,7 @@ fail:
+ err, seq);
+ }
+
+-static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
++static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+ {
+ struct drbd_genlmsghdr *dh;
+ int err;
+@@ -4878,11 +4882,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+ if (nla_put_notification_header(skb, NOTIFY_EXISTS))
+ goto nla_put_failure;
+ genlmsg_end(skb, dh);
+- return;
++ return 0;
+
+ nla_put_failure:
+ nlmsg_free(skb);
+ pr_err("Error %d sending event. Event seq:%u\n", err, seq);
++ return err;
+ }
+
+ static void free_state_changes(struct list_head *list)
+@@ -4909,6 +4914,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+ unsigned int seq = cb->args[2];
+ unsigned int n;
+ enum drbd_notification_type flags = 0;
++ int err = 0;
+
+ /* There is no need for taking notification_mutex here: it doesn't
+ matter if the initial state events mix with later state chage
+@@ -4917,32 +4923,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+
+ cb->args[5]--;
+ if (cb->args[5] == 1) {
+- notify_initial_state_done(skb, seq);
++ err = notify_initial_state_done(skb, seq);
+ goto out;
+ }
+ n = cb->args[4]++;
+ if (cb->args[4] < cb->args[3])
+ flags |= NOTIFY_CONTINUES;
+ if (n < 1) {
+- notify_resource_state_change(skb, seq, state_change->resource,
++ err = notify_resource_state_change(skb, seq, state_change->resource,
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n--;
+ if (n < state_change->n_connections) {
+- notify_connection_state_change(skb, seq, &state_change->connections[n],
++ err = notify_connection_state_change(skb, seq, &state_change->connections[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_connections;
+ if (n < state_change->n_devices) {
+- notify_device_state_change(skb, seq, &state_change->devices[n],
++ err = notify_device_state_change(skb, seq, &state_change->devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_devices;
+ if (n < state_change->n_devices * state_change->n_connections) {
+- notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
++ err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+@@ -4957,7 +4963,10 @@ next:
+ cb->args[4] = 0;
+ }
+ out:
+- return skb->len;
++ if (err)
++ return err;
++ else
++ return skb->len;
+ }
+
+ int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
+index 1f740e42e4571..0104e101b0d71 100644
+--- a/drivers/block/drbd/drbd_receiver.c
++++ b/drivers/block/drbd/drbd_receiver.c
+@@ -1301,7 +1301,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
+ bio_set_dev(bio, device->ldev->backing_bdev);
+ bio->bi_private = octx;
+ bio->bi_end_io = one_flush_endio;
+- bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
++ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+ device->flush_jif = jiffies;
+ set_bit(FLUSH_PENDING, &device->flags);
+diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
+index 5ca233644d705..4281dc847bc22 100644
+--- a/drivers/block/drbd/drbd_req.c
++++ b/drivers/block/drbd/drbd_req.c
+@@ -180,7 +180,8 @@ void start_new_tl_epoch(struct drbd_connection *connection)
+ void complete_master_bio(struct drbd_device *device,
+ struct bio_and_error *m)
+ {
+- m->bio->bi_status = errno_to_blk_status(m->error);
++ if (unlikely(m->error))
++ m->bio->bi_status = errno_to_blk_status(m->error);
+ bio_endio(m->bio);
+ dec_ap_bio(device);
+ }
+@@ -1601,6 +1602,8 @@ blk_qc_t drbd_submit_bio(struct bio *bio)
+ struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ /*
+ * what we "blindly" assume:
+diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
+index b8a27818ab3f8..4ee11aef6672b 100644
+--- a/drivers/block/drbd/drbd_state.c
++++ b/drivers/block/drbd/drbd_state.c
+@@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
+ return rv;
+ }
+
+-void notify_resource_state_change(struct sk_buff *skb,
++int notify_resource_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_resource_state_change *resource_state_change,
+ enum drbd_notification_type type)
+@@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb,
+ .res_susp_fen = resource_state_change->susp_fen[NEW],
+ };
+
+- notify_resource_state(skb, seq, resource, &resource_info, type);
++ return notify_resource_state(skb, seq, resource, &resource_info, type);
+ }
+
+-void notify_connection_state_change(struct sk_buff *skb,
++int notify_connection_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_connection_state_change *connection_state_change,
+ enum drbd_notification_type type)
+@@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb,
+ .conn_role = connection_state_change->peer_role[NEW],
+ };
+
+- notify_connection_state(skb, seq, connection, &connection_info, type);
++ return notify_connection_state(skb, seq, connection, &connection_info, type);
+ }
+
+-void notify_device_state_change(struct sk_buff *skb,
++int notify_device_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_device_state_change *device_state_change,
+ enum drbd_notification_type type)
+@@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb,
+ .dev_disk_state = device_state_change->disk_state[NEW],
+ };
+
+- notify_device_state(skb, seq, device, &device_info, type);
++ return notify_device_state(skb, seq, device, &device_info, type);
+ }
+
+-void notify_peer_device_state_change(struct sk_buff *skb,
++int notify_peer_device_state_change(struct sk_buff *skb,
+ unsigned int seq,
+ struct drbd_peer_device_state_change *p,
+ enum drbd_notification_type type)
+@@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb,
+ .peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
+ };
+
+- notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
++ return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
+ }
+
+ static void broadcast_state_change(struct drbd_state_change *state_change)
+@@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
+ struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
+ bool resource_state_has_changed;
+ unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+- void (*last_func)(struct sk_buff *, unsigned int, void *,
++ int (*last_func)(struct sk_buff *, unsigned int, void *,
+ enum drbd_notification_type) = NULL;
+ void *last_arg = NULL;
+
+diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h
+index ba80f612d6abb..d5b0479bc9a66 100644
+--- a/drivers/block/drbd/drbd_state_change.h
++++ b/drivers/block/drbd/drbd_state_change.h
+@@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_
+ extern void copy_old_to_new_state_change(struct drbd_state_change *);
+ extern void forget_state_change(struct drbd_state_change *);
+
+-extern void notify_resource_state_change(struct sk_buff *,
++extern int notify_resource_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource_state_change *,
+ enum drbd_notification_type type);
+-extern void notify_connection_state_change(struct sk_buff *,
++extern int notify_connection_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection_state_change *,
+ enum drbd_notification_type type);
+-extern void notify_device_state_change(struct sk_buff *,
++extern int notify_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_device_state_change *,
+ enum drbd_notification_type type);
+-extern void notify_peer_device_state_change(struct sk_buff *,
++extern int notify_peer_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device_state_change *,
+ enum drbd_notification_type type);
+diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
+index fef79ea52e3ed..4dc25a123d946 100644
+--- a/drivers/block/floppy.c
++++ b/drivers/block/floppy.c
+@@ -184,6 +184,7 @@ static int print_unex = 1;
+ #include <linux/ioport.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
++#include <linux/major.h>
+ #include <linux/platform_device.h>
+ #include <linux/mod_devicetable.h>
+ #include <linux/mutex.h>
+@@ -508,8 +509,8 @@ static unsigned long fdc_busy;
+ static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
+ static DECLARE_WAIT_QUEUE_HEAD(command_done);
+
+-/* Errors during formatting are counted here. */
+-static int format_errors;
++/* errors encountered on the current (or last) request */
++static int floppy_errors;
+
+ /* Format request descriptor. */
+ static struct format_descr format_req;
+@@ -529,7 +530,6 @@ static struct format_descr format_req;
+ static char *floppy_track_buffer;
+ static int max_buffer_sectors;
+
+-static int *errors;
+ typedef void (*done_f)(int);
+ static const struct cont_t {
+ void (*interrupt)(void);
+@@ -1014,7 +1014,7 @@ static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn);
+ static void cancel_activity(void)
+ {
+ do_floppy = NULL;
+- cancel_delayed_work_sync(&fd_timer);
++ cancel_delayed_work(&fd_timer);
+ cancel_work_sync(&floppy_work);
+ }
+
+@@ -1454,7 +1454,7 @@ static int interpret_errors(void)
+ if (drive_params[current_drive].flags & FTD_MSG)
+ DPRINT("Over/Underrun - retrying\n");
+ bad = 0;
+- } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
++ } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) {
+ print_errors();
+ }
+ if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
+@@ -2094,7 +2094,7 @@ static void bad_flp_intr(void)
+ if (!next_valid_format(current_drive))
+ return;
+ }
+- err_count = ++(*errors);
++ err_count = ++floppy_errors;
+ INFBOUND(write_errors[current_drive].badness, err_count);
+ if (err_count > drive_params[current_drive].max_errors.abort)
+ cont->done(0);
+@@ -2240,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
+ return -EINVAL;
+ }
+ format_req = *tmp_format_req;
+- format_errors = 0;
+ cont = &format_cont;
+- errors = &format_errors;
++ floppy_errors = 0;
+ ret = wait_til_done(redo_format, true);
+ if (ret == -EINTR)
+ return -EINTR;
+@@ -2760,10 +2759,11 @@ static int set_next_request(void)
+ current_req = list_first_entry_or_null(&floppy_reqs, struct request,
+ queuelist);
+ if (current_req) {
+- current_req->error_count = 0;
++ floppy_errors = 0;
+ list_del_init(&current_req->queuelist);
++ return 1;
+ }
+- return current_req != NULL;
++ return 0;
+ }
+
+ /* Starts or continues processing request. Will automatically unlock the
+@@ -2822,7 +2822,6 @@ do_request:
+ _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
+ } else
+ probing = 0;
+- errors = &(current_req->error_count);
+ tmp = make_raw_rw_request();
+ if (tmp < 2) {
+ request_done(tmp);
+@@ -2983,6 +2982,8 @@ static const char *drive_name(int type, int drive)
+ return "(null)";
+ }
+
++#ifdef CONFIG_BLK_DEV_FD_RAWCMD
++
+ /* raw commands */
+ static void raw_cmd_done(int flag)
+ {
+@@ -3080,6 +3081,8 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr)
+ }
+ }
+
++#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT)
++
+ static int raw_cmd_copyin(int cmd, void __user *param,
+ struct floppy_raw_cmd **rcmd)
+ {
+@@ -3107,7 +3110,7 @@ loop:
+ ptr->resultcode = 0;
+
+ if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
+- if (ptr->length <= 0)
++ if (ptr->length <= 0 || ptr->length >= MAX_LEN)
+ return -EINVAL;
+ ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length);
+ fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length);
+@@ -3180,6 +3183,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
+ return ret;
+ }
+
++static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
++ void __user *param)
++{
++ int ret;
++
++ pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n");
++
++ if (type)
++ return -EINVAL;
++ if (lock_fdc(drive))
++ return -EINTR;
++ set_floppy(drive);
++ ret = raw_cmd_ioctl(cmd, param);
++ if (ret == -EINTR)
++ return -EINTR;
++ process_fd_request();
++ return ret;
++}
++
++#else /* CONFIG_BLK_DEV_FD_RAWCMD */
++
++static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
++ void __user *param)
++{
++ return -EOPNOTSUPP;
++}
++
++#endif
++
+ static int invalidate_drive(struct block_device *bdev)
+ {
+ /* invalidate the buffer track to force a reread */
+@@ -3368,7 +3400,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
+ {
+ int drive = (long)bdev->bd_disk->private_data;
+ int type = ITYPE(drive_state[drive].fd_device);
+- int i;
+ int ret;
+ int size;
+ union inparam {
+@@ -3519,16 +3550,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
+ outparam = &write_errors[drive];
+ break;
+ case FDRAWCMD:
+- if (type)
+- return -EINVAL;
+- if (lock_fdc(drive))
+- return -EINTR;
+- set_floppy(drive);
+- i = raw_cmd_ioctl(cmd, (void __user *)param);
+- if (i == -EINTR)
+- return -EINTR;
+- process_fd_request();
+- return i;
++ return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param);
+ case FDTWADDLE:
+ if (lock_fdc(drive))
+ return -EINTR;
+@@ -4478,6 +4500,7 @@ static const struct blk_mq_ops floppy_mq_ops = {
+ };
+
+ static struct platform_device floppy_device[N_DRIVE];
++static bool registered[N_DRIVE];
+
+ static bool floppy_available(int drive)
+ {
+@@ -4564,8 +4587,10 @@ static int __init do_floppy_init(void)
+ goto out_put_disk;
+
+ err = floppy_alloc_disk(drive, 0);
+- if (err)
++ if (err) {
++ blk_mq_free_tag_set(&tag_sets[drive]);
+ goto out_put_disk;
++ }
+
+ timer_setup(&motor_off_timer[drive], motor_off_callback, 0);
+ }
+@@ -4693,6 +4718,8 @@ static int __init do_floppy_init(void)
+ if (err)
+ goto out_remove_drives;
+
++ registered[drive] = true;
++
+ device_add_disk(&floppy_device[drive].dev, disks[drive][0],
+ NULL);
+ }
+@@ -4703,7 +4730,8 @@ out_remove_drives:
+ while (drive--) {
+ if (floppy_available(drive)) {
+ del_gendisk(disks[drive][0]);
+- platform_device_unregister(&floppy_device[drive]);
++ if (registered[drive])
++ platform_device_unregister(&floppy_device[drive]);
+ }
+ }
+ out_release_dma:
+@@ -4946,7 +4974,8 @@ static void __exit floppy_module_exit(void)
+ if (disks[drive][i])
+ del_gendisk(disks[drive][i]);
+ }
+- platform_device_unregister(&floppy_device[drive]);
++ if (registered[drive])
++ platform_device_unregister(&floppy_device[drive]);
+ }
+ for (i = 0; i < ARRAY_SIZE(floppy_type); i++) {
+ if (disks[drive][i])
+diff --git a/drivers/block/loop.c b/drivers/block/loop.c
+index 7bf4686af774e..1d60d5ac0db80 100644
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -79,6 +79,7 @@
+ #include <linux/ioprio.h>
+ #include <linux/blk-cgroup.h>
+ #include <linux/sched/mm.h>
++#include <linux/statfs.h>
+
+ #include "loop.h"
+
+@@ -272,19 +273,6 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
+ blk_mq_unfreeze_queue(lo->lo_queue);
+ }
+
+-/**
+- * loop_validate_block_size() - validates the passed in block size
+- * @bsize: size to validate
+- */
+-static int
+-loop_validate_block_size(unsigned short bsize)
+-{
+- if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+ /**
+ * loop_set_size() - sets device size and notifies userspace
+ * @lo: struct loop_device to set the size for
+@@ -748,6 +736,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
+
+ if (!file)
+ return -EBADF;
++
++ /* suppress uevents while reconfiguring the device */
++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
++
+ is_loop = is_loop_device(file);
+ error = loop_global_lock_killable(lo, is_loop);
+ if (error)
+@@ -802,13 +794,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
+ fput(old_file);
+ if (partscan)
+ loop_reread_partitions(lo);
+- return 0;
++
++ error = 0;
++done:
++ /* enable and uncork uevent now that we are done */
++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
++ return error;
+
+ out_err:
+ loop_global_unlock(lo, is_loop);
+ out_putf:
+ fput(file);
+- return error;
++ goto done;
+ }
+
+ /* loop sysfs attributes */
+@@ -856,33 +853,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
+
+ static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
+ {
+- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
++ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+ }
+
+ static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
+ {
+- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
++ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+ }
+
+ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
+ {
+ int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
+
+- return sprintf(buf, "%s\n", autoclear ? "1" : "0");
++ return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
+ }
+
+ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+ {
+ int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+
+- return sprintf(buf, "%s\n", partscan ? "1" : "0");
++ return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
+ }
+
+ static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
+ {
+ int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
+
+- return sprintf(buf, "%s\n", dio ? "1" : "0");
++ return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
+ }
+
+ LOOP_ATTR_RO(backing_file);
+@@ -952,8 +949,13 @@ static void loop_config_discard(struct loop_device *lo)
+ granularity = 0;
+
+ } else {
++ struct kstatfs sbuf;
++
+ max_discard_sectors = UINT_MAX >> 9;
+- granularity = inode->i_sb->s_blocksize;
++ if (!vfs_statfs(&file->f_path, &sbuf))
++ granularity = sbuf.f_bsize;
++ else
++ max_discard_sectors = 0;
+ }
+
+ if (max_discard_sectors) {
+@@ -1159,8 +1161,13 @@ loop_set_status_from_info(struct loop_device *lo,
+ if (err)
+ return err;
+
++ /* Avoid assigning overflow values */
++ if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
++ return -EOVERFLOW;
++
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
++
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+@@ -1236,7 +1243,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ }
+
+ if (config->block_size) {
+- error = loop_validate_block_size(config->block_size);
++ error = blk_validate_block_size(config->block_size);
+ if (error)
+ goto out_unlock;
+ }
+@@ -1258,6 +1265,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ goto out_unlock;
+ }
+
++ /* suppress uevents while reconfiguring the device */
++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
++
+ disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+ set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
+
+@@ -1304,13 +1314,18 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+ if (partscan)
+- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
++ lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
++
++ /* enable and uncork uevent now that we are done */
++ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+
+ loop_global_unlock(lo, is_loop);
+ if (partscan)
+ loop_reread_partitions(lo);
++
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(bdev, loop_configure);
++
+ return 0;
+
+ out_unlock:
+@@ -1448,7 +1463,7 @@ out_unlock:
+ mutex_lock(&lo->lo_mutex);
+ lo->lo_flags = 0;
+ if (!part_shift)
+- lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
++ lo->lo_disk->flags |= GENHD_FL_NO_PART;
+ lo->lo_state = Lo_unbound;
+ mutex_unlock(&lo->lo_mutex);
+
+@@ -1565,7 +1580,7 @@ out_unfreeze:
+
+ if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
+- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
++ lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
+ partscan = true;
+ }
+ out_unlock:
+@@ -1759,7 +1774,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+
+- err = loop_validate_block_size(arg);
++ err = blk_validate_block_size(arg);
+ if (err)
+ return err;
+
+@@ -2093,7 +2108,16 @@ static const struct block_device_operations lo_fops = {
+ /*
+ * And now the modules code and kernel interface.
+ */
+-static int max_loop;
++
++/*
++ * If max_loop is specified, create that many devices upfront.
++ * This also becomes a hard limit. If max_loop is not specified,
++ * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
++ * init time. Loop devices can be requested on-demand with the
++ * /dev/loop-control interface, or be instantiated by accessing
++ * a 'dead' device node.
++ */
++static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
+ module_param(max_loop, int, 0444);
+ MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
+ module_param(max_part, int, 0444);
+@@ -2181,35 +2205,44 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+
+ static void loop_handle_cmd(struct loop_cmd *cmd)
+ {
++ struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css;
++ struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css;
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+ const bool write = op_is_write(req_op(rq));
+ struct loop_device *lo = rq->q->queuedata;
+ int ret = 0;
+ struct mem_cgroup *old_memcg = NULL;
++ const bool use_aio = cmd->use_aio;
+
+ if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
+ ret = -EIO;
+ goto failed;
+ }
+
+- if (cmd->blkcg_css)
+- kthread_associate_blkcg(cmd->blkcg_css);
+- if (cmd->memcg_css)
++ if (cmd_blkcg_css)
++ kthread_associate_blkcg(cmd_blkcg_css);
++ if (cmd_memcg_css)
+ old_memcg = set_active_memcg(
+- mem_cgroup_from_css(cmd->memcg_css));
++ mem_cgroup_from_css(cmd_memcg_css));
+
++ /*
++ * do_req_filebacked() may call blk_mq_complete_request() synchronously
++ * or asynchronously if using aio. Hence, do not touch 'cmd' after
++ * do_req_filebacked() has returned unless we are sure that 'cmd' has
++ * not yet been completed.
++ */
+ ret = do_req_filebacked(lo, rq);
+
+- if (cmd->blkcg_css)
++ if (cmd_blkcg_css)
+ kthread_associate_blkcg(NULL);
+
+- if (cmd->memcg_css) {
++ if (cmd_memcg_css) {
+ set_active_memcg(old_memcg);
+- css_put(cmd->memcg_css);
++ css_put(cmd_memcg_css);
+ }
+ failed:
+ /* complete non-aio request */
+- if (!cmd->use_aio || ret) {
++ if (!use_aio || ret) {
+ if (ret == -EOPNOTSUPP)
+ cmd->ret = ret;
+ else
+@@ -2377,7 +2410,7 @@ static int loop_add(int i)
+ * userspace tools. Parameters like this in general should be avoided.
+ */
+ if (!part_shift)
+- disk->flags |= GENHD_FL_NO_PART_SCAN;
++ disk->flags |= GENHD_FL_NO_PART;
+ disk->flags |= GENHD_FL_EXT_DEVT;
+ atomic_set(&lo->lo_refcnt, 0);
+ mutex_init(&lo->lo_mutex);
+@@ -2442,7 +2475,7 @@ static int loop_control_remove(int idx)
+ int ret;
+
+ if (idx < 0) {
+- pr_warn("deleting an unspecified loop device is not supported.\n");
++ pr_warn_once("deleting an unspecified loop device is not supported.\n");
+ return -EINVAL;
+ }
+
+@@ -2538,7 +2571,7 @@ MODULE_ALIAS("devname:loop-control");
+
+ static int __init loop_init(void)
+ {
+- int i, nr;
++ int i;
+ int err;
+
+ part_shift = 0;
+@@ -2566,19 +2599,6 @@ static int __init loop_init(void)
+ goto err_out;
+ }
+
+- /*
+- * If max_loop is specified, create that many devices upfront.
+- * This also becomes a hard limit. If max_loop is not specified,
+- * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
+- * init time. Loop devices can be requested on-demand with the
+- * /dev/loop-control interface, or be instantiated by accessing
+- * a 'dead' device node.
+- */
+- if (max_loop)
+- nr = max_loop;
+- else
+- nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
+-
+ err = misc_register(&loop_misc);
+ if (err < 0)
+ goto err_out;
+@@ -2590,7 +2610,7 @@ static int __init loop_init(void)
+ }
+
+ /* pre-create number of devices given by config or max_loop */
+- for (i = 0; i < nr; i++)
++ for (i = 0; i < max_loop; i++)
+ loop_add(i);
+
+ printk(KERN_INFO "loop: module loaded\n");
+diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
+index 901855717cb53..ba61e72741eab 100644
+--- a/drivers/block/mtip32xx/mtip32xx.c
++++ b/drivers/block/mtip32xx/mtip32xx.c
+@@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
+ "Completion workers still active!\n");
+ }
+
+- blk_set_queue_dying(dd->queue);
++ blk_mark_disk_dead(dd->disk);
+ set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
+
+ /* Clean up the block layer. */
+diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
+index 26798da661bd4..0bda4a468c660 100644
+--- a/drivers/block/n64cart.c
++++ b/drivers/block/n64cart.c
+@@ -88,7 +88,7 @@ static blk_qc_t n64cart_submit_bio(struct bio *bio)
+ {
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+- struct device *dev = bio->bi_disk->private_data;
++ struct device *dev = bio->bi_bdev->bd_disk->private_data;
+ u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+
+ bio_for_each_segment(bvec, bio, iter) {
+@@ -137,7 +137,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ disk->first_minor = 0;
+- disk->flags = GENHD_FL_NO_PART_SCAN;
++ disk->flags = GENHD_FL_NO_PART;
+ disk->fops = &n64cart_fops;
+ disk->private_data = &pdev->dev;
+ strcpy(disk->disk_name, "n64cart");
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index 1183f7872b713..e563aa407e888 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -122,10 +122,10 @@ struct nbd_device {
+ struct work_struct remove_work;
+
+ struct list_head list;
+- struct task_struct *task_recv;
+ struct task_struct *task_setup;
+
+ unsigned long flags;
++ pid_t pid; /* pid of nbd-client, if attached */
+
+ char *backend;
+ };
+@@ -217,7 +217,7 @@ static ssize_t pid_show(struct device *dev,
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+
+- return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
++ return sprintf(buf, "%d\n", nbd->pid);
+ }
+
+ static const struct device_attribute pid_attr = {
+@@ -254,7 +254,7 @@ static void nbd_dev_remove(struct nbd_device *nbd)
+ mutex_lock(&nbd_index_mutex);
+ idr_remove(&nbd_index_idr, nbd->index);
+ mutex_unlock(&nbd_index_mutex);
+-
++ destroy_workqueue(nbd->recv_workq);
+ kfree(nbd);
+ }
+
+@@ -326,10 +326,13 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
+ if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
+ return -EINVAL;
+
++ if (bytesize < 0)
++ return -EINVAL;
++
+ nbd->config->bytesize = bytesize;
+ nbd->config->blksize_bits = __ffs(blksize);
+
+- if (!nbd->task_recv)
++ if (!nbd->pid)
+ return 0;
+
+ if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
+@@ -896,11 +899,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
+ struct nbd_config *config = nbd->config;
+ if (!config->dead_conn_timeout)
+ return 0;
+- if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
++
++ if (!wait_event_timeout(config->conn_wait,
++ test_bit(NBD_RT_DISCONNECTED,
++ &config->runtime_flags) ||
++ atomic_read(&config->live_connections) > 0,
++ config->dead_conn_timeout))
+ return 0;
+- return wait_event_timeout(config->conn_wait,
+- atomic_read(&config->live_connections) > 0,
+- config->dead_conn_timeout) > 0;
++
++ return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
+ }
+
+ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+@@ -1044,6 +1051,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
+ struct nbd_sock *nsock;
+ int err;
+
++ /* Arg will be cast to int, check it to avoid overflow */
++ if (arg > INT_MAX)
++ return -EINVAL;
+ sock = nbd_get_socket(nbd, arg, &err);
+ if (!sock)
+ return err;
+@@ -1163,11 +1173,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
+ return -ENOSPC;
+ }
+
+-static void nbd_bdev_reset(struct block_device *bdev)
++static void nbd_bdev_reset(struct nbd_device *nbd)
+ {
+- if (bdev->bd_openers > 1)
++ if (nbd->disk->part0->bd_openers > 1)
+ return;
+- set_capacity(bdev->bd_disk, 0);
++ set_capacity(nbd->disk, 0);
+ }
+
+ static void nbd_parse_flags(struct nbd_device *nbd)
+@@ -1241,7 +1251,7 @@ static void nbd_config_put(struct nbd_device *nbd)
+ if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
+ &config->runtime_flags))
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+- nbd->task_recv = NULL;
++ nbd->pid = 0;
+ if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
+ &config->runtime_flags)) {
+ device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
+@@ -1260,10 +1270,6 @@ static void nbd_config_put(struct nbd_device *nbd)
+ kfree(nbd->config);
+ nbd->config = NULL;
+
+- if (nbd->recv_workq)
+- destroy_workqueue(nbd->recv_workq);
+- nbd->recv_workq = NULL;
+-
+ nbd->tag_set.timeout = 0;
+ nbd->disk->queue->limits.discard_granularity = 0;
+ nbd->disk->queue->limits.discard_alignment = 0;
+@@ -1282,7 +1288,7 @@ static int nbd_start_device(struct nbd_device *nbd)
+ int num_connections = config->num_connections;
+ int error = 0, i;
+
+- if (nbd->task_recv)
++ if (nbd->pid)
+ return -EBUSY;
+ if (!config->socks)
+ return -EINVAL;
+@@ -1292,16 +1298,8 @@ static int nbd_start_device(struct nbd_device *nbd)
+ return -EINVAL;
+ }
+
+- nbd->recv_workq = alloc_workqueue("knbd%d-recv",
+- WQ_MEM_RECLAIM | WQ_HIGHPRI |
+- WQ_UNBOUND, 0, nbd->index);
+- if (!nbd->recv_workq) {
+- dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
+- return -ENOMEM;
+- }
+-
+ blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+- nbd->task_recv = current;
++ nbd->pid = task_pid_nr(current);
+
+ nbd_parse_flags(nbd);
+
+@@ -1345,7 +1343,7 @@ static int nbd_start_device(struct nbd_device *nbd)
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
+ }
+
+-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
++static int nbd_start_device_ioctl(struct nbd_device *nbd)
+ {
+ struct nbd_config *config = nbd->config;
+ int ret;
+@@ -1359,12 +1357,14 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
+ mutex_unlock(&nbd->config_lock);
+ ret = wait_event_interruptible(config->recv_wq,
+ atomic_read(&config->recv_threads) == 0);
+- if (ret)
++ if (ret) {
+ sock_shutdown(nbd);
+- flush_workqueue(nbd->recv_workq);
++ nbd_clear_que(nbd);
++ }
+
++ flush_workqueue(nbd->recv_workq);
+ mutex_lock(&nbd->config_lock);
+- nbd_bdev_reset(bdev);
++ nbd_bdev_reset(nbd);
+ /* user requested, ignore socket errors */
+ if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
+ ret = 0;
+@@ -1376,9 +1376,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
+ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
+ struct block_device *bdev)
+ {
+- sock_shutdown(nbd);
++ nbd_clear_sock(nbd);
+ __invalidate_device(bdev, true);
+- nbd_bdev_reset(bdev);
++ nbd_bdev_reset(nbd);
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
+ &nbd->config->runtime_flags))
+ nbd_config_put(nbd);
+@@ -1424,7 +1424,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+ config->flags = arg;
+ return 0;
+ case NBD_DO_IT:
+- return nbd_start_device_ioctl(nbd, bdev);
++ return nbd_start_device_ioctl(nbd);
+ case NBD_CLEAR_QUE:
+ /*
+ * This is for compatibility only. The queue is always cleared
+@@ -1475,15 +1475,20 @@ static struct nbd_config *nbd_alloc_config(void)
+ {
+ struct nbd_config *config;
+
++ if (!try_module_get(THIS_MODULE))
++ return ERR_PTR(-ENODEV);
++
+ config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
+- if (!config)
+- return NULL;
++ if (!config) {
++ module_put(THIS_MODULE);
++ return ERR_PTR(-ENOMEM);
++ }
++
+ atomic_set(&config->recv_threads, 0);
+ init_waitqueue_head(&config->recv_wq);
+ init_waitqueue_head(&config->conn_wait);
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
+ atomic_set(&config->live_connections, 0);
+- try_module_get(THIS_MODULE);
+ return config;
+ }
+
+@@ -1510,12 +1515,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
+ mutex_unlock(&nbd->config_lock);
+ goto out;
+ }
+- config = nbd->config = nbd_alloc_config();
+- if (!config) {
+- ret = -ENOMEM;
++ config = nbd_alloc_config();
++ if (IS_ERR(config)) {
++ ret = PTR_ERR(config);
+ mutex_unlock(&nbd->config_lock);
+ goto out;
+ }
++ nbd->config = config;
+ refcount_set(&nbd->config_refs, 1);
+ refcount_inc(&nbd->refs);
+ mutex_unlock(&nbd->config_lock);
+@@ -1557,8 +1563,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
+ {
+ struct nbd_device *nbd = s->private;
+
+- if (nbd->task_recv)
+- seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
++ if (nbd->pid)
++ seq_printf(s, "recv: %d\n", nbd->pid);
+
+ return 0;
+ }
+@@ -1599,7 +1605,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
+ return -EIO;
+
+ dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
+- if (!dir) {
++ if (IS_ERR(dir)) {
+ dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
+ nbd_name(nbd));
+ return -EIO;
+@@ -1625,7 +1631,7 @@ static int nbd_dbg_init(void)
+ struct dentry *dbg_dir;
+
+ dbg_dir = debugfs_create_dir("nbd", NULL);
+- if (!dbg_dir)
++ if (IS_ERR(dbg_dir))
+ return -EIO;
+
+ nbd_dbg_dir = dbg_dir;
+@@ -1725,6 +1731,15 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
+ }
+ nbd->disk = disk;
+
++ nbd->recv_workq = alloc_workqueue("nbd%d-recv",
++ WQ_MEM_RECLAIM | WQ_HIGHPRI |
++ WQ_UNBOUND, 0, nbd->index);
++ if (!nbd->recv_workq) {
++ dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
++ err = -ENOMEM;
++ goto out_err_disk;
++ }
++
+ /*
+ * Tell the block layer that we are not a rotational device
+ */
+@@ -1747,22 +1762,14 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
+ refcount_set(&nbd->refs, 0);
+ INIT_LIST_HEAD(&nbd->list);
+ disk->major = NBD_MAJOR;
+-
+- /* Too big first_minor can cause duplicate creation of
+- * sysfs files/links, since first_minor will be truncated to
+- * byte in __device_add_disk().
+- */
+ disk->first_minor = index << part_shift;
+- if (disk->first_minor > 0xff) {
+- err = -EINVAL;
+- goto out_free_idr;
+- }
+-
+ disk->minors = 1 << part_shift;
+ disk->fops = &nbd_fops;
+ disk->private_data = nbd;
+ sprintf(disk->disk_name, "nbd%d", index);
+- add_disk(disk);
++ err = add_disk(disk);
++ if (err)
++ goto out_free_work;
+
+ /*
+ * Now publish the device.
+@@ -1771,6 +1778,10 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
+ nbd_total_devices++;
+ return nbd;
+
++out_free_work:
++ destroy_workqueue(nbd->recv_workq);
++out_err_disk:
++ blk_cleanup_disk(disk);
+ out_free_idr:
+ mutex_lock(&nbd_index_mutex);
+ idr_remove(&nbd_index_idr, index);
+@@ -1856,8 +1867,19 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
+ return -EPERM;
+
+- if (info->attrs[NBD_ATTR_INDEX])
++ if (info->attrs[NBD_ATTR_INDEX]) {
+ index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
++
++ /*
++ * Too big first_minor can cause duplicate creation of
++ * sysfs files/links, since index << part_shift might overflow, or
++ * MKDEV() expect that the max bits of first_minor is 20.
++ */
++ if (index < 0 || index > MINORMASK >> part_shift) {
++ printk(KERN_ERR "nbd: illegal input index %d\n", index);
++ return -EINVAL;
++ }
++ }
+ if (!info->attrs[NBD_ATTR_SOCKETS]) {
+ printk(KERN_ERR "nbd: must specify at least one socket\n");
+ return -EINVAL;
+@@ -1907,13 +1929,14 @@ again:
+ nbd_put(nbd);
+ return -EINVAL;
+ }
+- config = nbd->config = nbd_alloc_config();
+- if (!nbd->config) {
++ config = nbd_alloc_config();
++ if (IS_ERR(config)) {
+ mutex_unlock(&nbd->config_lock);
+ nbd_put(nbd);
+ printk(KERN_ERR "nbd: couldn't allocate config\n");
+- return -ENOMEM;
++ return PTR_ERR(config);
+ }
++ nbd->config = config;
+ refcount_set(&nbd->config_refs, 1);
+ set_bit(NBD_RT_BOUND, &config->runtime_flags);
+
+@@ -2023,14 +2046,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
+ mutex_lock(&nbd->config_lock);
+ nbd_disconnect(nbd);
+ sock_shutdown(nbd);
++ wake_up(&nbd->config->conn_wait);
+ /*
+- * Make sure recv thread has finished, so it does not drop the last
+- * config ref and try to destroy the workqueue from inside the work
+- * queue. And this also ensure that we can safely call nbd_clear_que()
++ * Make sure recv thread has finished, we can safely call nbd_clear_que()
+ * to cancel the inflight I/Os.
+ */
+- if (nbd->recv_workq)
+- flush_workqueue(nbd->recv_workq);
++ flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ nbd->task_setup = NULL;
+ mutex_unlock(&nbd->config_lock);
+@@ -2135,7 +2156,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
+ mutex_lock(&nbd->config_lock);
+ config = nbd->config;
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
+- !nbd->task_recv) {
++ !nbd->pid) {
+ dev_err(nbd_to_dev(nbd),
+ "not configured, cannot reconfigure\n");
+ ret = -EINVAL;
+@@ -2473,6 +2494,12 @@ static void __exit nbd_cleanup(void)
+ struct nbd_device *nbd;
+ LIST_HEAD(del_list);
+
++ /*
++ * Unregister netlink interface prior to waiting
++ * for the completion of netlink commands.
++ */
++ genl_unregister_family(&nbd_genl_family);
++
+ nbd_dbg_close();
+
+ mutex_lock(&nbd_index_mutex);
+@@ -2482,6 +2509,9 @@ static void __exit nbd_cleanup(void)
+ while (!list_empty(&del_list)) {
+ nbd = list_first_entry(&del_list, struct nbd_device, list);
+ list_del_init(&nbd->list);
++ if (refcount_read(&nbd->config_refs))
++ printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
++ refcount_read(&nbd->config_refs));
+ if (refcount_read(&nbd->refs) != 1)
+ printk(KERN_ERR "nbd: possibly leaking a device\n");
+ nbd_put(nbd);
+@@ -2491,7 +2521,6 @@ static void __exit nbd_cleanup(void)
+ destroy_workqueue(nbd_del_wq);
+
+ idr_destroy(&nbd_index_idr);
+- genl_unregister_family(&nbd_genl_family);
+ unregister_blkdev(NBD_MAJOR, "nbd");
+ }
+
+diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
+index 187d779c8ca08..686ec6bcdef3d 100644
+--- a/drivers/block/null_blk/main.c
++++ b/drivers/block/null_blk/main.c
+@@ -1314,8 +1314,7 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
+ case NULL_IRQ_SOFTIRQ:
+ switch (cmd->nq->dev->queue_mode) {
+ case NULL_Q_MQ:
+- if (likely(!blk_should_fake_timeout(cmd->rq->q)))
+- blk_mq_complete_request(cmd->rq);
++ blk_mq_complete_request(cmd->rq);
+ break;
+ case NULL_Q_BIO:
+ /*
+@@ -1491,7 +1490,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
+ cmd->rq = bd->rq;
+ cmd->error = BLK_STS_OK;
+ cmd->nq = nq;
+- cmd->fake_timeout = should_timeout_request(bd->rq);
++ cmd->fake_timeout = should_timeout_request(bd->rq) ||
++ blk_should_fake_timeout(bd->rq->q);
+
+ blk_mq_start_request(bd->rq);
+
+@@ -1744,6 +1744,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
+
+ static int null_validate_conf(struct nullb_device *dev)
+ {
++ if (dev->queue_mode == NULL_Q_RQ) {
++ pr_err("legacy IO path is no longer available\n");
++ return -EINVAL;
++ }
++
+ dev->blocksize = round_down(dev->blocksize, 512);
+ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+
+@@ -1884,8 +1889,13 @@ static int null_add_dev(struct nullb_device *dev)
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
+
+ mutex_lock(&lock);
+- nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+- dev->index = nullb->index;
++ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
++ if (rv < 0) {
++ mutex_unlock(&lock);
++ goto out_cleanup_zone;
++ }
++ nullb->index = rv;
++ dev->index = rv;
+ mutex_unlock(&lock);
+
+ blk_queue_logical_block_size(nullb->q, dev->blocksize);
+@@ -1905,13 +1915,16 @@ static int null_add_dev(struct nullb_device *dev)
+
+ rv = null_gendisk_register(nullb);
+ if (rv)
+- goto out_cleanup_zone;
++ goto out_ida_free;
+
+ mutex_lock(&lock);
+ list_add_tail(&nullb->list, &nullb_list);
+ mutex_unlock(&lock);
+
+ return 0;
++
++out_ida_free:
++ ida_free(&nullb_indexes, nullb->index);
+ out_cleanup_zone:
+ null_free_zoned_dev(dev);
+ out_cleanup_disk:
+diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
+index f9cdd11f02f58..91369084e1274 100644
+--- a/drivers/block/paride/pcd.c
++++ b/drivers/block/paride/pcd.c
+@@ -183,8 +183,6 @@ static int pcd_audio_ioctl(struct cdrom_device_info *cdi,
+ static int pcd_packet(struct cdrom_device_info *cdi,
+ struct packet_command *cgc);
+
+-static int pcd_detect(void);
+-static void pcd_probe_capabilities(void);
+ static void do_pcd_read_drq(void);
+ static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd);
+@@ -302,53 +300,6 @@ static const struct blk_mq_ops pcd_mq_ops = {
+ .queue_rq = pcd_queue_rq,
+ };
+
+-static void pcd_init_units(void)
+-{
+- struct pcd_unit *cd;
+- int unit;
+-
+- pcd_drive_count = 0;
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- struct gendisk *disk;
+-
+- if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1,
+- BLK_MQ_F_SHOULD_MERGE))
+- continue;
+-
+- disk = blk_mq_alloc_disk(&cd->tag_set, cd);
+- if (IS_ERR(disk)) {
+- blk_mq_free_tag_set(&cd->tag_set);
+- continue;
+- }
+-
+- INIT_LIST_HEAD(&cd->rq_list);
+- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
+- cd->disk = disk;
+- cd->pi = &cd->pia;
+- cd->present = 0;
+- cd->last_sense = 0;
+- cd->changed = 1;
+- cd->drive = (*drives[unit])[D_SLV];
+- if ((*drives[unit])[D_PRT])
+- pcd_drive_count++;
+-
+- cd->name = &cd->info.name[0];
+- snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit);
+- cd->info.ops = &pcd_dops;
+- cd->info.handle = cd;
+- cd->info.speed = 0;
+- cd->info.capacity = 1;
+- cd->info.mask = 0;
+- disk->major = major;
+- disk->first_minor = unit;
+- disk->minors = 1;
+- strcpy(disk->disk_name, cd->name); /* umm... */
+- disk->fops = &pcd_bdops;
+- disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+- disk->events = DISK_EVENT_MEDIA_CHANGE;
+- }
+-}
+-
+ static int pcd_open(struct cdrom_device_info *cdi, int purpose)
+ {
+ struct pcd_unit *cd = cdi->handle;
+@@ -630,10 +581,11 @@ static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr)
+ return CDS_DISC_OK;
+ }
+
+-static int pcd_identify(struct pcd_unit *cd, char *id)
++static int pcd_identify(struct pcd_unit *cd)
+ {
+- int k, s;
+ char id_cmd[12] = { 0x12, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
++ char id[18];
++ int k, s;
+
+ pcd_bufblk = -1;
+
+@@ -661,108 +613,47 @@ static int pcd_identify(struct pcd_unit *cd, char *id)
+ }
+
+ /*
+- * returns 0, with id set if drive is detected
+- * -1, if drive detection failed
++ * returns 0, with id set if drive is detected, otherwise an error code.
+ */
+-static int pcd_probe(struct pcd_unit *cd, int ms, char *id)
++static int pcd_probe(struct pcd_unit *cd, int ms)
+ {
+ if (ms == -1) {
+ for (cd->drive = 0; cd->drive <= 1; cd->drive++)
+- if (!pcd_reset(cd) && !pcd_identify(cd, id))
++ if (!pcd_reset(cd) && !pcd_identify(cd))
+ return 0;
+ } else {
+ cd->drive = ms;
+- if (!pcd_reset(cd) && !pcd_identify(cd, id))
++ if (!pcd_reset(cd) && !pcd_identify(cd))
+ return 0;
+ }
+- return -1;
++ return -ENODEV;
+ }
+
+-static void pcd_probe_capabilities(void)
++static int pcd_probe_capabilities(struct pcd_unit *cd)
+ {
+- int unit, r;
+- char buffer[32];
+ char cmd[12] = { 0x5a, 1 << 3, 0x2a, 0, 0, 0, 0, 18, 0, 0, 0, 0 };
+- struct pcd_unit *cd;
+-
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- if (!cd->present)
+- continue;
+- r = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities");
+- if (r)
+- continue;
+- /* we should now have the cap page */
+- if ((buffer[11] & 1) == 0)
+- cd->info.mask |= CDC_CD_R;
+- if ((buffer[11] & 2) == 0)
+- cd->info.mask |= CDC_CD_RW;
+- if ((buffer[12] & 1) == 0)
+- cd->info.mask |= CDC_PLAY_AUDIO;
+- if ((buffer[14] & 1) == 0)
+- cd->info.mask |= CDC_LOCK;
+- if ((buffer[14] & 8) == 0)
+- cd->info.mask |= CDC_OPEN_TRAY;
+- if ((buffer[14] >> 6) == 0)
+- cd->info.mask |= CDC_CLOSE_TRAY;
+- }
+-}
+-
+-static int pcd_detect(void)
+-{
+- char id[18];
+- int k, unit;
+- struct pcd_unit *cd;
++ char buffer[32];
++ int ret;
+
+- printk("%s: %s version %s, major %d, nice %d\n",
+- name, name, PCD_VERSION, major, nice);
++ ret = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities");
++ if (ret)
++ return ret;
++
++ /* we should now have the cap page */
++ if ((buffer[11] & 1) == 0)
++ cd->info.mask |= CDC_CD_R;
++ if ((buffer[11] & 2) == 0)
++ cd->info.mask |= CDC_CD_RW;
++ if ((buffer[12] & 1) == 0)
++ cd->info.mask |= CDC_PLAY_AUDIO;
++ if ((buffer[14] & 1) == 0)
++ cd->info.mask |= CDC_LOCK;
++ if ((buffer[14] & 8) == 0)
++ cd->info.mask |= CDC_OPEN_TRAY;
++ if ((buffer[14] >> 6) == 0)
++ cd->info.mask |= CDC_CLOSE_TRAY;
+
+- par_drv = pi_register_driver(name);
+- if (!par_drv) {
+- pr_err("failed to register %s driver\n", name);
+- return -1;
+- }
+-
+- k = 0;
+- if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
+- cd = pcd;
+- if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1,
+- pcd_buffer, PI_PCD, verbose, cd->name)) {
+- if (!pcd_probe(cd, -1, id)) {
+- cd->present = 1;
+- k++;
+- } else
+- pi_release(cd->pi);
+- }
+- } else {
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- int *conf = *drives[unit];
+- if (!conf[D_PRT])
+- continue;
+- if (!cd->disk)
+- continue;
+- if (!pi_init(cd->pi, 0, conf[D_PRT], conf[D_MOD],
+- conf[D_UNI], conf[D_PRO], conf[D_DLY],
+- pcd_buffer, PI_PCD, verbose, cd->name))
+- continue;
+- if (!pcd_probe(cd, conf[D_SLV], id)) {
+- cd->present = 1;
+- k++;
+- } else
+- pi_release(cd->pi);
+- }
+- }
+- if (k)
+- return 0;
+-
+- printk("%s: No CD-ROM drive found\n", name);
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- if (!cd->disk)
+- continue;
+- blk_cleanup_disk(cd->disk);
+- blk_mq_free_tag_set(&cd->tag_set);
+- }
+- pi_unregister_driver(par_drv);
+- return -1;
++ return 0;
+ }
+
+ /* I/O request processing */
+@@ -999,43 +890,124 @@ static int pcd_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
+ return 0;
+ }
+
++static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port,
++ int mode, int unit, int protocol, int delay, int ms)
++{
++ struct gendisk *disk;
++ int ret;
++
++ ret = blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1,
++ BLK_MQ_F_SHOULD_MERGE);
++ if (ret)
++ return ret;
++
++ disk = blk_mq_alloc_disk(&cd->tag_set, cd);
++ if (IS_ERR(disk)) {
++ ret = PTR_ERR(disk);
++ goto out_free_tag_set;
++ }
++
++ INIT_LIST_HEAD(&cd->rq_list);
++ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
++ cd->disk = disk;
++ cd->pi = &cd->pia;
++ cd->present = 0;
++ cd->last_sense = 0;
++ cd->changed = 1;
++ cd->drive = (*drives[cd - pcd])[D_SLV];
++
++ cd->name = &cd->info.name[0];
++ snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit);
++ cd->info.ops = &pcd_dops;
++ cd->info.handle = cd;
++ cd->info.speed = 0;
++ cd->info.capacity = 1;
++ cd->info.mask = 0;
++ disk->major = major;
++ disk->first_minor = unit;
++ disk->minors = 1;
++ strcpy(disk->disk_name, cd->name); /* umm... */
++ disk->fops = &pcd_bdops;
++ disk->events = DISK_EVENT_MEDIA_CHANGE;
++ disk->event_flags = DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE;
++
++ if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay,
++ pcd_buffer, PI_PCD, verbose, cd->name)) {
++ ret = -ENODEV;
++ goto out_free_disk;
++ }
++ ret = pcd_probe(cd, ms);
++ if (ret)
++ goto out_pi_release;
++
++ cd->present = 1;
++ pcd_probe_capabilities(cd);
++ register_cdrom(cd->disk, &cd->info);
++ add_disk(cd->disk);
++ return 0;
++
++out_pi_release:
++ pi_release(cd->pi);
++out_free_disk:
++ blk_cleanup_disk(cd->disk);
++out_free_tag_set:
++ blk_mq_free_tag_set(&cd->tag_set);
++ return ret;
++}
++
+ static int __init pcd_init(void)
+ {
+- struct pcd_unit *cd;
+- int unit;
++ int found = 0, unit;
+
+ if (disable)
+ return -EINVAL;
+
+- pcd_init_units();
++ if (register_blkdev(major, name))
++ return -EBUSY;
+
+- if (pcd_detect())
+- return -ENODEV;
++ pr_info("%s: %s version %s, major %d, nice %d\n",
++ name, name, PCD_VERSION, major, nice);
+
+- /* get the atapi capabilities page */
+- pcd_probe_capabilities();
++ par_drv = pi_register_driver(name);
++ if (!par_drv) {
++ pr_err("failed to register %s driver\n", name);
++ goto out_unregister_blkdev;
++ }
+
+- if (register_blkdev(major, name)) {
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- if (!cd->disk)
+- continue;
++ for (unit = 0; unit < PCD_UNITS; unit++) {
++ if ((*drives[unit])[D_PRT])
++ pcd_drive_count++;
++ }
++
++ if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
++ if (!pcd_init_unit(pcd, 1, -1, -1, -1, -1, -1, -1))
++ found++;
++ } else {
++ for (unit = 0; unit < PCD_UNITS; unit++) {
++ struct pcd_unit *cd = &pcd[unit];
++ int *conf = *drives[unit];
+
+- blk_cleanup_queue(cd->disk->queue);
+- blk_mq_free_tag_set(&cd->tag_set);
+- put_disk(cd->disk);
++ if (!conf[D_PRT])
++ continue;
++ if (!pcd_init_unit(cd, 0, conf[D_PRT], conf[D_MOD],
++ conf[D_UNI], conf[D_PRO], conf[D_DLY],
++ conf[D_SLV]))
++ found++;
+ }
+- return -EBUSY;
+ }
+
+- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- if (cd->present) {
+- register_cdrom(cd->disk, &cd->info);
+- cd->disk->private_data = cd;
+- add_disk(cd->disk);
+- }
++ if (!found) {
++ pr_info("%s: No CD-ROM drive found\n", name);
++ goto out_unregister_pi_driver;
+ }
+
+ return 0;
++
++out_unregister_pi_driver:
++ pi_unregister_driver(par_drv);
++out_unregister_blkdev:
++ unregister_blkdev(major, name);
++ return -ENODEV;
+ }
+
+ static void __exit pcd_exit(void)
+@@ -1044,20 +1016,18 @@ static void __exit pcd_exit(void)
+ int unit;
+
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+- if (!cd->disk)
++ if (!cd->present)
+ continue;
+
+- if (cd->present) {
+- del_gendisk(cd->disk);
+- pi_release(cd->pi);
+- unregister_cdrom(&cd->info);
+- }
+- blk_cleanup_queue(cd->disk->queue);
++ del_gendisk(cd->disk);
++ pi_release(cd->pi);
++ unregister_cdrom(&cd->info);
++ blk_cleanup_disk(cd->disk);
++
+ blk_mq_free_tag_set(&cd->tag_set);
+- put_disk(cd->disk);
+ }
+- unregister_blkdev(major, name);
+ pi_unregister_driver(par_drv);
++ unregister_blkdev(major, name);
+ }
+
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
+index 0f26b2510a756..ca2ab977ef8ef 100644
+--- a/drivers/block/pktcdvd.c
++++ b/drivers/block/pktcdvd.c
+@@ -2407,6 +2407,8 @@ static blk_qc_t pkt_submit_bio(struct bio *bio)
+ struct bio *split;
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ pd = bio->bi_bdev->bd_disk->queue->queuedata;
+ if (!pd) {
+diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
+index c7b19e128b03c..c79aa4d8ccf73 100644
+--- a/drivers/block/ps3vram.c
++++ b/drivers/block/ps3vram.c
+@@ -587,6 +587,8 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio)
+ dev_dbg(&dev->core, "%s\n", __func__);
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ spin_lock_irq(&priv->lock);
+ busy = !bio_list_empty(&priv->list);
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index e65c9d706f6fb..fe8bdbf4616bc 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -1335,14 +1335,30 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
+ /*
+ * Must be called after rbd_obj_calc_img_extents().
+ */
+-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
++static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
+ {
+- if (!obj_req->num_img_extents ||
+- (rbd_obj_is_entire(obj_req) &&
+- !obj_req->img_request->snapc->num_snaps))
+- return false;
++ rbd_assert(obj_req->img_request->snapc);
+
+- return true;
++ if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
++ dout("%s %p objno %llu discard\n", __func__, obj_req,
++ obj_req->ex.oe_objno);
++ return;
++ }
++
++ if (!obj_req->num_img_extents) {
++ dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
++ obj_req->ex.oe_objno);
++ return;
++ }
++
++ if (rbd_obj_is_entire(obj_req) &&
++ !obj_req->img_request->snapc->num_snaps) {
++ dout("%s %p objno %llu entire\n", __func__, obj_req,
++ obj_req->ex.oe_objno);
++ return;
++ }
++
++ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
+ }
+
+ static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
+@@ -1443,6 +1459,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
+ static struct ceph_osd_request *
+ rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
+ {
++ rbd_assert(obj_req->img_request->snapc);
+ return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
+ num_ops);
+ }
+@@ -1579,15 +1596,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request,
+ mutex_init(&img_request->state_mutex);
+ }
+
++/*
++ * Only snap_id is captured here, for reads. For writes, snapshot
++ * context is captured in rbd_img_object_requests() after exclusive
++ * lock is ensured to be held.
++ */
+ static void rbd_img_capture_header(struct rbd_img_request *img_req)
+ {
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+
+ lockdep_assert_held(&rbd_dev->header_rwsem);
+
+- if (rbd_img_is_write(img_req))
+- img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+- else
++ if (!rbd_img_is_write(img_req))
+ img_req->snap_id = rbd_dev->spec->snap_id;
+
+ if (rbd_dev_parent_get(rbd_dev))
+@@ -2234,9 +2254,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
+ if (ret)
+ return ret;
+
+- if (rbd_obj_copyup_enabled(obj_req))
+- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
+-
+ obj_req->write_state = RBD_OBJ_WRITE_START;
+ return 0;
+ }
+@@ -2342,8 +2359,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
+ if (ret)
+ return ret;
+
+- if (rbd_obj_copyup_enabled(obj_req))
+- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
+ if (!obj_req->num_img_extents) {
+ obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
+ if (rbd_obj_is_entire(obj_req))
+@@ -3288,6 +3303,7 @@ again:
+ case RBD_OBJ_WRITE_START:
+ rbd_assert(!*result);
+
++ rbd_obj_set_copyup_enabled(obj_req);
+ if (rbd_obj_write_is_noop(obj_req))
+ return true;
+
+@@ -3474,9 +3490,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
+
+ static void rbd_img_object_requests(struct rbd_img_request *img_req)
+ {
++ struct rbd_device *rbd_dev = img_req->rbd_dev;
+ struct rbd_obj_request *obj_req;
+
+ rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
++ rbd_assert(!need_exclusive_lock(img_req) ||
++ __rbd_is_lock_owner(rbd_dev));
++
++ if (rbd_img_is_write(img_req)) {
++ rbd_assert(!img_req->snapc);
++ down_read(&rbd_dev->header_rwsem);
++ img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
++ up_read(&rbd_dev->header_rwsem);
++ }
+
+ for_each_obj_request(img_req, obj_req) {
+ int result = 0;
+@@ -3494,7 +3520,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
+
+ static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
+ {
+- struct rbd_device *rbd_dev = img_req->rbd_dev;
+ int ret;
+
+ again:
+@@ -3515,9 +3540,6 @@ again:
+ if (*result)
+ return true;
+
+- rbd_assert(!need_exclusive_lock(img_req) ||
+- __rbd_is_lock_owner(rbd_dev));
+-
+ rbd_img_object_requests(img_req);
+ if (!img_req->pending.num_pending) {
+ *result = img_req->pending.result;
+@@ -3655,7 +3677,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
+ ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+ RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
+ RBD_LOCK_TAG, "", 0);
+- if (ret)
++ if (ret && ret != -EEXIST)
+ return ret;
+
+ __rbd_lock(rbd_dev, cookie);
+@@ -3829,51 +3851,82 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
+ list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
+ }
+
+-static int get_lock_owner_info(struct rbd_device *rbd_dev,
+- struct ceph_locker **lockers, u32 *num_lockers)
++static bool locker_equal(const struct ceph_locker *lhs,
++ const struct ceph_locker *rhs)
++{
++ return lhs->id.name.type == rhs->id.name.type &&
++ lhs->id.name.num == rhs->id.name.num &&
++ !strcmp(lhs->id.cookie, rhs->id.cookie) &&
++ ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
++}
++
++static void free_locker(struct ceph_locker *locker)
++{
++ if (locker)
++ ceph_free_lockers(locker, 1);
++}
++
++static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
+ {
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
++ struct ceph_locker *lockers;
++ u32 num_lockers;
+ u8 lock_type;
+ char *lock_tag;
++ u64 handle;
+ int ret;
+
+- dout("%s rbd_dev %p\n", __func__, rbd_dev);
+-
+ ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, RBD_LOCK_NAME,
+- &lock_type, &lock_tag, lockers, num_lockers);
+- if (ret)
+- return ret;
++ &lock_type, &lock_tag, &lockers, &num_lockers);
++ if (ret) {
++ rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
++ return ERR_PTR(ret);
++ }
+
+- if (*num_lockers == 0) {
++ if (num_lockers == 0) {
+ dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
++ lockers = NULL;
+ goto out;
+ }
+
+ if (strcmp(lock_tag, RBD_LOCK_TAG)) {
+ rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
+ lock_tag);
+- ret = -EBUSY;
+- goto out;
++ goto err_busy;
+ }
+
+- if (lock_type == CEPH_CLS_LOCK_SHARED) {
+- rbd_warn(rbd_dev, "shared lock type detected");
+- ret = -EBUSY;
+- goto out;
++ if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
++ rbd_warn(rbd_dev, "incompatible lock type detected");
++ goto err_busy;
+ }
+
+- if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
+- strlen(RBD_LOCK_COOKIE_PREFIX))) {
++ WARN_ON(num_lockers != 1);
++ ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
++ &handle);
++ if (ret != 1) {
+ rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
+- (*lockers)[0].id.cookie);
+- ret = -EBUSY;
+- goto out;
++ lockers[0].id.cookie);
++ goto err_busy;
+ }
++ if (ceph_addr_is_blank(&lockers[0].info.addr)) {
++ rbd_warn(rbd_dev, "locker has a blank address");
++ goto err_busy;
++ }
++
++ dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
++ __func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
++ &lockers[0].info.addr.in_addr,
++ le32_to_cpu(lockers[0].info.addr.nonce), handle);
+
+ out:
+ kfree(lock_tag);
+- return ret;
++ return lockers;
++
++err_busy:
++ kfree(lock_tag);
++ ceph_free_lockers(lockers, num_lockers);
++ return ERR_PTR(-EBUSY);
+ }
+
+ static int find_watcher(struct rbd_device *rbd_dev,
+@@ -3889,8 +3942,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
+ ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, &watchers,
+ &num_watchers);
+- if (ret)
++ if (ret) {
++ rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
+ return ret;
++ }
+
+ sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
+ for (i = 0; i < num_watchers; i++) {
+@@ -3927,51 +3982,72 @@ out:
+ static int rbd_try_lock(struct rbd_device *rbd_dev)
+ {
+ struct ceph_client *client = rbd_dev->rbd_client->client;
+- struct ceph_locker *lockers;
+- u32 num_lockers;
++ struct ceph_locker *locker, *refreshed_locker;
+ int ret;
+
+ for (;;) {
++ locker = refreshed_locker = NULL;
++
+ ret = rbd_lock(rbd_dev);
+- if (ret != -EBUSY)
+- return ret;
++ if (!ret)
++ goto out;
++ if (ret != -EBUSY) {
++ rbd_warn(rbd_dev, "failed to lock header: %d", ret);
++ goto out;
++ }
+
+ /* determine if the current lock holder is still alive */
+- ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
+- if (ret)
+- return ret;
+-
+- if (num_lockers == 0)
++ locker = get_lock_owner_info(rbd_dev);
++ if (IS_ERR(locker)) {
++ ret = PTR_ERR(locker);
++ locker = NULL;
++ goto out;
++ }
++ if (!locker)
+ goto again;
+
+- ret = find_watcher(rbd_dev, lockers);
++ ret = find_watcher(rbd_dev, locker);
+ if (ret)
+ goto out; /* request lock or error */
+
++ refreshed_locker = get_lock_owner_info(rbd_dev);
++ if (IS_ERR(refreshed_locker)) {
++ ret = PTR_ERR(refreshed_locker);
++ refreshed_locker = NULL;
++ goto out;
++ }
++ if (!refreshed_locker ||
++ !locker_equal(locker, refreshed_locker))
++ goto again;
++
+ rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
+- ENTITY_NAME(lockers[0].id.name));
++ ENTITY_NAME(locker->id.name));
+
+ ret = ceph_monc_blocklist_add(&client->monc,
+- &lockers[0].info.addr);
++ &locker->info.addr);
+ if (ret) {
+- rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
+- ENTITY_NAME(lockers[0].id.name), ret);
++ rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
++ ENTITY_NAME(locker->id.name), ret);
+ goto out;
+ }
+
+ ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, RBD_LOCK_NAME,
+- lockers[0].id.cookie,
+- &lockers[0].id.name);
+- if (ret && ret != -ENOENT)
++ locker->id.cookie, &locker->id.name);
++ if (ret && ret != -ENOENT) {
++ rbd_warn(rbd_dev, "failed to break header lock: %d",
++ ret);
+ goto out;
++ }
+
+ again:
+- ceph_free_lockers(lockers, num_lockers);
++ free_locker(refreshed_locker);
++ free_locker(locker);
+ }
+
+ out:
+- ceph_free_lockers(lockers, num_lockers);
++ free_locker(refreshed_locker);
++ free_locker(locker);
+ return ret;
+ }
+
+@@ -3979,6 +4055,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
+ {
+ int ret;
+
++ ret = rbd_dev_refresh(rbd_dev);
++ if (ret)
++ return ret;
++
+ if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
+ ret = rbd_object_map_open(rbd_dev);
+ if (ret)
+@@ -4017,11 +4097,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
+
+ ret = rbd_try_lock(rbd_dev);
+ if (ret < 0) {
+- rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+- if (ret == -EBLOCKLISTED)
+- goto out;
+-
+- ret = 1; /* request lock anyway */
++ rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
++ goto out;
+ }
+ if (ret > 0) {
+ up_write(&rbd_dev->lock_rwsem);
+@@ -5296,8 +5373,7 @@ static void rbd_dev_release(struct device *dev)
+ module_put(THIS_MODULE);
+ }
+
+-static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
+- struct rbd_spec *spec)
++static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
+ {
+ struct rbd_device *rbd_dev;
+
+@@ -5342,9 +5418,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
+ rbd_dev->dev.parent = &rbd_root_dev;
+ device_initialize(&rbd_dev->dev);
+
+- rbd_dev->rbd_client = rbdc;
+- rbd_dev->spec = spec;
+-
+ return rbd_dev;
+ }
+
+@@ -5357,12 +5430,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+ {
+ struct rbd_device *rbd_dev;
+
+- rbd_dev = __rbd_dev_create(rbdc, spec);
++ rbd_dev = __rbd_dev_create(spec);
+ if (!rbd_dev)
+ return NULL;
+
+- rbd_dev->opts = opts;
+-
+ /* get an id and fill in device name */
+ rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
+ minor_to_rbd_dev_id(1 << MINORBITS),
+@@ -5379,6 +5450,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+ /* we have a ref from do_rbd_add() */
+ __module_get(THIS_MODULE);
+
++ rbd_dev->rbd_client = rbdc;
++ rbd_dev->spec = spec;
++ rbd_dev->opts = opts;
++
+ dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
+ return rbd_dev;
+
+@@ -6559,12 +6634,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
+ cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+ if (!ret)
+ ret = -ETIMEDOUT;
+- }
+
+- if (ret) {
+- rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
+- return ret;
++ rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
+ }
++ if (ret)
++ return ret;
+
+ /*
+ * The lock may have been released by now, unless automatic lock
+@@ -6739,7 +6813,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
+ goto out_err;
+ }
+
+- parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
++ parent = __rbd_dev_create(rbd_dev->parent_spec);
+ if (!parent) {
+ ret = -ENOMEM;
+ goto out_err;
+@@ -6749,8 +6823,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
+ * Images related by parent/child relationships always share
+ * rbd_client and spec/parent_spec, so bump their refcounts.
+ */
+- __rbd_get_client(rbd_dev->rbd_client);
+- rbd_spec_get(rbd_dev->parent_spec);
++ parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
++ parent->spec = rbd_spec_get(rbd_dev->parent_spec);
+
+ __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
+
+@@ -7182,7 +7256,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
+ * IO to complete/fail.
+ */
+ blk_mq_freeze_queue(rbd_dev->disk->queue);
+- blk_set_queue_dying(rbd_dev->disk->queue);
++ blk_mark_disk_dead(rbd_dev->disk);
+ }
+
+ del_gendisk(rbd_dev->disk);
+diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
+index c1bc5c0fef71d..e0fe0a9aa3708 100644
+--- a/drivers/block/rnbd/rnbd-proto.h
++++ b/drivers/block/rnbd/rnbd-proto.h
+@@ -241,7 +241,7 @@ static inline u32 rnbd_to_bio_flags(u32 rnbd_opf)
+ bio_opf = REQ_OP_WRITE;
+ break;
+ case RNBD_OP_FLUSH:
+- bio_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
++ bio_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ break;
+ case RNBD_OP_DISCARD:
+ bio_opf = REQ_OP_DISCARD;
+diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
+index aafecfe970558..1896cde8135e4 100644
+--- a/drivers/block/rnbd/rnbd-srv.c
++++ b/drivers/block/rnbd/rnbd-srv.c
+@@ -266,12 +266,12 @@ out:
+ static int create_sess(struct rtrs_srv *rtrs)
+ {
+ struct rnbd_srv_session *srv_sess;
+- char sessname[NAME_MAX];
++ char pathname[NAME_MAX];
+ int err;
+
+- err = rtrs_srv_get_sess_name(rtrs, sessname, sizeof(sessname));
++ err = rtrs_srv_get_path_name(rtrs, pathname, sizeof(pathname));
+ if (err) {
+- pr_err("rtrs_srv_get_sess_name(%s): %d\n", sessname, err);
++ pr_err("rtrs_srv_get_path_name(%s): %d\n", pathname, err);
+
+ return err;
+ }
+@@ -284,8 +284,8 @@ static int create_sess(struct rtrs_srv *rtrs)
+ offsetof(struct rnbd_dev_blk_io, bio),
+ BIOSET_NEED_BVECS);
+ if (err) {
+- pr_err("Allocating srv_session for session %s failed\n",
+- sessname);
++ pr_err("Allocating srv_session for path %s failed\n",
++ pathname);
+ kfree(srv_sess);
+ return err;
+ }
+@@ -298,7 +298,7 @@ static int create_sess(struct rtrs_srv *rtrs)
+ mutex_unlock(&sess_lock);
+
+ srv_sess->rtrs = rtrs;
+- strscpy(srv_sess->sessname, sessname, sizeof(srv_sess->sessname));
++ strscpy(srv_sess->sessname, pathname, sizeof(srv_sess->sessname));
+
+ rtrs_srv_set_sess_priv(rtrs, srv_sess);
+
+@@ -333,10 +333,11 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
+ {
+ struct rnbd_srv_session *sess = sess_dev->sess;
+
+- sess_dev->keep_id = true;
+ /* It is already started to close by client's close message. */
+ if (!mutex_trylock(&sess->lock))
+ return;
++
++ sess_dev->keep_id = true;
+ /* first remove sysfs itself to avoid deadlock */
+ sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
+ rnbd_srv_destroy_dev_session_sysfs(sess_dev);
+diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
+index 1cc40b0ea7619..6b253d99bc48d 100644
+--- a/drivers/block/rsxx/dev.c
++++ b/drivers/block/rsxx/dev.c
+@@ -127,6 +127,8 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio)
+ blk_status_t st = BLK_STS_IOERR;
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ might_sleep();
+
+diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
+index 4d4bb810c2aea..656d99faf40a2 100644
+--- a/drivers/block/sunvdc.c
++++ b/drivers/block/sunvdc.c
+@@ -964,6 +964,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+ print_version();
+
+ hp = mdesc_grab();
++ if (!hp)
++ return -ENODEV;
+
+ err = -ENODEV;
+ if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
+diff --git a/drivers/block/swim.c b/drivers/block/swim.c
+index 7ccc8d2a41bc6..3911d0833e1b9 100644
+--- a/drivers/block/swim.c
++++ b/drivers/block/swim.c
+@@ -16,6 +16,7 @@
+ #include <linux/fd.h>
+ #include <linux/slab.h>
+ #include <linux/blk-mq.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/hdreg.h>
+ #include <linux/kernel.h>
+diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
+deleted file mode 100644
+index 420cd952ddc4b..0000000000000
+--- a/drivers/block/sx8.c
++++ /dev/null
+@@ -1,1575 +0,0 @@
+-/*
+- * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
+- *
+- * Copyright 2004-2005 Red Hat, Inc.
+- *
+- * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
+- *
+- * This file is subject to the terms and conditions of the GNU General Public
+- * License. See the file "COPYING" in the main directory of this archive
+- * for more details.
+- */
+-
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/init.h>
+-#include <linux/pci.h>
+-#include <linux/slab.h>
+-#include <linux/spinlock.h>
+-#include <linux/blk-mq.h>
+-#include <linux/sched.h>
+-#include <linux/interrupt.h>
+-#include <linux/compiler.h>
+-#include <linux/workqueue.h>
+-#include <linux/bitops.h>
+-#include <linux/delay.h>
+-#include <linux/ktime.h>
+-#include <linux/hdreg.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/completion.h>
+-#include <linux/scatterlist.h>
+-#include <asm/io.h>
+-#include <linux/uaccess.h>
+-
+-#if 0
+-#define CARM_DEBUG
+-#define CARM_VERBOSE_DEBUG
+-#else
+-#undef CARM_DEBUG
+-#undef CARM_VERBOSE_DEBUG
+-#endif
+-#undef CARM_NDEBUG
+-
+-#define DRV_NAME "sx8"
+-#define DRV_VERSION "1.0"
+-#define PFX DRV_NAME ": "
+-
+-MODULE_AUTHOR("Jeff Garzik");
+-MODULE_LICENSE("GPL");
+-MODULE_DESCRIPTION("Promise SATA SX8 block driver");
+-MODULE_VERSION(DRV_VERSION);
+-
+-/*
+- * SX8 hardware has a single message queue for all ATA ports.
+- * When this driver was written, the hardware (firmware?) would
+- * corrupt data eventually, if more than one request was outstanding.
+- * As one can imagine, having 8 ports bottlenecking on a single
+- * command hurts performance.
+- *
+- * Based on user reports, later versions of the hardware (firmware?)
+- * seem to be able to survive with more than one command queued.
+- *
+- * Therefore, we default to the safe option -- 1 command -- but
+- * allow the user to increase this.
+- *
+- * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
+- * but problems seem to occur when you exceed ~30, even on newer hardware.
+- */
+-static int max_queue = 1;
+-module_param(max_queue, int, 0444);
+-MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
+-
+-
+-#define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
+-
+-/* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
+-#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
+-#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
+-#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
+-
+-/* note: prints function name for you */
+-#ifdef CARM_DEBUG
+-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
+-#ifdef CARM_VERBOSE_DEBUG
+-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
+-#else
+-#define VPRINTK(fmt, args...)
+-#endif /* CARM_VERBOSE_DEBUG */
+-#else
+-#define DPRINTK(fmt, args...)
+-#define VPRINTK(fmt, args...)
+-#endif /* CARM_DEBUG */
+-
+-#ifdef CARM_NDEBUG
+-#define assert(expr)
+-#else
+-#define assert(expr) \
+- if(unlikely(!(expr))) { \
+- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+- #expr, __FILE__, __func__, __LINE__); \
+- }
+-#endif
+-
+-/* defines only for the constants which don't work well as enums */
+-struct carm_host;
+-
+-enum {
+- /* adapter-wide limits */
+- CARM_MAX_PORTS = 8,
+- CARM_SHM_SIZE = (4096 << 7),
+- CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
+- CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
+-
+- /* command message queue limits */
+- CARM_MAX_REQ = 64, /* max command msgs per host */
+- CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
+-
+- /* S/G limits, host-wide and per-request */
+- CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
+- CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
+- CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
+-
+- /* hardware registers */
+- CARM_IHQP = 0x1c,
+- CARM_INT_STAT = 0x10, /* interrupt status */
+- CARM_INT_MASK = 0x14, /* interrupt mask */
+- CARM_HMUC = 0x18, /* host message unit control */
+- RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
+- RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
+- RBUF_BYTE_SZ = 0x28,
+- CARM_RESP_IDX = 0x2c,
+- CARM_CMS0 = 0x30, /* command message size reg 0 */
+- CARM_LMUC = 0x48,
+- CARM_HMPHA = 0x6c,
+- CARM_INITC = 0xb5,
+-
+- /* bits in CARM_INT_{STAT,MASK} */
+- INT_RESERVED = 0xfffffff0,
+- INT_WATCHDOG = (1 << 3), /* watchdog timer */
+- INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
+- INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
+- INT_RESPONSE = (1 << 0), /* response msg available */
+- INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
+- INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
+- INT_RESPONSE,
+-
+- /* command messages, and related register bits */
+- CARM_HAVE_RESP = 0x01,
+- CARM_MSG_READ = 1,
+- CARM_MSG_WRITE = 2,
+- CARM_MSG_VERIFY = 3,
+- CARM_MSG_GET_CAPACITY = 4,
+- CARM_MSG_FLUSH = 5,
+- CARM_MSG_IOCTL = 6,
+- CARM_MSG_ARRAY = 8,
+- CARM_MSG_MISC = 9,
+- CARM_CME = (1 << 2),
+- CARM_RME = (1 << 1),
+- CARM_WZBC = (1 << 0),
+- CARM_RMI = (1 << 0),
+- CARM_Q_FULL = (1 << 3),
+- CARM_MSG_SIZE = 288,
+- CARM_Q_LEN = 48,
+-
+- /* CARM_MSG_IOCTL messages */
+- CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
+- CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
+- CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
+-
+- IOC_SCAN_CHAN_NODEV = 0x1f,
+- IOC_SCAN_CHAN_OFFSET = 0x40,
+-
+- /* CARM_MSG_ARRAY messages */
+- CARM_ARRAY_INFO = 0,
+-
+- ARRAY_NO_EXIST = (1 << 31),
+-
+- /* response messages */
+- RMSG_SZ = 8, /* sizeof(struct carm_response) */
+- RMSG_Q_LEN = 48, /* resp. msg list length */
+- RMSG_OK = 1, /* bit indicating msg was successful */
+- /* length of entire resp. msg buffer */
+- RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
+-
+- PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
+-
+- /* CARM_MSG_MISC messages */
+- MISC_GET_FW_VER = 2,
+- MISC_ALLOC_MEM = 3,
+- MISC_SET_TIME = 5,
+-
+- /* MISC_GET_FW_VER feature bits */
+- FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
+- FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
+- FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
+-
+- /* carm_host flags */
+- FL_NON_RAID = FW_VER_NON_RAID,
+- FL_4PORT = FW_VER_4PORT,
+- FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
+- FL_DYN_MAJOR = (1 << 17),
+-};
+-
+-enum {
+- CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
+-};
+-
+-enum scatter_gather_types {
+- SGT_32BIT = 0,
+- SGT_64BIT = 1,
+-};
+-
+-enum host_states {
+- HST_INVALID, /* invalid state; never used */
+- HST_ALLOC_BUF, /* setting up master SHM area */
+- HST_ERROR, /* we never leave here */
+- HST_PORT_SCAN, /* start dev scan */
+- HST_DEV_SCAN_START, /* start per-device probe */
+- HST_DEV_SCAN, /* continue per-device probe */
+- HST_DEV_ACTIVATE, /* activate devices we found */
+- HST_PROBE_FINISHED, /* probe is complete */
+- HST_PROBE_START, /* initiate probe */
+- HST_SYNC_TIME, /* tell firmware what time it is */
+- HST_GET_FW_VER, /* get firmware version, adapter port cnt */
+-};
+-
+-#ifdef CARM_DEBUG
+-static const char *state_name[] = {
+- "HST_INVALID",
+- "HST_ALLOC_BUF",
+- "HST_ERROR",
+- "HST_PORT_SCAN",
+- "HST_DEV_SCAN_START",
+- "HST_DEV_SCAN",
+- "HST_DEV_ACTIVATE",
+- "HST_PROBE_FINISHED",
+- "HST_PROBE_START",
+- "HST_SYNC_TIME",
+- "HST_GET_FW_VER",
+-};
+-#endif
+-
+-struct carm_port {
+- unsigned int port_no;
+- struct gendisk *disk;
+- struct carm_host *host;
+-
+- /* attached device characteristics */
+- u64 capacity;
+- char name[41];
+- u16 dev_geom_head;
+- u16 dev_geom_sect;
+- u16 dev_geom_cyl;
+-};
+-
+-struct carm_request {
+- int n_elem;
+- unsigned int msg_type;
+- unsigned int msg_subtype;
+- unsigned int msg_bucket;
+- struct scatterlist sg[CARM_MAX_REQ_SG];
+-};
+-
+-struct carm_host {
+- unsigned long flags;
+- void __iomem *mmio;
+- void *shm;
+- dma_addr_t shm_dma;
+-
+- int major;
+- int id;
+- char name[32];
+-
+- spinlock_t lock;
+- struct pci_dev *pdev;
+- unsigned int state;
+- u32 fw_ver;
+-
+- struct blk_mq_tag_set tag_set;
+- struct request_queue *oob_q;
+- unsigned int n_oob;
+-
+- unsigned int hw_sg_used;
+-
+- unsigned int resp_idx;
+-
+- unsigned int wait_q_prod;
+- unsigned int wait_q_cons;
+- struct request_queue *wait_q[CARM_MAX_WAIT_Q];
+-
+- void *msg_base;
+- dma_addr_t msg_dma;
+-
+- int cur_scan_dev;
+- unsigned long dev_active;
+- unsigned long dev_present;
+- struct carm_port port[CARM_MAX_PORTS];
+-
+- struct work_struct fsm_task;
+-
+- struct completion probe_comp;
+-};
+-
+-struct carm_response {
+- __le32 ret_handle;
+- __le32 status;
+-} __attribute__((packed));
+-
+-struct carm_msg_sg {
+- __le32 start;
+- __le32 len;
+-} __attribute__((packed));
+-
+-struct carm_msg_rw {
+- u8 type;
+- u8 id;
+- u8 sg_count;
+- u8 sg_type;
+- __le32 handle;
+- __le32 lba;
+- __le16 lba_count;
+- __le16 lba_high;
+- struct carm_msg_sg sg[32];
+-} __attribute__((packed));
+-
+-struct carm_msg_allocbuf {
+- u8 type;
+- u8 subtype;
+- u8 n_sg;
+- u8 sg_type;
+- __le32 handle;
+- __le32 addr;
+- __le32 len;
+- __le32 evt_pool;
+- __le32 n_evt;
+- __le32 rbuf_pool;
+- __le32 n_rbuf;
+- __le32 msg_pool;
+- __le32 n_msg;
+- struct carm_msg_sg sg[8];
+-} __attribute__((packed));
+-
+-struct carm_msg_ioctl {
+- u8 type;
+- u8 subtype;
+- u8 array_id;
+- u8 reserved1;
+- __le32 handle;
+- __le32 data_addr;
+- u32 reserved2;
+-} __attribute__((packed));
+-
+-struct carm_msg_sync_time {
+- u8 type;
+- u8 subtype;
+- u16 reserved1;
+- __le32 handle;
+- u32 reserved2;
+- __le32 timestamp;
+-} __attribute__((packed));
+-
+-struct carm_msg_get_fw_ver {
+- u8 type;
+- u8 subtype;
+- u16 reserved1;
+- __le32 handle;
+- __le32 data_addr;
+- u32 reserved2;
+-} __attribute__((packed));
+-
+-struct carm_fw_ver {
+- __le32 version;
+- u8 features;
+- u8 reserved1;
+- u16 reserved2;
+-} __attribute__((packed));
+-
+-struct carm_array_info {
+- __le32 size;
+-
+- __le16 size_hi;
+- __le16 stripe_size;
+-
+- __le32 mode;
+-
+- __le16 stripe_blk_sz;
+- __le16 reserved1;
+-
+- __le16 cyl;
+- __le16 head;
+-
+- __le16 sect;
+- u8 array_id;
+- u8 reserved2;
+-
+- char name[40];
+-
+- __le32 array_status;
+-
+- /* device list continues beyond this point? */
+-} __attribute__((packed));
+-
+-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+-static void carm_remove_one (struct pci_dev *pdev);
+-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+-
+-static const struct pci_device_id carm_pci_tbl[] = {
+- { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
+- { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
+- { } /* terminate list */
+-};
+-MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
+-
+-static struct pci_driver carm_driver = {
+- .name = DRV_NAME,
+- .id_table = carm_pci_tbl,
+- .probe = carm_init_one,
+- .remove = carm_remove_one,
+-};
+-
+-static const struct block_device_operations carm_bd_ops = {
+- .owner = THIS_MODULE,
+- .getgeo = carm_bdev_getgeo,
+-};
+-
+-static unsigned int carm_host_id;
+-static unsigned long carm_major_alloc;
+-
+-
+-
+-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+-{
+- struct carm_port *port = bdev->bd_disk->private_data;
+-
+- geo->heads = (u8) port->dev_geom_head;
+- geo->sectors = (u8) port->dev_geom_sect;
+- geo->cylinders = port->dev_geom_cyl;
+- return 0;
+-}
+-
+-static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
+-
+-static inline int carm_lookup_bucket(u32 msg_size)
+-{
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
+- if (msg_size <= msg_sizes[i])
+- return i;
+-
+- return -ENOENT;
+-}
+-
+-static void carm_init_buckets(void __iomem *mmio)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
+- writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
+-}
+-
+-static inline void *carm_ref_msg(struct carm_host *host,
+- unsigned int msg_idx)
+-{
+- return host->msg_base + (msg_idx * CARM_MSG_SIZE);
+-}
+-
+-static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
+- unsigned int msg_idx)
+-{
+- return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
+-}
+-
+-static int carm_send_msg(struct carm_host *host,
+- struct carm_request *crq, unsigned tag)
+-{
+- void __iomem *mmio = host->mmio;
+- u32 msg = (u32) carm_ref_msg_dma(host, tag);
+- u32 cm_bucket = crq->msg_bucket;
+- u32 tmp;
+- int rc = 0;
+-
+- VPRINTK("ENTER\n");
+-
+- tmp = readl(mmio + CARM_HMUC);
+- if (tmp & CARM_Q_FULL) {
+-#if 0
+- tmp = readl(mmio + CARM_INT_MASK);
+- tmp |= INT_Q_AVAILABLE;
+- writel(tmp, mmio + CARM_INT_MASK);
+- readl(mmio + CARM_INT_MASK); /* flush */
+-#endif
+- DPRINTK("host msg queue full\n");
+- rc = -EBUSY;
+- } else {
+- writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
+- readl(mmio + CARM_IHQP); /* flush */
+- }
+-
+- return rc;
+-}
+-
+-static int carm_array_info (struct carm_host *host, unsigned int array_idx)
+-{
+- struct carm_msg_ioctl *ioc;
+- u32 msg_data;
+- dma_addr_t msg_dma;
+- struct carm_request *crq;
+- struct request *rq;
+- int rc;
+-
+- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+- if (IS_ERR(rq)) {
+- rc = -ENOMEM;
+- goto err_out;
+- }
+- crq = blk_mq_rq_to_pdu(rq);
+-
+- ioc = carm_ref_msg(host, rq->tag);
+- msg_dma = carm_ref_msg_dma(host, rq->tag);
+- msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
+-
+- crq->msg_type = CARM_MSG_ARRAY;
+- crq->msg_subtype = CARM_ARRAY_INFO;
+- rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
+- sizeof(struct carm_array_info));
+- BUG_ON(rc < 0);
+- crq->msg_bucket = (u32) rc;
+-
+- memset(ioc, 0, sizeof(*ioc));
+- ioc->type = CARM_MSG_ARRAY;
+- ioc->subtype = CARM_ARRAY_INFO;
+- ioc->array_id = (u8) array_idx;
+- ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
+- ioc->data_addr = cpu_to_le32(msg_data);
+-
+- spin_lock_irq(&host->lock);
+- assert(host->state == HST_DEV_SCAN_START ||
+- host->state == HST_DEV_SCAN);
+- spin_unlock_irq(&host->lock);
+-
+- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+- blk_execute_rq_nowait(NULL, rq, true, NULL);
+-
+- return 0;
+-
+-err_out:
+- spin_lock_irq(&host->lock);
+- host->state = HST_ERROR;
+- spin_unlock_irq(&host->lock);
+- return rc;
+-}
+-
+-typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
+-
+-static int carm_send_special (struct carm_host *host, carm_sspc_t func)
+-{
+- struct request *rq;
+- struct carm_request *crq;
+- struct carm_msg_ioctl *ioc;
+- void *mem;
+- unsigned int msg_size;
+- int rc;
+-
+- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+- if (IS_ERR(rq))
+- return -ENOMEM;
+- crq = blk_mq_rq_to_pdu(rq);
+-
+- mem = carm_ref_msg(host, rq->tag);
+-
+- msg_size = func(host, rq->tag, mem);
+-
+- ioc = mem;
+- crq->msg_type = ioc->type;
+- crq->msg_subtype = ioc->subtype;
+- rc = carm_lookup_bucket(msg_size);
+- BUG_ON(rc < 0);
+- crq->msg_bucket = (u32) rc;
+-
+- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+- blk_execute_rq_nowait(NULL, rq, true, NULL);
+-
+- return 0;
+-}
+-
+-static unsigned int carm_fill_sync_time(struct carm_host *host,
+- unsigned int idx, void *mem)
+-{
+- struct carm_msg_sync_time *st = mem;
+-
+- time64_t tv = ktime_get_real_seconds();
+-
+- memset(st, 0, sizeof(*st));
+- st->type = CARM_MSG_MISC;
+- st->subtype = MISC_SET_TIME;
+- st->handle = cpu_to_le32(TAG_ENCODE(idx));
+- st->timestamp = cpu_to_le32(tv);
+-
+- return sizeof(struct carm_msg_sync_time);
+-}
+-
+-static unsigned int carm_fill_alloc_buf(struct carm_host *host,
+- unsigned int idx, void *mem)
+-{
+- struct carm_msg_allocbuf *ab = mem;
+-
+- memset(ab, 0, sizeof(*ab));
+- ab->type = CARM_MSG_MISC;
+- ab->subtype = MISC_ALLOC_MEM;
+- ab->handle = cpu_to_le32(TAG_ENCODE(idx));
+- ab->n_sg = 1;
+- ab->sg_type = SGT_32BIT;
+- ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
+- ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
+- ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
+- ab->n_evt = cpu_to_le32(1024);
+- ab->rbuf_pool = cpu_to_le32(host->shm_dma);
+- ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
+- ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
+- ab->n_msg = cpu_to_le32(CARM_Q_LEN);
+- ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
+- ab->sg[0].len = cpu_to_le32(65536);
+-
+- return sizeof(struct carm_msg_allocbuf);
+-}
+-
+-static unsigned int carm_fill_scan_channels(struct carm_host *host,
+- unsigned int idx, void *mem)
+-{
+- struct carm_msg_ioctl *ioc = mem;
+- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
+- IOC_SCAN_CHAN_OFFSET);
+-
+- memset(ioc, 0, sizeof(*ioc));
+- ioc->type = CARM_MSG_IOCTL;
+- ioc->subtype = CARM_IOC_SCAN_CHAN;
+- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+- ioc->data_addr = cpu_to_le32(msg_data);
+-
+- /* fill output data area with "no device" default values */
+- mem += IOC_SCAN_CHAN_OFFSET;
+- memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
+-
+- return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
+-}
+-
+-static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
+- unsigned int idx, void *mem)
+-{
+- struct carm_msg_get_fw_ver *ioc = mem;
+- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
+-
+- memset(ioc, 0, sizeof(*ioc));
+- ioc->type = CARM_MSG_MISC;
+- ioc->subtype = MISC_GET_FW_VER;
+- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+- ioc->data_addr = cpu_to_le32(msg_data);
+-
+- return sizeof(struct carm_msg_get_fw_ver) +
+- sizeof(struct carm_fw_ver);
+-}
+-
+-static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
+-{
+- unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
+-
+- blk_mq_stop_hw_queues(q);
+- VPRINTK("STOPPED QUEUE %p\n", q);
+-
+- host->wait_q[idx] = q;
+- host->wait_q_prod++;
+- BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
+-}
+-
+-static inline struct request_queue *carm_pop_q(struct carm_host *host)
+-{
+- unsigned int idx;
+-
+- if (host->wait_q_prod == host->wait_q_cons)
+- return NULL;
+-
+- idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
+- host->wait_q_cons++;
+-
+- return host->wait_q[idx];
+-}
+-
+-static inline void carm_round_robin(struct carm_host *host)
+-{
+- struct request_queue *q = carm_pop_q(host);
+- if (q) {
+- blk_mq_start_hw_queues(q);
+- VPRINTK("STARTED QUEUE %p\n", q);
+- }
+-}
+-
+-static inline enum dma_data_direction carm_rq_dir(struct request *rq)
+-{
+- return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+-}
+-
+-static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
+- const struct blk_mq_queue_data *bd)
+-{
+- struct request_queue *q = hctx->queue;
+- struct request *rq = bd->rq;
+- struct carm_port *port = q->queuedata;
+- struct carm_host *host = port->host;
+- struct carm_request *crq = blk_mq_rq_to_pdu(rq);
+- struct carm_msg_rw *msg;
+- struct scatterlist *sg;
+- int i, n_elem = 0, rc;
+- unsigned int msg_size;
+- u32 tmp;
+-
+- crq->n_elem = 0;
+- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
+-
+- blk_mq_start_request(rq);
+-
+- spin_lock_irq(&host->lock);
+- if (req_op(rq) == REQ_OP_DRV_OUT)
+- goto send_msg;
+-
+- /* get scatterlist from block layer */
+- sg = &crq->sg[0];
+- n_elem = blk_rq_map_sg(q, rq, sg);
+- if (n_elem <= 0)
+- goto out_ioerr;
+-
+- /* map scatterlist to PCI bus addresses */
+- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
+- if (n_elem <= 0)
+- goto out_ioerr;
+-
+- /* obey global hardware limit on S/G entries */
+- if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
+- goto out_resource;
+-
+- crq->n_elem = n_elem;
+- host->hw_sg_used += n_elem;
+-
+- /*
+- * build read/write message
+- */
+-
+- VPRINTK("build msg\n");
+- msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
+-
+- if (rq_data_dir(rq) == WRITE) {
+- msg->type = CARM_MSG_WRITE;
+- crq->msg_type = CARM_MSG_WRITE;
+- } else {
+- msg->type = CARM_MSG_READ;
+- crq->msg_type = CARM_MSG_READ;
+- }
+-
+- msg->id = port->port_no;
+- msg->sg_count = n_elem;
+- msg->sg_type = SGT_32BIT;
+- msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
+- msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
+- tmp = (blk_rq_pos(rq) >> 16) >> 16;
+- msg->lba_high = cpu_to_le16( (u16) tmp );
+- msg->lba_count = cpu_to_le16(blk_rq_sectors(rq));
+-
+- msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
+- for (i = 0; i < n_elem; i++) {
+- struct carm_msg_sg *carm_sg = &msg->sg[i];
+- carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
+- carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
+- msg_size += sizeof(struct carm_msg_sg);
+- }
+-
+- rc = carm_lookup_bucket(msg_size);
+- BUG_ON(rc < 0);
+- crq->msg_bucket = (u32) rc;
+-send_msg:
+- /*
+- * queue read/write message to hardware
+- */
+- VPRINTK("send msg, tag == %u\n", rq->tag);
+- rc = carm_send_msg(host, crq, rq->tag);
+- if (rc) {
+- host->hw_sg_used -= n_elem;
+- goto out_resource;
+- }
+-
+- spin_unlock_irq(&host->lock);
+- return BLK_STS_OK;
+-out_resource:
+- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
+- carm_push_q(host, q);
+- spin_unlock_irq(&host->lock);
+- return BLK_STS_DEV_RESOURCE;
+-out_ioerr:
+- carm_round_robin(host);
+- spin_unlock_irq(&host->lock);
+- return BLK_STS_IOERR;
+-}
+-
+-static void carm_handle_array_info(struct carm_host *host,
+- struct carm_request *crq, u8 *mem,
+- blk_status_t error)
+-{
+- struct carm_port *port;
+- u8 *msg_data = mem + sizeof(struct carm_array_info);
+- struct carm_array_info *desc = (struct carm_array_info *) msg_data;
+- u64 lo, hi;
+- int cur_port;
+- size_t slen;
+-
+- DPRINTK("ENTER\n");
+-
+- if (error)
+- goto out;
+- if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
+- goto out;
+-
+- cur_port = host->cur_scan_dev;
+-
+- /* should never occur */
+- if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
+- printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
+- cur_port, (int) desc->array_id);
+- goto out;
+- }
+-
+- port = &host->port[cur_port];
+-
+- lo = (u64) le32_to_cpu(desc->size);
+- hi = (u64) le16_to_cpu(desc->size_hi);
+-
+- port->capacity = lo | (hi << 32);
+- port->dev_geom_head = le16_to_cpu(desc->head);
+- port->dev_geom_sect = le16_to_cpu(desc->sect);
+- port->dev_geom_cyl = le16_to_cpu(desc->cyl);
+-
+- host->dev_active |= (1 << cur_port);
+-
+- strncpy(port->name, desc->name, sizeof(port->name));
+- port->name[sizeof(port->name) - 1] = 0;
+- slen = strlen(port->name);
+- while (slen && (port->name[slen - 1] == ' ')) {
+- port->name[slen - 1] = 0;
+- slen--;
+- }
+-
+- printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
+- pci_name(host->pdev), port->port_no,
+- (unsigned long long) port->capacity);
+- printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
+- pci_name(host->pdev), port->port_no, port->name);
+-
+-out:
+- assert(host->state == HST_DEV_SCAN);
+- schedule_work(&host->fsm_task);
+-}
+-
+-static void carm_handle_scan_chan(struct carm_host *host,
+- struct carm_request *crq, u8 *mem,
+- blk_status_t error)
+-{
+- u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
+- unsigned int i, dev_count = 0;
+- int new_state = HST_DEV_SCAN_START;
+-
+- DPRINTK("ENTER\n");
+-
+- if (error) {
+- new_state = HST_ERROR;
+- goto out;
+- }
+-
+- /* TODO: scan and support non-disk devices */
+- for (i = 0; i < 8; i++)
+- if (msg_data[i] == 0) { /* direct-access device (disk) */
+- host->dev_present |= (1 << i);
+- dev_count++;
+- }
+-
+- printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
+- pci_name(host->pdev), dev_count);
+-
+-out:
+- assert(host->state == HST_PORT_SCAN);
+- host->state = new_state;
+- schedule_work(&host->fsm_task);
+-}
+-
+-static void carm_handle_generic(struct carm_host *host,
+- struct carm_request *crq, blk_status_t error,
+- int cur_state, int next_state)
+-{
+- DPRINTK("ENTER\n");
+-
+- assert(host->state == cur_state);
+- if (error)
+- host->state = HST_ERROR;
+- else
+- host->state = next_state;
+- schedule_work(&host->fsm_task);
+-}
+-
+-static inline void carm_handle_resp(struct carm_host *host,
+- __le32 ret_handle_le, u32 status)
+-{
+- u32 handle = le32_to_cpu(ret_handle_le);
+- unsigned int msg_idx;
+- struct request *rq;
+- struct carm_request *crq;
+- blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
+- u8 *mem;
+-
+- VPRINTK("ENTER, handle == 0x%x\n", handle);
+-
+- if (unlikely(!TAG_VALID(handle))) {
+- printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
+- pci_name(host->pdev), handle);
+- return;
+- }
+-
+- msg_idx = TAG_DECODE(handle);
+- VPRINTK("tag == %u\n", msg_idx);
+-
+- rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
+- crq = blk_mq_rq_to_pdu(rq);
+-
+- /* fast path */
+- if (likely(crq->msg_type == CARM_MSG_READ ||
+- crq->msg_type == CARM_MSG_WRITE)) {
+- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
+- carm_rq_dir(rq));
+- goto done;
+- }
+-
+- mem = carm_ref_msg(host, msg_idx);
+-
+- switch (crq->msg_type) {
+- case CARM_MSG_IOCTL: {
+- switch (crq->msg_subtype) {
+- case CARM_IOC_SCAN_CHAN:
+- carm_handle_scan_chan(host, crq, mem, error);
+- goto done;
+- default:
+- /* unknown / invalid response */
+- goto err_out;
+- }
+- break;
+- }
+-
+- case CARM_MSG_MISC: {
+- switch (crq->msg_subtype) {
+- case MISC_ALLOC_MEM:
+- carm_handle_generic(host, crq, error,
+- HST_ALLOC_BUF, HST_SYNC_TIME);
+- goto done;
+- case MISC_SET_TIME:
+- carm_handle_generic(host, crq, error,
+- HST_SYNC_TIME, HST_GET_FW_VER);
+- goto done;
+- case MISC_GET_FW_VER: {
+- struct carm_fw_ver *ver = (struct carm_fw_ver *)
+- (mem + sizeof(struct carm_msg_get_fw_ver));
+- if (!error) {
+- host->fw_ver = le32_to_cpu(ver->version);
+- host->flags |= (ver->features & FL_FW_VER_MASK);
+- }
+- carm_handle_generic(host, crq, error,
+- HST_GET_FW_VER, HST_PORT_SCAN);
+- goto done;
+- }
+- default:
+- /* unknown / invalid response */
+- goto err_out;
+- }
+- break;
+- }
+-
+- case CARM_MSG_ARRAY: {
+- switch (crq->msg_subtype) {
+- case CARM_ARRAY_INFO:
+- carm_handle_array_info(host, crq, mem, error);
+- break;
+- default:
+- /* unknown / invalid response */
+- goto err_out;
+- }
+- break;
+- }
+-
+- default:
+- /* unknown / invalid response */
+- goto err_out;
+- }
+-
+- return;
+-
+-err_out:
+- printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
+- pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
+- error = BLK_STS_IOERR;
+-done:
+- host->hw_sg_used -= crq->n_elem;
+- blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
+-
+- if (host->hw_sg_used <= CARM_SG_LOW_WATER)
+- carm_round_robin(host);
+-}
+-
+-static inline void carm_handle_responses(struct carm_host *host)
+-{
+- void __iomem *mmio = host->mmio;
+- struct carm_response *resp = (struct carm_response *) host->shm;
+- unsigned int work = 0;
+- unsigned int idx = host->resp_idx % RMSG_Q_LEN;
+-
+- while (1) {
+- u32 status = le32_to_cpu(resp[idx].status);
+-
+- if (status == 0xffffffff) {
+- VPRINTK("ending response on index %u\n", idx);
+- writel(idx << 3, mmio + CARM_RESP_IDX);
+- break;
+- }
+-
+- /* response to a message we sent */
+- else if ((status & (1 << 31)) == 0) {
+- VPRINTK("handling msg response on index %u\n", idx);
+- carm_handle_resp(host, resp[idx].ret_handle, status);
+- resp[idx].status = cpu_to_le32(0xffffffff);
+- }
+-
+- /* asynchronous events the hardware throws our way */
+- else if ((status & 0xff000000) == (1 << 31)) {
+- u8 *evt_type_ptr = (u8 *) &resp[idx];
+- u8 evt_type = *evt_type_ptr;
+- printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
+- pci_name(host->pdev), (int) evt_type);
+- resp[idx].status = cpu_to_le32(0xffffffff);
+- }
+-
+- idx = NEXT_RESP(idx);
+- work++;
+- }
+-
+- VPRINTK("EXIT, work==%u\n", work);
+- host->resp_idx += work;
+-}
+-
+-static irqreturn_t carm_interrupt(int irq, void *__host)
+-{
+- struct carm_host *host = __host;
+- void __iomem *mmio;
+- u32 mask;
+- int handled = 0;
+- unsigned long flags;
+-
+- if (!host) {
+- VPRINTK("no host\n");
+- return IRQ_NONE;
+- }
+-
+- spin_lock_irqsave(&host->lock, flags);
+-
+- mmio = host->mmio;
+-
+- /* reading should also clear interrupts */
+- mask = readl(mmio + CARM_INT_STAT);
+-
+- if (mask == 0 || mask == 0xffffffff) {
+- VPRINTK("no work, mask == 0x%x\n", mask);
+- goto out;
+- }
+-
+- if (mask & INT_ACK_MASK)
+- writel(mask, mmio + CARM_INT_STAT);
+-
+- if (unlikely(host->state == HST_INVALID)) {
+- VPRINTK("not initialized yet, mask = 0x%x\n", mask);
+- goto out;
+- }
+-
+- if (mask & CARM_HAVE_RESP) {
+- handled = 1;
+- carm_handle_responses(host);
+- }
+-
+-out:
+- spin_unlock_irqrestore(&host->lock, flags);
+- VPRINTK("EXIT\n");
+- return IRQ_RETVAL(handled);
+-}
+-
+-static void carm_fsm_task (struct work_struct *work)
+-{
+- struct carm_host *host =
+- container_of(work, struct carm_host, fsm_task);
+- unsigned long flags;
+- unsigned int state;
+- int rc, i, next_dev;
+- int reschedule = 0;
+- int new_state = HST_INVALID;
+-
+- spin_lock_irqsave(&host->lock, flags);
+- state = host->state;
+- spin_unlock_irqrestore(&host->lock, flags);
+-
+- DPRINTK("ENTER, state == %s\n", state_name[state]);
+-
+- switch (state) {
+- case HST_PROBE_START:
+- new_state = HST_ALLOC_BUF;
+- reschedule = 1;
+- break;
+-
+- case HST_ALLOC_BUF:
+- rc = carm_send_special(host, carm_fill_alloc_buf);
+- if (rc) {
+- new_state = HST_ERROR;
+- reschedule = 1;
+- }
+- break;
+-
+- case HST_SYNC_TIME:
+- rc = carm_send_special(host, carm_fill_sync_time);
+- if (rc) {
+- new_state = HST_ERROR;
+- reschedule = 1;
+- }
+- break;
+-
+- case HST_GET_FW_VER:
+- rc = carm_send_special(host, carm_fill_get_fw_ver);
+- if (rc) {
+- new_state = HST_ERROR;
+- reschedule = 1;
+- }
+- break;
+-
+- case HST_PORT_SCAN:
+- rc = carm_send_special(host, carm_fill_scan_channels);
+- if (rc) {
+- new_state = HST_ERROR;
+- reschedule = 1;
+- }
+- break;
+-
+- case HST_DEV_SCAN_START:
+- host->cur_scan_dev = -1;
+- new_state = HST_DEV_SCAN;
+- reschedule = 1;
+- break;
+-
+- case HST_DEV_SCAN:
+- next_dev = -1;
+- for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
+- if (host->dev_present & (1 << i)) {
+- next_dev = i;
+- break;
+- }
+-
+- if (next_dev >= 0) {
+- host->cur_scan_dev = next_dev;
+- rc = carm_array_info(host, next_dev);
+- if (rc) {
+- new_state = HST_ERROR;
+- reschedule = 1;
+- }
+- } else {
+- new_state = HST_DEV_ACTIVATE;
+- reschedule = 1;
+- }
+- break;
+-
+- case HST_DEV_ACTIVATE: {
+- int activated = 0;
+- for (i = 0; i < CARM_MAX_PORTS; i++)
+- if (host->dev_active & (1 << i)) {
+- struct carm_port *port = &host->port[i];
+- struct gendisk *disk = port->disk;
+-
+- set_capacity(disk, port->capacity);
+- add_disk(disk);
+- activated++;
+- }
+-
+- printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
+- pci_name(host->pdev), activated);
+-
+- new_state = HST_PROBE_FINISHED;
+- reschedule = 1;
+- break;
+- }
+-
+- case HST_PROBE_FINISHED:
+- complete(&host->probe_comp);
+- break;
+-
+- case HST_ERROR:
+- /* FIXME: TODO */
+- break;
+-
+- default:
+- /* should never occur */
+- printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
+- assert(0);
+- break;
+- }
+-
+- if (new_state != HST_INVALID) {
+- spin_lock_irqsave(&host->lock, flags);
+- host->state = new_state;
+- spin_unlock_irqrestore(&host->lock, flags);
+- }
+- if (reschedule)
+- schedule_work(&host->fsm_task);
+-}
+-
+-static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < 50000; i++) {
+- u32 tmp = readl(mmio + CARM_LMUC);
+- udelay(100);
+-
+- if (test_bit) {
+- if ((tmp & bits) == bits)
+- return 0;
+- } else {
+- if ((tmp & bits) == 0)
+- return 0;
+- }
+-
+- cond_resched();
+- }
+-
+- printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
+- bits, test_bit ? "yes" : "no");
+- return -EBUSY;
+-}
+-
+-static void carm_init_responses(struct carm_host *host)
+-{
+- void __iomem *mmio = host->mmio;
+- unsigned int i;
+- struct carm_response *resp = (struct carm_response *) host->shm;
+-
+- for (i = 0; i < RMSG_Q_LEN; i++)
+- resp[i].status = cpu_to_le32(0xffffffff);
+-
+- writel(0, mmio + CARM_RESP_IDX);
+-}
+-
+-static int carm_init_host(struct carm_host *host)
+-{
+- void __iomem *mmio = host->mmio;
+- u32 tmp;
+- u8 tmp8;
+- int rc;
+-
+- DPRINTK("ENTER\n");
+-
+- writel(0, mmio + CARM_INT_MASK);
+-
+- tmp8 = readb(mmio + CARM_INITC);
+- if (tmp8 & 0x01) {
+- tmp8 &= ~0x01;
+- writeb(tmp8, mmio + CARM_INITC);
+- readb(mmio + CARM_INITC); /* flush */
+-
+- DPRINTK("snooze...\n");
+- msleep(5000);
+- }
+-
+- tmp = readl(mmio + CARM_HMUC);
+- if (tmp & CARM_CME) {
+- DPRINTK("CME bit present, waiting\n");
+- rc = carm_init_wait(mmio, CARM_CME, 1);
+- if (rc) {
+- DPRINTK("EXIT, carm_init_wait 1 failed\n");
+- return rc;
+- }
+- }
+- if (tmp & CARM_RME) {
+- DPRINTK("RME bit present, waiting\n");
+- rc = carm_init_wait(mmio, CARM_RME, 1);
+- if (rc) {
+- DPRINTK("EXIT, carm_init_wait 2 failed\n");
+- return rc;
+- }
+- }
+-
+- tmp &= ~(CARM_RME | CARM_CME);
+- writel(tmp, mmio + CARM_HMUC);
+- readl(mmio + CARM_HMUC); /* flush */
+-
+- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
+- if (rc) {
+- DPRINTK("EXIT, carm_init_wait 3 failed\n");
+- return rc;
+- }
+-
+- carm_init_buckets(mmio);
+-
+- writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
+- writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
+- writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
+-
+- tmp = readl(mmio + CARM_HMUC);
+- tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
+- writel(tmp, mmio + CARM_HMUC);
+- readl(mmio + CARM_HMUC); /* flush */
+-
+- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
+- if (rc) {
+- DPRINTK("EXIT, carm_init_wait 4 failed\n");
+- return rc;
+- }
+-
+- writel(0, mmio + CARM_HMPHA);
+- writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
+-
+- carm_init_responses(host);
+-
+- /* start initialization, probing state machine */
+- spin_lock_irq(&host->lock);
+- assert(host->state == HST_INVALID);
+- host->state = HST_PROBE_START;
+- spin_unlock_irq(&host->lock);
+- schedule_work(&host->fsm_task);
+-
+- DPRINTK("EXIT\n");
+- return 0;
+-}
+-
+-static const struct blk_mq_ops carm_mq_ops = {
+- .queue_rq = carm_queue_rq,
+-};
+-
+-static int carm_init_disk(struct carm_host *host, unsigned int port_no)
+-{
+- struct carm_port *port = &host->port[port_no];
+- struct gendisk *disk;
+-
+- port->host = host;
+- port->port_no = port_no;
+-
+- disk = blk_mq_alloc_disk(&host->tag_set, port);
+- if (IS_ERR(disk))
+- return PTR_ERR(disk);
+-
+- port->disk = disk;
+- sprintf(disk->disk_name, DRV_NAME "/%u",
+- (unsigned int)host->id * CARM_MAX_PORTS + port_no);
+- disk->major = host->major;
+- disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
+- disk->minors = CARM_MINORS_PER_MAJOR;
+- disk->fops = &carm_bd_ops;
+- disk->private_data = port;
+-
+- blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG);
+- blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY);
+- return 0;
+-}
+-
+-static void carm_free_disk(struct carm_host *host, unsigned int port_no)
+-{
+- struct carm_port *port = &host->port[port_no];
+- struct gendisk *disk = port->disk;
+-
+- if (!disk)
+- return;
+-
+- if (host->state > HST_DEV_ACTIVATE)
+- del_gendisk(disk);
+- blk_cleanup_disk(disk);
+-}
+-
+-static int carm_init_shm(struct carm_host *host)
+-{
+- host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE,
+- &host->shm_dma, GFP_KERNEL);
+- if (!host->shm)
+- return -ENOMEM;
+-
+- host->msg_base = host->shm + RBUF_LEN;
+- host->msg_dma = host->shm_dma + RBUF_LEN;
+-
+- memset(host->shm, 0xff, RBUF_LEN);
+- memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
+-
+- return 0;
+-}
+-
+-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+-{
+- struct carm_host *host;
+- int rc;
+- struct request_queue *q;
+- unsigned int i;
+-
+- printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+-
+- rc = pci_enable_device(pdev);
+- if (rc)
+- return rc;
+-
+- rc = pci_request_regions(pdev, DRV_NAME);
+- if (rc)
+- goto err_out;
+-
+- rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+- if (rc) {
+- printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
+- pci_name(pdev));
+- goto err_out_regions;
+- }
+-
+- host = kzalloc(sizeof(*host), GFP_KERNEL);
+- if (!host) {
+- rc = -ENOMEM;
+- goto err_out_regions;
+- }
+-
+- host->pdev = pdev;
+- spin_lock_init(&host->lock);
+- INIT_WORK(&host->fsm_task, carm_fsm_task);
+- init_completion(&host->probe_comp);
+-
+- host->mmio = ioremap(pci_resource_start(pdev, 0),
+- pci_resource_len(pdev, 0));
+- if (!host->mmio) {
+- printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
+- pci_name(pdev));
+- rc = -ENOMEM;
+- goto err_out_kfree;
+- }
+-
+- rc = carm_init_shm(host);
+- if (rc) {
+- printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
+- pci_name(pdev));
+- goto err_out_iounmap;
+- }
+-
+- memset(&host->tag_set, 0, sizeof(host->tag_set));
+- host->tag_set.ops = &carm_mq_ops;
+- host->tag_set.cmd_size = sizeof(struct carm_request);
+- host->tag_set.nr_hw_queues = 1;
+- host->tag_set.nr_maps = 1;
+- host->tag_set.queue_depth = max_queue;
+- host->tag_set.numa_node = NUMA_NO_NODE;
+- host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+-
+- rc = blk_mq_alloc_tag_set(&host->tag_set);
+- if (rc)
+- goto err_out_dma_free;
+-
+- q = blk_mq_init_queue(&host->tag_set);
+- if (IS_ERR(q)) {
+- rc = PTR_ERR(q);
+- blk_mq_free_tag_set(&host->tag_set);
+- goto err_out_dma_free;
+- }
+-
+- host->oob_q = q;
+- q->queuedata = host;
+-
+- /*
+- * Figure out which major to use: 160, 161, or dynamic
+- */
+- if (!test_and_set_bit(0, &carm_major_alloc))
+- host->major = 160;
+- else if (!test_and_set_bit(1, &carm_major_alloc))
+- host->major = 161;
+- else
+- host->flags |= FL_DYN_MAJOR;
+-
+- host->id = carm_host_id;
+- sprintf(host->name, DRV_NAME "%d", carm_host_id);
+-
+- rc = register_blkdev(host->major, host->name);
+- if (rc < 0)
+- goto err_out_free_majors;
+- if (host->flags & FL_DYN_MAJOR)
+- host->major = rc;
+-
+- for (i = 0; i < CARM_MAX_PORTS; i++) {
+- rc = carm_init_disk(host, i);
+- if (rc)
+- goto err_out_blkdev_disks;
+- }
+-
+- pci_set_master(pdev);
+-
+- rc = request_irq(pdev->irq, carm_interrupt, IRQF_SHARED, DRV_NAME, host);
+- if (rc) {
+- printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
+- pci_name(pdev));
+- goto err_out_blkdev_disks;
+- }
+-
+- rc = carm_init_host(host);
+- if (rc)
+- goto err_out_free_irq;
+-
+- DPRINTK("waiting for probe_comp\n");
+- wait_for_completion(&host->probe_comp);
+-
+- printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n",
+- host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
+- (unsigned long long)pci_resource_start(pdev, 0),
+- pdev->irq, host->major);
+-
+- carm_host_id++;
+- pci_set_drvdata(pdev, host);
+- return 0;
+-
+-err_out_free_irq:
+- free_irq(pdev->irq, host);
+-err_out_blkdev_disks:
+- for (i = 0; i < CARM_MAX_PORTS; i++)
+- carm_free_disk(host, i);
+- unregister_blkdev(host->major, host->name);
+-err_out_free_majors:
+- if (host->major == 160)
+- clear_bit(0, &carm_major_alloc);
+- else if (host->major == 161)
+- clear_bit(1, &carm_major_alloc);
+- blk_cleanup_queue(host->oob_q);
+- blk_mq_free_tag_set(&host->tag_set);
+-err_out_dma_free:
+- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
+-err_out_iounmap:
+- iounmap(host->mmio);
+-err_out_kfree:
+- kfree(host);
+-err_out_regions:
+- pci_release_regions(pdev);
+-err_out:
+- pci_disable_device(pdev);
+- return rc;
+-}
+-
+-static void carm_remove_one (struct pci_dev *pdev)
+-{
+- struct carm_host *host = pci_get_drvdata(pdev);
+- unsigned int i;
+-
+- if (!host) {
+- printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
+- pci_name(pdev));
+- return;
+- }
+-
+- free_irq(pdev->irq, host);
+- for (i = 0; i < CARM_MAX_PORTS; i++)
+- carm_free_disk(host, i);
+- unregister_blkdev(host->major, host->name);
+- if (host->major == 160)
+- clear_bit(0, &carm_major_alloc);
+- else if (host->major == 161)
+- clear_bit(1, &carm_major_alloc);
+- blk_cleanup_queue(host->oob_q);
+- blk_mq_free_tag_set(&host->tag_set);
+- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
+- iounmap(host->mmio);
+- kfree(host);
+- pci_release_regions(pdev);
+- pci_disable_device(pdev);
+-}
+-
+-module_pci_driver(carm_driver);
+diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
+index 303caf2d17d0c..d2ba849bb8d19 100644
+--- a/drivers/block/virtio_blk.c
++++ b/drivers/block/virtio_blk.c
+@@ -24,6 +24,12 @@
+ /* The maximum number of sg elements that fit into a virtqueue */
+ #define VIRTIO_BLK_MAX_SG_ELEMS 32768
+
++#ifdef CONFIG_ARCH_NO_SG_CHAIN
++#define VIRTIO_BLK_INLINE_SG_CNT 0
++#else
++#define VIRTIO_BLK_INLINE_SG_CNT 2
++#endif
++
+ static int major;
+ static DEFINE_IDA(vd_index_ida);
+
+@@ -77,6 +83,7 @@ struct virtio_blk {
+ struct virtblk_req {
+ struct virtio_blk_outhdr out_hdr;
+ u8 status;
++ struct sg_table sg_table;
+ struct scatterlist sg[];
+ };
+
+@@ -162,12 +169,92 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
+ return 0;
+ }
+
+-static inline void virtblk_request_done(struct request *req)
++static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
+ {
+- struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
++ if (blk_rq_nr_phys_segments(req))
++ sg_free_table_chained(&vbr->sg_table,
++ VIRTIO_BLK_INLINE_SG_CNT);
++}
+
++static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
++ struct virtblk_req *vbr)
++{
++ int err;
++
++ if (!blk_rq_nr_phys_segments(req))
++ return 0;
++
++ vbr->sg_table.sgl = vbr->sg;
++ err = sg_alloc_table_chained(&vbr->sg_table,
++ blk_rq_nr_phys_segments(req),
++ vbr->sg_table.sgl,
++ VIRTIO_BLK_INLINE_SG_CNT);
++ if (unlikely(err))
++ return -ENOMEM;
++
++ return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
++}
++
++static void virtblk_cleanup_cmd(struct request *req)
++{
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
+ kfree(bvec_virt(&req->special_vec));
++}
++
++static int virtblk_setup_cmd(struct virtio_device *vdev, struct request *req,
++ struct virtblk_req *vbr)
++{
++ bool unmap = false;
++ u32 type;
++
++ vbr->out_hdr.sector = 0;
++
++ switch (req_op(req)) {
++ case REQ_OP_READ:
++ type = VIRTIO_BLK_T_IN;
++ vbr->out_hdr.sector = cpu_to_virtio64(vdev,
++ blk_rq_pos(req));
++ break;
++ case REQ_OP_WRITE:
++ type = VIRTIO_BLK_T_OUT;
++ vbr->out_hdr.sector = cpu_to_virtio64(vdev,
++ blk_rq_pos(req));
++ break;
++ case REQ_OP_FLUSH:
++ type = VIRTIO_BLK_T_FLUSH;
++ break;
++ case REQ_OP_DISCARD:
++ type = VIRTIO_BLK_T_DISCARD;
++ break;
++ case REQ_OP_WRITE_ZEROES:
++ type = VIRTIO_BLK_T_WRITE_ZEROES;
++ unmap = !(req->cmd_flags & REQ_NOUNMAP);
++ break;
++ case REQ_OP_DRV_IN:
++ type = VIRTIO_BLK_T_GET_ID;
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ return BLK_STS_IOERR;
++ }
++
++ vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
++ vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
++
++ if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
++ if (virtblk_setup_discard_write_zeroes(req, unmap))
++ return BLK_STS_RESOURCE;
++ }
++
++ return 0;
++}
++
++static inline void virtblk_request_done(struct request *req)
++{
++ struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
++
++ virtblk_unmap_data(req, vbr);
++ virtblk_cleanup_cmd(req);
+ blk_mq_end_request(req, virtblk_result(vbr));
+ }
+
+@@ -221,61 +308,27 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+ struct request *req = bd->rq;
+ struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+ unsigned long flags;
+- unsigned int num;
++ int num;
+ int qid = hctx->queue_num;
+ int err;
+ bool notify = false;
+- bool unmap = false;
+- u32 type;
+
+ BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
+
+- switch (req_op(req)) {
+- case REQ_OP_READ:
+- case REQ_OP_WRITE:
+- type = 0;
+- break;
+- case REQ_OP_FLUSH:
+- type = VIRTIO_BLK_T_FLUSH;
+- break;
+- case REQ_OP_DISCARD:
+- type = VIRTIO_BLK_T_DISCARD;
+- break;
+- case REQ_OP_WRITE_ZEROES:
+- type = VIRTIO_BLK_T_WRITE_ZEROES;
+- unmap = !(req->cmd_flags & REQ_NOUNMAP);
+- break;
+- case REQ_OP_DRV_IN:
+- type = VIRTIO_BLK_T_GET_ID;
+- break;
+- default:
+- WARN_ON_ONCE(1);
+- return BLK_STS_IOERR;
+- }
+-
+- vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
+- vbr->out_hdr.sector = type ?
+- 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
+- vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
++ err = virtblk_setup_cmd(vblk->vdev, req, vbr);
++ if (unlikely(err))
++ return err;
+
+ blk_mq_start_request(req);
+
+- if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
+- err = virtblk_setup_discard_write_zeroes(req, unmap);
+- if (err)
+- return BLK_STS_RESOURCE;
+- }
+-
+- num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
+- if (num) {
+- if (rq_data_dir(req) == WRITE)
+- vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
+- else
+- vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
++ num = virtblk_map_data(hctx, req, vbr);
++ if (unlikely(num < 0)) {
++ virtblk_cleanup_cmd(req);
++ return BLK_STS_RESOURCE;
+ }
+
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
+- err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
++ err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
+ if (err) {
+ virtqueue_kick(vblk->vqs[qid].vq);
+ /* Don't stop the queue if -ENOMEM: we may have failed to
+@@ -284,6 +337,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+ if (err == -ENOSPC)
+ blk_mq_stop_hw_queue(hctx);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
++ virtblk_unmap_data(req, vbr);
++ virtblk_cleanup_cmd(req);
+ switch (err) {
+ case -ENOSPC:
+ return BLK_STS_DEV_RESOURCE;
+@@ -660,16 +715,6 @@ static const struct attribute_group *virtblk_attr_groups[] = {
+ NULL,
+ };
+
+-static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
+- unsigned int hctx_idx, unsigned int numa_node)
+-{
+- struct virtio_blk *vblk = set->driver_data;
+- struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
+-
+- sg_init_table(vbr->sg, vblk->sg_elems);
+- return 0;
+-}
+-
+ static int virtblk_map_queues(struct blk_mq_tag_set *set)
+ {
+ struct virtio_blk *vblk = set->driver_data;
+@@ -682,7 +727,6 @@ static const struct blk_mq_ops virtio_mq_ops = {
+ .queue_rq = virtio_queue_rq,
+ .commit_rqs = virtio_commit_rqs,
+ .complete = virtblk_request_done,
+- .init_request = virtblk_init_request,
+ .map_queues = virtblk_map_queues,
+ };
+
+@@ -762,7 +806,7 @@ static int virtblk_probe(struct virtio_device *vdev)
+ vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ vblk->tag_set.cmd_size =
+ sizeof(struct virtblk_req) +
+- sizeof(struct scatterlist) * sg_elems;
++ sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
+ vblk->tag_set.driver_data = vblk;
+ vblk->tag_set.nr_hw_queues = vblk->num_vqs;
+
+@@ -815,9 +859,17 @@ static int virtblk_probe(struct virtio_device *vdev)
+ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ struct virtio_blk_config, blk_size,
+ &blk_size);
+- if (!err)
++ if (!err) {
++ err = blk_validate_block_size(blk_size);
++ if (err) {
++ dev_err(&vdev->dev,
++ "virtio_blk: invalid block size: 0x%x\n",
++ blk_size);
++ goto out_cleanup_disk;
++ }
++
+ blk_queue_logical_block_size(q, blk_size);
+- else
++ } else
+ blk_size = queue_logical_block_size(q);
+
+ /* Use topology information if available */
+@@ -847,11 +899,12 @@ static int virtblk_probe(struct virtio_device *vdev)
+ blk_queue_io_opt(q, blk_size * opt_io_size);
+
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+- q->limits.discard_granularity = blk_size;
+-
+ virtio_cread(vdev, struct virtio_blk_config,
+ discard_sector_alignment, &v);
+- q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
++ if (v)
++ q->limits.discard_granularity = v << SECTOR_SHIFT;
++ else
++ q->limits.discard_granularity = blk_size;
+
+ virtio_cread(vdev, struct virtio_blk_config,
+ max_discard_sectors, &v);
+@@ -859,9 +912,15 @@ static int virtblk_probe(struct virtio_device *vdev)
+
+ virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
+ &v);
++
++ /*
++ * max_discard_seg == 0 is out of spec but we always
++ * handled it.
++ */
++ if (!v)
++ v = sg_elems - 2;
+ blk_queue_max_discard_segments(q,
+- min_not_zero(v,
+- MAX_DISCARD_SEGMENTS));
++ min(v, MAX_DISCARD_SEGMENTS));
+
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+ }
+diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
+index bda5c815e4415..a28473470e662 100644
+--- a/drivers/block/xen-blkback/common.h
++++ b/drivers/block/xen-blkback/common.h
+@@ -226,6 +226,9 @@ struct xen_vbd {
+ sector_t size;
+ unsigned int flush_support:1;
+ unsigned int discard_secure:1;
++ /* Connect-time cached feature_persistent parameter value */
++ unsigned int feature_gnt_persistent_parm:1;
++ /* Persistent grants feature negotiation result */
+ unsigned int feature_gnt_persistent:1;
+ unsigned int overflow_max_grants:1;
+ };
+diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
+index 33eba3df4dd9a..1525e28c5d703 100644
+--- a/drivers/block/xen-blkback/xenbus.c
++++ b/drivers/block/xen-blkback/xenbus.c
+@@ -156,6 +156,11 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
+ return 0;
+ }
+
++/* Enable the persistent grants feature. */
++static bool feature_persistent = true;
++module_param(feature_persistent, bool, 0644);
++MODULE_PARM_DESC(feature_persistent, "Enables the persistent grants feature");
++
+ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
+ {
+ struct xen_blkif *blkif;
+@@ -471,12 +476,6 @@ static void xen_vbd_free(struct xen_vbd *vbd)
+ vbd->bdev = NULL;
+ }
+
+-/* Enable the persistent grants feature. */
+-static bool feature_persistent = true;
+-module_param(feature_persistent, bool, 0644);
+-MODULE_PARM_DESC(feature_persistent,
+- "Enables the persistent grants feature");
+-
+ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
+ unsigned major, unsigned minor, int readonly,
+ int cdrom)
+@@ -522,8 +521,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
+ if (q && blk_queue_secure_erase(q))
+ vbd->discard_secure = true;
+
+- vbd->feature_gnt_persistent = feature_persistent;
+-
+ pr_debug("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return 0;
+@@ -913,7 +910,7 @@ again:
+ xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
+- be->blkif->vbd.feature_gnt_persistent);
++ be->blkif->vbd.feature_gnt_persistent_parm);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+ dev->nodename);
+@@ -1090,10 +1087,11 @@ static int connect_ring(struct backend_info *be)
+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+ return -ENOSYS;
+ }
+- if (blkif->vbd.feature_gnt_persistent)
+- blkif->vbd.feature_gnt_persistent =
+- xenbus_read_unsigned(dev->otherend,
+- "feature-persistent", 0);
++
++ blkif->vbd.feature_gnt_persistent_parm = feature_persistent;
++ blkif->vbd.feature_gnt_persistent =
++ blkif->vbd.feature_gnt_persistent_parm &&
++ xenbus_read_unsigned(dev->otherend, "feature-persistent", 0);
+
+ blkif->vbd.overflow_max_grants = 0;
+
+diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
+index 72902104f1112..831747ba8113c 100644
+--- a/drivers/block/xen-blkfront.c
++++ b/drivers/block/xen-blkfront.c
+@@ -42,6 +42,7 @@
+ #include <linux/cdrom.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/scatterlist.h>
+ #include <linux/bitmap.h>
+@@ -151,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order;
+ module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
+ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
++static bool __read_mostly xen_blkif_trusted = true;
++module_param_named(trusted, xen_blkif_trusted, bool, 0644);
++MODULE_PARM_DESC(trusted, "Is the backend trusted");
++
+ #define BLK_RING_SIZE(info) \
+ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
+
+@@ -207,7 +212,11 @@ struct blkfront_info
+ unsigned int feature_fua:1;
+ unsigned int feature_discard:1;
+ unsigned int feature_secdiscard:1;
++ /* Connect-time cached feature_persistent parameter */
++ unsigned int feature_persistent_parm:1;
++ /* Persistent grants feature negotiation result */
+ unsigned int feature_persistent:1;
++ unsigned int bounce:1;
+ unsigned int discard_granularity;
+ unsigned int discard_alignment;
+ /* Number of 4KB segments handled */
+@@ -310,8 +319,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
+ if (!gnt_list_entry)
+ goto out_of_memory;
+
+- if (info->feature_persistent) {
+- granted_page = alloc_page(GFP_NOIO);
++ if (info->bounce) {
++ granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
+ if (!granted_page) {
+ kfree(gnt_list_entry);
+ goto out_of_memory;
+@@ -330,7 +339,7 @@ out_of_memory:
+ list_for_each_entry_safe(gnt_list_entry, n,
+ &rinfo->grants, node) {
+ list_del(&gnt_list_entry->node);
+- if (info->feature_persistent)
++ if (info->bounce)
+ __free_page(gnt_list_entry->page);
+ kfree(gnt_list_entry);
+ i--;
+@@ -376,7 +385,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
+ /* Assign a gref to this page */
+ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
+ BUG_ON(gnt_list_entry->gref == -ENOSPC);
+- if (info->feature_persistent)
++ if (info->bounce)
+ grant_foreign_access(gnt_list_entry, info);
+ else {
+ /* Grant access to the GFN passed by the caller */
+@@ -400,7 +409,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
+ /* Assign a gref to this page */
+ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
+ BUG_ON(gnt_list_entry->gref == -ENOSPC);
+- if (!info->feature_persistent) {
++ if (!info->bounce) {
+ struct page *indirect_page;
+
+ /* Fetch a pre-allocated page to use for indirect grefs */
+@@ -702,7 +711,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
+ .grant_idx = 0,
+ .segments = NULL,
+ .rinfo = rinfo,
+- .need_copy = rq_data_dir(req) && info->feature_persistent,
++ .need_copy = rq_data_dir(req) && info->bounce,
+ };
+
+ /*
+@@ -771,7 +780,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
+ ring_req->u.rw.handle = info->handle;
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+- if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
++ if (req_op(req) == REQ_OP_FLUSH ||
++ (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
+ /*
+ * Ideally we can do an unordered flush-to-disk.
+ * In case the backend onlysupports barriers, use that.
+@@ -980,11 +990,12 @@ static void xlvbd_flush(struct blkfront_info *info)
+ {
+ blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
+ info->feature_fua ? true : false);
+- pr_info("blkfront: %s: %s %s %s %s %s\n",
++ pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
+ info->gd->disk_name, flush_info(info),
+ "persistent grants:", info->feature_persistent ?
+ "enabled;" : "disabled;", "indirect descriptors:",
+- info->max_indirect_segments ? "enabled;" : "disabled;");
++ info->max_indirect_segments ? "enabled;" : "disabled;",
++ "bounce buffer:", info->bounce ? "enabled" : "disabled;");
+ }
+
+ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
+@@ -1211,7 +1222,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+ if (!list_empty(&rinfo->indirect_pages)) {
+ struct page *indirect_page, *n;
+
+- BUG_ON(info->feature_persistent);
++ BUG_ON(info->bounce);
+ list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
+ list_del(&indirect_page->lru);
+ __free_page(indirect_page);
+@@ -1228,7 +1239,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+ 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ }
+- if (info->feature_persistent)
++ if (info->bounce)
+ __free_page(persistent_gnt->page);
+ kfree(persistent_gnt);
+ }
+@@ -1249,7 +1260,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+ for (j = 0; j < segs; j++) {
+ persistent_gnt = rinfo->shadow[i].grants_used[j];
+ gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+- if (info->feature_persistent)
++ if (info->bounce)
+ __free_page(persistent_gnt->page);
+ kfree(persistent_gnt);
+ }
+@@ -1290,7 +1301,8 @@ free_shadow:
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
+ }
+ }
+- free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
++ free_pages_exact(rinfo->ring.sring,
++ info->nr_ring_pages * XEN_PAGE_SIZE);
+ rinfo->ring.sring = NULL;
+
+ if (rinfo->irq)
+@@ -1374,9 +1386,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
+ return BLKIF_RSP_OKAY;
+ }
+
+-static bool blkif_completion(unsigned long *id,
+- struct blkfront_ring_info *rinfo,
+- struct blkif_response *bret)
++/*
++ * Return values:
++ * 1 response processed.
++ * 0 missing further responses.
++ * -1 error while processing.
++ */
++static int blkif_completion(unsigned long *id,
++ struct blkfront_ring_info *rinfo,
++ struct blkif_response *bret)
+ {
+ int i = 0;
+ struct scatterlist *sg;
+@@ -1399,7 +1417,7 @@ static bool blkif_completion(unsigned long *id,
+
+ /* Wait the second response if not yet here. */
+ if (s2->status < REQ_DONE)
+- return false;
++ return 0;
+
+ bret->status = blkif_get_final_status(s->status,
+ s2->status);
+@@ -1432,7 +1450,7 @@ static bool blkif_completion(unsigned long *id,
+ data.s = s;
+ num_sg = s->num_sg;
+
+- if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
++ if (bret->operation == BLKIF_OP_READ && info->bounce) {
+ for_each_sg(s->sg, sg, num_sg, i) {
+ BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+@@ -1450,47 +1468,48 @@ static bool blkif_completion(unsigned long *id,
+ }
+ /* Add the persistent grant into the list of free grants */
+ for (i = 0; i < num_grant; i++) {
+- if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
++ if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
+ /*
+ * If the grant is still mapped by the backend (the
+ * backend has chosen to make this grant persistent)
+ * we add it at the head of the list, so it will be
+ * reused first.
+ */
+- if (!info->feature_persistent)
+- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
+- s->grants_used[i]->gref);
++ if (!info->feature_persistent) {
++ pr_alert("backed has not unmapped grant: %u\n",
++ s->grants_used[i]->gref);
++ return -1;
++ }
+ list_add(&s->grants_used[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
+ } else {
+ /*
+- * If the grant is not mapped by the backend we end the
+- * foreign access and add it to the tail of the list,
+- * so it will not be picked again unless we run out of
+- * persistent grants.
++ * If the grant is not mapped by the backend we add it
++ * to the tail of the list, so it will not be picked
++ * again unless we run out of persistent grants.
+ */
+- gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
+ s->grants_used[i]->gref = GRANT_INVALID_REF;
+ list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
+ }
+ }
+ if (s->req.operation == BLKIF_OP_INDIRECT) {
+ for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
+- if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
+- if (!info->feature_persistent)
+- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
+- s->indirect_grants[i]->gref);
++ if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
++ if (!info->feature_persistent) {
++ pr_alert("backed has not unmapped grant: %u\n",
++ s->indirect_grants[i]->gref);
++ return -1;
++ }
+ list_add(&s->indirect_grants[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
+ } else {
+ struct page *indirect_page;
+
+- gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
+ /*
+ * Add the used indirect page back to the list of
+ * available pages for indirect grefs.
+ */
+- if (!info->feature_persistent) {
++ if (!info->bounce) {
+ indirect_page = s->indirect_grants[i]->page;
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
+ }
+@@ -1500,7 +1519,7 @@ static bool blkif_completion(unsigned long *id,
+ }
+ }
+
+- return true;
++ return 1;
+ }
+
+ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+@@ -1511,9 +1530,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+ unsigned long flags;
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
+ struct blkfront_info *info = rinfo->dev_info;
++ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
++ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
++ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ return IRQ_HANDLED;
++ }
+
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ again:
+@@ -1529,6 +1551,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+ unsigned long id;
+ unsigned int op;
+
++ eoiflag = 0;
++
+ RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+ id = bret.id;
+
+@@ -1561,12 +1585,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+ }
+
+ if (bret.operation != BLKIF_OP_DISCARD) {
++ int ret;
++
+ /*
+ * We may need to wait for an extra response if the
+ * I/O request is split in 2
+ */
+- if (!blkif_completion(&id, rinfo, &bret))
++ ret = blkif_completion(&id, rinfo, &bret);
++ if (!ret)
+ continue;
++ if (unlikely(ret < 0))
++ goto err;
+ }
+
+ if (add_id_to_freelist(rinfo, id)) {
+@@ -1645,6 +1674,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+
++ xen_irq_lateeoi(irq, eoiflag);
++
+ return IRQ_HANDLED;
+
+ err:
+@@ -1652,6 +1683,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+
++ /* No EOI in order to avoid further interrupts. */
++
+ pr_alert("%s disabled for further use\n", info->gd->disk_name);
+ return IRQ_HANDLED;
+ }
+@@ -1669,8 +1702,7 @@ static int setup_blkring(struct xenbus_device *dev,
+ for (i = 0; i < info->nr_ring_pages; i++)
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
+
+- sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+- get_order(ring_size));
++ sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO);
+ if (!sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+@@ -1680,7 +1712,7 @@ static int setup_blkring(struct xenbus_device *dev,
+
+ err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
+ if (err < 0) {
+- free_pages((unsigned long)sring, get_order(ring_size));
++ free_pages_exact(sring, ring_size);
+ rinfo->ring.sring = NULL;
+ goto fail;
+ }
+@@ -1691,8 +1723,8 @@ static int setup_blkring(struct xenbus_device *dev,
+ if (err)
+ goto fail;
+
+- err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
+- "blkif", rinfo);
++ err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
++ 0, "blkif", rinfo);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+@@ -1754,6 +1786,12 @@ abort_transaction:
+ return err;
+ }
+
++/* Enable the persistent grants feature. */
++static bool feature_persistent = true;
++module_param(feature_persistent, bool, 0644);
++MODULE_PARM_DESC(feature_persistent,
++ "Enables the persistent grants feature");
++
+ /* Common code used when first setting up, and when resuming. */
+ static int talk_to_blkback(struct xenbus_device *dev,
+ struct blkfront_info *info)
+@@ -1768,6 +1806,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
+ if (!info)
+ return -ENODEV;
+
++ /* Check if backend is trusted. */
++ info->bounce = !xen_blkif_trusted ||
++ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
++
+ max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
+ "max-ring-page-order", 0);
+ ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+@@ -1841,8 +1883,9 @@ again:
+ message = "writing protocol";
+ goto abort_transaction;
+ }
++ info->feature_persistent_parm = feature_persistent;
+ err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
+- info->feature_persistent);
++ info->feature_persistent_parm);
+ if (err)
+ dev_warn(&dev->dev,
+ "writing persistent grants feature to xenbus");
+@@ -1910,12 +1953,6 @@ static int negotiate_mq(struct blkfront_info *info)
+ return 0;
+ }
+
+-/* Enable the persistent grants feature. */
+-static bool feature_persistent = true;
+-module_param(feature_persistent, bool, 0644);
+-MODULE_PARM_DESC(feature_persistent,
+- "Enables the persistent grants feature");
+-
+ /*
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffer for communication with the backend, and
+@@ -1982,8 +2019,6 @@ static int blkfront_probe(struct xenbus_device *dev,
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+
+- info->feature_persistent = feature_persistent;
+-
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
+ dev_set_drvdata(&dev->dev, info);
+@@ -2118,9 +2153,11 @@ static void blkfront_closing(struct blkfront_info *info)
+ return;
+
+ /* No more blkif_request(). */
+- blk_mq_stop_hw_queues(info->rq);
+- blk_set_queue_dying(info->rq);
+- set_capacity(info->gd, 0);
++ if (info->rq && info->gd) {
++ blk_mq_stop_hw_queues(info->rq);
++ blk_mark_disk_dead(info->gd);
++ set_capacity(info->gd, 0);
++ }
+
+ for_each_rinfo(info, rinfo, i) {
+ /* No more gnttab callback work. */
+@@ -2175,17 +2212,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
+ if (err)
+ goto out_of_memory;
+
+- if (!info->feature_persistent && info->max_indirect_segments) {
++ if (!info->bounce && info->max_indirect_segments) {
+ /*
+- * We are using indirect descriptors but not persistent
+- * grants, we need to allocate a set of pages that can be
++ * We are using indirect descriptors but don't have a bounce
++ * buffer, so we need to allocate a set of pages that can be
+ * used for mapping indirect grefs
+ */
+ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
+
+ BUG_ON(!list_empty(&rinfo->indirect_pages));
+ for (i = 0; i < num; i++) {
+- struct page *indirect_page = alloc_page(GFP_KERNEL);
++ struct page *indirect_page = alloc_page(GFP_KERNEL |
++ __GFP_ZERO);
+ if (!indirect_page)
+ goto out_of_memory;
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
+@@ -2274,10 +2312,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
+ if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0))
+ blkfront_setup_discard(info);
+
+- if (info->feature_persistent)
++ if (info->feature_persistent_parm)
+ info->feature_persistent =
+ !!xenbus_read_unsigned(info->xbdev->otherend,
+ "feature-persistent", 0);
++ if (info->feature_persistent)
++ info->bounce = true;
+
+ indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
+ "feature-max-indirect-segments", 0);
+@@ -2456,16 +2496,19 @@ static int blkfront_remove(struct xenbus_device *xbdev)
+
+ dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
+
+- del_gendisk(info->gd);
++ if (info->gd)
++ del_gendisk(info->gd);
+
+ mutex_lock(&blkfront_mutex);
+ list_del(&info->info_list);
+ mutex_unlock(&blkfront_mutex);
+
+ blkif_free(info, 0);
+- xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+- blk_cleanup_disk(info->gd);
+- blk_mq_free_tag_set(&info->tag_set);
++ if (info->gd) {
++ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
++ blk_cleanup_disk(info->gd);
++ blk_mq_free_tag_set(&info->tag_set);
++ }
+
+ kfree(info);
+ return 0;
+@@ -2520,11 +2563,10 @@ static void purge_persistent_grants(struct blkfront_info *info)
+ list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
+ node) {
+ if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+- gnttab_query_foreign_access(gnt_list_entry->gref))
++ !gnttab_try_end_foreign_access(gnt_list_entry->gref))
+ continue;
+
+ list_del(&gnt_list_entry->node);
+- gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ gnt_list_entry->gref = GRANT_INVALID_REF;
+ list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+@@ -2539,6 +2581,13 @@ static void blkfront_delay_work(struct work_struct *work)
+ struct blkfront_info *info;
+ bool need_schedule_work = false;
+
++ /*
++ * Note that when using bounce buffers but not persistent grants
++ * there's no need to run blkfront_delay_work because grants are
++ * revoked in blkif_completion or else an error is reported and the
++ * connection is closed.
++ */
++
+ mutex_lock(&blkfront_mutex);
+
+ list_for_each_entry(info, &info_list, info_list) {
+diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
+index 052aa3f65514e..0916de952e091 100644
+--- a/drivers/block/zram/zcomp.c
++++ b/drivers/block/zram/zcomp.c
+@@ -63,12 +63,6 @@ static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
+
+ bool zcomp_available_algorithm(const char *comp)
+ {
+- int i;
+-
+- i = sysfs_match_string(backends, comp);
+- if (i >= 0)
+- return true;
+-
+ /*
+ * Crypto does not ignore a trailing new line symbol,
+ * so make sure you don't supply a string containing
+@@ -217,6 +211,11 @@ struct zcomp *zcomp_create(const char *compress)
+ struct zcomp *comp;
+ int error;
+
++ /*
++ * Crypto API will execute /sbin/modprobe if the compression module
++ * is not loaded yet. We must do it here, otherwise we are about to
++ * call /sbin/modprobe under CPU hot-plug lock.
++ */
+ if (!zcomp_available_algorithm(compress))
+ return ERR_PTR(-EINVAL);
+
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index fcaf2750f68f7..6383c81ac5b37 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -910,7 +910,7 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
+ zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
+ zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+
+- if (count < copied) {
++ if (count <= copied) {
+ zram_slot_unlock(zram, index);
+ break;
+ }
+diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
+index 5a321b4076aab..cab93935cc7f1 100644
+--- a/drivers/bluetooth/bfusb.c
++++ b/drivers/bluetooth/bfusb.c
+@@ -628,6 +628,9 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i
+ data->bulk_out_ep = bulk_out_ep->desc.bEndpointAddress;
+ data->bulk_pkt_size = le16_to_cpu(bulk_out_ep->desc.wMaxPacketSize);
+
++ if (!data->bulk_pkt_size)
++ goto done;
++
+ rwlock_init(&data->lock);
+
+ data->reassembly = NULL;
+diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
+index e4182acee488c..f228cdbccaee3 100644
+--- a/drivers/bluetooth/btbcm.c
++++ b/drivers/bluetooth/btbcm.c
+@@ -6,8 +6,10 @@
+ * Copyright (C) 2015 Intel Corporation
+ */
+
++#include <linux/efi.h>
+ #include <linux/module.h>
+ #include <linux/firmware.h>
++#include <linux/dmi.h>
+ #include <asm/unaligned.h>
+
+ #include <net/bluetooth/bluetooth.h>
+@@ -32,6 +34,43 @@
+ /* For kmalloc-ing the fw-name array instead of putting it on the stack */
+ typedef char bcm_fw_name[BCM_FW_NAME_LEN];
+
++#ifdef CONFIG_EFI
++static int btbcm_set_bdaddr_from_efi(struct hci_dev *hdev)
++{
++ efi_guid_t guid = EFI_GUID(0x74b00bd9, 0x805a, 0x4d61, 0xb5, 0x1f,
++ 0x43, 0x26, 0x81, 0x23, 0xd1, 0x13);
++ bdaddr_t efi_bdaddr, bdaddr;
++ efi_status_t status;
++ unsigned long len;
++ int ret;
++
++ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
++ return -EOPNOTSUPP;
++
++ len = sizeof(efi_bdaddr);
++ status = efi.get_variable(L"BDADDR", &guid, NULL, &len, &efi_bdaddr);
++ if (status != EFI_SUCCESS)
++ return -ENXIO;
++
++ if (len != sizeof(efi_bdaddr))
++ return -EIO;
++
++ baswap(&bdaddr, &efi_bdaddr);
++
++ ret = btbcm_set_bdaddr(hdev, &bdaddr);
++ if (ret)
++ return ret;
++
++ bt_dev_info(hdev, "BCM: Using EFI device address (%pMR)", &bdaddr);
++ return 0;
++}
++#else
++static int btbcm_set_bdaddr_from_efi(struct hci_dev *hdev)
++{
++ return -EOPNOTSUPP;
++}
++#endif
++
+ int btbcm_check_bdaddr(struct hci_dev *hdev)
+ {
+ struct hci_rp_read_bd_addr *bda;
+@@ -85,9 +124,12 @@ int btbcm_check_bdaddr(struct hci_dev *hdev)
+ !bacmp(&bda->bdaddr, BDADDR_BCM4345C5) ||
+ !bacmp(&bda->bdaddr, BDADDR_BCM43430A0) ||
+ !bacmp(&bda->bdaddr, BDADDR_BCM43341B)) {
+- bt_dev_info(hdev, "BCM: Using default device address (%pMR)",
+- &bda->bdaddr);
+- set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
++ /* Try falling back to BDADDR EFI variable */
++ if (btbcm_set_bdaddr_from_efi(hdev) != 0) {
++ bt_dev_info(hdev, "BCM: Using default device address (%pMR)",
++ &bda->bdaddr);
++ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
++ }
+ }
+
+ kfree_skb(skb);
+@@ -343,6 +385,52 @@ static struct sk_buff *btbcm_read_usb_product(struct hci_dev *hdev)
+ return skb;
+ }
+
++static const struct dmi_system_id disable_broken_read_transmit_power[] = {
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,1"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,2"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,4"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir8,1"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir8,2"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "iMac20,1"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "iMac20,2"),
++ },
++ },
++ { }
++};
++
+ static int btbcm_read_info(struct hci_dev *hdev)
+ {
+ struct sk_buff *skb;
+@@ -363,6 +451,10 @@ static int btbcm_read_info(struct hci_dev *hdev)
+ bt_dev_info(hdev, "BCM: features 0x%2.2x", skb->data[1]);
+ kfree_skb(skb);
+
++ /* Read DMI and disable broken Read LE Min/Max Tx Power */
++ if (dmi_first_match(disable_broken_read_transmit_power))
++ set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks);
++
+ return 0;
+ }
+
+@@ -402,6 +494,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
+ { 0x6606, "BCM4345C5" }, /* 003.006.006 */
+ { 0x230f, "BCM4356A2" }, /* 001.003.015 */
+ { 0x220e, "BCM20702A1" }, /* 001.002.014 */
++ { 0x420d, "BCM4349B1" }, /* 002.002.013 */
++ { 0x420e, "BCM4349B1" }, /* 002.002.014 */
+ { 0x4217, "BCM4329B1" }, /* 002.002.023 */
+ { 0x6106, "BCM4359C0" }, /* 003.001.006 */
+ { 0x4106, "BCM4335A0" }, /* 002.001.006 */
+diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
+index f1705b46fc889..2a4cc5d8c2d40 100644
+--- a/drivers/bluetooth/btintel.c
++++ b/drivers/bluetooth/btintel.c
+@@ -2193,8 +2193,15 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ * As a workaround, send HCI Reset command first which will reset the
+ * number of completed commands and allow normal command processing
+ * from now on.
++ *
++ * Regarding the INTEL_BROKEN_SHUTDOWN_LED flag, these devices maybe
++ * in the SW_RFKILL ON state as a workaround of fixing LED issue during
++ * the shutdown() procedure, and once the device is in SW_RFKILL ON
++ * state, the only way to exit out of it is sending the HCI_Reset
++ * command.
+ */
+- if (btintel_test_flag(hdev, INTEL_BROKEN_INITIAL_NCMD)) {
++ if (btintel_test_flag(hdev, INTEL_BROKEN_INITIAL_NCMD) ||
++ btintel_test_flag(hdev, INTEL_BROKEN_SHUTDOWN_LED)) {
+ skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
+ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+@@ -2256,27 +2263,31 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+
+ /* Apply the device specific HCI quirks
+ *
+- * WBS for SdP - SdP and Stp have a same hw_varaint but
+- * different fw_variant
++ * WBS for SdP - For the Legacy ROM products, only SdP
++ * supports the WBS. But the version information is not
++ * enough to use here because the StP2 and SdP have same
++ * hw_variant and fw_variant. So, this flag is set by
++ * the transport driver (btusb) based on the HW info
++ * (idProduct)
+ */
+- if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
++ if (!btintel_test_flag(hdev,
++ INTEL_ROM_LEGACY_NO_WBS_SUPPORT))
+ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+ &hdev->quirks);
+-
+- /* These devices have an issue with LED which doesn't
+- * go off immediately during shutdown. Set the flag
+- * here to send the LED OFF command during shutdown.
+- */
+- btintel_set_flag(hdev, INTEL_BROKEN_LED);
++ if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
++ set_bit(HCI_QUIRK_VALID_LE_STATES,
++ &hdev->quirks);
+
+ err = btintel_legacy_rom_setup(hdev, &ver);
+ break;
+ case 0x0b: /* SfP */
+- case 0x0c: /* WsP */
+ case 0x11: /* JfP */
+ case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* CcP */
++ set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
++ fallthrough;
++ case 0x0c: /* WsP */
+ /* Apply the device specific HCI quirks
+ *
+ * All Legacy bootloader devices support WBS
+@@ -2284,11 +2295,6 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+ &hdev->quirks);
+
+- /* Valid LE States quirk for JfP/ThP familiy */
+- if (ver.hw_variant == 0x11 || ver.hw_variant == 0x12)
+- set_bit(HCI_QUIRK_VALID_LE_STATES,
+- &hdev->quirks);
+-
+ /* Setup MSFT Extension support */
+ btintel_set_msft_opcode(hdev, ver.hw_variant);
+
+@@ -2329,10 +2335,14 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ case 0x12: /* ThP */
+ case 0x13: /* HrP */
+ case 0x14: /* CcP */
+- /* Some legacy bootloader devices from JfP supports both old
+- * and TLV based HCI_Intel_Read_Version command. But we don't
+- * want to use the TLV based setup routines for those legacy
+- * bootloader device.
++ /* Some legacy bootloader devices starting from JfP,
++ * the operational firmware supports both old and TLV based
++ * HCI_Intel_Read_Version command based on the command
++ * parameter.
++ *
++ * For upgrading firmware case, the TLV based version cannot
++ * be used because the firmware filename for legacy bootloader
++ * is based on the old format.
+ *
+ * Also, it is not easy to convert TLV based version from the
+ * legacy version format.
+@@ -2343,7 +2353,20 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ */
+ err = btintel_read_version(hdev, &ver);
+ if (err)
+- return err;
++ break;
++
++ /* Apply the device specific HCI quirks
++ *
++ * All Legacy bootloader devices support WBS
++ */
++ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
++
++ /* Set Valid LE States quirk */
++ set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
++
++ /* Setup MSFT Extension support */
++ btintel_set_msft_opcode(hdev, ver.hw_variant);
++
+ err = btintel_bootloader_setup(hdev, &ver);
+ break;
+ case 0x17:
+@@ -2358,9 +2381,8 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ */
+ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+
+- /* Valid LE States quirk for GfP */
+- if (INTEL_HW_VARIANT(ver_tlv.cnvi_bt) == 0x18)
+- set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
++ /* Apply LE States quirk from solar onwards */
++ set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
+
+ /* Setup MSFT Extension support */
+ btintel_set_msft_opcode(hdev,
+@@ -2371,7 +2393,8 @@ static int btintel_setup_combined(struct hci_dev *hdev)
+ default:
+ bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
+ INTEL_HW_VARIANT(ver_tlv.cnvi_bt));
+- return -EINVAL;
++ err = -EINVAL;
++ break;
+ }
+
+ exit_error:
+@@ -2399,9 +2422,10 @@ static int btintel_shutdown_combined(struct hci_dev *hdev)
+
+ /* Some platforms have an issue with BT LED when the interface is
+ * down or BT radio is turned off, which takes 5 seconds to BT LED
+- * goes off. This command turns off the BT LED immediately.
++ * goes off. As a workaround, sends HCI_Intel_SW_RFKILL to put the
++ * device in the RFKILL ON state which turns off the BT LED immediately.
+ */
+- if (btintel_test_flag(hdev, INTEL_BROKEN_LED)) {
++ if (btintel_test_flag(hdev, INTEL_BROKEN_SHUTDOWN_LED)) {
+ skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ ret = PTR_ERR(skb);
+diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
+index aa64072bbe68d..2b85ebf633211 100644
+--- a/drivers/bluetooth/btintel.h
++++ b/drivers/bluetooth/btintel.h
+@@ -145,8 +145,9 @@ enum {
+ INTEL_FIRMWARE_FAILED,
+ INTEL_BOOTING,
+ INTEL_BROKEN_INITIAL_NCMD,
+- INTEL_BROKEN_LED,
++ INTEL_BROKEN_SHUTDOWN_LED,
+ INTEL_ROM_LEGACY,
++ INTEL_ROM_LEGACY_NO_WBS_SUPPORT,
+
+ __INTEL_NUM_FLAGS,
+ };
+diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c
+index 9872ef18f9fea..d66e4df171d20 100644
+--- a/drivers/bluetooth/btmtksdio.c
++++ b/drivers/bluetooth/btmtksdio.c
+@@ -331,6 +331,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
+ {
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+ struct hci_event_hdr *hdr = (void *)skb->data;
++ u8 evt = hdr->evt;
+ int err;
+
+ /* Fix up the vendor event id with 0xff for vendor specific instead
+@@ -355,7 +356,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
+ if (err < 0)
+ goto err_free_skb;
+
+- if (hdr->evt == HCI_EV_VENDOR) {
++ if (evt == HCI_EV_VENDOR) {
+ if (test_and_clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT,
+ &bdev->tx_state)) {
+ /* Barrier to sync with other CPUs */
+@@ -981,6 +982,8 @@ static int btmtksdio_probe(struct sdio_func *func,
+ hdev->manufacturer = 70;
+ set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+
++ sdio_set_drvdata(func, bdev);
++
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ dev_err(&func->dev, "Can't register HCI device\n");
+@@ -988,8 +991,6 @@ static int btmtksdio_probe(struct sdio_func *func,
+ return err;
+ }
+
+- sdio_set_drvdata(func, bdev);
+-
+ /* pm_runtime_enable would be done after the firmware is being
+ * downloaded because the core layer probably already enables
+ * runtime PM for this func such as the case host->caps &
+@@ -1042,6 +1043,8 @@ static int btmtksdio_runtime_suspend(struct device *dev)
+ if (!bdev)
+ return 0;
+
++ sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
++
+ sdio_claim_host(bdev->func);
+
+ sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err);
+diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
+index e9d91d7c0db48..9ba22b13b4fa0 100644
+--- a/drivers/bluetooth/btmtkuart.c
++++ b/drivers/bluetooth/btmtkuart.c
+@@ -158,8 +158,10 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev,
+ int err;
+
+ hlen = sizeof(*hdr) + wmt_params->dlen;
+- if (hlen > 255)
+- return -EINVAL;
++ if (hlen > 255) {
++ err = -EINVAL;
++ goto err_free_skb;
++ }
+
+ hdr = (struct mtk_wmt_hdr *)&wc;
+ hdr->dir = 1;
+@@ -173,7 +175,7 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev,
+ err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc);
+ if (err < 0) {
+ clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state);
+- return err;
++ goto err_free_skb;
+ }
+
+ /* The vendor specific WMT commands are all answered by a vendor
+@@ -190,13 +192,14 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev,
+ if (err == -EINTR) {
+ bt_dev_err(hdev, "Execution of wmt command interrupted");
+ clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state);
+- return err;
++ goto err_free_skb;
+ }
+
+ if (err) {
+ bt_dev_err(hdev, "Execution of wmt command timed out");
+ clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state);
+- return -ETIMEDOUT;
++ err = -ETIMEDOUT;
++ goto err_free_skb;
+ }
+
+ /* Parse and handle the return WMT event */
+diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
+index 2acb719e596f5..11c7e04bf3947 100644
+--- a/drivers/bluetooth/btqcomsmd.c
++++ b/drivers/bluetooth/btqcomsmd.c
+@@ -122,6 +122,21 @@ static int btqcomsmd_setup(struct hci_dev *hdev)
+ return 0;
+ }
+
++static int btqcomsmd_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
++{
++ int ret;
++
++ ret = qca_set_bdaddr_rome(hdev, bdaddr);
++ if (ret)
++ return ret;
++
++ /* The firmware stops responding for a while after setting the bdaddr,
++ * causing timeouts for subsequent commands. Sleep a bit to avoid this.
++ */
++ usleep_range(1000, 10000);
++ return 0;
++}
++
+ static int btqcomsmd_probe(struct platform_device *pdev)
+ {
+ struct btqcomsmd *btq;
+@@ -162,7 +177,7 @@ static int btqcomsmd_probe(struct platform_device *pdev)
+ hdev->close = btqcomsmd_close;
+ hdev->send = btqcomsmd_send;
+ hdev->setup = btqcomsmd_setup;
+- hdev->set_bdaddr = qca_set_bdaddr_rome;
++ hdev->set_bdaddr = btqcomsmd_set_bdaddr;
+
+ ret = hci_register_dev(hdev);
+ if (ret < 0)
+diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
+index 199e8f7d426d9..2e4ac39dd9751 100644
+--- a/drivers/bluetooth/btsdio.c
++++ b/drivers/bluetooth/btsdio.c
+@@ -355,6 +355,7 @@ static void btsdio_remove(struct sdio_func *func)
+ if (!data)
+ return;
+
++ cancel_work_sync(&data->work);
+ hdev = data->hdev;
+
+ sdio_set_drvdata(func, NULL);
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index 60d2fce59a71d..84a42348b3bcb 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -59,7 +59,9 @@ static struct usb_driver btusb_driver;
+ #define BTUSB_WIDEBAND_SPEECH 0x400000
+ #define BTUSB_VALID_LE_STATES 0x800000
+ #define BTUSB_QCA_WCN6855 0x1000000
++#define BTUSB_INTEL_BROKEN_SHUTDOWN_LED 0x2000000
+ #define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000
++#define BTUSB_INTEL_NO_WBS_SUPPORT 0x8000000
+
+ static const struct usb_device_id btusb_table[] = {
+ /* Generic Bluetooth USB device */
+@@ -295,6 +297,24 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x0cf3, 0xe600), .driver_info = BTUSB_QCA_WCN6855 |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0cc), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0d6), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0e3), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x10ab, 0x9309), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x10ab, 0x9409), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0d0), .driver_info = BTUSB_QCA_WCN6855 |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
+
+ /* Broadcom BCM2035 */
+ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
+@@ -365,16 +385,25 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED },
+ { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
+ { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED |
+- BTUSB_INTEL_BROKEN_INITIAL_NCMD },
+- { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED },
++ BTUSB_INTEL_NO_WBS_SUPPORT |
++ BTUSB_INTEL_BROKEN_INITIAL_NCMD |
++ BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
++ { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED |
++ BTUSB_INTEL_NO_WBS_SUPPORT |
++ BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
+ { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED },
+- { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED },
++ { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED |
++ BTUSB_INTEL_BROKEN_SHUTDOWN_LED },
+ { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_COMBINED },
+
+ /* Other Intel Bluetooth devices */
+ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
+ .driver_info = BTUSB_IGNORE },
+
++ /* Realtek 8821CE Bluetooth devices */
++ { USB_DEVICE(0x13d3, 0x3529), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++
+ /* Realtek 8822CE Bluetooth devices */
+ { USB_DEVICE(0x0bda, 0xb00c), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+@@ -382,8 +411,30 @@ static const struct usb_device_id blacklist_table[] = {
+ BTUSB_WIDEBAND_SPEECH },
+
+ /* Realtek 8852AE Bluetooth devices */
++ { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
+ { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x0bda, 0x4852), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x04c5, 0x165c), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++
++ /* Realtek 8852CE Bluetooth devices */
++ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
+
+ /* Realtek Bluetooth devices */
+ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
+@@ -410,10 +461,27 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x13d3, 0x3563), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x13d3, 0x3564), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
+ { USB_DEVICE(0x0489, 0xe0cd), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
+
++ /* MediaTek MT7922A Bluetooth devices */
++ { USB_DEVICE(0x0489, 0xe0d8), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x0489, 0xe0f5), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
++
+ /* Additional Realtek 8723AE Bluetooth devices */
+ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
+ { USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK },
+@@ -433,9 +501,15 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK },
+ { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK },
+
++ /* Additional Realtek 8761B Bluetooth devices */
++ { USB_DEVICE(0x2357, 0x0604), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++
+ /* Additional Realtek 8761BU Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x2550, 0x8761), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
+
+ /* Additional Realtek 8821AE Bluetooth devices */
+ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
+@@ -451,10 +525,6 @@ static const struct usb_device_id blacklist_table[] = {
+ /* Additional Realtek 8822CE Bluetooth devices */
+ { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+- /* Bluetooth component of Realtek 8852AE device */
+- { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
+- BTUSB_WIDEBAND_SPEECH },
+-
+ { USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+ { USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK |
+@@ -672,13 +742,13 @@ static inline void btusb_free_frags(struct btusb_data *data)
+
+ spin_lock_irqsave(&data->rxlock, flags);
+
+- kfree_skb(data->evt_skb);
++ dev_kfree_skb_irq(data->evt_skb);
+ data->evt_skb = NULL;
+
+- kfree_skb(data->acl_skb);
++ dev_kfree_skb_irq(data->acl_skb);
+ data->acl_skb = NULL;
+
+- kfree_skb(data->sco_skb);
++ dev_kfree_skb_irq(data->sco_skb);
+ data->sco_skb = NULL;
+
+ spin_unlock_irqrestore(&data->rxlock, flags);
+@@ -1686,7 +1756,7 @@ static int btusb_switch_alt_setting(struct hci_dev *hdev, int new_alts)
+ * alternate setting.
+ */
+ spin_lock_irqsave(&data->rxlock, flags);
+- kfree_skb(data->sco_skb);
++ dev_kfree_skb_irq(data->sco_skb);
+ data->sco_skb = NULL;
+ spin_unlock_irqrestore(&data->rxlock, flags);
+
+@@ -1838,6 +1908,11 @@ static int btusb_setup_csr(struct hci_dev *hdev)
+
+ rp = (struct hci_rp_read_local_version *)skb->data;
+
++ bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u",
++ le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev),
++ le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver),
++ le16_to_cpu(rp->manufacturer));
++
+ /* Detect a wide host of Chinese controllers that aren't CSR.
+ *
+ * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891
+@@ -2217,6 +2292,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
+ skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC);
+ if (!skb) {
+ hdev->stat.err_rx++;
++ kfree(urb->setup_packet);
+ return;
+ }
+
+@@ -2237,6 +2313,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
+ data->evt_skb = skb_clone(skb, GFP_ATOMIC);
+ if (!data->evt_skb) {
+ kfree_skb(skb);
++ kfree(urb->setup_packet);
+ return;
+ }
+ }
+@@ -2245,6 +2322,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
+ if (err < 0) {
+ kfree_skb(data->evt_skb);
+ data->evt_skb = NULL;
++ kfree(urb->setup_packet);
+ return;
+ }
+
+@@ -2255,6 +2333,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
+ wake_up_bit(&data->flags,
+ BTUSB_TX_WAIT_VND_EVT);
+ }
++ kfree(urb->setup_packet);
+ return;
+ } else if (urb->status == -ENOENT) {
+ /* Avoid suspend failed when usb_kill_urb */
+@@ -2275,6 +2354,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
+ usb_anchor_urb(urb, &data->ctrl_anchor);
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
++ kfree(urb->setup_packet);
+ /* -EPERM: urb is being killed;
+ * -ENODEV: device got disconnected
+ */
+@@ -2367,15 +2447,29 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev,
+
+ set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags);
+
++ /* WMT cmd/event doesn't follow up the generic HCI cmd/event handling,
++ * it needs constantly polling control pipe until the host received the
++ * WMT event, thus, we should require to specifically acquire PM counter
++ * on the USB to prevent the interface from entering auto suspended
++ * while WMT cmd/event in progress.
++ */
++ err = usb_autopm_get_interface(data->intf);
++ if (err < 0)
++ goto err_free_wc;
++
+ err = __hci_cmd_send(hdev, 0xfc6f, hlen, wc);
+
+ if (err < 0) {
+ clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags);
++ usb_autopm_put_interface(data->intf);
+ goto err_free_wc;
+ }
+
+ /* Submit control IN URB on demand to process the WMT event */
+ err = btusb_mtk_submit_wmt_recv_urb(hdev);
++
++ usb_autopm_put_interface(data->intf);
++
+ if (err < 0)
+ goto err_free_wc;
+
+@@ -2515,6 +2609,7 @@ static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwnam
+ } else {
+ bt_dev_err(hdev, "Failed wmt patch dwnld status (%d)",
+ status);
++ err = -EIO;
+ goto err_release_fw;
+ }
+ }
+@@ -2804,11 +2899,16 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
+ case 0x7668:
+ fwname = FIRMWARE_MT7668;
+ break;
++ case 0x7922:
+ case 0x7961:
+ snprintf(fw_bin_name, sizeof(fw_bin_name),
+ "mediatek/BT_RAM_CODE_MT%04x_1_%x_hdr.bin",
+ dev_id & 0xffff, (fw_version & 0xff) + 1);
+ err = btusb_mtk_setup_firmware_79xx(hdev, fw_bin_name);
++ if (err < 0) {
++ bt_dev_err(hdev, "Failed to set up firmware (%d)", err);
++ return err;
++ }
+
+ /* It's Device EndPoint Reset Option Register */
+ btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT);
+@@ -2828,6 +2928,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
+ }
+
+ hci_set_msft_opcode(hdev, 0xFD30);
++ hci_set_aosp_capable(hdev);
+ goto done;
+ default:
+ bt_dev_err(hdev, "Unsupported hardware variant (%08x)",
+@@ -3806,8 +3907,14 @@ static int btusb_probe(struct usb_interface *intf,
+ hdev->send = btusb_send_frame_intel;
+ hdev->cmd_timeout = btusb_intel_cmd_timeout;
+
++ if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT)
++ btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT);
++
+ if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD)
+ btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD);
++
++ if (id->driver_info & BTUSB_INTEL_BROKEN_SHUTDOWN_LED)
++ btintel_set_flag(hdev, INTEL_BROKEN_SHUTDOWN_LED);
+ }
+
+ if (id->driver_info & BTUSB_MARVELL)
+diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
+index ef54afa293574..cf622e4596055 100644
+--- a/drivers/bluetooth/hci_bcm.c
++++ b/drivers/bluetooth/hci_bcm.c
+@@ -1188,7 +1188,12 @@ static int bcm_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ dev->dev = &pdev->dev;
+- dev->irq = platform_get_irq(pdev, 0);
++
++ ret = platform_get_irq(pdev, 0);
++ if (ret < 0)
++ return ret;
++
++ dev->irq = ret;
+
+ /* Initialize routing field to an unused value */
+ dev->pcm_int_params[0] = 0xff;
+@@ -1510,8 +1515,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
+ { .compatible = "brcm,bcm4345c5" },
+ { .compatible = "brcm,bcm4330-bt" },
+ { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
++ { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
+ { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
+ { .compatible = "brcm,bcm4335a0" },
++ { .compatible = "infineon,cyw55572-bt" },
+ { },
+ };
+ MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
+diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
+index cf4a560958173..8055f63603f45 100644
+--- a/drivers/bluetooth/hci_bcsp.c
++++ b/drivers/bluetooth/hci_bcsp.c
+@@ -378,7 +378,7 @@ static void bcsp_pkt_cull(struct bcsp_struct *bcsp)
+ i++;
+
+ __skb_unlink(skb, &bcsp->unack);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ }
+
+ if (skb_queue_empty(&bcsp->unack))
+diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
+index 0c0dedece59c5..1363b21c81b73 100644
+--- a/drivers/bluetooth/hci_h5.c
++++ b/drivers/bluetooth/hci_h5.c
+@@ -313,7 +313,7 @@ static void h5_pkt_cull(struct h5 *h5)
+ break;
+
+ __skb_unlink(skb, &h5->unack);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ }
+
+ if (skb_queue_empty(&h5->unack))
+@@ -587,9 +587,11 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count)
+ count -= processed;
+ }
+
+- pm_runtime_get(&hu->serdev->dev);
+- pm_runtime_mark_last_busy(&hu->serdev->dev);
+- pm_runtime_put_autosuspend(&hu->serdev->dev);
++ if (hu->serdev) {
++ pm_runtime_get(&hu->serdev->dev);
++ pm_runtime_mark_last_busy(&hu->serdev->dev);
++ pm_runtime_put_autosuspend(&hu->serdev->dev);
++ }
+
+ return 0;
+ }
+@@ -627,9 +629,11 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+ break;
+ }
+
+- pm_runtime_get_sync(&hu->serdev->dev);
+- pm_runtime_mark_last_busy(&hu->serdev->dev);
+- pm_runtime_put_autosuspend(&hu->serdev->dev);
++ if (hu->serdev) {
++ pm_runtime_get_sync(&hu->serdev->dev);
++ pm_runtime_mark_last_busy(&hu->serdev->dev);
++ pm_runtime_put_autosuspend(&hu->serdev->dev);
++ }
+
+ return 0;
+ }
+@@ -846,6 +850,8 @@ static int h5_serdev_probe(struct serdev_device *serdev)
+ h5->vnd = data->vnd;
+ }
+
++ if (data->driver_info & H5_INFO_WAKEUP_DISABLE)
++ set_bit(H5_WAKEUP_DISABLE, &h5->flags);
+
+ h5->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW);
+ if (IS_ERR(h5->enable_gpio))
+@@ -860,9 +866,6 @@ static int h5_serdev_probe(struct serdev_device *serdev)
+ if (err)
+ return err;
+
+- if (data->driver_info & H5_INFO_WAKEUP_DISABLE)
+- set_bit(H5_WAKEUP_DISABLE, &h5->flags);
+-
+ return 0;
+ }
+
+@@ -962,11 +965,13 @@ static void h5_btrtl_open(struct h5 *h5)
+ serdev_device_set_parity(h5->hu->serdev, SERDEV_PARITY_EVEN);
+ serdev_device_set_baudrate(h5->hu->serdev, 115200);
+
+- pm_runtime_set_active(&h5->hu->serdev->dev);
+- pm_runtime_use_autosuspend(&h5->hu->serdev->dev);
+- pm_runtime_set_autosuspend_delay(&h5->hu->serdev->dev,
+- SUSPEND_TIMEOUT_MS);
+- pm_runtime_enable(&h5->hu->serdev->dev);
++ if (!test_bit(H5_WAKEUP_DISABLE, &h5->flags)) {
++ pm_runtime_set_active(&h5->hu->serdev->dev);
++ pm_runtime_use_autosuspend(&h5->hu->serdev->dev);
++ pm_runtime_set_autosuspend_delay(&h5->hu->serdev->dev,
++ SUSPEND_TIMEOUT_MS);
++ pm_runtime_enable(&h5->hu->serdev->dev);
++ }
+
+ /* The controller needs up to 500ms to wakeup */
+ gpiod_set_value_cansleep(h5->enable_gpio, 1);
+@@ -976,7 +981,8 @@ static void h5_btrtl_open(struct h5 *h5)
+
+ static void h5_btrtl_close(struct h5 *h5)
+ {
+- pm_runtime_disable(&h5->hu->serdev->dev);
++ if (!test_bit(H5_WAKEUP_DISABLE, &h5->flags))
++ pm_runtime_disable(&h5->hu->serdev->dev);
+
+ gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
+ gpiod_set_value_cansleep(h5->enable_gpio, 0);
+diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
+index 7249b91d9b91a..78afb9a348e70 100644
+--- a/drivers/bluetooth/hci_intel.c
++++ b/drivers/bluetooth/hci_intel.c
+@@ -1217,7 +1217,11 @@ static struct platform_driver intel_driver = {
+
+ int __init intel_init(void)
+ {
+- platform_driver_register(&intel_driver);
++ int err;
++
++ err = platform_driver_register(&intel_driver);
++ if (err)
++ return err;
+
+ return hci_uart_register_proto(&intel_proto);
+ }
+diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
+index 5ed2cfa7da1d9..2d960a5e36793 100644
+--- a/drivers/bluetooth/hci_ldisc.c
++++ b/drivers/bluetooth/hci_ldisc.c
+@@ -490,6 +490,11 @@ static int hci_uart_tty_open(struct tty_struct *tty)
+ BT_ERR("Can't allocate control structure");
+ return -ENFILE;
+ }
++ if (percpu_init_rwsem(&hu->proto_lock)) {
++ BT_ERR("Can't allocate semaphore structure");
++ kfree(hu);
++ return -ENOMEM;
++ }
+
+ tty->disc_data = hu;
+ hu->tty = tty;
+@@ -502,8 +507,6 @@ static int hci_uart_tty_open(struct tty_struct *tty)
+ INIT_WORK(&hu->init_ready, hci_uart_init_work);
+ INIT_WORK(&hu->write_work, hci_uart_write_work);
+
+- percpu_init_rwsem(&hu->proto_lock);
+-
+ /* Flush any pending characters in the driver */
+ tty_driver_flush_buffer(tty);
+
+diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
+index eb1e736efeebb..e4e5b26e2c33b 100644
+--- a/drivers/bluetooth/hci_ll.c
++++ b/drivers/bluetooth/hci_ll.c
+@@ -345,7 +345,7 @@ static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+ default:
+ BT_ERR("illegal hcill state: %ld (losing packet)",
+ ll->hcill_state);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ break;
+ }
+
+diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
+index 05f7f6de6863d..97da0b2bfd17e 100644
+--- a/drivers/bluetooth/hci_nokia.c
++++ b/drivers/bluetooth/hci_nokia.c
+@@ -734,7 +734,11 @@ static int nokia_bluetooth_serdev_probe(struct serdev_device *serdev)
+ return err;
+ }
+
+- clk_prepare_enable(sysclk);
++ err = clk_prepare_enable(sysclk);
++ if (err) {
++ dev_err(dev, "could not enable sysclk: %d", err);
++ return err;
++ }
+ btdev->sysclk_speed = clk_get_rate(sysclk);
+ clk_disable_unprepare(sysclk);
+
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 53deea2eb7b4d..56b4b7248483a 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -78,7 +78,8 @@ enum qca_flags {
+ QCA_HW_ERROR_EVENT,
+ QCA_SSR_TRIGGERED,
+ QCA_BT_OFF,
+- QCA_ROM_FW
++ QCA_ROM_FW,
++ QCA_DEBUGFS_CREATED,
+ };
+
+ enum qca_capabilities {
+@@ -635,6 +636,9 @@ static void qca_debugfs_init(struct hci_dev *hdev)
+ if (!hdev->debugfs)
+ return;
+
++ if (test_and_set_bit(QCA_DEBUGFS_CREATED, &qca->flags))
++ return;
++
+ ibs_dir = debugfs_create_dir("ibs", hdev->debugfs);
+
+ /* read only */
+@@ -696,9 +700,9 @@ static int qca_close(struct hci_uart *hu)
+ skb_queue_purge(&qca->tx_wait_q);
+ skb_queue_purge(&qca->txq);
+ skb_queue_purge(&qca->rx_memdump_q);
+- del_timer(&qca->tx_idle_timer);
+- del_timer(&qca->wake_retrans_timer);
+ destroy_workqueue(qca->workqueue);
++ del_timer_sync(&qca->tx_idle_timer);
++ del_timer_sync(&qca->wake_retrans_timer);
+ qca->hu = NULL;
+
+ kfree_skb(qca->rx_skb);
+@@ -912,7 +916,7 @@ static int qca_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+ default:
+ BT_ERR("Illegal tx state: %d (losing packet)",
+ qca->tx_ibs_state);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ break;
+ }
+
+@@ -1582,10 +1586,11 @@ static bool qca_prevent_wake(struct hci_dev *hdev)
+ struct hci_uart *hu = hci_get_drvdata(hdev);
+ bool wakeup;
+
+- /* UART driver handles the interrupt from BT SoC.So we need to use
+- * device handle of UART driver to get the status of device may wakeup.
++ /* BT SoC attached through the serial bus is handled by the serdev driver.
++ * So we need to use the device handle of the serdev driver to get the
++ * status of device may wakeup.
+ */
+- wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent);
++ wakeup = device_may_wakeup(&hu->serdev->ctrl->dev);
+ bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup);
+
+ return !wakeup;
+@@ -1927,6 +1932,9 @@ static int qca_power_off(struct hci_dev *hdev)
+ hu->hdev->hw_error = NULL;
+ hu->hdev->cmd_timeout = NULL;
+
++ del_timer_sync(&qca->wake_retrans_timer);
++ del_timer_sync(&qca->tx_idle_timer);
++
+ /* Stop sending shutdown command if soc crashes. */
+ if (soc_type != QCA_ROME
+ && qca->memdump_state == QCA_MEMDUMP_IDLE) {
+@@ -2055,14 +2063,14 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+
+ qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
+ GPIOD_OUT_LOW);
+- if (!qcadev->bt_en && data->soc_type == QCA_WCN6750) {
++ if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) {
+ dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
+ power_ctrl_enabled = false;
+ }
+
+ qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
+ GPIOD_IN);
+- if (!qcadev->sw_ctrl && data->soc_type == QCA_WCN6750)
++ if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750)
+ dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+
+ qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
+@@ -2084,7 +2092,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+
+ qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
+ GPIOD_OUT_LOW);
+- if (!qcadev->bt_en) {
++ if (IS_ERR_OR_NULL(qcadev->bt_en)) {
+ dev_warn(&serdev->dev, "failed to acquire enable gpio\n");
+ power_ctrl_enabled = false;
+ }
+@@ -2153,10 +2161,17 @@ static void qca_serdev_shutdown(struct device *dev)
+ int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
+ struct serdev_device *serdev = to_serdev_device(dev);
+ struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
++ struct hci_uart *hu = &qcadev->serdev_hu;
++ struct hci_dev *hdev = hu->hdev;
++ struct qca_data *qca = hu->priv;
+ const u8 ibs_wake_cmd[] = { 0xFD };
+ const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
+
+ if (qcadev->btsoc_type == QCA_QCA6390) {
++ if (test_bit(QCA_BT_OFF, &qca->flags) ||
++ !test_bit(HCI_RUNNING, &hdev->flags))
++ return;
++
+ serdev_device_write_flush(serdev);
+ ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
+ sizeof(ibs_wake_cmd));
+diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
+index 3b00d82d36cf7..649d112eea787 100644
+--- a/drivers/bluetooth/hci_serdev.c
++++ b/drivers/bluetooth/hci_serdev.c
+@@ -301,9 +301,12 @@ int hci_uart_register_device(struct hci_uart *hu,
+
+ serdev_device_set_client_ops(hu->serdev, &hci_serdev_client_ops);
+
++ if (percpu_init_rwsem(&hu->proto_lock))
++ return -ENOMEM;
++
+ err = serdev_device_open(hu->serdev);
+ if (err)
+- return err;
++ goto err_rwsem;
+
+ err = p->open(hu);
+ if (err)
+@@ -327,7 +330,6 @@ int hci_uart_register_device(struct hci_uart *hu,
+
+ INIT_WORK(&hu->init_ready, hci_uart_init_work);
+ INIT_WORK(&hu->write_work, hci_uart_write_work);
+- percpu_init_rwsem(&hu->proto_lock);
+
+ /* Only when vendor specific setup callback is provided, consider
+ * the manufacturer information valid. This avoids filling in the
+@@ -377,6 +379,8 @@ err_alloc:
+ p->close(hu);
+ err_open:
+ serdev_device_close(hu->serdev);
++err_rwsem:
++ percpu_free_rwsem(&hu->proto_lock);
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(hci_uart_register_device);
+@@ -398,5 +402,6 @@ void hci_uart_unregister_device(struct hci_uart *hu)
+ clear_bit(HCI_UART_PROTO_READY, &hu->flags);
+ serdev_device_close(hu->serdev);
+ }
++ percpu_free_rwsem(&hu->proto_lock);
+ }
+ EXPORT_SYMBOL_GPL(hci_uart_unregister_device);
+diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
+index 8ab26dec5f6e8..8469f9876dd26 100644
+--- a/drivers/bluetooth/hci_vhci.c
++++ b/drivers/bluetooth/hci_vhci.c
+@@ -121,6 +121,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
+ if (opcode & 0x80)
+ set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
+
++ set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
++
+ if (hci_register_dev(hdev) < 0) {
+ BT_ERR("Can't register HCI device");
+ hci_free_dev(hdev);
+diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
+index 57908ce4fae85..612f10456849f 100644
+--- a/drivers/bluetooth/virtio_bt.c
++++ b/drivers/bluetooth/virtio_bt.c
+@@ -202,6 +202,9 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb)
+ hci_skb_pkt_type(skb) = pkt_type;
+ hci_recv_frame(vbt->hdev, skb);
+ break;
++ default:
++ kfree_skb(skb);
++ break;
+ }
+ }
+
+@@ -216,7 +219,7 @@ static void virtbt_rx_work(struct work_struct *work)
+ if (!skb)
+ return;
+
+- skb->len = len;
++ skb_put(skb, len);
+ virtbt_rx_handle(vbt, skb);
+
+ if (virtbt_add_inbuf(vbt) < 0)
+diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
+index 52c2f35a26a99..16da51130d1a1 100644
+--- a/drivers/bus/Makefile
++++ b/drivers/bus/Makefile
+@@ -39,4 +39,4 @@ obj-$(CONFIG_VEXPRESS_CONFIG) += vexpress-config.o
+ obj-$(CONFIG_DA8XX_MSTPRI) += da8xx-mstpri.o
+
+ # MHI
+-obj-$(CONFIG_MHI_BUS) += mhi/
++obj-y += mhi/
+diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
+index 8fd4a356a86ec..74593a1722fe0 100644
+--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
++++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
+@@ -1236,14 +1236,14 @@ error_cleanup_mc_io:
+ static int fsl_mc_bus_remove(struct platform_device *pdev)
+ {
+ struct fsl_mc *mc = platform_get_drvdata(pdev);
++ struct fsl_mc_io *mc_io;
+
+ if (!fsl_mc_is_root_dprc(&mc->root_mc_bus_dev->dev))
+ return -EINVAL;
+
++ mc_io = mc->root_mc_bus_dev->mc_io;
+ fsl_mc_device_remove(mc->root_mc_bus_dev);
+-
+- fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io);
+- mc->root_mc_bus_dev->mc_io = NULL;
++ fsl_destroy_mc_io(mc_io);
+
+ bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb);
+
+diff --git a/drivers/bus/hisi_lpc.c b/drivers/bus/hisi_lpc.c
+index 378f5d62a9912..e7eaa8784fee0 100644
+--- a/drivers/bus/hisi_lpc.c
++++ b/drivers/bus/hisi_lpc.c
+@@ -503,13 +503,13 @@ static int hisi_lpc_acpi_probe(struct device *hostdev)
+ {
+ struct acpi_device *adev = ACPI_COMPANION(hostdev);
+ struct acpi_device *child;
++ struct platform_device *pdev;
+ int ret;
+
+ /* Only consider the children of the host */
+ list_for_each_entry(child, &adev->children, node) {
+ const char *hid = acpi_device_hid(child);
+ const struct hisi_lpc_acpi_cell *cell;
+- struct platform_device *pdev;
+ const struct resource *res;
+ bool found = false;
+ int num_res;
+@@ -571,22 +571,24 @@ static int hisi_lpc_acpi_probe(struct device *hostdev)
+
+ ret = platform_device_add_resources(pdev, res, num_res);
+ if (ret)
+- goto fail;
++ goto fail_put_device;
+
+ ret = platform_device_add_data(pdev, cell->pdata,
+ cell->pdata_size);
+ if (ret)
+- goto fail;
++ goto fail_put_device;
+
+ ret = platform_device_add(pdev);
+ if (ret)
+- goto fail;
++ goto fail_put_device;
+
+ acpi_device_set_enumerated(child);
+ }
+
+ return 0;
+
++fail_put_device:
++ platform_device_put(pdev);
+ fail:
+ hisi_lpc_acpi_remove(hostdev);
+ return ret;
+diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
+index 28bb65a5613fd..201767823edb5 100644
+--- a/drivers/bus/imx-weim.c
++++ b/drivers/bus/imx-weim.c
+@@ -192,8 +192,8 @@ static int weim_parse_dt(struct platform_device *pdev, void __iomem *base)
+ const struct of_device_id *of_id = of_match_device(weim_id_table,
+ &pdev->dev);
+ const struct imx_weim_devtype *devtype = of_id->data;
++ int ret = 0, have_child = 0;
+ struct device_node *child;
+- int ret, have_child = 0;
+ struct cs_timing_state ts = {};
+ u32 reg;
+
+diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c
+index a4388440aca7a..972603ed06a6c 100644
+--- a/drivers/bus/intel-ixp4xx-eb.c
++++ b/drivers/bus/intel-ixp4xx-eb.c
+@@ -33,7 +33,7 @@
+ #define IXP4XX_EXP_TIMING_STRIDE 0x04
+ #define IXP4XX_EXP_CS_EN BIT(31)
+ #define IXP456_EXP_PAR_EN BIT(30) /* Only on IXP45x and IXP46x */
+-#define IXP4XX_EXP_T1_MASK GENMASK(28, 27)
++#define IXP4XX_EXP_T1_MASK GENMASK(29, 28)
+ #define IXP4XX_EXP_T1_SHIFT 28
+ #define IXP4XX_EXP_T2_MASK GENMASK(27, 26)
+ #define IXP4XX_EXP_T2_SHIFT 26
+@@ -49,7 +49,7 @@
+ #define IXP4XX_EXP_SIZE_SHIFT 10
+ #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */
+ #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */
+-#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */
++#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */
+ #define IXP4XX_EXP_BYTE_RD16 BIT(6)
+ #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */
+ #define IXP4XX_EXP_MUX_EN BIT(4)
+@@ -57,8 +57,6 @@
+ #define IXP4XX_EXP_WORD BIT(2) /* Always zero */
+ #define IXP4XX_EXP_WR_EN BIT(1)
+ #define IXP4XX_EXP_BYTE_EN BIT(0)
+-#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD)
+-#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD)
+
+ #define IXP4XX_EXP_CNFG0 0x20
+ #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31)
+@@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb,
+ cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT;
+ }
+
+- if (eb->is_42x)
+- cs_cfg &= ~IXP42X_RESERVED;
+ if (eb->is_43x) {
+- cs_cfg &= ~IXP43X_RESERVED;
++ /* Should always be zero */
++ cs_cfg &= ~IXP4XX_EXP_WORD;
+ /*
+ * This bit for Intel strata flash is currently unused, but let's
+ * report it if we find one.
+diff --git a/drivers/bus/mhi/Kconfig b/drivers/bus/mhi/Kconfig
+index da5cd0c9fc620..4748df7f9cd58 100644
+--- a/drivers/bus/mhi/Kconfig
++++ b/drivers/bus/mhi/Kconfig
+@@ -2,30 +2,7 @@
+ #
+ # MHI bus
+ #
+-# Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++# Copyright (c) 2021, Linaro Ltd.
+ #
+
+-config MHI_BUS
+- tristate "Modem Host Interface (MHI) bus"
+- help
+- Bus driver for MHI protocol. Modem Host Interface (MHI) is a
+- communication protocol used by the host processors to control
+- and communicate with modem devices over a high speed peripheral
+- bus or shared memory.
+-
+-config MHI_BUS_DEBUG
+- bool "Debugfs support for the MHI bus"
+- depends on MHI_BUS && DEBUG_FS
+- help
+- Enable debugfs support for use with the MHI transport. Allows
+- reading and/or modifying some values within the MHI controller
+- for debug and test purposes.
+-
+-config MHI_BUS_PCI_GENERIC
+- tristate "MHI PCI controller driver"
+- depends on MHI_BUS
+- depends on PCI
+- help
+- This driver provides MHI PCI controller driver for devices such as
+- Qualcomm SDX55 based PCIe modems.
+-
++source "drivers/bus/mhi/host/Kconfig"
+diff --git a/drivers/bus/mhi/Makefile b/drivers/bus/mhi/Makefile
+index 0a2d778d6fb42..5f5708a249f54 100644
+--- a/drivers/bus/mhi/Makefile
++++ b/drivers/bus/mhi/Makefile
+@@ -1,6 +1,2 @@
+-# core layer
+-obj-y += core/
+-
+-obj-$(CONFIG_MHI_BUS_PCI_GENERIC) += mhi_pci_generic.o
+-mhi_pci_generic-y += pci_generic.o
+-
++# Host MHI stack
++obj-y += host/
+diff --git a/drivers/bus/mhi/core/Makefile b/drivers/bus/mhi/core/Makefile
+deleted file mode 100644
+index c3feb4130aa37..0000000000000
+--- a/drivers/bus/mhi/core/Makefile
++++ /dev/null
+@@ -1,4 +0,0 @@
+-obj-$(CONFIG_MHI_BUS) += mhi.o
+-
+-mhi-y := init.o main.o pm.o boot.o
+-mhi-$(CONFIG_MHI_BUS_DEBUG) += debugfs.o
+diff --git a/drivers/bus/mhi/core/boot.c b/drivers/bus/mhi/core/boot.c
+deleted file mode 100644
+index 0a972620a4030..0000000000000
+--- a/drivers/bus/mhi/core/boot.c
++++ /dev/null
+@@ -1,533 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#include <linux/delay.h>
+-#include <linux/device.h>
+-#include <linux/dma-direction.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/firmware.h>
+-#include <linux/interrupt.h>
+-#include <linux/list.h>
+-#include <linux/mhi.h>
+-#include <linux/module.h>
+-#include <linux/random.h>
+-#include <linux/slab.h>
+-#include <linux/wait.h>
+-#include "internal.h"
+-
+-/* Setup RDDM vector table for RDDM transfer and program RXVEC */
+-void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
+- struct image_info *img_info)
+-{
+- struct mhi_buf *mhi_buf = img_info->mhi_buf;
+- struct bhi_vec_entry *bhi_vec = img_info->bhi_vec;
+- void __iomem *base = mhi_cntrl->bhie;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 sequence_id;
+- unsigned int i;
+-
+- for (i = 0; i < img_info->entries - 1; i++, mhi_buf++, bhi_vec++) {
+- bhi_vec->dma_addr = mhi_buf->dma_addr;
+- bhi_vec->size = mhi_buf->len;
+- }
+-
+- dev_dbg(dev, "BHIe programming for RDDM\n");
+-
+- mhi_write_reg(mhi_cntrl, base, BHIE_RXVECADDR_HIGH_OFFS,
+- upper_32_bits(mhi_buf->dma_addr));
+-
+- mhi_write_reg(mhi_cntrl, base, BHIE_RXVECADDR_LOW_OFFS,
+- lower_32_bits(mhi_buf->dma_addr));
+-
+- mhi_write_reg(mhi_cntrl, base, BHIE_RXVECSIZE_OFFS, mhi_buf->len);
+- sequence_id = MHI_RANDOM_U32_NONZERO(BHIE_RXVECSTATUS_SEQNUM_BMSK);
+-
+- mhi_write_reg_field(mhi_cntrl, base, BHIE_RXVECDB_OFFS,
+- BHIE_RXVECDB_SEQNUM_BMSK, BHIE_RXVECDB_SEQNUM_SHFT,
+- sequence_id);
+-
+- dev_dbg(dev, "Address: %p and len: 0x%zx sequence: %u\n",
+- &mhi_buf->dma_addr, mhi_buf->len, sequence_id);
+-}
+-
+-/* Collect RDDM buffer during kernel panic */
+-static int __mhi_download_rddm_in_panic(struct mhi_controller *mhi_cntrl)
+-{
+- int ret;
+- u32 rx_status;
+- enum mhi_ee_type ee;
+- const u32 delayus = 2000;
+- u32 retry = (mhi_cntrl->timeout_ms * 1000) / delayus;
+- const u32 rddm_timeout_us = 200000;
+- int rddm_retry = rddm_timeout_us / delayus;
+- void __iomem *base = mhi_cntrl->bhie;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- dev_dbg(dev, "Entered with pm_state:%s dev_state:%s ee:%s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- TO_MHI_EXEC_STR(mhi_cntrl->ee));
+-
+- /*
+- * This should only be executing during a kernel panic, we expect all
+- * other cores to shutdown while we're collecting RDDM buffer. After
+- * returning from this function, we expect the device to reset.
+- *
+- * Normaly, we read/write pm_state only after grabbing the
+- * pm_lock, since we're in a panic, skipping it. Also there is no
+- * gurantee that this state change would take effect since
+- * we're setting it w/o grabbing pm_lock
+- */
+- mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
+- /* update should take the effect immediately */
+- smp_wmb();
+-
+- /*
+- * Make sure device is not already in RDDM. In case the device asserts
+- * and a kernel panic follows, device will already be in RDDM.
+- * Do not trigger SYS ERR again and proceed with waiting for
+- * image download completion.
+- */
+- ee = mhi_get_exec_env(mhi_cntrl);
+- if (ee == MHI_EE_MAX)
+- goto error_exit_rddm;
+-
+- if (ee != MHI_EE_RDDM) {
+- dev_dbg(dev, "Trigger device into RDDM mode using SYS ERR\n");
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_SYS_ERR);
+-
+- dev_dbg(dev, "Waiting for device to enter RDDM\n");
+- while (rddm_retry--) {
+- ee = mhi_get_exec_env(mhi_cntrl);
+- if (ee == MHI_EE_RDDM)
+- break;
+-
+- udelay(delayus);
+- }
+-
+- if (rddm_retry <= 0) {
+- /* Hardware reset so force device to enter RDDM */
+- dev_dbg(dev,
+- "Did not enter RDDM, do a host req reset\n");
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->regs,
+- MHI_SOC_RESET_REQ_OFFSET,
+- MHI_SOC_RESET_REQ);
+- udelay(delayus);
+- }
+-
+- ee = mhi_get_exec_env(mhi_cntrl);
+- }
+-
+- dev_dbg(dev,
+- "Waiting for RDDM image download via BHIe, current EE:%s\n",
+- TO_MHI_EXEC_STR(ee));
+-
+- while (retry--) {
+- ret = mhi_read_reg_field(mhi_cntrl, base, BHIE_RXVECSTATUS_OFFS,
+- BHIE_RXVECSTATUS_STATUS_BMSK,
+- BHIE_RXVECSTATUS_STATUS_SHFT,
+- &rx_status);
+- if (ret)
+- return -EIO;
+-
+- if (rx_status == BHIE_RXVECSTATUS_STATUS_XFER_COMPL)
+- return 0;
+-
+- udelay(delayus);
+- }
+-
+- ee = mhi_get_exec_env(mhi_cntrl);
+- ret = mhi_read_reg(mhi_cntrl, base, BHIE_RXVECSTATUS_OFFS, &rx_status);
+-
+- dev_err(dev, "RXVEC_STATUS: 0x%x\n", rx_status);
+-
+-error_exit_rddm:
+- dev_err(dev, "RDDM transfer failed. Current EE: %s\n",
+- TO_MHI_EXEC_STR(ee));
+-
+- return -EIO;
+-}
+-
+-/* Download RDDM image from device */
+-int mhi_download_rddm_image(struct mhi_controller *mhi_cntrl, bool in_panic)
+-{
+- void __iomem *base = mhi_cntrl->bhie;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 rx_status;
+-
+- if (in_panic)
+- return __mhi_download_rddm_in_panic(mhi_cntrl);
+-
+- dev_dbg(dev, "Waiting for RDDM image download via BHIe\n");
+-
+- /* Wait for the image download to complete */
+- wait_event_timeout(mhi_cntrl->state_event,
+- mhi_read_reg_field(mhi_cntrl, base,
+- BHIE_RXVECSTATUS_OFFS,
+- BHIE_RXVECSTATUS_STATUS_BMSK,
+- BHIE_RXVECSTATUS_STATUS_SHFT,
+- &rx_status) || rx_status,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- return (rx_status == BHIE_RXVECSTATUS_STATUS_XFER_COMPL) ? 0 : -EIO;
+-}
+-EXPORT_SYMBOL_GPL(mhi_download_rddm_image);
+-
+-static int mhi_fw_load_bhie(struct mhi_controller *mhi_cntrl,
+- const struct mhi_buf *mhi_buf)
+-{
+- void __iomem *base = mhi_cntrl->bhie;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- rwlock_t *pm_lock = &mhi_cntrl->pm_lock;
+- u32 tx_status, sequence_id;
+- int ret;
+-
+- read_lock_bh(pm_lock);
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- read_unlock_bh(pm_lock);
+- return -EIO;
+- }
+-
+- sequence_id = MHI_RANDOM_U32_NONZERO(BHIE_TXVECSTATUS_SEQNUM_BMSK);
+- dev_dbg(dev, "Starting image download via BHIe. Sequence ID: %u\n",
+- sequence_id);
+- mhi_write_reg(mhi_cntrl, base, BHIE_TXVECADDR_HIGH_OFFS,
+- upper_32_bits(mhi_buf->dma_addr));
+-
+- mhi_write_reg(mhi_cntrl, base, BHIE_TXVECADDR_LOW_OFFS,
+- lower_32_bits(mhi_buf->dma_addr));
+-
+- mhi_write_reg(mhi_cntrl, base, BHIE_TXVECSIZE_OFFS, mhi_buf->len);
+-
+- mhi_write_reg_field(mhi_cntrl, base, BHIE_TXVECDB_OFFS,
+- BHIE_TXVECDB_SEQNUM_BMSK, BHIE_TXVECDB_SEQNUM_SHFT,
+- sequence_id);
+- read_unlock_bh(pm_lock);
+-
+- /* Wait for the image download to complete */
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
+- mhi_read_reg_field(mhi_cntrl, base,
+- BHIE_TXVECSTATUS_OFFS,
+- BHIE_TXVECSTATUS_STATUS_BMSK,
+- BHIE_TXVECSTATUS_STATUS_SHFT,
+- &tx_status) || tx_status,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
+- tx_status != BHIE_TXVECSTATUS_STATUS_XFER_COMPL)
+- return -EIO;
+-
+- return (!ret) ? -ETIMEDOUT : 0;
+-}
+-
+-static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
+- dma_addr_t dma_addr,
+- size_t size)
+-{
+- u32 tx_status, val, session_id;
+- int i, ret;
+- void __iomem *base = mhi_cntrl->bhi;
+- rwlock_t *pm_lock = &mhi_cntrl->pm_lock;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- struct {
+- char *name;
+- u32 offset;
+- } error_reg[] = {
+- { "ERROR_CODE", BHI_ERRCODE },
+- { "ERROR_DBG1", BHI_ERRDBG1 },
+- { "ERROR_DBG2", BHI_ERRDBG2 },
+- { "ERROR_DBG3", BHI_ERRDBG3 },
+- { NULL },
+- };
+-
+- read_lock_bh(pm_lock);
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- read_unlock_bh(pm_lock);
+- goto invalid_pm_state;
+- }
+-
+- session_id = MHI_RANDOM_U32_NONZERO(BHI_TXDB_SEQNUM_BMSK);
+- dev_dbg(dev, "Starting image download via BHI. Session ID: %u\n",
+- session_id);
+- mhi_write_reg(mhi_cntrl, base, BHI_STATUS, 0);
+- mhi_write_reg(mhi_cntrl, base, BHI_IMGADDR_HIGH,
+- upper_32_bits(dma_addr));
+- mhi_write_reg(mhi_cntrl, base, BHI_IMGADDR_LOW,
+- lower_32_bits(dma_addr));
+- mhi_write_reg(mhi_cntrl, base, BHI_IMGSIZE, size);
+- mhi_write_reg(mhi_cntrl, base, BHI_IMGTXDB, session_id);
+- read_unlock_bh(pm_lock);
+-
+- /* Wait for the image download to complete */
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
+- mhi_read_reg_field(mhi_cntrl, base, BHI_STATUS,
+- BHI_STATUS_MASK, BHI_STATUS_SHIFT,
+- &tx_status) || tx_status,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
+- goto invalid_pm_state;
+-
+- if (tx_status == BHI_STATUS_ERROR) {
+- dev_err(dev, "Image transfer failed\n");
+- read_lock_bh(pm_lock);
+- if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- for (i = 0; error_reg[i].name; i++) {
+- ret = mhi_read_reg(mhi_cntrl, base,
+- error_reg[i].offset, &val);
+- if (ret)
+- break;
+- dev_err(dev, "Reg: %s value: 0x%x\n",
+- error_reg[i].name, val);
+- }
+- }
+- read_unlock_bh(pm_lock);
+- goto invalid_pm_state;
+- }
+-
+- return (!ret) ? -ETIMEDOUT : 0;
+-
+-invalid_pm_state:
+-
+- return -EIO;
+-}
+-
+-void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
+- struct image_info *image_info)
+-{
+- int i;
+- struct mhi_buf *mhi_buf = image_info->mhi_buf;
+-
+- for (i = 0; i < image_info->entries; i++, mhi_buf++)
+- dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
+- mhi_buf->buf, mhi_buf->dma_addr);
+-
+- kfree(image_info->mhi_buf);
+- kfree(image_info);
+-}
+-
+-int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
+- struct image_info **image_info,
+- size_t alloc_size)
+-{
+- size_t seg_size = mhi_cntrl->seg_len;
+- int segments = DIV_ROUND_UP(alloc_size, seg_size) + 1;
+- int i;
+- struct image_info *img_info;
+- struct mhi_buf *mhi_buf;
+-
+- img_info = kzalloc(sizeof(*img_info), GFP_KERNEL);
+- if (!img_info)
+- return -ENOMEM;
+-
+- /* Allocate memory for entries */
+- img_info->mhi_buf = kcalloc(segments, sizeof(*img_info->mhi_buf),
+- GFP_KERNEL);
+- if (!img_info->mhi_buf)
+- goto error_alloc_mhi_buf;
+-
+- /* Allocate and populate vector table */
+- mhi_buf = img_info->mhi_buf;
+- for (i = 0; i < segments; i++, mhi_buf++) {
+- size_t vec_size = seg_size;
+-
+- /* Vector table is the last entry */
+- if (i == segments - 1)
+- vec_size = sizeof(struct bhi_vec_entry) * i;
+-
+- mhi_buf->len = vec_size;
+- mhi_buf->buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
+- vec_size, &mhi_buf->dma_addr,
+- GFP_KERNEL);
+- if (!mhi_buf->buf)
+- goto error_alloc_segment;
+- }
+-
+- img_info->bhi_vec = img_info->mhi_buf[segments - 1].buf;
+- img_info->entries = segments;
+- *image_info = img_info;
+-
+- return 0;
+-
+-error_alloc_segment:
+- for (--i, --mhi_buf; i >= 0; i--, mhi_buf--)
+- dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
+- mhi_buf->buf, mhi_buf->dma_addr);
+-
+-error_alloc_mhi_buf:
+- kfree(img_info);
+-
+- return -ENOMEM;
+-}
+-
+-static void mhi_firmware_copy(struct mhi_controller *mhi_cntrl,
+- const struct firmware *firmware,
+- struct image_info *img_info)
+-{
+- size_t remainder = firmware->size;
+- size_t to_cpy;
+- const u8 *buf = firmware->data;
+- struct mhi_buf *mhi_buf = img_info->mhi_buf;
+- struct bhi_vec_entry *bhi_vec = img_info->bhi_vec;
+-
+- while (remainder) {
+- to_cpy = min(remainder, mhi_buf->len);
+- memcpy(mhi_buf->buf, buf, to_cpy);
+- bhi_vec->dma_addr = mhi_buf->dma_addr;
+- bhi_vec->size = to_cpy;
+-
+- buf += to_cpy;
+- remainder -= to_cpy;
+- bhi_vec++;
+- mhi_buf++;
+- }
+-}
+-
+-void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl)
+-{
+- const struct firmware *firmware = NULL;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- const char *fw_name;
+- void *buf;
+- dma_addr_t dma_addr;
+- size_t size;
+- int i, ret;
+-
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- dev_err(dev, "Device MHI is not in valid state\n");
+- return;
+- }
+-
+- /* save hardware info from BHI */
+- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_SERIALNU,
+- &mhi_cntrl->serial_number);
+- if (ret)
+- dev_err(dev, "Could not capture serial number via BHI\n");
+-
+- for (i = 0; i < ARRAY_SIZE(mhi_cntrl->oem_pk_hash); i++) {
+- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_OEMPKHASH(i),
+- &mhi_cntrl->oem_pk_hash[i]);
+- if (ret) {
+- dev_err(dev, "Could not capture OEM PK HASH via BHI\n");
+- break;
+- }
+- }
+-
+- /* wait for ready on pass through or any other execution environment */
+- if (mhi_cntrl->ee != MHI_EE_EDL && mhi_cntrl->ee != MHI_EE_PBL)
+- goto fw_load_ready_state;
+-
+- fw_name = (mhi_cntrl->ee == MHI_EE_EDL) ?
+- mhi_cntrl->edl_image : mhi_cntrl->fw_image;
+-
+- if (!fw_name || (mhi_cntrl->fbc_download && (!mhi_cntrl->sbl_size ||
+- !mhi_cntrl->seg_len))) {
+- dev_err(dev,
+- "No firmware image defined or !sbl_size || !seg_len\n");
+- goto error_fw_load;
+- }
+-
+- ret = request_firmware(&firmware, fw_name, dev);
+- if (ret) {
+- dev_err(dev, "Error loading firmware: %d\n", ret);
+- goto error_fw_load;
+- }
+-
+- size = (mhi_cntrl->fbc_download) ? mhi_cntrl->sbl_size : firmware->size;
+-
+- /* SBL size provided is maximum size, not necessarily the image size */
+- if (size > firmware->size)
+- size = firmware->size;
+-
+- buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, &dma_addr,
+- GFP_KERNEL);
+- if (!buf) {
+- release_firmware(firmware);
+- goto error_fw_load;
+- }
+-
+- /* Download image using BHI */
+- memcpy(buf, firmware->data, size);
+- ret = mhi_fw_load_bhi(mhi_cntrl, dma_addr, size);
+- dma_free_coherent(mhi_cntrl->cntrl_dev, size, buf, dma_addr);
+-
+- /* Error or in EDL mode, we're done */
+- if (ret) {
+- dev_err(dev, "MHI did not load image over BHI, ret: %d\n", ret);
+- release_firmware(firmware);
+- goto error_fw_load;
+- }
+-
+- /* Wait for ready since EDL image was loaded */
+- if (fw_name == mhi_cntrl->edl_image) {
+- release_firmware(firmware);
+- goto fw_load_ready_state;
+- }
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->dev_state = MHI_STATE_RESET;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- /*
+- * If we're doing fbc, populate vector tables while
+- * device transitioning into MHI READY state
+- */
+- if (mhi_cntrl->fbc_download) {
+- ret = mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->fbc_image,
+- firmware->size);
+- if (ret) {
+- release_firmware(firmware);
+- goto error_fw_load;
+- }
+-
+- /* Load the firmware into BHIE vec table */
+- mhi_firmware_copy(mhi_cntrl, firmware, mhi_cntrl->fbc_image);
+- }
+-
+- release_firmware(firmware);
+-
+-fw_load_ready_state:
+- /* Transitioning into MHI RESET->READY state */
+- ret = mhi_ready_state_transition(mhi_cntrl);
+- if (ret) {
+- dev_err(dev, "MHI did not enter READY state\n");
+- goto error_ready_state;
+- }
+-
+- dev_info(dev, "Wait for device to enter SBL or Mission mode\n");
+- return;
+-
+-error_ready_state:
+- if (mhi_cntrl->fbc_download) {
+- mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->fbc_image);
+- mhi_cntrl->fbc_image = NULL;
+- }
+-
+-error_fw_load:
+- mhi_cntrl->pm_state = MHI_PM_FW_DL_ERR;
+- wake_up_all(&mhi_cntrl->state_event);
+-}
+-
+-int mhi_download_amss_image(struct mhi_controller *mhi_cntrl)
+-{
+- struct image_info *image_info = mhi_cntrl->fbc_image;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int ret;
+-
+- if (!image_info)
+- return -EIO;
+-
+- ret = mhi_fw_load_bhie(mhi_cntrl,
+- /* Vector table is the last entry */
+- &image_info->mhi_buf[image_info->entries - 1]);
+- if (ret) {
+- dev_err(dev, "MHI did not load AMSS, ret:%d\n", ret);
+- mhi_cntrl->pm_state = MHI_PM_FW_DL_ERR;
+- wake_up_all(&mhi_cntrl->state_event);
+- }
+-
+- return ret;
+-}
+diff --git a/drivers/bus/mhi/core/debugfs.c b/drivers/bus/mhi/core/debugfs.c
+deleted file mode 100644
+index 858d7516410bb..0000000000000
+--- a/drivers/bus/mhi/core/debugfs.c
++++ /dev/null
+@@ -1,413 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#include <linux/debugfs.h>
+-#include <linux/device.h>
+-#include <linux/interrupt.h>
+-#include <linux/list.h>
+-#include <linux/mhi.h>
+-#include <linux/module.h>
+-#include "internal.h"
+-
+-static int mhi_debugfs_states_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+-
+- /* states */
+- seq_printf(m, "PM state: %s Device: %s MHI state: %s EE: %s wake: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- mhi_is_active(mhi_cntrl) ? "Active" : "Inactive",
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- TO_MHI_EXEC_STR(mhi_cntrl->ee),
+- mhi_cntrl->wake_set ? "true" : "false");
+-
+- /* counters */
+- seq_printf(m, "M0: %u M2: %u M3: %u", mhi_cntrl->M0, mhi_cntrl->M2,
+- mhi_cntrl->M3);
+-
+- seq_printf(m, " device wake: %u pending packets: %u\n",
+- atomic_read(&mhi_cntrl->dev_wake),
+- atomic_read(&mhi_cntrl->pending_pkts));
+-
+- return 0;
+-}
+-
+-static int mhi_debugfs_events_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+- struct mhi_event *mhi_event;
+- struct mhi_event_ctxt *er_ctxt;
+- int i;
+-
+- if (!mhi_is_active(mhi_cntrl)) {
+- seq_puts(m, "Device not ready\n");
+- return -ENODEV;
+- }
+-
+- er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings;
+- i++, er_ctxt++, mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- if (mhi_event->offload_ev) {
+- seq_printf(m, "Index: %d is an offload event ring\n",
+- i);
+- continue;
+- }
+-
+- seq_printf(m, "Index: %d intmod count: %lu time: %lu",
+- i, (er_ctxt->intmod & EV_CTX_INTMODC_MASK) >>
+- EV_CTX_INTMODC_SHIFT,
+- (er_ctxt->intmod & EV_CTX_INTMODT_MASK) >>
+- EV_CTX_INTMODT_SHIFT);
+-
+- seq_printf(m, " base: 0x%0llx len: 0x%llx", er_ctxt->rbase,
+- er_ctxt->rlen);
+-
+- seq_printf(m, " rp: 0x%llx wp: 0x%llx", er_ctxt->rp,
+- er_ctxt->wp);
+-
+- seq_printf(m, " local rp: 0x%pK db: 0x%pad\n", ring->rp,
+- &mhi_event->db_cfg.db_val);
+- }
+-
+- return 0;
+-}
+-
+-static int mhi_debugfs_channels_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+- struct mhi_chan *mhi_chan;
+- struct mhi_chan_ctxt *chan_ctxt;
+- int i;
+-
+- if (!mhi_is_active(mhi_cntrl)) {
+- seq_puts(m, "Device not ready\n");
+- return -ENODEV;
+- }
+-
+- mhi_chan = mhi_cntrl->mhi_chan;
+- chan_ctxt = mhi_cntrl->mhi_ctxt->chan_ctxt;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, chan_ctxt++, mhi_chan++) {
+- struct mhi_ring *ring = &mhi_chan->tre_ring;
+-
+- if (mhi_chan->offload_ch) {
+- seq_printf(m, "%s(%u) is an offload channel\n",
+- mhi_chan->name, mhi_chan->chan);
+- continue;
+- }
+-
+- if (!mhi_chan->mhi_dev)
+- continue;
+-
+- seq_printf(m,
+- "%s(%u) state: 0x%lx brstmode: 0x%lx pollcfg: 0x%lx",
+- mhi_chan->name, mhi_chan->chan, (chan_ctxt->chcfg &
+- CHAN_CTX_CHSTATE_MASK) >> CHAN_CTX_CHSTATE_SHIFT,
+- (chan_ctxt->chcfg & CHAN_CTX_BRSTMODE_MASK) >>
+- CHAN_CTX_BRSTMODE_SHIFT, (chan_ctxt->chcfg &
+- CHAN_CTX_POLLCFG_MASK) >> CHAN_CTX_POLLCFG_SHIFT);
+-
+- seq_printf(m, " type: 0x%x event ring: %u", chan_ctxt->chtype,
+- chan_ctxt->erindex);
+-
+- seq_printf(m, " base: 0x%llx len: 0x%llx rp: 0x%llx wp: 0x%llx",
+- chan_ctxt->rbase, chan_ctxt->rlen, chan_ctxt->rp,
+- chan_ctxt->wp);
+-
+- seq_printf(m, " local rp: 0x%pK local wp: 0x%pK db: 0x%pad\n",
+- ring->rp, ring->wp,
+- &mhi_chan->db_cfg.db_val);
+- }
+-
+- return 0;
+-}
+-
+-static int mhi_device_info_show(struct device *dev, void *data)
+-{
+- struct mhi_device *mhi_dev;
+-
+- if (dev->bus != &mhi_bus_type)
+- return 0;
+-
+- mhi_dev = to_mhi_device(dev);
+-
+- seq_printf((struct seq_file *)data, "%s: type: %s dev_wake: %u",
+- mhi_dev->name, mhi_dev->dev_type ? "Controller" : "Transfer",
+- mhi_dev->dev_wake);
+-
+- /* for transfer device types only */
+- if (mhi_dev->dev_type == MHI_DEVICE_XFER)
+- seq_printf((struct seq_file *)data, " channels: %u(UL)/%u(DL)",
+- mhi_dev->ul_chan_id, mhi_dev->dl_chan_id);
+-
+- seq_puts((struct seq_file *)data, "\n");
+-
+- return 0;
+-}
+-
+-static int mhi_debugfs_devices_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+-
+- if (!mhi_is_active(mhi_cntrl)) {
+- seq_puts(m, "Device not ready\n");
+- return -ENODEV;
+- }
+-
+- /* Show controller and client(s) info */
+- mhi_device_info_show(&mhi_cntrl->mhi_dev->dev, m);
+- device_for_each_child(&mhi_cntrl->mhi_dev->dev, m, mhi_device_info_show);
+-
+- return 0;
+-}
+-
+-static int mhi_debugfs_regdump_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+- enum mhi_state state;
+- enum mhi_ee_type ee;
+- int i, ret = -EIO;
+- u32 val;
+- void __iomem *mhi_base = mhi_cntrl->regs;
+- void __iomem *bhi_base = mhi_cntrl->bhi;
+- void __iomem *bhie_base = mhi_cntrl->bhie;
+- void __iomem *wake_db = mhi_cntrl->wake_db;
+- struct {
+- const char *name;
+- int offset;
+- void __iomem *base;
+- } regs[] = {
+- { "MHI_REGLEN", MHIREGLEN, mhi_base},
+- { "MHI_VER", MHIVER, mhi_base},
+- { "MHI_CFG", MHICFG, mhi_base},
+- { "MHI_CTRL", MHICTRL, mhi_base},
+- { "MHI_STATUS", MHISTATUS, mhi_base},
+- { "MHI_WAKE_DB", 0, wake_db},
+- { "BHI_EXECENV", BHI_EXECENV, bhi_base},
+- { "BHI_STATUS", BHI_STATUS, bhi_base},
+- { "BHI_ERRCODE", BHI_ERRCODE, bhi_base},
+- { "BHI_ERRDBG1", BHI_ERRDBG1, bhi_base},
+- { "BHI_ERRDBG2", BHI_ERRDBG2, bhi_base},
+- { "BHI_ERRDBG3", BHI_ERRDBG3, bhi_base},
+- { "BHIE_TXVEC_DB", BHIE_TXVECDB_OFFS, bhie_base},
+- { "BHIE_TXVEC_STATUS", BHIE_TXVECSTATUS_OFFS, bhie_base},
+- { "BHIE_RXVEC_DB", BHIE_RXVECDB_OFFS, bhie_base},
+- { "BHIE_RXVEC_STATUS", BHIE_RXVECSTATUS_OFFS, bhie_base},
+- { NULL },
+- };
+-
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
+- return ret;
+-
+- seq_printf(m, "Host PM state: %s Device state: %s EE: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- TO_MHI_EXEC_STR(mhi_cntrl->ee));
+-
+- state = mhi_get_mhi_state(mhi_cntrl);
+- ee = mhi_get_exec_env(mhi_cntrl);
+- seq_printf(m, "Device EE: %s state: %s\n", TO_MHI_EXEC_STR(ee),
+- TO_MHI_STATE_STR(state));
+-
+- for (i = 0; regs[i].name; i++) {
+- if (!regs[i].base)
+- continue;
+- ret = mhi_read_reg(mhi_cntrl, regs[i].base, regs[i].offset,
+- &val);
+- if (ret)
+- continue;
+-
+- seq_printf(m, "%s: 0x%x\n", regs[i].name, val);
+- }
+-
+- return 0;
+-}
+-
+-static int mhi_debugfs_device_wake_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+- struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
+-
+- if (!mhi_is_active(mhi_cntrl)) {
+- seq_puts(m, "Device not ready\n");
+- return -ENODEV;
+- }
+-
+- seq_printf(m,
+- "Wake count: %d\n%s\n", mhi_dev->dev_wake,
+- "Usage: echo get/put > device_wake to vote/unvote for M0");
+-
+- return 0;
+-}
+-
+-static ssize_t mhi_debugfs_device_wake_write(struct file *file,
+- const char __user *ubuf,
+- size_t count, loff_t *ppos)
+-{
+- struct seq_file *m = file->private_data;
+- struct mhi_controller *mhi_cntrl = m->private;
+- struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
+- char buf[16];
+- int ret = -EINVAL;
+-
+- if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+- return -EFAULT;
+-
+- if (!strncmp(buf, "get", 3)) {
+- ret = mhi_device_get_sync(mhi_dev);
+- } else if (!strncmp(buf, "put", 3)) {
+- mhi_device_put(mhi_dev);
+- ret = 0;
+- }
+-
+- return ret ? ret : count;
+-}
+-
+-static int mhi_debugfs_timeout_ms_show(struct seq_file *m, void *d)
+-{
+- struct mhi_controller *mhi_cntrl = m->private;
+-
+- seq_printf(m, "%u ms\n", mhi_cntrl->timeout_ms);
+-
+- return 0;
+-}
+-
+-static ssize_t mhi_debugfs_timeout_ms_write(struct file *file,
+- const char __user *ubuf,
+- size_t count, loff_t *ppos)
+-{
+- struct seq_file *m = file->private_data;
+- struct mhi_controller *mhi_cntrl = m->private;
+- u32 timeout_ms;
+-
+- if (kstrtou32_from_user(ubuf, count, 0, &timeout_ms))
+- return -EINVAL;
+-
+- mhi_cntrl->timeout_ms = timeout_ms;
+-
+- return count;
+-}
+-
+-static int mhi_debugfs_states_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_states_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_events_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_events_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_channels_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_channels_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_devices_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_devices_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_regdump_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_regdump_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_device_wake_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_device_wake_show, inode->i_private);
+-}
+-
+-static int mhi_debugfs_timeout_ms_open(struct inode *inode, struct file *fp)
+-{
+- return single_open(fp, mhi_debugfs_timeout_ms_show, inode->i_private);
+-}
+-
+-static const struct file_operations debugfs_states_fops = {
+- .open = mhi_debugfs_states_open,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_events_fops = {
+- .open = mhi_debugfs_events_open,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_channels_fops = {
+- .open = mhi_debugfs_channels_open,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_devices_fops = {
+- .open = mhi_debugfs_devices_open,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_regdump_fops = {
+- .open = mhi_debugfs_regdump_open,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_device_wake_fops = {
+- .open = mhi_debugfs_device_wake_open,
+- .write = mhi_debugfs_device_wake_write,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static const struct file_operations debugfs_timeout_ms_fops = {
+- .open = mhi_debugfs_timeout_ms_open,
+- .write = mhi_debugfs_timeout_ms_write,
+- .release = single_release,
+- .read = seq_read,
+-};
+-
+-static struct dentry *mhi_debugfs_root;
+-
+-void mhi_create_debugfs(struct mhi_controller *mhi_cntrl)
+-{
+- mhi_cntrl->debugfs_dentry =
+- debugfs_create_dir(dev_name(&mhi_cntrl->mhi_dev->dev),
+- mhi_debugfs_root);
+-
+- debugfs_create_file("states", 0444, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_states_fops);
+- debugfs_create_file("events", 0444, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_events_fops);
+- debugfs_create_file("channels", 0444, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_channels_fops);
+- debugfs_create_file("devices", 0444, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_devices_fops);
+- debugfs_create_file("regdump", 0444, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_regdump_fops);
+- debugfs_create_file("device_wake", 0644, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_device_wake_fops);
+- debugfs_create_file("timeout_ms", 0644, mhi_cntrl->debugfs_dentry,
+- mhi_cntrl, &debugfs_timeout_ms_fops);
+-}
+-
+-void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl)
+-{
+- debugfs_remove_recursive(mhi_cntrl->debugfs_dentry);
+- mhi_cntrl->debugfs_dentry = NULL;
+-}
+-
+-void mhi_debugfs_init(void)
+-{
+- mhi_debugfs_root = debugfs_create_dir(mhi_bus_type.name, NULL);
+-}
+-
+-void mhi_debugfs_exit(void)
+-{
+- debugfs_remove_recursive(mhi_debugfs_root);
+-}
+diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c
+deleted file mode 100644
+index 5aaca6d0f52b2..0000000000000
+--- a/drivers/bus/mhi/core/init.c
++++ /dev/null
+@@ -1,1427 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#include <linux/debugfs.h>
+-#include <linux/device.h>
+-#include <linux/dma-direction.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/idr.h>
+-#include <linux/interrupt.h>
+-#include <linux/list.h>
+-#include <linux/mhi.h>
+-#include <linux/mod_devicetable.h>
+-#include <linux/module.h>
+-#include <linux/slab.h>
+-#include <linux/vmalloc.h>
+-#include <linux/wait.h>
+-#include "internal.h"
+-
+-static DEFINE_IDA(mhi_controller_ida);
+-
+-const char * const mhi_ee_str[MHI_EE_MAX] = {
+- [MHI_EE_PBL] = "PRIMARY BOOTLOADER",
+- [MHI_EE_SBL] = "SECONDARY BOOTLOADER",
+- [MHI_EE_AMSS] = "MISSION MODE",
+- [MHI_EE_RDDM] = "RAMDUMP DOWNLOAD MODE",
+- [MHI_EE_WFW] = "WLAN FIRMWARE",
+- [MHI_EE_PTHRU] = "PASS THROUGH",
+- [MHI_EE_EDL] = "EMERGENCY DOWNLOAD",
+- [MHI_EE_FP] = "FLASH PROGRAMMER",
+- [MHI_EE_DISABLE_TRANSITION] = "DISABLE",
+- [MHI_EE_NOT_SUPPORTED] = "NOT SUPPORTED",
+-};
+-
+-const char * const dev_state_tran_str[DEV_ST_TRANSITION_MAX] = {
+- [DEV_ST_TRANSITION_PBL] = "PBL",
+- [DEV_ST_TRANSITION_READY] = "READY",
+- [DEV_ST_TRANSITION_SBL] = "SBL",
+- [DEV_ST_TRANSITION_MISSION_MODE] = "MISSION MODE",
+- [DEV_ST_TRANSITION_FP] = "FLASH PROGRAMMER",
+- [DEV_ST_TRANSITION_SYS_ERR] = "SYS ERROR",
+- [DEV_ST_TRANSITION_DISABLE] = "DISABLE",
+-};
+-
+-const char * const mhi_state_str[MHI_STATE_MAX] = {
+- [MHI_STATE_RESET] = "RESET",
+- [MHI_STATE_READY] = "READY",
+- [MHI_STATE_M0] = "M0",
+- [MHI_STATE_M1] = "M1",
+- [MHI_STATE_M2] = "M2",
+- [MHI_STATE_M3] = "M3",
+- [MHI_STATE_M3_FAST] = "M3 FAST",
+- [MHI_STATE_BHI] = "BHI",
+- [MHI_STATE_SYS_ERR] = "SYS ERROR",
+-};
+-
+-const char * const mhi_ch_state_type_str[MHI_CH_STATE_TYPE_MAX] = {
+- [MHI_CH_STATE_TYPE_RESET] = "RESET",
+- [MHI_CH_STATE_TYPE_STOP] = "STOP",
+- [MHI_CH_STATE_TYPE_START] = "START",
+-};
+-
+-static const char * const mhi_pm_state_str[] = {
+- [MHI_PM_STATE_DISABLE] = "DISABLE",
+- [MHI_PM_STATE_POR] = "POWER ON RESET",
+- [MHI_PM_STATE_M0] = "M0",
+- [MHI_PM_STATE_M2] = "M2",
+- [MHI_PM_STATE_M3_ENTER] = "M?->M3",
+- [MHI_PM_STATE_M3] = "M3",
+- [MHI_PM_STATE_M3_EXIT] = "M3->M0",
+- [MHI_PM_STATE_FW_DL_ERR] = "Firmware Download Error",
+- [MHI_PM_STATE_SYS_ERR_DETECT] = "SYS ERROR Detect",
+- [MHI_PM_STATE_SYS_ERR_PROCESS] = "SYS ERROR Process",
+- [MHI_PM_STATE_SHUTDOWN_PROCESS] = "SHUTDOWN Process",
+- [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect",
+-};
+-
+-const char *to_mhi_pm_state_str(enum mhi_pm_state state)
+-{
+- int index = find_last_bit((unsigned long *)&state, 32);
+-
+- if (index >= ARRAY_SIZE(mhi_pm_state_str))
+- return "Invalid State";
+-
+- return mhi_pm_state_str[index];
+-}
+-
+-static ssize_t serial_number_show(struct device *dev,
+- struct device_attribute *attr,
+- char *buf)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+-
+- return snprintf(buf, PAGE_SIZE, "Serial Number: %u\n",
+- mhi_cntrl->serial_number);
+-}
+-static DEVICE_ATTR_RO(serial_number);
+-
+-static ssize_t oem_pk_hash_show(struct device *dev,
+- struct device_attribute *attr,
+- char *buf)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- int i, cnt = 0;
+-
+- for (i = 0; i < ARRAY_SIZE(mhi_cntrl->oem_pk_hash); i++)
+- cnt += snprintf(buf + cnt, PAGE_SIZE - cnt,
+- "OEMPKHASH[%d]: 0x%x\n", i,
+- mhi_cntrl->oem_pk_hash[i]);
+-
+- return cnt;
+-}
+-static DEVICE_ATTR_RO(oem_pk_hash);
+-
+-static struct attribute *mhi_dev_attrs[] = {
+- &dev_attr_serial_number.attr,
+- &dev_attr_oem_pk_hash.attr,
+- NULL,
+-};
+-ATTRIBUTE_GROUPS(mhi_dev);
+-
+-/* MHI protocol requires the transfer ring to be aligned with ring length */
+-static int mhi_alloc_aligned_ring(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring,
+- u64 len)
+-{
+- ring->alloc_size = len + (len - 1);
+- ring->pre_aligned = dma_alloc_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
+- &ring->dma_handle, GFP_KERNEL);
+- if (!ring->pre_aligned)
+- return -ENOMEM;
+-
+- ring->iommu_base = (ring->dma_handle + (len - 1)) & ~(len - 1);
+- ring->base = ring->pre_aligned + (ring->iommu_base - ring->dma_handle);
+-
+- return 0;
+-}
+-
+-void mhi_deinit_free_irq(struct mhi_controller *mhi_cntrl)
+-{
+- int i;
+- struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
+-
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
+- }
+-
+- free_irq(mhi_cntrl->irq[0], mhi_cntrl);
+-}
+-
+-int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- unsigned long irq_flags = IRQF_SHARED | IRQF_NO_SUSPEND;
+- int i, ret;
+-
+- /* if controller driver has set irq_flags, use it */
+- if (mhi_cntrl->irq_flags)
+- irq_flags = mhi_cntrl->irq_flags;
+-
+- /* Setup BHI_INTVEC IRQ */
+- ret = request_threaded_irq(mhi_cntrl->irq[0], mhi_intvec_handler,
+- mhi_intvec_threaded_handler,
+- irq_flags,
+- "bhi", mhi_cntrl);
+- if (ret)
+- return ret;
+-
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- if (mhi_event->irq >= mhi_cntrl->nr_irqs) {
+- dev_err(dev, "irq %d not available for event ring\n",
+- mhi_event->irq);
+- ret = -EINVAL;
+- goto error_request;
+- }
+-
+- ret = request_irq(mhi_cntrl->irq[mhi_event->irq],
+- mhi_irq_handler,
+- irq_flags,
+- "mhi", mhi_event);
+- if (ret) {
+- dev_err(dev, "Error requesting irq:%d for ev:%d\n",
+- mhi_cntrl->irq[mhi_event->irq], i);
+- goto error_request;
+- }
+- }
+-
+- return 0;
+-
+-error_request:
+- for (--i, --mhi_event; i >= 0; i--, mhi_event--) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
+- }
+- free_irq(mhi_cntrl->irq[0], mhi_cntrl);
+-
+- return ret;
+-}
+-
+-void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl)
+-{
+- int i;
+- struct mhi_ctxt *mhi_ctxt = mhi_cntrl->mhi_ctxt;
+- struct mhi_cmd *mhi_cmd;
+- struct mhi_event *mhi_event;
+- struct mhi_ring *ring;
+-
+- mhi_cmd = mhi_cntrl->mhi_cmd;
+- for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++) {
+- ring = &mhi_cmd->ring;
+- dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
+- ring->pre_aligned, ring->dma_handle);
+- ring->base = NULL;
+- ring->iommu_base = 0;
+- }
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev,
+- sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
+- mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
+-
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- ring = &mhi_event->ring;
+- dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
+- ring->pre_aligned, ring->dma_handle);
+- ring->base = NULL;
+- ring->iommu_base = 0;
+- }
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
+- mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
+- mhi_ctxt->er_ctxt_addr);
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
+- mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
+- mhi_ctxt->chan_ctxt_addr);
+-
+- kfree(mhi_ctxt);
+- mhi_cntrl->mhi_ctxt = NULL;
+-}
+-
+-int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_ctxt *mhi_ctxt;
+- struct mhi_chan_ctxt *chan_ctxt;
+- struct mhi_event_ctxt *er_ctxt;
+- struct mhi_cmd_ctxt *cmd_ctxt;
+- struct mhi_chan *mhi_chan;
+- struct mhi_event *mhi_event;
+- struct mhi_cmd *mhi_cmd;
+- u32 tmp;
+- int ret = -ENOMEM, i;
+-
+- atomic_set(&mhi_cntrl->dev_wake, 0);
+- atomic_set(&mhi_cntrl->pending_pkts, 0);
+-
+- mhi_ctxt = kzalloc(sizeof(*mhi_ctxt), GFP_KERNEL);
+- if (!mhi_ctxt)
+- return -ENOMEM;
+-
+- /* Setup channel ctxt */
+- mhi_ctxt->chan_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
+- sizeof(*mhi_ctxt->chan_ctxt) *
+- mhi_cntrl->max_chan,
+- &mhi_ctxt->chan_ctxt_addr,
+- GFP_KERNEL);
+- if (!mhi_ctxt->chan_ctxt)
+- goto error_alloc_chan_ctxt;
+-
+- mhi_chan = mhi_cntrl->mhi_chan;
+- chan_ctxt = mhi_ctxt->chan_ctxt;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, chan_ctxt++, mhi_chan++) {
+- /* Skip if it is an offload channel */
+- if (mhi_chan->offload_ch)
+- continue;
+-
+- tmp = chan_ctxt->chcfg;
+- tmp &= ~CHAN_CTX_CHSTATE_MASK;
+- tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT);
+- tmp &= ~CHAN_CTX_BRSTMODE_MASK;
+- tmp |= (mhi_chan->db_cfg.brstmode << CHAN_CTX_BRSTMODE_SHIFT);
+- tmp &= ~CHAN_CTX_POLLCFG_MASK;
+- tmp |= (mhi_chan->db_cfg.pollcfg << CHAN_CTX_POLLCFG_SHIFT);
+- chan_ctxt->chcfg = tmp;
+-
+- chan_ctxt->chtype = mhi_chan->type;
+- chan_ctxt->erindex = mhi_chan->er_index;
+-
+- mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
+- mhi_chan->tre_ring.db_addr = (void __iomem *)&chan_ctxt->wp;
+- }
+-
+- /* Setup event context */
+- mhi_ctxt->er_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
+- sizeof(*mhi_ctxt->er_ctxt) *
+- mhi_cntrl->total_ev_rings,
+- &mhi_ctxt->er_ctxt_addr,
+- GFP_KERNEL);
+- if (!mhi_ctxt->er_ctxt)
+- goto error_alloc_er_ctxt;
+-
+- er_ctxt = mhi_ctxt->er_ctxt;
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
+- mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- /* Skip if it is an offload event */
+- if (mhi_event->offload_ev)
+- continue;
+-
+- tmp = er_ctxt->intmod;
+- tmp &= ~EV_CTX_INTMODC_MASK;
+- tmp &= ~EV_CTX_INTMODT_MASK;
+- tmp |= (mhi_event->intmod << EV_CTX_INTMODT_SHIFT);
+- er_ctxt->intmod = tmp;
+-
+- er_ctxt->ertype = MHI_ER_TYPE_VALID;
+- er_ctxt->msivec = mhi_event->irq;
+- mhi_event->db_cfg.db_mode = true;
+-
+- ring->el_size = sizeof(struct mhi_tre);
+- ring->len = ring->el_size * ring->elements;
+- ret = mhi_alloc_aligned_ring(mhi_cntrl, ring, ring->len);
+- if (ret)
+- goto error_alloc_er;
+-
+- /*
+- * If the read pointer equals to the write pointer, then the
+- * ring is empty
+- */
+- ring->rp = ring->wp = ring->base;
+- er_ctxt->rbase = ring->iommu_base;
+- er_ctxt->rp = er_ctxt->wp = er_ctxt->rbase;
+- er_ctxt->rlen = ring->len;
+- ring->ctxt_wp = &er_ctxt->wp;
+- }
+-
+- /* Setup cmd context */
+- ret = -ENOMEM;
+- mhi_ctxt->cmd_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
+- sizeof(*mhi_ctxt->cmd_ctxt) *
+- NR_OF_CMD_RINGS,
+- &mhi_ctxt->cmd_ctxt_addr,
+- GFP_KERNEL);
+- if (!mhi_ctxt->cmd_ctxt)
+- goto error_alloc_er;
+-
+- mhi_cmd = mhi_cntrl->mhi_cmd;
+- cmd_ctxt = mhi_ctxt->cmd_ctxt;
+- for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
+- struct mhi_ring *ring = &mhi_cmd->ring;
+-
+- ring->el_size = sizeof(struct mhi_tre);
+- ring->elements = CMD_EL_PER_RING;
+- ring->len = ring->el_size * ring->elements;
+- ret = mhi_alloc_aligned_ring(mhi_cntrl, ring, ring->len);
+- if (ret)
+- goto error_alloc_cmd;
+-
+- ring->rp = ring->wp = ring->base;
+- cmd_ctxt->rbase = ring->iommu_base;
+- cmd_ctxt->rp = cmd_ctxt->wp = cmd_ctxt->rbase;
+- cmd_ctxt->rlen = ring->len;
+- ring->ctxt_wp = &cmd_ctxt->wp;
+- }
+-
+- mhi_cntrl->mhi_ctxt = mhi_ctxt;
+-
+- return 0;
+-
+-error_alloc_cmd:
+- for (--i, --mhi_cmd; i >= 0; i--, mhi_cmd--) {
+- struct mhi_ring *ring = &mhi_cmd->ring;
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
+- ring->pre_aligned, ring->dma_handle);
+- }
+- dma_free_coherent(mhi_cntrl->cntrl_dev,
+- sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
+- mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
+- i = mhi_cntrl->total_ev_rings;
+- mhi_event = mhi_cntrl->mhi_event + i;
+-
+-error_alloc_er:
+- for (--i, --mhi_event; i >= 0; i--, mhi_event--) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- if (mhi_event->offload_ev)
+- continue;
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
+- ring->pre_aligned, ring->dma_handle);
+- }
+- dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
+- mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
+- mhi_ctxt->er_ctxt_addr);
+-
+-error_alloc_er_ctxt:
+- dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
+- mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
+- mhi_ctxt->chan_ctxt_addr);
+-
+-error_alloc_chan_ctxt:
+- kfree(mhi_ctxt);
+-
+- return ret;
+-}
+-
+-int mhi_init_mmio(struct mhi_controller *mhi_cntrl)
+-{
+- u32 val;
+- int i, ret;
+- struct mhi_chan *mhi_chan;
+- struct mhi_event *mhi_event;
+- void __iomem *base = mhi_cntrl->regs;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- struct {
+- u32 offset;
+- u32 mask;
+- u32 shift;
+- u32 val;
+- } reg_info[] = {
+- {
+- CCABAP_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->mhi_ctxt->chan_ctxt_addr),
+- },
+- {
+- CCABAP_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->mhi_ctxt->chan_ctxt_addr),
+- },
+- {
+- ECABAP_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->mhi_ctxt->er_ctxt_addr),
+- },
+- {
+- ECABAP_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->mhi_ctxt->er_ctxt_addr),
+- },
+- {
+- CRCBAP_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->mhi_ctxt->cmd_ctxt_addr),
+- },
+- {
+- CRCBAP_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->mhi_ctxt->cmd_ctxt_addr),
+- },
+- {
+- MHICFG, MHICFG_NER_MASK, MHICFG_NER_SHIFT,
+- mhi_cntrl->total_ev_rings,
+- },
+- {
+- MHICFG, MHICFG_NHWER_MASK, MHICFG_NHWER_SHIFT,
+- mhi_cntrl->hw_ev_rings,
+- },
+- {
+- MHICTRLBASE_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->iova_start),
+- },
+- {
+- MHICTRLBASE_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->iova_start),
+- },
+- {
+- MHIDATABASE_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->iova_start),
+- },
+- {
+- MHIDATABASE_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->iova_start),
+- },
+- {
+- MHICTRLLIMIT_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->iova_stop),
+- },
+- {
+- MHICTRLLIMIT_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->iova_stop),
+- },
+- {
+- MHIDATALIMIT_HIGHER, U32_MAX, 0,
+- upper_32_bits(mhi_cntrl->iova_stop),
+- },
+- {
+- MHIDATALIMIT_LOWER, U32_MAX, 0,
+- lower_32_bits(mhi_cntrl->iova_stop),
+- },
+- { 0, 0, 0 }
+- };
+-
+- dev_dbg(dev, "Initializing MHI registers\n");
+-
+- /* Read channel db offset */
+- ret = mhi_read_reg_field(mhi_cntrl, base, CHDBOFF, CHDBOFF_CHDBOFF_MASK,
+- CHDBOFF_CHDBOFF_SHIFT, &val);
+- if (ret) {
+- dev_err(dev, "Unable to read CHDBOFF register\n");
+- return -EIO;
+- }
+-
+- /* Setup wake db */
+- mhi_cntrl->wake_db = base + val + (8 * MHI_DEV_WAKE_DB);
+- mhi_cntrl->wake_set = false;
+-
+- /* Setup channel db address for each channel in tre_ring */
+- mhi_chan = mhi_cntrl->mhi_chan;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, val += 8, mhi_chan++)
+- mhi_chan->tre_ring.db_addr = base + val;
+-
+- /* Read event ring db offset */
+- ret = mhi_read_reg_field(mhi_cntrl, base, ERDBOFF, ERDBOFF_ERDBOFF_MASK,
+- ERDBOFF_ERDBOFF_SHIFT, &val);
+- if (ret) {
+- dev_err(dev, "Unable to read ERDBOFF register\n");
+- return -EIO;
+- }
+-
+- /* Setup event db address for each ev_ring */
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, val += 8, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- mhi_event->ring.db_addr = base + val;
+- }
+-
+- /* Setup DB register for primary CMD rings */
+- mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING].ring.db_addr = base + CRDB_LOWER;
+-
+- /* Write to MMIO registers */
+- for (i = 0; reg_info[i].offset; i++)
+- mhi_write_reg_field(mhi_cntrl, base, reg_info[i].offset,
+- reg_info[i].mask, reg_info[i].shift,
+- reg_info[i].val);
+-
+- return 0;
+-}
+-
+-void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *buf_ring;
+- struct mhi_ring *tre_ring;
+- struct mhi_chan_ctxt *chan_ctxt;
+- u32 tmp;
+-
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+- chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
+-
+- if (!chan_ctxt->rbase) /* Already uninitialized */
+- return;
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
+- tre_ring->pre_aligned, tre_ring->dma_handle);
+- vfree(buf_ring->base);
+-
+- buf_ring->base = tre_ring->base = NULL;
+- tre_ring->ctxt_wp = NULL;
+- chan_ctxt->rbase = 0;
+- chan_ctxt->rlen = 0;
+- chan_ctxt->rp = 0;
+- chan_ctxt->wp = 0;
+-
+- tmp = chan_ctxt->chcfg;
+- tmp &= ~CHAN_CTX_CHSTATE_MASK;
+- tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT);
+- chan_ctxt->chcfg = tmp;
+-
+- /* Update to all cores */
+- smp_wmb();
+-}
+-
+-int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *buf_ring;
+- struct mhi_ring *tre_ring;
+- struct mhi_chan_ctxt *chan_ctxt;
+- u32 tmp;
+- int ret;
+-
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+- tre_ring->el_size = sizeof(struct mhi_tre);
+- tre_ring->len = tre_ring->el_size * tre_ring->elements;
+- chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
+- ret = mhi_alloc_aligned_ring(mhi_cntrl, tre_ring, tre_ring->len);
+- if (ret)
+- return -ENOMEM;
+-
+- buf_ring->el_size = sizeof(struct mhi_buf_info);
+- buf_ring->len = buf_ring->el_size * buf_ring->elements;
+- buf_ring->base = vzalloc(buf_ring->len);
+-
+- if (!buf_ring->base) {
+- dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
+- tre_ring->pre_aligned, tre_ring->dma_handle);
+- return -ENOMEM;
+- }
+-
+- tmp = chan_ctxt->chcfg;
+- tmp &= ~CHAN_CTX_CHSTATE_MASK;
+- tmp |= (MHI_CH_STATE_ENABLED << CHAN_CTX_CHSTATE_SHIFT);
+- chan_ctxt->chcfg = tmp;
+-
+- chan_ctxt->rbase = tre_ring->iommu_base;
+- chan_ctxt->rp = chan_ctxt->wp = chan_ctxt->rbase;
+- chan_ctxt->rlen = tre_ring->len;
+- tre_ring->ctxt_wp = &chan_ctxt->wp;
+-
+- tre_ring->rp = tre_ring->wp = tre_ring->base;
+- buf_ring->rp = buf_ring->wp = buf_ring->base;
+- mhi_chan->db_cfg.db_mode = 1;
+-
+- /* Update to all cores */
+- smp_wmb();
+-
+- return 0;
+-}
+-
+-static int parse_ev_cfg(struct mhi_controller *mhi_cntrl,
+- const struct mhi_controller_config *config)
+-{
+- struct mhi_event *mhi_event;
+- const struct mhi_event_config *event_cfg;
+- struct device *dev = mhi_cntrl->cntrl_dev;
+- int i, num;
+-
+- num = config->num_events;
+- mhi_cntrl->total_ev_rings = num;
+- mhi_cntrl->mhi_event = kcalloc(num, sizeof(*mhi_cntrl->mhi_event),
+- GFP_KERNEL);
+- if (!mhi_cntrl->mhi_event)
+- return -ENOMEM;
+-
+- /* Populate event ring */
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < num; i++) {
+- event_cfg = &config->event_cfg[i];
+-
+- mhi_event->er_index = i;
+- mhi_event->ring.elements = event_cfg->num_elements;
+- mhi_event->intmod = event_cfg->irq_moderation_ms;
+- mhi_event->irq = event_cfg->irq;
+-
+- if (event_cfg->channel != U32_MAX) {
+- /* This event ring has a dedicated channel */
+- mhi_event->chan = event_cfg->channel;
+- if (mhi_event->chan >= mhi_cntrl->max_chan) {
+- dev_err(dev,
+- "Event Ring channel not available\n");
+- goto error_ev_cfg;
+- }
+-
+- mhi_event->mhi_chan =
+- &mhi_cntrl->mhi_chan[mhi_event->chan];
+- }
+-
+- /* Priority is fixed to 1 for now */
+- mhi_event->priority = 1;
+-
+- mhi_event->db_cfg.brstmode = event_cfg->mode;
+- if (MHI_INVALID_BRSTMODE(mhi_event->db_cfg.brstmode))
+- goto error_ev_cfg;
+-
+- if (mhi_event->db_cfg.brstmode == MHI_DB_BRST_ENABLE)
+- mhi_event->db_cfg.process_db = mhi_db_brstmode;
+- else
+- mhi_event->db_cfg.process_db = mhi_db_brstmode_disable;
+-
+- mhi_event->data_type = event_cfg->data_type;
+-
+- switch (mhi_event->data_type) {
+- case MHI_ER_DATA:
+- mhi_event->process_event = mhi_process_data_event_ring;
+- break;
+- case MHI_ER_CTRL:
+- mhi_event->process_event = mhi_process_ctrl_ev_ring;
+- break;
+- default:
+- dev_err(dev, "Event Ring type not supported\n");
+- goto error_ev_cfg;
+- }
+-
+- mhi_event->hw_ring = event_cfg->hardware_event;
+- if (mhi_event->hw_ring)
+- mhi_cntrl->hw_ev_rings++;
+- else
+- mhi_cntrl->sw_ev_rings++;
+-
+- mhi_event->cl_manage = event_cfg->client_managed;
+- mhi_event->offload_ev = event_cfg->offload_channel;
+- mhi_event++;
+- }
+-
+- return 0;
+-
+-error_ev_cfg:
+-
+- kfree(mhi_cntrl->mhi_event);
+- return -EINVAL;
+-}
+-
+-static int parse_ch_cfg(struct mhi_controller *mhi_cntrl,
+- const struct mhi_controller_config *config)
+-{
+- const struct mhi_channel_config *ch_cfg;
+- struct device *dev = mhi_cntrl->cntrl_dev;
+- int i;
+- u32 chan;
+-
+- mhi_cntrl->max_chan = config->max_channels;
+-
+- /*
+- * The allocation of MHI channels can exceed 32KB in some scenarios,
+- * so to avoid any memory possible allocation failures, vzalloc is
+- * used here
+- */
+- mhi_cntrl->mhi_chan = vzalloc(mhi_cntrl->max_chan *
+- sizeof(*mhi_cntrl->mhi_chan));
+- if (!mhi_cntrl->mhi_chan)
+- return -ENOMEM;
+-
+- INIT_LIST_HEAD(&mhi_cntrl->lpm_chans);
+-
+- /* Populate channel configurations */
+- for (i = 0; i < config->num_channels; i++) {
+- struct mhi_chan *mhi_chan;
+-
+- ch_cfg = &config->ch_cfg[i];
+-
+- chan = ch_cfg->num;
+- if (chan >= mhi_cntrl->max_chan) {
+- dev_err(dev, "Channel %d not available\n", chan);
+- goto error_chan_cfg;
+- }
+-
+- mhi_chan = &mhi_cntrl->mhi_chan[chan];
+- mhi_chan->name = ch_cfg->name;
+- mhi_chan->chan = chan;
+-
+- mhi_chan->tre_ring.elements = ch_cfg->num_elements;
+- if (!mhi_chan->tre_ring.elements)
+- goto error_chan_cfg;
+-
+- /*
+- * For some channels, local ring length should be bigger than
+- * the transfer ring length due to internal logical channels
+- * in device. So host can queue much more buffers than transfer
+- * ring length. Example, RSC channels should have a larger local
+- * channel length than transfer ring length.
+- */
+- mhi_chan->buf_ring.elements = ch_cfg->local_elements;
+- if (!mhi_chan->buf_ring.elements)
+- mhi_chan->buf_ring.elements = mhi_chan->tre_ring.elements;
+- mhi_chan->er_index = ch_cfg->event_ring;
+- mhi_chan->dir = ch_cfg->dir;
+-
+- /*
+- * For most channels, chtype is identical to channel directions.
+- * So, if it is not defined then assign channel direction to
+- * chtype
+- */
+- mhi_chan->type = ch_cfg->type;
+- if (!mhi_chan->type)
+- mhi_chan->type = (enum mhi_ch_type)mhi_chan->dir;
+-
+- mhi_chan->ee_mask = ch_cfg->ee_mask;
+- mhi_chan->db_cfg.pollcfg = ch_cfg->pollcfg;
+- mhi_chan->lpm_notify = ch_cfg->lpm_notify;
+- mhi_chan->offload_ch = ch_cfg->offload_channel;
+- mhi_chan->db_cfg.reset_req = ch_cfg->doorbell_mode_switch;
+- mhi_chan->pre_alloc = ch_cfg->auto_queue;
+-
+- /*
+- * If MHI host allocates buffers, then the channel direction
+- * should be DMA_FROM_DEVICE
+- */
+- if (mhi_chan->pre_alloc && mhi_chan->dir != DMA_FROM_DEVICE) {
+- dev_err(dev, "Invalid channel configuration\n");
+- goto error_chan_cfg;
+- }
+-
+- /*
+- * Bi-directional and direction less channel must be an
+- * offload channel
+- */
+- if ((mhi_chan->dir == DMA_BIDIRECTIONAL ||
+- mhi_chan->dir == DMA_NONE) && !mhi_chan->offload_ch) {
+- dev_err(dev, "Invalid channel configuration\n");
+- goto error_chan_cfg;
+- }
+-
+- if (!mhi_chan->offload_ch) {
+- mhi_chan->db_cfg.brstmode = ch_cfg->doorbell;
+- if (MHI_INVALID_BRSTMODE(mhi_chan->db_cfg.brstmode)) {
+- dev_err(dev, "Invalid Door bell mode\n");
+- goto error_chan_cfg;
+- }
+- }
+-
+- if (mhi_chan->db_cfg.brstmode == MHI_DB_BRST_ENABLE)
+- mhi_chan->db_cfg.process_db = mhi_db_brstmode;
+- else
+- mhi_chan->db_cfg.process_db = mhi_db_brstmode_disable;
+-
+- mhi_chan->configured = true;
+-
+- if (mhi_chan->lpm_notify)
+- list_add_tail(&mhi_chan->node, &mhi_cntrl->lpm_chans);
+- }
+-
+- return 0;
+-
+-error_chan_cfg:
+- vfree(mhi_cntrl->mhi_chan);
+-
+- return -EINVAL;
+-}
+-
+-static int parse_config(struct mhi_controller *mhi_cntrl,
+- const struct mhi_controller_config *config)
+-{
+- int ret;
+-
+- /* Parse MHI channel configuration */
+- ret = parse_ch_cfg(mhi_cntrl, config);
+- if (ret)
+- return ret;
+-
+- /* Parse MHI event configuration */
+- ret = parse_ev_cfg(mhi_cntrl, config);
+- if (ret)
+- goto error_ev_cfg;
+-
+- mhi_cntrl->timeout_ms = config->timeout_ms;
+- if (!mhi_cntrl->timeout_ms)
+- mhi_cntrl->timeout_ms = MHI_TIMEOUT_MS;
+-
+- mhi_cntrl->bounce_buf = config->use_bounce_buf;
+- mhi_cntrl->buffer_len = config->buf_len;
+- if (!mhi_cntrl->buffer_len)
+- mhi_cntrl->buffer_len = MHI_MAX_MTU;
+-
+- /* By default, host is allowed to ring DB in both M0 and M2 states */
+- mhi_cntrl->db_access = MHI_PM_M0 | MHI_PM_M2;
+- if (config->m2_no_db)
+- mhi_cntrl->db_access &= ~MHI_PM_M2;
+-
+- return 0;
+-
+-error_ev_cfg:
+- vfree(mhi_cntrl->mhi_chan);
+-
+- return ret;
+-}
+-
+-int mhi_register_controller(struct mhi_controller *mhi_cntrl,
+- const struct mhi_controller_config *config)
+-{
+- struct mhi_event *mhi_event;
+- struct mhi_chan *mhi_chan;
+- struct mhi_cmd *mhi_cmd;
+- struct mhi_device *mhi_dev;
+- u32 soc_info;
+- int ret, i;
+-
+- if (!mhi_cntrl || !mhi_cntrl->cntrl_dev || !mhi_cntrl->regs ||
+- !mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put ||
+- !mhi_cntrl->status_cb || !mhi_cntrl->read_reg ||
+- !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs ||
+- !mhi_cntrl->irq || !mhi_cntrl->reg_len)
+- return -EINVAL;
+-
+- ret = parse_config(mhi_cntrl, config);
+- if (ret)
+- return -EINVAL;
+-
+- mhi_cntrl->mhi_cmd = kcalloc(NR_OF_CMD_RINGS,
+- sizeof(*mhi_cntrl->mhi_cmd), GFP_KERNEL);
+- if (!mhi_cntrl->mhi_cmd) {
+- ret = -ENOMEM;
+- goto err_free_event;
+- }
+-
+- INIT_LIST_HEAD(&mhi_cntrl->transition_list);
+- mutex_init(&mhi_cntrl->pm_mutex);
+- rwlock_init(&mhi_cntrl->pm_lock);
+- spin_lock_init(&mhi_cntrl->transition_lock);
+- spin_lock_init(&mhi_cntrl->wlock);
+- INIT_WORK(&mhi_cntrl->st_worker, mhi_pm_st_worker);
+- init_waitqueue_head(&mhi_cntrl->state_event);
+-
+- mhi_cntrl->hiprio_wq = alloc_ordered_workqueue("mhi_hiprio_wq", WQ_HIGHPRI);
+- if (!mhi_cntrl->hiprio_wq) {
+- dev_err(mhi_cntrl->cntrl_dev, "Failed to allocate workqueue\n");
+- ret = -ENOMEM;
+- goto err_free_cmd;
+- }
+-
+- mhi_cmd = mhi_cntrl->mhi_cmd;
+- for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++)
+- spin_lock_init(&mhi_cmd->lock);
+-
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- /* Skip for offload events */
+- if (mhi_event->offload_ev)
+- continue;
+-
+- mhi_event->mhi_cntrl = mhi_cntrl;
+- spin_lock_init(&mhi_event->lock);
+- if (mhi_event->data_type == MHI_ER_CTRL)
+- tasklet_init(&mhi_event->task, mhi_ctrl_ev_task,
+- (ulong)mhi_event);
+- else
+- tasklet_init(&mhi_event->task, mhi_ev_task,
+- (ulong)mhi_event);
+- }
+-
+- mhi_chan = mhi_cntrl->mhi_chan;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
+- mutex_init(&mhi_chan->mutex);
+- init_completion(&mhi_chan->completion);
+- rwlock_init(&mhi_chan->lock);
+-
+- /* used in setting bei field of TRE */
+- mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
+- mhi_chan->intmod = mhi_event->intmod;
+- }
+-
+- if (mhi_cntrl->bounce_buf) {
+- mhi_cntrl->map_single = mhi_map_single_use_bb;
+- mhi_cntrl->unmap_single = mhi_unmap_single_use_bb;
+- } else {
+- mhi_cntrl->map_single = mhi_map_single_no_bb;
+- mhi_cntrl->unmap_single = mhi_unmap_single_no_bb;
+- }
+-
+- /* Read the MHI device info */
+- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs,
+- SOC_HW_VERSION_OFFS, &soc_info);
+- if (ret)
+- goto err_destroy_wq;
+-
+- mhi_cntrl->family_number = (soc_info & SOC_HW_VERSION_FAM_NUM_BMSK) >>
+- SOC_HW_VERSION_FAM_NUM_SHFT;
+- mhi_cntrl->device_number = (soc_info & SOC_HW_VERSION_DEV_NUM_BMSK) >>
+- SOC_HW_VERSION_DEV_NUM_SHFT;
+- mhi_cntrl->major_version = (soc_info & SOC_HW_VERSION_MAJOR_VER_BMSK) >>
+- SOC_HW_VERSION_MAJOR_VER_SHFT;
+- mhi_cntrl->minor_version = (soc_info & SOC_HW_VERSION_MINOR_VER_BMSK) >>
+- SOC_HW_VERSION_MINOR_VER_SHFT;
+-
+- mhi_cntrl->index = ida_alloc(&mhi_controller_ida, GFP_KERNEL);
+- if (mhi_cntrl->index < 0) {
+- ret = mhi_cntrl->index;
+- goto err_destroy_wq;
+- }
+-
+- /* Register controller with MHI bus */
+- mhi_dev = mhi_alloc_device(mhi_cntrl);
+- if (IS_ERR(mhi_dev)) {
+- dev_err(mhi_cntrl->cntrl_dev, "Failed to allocate MHI device\n");
+- ret = PTR_ERR(mhi_dev);
+- goto err_ida_free;
+- }
+-
+- mhi_dev->dev_type = MHI_DEVICE_CONTROLLER;
+- mhi_dev->mhi_cntrl = mhi_cntrl;
+- dev_set_name(&mhi_dev->dev, "mhi%d", mhi_cntrl->index);
+- mhi_dev->name = dev_name(&mhi_dev->dev);
+-
+- /* Init wakeup source */
+- device_init_wakeup(&mhi_dev->dev, true);
+-
+- ret = device_add(&mhi_dev->dev);
+- if (ret)
+- goto err_release_dev;
+-
+- mhi_cntrl->mhi_dev = mhi_dev;
+-
+- mhi_create_debugfs(mhi_cntrl);
+-
+- return 0;
+-
+-err_release_dev:
+- put_device(&mhi_dev->dev);
+-err_ida_free:
+- ida_free(&mhi_controller_ida, mhi_cntrl->index);
+-err_destroy_wq:
+- destroy_workqueue(mhi_cntrl->hiprio_wq);
+-err_free_cmd:
+- kfree(mhi_cntrl->mhi_cmd);
+-err_free_event:
+- kfree(mhi_cntrl->mhi_event);
+- vfree(mhi_cntrl->mhi_chan);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_register_controller);
+-
+-void mhi_unregister_controller(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
+- struct mhi_chan *mhi_chan = mhi_cntrl->mhi_chan;
+- unsigned int i;
+-
+- mhi_destroy_debugfs(mhi_cntrl);
+-
+- destroy_workqueue(mhi_cntrl->hiprio_wq);
+- kfree(mhi_cntrl->mhi_cmd);
+- kfree(mhi_cntrl->mhi_event);
+-
+- /* Drop the references to MHI devices created for channels */
+- for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
+- if (!mhi_chan->mhi_dev)
+- continue;
+-
+- put_device(&mhi_chan->mhi_dev->dev);
+- }
+- vfree(mhi_cntrl->mhi_chan);
+-
+- device_del(&mhi_dev->dev);
+- put_device(&mhi_dev->dev);
+-
+- ida_free(&mhi_controller_ida, mhi_cntrl->index);
+-}
+-EXPORT_SYMBOL_GPL(mhi_unregister_controller);
+-
+-struct mhi_controller *mhi_alloc_controller(void)
+-{
+- struct mhi_controller *mhi_cntrl;
+-
+- mhi_cntrl = kzalloc(sizeof(*mhi_cntrl), GFP_KERNEL);
+-
+- return mhi_cntrl;
+-}
+-EXPORT_SYMBOL_GPL(mhi_alloc_controller);
+-
+-void mhi_free_controller(struct mhi_controller *mhi_cntrl)
+-{
+- kfree(mhi_cntrl);
+-}
+-EXPORT_SYMBOL_GPL(mhi_free_controller);
+-
+-int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl)
+-{
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 bhi_off, bhie_off;
+- int ret;
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+-
+- ret = mhi_init_dev_ctxt(mhi_cntrl);
+- if (ret)
+- goto error_dev_ctxt;
+-
+- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &bhi_off);
+- if (ret) {
+- dev_err(dev, "Error getting BHI offset\n");
+- goto error_reg_offset;
+- }
+-
+- if (bhi_off >= mhi_cntrl->reg_len) {
+- dev_err(dev, "BHI offset: 0x%x is out of range: 0x%zx\n",
+- bhi_off, mhi_cntrl->reg_len);
+- ret = -EINVAL;
+- goto error_reg_offset;
+- }
+- mhi_cntrl->bhi = mhi_cntrl->regs + bhi_off;
+-
+- if (mhi_cntrl->fbc_download || mhi_cntrl->rddm_size) {
+- ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF,
+- &bhie_off);
+- if (ret) {
+- dev_err(dev, "Error getting BHIE offset\n");
+- goto error_reg_offset;
+- }
+-
+- if (bhie_off >= mhi_cntrl->reg_len) {
+- dev_err(dev,
+- "BHIe offset: 0x%x is out of range: 0x%zx\n",
+- bhie_off, mhi_cntrl->reg_len);
+- ret = -EINVAL;
+- goto error_reg_offset;
+- }
+- mhi_cntrl->bhie = mhi_cntrl->regs + bhie_off;
+- }
+-
+- if (mhi_cntrl->rddm_size) {
+- /*
+- * This controller supports RDDM, so we need to manually clear
+- * BHIE RX registers since POR values are undefined.
+- */
+- memset_io(mhi_cntrl->bhie + BHIE_RXVECADDR_LOW_OFFS,
+- 0, BHIE_RXVECSTATUS_OFFS - BHIE_RXVECADDR_LOW_OFFS +
+- 4);
+- /*
+- * Allocate RDDM table for debugging purpose if specified
+- */
+- mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image,
+- mhi_cntrl->rddm_size);
+- if (mhi_cntrl->rddm_image)
+- mhi_rddm_prepare(mhi_cntrl, mhi_cntrl->rddm_image);
+- }
+-
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-
+- return 0;
+-
+-error_reg_offset:
+- mhi_deinit_dev_ctxt(mhi_cntrl);
+-
+-error_dev_ctxt:
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_prepare_for_power_up);
+-
+-void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl)
+-{
+- if (mhi_cntrl->fbc_image) {
+- mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->fbc_image);
+- mhi_cntrl->fbc_image = NULL;
+- }
+-
+- if (mhi_cntrl->rddm_image) {
+- mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->rddm_image);
+- mhi_cntrl->rddm_image = NULL;
+- }
+-
+- mhi_cntrl->bhi = NULL;
+- mhi_cntrl->bhie = NULL;
+-
+- mhi_deinit_dev_ctxt(mhi_cntrl);
+-}
+-EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down);
+-
+-static void mhi_release_device(struct device *dev)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+-
+- /*
+- * We need to set the mhi_chan->mhi_dev to NULL here since the MHI
+- * devices for the channels will only get created if the mhi_dev
+- * associated with it is NULL. This scenario will happen during the
+- * controller suspend and resume.
+- */
+- if (mhi_dev->ul_chan)
+- mhi_dev->ul_chan->mhi_dev = NULL;
+-
+- if (mhi_dev->dl_chan)
+- mhi_dev->dl_chan->mhi_dev = NULL;
+-
+- kfree(mhi_dev);
+-}
+-
+-struct mhi_device *mhi_alloc_device(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_device *mhi_dev;
+- struct device *dev;
+-
+- mhi_dev = kzalloc(sizeof(*mhi_dev), GFP_KERNEL);
+- if (!mhi_dev)
+- return ERR_PTR(-ENOMEM);
+-
+- dev = &mhi_dev->dev;
+- device_initialize(dev);
+- dev->bus = &mhi_bus_type;
+- dev->release = mhi_release_device;
+-
+- if (mhi_cntrl->mhi_dev) {
+- /* for MHI client devices, parent is the MHI controller device */
+- dev->parent = &mhi_cntrl->mhi_dev->dev;
+- } else {
+- /* for MHI controller device, parent is the bus device (e.g. pci device) */
+- dev->parent = mhi_cntrl->cntrl_dev;
+- }
+-
+- mhi_dev->mhi_cntrl = mhi_cntrl;
+- mhi_dev->dev_wake = 0;
+-
+- return mhi_dev;
+-}
+-
+-static int mhi_driver_probe(struct device *dev)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct device_driver *drv = dev->driver;
+- struct mhi_driver *mhi_drv = to_mhi_driver(drv);
+- struct mhi_event *mhi_event;
+- struct mhi_chan *ul_chan = mhi_dev->ul_chan;
+- struct mhi_chan *dl_chan = mhi_dev->dl_chan;
+- int ret;
+-
+- /* Bring device out of LPM */
+- ret = mhi_device_get_sync(mhi_dev);
+- if (ret)
+- return ret;
+-
+- ret = -EINVAL;
+-
+- if (ul_chan) {
+- /*
+- * If channel supports LPM notifications then status_cb should
+- * be provided
+- */
+- if (ul_chan->lpm_notify && !mhi_drv->status_cb)
+- goto exit_probe;
+-
+- /* For non-offload channels then xfer_cb should be provided */
+- if (!ul_chan->offload_ch && !mhi_drv->ul_xfer_cb)
+- goto exit_probe;
+-
+- ul_chan->xfer_cb = mhi_drv->ul_xfer_cb;
+- }
+-
+- ret = -EINVAL;
+- if (dl_chan) {
+- /*
+- * If channel supports LPM notifications then status_cb should
+- * be provided
+- */
+- if (dl_chan->lpm_notify && !mhi_drv->status_cb)
+- goto exit_probe;
+-
+- /* For non-offload channels then xfer_cb should be provided */
+- if (!dl_chan->offload_ch && !mhi_drv->dl_xfer_cb)
+- goto exit_probe;
+-
+- mhi_event = &mhi_cntrl->mhi_event[dl_chan->er_index];
+-
+- /*
+- * If the channel event ring is managed by client, then
+- * status_cb must be provided so that the framework can
+- * notify pending data
+- */
+- if (mhi_event->cl_manage && !mhi_drv->status_cb)
+- goto exit_probe;
+-
+- dl_chan->xfer_cb = mhi_drv->dl_xfer_cb;
+- }
+-
+- /* Call the user provided probe function */
+- ret = mhi_drv->probe(mhi_dev, mhi_dev->id);
+- if (ret)
+- goto exit_probe;
+-
+- mhi_device_put(mhi_dev);
+-
+- return ret;
+-
+-exit_probe:
+- mhi_unprepare_from_transfer(mhi_dev);
+-
+- mhi_device_put(mhi_dev);
+-
+- return ret;
+-}
+-
+-static int mhi_driver_remove(struct device *dev)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+- struct mhi_driver *mhi_drv = to_mhi_driver(dev->driver);
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan;
+- enum mhi_ch_state ch_state[] = {
+- MHI_CH_STATE_DISABLED,
+- MHI_CH_STATE_DISABLED
+- };
+- int dir;
+-
+- /* Skip if it is a controller device */
+- if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
+- return 0;
+-
+- /* Reset both channels */
+- for (dir = 0; dir < 2; dir++) {
+- mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
+-
+- if (!mhi_chan)
+- continue;
+-
+- /* Wake all threads waiting for completion */
+- write_lock_irq(&mhi_chan->lock);
+- mhi_chan->ccs = MHI_EV_CC_INVALID;
+- complete_all(&mhi_chan->completion);
+- write_unlock_irq(&mhi_chan->lock);
+-
+- /* Set the channel state to disabled */
+- mutex_lock(&mhi_chan->mutex);
+- write_lock_irq(&mhi_chan->lock);
+- ch_state[dir] = mhi_chan->ch_state;
+- mhi_chan->ch_state = MHI_CH_STATE_SUSPENDED;
+- write_unlock_irq(&mhi_chan->lock);
+-
+- /* Reset the non-offload channel */
+- if (!mhi_chan->offload_ch)
+- mhi_reset_chan(mhi_cntrl, mhi_chan);
+-
+- mutex_unlock(&mhi_chan->mutex);
+- }
+-
+- mhi_drv->remove(mhi_dev);
+-
+- /* De-init channel if it was enabled */
+- for (dir = 0; dir < 2; dir++) {
+- mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
+-
+- if (!mhi_chan)
+- continue;
+-
+- mutex_lock(&mhi_chan->mutex);
+-
+- if ((ch_state[dir] == MHI_CH_STATE_ENABLED ||
+- ch_state[dir] == MHI_CH_STATE_STOP) &&
+- !mhi_chan->offload_ch)
+- mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
+-
+- mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
+-
+- mutex_unlock(&mhi_chan->mutex);
+- }
+-
+- while (mhi_dev->dev_wake)
+- mhi_device_put(mhi_dev);
+-
+- return 0;
+-}
+-
+-int __mhi_driver_register(struct mhi_driver *mhi_drv, struct module *owner)
+-{
+- struct device_driver *driver = &mhi_drv->driver;
+-
+- if (!mhi_drv->probe || !mhi_drv->remove)
+- return -EINVAL;
+-
+- driver->bus = &mhi_bus_type;
+- driver->owner = owner;
+- driver->probe = mhi_driver_probe;
+- driver->remove = mhi_driver_remove;
+-
+- return driver_register(driver);
+-}
+-EXPORT_SYMBOL_GPL(__mhi_driver_register);
+-
+-void mhi_driver_unregister(struct mhi_driver *mhi_drv)
+-{
+- driver_unregister(&mhi_drv->driver);
+-}
+-EXPORT_SYMBOL_GPL(mhi_driver_unregister);
+-
+-static int mhi_uevent(struct device *dev, struct kobj_uevent_env *env)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+-
+- return add_uevent_var(env, "MODALIAS=" MHI_DEVICE_MODALIAS_FMT,
+- mhi_dev->name);
+-}
+-
+-static int mhi_match(struct device *dev, struct device_driver *drv)
+-{
+- struct mhi_device *mhi_dev = to_mhi_device(dev);
+- struct mhi_driver *mhi_drv = to_mhi_driver(drv);
+- const struct mhi_device_id *id;
+-
+- /*
+- * If the device is a controller type then there is no client driver
+- * associated with it
+- */
+- if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
+- return 0;
+-
+- for (id = mhi_drv->id_table; id->chan[0]; id++)
+- if (!strcmp(mhi_dev->name, id->chan)) {
+- mhi_dev->id = id;
+- return 1;
+- }
+-
+- return 0;
+-};
+-
+-struct bus_type mhi_bus_type = {
+- .name = "mhi",
+- .dev_name = "mhi",
+- .match = mhi_match,
+- .uevent = mhi_uevent,
+- .dev_groups = mhi_dev_groups,
+-};
+-
+-static int __init mhi_init(void)
+-{
+- mhi_debugfs_init();
+- return bus_register(&mhi_bus_type);
+-}
+-
+-static void __exit mhi_exit(void)
+-{
+- mhi_debugfs_exit();
+- bus_unregister(&mhi_bus_type);
+-}
+-
+-postcore_initcall(mhi_init);
+-module_exit(mhi_exit);
+-
+-MODULE_LICENSE("GPL v2");
+-MODULE_DESCRIPTION("MHI Host Interface");
+diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h
+deleted file mode 100644
+index 3a732afaf73ed..0000000000000
+--- a/drivers/bus/mhi/core/internal.h
++++ /dev/null
+@@ -1,717 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#ifndef _MHI_INT_H
+-#define _MHI_INT_H
+-
+-#include <linux/mhi.h>
+-
+-extern struct bus_type mhi_bus_type;
+-
+-#define MHIREGLEN (0x0)
+-#define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF)
+-#define MHIREGLEN_MHIREGLEN_SHIFT (0)
+-
+-#define MHIVER (0x8)
+-#define MHIVER_MHIVER_MASK (0xFFFFFFFF)
+-#define MHIVER_MHIVER_SHIFT (0)
+-
+-#define MHICFG (0x10)
+-#define MHICFG_NHWER_MASK (0xFF000000)
+-#define MHICFG_NHWER_SHIFT (24)
+-#define MHICFG_NER_MASK (0xFF0000)
+-#define MHICFG_NER_SHIFT (16)
+-#define MHICFG_NHWCH_MASK (0xFF00)
+-#define MHICFG_NHWCH_SHIFT (8)
+-#define MHICFG_NCH_MASK (0xFF)
+-#define MHICFG_NCH_SHIFT (0)
+-
+-#define CHDBOFF (0x18)
+-#define CHDBOFF_CHDBOFF_MASK (0xFFFFFFFF)
+-#define CHDBOFF_CHDBOFF_SHIFT (0)
+-
+-#define ERDBOFF (0x20)
+-#define ERDBOFF_ERDBOFF_MASK (0xFFFFFFFF)
+-#define ERDBOFF_ERDBOFF_SHIFT (0)
+-
+-#define BHIOFF (0x28)
+-#define BHIOFF_BHIOFF_MASK (0xFFFFFFFF)
+-#define BHIOFF_BHIOFF_SHIFT (0)
+-
+-#define BHIEOFF (0x2C)
+-#define BHIEOFF_BHIEOFF_MASK (0xFFFFFFFF)
+-#define BHIEOFF_BHIEOFF_SHIFT (0)
+-
+-#define DEBUGOFF (0x30)
+-#define DEBUGOFF_DEBUGOFF_MASK (0xFFFFFFFF)
+-#define DEBUGOFF_DEBUGOFF_SHIFT (0)
+-
+-#define MHICTRL (0x38)
+-#define MHICTRL_MHISTATE_MASK (0x0000FF00)
+-#define MHICTRL_MHISTATE_SHIFT (8)
+-#define MHICTRL_RESET_MASK (0x2)
+-#define MHICTRL_RESET_SHIFT (1)
+-
+-#define MHISTATUS (0x48)
+-#define MHISTATUS_MHISTATE_MASK (0x0000FF00)
+-#define MHISTATUS_MHISTATE_SHIFT (8)
+-#define MHISTATUS_SYSERR_MASK (0x4)
+-#define MHISTATUS_SYSERR_SHIFT (2)
+-#define MHISTATUS_READY_MASK (0x1)
+-#define MHISTATUS_READY_SHIFT (0)
+-
+-#define CCABAP_LOWER (0x58)
+-#define CCABAP_LOWER_CCABAP_LOWER_MASK (0xFFFFFFFF)
+-#define CCABAP_LOWER_CCABAP_LOWER_SHIFT (0)
+-
+-#define CCABAP_HIGHER (0x5C)
+-#define CCABAP_HIGHER_CCABAP_HIGHER_MASK (0xFFFFFFFF)
+-#define CCABAP_HIGHER_CCABAP_HIGHER_SHIFT (0)
+-
+-#define ECABAP_LOWER (0x60)
+-#define ECABAP_LOWER_ECABAP_LOWER_MASK (0xFFFFFFFF)
+-#define ECABAP_LOWER_ECABAP_LOWER_SHIFT (0)
+-
+-#define ECABAP_HIGHER (0x64)
+-#define ECABAP_HIGHER_ECABAP_HIGHER_MASK (0xFFFFFFFF)
+-#define ECABAP_HIGHER_ECABAP_HIGHER_SHIFT (0)
+-
+-#define CRCBAP_LOWER (0x68)
+-#define CRCBAP_LOWER_CRCBAP_LOWER_MASK (0xFFFFFFFF)
+-#define CRCBAP_LOWER_CRCBAP_LOWER_SHIFT (0)
+-
+-#define CRCBAP_HIGHER (0x6C)
+-#define CRCBAP_HIGHER_CRCBAP_HIGHER_MASK (0xFFFFFFFF)
+-#define CRCBAP_HIGHER_CRCBAP_HIGHER_SHIFT (0)
+-
+-#define CRDB_LOWER (0x70)
+-#define CRDB_LOWER_CRDB_LOWER_MASK (0xFFFFFFFF)
+-#define CRDB_LOWER_CRDB_LOWER_SHIFT (0)
+-
+-#define CRDB_HIGHER (0x74)
+-#define CRDB_HIGHER_CRDB_HIGHER_MASK (0xFFFFFFFF)
+-#define CRDB_HIGHER_CRDB_HIGHER_SHIFT (0)
+-
+-#define MHICTRLBASE_LOWER (0x80)
+-#define MHICTRLBASE_LOWER_MHICTRLBASE_LOWER_MASK (0xFFFFFFFF)
+-#define MHICTRLBASE_LOWER_MHICTRLBASE_LOWER_SHIFT (0)
+-
+-#define MHICTRLBASE_HIGHER (0x84)
+-#define MHICTRLBASE_HIGHER_MHICTRLBASE_HIGHER_MASK (0xFFFFFFFF)
+-#define MHICTRLBASE_HIGHER_MHICTRLBASE_HIGHER_SHIFT (0)
+-
+-#define MHICTRLLIMIT_LOWER (0x88)
+-#define MHICTRLLIMIT_LOWER_MHICTRLLIMIT_LOWER_MASK (0xFFFFFFFF)
+-#define MHICTRLLIMIT_LOWER_MHICTRLLIMIT_LOWER_SHIFT (0)
+-
+-#define MHICTRLLIMIT_HIGHER (0x8C)
+-#define MHICTRLLIMIT_HIGHER_MHICTRLLIMIT_HIGHER_MASK (0xFFFFFFFF)
+-#define MHICTRLLIMIT_HIGHER_MHICTRLLIMIT_HIGHER_SHIFT (0)
+-
+-#define MHIDATABASE_LOWER (0x98)
+-#define MHIDATABASE_LOWER_MHIDATABASE_LOWER_MASK (0xFFFFFFFF)
+-#define MHIDATABASE_LOWER_MHIDATABASE_LOWER_SHIFT (0)
+-
+-#define MHIDATABASE_HIGHER (0x9C)
+-#define MHIDATABASE_HIGHER_MHIDATABASE_HIGHER_MASK (0xFFFFFFFF)
+-#define MHIDATABASE_HIGHER_MHIDATABASE_HIGHER_SHIFT (0)
+-
+-#define MHIDATALIMIT_LOWER (0xA0)
+-#define MHIDATALIMIT_LOWER_MHIDATALIMIT_LOWER_MASK (0xFFFFFFFF)
+-#define MHIDATALIMIT_LOWER_MHIDATALIMIT_LOWER_SHIFT (0)
+-
+-#define MHIDATALIMIT_HIGHER (0xA4)
+-#define MHIDATALIMIT_HIGHER_MHIDATALIMIT_HIGHER_MASK (0xFFFFFFFF)
+-#define MHIDATALIMIT_HIGHER_MHIDATALIMIT_HIGHER_SHIFT (0)
+-
+-/* Host request register */
+-#define MHI_SOC_RESET_REQ_OFFSET (0xB0)
+-#define MHI_SOC_RESET_REQ BIT(0)
+-
+-/* MHI BHI offfsets */
+-#define BHI_BHIVERSION_MINOR (0x00)
+-#define BHI_BHIVERSION_MAJOR (0x04)
+-#define BHI_IMGADDR_LOW (0x08)
+-#define BHI_IMGADDR_HIGH (0x0C)
+-#define BHI_IMGSIZE (0x10)
+-#define BHI_RSVD1 (0x14)
+-#define BHI_IMGTXDB (0x18)
+-#define BHI_TXDB_SEQNUM_BMSK (0x3FFFFFFF)
+-#define BHI_TXDB_SEQNUM_SHFT (0)
+-#define BHI_RSVD2 (0x1C)
+-#define BHI_INTVEC (0x20)
+-#define BHI_RSVD3 (0x24)
+-#define BHI_EXECENV (0x28)
+-#define BHI_STATUS (0x2C)
+-#define BHI_ERRCODE (0x30)
+-#define BHI_ERRDBG1 (0x34)
+-#define BHI_ERRDBG2 (0x38)
+-#define BHI_ERRDBG3 (0x3C)
+-#define BHI_SERIALNU (0x40)
+-#define BHI_SBLANTIROLLVER (0x44)
+-#define BHI_NUMSEG (0x48)
+-#define BHI_MSMHWID(n) (0x4C + (0x4 * (n)))
+-#define BHI_OEMPKHASH(n) (0x64 + (0x4 * (n)))
+-#define BHI_RSVD5 (0xC4)
+-#define BHI_STATUS_MASK (0xC0000000)
+-#define BHI_STATUS_SHIFT (30)
+-#define BHI_STATUS_ERROR (3)
+-#define BHI_STATUS_SUCCESS (2)
+-#define BHI_STATUS_RESET (0)
+-
+-/* MHI BHIE offsets */
+-#define BHIE_MSMSOCID_OFFS (0x0000)
+-#define BHIE_TXVECADDR_LOW_OFFS (0x002C)
+-#define BHIE_TXVECADDR_HIGH_OFFS (0x0030)
+-#define BHIE_TXVECSIZE_OFFS (0x0034)
+-#define BHIE_TXVECDB_OFFS (0x003C)
+-#define BHIE_TXVECDB_SEQNUM_BMSK (0x3FFFFFFF)
+-#define BHIE_TXVECDB_SEQNUM_SHFT (0)
+-#define BHIE_TXVECSTATUS_OFFS (0x0044)
+-#define BHIE_TXVECSTATUS_SEQNUM_BMSK (0x3FFFFFFF)
+-#define BHIE_TXVECSTATUS_SEQNUM_SHFT (0)
+-#define BHIE_TXVECSTATUS_STATUS_BMSK (0xC0000000)
+-#define BHIE_TXVECSTATUS_STATUS_SHFT (30)
+-#define BHIE_TXVECSTATUS_STATUS_RESET (0x00)
+-#define BHIE_TXVECSTATUS_STATUS_XFER_COMPL (0x02)
+-#define BHIE_TXVECSTATUS_STATUS_ERROR (0x03)
+-#define BHIE_RXVECADDR_LOW_OFFS (0x0060)
+-#define BHIE_RXVECADDR_HIGH_OFFS (0x0064)
+-#define BHIE_RXVECSIZE_OFFS (0x0068)
+-#define BHIE_RXVECDB_OFFS (0x0070)
+-#define BHIE_RXVECDB_SEQNUM_BMSK (0x3FFFFFFF)
+-#define BHIE_RXVECDB_SEQNUM_SHFT (0)
+-#define BHIE_RXVECSTATUS_OFFS (0x0078)
+-#define BHIE_RXVECSTATUS_SEQNUM_BMSK (0x3FFFFFFF)
+-#define BHIE_RXVECSTATUS_SEQNUM_SHFT (0)
+-#define BHIE_RXVECSTATUS_STATUS_BMSK (0xC0000000)
+-#define BHIE_RXVECSTATUS_STATUS_SHFT (30)
+-#define BHIE_RXVECSTATUS_STATUS_RESET (0x00)
+-#define BHIE_RXVECSTATUS_STATUS_XFER_COMPL (0x02)
+-#define BHIE_RXVECSTATUS_STATUS_ERROR (0x03)
+-
+-#define SOC_HW_VERSION_OFFS (0x224)
+-#define SOC_HW_VERSION_FAM_NUM_BMSK (0xF0000000)
+-#define SOC_HW_VERSION_FAM_NUM_SHFT (28)
+-#define SOC_HW_VERSION_DEV_NUM_BMSK (0x0FFF0000)
+-#define SOC_HW_VERSION_DEV_NUM_SHFT (16)
+-#define SOC_HW_VERSION_MAJOR_VER_BMSK (0x0000FF00)
+-#define SOC_HW_VERSION_MAJOR_VER_SHFT (8)
+-#define SOC_HW_VERSION_MINOR_VER_BMSK (0x000000FF)
+-#define SOC_HW_VERSION_MINOR_VER_SHFT (0)
+-
+-#define EV_CTX_RESERVED_MASK GENMASK(7, 0)
+-#define EV_CTX_INTMODC_MASK GENMASK(15, 8)
+-#define EV_CTX_INTMODC_SHIFT 8
+-#define EV_CTX_INTMODT_MASK GENMASK(31, 16)
+-#define EV_CTX_INTMODT_SHIFT 16
+-struct mhi_event_ctxt {
+- __u32 intmod;
+- __u32 ertype;
+- __u32 msivec;
+-
+- __u64 rbase __packed __aligned(4);
+- __u64 rlen __packed __aligned(4);
+- __u64 rp __packed __aligned(4);
+- __u64 wp __packed __aligned(4);
+-};
+-
+-#define CHAN_CTX_CHSTATE_MASK GENMASK(7, 0)
+-#define CHAN_CTX_CHSTATE_SHIFT 0
+-#define CHAN_CTX_BRSTMODE_MASK GENMASK(9, 8)
+-#define CHAN_CTX_BRSTMODE_SHIFT 8
+-#define CHAN_CTX_POLLCFG_MASK GENMASK(15, 10)
+-#define CHAN_CTX_POLLCFG_SHIFT 10
+-#define CHAN_CTX_RESERVED_MASK GENMASK(31, 16)
+-struct mhi_chan_ctxt {
+- __u32 chcfg;
+- __u32 chtype;
+- __u32 erindex;
+-
+- __u64 rbase __packed __aligned(4);
+- __u64 rlen __packed __aligned(4);
+- __u64 rp __packed __aligned(4);
+- __u64 wp __packed __aligned(4);
+-};
+-
+-struct mhi_cmd_ctxt {
+- __u32 reserved0;
+- __u32 reserved1;
+- __u32 reserved2;
+-
+- __u64 rbase __packed __aligned(4);
+- __u64 rlen __packed __aligned(4);
+- __u64 rp __packed __aligned(4);
+- __u64 wp __packed __aligned(4);
+-};
+-
+-struct mhi_ctxt {
+- struct mhi_event_ctxt *er_ctxt;
+- struct mhi_chan_ctxt *chan_ctxt;
+- struct mhi_cmd_ctxt *cmd_ctxt;
+- dma_addr_t er_ctxt_addr;
+- dma_addr_t chan_ctxt_addr;
+- dma_addr_t cmd_ctxt_addr;
+-};
+-
+-struct mhi_tre {
+- u64 ptr;
+- u32 dword[2];
+-};
+-
+-struct bhi_vec_entry {
+- u64 dma_addr;
+- u64 size;
+-};
+-
+-enum mhi_cmd_type {
+- MHI_CMD_NOP = 1,
+- MHI_CMD_RESET_CHAN = 16,
+- MHI_CMD_STOP_CHAN = 17,
+- MHI_CMD_START_CHAN = 18,
+-};
+-
+-/* No operation command */
+-#define MHI_TRE_CMD_NOOP_PTR (0)
+-#define MHI_TRE_CMD_NOOP_DWORD0 (0)
+-#define MHI_TRE_CMD_NOOP_DWORD1 (MHI_CMD_NOP << 16)
+-
+-/* Channel reset command */
+-#define MHI_TRE_CMD_RESET_PTR (0)
+-#define MHI_TRE_CMD_RESET_DWORD0 (0)
+-#define MHI_TRE_CMD_RESET_DWORD1(chid) ((chid << 24) | \
+- (MHI_CMD_RESET_CHAN << 16))
+-
+-/* Channel stop command */
+-#define MHI_TRE_CMD_STOP_PTR (0)
+-#define MHI_TRE_CMD_STOP_DWORD0 (0)
+-#define MHI_TRE_CMD_STOP_DWORD1(chid) ((chid << 24) | \
+- (MHI_CMD_STOP_CHAN << 16))
+-
+-/* Channel start command */
+-#define MHI_TRE_CMD_START_PTR (0)
+-#define MHI_TRE_CMD_START_DWORD0 (0)
+-#define MHI_TRE_CMD_START_DWORD1(chid) ((chid << 24) | \
+- (MHI_CMD_START_CHAN << 16))
+-
+-#define MHI_TRE_GET_CMD_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF)
+-#define MHI_TRE_GET_CMD_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF)
+-
+-/* Event descriptor macros */
+-#define MHI_TRE_EV_PTR(ptr) (ptr)
+-#define MHI_TRE_EV_DWORD0(code, len) ((code << 24) | len)
+-#define MHI_TRE_EV_DWORD1(chid, type) ((chid << 24) | (type << 16))
+-#define MHI_TRE_GET_EV_PTR(tre) ((tre)->ptr)
+-#define MHI_TRE_GET_EV_CODE(tre) (((tre)->dword[0] >> 24) & 0xFF)
+-#define MHI_TRE_GET_EV_LEN(tre) ((tre)->dword[0] & 0xFFFF)
+-#define MHI_TRE_GET_EV_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF)
+-#define MHI_TRE_GET_EV_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF)
+-#define MHI_TRE_GET_EV_STATE(tre) (((tre)->dword[0] >> 24) & 0xFF)
+-#define MHI_TRE_GET_EV_EXECENV(tre) (((tre)->dword[0] >> 24) & 0xFF)
+-#define MHI_TRE_GET_EV_SEQ(tre) ((tre)->dword[0])
+-#define MHI_TRE_GET_EV_TIME(tre) ((tre)->ptr)
+-#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits((tre)->ptr)
+-#define MHI_TRE_GET_EV_VEID(tre) (((tre)->dword[0] >> 16) & 0xFF)
+-#define MHI_TRE_GET_EV_LINKSPEED(tre) (((tre)->dword[1] >> 24) & 0xFF)
+-#define MHI_TRE_GET_EV_LINKWIDTH(tre) ((tre)->dword[0] & 0xFF)
+-
+-/* Transfer descriptor macros */
+-#define MHI_TRE_DATA_PTR(ptr) (ptr)
+-#define MHI_TRE_DATA_DWORD0(len) (len & MHI_MAX_MTU)
+-#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) ((2 << 16) | (bei << 10) \
+- | (ieot << 9) | (ieob << 8) | chain)
+-
+-/* RSC transfer descriptor macros */
+-#define MHI_RSCTRE_DATA_PTR(ptr, len) (((u64)len << 48) | ptr)
+-#define MHI_RSCTRE_DATA_DWORD0(cookie) (cookie)
+-#define MHI_RSCTRE_DATA_DWORD1 (MHI_PKT_TYPE_COALESCING << 16)
+-
+-enum mhi_pkt_type {
+- MHI_PKT_TYPE_INVALID = 0x0,
+- MHI_PKT_TYPE_NOOP_CMD = 0x1,
+- MHI_PKT_TYPE_TRANSFER = 0x2,
+- MHI_PKT_TYPE_COALESCING = 0x8,
+- MHI_PKT_TYPE_RESET_CHAN_CMD = 0x10,
+- MHI_PKT_TYPE_STOP_CHAN_CMD = 0x11,
+- MHI_PKT_TYPE_START_CHAN_CMD = 0x12,
+- MHI_PKT_TYPE_STATE_CHANGE_EVENT = 0x20,
+- MHI_PKT_TYPE_CMD_COMPLETION_EVENT = 0x21,
+- MHI_PKT_TYPE_TX_EVENT = 0x22,
+- MHI_PKT_TYPE_RSC_TX_EVENT = 0x28,
+- MHI_PKT_TYPE_EE_EVENT = 0x40,
+- MHI_PKT_TYPE_TSYNC_EVENT = 0x48,
+- MHI_PKT_TYPE_BW_REQ_EVENT = 0x50,
+- MHI_PKT_TYPE_STALE_EVENT, /* internal event */
+-};
+-
+-/* MHI transfer completion events */
+-enum mhi_ev_ccs {
+- MHI_EV_CC_INVALID = 0x0,
+- MHI_EV_CC_SUCCESS = 0x1,
+- MHI_EV_CC_EOT = 0x2, /* End of transfer event */
+- MHI_EV_CC_OVERFLOW = 0x3,
+- MHI_EV_CC_EOB = 0x4, /* End of block event */
+- MHI_EV_CC_OOB = 0x5, /* Out of block event */
+- MHI_EV_CC_DB_MODE = 0x6,
+- MHI_EV_CC_UNDEFINED_ERR = 0x10,
+- MHI_EV_CC_BAD_TRE = 0x11,
+-};
+-
+-enum mhi_ch_state {
+- MHI_CH_STATE_DISABLED = 0x0,
+- MHI_CH_STATE_ENABLED = 0x1,
+- MHI_CH_STATE_RUNNING = 0x2,
+- MHI_CH_STATE_SUSPENDED = 0x3,
+- MHI_CH_STATE_STOP = 0x4,
+- MHI_CH_STATE_ERROR = 0x5,
+-};
+-
+-enum mhi_ch_state_type {
+- MHI_CH_STATE_TYPE_RESET,
+- MHI_CH_STATE_TYPE_STOP,
+- MHI_CH_STATE_TYPE_START,
+- MHI_CH_STATE_TYPE_MAX,
+-};
+-
+-extern const char * const mhi_ch_state_type_str[MHI_CH_STATE_TYPE_MAX];
+-#define TO_CH_STATE_TYPE_STR(state) (((state) >= MHI_CH_STATE_TYPE_MAX) ? \
+- "INVALID_STATE" : \
+- mhi_ch_state_type_str[(state)])
+-
+-#define MHI_INVALID_BRSTMODE(mode) (mode != MHI_DB_BRST_DISABLE && \
+- mode != MHI_DB_BRST_ENABLE)
+-
+-extern const char * const mhi_ee_str[MHI_EE_MAX];
+-#define TO_MHI_EXEC_STR(ee) (((ee) >= MHI_EE_MAX) ? \
+- "INVALID_EE" : mhi_ee_str[ee])
+-
+-#define MHI_IN_PBL(ee) (ee == MHI_EE_PBL || ee == MHI_EE_PTHRU || \
+- ee == MHI_EE_EDL)
+-
+-#define MHI_IN_MISSION_MODE(ee) (ee == MHI_EE_AMSS || ee == MHI_EE_WFW || \
+- ee == MHI_EE_FP)
+-
+-enum dev_st_transition {
+- DEV_ST_TRANSITION_PBL,
+- DEV_ST_TRANSITION_READY,
+- DEV_ST_TRANSITION_SBL,
+- DEV_ST_TRANSITION_MISSION_MODE,
+- DEV_ST_TRANSITION_FP,
+- DEV_ST_TRANSITION_SYS_ERR,
+- DEV_ST_TRANSITION_DISABLE,
+- DEV_ST_TRANSITION_MAX,
+-};
+-
+-extern const char * const dev_state_tran_str[DEV_ST_TRANSITION_MAX];
+-#define TO_DEV_STATE_TRANS_STR(state) (((state) >= DEV_ST_TRANSITION_MAX) ? \
+- "INVALID_STATE" : dev_state_tran_str[state])
+-
+-extern const char * const mhi_state_str[MHI_STATE_MAX];
+-#define TO_MHI_STATE_STR(state) ((state >= MHI_STATE_MAX || \
+- !mhi_state_str[state]) ? \
+- "INVALID_STATE" : mhi_state_str[state])
+-
+-/* internal power states */
+-enum mhi_pm_state {
+- MHI_PM_STATE_DISABLE,
+- MHI_PM_STATE_POR,
+- MHI_PM_STATE_M0,
+- MHI_PM_STATE_M2,
+- MHI_PM_STATE_M3_ENTER,
+- MHI_PM_STATE_M3,
+- MHI_PM_STATE_M3_EXIT,
+- MHI_PM_STATE_FW_DL_ERR,
+- MHI_PM_STATE_SYS_ERR_DETECT,
+- MHI_PM_STATE_SYS_ERR_PROCESS,
+- MHI_PM_STATE_SHUTDOWN_PROCESS,
+- MHI_PM_STATE_LD_ERR_FATAL_DETECT,
+- MHI_PM_STATE_MAX
+-};
+-
+-#define MHI_PM_DISABLE BIT(0)
+-#define MHI_PM_POR BIT(1)
+-#define MHI_PM_M0 BIT(2)
+-#define MHI_PM_M2 BIT(3)
+-#define MHI_PM_M3_ENTER BIT(4)
+-#define MHI_PM_M3 BIT(5)
+-#define MHI_PM_M3_EXIT BIT(6)
+-/* firmware download failure state */
+-#define MHI_PM_FW_DL_ERR BIT(7)
+-#define MHI_PM_SYS_ERR_DETECT BIT(8)
+-#define MHI_PM_SYS_ERR_PROCESS BIT(9)
+-#define MHI_PM_SHUTDOWN_PROCESS BIT(10)
+-/* link not accessible */
+-#define MHI_PM_LD_ERR_FATAL_DETECT BIT(11)
+-
+-#define MHI_REG_ACCESS_VALID(pm_state) ((pm_state & (MHI_PM_POR | MHI_PM_M0 | \
+- MHI_PM_M2 | MHI_PM_M3_ENTER | MHI_PM_M3_EXIT | \
+- MHI_PM_SYS_ERR_DETECT | MHI_PM_SYS_ERR_PROCESS | \
+- MHI_PM_SHUTDOWN_PROCESS | MHI_PM_FW_DL_ERR)))
+-#define MHI_PM_IN_ERROR_STATE(pm_state) (pm_state >= MHI_PM_FW_DL_ERR)
+-#define MHI_PM_IN_FATAL_STATE(pm_state) (pm_state == MHI_PM_LD_ERR_FATAL_DETECT)
+-#define MHI_DB_ACCESS_VALID(mhi_cntrl) (mhi_cntrl->pm_state & \
+- mhi_cntrl->db_access)
+-#define MHI_WAKE_DB_CLEAR_VALID(pm_state) (pm_state & (MHI_PM_M0 | \
+- MHI_PM_M2 | MHI_PM_M3_EXIT))
+-#define MHI_WAKE_DB_SET_VALID(pm_state) (pm_state & MHI_PM_M2)
+-#define MHI_WAKE_DB_FORCE_SET_VALID(pm_state) MHI_WAKE_DB_CLEAR_VALID(pm_state)
+-#define MHI_EVENT_ACCESS_INVALID(pm_state) (pm_state == MHI_PM_DISABLE || \
+- MHI_PM_IN_ERROR_STATE(pm_state))
+-#define MHI_PM_IN_SUSPEND_STATE(pm_state) (pm_state & \
+- (MHI_PM_M3_ENTER | MHI_PM_M3))
+-
+-#define NR_OF_CMD_RINGS 1
+-#define CMD_EL_PER_RING 128
+-#define PRIMARY_CMD_RING 0
+-#define MHI_DEV_WAKE_DB 127
+-#define MHI_MAX_MTU 0xffff
+-#define MHI_RANDOM_U32_NONZERO(bmsk) (prandom_u32_max(bmsk) + 1)
+-
+-enum mhi_er_type {
+- MHI_ER_TYPE_INVALID = 0x0,
+- MHI_ER_TYPE_VALID = 0x1,
+-};
+-
+-struct db_cfg {
+- bool reset_req;
+- bool db_mode;
+- u32 pollcfg;
+- enum mhi_db_brst_mode brstmode;
+- dma_addr_t db_val;
+- void (*process_db)(struct mhi_controller *mhi_cntrl,
+- struct db_cfg *db_cfg, void __iomem *io_addr,
+- dma_addr_t db_val);
+-};
+-
+-struct mhi_pm_transitions {
+- enum mhi_pm_state from_state;
+- u32 to_states;
+-};
+-
+-struct state_transition {
+- struct list_head node;
+- enum dev_st_transition state;
+-};
+-
+-struct mhi_ring {
+- dma_addr_t dma_handle;
+- dma_addr_t iommu_base;
+- u64 *ctxt_wp; /* point to ctxt wp */
+- void *pre_aligned;
+- void *base;
+- void *rp;
+- void *wp;
+- size_t el_size;
+- size_t len;
+- size_t elements;
+- size_t alloc_size;
+- void __iomem *db_addr;
+-};
+-
+-struct mhi_cmd {
+- struct mhi_ring ring;
+- spinlock_t lock;
+-};
+-
+-struct mhi_buf_info {
+- void *v_addr;
+- void *bb_addr;
+- void *wp;
+- void *cb_buf;
+- dma_addr_t p_addr;
+- size_t len;
+- enum dma_data_direction dir;
+- bool used; /* Indicates whether the buffer is used or not */
+- bool pre_mapped; /* Already pre-mapped by client */
+-};
+-
+-struct mhi_event {
+- struct mhi_controller *mhi_cntrl;
+- struct mhi_chan *mhi_chan; /* dedicated to channel */
+- u32 er_index;
+- u32 intmod;
+- u32 irq;
+- int chan; /* this event ring is dedicated to a channel (optional) */
+- u32 priority;
+- enum mhi_er_data_type data_type;
+- struct mhi_ring ring;
+- struct db_cfg db_cfg;
+- struct tasklet_struct task;
+- spinlock_t lock;
+- int (*process_event)(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event,
+- u32 event_quota);
+- bool hw_ring;
+- bool cl_manage;
+- bool offload_ev; /* managed by a device driver */
+-};
+-
+-struct mhi_chan {
+- const char *name;
+- /*
+- * Important: When consuming, increment tre_ring first and when
+- * releasing, decrement buf_ring first. If tre_ring has space, buf_ring
+- * is guranteed to have space so we do not need to check both rings.
+- */
+- struct mhi_ring buf_ring;
+- struct mhi_ring tre_ring;
+- u32 chan;
+- u32 er_index;
+- u32 intmod;
+- enum mhi_ch_type type;
+- enum dma_data_direction dir;
+- struct db_cfg db_cfg;
+- enum mhi_ch_ee_mask ee_mask;
+- enum mhi_ch_state ch_state;
+- enum mhi_ev_ccs ccs;
+- struct mhi_device *mhi_dev;
+- void (*xfer_cb)(struct mhi_device *mhi_dev, struct mhi_result *result);
+- struct mutex mutex;
+- struct completion completion;
+- rwlock_t lock;
+- struct list_head node;
+- bool lpm_notify;
+- bool configured;
+- bool offload_ch;
+- bool pre_alloc;
+- bool wake_capable;
+-};
+-
+-/* Default MHI timeout */
+-#define MHI_TIMEOUT_MS (1000)
+-
+-/* debugfs related functions */
+-#ifdef CONFIG_MHI_BUS_DEBUG
+-void mhi_create_debugfs(struct mhi_controller *mhi_cntrl);
+-void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl);
+-void mhi_debugfs_init(void);
+-void mhi_debugfs_exit(void);
+-#else
+-static inline void mhi_create_debugfs(struct mhi_controller *mhi_cntrl)
+-{
+-}
+-
+-static inline void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl)
+-{
+-}
+-
+-static inline void mhi_debugfs_init(void)
+-{
+-}
+-
+-static inline void mhi_debugfs_exit(void)
+-{
+-}
+-#endif
+-
+-struct mhi_device *mhi_alloc_device(struct mhi_controller *mhi_cntrl);
+-
+-int mhi_destroy_device(struct device *dev, void *data);
+-void mhi_create_devices(struct mhi_controller *mhi_cntrl);
+-
+-int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
+- struct image_info **image_info, size_t alloc_size);
+-void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
+- struct image_info *image_info);
+-
+-/* Power management APIs */
+-enum mhi_pm_state __must_check mhi_tryset_pm_state(
+- struct mhi_controller *mhi_cntrl,
+- enum mhi_pm_state state);
+-const char *to_mhi_pm_state_str(enum mhi_pm_state state);
+-int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl,
+- enum dev_st_transition state);
+-void mhi_pm_st_worker(struct work_struct *work);
+-void mhi_pm_sys_err_handler(struct mhi_controller *mhi_cntrl);
+-int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl);
+-int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl);
+-void mhi_pm_m1_transition(struct mhi_controller *mhi_cntrl);
+-int mhi_pm_m3_transition(struct mhi_controller *mhi_cntrl);
+-int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl);
+-int mhi_send_cmd(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
+- enum mhi_cmd_type cmd);
+-int mhi_download_amss_image(struct mhi_controller *mhi_cntrl);
+-static inline bool mhi_is_active(struct mhi_controller *mhi_cntrl)
+-{
+- return (mhi_cntrl->dev_state >= MHI_STATE_M0 &&
+- mhi_cntrl->dev_state <= MHI_STATE_M3_FAST);
+-}
+-
+-static inline void mhi_trigger_resume(struct mhi_controller *mhi_cntrl)
+-{
+- pm_wakeup_event(&mhi_cntrl->mhi_dev->dev, 0);
+- mhi_cntrl->runtime_get(mhi_cntrl);
+- mhi_cntrl->runtime_put(mhi_cntrl);
+-}
+-
+-/* Register access methods */
+-void mhi_db_brstmode(struct mhi_controller *mhi_cntrl, struct db_cfg *db_cfg,
+- void __iomem *db_addr, dma_addr_t db_val);
+-void mhi_db_brstmode_disable(struct mhi_controller *mhi_cntrl,
+- struct db_cfg *db_mode, void __iomem *db_addr,
+- dma_addr_t db_val);
+-int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset, u32 *out);
+-int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset, u32 mask,
+- u32 shift, u32 *out);
+-int __must_check mhi_poll_reg_field(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset, u32 mask,
+- u32 shift, u32 val, u32 delayus);
+-void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
+- u32 offset, u32 val);
+-void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
+- u32 offset, u32 mask, u32 shift, u32 val);
+-void mhi_ring_er_db(struct mhi_event *mhi_event);
+-void mhi_write_db(struct mhi_controller *mhi_cntrl, void __iomem *db_addr,
+- dma_addr_t db_val);
+-void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd);
+-void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan);
+-
+-/* Initialization methods */
+-int mhi_init_mmio(struct mhi_controller *mhi_cntrl);
+-int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl);
+-void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl);
+-int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl);
+-void mhi_deinit_free_irq(struct mhi_controller *mhi_cntrl);
+-void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
+- struct image_info *img_info);
+-void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
+-int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan);
+-int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan);
+-void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan);
+-void mhi_reset_chan(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan);
+-
+-/* Event processing methods */
+-void mhi_ctrl_ev_task(unsigned long data);
+-void mhi_ev_task(unsigned long data);
+-int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event, u32 event_quota);
+-int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event, u32 event_quota);
+-
+-/* ISR handlers */
+-irqreturn_t mhi_irq_handler(int irq_number, void *dev);
+-irqreturn_t mhi_intvec_threaded_handler(int irq_number, void *dev);
+-irqreturn_t mhi_intvec_handler(int irq_number, void *dev);
+-
+-int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
+- struct mhi_buf_info *info, enum mhi_flags flags);
+-int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info);
+-int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info);
+-void mhi_unmap_single_no_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info);
+-void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info);
+-
+-#endif /* _MHI_INT_H */
+diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
+deleted file mode 100644
+index b15c5bc37dd4f..0000000000000
+--- a/drivers/bus/mhi/core/main.c
++++ /dev/null
+@@ -1,1673 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#include <linux/delay.h>
+-#include <linux/device.h>
+-#include <linux/dma-direction.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/interrupt.h>
+-#include <linux/list.h>
+-#include <linux/mhi.h>
+-#include <linux/module.h>
+-#include <linux/skbuff.h>
+-#include <linux/slab.h>
+-#include "internal.h"
+-
+-int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset, u32 *out)
+-{
+- return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out);
+-}
+-
+-int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset,
+- u32 mask, u32 shift, u32 *out)
+-{
+- u32 tmp;
+- int ret;
+-
+- ret = mhi_read_reg(mhi_cntrl, base, offset, &tmp);
+- if (ret)
+- return ret;
+-
+- *out = (tmp & mask) >> shift;
+-
+- return 0;
+-}
+-
+-int __must_check mhi_poll_reg_field(struct mhi_controller *mhi_cntrl,
+- void __iomem *base, u32 offset,
+- u32 mask, u32 shift, u32 val, u32 delayus)
+-{
+- int ret;
+- u32 out, retry = (mhi_cntrl->timeout_ms * 1000) / delayus;
+-
+- while (retry--) {
+- ret = mhi_read_reg_field(mhi_cntrl, base, offset, mask, shift,
+- &out);
+- if (ret)
+- return ret;
+-
+- if (out == val)
+- return 0;
+-
+- fsleep(delayus);
+- }
+-
+- return -ETIMEDOUT;
+-}
+-
+-void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
+- u32 offset, u32 val)
+-{
+- mhi_cntrl->write_reg(mhi_cntrl, base + offset, val);
+-}
+-
+-void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
+- u32 offset, u32 mask, u32 shift, u32 val)
+-{
+- int ret;
+- u32 tmp;
+-
+- ret = mhi_read_reg(mhi_cntrl, base, offset, &tmp);
+- if (ret)
+- return;
+-
+- tmp &= ~mask;
+- tmp |= (val << shift);
+- mhi_write_reg(mhi_cntrl, base, offset, tmp);
+-}
+-
+-void mhi_write_db(struct mhi_controller *mhi_cntrl, void __iomem *db_addr,
+- dma_addr_t db_val)
+-{
+- mhi_write_reg(mhi_cntrl, db_addr, 4, upper_32_bits(db_val));
+- mhi_write_reg(mhi_cntrl, db_addr, 0, lower_32_bits(db_val));
+-}
+-
+-void mhi_db_brstmode(struct mhi_controller *mhi_cntrl,
+- struct db_cfg *db_cfg,
+- void __iomem *db_addr,
+- dma_addr_t db_val)
+-{
+- if (db_cfg->db_mode) {
+- db_cfg->db_val = db_val;
+- mhi_write_db(mhi_cntrl, db_addr, db_val);
+- db_cfg->db_mode = 0;
+- }
+-}
+-
+-void mhi_db_brstmode_disable(struct mhi_controller *mhi_cntrl,
+- struct db_cfg *db_cfg,
+- void __iomem *db_addr,
+- dma_addr_t db_val)
+-{
+- db_cfg->db_val = db_val;
+- mhi_write_db(mhi_cntrl, db_addr, db_val);
+-}
+-
+-void mhi_ring_er_db(struct mhi_event *mhi_event)
+-{
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- mhi_event->db_cfg.process_db(mhi_event->mhi_cntrl, &mhi_event->db_cfg,
+- ring->db_addr, *ring->ctxt_wp);
+-}
+-
+-void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd)
+-{
+- dma_addr_t db;
+- struct mhi_ring *ring = &mhi_cmd->ring;
+-
+- db = ring->iommu_base + (ring->wp - ring->base);
+- *ring->ctxt_wp = db;
+- mhi_write_db(mhi_cntrl, ring->db_addr, db);
+-}
+-
+-void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *ring = &mhi_chan->tre_ring;
+- dma_addr_t db;
+-
+- db = ring->iommu_base + (ring->wp - ring->base);
+-
+- /*
+- * Writes to the new ring element must be visible to the hardware
+- * before letting h/w know there is new element to fetch.
+- */
+- dma_wmb();
+- *ring->ctxt_wp = db;
+-
+- mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg,
+- ring->db_addr, db);
+-}
+-
+-enum mhi_ee_type mhi_get_exec_env(struct mhi_controller *mhi_cntrl)
+-{
+- u32 exec;
+- int ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_EXECENV, &exec);
+-
+- return (ret) ? MHI_EE_MAX : exec;
+-}
+-EXPORT_SYMBOL_GPL(mhi_get_exec_env);
+-
+-enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl)
+-{
+- u32 state;
+- int ret = mhi_read_reg_field(mhi_cntrl, mhi_cntrl->regs, MHISTATUS,
+- MHISTATUS_MHISTATE_MASK,
+- MHISTATUS_MHISTATE_SHIFT, &state);
+- return ret ? MHI_STATE_MAX : state;
+-}
+-EXPORT_SYMBOL_GPL(mhi_get_mhi_state);
+-
+-void mhi_soc_reset(struct mhi_controller *mhi_cntrl)
+-{
+- if (mhi_cntrl->reset) {
+- mhi_cntrl->reset(mhi_cntrl);
+- return;
+- }
+-
+- /* Generic MHI SoC reset */
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->regs, MHI_SOC_RESET_REQ_OFFSET,
+- MHI_SOC_RESET_REQ);
+-}
+-EXPORT_SYMBOL_GPL(mhi_soc_reset);
+-
+-int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info)
+-{
+- buf_info->p_addr = dma_map_single(mhi_cntrl->cntrl_dev,
+- buf_info->v_addr, buf_info->len,
+- buf_info->dir);
+- if (dma_mapping_error(mhi_cntrl->cntrl_dev, buf_info->p_addr))
+- return -ENOMEM;
+-
+- return 0;
+-}
+-
+-int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info)
+-{
+- void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
+- &buf_info->p_addr, GFP_ATOMIC);
+-
+- if (!buf)
+- return -ENOMEM;
+-
+- if (buf_info->dir == DMA_TO_DEVICE)
+- memcpy(buf, buf_info->v_addr, buf_info->len);
+-
+- buf_info->bb_addr = buf;
+-
+- return 0;
+-}
+-
+-void mhi_unmap_single_no_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info)
+-{
+- dma_unmap_single(mhi_cntrl->cntrl_dev, buf_info->p_addr, buf_info->len,
+- buf_info->dir);
+-}
+-
+-void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl,
+- struct mhi_buf_info *buf_info)
+-{
+- if (buf_info->dir == DMA_FROM_DEVICE)
+- memcpy(buf_info->v_addr, buf_info->bb_addr, buf_info->len);
+-
+- dma_free_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
+- buf_info->bb_addr, buf_info->p_addr);
+-}
+-
+-static int get_nr_avail_ring_elements(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring)
+-{
+- int nr_el;
+-
+- if (ring->wp < ring->rp) {
+- nr_el = ((ring->rp - ring->wp) / ring->el_size) - 1;
+- } else {
+- nr_el = (ring->rp - ring->base) / ring->el_size;
+- nr_el += ((ring->base + ring->len - ring->wp) /
+- ring->el_size) - 1;
+- }
+-
+- return nr_el;
+-}
+-
+-static void *mhi_to_virtual(struct mhi_ring *ring, dma_addr_t addr)
+-{
+- return (addr - ring->iommu_base) + ring->base;
+-}
+-
+-static void mhi_add_ring_element(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring)
+-{
+- ring->wp += ring->el_size;
+- if (ring->wp >= (ring->base + ring->len))
+- ring->wp = ring->base;
+- /* smp update */
+- smp_wmb();
+-}
+-
+-static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring)
+-{
+- ring->rp += ring->el_size;
+- if (ring->rp >= (ring->base + ring->len))
+- ring->rp = ring->base;
+- /* smp update */
+- smp_wmb();
+-}
+-
+-static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr)
+-{
+- return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len;
+-}
+-
+-int mhi_destroy_device(struct device *dev, void *data)
+-{
+- struct mhi_chan *ul_chan, *dl_chan;
+- struct mhi_device *mhi_dev;
+- struct mhi_controller *mhi_cntrl;
+- enum mhi_ee_type ee = MHI_EE_MAX;
+-
+- if (dev->bus != &mhi_bus_type)
+- return 0;
+-
+- mhi_dev = to_mhi_device(dev);
+- mhi_cntrl = mhi_dev->mhi_cntrl;
+-
+- /* Only destroy virtual devices thats attached to bus */
+- if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
+- return 0;
+-
+- ul_chan = mhi_dev->ul_chan;
+- dl_chan = mhi_dev->dl_chan;
+-
+- /*
+- * If execution environment is specified, remove only those devices that
+- * started in them based on ee_mask for the channels as we move on to a
+- * different execution environment
+- */
+- if (data)
+- ee = *(enum mhi_ee_type *)data;
+-
+- /*
+- * For the suspend and resume case, this function will get called
+- * without mhi_unregister_controller(). Hence, we need to drop the
+- * references to mhi_dev created for ul and dl channels. We can
+- * be sure that there will be no instances of mhi_dev left after
+- * this.
+- */
+- if (ul_chan) {
+- if (ee != MHI_EE_MAX && !(ul_chan->ee_mask & BIT(ee)))
+- return 0;
+-
+- put_device(&ul_chan->mhi_dev->dev);
+- }
+-
+- if (dl_chan) {
+- if (ee != MHI_EE_MAX && !(dl_chan->ee_mask & BIT(ee)))
+- return 0;
+-
+- put_device(&dl_chan->mhi_dev->dev);
+- }
+-
+- dev_dbg(&mhi_cntrl->mhi_dev->dev, "destroy device for chan:%s\n",
+- mhi_dev->name);
+-
+- /* Notify the client and remove the device from MHI bus */
+- device_del(dev);
+- put_device(dev);
+-
+- return 0;
+-}
+-
+-int mhi_get_free_desc_count(struct mhi_device *mhi_dev,
+- enum dma_data_direction dir)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ?
+- mhi_dev->ul_chan : mhi_dev->dl_chan;
+- struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
+-
+- return get_nr_avail_ring_elements(mhi_cntrl, tre_ring);
+-}
+-EXPORT_SYMBOL_GPL(mhi_get_free_desc_count);
+-
+-void mhi_notify(struct mhi_device *mhi_dev, enum mhi_callback cb_reason)
+-{
+- struct mhi_driver *mhi_drv;
+-
+- if (!mhi_dev->dev.driver)
+- return;
+-
+- mhi_drv = to_mhi_driver(mhi_dev->dev.driver);
+-
+- if (mhi_drv->status_cb)
+- mhi_drv->status_cb(mhi_dev, cb_reason);
+-}
+-EXPORT_SYMBOL_GPL(mhi_notify);
+-
+-/* Bind MHI channels to MHI devices */
+-void mhi_create_devices(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_chan *mhi_chan;
+- struct mhi_device *mhi_dev;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int i, ret;
+-
+- mhi_chan = mhi_cntrl->mhi_chan;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
+- if (!mhi_chan->configured || mhi_chan->mhi_dev ||
+- !(mhi_chan->ee_mask & BIT(mhi_cntrl->ee)))
+- continue;
+- mhi_dev = mhi_alloc_device(mhi_cntrl);
+- if (IS_ERR(mhi_dev))
+- return;
+-
+- mhi_dev->dev_type = MHI_DEVICE_XFER;
+- switch (mhi_chan->dir) {
+- case DMA_TO_DEVICE:
+- mhi_dev->ul_chan = mhi_chan;
+- mhi_dev->ul_chan_id = mhi_chan->chan;
+- break;
+- case DMA_FROM_DEVICE:
+- /* We use dl_chan as offload channels */
+- mhi_dev->dl_chan = mhi_chan;
+- mhi_dev->dl_chan_id = mhi_chan->chan;
+- break;
+- default:
+- dev_err(dev, "Direction not supported\n");
+- put_device(&mhi_dev->dev);
+- return;
+- }
+-
+- get_device(&mhi_dev->dev);
+- mhi_chan->mhi_dev = mhi_dev;
+-
+- /* Check next channel if it matches */
+- if ((i + 1) < mhi_cntrl->max_chan && mhi_chan[1].configured) {
+- if (!strcmp(mhi_chan[1].name, mhi_chan->name)) {
+- i++;
+- mhi_chan++;
+- if (mhi_chan->dir == DMA_TO_DEVICE) {
+- mhi_dev->ul_chan = mhi_chan;
+- mhi_dev->ul_chan_id = mhi_chan->chan;
+- } else {
+- mhi_dev->dl_chan = mhi_chan;
+- mhi_dev->dl_chan_id = mhi_chan->chan;
+- }
+- get_device(&mhi_dev->dev);
+- mhi_chan->mhi_dev = mhi_dev;
+- }
+- }
+-
+- /* Channel name is same for both UL and DL */
+- mhi_dev->name = mhi_chan->name;
+- dev_set_name(&mhi_dev->dev, "%s_%s",
+- dev_name(&mhi_cntrl->mhi_dev->dev),
+- mhi_dev->name);
+-
+- /* Init wakeup source if available */
+- if (mhi_dev->dl_chan && mhi_dev->dl_chan->wake_capable)
+- device_init_wakeup(&mhi_dev->dev, true);
+-
+- ret = device_add(&mhi_dev->dev);
+- if (ret)
+- put_device(&mhi_dev->dev);
+- }
+-}
+-
+-irqreturn_t mhi_irq_handler(int irq_number, void *dev)
+-{
+- struct mhi_event *mhi_event = dev;
+- struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
+- struct mhi_event_ctxt *er_ctxt =
+- &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
+- struct mhi_ring *ev_ring = &mhi_event->ring;
+- dma_addr_t ptr = er_ctxt->rp;
+- void *dev_rp;
+-
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- return IRQ_HANDLED;
+- }
+-
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+-
+- /* Only proceed if event ring has pending events */
+- if (ev_ring->rp == dev_rp)
+- return IRQ_HANDLED;
+-
+- /* For client managed event ring, notify pending data */
+- if (mhi_event->cl_manage) {
+- struct mhi_chan *mhi_chan = mhi_event->mhi_chan;
+- struct mhi_device *mhi_dev = mhi_chan->mhi_dev;
+-
+- if (mhi_dev)
+- mhi_notify(mhi_dev, MHI_CB_PENDING_DATA);
+- } else {
+- tasklet_schedule(&mhi_event->task);
+- }
+-
+- return IRQ_HANDLED;
+-}
+-
+-irqreturn_t mhi_intvec_threaded_handler(int irq_number, void *priv)
+-{
+- struct mhi_controller *mhi_cntrl = priv;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- enum mhi_state state;
+- enum mhi_pm_state pm_state = 0;
+- enum mhi_ee_type ee;
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- goto exit_intvec;
+- }
+-
+- state = mhi_get_mhi_state(mhi_cntrl);
+- ee = mhi_get_exec_env(mhi_cntrl);
+- dev_dbg(dev, "local ee: %s state: %s device ee: %s state: %s\n",
+- TO_MHI_EXEC_STR(mhi_cntrl->ee),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- TO_MHI_EXEC_STR(ee), TO_MHI_STATE_STR(state));
+-
+- if (state == MHI_STATE_SYS_ERR) {
+- dev_dbg(dev, "System error detected\n");
+- pm_state = mhi_tryset_pm_state(mhi_cntrl,
+- MHI_PM_SYS_ERR_DETECT);
+- }
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- if (pm_state != MHI_PM_SYS_ERR_DETECT || ee == mhi_cntrl->ee)
+- goto exit_intvec;
+-
+- switch (ee) {
+- case MHI_EE_RDDM:
+- /* proceed if power down is not already in progress */
+- if (mhi_cntrl->rddm_image && mhi_is_active(mhi_cntrl)) {
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_RDDM);
+- mhi_cntrl->ee = ee;
+- wake_up_all(&mhi_cntrl->state_event);
+- }
+- break;
+- case MHI_EE_PBL:
+- case MHI_EE_EDL:
+- case MHI_EE_PTHRU:
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_FATAL_ERROR);
+- mhi_cntrl->ee = ee;
+- wake_up_all(&mhi_cntrl->state_event);
+- mhi_pm_sys_err_handler(mhi_cntrl);
+- break;
+- default:
+- wake_up_all(&mhi_cntrl->state_event);
+- mhi_pm_sys_err_handler(mhi_cntrl);
+- break;
+- }
+-
+-exit_intvec:
+-
+- return IRQ_HANDLED;
+-}
+-
+-irqreturn_t mhi_intvec_handler(int irq_number, void *dev)
+-{
+- struct mhi_controller *mhi_cntrl = dev;
+-
+- /* Wake up events waiting for state change */
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- return IRQ_WAKE_THREAD;
+-}
+-
+-static void mhi_recycle_ev_ring_element(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring)
+-{
+- dma_addr_t ctxt_wp;
+-
+- /* Update the WP */
+- ring->wp += ring->el_size;
+- ctxt_wp = *ring->ctxt_wp + ring->el_size;
+-
+- if (ring->wp >= (ring->base + ring->len)) {
+- ring->wp = ring->base;
+- ctxt_wp = ring->iommu_base;
+- }
+-
+- *ring->ctxt_wp = ctxt_wp;
+-
+- /* Update the RP */
+- ring->rp += ring->el_size;
+- if (ring->rp >= (ring->base + ring->len))
+- ring->rp = ring->base;
+-
+- /* Update to all cores */
+- smp_wmb();
+-}
+-
+-static int parse_xfer_event(struct mhi_controller *mhi_cntrl,
+- struct mhi_tre *event,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *buf_ring, *tre_ring;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- struct mhi_result result;
+- unsigned long flags = 0;
+- u32 ev_code;
+-
+- ev_code = MHI_TRE_GET_EV_CODE(event);
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+-
+- result.transaction_status = (ev_code == MHI_EV_CC_OVERFLOW) ?
+- -EOVERFLOW : 0;
+-
+- /*
+- * If it's a DB Event then we need to grab the lock
+- * with preemption disabled and as a write because we
+- * have to update db register and there are chances that
+- * another thread could be doing the same.
+- */
+- if (ev_code >= MHI_EV_CC_OOB)
+- write_lock_irqsave(&mhi_chan->lock, flags);
+- else
+- read_lock_bh(&mhi_chan->lock);
+-
+- if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
+- goto end_process_tx_event;
+-
+- switch (ev_code) {
+- case MHI_EV_CC_OVERFLOW:
+- case MHI_EV_CC_EOB:
+- case MHI_EV_CC_EOT:
+- {
+- dma_addr_t ptr = MHI_TRE_GET_EV_PTR(event);
+- struct mhi_tre *local_rp, *ev_tre;
+- void *dev_rp;
+- struct mhi_buf_info *buf_info;
+- u16 xfer_len;
+-
+- if (!is_valid_ring_ptr(tre_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event element points outside of the tre ring\n");
+- break;
+- }
+- /* Get the TRB this event points to */
+- ev_tre = mhi_to_virtual(tre_ring, ptr);
+-
+- dev_rp = ev_tre + 1;
+- if (dev_rp >= (tre_ring->base + tre_ring->len))
+- dev_rp = tre_ring->base;
+-
+- result.dir = mhi_chan->dir;
+-
+- local_rp = tre_ring->rp;
+- while (local_rp != dev_rp) {
+- buf_info = buf_ring->rp;
+- /* If it's the last TRE, get length from the event */
+- if (local_rp == ev_tre)
+- xfer_len = MHI_TRE_GET_EV_LEN(event);
+- else
+- xfer_len = buf_info->len;
+-
+- /* Unmap if it's not pre-mapped by client */
+- if (likely(!buf_info->pre_mapped))
+- mhi_cntrl->unmap_single(mhi_cntrl, buf_info);
+-
+- result.buf_addr = buf_info->cb_buf;
+-
+- /* truncate to buf len if xfer_len is larger */
+- result.bytes_xferd =
+- min_t(u16, xfer_len, buf_info->len);
+- mhi_del_ring_element(mhi_cntrl, buf_ring);
+- mhi_del_ring_element(mhi_cntrl, tre_ring);
+- local_rp = tre_ring->rp;
+-
+- /* notify client */
+- mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
+-
+- if (mhi_chan->dir == DMA_TO_DEVICE) {
+- atomic_dec(&mhi_cntrl->pending_pkts);
+- /* Release the reference got from mhi_queue() */
+- mhi_cntrl->runtime_put(mhi_cntrl);
+- }
+-
+- /*
+- * Recycle the buffer if buffer is pre-allocated,
+- * if there is an error, not much we can do apart
+- * from dropping the packet
+- */
+- if (mhi_chan->pre_alloc) {
+- if (mhi_queue_buf(mhi_chan->mhi_dev,
+- mhi_chan->dir,
+- buf_info->cb_buf,
+- buf_info->len, MHI_EOT)) {
+- dev_err(dev,
+- "Error recycling buffer for chan:%d\n",
+- mhi_chan->chan);
+- kfree(buf_info->cb_buf);
+- }
+- }
+- }
+- break;
+- } /* CC_EOT */
+- case MHI_EV_CC_OOB:
+- case MHI_EV_CC_DB_MODE:
+- {
+- unsigned long pm_lock_flags;
+-
+- mhi_chan->db_cfg.db_mode = 1;
+- read_lock_irqsave(&mhi_cntrl->pm_lock, pm_lock_flags);
+- if (tre_ring->wp != tre_ring->rp &&
+- MHI_DB_ACCESS_VALID(mhi_cntrl)) {
+- mhi_ring_chan_db(mhi_cntrl, mhi_chan);
+- }
+- read_unlock_irqrestore(&mhi_cntrl->pm_lock, pm_lock_flags);
+- break;
+- }
+- case MHI_EV_CC_BAD_TRE:
+- default:
+- dev_err(dev, "Unknown event 0x%x\n", ev_code);
+- break;
+- } /* switch(MHI_EV_READ_CODE(EV_TRB_CODE,event)) */
+-
+-end_process_tx_event:
+- if (ev_code >= MHI_EV_CC_OOB)
+- write_unlock_irqrestore(&mhi_chan->lock, flags);
+- else
+- read_unlock_bh(&mhi_chan->lock);
+-
+- return 0;
+-}
+-
+-static int parse_rsc_event(struct mhi_controller *mhi_cntrl,
+- struct mhi_tre *event,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *buf_ring, *tre_ring;
+- struct mhi_buf_info *buf_info;
+- struct mhi_result result;
+- int ev_code;
+- u32 cookie; /* offset to local descriptor */
+- u16 xfer_len;
+-
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+-
+- ev_code = MHI_TRE_GET_EV_CODE(event);
+- cookie = MHI_TRE_GET_EV_COOKIE(event);
+- xfer_len = MHI_TRE_GET_EV_LEN(event);
+-
+- /* Received out of bound cookie */
+- WARN_ON(cookie >= buf_ring->len);
+-
+- buf_info = buf_ring->base + cookie;
+-
+- result.transaction_status = (ev_code == MHI_EV_CC_OVERFLOW) ?
+- -EOVERFLOW : 0;
+-
+- /* truncate to buf len if xfer_len is larger */
+- result.bytes_xferd = min_t(u16, xfer_len, buf_info->len);
+- result.buf_addr = buf_info->cb_buf;
+- result.dir = mhi_chan->dir;
+-
+- read_lock_bh(&mhi_chan->lock);
+-
+- if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
+- goto end_process_rsc_event;
+-
+- WARN_ON(!buf_info->used);
+-
+- /* notify the client */
+- mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
+-
+- /*
+- * Note: We're arbitrarily incrementing RP even though, completion
+- * packet we processed might not be the same one, reason we can do this
+- * is because device guaranteed to cache descriptors in order it
+- * receive, so even though completion event is different we can re-use
+- * all descriptors in between.
+- * Example:
+- * Transfer Ring has descriptors: A, B, C, D
+- * Last descriptor host queue is D (WP) and first descriptor
+- * host queue is A (RP).
+- * The completion event we just serviced is descriptor C.
+- * Then we can safely queue descriptors to replace A, B, and C
+- * even though host did not receive any completions.
+- */
+- mhi_del_ring_element(mhi_cntrl, tre_ring);
+- buf_info->used = false;
+-
+-end_process_rsc_event:
+- read_unlock_bh(&mhi_chan->lock);
+-
+- return 0;
+-}
+-
+-static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl,
+- struct mhi_tre *tre)
+-{
+- dma_addr_t ptr = MHI_TRE_GET_EV_PTR(tre);
+- struct mhi_cmd *cmd_ring = &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
+- struct mhi_ring *mhi_ring = &cmd_ring->ring;
+- struct mhi_tre *cmd_pkt;
+- struct mhi_chan *mhi_chan;
+- u32 chan;
+-
+- if (!is_valid_ring_ptr(mhi_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event element points outside of the cmd ring\n");
+- return;
+- }
+-
+- cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
+-
+- chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
+-
+- if (chan < mhi_cntrl->max_chan &&
+- mhi_cntrl->mhi_chan[chan].configured) {
+- mhi_chan = &mhi_cntrl->mhi_chan[chan];
+- write_lock_bh(&mhi_chan->lock);
+- mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
+- complete(&mhi_chan->completion);
+- write_unlock_bh(&mhi_chan->lock);
+- } else {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Completion packet for invalid channel ID: %d\n", chan);
+- }
+-
+- mhi_del_ring_element(mhi_cntrl, mhi_ring);
+-}
+-
+-int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event,
+- u32 event_quota)
+-{
+- struct mhi_tre *dev_rp, *local_rp;
+- struct mhi_ring *ev_ring = &mhi_event->ring;
+- struct mhi_event_ctxt *er_ctxt =
+- &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
+- struct mhi_chan *mhi_chan;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 chan;
+- int count = 0;
+- dma_addr_t ptr = er_ctxt->rp;
+-
+- /*
+- * This is a quick check to avoid unnecessary event processing
+- * in case MHI is already in error state, but it's still possible
+- * to transition to error state while processing events
+- */
+- if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state)))
+- return -EIO;
+-
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- return -EIO;
+- }
+-
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+- local_rp = ev_ring->rp;
+-
+- while (dev_rp != local_rp) {
+- enum mhi_pkt_type type = MHI_TRE_GET_EV_TYPE(local_rp);
+-
+- switch (type) {
+- case MHI_PKT_TYPE_BW_REQ_EVENT:
+- {
+- struct mhi_link_info *link_info;
+-
+- link_info = &mhi_cntrl->mhi_link_info;
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- link_info->target_link_speed =
+- MHI_TRE_GET_EV_LINKSPEED(local_rp);
+- link_info->target_link_width =
+- MHI_TRE_GET_EV_LINKWIDTH(local_rp);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- dev_dbg(dev, "Received BW_REQ event\n");
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_BW_REQ);
+- break;
+- }
+- case MHI_PKT_TYPE_STATE_CHANGE_EVENT:
+- {
+- enum mhi_state new_state;
+-
+- new_state = MHI_TRE_GET_EV_STATE(local_rp);
+-
+- dev_dbg(dev, "State change event to state: %s\n",
+- TO_MHI_STATE_STR(new_state));
+-
+- switch (new_state) {
+- case MHI_STATE_M0:
+- mhi_pm_m0_transition(mhi_cntrl);
+- break;
+- case MHI_STATE_M1:
+- mhi_pm_m1_transition(mhi_cntrl);
+- break;
+- case MHI_STATE_M3:
+- mhi_pm_m3_transition(mhi_cntrl);
+- break;
+- case MHI_STATE_SYS_ERR:
+- {
+- enum mhi_pm_state pm_state;
+-
+- dev_dbg(dev, "System error detected\n");
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- pm_state = mhi_tryset_pm_state(mhi_cntrl,
+- MHI_PM_SYS_ERR_DETECT);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (pm_state == MHI_PM_SYS_ERR_DETECT)
+- mhi_pm_sys_err_handler(mhi_cntrl);
+- break;
+- }
+- default:
+- dev_err(dev, "Invalid state: %s\n",
+- TO_MHI_STATE_STR(new_state));
+- }
+-
+- break;
+- }
+- case MHI_PKT_TYPE_CMD_COMPLETION_EVENT:
+- mhi_process_cmd_completion(mhi_cntrl, local_rp);
+- break;
+- case MHI_PKT_TYPE_EE_EVENT:
+- {
+- enum dev_st_transition st = DEV_ST_TRANSITION_MAX;
+- enum mhi_ee_type event = MHI_TRE_GET_EV_EXECENV(local_rp);
+-
+- dev_dbg(dev, "Received EE event: %s\n",
+- TO_MHI_EXEC_STR(event));
+- switch (event) {
+- case MHI_EE_SBL:
+- st = DEV_ST_TRANSITION_SBL;
+- break;
+- case MHI_EE_WFW:
+- case MHI_EE_AMSS:
+- st = DEV_ST_TRANSITION_MISSION_MODE;
+- break;
+- case MHI_EE_FP:
+- st = DEV_ST_TRANSITION_FP;
+- break;
+- case MHI_EE_RDDM:
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_RDDM);
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->ee = event;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- wake_up_all(&mhi_cntrl->state_event);
+- break;
+- default:
+- dev_err(dev,
+- "Unhandled EE event: 0x%x\n", type);
+- }
+- if (st != DEV_ST_TRANSITION_MAX)
+- mhi_queue_state_transition(mhi_cntrl, st);
+-
+- break;
+- }
+- case MHI_PKT_TYPE_TX_EVENT:
+- chan = MHI_TRE_GET_EV_CHID(local_rp);
+-
+- WARN_ON(chan >= mhi_cntrl->max_chan);
+-
+- /*
+- * Only process the event ring elements whose channel
+- * ID is within the maximum supported range.
+- */
+- if (chan < mhi_cntrl->max_chan) {
+- mhi_chan = &mhi_cntrl->mhi_chan[chan];
+- if (!mhi_chan->configured)
+- break;
+- parse_xfer_event(mhi_cntrl, local_rp, mhi_chan);
+- event_quota--;
+- }
+- break;
+- default:
+- dev_err(dev, "Unhandled event type: %d\n", type);
+- break;
+- }
+-
+- mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring);
+- local_rp = ev_ring->rp;
+-
+- ptr = er_ctxt->rp;
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- return -EIO;
+- }
+-
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+- count++;
+- }
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
+- mhi_ring_er_db(mhi_event);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- return count;
+-}
+-
+-int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event,
+- u32 event_quota)
+-{
+- struct mhi_tre *dev_rp, *local_rp;
+- struct mhi_ring *ev_ring = &mhi_event->ring;
+- struct mhi_event_ctxt *er_ctxt =
+- &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
+- int count = 0;
+- u32 chan;
+- struct mhi_chan *mhi_chan;
+- dma_addr_t ptr = er_ctxt->rp;
+-
+- if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state)))
+- return -EIO;
+-
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- return -EIO;
+- }
+-
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+- local_rp = ev_ring->rp;
+-
+- while (dev_rp != local_rp && event_quota > 0) {
+- enum mhi_pkt_type type = MHI_TRE_GET_EV_TYPE(local_rp);
+-
+- chan = MHI_TRE_GET_EV_CHID(local_rp);
+-
+- WARN_ON(chan >= mhi_cntrl->max_chan);
+-
+- /*
+- * Only process the event ring elements whose channel
+- * ID is within the maximum supported range.
+- */
+- if (chan < mhi_cntrl->max_chan &&
+- mhi_cntrl->mhi_chan[chan].configured) {
+- mhi_chan = &mhi_cntrl->mhi_chan[chan];
+-
+- if (likely(type == MHI_PKT_TYPE_TX_EVENT)) {
+- parse_xfer_event(mhi_cntrl, local_rp, mhi_chan);
+- event_quota--;
+- } else if (type == MHI_PKT_TYPE_RSC_TX_EVENT) {
+- parse_rsc_event(mhi_cntrl, local_rp, mhi_chan);
+- event_quota--;
+- }
+- }
+-
+- mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring);
+- local_rp = ev_ring->rp;
+-
+- ptr = er_ctxt->rp;
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- return -EIO;
+- }
+-
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+- count++;
+- }
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
+- mhi_ring_er_db(mhi_event);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- return count;
+-}
+-
+-void mhi_ev_task(unsigned long data)
+-{
+- struct mhi_event *mhi_event = (struct mhi_event *)data;
+- struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
+-
+- /* process all pending events */
+- spin_lock_bh(&mhi_event->lock);
+- mhi_event->process_event(mhi_cntrl, mhi_event, U32_MAX);
+- spin_unlock_bh(&mhi_event->lock);
+-}
+-
+-void mhi_ctrl_ev_task(unsigned long data)
+-{
+- struct mhi_event *mhi_event = (struct mhi_event *)data;
+- struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- enum mhi_state state;
+- enum mhi_pm_state pm_state = 0;
+- int ret;
+-
+- /*
+- * We can check PM state w/o a lock here because there is no way
+- * PM state can change from reg access valid to no access while this
+- * thread being executed.
+- */
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- /*
+- * We may have a pending event but not allowed to
+- * process it since we are probably in a suspended state,
+- * so trigger a resume.
+- */
+- mhi_trigger_resume(mhi_cntrl);
+-
+- return;
+- }
+-
+- /* Process ctrl events events */
+- ret = mhi_event->process_event(mhi_cntrl, mhi_event, U32_MAX);
+-
+- /*
+- * We received an IRQ but no events to process, maybe device went to
+- * SYS_ERR state? Check the state to confirm.
+- */
+- if (!ret) {
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- state = mhi_get_mhi_state(mhi_cntrl);
+- if (state == MHI_STATE_SYS_ERR) {
+- dev_dbg(dev, "System error detected\n");
+- pm_state = mhi_tryset_pm_state(mhi_cntrl,
+- MHI_PM_SYS_ERR_DETECT);
+- }
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (pm_state == MHI_PM_SYS_ERR_DETECT)
+- mhi_pm_sys_err_handler(mhi_cntrl);
+- }
+-}
+-
+-static bool mhi_is_ring_full(struct mhi_controller *mhi_cntrl,
+- struct mhi_ring *ring)
+-{
+- void *tmp = ring->wp + ring->el_size;
+-
+- if (tmp >= (ring->base + ring->len))
+- tmp = ring->base;
+-
+- return (tmp == ring->rp);
+-}
+-
+-static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
+- enum dma_data_direction dir, enum mhi_flags mflags)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
+- mhi_dev->dl_chan;
+- struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
+- unsigned long flags;
+- int ret;
+-
+- if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
+- return -EIO;
+-
+- read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
+-
+- ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
+- if (unlikely(ret)) {
+- ret = -EAGAIN;
+- goto exit_unlock;
+- }
+-
+- ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags);
+- if (unlikely(ret))
+- goto exit_unlock;
+-
+- /* Packet is queued, take a usage ref to exit M3 if necessary
+- * for host->device buffer, balanced put is done on buffer completion
+- * for device->host buffer, balanced put is after ringing the DB
+- */
+- mhi_cntrl->runtime_get(mhi_cntrl);
+-
+- /* Assert dev_wake (to exit/prevent M1/M2)*/
+- mhi_cntrl->wake_toggle(mhi_cntrl);
+-
+- if (mhi_chan->dir == DMA_TO_DEVICE)
+- atomic_inc(&mhi_cntrl->pending_pkts);
+-
+- if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
+- mhi_ring_chan_db(mhi_cntrl, mhi_chan);
+-
+- if (dir == DMA_FROM_DEVICE)
+- mhi_cntrl->runtime_put(mhi_cntrl);
+-
+-exit_unlock:
+- read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
+-
+- return ret;
+-}
+-
+-int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+- struct sk_buff *skb, size_t len, enum mhi_flags mflags)
+-{
+- struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
+- mhi_dev->dl_chan;
+- struct mhi_buf_info buf_info = { };
+-
+- buf_info.v_addr = skb->data;
+- buf_info.cb_buf = skb;
+- buf_info.len = len;
+-
+- if (unlikely(mhi_chan->pre_alloc))
+- return -EINVAL;
+-
+- return mhi_queue(mhi_dev, &buf_info, dir, mflags);
+-}
+-EXPORT_SYMBOL_GPL(mhi_queue_skb);
+-
+-int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+- struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
+-{
+- struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
+- mhi_dev->dl_chan;
+- struct mhi_buf_info buf_info = { };
+-
+- buf_info.p_addr = mhi_buf->dma_addr;
+- buf_info.cb_buf = mhi_buf;
+- buf_info.pre_mapped = true;
+- buf_info.len = len;
+-
+- if (unlikely(mhi_chan->pre_alloc))
+- return -EINVAL;
+-
+- return mhi_queue(mhi_dev, &buf_info, dir, mflags);
+-}
+-EXPORT_SYMBOL_GPL(mhi_queue_dma);
+-
+-int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
+- struct mhi_buf_info *info, enum mhi_flags flags)
+-{
+- struct mhi_ring *buf_ring, *tre_ring;
+- struct mhi_tre *mhi_tre;
+- struct mhi_buf_info *buf_info;
+- int eot, eob, chain, bei;
+- int ret;
+-
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+-
+- buf_info = buf_ring->wp;
+- WARN_ON(buf_info->used);
+- buf_info->pre_mapped = info->pre_mapped;
+- if (info->pre_mapped)
+- buf_info->p_addr = info->p_addr;
+- else
+- buf_info->v_addr = info->v_addr;
+- buf_info->cb_buf = info->cb_buf;
+- buf_info->wp = tre_ring->wp;
+- buf_info->dir = mhi_chan->dir;
+- buf_info->len = info->len;
+-
+- if (!info->pre_mapped) {
+- ret = mhi_cntrl->map_single(mhi_cntrl, buf_info);
+- if (ret)
+- return ret;
+- }
+-
+- eob = !!(flags & MHI_EOB);
+- eot = !!(flags & MHI_EOT);
+- chain = !!(flags & MHI_CHAIN);
+- bei = !!(mhi_chan->intmod);
+-
+- mhi_tre = tre_ring->wp;
+- mhi_tre->ptr = MHI_TRE_DATA_PTR(buf_info->p_addr);
+- mhi_tre->dword[0] = MHI_TRE_DATA_DWORD0(info->len);
+- mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(bei, eot, eob, chain);
+-
+- /* increment WP */
+- mhi_add_ring_element(mhi_cntrl, tre_ring);
+- mhi_add_ring_element(mhi_cntrl, buf_ring);
+-
+- return 0;
+-}
+-
+-int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+- void *buf, size_t len, enum mhi_flags mflags)
+-{
+- struct mhi_buf_info buf_info = { };
+-
+- buf_info.v_addr = buf;
+- buf_info.cb_buf = buf;
+- buf_info.len = len;
+-
+- return mhi_queue(mhi_dev, &buf_info, dir, mflags);
+-}
+-EXPORT_SYMBOL_GPL(mhi_queue_buf);
+-
+-bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ?
+- mhi_dev->ul_chan : mhi_dev->dl_chan;
+- struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
+-
+- return mhi_is_ring_full(mhi_cntrl, tre_ring);
+-}
+-EXPORT_SYMBOL_GPL(mhi_queue_is_full);
+-
+-int mhi_send_cmd(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan,
+- enum mhi_cmd_type cmd)
+-{
+- struct mhi_tre *cmd_tre = NULL;
+- struct mhi_cmd *mhi_cmd = &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
+- struct mhi_ring *ring = &mhi_cmd->ring;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int chan = 0;
+-
+- if (mhi_chan)
+- chan = mhi_chan->chan;
+-
+- spin_lock_bh(&mhi_cmd->lock);
+- if (!get_nr_avail_ring_elements(mhi_cntrl, ring)) {
+- spin_unlock_bh(&mhi_cmd->lock);
+- return -ENOMEM;
+- }
+-
+- /* prepare the cmd tre */
+- cmd_tre = ring->wp;
+- switch (cmd) {
+- case MHI_CMD_RESET_CHAN:
+- cmd_tre->ptr = MHI_TRE_CMD_RESET_PTR;
+- cmd_tre->dword[0] = MHI_TRE_CMD_RESET_DWORD0;
+- cmd_tre->dword[1] = MHI_TRE_CMD_RESET_DWORD1(chan);
+- break;
+- case MHI_CMD_STOP_CHAN:
+- cmd_tre->ptr = MHI_TRE_CMD_STOP_PTR;
+- cmd_tre->dword[0] = MHI_TRE_CMD_STOP_DWORD0;
+- cmd_tre->dword[1] = MHI_TRE_CMD_STOP_DWORD1(chan);
+- break;
+- case MHI_CMD_START_CHAN:
+- cmd_tre->ptr = MHI_TRE_CMD_START_PTR;
+- cmd_tre->dword[0] = MHI_TRE_CMD_START_DWORD0;
+- cmd_tre->dword[1] = MHI_TRE_CMD_START_DWORD1(chan);
+- break;
+- default:
+- dev_err(dev, "Command not supported\n");
+- break;
+- }
+-
+- /* queue to hardware */
+- mhi_add_ring_element(mhi_cntrl, ring);
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
+- mhi_ring_cmd_db(mhi_cntrl, mhi_cmd);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- spin_unlock_bh(&mhi_cmd->lock);
+-
+- return 0;
+-}
+-
+-static int mhi_update_channel_state(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan,
+- enum mhi_ch_state_type to_state)
+-{
+- struct device *dev = &mhi_chan->mhi_dev->dev;
+- enum mhi_cmd_type cmd = MHI_CMD_NOP;
+- int ret;
+-
+- dev_dbg(dev, "%d: Updating channel state to: %s\n", mhi_chan->chan,
+- TO_CH_STATE_TYPE_STR(to_state));
+-
+- switch (to_state) {
+- case MHI_CH_STATE_TYPE_RESET:
+- write_lock_irq(&mhi_chan->lock);
+- if (mhi_chan->ch_state != MHI_CH_STATE_STOP &&
+- mhi_chan->ch_state != MHI_CH_STATE_ENABLED &&
+- mhi_chan->ch_state != MHI_CH_STATE_SUSPENDED) {
+- write_unlock_irq(&mhi_chan->lock);
+- return -EINVAL;
+- }
+- mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
+- write_unlock_irq(&mhi_chan->lock);
+-
+- cmd = MHI_CMD_RESET_CHAN;
+- break;
+- case MHI_CH_STATE_TYPE_STOP:
+- if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
+- return -EINVAL;
+-
+- cmd = MHI_CMD_STOP_CHAN;
+- break;
+- case MHI_CH_STATE_TYPE_START:
+- if (mhi_chan->ch_state != MHI_CH_STATE_STOP &&
+- mhi_chan->ch_state != MHI_CH_STATE_DISABLED)
+- return -EINVAL;
+-
+- cmd = MHI_CMD_START_CHAN;
+- break;
+- default:
+- dev_err(dev, "%d: Channel state update to %s not allowed\n",
+- mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
+- return -EINVAL;
+- }
+-
+- /* bring host and device out of suspended states */
+- ret = mhi_device_get_sync(mhi_cntrl->mhi_dev);
+- if (ret)
+- return ret;
+- mhi_cntrl->runtime_get(mhi_cntrl);
+-
+- reinit_completion(&mhi_chan->completion);
+- ret = mhi_send_cmd(mhi_cntrl, mhi_chan, cmd);
+- if (ret) {
+- dev_err(dev, "%d: Failed to send %s channel command\n",
+- mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
+- goto exit_channel_update;
+- }
+-
+- ret = wait_for_completion_timeout(&mhi_chan->completion,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+- if (!ret || mhi_chan->ccs != MHI_EV_CC_SUCCESS) {
+- dev_err(dev,
+- "%d: Failed to receive %s channel command completion\n",
+- mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
+- ret = -EIO;
+- goto exit_channel_update;
+- }
+-
+- ret = 0;
+-
+- if (to_state != MHI_CH_STATE_TYPE_RESET) {
+- write_lock_irq(&mhi_chan->lock);
+- mhi_chan->ch_state = (to_state == MHI_CH_STATE_TYPE_START) ?
+- MHI_CH_STATE_ENABLED : MHI_CH_STATE_STOP;
+- write_unlock_irq(&mhi_chan->lock);
+- }
+-
+- dev_dbg(dev, "%d: Channel state change to %s successful\n",
+- mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
+-
+-exit_channel_update:
+- mhi_cntrl->runtime_put(mhi_cntrl);
+- mhi_device_put(mhi_cntrl->mhi_dev);
+-
+- return ret;
+-}
+-
+-static void mhi_unprepare_channel(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- int ret;
+- struct device *dev = &mhi_chan->mhi_dev->dev;
+-
+- mutex_lock(&mhi_chan->mutex);
+-
+- if (!(BIT(mhi_cntrl->ee) & mhi_chan->ee_mask)) {
+- dev_dbg(dev, "Current EE: %s Required EE Mask: 0x%x\n",
+- TO_MHI_EXEC_STR(mhi_cntrl->ee), mhi_chan->ee_mask);
+- goto exit_unprepare_channel;
+- }
+-
+- /* no more processing events for this channel */
+- ret = mhi_update_channel_state(mhi_cntrl, mhi_chan,
+- MHI_CH_STATE_TYPE_RESET);
+- if (ret)
+- dev_err(dev, "%d: Failed to reset channel, still resetting\n",
+- mhi_chan->chan);
+-
+-exit_unprepare_channel:
+- write_lock_irq(&mhi_chan->lock);
+- mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
+- write_unlock_irq(&mhi_chan->lock);
+-
+- if (!mhi_chan->offload_ch) {
+- mhi_reset_chan(mhi_cntrl, mhi_chan);
+- mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
+- }
+- dev_dbg(dev, "%d: successfully reset\n", mhi_chan->chan);
+-
+- mutex_unlock(&mhi_chan->mutex);
+-}
+-
+-int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- int ret = 0;
+- struct device *dev = &mhi_chan->mhi_dev->dev;
+-
+- if (!(BIT(mhi_cntrl->ee) & mhi_chan->ee_mask)) {
+- dev_err(dev, "Current EE: %s Required EE Mask: 0x%x\n",
+- TO_MHI_EXEC_STR(mhi_cntrl->ee), mhi_chan->ee_mask);
+- return -ENOTCONN;
+- }
+-
+- mutex_lock(&mhi_chan->mutex);
+-
+- /* Check if client manages channel context for offload channels */
+- if (!mhi_chan->offload_ch) {
+- ret = mhi_init_chan_ctxt(mhi_cntrl, mhi_chan);
+- if (ret)
+- goto error_init_chan;
+- }
+-
+- ret = mhi_update_channel_state(mhi_cntrl, mhi_chan,
+- MHI_CH_STATE_TYPE_START);
+- if (ret)
+- goto error_pm_state;
+-
+- /* Pre-allocate buffer for xfer ring */
+- if (mhi_chan->pre_alloc) {
+- int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
+- &mhi_chan->tre_ring);
+- size_t len = mhi_cntrl->buffer_len;
+-
+- while (nr_el--) {
+- void *buf;
+- struct mhi_buf_info info = { };
+- buf = kmalloc(len, GFP_KERNEL);
+- if (!buf) {
+- ret = -ENOMEM;
+- goto error_pre_alloc;
+- }
+-
+- /* Prepare transfer descriptors */
+- info.v_addr = buf;
+- info.cb_buf = buf;
+- info.len = len;
+- ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &info, MHI_EOT);
+- if (ret) {
+- kfree(buf);
+- goto error_pre_alloc;
+- }
+- }
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (MHI_DB_ACCESS_VALID(mhi_cntrl)) {
+- read_lock_irq(&mhi_chan->lock);
+- mhi_ring_chan_db(mhi_cntrl, mhi_chan);
+- read_unlock_irq(&mhi_chan->lock);
+- }
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- }
+-
+- mutex_unlock(&mhi_chan->mutex);
+-
+- return 0;
+-
+-error_pm_state:
+- if (!mhi_chan->offload_ch)
+- mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
+-
+-error_init_chan:
+- mutex_unlock(&mhi_chan->mutex);
+-
+- return ret;
+-
+-error_pre_alloc:
+- mutex_unlock(&mhi_chan->mutex);
+- mhi_unprepare_channel(mhi_cntrl, mhi_chan);
+-
+- return ret;
+-}
+-
+-static void mhi_mark_stale_events(struct mhi_controller *mhi_cntrl,
+- struct mhi_event *mhi_event,
+- struct mhi_event_ctxt *er_ctxt,
+- int chan)
+-
+-{
+- struct mhi_tre *dev_rp, *local_rp;
+- struct mhi_ring *ev_ring;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- unsigned long flags;
+- dma_addr_t ptr;
+-
+- dev_dbg(dev, "Marking all events for chan: %d as stale\n", chan);
+-
+- ev_ring = &mhi_event->ring;
+-
+- /* mark all stale events related to channel as STALE event */
+- spin_lock_irqsave(&mhi_event->lock, flags);
+-
+- ptr = er_ctxt->rp;
+- if (!is_valid_ring_ptr(ev_ring, ptr)) {
+- dev_err(&mhi_cntrl->mhi_dev->dev,
+- "Event ring rp points outside of the event ring\n");
+- dev_rp = ev_ring->rp;
+- } else {
+- dev_rp = mhi_to_virtual(ev_ring, ptr);
+- }
+-
+- local_rp = ev_ring->rp;
+- while (dev_rp != local_rp) {
+- if (MHI_TRE_GET_EV_TYPE(local_rp) == MHI_PKT_TYPE_TX_EVENT &&
+- chan == MHI_TRE_GET_EV_CHID(local_rp))
+- local_rp->dword[1] = MHI_TRE_EV_DWORD1(chan,
+- MHI_PKT_TYPE_STALE_EVENT);
+- local_rp++;
+- if (local_rp == (ev_ring->base + ev_ring->len))
+- local_rp = ev_ring->base;
+- }
+-
+- dev_dbg(dev, "Finished marking events as stale events\n");
+- spin_unlock_irqrestore(&mhi_event->lock, flags);
+-}
+-
+-static void mhi_reset_data_chan(struct mhi_controller *mhi_cntrl,
+- struct mhi_chan *mhi_chan)
+-{
+- struct mhi_ring *buf_ring, *tre_ring;
+- struct mhi_result result;
+-
+- /* Reset any pending buffers */
+- buf_ring = &mhi_chan->buf_ring;
+- tre_ring = &mhi_chan->tre_ring;
+- result.transaction_status = -ENOTCONN;
+- result.bytes_xferd = 0;
+- while (tre_ring->rp != tre_ring->wp) {
+- struct mhi_buf_info *buf_info = buf_ring->rp;
+-
+- if (mhi_chan->dir == DMA_TO_DEVICE) {
+- atomic_dec(&mhi_cntrl->pending_pkts);
+- /* Release the reference got from mhi_queue() */
+- mhi_cntrl->runtime_put(mhi_cntrl);
+- }
+-
+- if (!buf_info->pre_mapped)
+- mhi_cntrl->unmap_single(mhi_cntrl, buf_info);
+-
+- mhi_del_ring_element(mhi_cntrl, buf_ring);
+- mhi_del_ring_element(mhi_cntrl, tre_ring);
+-
+- if (mhi_chan->pre_alloc) {
+- kfree(buf_info->cb_buf);
+- } else {
+- result.buf_addr = buf_info->cb_buf;
+- mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
+- }
+- }
+-}
+-
+-void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
+-{
+- struct mhi_event *mhi_event;
+- struct mhi_event_ctxt *er_ctxt;
+- int chan = mhi_chan->chan;
+-
+- /* Nothing to reset, client doesn't queue buffers */
+- if (mhi_chan->offload_ch)
+- return;
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
+- er_ctxt = &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_chan->er_index];
+-
+- mhi_mark_stale_events(mhi_cntrl, mhi_event, er_ctxt, chan);
+-
+- mhi_reset_data_chan(mhi_cntrl, mhi_chan);
+-
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-}
+-
+-/* Move channel to start state */
+-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+-{
+- int ret, dir;
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan;
+-
+- for (dir = 0; dir < 2; dir++) {
+- mhi_chan = dir ? mhi_dev->dl_chan : mhi_dev->ul_chan;
+- if (!mhi_chan)
+- continue;
+-
+- ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+- if (ret)
+- goto error_open_chan;
+- }
+-
+- return 0;
+-
+-error_open_chan:
+- for (--dir; dir >= 0; dir--) {
+- mhi_chan = dir ? mhi_dev->dl_chan : mhi_dev->ul_chan;
+- if (!mhi_chan)
+- continue;
+-
+- mhi_unprepare_channel(mhi_cntrl, mhi_chan);
+- }
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_prepare_for_transfer);
+-
+-void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan;
+- int dir;
+-
+- for (dir = 0; dir < 2; dir++) {
+- mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
+- if (!mhi_chan)
+- continue;
+-
+- mhi_unprepare_channel(mhi_cntrl, mhi_chan);
+- }
+-}
+-EXPORT_SYMBOL_GPL(mhi_unprepare_from_transfer);
+-
+-int mhi_poll(struct mhi_device *mhi_dev, u32 budget)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- struct mhi_chan *mhi_chan = mhi_dev->dl_chan;
+- struct mhi_event *mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
+- int ret;
+-
+- spin_lock_bh(&mhi_event->lock);
+- ret = mhi_event->process_event(mhi_cntrl, mhi_event, budget);
+- spin_unlock_bh(&mhi_event->lock);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_poll);
+diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
+deleted file mode 100644
+index fb99e3727155b..0000000000000
+--- a/drivers/bus/mhi/core/pm.c
++++ /dev/null
+@@ -1,1256 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+- *
+- */
+-
+-#include <linux/delay.h>
+-#include <linux/device.h>
+-#include <linux/dma-direction.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/interrupt.h>
+-#include <linux/list.h>
+-#include <linux/mhi.h>
+-#include <linux/module.h>
+-#include <linux/slab.h>
+-#include <linux/wait.h>
+-#include "internal.h"
+-
+-/*
+- * Not all MHI state transitions are synchronous. Transitions like Linkdown,
+- * SYS_ERR, and shutdown can happen anytime asynchronously. This function will
+- * transition to a new state only if we're allowed to.
+- *
+- * Priority increases as we go down. For instance, from any state in L0, the
+- * transition can be made to states in L1, L2 and L3. A notable exception to
+- * this rule is state DISABLE. From DISABLE state we can only transition to
+- * POR state. Also, while in L2 state, user cannot jump back to previous
+- * L1 or L0 states.
+- *
+- * Valid transitions:
+- * L0: DISABLE <--> POR
+- * POR <--> POR
+- * POR -> M0 -> M2 --> M0
+- * POR -> FW_DL_ERR
+- * FW_DL_ERR <--> FW_DL_ERR
+- * M0 <--> M0
+- * M0 -> FW_DL_ERR
+- * M0 -> M3_ENTER -> M3 -> M3_EXIT --> M0
+- * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS --> POR
+- * L2: SHUTDOWN_PROCESS -> LD_ERR_FATAL_DETECT
+- * SHUTDOWN_PROCESS -> DISABLE
+- * L3: LD_ERR_FATAL_DETECT <--> LD_ERR_FATAL_DETECT
+- * LD_ERR_FATAL_DETECT -> DISABLE
+- */
+-static struct mhi_pm_transitions const dev_state_transitions[] = {
+- /* L0 States */
+- {
+- MHI_PM_DISABLE,
+- MHI_PM_POR
+- },
+- {
+- MHI_PM_POR,
+- MHI_PM_POR | MHI_PM_DISABLE | MHI_PM_M0 |
+- MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_FW_DL_ERR
+- },
+- {
+- MHI_PM_M0,
+- MHI_PM_M0 | MHI_PM_M2 | MHI_PM_M3_ENTER |
+- MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_FW_DL_ERR
+- },
+- {
+- MHI_PM_M2,
+- MHI_PM_M0 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- {
+- MHI_PM_M3_ENTER,
+- MHI_PM_M3 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- {
+- MHI_PM_M3,
+- MHI_PM_M3_EXIT | MHI_PM_SYS_ERR_DETECT |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- {
+- MHI_PM_M3_EXIT,
+- MHI_PM_M0 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- {
+- MHI_PM_FW_DL_ERR,
+- MHI_PM_FW_DL_ERR | MHI_PM_SYS_ERR_DETECT |
+- MHI_PM_SHUTDOWN_PROCESS | MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- /* L1 States */
+- {
+- MHI_PM_SYS_ERR_DETECT,
+- MHI_PM_SYS_ERR_PROCESS | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- {
+- MHI_PM_SYS_ERR_PROCESS,
+- MHI_PM_POR | MHI_PM_SHUTDOWN_PROCESS |
+- MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- /* L2 States */
+- {
+- MHI_PM_SHUTDOWN_PROCESS,
+- MHI_PM_DISABLE | MHI_PM_LD_ERR_FATAL_DETECT
+- },
+- /* L3 States */
+- {
+- MHI_PM_LD_ERR_FATAL_DETECT,
+- MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_DISABLE
+- },
+-};
+-
+-enum mhi_pm_state __must_check mhi_tryset_pm_state(struct mhi_controller *mhi_cntrl,
+- enum mhi_pm_state state)
+-{
+- unsigned long cur_state = mhi_cntrl->pm_state;
+- int index = find_last_bit(&cur_state, 32);
+-
+- if (unlikely(index >= ARRAY_SIZE(dev_state_transitions)))
+- return cur_state;
+-
+- if (unlikely(dev_state_transitions[index].from_state != cur_state))
+- return cur_state;
+-
+- if (unlikely(!(dev_state_transitions[index].to_states & state)))
+- return cur_state;
+-
+- mhi_cntrl->pm_state = state;
+- return mhi_cntrl->pm_state;
+-}
+-
+-void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, enum mhi_state state)
+-{
+- if (state == MHI_STATE_RESET) {
+- mhi_write_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
+- MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 1);
+- } else {
+- mhi_write_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
+- MHICTRL_MHISTATE_MASK,
+- MHICTRL_MHISTATE_SHIFT, state);
+- }
+-}
+-
+-/* NOP for backward compatibility, host allowed to ring DB in M2 state */
+-static void mhi_toggle_dev_wake_nop(struct mhi_controller *mhi_cntrl)
+-{
+-}
+-
+-static void mhi_toggle_dev_wake(struct mhi_controller *mhi_cntrl)
+-{
+- mhi_cntrl->wake_get(mhi_cntrl, false);
+- mhi_cntrl->wake_put(mhi_cntrl, true);
+-}
+-
+-/* Handle device ready state transition */
+-int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_event *mhi_event;
+- enum mhi_pm_state cur_state;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 interval_us = 25000; /* poll register field every 25 milliseconds */
+- int ret, i;
+-
+- /* Check if device entered error state */
+- if (MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
+- dev_err(dev, "Device link is not accessible\n");
+- return -EIO;
+- }
+-
+- /* Wait for RESET to be cleared and READY bit to be set by the device */
+- ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
+- MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
+- interval_us);
+- if (ret) {
+- dev_err(dev, "Device failed to clear MHI Reset\n");
+- return ret;
+- }
+-
+- ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHISTATUS,
+- MHISTATUS_READY_MASK, MHISTATUS_READY_SHIFT, 1,
+- interval_us);
+- if (ret) {
+- dev_err(dev, "Device failed to enter MHI Ready\n");
+- return ret;
+- }
+-
+- dev_dbg(dev, "Device in READY State\n");
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_POR);
+- mhi_cntrl->dev_state = MHI_STATE_READY;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- if (cur_state != MHI_PM_POR) {
+- dev_err(dev, "Error moving to state %s from %s\n",
+- to_mhi_pm_state_str(MHI_PM_POR),
+- to_mhi_pm_state_str(cur_state));
+- return -EIO;
+- }
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
+- dev_err(dev, "Device registers not accessible\n");
+- goto error_mmio;
+- }
+-
+- /* Configure MMIO registers */
+- ret = mhi_init_mmio(mhi_cntrl);
+- if (ret) {
+- dev_err(dev, "Error configuring MMIO registers\n");
+- goto error_mmio;
+- }
+-
+- /* Add elements to all SW event rings */
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- /* Skip if this is an offload or HW event */
+- if (mhi_event->offload_ev || mhi_event->hw_ring)
+- continue;
+-
+- ring->wp = ring->base + ring->len - ring->el_size;
+- *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size;
+- /* Update all cores */
+- smp_wmb();
+-
+- /* Ring the event ring db */
+- spin_lock_irq(&mhi_event->lock);
+- mhi_ring_er_db(mhi_event);
+- spin_unlock_irq(&mhi_event->lock);
+- }
+-
+- /* Set MHI to M0 state */
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- return 0;
+-
+-error_mmio:
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- return -EIO;
+-}
+-
+-int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_pm_state cur_state;
+- struct mhi_chan *mhi_chan;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int i;
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->dev_state = MHI_STATE_M0;
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M0);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (unlikely(cur_state != MHI_PM_M0)) {
+- dev_err(dev, "Unable to transition to M0 state\n");
+- return -EIO;
+- }
+- mhi_cntrl->M0++;
+-
+- /* Wake up the device */
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_cntrl->wake_get(mhi_cntrl, true);
+-
+- /* Ring all event rings and CMD ring only if we're in mission mode */
+- if (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) {
+- struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
+- struct mhi_cmd *mhi_cmd =
+- &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
+-
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+-
+- spin_lock_irq(&mhi_event->lock);
+- mhi_ring_er_db(mhi_event);
+- spin_unlock_irq(&mhi_event->lock);
+- }
+-
+- /* Only ring primary cmd ring if ring is not empty */
+- spin_lock_irq(&mhi_cmd->lock);
+- if (mhi_cmd->ring.rp != mhi_cmd->ring.wp)
+- mhi_ring_cmd_db(mhi_cntrl, mhi_cmd);
+- spin_unlock_irq(&mhi_cmd->lock);
+- }
+-
+- /* Ring channel DB registers */
+- mhi_chan = mhi_cntrl->mhi_chan;
+- for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
+- struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
+-
+- if (mhi_chan->db_cfg.reset_req) {
+- write_lock_irq(&mhi_chan->lock);
+- mhi_chan->db_cfg.db_mode = true;
+- write_unlock_irq(&mhi_chan->lock);
+- }
+-
+- read_lock_irq(&mhi_chan->lock);
+-
+- /* Only ring DB if ring is not empty */
+- if (tre_ring->base && tre_ring->wp != tre_ring->rp)
+- mhi_ring_chan_db(mhi_cntrl, mhi_chan);
+- read_unlock_irq(&mhi_chan->lock);
+- }
+-
+- mhi_cntrl->wake_put(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- return 0;
+-}
+-
+-/*
+- * After receiving the MHI state change event from the device indicating the
+- * transition to M1 state, the host can transition the device to M2 state
+- * for keeping it in low power state.
+- */
+-void mhi_pm_m1_transition(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_pm_state state;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M2);
+- if (state == MHI_PM_M2) {
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M2);
+- mhi_cntrl->dev_state = MHI_STATE_M2;
+-
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- mhi_cntrl->M2++;
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- /* If there are any pending resources, exit M2 immediately */
+- if (unlikely(atomic_read(&mhi_cntrl->pending_pkts) ||
+- atomic_read(&mhi_cntrl->dev_wake))) {
+- dev_dbg(dev,
+- "Exiting M2, pending_pkts: %d dev_wake: %d\n",
+- atomic_read(&mhi_cntrl->pending_pkts),
+- atomic_read(&mhi_cntrl->dev_wake));
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_cntrl->wake_get(mhi_cntrl, true);
+- mhi_cntrl->wake_put(mhi_cntrl, true);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- } else {
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_IDLE);
+- }
+- } else {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- }
+-}
+-
+-/* MHI M3 completion handler */
+-int mhi_pm_m3_transition(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_pm_state state;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->dev_state = MHI_STATE_M3;
+- state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (state != MHI_PM_M3) {
+- dev_err(dev, "Unable to transition to M3 state\n");
+- return -EIO;
+- }
+-
+- mhi_cntrl->M3++;
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- return 0;
+-}
+-
+-/* Handle device Mission Mode transition */
+-static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_event *mhi_event;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- enum mhi_ee_type ee = MHI_EE_MAX, current_ee = mhi_cntrl->ee;
+- int i, ret;
+-
+- dev_dbg(dev, "Processing Mission Mode transition\n");
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
+- ee = mhi_get_exec_env(mhi_cntrl);
+-
+- if (!MHI_IN_MISSION_MODE(ee)) {
+- mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- wake_up_all(&mhi_cntrl->state_event);
+- return -EIO;
+- }
+- mhi_cntrl->ee = ee;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- device_for_each_child(&mhi_cntrl->mhi_dev->dev, &current_ee,
+- mhi_destroy_device);
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_MISSION_MODE);
+-
+- /* Force MHI to be in M0 state before continuing */
+- ret = __mhi_device_get_sync(mhi_cntrl);
+- if (ret)
+- return ret;
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+-
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- ret = -EIO;
+- goto error_mission_mode;
+- }
+-
+- /* Add elements to all HW event rings */
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- if (mhi_event->offload_ev || !mhi_event->hw_ring)
+- continue;
+-
+- ring->wp = ring->base + ring->len - ring->el_size;
+- *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size;
+- /* Update to all cores */
+- smp_wmb();
+-
+- spin_lock_irq(&mhi_event->lock);
+- if (MHI_DB_ACCESS_VALID(mhi_cntrl))
+- mhi_ring_er_db(mhi_event);
+- spin_unlock_irq(&mhi_event->lock);
+- }
+-
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- /*
+- * The MHI devices are only created when the client device switches its
+- * Execution Environment (EE) to either SBL or AMSS states
+- */
+- mhi_create_devices(mhi_cntrl);
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+-
+-error_mission_mode:
+- mhi_cntrl->wake_put(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- return ret;
+-}
+-
+-/* Handle shutdown transitions */
+-static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_pm_state cur_state;
+- struct mhi_event *mhi_event;
+- struct mhi_cmd_ctxt *cmd_ctxt;
+- struct mhi_cmd *mhi_cmd;
+- struct mhi_event_ctxt *er_ctxt;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int ret, i;
+-
+- dev_dbg(dev, "Processing disable transition with PM state: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+-
+- /* Trigger MHI RESET so that the device will not access host memory */
+- if (!MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
+- dev_dbg(dev, "Triggering MHI Reset in device\n");
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
+-
+- /* Wait for the reset bit to be cleared by the device */
+- ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
+- MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
+- 25000);
+- if (ret)
+- dev_err(dev, "Device failed to clear MHI Reset\n");
+-
+- /*
+- * Device will clear BHI_INTVEC as a part of RESET processing,
+- * hence re-program it
+- */
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
+- }
+-
+- dev_dbg(dev,
+- "Waiting for all pending event ring processing to complete\n");
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+- free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
+- tasklet_kill(&mhi_event->task);
+- }
+-
+- /* Release lock and wait for all pending threads to complete */
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+- dev_dbg(dev, "Waiting for all pending threads to complete\n");
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- dev_dbg(dev, "Reset all active channels and remove MHI devices\n");
+- device_for_each_child(&mhi_cntrl->mhi_dev->dev, NULL, mhi_destroy_device);
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+-
+- WARN_ON(atomic_read(&mhi_cntrl->dev_wake));
+- WARN_ON(atomic_read(&mhi_cntrl->pending_pkts));
+-
+- /* Reset the ev rings and cmd rings */
+- dev_dbg(dev, "Resetting EV CTXT and CMD CTXT\n");
+- mhi_cmd = mhi_cntrl->mhi_cmd;
+- cmd_ctxt = mhi_cntrl->mhi_ctxt->cmd_ctxt;
+- for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
+- struct mhi_ring *ring = &mhi_cmd->ring;
+-
+- ring->rp = ring->base;
+- ring->wp = ring->base;
+- cmd_ctxt->rp = cmd_ctxt->rbase;
+- cmd_ctxt->wp = cmd_ctxt->rbase;
+- }
+-
+- mhi_event = mhi_cntrl->mhi_event;
+- er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
+- mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- /* Skip offload events */
+- if (mhi_event->offload_ev)
+- continue;
+-
+- ring->rp = ring->base;
+- ring->wp = ring->base;
+- er_ctxt->rp = er_ctxt->rbase;
+- er_ctxt->wp = er_ctxt->rbase;
+- }
+-
+- /* Move to disable state */
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_DISABLE);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (unlikely(cur_state != MHI_PM_DISABLE))
+- dev_err(dev, "Error moving from PM state: %s to: %s\n",
+- to_mhi_pm_state_str(cur_state),
+- to_mhi_pm_state_str(MHI_PM_DISABLE));
+-
+- dev_dbg(dev, "Exiting with PM state: %s, MHI state: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state));
+-
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-}
+-
+-/* Handle system error transitions */
+-static void mhi_pm_sys_error_transition(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_pm_state cur_state, prev_state;
+- enum dev_st_transition next_state;
+- struct mhi_event *mhi_event;
+- struct mhi_cmd_ctxt *cmd_ctxt;
+- struct mhi_cmd *mhi_cmd;
+- struct mhi_event_ctxt *er_ctxt;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int ret, i;
+-
+- dev_dbg(dev, "Transitioning from PM state: %s to: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- to_mhi_pm_state_str(MHI_PM_SYS_ERR_PROCESS));
+-
+- /* We must notify MHI control driver so it can clean up first */
+- mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_SYS_ERROR);
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- prev_state = mhi_cntrl->pm_state;
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_SYS_ERR_PROCESS);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- if (cur_state != MHI_PM_SYS_ERR_PROCESS) {
+- dev_err(dev, "Failed to transition from PM state: %s to: %s\n",
+- to_mhi_pm_state_str(cur_state),
+- to_mhi_pm_state_str(MHI_PM_SYS_ERR_PROCESS));
+- goto exit_sys_error_transition;
+- }
+-
+- mhi_cntrl->ee = MHI_EE_DISABLE_TRANSITION;
+- mhi_cntrl->dev_state = MHI_STATE_RESET;
+-
+- /* Wake up threads waiting for state transition */
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- /* Trigger MHI RESET so that the device will not access host memory */
+- if (MHI_REG_ACCESS_VALID(prev_state)) {
+- u32 in_reset = -1;
+- unsigned long timeout = msecs_to_jiffies(mhi_cntrl->timeout_ms);
+-
+- dev_dbg(dev, "Triggering MHI Reset in device\n");
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
+-
+- /* Wait for the reset bit to be cleared by the device */
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_read_reg_field(mhi_cntrl,
+- mhi_cntrl->regs,
+- MHICTRL,
+- MHICTRL_RESET_MASK,
+- MHICTRL_RESET_SHIFT,
+- &in_reset) ||
+- !in_reset, timeout);
+- if (!ret || in_reset) {
+- dev_err(dev, "Device failed to exit MHI Reset state\n");
+- goto exit_sys_error_transition;
+- }
+-
+- /*
+- * Device will clear BHI_INTVEC as a part of RESET processing,
+- * hence re-program it
+- */
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
+- }
+-
+- dev_dbg(dev,
+- "Waiting for all pending event ring processing to complete\n");
+- mhi_event = mhi_cntrl->mhi_event;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
+- if (mhi_event->offload_ev)
+- continue;
+- tasklet_kill(&mhi_event->task);
+- }
+-
+- /* Release lock and wait for all pending threads to complete */
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+- dev_dbg(dev, "Waiting for all pending threads to complete\n");
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- dev_dbg(dev, "Reset all active channels and remove MHI devices\n");
+- device_for_each_child(&mhi_cntrl->mhi_dev->dev, NULL, mhi_destroy_device);
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+-
+- WARN_ON(atomic_read(&mhi_cntrl->dev_wake));
+- WARN_ON(atomic_read(&mhi_cntrl->pending_pkts));
+-
+- /* Reset the ev rings and cmd rings */
+- dev_dbg(dev, "Resetting EV CTXT and CMD CTXT\n");
+- mhi_cmd = mhi_cntrl->mhi_cmd;
+- cmd_ctxt = mhi_cntrl->mhi_ctxt->cmd_ctxt;
+- for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
+- struct mhi_ring *ring = &mhi_cmd->ring;
+-
+- ring->rp = ring->base;
+- ring->wp = ring->base;
+- cmd_ctxt->rp = cmd_ctxt->rbase;
+- cmd_ctxt->wp = cmd_ctxt->rbase;
+- }
+-
+- mhi_event = mhi_cntrl->mhi_event;
+- er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
+- for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
+- mhi_event++) {
+- struct mhi_ring *ring = &mhi_event->ring;
+-
+- /* Skip offload events */
+- if (mhi_event->offload_ev)
+- continue;
+-
+- ring->rp = ring->base;
+- ring->wp = ring->base;
+- er_ctxt->rp = er_ctxt->rbase;
+- er_ctxt->wp = er_ctxt->rbase;
+- }
+-
+- /* Transition to next state */
+- if (MHI_IN_PBL(mhi_get_exec_env(mhi_cntrl))) {
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_POR);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- if (cur_state != MHI_PM_POR) {
+- dev_err(dev, "Error moving to state %s from %s\n",
+- to_mhi_pm_state_str(MHI_PM_POR),
+- to_mhi_pm_state_str(cur_state));
+- goto exit_sys_error_transition;
+- }
+- next_state = DEV_ST_TRANSITION_PBL;
+- } else {
+- next_state = DEV_ST_TRANSITION_READY;
+- }
+-
+- mhi_queue_state_transition(mhi_cntrl, next_state);
+-
+-exit_sys_error_transition:
+- dev_dbg(dev, "Exiting with PM state: %s, MHI state: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state));
+-
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-}
+-
+-/* Queue a new work item and schedule work */
+-int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl,
+- enum dev_st_transition state)
+-{
+- struct state_transition *item = kmalloc(sizeof(*item), GFP_ATOMIC);
+- unsigned long flags;
+-
+- if (!item)
+- return -ENOMEM;
+-
+- item->state = state;
+- spin_lock_irqsave(&mhi_cntrl->transition_lock, flags);
+- list_add_tail(&item->node, &mhi_cntrl->transition_list);
+- spin_unlock_irqrestore(&mhi_cntrl->transition_lock, flags);
+-
+- queue_work(mhi_cntrl->hiprio_wq, &mhi_cntrl->st_worker);
+-
+- return 0;
+-}
+-
+-/* SYS_ERR worker */
+-void mhi_pm_sys_err_handler(struct mhi_controller *mhi_cntrl)
+-{
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- /* skip if controller supports RDDM */
+- if (mhi_cntrl->rddm_image) {
+- dev_dbg(dev, "Controller supports RDDM, skip SYS_ERROR\n");
+- return;
+- }
+-
+- mhi_queue_state_transition(mhi_cntrl, DEV_ST_TRANSITION_SYS_ERR);
+-}
+-
+-/* Device State Transition worker */
+-void mhi_pm_st_worker(struct work_struct *work)
+-{
+- struct state_transition *itr, *tmp;
+- LIST_HEAD(head);
+- struct mhi_controller *mhi_cntrl = container_of(work,
+- struct mhi_controller,
+- st_worker);
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- spin_lock_irq(&mhi_cntrl->transition_lock);
+- list_splice_tail_init(&mhi_cntrl->transition_list, &head);
+- spin_unlock_irq(&mhi_cntrl->transition_lock);
+-
+- list_for_each_entry_safe(itr, tmp, &head, node) {
+- list_del(&itr->node);
+- dev_dbg(dev, "Handling state transition: %s\n",
+- TO_DEV_STATE_TRANS_STR(itr->state));
+-
+- switch (itr->state) {
+- case DEV_ST_TRANSITION_PBL:
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
+- mhi_cntrl->ee = mhi_get_exec_env(mhi_cntrl);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- mhi_fw_load_handler(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_SBL:
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->ee = MHI_EE_SBL;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- /*
+- * The MHI devices are only created when the client
+- * device switches its Execution Environment (EE) to
+- * either SBL or AMSS states
+- */
+- mhi_create_devices(mhi_cntrl);
+- if (mhi_cntrl->fbc_download)
+- mhi_download_amss_image(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_MISSION_MODE:
+- mhi_pm_mission_mode_transition(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_FP:
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_cntrl->ee = MHI_EE_FP;
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- mhi_create_devices(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_READY:
+- mhi_ready_state_transition(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_SYS_ERR:
+- mhi_pm_sys_error_transition(mhi_cntrl);
+- break;
+- case DEV_ST_TRANSITION_DISABLE:
+- mhi_pm_disable_transition(mhi_cntrl);
+- break;
+- default:
+- break;
+- }
+- kfree(itr);
+- }
+-}
+-
+-int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_chan *itr, *tmp;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- enum mhi_pm_state new_state;
+- int ret;
+-
+- if (mhi_cntrl->pm_state == MHI_PM_DISABLE)
+- return -EINVAL;
+-
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
+- return -EIO;
+-
+- /* Return busy if there are any pending resources */
+- if (atomic_read(&mhi_cntrl->dev_wake) ||
+- atomic_read(&mhi_cntrl->pending_pkts))
+- return -EBUSY;
+-
+- /* Take MHI out of M2 state */
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_cntrl->wake_get(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_cntrl->dev_state == MHI_STATE_M0 ||
+- mhi_cntrl->dev_state == MHI_STATE_M1 ||
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_cntrl->wake_put(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- dev_err(dev,
+- "Could not enter M0/M1 state");
+- return -EIO;
+- }
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+-
+- if (atomic_read(&mhi_cntrl->dev_wake) ||
+- atomic_read(&mhi_cntrl->pending_pkts)) {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- return -EBUSY;
+- }
+-
+- dev_dbg(dev, "Allowing M3 transition\n");
+- new_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3_ENTER);
+- if (new_state != MHI_PM_M3_ENTER) {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- dev_err(dev,
+- "Error setting to PM state: %s from: %s\n",
+- to_mhi_pm_state_str(MHI_PM_M3_ENTER),
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+- return -EIO;
+- }
+-
+- /* Set MHI to M3 and wait for completion */
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- dev_dbg(dev, "Waiting for M3 completion\n");
+-
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_cntrl->dev_state == MHI_STATE_M3 ||
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- dev_err(dev,
+- "Did not enter M3 state, MHI state: %s, PM state: %s\n",
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+- return -EIO;
+- }
+-
+- /* Notify clients about entering LPM */
+- list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
+- mutex_lock(&itr->mutex);
+- if (itr->mhi_dev)
+- mhi_notify(itr->mhi_dev, MHI_CB_LPM_ENTER);
+- mutex_unlock(&itr->mutex);
+- }
+-
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(mhi_pm_suspend);
+-
+-int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
+-{
+- struct mhi_chan *itr, *tmp;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- enum mhi_pm_state cur_state;
+- int ret;
+-
+- dev_dbg(dev, "Entered with PM state: %s, MHI state: %s\n",
+- to_mhi_pm_state_str(mhi_cntrl->pm_state),
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state));
+-
+- if (mhi_cntrl->pm_state == MHI_PM_DISABLE)
+- return 0;
+-
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
+- return -EIO;
+-
+- if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
+- return -EINVAL;
+-
+- /* Notify clients about exiting LPM */
+- list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
+- mutex_lock(&itr->mutex);
+- if (itr->mhi_dev)
+- mhi_notify(itr->mhi_dev, MHI_CB_LPM_EXIT);
+- mutex_unlock(&itr->mutex);
+- }
+-
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3_EXIT);
+- if (cur_state != MHI_PM_M3_EXIT) {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- dev_info(dev,
+- "Error setting to PM state: %s from: %s\n",
+- to_mhi_pm_state_str(MHI_PM_M3_EXIT),
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+- return -EIO;
+- }
+-
+- /* Set MHI to M0 and wait for completion */
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_cntrl->dev_state == MHI_STATE_M0 ||
+- mhi_cntrl->dev_state == MHI_STATE_M2 ||
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- dev_err(dev,
+- "Did not enter M0 state, MHI state: %s, PM state: %s\n",
+- TO_MHI_STATE_STR(mhi_cntrl->dev_state),
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+- return -EIO;
+- }
+-
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(mhi_pm_resume);
+-
+-int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl)
+-{
+- int ret;
+-
+- /* Wake up the device */
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- return -EIO;
+- }
+- mhi_cntrl->wake_get(mhi_cntrl, true);
+- if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
+- mhi_trigger_resume(mhi_cntrl);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_cntrl->pm_state == MHI_PM_M0 ||
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- mhi_cntrl->wake_put(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+- return -EIO;
+- }
+-
+- return 0;
+-}
+-
+-/* Assert device wake db */
+-static void mhi_assert_dev_wake(struct mhi_controller *mhi_cntrl, bool force)
+-{
+- unsigned long flags;
+-
+- /*
+- * If force flag is set, then increment the wake count value and
+- * ring wake db
+- */
+- if (unlikely(force)) {
+- spin_lock_irqsave(&mhi_cntrl->wlock, flags);
+- atomic_inc(&mhi_cntrl->dev_wake);
+- if (MHI_WAKE_DB_FORCE_SET_VALID(mhi_cntrl->pm_state) &&
+- !mhi_cntrl->wake_set) {
+- mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 1);
+- mhi_cntrl->wake_set = true;
+- }
+- spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
+- } else {
+- /*
+- * If resources are already requested, then just increment
+- * the wake count value and return
+- */
+- if (likely(atomic_add_unless(&mhi_cntrl->dev_wake, 1, 0)))
+- return;
+-
+- spin_lock_irqsave(&mhi_cntrl->wlock, flags);
+- if ((atomic_inc_return(&mhi_cntrl->dev_wake) == 1) &&
+- MHI_WAKE_DB_SET_VALID(mhi_cntrl->pm_state) &&
+- !mhi_cntrl->wake_set) {
+- mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 1);
+- mhi_cntrl->wake_set = true;
+- }
+- spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
+- }
+-}
+-
+-/* De-assert device wake db */
+-static void mhi_deassert_dev_wake(struct mhi_controller *mhi_cntrl,
+- bool override)
+-{
+- unsigned long flags;
+-
+- /*
+- * Only continue if there is a single resource, else just decrement
+- * and return
+- */
+- if (likely(atomic_add_unless(&mhi_cntrl->dev_wake, -1, 1)))
+- return;
+-
+- spin_lock_irqsave(&mhi_cntrl->wlock, flags);
+- if ((atomic_dec_return(&mhi_cntrl->dev_wake) == 0) &&
+- MHI_WAKE_DB_CLEAR_VALID(mhi_cntrl->pm_state) && !override &&
+- mhi_cntrl->wake_set) {
+- mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 0);
+- mhi_cntrl->wake_set = false;
+- }
+- spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
+-}
+-
+-int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
+-{
+- enum mhi_state state;
+- enum mhi_ee_type current_ee;
+- enum dev_st_transition next_state;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- u32 val;
+- int ret;
+-
+- dev_info(dev, "Requested to power ON\n");
+-
+- /* Supply default wake routines if not provided by controller driver */
+- if (!mhi_cntrl->wake_get || !mhi_cntrl->wake_put ||
+- !mhi_cntrl->wake_toggle) {
+- mhi_cntrl->wake_get = mhi_assert_dev_wake;
+- mhi_cntrl->wake_put = mhi_deassert_dev_wake;
+- mhi_cntrl->wake_toggle = (mhi_cntrl->db_access & MHI_PM_M2) ?
+- mhi_toggle_dev_wake_nop : mhi_toggle_dev_wake;
+- }
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+- mhi_cntrl->pm_state = MHI_PM_DISABLE;
+-
+- ret = mhi_init_irq_setup(mhi_cntrl);
+- if (ret)
+- goto error_setup_irq;
+-
+- /* Setup BHI INTVEC */
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
+- mhi_cntrl->pm_state = MHI_PM_POR;
+- mhi_cntrl->ee = MHI_EE_MAX;
+- current_ee = mhi_get_exec_env(mhi_cntrl);
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+-
+- /* Confirm that the device is in valid exec env */
+- if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) {
+- dev_err(dev, "%s is not a valid EE for power on\n",
+- TO_MHI_EXEC_STR(current_ee));
+- ret = -EIO;
+- goto error_async_power_up;
+- }
+-
+- state = mhi_get_mhi_state(mhi_cntrl);
+- dev_dbg(dev, "Attempting power on with EE: %s, state: %s\n",
+- TO_MHI_EXEC_STR(current_ee), TO_MHI_STATE_STR(state));
+-
+- if (state == MHI_STATE_SYS_ERR) {
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state) ||
+- mhi_read_reg_field(mhi_cntrl,
+- mhi_cntrl->regs,
+- MHICTRL,
+- MHICTRL_RESET_MASK,
+- MHICTRL_RESET_SHIFT,
+- &val) ||
+- !val,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+- if (!ret) {
+- ret = -EIO;
+- dev_info(dev, "Failed to reset MHI due to syserr state\n");
+- goto error_async_power_up;
+- }
+-
+- /*
+- * device clears INTVEC as part of RESET processing,
+- * re-program it
+- */
+- mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
+- }
+-
+- /* Transition to next state */
+- next_state = MHI_IN_PBL(current_ee) ?
+- DEV_ST_TRANSITION_PBL : DEV_ST_TRANSITION_READY;
+-
+- mhi_queue_state_transition(mhi_cntrl, next_state);
+-
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-
+- dev_info(dev, "Power on setup success\n");
+-
+- return 0;
+-
+-error_async_power_up:
+- mhi_deinit_free_irq(mhi_cntrl);
+-
+-error_setup_irq:
+- mhi_cntrl->pm_state = MHI_PM_DISABLE;
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_async_power_up);
+-
+-void mhi_power_down(struct mhi_controller *mhi_cntrl, bool graceful)
+-{
+- enum mhi_pm_state cur_state, transition_state;
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+-
+- mutex_lock(&mhi_cntrl->pm_mutex);
+- write_lock_irq(&mhi_cntrl->pm_lock);
+- cur_state = mhi_cntrl->pm_state;
+- if (cur_state == MHI_PM_DISABLE) {
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+- return; /* Already powered down */
+- }
+-
+- /* If it's not a graceful shutdown, force MHI to linkdown state */
+- transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS :
+- MHI_PM_LD_ERR_FATAL_DETECT;
+-
+- cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state);
+- if (cur_state != transition_state) {
+- dev_err(dev, "Failed to move to state: %s from: %s\n",
+- to_mhi_pm_state_str(transition_state),
+- to_mhi_pm_state_str(mhi_cntrl->pm_state));
+- /* Force link down or error fatal detected state */
+- mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
+- }
+-
+- /* mark device inactive to avoid any further host processing */
+- mhi_cntrl->ee = MHI_EE_DISABLE_TRANSITION;
+- mhi_cntrl->dev_state = MHI_STATE_RESET;
+-
+- wake_up_all(&mhi_cntrl->state_event);
+-
+- write_unlock_irq(&mhi_cntrl->pm_lock);
+- mutex_unlock(&mhi_cntrl->pm_mutex);
+-
+- mhi_queue_state_transition(mhi_cntrl, DEV_ST_TRANSITION_DISABLE);
+-
+- /* Wait for shutdown to complete */
+- flush_work(&mhi_cntrl->st_worker);
+-
+- free_irq(mhi_cntrl->irq[0], mhi_cntrl);
+-}
+-EXPORT_SYMBOL_GPL(mhi_power_down);
+-
+-int mhi_sync_power_up(struct mhi_controller *mhi_cntrl)
+-{
+- int ret = mhi_async_power_up(mhi_cntrl);
+-
+- if (ret)
+- return ret;
+-
+- wait_event_timeout(mhi_cntrl->state_event,
+- MHI_IN_MISSION_MODE(mhi_cntrl->ee) ||
+- MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+-
+- ret = (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -ETIMEDOUT;
+- if (ret)
+- mhi_power_down(mhi_cntrl, false);
+-
+- return ret;
+-}
+-EXPORT_SYMBOL(mhi_sync_power_up);
+-
+-int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl)
+-{
+- struct device *dev = &mhi_cntrl->mhi_dev->dev;
+- int ret;
+-
+- /* Check if device is already in RDDM */
+- if (mhi_cntrl->ee == MHI_EE_RDDM)
+- return 0;
+-
+- dev_dbg(dev, "Triggering SYS_ERR to force RDDM state\n");
+- mhi_set_mhi_state(mhi_cntrl, MHI_STATE_SYS_ERR);
+-
+- /* Wait for RDDM event */
+- ret = wait_event_timeout(mhi_cntrl->state_event,
+- mhi_cntrl->ee == MHI_EE_RDDM,
+- msecs_to_jiffies(mhi_cntrl->timeout_ms));
+- ret = ret ? 0 : -EIO;
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_force_rddm_mode);
+-
+-void mhi_device_get(struct mhi_device *mhi_dev)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+-
+- mhi_dev->dev_wake++;
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
+- mhi_trigger_resume(mhi_cntrl);
+-
+- mhi_cntrl->wake_get(mhi_cntrl, true);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-}
+-EXPORT_SYMBOL_GPL(mhi_device_get);
+-
+-int mhi_device_get_sync(struct mhi_device *mhi_dev)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+- int ret;
+-
+- ret = __mhi_device_get_sync(mhi_cntrl);
+- if (!ret)
+- mhi_dev->dev_wake++;
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(mhi_device_get_sync);
+-
+-void mhi_device_put(struct mhi_device *mhi_dev)
+-{
+- struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
+-
+- mhi_dev->dev_wake--;
+- read_lock_bh(&mhi_cntrl->pm_lock);
+- if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
+- mhi_trigger_resume(mhi_cntrl);
+-
+- mhi_cntrl->wake_put(mhi_cntrl, false);
+- read_unlock_bh(&mhi_cntrl->pm_lock);
+-}
+-EXPORT_SYMBOL_GPL(mhi_device_put);
+diff --git a/drivers/bus/mhi/host/Kconfig b/drivers/bus/mhi/host/Kconfig
+new file mode 100644
+index 0000000000000..da5cd0c9fc620
+--- /dev/null
++++ b/drivers/bus/mhi/host/Kconfig
+@@ -0,0 +1,31 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# MHI bus
++#
++# Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++#
++
++config MHI_BUS
++ tristate "Modem Host Interface (MHI) bus"
++ help
++ Bus driver for MHI protocol. Modem Host Interface (MHI) is a
++ communication protocol used by the host processors to control
++ and communicate with modem devices over a high speed peripheral
++ bus or shared memory.
++
++config MHI_BUS_DEBUG
++ bool "Debugfs support for the MHI bus"
++ depends on MHI_BUS && DEBUG_FS
++ help
++ Enable debugfs support for use with the MHI transport. Allows
++ reading and/or modifying some values within the MHI controller
++ for debug and test purposes.
++
++config MHI_BUS_PCI_GENERIC
++ tristate "MHI PCI controller driver"
++ depends on MHI_BUS
++ depends on PCI
++ help
++ This driver provides an MHI PCI controller driver for devices such as
++ Qualcomm SDX55 based PCIe modems.
++
+diff --git a/drivers/bus/mhi/host/Makefile b/drivers/bus/mhi/host/Makefile
+new file mode 100644
+index 0000000000000..859c2f38451c6
+--- /dev/null
++++ b/drivers/bus/mhi/host/Makefile
+@@ -0,0 +1,6 @@
++obj-$(CONFIG_MHI_BUS) += mhi.o
++mhi-y := init.o main.o pm.o boot.o
++mhi-$(CONFIG_MHI_BUS_DEBUG) += debugfs.o
++
++obj-$(CONFIG_MHI_BUS_PCI_GENERIC) += mhi_pci_generic.o
++mhi_pci_generic-y += pci_generic.o
+diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
+new file mode 100644
+index 0000000000000..c9dfb1a48ad6d
+--- /dev/null
++++ b/drivers/bus/mhi/host/boot.c
+@@ -0,0 +1,541 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/dma-direction.h>
++#include <linux/dma-mapping.h>
++#include <linux/firmware.h>
++#include <linux/interrupt.h>
++#include <linux/list.h>
++#include <linux/mhi.h>
++#include <linux/module.h>
++#include <linux/random.h>
++#include <linux/slab.h>
++#include <linux/wait.h>
++#include "internal.h"
++
++/* Setup RDDM vector table for RDDM transfer and program RXVEC */
++void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
++ struct image_info *img_info)
++{
++ struct mhi_buf *mhi_buf = img_info->mhi_buf;
++ struct bhi_vec_entry *bhi_vec = img_info->bhi_vec;
++ void __iomem *base = mhi_cntrl->bhie;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 sequence_id;
++ unsigned int i;
++
++ for (i = 0; i < img_info->entries - 1; i++, mhi_buf++, bhi_vec++) {
++ bhi_vec->dma_addr = mhi_buf->dma_addr;
++ bhi_vec->size = mhi_buf->len;
++ }
++
++ dev_dbg(dev, "BHIe programming for RDDM\n");
++
++ mhi_write_reg(mhi_cntrl, base, BHIE_RXVECADDR_HIGH_OFFS,
++ upper_32_bits(mhi_buf->dma_addr));
++
++ mhi_write_reg(mhi_cntrl, base, BHIE_RXVECADDR_LOW_OFFS,
++ lower_32_bits(mhi_buf->dma_addr));
++
++ mhi_write_reg(mhi_cntrl, base, BHIE_RXVECSIZE_OFFS, mhi_buf->len);
++ sequence_id = MHI_RANDOM_U32_NONZERO(BHIE_RXVECSTATUS_SEQNUM_BMSK);
++
++ mhi_write_reg_field(mhi_cntrl, base, BHIE_RXVECDB_OFFS,
++ BHIE_RXVECDB_SEQNUM_BMSK, BHIE_RXVECDB_SEQNUM_SHFT,
++ sequence_id);
++
++ dev_dbg(dev, "Address: %p and len: 0x%zx sequence: %u\n",
++ &mhi_buf->dma_addr, mhi_buf->len, sequence_id);
++}
++
++/* Collect RDDM buffer during kernel panic */
++static int __mhi_download_rddm_in_panic(struct mhi_controller *mhi_cntrl)
++{
++ int ret;
++ u32 rx_status;
++ enum mhi_ee_type ee;
++ const u32 delayus = 2000;
++ u32 retry = (mhi_cntrl->timeout_ms * 1000) / delayus;
++ const u32 rddm_timeout_us = 200000;
++ int rddm_retry = rddm_timeout_us / delayus;
++ void __iomem *base = mhi_cntrl->bhie;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ dev_dbg(dev, "Entered with pm_state:%s dev_state:%s ee:%s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ TO_MHI_EXEC_STR(mhi_cntrl->ee));
++
++ /*
++ * This should only be executing during a kernel panic, we expect all
++ * other cores to shutdown while we're collecting RDDM buffer. After
++ * returning from this function, we expect the device to reset.
++ *
++ * Normally, we read/write pm_state only after grabbing the
++ * pm_lock; since we're in a panic, we skip it. Also, there is no
++ * guarantee that this state change will take effect since
++ * we're setting it w/o grabbing pm_lock
++ */
++ mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
++ /* update should take effect immediately */
++ smp_wmb();
++
++ /*
++ * Make sure device is not already in RDDM. In case the device asserts
++ * and a kernel panic follows, device will already be in RDDM.
++ * Do not trigger SYS ERR again and proceed with waiting for
++ * image download completion.
++ */
++ ee = mhi_get_exec_env(mhi_cntrl);
++ if (ee == MHI_EE_MAX)
++ goto error_exit_rddm;
++
++ if (ee != MHI_EE_RDDM) {
++ dev_dbg(dev, "Trigger device into RDDM mode using SYS ERR\n");
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_SYS_ERR);
++
++ dev_dbg(dev, "Waiting for device to enter RDDM\n");
++ while (rddm_retry--) {
++ ee = mhi_get_exec_env(mhi_cntrl);
++ if (ee == MHI_EE_RDDM)
++ break;
++
++ udelay(delayus);
++ }
++
++ if (rddm_retry <= 0) {
++ /* Hardware reset so force device to enter RDDM */
++ dev_dbg(dev,
++ "Did not enter RDDM, do a host req reset\n");
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->regs,
++ MHI_SOC_RESET_REQ_OFFSET,
++ MHI_SOC_RESET_REQ);
++ udelay(delayus);
++ }
++
++ ee = mhi_get_exec_env(mhi_cntrl);
++ }
++
++ dev_dbg(dev,
++ "Waiting for RDDM image download via BHIe, current EE:%s\n",
++ TO_MHI_EXEC_STR(ee));
++
++ while (retry--) {
++ ret = mhi_read_reg_field(mhi_cntrl, base, BHIE_RXVECSTATUS_OFFS,
++ BHIE_RXVECSTATUS_STATUS_BMSK,
++ BHIE_RXVECSTATUS_STATUS_SHFT,
++ &rx_status);
++ if (ret)
++ return -EIO;
++
++ if (rx_status == BHIE_RXVECSTATUS_STATUS_XFER_COMPL)
++ return 0;
++
++ udelay(delayus);
++ }
++
++ ee = mhi_get_exec_env(mhi_cntrl);
++ ret = mhi_read_reg(mhi_cntrl, base, BHIE_RXVECSTATUS_OFFS, &rx_status);
++
++ dev_err(dev, "RXVEC_STATUS: 0x%x\n", rx_status);
++
++error_exit_rddm:
++ dev_err(dev, "RDDM transfer failed. Current EE: %s\n",
++ TO_MHI_EXEC_STR(ee));
++
++ return -EIO;
++}
++
++/* Download RDDM image from device */
++int mhi_download_rddm_image(struct mhi_controller *mhi_cntrl, bool in_panic)
++{
++ void __iomem *base = mhi_cntrl->bhie;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 rx_status;
++
++ if (in_panic)
++ return __mhi_download_rddm_in_panic(mhi_cntrl);
++
++ dev_dbg(dev, "Waiting for RDDM image download via BHIe\n");
++
++ /* Wait for the image download to complete */
++ wait_event_timeout(mhi_cntrl->state_event,
++ mhi_read_reg_field(mhi_cntrl, base,
++ BHIE_RXVECSTATUS_OFFS,
++ BHIE_RXVECSTATUS_STATUS_BMSK,
++ BHIE_RXVECSTATUS_STATUS_SHFT,
++ &rx_status) || rx_status,
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ return (rx_status == BHIE_RXVECSTATUS_STATUS_XFER_COMPL) ? 0 : -EIO;
++}
++EXPORT_SYMBOL_GPL(mhi_download_rddm_image);
++
++static int mhi_fw_load_bhie(struct mhi_controller *mhi_cntrl,
++ const struct mhi_buf *mhi_buf)
++{
++ void __iomem *base = mhi_cntrl->bhie;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ rwlock_t *pm_lock = &mhi_cntrl->pm_lock;
++ u32 tx_status, sequence_id;
++ int ret;
++
++ read_lock_bh(pm_lock);
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ read_unlock_bh(pm_lock);
++ return -EIO;
++ }
++
++ sequence_id = MHI_RANDOM_U32_NONZERO(BHIE_TXVECSTATUS_SEQNUM_BMSK);
++ dev_dbg(dev, "Starting image download via BHIe. Sequence ID: %u\n",
++ sequence_id);
++ mhi_write_reg(mhi_cntrl, base, BHIE_TXVECADDR_HIGH_OFFS,
++ upper_32_bits(mhi_buf->dma_addr));
++
++ mhi_write_reg(mhi_cntrl, base, BHIE_TXVECADDR_LOW_OFFS,
++ lower_32_bits(mhi_buf->dma_addr));
++
++ mhi_write_reg(mhi_cntrl, base, BHIE_TXVECSIZE_OFFS, mhi_buf->len);
++
++ mhi_write_reg_field(mhi_cntrl, base, BHIE_TXVECDB_OFFS,
++ BHIE_TXVECDB_SEQNUM_BMSK, BHIE_TXVECDB_SEQNUM_SHFT,
++ sequence_id);
++ read_unlock_bh(pm_lock);
++
++ /* Wait for the image download to complete */
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
++ mhi_read_reg_field(mhi_cntrl, base,
++ BHIE_TXVECSTATUS_OFFS,
++ BHIE_TXVECSTATUS_STATUS_BMSK,
++ BHIE_TXVECSTATUS_STATUS_SHFT,
++ &tx_status) || tx_status,
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
++ tx_status != BHIE_TXVECSTATUS_STATUS_XFER_COMPL)
++ return -EIO;
++
++ return (!ret) ? -ETIMEDOUT : 0;
++}
++
++static int mhi_fw_load_bhi(struct mhi_controller *mhi_cntrl,
++ dma_addr_t dma_addr,
++ size_t size)
++{
++ u32 tx_status, val, session_id;
++ int i, ret;
++ void __iomem *base = mhi_cntrl->bhi;
++ rwlock_t *pm_lock = &mhi_cntrl->pm_lock;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ struct {
++ char *name;
++ u32 offset;
++ } error_reg[] = {
++ { "ERROR_CODE", BHI_ERRCODE },
++ { "ERROR_DBG1", BHI_ERRDBG1 },
++ { "ERROR_DBG2", BHI_ERRDBG2 },
++ { "ERROR_DBG3", BHI_ERRDBG3 },
++ { NULL },
++ };
++
++ read_lock_bh(pm_lock);
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ read_unlock_bh(pm_lock);
++ goto invalid_pm_state;
++ }
++
++ session_id = MHI_RANDOM_U32_NONZERO(BHI_TXDB_SEQNUM_BMSK);
++ dev_dbg(dev, "Starting image download via BHI. Session ID: %u\n",
++ session_id);
++ mhi_write_reg(mhi_cntrl, base, BHI_STATUS, 0);
++ mhi_write_reg(mhi_cntrl, base, BHI_IMGADDR_HIGH,
++ upper_32_bits(dma_addr));
++ mhi_write_reg(mhi_cntrl, base, BHI_IMGADDR_LOW,
++ lower_32_bits(dma_addr));
++ mhi_write_reg(mhi_cntrl, base, BHI_IMGSIZE, size);
++ mhi_write_reg(mhi_cntrl, base, BHI_IMGTXDB, session_id);
++ read_unlock_bh(pm_lock);
++
++ /* Wait for the image download to complete */
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state) ||
++ mhi_read_reg_field(mhi_cntrl, base, BHI_STATUS,
++ BHI_STATUS_MASK, BHI_STATUS_SHIFT,
++ &tx_status) || tx_status,
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
++ goto invalid_pm_state;
++
++ if (tx_status == BHI_STATUS_ERROR) {
++ dev_err(dev, "Image transfer failed\n");
++ read_lock_bh(pm_lock);
++ if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ for (i = 0; error_reg[i].name; i++) {
++ ret = mhi_read_reg(mhi_cntrl, base,
++ error_reg[i].offset, &val);
++ if (ret)
++ break;
++ dev_err(dev, "Reg: %s value: 0x%x\n",
++ error_reg[i].name, val);
++ }
++ }
++ read_unlock_bh(pm_lock);
++ goto invalid_pm_state;
++ }
++
++ return (!ret) ? -ETIMEDOUT : 0;
++
++invalid_pm_state:
++
++ return -EIO;
++}
++
++void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
++ struct image_info *image_info)
++{
++ int i;
++ struct mhi_buf *mhi_buf = image_info->mhi_buf;
++
++ for (i = 0; i < image_info->entries; i++, mhi_buf++)
++ dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
++ mhi_buf->buf, mhi_buf->dma_addr);
++
++ kfree(image_info->mhi_buf);
++ kfree(image_info);
++}
++
++int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
++ struct image_info **image_info,
++ size_t alloc_size)
++{
++ size_t seg_size = mhi_cntrl->seg_len;
++ int segments = DIV_ROUND_UP(alloc_size, seg_size) + 1;
++ int i;
++ struct image_info *img_info;
++ struct mhi_buf *mhi_buf;
++
++ img_info = kzalloc(sizeof(*img_info), GFP_KERNEL);
++ if (!img_info)
++ return -ENOMEM;
++
++ /* Allocate memory for entries */
++ img_info->mhi_buf = kcalloc(segments, sizeof(*img_info->mhi_buf),
++ GFP_KERNEL);
++ if (!img_info->mhi_buf)
++ goto error_alloc_mhi_buf;
++
++ /* Allocate and populate vector table */
++ mhi_buf = img_info->mhi_buf;
++ for (i = 0; i < segments; i++, mhi_buf++) {
++ size_t vec_size = seg_size;
++
++ /* Vector table is the last entry */
++ if (i == segments - 1)
++ vec_size = sizeof(struct bhi_vec_entry) * i;
++
++ mhi_buf->len = vec_size;
++ mhi_buf->buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
++ vec_size, &mhi_buf->dma_addr,
++ GFP_KERNEL);
++ if (!mhi_buf->buf)
++ goto error_alloc_segment;
++ }
++
++ img_info->bhi_vec = img_info->mhi_buf[segments - 1].buf;
++ img_info->entries = segments;
++ *image_info = img_info;
++
++ return 0;
++
++error_alloc_segment:
++ for (--i, --mhi_buf; i >= 0; i--, mhi_buf--)
++ dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len,
++ mhi_buf->buf, mhi_buf->dma_addr);
++
++error_alloc_mhi_buf:
++ kfree(img_info);
++
++ return -ENOMEM;
++}
++
++static void mhi_firmware_copy(struct mhi_controller *mhi_cntrl,
++ const struct firmware *firmware,
++ struct image_info *img_info)
++{
++ size_t remainder = firmware->size;
++ size_t to_cpy;
++ const u8 *buf = firmware->data;
++ struct mhi_buf *mhi_buf = img_info->mhi_buf;
++ struct bhi_vec_entry *bhi_vec = img_info->bhi_vec;
++
++ while (remainder) {
++ to_cpy = min(remainder, mhi_buf->len);
++ memcpy(mhi_buf->buf, buf, to_cpy);
++ bhi_vec->dma_addr = mhi_buf->dma_addr;
++ bhi_vec->size = to_cpy;
++
++ buf += to_cpy;
++ remainder -= to_cpy;
++ bhi_vec++;
++ mhi_buf++;
++ }
++}
++
++void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl)
++{
++ const struct firmware *firmware = NULL;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_pm_state new_state;
++ const char *fw_name;
++ void *buf;
++ dma_addr_t dma_addr;
++ size_t size;
++ int i, ret;
++
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ dev_err(dev, "Device MHI is not in valid state\n");
++ return;
++ }
++
++ /* save hardware info from BHI */
++ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_SERIALNU,
++ &mhi_cntrl->serial_number);
++ if (ret)
++ dev_err(dev, "Could not capture serial number via BHI\n");
++
++ for (i = 0; i < ARRAY_SIZE(mhi_cntrl->oem_pk_hash); i++) {
++ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_OEMPKHASH(i),
++ &mhi_cntrl->oem_pk_hash[i]);
++ if (ret) {
++ dev_err(dev, "Could not capture OEM PK HASH via BHI\n");
++ break;
++ }
++ }
++
++ /* wait for ready on pass through or any other execution environment */
++ if (mhi_cntrl->ee != MHI_EE_EDL && mhi_cntrl->ee != MHI_EE_PBL)
++ goto fw_load_ready_state;
++
++ fw_name = (mhi_cntrl->ee == MHI_EE_EDL) ?
++ mhi_cntrl->edl_image : mhi_cntrl->fw_image;
++
++ if (!fw_name || (mhi_cntrl->fbc_download && (!mhi_cntrl->sbl_size ||
++ !mhi_cntrl->seg_len))) {
++ dev_err(dev,
++ "No firmware image defined or !sbl_size || !seg_len\n");
++ goto error_fw_load;
++ }
++
++ ret = request_firmware(&firmware, fw_name, dev);
++ if (ret) {
++ dev_err(dev, "Error loading firmware: %d\n", ret);
++ goto error_fw_load;
++ }
++
++ size = (mhi_cntrl->fbc_download) ? mhi_cntrl->sbl_size : firmware->size;
++
++ /* SBL size provided is maximum size, not necessarily the image size */
++ if (size > firmware->size)
++ size = firmware->size;
++
++ buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, &dma_addr,
++ GFP_KERNEL);
++ if (!buf) {
++ release_firmware(firmware);
++ goto error_fw_load;
++ }
++
++ /* Download image using BHI */
++ memcpy(buf, firmware->data, size);
++ ret = mhi_fw_load_bhi(mhi_cntrl, dma_addr, size);
++ dma_free_coherent(mhi_cntrl->cntrl_dev, size, buf, dma_addr);
++
++ /* Error or in EDL mode, we're done */
++ if (ret) {
++ dev_err(dev, "MHI did not load image over BHI, ret: %d\n", ret);
++ release_firmware(firmware);
++ goto error_fw_load;
++ }
++
++ /* Wait for ready since EDL image was loaded */
++ if (fw_name == mhi_cntrl->edl_image) {
++ release_firmware(firmware);
++ goto fw_load_ready_state;
++ }
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->dev_state = MHI_STATE_RESET;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ /*
++ * If we're doing fbc, populate vector tables while
++ * device transitioning into MHI READY state
++ */
++ if (mhi_cntrl->fbc_download) {
++ ret = mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->fbc_image,
++ firmware->size);
++ if (ret) {
++ release_firmware(firmware);
++ goto error_fw_load;
++ }
++
++ /* Load the firmware into BHIE vec table */
++ mhi_firmware_copy(mhi_cntrl, firmware, mhi_cntrl->fbc_image);
++ }
++
++ release_firmware(firmware);
++
++fw_load_ready_state:
++ /* Transitioning into MHI RESET->READY state */
++ ret = mhi_ready_state_transition(mhi_cntrl);
++ if (ret) {
++ dev_err(dev, "MHI did not enter READY state\n");
++ goto error_ready_state;
++ }
++
++ dev_info(dev, "Wait for device to enter SBL or Mission mode\n");
++ return;
++
++error_ready_state:
++ if (mhi_cntrl->fbc_download) {
++ mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->fbc_image);
++ mhi_cntrl->fbc_image = NULL;
++ }
++
++error_fw_load:
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ new_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_FW_DL_ERR);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (new_state == MHI_PM_FW_DL_ERR)
++ wake_up_all(&mhi_cntrl->state_event);
++}
++
++int mhi_download_amss_image(struct mhi_controller *mhi_cntrl)
++{
++ struct image_info *image_info = mhi_cntrl->fbc_image;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_pm_state new_state;
++ int ret;
++
++ if (!image_info)
++ return -EIO;
++
++ ret = mhi_fw_load_bhie(mhi_cntrl,
++ /* Vector table is the last entry */
++ &image_info->mhi_buf[image_info->entries - 1]);
++ if (ret) {
++ dev_err(dev, "MHI did not load AMSS, ret:%d\n", ret);
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ new_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_FW_DL_ERR);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (new_state == MHI_PM_FW_DL_ERR)
++ wake_up_all(&mhi_cntrl->state_event);
++ }
++
++ return ret;
++}
+diff --git a/drivers/bus/mhi/host/debugfs.c b/drivers/bus/mhi/host/debugfs.c
+new file mode 100644
+index 0000000000000..d818586c229d2
+--- /dev/null
++++ b/drivers/bus/mhi/host/debugfs.c
+@@ -0,0 +1,413 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#include <linux/debugfs.h>
++#include <linux/device.h>
++#include <linux/interrupt.h>
++#include <linux/list.h>
++#include <linux/mhi.h>
++#include <linux/module.h>
++#include "internal.h"
++
++static int mhi_debugfs_states_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++
++ /* states */
++ seq_printf(m, "PM state: %s Device: %s MHI state: %s EE: %s wake: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ mhi_is_active(mhi_cntrl) ? "Active" : "Inactive",
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ TO_MHI_EXEC_STR(mhi_cntrl->ee),
++ mhi_cntrl->wake_set ? "true" : "false");
++
++ /* counters */
++ seq_printf(m, "M0: %u M2: %u M3: %u", mhi_cntrl->M0, mhi_cntrl->M2,
++ mhi_cntrl->M3);
++
++ seq_printf(m, " device wake: %u pending packets: %u\n",
++ atomic_read(&mhi_cntrl->dev_wake),
++ atomic_read(&mhi_cntrl->pending_pkts));
++
++ return 0;
++}
++
++static int mhi_debugfs_events_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++ struct mhi_event *mhi_event;
++ struct mhi_event_ctxt *er_ctxt;
++ int i;
++
++ if (!mhi_is_active(mhi_cntrl)) {
++ seq_puts(m, "Device not ready\n");
++ return -ENODEV;
++ }
++
++ er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings;
++ i++, er_ctxt++, mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ if (mhi_event->offload_ev) {
++ seq_printf(m, "Index: %d is an offload event ring\n",
++ i);
++ continue;
++ }
++
++ seq_printf(m, "Index: %d intmod count: %lu time: %lu",
++ i, (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODC_MASK) >>
++ EV_CTX_INTMODC_SHIFT,
++ (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODT_MASK) >>
++ EV_CTX_INTMODT_SHIFT);
++
++ seq_printf(m, " base: 0x%0llx len: 0x%llx", le64_to_cpu(er_ctxt->rbase),
++ le64_to_cpu(er_ctxt->rlen));
++
++ seq_printf(m, " rp: 0x%llx wp: 0x%llx", le64_to_cpu(er_ctxt->rp),
++ le64_to_cpu(er_ctxt->wp));
++
++ seq_printf(m, " local rp: 0x%pK db: 0x%pad\n", ring->rp,
++ &mhi_event->db_cfg.db_val);
++ }
++
++ return 0;
++}
++
++static int mhi_debugfs_channels_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++ struct mhi_chan *mhi_chan;
++ struct mhi_chan_ctxt *chan_ctxt;
++ int i;
++
++ if (!mhi_is_active(mhi_cntrl)) {
++ seq_puts(m, "Device not ready\n");
++ return -ENODEV;
++ }
++
++ mhi_chan = mhi_cntrl->mhi_chan;
++ chan_ctxt = mhi_cntrl->mhi_ctxt->chan_ctxt;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, chan_ctxt++, mhi_chan++) {
++ struct mhi_ring *ring = &mhi_chan->tre_ring;
++
++ if (mhi_chan->offload_ch) {
++ seq_printf(m, "%s(%u) is an offload channel\n",
++ mhi_chan->name, mhi_chan->chan);
++ continue;
++ }
++
++ if (!mhi_chan->mhi_dev)
++ continue;
++
++ seq_printf(m,
++ "%s(%u) state: 0x%lx brstmode: 0x%lx pollcfg: 0x%lx",
++ mhi_chan->name, mhi_chan->chan, (le32_to_cpu(chan_ctxt->chcfg) &
++ CHAN_CTX_CHSTATE_MASK) >> CHAN_CTX_CHSTATE_SHIFT,
++ (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_BRSTMODE_MASK) >>
++ CHAN_CTX_BRSTMODE_SHIFT, (le32_to_cpu(chan_ctxt->chcfg) &
++ CHAN_CTX_POLLCFG_MASK) >> CHAN_CTX_POLLCFG_SHIFT);
++
++ seq_printf(m, " type: 0x%x event ring: %u", le32_to_cpu(chan_ctxt->chtype),
++ le32_to_cpu(chan_ctxt->erindex));
++
++ seq_printf(m, " base: 0x%llx len: 0x%llx rp: 0x%llx wp: 0x%llx",
++ le64_to_cpu(chan_ctxt->rbase), le64_to_cpu(chan_ctxt->rlen),
++ le64_to_cpu(chan_ctxt->rp), le64_to_cpu(chan_ctxt->wp));
++
++ seq_printf(m, " local rp: 0x%pK local wp: 0x%pK db: 0x%pad\n",
++ ring->rp, ring->wp,
++ &mhi_chan->db_cfg.db_val);
++ }
++
++ return 0;
++}
++
++static int mhi_device_info_show(struct device *dev, void *data)
++{
++ struct mhi_device *mhi_dev;
++
++ if (dev->bus != &mhi_bus_type)
++ return 0;
++
++ mhi_dev = to_mhi_device(dev);
++
++ seq_printf((struct seq_file *)data, "%s: type: %s dev_wake: %u",
++ mhi_dev->name, mhi_dev->dev_type ? "Controller" : "Transfer",
++ mhi_dev->dev_wake);
++
++ /* for transfer device types only */
++ if (mhi_dev->dev_type == MHI_DEVICE_XFER)
++ seq_printf((struct seq_file *)data, " channels: %u(UL)/%u(DL)",
++ mhi_dev->ul_chan_id, mhi_dev->dl_chan_id);
++
++ seq_puts((struct seq_file *)data, "\n");
++
++ return 0;
++}
++
++static int mhi_debugfs_devices_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++
++ if (!mhi_is_active(mhi_cntrl)) {
++ seq_puts(m, "Device not ready\n");
++ return -ENODEV;
++ }
++
++ /* Show controller and client(s) info */
++ mhi_device_info_show(&mhi_cntrl->mhi_dev->dev, m);
++ device_for_each_child(&mhi_cntrl->mhi_dev->dev, m, mhi_device_info_show);
++
++ return 0;
++}
++
++static int mhi_debugfs_regdump_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++ enum mhi_state state;
++ enum mhi_ee_type ee;
++ int i, ret = -EIO;
++ u32 val;
++ void __iomem *mhi_base = mhi_cntrl->regs;
++ void __iomem *bhi_base = mhi_cntrl->bhi;
++ void __iomem *bhie_base = mhi_cntrl->bhie;
++ void __iomem *wake_db = mhi_cntrl->wake_db;
++ struct {
++ const char *name;
++ int offset;
++ void __iomem *base;
++ } regs[] = {
++ { "MHI_REGLEN", MHIREGLEN, mhi_base},
++ { "MHI_VER", MHIVER, mhi_base},
++ { "MHI_CFG", MHICFG, mhi_base},
++ { "MHI_CTRL", MHICTRL, mhi_base},
++ { "MHI_STATUS", MHISTATUS, mhi_base},
++ { "MHI_WAKE_DB", 0, wake_db},
++ { "BHI_EXECENV", BHI_EXECENV, bhi_base},
++ { "BHI_STATUS", BHI_STATUS, bhi_base},
++ { "BHI_ERRCODE", BHI_ERRCODE, bhi_base},
++ { "BHI_ERRDBG1", BHI_ERRDBG1, bhi_base},
++ { "BHI_ERRDBG2", BHI_ERRDBG2, bhi_base},
++ { "BHI_ERRDBG3", BHI_ERRDBG3, bhi_base},
++ { "BHIE_TXVEC_DB", BHIE_TXVECDB_OFFS, bhie_base},
++ { "BHIE_TXVEC_STATUS", BHIE_TXVECSTATUS_OFFS, bhie_base},
++ { "BHIE_RXVEC_DB", BHIE_RXVECDB_OFFS, bhie_base},
++ { "BHIE_RXVEC_STATUS", BHIE_RXVECSTATUS_OFFS, bhie_base},
++ { NULL },
++ };
++
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
++ return ret;
++
++ seq_printf(m, "Host PM state: %s Device state: %s EE: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ TO_MHI_EXEC_STR(mhi_cntrl->ee));
++
++ state = mhi_get_mhi_state(mhi_cntrl);
++ ee = mhi_get_exec_env(mhi_cntrl);
++ seq_printf(m, "Device EE: %s state: %s\n", TO_MHI_EXEC_STR(ee),
++ TO_MHI_STATE_STR(state));
++
++ for (i = 0; regs[i].name; i++) {
++ if (!regs[i].base)
++ continue;
++ ret = mhi_read_reg(mhi_cntrl, regs[i].base, regs[i].offset,
++ &val);
++ if (ret)
++ continue;
++
++ seq_printf(m, "%s: 0x%x\n", regs[i].name, val);
++ }
++
++ return 0;
++}
++
++static int mhi_debugfs_device_wake_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++ struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
++
++ if (!mhi_is_active(mhi_cntrl)) {
++ seq_puts(m, "Device not ready\n");
++ return -ENODEV;
++ }
++
++ seq_printf(m,
++ "Wake count: %d\n%s\n", mhi_dev->dev_wake,
++ "Usage: echo get/put > device_wake to vote/unvote for M0");
++
++ return 0;
++}
++
++static ssize_t mhi_debugfs_device_wake_write(struct file *file,
++ const char __user *ubuf,
++ size_t count, loff_t *ppos)
++{
++ struct seq_file *m = file->private_data;
++ struct mhi_controller *mhi_cntrl = m->private;
++ struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
++ char buf[16];
++ int ret = -EINVAL;
++
++ if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
++ return -EFAULT;
++
++ if (!strncmp(buf, "get", 3)) {
++ ret = mhi_device_get_sync(mhi_dev);
++ } else if (!strncmp(buf, "put", 3)) {
++ mhi_device_put(mhi_dev);
++ ret = 0;
++ }
++
++ return ret ? ret : count;
++}
++
++static int mhi_debugfs_timeout_ms_show(struct seq_file *m, void *d)
++{
++ struct mhi_controller *mhi_cntrl = m->private;
++
++ seq_printf(m, "%u ms\n", mhi_cntrl->timeout_ms);
++
++ return 0;
++}
++
++static ssize_t mhi_debugfs_timeout_ms_write(struct file *file,
++ const char __user *ubuf,
++ size_t count, loff_t *ppos)
++{
++ struct seq_file *m = file->private_data;
++ struct mhi_controller *mhi_cntrl = m->private;
++ u32 timeout_ms;
++
++ if (kstrtou32_from_user(ubuf, count, 0, &timeout_ms))
++ return -EINVAL;
++
++ mhi_cntrl->timeout_ms = timeout_ms;
++
++ return count;
++}
++
++static int mhi_debugfs_states_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_states_show, inode->i_private);
++}
++
++static int mhi_debugfs_events_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_events_show, inode->i_private);
++}
++
++static int mhi_debugfs_channels_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_channels_show, inode->i_private);
++}
++
++static int mhi_debugfs_devices_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_devices_show, inode->i_private);
++}
++
++static int mhi_debugfs_regdump_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_regdump_show, inode->i_private);
++}
++
++static int mhi_debugfs_device_wake_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_device_wake_show, inode->i_private);
++}
++
++static int mhi_debugfs_timeout_ms_open(struct inode *inode, struct file *fp)
++{
++ return single_open(fp, mhi_debugfs_timeout_ms_show, inode->i_private);
++}
++
++static const struct file_operations debugfs_states_fops = {
++ .open = mhi_debugfs_states_open,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_events_fops = {
++ .open = mhi_debugfs_events_open,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_channels_fops = {
++ .open = mhi_debugfs_channels_open,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_devices_fops = {
++ .open = mhi_debugfs_devices_open,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_regdump_fops = {
++ .open = mhi_debugfs_regdump_open,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_device_wake_fops = {
++ .open = mhi_debugfs_device_wake_open,
++ .write = mhi_debugfs_device_wake_write,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static const struct file_operations debugfs_timeout_ms_fops = {
++ .open = mhi_debugfs_timeout_ms_open,
++ .write = mhi_debugfs_timeout_ms_write,
++ .release = single_release,
++ .read = seq_read,
++};
++
++static struct dentry *mhi_debugfs_root;
++
++void mhi_create_debugfs(struct mhi_controller *mhi_cntrl)
++{
++ mhi_cntrl->debugfs_dentry =
++ debugfs_create_dir(dev_name(&mhi_cntrl->mhi_dev->dev),
++ mhi_debugfs_root);
++
++ debugfs_create_file("states", 0444, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_states_fops);
++ debugfs_create_file("events", 0444, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_events_fops);
++ debugfs_create_file("channels", 0444, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_channels_fops);
++ debugfs_create_file("devices", 0444, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_devices_fops);
++ debugfs_create_file("regdump", 0444, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_regdump_fops);
++ debugfs_create_file("device_wake", 0644, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_device_wake_fops);
++ debugfs_create_file("timeout_ms", 0644, mhi_cntrl->debugfs_dentry,
++ mhi_cntrl, &debugfs_timeout_ms_fops);
++}
++
++void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl)
++{
++ debugfs_remove_recursive(mhi_cntrl->debugfs_dentry);
++ mhi_cntrl->debugfs_dentry = NULL;
++}
++
++void mhi_debugfs_init(void)
++{
++ mhi_debugfs_root = debugfs_create_dir(mhi_bus_type.name, NULL);
++}
++
++void mhi_debugfs_exit(void)
++{
++ debugfs_remove_recursive(mhi_debugfs_root);
++}
+diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c
+new file mode 100644
+index 0000000000000..829d4fca7ddc9
+--- /dev/null
++++ b/drivers/bus/mhi/host/init.c
+@@ -0,0 +1,1443 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#include <linux/debugfs.h>
++#include <linux/device.h>
++#include <linux/dma-direction.h>
++#include <linux/dma-mapping.h>
++#include <linux/idr.h>
++#include <linux/interrupt.h>
++#include <linux/list.h>
++#include <linux/mhi.h>
++#include <linux/mod_devicetable.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/wait.h>
++#include "internal.h"
++
++static DEFINE_IDA(mhi_controller_ida);
++
++const char * const mhi_ee_str[MHI_EE_MAX] = {
++ [MHI_EE_PBL] = "PRIMARY BOOTLOADER",
++ [MHI_EE_SBL] = "SECONDARY BOOTLOADER",
++ [MHI_EE_AMSS] = "MISSION MODE",
++ [MHI_EE_RDDM] = "RAMDUMP DOWNLOAD MODE",
++ [MHI_EE_WFW] = "WLAN FIRMWARE",
++ [MHI_EE_PTHRU] = "PASS THROUGH",
++ [MHI_EE_EDL] = "EMERGENCY DOWNLOAD",
++ [MHI_EE_FP] = "FLASH PROGRAMMER",
++ [MHI_EE_DISABLE_TRANSITION] = "DISABLE",
++ [MHI_EE_NOT_SUPPORTED] = "NOT SUPPORTED",
++};
++
++const char * const dev_state_tran_str[DEV_ST_TRANSITION_MAX] = {
++ [DEV_ST_TRANSITION_PBL] = "PBL",
++ [DEV_ST_TRANSITION_READY] = "READY",
++ [DEV_ST_TRANSITION_SBL] = "SBL",
++ [DEV_ST_TRANSITION_MISSION_MODE] = "MISSION MODE",
++ [DEV_ST_TRANSITION_FP] = "FLASH PROGRAMMER",
++ [DEV_ST_TRANSITION_SYS_ERR] = "SYS ERROR",
++ [DEV_ST_TRANSITION_DISABLE] = "DISABLE",
++};
++
++const char * const mhi_state_str[MHI_STATE_MAX] = {
++ [MHI_STATE_RESET] = "RESET",
++ [MHI_STATE_READY] = "READY",
++ [MHI_STATE_M0] = "M0",
++ [MHI_STATE_M1] = "M1",
++ [MHI_STATE_M2] = "M2",
++ [MHI_STATE_M3] = "M3",
++ [MHI_STATE_M3_FAST] = "M3 FAST",
++ [MHI_STATE_BHI] = "BHI",
++ [MHI_STATE_SYS_ERR] = "SYS ERROR",
++};
++
++const char * const mhi_ch_state_type_str[MHI_CH_STATE_TYPE_MAX] = {
++ [MHI_CH_STATE_TYPE_RESET] = "RESET",
++ [MHI_CH_STATE_TYPE_STOP] = "STOP",
++ [MHI_CH_STATE_TYPE_START] = "START",
++};
++
++static const char * const mhi_pm_state_str[] = {
++ [MHI_PM_STATE_DISABLE] = "DISABLE",
++ [MHI_PM_STATE_POR] = "POWER ON RESET",
++ [MHI_PM_STATE_M0] = "M0",
++ [MHI_PM_STATE_M2] = "M2",
++ [MHI_PM_STATE_M3_ENTER] = "M?->M3",
++ [MHI_PM_STATE_M3] = "M3",
++ [MHI_PM_STATE_M3_EXIT] = "M3->M0",
++ [MHI_PM_STATE_FW_DL_ERR] = "Firmware Download Error",
++ [MHI_PM_STATE_SYS_ERR_DETECT] = "SYS ERROR Detect",
++ [MHI_PM_STATE_SYS_ERR_PROCESS] = "SYS ERROR Process",
++ [MHI_PM_STATE_SHUTDOWN_PROCESS] = "SHUTDOWN Process",
++ [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect",
++};
++
++const char *to_mhi_pm_state_str(u32 state)
++{
++ int index;
++
++ if (state)
++ index = __fls(state);
++
++ if (!state || index >= ARRAY_SIZE(mhi_pm_state_str))
++ return "Invalid State";
++
++ return mhi_pm_state_str[index];
++}
++
++static ssize_t serial_number_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++
++ return snprintf(buf, PAGE_SIZE, "Serial Number: %u\n",
++ mhi_cntrl->serial_number);
++}
++static DEVICE_ATTR_RO(serial_number);
++
++static ssize_t oem_pk_hash_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ int i, cnt = 0;
++
++ for (i = 0; i < ARRAY_SIZE(mhi_cntrl->oem_pk_hash); i++)
++ cnt += snprintf(buf + cnt, PAGE_SIZE - cnt,
++ "OEMPKHASH[%d]: 0x%x\n", i,
++ mhi_cntrl->oem_pk_hash[i]);
++
++ return cnt;
++}
++static DEVICE_ATTR_RO(oem_pk_hash);
++
++static struct attribute *mhi_dev_attrs[] = {
++ &dev_attr_serial_number.attr,
++ &dev_attr_oem_pk_hash.attr,
++ NULL,
++};
++ATTRIBUTE_GROUPS(mhi_dev);
++
++/* MHI protocol requires the transfer ring to be aligned with ring length */
++static int mhi_alloc_aligned_ring(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring,
++ u64 len)
++{
++ ring->alloc_size = len + (len - 1);
++ ring->pre_aligned = dma_alloc_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
++ &ring->dma_handle, GFP_KERNEL);
++ if (!ring->pre_aligned)
++ return -ENOMEM;
++
++ ring->iommu_base = (ring->dma_handle + (len - 1)) & ~(len - 1);
++ ring->base = ring->pre_aligned + (ring->iommu_base - ring->dma_handle);
++
++ return 0;
++}
++
++void mhi_deinit_free_irq(struct mhi_controller *mhi_cntrl)
++{
++ int i;
++ struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
++
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
++ }
++
++ free_irq(mhi_cntrl->irq[0], mhi_cntrl);
++}
++
++int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ unsigned long irq_flags = IRQF_SHARED | IRQF_NO_SUSPEND;
++ int i, ret;
++
++ /* if controller driver has set irq_flags, use it */
++ if (mhi_cntrl->irq_flags)
++ irq_flags = mhi_cntrl->irq_flags;
++
++ /* Setup BHI_INTVEC IRQ */
++ ret = request_threaded_irq(mhi_cntrl->irq[0], mhi_intvec_handler,
++ mhi_intvec_threaded_handler,
++ irq_flags,
++ "bhi", mhi_cntrl);
++ if (ret)
++ return ret;
++
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ if (mhi_event->irq >= mhi_cntrl->nr_irqs) {
++ dev_err(dev, "irq %d not available for event ring\n",
++ mhi_event->irq);
++ ret = -EINVAL;
++ goto error_request;
++ }
++
++ ret = request_irq(mhi_cntrl->irq[mhi_event->irq],
++ mhi_irq_handler,
++ irq_flags,
++ "mhi", mhi_event);
++ if (ret) {
++ dev_err(dev, "Error requesting irq:%d for ev:%d\n",
++ mhi_cntrl->irq[mhi_event->irq], i);
++ goto error_request;
++ }
++ }
++
++ return 0;
++
++error_request:
++ for (--i, --mhi_event; i >= 0; i--, mhi_event--) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
++ }
++ free_irq(mhi_cntrl->irq[0], mhi_cntrl);
++
++ return ret;
++}
++
++void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl)
++{
++ int i;
++ struct mhi_ctxt *mhi_ctxt = mhi_cntrl->mhi_ctxt;
++ struct mhi_cmd *mhi_cmd;
++ struct mhi_event *mhi_event;
++ struct mhi_ring *ring;
++
++ mhi_cmd = mhi_cntrl->mhi_cmd;
++ for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++) {
++ ring = &mhi_cmd->ring;
++ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
++ ring->pre_aligned, ring->dma_handle);
++ ring->base = NULL;
++ ring->iommu_base = 0;
++ }
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev,
++ sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
++ mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
++
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ ring = &mhi_event->ring;
++ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
++ ring->pre_aligned, ring->dma_handle);
++ ring->base = NULL;
++ ring->iommu_base = 0;
++ }
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
++ mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
++ mhi_ctxt->er_ctxt_addr);
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
++ mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
++ mhi_ctxt->chan_ctxt_addr);
++
++ kfree(mhi_ctxt);
++ mhi_cntrl->mhi_ctxt = NULL;
++}
++
++int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_ctxt *mhi_ctxt;
++ struct mhi_chan_ctxt *chan_ctxt;
++ struct mhi_event_ctxt *er_ctxt;
++ struct mhi_cmd_ctxt *cmd_ctxt;
++ struct mhi_chan *mhi_chan;
++ struct mhi_event *mhi_event;
++ struct mhi_cmd *mhi_cmd;
++ u32 tmp;
++ int ret = -ENOMEM, i;
++
++ atomic_set(&mhi_cntrl->dev_wake, 0);
++ atomic_set(&mhi_cntrl->pending_pkts, 0);
++
++ mhi_ctxt = kzalloc(sizeof(*mhi_ctxt), GFP_KERNEL);
++ if (!mhi_ctxt)
++ return -ENOMEM;
++
++ /* Setup channel ctxt */
++ mhi_ctxt->chan_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
++ sizeof(*mhi_ctxt->chan_ctxt) *
++ mhi_cntrl->max_chan,
++ &mhi_ctxt->chan_ctxt_addr,
++ GFP_KERNEL);
++ if (!mhi_ctxt->chan_ctxt)
++ goto error_alloc_chan_ctxt;
++
++ mhi_chan = mhi_cntrl->mhi_chan;
++ chan_ctxt = mhi_ctxt->chan_ctxt;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, chan_ctxt++, mhi_chan++) {
++ /* Skip if it is an offload channel */
++ if (mhi_chan->offload_ch)
++ continue;
++
++ tmp = le32_to_cpu(chan_ctxt->chcfg);
++ tmp &= ~CHAN_CTX_CHSTATE_MASK;
++ tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT);
++ tmp &= ~CHAN_CTX_BRSTMODE_MASK;
++ tmp |= (mhi_chan->db_cfg.brstmode << CHAN_CTX_BRSTMODE_SHIFT);
++ tmp &= ~CHAN_CTX_POLLCFG_MASK;
++ tmp |= (mhi_chan->db_cfg.pollcfg << CHAN_CTX_POLLCFG_SHIFT);
++ chan_ctxt->chcfg = cpu_to_le32(tmp);
++
++ chan_ctxt->chtype = cpu_to_le32(mhi_chan->type);
++ chan_ctxt->erindex = cpu_to_le32(mhi_chan->er_index);
++
++ mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
++ mhi_chan->tre_ring.db_addr = (void __iomem *)&chan_ctxt->wp;
++ }
++
++ /* Setup event context */
++ mhi_ctxt->er_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
++ sizeof(*mhi_ctxt->er_ctxt) *
++ mhi_cntrl->total_ev_rings,
++ &mhi_ctxt->er_ctxt_addr,
++ GFP_KERNEL);
++ if (!mhi_ctxt->er_ctxt)
++ goto error_alloc_er_ctxt;
++
++ er_ctxt = mhi_ctxt->er_ctxt;
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
++ mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ /* Skip if it is an offload event */
++ if (mhi_event->offload_ev)
++ continue;
++
++ tmp = le32_to_cpu(er_ctxt->intmod);
++ tmp &= ~EV_CTX_INTMODC_MASK;
++ tmp &= ~EV_CTX_INTMODT_MASK;
++ tmp |= (mhi_event->intmod << EV_CTX_INTMODT_SHIFT);
++ er_ctxt->intmod = cpu_to_le32(tmp);
++
++ er_ctxt->ertype = cpu_to_le32(MHI_ER_TYPE_VALID);
++ er_ctxt->msivec = cpu_to_le32(mhi_event->irq);
++ mhi_event->db_cfg.db_mode = true;
++
++ ring->el_size = sizeof(struct mhi_tre);
++ ring->len = ring->el_size * ring->elements;
++ ret = mhi_alloc_aligned_ring(mhi_cntrl, ring, ring->len);
++ if (ret)
++ goto error_alloc_er;
++
++ /*
++ * If the read pointer equals to the write pointer, then the
++ * ring is empty
++ */
++ ring->rp = ring->wp = ring->base;
++ er_ctxt->rbase = cpu_to_le64(ring->iommu_base);
++ er_ctxt->rp = er_ctxt->wp = er_ctxt->rbase;
++ er_ctxt->rlen = cpu_to_le64(ring->len);
++ ring->ctxt_wp = &er_ctxt->wp;
++ }
++
++ /* Setup cmd context */
++ ret = -ENOMEM;
++ mhi_ctxt->cmd_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev,
++ sizeof(*mhi_ctxt->cmd_ctxt) *
++ NR_OF_CMD_RINGS,
++ &mhi_ctxt->cmd_ctxt_addr,
++ GFP_KERNEL);
++ if (!mhi_ctxt->cmd_ctxt)
++ goto error_alloc_er;
++
++ mhi_cmd = mhi_cntrl->mhi_cmd;
++ cmd_ctxt = mhi_ctxt->cmd_ctxt;
++ for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
++ struct mhi_ring *ring = &mhi_cmd->ring;
++
++ ring->el_size = sizeof(struct mhi_tre);
++ ring->elements = CMD_EL_PER_RING;
++ ring->len = ring->el_size * ring->elements;
++ ret = mhi_alloc_aligned_ring(mhi_cntrl, ring, ring->len);
++ if (ret)
++ goto error_alloc_cmd;
++
++ ring->rp = ring->wp = ring->base;
++ cmd_ctxt->rbase = cpu_to_le64(ring->iommu_base);
++ cmd_ctxt->rp = cmd_ctxt->wp = cmd_ctxt->rbase;
++ cmd_ctxt->rlen = cpu_to_le64(ring->len);
++ ring->ctxt_wp = &cmd_ctxt->wp;
++ }
++
++ mhi_cntrl->mhi_ctxt = mhi_ctxt;
++
++ return 0;
++
++error_alloc_cmd:
++ for (--i, --mhi_cmd; i >= 0; i--, mhi_cmd--) {
++ struct mhi_ring *ring = &mhi_cmd->ring;
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
++ ring->pre_aligned, ring->dma_handle);
++ }
++ dma_free_coherent(mhi_cntrl->cntrl_dev,
++ sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS,
++ mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr);
++ i = mhi_cntrl->total_ev_rings;
++ mhi_event = mhi_cntrl->mhi_event + i;
++
++error_alloc_er:
++ for (--i, --mhi_event; i >= 0; i--, mhi_event--) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ if (mhi_event->offload_ev)
++ continue;
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size,
++ ring->pre_aligned, ring->dma_handle);
++ }
++ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) *
++ mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt,
++ mhi_ctxt->er_ctxt_addr);
++
++error_alloc_er_ctxt:
++ dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) *
++ mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt,
++ mhi_ctxt->chan_ctxt_addr);
++
++error_alloc_chan_ctxt:
++ kfree(mhi_ctxt);
++
++ return ret;
++}
++
++int mhi_init_mmio(struct mhi_controller *mhi_cntrl)
++{
++ u32 val;
++ int i, ret;
++ struct mhi_chan *mhi_chan;
++ struct mhi_event *mhi_event;
++ void __iomem *base = mhi_cntrl->regs;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ struct {
++ u32 offset;
++ u32 mask;
++ u32 shift;
++ u32 val;
++ } reg_info[] = {
++ {
++ CCABAP_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->mhi_ctxt->chan_ctxt_addr),
++ },
++ {
++ CCABAP_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->mhi_ctxt->chan_ctxt_addr),
++ },
++ {
++ ECABAP_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->mhi_ctxt->er_ctxt_addr),
++ },
++ {
++ ECABAP_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->mhi_ctxt->er_ctxt_addr),
++ },
++ {
++ CRCBAP_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->mhi_ctxt->cmd_ctxt_addr),
++ },
++ {
++ CRCBAP_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->mhi_ctxt->cmd_ctxt_addr),
++ },
++ {
++ MHICFG, MHICFG_NER_MASK, MHICFG_NER_SHIFT,
++ mhi_cntrl->total_ev_rings,
++ },
++ {
++ MHICFG, MHICFG_NHWER_MASK, MHICFG_NHWER_SHIFT,
++ mhi_cntrl->hw_ev_rings,
++ },
++ {
++ MHICTRLBASE_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->iova_start),
++ },
++ {
++ MHICTRLBASE_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->iova_start),
++ },
++ {
++ MHIDATABASE_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->iova_start),
++ },
++ {
++ MHIDATABASE_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->iova_start),
++ },
++ {
++ MHICTRLLIMIT_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->iova_stop),
++ },
++ {
++ MHICTRLLIMIT_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->iova_stop),
++ },
++ {
++ MHIDATALIMIT_HIGHER, U32_MAX, 0,
++ upper_32_bits(mhi_cntrl->iova_stop),
++ },
++ {
++ MHIDATALIMIT_LOWER, U32_MAX, 0,
++ lower_32_bits(mhi_cntrl->iova_stop),
++ },
++ { 0, 0, 0 }
++ };
++
++ dev_dbg(dev, "Initializing MHI registers\n");
++
++ /* Read channel db offset */
++ ret = mhi_read_reg_field(mhi_cntrl, base, CHDBOFF, CHDBOFF_CHDBOFF_MASK,
++ CHDBOFF_CHDBOFF_SHIFT, &val);
++ if (ret) {
++ dev_err(dev, "Unable to read CHDBOFF register\n");
++ return -EIO;
++ }
++
++ if (val >= mhi_cntrl->reg_len - (8 * MHI_DEV_WAKE_DB)) {
++ dev_err(dev, "CHDB offset: 0x%x is out of range: 0x%zx\n",
++ val, mhi_cntrl->reg_len - (8 * MHI_DEV_WAKE_DB));
++ return -ERANGE;
++ }
++
++ /* Setup wake db */
++ mhi_cntrl->wake_db = base + val + (8 * MHI_DEV_WAKE_DB);
++ mhi_cntrl->wake_set = false;
++
++ /* Setup channel db address for each channel in tre_ring */
++ mhi_chan = mhi_cntrl->mhi_chan;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, val += 8, mhi_chan++)
++ mhi_chan->tre_ring.db_addr = base + val;
++
++ /* Read event ring db offset */
++ ret = mhi_read_reg_field(mhi_cntrl, base, ERDBOFF, ERDBOFF_ERDBOFF_MASK,
++ ERDBOFF_ERDBOFF_SHIFT, &val);
++ if (ret) {
++ dev_err(dev, "Unable to read ERDBOFF register\n");
++ return -EIO;
++ }
++
++ if (val >= mhi_cntrl->reg_len - (8 * mhi_cntrl->total_ev_rings)) {
++ dev_err(dev, "ERDB offset: 0x%x is out of range: 0x%zx\n",
++ val, mhi_cntrl->reg_len - (8 * mhi_cntrl->total_ev_rings));
++ return -ERANGE;
++ }
++
++ /* Setup event db address for each ev_ring */
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, val += 8, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ mhi_event->ring.db_addr = base + val;
++ }
++
++ /* Setup DB register for primary CMD rings */
++ mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING].ring.db_addr = base + CRDB_LOWER;
++
++ /* Write to MMIO registers */
++ for (i = 0; reg_info[i].offset; i++)
++ mhi_write_reg_field(mhi_cntrl, base, reg_info[i].offset,
++ reg_info[i].mask, reg_info[i].shift,
++ reg_info[i].val);
++
++ return 0;
++}
++
++void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *buf_ring;
++ struct mhi_ring *tre_ring;
++ struct mhi_chan_ctxt *chan_ctxt;
++ u32 tmp;
++
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++ chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
++
++ if (!chan_ctxt->rbase) /* Already uninitialized */
++ return;
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
++ tre_ring->pre_aligned, tre_ring->dma_handle);
++ vfree(buf_ring->base);
++
++ buf_ring->base = tre_ring->base = NULL;
++ tre_ring->ctxt_wp = NULL;
++ chan_ctxt->rbase = 0;
++ chan_ctxt->rlen = 0;
++ chan_ctxt->rp = 0;
++ chan_ctxt->wp = 0;
++
++ tmp = le32_to_cpu(chan_ctxt->chcfg);
++ tmp &= ~CHAN_CTX_CHSTATE_MASK;
++ tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT);
++ chan_ctxt->chcfg = cpu_to_le32(tmp);
++
++ /* Update to all cores */
++ smp_wmb();
++}
++
++int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *buf_ring;
++ struct mhi_ring *tre_ring;
++ struct mhi_chan_ctxt *chan_ctxt;
++ u32 tmp;
++ int ret;
++
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++ tre_ring->el_size = sizeof(struct mhi_tre);
++ tre_ring->len = tre_ring->el_size * tre_ring->elements;
++ chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
++ ret = mhi_alloc_aligned_ring(mhi_cntrl, tre_ring, tre_ring->len);
++ if (ret)
++ return -ENOMEM;
++
++ buf_ring->el_size = sizeof(struct mhi_buf_info);
++ buf_ring->len = buf_ring->el_size * buf_ring->elements;
++ buf_ring->base = vzalloc(buf_ring->len);
++
++ if (!buf_ring->base) {
++ dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size,
++ tre_ring->pre_aligned, tre_ring->dma_handle);
++ return -ENOMEM;
++ }
++
++ tmp = le32_to_cpu(chan_ctxt->chcfg);
++ tmp &= ~CHAN_CTX_CHSTATE_MASK;
++ tmp |= (MHI_CH_STATE_ENABLED << CHAN_CTX_CHSTATE_SHIFT);
++ chan_ctxt->chcfg = cpu_to_le32(tmp);
++
++ chan_ctxt->rbase = cpu_to_le64(tre_ring->iommu_base);
++ chan_ctxt->rp = chan_ctxt->wp = chan_ctxt->rbase;
++ chan_ctxt->rlen = cpu_to_le64(tre_ring->len);
++ tre_ring->ctxt_wp = &chan_ctxt->wp;
++
++ tre_ring->rp = tre_ring->wp = tre_ring->base;
++ buf_ring->rp = buf_ring->wp = buf_ring->base;
++ mhi_chan->db_cfg.db_mode = 1;
++
++ /* Update to all cores */
++ smp_wmb();
++
++ return 0;
++}
++
++static int parse_ev_cfg(struct mhi_controller *mhi_cntrl,
++ const struct mhi_controller_config *config)
++{
++ struct mhi_event *mhi_event;
++ const struct mhi_event_config *event_cfg;
++ struct device *dev = mhi_cntrl->cntrl_dev;
++ int i, num;
++
++ num = config->num_events;
++ mhi_cntrl->total_ev_rings = num;
++ mhi_cntrl->mhi_event = kcalloc(num, sizeof(*mhi_cntrl->mhi_event),
++ GFP_KERNEL);
++ if (!mhi_cntrl->mhi_event)
++ return -ENOMEM;
++
++ /* Populate event ring */
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < num; i++) {
++ event_cfg = &config->event_cfg[i];
++
++ mhi_event->er_index = i;
++ mhi_event->ring.elements = event_cfg->num_elements;
++ mhi_event->intmod = event_cfg->irq_moderation_ms;
++ mhi_event->irq = event_cfg->irq;
++
++ if (event_cfg->channel != U32_MAX) {
++ /* This event ring has a dedicated channel */
++ mhi_event->chan = event_cfg->channel;
++ if (mhi_event->chan >= mhi_cntrl->max_chan) {
++ dev_err(dev,
++ "Event Ring channel not available\n");
++ goto error_ev_cfg;
++ }
++
++ mhi_event->mhi_chan =
++ &mhi_cntrl->mhi_chan[mhi_event->chan];
++ }
++
++ /* Priority is fixed to 1 for now */
++ mhi_event->priority = 1;
++
++ mhi_event->db_cfg.brstmode = event_cfg->mode;
++ if (MHI_INVALID_BRSTMODE(mhi_event->db_cfg.brstmode))
++ goto error_ev_cfg;
++
++ if (mhi_event->db_cfg.brstmode == MHI_DB_BRST_ENABLE)
++ mhi_event->db_cfg.process_db = mhi_db_brstmode;
++ else
++ mhi_event->db_cfg.process_db = mhi_db_brstmode_disable;
++
++ mhi_event->data_type = event_cfg->data_type;
++
++ switch (mhi_event->data_type) {
++ case MHI_ER_DATA:
++ mhi_event->process_event = mhi_process_data_event_ring;
++ break;
++ case MHI_ER_CTRL:
++ mhi_event->process_event = mhi_process_ctrl_ev_ring;
++ break;
++ default:
++ dev_err(dev, "Event Ring type not supported\n");
++ goto error_ev_cfg;
++ }
++
++ mhi_event->hw_ring = event_cfg->hardware_event;
++ if (mhi_event->hw_ring)
++ mhi_cntrl->hw_ev_rings++;
++ else
++ mhi_cntrl->sw_ev_rings++;
++
++ mhi_event->cl_manage = event_cfg->client_managed;
++ mhi_event->offload_ev = event_cfg->offload_channel;
++ mhi_event++;
++ }
++
++ return 0;
++
++error_ev_cfg:
++
++ kfree(mhi_cntrl->mhi_event);
++ return -EINVAL;
++}
++
++static int parse_ch_cfg(struct mhi_controller *mhi_cntrl,
++ const struct mhi_controller_config *config)
++{
++ const struct mhi_channel_config *ch_cfg;
++ struct device *dev = mhi_cntrl->cntrl_dev;
++ int i;
++ u32 chan;
++
++ mhi_cntrl->max_chan = config->max_channels;
++
++ /*
++ * The allocation of MHI channels can exceed 32KB in some scenarios,
++ * so to avoid any memory possible allocation failures, vzalloc is
++ * used here
++ */
++ mhi_cntrl->mhi_chan = vzalloc(mhi_cntrl->max_chan *
++ sizeof(*mhi_cntrl->mhi_chan));
++ if (!mhi_cntrl->mhi_chan)
++ return -ENOMEM;
++
++ INIT_LIST_HEAD(&mhi_cntrl->lpm_chans);
++
++ /* Populate channel configurations */
++ for (i = 0; i < config->num_channels; i++) {
++ struct mhi_chan *mhi_chan;
++
++ ch_cfg = &config->ch_cfg[i];
++
++ chan = ch_cfg->num;
++ if (chan >= mhi_cntrl->max_chan) {
++ dev_err(dev, "Channel %d not available\n", chan);
++ goto error_chan_cfg;
++ }
++
++ mhi_chan = &mhi_cntrl->mhi_chan[chan];
++ mhi_chan->name = ch_cfg->name;
++ mhi_chan->chan = chan;
++
++ mhi_chan->tre_ring.elements = ch_cfg->num_elements;
++ if (!mhi_chan->tre_ring.elements)
++ goto error_chan_cfg;
++
++ /*
++ * For some channels, local ring length should be bigger than
++ * the transfer ring length due to internal logical channels
++ * in device. So host can queue much more buffers than transfer
++ * ring length. Example, RSC channels should have a larger local
++ * channel length than transfer ring length.
++ */
++ mhi_chan->buf_ring.elements = ch_cfg->local_elements;
++ if (!mhi_chan->buf_ring.elements)
++ mhi_chan->buf_ring.elements = mhi_chan->tre_ring.elements;
++ mhi_chan->er_index = ch_cfg->event_ring;
++ mhi_chan->dir = ch_cfg->dir;
++
++ /*
++ * For most channels, chtype is identical to channel directions.
++ * So, if it is not defined then assign channel direction to
++ * chtype
++ */
++ mhi_chan->type = ch_cfg->type;
++ if (!mhi_chan->type)
++ mhi_chan->type = (enum mhi_ch_type)mhi_chan->dir;
++
++ mhi_chan->ee_mask = ch_cfg->ee_mask;
++ mhi_chan->db_cfg.pollcfg = ch_cfg->pollcfg;
++ mhi_chan->lpm_notify = ch_cfg->lpm_notify;
++ mhi_chan->offload_ch = ch_cfg->offload_channel;
++ mhi_chan->db_cfg.reset_req = ch_cfg->doorbell_mode_switch;
++ mhi_chan->pre_alloc = ch_cfg->auto_queue;
++ mhi_chan->wake_capable = ch_cfg->wake_capable;
++
++ /*
++ * If MHI host allocates buffers, then the channel direction
++ * should be DMA_FROM_DEVICE
++ */
++ if (mhi_chan->pre_alloc && mhi_chan->dir != DMA_FROM_DEVICE) {
++ dev_err(dev, "Invalid channel configuration\n");
++ goto error_chan_cfg;
++ }
++
++ /*
++ * Bi-directional and direction less channel must be an
++ * offload channel
++ */
++ if ((mhi_chan->dir == DMA_BIDIRECTIONAL ||
++ mhi_chan->dir == DMA_NONE) && !mhi_chan->offload_ch) {
++ dev_err(dev, "Invalid channel configuration\n");
++ goto error_chan_cfg;
++ }
++
++ if (!mhi_chan->offload_ch) {
++ mhi_chan->db_cfg.brstmode = ch_cfg->doorbell;
++ if (MHI_INVALID_BRSTMODE(mhi_chan->db_cfg.brstmode)) {
++ dev_err(dev, "Invalid Door bell mode\n");
++ goto error_chan_cfg;
++ }
++ }
++
++ if (mhi_chan->db_cfg.brstmode == MHI_DB_BRST_ENABLE)
++ mhi_chan->db_cfg.process_db = mhi_db_brstmode;
++ else
++ mhi_chan->db_cfg.process_db = mhi_db_brstmode_disable;
++
++ mhi_chan->configured = true;
++
++ if (mhi_chan->lpm_notify)
++ list_add_tail(&mhi_chan->node, &mhi_cntrl->lpm_chans);
++ }
++
++ return 0;
++
++error_chan_cfg:
++ vfree(mhi_cntrl->mhi_chan);
++
++ return -EINVAL;
++}
++
++static int parse_config(struct mhi_controller *mhi_cntrl,
++ const struct mhi_controller_config *config)
++{
++ int ret;
++
++ /* Parse MHI channel configuration */
++ ret = parse_ch_cfg(mhi_cntrl, config);
++ if (ret)
++ return ret;
++
++ /* Parse MHI event configuration */
++ ret = parse_ev_cfg(mhi_cntrl, config);
++ if (ret)
++ goto error_ev_cfg;
++
++ mhi_cntrl->timeout_ms = config->timeout_ms;
++ if (!mhi_cntrl->timeout_ms)
++ mhi_cntrl->timeout_ms = MHI_TIMEOUT_MS;
++
++ mhi_cntrl->bounce_buf = config->use_bounce_buf;
++ mhi_cntrl->buffer_len = config->buf_len;
++ if (!mhi_cntrl->buffer_len)
++ mhi_cntrl->buffer_len = MHI_MAX_MTU;
++
++ /* By default, host is allowed to ring DB in both M0 and M2 states */
++ mhi_cntrl->db_access = MHI_PM_M0 | MHI_PM_M2;
++ if (config->m2_no_db)
++ mhi_cntrl->db_access &= ~MHI_PM_M2;
++
++ return 0;
++
++error_ev_cfg:
++ vfree(mhi_cntrl->mhi_chan);
++
++ return ret;
++}
++
++int mhi_register_controller(struct mhi_controller *mhi_cntrl,
++ const struct mhi_controller_config *config)
++{
++ struct mhi_event *mhi_event;
++ struct mhi_chan *mhi_chan;
++ struct mhi_cmd *mhi_cmd;
++ struct mhi_device *mhi_dev;
++ u32 soc_info;
++ int ret, i;
++
++ if (!mhi_cntrl || !mhi_cntrl->cntrl_dev || !mhi_cntrl->regs ||
++ !mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put ||
++ !mhi_cntrl->status_cb || !mhi_cntrl->read_reg ||
++ !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs ||
++ !mhi_cntrl->irq || !mhi_cntrl->reg_len)
++ return -EINVAL;
++
++ ret = parse_config(mhi_cntrl, config);
++ if (ret)
++ return -EINVAL;
++
++ mhi_cntrl->mhi_cmd = kcalloc(NR_OF_CMD_RINGS,
++ sizeof(*mhi_cntrl->mhi_cmd), GFP_KERNEL);
++ if (!mhi_cntrl->mhi_cmd) {
++ ret = -ENOMEM;
++ goto err_free_event;
++ }
++
++ INIT_LIST_HEAD(&mhi_cntrl->transition_list);
++ mutex_init(&mhi_cntrl->pm_mutex);
++ rwlock_init(&mhi_cntrl->pm_lock);
++ spin_lock_init(&mhi_cntrl->transition_lock);
++ spin_lock_init(&mhi_cntrl->wlock);
++ INIT_WORK(&mhi_cntrl->st_worker, mhi_pm_st_worker);
++ init_waitqueue_head(&mhi_cntrl->state_event);
++
++ mhi_cntrl->hiprio_wq = alloc_ordered_workqueue("mhi_hiprio_wq", WQ_HIGHPRI);
++ if (!mhi_cntrl->hiprio_wq) {
++ dev_err(mhi_cntrl->cntrl_dev, "Failed to allocate workqueue\n");
++ ret = -ENOMEM;
++ goto err_free_cmd;
++ }
++
++ mhi_cmd = mhi_cntrl->mhi_cmd;
++ for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++)
++ spin_lock_init(&mhi_cmd->lock);
++
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ /* Skip for offload events */
++ if (mhi_event->offload_ev)
++ continue;
++
++ mhi_event->mhi_cntrl = mhi_cntrl;
++ spin_lock_init(&mhi_event->lock);
++ if (mhi_event->data_type == MHI_ER_CTRL)
++ tasklet_init(&mhi_event->task, mhi_ctrl_ev_task,
++ (ulong)mhi_event);
++ else
++ tasklet_init(&mhi_event->task, mhi_ev_task,
++ (ulong)mhi_event);
++ }
++
++ mhi_chan = mhi_cntrl->mhi_chan;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
++ mutex_init(&mhi_chan->mutex);
++ init_completion(&mhi_chan->completion);
++ rwlock_init(&mhi_chan->lock);
++
++ /* used in setting bei field of TRE */
++ mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
++ mhi_chan->intmod = mhi_event->intmod;
++ }
++
++ if (mhi_cntrl->bounce_buf) {
++ mhi_cntrl->map_single = mhi_map_single_use_bb;
++ mhi_cntrl->unmap_single = mhi_unmap_single_use_bb;
++ } else {
++ mhi_cntrl->map_single = mhi_map_single_no_bb;
++ mhi_cntrl->unmap_single = mhi_unmap_single_no_bb;
++ }
++
++ /* Read the MHI device info */
++ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs,
++ SOC_HW_VERSION_OFFS, &soc_info);
++ if (ret)
++ goto err_destroy_wq;
++
++ mhi_cntrl->family_number = (soc_info & SOC_HW_VERSION_FAM_NUM_BMSK) >>
++ SOC_HW_VERSION_FAM_NUM_SHFT;
++ mhi_cntrl->device_number = (soc_info & SOC_HW_VERSION_DEV_NUM_BMSK) >>
++ SOC_HW_VERSION_DEV_NUM_SHFT;
++ mhi_cntrl->major_version = (soc_info & SOC_HW_VERSION_MAJOR_VER_BMSK) >>
++ SOC_HW_VERSION_MAJOR_VER_SHFT;
++ mhi_cntrl->minor_version = (soc_info & SOC_HW_VERSION_MINOR_VER_BMSK) >>
++ SOC_HW_VERSION_MINOR_VER_SHFT;
++
++ mhi_cntrl->index = ida_alloc(&mhi_controller_ida, GFP_KERNEL);
++ if (mhi_cntrl->index < 0) {
++ ret = mhi_cntrl->index;
++ goto err_destroy_wq;
++ }
++
++ /* Register controller with MHI bus */
++ mhi_dev = mhi_alloc_device(mhi_cntrl);
++ if (IS_ERR(mhi_dev)) {
++ dev_err(mhi_cntrl->cntrl_dev, "Failed to allocate MHI device\n");
++ ret = PTR_ERR(mhi_dev);
++ goto err_ida_free;
++ }
++
++ mhi_dev->dev_type = MHI_DEVICE_CONTROLLER;
++ mhi_dev->mhi_cntrl = mhi_cntrl;
++ dev_set_name(&mhi_dev->dev, "mhi%d", mhi_cntrl->index);
++ mhi_dev->name = dev_name(&mhi_dev->dev);
++
++ /* Init wakeup source */
++ device_init_wakeup(&mhi_dev->dev, true);
++
++ ret = device_add(&mhi_dev->dev);
++ if (ret)
++ goto err_release_dev;
++
++ mhi_cntrl->mhi_dev = mhi_dev;
++
++ mhi_create_debugfs(mhi_cntrl);
++
++ return 0;
++
++err_release_dev:
++ put_device(&mhi_dev->dev);
++err_ida_free:
++ ida_free(&mhi_controller_ida, mhi_cntrl->index);
++err_destroy_wq:
++ destroy_workqueue(mhi_cntrl->hiprio_wq);
++err_free_cmd:
++ kfree(mhi_cntrl->mhi_cmd);
++err_free_event:
++ kfree(mhi_cntrl->mhi_event);
++ vfree(mhi_cntrl->mhi_chan);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_register_controller);
++
++void mhi_unregister_controller(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_device *mhi_dev = mhi_cntrl->mhi_dev;
++ struct mhi_chan *mhi_chan = mhi_cntrl->mhi_chan;
++ unsigned int i;
++
++ mhi_destroy_debugfs(mhi_cntrl);
++
++ destroy_workqueue(mhi_cntrl->hiprio_wq);
++ kfree(mhi_cntrl->mhi_cmd);
++ kfree(mhi_cntrl->mhi_event);
++
++ /* Drop the references to MHI devices created for channels */
++ for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
++ if (!mhi_chan->mhi_dev)
++ continue;
++
++ put_device(&mhi_chan->mhi_dev->dev);
++ }
++ vfree(mhi_cntrl->mhi_chan);
++
++ device_del(&mhi_dev->dev);
++ put_device(&mhi_dev->dev);
++
++ ida_free(&mhi_controller_ida, mhi_cntrl->index);
++}
++EXPORT_SYMBOL_GPL(mhi_unregister_controller);
++
++struct mhi_controller *mhi_alloc_controller(void)
++{
++ struct mhi_controller *mhi_cntrl;
++
++ mhi_cntrl = kzalloc(sizeof(*mhi_cntrl), GFP_KERNEL);
++
++ return mhi_cntrl;
++}
++EXPORT_SYMBOL_GPL(mhi_alloc_controller);
++
++void mhi_free_controller(struct mhi_controller *mhi_cntrl)
++{
++ kfree(mhi_cntrl);
++}
++EXPORT_SYMBOL_GPL(mhi_free_controller);
++
++int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl)
++{
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 bhi_off, bhie_off;
++ int ret;
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++
++ ret = mhi_init_dev_ctxt(mhi_cntrl);
++ if (ret)
++ goto error_dev_ctxt;
++
++ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &bhi_off);
++ if (ret) {
++ dev_err(dev, "Error getting BHI offset\n");
++ goto error_reg_offset;
++ }
++
++ if (bhi_off >= mhi_cntrl->reg_len) {
++ dev_err(dev, "BHI offset: 0x%x is out of range: 0x%zx\n",
++ bhi_off, mhi_cntrl->reg_len);
++ ret = -EINVAL;
++ goto error_reg_offset;
++ }
++ mhi_cntrl->bhi = mhi_cntrl->regs + bhi_off;
++
++ if (mhi_cntrl->fbc_download || mhi_cntrl->rddm_size) {
++ ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF,
++ &bhie_off);
++ if (ret) {
++ dev_err(dev, "Error getting BHIE offset\n");
++ goto error_reg_offset;
++ }
++
++ if (bhie_off >= mhi_cntrl->reg_len) {
++ dev_err(dev,
++ "BHIe offset: 0x%x is out of range: 0x%zx\n",
++ bhie_off, mhi_cntrl->reg_len);
++ ret = -EINVAL;
++ goto error_reg_offset;
++ }
++ mhi_cntrl->bhie = mhi_cntrl->regs + bhie_off;
++ }
++
++ if (mhi_cntrl->rddm_size) {
++ /*
++ * This controller supports RDDM, so we need to manually clear
++ * BHIE RX registers since POR values are undefined.
++ */
++ memset_io(mhi_cntrl->bhie + BHIE_RXVECADDR_LOW_OFFS,
++ 0, BHIE_RXVECSTATUS_OFFS - BHIE_RXVECADDR_LOW_OFFS +
++ 4);
++ /*
++ * Allocate RDDM table for debugging purpose if specified
++ */
++ mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image,
++ mhi_cntrl->rddm_size);
++ if (mhi_cntrl->rddm_image)
++ mhi_rddm_prepare(mhi_cntrl, mhi_cntrl->rddm_image);
++ }
++
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++
++ return 0;
++
++error_reg_offset:
++ mhi_deinit_dev_ctxt(mhi_cntrl);
++
++error_dev_ctxt:
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_prepare_for_power_up);
++
++void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl)
++{
++ if (mhi_cntrl->fbc_image) {
++ mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->fbc_image);
++ mhi_cntrl->fbc_image = NULL;
++ }
++
++ if (mhi_cntrl->rddm_image) {
++ mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->rddm_image);
++ mhi_cntrl->rddm_image = NULL;
++ }
++
++ mhi_cntrl->bhi = NULL;
++ mhi_cntrl->bhie = NULL;
++
++ mhi_deinit_dev_ctxt(mhi_cntrl);
++}
++EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down);
++
++static void mhi_release_device(struct device *dev)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++
++ /*
++ * We need to set the mhi_chan->mhi_dev to NULL here since the MHI
++ * devices for the channels will only get created if the mhi_dev
++ * associated with it is NULL. This scenario will happen during the
++ * controller suspend and resume.
++ */
++ if (mhi_dev->ul_chan)
++ mhi_dev->ul_chan->mhi_dev = NULL;
++
++ if (mhi_dev->dl_chan)
++ mhi_dev->dl_chan->mhi_dev = NULL;
++
++ kfree(mhi_dev);
++}
++
++struct mhi_device *mhi_alloc_device(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_device *mhi_dev;
++ struct device *dev;
++
++ mhi_dev = kzalloc(sizeof(*mhi_dev), GFP_KERNEL);
++ if (!mhi_dev)
++ return ERR_PTR(-ENOMEM);
++
++ dev = &mhi_dev->dev;
++ device_initialize(dev);
++ dev->bus = &mhi_bus_type;
++ dev->release = mhi_release_device;
++
++ if (mhi_cntrl->mhi_dev) {
++ /* for MHI client devices, parent is the MHI controller device */
++ dev->parent = &mhi_cntrl->mhi_dev->dev;
++ } else {
++ /* for MHI controller device, parent is the bus device (e.g. pci device) */
++ dev->parent = mhi_cntrl->cntrl_dev;
++ }
++
++ mhi_dev->mhi_cntrl = mhi_cntrl;
++ mhi_dev->dev_wake = 0;
++
++ return mhi_dev;
++}
++
++static int mhi_driver_probe(struct device *dev)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct device_driver *drv = dev->driver;
++ struct mhi_driver *mhi_drv = to_mhi_driver(drv);
++ struct mhi_event *mhi_event;
++ struct mhi_chan *ul_chan = mhi_dev->ul_chan;
++ struct mhi_chan *dl_chan = mhi_dev->dl_chan;
++ int ret;
++
++ /* Bring device out of LPM */
++ ret = mhi_device_get_sync(mhi_dev);
++ if (ret)
++ return ret;
++
++ ret = -EINVAL;
++
++ if (ul_chan) {
++ /*
++ * If channel supports LPM notifications then status_cb should
++ * be provided
++ */
++ if (ul_chan->lpm_notify && !mhi_drv->status_cb)
++ goto exit_probe;
++
++ /* For non-offload channels then xfer_cb should be provided */
++ if (!ul_chan->offload_ch && !mhi_drv->ul_xfer_cb)
++ goto exit_probe;
++
++ ul_chan->xfer_cb = mhi_drv->ul_xfer_cb;
++ }
++
++ ret = -EINVAL;
++ if (dl_chan) {
++ /*
++ * If channel supports LPM notifications then status_cb should
++ * be provided
++ */
++ if (dl_chan->lpm_notify && !mhi_drv->status_cb)
++ goto exit_probe;
++
++ /* For non-offload channels then xfer_cb should be provided */
++ if (!dl_chan->offload_ch && !mhi_drv->dl_xfer_cb)
++ goto exit_probe;
++
++ mhi_event = &mhi_cntrl->mhi_event[dl_chan->er_index];
++
++ /*
++ * If the channel event ring is managed by client, then
++ * status_cb must be provided so that the framework can
++ * notify pending data
++ */
++ if (mhi_event->cl_manage && !mhi_drv->status_cb)
++ goto exit_probe;
++
++ dl_chan->xfer_cb = mhi_drv->dl_xfer_cb;
++ }
++
++ /* Call the user provided probe function */
++ ret = mhi_drv->probe(mhi_dev, mhi_dev->id);
++ if (ret)
++ goto exit_probe;
++
++ mhi_device_put(mhi_dev);
++
++ return ret;
++
++exit_probe:
++ mhi_unprepare_from_transfer(mhi_dev);
++
++ mhi_device_put(mhi_dev);
++
++ return ret;
++}
++
++static int mhi_driver_remove(struct device *dev)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++ struct mhi_driver *mhi_drv = to_mhi_driver(dev->driver);
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan;
++ enum mhi_ch_state ch_state[] = {
++ MHI_CH_STATE_DISABLED,
++ MHI_CH_STATE_DISABLED
++ };
++ int dir;
++
++ /* Skip if it is a controller device */
++ if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
++ return 0;
++
++ /* Reset both channels */
++ for (dir = 0; dir < 2; dir++) {
++ mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
++
++ if (!mhi_chan)
++ continue;
++
++ /* Wake all threads waiting for completion */
++ write_lock_irq(&mhi_chan->lock);
++ mhi_chan->ccs = MHI_EV_CC_INVALID;
++ complete_all(&mhi_chan->completion);
++ write_unlock_irq(&mhi_chan->lock);
++
++ /* Set the channel state to disabled */
++ mutex_lock(&mhi_chan->mutex);
++ write_lock_irq(&mhi_chan->lock);
++ ch_state[dir] = mhi_chan->ch_state;
++ mhi_chan->ch_state = MHI_CH_STATE_SUSPENDED;
++ write_unlock_irq(&mhi_chan->lock);
++
++ /* Reset the non-offload channel */
++ if (!mhi_chan->offload_ch)
++ mhi_reset_chan(mhi_cntrl, mhi_chan);
++
++ mutex_unlock(&mhi_chan->mutex);
++ }
++
++ mhi_drv->remove(mhi_dev);
++
++ /* De-init channel if it was enabled */
++ for (dir = 0; dir < 2; dir++) {
++ mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
++
++ if (!mhi_chan)
++ continue;
++
++ mutex_lock(&mhi_chan->mutex);
++
++ if ((ch_state[dir] == MHI_CH_STATE_ENABLED ||
++ ch_state[dir] == MHI_CH_STATE_STOP) &&
++ !mhi_chan->offload_ch)
++ mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
++
++ mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
++
++ mutex_unlock(&mhi_chan->mutex);
++ }
++
++ while (mhi_dev->dev_wake)
++ mhi_device_put(mhi_dev);
++
++ return 0;
++}
++
++int __mhi_driver_register(struct mhi_driver *mhi_drv, struct module *owner)
++{
++ struct device_driver *driver = &mhi_drv->driver;
++
++ if (!mhi_drv->probe || !mhi_drv->remove)
++ return -EINVAL;
++
++ driver->bus = &mhi_bus_type;
++ driver->owner = owner;
++ driver->probe = mhi_driver_probe;
++ driver->remove = mhi_driver_remove;
++
++ return driver_register(driver);
++}
++EXPORT_SYMBOL_GPL(__mhi_driver_register);
++
++void mhi_driver_unregister(struct mhi_driver *mhi_drv)
++{
++ driver_unregister(&mhi_drv->driver);
++}
++EXPORT_SYMBOL_GPL(mhi_driver_unregister);
++
++static int mhi_uevent(struct device *dev, struct kobj_uevent_env *env)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++
++ return add_uevent_var(env, "MODALIAS=" MHI_DEVICE_MODALIAS_FMT,
++ mhi_dev->name);
++}
++
++static int mhi_match(struct device *dev, struct device_driver *drv)
++{
++ struct mhi_device *mhi_dev = to_mhi_device(dev);
++ struct mhi_driver *mhi_drv = to_mhi_driver(drv);
++ const struct mhi_device_id *id;
++
++ /*
++ * If the device is a controller type then there is no client driver
++ * associated with it
++ */
++ if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
++ return 0;
++
++ for (id = mhi_drv->id_table; id->chan[0]; id++)
++ if (!strcmp(mhi_dev->name, id->chan)) {
++ mhi_dev->id = id;
++ return 1;
++ }
++
++ return 0;
++};
++
++struct bus_type mhi_bus_type = {
++ .name = "mhi",
++ .dev_name = "mhi",
++ .match = mhi_match,
++ .uevent = mhi_uevent,
++ .dev_groups = mhi_dev_groups,
++};
++
++static int __init mhi_init(void)
++{
++ mhi_debugfs_init();
++ return bus_register(&mhi_bus_type);
++}
++
++static void __exit mhi_exit(void)
++{
++ mhi_debugfs_exit();
++ bus_unregister(&mhi_bus_type);
++}
++
++postcore_initcall(mhi_init);
++module_exit(mhi_exit);
++
++MODULE_LICENSE("GPL v2");
++MODULE_DESCRIPTION("MHI Host Interface");
+diff --git a/drivers/bus/mhi/host/internal.h b/drivers/bus/mhi/host/internal.h
+new file mode 100644
+index 0000000000000..71f181402be98
+--- /dev/null
++++ b/drivers/bus/mhi/host/internal.h
+@@ -0,0 +1,718 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#ifndef _MHI_INT_H
++#define _MHI_INT_H
++
++#include <linux/mhi.h>
++
++extern struct bus_type mhi_bus_type;
++
++#define MHIREGLEN (0x0)
++#define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF)
++#define MHIREGLEN_MHIREGLEN_SHIFT (0)
++
++#define MHIVER (0x8)
++#define MHIVER_MHIVER_MASK (0xFFFFFFFF)
++#define MHIVER_MHIVER_SHIFT (0)
++
++#define MHICFG (0x10)
++#define MHICFG_NHWER_MASK (0xFF000000)
++#define MHICFG_NHWER_SHIFT (24)
++#define MHICFG_NER_MASK (0xFF0000)
++#define MHICFG_NER_SHIFT (16)
++#define MHICFG_NHWCH_MASK (0xFF00)
++#define MHICFG_NHWCH_SHIFT (8)
++#define MHICFG_NCH_MASK (0xFF)
++#define MHICFG_NCH_SHIFT (0)
++
++#define CHDBOFF (0x18)
++#define CHDBOFF_CHDBOFF_MASK (0xFFFFFFFF)
++#define CHDBOFF_CHDBOFF_SHIFT (0)
++
++#define ERDBOFF (0x20)
++#define ERDBOFF_ERDBOFF_MASK (0xFFFFFFFF)
++#define ERDBOFF_ERDBOFF_SHIFT (0)
++
++#define BHIOFF (0x28)
++#define BHIOFF_BHIOFF_MASK (0xFFFFFFFF)
++#define BHIOFF_BHIOFF_SHIFT (0)
++
++#define BHIEOFF (0x2C)
++#define BHIEOFF_BHIEOFF_MASK (0xFFFFFFFF)
++#define BHIEOFF_BHIEOFF_SHIFT (0)
++
++#define DEBUGOFF (0x30)
++#define DEBUGOFF_DEBUGOFF_MASK (0xFFFFFFFF)
++#define DEBUGOFF_DEBUGOFF_SHIFT (0)
++
++#define MHICTRL (0x38)
++#define MHICTRL_MHISTATE_MASK (0x0000FF00)
++#define MHICTRL_MHISTATE_SHIFT (8)
++#define MHICTRL_RESET_MASK (0x2)
++#define MHICTRL_RESET_SHIFT (1)
++
++#define MHISTATUS (0x48)
++#define MHISTATUS_MHISTATE_MASK (0x0000FF00)
++#define MHISTATUS_MHISTATE_SHIFT (8)
++#define MHISTATUS_SYSERR_MASK (0x4)
++#define MHISTATUS_SYSERR_SHIFT (2)
++#define MHISTATUS_READY_MASK (0x1)
++#define MHISTATUS_READY_SHIFT (0)
++
++#define CCABAP_LOWER (0x58)
++#define CCABAP_LOWER_CCABAP_LOWER_MASK (0xFFFFFFFF)
++#define CCABAP_LOWER_CCABAP_LOWER_SHIFT (0)
++
++#define CCABAP_HIGHER (0x5C)
++#define CCABAP_HIGHER_CCABAP_HIGHER_MASK (0xFFFFFFFF)
++#define CCABAP_HIGHER_CCABAP_HIGHER_SHIFT (0)
++
++#define ECABAP_LOWER (0x60)
++#define ECABAP_LOWER_ECABAP_LOWER_MASK (0xFFFFFFFF)
++#define ECABAP_LOWER_ECABAP_LOWER_SHIFT (0)
++
++#define ECABAP_HIGHER (0x64)
++#define ECABAP_HIGHER_ECABAP_HIGHER_MASK (0xFFFFFFFF)
++#define ECABAP_HIGHER_ECABAP_HIGHER_SHIFT (0)
++
++#define CRCBAP_LOWER (0x68)
++#define CRCBAP_LOWER_CRCBAP_LOWER_MASK (0xFFFFFFFF)
++#define CRCBAP_LOWER_CRCBAP_LOWER_SHIFT (0)
++
++#define CRCBAP_HIGHER (0x6C)
++#define CRCBAP_HIGHER_CRCBAP_HIGHER_MASK (0xFFFFFFFF)
++#define CRCBAP_HIGHER_CRCBAP_HIGHER_SHIFT (0)
++
++#define CRDB_LOWER (0x70)
++#define CRDB_LOWER_CRDB_LOWER_MASK (0xFFFFFFFF)
++#define CRDB_LOWER_CRDB_LOWER_SHIFT (0)
++
++#define CRDB_HIGHER (0x74)
++#define CRDB_HIGHER_CRDB_HIGHER_MASK (0xFFFFFFFF)
++#define CRDB_HIGHER_CRDB_HIGHER_SHIFT (0)
++
++#define MHICTRLBASE_LOWER (0x80)
++#define MHICTRLBASE_LOWER_MHICTRLBASE_LOWER_MASK (0xFFFFFFFF)
++#define MHICTRLBASE_LOWER_MHICTRLBASE_LOWER_SHIFT (0)
++
++#define MHICTRLBASE_HIGHER (0x84)
++#define MHICTRLBASE_HIGHER_MHICTRLBASE_HIGHER_MASK (0xFFFFFFFF)
++#define MHICTRLBASE_HIGHER_MHICTRLBASE_HIGHER_SHIFT (0)
++
++#define MHICTRLLIMIT_LOWER (0x88)
++#define MHICTRLLIMIT_LOWER_MHICTRLLIMIT_LOWER_MASK (0xFFFFFFFF)
++#define MHICTRLLIMIT_LOWER_MHICTRLLIMIT_LOWER_SHIFT (0)
++
++#define MHICTRLLIMIT_HIGHER (0x8C)
++#define MHICTRLLIMIT_HIGHER_MHICTRLLIMIT_HIGHER_MASK (0xFFFFFFFF)
++#define MHICTRLLIMIT_HIGHER_MHICTRLLIMIT_HIGHER_SHIFT (0)
++
++#define MHIDATABASE_LOWER (0x98)
++#define MHIDATABASE_LOWER_MHIDATABASE_LOWER_MASK (0xFFFFFFFF)
++#define MHIDATABASE_LOWER_MHIDATABASE_LOWER_SHIFT (0)
++
++#define MHIDATABASE_HIGHER (0x9C)
++#define MHIDATABASE_HIGHER_MHIDATABASE_HIGHER_MASK (0xFFFFFFFF)
++#define MHIDATABASE_HIGHER_MHIDATABASE_HIGHER_SHIFT (0)
++
++#define MHIDATALIMIT_LOWER (0xA0)
++#define MHIDATALIMIT_LOWER_MHIDATALIMIT_LOWER_MASK (0xFFFFFFFF)
++#define MHIDATALIMIT_LOWER_MHIDATALIMIT_LOWER_SHIFT (0)
++
++#define MHIDATALIMIT_HIGHER (0xA4)
++#define MHIDATALIMIT_HIGHER_MHIDATALIMIT_HIGHER_MASK (0xFFFFFFFF)
++#define MHIDATALIMIT_HIGHER_MHIDATALIMIT_HIGHER_SHIFT (0)
++
++/* Host request register */
++#define MHI_SOC_RESET_REQ_OFFSET (0xB0)
++#define MHI_SOC_RESET_REQ BIT(0)
++
++/* MHI BHI offsets */
++#define BHI_BHIVERSION_MINOR (0x00)
++#define BHI_BHIVERSION_MAJOR (0x04)
++#define BHI_IMGADDR_LOW (0x08)
++#define BHI_IMGADDR_HIGH (0x0C)
++#define BHI_IMGSIZE (0x10)
++#define BHI_RSVD1 (0x14)
++#define BHI_IMGTXDB (0x18)
++#define BHI_TXDB_SEQNUM_BMSK (0x3FFFFFFF)
++#define BHI_TXDB_SEQNUM_SHFT (0)
++#define BHI_RSVD2 (0x1C)
++#define BHI_INTVEC (0x20)
++#define BHI_RSVD3 (0x24)
++#define BHI_EXECENV (0x28)
++#define BHI_STATUS (0x2C)
++#define BHI_ERRCODE (0x30)
++#define BHI_ERRDBG1 (0x34)
++#define BHI_ERRDBG2 (0x38)
++#define BHI_ERRDBG3 (0x3C)
++#define BHI_SERIALNU (0x40)
++#define BHI_SBLANTIROLLVER (0x44)
++#define BHI_NUMSEG (0x48)
++#define BHI_MSMHWID(n) (0x4C + (0x4 * (n)))
++#define BHI_OEMPKHASH(n) (0x64 + (0x4 * (n)))
++#define BHI_RSVD5 (0xC4)
++#define BHI_STATUS_MASK (0xC0000000)
++#define BHI_STATUS_SHIFT (30)
++#define BHI_STATUS_ERROR (3)
++#define BHI_STATUS_SUCCESS (2)
++#define BHI_STATUS_RESET (0)
++
++/* MHI BHIE offsets */
++#define BHIE_MSMSOCID_OFFS (0x0000)
++#define BHIE_TXVECADDR_LOW_OFFS (0x002C)
++#define BHIE_TXVECADDR_HIGH_OFFS (0x0030)
++#define BHIE_TXVECSIZE_OFFS (0x0034)
++#define BHIE_TXVECDB_OFFS (0x003C)
++#define BHIE_TXVECDB_SEQNUM_BMSK (0x3FFFFFFF)
++#define BHIE_TXVECDB_SEQNUM_SHFT (0)
++#define BHIE_TXVECSTATUS_OFFS (0x0044)
++#define BHIE_TXVECSTATUS_SEQNUM_BMSK (0x3FFFFFFF)
++#define BHIE_TXVECSTATUS_SEQNUM_SHFT (0)
++#define BHIE_TXVECSTATUS_STATUS_BMSK (0xC0000000)
++#define BHIE_TXVECSTATUS_STATUS_SHFT (30)
++#define BHIE_TXVECSTATUS_STATUS_RESET (0x00)
++#define BHIE_TXVECSTATUS_STATUS_XFER_COMPL (0x02)
++#define BHIE_TXVECSTATUS_STATUS_ERROR (0x03)
++#define BHIE_RXVECADDR_LOW_OFFS (0x0060)
++#define BHIE_RXVECADDR_HIGH_OFFS (0x0064)
++#define BHIE_RXVECSIZE_OFFS (0x0068)
++#define BHIE_RXVECDB_OFFS (0x0070)
++#define BHIE_RXVECDB_SEQNUM_BMSK (0x3FFFFFFF)
++#define BHIE_RXVECDB_SEQNUM_SHFT (0)
++#define BHIE_RXVECSTATUS_OFFS (0x0078)
++#define BHIE_RXVECSTATUS_SEQNUM_BMSK (0x3FFFFFFF)
++#define BHIE_RXVECSTATUS_SEQNUM_SHFT (0)
++#define BHIE_RXVECSTATUS_STATUS_BMSK (0xC0000000)
++#define BHIE_RXVECSTATUS_STATUS_SHFT (30)
++#define BHIE_RXVECSTATUS_STATUS_RESET (0x00)
++#define BHIE_RXVECSTATUS_STATUS_XFER_COMPL (0x02)
++#define BHIE_RXVECSTATUS_STATUS_ERROR (0x03)
++
++#define SOC_HW_VERSION_OFFS (0x224)
++#define SOC_HW_VERSION_FAM_NUM_BMSK (0xF0000000)
++#define SOC_HW_VERSION_FAM_NUM_SHFT (28)
++#define SOC_HW_VERSION_DEV_NUM_BMSK (0x0FFF0000)
++#define SOC_HW_VERSION_DEV_NUM_SHFT (16)
++#define SOC_HW_VERSION_MAJOR_VER_BMSK (0x0000FF00)
++#define SOC_HW_VERSION_MAJOR_VER_SHFT (8)
++#define SOC_HW_VERSION_MINOR_VER_BMSK (0x000000FF)
++#define SOC_HW_VERSION_MINOR_VER_SHFT (0)
++
++#define EV_CTX_RESERVED_MASK GENMASK(7, 0)
++#define EV_CTX_INTMODC_MASK GENMASK(15, 8)
++#define EV_CTX_INTMODC_SHIFT 8
++#define EV_CTX_INTMODT_MASK GENMASK(31, 16)
++#define EV_CTX_INTMODT_SHIFT 16
++struct mhi_event_ctxt {
++ __le32 intmod;
++ __le32 ertype;
++ __le32 msivec;
++
++ __le64 rbase __packed __aligned(4);
++ __le64 rlen __packed __aligned(4);
++ __le64 rp __packed __aligned(4);
++ __le64 wp __packed __aligned(4);
++};
++
++#define CHAN_CTX_CHSTATE_MASK GENMASK(7, 0)
++#define CHAN_CTX_CHSTATE_SHIFT 0
++#define CHAN_CTX_BRSTMODE_MASK GENMASK(9, 8)
++#define CHAN_CTX_BRSTMODE_SHIFT 8
++#define CHAN_CTX_POLLCFG_MASK GENMASK(15, 10)
++#define CHAN_CTX_POLLCFG_SHIFT 10
++#define CHAN_CTX_RESERVED_MASK GENMASK(31, 16)
++struct mhi_chan_ctxt {
++ __le32 chcfg;
++ __le32 chtype;
++ __le32 erindex;
++
++ __le64 rbase __packed __aligned(4);
++ __le64 rlen __packed __aligned(4);
++ __le64 rp __packed __aligned(4);
++ __le64 wp __packed __aligned(4);
++};
++
++struct mhi_cmd_ctxt {
++ __le32 reserved0;
++ __le32 reserved1;
++ __le32 reserved2;
++
++ __le64 rbase __packed __aligned(4);
++ __le64 rlen __packed __aligned(4);
++ __le64 rp __packed __aligned(4);
++ __le64 wp __packed __aligned(4);
++};
++
++struct mhi_ctxt {
++ struct mhi_event_ctxt *er_ctxt;
++ struct mhi_chan_ctxt *chan_ctxt;
++ struct mhi_cmd_ctxt *cmd_ctxt;
++ dma_addr_t er_ctxt_addr;
++ dma_addr_t chan_ctxt_addr;
++ dma_addr_t cmd_ctxt_addr;
++};
++
++struct mhi_tre {
++ __le64 ptr;
++ __le32 dword[2];
++};
++
++struct bhi_vec_entry {
++ u64 dma_addr;
++ u64 size;
++};
++
++enum mhi_cmd_type {
++ MHI_CMD_NOP = 1,
++ MHI_CMD_RESET_CHAN = 16,
++ MHI_CMD_STOP_CHAN = 17,
++ MHI_CMD_START_CHAN = 18,
++};
++
++/* No operation command */
++#define MHI_TRE_CMD_NOOP_PTR (0)
++#define MHI_TRE_CMD_NOOP_DWORD0 (0)
++#define MHI_TRE_CMD_NOOP_DWORD1 (cpu_to_le32(MHI_CMD_NOP << 16))
++
++/* Channel reset command */
++#define MHI_TRE_CMD_RESET_PTR (0)
++#define MHI_TRE_CMD_RESET_DWORD0 (0)
++#define MHI_TRE_CMD_RESET_DWORD1(chid) (cpu_to_le32((chid << 24) | \
++ (MHI_CMD_RESET_CHAN << 16)))
++
++/* Channel stop command */
++#define MHI_TRE_CMD_STOP_PTR (0)
++#define MHI_TRE_CMD_STOP_DWORD0 (0)
++#define MHI_TRE_CMD_STOP_DWORD1(chid) (cpu_to_le32((chid << 24) | \
++ (MHI_CMD_STOP_CHAN << 16)))
++
++/* Channel start command */
++#define MHI_TRE_CMD_START_PTR (0)
++#define MHI_TRE_CMD_START_DWORD0 (0)
++#define MHI_TRE_CMD_START_DWORD1(chid) (cpu_to_le32((chid << 24) | \
++ (MHI_CMD_START_CHAN << 16)))
++
++#define MHI_TRE_GET_DWORD(tre, word) (le32_to_cpu((tre)->dword[(word)]))
++#define MHI_TRE_GET_CMD_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF)
++#define MHI_TRE_GET_CMD_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF)
++
++/* Event descriptor macros */
++#define MHI_TRE_EV_PTR(ptr) (cpu_to_le64(ptr))
++#define MHI_TRE_EV_DWORD0(code, len) (cpu_to_le32((code << 24) | len))
++#define MHI_TRE_EV_DWORD1(chid, type) (cpu_to_le32((chid << 24) | (type << 16)))
++#define MHI_TRE_GET_EV_PTR(tre) (le64_to_cpu((tre)->ptr))
++#define MHI_TRE_GET_EV_CODE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF)
++#define MHI_TRE_GET_EV_LEN(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFFFF)
++#define MHI_TRE_GET_EV_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF)
++#define MHI_TRE_GET_EV_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF)
++#define MHI_TRE_GET_EV_STATE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF)
++#define MHI_TRE_GET_EV_EXECENV(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF)
++#define MHI_TRE_GET_EV_SEQ(tre) MHI_TRE_GET_DWORD(tre, 0)
++#define MHI_TRE_GET_EV_TIME(tre) (MHI_TRE_GET_EV_PTR(tre))
++#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits(MHI_TRE_GET_EV_PTR(tre))
++#define MHI_TRE_GET_EV_VEID(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 16) & 0xFF)
++#define MHI_TRE_GET_EV_LINKSPEED(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF)
++#define MHI_TRE_GET_EV_LINKWIDTH(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFF)
++
++/* Transfer descriptor macros */
++#define MHI_TRE_DATA_PTR(ptr) (cpu_to_le64(ptr))
++#define MHI_TRE_DATA_DWORD0(len) (cpu_to_le32(len & MHI_MAX_MTU))
++#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) (cpu_to_le32((2 << 16) | (bei << 10) \
++ | (ieot << 9) | (ieob << 8) | chain))
++
++/* RSC transfer descriptor macros */
++#define MHI_RSCTRE_DATA_PTR(ptr, len) (cpu_to_le64(((u64)len << 48) | ptr))
++#define MHI_RSCTRE_DATA_DWORD0(cookie) (cpu_to_le32(cookie))
++#define MHI_RSCTRE_DATA_DWORD1 (cpu_to_le32(MHI_PKT_TYPE_COALESCING << 16))
++
++enum mhi_pkt_type {
++ MHI_PKT_TYPE_INVALID = 0x0,
++ MHI_PKT_TYPE_NOOP_CMD = 0x1,
++ MHI_PKT_TYPE_TRANSFER = 0x2,
++ MHI_PKT_TYPE_COALESCING = 0x8,
++ MHI_PKT_TYPE_RESET_CHAN_CMD = 0x10,
++ MHI_PKT_TYPE_STOP_CHAN_CMD = 0x11,
++ MHI_PKT_TYPE_START_CHAN_CMD = 0x12,
++ MHI_PKT_TYPE_STATE_CHANGE_EVENT = 0x20,
++ MHI_PKT_TYPE_CMD_COMPLETION_EVENT = 0x21,
++ MHI_PKT_TYPE_TX_EVENT = 0x22,
++ MHI_PKT_TYPE_RSC_TX_EVENT = 0x28,
++ MHI_PKT_TYPE_EE_EVENT = 0x40,
++ MHI_PKT_TYPE_TSYNC_EVENT = 0x48,
++ MHI_PKT_TYPE_BW_REQ_EVENT = 0x50,
++ MHI_PKT_TYPE_STALE_EVENT, /* internal event */
++};
++
++/* MHI transfer completion events */
++enum mhi_ev_ccs {
++ MHI_EV_CC_INVALID = 0x0,
++ MHI_EV_CC_SUCCESS = 0x1,
++ MHI_EV_CC_EOT = 0x2, /* End of transfer event */
++ MHI_EV_CC_OVERFLOW = 0x3,
++ MHI_EV_CC_EOB = 0x4, /* End of block event */
++ MHI_EV_CC_OOB = 0x5, /* Out of block event */
++ MHI_EV_CC_DB_MODE = 0x6,
++ MHI_EV_CC_UNDEFINED_ERR = 0x10,
++ MHI_EV_CC_BAD_TRE = 0x11,
++};
++
++enum mhi_ch_state {
++ MHI_CH_STATE_DISABLED = 0x0,
++ MHI_CH_STATE_ENABLED = 0x1,
++ MHI_CH_STATE_RUNNING = 0x2,
++ MHI_CH_STATE_SUSPENDED = 0x3,
++ MHI_CH_STATE_STOP = 0x4,
++ MHI_CH_STATE_ERROR = 0x5,
++};
++
++enum mhi_ch_state_type {
++ MHI_CH_STATE_TYPE_RESET,
++ MHI_CH_STATE_TYPE_STOP,
++ MHI_CH_STATE_TYPE_START,
++ MHI_CH_STATE_TYPE_MAX,
++};
++
++extern const char * const mhi_ch_state_type_str[MHI_CH_STATE_TYPE_MAX];
++#define TO_CH_STATE_TYPE_STR(state) (((state) >= MHI_CH_STATE_TYPE_MAX) ? \
++ "INVALID_STATE" : \
++ mhi_ch_state_type_str[(state)])
++
++#define MHI_INVALID_BRSTMODE(mode) (mode != MHI_DB_BRST_DISABLE && \
++ mode != MHI_DB_BRST_ENABLE)
++
++extern const char * const mhi_ee_str[MHI_EE_MAX];
++#define TO_MHI_EXEC_STR(ee) (((ee) >= MHI_EE_MAX) ? \
++ "INVALID_EE" : mhi_ee_str[ee])
++
++#define MHI_IN_PBL(ee) (ee == MHI_EE_PBL || ee == MHI_EE_PTHRU || \
++ ee == MHI_EE_EDL)
++
++#define MHI_IN_MISSION_MODE(ee) (ee == MHI_EE_AMSS || ee == MHI_EE_WFW || \
++ ee == MHI_EE_FP)
++
++enum dev_st_transition {
++ DEV_ST_TRANSITION_PBL,
++ DEV_ST_TRANSITION_READY,
++ DEV_ST_TRANSITION_SBL,
++ DEV_ST_TRANSITION_MISSION_MODE,
++ DEV_ST_TRANSITION_FP,
++ DEV_ST_TRANSITION_SYS_ERR,
++ DEV_ST_TRANSITION_DISABLE,
++ DEV_ST_TRANSITION_MAX,
++};
++
++extern const char * const dev_state_tran_str[DEV_ST_TRANSITION_MAX];
++#define TO_DEV_STATE_TRANS_STR(state) (((state) >= DEV_ST_TRANSITION_MAX) ? \
++ "INVALID_STATE" : dev_state_tran_str[state])
++
++extern const char * const mhi_state_str[MHI_STATE_MAX];
++#define TO_MHI_STATE_STR(state) ((state >= MHI_STATE_MAX || \
++ !mhi_state_str[state]) ? \
++ "INVALID_STATE" : mhi_state_str[state])
++
++/* internal power states */
++enum mhi_pm_state {
++ MHI_PM_STATE_DISABLE,
++ MHI_PM_STATE_POR,
++ MHI_PM_STATE_M0,
++ MHI_PM_STATE_M2,
++ MHI_PM_STATE_M3_ENTER,
++ MHI_PM_STATE_M3,
++ MHI_PM_STATE_M3_EXIT,
++ MHI_PM_STATE_FW_DL_ERR,
++ MHI_PM_STATE_SYS_ERR_DETECT,
++ MHI_PM_STATE_SYS_ERR_PROCESS,
++ MHI_PM_STATE_SHUTDOWN_PROCESS,
++ MHI_PM_STATE_LD_ERR_FATAL_DETECT,
++ MHI_PM_STATE_MAX
++};
++
++#define MHI_PM_DISABLE BIT(0)
++#define MHI_PM_POR BIT(1)
++#define MHI_PM_M0 BIT(2)
++#define MHI_PM_M2 BIT(3)
++#define MHI_PM_M3_ENTER BIT(4)
++#define MHI_PM_M3 BIT(5)
++#define MHI_PM_M3_EXIT BIT(6)
++/* firmware download failure state */
++#define MHI_PM_FW_DL_ERR BIT(7)
++#define MHI_PM_SYS_ERR_DETECT BIT(8)
++#define MHI_PM_SYS_ERR_PROCESS BIT(9)
++#define MHI_PM_SHUTDOWN_PROCESS BIT(10)
++/* link not accessible */
++#define MHI_PM_LD_ERR_FATAL_DETECT BIT(11)
++
++#define MHI_REG_ACCESS_VALID(pm_state) ((pm_state & (MHI_PM_POR | MHI_PM_M0 | \
++ MHI_PM_M2 | MHI_PM_M3_ENTER | MHI_PM_M3_EXIT | \
++ MHI_PM_SYS_ERR_DETECT | MHI_PM_SYS_ERR_PROCESS | \
++ MHI_PM_SHUTDOWN_PROCESS | MHI_PM_FW_DL_ERR)))
++#define MHI_PM_IN_ERROR_STATE(pm_state) (pm_state >= MHI_PM_FW_DL_ERR)
++#define MHI_PM_IN_FATAL_STATE(pm_state) (pm_state == MHI_PM_LD_ERR_FATAL_DETECT)
++#define MHI_DB_ACCESS_VALID(mhi_cntrl) (mhi_cntrl->pm_state & \
++ mhi_cntrl->db_access)
++#define MHI_WAKE_DB_CLEAR_VALID(pm_state) (pm_state & (MHI_PM_M0 | \
++ MHI_PM_M2 | MHI_PM_M3_EXIT))
++#define MHI_WAKE_DB_SET_VALID(pm_state) (pm_state & MHI_PM_M2)
++#define MHI_WAKE_DB_FORCE_SET_VALID(pm_state) MHI_WAKE_DB_CLEAR_VALID(pm_state)
++#define MHI_EVENT_ACCESS_INVALID(pm_state) (pm_state == MHI_PM_DISABLE || \
++ MHI_PM_IN_ERROR_STATE(pm_state))
++#define MHI_PM_IN_SUSPEND_STATE(pm_state) (pm_state & \
++ (MHI_PM_M3_ENTER | MHI_PM_M3))
++
++#define NR_OF_CMD_RINGS 1
++#define CMD_EL_PER_RING 128
++#define PRIMARY_CMD_RING 0
++#define MHI_DEV_WAKE_DB 127
++#define MHI_MAX_MTU 0xffff
++#define MHI_RANDOM_U32_NONZERO(bmsk) (prandom_u32_max(bmsk) + 1)
++
++enum mhi_er_type {
++ MHI_ER_TYPE_INVALID = 0x0,
++ MHI_ER_TYPE_VALID = 0x1,
++};
++
++struct db_cfg {
++ bool reset_req;
++ bool db_mode;
++ u32 pollcfg;
++ enum mhi_db_brst_mode brstmode;
++ dma_addr_t db_val;
++ void (*process_db)(struct mhi_controller *mhi_cntrl,
++ struct db_cfg *db_cfg, void __iomem *io_addr,
++ dma_addr_t db_val);
++};
++
++struct mhi_pm_transitions {
++ enum mhi_pm_state from_state;
++ u32 to_states;
++};
++
++struct state_transition {
++ struct list_head node;
++ enum dev_st_transition state;
++};
++
++struct mhi_ring {
++ dma_addr_t dma_handle;
++ dma_addr_t iommu_base;
++ __le64 *ctxt_wp; /* point to ctxt wp */
++ void *pre_aligned;
++ void *base;
++ void *rp;
++ void *wp;
++ size_t el_size;
++ size_t len;
++ size_t elements;
++ size_t alloc_size;
++ void __iomem *db_addr;
++};
++
++struct mhi_cmd {
++ struct mhi_ring ring;
++ spinlock_t lock;
++};
++
++struct mhi_buf_info {
++ void *v_addr;
++ void *bb_addr;
++ void *wp;
++ void *cb_buf;
++ dma_addr_t p_addr;
++ size_t len;
++ enum dma_data_direction dir;
++ bool used; /* Indicates whether the buffer is used or not */
++ bool pre_mapped; /* Already pre-mapped by client */
++};
++
++struct mhi_event {
++ struct mhi_controller *mhi_cntrl;
++ struct mhi_chan *mhi_chan; /* dedicated to channel */
++ u32 er_index;
++ u32 intmod;
++ u32 irq;
++ int chan; /* this event ring is dedicated to a channel (optional) */
++ u32 priority;
++ enum mhi_er_data_type data_type;
++ struct mhi_ring ring;
++ struct db_cfg db_cfg;
++ struct tasklet_struct task;
++ spinlock_t lock;
++ int (*process_event)(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event,
++ u32 event_quota);
++ bool hw_ring;
++ bool cl_manage;
++ bool offload_ev; /* managed by a device driver */
++};
++
++struct mhi_chan {
++ const char *name;
++ /*
++ * Important: When consuming, increment tre_ring first and when
++ * releasing, decrement buf_ring first. If tre_ring has space, buf_ring
++ * is guaranteed to have space so we do not need to check both rings.
++ */
++ struct mhi_ring buf_ring;
++ struct mhi_ring tre_ring;
++ u32 chan;
++ u32 er_index;
++ u32 intmod;
++ enum mhi_ch_type type;
++ enum dma_data_direction dir;
++ struct db_cfg db_cfg;
++ enum mhi_ch_ee_mask ee_mask;
++ enum mhi_ch_state ch_state;
++ enum mhi_ev_ccs ccs;
++ struct mhi_device *mhi_dev;
++ void (*xfer_cb)(struct mhi_device *mhi_dev, struct mhi_result *result);
++ struct mutex mutex;
++ struct completion completion;
++ rwlock_t lock;
++ struct list_head node;
++ bool lpm_notify;
++ bool configured;
++ bool offload_ch;
++ bool pre_alloc;
++ bool wake_capable;
++};
++
++/* Default MHI timeout */
++#define MHI_TIMEOUT_MS (1000)
++
++/* debugfs related functions */
++#ifdef CONFIG_MHI_BUS_DEBUG
++void mhi_create_debugfs(struct mhi_controller *mhi_cntrl);
++void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl);
++void mhi_debugfs_init(void);
++void mhi_debugfs_exit(void);
++#else
++static inline void mhi_create_debugfs(struct mhi_controller *mhi_cntrl)
++{
++}
++
++static inline void mhi_destroy_debugfs(struct mhi_controller *mhi_cntrl)
++{
++}
++
++static inline void mhi_debugfs_init(void)
++{
++}
++
++static inline void mhi_debugfs_exit(void)
++{
++}
++#endif
++
++struct mhi_device *mhi_alloc_device(struct mhi_controller *mhi_cntrl);
++
++int mhi_destroy_device(struct device *dev, void *data);
++void mhi_create_devices(struct mhi_controller *mhi_cntrl);
++
++int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl,
++ struct image_info **image_info, size_t alloc_size);
++void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl,
++ struct image_info *image_info);
++
++/* Power management APIs */
++enum mhi_pm_state __must_check mhi_tryset_pm_state(
++ struct mhi_controller *mhi_cntrl,
++ enum mhi_pm_state state);
++const char *to_mhi_pm_state_str(u32 state);
++int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl,
++ enum dev_st_transition state);
++void mhi_pm_st_worker(struct work_struct *work);
++void mhi_pm_sys_err_handler(struct mhi_controller *mhi_cntrl);
++int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl);
++int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl);
++void mhi_pm_m1_transition(struct mhi_controller *mhi_cntrl);
++int mhi_pm_m3_transition(struct mhi_controller *mhi_cntrl);
++int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl);
++int mhi_send_cmd(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
++ enum mhi_cmd_type cmd);
++int mhi_download_amss_image(struct mhi_controller *mhi_cntrl);
++static inline bool mhi_is_active(struct mhi_controller *mhi_cntrl)
++{
++ return (mhi_cntrl->dev_state >= MHI_STATE_M0 &&
++ mhi_cntrl->dev_state <= MHI_STATE_M3_FAST);
++}
++
++static inline void mhi_trigger_resume(struct mhi_controller *mhi_cntrl)
++{
++ pm_wakeup_event(&mhi_cntrl->mhi_dev->dev, 0);
++ mhi_cntrl->runtime_get(mhi_cntrl);
++ mhi_cntrl->runtime_put(mhi_cntrl);
++}
++
++/* Register access methods */
++void mhi_db_brstmode(struct mhi_controller *mhi_cntrl, struct db_cfg *db_cfg,
++ void __iomem *db_addr, dma_addr_t db_val);
++void mhi_db_brstmode_disable(struct mhi_controller *mhi_cntrl,
++ struct db_cfg *db_mode, void __iomem *db_addr,
++ dma_addr_t db_val);
++int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset, u32 *out);
++int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset, u32 mask,
++ u32 shift, u32 *out);
++int __must_check mhi_poll_reg_field(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset, u32 mask,
++ u32 shift, u32 val, u32 delayus);
++void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
++ u32 offset, u32 val);
++void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
++ u32 offset, u32 mask, u32 shift, u32 val);
++void mhi_ring_er_db(struct mhi_event *mhi_event);
++void mhi_write_db(struct mhi_controller *mhi_cntrl, void __iomem *db_addr,
++ dma_addr_t db_val);
++void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd);
++void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan);
++
++/* Initialization methods */
++int mhi_init_mmio(struct mhi_controller *mhi_cntrl);
++int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl);
++void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl);
++int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl);
++void mhi_deinit_free_irq(struct mhi_controller *mhi_cntrl);
++void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
++ struct image_info *img_info);
++void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
++int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan);
++int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan);
++void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan);
++void mhi_reset_chan(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan);
++
++/* Event processing methods */
++void mhi_ctrl_ev_task(unsigned long data);
++void mhi_ev_task(unsigned long data);
++int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event, u32 event_quota);
++int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event, u32 event_quota);
++
++/* ISR handlers */
++irqreturn_t mhi_irq_handler(int irq_number, void *dev);
++irqreturn_t mhi_intvec_threaded_handler(int irq_number, void *dev);
++irqreturn_t mhi_intvec_handler(int irq_number, void *dev);
++
++int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
++ struct mhi_buf_info *info, enum mhi_flags flags);
++int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info);
++int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info);
++void mhi_unmap_single_no_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info);
++void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info);
++
++#endif /* _MHI_INT_H */
+diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c
+new file mode 100644
+index 0000000000000..6b36689999427
+--- /dev/null
++++ b/drivers/bus/mhi/host/main.c
+@@ -0,0 +1,1673 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/dma-direction.h>
++#include <linux/dma-mapping.h>
++#include <linux/interrupt.h>
++#include <linux/list.h>
++#include <linux/mhi.h>
++#include <linux/module.h>
++#include <linux/skbuff.h>
++#include <linux/slab.h>
++#include "internal.h"
++
++int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset, u32 *out)
++{
++ return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out);
++}
++
++int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset,
++ u32 mask, u32 shift, u32 *out)
++{
++ u32 tmp;
++ int ret;
++
++ ret = mhi_read_reg(mhi_cntrl, base, offset, &tmp);
++ if (ret)
++ return ret;
++
++ *out = (tmp & mask) >> shift;
++
++ return 0;
++}
++
++int __must_check mhi_poll_reg_field(struct mhi_controller *mhi_cntrl,
++ void __iomem *base, u32 offset,
++ u32 mask, u32 shift, u32 val, u32 delayus)
++{
++ int ret;
++ u32 out, retry = (mhi_cntrl->timeout_ms * 1000) / delayus;
++
++ while (retry--) {
++ ret = mhi_read_reg_field(mhi_cntrl, base, offset, mask, shift,
++ &out);
++ if (ret)
++ return ret;
++
++ if (out == val)
++ return 0;
++
++ fsleep(delayus);
++ }
++
++ return -ETIMEDOUT;
++}
++
++void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
++ u32 offset, u32 val)
++{
++ mhi_cntrl->write_reg(mhi_cntrl, base + offset, val);
++}
++
++void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
++ u32 offset, u32 mask, u32 shift, u32 val)
++{
++ int ret;
++ u32 tmp;
++
++ ret = mhi_read_reg(mhi_cntrl, base, offset, &tmp);
++ if (ret)
++ return;
++
++ tmp &= ~mask;
++ tmp |= (val << shift);
++ mhi_write_reg(mhi_cntrl, base, offset, tmp);
++}
++
++void mhi_write_db(struct mhi_controller *mhi_cntrl, void __iomem *db_addr,
++ dma_addr_t db_val)
++{
++ mhi_write_reg(mhi_cntrl, db_addr, 4, upper_32_bits(db_val));
++ mhi_write_reg(mhi_cntrl, db_addr, 0, lower_32_bits(db_val));
++}
++
++void mhi_db_brstmode(struct mhi_controller *mhi_cntrl,
++ struct db_cfg *db_cfg,
++ void __iomem *db_addr,
++ dma_addr_t db_val)
++{
++ if (db_cfg->db_mode) {
++ db_cfg->db_val = db_val;
++ mhi_write_db(mhi_cntrl, db_addr, db_val);
++ db_cfg->db_mode = 0;
++ }
++}
++
++void mhi_db_brstmode_disable(struct mhi_controller *mhi_cntrl,
++ struct db_cfg *db_cfg,
++ void __iomem *db_addr,
++ dma_addr_t db_val)
++{
++ db_cfg->db_val = db_val;
++ mhi_write_db(mhi_cntrl, db_addr, db_val);
++}
++
++void mhi_ring_er_db(struct mhi_event *mhi_event)
++{
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ mhi_event->db_cfg.process_db(mhi_event->mhi_cntrl, &mhi_event->db_cfg,
++ ring->db_addr, le64_to_cpu(*ring->ctxt_wp));
++}
++
++void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd)
++{
++ dma_addr_t db;
++ struct mhi_ring *ring = &mhi_cmd->ring;
++
++ db = ring->iommu_base + (ring->wp - ring->base);
++ *ring->ctxt_wp = cpu_to_le64(db);
++ mhi_write_db(mhi_cntrl, ring->db_addr, db);
++}
++
++void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *ring = &mhi_chan->tre_ring;
++ dma_addr_t db;
++
++ db = ring->iommu_base + (ring->wp - ring->base);
++
++ /*
++ * Writes to the new ring element must be visible to the hardware
++ * before letting h/w know there is a new element to fetch.
++ */
++ dma_wmb();
++ *ring->ctxt_wp = cpu_to_le64(db);
++
++ mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg,
++ ring->db_addr, db);
++}
++
++enum mhi_ee_type mhi_get_exec_env(struct mhi_controller *mhi_cntrl)
++{
++ u32 exec;
++ int ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_EXECENV, &exec);
++
++ return (ret) ? MHI_EE_MAX : exec;
++}
++EXPORT_SYMBOL_GPL(mhi_get_exec_env);
++
++enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl)
++{
++ u32 state;
++ int ret = mhi_read_reg_field(mhi_cntrl, mhi_cntrl->regs, MHISTATUS,
++ MHISTATUS_MHISTATE_MASK,
++ MHISTATUS_MHISTATE_SHIFT, &state);
++ return ret ? MHI_STATE_MAX : state;
++}
++EXPORT_SYMBOL_GPL(mhi_get_mhi_state);
++
++void mhi_soc_reset(struct mhi_controller *mhi_cntrl)
++{
++ if (mhi_cntrl->reset) {
++ mhi_cntrl->reset(mhi_cntrl);
++ return;
++ }
++
++ /* Generic MHI SoC reset */
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->regs, MHI_SOC_RESET_REQ_OFFSET,
++ MHI_SOC_RESET_REQ);
++}
++EXPORT_SYMBOL_GPL(mhi_soc_reset);
++
++int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info)
++{
++ buf_info->p_addr = dma_map_single(mhi_cntrl->cntrl_dev,
++ buf_info->v_addr, buf_info->len,
++ buf_info->dir);
++ if (dma_mapping_error(mhi_cntrl->cntrl_dev, buf_info->p_addr))
++ return -ENOMEM;
++
++ return 0;
++}
++
++int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info)
++{
++ void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
++ &buf_info->p_addr, GFP_ATOMIC);
++
++ if (!buf)
++ return -ENOMEM;
++
++ if (buf_info->dir == DMA_TO_DEVICE)
++ memcpy(buf, buf_info->v_addr, buf_info->len);
++
++ buf_info->bb_addr = buf;
++
++ return 0;
++}
++
++void mhi_unmap_single_no_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info)
++{
++ dma_unmap_single(mhi_cntrl->cntrl_dev, buf_info->p_addr, buf_info->len,
++ buf_info->dir);
++}
++
++void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl,
++ struct mhi_buf_info *buf_info)
++{
++ if (buf_info->dir == DMA_FROM_DEVICE)
++ memcpy(buf_info->v_addr, buf_info->bb_addr, buf_info->len);
++
++ dma_free_coherent(mhi_cntrl->cntrl_dev, buf_info->len,
++ buf_info->bb_addr, buf_info->p_addr);
++}
++
++static int get_nr_avail_ring_elements(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring)
++{
++ int nr_el;
++
++ if (ring->wp < ring->rp) {
++ nr_el = ((ring->rp - ring->wp) / ring->el_size) - 1;
++ } else {
++ nr_el = (ring->rp - ring->base) / ring->el_size;
++ nr_el += ((ring->base + ring->len - ring->wp) /
++ ring->el_size) - 1;
++ }
++
++ return nr_el;
++}
++
++static void *mhi_to_virtual(struct mhi_ring *ring, dma_addr_t addr)
++{
++ return (addr - ring->iommu_base) + ring->base;
++}
++
++static void mhi_add_ring_element(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring)
++{
++ ring->wp += ring->el_size;
++ if (ring->wp >= (ring->base + ring->len))
++ ring->wp = ring->base;
++ /* smp update */
++ smp_wmb();
++}
++
++static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring)
++{
++ ring->rp += ring->el_size;
++ if (ring->rp >= (ring->base + ring->len))
++ ring->rp = ring->base;
++ /* smp update */
++ smp_wmb();
++}
++
++static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr)
++{
++ return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len;
++}
++
++int mhi_destroy_device(struct device *dev, void *data)
++{
++ struct mhi_chan *ul_chan, *dl_chan;
++ struct mhi_device *mhi_dev;
++ struct mhi_controller *mhi_cntrl;
++ enum mhi_ee_type ee = MHI_EE_MAX;
++
++ if (dev->bus != &mhi_bus_type)
++ return 0;
++
++ mhi_dev = to_mhi_device(dev);
++ mhi_cntrl = mhi_dev->mhi_cntrl;
++
++ /* Only destroy virtual devices that are attached to the bus */
++ if (mhi_dev->dev_type == MHI_DEVICE_CONTROLLER)
++ return 0;
++
++ ul_chan = mhi_dev->ul_chan;
++ dl_chan = mhi_dev->dl_chan;
++
++ /*
++ * If an execution environment is specified, remove only those devices
++ * that were started in it, based on the ee_mask of the channels, as we
++ * move on to a different execution environment
++ */
++ if (data)
++ ee = *(enum mhi_ee_type *)data;
++
++ /*
++ * For the suspend and resume case, this function will get called
++ * without mhi_unregister_controller(). Hence, we need to drop the
++ * references to mhi_dev created for ul and dl channels. We can
++ * be sure that there will be no instances of mhi_dev left after
++ * this.
++ */
++ if (ul_chan) {
++ if (ee != MHI_EE_MAX && !(ul_chan->ee_mask & BIT(ee)))
++ return 0;
++
++ put_device(&ul_chan->mhi_dev->dev);
++ }
++
++ if (dl_chan) {
++ if (ee != MHI_EE_MAX && !(dl_chan->ee_mask & BIT(ee)))
++ return 0;
++
++ put_device(&dl_chan->mhi_dev->dev);
++ }
++
++ dev_dbg(&mhi_cntrl->mhi_dev->dev, "destroy device for chan:%s\n",
++ mhi_dev->name);
++
++ /* Notify the client and remove the device from MHI bus */
++ device_del(dev);
++ put_device(dev);
++
++ return 0;
++}
++
++int mhi_get_free_desc_count(struct mhi_device *mhi_dev,
++ enum dma_data_direction dir)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ?
++ mhi_dev->ul_chan : mhi_dev->dl_chan;
++ struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
++
++ return get_nr_avail_ring_elements(mhi_cntrl, tre_ring);
++}
++EXPORT_SYMBOL_GPL(mhi_get_free_desc_count);
++
++void mhi_notify(struct mhi_device *mhi_dev, enum mhi_callback cb_reason)
++{
++ struct mhi_driver *mhi_drv;
++
++ if (!mhi_dev->dev.driver)
++ return;
++
++ mhi_drv = to_mhi_driver(mhi_dev->dev.driver);
++
++ if (mhi_drv->status_cb)
++ mhi_drv->status_cb(mhi_dev, cb_reason);
++}
++EXPORT_SYMBOL_GPL(mhi_notify);
++
++/* Bind MHI channels to MHI devices */
++void mhi_create_devices(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_chan *mhi_chan;
++ struct mhi_device *mhi_dev;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int i, ret;
++
++ mhi_chan = mhi_cntrl->mhi_chan;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
++ if (!mhi_chan->configured || mhi_chan->mhi_dev ||
++ !(mhi_chan->ee_mask & BIT(mhi_cntrl->ee)))
++ continue;
++ mhi_dev = mhi_alloc_device(mhi_cntrl);
++ if (IS_ERR(mhi_dev))
++ return;
++
++ mhi_dev->dev_type = MHI_DEVICE_XFER;
++ switch (mhi_chan->dir) {
++ case DMA_TO_DEVICE:
++ mhi_dev->ul_chan = mhi_chan;
++ mhi_dev->ul_chan_id = mhi_chan->chan;
++ break;
++ case DMA_FROM_DEVICE:
++ /* We use dl_chan as offload channels */
++ mhi_dev->dl_chan = mhi_chan;
++ mhi_dev->dl_chan_id = mhi_chan->chan;
++ break;
++ default:
++ dev_err(dev, "Direction not supported\n");
++ put_device(&mhi_dev->dev);
++ return;
++ }
++
++ get_device(&mhi_dev->dev);
++ mhi_chan->mhi_dev = mhi_dev;
++
++ /* Check next channel if it matches */
++ if ((i + 1) < mhi_cntrl->max_chan && mhi_chan[1].configured) {
++ if (!strcmp(mhi_chan[1].name, mhi_chan->name)) {
++ i++;
++ mhi_chan++;
++ if (mhi_chan->dir == DMA_TO_DEVICE) {
++ mhi_dev->ul_chan = mhi_chan;
++ mhi_dev->ul_chan_id = mhi_chan->chan;
++ } else {
++ mhi_dev->dl_chan = mhi_chan;
++ mhi_dev->dl_chan_id = mhi_chan->chan;
++ }
++ get_device(&mhi_dev->dev);
++ mhi_chan->mhi_dev = mhi_dev;
++ }
++ }
++
++ /* Channel name is the same for both UL and DL */
++ mhi_dev->name = mhi_chan->name;
++ dev_set_name(&mhi_dev->dev, "%s_%s",
++ dev_name(&mhi_cntrl->mhi_dev->dev),
++ mhi_dev->name);
++
++ /* Init wakeup source if available */
++ if (mhi_dev->dl_chan && mhi_dev->dl_chan->wake_capable)
++ device_init_wakeup(&mhi_dev->dev, true);
++
++ ret = device_add(&mhi_dev->dev);
++ if (ret)
++ put_device(&mhi_dev->dev);
++ }
++}
++
++irqreturn_t mhi_irq_handler(int irq_number, void *dev)
++{
++ struct mhi_event *mhi_event = dev;
++ struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
++ struct mhi_event_ctxt *er_ctxt =
++ &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
++ struct mhi_ring *ev_ring = &mhi_event->ring;
++ dma_addr_t ptr = le64_to_cpu(er_ctxt->rp);
++ void *dev_rp;
++
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ return IRQ_HANDLED;
++ }
++
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++
++ /* Only proceed if event ring has pending events */
++ if (ev_ring->rp == dev_rp)
++ return IRQ_HANDLED;
++
++ /* For client managed event ring, notify pending data */
++ if (mhi_event->cl_manage) {
++ struct mhi_chan *mhi_chan = mhi_event->mhi_chan;
++ struct mhi_device *mhi_dev = mhi_chan->mhi_dev;
++
++ if (mhi_dev)
++ mhi_notify(mhi_dev, MHI_CB_PENDING_DATA);
++ } else {
++ tasklet_schedule(&mhi_event->task);
++ }
++
++ return IRQ_HANDLED;
++}
++
++irqreturn_t mhi_intvec_threaded_handler(int irq_number, void *priv)
++{
++ struct mhi_controller *mhi_cntrl = priv;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_state state;
++ enum mhi_pm_state pm_state = 0;
++ enum mhi_ee_type ee;
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ goto exit_intvec;
++ }
++
++ state = mhi_get_mhi_state(mhi_cntrl);
++ ee = mhi_get_exec_env(mhi_cntrl);
++ dev_dbg(dev, "local ee: %s state: %s device ee: %s state: %s\n",
++ TO_MHI_EXEC_STR(mhi_cntrl->ee),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ TO_MHI_EXEC_STR(ee), TO_MHI_STATE_STR(state));
++
++ if (state == MHI_STATE_SYS_ERR) {
++ dev_dbg(dev, "System error detected\n");
++ pm_state = mhi_tryset_pm_state(mhi_cntrl,
++ MHI_PM_SYS_ERR_DETECT);
++ }
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ if (pm_state != MHI_PM_SYS_ERR_DETECT)
++ goto exit_intvec;
++
++ switch (ee) {
++ case MHI_EE_RDDM:
++ /* proceed if power down is not already in progress */
++ if (mhi_cntrl->rddm_image && mhi_is_active(mhi_cntrl)) {
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_RDDM);
++ mhi_cntrl->ee = ee;
++ wake_up_all(&mhi_cntrl->state_event);
++ }
++ break;
++ case MHI_EE_PBL:
++ case MHI_EE_EDL:
++ case MHI_EE_PTHRU:
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_FATAL_ERROR);
++ mhi_cntrl->ee = ee;
++ wake_up_all(&mhi_cntrl->state_event);
++ mhi_pm_sys_err_handler(mhi_cntrl);
++ break;
++ default:
++ wake_up_all(&mhi_cntrl->state_event);
++ mhi_pm_sys_err_handler(mhi_cntrl);
++ break;
++ }
++
++exit_intvec:
++
++ return IRQ_HANDLED;
++}
++
++irqreturn_t mhi_intvec_handler(int irq_number, void *dev)
++{
++ struct mhi_controller *mhi_cntrl = dev;
++
++ /* Wake up events waiting for state change */
++ wake_up_all(&mhi_cntrl->state_event);
++
++ return IRQ_WAKE_THREAD;
++}
++
++static void mhi_recycle_ev_ring_element(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring)
++{
++ dma_addr_t ctxt_wp;
++
++ /* Update the WP */
++ ring->wp += ring->el_size;
++ ctxt_wp = le64_to_cpu(*ring->ctxt_wp) + ring->el_size;
++
++ if (ring->wp >= (ring->base + ring->len)) {
++ ring->wp = ring->base;
++ ctxt_wp = ring->iommu_base;
++ }
++
++ *ring->ctxt_wp = cpu_to_le64(ctxt_wp);
++
++ /* Update the RP */
++ ring->rp += ring->el_size;
++ if (ring->rp >= (ring->base + ring->len))
++ ring->rp = ring->base;
++
++ /* Update to all cores */
++ smp_wmb();
++}
++
++static int parse_xfer_event(struct mhi_controller *mhi_cntrl,
++ struct mhi_tre *event,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *buf_ring, *tre_ring;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ struct mhi_result result;
++ unsigned long flags = 0;
++ u32 ev_code;
++
++ ev_code = MHI_TRE_GET_EV_CODE(event);
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++
++ result.transaction_status = (ev_code == MHI_EV_CC_OVERFLOW) ?
++ -EOVERFLOW : 0;
++
++ /*
++ * If it's a DB Event then we need to grab the lock
++ * with preemption disabled and as a write because we
++ * have to update db register and there are chances that
++ * another thread could be doing the same.
++ */
++ if (ev_code >= MHI_EV_CC_OOB)
++ write_lock_irqsave(&mhi_chan->lock, flags);
++ else
++ read_lock_bh(&mhi_chan->lock);
++
++ if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
++ goto end_process_tx_event;
++
++ switch (ev_code) {
++ case MHI_EV_CC_OVERFLOW:
++ case MHI_EV_CC_EOB:
++ case MHI_EV_CC_EOT:
++ {
++ dma_addr_t ptr = MHI_TRE_GET_EV_PTR(event);
++ struct mhi_tre *local_rp, *ev_tre;
++ void *dev_rp;
++ struct mhi_buf_info *buf_info;
++ u16 xfer_len;
++
++ if (!is_valid_ring_ptr(tre_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event element points outside of the tre ring\n");
++ break;
++ }
++ /* Get the TRB this event points to */
++ ev_tre = mhi_to_virtual(tre_ring, ptr);
++
++ dev_rp = ev_tre + 1;
++ if (dev_rp >= (tre_ring->base + tre_ring->len))
++ dev_rp = tre_ring->base;
++
++ result.dir = mhi_chan->dir;
++
++ local_rp = tre_ring->rp;
++ while (local_rp != dev_rp) {
++ buf_info = buf_ring->rp;
++ /* If it's the last TRE, get length from the event */
++ if (local_rp == ev_tre)
++ xfer_len = MHI_TRE_GET_EV_LEN(event);
++ else
++ xfer_len = buf_info->len;
++
++ /* Unmap if it's not pre-mapped by client */
++ if (likely(!buf_info->pre_mapped))
++ mhi_cntrl->unmap_single(mhi_cntrl, buf_info);
++
++ result.buf_addr = buf_info->cb_buf;
++
++ /* truncate to buf len if xfer_len is larger */
++ result.bytes_xferd =
++ min_t(u16, xfer_len, buf_info->len);
++ mhi_del_ring_element(mhi_cntrl, buf_ring);
++ mhi_del_ring_element(mhi_cntrl, tre_ring);
++ local_rp = tre_ring->rp;
++
++ /* notify client */
++ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++
++ if (mhi_chan->dir == DMA_TO_DEVICE) {
++ atomic_dec(&mhi_cntrl->pending_pkts);
++ /* Release the reference got from mhi_queue() */
++ mhi_cntrl->runtime_put(mhi_cntrl);
++ }
++
++ /*
++ * Recycle the buffer if it is pre-allocated; if there is
++ * an error, there is not much we can do apart from
++ * dropping the packet
++ */
++ if (mhi_chan->pre_alloc) {
++ if (mhi_queue_buf(mhi_chan->mhi_dev,
++ mhi_chan->dir,
++ buf_info->cb_buf,
++ buf_info->len, MHI_EOT)) {
++ dev_err(dev,
++ "Error recycling buffer for chan:%d\n",
++ mhi_chan->chan);
++ kfree(buf_info->cb_buf);
++ }
++ }
++ }
++ break;
++ } /* CC_EOT */
++ case MHI_EV_CC_OOB:
++ case MHI_EV_CC_DB_MODE:
++ {
++ unsigned long pm_lock_flags;
++
++ mhi_chan->db_cfg.db_mode = 1;
++ read_lock_irqsave(&mhi_cntrl->pm_lock, pm_lock_flags);
++ if (tre_ring->wp != tre_ring->rp &&
++ MHI_DB_ACCESS_VALID(mhi_cntrl)) {
++ mhi_ring_chan_db(mhi_cntrl, mhi_chan);
++ }
++ read_unlock_irqrestore(&mhi_cntrl->pm_lock, pm_lock_flags);
++ break;
++ }
++ case MHI_EV_CC_BAD_TRE:
++ default:
++ dev_err(dev, "Unknown event 0x%x\n", ev_code);
++ break;
++ } /* switch(MHI_EV_READ_CODE(EV_TRB_CODE,event)) */
++
++end_process_tx_event:
++ if (ev_code >= MHI_EV_CC_OOB)
++ write_unlock_irqrestore(&mhi_chan->lock, flags);
++ else
++ read_unlock_bh(&mhi_chan->lock);
++
++ return 0;
++}
++
++static int parse_rsc_event(struct mhi_controller *mhi_cntrl,
++ struct mhi_tre *event,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *buf_ring, *tre_ring;
++ struct mhi_buf_info *buf_info;
++ struct mhi_result result;
++ int ev_code;
++ u32 cookie; /* offset to local descriptor */
++ u16 xfer_len;
++
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++
++ ev_code = MHI_TRE_GET_EV_CODE(event);
++ cookie = MHI_TRE_GET_EV_COOKIE(event);
++ xfer_len = MHI_TRE_GET_EV_LEN(event);
++
++ /* Received an out-of-bounds cookie */
++ WARN_ON(cookie >= buf_ring->len);
++
++ buf_info = buf_ring->base + cookie;
++
++ result.transaction_status = (ev_code == MHI_EV_CC_OVERFLOW) ?
++ -EOVERFLOW : 0;
++
++ /* truncate to buf len if xfer_len is larger */
++ result.bytes_xferd = min_t(u16, xfer_len, buf_info->len);
++ result.buf_addr = buf_info->cb_buf;
++ result.dir = mhi_chan->dir;
++
++ read_lock_bh(&mhi_chan->lock);
++
++ if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
++ goto end_process_rsc_event;
++
++ WARN_ON(!buf_info->used);
++
++ /* notify the client */
++ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++
++ /*
++ * Note: We're arbitrarily incrementing RP even though the completion
++ * packet we just processed might not be the same one. We can do this
++ * because the device is guaranteed to cache descriptors in the order
++ * it receives them, so even though the completion event is different
++ * we can re-use all descriptors in between.
++ * Example:
++ * The transfer ring has descriptors: A, B, C, D
++ * The last descriptor the host queued is D (WP) and the first
++ * descriptor the host queued is A (RP).
++ * The completion event we just serviced is for descriptor C.
++ * Then we can safely queue descriptors to replace A, B, and C
++ * even though the host did not receive completions for A and B.
++ */
++ mhi_del_ring_element(mhi_cntrl, tre_ring);
++ buf_info->used = false;
++
++end_process_rsc_event:
++ read_unlock_bh(&mhi_chan->lock);
++
++ return 0;
++}
++
++static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl,
++ struct mhi_tre *tre)
++{
++ dma_addr_t ptr = MHI_TRE_GET_EV_PTR(tre);
++ struct mhi_cmd *cmd_ring = &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
++ struct mhi_ring *mhi_ring = &cmd_ring->ring;
++ struct mhi_tre *cmd_pkt;
++ struct mhi_chan *mhi_chan;
++ u32 chan;
++
++ if (!is_valid_ring_ptr(mhi_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event element points outside of the cmd ring\n");
++ return;
++ }
++
++ cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
++
++ chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
++
++ if (chan < mhi_cntrl->max_chan &&
++ mhi_cntrl->mhi_chan[chan].configured) {
++ mhi_chan = &mhi_cntrl->mhi_chan[chan];
++ write_lock_bh(&mhi_chan->lock);
++ mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
++ complete(&mhi_chan->completion);
++ write_unlock_bh(&mhi_chan->lock);
++ } else {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Completion packet for invalid channel ID: %d\n", chan);
++ }
++
++ mhi_del_ring_element(mhi_cntrl, mhi_ring);
++}
++
++int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event,
++ u32 event_quota)
++{
++ struct mhi_tre *dev_rp, *local_rp;
++ struct mhi_ring *ev_ring = &mhi_event->ring;
++ struct mhi_event_ctxt *er_ctxt =
++ &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
++ struct mhi_chan *mhi_chan;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 chan;
++ int count = 0;
++ dma_addr_t ptr = le64_to_cpu(er_ctxt->rp);
++
++ /*
++ * This is a quick check to avoid unnecessary event processing
++ * in case MHI is already in error state, but it's still possible
++ * to transition to error state while processing events
++ */
++ if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state)))
++ return -EIO;
++
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ return -EIO;
++ }
++
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++ local_rp = ev_ring->rp;
++
++ while (dev_rp != local_rp) {
++ enum mhi_pkt_type type = MHI_TRE_GET_EV_TYPE(local_rp);
++
++ switch (type) {
++ case MHI_PKT_TYPE_BW_REQ_EVENT:
++ {
++ struct mhi_link_info *link_info;
++
++ link_info = &mhi_cntrl->mhi_link_info;
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ link_info->target_link_speed =
++ MHI_TRE_GET_EV_LINKSPEED(local_rp);
++ link_info->target_link_width =
++ MHI_TRE_GET_EV_LINKWIDTH(local_rp);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ dev_dbg(dev, "Received BW_REQ event\n");
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_BW_REQ);
++ break;
++ }
++ case MHI_PKT_TYPE_STATE_CHANGE_EVENT:
++ {
++ enum mhi_state new_state;
++
++ new_state = MHI_TRE_GET_EV_STATE(local_rp);
++
++ dev_dbg(dev, "State change event to state: %s\n",
++ TO_MHI_STATE_STR(new_state));
++
++ switch (new_state) {
++ case MHI_STATE_M0:
++ mhi_pm_m0_transition(mhi_cntrl);
++ break;
++ case MHI_STATE_M1:
++ mhi_pm_m1_transition(mhi_cntrl);
++ break;
++ case MHI_STATE_M3:
++ mhi_pm_m3_transition(mhi_cntrl);
++ break;
++ case MHI_STATE_SYS_ERR:
++ {
++ enum mhi_pm_state pm_state;
++
++ dev_dbg(dev, "System error detected\n");
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ pm_state = mhi_tryset_pm_state(mhi_cntrl,
++ MHI_PM_SYS_ERR_DETECT);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (pm_state == MHI_PM_SYS_ERR_DETECT)
++ mhi_pm_sys_err_handler(mhi_cntrl);
++ break;
++ }
++ default:
++ dev_err(dev, "Invalid state: %s\n",
++ TO_MHI_STATE_STR(new_state));
++ }
++
++ break;
++ }
++ case MHI_PKT_TYPE_CMD_COMPLETION_EVENT:
++ mhi_process_cmd_completion(mhi_cntrl, local_rp);
++ break;
++ case MHI_PKT_TYPE_EE_EVENT:
++ {
++ enum dev_st_transition st = DEV_ST_TRANSITION_MAX;
++ enum mhi_ee_type event = MHI_TRE_GET_EV_EXECENV(local_rp);
++
++ dev_dbg(dev, "Received EE event: %s\n",
++ TO_MHI_EXEC_STR(event));
++ switch (event) {
++ case MHI_EE_SBL:
++ st = DEV_ST_TRANSITION_SBL;
++ break;
++ case MHI_EE_WFW:
++ case MHI_EE_AMSS:
++ st = DEV_ST_TRANSITION_MISSION_MODE;
++ break;
++ case MHI_EE_FP:
++ st = DEV_ST_TRANSITION_FP;
++ break;
++ case MHI_EE_RDDM:
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_RDDM);
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->ee = event;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ wake_up_all(&mhi_cntrl->state_event);
++ break;
++ default:
++ dev_err(dev,
++ "Unhandled EE event: 0x%x\n", type);
++ }
++ if (st != DEV_ST_TRANSITION_MAX)
++ mhi_queue_state_transition(mhi_cntrl, st);
++
++ break;
++ }
++ case MHI_PKT_TYPE_TX_EVENT:
++ chan = MHI_TRE_GET_EV_CHID(local_rp);
++
++ WARN_ON(chan >= mhi_cntrl->max_chan);
++
++ /*
++ * Only process the event ring elements whose channel
++ * ID is within the maximum supported range.
++ */
++ if (chan < mhi_cntrl->max_chan) {
++ mhi_chan = &mhi_cntrl->mhi_chan[chan];
++ if (!mhi_chan->configured)
++ break;
++ parse_xfer_event(mhi_cntrl, local_rp, mhi_chan);
++ event_quota--;
++ }
++ break;
++ default:
++ dev_err(dev, "Unhandled event type: %d\n", type);
++ break;
++ }
++
++ mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring);
++ local_rp = ev_ring->rp;
++
++ ptr = le64_to_cpu(er_ctxt->rp);
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ return -EIO;
++ }
++
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++ count++;
++ }
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
++ mhi_ring_er_db(mhi_event);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ return count;
++}
++
++int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event,
++ u32 event_quota)
++{
++ struct mhi_tre *dev_rp, *local_rp;
++ struct mhi_ring *ev_ring = &mhi_event->ring;
++ struct mhi_event_ctxt *er_ctxt =
++ &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index];
++ int count = 0;
++ u32 chan;
++ struct mhi_chan *mhi_chan;
++ dma_addr_t ptr = le64_to_cpu(er_ctxt->rp);
++
++ if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state)))
++ return -EIO;
++
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ return -EIO;
++ }
++
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++ local_rp = ev_ring->rp;
++
++ while (dev_rp != local_rp && event_quota > 0) {
++ enum mhi_pkt_type type = MHI_TRE_GET_EV_TYPE(local_rp);
++
++ chan = MHI_TRE_GET_EV_CHID(local_rp);
++
++ WARN_ON(chan >= mhi_cntrl->max_chan);
++
++ /*
++ * Only process the event ring elements whose channel
++ * ID is within the maximum supported range.
++ */
++ if (chan < mhi_cntrl->max_chan &&
++ mhi_cntrl->mhi_chan[chan].configured) {
++ mhi_chan = &mhi_cntrl->mhi_chan[chan];
++
++ if (likely(type == MHI_PKT_TYPE_TX_EVENT)) {
++ parse_xfer_event(mhi_cntrl, local_rp, mhi_chan);
++ event_quota--;
++ } else if (type == MHI_PKT_TYPE_RSC_TX_EVENT) {
++ parse_rsc_event(mhi_cntrl, local_rp, mhi_chan);
++ event_quota--;
++ }
++ }
++
++ mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring);
++ local_rp = ev_ring->rp;
++
++ ptr = le64_to_cpu(er_ctxt->rp);
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ return -EIO;
++ }
++
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++ count++;
++ }
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
++ mhi_ring_er_db(mhi_event);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ return count;
++}
++
++void mhi_ev_task(unsigned long data)
++{
++ struct mhi_event *mhi_event = (struct mhi_event *)data;
++ struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
++
++ /* process all pending events */
++ spin_lock_bh(&mhi_event->lock);
++ mhi_event->process_event(mhi_cntrl, mhi_event, U32_MAX);
++ spin_unlock_bh(&mhi_event->lock);
++}
++
++void mhi_ctrl_ev_task(unsigned long data)
++{
++ struct mhi_event *mhi_event = (struct mhi_event *)data;
++ struct mhi_controller *mhi_cntrl = mhi_event->mhi_cntrl;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_state state;
++ enum mhi_pm_state pm_state = 0;
++ int ret;
++
++ /*
++ * We can check PM state w/o a lock here because there is no way
++ * PM state can change from reg access valid to no access while this
++ * thread is being executed.
++ */
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ /*
++ * We may have a pending event but are not allowed to
++ * process it since we are probably in a suspended state,
++ * so trigger a resume.
++ */
++ mhi_trigger_resume(mhi_cntrl);
++
++ return;
++ }
++
++ /* Process ctrl events */
++ ret = mhi_event->process_event(mhi_cntrl, mhi_event, U32_MAX);
++
++ /*
++ * We received an IRQ but no events to process, maybe device went to
++ * SYS_ERR state? Check the state to confirm.
++ */
++ if (!ret) {
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ state = mhi_get_mhi_state(mhi_cntrl);
++ if (state == MHI_STATE_SYS_ERR) {
++ dev_dbg(dev, "System error detected\n");
++ pm_state = mhi_tryset_pm_state(mhi_cntrl,
++ MHI_PM_SYS_ERR_DETECT);
++ }
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (pm_state == MHI_PM_SYS_ERR_DETECT)
++ mhi_pm_sys_err_handler(mhi_cntrl);
++ }
++}
++
++static bool mhi_is_ring_full(struct mhi_controller *mhi_cntrl,
++ struct mhi_ring *ring)
++{
++ void *tmp = ring->wp + ring->el_size;
++
++ if (tmp >= (ring->base + ring->len))
++ tmp = ring->base;
++
++ return (tmp == ring->rp);
++}
++
++static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
++ enum dma_data_direction dir, enum mhi_flags mflags)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
++ mhi_dev->dl_chan;
++ struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
++ unsigned long flags;
++ int ret;
++
++ if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
++ return -EIO;
++
++ read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
++
++ ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
++ if (unlikely(ret)) {
++ ret = -EAGAIN;
++ goto exit_unlock;
++ }
++
++ ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags);
++ if (unlikely(ret))
++ goto exit_unlock;
++
++ /* Packet is queued; take a usage ref to exit M3 if necessary.
++ * For a host->device buffer, the balanced put is done on buffer completion;
++ * for a device->host buffer, the balanced put is done after ringing the DB.
++ */
++ mhi_cntrl->runtime_get(mhi_cntrl);
++
++ /* Assert dev_wake (to exit/prevent M1/M2) */
++ mhi_cntrl->wake_toggle(mhi_cntrl);
++
++ if (mhi_chan->dir == DMA_TO_DEVICE)
++ atomic_inc(&mhi_cntrl->pending_pkts);
++
++ if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
++ mhi_ring_chan_db(mhi_cntrl, mhi_chan);
++
++ if (dir == DMA_FROM_DEVICE)
++ mhi_cntrl->runtime_put(mhi_cntrl);
++
++exit_unlock:
++ read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
++
++ return ret;
++}
++
++int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
++ struct sk_buff *skb, size_t len, enum mhi_flags mflags)
++{
++ struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
++ mhi_dev->dl_chan;
++ struct mhi_buf_info buf_info = { };
++
++ buf_info.v_addr = skb->data;
++ buf_info.cb_buf = skb;
++ buf_info.len = len;
++
++ if (unlikely(mhi_chan->pre_alloc))
++ return -EINVAL;
++
++ return mhi_queue(mhi_dev, &buf_info, dir, mflags);
++}
++EXPORT_SYMBOL_GPL(mhi_queue_skb);
++
++int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
++ struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
++{
++ struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
++ mhi_dev->dl_chan;
++ struct mhi_buf_info buf_info = { };
++
++ buf_info.p_addr = mhi_buf->dma_addr;
++ buf_info.cb_buf = mhi_buf;
++ buf_info.pre_mapped = true;
++ buf_info.len = len;
++
++ if (unlikely(mhi_chan->pre_alloc))
++ return -EINVAL;
++
++ return mhi_queue(mhi_dev, &buf_info, dir, mflags);
++}
++EXPORT_SYMBOL_GPL(mhi_queue_dma);
++
++int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
++ struct mhi_buf_info *info, enum mhi_flags flags)
++{
++ struct mhi_ring *buf_ring, *tre_ring;
++ struct mhi_tre *mhi_tre;
++ struct mhi_buf_info *buf_info;
++ int eot, eob, chain, bei;
++ int ret;
++
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++
++ buf_info = buf_ring->wp;
++ WARN_ON(buf_info->used);
++ buf_info->pre_mapped = info->pre_mapped;
++ if (info->pre_mapped)
++ buf_info->p_addr = info->p_addr;
++ else
++ buf_info->v_addr = info->v_addr;
++ buf_info->cb_buf = info->cb_buf;
++ buf_info->wp = tre_ring->wp;
++ buf_info->dir = mhi_chan->dir;
++ buf_info->len = info->len;
++
++ if (!info->pre_mapped) {
++ ret = mhi_cntrl->map_single(mhi_cntrl, buf_info);
++ if (ret)
++ return ret;
++ }
++
++ eob = !!(flags & MHI_EOB);
++ eot = !!(flags & MHI_EOT);
++ chain = !!(flags & MHI_CHAIN);
++ bei = !!(mhi_chan->intmod);
++
++ mhi_tre = tre_ring->wp;
++ mhi_tre->ptr = MHI_TRE_DATA_PTR(buf_info->p_addr);
++ mhi_tre->dword[0] = MHI_TRE_DATA_DWORD0(info->len);
++ mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(bei, eot, eob, chain);
++
++ /* increment WP */
++ mhi_add_ring_element(mhi_cntrl, tre_ring);
++ mhi_add_ring_element(mhi_cntrl, buf_ring);
++
++ return 0;
++}
++
++int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir,
++ void *buf, size_t len, enum mhi_flags mflags)
++{
++ struct mhi_buf_info buf_info = { };
++
++ buf_info.v_addr = buf;
++ buf_info.cb_buf = buf;
++ buf_info.len = len;
++
++ return mhi_queue(mhi_dev, &buf_info, dir, mflags);
++}
++EXPORT_SYMBOL_GPL(mhi_queue_buf);
++
++bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ?
++ mhi_dev->ul_chan : mhi_dev->dl_chan;
++ struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
++
++ return mhi_is_ring_full(mhi_cntrl, tre_ring);
++}
++EXPORT_SYMBOL_GPL(mhi_queue_is_full);
++
++int mhi_send_cmd(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan,
++ enum mhi_cmd_type cmd)
++{
++ struct mhi_tre *cmd_tre = NULL;
++ struct mhi_cmd *mhi_cmd = &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
++ struct mhi_ring *ring = &mhi_cmd->ring;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int chan = 0;
++
++ if (mhi_chan)
++ chan = mhi_chan->chan;
++
++ spin_lock_bh(&mhi_cmd->lock);
++ if (!get_nr_avail_ring_elements(mhi_cntrl, ring)) {
++ spin_unlock_bh(&mhi_cmd->lock);
++ return -ENOMEM;
++ }
++
++ /* prepare the cmd tre */
++ cmd_tre = ring->wp;
++ switch (cmd) {
++ case MHI_CMD_RESET_CHAN:
++ cmd_tre->ptr = MHI_TRE_CMD_RESET_PTR;
++ cmd_tre->dword[0] = MHI_TRE_CMD_RESET_DWORD0;
++ cmd_tre->dword[1] = MHI_TRE_CMD_RESET_DWORD1(chan);
++ break;
++ case MHI_CMD_STOP_CHAN:
++ cmd_tre->ptr = MHI_TRE_CMD_STOP_PTR;
++ cmd_tre->dword[0] = MHI_TRE_CMD_STOP_DWORD0;
++ cmd_tre->dword[1] = MHI_TRE_CMD_STOP_DWORD1(chan);
++ break;
++ case MHI_CMD_START_CHAN:
++ cmd_tre->ptr = MHI_TRE_CMD_START_PTR;
++ cmd_tre->dword[0] = MHI_TRE_CMD_START_DWORD0;
++ cmd_tre->dword[1] = MHI_TRE_CMD_START_DWORD1(chan);
++ break;
++ default:
++ dev_err(dev, "Command not supported\n");
++ break;
++ }
++
++ /* queue to hardware */
++ mhi_add_ring_element(mhi_cntrl, ring);
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl)))
++ mhi_ring_cmd_db(mhi_cntrl, mhi_cmd);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ spin_unlock_bh(&mhi_cmd->lock);
++
++ return 0;
++}
++
++static int mhi_update_channel_state(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan,
++ enum mhi_ch_state_type to_state)
++{
++ struct device *dev = &mhi_chan->mhi_dev->dev;
++ enum mhi_cmd_type cmd = MHI_CMD_NOP;
++ int ret;
++
++ dev_dbg(dev, "%d: Updating channel state to: %s\n", mhi_chan->chan,
++ TO_CH_STATE_TYPE_STR(to_state));
++
++ switch (to_state) {
++ case MHI_CH_STATE_TYPE_RESET:
++ write_lock_irq(&mhi_chan->lock);
++ if (mhi_chan->ch_state != MHI_CH_STATE_STOP &&
++ mhi_chan->ch_state != MHI_CH_STATE_ENABLED &&
++ mhi_chan->ch_state != MHI_CH_STATE_SUSPENDED) {
++ write_unlock_irq(&mhi_chan->lock);
++ return -EINVAL;
++ }
++ mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
++ write_unlock_irq(&mhi_chan->lock);
++
++ cmd = MHI_CMD_RESET_CHAN;
++ break;
++ case MHI_CH_STATE_TYPE_STOP:
++ if (mhi_chan->ch_state != MHI_CH_STATE_ENABLED)
++ return -EINVAL;
++
++ cmd = MHI_CMD_STOP_CHAN;
++ break;
++ case MHI_CH_STATE_TYPE_START:
++ if (mhi_chan->ch_state != MHI_CH_STATE_STOP &&
++ mhi_chan->ch_state != MHI_CH_STATE_DISABLED)
++ return -EINVAL;
++
++ cmd = MHI_CMD_START_CHAN;
++ break;
++ default:
++ dev_err(dev, "%d: Channel state update to %s not allowed\n",
++ mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
++ return -EINVAL;
++ }
++
++ /* bring host and device out of suspended states */
++ ret = mhi_device_get_sync(mhi_cntrl->mhi_dev);
++ if (ret)
++ return ret;
++ mhi_cntrl->runtime_get(mhi_cntrl);
++
++ reinit_completion(&mhi_chan->completion);
++ ret = mhi_send_cmd(mhi_cntrl, mhi_chan, cmd);
++ if (ret) {
++ dev_err(dev, "%d: Failed to send %s channel command\n",
++ mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
++ goto exit_channel_update;
++ }
++
++ ret = wait_for_completion_timeout(&mhi_chan->completion,
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++ if (!ret || mhi_chan->ccs != MHI_EV_CC_SUCCESS) {
++ dev_err(dev,
++ "%d: Failed to receive %s channel command completion\n",
++ mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
++ ret = -EIO;
++ goto exit_channel_update;
++ }
++
++ ret = 0;
++
++ if (to_state != MHI_CH_STATE_TYPE_RESET) {
++ write_lock_irq(&mhi_chan->lock);
++ mhi_chan->ch_state = (to_state == MHI_CH_STATE_TYPE_START) ?
++ MHI_CH_STATE_ENABLED : MHI_CH_STATE_STOP;
++ write_unlock_irq(&mhi_chan->lock);
++ }
++
++ dev_dbg(dev, "%d: Channel state change to %s successful\n",
++ mhi_chan->chan, TO_CH_STATE_TYPE_STR(to_state));
++
++exit_channel_update:
++ mhi_cntrl->runtime_put(mhi_cntrl);
++ mhi_device_put(mhi_cntrl->mhi_dev);
++
++ return ret;
++}
++
++static void mhi_unprepare_channel(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ int ret;
++ struct device *dev = &mhi_chan->mhi_dev->dev;
++
++ mutex_lock(&mhi_chan->mutex);
++
++ if (!(BIT(mhi_cntrl->ee) & mhi_chan->ee_mask)) {
++ dev_dbg(dev, "Current EE: %s Required EE Mask: 0x%x\n",
++ TO_MHI_EXEC_STR(mhi_cntrl->ee), mhi_chan->ee_mask);
++ goto exit_unprepare_channel;
++ }
++
++ /* no more processing events for this channel */
++ ret = mhi_update_channel_state(mhi_cntrl, mhi_chan,
++ MHI_CH_STATE_TYPE_RESET);
++ if (ret)
++ dev_err(dev, "%d: Failed to reset channel, still resetting\n",
++ mhi_chan->chan);
++
++exit_unprepare_channel:
++ write_lock_irq(&mhi_chan->lock);
++ mhi_chan->ch_state = MHI_CH_STATE_DISABLED;
++ write_unlock_irq(&mhi_chan->lock);
++
++ if (!mhi_chan->offload_ch) {
++ mhi_reset_chan(mhi_cntrl, mhi_chan);
++ mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
++ }
++ dev_dbg(dev, "%d: successfully reset\n", mhi_chan->chan);
++
++ mutex_unlock(&mhi_chan->mutex);
++}
++
++int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ int ret = 0;
++ struct device *dev = &mhi_chan->mhi_dev->dev;
++
++ if (!(BIT(mhi_cntrl->ee) & mhi_chan->ee_mask)) {
++ dev_err(dev, "Current EE: %s Required EE Mask: 0x%x\n",
++ TO_MHI_EXEC_STR(mhi_cntrl->ee), mhi_chan->ee_mask);
++ return -ENOTCONN;
++ }
++
++ mutex_lock(&mhi_chan->mutex);
++
++ /* Check if the client manages the channel context for offload channels */
++ if (!mhi_chan->offload_ch) {
++ ret = mhi_init_chan_ctxt(mhi_cntrl, mhi_chan);
++ if (ret)
++ goto error_init_chan;
++ }
++
++ ret = mhi_update_channel_state(mhi_cntrl, mhi_chan,
++ MHI_CH_STATE_TYPE_START);
++ if (ret)
++ goto error_pm_state;
++
++ /* Pre-allocate buffer for xfer ring */
++ if (mhi_chan->pre_alloc) {
++ int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
++ &mhi_chan->tre_ring);
++ size_t len = mhi_cntrl->buffer_len;
++
++ while (nr_el--) {
++ void *buf;
++ struct mhi_buf_info info = { };
++ buf = kmalloc(len, GFP_KERNEL);
++ if (!buf) {
++ ret = -ENOMEM;
++ goto error_pre_alloc;
++ }
++
++ /* Prepare transfer descriptors */
++ info.v_addr = buf;
++ info.cb_buf = buf;
++ info.len = len;
++ ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &info, MHI_EOT);
++ if (ret) {
++ kfree(buf);
++ goto error_pre_alloc;
++ }
++ }
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (MHI_DB_ACCESS_VALID(mhi_cntrl)) {
++ read_lock_irq(&mhi_chan->lock);
++ mhi_ring_chan_db(mhi_cntrl, mhi_chan);
++ read_unlock_irq(&mhi_chan->lock);
++ }
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ }
++
++ mutex_unlock(&mhi_chan->mutex);
++
++ return 0;
++
++error_pm_state:
++ if (!mhi_chan->offload_ch)
++ mhi_deinit_chan_ctxt(mhi_cntrl, mhi_chan);
++
++error_init_chan:
++ mutex_unlock(&mhi_chan->mutex);
++
++ return ret;
++
++error_pre_alloc:
++ mutex_unlock(&mhi_chan->mutex);
++ mhi_unprepare_channel(mhi_cntrl, mhi_chan);
++
++ return ret;
++}
++
++static void mhi_mark_stale_events(struct mhi_controller *mhi_cntrl,
++ struct mhi_event *mhi_event,
++ struct mhi_event_ctxt *er_ctxt,
++ int chan)
++
++{
++ struct mhi_tre *dev_rp, *local_rp;
++ struct mhi_ring *ev_ring;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ unsigned long flags;
++ dma_addr_t ptr;
++
++ dev_dbg(dev, "Marking all events for chan: %d as stale\n", chan);
++
++ ev_ring = &mhi_event->ring;
++
++ /* mark all stale events related to channel as STALE event */
++ spin_lock_irqsave(&mhi_event->lock, flags);
++
++ ptr = le64_to_cpu(er_ctxt->rp);
++ if (!is_valid_ring_ptr(ev_ring, ptr)) {
++ dev_err(&mhi_cntrl->mhi_dev->dev,
++ "Event ring rp points outside of the event ring\n");
++ dev_rp = ev_ring->rp;
++ } else {
++ dev_rp = mhi_to_virtual(ev_ring, ptr);
++ }
++
++ local_rp = ev_ring->rp;
++ while (dev_rp != local_rp) {
++ if (MHI_TRE_GET_EV_TYPE(local_rp) == MHI_PKT_TYPE_TX_EVENT &&
++ chan == MHI_TRE_GET_EV_CHID(local_rp))
++ local_rp->dword[1] = MHI_TRE_EV_DWORD1(chan,
++ MHI_PKT_TYPE_STALE_EVENT);
++ local_rp++;
++ if (local_rp == (ev_ring->base + ev_ring->len))
++ local_rp = ev_ring->base;
++ }
++
++ dev_dbg(dev, "Finished marking events as stale events\n");
++ spin_unlock_irqrestore(&mhi_event->lock, flags);
++}
++
++static void mhi_reset_data_chan(struct mhi_controller *mhi_cntrl,
++ struct mhi_chan *mhi_chan)
++{
++ struct mhi_ring *buf_ring, *tre_ring;
++ struct mhi_result result;
++
++ /* Reset any pending buffers */
++ buf_ring = &mhi_chan->buf_ring;
++ tre_ring = &mhi_chan->tre_ring;
++ result.transaction_status = -ENOTCONN;
++ result.bytes_xferd = 0;
++ while (tre_ring->rp != tre_ring->wp) {
++ struct mhi_buf_info *buf_info = buf_ring->rp;
++
++ if (mhi_chan->dir == DMA_TO_DEVICE) {
++ atomic_dec(&mhi_cntrl->pending_pkts);
++ /* Release the reference taken in mhi_queue() */
++ mhi_cntrl->runtime_put(mhi_cntrl);
++ }
++
++ if (!buf_info->pre_mapped)
++ mhi_cntrl->unmap_single(mhi_cntrl, buf_info);
++
++ mhi_del_ring_element(mhi_cntrl, buf_ring);
++ mhi_del_ring_element(mhi_cntrl, tre_ring);
++
++ if (mhi_chan->pre_alloc) {
++ kfree(buf_info->cb_buf);
++ } else {
++ result.buf_addr = buf_info->cb_buf;
++ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
++ }
++ }
++}
++
++void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
++{
++ struct mhi_event *mhi_event;
++ struct mhi_event_ctxt *er_ctxt;
++ int chan = mhi_chan->chan;
++
++ /* Nothing to reset, client doesn't queue buffers */
++ if (mhi_chan->offload_ch)
++ return;
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
++ er_ctxt = &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_chan->er_index];
++
++ mhi_mark_stale_events(mhi_cntrl, mhi_event, er_ctxt, chan);
++
++ mhi_reset_data_chan(mhi_cntrl, mhi_chan);
++
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++}
++
++/* Move channel to start state */
++int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
++{
++ int ret, dir;
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan;
++
++ for (dir = 0; dir < 2; dir++) {
++ mhi_chan = dir ? mhi_dev->dl_chan : mhi_dev->ul_chan;
++ if (!mhi_chan)
++ continue;
++
++ ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
++ if (ret)
++ goto error_open_chan;
++ }
++
++ return 0;
++
++error_open_chan:
++ for (--dir; dir >= 0; dir--) {
++ mhi_chan = dir ? mhi_dev->dl_chan : mhi_dev->ul_chan;
++ if (!mhi_chan)
++ continue;
++
++ mhi_unprepare_channel(mhi_cntrl, mhi_chan);
++ }
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_prepare_for_transfer);
++
++void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan;
++ int dir;
++
++ for (dir = 0; dir < 2; dir++) {
++ mhi_chan = dir ? mhi_dev->ul_chan : mhi_dev->dl_chan;
++ if (!mhi_chan)
++ continue;
++
++ mhi_unprepare_channel(mhi_cntrl, mhi_chan);
++ }
++}
++EXPORT_SYMBOL_GPL(mhi_unprepare_from_transfer);
++
++int mhi_poll(struct mhi_device *mhi_dev, u32 budget)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ struct mhi_chan *mhi_chan = mhi_dev->dl_chan;
++ struct mhi_event *mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
++ int ret;
++
++ spin_lock_bh(&mhi_event->lock);
++ ret = mhi_event->process_event(mhi_cntrl, mhi_event, budget);
++ spin_unlock_bh(&mhi_event->lock);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_poll);
+diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
+new file mode 100644
+index 0000000000000..b780990faf806
+--- /dev/null
++++ b/drivers/bus/mhi/host/pci_generic.c
+@@ -0,0 +1,1146 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * MHI PCI driver - MHI over PCI controller driver
++ *
++ * This module is a generic driver for registering MHI-over-PCI devices,
++ * such as PCIe QCOM modems.
++ *
++ * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
++ */
++
++#include <linux/aer.h>
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/mhi.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++#include <linux/pm_runtime.h>
++#include <linux/timer.h>
++#include <linux/workqueue.h>
++
++#define MHI_PCI_DEFAULT_BAR_NUM 0
++
++#define MHI_POST_RESET_DELAY_MS 2000
++
++#define HEALTH_CHECK_PERIOD (HZ * 2)
++
++/**
++ * struct mhi_pci_dev_info - MHI PCI device specific information
++ * @config: MHI controller configuration
++ * @name: name of the PCI module
++ * @fw: firmware path (if any)
++ * @edl: emergency download mode firmware path (if any)
++ * @bar_num: PCI base address register to use for MHI MMIO register space
++ * @dma_data_width: DMA transfer word size (32 or 64 bits)
++ * @mru_default: default MRU size for MBIM network packets
++ * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
++ * of inband wake support (such as sdx24)
++ */
++struct mhi_pci_dev_info {
++ const struct mhi_controller_config *config;
++ const char *name;
++ const char *fw;
++ const char *edl;
++ unsigned int bar_num;
++ unsigned int dma_data_width;
++ unsigned int mru_default;
++ bool sideband_wake;
++};
++
++#define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_TO_DEVICE, \
++ .ee_mask = BIT(MHI_EE_AMSS), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ } \
++
++#define MHI_CHANNEL_CONFIG_DL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_FROM_DEVICE, \
++ .ee_mask = BIT(MHI_EE_AMSS), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ }
++
++#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_FROM_DEVICE, \
++ .ee_mask = BIT(MHI_EE_AMSS), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ .auto_queue = true, \
++ }
++
++#define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \
++ { \
++ .num_elements = el_count, \
++ .irq_moderation_ms = 0, \
++ .irq = (ev_ring) + 1, \
++ .priority = 1, \
++ .mode = MHI_DB_BRST_DISABLE, \
++ .data_type = MHI_ER_CTRL, \
++ .hardware_event = false, \
++ .client_managed = false, \
++ .offload_channel = false, \
++ }
++
++#define MHI_CHANNEL_CONFIG_HW_UL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_TO_DEVICE, \
++ .ee_mask = BIT(MHI_EE_AMSS), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_ENABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = true, \
++ } \
++
++#define MHI_CHANNEL_CONFIG_HW_DL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_FROM_DEVICE, \
++ .ee_mask = BIT(MHI_EE_AMSS), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_ENABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = true, \
++ }
++
++#define MHI_CHANNEL_CONFIG_UL_SBL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_TO_DEVICE, \
++ .ee_mask = BIT(MHI_EE_SBL), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ } \
++
++#define MHI_CHANNEL_CONFIG_DL_SBL(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_FROM_DEVICE, \
++ .ee_mask = BIT(MHI_EE_SBL), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ }
++
++#define MHI_CHANNEL_CONFIG_UL_FP(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_TO_DEVICE, \
++ .ee_mask = BIT(MHI_EE_FP), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ } \
++
++#define MHI_CHANNEL_CONFIG_DL_FP(ch_num, ch_name, el_count, ev_ring) \
++ { \
++ .num = ch_num, \
++ .name = ch_name, \
++ .num_elements = el_count, \
++ .event_ring = ev_ring, \
++ .dir = DMA_FROM_DEVICE, \
++ .ee_mask = BIT(MHI_EE_FP), \
++ .pollcfg = 0, \
++ .doorbell = MHI_DB_BRST_DISABLE, \
++ .lpm_notify = false, \
++ .offload_channel = false, \
++ .doorbell_mode_switch = false, \
++ }
++
++#define MHI_EVENT_CONFIG_DATA(ev_ring, el_count) \
++ { \
++ .num_elements = el_count, \
++ .irq_moderation_ms = 5, \
++ .irq = (ev_ring) + 1, \
++ .priority = 1, \
++ .mode = MHI_DB_BRST_DISABLE, \
++ .data_type = MHI_ER_DATA, \
++ .hardware_event = false, \
++ .client_managed = false, \
++ .offload_channel = false, \
++ }
++
++#define MHI_EVENT_CONFIG_HW_DATA(ev_ring, el_count, ch_num) \
++ { \
++ .num_elements = el_count, \
++ .irq_moderation_ms = 1, \
++ .irq = (ev_ring) + 1, \
++ .priority = 1, \
++ .mode = MHI_DB_BRST_DISABLE, \
++ .data_type = MHI_ER_DATA, \
++ .hardware_event = true, \
++ .client_managed = false, \
++ .offload_channel = false, \
++ .channel = ch_num, \
++ }
++
++static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
++ MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1),
++ MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1),
++ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 4, 0),
++ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 4, 0),
++ MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
++ MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
++ MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
++ MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0),
++ MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
++ MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 3),
++};
++
++static struct mhi_event_config modem_qcom_v1_mhi_events[] = {
++ /* first ring is control+data ring */
++ MHI_EVENT_CONFIG_CTRL(0, 64),
++ /* DIAG dedicated event ring */
++ MHI_EVENT_CONFIG_DATA(1, 128),
++ /* Hardware channels request dedicated hardware event rings */
++ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(3, 2048, 101)
++};
++
++static const struct mhi_controller_config modem_qcom_v1_mhiv_config = {
++ .max_channels = 128,
++ .timeout_ms = 8000,
++ .num_channels = ARRAY_SIZE(modem_qcom_v1_mhi_channels),
++ .ch_cfg = modem_qcom_v1_mhi_channels,
++ .num_events = ARRAY_SIZE(modem_qcom_v1_mhi_events),
++ .event_cfg = modem_qcom_v1_mhi_events,
++};
++
++static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = {
++ .name = "qcom-sdx65m",
++ .fw = "qcom/sdx65m/xbl.elf",
++ .edl = "qcom/sdx65m/edl.mbn",
++ .config = &modem_qcom_v1_mhiv_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .sideband_wake = false,
++};
++
++static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
++ .name = "qcom-sdx55m",
++ .fw = "qcom/sdx55m/sbl1.mbn",
++ .edl = "qcom/sdx55m/edl.mbn",
++ .config = &modem_qcom_v1_mhiv_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .mru_default = 32768,
++ .sideband_wake = false,
++};
++
++static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
++ .name = "qcom-sdx24",
++ .edl = "qcom/prog_firehose_sdx24.mbn",
++ .config = &modem_qcom_v1_mhiv_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .sideband_wake = true,
++};
++
++static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = {
++ MHI_CHANNEL_CONFIG_UL(0, "NMEA", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(1, "NMEA", 32, 0),
++ MHI_CHANNEL_CONFIG_UL_SBL(2, "SAHARA", 32, 0),
++ MHI_CHANNEL_CONFIG_DL_SBL(3, "SAHARA", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(4, "DIAG", 32, 1),
++ MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 1),
++ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
++ /* The EDL firmware is a flash-programmer exposing firehose protocol */
++ MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
++ MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
++};
++
++static struct mhi_event_config mhi_quectel_em1xx_events[] = {
++ MHI_EVENT_CONFIG_CTRL(0, 128),
++ MHI_EVENT_CONFIG_DATA(1, 128),
++ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101)
++};
++
++static const struct mhi_controller_config modem_quectel_em1xx_config = {
++ .max_channels = 128,
++ .timeout_ms = 20000,
++ .num_channels = ARRAY_SIZE(mhi_quectel_em1xx_channels),
++ .ch_cfg = mhi_quectel_em1xx_channels,
++ .num_events = ARRAY_SIZE(mhi_quectel_em1xx_events),
++ .event_cfg = mhi_quectel_em1xx_events,
++};
++
++static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = {
++ .name = "quectel-em1xx",
++ .edl = "qcom/prog_firehose_sdx24.mbn",
++ .config = &modem_quectel_em1xx_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .mru_default = 32768,
++ .sideband_wake = true,
++};
++
++static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
++ MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(4, "DIAG", 32, 1),
++ MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 1),
++ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
++};
++
++static struct mhi_event_config mhi_foxconn_sdx55_events[] = {
++ MHI_EVENT_CONFIG_CTRL(0, 128),
++ MHI_EVENT_CONFIG_DATA(1, 128),
++ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101)
++};
++
++static const struct mhi_controller_config modem_foxconn_sdx55_config = {
++ .max_channels = 128,
++ .timeout_ms = 20000,
++ .num_channels = ARRAY_SIZE(mhi_foxconn_sdx55_channels),
++ .ch_cfg = mhi_foxconn_sdx55_channels,
++ .num_events = ARRAY_SIZE(mhi_foxconn_sdx55_events),
++ .event_cfg = mhi_foxconn_sdx55_events,
++};
++
++static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
++ .name = "foxconn-sdx55",
++ .fw = "qcom/sdx55m/sbl1.mbn",
++ .edl = "qcom/sdx55m/edl.mbn",
++ .config = &modem_foxconn_sdx55_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .mru_default = 32768,
++ .sideband_wake = false,
++};
++
++static const struct mhi_channel_config mhi_mv31_channels[] = {
++ MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 64, 0),
++ MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 64, 0),
++ /* MBIM Control Channel */
++ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 64, 0),
++ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 64, 0),
++ /* MBIM Data Channel */
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 512, 2),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 512, 3),
++};
++
++static struct mhi_event_config mhi_mv31_events[] = {
++ MHI_EVENT_CONFIG_CTRL(0, 256),
++ MHI_EVENT_CONFIG_DATA(1, 256),
++ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101),
++};
++
++static const struct mhi_controller_config modem_mv31_config = {
++ .max_channels = 128,
++ .timeout_ms = 20000,
++ .num_channels = ARRAY_SIZE(mhi_mv31_channels),
++ .ch_cfg = mhi_mv31_channels,
++ .num_events = ARRAY_SIZE(mhi_mv31_events),
++ .event_cfg = mhi_mv31_events,
++};
++
++static const struct mhi_pci_dev_info mhi_mv31_info = {
++ .name = "cinterion-mv31",
++ .config = &modem_mv31_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .mru_default = 32768,
++};
++
++static const struct mhi_channel_config mhi_telit_fn980_hw_v1_channels[] = {
++ MHI_CHANNEL_CONFIG_UL(14, "QMI", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(15, "QMI", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(20, "IPCR", 16, 0),
++ MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 16, 0),
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 1),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 2),
++};
++
++static struct mhi_event_config mhi_telit_fn980_hw_v1_events[] = {
++ MHI_EVENT_CONFIG_CTRL(0, 128),
++ MHI_EVENT_CONFIG_HW_DATA(1, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(2, 2048, 101)
++};
++
++static struct mhi_controller_config modem_telit_fn980_hw_v1_config = {
++ .max_channels = 128,
++ .timeout_ms = 20000,
++ .num_channels = ARRAY_SIZE(mhi_telit_fn980_hw_v1_channels),
++ .ch_cfg = mhi_telit_fn980_hw_v1_channels,
++ .num_events = ARRAY_SIZE(mhi_telit_fn980_hw_v1_events),
++ .event_cfg = mhi_telit_fn980_hw_v1_events,
++};
++
++static const struct mhi_pci_dev_info mhi_telit_fn980_hw_v1_info = {
++ .name = "telit-fn980-hwv1",
++ .fw = "qcom/sdx55m/sbl1.mbn",
++ .edl = "qcom/sdx55m/edl.mbn",
++ .config = &modem_telit_fn980_hw_v1_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .mru_default = 32768,
++ .sideband_wake = false,
++};
++
++static const struct mhi_channel_config mhi_telit_fn990_channels[] = {
++ MHI_CHANNEL_CONFIG_UL_SBL(2, "SAHARA", 32, 0),
++ MHI_CHANNEL_CONFIG_DL_SBL(3, "SAHARA", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(4, "DIAG", 64, 1),
++ MHI_CHANNEL_CONFIG_DL(5, "DIAG", 64, 1),
++ MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
++ MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
++ MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
++ MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
++ MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
++};
++
++static struct mhi_event_config mhi_telit_fn990_events[] = {
++ MHI_EVENT_CONFIG_CTRL(0, 128),
++ MHI_EVENT_CONFIG_DATA(1, 128),
++ MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
++ MHI_EVENT_CONFIG_HW_DATA(3, 2048, 101)
++};
++
++static const struct mhi_controller_config modem_telit_fn990_config = {
++ .max_channels = 128,
++ .timeout_ms = 20000,
++ .num_channels = ARRAY_SIZE(mhi_telit_fn990_channels),
++ .ch_cfg = mhi_telit_fn990_channels,
++ .num_events = ARRAY_SIZE(mhi_telit_fn990_events),
++ .event_cfg = mhi_telit_fn990_events,
++};
++
++static const struct mhi_pci_dev_info mhi_telit_fn990_info = {
++ .name = "telit-fn990",
++ .config = &modem_telit_fn990_config,
++ .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
++ .dma_data_width = 32,
++ .sideband_wake = false,
++ .mru_default = 32768,
++};
++
++static const struct pci_device_id mhi_pci_id_table[] = {
++ /* Telit FN980 hardware revision v1 */
++ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x1C5D, 0x2000),
++ .driver_data = (kernel_ulong_t) &mhi_telit_fn980_hw_v1_info },
++ { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306),
++ .driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info },
++ { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304),
++ .driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info },
++ /* Telit FN990 */
++ { PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0308, 0x1c5d, 0x2010),
++ .driver_data = (kernel_ulong_t) &mhi_telit_fn990_info },
++ { PCI_DEVICE(0x1eac, 0x1001), /* EM120R-GL (sdx24) */
++ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
++ { PCI_DEVICE(0x1eac, 0x1002), /* EM160R-GL (sdx24) */
++ .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
++ { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0308),
++ .driver_data = (kernel_ulong_t) &mhi_qcom_sdx65_info },
++ /* T99W175 (sdx55), Both for eSIM and Non-eSIM */
++ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0ab),
++ .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
++ /* DW5930e (sdx55), With eSIM, It's also T99W175 */
++ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b0),
++ .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
++ /* DW5930e (sdx55), Non-eSIM, It's also T99W175 */
++ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b1),
++ .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
++ /* MV31-W (Cinterion) */
++ { PCI_DEVICE(0x1269, 0x00b3),
++ .driver_data = (kernel_ulong_t) &mhi_mv31_info },
++ { }
++};
++MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
++
++enum mhi_pci_device_status {
++ MHI_PCI_DEV_STARTED,
++ MHI_PCI_DEV_SUSPENDED,
++};
++
++struct mhi_pci_device {
++ struct mhi_controller mhi_cntrl;
++ struct pci_saved_state *pci_state;
++ struct work_struct recovery_work;
++ struct timer_list health_check_timer;
++ unsigned long status;
++};
++
++static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
++ void __iomem *addr, u32 *out)
++{
++ *out = readl(addr);
++ return 0;
++}
++
++static void mhi_pci_write_reg(struct mhi_controller *mhi_cntrl,
++ void __iomem *addr, u32 val)
++{
++ writel(val, addr);
++}
++
++static void mhi_pci_status_cb(struct mhi_controller *mhi_cntrl,
++ enum mhi_callback cb)
++{
++ struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
++
++ /* Nothing to do for now */
++ switch (cb) {
++ case MHI_CB_FATAL_ERROR:
++ case MHI_CB_SYS_ERROR:
++ dev_warn(&pdev->dev, "firmware crashed (%u)\n", cb);
++ pm_runtime_forbid(&pdev->dev);
++ break;
++ case MHI_CB_EE_MISSION_MODE:
++ pm_runtime_allow(&pdev->dev);
++ break;
++ default:
++ break;
++ }
++}
++
++static void mhi_pci_wake_get_nop(struct mhi_controller *mhi_cntrl, bool force)
++{
++ /* no-op */
++}
++
++static void mhi_pci_wake_put_nop(struct mhi_controller *mhi_cntrl, bool override)
++{
++ /* no-op */
++}
++
++static void mhi_pci_wake_toggle_nop(struct mhi_controller *mhi_cntrl)
++{
++ /* no-op */
++}
++
++static bool mhi_pci_is_alive(struct mhi_controller *mhi_cntrl)
++{
++ struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
++ u16 vendor = 0;
++
++ if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor))
++ return false;
++
++ if (vendor == (u16) ~0 || vendor == 0)
++ return false;
++
++ return true;
++}
++
++static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
++ unsigned int bar_num, u64 dma_mask)
++{
++ struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
++ int err;
++
++ err = pci_assign_resource(pdev, bar_num);
++ if (err)
++ return err;
++
++ err = pcim_enable_device(pdev);
++ if (err) {
++ dev_err(&pdev->dev, "failed to enable pci device: %d\n", err);
++ return err;
++ }
++
++ err = pcim_iomap_regions(pdev, 1 << bar_num, pci_name(pdev));
++ if (err) {
++ dev_err(&pdev->dev, "failed to map pci region: %d\n", err);
++ return err;
++ }
++ mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num];
++ mhi_cntrl->reg_len = pci_resource_len(pdev, bar_num);
++
++ err = pci_set_dma_mask(pdev, dma_mask);
++ if (err) {
++ dev_err(&pdev->dev, "Cannot set proper DMA mask\n");
++ return err;
++ }
++
++ err = pci_set_consistent_dma_mask(pdev, dma_mask);
++ if (err) {
++ dev_err(&pdev->dev, "set consistent dma mask failed\n");
++ return err;
++ }
++
++ pci_set_master(pdev);
++
++ return 0;
++}
++
++static int mhi_pci_get_irqs(struct mhi_controller *mhi_cntrl,
++ const struct mhi_controller_config *mhi_cntrl_config)
++{
++ struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
++ int nr_vectors, i;
++ int *irq;
++
++ /*
++ * Alloc one MSI vector for BHI + one vector per event ring, ideally...
++ * No explicit pci_free_irq_vectors required, done by pcim_release.
++ */
++ mhi_cntrl->nr_irqs = 1 + mhi_cntrl_config->num_events;
++
++ nr_vectors = pci_alloc_irq_vectors(pdev, 1, mhi_cntrl->nr_irqs, PCI_IRQ_MSI);
++ if (nr_vectors < 0) {
++ dev_err(&pdev->dev, "Error allocating MSI vectors %d\n",
++ nr_vectors);
++ return nr_vectors;
++ }
++
++ if (nr_vectors < mhi_cntrl->nr_irqs) {
++ dev_warn(&pdev->dev, "using shared MSI\n");
++
++ /* Patch msi vectors, use only one (shared) */
++ for (i = 0; i < mhi_cntrl_config->num_events; i++)
++ mhi_cntrl_config->event_cfg[i].irq = 0;
++ mhi_cntrl->nr_irqs = 1;
++ }
++
++ irq = devm_kcalloc(&pdev->dev, mhi_cntrl->nr_irqs, sizeof(int), GFP_KERNEL);
++ if (!irq)
++ return -ENOMEM;
++
++ for (i = 0; i < mhi_cntrl->nr_irqs; i++) {
++ int vector = i >= nr_vectors ? (nr_vectors - 1) : i;
++
++ irq[i] = pci_irq_vector(pdev, vector);
++ }
++
++ mhi_cntrl->irq = irq;
++
++ return 0;
++}
++
++static int mhi_pci_runtime_get(struct mhi_controller *mhi_cntrl)
++{
++ /* The runtime_get() MHI callback means:
++ * Do whatever is requested to leave M3.
++ */
++ return pm_runtime_get(mhi_cntrl->cntrl_dev);
++}
++
++static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl)
++{
++ /* The runtime_put() MHI callback means:
++ * Device can be moved into M3 state.
++ */
++ pm_runtime_mark_last_busy(mhi_cntrl->cntrl_dev);
++ pm_runtime_put(mhi_cntrl->cntrl_dev);
++}
++
++static void mhi_pci_recovery_work(struct work_struct *work)
++{
++ struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device,
++ recovery_work);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++ struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
++ int err;
++
++ dev_warn(&pdev->dev, "device recovery started\n");
++
++ del_timer(&mhi_pdev->health_check_timer);
++ pm_runtime_forbid(&pdev->dev);
++
++ /* Clean up MHI state */
++ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
++ mhi_power_down(mhi_cntrl, false);
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ }
++
++ pci_set_power_state(pdev, PCI_D0);
++ pci_load_saved_state(pdev, mhi_pdev->pci_state);
++ pci_restore_state(pdev);
++
++ if (!mhi_pci_is_alive(mhi_cntrl))
++ goto err_try_reset;
++
++ err = mhi_prepare_for_power_up(mhi_cntrl);
++ if (err)
++ goto err_try_reset;
++
++ err = mhi_sync_power_up(mhi_cntrl);
++ if (err)
++ goto err_unprepare;
++
++ dev_dbg(&pdev->dev, "Recovery completed\n");
++
++ set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
++ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
++ return;
++
++err_unprepare:
++ mhi_unprepare_after_power_down(mhi_cntrl);
++err_try_reset:
++ if (pci_reset_function(pdev))
++ dev_err(&pdev->dev, "Recovery failed\n");
++}
++
++static void health_check(struct timer_list *t)
++{
++ struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
++ test_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
++ return;
++
++ if (!mhi_pci_is_alive(mhi_cntrl)) {
++ dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
++ queue_work(system_long_wq, &mhi_pdev->recovery_work);
++ return;
++ }
++
++ /* reschedule in two seconds */
++ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
++}
++
++static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
++{
++ const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
++ const struct mhi_controller_config *mhi_cntrl_config;
++ struct mhi_pci_device *mhi_pdev;
++ struct mhi_controller *mhi_cntrl;
++ int err;
++
++ dev_dbg(&pdev->dev, "MHI PCI device found: %s\n", info->name);
++
++ /* mhi_pdev.mhi_cntrl must be zero-initialized */
++ mhi_pdev = devm_kzalloc(&pdev->dev, sizeof(*mhi_pdev), GFP_KERNEL);
++ if (!mhi_pdev)
++ return -ENOMEM;
++
++ INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
++ timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
++
++ mhi_cntrl_config = info->config;
++ mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ mhi_cntrl->cntrl_dev = &pdev->dev;
++ mhi_cntrl->iova_start = 0;
++ mhi_cntrl->iova_stop = (dma_addr_t)DMA_BIT_MASK(info->dma_data_width);
++ mhi_cntrl->fw_image = info->fw;
++ mhi_cntrl->edl_image = info->edl;
++
++ mhi_cntrl->read_reg = mhi_pci_read_reg;
++ mhi_cntrl->write_reg = mhi_pci_write_reg;
++ mhi_cntrl->status_cb = mhi_pci_status_cb;
++ mhi_cntrl->runtime_get = mhi_pci_runtime_get;
++ mhi_cntrl->runtime_put = mhi_pci_runtime_put;
++ mhi_cntrl->mru = info->mru_default;
++
++ if (info->sideband_wake) {
++ mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
++ mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
++ mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
++ }
++
++ err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
++ if (err)
++ return err;
++
++ err = mhi_pci_get_irqs(mhi_cntrl, mhi_cntrl_config);
++ if (err)
++ return err;
++
++ pci_set_drvdata(pdev, mhi_pdev);
++
++ /* Keep a stored copy of the PCI config space at hand for restore after a sudden
++ * PCI error: cache the state locally and discard the PCI core one.
++ */
++ pci_save_state(pdev);
++ mhi_pdev->pci_state = pci_store_saved_state(pdev);
++ pci_load_saved_state(pdev, NULL);
++
++ pci_enable_pcie_error_reporting(pdev);
++
++ err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
++ if (err)
++ goto err_disable_reporting;
++
++ /* MHI bus does not power up the controller by default */
++ err = mhi_prepare_for_power_up(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to prepare MHI controller\n");
++ goto err_unregister;
++ }
++
++ err = mhi_sync_power_up(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to power up MHI controller\n");
++ goto err_unprepare;
++ }
++
++ set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
++
++ /* start health check */
++ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
++
++ /* Only allow runtime-suspend if PME capable (for wakeup) */
++ if (pci_pme_capable(pdev, PCI_D3hot)) {
++ pm_runtime_set_autosuspend_delay(&pdev->dev, 2000);
++ pm_runtime_use_autosuspend(&pdev->dev);
++ pm_runtime_mark_last_busy(&pdev->dev);
++ pm_runtime_put_noidle(&pdev->dev);
++ }
++
++ return 0;
++
++err_unprepare:
++ mhi_unprepare_after_power_down(mhi_cntrl);
++err_unregister:
++ mhi_unregister_controller(mhi_cntrl);
++err_disable_reporting:
++ pci_disable_pcie_error_reporting(pdev);
++
++ return err;
++}
++
++static void mhi_pci_remove(struct pci_dev *pdev)
++{
++ struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ del_timer_sync(&mhi_pdev->health_check_timer);
++ cancel_work_sync(&mhi_pdev->recovery_work);
++
++ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
++ mhi_power_down(mhi_cntrl, true);
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ }
++
++ /* Balance the pm_runtime_put_noidle() done in probe */
++ if (pci_pme_capable(pdev, PCI_D3hot))
++ pm_runtime_get_noresume(&pdev->dev);
++
++ mhi_unregister_controller(mhi_cntrl);
++ pci_disable_pcie_error_reporting(pdev);
++}
++
++static void mhi_pci_shutdown(struct pci_dev *pdev)
++{
++ mhi_pci_remove(pdev);
++ pci_set_power_state(pdev, PCI_D3hot);
++}
++
++static void mhi_pci_reset_prepare(struct pci_dev *pdev)
++{
++ struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ dev_info(&pdev->dev, "reset\n");
++
++ del_timer(&mhi_pdev->health_check_timer);
++
++ /* Clean up MHI state */
++ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
++ mhi_power_down(mhi_cntrl, false);
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ }
++
++ /* cause internal device reset */
++ mhi_soc_reset(mhi_cntrl);
++
++ /* Be sure device reset has been executed */
++ msleep(MHI_POST_RESET_DELAY_MS);
++}
++
++static void mhi_pci_reset_done(struct pci_dev *pdev)
++{
++ struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++ int err;
++
++ /* Restore initial known working PCI state */
++ pci_load_saved_state(pdev, mhi_pdev->pci_state);
++ pci_restore_state(pdev);
++
++ /* Is device status available? */
++ if (!mhi_pci_is_alive(mhi_cntrl)) {
++ dev_err(&pdev->dev, "reset failed\n");
++ return;
++ }
++
++ err = mhi_prepare_for_power_up(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to prepare MHI controller\n");
++ return;
++ }
++
++ err = mhi_sync_power_up(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to power up MHI controller\n");
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ return;
++ }
++
++ set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
++ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
++}
++
++static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
++ pci_channel_state_t state)
++{
++ struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
++
++ if (state == pci_channel_io_perm_failure)
++ return PCI_ERS_RESULT_DISCONNECT;
++
++ /* Clean up MHI state */
++ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
++ mhi_power_down(mhi_cntrl, false);
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ } else {
++ /* Nothing to do */
++ return PCI_ERS_RESULT_RECOVERED;
++ }
++
++ pci_disable_device(pdev);
++
++ return PCI_ERS_RESULT_NEED_RESET;
++}
++
++static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
++{
++ if (pci_enable_device(pdev)) {
++ dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
++ return PCI_ERS_RESULT_DISCONNECT;
++ }
++
++ return PCI_ERS_RESULT_RECOVERED;
++}
++
++static void mhi_pci_io_resume(struct pci_dev *pdev)
++{
++ struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
++
++ dev_err(&pdev->dev, "PCI slot reset done\n");
++
++ queue_work(system_long_wq, &mhi_pdev->recovery_work);
++}
++
++static const struct pci_error_handlers mhi_pci_err_handler = {
++ .error_detected = mhi_pci_error_detected,
++ .slot_reset = mhi_pci_slot_reset,
++ .resume = mhi_pci_io_resume,
++ .reset_prepare = mhi_pci_reset_prepare,
++ .reset_done = mhi_pci_reset_done,
++};
++
++static int __maybe_unused mhi_pci_runtime_suspend(struct device *dev)
++{
++ struct pci_dev *pdev = to_pci_dev(dev);
++ struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++ int err;
++
++ if (test_and_set_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
++ return 0;
++
++ del_timer(&mhi_pdev->health_check_timer);
++ cancel_work_sync(&mhi_pdev->recovery_work);
++
++ if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
++ mhi_cntrl->ee != MHI_EE_AMSS)
++ goto pci_suspend; /* Nothing to do at MHI level */
++
++ /* Transition to M3 state */
++ err = mhi_pm_suspend(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to suspend device: %d\n", err);
++ clear_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status);
++ return -EBUSY;
++ }
++
++pci_suspend:
++ pci_disable_device(pdev);
++ pci_wake_from_d3(pdev, true);
++
++ return 0;
++}
++
++static int __maybe_unused mhi_pci_runtime_resume(struct device *dev)
++{
++ struct pci_dev *pdev = to_pci_dev(dev);
++ struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++ int err;
++
++ if (!test_and_clear_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
++ return 0;
++
++ err = pci_enable_device(pdev);
++ if (err)
++ goto err_recovery;
++
++ pci_set_master(pdev);
++ pci_wake_from_d3(pdev, false);
++
++ if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
++ mhi_cntrl->ee != MHI_EE_AMSS)
++ return 0; /* Nothing to do at MHI level */
++
++ /* Exit M3, transition to M0 state */
++ err = mhi_pm_resume(mhi_cntrl);
++ if (err) {
++ dev_err(&pdev->dev, "failed to resume device: %d\n", err);
++ goto err_recovery;
++ }
++
++ /* Resume health check */
++ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
++
++ /* It can be a remote wakeup (no mhi runtime_get), update access time */
++ pm_runtime_mark_last_busy(dev);
++
++ return 0;
++
++err_recovery:
++ /* Do not return a failure here, to avoid messing up our PCI device state; the
++ * device likely lost power (d3cold) and simply needs to be reset from the
++ * recovery procedure. Trigger the recovery asynchronously to avoid delaying
++ * system suspend exit.
++ */
++ queue_work(system_long_wq, &mhi_pdev->recovery_work);
++ pm_runtime_mark_last_busy(dev);
++
++ return 0;
++}
++
++static int __maybe_unused mhi_pci_suspend(struct device *dev)
++{
++ pm_runtime_disable(dev);
++ return mhi_pci_runtime_suspend(dev);
++}
++
++static int __maybe_unused mhi_pci_resume(struct device *dev)
++{
++ int ret;
++
++ /* Depending on the platform, the device may have lost power (d3cold); we need
++ * to resume it now to check its state and recover when necessary.
++ */
++ ret = mhi_pci_runtime_resume(dev);
++ pm_runtime_enable(dev);
++
++ return ret;
++}
++
++static int __maybe_unused mhi_pci_freeze(struct device *dev)
++{
++ struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
++ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
++
++ /* We want to stop all operations; hibernation does not guarantee that the
++ * device will be in the same state as before freezing, especially if the
++ * intermediate restore kernel reinitializes the MHI device with a new
++ * context.
++ */
++ flush_work(&mhi_pdev->recovery_work);
++ if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
++ mhi_power_down(mhi_cntrl, true);
++ mhi_unprepare_after_power_down(mhi_cntrl);
++ }
++
++ return 0;
++}
++
++static int __maybe_unused mhi_pci_restore(struct device *dev)
++{
++ struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
++
++ /* Reinitialize the device */
++ queue_work(system_long_wq, &mhi_pdev->recovery_work);
++
++ return 0;
++}
++
++static const struct dev_pm_ops mhi_pci_pm_ops = {
++ SET_RUNTIME_PM_OPS(mhi_pci_runtime_suspend, mhi_pci_runtime_resume, NULL)
++#ifdef CONFIG_PM_SLEEP
++ .suspend = mhi_pci_suspend,
++ .resume = mhi_pci_resume,
++ .freeze = mhi_pci_freeze,
++ .thaw = mhi_pci_restore,
++ .poweroff = mhi_pci_freeze,
++ .restore = mhi_pci_restore,
++#endif
++};
++
++static struct pci_driver mhi_pci_driver = {
++ .name = "mhi-pci-generic",
++ .id_table = mhi_pci_id_table,
++ .probe = mhi_pci_probe,
++ .remove = mhi_pci_remove,
++ .shutdown = mhi_pci_shutdown,
++ .err_handler = &mhi_pci_err_handler,
++ .driver.pm = &mhi_pci_pm_ops
++};
++module_pci_driver(mhi_pci_driver);
++
++MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
++MODULE_DESCRIPTION("Modem Host Interface (MHI) PCI controller driver");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c
+new file mode 100644
+index 0000000000000..470dddca025dc
+--- /dev/null
++++ b/drivers/bus/mhi/host/pm.c
+@@ -0,0 +1,1266 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ *
++ */
++
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/dma-direction.h>
++#include <linux/dma-mapping.h>
++#include <linux/interrupt.h>
++#include <linux/list.h>
++#include <linux/mhi.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/wait.h>
++#include "internal.h"
++
++/*
++ * Not all MHI state transitions are synchronous. Transitions like Linkdown,
++ * SYS_ERR, and shutdown can happen anytime asynchronously. This function will
++ * transition to a new state only if we're allowed to.
++ *
++ * Priority increases as we go down. For instance, from any state in L0, the
++ * transition can be made to states in L1, L2 and L3. A notable exception to
++ * this rule is state DISABLE. From DISABLE state we can only transition to
++ * POR state. Also, while in L2 state, user cannot jump back to previous
++ * L1 or L0 states.
++ *
++ * Valid transitions:
++ * L0: DISABLE <--> POR
++ * POR <--> POR
++ * POR -> M0 -> M2 --> M0
++ * POR -> FW_DL_ERR
++ * FW_DL_ERR <--> FW_DL_ERR
++ * M0 <--> M0
++ * M0 -> FW_DL_ERR
++ * M0 -> M3_ENTER -> M3 -> M3_EXIT --> M0
++ * L1: SYS_ERR_DETECT -> SYS_ERR_PROCESS --> POR
++ * L2: SHUTDOWN_PROCESS -> LD_ERR_FATAL_DETECT
++ * SHUTDOWN_PROCESS -> DISABLE
++ * L3: LD_ERR_FATAL_DETECT <--> LD_ERR_FATAL_DETECT
++ * LD_ERR_FATAL_DETECT -> DISABLE
++ */
++static struct mhi_pm_transitions const dev_state_transitions[] = {
++ /* L0 States */
++ {
++ MHI_PM_DISABLE,
++ MHI_PM_POR
++ },
++ {
++ MHI_PM_POR,
++ MHI_PM_POR | MHI_PM_DISABLE | MHI_PM_M0 |
++ MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_FW_DL_ERR
++ },
++ {
++ MHI_PM_M0,
++ MHI_PM_M0 | MHI_PM_M2 | MHI_PM_M3_ENTER |
++ MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_FW_DL_ERR
++ },
++ {
++ MHI_PM_M2,
++ MHI_PM_M0 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ {
++ MHI_PM_M3_ENTER,
++ MHI_PM_M3 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ {
++ MHI_PM_M3,
++ MHI_PM_M3_EXIT | MHI_PM_SYS_ERR_DETECT |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ {
++ MHI_PM_M3_EXIT,
++ MHI_PM_M0 | MHI_PM_SYS_ERR_DETECT | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ {
++ MHI_PM_FW_DL_ERR,
++ MHI_PM_FW_DL_ERR | MHI_PM_SYS_ERR_DETECT |
++ MHI_PM_SHUTDOWN_PROCESS | MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ /* L1 States */
++ {
++ MHI_PM_SYS_ERR_DETECT,
++ MHI_PM_SYS_ERR_PROCESS | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ {
++ MHI_PM_SYS_ERR_PROCESS,
++ MHI_PM_POR | MHI_PM_SHUTDOWN_PROCESS |
++ MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ /* L2 States */
++ {
++ MHI_PM_SHUTDOWN_PROCESS,
++ MHI_PM_DISABLE | MHI_PM_LD_ERR_FATAL_DETECT
++ },
++ /* L3 States */
++ {
++ MHI_PM_LD_ERR_FATAL_DETECT,
++ MHI_PM_LD_ERR_FATAL_DETECT | MHI_PM_DISABLE
++ },
++};
++
++enum mhi_pm_state __must_check mhi_tryset_pm_state(struct mhi_controller *mhi_cntrl,
++ enum mhi_pm_state state)
++{
++ unsigned long cur_state = mhi_cntrl->pm_state;
++ int index = find_last_bit(&cur_state, 32);
++
++ if (unlikely(index >= ARRAY_SIZE(dev_state_transitions)))
++ return cur_state;
++
++ if (unlikely(dev_state_transitions[index].from_state != cur_state))
++ return cur_state;
++
++ if (unlikely(!(dev_state_transitions[index].to_states & state)))
++ return cur_state;
++
++ mhi_cntrl->pm_state = state;
++ return mhi_cntrl->pm_state;
++}
++
++void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, enum mhi_state state)
++{
++ if (state == MHI_STATE_RESET) {
++ mhi_write_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
++ MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 1);
++ } else {
++ mhi_write_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
++ MHICTRL_MHISTATE_MASK,
++ MHICTRL_MHISTATE_SHIFT, state);
++ }
++}
++
++/* NOP for backward compatibility, host allowed to ring DB in M2 state */
++static void mhi_toggle_dev_wake_nop(struct mhi_controller *mhi_cntrl)
++{
++}
++
++static void mhi_toggle_dev_wake(struct mhi_controller *mhi_cntrl)
++{
++ mhi_cntrl->wake_get(mhi_cntrl, false);
++ mhi_cntrl->wake_put(mhi_cntrl, true);
++}
++
++/* Handle device ready state transition */
++int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_event *mhi_event;
++ enum mhi_pm_state cur_state;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 interval_us = 25000; /* poll register field every 25 milliseconds */
++ int ret, i;
++
++ /* Check if device entered error state */
++ if (MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
++ dev_err(dev, "Device link is not accessible\n");
++ return -EIO;
++ }
++
++ /* Wait for RESET to be cleared and READY bit to be set by the device */
++ ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
++ MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
++ interval_us);
++ if (ret) {
++ dev_err(dev, "Device failed to clear MHI Reset\n");
++ return ret;
++ }
++
++ ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHISTATUS,
++ MHISTATUS_READY_MASK, MHISTATUS_READY_SHIFT, 1,
++ interval_us);
++ if (ret) {
++ dev_err(dev, "Device failed to enter MHI Ready\n");
++ return ret;
++ }
++
++ dev_dbg(dev, "Device in READY State\n");
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_POR);
++ mhi_cntrl->dev_state = MHI_STATE_READY;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ if (cur_state != MHI_PM_POR) {
++ dev_err(dev, "Error moving to state %s from %s\n",
++ to_mhi_pm_state_str(MHI_PM_POR),
++ to_mhi_pm_state_str(cur_state));
++ return -EIO;
++ }
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (!MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state)) {
++ dev_err(dev, "Device registers not accessible\n");
++ goto error_mmio;
++ }
++
++ /* Configure MMIO registers */
++ ret = mhi_init_mmio(mhi_cntrl);
++ if (ret) {
++ dev_err(dev, "Error configuring MMIO registers\n");
++ goto error_mmio;
++ }
++
++ /* Add elements to all SW event rings */
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ /* Skip if this is an offload or HW event */
++ if (mhi_event->offload_ev || mhi_event->hw_ring)
++ continue;
++
++ ring->wp = ring->base + ring->len - ring->el_size;
++ *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size);
++ /* Update all cores */
++ smp_wmb();
++
++ /* Ring the event ring db */
++ spin_lock_irq(&mhi_event->lock);
++ mhi_ring_er_db(mhi_event);
++ spin_unlock_irq(&mhi_event->lock);
++ }
++
++ /* Set MHI to M0 state */
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ return 0;
++
++error_mmio:
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ return -EIO;
++}
++
++int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_pm_state cur_state;
++ struct mhi_chan *mhi_chan;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int i;
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->dev_state = MHI_STATE_M0;
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M0);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (unlikely(cur_state != MHI_PM_M0)) {
++ dev_err(dev, "Unable to transition to M0 state\n");
++ return -EIO;
++ }
++ mhi_cntrl->M0++;
++
++ /* Wake up the device */
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_cntrl->wake_get(mhi_cntrl, true);
++
++ /* Ring all event rings and CMD ring only if we're in mission mode */
++ if (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) {
++ struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
++ struct mhi_cmd *mhi_cmd =
++ &mhi_cntrl->mhi_cmd[PRIMARY_CMD_RING];
++
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++
++ spin_lock_irq(&mhi_event->lock);
++ mhi_ring_er_db(mhi_event);
++ spin_unlock_irq(&mhi_event->lock);
++ }
++
++ /* Only ring primary cmd ring if ring is not empty */
++ spin_lock_irq(&mhi_cmd->lock);
++ if (mhi_cmd->ring.rp != mhi_cmd->ring.wp)
++ mhi_ring_cmd_db(mhi_cntrl, mhi_cmd);
++ spin_unlock_irq(&mhi_cmd->lock);
++ }
++
++ /* Ring channel DB registers */
++ mhi_chan = mhi_cntrl->mhi_chan;
++ for (i = 0; i < mhi_cntrl->max_chan; i++, mhi_chan++) {
++ struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
++
++ if (mhi_chan->db_cfg.reset_req) {
++ write_lock_irq(&mhi_chan->lock);
++ mhi_chan->db_cfg.db_mode = true;
++ write_unlock_irq(&mhi_chan->lock);
++ }
++
++ read_lock_irq(&mhi_chan->lock);
++
++ /* Only ring DB if ring is not empty */
++ if (tre_ring->base && tre_ring->wp != tre_ring->rp &&
++ mhi_chan->ch_state == MHI_CH_STATE_ENABLED)
++ mhi_ring_chan_db(mhi_cntrl, mhi_chan);
++ read_unlock_irq(&mhi_chan->lock);
++ }
++
++ mhi_cntrl->wake_put(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ wake_up_all(&mhi_cntrl->state_event);
++
++ return 0;
++}
++
++/*
++ * After receiving the MHI state change event from the device indicating the
++ * transition to M1 state, the host can transition the device to M2 state
++ * for keeping it in low power state.
++ */
++void mhi_pm_m1_transition(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_pm_state state;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M2);
++ if (state == MHI_PM_M2) {
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M2);
++ mhi_cntrl->dev_state = MHI_STATE_M2;
++
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ mhi_cntrl->M2++;
++ wake_up_all(&mhi_cntrl->state_event);
++
++ /* If there are any pending resources, exit M2 immediately */
++ if (unlikely(atomic_read(&mhi_cntrl->pending_pkts) ||
++ atomic_read(&mhi_cntrl->dev_wake))) {
++ dev_dbg(dev,
++ "Exiting M2, pending_pkts: %d dev_wake: %d\n",
++ atomic_read(&mhi_cntrl->pending_pkts),
++ atomic_read(&mhi_cntrl->dev_wake));
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_cntrl->wake_get(mhi_cntrl, true);
++ mhi_cntrl->wake_put(mhi_cntrl, true);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ } else {
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_IDLE);
++ }
++ } else {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ }
++}
++
++/* MHI M3 completion handler */
++int mhi_pm_m3_transition(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_pm_state state;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->dev_state = MHI_STATE_M3;
++ state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (state != MHI_PM_M3) {
++ dev_err(dev, "Unable to transition to M3 state\n");
++ return -EIO;
++ }
++
++ mhi_cntrl->M3++;
++ wake_up_all(&mhi_cntrl->state_event);
++
++ return 0;
++}
++
++/* Handle device Mission Mode transition */
++static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_event *mhi_event;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_ee_type ee = MHI_EE_MAX, current_ee = mhi_cntrl->ee;
++ int i, ret;
++
++ dev_dbg(dev, "Processing Mission Mode transition\n");
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
++ ee = mhi_get_exec_env(mhi_cntrl);
++
++ if (!MHI_IN_MISSION_MODE(ee)) {
++ mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ wake_up_all(&mhi_cntrl->state_event);
++ return -EIO;
++ }
++ mhi_cntrl->ee = ee;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ wake_up_all(&mhi_cntrl->state_event);
++
++ device_for_each_child(&mhi_cntrl->mhi_dev->dev, &current_ee,
++ mhi_destroy_device);
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_EE_MISSION_MODE);
++
++ /* Force MHI to be in M0 state before continuing */
++ ret = __mhi_device_get_sync(mhi_cntrl);
++ if (ret)
++ return ret;
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ ret = -EIO;
++ goto error_mission_mode;
++ }
++
++ /* Add elements to all HW event rings */
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ if (mhi_event->offload_ev || !mhi_event->hw_ring)
++ continue;
++
++ ring->wp = ring->base + ring->len - ring->el_size;
++ *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size);
++ /* Update to all cores */
++ smp_wmb();
++
++ spin_lock_irq(&mhi_event->lock);
++ if (MHI_DB_ACCESS_VALID(mhi_cntrl))
++ mhi_ring_er_db(mhi_event);
++ spin_unlock_irq(&mhi_event->lock);
++ }
++
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ /*
++ * The MHI devices are only created when the client device switches its
++ * Execution Environment (EE) to either SBL or AMSS states
++ */
++ mhi_create_devices(mhi_cntrl);
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++
++error_mission_mode:
++ mhi_cntrl->wake_put(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ return ret;
++}
++
++/* Handle shutdown transitions */
++static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_pm_state cur_state;
++ struct mhi_event *mhi_event;
++ struct mhi_cmd_ctxt *cmd_ctxt;
++ struct mhi_cmd *mhi_cmd;
++ struct mhi_event_ctxt *er_ctxt;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int ret, i;
++
++ dev_dbg(dev, "Processing disable transition with PM state: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++
++ /* Trigger MHI RESET so that the device will not access host memory */
++ if (!MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
++ /* Skip MHI RESET if in RDDM state */
++ if (mhi_cntrl->rddm_image && mhi_get_exec_env(mhi_cntrl) == MHI_EE_RDDM)
++ goto skip_mhi_reset;
++
++ dev_dbg(dev, "Triggering MHI Reset in device\n");
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
++
++ /* Wait for the reset bit to be cleared by the device */
++ ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
++ MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
++ 25000);
++ if (ret)
++ dev_err(dev, "Device failed to clear MHI Reset\n");
++
++ /*
++ * Device will clear BHI_INTVEC as a part of RESET processing,
++ * hence re-program it
++ */
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
++ }
++
++skip_mhi_reset:
++ dev_dbg(dev,
++ "Waiting for all pending event ring processing to complete\n");
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++ free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
++ tasklet_kill(&mhi_event->task);
++ }
++
++ /* Release lock and wait for all pending threads to complete */
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++ dev_dbg(dev, "Waiting for all pending threads to complete\n");
++ wake_up_all(&mhi_cntrl->state_event);
++
++ dev_dbg(dev, "Reset all active channels and remove MHI devices\n");
++ device_for_each_child(&mhi_cntrl->mhi_dev->dev, NULL, mhi_destroy_device);
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++
++ WARN_ON(atomic_read(&mhi_cntrl->dev_wake));
++ WARN_ON(atomic_read(&mhi_cntrl->pending_pkts));
++
++ /* Reset the ev rings and cmd rings */
++ dev_dbg(dev, "Resetting EV CTXT and CMD CTXT\n");
++ mhi_cmd = mhi_cntrl->mhi_cmd;
++ cmd_ctxt = mhi_cntrl->mhi_ctxt->cmd_ctxt;
++ for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
++ struct mhi_ring *ring = &mhi_cmd->ring;
++
++ ring->rp = ring->base;
++ ring->wp = ring->base;
++ cmd_ctxt->rp = cmd_ctxt->rbase;
++ cmd_ctxt->wp = cmd_ctxt->rbase;
++ }
++
++ mhi_event = mhi_cntrl->mhi_event;
++ er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
++ mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ /* Skip offload events */
++ if (mhi_event->offload_ev)
++ continue;
++
++ ring->rp = ring->base;
++ ring->wp = ring->base;
++ er_ctxt->rp = er_ctxt->rbase;
++ er_ctxt->wp = er_ctxt->rbase;
++ }
++
++ /* Move to disable state */
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_DISABLE);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (unlikely(cur_state != MHI_PM_DISABLE))
++ dev_err(dev, "Error moving from PM state: %s to: %s\n",
++ to_mhi_pm_state_str(cur_state),
++ to_mhi_pm_state_str(MHI_PM_DISABLE));
++
++ dev_dbg(dev, "Exiting with PM state: %s, MHI state: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state));
++
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++}
++
++/* Handle system error transitions */
++static void mhi_pm_sys_error_transition(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_pm_state cur_state, prev_state;
++ enum dev_st_transition next_state;
++ struct mhi_event *mhi_event;
++ struct mhi_cmd_ctxt *cmd_ctxt;
++ struct mhi_cmd *mhi_cmd;
++ struct mhi_event_ctxt *er_ctxt;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int ret, i;
++
++ dev_dbg(dev, "Transitioning from PM state: %s to: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ to_mhi_pm_state_str(MHI_PM_SYS_ERR_PROCESS));
++
++ /* We must notify MHI control driver so it can clean up first */
++ mhi_cntrl->status_cb(mhi_cntrl, MHI_CB_SYS_ERROR);
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ prev_state = mhi_cntrl->pm_state;
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_SYS_ERR_PROCESS);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ if (cur_state != MHI_PM_SYS_ERR_PROCESS) {
++ dev_err(dev, "Failed to transition from PM state: %s to: %s\n",
++ to_mhi_pm_state_str(cur_state),
++ to_mhi_pm_state_str(MHI_PM_SYS_ERR_PROCESS));
++ goto exit_sys_error_transition;
++ }
++
++ mhi_cntrl->ee = MHI_EE_DISABLE_TRANSITION;
++ mhi_cntrl->dev_state = MHI_STATE_RESET;
++
++ /* Wake up threads waiting for state transition */
++ wake_up_all(&mhi_cntrl->state_event);
++
++ /* Trigger MHI RESET so that the device will not access host memory */
++ if (MHI_REG_ACCESS_VALID(prev_state)) {
++ u32 in_reset = -1;
++ unsigned long timeout = msecs_to_jiffies(mhi_cntrl->timeout_ms);
++
++ dev_dbg(dev, "Triggering MHI Reset in device\n");
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
++
++ /* Wait for the reset bit to be cleared by the device */
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_read_reg_field(mhi_cntrl,
++ mhi_cntrl->regs,
++ MHICTRL,
++ MHICTRL_RESET_MASK,
++ MHICTRL_RESET_SHIFT,
++ &in_reset) ||
++ !in_reset, timeout);
++ if (!ret || in_reset) {
++ dev_err(dev, "Device failed to exit MHI Reset state\n");
++ goto exit_sys_error_transition;
++ }
++
++ /*
++ * Device will clear BHI_INTVEC as a part of RESET processing,
++ * hence re-program it
++ */
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
++ }
++
++ dev_dbg(dev,
++ "Waiting for all pending event ring processing to complete\n");
++ mhi_event = mhi_cntrl->mhi_event;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
++ if (mhi_event->offload_ev)
++ continue;
++ tasklet_kill(&mhi_event->task);
++ }
++
++ /* Release lock and wait for all pending threads to complete */
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++ dev_dbg(dev, "Waiting for all pending threads to complete\n");
++ wake_up_all(&mhi_cntrl->state_event);
++
++ dev_dbg(dev, "Reset all active channels and remove MHI devices\n");
++ device_for_each_child(&mhi_cntrl->mhi_dev->dev, NULL, mhi_destroy_device);
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++
++ WARN_ON(atomic_read(&mhi_cntrl->dev_wake));
++ WARN_ON(atomic_read(&mhi_cntrl->pending_pkts));
++
++ /* Reset the ev rings and cmd rings */
++ dev_dbg(dev, "Resetting EV CTXT and CMD CTXT\n");
++ mhi_cmd = mhi_cntrl->mhi_cmd;
++ cmd_ctxt = mhi_cntrl->mhi_ctxt->cmd_ctxt;
++ for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++, cmd_ctxt++) {
++ struct mhi_ring *ring = &mhi_cmd->ring;
++
++ ring->rp = ring->base;
++ ring->wp = ring->base;
++ cmd_ctxt->rp = cmd_ctxt->rbase;
++ cmd_ctxt->wp = cmd_ctxt->rbase;
++ }
++
++ mhi_event = mhi_cntrl->mhi_event;
++ er_ctxt = mhi_cntrl->mhi_ctxt->er_ctxt;
++ for (i = 0; i < mhi_cntrl->total_ev_rings; i++, er_ctxt++,
++ mhi_event++) {
++ struct mhi_ring *ring = &mhi_event->ring;
++
++ /* Skip offload events */
++ if (mhi_event->offload_ev)
++ continue;
++
++ ring->rp = ring->base;
++ ring->wp = ring->base;
++ er_ctxt->rp = er_ctxt->rbase;
++ er_ctxt->wp = er_ctxt->rbase;
++ }
++
++ /* Transition to next state */
++ if (MHI_IN_PBL(mhi_get_exec_env(mhi_cntrl))) {
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_POR);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ if (cur_state != MHI_PM_POR) {
++ dev_err(dev, "Error moving to state %s from %s\n",
++ to_mhi_pm_state_str(MHI_PM_POR),
++ to_mhi_pm_state_str(cur_state));
++ goto exit_sys_error_transition;
++ }
++ next_state = DEV_ST_TRANSITION_PBL;
++ } else {
++ next_state = DEV_ST_TRANSITION_READY;
++ }
++
++ mhi_queue_state_transition(mhi_cntrl, next_state);
++
++exit_sys_error_transition:
++ dev_dbg(dev, "Exiting with PM state: %s, MHI state: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state));
++
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++}
++
++/* Queue a new work item and schedule work */
++int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl,
++ enum dev_st_transition state)
++{
++ struct state_transition *item = kmalloc(sizeof(*item), GFP_ATOMIC);
++ unsigned long flags;
++
++ if (!item)
++ return -ENOMEM;
++
++ item->state = state;
++ spin_lock_irqsave(&mhi_cntrl->transition_lock, flags);
++ list_add_tail(&item->node, &mhi_cntrl->transition_list);
++ spin_unlock_irqrestore(&mhi_cntrl->transition_lock, flags);
++
++ queue_work(mhi_cntrl->hiprio_wq, &mhi_cntrl->st_worker);
++
++ return 0;
++}
++
++/* SYS_ERR worker */
++void mhi_pm_sys_err_handler(struct mhi_controller *mhi_cntrl)
++{
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ /* skip if controller supports RDDM */
++ if (mhi_cntrl->rddm_image) {
++ dev_dbg(dev, "Controller supports RDDM, skip SYS_ERROR\n");
++ return;
++ }
++
++ mhi_queue_state_transition(mhi_cntrl, DEV_ST_TRANSITION_SYS_ERR);
++}
++
++/* Device State Transition worker */
++void mhi_pm_st_worker(struct work_struct *work)
++{
++ struct state_transition *itr, *tmp;
++ LIST_HEAD(head);
++ struct mhi_controller *mhi_cntrl = container_of(work,
++ struct mhi_controller,
++ st_worker);
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ spin_lock_irq(&mhi_cntrl->transition_lock);
++ list_splice_tail_init(&mhi_cntrl->transition_list, &head);
++ spin_unlock_irq(&mhi_cntrl->transition_lock);
++
++ list_for_each_entry_safe(itr, tmp, &head, node) {
++ list_del(&itr->node);
++ dev_dbg(dev, "Handling state transition: %s\n",
++ TO_DEV_STATE_TRANS_STR(itr->state));
++
++ switch (itr->state) {
++ case DEV_ST_TRANSITION_PBL:
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ if (MHI_REG_ACCESS_VALID(mhi_cntrl->pm_state))
++ mhi_cntrl->ee = mhi_get_exec_env(mhi_cntrl);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ mhi_fw_load_handler(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_SBL:
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->ee = MHI_EE_SBL;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ /*
++ * The MHI devices are only created when the client
++ * device switches its Execution Environment (EE) to
++ * either SBL or AMSS states
++ */
++ mhi_create_devices(mhi_cntrl);
++ if (mhi_cntrl->fbc_download)
++ mhi_download_amss_image(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_MISSION_MODE:
++ mhi_pm_mission_mode_transition(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_FP:
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_cntrl->ee = MHI_EE_FP;
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ mhi_create_devices(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_READY:
++ mhi_ready_state_transition(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_SYS_ERR:
++ mhi_pm_sys_error_transition(mhi_cntrl);
++ break;
++ case DEV_ST_TRANSITION_DISABLE:
++ mhi_pm_disable_transition(mhi_cntrl);
++ break;
++ default:
++ break;
++ }
++ kfree(itr);
++ }
++}
++
++int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
++{
++ struct mhi_chan *itr, *tmp;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_pm_state new_state;
++ int ret;
++
++ if (mhi_cntrl->pm_state == MHI_PM_DISABLE)
++ return -EINVAL;
++
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
++ return -EIO;
++
++ /* Return busy if there are any pending resources */
++ if (atomic_read(&mhi_cntrl->dev_wake) ||
++ atomic_read(&mhi_cntrl->pending_pkts))
++ return -EBUSY;
++
++ /* Take MHI out of M2 state */
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_cntrl->wake_get(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_cntrl->dev_state == MHI_STATE_M0 ||
++ mhi_cntrl->dev_state == MHI_STATE_M1 ||
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_cntrl->wake_put(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ dev_err(dev,
++ "Could not enter M0/M1 state");
++ return -EIO;
++ }
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++
++ if (atomic_read(&mhi_cntrl->dev_wake) ||
++ atomic_read(&mhi_cntrl->pending_pkts)) {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ return -EBUSY;
++ }
++
++ dev_dbg(dev, "Allowing M3 transition\n");
++ new_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3_ENTER);
++ if (new_state != MHI_PM_M3_ENTER) {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ dev_err(dev,
++ "Error setting to PM state: %s from: %s\n",
++ to_mhi_pm_state_str(MHI_PM_M3_ENTER),
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++ return -EIO;
++ }
++
++ /* Set MHI to M3 and wait for completion */
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ dev_dbg(dev, "Waiting for M3 completion\n");
++
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_cntrl->dev_state == MHI_STATE_M3 ||
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ dev_err(dev,
++ "Did not enter M3 state, MHI state: %s, PM state: %s\n",
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++ return -EIO;
++ }
++
++ /* Notify clients about entering LPM */
++ list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
++ mutex_lock(&itr->mutex);
++ if (itr->mhi_dev)
++ mhi_notify(itr->mhi_dev, MHI_CB_LPM_ENTER);
++ mutex_unlock(&itr->mutex);
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(mhi_pm_suspend);
++
++static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
++{
++ struct mhi_chan *itr, *tmp;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ enum mhi_pm_state cur_state;
++ int ret;
++
++ dev_dbg(dev, "Entered with PM state: %s, MHI state: %s\n",
++ to_mhi_pm_state_str(mhi_cntrl->pm_state),
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state));
++
++ if (mhi_cntrl->pm_state == MHI_PM_DISABLE)
++ return 0;
++
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
++ return -EIO;
++
++ if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
++ dev_warn(dev, "Resuming from non M3 state (%s)\n",
++ TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
++ if (!force)
++ return -EINVAL;
++ }
++
++ /* Notify clients about exiting LPM */
++ list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
++ mutex_lock(&itr->mutex);
++ if (itr->mhi_dev)
++ mhi_notify(itr->mhi_dev, MHI_CB_LPM_EXIT);
++ mutex_unlock(&itr->mutex);
++ }
++
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, MHI_PM_M3_EXIT);
++ if (cur_state != MHI_PM_M3_EXIT) {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ dev_info(dev,
++ "Error setting to PM state: %s from: %s\n",
++ to_mhi_pm_state_str(MHI_PM_M3_EXIT),
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++ return -EIO;
++ }
++
++ /* Set MHI to M0 and wait for completion */
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_cntrl->dev_state == MHI_STATE_M0 ||
++ mhi_cntrl->dev_state == MHI_STATE_M2 ||
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ dev_err(dev,
++ "Did not enter M0 state, MHI state: %s, PM state: %s\n",
++ TO_MHI_STATE_STR(mhi_cntrl->dev_state),
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++ return -EIO;
++ }
++
++ return 0;
++}
++
++int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
++{
++ return __mhi_pm_resume(mhi_cntrl, false);
++}
++EXPORT_SYMBOL_GPL(mhi_pm_resume);
++
++int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl)
++{
++ return __mhi_pm_resume(mhi_cntrl, true);
++}
++EXPORT_SYMBOL_GPL(mhi_pm_resume_force);
++
++int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl)
++{
++ int ret;
++
++ /* Wake up the device */
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ return -EIO;
++ }
++ mhi_cntrl->wake_get(mhi_cntrl, true);
++ if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
++ mhi_trigger_resume(mhi_cntrl);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_cntrl->pm_state == MHI_PM_M0 ||
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ if (!ret || MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) {
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ mhi_cntrl->wake_put(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++ return -EIO;
++ }
++
++ return 0;
++}
++
++/* Assert device wake db */
++static void mhi_assert_dev_wake(struct mhi_controller *mhi_cntrl, bool force)
++{
++ unsigned long flags;
++
++ /*
++ * If force flag is set, then increment the wake count value and
++ * ring wake db
++ */
++ if (unlikely(force)) {
++ spin_lock_irqsave(&mhi_cntrl->wlock, flags);
++ atomic_inc(&mhi_cntrl->dev_wake);
++ if (MHI_WAKE_DB_FORCE_SET_VALID(mhi_cntrl->pm_state) &&
++ !mhi_cntrl->wake_set) {
++ mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 1);
++ mhi_cntrl->wake_set = true;
++ }
++ spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
++ } else {
++ /*
++ * If resources are already requested, then just increment
++ * the wake count value and return
++ */
++ if (likely(atomic_add_unless(&mhi_cntrl->dev_wake, 1, 0)))
++ return;
++
++ spin_lock_irqsave(&mhi_cntrl->wlock, flags);
++ if ((atomic_inc_return(&mhi_cntrl->dev_wake) == 1) &&
++ MHI_WAKE_DB_SET_VALID(mhi_cntrl->pm_state) &&
++ !mhi_cntrl->wake_set) {
++ mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 1);
++ mhi_cntrl->wake_set = true;
++ }
++ spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
++ }
++}
++
++/* De-assert device wake db */
++static void mhi_deassert_dev_wake(struct mhi_controller *mhi_cntrl,
++ bool override)
++{
++ unsigned long flags;
++
++ /*
++ * Only continue if there is a single resource, else just decrement
++ * and return
++ */
++ if (likely(atomic_add_unless(&mhi_cntrl->dev_wake, -1, 1)))
++ return;
++
++ spin_lock_irqsave(&mhi_cntrl->wlock, flags);
++ if ((atomic_dec_return(&mhi_cntrl->dev_wake) == 0) &&
++ MHI_WAKE_DB_CLEAR_VALID(mhi_cntrl->pm_state) && !override &&
++ mhi_cntrl->wake_set) {
++ mhi_write_db(mhi_cntrl, mhi_cntrl->wake_db, 0);
++ mhi_cntrl->wake_set = false;
++ }
++ spin_unlock_irqrestore(&mhi_cntrl->wlock, flags);
++}
++
++int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
++{
++ enum mhi_state state;
++ enum mhi_ee_type current_ee;
++ enum dev_st_transition next_state;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ u32 interval_us = 25000; /* poll register field every 25 milliseconds */
++ int ret;
++
++ dev_info(dev, "Requested to power ON\n");
++
++ /* Supply default wake routines if not provided by controller driver */
++ if (!mhi_cntrl->wake_get || !mhi_cntrl->wake_put ||
++ !mhi_cntrl->wake_toggle) {
++ mhi_cntrl->wake_get = mhi_assert_dev_wake;
++ mhi_cntrl->wake_put = mhi_deassert_dev_wake;
++ mhi_cntrl->wake_toggle = (mhi_cntrl->db_access & MHI_PM_M2) ?
++ mhi_toggle_dev_wake_nop : mhi_toggle_dev_wake;
++ }
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++ mhi_cntrl->pm_state = MHI_PM_DISABLE;
++
++ /* Setup BHI INTVEC */
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
++ mhi_cntrl->pm_state = MHI_PM_POR;
++ mhi_cntrl->ee = MHI_EE_MAX;
++ current_ee = mhi_get_exec_env(mhi_cntrl);
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++
++ /* Confirm that the device is in valid exec env */
++ if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) {
++ dev_err(dev, "%s is not a valid EE for power on\n",
++ TO_MHI_EXEC_STR(current_ee));
++ ret = -EIO;
++ goto error_exit;
++ }
++
++ state = mhi_get_mhi_state(mhi_cntrl);
++ dev_dbg(dev, "Attempting power on with EE: %s, state: %s\n",
++ TO_MHI_EXEC_STR(current_ee), TO_MHI_STATE_STR(state));
++
++ if (state == MHI_STATE_SYS_ERR) {
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
++ ret = mhi_poll_reg_field(mhi_cntrl, mhi_cntrl->regs, MHICTRL,
++ MHICTRL_RESET_MASK, MHICTRL_RESET_SHIFT, 0,
++ interval_us);
++ if (ret) {
++ dev_info(dev, "Failed to reset MHI due to syserr state\n");
++ goto error_exit;
++ }
++
++ /*
++		 * Device clears BHI_INTVEC as a part of RESET processing,
++		 * hence re-program it
++ */
++ mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
++ }
++
++ ret = mhi_init_irq_setup(mhi_cntrl);
++ if (ret)
++ goto error_exit;
++
++ /* Transition to next state */
++ next_state = MHI_IN_PBL(current_ee) ?
++ DEV_ST_TRANSITION_PBL : DEV_ST_TRANSITION_READY;
++
++ mhi_queue_state_transition(mhi_cntrl, next_state);
++
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++
++ dev_info(dev, "Power on setup success\n");
++
++ return 0;
++
++error_exit:
++ mhi_cntrl->pm_state = MHI_PM_DISABLE;
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_async_power_up);
++
++void mhi_power_down(struct mhi_controller *mhi_cntrl, bool graceful)
++{
++ enum mhi_pm_state cur_state, transition_state;
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++
++ mutex_lock(&mhi_cntrl->pm_mutex);
++ write_lock_irq(&mhi_cntrl->pm_lock);
++ cur_state = mhi_cntrl->pm_state;
++ if (cur_state == MHI_PM_DISABLE) {
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++ return; /* Already powered down */
++ }
++
++ /* If it's not a graceful shutdown, force MHI to linkdown state */
++ transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS :
++ MHI_PM_LD_ERR_FATAL_DETECT;
++
++ cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state);
++ if (cur_state != transition_state) {
++ dev_err(dev, "Failed to move to state: %s from: %s\n",
++ to_mhi_pm_state_str(transition_state),
++ to_mhi_pm_state_str(mhi_cntrl->pm_state));
++ /* Force link down or error fatal detected state */
++ mhi_cntrl->pm_state = MHI_PM_LD_ERR_FATAL_DETECT;
++ }
++
++ /* mark device inactive to avoid any further host processing */
++ mhi_cntrl->ee = MHI_EE_DISABLE_TRANSITION;
++ mhi_cntrl->dev_state = MHI_STATE_RESET;
++
++ wake_up_all(&mhi_cntrl->state_event);
++
++ write_unlock_irq(&mhi_cntrl->pm_lock);
++ mutex_unlock(&mhi_cntrl->pm_mutex);
++
++ mhi_queue_state_transition(mhi_cntrl, DEV_ST_TRANSITION_DISABLE);
++
++ /* Wait for shutdown to complete */
++ flush_work(&mhi_cntrl->st_worker);
++
++ free_irq(mhi_cntrl->irq[0], mhi_cntrl);
++}
++EXPORT_SYMBOL_GPL(mhi_power_down);
++
++int mhi_sync_power_up(struct mhi_controller *mhi_cntrl)
++{
++ int ret = mhi_async_power_up(mhi_cntrl);
++
++ if (ret)
++ return ret;
++
++ wait_event_timeout(mhi_cntrl->state_event,
++ MHI_IN_MISSION_MODE(mhi_cntrl->ee) ||
++ MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++
++ ret = (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -ETIMEDOUT;
++ if (ret)
++ mhi_power_down(mhi_cntrl, false);
++
++ return ret;
++}
++EXPORT_SYMBOL(mhi_sync_power_up);
++
++int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl)
++{
++ struct device *dev = &mhi_cntrl->mhi_dev->dev;
++ int ret;
++
++ /* Check if device is already in RDDM */
++ if (mhi_cntrl->ee == MHI_EE_RDDM)
++ return 0;
++
++ dev_dbg(dev, "Triggering SYS_ERR to force RDDM state\n");
++ mhi_set_mhi_state(mhi_cntrl, MHI_STATE_SYS_ERR);
++
++ /* Wait for RDDM event */
++ ret = wait_event_timeout(mhi_cntrl->state_event,
++ mhi_cntrl->ee == MHI_EE_RDDM,
++ msecs_to_jiffies(mhi_cntrl->timeout_ms));
++ ret = ret ? 0 : -EIO;
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_force_rddm_mode);
++
++void mhi_device_get(struct mhi_device *mhi_dev)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++
++ mhi_dev->dev_wake++;
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
++ mhi_trigger_resume(mhi_cntrl);
++
++ mhi_cntrl->wake_get(mhi_cntrl, true);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++}
++EXPORT_SYMBOL_GPL(mhi_device_get);
++
++int mhi_device_get_sync(struct mhi_device *mhi_dev)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++ int ret;
++
++ ret = __mhi_device_get_sync(mhi_cntrl);
++ if (!ret)
++ mhi_dev->dev_wake++;
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(mhi_device_get_sync);
++
++void mhi_device_put(struct mhi_device *mhi_dev)
++{
++ struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
++
++ mhi_dev->dev_wake--;
++ read_lock_bh(&mhi_cntrl->pm_lock);
++ if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
++ mhi_trigger_resume(mhi_cntrl);
++
++ mhi_cntrl->wake_put(mhi_cntrl, false);
++ read_unlock_bh(&mhi_cntrl->pm_lock);
++}
++EXPORT_SYMBOL_GPL(mhi_device_put);
+diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
+deleted file mode 100644
+index 59a4896a80309..0000000000000
+--- a/drivers/bus/mhi/pci_generic.c
++++ /dev/null
+@@ -1,1062 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * MHI PCI driver - MHI over PCI controller driver
+- *
+- * This module is a generic driver for registering MHI-over-PCI devices,
+- * such as PCIe QCOM modems.
+- *
+- * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
+- */
+-
+-#include <linux/aer.h>
+-#include <linux/delay.h>
+-#include <linux/device.h>
+-#include <linux/mhi.h>
+-#include <linux/module.h>
+-#include <linux/pci.h>
+-#include <linux/pm_runtime.h>
+-#include <linux/timer.h>
+-#include <linux/workqueue.h>
+-
+-#define MHI_PCI_DEFAULT_BAR_NUM 0
+-
+-#define MHI_POST_RESET_DELAY_MS 500
+-
+-#define HEALTH_CHECK_PERIOD (HZ * 2)
+-
+-/**
+- * struct mhi_pci_dev_info - MHI PCI device specific information
+- * @config: MHI controller configuration
+- * @name: name of the PCI module
+- * @fw: firmware path (if any)
+- * @edl: emergency download mode firmware path (if any)
+- * @bar_num: PCI base address register to use for MHI MMIO register space
+- * @dma_data_width: DMA transfer word size (32 or 64 bits)
+- * @mru_default: default MRU size for MBIM network packets
+- * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
+- * of inband wake support (such as sdx24)
+- */
+-struct mhi_pci_dev_info {
+- const struct mhi_controller_config *config;
+- const char *name;
+- const char *fw;
+- const char *edl;
+- unsigned int bar_num;
+- unsigned int dma_data_width;
+- unsigned int mru_default;
+- bool sideband_wake;
+-};
+-
+-#define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_TO_DEVICE, \
+- .ee_mask = BIT(MHI_EE_AMSS), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- } \
+-
+-#define MHI_CHANNEL_CONFIG_DL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_FROM_DEVICE, \
+- .ee_mask = BIT(MHI_EE_AMSS), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- }
+-
+-#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_FROM_DEVICE, \
+- .ee_mask = BIT(MHI_EE_AMSS), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- .auto_queue = true, \
+- }
+-
+-#define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \
+- { \
+- .num_elements = el_count, \
+- .irq_moderation_ms = 0, \
+- .irq = (ev_ring) + 1, \
+- .priority = 1, \
+- .mode = MHI_DB_BRST_DISABLE, \
+- .data_type = MHI_ER_CTRL, \
+- .hardware_event = false, \
+- .client_managed = false, \
+- .offload_channel = false, \
+- }
+-
+-#define MHI_CHANNEL_CONFIG_HW_UL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_TO_DEVICE, \
+- .ee_mask = BIT(MHI_EE_AMSS), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_ENABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = true, \
+- } \
+-
+-#define MHI_CHANNEL_CONFIG_HW_DL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_FROM_DEVICE, \
+- .ee_mask = BIT(MHI_EE_AMSS), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_ENABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = true, \
+- }
+-
+-#define MHI_CHANNEL_CONFIG_UL_SBL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_TO_DEVICE, \
+- .ee_mask = BIT(MHI_EE_SBL), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- } \
+-
+-#define MHI_CHANNEL_CONFIG_DL_SBL(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_FROM_DEVICE, \
+- .ee_mask = BIT(MHI_EE_SBL), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- }
+-
+-#define MHI_CHANNEL_CONFIG_UL_FP(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_TO_DEVICE, \
+- .ee_mask = BIT(MHI_EE_FP), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- } \
+-
+-#define MHI_CHANNEL_CONFIG_DL_FP(ch_num, ch_name, el_count, ev_ring) \
+- { \
+- .num = ch_num, \
+- .name = ch_name, \
+- .num_elements = el_count, \
+- .event_ring = ev_ring, \
+- .dir = DMA_FROM_DEVICE, \
+- .ee_mask = BIT(MHI_EE_FP), \
+- .pollcfg = 0, \
+- .doorbell = MHI_DB_BRST_DISABLE, \
+- .lpm_notify = false, \
+- .offload_channel = false, \
+- .doorbell_mode_switch = false, \
+- }
+-
+-#define MHI_EVENT_CONFIG_DATA(ev_ring, el_count) \
+- { \
+- .num_elements = el_count, \
+- .irq_moderation_ms = 5, \
+- .irq = (ev_ring) + 1, \
+- .priority = 1, \
+- .mode = MHI_DB_BRST_DISABLE, \
+- .data_type = MHI_ER_DATA, \
+- .hardware_event = false, \
+- .client_managed = false, \
+- .offload_channel = false, \
+- }
+-
+-#define MHI_EVENT_CONFIG_HW_DATA(ev_ring, el_count, ch_num) \
+- { \
+- .num_elements = el_count, \
+- .irq_moderation_ms = 1, \
+- .irq = (ev_ring) + 1, \
+- .priority = 1, \
+- .mode = MHI_DB_BRST_DISABLE, \
+- .data_type = MHI_ER_DATA, \
+- .hardware_event = true, \
+- .client_managed = false, \
+- .offload_channel = false, \
+- .channel = ch_num, \
+- }
+-
+-static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
+- MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1),
+- MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1),
+- MHI_CHANNEL_CONFIG_UL(12, "MBIM", 4, 0),
+- MHI_CHANNEL_CONFIG_DL(13, "MBIM", 4, 0),
+- MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
+- MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
+- MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
+- MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0),
+- MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
+- MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
+- MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
+- MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 3),
+-};
+-
+-static struct mhi_event_config modem_qcom_v1_mhi_events[] = {
+- /* first ring is control+data ring */
+- MHI_EVENT_CONFIG_CTRL(0, 64),
+- /* DIAG dedicated event ring */
+- MHI_EVENT_CONFIG_DATA(1, 128),
+- /* Hardware channels request dedicated hardware event rings */
+- MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
+- MHI_EVENT_CONFIG_HW_DATA(3, 2048, 101)
+-};
+-
+-static const struct mhi_controller_config modem_qcom_v1_mhiv_config = {
+- .max_channels = 128,
+- .timeout_ms = 8000,
+- .num_channels = ARRAY_SIZE(modem_qcom_v1_mhi_channels),
+- .ch_cfg = modem_qcom_v1_mhi_channels,
+- .num_events = ARRAY_SIZE(modem_qcom_v1_mhi_events),
+- .event_cfg = modem_qcom_v1_mhi_events,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = {
+- .name = "qcom-sdx65m",
+- .fw = "qcom/sdx65m/xbl.elf",
+- .edl = "qcom/sdx65m/edl.mbn",
+- .config = &modem_qcom_v1_mhiv_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+- .sideband_wake = false,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
+- .name = "qcom-sdx55m",
+- .fw = "qcom/sdx55m/sbl1.mbn",
+- .edl = "qcom/sdx55m/edl.mbn",
+- .config = &modem_qcom_v1_mhiv_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+- .mru_default = 32768,
+- .sideband_wake = false,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
+- .name = "qcom-sdx24",
+- .edl = "qcom/prog_firehose_sdx24.mbn",
+- .config = &modem_qcom_v1_mhiv_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+- .sideband_wake = true,
+-};
+-
+-static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = {
+- MHI_CHANNEL_CONFIG_UL(0, "NMEA", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(1, "NMEA", 32, 0),
+- MHI_CHANNEL_CONFIG_UL_SBL(2, "SAHARA", 32, 0),
+- MHI_CHANNEL_CONFIG_DL_SBL(3, "SAHARA", 32, 0),
+- MHI_CHANNEL_CONFIG_UL(4, "DIAG", 32, 1),
+- MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 1),
+- MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
+- MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
+- /* The EDL firmware is a flash-programmer exposing firehose protocol */
+- MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
+- MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
+- MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
+- MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
+-};
+-
+-static struct mhi_event_config mhi_quectel_em1xx_events[] = {
+- MHI_EVENT_CONFIG_CTRL(0, 128),
+- MHI_EVENT_CONFIG_DATA(1, 128),
+- MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
+- MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101)
+-};
+-
+-static const struct mhi_controller_config modem_quectel_em1xx_config = {
+- .max_channels = 128,
+- .timeout_ms = 20000,
+- .num_channels = ARRAY_SIZE(mhi_quectel_em1xx_channels),
+- .ch_cfg = mhi_quectel_em1xx_channels,
+- .num_events = ARRAY_SIZE(mhi_quectel_em1xx_events),
+- .event_cfg = mhi_quectel_em1xx_events,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = {
+- .name = "quectel-em1xx",
+- .edl = "qcom/prog_firehose_sdx24.mbn",
+- .config = &modem_quectel_em1xx_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+- .sideband_wake = true,
+-};
+-
+-static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
+- MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 32, 0),
+- MHI_CHANNEL_CONFIG_UL(4, "DIAG", 32, 1),
+- MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 1),
+- MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
+- MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
+- MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
+- MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
+- MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
+-};
+-
+-static struct mhi_event_config mhi_foxconn_sdx55_events[] = {
+- MHI_EVENT_CONFIG_CTRL(0, 128),
+- MHI_EVENT_CONFIG_DATA(1, 128),
+- MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
+- MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101)
+-};
+-
+-static const struct mhi_controller_config modem_foxconn_sdx55_config = {
+- .max_channels = 128,
+- .timeout_ms = 20000,
+- .num_channels = ARRAY_SIZE(mhi_foxconn_sdx55_channels),
+- .ch_cfg = mhi_foxconn_sdx55_channels,
+- .num_events = ARRAY_SIZE(mhi_foxconn_sdx55_events),
+- .event_cfg = mhi_foxconn_sdx55_events,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
+- .name = "foxconn-sdx55",
+- .fw = "qcom/sdx55m/sbl1.mbn",
+- .edl = "qcom/sdx55m/edl.mbn",
+- .config = &modem_foxconn_sdx55_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+- .sideband_wake = false,
+-};
+-
+-static const struct mhi_channel_config mhi_mv31_channels[] = {
+- MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 64, 0),
+- MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 64, 0),
+- /* MBIM Control Channel */
+- MHI_CHANNEL_CONFIG_UL(12, "MBIM", 64, 0),
+- MHI_CHANNEL_CONFIG_DL(13, "MBIM", 64, 0),
+- /* MBIM Data Channel */
+- MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 512, 2),
+- MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 512, 3),
+-};
+-
+-static struct mhi_event_config mhi_mv31_events[] = {
+- MHI_EVENT_CONFIG_CTRL(0, 256),
+- MHI_EVENT_CONFIG_DATA(1, 256),
+- MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100),
+- MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101),
+-};
+-
+-static const struct mhi_controller_config modem_mv31_config = {
+- .max_channels = 128,
+- .timeout_ms = 20000,
+- .num_channels = ARRAY_SIZE(mhi_mv31_channels),
+- .ch_cfg = mhi_mv31_channels,
+- .num_events = ARRAY_SIZE(mhi_mv31_events),
+- .event_cfg = mhi_mv31_events,
+-};
+-
+-static const struct mhi_pci_dev_info mhi_mv31_info = {
+- .name = "cinterion-mv31",
+- .config = &modem_mv31_config,
+- .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
+- .dma_data_width = 32,
+-};
+-
+-static const struct pci_device_id mhi_pci_id_table[] = {
+- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306),
+- .driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info },
+- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304),
+- .driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info },
+- { PCI_DEVICE(0x1eac, 0x1001), /* EM120R-GL (sdx24) */
+- .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
+- { PCI_DEVICE(0x1eac, 0x1002), /* EM160R-GL (sdx24) */
+- .driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
+- { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0308),
+- .driver_data = (kernel_ulong_t) &mhi_qcom_sdx65_info },
+- /* T99W175 (sdx55), Both for eSIM and Non-eSIM */
+- { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0ab),
+- .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
+- /* DW5930e (sdx55), With eSIM, It's also T99W175 */
+- { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b0),
+- .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
+- /* DW5930e (sdx55), Non-eSIM, It's also T99W175 */
+- { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b1),
+- .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
+- /* MV31-W (Cinterion) */
+- { PCI_DEVICE(0x1269, 0x00b3),
+- .driver_data = (kernel_ulong_t) &mhi_mv31_info },
+- { }
+-};
+-MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
+-
+-enum mhi_pci_device_status {
+- MHI_PCI_DEV_STARTED,
+- MHI_PCI_DEV_SUSPENDED,
+-};
+-
+-struct mhi_pci_device {
+- struct mhi_controller mhi_cntrl;
+- struct pci_saved_state *pci_state;
+- struct work_struct recovery_work;
+- struct timer_list health_check_timer;
+- unsigned long status;
+-};
+-
+-static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
+- void __iomem *addr, u32 *out)
+-{
+- *out = readl(addr);
+- return 0;
+-}
+-
+-static void mhi_pci_write_reg(struct mhi_controller *mhi_cntrl,
+- void __iomem *addr, u32 val)
+-{
+- writel(val, addr);
+-}
+-
+-static void mhi_pci_status_cb(struct mhi_controller *mhi_cntrl,
+- enum mhi_callback cb)
+-{
+- struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+-
+- /* Nothing to do for now */
+- switch (cb) {
+- case MHI_CB_FATAL_ERROR:
+- case MHI_CB_SYS_ERROR:
+- dev_warn(&pdev->dev, "firmware crashed (%u)\n", cb);
+- pm_runtime_forbid(&pdev->dev);
+- break;
+- case MHI_CB_EE_MISSION_MODE:
+- pm_runtime_allow(&pdev->dev);
+- break;
+- default:
+- break;
+- }
+-}
+-
+-static void mhi_pci_wake_get_nop(struct mhi_controller *mhi_cntrl, bool force)
+-{
+- /* no-op */
+-}
+-
+-static void mhi_pci_wake_put_nop(struct mhi_controller *mhi_cntrl, bool override)
+-{
+- /* no-op */
+-}
+-
+-static void mhi_pci_wake_toggle_nop(struct mhi_controller *mhi_cntrl)
+-{
+- /* no-op */
+-}
+-
+-static bool mhi_pci_is_alive(struct mhi_controller *mhi_cntrl)
+-{
+- struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+- u16 vendor = 0;
+-
+- if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor))
+- return false;
+-
+- if (vendor == (u16) ~0 || vendor == 0)
+- return false;
+-
+- return true;
+-}
+-
+-static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
+- unsigned int bar_num, u64 dma_mask)
+-{
+- struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+- int err;
+-
+- err = pci_assign_resource(pdev, bar_num);
+- if (err)
+- return err;
+-
+- err = pcim_enable_device(pdev);
+- if (err) {
+- dev_err(&pdev->dev, "failed to enable pci device: %d\n", err);
+- return err;
+- }
+-
+- err = pcim_iomap_regions(pdev, 1 << bar_num, pci_name(pdev));
+- if (err) {
+- dev_err(&pdev->dev, "failed to map pci region: %d\n", err);
+- return err;
+- }
+- mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num];
+- mhi_cntrl->reg_len = pci_resource_len(pdev, bar_num);
+-
+- err = pci_set_dma_mask(pdev, dma_mask);
+- if (err) {
+- dev_err(&pdev->dev, "Cannot set proper DMA mask\n");
+- return err;
+- }
+-
+- err = pci_set_consistent_dma_mask(pdev, dma_mask);
+- if (err) {
+- dev_err(&pdev->dev, "set consistent dma mask failed\n");
+- return err;
+- }
+-
+- pci_set_master(pdev);
+-
+- return 0;
+-}
+-
+-static int mhi_pci_get_irqs(struct mhi_controller *mhi_cntrl,
+- const struct mhi_controller_config *mhi_cntrl_config)
+-{
+- struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+- int nr_vectors, i;
+- int *irq;
+-
+- /*
+- * Alloc one MSI vector for BHI + one vector per event ring, ideally...
+- * No explicit pci_free_irq_vectors required, done by pcim_release.
+- */
+- mhi_cntrl->nr_irqs = 1 + mhi_cntrl_config->num_events;
+-
+- nr_vectors = pci_alloc_irq_vectors(pdev, 1, mhi_cntrl->nr_irqs, PCI_IRQ_MSI);
+- if (nr_vectors < 0) {
+- dev_err(&pdev->dev, "Error allocating MSI vectors %d\n",
+- nr_vectors);
+- return nr_vectors;
+- }
+-
+- if (nr_vectors < mhi_cntrl->nr_irqs) {
+- dev_warn(&pdev->dev, "using shared MSI\n");
+-
+- /* Patch msi vectors, use only one (shared) */
+- for (i = 0; i < mhi_cntrl_config->num_events; i++)
+- mhi_cntrl_config->event_cfg[i].irq = 0;
+- mhi_cntrl->nr_irqs = 1;
+- }
+-
+- irq = devm_kcalloc(&pdev->dev, mhi_cntrl->nr_irqs, sizeof(int), GFP_KERNEL);
+- if (!irq)
+- return -ENOMEM;
+-
+- for (i = 0; i < mhi_cntrl->nr_irqs; i++) {
+- int vector = i >= nr_vectors ? (nr_vectors - 1) : i;
+-
+- irq[i] = pci_irq_vector(pdev, vector);
+- }
+-
+- mhi_cntrl->irq = irq;
+-
+- return 0;
+-}
+-
+-static int mhi_pci_runtime_get(struct mhi_controller *mhi_cntrl)
+-{
+- /* The runtime_get() MHI callback means:
+- * Do whatever is requested to leave M3.
+- */
+- return pm_runtime_get(mhi_cntrl->cntrl_dev);
+-}
+-
+-static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl)
+-{
+- /* The runtime_put() MHI callback means:
+- * Device can be moved in M3 state.
+- */
+- pm_runtime_mark_last_busy(mhi_cntrl->cntrl_dev);
+- pm_runtime_put(mhi_cntrl->cntrl_dev);
+-}
+-
+-static void mhi_pci_recovery_work(struct work_struct *work)
+-{
+- struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device,
+- recovery_work);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+- struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+- int err;
+-
+- dev_warn(&pdev->dev, "device recovery started\n");
+-
+- del_timer(&mhi_pdev->health_check_timer);
+- pm_runtime_forbid(&pdev->dev);
+-
+- /* Clean up MHI state */
+- if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+- mhi_power_down(mhi_cntrl, false);
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- }
+-
+- pci_set_power_state(pdev, PCI_D0);
+- pci_load_saved_state(pdev, mhi_pdev->pci_state);
+- pci_restore_state(pdev);
+-
+- if (!mhi_pci_is_alive(mhi_cntrl))
+- goto err_try_reset;
+-
+- err = mhi_prepare_for_power_up(mhi_cntrl);
+- if (err)
+- goto err_try_reset;
+-
+- err = mhi_sync_power_up(mhi_cntrl);
+- if (err)
+- goto err_unprepare;
+-
+- dev_dbg(&pdev->dev, "Recovery completed\n");
+-
+- set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+- mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+- return;
+-
+-err_unprepare:
+- mhi_unprepare_after_power_down(mhi_cntrl);
+-err_try_reset:
+- if (pci_reset_function(pdev))
+- dev_err(&pdev->dev, "Recovery failed\n");
+-}
+-
+-static void health_check(struct timer_list *t)
+-{
+- struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
+- test_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
+- return;
+-
+- if (!mhi_pci_is_alive(mhi_cntrl)) {
+- dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
+- queue_work(system_long_wq, &mhi_pdev->recovery_work);
+- return;
+- }
+-
+- /* reschedule in two seconds */
+- mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+-}
+-
+-static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+-{
+- const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
+- const struct mhi_controller_config *mhi_cntrl_config;
+- struct mhi_pci_device *mhi_pdev;
+- struct mhi_controller *mhi_cntrl;
+- int err;
+-
+- dev_dbg(&pdev->dev, "MHI PCI device found: %s\n", info->name);
+-
+- /* mhi_pdev.mhi_cntrl must be zero-initialized */
+- mhi_pdev = devm_kzalloc(&pdev->dev, sizeof(*mhi_pdev), GFP_KERNEL);
+- if (!mhi_pdev)
+- return -ENOMEM;
+-
+- INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
+- timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
+-
+- mhi_cntrl_config = info->config;
+- mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- mhi_cntrl->cntrl_dev = &pdev->dev;
+- mhi_cntrl->iova_start = 0;
+- mhi_cntrl->iova_stop = (dma_addr_t)DMA_BIT_MASK(info->dma_data_width);
+- mhi_cntrl->fw_image = info->fw;
+- mhi_cntrl->edl_image = info->edl;
+-
+- mhi_cntrl->read_reg = mhi_pci_read_reg;
+- mhi_cntrl->write_reg = mhi_pci_write_reg;
+- mhi_cntrl->status_cb = mhi_pci_status_cb;
+- mhi_cntrl->runtime_get = mhi_pci_runtime_get;
+- mhi_cntrl->runtime_put = mhi_pci_runtime_put;
+- mhi_cntrl->mru = info->mru_default;
+-
+- if (info->sideband_wake) {
+- mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
+- mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
+- mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
+- }
+-
+- err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
+- if (err)
+- return err;
+-
+- err = mhi_pci_get_irqs(mhi_cntrl, mhi_cntrl_config);
+- if (err)
+- return err;
+-
+- pci_set_drvdata(pdev, mhi_pdev);
+-
+- /* Have stored pci confspace at hand for restore in sudden PCI error.
+- * cache the state locally and discard the PCI core one.
+- */
+- pci_save_state(pdev);
+- mhi_pdev->pci_state = pci_store_saved_state(pdev);
+- pci_load_saved_state(pdev, NULL);
+-
+- pci_enable_pcie_error_reporting(pdev);
+-
+- err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
+- if (err)
+- goto err_disable_reporting;
+-
+- /* MHI bus does not power up the controller by default */
+- err = mhi_prepare_for_power_up(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to prepare MHI controller\n");
+- goto err_unregister;
+- }
+-
+- err = mhi_sync_power_up(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to power up MHI controller\n");
+- goto err_unprepare;
+- }
+-
+- set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+-
+- /* start health check */
+- mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+-
+- /* Only allow runtime-suspend if PME capable (for wakeup) */
+- if (pci_pme_capable(pdev, PCI_D3hot)) {
+- pm_runtime_set_autosuspend_delay(&pdev->dev, 2000);
+- pm_runtime_use_autosuspend(&pdev->dev);
+- pm_runtime_mark_last_busy(&pdev->dev);
+- pm_runtime_put_noidle(&pdev->dev);
+- }
+-
+- return 0;
+-
+-err_unprepare:
+- mhi_unprepare_after_power_down(mhi_cntrl);
+-err_unregister:
+- mhi_unregister_controller(mhi_cntrl);
+-err_disable_reporting:
+- pci_disable_pcie_error_reporting(pdev);
+-
+- return err;
+-}
+-
+-static void mhi_pci_remove(struct pci_dev *pdev)
+-{
+- struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- del_timer_sync(&mhi_pdev->health_check_timer);
+- cancel_work_sync(&mhi_pdev->recovery_work);
+-
+- if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+- mhi_power_down(mhi_cntrl, true);
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- }
+-
+- /* balancing probe put_noidle */
+- if (pci_pme_capable(pdev, PCI_D3hot))
+- pm_runtime_get_noresume(&pdev->dev);
+-
+- mhi_unregister_controller(mhi_cntrl);
+- pci_disable_pcie_error_reporting(pdev);
+-}
+-
+-static void mhi_pci_shutdown(struct pci_dev *pdev)
+-{
+- mhi_pci_remove(pdev);
+- pci_set_power_state(pdev, PCI_D3hot);
+-}
+-
+-static void mhi_pci_reset_prepare(struct pci_dev *pdev)
+-{
+- struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- dev_info(&pdev->dev, "reset\n");
+-
+- del_timer(&mhi_pdev->health_check_timer);
+-
+- /* Clean up MHI state */
+- if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+- mhi_power_down(mhi_cntrl, false);
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- }
+-
+- /* cause internal device reset */
+- mhi_soc_reset(mhi_cntrl);
+-
+- /* Be sure device reset has been executed */
+- msleep(MHI_POST_RESET_DELAY_MS);
+-}
+-
+-static void mhi_pci_reset_done(struct pci_dev *pdev)
+-{
+- struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+- int err;
+-
+- /* Restore initial known working PCI state */
+- pci_load_saved_state(pdev, mhi_pdev->pci_state);
+- pci_restore_state(pdev);
+-
+- /* Is device status available ? */
+- if (!mhi_pci_is_alive(mhi_cntrl)) {
+- dev_err(&pdev->dev, "reset failed\n");
+- return;
+- }
+-
+- err = mhi_prepare_for_power_up(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to prepare MHI controller\n");
+- return;
+- }
+-
+- err = mhi_sync_power_up(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to power up MHI controller\n");
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- return;
+- }
+-
+- set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+- mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+-}
+-
+-static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
+- pci_channel_state_t state)
+-{
+- struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
+-
+- if (state == pci_channel_io_perm_failure)
+- return PCI_ERS_RESULT_DISCONNECT;
+-
+- /* Clean up MHI state */
+- if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+- mhi_power_down(mhi_cntrl, false);
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- } else {
+- /* Nothing to do */
+- return PCI_ERS_RESULT_RECOVERED;
+- }
+-
+- pci_disable_device(pdev);
+-
+- return PCI_ERS_RESULT_NEED_RESET;
+-}
+-
+-static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
+-{
+- if (pci_enable_device(pdev)) {
+- dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
+- return PCI_ERS_RESULT_DISCONNECT;
+- }
+-
+- return PCI_ERS_RESULT_RECOVERED;
+-}
+-
+-static void mhi_pci_io_resume(struct pci_dev *pdev)
+-{
+- struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+-
+- dev_err(&pdev->dev, "PCI slot reset done\n");
+-
+- queue_work(system_long_wq, &mhi_pdev->recovery_work);
+-}
+-
+-static const struct pci_error_handlers mhi_pci_err_handler = {
+- .error_detected = mhi_pci_error_detected,
+- .slot_reset = mhi_pci_slot_reset,
+- .resume = mhi_pci_io_resume,
+- .reset_prepare = mhi_pci_reset_prepare,
+- .reset_done = mhi_pci_reset_done,
+-};
+-
+-static int __maybe_unused mhi_pci_runtime_suspend(struct device *dev)
+-{
+- struct pci_dev *pdev = to_pci_dev(dev);
+- struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+- int err;
+-
+- if (test_and_set_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
+- return 0;
+-
+- del_timer(&mhi_pdev->health_check_timer);
+- cancel_work_sync(&mhi_pdev->recovery_work);
+-
+- if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
+- mhi_cntrl->ee != MHI_EE_AMSS)
+- goto pci_suspend; /* Nothing to do at MHI level */
+-
+- /* Transition to M3 state */
+- err = mhi_pm_suspend(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to suspend device: %d\n", err);
+- clear_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status);
+- return -EBUSY;
+- }
+-
+-pci_suspend:
+- pci_disable_device(pdev);
+- pci_wake_from_d3(pdev, true);
+-
+- return 0;
+-}
+-
+-static int __maybe_unused mhi_pci_runtime_resume(struct device *dev)
+-{
+- struct pci_dev *pdev = to_pci_dev(dev);
+- struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+- int err;
+-
+- if (!test_and_clear_bit(MHI_PCI_DEV_SUSPENDED, &mhi_pdev->status))
+- return 0;
+-
+- err = pci_enable_device(pdev);
+- if (err)
+- goto err_recovery;
+-
+- pci_set_master(pdev);
+- pci_wake_from_d3(pdev, false);
+-
+- if (!test_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status) ||
+- mhi_cntrl->ee != MHI_EE_AMSS)
+- return 0; /* Nothing to do at MHI level */
+-
+- /* Exit M3, transition to M0 state */
+- err = mhi_pm_resume(mhi_cntrl);
+- if (err) {
+- dev_err(&pdev->dev, "failed to resume device: %d\n", err);
+- goto err_recovery;
+- }
+-
+- /* Resume health check */
+- mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+-
+- /* It can be a remote wakeup (no mhi runtime_get), update access time */
+- pm_runtime_mark_last_busy(dev);
+-
+- return 0;
+-
+-err_recovery:
+- /* Do not fail to not mess up our PCI device state, the device likely
+- * lost power (d3cold) and we simply need to reset it from the recovery
+- * procedure, trigger the recovery asynchronously to prevent system
+- * suspend exit delaying.
+- */
+- queue_work(system_long_wq, &mhi_pdev->recovery_work);
+- pm_runtime_mark_last_busy(dev);
+-
+- return 0;
+-}
+-
+-static int __maybe_unused mhi_pci_suspend(struct device *dev)
+-{
+- pm_runtime_disable(dev);
+- return mhi_pci_runtime_suspend(dev);
+-}
+-
+-static int __maybe_unused mhi_pci_resume(struct device *dev)
+-{
+- int ret;
+-
+- /* Depending the platform, device may have lost power (d3cold), we need
+- * to resume it now to check its state and recover when necessary.
+- */
+- ret = mhi_pci_runtime_resume(dev);
+- pm_runtime_enable(dev);
+-
+- return ret;
+-}
+-
+-static int __maybe_unused mhi_pci_freeze(struct device *dev)
+-{
+- struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+- struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+-
+- /* We want to stop all operations, hibernation does not guarantee that
+- * device will be in the same state as before freezing, especially if
+- * the intermediate restore kernel reinitializes MHI device with new
+- * context.
+- */
+- if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+- mhi_power_down(mhi_cntrl, false);
+- mhi_unprepare_after_power_down(mhi_cntrl);
+- }
+-
+- return 0;
+-}
+-
+-static int __maybe_unused mhi_pci_restore(struct device *dev)
+-{
+- struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+-
+- /* Reinitialize the device */
+- queue_work(system_long_wq, &mhi_pdev->recovery_work);
+-
+- return 0;
+-}
+-
+-static const struct dev_pm_ops mhi_pci_pm_ops = {
+- SET_RUNTIME_PM_OPS(mhi_pci_runtime_suspend, mhi_pci_runtime_resume, NULL)
+-#ifdef CONFIG_PM_SLEEP
+- .suspend = mhi_pci_suspend,
+- .resume = mhi_pci_resume,
+- .freeze = mhi_pci_freeze,
+- .thaw = mhi_pci_restore,
+- .restore = mhi_pci_restore,
+-#endif
+-};
+-
+-static struct pci_driver mhi_pci_driver = {
+- .name = "mhi-pci-generic",
+- .id_table = mhi_pci_id_table,
+- .probe = mhi_pci_probe,
+- .remove = mhi_pci_remove,
+- .shutdown = mhi_pci_shutdown,
+- .err_handler = &mhi_pci_err_handler,
+- .driver.pm = &mhi_pci_pm_ops
+-};
+-module_pci_driver(mhi_pci_driver);
+-
+-MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+-MODULE_DESCRIPTION("Modem Host Interface (MHI) PCI controller driver");
+-MODULE_LICENSE("GPL");
+diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c
+index 626dedd110cbc..fca0d0669aa97 100644
+--- a/drivers/bus/mips_cdmm.c
++++ b/drivers/bus/mips_cdmm.c
+@@ -351,6 +351,7 @@ phys_addr_t __weak mips_cdmm_phys_base(void)
+ np = of_find_compatible_node(NULL, NULL, "mti,mips-cdmm");
+ if (np) {
+ err = of_address_to_resource(np, 0, &res);
++ of_node_put(np);
+ if (!err)
+ return res.start;
+ }
+diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
+index 6f225dddc74f4..fac8627b04e34 100644
+--- a/drivers/bus/sunxi-rsb.c
++++ b/drivers/bus/sunxi-rsb.c
+@@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb,
+
+ dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev));
+
++ return rdev;
++
+ err_device_add:
+ put_device(&rdev->dev);
+
+@@ -269,6 +271,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register);
+ /* common code that starts a transfer */
+ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb)
+ {
++ u32 int_mask, status;
++ bool timeout;
++
+ if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) {
+ dev_dbg(rsb->dev, "RSB transfer still in progress\n");
+ return -EBUSY;
+@@ -276,13 +281,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb)
+
+ reinit_completion(&rsb->complete);
+
+- writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER,
+- rsb->regs + RSB_INTE);
++ int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER;
++ writel(int_mask, rsb->regs + RSB_INTE);
+ writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB,
+ rsb->regs + RSB_CTRL);
+
+- if (!wait_for_completion_io_timeout(&rsb->complete,
+- msecs_to_jiffies(100))) {
++ if (irqs_disabled()) {
++ timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS,
++ status, (status & int_mask),
++ 10, 100000);
++ writel(status, rsb->regs + RSB_INTS);
++ } else {
++ timeout = !wait_for_completion_io_timeout(&rsb->complete,
++ msecs_to_jiffies(100));
++ status = rsb->status;
++ }
++
++ if (timeout) {
+ dev_dbg(rsb->dev, "RSB timeout\n");
+
+ /* abort the transfer */
+@@ -294,18 +309,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb)
+ return -ETIMEDOUT;
+ }
+
+- if (rsb->status & RSB_INTS_LOAD_BSY) {
++ if (status & RSB_INTS_LOAD_BSY) {
+ dev_dbg(rsb->dev, "RSB busy\n");
+ return -EBUSY;
+ }
+
+- if (rsb->status & RSB_INTS_TRANS_ERR) {
+- if (rsb->status & RSB_INTS_TRANS_ERR_ACK) {
++ if (status & RSB_INTS_TRANS_ERR) {
++ if (status & RSB_INTS_TRANS_ERR_ACK) {
+ dev_dbg(rsb->dev, "RSB slave nack\n");
+ return -EINVAL;
+ }
+
+- if (rsb->status & RSB_INTS_TRANS_ERR_DATA) {
++ if (status & RSB_INTS_TRANS_ERR_DATA) {
+ dev_dbg(rsb->dev, "RSB transfer data error\n");
+ return -EIO;
+ }
+@@ -687,11 +702,11 @@ err_clk_disable:
+
+ static void sunxi_rsb_hw_exit(struct sunxi_rsb *rsb)
+ {
+- /* Keep the clock and PM reference counts consistent. */
+- if (pm_runtime_status_suspended(rsb->dev))
+- pm_runtime_resume(rsb->dev);
+ reset_control_assert(rsb->rstc);
+- clk_disable_unprepare(rsb->clk);
++
++ /* Keep the clock and PM reference counts consistent. */
++ if (!pm_runtime_status_suspended(rsb->dev))
++ clk_disable_unprepare(rsb->clk);
+ }
+
+ static int __maybe_unused sunxi_rsb_runtime_suspend(struct device *dev)
+@@ -814,14 +829,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev)
+ return 0;
+ }
+
+-static void sunxi_rsb_shutdown(struct platform_device *pdev)
+-{
+- struct sunxi_rsb *rsb = platform_get_drvdata(pdev);
+-
+- pm_runtime_disable(&pdev->dev);
+- sunxi_rsb_hw_exit(rsb);
+-}
+-
+ static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend,
+ sunxi_rsb_runtime_resume, NULL)
+@@ -837,7 +844,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table);
+ static struct platform_driver sunxi_rsb_driver = {
+ .probe = sunxi_rsb_probe,
+ .remove = sunxi_rsb_remove,
+- .shutdown = sunxi_rsb_shutdown,
+ .driver = {
+ .name = RSB_CTRL_NAME,
+ .of_match_table = sunxi_rsb_of_match_table,
+@@ -855,7 +861,13 @@ static int __init sunxi_rsb_init(void)
+ return ret;
+ }
+
+- return platform_driver_register(&sunxi_rsb_driver);
++ ret = platform_driver_register(&sunxi_rsb_driver);
++ if (ret) {
++ bus_unregister(&sunxi_rsb_bus);
++ return ret;
++ }
++
++ return 0;
+ }
+ module_init(sunxi_rsb_init);
+
+diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
+index 6a8b7fb5be58d..436c0f3563d79 100644
+--- a/drivers/bus/ti-sysc.c
++++ b/drivers/bus/ti-sysc.c
+@@ -6,6 +6,7 @@
+ #include <linux/io.h>
+ #include <linux/clk.h>
+ #include <linux/clkdev.h>
++#include <linux/cpu_pm.h>
+ #include <linux/delay.h>
+ #include <linux/list.h>
+ #include <linux/module.h>
+@@ -17,6 +18,7 @@
+ #include <linux/of_platform.h>
+ #include <linux/slab.h>
+ #include <linux/sys_soc.h>
++#include <linux/timekeeping.h>
+ #include <linux/iopoll.h>
+
+ #include <linux/platform_data/ti-sysc.h>
+@@ -51,11 +53,18 @@ struct sysc_address {
+ struct list_head node;
+ };
+
++struct sysc_module {
++ struct sysc *ddata;
++ struct list_head node;
++};
++
+ struct sysc_soc_info {
+ unsigned long general_purpose:1;
+ enum sysc_soc soc;
+- struct mutex list_lock; /* disabled modules list lock */
++ struct mutex list_lock; /* disabled and restored modules list lock */
+ struct list_head disabled_modules;
++ struct list_head restored_modules;
++ struct notifier_block nb;
+ };
+
+ enum sysc_clocks {
+@@ -223,37 +232,77 @@ static u32 sysc_read_sysstatus(struct sysc *ddata)
+ return sysc_read(ddata, offset);
+ }
+
+-/* Poll on reset status */
+-static int sysc_wait_softreset(struct sysc *ddata)
++static int sysc_poll_reset_sysstatus(struct sysc *ddata)
+ {
+- u32 sysc_mask, syss_done, rstval;
+- int syss_offset, error = 0;
+-
+- if (ddata->cap->regbits->srst_shift < 0)
+- return 0;
+-
+- syss_offset = ddata->offsets[SYSC_SYSSTATUS];
+- sysc_mask = BIT(ddata->cap->regbits->srst_shift);
++ int error, retries;
++ u32 syss_done, rstval;
+
+ if (ddata->cfg.quirks & SYSS_QUIRK_RESETDONE_INVERTED)
+ syss_done = 0;
+ else
+ syss_done = ddata->cfg.syss_mask;
+
+- if (syss_offset >= 0) {
++ if (likely(!timekeeping_suspended)) {
+ error = readx_poll_timeout_atomic(sysc_read_sysstatus, ddata,
+ rstval, (rstval & ddata->cfg.syss_mask) ==
+ syss_done, 100, MAX_MODULE_SOFTRESET_WAIT);
++ } else {
++ retries = MAX_MODULE_SOFTRESET_WAIT;
++ while (retries--) {
++ rstval = sysc_read_sysstatus(ddata);
++ if ((rstval & ddata->cfg.syss_mask) == syss_done)
++ return 0;
++ udelay(2); /* Account for udelay flakeyness */
++ }
++ error = -ETIMEDOUT;
++ }
++
++ return error;
++}
++
++static int sysc_poll_reset_sysconfig(struct sysc *ddata)
++{
++ int error, retries;
++ u32 sysc_mask, rstval;
++
++ sysc_mask = BIT(ddata->cap->regbits->srst_shift);
+
+- } else if (ddata->cfg.quirks & SYSC_QUIRK_RESET_STATUS) {
++ if (likely(!timekeeping_suspended)) {
+ error = readx_poll_timeout_atomic(sysc_read_sysconfig, ddata,
+ rstval, !(rstval & sysc_mask),
+ 100, MAX_MODULE_SOFTRESET_WAIT);
++ } else {
++ retries = MAX_MODULE_SOFTRESET_WAIT;
++ while (retries--) {
++ rstval = sysc_read_sysconfig(ddata);
++ if (!(rstval & sysc_mask))
++ return 0;
++ udelay(2); /* Account for udelay flakeyness */
++ }
++ error = -ETIMEDOUT;
+ }
+
+ return error;
+ }
+
++/* Poll on reset status */
++static int sysc_wait_softreset(struct sysc *ddata)
++{
++ int syss_offset, error = 0;
++
++ if (ddata->cap->regbits->srst_shift < 0)
++ return 0;
++
++ syss_offset = ddata->offsets[SYSC_SYSSTATUS];
++
++ if (syss_offset >= 0)
++ error = sysc_poll_reset_sysstatus(ddata);
++ else if (ddata->cfg.quirks & SYSC_QUIRK_RESET_STATUS)
++ error = sysc_poll_reset_sysconfig(ddata);
++
++ return error;
++}
++
+ static int sysc_add_named_clock_from_child(struct sysc *ddata,
+ const char *name,
+ const char *optfck_name)
+@@ -1518,7 +1567,7 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
+ 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
+ SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -ENODEV, 0x4ea2080d, 0xffffffff,
+ SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY |
+- SYSC_QUIRK_REINIT_ON_RESUME),
++ SYSC_QUIRK_REINIT_ON_CTX_LOST),
+ SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
+ SYSC_MODULE_QUIRK_WDT),
+ /* PRUSS on am3, am4 and am5 */
+@@ -1710,7 +1759,7 @@ static u32 sysc_quirk_dispc(struct sysc *ddata, int dispc_offset,
+ if (!ddata->module_va)
+ return -EIO;
+
+- /* DISP_CONTROL */
++ /* DISP_CONTROL, shut down lcd and digit on disable if enabled */
+ val = sysc_read(ddata, dispc_offset + 0x40);
+ lcd_en = val & lcd_en_mask;
+ digit_en = val & digit_en_mask;
+@@ -1722,7 +1771,7 @@ static u32 sysc_quirk_dispc(struct sysc *ddata, int dispc_offset,
+ else
+ irq_mask |= BIT(2) | BIT(3); /* EVSYNC bits */
+ }
+- if (disable & (lcd_en | digit_en))
++ if (disable && (lcd_en || digit_en))
+ sysc_write(ddata, dispc_offset + 0x40,
+ val & ~(lcd_en_mask | digit_en_mask));
+
+@@ -2040,6 +2089,8 @@ static int sysc_reset(struct sysc *ddata)
+ sysc_val = sysc_read_sysconfig(ddata);
+ sysc_val |= sysc_mask;
+ sysc_write(ddata, sysc_offset, sysc_val);
++ /* Flush posted write */
++ sysc_val = sysc_read_sysconfig(ddata);
+ }
+
+ if (ddata->cfg.srst_udelay)
+@@ -2401,6 +2452,78 @@ static struct dev_pm_domain sysc_child_pm_domain = {
+ }
+ };
+
++/* Caller needs to take list_lock if ever used outside of cpu_pm */
++static void sysc_reinit_modules(struct sysc_soc_info *soc)
++{
++ struct sysc_module *module;
++ struct list_head *pos;
++ struct sysc *ddata;
++
++ list_for_each(pos, &sysc_soc->restored_modules) {
++ module = list_entry(pos, struct sysc_module, node);
++ ddata = module->ddata;
++ sysc_reinit_module(ddata, ddata->enabled);
++ }
++}
++
++/**
++ * sysc_context_notifier - optionally reset and restore module after idle
++ * @nb: notifier block
++ * @cmd: unused
++ * @v: unused
++ *
++ * Some interconnect target modules need to be restored, or reset and restored
++ * on CPU_PM CPU_PM_CLUSTER_EXIT notifier. This is needed at least for am335x
++ * OTG and GPMC target modules even if the modules are unused.
++ */
++static int sysc_context_notifier(struct notifier_block *nb, unsigned long cmd,
++ void *v)
++{
++ struct sysc_soc_info *soc;
++
++ soc = container_of(nb, struct sysc_soc_info, nb);
++
++ switch (cmd) {
++ case CPU_CLUSTER_PM_ENTER:
++ break;
++ case CPU_CLUSTER_PM_ENTER_FAILED: /* No need to restore context */
++ break;
++ case CPU_CLUSTER_PM_EXIT:
++ sysc_reinit_modules(soc);
++ break;
++ }
++
++ return NOTIFY_OK;
++}
++
++/**
++ * sysc_add_restored - optionally add reset and restore quirk hanlling
++ * @ddata: device data
++ */
++static void sysc_add_restored(struct sysc *ddata)
++{
++ struct sysc_module *restored_module;
++
++ restored_module = kzalloc(sizeof(*restored_module), GFP_KERNEL);
++ if (!restored_module)
++ return;
++
++ restored_module->ddata = ddata;
++
++ mutex_lock(&sysc_soc->list_lock);
++
++ list_add(&restored_module->node, &sysc_soc->restored_modules);
++
++ if (sysc_soc->nb.notifier_call)
++ goto out_unlock;
++
++ sysc_soc->nb.notifier_call = sysc_context_notifier;
++ cpu_pm_register_notifier(&sysc_soc->nb);
++
++out_unlock:
++ mutex_unlock(&sysc_soc->list_lock);
++}
++
+ /**
+ * sysc_legacy_idle_quirk - handle children in omap_device compatible way
+ * @ddata: device driver data
+@@ -2900,12 +3023,14 @@ static int sysc_add_disabled(unsigned long base)
+ }
+
+ /*
+- * One time init to detect the booted SoC and disable unavailable features.
++ * One time init to detect the booted SoC, disable unavailable features
++ * and initialize list for optional cpu_pm notifier.
++ *
+ * Note that we initialize static data shared across all ti-sysc instances
+ * so ddata is only used for SoC type. This can be called from module_init
+ * once we no longer need to rely on platform data.
+ */
+-static int sysc_init_soc(struct sysc *ddata)
++static int sysc_init_static_data(struct sysc *ddata)
+ {
+ const struct soc_device_attribute *match;
+ struct ti_sysc_platform_data *pdata;
+@@ -2921,6 +3046,7 @@ static int sysc_init_soc(struct sysc *ddata)
+
+ mutex_init(&sysc_soc->list_lock);
+ INIT_LIST_HEAD(&sysc_soc->disabled_modules);
++ INIT_LIST_HEAD(&sysc_soc->restored_modules);
+ sysc_soc->general_purpose = true;
+
+ pdata = dev_get_platdata(ddata->dev);
+@@ -2929,7 +3055,7 @@ static int sysc_init_soc(struct sysc *ddata)
+
+ match = soc_device_match(sysc_soc_match);
+ if (match && match->data)
+- sysc_soc->soc = (int)match->data;
++ sysc_soc->soc = (enum sysc_soc)(uintptr_t)match->data;
+
+ /*
+ * Check and warn about possible old incomplete dtb. We now want to see
+@@ -2985,15 +3111,24 @@ static int sysc_init_soc(struct sysc *ddata)
+ return 0;
+ }
+
+-static void sysc_cleanup_soc(void)
++static void sysc_cleanup_static_data(void)
+ {
++ struct sysc_module *restored_module;
+ struct sysc_address *disabled_module;
+ struct list_head *pos, *tmp;
+
+ if (!sysc_soc)
+ return;
+
++ if (sysc_soc->nb.notifier_call)
++ cpu_pm_unregister_notifier(&sysc_soc->nb);
++
+ mutex_lock(&sysc_soc->list_lock);
++ list_for_each_safe(pos, tmp, &sysc_soc->restored_modules) {
++ restored_module = list_entry(pos, struct sysc_module, node);
++ list_del(pos);
++ kfree(restored_module);
++ }
+ list_for_each_safe(pos, tmp, &sysc_soc->disabled_modules) {
+ disabled_module = list_entry(pos, struct sysc_address, node);
+ list_del(pos);
+@@ -3029,13 +3164,27 @@ static int sysc_check_disabled_devices(struct sysc *ddata)
+ */
+ static int sysc_check_active_timer(struct sysc *ddata)
+ {
++ int error;
++
+ if (ddata->cap->type != TI_SYSC_OMAP2_TIMER &&
+ ddata->cap->type != TI_SYSC_OMAP4_TIMER)
+ return 0;
+
++ /*
++ * Quirk for omap3 beagleboard revision A to B4 to use gpt12.
++ * Revision C and later are fixed with commit 23885389dbbb ("ARM:
++ * dts: Fix timer regression for beagleboard revision c"). This all
++ * can be dropped if we stop supporting old beagleboard revisions
++ * A to B4 at some point.
++ */
++ if (sysc_soc->soc == SOC_3430)
++ error = -ENXIO;
++ else
++ error = -EBUSY;
++
+ if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) &&
+ (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE))
+- return -ENXIO;
++ return error;
+
+ return 0;
+ }
+@@ -3061,7 +3210,7 @@ static int sysc_probe(struct platform_device *pdev)
+ ddata->dev = &pdev->dev;
+ platform_set_drvdata(pdev, ddata);
+
+- error = sysc_init_soc(ddata);
++ error = sysc_init_static_data(ddata);
+ if (error)
+ return error;
+
+@@ -3159,6 +3308,9 @@ static int sysc_probe(struct platform_device *pdev)
+ pm_runtime_put(&pdev->dev);
+ }
+
++ if (ddata->cfg.quirks & SYSC_QUIRK_REINIT_ON_CTX_LOST)
++ sysc_add_restored(ddata);
++
+ return 0;
+
+ err:
+@@ -3175,7 +3327,9 @@ static int sysc_remove(struct platform_device *pdev)
+ struct sysc *ddata = platform_get_drvdata(pdev);
+ int error;
+
+- cancel_delayed_work_sync(&ddata->idle_work);
++ /* Device can still be enabled, see deferred idle quirk in probe */
++ if (cancel_delayed_work_sync(&ddata->idle_work))
++ ti_sysc_idle(&ddata->idle_work.work);
+
+ error = pm_runtime_resume_and_get(ddata->dev);
+ if (error < 0) {
+@@ -3240,7 +3394,7 @@ static void __exit sysc_exit(void)
+ {
+ bus_unregister_notifier(&platform_bus_type, &sysc_nb);
+ platform_driver_unregister(&sysc_driver);
+- sysc_cleanup_soc();
++ sysc_cleanup_static_data();
+ }
+ module_exit(sysc_exit);
+
+diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
+index 740811893c570..d454428f4981d 100644
+--- a/drivers/char/Kconfig
++++ b/drivers/char/Kconfig
+@@ -428,27 +428,40 @@ config ADI
+ driver include crash and makedumpfile.
+
+ config RANDOM_TRUST_CPU
+- bool "Trust the CPU manufacturer to initialize Linux's CRNG"
++ bool "Initialize RNG using CPU RNG instructions"
++ default y
+ depends on ARCH_RANDOM
+- default n
+ help
+- Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or
+- RDRAND, IBM for the S390 and Power PC architectures) is trustworthy
+- for the purposes of initializing Linux's CRNG. Since this is not
+- something that can be independently audited, this amounts to trusting
+- that CPU manufacturer (perhaps with the insistence or mandate
+- of a Nation State's intelligence or law enforcement agencies)
+- has not installed a hidden back door to compromise the CPU's
+- random number generation facilities. This can also be configured
+- at boot with "random.trust_cpu=on/off".
++ Initialize the RNG using random numbers supplied by the CPU's
++ RNG instructions (e.g. RDRAND), if supported and available. These
++ random numbers are never used directly, but are rather hashed into
++ the main input pool, and this happens regardless of whether or not
++ this option is enabled. Instead, this option controls whether the
++ they are credited and hence can initialize the RNG. Additionally,
++ other sources of randomness are always used, regardless of this
++ setting. Enabling this implies trusting that the CPU can supply high
++ quality and non-backdoored random numbers.
++
++ Say Y here unless you have reason to mistrust your CPU or believe
++ its RNG facilities may be faulty. This may also be configured at
++ boot time with "random.trust_cpu=on/off".
+
+ config RANDOM_TRUST_BOOTLOADER
+- bool "Trust the bootloader to initialize Linux's CRNG"
++ bool "Initialize RNG using bootloader-supplied seed"
++ default y
+ help
+- Some bootloaders can provide entropy to increase the kernel's initial
+- device randomness. Say Y here to assume the entropy provided by the
+- booloader is trustworthy so it will be added to the kernel's entropy
+- pool. Otherwise, say N here so it will be regarded as device input that
+- only mixes the entropy pool.
++ Initialize the RNG using a seed supplied by the bootloader or boot
++ environment (e.g. EFI or a bootloader-generated device tree). This
++ seed is not used directly, but is rather hashed into the main input
++ pool, and this happens regardless of whether or not this option is
++ enabled. Instead, this option controls whether the seed is credited
++ and hence can initialize the RNG. Additionally, other sources of
++ randomness are always used, regardless of this setting. Enabling
++ this implies trusting that the bootloader can supply high quality and
++ non-backdoored seeds.
++
++ Say Y here unless you have reason to mistrust your bootloader or
++ believe its RNG facilities may be faulty. This may also be configured
++ at boot time with "random.trust_bootloader=on/off".
+
+ endmenu
+diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
+index ed3c4c42fc23b..514f9f287a781 100644
+--- a/drivers/char/agp/parisc-agp.c
++++ b/drivers/char/agp/parisc-agp.c
+@@ -90,6 +90,9 @@ parisc_agp_tlbflush(struct agp_memory *mem)
+ {
+ struct _parisc_agp_info *info = &parisc_agp_info;
+
++ /* force fdc ops to be visible to IOMMU */
++ asm_io_sync();
++
+ writeq(info->gart_base | ilog2(info->gart_size), info->ioc_regs+IOC_PCOM);
+ readq(info->ioc_regs+IOC_PCOM); /* flush */
+ }
+@@ -158,6 +161,7 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
+ info->gatt[j] =
+ parisc_agp_mask_memory(agp_bridge,
+ paddr, type);
++ asm_io_fdc(&info->gatt[j]);
+ }
+ }
+
+@@ -191,7 +195,16 @@ static unsigned long
+ parisc_agp_mask_memory(struct agp_bridge_data *bridge, dma_addr_t addr,
+ int type)
+ {
+- return SBA_PDIR_VALID_BIT | addr;
++ unsigned ci; /* coherent index */
++ dma_addr_t pa;
++
++ pa = addr & IOVP_MASK;
++ asm("lci 0(%1), %0" : "=r" (ci) : "r" (phys_to_virt(pa)));
++
++ pa |= (ci >> PAGE_SHIFT) & 0xff;/* move CI (8 bits) into lowest byte */
++ pa |= SBA_PDIR_VALID_BIT; /* set "valid" bit */
++
++ return cpu_to_le64(pa);
+ }
+
+ static void
+@@ -281,7 +294,7 @@ agp_ioc_init(void __iomem *ioc_regs)
+ return 0;
+ }
+
+-static int
++static int __init
+ lba_find_capability(int cap)
+ {
+ struct _parisc_agp_info *info = &parisc_agp_info;
+@@ -366,7 +379,7 @@ fail:
+ return error;
+ }
+
+-static int
++static int __init
+ find_quicksilver(struct device *dev, void *data)
+ {
+ struct parisc_device **lba = data;
+@@ -378,7 +391,7 @@ find_quicksilver(struct device *dev, void *data)
+ return 0;
+ }
+
+-static int
++static int __init
+ parisc_agp_init(void)
+ {
+ extern struct sba_device *sba_list;
+diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
+index deb85a334c937..260573c283209 100644
+--- a/drivers/char/applicom.c
++++ b/drivers/char/applicom.c
+@@ -197,8 +197,10 @@ static int __init applicom_init(void)
+ if (!pci_match_id(applicom_pci_tbl, dev))
+ continue;
+
+- if (pci_enable_device(dev))
++ if (pci_enable_device(dev)) {
++ pci_dev_put(dev);
+ return -EIO;
++ }
+
+ RamIO = ioremap(pci_resource_start(dev, 0), LEN_RAM_IO);
+
+@@ -207,6 +209,7 @@ static int __init applicom_init(void)
+ "space at 0x%llx\n",
+ (unsigned long long)pci_resource_start(dev, 0));
+ pci_disable_device(dev);
++ pci_dev_put(dev);
+ return -EIO;
+ }
+
+diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
+index 239eca4d68055..650c7d9180802 100644
+--- a/drivers/char/hw_random/Kconfig
++++ b/drivers/char/hw_random/Kconfig
+@@ -414,7 +414,7 @@ config HW_RANDOM_MESON
+
+ config HW_RANDOM_CAVIUM
+ tristate "Cavium ThunderX Random Number Generator support"
+- depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
++ depends on HW_RANDOM && PCI && ARCH_THUNDER
+ default HW_RANDOM
+ help
+ This driver provides kernel-side support for the Random Number
+diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
+index c22d4184bb612..0555e3838bce1 100644
+--- a/drivers/char/hw_random/amd-rng.c
++++ b/drivers/char/hw_random/amd-rng.c
+@@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void)
+ found:
+ err = pci_read_config_dword(pdev, 0x58, &pmbase);
+ if (err)
+- return err;
++ goto put_dev;
+
+ pmbase &= 0x0000FF00;
+- if (pmbase == 0)
+- return -EIO;
++ if (pmbase == 0) {
++ err = -EIO;
++ goto put_dev;
++ }
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+- if (!priv)
+- return -ENOMEM;
++ if (!priv) {
++ err = -ENOMEM;
++ goto put_dev;
++ }
+
+ if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) {
+ dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
+@@ -185,6 +189,8 @@ err_iomap:
+ release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+ out:
+ kfree(priv);
++put_dev:
++ pci_dev_put(pdev);
+ return err;
+ }
+
+@@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void)
+
+ release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE);
+
++ pci_dev_put(priv->pcidev);
++
+ kfree(priv);
+ }
+
+diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
+index b24ac39a903b3..e34c3ea692b6c 100644
+--- a/drivers/char/hw_random/arm_smccc_trng.c
++++ b/drivers/char/hw_random/arm_smccc_trng.c
+@@ -71,8 +71,6 @@ static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+ MAX_BITS_PER_CALL);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res);
+- if ((int)res.a0 < 0)
+- return (int)res.a0;
+
+ switch ((int)res.a0) {
+ case SMCCC_RET_SUCCESS:
+@@ -88,6 +86,8 @@ static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+ return copied;
+ cond_resched();
+ break;
++ default:
++ return -EIO;
+ }
+ }
+
+diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
+index ecb71c4317a50..8cf0ef501341e 100644
+--- a/drivers/char/hw_random/atmel-rng.c
++++ b/drivers/char/hw_random/atmel-rng.c
+@@ -114,6 +114,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
+
+ err_register:
+ clk_disable_unprepare(trng->clk);
++ atmel_trng_disable(trng);
+ return ret;
+ }
+
+diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
+index 3de4a6a443ef9..6f66919652bf5 100644
+--- a/drivers/char/hw_random/cavium-rng-vf.c
++++ b/drivers/char/hw_random/cavium-rng-vf.c
+@@ -1,10 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0
+ /*
+- * Hardware Random Number Generator support for Cavium, Inc.
+- * Thunder processor family.
+- *
+- * This file is subject to the terms and conditions of the GNU General Public
+- * License. See the file "COPYING" in the main directory of this archive
+- * for more details.
++ * Hardware Random Number Generator support.
++ * Cavium Thunder, Marvell OcteonTx/Tx2 processor families.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+@@ -15,16 +12,146 @@
+ #include <linux/pci.h>
+ #include <linux/pci_ids.h>
+
++#include <asm/arch_timer.h>
++
++/* PCI device IDs */
++#define PCI_DEVID_CAVIUM_RNG_PF 0xA018
++#define PCI_DEVID_CAVIUM_RNG_VF 0xA033
++
++#define HEALTH_STATUS_REG 0x38
++
++/* RST device info */
++#define PCI_DEVICE_ID_RST_OTX2 0xA085
++#define RST_BOOT_REG 0x1600ULL
++#define CLOCK_BASE_RATE 50000000ULL
++#define MSEC_TO_NSEC(x) (x * 1000000)
++
+ struct cavium_rng {
+ struct hwrng ops;
+ void __iomem *result;
++ void __iomem *pf_regbase;
++ struct pci_dev *pdev;
++ u64 clock_rate;
++ u64 prev_error;
++ u64 prev_time;
+ };
+
++static inline bool is_octeontx(struct pci_dev *pdev)
++{
++ if (midr_is_cpu_model_range(read_cpuid_id(), MIDR_THUNDERX_83XX,
++ MIDR_CPU_VAR_REV(0, 0),
++ MIDR_CPU_VAR_REV(3, 0)) ||
++ midr_is_cpu_model_range(read_cpuid_id(), MIDR_THUNDERX_81XX,
++ MIDR_CPU_VAR_REV(0, 0),
++ MIDR_CPU_VAR_REV(3, 0)) ||
++ midr_is_cpu_model_range(read_cpuid_id(), MIDR_THUNDERX,
++ MIDR_CPU_VAR_REV(0, 0),
++ MIDR_CPU_VAR_REV(3, 0)))
++ return true;
++
++ return false;
++}
++
++static u64 rng_get_coprocessor_clkrate(void)
++{
++ u64 ret = CLOCK_BASE_RATE * 16; /* Assume 800Mhz as default */
++ struct pci_dev *pdev;
++ void __iomem *base;
++
++ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
++ PCI_DEVICE_ID_RST_OTX2, NULL);
++ if (!pdev)
++ goto error;
++
++ base = pci_ioremap_bar(pdev, 0);
++ if (!base)
++ goto error_put_pdev;
++
++ /* RST: PNR_MUL * 50Mhz gives clockrate */
++ ret = CLOCK_BASE_RATE * ((readq(base + RST_BOOT_REG) >> 33) & 0x3F);
++
++ iounmap(base);
++
++error_put_pdev:
++ pci_dev_put(pdev);
++
++error:
++ return ret;
++}
++
++static int check_rng_health(struct cavium_rng *rng)
++{
++ u64 cur_err, cur_time;
++ u64 status, cycles;
++ u64 time_elapsed;
++
++
++ /* Skip checking health for OcteonTx */
++ if (!rng->pf_regbase)
++ return 0;
++
++ status = readq(rng->pf_regbase + HEALTH_STATUS_REG);
++ if (status & BIT_ULL(0)) {
++ dev_err(&rng->pdev->dev, "HWRNG: Startup health test failed\n");
++ return -EIO;
++ }
++
++ cycles = status >> 1;
++ if (!cycles)
++ return 0;
++
++ cur_time = arch_timer_read_counter();
++
++ /* RNM_HEALTH_STATUS[CYCLES_SINCE_HEALTH_FAILURE]
++ * Number of coprocessor cycles times 2 since the last failure.
++ * This field doesn't get cleared/updated until another failure.
++ */
++ cycles = cycles / 2;
++ cur_err = (cycles * 1000000000) / rng->clock_rate; /* In nanosec */
++
++ /* Ignore errors that happenned a long time ago, these
++ * are most likely false positive errors.
++ */
++ if (cur_err > MSEC_TO_NSEC(10)) {
++ rng->prev_error = 0;
++ rng->prev_time = 0;
++ return 0;
++ }
++
++ if (rng->prev_error) {
++ /* Calculate time elapsed since last error
++ * '1' tick of CNTVCT is 10ns, since it runs at 100Mhz.
++ */
++ time_elapsed = (cur_time - rng->prev_time) * 10;
++ time_elapsed += rng->prev_error;
++
++ /* Check if current error is a new one or the old one itself.
++ * If error is a new one then consider there is a persistent
++ * issue with entropy, declare hardware failure.
++ */
++ if (cur_err < time_elapsed) {
++ dev_err(&rng->pdev->dev, "HWRNG failure detected\n");
++ rng->prev_error = cur_err;
++ rng->prev_time = cur_time;
++ return -EIO;
++ }
++ }
++
++ rng->prev_error = cur_err;
++ rng->prev_time = cur_time;
++ return 0;
++}
++
+ /* Read data from the RNG unit */
+ static int cavium_rng_read(struct hwrng *rng, void *dat, size_t max, bool wait)
+ {
+ struct cavium_rng *p = container_of(rng, struct cavium_rng, ops);
+ unsigned int size = max;
++ int err = 0;
++
++ err = check_rng_health(p);
++ if (err)
++ return err;
+
+ while (size >= 8) {
+ *((u64 *)dat) = readq(p->result);
+@@ -39,6 +166,39 @@ static int cavium_rng_read(struct hwrng *rng, void *dat, size_t max, bool wait)
+ return max;
+ }
+
++static int cavium_map_pf_regs(struct cavium_rng *rng)
++{
++ struct pci_dev *pdev;
++
++ /* Health status is not supported on 83xx, skip mapping PF CSRs */
++ if (is_octeontx(rng->pdev)) {
++ rng->pf_regbase = NULL;
++ return 0;
++ }
++
++ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
++ PCI_DEVID_CAVIUM_RNG_PF, NULL);
++ if (!pdev) {
++ dev_err(&pdev->dev, "Cannot find RNG PF device\n");
++ return -EIO;
++ }
++
++ rng->pf_regbase = ioremap(pci_resource_start(pdev, 0),
++ pci_resource_len(pdev, 0));
++ if (!rng->pf_regbase) {
++ dev_err(&pdev->dev, "Failed to map PF CSR region\n");
++ pci_dev_put(pdev);
++ return -ENOMEM;
++ }
++
++ pci_dev_put(pdev);
++
++ /* Get co-processor clock rate */
++ rng->clock_rate = rng_get_coprocessor_clkrate();
++
++ return 0;
++}
++
+ /* Map Cavium RNG to an HWRNG object */
+ static int cavium_rng_probe_vf(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+@@ -50,6 +210,8 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev,
+ if (!rng)
+ return -ENOMEM;
+
++ rng->pdev = pdev;
++
+ /* Map the RNG result */
+ rng->result = pcim_iomap(pdev, 0, 0);
+ if (!rng->result) {
+@@ -67,6 +229,11 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev,
+
+ pci_set_drvdata(pdev, rng);
+
++ /* Health status is available only at PF, hence map PF registers. */
++ ret = cavium_map_pf_regs(rng);
++ if (ret)
++ return ret;
++
+ ret = devm_hwrng_register(&pdev->dev, &rng->ops);
+ if (ret) {
+ dev_err(&pdev->dev, "Error registering device as HWRNG.\n");
+@@ -76,10 +243,18 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev,
+ return 0;
+ }
+
++/* Remove the VF */
++static void cavium_rng_remove_vf(struct pci_dev *pdev)
++{
++ struct cavium_rng *rng;
++
++ rng = pci_get_drvdata(pdev);
++ iounmap(rng->pf_regbase);
++}
+
+ static const struct pci_device_id cavium_rng_vf_id_table[] = {
+- { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa033), 0, 0, 0},
+- {0,},
++ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CAVIUM_RNG_VF) },
++ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, cavium_rng_vf_id_table);
+
+@@ -87,8 +262,9 @@ static struct pci_driver cavium_rng_vf_driver = {
+ .name = "cavium_rng_vf",
+ .id_table = cavium_rng_vf_id_table,
+ .probe = cavium_rng_probe_vf,
++ .remove = cavium_rng_remove_vf,
+ };
+ module_pci_driver(cavium_rng_vf_driver);
+
+ MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+-MODULE_LICENSE("GPL");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/char/hw_random/cavium-rng.c b/drivers/char/hw_random/cavium-rng.c
+index 63d6e68c24d2f..b96579222408b 100644
+--- a/drivers/char/hw_random/cavium-rng.c
++++ b/drivers/char/hw_random/cavium-rng.c
+@@ -1,10 +1,7 @@
++// SPDX-License-Identifier: GPL-2.0
+ /*
+- * Hardware Random Number Generator support for Cavium Inc.
+- * Thunder processor family.
+- *
+- * This file is subject to the terms and conditions of the GNU General Public
+- * License. See the file "COPYING" in the main directory of this archive
+- * for more details.
++ * Hardware Random Number Generator support.
++ * Cavium Thunder, Marvell OcteonTx/Tx2 processor families.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+@@ -91,4 +88,4 @@ static struct pci_driver cavium_rng_pf_driver = {
+
+ module_pci_driver(cavium_rng_pf_driver);
+ MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+-MODULE_LICENSE("GPL");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
+index a3db27916256d..cfb085de876b7 100644
+--- a/drivers/char/hw_random/core.c
++++ b/drivers/char/hw_random/core.c
+@@ -15,6 +15,7 @@
+ #include <linux/err.h>
+ #include <linux/fs.h>
+ #include <linux/hw_random.h>
++#include <linux/random.h>
+ #include <linux/kernel.h>
+ #include <linux/kthread.h>
+ #include <linux/sched/signal.h>
+diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
+index 138ce434f86b2..12fbe80918319 100644
+--- a/drivers/char/hw_random/geode-rng.c
++++ b/drivers/char/hw_random/geode-rng.c
+@@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = {
+ };
+ MODULE_DEVICE_TABLE(pci, pci_tbl);
+
++struct amd_geode_priv {
++ struct pci_dev *pcidev;
++ void __iomem *membase;
++};
+
+ static int geode_rng_data_read(struct hwrng *rng, u32 *data)
+ {
+@@ -90,6 +94,7 @@ static int __init geode_rng_init(void)
+ const struct pci_device_id *ent;
+ void __iomem *mem;
+ unsigned long rng_base;
++ struct amd_geode_priv *priv;
+
+ for_each_pci_dev(pdev) {
+ ent = pci_match_id(pci_tbl, pdev);
+@@ -97,17 +102,26 @@ static int __init geode_rng_init(void)
+ goto found;
+ }
+ /* Device not found. */
+- goto out;
++ return err;
+
+ found:
++ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
++ if (!priv) {
++ err = -ENOMEM;
++ goto put_dev;
++ }
++
+ rng_base = pci_resource_start(pdev, 0);
+ if (rng_base == 0)
+- goto out;
++ goto free_priv;
+ err = -ENOMEM;
+ mem = ioremap(rng_base, 0x58);
+ if (!mem)
+- goto out;
+- geode_rng.priv = (unsigned long)mem;
++ goto free_priv;
++
++ geode_rng.priv = (unsigned long)priv;
++ priv->membase = mem;
++ priv->pcidev = pdev;
+
+ pr_info("AMD Geode RNG detected\n");
+ err = hwrng_register(&geode_rng);
+@@ -116,20 +130,26 @@ found:
+ err);
+ goto err_unmap;
+ }
+-out:
+ return err;
+
+ err_unmap:
+ iounmap(mem);
+- goto out;
++free_priv:
++ kfree(priv);
++put_dev:
++ pci_dev_put(pdev);
++ return err;
+ }
+
+ static void __exit geode_rng_exit(void)
+ {
+- void __iomem *mem = (void __iomem *)geode_rng.priv;
++ struct amd_geode_priv *priv;
+
++ priv = (struct amd_geode_priv *)geode_rng.priv;
+ hwrng_unregister(&geode_rng);
+- iounmap(mem);
++ iounmap(priv->membase);
++ pci_dev_put(priv->pcidev);
++ kfree(priv);
+ }
+
+ module_init(geode_rng_init);
+diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
+index b05d676ca814c..c4ae72b6743ed 100644
+--- a/drivers/char/hw_random/imx-rngc.c
++++ b/drivers/char/hw_random/imx-rngc.c
+@@ -110,7 +110,7 @@ static int imx_rngc_self_test(struct imx_rngc *rngc)
+ cmd = readl(rngc->base + RNGC_COMMAND);
+ writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND);
+
+- ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT);
++ ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT));
+ imx_rngc_irq_mask_clear(rngc);
+ if (!ret)
+ return -ETIMEDOUT;
+@@ -187,9 +187,7 @@ static int imx_rngc_init(struct hwrng *rng)
+ cmd = readl(rngc->base + RNGC_COMMAND);
+ writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND);
+
+- ret = wait_for_completion_timeout(&rngc->rng_op_done,
+- RNGC_TIMEOUT);
+-
++ ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT));
+ if (!ret) {
+ ret = -ETIMEDOUT;
+ goto err;
+@@ -270,13 +268,6 @@ static int imx_rngc_probe(struct platform_device *pdev)
+ goto err;
+ }
+
+- ret = devm_request_irq(&pdev->dev,
+- irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
+- if (ret) {
+- dev_err(rngc->dev, "Can't get interrupt working.\n");
+- goto err;
+- }
+-
+ init_completion(&rngc->rng_op_done);
+
+ rngc->rng.name = pdev->name;
+@@ -290,6 +281,13 @@ static int imx_rngc_probe(struct platform_device *pdev)
+
+ imx_rngc_irq_mask_clear(rngc);
+
++ ret = devm_request_irq(&pdev->dev,
++ irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
++ if (ret) {
++ dev_err(rngc->dev, "Can't get interrupt working.\n");
++ return ret;
++ }
++
+ if (self_test) {
+ ret = imx_rngc_self_test(rngc);
+ if (ret) {
+diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
+index a43743887db19..9142a63b92b30 100644
+--- a/drivers/char/hw_random/iproc-rng200.c
++++ b/drivers/char/hw_random/iproc-rng200.c
+@@ -189,6 +189,8 @@ static int iproc_rng200_probe(struct platform_device *pdev)
+ return PTR_ERR(priv->base);
+ }
+
++ dev_set_drvdata(dev, priv);
++
+ priv->rng.name = "iproc-rng200";
+ priv->rng.read = iproc_rng200_read;
+ priv->rng.init = iproc_rng200_init;
+@@ -206,6 +208,28 @@ static int iproc_rng200_probe(struct platform_device *pdev)
+ return 0;
+ }
+
++static int __maybe_unused iproc_rng200_suspend(struct device *dev)
++{
++ struct iproc_rng200_dev *priv = dev_get_drvdata(dev);
++
++ iproc_rng200_cleanup(&priv->rng);
++
++ return 0;
++}
++
++static int __maybe_unused iproc_rng200_resume(struct device *dev)
++{
++ struct iproc_rng200_dev *priv = dev_get_drvdata(dev);
++
++ iproc_rng200_init(&priv->rng);
++
++ return 0;
++}
++
++static const struct dev_pm_ops iproc_rng200_pm_ops = {
++ SET_SYSTEM_SLEEP_PM_OPS(iproc_rng200_suspend, iproc_rng200_resume)
++};
++
+ static const struct of_device_id iproc_rng200_of_match[] = {
+ { .compatible = "brcm,bcm2711-rng200", },
+ { .compatible = "brcm,bcm7211-rng200", },
+@@ -219,6 +243,7 @@ static struct platform_driver iproc_rng200_driver = {
+ .driver = {
+ .name = "iproc-rng200",
+ .of_match_table = iproc_rng200_of_match,
++ .pm = &iproc_rng200_pm_ops,
+ },
+ .probe = iproc_rng200_probe,
+ };
+diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
+index 8ad7b515a51b8..6c00ea0085553 100644
+--- a/drivers/char/hw_random/mtk-rng.c
++++ b/drivers/char/hw_random/mtk-rng.c
+@@ -166,8 +166,13 @@ static int mtk_rng_runtime_resume(struct device *dev)
+ return mtk_rng_init(&priv->rng);
+ }
+
+-static UNIVERSAL_DEV_PM_OPS(mtk_rng_pm_ops, mtk_rng_runtime_suspend,
+- mtk_rng_runtime_resume, NULL);
++static const struct dev_pm_ops mtk_rng_pm_ops = {
++ SET_RUNTIME_PM_OPS(mtk_rng_runtime_suspend,
++ mtk_rng_runtime_resume, NULL)
++ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
++ pm_runtime_force_resume)
++};
++
+ #define MTK_RNG_PM_OPS (&mtk_rng_pm_ops)
+ #else /* CONFIG_PM */
+ #define MTK_RNG_PM_OPS NULL
+diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
+index 67947a19aa225..3774adf903a83 100644
+--- a/drivers/char/hw_random/nomadik-rng.c
++++ b/drivers/char/hw_random/nomadik-rng.c
+@@ -13,8 +13,6 @@
+ #include <linux/clk.h>
+ #include <linux/err.h>
+
+-static struct clk *rng_clk;
+-
+ static int nmk_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+ {
+ void __iomem *base = (void __iomem *)rng->priv;
+@@ -36,21 +34,20 @@ static struct hwrng nmk_rng = {
+
+ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
+ {
++ struct clk *rng_clk;
+ void __iomem *base;
+ int ret;
+
+- rng_clk = devm_clk_get(&dev->dev, NULL);
++ rng_clk = devm_clk_get_enabled(&dev->dev, NULL);
+ if (IS_ERR(rng_clk)) {
+ dev_err(&dev->dev, "could not get rng clock\n");
+ ret = PTR_ERR(rng_clk);
+ return ret;
+ }
+
+- clk_prepare_enable(rng_clk);
+-
+ ret = amba_request_regions(dev, dev->dev.init_name);
+ if (ret)
+- goto out_clk;
++ return ret;
+ ret = -ENOMEM;
+ base = devm_ioremap(&dev->dev, dev->res.start,
+ resource_size(&dev->res));
+@@ -64,15 +61,12 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
+
+ out_release:
+ amba_release_regions(dev);
+-out_clk:
+- clk_disable(rng_clk);
+ return ret;
+ }
+
+ static void nmk_rng_remove(struct amba_device *dev)
+ {
+ amba_release_regions(dev);
+- clk_disable(rng_clk);
+ }
+
+ static const struct amba_id nmk_rng_ids[] = {
+diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
+index e0d77fa048fb6..f06e4f95114f9 100644
+--- a/drivers/char/hw_random/omap3-rom-rng.c
++++ b/drivers/char/hw_random/omap3-rom-rng.c
+@@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev)
+
+ r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
+ if (r != 0) {
+- clk_disable(ddata->clk);
++ clk_disable_unprepare(ddata->clk);
+ dev_err(dev, "HW init failed: %d\n", r);
+
+ return -EIO;
+diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
+index 99c8bd0859a14..e04a054e89307 100644
+--- a/drivers/char/hw_random/pic32-rng.c
++++ b/drivers/char/hw_random/pic32-rng.c
+@@ -36,7 +36,6 @@
+ struct pic32_rng {
+ void __iomem *base;
+ struct hwrng rng;
+- struct clk *clk;
+ };
+
+ /*
+@@ -70,6 +69,7 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
+ static int pic32_rng_probe(struct platform_device *pdev)
+ {
+ struct pic32_rng *priv;
++ struct clk *clk;
+ u32 v;
+ int ret;
+
+@@ -81,13 +81,9 @@ static int pic32_rng_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+- priv->clk = devm_clk_get(&pdev->dev, NULL);
+- if (IS_ERR(priv->clk))
+- return PTR_ERR(priv->clk);
+-
+- ret = clk_prepare_enable(priv->clk);
+- if (ret)
+- return ret;
++ clk = devm_clk_get_enabled(&pdev->dev, NULL);
++ if (IS_ERR(clk))
++ return PTR_ERR(clk);
+
+ /* enable TRNG in enhanced mode */
+ v = TRNGEN | TRNGMOD;
+@@ -98,15 +94,11 @@ static int pic32_rng_probe(struct platform_device *pdev)
+
+ ret = devm_hwrng_register(&pdev->dev, &priv->rng);
+ if (ret)
+- goto err_register;
++ return ret;
+
+ platform_set_drvdata(pdev, priv);
+
+ return 0;
+-
+-err_register:
+- clk_disable_unprepare(priv->clk);
+- return ret;
+ }
+
+ static int pic32_rng_remove(struct platform_device *pdev)
+@@ -114,7 +106,6 @@ static int pic32_rng_remove(struct platform_device *pdev)
+ struct pic32_rng *rng = platform_get_drvdata(pdev);
+
+ writel(0, rng->base + RNGCON);
+- clk_disable_unprepare(rng->clk);
+ return 0;
+ }
+
+diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c
+index 15ba1e6fae4d2..6e9dfac9fc9f4 100644
+--- a/drivers/char/hw_random/st-rng.c
++++ b/drivers/char/hw_random/st-rng.c
+@@ -42,7 +42,6 @@
+
+ struct st_rng_data {
+ void __iomem *base;
+- struct clk *clk;
+ struct hwrng ops;
+ };
+
+@@ -85,26 +84,18 @@ static int st_rng_probe(struct platform_device *pdev)
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+- clk = devm_clk_get(&pdev->dev, NULL);
++ clk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+- ret = clk_prepare_enable(clk);
+- if (ret)
+- return ret;
+-
+ ddata->ops.priv = (unsigned long)ddata;
+ ddata->ops.read = st_rng_read;
+ ddata->ops.name = pdev->name;
+ ddata->base = base;
+- ddata->clk = clk;
+-
+- dev_set_drvdata(&pdev->dev, ddata);
+
+ ret = devm_hwrng_register(&pdev->dev, &ddata->ops);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register HW RNG\n");
+- clk_disable_unprepare(clk);
+ return ret;
+ }
+
+@@ -113,15 +104,6 @@ static int st_rng_probe(struct platform_device *pdev)
+ return 0;
+ }
+
+-static int st_rng_remove(struct platform_device *pdev)
+-{
+- struct st_rng_data *ddata = dev_get_drvdata(&pdev->dev);
+-
+- clk_disable_unprepare(ddata->clk);
+-
+- return 0;
+-}
+-
+ static const struct of_device_id st_rng_match[] __maybe_unused = {
+ { .compatible = "st,rng" },
+ {},
+@@ -134,7 +116,6 @@ static struct platform_driver st_rng_driver = {
+ .of_match_table = of_match_ptr(st_rng_match),
+ },
+ .probe = st_rng_probe,
+- .remove = st_rng_remove
+ };
+
+ module_platform_driver(st_rng_driver);
+diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
+index a90001e02bf7a..3a194eb3ce8ad 100644
+--- a/drivers/char/hw_random/virtio-rng.c
++++ b/drivers/char/hw_random/virtio-rng.c
+@@ -4,6 +4,7 @@
+ * Copyright (C) 2007, 2008 Rusty Russell IBM Corporation
+ */
+
++#include <asm/barrier.h>
+ #include <linux/err.h>
+ #include <linux/hw_random.h>
+ #include <linux/scatterlist.h>
+@@ -18,71 +19,111 @@ static DEFINE_IDA(rng_index_ida);
+ struct virtrng_info {
+ struct hwrng hwrng;
+ struct virtqueue *vq;
+- struct completion have_data;
+ char name[25];
+- unsigned int data_avail;
+ int index;
+- bool busy;
+ bool hwrng_register_done;
+ bool hwrng_removed;
++ /* data transfer */
++ struct completion have_data;
++ unsigned int data_avail;
++ unsigned int data_idx;
++ /* minimal size returned by rng_buffer_size() */
++#if SMP_CACHE_BYTES < 32
++ u8 data[32];
++#else
++ u8 data[SMP_CACHE_BYTES];
++#endif
+ };
+
+ static void random_recv_done(struct virtqueue *vq)
+ {
+ struct virtrng_info *vi = vq->vdev->priv;
++ unsigned int len;
+
+ /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
+- if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
++ if (!virtqueue_get_buf(vi->vq, &len))
+ return;
+
++ smp_store_release(&vi->data_avail, len);
+ complete(&vi->have_data);
+ }
+
+-/* The host will fill any buffer we give it with sweet, sweet randomness. */
+-static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
++static void request_entropy(struct virtrng_info *vi)
+ {
+ struct scatterlist sg;
+
+- sg_init_one(&sg, buf, size);
++ reinit_completion(&vi->have_data);
++ vi->data_idx = 0;
++
++ sg_init_one(&sg, vi->data, sizeof(vi->data));
+
+ /* There should always be room for one buffer. */
+- virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
++ virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL);
+
+ virtqueue_kick(vi->vq);
+ }
+
++static unsigned int copy_data(struct virtrng_info *vi, void *buf,
++ unsigned int size)
++{
++ size = min_t(unsigned int, size, vi->data_avail);
++ memcpy(buf, vi->data + vi->data_idx, size);
++ vi->data_idx += size;
++ vi->data_avail -= size;
++ if (vi->data_avail == 0)
++ request_entropy(vi);
++ return size;
++}
++
+ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
+ {
+ int ret;
+ struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
++ unsigned int chunk;
++ size_t read;
+
+ if (vi->hwrng_removed)
+ return -ENODEV;
+
+- if (!vi->busy) {
+- vi->busy = true;
+- reinit_completion(&vi->have_data);
+- register_buffer(vi, buf, size);
++ read = 0;
++
++ /* copy available data */
++ if (smp_load_acquire(&vi->data_avail)) {
++ chunk = copy_data(vi, buf, size);
++ size -= chunk;
++ read += chunk;
+ }
+
+ if (!wait)
+- return 0;
+-
+- ret = wait_for_completion_killable(&vi->have_data);
+- if (ret < 0)
+- return ret;
++ return read;
++
++ /* We have already copied available entropy,
++ * so either size is 0 or data_avail is 0
++ */
++ while (size != 0) {
++ /* data_avail is 0 but a request is pending */
++ ret = wait_for_completion_killable(&vi->have_data);
++ if (ret < 0)
++ return ret;
++ /* if vi->data_avail is 0, we have been interrupted
++ * by a cleanup, but buffer stays in the queue
++ */
++ if (vi->data_avail == 0)
++ return read;
+
+- vi->busy = false;
++ chunk = copy_data(vi, buf + read, size);
++ size -= chunk;
++ read += chunk;
++ }
+
+- return vi->data_avail;
++ return read;
+ }
+
+ static void virtio_cleanup(struct hwrng *rng)
+ {
+ struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
+
+- if (vi->busy)
+- wait_for_completion(&vi->have_data);
++ complete(&vi->have_data);
+ }
+
+ static int probe_common(struct virtio_device *vdev)
+@@ -118,6 +159,9 @@ static int probe_common(struct virtio_device *vdev)
+ goto err_find;
+ }
+
++ /* we always have a pending entropy request */
++ request_entropy(vi);
++
+ return 0;
+
+ err_find:
+@@ -133,9 +177,9 @@ static void remove_common(struct virtio_device *vdev)
+
+ vi->hwrng_removed = true;
+ vi->data_avail = 0;
++ vi->data_idx = 0;
+ complete(&vi->have_data);
+ vdev->config->reset(vdev);
+- vi->busy = false;
+ if (vi->hwrng_register_done)
+ hwrng_unregister(&vi->hwrng);
+ vdev->config->del_vqs(vdev);
+diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
+index 249b31197eeae..8298a4dd0de68 100644
+--- a/drivers/char/ipmi/Kconfig
++++ b/drivers/char/ipmi/Kconfig
+@@ -153,7 +153,8 @@ config IPMI_KCS_BMC_SERIO
+
+ config ASPEED_BT_IPMI_BMC
+ depends on ARCH_ASPEED || COMPILE_TEST
+- depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
++ depends on MFD_SYSCON
++ select REGMAP_MMIO
+ tristate "BT IPMI bmc driver"
+ help
+ Provides a driver for the BT (Block Transfer) IPMI interface
+diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
+index e96cb5c4f97a3..15c211c5d6f4e 100644
+--- a/drivers/char/ipmi/ipmi_msghandler.c
++++ b/drivers/char/ipmi/ipmi_msghandler.c
+@@ -11,8 +11,8 @@
+ * Copyright 2002 MontaVista Software Inc.
+ */
+
+-#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: "
+-#define dev_fmt pr_fmt
++#define pr_fmt(fmt) "IPMI message handler: " fmt
++#define dev_fmt(fmt) pr_fmt(fmt)
+
+ #include <linux/module.h>
+ #include <linux/errno.h>
+@@ -191,6 +191,8 @@ struct ipmi_user {
+ struct work_struct remove_work;
+ };
+
++static struct workqueue_struct *remove_work_wq;
++
+ static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
+ __acquires(user->release_barrier)
+ {
+@@ -1261,7 +1263,7 @@ static void free_user(struct kref *ref)
+ struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
+
+ /* SRCU cleanup must happen in task context. */
+- schedule_work(&user->remove_work);
++ queue_work(remove_work_wq, &user->remove_work);
+ }
+
+ static void _ipmi_destroy_user(struct ipmi_user *user)
+@@ -1271,6 +1273,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
+ unsigned long flags;
+ struct cmd_rcvr *rcvr;
+ struct cmd_rcvr *rcvrs = NULL;
++ struct module *owner;
+
+ if (!acquire_ipmi_user(user, &i)) {
+ /*
+@@ -1332,8 +1335,9 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
+ kfree(rcvr);
+ }
+
++ owner = intf->owner;
+ kref_put(&intf->refcount, intf_free);
+- module_put(intf->owner);
++ module_put(owner);
+ }
+
+ int ipmi_destroy_user(struct ipmi_user *user)
+@@ -2930,7 +2934,7 @@ cleanup_bmc_device(struct kref *ref)
+ * with removing the device attributes while reading a device
+ * attribute.
+ */
+- schedule_work(&bmc->remove_work);
++ queue_work(remove_work_wq, &bmc->remove_work);
+ }
+
+ /*
+@@ -3525,12 +3529,16 @@ static void deliver_smi_err_response(struct ipmi_smi *intf,
+ struct ipmi_smi_msg *msg,
+ unsigned char err)
+ {
++ int rv;
+ msg->rsp[0] = msg->data[0] | 4;
+ msg->rsp[1] = msg->data[1];
+ msg->rsp[2] = err;
+ msg->rsp_size = 3;
+- /* It's an error, so it will never requeue, no need to check return. */
+- handle_one_recv_msg(intf, msg);
++
++ /* This will never requeue, but it may ask us to free the message. */
++ rv = handle_one_recv_msg(intf, msg);
++ if (rv == 0)
++ ipmi_free_smi_msg(msg);
+ }
+
+ static void cleanup_smi_msgs(struct ipmi_smi *intf)
+@@ -4789,7 +4797,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
+ static void free_smi_msg(struct ipmi_smi_msg *msg)
+ {
+ atomic_dec(&smi_msg_inuse_count);
+- kfree(msg);
++ /* Try to keep as much stuff out of the panic path as possible. */
++ if (!oops_in_progress)
++ kfree(msg);
+ }
+
+ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void)
+@@ -4808,7 +4818,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg);
+ static void free_recv_msg(struct ipmi_recv_msg *msg)
+ {
+ atomic_dec(&recv_msg_inuse_count);
+- kfree(msg);
++ /* Try to keep as much stuff out of the panic path as possible. */
++ if (!oops_in_progress)
++ kfree(msg);
+ }
+
+ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void)
+@@ -4826,7 +4838,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void)
+
+ void ipmi_free_recv_msg(struct ipmi_recv_msg *msg)
+ {
+- if (msg->user)
++ if (msg->user && !oops_in_progress)
+ kref_put(&msg->user->refcount, free_user);
+ msg->done(msg);
+ }
+@@ -5142,7 +5154,16 @@ static int ipmi_init_msghandler(void)
+ if (initialized)
+ goto out;
+
+- init_srcu_struct(&ipmi_interfaces_srcu);
++ rv = init_srcu_struct(&ipmi_interfaces_srcu);
++ if (rv)
++ goto out;
++
++ remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq");
++ if (!remove_work_wq) {
++ pr_err("unable to create ipmi-msghandler-remove-wq workqueue");
++ rv = -ENOMEM;
++ goto out_wq;
++ }
+
+ timer_setup(&ipmi_timer, ipmi_timeout, 0);
+ mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
+@@ -5151,6 +5172,9 @@ static int ipmi_init_msghandler(void)
+
+ initialized = true;
+
++out_wq:
++ if (rv)
++ cleanup_srcu_struct(&ipmi_interfaces_srcu);
+ out:
+ mutex_unlock(&ipmi_interfaces_mutex);
+ return rv;
+@@ -5174,6 +5198,8 @@ static void __exit cleanup_ipmi(void)
+ int count;
+
+ if (initialized) {
++ destroy_workqueue(remove_work_wq);
++
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &panic_block);
+
+diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
+index 6f3272b58ced3..f4360fbddbffe 100644
+--- a/drivers/char/ipmi/ipmi_si_intf.c
++++ b/drivers/char/ipmi/ipmi_si_intf.c
+@@ -2081,6 +2081,11 @@ static int try_smi_init(struct smi_info *new_smi)
+ new_smi->io.io_cleanup = NULL;
+ }
+
++ if (rv && new_smi->si_sm) {
++ kfree(new_smi->si_sm);
++ new_smi->si_sm = NULL;
++ }
++
+ return rv;
+ }
+
+@@ -2152,6 +2157,20 @@ skip_fallback_noirq:
+ }
+ module_init(init_ipmi_si);
+
++static void wait_msg_processed(struct smi_info *smi_info)
++{
++ unsigned long jiffies_now;
++ long time_diff;
++
++ while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
++ jiffies_now = jiffies;
++ time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
++ * SI_USEC_PER_JIFFY);
++ smi_event_handler(smi_info, time_diff);
++ schedule_timeout_uninterruptible(1);
++ }
++}
++
+ static void shutdown_smi(void *send_info)
+ {
+ struct smi_info *smi_info = send_info;
+@@ -2186,16 +2205,13 @@ static void shutdown_smi(void *send_info)
+ * in the BMC. Note that timers and CPU interrupts are off,
+ * so no need for locks.
+ */
+- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
+- poll(smi_info);
+- schedule_timeout_uninterruptible(1);
+- }
++ wait_msg_processed(smi_info);
++
+ if (smi_info->handlers)
+ disable_si_irq(smi_info);
+- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) {
+- poll(smi_info);
+- schedule_timeout_uninterruptible(1);
+- }
++
++ wait_msg_processed(smi_info);
++
+ if (smi_info->handlers)
+ smi_info->handlers->cleanup(smi_info->si_sm);
+
+diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
+index 20d5af92966d4..30f757249c5c0 100644
+--- a/drivers/char/ipmi/ipmi_ssif.c
++++ b/drivers/char/ipmi/ipmi_ssif.c
+@@ -74,7 +74,8 @@
+ /*
+ * Timer values
+ */
+-#define SSIF_MSG_USEC 20000 /* 20ms between message tries. */
++#define SSIF_MSG_USEC 60000 /* 60ms between message tries (T3). */
++#define SSIF_REQ_RETRY_USEC 60000 /* 60ms between send retries (T6). */
+ #define SSIF_MSG_PART_USEC 5000 /* 5ms for a message part */
+
+ /* How many times to we retry sending/receiving the message. */
+@@ -82,7 +83,9 @@
+ #define SSIF_RECV_RETRIES 250
+
+ #define SSIF_MSG_MSEC (SSIF_MSG_USEC / 1000)
++#define SSIF_REQ_RETRY_MSEC (SSIF_REQ_RETRY_USEC / 1000)
+ #define SSIF_MSG_JIFFIES ((SSIF_MSG_USEC * 1000) / TICK_NSEC)
++#define SSIF_REQ_RETRY_JIFFIES ((SSIF_REQ_RETRY_USEC * 1000) / TICK_NSEC)
+ #define SSIF_MSG_PART_JIFFIES ((SSIF_MSG_PART_USEC * 1000) / TICK_NSEC)
+
+ /*
+@@ -92,7 +95,7 @@
+ #define SSIF_WATCH_WATCHDOG_TIMEOUT msecs_to_jiffies(250)
+
+ enum ssif_intf_state {
+- SSIF_NORMAL,
++ SSIF_IDLE,
+ SSIF_GETTING_FLAGS,
+ SSIF_GETTING_EVENTS,
+ SSIF_CLEARING_FLAGS,
+@@ -100,8 +103,8 @@ enum ssif_intf_state {
+ /* FIXME - add watchdog stuff. */
+ };
+
+-#define SSIF_IDLE(ssif) ((ssif)->ssif_state == SSIF_NORMAL \
+- && (ssif)->curr_msg == NULL)
++#define IS_SSIF_IDLE(ssif) ((ssif)->ssif_state == SSIF_IDLE \
++ && (ssif)->curr_msg == NULL)
+
+ /*
+ * Indexes into stats[] in ssif_info below.
+@@ -229,6 +232,9 @@ struct ssif_info {
+ bool got_alert;
+ bool waiting_alert;
+
++ /* Used to inform the timeout that it should do a resend. */
++ bool do_resend;
++
+ /*
+ * If set to true, this will request events the next time the
+ * state machine is idle.
+@@ -348,9 +354,9 @@ static void return_hosed_msg(struct ssif_info *ssif_info,
+
+ /*
+ * Must be called with the message lock held. This will release the
+- * message lock. Note that the caller will check SSIF_IDLE and start a
+- * new operation, so there is no need to check for new messages to
+- * start in here.
++ * message lock. Note that the caller will check IS_SSIF_IDLE and
++ * start a new operation, so there is no need to check for new
++ * messages to start in here.
+ */
+ static void start_clear_flags(struct ssif_info *ssif_info, unsigned long *flags)
+ {
+@@ -367,7 +373,7 @@ static void start_clear_flags(struct ssif_info *ssif_info, unsigned long *flags)
+
+ if (start_send(ssif_info, msg, 3) != 0) {
+ /* Error, just go to normal state. */
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ }
+ }
+
+@@ -382,7 +388,7 @@ static void start_flag_fetch(struct ssif_info *ssif_info, unsigned long *flags)
+ mb[0] = (IPMI_NETFN_APP_REQUEST << 2);
+ mb[1] = IPMI_GET_MSG_FLAGS_CMD;
+ if (start_send(ssif_info, mb, 2) != 0)
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ }
+
+ static void check_start_send(struct ssif_info *ssif_info, unsigned long *flags,
+@@ -393,7 +399,7 @@ static void check_start_send(struct ssif_info *ssif_info, unsigned long *flags,
+
+ flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+ ssif_info->curr_msg = NULL;
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ ipmi_free_smi_msg(msg);
+ }
+@@ -407,7 +413,7 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
+
+ msg = ipmi_alloc_smi_msg();
+ if (!msg) {
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ return;
+ }
+@@ -430,7 +436,7 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
+
+ msg = ipmi_alloc_smi_msg();
+ if (!msg) {
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ return;
+ }
+@@ -448,9 +454,9 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
+
+ /*
+ * Must be called with the message lock held. This will release the
+- * message lock. Note that the caller will check SSIF_IDLE and start a
+- * new operation, so there is no need to check for new messages to
+- * start in here.
++ * message lock. Note that the caller will check IS_SSIF_IDLE and
++ * start a new operation, so there is no need to check for new
++ * messages to start in here.
+ */
+ static void handle_flags(struct ssif_info *ssif_info, unsigned long *flags)
+ {
+@@ -466,7 +472,7 @@ static void handle_flags(struct ssif_info *ssif_info, unsigned long *flags)
+ /* Events available. */
+ start_event_fetch(ssif_info, flags);
+ else {
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ }
+ }
+@@ -538,22 +544,30 @@ static void start_get(struct ssif_info *ssif_info)
+ ssif_info->recv, I2C_SMBUS_BLOCK_DATA);
+ }
+
++static void start_resend(struct ssif_info *ssif_info);
++
+ static void retry_timeout(struct timer_list *t)
+ {
+ struct ssif_info *ssif_info = from_timer(ssif_info, t, retry_timer);
+ unsigned long oflags, *flags;
+- bool waiting;
++ bool waiting, resend;
+
+ if (ssif_info->stopping)
+ return;
+
+ flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
++ resend = ssif_info->do_resend;
++ ssif_info->do_resend = false;
+ waiting = ssif_info->waiting_alert;
+ ssif_info->waiting_alert = false;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+
+ if (waiting)
+ start_get(ssif_info);
++ if (resend) {
++ start_resend(ssif_info);
++ ssif_inc_stat(ssif_info, send_retries);
++ }
+ }
+
+ static void watch_timeout(struct timer_list *t)
+@@ -568,7 +582,7 @@ static void watch_timeout(struct timer_list *t)
+ if (ssif_info->watch_timeout) {
+ mod_timer(&ssif_info->watch_timer,
+ jiffies + ssif_info->watch_timeout);
+- if (SSIF_IDLE(ssif_info)) {
++ if (IS_SSIF_IDLE(ssif_info)) {
+ start_flag_fetch(ssif_info, flags); /* Releases lock */
+ return;
+ }
+@@ -602,8 +616,6 @@ static void ssif_alert(struct i2c_client *client, enum i2c_alert_protocol type,
+ start_get(ssif_info);
+ }
+
+-static int start_resend(struct ssif_info *ssif_info);
+-
+ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ unsigned char *data, unsigned int len)
+ {
+@@ -756,7 +768,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ }
+
+ switch (ssif_info->ssif_state) {
+- case SSIF_NORMAL:
++ case SSIF_IDLE:
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ if (!msg)
+ break;
+@@ -774,7 +786,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ * Error fetching flags, or invalid length,
+ * just give up for now.
+ */
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ dev_warn(&ssif_info->client->dev,
+ "Error getting flags: %d %d, %x\n",
+@@ -782,9 +794,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ } else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
+ || data[1] != IPMI_GET_MSG_FLAGS_CMD) {
+ /*
+- * Don't abort here, maybe it was a queued
+- * response to a previous command.
++ * Recv error response, give up.
+ */
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ dev_warn(&ssif_info->client->dev,
+ "Invalid response getting flags: %x %x\n",
+@@ -809,11 +821,19 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ "Invalid response clearing flags: %x %x\n",
+ data[0], data[1]);
+ }
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ break;
+
+ case SSIF_GETTING_EVENTS:
++ if (!msg) {
++ /* Should never happen, but just in case. */
++ dev_warn(&ssif_info->client->dev,
++ "No message set while getting events\n");
++ ipmi_ssif_unlock_cond(ssif_info, flags);
++ break;
++ }
++
+ if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
+ /* Error getting event, probably done. */
+ msg->done(msg);
+@@ -838,6 +858,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ break;
+
+ case SSIF_GETTING_MESSAGES:
++ if (!msg) {
++ /* Should never happen, but just in case. */
++ dev_warn(&ssif_info->client->dev,
++ "No message set while getting messages\n");
++ ipmi_ssif_unlock_cond(ssif_info, flags);
++ break;
++ }
++
+ if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
+ /* Error getting event, probably done. */
+ msg->done(msg);
+@@ -861,10 +889,17 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
+ deliver_recv_msg(ssif_info, msg);
+ }
+ break;
++
++ default:
++ /* Should never happen, but just in case. */
++ dev_warn(&ssif_info->client->dev,
++ "Invalid state in message done handling: %d\n",
++ ssif_info->ssif_state);
++ ipmi_ssif_unlock_cond(ssif_info, flags);
+ }
+
+ flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+- if (SSIF_IDLE(ssif_info) && !ssif_info->stopping) {
++ if (IS_SSIF_IDLE(ssif_info) && !ssif_info->stopping) {
+ if (ssif_info->req_events)
+ start_event_fetch(ssif_info, flags);
+ else if (ssif_info->req_flags)
+@@ -886,31 +921,23 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
+ if (result < 0) {
+ ssif_info->retries_left--;
+ if (ssif_info->retries_left > 0) {
+- if (!start_resend(ssif_info)) {
+- ssif_inc_stat(ssif_info, send_retries);
+- return;
+- }
+- /* request failed, just return the error. */
+- ssif_inc_stat(ssif_info, send_errors);
+-
+- if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+- dev_dbg(&ssif_info->client->dev,
+- "%s: Out of retries\n", __func__);
+- msg_done_handler(ssif_info, -EIO, NULL, 0);
++ /*
++ * Wait the retry timeout time per the spec,
++ * then redo the send.
++ */
++ ssif_info->do_resend = true;
++ mod_timer(&ssif_info->retry_timer,
++ jiffies + SSIF_REQ_RETRY_JIFFIES);
+ return;
+ }
+
+ ssif_inc_stat(ssif_info, send_errors);
+
+- /*
+- * Got an error on transmit, let the done routine
+- * handle it.
+- */
+ if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+ dev_dbg(&ssif_info->client->dev,
+- "%s: Error %d\n", __func__, result);
++ "%s: Out of retries\n", __func__);
+
+- msg_done_handler(ssif_info, result, NULL, 0);
++ msg_done_handler(ssif_info, -EIO, NULL, 0);
+ return;
+ }
+
+@@ -973,7 +1000,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
+ }
+ }
+
+-static int start_resend(struct ssif_info *ssif_info)
++static void start_resend(struct ssif_info *ssif_info)
+ {
+ int command;
+
+@@ -998,7 +1025,6 @@ static int start_resend(struct ssif_info *ssif_info)
+
+ ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE,
+ command, ssif_info->data, I2C_SMBUS_BLOCK_DATA);
+- return 0;
+ }
+
+ static int start_send(struct ssif_info *ssif_info,
+@@ -1013,7 +1039,8 @@ static int start_send(struct ssif_info *ssif_info,
+ ssif_info->retries_left = SSIF_SEND_RETRIES;
+ memcpy(ssif_info->data + 1, data, len);
+ ssif_info->data_len = len;
+- return start_resend(ssif_info);
++ start_resend(ssif_info);
++ return 0;
+ }
+
+ /* Must be called with the message lock held. */
+@@ -1023,7 +1050,7 @@ static void start_next_msg(struct ssif_info *ssif_info, unsigned long *flags)
+ unsigned long oflags;
+
+ restart:
+- if (!SSIF_IDLE(ssif_info)) {
++ if (!IS_SSIF_IDLE(ssif_info)) {
+ ipmi_ssif_unlock_cond(ssif_info, flags);
+ return;
+ }
+@@ -1246,7 +1273,7 @@ static void shutdown_ssif(void *send_info)
+ dev_set_drvdata(&ssif_info->client->dev, NULL);
+
+ /* make sure the driver is not looking for flags any more. */
+- while (ssif_info->ssif_state != SSIF_NORMAL)
++ while (ssif_info->ssif_state != SSIF_IDLE)
+ schedule_timeout(1);
+
+ ssif_info->stopping = true;
+@@ -1313,8 +1340,10 @@ static int do_cmd(struct i2c_client *client, int len, unsigned char *msg,
+ ret = i2c_smbus_write_block_data(client, SSIF_IPMI_REQUEST, len, msg);
+ if (ret) {
+ retry_cnt--;
+- if (retry_cnt > 0)
++ if (retry_cnt > 0) {
++ msleep(SSIF_REQ_RETRY_MSEC);
+ goto retry1;
++ }
+ return -ENODEV;
+ }
+
+@@ -1385,7 +1414,7 @@ static struct ssif_addr_info *ssif_info_find(unsigned short addr,
+ restart:
+ list_for_each_entry(info, &ssif_infos, link) {
+ if (info->binfo.addr == addr) {
+- if (info->addr_src == SI_SMBIOS)
++ if (info->addr_src == SI_SMBIOS && !info->adapter_name)
+ info->adapter_name = kstrdup(adapter_name,
+ GFP_KERNEL);
+
+@@ -1455,8 +1484,10 @@ retry_write:
+ 32, msg);
+ if (ret) {
+ retry_cnt--;
+- if (retry_cnt > 0)
++ if (retry_cnt > 0) {
++ msleep(SSIF_REQ_RETRY_MSEC);
+ goto retry_write;
++ }
+ dev_err(&client->dev, "Could not write multi-part start, though the BMC said it could handle it. Just limit sends to one part.\n");
+ return ret;
+ }
+@@ -1583,6 +1614,11 @@ static int ssif_add_infos(struct i2c_client *client)
+ info->addr_src = SI_ACPI;
+ info->client = client;
+ info->adapter_name = kstrdup(client->adapter->name, GFP_KERNEL);
++ if (!info->adapter_name) {
++ kfree(info);
++ return -ENOMEM;
++ }
++
+ info->binfo.addr = client->addr;
+ list_add_tail(&info->link, &ssif_infos);
+ return 0;
+@@ -1659,6 +1695,9 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ }
+ }
+
++ ssif_info->client = client;
++ i2c_set_clientdata(client, ssif_info);
++
+ rv = ssif_check_and_remove(client, ssif_info);
+ /* If rv is 0 and addr source is not SI_ACPI, continue probing */
+ if (!rv && ssif_info->addr_source == SI_ACPI) {
+@@ -1679,9 +1718,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ ipmi_addr_src_to_str(ssif_info->addr_source),
+ client->addr, client->adapter->name, slave_addr);
+
+- ssif_info->client = client;
+- i2c_set_clientdata(client, ssif_info);
+-
+ /* Now check for system interface capabilities */
+ msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+ msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD;
+@@ -1818,7 +1854,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ }
+
+ spin_lock_init(&ssif_info->lock);
+- ssif_info->ssif_state = SSIF_NORMAL;
++ ssif_info->ssif_state = SSIF_IDLE;
+ timer_setup(&ssif_info->retry_timer, retry_timeout, 0);
+ timer_setup(&ssif_info->watch_timer, watch_timeout, 0);
+
+@@ -1881,6 +1917,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+
+ dev_err(&ssif_info->client->dev,
+ "Unable to start IPMI SSIF: %d\n", rv);
++ i2c_set_clientdata(client, NULL);
+ kfree(ssif_info);
+ }
+ kfree(resp);
+diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
+index e4ff3b50de7f3..883b4a3410122 100644
+--- a/drivers/char/ipmi/ipmi_watchdog.c
++++ b/drivers/char/ipmi/ipmi_watchdog.c
+@@ -342,13 +342,17 @@ static atomic_t msg_tofree = ATOMIC_INIT(0);
+ static DECLARE_COMPLETION(msg_wait);
+ static void msg_free_smi(struct ipmi_smi_msg *msg)
+ {
+- if (atomic_dec_and_test(&msg_tofree))
+- complete(&msg_wait);
++ if (atomic_dec_and_test(&msg_tofree)) {
++ if (!oops_in_progress)
++ complete(&msg_wait);
++ }
+ }
+ static void msg_free_recv(struct ipmi_recv_msg *msg)
+ {
+- if (atomic_dec_and_test(&msg_tofree))
+- complete(&msg_wait);
++ if (atomic_dec_and_test(&msg_tofree)) {
++ if (!oops_in_progress)
++ complete(&msg_wait);
++ }
+ }
+ static struct ipmi_smi_msg smi_msg = {
+ .done = msg_free_smi
+@@ -434,8 +438,10 @@ static int _ipmi_set_timeout(int do_heartbeat)
+ rv = __ipmi_set_timeout(&smi_msg,
+ &recv_msg,
+ &send_heartbeat_now);
+- if (rv)
++ if (rv) {
++ atomic_set(&msg_tofree, 0);
+ return rv;
++ }
+
+ wait_for_completion(&msg_wait);
+
+@@ -497,7 +503,7 @@ static void panic_halt_ipmi_heartbeat(void)
+ msg.cmd = IPMI_WDOG_RESET_TIMER;
+ msg.data = NULL;
+ msg.data_len = 0;
+- atomic_inc(&panic_done_count);
++ atomic_add(2, &panic_done_count);
+ rv = ipmi_request_supply_msgs(watchdog_user,
+ (struct ipmi_addr *) &addr,
+ 0,
+@@ -507,7 +513,7 @@ static void panic_halt_ipmi_heartbeat(void)
+ &panic_halt_heartbeat_recv_msg,
+ 1);
+ if (rv)
+- atomic_dec(&panic_done_count);
++ atomic_sub(2, &panic_done_count);
+ }
+
+ static struct ipmi_smi_msg panic_halt_smi_msg = {
+@@ -531,12 +537,12 @@ static void panic_halt_ipmi_set_timeout(void)
+ /* Wait for the messages to be free. */
+ while (atomic_read(&panic_done_count) != 0)
+ ipmi_poll_interface(watchdog_user);
+- atomic_inc(&panic_done_count);
++ atomic_add(2, &panic_done_count);
+ rv = __ipmi_set_timeout(&panic_halt_smi_msg,
+ &panic_halt_recv_msg,
+ &send_heartbeat_now);
+ if (rv) {
+- atomic_dec(&panic_done_count);
++ atomic_sub(2, &panic_done_count);
+ pr_warn("Unable to extend the watchdog timeout\n");
+ } else {
+ if (send_heartbeat_now)
+@@ -580,6 +586,7 @@ restart:
+ &recv_msg,
+ 1);
+ if (rv) {
++ atomic_set(&msg_tofree, 0);
+ pr_warn("heartbeat send failure: %d\n", rv);
+ return rv;
+ }
+diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
+index 92a37b33494cb..f23c146bb740c 100644
+--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
++++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
+@@ -404,13 +404,31 @@ static void aspeed_kcs_check_obe(struct timer_list *timer)
+ static void aspeed_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 state)
+ {
+ struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
++ int rc;
++ u8 str;
+
+ /* We don't have an OBE IRQ, emulate it */
+ if (mask & KCS_BMC_EVENT_TYPE_OBE) {
+- if (KCS_BMC_EVENT_TYPE_OBE & state)
+- mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD);
+- else
++ if (KCS_BMC_EVENT_TYPE_OBE & state) {
++ /*
++ * Given we don't have an OBE IRQ, delay by polling briefly to see if we can
++ * observe such an event before returning to the caller. This is not
++ * incorrect because OBF may have already become clear before enabling the
++ * IRQ if we had one, under which circumstance no event will be propagated
++ * anyway.
++ *
++ * The onus is on the client to perform a race-free check that it hasn't
++ * missed the event.
++ */
++ rc = read_poll_timeout_atomic(aspeed_kcs_inb, str,
++ !(str & KCS_BMC_STR_OBF), 1, 100, false,
++ &priv->kcs_bmc, priv->kcs_bmc.ioreg.str);
++ /* Time for the slow path? */
++ if (rc == -ETIMEDOUT)
++ mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD);
++ } else {
+ del_timer(&priv->obe.timer);
++ }
+ }
+
+ if (mask & KCS_BMC_EVENT_TYPE_IBF) {
+diff --git a/drivers/char/ipmi/kcs_bmc_serio.c b/drivers/char/ipmi/kcs_bmc_serio.c
+index 7948cabde50b4..7e2067628a6ce 100644
+--- a/drivers/char/ipmi/kcs_bmc_serio.c
++++ b/drivers/char/ipmi/kcs_bmc_serio.c
+@@ -73,10 +73,12 @@ static int kcs_bmc_serio_add_device(struct kcs_bmc_device *kcs_bmc)
+ struct serio *port;
+
+ priv = devm_kzalloc(kcs_bmc->dev, sizeof(*priv), GFP_KERNEL);
++ if (!priv)
++ return -ENOMEM;
+
+ /* Use kzalloc() as the allocation is cleaned up with kfree() via serio_unregister_port() */
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+- if (!(priv && port))
++ if (!port)
+ return -ENOMEM;
+
+ port->id.type = SERIO_8042;
+diff --git a/drivers/char/mem.c b/drivers/char/mem.c
+index 1c596b5cdb279..d8e3b547e0ae7 100644
+--- a/drivers/char/mem.c
++++ b/drivers/char/mem.c
+@@ -702,8 +702,8 @@ static const struct memdev {
+ #endif
+ [5] = { "zero", 0666, &zero_fops, 0 },
+ [7] = { "full", 0666, &full_fops, 0 },
+- [8] = { "random", 0666, &random_fops, 0 },
+- [9] = { "urandom", 0666, &urandom_fops, 0 },
++ [8] = { "random", 0666, &random_fops, FMODE_NOWAIT },
++ [9] = { "urandom", 0666, &urandom_fops, FMODE_NOWAIT },
+ #ifdef CONFIG_PRINTK
+ [11] = { "kmsg", 0644, &kmsg_fops, 0 },
+ #endif
+diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h
+index 9ccb6b270b071..95164246afd1a 100644
+--- a/drivers/char/mwave/3780i.h
++++ b/drivers/char/mwave/3780i.h
+@@ -68,7 +68,7 @@ typedef struct {
+ unsigned char ClockControl:1; /* RW: Clock control: 0=normal, 1=stop 3780i clocks */
+ unsigned char SoftReset:1; /* RW: Soft reset 0=normal, 1=soft reset active */
+ unsigned char ConfigMode:1; /* RW: Configuration mode, 0=normal, 1=config mode */
+- unsigned char Reserved:5; /* 0: Reserved */
++ unsigned short Reserved:13; /* 0: Reserved */
+ } DSP_ISA_SLAVE_CONTROL;
+
+
+diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
+index 8f1bce0b4fe50..7057b7bacc8cf 100644
+--- a/drivers/char/pcmcia/cm4000_cs.c
++++ b/drivers/char/pcmcia/cm4000_cs.c
+@@ -530,7 +530,8 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq)
+ DEBUGP(5, dev, "NumRecBytes is valid\n");
+ break;
+ }
+- usleep_range(10000, 11000);
++ /* can not sleep as this is in atomic context */
++ mdelay(10);
+ }
+ if (i == 100) {
+ DEBUGP(5, dev, "Timeout waiting for NumRecBytes getting "
+@@ -550,7 +551,8 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq)
+ }
+ break;
+ }
+- usleep_range(10000, 11000);
++ /* can not sleep as this is in atomic context */
++ mdelay(10);
+ }
+
+ /* check whether it is a short PTS reply? */
+diff --git a/drivers/char/random.c b/drivers/char/random.c
+index 605969ed0f965..8642326de6e1c 100644
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -1,310 +1,26 @@
++// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+ /*
+- * random.c -- A strong random number generator
+- *
+- * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All
+- * Rights Reserved.
+- *
++ * Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
+- *
+- * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All
+- * rights reserved.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, and the entire permission notice in its entirety,
+- * including the disclaimer of warranties.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote
+- * products derived from this software without specific prior
+- * written permission.
+- *
+- * ALTERNATIVELY, this product may be distributed under the terms of
+- * the GNU General Public License, in which case the provisions of the GPL are
+- * required INSTEAD OF the above restrictions. (This clause is
+- * necessary due to a potential bad interaction between the GPL and
+- * the restrictions contained in a BSD-style copyright.)
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+- * DAMAGE.
+- */
+-
+-/*
+- * (now, with legal B.S. out of the way.....)
+- *
+- * This routine gathers environmental noise from device drivers, etc.,
+- * and returns good random numbers, suitable for cryptographic use.
+- * Besides the obvious cryptographic uses, these numbers are also good
+- * for seeding TCP sequence numbers, and other places where it is
+- * desirable to have numbers which are not only random, but hard to
+- * predict by an attacker.
+- *
+- * Theory of operation
+- * ===================
+- *
+- * Computers are very predictable devices. Hence it is extremely hard
+- * to produce truly random numbers on a computer --- as opposed to
+- * pseudo-random numbers, which can easily generated by using a
+- * algorithm. Unfortunately, it is very easy for attackers to guess
+- * the sequence of pseudo-random number generators, and for some
+- * applications this is not acceptable. So instead, we must try to
+- * gather "environmental noise" from the computer's environment, which
+- * must be hard for outside attackers to observe, and use that to
+- * generate random numbers. In a Unix environment, this is best done
+- * from inside the kernel.
+- *
+- * Sources of randomness from the environment include inter-keyboard
+- * timings, inter-interrupt timings from some interrupts, and other
+- * events which are both (a) non-deterministic and (b) hard for an
+- * outside observer to measure. Randomness from these sources are
+- * added to an "entropy pool", which is mixed using a CRC-like function.
+- * This is not cryptographically strong, but it is adequate assuming
+- * the randomness is not chosen maliciously, and it is fast enough that
+- * the overhead of doing it on every interrupt is very reasonable.
+- * As random bytes are mixed into the entropy pool, the routines keep
+- * an *estimate* of how many bits of randomness have been stored into
+- * the random number generator's internal state.
+- *
+- * When random bytes are desired, they are obtained by taking the SHA
+- * hash of the contents of the "entropy pool". The SHA hash avoids
+- * exposing the internal state of the entropy pool. It is believed to
+- * be computationally infeasible to derive any useful information
+- * about the input of SHA from its output. Even if it is possible to
+- * analyze SHA in some clever way, as long as the amount of data
+- * returned from the generator is less than the inherent entropy in
+- * the pool, the output data is totally unpredictable. For this
+- * reason, the routine decreases its internal estimate of how many
+- * bits of "true randomness" are contained in the entropy pool as it
+- * outputs random numbers.
+- *
+- * If this estimate goes to zero, the routine can still generate
+- * random numbers; however, an attacker may (at least in theory) be
+- * able to infer the future output of the generator from prior
+- * outputs. This requires successful cryptanalysis of SHA, which is
+- * not believed to be feasible, but there is a remote possibility.
+- * Nonetheless, these numbers should be useful for the vast majority
+- * of purposes.
+- *
+- * Exported interfaces ---- output
+- * ===============================
+- *
+- * There are four exported interfaces; two for use within the kernel,
+- * and two or use from userspace.
+- *
+- * Exported interfaces ---- userspace output
+- * -----------------------------------------
+- *
+- * The userspace interfaces are two character devices /dev/random and
+- * /dev/urandom. /dev/random is suitable for use when very high
+- * quality randomness is desired (for example, for key generation or
+- * one-time pads), as it will only return a maximum of the number of
+- * bits of randomness (as estimated by the random number generator)
+- * contained in the entropy pool.
+- *
+- * The /dev/urandom device does not have this limit, and will return
+- * as many bytes as are requested. As more and more random bytes are
+- * requested without giving time for the entropy pool to recharge,
+- * this will result in random numbers that are merely cryptographically
+- * strong. For many applications, however, this is acceptable.
+- *
+- * Exported interfaces ---- kernel output
+- * --------------------------------------
+- *
+- * The primary kernel interface is
+- *
+- * void get_random_bytes(void *buf, int nbytes);
+- *
+- * This interface will return the requested number of random bytes,
+- * and place it in the requested buffer. This is equivalent to a
+- * read from /dev/urandom.
+- *
+- * For less critical applications, there are the functions:
+- *
+- * u32 get_random_u32()
+- * u64 get_random_u64()
+- * unsigned int get_random_int()
+- * unsigned long get_random_long()
+- *
+- * These are produced by a cryptographic RNG seeded from get_random_bytes,
+- * and so do not deplete the entropy pool as much. These are recommended
+- * for most in-kernel operations *if the result is going to be stored in
+- * the kernel*.
+- *
+- * Specifically, the get_random_int() family do not attempt to do
+- * "anti-backtracking". If you capture the state of the kernel (e.g.
+- * by snapshotting the VM), you can figure out previous get_random_int()
+- * return values. But if the value is stored in the kernel anyway,
+- * this is not a problem.
+- *
+- * It *is* safe to expose get_random_int() output to attackers (e.g. as
+- * network cookies); given outputs 1..n, it's not feasible to predict
+- * outputs 0 or n+1. The only concern is an attacker who breaks into
+- * the kernel later; the get_random_int() engine is not reseeded as
+- * often as the get_random_bytes() one.
+- *
+- * get_random_bytes() is needed for keys that need to stay secret after
+- * they are erased from the kernel. For example, any key that will
+- * be wrapped and stored encrypted. And session encryption keys: we'd
+- * like to know that after the session is closed and the keys erased,
+- * the plaintext is unrecoverable to someone who recorded the ciphertext.
+- *
+- * But for network ports/cookies, stack canaries, PRNG seeds, address
+- * space layout randomization, session *authentication* keys, or other
+- * applications where the sensitive data is stored in the kernel in
+- * plaintext for as long as it's sensitive, the get_random_int() family
+- * is just fine.
+- *
+- * Consider ASLR. We want to keep the address space secret from an
+- * outside attacker while the process is running, but once the address
+- * space is torn down, it's of no use to an attacker any more. And it's
+- * stored in kernel data structures as long as it's alive, so worrying
+- * about an attacker's ability to extrapolate it from the get_random_int()
+- * CRNG is silly.
+- *
+- * Even some cryptographic keys are safe to generate with get_random_int().
+- * In particular, keys for SipHash are generally fine. Here, knowledge
+- * of the key authorizes you to do something to a kernel object (inject
+- * packets to a network connection, or flood a hash table), and the
+- * key is stored with the object being protected. Once it goes away,
+- * we no longer care if anyone knows the key.
+- *
+- * prandom_u32()
+- * -------------
+- *
+- * For even weaker applications, see the pseudorandom generator
+- * prandom_u32(), prandom_max(), and prandom_bytes(). If the random
+- * numbers aren't security-critical at all, these are *far* cheaper.
+- * Useful for self-tests, random error simulation, randomized backoffs,
+- * and any other application where you trust that nobody is trying to
+- * maliciously mess with you by guessing the "random" numbers.
+- *
+- * Exported interfaces ---- input
+- * ==============================
+- *
+- * The current exported interfaces for gathering environmental noise
+- * from the devices are:
+- *
+- * void add_device_randomness(const void *buf, unsigned int size);
+- * void add_input_randomness(unsigned int type, unsigned int code,
+- * unsigned int value);
+- * void add_interrupt_randomness(int irq, int irq_flags);
+- * void add_disk_randomness(struct gendisk *disk);
+- *
+- * add_device_randomness() is for adding data to the random pool that
+- * is likely to differ between two devices (or possibly even per boot).
+- * This would be things like MAC addresses or serial numbers, or the
+- * read-out of the RTC. This does *not* add any actual entropy to the
+- * pool, but it initializes the pool to different values for devices
+- * that might otherwise be identical and have very little entropy
+- * available to them (particularly common in the embedded world).
+- *
+- * add_input_randomness() uses the input layer interrupt timing, as well as
+- * the event type information from the hardware.
+- *
+- * add_interrupt_randomness() uses the interrupt timing as random
+- * inputs to the entropy pool. Using the cycle counters and the irq source
+- * as inputs, it feeds the randomness roughly once a second.
+- *
+- * add_disk_randomness() uses what amounts to the seek time of block
+- * layer request events, on a per-disk_devt basis, as input to the
+- * entropy pool. Note that high-speed solid state drives with very low
+- * seek times do not make for good sources of entropy, as their seek
+- * times are usually fairly consistent.
+- *
+- * All of these routines try to estimate how many bits of randomness a
+- * particular randomness source. They do this by keeping track of the
+- * first and second order deltas of the event timings.
+- *
+- * Ensuring unpredictability at system startup
+- * ============================================
+- *
+- * When any operating system starts up, it will go through a sequence
+- * of actions that are fairly predictable by an adversary, especially
+- * if the start-up does not involve interaction with a human operator.
+- * This reduces the actual number of bits of unpredictability in the
+- * entropy pool below the value in entropy_count. In order to
+- * counteract this effect, it helps to carry information in the
+- * entropy pool across shut-downs and start-ups. To do this, put the
+- * following lines an appropriate script which is run during the boot
+- * sequence:
+- *
+- * echo "Initializing random number generator..."
+- * random_seed=/var/run/random-seed
+- * # Carry a random seed from start-up to start-up
+- * # Load and then save the whole entropy pool
+- * if [ -f $random_seed ]; then
+- * cat $random_seed >/dev/urandom
+- * else
+- * touch $random_seed
+- * fi
+- * chmod 600 $random_seed
+- * dd if=/dev/urandom of=$random_seed count=1 bs=512
+- *
+- * and the following lines in an appropriate script which is run as
+- * the system is shutdown:
+- *
+- * # Carry a random seed from shut-down to start-up
+- * # Save the whole entropy pool
+- * echo "Saving random seed..."
+- * random_seed=/var/run/random-seed
+- * touch $random_seed
+- * chmod 600 $random_seed
+- * dd if=/dev/urandom of=$random_seed count=1 bs=512
+- *
+- * For example, on most modern systems using the System V init
+- * scripts, such code fragments would be found in
+- * /etc/rc.d/init.d/random. On older Linux systems, the correct script
+- * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
+- *
+- * Effectively, these commands cause the contents of the entropy pool
+- * to be saved at shut-down time and reloaded into the entropy pool at
+- * start-up. (The 'dd' in the addition to the bootup script is to
+- * make sure that /etc/random-seed is different for every start-up,
+- * even if the system crashes without executing rc.0.) Even with
+- * complete knowledge of the start-up activities, predicting the state
+- * of the entropy pool requires knowledge of the previous history of
+- * the system.
+- *
+- * Configuring the /dev/random driver under Linux
+- * ==============================================
+- *
+- * The /dev/random driver under Linux uses minor numbers 8 and 9 of
+- * the /dev/mem major number (#1). So if your system does not have
+- * /dev/random and /dev/urandom created already, they can be created
+- * by using the commands:
+- *
+- * mknod /dev/random c 1 8
+- * mknod /dev/urandom c 1 9
+- *
+- * Acknowledgements:
+- * =================
+- *
+- * Ideas for constructing this random number generator were derived
+- * from Pretty Good Privacy's random number generator, and from private
+- * discussions with Phil Karn. Colin Plumb provided a faster random
+- * number generator, which speed up the mixing function of the entropy
+- * pool, taken from PGPfone. Dale Worley has also contributed many
+- * useful ideas and suggestions to improve this driver.
+- *
+- * Any flaws in the design are solely my responsibility, and should
+- * not be attributed to the Phil, Colin, or any of authors of PGP.
+- *
+- * Further background information on this topic may be obtained from
+- * RFC 1750, "Randomness Recommendations for Security", by Donald
+- * Eastlake, Steve Crocker, and Jeff Schiller.
++ * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
++ *
++ * This driver produces cryptographically secure pseudorandom data. It is divided
++ * into roughly six sections, each with a section header:
++ *
++ * - Initialization and readiness waiting.
++ * - Fast key erasure RNG, the "crng".
++ * - Entropy accumulation and extraction routines.
++ * - Entropy collection routines.
++ * - Userspace reader/writer interfaces.
++ * - Sysctl interface.
++ *
++ * The high level overview is that there is one input pool, into which
++ * various pieces of data are hashed. Prior to initialization, some of that
++ * data is then "credited" as having a certain number of bits of entropy.
++ * When enough bits of entropy are available, the hash is finalized and
++ * handed as a key to a stream cipher that expands it indefinitely for
++ * various consumers. This key is periodically refreshed as the various
++ * entropy collectors, described below, add data to the input pool.
+ */
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+@@ -327,7 +43,6 @@
+ #include <linux/spinlock.h>
+ #include <linux/kthread.h>
+ #include <linux/percpu.h>
+-#include <linux/fips.h>
+ #include <linux/ptrace.h>
+ #include <linux/workqueue.h>
+ #include <linux/irq.h>
+@@ -335,1457 +50,1075 @@
+ #include <linux/syscalls.h>
+ #include <linux/completion.h>
+ #include <linux/uuid.h>
++#include <linux/uaccess.h>
++#include <linux/siphash.h>
++#include <linux/uio.h>
+ #include <crypto/chacha.h>
+-#include <crypto/sha1.h>
+-
++#include <crypto/blake2s.h>
+ #include <asm/processor.h>
+-#include <linux/uaccess.h>
+ #include <asm/irq.h>
+ #include <asm/irq_regs.h>
+ #include <asm/io.h>
+
+-#define CREATE_TRACE_POINTS
+-#include <trace/events/random.h>
+-
+-/* #define ADD_INTERRUPT_BENCH */
++/*********************************************************************
++ *
++ * Initialization and readiness waiting.
++ *
++ * Much of the RNG infrastructure is devoted to various dependencies
++ * being able to wait until the RNG has collected enough entropy and
++ * is ready for safe consumption.
++ *
++ *********************************************************************/
+
+ /*
+- * Configuration information
++ * crng_init is protected by base_crng->lock, and only increases
++ * its value (from empty->early->ready).
+ */
+-#define INPUT_POOL_SHIFT 12
+-#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
+-#define OUTPUT_POOL_SHIFT 10
+-#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
+-#define EXTRACT_SIZE 10
+-
++static enum {
++ CRNG_EMPTY = 0, /* Little to no entropy collected */
++ CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */
++ CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */
++} crng_init __read_mostly = CRNG_EMPTY;
++#define crng_ready() (likely(crng_init >= CRNG_READY))
++/* Various types of waiters for crng_init->CRNG_READY transition. */
++static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
++static struct fasync_struct *fasync;
++static DEFINE_SPINLOCK(random_ready_chain_lock);
++static RAW_NOTIFIER_HEAD(random_ready_chain);
+
+-#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
++/* Control how we warn userspace. */
++static struct ratelimit_state urandom_warning =
++ RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE);
++static int ratelimit_disable __read_mostly =
++ IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM);
++module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
++MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
+
+ /*
+- * To allow fractional bits to be tracked, the entropy_count field is
+- * denominated in units of 1/8th bits.
++ * Returns whether or not the input pool has been seeded and thus guaranteed
++ * to supply cryptographically secure random numbers. This applies to: the
++ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
++ * ,u64,int,long} family of functions.
+ *
+- * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in
+- * credit_entropy_bits() needs to be 64 bits wide.
++ * Returns: true if the input pool has been seeded.
++ * false if the input pool has not been seeded.
+ */
+-#define ENTROPY_SHIFT 3
+-#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT)
++bool rng_is_initialized(void)
++{
++ return crng_ready();
++}
++EXPORT_SYMBOL(rng_is_initialized);
+
+-/*
+- * If the entropy count falls under this number of bits, then we
+- * should wake up processes which are selecting or polling on write
+- * access to /dev/random.
+- */
+-static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
++/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
++static void try_to_generate_entropy(void);
+
+ /*
+- * Originally, we used a primitive polynomial of degree .poolwords
+- * over GF(2). The taps for various sizes are defined below. They
+- * were chosen to be evenly spaced except for the last tap, which is 1
+- * to get the twisting happening as fast as possible.
+- *
+- * For the purposes of better mixing, we use the CRC-32 polynomial as
+- * well to make a (modified) twisted Generalized Feedback Shift
+- * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR
+- * generators. ACM Transactions on Modeling and Computer Simulation
+- * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted
+- * GFSR generators II. ACM Transactions on Modeling and Computer
+- * Simulation 4:254-266)
+- *
+- * Thanks to Colin Plumb for suggesting this.
+- *
+- * The mixing operation is much less sensitive than the output hash,
+- * where we use SHA-1. All that we want of mixing operation is that
+- * it be a good non-cryptographic hash; i.e. it not produce collisions
+- * when fed "random" data of the sort we expect to see. As long as
+- * the pool state differs for different inputs, we have preserved the
+- * input entropy and done a good job. The fact that an intelligent
+- * attacker can construct inputs that will produce controlled
+- * alterations to the pool's state is not important because we don't
+- * consider such inputs to contribute any randomness. The only
+- * property we need with respect to them is that the attacker can't
+- * increase his/her knowledge of the pool's state. Since all
+- * additions are reversible (knowing the final state and the input,
+- * you can reconstruct the initial state), if an attacker has any
+- * uncertainty about the initial state, he/she can only shuffle that
+- * uncertainty about, but never cause any collisions (which would
+- * decrease the uncertainty).
++ * Wait for the input pool to be seeded and thus guaranteed to supply
++ * cryptographically secure random numbers. This applies to: the /dev/urandom
++ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
++ * family of functions. Using any of these functions without first calling
++ * this function forfeits the guarantee of security.
+ *
+- * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and
+- * Videau in their paper, "The Linux Pseudorandom Number Generator
+- * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their
+- * paper, they point out that we are not using a true Twisted GFSR,
+- * since Matsumoto & Kurita used a trinomial feedback polynomial (that
+- * is, with only three taps, instead of the six that we are using).
+- * As a result, the resulting polynomial is neither primitive nor
+- * irreducible, and hence does not have a maximal period over
+- * GF(2**32). They suggest a slight change to the generator
+- * polynomial which improves the resulting TGFSR polynomial to be
+- * irreducible, which we have made here.
++ * Returns: 0 if the input pool has been seeded.
++ * -ERESTARTSYS if the function was interrupted by a signal.
+ */
+-static const struct poolinfo {
+- int poolbitshift, poolwords, poolbytes, poolfracbits;
+-#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5)
+- int tap1, tap2, tap3, tap4, tap5;
+-} poolinfo_table[] = {
+- /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */
+- /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
+- { S(128), 104, 76, 51, 25, 1 },
+-};
++int wait_for_random_bytes(void)
++{
++ while (!crng_ready()) {
++ int ret;
++
++ try_to_generate_entropy();
++ ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
++ if (ret)
++ return ret > 0 ? 0 : ret;
++ }
++ return 0;
++}
++EXPORT_SYMBOL(wait_for_random_bytes);
+
+ /*
+- * Static global variables
++ * Add a callback function that will be invoked when the input
++ * pool is initialised.
++ *
++ * returns: 0 if callback is successfully added
++ * -EALREADY if pool is already initialised (callback not called)
+ */
+-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+-static struct fasync_struct *fasync;
+-
+-static DEFINE_SPINLOCK(random_ready_list_lock);
+-static LIST_HEAD(random_ready_list);
++int __cold register_random_ready_notifier(struct notifier_block *nb)
++{
++ unsigned long flags;
++ int ret = -EALREADY;
+
+-struct crng_state {
+- __u32 state[16];
+- unsigned long init_time;
+- spinlock_t lock;
+-};
++ if (crng_ready())
++ return ret;
+
+-static struct crng_state primary_crng = {
+- .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
+-};
++ spin_lock_irqsave(&random_ready_chain_lock, flags);
++ if (!crng_ready())
++ ret = raw_notifier_chain_register(&random_ready_chain, nb);
++ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
++ return ret;
++}
+
+ /*
+- * crng_init = 0 --> Uninitialized
+- * 1 --> Initialized
+- * 2 --> Initialized from input_pool
+- *
+- * crng_init is protected by primary_crng->lock, and only increases
+- * its value (from 0->1->2).
++ * Delete a previously registered readiness callback function.
+ */
+-static int crng_init = 0;
+-#define crng_ready() (likely(crng_init > 1))
+-static int crng_init_cnt = 0;
+-static unsigned long crng_global_init_time = 0;
+-#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE)
+-static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]);
+-static void _crng_backtrack_protect(struct crng_state *crng,
+- __u8 tmp[CHACHA_BLOCK_SIZE], int used);
+-static void process_random_ready_list(void);
+-static void _get_random_bytes(void *buf, int nbytes);
+-
+-static struct ratelimit_state unseeded_warning =
+- RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
+-static struct ratelimit_state urandom_warning =
+- RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
++int __cold unregister_random_ready_notifier(struct notifier_block *nb)
++{
++ unsigned long flags;
++ int ret;
++
++ spin_lock_irqsave(&random_ready_chain_lock, flags);
++ ret = raw_notifier_chain_unregister(&random_ready_chain, nb);
++ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
++ return ret;
++}
+
+-static int ratelimit_disable __read_mostly;
++static void __cold process_random_ready_list(void)
++{
++ unsigned long flags;
+
+-module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
+-MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
++ spin_lock_irqsave(&random_ready_chain_lock, flags);
++ raw_notifier_call_chain(&random_ready_chain, 0, NULL);
++ spin_unlock_irqrestore(&random_ready_chain_lock, flags);
++}
+
+-/**********************************************************************
++#define warn_unseeded_randomness() \
++ if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \
++ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \
++ __func__, (void *)_RET_IP_, crng_init)
++
++
++/*********************************************************************
+ *
+- * OS independent entropy store. Here are the functions which handle
+- * storing entropy in an entropy pool.
++ * Fast key erasure RNG, the "crng".
+ *
+- **********************************************************************/
++ * These functions expand entropy from the entropy extractor into
++ * long streams for external consumption using the "fast key erasure"
++ * RNG described at <https://blog.cr.yp.to/20170723-random.html>.
++ *
++ * There are a few exported interfaces for use by other drivers:
++ *
++ * void get_random_bytes(void *buf, size_t len)
++ * u32 get_random_u32()
++ * u64 get_random_u64()
++ * unsigned int get_random_int()
++ * unsigned long get_random_long()
++ *
++ * These interfaces will return the requested number of random bytes
++ * into the given buffer or as a return value. This is equivalent to
++ * a read from /dev/urandom. The u32, u64, int, and long family of
++ * functions may be higher performance for one-off random integers,
++ * because they do a bit of buffering and do not invoke reseeding
++ * until the buffer is emptied.
++ *
++ *********************************************************************/
+
+-struct entropy_store;
+-struct entropy_store {
+- /* read-only data: */
+- const struct poolinfo *poolinfo;
+- __u32 *pool;
+- const char *name;
++enum {
++ CRNG_RESEED_START_INTERVAL = HZ,
++ CRNG_RESEED_INTERVAL = 60 * HZ
++};
+
+- /* read-write data: */
++static struct {
++ u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
++ unsigned long birth;
++ unsigned long generation;
+ spinlock_t lock;
+- unsigned short add_ptr;
+- unsigned short input_rotate;
+- int entropy_count;
+- unsigned int last_data_init:1;
+- __u8 last_data[EXTRACT_SIZE];
++} base_crng = {
++ .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
+ };
+
+-static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+- size_t nbytes, int min, int rsvd);
+-static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
+- size_t nbytes, int fips);
+-
+-static void crng_reseed(struct crng_state *crng, struct entropy_store *r);
+-static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy;
+-
+-static struct entropy_store input_pool = {
+- .poolinfo = &poolinfo_table[0],
+- .name = "input",
+- .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+- .pool = input_pool_data
++struct crng {
++ u8 key[CHACHA_KEY_SIZE];
++ unsigned long generation;
++ local_lock_t lock;
+ };
+
+-static __u32 const twist_table[8] = {
+- 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
+- 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
+-
+-/*
+- * This function adds bytes into the entropy "pool". It does not
+- * update the entropy estimate. The caller should call
+- * credit_entropy_bits if this is appropriate.
+- *
+- * The pool is stirred with a primitive polynomial of the appropriate
+- * degree, and then twisted. We twist by three bits at a time because
+- * it's cheap to do so and helps slightly in the expected case where
+- * the entropy is concentrated in the low-order bits.
+- */
+-static void _mix_pool_bytes(struct entropy_store *r, const void *in,
+- int nbytes)
+-{
+- unsigned long i, tap1, tap2, tap3, tap4, tap5;
+- int input_rotate;
+- int wordmask = r->poolinfo->poolwords - 1;
+- const char *bytes = in;
+- __u32 w;
+-
+- tap1 = r->poolinfo->tap1;
+- tap2 = r->poolinfo->tap2;
+- tap3 = r->poolinfo->tap3;
+- tap4 = r->poolinfo->tap4;
+- tap5 = r->poolinfo->tap5;
+-
+- input_rotate = r->input_rotate;
+- i = r->add_ptr;
+-
+- /* mix one byte at a time to simplify size handling and churn faster */
+- while (nbytes--) {
+- w = rol32(*bytes++, input_rotate);
+- i = (i - 1) & wordmask;
+-
+- /* XOR in the various taps */
+- w ^= r->pool[i];
+- w ^= r->pool[(i + tap1) & wordmask];
+- w ^= r->pool[(i + tap2) & wordmask];
+- w ^= r->pool[(i + tap3) & wordmask];
+- w ^= r->pool[(i + tap4) & wordmask];
+- w ^= r->pool[(i + tap5) & wordmask];
+-
+- /* Mix the result back in with a twist */
+- r->pool[i] = (w >> 3) ^ twist_table[w & 7];
+-
+- /*
+- * Normally, we add 7 bits of rotation to the pool.
+- * At the beginning of the pool, add an extra 7 bits
+- * rotation, so that successive passes spread the
+- * input bits across the pool evenly.
+- */
+- input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
+- }
+-
+- r->input_rotate = input_rotate;
+- r->add_ptr = i;
+-}
++static DEFINE_PER_CPU(struct crng, crngs) = {
++ .generation = ULONG_MAX,
++ .lock = INIT_LOCAL_LOCK(crngs.lock),
++};
+
+-static void __mix_pool_bytes(struct entropy_store *r, const void *in,
+- int nbytes)
+-{
+- trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_);
+- _mix_pool_bytes(r, in, nbytes);
+-}
++/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */
++static void extract_entropy(void *buf, size_t len);
+
+-static void mix_pool_bytes(struct entropy_store *r, const void *in,
+- int nbytes)
++/* This extracts a new crng key from the input pool. */
++static void crng_reseed(void)
+ {
+ unsigned long flags;
++ unsigned long next_gen;
++ u8 key[CHACHA_KEY_SIZE];
+
+- trace_mix_pool_bytes(r->name, nbytes, _RET_IP_);
+- spin_lock_irqsave(&r->lock, flags);
+- _mix_pool_bytes(r, in, nbytes);
+- spin_unlock_irqrestore(&r->lock, flags);
+-}
++ extract_entropy(key, sizeof(key));
+
+-struct fast_pool {
+- __u32 pool[4];
+- unsigned long last;
+- unsigned short reg_idx;
+- unsigned char count;
+-};
++ /*
++ * We copy the new key into the base_crng, overwriting the old one,
++ * and update the generation counter. We avoid hitting ULONG_MAX,
++ * because the per-cpu crngs are initialized to ULONG_MAX, so this
++ * forces new CPUs that come online to always initialize.
++ */
++ spin_lock_irqsave(&base_crng.lock, flags);
++ memcpy(base_crng.key, key, sizeof(base_crng.key));
++ next_gen = base_crng.generation + 1;
++ if (next_gen == ULONG_MAX)
++ ++next_gen;
++ WRITE_ONCE(base_crng.generation, next_gen);
++ WRITE_ONCE(base_crng.birth, jiffies);
++ if (!crng_ready())
++ crng_init = CRNG_READY;
++ spin_unlock_irqrestore(&base_crng.lock, flags);
++ memzero_explicit(key, sizeof(key));
++}
+
+ /*
+- * This is a fast mixing routine used by the interrupt randomness
+- * collector. It's hardcoded for an 128 bit pool and assumes that any
+- * locks that might be needed are taken by the caller.
++ * This generates a ChaCha block using the provided key, and then
++ * immediately overwrites that key with half the block. It returns
++ * the resultant ChaCha state to the user, along with the second
++ * half of the block containing 32 bytes of random data that may
++ * be used; random_data_len may not be greater than 32.
++ *
++ * The returned ChaCha state contains within it a copy of the old
++ * key value, at index 4, so the state should always be zeroed out
++ * immediately after using in order to maintain forward secrecy.
++ * If the state cannot be erased in a timely manner, then it is
++ * safer to set the random_data parameter to &chacha_state[4] so
++ * that this function overwrites it before returning.
+ */
+-static void fast_mix(struct fast_pool *f)
++static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
++ u32 chacha_state[CHACHA_STATE_WORDS],
++ u8 *random_data, size_t random_data_len)
+ {
+- __u32 a = f->pool[0], b = f->pool[1];
+- __u32 c = f->pool[2], d = f->pool[3];
+-
+- a += b; c += d;
+- b = rol32(b, 6); d = rol32(d, 27);
+- d ^= a; b ^= c;
+-
+- a += b; c += d;
+- b = rol32(b, 16); d = rol32(d, 14);
+- d ^= a; b ^= c;
++ u8 first_block[CHACHA_BLOCK_SIZE];
+
+- a += b; c += d;
+- b = rol32(b, 6); d = rol32(d, 27);
+- d ^= a; b ^= c;
++ BUG_ON(random_data_len > 32);
+
+- a += b; c += d;
+- b = rol32(b, 16); d = rol32(d, 14);
+- d ^= a; b ^= c;
++ chacha_init_consts(chacha_state);
++ memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
++ memset(&chacha_state[12], 0, sizeof(u32) * 4);
++ chacha20_block(chacha_state, first_block);
+
+- f->pool[0] = a; f->pool[1] = b;
+- f->pool[2] = c; f->pool[3] = d;
+- f->count++;
++ memcpy(key, first_block, CHACHA_KEY_SIZE);
++ memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
++ memzero_explicit(first_block, sizeof(first_block));
+ }
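
The flow above is easier to see outside the kernel. Below is a minimal userspace sketch of the same fast-key-erasure pattern; fill_block() is a hypothetical, non-cryptographic stand-in for chacha20_block(), and only the structure (generate one block, keep the first half as the next key, hand out only the second half) mirrors the function above:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define KEY_SIZE   32
    #define BLOCK_SIZE 64

    /* Hypothetical stand-in for chacha20_block(): any keyed 64-byte filler. */
    static void fill_block(const uint8_t key[KEY_SIZE], uint8_t out[BLOCK_SIZE])
    {
        uint64_t x = 0x9e3779b97f4a7c15ULL;

        for (size_t i = 0; i < KEY_SIZE; i++)
            x = (x ^ key[i]) * 0x100000001b3ULL;
        for (size_t i = 0; i < BLOCK_SIZE; i++) {
            x ^= x << 13; x ^= x >> 7; x ^= x << 17;
            out[i] = (uint8_t)x;
        }
    }

    /* Fast key erasure: the old key is gone before any output is released. */
    static void fast_key_erasure(uint8_t key[KEY_SIZE], uint8_t *out, size_t out_len)
    {
        uint8_t block[BLOCK_SIZE];

        fill_block(key, block);
        memcpy(key, block, KEY_SIZE);            /* first half becomes the next key */
        memcpy(out, block + KEY_SIZE, out_len);  /* second half (at most 32 bytes) is output */
        memset(block, 0, sizeof(block));
    }

    int main(void)
    {
        uint8_t key[KEY_SIZE] = { 1 }, out[32];

        fast_key_erasure(key, out, sizeof(out));
        printf("%02x%02x...\n", out[0], out[1]);
        return 0;
    }

Note that memset() here only stands in for memzero_explicit(); a real implementation must also keep the compiler from eliding the wipe.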
+
+-static void process_random_ready_list(void)
++/*
++ * Return whether the crng seed is considered to be sufficiently old
++ * that a reseeding is needed. This happens if the last reseeding
++ * was more than CRNG_RESEED_INTERVAL ago, or during early boot, at an interval
++ * proportional to the uptime.
++ */
++static bool crng_has_old_seed(void)
+ {
+- unsigned long flags;
+- struct random_ready_callback *rdy, *tmp;
+-
+- spin_lock_irqsave(&random_ready_list_lock, flags);
+- list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
+- struct module *owner = rdy->owner;
+-
+- list_del_init(&rdy->list);
+- rdy->func(rdy);
+- module_put(owner);
++ static bool early_boot = true;
++ unsigned long interval = CRNG_RESEED_INTERVAL;
++
++ if (unlikely(READ_ONCE(early_boot))) {
++ time64_t uptime = ktime_get_seconds();
++ if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
++ WRITE_ONCE(early_boot, false);
++ else
++ interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL,
++ (unsigned int)uptime / 2 * HZ);
+ }
+- spin_unlock_irqrestore(&random_ready_list_lock, flags);
++ return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval);
+ }
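
As a worked example, assuming the 5-second CRNG_RESEED_START_INTERVAL and 60-second CRNG_RESEED_INTERVAL values used elsewhere in this rewrite, a machine that has been up for 30 seconds is allowed to reseed roughly every 15 seconds (uptime / 2, floored at the 5-second start interval), and once uptime passes 120 seconds the schedule settles at the full 60-second interval.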
+
+ /*
+- * Credit (or debit) the entropy store with n bits of entropy.
+- * Use credit_entropy_bits_safe() if the value comes from userspace
+- * or otherwise should be checked for extreme values.
++ * This function returns a ChaCha state that you may use for generating
++ * random data. It also, on its own, returns up to 32 bytes of random data
++ * that may be used; random_data_len may not be greater than 32.
+ */
+-static void credit_entropy_bits(struct entropy_store *r, int nbits)
++static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
++ u8 *random_data, size_t random_data_len)
+ {
+- int entropy_count, orig;
+- const int pool_size = r->poolinfo->poolfracbits;
+- int nfrac = nbits << ENTROPY_SHIFT;
++ unsigned long flags;
++ struct crng *crng;
+
+- if (!nbits)
+- return;
++ BUG_ON(random_data_len > 32);
+
+-retry:
+- entropy_count = orig = READ_ONCE(r->entropy_count);
+- if (nfrac < 0) {
+- /* Debit */
+- entropy_count += nfrac;
+- } else {
+- /*
+- * Credit: we have to account for the possibility of
+- * overwriting already present entropy. Even in the
+- * ideal case of pure Shannon entropy, new contributions
+- * approach the full value asymptotically:
+- *
+- * entropy <- entropy + (pool_size - entropy) *
+- * (1 - exp(-add_entropy/pool_size))
+- *
+- * For add_entropy <= pool_size/2 then
+- * (1 - exp(-add_entropy/pool_size)) >=
+- * (add_entropy/pool_size)*0.7869...
+- * so we can approximate the exponential with
+- * 3/4*add_entropy/pool_size and still be on the
+- * safe side by adding at most pool_size/2 at a time.
+- *
+- * The use of pool_size-2 in the while statement is to
+- * prevent rounding artifacts from making the loop
+- * arbitrarily long; this limits the loop to log2(pool_size)*2
+- * turns no matter how large nbits is.
+- */
+- int pnfrac = nfrac;
+- const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2;
+- /* The +2 corresponds to the /4 in the denominator */
+-
+- do {
+- unsigned int anfrac = min(pnfrac, pool_size/2);
+- unsigned int add =
+- ((pool_size - entropy_count)*anfrac*3) >> s;
+-
+- entropy_count += add;
+- pnfrac -= anfrac;
+- } while (unlikely(entropy_count < pool_size-2 && pnfrac));
++ /*
++ * For the fast path, we check whether we're ready, unlocked first, and
++ * then re-check once locked later. In the case where we're really not
++ * ready, we do fast key erasure with the base_crng directly, extracting
++ * when crng_init is CRNG_EMPTY.
++ */
++ if (!crng_ready()) {
++ bool ready;
++
++ spin_lock_irqsave(&base_crng.lock, flags);
++ ready = crng_ready();
++ if (!ready) {
++ if (crng_init == CRNG_EMPTY)
++ extract_entropy(base_crng.key, sizeof(base_crng.key));
++ crng_fast_key_erasure(base_crng.key, chacha_state,
++ random_data, random_data_len);
++ }
++ spin_unlock_irqrestore(&base_crng.lock, flags);
++ if (!ready)
++ return;
+ }
+
+- if (WARN_ON(entropy_count < 0)) {
+- pr_warn("negative entropy/overflow: pool %s count %d\n",
+- r->name, entropy_count);
+- entropy_count = 0;
+- } else if (entropy_count > pool_size)
+- entropy_count = pool_size;
+- if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+- goto retry;
+-
+- trace_credit_entropy_bits(r->name, nbits,
+- entropy_count >> ENTROPY_SHIFT, _RET_IP_);
++ /*
++ * If the base_crng is old enough, we reseed, which in turn bumps the
++ * generation counter that we check below.
++ */
++ if (unlikely(crng_has_old_seed()))
++ crng_reseed();
+
+- if (r == &input_pool) {
+- int entropy_bits = entropy_count >> ENTROPY_SHIFT;
++ local_lock_irqsave(&crngs.lock, flags);
++ crng = raw_cpu_ptr(&crngs);
+
+- if (crng_init < 2 && entropy_bits >= 128)
+- crng_reseed(&primary_crng, r);
++ /*
++ * If our per-cpu crng is older than the base_crng, then it means
++ * somebody reseeded the base_crng. In that case, we do fast key
++ * erasure on the base_crng, and use its output as the new key
++ * for our per-cpu crng. This brings us up to date with base_crng.
++ */
++ if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
++ spin_lock(&base_crng.lock);
++ crng_fast_key_erasure(base_crng.key, chacha_state,
++ crng->key, sizeof(crng->key));
++ crng->generation = base_crng.generation;
++ spin_unlock(&base_crng.lock);
+ }
++
++ /*
++ * Finally, when we've made it this far, our per-cpu crng has an up
++ * to date key, and we can do fast key erasure with it to produce
++ * some random data and a ChaCha state for the caller. All other
++ * branches of this function are "unlikely", so most of the time we
++ * should wind up here immediately.
++ */
++ crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len);
++ local_unlock_irqrestore(&crngs.lock, flags);
+ }
+
+-static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
++static void _get_random_bytes(void *buf, size_t len)
+ {
+- const int nbits_max = r->poolinfo->poolwords * 32;
++ u32 chacha_state[CHACHA_STATE_WORDS];
++ u8 tmp[CHACHA_BLOCK_SIZE];
++ size_t first_block_len;
+
+- if (nbits < 0)
+- return -EINVAL;
+-
+- /* Cap the value to avoid overflows */
+- nbits = min(nbits, nbits_max);
++ if (!len)
++ return;
+
+- credit_entropy_bits(r, nbits);
+- return 0;
+-}
++ first_block_len = min_t(size_t, 32, len);
++ crng_make_state(chacha_state, buf, first_block_len);
++ len -= first_block_len;
++ buf += first_block_len;
+
+-/*********************************************************************
+- *
+- * CRNG using CHACHA20
+- *
+- *********************************************************************/
++ while (len) {
++ if (len < CHACHA_BLOCK_SIZE) {
++ chacha20_block(chacha_state, tmp);
++ memcpy(buf, tmp, len);
++ memzero_explicit(tmp, sizeof(tmp));
++ break;
++ }
+
+-#define CRNG_RESEED_INTERVAL (300*HZ)
++ chacha20_block(chacha_state, buf);
++ if (unlikely(chacha_state[12] == 0))
++ ++chacha_state[13];
++ len -= CHACHA_BLOCK_SIZE;
++ buf += CHACHA_BLOCK_SIZE;
++ }
+
+-static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
++ memzero_explicit(chacha_state, sizeof(chacha_state));
++}
+
+-#ifdef CONFIG_NUMA
+ /*
+- * Hack to deal with crazy userspace progams when they are all trying
+- * to access /dev/urandom in parallel. The programs are almost
+- * certainly doing something terribly wrong, but we'll work around
+- * their brain damage.
++ * This function is the exported kernel interface. It returns some
++ * number of good random numbers, suitable for key generation, seeding
++ * TCP sequence numbers, etc. It does not rely on the hardware random
++ * number generator. For random bytes direct from the hardware RNG
++ * (when available), use get_random_bytes_arch(). In order to ensure
++ * that the randomness provided by this function is okay, the function
++ * wait_for_random_bytes() should be called and return 0 at least once
++ * at any point prior.
+ */
+-static struct crng_state **crng_node_pool __read_mostly;
+-#endif
+-
+-static void invalidate_batched_entropy(void);
+-static void numa_crng_init(void);
+-
+-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+-static int __init parse_trust_cpu(char *arg)
++void get_random_bytes(void *buf, size_t len)
+ {
+- return kstrtobool(arg, &trust_cpu);
++ warn_unseeded_randomness();
++ _get_random_bytes(buf, len);
+ }
+-early_param("random.trust_cpu", parse_trust_cpu);
++EXPORT_SYMBOL(get_random_bytes);
+
+-static bool crng_init_try_arch(struct crng_state *crng)
++static ssize_t get_random_bytes_user(struct iov_iter *iter)
+ {
+- int i;
+- bool arch_init = true;
+- unsigned long rv;
+-
+- for (i = 4; i < 16; i++) {
+- if (!arch_get_random_seed_long(&rv) &&
+- !arch_get_random_long(&rv)) {
+- rv = random_get_entropy();
+- arch_init = false;
+- }
+- crng->state[i] ^= rv;
+- }
++ u32 chacha_state[CHACHA_STATE_WORDS];
++ u8 block[CHACHA_BLOCK_SIZE];
++ size_t ret = 0, copied;
+
+- return arch_init;
+-}
++ if (unlikely(!iov_iter_count(iter)))
++ return 0;
+
+-static bool __init crng_init_try_arch_early(struct crng_state *crng)
+-{
+- int i;
+- bool arch_init = true;
+- unsigned long rv;
+-
+- for (i = 4; i < 16; i++) {
+- if (!arch_get_random_seed_long_early(&rv) &&
+- !arch_get_random_long_early(&rv)) {
+- rv = random_get_entropy();
+- arch_init = false;
+- }
+- crng->state[i] ^= rv;
++ /*
++ * Immediately overwrite the ChaCha key at index 4 with random
++ * bytes, in case userspace causes copy_to_iter() below to sleep
++ * forever, so that we still retain forward secrecy in that case.
++ */
++ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
++ /*
++ * However, if we're doing a read of len <= 32, we don't need to
++ * use chacha_state after, so we can simply return those bytes to
++ * the user directly.
++ */
++ if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) {
++ ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter);
++ goto out_zero_chacha;
+ }
+
+- return arch_init;
+-}
++ for (;;) {
++ chacha20_block(chacha_state, block);
++ if (unlikely(chacha_state[12] == 0))
++ ++chacha_state[13];
+
+-static void __maybe_unused crng_initialize_secondary(struct crng_state *crng)
+-{
+- chacha_init_consts(crng->state);
+- _get_random_bytes(&crng->state[4], sizeof(__u32) * 12);
+- crng_init_try_arch(crng);
+- crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+-}
++ copied = copy_to_iter(block, sizeof(block), iter);
++ ret += copied;
++ if (!iov_iter_count(iter) || copied != sizeof(block))
++ break;
+
+-static void __init crng_initialize_primary(struct crng_state *crng)
+-{
+- chacha_init_consts(crng->state);
+- _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0);
+- if (crng_init_try_arch_early(crng) && trust_cpu) {
+- invalidate_batched_entropy();
+- numa_crng_init();
+- crng_init = 2;
+- pr_notice("crng done (trusting CPU's manufacturer)\n");
++ BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
++ if (ret % PAGE_SIZE == 0) {
++ if (signal_pending(current))
++ break;
++ cond_resched();
++ }
+ }
+- crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+-}
+
+-#ifdef CONFIG_NUMA
+-static void do_numa_crng_init(struct work_struct *work)
+-{
+- int i;
+- struct crng_state *crng;
+- struct crng_state **pool;
+-
+- pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL);
+- for_each_online_node(i) {
+- crng = kmalloc_node(sizeof(struct crng_state),
+- GFP_KERNEL | __GFP_NOFAIL, i);
+- spin_lock_init(&crng->lock);
+- crng_initialize_secondary(crng);
+- pool[i] = crng;
+- }
+- mb();
+- if (cmpxchg(&crng_node_pool, NULL, pool)) {
+- for_each_node(i)
+- kfree(pool[i]);
+- kfree(pool);
+- }
++ memzero_explicit(block, sizeof(block));
++out_zero_chacha:
++ memzero_explicit(chacha_state, sizeof(chacha_state));
++ return ret ? ret : -EFAULT;
+ }
+
+-static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init);
++/*
++ * Batched entropy returns random integers. The quality of the random
++ * number is as good as /dev/urandom. In order to ensure that the randomness
++ * provided by this function is okay, the function wait_for_random_bytes()
++ * should be called and return 0 at least once at any point prior.
++ */
+
+-static void numa_crng_init(void)
++#define DEFINE_BATCHED_ENTROPY(type) \
++struct batch_ ##type { \
++ /* \
++ * We make this 1.5x a ChaCha block, so that we get the \
++ * remaining 32 bytes from fast key erasure, plus one full \
++ * block from the detached ChaCha state. We can increase \
++ * the size of this later if needed so long as we keep the \
++ * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \
++ */ \
++ type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \
++ local_lock_t lock; \
++ unsigned long generation; \
++ unsigned int position; \
++}; \
++ \
++static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \
++ .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \
++ .position = UINT_MAX \
++}; \
++ \
++type get_random_ ##type(void) \
++{ \
++ type ret; \
++ unsigned long flags; \
++ struct batch_ ##type *batch; \
++ unsigned long next_gen; \
++ \
++ warn_unseeded_randomness(); \
++ \
++ if (!crng_ready()) { \
++ _get_random_bytes(&ret, sizeof(ret)); \
++ return ret; \
++ } \
++ \
++ local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \
++ batch = raw_cpu_ptr(&batched_entropy_##type); \
++ \
++ next_gen = READ_ONCE(base_crng.generation); \
++ if (batch->position >= ARRAY_SIZE(batch->entropy) || \
++ next_gen != batch->generation) { \
++ _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \
++ batch->position = 0; \
++ batch->generation = next_gen; \
++ } \
++ \
++ ret = batch->entropy[batch->position]; \
++ batch->entropy[batch->position] = 0; \
++ ++batch->position; \
++ local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \
++ return ret; \
++} \
++EXPORT_SYMBOL(get_random_ ##type);
++
++DEFINE_BATCHED_ENTROPY(u64)
++DEFINE_BATCHED_ENTROPY(u32)
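
In-kernel callers consume these batches through the helpers the macro generates; a minimal illustrative snippet (not taken from this patch) would look like:

    #include <linux/random.h>

    static void example_ids(u64 *cookie, u32 *token)
    {
        *cookie = get_random_u64();  /* served from the per-cpu u64 batch */
        *token  = get_random_u32();  /* served from the separate per-cpu u32 batch */
    }

Each value is handed out once and then zeroed in the batch, and the whole batch is invalidated whenever base_crng's generation moves, so callers never receive output derived from a stale key.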
++
++#ifdef CONFIG_SMP
++/*
++ * This function is called when the CPU is coming up, with entry
++ * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
++ */
++int __cold random_prepare_cpu(unsigned int cpu)
+ {
+- schedule_work(&numa_crng_init_work);
++ /*
++ * When the cpu comes back online, immediately invalidate both
++ * the per-cpu crng and all batches, so that we serve fresh
++ * randomness.
++ */
++ per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX;
++ per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX;
++ per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX;
++ return 0;
+ }
+-#else
+-static void numa_crng_init(void) {}
+ #endif
+
+ /*
+- * crng_fast_load() can be called by code in the interrupt service
+- * path. So we can't afford to dilly-dally.
++ * This function will use the architecture-specific hardware random
++ * number generator if it is available. It is not recommended for
++ * use. Use get_random_bytes() instead. It returns the number of
++ * bytes filled in.
+ */
+-static int crng_fast_load(const char *cp, size_t len)
++size_t __must_check get_random_bytes_arch(void *buf, size_t len)
+ {
+- unsigned long flags;
+- char *p;
+-
+- if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+- return 0;
+- if (crng_init != 0) {
+- spin_unlock_irqrestore(&primary_crng.lock, flags);
+- return 0;
+- }
+- p = (unsigned char *) &primary_crng.state[4];
+- while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
+- p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
+- cp++; crng_init_cnt++; len--;
+- }
+- spin_unlock_irqrestore(&primary_crng.lock, flags);
+- if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+- invalidate_batched_entropy();
+- crng_init = 1;
+- pr_notice("fast init done\n");
+- }
+- return 1;
+-}
+-
+-/*
+- * crng_slow_load() is called by add_device_randomness, which has two
+- * attributes. (1) We can't trust the buffer passed to it is
+- * guaranteed to be unpredictable (so it might not have any entropy at
+- * all), and (2) it doesn't have the performance constraints of
+- * crng_fast_load().
+- *
+- * So we do something more comprehensive which is guaranteed to touch
+- * all of the primary_crng's state, and which uses a LFSR with a
+- * period of 255 as part of the mixing algorithm. Finally, we do
+- * *not* advance crng_init_cnt since buffer we may get may be something
+- * like a fixed DMI table (for example), which might very well be
+- * unique to the machine, but is otherwise unvarying.
+- */
+-static int crng_slow_load(const char *cp, size_t len)
+-{
+- unsigned long flags;
+- static unsigned char lfsr = 1;
+- unsigned char tmp;
+- unsigned i, max = CHACHA_KEY_SIZE;
+- const char * src_buf = cp;
+- char * dest_buf = (char *) &primary_crng.state[4];
+-
+- if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+- return 0;
+- if (crng_init != 0) {
+- spin_unlock_irqrestore(&primary_crng.lock, flags);
+- return 0;
+- }
+- if (len > max)
+- max = len;
+-
+- for (i = 0; i < max ; i++) {
+- tmp = lfsr;
+- lfsr >>= 1;
+- if (tmp & 1)
+- lfsr ^= 0xE1;
+- tmp = dest_buf[i % CHACHA_KEY_SIZE];
+- dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
+- lfsr += (tmp << 3) | (tmp >> 5);
+- }
+- spin_unlock_irqrestore(&primary_crng.lock, flags);
+- return 1;
+-}
+-
+-static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
+-{
+- unsigned long flags;
+- int i, num;
+- union {
+- __u8 block[CHACHA_BLOCK_SIZE];
+- __u32 key[8];
+- } buf;
+-
+- if (r) {
+- num = extract_entropy(r, &buf, 32, 16, 0);
+- if (num == 0)
+- return;
+- } else {
+- _extract_crng(&primary_crng, buf.block);
+- _crng_backtrack_protect(&primary_crng, buf.block,
+- CHACHA_KEY_SIZE);
+- }
+- spin_lock_irqsave(&crng->lock, flags);
+- for (i = 0; i < 8; i++) {
+- unsigned long rv;
+- if (!arch_get_random_seed_long(&rv) &&
+- !arch_get_random_long(&rv))
+- rv = random_get_entropy();
+- crng->state[i+4] ^= buf.key[i] ^ rv;
+- }
+- memzero_explicit(&buf, sizeof(buf));
+- crng->init_time = jiffies;
+- spin_unlock_irqrestore(&crng->lock, flags);
+- if (crng == &primary_crng && crng_init < 2) {
+- invalidate_batched_entropy();
+- numa_crng_init();
+- crng_init = 2;
+- process_random_ready_list();
+- wake_up_interruptible(&crng_init_wait);
+- kill_fasync(&fasync, SIGIO, POLL_IN);
+- pr_notice("crng init done\n");
+- if (unseeded_warning.missed) {
+- pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
+- unseeded_warning.missed);
+- unseeded_warning.missed = 0;
+- }
+- if (urandom_warning.missed) {
+- pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
+- urandom_warning.missed);
+- urandom_warning.missed = 0;
+- }
+- }
+-}
+-
+-static void _extract_crng(struct crng_state *crng,
+- __u8 out[CHACHA_BLOCK_SIZE])
+-{
+- unsigned long v, flags;
+-
+- if (crng_ready() &&
+- (time_after(crng_global_init_time, crng->init_time) ||
+- time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)))
+- crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL);
+- spin_lock_irqsave(&crng->lock, flags);
+- if (arch_get_random_long(&v))
+- crng->state[14] ^= v;
+- chacha20_block(&crng->state[0], out);
+- if (crng->state[12] == 0)
+- crng->state[13]++;
+- spin_unlock_irqrestore(&crng->lock, flags);
+-}
+-
+-static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE])
+-{
+- struct crng_state *crng = NULL;
+-
+-#ifdef CONFIG_NUMA
+- if (crng_node_pool)
+- crng = crng_node_pool[numa_node_id()];
+- if (crng == NULL)
+-#endif
+- crng = &primary_crng;
+- _extract_crng(crng, out);
+-}
+-
+-/*
+- * Use the leftover bytes from the CRNG block output (if there is
+- * enough) to mutate the CRNG key to provide backtracking protection.
+- */
+-static void _crng_backtrack_protect(struct crng_state *crng,
+- __u8 tmp[CHACHA_BLOCK_SIZE], int used)
+-{
+- unsigned long flags;
+- __u32 *s, *d;
+- int i;
+-
+- used = round_up(used, sizeof(__u32));
+- if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
+- extract_crng(tmp);
+- used = 0;
+- }
+- spin_lock_irqsave(&crng->lock, flags);
+- s = (__u32 *) &tmp[used];
+- d = &crng->state[4];
+- for (i=0; i < 8; i++)
+- *d++ ^= *s++;
+- spin_unlock_irqrestore(&crng->lock, flags);
+-}
+-
+-static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used)
+-{
+- struct crng_state *crng = NULL;
+-
+-#ifdef CONFIG_NUMA
+- if (crng_node_pool)
+- crng = crng_node_pool[numa_node_id()];
+- if (crng == NULL)
+-#endif
+- crng = &primary_crng;
+- _crng_backtrack_protect(crng, tmp, used);
+-}
++ size_t left = len;
++ u8 *p = buf;
+
+-static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
+-{
+- ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
+- __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+- int large_request = (nbytes > 256);
+-
+- while (nbytes) {
+- if (large_request && need_resched()) {
+- if (signal_pending(current)) {
+- if (ret == 0)
+- ret = -ERESTARTSYS;
+- break;
+- }
+- schedule();
+- }
++ while (left) {
++ unsigned long v;
++ size_t block_len = min_t(size_t, left, sizeof(unsigned long));
+
+- extract_crng(tmp);
+- i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
+- if (copy_to_user(buf, tmp, i)) {
+- ret = -EFAULT;
++ if (!arch_get_random_long(&v))
+ break;
+- }
+
+- nbytes -= i;
+- buf += i;
+- ret += i;
++ memcpy(p, &v, block_len);
++ p += block_len;
++ left -= block_len;
+ }
+- crng_backtrack_protect(tmp, i);
+-
+- /* Wipe data just written to memory */
+- memzero_explicit(tmp, sizeof(tmp));
+
+- return ret;
++ return len - left;
+ }
++EXPORT_SYMBOL(get_random_bytes_arch);
+
+
+-/*********************************************************************
++/**********************************************************************
+ *
+- * Entropy input management
++ * Entropy accumulation and extraction routines.
+ *
+- *********************************************************************/
++ * Callers may add entropy via:
++ *
++ * static void mix_pool_bytes(const void *buf, size_t len)
++ *
++ * After which, if added entropy should be credited:
++ *
++ * static void credit_init_bits(size_t bits)
++ *
++ * Finally, extract entropy via:
++ *
++ * static void extract_entropy(void *buf, size_t len)
++ *
++ **********************************************************************/
+
+-/* There is one of these per entropy source */
+-struct timer_rand_state {
+- cycles_t last_time;
+- long last_delta, last_delta2;
++enum {
++ POOL_BITS = BLAKE2S_HASH_SIZE * 8,
++ POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */
++ POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */
++};
++
++static struct {
++ struct blake2s_state hash;
++ spinlock_t lock;
++ unsigned int init_bits;
++} input_pool = {
++ .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
++ BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
++ BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
++ .hash.outlen = BLAKE2S_HASH_SIZE,
++ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+ };
+
+-#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
++static void _mix_pool_bytes(const void *buf, size_t len)
++{
++ blake2s_update(&input_pool.hash, buf, len);
++}
+
+ /*
+- * Add device- or boot-specific data to the input pool to help
+- * initialize it.
+- *
+- * None of this adds any entropy; it is meant to avoid the problem of
+- * the entropy pool having similar initial state across largely
+- * identical devices.
++ * This function adds bytes into the input pool. It does not
++ * update the initialization bit counter; the caller should call
++ * credit_init_bits if this is appropriate.
+ */
+-void add_device_randomness(const void *buf, unsigned int size)
++static void mix_pool_bytes(const void *buf, size_t len)
+ {
+- unsigned long time = random_get_entropy() ^ jiffies;
+ unsigned long flags;
+
+- if (!crng_ready() && size)
+- crng_slow_load(buf, size);
+-
+- trace_add_device_randomness(size, _RET_IP_);
+ spin_lock_irqsave(&input_pool.lock, flags);
+- _mix_pool_bytes(&input_pool, buf, size);
+- _mix_pool_bytes(&input_pool, &time, sizeof(time));
++ _mix_pool_bytes(buf, len);
+ spin_unlock_irqrestore(&input_pool.lock, flags);
+ }
+-EXPORT_SYMBOL(add_device_randomness);
+-
+-static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
+
+ /*
+- * This function adds entropy to the entropy "pool" by using timing
+- * delays. It uses the timer_rand_state structure to make an estimate
+- * of how many bits of entropy this call has added to the pool.
+- *
+- * The number "num" is also added to the pool - it should somehow describe
+- * the type of event which just happened. This is currently 0-255 for
+- * keyboard scan codes, and 256 upwards for interrupts.
+- *
++ * This is an HKDF-like construction for using the hashed collected entropy
++ * as a PRF key that is then expanded block-by-block.
+ */
+-static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
++static void extract_entropy(void *buf, size_t len)
+ {
+- struct entropy_store *r;
++ unsigned long flags;
++ u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
+ struct {
+- long jiffies;
+- unsigned cycles;
+- unsigned num;
+- } sample;
+- long delta, delta2, delta3;
+-
+- sample.jiffies = jiffies;
+- sample.cycles = random_get_entropy();
+- sample.num = num;
+- r = &input_pool;
+- mix_pool_bytes(r, &sample, sizeof(sample));
++ unsigned long rdseed[32 / sizeof(long)];
++ size_t counter;
++ } block;
++ size_t i;
++
++ for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
++ if (!arch_get_random_seed_long(&block.rdseed[i]) &&
++ !arch_get_random_long(&block.rdseed[i]))
++ block.rdseed[i] = random_get_entropy();
++ }
+
+- /*
+- * Calculate number of bits of randomness we probably added.
+- * We take into account the first, second and third-order deltas
+- * in order to make our estimate.
+- */
+- delta = sample.jiffies - READ_ONCE(state->last_time);
+- WRITE_ONCE(state->last_time, sample.jiffies);
++ spin_lock_irqsave(&input_pool.lock, flags);
+
+- delta2 = delta - READ_ONCE(state->last_delta);
+- WRITE_ONCE(state->last_delta, delta);
++ /* seed = HASHPRF(last_key, entropy_input) */
++ blake2s_final(&input_pool.hash, seed);
+
+- delta3 = delta2 - READ_ONCE(state->last_delta2);
+- WRITE_ONCE(state->last_delta2, delta2);
++ /* next_key = HASHPRF(seed, RDSEED || 0) */
++ block.counter = 0;
++ blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
++ blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
+
+- if (delta < 0)
+- delta = -delta;
+- if (delta2 < 0)
+- delta2 = -delta2;
+- if (delta3 < 0)
+- delta3 = -delta3;
+- if (delta > delta2)
+- delta = delta2;
+- if (delta > delta3)
+- delta = delta3;
++ spin_unlock_irqrestore(&input_pool.lock, flags);
++ memzero_explicit(next_key, sizeof(next_key));
++
++ while (len) {
++ i = min_t(size_t, len, BLAKE2S_HASH_SIZE);
++ /* output = HASHPRF(seed, RDSEED || ++counter) */
++ ++block.counter;
++ blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
++ len -= i;
++ buf += i;
++ }
+
+- /*
+- * delta is now minimum absolute delta.
+- * Round down by 1 bit on general principles,
+- * and limit entropy estimate to 12 bits.
+- */
+- credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
++ memzero_explicit(seed, sizeof(seed));
++ memzero_explicit(&block, sizeof(block));
+ }
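
The counter-based expansion step can be sketched on its own. In the userspace sketch below, keyed_hash() is a hypothetical, non-cryptographic stand-in for the keyed BLAKE2s call; the point is only the shape of the expand loop, where each output block is the PRF of the seed over a distinct counter value:

    #include <stdint.h>
    #include <string.h>

    #define HASH_SIZE 32

    /* Hypothetical stand-in for blake2s(out, msg, key, ...): not cryptographic. */
    static void keyed_hash(uint8_t out[HASH_SIZE], const uint8_t key[HASH_SIZE],
                           const void *msg, size_t msg_len)
    {
        const uint8_t *m = msg;
        uint64_t h = 0xcbf29ce484222325ULL;

        for (size_t i = 0; i < HASH_SIZE; i++)
            h = (h ^ key[i]) * 0x100000001b3ULL;
        for (size_t i = 0; i < msg_len; i++)
            h = (h ^ m[i]) * 0x100000001b3ULL;
        for (size_t i = 0; i < HASH_SIZE; i++) {
            h ^= h >> 33;
            h *= 0xff51afd7ed558ccdULL;
            out[i] = (uint8_t)h;
        }
    }

    /* Expand a 32-byte seed into len bytes, one counter-labelled block at a time. */
    static void expand(const uint8_t seed[HASH_SIZE], uint8_t *buf, size_t len)
    {
        uint8_t block[HASH_SIZE];
        uint64_t counter = 0;

        while (len) {
            size_t n = len < HASH_SIZE ? len : HASH_SIZE;

            ++counter;
            keyed_hash(block, seed, &counter, sizeof(counter));
            memcpy(buf, block, n);
            buf += n;
            len -= n;
        }
        memset(block, 0, sizeof(block));
    }

    int main(void)
    {
        uint8_t seed[HASH_SIZE] = { 42 }, out[100];

        expand(seed, out, sizeof(out));
        return out[0] & 1;
    }

In the kernel, a next_key derived from the same seed re-keys the pool hash before any output block is produced, which is what gives the construction its forward security.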
+
+-void add_input_randomness(unsigned int type, unsigned int code,
+- unsigned int value)
++#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits)
++
++static void __cold _credit_init_bits(size_t bits)
+ {
+- static unsigned char last_value;
++ unsigned int new, orig, add;
++ unsigned long flags;
+
+- /* ignore autorepeat and the like */
+- if (value == last_value)
++ if (!bits)
+ return;
+
+- last_value = value;
+- add_timer_randomness(&input_timer_state,
+- (type << 4) ^ code ^ (code >> 4) ^ value);
+- trace_add_input_randomness(ENTROPY_BITS(&input_pool));
+-}
+-EXPORT_SYMBOL_GPL(add_input_randomness);
+-
+-static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
+-
+-#ifdef ADD_INTERRUPT_BENCH
+-static unsigned long avg_cycles, avg_deviation;
++ add = min_t(size_t, bits, POOL_BITS);
+
+-#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
+-#define FIXED_1_2 (1 << (AVG_SHIFT-1))
+-
+-static void add_interrupt_bench(cycles_t start)
+-{
+- long delta = random_get_entropy() - start;
+-
+- /* Use a weighted moving average */
+- delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
+- avg_cycles += delta;
+- /* And average deviation */
+- delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
+- avg_deviation += delta;
+-}
+-#else
+-#define add_interrupt_bench(x)
+-#endif
+-
+-static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+-{
+- __u32 *ptr = (__u32 *) regs;
+- unsigned int idx;
+-
+- if (regs == NULL)
+- return 0;
+- idx = READ_ONCE(f->reg_idx);
+- if (idx >= sizeof(struct pt_regs) / sizeof(__u32))
+- idx = 0;
+- ptr += idx++;
+- WRITE_ONCE(f->reg_idx, idx);
+- return *ptr;
+-}
++ do {
++ orig = READ_ONCE(input_pool.init_bits);
++ new = min_t(unsigned int, POOL_BITS, orig + add);
++ } while (cmpxchg(&input_pool.init_bits, orig, new) != orig);
+
+-void add_interrupt_randomness(int irq, int irq_flags)
+-{
+- struct entropy_store *r;
+- struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
+- struct pt_regs *regs = get_irq_regs();
+- unsigned long now = jiffies;
+- cycles_t cycles = random_get_entropy();
+- __u32 c_high, j_high;
+- __u64 ip;
+-
+- if (cycles == 0)
+- cycles = get_reg(fast_pool, regs);
+- c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
+- j_high = (sizeof(now) > 4) ? now >> 32 : 0;
+- fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
+- fast_pool->pool[1] ^= now ^ c_high;
+- ip = regs ? instruction_pointer(regs) : _RET_IP_;
+- fast_pool->pool[2] ^= ip;
+- fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
+- get_reg(fast_pool, regs);
+-
+- fast_mix(fast_pool);
+- add_interrupt_bench(cycles);
+-
+- if (unlikely(crng_init == 0)) {
+- if ((fast_pool->count >= 64) &&
+- crng_fast_load((char *) fast_pool->pool,
+- sizeof(fast_pool->pool))) {
+- fast_pool->count = 0;
+- fast_pool->last = now;
++ if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
++ crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */
++ process_random_ready_list();
++ wake_up_interruptible(&crng_init_wait);
++ kill_fasync(&fasync, SIGIO, POLL_IN);
++ pr_notice("crng init done\n");
++ if (urandom_warning.missed)
++ pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
++ urandom_warning.missed);
++ } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) {
++ spin_lock_irqsave(&base_crng.lock, flags);
++ /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */
++ if (crng_init == CRNG_EMPTY) {
++ extract_entropy(base_crng.key, sizeof(base_crng.key));
++ crng_init = CRNG_EARLY;
+ }
+- return;
++ spin_unlock_irqrestore(&base_crng.lock, flags);
+ }
+-
+- if ((fast_pool->count < 64) &&
+- !time_after(now, fast_pool->last + HZ))
+- return;
+-
+- r = &input_pool;
+- if (!spin_trylock(&r->lock))
+- return;
+-
+- fast_pool->last = now;
+- __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
+- spin_unlock(&r->lock);
+-
+- fast_pool->count = 0;
+-
+- /* award one bit for the contents of the fast pool */
+- credit_entropy_bits(r, 1);
+ }
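
The cmpxchg loop above is a lock-free saturating add; crossing POOL_EARLY_BITS or POOL_READY_BITS is detected by comparing the pre- and post-update values, so exactly one caller performs the corresponding transition. A userspace sketch of the same idea, with C11 atomics standing in for cmpxchg and an assumed 256-bit pool:

    #include <stdatomic.h>
    #include <stdio.h>

    #define POOL_BITS 256

    static _Atomic unsigned int init_bits;

    /* Saturating, lock-free credit, mirroring the loop in _credit_init_bits(). */
    static unsigned int credit(unsigned int bits)
    {
        unsigned int orig, new;

        do {
            orig = atomic_load(&init_bits);
            new = orig + bits;
            if (new > POOL_BITS)
                new = POOL_BITS;
        } while (!atomic_compare_exchange_weak(&init_bits, &orig, new));

        return orig;  /* callers compare orig and new to detect threshold crossings */
    }

    int main(void)
    {
        credit(200);
        credit(200);  /* saturates rather than overflowing */
        printf("%u\n", atomic_load(&init_bits));  /* prints 256 */
        return 0;
    }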
+-EXPORT_SYMBOL_GPL(add_interrupt_randomness);
+
+-#ifdef CONFIG_BLOCK
+-void add_disk_randomness(struct gendisk *disk)
+-{
+- if (!disk || !disk->random)
+- return;
+- /* first major is 1, so we get >= 0x200 here */
+- add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+- trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool));
+-}
+-EXPORT_SYMBOL_GPL(add_disk_randomness);
+-#endif
+
+-/*********************************************************************
++/**********************************************************************
+ *
+- * Entropy extraction routines
++ * Entropy collection routines.
+ *
+- *********************************************************************/
++ * The following exported functions are used for pushing entropy into
++ * the above entropy accumulation routines:
++ *
++ * void add_device_randomness(const void *buf, size_t len);
++ * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
++ * void add_bootloader_randomness(const void *buf, size_t len);
++ * void add_interrupt_randomness(int irq);
++ * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value);
++ * void add_disk_randomness(struct gendisk *disk);
++ *
++ * add_device_randomness() adds data to the input pool that
++ * is likely to differ between two devices (or possibly even per boot).
++ * This would be things like MAC addresses or serial numbers, or the
++ * read-out of the RTC. This does *not* credit any actual entropy to
++ * the pool, but it initializes the pool to different values for devices
++ * that might otherwise be identical and have very little entropy
++ * available to them (particularly common in the embedded world).
++ *
++ * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
++ * entropy as specified by the caller. If the entropy pool is full it will
++ * block until more entropy is needed.
++ *
++ * add_bootloader_randomness() is called by bootloader drivers, such as EFI
++ * and device tree, and credits its input depending on whether or not the
++ * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
++ *
++ * add_interrupt_randomness() uses the interrupt timing as random
++ * inputs to the entropy pool. Using the cycle counters and the irq source
++ * as inputs, it feeds the input pool roughly once a second or after 64
++ * interrupts, crediting 1 bit of entropy for whichever comes first.
++ *
++ * add_input_randomness() uses the input layer interrupt timing, as well
++ * as the event type information from the hardware.
++ *
++ * add_disk_randomness() uses what amounts to the seek time of block
++ * layer request events, on a per-disk_devt basis, as input to the
++ * entropy pool. Note that high-speed solid state drives with very low
++ * seek times do not make for good sources of entropy, as their seek
++ * times are usually fairly consistent.
++ *
++ * The last two routines try to estimate how many bits of entropy
++ * to credit. They do this by keeping track of the first and second
++ * order deltas of the event timings.
++ *
++ **********************************************************************/
+
+-/*
+- * This function decides how many bytes to actually take from the
+- * given pool, and also debits the entropy count accordingly.
+- */
+-static size_t account(struct entropy_store *r, size_t nbytes, int min,
+- int reserved)
++static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
++static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
++static int __init parse_trust_cpu(char *arg)
+ {
+- int entropy_count, orig, have_bytes;
+- size_t ibytes, nfrac;
+-
+- BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
+-
+- /* Can we pull enough? */
+-retry:
+- entropy_count = orig = READ_ONCE(r->entropy_count);
+- ibytes = nbytes;
+- /* never pull more than available */
+- have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+-
+- if ((have_bytes -= reserved) < 0)
+- have_bytes = 0;
+- ibytes = min_t(size_t, ibytes, have_bytes);
+- if (ibytes < min)
+- ibytes = 0;
+-
+- if (WARN_ON(entropy_count < 0)) {
+- pr_warn("negative entropy count: pool %s count %d\n",
+- r->name, entropy_count);
+- entropy_count = 0;
+- }
+- nfrac = ibytes << (ENTROPY_SHIFT + 3);
+- if ((size_t) entropy_count > nfrac)
+- entropy_count -= nfrac;
+- else
+- entropy_count = 0;
+-
+- if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+- goto retry;
+-
+- trace_debit_entropy(r->name, 8 * ibytes);
+- if (ibytes && ENTROPY_BITS(r) < random_write_wakeup_bits) {
+- wake_up_interruptible(&random_write_wait);
+- kill_fasync(&fasync, SIGIO, POLL_OUT);
+- }
+-
+- return ibytes;
++ return kstrtobool(arg, &trust_cpu);
+ }
+-
+-/*
+- * This function does the actual extraction for extract_entropy.
+- *
+- * Note: we assume that .poolwords is a multiple of 16 words.
+- */
+-static void extract_buf(struct entropy_store *r, __u8 *out)
++static int __init parse_trust_bootloader(char *arg)
+ {
+- int i;
+- union {
+- __u32 w[5];
+- unsigned long l[LONGS(20)];
+- } hash;
+- __u32 workspace[SHA1_WORKSPACE_WORDS];
+- unsigned long flags;
+-
+- /*
+- * If we have an architectural hardware random number
+- * generator, use it for SHA's initial vector
+- */
+- sha1_init(hash.w);
+- for (i = 0; i < LONGS(20); i++) {
+- unsigned long v;
+- if (!arch_get_random_long(&v))
+- break;
+- hash.l[i] = v;
+- }
+-
+- /* Generate a hash across the pool, 16 words (512 bits) at a time */
+- spin_lock_irqsave(&r->lock, flags);
+- for (i = 0; i < r->poolinfo->poolwords; i += 16)
+- sha1_transform(hash.w, (__u8 *)(r->pool + i), workspace);
+-
+- /*
+- * We mix the hash back into the pool to prevent backtracking
+- * attacks (where the attacker knows the state of the pool
+- * plus the current outputs, and attempts to find previous
+- * ouputs), unless the hash function can be inverted. By
+- * mixing at least a SHA1 worth of hash data back, we make
+- * brute-forcing the feedback as hard as brute-forcing the
+- * hash.
+- */
+- __mix_pool_bytes(r, hash.w, sizeof(hash.w));
+- spin_unlock_irqrestore(&r->lock, flags);
+-
+- memzero_explicit(workspace, sizeof(workspace));
+-
+- /*
+- * In case the hash function has some recognizable output
+- * pattern, we fold it in half. Thus, we always feed back
+- * twice as much data as we output.
+- */
+- hash.w[0] ^= hash.w[3];
+- hash.w[1] ^= hash.w[4];
+- hash.w[2] ^= rol32(hash.w[2], 16);
+-
+- memcpy(out, &hash, EXTRACT_SIZE);
+- memzero_explicit(&hash, sizeof(hash));
++ return kstrtobool(arg, &trust_bootloader);
+ }
++early_param("random.trust_cpu", parse_trust_cpu);
++early_param("random.trust_bootloader", parse_trust_bootloader);
+
+-static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
+- size_t nbytes, int fips)
++/*
++ * The first collection of entropy occurs at system boot while interrupts
++ * are still turned off. Here we push in latent entropy, RDSEED, a timestamp,
++ * utsname(), and the command line. Depending on the above configuration knob,
++ * RDSEED may be considered sufficient for initialization. Note that much
++ * earlier setup may already have pushed entropy into the input pool by the
++ * time we get here.
++ */
++int __init random_init(const char *command_line)
+ {
+- ssize_t ret = 0, i;
+- __u8 tmp[EXTRACT_SIZE];
+- unsigned long flags;
++ ktime_t now = ktime_get_real();
++ unsigned int i, arch_bits;
++ unsigned long entropy;
+
+- while (nbytes) {
+- extract_buf(r, tmp);
++#if defined(LATENT_ENTROPY_PLUGIN)
++ static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy;
++ _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));
++#endif
+
+- if (fips) {
+- spin_lock_irqsave(&r->lock, flags);
+- if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
+- panic("Hardware RNG duplicated output!\n");
+- memcpy(r->last_data, tmp, EXTRACT_SIZE);
+- spin_unlock_irqrestore(&r->lock, flags);
++ for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8;
++ i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) {
++ if (!arch_get_random_seed_long_early(&entropy) &&
++ !arch_get_random_long_early(&entropy)) {
++ entropy = random_get_entropy();
++ arch_bits -= sizeof(entropy) * 8;
+ }
+- i = min_t(int, nbytes, EXTRACT_SIZE);
+- memcpy(buf, tmp, i);
+- nbytes -= i;
+- buf += i;
+- ret += i;
++ _mix_pool_bytes(&entropy, sizeof(entropy));
+ }
++ _mix_pool_bytes(&now, sizeof(now));
++ _mix_pool_bytes(utsname(), sizeof(*(utsname())));
++ _mix_pool_bytes(command_line, strlen(command_line));
++ add_latent_entropy();
+
+- /* Wipe data just returned from memory */
+- memzero_explicit(tmp, sizeof(tmp));
++ if (crng_ready())
++ crng_reseed();
++ else if (trust_cpu)
++ _credit_init_bits(arch_bits);
+
+- return ret;
++ return 0;
+ }
+
+ /*
+- * This function extracts randomness from the "entropy pool", and
+- * returns it in a buffer.
++ * Add device- or boot-specific data to the input pool to help
++ * initialize it.
+ *
+- * The min parameter specifies the minimum amount we can pull before
+- * failing to avoid races that defeat catastrophic reseeding while the
+- * reserved parameter indicates how much entropy we must leave in the
+- * pool after each pull to avoid starving other readers.
++ * None of this adds any entropy; it is meant to avoid the problem of
++ * the entropy pool having similar initial state across largely
++ * identical devices.
+ */
+-static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+- size_t nbytes, int min, int reserved)
++void add_device_randomness(const void *buf, size_t len)
+ {
+- __u8 tmp[EXTRACT_SIZE];
++ unsigned long entropy = random_get_entropy();
+ unsigned long flags;
+
+- /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
+- if (fips_enabled) {
+- spin_lock_irqsave(&r->lock, flags);
+- if (!r->last_data_init) {
+- r->last_data_init = 1;
+- spin_unlock_irqrestore(&r->lock, flags);
+- trace_extract_entropy(r->name, EXTRACT_SIZE,
+- ENTROPY_BITS(r), _RET_IP_);
+- extract_buf(r, tmp);
+- spin_lock_irqsave(&r->lock, flags);
+- memcpy(r->last_data, tmp, EXTRACT_SIZE);
+- }
+- spin_unlock_irqrestore(&r->lock, flags);
+- }
+-
+- trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
+- nbytes = account(r, nbytes, min, reserved);
+-
+- return _extract_entropy(r, buf, nbytes, fips_enabled);
++ spin_lock_irqsave(&input_pool.lock, flags);
++ _mix_pool_bytes(&entropy, sizeof(entropy));
++ _mix_pool_bytes(buf, len);
++ spin_unlock_irqrestore(&input_pool.lock, flags);
+ }
++EXPORT_SYMBOL(add_device_randomness);
+
+-#define warn_unseeded_randomness(previous) \
+- _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous))
+-
+-static void _warn_unseeded_randomness(const char *func_name, void *caller,
+- void **previous)
++/*
++ * Interface for in-kernel drivers of true hardware RNGs.
++ * Those devices may produce endless random bits and will be throttled
++ * when our pool is full.
++ */
++void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy)
+ {
+-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+- const bool print_once = false;
+-#else
+- static bool print_once __read_mostly;
+-#endif
++ mix_pool_bytes(buf, len);
++ credit_init_bits(entropy);
+
+- if (print_once ||
+- crng_ready() ||
+- (previous && (caller == READ_ONCE(*previous))))
+- return;
+- WRITE_ONCE(*previous, caller);
+-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+- print_once = true;
+-#endif
+- if (__ratelimit(&unseeded_warning))
+- printk_deferred(KERN_NOTICE "random: %s called from %pS "
+- "with crng_init=%d\n", func_name, caller,
+- crng_init);
++ /*
++ * Throttle writing to once every CRNG_RESEED_INTERVAL, unless
++ * we're not yet initialized.
++ */
++ if (!kthread_should_stop() && crng_ready())
++ schedule_timeout_interruptible(CRNG_RESEED_INTERVAL);
+ }
++EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
+
+ /*
+- * This function is the exported kernel interface. It returns some
+- * number of good random numbers, suitable for key generation, seeding
+- * TCP sequence numbers, etc. It does not rely on the hardware random
+- * number generator. For random bytes direct from the hardware RNG
+- * (when available), use get_random_bytes_arch(). In order to ensure
+- * that the randomness provided by this function is okay, the function
+- * wait_for_random_bytes() should be called and return 0 at least once
+- * at any point prior.
++ * Handle random seed passed by bootloader, and credit it if
++ * CONFIG_RANDOM_TRUST_BOOTLOADER is set.
+ */
+-static void _get_random_bytes(void *buf, int nbytes)
++void __init add_bootloader_randomness(const void *buf, size_t len)
+ {
+- __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+-
+- trace_get_random_bytes(nbytes, _RET_IP_);
+-
+- while (nbytes >= CHACHA_BLOCK_SIZE) {
+- extract_crng(buf);
+- buf += CHACHA_BLOCK_SIZE;
+- nbytes -= CHACHA_BLOCK_SIZE;
+- }
+-
+- if (nbytes > 0) {
+- extract_crng(tmp);
+- memcpy(buf, tmp, nbytes);
+- crng_backtrack_protect(tmp, nbytes);
+- } else
+- crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
+- memzero_explicit(tmp, sizeof(tmp));
++ mix_pool_bytes(buf, len);
++ if (trust_bootloader)
++ credit_init_bits(len * 8);
+ }
+
+-void get_random_bytes(void *buf, int nbytes)
+-{
+- static void *previous;
++struct fast_pool {
++ unsigned long pool[4];
++ unsigned long last;
++ unsigned int count;
++ struct timer_list mix;
++};
+
+- warn_unseeded_randomness(&previous);
+- _get_random_bytes(buf, nbytes);
+-}
+-EXPORT_SYMBOL(get_random_bytes);
++static void mix_interrupt_randomness(struct timer_list *work);
+
++static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
++#ifdef CONFIG_64BIT
++#define FASTMIX_PERM SIPHASH_PERMUTATION
++ .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 },
++#else
++#define FASTMIX_PERM HSIPHASH_PERMUTATION
++ .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 },
++#endif
++ .mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0)
++};
+
+ /*
+- * Each time the timer fires, we expect that we got an unpredictable
+- * jump in the cycle counter. Even if the timer is running on another
+- * CPU, the timer activity will be touching the stack of the CPU that is
+- * generating entropy..
+- *
+- * Note that we don't re-arm the timer in the timer itself - we are
+- * happy to be scheduled away, since that just makes the load more
+- * complex, but we do not want the timer to keep ticking unless the
+- * entropy loop is running.
+- *
+- * So the re-arming always happens in the entropy loop itself.
++ * This is [Half]SipHash-1-x, starting from an empty key. Because
++ * the key is fixed, it assumes that its inputs are non-malicious,
++ * and therefore this has no security on its own. s represents the
++ * four-word SipHash state, while v represents a two-word input.
+ */
+-static void entropy_timer(struct timer_list *t)
++static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2)
+ {
+- credit_entropy_bits(&input_pool, 1);
++ s[3] ^= v1;
++ FASTMIX_PERM(s[0], s[1], s[2], s[3]);
++ s[0] ^= v1;
++ s[3] ^= v2;
++ FASTMIX_PERM(s[0], s[1], s[2], s[3]);
++ s[0] ^= v2;
+ }
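
For reference, the same absorption step as a standalone userspace sketch; SIPROUND below writes out the standard SipHash round (which is what SIPHASH_PERMUTATION is assumed to expand to on 64-bit builds), and the initial state uses the usual SipHash constants:

    #include <stdint.h>
    #include <stdio.h>

    static inline uint64_t rol64(uint64_t v, unsigned int s)
    {
        return (v << s) | (v >> (64 - s));
    }

    /* One standard SipHash round. */
    #define SIPROUND(a, b, c, d) do {                                          \
        (a) += (b); (b) = rol64((b), 13); (b) ^= (a); (a) = rol64((a), 32);    \
        (c) += (d); (d) = rol64((d), 16); (d) ^= (c);                          \
        (a) += (d); (d) = rol64((d), 21); (d) ^= (a);                          \
        (c) += (b); (b) = rol64((b), 17); (b) ^= (c); (c) = rol64((c), 32);    \
    } while (0)

    /* SipHash-1-x absorption of two words, mirroring fast_mix(). */
    static void fast_mix(uint64_t s[4], uint64_t v1, uint64_t v2)
    {
        s[3] ^= v1;
        SIPROUND(s[0], s[1], s[2], s[3]);
        s[0] ^= v1;
        s[3] ^= v2;
        SIPROUND(s[0], s[1], s[2], s[3]);
        s[0] ^= v2;
    }

    int main(void)
    {
        uint64_t s[4] = { 0x736f6d6570736575ULL, 0x646f72616e646f6dULL,
                          0x6c7967656e657261ULL, 0x7465646279746573ULL };

        fast_mix(s, 0x1234, 0x5678);  /* e.g. a cycle counter and an IRQ/IP word */
        printf("%016llx\n", (unsigned long long)s[0]);
        return 0;
    }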
+
++#ifdef CONFIG_SMP
+ /*
+- * If we have an actual cycle counter, see if we can
+- * generate enough entropy with timing noise
++ * This function is called when the CPU has just come online, with
++ * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
+ */
+-static void try_to_generate_entropy(void)
++int __cold random_online_cpu(unsigned int cpu)
+ {
+- struct {
+- unsigned long now;
+- struct timer_list timer;
+- } stack;
++ /*
++ * During CPU shutdown and before CPU onlining, add_interrupt_
++ * randomness() may schedule mix_interrupt_randomness(), and
++ * set the MIX_INFLIGHT flag. However, because the worker can
++ * be scheduled on a different CPU during this period, that
++ * flag will never be cleared. For that reason, we zero out
++ * the flag here, which runs just after workqueues are onlined
++ * for the CPU again. This also has the effect of setting the
++ * irq randomness count to zero so that new accumulated irqs
++ * are fresh.
++ */
++ per_cpu_ptr(&irq_randomness, cpu)->count = 0;
++ return 0;
++}
++#endif
+
+- stack.now = random_get_entropy();
++static void mix_interrupt_randomness(struct timer_list *work)
++{
++ struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
++ /*
++ * The size of the copied stack pool is explicitly 2 longs so that we
++ * only ever ingest half of the siphash output each time, retaining
++ * the other half as the next "key" that carries over. The entropy is
++ * supposed to be sufficiently dispersed between bits so on average
++ * we don't wind up "losing" some.
++ */
++ unsigned long pool[2];
++ unsigned int count;
+
+- /* Slow counter - or none. Don't even bother */
+- if (stack.now == random_get_entropy())
++ /* Check to see if we're running on the wrong CPU due to hotplug. */
++ local_irq_disable();
++ if (fast_pool != this_cpu_ptr(&irq_randomness)) {
++ local_irq_enable();
+ return;
+-
+- timer_setup_on_stack(&stack.timer, entropy_timer, 0);
+- while (!crng_ready()) {
+- if (!timer_pending(&stack.timer))
+- mod_timer(&stack.timer, jiffies+1);
+- mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+- schedule();
+- stack.now = random_get_entropy();
+ }
+
+- del_timer_sync(&stack.timer);
+- destroy_timer_on_stack(&stack.timer);
+- mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now));
+-}
+-
+-/*
+- * Wait for the urandom pool to be seeded and thus guaranteed to supply
+- * cryptographically secure random numbers. This applies to: the /dev/urandom
+- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+- * family of functions. Using any of these functions without first calling
+- * this function forfeits the guarantee of security.
+- *
+- * Returns: 0 if the urandom pool has been seeded.
+- * -ERESTARTSYS if the function was interrupted by a signal.
+- */
+-int wait_for_random_bytes(void)
+-{
+- if (likely(crng_ready()))
+- return 0;
+-
+- do {
+- int ret;
+- ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
+- if (ret)
+- return ret > 0 ? 0 : ret;
++ /*
++ * Copy the pool to the stack so that the mixer always has a
++ * consistent view, before we reenable irqs again.
++ */
++ memcpy(pool, fast_pool->pool, sizeof(pool));
++ count = fast_pool->count;
++ fast_pool->count = 0;
++ fast_pool->last = jiffies;
++ local_irq_enable();
+
+- try_to_generate_entropy();
+- } while (!crng_ready());
++ mix_pool_bytes(pool, sizeof(pool));
++ credit_init_bits(clamp_t(unsigned int, (count & U16_MAX) / 64, 1, sizeof(pool) * 8));
+
+- return 0;
++ memzero_explicit(pool, sizeof(pool));
+ }
+-EXPORT_SYMBOL(wait_for_random_bytes);
+
+-/*
+- * Returns whether or not the urandom pool has been seeded and thus guaranteed
+- * to supply cryptographically secure random numbers. This applies to: the
+- * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+- * ,u64,int,long} family of functions.
+- *
+- * Returns: true if the urandom pool has been seeded.
+- * false if the urandom pool has not been seeded.
+- */
+-bool rng_is_initialized(void)
+-{
+- return crng_ready();
+-}
+-EXPORT_SYMBOL(rng_is_initialized);
+-
+-/*
+- * Add a callback function that will be invoked when the nonblocking
+- * pool is initialised.
+- *
+- * returns: 0 if callback is successfully added
+- * -EALREADY if pool is already initialised (callback not called)
+- * -ENOENT if module for callback is not alive
+- */
+-int add_random_ready_callback(struct random_ready_callback *rdy)
++void add_interrupt_randomness(int irq)
+ {
+- struct module *owner;
+- unsigned long flags;
+- int err = -EALREADY;
+-
+- if (crng_ready())
+- return err;
+-
+- owner = rdy->owner;
+- if (!try_module_get(owner))
+- return -ENOENT;
+-
+- spin_lock_irqsave(&random_ready_list_lock, flags);
+- if (crng_ready())
+- goto out;
+-
+- owner = NULL;
++ enum { MIX_INFLIGHT = 1U << 31 };
++ unsigned long entropy = random_get_entropy();
++ struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
++ struct pt_regs *regs = get_irq_regs();
++ unsigned int new_count;
+
+- list_add(&rdy->list, &random_ready_list);
+- err = 0;
++ fast_mix(fast_pool->pool, entropy,
++ (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq));
++ new_count = ++fast_pool->count;
+
+-out:
+- spin_unlock_irqrestore(&random_ready_list_lock, flags);
++ if (new_count & MIX_INFLIGHT)
++ return;
+
+- module_put(owner);
++ if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ))
++ return;
+
+- return err;
++ fast_pool->count |= MIX_INFLIGHT;
++ if (!timer_pending(&fast_pool->mix)) {
++ fast_pool->mix.expires = jiffies;
++ add_timer_on(&fast_pool->mix, raw_smp_processor_id());
++ }
+ }
+-EXPORT_SYMBOL(add_random_ready_callback);
++EXPORT_SYMBOL_GPL(add_interrupt_randomness);
++
++/* There is one of these per entropy source */
++struct timer_rand_state {
++ unsigned long last_time;
++ long last_delta, last_delta2;
++};
+
+ /*
+- * Delete a previously registered readiness callback function.
++ * This function adds entropy to the entropy "pool" by using timing
++ * delays. It uses the timer_rand_state structure to make an estimate
++ * of how many bits of entropy this call has added to the pool. The
++ * value "num" is also added to the pool; it should somehow describe
++ * the type of event that just happened.
+ */
+-void del_random_ready_callback(struct random_ready_callback *rdy)
++static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
+ {
+- unsigned long flags;
+- struct module *owner = NULL;
++ unsigned long entropy = random_get_entropy(), now = jiffies, flags;
++ long delta, delta2, delta3;
++ unsigned int bits;
+
+- spin_lock_irqsave(&random_ready_list_lock, flags);
+- if (!list_empty(&rdy->list)) {
+- list_del_init(&rdy->list);
+- owner = rdy->owner;
++ /*
++ * If we're in a hard IRQ, add_interrupt_randomness() will be called
++ * sometime after, so mix into the fast pool.
++ */
++ if (in_hardirq()) {
++ fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num);
++ } else {
++ spin_lock_irqsave(&input_pool.lock, flags);
++ _mix_pool_bytes(&entropy, sizeof(entropy));
++ _mix_pool_bytes(&num, sizeof(num));
++ spin_unlock_irqrestore(&input_pool.lock, flags);
+ }
+- spin_unlock_irqrestore(&random_ready_list_lock, flags);
+
+- module_put(owner);
+-}
+-EXPORT_SYMBOL(del_random_ready_callback);
++ if (crng_ready())
++ return;
++
++ /*
++ * Calculate number of bits of randomness we probably added.
++ * We take into account the first, second and third-order deltas
++ * in order to make our estimate.
++ */
++ delta = now - READ_ONCE(state->last_time);
++ WRITE_ONCE(state->last_time, now);
+
+-/*
+- * This function will use the architecture-specific hardware random
+- * number generator if it is available. The arch-specific hw RNG will
+- * almost certainly be faster than what we can do in software, but it
+- * is impossible to verify that it is implemented securely (as
+- * opposed, to, say, the AES encryption of a sequence number using a
+- * key known by the NSA). So it's useful if we need the speed, but
+- * only if we're willing to trust the hardware manufacturer not to
+- * have put in a back door.
+- *
+- * Return number of bytes filled in.
+- */
+-int __must_check get_random_bytes_arch(void *buf, int nbytes)
+-{
+- int left = nbytes;
+- char *p = buf;
++ delta2 = delta - READ_ONCE(state->last_delta);
++ WRITE_ONCE(state->last_delta, delta);
+
+- trace_get_random_bytes_arch(left, _RET_IP_);
+- while (left) {
+- unsigned long v;
+- int chunk = min_t(int, left, sizeof(unsigned long));
++ delta3 = delta2 - READ_ONCE(state->last_delta2);
++ WRITE_ONCE(state->last_delta2, delta2);
+
+- if (!arch_get_random_long(&v))
+- break;
++ if (delta < 0)
++ delta = -delta;
++ if (delta2 < 0)
++ delta2 = -delta2;
++ if (delta3 < 0)
++ delta3 = -delta3;
++ if (delta > delta2)
++ delta = delta2;
++ if (delta > delta3)
++ delta = delta3;
+
+- memcpy(p, &v, chunk);
+- p += chunk;
+- left -= chunk;
+- }
++ /*
++ * delta is now minimum absolute delta. Round down by 1 bit
++ * on general principles, and limit entropy estimate to 11 bits.
++ */
++ bits = min(fls(delta >> 1), 11);
+
+- return nbytes - left;
++ /*
++ * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness()
++ * will run after this, which uses a different crediting scheme of 1 bit
++ * per every 64 interrupts. In order to let that function do accounting
++ * close to the one in this function, we credit a full 64/64 bit per bit,
++ * and then subtract one to account for the extra one added.
++ */
++ if (in_hardirq())
++ this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1;
++ else
++ _credit_init_bits(bits);
+ }
+-EXPORT_SYMBOL(get_random_bytes_arch);
+
+-/*
+- * init_std_data - initialize pool with system data
+- *
+- * @r: pool to initialize
+- *
+- * This function clears the pool's entropy count and mixes some system
+- * data into the pool to prepare it for use. The pool is not cleared
+- * as that can only decrease the entropy in the pool.
+- */
+-static void __init init_std_data(struct entropy_store *r)
++void add_input_randomness(unsigned int type, unsigned int code, unsigned int value)
+ {
+- int i;
+- ktime_t now = ktime_get_real();
+- unsigned long rv;
+-
+- mix_pool_bytes(r, &now, sizeof(now));
+- for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) {
+- if (!arch_get_random_seed_long(&rv) &&
+- !arch_get_random_long(&rv))
+- rv = random_get_entropy();
+- mix_pool_bytes(r, &rv, sizeof(rv));
+- }
+- mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
++ static unsigned char last_value;
++ static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
++
++ /* Ignore autorepeat and the like. */
++ if (value == last_value)
++ return;
++
++ last_value = value;
++ add_timer_randomness(&input_timer_state,
++ (type << 4) ^ code ^ (code >> 4) ^ value);
+ }
++EXPORT_SYMBOL_GPL(add_input_randomness);
+
+-/*
+- * Note that setup_arch() may call add_device_randomness()
+- * long before we get here. This allows seeding of the pools
+- * with some platform dependent data very early in the boot
+- * process. But it limits our options here. We must use
+- * statically allocated structures that already have all
+- * initializations complete at compile time. We should also
+- * take care not to overwrite the precious per platform data
+- * we were given.
+- */
+-int __init rand_initialize(void)
++#ifdef CONFIG_BLOCK
++void add_disk_randomness(struct gendisk *disk)
+ {
+- init_std_data(&input_pool);
+- crng_initialize_primary(&primary_crng);
+- crng_global_init_time = jiffies;
+- if (ratelimit_disable) {
+- urandom_warning.interval = 0;
+- unseeded_warning.interval = 0;
+- }
+- return 0;
++ if (!disk || !disk->random)
++ return;
++ /* First major is 1, so we get >= 0x200 here. */
++ add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+ }
++EXPORT_SYMBOL_GPL(add_disk_randomness);
+
+-#ifdef CONFIG_BLOCK
+-void rand_initialize_disk(struct gendisk *disk)
++void __cold rand_initialize_disk(struct gendisk *disk)
+ {
+ struct timer_rand_state *state;
+
+@@ -1801,116 +1134,194 @@ void rand_initialize_disk(struct gendisk *disk)
+ }
+ #endif
+
+-static ssize_t
+-urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes,
+- loff_t *ppos)
++/*
++ * Each time the timer fires, we expect that we got an unpredictable
++ * jump in the cycle counter. Even if the timer is running on another
++ * CPU, the timer activity will be touching the stack of the CPU that is
++ * generating entropy.
++ *
++ * Note that we don't re-arm the timer in the timer itself - we are
++ * happy to be scheduled away, since that just makes the load more
++ * complex, but we do not want the timer to keep ticking unless the
++ * entropy loop is running.
++ *
++ * So the re-arming always happens in the entropy loop itself.
++ */
++static void __cold entropy_timer(struct timer_list *t)
+ {
+- int ret;
+-
+- nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
+- ret = extract_crng_user(buf, nbytes);
+- trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool));
+- return ret;
++ credit_init_bits(1);
+ }
+
+-static ssize_t
+-urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
++/*
++ * If we have an actual cycle counter, see if we can
++ * generate enough entropy with timing noise
++ */
++static void __cold try_to_generate_entropy(void)
+ {
+- unsigned long flags;
+- static int maxwarn = 10;
++ struct {
++ unsigned long entropy;
++ struct timer_list timer;
++ } stack;
++
++ stack.entropy = random_get_entropy();
++
++ /* Slow counter - or none. Don't even bother */
++ if (stack.entropy == random_get_entropy())
++ return;
+
+- if (!crng_ready() && maxwarn > 0) {
+- maxwarn--;
+- if (__ratelimit(&urandom_warning))
+- pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
+- current->comm, nbytes);
+- spin_lock_irqsave(&primary_crng.lock, flags);
+- crng_init_cnt = 0;
+- spin_unlock_irqrestore(&primary_crng.lock, flags);
++ timer_setup_on_stack(&stack.timer, entropy_timer, 0);
++ while (!crng_ready() && !signal_pending(current)) {
++ if (!timer_pending(&stack.timer))
++ mod_timer(&stack.timer, jiffies + 1);
++ mix_pool_bytes(&stack.entropy, sizeof(stack.entropy));
++ schedule();
++ stack.entropy = random_get_entropy();
+ }
+
+- return urandom_read_nowarn(file, buf, nbytes, ppos);
++ del_timer_sync(&stack.timer);
++ destroy_timer_on_stack(&stack.timer);
++ mix_pool_bytes(&stack.entropy, sizeof(stack.entropy));
+ }
+
+-static ssize_t
+-random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
++
++/**********************************************************************
++ *
++ * Userspace reader/writer interfaces.
++ *
++ * getrandom(2) is the primary modern interface into the RNG and should
++ * be used in preference to anything else.
++ *
++ * Reading from /dev/random has the same functionality as calling
++ * getrandom(2) with flags=0. In earlier versions, however, it had
++ * vastly different semantics and should therefore be avoided, to
++ * prevent backwards compatibility issues.
++ *
++ * Reading from /dev/urandom has the same functionality as calling
++ * getrandom(2) with flags=GRND_INSECURE. Because it does not block
++ * waiting for the RNG to be ready, it should not be used.
++ *
++ * Writing to either /dev/random or /dev/urandom adds entropy to
++ * the input pool but does not credit it.
++ *
++ * Polling on /dev/random indicates when the RNG is initialized, on
++ * the read side, and when it wants new entropy, on the write side.
++ *
++ * Both /dev/random and /dev/urandom have the same set of ioctls for
++ * adding entropy, getting the entropy count, zeroing the count, and
++ * reseeding the crng.
++ *
++ **********************************************************************/
++
++SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags)
+ {
++ struct iov_iter iter;
++ struct iovec iov;
+ int ret;
+
+- ret = wait_for_random_bytes();
+- if (ret != 0)
++ if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
++ return -EINVAL;
++
++ /*
++ * Requesting insecure and blocking randomness at the same time makes
++ * no sense.
++ */
++ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
++ return -EINVAL;
++
++ if (!crng_ready() && !(flags & GRND_INSECURE)) {
++ if (flags & GRND_NONBLOCK)
++ return -EAGAIN;
++ ret = wait_for_random_bytes();
++ if (unlikely(ret))
++ return ret;
++ }
++
++ ret = import_single_range(READ, ubuf, len, &iov, &iter);
++ if (unlikely(ret))
+ return ret;
+- return urandom_read_nowarn(file, buf, nbytes, ppos);
++ return get_random_bytes_user(&iter);
+ }
+
+-static __poll_t
+-random_poll(struct file *file, poll_table * wait)
++static __poll_t random_poll(struct file *file, poll_table *wait)
+ {
+- __poll_t mask;
+-
+ poll_wait(file, &crng_init_wait, wait);
+- poll_wait(file, &random_write_wait, wait);
+- mask = 0;
+- if (crng_ready())
+- mask |= EPOLLIN | EPOLLRDNORM;
+- if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
+- mask |= EPOLLOUT | EPOLLWRNORM;
+- return mask;
++ return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM;
+ }
+
+-static int
+-write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
++static ssize_t write_pool_user(struct iov_iter *iter)
+ {
+- size_t bytes;
+- __u32 t, buf[16];
+- const char __user *p = buffer;
++ u8 block[BLAKE2S_BLOCK_SIZE];
++ ssize_t ret = 0;
++ size_t copied;
+
+- while (count > 0) {
+- int b, i = 0;
++ if (unlikely(!iov_iter_count(iter)))
++ return 0;
+
+- bytes = min(count, sizeof(buf));
+- if (copy_from_user(&buf, p, bytes))
+- return -EFAULT;
++ for (;;) {
++ copied = copy_from_iter(block, sizeof(block), iter);
++ ret += copied;
++ mix_pool_bytes(block, copied);
++ if (!iov_iter_count(iter) || copied != sizeof(block))
++ break;
+
+- for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) {
+- if (!arch_get_random_int(&t))
++ BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
++ if (ret % PAGE_SIZE == 0) {
++ if (signal_pending(current))
+ break;
+- buf[i] ^= t;
++ cond_resched();
+ }
++ }
++
++ memzero_explicit(block, sizeof(block));
++ return ret ? ret : -EFAULT;
++}
++
++static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter)
++{
++ return write_pool_user(iter);
++}
+
+- count -= bytes;
+- p += bytes;
++static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
++{
++ static int maxwarn = 10;
+
+- mix_pool_bytes(r, buf, bytes);
+- cond_resched();
++ if (!crng_ready()) {
++ if (!ratelimit_disable && maxwarn <= 0)
++ ++urandom_warning.missed;
++ else if (ratelimit_disable || __ratelimit(&urandom_warning)) {
++ --maxwarn;
++ pr_notice("%s: uninitialized urandom read (%zu bytes read)\n",
++ current->comm, iov_iter_count(iter));
++ }
+ }
+
+- return 0;
++ return get_random_bytes_user(iter);
+ }
+
+-static ssize_t random_write(struct file *file, const char __user *buffer,
+- size_t count, loff_t *ppos)
++static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
+ {
+- size_t ret;
++ int ret;
+
+- ret = write_pool(&input_pool, buffer, count);
+- if (ret)
+- return ret;
++ if (!crng_ready() &&
++ ((kiocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) ||
++ (kiocb->ki_filp->f_flags & O_NONBLOCK)))
++ return -EAGAIN;
+
+- return (ssize_t)count;
++ ret = wait_for_random_bytes();
++ if (ret != 0)
++ return ret;
++ return get_random_bytes_user(iter);
+ }
+
+ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ {
+- int size, ent_count;
+ int __user *p = (int __user *)arg;
+- int retval;
++ int ent_count;
+
+ switch (cmd) {
+ case RNDGETENTCNT:
+- /* inherently racy, no point locking */
+- ent_count = ENTROPY_BITS(&input_pool);
+- if (put_user(ent_count, p))
++ /* Inherently racy, no point locking. */
++ if (put_user(input_pool.init_bits, p))
+ return -EFAULT;
+ return 0;
+ case RNDADDTOENTCNT:
+@@ -1918,38 +1329,48 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ return -EPERM;
+ if (get_user(ent_count, p))
+ return -EFAULT;
+- return credit_entropy_bits_safe(&input_pool, ent_count);
+- case RNDADDENTROPY:
++ if (ent_count < 0)
++ return -EINVAL;
++ credit_init_bits(ent_count);
++ return 0;
++ case RNDADDENTROPY: {
++ struct iov_iter iter;
++ struct iovec iov;
++ ssize_t ret;
++ int len;
++
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (get_user(ent_count, p++))
+ return -EFAULT;
+ if (ent_count < 0)
+ return -EINVAL;
+- if (get_user(size, p++))
++ if (get_user(len, p++))
++ return -EFAULT;
++ ret = import_single_range(WRITE, p, len, &iov, &iter);
++ if (unlikely(ret))
++ return ret;
++ ret = write_pool_user(&iter);
++ if (unlikely(ret < 0))
++ return ret;
++ /* Since we're crediting, enforce that it was all written into the pool. */
++ if (unlikely(ret != len))
+ return -EFAULT;
+- retval = write_pool(&input_pool, (const char __user *)p,
+- size);
+- if (retval < 0)
+- return retval;
+- return credit_entropy_bits_safe(&input_pool, ent_count);
++ credit_init_bits(ent_count);
++ return 0;
++ }
+ case RNDZAPENTCNT:
+ case RNDCLEARPOOL:
+- /*
+- * Clear the entropy pool counters. We no longer clear
+- * the entropy pool, as that's silly.
+- */
++ /* No longer has any effect. */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+- input_pool.entropy_count = 0;
+ return 0;
+ case RNDRESEEDCRNG:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+- if (crng_init < 2)
++ if (!crng_ready())
+ return -ENODATA;
+- crng_reseed(&primary_crng, &input_pool);
+- crng_global_init_time = jiffies - 1;
++ crng_reseed();
+ return 0;
+ default:
+ return -EINVAL;
+@@ -1962,55 +1383,56 @@ static int random_fasync(int fd, struct file *filp, int on)
+ }
+
+ const struct file_operations random_fops = {
+- .read = random_read,
+- .write = random_write,
+- .poll = random_poll,
++ .read_iter = random_read_iter,
++ .write_iter = random_write_iter,
++ .poll = random_poll,
+ .unlocked_ioctl = random_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .fasync = random_fasync,
+ .llseek = noop_llseek,
++ .splice_read = generic_file_splice_read,
++ .splice_write = iter_file_splice_write,
+ };
+
+ const struct file_operations urandom_fops = {
+- .read = urandom_read,
+- .write = random_write,
++ .read_iter = urandom_read_iter,
++ .write_iter = random_write_iter,
+ .unlocked_ioctl = random_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .fasync = random_fasync,
+ .llseek = noop_llseek,
++ .splice_read = generic_file_splice_read,
++ .splice_write = iter_file_splice_write,
+ };
+
+-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
+- unsigned int, flags)
+-{
+- int ret;
+-
+- if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE))
+- return -EINVAL;
+-
+- /*
+- * Requesting insecure and blocking randomness at the same time makes
+- * no sense.
+- */
+- if ((flags & (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM))
+- return -EINVAL;
+-
+- if (count > INT_MAX)
+- count = INT_MAX;
+-
+- if (!(flags & GRND_INSECURE) && !crng_ready()) {
+- if (flags & GRND_NONBLOCK)
+- return -EAGAIN;
+- ret = wait_for_random_bytes();
+- if (unlikely(ret))
+- return ret;
+- }
+- return urandom_read_nowarn(NULL, buf, count, NULL);
+-}
+
+ /********************************************************************
+ *
+- * Sysctl interface
++ * Sysctl interface.
++ *
++ * These are partly unused legacy knobs with dummy values to not break
++ * userspace and partly still useful things. They are usually accessible
++ * in /proc/sys/kernel/random/ and are as follows:
++ *
++ * - boot_id - a UUID representing the current boot.
++ *
++ * - uuid - a random UUID, different each time the file is read.
++ *
++ * - poolsize - the number of bits of entropy that the input pool can
++ * hold, tied to the POOL_BITS constant.
++ *
++ * - entropy_avail - the number of bits of entropy currently in the
++ * input pool. Always <= poolsize.
++ *
++ * - write_wakeup_threshold - the amount of entropy in the input pool
++ * below which write polls to /dev/random will unblock, requesting
++ * more entropy, tied to the POOL_READY_BITS constant. It is writable
++ * to avoid breaking old userspaces, but writing to it does not
++ * change any behavior of the RNG.
++ *
++ * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL.
++ * It is writable to avoid breaking old userspaces, but writing
++ * to it does not change any behavior of the RNG.
+ *
+ ********************************************************************/
+
+@@ -2018,25 +1440,28 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
+
+ #include <linux/sysctl.h>
+
+-static int min_write_thresh;
+-static int max_write_thresh = INPUT_POOL_WORDS * 32;
+-static int random_min_urandom_seed = 60;
+-static char sysctl_bootid[16];
++static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
++static int sysctl_random_write_wakeup_bits = POOL_READY_BITS;
++static int sysctl_poolsize = POOL_BITS;
++static u8 sysctl_bootid[UUID_SIZE];
+
+ /*
+ * This function is used to return both the bootid UUID, and random
+- * UUID. The difference is in whether table->data is NULL; if it is,
++ * UUID. The difference is in whether table->data is NULL; if it is,
+ * then a new UUID is generated and returned to the user.
+- *
+- * If the user accesses this via the proc interface, the UUID will be
+- * returned as an ASCII string in the standard UUID format; if via the
+- * sysctl system call, as 16 bytes of binary data.
+ */
+-static int proc_do_uuid(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
++static int proc_do_uuid(struct ctl_table *table, int write, void *buf,
++ size_t *lenp, loff_t *ppos)
+ {
+- struct ctl_table fake_table;
+- unsigned char buf[64], tmp_uuid[16], *uuid;
++ u8 tmp_uuid[UUID_SIZE], *uuid;
++ char uuid_string[UUID_STRING_LEN + 1];
++ struct ctl_table fake_table = {
++ .data = uuid_string,
++ .maxlen = UUID_STRING_LEN
++ };
++
++ if (write)
++ return -EPERM;
+
+ uuid = table->data;
+ if (!uuid) {
+@@ -2051,32 +1476,17 @@ static int proc_do_uuid(struct ctl_table *table, int write,
+ spin_unlock(&bootid_spinlock);
+ }
+
+- sprintf(buf, "%pU", uuid);
+-
+- fake_table.data = buf;
+- fake_table.maxlen = sizeof(buf);
+-
+- return proc_dostring(&fake_table, write, buffer, lenp, ppos);
++ snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
++ return proc_dostring(&fake_table, 0, buf, lenp, ppos);
+ }
+
+-/*
+- * Return entropy available scaled to integral bits
+- */
+-static int proc_do_entropy(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
++/* The same as proc_dointvec, but writes don't change anything. */
++static int proc_do_rointvec(struct ctl_table *table, int write, void *buf,
++ size_t *lenp, loff_t *ppos)
+ {
+- struct ctl_table fake_table;
+- int entropy_count;
+-
+- entropy_count = *(int *)table->data >> ENTROPY_SHIFT;
+-
+- fake_table.data = &entropy_count;
+- fake_table.maxlen = sizeof(entropy_count);
+-
+- return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
++ return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos);
+ }
+
+-static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
+ extern struct ctl_table random_table[];
+ struct ctl_table random_table[] = {
+ {
+@@ -2088,218 +1498,36 @@ struct ctl_table random_table[] = {
+ },
+ {
+ .procname = "entropy_avail",
++ .data = &input_pool.init_bits,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+- .proc_handler = proc_do_entropy,
+- .data = &input_pool.entropy_count,
++ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "write_wakeup_threshold",
+- .data = &random_write_wakeup_bits,
++ .data = &sysctl_random_write_wakeup_bits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_write_thresh,
+- .extra2 = &max_write_thresh,
++ .proc_handler = proc_do_rointvec,
+ },
+ {
+ .procname = "urandom_min_reseed_secs",
+- .data = &random_min_urandom_seed,
++ .data = &sysctl_random_min_urandom_seed,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = proc_dointvec,
++ .proc_handler = proc_do_rointvec,
+ },
+ {
+ .procname = "boot_id",
+ .data = &sysctl_bootid,
+- .maxlen = 16,
+ .mode = 0444,
+ .proc_handler = proc_do_uuid,
+ },
+ {
+ .procname = "uuid",
+- .maxlen = 16,
+ .mode = 0444,
+ .proc_handler = proc_do_uuid,
+ },
+-#ifdef ADD_INTERRUPT_BENCH
+- {
+- .procname = "add_interrupt_avg_cycles",
+- .data = &avg_cycles,
+- .maxlen = sizeof(avg_cycles),
+- .mode = 0444,
+- .proc_handler = proc_doulongvec_minmax,
+- },
+- {
+- .procname = "add_interrupt_avg_deviation",
+- .data = &avg_deviation,
+- .maxlen = sizeof(avg_deviation),
+- .mode = 0444,
+- .proc_handler = proc_doulongvec_minmax,
+- },
+-#endif
+ { }
+ };
+-#endif /* CONFIG_SYSCTL */
+-
+-struct batched_entropy {
+- union {
+- u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
+- u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
+- };
+- unsigned int position;
+- spinlock_t batch_lock;
+-};
+-
+-/*
+- * Get a random word for internal kernel use only. The quality of the random
+- * number is good as /dev/urandom, but there is no backtrack protection, with
+- * the goal of being quite fast and not depleting entropy. In order to ensure
+- * that the randomness provided by this function is okay, the function
+- * wait_for_random_bytes() should be called and return 0 at least once at any
+- * point prior.
+- */
+-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
+- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
+-};
+-
+-u64 get_random_u64(void)
+-{
+- u64 ret;
+- unsigned long flags;
+- struct batched_entropy *batch;
+- static void *previous;
+-
+- warn_unseeded_randomness(&previous);
+-
+- batch = raw_cpu_ptr(&batched_entropy_u64);
+- spin_lock_irqsave(&batch->batch_lock, flags);
+- if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+- extract_crng((u8 *)batch->entropy_u64);
+- batch->position = 0;
+- }
+- ret = batch->entropy_u64[batch->position++];
+- spin_unlock_irqrestore(&batch->batch_lock, flags);
+- return ret;
+-}
+-EXPORT_SYMBOL(get_random_u64);
+-
+-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
+- .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
+-};
+-u32 get_random_u32(void)
+-{
+- u32 ret;
+- unsigned long flags;
+- struct batched_entropy *batch;
+- static void *previous;
+-
+- warn_unseeded_randomness(&previous);
+-
+- batch = raw_cpu_ptr(&batched_entropy_u32);
+- spin_lock_irqsave(&batch->batch_lock, flags);
+- if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+- extract_crng((u8 *)batch->entropy_u32);
+- batch->position = 0;
+- }
+- ret = batch->entropy_u32[batch->position++];
+- spin_unlock_irqrestore(&batch->batch_lock, flags);
+- return ret;
+-}
+-EXPORT_SYMBOL(get_random_u32);
+-
+-/* It's important to invalidate all potential batched entropy that might
+- * be stored before the crng is initialized, which we can do lazily by
+- * simply resetting the counter to zero so that it's re-extracted on the
+- * next usage. */
+-static void invalidate_batched_entropy(void)
+-{
+- int cpu;
+- unsigned long flags;
+-
+- for_each_possible_cpu (cpu) {
+- struct batched_entropy *batched_entropy;
+-
+- batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
+- spin_lock_irqsave(&batched_entropy->batch_lock, flags);
+- batched_entropy->position = 0;
+- spin_unlock(&batched_entropy->batch_lock);
+-
+- batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
+- spin_lock(&batched_entropy->batch_lock);
+- batched_entropy->position = 0;
+- spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
+- }
+-}
+-
+-/**
+- * randomize_page - Generate a random, page aligned address
+- * @start: The smallest acceptable address the caller will take.
+- * @range: The size of the area, starting at @start, within which the
+- * random address must fall.
+- *
+- * If @start + @range would overflow, @range is capped.
+- *
+- * NOTE: Historical use of randomize_range, which this replaces, presumed that
+- * @start was already page aligned. We now align it regardless.
+- *
+- * Return: A page aligned address within [start, start + range). On error,
+- * @start is returned.
+- */
+-unsigned long
+-randomize_page(unsigned long start, unsigned long range)
+-{
+- if (!PAGE_ALIGNED(start)) {
+- range -= PAGE_ALIGN(start) - start;
+- start = PAGE_ALIGN(start);
+- }
+-
+- if (start > ULONG_MAX - range)
+- range = ULONG_MAX - start;
+-
+- range >>= PAGE_SHIFT;
+-
+- if (range == 0)
+- return start;
+-
+- return start + (get_random_long() % range << PAGE_SHIFT);
+-}
+-
+-/* Interface for in-kernel drivers of true hardware RNGs.
+- * Those devices may produce endless random bits and will be throttled
+- * when our pool is full.
+- */
+-void add_hwgenerator_randomness(const char *buffer, size_t count,
+- size_t entropy)
+-{
+- struct entropy_store *poolp = &input_pool;
+-
+- if (unlikely(crng_init == 0)) {
+- crng_fast_load(buffer, count);
+- return;
+- }
+-
+- /* Suspend writing if we're above the trickle threshold.
+- * We'll be woken up again once below random_write_wakeup_thresh,
+- * or when the calling thread is about to terminate.
+- */
+- wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+- ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
+- mix_pool_bytes(poolp, buffer, count);
+- credit_entropy_bits(poolp, entropy);
+-}
+-EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
+-
+-/* Handle random seed passed by bootloader.
+- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
+- * it would be regarded as device data.
+- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+- */
+-void add_bootloader_randomness(const void *buf, unsigned int size)
+-{
+- if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
+- add_hwgenerator_randomness(buf, size, size * 8);
+- else
+- add_device_randomness(buf, size);
+-}
+-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
++#endif /* CONFIG_SYSCTL */
+diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c
+index 1b18ce5ebab1e..cd266021d0103 100644
+--- a/drivers/char/tpm/eventlog/acpi.c
++++ b/drivers/char/tpm/eventlog/acpi.c
+@@ -90,16 +90,21 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
+ return -ENODEV;
+
+ if (tbl->header.length <
+- sizeof(*tbl) + sizeof(struct acpi_tpm2_phy))
++ sizeof(*tbl) + sizeof(struct acpi_tpm2_phy)) {
++ acpi_put_table((struct acpi_table_header *)tbl);
+ return -ENODEV;
++ }
+
+ tpm2_phy = (void *)tbl + sizeof(*tbl);
+ len = tpm2_phy->log_area_minimum_length;
+
+ start = tpm2_phy->log_area_start_address;
+- if (!start || !len)
++ if (!start || !len) {
++ acpi_put_table((struct acpi_table_header *)tbl);
+ return -ENODEV;
++ }
+
++ acpi_put_table((struct acpi_table_header *)tbl);
+ format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
+ } else {
+ /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
+@@ -120,8 +125,10 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
+ break;
+ }
+
++ acpi_put_table((struct acpi_table_header *)buff);
+ format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+ }
++
+ if (!len) {
+ dev_warn(&chip->dev, "%s: TCPA log area empty\n", __func__);
+ return -EIO;
+@@ -136,8 +143,12 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
+
+ ret = -EIO;
+ virt = acpi_os_map_iomem(start, len);
+- if (!virt)
++ if (!virt) {
++ dev_warn(&chip->dev, "%s: Failed to map ACPI memory\n", __func__);
++ /* try EFI log next */
++ ret = -ENODEV;
+ goto err;
++ }
+
+ memcpy_fromio(log->bios_event_log, virt, len);
+
+@@ -156,5 +167,4 @@ err:
+ kfree(log->bios_event_log);
+ log->bios_event_log = NULL;
+ return ret;
+-
+ }
+diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
+index ddaeceb7e1091..65d800ecc9964 100644
+--- a/drivers/char/tpm/tpm-chip.c
++++ b/drivers/char/tpm/tpm-chip.c
+@@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev)
+ kfree(chip);
+ }
+
+-static void tpm_devs_release(struct device *dev)
+-{
+- struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
+-
+- /* release the master device reference */
+- put_device(&chip->dev);
+-}
+-
+ /**
+ * tpm_class_shutdown() - prepare the TPM device for loss of power.
+ * @dev: device to which the chip is associated.
+@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
+ chip->dev_num = rc;
+
+ device_initialize(&chip->dev);
+- device_initialize(&chip->devs);
+
+ chip->dev.class = tpm_class;
+ chip->dev.class->shutdown_pre = tpm_class_shutdown;
+@@ -352,29 +343,12 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
+ chip->dev.parent = pdev;
+ chip->dev.groups = chip->groups;
+
+- chip->devs.parent = pdev;
+- chip->devs.class = tpmrm_class;
+- chip->devs.release = tpm_devs_release;
+- /* get extra reference on main device to hold on
+- * behalf of devs. This holds the chip structure
+- * while cdevs is in use. The corresponding put
+- * is in the tpm_devs_release (TPM2 only)
+- */
+- if (chip->flags & TPM_CHIP_FLAG_TPM2)
+- get_device(&chip->dev);
+-
+ if (chip->dev_num == 0)
+ chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
+ else
+ chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
+
+- chip->devs.devt =
+- MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
+-
+ rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num);
+- if (rc)
+- goto out;
+- rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
+ if (rc)
+ goto out;
+
+@@ -382,9 +356,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
+ chip->flags |= TPM_CHIP_FLAG_VIRTUAL;
+
+ cdev_init(&chip->cdev, &tpm_fops);
+- cdev_init(&chip->cdevs, &tpmrm_fops);
+ chip->cdev.owner = THIS_MODULE;
+- chip->cdevs.owner = THIS_MODULE;
+
+ rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
+ if (rc) {
+@@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
+ return chip;
+
+ out:
+- put_device(&chip->devs);
+ put_device(&chip->dev);
+ return ERR_PTR(rc);
+ }
+@@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip)
+ }
+
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+- rc = cdev_device_add(&chip->cdevs, &chip->devs);
+- if (rc) {
+- dev_err(&chip->devs,
+- "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
+- dev_name(&chip->devs), MAJOR(chip->devs.devt),
+- MINOR(chip->devs.devt), rc);
+- return rc;
+- }
++ rc = tpm_devs_add(chip);
++ if (rc)
++ goto err_del_cdev;
+ }
+
+ /* Make the chip available. */
+@@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip)
+ idr_replace(&dev_nums_idr, chip, chip->dev_num);
+ mutex_unlock(&idr_lock);
+
++ return 0;
++
++err_del_cdev:
++ cdev_device_del(&chip->cdev, &chip->dev);
+ return rc;
+ }
+
+@@ -474,13 +444,21 @@ static void tpm_del_char_device(struct tpm_chip *chip)
+
+ /* Make the driver uncallable. */
+ down_write(&chip->ops_sem);
+- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+- if (!tpm_chip_start(chip)) {
+- tpm2_shutdown(chip, TPM2_SU_CLEAR);
+- tpm_chip_stop(chip);
++
++ /*
++	 * Check if chip->ops is still valid: if the controller driver's
++	 * shutdown handler unregisters the controller, we are called twice
++	 * and chip->ops is already NULL on the second call.
++ */
++ if (chip->ops) {
++ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
++ if (!tpm_chip_start(chip)) {
++ tpm2_shutdown(chip, TPM2_SU_CLEAR);
++ tpm_chip_stop(chip);
++ }
+ }
++ chip->ops = NULL;
+ }
+- chip->ops = NULL;
+ up_write(&chip->ops_sem);
+ }
+
+@@ -641,7 +619,7 @@ void tpm_chip_unregister(struct tpm_chip *chip)
+ hwrng_unregister(&chip->hwrng);
+ tpm_bios_log_teardown(chip);
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
+- cdev_device_del(&chip->cdevs, &chip->devs);
++ tpm_devs_remove(chip);
+ tpm_del_char_device(chip);
+ }
+ EXPORT_SYMBOL_GPL(tpm_chip_unregister);
+diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
+index c08cbb306636b..dc4c0a0a51290 100644
+--- a/drivers/char/tpm/tpm-dev-common.c
++++ b/drivers/char/tpm/tpm-dev-common.c
+@@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work)
+ ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
+ sizeof(priv->data_buffer));
+ tpm_put_ops(priv->chip);
+- if (ret > 0) {
++
++ /*
++ * If ret is > 0 then tpm_dev_transmit returned the size of the
++ * response. If ret is < 0 then tpm_dev_transmit failed and
++ * returned an error code.
++ */
++ if (ret != 0) {
+ priv->response_length = ret;
+ mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
+ }
+diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
+index 1621ce8187052..d69905233aff2 100644
+--- a/drivers/char/tpm/tpm-interface.c
++++ b/drivers/char/tpm/tpm-interface.c
+@@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev)
+ !pm_suspend_via_firmware())
+ goto suspended;
+
+- if (!tpm_chip_start(chip)) {
++ rc = tpm_try_get_ops(chip);
++ if (!rc) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
+ tpm2_shutdown(chip, TPM2_SU_STATE);
+ else
+ rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+
+- tpm_chip_stop(chip);
++ tpm_put_ops(chip);
+ }
+
+ suspended:
+diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
+index 283f78211c3a7..2163c6ee0d364 100644
+--- a/drivers/char/tpm/tpm.h
++++ b/drivers/char/tpm/tpm.h
+@@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
+ size_t cmdsiz);
+ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
+ size_t *bufsiz);
++int tpm_devs_add(struct tpm_chip *chip);
++void tpm_devs_remove(struct tpm_chip *chip);
+
+ void tpm_bios_log_setup(struct tpm_chip *chip);
+ void tpm_bios_log_teardown(struct tpm_chip *chip);
+diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
+index a25815a6f6253..de92065394be9 100644
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -400,7 +400,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value,
+ if (!rc) {
+ out = (struct tpm2_get_cap_out *)
+ &buf.data[TPM_HEADER_SIZE];
+- *value = be32_to_cpu(out->value);
++ /*
++	 * To prevent boot failures on some systems, Infineon TPM 2.0
++ * returns SUCCESS on TPM2_Startup in field upgrade mode. Also
++ * the TPM2_Getcapability command returns a zero length list
++ * in field upgrade mode.
++ */
++ if (be32_to_cpu(out->property_cnt) > 0)
++ *value = be32_to_cpu(out->value);
++ else
++ rc = -ENODATA;
+ }
+ tpm_buf_destroy(&buf);
+ return rc;
+diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
+index 784b8b3cb903f..ffb35f0154c16 100644
+--- a/drivers/char/tpm/tpm2-space.c
++++ b/drivers/char/tpm/tpm2-space.c
+@@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
+
+ void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space)
+ {
+- mutex_lock(&chip->tpm_mutex);
+- if (!tpm_chip_start(chip)) {
++
++ if (tpm_try_get_ops(chip) == 0) {
+ tpm2_flush_sessions(chip, space);
+- tpm_chip_stop(chip);
++ tpm_put_ops(chip);
+ }
+- mutex_unlock(&chip->tpm_mutex);
++
+ kfree(space->context_buf);
+ kfree(space->session_buf);
+ }
+@@ -455,6 +455,9 @@ static int tpm2_map_response_body(struct tpm_chip *chip, u32 cc, u8 *rsp,
+ if (be32_to_cpu(data->capability) != TPM2_CAP_HANDLES)
+ return 0;
+
++ if (be32_to_cpu(data->count) > (UINT_MAX - TPM_HEADER_SIZE - 9) / 4)
++ return -EFAULT;
++
+ if (len != TPM_HEADER_SIZE + 9 + 4 * be32_to_cpu(data->count))
+ return -EFAULT;
+
+@@ -571,3 +574,68 @@ out:
+ dev_err(&chip->dev, "%s: error %d\n", __func__, rc);
+ return rc;
+ }
++
++/*
++ * Put the reference to the main device.
++ */
++static void tpm_devs_release(struct device *dev)
++{
++ struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
++
++ /* release the master device reference */
++ put_device(&chip->dev);
++}
++
++/*
++ * Remove the device file for exposed TPM spaces and release the device
++ * reference. This may also release the reference to the master device.
++ */
++void tpm_devs_remove(struct tpm_chip *chip)
++{
++ cdev_device_del(&chip->cdevs, &chip->devs);
++ put_device(&chip->devs);
++}
++
++/*
++ * Add a device file to expose TPM spaces. Also take a reference to the
++ * main device.
++ */
++int tpm_devs_add(struct tpm_chip *chip)
++{
++ int rc;
++
++ device_initialize(&chip->devs);
++ chip->devs.parent = chip->dev.parent;
++ chip->devs.class = tpmrm_class;
++
++ /*
++ * Get extra reference on main device to hold on behalf of devs.
++ * This holds the chip structure while cdevs is in use. The
++ * corresponding put is in the tpm_devs_release.
++ */
++ get_device(&chip->dev);
++ chip->devs.release = tpm_devs_release;
++ chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
++ cdev_init(&chip->cdevs, &tpmrm_fops);
++ chip->cdevs.owner = THIS_MODULE;
++
++ rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
++ if (rc)
++ goto err_put_devs;
++
++ rc = cdev_device_add(&chip->cdevs, &chip->devs);
++ if (rc) {
++ dev_err(&chip->devs,
++ "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
++ dev_name(&chip->devs), MAJOR(chip->devs.devt),
++ MINOR(chip->devs.devt), rc);
++ goto err_put_devs;
++ }
++
++ return 0;
++
++err_put_devs:
++ put_device(&chip->devs);
++
++ return rc;
++}
+diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
+index 18606651d1aa4..16fc481d60950 100644
+--- a/drivers/char/tpm/tpm_crb.c
++++ b/drivers/char/tpm/tpm_crb.c
+@@ -252,7 +252,7 @@ static int __crb_relinquish_locality(struct device *dev,
+ iowrite32(CRB_LOC_CTRL_RELINQUISH, &priv->regs_h->loc_ctrl);
+ if (!crb_wait_for_reg_32(&priv->regs_h->loc_state, mask, value,
+ TPM2_TIMEOUT_C)) {
+- dev_warn(dev, "TPM_LOC_STATE_x.requestAccess timed out\n");
++ dev_warn(dev, "TPM_LOC_STATE_x.Relinquish timed out\n");
+ return -ETIME;
+ }
+
+@@ -676,12 +676,16 @@ static int crb_acpi_add(struct acpi_device *device)
+
+ /* Should the FIFO driver handle this? */
+ sm = buf->start_method;
+- if (sm == ACPI_TPM2_MEMORY_MAPPED)
+- return -ENODEV;
++ if (sm == ACPI_TPM2_MEMORY_MAPPED) {
++ rc = -ENODEV;
++ goto out;
++ }
+
+ priv = devm_kzalloc(dev, sizeof(struct crb_priv), GFP_KERNEL);
+- if (!priv)
+- return -ENOMEM;
++ if (!priv) {
++ rc = -ENOMEM;
++ goto out;
++ }
+
+ if (sm == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC) {
+ if (buf->header.length < (sizeof(*buf) + sizeof(*crb_smc))) {
+@@ -689,7 +693,8 @@ static int crb_acpi_add(struct acpi_device *device)
+ FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
+ buf->header.length,
+ ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC);
+- return -EINVAL;
++ rc = -EINVAL;
++ goto out;
+ }
+ crb_smc = ACPI_ADD_PTR(struct tpm2_crb_smc, buf, sizeof(*buf));
+ priv->smc_func_id = crb_smc->smc_func_id;
+@@ -700,17 +705,23 @@ static int crb_acpi_add(struct acpi_device *device)
+
+ rc = crb_map_io(device, priv, buf);
+ if (rc)
+- return rc;
++ goto out;
+
+ chip = tpmm_chip_alloc(dev, &tpm_crb);
+- if (IS_ERR(chip))
+- return PTR_ERR(chip);
++ if (IS_ERR(chip)) {
++ rc = PTR_ERR(chip);
++ goto out;
++ }
+
+ dev_set_drvdata(&chip->dev, priv);
+ chip->acpi_dev_handle = device->handle;
+ chip->flags = TPM_CHIP_FLAG_TPM2;
+
+- return tpm_chip_register(chip);
++ rc = tpm_chip_register(chip);
++
++out:
++ acpi_put_table((struct acpi_table_header *)buf);
++ return rc;
+ }
+
+ static int crb_acpi_remove(struct acpi_device *device)
+diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
+index 6e3235565a4d8..d9daaafdd295c 100644
+--- a/drivers/char/tpm/tpm_ftpm_tee.c
++++ b/drivers/char/tpm/tpm_ftpm_tee.c
+@@ -397,7 +397,13 @@ static int __init ftpm_mod_init(void)
+ if (rc)
+ return rc;
+
+- return driver_register(&ftpm_tee_driver.driver);
++ rc = driver_register(&ftpm_tee_driver.driver);
++ if (rc) {
++ platform_driver_unregister(&ftpm_tee_plat_driver);
++ return rc;
++ }
++
++ return 0;
+ }
+
+ static void __exit ftpm_mod_exit(void)
+diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
+index 3af4c07a9342f..d3989b257f422 100644
+--- a/drivers/char/tpm/tpm_ibmvtpm.c
++++ b/drivers/char/tpm/tpm_ibmvtpm.c
+@@ -681,6 +681,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
+ if (!wait_event_timeout(ibmvtpm->crq_queue.wq,
+ ibmvtpm->rtce_buf != NULL,
+ HZ)) {
++ rc = -ENODEV;
+ dev_err(dev, "CRQ response timed out\n");
+ goto init_irq_cleanup;
+ }
+diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
+index d3f2e5364c275..dfb463ee7ca1a 100644
+--- a/drivers/char/tpm/tpm_tis.c
++++ b/drivers/char/tpm/tpm_tis.c
+@@ -83,6 +83,22 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"),
+ },
+ },
++ {
++ .callback = tpm_tis_disable_irq,
++ .ident = "ThinkStation P360 Tiny",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P360 Tiny"),
++ },
++ },
++ {
++ .callback = tpm_tis_disable_irq,
++ .ident = "ThinkPad L490",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L490"),
++ },
++ },
+ {}
+ };
+
+@@ -125,6 +141,7 @@ static int check_acpi_tpm2(struct device *dev)
+ const struct acpi_device_id *aid = acpi_match_device(tpm_acpi_tbl, dev);
+ struct acpi_table_tpm2 *tbl;
+ acpi_status st;
++ int ret = 0;
+
+ if (!aid || aid->driver_data != DEVICE_IS_TPM2)
+ return 0;
+@@ -132,8 +149,7 @@ static int check_acpi_tpm2(struct device *dev)
+ /* If the ACPI TPM2 signature is matched then a global ACPI_SIG_TPM2
+ * table is mandatory
+ */
+- st =
+- acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl);
++ st = acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl);
+ if (ACPI_FAILURE(st) || tbl->header.length < sizeof(*tbl)) {
+ dev_err(dev, FW_BUG "failed to get TPM2 ACPI table\n");
+ return -EINVAL;
+@@ -141,9 +157,10 @@ static int check_acpi_tpm2(struct device *dev)
+
+ /* The tpm2_crb driver handles this device */
+ if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED)
+- return -ENODEV;
++ ret = -ENODEV;
+
+- return 0;
++ acpi_put_table((struct acpi_table_header *)tbl);
++ return ret;
+ }
+ #else
+ static int check_acpi_tpm2(struct device *dev)
+diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
+index 69579efb247b3..d7c440ac465f3 100644
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -48,6 +48,7 @@ static int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask,
+ unsigned long timeout, wait_queue_head_t *queue,
+ bool check_cancel)
+ {
++ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ unsigned long stop;
+ long rc;
+ u8 status;
+@@ -80,8 +81,8 @@ again:
+ }
+ } else {
+ do {
+- usleep_range(TPM_TIMEOUT_USECS_MIN,
+- TPM_TIMEOUT_USECS_MAX);
++ usleep_range(priv->timeout_min,
++ priv->timeout_max);
+ status = chip->ops->status(chip);
+ if ((status & mask) == mask)
+ return 0;
+@@ -135,16 +136,27 @@ static bool check_locality(struct tpm_chip *chip, int l)
+ return false;
+ }
+
+-static int release_locality(struct tpm_chip *chip, int l)
++static int __tpm_tis_relinquish_locality(struct tpm_tis_data *priv, int l)
++{
++ tpm_tis_write8(priv, TPM_ACCESS(l), TPM_ACCESS_ACTIVE_LOCALITY);
++
++ return 0;
++}
++
++static int tpm_tis_relinquish_locality(struct tpm_chip *chip, int l)
+ {
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+
+- tpm_tis_write8(priv, TPM_ACCESS(l), TPM_ACCESS_ACTIVE_LOCALITY);
++ mutex_lock(&priv->locality_count_mutex);
++ priv->locality_count--;
++ if (priv->locality_count == 0)
++ __tpm_tis_relinquish_locality(priv, l);
++ mutex_unlock(&priv->locality_count_mutex);
+
+ return 0;
+ }
+
+-static int request_locality(struct tpm_chip *chip, int l)
++static int __tpm_tis_request_locality(struct tpm_chip *chip, int l)
+ {
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ unsigned long stop, timeout;
+@@ -185,6 +197,20 @@ again:
+ return -1;
+ }
+
++static int tpm_tis_request_locality(struct tpm_chip *chip, int l)
++{
++ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
++ int ret = 0;
++
++ mutex_lock(&priv->locality_count_mutex);
++ if (priv->locality_count == 0)
++ ret = __tpm_tis_request_locality(chip, l);
++ if (!ret)
++ priv->locality_count++;
++ mutex_unlock(&priv->locality_count_mutex);
++ return ret;
++}
++
+ static u8 tpm_tis_status(struct tpm_chip *chip)
+ {
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+@@ -288,6 +314,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+ int size = 0;
+ int status;
+ u32 expected;
++ int rc;
+
+ if (count < TPM_HEADER_SIZE) {
+ size = -EIO;
+@@ -307,8 +334,13 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+ goto out;
+ }
+
+- size += recv_data(chip, &buf[TPM_HEADER_SIZE],
+- expected - TPM_HEADER_SIZE);
++ rc = recv_data(chip, &buf[TPM_HEADER_SIZE],
++ expected - TPM_HEADER_SIZE);
++ if (rc < 0) {
++ size = rc;
++ goto out;
++ }
++ size += rc;
+ if (size < expected) {
+ dev_err(&chip->dev, "Unable to read remainder of result\n");
+ size = -ETIME;
+@@ -637,7 +669,7 @@ static int probe_itpm(struct tpm_chip *chip)
+ if (vendor != TPM_VID_INTEL)
+ return 0;
+
+- if (request_locality(chip, 0) != 0)
++ if (tpm_tis_request_locality(chip, 0) != 0)
+ return -EBUSY;
+
+ rc = tpm_tis_send_data(chip, cmd_getticks, len);
+@@ -658,7 +690,7 @@ static int probe_itpm(struct tpm_chip *chip)
+
+ out:
+ tpm_tis_ready(chip);
+- release_locality(chip, priv->locality);
++ tpm_tis_relinquish_locality(chip, priv->locality);
+
+ return rc;
+ }
+@@ -705,7 +737,9 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
+ wake_up_interruptible(&priv->int_queue);
+
+ /* Clear interrupts handled with TPM_EOI */
++ tpm_tis_request_locality(chip, 0);
+ rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt);
++ tpm_tis_relinquish_locality(chip, 0);
+ if (rc < 0)
+ return IRQ_NONE;
+
+@@ -713,25 +747,17 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
++static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
+ {
+ const char *desc = "attempting to generate an interrupt";
+ u32 cap2;
+ cap_t cap;
+ int ret;
+
+- ret = request_locality(chip, 0);
+- if (ret < 0)
+- return ret;
+-
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
+ ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+ else
+ ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+-
+- release_locality(chip, 0);
+-
+- return ret;
+ }
+
+ /* Register the IRQ and issue a command that will cause an interrupt. If an
+@@ -746,60 +772,66 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask,
+ int rc;
+ u32 int_status;
+
+- if (devm_request_irq(chip->dev.parent, irq, tis_int_handler, flags,
+- dev_name(&chip->dev), chip) != 0) {
++
++ rc = devm_request_threaded_irq(chip->dev.parent, irq, NULL,
++ tis_int_handler, IRQF_ONESHOT | flags,
++ dev_name(&chip->dev), chip);
++ if (rc) {
+ dev_info(&chip->dev, "Unable to request irq: %d for probe\n",
+ irq);
+ return -1;
+ }
+ priv->irq = irq;
+
++ rc = tpm_tis_request_locality(chip, 0);
++ if (rc < 0)
++ return rc;
++
+ rc = tpm_tis_read8(priv, TPM_INT_VECTOR(priv->locality),
+ &original_int_vec);
+- if (rc < 0)
++ if (rc < 0) {
++ tpm_tis_relinquish_locality(chip, priv->locality);
+ return rc;
++ }
+
+ rc = tpm_tis_write8(priv, TPM_INT_VECTOR(priv->locality), irq);
+ if (rc < 0)
+- return rc;
++ goto restore_irqs;
+
+ rc = tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &int_status);
+ if (rc < 0)
+- return rc;
++ goto restore_irqs;
+
+ /* Clear all existing */
+ rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), int_status);
+ if (rc < 0)
+- return rc;
+-
++ goto restore_irqs;
+ /* Turn on */
+ rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality),
+ intmask | TPM_GLOBAL_INT_ENABLE);
+ if (rc < 0)
+- return rc;
++ goto restore_irqs;
+
+ priv->irq_tested = false;
+
+ /* Generate an interrupt by having the core call through to
+ * tpm_tis_send
+ */
+- rc = tpm_tis_gen_interrupt(chip);
+- if (rc < 0)
+- return rc;
++ tpm_tis_gen_interrupt(chip);
+
++restore_irqs:
+ /* tpm_tis_send will either confirm the interrupt is working or it
+ * will call disable_irq which undoes all of the above.
+ */
+ if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) {
+- rc = tpm_tis_write8(priv, original_int_vec,
+- TPM_INT_VECTOR(priv->locality));
+- if (rc < 0)
+- return rc;
+-
+- return 1;
++ tpm_tis_write8(priv, original_int_vec,
++ TPM_INT_VECTOR(priv->locality));
++ rc = -1;
+ }
+
+- return 0;
++ tpm_tis_relinquish_locality(chip, priv->locality);
++
++ return rc;
+ }
+
+ /* Try to find the IRQ the TPM is using. This is for legacy x86 systems that
+@@ -913,8 +945,8 @@ static const struct tpm_class_ops tpm_tis = {
+ .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ .req_canceled = tpm_tis_req_canceled,
+- .request_locality = request_locality,
+- .relinquish_locality = release_locality,
++ .request_locality = tpm_tis_request_locality,
++ .relinquish_locality = tpm_tis_relinquish_locality,
+ .clk_enable = tpm_tis_clkrun_enable,
+ };
+
+@@ -945,9 +977,26 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
+ chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
+ chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
+ chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
++ priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
++ priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
+ priv->phy_ops = phy_ops;
++ priv->locality_count = 0;
++ mutex_init(&priv->locality_count_mutex);
++
+ dev_set_drvdata(&chip->dev, priv);
+
++ rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor);
++ if (rc < 0)
++ return rc;
++
++ priv->manufacturer_id = vendor;
++
++ if (priv->manufacturer_id == TPM_VID_ATML &&
++ !(chip->flags & TPM_CHIP_FLAG_TPM2)) {
++ priv->timeout_min = TIS_TIMEOUT_MIN_ATML;
++ priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
++ }
++
+ if (is_bsw()) {
+ priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
+ ILB_REMAP_SIZE);
+@@ -978,7 +1027,15 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
+ intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT |
+ TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT;
+ intmask &= ~TPM_GLOBAL_INT_ENABLE;
++
++ rc = tpm_tis_request_locality(chip, 0);
++ if (rc < 0) {
++ rc = -ENODEV;
++ goto out_err;
++ }
++
+ tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
++ tpm_tis_relinquish_locality(chip, 0);
+
+ rc = tpm_chip_start(chip);
+ if (rc)
+@@ -988,12 +1045,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
+ if (rc)
+ goto out_err;
+
+- rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor);
+- if (rc < 0)
+- goto out_err;
+-
+- priv->manufacturer_id = vendor;
+-
+ rc = tpm_tis_read8(priv, TPM_RID(0), &rid);
+ if (rc < 0)
+ goto out_err;
+@@ -1044,13 +1095,13 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
+ * proper timeouts for the driver.
+ */
+
+- rc = request_locality(chip, 0);
++ rc = tpm_tis_request_locality(chip, 0);
+ if (rc < 0)
+ goto out_err;
+
+ rc = tpm_get_timeouts(chip);
+
+- release_locality(chip, 0);
++ tpm_tis_relinquish_locality(chip, 0);
+
+ if (rc) {
+ dev_err(dev, "Could not get TPM timeouts and durations\n");
+@@ -1058,17 +1109,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
+ goto out_err;
+ }
+
+- if (irq) {
++ if (irq)
+ tpm_tis_probe_irq_single(chip, intmask, IRQF_SHARED,
+ irq);
+- if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) {
+- dev_err(&chip->dev, FW_BUG
++ else
++ tpm_tis_probe_irq(chip, intmask);
++
++ if (!(chip->flags & TPM_CHIP_FLAG_IRQ)) {
++ dev_err(&chip->dev, FW_BUG
+ "TPM interrupt not working, polling instead\n");
+
+- disable_interrupts(chip);
+- }
+- } else {
+- tpm_tis_probe_irq(chip, intmask);
++ rc = tpm_tis_request_locality(chip, 0);
++ if (rc < 0)
++ goto out_err;
++ disable_interrupts(chip);
++ tpm_tis_relinquish_locality(chip, 0);
+ }
+ }
+
+@@ -1129,28 +1184,27 @@ int tpm_tis_resume(struct device *dev)
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+ int ret;
+
++ ret = tpm_tis_request_locality(chip, 0);
++ if (ret < 0)
++ return ret;
++
+ if (chip->flags & TPM_CHIP_FLAG_IRQ)
+ tpm_tis_reenable_interrupts(chip);
+
+ ret = tpm_pm_resume(dev);
+ if (ret)
+- return ret;
++ goto out;
+
+ /*
+ * TPM 1.2 requires self-test on resume. This function actually returns
+ * an error code but for unknown reason it isn't handled.
+ */
+- if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+- ret = request_locality(chip, 0);
+- if (ret < 0)
+- return ret;
+-
++ if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
+ tpm1_do_selftest(chip);
++out:
++ tpm_tis_relinquish_locality(chip, 0);
+
+- release_locality(chip, 0);
+- }
+-
+- return 0;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(tpm_tis_resume);
+ #endif
+diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
+index b2a3c6c72882d..464ed352ab2e8 100644
+--- a/drivers/char/tpm/tpm_tis_core.h
++++ b/drivers/char/tpm/tpm_tis_core.h
+@@ -54,6 +54,8 @@ enum tis_defaults {
+ TIS_MEM_LEN = 0x5000,
+ TIS_SHORT_TIMEOUT = 750, /* ms */
+ TIS_LONG_TIMEOUT = 2000, /* 2 sec */
++ TIS_TIMEOUT_MIN_ATML = 14700, /* usecs */
++ TIS_TIMEOUT_MAX_ATML = 15000, /* usecs */
+ };
+
+ /* Some timeout values are needed before it is known whether the chip is
+@@ -88,6 +90,8 @@ enum tpm_tis_flags {
+
+ struct tpm_tis_data {
+ u16 manufacturer_id;
++ struct mutex locality_count_mutex;
++ unsigned int locality_count;
+ int locality;
+ int irq;
+ bool irq_tested;
+@@ -98,6 +102,8 @@ struct tpm_tis_data {
+ wait_queue_head_t read_queue;
+ const struct tpm_tis_phy_ops *phy_ops;
+ unsigned short rng_quality;
++ unsigned int timeout_min; /* usecs */
++ unsigned int timeout_max; /* usecs */
+ };
+
+ struct tpm_tis_phy_ops {
+diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c
+index c892781037037..e2ab6a329732b 100644
+--- a/drivers/char/tpm/tpm_tis_i2c_cr50.c
++++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c
+@@ -754,8 +754,8 @@ static int tpm_cr50_i2c_remove(struct i2c_client *client)
+ struct device *dev = &client->dev;
+
+ if (!chip) {
+- dev_err(dev, "Could not get client data at remove\n");
+- return -ENODEV;
++ dev_crit(dev, "Could not get client data at remove, memory corruption ahead\n");
++ return 0;
+ }
+
+ tpm_chip_unregister(chip);
+diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
+index 54584b4b00d19..aaa59a00eeaef 100644
+--- a/drivers/char/tpm/tpm_tis_spi_main.c
++++ b/drivers/char/tpm/tpm_tis_spi_main.c
+@@ -267,6 +267,7 @@ static const struct spi_device_id tpm_tis_spi_id[] = {
+ { "st33htpm-spi", (unsigned long)tpm_tis_spi_probe },
+ { "slb9670", (unsigned long)tpm_tis_spi_probe },
+ { "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe },
++ { "tpm_tis-spi", (unsigned long)tpm_tis_spi_probe },
+ { "cr50", (unsigned long)cr50_spi_probe },
+ {}
+ };
+diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
+index 91c772e38bb54..ff2ec71d592ef 100644
+--- a/drivers/char/tpm/tpm_vtpm_proxy.c
++++ b/drivers/char/tpm/tpm_vtpm_proxy.c
+@@ -683,37 +683,21 @@ static struct miscdevice vtpmx_miscdev = {
+ .fops = &vtpmx_fops,
+ };
+
+-static int vtpmx_init(void)
+-{
+- return misc_register(&vtpmx_miscdev);
+-}
+-
+-static void vtpmx_cleanup(void)
+-{
+- misc_deregister(&vtpmx_miscdev);
+-}
+-
+ static int __init vtpm_module_init(void)
+ {
+ int rc;
+
+- rc = vtpmx_init();
+- if (rc) {
+- pr_err("couldn't create vtpmx device\n");
+- return rc;
+- }
+-
+ workqueue = create_workqueue("tpm-vtpm");
+ if (!workqueue) {
+ pr_err("couldn't create workqueue\n");
+- rc = -ENOMEM;
+- goto err_vtpmx_cleanup;
++ return -ENOMEM;
+ }
+
+- return 0;
+-
+-err_vtpmx_cleanup:
+- vtpmx_cleanup();
++ rc = misc_register(&vtpmx_miscdev);
++ if (rc) {
++ pr_err("couldn't create vtpmx device\n");
++ destroy_workqueue(workqueue);
++ }
+
+ return rc;
+ }
+@@ -721,7 +705,7 @@ err_vtpmx_cleanup:
+ static void __exit vtpm_module_exit(void)
+ {
+ destroy_workqueue(workqueue);
+- vtpmx_cleanup();
++ misc_deregister(&vtpmx_miscdev);
+ }
+
+ module_init(vtpm_module_init);
+diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
+index 7eaf303a7a86f..77bc993d75130 100644
+--- a/drivers/char/virtio_console.c
++++ b/drivers/char/virtio_console.c
+@@ -1956,6 +1956,13 @@ static void virtcons_remove(struct virtio_device *vdev)
+ list_del(&portdev->list);
+ spin_unlock_irq(&pdrvdata_lock);
+
++ /* Device is going away, exit any polling for buffers */
++ virtio_break_device(vdev);
++ if (use_multiport(portdev))
++ flush_work(&portdev->control_work);
++ else
++ flush_work(&portdev->config_work);
++
+ /* Disable interrupts for vqs */
+ vdev->config->reset(vdev);
+ /* Finish up work that's lined up */
+@@ -2229,7 +2236,7 @@ static struct virtio_driver virtio_rproc_serial = {
+ .remove = virtcons_remove,
+ };
+
+-static int __init init(void)
++static int __init virtio_console_init(void)
+ {
+ int err;
+
+@@ -2264,7 +2271,7 @@ free:
+ return err;
+ }
+
+-static void __exit fini(void)
++static void __exit virtio_console_fini(void)
+ {
+ reclaim_dma_bufs();
+
+@@ -2274,8 +2281,8 @@ static void __exit fini(void)
+ class_destroy(pdrvdata.class);
+ debugfs_remove_recursive(pdrvdata.debugfs_dir);
+ }
+-module_init(init);
+-module_exit(fini);
++module_init(virtio_console_init);
++module_exit(virtio_console_fini);
+
+ MODULE_DESCRIPTION("Virtio console driver");
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c
+index e7f88f35c7028..39bcbfd908b46 100644
+--- a/drivers/char/xillybus/xillyusb.c
++++ b/drivers/char/xillybus/xillyusb.c
+@@ -549,6 +549,7 @@ static void cleanup_dev(struct kref *kref)
+ if (xdev->workq)
+ destroy_workqueue(xdev->workq);
+
++ usb_put_dev(xdev->udev);
+ kfree(xdev->channels); /* Argument may be NULL, and that's fine */
+ kfree(xdev);
+ }
+@@ -1912,6 +1913,7 @@ static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev)
+
+ dealloc:
+ endpoint_dealloc(xdev->msg_ep); /* Also frees FIFO mem if allocated */
++ xdev->msg_ep = NULL;
+ return -ENOMEM;
+ }
+
+diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
+index c5b3dc97396a6..d12465c227514 100644
+--- a/drivers/clk/Kconfig
++++ b/drivers/clk/Kconfig
+@@ -83,7 +83,7 @@ config COMMON_CLK_RK808
+ config COMMON_CLK_HI655X
+ tristate "Clock driver for Hi655x" if EXPERT
+ depends on (MFD_HI655X_PMIC || COMPILE_TEST)
+- depends on REGMAP
++ select REGMAP
+ default MFD_HI655X_PMIC
+ help
+ This driver supports the hi655x PMIC clock. This
+@@ -380,6 +380,7 @@ config COMMON_CLK_BD718XX
+ config COMMON_CLK_FIXED_MMIO
+ bool "Clock driver for Memory Mapped Fixed values"
+ depends on COMMON_CLK && OF
++ depends on HAS_IOMEM
+ help
+ Support for Memory Mapped IO Fixed clocks
+
+diff --git a/drivers/clk/actions/owl-s700.c b/drivers/clk/actions/owl-s700.c
+index a2f34d13fb543..6ea7da1d6d755 100644
+--- a/drivers/clk/actions/owl-s700.c
++++ b/drivers/clk/actions/owl-s700.c
+@@ -162,6 +162,7 @@ static struct clk_div_table hdmia_div_table[] = {
+
+ static struct clk_div_table rmii_div_table[] = {
+ {0, 4}, {1, 10},
++ {0, 0}
+ };
+
+ /* divider clocks */
+diff --git a/drivers/clk/actions/owl-s900.c b/drivers/clk/actions/owl-s900.c
+index 790890978424a..5144ada2c7e1a 100644
+--- a/drivers/clk/actions/owl-s900.c
++++ b/drivers/clk/actions/owl-s900.c
+@@ -140,7 +140,7 @@ static struct clk_div_table rmii_ref_div_table[] = {
+
+ static struct clk_div_table usb3_mac_div_table[] = {
+ { 1, 2 }, { 2, 3 }, { 3, 4 },
+- { 0, 8 },
++ { 0, 0 }
+ };
+
+ static struct clk_div_table i2s_div_table[] = {
+diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c
+index 428a6f4b9ebc5..8d36e615cd9dd 100644
+--- a/drivers/clk/at91/at91rm9200.c
++++ b/drivers/clk/at91/at91rm9200.c
+@@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = {
+ };
+
+ static const struct sck at91rm9200_systemck[] = {
+- { .n = "udpck", .p = "usbck", .id = 2 },
++ { .n = "udpck", .p = "usbck", .id = 1 },
+ { .n = "uhpck", .p = "usbck", .id = 4 },
+ { .n = "pck0", .p = "prog0", .id = 8 },
+ { .n = "pck1", .p = "prog1", .id = 9 },
+diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
+index b656d25a97678..fe772baeb15ff 100644
+--- a/drivers/clk/at91/clk-generated.c
++++ b/drivers/clk/at91/clk-generated.c
+@@ -106,6 +106,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req,
+ tmp_rate = parent_rate;
+ else
+ tmp_rate = parent_rate / div;
++
++ if (tmp_rate < req->min_rate || tmp_rate > req->max_rate)
++ return;
++
+ tmp_diff = abs(req->rate - tmp_rate);
+
+ if (*best_diff < 0 || *best_diff >= tmp_diff) {
+diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c
+index a80427980bf73..04d0dd8385945 100644
+--- a/drivers/clk/at91/clk-master.c
++++ b/drivers/clk/at91/clk-master.c
+@@ -280,7 +280,7 @@ static int clk_master_pres_set_rate(struct clk_hw *hw, unsigned long rate,
+
+ else if (pres == 3)
+ pres = MASTER_PRES_MAX;
+- else
++ else if (pres)
+ pres = ffs(pres) - 1;
+
+ spin_lock_irqsave(master->lock, flags);
+@@ -309,7 +309,7 @@ static unsigned long clk_master_pres_recalc_rate(struct clk_hw *hw,
+ spin_unlock_irqrestore(master->lock, flags);
+
+ pres = (val >> master->layout->pres_shift) & MASTER_PRES_MASK;
+- if (pres == 3 && characteristics->have_div3_pres)
++ if (pres == MASTER_PRES_MAX && characteristics->have_div3_pres)
+ pres = 3;
+ else
+ pres = (1 << pres);
+@@ -610,7 +610,7 @@ static int clk_sama7g5_master_set_rate(struct clk_hw *hw, unsigned long rate,
+
+ if (div == 3)
+ div = MASTER_PRES_MAX;
+- else
++ else if (div)
+ div = ffs(div) - 1;
+
+ spin_lock_irqsave(master->lock, flags);
+diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c
+index 34e3ab13741ac..a6600afa21454 100644
+--- a/drivers/clk/at91/clk-sam9x60-pll.c
++++ b/drivers/clk/at91/clk-sam9x60-pll.c
+@@ -71,8 +71,8 @@ static unsigned long sam9x60_frac_pll_recalc_rate(struct clk_hw *hw,
+ struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw);
+ struct sam9x60_frac *frac = to_sam9x60_frac(core);
+
+- return (parent_rate * (frac->mul + 1) +
+- ((u64)parent_rate * frac->frac >> 22));
++ return parent_rate * (frac->mul + 1) +
++ DIV_ROUND_CLOSEST_ULL((u64)parent_rate * frac->frac, (1 << 22));
+ }
+
+ static int sam9x60_frac_pll_prepare(struct clk_hw *hw)
+@@ -561,7 +561,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
+
+ ret = sam9x60_frac_pll_compute_mul_frac(&frac->core, FCORE_MIN,
+ parent_rate, true);
+- if (ret <= 0) {
++ if (ret < 0) {
+ hw = ERR_PTR(ret);
+ goto free;
+ }
+diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
+index 20ee9dccee787..b40035b011d0a 100644
+--- a/drivers/clk/at91/pmc.c
++++ b/drivers/clk/at91/pmc.c
+@@ -267,6 +267,11 @@ static int __init pmc_register_ops(void)
+ if (!np)
+ return -ENODEV;
+
++ if (!of_device_is_available(np)) {
++ of_node_put(np);
++ return -ENODEV;
++ }
++
+ pmcreg = device_node_to_regmap(np);
+ of_node_put(np);
+ if (IS_ERR(pmcreg))
+diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c
+index cf8c079aa086a..9eed97a299d0f 100644
+--- a/drivers/clk/at91/sama7g5.c
++++ b/drivers/clk/at91/sama7g5.c
+@@ -687,16 +687,16 @@ static const struct {
+ { .n = "pdmc0_gclk",
+ .id = 68,
+ .r = { .max = 50000000 },
+- .pp = { "syspll_divpmcck", "baudpll_divpmcck", },
+- .pp_mux_table = { 5, 8, },
++ .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
++ .pp_mux_table = { 5, 9, },
+ .pp_count = 2,
+ .pp_chg_id = INT_MIN, },
+
+ { .n = "pdmc1_gclk",
+ .id = 69,
+ .r = { .max = 50000000, },
+- .pp = { "syspll_divpmcck", "baudpll_divpmcck", },
+- .pp_mux_table = { 5, 8, },
++ .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
++ .pp_mux_table = { 5, 9, },
+ .pp_count = 2,
+ .pp_chg_id = INT_MIN, },
+
+@@ -982,16 +982,7 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
+ }
+
+ parent_names[0] = "cpupll_divpmcck";
+- hw = at91_clk_register_master_pres(regmap, "cpuck", 1, parent_names,
+- &mck0_layout, &mck0_characteristics,
+- &pmc_mck0_lock,
+- CLK_SET_RATE_PARENT, 0);
+- if (IS_ERR(hw))
+- goto err_free;
+-
+- sama7g5_pmc->chws[PMC_CPU] = hw;
+-
+- hw = at91_clk_register_master_div(regmap, "mck0", "cpuck",
++ hw = at91_clk_register_master_div(regmap, "mck0", "cpupll_divpmcck",
+ &mck0_layout, &mck0_characteristics,
+ &pmc_mck0_lock, 0);
+ if (IS_ERR(hw))
+diff --git a/drivers/clk/baikal-t1/ccu-div.c b/drivers/clk/baikal-t1/ccu-div.c
+index 4062092d67f90..a6642f3d33d44 100644
+--- a/drivers/clk/baikal-t1/ccu-div.c
++++ b/drivers/clk/baikal-t1/ccu-div.c
+@@ -34,6 +34,7 @@
+ #define CCU_DIV_CTL_CLKDIV_MASK(_width) \
+ GENMASK((_width) + CCU_DIV_CTL_CLKDIV_FLD - 1, CCU_DIV_CTL_CLKDIV_FLD)
+ #define CCU_DIV_CTL_LOCK_SHIFTED BIT(27)
++#define CCU_DIV_CTL_GATE_REF_BUF BIT(28)
+ #define CCU_DIV_CTL_LOCK_NORMAL BIT(31)
+
+ #define CCU_DIV_RST_DELAY_US 1
+@@ -170,6 +171,40 @@ static int ccu_div_gate_is_enabled(struct clk_hw *hw)
+ return !!(val & CCU_DIV_CTL_EN);
+ }
+
++static int ccu_div_buf_enable(struct clk_hw *hw)
++{
++ struct ccu_div *div = to_ccu_div(hw);
++ unsigned long flags;
++
++ spin_lock_irqsave(&div->lock, flags);
++ regmap_update_bits(div->sys_regs, div->reg_ctl,
++ CCU_DIV_CTL_GATE_REF_BUF, 0);
++ spin_unlock_irqrestore(&div->lock, flags);
++
++ return 0;
++}
++
++static void ccu_div_buf_disable(struct clk_hw *hw)
++{
++ struct ccu_div *div = to_ccu_div(hw);
++ unsigned long flags;
++
++ spin_lock_irqsave(&div->lock, flags);
++ regmap_update_bits(div->sys_regs, div->reg_ctl,
++ CCU_DIV_CTL_GATE_REF_BUF, CCU_DIV_CTL_GATE_REF_BUF);
++ spin_unlock_irqrestore(&div->lock, flags);
++}
++
++static int ccu_div_buf_is_enabled(struct clk_hw *hw)
++{
++ struct ccu_div *div = to_ccu_div(hw);
++ u32 val = 0;
++
++ regmap_read(div->sys_regs, div->reg_ctl, &val);
++
++ return !(val & CCU_DIV_CTL_GATE_REF_BUF);
++}
++
+ static unsigned long ccu_div_var_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+ {
+@@ -323,6 +358,7 @@ static const struct ccu_div_dbgfs_bit ccu_div_bits[] = {
+ CCU_DIV_DBGFS_BIT_ATTR("div_en", CCU_DIV_CTL_EN),
+ CCU_DIV_DBGFS_BIT_ATTR("div_rst", CCU_DIV_CTL_RST),
+ CCU_DIV_DBGFS_BIT_ATTR("div_bypass", CCU_DIV_CTL_SET_CLKDIV),
++ CCU_DIV_DBGFS_BIT_ATTR("div_buf", CCU_DIV_CTL_GATE_REF_BUF),
+ CCU_DIV_DBGFS_BIT_ATTR("div_lock", CCU_DIV_CTL_LOCK_NORMAL)
+ };
+
+@@ -441,6 +477,9 @@ static void ccu_div_var_debug_init(struct clk_hw *hw, struct dentry *dentry)
+ continue;
+ }
+
++ if (!strcmp("div_buf", name))
++ continue;
++
+ bits[didx] = ccu_div_bits[bidx];
+ bits[didx].div = div;
+
+@@ -477,6 +516,21 @@ static void ccu_div_gate_debug_init(struct clk_hw *hw, struct dentry *dentry)
+ &ccu_div_dbgfs_fixed_clkdiv_fops);
+ }
+
++static void ccu_div_buf_debug_init(struct clk_hw *hw, struct dentry *dentry)
++{
++ struct ccu_div *div = to_ccu_div(hw);
++ struct ccu_div_dbgfs_bit *bit;
++
++ bit = kmalloc(sizeof(*bit), GFP_KERNEL);
++ if (!bit)
++ return;
++
++ *bit = ccu_div_bits[3];
++ bit->div = div;
++ debugfs_create_file_unsafe(bit->name, ccu_div_dbgfs_mode, dentry, bit,
++ &ccu_div_dbgfs_bit_fops);
++}
++
+ static void ccu_div_fixed_debug_init(struct clk_hw *hw, struct dentry *dentry)
+ {
+ struct ccu_div *div = to_ccu_div(hw);
+@@ -489,6 +543,7 @@ static void ccu_div_fixed_debug_init(struct clk_hw *hw, struct dentry *dentry)
+
+ #define ccu_div_var_debug_init NULL
+ #define ccu_div_gate_debug_init NULL
++#define ccu_div_buf_debug_init NULL
+ #define ccu_div_fixed_debug_init NULL
+
+ #endif /* !CONFIG_DEBUG_FS */
+@@ -520,6 +575,13 @@ static const struct clk_ops ccu_div_gate_ops = {
+ .debug_init = ccu_div_gate_debug_init
+ };
+
++static const struct clk_ops ccu_div_buf_ops = {
++ .enable = ccu_div_buf_enable,
++ .disable = ccu_div_buf_disable,
++ .is_enabled = ccu_div_buf_is_enabled,
++ .debug_init = ccu_div_buf_debug_init
++};
++
+ static const struct clk_ops ccu_div_fixed_ops = {
+ .recalc_rate = ccu_div_fixed_recalc_rate,
+ .round_rate = ccu_div_fixed_round_rate,
+@@ -566,6 +628,8 @@ struct ccu_div *ccu_div_hw_register(const struct ccu_div_init_data *div_init)
+ } else if (div_init->type == CCU_DIV_GATE) {
+ hw_init.ops = &ccu_div_gate_ops;
+ div->divider = div_init->divider;
++ } else if (div_init->type == CCU_DIV_BUF) {
++ hw_init.ops = &ccu_div_buf_ops;
+ } else if (div_init->type == CCU_DIV_FIXED) {
+ hw_init.ops = &ccu_div_fixed_ops;
+ div->divider = div_init->divider;
+@@ -579,6 +643,7 @@ struct ccu_div *ccu_div_hw_register(const struct ccu_div_init_data *div_init)
+ goto err_free_div;
+ }
+ parent_data.fw_name = div_init->parent_name;
++ parent_data.name = div_init->parent_name;
+ hw_init.parent_data = &parent_data;
+ hw_init.num_parents = 1;
+
+diff --git a/drivers/clk/baikal-t1/ccu-div.h b/drivers/clk/baikal-t1/ccu-div.h
+index 795665caefbdc..4eb49ff4803c6 100644
+--- a/drivers/clk/baikal-t1/ccu-div.h
++++ b/drivers/clk/baikal-t1/ccu-div.h
+@@ -13,6 +13,14 @@
+ #include <linux/bits.h>
+ #include <linux/of.h>
+
++/*
++ * CCU Divider private clock IDs
++ * @CCU_SYS_SATA_CLK: CCU SATA internal clock
++ * @CCU_SYS_XGMAC_CLK: CCU XGMAC internal clock
++ */
++#define CCU_SYS_SATA_CLK -1
++#define CCU_SYS_XGMAC_CLK -2
++
+ /*
+ * CCU Divider private flags
+ * @CCU_DIV_SKIP_ONE: Due to some reason divider can't be set to 1.
+@@ -31,11 +39,13 @@
+ * enum ccu_div_type - CCU Divider types
+ * @CCU_DIV_VAR: Clocks gate with variable divider.
+ * @CCU_DIV_GATE: Clocks gate with fixed divider.
++ * @CCU_DIV_BUF: Clock gate with no divider.
+ * @CCU_DIV_FIXED: Ungateable clock with fixed divider.
+ */
+ enum ccu_div_type {
+ CCU_DIV_VAR,
+ CCU_DIV_GATE,
++ CCU_DIV_BUF,
+ CCU_DIV_FIXED
+ };
+
+diff --git a/drivers/clk/baikal-t1/clk-ccu-div.c b/drivers/clk/baikal-t1/clk-ccu-div.c
+index f141fda12b09a..90f4fda406ee6 100644
+--- a/drivers/clk/baikal-t1/clk-ccu-div.c
++++ b/drivers/clk/baikal-t1/clk-ccu-div.c
+@@ -76,6 +76,16 @@
+ .divider = _divider \
+ }
+
++#define CCU_DIV_BUF_INFO(_id, _name, _pname, _base, _flags) \
++ { \
++ .id = _id, \
++ .name = _name, \
++ .parent_name = _pname, \
++ .base = _base, \
++ .type = CCU_DIV_BUF, \
++ .flags = _flags \
++ }
++
+ #define CCU_DIV_FIXED_INFO(_id, _name, _pname, _divider) \
+ { \
+ .id = _id, \
+@@ -188,11 +198,14 @@ static const struct ccu_div_rst_map axi_rst_map[] = {
+ * for the SoC devices registers IO-operations.
+ */
+ static const struct ccu_div_info sys_info[] = {
+- CCU_DIV_VAR_INFO(CCU_SYS_SATA_REF_CLK, "sys_sata_ref_clk",
++ CCU_DIV_VAR_INFO(CCU_SYS_SATA_CLK, "sys_sata_clk",
+ "sata_clk", CCU_SYS_SATA_REF_BASE, 4,
+ CLK_SET_RATE_GATE,
+ CCU_DIV_SKIP_ONE | CCU_DIV_LOCK_SHIFTED |
+ CCU_DIV_RESET_DOMAIN),
++ CCU_DIV_BUF_INFO(CCU_SYS_SATA_REF_CLK, "sys_sata_ref_clk",
++ "sys_sata_clk", CCU_SYS_SATA_REF_BASE,
++ CLK_SET_RATE_PARENT),
+ CCU_DIV_VAR_INFO(CCU_SYS_APB_CLK, "sys_apb_clk",
+ "pcie_clk", CCU_SYS_APB_BASE, 5,
+ CLK_IS_CRITICAL, CCU_DIV_RESET_DOMAIN),
+@@ -204,10 +217,12 @@ static const struct ccu_div_info sys_info[] = {
+ "eth_clk", CCU_SYS_GMAC1_BASE, 5),
+ CCU_DIV_FIXED_INFO(CCU_SYS_GMAC1_PTP_CLK, "sys_gmac1_ptp_clk",
+ "eth_clk", 10),
+- CCU_DIV_GATE_INFO(CCU_SYS_XGMAC_REF_CLK, "sys_xgmac_ref_clk",
+- "eth_clk", CCU_SYS_XGMAC_BASE, 8),
++ CCU_DIV_GATE_INFO(CCU_SYS_XGMAC_CLK, "sys_xgmac_clk",
++ "eth_clk", CCU_SYS_XGMAC_BASE, 1),
++ CCU_DIV_FIXED_INFO(CCU_SYS_XGMAC_REF_CLK, "sys_xgmac_ref_clk",
++ "sys_xgmac_clk", 8),
+ CCU_DIV_FIXED_INFO(CCU_SYS_XGMAC_PTP_CLK, "sys_xgmac_ptp_clk",
+- "eth_clk", 10),
++ "sys_xgmac_clk", 8),
+ CCU_DIV_GATE_INFO(CCU_SYS_USB_CLK, "sys_usb_clk",
+ "eth_clk", CCU_SYS_USB_BASE, 10),
+ CCU_DIV_VAR_INFO(CCU_SYS_PVT_CLK, "sys_pvt_clk",
+@@ -396,6 +411,9 @@ static int ccu_div_clk_register(struct ccu_div_data *data)
+ init.base = info->base;
+ init.sys_regs = data->sys_regs;
+ init.divider = info->divider;
++ } else if (init.type == CCU_DIV_BUF) {
++ init.base = info->base;
++ init.sys_regs = data->sys_regs;
+ } else {
+ init.divider = info->divider;
+ }
+diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
+index a254512965eb8..141ce19bc5700 100644
+--- a/drivers/clk/bcm/clk-bcm2835.c
++++ b/drivers/clk/bcm/clk-bcm2835.c
+@@ -30,6 +30,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/delay.h>
+ #include <linux/io.h>
++#include <linux/math.h>
+ #include <linux/module.h>
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
+@@ -502,6 +503,8 @@ struct bcm2835_clock_data {
+ bool low_jitter;
+
+ u32 tcnt_mux;
++
++ bool round_up;
+ };
+
+ struct bcm2835_gate_data {
+@@ -932,8 +935,7 @@ static int bcm2835_clock_is_on(struct clk_hw *hw)
+
+ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
+ unsigned long rate,
+- unsigned long parent_rate,
+- bool round_up)
++ unsigned long parent_rate)
+ {
+ struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw);
+ const struct bcm2835_clock_data *data = clock->data;
+@@ -945,10 +947,6 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
+
+ rem = do_div(temp, rate);
+ div = temp;
+-
+- /* Round up and mask off the unused bits */
+- if (round_up && ((div & unused_frac_mask) != 0 || rem != 0))
+- div += unused_frac_mask + 1;
+ div &= ~unused_frac_mask;
+
+ /* different clamping limits apply for a mash clock */
+@@ -972,9 +970,9 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
+ return div;
+ }
+
+-static long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock,
+- unsigned long parent_rate,
+- u32 div)
++static unsigned long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock,
++ unsigned long parent_rate,
++ u32 div)
+ {
+ const struct bcm2835_clock_data *data = clock->data;
+ u64 temp;
+@@ -999,12 +997,34 @@ static long bcm2835_clock_rate_from_divisor(struct bcm2835_clock *clock,
+ return temp;
+ }
+
++static unsigned long bcm2835_round_rate(unsigned long rate)
++{
++ unsigned long scaler;
++ unsigned long limit;
++
++ limit = rate / 100000;
++
++ scaler = 1;
++ while (scaler < limit)
++ scaler *= 10;
++
++ /*
++ * If increasing a clock by less than 0.1% changes it
++ * from ..999.. to ..000.., round up.
++ */
++ if ((rate + scaler - 1) / scaler % 1000 == 0)
++ rate = roundup(rate, scaler);
++
++ return rate;
++}
++
+ static unsigned long bcm2835_clock_get_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+ {
+ struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw);
+ struct bcm2835_cprman *cprman = clock->cprman;
+ const struct bcm2835_clock_data *data = clock->data;
++ unsigned long rate;
+ u32 div;
+
+ if (data->int_bits == 0 && data->frac_bits == 0)
+@@ -1012,7 +1032,12 @@ static unsigned long bcm2835_clock_get_rate(struct clk_hw *hw,
+
+ div = cprman_read(cprman, data->div_reg);
+
+- return bcm2835_clock_rate_from_divisor(clock, parent_rate, div);
++ rate = bcm2835_clock_rate_from_divisor(clock, parent_rate, div);
++
++ if (data->round_up)
++ rate = bcm2835_round_rate(rate);
++
++ return rate;
+ }
+
+ static void bcm2835_clock_wait_busy(struct bcm2835_clock *clock)
+@@ -1079,7 +1104,7 @@ static int bcm2835_clock_set_rate(struct clk_hw *hw,
+ struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw);
+ struct bcm2835_cprman *cprman = clock->cprman;
+ const struct bcm2835_clock_data *data = clock->data;
+- u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate, false);
++ u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate);
+ u32 ctl;
+
+ spin_lock(&cprman->regs_lock);
+@@ -1130,7 +1155,7 @@ static unsigned long bcm2835_clock_choose_div_and_prate(struct clk_hw *hw,
+
+ if (!(BIT(parent_idx) & data->set_rate_parent)) {
+ *prate = clk_hw_get_rate(parent);
+- *div = bcm2835_clock_choose_div(hw, rate, *prate, true);
++ *div = bcm2835_clock_choose_div(hw, rate, *prate);
+
+ *avgrate = bcm2835_clock_rate_from_divisor(clock, *prate, *div);
+
+@@ -1216,7 +1241,7 @@ static int bcm2835_clock_determine_rate(struct clk_hw *hw,
+ rate = bcm2835_clock_choose_div_and_prate(hw, i, req->rate,
+ &div, &prate,
+ &avgrate);
+- if (rate > best_rate && rate <= req->rate) {
++ if (abs(req->rate - rate) < abs(req->rate - best_rate)) {
+ best_parent = parent;
+ best_prate = prate;
+ best_rate = rate;
+@@ -1790,7 +1815,7 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
+ .load_mask = CM_PLLC_LOADPER,
+ .hold_mask = CM_PLLC_HOLDPER,
+ .fixed_divider = 1,
+- .flags = CLK_SET_RATE_PARENT),
++ .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT),
+
+ /*
+ * PLLD is the display PLL, used to drive DSI display panels.
+@@ -2149,7 +2174,8 @@ static const struct bcm2835_clk_desc clk_desc_array[] = {
+ .div_reg = CM_UARTDIV,
+ .int_bits = 10,
+ .frac_bits = 12,
+- .tcnt_mux = 28),
++ .tcnt_mux = 28,
++ .round_up = true),
+
+ /* TV encoder clock. Only operating frequency is 108Mhz. */
+ [BCM2835_CLOCK_VEC] = REGISTER_PER_CLK(
+diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
+index 33da30f99c79b..d39c44b61c523 100644
+--- a/drivers/clk/bcm/clk-iproc-pll.c
++++ b/drivers/clk/bcm/clk-iproc-pll.c
+@@ -736,6 +736,7 @@ void iproc_pll_clk_setup(struct device_node *node,
+ const char *parent_name;
+ struct iproc_clk *iclk_array;
+ struct clk_hw_onecell_data *clk_data;
++ const char *clk_name;
+
+ if (WARN_ON(!pll_ctrl) || WARN_ON(!clk_ctrl))
+ return;
+@@ -783,7 +784,12 @@ void iproc_pll_clk_setup(struct device_node *node,
+ iclk = &iclk_array[0];
+ iclk->pll = pll;
+
+- init.name = node->name;
++ ret = of_property_read_string_index(node, "clock-output-names",
++ 0, &clk_name);
++ if (WARN_ON(ret))
++ goto err_pll_register;
++
++ init.name = clk_name;
+ init.ops = &iproc_pll_ops;
+ init.flags = 0;
+ parent_name = of_clk_get_parent_name(node, 0);
+@@ -803,13 +809,11 @@ void iproc_pll_clk_setup(struct device_node *node,
+ goto err_pll_register;
+
+ clk_data->hws[0] = &iclk->hw;
++ parent_name = clk_name;
+
+ /* now initialize and register all leaf clocks */
+ for (i = 1; i < num_clks; i++) {
+- const char *clk_name;
+-
+ memset(&init, 0, sizeof(init));
+- parent_name = node->name;
+
+ ret = of_property_read_string_index(node, "clock-output-names",
+ i, &clk_name);
+diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c
+index dd3b71eafabf3..56c5166f841ae 100644
+--- a/drivers/clk/bcm/clk-raspberrypi.c
++++ b/drivers/clk/bcm/clk-raspberrypi.c
+@@ -139,7 +139,7 @@ static unsigned long raspberrypi_fw_get_rate(struct clk_hw *hw,
+ ret = raspberrypi_clock_property(rpi->firmware, data,
+ RPI_FIRMWARE_GET_CLOCK_RATE, &val);
+ if (ret)
+- return ret;
++ return 0;
+
+ return val;
+ }
+@@ -156,7 +156,7 @@ static int raspberrypi_fw_set_rate(struct clk_hw *hw, unsigned long rate,
+ ret = raspberrypi_clock_property(rpi->firmware, data,
+ RPI_FIRMWARE_SET_CLOCK_RATE, &_rate);
+ if (ret)
+- dev_err_ratelimited(rpi->dev, "Failed to change %s frequency: %d",
++ dev_err_ratelimited(rpi->dev, "Failed to change %s frequency: %d\n",
+ clk_hw_get_name(hw), ret);
+
+ return ret;
+@@ -208,7 +208,7 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi,
+ RPI_FIRMWARE_GET_MIN_CLOCK_RATE,
+ &min_rate);
+ if (ret) {
+- dev_err(rpi->dev, "Failed to get clock %d min freq: %d",
++ dev_err(rpi->dev, "Failed to get clock %d min freq: %d\n",
+ id, ret);
+ return ERR_PTR(ret);
+ }
+@@ -251,8 +251,13 @@ static int raspberrypi_discover_clocks(struct raspberrypi_clk *rpi,
+ struct rpi_firmware_get_clocks_response *clks;
+ int ret;
+
++ /*
++ * The firmware doesn't guarantee that the last element of
++ * RPI_FIRMWARE_GET_CLOCKS is zeroed. So allocate an additional
++ * zero element as sentinel.
++ */
+ clks = devm_kcalloc(rpi->dev,
+- sizeof(*clks), RPI_FIRMWARE_NUM_CLK_ID,
++ RPI_FIRMWARE_NUM_CLK_ID + 1, sizeof(*clks),
+ GFP_KERNEL);
+ if (!clks)
+ return -ENOMEM;
+diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c
+index bccdfa00fd373..67a9edbba29c4 100644
+--- a/drivers/clk/berlin/bg2.c
++++ b/drivers/clk/berlin/bg2.c
+@@ -500,12 +500,15 @@ static void __init berlin2_clock_setup(struct device_node *np)
+ int n, ret;
+
+ clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL);
+- if (!clk_data)
++ if (!clk_data) {
++ of_node_put(parent_np);
+ return;
++ }
+ clk_data->num = MAX_CLKS;
+ hws = clk_data->hws;
+
+ gbase = of_iomap(parent_np, 0);
++ of_node_put(parent_np);
+ if (!gbase)
+ return;
+
+diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c
+index e9518d35f262e..dd2784bb75b64 100644
+--- a/drivers/clk/berlin/bg2q.c
++++ b/drivers/clk/berlin/bg2q.c
+@@ -286,19 +286,23 @@ static void __init berlin2q_clock_setup(struct device_node *np)
+ int n, ret;
+
+ clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL);
+- if (!clk_data)
++ if (!clk_data) {
++ of_node_put(parent_np);
+ return;
++ }
+ clk_data->num = MAX_CLKS;
+ hws = clk_data->hws;
+
+ gbase = of_iomap(parent_np, 0);
+ if (!gbase) {
++ of_node_put(parent_np);
+ pr_err("%pOF: Unable to map global base\n", np);
+ return;
+ }
+
+ /* BG2Q CPU PLL is not part of global registers */
+ cpupll_base = of_iomap(parent_np, 1);
++ of_node_put(parent_np);
+ if (!cpupll_base) {
+ pr_err("%pOF: Unable to map cpupll base\n", np);
+ iounmap(gbase);
+diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
+index bc3be5f3eae15..9c3305bcb27ae 100644
+--- a/drivers/clk/clk-ast2600.c
++++ b/drivers/clk/clk-ast2600.c
+@@ -51,6 +51,8 @@ static DEFINE_SPINLOCK(aspeed_g6_clk_lock);
+ static struct clk_hw_onecell_data *aspeed_g6_clk_data;
+
+ static void __iomem *scu_g6_base;
++/* AST2600 revision: A0, A1, A2, etc */
++static u8 soc_rev;
+
+ /*
+ * Clocks marked with CLK_IS_CRITICAL:
+@@ -191,9 +193,8 @@ static struct clk_hw *ast2600_calc_pll(const char *name, u32 val)
+ static struct clk_hw *ast2600_calc_apll(const char *name, u32 val)
+ {
+ unsigned int mult, div;
+- u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV);
+
+- if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) {
++ if (soc_rev >= 2) {
+ if (val & BIT(24)) {
+ /* Pass through mode */
+ mult = div = 1;
+@@ -621,7 +622,7 @@ static int aspeed_g6_clk_probe(struct platform_device *pdev)
+ regmap_write(map, 0x308, 0x12000); /* 3x3 = 9 */
+
+ /* P-Bus (BCLK) clock divider */
+- hw = clk_hw_register_divider_table(dev, "bclk", "hpll", 0,
++ hw = clk_hw_register_divider_table(dev, "bclk", "epll", 0,
+ scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
+ ast2600_div_table,
+ &aspeed_g6_clk_lock);
+@@ -707,7 +708,7 @@ static const u32 ast2600_a1_axi_ahb200_tbl[] = {
+ static void __init aspeed_g6_cc(struct regmap *map)
+ {
+ struct clk_hw *hw;
+- u32 val, div, divbits, chip_id, axi_div, ahb_div;
++ u32 val, div, divbits, axi_div, ahb_div;
+
+ clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000);
+
+@@ -738,8 +739,7 @@ static void __init aspeed_g6_cc(struct regmap *map)
+ axi_div = 2;
+
+ divbits = (val >> 11) & 0x3;
+- regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id);
+- if (chip_id & BIT(16)) {
++ if (soc_rev >= 1) {
+ if (!divbits) {
+ ahb_div = ast2600_a1_axi_ahb200_tbl[(val >> 8) & 0x3];
+ if (val & BIT(16))
+@@ -784,6 +784,8 @@ static void __init aspeed_g6_cc_init(struct device_node *np)
+ if (!scu_g6_base)
+ return;
+
++ soc_rev = (readl(scu_g6_base + ASPEED_G6_SILICON_REV) & CHIP_REVISION_ID) >> 16;
++
+ aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws,
+ ASPEED_G6_NUM_CLKS), GFP_KERNEL);
+ if (!aspeed_g6_clk_data)
+diff --git a/drivers/clk/clk-bm1880.c b/drivers/clk/clk-bm1880.c
+index e6d6599d310a1..fad78a22218e8 100644
+--- a/drivers/clk/clk-bm1880.c
++++ b/drivers/clk/clk-bm1880.c
+@@ -522,14 +522,6 @@ static struct clk_hw *bm1880_clk_register_pll(struct bm1880_pll_hw_clock *pll_cl
+ return hw;
+ }
+
+-static void bm1880_clk_unregister_pll(struct clk_hw *hw)
+-{
+- struct bm1880_pll_hw_clock *pll_hw = to_bm1880_pll_clk(hw);
+-
+- clk_hw_unregister(hw);
+- kfree(pll_hw);
+-}
+-
+ static int bm1880_clk_register_plls(struct bm1880_pll_hw_clock *clks,
+ int num_clks,
+ struct bm1880_clock_data *data)
+@@ -555,7 +547,7 @@ static int bm1880_clk_register_plls(struct bm1880_pll_hw_clock *clks,
+
+ err_clk:
+ while (i--)
+- bm1880_clk_unregister_pll(data->hw_data.hws[clks[i].pll.id]);
++ clk_hw_unregister(data->hw_data.hws[clks[i].pll.id]);
+
+ return PTR_ERR(hw);
+ }
+@@ -695,14 +687,6 @@ static struct clk_hw *bm1880_clk_register_div(struct bm1880_div_hw_clock *div_cl
+ return hw;
+ }
+
+-static void bm1880_clk_unregister_div(struct clk_hw *hw)
+-{
+- struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw);
+-
+- clk_hw_unregister(hw);
+- kfree(div_hw);
+-}
+-
+ static int bm1880_clk_register_divs(struct bm1880_div_hw_clock *clks,
+ int num_clks,
+ struct bm1880_clock_data *data)
+@@ -729,7 +713,7 @@ static int bm1880_clk_register_divs(struct bm1880_div_hw_clock *clks,
+
+ err_clk:
+ while (i--)
+- bm1880_clk_unregister_div(data->hw_data.hws[clks[i].div.id]);
++ clk_hw_unregister(data->hw_data.hws[clks[i].div.id]);
+
+ return PTR_ERR(hw);
+ }
+diff --git a/drivers/clk/clk-cdce925.c b/drivers/clk/clk-cdce925.c
+index 308b353815e17..470d91d7314db 100644
+--- a/drivers/clk/clk-cdce925.c
++++ b/drivers/clk/clk-cdce925.c
+@@ -705,6 +705,10 @@ static int cdce925_probe(struct i2c_client *client,
+ for (i = 0; i < data->chip_info->num_plls; ++i) {
+ pll_clk_name[i] = kasprintf(GFP_KERNEL, "%pOFn.pll%d",
+ client->dev.of_node, i);
++ if (!pll_clk_name[i]) {
++ err = -ENOMEM;
++ goto error;
++ }
+ init.name = pll_clk_name[i];
+ data->pll[i].chip = data;
+ data->pll[i].hw.init = &init;
+@@ -746,6 +750,10 @@ static int cdce925_probe(struct i2c_client *client,
+ init.num_parents = 1;
+ init.parent_names = &parent_name; /* Mux Y1 to input */
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.Y1", client->dev.of_node);
++ if (!init.name) {
++ err = -ENOMEM;
++ goto error;
++ }
+ data->clk[0].chip = data;
+ data->clk[0].hw.init = &init;
+ data->clk[0].index = 0;
+@@ -764,6 +772,10 @@ static int cdce925_probe(struct i2c_client *client,
+ for (i = 1; i < data->chip_info->num_outputs; ++i) {
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.Y%d",
+ client->dev.of_node, i+1);
++ if (!init.name) {
++ err = -ENOMEM;
++ goto error;
++ }
+ data->clk[i].chip = data;
+ data->clk[i].hw.init = &init;
+ data->clk[i].index = i;
+diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c
+index a2c6486ef1708..f8417ee2961aa 100644
+--- a/drivers/clk/clk-clps711x.c
++++ b/drivers/clk/clk-clps711x.c
+@@ -28,11 +28,13 @@ static const struct clk_div_table spi_div_table[] = {
+ { .val = 1, .div = 8, },
+ { .val = 2, .div = 2, },
+ { .val = 3, .div = 1, },
++ { /* sentinel */ }
+ };
+
+ static const struct clk_div_table timer_div_table[] = {
+ { .val = 0, .div = 256, },
+ { .val = 1, .div = 1, },
++ { /* sentinel */ }
+ };
+
+ struct clps711x_clk {
+diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c
+index 2ef819606c417..1a4e6340f95ce 100644
+--- a/drivers/clk/clk-conf.c
++++ b/drivers/clk/clk-conf.c
+@@ -33,9 +33,12 @@ static int __set_clk_parents(struct device_node *node, bool clk_supplier)
+ else
+ return rc;
+ }
+- if (clkspec.np == node && !clk_supplier)
++ if (clkspec.np == node && !clk_supplier) {
++ of_node_put(clkspec.np);
+ return 0;
++ }
+ pclk = of_clk_get_from_provider(&clkspec);
++ of_node_put(clkspec.np);
+ if (IS_ERR(pclk)) {
+ if (PTR_ERR(pclk) != -EPROBE_DEFER)
+ pr_warn("clk: couldn't get parent clock %d for %pOF\n",
+@@ -48,10 +51,12 @@ static int __set_clk_parents(struct device_node *node, bool clk_supplier)
+ if (rc < 0)
+ goto err;
+ if (clkspec.np == node && !clk_supplier) {
++ of_node_put(clkspec.np);
+ rc = 0;
+ goto err;
+ }
+ clk = of_clk_get_from_provider(&clkspec);
++ of_node_put(clkspec.np);
+ if (IS_ERR(clk)) {
+ if (PTR_ERR(clk) != -EPROBE_DEFER)
+ pr_warn("clk: couldn't get assigned clock %d for %pOF\n",
+@@ -93,10 +98,13 @@ static int __set_clk_rates(struct device_node *node, bool clk_supplier)
+ else
+ return rc;
+ }
+- if (clkspec.np == node && !clk_supplier)
++ if (clkspec.np == node && !clk_supplier) {
++ of_node_put(clkspec.np);
+ return 0;
++ }
+
+ clk = of_clk_get_from_provider(&clkspec);
++ of_node_put(clkspec.np);
+ if (IS_ERR(clk)) {
+ if (PTR_ERR(clk) != -EPROBE_DEFER)
+ pr_warn("clk: couldn't get clock %d for %pOF\n",
+diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
+index f9d5b73343417..737aa70e2cb3d 100644
+--- a/drivers/clk/clk-devres.c
++++ b/drivers/clk/clk-devres.c
+@@ -4,42 +4,101 @@
+ #include <linux/export.h>
+ #include <linux/gfp.h>
+
++struct devm_clk_state {
++ struct clk *clk;
++ void (*exit)(struct clk *clk);
++};
++
+ static void devm_clk_release(struct device *dev, void *res)
+ {
+- clk_put(*(struct clk **)res);
++ struct devm_clk_state *state = res;
++
++ if (state->exit)
++ state->exit(state->clk);
++
++ clk_put(state->clk);
+ }
+
+-struct clk *devm_clk_get(struct device *dev, const char *id)
++static struct clk *__devm_clk_get(struct device *dev, const char *id,
++ struct clk *(*get)(struct device *dev, const char *id),
++ int (*init)(struct clk *clk),
++ void (*exit)(struct clk *clk))
+ {
+- struct clk **ptr, *clk;
++ struct devm_clk_state *state;
++ struct clk *clk;
++ int ret;
+
+- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
+- if (!ptr)
++ state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
++ if (!state)
+ return ERR_PTR(-ENOMEM);
+
+- clk = clk_get(dev, id);
+- if (!IS_ERR(clk)) {
+- *ptr = clk;
+- devres_add(dev, ptr);
+- } else {
+- devres_free(ptr);
++ clk = get(dev, id);
++ if (IS_ERR(clk)) {
++ ret = PTR_ERR(clk);
++ goto err_clk_get;
+ }
+
++ if (init) {
++ ret = init(clk);
++ if (ret)
++ goto err_clk_init;
++ }
++
++ state->clk = clk;
++ state->exit = exit;
++
++ devres_add(dev, state);
++
+ return clk;
++
++err_clk_init:
++
++ clk_put(clk);
++err_clk_get:
++
++ devres_free(state);
++ return ERR_PTR(ret);
++}
++
++struct clk *devm_clk_get(struct device *dev, const char *id)
++{
++ return __devm_clk_get(dev, id, clk_get, NULL, NULL);
+ }
+ EXPORT_SYMBOL(devm_clk_get);
+
+-struct clk *devm_clk_get_optional(struct device *dev, const char *id)
++struct clk *devm_clk_get_prepared(struct device *dev, const char *id)
+ {
+- struct clk *clk = devm_clk_get(dev, id);
++ return __devm_clk_get(dev, id, clk_get, clk_prepare, clk_unprepare);
++}
++EXPORT_SYMBOL_GPL(devm_clk_get_prepared);
+
+- if (clk == ERR_PTR(-ENOENT))
+- return NULL;
++struct clk *devm_clk_get_enabled(struct device *dev, const char *id)
++{
++ return __devm_clk_get(dev, id, clk_get,
++ clk_prepare_enable, clk_disable_unprepare);
++}
++EXPORT_SYMBOL_GPL(devm_clk_get_enabled);
+
+- return clk;
++struct clk *devm_clk_get_optional(struct device *dev, const char *id)
++{
++ return __devm_clk_get(dev, id, clk_get_optional, NULL, NULL);
+ }
+ EXPORT_SYMBOL(devm_clk_get_optional);
+
++struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id)
++{
++ return __devm_clk_get(dev, id, clk_get_optional,
++ clk_prepare, clk_unprepare);
++}
++EXPORT_SYMBOL_GPL(devm_clk_get_optional_prepared);
++
++struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id)
++{
++ return __devm_clk_get(dev, id, clk_get_optional,
++ clk_prepare_enable, clk_disable_unprepare);
++}
++EXPORT_SYMBOL_GPL(devm_clk_get_optional_enabled);
++
+ struct clk_bulk_devres {
+ struct clk_bulk_data *clks;
+ int num_clks;
+@@ -146,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put);
+ struct clk *devm_get_clk_from_child(struct device *dev,
+ struct device_node *np, const char *con_id)
+ {
+- struct clk **ptr, *clk;
++ struct devm_clk_state *state;
++ struct clk *clk;
+
+- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
+- if (!ptr)
++ state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
++ if (!state)
+ return ERR_PTR(-ENOMEM);
+
+ clk = of_clk_get_by_name(np, con_id);
+ if (!IS_ERR(clk)) {
+- *ptr = clk;
+- devres_add(dev, ptr);
++ state->clk = clk;
++ devres_add(dev, state);
+ } else {
+- devres_free(ptr);
++ devres_free(state);
+ }
+
+ return clk;
+diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c
+index 78d5ea669fea7..2fe36f579ac5e 100644
+--- a/drivers/clk/clk-oxnas.c
++++ b/drivers/clk/clk-oxnas.c
+@@ -207,7 +207,7 @@ static const struct of_device_id oxnas_stdclk_dt_ids[] = {
+
+ static int oxnas_stdclk_probe(struct platform_device *pdev)
+ {
+- struct device_node *np = pdev->dev.of_node;
++ struct device_node *np = pdev->dev.of_node, *parent_np;
+ const struct oxnas_stdclk_data *data;
+ const struct of_device_id *id;
+ struct regmap *regmap;
+@@ -219,7 +219,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev)
+ return -ENODEV;
+ data = id->data;
+
+- regmap = syscon_node_to_regmap(of_get_parent(np));
++ parent_np = of_get_parent(np);
++ regmap = syscon_node_to_regmap(parent_np);
++ of_node_put(parent_np);
+ if (IS_ERR(regmap)) {
+ dev_err(&pdev->dev, "failed to have parent regmap\n");
+ return PTR_ERR(regmap);
+diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
+index 88898b97a4431..5eddb9f0d6bdb 100644
+--- a/drivers/clk/clk-qoriq.c
++++ b/drivers/clk/clk-qoriq.c
+@@ -1063,8 +1063,13 @@ static void __init _clockgen_init(struct device_node *np, bool legacy);
+ */
+ static void __init legacy_init_clockgen(struct device_node *np)
+ {
+- if (!clockgen.node)
+- _clockgen_init(of_get_parent(np), true);
++ if (!clockgen.node) {
++ struct device_node *parent_np;
++
++ parent_np = of_get_parent(np);
++ _clockgen_init(parent_np, true);
++ of_node_put(parent_np);
++ }
+ }
+
+ /* Legacy node */
+@@ -1159,6 +1164,7 @@ static struct clk * __init create_sysclk(const char *name)
+ sysclk = of_get_child_by_name(clockgen.node, "sysclk");
+ if (sysclk) {
+ clk = sysclk_from_fixed(sysclk, name);
++ of_node_put(sysclk);
+ if (!IS_ERR(clk))
+ return clk;
+ }
+diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c
+index 57ae183982d8c..91a6bc74ebd5a 100644
+--- a/drivers/clk/clk-si5341.c
++++ b/drivers/clk/clk-si5341.c
+@@ -798,6 +798,15 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw,
+ u32 r_divider;
+ u8 r[3];
+
++ err = regmap_read(output->data->regmap,
++ SI5341_OUT_CONFIG(output), &val);
++ if (err < 0)
++ return err;
++
++ /* If SI5341_OUT_CFG_RDIV_FORCE2 is set, r_divider is 2 */
++ if (val & SI5341_OUT_CFG_RDIV_FORCE2)
++ return parent_rate / 2;
++
+ err = regmap_bulk_read(output->data->regmap,
+ SI5341_OUT_R_REG(output), r, 3);
+ if (err < 0)
+@@ -814,13 +823,6 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw,
+ r_divider += 1;
+ r_divider <<= 1;
+
+- err = regmap_read(output->data->regmap,
+- SI5341_OUT_CONFIG(output), &val);
+- if (err < 0)
+- return err;
+-
+- if (val & SI5341_OUT_CFG_RDIV_FORCE2)
+- r_divider = 2;
+
+ return parent_rate / r_divider;
+ }
+@@ -1552,7 +1554,7 @@ static int si5341_probe(struct i2c_client *client,
+ struct clk_init_data init;
+ struct clk *input;
+ const char *root_clock_name;
+- const char *synth_clock_names[SI5341_NUM_SYNTH];
++ const char *synth_clock_names[SI5341_NUM_SYNTH] = { NULL };
+ int err;
+ unsigned int i;
+ struct clk_si5341_output_config config[SI5341_MAX_NUM_OUTPUTS];
+@@ -1696,6 +1698,10 @@ static int si5341_probe(struct i2c_client *client,
+ for (i = 0; i < data->num_synth; ++i) {
+ synth_clock_names[i] = devm_kasprintf(&client->dev, GFP_KERNEL,
+ "%s.N%u", client->dev.of_node->name, i);
++ if (!synth_clock_names[i]) {
++ err = -ENOMEM;
++ goto free_clk_names;
++ }
+ init.name = synth_clock_names[i];
+ data->synth[i].index = i;
+ data->synth[i].data = data;
+@@ -1704,6 +1710,7 @@ static int si5341_probe(struct i2c_client *client,
+ if (err) {
+ dev_err(&client->dev,
+ "synth N%u registration failed\n", i);
++ goto free_clk_names;
+ }
+ }
+
+@@ -1713,6 +1720,10 @@ static int si5341_probe(struct i2c_client *client,
+ for (i = 0; i < data->num_outputs; ++i) {
+ init.name = kasprintf(GFP_KERNEL, "%s.%d",
+ client->dev.of_node->name, i);
++ if (!init.name) {
++ err = -ENOMEM;
++ goto free_clk_names;
++ }
+ init.flags = config[i].synth_master ? CLK_SET_RATE_PARENT : 0;
+ data->clk[i].index = i;
+ data->clk[i].data = data;
+@@ -1734,17 +1745,17 @@ static int si5341_probe(struct i2c_client *client,
+ if (err) {
+ dev_err(&client->dev,
+ "output %u registration failed\n", i);
+- goto cleanup;
++ goto free_clk_names;
+ }
+ if (config[i].always_on)
+ clk_prepare(data->clk[i].hw.clk);
+ }
+
+- err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get,
++ err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get,
+ data);
+ if (err) {
+ dev_err(&client->dev, "unable to add clk provider\n");
+- goto cleanup;
++ goto free_clk_names;
+ }
+
+ if (initialization_required) {
+@@ -1752,11 +1763,11 @@ static int si5341_probe(struct i2c_client *client,
+ regcache_cache_only(data->regmap, false);
+ err = regcache_sync(data->regmap);
+ if (err < 0)
+- goto cleanup;
++ goto free_clk_names;
+
+ err = si5341_finalize_defaults(data);
+ if (err < 0)
+- goto cleanup;
++ goto free_clk_names;
+ }
+
+ /* wait for device to report input clock present and PLL lock */
+@@ -1765,32 +1776,31 @@ static int si5341_probe(struct i2c_client *client,
+ 10000, 250000);
+ if (err) {
+ dev_err(&client->dev, "Error waiting for input clock or PLL lock\n");
+- goto cleanup;
++ goto free_clk_names;
+ }
+
+ /* clear sticky alarm bits from initialization */
+ err = regmap_write(data->regmap, SI5341_STATUS_STICKY, 0);
+ if (err) {
+ dev_err(&client->dev, "unable to clear sticky status\n");
+- goto cleanup;
++ goto free_clk_names;
+ }
+
+ err = sysfs_create_files(&client->dev.kobj, si5341_attributes);
+- if (err) {
++ if (err)
+ dev_err(&client->dev, "unable to create sysfs files\n");
+- goto cleanup;
+- }
+
++free_clk_names:
+ /* Free the names, clk framework makes copies */
+ for (i = 0; i < data->num_synth; ++i)
+ devm_kfree(&client->dev, (void *)synth_clock_names[i]);
+
+- return 0;
+-
+ cleanup:
+- for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) {
+- if (data->clk[i].vddo_reg)
+- regulator_disable(data->clk[i].vddo_reg);
++ if (err) {
++ for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) {
++ if (data->clk[i].vddo_reg)
++ regulator_disable(data->clk[i].vddo_reg);
++ }
+ }
+ return err;
+ }
+diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
+index af46176ad0539..473dfe632cc57 100644
+--- a/drivers/clk/clk-stm32f4.c
++++ b/drivers/clk/clk-stm32f4.c
+@@ -129,7 +129,6 @@ static const struct stm32f4_gate_data stm32f429_gates[] __initconst = {
+ { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" },
+- { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" },
+ };
+
+ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = {
+@@ -211,7 +210,6 @@ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = {
+ { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" },
+- { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" },
+ };
+
+ static const struct stm32f4_gate_data stm32f746_gates[] __initconst = {
+@@ -286,7 +284,6 @@ static const struct stm32f4_gate_data stm32f746_gates[] __initconst = {
+ { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" },
+- { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" },
+ };
+
+ static const struct stm32f4_gate_data stm32f769_gates[] __initconst = {
+@@ -364,7 +361,6 @@ static const struct stm32f4_gate_data stm32f769_gates[] __initconst = {
+ { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" },
+- { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" },
+ { STM32F4_RCC_APB2ENR, 30, "mdio", "apb2_div" },
+ };
+
+diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
+index c6d3b1ab3d55c..c52f02471bb4f 100644
+--- a/drivers/clk/clk-versaclock5.c
++++ b/drivers/clk/clk-versaclock5.c
+@@ -992,6 +992,11 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ }
+
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.mux", client->dev.of_node);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
++
+ init.ops = &vc5_mux_ops;
+ init.flags = 0;
+ init.parent_names = parent_names;
+@@ -1006,6 +1011,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.dbl",
+ client->dev.of_node);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_dbl_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1021,6 +1030,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ /* Register PFD */
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.pfd", client->dev.of_node);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_pfd_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1038,6 +1051,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ /* Register PLL */
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.pll", client->dev.of_node);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_pll_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1057,6 +1074,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.fod%d",
+ client->dev.of_node, idx);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_fod_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1075,6 +1096,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.out0_sel_i2cb",
+ client->dev.of_node);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_clk_out_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1101,6 +1126,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ memset(&init, 0, sizeof(init));
+ init.name = kasprintf(GFP_KERNEL, "%pOFn.out%d",
+ client->dev.of_node, idx + 1);
++ if (!init.name) {
++ ret = -ENOMEM;
++ goto err_clk;
++ }
+ init.ops = &vc5_clk_out_ops;
+ init.flags = CLK_SET_RATE_PARENT;
+ init.parent_names = parent_names;
+@@ -1204,7 +1233,7 @@ static const struct vc5_chip_info idt_5p49v6901_info = {
+ .model = IDT_VC6_5P49V6901,
+ .clk_fod_cnt = 4,
+ .clk_out_cnt = 5,
+- .flags = VC5_HAS_PFD_FREQ_DBL,
++ .flags = VC5_HAS_PFD_FREQ_DBL | VC5_HAS_BYPASS_SYNC_BIT,
+ };
+
+ static const struct vc5_chip_info idt_5p49v6965_info = {
+diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
+index 65508eb89ec99..5eba83745d8de 100644
+--- a/drivers/clk/clk.c
++++ b/drivers/clk/clk.c
+@@ -631,6 +631,24 @@ static void clk_core_get_boundaries(struct clk_core *core,
+ *max_rate = min(*max_rate, clk_user->max_rate);
+ }
+
++static bool clk_core_check_boundaries(struct clk_core *core,
++ unsigned long min_rate,
++ unsigned long max_rate)
++{
++ struct clk *user;
++
++ lockdep_assert_held(&prepare_lock);
++
++ if (min_rate > core->max_rate || max_rate < core->min_rate)
++ return false;
++
++ hlist_for_each_entry(user, &core->clks, clks_node)
++ if (min_rate > user->max_rate || max_rate < user->min_rate)
++ return false;
++
++ return true;
++}
++
+ void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate,
+ unsigned long max_rate)
+ {
+@@ -828,10 +846,9 @@ static void clk_core_unprepare(struct clk_core *core)
+ if (core->ops->unprepare)
+ core->ops->unprepare(core->hw);
+
+- clk_pm_runtime_put(core);
+-
+ trace_clk_unprepare_complete(core);
+ clk_core_unprepare(core->parent);
++ clk_pm_runtime_put(core);
+ }
+
+ static void clk_core_unprepare_lock(struct clk_core *core)
+@@ -2347,6 +2364,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max)
+ clk->min_rate = min;
+ clk->max_rate = max;
+
++ if (!clk_core_check_boundaries(clk->core, min, max)) {
++ ret = -EINVAL;
++ goto out;
++ }
++
+ rate = clk_core_get_rate_nolock(clk->core);
+ if (rate < min || rate > max) {
+ /*
+@@ -2375,6 +2397,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max)
+ }
+ }
+
++out:
+ if (clk->exclusive_count)
+ clk_core_rate_protect(clk->core);
+
+@@ -3340,6 +3363,24 @@ static int __init clk_debug_init(void)
+ {
+ struct clk_core *core;
+
++#ifdef CLOCK_ALLOW_WRITE_DEBUGFS
++ pr_warn("\n");
++ pr_warn("********************************************************************\n");
++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
++ pr_warn("** **\n");
++ pr_warn("** WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n");
++ pr_warn("** **\n");
++ pr_warn("** This means that this kernel is built to expose clk operations **\n");
++ pr_warn("** such as parent or rate setting, enabling, disabling, etc. **\n");
++ pr_warn("** to userspace, which may compromise security on your system. **\n");
++ pr_warn("** **\n");
++ pr_warn("** If you see this message and you are not debugging the **\n");
++ pr_warn("** kernel, report this immediately to your vendor! **\n");
++ pr_warn("** **\n");
++ pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
++ pr_warn("********************************************************************\n");
++#endif
++
+ rootdir = debugfs_create_dir("clk", NULL);
+
+ debugfs_create_file("clk_summary", 0444, rootdir, &all_lists,
+@@ -3392,6 +3433,19 @@ static void clk_core_reparent_orphans_nolock(void)
+ __clk_set_parent_after(orphan, parent, NULL);
+ __clk_recalc_accuracies(orphan);
+ __clk_recalc_rates(orphan, 0);
++
++ /*
++ * __clk_init_parent() will set the initial req_rate to
++ * 0 if the clock doesn't have clk_ops::recalc_rate and
++ * is an orphan when it's registered.
++ *
++ * 'req_rate' is used by clk_set_rate_range() and
++ * clk_put() to trigger a clk_set_rate() call whenever
++ * the boundaries are modified. Let's make sure
++ * 'req_rate' is set to something non-zero so that
++ * clk_set_rate_range() doesn't drop the frequency.
++ */
++ orphan->req_rate = orphan->rate;
+ }
+ }
+ }
+@@ -3415,6 +3469,14 @@ static int __clk_core_init(struct clk_core *core)
+
+ clk_prepare_lock();
+
++ /*
++ * Set hw->core after grabbing the prepare_lock to synchronize with
++ * callers of clk_core_fill_parent_index() where we treat hw->core
++ * being NULL as the clk not being registered yet. This is crucial so
++ * that clks aren't parented until their parent is fully registered.
++ */
++ core->hw->core = core;
++
+ ret = clk_pm_runtime_get(core);
+ if (ret)
+ goto unlock;
+@@ -3579,8 +3641,10 @@ static int __clk_core_init(struct clk_core *core)
+ out:
+ clk_pm_runtime_put(core);
+ unlock:
+- if (ret)
++ if (ret) {
+ hlist_del_init(&core->child_node);
++ core->hw->core = NULL;
++ }
+
+ clk_prepare_unlock();
+
+@@ -3702,8 +3766,9 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw,
+ struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id)
+ {
+ struct device *dev = hw->core->dev;
++ const char *name = dev ? dev_name(dev) : NULL;
+
+- return clk_hw_create_clk(dev, hw, dev_name(dev), con_id);
++ return clk_hw_create_clk(dev, hw, name, con_id);
+ }
+ EXPORT_SYMBOL(clk_hw_get_clk);
+
+@@ -3844,7 +3909,6 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
+ core->num_parents = init->num_parents;
+ core->min_rate = 0;
+ core->max_rate = ULONG_MAX;
+- hw->core = core;
+
+ ret = clk_core_populate_parent_map(core, init);
+ if (ret)
+@@ -3862,7 +3926,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
+ goto fail_create_clk;
+ }
+
+- clk_core_link_consumer(hw->core, hw->clk);
++ clk_core_link_consumer(core, hw->clk);
+
+ ret = __clk_core_init(core);
+ if (!ret)
+@@ -4440,6 +4504,7 @@ int devm_clk_notifier_register(struct device *dev, struct clk *clk,
+ if (!ret) {
+ devres->clk = clk;
+ devres->nb = nb;
++ devres_add(dev, devres);
+ } else {
+ devres_free(devres);
+ }
+diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c
+index 56012a3d02192..9ea1a80acbe8b 100644
+--- a/drivers/clk/hisilicon/clk-hi3559a.c
++++ b/drivers/clk/hisilicon/clk-hi3559a.c
+@@ -611,8 +611,8 @@ static struct hisi_mux_clock hi3559av100_shub_mux_clks[] = {
+
+
+ /* shub div clk */
+-static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}};
+-static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}};
++static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}, {/*sentinel*/}};
++static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}, {/*sentinel*/}};
+
+ static struct hisi_divider_clock hi3559av100_shub_div_clks[] = {
+ { HI3559AV100_SHUB_SPI_SOURCE_CLK, "clk_spi_clk", "shub_clk", 0, 0x20, 24, 2,
+diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c
+index 04e728538cefe..75e05582cb24f 100644
+--- a/drivers/clk/imx/clk-composite-8m.c
++++ b/drivers/clk/imx/clk-composite-8m.c
+@@ -97,7 +97,7 @@ static int imx8m_clk_composite_divider_set_rate(struct clk_hw *hw,
+ int prediv_value;
+ int div_value;
+ int ret;
+- u32 val;
++ u32 orig, val;
+
+ ret = imx8m_clk_composite_compute_dividers(rate, parent_rate,
+ &prediv_value, &div_value);
+@@ -106,13 +106,15 @@ static int imx8m_clk_composite_divider_set_rate(struct clk_hw *hw,
+
+ spin_lock_irqsave(divider->lock, flags);
+
+- val = readl(divider->reg);
+- val &= ~((clk_div_mask(divider->width) << divider->shift) |
+- (clk_div_mask(PCG_DIV_WIDTH) << PCG_DIV_SHIFT));
++ orig = readl(divider->reg);
++ val = orig & ~((clk_div_mask(divider->width) << divider->shift) |
++ (clk_div_mask(PCG_DIV_WIDTH) << PCG_DIV_SHIFT));
+
+ val |= (u32)(prediv_value - 1) << divider->shift;
+ val |= (u32)(div_value - 1) << PCG_DIV_SHIFT;
+- writel(val, divider->reg);
++
++ if (val != orig)
++ writel(val, divider->reg);
+
+ spin_unlock_irqrestore(divider->lock, flags);
+
+diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c
+index fc1bd23d45834..598f3cf4eba49 100644
+--- a/drivers/clk/imx/clk-imx6sx.c
++++ b/drivers/clk/imx/clk-imx6sx.c
+@@ -280,13 +280,13 @@ static void __init imx6sx_clocks_init(struct device_node *ccm_node)
+ hws[IMX6SX_CLK_SSI3_SEL] = imx_clk_hw_mux("ssi3_sel", base + 0x1c, 14, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+ hws[IMX6SX_CLK_SSI2_SEL] = imx_clk_hw_mux("ssi2_sel", base + 0x1c, 12, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+ hws[IMX6SX_CLK_SSI1_SEL] = imx_clk_hw_mux("ssi1_sel", base + 0x1c, 10, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+- hws[IMX6SX_CLK_QSPI1_SEL] = imx_clk_hw_mux_flags("qspi1_sel", base + 0x1c, 7, 3, qspi1_sels, ARRAY_SIZE(qspi1_sels), CLK_SET_RATE_PARENT);
++ hws[IMX6SX_CLK_QSPI1_SEL] = imx_clk_hw_mux("qspi1_sel", base + 0x1c, 7, 3, qspi1_sels, ARRAY_SIZE(qspi1_sels));
+ hws[IMX6SX_CLK_PERCLK_SEL] = imx_clk_hw_mux("perclk_sel", base + 0x1c, 6, 1, perclk_sels, ARRAY_SIZE(perclk_sels));
+ hws[IMX6SX_CLK_VID_SEL] = imx_clk_hw_mux("vid_sel", base + 0x20, 21, 3, vid_sels, ARRAY_SIZE(vid_sels));
+ hws[IMX6SX_CLK_ESAI_SEL] = imx_clk_hw_mux("esai_sel", base + 0x20, 19, 2, audio_sels, ARRAY_SIZE(audio_sels));
+ hws[IMX6SX_CLK_CAN_SEL] = imx_clk_hw_mux("can_sel", base + 0x20, 8, 2, can_sels, ARRAY_SIZE(can_sels));
+ hws[IMX6SX_CLK_UART_SEL] = imx_clk_hw_mux("uart_sel", base + 0x24, 6, 1, uart_sels, ARRAY_SIZE(uart_sels));
+- hws[IMX6SX_CLK_QSPI2_SEL] = imx_clk_hw_mux_flags("qspi2_sel", base + 0x2c, 15, 3, qspi2_sels, ARRAY_SIZE(qspi2_sels), CLK_SET_RATE_PARENT);
++ hws[IMX6SX_CLK_QSPI2_SEL] = imx_clk_hw_mux("qspi2_sel", base + 0x2c, 15, 3, qspi2_sels, ARRAY_SIZE(qspi2_sels));
+ hws[IMX6SX_CLK_SPDIF_SEL] = imx_clk_hw_mux("spdif_sel", base + 0x30, 20, 2, audio_sels, ARRAY_SIZE(audio_sels));
+ hws[IMX6SX_CLK_AUDIO_SEL] = imx_clk_hw_mux("audio_sel", base + 0x30, 7, 2, audio_sels, ARRAY_SIZE(audio_sels));
+ hws[IMX6SX_CLK_ENET_PRE_SEL] = imx_clk_hw_mux("enet_pre_sel", base + 0x34, 15, 3, enet_pre_sels, ARRAY_SIZE(enet_pre_sels));
+diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
+index 5dbb6a9377324..206e4c43f68f8 100644
+--- a/drivers/clk/imx/clk-imx6ul.c
++++ b/drivers/clk/imx/clk-imx6ul.c
+@@ -161,7 +161,6 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
+ hws[IMX6UL_PLL5_BYPASS] = imx_clk_hw_mux_flags("pll5_bypass", base + 0xa0, 16, 1, pll5_bypass_sels, ARRAY_SIZE(pll5_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX6UL_PLL6_BYPASS] = imx_clk_hw_mux_flags("pll6_bypass", base + 0xe0, 16, 1, pll6_bypass_sels, ARRAY_SIZE(pll6_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX6UL_PLL7_BYPASS] = imx_clk_hw_mux_flags("pll7_bypass", base + 0x20, 16, 1, pll7_bypass_sels, ARRAY_SIZE(pll7_bypass_sels), CLK_SET_RATE_PARENT);
+- hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux_flags("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels), CLK_SET_RATE_PARENT);
+
+ /* Do not bypass PLLs initially */
+ clk_set_parent(hws[IMX6UL_PLL1_BYPASS]->clk, hws[IMX6UL_CLK_PLL1]->clk);
+@@ -270,6 +269,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
+ hws[IMX6UL_CLK_ECSPI_SEL] = imx_clk_hw_mux("ecspi_sel", base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels));
+ hws[IMX6UL_CLK_LCDIF_PRE_SEL] = imx_clk_hw_mux_flags("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels), CLK_SET_RATE_PARENT);
+ hws[IMX6UL_CLK_LCDIF_SEL] = imx_clk_hw_mux("lcdif_sel", base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
++ hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels));
+
+ hws[IMX6UL_CLK_LDB_DI0_DIV_SEL] = imx_clk_hw_mux("ldb_di0", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels));
+ hws[IMX6UL_CLK_LDB_DI1_DIV_SEL] = imx_clk_hw_mux("ldb_di1", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels));
+diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
+index c4e0f1c07192f..3f6fd7ef2a68f 100644
+--- a/drivers/clk/imx/clk-imx7d.c
++++ b/drivers/clk/imx/clk-imx7d.c
+@@ -849,7 +849,6 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
+ hws[IMX7D_WDOG4_ROOT_CLK] = imx_clk_hw_gate4("wdog4_root_clk", "wdog_post_div", base + 0x49f0, 0);
+ hws[IMX7D_KPP_ROOT_CLK] = imx_clk_hw_gate4("kpp_root_clk", "ipg_root_clk", base + 0x4aa0, 0);
+ hws[IMX7D_CSI_MCLK_ROOT_CLK] = imx_clk_hw_gate4("csi_mclk_root_clk", "csi_mclk_post_div", base + 0x4490, 0);
+- hws[IMX7D_AUDIO_MCLK_ROOT_CLK] = imx_clk_hw_gate4("audio_mclk_root_clk", "audio_mclk_post_div", base + 0x4790, 0);
+ hws[IMX7D_WRCLK_ROOT_CLK] = imx_clk_hw_gate4("wrclk_root_clk", "wrclk_post_div", base + 0x47a0, 0);
+ hws[IMX7D_USB_CTRL_CLK] = imx_clk_hw_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0);
+ hws[IMX7D_USB_PHY1_CLK] = imx_clk_hw_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0);
+diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
+index c55577604e16a..4499da4154f06 100644
+--- a/drivers/clk/imx/clk-imx8mn.c
++++ b/drivers/clk/imx/clk-imx8mn.c
+@@ -30,7 +30,7 @@ static const char * const audio_pll2_bypass_sels[] = {"audio_pll2", "audio_pll2_
+ static const char * const video_pll1_bypass_sels[] = {"video_pll1", "video_pll1_ref_sel", };
+ static const char * const dram_pll_bypass_sels[] = {"dram_pll", "dram_pll_ref_sel", };
+ static const char * const gpu_pll_bypass_sels[] = {"gpu_pll", "gpu_pll_ref_sel", };
+-static const char * const vpu_pll_bypass_sels[] = {"vpu_pll", "vpu_pll_ref_sel", };
++static const char * const m7_alt_pll_bypass_sels[] = {"m7_alt_pll", "m7_alt_pll_ref_sel", };
+ static const char * const arm_pll_bypass_sels[] = {"arm_pll", "arm_pll_ref_sel", };
+ static const char * const sys_pll3_bypass_sels[] = {"sys_pll3", "sys_pll3_ref_sel", };
+
+@@ -40,7 +40,7 @@ static const char * const imx8mn_a53_sels[] = {"osc_24m", "arm_pll_out", "sys_pl
+
+ static const char * const imx8mn_a53_core_sels[] = {"arm_a53_div", "arm_pll_out", };
+
+-static const char * const imx8mn_m7_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll2_250m", "vpu_pll_out",
++static const char * const imx8mn_m7_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll2_250m", "m7_alt_pll_out",
+ "sys_pll1_800m", "audio_pll1_out", "video_pll1_out", "sys_pll3_out", };
+
+ static const char * const imx8mn_gpu_core_sels[] = {"osc_24m", "gpu_pll_out", "sys_pll1_800m",
+@@ -108,27 +108,27 @@ static const char * const imx8mn_disp_pixel_sels[] = {"osc_24m", "video_pll1_out
+ "sys_pll3_out", "clk_ext4", };
+
+ static const char * const imx8mn_sai2_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
+- "clk_ext3", "clk_ext4", };
++ "video_pll1_out", "sys_pll1_133m", "dummy",
++ "clk_ext2", "clk_ext3", };
+
+ static const char * const imx8mn_sai3_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
++ "video_pll1_out", "sys_pll1_133m", "dummy",
+ "clk_ext3", "clk_ext4", };
+
+ static const char * const imx8mn_sai5_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
++ "video_pll1_out", "sys_pll1_133m", "dummy",
+ "clk_ext2", "clk_ext3", };
+
+ static const char * const imx8mn_sai6_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
++ "video_pll1_out", "sys_pll1_133m", "dummy",
+ "clk_ext3", "clk_ext4", };
+
+ static const char * const imx8mn_sai7_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
++ "video_pll1_out", "sys_pll1_133m", "dummy",
+ "clk_ext3", "clk_ext4", };
+
+ static const char * const imx8mn_spdif1_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
++ "video_pll1_out", "sys_pll1_133m", "dummy",
+ "clk_ext2", "clk_ext3", };
+
+ static const char * const imx8mn_enet_ref_sels[] = {"osc_24m", "sys_pll2_125m", "sys_pll2_50m",
+@@ -140,8 +140,8 @@ static const char * const imx8mn_enet_timer_sels[] = {"osc_24m", "sys_pll2_100m"
+ "clk_ext4", "video_pll1_out", };
+
+ static const char * const imx8mn_enet_phy_sels[] = {"osc_24m", "sys_pll2_50m", "sys_pll2_125m",
+- "sys_pll2_200m", "sys_pll2_500m", "video_pll1_out",
+- "audio_pll2_out", };
++ "sys_pll2_200m", "sys_pll2_500m", "audio_pll1_out",
++ "video_pll_out", "audio_pll2_out", };
+
+ static const char * const imx8mn_nand_sels[] = {"osc_24m", "sys_pll2_500m", "audio_pll1_out",
+ "sys_pll1_400m", "audio_pll2_out", "sys_pll3_out",
+@@ -228,10 +228,10 @@ static const char * const imx8mn_pwm4_sels[] = {"osc_24m", "sys_pll2_100m", "sys
+ "sys_pll1_80m", "video_pll1_out", };
+
+ static const char * const imx8mn_wdog_sels[] = {"osc_24m", "sys_pll1_133m", "sys_pll1_160m",
+- "vpu_pll_out", "sys_pll2_125m", "sys_pll3_out",
++ "m7_alt_pll_out", "sys_pll2_125m", "sys_pll3_out",
+ "sys_pll1_80m", "sys_pll2_166m", };
+
+-static const char * const imx8mn_wrclk_sels[] = {"osc_24m", "sys_pll1_40m", "vpu_pll_out",
++static const char * const imx8mn_wrclk_sels[] = {"osc_24m", "sys_pll1_40m", "m7_alt_pll_out",
+ "sys_pll3_out", "sys_pll2_200m", "sys_pll1_266m",
+ "sys_pll2_500m", "sys_pll1_100m", };
+
+@@ -277,9 +277,9 @@ static const char * const imx8mn_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audi
+
+ static const char * const imx8mn_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", };
+
+-static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "osc_27m",
+- "sys_pll1_200m", "audio_pll2_out", "vpu_pll",
+- "sys_pll1_80m", };
++static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "dummy",
++ "sys_pll1_200m", "audio_pll2_out", "sys_pll2_500m",
++ "dummy", "sys_pll1_80m", };
+ static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_400m",
+ "sys_pll2_166m", "sys_pll3_out", "audio_pll1_out",
+ "video_pll1_out", "osc_32k", };
+@@ -299,7 +299,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ void __iomem *base;
+ int ret;
+
+- clk_hw_data = kzalloc(struct_size(clk_hw_data, hws,
++ clk_hw_data = devm_kzalloc(dev, struct_size(clk_hw_data, hws,
+ IMX8MN_CLK_END), GFP_KERNEL);
+ if (WARN_ON(!clk_hw_data))
+ return -ENOMEM;
+@@ -316,10 +316,10 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MN_CLK_EXT4] = imx_obtain_fixed_clk_hw(np, "clk_ext4");
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx8mn-anatop");
+- base = of_iomap(np, 0);
++ base = devm_of_iomap(dev, np, 0, NULL);
+ of_node_put(np);
+- if (WARN_ON(!base)) {
+- ret = -ENOMEM;
++ if (WARN_ON(IS_ERR(base))) {
++ ret = PTR_ERR(base);
+ goto unregister_hws;
+ }
+
+@@ -328,7 +328,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MN_VIDEO_PLL1_REF_SEL] = imx_clk_hw_mux("video_pll1_ref_sel", base + 0x28, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+ hws[IMX8MN_DRAM_PLL_REF_SEL] = imx_clk_hw_mux("dram_pll_ref_sel", base + 0x50, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+ hws[IMX8MN_GPU_PLL_REF_SEL] = imx_clk_hw_mux("gpu_pll_ref_sel", base + 0x64, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+- hws[IMX8MN_VPU_PLL_REF_SEL] = imx_clk_hw_mux("vpu_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
++ hws[IMX8MN_M7_ALT_PLL_REF_SEL] = imx_clk_hw_mux("m7_alt_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+ hws[IMX8MN_ARM_PLL_REF_SEL] = imx_clk_hw_mux("arm_pll_ref_sel", base + 0x84, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+ hws[IMX8MN_SYS_PLL3_REF_SEL] = imx_clk_hw_mux("sys_pll3_ref_sel", base + 0x114, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels));
+
+@@ -337,7 +337,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MN_VIDEO_PLL1] = imx_clk_hw_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx_1443x_pll);
+ hws[IMX8MN_DRAM_PLL] = imx_clk_hw_pll14xx("dram_pll", "dram_pll_ref_sel", base + 0x50, &imx_1443x_dram_pll);
+ hws[IMX8MN_GPU_PLL] = imx_clk_hw_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx_1416x_pll);
+- hws[IMX8MN_VPU_PLL] = imx_clk_hw_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx_1416x_pll);
++ hws[IMX8MN_M7_ALT_PLL] = imx_clk_hw_pll14xx("m7_alt_pll", "m7_alt_pll_ref_sel", base + 0x74, &imx_1416x_pll);
+ hws[IMX8MN_ARM_PLL] = imx_clk_hw_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx_1416x_pll);
+ hws[IMX8MN_SYS_PLL1] = imx_clk_hw_fixed("sys_pll1", 800000000);
+ hws[IMX8MN_SYS_PLL2] = imx_clk_hw_fixed("sys_pll2", 1000000000);
+@@ -349,7 +349,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MN_VIDEO_PLL1_BYPASS] = imx_clk_hw_mux_flags("video_pll1_bypass", base + 0x28, 16, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX8MN_DRAM_PLL_BYPASS] = imx_clk_hw_mux_flags("dram_pll_bypass", base + 0x50, 16, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX8MN_GPU_PLL_BYPASS] = imx_clk_hw_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
+- hws[IMX8MN_VPU_PLL_BYPASS] = imx_clk_hw_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT);
++ hws[IMX8MN_M7_ALT_PLL_BYPASS] = imx_clk_hw_mux_flags("m7_alt_pll_bypass", base + 0x74, 28, 1, m7_alt_pll_bypass_sels, ARRAY_SIZE(m7_alt_pll_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX8MN_ARM_PLL_BYPASS] = imx_clk_hw_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT);
+ hws[IMX8MN_SYS_PLL3_BYPASS] = imx_clk_hw_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT);
+
+@@ -359,7 +359,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MN_VIDEO_PLL1_OUT] = imx_clk_hw_gate("video_pll1_out", "video_pll1_bypass", base + 0x28, 13);
+ hws[IMX8MN_DRAM_PLL_OUT] = imx_clk_hw_gate("dram_pll_out", "dram_pll_bypass", base + 0x50, 13);
+ hws[IMX8MN_GPU_PLL_OUT] = imx_clk_hw_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11);
+- hws[IMX8MN_VPU_PLL_OUT] = imx_clk_hw_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11);
++ hws[IMX8MN_M7_ALT_PLL_OUT] = imx_clk_hw_gate("m7_alt_pll_out", "m7_alt_pll_bypass", base + 0x74, 11);
+ hws[IMX8MN_ARM_PLL_OUT] = imx_clk_hw_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11);
+ hws[IMX8MN_SYS_PLL3_OUT] = imx_clk_hw_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11);
+
+diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c
+index 12837304545d5..2f898c0bc867c 100644
+--- a/drivers/clk/imx/clk-imx8mp.c
++++ b/drivers/clk/imx/clk-imx8mp.c
+@@ -176,10 +176,6 @@ static const char * const imx8mp_sai3_sels[] = {"osc_24m", "audio_pll1_out", "au
+ "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
+ "clk_ext3", "clk_ext4", };
+
+-static const char * const imx8mp_sai4_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+- "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
+- "clk_ext1", "clk_ext2", };
+-
+ static const char * const imx8mp_sai5_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out",
+ "video_pll1_out", "sys_pll1_133m", "osc_hdmi",
+ "clk_ext2", "clk_ext3", };
+@@ -407,25 +403,22 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ struct device_node *np;
+ void __iomem *anatop_base, *ccm_base;
++ int err;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx8mp-anatop");
+- anatop_base = of_iomap(np, 0);
++ anatop_base = devm_of_iomap(dev, np, 0, NULL);
+ of_node_put(np);
+- if (WARN_ON(!anatop_base))
+- return -ENOMEM;
++ if (WARN_ON(IS_ERR(anatop_base)))
++ return PTR_ERR(anatop_base);
+
+ np = dev->of_node;
+ ccm_base = devm_platform_ioremap_resource(pdev, 0);
+- if (WARN_ON(IS_ERR(ccm_base))) {
+- iounmap(anatop_base);
++ if (WARN_ON(IS_ERR(ccm_base)))
+ return PTR_ERR(ccm_base);
+- }
+
+- clk_hw_data = kzalloc(struct_size(clk_hw_data, hws, IMX8MP_CLK_END), GFP_KERNEL);
+- if (WARN_ON(!clk_hw_data)) {
+- iounmap(anatop_base);
++ clk_hw_data = devm_kzalloc(dev, struct_size(clk_hw_data, hws, IMX8MP_CLK_END), GFP_KERNEL);
++ if (WARN_ON(!clk_hw_data))
+ return -ENOMEM;
+- }
+
+ clk_hw_data->num = IMX8MP_CLK_END;
+ hws = clk_hw_data->hws;
+@@ -569,7 +562,6 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MP_CLK_SAI1] = imx8m_clk_hw_composite("sai1", imx8mp_sai1_sels, ccm_base + 0xa580);
+ hws[IMX8MP_CLK_SAI2] = imx8m_clk_hw_composite("sai2", imx8mp_sai2_sels, ccm_base + 0xa600);
+ hws[IMX8MP_CLK_SAI3] = imx8m_clk_hw_composite("sai3", imx8mp_sai3_sels, ccm_base + 0xa680);
+- hws[IMX8MP_CLK_SAI4] = imx8m_clk_hw_composite("sai4", imx8mp_sai4_sels, ccm_base + 0xa700);
+ hws[IMX8MP_CLK_SAI5] = imx8m_clk_hw_composite("sai5", imx8mp_sai5_sels, ccm_base + 0xa780);
+ hws[IMX8MP_CLK_SAI6] = imx8m_clk_hw_composite("sai6", imx8mp_sai6_sels, ccm_base + 0xa800);
+ hws[IMX8MP_CLK_ENET_QOS] = imx8m_clk_hw_composite("enet_qos", imx8mp_enet_qos_sels, ccm_base + 0xa880);
+@@ -675,7 +667,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
+ hws[IMX8MP_CLK_UART2_ROOT] = imx_clk_hw_gate4("uart2_root_clk", "uart2", ccm_base + 0x44a0, 0);
+ hws[IMX8MP_CLK_UART3_ROOT] = imx_clk_hw_gate4("uart3_root_clk", "uart3", ccm_base + 0x44b0, 0);
+ hws[IMX8MP_CLK_UART4_ROOT] = imx_clk_hw_gate4("uart4_root_clk", "uart4", ccm_base + 0x44c0, 0);
+- hws[IMX8MP_CLK_USB_ROOT] = imx_clk_hw_gate4("usb_root_clk", "osc_32k", ccm_base + 0x44d0, 0);
++ hws[IMX8MP_CLK_USB_ROOT] = imx_clk_hw_gate4("usb_root_clk", "hsio_axi", ccm_base + 0x44d0, 0);
+ hws[IMX8MP_CLK_USB_PHY_ROOT] = imx_clk_hw_gate4("usb_phy_root_clk", "usb_phy_ref", ccm_base + 0x44f0, 0);
+ hws[IMX8MP_CLK_USDHC1_ROOT] = imx_clk_hw_gate4("usdhc1_root_clk", "usdhc1", ccm_base + 0x4510, 0);
+ hws[IMX8MP_CLK_USDHC2_ROOT] = imx_clk_hw_gate4("usdhc2_root_clk", "usdhc2", ccm_base + 0x4520, 0);
+@@ -710,7 +702,12 @@ static int imx8mp_clocks_probe(struct platform_device *pdev)
+
+ imx_check_clk_hws(hws, IMX8MP_CLK_END);
+
+- of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_hw_data);
++ err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_hw_data);
++ if (err < 0) {
++ dev_err(dev, "failed to register hws for i.MX8MP\n");
++ imx_unregister_hw_clocks(hws, IMX8MP_CLK_END);
++ return err;
++ }
+
+ imx_register_uart_clocks(4);
+
+diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c
+index d3e905cf867d7..5e31a6a24b3a3 100644
+--- a/drivers/clk/imx/clk-imx8qxp-lpcg.c
++++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c
+@@ -248,7 +248,7 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev,
+
+ for (i = 0; i < count; i++) {
+ idx = bit_offset[i] / 4;
+- if (idx > IMX_LPCG_MAX_CLKS) {
++ if (idx >= IMX_LPCG_MAX_CLKS) {
+ dev_warn(&pdev->dev, "invalid bit offset of clock %d\n",
+ i);
+ ret = -EINVAL;
+@@ -370,7 +370,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = {
+ .probe = imx8qxp_lpcg_clk_probe,
+ };
+
+-builtin_platform_driver(imx8qxp_lpcg_clk_driver);
++module_platform_driver(imx8qxp_lpcg_clk_driver);
+
+ MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>");
+ MODULE_DESCRIPTION("NXP i.MX8QXP LPCG clock driver");
+diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c
+index c53a688d8ccca..40a2efb1329be 100644
+--- a/drivers/clk/imx/clk-imx8qxp.c
++++ b/drivers/clk/imx/clk-imx8qxp.c
+@@ -308,7 +308,7 @@ static struct platform_driver imx8qxp_clk_driver = {
+ },
+ .probe = imx8qxp_clk_probe,
+ };
+-builtin_platform_driver(imx8qxp_clk_driver);
++module_platform_driver(imx8qxp_clk_driver);
+
+ MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>");
+ MODULE_DESCRIPTION("NXP i.MX8QXP clock driver");
+diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c
+index 2b5ed86b9dbbb..483f496f437a8 100644
+--- a/drivers/clk/imx/clk-pll14xx.c
++++ b/drivers/clk/imx/clk-pll14xx.c
+@@ -60,8 +60,6 @@ static const struct imx_pll14xx_rate_table imx_pll1443x_tbl[] = {
+ PLL_1443X_RATE(650000000U, 325, 3, 2, 0),
+ PLL_1443X_RATE(594000000U, 198, 2, 2, 0),
+ PLL_1443X_RATE(519750000U, 173, 2, 2, 16384),
+- PLL_1443X_RATE(393216000U, 262, 2, 3, 9437),
+- PLL_1443X_RATE(361267200U, 361, 3, 3, 17511),
+ };
+
+ struct imx_pll14xx_clk imx_1443x_pll = {
+diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c
+index 083da31dc3ead..1cee88b073fa2 100644
+--- a/drivers/clk/imx/clk-scu.c
++++ b/drivers/clk/imx/clk-scu.c
+@@ -690,7 +690,11 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
+ pr_warn("%s: failed to attached the power domain %d\n",
+ name, ret);
+
+- platform_device_add(pdev);
++ ret = platform_device_add(pdev);
++ if (ret) {
++ platform_device_put(pdev);
++ return ERR_PTR(ret);
++ }
+
+ /* For API backwards compatiblilty, simply return NULL for success */
+ return NULL;
+@@ -698,11 +702,11 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
+
+ void imx_clk_scu_unregister(void)
+ {
+- struct imx_scu_clk_node *clk;
++ struct imx_scu_clk_node *clk, *n;
+ int i;
+
+ for (i = 0; i < IMX_SC_R_LAST; i++) {
+- list_for_each_entry(clk, &imx_scu_clks[i], node) {
++ list_for_each_entry_safe(clk, n, &imx_scu_clks[i], node) {
+ clk_hw_unregister(clk->hw);
+ kfree(clk);
+ }
+diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c
+index 266c7595d3302..af31633a8862e 100644
+--- a/drivers/clk/ingenic/cgu.c
++++ b/drivers/clk/ingenic/cgu.c
+@@ -453,15 +453,15 @@ ingenic_clk_calc_div(struct clk_hw *hw,
+ }
+
+ /* Impose hardware constraints */
+- div = min_t(unsigned, div, 1 << clk_info->div.bits);
+- div = max_t(unsigned, div, 1);
++ div = clamp_t(unsigned int, div, clk_info->div.div,
++ clk_info->div.div << clk_info->div.bits);
+
+ /*
+ * If the divider value itself must be divided before being written to
+ * the divider register, we must ensure we don't have any bits set that
+ * would be lost as a result of doing so.
+ */
+- div /= clk_info->div.div;
++ div = DIV_ROUND_UP(div, clk_info->div.div);
+ div *= clk_info->div.div;
+
+ return div;
+diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c
+index 5154b0cf8ad6c..66ff141da0a42 100644
+--- a/drivers/clk/ingenic/jz4725b-cgu.c
++++ b/drivers/clk/ingenic/jz4725b-cgu.c
+@@ -139,11 +139,10 @@ static const struct ingenic_cgu_clk_info jz4725b_cgu_clocks[] = {
+ },
+
+ [JZ4725B_CLK_I2S] = {
+- "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE,
++ "i2s", CGU_CLK_MUX | CGU_CLK_DIV,
+ .parents = { JZ4725B_CLK_EXT, JZ4725B_CLK_PLL_HALF, -1, -1 },
+ .mux = { CGU_REG_CPCCR, 31, 1 },
+ .div = { CGU_REG_I2SCDR, 0, 1, 9, -1, -1, -1 },
+- .gate = { CGU_REG_CLKGR, 6 },
+ },
+
+ [JZ4725B_CLK_SPI] = {
+diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c
+index 14483797a4dbf..11906242e1d3d 100644
+--- a/drivers/clk/ingenic/jz4760-cgu.c
++++ b/drivers/clk/ingenic/jz4760-cgu.c
+@@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
+ unsigned long rate, unsigned long parent_rate,
+ unsigned int *pm, unsigned int *pn, unsigned int *pod)
+ {
+- unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2;
++ unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1;
+
+ /* The frequency after the N divider must be between 1 and 50 MHz. */
+ n = parent_rate / (1 * MHZ);
+@@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
+ /* The N divider must be >= 2. */
+ n = clamp_val(n, 2, 1 << pll_info->n_bits);
+
+- for (;; n >>= 1) {
+- od = (unsigned int)-1;
++ rate /= MHZ;
++ parent_rate /= MHZ;
+
+- do {
+- m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ);
+- } while ((m > m_max || m & 1) && (od < 4));
+-
+- if (od < 4 && m >= 4 && m <= m_max)
+- break;
++ for (m = m_max; m >= m_max && n >= 2; n--) {
++ m = rate * n / parent_rate;
++ od = m & 1;
++ m <<= od;
+ }
+
+ *pm = m;
+- *pn = n;
++ *pn = n + 1;
+ *pod = 1 << od;
+ }
+
+diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c
+index 77acfbeb48300..11fc395618365 100644
+--- a/drivers/clk/ingenic/tcu.c
++++ b/drivers/clk/ingenic/tcu.c
+@@ -100,15 +100,11 @@ static bool ingenic_tcu_enable_regs(struct clk_hw *hw)
+ bool enabled = false;
+
+ /*
+- * If the SoC has no global TCU clock, we must ungate the channel's
+- * clock to be able to access its registers.
+- * If we have a TCU clock, it will be enabled automatically as it has
+- * been attached to the regmap.
++ * According to the programming manual, a timer channel's registers can
++ * only be accessed when the channel's stop bit is clear.
+ */
+- if (!tcu->clk) {
+- enabled = !!ingenic_tcu_is_enabled(hw);
+- regmap_write(tcu->map, TCU_REG_TSCR, BIT(info->gate_bit));
+- }
++ enabled = !!ingenic_tcu_is_enabled(hw);
++ regmap_write(tcu->map, TCU_REG_TSCR, BIT(info->gate_bit));
+
+ return enabled;
+ }
+@@ -119,8 +115,7 @@ static void ingenic_tcu_disable_regs(struct clk_hw *hw)
+ const struct ingenic_tcu_clk_info *info = tcu_clk->info;
+ struct ingenic_tcu *tcu = tcu_clk->tcu;
+
+- if (!tcu->clk)
+- regmap_write(tcu->map, TCU_REG_TSSR, BIT(info->gate_bit));
++ regmap_write(tcu->map, TCU_REG_TSSR, BIT(info->gate_bit));
+ }
+
+ static u8 ingenic_tcu_get_parent(struct clk_hw *hw)
+diff --git a/drivers/clk/keystone/pll.c b/drivers/clk/keystone/pll.c
+index d59a7621bb204..ee5c72369334f 100644
+--- a/drivers/clk/keystone/pll.c
++++ b/drivers/clk/keystone/pll.c
+@@ -209,7 +209,7 @@ static void __init _of_pll_clk_init(struct device_node *node, bool pllctrl)
+ }
+
+ clk = clk_register_pll(NULL, node->name, parent_name, pll_data);
+- if (clk) {
++ if (!IS_ERR_OR_NULL(clk)) {
+ of_clk_add_provider(node, of_clk_src_simple_get, clk);
+ return;
+ }
+diff --git a/drivers/clk/keystone/sci-clk.c b/drivers/clk/keystone/sci-clk.c
+index 7e1b136e71ae0..8af2a9faa805a 100644
+--- a/drivers/clk/keystone/sci-clk.c
++++ b/drivers/clk/keystone/sci-clk.c
+@@ -302,6 +302,8 @@ static int _sci_clk_build(struct sci_clk_provider *provider,
+
+ name = kasprintf(GFP_KERNEL, "clk:%d:%d", sci_clk->dev_id,
+ sci_clk->clk_id);
++ if (!name)
++ return -ENOMEM;
+
+ init.name = name;
+
+diff --git a/drivers/clk/loongson1/clk-loongson1c.c b/drivers/clk/loongson1/clk-loongson1c.c
+index 703f87622cf5f..1ebf740380efb 100644
+--- a/drivers/clk/loongson1/clk-loongson1c.c
++++ b/drivers/clk/loongson1/clk-loongson1c.c
+@@ -37,6 +37,7 @@ static const struct clk_div_table ahb_div_table[] = {
+ [1] = { .val = 1, .div = 4 },
+ [2] = { .val = 2, .div = 3 },
+ [3] = { .val = 3, .div = 3 },
++ [4] = { /* sentinel */ }
+ };
+
+ void __init ls1x_clk_init(void)
+diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
+index 37b4162c58820..3a33014eee7f7 100644
+--- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
++++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
+@@ -18,9 +18,9 @@ static const struct mtk_gate_regs mfg_cg_regs = {
+ .sta_ofs = 0x0,
+ };
+
+-#define GATE_MFG(_id, _name, _parent, _shift) \
+- GATE_MTK(_id, _name, _parent, &mfg_cg_regs, _shift, \
+- &mtk_clk_gate_ops_setclr)
++#define GATE_MFG(_id, _name, _parent, _shift) \
++ GATE_MTK_FLAGS(_id, _name, _parent, &mfg_cg_regs, _shift, \
++ &mtk_clk_gate_ops_setclr, CLK_SET_RATE_PARENT)
+
+ static const struct mtk_gate mfg_clks[] = {
+ GATE_MFG(CLK_MFG_BG3D, "mfg_bg3d", "mfg_sel", 0)
+diff --git a/drivers/clk/mediatek/reset.c b/drivers/clk/mediatek/reset.c
+index e562dc3c10a4b..d311da574499f 100644
+--- a/drivers/clk/mediatek/reset.c
++++ b/drivers/clk/mediatek/reset.c
+@@ -25,7 +25,7 @@ static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev,
+ struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+ unsigned int reg = data->regofs + ((id / 32) << 4);
+
+- return regmap_write(data->regmap, reg, 1);
++ return regmap_write(data->regmap, reg, BIT(id % 32));
+ }
+
+ static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
+@@ -34,7 +34,7 @@ static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
+ struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+ unsigned int reg = data->regofs + ((id / 32) << 4) + 0x4;
+
+- return regmap_write(data->regmap, reg, 1);
++ return regmap_write(data->regmap, reg, BIT(id % 32));
+ }
+
+ static int mtk_reset_assert(struct reset_controller_dev *rcdev,
+diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
+index d6eed760327d0..608e0e8ca49a8 100644
+--- a/drivers/clk/meson/gxbb.c
++++ b/drivers/clk/meson/gxbb.c
+@@ -713,6 +713,35 @@ static struct clk_regmap gxbb_mpll_prediv = {
+ };
+
+ static struct clk_regmap gxbb_mpll0_div = {
++ .data = &(struct meson_clk_mpll_data){
++ .sdm = {
++ .reg_off = HHI_MPLL_CNTL7,
++ .shift = 0,
++ .width = 14,
++ },
++ .sdm_en = {
++ .reg_off = HHI_MPLL_CNTL,
++ .shift = 25,
++ .width = 1,
++ },
++ .n2 = {
++ .reg_off = HHI_MPLL_CNTL7,
++ .shift = 16,
++ .width = 9,
++ },
++ .lock = &meson_clk_lock,
++ },
++ .hw.init = &(struct clk_init_data){
++ .name = "mpll0_div",
++ .ops = &meson_clk_mpll_ops,
++ .parent_hws = (const struct clk_hw *[]) {
++ &gxbb_mpll_prediv.hw
++ },
++ .num_parents = 1,
++ },
++};
++
++static struct clk_regmap gxl_mpll0_div = {
+ .data = &(struct meson_clk_mpll_data){
+ .sdm = {
+ .reg_off = HHI_MPLL_CNTL7,
+@@ -749,7 +778,16 @@ static struct clk_regmap gxbb_mpll0 = {
+ .hw.init = &(struct clk_init_data){
+ .name = "mpll0",
+ .ops = &clk_regmap_gate_ops,
+- .parent_hws = (const struct clk_hw *[]) { &gxbb_mpll0_div.hw },
++ .parent_data = &(const struct clk_parent_data) {
++ /*
++ * Note:
++ * GXL and GXBB have different SDM_EN registers. We
++ * fallback to the global naming string mechanism so
++ * mpll0_div picks up the appropriate one.
++ */
++ .name = "mpll0_div",
++ .index = -1,
++ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+ },
+@@ -3044,7 +3082,7 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = {
+ [CLKID_VAPB_1] = &gxbb_vapb_1.hw,
+ [CLKID_VAPB_SEL] = &gxbb_vapb_sel.hw,
+ [CLKID_VAPB] = &gxbb_vapb.hw,
+- [CLKID_MPLL0_DIV] = &gxbb_mpll0_div.hw,
++ [CLKID_MPLL0_DIV] = &gxl_mpll0_div.hw,
+ [CLKID_MPLL1_DIV] = &gxbb_mpll1_div.hw,
+ [CLKID_MPLL2_DIV] = &gxbb_mpll2_div.hw,
+ [CLKID_MPLL_PREDIV] = &gxbb_mpll_prediv.hw,
+@@ -3439,7 +3477,7 @@ static struct clk_regmap *const gxl_clk_regmaps[] = {
+ &gxbb_mpll0,
+ &gxbb_mpll1,
+ &gxbb_mpll2,
+- &gxbb_mpll0_div,
++ &gxl_mpll0_div,
+ &gxbb_mpll1_div,
+ &gxbb_mpll2_div,
+ &gxbb_cts_amclk_div,
+diff --git a/drivers/clk/meson/meson-aoclk.c b/drivers/clk/meson/meson-aoclk.c
+index 27cd2c1f3f612..434cd8f9de826 100644
+--- a/drivers/clk/meson/meson-aoclk.c
++++ b/drivers/clk/meson/meson-aoclk.c
+@@ -38,6 +38,7 @@ int meson_aoclkc_probe(struct platform_device *pdev)
+ struct meson_aoclk_reset_controller *rstc;
+ struct meson_aoclk_data *data;
+ struct device *dev = &pdev->dev;
++ struct device_node *np;
+ struct regmap *regmap;
+ int ret, clkid;
+
+@@ -49,7 +50,9 @@ int meson_aoclkc_probe(struct platform_device *pdev)
+ if (!rstc)
+ return -ENOMEM;
+
+- regmap = syscon_node_to_regmap(of_get_parent(dev->of_node));
++ np = of_get_parent(dev->of_node);
++ regmap = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to get regmap\n");
+ return PTR_ERR(regmap);
+diff --git a/drivers/clk/meson/meson-eeclk.c b/drivers/clk/meson/meson-eeclk.c
+index 8d5a5dab955a8..0e5e6b57eb20e 100644
+--- a/drivers/clk/meson/meson-eeclk.c
++++ b/drivers/clk/meson/meson-eeclk.c
+@@ -18,6 +18,7 @@ int meson_eeclkc_probe(struct platform_device *pdev)
+ {
+ const struct meson_eeclkc_data *data;
+ struct device *dev = &pdev->dev;
++ struct device_node *np;
+ struct regmap *map;
+ int ret, i;
+
+@@ -26,7 +27,9 @@ int meson_eeclkc_probe(struct platform_device *pdev)
+ return -EINVAL;
+
+ /* Get the hhi system controller node */
+- map = syscon_node_to_regmap(of_get_parent(dev->of_node));
++ np = of_get_parent(dev->of_node);
++ map = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(map)) {
+ dev_err(dev,
+ "failed to get HHI regmap\n");
+diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
+index a844d35b553a2..809a0bfb670db 100644
+--- a/drivers/clk/meson/meson8b.c
++++ b/drivers/clk/meson/meson8b.c
+@@ -3717,12 +3717,15 @@ static void __init meson8b_clkc_init_common(struct device_node *np,
+ struct clk_hw_onecell_data *clk_hw_onecell_data)
+ {
+ struct meson8b_clk_reset *rstc;
++ struct device_node *parent_np;
+ const char *notifier_clk_name;
+ struct clk *notifier_clk;
+ struct regmap *map;
+ int i, ret;
+
+- map = syscon_node_to_regmap(of_get_parent(np));
++ parent_np = of_get_parent(np);
++ map = syscon_node_to_regmap(parent_np);
++ of_node_put(parent_np);
+ if (IS_ERR(map)) {
+ pr_err("failed to get HHI regmap - Trying obsolete regs\n");
+ return;
+diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
+index 08ba59ec3fb17..71bdd7c3ff034 100644
+--- a/drivers/clk/mvebu/ap-cpu-clk.c
++++ b/drivers/clk/mvebu/ap-cpu-clk.c
+@@ -256,12 +256,15 @@ static int ap_cpu_clock_probe(struct platform_device *pdev)
+ int cpu, err;
+
+ err = of_property_read_u32(dn, "reg", &cpu);
+- if (WARN_ON(err))
++ if (WARN_ON(err)) {
++ of_node_put(dn);
+ return err;
++ }
+
+ /* If cpu2 or cpu3 is enabled */
+ if (cpu & APN806_CLUSTER_NUM_MASK) {
+ nclusters = 2;
++ of_node_put(dn);
+ break;
+ }
+ }
+@@ -288,8 +291,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev)
+ int cpu, err;
+
+ err = of_property_read_u32(dn, "reg", &cpu);
+- if (WARN_ON(err))
++ if (WARN_ON(err)) {
++ of_node_put(dn);
+ return err;
++ }
+
+ cluster_index = cpu & APN806_CLUSTER_NUM_MASK;
+ cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
+@@ -301,6 +306,7 @@ static int ap_cpu_clock_probe(struct platform_device *pdev)
+ parent = of_clk_get(np, cluster_index);
+ if (IS_ERR(parent)) {
+ dev_err(dev, "Could not get the clock parent\n");
++ of_node_put(dn);
+ return -EINVAL;
+ }
+ parent_name = __clk_get_name(parent);
+@@ -319,8 +325,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev)
+ init.parent_names = &parent_name;
+
+ ret = devm_clk_hw_register(dev, &ap_cpu_clk[cluster_index].hw);
+- if (ret)
++ if (ret) {
++ of_node_put(dn);
+ return ret;
++ }
+ ap_cpu_data->hws[cluster_index] = &ap_cpu_clk[cluster_index].hw;
+ }
+
+diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
+index 9ef007b3cf9b4..6ba86cffc4135 100644
+--- a/drivers/clk/qcom/Kconfig
++++ b/drivers/clk/qcom/Kconfig
+@@ -550,6 +550,7 @@ config SM_DISPCC_8250
+
+ config SM_GCC_6115
+ tristate "SM6115 and SM4250 Global Clock Controller"
++ select QCOM_GDSC
+ help
+ Support for the global clock controller on SM6115 and SM4250 devices.
+ Say Y if you want to use peripheral devices such as UART, SPI,
+diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
+index 9825ef843f4a0..63c356ae32f23 100644
+--- a/drivers/clk/qcom/Makefile
++++ b/drivers/clk/qcom/Makefile
+@@ -11,6 +11,7 @@ clk-qcom-y += clk-branch.o
+ clk-qcom-y += clk-regmap-divider.o
+ clk-qcom-y += clk-regmap-mux.o
+ clk-qcom-y += clk-regmap-mux-div.o
++clk-qcom-y += clk-regmap-phy-mux.o
+ clk-qcom-$(CONFIG_KRAIT_CLOCKS) += clk-krait.o
+ clk-qcom-y += clk-hfpll.o
+ clk-qcom-y += reset.o
+diff --git a/drivers/clk/qcom/apss-ipq6018.c b/drivers/clk/qcom/apss-ipq6018.c
+index d78ff2f310bfa..b5d93657e1ee3 100644
+--- a/drivers/clk/qcom/apss-ipq6018.c
++++ b/drivers/clk/qcom/apss-ipq6018.c
+@@ -57,7 +57,7 @@ static struct clk_branch apcs_alias0_core_clk = {
+ .parent_hws = (const struct clk_hw *[]){
+ &apcs_alias0_clk_src.clkr.hw },
+ .num_parents = 1,
+- .flags = CLK_SET_RATE_PARENT,
++ .flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
+ .ops = &clk_branch2_ops,
+ },
+ },
+diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c
+index ce73ee9037cb0..ee99e3a853125 100644
+--- a/drivers/clk/qcom/camcc-sc7180.c
++++ b/drivers/clk/qcom/camcc-sc7180.c
+@@ -1493,12 +1493,21 @@ static struct clk_branch cam_cc_sys_tmr_clk = {
+ },
+ };
+
++static struct gdsc titan_top_gdsc = {
++ .gdscr = 0xb134,
++ .pd = {
++ .name = "titan_top_gdsc",
++ },
++ .pwrsts = PWRSTS_OFF_ON,
++};
++
+ static struct gdsc bps_gdsc = {
+ .gdscr = 0x6004,
+ .pd = {
+ .name = "bps_gdsc",
+ },
+ .pwrsts = PWRSTS_OFF_ON,
++ .parent = &titan_top_gdsc.pd,
+ .flags = HW_CTRL,
+ };
+
+@@ -1508,6 +1517,7 @@ static struct gdsc ife_0_gdsc = {
+ .name = "ife_0_gdsc",
+ },
+ .pwrsts = PWRSTS_OFF_ON,
++ .parent = &titan_top_gdsc.pd,
+ };
+
+ static struct gdsc ife_1_gdsc = {
+@@ -1516,6 +1526,7 @@ static struct gdsc ife_1_gdsc = {
+ .name = "ife_1_gdsc",
+ },
+ .pwrsts = PWRSTS_OFF_ON,
++ .parent = &titan_top_gdsc.pd,
+ };
+
+ static struct gdsc ipe_0_gdsc = {
+@@ -1525,15 +1536,9 @@ static struct gdsc ipe_0_gdsc = {
+ },
+ .pwrsts = PWRSTS_OFF_ON,
+ .flags = HW_CTRL,
++ .parent = &titan_top_gdsc.pd,
+ };
+
+-static struct gdsc titan_top_gdsc = {
+- .gdscr = 0xb134,
+- .pd = {
+- .name = "titan_top_gdsc",
+- },
+- .pwrsts = PWRSTS_OFF_ON,
+-};
+
+ static struct clk_hw *cam_cc_sc7180_hws[] = {
+ [CAM_CC_PLL2_OUT_EARLY] = &cam_cc_pll2_out_early.hw,
+@@ -1672,7 +1677,7 @@ static int cam_cc_sc7180_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- ret = pm_runtime_get(&pdev->dev);
++ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/clk/qcom/camcc-sdm845.c b/drivers/clk/qcom/camcc-sdm845.c
+index 1b2cefef7431d..a8a2cfa83290a 100644
+--- a/drivers/clk/qcom/camcc-sdm845.c
++++ b/drivers/clk/qcom/camcc-sdm845.c
+@@ -1521,6 +1521,8 @@ static struct clk_branch cam_cc_sys_tmr_clk = {
+ },
+ };
+
++static struct gdsc titan_top_gdsc;
++
+ static struct gdsc bps_gdsc = {
+ .gdscr = 0x6004,
+ .pd = {
+@@ -1554,6 +1556,7 @@ static struct gdsc ife_0_gdsc = {
+ .name = "ife_0_gdsc",
+ },
+ .flags = POLL_CFG_GDSCR,
++ .parent = &titan_top_gdsc.pd,
+ .pwrsts = PWRSTS_OFF_ON,
+ };
+
+@@ -1563,6 +1566,7 @@ static struct gdsc ife_1_gdsc = {
+ .name = "ife_1_gdsc",
+ },
+ .flags = POLL_CFG_GDSCR,
++ .parent = &titan_top_gdsc.pd,
+ .pwrsts = PWRSTS_OFF_ON,
+ };
+
+diff --git a/drivers/clk/qcom/camcc-sm8250.c b/drivers/clk/qcom/camcc-sm8250.c
+index 439eaafdcc862..9b32c56a5bc5a 100644
+--- a/drivers/clk/qcom/camcc-sm8250.c
++++ b/drivers/clk/qcom/camcc-sm8250.c
+@@ -2205,6 +2205,8 @@ static struct clk_branch cam_cc_sleep_clk = {
+ },
+ };
+
++static struct gdsc titan_top_gdsc;
++
+ static struct gdsc bps_gdsc = {
+ .gdscr = 0x7004,
+ .pd = {
+@@ -2238,6 +2240,7 @@ static struct gdsc ife_0_gdsc = {
+ .name = "ife_0_gdsc",
+ },
+ .flags = POLL_CFG_GDSCR,
++ .parent = &titan_top_gdsc.pd,
+ .pwrsts = PWRSTS_OFF_ON,
+ };
+
+@@ -2247,6 +2250,7 @@ static struct gdsc ife_1_gdsc = {
+ .name = "ife_1_gdsc",
+ },
+ .flags = POLL_CFG_GDSCR,
++ .parent = &titan_top_gdsc.pd,
+ .pwrsts = PWRSTS_OFF_ON,
+ };
+
+@@ -2440,17 +2444,7 @@ static struct platform_driver cam_cc_sm8250_driver = {
+ },
+ };
+
+-static int __init cam_cc_sm8250_init(void)
+-{
+- return platform_driver_register(&cam_cc_sm8250_driver);
+-}
+-subsys_initcall(cam_cc_sm8250_init);
+-
+-static void __exit cam_cc_sm8250_exit(void)
+-{
+- platform_driver_unregister(&cam_cc_sm8250_driver);
+-}
+-module_exit(cam_cc_sm8250_exit);
++module_platform_driver(cam_cc_sm8250_driver);
+
+ MODULE_DESCRIPTION("QTI CAMCC SM8250 Driver");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
+index eaedcceb766f9..5e44ceb730ad1 100644
+--- a/drivers/clk/qcom/clk-alpha-pll.c
++++ b/drivers/clk/qcom/clk-alpha-pll.c
+@@ -1420,7 +1420,7 @@ const struct clk_ops clk_alpha_pll_postdiv_fabia_ops = {
+ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops);
+
+ /**
+- * clk_lucid_pll_configure - configure the lucid pll
++ * clk_trion_pll_configure - configure the trion pll
+ *
+ * @pll: clk alpha pll
+ * @regmap: register map
+@@ -1429,6 +1429,15 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops);
+ void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
+ const struct alpha_pll_config *config)
+ {
++ /*
++ * If the bootloader left the PLL enabled it's likely that there are
++ * RCGs that will lock up if we disable the PLL below.
++ */
++ if (trion_pll_is_enabled(pll, regmap)) {
++ pr_debug("Trion PLL is already enabled, skipping configuration\n");
++ return;
++ }
++
+ clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l);
+ regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL);
+ clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha);
+diff --git a/drivers/clk/qcom/clk-krait.c b/drivers/clk/qcom/clk-krait.c
+index 59f1af415b580..e74fc81a14d00 100644
+--- a/drivers/clk/qcom/clk-krait.c
++++ b/drivers/clk/qcom/clk-krait.c
+@@ -32,11 +32,16 @@ static void __krait_mux_set_sel(struct krait_mux_clk *mux, int sel)
+ regval |= (sel & mux->mask) << (mux->shift + LPL_SHIFT);
+ }
+ krait_set_l2_indirect_reg(mux->offset, regval);
+- spin_unlock_irqrestore(&krait_clock_reg_lock, flags);
+
+ /* Wait for switch to complete. */
+ mb();
+ udelay(1);
++
++ /*
++ * Unlock now to make sure the mux register is not
++ * modified while switching to the new parent.
++ */
++ spin_unlock_irqrestore(&krait_clock_reg_lock, flags);
+ }
+
+ static int krait_mux_set_parent(struct clk_hw *hw, u8 index)
+@@ -93,6 +98,8 @@ static int krait_div2_set_rate(struct clk_hw *hw, unsigned long rate,
+
+ if (d->lpl)
+ mask = mask << (d->shift + LPL_SHIFT) | mask << d->shift;
++ else
++ mask <<= d->shift;
+
+ spin_lock_irqsave(&krait_clock_reg_lock, flags);
+ val = krait_get_l2_indirect_reg(d->offset);
+diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
+index e1b1b426fae4b..c3823cc32edc6 100644
+--- a/drivers/clk/qcom/clk-rcg2.c
++++ b/drivers/clk/qcom/clk-rcg2.c
+@@ -13,6 +13,7 @@
+ #include <linux/rational.h>
+ #include <linux/regmap.h>
+ #include <linux/math64.h>
++#include <linux/minmax.h>
+ #include <linux/slab.h>
+
+ #include <asm/div64.h>
+@@ -264,7 +265,7 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw,
+
+ static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f)
+ {
+- u32 cfg, mask;
++ u32 cfg, mask, d_val, not2d_val, n_minus_m;
+ struct clk_hw *hw = &rcg->clkr.hw;
+ int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src);
+
+@@ -283,8 +284,17 @@ static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f)
+ if (ret)
+ return ret;
+
++ /* Calculate 2d value */
++ d_val = f->n;
++
++ n_minus_m = f->n - f->m;
++ n_minus_m *= 2;
++
++ d_val = clamp_t(u32, d_val, f->m, n_minus_m);
++ not2d_val = ~d_val & mask;
++
+ ret = regmap_update_bits(rcg->clkr.regmap,
+- RCG_D_OFFSET(rcg), mask, ~f->n);
++ RCG_D_OFFSET(rcg), mask, not2d_val);
+ if (ret)
+ return ret;
+ }
+@@ -396,7 +406,7 @@ static int clk_rcg2_get_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
+ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
+ {
+ struct clk_rcg2 *rcg = to_clk_rcg2(hw);
+- u32 notn_m, n, m, d, not2d, mask, duty_per;
++ u32 notn_m, n, m, d, not2d, mask, duty_per, cfg;
+ int ret;
+
+ /* Duty-cycle cannot be modified for non-MND RCGs */
+@@ -407,6 +417,11 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
+
+ regmap_read(rcg->clkr.regmap, RCG_N_OFFSET(rcg), &notn_m);
+ regmap_read(rcg->clkr.regmap, RCG_M_OFFSET(rcg), &m);
++ regmap_read(rcg->clkr.regmap, RCG_CFG_OFFSET(rcg), &cfg);
++
++ /* Duty-cycle cannot be modified if MND divider is in bypass mode. */
++ if (!(cfg & CFG_MODE_MASK))
++ return -EINVAL;
+
+ n = (~(notn_m) + m) & mask;
+
+@@ -415,9 +430,11 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
+ /* Calculate 2d value */
+ d = DIV_ROUND_CLOSEST(n * duty_per * 2, 100);
+
+- /* Check bit widths of 2d. If D is too big reduce duty cycle. */
+- if (d > mask)
+- d = mask;
++ /*
++ * Check bit widths of 2d. If D is too big reduce duty cycle.
++ * Also make sure it is never zero.
++ */
++ d = clamp_val(d, 1, mask);
+
+ if ((d / 2) > (n - m))
+ d = (n - m) * 2;
+@@ -720,6 +737,7 @@ static const struct frac_entry frac_table_pixel[] = {
+ { 2, 9 },
+ { 4, 9 },
+ { 1, 1 },
++ { 2, 3 },
+ { }
+ };
+
+diff --git a/drivers/clk/qcom/clk-regmap-mux.c b/drivers/clk/qcom/clk-regmap-mux.c
+index b2d00b4519634..45d9cca28064f 100644
+--- a/drivers/clk/qcom/clk-regmap-mux.c
++++ b/drivers/clk/qcom/clk-regmap-mux.c
+@@ -28,7 +28,7 @@ static u8 mux_get_parent(struct clk_hw *hw)
+ val &= mask;
+
+ if (mux->parent_map)
+- return qcom_find_src_index(hw, mux->parent_map, val);
++ return qcom_find_cfg_index(hw, mux->parent_map, val);
+
+ return val;
+ }
+diff --git a/drivers/clk/qcom/clk-regmap-phy-mux.c b/drivers/clk/qcom/clk-regmap-phy-mux.c
+new file mode 100644
+index 0000000000000..7b7243b7107dc
+--- /dev/null
++++ b/drivers/clk/qcom/clk-regmap-phy-mux.c
+@@ -0,0 +1,62 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (c) 2022, Linaro Ltd.
++ */
++
++#include <linux/clk-provider.h>
++#include <linux/bitfield.h>
++#include <linux/regmap.h>
++#include <linux/export.h>
++
++#include "clk-regmap.h"
++#include "clk-regmap-phy-mux.h"
++
++#define PHY_MUX_MASK GENMASK(1, 0)
++#define PHY_MUX_PHY_SRC 0
++#define PHY_MUX_REF_SRC 2
++
++static inline struct clk_regmap_phy_mux *to_clk_regmap_phy_mux(struct clk_regmap *clkr)
++{
++ return container_of(clkr, struct clk_regmap_phy_mux, clkr);
++}
++
++static int phy_mux_is_enabled(struct clk_hw *hw)
++{
++ struct clk_regmap *clkr = to_clk_regmap(hw);
++ struct clk_regmap_phy_mux *phy_mux = to_clk_regmap_phy_mux(clkr);
++ unsigned int val;
++
++ regmap_read(clkr->regmap, phy_mux->reg, &val);
++ val = FIELD_GET(PHY_MUX_MASK, val);
++
++ WARN_ON(val != PHY_MUX_PHY_SRC && val != PHY_MUX_REF_SRC);
++
++ return val == PHY_MUX_PHY_SRC;
++}
++
++static int phy_mux_enable(struct clk_hw *hw)
++{
++ struct clk_regmap *clkr = to_clk_regmap(hw);
++ struct clk_regmap_phy_mux *phy_mux = to_clk_regmap_phy_mux(clkr);
++
++ return regmap_update_bits(clkr->regmap, phy_mux->reg,
++ PHY_MUX_MASK,
++ FIELD_PREP(PHY_MUX_MASK, PHY_MUX_PHY_SRC));
++}
++
++static void phy_mux_disable(struct clk_hw *hw)
++{
++ struct clk_regmap *clkr = to_clk_regmap(hw);
++ struct clk_regmap_phy_mux *phy_mux = to_clk_regmap_phy_mux(clkr);
++
++ regmap_update_bits(clkr->regmap, phy_mux->reg,
++ PHY_MUX_MASK,
++ FIELD_PREP(PHY_MUX_MASK, PHY_MUX_REF_SRC));
++}
++
++const struct clk_ops clk_regmap_phy_mux_ops = {
++ .enable = phy_mux_enable,
++ .disable = phy_mux_disable,
++ .is_enabled = phy_mux_is_enabled,
++};
++EXPORT_SYMBOL_GPL(clk_regmap_phy_mux_ops);
+diff --git a/drivers/clk/qcom/clk-regmap-phy-mux.h b/drivers/clk/qcom/clk-regmap-phy-mux.h
+new file mode 100644
+index 0000000000000..614dd384695ca
+--- /dev/null
++++ b/drivers/clk/qcom/clk-regmap-phy-mux.h
+@@ -0,0 +1,33 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (c) 2022, Linaro Ltd.
++ */
++
++#ifndef __QCOM_CLK_REGMAP_PHY_MUX_H__
++#define __QCOM_CLK_REGMAP_PHY_MUX_H__
++
++#include "clk-regmap.h"
++
++/*
++ * A clock implementation for PHY pipe and symbols clock muxes.
++ *
++ * If the clock is running off the from-PHY source, report it as enabled.
++ * Report it as disabled otherwise (if it uses reference source).
++ *
++ * This way the PHY will disable the pipe clock before turning off the GDSC,
++ * which in turn would lead to disabling corresponding pipe_clk_src (and thus
++ * it being parked to a safe, reference clock source). And vice versa, after
++ * enabling the GDSC the PHY will enable the pipe clock, which would cause
++ * pipe_clk_src to be switched from a safe source to the working one.
++ *
++ * For some platforms this should be used for the UFS symbol_clk_src clocks
++ * too.
++ */
++struct clk_regmap_phy_mux {
++ u32 reg;
++ struct clk_regmap clkr;
++};
++
++extern const struct clk_ops clk_regmap_phy_mux_ops;
++
++#endif
+diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
+index 60d2a78d13950..2af04fc4abfa9 100644
+--- a/drivers/clk/qcom/common.c
++++ b/drivers/clk/qcom/common.c
+@@ -69,6 +69,18 @@ int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src)
+ }
+ EXPORT_SYMBOL_GPL(qcom_find_src_index);
+
++int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, u8 cfg)
++{
++ int i, num_parents = clk_hw_get_num_parents(hw);
++
++ for (i = 0; i < num_parents; i++)
++ if (cfg == map[i].cfg)
++ return i;
++
++ return -ENOENT;
++}
++EXPORT_SYMBOL_GPL(qcom_find_cfg_index);
++
+ struct regmap *
+ qcom_cc_map(struct platform_device *pdev, const struct qcom_cc_desc *desc)
+ {
+diff --git a/drivers/clk/qcom/common.h b/drivers/clk/qcom/common.h
+index bb39a7e106d8a..9c8f7b798d9fc 100644
+--- a/drivers/clk/qcom/common.h
++++ b/drivers/clk/qcom/common.h
+@@ -49,6 +49,8 @@ extern void
+ qcom_pll_set_fsm_mode(struct regmap *m, u32 reg, u8 bias_count, u8 lock_count);
+ extern int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map,
+ u8 src);
++extern int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map,
++ u8 cfg);
+
+ extern int qcom_cc_register_board_clk(struct device *dev, const char *path,
+ const char *name, unsigned long rate);
+diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c
+index 538e4963c9152..5d2ae297e7413 100644
+--- a/drivers/clk/qcom/dispcc-sc7180.c
++++ b/drivers/clk/qcom/dispcc-sc7180.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+- * Copyright (c) 2019, The Linux Foundation. All rights reserved.
++ * Copyright (c) 2019, 2022, The Linux Foundation. All rights reserved.
+ */
+
+ #include <linux/clk-provider.h>
+@@ -625,6 +625,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
+
+ static struct gdsc mdss_gdsc = {
+ .gdscr = 0x3000,
++ .en_rest_wait_val = 0x2,
++ .en_few_wait_val = 0x2,
++ .clk_dis_wait_val = 0xf,
+ .pd = {
+ .name = "mdss_gdsc",
+ },
+diff --git a/drivers/clk/qcom/dispcc-sc7280.c b/drivers/clk/qcom/dispcc-sc7280.c
+index 4ef4ae231794b..ad596d567f6ab 100644
+--- a/drivers/clk/qcom/dispcc-sc7280.c
++++ b/drivers/clk/qcom/dispcc-sc7280.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+- * Copyright (c) 2021, The Linux Foundation. All rights reserved.
++ * Copyright (c) 2021-2022, The Linux Foundation. All rights reserved.
+ */
+
+ #include <linux/clk-provider.h>
+@@ -787,6 +787,9 @@ static struct clk_branch disp_cc_sleep_clk = {
+
+ static struct gdsc disp_cc_mdss_core_gdsc = {
+ .gdscr = 0x1004,
++ .en_rest_wait_val = 0x2,
++ .en_few_wait_val = 0x2,
++ .clk_dis_wait_val = 0xf,
+ .pd = {
+ .name = "disp_cc_mdss_core_gdsc",
+ },
+diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c
+index bf9ffe1a1cf47..73c5feea9818b 100644
+--- a/drivers/clk/qcom/dispcc-sm8250.c
++++ b/drivers/clk/qcom/dispcc-sm8250.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /*
+- * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
++ * Copyright (c) 2018-2020, 2022, The Linux Foundation. All rights reserved.
+ */
+
+ #include <linux/clk-provider.h>
+@@ -1125,6 +1125,9 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
+
+ static struct gdsc mdss_gdsc = {
+ .gdscr = 0x3000,
++ .en_rest_wait_val = 0x2,
++ .en_few_wait_val = 0x2,
++ .clk_dis_wait_val = 0xf,
+ .pd = {
+ .name = "mdss_gdsc",
+ },
+diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c
+index 3f9c2f61a5d93..cde62a11f5736 100644
+--- a/drivers/clk/qcom/gcc-ipq6018.c
++++ b/drivers/clk/qcom/gcc-ipq6018.c
+@@ -1654,7 +1654,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = {
+ .name = "sdcc1_apps_clk_src",
+ .parent_data = gcc_xo_gpll0_gpll2_gpll0_out_main_div2,
+ .num_parents = 4,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+
+@@ -4517,24 +4517,24 @@ static const struct qcom_reset_map gcc_ipq6018_resets[] = {
+ [GCC_PCIE0_AHB_ARES] = { 0x75040, 5 },
+ [GCC_PCIE0_AXI_MASTER_STICKY_ARES] = { 0x75040, 6 },
+ [GCC_PCIE0_AXI_SLAVE_STICKY_ARES] = { 0x75040, 7 },
+- [GCC_PPE_FULL_RESET] = { 0x68014, 0 },
+- [GCC_UNIPHY0_SOFT_RESET] = { 0x56004, 0 },
++ [GCC_PPE_FULL_RESET] = { .reg = 0x68014, .bitmask = 0xf0000 },
++ [GCC_UNIPHY0_SOFT_RESET] = { .reg = 0x56004, .bitmask = 0x3ff2 },
+ [GCC_UNIPHY0_XPCS_RESET] = { 0x56004, 2 },
+- [GCC_UNIPHY1_SOFT_RESET] = { 0x56104, 0 },
++ [GCC_UNIPHY1_SOFT_RESET] = { .reg = 0x56104, .bitmask = 0x32 },
+ [GCC_UNIPHY1_XPCS_RESET] = { 0x56104, 2 },
+- [GCC_EDMA_HW_RESET] = { 0x68014, 0 },
+- [GCC_NSSPORT1_RESET] = { 0x68014, 0 },
+- [GCC_NSSPORT2_RESET] = { 0x68014, 0 },
+- [GCC_NSSPORT3_RESET] = { 0x68014, 0 },
+- [GCC_NSSPORT4_RESET] = { 0x68014, 0 },
+- [GCC_NSSPORT5_RESET] = { 0x68014, 0 },
+- [GCC_UNIPHY0_PORT1_ARES] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT2_ARES] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT3_ARES] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT4_ARES] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT5_ARES] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT_4_5_RESET] = { 0x56004, 0 },
+- [GCC_UNIPHY0_PORT_4_RESET] = { 0x56004, 0 },
++ [GCC_EDMA_HW_RESET] = { .reg = 0x68014, .bitmask = 0x300000 },
++ [GCC_NSSPORT1_RESET] = { .reg = 0x68014, .bitmask = 0x1000003 },
++ [GCC_NSSPORT2_RESET] = { .reg = 0x68014, .bitmask = 0x200000c },
++ [GCC_NSSPORT3_RESET] = { .reg = 0x68014, .bitmask = 0x4000030 },
++ [GCC_NSSPORT4_RESET] = { .reg = 0x68014, .bitmask = 0x8000300 },
++ [GCC_NSSPORT5_RESET] = { .reg = 0x68014, .bitmask = 0x10000c00 },
++ [GCC_UNIPHY0_PORT1_ARES] = { .reg = 0x56004, .bitmask = 0x30 },
++ [GCC_UNIPHY0_PORT2_ARES] = { .reg = 0x56004, .bitmask = 0xc0 },
++ [GCC_UNIPHY0_PORT3_ARES] = { .reg = 0x56004, .bitmask = 0x300 },
++ [GCC_UNIPHY0_PORT4_ARES] = { .reg = 0x56004, .bitmask = 0xc00 },
++ [GCC_UNIPHY0_PORT5_ARES] = { .reg = 0x56004, .bitmask = 0x3000 },
++ [GCC_UNIPHY0_PORT_4_5_RESET] = { .reg = 0x56004, .bitmask = 0x3c02 },
++ [GCC_UNIPHY0_PORT_4_RESET] = { .reg = 0x56004, .bitmask = 0xc02 },
+ [GCC_LPASS_BCR] = {0x1F000, 0},
+ [GCC_UBI32_TBU_BCR] = {0x65000, 0},
+ [GCC_LPASS_TBU_BCR] = {0x6C000, 0},
+diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c
+index 108fe27bee10f..d6d5defb82c9f 100644
+--- a/drivers/clk/qcom/gcc-ipq8074.c
++++ b/drivers/clk/qcom/gcc-ipq8074.c
+@@ -60,11 +60,6 @@ static const struct parent_map gcc_xo_gpll0_gpll0_out_main_div2_map[] = {
+ { P_GPLL0_DIV2, 4 },
+ };
+
+-static const char * const gcc_xo_gpll0[] = {
+- "xo",
+- "gpll0",
+-};
+-
+ static const struct parent_map gcc_xo_gpll0_map[] = {
+ { P_XO, 0 },
+ { P_GPLL0, 1 },
+@@ -667,6 +662,7 @@ static struct clk_branch gcc_sleep_clk_src = {
+ },
+ .num_parents = 1,
+ .ops = &clk_branch2_ops,
++ .flags = CLK_IS_CRITICAL,
+ },
+ },
+ };
+@@ -956,6 +952,11 @@ static struct clk_rcg2 blsp1_uart6_apps_clk_src = {
+ },
+ };
+
++static const struct clk_parent_data gcc_xo_gpll0[] = {
++ { .fw_name = "xo" },
++ { .hw = &gpll0.clkr.hw },
++};
++
+ static const struct freq_tbl ftbl_pcie_axi_clk_src[] = {
+ F(19200000, P_XO, 1, 0, 0),
+ F(200000000, P_GPLL0, 4, 0, 0),
+@@ -969,7 +970,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = {
+ .parent_map = gcc_xo_gpll0_map,
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "pcie0_axi_clk_src",
+- .parent_names = gcc_xo_gpll0,
++ .parent_data = gcc_xo_gpll0,
+ .num_parents = 2,
+ .ops = &clk_rcg2_ops,
+ },
+@@ -1016,7 +1017,7 @@ static struct clk_rcg2 pcie1_axi_clk_src = {
+ .parent_map = gcc_xo_gpll0_map,
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "pcie1_axi_clk_src",
+- .parent_names = gcc_xo_gpll0,
++ .parent_data = gcc_xo_gpll0,
+ .num_parents = 2,
+ .ops = &clk_rcg2_ops,
+ },
+@@ -1074,7 +1075,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = {
+ .name = "sdcc1_apps_clk_src",
+ .parent_names = gcc_xo_gpll0_gpll2_gpll0_out_main_div2,
+ .num_parents = 4,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+
+@@ -1330,7 +1331,7 @@ static struct clk_rcg2 nss_ce_clk_src = {
+ .parent_map = gcc_xo_gpll0_map,
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "nss_ce_clk_src",
+- .parent_names = gcc_xo_gpll0,
++ .parent_data = gcc_xo_gpll0,
+ .num_parents = 2,
+ .ops = &clk_rcg2_ops,
+ },
+@@ -1788,8 +1789,10 @@ static struct clk_regmap_div nss_port4_tx_div_clk_src = {
+ static const struct freq_tbl ftbl_nss_port5_rx_clk_src[] = {
+ F(19200000, P_XO, 1, 0, 0),
+ F(25000000, P_UNIPHY1_RX, 12.5, 0, 0),
++ F(25000000, P_UNIPHY0_RX, 5, 0, 0),
+ F(78125000, P_UNIPHY1_RX, 4, 0, 0),
+ F(125000000, P_UNIPHY1_RX, 2.5, 0, 0),
++ F(125000000, P_UNIPHY0_RX, 1, 0, 0),
+ F(156250000, P_UNIPHY1_RX, 2, 0, 0),
+ F(312500000, P_UNIPHY1_RX, 1, 0, 0),
+ { }
+@@ -1828,8 +1831,10 @@ static struct clk_regmap_div nss_port5_rx_div_clk_src = {
+ static const struct freq_tbl ftbl_nss_port5_tx_clk_src[] = {
+ F(19200000, P_XO, 1, 0, 0),
+ F(25000000, P_UNIPHY1_TX, 12.5, 0, 0),
++ F(25000000, P_UNIPHY0_TX, 5, 0, 0),
+ F(78125000, P_UNIPHY1_TX, 4, 0, 0),
+ F(125000000, P_UNIPHY1_TX, 2.5, 0, 0),
++ F(125000000, P_UNIPHY0_TX, 1, 0, 0),
+ F(156250000, P_UNIPHY1_TX, 2, 0, 0),
+ F(312500000, P_UNIPHY1_TX, 1, 0, 0),
+ { }
+@@ -1867,8 +1872,10 @@ static struct clk_regmap_div nss_port5_tx_div_clk_src = {
+
+ static const struct freq_tbl ftbl_nss_port6_rx_clk_src[] = {
+ F(19200000, P_XO, 1, 0, 0),
++ F(25000000, P_UNIPHY2_RX, 5, 0, 0),
+ F(25000000, P_UNIPHY2_RX, 12.5, 0, 0),
+ F(78125000, P_UNIPHY2_RX, 4, 0, 0),
++ F(125000000, P_UNIPHY2_RX, 1, 0, 0),
+ F(125000000, P_UNIPHY2_RX, 2.5, 0, 0),
+ F(156250000, P_UNIPHY2_RX, 2, 0, 0),
+ F(312500000, P_UNIPHY2_RX, 1, 0, 0),
+@@ -1907,8 +1914,10 @@ static struct clk_regmap_div nss_port6_rx_div_clk_src = {
+
+ static const struct freq_tbl ftbl_nss_port6_tx_clk_src[] = {
+ F(19200000, P_XO, 1, 0, 0),
++ F(25000000, P_UNIPHY2_TX, 5, 0, 0),
+ F(25000000, P_UNIPHY2_TX, 12.5, 0, 0),
+ F(78125000, P_UNIPHY2_TX, 4, 0, 0),
++ F(125000000, P_UNIPHY2_TX, 1, 0, 0),
+ F(125000000, P_UNIPHY2_TX, 2.5, 0, 0),
+ F(156250000, P_UNIPHY2_TX, 2, 0, 0),
+ F(312500000, P_UNIPHY2_TX, 1, 0, 0),
+@@ -3346,6 +3355,7 @@ static struct clk_branch gcc_nssnoc_ubi1_ahb_clk = {
+
+ static struct clk_branch gcc_ubi0_ahb_clk = {
+ .halt_reg = 0x6820c,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x6820c,
+ .enable_mask = BIT(0),
+@@ -3363,6 +3373,7 @@ static struct clk_branch gcc_ubi0_ahb_clk = {
+
+ static struct clk_branch gcc_ubi0_axi_clk = {
+ .halt_reg = 0x68200,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68200,
+ .enable_mask = BIT(0),
+@@ -3380,6 +3391,7 @@ static struct clk_branch gcc_ubi0_axi_clk = {
+
+ static struct clk_branch gcc_ubi0_nc_axi_clk = {
+ .halt_reg = 0x68204,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68204,
+ .enable_mask = BIT(0),
+@@ -3397,6 +3409,7 @@ static struct clk_branch gcc_ubi0_nc_axi_clk = {
+
+ static struct clk_branch gcc_ubi0_core_clk = {
+ .halt_reg = 0x68210,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68210,
+ .enable_mask = BIT(0),
+@@ -3414,6 +3427,7 @@ static struct clk_branch gcc_ubi0_core_clk = {
+
+ static struct clk_branch gcc_ubi0_mpt_clk = {
+ .halt_reg = 0x68208,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68208,
+ .enable_mask = BIT(0),
+@@ -3431,6 +3445,7 @@ static struct clk_branch gcc_ubi0_mpt_clk = {
+
+ static struct clk_branch gcc_ubi1_ahb_clk = {
+ .halt_reg = 0x6822c,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x6822c,
+ .enable_mask = BIT(0),
+@@ -3448,6 +3463,7 @@ static struct clk_branch gcc_ubi1_ahb_clk = {
+
+ static struct clk_branch gcc_ubi1_axi_clk = {
+ .halt_reg = 0x68220,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68220,
+ .enable_mask = BIT(0),
+@@ -3465,6 +3481,7 @@ static struct clk_branch gcc_ubi1_axi_clk = {
+
+ static struct clk_branch gcc_ubi1_nc_axi_clk = {
+ .halt_reg = 0x68224,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68224,
+ .enable_mask = BIT(0),
+@@ -3482,6 +3499,7 @@ static struct clk_branch gcc_ubi1_nc_axi_clk = {
+
+ static struct clk_branch gcc_ubi1_core_clk = {
+ .halt_reg = 0x68230,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68230,
+ .enable_mask = BIT(0),
+@@ -3499,6 +3517,7 @@ static struct clk_branch gcc_ubi1_core_clk = {
+
+ static struct clk_branch gcc_ubi1_mpt_clk = {
+ .halt_reg = 0x68228,
++ .halt_check = BRANCH_HALT_DELAY,
+ .clkr = {
+ .enable_reg = 0x68228,
+ .enable_mask = BIT(0),
+@@ -4329,8 +4348,7 @@ static struct clk_rcg2 pcie0_rchng_clk_src = {
+ .parent_map = gcc_xo_gpll0_map,
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "pcie0_rchng_clk_src",
+- .parent_hws = (const struct clk_hw *[]) {
+- &gpll0.clkr.hw },
++ .parent_data = gcc_xo_gpll0,
+ .num_parents = 2,
+ .ops = &clk_rcg2_ops,
+ },
+@@ -4372,6 +4390,33 @@ static struct clk_branch gcc_pcie0_axi_s_bridge_clk = {
+ },
+ };
+
++static const struct alpha_pll_config ubi32_pll_config = {
++ .l = 0x4e,
++ .config_ctl_val = 0x200d4aa8,
++ .config_ctl_hi_val = 0x3c2,
++ .main_output_mask = BIT(0),
++ .aux_output_mask = BIT(1),
++ .pre_div_val = 0x0,
++ .pre_div_mask = BIT(12),
++ .post_div_val = 0x0,
++ .post_div_mask = GENMASK(9, 8),
++};
++
++static const struct alpha_pll_config nss_crypto_pll_config = {
++ .l = 0x3e,
++ .alpha = 0x0,
++ .alpha_hi = 0x80,
++ .config_ctl_val = 0x4001055b,
++ .main_output_mask = BIT(0),
++ .pre_div_val = 0x0,
++ .pre_div_mask = GENMASK(14, 12),
++ .post_div_val = 0x1 << 8,
++ .post_div_mask = GENMASK(11, 8),
++ .vco_mask = GENMASK(21, 20),
++ .vco_val = 0x0,
++ .alpha_en_mask = BIT(24),
++};
++
+ static struct clk_hw *gcc_ipq8074_hws[] = {
+ &gpll0_out_main_div2.hw,
+ &gpll6_out_main_div2.hw,
+@@ -4773,7 +4818,20 @@ static const struct qcom_cc_desc gcc_ipq8074_desc = {
+
+ static int gcc_ipq8074_probe(struct platform_device *pdev)
+ {
+- return qcom_cc_probe(pdev, &gcc_ipq8074_desc);
++ struct regmap *regmap;
++
++ regmap = qcom_cc_map(pdev, &gcc_ipq8074_desc);
++ if (IS_ERR(regmap))
++ return PTR_ERR(regmap);
++
++ /* SW Workaround for UBI32 Huayra PLL */
++ regmap_update_bits(regmap, 0x2501c, BIT(26), BIT(26));
++
++ clk_alpha_pll_configure(&ubi32_pll_main, regmap, &ubi32_pll_config);
++ clk_alpha_pll_configure(&nss_crypto_pll_main, regmap,
++ &nss_crypto_pll_config);
++
++ return qcom_cc_really_probe(pdev, &gcc_ipq8074_desc, regmap);
+ }
+
+ static struct platform_driver gcc_ipq8074_driver = {
+diff --git a/drivers/clk/qcom/gcc-mdm9615.c b/drivers/clk/qcom/gcc-mdm9615.c
+index 8bed02a748aba..470a277603a92 100644
+--- a/drivers/clk/qcom/gcc-mdm9615.c
++++ b/drivers/clk/qcom/gcc-mdm9615.c
+@@ -58,7 +58,7 @@ static struct clk_regmap pll0_vote = {
+ .enable_mask = BIT(0),
+ .hw.init = &(struct clk_init_data){
+ .name = "pll0_vote",
+- .parent_names = (const char *[]){ "pll8" },
++ .parent_names = (const char *[]){ "pll0" },
+ .num_parents = 1,
+ .ops = &clk_pll_vote_ops,
+ },
+diff --git a/drivers/clk/qcom/gcc-msm8939.c b/drivers/clk/qcom/gcc-msm8939.c
+index 39ebb443ae3d5..de0022e5450de 100644
+--- a/drivers/clk/qcom/gcc-msm8939.c
++++ b/drivers/clk/qcom/gcc-msm8939.c
+@@ -632,7 +632,7 @@ static struct clk_rcg2 system_noc_bfdcd_clk_src = {
+ };
+
+ static struct clk_rcg2 bimc_ddr_clk_src = {
+- .cmd_rcgr = 0x32004,
++ .cmd_rcgr = 0x32024,
+ .hid_width = 5,
+ .parent_map = gcc_xo_gpll0_bimc_map,
+ .clkr.hw.init = &(struct clk_init_data){
+@@ -644,6 +644,18 @@ static struct clk_rcg2 bimc_ddr_clk_src = {
+ },
+ };
+
++static struct clk_rcg2 system_mm_noc_bfdcd_clk_src = {
++ .cmd_rcgr = 0x2600c,
++ .hid_width = 5,
++ .parent_map = gcc_xo_gpll0_gpll6a_map,
++ .clkr.hw.init = &(struct clk_init_data){
++ .name = "system_mm_noc_bfdcd_clk_src",
++ .parent_data = gcc_xo_gpll0_gpll6a_parent_data,
++ .num_parents = 3,
++ .ops = &clk_rcg2_ops,
++ },
++};
++
+ static const struct freq_tbl ftbl_gcc_camss_ahb_clk[] = {
+ F(40000000, P_GPLL0, 10, 1, 2),
+ F(80000000, P_GPLL0, 10, 0, 0),
+@@ -1002,7 +1014,7 @@ static struct clk_rcg2 blsp1_uart2_apps_clk_src = {
+ };
+
+ static const struct freq_tbl ftbl_gcc_camss_cci_clk[] = {
+- F(19200000, P_XO, 1, 0, 0),
++ F(19200000, P_XO, 1, 0, 0),
+ { }
+ };
+
+@@ -2441,7 +2453,7 @@ static struct clk_branch gcc_camss_jpeg_axi_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_camss_jpeg_axi_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -2645,7 +2657,7 @@ static struct clk_branch gcc_camss_vfe_axi_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_camss_vfe_axi_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -2801,7 +2813,7 @@ static struct clk_branch gcc_mdss_axi_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_mdss_axi_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3193,7 +3205,7 @@ static struct clk_branch gcc_mdp_tbu_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_mdp_tbu_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3211,7 +3223,7 @@ static struct clk_branch gcc_venus_tbu_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_venus_tbu_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3229,7 +3241,7 @@ static struct clk_branch gcc_vfe_tbu_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_vfe_tbu_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3247,7 +3259,7 @@ static struct clk_branch gcc_jpeg_tbu_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_jpeg_tbu_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3484,7 +3496,7 @@ static struct clk_branch gcc_venus0_axi_clk = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_venus0_axi_clk",
+ .parent_data = &(const struct clk_parent_data){
+- .hw = &system_noc_bfdcd_clk_src.clkr.hw,
++ .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw,
+ },
+ .num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
+@@ -3623,6 +3635,7 @@ static struct clk_regmap *gcc_msm8939_clocks[] = {
+ [GPLL2_VOTE] = &gpll2_vote,
+ [PCNOC_BFDCD_CLK_SRC] = &pcnoc_bfdcd_clk_src.clkr,
+ [SYSTEM_NOC_BFDCD_CLK_SRC] = &system_noc_bfdcd_clk_src.clkr,
++ [SYSTEM_MM_NOC_BFDCD_CLK_SRC] = &system_mm_noc_bfdcd_clk_src.clkr,
+ [CAMSS_AHB_CLK_SRC] = &camss_ahb_clk_src.clkr,
+ [APSS_AHB_CLK_SRC] = &apss_ahb_clk_src.clkr,
+ [CSI0_CLK_SRC] = &csi0_clk_src.clkr,
+diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c
+index 144d2ba7a9bef..463a444c8a7e4 100644
+--- a/drivers/clk/qcom/gcc-msm8994.c
++++ b/drivers/clk/qcom/gcc-msm8994.c
+@@ -108,6 +108,7 @@ static struct clk_alpha_pll gpll4_early = {
+
+ static struct clk_alpha_pll_postdiv gpll4 = {
+ .offset = 0x1dc0,
++ .width = 4,
+ .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data)
+ {
+diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
+index 3c3a7ff045621..9b1674b28d45d 100644
+--- a/drivers/clk/qcom/gcc-msm8996.c
++++ b/drivers/clk/qcom/gcc-msm8996.c
+@@ -2937,20 +2937,6 @@ static struct clk_branch gcc_smmu_aggre0_ahb_clk = {
+ },
+ };
+
+-static struct clk_branch gcc_aggre1_pnoc_ahb_clk = {
+- .halt_reg = 0x82014,
+- .clkr = {
+- .enable_reg = 0x82014,
+- .enable_mask = BIT(0),
+- .hw.init = &(struct clk_init_data){
+- .name = "gcc_aggre1_pnoc_ahb_clk",
+- .parent_names = (const char *[]){ "periph_noc_clk_src" },
+- .num_parents = 1,
+- .ops = &clk_branch2_ops,
+- },
+- },
+-};
+-
+ static struct clk_branch gcc_aggre2_ufs_axi_clk = {
+ .halt_reg = 0x83014,
+ .clkr = {
+@@ -3474,7 +3460,6 @@ static struct clk_regmap *gcc_msm8996_clocks[] = {
+ [GCC_AGGRE0_CNOC_AHB_CLK] = &gcc_aggre0_cnoc_ahb_clk.clkr,
+ [GCC_SMMU_AGGRE0_AXI_CLK] = &gcc_smmu_aggre0_axi_clk.clkr,
+ [GCC_SMMU_AGGRE0_AHB_CLK] = &gcc_smmu_aggre0_ahb_clk.clkr,
+- [GCC_AGGRE1_PNOC_AHB_CLK] = &gcc_aggre1_pnoc_ahb_clk.clkr,
+ [GCC_AGGRE2_UFS_AXI_CLK] = &gcc_aggre2_ufs_axi_clk.clkr,
+ [GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr,
+ [GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr,
+diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c
+index c2ea09945c472..a38394b4739a2 100644
+--- a/drivers/clk/qcom/gcc-sc7180.c
++++ b/drivers/clk/qcom/gcc-sc7180.c
+@@ -667,6 +667,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+ .name = "gcc_sdcc2_apps_clk_src",
+ .parent_data = gcc_parent_data_5,
+ .num_parents = ARRAY_SIZE(gcc_parent_data_5),
++ .flags = CLK_OPS_PARENT_ENABLE,
+ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+diff --git a/drivers/clk/qcom/gcc-sc7280.c b/drivers/clk/qcom/gcc-sc7280.c
+index 6cefcdc869905..d10efbf260b7a 100644
+--- a/drivers/clk/qcom/gcc-sc7280.c
++++ b/drivers/clk/qcom/gcc-sc7280.c
+@@ -2998,7 +2998,7 @@ static struct clk_branch gcc_cfg_noc_lpass_clk = {
+ .enable_mask = BIT(0),
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_cfg_noc_lpass_clk",
+- .ops = &clk_branch2_ops,
++ .ops = &clk_branch2_aon_ops,
+ },
+ },
+ };
+@@ -3571,6 +3571,7 @@ static int gcc_sc7280_probe(struct platform_device *pdev)
+ regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0));
+ regmap_update_bits(regmap, 0x28014, BIT(0), BIT(0));
+ regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
++ regmap_update_bits(regmap, 0x7100C, BIT(13), BIT(13));
+
+ ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
+ ARRAY_SIZE(gcc_dfs_clocks));
+diff --git a/drivers/clk/qcom/gcc-sm6115.c b/drivers/clk/qcom/gcc-sm6115.c
+index 68fe9f6f0d2f3..1c3be4e07d5bc 100644
+--- a/drivers/clk/qcom/gcc-sm6115.c
++++ b/drivers/clk/qcom/gcc-sm6115.c
+@@ -53,11 +53,25 @@ static struct pll_vco gpll10_vco[] = {
+ { 750000000, 1500000000, 1 },
+ };
+
++static const u8 clk_alpha_pll_regs_offset[][PLL_OFF_MAX_REGS] = {
++ [CLK_ALPHA_PLL_TYPE_DEFAULT] = {
++ [PLL_OFF_L_VAL] = 0x04,
++ [PLL_OFF_ALPHA_VAL] = 0x08,
++ [PLL_OFF_ALPHA_VAL_U] = 0x0c,
++ [PLL_OFF_TEST_CTL] = 0x10,
++ [PLL_OFF_TEST_CTL_U] = 0x14,
++ [PLL_OFF_USER_CTL] = 0x18,
++ [PLL_OFF_USER_CTL_U] = 0x1c,
++ [PLL_OFF_CONFIG_CTL] = 0x20,
++ [PLL_OFF_STATUS] = 0x24,
++ },
++};
++
+ static struct clk_alpha_pll gpll0 = {
+ .offset = 0x0,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(0),
+@@ -83,7 +97,7 @@ static struct clk_alpha_pll_postdiv gpll0_out_aux2 = {
+ .post_div_table = post_div_table_gpll0_out_aux2,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_aux2),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll0_out_aux2",
+ .parent_hws = (const struct clk_hw *[]){ &gpll0.clkr.hw },
+@@ -115,7 +129,7 @@ static struct clk_alpha_pll_postdiv gpll0_out_main = {
+ .post_div_table = post_div_table_gpll0_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll0_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll0_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll0.clkr.hw },
+@@ -137,7 +151,7 @@ static struct clk_alpha_pll gpll10 = {
+ .offset = 0xa000,
+ .vco_table = gpll10_vco,
+ .num_vco = ARRAY_SIZE(gpll10_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(10),
+@@ -163,7 +177,7 @@ static struct clk_alpha_pll_postdiv gpll10_out_main = {
+ .post_div_table = post_div_table_gpll10_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll10_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll10_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll10.clkr.hw },
+@@ -189,7 +203,7 @@ static struct clk_alpha_pll gpll11 = {
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+ .flags = SUPPORTS_DYNAMIC_UPDATE,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(11),
+@@ -215,7 +229,7 @@ static struct clk_alpha_pll_postdiv gpll11_out_main = {
+ .post_div_table = post_div_table_gpll11_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll11_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll11_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll11.clkr.hw },
+@@ -229,7 +243,7 @@ static struct clk_alpha_pll gpll3 = {
+ .offset = 0x3000,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(3),
+@@ -248,7 +262,7 @@ static struct clk_alpha_pll gpll4 = {
+ .offset = 0x4000,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(4),
+@@ -274,7 +288,7 @@ static struct clk_alpha_pll_postdiv gpll4_out_main = {
+ .post_div_table = post_div_table_gpll4_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll4_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll4_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll4.clkr.hw },
+@@ -287,7 +301,7 @@ static struct clk_alpha_pll gpll6 = {
+ .offset = 0x6000,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(6),
+@@ -313,7 +327,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_main = {
+ .post_div_table = post_div_table_gpll6_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll6_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll6_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll6.clkr.hw },
+@@ -326,7 +340,7 @@ static struct clk_alpha_pll gpll7 = {
+ .offset = 0x7000,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr = {
+ .enable_reg = 0x79000,
+ .enable_mask = BIT(7),
+@@ -352,7 +366,7 @@ static struct clk_alpha_pll_postdiv gpll7_out_main = {
+ .post_div_table = post_div_table_gpll7_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll7_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll7_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll7.clkr.hw },
+@@ -380,7 +394,7 @@ static struct clk_alpha_pll gpll8 = {
+ .offset = 0x8000,
+ .vco_table = default_vco,
+ .num_vco = ARRAY_SIZE(default_vco),
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .flags = SUPPORTS_DYNAMIC_UPDATE,
+ .clkr = {
+ .enable_reg = 0x79000,
+@@ -407,7 +421,7 @@ static struct clk_alpha_pll_postdiv gpll8_out_main = {
+ .post_div_table = post_div_table_gpll8_out_main,
+ .num_post_div = ARRAY_SIZE(post_div_table_gpll8_out_main),
+ .width = 4,
+- .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT],
++ .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_DEFAULT],
+ .clkr.hw.init = &(struct clk_init_data){
+ .name = "gpll8_out_main",
+ .parent_hws = (const struct clk_hw *[]){ &gpll8.clkr.hw },
+@@ -706,7 +720,7 @@ static struct clk_rcg2 gcc_camss_axi_clk_src = {
+ .parent_data = gcc_parents_7,
+ .num_parents = ARRAY_SIZE(gcc_parents_7),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -727,7 +741,7 @@ static struct clk_rcg2 gcc_camss_cci_clk_src = {
+ .parent_data = gcc_parents_9,
+ .num_parents = ARRAY_SIZE(gcc_parents_9),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -750,7 +764,7 @@ static struct clk_rcg2 gcc_camss_csi0phytimer_clk_src = {
+ .parent_data = gcc_parents_4,
+ .num_parents = ARRAY_SIZE(gcc_parents_4),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -765,7 +779,7 @@ static struct clk_rcg2 gcc_camss_csi1phytimer_clk_src = {
+ .parent_data = gcc_parents_4,
+ .num_parents = ARRAY_SIZE(gcc_parents_4),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -780,7 +794,7 @@ static struct clk_rcg2 gcc_camss_csi2phytimer_clk_src = {
+ .parent_data = gcc_parents_4,
+ .num_parents = ARRAY_SIZE(gcc_parents_4),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -802,7 +816,7 @@ static struct clk_rcg2 gcc_camss_mclk0_clk_src = {
+ .parent_data = gcc_parents_3,
+ .num_parents = ARRAY_SIZE(gcc_parents_3),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -817,7 +831,7 @@ static struct clk_rcg2 gcc_camss_mclk1_clk_src = {
+ .parent_data = gcc_parents_3,
+ .num_parents = ARRAY_SIZE(gcc_parents_3),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -832,7 +846,7 @@ static struct clk_rcg2 gcc_camss_mclk2_clk_src = {
+ .parent_data = gcc_parents_3,
+ .num_parents = ARRAY_SIZE(gcc_parents_3),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -847,7 +861,7 @@ static struct clk_rcg2 gcc_camss_mclk3_clk_src = {
+ .parent_data = gcc_parents_3,
+ .num_parents = ARRAY_SIZE(gcc_parents_3),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -869,7 +883,7 @@ static struct clk_rcg2 gcc_camss_ope_ahb_clk_src = {
+ .parent_data = gcc_parents_8,
+ .num_parents = ARRAY_SIZE(gcc_parents_8),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -893,7 +907,7 @@ static struct clk_rcg2 gcc_camss_ope_clk_src = {
+ .parent_data = gcc_parents_8,
+ .num_parents = ARRAY_SIZE(gcc_parents_8),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -928,7 +942,7 @@ static struct clk_rcg2 gcc_camss_tfe_0_clk_src = {
+ .parent_data = gcc_parents_5,
+ .num_parents = ARRAY_SIZE(gcc_parents_5),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -953,7 +967,7 @@ static struct clk_rcg2 gcc_camss_tfe_0_csid_clk_src = {
+ .parent_data = gcc_parents_6,
+ .num_parents = ARRAY_SIZE(gcc_parents_6),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -968,7 +982,7 @@ static struct clk_rcg2 gcc_camss_tfe_1_clk_src = {
+ .parent_data = gcc_parents_5,
+ .num_parents = ARRAY_SIZE(gcc_parents_5),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -983,7 +997,7 @@ static struct clk_rcg2 gcc_camss_tfe_1_csid_clk_src = {
+ .parent_data = gcc_parents_6,
+ .num_parents = ARRAY_SIZE(gcc_parents_6),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -998,7 +1012,7 @@ static struct clk_rcg2 gcc_camss_tfe_2_clk_src = {
+ .parent_data = gcc_parents_5,
+ .num_parents = ARRAY_SIZE(gcc_parents_5),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1013,7 +1027,7 @@ static struct clk_rcg2 gcc_camss_tfe_2_csid_clk_src = {
+ .parent_data = gcc_parents_6,
+ .num_parents = ARRAY_SIZE(gcc_parents_6),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1036,7 +1050,7 @@ static struct clk_rcg2 gcc_camss_tfe_cphy_rx_clk_src = {
+ .parent_data = gcc_parents_10,
+ .num_parents = ARRAY_SIZE(gcc_parents_10),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1058,7 +1072,7 @@ static struct clk_rcg2 gcc_camss_top_ahb_clk_src = {
+ .parent_data = gcc_parents_7,
+ .num_parents = ARRAY_SIZE(gcc_parents_7),
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1128,7 +1142,7 @@ static struct clk_rcg2 gcc_pdm2_clk_src = {
+ .name = "gcc_pdm2_clk_src",
+ .parent_data = gcc_parents_0,
+ .num_parents = ARRAY_SIZE(gcc_parents_0),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1341,7 +1355,7 @@ static struct clk_rcg2 gcc_ufs_phy_axi_clk_src = {
+ .name = "gcc_ufs_phy_axi_clk_src",
+ .parent_data = gcc_parents_0,
+ .num_parents = ARRAY_SIZE(gcc_parents_0),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1363,7 +1377,7 @@ static struct clk_rcg2 gcc_ufs_phy_ice_core_clk_src = {
+ .name = "gcc_ufs_phy_ice_core_clk_src",
+ .parent_data = gcc_parents_0,
+ .num_parents = ARRAY_SIZE(gcc_parents_0),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1404,7 +1418,7 @@ static struct clk_rcg2 gcc_ufs_phy_unipro_core_clk_src = {
+ .name = "gcc_ufs_phy_unipro_core_clk_src",
+ .parent_data = gcc_parents_0,
+ .num_parents = ARRAY_SIZE(gcc_parents_0),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1426,7 +1440,7 @@ static struct clk_rcg2 gcc_usb30_prim_master_clk_src = {
+ .name = "gcc_usb30_prim_master_clk_src",
+ .parent_data = gcc_parents_0,
+ .num_parents = ARRAY_SIZE(gcc_parents_0),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+@@ -1495,7 +1509,7 @@ static struct clk_rcg2 gcc_video_venus_clk_src = {
+ .parent_data = gcc_parents_13,
+ .num_parents = ARRAY_SIZE(gcc_parents_13),
+ .flags = CLK_SET_RATE_PARENT,
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_shared_ops,
+ },
+ };
+
+diff --git a/drivers/clk/qcom/gcc-sm6125.c b/drivers/clk/qcom/gcc-sm6125.c
+index 543cfab7561f9..431b55bb0d2f7 100644
+--- a/drivers/clk/qcom/gcc-sm6125.c
++++ b/drivers/clk/qcom/gcc-sm6125.c
+@@ -1121,7 +1121,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
+ .name = "gcc_sdcc1_apps_clk_src",
+ .parent_data = gcc_parent_data_1,
+ .num_parents = ARRAY_SIZE(gcc_parent_data_1),
+- .ops = &clk_rcg2_ops,
++ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+
+@@ -1143,7 +1143,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
+ .name = "gcc_sdcc1_ice_core_clk_src",
+ .parent_data = gcc_parent_data_0,
+ .num_parents = ARRAY_SIZE(gcc_parent_data_0),
+- .ops = &clk_rcg2_floor_ops,
++ .ops = &clk_rcg2_ops,
+ },
+ };
+
+diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c
+index 3236706771b11..e32ad7499285f 100644
+--- a/drivers/clk/qcom/gcc-sm6350.c
++++ b/drivers/clk/qcom/gcc-sm6350.c
+@@ -640,6 +640,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+ .name = "gcc_sdcc2_apps_clk_src",
+ .parent_data = gcc_parent_data_8,
+ .num_parents = ARRAY_SIZE(gcc_parent_data_8),
++ .flags = CLK_OPS_PARENT_ENABLE,
+ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+diff --git a/drivers/clk/qcom/gcc-sm8250.c b/drivers/clk/qcom/gcc-sm8250.c
+index 9755ef4888c19..30bd561461074 100644
+--- a/drivers/clk/qcom/gcc-sm8250.c
++++ b/drivers/clk/qcom/gcc-sm8250.c
+@@ -721,6 +721,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
+ .name = "gcc_sdcc2_apps_clk_src",
+ .parent_data = gcc_parent_data_4,
+ .num_parents = ARRAY_SIZE(gcc_parent_data_4),
++ .flags = CLK_OPS_PARENT_ENABLE,
+ .ops = &clk_rcg2_floor_ops,
+ },
+ };
+@@ -3267,7 +3268,7 @@ static struct gdsc usb30_prim_gdsc = {
+ .pd = {
+ .name = "usb30_prim_gdsc",
+ },
+- .pwrsts = PWRSTS_OFF_ON,
++ .pwrsts = PWRSTS_RET_ON,
+ };
+
+ static struct gdsc usb30_sec_gdsc = {
+@@ -3275,7 +3276,7 @@ static struct gdsc usb30_sec_gdsc = {
+ .pd = {
+ .name = "usb30_sec_gdsc",
+ },
+- .pwrsts = PWRSTS_OFF_ON,
++ .pwrsts = PWRSTS_RET_ON,
+ };
+
+ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
+diff --git a/drivers/clk/qcom/gcc-sm8350.c b/drivers/clk/qcom/gcc-sm8350.c
+index 6d0a9e2d51041..87d03b1684ed0 100644
+--- a/drivers/clk/qcom/gcc-sm8350.c
++++ b/drivers/clk/qcom/gcc-sm8350.c
+@@ -16,6 +16,7 @@
+ #include "clk-regmap.h"
+ #include "clk-regmap-divider.h"
+ #include "clk-regmap-mux.h"
++#include "clk-regmap-phy-mux.h"
+ #include "gdsc.h"
+ #include "reset.h"
+
+@@ -166,26 +167,6 @@ static const struct clk_parent_data gcc_parent_data_3[] = {
+ { .fw_name = "core_bi_pll_test_se" },
+ };
+
+-static const struct parent_map gcc_parent_map_4[] = {
+- { P_PCIE_0_PIPE_CLK, 0 },
+- { P_BI_TCXO, 2 },
+-};
+-
+-static const struct clk_parent_data gcc_parent_data_4[] = {
+- { .fw_name = "pcie_0_pipe_clk", },
+- { .fw_name = "bi_tcxo" },
+-};
+-
+-static const struct parent_map gcc_parent_map_5[] = {
+- { P_PCIE_1_PIPE_CLK, 0 },
+- { P_BI_TCXO, 2 },
+-};
+-
+-static const struct clk_parent_data gcc_parent_data_5[] = {
+- { .fw_name = "pcie_1_pipe_clk" },
+- { .fw_name = "bi_tcxo" },
+-};
+-
+ static const struct parent_map gcc_parent_map_6[] = {
+ { P_BI_TCXO, 0 },
+ { P_GCC_GPLL0_OUT_MAIN, 1 },
+@@ -288,32 +269,30 @@ static const struct clk_parent_data gcc_parent_data_14[] = {
+ { .fw_name = "bi_tcxo" },
+ };
+
+-static struct clk_regmap_mux gcc_pcie_0_pipe_clk_src = {
++static struct clk_regmap_phy_mux gcc_pcie_0_pipe_clk_src = {
+ .reg = 0x6b054,
+- .shift = 0,
+- .width = 2,
+- .parent_map = gcc_parent_map_4,
+ .clkr = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_pcie_0_pipe_clk_src",
+- .parent_data = gcc_parent_data_4,
+- .num_parents = ARRAY_SIZE(gcc_parent_data_4),
+- .ops = &clk_regmap_mux_closest_ops,
++ .parent_data = &(const struct clk_parent_data){
++ .fw_name = "pcie_0_pipe_clk",
++ },
++ .num_parents = 1,
++ .ops = &clk_regmap_phy_mux_ops,
+ },
+ },
+ };
+
+-static struct clk_regmap_mux gcc_pcie_1_pipe_clk_src = {
++static struct clk_regmap_phy_mux gcc_pcie_1_pipe_clk_src = {
+ .reg = 0x8d054,
+- .shift = 0,
+- .width = 2,
+- .parent_map = gcc_parent_map_5,
+ .clkr = {
+ .hw.init = &(struct clk_init_data){
+ .name = "gcc_pcie_1_pipe_clk_src",
+- .parent_data = gcc_parent_data_5,
+- .num_parents = ARRAY_SIZE(gcc_parent_data_5),
+- .ops = &clk_regmap_mux_closest_ops,
++ .parent_data = &(const struct clk_parent_data){
++ .fw_name = "pcie_1_pipe_clk",
++ },
++ .num_parents = 1,
++ .ops = &clk_regmap_phy_mux_ops,
+ },
+ },
+ };
+diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
+index 4ece326ea233e..cf23cfd7e4674 100644
+--- a/drivers/clk/qcom/gdsc.c
++++ b/drivers/clk/qcom/gdsc.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
++ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
+ */
+
+ #include <linux/bitops.h>
+@@ -34,9 +34,14 @@
+ #define CFG_GDSCR_OFFSET 0x4
+
+ /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */
+-#define EN_REST_WAIT_VAL (0x2 << 20)
+-#define EN_FEW_WAIT_VAL (0x8 << 16)
+-#define CLK_DIS_WAIT_VAL (0x2 << 12)
++#define EN_REST_WAIT_VAL 0x2
++#define EN_FEW_WAIT_VAL 0x8
++#define CLK_DIS_WAIT_VAL 0x2
++
++/* Transition delay shifts */
++#define EN_REST_WAIT_SHIFT 20
++#define EN_FEW_WAIT_SHIFT 16
++#define CLK_DIS_WAIT_SHIFT 12
+
+ #define RETAIN_MEM BIT(14)
+ #define RETAIN_PERIPH BIT(13)
+@@ -341,7 +346,18 @@ static int gdsc_init(struct gdsc *sc)
+ */
+ mask = HW_CONTROL_MASK | SW_OVERRIDE_MASK |
+ EN_REST_WAIT_MASK | EN_FEW_WAIT_MASK | CLK_DIS_WAIT_MASK;
+- val = EN_REST_WAIT_VAL | EN_FEW_WAIT_VAL | CLK_DIS_WAIT_VAL;
++
++ if (!sc->en_rest_wait_val)
++ sc->en_rest_wait_val = EN_REST_WAIT_VAL;
++ if (!sc->en_few_wait_val)
++ sc->en_few_wait_val = EN_FEW_WAIT_VAL;
++ if (!sc->clk_dis_wait_val)
++ sc->clk_dis_wait_val = CLK_DIS_WAIT_VAL;
++
++ val = sc->en_rest_wait_val << EN_REST_WAIT_SHIFT |
++ sc->en_few_wait_val << EN_FEW_WAIT_SHIFT |
++ sc->clk_dis_wait_val << CLK_DIS_WAIT_SHIFT;
++
+ ret = regmap_update_bits(sc->regmap, sc->gdscr, mask, val);
+ if (ret)
+ return ret;
+diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
+index 5bb396b344d16..762f1b5e1ec51 100644
+--- a/drivers/clk/qcom/gdsc.h
++++ b/drivers/clk/qcom/gdsc.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+ /*
+- * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved.
++ * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved.
+ */
+
+ #ifndef __QCOM_GDSC_H__
+@@ -22,6 +22,9 @@ struct reset_controller_dev;
+ * @cxcs: offsets of branch registers to toggle mem/periph bits in
+ * @cxc_count: number of @cxcs
+ * @pwrsts: Possible powerdomain power states
++ * @en_rest_wait_val: transition delay value for receiving enr ack signal
++ * @en_few_wait_val: transition delay value for receiving enf ack signal
++ * @clk_dis_wait_val: transition delay value for halting clock
+ * @resets: ids of resets associated with this gdsc
+ * @reset_count: number of @resets
+ * @rcdev: reset controller
+@@ -35,6 +38,9 @@ struct gdsc {
+ unsigned int clamp_io_ctrl;
+ unsigned int *cxcs;
+ unsigned int cxc_count;
++ unsigned int en_rest_wait_val;
++ unsigned int en_few_wait_val;
++ unsigned int clk_dis_wait_val;
+ const u8 pwrsts;
+ /* Powerdomain allowable state bitfields */
+ #define PWRSTS_OFF BIT(0)
+diff --git a/drivers/clk/qcom/gpucc-sc7280.c b/drivers/clk/qcom/gpucc-sc7280.c
+index 9a832f2bcf491..1490cd45a654a 100644
+--- a/drivers/clk/qcom/gpucc-sc7280.c
++++ b/drivers/clk/qcom/gpucc-sc7280.c
+@@ -463,6 +463,7 @@ static int gpu_cc_sc7280_probe(struct platform_device *pdev)
+ */
+ regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0));
+ regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0));
++ regmap_update_bits(regmap, 0x1098, BIT(13), BIT(13));
+
+ return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap);
+ }
+diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c
+index ac09b7b840aba..a5731994cbed1 100644
+--- a/drivers/clk/qcom/lpasscorecc-sc7180.c
++++ b/drivers/clk/qcom/lpasscorecc-sc7180.c
+@@ -356,7 +356,7 @@ static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = {
+ .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs),
+ };
+
+-static int lpass_create_pm_clks(struct platform_device *pdev)
++static int lpass_setup_runtime_pm(struct platform_device *pdev)
+ {
+ int ret;
+
+@@ -375,7 +375,7 @@ static int lpass_create_pm_clks(struct platform_device *pdev)
+ if (ret < 0)
+ dev_err(&pdev->dev, "failed to acquire iface clock\n");
+
+- return ret;
++ return pm_runtime_resume_and_get(&pdev->dev);
+ }
+
+ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
+@@ -384,7 +384,7 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
+ struct regmap *regmap;
+ int ret;
+
+- ret = lpass_create_pm_clks(pdev);
++ ret = lpass_setup_runtime_pm(pdev);
+ if (ret)
+ return ret;
+
+@@ -392,12 +392,14 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
+ desc = &lpass_audio_hm_sc7180_desc;
+ ret = qcom_cc_probe_by_index(pdev, 1, desc);
+ if (ret)
+- return ret;
++ goto exit;
+
+ lpass_core_cc_sc7180_regmap_config.name = "lpass_core_cc";
+ regmap = qcom_cc_map(pdev, &lpass_core_cc_sc7180_desc);
+- if (IS_ERR(regmap))
+- return PTR_ERR(regmap);
++ if (IS_ERR(regmap)) {
++ ret = PTR_ERR(regmap);
++ goto exit;
++ }
+
+ /*
+ * Keep the CLK always-ON
+@@ -415,6 +417,7 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
+ ret = qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap);
+
+ pm_runtime_mark_last_busy(&pdev->dev);
++exit:
+ pm_runtime_put_autosuspend(&pdev->dev);
+
+ return ret;
+@@ -425,14 +428,19 @@ static int lpass_hm_core_probe(struct platform_device *pdev)
+ const struct qcom_cc_desc *desc;
+ int ret;
+
+- ret = lpass_create_pm_clks(pdev);
++ ret = lpass_setup_runtime_pm(pdev);
+ if (ret)
+ return ret;
+
+ lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core";
+ desc = &lpass_core_hm_sc7180_desc;
+
+- return qcom_cc_probe_by_index(pdev, 0, desc);
++ ret = qcom_cc_probe_by_index(pdev, 0, desc);
++
++ pm_runtime_mark_last_busy(&pdev->dev);
++ pm_runtime_put_autosuspend(&pdev->dev);
++
++ return ret;
+ }
+
+ static const struct of_device_id lpass_hm_sc7180_match_table[] = {
+diff --git a/drivers/clk/qcom/mss-sc7180.c b/drivers/clk/qcom/mss-sc7180.c
+index 5a14074406623..d106bc65470e1 100644
+--- a/drivers/clk/qcom/mss-sc7180.c
++++ b/drivers/clk/qcom/mss-sc7180.c
+@@ -87,11 +87,22 @@ static int mss_sc7180_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ ret = pm_runtime_resume_and_get(&pdev->dev);
++ if (ret)
++ return ret;
++
+ ret = qcom_cc_probe(pdev, &mss_sc7180_desc);
+ if (ret < 0)
+- return ret;
++ goto err_put_rpm;
++
++ pm_runtime_put(&pdev->dev);
+
+ return 0;
++
++err_put_rpm:
++ pm_runtime_put_sync(&pdev->dev);
++
++ return ret;
+ }
+
+ static const struct dev_pm_ops mss_sc7180_pm_ops = {
+diff --git a/drivers/clk/qcom/q6sstop-qcs404.c b/drivers/clk/qcom/q6sstop-qcs404.c
+index 507386bee07dc..eb86fec29927e 100644
+--- a/drivers/clk/qcom/q6sstop-qcs404.c
++++ b/drivers/clk/qcom/q6sstop-qcs404.c
+@@ -173,21 +173,32 @@ static int q6sstopcc_qcs404_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ ret = pm_runtime_resume_and_get(&pdev->dev);
++ if (ret)
++ return ret;
++
+ q6sstop_regmap_config.name = "q6sstop_tcsr";
+ desc = &tcsr_qcs404_desc;
+
+ ret = qcom_cc_probe_by_index(pdev, 1, desc);
+ if (ret)
+- return ret;
++ goto err_put_rpm;
+
+ q6sstop_regmap_config.name = "q6sstop_cc";
+ desc = &q6sstop_qcs404_desc;
+
+ ret = qcom_cc_probe_by_index(pdev, 0, desc);
+ if (ret)
+- return ret;
++ goto err_put_rpm;
++
++ pm_runtime_put(&pdev->dev);
+
+ return 0;
++
++err_put_rpm:
++ pm_runtime_put_sync(&pdev->dev);
++
++ return ret;
+ }
+
+ static const struct dev_pm_ops q6sstopcc_pm_ops = {
+diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c
+index 819d194be8f7b..e45e32804d2c7 100644
+--- a/drivers/clk/qcom/reset.c
++++ b/drivers/clk/qcom/reset.c
+@@ -13,8 +13,11 @@
+
+ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id)
+ {
++ struct qcom_reset_controller *rst = to_qcom_reset_controller(rcdev);
++
+ rcdev->ops->assert(rcdev, id);
+- udelay(1);
++ fsleep(rst->reset_map[id].udelay ?: 1); /* use 1 us as default */
++
+ rcdev->ops->deassert(rcdev, id);
+ return 0;
+ }
+@@ -28,7 +31,7 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id)
+
+ rst = to_qcom_reset_controller(rcdev);
+ map = &rst->reset_map[id];
+- mask = BIT(map->bit);
++ mask = map->bitmask ? map->bitmask : BIT(map->bit);
+
+ return regmap_update_bits(rst->regmap, map->reg, mask, mask);
+ }
+@@ -42,7 +45,7 @@ qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id)
+
+ rst = to_qcom_reset_controller(rcdev);
+ map = &rst->reset_map[id];
+- mask = BIT(map->bit);
++ mask = map->bitmask ? map->bitmask : BIT(map->bit);
+
+ return regmap_update_bits(rst->regmap, map->reg, mask, 0);
+ }
+diff --git a/drivers/clk/qcom/reset.h b/drivers/clk/qcom/reset.h
+index 2a08b5e282c77..9a47c838d9b1b 100644
+--- a/drivers/clk/qcom/reset.h
++++ b/drivers/clk/qcom/reset.h
+@@ -11,6 +11,8 @@
+ struct qcom_reset_map {
+ unsigned int reg;
+ u8 bit;
++ u8 udelay;
++ u32 bitmask;
+ };
+
+ struct regmap;
+diff --git a/drivers/clk/qcom/turingcc-qcs404.c b/drivers/clk/qcom/turingcc-qcs404.c
+index 4543bda793f4f..c76d36a1fcfda 100644
+--- a/drivers/clk/qcom/turingcc-qcs404.c
++++ b/drivers/clk/qcom/turingcc-qcs404.c
+@@ -124,11 +124,22 @@ static int turingcc_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ ret = pm_runtime_resume_and_get(&pdev->dev);
++ if (ret)
++ return ret;
++
+ ret = qcom_cc_probe(pdev, &turingcc_desc);
+ if (ret < 0)
+- return ret;
++ goto err_put_rpm;
++
++ pm_runtime_put(&pdev->dev);
+
+ return 0;
++
++err_put_rpm:
++ pm_runtime_put_sync(&pdev->dev);
++
++ return ret;
+ }
+
+ static const struct dev_pm_ops turingcc_pm_ops = {
+diff --git a/drivers/clk/renesas/r9a06g032-clocks.c b/drivers/clk/renesas/r9a06g032-clocks.c
+index c99942f0e4d4c..3e43ae8480ddf 100644
+--- a/drivers/clk/renesas/r9a06g032-clocks.c
++++ b/drivers/clk/renesas/r9a06g032-clocks.c
+@@ -286,8 +286,8 @@ static const struct r9a06g032_clkdesc r9a06g032_clocks[] = {
+ .name = "uart_group_012",
+ .type = K_BITSEL,
+ .source = 1 + R9A06G032_DIV_UART,
+- /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */
+- .dual.sel = ((0xec / 4) << 5) | 24,
++ /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */
++ .dual.sel = ((0x34 / 4) << 5) | 30,
+ .dual.group = 0,
+ },
+ {
+@@ -295,8 +295,8 @@ static const struct r9a06g032_clkdesc r9a06g032_clocks[] = {
+ .name = "uart_group_34567",
+ .type = K_BITSEL,
+ .source = 1 + R9A06G032_DIV_P2_PG,
+- /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */
+- .dual.sel = ((0x34 / 4) << 5) | 30,
++ /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */
++ .dual.sel = ((0xec / 4) << 5) | 24,
+ .dual.group = 1,
+ },
+ D_UGATE(CLK_UART0, "clk_uart0", UART_GROUP_012, 0, 0, 0x1b2, 0x1b3, 0x1b4, 0x1b5),
+@@ -386,7 +386,7 @@ static int r9a06g032_attach_dev(struct generic_pm_domain *pd,
+ int error;
+ int index;
+
+- while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i,
++ while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i++,
+ &clkspec)) {
+ if (clkspec.np != pd->dev.of_node)
+ continue;
+@@ -399,7 +399,6 @@ static int r9a06g032_attach_dev(struct generic_pm_domain *pd,
+ if (error)
+ return error;
+ }
+- i++;
+ }
+
+ return 0;
+diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c
+index 1490446985e2e..61609eddf7d04 100644
+--- a/drivers/clk/renesas/r9a07g044-cpg.c
++++ b/drivers/clk/renesas/r9a07g044-cpg.c
+@@ -61,8 +61,8 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = {
+ DEF_FIXED(".osc", R9A07G044_OSCCLK, CLK_EXTAL, 1, 1),
+ DEF_FIXED(".osc_div1000", CLK_OSC_DIV1000, CLK_EXTAL, 1, 1000),
+ DEF_SAMPLL(".pll1", CLK_PLL1, CLK_EXTAL, PLL146_CONF(0)),
+- DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 133, 2),
+- DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 133, 2),
++ DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 200, 3),
++ DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 200, 3),
+
+ DEF_FIXED(".pll2_div2", CLK_PLL2_DIV2, CLK_PLL2, 1, 2),
+ DEF_FIXED(".pll2_div16", CLK_PLL2_DIV16, CLK_PLL2, 1, 16),
+diff --git a/drivers/clk/renesas/rzg2l-cpg.c b/drivers/clk/renesas/rzg2l-cpg.c
+index 761922ea5db76..1c92e73cd2b8c 100644
+--- a/drivers/clk/renesas/rzg2l-cpg.c
++++ b/drivers/clk/renesas/rzg2l-cpg.c
+@@ -638,10 +638,16 @@ static void rzg2l_cpg_detach_dev(struct generic_pm_domain *unused, struct device
+ pm_clk_destroy(dev);
+ }
+
++static void rzg2l_cpg_genpd_remove(void *data)
++{
++ pm_genpd_remove(data);
++}
++
+ static int __init rzg2l_cpg_add_clk_domain(struct device *dev)
+ {
+ struct device_node *np = dev->of_node;
+ struct generic_pm_domain *genpd;
++ int ret;
+
+ genpd = devm_kzalloc(dev, sizeof(*genpd), GFP_KERNEL);
+ if (!genpd)
+@@ -652,10 +658,15 @@ static int __init rzg2l_cpg_add_clk_domain(struct device *dev)
+ GENPD_FLAG_ACTIVE_WAKEUP;
+ genpd->attach_dev = rzg2l_cpg_attach_dev;
+ genpd->detach_dev = rzg2l_cpg_detach_dev;
+- pm_genpd_init(genpd, &pm_domain_always_on_gov, false);
++ ret = pm_genpd_init(genpd, &pm_domain_always_on_gov, false);
++ if (ret)
++ return ret;
+
+- of_genpd_add_provider_simple(np, genpd);
+- return 0;
++ ret = devm_add_action_or_reset(dev, rzg2l_cpg_genpd_remove, genpd);
++ if (ret)
++ return ret;
++
++ return of_genpd_add_provider_simple(np, genpd);
+ }
+
+ static int __init rzg2l_cpg_probe(struct platform_device *pdev)
+diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
+index f7827b3b7fc1c..6e5e502be44a6 100644
+--- a/drivers/clk/rockchip/clk-pll.c
++++ b/drivers/clk/rockchip/clk-pll.c
+@@ -981,6 +981,7 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx,
+ return mux_clk;
+
+ err_pll:
++ kfree(pll->rate_table);
+ clk_unregister(mux_clk);
+ mux_clk = pll_clk;
+ err_mux:
+diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
+index 62a4f25439607..6bcf631b4e4c2 100644
+--- a/drivers/clk/rockchip/clk-rk3399.c
++++ b/drivers/clk/rockchip/clk-rk3399.c
+@@ -1263,7 +1263,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
+ RK3399_CLKSEL_CON(56), 6, 2, MFLAGS,
+ RK3399_CLKGATE_CON(10), 7, GFLAGS),
+
+- COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cifout", mux_clk_cif_p, 0,
++ COMPOSITE_NOGATE(SCLK_CIF_OUT, "clk_cifout", mux_clk_cif_p, CLK_SET_RATE_PARENT,
+ RK3399_CLKSEL_CON(56), 5, 1, MFLAGS, 0, 5, DFLAGS),
+
+ /* gic */
+diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c
+index 75ca855e720df..6e5440841d1ee 100644
+--- a/drivers/clk/rockchip/clk-rk3568.c
++++ b/drivers/clk/rockchip/clk-rk3568.c
+@@ -1038,13 +1038,13 @@ static struct rockchip_clk_branch rk3568_clk_branches[] __initdata = {
+ RK3568_CLKGATE_CON(20), 8, GFLAGS),
+ GATE(HCLK_VOP, "hclk_vop", "hclk_vo", 0,
+ RK3568_CLKGATE_CON(20), 9, GFLAGS),
+- COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
++ COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT,
+ RK3568_CLKSEL_CON(39), 10, 2, MFLAGS, 0, 8, DFLAGS,
+ RK3568_CLKGATE_CON(20), 10, GFLAGS),
+- COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
++ COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT,
+ RK3568_CLKSEL_CON(40), 10, 2, MFLAGS, 0, 8, DFLAGS,
+ RK3568_CLKGATE_CON(20), 11, GFLAGS),
+- COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, 0,
++ COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT,
+ RK3568_CLKSEL_CON(41), 10, 2, MFLAGS, 0, 8, DFLAGS,
+ RK3568_CLKGATE_CON(20), 12, GFLAGS),
+ GATE(CLK_VOP_PWM, "clk_vop_pwm", "xin24m", 0,
+diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
+index b7be7e11b0dfe..bb8a844309bf5 100644
+--- a/drivers/clk/rockchip/clk.c
++++ b/drivers/clk/rockchip/clk.c
+@@ -180,6 +180,7 @@ static void rockchip_fractional_approximation(struct clk_hw *hw,
+ unsigned long rate, unsigned long *parent_rate,
+ unsigned long *m, unsigned long *n)
+ {
++ struct clk_fractional_divider *fd = to_clk_fd(hw);
+ unsigned long p_rate, p_parent_rate;
+ struct clk_hw *p_parent;
+
+@@ -190,6 +191,8 @@ static void rockchip_fractional_approximation(struct clk_hw *hw,
+ *parent_rate = p_parent_rate;
+ }
+
++ fd->flags |= CLK_FRAC_DIVIDER_POWER_OF_TWO_PS;
++
+ clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n);
+ }
+
+diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c
+index 5873a9354b507..4909e940f0ab6 100644
+--- a/drivers/clk/samsung/clk-pll.c
++++ b/drivers/clk/samsung/clk-pll.c
+@@ -1385,6 +1385,7 @@ static void __init _samsung_clk_register_pll(struct samsung_clk_provider *ctx,
+ if (ret) {
+ pr_err("%s: failed to register pll clock %s : %d\n",
+ __func__, pll_clk->name, ret);
++ kfree(pll->rate_table);
+ kfree(pll);
+ return;
+ }
+diff --git a/drivers/clk/socfpga/clk-gate.c b/drivers/clk/socfpga/clk-gate.c
+index 1ec9678d8cd32..ee2a2d284113c 100644
+--- a/drivers/clk/socfpga/clk-gate.c
++++ b/drivers/clk/socfpga/clk-gate.c
+@@ -188,8 +188,10 @@ void __init socfpga_gate_init(struct device_node *node)
+ return;
+
+ ops = kmemdup(&gateclk_ops, sizeof(gateclk_ops), GFP_KERNEL);
+- if (WARN_ON(!ops))
++ if (WARN_ON(!ops)) {
++ kfree(socfpga_clk);
+ return;
++ }
+
+ rc = of_property_read_u32_array(node, "clk-gate", clk_gate, 2);
+ if (rc)
+@@ -243,6 +245,7 @@ void __init socfpga_gate_init(struct device_node *node)
+
+ err = clk_hw_register(NULL, hw_clk);
+ if (err) {
++ kfree(ops);
+ kfree(socfpga_clk);
+ return;
+ }
+diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c
+index d620bbbcdfc88..2bfbab8db94bf 100644
+--- a/drivers/clk/sprd/common.c
++++ b/drivers/clk/sprd/common.c
+@@ -17,7 +17,6 @@ static const struct regmap_config sprdclk_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+- .max_register = 0xffff,
+ .fast_io = true,
+ };
+
+@@ -41,8 +40,10 @@ int sprd_clk_regmap_init(struct platform_device *pdev,
+ {
+ void __iomem *base;
+ struct device *dev = &pdev->dev;
+- struct device_node *node = dev->of_node;
++ struct device_node *node = dev->of_node, *np;
+ struct regmap *regmap;
++ struct resource *res;
++ struct regmap_config reg_config = sprdclk_regmap_config;
+
+ if (of_find_property(node, "sprd,syscon", NULL)) {
+ regmap = syscon_regmap_lookup_by_phandle(node, "sprd,syscon");
+@@ -50,20 +51,23 @@ int sprd_clk_regmap_init(struct platform_device *pdev,
+ pr_err("%s: failed to get syscon regmap\n", __func__);
+ return PTR_ERR(regmap);
+ }
+- } else if (of_device_is_compatible(of_get_parent(dev->of_node),
+- "syscon")) {
+- regmap = device_node_to_regmap(of_get_parent(dev->of_node));
++ } else if (of_device_is_compatible(np = of_get_parent(node), "syscon") ||
++ (of_node_put(np), 0)) {
++ regmap = device_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to get regmap from its parent.\n");
+ return PTR_ERR(regmap);
+ }
+ } else {
+- base = devm_platform_ioremap_resource(pdev, 0);
++ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
++ reg_config.max_register = resource_size(res) - reg_config.reg_stride;
++
+ regmap = devm_regmap_init_mmio(&pdev->dev, base,
+- &sprdclk_regmap_config);
++ &reg_config);
+ if (IS_ERR(regmap)) {
+ pr_err("failed to init regmap\n");
+ return PTR_ERR(regmap);
+diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c
+index 164285d6be978..ba18e58f0aae3 100644
+--- a/drivers/clk/st/clkgen-fsyn.c
++++ b/drivers/clk/st/clkgen-fsyn.c
+@@ -1008,9 +1008,10 @@ static void __init st_of_quadfs_setup(struct device_node *np,
+
+ clk = st_clk_register_quadfs_pll(pll_name, clk_parent_name, datac->data,
+ reg, lock);
+- if (IS_ERR(clk))
++ if (IS_ERR(clk)) {
++ kfree(lock);
+ goto err_exit;
+- else
++ } else
+ pr_debug("%s: parent %s rate %u\n",
+ __clk_get_name(clk),
+ __clk_get_name(clk_get_parent(clk)),
+diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
+index f32366d9336e7..bd9a8782fec3d 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
++++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
+@@ -1464,7 +1464,7 @@ static void __init sun4i_ccu_init(struct device_node *node,
+ val &= ~GENMASK(7, 6);
+ writel(val | (2 << 6), reg + SUN4I_AHB_REG);
+
+- sunxi_ccu_probe(node, reg, desc);
++ of_sunxi_ccu_probe(node, reg, desc);
+ }
+
+ static void __init sun4i_a10_ccu_setup(struct device_node *node)
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a100-r.c b/drivers/clk/sunxi-ng/ccu-sun50i-a100-r.c
+index a56142b909938..6f2a589705561 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-a100-r.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-a100-r.c
+@@ -196,7 +196,7 @@ static int sun50i_a100_r_ccu_probe(struct platform_device *pdev)
+ if (IS_ERR(reg))
+ return PTR_ERR(reg);
+
+- return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a100_r_ccu_desc);
++ return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_a100_r_ccu_desc);
+ }
+
+ static const struct of_device_id sun50i_a100_r_ccu_ids[] = {
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a100.c b/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
+index 81b48c73d389f..913bb08e6dee8 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
+@@ -1247,7 +1247,7 @@ static int sun50i_a100_ccu_probe(struct platform_device *pdev)
+ writel(val, reg + sun50i_a100_usb2_clk_regs[i]);
+ }
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a100_ccu_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_a100_ccu_desc);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
+index 149cfde817cba..54f25c624f020 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
+@@ -955,7 +955,7 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev)
+
+ writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG);
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_a64_ccu_desc);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+index f8909a7ed5539..f30d7eb5424d8 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+@@ -232,7 +232,7 @@ static void __init sunxi_r_ccu_init(struct device_node *node,
+ return;
+ }
+
+- sunxi_ccu_probe(node, reg, desc);
++ of_sunxi_ccu_probe(node, reg, desc);
+ }
+
+ static void __init sun50i_h6_r_ccu_setup(struct device_node *node)
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
+index bff446b782907..c0800da2fa3d7 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
+@@ -1240,7 +1240,7 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev)
+ val |= BIT(24);
+ writel(val, reg + SUN50I_H6_HDMI_CEC_CLK_REG);
+
+- return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_h6_ccu_desc);
++ return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc);
+ }
+
+ static const struct of_device_id sun50i_h6_ccu_ids[] = {
+diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c
+index 225307305880e..22eb18079a154 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c
++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c
+@@ -1141,9 +1141,7 @@ static void __init sun50i_h616_ccu_setup(struct device_node *node)
+ val |= BIT(24);
+ writel(val, reg + SUN50I_H616_HDMI_CEC_CLK_REG);
+
+- i = sunxi_ccu_probe(node, reg, &sun50i_h616_ccu_desc);
+- if (i)
+- pr_err("%pOF: probing clocks fails: %d\n", node, i);
++ of_sunxi_ccu_probe(node, reg, &sun50i_h616_ccu_desc);
+ }
+
+ CLK_OF_DECLARE(sun50i_h616_ccu, "allwinner,sun50i-h616-ccu",
+diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
+index b78e9b507c1c6..1f4bc0e773a7e 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun5i.c
++++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
+@@ -1012,7 +1012,7 @@ static void __init sun5i_ccu_init(struct device_node *node,
+ val &= ~GENMASK(7, 6);
+ writel(val | (2 << 6), reg + SUN5I_AHB_REG);
+
+- sunxi_ccu_probe(node, reg, desc);
++ of_sunxi_ccu_probe(node, reg, desc);
+ }
+
+ static void __init sun5i_a10s_ccu_setup(struct device_node *node)
+diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+index 9b40d53266a3f..3df5c0b415804 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
++++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+@@ -1257,7 +1257,7 @@ static void __init sun6i_a31_ccu_setup(struct device_node *node)
+ val |= 0x3 << 12;
+ writel(val, reg + SUN6I_A31_AHB1_REG);
+
+- sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
++ of_sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
+
+ ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk,
+ &sun6i_a31_cpu_nb);
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c
+index 103aa504f6c8a..577bb235d6584 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c
+@@ -745,7 +745,7 @@ static void __init sun8i_a23_ccu_setup(struct device_node *node)
+ val &= ~BIT(16);
+ writel(val, reg + SUN8I_A23_PLL_MIPI_REG);
+
+- sunxi_ccu_probe(node, reg, &sun8i_a23_ccu_desc);
++ of_sunxi_ccu_probe(node, reg, &sun8i_a23_ccu_desc);
+ }
+ CLK_OF_DECLARE(sun8i_a23_ccu, "allwinner,sun8i-a23-ccu",
+ sun8i_a23_ccu_setup);
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+index 91838cd110377..8f65cd03f5acc 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+@@ -805,7 +805,7 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node)
+ val &= ~BIT(16);
+ writel(val, reg + SUN8I_A33_PLL_MIPI_REG);
+
+- sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
++ of_sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
+
+ /* Gate then ungate PLL CPU after any rate changes */
+ ccu_pll_notifier_register(&sun8i_a33_pll_cpu_nb);
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a83t.c b/drivers/clk/sunxi-ng/ccu-sun8i-a83t.c
+index 2b434521c5ccf..c2ddcd2ddab4e 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-a83t.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-a83t.c
+@@ -906,7 +906,7 @@ static int sun8i_a83t_ccu_probe(struct platform_device *pdev)
+ sun8i_a83t_cpu_pll_fixup(reg + SUN8I_A83T_PLL_C0CPUX_REG);
+ sun8i_a83t_cpu_pll_fixup(reg + SUN8I_A83T_PLL_C1CPUX_REG);
+
+- return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun8i_a83t_ccu_desc);
++ return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun8i_a83t_ccu_desc);
+ }
+
+ static const struct of_device_id sun8i_a83t_ccu_ids[] = {
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-de2.c b/drivers/clk/sunxi-ng/ccu-sun8i-de2.c
+index 524f33275bc73..4b94b6041b271 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-de2.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-de2.c
+@@ -342,7 +342,7 @@ static int sunxi_de2_clk_probe(struct platform_device *pdev)
+ goto err_disable_mod_clk;
+ }
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg, ccu_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, ccu_desc);
+ if (ret)
+ goto err_assert_reset;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+index 7e629a4493afd..d2fc2903787d8 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+@@ -1154,7 +1154,7 @@ static void __init sunxi_h3_h5_ccu_init(struct device_node *node,
+ val &= ~GENMASK(19, 16);
+ writel(val | (0 << 16), reg + SUN8I_H3_PLL_AUDIO_REG);
+
+- sunxi_ccu_probe(node, reg, desc);
++ of_sunxi_ccu_probe(node, reg, desc);
+
+ /* Gate then ungate PLL CPU after any rate changes */
+ ccu_pll_notifier_register(&sun8i_h3_pll_cpu_nb);
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r.c b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
+index 4c8c491b87c27..9e754d1f754a1 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-r.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-r.c
+@@ -265,7 +265,7 @@ static void __init sunxi_r_ccu_init(struct device_node *node,
+ return;
+ }
+
+- sunxi_ccu_probe(node, reg, desc);
++ of_sunxi_ccu_probe(node, reg, desc);
+ }
+
+ static void __init sun8i_a83t_r_ccu_setup(struct device_node *node)
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r40.c b/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
+index 84153418453f4..002e0c3a04dbe 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-r40.c
+@@ -1346,7 +1346,7 @@ static int sun8i_r40_ccu_probe(struct platform_device *pdev)
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun8i_r40_ccu_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun8i_r40_ccu_desc);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+index f49724a22540e..ce150f83ab54e 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
++++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+@@ -822,7 +822,7 @@ static void __init sun8i_v3_v3s_ccu_init(struct device_node *node,
+ val &= ~GENMASK(19, 16);
+ writel(val, reg + SUN8I_V3S_PLL_AUDIO_REG);
+
+- sunxi_ccu_probe(node, reg, ccu_desc);
++ of_sunxi_ccu_probe(node, reg, ccu_desc);
+ }
+
+ static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c
+index 6616e8114f623..261e64416f26a 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c
++++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c
+@@ -246,8 +246,7 @@ static int sun9i_a80_de_clk_probe(struct platform_device *pdev)
+ goto err_disable_clk;
+ }
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg,
+- &sun9i_a80_de_clk_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun9i_a80_de_clk_desc);
+ if (ret)
+ goto err_assert_reset;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c
+index 4b4a507d04edf..596243b3e0fa3 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c
++++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c
+@@ -117,8 +117,7 @@ static int sun9i_a80_usb_clk_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- ret = sunxi_ccu_probe(pdev->dev.of_node, reg,
+- &sun9i_a80_usb_clk_desc);
++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun9i_a80_usb_clk_desc);
+ if (ret)
+ goto err_disable_clk;
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+index ef29582676f6e..97aaed0e68500 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
++++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+@@ -1231,7 +1231,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev)
+ sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C0CPUX_REG);
+ sun9i_a80_cpu_pll_fixup(reg + SUN9I_A80_PLL_C1CPUX_REG);
+
+- return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun9i_a80_ccu_desc);
++ return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun9i_a80_ccu_desc);
+ }
+
+ static const struct of_device_id sun9i_a80_ccu_ids[] = {
+diff --git a/drivers/clk/sunxi-ng/ccu-suniv-f1c100s.c b/drivers/clk/sunxi-ng/ccu-suniv-f1c100s.c
+index 7ecc3a5a5b5e1..61ad7ee91c114 100644
+--- a/drivers/clk/sunxi-ng/ccu-suniv-f1c100s.c
++++ b/drivers/clk/sunxi-ng/ccu-suniv-f1c100s.c
+@@ -538,7 +538,7 @@ static void __init suniv_f1c100s_ccu_setup(struct device_node *node)
+ val &= ~GENMASK(19, 16);
+ writel(val | (3 << 16), reg + SUNIV_PLL_AUDIO_REG);
+
+- sunxi_ccu_probe(node, reg, &suniv_ccu_desc);
++ of_sunxi_ccu_probe(node, reg, &suniv_ccu_desc);
+
+ /* Gate then ungate PLL CPU after any rate changes */
+ ccu_pll_notifier_register(&suniv_pll_cpu_nb);
+diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
+index 2e20e650b6c01..88cb569e58358 100644
+--- a/drivers/clk/sunxi-ng/ccu_common.c
++++ b/drivers/clk/sunxi-ng/ccu_common.c
+@@ -7,6 +7,7 @@
+
+ #include <linux/clk.h>
+ #include <linux/clk-provider.h>
++#include <linux/device.h>
+ #include <linux/iopoll.h>
+ #include <linux/slab.h>
+
+@@ -14,6 +15,11 @@
+ #include "ccu_gate.h"
+ #include "ccu_reset.h"
+
++struct sunxi_ccu {
++ const struct sunxi_ccu_desc *desc;
++ struct ccu_reset reset;
++};
++
+ static DEFINE_SPINLOCK(ccu_lock);
+
+ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
+@@ -79,12 +85,15 @@ int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb)
+ &pll_nb->clk_nb);
+ }
+
+-int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
+- const struct sunxi_ccu_desc *desc)
++static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
++ struct device_node *node, void __iomem *reg,
++ const struct sunxi_ccu_desc *desc)
+ {
+ struct ccu_reset *reset;
+ int i, ret;
+
++ ccu->desc = desc;
++
+ for (i = 0; i < desc->num_ccu_clks; i++) {
+ struct ccu_common *cclk = desc->ccu_clks[i];
+
+@@ -103,7 +112,10 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
+ continue;
+
+ name = hw->init->name;
+- ret = of_clk_hw_register(node, hw);
++ if (dev)
++ ret = clk_hw_register(dev, hw);
++ else
++ ret = of_clk_hw_register(node, hw);
+ if (ret) {
+ pr_err("Couldn't register clock %d - %s\n", i, name);
+ goto err_clk_unreg;
+@@ -115,15 +127,10 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
+ if (ret)
+ goto err_clk_unreg;
+
+- reset = kzalloc(sizeof(*reset), GFP_KERNEL);
+- if (!reset) {
+- ret = -ENOMEM;
+- goto err_alloc_reset;
+- }
+-
++ reset = &ccu->reset;
+ reset->rcdev.of_node = node;
+ reset->rcdev.ops = &ccu_reset_ops;
+- reset->rcdev.owner = THIS_MODULE;
++ reset->rcdev.owner = dev ? dev->driver->owner : THIS_MODULE;
+ reset->rcdev.nr_resets = desc->num_resets;
+ reset->base = reg;
+ reset->lock = &ccu_lock;
+@@ -131,13 +138,11 @@ int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
+
+ ret = reset_controller_register(&reset->rcdev);
+ if (ret)
+- goto err_of_clk_unreg;
++ goto err_del_provider;
+
+ return 0;
+
+-err_of_clk_unreg:
+- kfree(reset);
+-err_alloc_reset:
++err_del_provider:
+ of_clk_del_provider(node);
+ err_clk_unreg:
+ while (--i >= 0) {
+@@ -149,3 +154,59 @@ err_clk_unreg:
+ }
+ return ret;
+ }
++
++static void devm_sunxi_ccu_release(struct device *dev, void *res)
++{
++ struct sunxi_ccu *ccu = res;
++ const struct sunxi_ccu_desc *desc = ccu->desc;
++ int i;
++
++ reset_controller_unregister(&ccu->reset.rcdev);
++ of_clk_del_provider(dev->of_node);
++
++ for (i = 0; i < desc->hw_clks->num; i++) {
++ struct clk_hw *hw = desc->hw_clks->hws[i];
++
++ if (!hw)
++ continue;
++ clk_hw_unregister(hw);
++ }
++}
++
++int devm_sunxi_ccu_probe(struct device *dev, void __iomem *reg,
++ const struct sunxi_ccu_desc *desc)
++{
++ struct sunxi_ccu *ccu;
++ int ret;
++
++ ccu = devres_alloc(devm_sunxi_ccu_release, sizeof(*ccu), GFP_KERNEL);
++ if (!ccu)
++ return -ENOMEM;
++
++ ret = sunxi_ccu_probe(ccu, dev, dev->of_node, reg, desc);
++ if (ret) {
++ devres_free(ccu);
++ return ret;
++ }
++
++ devres_add(dev, ccu);
++
++ return 0;
++}
++
++void of_sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
++ const struct sunxi_ccu_desc *desc)
++{
++ struct sunxi_ccu *ccu;
++ int ret;
++
++ ccu = kzalloc(sizeof(*ccu), GFP_KERNEL);
++ if (!ccu)
++ return;
++
++ ret = sunxi_ccu_probe(ccu, NULL, node, reg, desc);
++ if (ret) {
++ pr_err("%pOF: probing clocks failed: %d\n", node, ret);
++ kfree(ccu);
++ }
++}
+diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h
+index 04e7a12200a21..98a1834b58bb4 100644
+--- a/drivers/clk/sunxi-ng/ccu_common.h
++++ b/drivers/clk/sunxi-ng/ccu_common.h
+@@ -63,7 +63,9 @@ struct ccu_pll_nb {
+
+ int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb);
+
+-int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
+- const struct sunxi_ccu_desc *desc);
++int devm_sunxi_ccu_probe(struct device *dev, void __iomem *reg,
++ const struct sunxi_ccu_desc *desc);
++void of_sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
++ const struct sunxi_ccu_desc *desc);
+
+ #endif /* _COMMON_H_ */
+diff --git a/drivers/clk/sunxi-ng/ccu_mmc_timing.c b/drivers/clk/sunxi-ng/ccu_mmc_timing.c
+index de33414fc5c28..c6a6ce98ca03a 100644
+--- a/drivers/clk/sunxi-ng/ccu_mmc_timing.c
++++ b/drivers/clk/sunxi-ng/ccu_mmc_timing.c
+@@ -43,7 +43,7 @@ int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode)
+ EXPORT_SYMBOL_GPL(sunxi_ccu_set_mmc_timing_mode);
+
+ /**
+- * sunxi_ccu_set_mmc_timing_mode: Get the current MMC clock timing mode
++ * sunxi_ccu_get_mmc_timing_mode: Get the current MMC clock timing mode
+ * @clk: clock to query
+ *
+ * Returns 0 if the clock is in old timing mode, > 0 if it is in
+diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
+index 542b31d6e96dd..636bcf2439ef2 100644
+--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
++++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
+@@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
+ spin_lock_init(&data->lock);
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!r)
++ return -EINVAL;
+ /* one clock/reset pair per word */
+ count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH);
+ data->membase = devm_ioremap_resource(&pdev->dev, r);
+diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c
+index 6144447f86c63..62238dca9a534 100644
+--- a/drivers/clk/tegra/clk-dfll.c
++++ b/drivers/clk/tegra/clk-dfll.c
+@@ -271,6 +271,7 @@ struct tegra_dfll {
+ struct clk *ref_clk;
+ struct clk *i2c_clk;
+ struct clk *dfll_clk;
++ struct reset_control *dfll_rst;
+ struct reset_control *dvco_rst;
+ unsigned long ref_rate;
+ unsigned long i2c_clk_rate;
+@@ -1464,6 +1465,7 @@ static int dfll_init(struct tegra_dfll *td)
+ return -EINVAL;
+ }
+
++ reset_control_deassert(td->dfll_rst);
+ reset_control_deassert(td->dvco_rst);
+
+ ret = clk_prepare(td->ref_clk);
+@@ -1509,6 +1511,7 @@ di_err1:
+ clk_unprepare(td->ref_clk);
+
+ reset_control_assert(td->dvco_rst);
++ reset_control_assert(td->dfll_rst);
+
+ return ret;
+ }
+@@ -1530,6 +1533,7 @@ int tegra_dfll_suspend(struct device *dev)
+ }
+
+ reset_control_assert(td->dvco_rst);
++ reset_control_assert(td->dfll_rst);
+
+ return 0;
+ }
+@@ -1548,6 +1552,7 @@ int tegra_dfll_resume(struct device *dev)
+ {
+ struct tegra_dfll *td = dev_get_drvdata(dev);
+
++ reset_control_deassert(td->dfll_rst);
+ reset_control_deassert(td->dvco_rst);
+
+ pm_runtime_get_sync(td->dev);
+@@ -1951,6 +1956,12 @@ int tegra_dfll_register(struct platform_device *pdev,
+
+ td->soc = soc;
+
++ td->dfll_rst = devm_reset_control_get_optional(td->dev, "dfll");
++ if (IS_ERR(td->dfll_rst)) {
++ dev_err(td->dev, "couldn't get dfll reset\n");
++ return PTR_ERR(td->dfll_rst);
++ }
++
+ td->dvco_rst = devm_reset_control_get(td->dev, "dvco");
+ if (IS_ERR(td->dvco_rst)) {
+ dev_err(td->dev, "couldn't get dvco reset\n");
+@@ -2087,6 +2098,7 @@ struct tegra_dfll_soc_data *tegra_dfll_unregister(struct platform_device *pdev)
+ clk_unprepare(td->i2c_clk);
+
+ reset_control_assert(td->dvco_rst);
++ reset_control_assert(td->dfll_rst);
+
+ return td->soc;
+ }
+diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
+index bc9e47a4cb60a..4e2b26e3e5738 100644
+--- a/drivers/clk/tegra/clk-tegra114.c
++++ b/drivers/clk/tegra/clk-tegra114.c
+@@ -1317,6 +1317,7 @@ static void __init tegra114_clock_init(struct device_node *np)
+ }
+
+ pmc_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!pmc_base) {
+ pr_err("Can't map pmc registers\n");
+ WARN_ON(1);
+diff --git a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c
+index 74c1d894cca86..2a6db04342815 100644
+--- a/drivers/clk/tegra/clk-tegra124-emc.c
++++ b/drivers/clk/tegra/clk-tegra124-emc.c
+@@ -198,6 +198,7 @@ static struct tegra_emc *emc_ensure_emc_driver(struct tegra_clk_emc *tegra)
+
+ tegra->emc = platform_get_drvdata(pdev);
+ if (!tegra->emc) {
++ put_device(&pdev->dev);
+ pr_err("%s: cannot find EMC driver\n", __func__);
+ return NULL;
+ }
+@@ -463,6 +464,7 @@ static int load_timings_from_dt(struct tegra_clk_emc *tegra,
+ err = load_one_timing_from_dt(tegra, timing, child);
+ if (err) {
+ of_node_put(child);
++ kfree(tegra->timings);
+ return err;
+ }
+
+@@ -514,6 +516,7 @@ struct clk *tegra124_clk_register_emc(void __iomem *base, struct device_node *np
+ err = load_timings_from_dt(tegra, node, node_ram_code);
+ if (err) {
+ of_node_put(node);
++ kfree(tegra);
+ return ERR_PTR(err);
+ }
+ }
+diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
+index 3664593a5ba4e..cc57ababc882d 100644
+--- a/drivers/clk/tegra/clk-tegra20.c
++++ b/drivers/clk/tegra/clk-tegra20.c
+@@ -18,24 +18,24 @@
+ #define MISC_CLK_ENB 0x48
+
+ #define OSC_CTRL 0x50
+-#define OSC_CTRL_OSC_FREQ_MASK (3<<30)
+-#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30)
+-#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30)
+-#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30)
+-#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30)
+-#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK)
+-
+-#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28)
+-#define OSC_CTRL_PLL_REF_DIV_1 (0<<28)
+-#define OSC_CTRL_PLL_REF_DIV_2 (1<<28)
+-#define OSC_CTRL_PLL_REF_DIV_4 (2<<28)
++#define OSC_CTRL_OSC_FREQ_MASK (3u<<30)
++#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30)
++#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30)
++#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30)
++#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30)
++#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK)
++
++#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28)
++#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28)
++#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28)
++#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28)
+
+ #define OSC_FREQ_DET 0x58
+-#define OSC_FREQ_DET_TRIG (1<<31)
++#define OSC_FREQ_DET_TRIG (1u<<31)
+
+ #define OSC_FREQ_DET_STATUS 0x5c
+-#define OSC_FREQ_DET_BUSY (1<<31)
+-#define OSC_FREQ_DET_CNT_MASK 0xFFFF
++#define OSC_FREQ_DET_BUSY (1u<<31)
++#define OSC_FREQ_DET_CNT_MASK 0xFFFFu
+
+ #define TEGRA20_CLK_PERIPH_BANKS 3
+
+@@ -1128,6 +1128,7 @@ static void __init tegra20_clock_init(struct device_node *np)
+ }
+
+ pmc_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!pmc_base) {
+ pr_err("Can't map pmc registers\n");
+ BUG();
+diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
+index b9099012dc7b1..499f999e91e13 100644
+--- a/drivers/clk/tegra/clk-tegra210.c
++++ b/drivers/clk/tegra/clk-tegra210.c
+@@ -3748,6 +3748,7 @@ static void __init tegra210_clock_init(struct device_node *np)
+ }
+
+ pmc_base = of_iomap(node, 0);
++ of_node_put(node);
+ if (!pmc_base) {
+ pr_err("Can't map pmc registers\n");
+ WARN_ON(1);
+diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
+index 8d4c08b034bdd..e2e59d78c173f 100644
+--- a/drivers/clk/ti/clk-dra7-atl.c
++++ b/drivers/clk/ti/clk-dra7-atl.c
+@@ -251,14 +251,16 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev)
+ if (rc) {
+ pr_err("%s: failed to lookup atl clock %d\n", __func__,
+ i);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto pm_put;
+ }
+
+ clk = of_clk_get_from_provider(&clkspec);
+ if (IS_ERR(clk)) {
+ pr_err("%s: failed to get atl clock %d from provider\n",
+ __func__, i);
+- return PTR_ERR(clk);
++ ret = PTR_ERR(clk);
++ goto pm_put;
+ }
+
+ cdesc = to_atl_desc(__clk_get_hw(clk));
+@@ -291,8 +293,9 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev)
+ if (cdesc->enabled)
+ atl_clk_enable(__clk_get_hw(clk));
+ }
+- pm_runtime_put_sync(cinfo->dev);
+
++pm_put:
++ pm_runtime_put_sync(cinfo->dev);
+ return ret;
+ }
+
+diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
+index 3da33c786d77c..29eafab4353ef 100644
+--- a/drivers/clk/ti/clk.c
++++ b/drivers/clk/ti/clk.c
+@@ -131,7 +131,7 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops)
+ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
+ {
+ struct ti_dt_clk *c;
+- struct device_node *node, *parent;
++ struct device_node *node, *parent, *child;
+ struct clk *clk;
+ struct of_phandle_args clkspec;
+ char buf[64];
+@@ -171,10 +171,13 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
+ node = of_find_node_by_name(NULL, buf);
+ if (num_args && compat_mode) {
+ parent = node;
+- node = of_get_child_by_name(parent, "clock");
+- if (!node)
+- node = of_get_child_by_name(parent, "clk");
+- of_node_put(parent);
++ child = of_get_child_by_name(parent, "clock");
++ if (!child)
++ child = of_get_child_by_name(parent, "clk");
++ if (child) {
++ of_node_put(parent);
++ node = child;
++ }
+ }
+
+ clkspec.np = node;
+diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
+index 864c484bde1b4..157abc46dcf44 100644
+--- a/drivers/clk/ti/clkctrl.c
++++ b/drivers/clk/ti/clkctrl.c
+@@ -267,6 +267,9 @@ static const char * __init clkctrl_get_clock_name(struct device_node *np,
+ if (clkctrl_name && !legacy_naming) {
+ clock_name = kasprintf(GFP_KERNEL, "%s-clkctrl:%04x:%d",
+ clkctrl_name, offset, index);
++ if (!clock_name)
++ return NULL;
++
+ strreplace(clock_name, '_', '-');
+
+ return clock_name;
+@@ -598,6 +601,10 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
+ if (clkctrl_name) {
+ provider->clkdm_name = kasprintf(GFP_KERNEL,
+ "%s_clkdm", clkctrl_name);
++ if (!provider->clkdm_name) {
++ kfree(provider);
++ return;
++ }
+ goto clkdm_found;
+ }
+
+diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c
+index 5319cd3804801..3bc55ab75314b 100644
+--- a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c
++++ b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c
+@@ -24,6 +24,7 @@ struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev,
+
+ init.name = name;
+ init.ops = &clk_fixed_rate_ops;
++ init.flags = 0;
+ init.parent_names = NULL;
+ init.num_parents = 0;
+
+diff --git a/drivers/clk/x86/Kconfig b/drivers/clk/x86/Kconfig
+index 69642e15fcc1f..ced99e082e3dd 100644
+--- a/drivers/clk/x86/Kconfig
++++ b/drivers/clk/x86/Kconfig
+@@ -1,8 +1,9 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ config CLK_LGM_CGU
+ depends on OF && HAS_IOMEM && (X86 || COMPILE_TEST)
++ select MFD_SYSCON
+ select OF_EARLY_FLATTREE
+ bool "Clock driver for Lightning Mountain(LGM) platform"
+ help
+- Clock Generation Unit(CGU) driver for Intel Lightning Mountain(LGM)
+- network processor SoC.
++ Clock Generation Unit(CGU) driver for MaxLinear's x86 based
++ Lightning Mountain(LGM) network processor SoC.
+diff --git a/drivers/clk/x86/clk-cgu-pll.c b/drivers/clk/x86/clk-cgu-pll.c
+index 3179557b5f784..409dbf55f4cae 100644
+--- a/drivers/clk/x86/clk-cgu-pll.c
++++ b/drivers/clk/x86/clk-cgu-pll.c
+@@ -1,8 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /*
++ * Copyright (C) 2020-2022 MaxLinear, Inc.
+ * Copyright (C) 2020 Intel Corporation.
+- * Zhu YiXin <yixin.zhu@intel.com>
+- * Rahul Tanwar <rahul.tanwar@intel.com>
++ * Zhu Yixin <yzhu@maxlinear.com>
++ * Rahul Tanwar <rtanwar@maxlinear.com>
+ */
+
+ #include <linux/clk-provider.h>
+@@ -40,13 +41,10 @@ static unsigned long lgm_pll_recalc_rate(struct clk_hw *hw, unsigned long prate)
+ {
+ struct lgm_clk_pll *pll = to_lgm_clk_pll(hw);
+ unsigned int div, mult, frac;
+- unsigned long flags;
+
+- spin_lock_irqsave(&pll->lock, flags);
+ mult = lgm_get_clk_val(pll->membase, PLL_REF_DIV(pll->reg), 0, 12);
+ div = lgm_get_clk_val(pll->membase, PLL_REF_DIV(pll->reg), 18, 6);
+ frac = lgm_get_clk_val(pll->membase, pll->reg, 2, 24);
+- spin_unlock_irqrestore(&pll->lock, flags);
+
+ if (pll->type == TYPE_LJPLL)
+ div *= 4;
+@@ -57,12 +55,9 @@ static unsigned long lgm_pll_recalc_rate(struct clk_hw *hw, unsigned long prate)
+ static int lgm_pll_is_enabled(struct clk_hw *hw)
+ {
+ struct lgm_clk_pll *pll = to_lgm_clk_pll(hw);
+- unsigned long flags;
+ unsigned int ret;
+
+- spin_lock_irqsave(&pll->lock, flags);
+ ret = lgm_get_clk_val(pll->membase, pll->reg, 0, 1);
+- spin_unlock_irqrestore(&pll->lock, flags);
+
+ return ret;
+ }
+@@ -70,15 +65,13 @@ static int lgm_pll_is_enabled(struct clk_hw *hw)
+ static int lgm_pll_enable(struct clk_hw *hw)
+ {
+ struct lgm_clk_pll *pll = to_lgm_clk_pll(hw);
+- unsigned long flags;
+ u32 val;
+ int ret;
+
+- spin_lock_irqsave(&pll->lock, flags);
+ lgm_set_clk_val(pll->membase, pll->reg, 0, 1, 1);
+- ret = readl_poll_timeout_atomic(pll->membase + pll->reg,
+- val, (val & 0x1), 1, 100);
+- spin_unlock_irqrestore(&pll->lock, flags);
++ ret = regmap_read_poll_timeout_atomic(pll->membase, pll->reg,
++ val, (val & 0x1), 1, 100);
++
+
+ return ret;
+ }
+@@ -86,11 +79,8 @@ static int lgm_pll_enable(struct clk_hw *hw)
+ static void lgm_pll_disable(struct clk_hw *hw)
+ {
+ struct lgm_clk_pll *pll = to_lgm_clk_pll(hw);
+- unsigned long flags;
+
+- spin_lock_irqsave(&pll->lock, flags);
+ lgm_set_clk_val(pll->membase, pll->reg, 0, 1, 0);
+- spin_unlock_irqrestore(&pll->lock, flags);
+ }
+
+ static const struct clk_ops lgm_pll_ops = {
+@@ -121,7 +111,6 @@ lgm_clk_register_pll(struct lgm_clk_provider *ctx,
+ return ERR_PTR(-ENOMEM);
+
+ pll->membase = ctx->membase;
+- pll->lock = ctx->lock;
+ pll->reg = list->reg;
+ pll->flags = list->flags;
+ pll->type = list->type;
+diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c
+index 33de600e0c38e..89b53f280aee0 100644
+--- a/drivers/clk/x86/clk-cgu.c
++++ b/drivers/clk/x86/clk-cgu.c
+@@ -1,8 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /*
++ * Copyright (C) 2020-2022 MaxLinear, Inc.
+ * Copyright (C) 2020 Intel Corporation.
+- * Zhu YiXin <yixin.zhu@intel.com>
+- * Rahul Tanwar <rahul.tanwar@intel.com>
++ * Zhu Yixin <yzhu@maxlinear.com>
++ * Rahul Tanwar <rtanwar@maxlinear.com>
+ */
+ #include <linux/clk-provider.h>
+ #include <linux/device.h>
+@@ -24,14 +25,10 @@
+ static struct clk_hw *lgm_clk_register_fixed(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags;
+
+- if (list->div_flags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&ctx->lock, flags);
++ if (list->div_flags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(ctx->membase, list->div_off, list->div_shift,
+ list->div_width, list->div_val);
+- spin_unlock_irqrestore(&ctx->lock, flags);
+- }
+
+ return clk_hw_register_fixed_rate(NULL, list->name,
+ list->parent_data[0].name,
+@@ -41,33 +38,27 @@ static struct clk_hw *lgm_clk_register_fixed(struct lgm_clk_provider *ctx,
+ static u8 lgm_clk_mux_get_parent(struct clk_hw *hw)
+ {
+ struct lgm_clk_mux *mux = to_lgm_clk_mux(hw);
+- unsigned long flags;
+ u32 val;
+
+- spin_lock_irqsave(&mux->lock, flags);
+ if (mux->flags & MUX_CLK_SW)
+ val = mux->reg;
+ else
+ val = lgm_get_clk_val(mux->membase, mux->reg, mux->shift,
+ mux->width);
+- spin_unlock_irqrestore(&mux->lock, flags);
+ return clk_mux_val_to_index(hw, NULL, mux->flags, val);
+ }
+
+ static int lgm_clk_mux_set_parent(struct clk_hw *hw, u8 index)
+ {
+ struct lgm_clk_mux *mux = to_lgm_clk_mux(hw);
+- unsigned long flags;
+ u32 val;
+
+ val = clk_mux_index_to_val(NULL, mux->flags, index);
+- spin_lock_irqsave(&mux->lock, flags);
+ if (mux->flags & MUX_CLK_SW)
+ mux->reg = val;
+ else
+ lgm_set_clk_val(mux->membase, mux->reg, mux->shift,
+ mux->width, val);
+- spin_unlock_irqrestore(&mux->lock, flags);
+
+ return 0;
+ }
+@@ -90,7 +81,7 @@ static struct clk_hw *
+ lgm_clk_register_mux(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags, cflags = list->mux_flags;
++ unsigned long cflags = list->mux_flags;
+ struct device *dev = ctx->dev;
+ u8 shift = list->mux_shift;
+ u8 width = list->mux_width;
+@@ -111,7 +102,6 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx,
+ init.num_parents = list->num_parents;
+
+ mux->membase = ctx->membase;
+- mux->lock = ctx->lock;
+ mux->reg = reg;
+ mux->shift = shift;
+ mux->width = width;
+@@ -123,11 +113,8 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx,
+ if (ret)
+ return ERR_PTR(ret);
+
+- if (cflags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&mux->lock, flags);
++ if (cflags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(mux->membase, reg, shift, width, list->mux_val);
+- spin_unlock_irqrestore(&mux->lock, flags);
+- }
+
+ return hw;
+ }
+@@ -136,13 +123,10 @@ static unsigned long
+ lgm_clk_divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+ {
+ struct lgm_clk_divider *divider = to_lgm_clk_divider(hw);
+- unsigned long flags;
+ unsigned int val;
+
+- spin_lock_irqsave(&divider->lock, flags);
+ val = lgm_get_clk_val(divider->membase, divider->reg,
+ divider->shift, divider->width);
+- spin_unlock_irqrestore(&divider->lock, flags);
+
+ return divider_recalc_rate(hw, parent_rate, val, divider->table,
+ divider->flags, divider->width);
+@@ -163,7 +147,6 @@ lgm_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long prate)
+ {
+ struct lgm_clk_divider *divider = to_lgm_clk_divider(hw);
+- unsigned long flags;
+ int value;
+
+ value = divider_get_val(rate, prate, divider->table,
+@@ -171,10 +154,8 @@ lgm_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+ if (value < 0)
+ return value;
+
+- spin_lock_irqsave(&divider->lock, flags);
+ lgm_set_clk_val(divider->membase, divider->reg,
+ divider->shift, divider->width, value);
+- spin_unlock_irqrestore(&divider->lock, flags);
+
+ return 0;
+ }
+@@ -182,12 +163,10 @@ lgm_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+ static int lgm_clk_divider_enable_disable(struct clk_hw *hw, int enable)
+ {
+ struct lgm_clk_divider *div = to_lgm_clk_divider(hw);
+- unsigned long flags;
+
+- spin_lock_irqsave(&div->lock, flags);
+- lgm_set_clk_val(div->membase, div->reg, div->shift_gate,
+- div->width_gate, enable);
+- spin_unlock_irqrestore(&div->lock, flags);
++ if (div->flags != DIV_CLK_NO_MASK)
++ lgm_set_clk_val(div->membase, div->reg, div->shift_gate,
++ div->width_gate, enable);
+ return 0;
+ }
+
+@@ -213,7 +192,7 @@ static struct clk_hw *
+ lgm_clk_register_divider(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags, cflags = list->div_flags;
++ unsigned long cflags = list->div_flags;
+ struct device *dev = ctx->dev;
+ struct lgm_clk_divider *div;
+ struct clk_init_data init = {};
+@@ -236,7 +215,6 @@ lgm_clk_register_divider(struct lgm_clk_provider *ctx,
+ init.num_parents = 1;
+
+ div->membase = ctx->membase;
+- div->lock = ctx->lock;
+ div->reg = reg;
+ div->shift = shift;
+ div->width = width;
+@@ -251,11 +229,8 @@ lgm_clk_register_divider(struct lgm_clk_provider *ctx,
+ if (ret)
+ return ERR_PTR(ret);
+
+- if (cflags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&div->lock, flags);
++ if (cflags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(div->membase, reg, shift, width, list->div_val);
+- spin_unlock_irqrestore(&div->lock, flags);
+- }
+
+ return hw;
+ }
+@@ -264,7 +239,6 @@ static struct clk_hw *
+ lgm_clk_register_fixed_factor(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags;
+ struct clk_hw *hw;
+
+ hw = clk_hw_register_fixed_factor(ctx->dev, list->name,
+@@ -273,12 +247,9 @@ lgm_clk_register_fixed_factor(struct lgm_clk_provider *ctx,
+ if (IS_ERR(hw))
+ return ERR_CAST(hw);
+
+- if (list->div_flags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&ctx->lock, flags);
++ if (list->div_flags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(ctx->membase, list->div_off, list->div_shift,
+ list->div_width, list->div_val);
+- spin_unlock_irqrestore(&ctx->lock, flags);
+- }
+
+ return hw;
+ }
+@@ -286,13 +257,10 @@ lgm_clk_register_fixed_factor(struct lgm_clk_provider *ctx,
+ static int lgm_clk_gate_enable(struct clk_hw *hw)
+ {
+ struct lgm_clk_gate *gate = to_lgm_clk_gate(hw);
+- unsigned long flags;
+ unsigned int reg;
+
+- spin_lock_irqsave(&gate->lock, flags);
+ reg = GATE_HW_REG_EN(gate->reg);
+ lgm_set_clk_val(gate->membase, reg, gate->shift, 1, 1);
+- spin_unlock_irqrestore(&gate->lock, flags);
+
+ return 0;
+ }
+@@ -300,25 +268,19 @@ static int lgm_clk_gate_enable(struct clk_hw *hw)
+ static void lgm_clk_gate_disable(struct clk_hw *hw)
+ {
+ struct lgm_clk_gate *gate = to_lgm_clk_gate(hw);
+- unsigned long flags;
+ unsigned int reg;
+
+- spin_lock_irqsave(&gate->lock, flags);
+ reg = GATE_HW_REG_DIS(gate->reg);
+ lgm_set_clk_val(gate->membase, reg, gate->shift, 1, 1);
+- spin_unlock_irqrestore(&gate->lock, flags);
+ }
+
+ static int lgm_clk_gate_is_enabled(struct clk_hw *hw)
+ {
+ struct lgm_clk_gate *gate = to_lgm_clk_gate(hw);
+ unsigned int reg, ret;
+- unsigned long flags;
+
+- spin_lock_irqsave(&gate->lock, flags);
+ reg = GATE_HW_REG_STAT(gate->reg);
+ ret = lgm_get_clk_val(gate->membase, reg, gate->shift, 1);
+- spin_unlock_irqrestore(&gate->lock, flags);
+
+ return ret;
+ }
+@@ -333,7 +295,7 @@ static struct clk_hw *
+ lgm_clk_register_gate(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags, cflags = list->gate_flags;
++ unsigned long cflags = list->gate_flags;
+ const char *pname = list->parent_data[0].name;
+ struct device *dev = ctx->dev;
+ u8 shift = list->gate_shift;
+@@ -354,7 +316,6 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx,
+ init.num_parents = pname ? 1 : 0;
+
+ gate->membase = ctx->membase;
+- gate->lock = ctx->lock;
+ gate->reg = reg;
+ gate->shift = shift;
+ gate->flags = cflags;
+@@ -366,9 +327,7 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx,
+ return ERR_PTR(ret);
+
+ if (cflags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&gate->lock, flags);
+ lgm_set_clk_val(gate->membase, reg, shift, 1, list->gate_val);
+- spin_unlock_irqrestore(&gate->lock, flags);
+ }
+
+ return hw;
+@@ -396,8 +355,22 @@ int lgm_clk_register_branches(struct lgm_clk_provider *ctx,
+ hw = lgm_clk_register_fixed_factor(ctx, list);
+ break;
+ case CLK_TYPE_GATE:
+- hw = lgm_clk_register_gate(ctx, list);
++ if (list->gate_flags & GATE_CLK_HW) {
++ hw = lgm_clk_register_gate(ctx, list);
++ } else {
++ /*
++ * GATE_CLKs can be controlled either from
++ * CGU clk driver i.e. this driver or directly
++ * from power management driver/daemon. It is
++ * dependent on the power policy/profile requirements
++ * of the end product. To override control of gate
++ * clks from this driver, provide NULL for this index
++ * of gate clk provider.
++ */
++ hw = NULL;
++ }
+ break;
++
+ default:
+ dev_err(ctx->dev, "invalid clk type\n");
+ return -EINVAL;
+@@ -443,24 +416,18 @@ lgm_clk_ddiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+ static int lgm_clk_ddiv_enable(struct clk_hw *hw)
+ {
+ struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw);
+- unsigned long flags;
+
+- spin_lock_irqsave(&ddiv->lock, flags);
+ lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift_gate,
+ ddiv->width_gate, 1);
+- spin_unlock_irqrestore(&ddiv->lock, flags);
+ return 0;
+ }
+
+ static void lgm_clk_ddiv_disable(struct clk_hw *hw)
+ {
+ struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw);
+- unsigned long flags;
+
+- spin_lock_irqsave(&ddiv->lock, flags);
+ lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift_gate,
+ ddiv->width_gate, 0);
+- spin_unlock_irqrestore(&ddiv->lock, flags);
+ }
+
+ static int
+@@ -497,32 +464,25 @@ lgm_clk_ddiv_set_rate(struct clk_hw *hw, unsigned long rate,
+ {
+ struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw);
+ u32 div, ddiv1, ddiv2;
+- unsigned long flags;
+
+ div = DIV_ROUND_CLOSEST_ULL((u64)prate, rate);
+
+- spin_lock_irqsave(&ddiv->lock, flags);
+ if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) {
+ div = DIV_ROUND_CLOSEST_ULL((u64)div, 5);
+ div = div * 2;
+ }
+
+- if (div <= 0) {
+- spin_unlock_irqrestore(&ddiv->lock, flags);
++ if (div <= 0)
+ return -EINVAL;
+- }
+
+- if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2)) {
+- spin_unlock_irqrestore(&ddiv->lock, flags);
++ if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2))
+ return -EINVAL;
+- }
+
+ lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift0, ddiv->width0,
+ ddiv1 - 1);
+
+ lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift1, ddiv->width1,
+ ddiv2 - 1);
+- spin_unlock_irqrestore(&ddiv->lock, flags);
+
+ return 0;
+ }
+@@ -533,18 +493,15 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate,
+ {
+ struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw);
+ u32 div, ddiv1, ddiv2;
+- unsigned long flags;
+ u64 rate64;
+
+ div = DIV_ROUND_CLOSEST_ULL((u64)*prate, rate);
+
+ /* if predivide bit is enabled, modify div by factor of 2.5 */
+- spin_lock_irqsave(&ddiv->lock, flags);
+ if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) {
+ div = div * 2;
+ div = DIV_ROUND_CLOSEST_ULL((u64)div, 5);
+ }
+- spin_unlock_irqrestore(&ddiv->lock, flags);
+
+ if (div <= 0)
+ return *prate;
+@@ -558,12 +515,10 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate,
+ do_div(rate64, ddiv2);
+
+ /* if predivide bit is enabled, modify rounded rate by factor of 2.5 */
+- spin_lock_irqsave(&ddiv->lock, flags);
+ if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) {
+ rate64 = rate64 * 2;
+ rate64 = DIV_ROUND_CLOSEST_ULL(rate64, 5);
+ }
+- spin_unlock_irqrestore(&ddiv->lock, flags);
+
+ return rate64;
+ }
+@@ -600,7 +555,6 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx,
+ init.num_parents = 1;
+
+ ddiv->membase = ctx->membase;
+- ddiv->lock = ctx->lock;
+ ddiv->reg = list->reg;
+ ddiv->shift0 = list->shift0;
+ ddiv->width0 = list->width0;
+diff --git a/drivers/clk/x86/clk-cgu.h b/drivers/clk/x86/clk-cgu.h
+index 4e22bfb223128..bcaf8aec94e5d 100644
+--- a/drivers/clk/x86/clk-cgu.h
++++ b/drivers/clk/x86/clk-cgu.h
+@@ -1,28 +1,28 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+- * Copyright(c) 2020 Intel Corporation.
+- * Zhu YiXin <yixin.zhu@intel.com>
+- * Rahul Tanwar <rahul.tanwar@intel.com>
++ * Copyright (C) 2020-2022 MaxLinear, Inc.
++ * Copyright (C) 2020 Intel Corporation.
++ * Zhu Yixin <yzhu@maxlinear.com>
++ * Rahul Tanwar <rtanwar@maxlinear.com>
+ */
+
+ #ifndef __CLK_CGU_H
+ #define __CLK_CGU_H
+
+-#include <linux/io.h>
++#include <linux/regmap.h>
+
+ struct lgm_clk_mux {
+ struct clk_hw hw;
+- void __iomem *membase;
++ struct regmap *membase;
+ unsigned int reg;
+ u8 shift;
+ u8 width;
+ unsigned long flags;
+- spinlock_t lock;
+ };
+
+ struct lgm_clk_divider {
+ struct clk_hw hw;
+- void __iomem *membase;
++ struct regmap *membase;
+ unsigned int reg;
+ u8 shift;
+ u8 width;
+@@ -30,12 +30,11 @@ struct lgm_clk_divider {
+ u8 width_gate;
+ unsigned long flags;
+ const struct clk_div_table *table;
+- spinlock_t lock;
+ };
+
+ struct lgm_clk_ddiv {
+ struct clk_hw hw;
+- void __iomem *membase;
++ struct regmap *membase;
+ unsigned int reg;
+ u8 shift0;
+ u8 width0;
+@@ -48,16 +47,14 @@ struct lgm_clk_ddiv {
+ unsigned int mult;
+ unsigned int div;
+ unsigned long flags;
+- spinlock_t lock;
+ };
+
+ struct lgm_clk_gate {
+ struct clk_hw hw;
+- void __iomem *membase;
++ struct regmap *membase;
+ unsigned int reg;
+ u8 shift;
+ unsigned long flags;
+- spinlock_t lock;
+ };
+
+ enum lgm_clk_type {
+@@ -77,11 +74,10 @@ enum lgm_clk_type {
+ * @clk_data: array of hw clocks and clk number.
+ */
+ struct lgm_clk_provider {
+- void __iomem *membase;
++ struct regmap *membase;
+ struct device_node *np;
+ struct device *dev;
+ struct clk_hw_onecell_data clk_data;
+- spinlock_t lock;
+ };
+
+ enum pll_type {
+@@ -92,11 +88,10 @@ enum pll_type {
+
+ struct lgm_clk_pll {
+ struct clk_hw hw;
+- void __iomem *membase;
++ struct regmap *membase;
+ unsigned int reg;
+ unsigned long flags;
+ enum pll_type type;
+- spinlock_t lock;
+ };
+
+ /**
+@@ -202,6 +197,8 @@ struct lgm_clk_branch {
+ /* clock flags definition */
+ #define CLOCK_FLAG_VAL_INIT BIT(16)
+ #define MUX_CLK_SW BIT(17)
++#define GATE_CLK_HW BIT(18)
++#define DIV_CLK_NO_MASK BIT(19)
+
+ #define LGM_MUX(_id, _name, _pdata, _f, _reg, \
+ _shift, _width, _cf, _v) \
+@@ -300,29 +297,32 @@ struct lgm_clk_branch {
+ .div = _d, \
+ }
+
+-static inline void lgm_set_clk_val(void __iomem *membase, u32 reg,
++static inline void lgm_set_clk_val(struct regmap *membase, u32 reg,
+ u8 shift, u8 width, u32 set_val)
+ {
+ u32 mask = (GENMASK(width - 1, 0) << shift);
+- u32 regval;
+
+- regval = readl(membase + reg);
+- regval = (regval & ~mask) | ((set_val << shift) & mask);
+- writel(regval, membase + reg);
++ regmap_update_bits(membase, reg, mask, set_val << shift);
+ }
+
+-static inline u32 lgm_get_clk_val(void __iomem *membase, u32 reg,
++static inline u32 lgm_get_clk_val(struct regmap *membase, u32 reg,
+ u8 shift, u8 width)
+ {
+ u32 mask = (GENMASK(width - 1, 0) << shift);
+ u32 val;
+
+- val = readl(membase + reg);
++ if (regmap_read(membase, reg, &val)) {
++ WARN_ONCE(1, "Failed to read clk reg: 0x%x\n", reg);
++ return 0;
++ }
++
+ val = (val & mask) >> shift;
+
+ return val;
+ }
+
++
++
+ int lgm_clk_register_branches(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list,
+ unsigned int nr_clk);
+diff --git a/drivers/clk/x86/clk-lgm.c b/drivers/clk/x86/clk-lgm.c
+index 020f4e83a5ccb..f69455dd1c980 100644
+--- a/drivers/clk/x86/clk-lgm.c
++++ b/drivers/clk/x86/clk-lgm.c
+@@ -1,10 +1,12 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /*
++ * Copyright (C) 2020-2022 MaxLinear, Inc.
+ * Copyright (C) 2020 Intel Corporation.
+- * Zhu YiXin <yixin.zhu@intel.com>
+- * Rahul Tanwar <rahul.tanwar@intel.com>
++ * Zhu Yixin <yzhu@maxlinear.com>
++ * Rahul Tanwar <rtanwar@maxlinear.com>
+ */
+ #include <linux/clk-provider.h>
++#include <linux/mfd/syscon.h>
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <dt-bindings/clock/intel,lgm-clk.h>
+@@ -253,8 +255,8 @@ static const struct lgm_clk_branch lgm_branch_clks[] = {
+ LGM_FIXED(LGM_CLK_SLIC, "slic", NULL, 0, CGU_IF_CLK1,
+ 8, 2, CLOCK_FLAG_VAL_INIT, 8192000, 2),
+ LGM_FIXED(LGM_CLK_DOCSIS, "v_docsis", NULL, 0, 0, 0, 0, 0, 16000000, 0),
+- LGM_DIV(LGM_CLK_DCL, "dcl", "v_ifclk", 0, CGU_PCMCR,
+- 25, 3, 0, 0, 0, 0, dcl_div),
++ LGM_DIV(LGM_CLK_DCL, "dcl", "v_ifclk", CLK_SET_RATE_PARENT, CGU_PCMCR,
++ 25, 3, 0, 0, DIV_CLK_NO_MASK, 0, dcl_div),
+ LGM_MUX(LGM_CLK_PCM, "pcm", pcm_p, 0, CGU_C55_PCMCR,
+ 0, 1, CLK_MUX_ROUND_CLOSEST, 0),
+ LGM_FIXED_FACTOR(LGM_CLK_DDR_PHY, "ddr_phy", "ddr",
+@@ -433,13 +435,15 @@ static int lgm_cgu_probe(struct platform_device *pdev)
+
+ ctx->clk_data.num = CLK_NR_CLKS;
+
+- ctx->membase = devm_platform_ioremap_resource(pdev, 0);
+- if (IS_ERR(ctx->membase))
++ ctx->membase = syscon_node_to_regmap(np);
++ if (IS_ERR(ctx->membase)) {
++ dev_err(dev, "Failed to get clk CGU iomem\n");
+ return PTR_ERR(ctx->membase);
++ }
++
+
+ ctx->np = np;
+ ctx->dev = dev;
+- spin_lock_init(&ctx->lock);
+
+ ret = lgm_clk_register_plls(ctx, lgm_pll_clks,
+ ARRAY_SIZE(lgm_pll_clks));
+diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c
+index eb25303eefed4..2c9da6623b84e 100644
+--- a/drivers/clk/zynqmp/clkc.c
++++ b/drivers/clk/zynqmp/clkc.c
+@@ -710,6 +710,13 @@ static void zynqmp_get_clock_info(void)
+ FIELD_PREP(CLK_ATTR_NODE_INDEX, i);
+
+ zynqmp_pm_clock_get_name(clock[i].clk_id, &name);
++
++ /*
++ * Terminate with NULL character in case name provided by firmware
++ * is longer and truncated due to size limit.
++ */
++ name.name[sizeof(name.name) - 1] = '\0';
++
+ if (!strcmp(name.name, RESERVED_CLK_NAME))
+ continue;
+ strncpy(clock[i].clk_name, name.name, MAX_NAME_LEN);
+diff --git a/drivers/clk/zynqmp/pll.c b/drivers/clk/zynqmp/pll.c
+index 036e4ff64a2f7..bc066f300345d 100644
+--- a/drivers/clk/zynqmp/pll.c
++++ b/drivers/clk/zynqmp/pll.c
+@@ -102,26 +102,25 @@ static long zynqmp_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *prate)
+ {
+ u32 fbdiv;
+- long rate_div, f;
++ u32 mult, div;
+
+- /* Enable the fractional mode if needed */
+- rate_div = (rate * FRAC_DIV) / *prate;
+- f = rate_div % FRAC_DIV;
+- if (f) {
+- if (rate > PS_PLL_VCO_MAX) {
+- fbdiv = rate / PS_PLL_VCO_MAX;
+- rate = rate / (fbdiv + 1);
+- }
+- if (rate < PS_PLL_VCO_MIN) {
+- fbdiv = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate);
+- rate = rate * fbdiv;
+- }
+- return rate;
++ /* Let rate fall inside the range PS_PLL_VCO_MIN ~ PS_PLL_VCO_MAX */
++ if (rate > PS_PLL_VCO_MAX) {
++ div = DIV_ROUND_UP(rate, PS_PLL_VCO_MAX);
++ rate = rate / div;
++ }
++ if (rate < PS_PLL_VCO_MIN) {
++ mult = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate);
++ rate = rate * mult;
+ }
+
+ fbdiv = DIV_ROUND_CLOSEST(rate, *prate);
+- fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX);
+- return *prate * fbdiv;
++ if (fbdiv < PLL_FBDIV_MIN || fbdiv > PLL_FBDIV_MAX) {
++ fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX);
++ rate = *prate * fbdiv;
++ }
++
++ return rate;
+ }
+
+ /**
+diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
+index 0f5e3983951a8..08f8cb944a2ac 100644
+--- a/drivers/clocksource/Kconfig
++++ b/drivers/clocksource/Kconfig
+@@ -24,6 +24,7 @@ config I8253_LOCK
+
+ config OMAP_DM_TIMER
+ bool
++ select TIMER_OF
+
+ config CLKBLD_I8253
+ def_bool y if CLKSRC_I8253 || CLKEVT_I8253 || I8253_LOCK
+diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
+index eb596ff9e7bb3..279ddff81ab49 100644
+--- a/drivers/clocksource/acpi_pm.c
++++ b/drivers/clocksource/acpi_pm.c
+@@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg)
+ int ret;
+
+ ret = kstrtouint(arg, 16, &base);
+- if (ret)
+- return ret;
++ if (ret) {
++ pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg);
++ return 1;
++ }
+
+ pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport,
+ base);
+diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
+index 3819ef5b70989..3245eb0c602d2 100644
+--- a/drivers/clocksource/dw_apb_timer_of.c
++++ b/drivers/clocksource/dw_apb_timer_of.c
+@@ -47,7 +47,7 @@ static int __init timer_get_base_and_rate(struct device_node *np,
+ pr_warn("pclk for %pOFn is present, but could not be activated\n",
+ np);
+
+- if (!of_property_read_u32(np, "clock-freq", rate) &&
++ if (!of_property_read_u32(np, "clock-freq", rate) ||
+ !of_property_read_u32(np, "clock-frequency", rate))
+ return 0;
+
+diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
+index 5e3e96d3d1b98..cc2a961ddd3be 100644
+--- a/drivers/clocksource/exynos_mct.c
++++ b/drivers/clocksource/exynos_mct.c
+@@ -504,11 +504,14 @@ static int exynos4_mct_dying_cpu(unsigned int cpu)
+ return 0;
+ }
+
+-static int __init exynos4_timer_resources(struct device_node *np, void __iomem *base)
++static int __init exynos4_timer_resources(struct device_node *np)
+ {
+- int err, cpu;
+ struct clk *mct_clk, *tick_clk;
+
++ reg_base = of_iomap(np, 0);
++ if (!reg_base)
++ panic("%s: unable to ioremap mct address space\n", __func__);
++
+ tick_clk = of_clk_get_by_name(np, "fin_pll");
+ if (IS_ERR(tick_clk))
+ panic("%s: unable to determine tick clock rate\n", __func__);
+@@ -519,9 +522,32 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem *
+ panic("%s: unable to retrieve mct clock instance\n", __func__);
+ clk_prepare_enable(mct_clk);
+
+- reg_base = base;
+- if (!reg_base)
+- panic("%s: unable to ioremap mct address space\n", __func__);
++ return 0;
++}
++
++static int __init exynos4_timer_interrupts(struct device_node *np,
++ unsigned int int_type)
++{
++ int nr_irqs, i, err, cpu;
++
++ mct_int_type = int_type;
++
++ /* This driver uses only one global timer interrupt */
++ mct_irqs[MCT_G0_IRQ] = irq_of_parse_and_map(np, MCT_G0_IRQ);
++
++ /*
++ * Find out the number of local irqs specified. The local
++ * timer irqs are specified after the four global timer
++ * irqs are specified.
++ */
++ nr_irqs = of_irq_count(np);
++ if (nr_irqs > ARRAY_SIZE(mct_irqs)) {
++ pr_err("exynos-mct: too many (%d) interrupts configured in DT\n",
++ nr_irqs);
++ nr_irqs = ARRAY_SIZE(mct_irqs);
++ }
++ for (i = MCT_L0_IRQ; i < nr_irqs; i++)
++ mct_irqs[i] = irq_of_parse_and_map(np, i);
+
+ if (mct_int_type == MCT_INT_PPI) {
+
+@@ -532,11 +558,14 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem *
+ mct_irqs[MCT_L0_IRQ], err);
+ } else {
+ for_each_possible_cpu(cpu) {
+- int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
++ int mct_irq;
+ struct mct_clock_event_device *pcpu_mevt =
+ per_cpu_ptr(&percpu_mct_tick, cpu);
+
+ pcpu_mevt->evt.irq = -1;
++ if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
++ break;
++ mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+
+ irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
+ if (request_irq(mct_irq,
+@@ -581,24 +610,13 @@ out_irq:
+
+ static int __init mct_init_dt(struct device_node *np, unsigned int int_type)
+ {
+- u32 nr_irqs, i;
+ int ret;
+
+- mct_int_type = int_type;
+-
+- /* This driver uses only one global timer interrupt */
+- mct_irqs[MCT_G0_IRQ] = irq_of_parse_and_map(np, MCT_G0_IRQ);
+-
+- /*
+- * Find out the number of local irqs specified. The local
+- * timer irqs are specified after the four global timer
+- * irqs are specified.
+- */
+- nr_irqs = of_irq_count(np);
+- for (i = MCT_L0_IRQ; i < nr_irqs; i++)
+- mct_irqs[i] = irq_of_parse_and_map(np, i);
++ ret = exynos4_timer_resources(np);
++ if (ret)
++ return ret;
+
+- ret = exynos4_timer_resources(np, of_iomap(np, 0));
++ ret = exynos4_timer_interrupts(np, int_type);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
+index ff188ab68496e..bb47610bbd1c4 100644
+--- a/drivers/clocksource/hyperv_timer.c
++++ b/drivers/clocksource/hyperv_timer.c
+@@ -565,4 +565,3 @@ void __init hv_init_clocksource(void)
+ hv_sched_clock_offset = hv_read_reference_counter();
+ hv_setup_sched_clock(read_hv_sched_clock_msr);
+ }
+-EXPORT_SYMBOL_GPL(hv_init_clocksource);
+diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
+index dd0956ad969c1..d35548aa026fb 100644
+--- a/drivers/clocksource/sh_cmt.c
++++ b/drivers/clocksource/sh_cmt.c
+@@ -13,6 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
++#include <linux/iopoll.h>
+ #include <linux/ioport.h>
+ #include <linux/irq.h>
+ #include <linux/module.h>
+@@ -116,6 +117,7 @@ struct sh_cmt_device {
+ void __iomem *mapbase;
+ struct clk *clk;
+ unsigned long rate;
++ unsigned int reg_delay;
+
+ raw_spinlock_t lock; /* Protect the shared start/stop register */
+
+@@ -247,10 +249,17 @@ static inline u32 sh_cmt_read_cmstr(struct sh_cmt_channel *ch)
+
+ static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, u32 value)
+ {
+- if (ch->iostart)
+- ch->cmt->info->write_control(ch->iostart, 0, value);
+- else
+- ch->cmt->info->write_control(ch->cmt->mapbase, 0, value);
++ u32 old_value = sh_cmt_read_cmstr(ch);
++
++ if (value != old_value) {
++ if (ch->iostart) {
++ ch->cmt->info->write_control(ch->iostart, 0, value);
++ udelay(ch->cmt->reg_delay);
++ } else {
++ ch->cmt->info->write_control(ch->cmt->mapbase, 0, value);
++ udelay(ch->cmt->reg_delay);
++ }
++ }
+ }
+
+ static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
+@@ -260,7 +269,12 @@ static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
+
+ static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, u32 value)
+ {
+- ch->cmt->info->write_control(ch->ioctrl, CMCSR, value);
++ u32 old_value = sh_cmt_read_cmcsr(ch);
++
++ if (value != old_value) {
++ ch->cmt->info->write_control(ch->ioctrl, CMCSR, value);
++ udelay(ch->cmt->reg_delay);
++ }
+ }
+
+ static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
+@@ -268,14 +282,33 @@ static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
+ return ch->cmt->info->read_count(ch->ioctrl, CMCNT);
+ }
+
+-static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value)
++static inline int sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value)
+ {
++ /* Tests showed that we need to wait 3 clocks here */
++ unsigned int cmcnt_delay = DIV_ROUND_UP(3 * ch->cmt->reg_delay, 2);
++ u32 reg;
++
++ if (ch->cmt->info->model > SH_CMT_16BIT) {
++ int ret = read_poll_timeout_atomic(sh_cmt_read_cmcsr, reg,
++ !(reg & SH_CMT32_CMCSR_WRFLG),
++ 1, cmcnt_delay, false, ch);
++ if (ret < 0)
++ return ret;
++ }
++
+ ch->cmt->info->write_count(ch->ioctrl, CMCNT, value);
++ udelay(cmcnt_delay);
++ return 0;
+ }
+
+ static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, u32 value)
+ {
+- ch->cmt->info->write_count(ch->ioctrl, CMCOR, value);
++ u32 old_value = ch->cmt->info->read_count(ch->ioctrl, CMCOR);
++
++ if (value != old_value) {
++ ch->cmt->info->write_count(ch->ioctrl, CMCOR, value);
++ udelay(ch->cmt->reg_delay);
++ }
+ }
+
+ static u32 sh_cmt_get_counter(struct sh_cmt_channel *ch, u32 *has_wrapped)
+@@ -319,7 +352,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start)
+
+ static int sh_cmt_enable(struct sh_cmt_channel *ch)
+ {
+- int k, ret;
++ int ret;
+
+ dev_pm_syscore_device(&ch->cmt->pdev->dev, true);
+
+@@ -347,26 +380,9 @@ static int sh_cmt_enable(struct sh_cmt_channel *ch)
+ }
+
+ sh_cmt_write_cmcor(ch, 0xffffffff);
+- sh_cmt_write_cmcnt(ch, 0);
+-
+- /*
+- * According to the sh73a0 user's manual, as CMCNT can be operated
+- * only by the RCLK (Pseudo 32 kHz), there's one restriction on
+- * modifying CMCNT register; two RCLK cycles are necessary before
+- * this register is either read or any modification of the value
+- * it holds is reflected in the LSI's actual operation.
+- *
+- * While at it, we're supposed to clear out the CMCNT as of this
+- * moment, so make sure it's processed properly here. This will
+- * take RCLKx2 at maximum.
+- */
+- for (k = 0; k < 100; k++) {
+- if (!sh_cmt_read_cmcnt(ch))
+- break;
+- udelay(1);
+- }
++ ret = sh_cmt_write_cmcnt(ch, 0);
+
+- if (sh_cmt_read_cmcnt(ch)) {
++ if (ret || sh_cmt_read_cmcnt(ch)) {
+ dev_err(&ch->cmt->pdev->dev, "ch%u: cannot clear CMCNT\n",
+ ch->index);
+ ret = -ETIMEDOUT;
+@@ -987,8 +1003,8 @@ MODULE_DEVICE_TABLE(of, sh_cmt_of_table);
+
+ static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev)
+ {
+- unsigned int mask;
+- unsigned int i;
++ unsigned int mask, i;
++ unsigned long rate;
+ int ret;
+
+ cmt->pdev = pdev;
+@@ -1024,10 +1040,16 @@ static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev)
+ if (ret < 0)
+ goto err_clk_unprepare;
+
+- if (cmt->info->width == 16)
+- cmt->rate = clk_get_rate(cmt->clk) / 512;
+- else
+- cmt->rate = clk_get_rate(cmt->clk) / 8;
++ rate = clk_get_rate(cmt->clk);
++ if (!rate) {
++ ret = -EINVAL;
++ goto err_clk_disable;
++ }
++
++ /* We shall wait 2 input clks after register writes */
++ if (cmt->info->model >= SH_CMT_48BIT)
++ cmt->reg_delay = DIV_ROUND_UP(2UL * USEC_PER_SEC, rate);
++ cmt->rate = rate / (cmt->info->width == 16 ? 512 : 8);
+
+ /* Map the memory resource(s). */
+ ret = sh_cmt_map_memory(cmt);
+diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c
+index 4efd0cf3b602d..0d52e28fea4de 100644
+--- a/drivers/clocksource/timer-cadence-ttc.c
++++ b/drivers/clocksource/timer-cadence-ttc.c
+@@ -486,10 +486,10 @@ static int __init ttc_timer_probe(struct platform_device *pdev)
+ * and use it. Note that the event timer uses the interrupt and it's the
+ * 2nd TTC hence the irq_of_parse_and_map(,1)
+ */
+- timer_baseaddr = of_iomap(timer, 0);
+- if (!timer_baseaddr) {
++ timer_baseaddr = devm_of_iomap(&pdev->dev, timer, 0, NULL);
++ if (IS_ERR(timer_baseaddr)) {
+ pr_err("ERROR: invalid timer base address\n");
+- return -ENXIO;
++ return PTR_ERR(timer_baseaddr);
+ }
+
+ irq = irq_of_parse_and_map(timer, 1);
+@@ -513,20 +513,27 @@ static int __init ttc_timer_probe(struct platform_device *pdev)
+ clk_ce = of_clk_get(timer, clksel);
+ if (IS_ERR(clk_ce)) {
+ pr_err("ERROR: timer input clock not found\n");
+- return PTR_ERR(clk_ce);
++ ret = PTR_ERR(clk_ce);
++ goto put_clk_cs;
+ }
+
+ ret = ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width);
+ if (ret)
+- return ret;
++ goto put_clk_ce;
+
+ ret = ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq);
+ if (ret)
+- return ret;
++ goto put_clk_ce;
+
+ pr_info("%pOFn #0 at %p, irq=%d\n", timer, timer_baseaddr, irq);
+
+ return 0;
++
++put_clk_ce:
++ clk_put(clk_ce);
++put_clk_cs:
++ clk_put(clk_cs);
++ return ret;
+ }
+
+ static const struct of_device_id ttc_timer_of_match[] = {
+diff --git a/drivers/clocksource/timer-davinci.c b/drivers/clocksource/timer-davinci.c
+index 9996c05425200..b1c248498be46 100644
+--- a/drivers/clocksource/timer-davinci.c
++++ b/drivers/clocksource/timer-davinci.c
+@@ -257,21 +257,25 @@ int __init davinci_timer_register(struct clk *clk,
+ resource_size(&timer_cfg->reg),
+ "davinci-timer")) {
+ pr_err("Unable to request memory region\n");
+- return -EBUSY;
++ rv = -EBUSY;
++ goto exit_clk_disable;
+ }
+
+ base = ioremap(timer_cfg->reg.start, resource_size(&timer_cfg->reg));
+ if (!base) {
+ pr_err("Unable to map the register range\n");
+- return -ENOMEM;
++ rv = -ENOMEM;
++ goto exit_mem_region;
+ }
+
+ davinci_timer_init(base);
+ tick_rate = clk_get_rate(clk);
+
+ clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL);
+- if (!clockevent)
+- return -ENOMEM;
++ if (!clockevent) {
++ rv = -ENOMEM;
++ goto exit_iounmap_base;
++ }
+
+ clockevent->dev.name = "tim12";
+ clockevent->dev.features = CLOCK_EVT_FEAT_ONESHOT;
+@@ -296,7 +300,7 @@ int __init davinci_timer_register(struct clk *clk,
+ "clockevent/tim12", clockevent);
+ if (rv) {
+ pr_err("Unable to request the clockevent interrupt\n");
+- return rv;
++ goto exit_free_clockevent;
+ }
+
+ davinci_clocksource.dev.rating = 300;
+@@ -323,13 +327,27 @@ int __init davinci_timer_register(struct clk *clk,
+ rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate);
+ if (rv) {
+ pr_err("Unable to register clocksource\n");
+- return rv;
++ goto exit_free_irq;
+ }
+
+ sched_clock_register(davinci_timer_read_sched_clock,
+ DAVINCI_TIMER_CLKSRC_BITS, tick_rate);
+
+ return 0;
++
++exit_free_irq:
++ free_irq(timer_cfg->irq[DAVINCI_TIMER_CLOCKEVENT_IRQ].start,
++ clockevent);
++exit_free_clockevent:
++ kfree(clockevent);
++exit_iounmap_base:
++ iounmap(base);
++exit_mem_region:
++ release_mem_region(timer_cfg->reg.start,
++ resource_size(&timer_cfg->reg));
++exit_clk_disable:
++ clk_disable_unprepare(clk);
++ return rv;
+ }
+
+ static int __init of_davinci_timer_register(struct device_node *np)
+diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c
+index cbb184953510b..b8e92991c4719 100644
+--- a/drivers/clocksource/timer-ixp4xx.c
++++ b/drivers/clocksource/timer-ixp4xx.c
+@@ -282,7 +282,6 @@ void __init ixp4xx_timer_setup(resource_size_t timerbase,
+ }
+ ixp4xx_timer_register(base, timer_irq, timer_freq);
+ }
+-EXPORT_SYMBOL_GPL(ixp4xx_timer_setup);
+
+ #ifdef CONFIG_OF
+ static __init int ixp4xx_of_timer_init(struct device_node *np)
+diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
+index cfa4ec7ef3968..790d2c9b42a70 100644
+--- a/drivers/clocksource/timer-microchip-pit64b.c
++++ b/drivers/clocksource/timer-microchip-pit64b.c
+@@ -165,7 +165,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
+ return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
+ }
+
+-static u64 mchp_pit64b_sched_read_clk(void)
++static u64 notrace mchp_pit64b_sched_read_clk(void)
+ {
+ return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
+ }
+diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
+index 529cc6a51cdb3..c3f54d9912be7 100644
+--- a/drivers/clocksource/timer-of.c
++++ b/drivers/clocksource/timer-of.c
+@@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np,
+ of_base->base = of_base->name ?
+ of_io_request_and_map(np, of_base->index, of_base->name) :
+ of_iomap(np, of_base->index);
+- if (IS_ERR(of_base->base)) {
+- pr_err("Failed to iomap (%s)\n", of_base->name);
+- return PTR_ERR(of_base->base);
++ if (IS_ERR_OR_NULL(of_base->base)) {
++ pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name);
++ return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM;
+ }
+
+ return 0;
+diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c
+index 56c0cc32d0ac6..d514b44e67dd1 100644
+--- a/drivers/clocksource/timer-oxnas-rps.c
++++ b/drivers/clocksource/timer-oxnas-rps.c
+@@ -236,7 +236,7 @@ static int __init oxnas_rps_timer_init(struct device_node *np)
+ }
+
+ rps->irq = irq_of_parse_and_map(np, 0);
+- if (rps->irq < 0) {
++ if (!rps->irq) {
+ ret = -EINVAL;
+ goto err_iomap;
+ }
+diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c
+index 401d592e85f5a..e6a87f4af2b50 100644
+--- a/drivers/clocksource/timer-sp804.c
++++ b/drivers/clocksource/timer-sp804.c
+@@ -259,6 +259,11 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time
+ struct clk *clk1, *clk2;
+ const char *name = of_get_property(np, "compatible", NULL);
+
++ if (initialized) {
++ pr_debug("%pOF: skipping further SP804 timer device\n", np);
++ return 0;
++ }
++
+ base = of_iomap(np, 0);
+ if (!base)
+ return -ENXIO;
+@@ -270,11 +275,6 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time
+ writel(0, timer1_base + timer->ctrl);
+ writel(0, timer2_base + timer->ctrl);
+
+- if (initialized || !of_device_is_available(np)) {
+- ret = -EINVAL;
+- goto err;
+- }
+-
+ clk1 = of_clk_get(np, 0);
+ if (IS_ERR(clk1))
+ clk1 = NULL;
+diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
+index b6f97960d8ee0..632523c1232f6 100644
+--- a/drivers/clocksource/timer-ti-dm-systimer.c
++++ b/drivers/clocksource/timer-ti-dm-systimer.c
+@@ -241,8 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void)
+ bool quirk_unreliable_oscillator = false;
+
+ /* Quirk unreliable 32 KiHz oscillator with incomplete dts */
+- if (of_machine_is_compatible("ti,omap3-beagle") ||
+- of_machine_is_compatible("timll,omap3-devkit8000")) {
++ if (of_machine_is_compatible("ti,omap3-beagle-ab4")) {
+ quirk_unreliable_oscillator = true;
+ counter_32k = -ENODEV;
+ }
+@@ -346,8 +345,10 @@ static int __init dmtimer_systimer_init_clock(struct dmtimer_systimer *t,
+ return error;
+
+ r = clk_get_rate(clock);
+- if (!r)
++ if (!r) {
++ clk_disable_unprepare(clock);
+ return -ENODEV;
++ }
+
+ if (is_ick)
+ t->ick = clock;
+@@ -695,9 +696,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa)
+ return 0;
+ }
+
+- if (pa == 0x48034000) /* dra7 dmtimer3 */
++ if (pa == 0x4882c000) /* dra7 dmtimer15 */
+ return dmtimer_percpu_timer_init(np, 0);
+- else if (pa == 0x48036000) /* dra7 dmtimer4 */
++ else if (pa == 0x4882e000) /* dra7 dmtimer16 */
+ return dmtimer_percpu_timer_init(np, 1);
+
+ return 0;
+diff --git a/drivers/comedi/drivers/adv_pci1760.c b/drivers/comedi/drivers/adv_pci1760.c
+index 6de8ab97d346c..d6934b6c436d1 100644
+--- a/drivers/comedi/drivers/adv_pci1760.c
++++ b/drivers/comedi/drivers/adv_pci1760.c
+@@ -59,7 +59,7 @@
+ #define PCI1760_CMD_CLR_IMB2 0x00 /* Clears IMB2 */
+ #define PCI1760_CMD_SET_DO 0x01 /* Set output state */
+ #define PCI1760_CMD_GET_DO 0x02 /* Read output status */
+-#define PCI1760_CMD_GET_STATUS 0x03 /* Read current status */
++#define PCI1760_CMD_GET_STATUS 0x07 /* Read current status */
+ #define PCI1760_CMD_GET_FW_VER 0x0e /* Read firmware version */
+ #define PCI1760_CMD_GET_HW_VER 0x0f /* Read hardware version */
+ #define PCI1760_CMD_SET_PWM_HI(x) (0x10 + (x) * 2) /* Set "hi" period */
+diff --git a/drivers/comedi/drivers/dt9812.c b/drivers/comedi/drivers/dt9812.c
+index 634f57730c1e0..704b04d2980d3 100644
+--- a/drivers/comedi/drivers/dt9812.c
++++ b/drivers/comedi/drivers/dt9812.c
+@@ -32,6 +32,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/errno.h>
++#include <linux/slab.h>
+ #include <linux/uaccess.h>
+
+ #include "../comedi_usb.h"
+@@ -237,22 +238,42 @@ static int dt9812_read_info(struct comedi_device *dev,
+ {
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
+- struct dt9812_usb_cmd cmd;
++ struct dt9812_usb_cmd *cmd;
++ size_t tbuf_size;
+ int count, ret;
++ void *tbuf;
+
+- cmd.cmd = cpu_to_le32(DT9812_R_FLASH_DATA);
+- cmd.u.flash_data_info.address =
++ tbuf_size = max(sizeof(*cmd), buf_size);
++
++ tbuf = kzalloc(tbuf_size, GFP_KERNEL);
++ if (!tbuf)
++ return -ENOMEM;
++
++ cmd = tbuf;
++
++ cmd->cmd = cpu_to_le32(DT9812_R_FLASH_DATA);
++ cmd->u.flash_data_info.address =
+ cpu_to_le16(DT9812_DIAGS_BOARD_INFO_ADDR + offset);
+- cmd.u.flash_data_info.numbytes = cpu_to_le16(buf_size);
++ cmd->u.flash_data_info.numbytes = cpu_to_le16(buf_size);
+
+ /* DT9812 only responds to 32 byte writes!! */
+ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+- &cmd, 32, &count, DT9812_USB_TIMEOUT);
++ cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT);
+ if (ret)
+- return ret;
++ goto out;
++
++ ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
++ tbuf, buf_size, &count, DT9812_USB_TIMEOUT);
++ if (!ret) {
++ if (count == buf_size)
++ memcpy(buf, tbuf, buf_size);
++ else
++ ret = -EREMOTEIO;
++ }
++out:
++ kfree(tbuf);
+
+- return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
+- buf, buf_size, &count, DT9812_USB_TIMEOUT);
++ return ret;
+ }
+
+ static int dt9812_read_multiple_registers(struct comedi_device *dev,
+@@ -261,22 +282,42 @@ static int dt9812_read_multiple_registers(struct comedi_device *dev,
+ {
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
+- struct dt9812_usb_cmd cmd;
++ struct dt9812_usb_cmd *cmd;
+ int i, count, ret;
++ size_t buf_size;
++ void *buf;
+
+- cmd.cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG);
+- cmd.u.read_multi_info.count = reg_count;
++ buf_size = max_t(size_t, sizeof(*cmd), reg_count);
++
++ buf = kzalloc(buf_size, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
++
++ cmd = buf;
++
++ cmd->cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG);
++ cmd->u.read_multi_info.count = reg_count;
+ for (i = 0; i < reg_count; i++)
+- cmd.u.read_multi_info.address[i] = address[i];
++ cmd->u.read_multi_info.address[i] = address[i];
+
+ /* DT9812 only responds to 32 byte writes!! */
+ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+- &cmd, 32, &count, DT9812_USB_TIMEOUT);
++ cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT);
+ if (ret)
+- return ret;
++ goto out;
++
++ ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
++ buf, reg_count, &count, DT9812_USB_TIMEOUT);
++ if (!ret) {
++ if (count == reg_count)
++ memcpy(value, buf, reg_count);
++ else
++ ret = -EREMOTEIO;
++ }
++out:
++ kfree(buf);
+
+- return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
+- value, reg_count, &count, DT9812_USB_TIMEOUT);
++ return ret;
+ }
+
+ static int dt9812_write_multiple_registers(struct comedi_device *dev,
+@@ -285,19 +326,27 @@ static int dt9812_write_multiple_registers(struct comedi_device *dev,
+ {
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
+- struct dt9812_usb_cmd cmd;
++ struct dt9812_usb_cmd *cmd;
+ int i, count;
++ int ret;
++
++ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
++ if (!cmd)
++ return -ENOMEM;
+
+- cmd.cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG);
+- cmd.u.read_multi_info.count = reg_count;
++ cmd->cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG);
++ cmd->u.read_multi_info.count = reg_count;
+ for (i = 0; i < reg_count; i++) {
+- cmd.u.write_multi_info.write[i].address = address[i];
+- cmd.u.write_multi_info.write[i].value = value[i];
++ cmd->u.write_multi_info.write[i].address = address[i];
++ cmd->u.write_multi_info.write[i].value = value[i];
+ }
+
+ /* DT9812 only responds to 32 byte writes!! */
+- return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+- &cmd, 32, &count, DT9812_USB_TIMEOUT);
++ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
++ cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT);
++ kfree(cmd);
++
++ return ret;
+ }
+
+ static int dt9812_rmw_multiple_registers(struct comedi_device *dev,
+@@ -306,17 +355,25 @@ static int dt9812_rmw_multiple_registers(struct comedi_device *dev,
+ {
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
+- struct dt9812_usb_cmd cmd;
++ struct dt9812_usb_cmd *cmd;
+ int i, count;
++ int ret;
++
++ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
++ if (!cmd)
++ return -ENOMEM;
+
+- cmd.cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG);
+- cmd.u.rmw_multi_info.count = reg_count;
++ cmd->cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG);
++ cmd->u.rmw_multi_info.count = reg_count;
+ for (i = 0; i < reg_count; i++)
+- cmd.u.rmw_multi_info.rmw[i] = rmw[i];
++ cmd->u.rmw_multi_info.rmw[i] = rmw[i];
+
+ /* DT9812 only responds to 32 byte writes!! */
+- return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+- &cmd, 32, &count, DT9812_USB_TIMEOUT);
++ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
++ cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT);
++ kfree(cmd);
++
++ return ret;
+ }
+
+ static int dt9812_digital_in(struct comedi_device *dev, u8 *bits)
+diff --git a/drivers/comedi/drivers/ni_usb6501.c b/drivers/comedi/drivers/ni_usb6501.c
+index 5b6d9d783b2f7..c42987b74b1dc 100644
+--- a/drivers/comedi/drivers/ni_usb6501.c
++++ b/drivers/comedi/drivers/ni_usb6501.c
+@@ -144,6 +144,10 @@ static const u8 READ_COUNTER_RESPONSE[] = {0x00, 0x01, 0x00, 0x10,
+ 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00};
+
++/* Largest supported packets */
++static const size_t TX_MAX_SIZE = sizeof(SET_PORT_DIR_REQUEST);
++static const size_t RX_MAX_SIZE = sizeof(READ_PORT_RESPONSE);
++
+ enum commands {
+ READ_PORT,
+ WRITE_PORT,
+@@ -501,6 +505,12 @@ static int ni6501_find_endpoints(struct comedi_device *dev)
+ if (!devpriv->ep_rx || !devpriv->ep_tx)
+ return -ENODEV;
+
++ if (usb_endpoint_maxp(devpriv->ep_rx) < RX_MAX_SIZE)
++ return -ENODEV;
++
++ if (usb_endpoint_maxp(devpriv->ep_tx) < TX_MAX_SIZE)
++ return -ENODEV;
++
+ return 0;
+ }
+
+diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c
+index 9f920819cd742..9a1d146b7ebb2 100644
+--- a/drivers/comedi/drivers/vmk80xx.c
++++ b/drivers/comedi/drivers/vmk80xx.c
+@@ -90,6 +90,9 @@ enum {
+ #define IC3_VERSION BIT(0)
+ #define IC6_VERSION BIT(1)
+
++#define MIN_BUF_SIZE 64
++#define PACKET_TIMEOUT 10000 /* ms */
++
+ enum vmk80xx_model {
+ VMK8055_MODEL,
+ VMK8061_MODEL
+@@ -157,22 +160,21 @@ static void vmk80xx_do_bulk_msg(struct comedi_device *dev)
+ __u8 rx_addr;
+ unsigned int tx_pipe;
+ unsigned int rx_pipe;
+- size_t size;
++ size_t tx_size;
++ size_t rx_size;
+
+ tx_addr = devpriv->ep_tx->bEndpointAddress;
+ rx_addr = devpriv->ep_rx->bEndpointAddress;
+ tx_pipe = usb_sndbulkpipe(usb, tx_addr);
+ rx_pipe = usb_rcvbulkpipe(usb, rx_addr);
++ tx_size = usb_endpoint_maxp(devpriv->ep_tx);
++ rx_size = usb_endpoint_maxp(devpriv->ep_rx);
+
+- /*
+- * The max packet size attributes of the K8061
+- * input/output endpoints are identical
+- */
+- size = usb_endpoint_maxp(devpriv->ep_tx);
++ usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, tx_size, NULL,
++ PACKET_TIMEOUT);
+
+- usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf,
+- size, NULL, devpriv->ep_tx->bInterval);
+- usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, size, NULL, HZ * 10);
++ usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, rx_size, NULL,
++ PACKET_TIMEOUT);
+ }
+
+ static int vmk80xx_read_packet(struct comedi_device *dev)
+@@ -191,7 +193,7 @@ static int vmk80xx_read_packet(struct comedi_device *dev)
+ pipe = usb_rcvintpipe(usb, ep->bEndpointAddress);
+ return usb_interrupt_msg(usb, pipe, devpriv->usb_rx_buf,
+ usb_endpoint_maxp(ep), NULL,
+- HZ * 10);
++ PACKET_TIMEOUT);
+ }
+
+ static int vmk80xx_write_packet(struct comedi_device *dev, int cmd)
+@@ -212,7 +214,7 @@ static int vmk80xx_write_packet(struct comedi_device *dev, int cmd)
+ pipe = usb_sndintpipe(usb, ep->bEndpointAddress);
+ return usb_interrupt_msg(usb, pipe, devpriv->usb_tx_buf,
+ usb_endpoint_maxp(ep), NULL,
+- HZ * 10);
++ PACKET_TIMEOUT);
+ }
+
+ static int vmk80xx_reset_device(struct comedi_device *dev)
+@@ -678,12 +680,12 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev)
+ struct vmk80xx_private *devpriv = dev->private;
+ size_t size;
+
+- size = usb_endpoint_maxp(devpriv->ep_rx);
++ size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE);
+ devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL);
+ if (!devpriv->usb_rx_buf)
+ return -ENOMEM;
+
+- size = usb_endpoint_maxp(devpriv->ep_tx);
++ size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE);
+ devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
+ if (!devpriv->usb_tx_buf)
+ return -ENOMEM;
+diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c
+index 0caa60537b142..ab2c49579b289 100644
+--- a/drivers/counter/104-quad-8.c
++++ b/drivers/counter/104-quad-8.c
+@@ -61,10 +61,6 @@ struct quad8 {
+ #define QUAD8_REG_CHAN_OP 0x11
+ #define QUAD8_REG_INDEX_INPUT_LEVELS 0x16
+ #define QUAD8_DIFF_ENCODER_CABLE_STATUS 0x17
+-/* Borrow Toggle flip-flop */
+-#define QUAD8_FLAG_BT BIT(0)
+-/* Carry Toggle flip-flop */
+-#define QUAD8_FLAG_CT BIT(1)
+ /* Error flag */
+ #define QUAD8_FLAG_E BIT(4)
+ /* Up/Down flag */
+@@ -97,6 +93,9 @@ struct quad8 {
+ #define QUAD8_CMR_QUADRATURE_X2 0x10
+ #define QUAD8_CMR_QUADRATURE_X4 0x18
+
++/* Each Counter is 24 bits wide */
++#define LS7267_CNTR_MAX GENMASK(23, 0)
++
+ static int quad8_signal_read(struct counter_device *counter,
+ struct counter_signal *signal,
+ enum counter_signal_level *level)
+@@ -117,21 +116,13 @@ static int quad8_signal_read(struct counter_device *counter,
+ }
+
+ static int quad8_count_read(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int base_offset = priv->base + 2 * count->id;
+- unsigned int flags;
+- unsigned int borrow;
+- unsigned int carry;
+ int i;
+
+- flags = inb(base_offset + 1);
+- borrow = flags & QUAD8_FLAG_BT;
+- carry = !!(flags & QUAD8_FLAG_CT);
+-
+- /* Borrow XOR Carry effectively doubles count range */
+- *val = (unsigned long)(borrow ^ carry) << 24;
++ *val = 0;
+
+ mutex_lock(&priv->lock);
+
+@@ -148,14 +139,13 @@ static int quad8_count_read(struct counter_device *counter,
+ }
+
+ static int quad8_count_write(struct counter_device *counter,
+- struct counter_count *count, unsigned long val)
++ struct counter_count *count, u64 val)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int base_offset = priv->base + 2 * count->id;
+ int i;
+
+- /* Only 24-bit values are supported */
+- if (val > 0xFFFFFF)
++ if (val > LS7267_CNTR_MAX)
+ return -ERANGE;
+
+ mutex_lock(&priv->lock);
+@@ -188,22 +178,16 @@ static int quad8_count_write(struct counter_device *counter,
+ return 0;
+ }
+
+-enum quad8_count_function {
+- QUAD8_COUNT_FUNCTION_PULSE_DIRECTION = 0,
+- QUAD8_COUNT_FUNCTION_QUADRATURE_X1,
+- QUAD8_COUNT_FUNCTION_QUADRATURE_X2,
+- QUAD8_COUNT_FUNCTION_QUADRATURE_X4
+-};
+-
+ static const enum counter_function quad8_count_functions_list[] = {
+- [QUAD8_COUNT_FUNCTION_PULSE_DIRECTION] = COUNTER_FUNCTION_PULSE_DIRECTION,
+- [QUAD8_COUNT_FUNCTION_QUADRATURE_X1] = COUNTER_FUNCTION_QUADRATURE_X1_A,
+- [QUAD8_COUNT_FUNCTION_QUADRATURE_X2] = COUNTER_FUNCTION_QUADRATURE_X2_A,
+- [QUAD8_COUNT_FUNCTION_QUADRATURE_X4] = COUNTER_FUNCTION_QUADRATURE_X4
++ COUNTER_FUNCTION_PULSE_DIRECTION,
++ COUNTER_FUNCTION_QUADRATURE_X1_A,
++ COUNTER_FUNCTION_QUADRATURE_X2_A,
++ COUNTER_FUNCTION_QUADRATURE_X4,
+ };
+
+-static int quad8_function_get(struct counter_device *counter,
+- struct counter_count *count, size_t *function)
++static int quad8_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int id = count->id;
+@@ -213,25 +197,26 @@ static int quad8_function_get(struct counter_device *counter,
+ if (priv->quadrature_mode[id])
+ switch (priv->quadrature_scale[id]) {
+ case 0:
+- *function = QUAD8_COUNT_FUNCTION_QUADRATURE_X1;
++ *function = COUNTER_FUNCTION_QUADRATURE_X1_A;
+ break;
+ case 1:
+- *function = QUAD8_COUNT_FUNCTION_QUADRATURE_X2;
++ *function = COUNTER_FUNCTION_QUADRATURE_X2_A;
+ break;
+ case 2:
+- *function = QUAD8_COUNT_FUNCTION_QUADRATURE_X4;
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+ break;
+ }
+ else
+- *function = QUAD8_COUNT_FUNCTION_PULSE_DIRECTION;
++ *function = COUNTER_FUNCTION_PULSE_DIRECTION;
+
+ mutex_unlock(&priv->lock);
+
+ return 0;
+ }
+
+-static int quad8_function_set(struct counter_device *counter,
+- struct counter_count *count, size_t function)
++static int quad8_function_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int id = count->id;
+@@ -247,7 +232,7 @@ static int quad8_function_set(struct counter_device *counter,
+ mode_cfg = priv->count_mode[id] << 1;
+ idr_cfg = priv->index_polarity[id] << 1;
+
+- if (function == QUAD8_COUNT_FUNCTION_PULSE_DIRECTION) {
++ if (function == COUNTER_FUNCTION_PULSE_DIRECTION) {
+ *quadrature_mode = 0;
+
+ /* Quadrature scaling only available in quadrature mode */
+@@ -263,15 +248,15 @@ static int quad8_function_set(struct counter_device *counter,
+ *quadrature_mode = 1;
+
+ switch (function) {
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X1:
++ case COUNTER_FUNCTION_QUADRATURE_X1_A:
+ *scale = 0;
+ mode_cfg |= QUAD8_CMR_QUADRATURE_X1;
+ break;
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X2:
++ case COUNTER_FUNCTION_QUADRATURE_X2_A:
+ *scale = 1;
+ mode_cfg |= QUAD8_CMR_QUADRATURE_X2;
+ break;
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X4:
++ case COUNTER_FUNCTION_QUADRATURE_X4:
+ *scale = 2;
+ mode_cfg |= QUAD8_CMR_QUADRATURE_X4;
+ break;
+@@ -290,8 +275,9 @@ static int quad8_function_set(struct counter_device *counter,
+ return 0;
+ }
+
+-static void quad8_direction_get(struct counter_device *counter,
+- struct counter_count *count, enum counter_count_direction *direction)
++static int quad8_direction_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_count_direction *direction)
+ {
+ const struct quad8 *const priv = counter->priv;
+ unsigned int ud_flag;
+@@ -302,76 +288,74 @@ static void quad8_direction_get(struct counter_device *counter,
+
+ *direction = (ud_flag) ? COUNTER_COUNT_DIRECTION_FORWARD :
+ COUNTER_COUNT_DIRECTION_BACKWARD;
+-}
+
+-enum quad8_synapse_action {
+- QUAD8_SYNAPSE_ACTION_NONE = 0,
+- QUAD8_SYNAPSE_ACTION_RISING_EDGE,
+- QUAD8_SYNAPSE_ACTION_FALLING_EDGE,
+- QUAD8_SYNAPSE_ACTION_BOTH_EDGES
+-};
++ return 0;
++}
+
+ static const enum counter_synapse_action quad8_index_actions_list[] = {
+- [QUAD8_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
+- [QUAD8_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE
++ COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+ };
+
+ static const enum counter_synapse_action quad8_synapse_actions_list[] = {
+- [QUAD8_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
+- [QUAD8_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+- [QUAD8_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+- [QUAD8_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES
++ COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
++ COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+ };
+
+-static int quad8_action_get(struct counter_device *counter,
+- struct counter_count *count, struct counter_synapse *synapse,
+- size_t *action)
++static int quad8_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+ struct quad8 *const priv = counter->priv;
+ int err;
+- size_t function = 0;
++ enum counter_function function;
+ const size_t signal_a_id = count->synapses[0].signal->id;
+ enum counter_count_direction direction;
+
+ /* Handle Index signals */
+ if (synapse->signal->id >= 16) {
+- if (priv->preset_enable[count->id])
+- *action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
++ if (!priv->preset_enable[count->id])
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ else
+- *action = QUAD8_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+
+ return 0;
+ }
+
+- err = quad8_function_get(counter, count, &function);
++ err = quad8_function_read(counter, count, &function);
+ if (err)
+ return err;
+
+ /* Default action mode */
+- *action = QUAD8_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+
+ /* Determine action mode based on current count function mode */
+ switch (function) {
+- case QUAD8_COUNT_FUNCTION_PULSE_DIRECTION:
++ case COUNTER_FUNCTION_PULSE_DIRECTION:
+ if (synapse->signal->id == signal_a_id)
+- *action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ return 0;
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X1:
++ case COUNTER_FUNCTION_QUADRATURE_X1_A:
+ if (synapse->signal->id == signal_a_id) {
+- quad8_direction_get(counter, count, &direction);
++ err = quad8_direction_read(counter, count, &direction);
++ if (err)
++ return err;
+
+ if (direction == COUNTER_COUNT_DIRECTION_FORWARD)
+- *action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ else
+- *action = QUAD8_SYNAPSE_ACTION_FALLING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
+ }
+ return 0;
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X2:
++ case COUNTER_FUNCTION_QUADRATURE_X2_A:
+ if (synapse->signal->id == signal_a_id)
+- *action = QUAD8_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+- case QUAD8_COUNT_FUNCTION_QUADRATURE_X4:
+- *action = QUAD8_SYNAPSE_ACTION_BOTH_EDGES;
++ case COUNTER_FUNCTION_QUADRATURE_X4:
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+ default:
+ /* should never reach this path */
+@@ -383,9 +367,9 @@ static const struct counter_ops quad8_ops = {
+ .signal_read = quad8_signal_read,
+ .count_read = quad8_count_read,
+ .count_write = quad8_count_write,
+- .function_get = quad8_function_get,
+- .function_set = quad8_function_set,
+- .action_get = quad8_action_get
++ .function_read = quad8_function_read,
++ .function_write = quad8_function_write,
++ .action_read = quad8_action_read
+ };
+
+ static const char *const quad8_index_polarity_modes[] = {
+@@ -394,7 +378,8 @@ static const char *const quad8_index_polarity_modes[] = {
+ };
+
+ static int quad8_index_polarity_get(struct counter_device *counter,
+- struct counter_signal *signal, size_t *index_polarity)
++ struct counter_signal *signal,
++ u32 *index_polarity)
+ {
+ const struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id - 16;
+@@ -405,7 +390,8 @@ static int quad8_index_polarity_get(struct counter_device *counter,
+ }
+
+ static int quad8_index_polarity_set(struct counter_device *counter,
+- struct counter_signal *signal, size_t index_polarity)
++ struct counter_signal *signal,
++ u32 index_polarity)
+ {
+ struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id - 16;
+@@ -426,20 +412,14 @@ static int quad8_index_polarity_set(struct counter_device *counter,
+ return 0;
+ }
+
+-static struct counter_signal_enum_ext quad8_index_pol_enum = {
+- .items = quad8_index_polarity_modes,
+- .num_items = ARRAY_SIZE(quad8_index_polarity_modes),
+- .get = quad8_index_polarity_get,
+- .set = quad8_index_polarity_set
+-};
+-
+ static const char *const quad8_synchronous_modes[] = {
+ "non-synchronous",
+ "synchronous"
+ };
+
+ static int quad8_synchronous_mode_get(struct counter_device *counter,
+- struct counter_signal *signal, size_t *synchronous_mode)
++ struct counter_signal *signal,
++ u32 *synchronous_mode)
+ {
+ const struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id - 16;
+@@ -450,7 +430,8 @@ static int quad8_synchronous_mode_get(struct counter_device *counter,
+ }
+
+ static int quad8_synchronous_mode_set(struct counter_device *counter,
+- struct counter_signal *signal, size_t synchronous_mode)
++ struct counter_signal *signal,
++ u32 synchronous_mode)
+ {
+ struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id - 16;
+@@ -477,22 +458,18 @@ static int quad8_synchronous_mode_set(struct counter_device *counter,
+ return 0;
+ }
+
+-static struct counter_signal_enum_ext quad8_syn_mode_enum = {
+- .items = quad8_synchronous_modes,
+- .num_items = ARRAY_SIZE(quad8_synchronous_modes),
+- .get = quad8_synchronous_mode_get,
+- .set = quad8_synchronous_mode_set
+-};
+-
+-static ssize_t quad8_count_floor_read(struct counter_device *counter,
+- struct counter_count *count, void *private, char *buf)
++static int quad8_count_floor_read(struct counter_device *counter,
++ struct counter_count *count, u64 *floor)
+ {
+ /* Only a floor of 0 is supported */
+- return sprintf(buf, "0\n");
++ *floor = 0;
++
++ return 0;
+ }
+
+-static int quad8_count_mode_get(struct counter_device *counter,
+- struct counter_count *count, size_t *cnt_mode)
++static int quad8_count_mode_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_count_mode *cnt_mode)
+ {
+ const struct quad8 *const priv = counter->priv;
+
+@@ -515,26 +492,28 @@ static int quad8_count_mode_get(struct counter_device *counter,
+ return 0;
+ }
+
+-static int quad8_count_mode_set(struct counter_device *counter,
+- struct counter_count *count, size_t cnt_mode)
++static int quad8_count_mode_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_count_mode cnt_mode)
+ {
+ struct quad8 *const priv = counter->priv;
++ unsigned int count_mode;
+ unsigned int mode_cfg;
+ const int base_offset = priv->base + 2 * count->id + 1;
+
+ /* Map Generic Counter count mode to 104-QUAD-8 count mode */
+ switch (cnt_mode) {
+ case COUNTER_COUNT_MODE_NORMAL:
+- cnt_mode = 0;
++ count_mode = 0;
+ break;
+ case COUNTER_COUNT_MODE_RANGE_LIMIT:
+- cnt_mode = 1;
++ count_mode = 1;
+ break;
+ case COUNTER_COUNT_MODE_NON_RECYCLE:
+- cnt_mode = 2;
++ count_mode = 2;
+ break;
+ case COUNTER_COUNT_MODE_MODULO_N:
+- cnt_mode = 3;
++ count_mode = 3;
+ break;
+ default:
+ /* should never reach this path */
+@@ -543,10 +522,10 @@ static int quad8_count_mode_set(struct counter_device *counter,
+
+ mutex_lock(&priv->lock);
+
+- priv->count_mode[count->id] = cnt_mode;
++ priv->count_mode[count->id] = count_mode;
+
+ /* Set count mode configuration value */
+- mode_cfg = cnt_mode << 1;
++ mode_cfg = count_mode << 1;
+
+ /* Add quadrature mode configuration */
+ if (priv->quadrature_mode[count->id])
+@@ -560,56 +539,35 @@ static int quad8_count_mode_set(struct counter_device *counter,
+ return 0;
+ }
+
+-static struct counter_count_enum_ext quad8_cnt_mode_enum = {
+- .items = counter_count_mode_str,
+- .num_items = ARRAY_SIZE(counter_count_mode_str),
+- .get = quad8_count_mode_get,
+- .set = quad8_count_mode_set
+-};
+-
+-static ssize_t quad8_count_direction_read(struct counter_device *counter,
+- struct counter_count *count, void *priv, char *buf)
+-{
+- enum counter_count_direction dir;
+-
+- quad8_direction_get(counter, count, &dir);
+-
+- return sprintf(buf, "%s\n", counter_count_direction_str[dir]);
+-}
+-
+-static ssize_t quad8_count_enable_read(struct counter_device *counter,
+- struct counter_count *count, void *private, char *buf)
++static int quad8_count_enable_read(struct counter_device *counter,
++ struct counter_count *count, u8 *enable)
+ {
+ const struct quad8 *const priv = counter->priv;
+
+- return sprintf(buf, "%u\n", priv->ab_enable[count->id]);
++ *enable = priv->ab_enable[count->id];
++
++ return 0;
+ }
+
+-static ssize_t quad8_count_enable_write(struct counter_device *counter,
+- struct counter_count *count, void *private, const char *buf, size_t len)
++static int quad8_count_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 enable)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int base_offset = priv->base + 2 * count->id;
+- int err;
+- bool ab_enable;
+ unsigned int ior_cfg;
+
+- err = kstrtobool(buf, &ab_enable);
+- if (err)
+- return err;
+-
+ mutex_lock(&priv->lock);
+
+- priv->ab_enable[count->id] = ab_enable;
++ priv->ab_enable[count->id] = enable;
+
+- ior_cfg = ab_enable | priv->preset_enable[count->id] << 1;
++ ior_cfg = enable | priv->preset_enable[count->id] << 1;
+
+ /* Load I/O control configuration */
+ outb(QUAD8_CTR_IOR | ior_cfg, base_offset + 1);
+
+ mutex_unlock(&priv->lock);
+
+- return len;
++ return 0;
+ }
+
+ static const char *const quad8_noise_error_states[] = {
+@@ -618,7 +576,7 @@ static const char *const quad8_noise_error_states[] = {
+ };
+
+ static int quad8_error_noise_get(struct counter_device *counter,
+- struct counter_count *count, size_t *noise_error)
++ struct counter_count *count, u32 *noise_error)
+ {
+ const struct quad8 *const priv = counter->priv;
+ const int base_offset = priv->base + 2 * count->id + 1;
+@@ -628,18 +586,14 @@ static int quad8_error_noise_get(struct counter_device *counter,
+ return 0;
+ }
+
+-static struct counter_count_enum_ext quad8_error_noise_enum = {
+- .items = quad8_noise_error_states,
+- .num_items = ARRAY_SIZE(quad8_noise_error_states),
+- .get = quad8_error_noise_get
+-};
+-
+-static ssize_t quad8_count_preset_read(struct counter_device *counter,
+- struct counter_count *count, void *private, char *buf)
++static int quad8_count_preset_read(struct counter_device *counter,
++ struct counter_count *count, u64 *preset)
+ {
+ const struct quad8 *const priv = counter->priv;
+
+- return sprintf(buf, "%u\n", priv->preset[count->id]);
++ *preset = priv->preset[count->id];
++
++ return 0;
+ }
+
+ static void quad8_preset_register_set(struct quad8 *const priv, const int id,
+@@ -658,19 +612,12 @@ static void quad8_preset_register_set(struct quad8 *const priv, const int id,
+ outb(preset >> (8 * i), base_offset);
+ }
+
+-static ssize_t quad8_count_preset_write(struct counter_device *counter,
+- struct counter_count *count, void *private, const char *buf, size_t len)
++static int quad8_count_preset_write(struct counter_device *counter,
++ struct counter_count *count, u64 preset)
+ {
+ struct quad8 *const priv = counter->priv;
+- unsigned int preset;
+- int ret;
+
+- ret = kstrtouint(buf, 0, &preset);
+- if (ret)
+- return ret;
+-
+- /* Only 24-bit values are supported */
+- if (preset > 0xFFFFFF)
++ if (preset > LS7267_CNTR_MAX)
+ return -ERANGE;
+
+ mutex_lock(&priv->lock);
+@@ -679,11 +626,11 @@ static ssize_t quad8_count_preset_write(struct counter_device *counter,
+
+ mutex_unlock(&priv->lock);
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t quad8_count_ceiling_read(struct counter_device *counter,
+- struct counter_count *count, void *private, char *buf)
++static int quad8_count_ceiling_read(struct counter_device *counter,
++ struct counter_count *count, u64 *ceiling)
+ {
+ struct quad8 *const priv = counter->priv;
+
+@@ -693,29 +640,24 @@ static ssize_t quad8_count_ceiling_read(struct counter_device *counter,
+ switch (priv->count_mode[count->id]) {
+ case 1:
+ case 3:
+- mutex_unlock(&priv->lock);
+- return sprintf(buf, "%u\n", priv->preset[count->id]);
++ *ceiling = priv->preset[count->id];
++ break;
++ default:
++ *ceiling = LS7267_CNTR_MAX;
++ break;
+ }
+
+ mutex_unlock(&priv->lock);
+
+- /* By default 0x1FFFFFF (25 bits unsigned) is maximum count */
+- return sprintf(buf, "33554431\n");
++ return 0;
+ }
+
+-static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
+- struct counter_count *count, void *private, const char *buf, size_t len)
++static int quad8_count_ceiling_write(struct counter_device *counter,
++ struct counter_count *count, u64 ceiling)
+ {
+ struct quad8 *const priv = counter->priv;
+- unsigned int ceiling;
+- int ret;
+-
+- ret = kstrtouint(buf, 0, &ceiling);
+- if (ret)
+- return ret;
+
+- /* Only 24-bit values are supported */
+- if (ceiling > 0xFFFFFF)
++ if (ceiling > LS7267_CNTR_MAX)
+ return -ERANGE;
+
+ mutex_lock(&priv->lock);
+@@ -726,7 +668,7 @@ static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
+ case 3:
+ quad8_preset_register_set(priv, count->id, ceiling);
+ mutex_unlock(&priv->lock);
+- return len;
++ return 0;
+ }
+
+ mutex_unlock(&priv->lock);
+@@ -734,27 +676,25 @@ static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
+ return -EINVAL;
+ }
+
+-static ssize_t quad8_count_preset_enable_read(struct counter_device *counter,
+- struct counter_count *count, void *private, char *buf)
++static int quad8_count_preset_enable_read(struct counter_device *counter,
++ struct counter_count *count,
++ u8 *preset_enable)
+ {
+ const struct quad8 *const priv = counter->priv;
+
+- return sprintf(buf, "%u\n", !priv->preset_enable[count->id]);
++ *preset_enable = !priv->preset_enable[count->id];
++
++ return 0;
+ }
+
+-static ssize_t quad8_count_preset_enable_write(struct counter_device *counter,
+- struct counter_count *count, void *private, const char *buf, size_t len)
++static int quad8_count_preset_enable_write(struct counter_device *counter,
++ struct counter_count *count,
++ u8 preset_enable)
+ {
+ struct quad8 *const priv = counter->priv;
+ const int base_offset = priv->base + 2 * count->id + 1;
+- bool preset_enable;
+- int ret;
+ unsigned int ior_cfg;
+
+- ret = kstrtobool(buf, &preset_enable);
+- if (ret)
+- return ret;
+-
+ /* Preset enable is active low in Input/Output Control register */
+ preset_enable = !preset_enable;
+
+@@ -762,25 +702,24 @@ static ssize_t quad8_count_preset_enable_write(struct counter_device *counter,
+
+ priv->preset_enable[count->id] = preset_enable;
+
+- ior_cfg = priv->ab_enable[count->id] | (unsigned int)preset_enable << 1;
++ ior_cfg = priv->ab_enable[count->id] | preset_enable << 1;
+
+ /* Load I/O control configuration to Input / Output Control Register */
+ outb(QUAD8_CTR_IOR | ior_cfg, base_offset);
+
+ mutex_unlock(&priv->lock);
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t quad8_signal_cable_fault_read(struct counter_device *counter,
+- struct counter_signal *signal,
+- void *private, char *buf)
++static int quad8_signal_cable_fault_read(struct counter_device *counter,
++ struct counter_signal *signal,
++ u8 *cable_fault)
+ {
+ struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id / 2;
+ bool disabled;
+ unsigned int status;
+- unsigned int fault;
+
+ mutex_lock(&priv->lock);
+
+@@ -797,36 +736,31 @@ static ssize_t quad8_signal_cable_fault_read(struct counter_device *counter,
+ mutex_unlock(&priv->lock);
+
+ /* Mask respective channel and invert logic */
+- fault = !(status & BIT(channel_id));
++ *cable_fault = !(status & BIT(channel_id));
+
+- return sprintf(buf, "%u\n", fault);
++ return 0;
+ }
+
+-static ssize_t quad8_signal_cable_fault_enable_read(
+- struct counter_device *counter, struct counter_signal *signal,
+- void *private, char *buf)
++static int quad8_signal_cable_fault_enable_read(struct counter_device *counter,
++ struct counter_signal *signal,
++ u8 *enable)
+ {
+ const struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id / 2;
+- const unsigned int enb = !!(priv->cable_fault_enable & BIT(channel_id));
+
+- return sprintf(buf, "%u\n", enb);
++ *enable = !!(priv->cable_fault_enable & BIT(channel_id));
++
++ return 0;
+ }
+
+-static ssize_t quad8_signal_cable_fault_enable_write(
+- struct counter_device *counter, struct counter_signal *signal,
+- void *private, const char *buf, size_t len)
++static int quad8_signal_cable_fault_enable_write(struct counter_device *counter,
++ struct counter_signal *signal,
++ u8 enable)
+ {
+ struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id / 2;
+- bool enable;
+- int ret;
+ unsigned int cable_fault_enable;
+
+- ret = kstrtobool(buf, &enable);
+- if (ret)
+- return ret;
+-
+ mutex_lock(&priv->lock);
+
+ if (enable)
+@@ -841,31 +775,27 @@ static ssize_t quad8_signal_cable_fault_enable_write(
+
+ mutex_unlock(&priv->lock);
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t quad8_signal_fck_prescaler_read(struct counter_device *counter,
+- struct counter_signal *signal, void *private, char *buf)
++static int quad8_signal_fck_prescaler_read(struct counter_device *counter,
++ struct counter_signal *signal,
++ u8 *prescaler)
+ {
+ const struct quad8 *const priv = counter->priv;
+- const size_t channel_id = signal->id / 2;
+
+- return sprintf(buf, "%u\n", priv->fck_prescaler[channel_id]);
++ *prescaler = priv->fck_prescaler[signal->id / 2];
++
++ return 0;
+ }
+
+-static ssize_t quad8_signal_fck_prescaler_write(struct counter_device *counter,
+- struct counter_signal *signal, void *private, const char *buf,
+- size_t len)
++static int quad8_signal_fck_prescaler_write(struct counter_device *counter,
++ struct counter_signal *signal,
++ u8 prescaler)
+ {
+ struct quad8 *const priv = counter->priv;
+ const size_t channel_id = signal->id / 2;
+ const int base_offset = priv->base + 2 * channel_id;
+- u8 prescaler;
+- int ret;
+-
+- ret = kstrtou8(buf, 0, &prescaler);
+- if (ret)
+- return ret;
+
+ mutex_lock(&priv->lock);
+
+@@ -881,31 +811,30 @@ static ssize_t quad8_signal_fck_prescaler_write(struct counter_device *counter,
+
+ mutex_unlock(&priv->lock);
+
+- return len;
++ return 0;
+ }
+
+-static const struct counter_signal_ext quad8_signal_ext[] = {
+- {
+- .name = "cable_fault",
+- .read = quad8_signal_cable_fault_read
+- },
+- {
+- .name = "cable_fault_enable",
+- .read = quad8_signal_cable_fault_enable_read,
+- .write = quad8_signal_cable_fault_enable_write
+- },
+- {
+- .name = "filter_clock_prescaler",
+- .read = quad8_signal_fck_prescaler_read,
+- .write = quad8_signal_fck_prescaler_write
+- }
++static struct counter_comp quad8_signal_ext[] = {
++ COUNTER_COMP_SIGNAL_BOOL("cable_fault", quad8_signal_cable_fault_read,
++ NULL),
++ COUNTER_COMP_SIGNAL_BOOL("cable_fault_enable",
++ quad8_signal_cable_fault_enable_read,
++ quad8_signal_cable_fault_enable_write),
++ COUNTER_COMP_SIGNAL_U8("filter_clock_prescaler",
++ quad8_signal_fck_prescaler_read,
++ quad8_signal_fck_prescaler_write)
+ };
+
+-static const struct counter_signal_ext quad8_index_ext[] = {
+- COUNTER_SIGNAL_ENUM("index_polarity", &quad8_index_pol_enum),
+- COUNTER_SIGNAL_ENUM_AVAILABLE("index_polarity", &quad8_index_pol_enum),
+- COUNTER_SIGNAL_ENUM("synchronous_mode", &quad8_syn_mode_enum),
+- COUNTER_SIGNAL_ENUM_AVAILABLE("synchronous_mode", &quad8_syn_mode_enum)
++static DEFINE_COUNTER_ENUM(quad8_index_pol_enum, quad8_index_polarity_modes);
++static DEFINE_COUNTER_ENUM(quad8_synch_mode_enum, quad8_synchronous_modes);
++
++static struct counter_comp quad8_index_ext[] = {
++ COUNTER_COMP_SIGNAL_ENUM("index_polarity", quad8_index_polarity_get,
++ quad8_index_polarity_set,
++ quad8_index_pol_enum),
++ COUNTER_COMP_SIGNAL_ENUM("synchronous_mode", quad8_synchronous_mode_get,
++ quad8_synchronous_mode_set,
++ quad8_synch_mode_enum),
+ };
+
+ #define QUAD8_QUAD_SIGNAL(_id, _name) { \
+@@ -974,39 +903,30 @@ static struct counter_synapse quad8_count_synapses[][3] = {
+ QUAD8_COUNT_SYNAPSES(6), QUAD8_COUNT_SYNAPSES(7)
+ };
+
+-static const struct counter_count_ext quad8_count_ext[] = {
+- {
+- .name = "ceiling",
+- .read = quad8_count_ceiling_read,
+- .write = quad8_count_ceiling_write
+- },
+- {
+- .name = "floor",
+- .read = quad8_count_floor_read
+- },
+- COUNTER_COUNT_ENUM("count_mode", &quad8_cnt_mode_enum),
+- COUNTER_COUNT_ENUM_AVAILABLE("count_mode", &quad8_cnt_mode_enum),
+- {
+- .name = "direction",
+- .read = quad8_count_direction_read
+- },
+- {
+- .name = "enable",
+- .read = quad8_count_enable_read,
+- .write = quad8_count_enable_write
+- },
+- COUNTER_COUNT_ENUM("error_noise", &quad8_error_noise_enum),
+- COUNTER_COUNT_ENUM_AVAILABLE("error_noise", &quad8_error_noise_enum),
+- {
+- .name = "preset",
+- .read = quad8_count_preset_read,
+- .write = quad8_count_preset_write
+- },
+- {
+- .name = "preset_enable",
+- .read = quad8_count_preset_enable_read,
+- .write = quad8_count_preset_enable_write
+- }
++static const enum counter_count_mode quad8_cnt_modes[] = {
++ COUNTER_COUNT_MODE_NORMAL,
++ COUNTER_COUNT_MODE_RANGE_LIMIT,
++ COUNTER_COUNT_MODE_NON_RECYCLE,
++ COUNTER_COUNT_MODE_MODULO_N,
++};
++
++static DEFINE_COUNTER_AVAILABLE(quad8_count_mode_available, quad8_cnt_modes);
++
++static DEFINE_COUNTER_ENUM(quad8_error_noise_enum, quad8_noise_error_states);
++
++static struct counter_comp quad8_count_ext[] = {
++ COUNTER_COMP_CEILING(quad8_count_ceiling_read,
++ quad8_count_ceiling_write),
++ COUNTER_COMP_FLOOR(quad8_count_floor_read, NULL),
++ COUNTER_COMP_COUNT_MODE(quad8_count_mode_read, quad8_count_mode_write,
++ quad8_count_mode_available),
++ COUNTER_COMP_DIRECTION(quad8_direction_read),
++ COUNTER_COMP_ENABLE(quad8_count_enable_read, quad8_count_enable_write),
++ COUNTER_COMP_COUNT_ENUM("error_noise", quad8_error_noise_get, NULL,
++ quad8_error_noise_enum),
++ COUNTER_COMP_PRESET(quad8_count_preset_read, quad8_count_preset_write),
++ COUNTER_COMP_PRESET_ENABLE(quad8_count_preset_enable_read,
++ quad8_count_preset_enable_write),
+ };
+
+ #define QUAD8_COUNT(_id, _cntname) { \
+diff --git a/drivers/counter/Makefile b/drivers/counter/Makefile
+index 19742e6f5e3eb..1ab7e087fdc26 100644
+--- a/drivers/counter/Makefile
++++ b/drivers/counter/Makefile
+@@ -4,6 +4,7 @@
+ #
+
+ obj-$(CONFIG_COUNTER) += counter.o
++counter-y := counter-core.o counter-sysfs.o
+
+ obj-$(CONFIG_104_QUAD_8) += 104-quad-8.o
+ obj-$(CONFIG_INTERRUPT_CNT) += interrupt-cnt.o
+diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
+new file mode 100644
+index 0000000000000..c533a6ff12cf7
+--- /dev/null
++++ b/drivers/counter/counter-core.c
+@@ -0,0 +1,142 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Generic Counter interface
++ * Copyright (C) 2020 William Breathitt Gray
++ */
++#include <linux/counter.h>
++#include <linux/device.h>
++#include <linux/export.h>
++#include <linux/gfp.h>
++#include <linux/idr.h>
++#include <linux/init.h>
++#include <linux/module.h>
++
++#include "counter-sysfs.h"
++
++/* Provides a unique ID for each counter device */
++static DEFINE_IDA(counter_ida);
++
++static void counter_device_release(struct device *dev)
++{
++ ida_free(&counter_ida, dev->id);
++}
++
++static struct device_type counter_device_type = {
++ .name = "counter_device",
++ .release = counter_device_release,
++};
++
++static struct bus_type counter_bus_type = {
++ .name = "counter",
++ .dev_name = "counter",
++};
++
++/**
++ * counter_register - register Counter to the system
++ * @counter: pointer to Counter to register
++ *
++ * This function registers a Counter to the system. A sysfs "counter" directory
++ * will be created and populated with sysfs attributes correlating with the
++ * Counter Signals, Synapses, and Counts respectively.
++ */
++int counter_register(struct counter_device *const counter)
++{
++ struct device *const dev = &counter->dev;
++ int id;
++ int err;
++
++ /* Acquire unique ID */
++ id = ida_alloc(&counter_ida, GFP_KERNEL);
++ if (id < 0)
++ return id;
++
++ /* Configure device structure for Counter */
++ dev->id = id;
++ dev->type = &counter_device_type;
++ dev->bus = &counter_bus_type;
++ if (counter->parent) {
++ dev->parent = counter->parent;
++ dev->of_node = counter->parent->of_node;
++ }
++ device_initialize(dev);
++ dev_set_drvdata(dev, counter);
++
++ /* Add Counter sysfs attributes */
++ err = counter_sysfs_add(counter);
++ if (err < 0)
++ goto err_free_id;
++
++ /* Add device to system */
++ err = device_add(dev);
++ if (err < 0)
++ goto err_free_id;
++
++ return 0;
++
++err_free_id:
++ put_device(dev);
++ return err;
++}
++EXPORT_SYMBOL_GPL(counter_register);
++
++/**
++ * counter_unregister - unregister Counter from the system
++ * @counter: pointer to Counter to unregister
++ *
++ * The Counter is unregistered from the system.
++ */
++void counter_unregister(struct counter_device *const counter)
++{
++ if (!counter)
++ return;
++
++ device_unregister(&counter->dev);
++}
++EXPORT_SYMBOL_GPL(counter_unregister);
++
++static void devm_counter_release(void *counter)
++{
++ counter_unregister(counter);
++}
++
++/**
++ * devm_counter_register - Resource-managed counter_register
++ * @dev: device to allocate counter_device for
++ * @counter: pointer to Counter to register
++ *
++ * Managed counter_register. The Counter registered with this function is
++ * automatically unregistered on driver detach. This function calls
++ * counter_register internally. Refer to that function for more information.
++ *
++ * RETURNS:
++ * 0 on success, negative error number on failure.
++ */
++int devm_counter_register(struct device *dev,
++ struct counter_device *const counter)
++{
++ int err;
++
++ err = counter_register(counter);
++ if (err < 0)
++ return err;
++
++ return devm_add_action_or_reset(dev, devm_counter_release, counter);
++}
++EXPORT_SYMBOL_GPL(devm_counter_register);
++
++static int __init counter_init(void)
++{
++ return bus_register(&counter_bus_type);
++}
++
++static void __exit counter_exit(void)
++{
++ bus_unregister(&counter_bus_type);
++}
++
++subsys_initcall(counter_init);
++module_exit(counter_exit);
++
++MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
++MODULE_DESCRIPTION("Generic Counter interface");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
+new file mode 100644
+index 0000000000000..108cbd838eb92
+--- /dev/null
++++ b/drivers/counter/counter-sysfs.c
+@@ -0,0 +1,849 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Generic Counter sysfs interface
++ * Copyright (C) 2020 William Breathitt Gray
++ */
++#include <linux/counter.h>
++#include <linux/device.h>
++#include <linux/err.h>
++#include <linux/gfp.h>
++#include <linux/kernel.h>
++#include <linux/kstrtox.h>
++#include <linux/list.h>
++#include <linux/string.h>
++#include <linux/sysfs.h>
++#include <linux/types.h>
++
++#include "counter-sysfs.h"
++
++/**
++ * struct counter_attribute - Counter sysfs attribute
++ * @dev_attr: device attribute for sysfs
++ * @l: node to add Counter attribute to attribute group list
++ * @comp: Counter component callbacks and data
++ * @scope: Counter scope of the attribute
++ * @parent: pointer to the parent component
++ */
++struct counter_attribute {
++ struct device_attribute dev_attr;
++ struct list_head l;
++
++ struct counter_comp comp;
++ enum counter_scope scope;
++ void *parent;
++};
++
++#define to_counter_attribute(_dev_attr) \
++ container_of(_dev_attr, struct counter_attribute, dev_attr)
++
++/**
++ * struct counter_attribute_group - container for attribute group
++ * @name: name of the attribute group
++ * @attr_list: list to keep track of created attributes
++ * @num_attr: number of attributes
++ */
++struct counter_attribute_group {
++ const char *name;
++ struct list_head attr_list;
++ size_t num_attr;
++};
++
++static const char *const counter_function_str[] = {
++ [COUNTER_FUNCTION_INCREASE] = "increase",
++ [COUNTER_FUNCTION_DECREASE] = "decrease",
++ [COUNTER_FUNCTION_PULSE_DIRECTION] = "pulse-direction",
++ [COUNTER_FUNCTION_QUADRATURE_X1_A] = "quadrature x1 a",
++ [COUNTER_FUNCTION_QUADRATURE_X1_B] = "quadrature x1 b",
++ [COUNTER_FUNCTION_QUADRATURE_X2_A] = "quadrature x2 a",
++ [COUNTER_FUNCTION_QUADRATURE_X2_B] = "quadrature x2 b",
++ [COUNTER_FUNCTION_QUADRATURE_X4] = "quadrature x4"
++};
++
++static const char *const counter_signal_value_str[] = {
++ [COUNTER_SIGNAL_LEVEL_LOW] = "low",
++ [COUNTER_SIGNAL_LEVEL_HIGH] = "high"
++};
++
++static const char *const counter_synapse_action_str[] = {
++ [COUNTER_SYNAPSE_ACTION_NONE] = "none",
++ [COUNTER_SYNAPSE_ACTION_RISING_EDGE] = "rising edge",
++ [COUNTER_SYNAPSE_ACTION_FALLING_EDGE] = "falling edge",
++ [COUNTER_SYNAPSE_ACTION_BOTH_EDGES] = "both edges"
++};
++
++static const char *const counter_count_direction_str[] = {
++ [COUNTER_COUNT_DIRECTION_FORWARD] = "forward",
++ [COUNTER_COUNT_DIRECTION_BACKWARD] = "backward"
++};
++
++static const char *const counter_count_mode_str[] = {
++ [COUNTER_COUNT_MODE_NORMAL] = "normal",
++ [COUNTER_COUNT_MODE_RANGE_LIMIT] = "range limit",
++ [COUNTER_COUNT_MODE_NON_RECYCLE] = "non-recycle",
++ [COUNTER_COUNT_MODE_MODULO_N] = "modulo-n"
++};
++
++static ssize_t counter_comp_u8_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ int err;
++ u8 data = 0;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u8_read(counter, &data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u8_read(counter, a->parent, &data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ err = a->comp.count_u8_read(counter, a->parent, &data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ if (a->comp.type == COUNTER_COMP_BOOL)
++ /* data should already be boolean but ensure just to be safe */
++ data = !!data;
++
++ return sprintf(buf, "%u\n", (unsigned int)data);
++}
++
++static ssize_t counter_comp_u8_store(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t len)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ int err;
++ bool bool_data = 0;
++ u8 data = 0;
++
++ if (a->comp.type == COUNTER_COMP_BOOL) {
++ err = kstrtobool(buf, &bool_data);
++ data = bool_data;
++ } else
++ err = kstrtou8(buf, 0, &data);
++ if (err < 0)
++ return err;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u8_write(counter, data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u8_write(counter, a->parent, data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ err = a->comp.count_u8_write(counter, a->parent, data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ return len;
++}
++
++static ssize_t counter_comp_u32_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ const struct counter_available *const avail = a->comp.priv;
++ int err;
++ u32 data = 0;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u32_read(counter, &data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u32_read(counter, a->parent, &data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ if (a->comp.type == COUNTER_COMP_SYNAPSE_ACTION)
++ err = a->comp.action_read(counter, a->parent,
++ a->comp.priv, &data);
++ else
++ err = a->comp.count_u32_read(counter, a->parent, &data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ switch (a->comp.type) {
++ case COUNTER_COMP_FUNCTION:
++ return sysfs_emit(buf, "%s\n", counter_function_str[data]);
++ case COUNTER_COMP_SIGNAL_LEVEL:
++ return sysfs_emit(buf, "%s\n", counter_signal_value_str[data]);
++ case COUNTER_COMP_SYNAPSE_ACTION:
++ return sysfs_emit(buf, "%s\n", counter_synapse_action_str[data]);
++ case COUNTER_COMP_ENUM:
++ return sysfs_emit(buf, "%s\n", avail->strs[data]);
++ case COUNTER_COMP_COUNT_DIRECTION:
++ return sysfs_emit(buf, "%s\n", counter_count_direction_str[data]);
++ case COUNTER_COMP_COUNT_MODE:
++ return sysfs_emit(buf, "%s\n", counter_count_mode_str[data]);
++ default:
++ return sprintf(buf, "%u\n", (unsigned int)data);
++ }
++}
++
++static int counter_find_enum(u32 *const enum_item, const u32 *const enums,
++ const size_t num_enums, const char *const buf,
++ const char *const string_array[])
++{
++ size_t index;
++
++ for (index = 0; index < num_enums; index++) {
++ *enum_item = enums[index];
++ if (sysfs_streq(buf, string_array[*enum_item]))
++ return 0;
++ }
++
++ return -EINVAL;
++}
++
++static ssize_t counter_comp_u32_store(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t len)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ struct counter_count *const count = a->parent;
++ struct counter_synapse *const synapse = a->comp.priv;
++ const struct counter_available *const avail = a->comp.priv;
++ int err;
++ u32 data = 0;
++
++ switch (a->comp.type) {
++ case COUNTER_COMP_FUNCTION:
++ err = counter_find_enum(&data, count->functions_list,
++ count->num_functions, buf,
++ counter_function_str);
++ break;
++ case COUNTER_COMP_SYNAPSE_ACTION:
++ err = counter_find_enum(&data, synapse->actions_list,
++ synapse->num_actions, buf,
++ counter_synapse_action_str);
++ break;
++ case COUNTER_COMP_ENUM:
++ err = __sysfs_match_string(avail->strs, avail->num_items, buf);
++ data = err;
++ break;
++ case COUNTER_COMP_COUNT_MODE:
++ err = counter_find_enum(&data, avail->enums, avail->num_items,
++ buf, counter_count_mode_str);
++ break;
++ default:
++ err = kstrtou32(buf, 0, &data);
++ break;
++ }
++ if (err < 0)
++ return err;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u32_write(counter, data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u32_write(counter, a->parent, data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ if (a->comp.type == COUNTER_COMP_SYNAPSE_ACTION)
++ err = a->comp.action_write(counter, count, synapse,
++ data);
++ else
++ err = a->comp.count_u32_write(counter, count, data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ return len;
++}
++
++static ssize_t counter_comp_u64_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ int err;
++ u64 data = 0;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u64_read(counter, &data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u64_read(counter, a->parent, &data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ err = a->comp.count_u64_read(counter, a->parent, &data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ return sprintf(buf, "%llu\n", (unsigned long long)data);
++}
++
++static ssize_t counter_comp_u64_store(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t len)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ struct counter_device *const counter = dev_get_drvdata(dev);
++ int err;
++ u64 data = 0;
++
++ err = kstrtou64(buf, 0, &data);
++ if (err < 0)
++ return err;
++
++ switch (a->scope) {
++ case COUNTER_SCOPE_DEVICE:
++ err = a->comp.device_u64_write(counter, data);
++ break;
++ case COUNTER_SCOPE_SIGNAL:
++ err = a->comp.signal_u64_write(counter, a->parent, data);
++ break;
++ case COUNTER_SCOPE_COUNT:
++ err = a->comp.count_u64_write(counter, a->parent, data);
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (err < 0)
++ return err;
++
++ return len;
++}
++
++static ssize_t enums_available_show(const u32 *const enums,
++ const size_t num_enums,
++ const char *const strs[], char *buf)
++{
++ size_t len = 0;
++ size_t index;
++
++ for (index = 0; index < num_enums; index++)
++ len += sysfs_emit_at(buf, len, "%s\n", strs[enums[index]]);
++
++ return len;
++}
++
++static ssize_t strs_available_show(const struct counter_available *const avail,
++ char *buf)
++{
++ size_t len = 0;
++ size_t index;
++
++ for (index = 0; index < avail->num_items; index++)
++ len += sysfs_emit_at(buf, len, "%s\n", avail->strs[index]);
++
++ return len;
++}
++
++static ssize_t counter_comp_available_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ const struct counter_attribute *const a = to_counter_attribute(attr);
++ const struct counter_count *const count = a->parent;
++ const struct counter_synapse *const synapse = a->comp.priv;
++ const struct counter_available *const avail = a->comp.priv;
++
++ switch (a->comp.type) {
++ case COUNTER_COMP_FUNCTION:
++ return enums_available_show(count->functions_list,
++ count->num_functions,
++ counter_function_str, buf);
++ case COUNTER_COMP_SYNAPSE_ACTION:
++ return enums_available_show(synapse->actions_list,
++ synapse->num_actions,
++ counter_synapse_action_str, buf);
++ case COUNTER_COMP_ENUM:
++ return strs_available_show(avail, buf);
++ case COUNTER_COMP_COUNT_MODE:
++ return enums_available_show(avail->enums, avail->num_items,
++ counter_count_mode_str, buf);
++ default:
++ return -EINVAL;
++ }
++}
++
++static int counter_avail_attr_create(struct device *const dev,
++ struct counter_attribute_group *const group,
++ const struct counter_comp *const comp, void *const parent)
++{
++ struct counter_attribute *counter_attr;
++ struct device_attribute *dev_attr;
++
++ counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
++ if (!counter_attr)
++ return -ENOMEM;
++
++ /* Configure Counter attribute */
++ counter_attr->comp.type = comp->type;
++ counter_attr->comp.priv = comp->priv;
++ counter_attr->parent = parent;
++
++ /* Initialize sysfs attribute */
++ dev_attr = &counter_attr->dev_attr;
++ sysfs_attr_init(&dev_attr->attr);
++
++ /* Configure device attribute */
++ dev_attr->attr.name = devm_kasprintf(dev, GFP_KERNEL, "%s_available",
++ comp->name);
++ if (!dev_attr->attr.name)
++ return -ENOMEM;
++ dev_attr->attr.mode = 0444;
++ dev_attr->show = counter_comp_available_show;
++
++ /* Store list node */
++ list_add(&counter_attr->l, &group->attr_list);
++ group->num_attr++;
++
++ return 0;
++}
++
++static int counter_attr_create(struct device *const dev,
++ struct counter_attribute_group *const group,
++ const struct counter_comp *const comp,
++ const enum counter_scope scope,
++ void *const parent)
++{
++ struct counter_attribute *counter_attr;
++ struct device_attribute *dev_attr;
++
++ counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
++ if (!counter_attr)
++ return -ENOMEM;
++
++ /* Configure Counter attribute */
++ counter_attr->comp = *comp;
++ counter_attr->scope = scope;
++ counter_attr->parent = parent;
++
++ /* Configure device attribute */
++ dev_attr = &counter_attr->dev_attr;
++ sysfs_attr_init(&dev_attr->attr);
++ dev_attr->attr.name = comp->name;
++ switch (comp->type) {
++ case COUNTER_COMP_U8:
++ case COUNTER_COMP_BOOL:
++ if (comp->device_u8_read) {
++ dev_attr->attr.mode |= 0444;
++ dev_attr->show = counter_comp_u8_show;
++ }
++ if (comp->device_u8_write) {
++ dev_attr->attr.mode |= 0200;
++ dev_attr->store = counter_comp_u8_store;
++ }
++ break;
++ case COUNTER_COMP_SIGNAL_LEVEL:
++ case COUNTER_COMP_FUNCTION:
++ case COUNTER_COMP_SYNAPSE_ACTION:
++ case COUNTER_COMP_ENUM:
++ case COUNTER_COMP_COUNT_DIRECTION:
++ case COUNTER_COMP_COUNT_MODE:
++ if (comp->device_u32_read) {
++ dev_attr->attr.mode |= 0444;
++ dev_attr->show = counter_comp_u32_show;
++ }
++ if (comp->device_u32_write) {
++ dev_attr->attr.mode |= 0200;
++ dev_attr->store = counter_comp_u32_store;
++ }
++ break;
++ case COUNTER_COMP_U64:
++ if (comp->device_u64_read) {
++ dev_attr->attr.mode |= 0444;
++ dev_attr->show = counter_comp_u64_show;
++ }
++ if (comp->device_u64_write) {
++ dev_attr->attr.mode |= 0200;
++ dev_attr->store = counter_comp_u64_store;
++ }
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ /* Store list node */
++ list_add(&counter_attr->l, &group->attr_list);
++ group->num_attr++;
++
++ /* Create "*_available" attribute if needed */
++ switch (comp->type) {
++ case COUNTER_COMP_FUNCTION:
++ case COUNTER_COMP_SYNAPSE_ACTION:
++ case COUNTER_COMP_ENUM:
++ case COUNTER_COMP_COUNT_MODE:
++ return counter_avail_attr_create(dev, group, comp, parent);
++ default:
++ return 0;
++ }
++}
++
++static ssize_t counter_comp_name_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sysfs_emit(buf, "%s\n", to_counter_attribute(attr)->comp.name);
++}
++
++static int counter_name_attr_create(struct device *const dev,
++ struct counter_attribute_group *const group,
++ const char *const name)
++{
++ struct counter_attribute *counter_attr;
++
++ counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
++ if (!counter_attr)
++ return -ENOMEM;
++
++ /* Configure Counter attribute */
++ counter_attr->comp.name = name;
++
++ /* Configure device attribute */
++ sysfs_attr_init(&counter_attr->dev_attr.attr);
++ counter_attr->dev_attr.attr.name = "name";
++ counter_attr->dev_attr.attr.mode = 0444;
++ counter_attr->dev_attr.show = counter_comp_name_show;
++
++ /* Store list node */
++ list_add(&counter_attr->l, &group->attr_list);
++ group->num_attr++;
++
++ return 0;
++}
++
++static struct counter_comp counter_signal_comp = {
++ .type = COUNTER_COMP_SIGNAL_LEVEL,
++ .name = "signal",
++};
++
++static int counter_signal_attrs_create(struct counter_device *const counter,
++ struct counter_attribute_group *const cattr_group,
++ struct counter_signal *const signal)
++{
++ const enum counter_scope scope = COUNTER_SCOPE_SIGNAL;
++ struct device *const dev = &counter->dev;
++ int err;
++ struct counter_comp comp;
++ size_t i;
++
++ /* Create main Signal attribute */
++ comp = counter_signal_comp;
++ comp.signal_u32_read = counter->ops->signal_read;
++ err = counter_attr_create(dev, cattr_group, &comp, scope, signal);
++ if (err < 0)
++ return err;
++
++ /* Create Signal name attribute */
++ err = counter_name_attr_create(dev, cattr_group, signal->name);
++ if (err < 0)
++ return err;
++
++ /* Create an attribute for each extension */
++ for (i = 0; i < signal->num_ext; i++) {
++ err = counter_attr_create(dev, cattr_group, signal->ext + i,
++ scope, signal);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static int counter_sysfs_signals_add(struct counter_device *const counter,
++ struct counter_attribute_group *const groups)
++{
++ size_t i;
++ int err;
++
++ /* Add each Signal */
++ for (i = 0; i < counter->num_signals; i++) {
++ /* Generate Signal attribute directory name */
++ groups[i].name = devm_kasprintf(&counter->dev, GFP_KERNEL,
++ "signal%zu", i);
++ if (!groups[i].name)
++ return -ENOMEM;
++
++ /* Create all attributes associated with Signal */
++ err = counter_signal_attrs_create(counter, groups + i,
++ counter->signals + i);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static int counter_sysfs_synapses_add(struct counter_device *const counter,
++ struct counter_attribute_group *const group,
++ struct counter_count *const count)
++{
++ size_t i;
++
++ /* Add each Synapse */
++ for (i = 0; i < count->num_synapses; i++) {
++ struct device *const dev = &counter->dev;
++ struct counter_synapse *synapse;
++ size_t id;
++ struct counter_comp comp;
++ int err;
++
++ synapse = count->synapses + i;
++
++ /* Generate Synapse action name */
++ id = synapse->signal - counter->signals;
++ comp.name = devm_kasprintf(dev, GFP_KERNEL, "signal%zu_action",
++ id);
++ if (!comp.name)
++ return -ENOMEM;
++
++ /* Create action attribute */
++ comp.type = COUNTER_COMP_SYNAPSE_ACTION;
++ comp.action_read = counter->ops->action_read;
++ comp.action_write = counter->ops->action_write;
++ comp.priv = synapse;
++ err = counter_attr_create(dev, group, &comp,
++ COUNTER_SCOPE_COUNT, count);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static struct counter_comp counter_count_comp =
++ COUNTER_COMP_COUNT_U64("count", NULL, NULL);
++
++static struct counter_comp counter_function_comp = {
++ .type = COUNTER_COMP_FUNCTION,
++ .name = "function",
++};
++
++static int counter_count_attrs_create(struct counter_device *const counter,
++ struct counter_attribute_group *const cattr_group,
++ struct counter_count *const count)
++{
++ const enum counter_scope scope = COUNTER_SCOPE_COUNT;
++ struct device *const dev = &counter->dev;
++ int err;
++ struct counter_comp comp;
++ size_t i;
++
++ /* Create main Count attribute */
++ comp = counter_count_comp;
++ comp.count_u64_read = counter->ops->count_read;
++ comp.count_u64_write = counter->ops->count_write;
++ err = counter_attr_create(dev, cattr_group, &comp, scope, count);
++ if (err < 0)
++ return err;
++
++ /* Create Count name attribute */
++ err = counter_name_attr_create(dev, cattr_group, count->name);
++ if (err < 0)
++ return err;
++
++ /* Create Count function attribute */
++ comp = counter_function_comp;
++ comp.count_u32_read = counter->ops->function_read;
++ comp.count_u32_write = counter->ops->function_write;
++ err = counter_attr_create(dev, cattr_group, &comp, scope, count);
++ if (err < 0)
++ return err;
++
++ /* Create an attribute for each extension */
++ for (i = 0; i < count->num_ext; i++) {
++ err = counter_attr_create(dev, cattr_group, count->ext + i,
++ scope, count);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static int counter_sysfs_counts_add(struct counter_device *const counter,
++ struct counter_attribute_group *const groups)
++{
++ size_t i;
++ struct counter_count *count;
++ int err;
++
++ /* Add each Count */
++ for (i = 0; i < counter->num_counts; i++) {
++ count = counter->counts + i;
++
++ /* Generate Count attribute directory name */
++ groups[i].name = devm_kasprintf(&counter->dev, GFP_KERNEL,
++ "count%zu", i);
++ if (!groups[i].name)
++ return -ENOMEM;
++
++ /* Add sysfs attributes of the Synapses */
++ err = counter_sysfs_synapses_add(counter, groups + i, count);
++ if (err < 0)
++ return err;
++
++ /* Create all attributes associated with Count */
++ err = counter_count_attrs_create(counter, groups + i, count);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static int counter_num_signals_read(struct counter_device *counter, u8 *val)
++{
++ *val = counter->num_signals;
++ return 0;
++}
++
++static int counter_num_counts_read(struct counter_device *counter, u8 *val)
++{
++ *val = counter->num_counts;
++ return 0;
++}
++
++static struct counter_comp counter_num_signals_comp =
++ COUNTER_COMP_DEVICE_U8("num_signals", counter_num_signals_read, NULL);
++
++static struct counter_comp counter_num_counts_comp =
++ COUNTER_COMP_DEVICE_U8("num_counts", counter_num_counts_read, NULL);
++
++static int counter_sysfs_attr_add(struct counter_device *const counter,
++ struct counter_attribute_group *cattr_group)
++{
++ const enum counter_scope scope = COUNTER_SCOPE_DEVICE;
++ struct device *const dev = &counter->dev;
++ int err;
++ size_t i;
++
++ /* Add Signals sysfs attributes */
++ err = counter_sysfs_signals_add(counter, cattr_group);
++ if (err < 0)
++ return err;
++ cattr_group += counter->num_signals;
++
++ /* Add Counts sysfs attributes */
++ err = counter_sysfs_counts_add(counter, cattr_group);
++ if (err < 0)
++ return err;
++ cattr_group += counter->num_counts;
++
++ /* Create name attribute */
++ err = counter_name_attr_create(dev, cattr_group, counter->name);
++ if (err < 0)
++ return err;
++
++ /* Create num_signals attribute */
++ err = counter_attr_create(dev, cattr_group, &counter_num_signals_comp,
++ scope, NULL);
++ if (err < 0)
++ return err;
++
++ /* Create num_counts attribute */
++ err = counter_attr_create(dev, cattr_group, &counter_num_counts_comp,
++ scope, NULL);
++ if (err < 0)
++ return err;
++
++ /* Create an attribute for each extension */
++ for (i = 0; i < counter->num_ext; i++) {
++ err = counter_attr_create(dev, cattr_group, counter->ext + i,
++ scope, NULL);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++/**
++ * counter_sysfs_add - Adds Counter sysfs attributes to the device structure
++ * @counter: Pointer to the Counter device structure
++ *
++ * Counter sysfs attributes are created and added to the respective device
++ * structure for later registration to the system. Resource-managed memory
++ * allocation is performed by this function, and this memory should be freed
++ * when no longer needed (automatically by a device_unregister call, or
++ * manually by a devres_release_all call).
++ */
++int counter_sysfs_add(struct counter_device *const counter)
++{
++ struct device *const dev = &counter->dev;
++ const size_t num_groups = counter->num_signals + counter->num_counts + 1;
++ struct counter_attribute_group *cattr_groups;
++ size_t i, j;
++ int err;
++ struct attribute_group *groups;
++ struct counter_attribute *p;
++
++ /* Allocate space for attribute groups (signals, counts, and ext) */
++ cattr_groups = devm_kcalloc(dev, num_groups, sizeof(*cattr_groups),
++ GFP_KERNEL);
++ if (!cattr_groups)
++ return -ENOMEM;
++
++ /* Initialize attribute lists */
++ for (i = 0; i < num_groups; i++)
++ INIT_LIST_HEAD(&cattr_groups[i].attr_list);
++
++ /* Add Counter device sysfs attributes */
++ err = counter_sysfs_attr_add(counter, cattr_groups);
++ if (err < 0)
++ return err;
++
++ /* Allocate attribute group pointers for association with device */
++ dev->groups = devm_kcalloc(dev, num_groups + 1, sizeof(*dev->groups),
++ GFP_KERNEL);
++ if (!dev->groups)
++ return -ENOMEM;
++
++ /* Allocate space for attribute groups */
++ groups = devm_kcalloc(dev, num_groups, sizeof(*groups), GFP_KERNEL);
++ if (!groups)
++ return -ENOMEM;
++
++ /* Prepare each group of attributes for association */
++ for (i = 0; i < num_groups; i++) {
++ groups[i].name = cattr_groups[i].name;
++
++ /* Allocate space for attribute pointers */
++ groups[i].attrs = devm_kcalloc(dev,
++ cattr_groups[i].num_attr + 1,
++ sizeof(*groups[i].attrs),
++ GFP_KERNEL);
++ if (!groups[i].attrs)
++ return -ENOMEM;
++
++ /* Add attribute pointers to attribute group */
++ j = 0;
++ list_for_each_entry(p, &cattr_groups[i].attr_list, l)
++ groups[i].attrs[j++] = &p->dev_attr.attr;
++
++ /* Associate attribute group */
++ dev->groups[i] = &groups[i];
++ }
++
++ return 0;
++}
+diff --git a/drivers/counter/counter-sysfs.h b/drivers/counter/counter-sysfs.h
+new file mode 100644
+index 0000000000000..14fe566aca0e0
+--- /dev/null
++++ b/drivers/counter/counter-sysfs.h
+@@ -0,0 +1,13 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Counter sysfs interface
++ * Copyright (C) 2020 William Breathitt Gray
++ */
++#ifndef _COUNTER_SYSFS_H_
++#define _COUNTER_SYSFS_H_
++
++#include <linux/counter.h>
++
++int counter_sysfs_add(struct counter_device *const counter);
++
++#endif /* _COUNTER_SYSFS_H_ */
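For orientation, a minimal sketch (not part of the patch) of how a registration path could consume counter_sysfs_add() as declared above: the attribute groups it builds are stored in counter->dev.groups, so they only need to be in place before device_add(). The function name example_counter_add is a placeholder.

#include <linux/counter.h>
#include <linux/device.h>

#include "counter-sysfs.h"

/* Hypothetical caller: build the sysfs groups, then add the device */
static int example_counter_add(struct counter_device *const counter)
{
	int err;

	/* Populates counter->dev.groups with devres-managed memory */
	err = counter_sysfs_add(counter);
	if (err < 0)
		return err;

	/* device_add() registers the groups along with the device */
	return device_add(&counter->dev);
}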
+diff --git a/drivers/counter/counter.c b/drivers/counter/counter.c
+deleted file mode 100644
+index de921e8a3f721..0000000000000
+--- a/drivers/counter/counter.c
++++ /dev/null
+@@ -1,1496 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Generic Counter interface
+- * Copyright (C) 2018 William Breathitt Gray
+- */
+-#include <linux/counter.h>
+-#include <linux/device.h>
+-#include <linux/err.h>
+-#include <linux/export.h>
+-#include <linux/fs.h>
+-#include <linux/gfp.h>
+-#include <linux/idr.h>
+-#include <linux/init.h>
+-#include <linux/kernel.h>
+-#include <linux/list.h>
+-#include <linux/module.h>
+-#include <linux/printk.h>
+-#include <linux/slab.h>
+-#include <linux/string.h>
+-#include <linux/sysfs.h>
+-#include <linux/types.h>
+-
+-const char *const counter_count_direction_str[2] = {
+- [COUNTER_COUNT_DIRECTION_FORWARD] = "forward",
+- [COUNTER_COUNT_DIRECTION_BACKWARD] = "backward"
+-};
+-EXPORT_SYMBOL_GPL(counter_count_direction_str);
+-
+-const char *const counter_count_mode_str[4] = {
+- [COUNTER_COUNT_MODE_NORMAL] = "normal",
+- [COUNTER_COUNT_MODE_RANGE_LIMIT] = "range limit",
+- [COUNTER_COUNT_MODE_NON_RECYCLE] = "non-recycle",
+- [COUNTER_COUNT_MODE_MODULO_N] = "modulo-n"
+-};
+-EXPORT_SYMBOL_GPL(counter_count_mode_str);
+-
+-ssize_t counter_signal_enum_read(struct counter_device *counter,
+- struct counter_signal *signal, void *priv,
+- char *buf)
+-{
+- const struct counter_signal_enum_ext *const e = priv;
+- int err;
+- size_t index;
+-
+- if (!e->get)
+- return -EINVAL;
+-
+- err = e->get(counter, signal, &index);
+- if (err)
+- return err;
+-
+- if (index >= e->num_items)
+- return -EINVAL;
+-
+- return sprintf(buf, "%s\n", e->items[index]);
+-}
+-EXPORT_SYMBOL_GPL(counter_signal_enum_read);
+-
+-ssize_t counter_signal_enum_write(struct counter_device *counter,
+- struct counter_signal *signal, void *priv,
+- const char *buf, size_t len)
+-{
+- const struct counter_signal_enum_ext *const e = priv;
+- ssize_t index;
+- int err;
+-
+- if (!e->set)
+- return -EINVAL;
+-
+- index = __sysfs_match_string(e->items, e->num_items, buf);
+- if (index < 0)
+- return index;
+-
+- err = e->set(counter, signal, index);
+- if (err)
+- return err;
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_signal_enum_write);
+-
+-ssize_t counter_signal_enum_available_read(struct counter_device *counter,
+- struct counter_signal *signal,
+- void *priv, char *buf)
+-{
+- const struct counter_signal_enum_ext *const e = priv;
+- size_t i;
+- size_t len = 0;
+-
+- if (!e->num_items)
+- return 0;
+-
+- for (i = 0; i < e->num_items; i++)
+- len += sprintf(buf + len, "%s\n", e->items[i]);
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_signal_enum_available_read);
+-
+-ssize_t counter_count_enum_read(struct counter_device *counter,
+- struct counter_count *count, void *priv,
+- char *buf)
+-{
+- const struct counter_count_enum_ext *const e = priv;
+- int err;
+- size_t index;
+-
+- if (!e->get)
+- return -EINVAL;
+-
+- err = e->get(counter, count, &index);
+- if (err)
+- return err;
+-
+- if (index >= e->num_items)
+- return -EINVAL;
+-
+- return sprintf(buf, "%s\n", e->items[index]);
+-}
+-EXPORT_SYMBOL_GPL(counter_count_enum_read);
+-
+-ssize_t counter_count_enum_write(struct counter_device *counter,
+- struct counter_count *count, void *priv,
+- const char *buf, size_t len)
+-{
+- const struct counter_count_enum_ext *const e = priv;
+- ssize_t index;
+- int err;
+-
+- if (!e->set)
+- return -EINVAL;
+-
+- index = __sysfs_match_string(e->items, e->num_items, buf);
+- if (index < 0)
+- return index;
+-
+- err = e->set(counter, count, index);
+- if (err)
+- return err;
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_count_enum_write);
+-
+-ssize_t counter_count_enum_available_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf)
+-{
+- const struct counter_count_enum_ext *const e = priv;
+- size_t i;
+- size_t len = 0;
+-
+- if (!e->num_items)
+- return 0;
+-
+- for (i = 0; i < e->num_items; i++)
+- len += sprintf(buf + len, "%s\n", e->items[i]);
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_count_enum_available_read);
+-
+-ssize_t counter_device_enum_read(struct counter_device *counter, void *priv,
+- char *buf)
+-{
+- const struct counter_device_enum_ext *const e = priv;
+- int err;
+- size_t index;
+-
+- if (!e->get)
+- return -EINVAL;
+-
+- err = e->get(counter, &index);
+- if (err)
+- return err;
+-
+- if (index >= e->num_items)
+- return -EINVAL;
+-
+- return sprintf(buf, "%s\n", e->items[index]);
+-}
+-EXPORT_SYMBOL_GPL(counter_device_enum_read);
+-
+-ssize_t counter_device_enum_write(struct counter_device *counter, void *priv,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_enum_ext *const e = priv;
+- ssize_t index;
+- int err;
+-
+- if (!e->set)
+- return -EINVAL;
+-
+- index = __sysfs_match_string(e->items, e->num_items, buf);
+- if (index < 0)
+- return index;
+-
+- err = e->set(counter, index);
+- if (err)
+- return err;
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_device_enum_write);
+-
+-ssize_t counter_device_enum_available_read(struct counter_device *counter,
+- void *priv, char *buf)
+-{
+- const struct counter_device_enum_ext *const e = priv;
+- size_t i;
+- size_t len = 0;
+-
+- if (!e->num_items)
+- return 0;
+-
+- for (i = 0; i < e->num_items; i++)
+- len += sprintf(buf + len, "%s\n", e->items[i]);
+-
+- return len;
+-}
+-EXPORT_SYMBOL_GPL(counter_device_enum_available_read);
+-
+-struct counter_attr_parm {
+- struct counter_device_attr_group *group;
+- const char *prefix;
+- const char *name;
+- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+- char *buf);
+- ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+- const char *buf, size_t len);
+- void *component;
+-};
+-
+-struct counter_device_attr {
+- struct device_attribute dev_attr;
+- struct list_head l;
+- void *component;
+-};
+-
+-static int counter_attribute_create(const struct counter_attr_parm *const parm)
+-{
+- struct counter_device_attr *counter_attr;
+- struct device_attribute *dev_attr;
+- int err;
+- struct list_head *const attr_list = &parm->group->attr_list;
+-
+- /* Allocate a Counter device attribute */
+- counter_attr = kzalloc(sizeof(*counter_attr), GFP_KERNEL);
+- if (!counter_attr)
+- return -ENOMEM;
+- dev_attr = &counter_attr->dev_attr;
+-
+- sysfs_attr_init(&dev_attr->attr);
+-
+- /* Configure device attribute */
+- dev_attr->attr.name = kasprintf(GFP_KERNEL, "%s%s", parm->prefix,
+- parm->name);
+- if (!dev_attr->attr.name) {
+- err = -ENOMEM;
+- goto err_free_counter_attr;
+- }
+- if (parm->show) {
+- dev_attr->attr.mode |= 0444;
+- dev_attr->show = parm->show;
+- }
+- if (parm->store) {
+- dev_attr->attr.mode |= 0200;
+- dev_attr->store = parm->store;
+- }
+-
+- /* Store associated Counter component with attribute */
+- counter_attr->component = parm->component;
+-
+- /* Keep track of the attribute for later cleanup */
+- list_add(&counter_attr->l, attr_list);
+- parm->group->num_attr++;
+-
+- return 0;
+-
+-err_free_counter_attr:
+- kfree(counter_attr);
+- return err;
+-}
+-
+-#define to_counter_attr(_dev_attr) \
+- container_of(_dev_attr, struct counter_device_attr, dev_attr)
+-
+-struct counter_signal_unit {
+- struct counter_signal *signal;
+-};
+-
+-static const char *const counter_signal_level_str[] = {
+- [COUNTER_SIGNAL_LEVEL_LOW] = "low",
+- [COUNTER_SIGNAL_LEVEL_HIGH] = "high"
+-};
+-
+-static ssize_t counter_signal_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_signal_unit *const component = devattr->component;
+- struct counter_signal *const signal = component->signal;
+- int err;
+- enum counter_signal_level level;
+-
+- err = counter->ops->signal_read(counter, signal, &level);
+- if (err)
+- return err;
+-
+- return sprintf(buf, "%s\n", counter_signal_level_str[level]);
+-}
+-
+-struct counter_name_unit {
+- const char *name;
+-};
+-
+-static ssize_t counter_device_attr_name_show(struct device *dev,
+- struct device_attribute *attr,
+- char *buf)
+-{
+- const struct counter_name_unit *const comp = to_counter_attr(attr)->component;
+-
+- return sprintf(buf, "%s\n", comp->name);
+-}
+-
+-static int counter_name_attribute_create(
+- struct counter_device_attr_group *const group,
+- const char *const name)
+-{
+- struct counter_name_unit *name_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Skip if no name */
+- if (!name)
+- return 0;
+-
+- /* Allocate name attribute component */
+- name_comp = kmalloc(sizeof(*name_comp), GFP_KERNEL);
+- if (!name_comp)
+- return -ENOMEM;
+- name_comp->name = name;
+-
+- /* Allocate Signal name attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = "name";
+- parm.show = counter_device_attr_name_show;
+- parm.store = NULL;
+- parm.component = name_comp;
+- err = counter_attribute_create(&parm);
+- if (err)
+- goto err_free_name_comp;
+-
+- return 0;
+-
+-err_free_name_comp:
+- kfree(name_comp);
+- return err;
+-}
+-
+-struct counter_signal_ext_unit {
+- struct counter_signal *signal;
+- const struct counter_signal_ext *ext;
+-};
+-
+-static ssize_t counter_signal_ext_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_signal_ext_unit *const comp = devattr->component;
+- const struct counter_signal_ext *const ext = comp->ext;
+-
+- return ext->read(dev_get_drvdata(dev), comp->signal, ext->priv, buf);
+-}
+-
+-static ssize_t counter_signal_ext_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_signal_ext_unit *const comp = devattr->component;
+- const struct counter_signal_ext *const ext = comp->ext;
+-
+- return ext->write(dev_get_drvdata(dev), comp->signal, ext->priv, buf,
+- len);
+-}
+-
+-static void counter_device_attr_list_free(struct list_head *attr_list)
+-{
+- struct counter_device_attr *p, *n;
+-
+- list_for_each_entry_safe(p, n, attr_list, l) {
+- /* free attribute name and associated component memory */
+- kfree(p->dev_attr.attr.name);
+- kfree(p->component);
+- list_del(&p->l);
+- kfree(p);
+- }
+-}
+-
+-static int counter_signal_ext_register(
+- struct counter_device_attr_group *const group,
+- struct counter_signal *const signal)
+-{
+- const size_t num_ext = signal->num_ext;
+- size_t i;
+- const struct counter_signal_ext *ext;
+- struct counter_signal_ext_unit *signal_ext_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Create an attribute for each extension */
+- for (i = 0 ; i < num_ext; i++) {
+- ext = signal->ext + i;
+-
+- /* Allocate signal_ext attribute component */
+- signal_ext_comp = kmalloc(sizeof(*signal_ext_comp), GFP_KERNEL);
+- if (!signal_ext_comp) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+- signal_ext_comp->signal = signal;
+- signal_ext_comp->ext = ext;
+-
+- /* Allocate a Counter device attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = ext->name;
+- parm.show = (ext->read) ? counter_signal_ext_show : NULL;
+- parm.store = (ext->write) ? counter_signal_ext_store : NULL;
+- parm.component = signal_ext_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(signal_ext_comp);
+- goto err_free_attr_list;
+- }
+- }
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-static int counter_signal_attributes_create(
+- struct counter_device_attr_group *const group,
+- const struct counter_device *const counter,
+- struct counter_signal *const signal)
+-{
+- struct counter_signal_unit *signal_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Allocate Signal attribute component */
+- signal_comp = kmalloc(sizeof(*signal_comp), GFP_KERNEL);
+- if (!signal_comp)
+- return -ENOMEM;
+- signal_comp->signal = signal;
+-
+- /* Create main Signal attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = "signal";
+- parm.show = (counter->ops->signal_read) ? counter_signal_show : NULL;
+- parm.store = NULL;
+- parm.component = signal_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(signal_comp);
+- return err;
+- }
+-
+- /* Create Signal name attribute */
+- err = counter_name_attribute_create(group, signal->name);
+- if (err)
+- goto err_free_attr_list;
+-
+- /* Register Signal extension attributes */
+- err = counter_signal_ext_register(group, signal);
+- if (err)
+- goto err_free_attr_list;
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-static int counter_signals_register(
+- struct counter_device_attr_group *const groups_list,
+- const struct counter_device *const counter)
+-{
+- const size_t num_signals = counter->num_signals;
+- size_t i;
+- struct counter_signal *signal;
+- const char *name;
+- int err;
+-
+- /* Register each Signal */
+- for (i = 0; i < num_signals; i++) {
+- signal = counter->signals + i;
+-
+- /* Generate Signal attribute directory name */
+- name = kasprintf(GFP_KERNEL, "signal%d", signal->id);
+- if (!name) {
+- err = -ENOMEM;
+- goto err_free_attr_groups;
+- }
+- groups_list[i].attr_group.name = name;
+-
+- /* Create all attributes associated with Signal */
+- err = counter_signal_attributes_create(groups_list + i, counter,
+- signal);
+- if (err)
+- goto err_free_attr_groups;
+- }
+-
+- return 0;
+-
+-err_free_attr_groups:
+- do {
+- kfree(groups_list[i].attr_group.name);
+- counter_device_attr_list_free(&groups_list[i].attr_list);
+- } while (i--);
+- return err;
+-}
+-
+-static const char *const counter_synapse_action_str[] = {
+- [COUNTER_SYNAPSE_ACTION_NONE] = "none",
+- [COUNTER_SYNAPSE_ACTION_RISING_EDGE] = "rising edge",
+- [COUNTER_SYNAPSE_ACTION_FALLING_EDGE] = "falling edge",
+- [COUNTER_SYNAPSE_ACTION_BOTH_EDGES] = "both edges"
+-};
+-
+-struct counter_action_unit {
+- struct counter_synapse *synapse;
+- struct counter_count *count;
+-};
+-
+-static ssize_t counter_action_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- int err;
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- const struct counter_action_unit *const component = devattr->component;
+- struct counter_count *const count = component->count;
+- struct counter_synapse *const synapse = component->synapse;
+- size_t action_index;
+- enum counter_synapse_action action;
+-
+- err = counter->ops->action_get(counter, count, synapse, &action_index);
+- if (err)
+- return err;
+-
+- synapse->action = action_index;
+-
+- action = synapse->actions_list[action_index];
+- return sprintf(buf, "%s\n", counter_synapse_action_str[action]);
+-}
+-
+-static ssize_t counter_action_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_action_unit *const component = devattr->component;
+- struct counter_synapse *const synapse = component->synapse;
+- size_t action_index;
+- const size_t num_actions = synapse->num_actions;
+- enum counter_synapse_action action;
+- int err;
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- struct counter_count *const count = component->count;
+-
+- /* Find requested action mode */
+- for (action_index = 0; action_index < num_actions; action_index++) {
+- action = synapse->actions_list[action_index];
+- if (sysfs_streq(buf, counter_synapse_action_str[action]))
+- break;
+- }
+- /* If requested action mode not found */
+- if (action_index >= num_actions)
+- return -EINVAL;
+-
+- err = counter->ops->action_set(counter, count, synapse, action_index);
+- if (err)
+- return err;
+-
+- synapse->action = action_index;
+-
+- return len;
+-}
+-
+-struct counter_action_avail_unit {
+- const enum counter_synapse_action *actions_list;
+- size_t num_actions;
+-};
+-
+-static ssize_t counter_synapse_action_available_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_action_avail_unit *const component = devattr->component;
+- size_t i;
+- enum counter_synapse_action action;
+- ssize_t len = 0;
+-
+- for (i = 0; i < component->num_actions; i++) {
+- action = component->actions_list[i];
+- len += sprintf(buf + len, "%s\n",
+- counter_synapse_action_str[action]);
+- }
+-
+- return len;
+-}
+-
+-static int counter_synapses_register(
+- struct counter_device_attr_group *const group,
+- const struct counter_device *const counter,
+- struct counter_count *const count, const char *const count_attr_name)
+-{
+- size_t i;
+- struct counter_synapse *synapse;
+- const char *prefix;
+- struct counter_action_unit *action_comp;
+- struct counter_attr_parm parm;
+- int err;
+- struct counter_action_avail_unit *avail_comp;
+-
+- /* Register each Synapse */
+- for (i = 0; i < count->num_synapses; i++) {
+- synapse = count->synapses + i;
+-
+- /* Generate attribute prefix */
+- prefix = kasprintf(GFP_KERNEL, "signal%d_",
+- synapse->signal->id);
+- if (!prefix) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+-
+- /* Allocate action attribute component */
+- action_comp = kmalloc(sizeof(*action_comp), GFP_KERNEL);
+- if (!action_comp) {
+- err = -ENOMEM;
+- goto err_free_prefix;
+- }
+- action_comp->synapse = synapse;
+- action_comp->count = count;
+-
+- /* Create action attribute */
+- parm.group = group;
+- parm.prefix = prefix;
+- parm.name = "action";
+- parm.show = (counter->ops->action_get) ? counter_action_show : NULL;
+- parm.store = (counter->ops->action_set) ? counter_action_store : NULL;
+- parm.component = action_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(action_comp);
+- goto err_free_prefix;
+- }
+-
+- /* Allocate action available attribute component */
+- avail_comp = kmalloc(sizeof(*avail_comp), GFP_KERNEL);
+- if (!avail_comp) {
+- err = -ENOMEM;
+- goto err_free_prefix;
+- }
+- avail_comp->actions_list = synapse->actions_list;
+- avail_comp->num_actions = synapse->num_actions;
+-
+- /* Create action_available attribute */
+- parm.group = group;
+- parm.prefix = prefix;
+- parm.name = "action_available";
+- parm.show = counter_synapse_action_available_show;
+- parm.store = NULL;
+- parm.component = avail_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(avail_comp);
+- goto err_free_prefix;
+- }
+-
+- kfree(prefix);
+- }
+-
+- return 0;
+-
+-err_free_prefix:
+- kfree(prefix);
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-struct counter_count_unit {
+- struct counter_count *count;
+-};
+-
+-static ssize_t counter_count_show(struct device *dev,
+- struct device_attribute *attr,
+- char *buf)
+-{
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_unit *const component = devattr->component;
+- struct counter_count *const count = component->count;
+- int err;
+- unsigned long val;
+-
+- err = counter->ops->count_read(counter, count, &val);
+- if (err)
+- return err;
+-
+- return sprintf(buf, "%lu\n", val);
+-}
+-
+-static ssize_t counter_count_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_unit *const component = devattr->component;
+- struct counter_count *const count = component->count;
+- int err;
+- unsigned long val;
+-
+- err = kstrtoul(buf, 0, &val);
+- if (err)
+- return err;
+-
+- err = counter->ops->count_write(counter, count, val);
+- if (err)
+- return err;
+-
+- return len;
+-}
+-
+-static const char *const counter_function_str[] = {
+- [COUNTER_FUNCTION_INCREASE] = "increase",
+- [COUNTER_FUNCTION_DECREASE] = "decrease",
+- [COUNTER_FUNCTION_PULSE_DIRECTION] = "pulse-direction",
+- [COUNTER_FUNCTION_QUADRATURE_X1_A] = "quadrature x1 a",
+- [COUNTER_FUNCTION_QUADRATURE_X1_B] = "quadrature x1 b",
+- [COUNTER_FUNCTION_QUADRATURE_X2_A] = "quadrature x2 a",
+- [COUNTER_FUNCTION_QUADRATURE_X2_B] = "quadrature x2 b",
+- [COUNTER_FUNCTION_QUADRATURE_X4] = "quadrature x4"
+-};
+-
+-static ssize_t counter_function_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- int err;
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_unit *const component = devattr->component;
+- struct counter_count *const count = component->count;
+- size_t func_index;
+- enum counter_function function;
+-
+- err = counter->ops->function_get(counter, count, &func_index);
+- if (err)
+- return err;
+-
+- count->function = func_index;
+-
+- function = count->functions_list[func_index];
+- return sprintf(buf, "%s\n", counter_function_str[function]);
+-}
+-
+-static ssize_t counter_function_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_unit *const component = devattr->component;
+- struct counter_count *const count = component->count;
+- const size_t num_functions = count->num_functions;
+- size_t func_index;
+- enum counter_function function;
+- int err;
+- struct counter_device *const counter = dev_get_drvdata(dev);
+-
+- /* Find requested Count function mode */
+- for (func_index = 0; func_index < num_functions; func_index++) {
+- function = count->functions_list[func_index];
+- if (sysfs_streq(buf, counter_function_str[function]))
+- break;
+- }
+- /* Return error if requested Count function mode not found */
+- if (func_index >= num_functions)
+- return -EINVAL;
+-
+- err = counter->ops->function_set(counter, count, func_index);
+- if (err)
+- return err;
+-
+- count->function = func_index;
+-
+- return len;
+-}
+-
+-struct counter_count_ext_unit {
+- struct counter_count *count;
+- const struct counter_count_ext *ext;
+-};
+-
+-static ssize_t counter_count_ext_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_ext_unit *const comp = devattr->component;
+- const struct counter_count_ext *const ext = comp->ext;
+-
+- return ext->read(dev_get_drvdata(dev), comp->count, ext->priv, buf);
+-}
+-
+-static ssize_t counter_count_ext_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_count_ext_unit *const comp = devattr->component;
+- const struct counter_count_ext *const ext = comp->ext;
+-
+- return ext->write(dev_get_drvdata(dev), comp->count, ext->priv, buf,
+- len);
+-}
+-
+-static int counter_count_ext_register(
+- struct counter_device_attr_group *const group,
+- struct counter_count *const count)
+-{
+- size_t i;
+- const struct counter_count_ext *ext;
+- struct counter_count_ext_unit *count_ext_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Create an attribute for each extension */
+- for (i = 0 ; i < count->num_ext; i++) {
+- ext = count->ext + i;
+-
+- /* Allocate count_ext attribute component */
+- count_ext_comp = kmalloc(sizeof(*count_ext_comp), GFP_KERNEL);
+- if (!count_ext_comp) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+- count_ext_comp->count = count;
+- count_ext_comp->ext = ext;
+-
+- /* Allocate count_ext attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = ext->name;
+- parm.show = (ext->read) ? counter_count_ext_show : NULL;
+- parm.store = (ext->write) ? counter_count_ext_store : NULL;
+- parm.component = count_ext_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(count_ext_comp);
+- goto err_free_attr_list;
+- }
+- }
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-struct counter_func_avail_unit {
+- const enum counter_function *functions_list;
+- size_t num_functions;
+-};
+-
+-static ssize_t counter_function_available_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_func_avail_unit *const component = devattr->component;
+- const enum counter_function *const func_list = component->functions_list;
+- const size_t num_functions = component->num_functions;
+- size_t i;
+- enum counter_function function;
+- ssize_t len = 0;
+-
+- for (i = 0; i < num_functions; i++) {
+- function = func_list[i];
+- len += sprintf(buf + len, "%s\n",
+- counter_function_str[function]);
+- }
+-
+- return len;
+-}
+-
+-static int counter_count_attributes_create(
+- struct counter_device_attr_group *const group,
+- const struct counter_device *const counter,
+- struct counter_count *const count)
+-{
+- struct counter_count_unit *count_comp;
+- struct counter_attr_parm parm;
+- int err;
+- struct counter_count_unit *func_comp;
+- struct counter_func_avail_unit *avail_comp;
+-
+- /* Allocate count attribute component */
+- count_comp = kmalloc(sizeof(*count_comp), GFP_KERNEL);
+- if (!count_comp)
+- return -ENOMEM;
+- count_comp->count = count;
+-
+- /* Create main Count attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = "count";
+- parm.show = (counter->ops->count_read) ? counter_count_show : NULL;
+- parm.store = (counter->ops->count_write) ? counter_count_store : NULL;
+- parm.component = count_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(count_comp);
+- return err;
+- }
+-
+- /* Allocate function attribute component */
+- func_comp = kmalloc(sizeof(*func_comp), GFP_KERNEL);
+- if (!func_comp) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+- func_comp->count = count;
+-
+- /* Create Count function attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = "function";
+- parm.show = (counter->ops->function_get) ? counter_function_show : NULL;
+- parm.store = (counter->ops->function_set) ? counter_function_store : NULL;
+- parm.component = func_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(func_comp);
+- goto err_free_attr_list;
+- }
+-
+- /* Allocate function available attribute component */
+- avail_comp = kmalloc(sizeof(*avail_comp), GFP_KERNEL);
+- if (!avail_comp) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+- avail_comp->functions_list = count->functions_list;
+- avail_comp->num_functions = count->num_functions;
+-
+- /* Create Count function_available attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = "function_available";
+- parm.show = counter_function_available_show;
+- parm.store = NULL;
+- parm.component = avail_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(avail_comp);
+- goto err_free_attr_list;
+- }
+-
+- /* Create Count name attribute */
+- err = counter_name_attribute_create(group, count->name);
+- if (err)
+- goto err_free_attr_list;
+-
+- /* Register Count extension attributes */
+- err = counter_count_ext_register(group, count);
+- if (err)
+- goto err_free_attr_list;
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-static int counter_counts_register(
+- struct counter_device_attr_group *const groups_list,
+- const struct counter_device *const counter)
+-{
+- size_t i;
+- struct counter_count *count;
+- const char *name;
+- int err;
+-
+- /* Register each Count */
+- for (i = 0; i < counter->num_counts; i++) {
+- count = counter->counts + i;
+-
+- /* Generate Count attribute directory name */
+- name = kasprintf(GFP_KERNEL, "count%d", count->id);
+- if (!name) {
+- err = -ENOMEM;
+- goto err_free_attr_groups;
+- }
+- groups_list[i].attr_group.name = name;
+-
+- /* Register the Synapses associated with each Count */
+- err = counter_synapses_register(groups_list + i, counter, count,
+- name);
+- if (err)
+- goto err_free_attr_groups;
+-
+- /* Create all attributes associated with Count */
+- err = counter_count_attributes_create(groups_list + i, counter,
+- count);
+- if (err)
+- goto err_free_attr_groups;
+- }
+-
+- return 0;
+-
+-err_free_attr_groups:
+- do {
+- kfree(groups_list[i].attr_group.name);
+- counter_device_attr_list_free(&groups_list[i].attr_list);
+- } while (i--);
+- return err;
+-}
+-
+-struct counter_size_unit {
+- size_t size;
+-};
+-
+-static ssize_t counter_device_attr_size_show(struct device *dev,
+- struct device_attribute *attr,
+- char *buf)
+-{
+- const struct counter_size_unit *const comp = to_counter_attr(attr)->component;
+-
+- return sprintf(buf, "%zu\n", comp->size);
+-}
+-
+-static int counter_size_attribute_create(
+- struct counter_device_attr_group *const group,
+- const size_t size, const char *const name)
+-{
+- struct counter_size_unit *size_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Allocate size attribute component */
+- size_comp = kmalloc(sizeof(*size_comp), GFP_KERNEL);
+- if (!size_comp)
+- return -ENOMEM;
+- size_comp->size = size;
+-
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = name;
+- parm.show = counter_device_attr_size_show;
+- parm.store = NULL;
+- parm.component = size_comp;
+- err = counter_attribute_create(&parm);
+- if (err)
+- goto err_free_size_comp;
+-
+- return 0;
+-
+-err_free_size_comp:
+- kfree(size_comp);
+- return err;
+-}
+-
+-struct counter_ext_unit {
+- const struct counter_device_ext *ext;
+-};
+-
+-static ssize_t counter_device_ext_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_ext_unit *const component = devattr->component;
+- const struct counter_device_ext *const ext = component->ext;
+-
+- return ext->read(dev_get_drvdata(dev), ext->priv, buf);
+-}
+-
+-static ssize_t counter_device_ext_store(struct device *dev,
+- struct device_attribute *attr,
+- const char *buf, size_t len)
+-{
+- const struct counter_device_attr *const devattr = to_counter_attr(attr);
+- const struct counter_ext_unit *const component = devattr->component;
+- const struct counter_device_ext *const ext = component->ext;
+-
+- return ext->write(dev_get_drvdata(dev), ext->priv, buf, len);
+-}
+-
+-static int counter_device_ext_register(
+- struct counter_device_attr_group *const group,
+- struct counter_device *const counter)
+-{
+- size_t i;
+- struct counter_ext_unit *ext_comp;
+- struct counter_attr_parm parm;
+- int err;
+-
+- /* Create an attribute for each extension */
+- for (i = 0 ; i < counter->num_ext; i++) {
+- /* Allocate extension attribute component */
+- ext_comp = kmalloc(sizeof(*ext_comp), GFP_KERNEL);
+- if (!ext_comp) {
+- err = -ENOMEM;
+- goto err_free_attr_list;
+- }
+-
+- ext_comp->ext = counter->ext + i;
+-
+- /* Allocate extension attribute */
+- parm.group = group;
+- parm.prefix = "";
+- parm.name = counter->ext[i].name;
+- parm.show = (counter->ext[i].read) ? counter_device_ext_show : NULL;
+- parm.store = (counter->ext[i].write) ? counter_device_ext_store : NULL;
+- parm.component = ext_comp;
+- err = counter_attribute_create(&parm);
+- if (err) {
+- kfree(ext_comp);
+- goto err_free_attr_list;
+- }
+- }
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-static int counter_global_attr_register(
+- struct counter_device_attr_group *const group,
+- struct counter_device *const counter)
+-{
+- int err;
+-
+- /* Create name attribute */
+- err = counter_name_attribute_create(group, counter->name);
+- if (err)
+- return err;
+-
+- /* Create num_counts attribute */
+- err = counter_size_attribute_create(group, counter->num_counts,
+- "num_counts");
+- if (err)
+- goto err_free_attr_list;
+-
+- /* Create num_signals attribute */
+- err = counter_size_attribute_create(group, counter->num_signals,
+- "num_signals");
+- if (err)
+- goto err_free_attr_list;
+-
+- /* Register Counter device extension attributes */
+- err = counter_device_ext_register(group, counter);
+- if (err)
+- goto err_free_attr_list;
+-
+- return 0;
+-
+-err_free_attr_list:
+- counter_device_attr_list_free(&group->attr_list);
+- return err;
+-}
+-
+-static void counter_device_groups_list_free(
+- struct counter_device_attr_group *const groups_list,
+- const size_t num_groups)
+-{
+- struct counter_device_attr_group *group;
+- size_t i;
+-
+- /* loop through all attribute groups (signals, counts, global, etc.) */
+- for (i = 0; i < num_groups; i++) {
+- group = groups_list + i;
+-
+- /* free all attribute group and associated attributes memory */
+- kfree(group->attr_group.name);
+- kfree(group->attr_group.attrs);
+- counter_device_attr_list_free(&group->attr_list);
+- }
+-
+- kfree(groups_list);
+-}
+-
+-static int counter_device_groups_list_prepare(
+- struct counter_device *const counter)
+-{
+- const size_t total_num_groups =
+- counter->num_signals + counter->num_counts + 1;
+- struct counter_device_attr_group *groups_list;
+- size_t i;
+- int err;
+- size_t num_groups = 0;
+-
+- /* Allocate space for attribute groups (signals, counts, and ext) */
+- groups_list = kcalloc(total_num_groups, sizeof(*groups_list),
+- GFP_KERNEL);
+- if (!groups_list)
+- return -ENOMEM;
+-
+- /* Initialize attribute lists */
+- for (i = 0; i < total_num_groups; i++)
+- INIT_LIST_HEAD(&groups_list[i].attr_list);
+-
+- /* Register Signals */
+- err = counter_signals_register(groups_list, counter);
+- if (err)
+- goto err_free_groups_list;
+- num_groups += counter->num_signals;
+-
+- /* Register Counts and respective Synapses */
+- err = counter_counts_register(groups_list + num_groups, counter);
+- if (err)
+- goto err_free_groups_list;
+- num_groups += counter->num_counts;
+-
+- /* Register Counter global attributes */
+- err = counter_global_attr_register(groups_list + num_groups, counter);
+- if (err)
+- goto err_free_groups_list;
+- num_groups++;
+-
+- /* Store groups_list in device_state */
+- counter->device_state->groups_list = groups_list;
+- counter->device_state->num_groups = num_groups;
+-
+- return 0;
+-
+-err_free_groups_list:
+- counter_device_groups_list_free(groups_list, num_groups);
+- return err;
+-}
+-
+-static int counter_device_groups_prepare(
+- struct counter_device_state *const device_state)
+-{
+- size_t i, j;
+- struct counter_device_attr_group *group;
+- int err;
+- struct counter_device_attr *p;
+-
+- /* Allocate attribute groups for association with device */
+- device_state->groups = kcalloc(device_state->num_groups + 1,
+- sizeof(*device_state->groups),
+- GFP_KERNEL);
+- if (!device_state->groups)
+- return -ENOMEM;
+-
+- /* Prepare each group of attributes for association */
+- for (i = 0; i < device_state->num_groups; i++) {
+- group = device_state->groups_list + i;
+-
+- /* Allocate space for attribute pointers in attribute group */
+- group->attr_group.attrs = kcalloc(group->num_attr + 1,
+- sizeof(*group->attr_group.attrs), GFP_KERNEL);
+- if (!group->attr_group.attrs) {
+- err = -ENOMEM;
+- goto err_free_groups;
+- }
+-
+- /* Add attribute pointers to attribute group */
+- j = 0;
+- list_for_each_entry(p, &group->attr_list, l)
+- group->attr_group.attrs[j++] = &p->dev_attr.attr;
+-
+- /* Group attributes in attribute group */
+- device_state->groups[i] = &group->attr_group;
+- }
+- /* Associate attributes with device */
+- device_state->dev.groups = device_state->groups;
+-
+- return 0;
+-
+-err_free_groups:
+- do {
+- group = device_state->groups_list + i;
+- kfree(group->attr_group.attrs);
+- group->attr_group.attrs = NULL;
+- } while (i--);
+- kfree(device_state->groups);
+- return err;
+-}
+-
+-/* Provides a unique ID for each counter device */
+-static DEFINE_IDA(counter_ida);
+-
+-static void counter_device_release(struct device *dev)
+-{
+- struct counter_device *const counter = dev_get_drvdata(dev);
+- struct counter_device_state *const device_state = counter->device_state;
+-
+- kfree(device_state->groups);
+- counter_device_groups_list_free(device_state->groups_list,
+- device_state->num_groups);
+- ida_simple_remove(&counter_ida, device_state->id);
+- kfree(device_state);
+-}
+-
+-static struct device_type counter_device_type = {
+- .name = "counter_device",
+- .release = counter_device_release
+-};
+-
+-static struct bus_type counter_bus_type = {
+- .name = "counter"
+-};
+-
+-/**
+- * counter_register - register Counter to the system
+- * @counter: pointer to Counter to register
+- *
+- * This function registers a Counter to the system. A sysfs "counter" directory
+- * will be created and populated with sysfs attributes correlating with the
+- * Counter Signals, Synapses, and Counts respectively.
+- */
+-int counter_register(struct counter_device *const counter)
+-{
+- struct counter_device_state *device_state;
+- int err;
+-
+- /* Allocate internal state container for Counter device */
+- device_state = kzalloc(sizeof(*device_state), GFP_KERNEL);
+- if (!device_state)
+- return -ENOMEM;
+- counter->device_state = device_state;
+-
+- /* Acquire unique ID */
+- device_state->id = ida_simple_get(&counter_ida, 0, 0, GFP_KERNEL);
+- if (device_state->id < 0) {
+- err = device_state->id;
+- goto err_free_device_state;
+- }
+-
+- /* Configure device structure for Counter */
+- device_state->dev.type = &counter_device_type;
+- device_state->dev.bus = &counter_bus_type;
+- if (counter->parent) {
+- device_state->dev.parent = counter->parent;
+- device_state->dev.of_node = counter->parent->of_node;
+- }
+- dev_set_name(&device_state->dev, "counter%d", device_state->id);
+- device_initialize(&device_state->dev);
+- dev_set_drvdata(&device_state->dev, counter);
+-
+- /* Prepare device attributes */
+- err = counter_device_groups_list_prepare(counter);
+- if (err)
+- goto err_free_id;
+-
+- /* Organize device attributes to groups and match to device */
+- err = counter_device_groups_prepare(device_state);
+- if (err)
+- goto err_free_groups_list;
+-
+- /* Add device to system */
+- err = device_add(&device_state->dev);
+- if (err)
+- goto err_free_groups;
+-
+- return 0;
+-
+-err_free_groups:
+- kfree(device_state->groups);
+-err_free_groups_list:
+- counter_device_groups_list_free(device_state->groups_list,
+- device_state->num_groups);
+-err_free_id:
+- ida_simple_remove(&counter_ida, device_state->id);
+-err_free_device_state:
+- kfree(device_state);
+- return err;
+-}
+-EXPORT_SYMBOL_GPL(counter_register);
+-
+-/**
+- * counter_unregister - unregister Counter from the system
+- * @counter: pointer to Counter to unregister
+- *
+- * The Counter is unregistered from the system; all allocated memory is freed.
+- */
+-void counter_unregister(struct counter_device *const counter)
+-{
+- if (counter)
+- device_del(&counter->device_state->dev);
+-}
+-EXPORT_SYMBOL_GPL(counter_unregister);
+-
+-static void devm_counter_unreg(struct device *dev, void *res)
+-{
+- counter_unregister(*(struct counter_device **)res);
+-}
+-
+-/**
+- * devm_counter_register - Resource-managed counter_register
+- * @dev: device to allocate counter_device for
+- * @counter: pointer to Counter to register
+- *
+- * Managed counter_register. The Counter registered with this function is
+- * automatically unregistered on driver detach. This function calls
+- * counter_register internally. Refer to that function for more information.
+- *
+- * If a Counter registered with this function needs to be unregistered
+- * separately, devm_counter_unregister must be used.
+- *
+- * RETURNS:
+- * 0 on success, negative error number on failure.
+- */
+-int devm_counter_register(struct device *dev,
+- struct counter_device *const counter)
+-{
+- struct counter_device **ptr;
+- int ret;
+-
+- ptr = devres_alloc(devm_counter_unreg, sizeof(*ptr), GFP_KERNEL);
+- if (!ptr)
+- return -ENOMEM;
+-
+- ret = counter_register(counter);
+- if (!ret) {
+- *ptr = counter;
+- devres_add(dev, ptr);
+- } else {
+- devres_free(ptr);
+- }
+-
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(devm_counter_register);
+-
+-static int devm_counter_match(struct device *dev, void *res, void *data)
+-{
+- struct counter_device **r = res;
+-
+- if (!r || !*r) {
+- WARN_ON(!r || !*r);
+- return 0;
+- }
+-
+- return *r == data;
+-}
+-
+-/**
+- * devm_counter_unregister - Resource-managed counter_unregister
+- * @dev: device this counter_device belongs to
+- * @counter: pointer to Counter associated with the device
+- *
+- * Unregister Counter registered with devm_counter_register.
+- */
+-void devm_counter_unregister(struct device *dev,
+- struct counter_device *const counter)
+-{
+- int rc;
+-
+- rc = devres_release(dev, devm_counter_unreg, devm_counter_match,
+- counter);
+- WARN_ON(rc);
+-}
+-EXPORT_SYMBOL_GPL(devm_counter_unregister);
+-
+-static int __init counter_init(void)
+-{
+- return bus_register(&counter_bus_type);
+-}
+-
+-static void __exit counter_exit(void)
+-{
+- bus_unregister(&counter_bus_type);
+-}
+-
+-subsys_initcall(counter_init);
+-module_exit(counter_exit);
+-
+-MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
+-MODULE_DESCRIPTION("Generic Counter interface");
+-MODULE_LICENSE("GPL v2");
+diff --git a/drivers/counter/ftm-quaddec.c b/drivers/counter/ftm-quaddec.c
+index 53c15f84909b9..5ef0478709cd8 100644
+--- a/drivers/counter/ftm-quaddec.c
++++ b/drivers/counter/ftm-quaddec.c
+@@ -14,6 +14,7 @@
+ #include <linux/mutex.h>
+ #include <linux/counter.h>
+ #include <linux/bitfield.h>
++#include <linux/types.h>
+
+ #define FTM_FIELD_UPDATE(ftm, offset, mask, val) \
+ ({ \
+@@ -115,8 +116,7 @@ static void ftm_quaddec_disable(void *ftm)
+ }
+
+ static int ftm_quaddec_get_prescaler(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *cnt_mode)
++ struct counter_count *count, u32 *cnt_mode)
+ {
+ struct ftm_quaddec *ftm = counter->priv;
+ uint32_t scflags;
+@@ -129,8 +129,7 @@ static int ftm_quaddec_get_prescaler(struct counter_device *counter,
+ }
+
+ static int ftm_quaddec_set_prescaler(struct counter_device *counter,
+- struct counter_count *count,
+- size_t cnt_mode)
++ struct counter_count *count, u32 cnt_mode)
+ {
+ struct ftm_quaddec *ftm = counter->priv;
+
+@@ -151,33 +150,17 @@ static const char * const ftm_quaddec_prescaler[] = {
+ "1", "2", "4", "8", "16", "32", "64", "128"
+ };
+
+-static struct counter_count_enum_ext ftm_quaddec_prescaler_enum = {
+- .items = ftm_quaddec_prescaler,
+- .num_items = ARRAY_SIZE(ftm_quaddec_prescaler),
+- .get = ftm_quaddec_get_prescaler,
+- .set = ftm_quaddec_set_prescaler
+-};
+-
+-enum ftm_quaddec_synapse_action {
+- FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES,
+-};
+-
+ static const enum counter_synapse_action ftm_quaddec_synapse_actions[] = {
+- [FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES] =
+ COUNTER_SYNAPSE_ACTION_BOTH_EDGES
+ };
+
+-enum ftm_quaddec_count_function {
+- FTM_QUADDEC_COUNT_ENCODER_MODE_1,
+-};
+-
+ static const enum counter_function ftm_quaddec_count_functions[] = {
+- [FTM_QUADDEC_COUNT_ENCODER_MODE_1] = COUNTER_FUNCTION_QUADRATURE_X4
++ COUNTER_FUNCTION_QUADRATURE_X4
+ };
+
+ static int ftm_quaddec_count_read(struct counter_device *counter,
+ struct counter_count *count,
+- unsigned long *val)
++ u64 *val)
+ {
+ struct ftm_quaddec *const ftm = counter->priv;
+ uint32_t cntval;
+@@ -191,7 +174,7 @@ static int ftm_quaddec_count_read(struct counter_device *counter,
+
+ static int ftm_quaddec_count_write(struct counter_device *counter,
+ struct counter_count *count,
+- const unsigned long val)
++ const u64 val)
+ {
+ struct ftm_quaddec *const ftm = counter->priv;
+
+@@ -205,21 +188,21 @@ static int ftm_quaddec_count_write(struct counter_device *counter,
+ return 0;
+ }
+
+-static int ftm_quaddec_count_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int ftm_quaddec_count_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+- *function = FTM_QUADDEC_COUNT_ENCODER_MODE_1;
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+
+ return 0;
+ }
+
+-static int ftm_quaddec_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int ftm_quaddec_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+- *action = FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+
+ return 0;
+ }
+@@ -227,8 +210,8 @@ static int ftm_quaddec_action_get(struct counter_device *counter,
+ static const struct counter_ops ftm_quaddec_cnt_ops = {
+ .count_read = ftm_quaddec_count_read,
+ .count_write = ftm_quaddec_count_write,
+- .function_get = ftm_quaddec_count_function_get,
+- .action_get = ftm_quaddec_action_get,
++ .function_read = ftm_quaddec_count_function_read,
++ .action_read = ftm_quaddec_action_read,
+ };
+
+ static struct counter_signal ftm_quaddec_signals[] = {
+@@ -255,9 +238,12 @@ static struct counter_synapse ftm_quaddec_count_synapses[] = {
+ }
+ };
+
+-static const struct counter_count_ext ftm_quaddec_count_ext[] = {
+- COUNTER_COUNT_ENUM("prescaler", &ftm_quaddec_prescaler_enum),
+- COUNTER_COUNT_ENUM_AVAILABLE("prescaler", &ftm_quaddec_prescaler_enum),
++static DEFINE_COUNTER_ENUM(ftm_quaddec_prescaler_enum, ftm_quaddec_prescaler);
++
++static struct counter_comp ftm_quaddec_count_ext[] = {
++ COUNTER_COMP_COUNT_ENUM("prescaler", ftm_quaddec_get_prescaler,
++ ftm_quaddec_set_prescaler,
++ ftm_quaddec_prescaler_enum),
+ };
+
+ static struct counter_count ftm_quaddec_counts = {
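The same conversion pattern applies to any enum-style Count extension; below is a hedged sketch mirroring the ftm-quaddec hunk above (the foo_* names and the "mode" strings are placeholders, not part of the patch).

#include <linux/counter.h>
#include <linux/types.h>

static const char *const foo_modes[] = { "normal", "inverted" };

/* New-style u32 accessors replace the old size_t-index enum callbacks */
static int foo_mode_read(struct counter_device *counter,
			 struct counter_count *count, u32 *mode)
{
	*mode = 0;	/* index into foo_modes[]; hardware access elided */
	return 0;
}

static int foo_mode_write(struct counter_device *counter,
			  struct counter_count *count, u32 mode)
{
	/* program the hardware for foo_modes[mode] */
	return 0;
}

static DEFINE_COUNTER_ENUM(foo_mode_enum, foo_modes);

/* Assign to a struct counter_count's .ext/.num_ext, as ftm-quaddec does */
static struct counter_comp foo_count_ext[] = {
	COUNTER_COMP_COUNT_ENUM("mode", foo_mode_read, foo_mode_write,
				foo_mode_enum),
};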
+diff --git a/drivers/counter/intel-qep.c b/drivers/counter/intel-qep.c
+index 8a6847d5fb2bd..0924d16de6e26 100644
+--- a/drivers/counter/intel-qep.c
++++ b/drivers/counter/intel-qep.c
+@@ -62,13 +62,6 @@
+
+ #define INTEL_QEP_CLK_PERIOD_NS 10
+
+-#define INTEL_QEP_COUNTER_EXT_RW(_name) \
+-{ \
+- .name = #_name, \
+- .read = _name##_read, \
+- .write = _name##_write, \
+-}
+-
+ struct intel_qep {
+ struct counter_device counter;
+ struct mutex lock;
+@@ -114,8 +107,7 @@ static void intel_qep_init(struct intel_qep *qep)
+ }
+
+ static int intel_qep_count_read(struct counter_device *counter,
+- struct counter_count *count,
+- unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct intel_qep *const qep = counter->priv;
+
+@@ -130,11 +122,11 @@ static const enum counter_function intel_qep_count_functions[] = {
+ COUNTER_FUNCTION_QUADRATURE_X4,
+ };
+
+-static int intel_qep_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int intel_qep_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+- *function = 0;
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+
+ return 0;
+ }
+@@ -143,19 +135,19 @@ static const enum counter_synapse_action intel_qep_synapse_actions[] = {
+ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+ };
+
+-static int intel_qep_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int intel_qep_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+- *action = 0;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+ }
+
+ static const struct counter_ops intel_qep_counter_ops = {
+ .count_read = intel_qep_count_read,
+- .function_get = intel_qep_function_get,
+- .action_get = intel_qep_action_get,
++ .function_read = intel_qep_function_read,
++ .action_read = intel_qep_action_read,
+ };
+
+ #define INTEL_QEP_SIGNAL(_id, _name) { \
+@@ -181,31 +173,27 @@ static struct counter_synapse intel_qep_count_synapses[] = {
+ INTEL_QEP_SYNAPSE(2),
+ };
+
+-static ssize_t ceiling_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf)
++static int intel_qep_ceiling_read(struct counter_device *counter,
++ struct counter_count *count, u64 *ceiling)
+ {
+ struct intel_qep *qep = counter->priv;
+- u32 reg;
+
+ pm_runtime_get_sync(qep->dev);
+- reg = intel_qep_readl(qep, INTEL_QEPMAX);
++ *ceiling = intel_qep_readl(qep, INTEL_QEPMAX);
+ pm_runtime_put(qep->dev);
+
+- return sysfs_emit(buf, "%u\n", reg);
++ return 0;
+ }
+
+-static ssize_t ceiling_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, const char *buf, size_t len)
++static int intel_qep_ceiling_write(struct counter_device *counter,
++ struct counter_count *count, u64 max)
+ {
+ struct intel_qep *qep = counter->priv;
+- u32 max;
+- int ret;
++ int ret = 0;
+
+- ret = kstrtou32(buf, 0, &max);
+- if (ret < 0)
+- return ret;
++ /* Intel QEP ceiling configuration only supports 32-bit values */
++ if (max != (u32)max)
++ return -ERANGE;
+
+ mutex_lock(&qep->lock);
+ if (qep->enabled) {
+@@ -216,34 +204,28 @@ static ssize_t ceiling_write(struct counter_device *counter,
+ pm_runtime_get_sync(qep->dev);
+ intel_qep_writel(qep, INTEL_QEPMAX, max);
+ pm_runtime_put(qep->dev);
+- ret = len;
+
+ out:
+ mutex_unlock(&qep->lock);
+ return ret;
+ }
+
+-static ssize_t enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf)
++static int intel_qep_enable_read(struct counter_device *counter,
++ struct counter_count *count, u8 *enable)
+ {
+ struct intel_qep *qep = counter->priv;
+
+- return sysfs_emit(buf, "%u\n", qep->enabled);
++ *enable = qep->enabled;
++
++ return 0;
+ }
+
+-static ssize_t enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, const char *buf, size_t len)
++static int intel_qep_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 val)
+ {
+ struct intel_qep *qep = counter->priv;
+ u32 reg;
+- bool val, changed;
+- int ret;
+-
+- ret = kstrtobool(buf, &val);
+- if (ret)
+- return ret;
++ bool changed;
+
+ mutex_lock(&qep->lock);
+ changed = val ^ qep->enabled;
+@@ -267,12 +249,12 @@ static ssize_t enable_write(struct counter_device *counter,
+
+ out:
+ mutex_unlock(&qep->lock);
+- return len;
++ return 0;
+ }
+
+-static ssize_t spike_filter_ns_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf)
++static int intel_qep_spike_filter_ns_read(struct counter_device *counter,
++ struct counter_count *count,
++ u64 *length)
+ {
+ struct intel_qep *qep = counter->priv;
+ u32 reg;
+@@ -281,33 +263,31 @@ static ssize_t spike_filter_ns_read(struct counter_device *counter,
+ reg = intel_qep_readl(qep, INTEL_QEPCON);
+ if (!(reg & INTEL_QEPCON_FLT_EN)) {
+ pm_runtime_put(qep->dev);
+- return sysfs_emit(buf, "0\n");
++ return 0;
+ }
+ reg = INTEL_QEPFLT_MAX_COUNT(intel_qep_readl(qep, INTEL_QEPFLT));
+ pm_runtime_put(qep->dev);
+
+- return sysfs_emit(buf, "%u\n", (reg + 2) * INTEL_QEP_CLK_PERIOD_NS);
++ *length = (reg + 2) * INTEL_QEP_CLK_PERIOD_NS;
++
++ return 0;
+ }
+
+-static ssize_t spike_filter_ns_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, const char *buf, size_t len)
++static int intel_qep_spike_filter_ns_write(struct counter_device *counter,
++ struct counter_count *count,
++ u64 length)
+ {
+ struct intel_qep *qep = counter->priv;
+- u32 reg, length;
++ u32 reg;
+ bool enable;
+- int ret;
+-
+- ret = kstrtou32(buf, 0, &length);
+- if (ret < 0)
+- return ret;
++ int ret = 0;
+
+ /*
+ * Spike filter length is (MAX_COUNT + 2) clock periods.
+ * Disable filter when userspace writes 0, enable for valid
+ * nanoseconds values and error out otherwise.
+ */
+- length /= INTEL_QEP_CLK_PERIOD_NS;
++ do_div(length, INTEL_QEP_CLK_PERIOD_NS);
+ if (length == 0) {
+ enable = false;
+ length = 0;
+@@ -336,16 +316,15 @@ static ssize_t spike_filter_ns_write(struct counter_device *counter,
+ intel_qep_writel(qep, INTEL_QEPFLT, length);
+ intel_qep_writel(qep, INTEL_QEPCON, reg);
+ pm_runtime_put(qep->dev);
+- ret = len;
+
+ out:
+ mutex_unlock(&qep->lock);
+ return ret;
+ }
+
+-static ssize_t preset_enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf)
++static int intel_qep_preset_enable_read(struct counter_device *counter,
++ struct counter_count *count,
++ u8 *preset_enable)
+ {
+ struct intel_qep *qep = counter->priv;
+ u32 reg;
+@@ -353,21 +332,18 @@ static ssize_t preset_enable_read(struct counter_device *counter,
+ pm_runtime_get_sync(qep->dev);
+ reg = intel_qep_readl(qep, INTEL_QEPCON);
+ pm_runtime_put(qep->dev);
+- return sysfs_emit(buf, "%u\n", !(reg & INTEL_QEPCON_COUNT_RST_MODE));
++
++ *preset_enable = !(reg & INTEL_QEPCON_COUNT_RST_MODE);
++
++ return 0;
+ }
+
+-static ssize_t preset_enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, const char *buf, size_t len)
++static int intel_qep_preset_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 val)
+ {
+ struct intel_qep *qep = counter->priv;
+ u32 reg;
+- bool val;
+- int ret;
+-
+- ret = kstrtobool(buf, &val);
+- if (ret)
+- return ret;
++ int ret = 0;
+
+ mutex_lock(&qep->lock);
+ if (qep->enabled) {
+@@ -384,7 +360,6 @@ static ssize_t preset_enable_write(struct counter_device *counter,
+
+ intel_qep_writel(qep, INTEL_QEPCON, reg);
+ pm_runtime_put(qep->dev);
+- ret = len;
+
+ out:
+ mutex_unlock(&qep->lock);
+@@ -392,11 +367,14 @@ out:
+ return ret;
+ }
+
+-static const struct counter_count_ext intel_qep_count_ext[] = {
+- INTEL_QEP_COUNTER_EXT_RW(ceiling),
+- INTEL_QEP_COUNTER_EXT_RW(enable),
+- INTEL_QEP_COUNTER_EXT_RW(spike_filter_ns),
+- INTEL_QEP_COUNTER_EXT_RW(preset_enable)
++static struct counter_comp intel_qep_count_ext[] = {
++ COUNTER_COMP_ENABLE(intel_qep_enable_read, intel_qep_enable_write),
++ COUNTER_COMP_CEILING(intel_qep_ceiling_read, intel_qep_ceiling_write),
++ COUNTER_COMP_PRESET_ENABLE(intel_qep_preset_enable_read,
++ intel_qep_preset_enable_write),
++ COUNTER_COMP_COUNT_U64("spike_filter_ns",
++ intel_qep_spike_filter_ns_read,
++ intel_qep_spike_filter_ns_write),
+ };
+
+ static struct counter_count intel_qep_counter_count[] = {
+diff --git a/drivers/counter/interrupt-cnt.c b/drivers/counter/interrupt-cnt.c
+index 1de4243db488c..8514a87fcbee0 100644
+--- a/drivers/counter/interrupt-cnt.c
++++ b/drivers/counter/interrupt-cnt.c
+@@ -10,6 +10,7 @@
+ #include <linux/mod_devicetable.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
++#include <linux/types.h>
+
+ #define INTERRUPT_CNT_NAME "interrupt-cnt"
+
+@@ -33,30 +34,23 @@ static irqreturn_t interrupt_cnt_isr(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-static ssize_t interrupt_cnt_enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, char *buf)
++static int interrupt_cnt_enable_read(struct counter_device *counter,
++ struct counter_count *count, u8 *enable)
+ {
+ struct interrupt_cnt_priv *priv = counter->priv;
+
+- return sysfs_emit(buf, "%d\n", priv->enabled);
++ *enable = priv->enabled;
++
++ return 0;
+ }
+
+-static ssize_t interrupt_cnt_enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, const char *buf,
+- size_t len)
++static int interrupt_cnt_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 enable)
+ {
+ struct interrupt_cnt_priv *priv = counter->priv;
+- bool enable;
+- ssize_t ret;
+-
+- ret = kstrtobool(buf, &enable);
+- if (ret)
+- return ret;
+
+ if (priv->enabled == enable)
+- return len;
++ return 0;
+
+ if (enable) {
+ priv->enabled = true;
+@@ -66,33 +60,30 @@ static ssize_t interrupt_cnt_enable_write(struct counter_device *counter,
+ priv->enabled = false;
+ }
+
+- return len;
++ return 0;
+ }
+
+-static const struct counter_count_ext interrupt_cnt_ext[] = {
+- {
+- .name = "enable",
+- .read = interrupt_cnt_enable_read,
+- .write = interrupt_cnt_enable_write,
+- },
++static struct counter_comp interrupt_cnt_ext[] = {
++ COUNTER_COMP_ENABLE(interrupt_cnt_enable_read,
++ interrupt_cnt_enable_write),
+ };
+
+ static const enum counter_synapse_action interrupt_cnt_synapse_actions[] = {
+ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+ };
+
+-static int interrupt_cnt_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int interrupt_cnt_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+- *action = 0;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+
+ return 0;
+ }
+
+ static int interrupt_cnt_read(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct interrupt_cnt_priv *priv = counter->priv;
+
+@@ -102,8 +93,7 @@ static int interrupt_cnt_read(struct counter_device *counter,
+ }
+
+ static int interrupt_cnt_write(struct counter_device *counter,
+- struct counter_count *count,
+- const unsigned long val)
++ struct counter_count *count, const u64 val)
+ {
+ struct interrupt_cnt_priv *priv = counter->priv;
+
+@@ -119,11 +109,11 @@ static const enum counter_function interrupt_cnt_functions[] = {
+ COUNTER_FUNCTION_INCREASE,
+ };
+
+-static int interrupt_cnt_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int interrupt_cnt_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+- *function = 0;
++ *function = COUNTER_FUNCTION_INCREASE;
+
+ return 0;
+ }
+@@ -148,10 +138,10 @@ static int interrupt_cnt_signal_read(struct counter_device *counter,
+ }
+
+ static const struct counter_ops interrupt_cnt_ops = {
+- .action_get = interrupt_cnt_action_get,
++ .action_read = interrupt_cnt_action_read,
+ .count_read = interrupt_cnt_read,
+ .count_write = interrupt_cnt_write,
+- .function_get = interrupt_cnt_function_get,
++ .function_read = interrupt_cnt_function_read,
+ .signal_read = interrupt_cnt_signal_read,
+ };
+
+diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c
+index 1aa70b9c48330..4edfe1f8fff7a 100644
+--- a/drivers/counter/microchip-tcb-capture.c
++++ b/drivers/counter/microchip-tcb-capture.c
+@@ -29,31 +29,18 @@ struct mchp_tc_data {
+ int qdec_mode;
+ int num_channels;
+ int channel[2];
+- bool trig_inverted;
+-};
+-
+-enum mchp_tc_count_function {
+- MCHP_TC_FUNCTION_INCREASE,
+- MCHP_TC_FUNCTION_QUADRATURE,
+ };
+
+ static const enum counter_function mchp_tc_count_functions[] = {
+- [MCHP_TC_FUNCTION_INCREASE] = COUNTER_FUNCTION_INCREASE,
+- [MCHP_TC_FUNCTION_QUADRATURE] = COUNTER_FUNCTION_QUADRATURE_X4,
+-};
+-
+-enum mchp_tc_synapse_action {
+- MCHP_TC_SYNAPSE_ACTION_NONE = 0,
+- MCHP_TC_SYNAPSE_ACTION_RISING_EDGE,
+- MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE,
+- MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE
++ COUNTER_FUNCTION_INCREASE,
++ COUNTER_FUNCTION_QUADRATURE_X4,
+ };
+
+ static const enum counter_synapse_action mchp_tc_synapse_actions[] = {
+- [MCHP_TC_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
+- [MCHP_TC_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+- [MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+- [MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
++ COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
++ COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+ };
+
+ static struct counter_signal mchp_tc_count_signals[] = {
+@@ -80,23 +67,23 @@ static struct counter_synapse mchp_tc_count_synapses[] = {
+ }
+ };
+
+-static int mchp_tc_count_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int mchp_tc_count_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+ struct mchp_tc_data *const priv = counter->priv;
+
+ if (priv->qdec_mode)
+- *function = MCHP_TC_FUNCTION_QUADRATURE;
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+ else
+- *function = MCHP_TC_FUNCTION_INCREASE;
++ *function = COUNTER_FUNCTION_INCREASE;
+
+ return 0;
+ }
+
+-static int mchp_tc_count_function_set(struct counter_device *counter,
+- struct counter_count *count,
+- size_t function)
++static int mchp_tc_count_function_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function)
+ {
+ struct mchp_tc_data *const priv = counter->priv;
+ u32 bmr, cmr;
+@@ -108,7 +95,7 @@ static int mchp_tc_count_function_set(struct counter_device *counter,
+ cmr &= ~ATMEL_TC_WAVE;
+
+ switch (function) {
+- case MCHP_TC_FUNCTION_INCREASE:
++ case COUNTER_FUNCTION_INCREASE:
+ priv->qdec_mode = 0;
+ /* Set highest rate based on whether soc has gclk or not */
+ bmr &= ~(ATMEL_TC_QDEN | ATMEL_TC_POSEN);
+@@ -120,7 +107,7 @@ static int mchp_tc_count_function_set(struct counter_device *counter,
+ cmr |= ATMEL_TC_CMR_MASK;
+ cmr &= ~(ATMEL_TC_ABETRG | ATMEL_TC_XC0);
+ break;
+- case MCHP_TC_FUNCTION_QUADRATURE:
++ case COUNTER_FUNCTION_QUADRATURE_X4:
+ if (!priv->tc_cfg->has_qdec)
+ return -EINVAL;
+ /* In QDEC mode settings both channels 0 and 1 are required */
+@@ -166,7 +153,7 @@ static int mchp_tc_count_signal_read(struct counter_device *counter,
+
+ regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], SR), &sr);
+
+- if (priv->trig_inverted)
++ if (signal->id == 1)
+ sigstatus = (sr & ATMEL_TC_MTIOB);
+ else
+ sigstatus = (sr & ATMEL_TC_MTIOA);
+@@ -176,57 +163,68 @@ static int mchp_tc_count_signal_read(struct counter_device *counter,
+ return 0;
+ }
+
+-static int mchp_tc_count_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int mchp_tc_count_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+ struct mchp_tc_data *const priv = counter->priv;
+ u32 cmr;
+
++ if (priv->qdec_mode) {
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
++ return 0;
++ }
++
++ /* Only TIOA signal is evaluated in non-QDEC mode */
++ if (synapse->signal->id != 0) {
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
++ return 0;
++ }
++
+ regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CMR), &cmr);
+
+ switch (cmr & ATMEL_TC_ETRGEDG) {
+ default:
+- *action = MCHP_TC_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ break;
+ case ATMEL_TC_ETRGEDG_RISING:
+- *action = MCHP_TC_SYNAPSE_ACTION_RISING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ break;
+ case ATMEL_TC_ETRGEDG_FALLING:
+- *action = MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
+ break;
+ case ATMEL_TC_ETRGEDG_BOTH:
+- *action = MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ break;
+ }
+
+ return 0;
+ }
+
+-static int mchp_tc_count_action_set(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t action)
++static int mchp_tc_count_action_write(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action action)
+ {
+ struct mchp_tc_data *const priv = counter->priv;
+ u32 edge = ATMEL_TC_ETRGEDG_NONE;
+
+- /* QDEC mode is rising edge only */
+- if (priv->qdec_mode)
++ /* QDEC mode is rising edge only; only TIOA handled in non-QDEC mode */
++ if (priv->qdec_mode || synapse->signal->id != 0)
+ return -EINVAL;
+
+ switch (action) {
+- case MCHP_TC_SYNAPSE_ACTION_NONE:
++ case COUNTER_SYNAPSE_ACTION_NONE:
+ edge = ATMEL_TC_ETRGEDG_NONE;
+ break;
+- case MCHP_TC_SYNAPSE_ACTION_RISING_EDGE:
++ case COUNTER_SYNAPSE_ACTION_RISING_EDGE:
+ edge = ATMEL_TC_ETRGEDG_RISING;
+ break;
+- case MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE:
++ case COUNTER_SYNAPSE_ACTION_FALLING_EDGE:
+ edge = ATMEL_TC_ETRGEDG_FALLING;
+ break;
+- case MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE:
++ case COUNTER_SYNAPSE_ACTION_BOTH_EDGES:
+ edge = ATMEL_TC_ETRGEDG_BOTH;
+ break;
+ default:
+@@ -240,8 +238,7 @@ static int mchp_tc_count_action_set(struct counter_device *counter,
+ }
+
+ static int mchp_tc_count_read(struct counter_device *counter,
+- struct counter_count *count,
+- unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct mchp_tc_data *const priv = counter->priv;
+ u32 cnt;
+@@ -264,12 +261,12 @@ static struct counter_count mchp_tc_counts[] = {
+ };
+
+ static const struct counter_ops mchp_tc_ops = {
+- .signal_read = mchp_tc_count_signal_read,
+- .count_read = mchp_tc_count_read,
+- .function_get = mchp_tc_count_function_get,
+- .function_set = mchp_tc_count_function_set,
+- .action_get = mchp_tc_count_action_get,
+- .action_set = mchp_tc_count_action_set
++ .signal_read = mchp_tc_count_signal_read,
++ .count_read = mchp_tc_count_read,
++ .function_read = mchp_tc_count_function_read,
++ .function_write = mchp_tc_count_function_write,
++ .action_read = mchp_tc_count_action_read,
++ .action_write = mchp_tc_count_action_write
+ };
+
+ static const struct atmel_tcb_config tcb_rm9200_config = {
+diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
+index 13656957c45f1..637b3f0b4fa34 100644
+--- a/drivers/counter/stm32-lptimer-cnt.c
++++ b/drivers/counter/stm32-lptimer-cnt.c
+@@ -17,6 +17,7 @@
+ #include <linux/module.h>
+ #include <linux/pinctrl/consumer.h>
+ #include <linux/platform_device.h>
++#include <linux/types.h>
+
+ struct stm32_lptim_cnt {
+ struct counter_device counter;
+@@ -69,7 +70,7 @@ static int stm32_lptim_set_enable_state(struct stm32_lptim_cnt *priv,
+
+ /* ensure CMP & ARR registers are properly written */
+ ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val,
+- (val & STM32_LPTIM_CMPOK_ARROK),
++ (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK,
+ 100, 1000);
+ if (ret)
+ return ret;
+@@ -107,11 +108,7 @@ static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable)
+ return regmap_update_bits(priv->regmap, STM32_LPTIM_CFGR, mask, val);
+ }
+
+-/**
+- * enum stm32_lptim_cnt_function - enumerates LPTimer counter & encoder modes
+- * @STM32_LPTIM_COUNTER_INCREASE: up count on IN1 rising, falling or both edges
+- * @STM32_LPTIM_ENCODER_BOTH_EDGE: count on both edges (IN1 & IN2 quadrature)
+- *
++/*
+ * In non-quadrature mode, device counts up on active edge.
+ * In quadrature mode, encoder counting scenarios are as follows:
+ * +---------+----------+--------------------+--------------------+
+@@ -129,33 +126,20 @@ static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable)
+ * | edges | Low -> | Up | Down | Down | Up |
+ * +---------+----------+----------+---------+----------+---------+
+ */
+-enum stm32_lptim_cnt_function {
+- STM32_LPTIM_COUNTER_INCREASE,
+- STM32_LPTIM_ENCODER_BOTH_EDGE,
+-};
+-
+ static const enum counter_function stm32_lptim_cnt_functions[] = {
+- [STM32_LPTIM_COUNTER_INCREASE] = COUNTER_FUNCTION_INCREASE,
+- [STM32_LPTIM_ENCODER_BOTH_EDGE] = COUNTER_FUNCTION_QUADRATURE_X4,
+-};
+-
+-enum stm32_lptim_synapse_action {
+- STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE,
+- STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE,
+- STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES,
+- STM32_LPTIM_SYNAPSE_ACTION_NONE,
++ COUNTER_FUNCTION_INCREASE,
++ COUNTER_FUNCTION_QUADRATURE_X4,
+ };
+
+ static const enum counter_synapse_action stm32_lptim_cnt_synapse_actions[] = {
+- /* Index must match with stm32_lptim_cnt_polarity[] (priv->polarity) */
+- [STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+- [STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+- [STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+- [STM32_LPTIM_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
++ COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
++ COUNTER_SYNAPSE_ACTION_NONE,
+ };
+
+ static int stm32_lptim_cnt_read(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+ u32 cnt;
+@@ -170,28 +154,28 @@ static int stm32_lptim_cnt_read(struct counter_device *counter,
+ return 0;
+ }
+
+-static int stm32_lptim_cnt_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int stm32_lptim_cnt_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+
+ if (!priv->quadrature_mode) {
+- *function = STM32_LPTIM_COUNTER_INCREASE;
++ *function = COUNTER_FUNCTION_INCREASE;
+ return 0;
+ }
+
+- if (priv->polarity == STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES) {
+- *function = STM32_LPTIM_ENCODER_BOTH_EDGE;
++ if (priv->polarity == STM32_LPTIM_CKPOL_BOTH_EDGES) {
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+ return 0;
+ }
+
+ return -EINVAL;
+ }
+
+-static int stm32_lptim_cnt_function_set(struct counter_device *counter,
+- struct counter_count *count,
+- size_t function)
++static int stm32_lptim_cnt_function_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+
+@@ -199,12 +183,12 @@ static int stm32_lptim_cnt_function_set(struct counter_device *counter,
+ return -EBUSY;
+
+ switch (function) {
+- case STM32_LPTIM_COUNTER_INCREASE:
++ case COUNTER_FUNCTION_INCREASE:
+ priv->quadrature_mode = 0;
+ return 0;
+- case STM32_LPTIM_ENCODER_BOTH_EDGE:
++ case COUNTER_FUNCTION_QUADRATURE_X4:
+ priv->quadrature_mode = 1;
+- priv->polarity = STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES;
++ priv->polarity = STM32_LPTIM_CKPOL_BOTH_EDGES;
+ return 0;
+ default:
+ /* should never reach this path */
+@@ -212,9 +196,9 @@ static int stm32_lptim_cnt_function_set(struct counter_device *counter,
+ }
+ }
+
+-static ssize_t stm32_lptim_cnt_enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, char *buf)
++static int stm32_lptim_cnt_enable_read(struct counter_device *counter,
++ struct counter_count *count,
++ u8 *enable)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+ int ret;
+@@ -223,22 +207,18 @@ static ssize_t stm32_lptim_cnt_enable_read(struct counter_device *counter,
+ if (ret < 0)
+ return ret;
+
+- return scnprintf(buf, PAGE_SIZE, "%u\n", ret);
++ *enable = ret;
++
++ return 0;
+ }
+
+-static ssize_t stm32_lptim_cnt_enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *private,
+- const char *buf, size_t len)
++static int stm32_lptim_cnt_enable_write(struct counter_device *counter,
++ struct counter_count *count,
++ u8 enable)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+- bool enable;
+ int ret;
+
+- ret = kstrtobool(buf, &enable);
+- if (ret)
+- return ret;
+-
+ /* Check nobody uses the timer, or already disabled/enabled */
+ ret = stm32_lptim_is_enabled(priv);
+ if ((ret < 0) || (!ret && !enable))
+@@ -254,78 +234,81 @@ static ssize_t stm32_lptim_cnt_enable_write(struct counter_device *counter,
+ if (ret)
+ return ret;
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t stm32_lptim_cnt_ceiling_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, char *buf)
++static int stm32_lptim_cnt_ceiling_read(struct counter_device *counter,
++ struct counter_count *count,
++ u64 *ceiling)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+
+- return snprintf(buf, PAGE_SIZE, "%u\n", priv->ceiling);
++ *ceiling = priv->ceiling;
++
++ return 0;
+ }
+
+-static ssize_t stm32_lptim_cnt_ceiling_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *private,
+- const char *buf, size_t len)
++static int stm32_lptim_cnt_ceiling_write(struct counter_device *counter,
++ struct counter_count *count,
++ u64 ceiling)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+- unsigned int ceiling;
+- int ret;
+
+ if (stm32_lptim_is_enabled(priv))
+ return -EBUSY;
+
+- ret = kstrtouint(buf, 0, &ceiling);
+- if (ret)
+- return ret;
+-
+ if (ceiling > STM32_LPTIM_MAX_ARR)
+ return -ERANGE;
+
+ priv->ceiling = ceiling;
+
+- return len;
++ return 0;
+ }
+
+-static const struct counter_count_ext stm32_lptim_cnt_ext[] = {
+- {
+- .name = "enable",
+- .read = stm32_lptim_cnt_enable_read,
+- .write = stm32_lptim_cnt_enable_write
+- },
+- {
+- .name = "ceiling",
+- .read = stm32_lptim_cnt_ceiling_read,
+- .write = stm32_lptim_cnt_ceiling_write
+- },
++static struct counter_comp stm32_lptim_cnt_ext[] = {
++ COUNTER_COMP_ENABLE(stm32_lptim_cnt_enable_read,
++ stm32_lptim_cnt_enable_write),
++ COUNTER_COMP_CEILING(stm32_lptim_cnt_ceiling_read,
++ stm32_lptim_cnt_ceiling_write),
+ };
+
+-static int stm32_lptim_cnt_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int stm32_lptim_cnt_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+- size_t function;
++ enum counter_function function;
+ int err;
+
+- err = stm32_lptim_cnt_function_get(counter, count, &function);
++ err = stm32_lptim_cnt_function_read(counter, count, &function);
+ if (err)
+ return err;
+
+ switch (function) {
+- case STM32_LPTIM_COUNTER_INCREASE:
++ case COUNTER_FUNCTION_INCREASE:
+ /* LP Timer acts as up-counter on input 1 */
+- if (synapse->signal->id == count->synapses[0].signal->id)
+- *action = priv->polarity;
+- else
+- *action = STM32_LPTIM_SYNAPSE_ACTION_NONE;
+- return 0;
+- case STM32_LPTIM_ENCODER_BOTH_EDGE:
+- *action = priv->polarity;
++ if (synapse->signal->id != count->synapses[0].signal->id) {
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
++ return 0;
++ }
++
++ switch (priv->polarity) {
++ case STM32_LPTIM_CKPOL_RISING_EDGE:
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
++ return 0;
++ case STM32_LPTIM_CKPOL_FALLING_EDGE:
++ *action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
++ return 0;
++ case STM32_LPTIM_CKPOL_BOTH_EDGES:
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
++ return 0;
++ default:
++ /* should never reach this path */
++ return -EINVAL;
++ }
++ case COUNTER_FUNCTION_QUADRATURE_X4:
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+ default:
+ /* should never reach this path */
+@@ -333,43 +316,48 @@ static int stm32_lptim_cnt_action_get(struct counter_device *counter,
+ }
+ }
+
+-static int stm32_lptim_cnt_action_set(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t action)
++static int stm32_lptim_cnt_action_write(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action action)
+ {
+ struct stm32_lptim_cnt *const priv = counter->priv;
+- size_t function;
++ enum counter_function function;
+ int err;
+
+ if (stm32_lptim_is_enabled(priv))
+ return -EBUSY;
+
+- err = stm32_lptim_cnt_function_get(counter, count, &function);
++ err = stm32_lptim_cnt_function_read(counter, count, &function);
+ if (err)
+ return err;
+
+ /* only set polarity when in counter mode (on input 1) */
+- if (function == STM32_LPTIM_COUNTER_INCREASE
+- && synapse->signal->id == count->synapses[0].signal->id) {
+- switch (action) {
+- case STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE:
+- case STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE:
+- case STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES:
+- priv->polarity = action;
+- return 0;
+- }
+- }
++ if (function != COUNTER_FUNCTION_INCREASE
++ || synapse->signal->id != count->synapses[0].signal->id)
++ return -EINVAL;
+
+- return -EINVAL;
++ switch (action) {
++ case COUNTER_SYNAPSE_ACTION_RISING_EDGE:
++ priv->polarity = STM32_LPTIM_CKPOL_RISING_EDGE;
++ return 0;
++ case COUNTER_SYNAPSE_ACTION_FALLING_EDGE:
++ priv->polarity = STM32_LPTIM_CKPOL_FALLING_EDGE;
++ return 0;
++ case COUNTER_SYNAPSE_ACTION_BOTH_EDGES:
++ priv->polarity = STM32_LPTIM_CKPOL_BOTH_EDGES;
++ return 0;
++ default:
++ return -EINVAL;
++ }
+ }
+
+ static const struct counter_ops stm32_lptim_cnt_ops = {
+ .count_read = stm32_lptim_cnt_read,
+- .function_get = stm32_lptim_cnt_function_get,
+- .function_set = stm32_lptim_cnt_function_set,
+- .action_get = stm32_lptim_cnt_action_get,
+- .action_set = stm32_lptim_cnt_action_set,
++ .function_read = stm32_lptim_cnt_function_read,
++ .function_write = stm32_lptim_cnt_function_write,
++ .action_read = stm32_lptim_cnt_action_read,
++ .action_write = stm32_lptim_cnt_action_write,
+ };
+
+ static struct counter_signal stm32_lptim_cnt_signals[] = {
+diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c
+index 3fb0debd7425d..0546e932db0c1 100644
+--- a/drivers/counter/stm32-timer-cnt.c
++++ b/drivers/counter/stm32-timer-cnt.c
+@@ -13,6 +13,7 @@
+ #include <linux/module.h>
+ #include <linux/pinctrl/consumer.h>
+ #include <linux/platform_device.h>
++#include <linux/types.h>
+
+ #define TIM_CCMR_CCXS (BIT(8) | BIT(0))
+ #define TIM_CCMR_MASK (TIM_CCMR_CC1S | TIM_CCMR_CC2S | \
+@@ -36,29 +37,15 @@ struct stm32_timer_cnt {
+ struct stm32_timer_regs bak;
+ };
+
+-/**
+- * enum stm32_count_function - enumerates stm32 timer counter encoder modes
+- * @STM32_COUNT_SLAVE_MODE_DISABLED: counts on internal clock when CEN=1
+- * @STM32_COUNT_ENCODER_MODE_1: counts TI1FP1 edges, depending on TI2FP2 level
+- * @STM32_COUNT_ENCODER_MODE_2: counts TI2FP2 edges, depending on TI1FP1 level
+- * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges
+- */
+-enum stm32_count_function {
+- STM32_COUNT_SLAVE_MODE_DISABLED,
+- STM32_COUNT_ENCODER_MODE_1,
+- STM32_COUNT_ENCODER_MODE_2,
+- STM32_COUNT_ENCODER_MODE_3,
+-};
+-
+ static const enum counter_function stm32_count_functions[] = {
+- [STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_FUNCTION_INCREASE,
+- [STM32_COUNT_ENCODER_MODE_1] = COUNTER_FUNCTION_QUADRATURE_X2_A,
+- [STM32_COUNT_ENCODER_MODE_2] = COUNTER_FUNCTION_QUADRATURE_X2_B,
+- [STM32_COUNT_ENCODER_MODE_3] = COUNTER_FUNCTION_QUADRATURE_X4,
++ COUNTER_FUNCTION_INCREASE,
++ COUNTER_FUNCTION_QUADRATURE_X2_A,
++ COUNTER_FUNCTION_QUADRATURE_X2_B,
++ COUNTER_FUNCTION_QUADRATURE_X4,
+ };
+
+ static int stm32_count_read(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 cnt;
+@@ -70,8 +57,7 @@ static int stm32_count_read(struct counter_device *counter,
+ }
+
+ static int stm32_count_write(struct counter_device *counter,
+- struct counter_count *count,
+- const unsigned long val)
++ struct counter_count *count, const u64 val)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 ceiling;
+@@ -83,9 +69,9 @@ static int stm32_count_write(struct counter_device *counter,
+ return regmap_write(priv->regmap, TIM_CNT, val);
+ }
+
+-static int stm32_count_function_get(struct counter_device *counter,
+- struct counter_count *count,
+- size_t *function)
++static int stm32_count_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 smcr;
+@@ -93,42 +79,42 @@ static int stm32_count_function_get(struct counter_device *counter,
+ regmap_read(priv->regmap, TIM_SMCR, &smcr);
+
+ switch (smcr & TIM_SMCR_SMS) {
+- case 0:
+- *function = STM32_COUNT_SLAVE_MODE_DISABLED;
++ case TIM_SMCR_SMS_SLAVE_MODE_DISABLED:
++ *function = COUNTER_FUNCTION_INCREASE;
+ return 0;
+- case 1:
+- *function = STM32_COUNT_ENCODER_MODE_1;
++ case TIM_SMCR_SMS_ENCODER_MODE_1:
++ *function = COUNTER_FUNCTION_QUADRATURE_X2_A;
+ return 0;
+- case 2:
+- *function = STM32_COUNT_ENCODER_MODE_2;
++ case TIM_SMCR_SMS_ENCODER_MODE_2:
++ *function = COUNTER_FUNCTION_QUADRATURE_X2_B;
+ return 0;
+- case 3:
+- *function = STM32_COUNT_ENCODER_MODE_3;
++ case TIM_SMCR_SMS_ENCODER_MODE_3:
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
+
+-static int stm32_count_function_set(struct counter_device *counter,
+- struct counter_count *count,
+- size_t function)
++static int stm32_count_function_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 cr1, sms;
+
+ switch (function) {
+- case STM32_COUNT_SLAVE_MODE_DISABLED:
+- sms = 0;
++ case COUNTER_FUNCTION_INCREASE:
++ sms = TIM_SMCR_SMS_SLAVE_MODE_DISABLED;
+ break;
+- case STM32_COUNT_ENCODER_MODE_1:
+- sms = 1;
++ case COUNTER_FUNCTION_QUADRATURE_X2_A:
++ sms = TIM_SMCR_SMS_ENCODER_MODE_1;
+ break;
+- case STM32_COUNT_ENCODER_MODE_2:
+- sms = 2;
++ case COUNTER_FUNCTION_QUADRATURE_X2_B:
++ sms = TIM_SMCR_SMS_ENCODER_MODE_2;
+ break;
+- case STM32_COUNT_ENCODER_MODE_3:
+- sms = 3;
++ case COUNTER_FUNCTION_QUADRATURE_X4:
++ sms = TIM_SMCR_SMS_ENCODER_MODE_3;
+ break;
+ default:
+ return -EINVAL;
+@@ -150,44 +136,37 @@ static int stm32_count_function_set(struct counter_device *counter,
+ return 0;
+ }
+
+-static ssize_t stm32_count_direction_read(struct counter_device *counter,
++static int stm32_count_direction_read(struct counter_device *counter,
+ struct counter_count *count,
+- void *private, char *buf)
++ enum counter_count_direction *direction)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+- const char *direction;
+ u32 cr1;
+
+ regmap_read(priv->regmap, TIM_CR1, &cr1);
+- direction = (cr1 & TIM_CR1_DIR) ? "backward" : "forward";
++ *direction = (cr1 & TIM_CR1_DIR) ? COUNTER_COUNT_DIRECTION_BACKWARD :
++ COUNTER_COUNT_DIRECTION_FORWARD;
+
+- return scnprintf(buf, PAGE_SIZE, "%s\n", direction);
++ return 0;
+ }
+
+-static ssize_t stm32_count_ceiling_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, char *buf)
++static int stm32_count_ceiling_read(struct counter_device *counter,
++ struct counter_count *count, u64 *ceiling)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 arr;
+
+ regmap_read(priv->regmap, TIM_ARR, &arr);
+
+- return snprintf(buf, PAGE_SIZE, "%u\n", arr);
++ *ceiling = arr;
++
++ return 0;
+ }
+
+-static ssize_t stm32_count_ceiling_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *private,
+- const char *buf, size_t len)
++static int stm32_count_ceiling_write(struct counter_device *counter,
++ struct counter_count *count, u64 ceiling)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+- unsigned int ceiling;
+- int ret;
+-
+- ret = kstrtouint(buf, 0, &ceiling);
+- if (ret)
+- return ret;
+
+ if (ceiling > priv->max_arr)
+ return -ERANGE;
+@@ -196,34 +175,27 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter,
+ regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0);
+ regmap_write(priv->regmap, TIM_ARR, ceiling);
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t stm32_count_enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *private, char *buf)
++static int stm32_count_enable_read(struct counter_device *counter,
++ struct counter_count *count, u8 *enable)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+ u32 cr1;
+
+ regmap_read(priv->regmap, TIM_CR1, &cr1);
+
+- return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)(cr1 & TIM_CR1_CEN));
++ *enable = cr1 & TIM_CR1_CEN;
++
++ return 0;
+ }
+
+-static ssize_t stm32_count_enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *private,
+- const char *buf, size_t len)
++static int stm32_count_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 enable)
+ {
+ struct stm32_timer_cnt *const priv = counter->priv;
+- int err;
+ u32 cr1;
+- bool enable;
+-
+- err = kstrtobool(buf, &enable);
+- if (err)
+- return err;
+
+ if (enable) {
+ regmap_read(priv->regmap, TIM_CR1, &cr1);
+@@ -242,70 +214,55 @@ static ssize_t stm32_count_enable_write(struct counter_device *counter,
+ /* Keep enabled state to properly handle low power states */
+ priv->enabled = enable;
+
+- return len;
++ return 0;
+ }
+
+-static const struct counter_count_ext stm32_count_ext[] = {
+- {
+- .name = "direction",
+- .read = stm32_count_direction_read,
+- },
+- {
+- .name = "enable",
+- .read = stm32_count_enable_read,
+- .write = stm32_count_enable_write
+- },
+- {
+- .name = "ceiling",
+- .read = stm32_count_ceiling_read,
+- .write = stm32_count_ceiling_write
+- },
+-};
+-
+-enum stm32_synapse_action {
+- STM32_SYNAPSE_ACTION_NONE,
+- STM32_SYNAPSE_ACTION_BOTH_EDGES
++static struct counter_comp stm32_count_ext[] = {
++ COUNTER_COMP_DIRECTION(stm32_count_direction_read),
++ COUNTER_COMP_ENABLE(stm32_count_enable_read, stm32_count_enable_write),
++ COUNTER_COMP_CEILING(stm32_count_ceiling_read,
++ stm32_count_ceiling_write),
+ };
+
+ static const enum counter_synapse_action stm32_synapse_actions[] = {
+- [STM32_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
+- [STM32_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES
++ COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES
+ };
+
+-static int stm32_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse,
+- size_t *action)
++static int stm32_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+- size_t function;
++ enum counter_function function;
+ int err;
+
+- err = stm32_count_function_get(counter, count, &function);
++ err = stm32_count_function_read(counter, count, &function);
+ if (err)
+ return err;
+
+ switch (function) {
+- case STM32_COUNT_SLAVE_MODE_DISABLED:
++ case COUNTER_FUNCTION_INCREASE:
+ /* counts on internal clock when CEN=1 */
+- *action = STM32_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ return 0;
+- case STM32_COUNT_ENCODER_MODE_1:
++ case COUNTER_FUNCTION_QUADRATURE_X2_A:
+ /* counts up/down on TI1FP1 edge depending on TI2FP2 level */
+ if (synapse->signal->id == count->synapses[0].signal->id)
+- *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ else
+- *action = STM32_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ return 0;
+- case STM32_COUNT_ENCODER_MODE_2:
++ case COUNTER_FUNCTION_QUADRATURE_X2_B:
+ /* counts up/down on TI2FP2 edge depending on TI1FP1 level */
+ if (synapse->signal->id == count->synapses[1].signal->id)
+- *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ else
+- *action = STM32_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ return 0;
+- case STM32_COUNT_ENCODER_MODE_3:
++ case COUNTER_FUNCTION_QUADRATURE_X4:
+ /* counts up/down on both TI1FP1 and TI2FP2 edges */
+- *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+ default:
+ return -EINVAL;
+@@ -315,9 +272,9 @@ static int stm32_action_get(struct counter_device *counter,
+ static const struct counter_ops stm32_timer_cnt_ops = {
+ .count_read = stm32_count_read,
+ .count_write = stm32_count_write,
+- .function_get = stm32_count_function_get,
+- .function_set = stm32_count_function_set,
+- .action_get = stm32_action_get,
++ .function_read = stm32_count_function_read,
++ .function_write = stm32_count_function_write,
++ .action_read = stm32_action_read,
+ };
+
+ static struct counter_signal stm32_signals[] = {
+diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
+index 94fe58bb3eab3..09817c953f9ab 100644
+--- a/drivers/counter/ti-eqep.c
++++ b/drivers/counter/ti-eqep.c
+@@ -13,6 +13,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/regmap.h>
++#include <linux/types.h>
+
+ /* 32-bit registers */
+ #define QPOSCNT 0x0
+@@ -73,19 +74,13 @@ enum {
+ };
+
+ /* Position Counter Input Modes */
+-enum {
++enum ti_eqep_count_func {
+ TI_EQEP_COUNT_FUNC_QUAD_COUNT,
+ TI_EQEP_COUNT_FUNC_DIR_COUNT,
+ TI_EQEP_COUNT_FUNC_UP_COUNT,
+ TI_EQEP_COUNT_FUNC_DOWN_COUNT,
+ };
+
+-enum {
+- TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES,
+- TI_EQEP_SYNAPSE_ACTION_RISING_EDGE,
+- TI_EQEP_SYNAPSE_ACTION_NONE,
+-};
+-
+ struct ti_eqep_cnt {
+ struct counter_device counter;
+ struct regmap *regmap32;
+@@ -93,7 +88,7 @@ struct ti_eqep_cnt {
+ };
+
+ static int ti_eqep_count_read(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val)
++ struct counter_count *count, u64 *val)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+ u32 cnt;
+@@ -105,7 +100,7 @@ static int ti_eqep_count_read(struct counter_device *counter,
+ }
+
+ static int ti_eqep_count_write(struct counter_device *counter,
+- struct counter_count *count, unsigned long val)
++ struct counter_count *count, u64 val)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+ u32 max;
+@@ -117,64 +112,100 @@ static int ti_eqep_count_write(struct counter_device *counter,
+ return regmap_write(priv->regmap32, QPOSCNT, val);
+ }
+
+-static int ti_eqep_function_get(struct counter_device *counter,
+- struct counter_count *count, size_t *function)
++static int ti_eqep_function_read(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+ u32 qdecctl;
+
+ regmap_read(priv->regmap16, QDECCTL, &qdecctl);
+- *function = (qdecctl & QDECCTL_QSRC) >> QDECCTL_QSRC_SHIFT;
++
++ switch ((qdecctl & QDECCTL_QSRC) >> QDECCTL_QSRC_SHIFT) {
++ case TI_EQEP_COUNT_FUNC_QUAD_COUNT:
++ *function = COUNTER_FUNCTION_QUADRATURE_X4;
++ break;
++ case TI_EQEP_COUNT_FUNC_DIR_COUNT:
++ *function = COUNTER_FUNCTION_PULSE_DIRECTION;
++ break;
++ case TI_EQEP_COUNT_FUNC_UP_COUNT:
++ *function = COUNTER_FUNCTION_INCREASE;
++ break;
++ case TI_EQEP_COUNT_FUNC_DOWN_COUNT:
++ *function = COUNTER_FUNCTION_DECREASE;
++ break;
++ }
+
+ return 0;
+ }
+
+-static int ti_eqep_function_set(struct counter_device *counter,
+- struct counter_count *count, size_t function)
++static int ti_eqep_function_write(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
++ enum ti_eqep_count_func qsrc;
++
++ switch (function) {
++ case COUNTER_FUNCTION_QUADRATURE_X4:
++ qsrc = TI_EQEP_COUNT_FUNC_QUAD_COUNT;
++ break;
++ case COUNTER_FUNCTION_PULSE_DIRECTION:
++ qsrc = TI_EQEP_COUNT_FUNC_DIR_COUNT;
++ break;
++ case COUNTER_FUNCTION_INCREASE:
++ qsrc = TI_EQEP_COUNT_FUNC_UP_COUNT;
++ break;
++ case COUNTER_FUNCTION_DECREASE:
++ qsrc = TI_EQEP_COUNT_FUNC_DOWN_COUNT;
++ break;
++ default:
++ /* should never reach this path */
++ return -EINVAL;
++ }
+
+ return regmap_write_bits(priv->regmap16, QDECCTL, QDECCTL_QSRC,
+- function << QDECCTL_QSRC_SHIFT);
++ qsrc << QDECCTL_QSRC_SHIFT);
+ }
+
+-static int ti_eqep_action_get(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse, size_t *action)
++static int ti_eqep_action_read(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+- size_t function;
++ enum counter_function function;
+ u32 qdecctl;
+ int err;
+
+- err = ti_eqep_function_get(counter, count, &function);
++ err = ti_eqep_function_read(counter, count, &function);
+ if (err)
+ return err;
+
+ switch (function) {
+- case TI_EQEP_COUNT_FUNC_QUAD_COUNT:
++ case COUNTER_FUNCTION_QUADRATURE_X4:
+ /* In quadrature mode, the rising and falling edge of both
+ * QEPA and QEPB trigger QCLK.
+ */
+- *action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ return 0;
+- case TI_EQEP_COUNT_FUNC_DIR_COUNT:
++ case COUNTER_FUNCTION_PULSE_DIRECTION:
+ /* In direction-count mode only rising edge of QEPA is counted
+ * and QEPB gives direction.
+ */
+ switch (synapse->signal->id) {
+ case TI_EQEP_SIGNAL_QEPA:
+- *action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ return 0;
+ case TI_EQEP_SIGNAL_QEPB:
+- *action = TI_EQEP_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ return 0;
+ default:
+ /* should never reach this path */
+ return -EINVAL;
+ }
+- case TI_EQEP_COUNT_FUNC_UP_COUNT:
+- case TI_EQEP_COUNT_FUNC_DOWN_COUNT:
++ case COUNTER_FUNCTION_INCREASE:
++ case COUNTER_FUNCTION_DECREASE:
+ /* In up/down-count modes only QEPA is counted and QEPB is not
+ * used.
+ */
+@@ -185,12 +216,12 @@ static int ti_eqep_action_get(struct counter_device *counter,
+ return err;
+
+ if (qdecctl & QDECCTL_XCR)
+- *action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+ else
+- *action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
++ *action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+ return 0;
+ case TI_EQEP_SIGNAL_QEPB:
+- *action = TI_EQEP_SYNAPSE_ACTION_NONE;
++ *action = COUNTER_SYNAPSE_ACTION_NONE;
+ return 0;
+ default:
+ /* should never reach this path */
+@@ -205,82 +236,67 @@ static int ti_eqep_action_get(struct counter_device *counter,
+ static const struct counter_ops ti_eqep_counter_ops = {
+ .count_read = ti_eqep_count_read,
+ .count_write = ti_eqep_count_write,
+- .function_get = ti_eqep_function_get,
+- .function_set = ti_eqep_function_set,
+- .action_get = ti_eqep_action_get,
++ .function_read = ti_eqep_function_read,
++ .function_write = ti_eqep_function_write,
++ .action_read = ti_eqep_action_read,
+ };
+
+-static ssize_t ti_eqep_position_ceiling_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *ext_priv, char *buf)
++static int ti_eqep_position_ceiling_read(struct counter_device *counter,
++ struct counter_count *count,
++ u64 *ceiling)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+ u32 qposmax;
+
+ regmap_read(priv->regmap32, QPOSMAX, &qposmax);
+
+- return sprintf(buf, "%u\n", qposmax);
++ *ceiling = qposmax;
++
++ return 0;
+ }
+
+-static ssize_t ti_eqep_position_ceiling_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *ext_priv, const char *buf,
+- size_t len)
++static int ti_eqep_position_ceiling_write(struct counter_device *counter,
++ struct counter_count *count,
++ u64 ceiling)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+- int err;
+- u32 res;
+
+- err = kstrtouint(buf, 0, &res);
+- if (err < 0)
+- return err;
++ if (ceiling != (u32)ceiling)
++ return -ERANGE;
+
+- regmap_write(priv->regmap32, QPOSMAX, res);
++ regmap_write(priv->regmap32, QPOSMAX, ceiling);
+
+- return len;
++ return 0;
+ }
+
+-static ssize_t ti_eqep_position_enable_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *ext_priv, char *buf)
++static int ti_eqep_position_enable_read(struct counter_device *counter,
++ struct counter_count *count, u8 *enable)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+ u32 qepctl;
+
+ regmap_read(priv->regmap16, QEPCTL, &qepctl);
+
+- return sprintf(buf, "%u\n", !!(qepctl & QEPCTL_PHEN));
++ *enable = !!(qepctl & QEPCTL_PHEN);
++
++ return 0;
+ }
+
+-static ssize_t ti_eqep_position_enable_write(struct counter_device *counter,
+- struct counter_count *count,
+- void *ext_priv, const char *buf,
+- size_t len)
++static int ti_eqep_position_enable_write(struct counter_device *counter,
++ struct counter_count *count, u8 enable)
+ {
+ struct ti_eqep_cnt *priv = counter->priv;
+- int err;
+- bool res;
+-
+- err = kstrtobool(buf, &res);
+- if (err < 0)
+- return err;
+
+- regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, res ? -1 : 0);
++ regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, enable ? -1 : 0);
+
+- return len;
++ return 0;
+ }
+
+-static struct counter_count_ext ti_eqep_position_ext[] = {
+- {
+- .name = "ceiling",
+- .read = ti_eqep_position_ceiling_read,
+- .write = ti_eqep_position_ceiling_write,
+- },
+- {
+- .name = "enable",
+- .read = ti_eqep_position_enable_read,
+- .write = ti_eqep_position_enable_write,
+- },
++static struct counter_comp ti_eqep_position_ext[] = {
++ COUNTER_COMP_CEILING(ti_eqep_position_ceiling_read,
++ ti_eqep_position_ceiling_write),
++ COUNTER_COMP_ENABLE(ti_eqep_position_enable_read,
++ ti_eqep_position_enable_write),
+ };
+
+ static struct counter_signal ti_eqep_signals[] = {
+@@ -295,16 +311,16 @@ static struct counter_signal ti_eqep_signals[] = {
+ };
+
+ static const enum counter_function ti_eqep_position_functions[] = {
+- [TI_EQEP_COUNT_FUNC_QUAD_COUNT] = COUNTER_FUNCTION_QUADRATURE_X4,
+- [TI_EQEP_COUNT_FUNC_DIR_COUNT] = COUNTER_FUNCTION_PULSE_DIRECTION,
+- [TI_EQEP_COUNT_FUNC_UP_COUNT] = COUNTER_FUNCTION_INCREASE,
+- [TI_EQEP_COUNT_FUNC_DOWN_COUNT] = COUNTER_FUNCTION_DECREASE,
++ COUNTER_FUNCTION_QUADRATURE_X4,
++ COUNTER_FUNCTION_PULSE_DIRECTION,
++ COUNTER_FUNCTION_INCREASE,
++ COUNTER_FUNCTION_DECREASE,
+ };
+
+ static const enum counter_synapse_action ti_eqep_position_synapse_actions[] = {
+- [TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+- [TI_EQEP_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+- [TI_EQEP_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
++ COUNTER_SYNAPSE_ACTION_NONE,
+ };
+
+ static struct counter_synapse ti_eqep_position_synapses[] = {
+diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
+index d0b10baf039ab..151771129c7ba 100644
+--- a/drivers/cpufreq/amd_freq_sensitivity.c
++++ b/drivers/cpufreq/amd_freq_sensitivity.c
+@@ -124,6 +124,8 @@ static int __init amd_freq_sensitivity_init(void)
+ if (!pcidev) {
+ if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK))
+ return -ENODEV;
++ } else {
++ pci_dev_put(pcidev);
+ }
+
+ if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val))
+diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
+index c10fc33b29b18..b74289a95a171 100644
+--- a/drivers/cpufreq/armada-37xx-cpufreq.c
++++ b/drivers/cpufreq/armada-37xx-cpufreq.c
+@@ -445,7 +445,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
+ return -ENODEV;
+ }
+
+- clk = clk_get(cpu_dev, 0);
++ clk = clk_get(cpu_dev, NULL);
+ if (IS_ERR(clk)) {
+ dev_err(cpu_dev, "Cannot get clock for CPU0\n");
+ return PTR_ERR(clk);
+diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
+index 4153150e20db5..f644c5e325fb2 100644
+--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
++++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
+@@ -434,7 +434,11 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv)
+ if (ret)
+ return ERR_PTR(ret);
+
+- table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1, sizeof(*table),
++ /*
++ * We allocate space for the 5 different P-STATES AVS,
++ * plus extra space for a terminating element.
++ */
++ table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1 + 1, sizeof(*table),
+ GFP_KERNEL);
+ if (!table)
+ return ERR_PTR(-ENOMEM);
+diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
+index d4c27022b9c9b..e0ff09d66c96b 100644
+--- a/drivers/cpufreq/cppc_cpufreq.c
++++ b/drivers/cpufreq/cppc_cpufreq.c
+@@ -303,52 +303,48 @@ static u64 cppc_get_dmi_max_khz(void)
+
+ /*
+ * If CPPC lowest_freq and nominal_freq registers are exposed then we can
+- * use them to convert perf to freq and vice versa
+- *
+- * If the perf/freq point lies between Nominal and Lowest, we can treat
+- * (Low perf, Low freq) and (Nom Perf, Nom freq) as 2D co-ordinates of a line
+- * and extrapolate the rest
+- * For perf/freq > Nominal, we use the ratio perf:freq at Nominal for conversion
++ * use them to convert perf to freq and vice versa. The conversion is
++ * extrapolated as an affine function passing by the 2 points:
++ * - (Low perf, Low freq)
++ * - (Nominal perf, Nominal perf)
+ */
+ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data,
+ unsigned int perf)
+ {
+ struct cppc_perf_caps *caps = &cpu_data->perf_caps;
++ s64 retval, offset = 0;
+ static u64 max_khz;
+ u64 mul, div;
+
+ if (caps->lowest_freq && caps->nominal_freq) {
+- if (perf >= caps->nominal_perf) {
+- mul = caps->nominal_freq;
+- div = caps->nominal_perf;
+- } else {
+- mul = caps->nominal_freq - caps->lowest_freq;
+- div = caps->nominal_perf - caps->lowest_perf;
+- }
++ mul = caps->nominal_freq - caps->lowest_freq;
++ div = caps->nominal_perf - caps->lowest_perf;
++ offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div);
+ } else {
+ if (!max_khz)
+ max_khz = cppc_get_dmi_max_khz();
+ mul = max_khz;
+ div = caps->highest_perf;
+ }
+- return (u64)perf * mul / div;
++
++ retval = offset + div64_u64(perf * mul, div);
++ if (retval >= 0)
++ return retval;
++ return 0;
+ }
+
+ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data,
+ unsigned int freq)
+ {
+ struct cppc_perf_caps *caps = &cpu_data->perf_caps;
++ s64 retval, offset = 0;
+ static u64 max_khz;
+ u64 mul, div;
+
+ if (caps->lowest_freq && caps->nominal_freq) {
+- if (freq >= caps->nominal_freq) {
+- mul = caps->nominal_perf;
+- div = caps->nominal_freq;
+- } else {
+- mul = caps->lowest_perf;
+- div = caps->lowest_freq;
+- }
++ mul = caps->nominal_perf - caps->lowest_perf;
++ div = caps->nominal_freq - caps->lowest_freq;
++ offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div);
+ } else {
+ if (!max_khz)
+ max_khz = cppc_get_dmi_max_khz();
+@@ -356,7 +352,10 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data,
+ div = max_khz;
+ }
+
+- return (u64)freq * mul / div;
++ retval = offset + div64_u64(freq * mul, div);
++ if (retval >= 0)
++ return retval;
++ return 0;
+ }
+
+ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
+diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
+index ca1d103ec4492..e1b5975c7daa1 100644
+--- a/drivers/cpufreq/cpufreq-dt-platdev.c
++++ b/drivers/cpufreq/cpufreq-dt-platdev.c
+@@ -133,6 +133,7 @@ static const struct of_device_id blocklist[] __initconst = {
+ { .compatible = "nvidia,tegra30", },
+ { .compatible = "nvidia,tegra124", },
+ { .compatible = "nvidia,tegra210", },
++ { .compatible = "nvidia,tegra234", },
+
+ { .compatible = "qcom,apq8096", },
+ { .compatible = "qcom,msm8996", },
+@@ -143,6 +144,7 @@ static const struct of_device_id blocklist[] __initconst = {
+ { .compatible = "qcom,sc8180x", },
+ { .compatible = "qcom,sdm845", },
+ { .compatible = "qcom,sm6350", },
++ { .compatible = "qcom,sm6375", },
+ { .compatible = "qcom,sm8150", },
+ { .compatible = "qcom,sm8250", },
+ { .compatible = "qcom,sm8350", },
+diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
+index 5782b15a8caad..c2227be7bad88 100644
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -28,6 +28,7 @@
+ #include <linux/suspend.h>
+ #include <linux/syscore_ops.h>
+ #include <linux/tick.h>
++#include <linux/units.h>
+ #include <trace/events/power.h>
+
+ static LIST_HEAD(cpufreq_policy_list);
+@@ -449,8 +450,10 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
+ policy->cur,
+ policy->cpuinfo.max_freq);
+
++ spin_lock(&policy->transition_lock);
+ policy->transition_ongoing = false;
+ policy->transition_task = NULL;
++ spin_unlock(&policy->transition_lock);
+
+ wake_up(&policy->transition_wait);
+ }
+@@ -531,7 +534,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
+
+ target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+- if (!cpufreq_driver->target_index)
++ if (!policy->freq_table)
+ return target_freq;
+
+ idx = cpufreq_frequency_table_target(policy, target_freq, relation);
+@@ -1004,10 +1007,9 @@ static struct kobj_type ktype_cpufreq = {
+ .release = cpufreq_sysfs_release,
+ };
+
+-static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
++static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
++ struct device *dev)
+ {
+- struct device *dev = get_cpu_device(cpu);
+-
+ if (unlikely(!dev))
+ return;
+
+@@ -1212,6 +1214,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
+ if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL))
+ goto err_free_rcpumask;
+
++ init_completion(&policy->kobj_unregister);
+ ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+ cpufreq_global_kobject, "policy%u", cpu);
+ if (ret) {
+@@ -1250,7 +1253,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
+ init_rwsem(&policy->rwsem);
+ spin_lock_init(&policy->transition_lock);
+ init_waitqueue_head(&policy->transition_wait);
+- init_completion(&policy->kobj_unregister);
+ INIT_WORK(&policy->update, handle_update);
+
+ policy->cpu = cpu;
+@@ -1391,7 +1393,7 @@ static int cpufreq_online(unsigned int cpu)
+ if (new_policy) {
+ for_each_cpu(j, policy->related_cpus) {
+ per_cpu(cpufreq_cpu_data, j) = policy;
+- add_cpu_dev_symlink(policy, j);
++ add_cpu_dev_symlink(policy, j, get_cpu_device(j));
+ }
+
+ policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
+@@ -1403,7 +1405,7 @@ static int cpufreq_online(unsigned int cpu)
+
+ ret = freq_qos_add_request(&policy->constraints,
+ policy->min_freq_req, FREQ_QOS_MIN,
+- policy->min);
++ FREQ_QOS_MIN_DEFAULT_VALUE);
+ if (ret < 0) {
+ /*
+ * So we don't call freq_qos_remove_request() for an
+@@ -1423,7 +1425,7 @@ static int cpufreq_online(unsigned int cpu)
+
+ ret = freq_qos_add_request(&policy->constraints,
+ policy->max_freq_req, FREQ_QOS_MAX,
+- policy->max);
++ FREQ_QOS_MAX_DEFAULT_VALUE);
+ if (ret < 0) {
+ policy->max_freq_req = NULL;
+ goto out_destroy_policy;
+@@ -1565,7 +1567,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
+ /* Create sysfs link on CPU registration */
+ policy = per_cpu(cpufreq_cpu_data, cpu);
+ if (policy)
+- add_cpu_dev_symlink(policy, cpu);
++ add_cpu_dev_symlink(policy, cpu, dev);
+
+ return 0;
+ }
+@@ -1702,6 +1704,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
+ return new_freq;
+
+ if (policy->cur != new_freq) {
++ /*
++ * For some platforms, the frequency returned by hardware may be
++ * slightly different from what is provided in the frequency
++ * table, for example hardware may return 499 MHz instead of 500
++ * MHz. In such cases it is better to avoid getting into
++ * unnecessary frequency updates.
++ */
++ if (abs(policy->cur - new_freq) < KHZ_PER_MHZ)
++ return policy->cur;
++
+ cpufreq_out_of_sync(policy, new_freq);
+ if (update)
+ schedule_work(&policy->update);
+@@ -2523,8 +2535,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
+ if (ret)
+ return ret;
+
++ /*
++	 * Resolve policy min/max to available frequencies. It ensures
++	 * that frequency resolution will neither overshoot the requested
++	 * maximum nor undershoot the requested minimum.
++ */
+ policy->min = new_data.min;
+ policy->max = new_data.max;
++ policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
++ policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
+ trace_cpu_frequency_limits(policy);
+
+ policy->cached_target_freq = UINT_MAX;
+diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
+index 63f7c219062b9..55c80319d2684 100644
+--- a/drivers/cpufreq/cpufreq_governor.c
++++ b/drivers/cpufreq/cpufreq_governor.c
+@@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
+ gov->free(policy_dbs);
+ }
+
++static void cpufreq_dbs_data_release(struct kobject *kobj)
++{
++ struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj));
++ struct dbs_governor *gov = dbs_data->gov;
++
++ gov->exit(dbs_data);
++ kfree(dbs_data);
++}
++
+ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
+ {
+ struct dbs_governor *gov = dbs_governor_of(policy);
+@@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
+ goto free_policy_dbs_info;
+ }
+
++ dbs_data->gov = gov;
+ gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
+
+ ret = gov->init(dbs_data);
+@@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
+ policy->governor_data = policy_dbs;
+
+ gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
++ gov->kobj_type.release = cpufreq_dbs_data_release;
+ ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
+ get_governor_parent_kobj(policy),
+ "%s", gov->gov.name);
+@@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
+
+ policy->governor_data = NULL;
+
+- if (!count) {
+- if (!have_governor_per_policy())
+- gov->gdbs_data = NULL;
+-
+- gov->exit(dbs_data);
+- kfree(dbs_data);
+- }
++ if (!count && !have_governor_per_policy())
++ gov->gdbs_data = NULL;
+
+ free_policy_dbs_info(policy_dbs, gov);
+
+diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
+index bab8e61403771..a6de26318abb8 100644
+--- a/drivers/cpufreq/cpufreq_governor.h
++++ b/drivers/cpufreq/cpufreq_governor.h
+@@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
+ /* Governor demand based switching data (per-policy or global). */
+ struct dbs_data {
+ struct gov_attr_set attr_set;
++ struct dbs_governor *gov;
+ void *tuners;
+ unsigned int ignore_nice_load;
+ unsigned int sampling_rate;
+diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
+index a6f365b9cc1ad..771770ea0ed0b 100644
+--- a/drivers/cpufreq/cpufreq_governor_attr_set.c
++++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
+@@ -8,11 +8,6 @@
+
+ #include "cpufreq_governor.h"
+
+-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+-{
+- return container_of(kobj, struct gov_attr_set, kobj);
+-}
+-
+ static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+ {
+ return container_of(attr, struct governor_attr, attr);
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index 8c176b7dae415..736cb2cfcbb08 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -27,6 +27,7 @@
+ #include <linux/pm_qos.h>
+ #include <trace/events/power.h>
+
++#include <asm/cpu.h>
+ #include <asm/div64.h>
+ #include <asm/msr.h>
+ #include <asm/cpu_device_id.h>
+@@ -277,10 +278,10 @@ static struct cpudata **all_cpu_data;
+ * structure is used to store those callbacks.
+ */
+ struct pstate_funcs {
+- int (*get_max)(void);
+- int (*get_max_physical)(void);
+- int (*get_min)(void);
+- int (*get_turbo)(void);
++ int (*get_max)(int cpu);
++ int (*get_max_physical)(int cpu);
++ int (*get_min)(int cpu);
++ int (*get_turbo)(int cpu);
+ int (*get_scaling)(void);
+ int (*get_cpu_scaling)(int cpu);
+ int (*get_aperf_mperf_shift)(void);
+@@ -335,6 +336,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
+
+ static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
+
++#define CPPC_MAX_PERF U8_MAX
++
+ static void intel_pstate_set_itmt_prio(int cpu)
+ {
+ struct cppc_perf_caps cppc_perf;
+@@ -345,6 +348,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
+ if (ret)
+ return;
+
++ /*
++ * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
++ * In this case we can't use CPPC.highest_perf to enable ITMT.
++ * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
++ */
++ if (cppc_perf.highest_perf == CPPC_MAX_PERF)
++ cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
++
+ /*
+ * The priorities can be set regardless of whether or not
+ * sched_set_itmt_support(true) has been called and it is valid to
+@@ -385,16 +396,6 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
+
+ return cppc_perf.nominal_perf;
+ }
+-
+-static u32 intel_pstate_cppc_nominal(int cpu)
+-{
+- u64 nominal_perf;
+-
+- if (cppc_get_nominal_perf(cpu, &nominal_perf))
+- return 0;
+-
+- return nominal_perf;
+-}
+ #else /* CONFIG_ACPI_CPPC_LIB */
+ static inline void intel_pstate_set_itmt_prio(int cpu)
+ {
+@@ -447,20 +448,6 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+ (u32) cpu->acpi_perf_data.states[i].control);
+ }
+
+- /*
+- * The _PSS table doesn't contain whole turbo frequency range.
+- * This just contains +1 MHZ above the max non turbo frequency,
+- * with control value corresponding to max turbo ratio. But
+- * when cpufreq set policy is called, it will call with this
+- * max frequency, which will cause a reduced performance as
+- * this driver uses real max turbo frequency as the max
+- * frequency. So correct this frequency in _PSS table to
+- * correct max turbo frequency based on the turbo state.
+- * Also need to convert to MHz as _PSS freq is in MHz.
+- */
+- if (!global.turbo_disabled)
+- cpu->acpi_perf_data.states[0].core_frequency =
+- policy->cpuinfo.max_freq / 1000;
+ cpu->valid_pss_table = true;
+ pr_debug("_PPC limits will be enforced\n");
+
+@@ -518,34 +505,18 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
+ {
+ int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
+ int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+- int perf_ctl_turbo = pstate_funcs.get_turbo();
+- int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
++ int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
+ int scaling = cpu->pstate.scaling;
+
+ pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+- pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
+ pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+ pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
+ pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
+ pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
+ pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+
+- /*
+- * If the product of the HWP performance scaling factor and the HWP_CAP
+- * highest performance is greater than the maximum turbo frequency
+- * corresponding to the pstate_funcs.get_turbo() return value, the
+- * scaling factor is too high, so recompute it to make the HWP_CAP
+- * highest performance correspond to the maximum turbo frequency.
+- */
+- if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
+- cpu->pstate.turbo_freq = turbo_freq;
+- scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
+- cpu->pstate.scaling = scaling;
+-
+- pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
+- cpu->cpu, scaling);
+- }
+-
++ cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_pstate * scaling,
++ perf_ctl_scaling);
+ cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
+ perf_ctl_scaling);
+
+@@ -839,6 +810,8 @@ static ssize_t store_energy_performance_preference(
+ err = cpufreq_start_governor(policy);
+ if (!ret)
+ ret = err;
++ } else {
++ ret = 0;
+ }
+ }
+
+@@ -998,9 +971,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
+ */
+ value &= ~GENMASK_ULL(31, 24);
+ value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
+- WRITE_ONCE(cpu->hwp_req_cached, value);
++ /*
++ * However, make sure that EPP will be set to "performance" when
++ * the CPU is brought back online again and the "performance"
++ * scaling algorithm is still in effect.
++ */
++ cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
+ }
+
++ /*
++ * Clear the desired perf field in the cached HWP request value to
++ * prevent nonzero desired values from being leaked into the active
++ * mode.
++ */
++ value &= ~HWP_DESIRED_PERF(~0L);
++ WRITE_ONCE(cpu->hwp_req_cached, value);
++
+ value &= ~GENMASK_ULL(31, 0);
+ min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
+
+@@ -1557,7 +1543,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
+ cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+ }
+
+-static int atom_get_min_pstate(void)
++static int atom_get_min_pstate(int not_used)
+ {
+ u64 value;
+
+@@ -1565,7 +1551,7 @@ static int atom_get_min_pstate(void)
+ return (value >> 8) & 0x7F;
+ }
+
+-static int atom_get_max_pstate(void)
++static int atom_get_max_pstate(int not_used)
+ {
+ u64 value;
+
+@@ -1573,7 +1559,7 @@ static int atom_get_max_pstate(void)
+ return (value >> 16) & 0x7F;
+ }
+
+-static int atom_get_turbo_pstate(void)
++static int atom_get_turbo_pstate(int not_used)
+ {
+ u64 value;
+
+@@ -1651,23 +1637,23 @@ static void atom_get_vid(struct cpudata *cpudata)
+ cpudata->vid.turbo = value & 0x7f;
+ }
+
+-static int core_get_min_pstate(void)
++static int core_get_min_pstate(int cpu)
+ {
+ u64 value;
+
+- rdmsrl(MSR_PLATFORM_INFO, value);
++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ return (value >> 40) & 0xFF;
+ }
+
+-static int core_get_max_pstate_physical(void)
++static int core_get_max_pstate_physical(int cpu)
+ {
+ u64 value;
+
+- rdmsrl(MSR_PLATFORM_INFO, value);
++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+ return (value >> 8) & 0xFF;
+ }
+
+-static int core_get_tdp_ratio(u64 plat_info)
++static int core_get_tdp_ratio(int cpu, u64 plat_info)
+ {
+ /* Check how many TDP levels present */
+ if (plat_info & 0x600000000) {
+@@ -1677,13 +1663,13 @@ static int core_get_tdp_ratio(u64 plat_info)
+ int err;
+
+ /* Get the TDP level (0, 1, 2) to get ratios */
+- err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
++ err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+ if (err)
+ return err;
+
+ /* TDP MSR are continuous starting at 0x648 */
+ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
+- err = rdmsrl_safe(tdp_msr, &tdp_ratio);
++ err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio);
+ if (err)
+ return err;
+
+@@ -1700,7 +1686,7 @@ static int core_get_tdp_ratio(u64 plat_info)
+ return -ENXIO;
+ }
+
+-static int core_get_max_pstate(void)
++static int core_get_max_pstate(int cpu)
+ {
+ u64 tar;
+ u64 plat_info;
+@@ -1708,10 +1694,10 @@ static int core_get_max_pstate(void)
+ int tdp_ratio;
+ int err;
+
+- rdmsrl(MSR_PLATFORM_INFO, plat_info);
++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
+ max_pstate = (plat_info >> 8) & 0xFF;
+
+- tdp_ratio = core_get_tdp_ratio(plat_info);
++ tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
+ if (tdp_ratio <= 0)
+ return max_pstate;
+
+@@ -1720,7 +1706,7 @@ static int core_get_max_pstate(void)
+ return tdp_ratio;
+ }
+
+- err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
++ err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar);
+ if (!err) {
+ int tar_levels;
+
+@@ -1735,13 +1721,13 @@ static int core_get_max_pstate(void)
+ return max_pstate;
+ }
+
+-static int core_get_turbo_pstate(void)
++static int core_get_turbo_pstate(int cpu)
+ {
+ u64 value;
+ int nont, ret;
+
+- rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
+- nont = core_get_max_pstate();
++ rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
++ nont = core_get_max_pstate(cpu);
+ ret = (value) & 255;
+ if (ret <= nont)
+ ret = nont;
+@@ -1769,50 +1755,37 @@ static int knl_get_aperf_mperf_shift(void)
+ return 10;
+ }
+
+-static int knl_get_turbo_pstate(void)
++static int knl_get_turbo_pstate(int cpu)
+ {
+ u64 value;
+ int nont, ret;
+
+- rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
+- nont = core_get_max_pstate();
++ rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
++ nont = core_get_max_pstate(cpu);
+ ret = (((value) >> 8) & 0xFF);
+ if (ret <= nont)
+ ret = nont;
+ return ret;
+ }
+
+-#ifdef CONFIG_ACPI_CPPC_LIB
+-static u32 hybrid_ref_perf;
+-
+-static int hybrid_get_cpu_scaling(int cpu)
++static void hybrid_get_type(void *data)
+ {
+- return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
+- intel_pstate_cppc_nominal(cpu));
++ u8 *cpu_type = data;
++
++ *cpu_type = get_this_hybrid_cpu_type();
+ }
+
+-static void intel_pstate_cppc_set_cpu_scaling(void)
++static int hybrid_get_cpu_scaling(int cpu)
+ {
+- u32 min_nominal_perf = U32_MAX;
+- int cpu;
+-
+- for_each_present_cpu(cpu) {
+- u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
++ u8 cpu_type = 0;
+
+- if (nominal_perf && nominal_perf < min_nominal_perf)
+- min_nominal_perf = nominal_perf;
+- }
++ smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
++	/* P-cores have a smaller perf level-to-frequency scaling factor. */
++ if (cpu_type == 0x40)
++ return 78741;
+
+- if (min_nominal_perf < U32_MAX) {
+- hybrid_ref_perf = min_nominal_perf;
+- pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+- }
+-}
+-#else
+-static inline void intel_pstate_cppc_set_cpu_scaling(void)
+-{
++ return core_get_scaling();
+ }
+-#endif /* CONFIG_ACPI_CPPC_LIB */
+
+ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
+ {
+@@ -1842,10 +1815,10 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
+
+ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ {
+- int perf_ctl_max_phys = pstate_funcs.get_max_physical();
++ int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu);
+ int perf_ctl_scaling = pstate_funcs.get_scaling();
+
+- cpu->pstate.min_pstate = pstate_funcs.get_min();
++ cpu->pstate.min_pstate = pstate_funcs.get_min(cpu->cpu);
+ cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
+ cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
+
+@@ -1861,8 +1834,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
+ }
+ } else {
+ cpu->pstate.scaling = perf_ctl_scaling;
+- cpu->pstate.max_pstate = pstate_funcs.get_max();
+- cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
++ cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu);
++ cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(cpu->cpu);
+ }
+
+ if (cpu->pstate.scaling == perf_ctl_scaling) {
+@@ -2233,6 +2206,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
+ X86_MATCH(SKYLAKE_X, core_funcs),
+ X86_MATCH(COMETLAKE, core_funcs),
+ X86_MATCH(ICELAKE_X, core_funcs),
++ X86_MATCH(TIGERLAKE, core_funcs),
+ {}
+ };
+ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
+@@ -2241,6 +2215,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
+ X86_MATCH(BROADWELL_D, core_funcs),
+ X86_MATCH(BROADWELL_X, core_funcs),
+ X86_MATCH(SKYLAKE_X, core_funcs),
++ X86_MATCH(ICELAKE_X, core_funcs),
+ {}
+ };
+
+@@ -2902,6 +2877,27 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+ return intel_pstate_cpu_exit(policy);
+ }
+
++static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
++{
++ intel_pstate_suspend(policy);
++
++ if (hwp_active) {
++ struct cpudata *cpu = all_cpu_data[policy->cpu];
++ u64 value = READ_ONCE(cpu->hwp_req_cached);
++
++ /*
++ * Clear the desired perf field in MSR_HWP_REQUEST in case
++ * intel_cpufreq_adjust_perf() is in use and the last value
++ * written by it may not be suitable.
++ */
++ value &= ~HWP_DESIRED_PERF(~0L);
++ wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
++ WRITE_ONCE(cpu->hwp_req_cached, value);
++ }
++
++ return 0;
++}
++
+ static struct cpufreq_driver intel_cpufreq = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = intel_cpufreq_verify_policy,
+@@ -2911,7 +2907,7 @@ static struct cpufreq_driver intel_cpufreq = {
+ .exit = intel_cpufreq_cpu_exit,
+ .offline = intel_cpufreq_cpu_offline,
+ .online = intel_pstate_cpu_online,
+- .suspend = intel_pstate_suspend,
++ .suspend = intel_cpufreq_suspend,
+ .resume = intel_pstate_resume,
+ .update_limits = intel_pstate_update_limits,
+ .name = "intel_cpufreq",
+@@ -3016,9 +3012,9 @@ static unsigned int force_load __initdata;
+
+ static int __init intel_pstate_msrs_not_valid(void)
+ {
+- if (!pstate_funcs.get_max() ||
+- !pstate_funcs.get_min() ||
+- !pstate_funcs.get_turbo())
++ if (!pstate_funcs.get_max(0) ||
++ !pstate_funcs.get_min(0) ||
++ !pstate_funcs.get_turbo(0))
+ return -ENODEV;
+
+ return 0;
+@@ -3234,7 +3230,7 @@ static int __init intel_pstate_init(void)
+ default_driver = &intel_pstate;
+
+ if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
+- intel_pstate_cppc_set_cpu_scaling();
++ pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+
+ goto hwp_cpu_matched;
+ }
+diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
+index 866163883b48d..bfe240c726e34 100644
+--- a/drivers/cpufreq/mediatek-cpufreq.c
++++ b/drivers/cpufreq/mediatek-cpufreq.c
+@@ -44,6 +44,8 @@ struct mtk_cpu_dvfs_info {
+ bool need_voltage_tracking;
+ };
+
++static struct platform_device *cpufreq_pdev;
++
+ static LIST_HEAD(dvfs_info_list);
+
+ static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
+@@ -547,7 +549,6 @@ static int __init mtk_cpufreq_driver_init(void)
+ {
+ struct device_node *np;
+ const struct of_device_id *match;
+- struct platform_device *pdev;
+ int err;
+
+ np = of_find_node_by_path("/");
+@@ -571,16 +572,23 @@ static int __init mtk_cpufreq_driver_init(void)
+ * and the device registration codes are put here to handle defer
+ * probing.
+ */
+- pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0);
+- if (IS_ERR(pdev)) {
++ cpufreq_pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0);
++ if (IS_ERR(cpufreq_pdev)) {
+ pr_err("failed to register mtk-cpufreq platform device\n");
+ platform_driver_unregister(&mtk_cpufreq_platdrv);
+- return PTR_ERR(pdev);
++ return PTR_ERR(cpufreq_pdev);
+ }
+
+ return 0;
+ }
+-device_initcall(mtk_cpufreq_driver_init);
++module_init(mtk_cpufreq_driver_init)
++
++static void __exit mtk_cpufreq_driver_exit(void)
++{
++ platform_device_unregister(cpufreq_pdev);
++ platform_driver_unregister(&mtk_cpufreq_platdrv);
++}
++module_exit(mtk_cpufreq_driver_exit)
+
+ MODULE_DESCRIPTION("MediaTek CPUFreq driver");
+ MODULE_AUTHOR("Pi-Cheng Chen <pi-cheng.chen@linaro.org>");
+diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
+index 4f20c6a9108df..8e41fe9ee870d 100644
+--- a/drivers/cpufreq/pmac32-cpufreq.c
++++ b/drivers/cpufreq/pmac32-cpufreq.c
+@@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
+ if (slew_done_gpio_np)
+ slew_done_gpio = read_gpio(slew_done_gpio_np);
+
++ of_node_put(volt_gpio_np);
++ of_node_put(freq_gpio_np);
++ of_node_put(slew_done_gpio_np);
++
+ /* If we use the frequency GPIOs, calculate the min/max speeds based
+ * on the bus frequencies
+ */
+diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
+index 12ab4014af712..94fe0e15623e4 100644
+--- a/drivers/cpufreq/powernow-k8.c
++++ b/drivers/cpufreq/powernow-k8.c
+@@ -1101,7 +1101,8 @@ static int powernowk8_cpu_exit(struct cpufreq_policy *pol)
+
+ kfree(data->powernow_table);
+ kfree(data);
+- for_each_cpu(cpu, pol->cpus)
++ /* pol->cpus will be empty here, use related_cpus instead. */
++ for_each_cpu(cpu, pol->related_cpus)
+ per_cpu(powernow_data, cpu) = NULL;
+
+ return 0;
+diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
+index a2be0df7e1747..bbcba2c38e853 100644
+--- a/drivers/cpufreq/qcom-cpufreq-hw.c
++++ b/drivers/cpufreq/qcom-cpufreq-hw.c
+@@ -24,12 +24,16 @@
+ #define CLK_HW_DIV 2
+ #define LUT_TURBO_IND 1
+
++#define GT_IRQ_STATUS BIT(2)
++
+ #define HZ_PER_KHZ 1000
+
+ struct qcom_cpufreq_soc_data {
+ u32 reg_enable;
++ u32 reg_domain_state;
+ u32 reg_freq_lut;
+ u32 reg_volt_lut;
++ u32 reg_intr_clr;
+ u32 reg_current_vote;
+ u32 reg_perf_state;
+ u8 lut_row_size;
+@@ -173,6 +177,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
+ }
+ } else if (ret != -ENODEV) {
+ dev_err(cpu_dev, "Invalid opp table in device tree\n");
++ kfree(table);
+ return ret;
+ } else {
+ policy->fast_switch_possible = true;
+@@ -266,28 +271,31 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
+ }
+ }
+
+-static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
++static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
+ {
+- unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
++ unsigned int lval;
++
++ if (data->soc_data->reg_current_vote)
++ lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff;
++ else
++ lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff;
+
+- return (val & 0x3FF) * 19200;
++ return lval * xo_rate;
+ }
+
+ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+ {
+ unsigned long max_capacity, capacity, freq_hz, throttled_freq;
+ struct cpufreq_policy *policy = data->policy;
+- int cpu = cpumask_first(policy->cpus);
++ int cpu = cpumask_first(policy->related_cpus);
+ struct device *dev = get_cpu_device(cpu);
+ struct dev_pm_opp *opp;
+- unsigned int freq;
+
+ /*
+ * Get the h/w throttled frequency, normalize it using the
+ * registered opp table and use it to calculate thermal pressure.
+ */
+- freq = qcom_lmh_get_throttle_freq(data);
+- freq_hz = freq * HZ_PER_KHZ;
++ freq_hz = qcom_lmh_get_throttle_freq(data);
+
+ opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
+ if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
+@@ -304,7 +312,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+ if (capacity > max_capacity)
+ capacity = max_capacity;
+
+- arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
++ arch_set_thermal_pressure(policy->related_cpus,
++ max_capacity - capacity);
+
+ /*
+ * In the unlikely case policy is unregistered do not enable
+@@ -342,9 +351,13 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
+
+ /* Disable interrupt and enable polling */
+ disable_irq_nosync(c_data->throttle_irq);
+- qcom_lmh_dcvs_notify(c_data);
++ schedule_delayed_work(&c_data->throttle_work, 0);
+
+- return 0;
++ if (c_data->soc_data->reg_intr_clr)
++ writel_relaxed(GT_IRQ_STATUS,
++ c_data->base + c_data->soc_data->reg_intr_clr);
++
++ return IRQ_HANDLED;
+ }
+
+ static const struct qcom_cpufreq_soc_data qcom_soc_data = {
+@@ -358,8 +371,10 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = {
+
+ static const struct qcom_cpufreq_soc_data epss_soc_data = {
+ .reg_enable = 0x0,
++ .reg_domain_state = 0x20,
+ .reg_freq_lut = 0x100,
+ .reg_volt_lut = 0x200,
++ .reg_intr_clr = 0x308,
+ .reg_perf_state = 0x320,
+ .lut_row_size = 4,
+ };
+diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
+index d1744b5d96190..6e011e8bfb6a9 100644
+--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
++++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
+@@ -130,7 +130,7 @@ static void get_krait_bin_format_b(struct device *cpu_dev,
+ }
+
+ /* Check PVS_BLOW_STATUS */
+- pte_efuse = *(((u32 *)buf) + 4);
++ pte_efuse = *(((u32 *)buf) + 1);
+ pte_efuse &= BIT(21);
+ if (pte_efuse) {
+ dev_dbg(cpu_dev, "PVS bin: %d\n", *pvs);
+@@ -215,6 +215,7 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev,
+ int speed = 0, pvs = 0, pvs_ver = 0;
+ u8 *speedbin;
+ size_t len;
++ int ret = 0;
+
+ speedbin = nvmem_cell_read(speedbin_nvmem, &len);
+
+@@ -232,7 +233,8 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev,
+ break;
+ default:
+ dev_err(cpu_dev, "Unable to read nvmem data. Defaulting to 0!\n");
+- return -ENODEV;
++ ret = -ENODEV;
++ goto len_error;
+ }
+
+ snprintf(*pvs_name, sizeof("speedXX-pvsXX-vXX"), "speed%d-pvs%d-v%d",
+@@ -240,8 +242,9 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev,
+
+ drv->versions = (1 << speed);
+
++len_error:
+ kfree(speedbin);
+- return 0;
++ return ret;
+ }
+
+ static const struct qcom_cpufreq_match_data match_data_kryo = {
+@@ -264,7 +267,8 @@ static int qcom_cpufreq_probe(struct platform_device *pdev)
+ struct nvmem_cell *speedbin_nvmem;
+ struct device_node *np;
+ struct device *cpu_dev;
+- char *pvs_name = "speedXX-pvsXX-vXX";
++ char pvs_name_buffer[] = "speedXX-pvsXX-vXX";
++ char *pvs_name = pvs_name_buffer;
+ unsigned cpu;
+ const struct of_device_id *match;
+ int ret;
+diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
+index 6b6b20da2bcfc..573b417e14833 100644
+--- a/drivers/cpufreq/qoriq-cpufreq.c
++++ b/drivers/cpufreq/qoriq-cpufreq.c
+@@ -275,6 +275,7 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev)
+
+ np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist);
+ if (np) {
++ of_node_put(np);
+ dev_info(&pdev->dev, "Disabling due to erratum A-008083");
+ return -ENODEV;
+ }
+diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+index 2deed8d8773fa..75e1bf3a08f7c 100644
+--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
++++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+@@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ ret = sun50i_cpufreq_get_efuse(&speed);
+- if (ret)
++ if (ret) {
++ kfree(opp_tables);
+ return ret;
++ }
+
+ snprintf(name, MAX_NAME_LEN, "speed%d", speed);
+
+diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
+index ff2c3f8e4668a..ce5c415fb04d9 100644
+--- a/drivers/cpuidle/cpuidle-psci-domain.c
++++ b/drivers/cpuidle/cpuidle-psci-domain.c
+@@ -182,7 +182,8 @@ static void psci_pd_remove(void)
+ struct psci_pd_provider *pd_provider, *it;
+ struct generic_pm_domain *genpd;
+
+- list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) {
++ list_for_each_entry_safe_reverse(pd_provider, it,
++ &psci_pd_providers, link) {
+ of_genpd_del_provider(pd_provider->node);
+
+ genpd = of_genpd_remove_last(pd_provider->node);
+diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
+index b51b5df084500..540105ca0781f 100644
+--- a/drivers/cpuidle/cpuidle-psci.c
++++ b/drivers/cpuidle/cpuidle-psci.c
+@@ -23,6 +23,7 @@
+ #include <linux/pm_runtime.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
++#include <linux/syscore_ops.h>
+
+ #include <asm/cpuidle.h>
+
+@@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
+ return 0;
+ }
+
++static void psci_idle_syscore_switch(bool suspend)
++{
++ bool cleared = false;
++ struct device *dev;
++ int cpu;
++
++ for_each_possible_cpu(cpu) {
++ dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev;
++
++ if (dev && suspend) {
++ dev_pm_genpd_suspend(dev);
++ } else if (dev) {
++ dev_pm_genpd_resume(dev);
++
++ /* Account for userspace having offlined a CPU. */
++ if (pm_runtime_status_suspended(dev))
++ pm_runtime_set_active(dev);
++
++ /* Clear domain state to re-start fresh. */
++ if (!cleared) {
++ psci_set_domain_state(0);
++ cleared = true;
++ }
++ }
++ }
++}
++
++static int psci_idle_syscore_suspend(void)
++{
++ psci_idle_syscore_switch(true);
++ return 0;
++}
++
++static void psci_idle_syscore_resume(void)
++{
++ psci_idle_syscore_switch(false);
++}
++
++static struct syscore_ops psci_idle_syscore_ops = {
++ .suspend = psci_idle_syscore_suspend,
++ .resume = psci_idle_syscore_resume,
++};
++
+ static void psci_idle_init_cpuhp(void)
+ {
+ int err;
+@@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void)
+ if (!psci_cpuidle_use_cpuhp)
+ return;
+
++ register_syscore_ops(&psci_idle_syscore_ops);
++
+ err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
+ "cpuidle/psci:online",
+ psci_idle_cpuhp_up,
+diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
+index 7e7ab5597d7ac..0590001db6532 100644
+--- a/drivers/cpuidle/cpuidle-pseries.c
++++ b/drivers/cpuidle/cpuidle-pseries.c
+@@ -410,13 +410,7 @@ static int __init pseries_idle_probe(void)
+ return -ENODEV;
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+- /*
+- * Use local_paca instead of get_lppaca() since
+- * preemption is not disabled, and it is not required in
+- * fact, since lppaca_ptr does not need to be the value
+- * associated to the current CPU, it can be from any CPU.
+- */
+- if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
++ if (lppaca_shared_proc()) {
+ cpuidle_state_table = shared_states;
+ max_idle_state = ARRAY_SIZE(shared_states);
+ } else {
+diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
+index 252f2a9686a62..448bc796b0b40 100644
+--- a/drivers/cpuidle/dt_idle_states.c
++++ b/drivers/cpuidle/dt_idle_states.c
+@@ -223,6 +223,6 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
+ * also be 0 on platforms with missing DT idle states or legacy DT
+ * configuration predating the DT idle states bindings.
+ */
+- return i;
++ return state_idx - start_idx;
+ }
+ EXPORT_SYMBOL_GPL(dt_init_idle_driver);
+diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
+index 53ec9585ccd44..469e18547d06c 100644
+--- a/drivers/cpuidle/sysfs.c
++++ b/drivers/cpuidle/sysfs.c
+@@ -488,6 +488,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+ &kdev->kobj, "state%d", i);
+ if (ret) {
+ kobject_put(&kobj->kobj);
++ kfree(kobj);
+ goto error_state;
+ }
+ cpuidle_add_s2idle_attr_group(kobj);
+@@ -619,6 +620,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
+ &kdev->kobj, "driver");
+ if (ret) {
+ kobject_put(&kdrv->kobj);
++ kfree(kdrv);
+ return ret;
+ }
+
+@@ -705,7 +707,6 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
+ if (!kdev)
+ return -ENOMEM;
+ kdev->dev = dev;
+- dev->kobj_dev = kdev;
+
+ init_completion(&kdev->kobj_unregister);
+
+@@ -713,9 +714,11 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
+ "cpuidle");
+ if (error) {
+ kobject_put(&kdev->kobj);
++ kfree(kdev);
+ return error;
+ }
+
++ dev->kobj_dev = kdev;
+ kobject_uevent(&kdev->kobj, KOBJ_ADD);
+
+ return 0;
+diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
+index 51690e73153ad..a40883e118424 100644
+--- a/drivers/crypto/Kconfig
++++ b/drivers/crypto/Kconfig
+@@ -772,7 +772,12 @@ config CRYPTO_DEV_IMGTEC_HASH
+ config CRYPTO_DEV_ROCKCHIP
+ tristate "Rockchip's Cryptographic Engine driver"
+ depends on OF && ARCH_ROCKCHIP
++ depends on PM
++ select CRYPTO_ECB
++ select CRYPTO_CBC
++ select CRYPTO_DES
+ select CRYPTO_AES
++ select CRYPTO_ENGINE
+ select CRYPTO_LIB_DES
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+@@ -900,6 +905,7 @@ config CRYPTO_DEV_SA2UL
+ select CRYPTO_AES_ARM64
+ select CRYPTO_ALGAPI
+ select CRYPTO_AUTHENC
++ select CRYPTO_DES
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ select CRYPTO_SHA512
+diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+index 54ae8d16e4931..35e3cadccac2b 100644
+--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
++++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+@@ -11,6 +11,7 @@
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ */
+
++#include <linux/bottom_half.h>
+ #include <linux/crypto.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/io.h>
+@@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
+
+ flow = rctx->flow;
+ err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
++ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, breq, err);
++ local_bh_enable();
+ return 0;
+ }
+
+diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+index 88194718a806c..859b7522faaac 100644
+--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
++++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+@@ -9,6 +9,7 @@
+ *
+ * You could find the datasheet in Documentation/arm/sunxi.rst
+ */
++#include <linux/bottom_half.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/scatterlist.h>
+@@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
+ theend:
+ kfree(buf);
+ kfree(result);
++ local_bh_disable();
+ crypto_finalize_hash_request(engine, breq, err);
++ local_bh_enable();
+ return 0;
+ }
+diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+index 9ef1c85c4aaa5..0cc8cafdde27c 100644
+--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
++++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+@@ -11,6 +11,7 @@
+ * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ */
+
++#include <linux/bottom_half.h>
+ #include <linux/crypto.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/io.h>
+@@ -92,6 +93,69 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
+ return err;
+ }
+
++static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
++{
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
++ struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
++ struct sun8i_ss_dev *ss = op->ss;
++ struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
++ struct scatterlist *sg = areq->src;
++ unsigned int todo, offset;
++ unsigned int len = areq->cryptlen;
++ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
++ struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
++ int i = 0;
++ dma_addr_t a;
++ int err;
++
++ rctx->ivlen = ivsize;
++ if (rctx->op_dir & SS_DECRYPTION) {
++ offset = areq->cryptlen - ivsize;
++ scatterwalk_map_and_copy(sf->biv, areq->src, offset,
++ ivsize, 0);
++ }
++
++	/* we need to copy all IVs from source in case DMA is bi-directional */
++ while (sg && len) {
++ if (sg_dma_len(sg) == 0) {
++ sg = sg_next(sg);
++ continue;
++ }
++ if (i == 0)
++ memcpy(sf->iv[0], areq->iv, ivsize);
++ a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE);
++ if (dma_mapping_error(ss->dev, a)) {
++ memzero_explicit(sf->iv[i], ivsize);
++ dev_err(ss->dev, "Cannot DMA MAP IV\n");
++ err = -EFAULT;
++ goto dma_iv_error;
++ }
++ rctx->p_iv[i] = a;
++		/* we need to set up all other IVs only for decryption */
++ if (rctx->op_dir == SS_ENCRYPTION)
++ return 0;
++ todo = min(len, sg_dma_len(sg));
++ len -= todo;
++ i++;
++ if (i < MAX_SG) {
++ offset = sg->length - ivsize;
++ scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0);
++ }
++ rctx->niv = i;
++ sg = sg_next(sg);
++ }
++
++ return 0;
++dma_iv_error:
++ i--;
++ while (i >= 0) {
++ dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
++ memzero_explicit(sf->iv[i], ivsize);
++ i--;
++ }
++ return err;
++}
++
+ static int sun8i_ss_cipher(struct skcipher_request *areq)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+@@ -100,9 +164,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
+ struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct sun8i_ss_alg_template *algt;
++ struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
+ struct scatterlist *sg;
+ unsigned int todo, len, offset, ivsize;
+- void *backup_iv = NULL;
+ int nr_sgs = 0;
+ int nr_sgd = 0;
+ int err = 0;
+@@ -133,30 +197,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
+
+ ivsize = crypto_skcipher_ivsize(tfm);
+ if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
+- rctx->ivlen = ivsize;
+- rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
+- if (!rctx->biv) {
+- err = -ENOMEM;
++ err = sun8i_ss_setup_ivs(areq);
++ if (err)
+ goto theend_key;
+- }
+- if (rctx->op_dir & SS_DECRYPTION) {
+- backup_iv = kzalloc(ivsize, GFP_KERNEL);
+- if (!backup_iv) {
+- err = -ENOMEM;
+- goto theend_key;
+- }
+- offset = areq->cryptlen - ivsize;
+- scatterwalk_map_and_copy(backup_iv, areq->src, offset,
+- ivsize, 0);
+- }
+- memcpy(rctx->biv, areq->iv, ivsize);
+- rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen,
+- DMA_TO_DEVICE);
+- if (dma_mapping_error(ss->dev, rctx->p_iv)) {
+- dev_err(ss->dev, "Cannot DMA MAP IV\n");
+- err = -ENOMEM;
+- goto theend_iv;
+- }
+ }
+ if (areq->src == areq->dst) {
+ nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
+@@ -242,21 +285,19 @@ theend_sgs:
+ }
+
+ theend_iv:
+- if (rctx->p_iv)
+- dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen,
+- DMA_TO_DEVICE);
+-
+ if (areq->iv && ivsize > 0) {
+- if (rctx->biv) {
+- offset = areq->cryptlen - ivsize;
+- if (rctx->op_dir & SS_DECRYPTION) {
+- memcpy(areq->iv, backup_iv, ivsize);
+- kfree_sensitive(backup_iv);
+- } else {
+- scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+- ivsize, 0);
+- }
+- kfree(rctx->biv);
++ for (i = 0; i < rctx->niv; i++) {
++ dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
++ memzero_explicit(sf->iv[i], ivsize);
++ }
++
++ offset = areq->cryptlen - ivsize;
++ if (rctx->op_dir & SS_DECRYPTION) {
++ memcpy(areq->iv, sf->biv, ivsize);
++ memzero_explicit(sf->biv, ivsize);
++ } else {
++ scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
++ ivsize, 0);
+ }
+ }
+
+@@ -274,7 +315,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar
+ struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+ err = sun8i_ss_cipher(breq);
++ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, breq, err);
++ local_bh_enable();
+
+ return 0;
+ }
+diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+index 80e89066dbd1a..47b5828e35c34 100644
+--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
++++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+@@ -30,6 +30,8 @@
+ static const struct ss_variant ss_a80_variant = {
+ .alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
+ },
++ .alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP,
++ },
+ .op_mode = { SS_OP_ECB, SS_OP_CBC,
+ },
+ .ss_clks = {
+@@ -64,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
+ const char *name)
+ {
+ int flow = rctx->flow;
++ unsigned int ivlen = rctx->ivlen;
+ u32 v = SS_START;
+ int i;
+
+@@ -102,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
+ mutex_lock(&ss->mlock);
+ writel(rctx->p_key, ss->base + SS_KEY_ADR_REG);
+
+- if (i == 0) {
+- if (rctx->p_iv)
+- writel(rctx->p_iv, ss->base + SS_IV_ADR_REG);
+- } else {
+- if (rctx->biv) {
+- if (rctx->op_dir == SS_ENCRYPTION)
+- writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
++ if (ivlen) {
++ if (rctx->op_dir == SS_ENCRYPTION) {
++ if (i == 0)
++ writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG);
+ else
+- writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
++ writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG);
++ } else {
++ writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG);
+ }
+ }
+
+@@ -462,7 +464,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i)
+ */
+ static int allocate_flows(struct sun8i_ss_dev *ss)
+ {
+- int i, err;
++ int i, j, err;
+
+ ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow),
+ GFP_KERNEL);
+@@ -472,6 +474,36 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
+ for (i = 0; i < MAXFLOW; i++) {
+ init_completion(&ss->flows[i].complete);
+
++ ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
++ GFP_KERNEL | GFP_DMA);
++ if (!ss->flows[i].biv) {
++ err = -ENOMEM;
++ goto error_engine;
++ }
++
++ for (j = 0; j < MAX_SG; j++) {
++ ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
++ GFP_KERNEL | GFP_DMA);
++ if (!ss->flows[i].iv[j]) {
++ err = -ENOMEM;
++ goto error_engine;
++ }
++ }
++
++		/* the padding could be up to two blocks. */
++ ss->flows[i].pad = devm_kmalloc(ss->dev, SHA256_BLOCK_SIZE * 2,
++ GFP_KERNEL | GFP_DMA);
++ if (!ss->flows[i].pad) {
++ err = -ENOMEM;
++ goto error_engine;
++ }
++ ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
++ GFP_KERNEL | GFP_DMA);
++ if (!ss->flows[i].result) {
++ err = -ENOMEM;
++ goto error_engine;
++ }
++
+ ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
+ if (!ss->flows[i].engine) {
+ dev_err(ss->dev, "Cannot allocate engine\n");
+diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+index 3c073eb3db038..f89a580618aaa 100644
+--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
++++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+@@ -9,6 +9,7 @@
+ *
+ * You could find the datasheet in Documentation/arm/sunxi.rst
+ */
++#include <linux/bottom_half.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/scatterlist.h>
+@@ -341,18 +342,11 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
+ if (digestsize == SHA224_DIGEST_SIZE)
+ digestsize = SHA256_DIGEST_SIZE;
+
+- /* the padding could be up to two block. */
+- pad = kzalloc(algt->alg.hash.halg.base.cra_blocksize * 2, GFP_KERNEL | GFP_DMA);
+- if (!pad)
+- return -ENOMEM;
++ result = ss->flows[rctx->flow].result;
++ pad = ss->flows[rctx->flow].pad;
++ memset(pad, 0, algt->alg.hash.halg.base.cra_blocksize * 2);
+ bf = (__le32 *)pad;
+
+- result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA);
+- if (!result) {
+- kfree(pad);
+- return -ENOMEM;
+- }
+-
+ for (i = 0; i < MAX_SG; i++) {
+ rctx->t_dst[i].addr = 0;
+ rctx->t_dst[i].len = 0;
+@@ -379,13 +373,21 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
+ }
+
+ len = areq->nbytes;
+- for_each_sg(areq->src, sg, nr_sgs, i) {
++ sg = areq->src;
++ i = 0;
++ while (len > 0 && sg) {
++ if (sg_dma_len(sg) == 0) {
++ sg = sg_next(sg);
++ continue;
++ }
+ rctx->t_src[i].addr = sg_dma_address(sg);
+ todo = min(len, sg_dma_len(sg));
+ rctx->t_src[i].len = todo / 4;
+ len -= todo;
+ rctx->t_dst[i].addr = addr_res;
+ rctx->t_dst[i].len = digestsize / 4;
++ sg = sg_next(sg);
++ i++;
+ }
+ if (len > 0) {
+ dev_err(ss->dev, "remaining len %d\n", len);
+@@ -440,8 +442,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
+
+ memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
+ theend:
+- kfree(pad);
+- kfree(result);
++ local_bh_disable();
+ crypto_finalize_hash_request(engine, breq, err);
++ local_bh_enable();
+ return 0;
+ }
+diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+index 28188685b9100..eb82ee5345ae1 100644
+--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
++++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+@@ -121,11 +121,19 @@ struct sginfo {
+ * @complete: completion for the current task on this flow
+ * @status: set to 1 by interrupt if task is done
+ * @stat_req: number of request done by this flow
++ * @iv: list of IV to use for each step
++ * @biv: buffer which contain the backuped IV
++ * @pad: padding buffer for hash operations
++ * @result: buffer for storing the result of hash operations
+ */
+ struct sun8i_ss_flow {
+ struct crypto_engine *engine;
+ struct completion complete;
+ int status;
++ u8 *iv[MAX_SG];
++ u8 *biv;
++ void *pad;
++ void *result;
+ #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+ unsigned long stat_req;
+ #endif
+@@ -164,28 +172,28 @@ struct sun8i_ss_dev {
+ * @t_src: list of mapped SGs with their size
+ * @t_dst: list of mapped SGs with their size
+ * @p_key: DMA address of the key
+- * @p_iv: DMA address of the IV
++ * @p_iv: DMA address of the IVs
++ * @niv: Number of IVs DMA mapped
+ * @method: current algorithm for this request
+ * @op_mode: op_mode for this request
+ * @op_dir: direction (encrypt vs decrypt) for this request
+ * @flow: the flow to use for this request
+- * @ivlen: size of biv
++ * @ivlen: size of IVs
+ * @keylen: keylen for this request
+- * @biv: buffer which contain the IV
+ * @fallback_req: request struct for invoking the fallback skcipher TFM
+ */
+ struct sun8i_cipher_req_ctx {
+ struct sginfo t_src[MAX_SG];
+ struct sginfo t_dst[MAX_SG];
+ u32 p_key;
+- u32 p_iv;
++ u32 p_iv[MAX_SG];
++ int niv;
+ u32 method;
+ u32 op_mode;
+ u32 op_dir;
+ int flow;
+ unsigned int ivlen;
+ unsigned int keylen;
+- void *biv;
+ struct skcipher_request fallback_req; // keep at the end
+ };
+
+diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
+index 8278d98074e9a..e1556a3582a30 100644
+--- a/drivers/crypto/amcc/crypto4xx_core.c
++++ b/drivers/crypto/amcc/crypto4xx_core.c
+@@ -522,7 +522,6 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
+ {
+ struct skcipher_request *req;
+ struct scatterlist *dst;
+- dma_addr_t addr;
+
+ req = skcipher_request_cast(pd_uinfo->async_req);
+
+@@ -531,8 +530,8 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
+ req->cryptlen, req->dst);
+ } else {
+ dst = pd_uinfo->dest_va;
+- addr = dma_map_page(dev->core_dev->device, sg_page(dst),
+- dst->offset, dst->length, DMA_FROM_DEVICE);
++ dma_unmap_page(dev->core_dev->device, pd->dest, dst->length,
++ DMA_FROM_DEVICE);
+ }
+
+ if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) {
+@@ -557,10 +556,9 @@ static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
+ struct ahash_request *ahash_req;
+
+ ahash_req = ahash_request_cast(pd_uinfo->async_req);
+- ctx = crypto_tfm_ctx(ahash_req->base.tfm);
++ ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(ahash_req));
+
+- crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo,
+- crypto_tfm_ctx(ahash_req->base.tfm));
++ crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, ctx);
+ crypto4xx_ret_sg_desc(dev, pd_uinfo);
+
+ if (pd_uinfo->state & PD_ENTRY_BUSY)
+diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+index c6865cbd334b2..e79514fce731f 100644
+--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
++++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+@@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine,
+ struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+ err = meson_cipher(breq);
++ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, breq, err);
++ local_bh_enable();
+
+ return 0;
+ }
+diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
+index 6e7ae896717cd..937187027ad57 100644
+--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
++++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
+@@ -237,7 +237,6 @@ static int meson_crypto_probe(struct platform_device *pdev)
+ return err;
+ }
+
+- mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL);
+ for (i = 0; i < MAXFLOW; i++) {
+ mc->irqs[i] = platform_get_irq(pdev, i);
+ if (mc->irqs[i] < 0)
+diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
+index dc0f142324a3c..8c0746a1d6d43 100644
+--- a/drivers/crypto/amlogic/amlogic-gxl.h
++++ b/drivers/crypto/amlogic/amlogic-gxl.h
+@@ -95,7 +95,7 @@ struct meson_dev {
+ struct device *dev;
+ struct meson_flow *chanlist;
+ atomic_t flow;
+- int *irqs;
++ int irqs[MAXFLOW];
+ #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+ struct dentry *dbgfs_dir;
+ #endif
+diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
+index 9391ccc03382d..fe05584031914 100644
+--- a/drivers/crypto/atmel-aes.c
++++ b/drivers/crypto/atmel-aes.c
+@@ -960,6 +960,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
+ ctx = crypto_tfm_ctx(areq->tfm);
+
+ dd->areq = areq;
++ dd->ctx = ctx;
+ start_async = (areq != new_areq);
+ dd->is_async = start_async;
+
+@@ -1274,7 +1275,6 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+- ctx->base.dd->ctx = &ctx->base;
+ ctx->base.start = atmel_aes_start;
+
+ return 0;
+@@ -1291,7 +1291,6 @@ static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+- ctx->base.dd->ctx = &ctx->base;
+ ctx->base.start = atmel_aes_ctr_start;
+
+ return 0;
+@@ -1783,7 +1782,6 @@ static int atmel_aes_gcm_init(struct crypto_aead *tfm)
+
+ crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+- ctx->base.dd->ctx = &ctx->base;
+ ctx->base.start = atmel_aes_gcm_start;
+
+ return 0;
+@@ -1927,7 +1925,6 @@ static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) +
+ crypto_skcipher_reqsize(ctx->fallback_tfm));
+ ctx->base.dd = dd;
+- ctx->base.dd->ctx = &ctx->base;
+ ctx->base.start = atmel_aes_xts_start;
+
+ return 0;
+@@ -2154,7 +2151,6 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
+ crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) +
+ auth_reqsize));
+ ctx->base.dd = dd;
+- ctx->base.dd->ctx = &ctx->base;
+ ctx->base.start = atmel_aes_authenc_start;
+
+ return 0;
+diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
+index 8697ae53b0633..d3d8bb0a69900 100644
+--- a/drivers/crypto/caam/caamalg.c
++++ b/drivers/crypto/caam/caamalg.c
+@@ -1533,6 +1533,9 @@ static int aead_do_one_req(struct crypto_engine *engine, void *areq)
+
+ ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req);
+
++ if (ret == -ENOSPC && engine->retry_support)
++ return ret;
++
+ if (ret != -EINPROGRESS) {
+ aead_unmap(ctx->jrdev, rctx->edesc, req);
+ kfree(rctx->edesc);
+@@ -1762,6 +1765,9 @@ static int skcipher_do_one_req(struct crypto_engine *engine, void *areq)
+
+ ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req);
+
++ if (ret == -ENOSPC && engine->retry_support)
++ return ret;
++
+ if (ret != -EINPROGRESS) {
+ skcipher_unmap(ctx->jrdev, rctx->edesc, req);
+ kfree(rctx->edesc);
+diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
+index 8b8ed77d8715d..6753f0e6e55d1 100644
+--- a/drivers/crypto/caam/caamalg_qi2.c
++++ b/drivers/crypto/caam/caamalg_qi2.c
+@@ -5470,7 +5470,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req)
+ dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1]));
+ dpaa2_fd_set_flc(&fd, req->flc_dma);
+
+- ppriv = this_cpu_ptr(priv->ppriv);
++ ppriv = raw_cpu_ptr(priv->ppriv);
+ for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) {
+ err = dpaa2_io_service_enqueue_fq(ppriv->dpio, ppriv->req_fqid,
+ &fd);
+diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
+index e8a6d8bc43b5d..36ef738e4a181 100644
+--- a/drivers/crypto/caam/caamhash.c
++++ b/drivers/crypto/caam/caamhash.c
+@@ -765,6 +765,9 @@ static int ahash_do_one_req(struct crypto_engine *engine, void *areq)
+
+ ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req);
+
++ if (ret == -ENOSPC && engine->retry_support)
++ return ret;
++
+ if (ret != -EINPROGRESS) {
+ ahash_unmap(jrdev, state->edesc, req, 0);
+ kfree(state->edesc);
+diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
+index e313233ec6de7..51b48b57266a6 100644
+--- a/drivers/crypto/caam/caampkc.c
++++ b/drivers/crypto/caam/caampkc.c
+@@ -223,7 +223,9 @@ static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
+ if (len && *buff)
+ break;
+
+- sg_miter_next(&miter);
++ if (!sg_miter_next(&miter))
++ break;
++
+ buff = miter.addr;
+ len = miter.length;
+
+@@ -380,6 +382,9 @@ static int akcipher_do_one_req(struct crypto_engine *engine, void *areq)
+
+ ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req);
+
++ if (ret == -ENOSPC && engine->retry_support)
++ return ret;
++
+ if (ret != -EINPROGRESS) {
+ rsa_pub_unmap(jrdev, req_ctx->edesc, req);
+ rsa_io_unmap(jrdev, req_ctx->edesc, req);
+@@ -1153,16 +1158,27 @@ static struct caam_akcipher_alg caam_rsa = {
+ int caam_pkc_init(struct device *ctrldev)
+ {
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
+- u32 pk_inst;
++ u32 pk_inst, pkha;
+ int err;
+ init_done = false;
+
+ /* Determine public key hardware accelerator presence. */
+- if (priv->era < 10)
++ if (priv->era < 10) {
+ pk_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) &
+ CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT;
+- else
+- pk_inst = rd_reg32(&priv->ctrl->vreg.pkha) & CHA_VER_NUM_MASK;
++ } else {
++ pkha = rd_reg32(&priv->ctrl->vreg.pkha);
++ pk_inst = pkha & CHA_VER_NUM_MASK;
++
++ /*
++ * Newer CAAMs support partially disabled functionality. If this is the
++ * case, the number is non-zero, but this bit is set to indicate that
++ * no encryption or decryption is supported. Only signing and verifying
++ * is supported.
++ */
++ if (pkha & CHA_VER_MISC_PKHA_NO_CRYPT)
++ pk_inst = 0;
++ }
+
+ /* Do not register algorithms if PKHA is not present. */
+ if (!pk_inst)
+diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
+index ca0361b2dbb07..f9a1ec3c84851 100644
+--- a/drivers/crypto/caam/ctrl.c
++++ b/drivers/crypto/caam/ctrl.c
+@@ -284,6 +284,10 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
+ const u32 rdsta_if = RDSTA_IF0 << sh_idx;
+ const u32 rdsta_pr = RDSTA_PR0 << sh_idx;
+ const u32 rdsta_mask = rdsta_if | rdsta_pr;
++
++ /* Clear the contents before using the descriptor */
++ memset(desc, 0x00, CAAM_CMD_SZ * 7);
++
+ /*
+ * If the corresponding bit is set, this state handle
+ * was initialized by somebody else, so it's left alone.
+@@ -327,8 +331,6 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
+ }
+
+ dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
+- /* Clear the contents before recreating the descriptor */
+- memset(desc, 0x00, CAAM_CMD_SZ * 7);
+ }
+
+ kfree(desc);
+@@ -609,6 +611,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major,
+ }
+ #endif
+
++static bool needs_entropy_delay_adjustment(void)
++{
++ if (of_machine_is_compatible("fsl,imx6sx"))
++ return true;
++ return false;
++}
++
+ /* Probe routine for CAAM top (controller) level */
+ static int caam_probe(struct platform_device *pdev)
+ {
+@@ -855,6 +864,8 @@ static int caam_probe(struct platform_device *pdev)
+ * Also, if a handle was instantiated, do not change
+ * the TRNG parameters.
+ */
++ if (needs_entropy_delay_adjustment())
++ ent_delay = 12000;
+ if (!(ctrlpriv->rng4_sh_init || inst_handles)) {
+ dev_info(dev,
+ "Entropy delay = %u\n",
+@@ -871,6 +882,15 @@ static int caam_probe(struct platform_device *pdev)
+ */
+ ret = instantiate_rng(dev, inst_handles,
+ gen_sk);
++ /*
++ * Entropy delay is determined via TRNG characterization.
++ * TRNG characterization is run across different voltages
++ * and temperatures.
++ * If worst case value for ent_dly is identified,
++ * the loop can be skipped for that platform.
++ */
++ if (needs_entropy_delay_adjustment())
++ break;
+ if (ret == -EAGAIN)
+ /*
+ * if here, the loop will rerun,
+diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
+index af61f3a2c0d46..3738625c02509 100644
+--- a/drivers/crypto/caam/regs.h
++++ b/drivers/crypto/caam/regs.h
+@@ -322,6 +322,9 @@ struct version_regs {
+ /* CHA Miscellaneous Information - AESA_MISC specific */
+ #define CHA_VER_MISC_AES_GCM BIT(1 + CHA_VER_MISC_SHIFT)
+
++/* CHA Miscellaneous Information - PKHA_MISC specific */
++#define CHA_VER_MISC_PKHA_NO_CRYPT BIT(7 + CHA_VER_MISC_SHIFT)
++
+ /*
+ * caam_perfmon - Performance Monitor/Secure Memory Status/
+ * CAAM Global Status/Component Version IDs
+diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c
+index 8c32d0eb8fcf2..6872ac3440010 100644
+--- a/drivers/crypto/cavium/cpt/cptpf_main.c
++++ b/drivers/crypto/cavium/cpt/cptpf_main.c
+@@ -253,6 +253,7 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
+ const struct firmware *fw_entry;
+ struct device *dev = &cpt->pdev->dev;
+ struct ucode_header *ucode;
++ unsigned int code_length;
+ struct microcode *mcode;
+ int j, ret = 0;
+
+@@ -263,11 +264,12 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
+ ucode = (struct ucode_header *)fw_entry->data;
+ mcode = &cpt->mcode[cpt->next_mc_idx];
+ memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
+- mcode->code_size = ntohl(ucode->code_length) * 2;
+- if (!mcode->code_size) {
++ code_length = ntohl(ucode->code_length);
++ if (code_length == 0 || code_length >= INT_MAX / 2) {
+ ret = -EINVAL;
+ goto fw_release;
+ }
++ mcode->code_size = code_length * 2;
+
+ mcode->is_ae = is_ae;
+ mcode->core_mask = 0ULL;
+diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+index 2e9c0d2143632..199fcec9b8d0b 100644
+--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
++++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+@@ -191,6 +191,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev)
+ ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0);
+ if (!ndev->iov.pf2vf_wq) {
+ kfree(ndev->iov.vfdev);
++ ndev->iov.vfdev = NULL;
+ return -ENOMEM;
+ }
+ /* enable pf2vf mailbox interrupts */
+diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
+index d718db224be42..b386a7063818b 100644
+--- a/drivers/crypto/ccp/ccp-dmaengine.c
++++ b/drivers/crypto/ccp/ccp-dmaengine.c
+@@ -632,6 +632,36 @@ static int ccp_terminate_all(struct dma_chan *dma_chan)
+ return 0;
+ }
+
++static void ccp_dma_release(struct ccp_device *ccp)
++{
++ struct ccp_dma_chan *chan;
++ struct dma_chan *dma_chan;
++ unsigned int i;
++
++ for (i = 0; i < ccp->cmd_q_count; i++) {
++ chan = ccp->ccp_dma_chan + i;
++ dma_chan = &chan->dma_chan;
++
++ tasklet_kill(&chan->cleanup_tasklet);
++ list_del_rcu(&dma_chan->device_node);
++ }
++}
++
++static void ccp_dma_release_channels(struct ccp_device *ccp)
++{
++ struct ccp_dma_chan *chan;
++ struct dma_chan *dma_chan;
++ unsigned int i;
++
++ for (i = 0; i < ccp->cmd_q_count; i++) {
++ chan = ccp->ccp_dma_chan + i;
++ dma_chan = &chan->dma_chan;
++
++ if (dma_chan->client_count)
++ dma_release_channel(dma_chan);
++ }
++}
++
+ int ccp_dmaengine_register(struct ccp_device *ccp)
+ {
+ struct ccp_dma_chan *chan;
+@@ -736,6 +766,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
+ return 0;
+
+ err_reg:
++ ccp_dma_release(ccp);
+ kmem_cache_destroy(ccp->dma_desc_cache);
+
+ err_cache:
+@@ -751,7 +782,9 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp)
+ if (!dmaengine)
+ return;
+
++ ccp_dma_release_channels(ccp);
+ dma_async_device_unregister(dma_dev);
++ ccp_dma_release(ccp);
+
+ kmem_cache_destroy(ccp->dma_desc_cache);
+ kmem_cache_destroy(ccp->dma_cmd_cache);
+diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
+index ae7b445999144..4bf9eaab4456f 100644
+--- a/drivers/crypto/ccp/psp-dev.c
++++ b/drivers/crypto/ccp/psp-dev.c
+@@ -42,6 +42,9 @@ static irqreturn_t psp_irq_handler(int irq, void *data)
+ /* Read the interrupt status: */
+ status = ioread32(psp->io_regs + psp->vdata->intsts_reg);
+
++ /* Clear the interrupt status by writing the same value we read. */
++ iowrite32(status, psp->io_regs + psp->vdata->intsts_reg);
++
+ /* invoke subdevice interrupt handlers */
+ if (status) {
+ if (psp->sev_irq_handler)
+@@ -51,9 +54,6 @@ static irqreturn_t psp_irq_handler(int irq, void *data)
+ psp->tee_irq_handler(irq, psp->tee_irq_data, status);
+ }
+
+- /* Clear the interrupt status by writing the same value we read. */
+- iowrite32(status, psp->io_regs + psp->vdata->intsts_reg);
+-
+ return IRQ_HANDLED;
+ }
+
+diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
+index 2ecb0e1f65d8d..70174a9118b19 100644
+--- a/drivers/crypto/ccp/sev-dev.c
++++ b/drivers/crypto/ccp/sev-dev.c
+@@ -24,6 +24,7 @@
+ #include <linux/cpufeature.h>
+
+ #include <asm/smp.h>
++#include <asm/cacheflush.h>
+
+ #include "psp-dev.h"
+ #include "sev-dev.h"
+@@ -141,6 +142,17 @@ static int sev_cmd_buffer_len(int cmd)
+ return 0;
+ }
+
++static void *sev_fw_alloc(unsigned long len)
++{
++ struct page *page;
++
++ page = alloc_pages(GFP_KERNEL, get_order(len));
++ if (!page)
++ return NULL;
++
++ return page_address(page);
++}
++
+ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
+ {
+ struct psp_device *psp = psp_master;
+@@ -241,7 +253,7 @@ static int __sev_platform_init_locked(int *error)
+ struct psp_device *psp = psp_master;
+ struct sev_data_init data;
+ struct sev_device *sev;
+- int rc = 0;
++ int psp_ret = -1, rc = 0;
+
+ if (!psp || !psp->sev_data)
+ return -ENODEV;
+@@ -266,7 +278,21 @@ static int __sev_platform_init_locked(int *error)
+ data.tmr_len = SEV_ES_TMR_SIZE;
+ }
+
+- rc = __sev_do_cmd_locked(SEV_CMD_INIT, &data, error);
++ rc = __sev_do_cmd_locked(SEV_CMD_INIT, &data, &psp_ret);
++ if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) {
++ /*
++ * Initialization command returned an integrity check failure
++ * status code, meaning that firmware load and validation of SEV
++ * related persistent data has failed. Retrying the
++ * initialization function should succeed by replacing the state
++ * with a reset state.
++ */
++ dev_dbg(sev->dev, "SEV: retrying INIT command");
++ rc = __sev_do_cmd_locked(SEV_CMD_INIT, &data, &psp_ret);
++ }
++ if (error)
++ *error = psp_ret;
++
+ if (rc)
+ return rc;
+
+@@ -300,7 +326,7 @@ static int __sev_platform_shutdown_locked(int *error)
+ struct sev_device *sev = psp_master->sev_data;
+ int ret;
+
+- if (sev->state == SEV_STATE_UNINIT)
++ if (!sev || sev->state == SEV_STATE_UNINIT)
+ return 0;
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
+@@ -374,6 +400,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
+ struct sev_user_data_status data;
+ int ret;
+
++ memset(&data, 0, sizeof(data));
++
+ ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
+ if (ret)
+ return ret;
+@@ -427,7 +455,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
+ if (input.length > SEV_FW_BLOB_MAX_SIZE)
+ return -EFAULT;
+
+- blob = kmalloc(input.length, GFP_KERNEL);
++ blob = kzalloc(input.length, GFP_KERNEL);
+ if (!blob)
+ return -ENOMEM;
+
+@@ -651,7 +679,14 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
+ input_address = (void __user *)input.address;
+
+ if (input.address && input.length) {
+- id_blob = kmalloc(input.length, GFP_KERNEL);
++ /*
++ * The length of the ID shouldn't be assumed by software since
++ * it may change in the future. The allocation size is limited
++ * to 1 << (PAGE_SHIFT + MAX_ORDER - 1) by the page allocator.
++ * If the allocation fails, simply return ENOMEM rather than
++ * warning in the kernel log.
++ */
++ id_blob = kzalloc(input.length, GFP_KERNEL | __GFP_NOWARN);
+ if (!id_blob)
+ return -ENOMEM;
+
+@@ -770,14 +805,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
+ if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
+ return -EFAULT;
+
+- pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
++ pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
+ if (!pdh_blob)
+ return -ENOMEM;
+
+ data.pdh_cert_address = __psp_pa(pdh_blob);
+ data.pdh_cert_len = input.pdh_cert_len;
+
+- cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
++ cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
+ if (!cert_blob) {
+ ret = -ENOMEM;
+ goto e_free_pdh;
+@@ -1064,7 +1099,6 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
+ void sev_pci_init(void)
+ {
+ struct sev_device *sev = psp_master->sev_data;
+- struct page *tmr_page;
+ int error, rc;
+
+ if (!sev)
+@@ -1080,29 +1114,16 @@ void sev_pci_init(void)
+ sev_get_api_version();
+
+ /* Obtain the TMR memory area for SEV-ES use */
+- tmr_page = alloc_pages(GFP_KERNEL, get_order(SEV_ES_TMR_SIZE));
+- if (tmr_page) {
+- sev_es_tmr = page_address(tmr_page);
+- } else {
+- sev_es_tmr = NULL;
++ sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE);
++ if (sev_es_tmr)
++ /* Must flush the cache before giving it to the firmware */
++ clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE);
++ else
+ dev_warn(sev->dev,
+ "SEV: TMR allocation failed, SEV-ES support unavailable\n");
+- }
+
+ /* Initialize the platform */
+ rc = sev_platform_init(&error);
+- if (rc && (error == SEV_RET_SECURE_DATA_INVALID)) {
+- /*
+- * INIT command returned an integrity check failure
+- * status code, meaning that firmware load and
+- * validation of SEV related persistent data has
+- * failed and persistent state has been erased.
+- * Retrying INIT command here should succeed.
+- */
+- dev_dbg(sev->dev, "SEV: retrying INIT command");
+- rc = sev_platform_init(&error);
+- }
+-
+ if (rc) {
+ dev_err(sev->dev, "SEV: failed to INIT error %#x\n", error);
+ return;
+diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
+index 88c672ad27e44..9470a9a19f29d 100644
+--- a/drivers/crypto/ccp/sp-pci.c
++++ b/drivers/crypto/ccp/sp-pci.c
+@@ -320,6 +320,15 @@ static const struct psp_vdata pspv3 = {
+ .inten_reg = 0x10690,
+ .intsts_reg = 0x10694,
+ };
++
++static const struct psp_vdata pspv4 = {
++ .sev = &sevv2,
++ .tee = &teev1,
++ .feature_reg = 0x109fc,
++ .inten_reg = 0x10690,
++ .intsts_reg = 0x10694,
++};
++
+ #endif
+
+ static const struct sp_dev_vdata dev_vdata[] = {
+@@ -365,7 +374,7 @@ static const struct sp_dev_vdata dev_vdata[] = {
+ { /* 5 */
+ .bar = 2,
+ #ifdef CONFIG_CRYPTO_DEV_SP_PSP
+- .psp_vdata = &pspv2,
++ .psp_vdata = &pspv4,
+ #endif
+ },
+ };
+diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
+index a5e041d9d2cf1..6140e49273226 100644
+--- a/drivers/crypto/ccree/cc_buffer_mgr.c
++++ b/drivers/crypto/ccree/cc_buffer_mgr.c
+@@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg,
+ {
+ int ret = 0;
+
++ if (!nbytes) {
++ *mapped_nents = 0;
++ *lbytes = 0;
++ *nents = 0;
++ return 0;
++ }
++
+ *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
+ if (*nents > max_sg_nents) {
+ *nents = 0;
+@@ -349,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx,
+ req_ctx->mlli_params.mlli_dma_addr);
+ }
+
+- dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
+- dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
+-
+ if (src != dst) {
+- dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL);
++ dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE);
++ dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE);
+ dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst));
++ dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
++ } else {
++ dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
++ dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
+ }
+ }
+
+@@ -370,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
+ u32 dummy = 0;
+ int rc = 0;
+ u32 mapped_nents = 0;
++ int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
+
+ req_ctx->dma_buf_type = CC_DMA_BUF_DLLI;
+ mlli_params->curr_pool = NULL;
+@@ -392,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
+ }
+
+ /* Map the src SGL */
+- rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents,
++ rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents,
+ LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents);
+ if (rc)
+ goto cipher_exit;
+@@ -409,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
+ }
+ } else {
+ /* Map the dst sg */
+- rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL,
++ rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE,
+ &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES,
+ &dummy, &mapped_nents);
+ if (rc)
+@@ -449,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
+ struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+ unsigned int hw_iv_size = areq_ctx->hw_iv_size;
+ struct cc_drvdata *drvdata = dev_get_drvdata(dev);
++ int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
+
+ if (areq_ctx->mac_buf_dma_addr) {
+ dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr,
+@@ -507,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
+ sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents,
+ areq_ctx->assoclen, req->cryptlen);
+
+- dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents,
+- DMA_BIDIRECTIONAL);
++ dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction);
+ if (req->src != req->dst) {
+ dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n",
+ sg_virt(req->dst));
+- dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents,
+- DMA_BIDIRECTIONAL);
++ dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE);
+ }
+ if (drvdata->coherent &&
+ areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT &&
+@@ -836,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
+ else
+ size_for_map -= authsize;
+
+- rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL,
++ rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE,
+ &areq_ctx->dst.mapped_nents,
+ LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes,
+ &dst_mapped_nents);
+@@ -1049,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
+ size_to_map += authsize;
+ }
+
+- rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,
++ rc = cc_map_sg(dev, req->src, size_to_map,
++ (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL),
+ &areq_ctx->src.mapped_nents,
+ (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES +
+ LLI_MAX_NUM_OF_DATA_ENTRIES),
+diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
+index 78833491f534d..309da6334a0a0 100644
+--- a/drivers/crypto/ccree/cc_cipher.c
++++ b/drivers/crypto/ccree/cc_cipher.c
+@@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm)
+ &ctx_p->user.key_dma_addr);
+
+ /* Free key buffer in context */
+- kfree_sensitive(ctx_p->user.key);
+ dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key);
++ kfree_sensitive(ctx_p->user.key);
+ }
+
+ struct tdes_keys {
+diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
+index 7083767602fcf..8f008f024f8f1 100644
+--- a/drivers/crypto/ccree/cc_debugfs.c
++++ b/drivers/crypto/ccree/cc_debugfs.c
+@@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void)
+ cc_debugfs_dir = debugfs_create_dir("ccree", NULL);
+ }
+
+-void __exit cc_debugfs_global_fini(void)
++void cc_debugfs_global_fini(void)
+ {
+ debugfs_remove(cc_debugfs_dir);
+ }
+diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
+index e599ac6dc162a..41f0a404bdf9e 100644
+--- a/drivers/crypto/ccree/cc_driver.c
++++ b/drivers/crypto/ccree/cc_driver.c
+@@ -103,7 +103,8 @@ MODULE_DEVICE_TABLE(of, arm_ccree_dev_of_match);
+ static void init_cc_cache_params(struct cc_drvdata *drvdata)
+ {
+ struct device *dev = drvdata_to_dev(drvdata);
+- u32 cache_params, ace_const, val, mask;
++ u32 cache_params, ace_const, val;
++ u64 mask;
+
+ /* compute CC_AXIM_CACHE_PARAMS */
+ cache_params = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS));
+@@ -655,9 +656,17 @@ static struct platform_driver ccree_driver = {
+
+ static int __init ccree_init(void)
+ {
++ int rc;
++
+ cc_debugfs_global_init();
+
+- return platform_driver_register(&ccree_driver);
++ rc = platform_driver_register(&ccree_driver);
++ if (rc) {
++ cc_debugfs_global_fini();
++ return rc;
++ }
++
++ return 0;
+ }
+ module_init(ccree_init);
+
+diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
+index c1c2b1d866639..f2be0a7d7f7ac 100644
+--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
++++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
+@@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a
+ struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+ err = sl3516_ce_cipher(breq);
++ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, breq, err);
++ local_bh_enable();
+
+ return 0;
+ }
+diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+index a032c192ef1d6..4062251fd1b68 100644
+--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
++++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+@@ -252,7 +252,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req,
+ if (unlikely(shift < 0))
+ return -EINVAL;
+
+- ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_KERNEL);
++ ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_ATOMIC);
+ if (unlikely(!ptr))
+ return -ENOMEM;
+
+@@ -1865,7 +1865,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
+ */
+ if (memcmp(ptr, p, ctx->key_sz) == 0) {
+ dev_err(dev, "gx is p!\n");
+- return -EINVAL;
++ goto err;
+ } else if (memcmp(ptr, p, ctx->key_sz) > 0) {
+ hpre_curve25519_src_modulo_p(ptr);
+ }
+diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
+index 65a641396c07f..edc61e4105f30 100644
+--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
++++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
+@@ -1143,18 +1143,12 @@ err_with_qm_init:
+ static void hpre_remove(struct pci_dev *pdev)
+ {
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+- int ret;
+
+ hisi_qm_pm_uninit(qm);
+ hisi_qm_wait_task_finish(qm, &hpre_devices);
+ hisi_qm_alg_unregister(qm, &hpre_devices);
+- if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
+- ret = hisi_qm_sriov_disable(pdev, true);
+- if (ret) {
+- pci_err(pdev, "Disable SRIOV fail!\n");
+- return;
+- }
+- }
++ if (qm->fun_type == QM_HW_PF && qm->vfs_num)
++ hisi_qm_sriov_disable(pdev, true);
+
+ hpre_debugfs_exit(qm);
+ hisi_qm_stop(qm, QM_NORMAL);
+diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
+index 369562d34d66a..fd89918abd191 100644
+--- a/drivers/crypto/hisilicon/qm.c
++++ b/drivers/crypto/hisilicon/qm.c
+@@ -1888,8 +1888,10 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
+ return ret;
+
+ /* Judge if the instance is being reset. */
+- if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
+- return 0;
++ if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) {
++ ret = 0;
++ goto put_dfx_access;
++ }
+
+ if (count > QM_DBG_WRITE_LEN) {
+ ret = -ENOSPC;
+@@ -4107,7 +4109,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
+ static int qm_vf_read_qos(struct hisi_qm *qm)
+ {
+ int cnt = 0;
+- int ret;
++ int ret = -EINVAL;
+
+ /* reset mailbox qos val */
+ qm->mb_qos = 0;
+@@ -5725,8 +5727,8 @@ static int hisi_qm_memory_init(struct hisi_qm *qm)
+ GFP_ATOMIC);
+ dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
+ if (!qm->qdma.va) {
+- ret = -ENOMEM;
+- goto err_alloc_qdma;
++ ret = -ENOMEM;
++ goto err_destroy_idr;
+ }
+
+ QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH);
+@@ -5742,7 +5744,8 @@ static int hisi_qm_memory_init(struct hisi_qm *qm)
+
+ err_alloc_qp_array:
+ dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
+-err_alloc_qdma:
++err_destroy_idr:
++ idr_destroy(&qm->qp_idr);
+ kfree(qm->factor);
+
+ return ret;
+@@ -5986,7 +5989,7 @@ int hisi_qm_resume(struct device *dev)
+ if (ret)
+ pci_err(pdev, "failed to start qm(%d)\n", ret);
+
+- return 0;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(hisi_qm_resume);
+
+diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
+index 3068093229a50..bbb35de994eb7 100644
+--- a/drivers/crypto/hisilicon/qm.h
++++ b/drivers/crypto/hisilicon/qm.h
+@@ -318,14 +318,14 @@ struct hisi_qp {
+ static inline int q_num_set(const char *val, const struct kernel_param *kp,
+ unsigned int device)
+ {
+- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
+- device, NULL);
++ struct pci_dev *pdev;
+ u32 n, q_num;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
++ pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
+ if (!pdev) {
+ q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
+ pr_info("No device found currently, suppose queue number is %u\n",
+@@ -335,6 +335,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp,
+ q_num = QM_QNUM_V1;
+ else
+ q_num = QM_QNUM_V2;
++
++ pci_dev_put(pdev);
+ }
+
+ ret = kstrtou32(val, 10, &n);
+diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c
+index 0a3c8f019b025..490e1542305e1 100644
+--- a/drivers/crypto/hisilicon/sec/sec_algs.c
++++ b/drivers/crypto/hisilicon/sec/sec_algs.c
+@@ -449,7 +449,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp,
+ */
+ }
+
+- mutex_lock(&ctx->queue->queuelock);
++ spin_lock_bh(&ctx->queue->queuelock);
+ /* Put the IV in place for chained cases */
+ switch (ctx->cipher_alg) {
+ case SEC_C_AES_CBC_128:
+@@ -509,7 +509,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp,
+ list_del(&backlog_req->backlog_head);
+ }
+ }
+- mutex_unlock(&ctx->queue->queuelock);
++ spin_unlock_bh(&ctx->queue->queuelock);
+
+ mutex_lock(&sec_req->lock);
+ list_del(&sec_req_el->head);
+@@ -798,7 +798,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
+ */
+
+ /* Grab a big lock for a long time to avoid concurrency issues */
+- mutex_lock(&queue->queuelock);
++ spin_lock_bh(&queue->queuelock);
+
+ /*
+ * Can go on to queue if we have space in either:
+@@ -814,15 +814,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
+ ret = -EBUSY;
+ if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ list_add_tail(&sec_req->backlog_head, &ctx->backlog);
+- mutex_unlock(&queue->queuelock);
++ spin_unlock_bh(&queue->queuelock);
+ goto out;
+ }
+
+- mutex_unlock(&queue->queuelock);
++ spin_unlock_bh(&queue->queuelock);
+ goto err_free_elements;
+ }
+ ret = sec_send_request(sec_req, queue);
+- mutex_unlock(&queue->queuelock);
++ spin_unlock_bh(&queue->queuelock);
+ if (ret)
+ goto err_free_elements;
+
+@@ -881,7 +881,7 @@ static int sec_alg_skcipher_init(struct crypto_skcipher *tfm)
+ if (IS_ERR(ctx->queue))
+ return PTR_ERR(ctx->queue);
+
+- mutex_init(&ctx->queue->queuelock);
++ spin_lock_init(&ctx->queue->queuelock);
+ ctx->queue->havesoftqueue = false;
+
+ return 0;
+diff --git a/drivers/crypto/hisilicon/sec/sec_drv.h b/drivers/crypto/hisilicon/sec/sec_drv.h
+index 179a8250d691c..e2a50bf2234b9 100644
+--- a/drivers/crypto/hisilicon/sec/sec_drv.h
++++ b/drivers/crypto/hisilicon/sec/sec_drv.h
+@@ -347,7 +347,7 @@ struct sec_queue {
+ DECLARE_BITMAP(unprocessed, SEC_QUEUE_LEN);
+ DECLARE_KFIFO_PTR(softqueue, typeof(struct sec_request_el *));
+ bool havesoftqueue;
+- struct mutex queuelock;
++ spinlock_t queuelock;
+ void *shadow[SEC_QUEUE_LEN];
+ };
+
+diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
+index d97cf02b1df75..cff00fd297652 100644
+--- a/drivers/crypto/hisilicon/sec2/sec.h
++++ b/drivers/crypto/hisilicon/sec2/sec.h
+@@ -119,7 +119,7 @@ struct sec_qp_ctx {
+ struct idr req_idr;
+ struct sec_alg_res res[QM_Q_DEPTH];
+ struct sec_ctx *ctx;
+- struct mutex req_lock;
++ spinlock_t req_lock;
+ struct list_head backlog;
+ struct hisi_acc_sgl_pool *c_in_pool;
+ struct hisi_acc_sgl_pool *c_out_pool;
+diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
+index 6a45bd23b3635..0d26eda36a526 100644
+--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
++++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
+@@ -124,11 +124,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
+ {
+ int req_id;
+
+- mutex_lock(&qp_ctx->req_lock);
++ spin_lock_bh(&qp_ctx->req_lock);
+
+ req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL,
+ 0, QM_Q_DEPTH, GFP_ATOMIC);
+- mutex_unlock(&qp_ctx->req_lock);
++ spin_unlock_bh(&qp_ctx->req_lock);
+ if (unlikely(req_id < 0)) {
+ dev_err(req->ctx->dev, "alloc req id fail!\n");
+ return req_id;
+@@ -153,9 +153,9 @@ static void sec_free_req_id(struct sec_req *req)
+ qp_ctx->req_list[req_id] = NULL;
+ req->qp_ctx = NULL;
+
+- mutex_lock(&qp_ctx->req_lock);
++ spin_lock_bh(&qp_ctx->req_lock);
+ idr_remove(&qp_ctx->req_idr, req_id);
+- mutex_unlock(&qp_ctx->req_lock);
++ spin_unlock_bh(&qp_ctx->req_lock);
+ }
+
+ static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
+@@ -270,7 +270,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
+ !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG))
+ return -EBUSY;
+
+- mutex_lock(&qp_ctx->req_lock);
++ spin_lock_bh(&qp_ctx->req_lock);
+ ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
+
+ if (ctx->fake_req_limit <=
+@@ -278,10 +278,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
+ list_add_tail(&req->backlog_head, &qp_ctx->backlog);
+ atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
+ atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
+- mutex_unlock(&qp_ctx->req_lock);
++ spin_unlock_bh(&qp_ctx->req_lock);
+ return -EBUSY;
+ }
+- mutex_unlock(&qp_ctx->req_lock);
++ spin_unlock_bh(&qp_ctx->req_lock);
+
+ if (unlikely(ret == -EBUSY))
+ return -ENOBUFS;
+@@ -484,7 +484,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
+
+ qp->req_cb = sec_req_cb;
+
+- mutex_init(&qp_ctx->req_lock);
++ spin_lock_init(&qp_ctx->req_lock);
+ idr_init(&qp_ctx->req_idr);
+ INIT_LIST_HEAD(&qp_ctx->backlog);
+
+@@ -617,7 +617,7 @@ static int sec_auth_init(struct sec_ctx *ctx)
+ {
+ struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+
+- a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
++ a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_AKEY_SIZE,
+ &a_ctx->a_key_dma, GFP_KERNEL);
+ if (!a_ctx->a_key)
+ return -ENOMEM;
+@@ -629,8 +629,8 @@ static void sec_auth_uninit(struct sec_ctx *ctx)
+ {
+ struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+
+- memzero_explicit(a_ctx->a_key, SEC_MAX_KEY_SIZE);
+- dma_free_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
++ memzero_explicit(a_ctx->a_key, SEC_MAX_AKEY_SIZE);
++ dma_free_coherent(ctx->dev, SEC_MAX_AKEY_SIZE,
+ a_ctx->a_key, a_ctx->a_key_dma);
+ }
+
+@@ -1373,7 +1373,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
+ {
+ struct sec_req *backlog_req = NULL;
+
+- mutex_lock(&qp_ctx->req_lock);
++ spin_lock_bh(&qp_ctx->req_lock);
+ if (ctx->fake_req_limit >=
+ atomic_read(&qp_ctx->qp->qp_status.used) &&
+ !list_empty(&qp_ctx->backlog)) {
+@@ -1381,7 +1381,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
+ typeof(*backlog_req), backlog_head);
+ list_del(&backlog_req->backlog_head);
+ }
+- mutex_unlock(&qp_ctx->req_lock);
++ spin_unlock_bh(&qp_ctx->req_lock);
+
+ return backlog_req;
+ }
+@@ -2284,9 +2284,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
+ struct aead_request *aead_req,
+ bool encrypt)
+ {
+- struct aead_request *subreq = aead_request_ctx(aead_req);
+ struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+ struct device *dev = ctx->dev;
++ struct aead_request *subreq;
++ int ret;
+
+ /* Kunpeng920 aead mode not support input 0 size */
+ if (!a_ctx->fallback_aead_tfm) {
+@@ -2294,6 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
+ return -EINVAL;
+ }
+
++ subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL);
++ if (!subreq)
++ return -ENOMEM;
++
+ aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
+ aead_request_set_callback(subreq, aead_req->base.flags,
+ aead_req->base.complete, aead_req->base.data);
+@@ -2301,8 +2306,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
+ aead_req->cryptlen, aead_req->iv);
+ aead_request_set_ad(subreq, aead_req->assoclen);
+
+- return encrypt ? crypto_aead_encrypt(subreq) :
+- crypto_aead_decrypt(subreq);
++ if (encrypt)
++ ret = crypto_aead_encrypt(subreq);
++ else
++ ret = crypto_aead_decrypt(subreq);
++ aead_request_free(subreq);
++
++ return ret;
+ }
+
+ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
+diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
+index 9f71c358a6d35..ee2edaf5058df 100644
+--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
++++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
+@@ -7,6 +7,7 @@
+ #define SEC_AIV_SIZE 12
+ #define SEC_IV_SIZE 24
+ #define SEC_MAX_KEY_SIZE 64
++#define SEC_MAX_AKEY_SIZE 128
+ #define SEC_COMM_SCENE 0
+ #define SEC_MIN_BLOCK_SZ 1
+
+diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
+index 90551bf38b523..03d239cfdf8c6 100644
+--- a/drivers/crypto/hisilicon/sec2/sec_main.c
++++ b/drivers/crypto/hisilicon/sec2/sec_main.c
+@@ -443,9 +443,11 @@ static int sec_engine_init(struct hisi_qm *qm)
+
+ writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
+
+- /* Enable sm4 extra mode, as ctr/ecb */
+- writel_relaxed(SEC_BD_ERR_CHK_EN0,
+- qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
++ /* HW V2 enable sm4 extra mode, as ctr/ecb */
++ if (qm->ver < QM_HW_V3)
++ writel_relaxed(SEC_BD_ERR_CHK_EN0,
++ qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
++
+ /* Enable sm4 xts mode multiple iv */
+ writel_relaxed(SEC_BD_ERR_CHK_EN1,
+ qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
+diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
+index 057273769f264..3dbe5405d17bc 100644
+--- a/drivers/crypto/hisilicon/sgl.c
++++ b/drivers/crypto/hisilicon/sgl.c
+@@ -122,9 +122,8 @@ err_free_mem:
+ for (j = 0; j < i; j++) {
+ dma_free_coherent(dev, block_size, block[j].sgl,
+ block[j].sgl_dma);
+- memset(block + j, 0, sizeof(*block));
+ }
+- kfree(pool);
++ kfree_sensitive(pool);
+ return ERR_PTR(-ENOMEM);
+ }
+ EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
+diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
+index 9520a4113c81e..a91e6e0e9c693 100644
+--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
++++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
+@@ -122,12 +122,12 @@ static int sgl_sge_nr_set(const char *val, const struct kernel_param *kp)
+ if (ret || n == 0 || n > HISI_ACC_SGL_SGE_NR_MAX)
+ return -EINVAL;
+
+- return param_set_int(val, kp);
++ return param_set_ushort(val, kp);
+ }
+
+ static const struct kernel_param_ops sgl_sge_nr_ops = {
+ .set = sgl_sge_nr_set,
+- .get = param_get_int,
++ .get = param_get_ushort,
+ };
+
+ static u16 sgl_sge_nr = HZIP_SGL_SGE_NR;
+diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
+index aa4c7b2af3e2e..34b41cbcfa8de 100644
+--- a/drivers/crypto/img-hash.c
++++ b/drivers/crypto/img-hash.c
+@@ -358,12 +358,16 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
+ static void img_hash_dma_task(unsigned long d)
+ {
+ struct img_hash_dev *hdev = (struct img_hash_dev *)d;
+- struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
++ struct img_hash_request_ctx *ctx;
+ u8 *addr;
+ size_t nbytes, bleft, wsend, len, tbc;
+ struct scatterlist tsg;
+
+- if (!hdev->req || !ctx->sg)
++ if (!hdev->req)
++ return;
++
++ ctx = ahash_request_ctx(hdev->req);
++ if (!ctx->sg)
+ return;
+
+ addr = sg_virt(ctx->sg);
+diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
+index 9ff885d50edfc..7fa6c9144e495 100644
+--- a/drivers/crypto/inside-secure/safexcel.c
++++ b/drivers/crypto/inside-secure/safexcel.c
+@@ -1631,19 +1631,23 @@ static int safexcel_probe_generic(void *pdev,
+ &priv->ring[i].rdr);
+ if (ret) {
+ dev_err(dev, "Failed to initialize rings\n");
+- return ret;
++ goto err_cleanup_rings;
+ }
+
+ priv->ring[i].rdr_req = devm_kcalloc(dev,
+ EIP197_DEFAULT_RING_SIZE,
+ sizeof(*priv->ring[i].rdr_req),
+ GFP_KERNEL);
+- if (!priv->ring[i].rdr_req)
+- return -ENOMEM;
++ if (!priv->ring[i].rdr_req) {
++ ret = -ENOMEM;
++ goto err_cleanup_rings;
++ }
+
+ ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
+- if (!ring_irq)
+- return -ENOMEM;
++ if (!ring_irq) {
++ ret = -ENOMEM;
++ goto err_cleanup_rings;
++ }
+
+ ring_irq->priv = priv;
+ ring_irq->ring = i;
+@@ -1657,7 +1661,8 @@ static int safexcel_probe_generic(void *pdev,
+ ring_irq);
+ if (irq < 0) {
+ dev_err(dev, "Failed to get IRQ ID for ring %d\n", i);
+- return irq;
++ ret = irq;
++ goto err_cleanup_rings;
+ }
+
+ priv->ring[i].irq = irq;
+@@ -1669,8 +1674,10 @@ static int safexcel_probe_generic(void *pdev,
+ snprintf(wq_name, 9, "wq_ring%d", i);
+ priv->ring[i].workqueue =
+ create_singlethread_workqueue(wq_name);
+- if (!priv->ring[i].workqueue)
+- return -ENOMEM;
++ if (!priv->ring[i].workqueue) {
++ ret = -ENOMEM;
++ goto err_cleanup_rings;
++ }
+
+ priv->ring[i].requests = 0;
+ priv->ring[i].busy = false;
+@@ -1687,16 +1694,26 @@ static int safexcel_probe_generic(void *pdev,
+ ret = safexcel_hw_init(priv);
+ if (ret) {
+ dev_err(dev, "HW init failed (%d)\n", ret);
+- return ret;
++ goto err_cleanup_rings;
+ }
+
+ ret = safexcel_register_algorithms(priv);
+ if (ret) {
+ dev_err(dev, "Failed to register algorithms (%d)\n", ret);
+- return ret;
++ goto err_cleanup_rings;
+ }
+
+ return 0;
++
++err_cleanup_rings:
++ for (i = 0; i < priv->config.rings; i++) {
++ if (priv->ring[i].irq)
++ irq_set_affinity_hint(priv->ring[i].irq, NULL);
++ if (priv->ring[i].workqueue)
++ destroy_workqueue(priv->ring[i].workqueue);
++ }
++
++ return ret;
+ }
+
+ static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv)
+@@ -1831,6 +1848,8 @@ static const struct of_device_id safexcel_of_match_table[] = {
+ {},
+ };
+
++MODULE_DEVICE_TABLE(of, safexcel_of_match_table);
++
+ static struct platform_driver crypto_safexcel = {
+ .probe = safexcel_probe,
+ .remove = safexcel_remove,
+diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
+index bc60b58022564..2124416742f84 100644
+--- a/drivers/crypto/inside-secure/safexcel_hash.c
++++ b/drivers/crypto/inside-secure/safexcel_hash.c
+@@ -383,7 +383,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
+ u32 x;
+
+ x = ipad[i] ^ ipad[i + 4];
+- cache[i] ^= swab(x);
++ cache[i] ^= swab32(x);
+ }
+ }
+ cache_len = AES_BLOCK_SIZE;
+@@ -821,7 +821,7 @@ static int safexcel_ahash_final(struct ahash_request *areq)
+ u32 *result = (void *)areq->result;
+
+ /* K3 */
+- result[i] = swab(ctx->base.ipad.word[i + 4]);
++ result[i] = swab32(ctx->base.ipad.word[i + 4]);
+ }
+ areq->result[0] ^= 0x80; // 10- padding
+ crypto_cipher_encrypt_one(ctx->kaes, areq->result, areq->result);
+@@ -2106,7 +2106,7 @@ static int safexcel_xcbcmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+ crypto_cipher_encrypt_one(ctx->kaes, (u8 *)key_tmp + AES_BLOCK_SIZE,
+ "\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3\x3");
+ for (i = 0; i < 3 * AES_BLOCK_SIZE / sizeof(u32); i++)
+- ctx->base.ipad.word[i] = swab(key_tmp[i]);
++ ctx->base.ipad.word[i] = swab32(key_tmp[i]);
+
+ crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK);
+ crypto_cipher_set_flags(ctx->kaes, crypto_ahash_get_flags(tfm) &
+@@ -2189,7 +2189,7 @@ static int safexcel_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+ return ret;
+
+ for (i = 0; i < len / sizeof(u32); i++)
+- ctx->base.ipad.word[i + 8] = swab(aes.key_enc[i]);
++ ctx->base.ipad.word[i + 8] = swab32(aes.key_enc[i]);
+
+ /* precompute the CMAC key material */
+ crypto_cipher_clear_flags(ctx->kaes, CRYPTO_TFM_REQ_MASK);
+diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c
+index b739d3b873dcf..0f37dfd42d850 100644
+--- a/drivers/crypto/marvell/cesa/cipher.c
++++ b/drivers/crypto/marvell/cesa/cipher.c
+@@ -297,7 +297,7 @@ static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key,
+ static int mv_cesa_des3_ede_setkey(struct crypto_skcipher *cipher,
+ const u8 *key, unsigned int len)
+ {
+- struct mv_cesa_des_ctx *ctx = crypto_skcipher_ctx(cipher);
++ struct mv_cesa_des3_ctx *ctx = crypto_skcipher_ctx(cipher);
+ int err;
+
+ err = verify_skcipher_des3_key(cipher, key);
+@@ -624,7 +624,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = {
+ .decrypt = mv_cesa_ecb_des3_ede_decrypt,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+- .ivsize = DES3_EDE_BLOCK_SIZE,
+ .base = {
+ .cra_name = "ecb(des3_ede)",
+ .cra_driver_name = "mv-ecb-des3-ede",
+diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
+index 40b482198ebc5..a765eefb18c2f 100644
+--- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
++++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
+@@ -286,6 +286,7 @@ static int process_tar_file(struct device *dev,
+ struct tar_ucode_info_t *tar_info;
+ struct otx_cpt_ucode_hdr *ucode_hdr;
+ int ucode_type, ucode_size;
++ unsigned int code_length;
+
+ /*
+ * If size is less than microcode header size then don't report
+@@ -303,7 +304,13 @@ static int process_tar_file(struct device *dev,
+ if (get_ucode_type(ucode_hdr, &ucode_type))
+ return 0;
+
+- ucode_size = ntohl(ucode_hdr->code_length) * 2;
++ code_length = ntohl(ucode_hdr->code_length);
++ if (code_length >= INT_MAX / 2) {
++ dev_err(dev, "Invalid code_length %u\n", code_length);
++ return -EINVAL;
++ }
++
++ ucode_size = code_length * 2;
+ if (!ucode_size || (size < round_up(ucode_size, 16) +
+ sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
+ dev_err(dev, "Ucode %s invalid size\n", filename);
+@@ -886,6 +893,7 @@ static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode,
+ {
+ struct otx_cpt_ucode_hdr *ucode_hdr;
+ const struct firmware *fw;
++ unsigned int code_length;
+ int ret;
+
+ set_ucode_filename(ucode, ucode_filename);
+@@ -896,7 +904,13 @@ static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode,
+ ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data;
+ memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ);
+ ucode->ver_num = ucode_hdr->ver_num;
+- ucode->size = ntohl(ucode_hdr->code_length) * 2;
++ code_length = ntohl(ucode_hdr->code_length);
++ if (code_length >= INT_MAX / 2) {
++ dev_err(dev, "Ucode invalid code_length %u\n", code_length);
++ ret = -EINVAL;
++ goto release_fw;
++ }
++ ucode->size = code_length * 2;
+ if (!ucode->size || (fw->size < round_up(ucode->size, 16)
+ + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) {
+ dev_err(dev, "Ucode %s invalid size\n", ucode_filename);
+diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+index 146a55ac4b9b0..be1ad55a208f6 100644
+--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
++++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+@@ -494,12 +494,11 @@ static ssize_t kvf_limits_store(struct device *dev,
+ {
+ struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev);
+ int lfs_num;
++ int ret;
+
+- if (kstrtoint(buf, 0, &lfs_num)) {
+- dev_err(dev, "lfs count %d must be in range [1 - %d]\n",
+- lfs_num, num_online_cpus());
+- return -EINVAL;
+- }
++ ret = kstrtoint(buf, 0, &lfs_num);
++ if (ret)
++ return ret;
+ if (lfs_num < 1 || lfs_num > num_online_cpus()) {
+ dev_err(dev, "lfs count %d must be in range [1 - %d]\n",
+ lfs_num, num_online_cpus());
+diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+index dff34b3ec09e1..7c1b92aaab398 100644
+--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
++++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+@@ -29,7 +29,8 @@ static struct otx2_cpt_bitmap get_cores_bmap(struct device *dev,
+ bool found = false;
+ int i;
+
+- if (eng_grp->g->engs_num > OTX2_CPT_MAX_ENGINES) {
++ if (eng_grp->g->engs_num < 0 ||
++ eng_grp->g->engs_num > OTX2_CPT_MAX_ENGINES) {
+ dev_err(dev, "unsupported number of engines %d on octeontx2\n",
+ eng_grp->g->engs_num);
+ return bmap;
+diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+index a72723455df72..570074e23b60e 100644
+--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
++++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+@@ -1274,6 +1274,7 @@ static int aead_do_fallback(struct aead_request *req, bool is_enc)
+ req->base.complete, req->base.data);
+ aead_request_set_crypt(&rctx->fbk_req, req->src,
+ req->dst, req->cryptlen, req->iv);
++ aead_request_set_ad(&rctx->fbk_req, req->assoclen);
+ ret = is_enc ? crypto_aead_encrypt(&rctx->fbk_req) :
+ crypto_aead_decrypt(&rctx->fbk_req);
+ } else {
+@@ -1633,16 +1634,13 @@ static inline int cpt_register_algs(void)
+ {
+ int i, err = 0;
+
+- if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
+- for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
+- otx2_cpt_skciphers[i].base.cra_flags &=
+- ~CRYPTO_ALG_DEAD;
++ for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
++ otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+- err = crypto_register_skciphers(otx2_cpt_skciphers,
+- ARRAY_SIZE(otx2_cpt_skciphers));
+- if (err)
+- return err;
+- }
++ err = crypto_register_skciphers(otx2_cpt_skciphers,
++ ARRAY_SIZE(otx2_cpt_skciphers));
++ if (err)
++ return err;
+
+ for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++)
+ otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
+index d19e5ffb5104b..d6f9e2fe863d7 100644
+--- a/drivers/crypto/mxs-dcp.c
++++ b/drivers/crypto/mxs-dcp.c
+@@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
+ memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
+ }
+
+- for_each_sg(req->src, src, sg_nents(src), i) {
++ for_each_sg(req->src, src, sg_nents(req->src), i) {
+ src_buf = sg_virt(src);
+ len = sg_dma_len(src);
+ tlen += len;
+diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
+index 3b0bf6fea491a..b4db560105a9e 100644
+--- a/drivers/crypto/n2_core.c
++++ b/drivers/crypto/n2_core.c
+@@ -1229,6 +1229,7 @@ struct n2_hash_tmpl {
+ const u8 *hash_init;
+ u8 hw_op_hashsz;
+ u8 digest_size;
++ u8 statesize;
+ u8 block_size;
+ u8 auth_type;
+ u8 hmac_type;
+@@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
+ .hmac_type = AUTH_TYPE_HMAC_MD5,
+ .hw_op_hashsz = MD5_DIGEST_SIZE,
+ .digest_size = MD5_DIGEST_SIZE,
++ .statesize = sizeof(struct md5_state),
+ .block_size = MD5_HMAC_BLOCK_SIZE },
+ { .name = "sha1",
+ .hash_zero = sha1_zero_message_hash,
+@@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
+ .hmac_type = AUTH_TYPE_HMAC_SHA1,
+ .hw_op_hashsz = SHA1_DIGEST_SIZE,
+ .digest_size = SHA1_DIGEST_SIZE,
++ .statesize = sizeof(struct sha1_state),
+ .block_size = SHA1_BLOCK_SIZE },
+ { .name = "sha256",
+ .hash_zero = sha256_zero_message_hash,
+@@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
+ .hmac_type = AUTH_TYPE_HMAC_SHA256,
+ .hw_op_hashsz = SHA256_DIGEST_SIZE,
+ .digest_size = SHA256_DIGEST_SIZE,
++ .statesize = sizeof(struct sha256_state),
+ .block_size = SHA256_BLOCK_SIZE },
+ { .name = "sha224",
+ .hash_zero = sha224_zero_message_hash,
+@@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
+ .hmac_type = AUTH_TYPE_RESERVED,
+ .hw_op_hashsz = SHA256_DIGEST_SIZE,
+ .digest_size = SHA224_DIGEST_SIZE,
++ .statesize = sizeof(struct sha256_state),
+ .block_size = SHA224_BLOCK_SIZE },
+ };
+ #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
+@@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
+
+ halg = &ahash->halg;
+ halg->digestsize = tmpl->digest_size;
++ halg->statesize = tmpl->statesize;
+
+ base = &halg->base;
+ snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
+index d00181a26dd65..483cef62acee8 100644
+--- a/drivers/crypto/nx/Makefile
++++ b/drivers/crypto/nx/Makefile
+@@ -1,7 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0
+ obj-$(CONFIG_CRYPTO_DEV_NX_ENCRYPT) += nx-crypto.o
+ nx-crypto-objs := nx.o \
+- nx_debugfs.o \
+ nx-aes-cbc.o \
+ nx-aes-ecb.o \
+ nx-aes-gcm.o \
+@@ -11,6 +10,7 @@ nx-crypto-objs := nx.o \
+ nx-sha256.o \
+ nx-sha512.o
+
++nx-crypto-$(CONFIG_DEBUG_FS) += nx_debugfs.o
+ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o
+ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o
+ nx-compress-objs := nx-842.o
+diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c
+index 32a036ada5d0a..f418817c0f43e 100644
+--- a/drivers/crypto/nx/nx-common-powernv.c
++++ b/drivers/crypto/nx/nx-common-powernv.c
+@@ -827,7 +827,7 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
+ goto err_out;
+
+ vas_init_rx_win_attr(&rxattr, coproc->ct);
+- rxattr.rx_fifo = (void *)rx_fifo;
++ rxattr.rx_fifo = rx_fifo;
+ rxattr.rx_fifo_size = fifo_size;
+ rxattr.lnotify_lpid = lpid;
+ rxattr.lnotify_pid = pid;
+diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
+index c6233173c612e..2697baebb6a35 100644
+--- a/drivers/crypto/nx/nx.h
++++ b/drivers/crypto/nx/nx.h
+@@ -170,8 +170,8 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int,
+ void nx_debugfs_init(struct nx_crypto_driver *);
+ void nx_debugfs_fini(struct nx_crypto_driver *);
+ #else
+-#define NX_DEBUGFS_INIT(drv) (0)
+-#define NX_DEBUGFS_FINI(drv) (0)
++#define NX_DEBUGFS_INIT(drv) do {} while (0)
++#define NX_DEBUGFS_FINI(drv) do {} while (0)
+ #endif
+
+ #define NX_PAGE_NUM(x) ((u64)(x) & 0xfffffffffffff000ULL)
+diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
+index 9b968ac4ee7b6..a196bb8b17010 100644
+--- a/drivers/crypto/omap-aes.c
++++ b/drivers/crypto/omap-aes.c
+@@ -1302,7 +1302,7 @@ static int omap_aes_suspend(struct device *dev)
+
+ static int omap_aes_resume(struct device *dev)
+ {
+- pm_runtime_resume_and_get(dev);
++ pm_runtime_get_sync(dev);
+ return 0;
+ }
+ #endif
+diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
+index f6bf53c00b614..4ec6949a7ca9e 100644
+--- a/drivers/crypto/omap-sham.c
++++ b/drivers/crypto/omap-sham.c
+@@ -2114,7 +2114,7 @@ static int omap_sham_probe(struct platform_device *pdev)
+
+ pm_runtime_enable(dev);
+
+- err = pm_runtime_get_sync(dev);
++ err = pm_runtime_resume_and_get(dev);
+ if (err < 0) {
+ dev_err(dev, "failed to get sync: %d\n", err);
+ goto err_pm;
+diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+index 33d8e50dcbdac..88c0ded411f15 100644
+--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
++++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+ /* Copyright(c) 2020 Intel Corporation */
++#include <linux/iopoll.h>
+ #include <adf_accel_devices.h>
+ #include <adf_common_drv.h>
+ #include <adf_pf2vf_msg.h>
+@@ -161,6 +162,35 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
+ ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0);
+ }
+
++static int adf_init_device(struct adf_accel_dev *accel_dev)
++{
++ void __iomem *addr;
++ u32 status;
++ u32 csr;
++ int ret;
++
++ addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr;
++
++ /* Temporarily mask PM interrupt */
++ csr = ADF_CSR_RD(addr, ADF_4XXX_ERRMSK2);
++ csr |= ADF_4XXX_PM_SOU;
++ ADF_CSR_WR(addr, ADF_4XXX_ERRMSK2, csr);
++
++ /* Set DRV_ACTIVE bit to power up the device */
++ ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE);
++
++ /* Poll status register to make sure the device is powered up */
++ ret = read_poll_timeout(ADF_CSR_RD, status,
++ status & ADF_4XXX_PM_INIT_STATE,
++ ADF_4XXX_PM_POLL_DELAY_US,
++ ADF_4XXX_PM_POLL_TIMEOUT_US, true, addr,
++ ADF_4XXX_PM_STATUS);
++ if (ret)
++ dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n");
++
++ return ret;
++}
++
+ static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
+ {
+ return 0;
+@@ -215,6 +245,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
+ hw_data->exit_arb = adf_exit_arb;
+ hw_data->get_arb_mapping = adf_get_arbiter_mapping;
+ hw_data->enable_ints = adf_enable_ints;
++ hw_data->init_device = adf_init_device;
+ hw_data->reset_device = adf_reset_flr;
+ hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
+ hw_data->uof_get_num_objs = uof_get_num_objs;
+diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+index 4fe2a776293c2..924bac6feb372 100644
+--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
++++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+@@ -62,6 +62,16 @@
+ #define ADF_4XXX_ADMINMSGLR_OFFSET (0x500578)
+ #define ADF_4XXX_MAILBOX_BASE_OFFSET (0x600970)
+
++/* Power management */
++#define ADF_4XXX_PM_POLL_DELAY_US 20
++#define ADF_4XXX_PM_POLL_TIMEOUT_US USEC_PER_SEC
++#define ADF_4XXX_PM_STATUS (0x50A00C)
++#define ADF_4XXX_PM_INTERRUPT (0x50A028)
++#define ADF_4XXX_PM_DRV_ACTIVE BIT(20)
++#define ADF_4XXX_PM_INIT_STATE BIT(21)
++/* Power management source in ERRSOU2 and ERRMSK2 */
++#define ADF_4XXX_PM_SOU BIT(18)
++
+ /* Firmware Binaries */
+ #define ADF_4XXX_FW "qat_4xxx.bin"
+ #define ADF_4XXX_MMP "qat_4xxx_mmp.bin"
+diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
+index 9c57abdf56b78..fc477f0162135 100644
+--- a/drivers/crypto/qat/qat_common/Makefile
++++ b/drivers/crypto/qat/qat_common/Makefile
+@@ -15,6 +15,7 @@ intel_qat-objs := adf_cfg.o \
+ qat_crypto.o \
+ qat_algs.o \
+ qat_asym_algs.o \
++ qat_algs_send.o \
+ qat_uclo.o \
+ qat_hal.o
+
+diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
+index 38c0af6d4e43e..580566cfcb04c 100644
+--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
++++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
+@@ -166,6 +166,7 @@ struct adf_hw_device_data {
+ int (*init_arb)(struct adf_accel_dev *accel_dev);
+ void (*exit_arb)(struct adf_accel_dev *accel_dev);
+ const u32 *(*get_arb_mapping)(void);
++ int (*init_device)(struct adf_accel_dev *accel_dev);
+ void (*disable_iov)(struct adf_accel_dev *accel_dev);
+ void (*configure_iov_threads)(struct adf_accel_dev *accel_dev,
+ bool enable);
+diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
+index 4261749fae8d4..75693ca4afea1 100644
+--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
++++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
+@@ -49,11 +49,6 @@ struct service_hndl {
+ struct list_head list;
+ };
+
+-static inline int get_current_node(void)
+-{
+- return topology_physical_package_id(raw_smp_processor_id());
+-}
+-
+ int adf_service_register(struct service_hndl *service);
+ int adf_service_unregister(struct service_hndl *service);
+
+diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
+index 9e560c7d41630..0ba62b286a85e 100644
+--- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
++++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
+@@ -161,21 +161,33 @@ u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev)
+ u32 legfuses;
+ u32 capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC |
+ ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC |
+- ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
++ ICP_ACCEL_CAPABILITIES_AUTHENTICATION |
++ ICP_ACCEL_CAPABILITIES_CIPHER |
++ ICP_ACCEL_CAPABILITIES_COMPRESSION;
+
+ /* Read accelerator capabilities mask */
+ pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses);
+
+- if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE)
++ /* A set bit in legfuses means the feature is OFF in this SKU */
++ if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) {
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC;
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
++ }
+ if (legfuses & ICP_ACCEL_MASK_PKE_SLICE)
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
+- if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE)
++ if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) {
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
++ }
++ if (legfuses & ICP_ACCEL_MASK_COMPRESS_SLICE)
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION;
+
+ if ((straps | fuses) & ADF_POWERGATE_PKE)
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
+
++ if ((straps | fuses) & ADF_POWERGATE_DC)
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION;
++
+ return capabilities;
+ }
+ EXPORT_SYMBOL_GPL(adf_gen2_get_accel_cap);
+diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
+index 756b0ddfac5e1..2aaf02ccbb3af 100644
+--- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
++++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
+@@ -111,6 +111,7 @@ do { \
+ (ADF_ARB_REG_SLOT * (index)), value)
+
+ /* Power gating */
++#define ADF_POWERGATE_DC BIT(23)
+ #define ADF_POWERGATE_PKE BIT(24)
+
+ /* WDT timers
+diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+index b8fca1ff7aab0..0b7086cae00bd 100644
+--- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
++++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+@@ -99,7 +99,7 @@ do { \
+ * Timeout is in cycles. Clock speed may vary across products but this
+ * value should be a few milli-seconds.
+ */
+-#define ADF_SSM_WDT_DEFAULT_VALUE 0x200000
++#define ADF_SSM_WDT_DEFAULT_VALUE 0x7000000ULL
+ #define ADF_SSM_WDT_PKE_DEFAULT_VALUE 0x8000000
+ #define ADF_SSMWDTL_OFFSET 0x54
+ #define ADF_SSMWDTH_OFFSET 0x5C
+diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
+index 60bc7b991d351..e3749e5817d94 100644
+--- a/drivers/crypto/qat/qat_common/adf_init.c
++++ b/drivers/crypto/qat/qat_common/adf_init.c
+@@ -79,6 +79,11 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
+ return -EFAULT;
+ }
+
++ if (hw_data->init_device && hw_data->init_device(accel_dev)) {
++ dev_err(&GET_DEV(accel_dev), "Failed to initialize device\n");
++ return -EFAULT;
++ }
++
+ if (hw_data->init_admin_comms && hw_data->init_admin_comms(accel_dev)) {
+ dev_err(&GET_DEV(accel_dev), "Failed initialize admin comms\n");
+ return -EFAULT;
+diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
+index 976b9ab7617cd..7ec81989beb03 100644
+--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
++++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
+@@ -117,37 +117,19 @@ static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
+
+ mutex_lock(lock);
+
+- /* Check if PF2VF CSR is in use by remote function */
++ /* Check if the PFVF CSR is in use by remote function */
+ val = ADF_CSR_RD(pmisc_bar_addr, pf2vf_offset);
+ if ((val & remote_in_use_mask) == remote_in_use_pattern) {
+ dev_dbg(&GET_DEV(accel_dev),
+- "PF2VF CSR in use by remote function\n");
++ "PFVF CSR in use by remote function\n");
+ ret = -EBUSY;
+ goto out;
+ }
+
+- /* Attempt to get ownership of PF2VF CSR */
+ msg &= ~local_in_use_mask;
+ msg |= local_in_use_pattern;
+- ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, msg);
+
+- /* Wait in case remote func also attempting to get ownership */
+- msleep(ADF_IOV_MSG_COLLISION_DETECT_DELAY);
+-
+- val = ADF_CSR_RD(pmisc_bar_addr, pf2vf_offset);
+- if ((val & local_in_use_mask) != local_in_use_pattern) {
+- dev_dbg(&GET_DEV(accel_dev),
+- "PF2VF CSR in use by remote - collision detected\n");
+- ret = -EBUSY;
+- goto out;
+- }
+-
+- /*
+- * This function now owns the PV2VF CSR. The IN_USE_BY pattern must
+- * remain in the PF2VF CSR for all writes including ACK from remote
+- * until this local function relinquishes the CSR. Send the message
+- * by interrupting the remote.
+- */
++ /* Attempt to get ownership of the PFVF CSR */
+ ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, msg | int_bit);
+
+ /* Wait for confirmation from remote func it received the message */
+@@ -162,7 +144,14 @@ static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
+ ret = -EIO;
+ }
+
+- /* Finished with PF2VF CSR; relinquish it and leave msg in CSR */
++ if (val != msg) {
++ dev_dbg(&GET_DEV(accel_dev),
++ "Collision - PFVF CSR overwritten by remote function\n");
++ ret = -EIO;
++ goto out;
++ }
++
++ /* Finished with the PFVF CSR; relinquish it and leave msg in CSR */
+ ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, val & ~local_in_use_mask);
+ out:
+ mutex_unlock(lock);
+@@ -170,12 +159,13 @@ out:
+ }
+
+ /**
+- * adf_iov_putmsg() - send PF2VF message
++ * adf_iov_putmsg() - send PFVF message
+ * @accel_dev: Pointer to acceleration device.
+ * @msg: Message to send
+- * @vf_nr: VF number to which the message will be sent
++ * @vf_nr: VF number to which the message will be sent if on PF, ignored
++ * otherwise
+ *
+- * Function sends a message from the PF to a VF
++ * Function sends a message through the PFVF channel
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+@@ -204,6 +194,11 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
+
+ /* Read message from the VF */
+ msg = ADF_CSR_RD(pmisc_addr, hw_data->get_pf2vf_offset(vf_nr));
++ if (!(msg & ADF_VF2PF_INT)) {
++ dev_info(&GET_DEV(accel_dev),
++ "Spurious VF2PF interrupt, msg %X. Ignored\n", msg);
++ goto out;
++ }
+
+ /* To ACK, clear the VF2PFINT bit */
+ msg &= ~ADF_VF2PF_INT;
+@@ -287,6 +282,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
+ if (resp && adf_iov_putmsg(accel_dev, resp, vf_nr))
+ dev_err(&GET_DEV(accel_dev), "Failed to send response to VF\n");
+
++out:
+ /* re-enable interrupt on PF from this VF */
+ adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
+
+diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
+index 8ba28409fb74b..630d0483c4e0a 100644
+--- a/drivers/crypto/qat/qat_common/adf_transport.c
++++ b/drivers/crypto/qat/qat_common/adf_transport.c
+@@ -8,6 +8,9 @@
+ #include "adf_cfg.h"
+ #include "adf_common_drv.h"
+
++#define ADF_MAX_RING_THRESHOLD 80
++#define ADF_PERCENT(tot, percent) (((tot) * (percent)) / 100)
++
+ static inline u32 adf_modulo(u32 data, u32 shift)
+ {
+ u32 div = data >> shift;
+@@ -77,6 +80,11 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring)
+ bank->irq_mask);
+ }
+
++bool adf_ring_nearly_full(struct adf_etr_ring_data *ring)
++{
++ return atomic_read(ring->inflights) > ring->threshold;
++}
++
+ int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg)
+ {
+ struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev);
+@@ -217,6 +225,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
+ struct adf_etr_bank_data *bank;
+ struct adf_etr_ring_data *ring;
+ char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
++ int max_inflights;
+ u32 ring_num;
+ int ret;
+
+@@ -263,6 +272,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
+ ring->ring_size = adf_verify_ring_size(msg_size, num_msgs);
+ ring->head = 0;
+ ring->tail = 0;
++ max_inflights = ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size);
++ ring->threshold = ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD);
+ atomic_set(ring->inflights, 0);
+ ret = adf_init_ring(ring);
+ if (ret)
+diff --git a/drivers/crypto/qat/qat_common/adf_transport.h b/drivers/crypto/qat/qat_common/adf_transport.h
+index 2c95f1697c76f..e6ef6f9b76913 100644
+--- a/drivers/crypto/qat/qat_common/adf_transport.h
++++ b/drivers/crypto/qat/qat_common/adf_transport.h
+@@ -14,6 +14,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
+ const char *ring_name, adf_callback_fn callback,
+ int poll_mode, struct adf_etr_ring_data **ring_ptr);
+
++bool adf_ring_nearly_full(struct adf_etr_ring_data *ring);
+ int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg);
+ void adf_remove_ring(struct adf_etr_ring_data *ring);
+ #endif
+diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h
+index 501bcf0f1809a..8b2c92ba7ca1f 100644
+--- a/drivers/crypto/qat/qat_common/adf_transport_internal.h
++++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h
+@@ -22,6 +22,7 @@ struct adf_etr_ring_data {
+ spinlock_t lock; /* protects ring data struct */
+ u16 head;
+ u16 tail;
++ u32 threshold;
+ u8 ring_number;
+ u8 ring_size;
+ u8 msg_size;
+diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c
+index 7828a6573f3e2..2e300c255ab94 100644
+--- a/drivers/crypto/qat/qat_common/adf_vf_isr.c
++++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c
+@@ -101,6 +101,11 @@ static void adf_pf2vf_bh_handler(void *data)
+
+ /* Read the message from PF */
+ msg = ADF_CSR_RD(pmisc_bar_addr, hw_data->get_pf2vf_offset(0));
++ if (!(msg & ADF_PF2VF_INT)) {
++ dev_info(&GET_DEV(accel_dev),
++ "Spurious PF2VF interrupt, msg %X. Ignored\n", msg);
++ goto out;
++ }
+
+ if (!(msg & ADF_PF2VF_MSGORIGIN_SYSTEM))
+ /* Ignore legacy non-system (non-kernel) PF2VF messages */
+@@ -149,6 +154,7 @@ static void adf_pf2vf_bh_handler(void *data)
+ msg &= ~ADF_PF2VF_INT;
+ ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg);
+
++out:
+ /* Re-enable PF2VF interrupts */
+ adf_enable_pf2vf_interrupts(accel_dev);
+ return;
+diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
+index f998ed58457c2..f56ee4cc5ae8b 100644
+--- a/drivers/crypto/qat/qat_common/qat_algs.c
++++ b/drivers/crypto/qat/qat_common/qat_algs.c
+@@ -17,7 +17,7 @@
+ #include <crypto/xts.h>
+ #include <linux/dma-mapping.h>
+ #include "adf_accel_devices.h"
+-#include "adf_transport.h"
++#include "qat_algs_send.h"
+ #include "adf_common_drv.h"
+ #include "qat_crypto.h"
+ #include "icp_qat_hw.h"
+@@ -46,19 +46,6 @@
+ static DEFINE_MUTEX(algs_lock);
+ static unsigned int active_devs;
+
+-struct qat_alg_buf {
+- u32 len;
+- u32 resrvd;
+- u64 addr;
+-} __packed;
+-
+-struct qat_alg_buf_list {
+- u64 resrvd;
+- u32 num_bufs;
+- u32 num_mapped_bufs;
+- struct qat_alg_buf bufers[];
+-} __packed __aligned(64);
+-
+ /* Common content descriptor */
+ struct qat_alg_cd {
+ union {
+@@ -447,8 +434,8 @@ static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx,
+ } else if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_CTR_MODE) {
+ ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags,
+ ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE);
+- keylen = round_up(keylen, 16);
+ memcpy(cd->ucs_aes.key, key, keylen);
++ keylen = round_up(keylen, 16);
+ } else {
+ memcpy(cd->aes.key, key, keylen);
+ }
+@@ -618,7 +605,7 @@ static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
+ {
+ struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct qat_crypto_instance *inst = NULL;
+- int node = get_current_node();
++ int node = numa_node_id();
+ struct device *dev;
+ int ret;
+
+@@ -686,14 +673,20 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
+ dma_addr_t blpout = qat_req->buf.bloutp;
+ size_t sz = qat_req->buf.sz;
+ size_t sz_out = qat_req->buf.sz_out;
++ int bl_dma_dir;
+ int i;
+
++ bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
++
+ for (i = 0; i < bl->num_bufs; i++)
+ dma_unmap_single(dev, bl->bufers[i].addr,
+- bl->bufers[i].len, DMA_BIDIRECTIONAL);
++ bl->bufers[i].len, bl_dma_dir);
+
+ dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
+- kfree(bl);
++
++ if (!qat_req->buf.sgl_src_valid)
++ kfree(bl);
++
+ if (blp != blpout) {
+ /* If out of place operation dma unmap only data */
+ int bufless = blout->num_bufs - blout->num_mapped_bufs;
+@@ -701,17 +694,20 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
+ for (i = bufless; i < blout->num_bufs; i++) {
+ dma_unmap_single(dev, blout->bufers[i].addr,
+ blout->bufers[i].len,
+- DMA_BIDIRECTIONAL);
++ DMA_FROM_DEVICE);
+ }
+ dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
+- kfree(blout);
++
++ if (!qat_req->buf.sgl_dst_valid)
++ kfree(blout);
+ }
+ }
+
+ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+ struct scatterlist *sgl,
+ struct scatterlist *sglout,
+- struct qat_crypto_request *qat_req)
++ struct qat_crypto_request *qat_req,
++ gfp_t flags)
+ {
+ struct device *dev = &GET_DEV(inst->accel_dev);
+ int i, sg_nctr = 0;
+@@ -721,15 +717,27 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+ dma_addr_t blp = DMA_MAPPING_ERROR;
+ dma_addr_t bloutp = DMA_MAPPING_ERROR;
+ struct scatterlist *sg;
+- size_t sz_out, sz = struct_size(bufl, bufers, n + 1);
++ size_t sz_out, sz = struct_size(bufl, bufers, n);
++ int node = dev_to_node(&GET_DEV(inst->accel_dev));
++ int bufl_dma_dir;
+
+ if (unlikely(!n))
+ return -EINVAL;
+
+- bufl = kzalloc_node(sz, GFP_ATOMIC,
+- dev_to_node(&GET_DEV(inst->accel_dev)));
+- if (unlikely(!bufl))
+- return -ENOMEM;
++ qat_req->buf.sgl_src_valid = false;
++ qat_req->buf.sgl_dst_valid = false;
++
++ if (n > QAT_MAX_BUFF_DESC) {
++ bufl = kzalloc_node(sz, flags, node);
++ if (unlikely(!bufl))
++ return -ENOMEM;
++ } else {
++ bufl = &qat_req->buf.sgl_src.sgl_hdr;
++ memset(bufl, 0, sizeof(struct qat_alg_buf_list));
++ qat_req->buf.sgl_src_valid = true;
++ }
++
++ bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+ for_each_sg(sgl, sg, n, i)
+ bufl->bufers[i].addr = DMA_MAPPING_ERROR;
+@@ -742,7 +750,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+
+ bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg),
+ sg->length,
+- DMA_BIDIRECTIONAL);
++ bufl_dma_dir);
+ bufl->bufers[y].len = sg->length;
+ if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr)))
+ goto err_in;
+@@ -760,12 +768,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+ struct qat_alg_buf *bufers;
+
+ n = sg_nents(sglout);
+- sz_out = struct_size(buflout, bufers, n + 1);
++ sz_out = struct_size(buflout, bufers, n);
+ sg_nctr = 0;
+- buflout = kzalloc_node(sz_out, GFP_ATOMIC,
+- dev_to_node(&GET_DEV(inst->accel_dev)));
+- if (unlikely(!buflout))
+- goto err_in;
++
++ if (n > QAT_MAX_BUFF_DESC) {
++ buflout = kzalloc_node(sz_out, flags, node);
++ if (unlikely(!buflout))
++ goto err_in;
++ } else {
++ buflout = &qat_req->buf.sgl_dst.sgl_hdr;
++ memset(buflout, 0, sizeof(struct qat_alg_buf_list));
++ qat_req->buf.sgl_dst_valid = true;
++ }
+
+ bufers = buflout->bufers;
+ for_each_sg(sglout, sg, n, i)
+@@ -779,7 +793,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+
+ bufers[y].addr = dma_map_single(dev, sg_virt(sg),
+ sg->length,
+- DMA_BIDIRECTIONAL);
++ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, bufers[y].addr)))
+ goto err_out;
+ bufers[y].len = sg->length;
+@@ -809,8 +823,10 @@ err_out:
+ if (!dma_mapping_error(dev, buflout->bufers[i].addr))
+ dma_unmap_single(dev, buflout->bufers[i].addr,
+ buflout->bufers[i].len,
+- DMA_BIDIRECTIONAL);
+- kfree(buflout);
++ DMA_FROM_DEVICE);
++
++ if (!qat_req->buf.sgl_dst_valid)
++ kfree(buflout);
+
+ err_in:
+ if (!dma_mapping_error(dev, blp))
+@@ -821,9 +837,10 @@ err_in:
+ if (!dma_mapping_error(dev, bufl->bufers[i].addr))
+ dma_unmap_single(dev, bufl->bufers[i].addr,
+ bufl->bufers[i].len,
+- DMA_BIDIRECTIONAL);
++ bufl_dma_dir);
+
+- kfree(bufl);
++ if (!qat_req->buf.sgl_src_valid)
++ kfree(bufl);
+
+ dev_err(dev, "Failed to map buf for dma\n");
+ return -ENOMEM;
+@@ -925,8 +942,25 @@ void qat_alg_callback(void *resp)
+ struct icp_qat_fw_la_resp *qat_resp = resp;
+ struct qat_crypto_request *qat_req =
+ (void *)(__force long)qat_resp->opaque_data;
++ struct qat_instance_backlog *backlog = qat_req->alg_req.backlog;
+
+ qat_req->cb(qat_resp, qat_req);
++
++ qat_alg_send_backlog(backlog);
++}
++
++static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req,
++ struct qat_crypto_instance *inst,
++ struct crypto_async_request *base)
++{
++ struct qat_alg_req *alg_req = &qat_req->alg_req;
++
++ alg_req->fw_req = (u32 *)&qat_req->req;
++ alg_req->tx_ring = inst->sym_tx;
++ alg_req->base = base;
++ alg_req->backlog = &inst->backlog;
++
++ return qat_alg_send_message(alg_req);
+ }
+
+ static int qat_alg_aead_dec(struct aead_request *areq)
+@@ -939,14 +973,15 @@ static int qat_alg_aead_dec(struct aead_request *areq)
+ struct icp_qat_fw_la_auth_req_params *auth_param;
+ struct icp_qat_fw_la_bulk_req *msg;
+ int digst_size = crypto_aead_authsize(aead_tfm);
+- int ret, ctr = 0;
++ gfp_t f = qat_algs_alloc_flags(&areq->base);
++ int ret;
+ u32 cipher_len;
+
+ cipher_len = areq->cryptlen - digst_size;
+ if (cipher_len % AES_BLOCK_SIZE != 0)
+ return -EINVAL;
+
+- ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
++ ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+ if (unlikely(ret))
+ return ret;
+
+@@ -965,15 +1000,12 @@ static int qat_alg_aead_dec(struct aead_request *areq)
+ auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param));
+ auth_param->auth_off = 0;
+ auth_param->auth_len = areq->assoclen + cipher_param->cipher_length;
+- do {
+- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
+- } while (ret == -EAGAIN && ctr++ < 10);
+
+- if (ret == -EAGAIN) {
++ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
++ if (ret == -ENOSPC)
+ qat_alg_free_bufl(ctx->inst, qat_req);
+- return -EBUSY;
+- }
+- return -EINPROGRESS;
++
++ return ret;
+ }
+
+ static int qat_alg_aead_enc(struct aead_request *areq)
+@@ -984,14 +1016,15 @@ static int qat_alg_aead_enc(struct aead_request *areq)
+ struct qat_crypto_request *qat_req = aead_request_ctx(areq);
+ struct icp_qat_fw_la_cipher_req_params *cipher_param;
+ struct icp_qat_fw_la_auth_req_params *auth_param;
++ gfp_t f = qat_algs_alloc_flags(&areq->base);
+ struct icp_qat_fw_la_bulk_req *msg;
+ u8 *iv = areq->iv;
+- int ret, ctr = 0;
++ int ret;
+
+ if (areq->cryptlen % AES_BLOCK_SIZE != 0)
+ return -EINVAL;
+
+- ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
++ ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+ if (unlikely(ret))
+ return ret;
+
+@@ -1013,15 +1046,11 @@ static int qat_alg_aead_enc(struct aead_request *areq)
+ auth_param->auth_off = 0;
+ auth_param->auth_len = areq->assoclen + areq->cryptlen;
+
+- do {
+- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
+- } while (ret == -EAGAIN && ctr++ < 10);
+-
+- if (ret == -EAGAIN) {
++ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
++ if (ret == -ENOSPC)
+ qat_alg_free_bufl(ctx->inst, qat_req);
+- return -EBUSY;
+- }
+- return -EINPROGRESS;
++
++ return ret;
+ }
+
+ static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx,
+@@ -1042,7 +1071,7 @@ static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
+ {
+ struct qat_crypto_instance *inst = NULL;
+ struct device *dev;
+- int node = get_current_node();
++ int node = numa_node_id();
+ int ret;
+
+ inst = qat_crypto_get_instance_node(node);
+@@ -1173,13 +1202,14 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
+ struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
+ struct icp_qat_fw_la_cipher_req_params *cipher_param;
++ gfp_t f = qat_algs_alloc_flags(&req->base);
+ struct icp_qat_fw_la_bulk_req *msg;
+- int ret, ctr = 0;
++ int ret;
+
+ if (req->cryptlen == 0)
+ return 0;
+
+- ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
++ ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+ if (unlikely(ret))
+ return ret;
+
+@@ -1198,15 +1228,11 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
+
+ qat_alg_set_req_iv(qat_req);
+
+- do {
+- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
+- } while (ret == -EAGAIN && ctr++ < 10);
+-
+- if (ret == -EAGAIN) {
++ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
++ if (ret == -ENOSPC)
+ qat_alg_free_bufl(ctx->inst, qat_req);
+- return -EBUSY;
+- }
+- return -EINPROGRESS;
++
++ return ret;
+ }
+
+ static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req)
+@@ -1242,13 +1268,14 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
+ struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
+ struct icp_qat_fw_la_cipher_req_params *cipher_param;
++ gfp_t f = qat_algs_alloc_flags(&req->base);
+ struct icp_qat_fw_la_bulk_req *msg;
+- int ret, ctr = 0;
++ int ret;
+
+ if (req->cryptlen == 0)
+ return 0;
+
+- ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
++ ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+ if (unlikely(ret))
+ return ret;
+
+@@ -1268,15 +1295,11 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
+ qat_alg_set_req_iv(qat_req);
+ qat_alg_update_iv(qat_req);
+
+- do {
+- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
+- } while (ret == -EAGAIN && ctr++ < 10);
+-
+- if (ret == -EAGAIN) {
++ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
++ if (ret == -ENOSPC)
+ qat_alg_free_bufl(ctx->inst, qat_req);
+- return -EBUSY;
+- }
+- return -EINPROGRESS;
++
++ return ret;
+ }
+
+ static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req)
+diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c
+new file mode 100644
+index 0000000000000..ff5b4347f7831
+--- /dev/null
++++ b/drivers/crypto/qat/qat_common/qat_algs_send.c
+@@ -0,0 +1,86 @@
++// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
++/* Copyright(c) 2022 Intel Corporation */
++#include "adf_transport.h"
++#include "qat_algs_send.h"
++#include "qat_crypto.h"
++
++#define ADF_MAX_RETRIES 20
++
++static int qat_alg_send_message_retry(struct qat_alg_req *req)
++{
++ int ret = 0, ctr = 0;
++
++ do {
++ ret = adf_send_message(req->tx_ring, req->fw_req);
++ } while (ret == -EAGAIN && ctr++ < ADF_MAX_RETRIES);
++
++ if (ret == -EAGAIN)
++ return -ENOSPC;
++
++ return -EINPROGRESS;
++}
++
++void qat_alg_send_backlog(struct qat_instance_backlog *backlog)
++{
++ struct qat_alg_req *req, *tmp;
++
++ spin_lock_bh(&backlog->lock);
++ list_for_each_entry_safe(req, tmp, &backlog->list, list) {
++ if (adf_send_message(req->tx_ring, req->fw_req)) {
++ /* The HW ring is full. Do nothing.
++ * qat_alg_send_backlog() will be invoked again by
++ * another callback.
++ */
++ break;
++ }
++ list_del(&req->list);
++ req->base->complete(req->base, -EINPROGRESS);
++ }
++ spin_unlock_bh(&backlog->lock);
++}
++
++static void qat_alg_backlog_req(struct qat_alg_req *req,
++ struct qat_instance_backlog *backlog)
++{
++ INIT_LIST_HEAD(&req->list);
++
++ spin_lock_bh(&backlog->lock);
++ list_add_tail(&req->list, &backlog->list);
++ spin_unlock_bh(&backlog->lock);
++}
++
++static int qat_alg_send_message_maybacklog(struct qat_alg_req *req)
++{
++ struct qat_instance_backlog *backlog = req->backlog;
++ struct adf_etr_ring_data *tx_ring = req->tx_ring;
++ u32 *fw_req = req->fw_req;
++
++ /* If any request is already backlogged, then add to backlog list */
++ if (!list_empty(&backlog->list))
++ goto enqueue;
++
++ /* If ring is nearly full, then add to backlog list */
++ if (adf_ring_nearly_full(tx_ring))
++ goto enqueue;
++
++ /* If adding request to HW ring fails, then add to backlog list */
++ if (adf_send_message(tx_ring, fw_req))
++ goto enqueue;
++
++ return -EINPROGRESS;
++
++enqueue:
++ qat_alg_backlog_req(req, backlog);
++
++ return -EBUSY;
++}
++
++int qat_alg_send_message(struct qat_alg_req *req)
++{
++ u32 flags = req->base->flags;
++
++ if (flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
++ return qat_alg_send_message_maybacklog(req);
++ else
++ return qat_alg_send_message_retry(req);
++}
+diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h
+new file mode 100644
+index 0000000000000..5ce9f4f69d8ff
+--- /dev/null
++++ b/drivers/crypto/qat/qat_common/qat_algs_send.h
+@@ -0,0 +1,11 @@
++/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
++/* Copyright(c) 2022 Intel Corporation */
++#ifndef QAT_ALGS_SEND_H
++#define QAT_ALGS_SEND_H
++
++#include "qat_crypto.h"
++
++int qat_alg_send_message(struct qat_alg_req *req);
++void qat_alg_send_backlog(struct qat_instance_backlog *backlog);
++
++#endif
+diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+index b0b78445418bb..4128200a90329 100644
+--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
++++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
+@@ -12,6 +12,7 @@
+ #include <crypto/scatterwalk.h>
+ #include "icp_qat_fw_pke.h"
+ #include "adf_accel_devices.h"
++#include "qat_algs_send.h"
+ #include "adf_transport.h"
+ #include "adf_common_drv.h"
+ #include "qat_crypto.h"
+@@ -135,8 +136,23 @@ struct qat_asym_request {
+ } areq;
+ int err;
+ void (*cb)(struct icp_qat_fw_pke_resp *resp);
++ struct qat_alg_req alg_req;
+ } __aligned(64);
+
++static int qat_alg_send_asym_message(struct qat_asym_request *qat_req,
++ struct qat_crypto_instance *inst,
++ struct crypto_async_request *base)
++{
++ struct qat_alg_req *alg_req = &qat_req->alg_req;
++
++ alg_req->fw_req = (u32 *)&qat_req->req;
++ alg_req->tx_ring = inst->pke_tx;
++ alg_req->base = base;
++ alg_req->backlog = &inst->backlog;
++
++ return qat_alg_send_message(alg_req);
++}
++
+ static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp)
+ {
+ struct qat_asym_request *req = (void *)(__force long)resp->opaque;
+@@ -148,24 +164,18 @@ static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp)
+ err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
+
+ if (areq->src) {
+- if (req->src_align)
+- dma_free_coherent(dev, req->ctx.dh->p_size,
+- req->src_align, req->in.dh.in.b);
+- else
+- dma_unmap_single(dev, req->in.dh.in.b,
+- req->ctx.dh->p_size, DMA_TO_DEVICE);
++ dma_unmap_single(dev, req->in.dh.in.b, req->ctx.dh->p_size,
++ DMA_TO_DEVICE);
++ kfree_sensitive(req->src_align);
+ }
+
+ areq->dst_len = req->ctx.dh->p_size;
++ dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size,
++ DMA_FROM_DEVICE);
+ if (req->dst_align) {
+ scatterwalk_map_and_copy(req->dst_align, areq->dst, 0,
+ areq->dst_len, 1);
+-
+- dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align,
+- req->out.dh.r);
+- } else {
+- dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size,
+- DMA_FROM_DEVICE);
++ kfree_sensitive(req->dst_align);
+ }
+
+ dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params),
+@@ -213,8 +223,10 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ struct qat_asym_request *qat_req =
+ PTR_ALIGN(kpp_request_ctx(req), 64);
+ struct icp_qat_fw_pke_request *msg = &qat_req->req;
+- int ret, ctr = 0;
++ gfp_t flags = qat_algs_alloc_flags(&req->base);
+ int n_input_params = 0;
++ u8 *vaddr;
++ int ret;
+
+ if (unlikely(!ctx->xa))
+ return -EINVAL;
+@@ -223,6 +235,10 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ req->dst_len = ctx->p_size;
+ return -EOVERFLOW;
+ }
++
++ if (req->src_len > ctx->p_size)
++ return -EINVAL;
++
+ memset(msg, '\0', sizeof(*msg));
+ ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
+ ICP_QAT_FW_COMN_REQ_FLAG_SET);
+@@ -271,27 +287,24 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ */
+ if (sg_is_last(req->src) && req->src_len == ctx->p_size) {
+ qat_req->src_align = NULL;
+- qat_req->in.dh.in.b = dma_map_single(dev,
+- sg_virt(req->src),
+- req->src_len,
+- DMA_TO_DEVICE);
+- if (unlikely(dma_mapping_error(dev,
+- qat_req->in.dh.in.b)))
+- return ret;
+-
++ vaddr = sg_virt(req->src);
+ } else {
+ int shift = ctx->p_size - req->src_len;
+
+- qat_req->src_align = dma_alloc_coherent(dev,
+- ctx->p_size,
+- &qat_req->in.dh.in.b,
+- GFP_KERNEL);
++ qat_req->src_align = kzalloc(ctx->p_size, flags);
+ if (unlikely(!qat_req->src_align))
+ return ret;
+
+ scatterwalk_map_and_copy(qat_req->src_align + shift,
+ req->src, 0, req->src_len, 0);
++
++ vaddr = qat_req->src_align;
+ }
++
++ qat_req->in.dh.in.b = dma_map_single(dev, vaddr, ctx->p_size,
++ DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->in.dh.in.b)))
++ goto unmap_src;
+ }
+ /*
+ * dst can be of any size in valid range, but HW expects it to be the
+@@ -302,32 +315,30 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ */
+ if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) {
+ qat_req->dst_align = NULL;
+- qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst),
+- req->dst_len,
+- DMA_FROM_DEVICE);
+-
+- if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r)))
+- goto unmap_src;
+-
++ vaddr = sg_virt(req->dst);
+ } else {
+- qat_req->dst_align = dma_alloc_coherent(dev, ctx->p_size,
+- &qat_req->out.dh.r,
+- GFP_KERNEL);
++ qat_req->dst_align = kzalloc(ctx->p_size, flags);
+ if (unlikely(!qat_req->dst_align))
+ goto unmap_src;
++
++ vaddr = qat_req->dst_align;
+ }
++ qat_req->out.dh.r = dma_map_single(dev, vaddr, ctx->p_size,
++ DMA_FROM_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r)))
++ goto unmap_dst;
+
+ qat_req->in.dh.in_tab[n_input_params] = 0;
+ qat_req->out.dh.out_tab[1] = 0;
+ /* Mapping in.in.b or in.in_g2.xa is the same */
+- qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b,
+- sizeof(qat_req->in.dh.in.b),
++ qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh,
++ sizeof(struct qat_dh_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+- qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r,
+- sizeof(qat_req->out.dh.r),
++ qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh,
++ sizeof(struct qat_dh_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -338,13 +349,13 @@ static int qat_dh_compute_value(struct kpp_request *req)
+ msg->input_param_count = n_input_params;
+ msg->output_param_count = 1;
+
+- do {
+- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
+- } while (ret == -EBUSY && ctr++ < 100);
++ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
++ if (ret == -ENOSPC)
++ goto unmap_all;
+
+- if (!ret)
+- return -EINPROGRESS;
++ return ret;
+
++unmap_all:
+ if (!dma_mapping_error(dev, qat_req->phy_out))
+ dma_unmap_single(dev, qat_req->phy_out,
+ sizeof(struct qat_dh_output_params),
+@@ -355,23 +366,17 @@ unmap_in_params:
+ sizeof(struct qat_dh_input_params),
+ DMA_TO_DEVICE);
+ unmap_dst:
+- if (qat_req->dst_align)
+- dma_free_coherent(dev, ctx->p_size, qat_req->dst_align,
+- qat_req->out.dh.r);
+- else
+- if (!dma_mapping_error(dev, qat_req->out.dh.r))
+- dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size,
+- DMA_FROM_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->out.dh.r))
++ dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size,
++ DMA_FROM_DEVICE);
++ kfree_sensitive(qat_req->dst_align);
+ unmap_src:
+ if (req->src) {
+- if (qat_req->src_align)
+- dma_free_coherent(dev, ctx->p_size, qat_req->src_align,
+- qat_req->in.dh.in.b);
+- else
+- if (!dma_mapping_error(dev, qat_req->in.dh.in.b))
+- dma_unmap_single(dev, qat_req->in.dh.in.b,
+- ctx->p_size,
+- DMA_TO_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->in.dh.in.b))
++ dma_unmap_single(dev, qat_req->in.dh.in.b,
++ ctx->p_size,
++ DMA_TO_DEVICE);
++ kfree_sensitive(qat_req->src_align);
+ }
+ return ret;
+ }
+@@ -420,14 +425,17 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params)
+ static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx)
+ {
+ if (ctx->g) {
++ memset(ctx->g, 0, ctx->p_size);
+ dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g);
+ ctx->g = NULL;
+ }
+ if (ctx->xa) {
++ memset(ctx->xa, 0, ctx->p_size);
+ dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa);
+ ctx->xa = NULL;
+ }
+ if (ctx->p) {
++ memset(ctx->p, 0, ctx->p_size);
+ dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p);
+ ctx->p = NULL;
+ }
+@@ -480,11 +488,13 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
+ {
+ struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
+ struct qat_crypto_instance *inst =
+- qat_crypto_get_instance_node(get_current_node());
++ qat_crypto_get_instance_node(numa_node_id());
+
+ if (!inst)
+ return -EINVAL;
+
++ kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64);
++
+ ctx->p_size = 0;
+ ctx->g2 = false;
+ ctx->inst = inst;
+@@ -510,23 +520,19 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp)
+
+ err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
+
+- if (req->src_align)
+- dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align,
+- req->in.rsa.enc.m);
+- else
+- dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz,
+- DMA_TO_DEVICE);
++ dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz,
++ DMA_TO_DEVICE);
++
++ kfree_sensitive(req->src_align);
+
+ areq->dst_len = req->ctx.rsa->key_sz;
++ dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz,
++ DMA_FROM_DEVICE);
+ if (req->dst_align) {
+ scatterwalk_map_and_copy(req->dst_align, areq->dst, 0,
+ areq->dst_len, 1);
+
+- dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align,
+- req->out.rsa.enc.c);
+- } else {
+- dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz,
+- DMA_FROM_DEVICE);
++ kfree_sensitive(req->dst_align);
+ }
+
+ dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params),
+@@ -542,8 +548,11 @@ void qat_alg_asym_callback(void *_resp)
+ {
+ struct icp_qat_fw_pke_resp *resp = _resp;
+ struct qat_asym_request *areq = (void *)(__force long)resp->opaque;
++ struct qat_instance_backlog *backlog = areq->alg_req.backlog;
+
+ areq->cb(resp);
++
++ qat_alg_send_backlog(backlog);
+ }
+
+ #define PKE_RSA_EP_512 0x1c161b21
+@@ -642,7 +651,9 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ struct qat_asym_request *qat_req =
+ PTR_ALIGN(akcipher_request_ctx(req), 64);
+ struct icp_qat_fw_pke_request *msg = &qat_req->req;
+- int ret, ctr = 0;
++ gfp_t flags = qat_algs_alloc_flags(&req->base);
++ u8 *vaddr;
++ int ret;
+
+ if (unlikely(!ctx->n || !ctx->e))
+ return -EINVAL;
+@@ -651,6 +662,10 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ req->dst_len = ctx->key_sz;
+ return -EOVERFLOW;
+ }
++
++ if (req->src_len > ctx->key_sz)
++ return -EINVAL;
++
+ memset(msg, '\0', sizeof(*msg));
+ ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
+ ICP_QAT_FW_COMN_REQ_FLAG_SET);
+@@ -679,50 +694,49 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ */
+ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
+ qat_req->src_align = NULL;
+- qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src),
+- req->src_len, DMA_TO_DEVICE);
+- if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m)))
+- return ret;
+-
++ vaddr = sg_virt(req->src);
+ } else {
+ int shift = ctx->key_sz - req->src_len;
+
+- qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz,
+- &qat_req->in.rsa.enc.m,
+- GFP_KERNEL);
++ qat_req->src_align = kzalloc(ctx->key_sz, flags);
+ if (unlikely(!qat_req->src_align))
+ return ret;
+
+ scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
+ 0, req->src_len, 0);
++ vaddr = qat_req->src_align;
+ }
+- if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+- qat_req->dst_align = NULL;
+- qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst),
+- req->dst_len,
+- DMA_FROM_DEVICE);
+
+- if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c)))
+- goto unmap_src;
++ qat_req->in.rsa.enc.m = dma_map_single(dev, vaddr, ctx->key_sz,
++ DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m)))
++ goto unmap_src;
+
++ if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
++ qat_req->dst_align = NULL;
++ vaddr = sg_virt(req->dst);
+ } else {
+- qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz,
+- &qat_req->out.rsa.enc.c,
+- GFP_KERNEL);
++ qat_req->dst_align = kzalloc(ctx->key_sz, flags);
+ if (unlikely(!qat_req->dst_align))
+ goto unmap_src;
+-
++ vaddr = qat_req->dst_align;
+ }
++
++ qat_req->out.rsa.enc.c = dma_map_single(dev, vaddr, ctx->key_sz,
++ DMA_FROM_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c)))
++ goto unmap_dst;
++
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+- qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m,
+- sizeof(qat_req->in.rsa.enc.m),
++ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa,
++ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+- qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c,
+- sizeof(qat_req->out.rsa.enc.c),
++ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa,
++ sizeof(struct qat_rsa_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -732,13 +746,14 @@ static int qat_rsa_enc(struct akcipher_request *req)
+ msg->pke_mid.opaque = (u64)(__force long)qat_req;
+ msg->input_param_count = 3;
+ msg->output_param_count = 1;
+- do {
+- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
+- } while (ret == -EBUSY && ctr++ < 100);
+
+- if (!ret)
+- return -EINPROGRESS;
++ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
++ if (ret == -ENOSPC)
++ goto unmap_all;
++
++ return ret;
+
++unmap_all:
+ if (!dma_mapping_error(dev, qat_req->phy_out))
+ dma_unmap_single(dev, qat_req->phy_out,
+ sizeof(struct qat_rsa_output_params),
+@@ -749,21 +764,15 @@ unmap_in_params:
+ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ unmap_dst:
+- if (qat_req->dst_align)
+- dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
+- qat_req->out.rsa.enc.c);
+- else
+- if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c))
+- dma_unmap_single(dev, qat_req->out.rsa.enc.c,
+- ctx->key_sz, DMA_FROM_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c))
++ dma_unmap_single(dev, qat_req->out.rsa.enc.c,
++ ctx->key_sz, DMA_FROM_DEVICE);
++ kfree_sensitive(qat_req->dst_align);
+ unmap_src:
+- if (qat_req->src_align)
+- dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
+- qat_req->in.rsa.enc.m);
+- else
+- if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m))
+- dma_unmap_single(dev, qat_req->in.rsa.enc.m,
+- ctx->key_sz, DMA_TO_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m))
++ dma_unmap_single(dev, qat_req->in.rsa.enc.m, ctx->key_sz,
++ DMA_TO_DEVICE);
++ kfree_sensitive(qat_req->src_align);
+ return ret;
+ }
+
+@@ -776,7 +785,9 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ struct qat_asym_request *qat_req =
+ PTR_ALIGN(akcipher_request_ctx(req), 64);
+ struct icp_qat_fw_pke_request *msg = &qat_req->req;
+- int ret, ctr = 0;
++ gfp_t flags = qat_algs_alloc_flags(&req->base);
++ u8 *vaddr;
++ int ret;
+
+ if (unlikely(!ctx->n || !ctx->d))
+ return -EINVAL;
+@@ -785,6 +796,10 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ req->dst_len = ctx->key_sz;
+ return -EOVERFLOW;
+ }
++
++ if (req->src_len > ctx->key_sz)
++ return -EINVAL;
++
+ memset(msg, '\0', sizeof(*msg));
+ ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
+ ICP_QAT_FW_COMN_REQ_FLAG_SET);
+@@ -823,54 +838,51 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ */
+ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
+ qat_req->src_align = NULL;
+- qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src),
+- req->dst_len, DMA_TO_DEVICE);
+- if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c)))
+- return ret;
+-
++ vaddr = sg_virt(req->src);
+ } else {
+ int shift = ctx->key_sz - req->src_len;
+
+- qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz,
+- &qat_req->in.rsa.dec.c,
+- GFP_KERNEL);
++ qat_req->src_align = kzalloc(ctx->key_sz, flags);
+ if (unlikely(!qat_req->src_align))
+ return ret;
+
+ scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
+ 0, req->src_len, 0);
++ vaddr = qat_req->src_align;
+ }
+- if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+- qat_req->dst_align = NULL;
+- qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst),
+- req->dst_len,
+- DMA_FROM_DEVICE);
+
+- if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m)))
+- goto unmap_src;
++ qat_req->in.rsa.dec.c = dma_map_single(dev, vaddr, ctx->key_sz,
++ DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c)))
++ goto unmap_src;
+
++ if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
++ qat_req->dst_align = NULL;
++ vaddr = sg_virt(req->dst);
+ } else {
+- qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz,
+- &qat_req->out.rsa.dec.m,
+- GFP_KERNEL);
++ qat_req->dst_align = kzalloc(ctx->key_sz, flags);
+ if (unlikely(!qat_req->dst_align))
+ goto unmap_src;
+-
++ vaddr = qat_req->dst_align;
+ }
++ qat_req->out.rsa.dec.m = dma_map_single(dev, vaddr, ctx->key_sz,
++ DMA_FROM_DEVICE);
++ if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m)))
++ goto unmap_dst;
+
+ if (ctx->crt_mode)
+ qat_req->in.rsa.in_tab[6] = 0;
+ else
+ qat_req->in.rsa.in_tab[3] = 0;
+ qat_req->out.rsa.out_tab[1] = 0;
+- qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c,
+- sizeof(qat_req->in.rsa.dec.c),
++ qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa,
++ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_in)))
+ goto unmap_dst;
+
+- qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m,
+- sizeof(qat_req->out.rsa.dec.m),
++ qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa,
++ sizeof(struct qat_rsa_output_params),
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->phy_out)))
+ goto unmap_in_params;
+@@ -884,13 +896,14 @@ static int qat_rsa_dec(struct akcipher_request *req)
+ msg->input_param_count = 3;
+
+ msg->output_param_count = 1;
+- do {
+- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
+- } while (ret == -EBUSY && ctr++ < 100);
+
+- if (!ret)
+- return -EINPROGRESS;
++ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
++ if (ret == -ENOSPC)
++ goto unmap_all;
+
++ return ret;
++
++unmap_all:
+ if (!dma_mapping_error(dev, qat_req->phy_out))
+ dma_unmap_single(dev, qat_req->phy_out,
+ sizeof(struct qat_rsa_output_params),
+@@ -901,21 +914,15 @@ unmap_in_params:
+ sizeof(struct qat_rsa_input_params),
+ DMA_TO_DEVICE);
+ unmap_dst:
+- if (qat_req->dst_align)
+- dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
+- qat_req->out.rsa.dec.m);
+- else
+- if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m))
+- dma_unmap_single(dev, qat_req->out.rsa.dec.m,
+- ctx->key_sz, DMA_FROM_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m))
++ dma_unmap_single(dev, qat_req->out.rsa.dec.m,
++ ctx->key_sz, DMA_FROM_DEVICE);
++ kfree_sensitive(qat_req->dst_align);
+ unmap_src:
+- if (qat_req->src_align)
+- dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
+- qat_req->in.rsa.dec.c);
+- else
+- if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c))
+- dma_unmap_single(dev, qat_req->in.rsa.dec.c,
+- ctx->key_sz, DMA_TO_DEVICE);
++ if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c))
++ dma_unmap_single(dev, qat_req->in.rsa.dec.c, ctx->key_sz,
++ DMA_TO_DEVICE);
++ kfree_sensitive(qat_req->src_align);
+ return ret;
+ }
+
+@@ -1218,11 +1225,13 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
+ {
+ struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+ struct qat_crypto_instance *inst =
+- qat_crypto_get_instance_node(get_current_node());
++ qat_crypto_get_instance_node(numa_node_id());
+
+ if (!inst)
+ return -EINVAL;
+
++ akcipher_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64);
++
+ ctx->key_sz = 0;
+ ctx->inst = inst;
+ return 0;
+@@ -1233,18 +1242,8 @@ static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm)
+ struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+ struct device *dev = &GET_DEV(ctx->inst->accel_dev);
+
+- if (ctx->n)
+- dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n);
+- if (ctx->e)
+- dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e);
+- if (ctx->d) {
+- memset(ctx->d, '\0', ctx->key_sz);
+- dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d);
+- }
++ qat_rsa_clear_ctx(dev, ctx);
+ qat_crypto_put_instance(ctx->inst);
+- ctx->n = NULL;
+- ctx->e = NULL;
+- ctx->d = NULL;
+ }
+
+ static struct akcipher_alg rsa = {
+@@ -1255,7 +1254,6 @@ static struct akcipher_alg rsa = {
+ .max_size = qat_rsa_max_size,
+ .init = qat_rsa_init_tfm,
+ .exit = qat_rsa_exit_tfm,
+- .reqsize = sizeof(struct qat_asym_request) + 64,
+ .base = {
+ .cra_name = "rsa",
+ .cra_driver_name = "qat-rsa",
+@@ -1272,7 +1270,6 @@ static struct kpp_alg dh = {
+ .max_size = qat_dh_max_size,
+ .init = qat_dh_init_tfm,
+ .exit = qat_dh_exit_tfm,
+- .reqsize = sizeof(struct qat_asym_request) + 64,
+ .base = {
+ .cra_name = "dh",
+ .cra_driver_name = "qat-dh",
+diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
+index ece6776fbd53d..994e43fab0a4d 100644
+--- a/drivers/crypto/qat/qat_common/qat_crypto.c
++++ b/drivers/crypto/qat/qat_common/qat_crypto.c
+@@ -321,6 +321,9 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
+ &inst->pke_rx);
+ if (ret)
+ goto err;
++
++ INIT_LIST_HEAD(&inst->backlog.list);
++ spin_lock_init(&inst->backlog.lock);
+ }
+ return 0;
+ err:
+diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
+index b6a4c95ae003f..df3c738ce323a 100644
+--- a/drivers/crypto/qat/qat_common/qat_crypto.h
++++ b/drivers/crypto/qat/qat_common/qat_crypto.h
+@@ -9,6 +9,19 @@
+ #include "adf_accel_devices.h"
+ #include "icp_qat_fw_la.h"
+
++struct qat_instance_backlog {
++ struct list_head list;
++ spinlock_t lock; /* protects backlog list */
++};
++
++struct qat_alg_req {
++ u32 *fw_req;
++ struct adf_etr_ring_data *tx_ring;
++ struct crypto_async_request *base;
++ struct list_head list;
++ struct qat_instance_backlog *backlog;
++};
++
+ struct qat_crypto_instance {
+ struct adf_etr_ring_data *sym_tx;
+ struct adf_etr_ring_data *sym_rx;
+@@ -19,8 +32,29 @@ struct qat_crypto_instance {
+ unsigned long state;
+ int id;
+ atomic_t refctr;
++ struct qat_instance_backlog backlog;
+ };
+
++#define QAT_MAX_BUFF_DESC 4
++
++struct qat_alg_buf {
++ u32 len;
++ u32 resrvd;
++ u64 addr;
++} __packed;
++
++struct qat_alg_buf_list {
++ u64 resrvd;
++ u32 num_bufs;
++ u32 num_mapped_bufs;
++ struct qat_alg_buf bufers[];
++} __packed;
++
++struct qat_alg_fixed_buf_list {
++ struct qat_alg_buf_list sgl_hdr;
++ struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
++} __packed __aligned(64);
++
+ struct qat_crypto_request_buffs {
+ struct qat_alg_buf_list *bl;
+ dma_addr_t blp;
+@@ -28,6 +62,10 @@ struct qat_crypto_request_buffs {
+ dma_addr_t bloutp;
+ size_t sz;
+ size_t sz_out;
++ bool sgl_src_valid;
++ bool sgl_dst_valid;
++ struct qat_alg_fixed_buf_list sgl_src;
++ struct qat_alg_fixed_buf_list sgl_dst;
+ };
+
+ struct qat_crypto_request;
+@@ -53,6 +91,7 @@ struct qat_crypto_request {
+ u8 iv[AES_BLOCK_SIZE];
+ };
+ bool encryption;
++ struct qat_alg_req alg_req;
+ };
+
+ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev)
+@@ -70,4 +109,9 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev)
+ return true;
+ }
+
++static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req)
++{
++ return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
++}
++
+ #endif
+diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+index 0a9ce365a544e..c2c73ee279b29 100644
+--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
++++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+@@ -86,17 +86,26 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
+
+ capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC |
+ ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC |
+- ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
++ ICP_ACCEL_CAPABILITIES_AUTHENTICATION |
++ ICP_ACCEL_CAPABILITIES_CIPHER |
++ ICP_ACCEL_CAPABILITIES_COMPRESSION;
+
+ /* Read accelerator capabilities mask */
+ pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses);
+
+- if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE)
++ /* A set bit in legfuses means the feature is OFF in this SKU */
++ if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) {
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC;
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
++ }
+ if (legfuses & ICP_ACCEL_MASK_PKE_SLICE)
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
+- if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE)
++ if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) {
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
++ }
++ if (legfuses & ICP_ACCEL_MASK_COMPRESS_SLICE)
++ capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION;
+
+ return capabilities;
+ }
+diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
+index 290e2446a2f35..97a530171f07a 100644
+--- a/drivers/crypto/qce/aead.c
++++ b/drivers/crypto/qce/aead.c
+@@ -802,8 +802,8 @@ static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_devi
+
+ ret = crypto_register_aead(alg);
+ if (ret) {
+- kfree(tmpl);
+ dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name);
++ kfree(tmpl);
+ return ret;
+ }
+
+diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c
+index 8e6fcf2c21cc0..59159f5e64e52 100644
+--- a/drivers/crypto/qce/sha.c
++++ b/drivers/crypto/qce/sha.c
+@@ -498,8 +498,8 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def,
+
+ ret = crypto_register_ahash(alg);
+ if (ret) {
+- kfree(tmpl);
+ dev_err(qce->dev, "%s registration failed\n", base->cra_name);
++ kfree(tmpl);
+ return ret;
+ }
+
+diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
+index 8ff10928f581d..3d27cd5210ef5 100644
+--- a/drivers/crypto/qce/skcipher.c
++++ b/drivers/crypto/qce/skcipher.c
+@@ -484,8 +484,8 @@ static int qce_skcipher_register_one(const struct qce_skcipher_def *def,
+
+ ret = crypto_register_skcipher(alg);
+ if (ret) {
+- kfree(tmpl);
+ dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name);
++ kfree(tmpl);
+ return ret;
+ }
+
+diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
+index 99ba8d51d1020..031b5f701a0a3 100644
+--- a/drivers/crypto/qcom-rng.c
++++ b/drivers/crypto/qcom-rng.c
+@@ -8,6 +8,7 @@
+ #include <linux/clk.h>
+ #include <linux/crypto.h>
+ #include <linux/io.h>
++#include <linux/iopoll.h>
+ #include <linux/module.h>
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+@@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
+ {
+ unsigned int currsize = 0;
+ u32 val;
++ int ret;
+
+ /* read random data from hardware */
+ do {
+- val = readl_relaxed(rng->base + PRNG_STATUS);
+- if (!(val & PRNG_STATUS_DATA_AVAIL))
+- break;
++ ret = readl_poll_timeout(rng->base + PRNG_STATUS, val,
++ val & PRNG_STATUS_DATA_AVAIL,
++ 200, 10000);
++ if (ret)
++ return ret;
+
+ val = readl_relaxed(rng->base + PRNG_DATA_OUT);
+ if (!val)
+- break;
++ return -EINVAL;
+
+ if ((max - currsize) >= WORD_SZ) {
+ memcpy(data, &val, WORD_SZ);
+@@ -65,7 +69,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
+ }
+ } while (currsize < max);
+
+- return currsize;
++ return 0;
+ }
+
+ static int qcom_rng_generate(struct crypto_rng *tfm,
+@@ -87,7 +91,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm,
+ mutex_unlock(&rng->lock);
+ clk_disable_unprepare(rng->clk);
+
+- return 0;
++ return ret;
+ }
+
+ static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed,
+diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
+index 35d73061d1569..14a0aef18ab13 100644
+--- a/drivers/crypto/rockchip/rk3288_crypto.c
++++ b/drivers/crypto/rockchip/rk3288_crypto.c
+@@ -65,186 +65,24 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
+ clk_disable_unprepare(dev->sclk);
+ }
+
+-static int check_alignment(struct scatterlist *sg_src,
+- struct scatterlist *sg_dst,
+- int align_mask)
+-{
+- int in, out, align;
+-
+- in = IS_ALIGNED((uint32_t)sg_src->offset, 4) &&
+- IS_ALIGNED((uint32_t)sg_src->length, align_mask);
+- if (!sg_dst)
+- return in;
+- out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) &&
+- IS_ALIGNED((uint32_t)sg_dst->length, align_mask);
+- align = in && out;
+-
+- return (align && (sg_src->length == sg_dst->length));
+-}
+-
+-static int rk_load_data(struct rk_crypto_info *dev,
+- struct scatterlist *sg_src,
+- struct scatterlist *sg_dst)
+-{
+- unsigned int count;
+-
+- dev->aligned = dev->aligned ?
+- check_alignment(sg_src, sg_dst, dev->align_size) :
+- dev->aligned;
+- if (dev->aligned) {
+- count = min(dev->left_bytes, sg_src->length);
+- dev->left_bytes -= count;
+-
+- if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) {
+- dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n",
+- __func__, __LINE__);
+- return -EINVAL;
+- }
+- dev->addr_in = sg_dma_address(sg_src);
+-
+- if (sg_dst) {
+- if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) {
+- dev_err(dev->dev,
+- "[%s:%d] dma_map_sg(dst) error\n",
+- __func__, __LINE__);
+- dma_unmap_sg(dev->dev, sg_src, 1,
+- DMA_TO_DEVICE);
+- return -EINVAL;
+- }
+- dev->addr_out = sg_dma_address(sg_dst);
+- }
+- } else {
+- count = (dev->left_bytes > PAGE_SIZE) ?
+- PAGE_SIZE : dev->left_bytes;
+-
+- if (!sg_pcopy_to_buffer(dev->first, dev->src_nents,
+- dev->addr_vir, count,
+- dev->total - dev->left_bytes)) {
+- dev_err(dev->dev, "[%s:%d] pcopy err\n",
+- __func__, __LINE__);
+- return -EINVAL;
+- }
+- dev->left_bytes -= count;
+- sg_init_one(&dev->sg_tmp, dev->addr_vir, count);
+- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) {
+- dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp) error\n",
+- __func__, __LINE__);
+- return -ENOMEM;
+- }
+- dev->addr_in = sg_dma_address(&dev->sg_tmp);
+-
+- if (sg_dst) {
+- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1,
+- DMA_FROM_DEVICE)) {
+- dev_err(dev->dev,
+- "[%s:%d] dma_map_sg(sg_tmp) error\n",
+- __func__, __LINE__);
+- dma_unmap_sg(dev->dev, &dev->sg_tmp, 1,
+- DMA_TO_DEVICE);
+- return -ENOMEM;
+- }
+- dev->addr_out = sg_dma_address(&dev->sg_tmp);
+- }
+- }
+- dev->count = count;
+- return 0;
+-}
+-
+-static void rk_unload_data(struct rk_crypto_info *dev)
+-{
+- struct scatterlist *sg_in, *sg_out;
+-
+- sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp;
+- dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE);
+-
+- if (dev->sg_dst) {
+- sg_out = dev->aligned ? dev->sg_dst : &dev->sg_tmp;
+- dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE);
+- }
+-}
+-
+ static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
+ {
+ struct rk_crypto_info *dev = platform_get_drvdata(dev_id);
+ u32 interrupt_status;
+
+- spin_lock(&dev->lock);
+ interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS);
+ CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status);
+
++ dev->status = 1;
+ if (interrupt_status & 0x0a) {
+ dev_warn(dev->dev, "DMA Error\n");
+- dev->err = -EFAULT;
++ dev->status = 0;
+ }
+- tasklet_schedule(&dev->done_task);
++ complete(&dev->complete);
+
+- spin_unlock(&dev->lock);
+ return IRQ_HANDLED;
+ }
+
+-static int rk_crypto_enqueue(struct rk_crypto_info *dev,
+- struct crypto_async_request *async_req)
+-{
+- unsigned long flags;
+- int ret;
+-
+- spin_lock_irqsave(&dev->lock, flags);
+- ret = crypto_enqueue_request(&dev->queue, async_req);
+- if (dev->busy) {
+- spin_unlock_irqrestore(&dev->lock, flags);
+- return ret;
+- }
+- dev->busy = true;
+- spin_unlock_irqrestore(&dev->lock, flags);
+- tasklet_schedule(&dev->queue_task);
+-
+- return ret;
+-}
+-
+-static void rk_crypto_queue_task_cb(unsigned long data)
+-{
+- struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
+- struct crypto_async_request *async_req, *backlog;
+- unsigned long flags;
+- int err = 0;
+-
+- dev->err = 0;
+- spin_lock_irqsave(&dev->lock, flags);
+- backlog = crypto_get_backlog(&dev->queue);
+- async_req = crypto_dequeue_request(&dev->queue);
+-
+- if (!async_req) {
+- dev->busy = false;
+- spin_unlock_irqrestore(&dev->lock, flags);
+- return;
+- }
+- spin_unlock_irqrestore(&dev->lock, flags);
+-
+- if (backlog) {
+- backlog->complete(backlog, -EINPROGRESS);
+- backlog = NULL;
+- }
+-
+- dev->async_req = async_req;
+- err = dev->start(dev);
+- if (err)
+- dev->complete(dev->async_req, err);
+-}
+-
+-static void rk_crypto_done_task_cb(unsigned long data)
+-{
+- struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
+-
+- if (dev->err) {
+- dev->complete(dev->async_req, dev->err);
+- return;
+- }
+-
+- dev->err = dev->update(dev);
+- if (dev->err)
+- dev->complete(dev->async_req, dev->err);
+-}
+-
+ static struct rk_crypto_tmp *rk_cipher_algs[] = {
+ &rk_ecb_aes_alg,
+ &rk_cbc_aes_alg,
+@@ -337,8 +175,6 @@ static int rk_crypto_probe(struct platform_device *pdev)
+ if (err)
+ goto err_crypto;
+
+- spin_lock_init(&crypto_info->lock);
+-
+ crypto_info->reg = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(crypto_info->reg)) {
+ err = PTR_ERR(crypto_info->reg);
+@@ -389,18 +225,11 @@ static int rk_crypto_probe(struct platform_device *pdev)
+ crypto_info->dev = &pdev->dev;
+ platform_set_drvdata(pdev, crypto_info);
+
+- tasklet_init(&crypto_info->queue_task,
+- rk_crypto_queue_task_cb, (unsigned long)crypto_info);
+- tasklet_init(&crypto_info->done_task,
+- rk_crypto_done_task_cb, (unsigned long)crypto_info);
+- crypto_init_queue(&crypto_info->queue, 50);
++ crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
++ crypto_engine_start(crypto_info->engine);
++ init_completion(&crypto_info->complete);
+
+- crypto_info->enable_clk = rk_crypto_enable_clk;
+- crypto_info->disable_clk = rk_crypto_disable_clk;
+- crypto_info->load_data = rk_load_data;
+- crypto_info->unload_data = rk_unload_data;
+- crypto_info->enqueue = rk_crypto_enqueue;
+- crypto_info->busy = false;
++ rk_crypto_enable_clk(crypto_info);
+
+ err = rk_crypto_register(crypto_info);
+ if (err) {
+@@ -412,9 +241,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
+ return 0;
+
+ err_register_alg:
+- tasklet_kill(&crypto_info->queue_task);
+- tasklet_kill(&crypto_info->done_task);
++ crypto_engine_exit(crypto_info->engine);
+ err_crypto:
++ dev_err(dev, "Crypto Accelerator not successfully registered\n");
+ return err;
+ }
+
+@@ -423,8 +252,8 @@ static int rk_crypto_remove(struct platform_device *pdev)
+ struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
+
+ rk_crypto_unregister();
+- tasklet_kill(&crypto_tmp->done_task);
+- tasklet_kill(&crypto_tmp->queue_task);
++ rk_crypto_disable_clk(crypto_tmp);
++ crypto_engine_exit(crypto_tmp->engine);
+ return 0;
+ }
+
+diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
+index 97278c2574ff9..045e811b4af84 100644
+--- a/drivers/crypto/rockchip/rk3288_crypto.h
++++ b/drivers/crypto/rockchip/rk3288_crypto.h
+@@ -5,9 +5,11 @@
+ #include <crypto/aes.h>
+ #include <crypto/internal/des.h>
+ #include <crypto/algapi.h>
++#include <linux/dma-mapping.h>
+ #include <linux/interrupt.h>
+ #include <linux/delay.h>
+ #include <linux/scatterlist.h>
++#include <crypto/engine.h>
+ #include <crypto/internal/hash.h>
+ #include <crypto/internal/skcipher.h>
+
+@@ -193,45 +195,15 @@ struct rk_crypto_info {
+ struct reset_control *rst;
+ void __iomem *reg;
+ int irq;
+- struct crypto_queue queue;
+- struct tasklet_struct queue_task;
+- struct tasklet_struct done_task;
+- struct crypto_async_request *async_req;
+- int err;
+- /* device lock */
+- spinlock_t lock;
+-
+- /* the public variable */
+- struct scatterlist *sg_src;
+- struct scatterlist *sg_dst;
+- struct scatterlist sg_tmp;
+- struct scatterlist *first;
+- unsigned int left_bytes;
+- void *addr_vir;
+- int aligned;
+- int align_size;
+- size_t src_nents;
+- size_t dst_nents;
+- unsigned int total;
+- unsigned int count;
+- dma_addr_t addr_in;
+- dma_addr_t addr_out;
+- bool busy;
+- int (*start)(struct rk_crypto_info *dev);
+- int (*update)(struct rk_crypto_info *dev);
+- void (*complete)(struct crypto_async_request *base, int err);
+- int (*enable_clk)(struct rk_crypto_info *dev);
+- void (*disable_clk)(struct rk_crypto_info *dev);
+- int (*load_data)(struct rk_crypto_info *dev,
+- struct scatterlist *sg_src,
+- struct scatterlist *sg_dst);
+- void (*unload_data)(struct rk_crypto_info *dev);
+- int (*enqueue)(struct rk_crypto_info *dev,
+- struct crypto_async_request *async_req);
++
++ struct crypto_engine *engine;
++ struct completion complete;
++ int status;
+ };
+
+ /* the private variable of hash */
+ struct rk_ahash_ctx {
++ struct crypto_engine_ctx enginectx;
+ struct rk_crypto_info *dev;
+ /* for fallback */
+ struct crypto_ahash *fallback_tfm;
+@@ -241,14 +213,23 @@ struct rk_ahash_ctx {
+ struct rk_ahash_rctx {
+ struct ahash_request fallback_req;
+ u32 mode;
++ int nrsg;
+ };
+
+ /* the private variable of cipher */
+ struct rk_cipher_ctx {
++ struct crypto_engine_ctx enginectx;
+ struct rk_crypto_info *dev;
+ unsigned int keylen;
+- u32 mode;
++ u8 key[AES_MAX_KEY_SIZE];
+ u8 iv[AES_BLOCK_SIZE];
++ struct crypto_skcipher *fallback_tfm;
++};
++
++struct rk_cipher_rctx {
++ u8 backup_iv[AES_BLOCK_SIZE];
++ u32 mode;
++ struct skcipher_request fallback_req; // keep at the end
+ };
+
+ enum alg_type {
+diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+index ed03058497bc2..edd40e16a3f0a 100644
+--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
++++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+@@ -9,6 +9,7 @@
+ * Some ideas are from marvell/cesa.c and s5p-sss.c driver.
+ */
+ #include <linux/device.h>
++#include <asm/unaligned.h>
+ #include "rk3288_crypto.h"
+
+ /*
+@@ -16,6 +17,40 @@
+ * so we put the fixed hash out when met zero message.
+ */
+
++static bool rk_ahash_need_fallback(struct ahash_request *req)
++{
++ struct scatterlist *sg;
++
++ sg = req->src;
++ while (sg) {
++ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
++ return true;
++ }
++ if (sg->length % 4) {
++ return true;
++ }
++ sg = sg_next(sg);
++ }
++ return false;
++}
++
++static int rk_ahash_digest_fb(struct ahash_request *areq)
++{
++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm);
++
++ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
++ rctx->fallback_req.base.flags = areq->base.flags &
++ CRYPTO_TFM_REQ_MAY_SLEEP;
++
++ rctx->fallback_req.nbytes = areq->nbytes;
++ rctx->fallback_req.src = areq->src;
++ rctx->fallback_req.result = areq->result;
++
++ return crypto_ahash_digest(&rctx->fallback_req);
++}
++
+ static int zero_message_process(struct ahash_request *req)
+ {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+@@ -38,16 +73,12 @@ static int zero_message_process(struct ahash_request *req)
+ return 0;
+ }
+
+-static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err)
+-{
+- if (base->complete)
+- base->complete(base, err);
+-}
+-
+-static void rk_ahash_reg_init(struct rk_crypto_info *dev)
++static void rk_ahash_reg_init(struct ahash_request *req)
+ {
+- struct ahash_request *req = ahash_request_cast(dev->async_req);
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
++ struct rk_crypto_info *dev = tctx->dev;
+ int reg_status;
+
+ reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) |
+@@ -74,7 +105,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev)
+ RK_CRYPTO_BYTESWAP_BRFIFO |
+ RK_CRYPTO_BYTESWAP_BTFIFO);
+
+- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total);
++ CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes);
+ }
+
+ static int rk_ahash_init(struct ahash_request *req)
+@@ -167,48 +198,64 @@ static int rk_ahash_digest(struct ahash_request *req)
+ struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+ struct rk_crypto_info *dev = tctx->dev;
+
++ if (rk_ahash_need_fallback(req))
++ return rk_ahash_digest_fb(req);
++
+ if (!req->nbytes)
+ return zero_message_process(req);
+- else
+- return dev->enqueue(dev, &req->base);
++
++ return crypto_transfer_hash_request_to_engine(dev->engine, req);
+ }
+
+-static void crypto_ahash_dma_start(struct rk_crypto_info *dev)
++static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg)
+ {
+- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in);
+- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4);
++ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg));
++ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4);
+ CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START |
+ (RK_CRYPTO_HASH_START << 16));
+ }
+
+-static int rk_ahash_set_data_start(struct rk_crypto_info *dev)
++static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
++{
++ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
++ int ret;
++
++ ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
++ if (ret <= 0)
++ return -EINVAL;
++
++ rctx->nrsg = ret;
++
++ return 0;
++}
++
++static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
+ {
+- int err;
++ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+
+- err = dev->load_data(dev, dev->sg_src, NULL);
+- if (!err)
+- crypto_ahash_dma_start(dev);
+- return err;
++ dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
++ return 0;
+ }
+
+-static int rk_ahash_start(struct rk_crypto_info *dev)
++static int rk_hash_run(struct crypto_engine *engine, void *breq)
+ {
+- struct ahash_request *req = ahash_request_cast(dev->async_req);
+- struct crypto_ahash *tfm;
+- struct rk_ahash_rctx *rctx;
+-
+- dev->total = req->nbytes;
+- dev->left_bytes = req->nbytes;
+- dev->aligned = 0;
+- dev->align_size = 4;
+- dev->sg_dst = NULL;
+- dev->sg_src = req->src;
+- dev->first = req->src;
+- dev->src_nents = sg_nents(req->src);
+- rctx = ahash_request_ctx(req);
++ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
++ struct scatterlist *sg = areq->src;
++ int err = 0;
++ int i;
++ u32 v;
++
+ rctx->mode = 0;
+
+- tfm = crypto_ahash_reqtfm(req);
+ switch (crypto_ahash_digestsize(tfm)) {
+ case SHA1_DIGEST_SIZE:
+ rctx->mode = RK_CRYPTO_HASH_SHA1;
+@@ -220,32 +267,26 @@ static int rk_ahash_start(struct rk_crypto_info *dev)
+ rctx->mode = RK_CRYPTO_HASH_MD5;
+ break;
+ default:
+- return -EINVAL;
++ err = -EINVAL;
++ goto theend;
+ }
+
+- rk_ahash_reg_init(dev);
+- return rk_ahash_set_data_start(dev);
+-}
+-
+-static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
+-{
+- int err = 0;
+- struct ahash_request *req = ahash_request_cast(dev->async_req);
+- struct crypto_ahash *tfm;
+-
+- dev->unload_data(dev);
+- if (dev->left_bytes) {
+- if (dev->aligned) {
+- if (sg_is_last(dev->sg_src)) {
+- dev_warn(dev->dev, "[%s:%d], Lack of data\n",
+- __func__, __LINE__);
+- err = -ENOMEM;
+- goto out_rx;
+- }
+- dev->sg_src = sg_next(dev->sg_src);
++ rk_ahash_reg_init(areq);
++
++ while (sg) {
++ reinit_completion(&tctx->dev->complete);
++ tctx->dev->status = 0;
++ crypto_ahash_dma_start(tctx->dev, sg);
++ wait_for_completion_interruptible_timeout(&tctx->dev->complete,
++ msecs_to_jiffies(2000));
++ if (!tctx->dev->status) {
++ dev_err(tctx->dev->dev, "DMA timeout\n");
++ err = -EFAULT;
++ goto theend;
+ }
+- err = rk_ahash_set_data_start(dev);
+- } else {
++ sg = sg_next(sg);
++ }
++
+ /*
+ * it will take some time to process date after last dma
+ * transmission.
+@@ -256,18 +297,20 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
+ * efficiency, and make it response quickly when dma
+ * complete.
+ */
+- while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS))
+- udelay(10);
+-
+- tfm = crypto_ahash_reqtfm(req);
+- memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0,
+- crypto_ahash_digestsize(tfm));
+- dev->complete(dev->async_req, 0);
+- tasklet_schedule(&dev->queue_task);
++ while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS))
++ udelay(10);
++
++ for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) {
++ v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
++ put_unaligned_le32(v, areq->result + i * 4);
+ }
+
+-out_rx:
+- return err;
++theend:
++ local_bh_disable();
++ crypto_finalize_hash_request(engine, breq, err);
++ local_bh_enable();
++
++ return 0;
+ }
+
+ static int rk_cra_hash_init(struct crypto_tfm *tfm)
+@@ -281,14 +324,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
+ algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+
+ tctx->dev = algt->dev;
+- tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL);
+- if (!tctx->dev->addr_vir) {
+- dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n");
+- return -ENOMEM;
+- }
+- tctx->dev->start = rk_ahash_start;
+- tctx->dev->update = rk_ahash_crypto_rx;
+- tctx->dev->complete = rk_ahash_crypto_complete;
+
+ /* for fallback */
+ tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
+@@ -297,19 +332,23 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
+ dev_err(tctx->dev->dev, "Could not load fallback driver.\n");
+ return PTR_ERR(tctx->fallback_tfm);
+ }
++
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct rk_ahash_rctx) +
+ crypto_ahash_reqsize(tctx->fallback_tfm));
+
+- return tctx->dev->enable_clk(tctx->dev);
++ tctx->enginectx.op.do_one_request = rk_hash_run;
++ tctx->enginectx.op.prepare_request = rk_hash_prepare;
++ tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
++
++ return 0;
+ }
+
+ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
+ {
+ struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
+
+- free_page((unsigned long)tctx->dev->addr_vir);
+- return tctx->dev->disable_clk(tctx->dev);
++ crypto_free_ahash(tctx->fallback_tfm);
+ }
+
+ struct rk_crypto_tmp rk_ahash_sha1 = {
+diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+index 1cece1a7d3f00..67a7e05d5ae31 100644
+--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
++++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+@@ -9,23 +9,77 @@
+ * Some ideas are from marvell-cesa.c and s5p-sss.c driver.
+ */
+ #include <linux/device.h>
++#include <crypto/scatterwalk.h>
+ #include "rk3288_crypto.h"
+
+ #define RK_CRYPTO_DEC BIT(0)
+
+-static void rk_crypto_complete(struct crypto_async_request *base, int err)
++static int rk_cipher_need_fallback(struct skcipher_request *req)
+ {
+- if (base->complete)
+- base->complete(base, err);
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
++ unsigned int bs = crypto_skcipher_blocksize(tfm);
++ struct scatterlist *sgs, *sgd;
++ unsigned int stodo, dtodo, len;
++
++ if (!req->cryptlen)
++ return true;
++
++ len = req->cryptlen;
++ sgs = req->src;
++ sgd = req->dst;
++ while (sgs && sgd) {
++ if (!IS_ALIGNED(sgs->offset, sizeof(u32))) {
++ return true;
++ }
++ if (!IS_ALIGNED(sgd->offset, sizeof(u32))) {
++ return true;
++ }
++ stodo = min(len, sgs->length);
++ if (stodo % bs) {
++ return true;
++ }
++ dtodo = min(len, sgd->length);
++ if (dtodo % bs) {
++ return true;
++ }
++ if (stodo != dtodo) {
++ return true;
++ }
++ len -= stodo;
++ sgs = sg_next(sgs);
++ sgd = sg_next(sgd);
++ }
++ return false;
++}
++
++static int rk_cipher_fallback(struct skcipher_request *areq)
++{
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
++ struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
++ int err;
++
++ skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
++ skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
++ areq->base.complete, areq->base.data);
++ skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
++ areq->cryptlen, areq->iv);
++ if (rctx->mode & RK_CRYPTO_DEC)
++ err = crypto_skcipher_decrypt(&rctx->fallback_req);
++ else
++ err = crypto_skcipher_encrypt(&rctx->fallback_req);
++ return err;
+ }
+
+ static int rk_handle_req(struct rk_crypto_info *dev,
+ struct skcipher_request *req)
+ {
+- if (!IS_ALIGNED(req->cryptlen, dev->align_size))
+- return -EINVAL;
+- else
+- return dev->enqueue(dev, &req->base);
++ struct crypto_engine *engine = dev->engine;
++
++ if (rk_cipher_need_fallback(req))
++ return rk_cipher_fallback(req);
++
++ return crypto_transfer_skcipher_request_to_engine(engine, req);
+ }
+
+ static int rk_aes_setkey(struct crypto_skcipher *cipher,
+@@ -38,8 +92,9 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher,
+ keylen != AES_KEYSIZE_256)
+ return -EINVAL;
+ ctx->keylen = keylen;
+- memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
+- return 0;
++ memcpy(ctx->key, key, keylen);
++
++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
+ }
+
+ static int rk_des_setkey(struct crypto_skcipher *cipher,
+@@ -53,8 +108,9 @@ static int rk_des_setkey(struct crypto_skcipher *cipher,
+ return err;
+
+ ctx->keylen = keylen;
+- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+- return 0;
++ memcpy(ctx->key, key, keylen);
++
++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
+ }
+
+ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
+@@ -68,17 +124,19 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
+ return err;
+
+ ctx->keylen = keylen;
+- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+- return 0;
++ memcpy(ctx->key, key, keylen);
++
++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
+ }
+
+ static int rk_aes_ecb_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_AES_ECB_MODE;
++ rctx->mode = RK_CRYPTO_AES_ECB_MODE;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -86,9 +144,10 @@ static int rk_aes_ecb_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
++ rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -96,9 +155,10 @@ static int rk_aes_cbc_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_AES_CBC_MODE;
++ rctx->mode = RK_CRYPTO_AES_CBC_MODE;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -106,9 +166,10 @@ static int rk_aes_cbc_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
++ rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -116,9 +177,10 @@ static int rk_des_ecb_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = 0;
++ rctx->mode = 0;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -126,9 +188,10 @@ static int rk_des_ecb_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_DEC;
++ rctx->mode = RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -136,9 +199,10 @@ static int rk_des_cbc_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
++ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -146,9 +210,10 @@ static int rk_des_cbc_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
++ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -156,9 +221,10 @@ static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_SELECT;
++ rctx->mode = RK_CRYPTO_TDES_SELECT;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -166,9 +232,10 @@ static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -176,9 +243,10 @@ static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
+ return rk_handle_req(dev, req);
+ }
+
+@@ -186,43 +254,42 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
+ {
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *dev = ctx->dev;
+
+- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
+ RK_CRYPTO_DEC;
+ return rk_handle_req(dev, req);
+ }
+
+-static void rk_ablk_hw_init(struct rk_crypto_info *dev)
++static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
+ {
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+ struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+ struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+- u32 ivsize, block, conf_reg = 0;
++ u32 block, conf_reg = 0;
+
+ block = crypto_tfm_alg_blocksize(tfm);
+- ivsize = crypto_skcipher_ivsize(cipher);
+
+ if (block == DES_BLOCK_SIZE) {
+- ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
++ rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
+ RK_CRYPTO_TDES_BYTESWAP_KEY |
+ RK_CRYPTO_TDES_BYTESWAP_IV;
+- CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
+- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
++ CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
+ conf_reg = RK_CRYPTO_DESSEL;
+ } else {
+- ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
++ rctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
+ RK_CRYPTO_AES_KEY_CHANGE |
+ RK_CRYPTO_AES_BYTESWAP_KEY |
+ RK_CRYPTO_AES_BYTESWAP_IV;
+ if (ctx->keylen == AES_KEYSIZE_192)
+- ctx->mode |= RK_CRYPTO_AES_192BIT_key;
++ rctx->mode |= RK_CRYPTO_AES_192BIT_key;
+ else if (ctx->keylen == AES_KEYSIZE_256)
+- ctx->mode |= RK_CRYPTO_AES_256BIT_key;
+- CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
+- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
++ rctx->mode |= RK_CRYPTO_AES_256BIT_key;
++ CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
+ }
+ conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
+ RK_CRYPTO_BYTESWAP_BRFIFO;
+@@ -231,146 +298,138 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
+ RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA);
+ }
+
+-static void crypto_dma_start(struct rk_crypto_info *dev)
++static void crypto_dma_start(struct rk_crypto_info *dev,
++ struct scatterlist *sgs,
++ struct scatterlist *sgd, unsigned int todo)
+ {
+- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in);
+- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4);
+- CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out);
++ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs));
++ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo);
++ CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd));
+ CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START |
+ _SBF(RK_CRYPTO_BLOCK_START, 16));
+ }
+
+-static int rk_set_data_start(struct rk_crypto_info *dev)
++static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
+ {
+- int err;
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
++ struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base);
++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+- u32 ivsize = crypto_skcipher_ivsize(tfm);
+- u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
+- dev->sg_src->offset + dev->sg_src->length - ivsize;
+-
+- /* Store the iv that need to be updated in chain mode.
+- * And update the IV buffer to contain the next IV for decryption mode.
+- */
+- if (ctx->mode & RK_CRYPTO_DEC) {
+- memcpy(ctx->iv, src_last_blk, ivsize);
+- sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv,
+- ivsize, dev->total - ivsize);
+- }
+-
+- err = dev->load_data(dev, dev->sg_src, dev->sg_dst);
+- if (!err)
+- crypto_dma_start(dev);
+- return err;
+-}
+-
+-static int rk_ablk_start(struct rk_crypto_info *dev)
+-{
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+- unsigned long flags;
++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
++ struct scatterlist *sgs, *sgd;
+ int err = 0;
++ int ivsize = crypto_skcipher_ivsize(tfm);
++ int offset;
++ u8 iv[AES_BLOCK_SIZE];
++ u8 biv[AES_BLOCK_SIZE];
++ u8 *ivtouse = areq->iv;
++ unsigned int len = areq->cryptlen;
++ unsigned int todo;
++
++ ivsize = crypto_skcipher_ivsize(tfm);
++ if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
++ if (rctx->mode & RK_CRYPTO_DEC) {
++ offset = areq->cryptlen - ivsize;
++ scatterwalk_map_and_copy(rctx->backup_iv, areq->src,
++ offset, ivsize, 0);
++ }
++ }
+
+- dev->left_bytes = req->cryptlen;
+- dev->total = req->cryptlen;
+- dev->sg_src = req->src;
+- dev->first = req->src;
+- dev->src_nents = sg_nents(req->src);
+- dev->sg_dst = req->dst;
+- dev->dst_nents = sg_nents(req->dst);
+- dev->aligned = 1;
+-
+- spin_lock_irqsave(&dev->lock, flags);
+- rk_ablk_hw_init(dev);
+- err = rk_set_data_start(dev);
+- spin_unlock_irqrestore(&dev->lock, flags);
+- return err;
+-}
+-
+-static void rk_iv_copyback(struct rk_crypto_info *dev)
+-{
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+- u32 ivsize = crypto_skcipher_ivsize(tfm);
++ sgs = areq->src;
++ sgd = areq->dst;
+
+- /* Update the IV buffer to contain the next IV for encryption mode. */
+- if (!(ctx->mode & RK_CRYPTO_DEC)) {
+- if (dev->aligned) {
+- memcpy(req->iv, sg_virt(dev->sg_dst) +
+- dev->sg_dst->length - ivsize, ivsize);
++ while (sgs && sgd && len) {
++ if (!sgs->length) {
++ sgs = sg_next(sgs);
++ sgd = sg_next(sgd);
++ continue;
++ }
++ if (rctx->mode & RK_CRYPTO_DEC) {
++ /* we backup last block of source to be used as IV at next step */
++ offset = sgs->length - ivsize;
++ scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0);
++ }
++ if (sgs == sgd) {
++ err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
++ if (err <= 0) {
++ err = -EINVAL;
++ goto theend_iv;
++ }
+ } else {
+- memcpy(req->iv, dev->addr_vir +
+- dev->count - ivsize, ivsize);
++ err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
++ if (err <= 0) {
++ err = -EINVAL;
++ goto theend_iv;
++ }
++ err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
++ if (err <= 0) {
++ err = -EINVAL;
++ goto theend_sgs;
++ }
++ }
++ err = 0;
++ rk_ablk_hw_init(ctx->dev, areq);
++ if (ivsize) {
++ if (ivsize == DES_BLOCK_SIZE)
++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
++ else
++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize);
++ }
++ reinit_completion(&ctx->dev->complete);
++ ctx->dev->status = 0;
++
++ todo = min(sg_dma_len(sgs), len);
++ len -= todo;
++ crypto_dma_start(ctx->dev, sgs, sgd, todo / 4);
++ wait_for_completion_interruptible_timeout(&ctx->dev->complete,
++ msecs_to_jiffies(2000));
++ if (!ctx->dev->status) {
++ dev_err(ctx->dev->dev, "DMA timeout\n");
++ err = -EFAULT;
++ goto theend;
+ }
++ if (sgs == sgd) {
++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
++ } else {
++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
++ dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
++ }
++ if (rctx->mode & RK_CRYPTO_DEC) {
++ memcpy(iv, biv, ivsize);
++ ivtouse = iv;
++ } else {
++ offset = sgd->length - ivsize;
++ scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0);
++ ivtouse = iv;
++ }
++ sgs = sg_next(sgs);
++ sgd = sg_next(sgd);
+ }
+-}
+-
+-static void rk_update_iv(struct rk_crypto_info *dev)
+-{
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+- u32 ivsize = crypto_skcipher_ivsize(tfm);
+- u8 *new_iv = NULL;
+
+- if (ctx->mode & RK_CRYPTO_DEC) {
+- new_iv = ctx->iv;
+- } else {
+- new_iv = page_address(sg_page(dev->sg_dst)) +
+- dev->sg_dst->offset + dev->sg_dst->length - ivsize;
++ if (areq->iv && ivsize > 0) {
++ offset = areq->cryptlen - ivsize;
++ if (rctx->mode & RK_CRYPTO_DEC) {
++ memcpy(areq->iv, rctx->backup_iv, ivsize);
++ memzero_explicit(rctx->backup_iv, ivsize);
++ } else {
++ scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
++ ivsize, 0);
++ }
+ }
+
+- if (ivsize == DES_BLOCK_SIZE)
+- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize);
+- else if (ivsize == AES_BLOCK_SIZE)
+- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize);
+-}
++theend:
++ local_bh_disable();
++ crypto_finalize_skcipher_request(engine, areq, err);
++ local_bh_enable();
++ return 0;
+
+-/* return:
+- * true some err was occurred
+- * fault no err, continue
+- */
+-static int rk_ablk_rx(struct rk_crypto_info *dev)
+-{
+- int err = 0;
+- struct skcipher_request *req =
+- skcipher_request_cast(dev->async_req);
+-
+- dev->unload_data(dev);
+- if (!dev->aligned) {
+- if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents,
+- dev->addr_vir, dev->count,
+- dev->total - dev->left_bytes -
+- dev->count)) {
+- err = -EINVAL;
+- goto out_rx;
+- }
+- }
+- if (dev->left_bytes) {
+- rk_update_iv(dev);
+- if (dev->aligned) {
+- if (sg_is_last(dev->sg_src)) {
+- dev_err(dev->dev, "[%s:%d] Lack of data\n",
+- __func__, __LINE__);
+- err = -ENOMEM;
+- goto out_rx;
+- }
+- dev->sg_src = sg_next(dev->sg_src);
+- dev->sg_dst = sg_next(dev->sg_dst);
+- }
+- err = rk_set_data_start(dev);
++theend_sgs:
++ if (sgs == sgd) {
++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+ } else {
+- rk_iv_copyback(dev);
+- /* here show the calculation is over without any err */
+- dev->complete(dev->async_req, 0);
+- tasklet_schedule(&dev->queue_task);
++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
++ dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+ }
+-out_rx:
++theend_iv:
+ return err;
+ }
+
+@@ -378,26 +437,34 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
+ {
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
++ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct rk_crypto_tmp *algt;
+
+ algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+
+ ctx->dev = algt->dev;
+- ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1;
+- ctx->dev->start = rk_ablk_start;
+- ctx->dev->update = rk_ablk_rx;
+- ctx->dev->complete = rk_crypto_complete;
+- ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
+
+- return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM;
++ ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
++ if (IS_ERR(ctx->fallback_tfm)) {
++ dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
++ name, PTR_ERR(ctx->fallback_tfm));
++ return PTR_ERR(ctx->fallback_tfm);
++ }
++
++ tfm->reqsize = sizeof(struct rk_cipher_rctx) +
++ crypto_skcipher_reqsize(ctx->fallback_tfm);
++
++ ctx->enginectx.op.do_one_request = rk_cipher_run;
++
++ return 0;
+ }
+
+ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
+ {
+ struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+- free_page((unsigned long)ctx->dev->addr_vir);
+- ctx->dev->disable_clk(ctx->dev);
++ memzero_explicit(ctx->key, ctx->keylen);
++ crypto_free_skcipher(ctx->fallback_tfm);
+ }
+
+ struct rk_crypto_tmp rk_ecb_aes_alg = {
+@@ -406,7 +473,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x0f,
+@@ -428,7 +495,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x0f,
+@@ -451,7 +518,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x07,
+@@ -473,7 +540,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x07,
+@@ -496,7 +563,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3-ede-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x07,
+@@ -506,7 +573,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
+ .exit = rk_ablk_exit_tfm,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+- .ivsize = DES_BLOCK_SIZE,
+ .setkey = rk_tdes_setkey,
+ .encrypt = rk_des3_ede_ecb_encrypt,
+ .decrypt = rk_des3_ede_ecb_decrypt,
+@@ -519,7 +585,7 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3-ede-rk",
+ .base.cra_priority = 300,
+- .base.cra_flags = CRYPTO_ALG_ASYNC,
++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
+ .base.cra_alignmask = 0x07,
+diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
+index 55aa3a71169b0..7717e9e5977bb 100644
+--- a/drivers/crypto/s5p-sss.c
++++ b/drivers/crypto/s5p-sss.c
+@@ -2171,6 +2171,8 @@ static int s5p_aes_probe(struct platform_device *pdev)
+
+ variant = find_s5p_sss_version(pdev);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -EINVAL;
+
+ /*
+ * Note: HASH and PRNG uses the same registers in secss, avoid
+diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
+index 457084b344c17..b07ae4ba165e7 100644
+--- a/drivers/crypto/sahara.c
++++ b/drivers/crypto/sahara.c
+@@ -26,10 +26,10 @@
+ #include <linux/kernel.h>
+ #include <linux/kthread.h>
+ #include <linux/module.h>
+-#include <linux/mutex.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
++#include <linux/spinlock.h>
+
+ #define SHA_BUFFER_LEN PAGE_SIZE
+ #define SAHARA_MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE
+@@ -196,7 +196,7 @@ struct sahara_dev {
+ void __iomem *regs_base;
+ struct clk *clk_ipg;
+ struct clk *clk_ahb;
+- struct mutex queue_mutex;
++ spinlock_t queue_spinlock;
+ struct task_struct *kthread;
+ struct completion dma_completion;
+
+@@ -642,9 +642,9 @@ static int sahara_aes_crypt(struct skcipher_request *req, unsigned long mode)
+
+ rctx->mode = mode;
+
+- mutex_lock(&dev->queue_mutex);
++ spin_lock_bh(&dev->queue_spinlock);
+ err = crypto_enqueue_request(&dev->queue, &req->base);
+- mutex_unlock(&dev->queue_mutex);
++ spin_unlock_bh(&dev->queue_spinlock);
+
+ wake_up_process(dev->kthread);
+
+@@ -1043,10 +1043,10 @@ static int sahara_queue_manage(void *data)
+ do {
+ __set_current_state(TASK_INTERRUPTIBLE);
+
+- mutex_lock(&dev->queue_mutex);
++ spin_lock_bh(&dev->queue_spinlock);
+ backlog = crypto_get_backlog(&dev->queue);
+ async_req = crypto_dequeue_request(&dev->queue);
+- mutex_unlock(&dev->queue_mutex);
++ spin_unlock_bh(&dev->queue_spinlock);
+
+ if (backlog)
+ backlog->complete(backlog, -EINPROGRESS);
+@@ -1092,9 +1092,9 @@ static int sahara_sha_enqueue(struct ahash_request *req, int last)
+ rctx->first = 1;
+ }
+
+- mutex_lock(&dev->queue_mutex);
++ spin_lock_bh(&dev->queue_spinlock);
+ ret = crypto_enqueue_request(&dev->queue, &req->base);
+- mutex_unlock(&dev->queue_mutex);
++ spin_unlock_bh(&dev->queue_spinlock);
+
+ wake_up_process(dev->kthread);
+
+@@ -1449,7 +1449,7 @@ static int sahara_probe(struct platform_device *pdev)
+
+ crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH);
+
+- mutex_init(&dev->queue_mutex);
++ spin_lock_init(&dev->queue_spinlock);
+
+ dev_ptr = dev;
+
+diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
+index 75867c0b00172..90a920e7f6642 100644
+--- a/drivers/crypto/stm32/stm32-crc32.c
++++ b/drivers/crypto/stm32/stm32-crc32.c
+@@ -279,7 +279,7 @@ static struct shash_alg algs[] = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32",
+- .cra_driver_name = DRIVER_NAME,
++ .cra_driver_name = "stm32-crc32-crc32",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+@@ -301,7 +301,7 @@ static struct shash_alg algs[] = {
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32c",
+- .cra_driver_name = DRIVER_NAME,
++ .cra_driver_name = "stm32-crc32-crc32c",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+@@ -384,8 +384,10 @@ static int stm32_crc_remove(struct platform_device *pdev)
+ struct stm32_crc *crc = platform_get_drvdata(pdev);
+ int ret = pm_runtime_get_sync(crc->dev);
+
+- if (ret < 0)
++ if (ret < 0) {
++ pm_runtime_put_noidle(crc->dev);
+ return ret;
++ }
+
+ spin_lock(&crc_list.lock);
+ list_del(&crc->list);
+diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
+index 7389a0536ff02..81eb136b6c11d 100644
+--- a/drivers/crypto/stm32/stm32-cryp.c
++++ b/drivers/crypto/stm32/stm32-cryp.c
+@@ -37,7 +37,6 @@
+ /* Mode mask = bits [15..0] */
+ #define FLG_MODE_MASK GENMASK(15, 0)
+ /* Bit [31..16] status */
+-#define FLG_CCM_PADDED_WA BIT(16)
+
+ /* Registers */
+ #define CRYP_CR 0x00000000
+@@ -105,8 +104,6 @@
+ /* Misc */
+ #define AES_BLOCK_32 (AES_BLOCK_SIZE / sizeof(u32))
+ #define GCM_CTR_INIT 2
+-#define _walked_in (cryp->in_walk.offset - cryp->in_sg->offset)
+-#define _walked_out (cryp->out_walk.offset - cryp->out_sg->offset)
+ #define CRYP_AUTOSUSPEND_DELAY 50
+
+ struct stm32_cryp_caps {
+@@ -144,26 +141,16 @@ struct stm32_cryp {
+ size_t authsize;
+ size_t hw_blocksize;
+
+- size_t total_in;
+- size_t total_in_save;
+- size_t total_out;
+- size_t total_out_save;
++ size_t payload_in;
++ size_t header_in;
++ size_t payload_out;
+
+- struct scatterlist *in_sg;
+ struct scatterlist *out_sg;
+- struct scatterlist *out_sg_save;
+-
+- struct scatterlist in_sgl;
+- struct scatterlist out_sgl;
+- bool sgs_copied;
+-
+- int in_sg_len;
+- int out_sg_len;
+
+ struct scatter_walk in_walk;
+ struct scatter_walk out_walk;
+
+- u32 last_ctr[4];
++ __be32 last_ctr[4];
+ u32 gcm_ctr;
+ };
+
+@@ -262,6 +249,7 @@ static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp)
+ }
+
+ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp);
++static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err);
+
+ static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx)
+ {
+@@ -283,103 +271,6 @@ static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx)
+ return cryp;
+ }
+
+-static int stm32_cryp_check_aligned(struct scatterlist *sg, size_t total,
+- size_t align)
+-{
+- int len = 0;
+-
+- if (!total)
+- return 0;
+-
+- if (!IS_ALIGNED(total, align))
+- return -EINVAL;
+-
+- while (sg) {
+- if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+- return -EINVAL;
+-
+- if (!IS_ALIGNED(sg->length, align))
+- return -EINVAL;
+-
+- len += sg->length;
+- sg = sg_next(sg);
+- }
+-
+- if (len != total)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int stm32_cryp_check_io_aligned(struct stm32_cryp *cryp)
+-{
+- int ret;
+-
+- ret = stm32_cryp_check_aligned(cryp->in_sg, cryp->total_in,
+- cryp->hw_blocksize);
+- if (ret)
+- return ret;
+-
+- ret = stm32_cryp_check_aligned(cryp->out_sg, cryp->total_out,
+- cryp->hw_blocksize);
+-
+- return ret;
+-}
+-
+-static void sg_copy_buf(void *buf, struct scatterlist *sg,
+- unsigned int start, unsigned int nbytes, int out)
+-{
+- struct scatter_walk walk;
+-
+- if (!nbytes)
+- return;
+-
+- scatterwalk_start(&walk, sg);
+- scatterwalk_advance(&walk, start);
+- scatterwalk_copychunks(buf, &walk, nbytes, out);
+- scatterwalk_done(&walk, out, 0);
+-}
+-
+-static int stm32_cryp_copy_sgs(struct stm32_cryp *cryp)
+-{
+- void *buf_in, *buf_out;
+- int pages, total_in, total_out;
+-
+- if (!stm32_cryp_check_io_aligned(cryp)) {
+- cryp->sgs_copied = 0;
+- return 0;
+- }
+-
+- total_in = ALIGN(cryp->total_in, cryp->hw_blocksize);
+- pages = total_in ? get_order(total_in) : 1;
+- buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
+-
+- total_out = ALIGN(cryp->total_out, cryp->hw_blocksize);
+- pages = total_out ? get_order(total_out) : 1;
+- buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
+-
+- if (!buf_in || !buf_out) {
+- dev_err(cryp->dev, "Can't allocate pages when unaligned\n");
+- cryp->sgs_copied = 0;
+- return -EFAULT;
+- }
+-
+- sg_copy_buf(buf_in, cryp->in_sg, 0, cryp->total_in, 0);
+-
+- sg_init_one(&cryp->in_sgl, buf_in, total_in);
+- cryp->in_sg = &cryp->in_sgl;
+- cryp->in_sg_len = 1;
+-
+- sg_init_one(&cryp->out_sgl, buf_out, total_out);
+- cryp->out_sg_save = cryp->out_sg;
+- cryp->out_sg = &cryp->out_sgl;
+- cryp->out_sg_len = 1;
+-
+- cryp->sgs_copied = 1;
+-
+- return 0;
+-}
+-
+ static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, __be32 *iv)
+ {
+ if (!iv)
+@@ -481,16 +372,99 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg)
+
+ /* Wait for end of processing */
+ ret = stm32_cryp_wait_enable(cryp);
+- if (ret)
++ if (ret) {
+ dev_err(cryp->dev, "Timeout (gcm init)\n");
++ return ret;
++ }
+
+- return ret;
++ /* Prepare next phase */
++ if (cryp->areq->assoclen) {
++ cfg |= CR_PH_HEADER;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++ } else if (stm32_cryp_get_input_text_len(cryp)) {
++ cfg |= CR_PH_PAYLOAD;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++ }
++
++ return 0;
++}
++
++static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp)
++{
++ u32 cfg;
++ int err;
++
++ /* Check if whole header written */
++ if (!cryp->header_in) {
++ /* Wait for completion */
++ err = stm32_cryp_wait_busy(cryp);
++ if (err) {
++ dev_err(cryp->dev, "Timeout (gcm/ccm header)\n");
++ stm32_cryp_write(cryp, CRYP_IMSCR, 0);
++ stm32_cryp_finish_req(cryp, err);
++ return;
++ }
++
++ if (stm32_cryp_get_input_text_len(cryp)) {
++ /* Phase 3 : payload */
++ cfg = stm32_cryp_read(cryp, CRYP_CR);
++ cfg &= ~CR_CRYPEN;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++
++ cfg &= ~CR_PH_MASK;
++ cfg |= CR_PH_PAYLOAD | CR_CRYPEN;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++ } else {
++ /*
++ * Phase 4 : tag.
++ * Nothing to read, nothing to write, caller have to
++ * end request
++ */
++ }
++ }
++}
++
++static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp)
++{
++ unsigned int i;
++ size_t written;
++ size_t len;
++ u32 alen = cryp->areq->assoclen;
++ u32 block[AES_BLOCK_32] = {0};
++ u8 *b8 = (u8 *)block;
++
++ if (alen <= 65280) {
++ /* Write first u32 of B1 */
++ b8[0] = (alen >> 8) & 0xFF;
++ b8[1] = alen & 0xFF;
++ len = 2;
++ } else {
++ /* Build the two first u32 of B1 */
++ b8[0] = 0xFF;
++ b8[1] = 0xFE;
++ b8[2] = (alen & 0xFF000000) >> 24;
++ b8[3] = (alen & 0x00FF0000) >> 16;
++ b8[4] = (alen & 0x0000FF00) >> 8;
++ b8[5] = alen & 0x000000FF;
++ len = 6;
++ }
++
++ written = min_t(size_t, AES_BLOCK_SIZE - len, alen);
++
++ scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0);
++ for (i = 0; i < AES_BLOCK_32; i++)
++ stm32_cryp_write(cryp, CRYP_DIN, block[i]);
++
++ cryp->header_in -= written;
++
++ stm32_crypt_gcmccm_end_header(cryp);
+ }
+
+ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
+ {
+ int ret;
+- u8 iv[AES_BLOCK_SIZE], b0[AES_BLOCK_SIZE];
++ u32 iv_32[AES_BLOCK_32], b0_32[AES_BLOCK_32];
++ u8 *iv = (u8 *)iv_32, *b0 = (u8 *)b0_32;
+ __be32 *bd;
+ u32 *d;
+ unsigned int i, textlen;
+@@ -531,10 +505,24 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
+
+ /* Wait for end of processing */
+ ret = stm32_cryp_wait_enable(cryp);
+- if (ret)
++ if (ret) {
+ dev_err(cryp->dev, "Timeout (ccm init)\n");
++ return ret;
++ }
+
+- return ret;
++ /* Prepare next phase */
++ if (cryp->areq->assoclen) {
++ cfg |= CR_PH_HEADER | CR_CRYPEN;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++
++ /* Write first (special) block (may move to next phase [payload]) */
++ stm32_cryp_write_ccm_first_header(cryp);
++ } else if (stm32_cryp_get_input_text_len(cryp)) {
++ cfg |= CR_PH_PAYLOAD;
++ stm32_cryp_write(cryp, CRYP_CR, cfg);
++ }
++
++ return 0;
+ }
+
+ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+@@ -542,7 +530,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+ int ret;
+ u32 cfg, hw_mode;
+
+- pm_runtime_resume_and_get(cryp->dev);
++ pm_runtime_get_sync(cryp->dev);
+
+ /* Disable interrupt */
+ stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+@@ -605,16 +593,6 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+ if (ret)
+ return ret;
+
+- /* Phase 2 : header (authenticated data) */
+- if (cryp->areq->assoclen) {
+- cfg |= CR_PH_HEADER;
+- } else if (stm32_cryp_get_input_text_len(cryp)) {
+- cfg |= CR_PH_PAYLOAD;
+- stm32_cryp_write(cryp, CRYP_CR, cfg);
+- } else {
+- cfg |= CR_PH_INIT;
+- }
+-
+ break;
+
+ case CR_DES_CBC:
+@@ -633,8 +611,6 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+
+ stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+- cryp->flags &= ~FLG_CCM_PADDED_WA;
+-
+ return 0;
+ }
+
+@@ -644,28 +620,9 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err)
+ /* Phase 4 : output tag */
+ err = stm32_cryp_read_auth_tag(cryp);
+
+- if (!err && (!(is_gcm(cryp) || is_ccm(cryp))))
++ if (!err && (!(is_gcm(cryp) || is_ccm(cryp) || is_ecb(cryp))))
+ stm32_cryp_get_iv(cryp);
+
+- if (cryp->sgs_copied) {
+- void *buf_in, *buf_out;
+- int pages, len;
+-
+- buf_in = sg_virt(&cryp->in_sgl);
+- buf_out = sg_virt(&cryp->out_sgl);
+-
+- sg_copy_buf(buf_out, cryp->out_sg_save, 0,
+- cryp->total_out_save, 1);
+-
+- len = ALIGN(cryp->total_in_save, cryp->hw_blocksize);
+- pages = len ? get_order(len) : 1;
+- free_pages((unsigned long)buf_in, pages);
+-
+- len = ALIGN(cryp->total_out_save, cryp->hw_blocksize);
+- pages = len ? get_order(len) : 1;
+- free_pages((unsigned long)buf_out, pages);
+- }
+-
+ pm_runtime_mark_last_busy(cryp->dev);
+ pm_runtime_put_autosuspend(cryp->dev);
+
+@@ -674,8 +631,6 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err)
+ else
+ crypto_finalize_skcipher_request(cryp->engine, cryp->req,
+ err);
+-
+- memset(cryp->ctx->key, 0, cryp->ctx->keylen);
+ }
+
+ static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
+@@ -801,7 +756,20 @@ static int stm32_cryp_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+ static int stm32_cryp_aes_gcm_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+ {
+- return authsize == AES_BLOCK_SIZE ? 0 : -EINVAL;
++ switch (authsize) {
++ case 4:
++ case 8:
++ case 12:
++ case 13:
++ case 14:
++ case 15:
++ case 16:
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return 0;
+ }
+
+ static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm,
+@@ -825,31 +793,61 @@ static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm,
+
+ static int stm32_cryp_aes_ecb_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % AES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_aes_ecb_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % AES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_ECB);
+ }
+
+ static int stm32_cryp_aes_cbc_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % AES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_CBC | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_aes_cbc_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % AES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_CBC);
+ }
+
+ static int stm32_cryp_aes_ctr_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_CTR | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_aes_ctr_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_AES | FLG_CTR);
+ }
+
+@@ -863,53 +861,122 @@ static int stm32_cryp_aes_gcm_decrypt(struct aead_request *req)
+ return stm32_cryp_aead_crypt(req, FLG_AES | FLG_GCM);
+ }
+
++static inline int crypto_ccm_check_iv(const u8 *iv)
++{
++ /* 2 <= L <= 8, so 1 <= L' <= 7. */
++ if (iv[0] < 1 || iv[0] > 7)
++ return -EINVAL;
++
++ return 0;
++}
++
+ static int stm32_cryp_aes_ccm_encrypt(struct aead_request *req)
+ {
++ int err;
++
++ err = crypto_ccm_check_iv(req->iv);
++ if (err)
++ return err;
++
+ return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_aes_ccm_decrypt(struct aead_request *req)
+ {
++ int err;
++
++ err = crypto_ccm_check_iv(req->iv);
++ if (err)
++ return err;
++
+ return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM);
+ }
+
+ static int stm32_cryp_des_ecb_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_des_ecb_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_DES | FLG_ECB);
+ }
+
+ static int stm32_cryp_des_cbc_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_DES | FLG_CBC | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_des_cbc_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_DES | FLG_CBC);
+ }
+
+ static int stm32_cryp_tdes_ecb_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_tdes_ecb_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB);
+ }
+
+ static int stm32_cryp_tdes_cbc_encrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC | FLG_ENCRYPT);
+ }
+
+ static int stm32_cryp_tdes_cbc_decrypt(struct skcipher_request *req)
+ {
++ if (req->cryptlen % DES_BLOCK_SIZE)
++ return -EINVAL;
++
++ if (req->cryptlen == 0)
++ return 0;
++
+ return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC);
+ }
+
+@@ -919,6 +986,7 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req,
+ struct stm32_cryp_ctx *ctx;
+ struct stm32_cryp *cryp;
+ struct stm32_cryp_reqctx *rctx;
++ struct scatterlist *in_sg;
+ int ret;
+
+ if (!req && !areq)
+@@ -944,76 +1012,55 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req,
+ if (req) {
+ cryp->req = req;
+ cryp->areq = NULL;
+- cryp->total_in = req->cryptlen;
+- cryp->total_out = cryp->total_in;
++ cryp->header_in = 0;
++ cryp->payload_in = req->cryptlen;
++ cryp->payload_out = req->cryptlen;
++ cryp->authsize = 0;
+ } else {
+ /*
+ * Length of input and output data:
+ * Encryption case:
+- * INPUT = AssocData || PlainText
++ * INPUT = AssocData || PlainText
+ * <- assoclen -> <- cryptlen ->
+- * <------- total_in ----------->
+ *
+- * OUTPUT = AssocData || CipherText || AuthTag
+- * <- assoclen -> <- cryptlen -> <- authsize ->
+- * <---------------- total_out ----------------->
++ * OUTPUT = AssocData || CipherText || AuthTag
++ * <- assoclen -> <-- cryptlen --> <- authsize ->
+ *
+ * Decryption case:
+- * INPUT = AssocData || CipherText || AuthTag
+- * <- assoclen -> <--------- cryptlen --------->
+- * <- authsize ->
+- * <---------------- total_in ------------------>
++ * INPUT = AssocData || CipherTex || AuthTag
++ * <- assoclen ---> <---------- cryptlen ---------->
+ *
+- * OUTPUT = AssocData || PlainText
+- * <- assoclen -> <- crypten - authsize ->
+- * <---------- total_out ----------------->
++ * OUTPUT = AssocData || PlainText
++ * <- assoclen -> <- cryptlen - authsize ->
+ */
+ cryp->areq = areq;
+ cryp->req = NULL;
+ cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq));
+- cryp->total_in = areq->assoclen + areq->cryptlen;
+- if (is_encrypt(cryp))
+- /* Append auth tag to output */
+- cryp->total_out = cryp->total_in + cryp->authsize;
+- else
+- /* No auth tag in output */
+- cryp->total_out = cryp->total_in - cryp->authsize;
++ if (is_encrypt(cryp)) {
++ cryp->payload_in = areq->cryptlen;
++ cryp->header_in = areq->assoclen;
++ cryp->payload_out = areq->cryptlen;
++ } else {
++ cryp->payload_in = areq->cryptlen - cryp->authsize;
++ cryp->header_in = areq->assoclen;
++ cryp->payload_out = cryp->payload_in;
++ }
+ }
+
+- cryp->total_in_save = cryp->total_in;
+- cryp->total_out_save = cryp->total_out;
++ in_sg = req ? req->src : areq->src;
++ scatterwalk_start(&cryp->in_walk, in_sg);
+
+- cryp->in_sg = req ? req->src : areq->src;
+ cryp->out_sg = req ? req->dst : areq->dst;
+- cryp->out_sg_save = cryp->out_sg;
+-
+- cryp->in_sg_len = sg_nents_for_len(cryp->in_sg, cryp->total_in);
+- if (cryp->in_sg_len < 0) {
+- dev_err(cryp->dev, "Cannot get in_sg_len\n");
+- ret = cryp->in_sg_len;
+- return ret;
+- }
+-
+- cryp->out_sg_len = sg_nents_for_len(cryp->out_sg, cryp->total_out);
+- if (cryp->out_sg_len < 0) {
+- dev_err(cryp->dev, "Cannot get out_sg_len\n");
+- ret = cryp->out_sg_len;
+- return ret;
+- }
+-
+- ret = stm32_cryp_copy_sgs(cryp);
+- if (ret)
+- return ret;
+-
+- scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+ scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+
+ if (is_gcm(cryp) || is_ccm(cryp)) {
+ /* In output, jump after assoc data */
+- scatterwalk_advance(&cryp->out_walk, cryp->areq->assoclen);
+- cryp->total_out -= cryp->areq->assoclen;
++ scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->areq->assoclen, 2);
+ }
+
++ if (is_ctr(cryp))
++ memset(cryp->last_ctr, 0, sizeof(cryp->last_ctr));
++
+ ret = stm32_cryp_hw_init(cryp);
+ return ret;
+ }
+@@ -1061,8 +1108,7 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq)
+ if (!cryp)
+ return -ENODEV;
+
+- if (unlikely(!cryp->areq->assoclen &&
+- !stm32_cryp_get_input_text_len(cryp))) {
++ if (unlikely(!cryp->payload_in && !cryp->header_in)) {
+ /* No input data to process: get tag and finish */
+ stm32_cryp_finish_req(cryp, 0);
+ return 0;
+@@ -1071,43 +1117,10 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq)
+ return stm32_cryp_cpu_start(cryp);
+ }
+
+-static u32 *stm32_cryp_next_out(struct stm32_cryp *cryp, u32 *dst,
+- unsigned int n)
+-{
+- scatterwalk_advance(&cryp->out_walk, n);
+-
+- if (unlikely(cryp->out_sg->length == _walked_out)) {
+- cryp->out_sg = sg_next(cryp->out_sg);
+- if (cryp->out_sg) {
+- scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+- return (sg_virt(cryp->out_sg) + _walked_out);
+- }
+- }
+-
+- return (u32 *)((u8 *)dst + n);
+-}
+-
+-static u32 *stm32_cryp_next_in(struct stm32_cryp *cryp, u32 *src,
+- unsigned int n)
+-{
+- scatterwalk_advance(&cryp->in_walk, n);
+-
+- if (unlikely(cryp->in_sg->length == _walked_in)) {
+- cryp->in_sg = sg_next(cryp->in_sg);
+- if (cryp->in_sg) {
+- scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+- return (sg_virt(cryp->in_sg) + _walked_in);
+- }
+- }
+-
+- return (u32 *)((u8 *)src + n);
+-}
+-
+ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
+ {
+- u32 cfg, size_bit, *dst, d32;
+- u8 *d8;
+- unsigned int i, j;
++ u32 cfg, size_bit;
++ unsigned int i;
+ int ret = 0;
+
+ /* Update Config */
+@@ -1130,7 +1143,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
+ stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+
+ size_bit = is_encrypt(cryp) ? cryp->areq->cryptlen :
+- cryp->areq->cryptlen - AES_BLOCK_SIZE;
++ cryp->areq->cryptlen - cryp->authsize;
+ size_bit *= 8;
+ if (cryp->caps->swap_final)
+ size_bit = (__force u32)cpu_to_be32(size_bit);
+@@ -1139,11 +1152,9 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
+ stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+ } else {
+ /* CCM: write CTR0 */
+- u8 iv[AES_BLOCK_SIZE];
+- u32 *iv32 = (u32 *)iv;
+- __be32 *biv;
+-
+- biv = (void *)iv;
++ u32 iv32[AES_BLOCK_32];
++ u8 *iv = (u8 *)iv32;
++ __be32 *biv = (__be32 *)iv32;
+
+ memcpy(iv, cryp->areq->iv, AES_BLOCK_SIZE);
+ memset(iv + AES_BLOCK_SIZE - 1 - iv[0], 0, iv[0] + 1);
+@@ -1165,39 +1176,18 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
+ }
+
+ if (is_encrypt(cryp)) {
++ u32 out_tag[AES_BLOCK_32];
++
+ /* Get and write tag */
+- dst = sg_virt(cryp->out_sg) + _walked_out;
++ for (i = 0; i < AES_BLOCK_32; i++)
++ out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+
+- for (i = 0; i < AES_BLOCK_32; i++) {
+- if (cryp->total_out >= sizeof(u32)) {
+- /* Read a full u32 */
+- *dst = stm32_cryp_read(cryp, CRYP_DOUT);
+-
+- dst = stm32_cryp_next_out(cryp, dst,
+- sizeof(u32));
+- cryp->total_out -= sizeof(u32);
+- } else if (!cryp->total_out) {
+- /* Empty fifo out (data from input padding) */
+- stm32_cryp_read(cryp, CRYP_DOUT);
+- } else {
+- /* Read less than an u32 */
+- d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+- d8 = (u8 *)&d32;
+-
+- for (j = 0; j < cryp->total_out; j++) {
+- *((u8 *)dst) = *(d8++);
+- dst = stm32_cryp_next_out(cryp, dst, 1);
+- }
+- cryp->total_out = 0;
+- }
+- }
++ scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1);
+ } else {
+ /* Get and check tag */
+ u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32];
+
+- scatterwalk_map_and_copy(in_tag, cryp->in_sg,
+- cryp->total_in_save - cryp->authsize,
+- cryp->authsize, 0);
++ scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0);
+
+ for (i = 0; i < AES_BLOCK_32; i++)
+ out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+@@ -1217,115 +1207,59 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp)
+ {
+ u32 cr;
+
+- if (unlikely(cryp->last_ctr[3] == 0xFFFFFFFF)) {
+- cryp->last_ctr[3] = 0;
+- cryp->last_ctr[2]++;
+- if (!cryp->last_ctr[2]) {
+- cryp->last_ctr[1]++;
+- if (!cryp->last_ctr[1])
+- cryp->last_ctr[0]++;
+- }
++ if (unlikely(cryp->last_ctr[3] == cpu_to_be32(0xFFFFFFFF))) {
++ /*
++ * In this case, we need to increment manually the ctr counter,
++ * as HW doesn't handle the U32 carry.
++ */
++ crypto_inc((u8 *)cryp->last_ctr, sizeof(cryp->last_ctr));
+
+ cr = stm32_cryp_read(cryp, CRYP_CR);
+ stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN);
+
+- stm32_cryp_hw_write_iv(cryp, (__be32 *)cryp->last_ctr);
++ stm32_cryp_hw_write_iv(cryp, cryp->last_ctr);
+
+ stm32_cryp_write(cryp, CRYP_CR, cr);
+ }
+
+- cryp->last_ctr[0] = stm32_cryp_read(cryp, CRYP_IV0LR);
+- cryp->last_ctr[1] = stm32_cryp_read(cryp, CRYP_IV0RR);
+- cryp->last_ctr[2] = stm32_cryp_read(cryp, CRYP_IV1LR);
+- cryp->last_ctr[3] = stm32_cryp_read(cryp, CRYP_IV1RR);
++ /* The IV registers are BE */
++ cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR));
++ cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR));
++ cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR));
++ cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR));
+ }
+
+-static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
++static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
+ {
+- unsigned int i, j;
+- u32 d32, *dst;
+- u8 *d8;
+- size_t tag_size;
+-
+- /* Do no read tag now (if any) */
+- if (is_encrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp)))
+- tag_size = cryp->authsize;
+- else
+- tag_size = 0;
+-
+- dst = sg_virt(cryp->out_sg) + _walked_out;
++ unsigned int i;
++ u32 block[AES_BLOCK_32];
+
+- for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+- if (likely(cryp->total_out - tag_size >= sizeof(u32))) {
+- /* Read a full u32 */
+- *dst = stm32_cryp_read(cryp, CRYP_DOUT);
++ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
++ block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+
+- dst = stm32_cryp_next_out(cryp, dst, sizeof(u32));
+- cryp->total_out -= sizeof(u32);
+- } else if (cryp->total_out == tag_size) {
+- /* Empty fifo out (data from input padding) */
+- d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+- } else {
+- /* Read less than an u32 */
+- d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+- d8 = (u8 *)&d32;
+-
+- for (j = 0; j < cryp->total_out - tag_size; j++) {
+- *((u8 *)dst) = *(d8++);
+- dst = stm32_cryp_next_out(cryp, dst, 1);
+- }
+- cryp->total_out = tag_size;
+- }
+- }
+-
+- return !(cryp->total_out - tag_size) || !cryp->total_in;
++ scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_out), 1);
++ cryp->payload_out -= min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_out);
+ }
+
+ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
+ {
+- unsigned int i, j;
+- u32 *src;
+- u8 d8[4];
+- size_t tag_size;
+-
+- /* Do no write tag (if any) */
+- if (is_decrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp)))
+- tag_size = cryp->authsize;
+- else
+- tag_size = 0;
+-
+- src = sg_virt(cryp->in_sg) + _walked_in;
++ unsigned int i;
++ u32 block[AES_BLOCK_32] = {0};
+
+- for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+- if (likely(cryp->total_in - tag_size >= sizeof(u32))) {
+- /* Write a full u32 */
+- stm32_cryp_write(cryp, CRYP_DIN, *src);
++ scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_in), 0);
++ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
++ stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+
+- src = stm32_cryp_next_in(cryp, src, sizeof(u32));
+- cryp->total_in -= sizeof(u32);
+- } else if (cryp->total_in == tag_size) {
+- /* Write padding data */
+- stm32_cryp_write(cryp, CRYP_DIN, 0);
+- } else {
+- /* Write less than an u32 */
+- memset(d8, 0, sizeof(u32));
+- for (j = 0; j < cryp->total_in - tag_size; j++) {
+- d8[j] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+- }
+-
+- stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+- cryp->total_in = tag_size;
+- }
+- }
++ cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in);
+ }
+
+ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
+ {
+ int err;
+- u32 cfg, tmp[AES_BLOCK_32];
+- size_t total_in_ori = cryp->total_in;
+- struct scatterlist *out_sg_ori = cryp->out_sg;
++ u32 cfg, block[AES_BLOCK_32] = {0};
+ unsigned int i;
+
+ /* 'Special workaround' procedure described in the datasheet */
+@@ -1350,18 +1284,25 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
+
+ /* b) pad and write the last block */
+ stm32_cryp_irq_write_block(cryp);
+- cryp->total_in = total_in_ori;
++ /* wait end of process */
+ err = stm32_cryp_wait_output(cryp);
+ if (err) {
+- dev_err(cryp->dev, "Timeout (write gcm header)\n");
++ dev_err(cryp->dev, "Timeout (write gcm last data)\n");
+ return stm32_cryp_finish_req(cryp, err);
+ }
+
+ /* c) get and store encrypted data */
+- stm32_cryp_irq_read_data(cryp);
+- scatterwalk_map_and_copy(tmp, out_sg_ori,
+- cryp->total_in_save - total_in_ori,
+- total_in_ori, 0);
++ /*
++ * Same code as stm32_cryp_irq_read_data(), but we want to store
++ * block value
++ */
++ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
++ block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
++
++ scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_out), 1);
++ cryp->payload_out -= min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_out);
+
+ /* d) change mode back to AES GCM */
+ cfg &= ~CR_ALGO_MASK;
+@@ -1374,19 +1315,13 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
+ stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+ /* f) write padded data */
+- for (i = 0; i < AES_BLOCK_32; i++) {
+- if (cryp->total_in)
+- stm32_cryp_write(cryp, CRYP_DIN, tmp[i]);
+- else
+- stm32_cryp_write(cryp, CRYP_DIN, 0);
+-
+- cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in);
+- }
++ for (i = 0; i < AES_BLOCK_32; i++)
++ stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+
+ /* g) Empty fifo out */
+ err = stm32_cryp_wait_output(cryp);
+ if (err) {
+- dev_err(cryp->dev, "Timeout (write gcm header)\n");
++ dev_err(cryp->dev, "Timeout (write gcm padded data)\n");
+ return stm32_cryp_finish_req(cryp, err);
+ }
+
+@@ -1399,16 +1334,14 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
+
+ static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp)
+ {
+- u32 cfg, payload_bytes;
++ u32 cfg;
+
+ /* disable ip, set NPBLB and reneable ip */
+ cfg = stm32_cryp_read(cryp, CRYP_CR);
+ cfg &= ~CR_CRYPEN;
+ stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+- payload_bytes = is_decrypt(cryp) ? cryp->total_in - cryp->authsize :
+- cryp->total_in;
+- cfg |= (cryp->hw_blocksize - payload_bytes) << CR_NBPBL_SHIFT;
++ cfg |= (cryp->hw_blocksize - cryp->payload_in) << CR_NBPBL_SHIFT;
+ cfg |= CR_CRYPEN;
+ stm32_cryp_write(cryp, CRYP_CR, cfg);
+ }
+@@ -1417,13 +1350,11 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+ {
+ int err = 0;
+ u32 cfg, iv1tmp;
+- u32 cstmp1[AES_BLOCK_32], cstmp2[AES_BLOCK_32], tmp[AES_BLOCK_32];
+- size_t last_total_out, total_in_ori = cryp->total_in;
+- struct scatterlist *out_sg_ori = cryp->out_sg;
++ u32 cstmp1[AES_BLOCK_32], cstmp2[AES_BLOCK_32];
++ u32 block[AES_BLOCK_32] = {0};
+ unsigned int i;
+
+ /* 'Special workaround' procedure described in the datasheet */
+- cryp->flags |= FLG_CCM_PADDED_WA;
+
+ /* a) disable ip */
+ stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+@@ -1453,7 +1384,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+
+ /* b) pad and write the last block */
+ stm32_cryp_irq_write_block(cryp);
+- cryp->total_in = total_in_ori;
++ /* wait end of process */
+ err = stm32_cryp_wait_output(cryp);
+ if (err) {
+ dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+@@ -1461,13 +1392,16 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+ }
+
+ /* c) get and store decrypted data */
+- last_total_out = cryp->total_out;
+- stm32_cryp_irq_read_data(cryp);
++ /*
++ * Same code as stm32_cryp_irq_read_data(), but we want to store
++ * block value
++ */
++ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
++ block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+
+- memset(tmp, 0, sizeof(tmp));
+- scatterwalk_map_and_copy(tmp, out_sg_ori,
+- cryp->total_out_save - last_total_out,
+- last_total_out, 0);
++ scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
++ cryp->payload_out), 1);
++ cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, cryp->payload_out);
+
+ /* d) Load again CRYP_CSGCMCCMxR */
+ for (i = 0; i < ARRAY_SIZE(cstmp2); i++)
+@@ -1484,10 +1418,10 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+ stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+ /* g) XOR and write padded data */
+- for (i = 0; i < ARRAY_SIZE(tmp); i++) {
+- tmp[i] ^= cstmp1[i];
+- tmp[i] ^= cstmp2[i];
+- stm32_cryp_write(cryp, CRYP_DIN, tmp[i]);
++ for (i = 0; i < ARRAY_SIZE(block); i++) {
++ block[i] ^= cstmp1[i];
++ block[i] ^= cstmp2[i];
++ stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+ }
+
+ /* h) wait for completion */
+@@ -1501,30 +1435,34 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+
+ static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
+ {
+- if (unlikely(!cryp->total_in)) {
++ if (unlikely(!cryp->payload_in)) {
+ dev_warn(cryp->dev, "No more data to process\n");
+ return;
+ }
+
+- if (unlikely(cryp->total_in < AES_BLOCK_SIZE &&
++ if (unlikely(cryp->payload_in < AES_BLOCK_SIZE &&
+ (stm32_cryp_get_hw_mode(cryp) == CR_AES_GCM) &&
+ is_encrypt(cryp))) {
+ /* Padding for AES GCM encryption */
+- if (cryp->caps->padding_wa)
++ if (cryp->caps->padding_wa) {
+ /* Special case 1 */
+- return stm32_cryp_irq_write_gcm_padded_data(cryp);
++ stm32_cryp_irq_write_gcm_padded_data(cryp);
++ return;
++ }
+
+ /* Setting padding bytes (NBBLB) */
+ stm32_cryp_irq_set_npblb(cryp);
+ }
+
+- if (unlikely((cryp->total_in - cryp->authsize < AES_BLOCK_SIZE) &&
++ if (unlikely((cryp->payload_in < AES_BLOCK_SIZE) &&
+ (stm32_cryp_get_hw_mode(cryp) == CR_AES_CCM) &&
+ is_decrypt(cryp))) {
+ /* Padding for AES CCM decryption */
+- if (cryp->caps->padding_wa)
++ if (cryp->caps->padding_wa) {
+ /* Special case 2 */
+- return stm32_cryp_irq_write_ccm_padded_data(cryp);
++ stm32_cryp_irq_write_ccm_padded_data(cryp);
++ return;
++ }
+
+ /* Setting padding bytes (NBBLB) */
+ stm32_cryp_irq_set_npblb(cryp);
+@@ -1536,192 +1474,60 @@ static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
+ stm32_cryp_irq_write_block(cryp);
+ }
+
+-static void stm32_cryp_irq_write_gcm_header(struct stm32_cryp *cryp)
++static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp)
+ {
+- int err;
+- unsigned int i, j;
+- u32 cfg, *src;
+-
+- src = sg_virt(cryp->in_sg) + _walked_in;
+-
+- for (i = 0; i < AES_BLOCK_32; i++) {
+- stm32_cryp_write(cryp, CRYP_DIN, *src);
+-
+- src = stm32_cryp_next_in(cryp, src, sizeof(u32));
+- cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in);
+-
+- /* Check if whole header written */
+- if ((cryp->total_in_save - cryp->total_in) ==
+- cryp->areq->assoclen) {
+- /* Write padding if needed */
+- for (j = i + 1; j < AES_BLOCK_32; j++)
+- stm32_cryp_write(cryp, CRYP_DIN, 0);
+-
+- /* Wait for completion */
+- err = stm32_cryp_wait_busy(cryp);
+- if (err) {
+- dev_err(cryp->dev, "Timeout (gcm header)\n");
+- return stm32_cryp_finish_req(cryp, err);
+- }
+-
+- if (stm32_cryp_get_input_text_len(cryp)) {
+- /* Phase 3 : payload */
+- cfg = stm32_cryp_read(cryp, CRYP_CR);
+- cfg &= ~CR_CRYPEN;
+- stm32_cryp_write(cryp, CRYP_CR, cfg);
+-
+- cfg &= ~CR_PH_MASK;
+- cfg |= CR_PH_PAYLOAD;
+- cfg |= CR_CRYPEN;
+- stm32_cryp_write(cryp, CRYP_CR, cfg);
+- } else {
+- /* Phase 4 : tag */
+- stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+- stm32_cryp_finish_req(cryp, 0);
+- }
+-
+- break;
+- }
+-
+- if (!cryp->total_in)
+- break;
+- }
+-}
++ unsigned int i;
++ u32 block[AES_BLOCK_32] = {0};
++ size_t written;
+
+-static void stm32_cryp_irq_write_ccm_header(struct stm32_cryp *cryp)
+-{
+- int err;
+- unsigned int i = 0, j, k;
+- u32 alen, cfg, *src;
+- u8 d8[4];
+-
+- src = sg_virt(cryp->in_sg) + _walked_in;
+- alen = cryp->areq->assoclen;
+-
+- if (!_walked_in) {
+- if (cryp->areq->assoclen <= 65280) {
+- /* Write first u32 of B1 */
+- d8[0] = (alen >> 8) & 0xFF;
+- d8[1] = alen & 0xFF;
+- d8[2] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+- d8[3] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+-
+- stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+- i++;
+-
+- cryp->total_in -= min_t(size_t, 2, cryp->total_in);
+- } else {
+- /* Build the two first u32 of B1 */
+- d8[0] = 0xFF;
+- d8[1] = 0xFE;
+- d8[2] = alen & 0xFF000000;
+- d8[3] = alen & 0x00FF0000;
+-
+- stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+- i++;
+-
+- d8[0] = alen & 0x0000FF00;
+- d8[1] = alen & 0x000000FF;
+- d8[2] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+- d8[3] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+-
+- stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+- i++;
+-
+- cryp->total_in -= min_t(size_t, 2, cryp->total_in);
+- }
+- }
++ written = min_t(size_t, AES_BLOCK_SIZE, cryp->header_in);
+
+- /* Write next u32 */
+- for (; i < AES_BLOCK_32; i++) {
+- /* Build an u32 */
+- memset(d8, 0, sizeof(u32));
+- for (k = 0; k < sizeof(u32); k++) {
+- d8[k] = *((u8 *)src);
+- src = stm32_cryp_next_in(cryp, src, 1);
+-
+- cryp->total_in -= min_t(size_t, 1, cryp->total_in);
+- if ((cryp->total_in_save - cryp->total_in) == alen)
+- break;
+- }
++ scatterwalk_copychunks(block, &cryp->in_walk, written, 0);
++ for (i = 0; i < AES_BLOCK_32; i++)
++ stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+
+- stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+-
+- if ((cryp->total_in_save - cryp->total_in) == alen) {
+- /* Write padding if needed */
+- for (j = i + 1; j < AES_BLOCK_32; j++)
+- stm32_cryp_write(cryp, CRYP_DIN, 0);
+-
+- /* Wait for completion */
+- err = stm32_cryp_wait_busy(cryp);
+- if (err) {
+- dev_err(cryp->dev, "Timeout (ccm header)\n");
+- return stm32_cryp_finish_req(cryp, err);
+- }
+-
+- if (stm32_cryp_get_input_text_len(cryp)) {
+- /* Phase 3 : payload */
+- cfg = stm32_cryp_read(cryp, CRYP_CR);
+- cfg &= ~CR_CRYPEN;
+- stm32_cryp_write(cryp, CRYP_CR, cfg);
+-
+- cfg &= ~CR_PH_MASK;
+- cfg |= CR_PH_PAYLOAD;
+- cfg |= CR_CRYPEN;
+- stm32_cryp_write(cryp, CRYP_CR, cfg);
+- } else {
+- /* Phase 4 : tag */
+- stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+- stm32_cryp_finish_req(cryp, 0);
+- }
++ cryp->header_in -= written;
+
+- break;
+- }
+- }
++ stm32_crypt_gcmccm_end_header(cryp);
+ }
+
+ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
+ {
+ struct stm32_cryp *cryp = arg;
+ u32 ph;
++ u32 it_mask = stm32_cryp_read(cryp, CRYP_IMSCR);
+
+ if (cryp->irq_status & MISR_OUT)
+ /* Output FIFO IRQ: read data */
+- if (unlikely(stm32_cryp_irq_read_data(cryp))) {
+- /* All bytes processed, finish */
+- stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+- stm32_cryp_finish_req(cryp, 0);
+- return IRQ_HANDLED;
+- }
++ stm32_cryp_irq_read_data(cryp);
+
+ if (cryp->irq_status & MISR_IN) {
+- if (is_gcm(cryp)) {
++ if (is_gcm(cryp) || is_ccm(cryp)) {
+ ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK;
+ if (unlikely(ph == CR_PH_HEADER))
+ /* Write Header */
+- stm32_cryp_irq_write_gcm_header(cryp);
+- else
+- /* Input FIFO IRQ: write data */
+- stm32_cryp_irq_write_data(cryp);
+- cryp->gcm_ctr++;
+- } else if (is_ccm(cryp)) {
+- ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK;
+- if (unlikely(ph == CR_PH_HEADER))
+- /* Write Header */
+- stm32_cryp_irq_write_ccm_header(cryp);
++ stm32_cryp_irq_write_gcmccm_header(cryp);
+ else
+ /* Input FIFO IRQ: write data */
+ stm32_cryp_irq_write_data(cryp);
++ if (is_gcm(cryp))
++ cryp->gcm_ctr++;
+ } else {
+ /* Input FIFO IRQ: write data */
+ stm32_cryp_irq_write_data(cryp);
+ }
+ }
+
++ /* Mask useless interrupts */
++ if (!cryp->payload_in && !cryp->header_in)
++ it_mask &= ~IMSCR_IN;
++ if (!cryp->payload_out)
++ it_mask &= ~IMSCR_OUT;
++ stm32_cryp_write(cryp, CRYP_IMSCR, it_mask);
++
++ if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out)
++ stm32_cryp_finish_req(cryp, 0);
++
+ return IRQ_HANDLED;
+ }
+
+@@ -1742,7 +1548,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1759,7 +1565,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1777,7 +1583,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1795,7 +1601,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1812,7 +1618,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1830,7 +1636,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1847,7 +1653,7 @@ static struct skcipher_alg crypto_algs[] = {
+ .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .base.cra_alignmask = 0xf,
++ .base.cra_alignmask = 0,
+ .base.cra_module = THIS_MODULE,
+
+ .init = stm32_cryp_init_tfm,
+@@ -1877,7 +1683,7 @@ static struct aead_alg aead_algs[] = {
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .cra_alignmask = 0xf,
++ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ },
+@@ -1897,7 +1703,7 @@ static struct aead_alg aead_algs[] = {
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct stm32_cryp_ctx),
+- .cra_alignmask = 0xf,
++ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ },
+ },
+@@ -2025,8 +1831,6 @@ err_engine1:
+ list_del(&cryp->list);
+ spin_unlock(&cryp_list.lock);
+
+- pm_runtime_disable(dev);
+- pm_runtime_put_noidle(dev);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
+diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
+index 389de9e3302d5..4df5330afaa1d 100644
+--- a/drivers/crypto/stm32/stm32-hash.c
++++ b/drivers/crypto/stm32/stm32-hash.c
+@@ -565,9 +565,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
+ }
+
+ for_each_sg(rctx->sg, tsg, rctx->nents, i) {
++ sg[0] = *tsg;
+ len = sg->length;
+
+- sg[0] = *tsg;
+ if (sg_is_last(sg)) {
+ if (hdev->dma_mode == 1) {
+ len = (ALIGN(sg->length, 16) - 16);
+@@ -813,7 +813,7 @@ static void stm32_hash_finish_req(struct ahash_request *req, int err)
+ static int stm32_hash_hw_init(struct stm32_hash_dev *hdev,
+ struct stm32_hash_request_ctx *rctx)
+ {
+- pm_runtime_resume_and_get(hdev->dev);
++ pm_runtime_get_sync(hdev->dev);
+
+ if (!(HASH_FLAGS_INIT & hdev->flags)) {
+ stm32_hash_write(hdev, HASH_CR, HASH_CR_INIT);
+@@ -962,7 +962,7 @@ static int stm32_hash_export(struct ahash_request *req, void *out)
+ u32 *preg;
+ unsigned int i;
+
+- pm_runtime_resume_and_get(hdev->dev);
++ pm_runtime_get_sync(hdev->dev);
+
+ while ((stm32_hash_read(hdev, HASH_SR) & HASH_SR_BUSY))
+ cpu_relax();
+@@ -1000,7 +1000,7 @@ static int stm32_hash_import(struct ahash_request *req, const void *in)
+
+ preg = rctx->hw_context;
+
+- pm_runtime_resume_and_get(hdev->dev);
++ pm_runtime_get_sync(hdev->dev);
+
+ stm32_hash_write(hdev, HASH_IMR, *preg++);
+ stm32_hash_write(hdev, HASH_STR, *preg++);
+@@ -1566,9 +1566,7 @@ static int stm32_hash_remove(struct platform_device *pdev)
+ if (!hdev)
+ return -ENODEV;
+
+- ret = pm_runtime_resume_and_get(hdev->dev);
+- if (ret < 0)
+- return ret;
++ ret = pm_runtime_get_sync(hdev->dev);
+
+ stm32_hash_unregister_algs(hdev);
+
+@@ -1584,7 +1582,8 @@ static int stm32_hash_remove(struct platform_device *pdev)
+ pm_runtime_disable(hdev->dev);
+ pm_runtime_put_noidle(hdev->dev);
+
+- clk_disable_unprepare(hdev->clk);
++ if (ret >= 0)
++ clk_disable_unprepare(hdev->clk);
+
+ return 0;
+ }
+diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
+index c85fab7ef0bdd..b2c28b87f14b3 100644
+--- a/drivers/crypto/vmx/Kconfig
++++ b/drivers/crypto/vmx/Kconfig
+@@ -2,7 +2,11 @@
+ config CRYPTO_DEV_VMX_ENCRYPT
+ tristate "Encryption acceleration support on P8 CPU"
+ depends on CRYPTO_DEV_VMX
++ select CRYPTO_AES
++ select CRYPTO_CBC
++ select CRYPTO_CTR
+ select CRYPTO_GHASH
++ select CRYPTO_XTS
+ default m
+ help
+ Support for VMX cryptographic acceleration instructions on Power8 CPU.
+diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
+index 267d8042bec22..0987a6423ee06 100644
+--- a/drivers/cxl/core/bus.c
++++ b/drivers/cxl/core/bus.c
+@@ -182,6 +182,7 @@ static void cxl_decoder_release(struct device *dev)
+
+ ida_free(&port->decoder_ida, cxld->id);
+ kfree(cxld);
++ put_device(&port->dev);
+ }
+
+ static const struct device_type cxl_decoder_switch_type = {
+@@ -481,6 +482,9 @@ cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base,
+ if (rc < 0)
+ goto err;
+
++ /* need parent to stick around to release the id */
++ get_device(&port->dev);
++
+ *cxld = (struct cxl_decoder) {
+ .id = rc,
+ .range = {
+diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
+index 41de4a136ecd7..2e7027a3fef3b 100644
+--- a/drivers/cxl/core/regs.c
++++ b/drivers/cxl/core/regs.c
+@@ -35,7 +35,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
+ struct cxl_component_reg_map *map)
+ {
+ int cap, cap_count;
+- u64 cap_array;
++ u32 cap_array;
+
+ *map = (struct cxl_component_reg_map) { 0 };
+
+@@ -45,11 +45,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
+ */
+ base += CXL_CM_OFFSET;
+
+- cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
++ cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET);
+
+ if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
+ dev_err(dev,
+- "Couldn't locate the CXL.cache and CXL.mem capability array header./n");
++ "Couldn't locate the CXL.cache and CXL.mem capability array header.\n");
+ return;
+ }
+
+diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
+index 8e45aa07d662f..5444b5a7fd3c4 100644
+--- a/drivers/cxl/pci.c
++++ b/drivers/cxl/pci.c
+@@ -972,7 +972,7 @@ static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
+ if (pci_resource_len(pdev, bar) < offset) {
+ dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
+ &pdev->resource[bar], (unsigned long long)offset);
+- return IOMEM_ERR_PTR(-ENXIO);
++ return NULL;
+ }
+
+ addr = pci_iomap(pdev, bar, 0);
+diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
+index 9652c3ee41e7f..2bb2f9a0499f7 100644
+--- a/drivers/cxl/pmem.c
++++ b/drivers/cxl/pmem.c
+@@ -149,14 +149,24 @@ static void cxl_nvb_update_state(struct work_struct *work)
+ put_device(&cxl_nvb->dev);
+ }
+
++static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
++{
++ /*
++ * Take a reference that the workqueue will drop if new work
++ * gets queued.
++ */
++ get_device(&cxl_nvb->dev);
++ if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
++ put_device(&cxl_nvb->dev);
++}
++
+ static void cxl_nvdimm_bridge_remove(struct device *dev)
+ {
+ struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
+
+ if (cxl_nvb->state == CXL_NVB_ONLINE)
+ cxl_nvb->state = CXL_NVB_OFFLINE;
+- if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
+- get_device(&cxl_nvb->dev);
++ cxl_nvdimm_bridge_state_work(cxl_nvb);
+ }
+
+ static int cxl_nvdimm_bridge_probe(struct device *dev)
+@@ -177,8 +187,7 @@ static int cxl_nvdimm_bridge_probe(struct device *dev)
+ }
+
+ cxl_nvb->state = CXL_NVB_ONLINE;
+- if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
+- get_device(&cxl_nvb->dev);
++ cxl_nvdimm_bridge_state_work(cxl_nvb);
+
+ return 0;
+ }
+diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
+index 6cc4da4c713d9..7ad61c707687f 100644
+--- a/drivers/dax/bus.c
++++ b/drivers/dax/bus.c
+@@ -397,23 +397,39 @@ static void unregister_dev_dax(void *dev)
+ dev_dbg(dev, "%s\n", __func__);
+
+ kill_dev_dax(dev_dax);
+- free_dev_dax_ranges(dev_dax);
+ device_del(dev);
++ free_dev_dax_ranges(dev_dax);
+ put_device(dev);
+ }
+
++static void dax_region_free(struct kref *kref)
++{
++ struct dax_region *dax_region;
++
++ dax_region = container_of(kref, struct dax_region, kref);
++ kfree(dax_region);
++}
++
++void dax_region_put(struct dax_region *dax_region)
++{
++ kref_put(&dax_region->kref, dax_region_free);
++}
++EXPORT_SYMBOL_GPL(dax_region_put);
++
+ /* a return value >= 0 indicates this invocation invalidated the id */
+ static int __free_dev_dax_id(struct dev_dax *dev_dax)
+ {
+- struct dax_region *dax_region = dev_dax->region;
+ struct device *dev = &dev_dax->dev;
++ struct dax_region *dax_region;
+ int rc = dev_dax->id;
+
+ device_lock_assert(dev);
+
+- if (is_static(dax_region) || dev_dax->id < 0)
++ if (!dev_dax->dyn_id || dev_dax->id < 0)
+ return -1;
++ dax_region = dev_dax->region;
+ ida_free(&dax_region->ida, dev_dax->id);
++ dax_region_put(dax_region);
+ dev_dax->id = -1;
+ return rc;
+ }
+@@ -429,6 +445,20 @@ static int free_dev_dax_id(struct dev_dax *dev_dax)
+ return rc;
+ }
+
++static int alloc_dev_dax_id(struct dev_dax *dev_dax)
++{
++ struct dax_region *dax_region = dev_dax->region;
++ int id;
++
++ id = ida_alloc(&dax_region->ida, GFP_KERNEL);
++ if (id < 0)
++ return id;
++ kref_get(&dax_region->kref);
++ dev_dax->dyn_id = true;
++ dev_dax->id = id;
++ return id;
++}
++
+ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
+@@ -516,20 +546,6 @@ static const struct attribute_group *dax_region_attribute_groups[] = {
+ NULL,
+ };
+
+-static void dax_region_free(struct kref *kref)
+-{
+- struct dax_region *dax_region;
+-
+- dax_region = container_of(kref, struct dax_region, kref);
+- kfree(dax_region);
+-}
+-
+-void dax_region_put(struct dax_region *dax_region)
+-{
+- kref_put(&dax_region->kref, dax_region_free);
+-}
+-EXPORT_SYMBOL_GPL(dax_region_put);
+-
+ static void dax_region_unregister(void *region)
+ {
+ struct dax_region *dax_region = region;
+@@ -591,10 +607,12 @@ EXPORT_SYMBOL_GPL(alloc_dax_region);
+ static void dax_mapping_release(struct device *dev)
+ {
+ struct dax_mapping *mapping = to_dax_mapping(dev);
+- struct dev_dax *dev_dax = to_dev_dax(dev->parent);
++ struct device *parent = dev->parent;
++ struct dev_dax *dev_dax = to_dev_dax(parent);
+
+ ida_free(&dev_dax->ida, mapping->id);
+ kfree(mapping);
++ put_device(parent);
+ }
+
+ static void unregister_dax_mapping(void *data)
+@@ -734,6 +752,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
+ dev = &mapping->dev;
+ device_initialize(dev);
+ dev->parent = &dev_dax->dev;
++ get_device(dev->parent);
+ dev->type = &dax_mapping_type;
+ dev_set_name(dev, "mapping%d", mapping->id);
+ rc = device_add(dev);
+@@ -1251,12 +1270,10 @@ static const struct attribute_group *dax_attribute_groups[] = {
+ static void dev_dax_release(struct device *dev)
+ {
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+- struct dax_region *dax_region = dev_dax->region;
+ struct dax_device *dax_dev = dev_dax->dax_dev;
+
+ put_dax(dax_dev);
+ free_dev_dax_id(dev_dax);
+- dax_region_put(dax_region);
+ kfree(dev_dax->pgmap);
+ kfree(dev_dax);
+ }
+@@ -1280,6 +1297,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+ if (!dev_dax)
+ return ERR_PTR(-ENOMEM);
+
++ dev_dax->region = dax_region;
+ if (is_static(dax_region)) {
+ if (dev_WARN_ONCE(parent, data->id < 0,
+ "dynamic id specified to static region\n")) {
+@@ -1295,13 +1313,11 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+ goto err_id;
+ }
+
+- rc = ida_alloc(&dax_region->ida, GFP_KERNEL);
++ rc = alloc_dev_dax_id(dev_dax);
+ if (rc < 0)
+ goto err_id;
+- dev_dax->id = rc;
+ }
+
+- dev_dax->region = dax_region;
+ dev = &dev_dax->dev;
+ device_initialize(dev);
+ dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
+@@ -1339,7 +1355,6 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+ dev_dax->target_node = dax_region->target_node;
+ dev_dax->align = dax_region->align;
+ ida_init(&dev_dax->ida);
+- kref_get(&dax_region->kref);
+
+ inode = dax_inode(dax_dev);
+ dev->devt = inode->i_rdev;
+diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
+index 1c974b7caae6e..afcada6fd2eda 100644
+--- a/drivers/dax/dax-private.h
++++ b/drivers/dax/dax-private.h
+@@ -52,7 +52,8 @@ struct dax_mapping {
+ * @region - parent region
+ * @dax_dev - core dax functionality
+ * @target_node: effective numa node if dev_dax memory range is onlined
+- * @id: ida allocated id
++ * @dyn_id: is this a dynamic or statically created instance
++ * @id: ida allocated id when the dax_region is not static
+ * @ida: mapping id allocator
+ * @dev - device core
+ * @pgmap - pgmap for memmap setup / lifetime (driver owned)
+@@ -64,6 +65,7 @@ struct dev_dax {
+ struct dax_device *dax_dev;
+ unsigned int align;
+ int target_node;
++ bool dyn_id;
+ int id;
+ struct ida ida;
+ struct device dev;
+diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
+index cb6401c9e9a4f..acf31cc1dbcca 100644
+--- a/drivers/dax/hmem/device.c
++++ b/drivers/dax/hmem/device.c
+@@ -15,6 +15,7 @@ void hmem_register_device(int target_nid, struct resource *r)
+ .start = r->start,
+ .end = r->end,
+ .flags = IORESOURCE_MEM,
++ .desc = IORES_DESC_SOFT_RESERVED,
+ };
+ struct platform_device *pdev;
+ struct memregion_info info;
+diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
+index a37622060fffa..97723ee15bc68 100644
+--- a/drivers/dax/kmem.c
++++ b/drivers/dax/kmem.c
+@@ -88,7 +88,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
+ if (!data->res_name)
+ goto err_res_name;
+
+- rc = memory_group_register_static(numa_node, total_len);
++ rc = memory_group_register_static(numa_node, PFN_UP(total_len));
+ if (rc < 0)
+ goto err_reg_mgid;
+ data->mgid = rc;
+@@ -135,7 +135,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
+ if (rc) {
+ dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
+ i, range.start, range.end);
+- release_resource(res);
++ remove_resource(res);
+ kfree(res);
+ data->res[i] = NULL;
+ if (mapped)
+@@ -181,7 +181,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
+
+ rc = remove_memory(range.start, range_len(&range));
+ if (rc == 0) {
+- release_resource(data->res[i]);
++ remove_resource(data->res[i]);
+ kfree(data->res[i]);
+ data->res[i] = NULL;
+ success++;
+diff --git a/drivers/dax/super.c b/drivers/dax/super.c
+index fc89e91beea7c..7610e4a9ac4e2 100644
+--- a/drivers/dax/super.c
++++ b/drivers/dax/super.c
+@@ -678,6 +678,7 @@ static int dax_fs_init(void)
+ static void dax_fs_exit(void)
+ {
+ kern_unmount(dax_mnt);
++ rcu_barrier();
+ kmem_cache_destroy(dax_cache);
+ }
+
+diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
+index 85faa7a5c7d12..29a14b0ffe334 100644
+--- a/drivers/devfreq/devfreq.c
++++ b/drivers/devfreq/devfreq.c
+@@ -762,6 +762,7 @@ static void devfreq_dev_release(struct device *dev)
+ dev_pm_opp_put_opp_table(devfreq->opp_table);
+
+ mutex_destroy(&devfreq->lock);
++ srcu_cleanup_notifier_head(&devfreq->transition_notifier_list);
+ kfree(devfreq);
+ }
+
+@@ -775,8 +776,7 @@ static void remove_sysfs_files(struct devfreq *devfreq,
+ * @dev: the device to add devfreq feature.
+ * @profile: device-specific profile to run devfreq.
+ * @governor_name: name of the policy to choose frequency.
+- * @data: private data for the governor. The devfreq framework does not
+- * touch this value.
++ * @data: devfreq driver pass to governors, governor should not change it.
+ */
+ struct devfreq *devfreq_add_device(struct device *dev,
+ struct devfreq_dev_profile *profile,
+@@ -1003,8 +1003,7 @@ static void devm_devfreq_dev_release(struct device *dev, void *res)
+ * @dev: the device to add devfreq feature.
+ * @profile: device-specific profile to run devfreq.
+ * @governor_name: name of the policy to choose frequency.
+- * @data: private data for the governor. The devfreq framework does not
+- * touch this value.
++ * @data: devfreq driver pass to governors, governor should not change it.
+ *
+ * This function manages automatically the memory of devfreq device using device
+ * resource management and simplify the free operation for memory of devfreq
+diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
+index 17ed980d90998..d6da9c3e31067 100644
+--- a/drivers/devfreq/event/exynos-ppmu.c
++++ b/drivers/devfreq/event/exynos-ppmu.c
+@@ -514,15 +514,19 @@ static int of_get_devfreq_events(struct device_node *np,
+
+ count = of_get_child_count(events_np);
+ desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL);
+- if (!desc)
++ if (!desc) {
++ of_node_put(events_np);
+ return -ENOMEM;
++ }
+ info->num_events = count;
+
+ of_id = of_match_device(exynos_ppmu_id_match, dev);
+ if (of_id)
+ info->ppmu_type = (enum exynos_ppmu_type)of_id->data;
+- else
++ else {
++ of_node_put(events_np);
+ return -EINVAL;
++ }
+
+ j = 0;
+ for_each_child_of_node(events_np, node) {
+diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
+index ab9db7adb3ade..d69672ccacc49 100644
+--- a/drivers/devfreq/governor_userspace.c
++++ b/drivers/devfreq/governor_userspace.c
+@@ -21,7 +21,7 @@ struct userspace_data {
+
+ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
+ {
+- struct userspace_data *data = df->data;
++ struct userspace_data *data = df->governor_data;
+
+ if (data->valid)
+ *freq = data->user_frequency;
+@@ -40,7 +40,7 @@ static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr,
+ int err = 0;
+
+ mutex_lock(&devfreq->lock);
+- data = devfreq->data;
++ data = devfreq->governor_data;
+
+ sscanf(buf, "%lu", &wanted);
+ data->user_frequency = wanted;
+@@ -60,7 +60,7 @@ static ssize_t set_freq_show(struct device *dev,
+ int err = 0;
+
+ mutex_lock(&devfreq->lock);
+- data = devfreq->data;
++ data = devfreq->governor_data;
+
+ if (data->valid)
+ err = sprintf(buf, "%lu\n", data->user_frequency);
+@@ -91,7 +91,7 @@ static int userspace_init(struct devfreq *devfreq)
+ goto out;
+ }
+ data->valid = false;
+- devfreq->data = data;
++ devfreq->governor_data = data;
+
+ err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group);
+ out:
+@@ -107,8 +107,8 @@ static void userspace_exit(struct devfreq *devfreq)
+ if (devfreq->dev.kobj.sd)
+ sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group);
+
+- kfree(devfreq->data);
+- devfreq->data = NULL;
++ kfree(devfreq->governor_data);
++ devfreq->governor_data = NULL;
+ }
+
+ static int devfreq_userspace_handler(struct devfreq *devfreq,
+diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
+index 293857ebfd75d..538e8dc74f40a 100644
+--- a/drivers/devfreq/rk3399_dmc.c
++++ b/drivers/devfreq/rk3399_dmc.c
+@@ -477,6 +477,8 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev)
+ {
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
+
++ devfreq_event_disable_edev(dmcfreq->edev);
++
+ /*
+ * Before remove the opp table we need to unregister the opp notifier.
+ */
+diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c
+index 4c06c93c93d32..c7f7134adc21d 100644
+--- a/drivers/dio/dio.c
++++ b/drivers/dio/dio.c
+@@ -110,6 +110,12 @@ static char dio_no_name[] = { 0 };
+
+ #endif /* CONFIG_DIO_CONSTANTS */
+
++static void dio_dev_release(struct device *dev)
++{
++ struct dio_dev *ddev = container_of(dev, typeof(struct dio_dev), dev);
++ kfree(ddev);
++}
++
+ int __init dio_find(int deviceid)
+ {
+ /* Called to find a DIO device before the full bus scan has run.
+@@ -224,6 +230,7 @@ static int __init dio_init(void)
+ dev->bus = &dio_bus;
+ dev->dev.parent = &dio_bus.dev;
+ dev->dev.bus = &dio_bus_type;
++ dev->dev.release = dio_dev_release;
+ dev->scode = scode;
+ dev->resource.start = pa;
+ dev->resource.end = pa + DIO_SIZE(scode, va);
+@@ -251,6 +258,7 @@ static int __init dio_init(void)
+ if (error) {
+ pr_err("DIO: Error registering device %s\n",
+ dev->name);
++ put_device(&dev->dev);
+ continue;
+ }
+ error = dio_create_sysfs_dev_files(dev);
+diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
+index 63d32261b63ff..968c3df2810e6 100644
+--- a/drivers/dma-buf/dma-buf.c
++++ b/drivers/dma-buf/dma-buf.c
+@@ -67,14 +67,11 @@ static void dma_buf_release(struct dentry *dentry)
+ BUG_ON(dmabuf->vmapping_counter);
+
+ /*
+- * Any fences that a dma-buf poll can wait on should be signaled
+- * before releasing dma-buf. This is the responsibility of each
+- * driver that uses the reservation objects.
+- *
+- * If you hit this BUG() it means someone dropped their ref to the
+- * dma-buf while still having pending operation to the buffer.
++ * If you hit this BUG() it could mean:
++ * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else
++ * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback
+ */
+- BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
++ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
+
+ dma_buf_stats_teardown(dmabuf);
+ dmabuf->ops->release(dmabuf);
+@@ -82,6 +79,7 @@ static void dma_buf_release(struct dentry *dentry)
+ if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
+ dma_resv_fini(dmabuf->resv);
+
++ WARN_ON(!list_empty(&dmabuf->attachments));
+ module_put(dmabuf->owner);
+ kfree(dmabuf->name);
+ kfree(dmabuf);
+@@ -199,22 +197,64 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
+ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+ {
+ struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb;
++ struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dcb->poll->lock, flags);
+ wake_up_locked_poll(dcb->poll, dcb->active);
+ dcb->active = 0;
+ spin_unlock_irqrestore(&dcb->poll->lock, flags);
++ dma_fence_put(fence);
++ /* Paired with get_file in dma_buf_poll */
++ fput(dmabuf->file);
++}
++
++static bool dma_buf_poll_shared(struct dma_resv *resv,
++ struct dma_buf_poll_cb_t *dcb)
++{
++ struct dma_resv_list *fobj = dma_resv_shared_list(resv);
++ struct dma_fence *fence;
++ int i, r;
++
++ if (!fobj)
++ return false;
++
++ for (i = 0; i < fobj->shared_count; ++i) {
++ fence = rcu_dereference_protected(fobj->shared[i],
++ dma_resv_held(resv));
++ dma_fence_get(fence);
++ r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
++ if (!r)
++ return true;
++ dma_fence_put(fence);
++ }
++
++ return false;
++}
++
++static bool dma_buf_poll_excl(struct dma_resv *resv,
++ struct dma_buf_poll_cb_t *dcb)
++{
++ struct dma_fence *fence = dma_resv_excl_fence(resv);
++ int r;
++
++ if (!fence)
++ return false;
++
++ dma_fence_get(fence);
++ r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
++ if (!r)
++ return true;
++ dma_fence_put(fence);
++
++ return false;
+ }
+
+ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
+ {
+ struct dma_buf *dmabuf;
+ struct dma_resv *resv;
+- struct dma_resv_list *fobj;
+- struct dma_fence *fence_excl;
+ __poll_t events;
+- unsigned shared_count, seq;
+
+ dmabuf = file->private_data;
+ if (!dmabuf || !dmabuf->resv)
+@@ -228,101 +268,57 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
+ if (!events)
+ return 0;
+
+-retry:
+- seq = read_seqcount_begin(&resv->seq);
+- rcu_read_lock();
+-
+- fobj = rcu_dereference(resv->fence);
+- if (fobj)
+- shared_count = fobj->shared_count;
+- else
+- shared_count = 0;
+- fence_excl = dma_resv_excl_fence(resv);
+- if (read_seqcount_retry(&resv->seq, seq)) {
+- rcu_read_unlock();
+- goto retry;
+- }
+-
+- if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
+- struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
+- __poll_t pevents = EPOLLIN;
++ dma_resv_lock(resv, NULL);
+
+- if (shared_count == 0)
+- pevents |= EPOLLOUT;
++ if (events & EPOLLOUT) {
++ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out;
+
++ /* Check that callback isn't busy */
+ spin_lock_irq(&dmabuf->poll.lock);
+- if (dcb->active) {
+- dcb->active |= pevents;
+- events &= ~pevents;
+- } else
+- dcb->active = pevents;
++ if (dcb->active)
++ events &= ~EPOLLOUT;
++ else
++ dcb->active = EPOLLOUT;
+ spin_unlock_irq(&dmabuf->poll.lock);
+
+- if (events & pevents) {
+- if (!dma_fence_get_rcu(fence_excl)) {
+- /* force a recheck */
+- events &= ~pevents;
+- dma_buf_poll_cb(NULL, &dcb->cb);
+- } else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
+- dma_buf_poll_cb)) {
+- events &= ~pevents;
+- dma_fence_put(fence_excl);
+- } else {
+- /*
+- * No callback queued, wake up any additional
+- * waiters.
+- */
+- dma_fence_put(fence_excl);
++ if (events & EPOLLOUT) {
++ /* Paired with fput in dma_buf_poll_cb */
++ get_file(dmabuf->file);
++
++ if (!dma_buf_poll_shared(resv, dcb) &&
++ !dma_buf_poll_excl(resv, dcb))
++
++ /* No callback queued, wake up any other waiters */
+ dma_buf_poll_cb(NULL, &dcb->cb);
+- }
++ else
++ events &= ~EPOLLOUT;
+ }
+ }
+
+- if ((events & EPOLLOUT) && shared_count > 0) {
+- struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
+- int i;
++ if (events & EPOLLIN) {
++ struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in;
+
+- /* Only queue a new callback if no event has fired yet */
++ /* Check that callback isn't busy */
+ spin_lock_irq(&dmabuf->poll.lock);
+ if (dcb->active)
+- events &= ~EPOLLOUT;
++ events &= ~EPOLLIN;
+ else
+- dcb->active = EPOLLOUT;
++ dcb->active = EPOLLIN;
+ spin_unlock_irq(&dmabuf->poll.lock);
+
+- if (!(events & EPOLLOUT))
+- goto out;
+-
+- for (i = 0; i < shared_count; ++i) {
+- struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
++ if (events & EPOLLIN) {
++ /* Paired with fput in dma_buf_poll_cb */
++ get_file(dmabuf->file);
+
+- if (!dma_fence_get_rcu(fence)) {
+- /*
+- * fence refcount dropped to zero, this means
+- * that fobj has been freed
+- *
+- * call dma_buf_poll_cb and force a recheck!
+- */
+- events &= ~EPOLLOUT;
++ if (!dma_buf_poll_excl(resv, dcb))
++ /* No callback queued, wake up any other waiters */
+ dma_buf_poll_cb(NULL, &dcb->cb);
+- break;
+- }
+- if (!dma_fence_add_callback(fence, &dcb->cb,
+- dma_buf_poll_cb)) {
+- dma_fence_put(fence);
+- events &= ~EPOLLOUT;
+- break;
+- }
+- dma_fence_put(fence);
++ else
++ events &= ~EPOLLIN;
+ }
+-
+- /* No callback queued, wake up any additional waiters. */
+- if (i == shared_count)
+- dma_buf_poll_cb(NULL, &dcb->cb);
+ }
+
+-out:
+- rcu_read_unlock();
++ dma_resv_unlock(resv);
+ return events;
+ }
+
+@@ -447,6 +443,7 @@ static inline int is_dma_buf_file(struct file *file)
+
+ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+ {
++ static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
+ struct file *file;
+ struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+
+@@ -456,6 +453,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+ inode->i_size = dmabuf->size;
+ inode_set_bytes(inode, dmabuf->size);
+
++ /*
++ * The ->i_ino acquired from get_next_ino() is not unique thus
++ * not suitable for using it as dentry name by dmabuf stats.
++ * Override ->i_ino with the unique and dmabuffs specific
++ * value.
++ */
++ inode->i_ino = atomic64_add_return(1, &dmabuf_inode);
+ file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
+ flags, &dma_buf_fops);
+ if (IS_ERR(file))
+@@ -565,8 +569,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
+ dmabuf->owner = exp_info->owner;
+ spin_lock_init(&dmabuf->name_lock);
+ init_waitqueue_head(&dmabuf->poll);
+- dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
+- dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
++ dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
++ dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
+
+ if (!resv) {
+ resv = (struct dma_resv *)&dmabuf[1];
+@@ -583,10 +587,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
+ file->f_mode |= FMODE_LSEEK;
+ dmabuf->file = file;
+
+- ret = dma_buf_stats_setup(dmabuf);
+- if (ret)
+- goto err_sysfs;
+-
+ mutex_init(&dmabuf->lock);
+ INIT_LIST_HEAD(&dmabuf->attachments);
+
+@@ -594,6 +594,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
+ list_add(&dmabuf->list_node, &db_list.head);
+ mutex_unlock(&db_list.lock);
+
++ ret = dma_buf_stats_setup(dmabuf);
++ if (ret)
++ goto err_sysfs;
++
+ return dmabuf;
+
+ err_sysfs:
+diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
+index d3fbd950be944..3e07f961e2f3d 100644
+--- a/drivers/dma-buf/dma-fence-array.c
++++ b/drivers/dma-buf/dma-fence-array.c
+@@ -104,7 +104,11 @@ static bool dma_fence_array_signaled(struct dma_fence *fence)
+ {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
+
+- return atomic_read(&array->num_pending) <= 0;
++ if (atomic_read(&array->num_pending) > 0)
++ return false;
++
++ dma_fence_array_clear_pending_error(array);
++ return true;
+ }
+
+ static void dma_fence_array_release(struct dma_fence *fence)
+diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
+index 56bf5ad01ad54..59d158873f4cb 100644
+--- a/drivers/dma-buf/dma-heap.c
++++ b/drivers/dma-buf/dma-heap.c
+@@ -14,6 +14,7 @@
+ #include <linux/xarray.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
++#include <linux/nospec.h>
+ #include <linux/uaccess.h>
+ #include <linux/syscalls.h>
+ #include <linux/dma-heap.h>
+@@ -135,6 +136,7 @@ static long dma_heap_ioctl(struct file *file, unsigned int ucmd,
+ if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds))
+ return -EINVAL;
+
++ nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds));
+ /* Get the kernel ioctl cmd that matches */
+ kcmd = dma_heap_ioctl_cmds[nr];
+
+@@ -231,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info)
+ return ERR_PTR(-EINVAL);
+ }
+
+- /* check the name is unique */
+- mutex_lock(&heap_list_lock);
+- list_for_each_entry(h, &heap_list, list) {
+- if (!strcmp(h->name, exp_info->name)) {
+- mutex_unlock(&heap_list_lock);
+- pr_err("dma_heap: Already registered heap named %s\n",
+- exp_info->name);
+- return ERR_PTR(-EINVAL);
+- }
+- }
+- mutex_unlock(&heap_list_lock);
+-
+ heap = kzalloc(sizeof(*heap), GFP_KERNEL);
+ if (!heap)
+ return ERR_PTR(-ENOMEM);
+@@ -281,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info)
+ err_ret = ERR_CAST(dev_ret);
+ goto err2;
+ }
+- /* Add heap to the list */
++
+ mutex_lock(&heap_list_lock);
++ /* check the name is unique */
++ list_for_each_entry(h, &heap_list, list) {
++ if (!strcmp(h->name, exp_info->name)) {
++ mutex_unlock(&heap_list_lock);
++ pr_err("dma_heap: Already registered heap named %s\n",
++ exp_info->name);
++ err_ret = ERR_PTR(-EINVAL);
++ goto err3;
++ }
++ }
++
++ /* Add heap to the list */
+ list_add(&heap->list, &heap_list);
+ mutex_unlock(&heap_list_lock);
+
+ return heap;
+
++err3:
++ device_destroy(dma_heap_class, heap->heap_devt);
+ err2:
+ cdev_del(&heap->heap_cdev);
+ err1:
+diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
+index 0c05b79870f96..83f02bd51dda6 100644
+--- a/drivers/dma-buf/heaps/cma_heap.c
++++ b/drivers/dma-buf/heaps/cma_heap.c
+@@ -124,10 +124,11 @@ static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+ struct cma_heap_buffer *buffer = dmabuf->priv;
+ struct dma_heap_attachment *a;
+
++ mutex_lock(&buffer->lock);
++
+ if (buffer->vmap_cnt)
+ invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+- mutex_lock(&buffer->lock);
+ list_for_each_entry(a, &buffer->attachments, list) {
+ if (!a->mapped)
+ continue;
+@@ -144,10 +145,11 @@ static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+ struct cma_heap_buffer *buffer = dmabuf->priv;
+ struct dma_heap_attachment *a;
+
++ mutex_lock(&buffer->lock);
++
+ if (buffer->vmap_cnt)
+ flush_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+- mutex_lock(&buffer->lock);
+ list_for_each_entry(a, &buffer->attachments, list) {
+ if (!a->mapped)
+ continue;
+diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
+index 23a7e74ef9666..8660508f3684f 100644
+--- a/drivers/dma-buf/heaps/system_heap.c
++++ b/drivers/dma-buf/heaps/system_heap.c
+@@ -289,7 +289,7 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
+ int i;
+
+ table = &buffer->sg_table;
+- for_each_sg(table->sgl, sg, table->nents, i) {
++ for_each_sgtable_sg(table, sg, i) {
+ struct page *page = sg_page(sg);
+
+ __free_pages(page, compound_order(page));
+diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
+index 348b3a9170fa4..7f5ed1aa7a9f8 100644
+--- a/drivers/dma-buf/sw_sync.c
++++ b/drivers/dma-buf/sw_sync.c
+@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
+ */
+ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
+ {
++ LIST_HEAD(signalled);
+ struct sync_pt *pt, *next;
+
+ trace_sync_timeline(obj);
+@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
+ if (!timeline_fence_signaled(&pt->base))
+ break;
+
+- list_del_init(&pt->link);
++ dma_fence_get(&pt->base);
++
++ list_move_tail(&pt->link, &signalled);
+ rb_erase(&pt->node, &obj->pt_tree);
+
+- /*
+- * A signal callback may release the last reference to this
+- * fence, causing it to be freed. That operation has to be
+- * last to avoid a use after free inside this loop, and must
+- * be after we remove the fence from the timeline in order to
+- * prevent deadlocking on timeline->lock inside
+- * timeline_fence_release().
+- */
+ dma_fence_signal_locked(&pt->base);
+ }
+
+ spin_unlock_irq(&obj->lock);
++
++ list_for_each_entry_safe(pt, next, &signalled, link) {
++ list_del_init(&pt->link);
++ dma_fence_put(&pt->base);
++ }
+ }
+
+ /**
+diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
+index c57a609db75be..bf11d32205f38 100644
+--- a/drivers/dma-buf/udmabuf.c
++++ b/drivers/dma-buf/udmabuf.c
+@@ -32,8 +32,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
+ {
+ struct vm_area_struct *vma = vmf->vma;
+ struct udmabuf *ubuf = vma->vm_private_data;
++ pgoff_t pgoff = vmf->pgoff;
+
+- vmf->page = ubuf->pages[vmf->pgoff];
++ if (pgoff >= ubuf->pagecount)
++ return VM_FAULT_SIGBUS;
++ vmf->page = ubuf->pages[pgoff];
+ get_page(vmf->page);
+ return 0;
+ }
+@@ -121,17 +124,20 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
+ {
+ struct udmabuf *ubuf = buf->priv;
+ struct device *dev = ubuf->device->this_device;
++ int ret = 0;
+
+ if (!ubuf->sg) {
+ ubuf->sg = get_sg_table(dev, buf, direction);
+- if (IS_ERR(ubuf->sg))
+- return PTR_ERR(ubuf->sg);
++ if (IS_ERR(ubuf->sg)) {
++ ret = PTR_ERR(ubuf->sg);
++ ubuf->sg = NULL;
++ }
+ } else {
+ dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
+ direction);
+ }
+
+- return 0;
++ return ret;
+ }
+
+ static int end_cpu_udmabuf(struct dma_buf *buf,
+@@ -190,6 +196,10 @@ static long udmabuf_create(struct miscdevice *device,
+ if (ubuf->pagecount > pglimit)
+ goto err;
+ }
++
++ if (!ubuf->pagecount)
++ goto err;
++
+ ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages),
+ GFP_KERNEL);
+ if (!ubuf->pages) {
+@@ -361,7 +371,23 @@ static struct miscdevice udmabuf_misc = {
+
+ static int __init udmabuf_dev_init(void)
+ {
+- return misc_register(&udmabuf_misc);
++ int ret;
++
++ ret = misc_register(&udmabuf_misc);
++ if (ret < 0) {
++ pr_err("Could not initialize udmabuf device\n");
++ return ret;
++ }
++
++ ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
++ DMA_BIT_MASK(64));
++ if (ret < 0) {
++ pr_err("Could not setup DMA mask for udmabuf device\n");
++ misc_deregister(&udmabuf_misc);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static void __exit udmabuf_dev_exit(void)
+diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
+index 80c2c03cb0141..e1beddcc8c84a 100644
+--- a/drivers/dma/Kconfig
++++ b/drivers/dma/Kconfig
+@@ -202,6 +202,7 @@ config FSL_DMA
+ config FSL_EDMA
+ tristate "Freescale eDMA engine support"
+ depends on OF
++ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+@@ -236,7 +237,7 @@ config FSL_RAID
+
+ config HISI_DMA
+ tristate "HiSilicon DMA Engine support"
+- depends on ARM64 || COMPILE_TEST
++ depends on ARCH_HISI || COMPILE_TEST
+ depends on PCI_MSI
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+@@ -271,6 +272,7 @@ config IMX_SDMA
+
+ config INTEL_IDMA64
+ tristate "Intel integrated DMA 64-bit support"
++ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
+index 30ae36124b1db..4583a8b5e5bd8 100644
+--- a/drivers/dma/at_hdmac.c
++++ b/drivers/dma/at_hdmac.c
+@@ -256,6 +256,8 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+ ATC_SPIP_BOUNDARY(first->boundary));
+ channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) |
+ ATC_DPIP_BOUNDARY(first->boundary));
++ /* Don't allow CPU to reorder channel enable. */
++ wmb();
+ dma_writel(atdma, CHER, atchan->mask);
+
+ vdbg_dump_regs(atchan);
+@@ -316,7 +318,8 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+ struct at_desc *desc_first = atc_first_active(atchan);
+ struct at_desc *desc;
+ int ret;
+- u32 ctrla, dscr, trials;
++ u32 ctrla, dscr;
++ unsigned int i;
+
+ /*
+ * If the cookie doesn't match to the currently running transfer then
+@@ -386,7 +389,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+ dscr = channel_readl(atchan, DSCR);
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+- for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) {
++ for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) {
+ u32 new_dscr;
+
+ rmb(); /* ensure DSCR is read after CTRLA */
+@@ -412,7 +415,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+ }
+- if (unlikely(trials >= ATC_MAX_DSCR_TRIALS))
++ if (unlikely(i == ATC_MAX_DSCR_TRIALS))
+ return -ETIMEDOUT;
+
+ /* for the first descriptor we can be more accurate */
+@@ -462,18 +465,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+ if (!atc_chan_is_cyclic(atchan))
+ dma_cookie_complete(txd);
+
+- /* If the transfer was a memset, free our temporary buffer */
+- if (desc->memset_buffer) {
+- dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
+- desc->memset_paddr);
+- desc->memset_buffer = false;
+- }
+-
+- /* move children to free_list */
+- list_splice_init(&desc->tx_list, &atchan->free_list);
+- /* move myself to free_list */
+- list_move(&desc->desc_node, &atchan->free_list);
+-
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ dma_descriptor_unmap(txd);
+@@ -483,42 +474,20 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+
+ dma_run_dependencies(txd);
+-}
+-
+-/**
+- * atc_complete_all - finish work for all transactions
+- * @atchan: channel to complete transactions for
+- *
+- * Eventually submit queued descriptors if any
+- *
+- * Assume channel is idle while calling this function
+- * Called with atchan->lock held and bh disabled
+- */
+-static void atc_complete_all(struct at_dma_chan *atchan)
+-{
+- struct at_desc *desc, *_desc;
+- LIST_HEAD(list);
+- unsigned long flags;
+-
+- dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n");
+
+ spin_lock_irqsave(&atchan->lock, flags);
+-
+- /*
+- * Submit queued descriptors ASAP, i.e. before we go through
+- * the completed ones.
+- */
+- if (!list_empty(&atchan->queue))
+- atc_dostart(atchan, atc_first_queued(atchan));
+- /* empty active_list now it is completed */
+- list_splice_init(&atchan->active_list, &list);
+- /* empty queue list by moving descriptors (if any) to active_list */
+- list_splice_init(&atchan->queue, &atchan->active_list);
+-
++ /* move children to free_list */
++ list_splice_init(&desc->tx_list, &atchan->free_list);
++ /* add myself to free_list */
++ list_add(&desc->desc_node, &atchan->free_list);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+- list_for_each_entry_safe(desc, _desc, &list, desc_node)
+- atc_chain_complete(atchan, desc);
++ /* If the transfer was a memset, free our temporary buffer */
++ if (desc->memset_buffer) {
++ dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
++ desc->memset_paddr);
++ desc->memset_buffer = false;
++ }
+ }
+
+ /**
+@@ -527,26 +496,28 @@ static void atc_complete_all(struct at_dma_chan *atchan)
+ */
+ static void atc_advance_work(struct at_dma_chan *atchan)
+ {
++ struct at_desc *desc;
+ unsigned long flags;
+- int ret;
+
+ dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+
+ spin_lock_irqsave(&atchan->lock, flags);
+- ret = atc_chan_is_enabled(atchan);
+- spin_unlock_irqrestore(&atchan->lock, flags);
+- if (ret)
+- return;
+-
+- if (list_empty(&atchan->active_list) ||
+- list_is_singular(&atchan->active_list))
+- return atc_complete_all(atchan);
++ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list))
++ return spin_unlock_irqrestore(&atchan->lock, flags);
+
+- atc_chain_complete(atchan, atc_first_active(atchan));
++ desc = atc_first_active(atchan);
++ /* Remove the transfer node from the active list. */
++ list_del_init(&desc->desc_node);
++ spin_unlock_irqrestore(&atchan->lock, flags);
++ atc_chain_complete(atchan, desc);
+
+ /* advance work */
+ spin_lock_irqsave(&atchan->lock, flags);
+- atc_dostart(atchan, atc_first_active(atchan));
++ if (!list_empty(&atchan->active_list)) {
++ desc = atc_first_queued(atchan);
++ list_move_tail(&desc->desc_node, &atchan->active_list);
++ atc_dostart(atchan, desc);
++ }
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ }
+
+@@ -558,6 +529,7 @@ static void atc_advance_work(struct at_dma_chan *atchan)
+ static void atc_handle_error(struct at_dma_chan *atchan)
+ {
+ struct at_desc *bad_desc;
++ struct at_desc *desc;
+ struct at_desc *child;
+ unsigned long flags;
+
+@@ -570,13 +542,12 @@ static void atc_handle_error(struct at_dma_chan *atchan)
+ bad_desc = atc_first_active(atchan);
+ list_del_init(&bad_desc->desc_node);
+
+- /* As we are stopped, take advantage to push queued descriptors
+- * in active_list */
+- list_splice_init(&atchan->queue, atchan->active_list.prev);
+-
+ /* Try to restart the controller */
+- if (!list_empty(&atchan->active_list))
+- atc_dostart(atchan, atc_first_active(atchan));
++ if (!list_empty(&atchan->active_list)) {
++ desc = atc_first_queued(atchan);
++ list_move_tail(&desc->desc_node, &atchan->active_list);
++ atc_dostart(atchan, desc);
++ }
+
+ /*
+ * KERN_CRITICAL may seem harsh, but since this only happens
+@@ -691,19 +662,11 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
+ spin_lock_irqsave(&atchan->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+- if (list_empty(&atchan->active_list)) {
+- dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+- desc->txd.cookie);
+- atc_dostart(atchan, desc);
+- list_add_tail(&desc->desc_node, &atchan->active_list);
+- } else {
+- dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+- desc->txd.cookie);
+- list_add_tail(&desc->desc_node, &atchan->queue);
+- }
+-
++ list_add_tail(&desc->desc_node, &atchan->queue);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
++ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
++ desc->txd.cookie);
+ return cookie;
+ }
+
+@@ -1437,11 +1400,8 @@ static int atc_terminate_all(struct dma_chan *chan)
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ int chan_id = atchan->chan_common.chan_id;
+- struct at_desc *desc, *_desc;
+ unsigned long flags;
+
+- LIST_HEAD(list);
+-
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ /*
+@@ -1460,19 +1420,15 @@ static int atc_terminate_all(struct dma_chan *chan)
+ cpu_relax();
+
+ /* active_list entries will end up before queued entries */
+- list_splice_init(&atchan->queue, &list);
+- list_splice_init(&atchan->active_list, &list);
+-
+- spin_unlock_irqrestore(&atchan->lock, flags);
+-
+- /* Flush all pending and queued descriptors */
+- list_for_each_entry_safe(desc, _desc, &list, desc_node)
+- atc_chain_complete(atchan, desc);
++ list_splice_tail_init(&atchan->queue, &atchan->free_list);
++ list_splice_tail_init(&atchan->active_list, &atchan->free_list);
+
+ clear_bit(ATC_IS_PAUSED, &atchan->status);
+ /* if channel dedicated to cyclic operations, free it */
+ clear_bit(ATC_IS_CYCLIC, &atchan->status);
+
++ spin_unlock_irqrestore(&atchan->lock, flags);
++
+ return 0;
+ }
+
+@@ -1527,20 +1483,26 @@ atc_tx_status(struct dma_chan *chan,
+ }
+
+ /**
+- * atc_issue_pending - try to finish work
++ * atc_issue_pending - takes the first transaction descriptor in the pending
++ * queue and starts the transfer.
+ * @chan: target DMA channel
+ */
+ static void atc_issue_pending(struct dma_chan *chan)
+ {
+- struct at_dma_chan *atchan = to_at_dma_chan(chan);
++ struct at_dma_chan *atchan = to_at_dma_chan(chan);
++ struct at_desc *desc;
++ unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "issue_pending\n");
+
+- /* Not needed for cyclic transfers */
+- if (atc_chan_is_cyclic(atchan))
+- return;
++ spin_lock_irqsave(&atchan->lock, flags);
++ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue))
++ return spin_unlock_irqrestore(&atchan->lock, flags);
+
+- atc_advance_work(atchan);
++ desc = atc_first_queued(atchan);
++ list_move_tail(&desc->desc_node, &atchan->active_list);
++ atc_dostart(atchan, desc);
++ spin_unlock_irqrestore(&atchan->lock, flags);
+ }
+
+ /**
+@@ -1958,7 +1920,11 @@ static int __init at_dma_probe(struct platform_device *pdev)
+ dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "",
+ plat_dat->nr_channels);
+
+- dma_async_device_register(&atdma->dma_common);
++ err = dma_async_device_register(&atdma->dma_common);
++ if (err) {
++ dev_err(&pdev->dev, "Unable to register: %d.\n", err);
++ goto err_dma_async_device_register;
++ }
+
+ /*
+ * Do not return an error if the dmac node is not present in order to
+@@ -1978,6 +1944,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
+
+ err_of_dma_controller_register:
+ dma_async_device_unregister(&atdma->dma_common);
++err_dma_async_device_register:
+ dma_pool_destroy(atdma->memset_pool);
+ err_memset_pool_create:
+ dma_pool_destroy(atdma->dma_desc_pool);
+diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
+index 4d1ebc040031c..d4d382d746078 100644
+--- a/drivers/dma/at_hdmac_regs.h
++++ b/drivers/dma/at_hdmac_regs.h
+@@ -186,13 +186,13 @@
+ /* LLI == Linked List Item; aka DMA buffer descriptor */
+ struct at_lli {
+ /* values that are not changed by hardware */
+- dma_addr_t saddr;
+- dma_addr_t daddr;
++ u32 saddr;
++ u32 daddr;
+ /* value that may get written back: */
+- u32 ctrla;
++ u32 ctrla;
+ /* more values that are not changed by hardware */
+- u32 ctrlb;
+- dma_addr_t dscr; /* chain to next lli */
++ u32 ctrlb;
++ u32 dscr; /* chain to next lli */
+ };
+
+ /**
+diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
+index ab78e0f6afd70..dd34626df1abc 100644
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -99,6 +99,7 @@
+ #define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */
+ #define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */
+ #define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */
++#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27)
+ #define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */
+ #define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */
+ #define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */
+@@ -155,7 +156,7 @@
+ #define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */
+ #define AT_XDMAC_CC_WRIP_DONE (0x0 << 23)
+ #define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23)
+-#define AT_XDMAC_CC_PERID(i) (0x7f & (i) << 24) /* Channel Peripheral Identifier */
++#define AT_XDMAC_CC_PERID(i) ((0x7f & (i)) << 24) /* Channel Peripheral Identifier */
+ #define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */
+ #define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */
+ #define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */
+@@ -242,6 +243,7 @@ struct at_xdmac {
+ int irq;
+ struct clk *clk;
+ u32 save_gim;
++ u32 save_gs;
+ struct dma_pool *at_xdmac_desc_pool;
+ const struct at_xdmac_layout *layout;
+ struct at_xdmac_chan chan[];
+@@ -252,15 +254,15 @@ struct at_xdmac {
+
+ /* Linked List Descriptor */
+ struct at_xdmac_lld {
+- dma_addr_t mbr_nda; /* Next Descriptor Member */
+- u32 mbr_ubc; /* Microblock Control Member */
+- dma_addr_t mbr_sa; /* Source Address Member */
+- dma_addr_t mbr_da; /* Destination Address Member */
+- u32 mbr_cfg; /* Configuration Register */
+- u32 mbr_bc; /* Block Control Register */
+- u32 mbr_ds; /* Data Stride Register */
+- u32 mbr_sus; /* Source Microblock Stride Register */
+- u32 mbr_dus; /* Destination Microblock Stride Register */
++ u32 mbr_nda; /* Next Descriptor Member */
++ u32 mbr_ubc; /* Microblock Control Member */
++ u32 mbr_sa; /* Source Address Member */
++ u32 mbr_da; /* Destination Address Member */
++ u32 mbr_cfg; /* Configuration Register */
++ u32 mbr_bc; /* Block Control Register */
++ u32 mbr_ds; /* Data Stride Register */
++ u32 mbr_sus; /* Source Microblock Stride Register */
++ u32 mbr_dus; /* Destination Microblock Stride Register */
+ };
+
+ /* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
+@@ -385,9 +387,6 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
+
+- if (at_xdmac_chan_is_enabled(atchan))
+- return;
+-
+ /* Set transfer as active to not try to start it again. */
+ first->active_xfer = true;
+
+@@ -405,7 +404,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+ */
+ if (at_xdmac_chan_is_cyclic(atchan))
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
+- else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3)
++ else if ((first->lld.mbr_ubc &
++ AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3)
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
+ else
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
+@@ -476,13 +476,12 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
+ spin_lock_irqsave(&atchan->lock, irqflags);
+ cookie = dma_cookie_assign(tx);
+
++ list_add_tail(&desc->xfer_node, &atchan->xfers_list);
++ spin_unlock_irqrestore(&atchan->lock, irqflags);
++
+ dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
+ __func__, atchan, desc);
+- list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+- if (list_is_singular(&atchan->xfers_list))
+- at_xdmac_start_xfer(atchan, desc);
+
+- spin_unlock_irqrestore(&atchan->lock, irqflags);
+ return cookie;
+ }
+
+@@ -733,7 +732,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+- list_splice_init(&first->descs_list, &atchan->free_descs_list);
++ list_splice_tail_init(&first->descs_list,
++ &atchan->free_descs_list);
+ goto spin_unlock;
+ }
+
+@@ -821,7 +821,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+- list_splice_init(&first->descs_list, &atchan->free_descs_list);
++ list_splice_tail_init(&first->descs_list,
++ &atchan->free_descs_list);
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+ return NULL;
+ }
+@@ -1025,6 +1026,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
+ NULL,
+ src_addr, dst_addr,
+ xt, xt->sgl);
++ if (!first)
++ return NULL;
+
+ /* Length of the block is (BLEN+1) microblocks. */
+ for (i = 0; i < xt->numf - 1; i++)
+@@ -1055,8 +1058,9 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
+ src_addr, dst_addr,
+ xt, chunk);
+ if (!desc) {
+- list_splice_init(&first->descs_list,
+- &atchan->free_descs_list);
++ if (first)
++ list_splice_tail_init(&first->descs_list,
++ &atchan->free_descs_list);
+ return NULL;
+ }
+
+@@ -1136,7 +1140,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+- list_splice_init(&first->descs_list, &atchan->free_descs_list);
++ list_splice_tail_init(&first->descs_list,
++ &atchan->free_descs_list);
+ return NULL;
+ }
+
+@@ -1312,8 +1317,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ sg_dma_len(sg),
+ value);
+ if (!desc && first)
+- list_splice_init(&first->descs_list,
+- &atchan->free_descs_list);
++ list_splice_tail_init(&first->descs_list,
++ &atchan->free_descs_list);
+
+ if (!first)
+ first = desc;
+@@ -1452,7 +1457,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ {
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+- struct at_xdmac_desc *desc, *_desc;
++ struct at_xdmac_desc *desc, *_desc, *iter;
+ struct list_head *descs_list;
+ enum dma_status ret;
+ int residue, retry;
+@@ -1567,11 +1572,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ * microblock.
+ */
+ descs_list = &desc->descs_list;
+- list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
+- dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
+- residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
+- if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
++ list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
++ dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
++ residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
++ if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
++ desc = iter;
+ break;
++ }
+ }
+ residue += cur_ubc << dwidth;
+
+@@ -1586,20 +1593,6 @@ spin_unlock:
+ return ret;
+ }
+
+-/* Call must be protected by lock. */
+-static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
+- struct at_xdmac_desc *desc)
+-{
+- dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+-
+- /*
+- * Remove the transfer from the transfer list then move the transfer
+- * descriptors into the free descriptors list.
+- */
+- list_del(&desc->xfer_node);
+- list_splice_init(&desc->descs_list, &atchan->free_descs_list);
+-}
+-
+ static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
+ {
+ struct at_xdmac_desc *desc;
+@@ -1623,14 +1616,17 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
+ struct at_xdmac_desc *desc;
+ struct dma_async_tx_descriptor *txd;
+
+- if (!list_empty(&atchan->xfers_list)) {
+- desc = list_first_entry(&atchan->xfers_list,
+- struct at_xdmac_desc, xfer_node);
+- txd = &desc->tx_dma_desc;
+-
+- if (txd->flags & DMA_PREP_INTERRUPT)
+- dmaengine_desc_get_callback_invoke(txd, NULL);
++ spin_lock_irq(&atchan->lock);
++ if (list_empty(&atchan->xfers_list)) {
++ spin_unlock_irq(&atchan->lock);
++ return;
+ }
++ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
++ xfer_node);
++ spin_unlock_irq(&atchan->lock);
++ txd = &desc->tx_dma_desc;
++ if (txd->flags & DMA_PREP_INTERRUPT)
++ dmaengine_desc_get_callback_invoke(txd, NULL);
+ }
+
+ static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
+@@ -1707,17 +1703,20 @@ static void at_xdmac_tasklet(struct tasklet_struct *t)
+ }
+
+ txd = &desc->tx_dma_desc;
+-
+- at_xdmac_remove_xfer(atchan, desc);
++ dma_cookie_complete(txd);
++ /* Remove the transfer from the transfer list. */
++ list_del(&desc->xfer_node);
+ spin_unlock_irq(&atchan->lock);
+
+- dma_cookie_complete(txd);
+ if (txd->flags & DMA_PREP_INTERRUPT)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+
+ dma_run_dependencies(txd);
+
+ spin_lock_irq(&atchan->lock);
++ /* Move the xfer descriptors into the free descriptors list. */
++ list_splice_tail_init(&desc->descs_list,
++ &atchan->free_descs_list);
+ at_xdmac_advance_work(atchan);
+ spin_unlock_irq(&atchan->lock);
+ }
+@@ -1784,11 +1783,9 @@ static void at_xdmac_issue_pending(struct dma_chan *chan)
+
+ dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
+
+- if (!at_xdmac_chan_is_cyclic(atchan)) {
+- spin_lock_irqsave(&atchan->lock, flags);
+- at_xdmac_advance_work(atchan);
+- spin_unlock_irqrestore(&atchan->lock, flags);
+- }
++ spin_lock_irqsave(&atchan->lock, flags);
++ at_xdmac_advance_work(atchan);
++ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return;
+ }
+@@ -1866,8 +1863,11 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
+ cpu_relax();
+
+ /* Cancel all pending transfers. */
+- list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node)
+- at_xdmac_remove_xfer(atchan, desc);
++ list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
++ list_del(&desc->xfer_node);
++ list_splice_tail_init(&desc->descs_list,
++ &atchan->free_descs_list);
++ }
+
+ clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+ clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+@@ -1897,6 +1897,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
+ for (i = 0; i < init_nr_desc_per_channel; i++) {
+ desc = at_xdmac_alloc_desc(chan, GFP_KERNEL);
+ if (!desc) {
++ if (i == 0) {
++ dev_warn(chan2dev(chan),
++ "can't allocate any descriptors\n");
++ return -EIO;
++ }
+ dev_warn(chan2dev(chan),
+ "only %d descriptors have been allocated\n", i);
+ break;
+@@ -1926,6 +1931,30 @@ static void at_xdmac_free_chan_resources(struct dma_chan *chan)
+ return;
+ }
+
++static void at_xdmac_axi_config(struct platform_device *pdev)
++{
++ struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
++ bool dev_m2m = false;
++ u32 dma_requests;
++
++ if (!atxdmac->layout->axi_config)
++ return; /* Not supported */
++
++ if (!of_property_read_u32(pdev->dev.of_node, "dma-requests",
++ &dma_requests)) {
++ dev_info(&pdev->dev, "controller in mem2mem mode.\n");
++ dev_m2m = true;
++ }
++
++ if (dev_m2m) {
++ at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M);
++ at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M);
++ } else {
++ at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M);
++ at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M);
++ }
++}
++
+ #ifdef CONFIG_PM
+ static int atmel_xdmac_prepare(struct device *dev)
+ {
+@@ -1964,6 +1993,7 @@ static int atmel_xdmac_suspend(struct device *dev)
+ }
+ }
+ atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
++ atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS);
+
+ at_xdmac_off(atxdmac);
+ clk_disable_unprepare(atxdmac->clk);
+@@ -1975,6 +2005,7 @@ static int atmel_xdmac_resume(struct device *dev)
+ struct at_xdmac *atxdmac = dev_get_drvdata(dev);
+ struct at_xdmac_chan *atchan;
+ struct dma_chan *chan, *_chan;
++ struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+ int i;
+ int ret;
+
+@@ -1982,6 +2013,8 @@ static int atmel_xdmac_resume(struct device *dev)
+ if (ret)
+ return ret;
+
++ at_xdmac_axi_config(pdev);
++
+ /* Clear pending interrupts. */
+ for (i = 0; i < atxdmac->dma.chancnt; i++) {
+ atchan = &atxdmac->chan[i];
+@@ -2000,37 +2033,14 @@ static int atmel_xdmac_resume(struct device *dev)
+ at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
+ at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
+ wmb();
+- at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
++ if (atxdmac->save_gs & atchan->mask)
++ at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+ }
+ }
+ return 0;
+ }
+ #endif /* CONFIG_PM_SLEEP */
+
+-static void at_xdmac_axi_config(struct platform_device *pdev)
+-{
+- struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+- bool dev_m2m = false;
+- u32 dma_requests;
+-
+- if (!atxdmac->layout->axi_config)
+- return; /* Not supported */
+-
+- if (!of_property_read_u32(pdev->dev.of_node, "dma-requests",
+- &dma_requests)) {
+- dev_info(&pdev->dev, "controller in mem2mem mode.\n");
+- dev_m2m = true;
+- }
+-
+- if (dev_m2m) {
+- at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M);
+- at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M);
+- } else {
+- at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M);
+- at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M);
+- }
+-}
+-
+ static int at_xdmac_probe(struct platform_device *pdev)
+ {
+ struct at_xdmac *atxdmac;
+diff --git a/drivers/dma/bestcomm/ata.c b/drivers/dma/bestcomm/ata.c
+index 2fd87f83cf90b..e169f18da551f 100644
+--- a/drivers/dma/bestcomm/ata.c
++++ b/drivers/dma/bestcomm/ata.c
+@@ -133,7 +133,7 @@ void bcom_ata_reset_bd(struct bcom_task *tsk)
+ struct bcom_ata_var *var;
+
+ /* Reset all BD */
+- memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
++ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ tsk->index = 0;
+ tsk->outdex = 0;
+diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c
+index d91cbbe7a48fb..8c42e5ca00a99 100644
+--- a/drivers/dma/bestcomm/bestcomm.c
++++ b/drivers/dma/bestcomm/bestcomm.c
+@@ -95,7 +95,7 @@ bcom_task_alloc(int bd_count, int bd_size, int priv_size)
+ tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa);
+ if (!tsk->bd)
+ goto error;
+- memset(tsk->bd, 0x00, bd_count * bd_size);
++ memset_io(tsk->bd, 0x00, bd_count * bd_size);
+
+ tsk->num_bd = bd_count;
+ tsk->bd_size = bd_size;
+@@ -186,16 +186,16 @@ bcom_load_image(int task, u32 *task_image)
+ inc = bcom_task_inc(task);
+
+ /* Clear & copy */
+- memset(var, 0x00, BCOM_VAR_SIZE);
+- memset(inc, 0x00, BCOM_INC_SIZE);
++ memset_io(var, 0x00, BCOM_VAR_SIZE);
++ memset_io(inc, 0x00, BCOM_INC_SIZE);
+
+ desc_src = (u32 *)(hdr + 1);
+ var_src = desc_src + hdr->desc_size;
+ inc_src = var_src + hdr->var_size;
+
+- memcpy(desc, desc_src, hdr->desc_size * sizeof(u32));
+- memcpy(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
+- memcpy(inc, inc_src, hdr->inc_size * sizeof(u32));
++ memcpy_toio(desc, desc_src, hdr->desc_size * sizeof(u32));
++ memcpy_toio(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
++ memcpy_toio(inc, inc_src, hdr->inc_size * sizeof(u32));
+
+ return 0;
+ }
+@@ -302,13 +302,13 @@ static int bcom_engine_init(void)
+ return -ENOMEM;
+ }
+
+- memset(bcom_eng->tdt, 0x00, tdt_size);
+- memset(bcom_eng->ctx, 0x00, ctx_size);
+- memset(bcom_eng->var, 0x00, var_size);
+- memset(bcom_eng->fdt, 0x00, fdt_size);
++ memset_io(bcom_eng->tdt, 0x00, tdt_size);
++ memset_io(bcom_eng->ctx, 0x00, ctx_size);
++ memset_io(bcom_eng->var, 0x00, var_size);
++ memset_io(bcom_eng->fdt, 0x00, fdt_size);
+
+ /* Copy the FDT for the EU#3 */
+- memcpy(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
++ memcpy_toio(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
+
+ /* Initialize Task base structure */
+ for (task=0; task<BCOM_MAX_TASKS; task++)
+diff --git a/drivers/dma/bestcomm/fec.c b/drivers/dma/bestcomm/fec.c
+index 7f1fb1c999e43..d203618ac11fe 100644
+--- a/drivers/dma/bestcomm/fec.c
++++ b/drivers/dma/bestcomm/fec.c
+@@ -140,7 +140,7 @@ bcom_fec_rx_reset(struct bcom_task *tsk)
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+- memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
++ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA);
+@@ -241,7 +241,7 @@ bcom_fec_tx_reset(struct bcom_task *tsk)
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+- memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
++ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA);
+diff --git a/drivers/dma/bestcomm/gen_bd.c b/drivers/dma/bestcomm/gen_bd.c
+index 906ddba6a6f5d..8a24a5cbc2633 100644
+--- a/drivers/dma/bestcomm/gen_bd.c
++++ b/drivers/dma/bestcomm/gen_bd.c
+@@ -142,7 +142,7 @@ bcom_gen_bd_rx_reset(struct bcom_task *tsk)
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+- memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
++ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA);
+@@ -226,7 +226,7 @@ bcom_gen_bd_tx_reset(struct bcom_task *tsk)
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+- memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
++ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA);
+diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
+index af3ee288bc117..4ec7bb58c195f 100644
+--- a/drivers/dma/dmaengine.c
++++ b/drivers/dma/dmaengine.c
+@@ -451,7 +451,8 @@ static int dma_chan_get(struct dma_chan *chan)
+ /* The channel is already in use, update client count */
+ if (chan->client_count) {
+ __module_get(owner);
+- goto out;
++ chan->client_count++;
++ return 0;
+ }
+
+ if (!try_module_get(owner))
+@@ -470,11 +471,11 @@ static int dma_chan_get(struct dma_chan *chan)
+ goto err_out;
+ }
+
++ chan->client_count++;
++
+ if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
+ balance_ref_count(chan);
+
+-out:
+- chan->client_count++;
+ return 0;
+
+ err_out:
+diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
+index 1bfbd64b13717..53f16d3f00294 100644
+--- a/drivers/dma/dmaengine.h
++++ b/drivers/dma/dmaengine.h
+@@ -176,7 +176,7 @@ dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx,
+ static inline bool
+ dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb)
+ {
+- return (cb->callback) ? true : false;
++ return cb->callback || cb->callback_result;
+ }
+
+ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
+diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+index 35993ab921547..cfc47efcb5d93 100644
+--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
++++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+@@ -288,8 +288,6 @@ dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+ len = vd_to_axi_desc(vdesc)->hw_desc[0].len;
+ completed_length = completed_blocks * len;
+ bytes = length - completed_length;
+- } else {
+- bytes = vd_to_axi_desc(vdesc)->length;
+ }
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+@@ -944,6 +942,11 @@ static int dw_axi_dma_chan_slave_config(struct dma_chan *dchan,
+ static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+ struct axi_dma_hw_desc *desc)
+ {
++ if (!desc->lli) {
++ dev_err(dchan2dev(&chan->vc.chan), "NULL LLI\n");
++ return;
++ }
++
+ dev_err(dchan2dev(&chan->vc.chan),
+ "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+ le64_to_cpu(desc->lli->sar),
+@@ -975,6 +978,11 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+
+ /* The bad descriptor currently is in the head of vc list */
+ vd = vchan_next_desc(&chan->vc);
++ if (!vd) {
++ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
++ axi_chan_name(chan));
++ goto out;
++ }
+ /* Remove the completed descriptor from issued list */
+ list_del(&vd->node);
+
+@@ -989,6 +997,7 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+ /* Try to restart the controller */
+ axi_chan_start_first_queued(chan);
+
++out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ }
+
+@@ -1011,6 +1020,11 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+
+ /* The completed descriptor currently is in the head of vc list */
+ vd = vchan_next_desc(&chan->vc);
++ if (!vd) {
++ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
++ axi_chan_name(chan));
++ goto out;
++ }
+
+ if (chan->cyclic) {
+ desc = vd_to_axi_desc(vd);
+@@ -1040,6 +1054,7 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+ axi_chan_start_first_queued(chan);
+ }
+
++out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ }
+
+diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
+index 53289927dd0d6..799ebbaf35be5 100644
+--- a/drivers/dma/dw-edma/dw-edma-core.c
++++ b/drivers/dma/dw-edma/dw-edma-core.c
+@@ -171,7 +171,7 @@ static void vchan_free_desc(struct virt_dma_desc *vdesc)
+ dw_edma_free_desc(vd2dw_edma_desc(vdesc));
+ }
+
+-static void dw_edma_start_transfer(struct dw_edma_chan *chan)
++static int dw_edma_start_transfer(struct dw_edma_chan *chan)
+ {
+ struct dw_edma_chunk *child;
+ struct dw_edma_desc *desc;
+@@ -179,16 +179,16 @@ static void dw_edma_start_transfer(struct dw_edma_chan *chan)
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd)
+- return;
++ return 0;
+
+ desc = vd2dw_edma_desc(vd);
+ if (!desc)
+- return;
++ return 0;
+
+ child = list_first_entry_or_null(&desc->chunk->list,
+ struct dw_edma_chunk, list);
+ if (!child)
+- return;
++ return 0;
+
+ dw_edma_v0_core_start(child, !desc->xfer_sz);
+ desc->xfer_sz += child->ll_region.sz;
+@@ -196,6 +196,8 @@ static void dw_edma_start_transfer(struct dw_edma_chan *chan)
+ list_del(&child->list);
+ kfree(child);
+ desc->chunks_alloc--;
++
++ return 1;
+ }
+
+ static int dw_edma_device_config(struct dma_chan *dchan,
+@@ -279,9 +281,12 @@ static void dw_edma_device_issue_pending(struct dma_chan *dchan)
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ unsigned long flags;
+
++ if (!chan->configured)
++ return;
++
+ spin_lock_irqsave(&chan->vc.lock, flags);
+- if (chan->configured && chan->request == EDMA_REQ_NONE &&
+- chan->status == EDMA_ST_IDLE && vchan_issue_pending(&chan->vc)) {
++ if (vchan_issue_pending(&chan->vc) && chan->request == EDMA_REQ_NONE &&
++ chan->status == EDMA_ST_IDLE) {
+ chan->status = EDMA_ST_BUSY;
+ dw_edma_start_transfer(chan);
+ }
+@@ -424,7 +429,7 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
+ chunk->ll_region.sz += burst->sz;
+ desc->alloc_sz += burst->sz;
+
+- if (chan->dir == EDMA_DIR_WRITE) {
++ if (dir == DMA_DEV_TO_MEM) {
+ burst->sar = src_addr;
+ if (xfer->type == EDMA_XFER_CYCLIC) {
+ burst->dar = xfer->xfer.cyclic.paddr;
+@@ -438,6 +443,8 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
+ * and destination addresses are increased
+ * by the same portion (data length)
+ */
++ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
++ burst->dar = dst_addr;
+ }
+ } else {
+ burst->dar = dst_addr;
+@@ -453,6 +460,8 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer)
+ * and destination addresses are increased
+ * by the same portion (data length)
+ */
++ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
++ burst->sar = src_addr;
+ }
+ }
+
+@@ -551,14 +560,14 @@ static void dw_edma_done_interrupt(struct dw_edma_chan *chan)
+ switch (chan->request) {
+ case EDMA_REQ_NONE:
+ desc = vd2dw_edma_desc(vd);
+- if (desc->chunks_alloc) {
+- chan->status = EDMA_ST_BUSY;
+- dw_edma_start_transfer(chan);
+- } else {
++ if (!desc->chunks_alloc) {
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+- chan->status = EDMA_ST_IDLE;
+ }
++
++ /* Continue transferring if there are remaining chunks or issued requests.
++ */
++ chan->status = dw_edma_start_transfer(chan) ? EDMA_ST_BUSY : EDMA_ST_IDLE;
+ break;
+
+ case EDMA_REQ_STOP:
+diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
+index 329fc2e57b703..043a4f3115fa3 100644
+--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
++++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
+@@ -192,7 +192,7 @@ static inline void writeq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ static inline u64 readq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ const void __iomem *addr)
+ {
+- u32 value;
++ u64 value;
+
+ if (dw->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+@@ -415,8 +415,11 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+ (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
+ /* Linked list */
+ #ifdef CONFIG_64BIT
+- SET_CH_64(dw, chan->dir, chan->id, llp.reg,
+- chunk->ll_region.paddr);
++ /* llp is not aligned on 64bit -> keep 32bit accesses */
++ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
++ lower_32_bits(chunk->ll_region.paddr));
++ SET_CH_32(dw, chan->dir, chan->id, llp.msb,
++ upper_32_bits(chunk->ll_region.paddr));
+ #else /* CONFIG_64BIT */
+ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+ lower_32_bits(chunk->ll_region.paddr));
+diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
+index c855a0e4f9ff4..df6be7ca340cd 100644
+--- a/drivers/dma/hisi_dma.c
++++ b/drivers/dma/hisi_dma.c
+@@ -30,7 +30,7 @@
+ #define HISI_DMA_MODE 0x217c
+ #define HISI_DMA_OFFSET 0x100
+
+-#define HISI_DMA_MSI_NUM 30
++#define HISI_DMA_MSI_NUM 32
+ #define HISI_DMA_CHAN_NUM 30
+ #define HISI_DMA_Q_DEPTH_VAL 1024
+
+@@ -180,7 +180,8 @@ static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index)
+ hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+ }
+
+-static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
++static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan,
++ bool disable)
+ {
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+ u32 index = chan->qp_num, tmp;
+@@ -201,8 +202,11 @@ static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
+ hisi_dma_do_reset(hdma_dev, index);
+ hisi_dma_reset_qp_point(hdma_dev, index);
+ hisi_dma_pause_dma(hdma_dev, index, false);
+- hisi_dma_enable_dma(hdma_dev, index, true);
+- hisi_dma_unmask_irq(hdma_dev, index);
++
++ if (!disable) {
++ hisi_dma_enable_dma(hdma_dev, index, true);
++ hisi_dma_unmask_irq(hdma_dev, index);
++ }
+
+ ret = readl_relaxed_poll_timeout(hdma_dev->base +
+ HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
+@@ -218,7 +222,7 @@ static void hisi_dma_free_chan_resources(struct dma_chan *c)
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+
+- hisi_dma_reset_hw_chan(chan);
++ hisi_dma_reset_or_disable_hw_chan(chan, false);
+ vchan_free_chan_resources(&chan->vc);
+
+ memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth);
+@@ -267,7 +271,6 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+- dev_err(&hdma_dev->pdev->dev, "no issued task!\n");
+ chan->desc = NULL;
+ return;
+ }
+@@ -299,7 +302,7 @@ static void hisi_dma_issue_pending(struct dma_chan *c)
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+- if (vchan_issue_pending(&chan->vc))
++ if (vchan_issue_pending(&chan->vc) && !chan->desc)
+ hisi_dma_start_transfer(chan);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+@@ -394,7 +397,7 @@ static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+
+ static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+ {
+- hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]);
++ hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true);
+ }
+
+ static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev)
+@@ -432,18 +435,15 @@ static irqreturn_t hisi_dma_irq(int irq, void *data)
+ desc = chan->desc;
+ cqe = chan->cq + chan->cq_head;
+ if (desc) {
++ chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth;
++ hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR,
++ chan->qp_num, chan->cq_head);
+ if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
+- chan->cq_head = (chan->cq_head + 1) %
+- hdma_dev->chan_depth;
+- hisi_dma_chan_write(hdma_dev->base,
+- HISI_DMA_CQ_HEAD_PTR, chan->qp_num,
+- chan->cq_head);
+ vchan_cookie_complete(&desc->vd);
++ hisi_dma_start_transfer(chan);
+ } else {
+ dev_err(&hdma_dev->pdev->dev, "task error!\n");
+ }
+-
+- chan->desc = NULL;
+ }
+
+ spin_unlock(&chan->vc.lock);
+diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
+index b9b2b4a4124ee..033df43db0cec 100644
+--- a/drivers/dma/idxd/cdev.c
++++ b/drivers/dma/idxd/cdev.c
+@@ -369,10 +369,16 @@ int idxd_cdev_register(void)
+ rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
+ ictx[i].name);
+ if (rc)
+- return rc;
++ goto err_free_chrdev_region;
+ }
+
+ return 0;
++
++err_free_chrdev_region:
++ for (i--; i >= 0; i--)
++ unregister_chrdev_region(ictx[i].devt, MINORMASK);
++
++ return rc;
+ }
+
+ void idxd_cdev_remove(void)
+diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
+index 83a5ff2ecf2a0..535f021911c55 100644
+--- a/drivers/dma/idxd/device.c
++++ b/drivers/dma/idxd/device.c
+@@ -394,8 +394,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+ lockdep_assert_held(&wq->wq_lock);
+ memset(wq->wqcfg, 0, idxd->wqcfg_size);
+ wq->type = IDXD_WQT_NONE;
+- wq->size = 0;
+- wq->group = NULL;
+ wq->threshold = 0;
+ wq->priority = 0;
+ wq->ats_dis = 0;
+@@ -404,6 +402,14 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+ memset(wq->name, 0, WQ_NAME_SIZE);
+ }
+
++static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
++{
++ lockdep_assert_held(&wq->wq_lock);
++
++ wq->size = 0;
++ wq->group = NULL;
++}
++
+ static void idxd_wq_ref_release(struct percpu_ref *ref)
+ {
+ struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);
+@@ -427,7 +433,6 @@ void idxd_wq_quiesce(struct idxd_wq *wq)
+ {
+ percpu_ref_kill(&wq->wq_active);
+ wait_for_completion(&wq->wq_dead);
+- percpu_ref_exit(&wq->wq_active);
+ }
+
+ /* Device control bits */
+@@ -584,6 +589,8 @@ void idxd_device_reset(struct idxd_device *idxd)
+ spin_lock(&idxd->dev_lock);
+ idxd_device_clear_state(idxd);
+ idxd->state = IDXD_DEV_DISABLED;
++ idxd_unmask_error_interrupts(idxd);
++ idxd_msix_perm_setup(idxd);
+ spin_unlock(&idxd->dev_lock);
+ }
+
+@@ -692,11 +699,16 @@ static void idxd_groups_clear_state(struct idxd_device *idxd)
+ memset(&group->grpcfg, 0, sizeof(group->grpcfg));
+ group->num_engines = 0;
+ group->num_wqs = 0;
+- group->use_token_limit = false;
+- group->tokens_allowed = 0;
+- group->tokens_reserved = 0;
+- group->tc_a = -1;
+- group->tc_b = -1;
++ group->use_rdbuf_limit = false;
++ group->rdbufs_allowed = 0;
++ group->rdbufs_reserved = 0;
++ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
++ group->tc_a = 1;
++ group->tc_b = 1;
++ } else {
++ group->tc_a = -1;
++ group->tc_b = -1;
++ }
+ }
+ }
+
+@@ -708,15 +720,16 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+- if (wq->state == IDXD_WQ_ENABLED) {
+- idxd_wq_disable_cleanup(wq);
+- wq->state = IDXD_WQ_DISABLED;
+- }
++ idxd_wq_disable_cleanup(wq);
++ idxd_wq_device_reset_cleanup(wq);
+ }
+ }
+
+ void idxd_device_clear_state(struct idxd_device *idxd)
+ {
++ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
++ return;
++
+ idxd_groups_clear_state(idxd);
+ idxd_engines_clear_state(idxd);
+ idxd_device_wqs_clear_state(idxd);
+@@ -791,10 +804,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
+ int i;
+ struct device *dev = &idxd->pdev->dev;
+
+- /* Setup bandwidth token limit */
+- if (idxd->token_limit) {
++ /* Setup bandwidth rdbuf limit */
++ if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+- reg.token_limit = idxd->token_limit;
++ reg.rdbuf_limit = idxd->rdbuf_limit;
+ iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+ }
+
+@@ -935,13 +948,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
+ group->tc_b = group->grpcfg.flags.tc_b = 1;
+ else
+ group->grpcfg.flags.tc_b = group->tc_b;
+- group->grpcfg.flags.use_token_limit = group->use_token_limit;
+- group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
+- if (group->tokens_allowed)
+- group->grpcfg.flags.tokens_allowed =
+- group->tokens_allowed;
++ group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
++ group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
++ if (group->rdbufs_allowed)
++ group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
+ else
+- group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
++ group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
+ }
+ }
+
+@@ -1136,7 +1148,7 @@ int idxd_device_load_config(struct idxd_device *idxd)
+ int i, rc;
+
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+- idxd->token_limit = reg.token_limit;
++ idxd->rdbuf_limit = reg.rdbuf_limit;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+@@ -1236,8 +1248,7 @@ int __drv_enable_wq(struct idxd_wq *wq)
+ return 0;
+
+ err_map_portal:
+- rc = idxd_wq_disable(wq, false);
+- if (rc < 0)
++ if (idxd_wq_disable(wq, false))
+ dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
+ err:
+ return rc;
+diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
+index e0f056c1d1f56..29af898f3c242 100644
+--- a/drivers/dma/idxd/dma.c
++++ b/drivers/dma/idxd/dma.c
+@@ -77,6 +77,27 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
+ hw->completion_addr = compl;
+ }
+
++static struct dma_async_tx_descriptor *
++idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags)
++{
++ struct idxd_wq *wq = to_idxd_wq(c);
++ u32 desc_flags;
++ struct idxd_desc *desc;
++
++ if (wq->state != IDXD_WQ_ENABLED)
++ return NULL;
++
++ op_flag_setup(flags, &desc_flags);
++ desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
++ if (IS_ERR(desc))
++ return NULL;
++
++ idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP,
++ 0, 0, 0, desc->compl_dma, desc_flags);
++ desc->txd.flags = flags;
++ return &desc->txd;
++}
++
+ static struct dma_async_tx_descriptor *
+ idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+@@ -181,10 +202,12 @@ int idxd_register_dma_device(struct idxd_device *idxd)
+ INIT_LIST_HEAD(&dma->channels);
+ dma->dev = dev;
+
++ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+ dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
+ dma->device_release = idxd_dma_release;
+
++ dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt;
+ if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) {
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy;
+@@ -311,6 +334,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
+
+ err_dma:
+ idxd_wq_quiesce(wq);
++ percpu_ref_exit(&wq->wq_active);
+ err_ref:
+ idxd_wq_free_resources(wq);
+ err_res_alloc:
+@@ -328,9 +352,9 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_quiesce(wq);
+ idxd_unregister_dma_channel(wq);
+- __drv_disable_wq(wq);
+ idxd_wq_free_resources(wq);
+- wq->type = IDXD_WQT_NONE;
++ __drv_disable_wq(wq);
++ percpu_ref_exit(&wq->wq_active);
+ mutex_unlock(&wq->wq_lock);
+ }
+
+diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
+index bfcb03329f778..833af18a99ee5 100644
+--- a/drivers/dma/idxd/idxd.h
++++ b/drivers/dma/idxd/idxd.h
+@@ -84,9 +84,9 @@ struct idxd_group {
+ int id;
+ int num_engines;
+ int num_wqs;
+- bool use_token_limit;
+- u8 tokens_allowed;
+- u8 tokens_reserved;
++ bool use_rdbuf_limit;
++ u8 rdbufs_allowed;
++ u8 rdbufs_reserved;
+ int tc_a;
+ int tc_b;
+ };
+@@ -278,11 +278,11 @@ struct idxd_device {
+ u32 max_batch_size;
+ int max_groups;
+ int max_engines;
+- int max_tokens;
++ int max_rdbufs;
+ int max_wqs;
+ int max_wq_size;
+- int token_limit;
+- int nr_tokens; /* non-reserved tokens */
++ int rdbuf_limit;
++ int nr_rdbufs; /* non-reserved read buffers */
+ unsigned int wqcfg_size;
+
+ union sw_err_reg sw_err;
+diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
+index eb09bc591c316..e0e0c7f286b67 100644
+--- a/drivers/dma/idxd/init.c
++++ b/drivers/dma/idxd/init.c
+@@ -340,7 +340,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
+ }
+
+ idxd->groups[i] = group;
+- if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) {
++ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
+ group->tc_a = 1;
+ group->tc_b = 1;
+ } else {
+@@ -464,9 +464,9 @@ static void idxd_read_caps(struct idxd_device *idxd)
+ dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
+ idxd->max_groups = idxd->hw.group_cap.num_groups;
+ dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
+- idxd->max_tokens = idxd->hw.group_cap.total_tokens;
+- dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
+- idxd->nr_tokens = idxd->max_tokens;
++ idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
++ dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
++ idxd->nr_rdbufs = idxd->max_rdbufs;
+
+ /* read engine capabilities */
+ idxd->hw.engine_cap.bits =
+@@ -797,11 +797,19 @@ static void idxd_remove(struct pci_dev *pdev)
+ int msixcnt = pci_msix_vec_count(pdev);
+ int i;
+
+- dev_dbg(&pdev->dev, "%s called\n", __func__);
++ idxd_unregister_devices(idxd);
++ /*
++ * When ->release() is called for the idxd->conf_dev, it frees all the memory related
++ * to the idxd context. The driver still needs those bits in order to do the rest of
++ * the cleanup. However, we do need to unbind the idxd sub-driver. So take a ref
++ * on the device here to hold off the freeing while allowing the idxd sub-driver
++ * to unbind.
++ */
++ get_device(idxd_confdev(idxd));
++ device_unregister(idxd_confdev(idxd));
+ idxd_shutdown(pdev);
+ if (device_pasid_enabled(idxd))
+ idxd_disable_system_pasid(idxd);
+- idxd_unregister_devices(idxd);
+
+ for (i = 0; i < msixcnt; i++) {
+ irq_entry = &idxd->irq_entries[i];
+@@ -815,7 +823,7 @@ static void idxd_remove(struct pci_dev *pdev)
+ pci_disable_device(pdev);
+ destroy_workqueue(idxd->wq);
+ perfmon_pmu_remove(idxd);
+- device_unregister(idxd_confdev(idxd));
++ put_device(idxd_confdev(idxd));
+ }
+
+ static struct pci_driver idxd_pci_driver = {
+diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
+index ca88fa7a328e7..6d6af0dc3c0ec 100644
+--- a/drivers/dma/idxd/irq.c
++++ b/drivers/dma/idxd/irq.c
+@@ -63,6 +63,9 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
+ int i;
+ bool err = false;
+
++ if (cause & IDXD_INTC_HALT_STATE)
++ goto halt;
++
+ if (cause & IDXD_INTC_ERR) {
+ spin_lock(&idxd->dev_lock);
+ for (i = 0; i < 4; i++)
+@@ -121,6 +124,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
+ if (!err)
+ return 0;
+
++halt:
+ gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+ if (gensts.state == IDXD_DEVICE_STATE_HALT) {
+ idxd->state = IDXD_DEV_HALTED;
+@@ -133,9 +137,10 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
+ INIT_WORK(&idxd->work, idxd_device_reinit);
+ queue_work(idxd->wq, &idxd->work);
+ } else {
+- spin_lock(&idxd->dev_lock);
++ idxd->state = IDXD_DEV_HALTED;
+ idxd_wqs_quiesce(idxd);
+ idxd_wqs_unmap_portal(idxd);
++ spin_lock(&idxd->dev_lock);
+ idxd_device_clear_state(idxd);
+ dev_err(&idxd->pdev->dev,
+ "idxd halted, need %s.\n",
+diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
+index ffc7550a77eeb..c0961c1ac161d 100644
+--- a/drivers/dma/idxd/registers.h
++++ b/drivers/dma/idxd/registers.h
+@@ -65,9 +65,9 @@ union wq_cap_reg {
+ union group_cap_reg {
+ struct {
+ u64 num_groups:8;
+- u64 total_tokens:8;
+- u64 token_en:1;
+- u64 token_limit:1;
++ u64 total_rdbufs:8; /* formerly total_tokens */
++ u64 rdbuf_ctrl:1; /* formerly token_en */
++ u64 rdbuf_limit:1; /* formerly token_limit */
+ u64 rsvd:46;
+ };
+ u64 bits;
+@@ -111,7 +111,7 @@ union offsets_reg {
+ #define IDXD_GENCFG_OFFSET 0x80
+ union gencfg_reg {
+ struct {
+- u32 token_limit:8;
++ u32 rdbuf_limit:8;
+ u32 rsvd:4;
+ u32 user_int_en:1;
+ u32 rsvd2:19;
+@@ -158,6 +158,7 @@ enum idxd_device_reset_type {
+ #define IDXD_INTC_CMD 0x02
+ #define IDXD_INTC_OCCUPY 0x04
+ #define IDXD_INTC_PERFMON_OVFL 0x08
++#define IDXD_INTC_HALT_STATE 0x10
+
+ #define IDXD_CMD_OFFSET 0xa0
+ union idxd_command_reg {
+@@ -287,10 +288,10 @@ union group_flags {
+ u32 tc_a:3;
+ u32 tc_b:3;
+ u32 rsvd:1;
+- u32 use_token_limit:1;
+- u32 tokens_reserved:8;
++ u32 use_rdbuf_limit:1;
++ u32 rdbufs_reserved:8;
+ u32 rsvd2:4;
+- u32 tokens_allowed:8;
++ u32 rdbufs_allowed:8;
+ u32 rsvd3:4;
+ };
+ u32 bits;
+diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
+index de76fb4abac24..83452fbbb168b 100644
+--- a/drivers/dma/idxd/submit.c
++++ b/drivers/dma/idxd/submit.c
+@@ -106,6 +106,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ {
+ struct idxd_desc *d, *t, *found = NULL;
+ struct llist_node *head;
++ LIST_HEAD(flist);
+
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ /*
+@@ -120,7 +121,11 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ found = desc;
+ continue;
+ }
+- list_add_tail(&desc->list, &ie->work_list);
++
++ if (d->completion->status)
++ list_add_tail(&d->list, &flist);
++ else
++ list_add_tail(&d->list, &ie->work_list);
+ }
+ }
+
+@@ -130,6 +135,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+
+ if (found)
+ complete_desc(found, IDXD_COMPLETE_ABORT);
++
++ /*
++ * complete_desc() will return desc to allocator and the desc can be
++ * acquired by a different process and the desc->list can be modified.
++ * Delete desc from list so the list traversing does not get corrupted
++ * by the other process.
++ */
++ list_for_each_entry_safe(d, t, &flist, list) {
++ list_del_init(&d->list);
++ complete_desc(d, IDXD_COMPLETE_NORMAL);
++ }
+ }
+
+ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
+index a9025be940db2..489a9d8850764 100644
+--- a/drivers/dma/idxd/sysfs.c
++++ b/drivers/dma/idxd/sysfs.c
+@@ -99,17 +99,17 @@ struct device_type idxd_engine_device_type = {
+
+ /* Group attributes */
+
+-static void idxd_set_free_tokens(struct idxd_device *idxd)
++static void idxd_set_free_rdbufs(struct idxd_device *idxd)
+ {
+- int i, tokens;
++ int i, rdbufs;
+
+- for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
++ for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *g = idxd->groups[i];
+
+- tokens += g->tokens_reserved;
++ rdbufs += g->rdbufs_reserved;
+ }
+
+- idxd->nr_tokens = idxd->max_tokens - tokens;
++ idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs;
+ }
+
+ static ssize_t group_tokens_reserved_show(struct device *dev,
+@@ -118,7 +118,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev,
+ {
+ struct idxd_group *group = confdev_to_group(dev);
+
+- return sysfs_emit(buf, "%u\n", group->tokens_reserved);
++ return sysfs_emit(buf, "%u\n", group->rdbufs_reserved);
+ }
+
+ static ssize_t group_tokens_reserved_store(struct device *dev,
+@@ -143,14 +143,14 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+- if (val > idxd->max_tokens)
++ if (val > idxd->max_rdbufs)
+ return -EINVAL;
+
+- if (val > idxd->nr_tokens + group->tokens_reserved)
++ if (val > idxd->nr_rdbufs + group->rdbufs_reserved)
+ return -EINVAL;
+
+- group->tokens_reserved = val;
+- idxd_set_free_tokens(idxd);
++ group->rdbufs_reserved = val;
++ idxd_set_free_rdbufs(idxd);
+ return count;
+ }
+
+@@ -164,7 +164,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev,
+ {
+ struct idxd_group *group = confdev_to_group(dev);
+
+- return sysfs_emit(buf, "%u\n", group->tokens_allowed);
++ return sysfs_emit(buf, "%u\n", group->rdbufs_allowed);
+ }
+
+ static ssize_t group_tokens_allowed_store(struct device *dev,
+@@ -190,10 +190,10 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
+ return -EPERM;
+
+ if (val < 4 * group->num_engines ||
+- val > group->tokens_reserved + idxd->nr_tokens)
++ val > group->rdbufs_reserved + idxd->nr_rdbufs)
+ return -EINVAL;
+
+- group->tokens_allowed = val;
++ group->rdbufs_allowed = val;
+ return count;
+ }
+
+@@ -207,7 +207,7 @@ static ssize_t group_use_token_limit_show(struct device *dev,
+ {
+ struct idxd_group *group = confdev_to_group(dev);
+
+- return sysfs_emit(buf, "%u\n", group->use_token_limit);
++ return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit);
+ }
+
+ static ssize_t group_use_token_limit_store(struct device *dev,
+@@ -232,10 +232,10 @@ static ssize_t group_use_token_limit_store(struct device *dev,
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+- if (idxd->token_limit == 0)
++ if (idxd->rdbuf_limit == 0)
+ return -EPERM;
+
+- group->use_token_limit = !!val;
++ group->use_rdbuf_limit = !!val;
+ return count;
+ }
+
+@@ -327,7 +327,7 @@ static ssize_t group_traffic_class_a_store(struct device *dev,
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+- if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
++ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override)
+ return -EPERM;
+
+ if (val < 0 || val > 7)
+@@ -369,7 +369,7 @@ static ssize_t group_traffic_class_b_store(struct device *dev,
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+- if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
++ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override)
+ return -EPERM;
+
+ if (val < 0 || val > 7)
+@@ -842,6 +842,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr
+ u64 xfer_size;
+ int rc;
+
++ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
++ return -EPERM;
++
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+@@ -876,6 +879,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu
+ u64 batch_size;
+ int rc;
+
++ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
++ return -EPERM;
++
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+@@ -1161,7 +1167,7 @@ static ssize_t max_tokens_show(struct device *dev,
+ {
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+- return sysfs_emit(buf, "%u\n", idxd->max_tokens);
++ return sysfs_emit(buf, "%u\n", idxd->max_rdbufs);
+ }
+ static DEVICE_ATTR_RO(max_tokens);
+
+@@ -1170,7 +1176,7 @@ static ssize_t token_limit_show(struct device *dev,
+ {
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+- return sysfs_emit(buf, "%u\n", idxd->token_limit);
++ return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit);
+ }
+
+ static ssize_t token_limit_store(struct device *dev,
+@@ -1191,13 +1197,13 @@ static ssize_t token_limit_store(struct device *dev,
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+- if (!idxd->hw.group_cap.token_limit)
++ if (!idxd->hw.group_cap.rdbuf_limit)
+ return -EPERM;
+
+- if (val > idxd->hw.group_cap.total_tokens)
++ if (val > idxd->hw.group_cap.total_rdbufs)
+ return -EINVAL;
+
+- idxd->token_limit = val;
++ idxd->rdbuf_limit = val;
+ return count;
+ }
+ static DEVICE_ATTR_RW(token_limit);
+diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
+index 2ddc31e64db03..da31e73d24d4c 100644
+--- a/drivers/dma/imx-dma.c
++++ b/drivers/dma/imx-dma.c
+@@ -1047,7 +1047,7 @@ static int __init imxdma_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ imxdma->dev = &pdev->dev;
+- imxdma->devtype = (enum imx_dma_type)of_device_get_match_data(&pdev->dev);
++ imxdma->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ imxdma->base = devm_ioremap_resource(&pdev->dev, res);
+diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
+index cacc725ca5459..292f4c9a963dd 100644
+--- a/drivers/dma/imx-sdma.c
++++ b/drivers/dma/imx-sdma.c
+@@ -198,12 +198,12 @@ struct sdma_script_start_addrs {
+ s32 per_2_firi_addr;
+ s32 mcu_2_firi_addr;
+ s32 uart_2_per_addr;
+- s32 uart_2_mcu_ram_addr;
++ s32 uart_2_mcu_addr;
+ s32 per_2_app_addr;
+ s32 mcu_2_app_addr;
+ s32 per_2_per_addr;
+ s32 uartsh_2_per_addr;
+- s32 uartsh_2_mcu_ram_addr;
++ s32 uartsh_2_mcu_addr;
+ s32 per_2_shp_addr;
+ s32 mcu_2_shp_addr;
+ s32 ata_2_mcu_addr;
+@@ -232,8 +232,8 @@ struct sdma_script_start_addrs {
+ s32 mcu_2_ecspi_addr;
+ s32 mcu_2_sai_addr;
+ s32 sai_2_mcu_addr;
+- s32 uart_2_mcu_addr;
+- s32 uartsh_2_mcu_addr;
++ s32 uart_2_mcu_rom_addr;
++ s32 uartsh_2_mcu_rom_addr;
+ /* End of v3 array */
+ s32 mcu_2_zqspi_addr;
+ /* End of v4 array */
+@@ -1428,10 +1428,12 @@ static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
+ sdma_config_ownership(sdmac, false, true, false);
+
+ if (sdma_load_context(sdmac))
+- goto err_desc_out;
++ goto err_bd_out;
+
+ return desc;
+
++err_bd_out:
++ sdma_free_bd(desc);
+ err_desc_out:
+ kfree(desc);
+ err_out:
+@@ -1780,17 +1782,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma,
+ saddr_arr[i] = addr_arr[i];
+
+ /*
+- * get uart_2_mcu_addr/uartsh_2_mcu_addr rom script specially because
+- * they are now replaced by uart_2_mcu_ram_addr/uartsh_2_mcu_ram_addr
+- * to be compatible with legacy freescale/nxp sdma firmware, and they
+- * are located in the bottom part of sdma_script_start_addrs which are
+- * beyond the SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1.
++ * The NXP internal legacy kernel (before 4.19) uses the uart ram
++ * scripts, while the mainline kernel uses the uart rom scripts; newer
++ * sdma firmware carries both. Use the rom versions if they are
++ * present (V3 or newer).
+ */
+- if (addr->uart_2_mcu_addr)
+- sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_addr;
+- if (addr->uartsh_2_mcu_addr)
+- sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_addr;
+-
++ if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) {
++ if (addr->uart_2_mcu_rom_addr)
++ sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr;
++ if (addr->uartsh_2_mcu_rom_addr)
++ sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr;
++ }
+ }
+
+ static void sdma_load_firmware(const struct firmware *fw, void *context)
+@@ -1869,7 +1871,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
+ u32 reg, val, shift, num_map, i;
+ int ret = 0;
+
+- if (IS_ERR(np) || IS_ERR(gpr_np))
++ if (IS_ERR(np) || !gpr_np)
+ goto out;
+
+ event_remap = of_find_property(np, propname, NULL);
+@@ -1917,7 +1919,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
+ }
+
+ out:
+- if (!IS_ERR(gpr_np))
++ if (gpr_np)
+ of_node_put(gpr_np);
+
+ return ret;
+@@ -2264,7 +2266,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver");
+ #if IS_ENABLED(CONFIG_SOC_IMX6Q)
+ MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin");
+ #endif
+-#if IS_ENABLED(CONFIG_SOC_IMX7D)
++#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M)
+ MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin");
+ #endif
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
+index 37ff4ec7db76f..e2070df6cad28 100644
+--- a/drivers/dma/ioat/dma.c
++++ b/drivers/dma/ioat/dma.c
+@@ -656,7 +656,7 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
+ if (active - i == 0) {
+ dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n",
+ __func__);
+- mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
++ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+
+ /* microsecond delay by sysfs variable per pending descriptor */
+@@ -682,7 +682,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
+
+ if (chanerr &
+ (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
+- mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
++ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+ ioat_eh(ioat_chan);
+ }
+ }
+@@ -879,7 +879,7 @@ static void check_active(struct ioatdma_chan *ioat_chan)
+ }
+
+ if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+- mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
++ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+
+ static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan)
+diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c
+index efe8bd3a0e2aa..1709d159af7e0 100644
+--- a/drivers/dma/lgm/lgm-dma.c
++++ b/drivers/dma/lgm/lgm-dma.c
+@@ -914,7 +914,7 @@ static void ldma_dev_init(struct ldma_dev *d)
+ }
+ }
+
+-static int ldma_cfg_init(struct ldma_dev *d)
++static int ldma_parse_dt(struct ldma_dev *d)
+ {
+ struct fwnode_handle *fwnode = dev_fwnode(d->dev);
+ struct ldma_port *p;
+@@ -1593,11 +1593,12 @@ static int intel_ldma_probe(struct platform_device *pdev)
+ d->core_clk = devm_clk_get_optional(dev, NULL);
+ if (IS_ERR(d->core_clk))
+ return PTR_ERR(d->core_clk);
+- clk_prepare_enable(d->core_clk);
+
+ d->rst = devm_reset_control_get_optional(dev, NULL);
+ if (IS_ERR(d->rst))
+ return PTR_ERR(d->rst);
++
++ clk_prepare_enable(d->core_clk);
+ reset_control_deassert(d->rst);
+
+ ret = devm_add_action_or_reset(dev, ldma_clk_disable, d);
+@@ -1660,10 +1661,6 @@ static int intel_ldma_probe(struct platform_device *pdev)
+ p->ldev = d;
+ }
+
+- ret = ldma_cfg_init(d);
+- if (ret)
+- return ret;
+-
+ dma_dev->dev = &pdev->dev;
+
+ ch_mask = (unsigned long)d->channels_mask;
+@@ -1674,6 +1671,10 @@ static int intel_ldma_probe(struct platform_device *pdev)
+ ldma_dma_init_v3X(j, d);
+ }
+
++ ret = ldma_parse_dt(d);
++ if (ret)
++ return ret;
++
+ dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = ldma_free_chan_resources;
+ dma_dev->device_terminate_all = ldma_terminate_all;
+diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c
+index e12b754e6398d..60d3c5f09ad67 100644
+--- a/drivers/dma/mcf-edma.c
++++ b/drivers/dma/mcf-edma.c
+@@ -191,7 +191,13 @@ static int mcf_edma_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+
+- chans = pdata->dma_channels;
++ if (!pdata->dma_channels) {
++ dev_info(&pdev->dev, "setting default channel number to 64");
++ chans = 64;
++ } else {
++ chans = pdata->dma_channels;
++ }
++
+ len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+ mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!mcf_edma)
+@@ -203,11 +209,6 @@ static int mcf_edma_probe(struct platform_device *pdev)
+ mcf_edma->drvdata = &mcf_data;
+ mcf_edma->big_endian = 1;
+
+- if (!mcf_edma->n_chans) {
+- dev_info(&pdev->dev, "setting default channel number to 64");
+- mcf_edma->n_chans = 64;
+- }
+-
+ mutex_init(&mcf_edma->fsl_edma_mutex);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
+index 375e7e647df6b..a1517ef1f4a01 100644
+--- a/drivers/dma/mediatek/mtk-uart-apdma.c
++++ b/drivers/dma/mediatek/mtk-uart-apdma.c
+@@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
+ unsigned int status;
+ int ret;
+
+- ret = pm_runtime_get_sync(mtkd->ddev.dev);
++ ret = pm_runtime_resume_and_get(mtkd->ddev.dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(chan->device->dev);
+ return ret;
+@@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
+ ret = readx_poll_timeout(readl, c->base + VFF_EN,
+ status, !status, 10, 100);
+ if (ret)
+- return ret;
++ goto err_pm;
+
+ ret = request_irq(c->irq, mtk_uart_apdma_irq_handler,
+ IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan);
+ if (ret < 0) {
+ dev_err(chan->device->dev, "Can't request dma IRQ\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_pm;
+ }
+
+ if (mtkd->support_33bits)
+ mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
+
++err_pm:
++ pm_runtime_put_noidle(mtkd->ddev.dev);
+ return ret;
+ }
+
+diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
+index 89f1814ff27a0..26d11885c50ec 100644
+--- a/drivers/dma/mmp_pdma.c
++++ b/drivers/dma/mmp_pdma.c
+@@ -727,12 +727,6 @@ static int mmp_pdma_config_write(struct dma_chan *dchan,
+
+ chan->dir = direction;
+ chan->dev_addr = addr;
+- /* FIXME: drivers should be ported over to use the filter
+- * function. Once that's done, the following two lines can
+- * be removed.
+- */
+- if (cfg->slave_id)
+- chan->drcmr = cfg->slave_id;
+
+ return 0;
+ }
+diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
+index 9b0d463f89bbd..9f3e011fbd914 100644
+--- a/drivers/dma/mv_xor_v2.c
++++ b/drivers/dma/mv_xor_v2.c
+@@ -756,7 +756,7 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
+
+ xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
+ if (PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+- ret = EPROBE_DEFER;
++ ret = -EPROBE_DEFER;
+ goto disable_reg_clk;
+ }
+ if (!IS_ERR(xor_dev->clk)) {
+@@ -899,6 +899,7 @@ static int mv_xor_v2_remove(struct platform_device *pdev)
+ tasklet_kill(&xor_dev->irq_tasklet);
+
+ clk_disable_unprepare(xor_dev->clk);
++ clk_disable_unprepare(xor_dev->reg_clk);
+
+ return 0;
+ }
+diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
+index 994fc4d2aca42..dc147cc2436e9 100644
+--- a/drivers/dma/mxs-dma.c
++++ b/drivers/dma/mxs-dma.c
+@@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+ return mxs_chan->status;
+ }
+
+-static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
++static int mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+ {
+ int ret;
+
+@@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
+ ofdma->of_node);
+ }
+
+-static int __init mxs_dma_probe(struct platform_device *pdev)
++static int mxs_dma_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+ const struct mxs_dma_type *dma_type;
+@@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = {
+ .name = "mxs-dma",
+ .of_match_table = mxs_dma_dt_ids,
+ },
++ .probe = mxs_dma_probe,
+ };
+
+-static int __init mxs_dma_module_init(void)
+-{
+- return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
+-}
+-subsys_initcall(mxs_dma_module_init);
++builtin_platform_driver(mxs_dma_driver);
+diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
+index 110de8a600588..ec8a1565630b6 100644
+--- a/drivers/dma/pl330.c
++++ b/drivers/dma/pl330.c
+@@ -403,6 +403,12 @@ enum desc_status {
+ * of a channel can be BUSY at any time.
+ */
+ BUSY,
++ /*
++ * Pause was called while descriptor was BUSY. Due to hardware
++ * limitations, only termination is possible for descriptors
++ * that have been paused.
++ */
++ PAUSED,
+ /*
+ * Sitting on the channel work_list but xfer done
+ * by PL330 core
+@@ -1050,7 +1056,7 @@ static bool _trigger(struct pl330_thread *thrd)
+ return true;
+ }
+
+-static bool _start(struct pl330_thread *thrd)
++static bool pl330_start_thread(struct pl330_thread *thrd)
+ {
+ switch (_state(thrd)) {
+ case PL330_STATE_FAULT_COMPLETING:
+@@ -1702,7 +1708,7 @@ static int pl330_update(struct pl330_dmac *pl330)
+ thrd->req_running = -1;
+
+ /* Get going again ASAP */
+- _start(thrd);
++ pl330_start_thread(thrd);
+
+ /* For now, just make a list of callbacks to be done */
+ list_add_tail(&descdone->rqd, &pl330->req_done);
+@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
+ list_for_each_entry(desc, &pch->work_list, node) {
+
+ /* If already submitted */
+- if (desc->status == BUSY)
++ if (desc->status == BUSY || desc->status == PAUSED)
+ continue;
+
+ ret = pl330_submit_req(pch->thread, desc);
+@@ -2089,7 +2095,7 @@ static void pl330_tasklet(struct tasklet_struct *t)
+ } else {
+ /* Make sure the PL330 Channel thread is active */
+ spin_lock(&pch->thread->dmac->lock);
+- _start(pch->thread);
++ pl330_start_thread(pch->thread);
+ spin_unlock(&pch->thread->dmac->lock);
+ }
+
+@@ -2107,7 +2113,7 @@ static void pl330_tasklet(struct tasklet_struct *t)
+ if (power_down) {
+ pch->active = true;
+ spin_lock(&pch->thread->dmac->lock);
+- _start(pch->thread);
++ pl330_start_thread(pch->thread);
+ spin_unlock(&pch->thread->dmac->lock);
+ power_down = false;
+ }
+@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
+ {
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
++ struct dma_pl330_desc *desc;
+ unsigned long flags;
+
+ pm_runtime_get_sync(pl330->ddma.dev);
+@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
+ _stop(pch->thread);
+ spin_unlock(&pl330->lock);
+
++ list_for_each_entry(desc, &pch->work_list, node) {
++ if (desc->status == BUSY)
++ desc->status = PAUSED;
++ }
+ spin_unlock_irqrestore(&pch->lock, flags);
+ pm_runtime_mark_last_busy(pl330->ddma.dev);
+ pm_runtime_put_autosuspend(pl330->ddma.dev);
+@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ else if (running && desc == running)
+ transferred =
+ pl330_get_current_xferred_count(pch, desc);
+- else if (desc->status == BUSY)
++ else if (desc->status == BUSY || desc->status == PAUSED)
+ /*
+ * Busy but not running means either just enqueued,
+ * or finished and not yet marked done
+@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ case DONE:
+ ret = DMA_COMPLETE;
+ break;
++ case PAUSED:
++ ret = DMA_PAUSED;
++ break;
+ case PREP:
+ case BUSY:
+ ret = DMA_IN_PROGRESS;
+@@ -2589,7 +2603,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
+
+ /* If the DMAC pool is empty, alloc new */
+ if (!desc) {
+- DEFINE_SPINLOCK(lock);
++ static DEFINE_SPINLOCK(lock);
+ LIST_HEAD(pool);
+
+ if (!add_desc(&pool, &lock, GFP_ATOMIC, 1))
+diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
+index 8a6bf291a73fe..bca4063b0dce4 100644
+--- a/drivers/dma/ptdma/ptdma-dev.c
++++ b/drivers/dma/ptdma/ptdma-dev.c
+@@ -71,12 +71,13 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd
+ bool soc = FIELD_GET(DWORD0_SOC, desc->dw0);
+ u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx];
+ u32 tail;
++ unsigned long flags;
+
+ if (soc) {
+ desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0);
+ desc->dw0 &= ~DWORD0_SOC;
+ }
+- mutex_lock(&cmd_q->q_mutex);
++ spin_lock_irqsave(&cmd_q->q_lock, flags);
+
+ /* Copy 32-byte command descriptor to hw queue. */
+ memcpy(q_desc, desc, 32);
+@@ -91,7 +92,7 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd
+
+ /* Turn the queue back on using our cached control register */
+ pt_start_queue(cmd_q);
+- mutex_unlock(&cmd_q->q_mutex);
++ spin_unlock_irqrestore(&cmd_q->q_lock, flags);
+
+ return 0;
+ }
+@@ -197,7 +198,7 @@ int pt_core_init(struct pt_device *pt)
+
+ cmd_q->pt = pt;
+ cmd_q->dma_pool = dma_pool;
+- mutex_init(&cmd_q->q_mutex);
++ spin_lock_init(&cmd_q->q_lock);
+
+ /* Page alignment satisfies our needs for N <= 128 */
+ cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+@@ -207,7 +208,7 @@ int pt_core_init(struct pt_device *pt)
+ if (!cmd_q->qbase) {
+ dev_err(dev, "unable to allocate command queue\n");
+ ret = -ENOMEM;
+- goto e_dma_alloc;
++ goto e_destroy_pool;
+ }
+
+ cmd_q->qidx = 0;
+@@ -229,8 +230,10 @@ int pt_core_init(struct pt_device *pt)
+
+ /* Request an irq */
+ ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt);
+- if (ret)
+- goto e_pool;
++ if (ret) {
++ dev_err(dev, "unable to allocate an IRQ\n");
++ goto e_free_dma;
++ }
+
+ /* Update the device registers with queue information. */
+ cmd_q->qcontrol &= ~CMD_Q_SIZE;
+@@ -250,21 +253,20 @@ int pt_core_init(struct pt_device *pt)
+ /* Register the DMA engine support */
+ ret = pt_dmaengine_register(pt);
+ if (ret)
+- goto e_dmaengine;
++ goto e_free_irq;
+
+ /* Set up debugfs entries */
+ ptdma_debugfs_setup(pt);
+
+ return 0;
+
+-e_dmaengine:
++e_free_irq:
+ free_irq(pt->pt_irq, pt);
+
+-e_dma_alloc:
++e_free_dma:
+ dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma);
+
+-e_pool:
+- dev_err(dev, "unable to allocate an IRQ\n");
++e_destroy_pool:
+ dma_pool_destroy(pt->cmd_q.dma_pool);
+
+ return ret;
+diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h
+index afbf192c92305..0f0b400a864e4 100644
+--- a/drivers/dma/ptdma/ptdma.h
++++ b/drivers/dma/ptdma/ptdma.h
+@@ -196,7 +196,7 @@ struct pt_cmd_queue {
+ struct ptdma_desc *qbase;
+
+ /* Aligned queue start address (per requirement) */
+- struct mutex q_mutex ____cacheline_aligned;
++ spinlock_t q_lock ____cacheline_aligned;
+ unsigned int qidx;
+
+ unsigned int qsize;
+diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
+index 4a2a796e348c1..e613ace79ea83 100644
+--- a/drivers/dma/pxa_dma.c
++++ b/drivers/dma/pxa_dma.c
+@@ -910,13 +910,6 @@ static void pxad_get_config(struct pxad_chan *chan,
+ *dcmd |= PXA_DCMD_BURST16;
+ else if (maxburst == 32)
+ *dcmd |= PXA_DCMD_BURST32;
+-
+- /* FIXME: drivers should be ported over to use the filter
+- * function. Once that's done, the following two lines can
+- * be removed.
+- */
+- if (chan->cfg.slave_id)
+- chan->drcmr = chan->cfg.slave_id;
+ }
+
+ static struct dma_async_tx_descriptor *
+@@ -1255,14 +1248,14 @@ static int pxad_init_phys(struct platform_device *op,
+ return -ENOMEM;
+
+ for (i = 0; i < nb_phy_chans; i++)
+- if (platform_get_irq(op, i) > 0)
++ if (platform_get_irq_optional(op, i) > 0)
+ nr_irq++;
+
+ for (i = 0; i < nb_phy_chans; i++) {
+ phy = &pdev->phys[i];
+ phy->base = pdev->base;
+ phy->idx = i;
+- irq = platform_get_irq(op, i);
++ irq = platform_get_irq_optional(op, i);
+ if ((nr_irq > 1) && (irq > 0))
+ ret = devm_request_irq(&op->dev, irq,
+ pxad_chan_handler,
+diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
+index c8a77b428b528..ca8c862c9747e 100644
+--- a/drivers/dma/qcom/bam_dma.c
++++ b/drivers/dma/qcom/bam_dma.c
+@@ -515,14 +515,6 @@ static int bam_alloc_chan(struct dma_chan *chan)
+ return 0;
+ }
+
+-static int bam_pm_runtime_get_sync(struct device *dev)
+-{
+- if (pm_runtime_enabled(dev))
+- return pm_runtime_get_sync(dev);
+-
+- return 0;
+-}
+-
+ /**
+ * bam_free_chan - Frees dma resources associated with specific channel
+ * @chan: specified channel
+@@ -538,7 +530,7 @@ static void bam_free_chan(struct dma_chan *chan)
+ unsigned long flags;
+ int ret;
+
+- ret = bam_pm_runtime_get_sync(bdev->dev);
++ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return;
+
+@@ -734,7 +726,7 @@ static int bam_pause(struct dma_chan *chan)
+ unsigned long flag;
+ int ret;
+
+- ret = bam_pm_runtime_get_sync(bdev->dev);
++ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return ret;
+
+@@ -760,7 +752,7 @@ static int bam_resume(struct dma_chan *chan)
+ unsigned long flag;
+ int ret;
+
+- ret = bam_pm_runtime_get_sync(bdev->dev);
++ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return ret;
+
+@@ -869,7 +861,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data)
+ if (srcs & P_IRQ)
+ tasklet_schedule(&bdev->task);
+
+- ret = bam_pm_runtime_get_sync(bdev->dev);
++ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return IRQ_NONE;
+
+@@ -987,7 +979,7 @@ static void bam_start_dma(struct bam_chan *bchan)
+ if (!vd)
+ return;
+
+- ret = bam_pm_runtime_get_sync(bdev->dev);
++ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return;
+
+@@ -1350,11 +1342,6 @@ static int bam_dma_probe(struct platform_device *pdev)
+ if (ret)
+ goto err_unregister_dma;
+
+- if (!bdev->bamclk) {
+- pm_runtime_disable(&pdev->dev);
+- return 0;
+- }
+-
+ pm_runtime_irq_safe(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(&pdev->dev);
+@@ -1438,10 +1425,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev)
+ {
+ struct bam_device *bdev = dev_get_drvdata(dev);
+
+- if (bdev->bamclk) {
+- pm_runtime_force_suspend(dev);
+- clk_unprepare(bdev->bamclk);
+- }
++ pm_runtime_force_suspend(dev);
++ clk_unprepare(bdev->bamclk);
+
+ return 0;
+ }
+@@ -1451,13 +1436,11 @@ static int __maybe_unused bam_dma_resume(struct device *dev)
+ struct bam_device *bdev = dev_get_drvdata(dev);
+ int ret;
+
+- if (bdev->bamclk) {
+- ret = clk_prepare(bdev->bamclk);
+- if (ret)
+- return ret;
++ ret = clk_prepare(bdev->bamclk);
++ if (ret)
++ return ret;
+
+- pm_runtime_force_resume(dev);
+- }
++ pm_runtime_force_resume(dev);
+
+ return 0;
+ }
+diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
+index 1a1b7d8458c93..1e87fe6c62af2 100644
+--- a/drivers/dma/qcom/gpi.c
++++ b/drivers/dma/qcom/gpi.c
+@@ -1961,7 +1961,6 @@ error_alloc_ev_ring:
+ error_config_int:
+ gpi_free_ring(&gpii->ev_ring, gpii);
+ exit_gpi_init:
+- mutex_unlock(&gpii->ctrl_lock);
+ return ret;
+ }
+
+diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
+index f12606aeff87c..dcf2b7a4183c1 100644
+--- a/drivers/dma/sf-pdma/sf-pdma.c
++++ b/drivers/dma/sf-pdma/sf-pdma.c
+@@ -52,16 +52,6 @@ static inline struct sf_pdma_desc *to_sf_pdma_desc(struct virt_dma_desc *vd)
+ static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan)
+ {
+ struct sf_pdma_desc *desc;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&chan->lock, flags);
+-
+- if (chan->desc && !chan->desc->in_use) {
+- spin_unlock_irqrestore(&chan->lock, flags);
+- return chan->desc;
+- }
+-
+- spin_unlock_irqrestore(&chan->lock, flags);
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+@@ -106,12 +96,10 @@ sf_pdma_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dest, dma_addr_t src,
+ if (!desc)
+ return NULL;
+
+- desc->in_use = true;
+ desc->dirn = DMA_MEM_TO_MEM;
+ desc->async_tx = vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+ spin_lock_irqsave(&chan->vchan.lock, iflags);
+- chan->desc = desc;
+ sf_pdma_fill_desc(desc, dest, src, len);
+ spin_unlock_irqrestore(&chan->vchan.lock, iflags);
+
+@@ -170,11 +158,17 @@ static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan,
+ unsigned long flags;
+ u64 residue = 0;
+ struct sf_pdma_desc *desc;
+- struct dma_async_tx_descriptor *tx;
++ struct dma_async_tx_descriptor *tx = NULL;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+- tx = &chan->desc->vdesc.tx;
++ list_for_each_entry(vd, &chan->vchan.desc_submitted, node)
++ if (vd->tx.cookie == cookie)
++ tx = &vd->tx;
++
++ if (!tx)
++ goto out;
++
+ if (cookie == tx->chan->completed_cookie)
+ goto out;
+
+@@ -241,6 +235,19 @@ static void sf_pdma_enable_request(struct sf_pdma_chan *chan)
+ writel(v, regs->ctrl);
+ }
+
++static struct sf_pdma_desc *sf_pdma_get_first_pending_desc(struct sf_pdma_chan *chan)
++{
++ struct virt_dma_chan *vchan = &chan->vchan;
++ struct virt_dma_desc *vdesc;
++
++ if (list_empty(&vchan->desc_issued))
++ return NULL;
++
++ vdesc = list_first_entry(&vchan->desc_issued, struct virt_dma_desc, node);
++
++ return container_of(vdesc, struct sf_pdma_desc, vdesc);
++}
++
+ static void sf_pdma_xfer_desc(struct sf_pdma_chan *chan)
+ {
+ struct sf_pdma_desc *desc = chan->desc;
+@@ -268,8 +275,11 @@ static void sf_pdma_issue_pending(struct dma_chan *dchan)
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+- if (vchan_issue_pending(&chan->vchan) && chan->desc)
++ if (!chan->desc && vchan_issue_pending(&chan->vchan)) {
++ /* vchan_issue_pending has made a check that desc is not NULL */
++ chan->desc = sf_pdma_get_first_pending_desc(chan);
+ sf_pdma_xfer_desc(chan);
++ }
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ }
+@@ -279,7 +289,7 @@ static void sf_pdma_free_desc(struct virt_dma_desc *vdesc)
+ struct sf_pdma_desc *desc;
+
+ desc = to_sf_pdma_desc(vdesc);
+- desc->in_use = false;
++ kfree(desc);
+ }
+
+ static void sf_pdma_donebh_tasklet(struct tasklet_struct *t)
+@@ -298,6 +308,11 @@ static void sf_pdma_donebh_tasklet(struct tasklet_struct *t)
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ list_del(&chan->desc->vdesc.node);
+ vchan_cookie_complete(&chan->desc->vdesc);
++
++ chan->desc = sf_pdma_get_first_pending_desc(chan);
++ if (chan->desc)
++ sf_pdma_xfer_desc(chan);
++
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ }
+
+diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h
+index 0c20167b097d0..02a229a3ae225 100644
+--- a/drivers/dma/sf-pdma/sf-pdma.h
++++ b/drivers/dma/sf-pdma/sf-pdma.h
+@@ -82,7 +82,6 @@ struct sf_pdma_desc {
+ u64 src_addr;
+ struct virt_dma_desc vdesc;
+ struct sf_pdma_chan *chan;
+- bool in_use;
+ enum dma_transfer_direction dirn;
+ struct dma_async_tx_descriptor *async_tx;
+ };
+diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
+index 6885b3dcd7a97..f4c46b3b6d9d7 100644
+--- a/drivers/dma/sh/rcar-dmac.c
++++ b/drivers/dma/sh/rcar-dmac.c
+@@ -1868,8 +1868,13 @@ static int rcar_dmac_probe(struct platform_device *pdev)
+
+ dmac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dmac);
+- dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
+- dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
++ ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
++ if (ret)
++ return ret;
++
++ ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
++ if (ret)
++ return ret;
+
+ ret = rcar_dmac_parse_of(&pdev->dev, dmac);
+ if (ret < 0)
+diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
+index f9f30cbeccbe7..941a7ef475f4e 100644
+--- a/drivers/dma/sh/rz-dmac.c
++++ b/drivers/dma/sh/rz-dmac.c
+@@ -9,6 +9,7 @@
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/dmaengine.h>
+ #include <linux/interrupt.h>
+@@ -143,8 +144,8 @@ struct rz_dmac {
+ #define CHCFG_REQD BIT(3)
+ #define CHCFG_SEL(bits) ((bits) & 0x07)
+ #define CHCFG_MEM_COPY (0x80400008)
+-#define CHCFG_FILL_DDS(a) (((a) << 16) & GENMASK(19, 16))
+-#define CHCFG_FILL_SDS(a) (((a) << 12) & GENMASK(15, 12))
++#define CHCFG_FILL_DDS_MASK GENMASK(19, 16)
++#define CHCFG_FILL_SDS_MASK GENMASK(15, 12)
+ #define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22)
+ #define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6)
+ #define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5)
+@@ -607,13 +608,15 @@ static int rz_dmac_config(struct dma_chan *chan,
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_DDS(val);
++ channel->chcfg &= ~CHCFG_FILL_DDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+
+ val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_SDS(val);
++ channel->chcfg &= ~CHCFG_FILL_SDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+
+ return 0;
+ }
+diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
+index 4357d2395e6b7..60115d8d40832 100644
+--- a/drivers/dma/sprd-dma.c
++++ b/drivers/dma/sprd-dma.c
+@@ -1236,11 +1236,8 @@ static int sprd_dma_remove(struct platform_device *pdev)
+ {
+ struct sprd_dma_dev *sdev = platform_get_drvdata(pdev);
+ struct sprd_dma_chn *c, *cn;
+- int ret;
+
+- ret = pm_runtime_get_sync(&pdev->dev);
+- if (ret < 0)
+- return ret;
++ pm_runtime_get_sync(&pdev->dev);
+
+ /* explicitly free the irq */
+ if (sdev->irq > 0)
+diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c
+index 962b6e05287b5..d95c421877fb7 100644
+--- a/drivers/dma/st_fdma.c
++++ b/drivers/dma/st_fdma.c
+@@ -874,4 +874,4 @@ MODULE_LICENSE("GPL v2");
+ MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver");
+ MODULE_AUTHOR("Ludovic.barre <Ludovic.barre@st.com>");
+ MODULE_AUTHOR("Peter Griffin <peter.griffin@linaro.org>");
+-MODULE_ALIAS("platform: " DRIVER_NAME);
++MODULE_ALIAS("platform:" DRIVER_NAME);
+diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
+index e1827393143f1..cb6b0e9ed5adc 100644
+--- a/drivers/dma/ste_dma40.c
++++ b/drivers/dma/ste_dma40.c
+@@ -3597,6 +3597,10 @@ static int __init d40_probe(struct platform_device *pdev)
+ spin_lock_init(&base->lcla_pool.lock);
+
+ base->irq = platform_get_irq(pdev, 0);
++ if (base->irq < 0) {
++ ret = base->irq;
++ goto destroy_cache;
++ }
+
+ ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
+ if (ret) {
+diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
+index 9063c727962ed..7dfc743ac4338 100644
+--- a/drivers/dma/stm32-dma.c
++++ b/drivers/dma/stm32-dma.c
+@@ -270,7 +270,6 @@ static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+ u32 threshold)
+ {
+ enum dma_slave_buswidth max_width;
+- u64 addr = buf_addr;
+
+ if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+ max_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+@@ -281,7 +280,7 @@ static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+ max_width > DMA_SLAVE_BUSWIDTH_1_BYTE)
+ max_width = max_width >> 1;
+
+- if (do_div(addr, max_width))
++ if (buf_addr & (max_width - 1))
+ max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+ return max_width;
+@@ -753,8 +752,14 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
+ if (src_bus_width < 0)
+ return src_bus_width;
+
+- /* Set memory burst size */
+- src_maxburst = STM32_DMA_MAX_BURST;
++ /*
++ * Set memory burst size - burst not possible if address is not aligned on
++ * the address boundary equal to the size of the transfer
++ */
++ if (buf_addr & (buf_len - 1))
++ src_maxburst = 1;
++ else
++ src_maxburst = STM32_DMA_MAX_BURST;
+ src_best_burst = stm32_dma_get_best_burst(buf_len,
+ src_maxburst,
+ fifoth,
+@@ -803,8 +808,14 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
+ if (dst_bus_width < 0)
+ return dst_bus_width;
+
+- /* Set memory burst size */
+- dst_maxburst = STM32_DMA_MAX_BURST;
++ /*
++ * Set memory burst size - burst not possible if address is not aligned on
++ * the address boundary equal to the size of the transfer
++ */
++ if (buf_addr & (buf_len - 1))
++ dst_maxburst = 1;
++ else
++ dst_maxburst = STM32_DMA_MAX_BURST;
+ dst_best_burst = stm32_dma_get_best_burst(buf_len,
+ dst_maxburst,
+ fifoth,
+diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
+index a42164389ebc2..d5d55732adba1 100644
+--- a/drivers/dma/stm32-dmamux.c
++++ b/drivers/dma/stm32-dmamux.c
+@@ -292,10 +292,12 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
+ ret = of_dma_router_register(node, stm32_dmamux_route_allocate,
+ &stm32_dmamux->dmarouter);
+ if (ret)
+- goto err_clk;
++ goto pm_disable;
+
+ return 0;
+
++pm_disable:
++ pm_runtime_disable(&pdev->dev);
+ err_clk:
+ clk_disable_unprepare(stm32_dmamux->clk);
+
+diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
+index 18cbd1e43c2e8..21a7bdc88970a 100644
+--- a/drivers/dma/stm32-mdma.c
++++ b/drivers/dma/stm32-mdma.c
+@@ -40,7 +40,6 @@
+ STM32_MDMA_SHIFT(mask))
+
+ #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */
+-#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */
+
+ /* MDMA Channel x interrupt/status register */
+ #define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */
+@@ -184,7 +183,7 @@
+ #define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x))
+ #define STM32_MDMA_CTBR_DBUS BIT(17)
+ #define STM32_MDMA_CTBR_SBUS BIT(16)
+-#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0)
++#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0)
+ #define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \
+ STM32_MDMA_CTBR_TSEL_MASK)
+
+@@ -196,7 +195,7 @@
+
+ #define STM32_MDMA_MAX_BUF_LEN 128
+ #define STM32_MDMA_MAX_BLOCK_LEN 65536
+-#define STM32_MDMA_MAX_CHANNELS 63
++#define STM32_MDMA_MAX_CHANNELS 32
+ #define STM32_MDMA_MAX_REQUESTS 256
+ #define STM32_MDMA_MAX_BURST 128
+ #define STM32_MDMA_VERY_HIGH_PRIORITY 0x3
+@@ -1345,26 +1344,16 @@ static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan)
+ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid)
+ {
+ struct stm32_mdma_device *dmadev = devid;
+- struct stm32_mdma_chan *chan = devid;
++ struct stm32_mdma_chan *chan;
+ u32 reg, id, ccr, ien, status;
+
+ /* Find out which channel generates the interrupt */
+ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0);
+- if (status) {
+- id = __ffs(status);
+- } else {
+- status = readl_relaxed(dmadev->base + STM32_MDMA_GISR1);
+- if (!status) {
+- dev_dbg(mdma2dev(dmadev), "spurious it\n");
+- return IRQ_NONE;
+- }
+- id = __ffs(status);
+- /*
+- * As GISR0 provides status for channel id from 0 to 31,
+- * so GISR1 provides status for channel id from 32 to 62
+- */
+- id += 32;
++ if (!status) {
++ dev_dbg(mdma2dev(dmadev), "spurious it\n");
++ return IRQ_NONE;
+ }
++ id = __ffs(status);
+
+ chan = &dmadev->chan[id];
+ if (!chan) {
+diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
+index b1115a6d1935c..f4f722eacee2b 100644
+--- a/drivers/dma/tegra210-adma.c
++++ b/drivers/dma/tegra210-adma.c
+@@ -224,7 +224,7 @@ static int tegra_adma_init(struct tegra_adma *tdma)
+ int ret;
+
+ /* Clear any interrupts */
+- tdma_write(tdma, tdma->cdata->global_int_clear, 0x1);
++ tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1);
+
+ /* Assert soft reset */
+ tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1);
+@@ -867,7 +867,7 @@ static int tegra_adma_probe(struct platform_device *pdev)
+
+ pm_runtime_enable(&pdev->dev);
+
+- ret = pm_runtime_get_sync(&pdev->dev);
++ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0)
+ goto rpm_disable;
+
+diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c
+index 71d24fc07c003..f744ddbbbad7f 100644
+--- a/drivers/dma/ti/dma-crossbar.c
++++ b/drivers/dma/ti/dma-crossbar.c
+@@ -245,6 +245,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
+ if (dma_spec->args[0] >= xbar->xbar_requests) {
+ dev_err(&pdev->dev, "Invalid XBAR request number: %d\n",
+ dma_spec->args[0]);
++ put_device(&pdev->dev);
+ return ERR_PTR(-EINVAL);
+ }
+
+@@ -252,12 +253,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "Can't get DMA master\n");
++ put_device(&pdev->dev);
+ return ERR_PTR(-EINVAL);
+ }
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ of_node_put(dma_spec->np);
++ put_device(&pdev->dev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+@@ -268,6 +271,8 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
+ mutex_unlock(&xbar->mutex);
+ dev_err(&pdev->dev, "Run out of free DMA requests\n");
+ kfree(map);
++ of_node_put(dma_spec->np);
++ put_device(&pdev->dev);
+ return ERR_PTR(-ENOMEM);
+ }
+ set_bit(map->xbar_out, xbar->dma_inuse);
+diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
+index 4fdd9f06b7235..4f1aeb81e9c7f 100644
+--- a/drivers/dma/ti/k3-udma-glue.c
++++ b/drivers/dma/ti/k3-udma-glue.c
+@@ -299,6 +299,7 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
+ ret = device_register(&tx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
++ put_device(&tx_chn->common.chan_dev);
+ tx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+@@ -917,6 +918,7 @@ k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name,
+ ret = device_register(&rx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
++ put_device(&rx_chn->common.chan_dev);
+ rx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+@@ -1048,6 +1050,7 @@ k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
+ ret = device_register(&rx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
++ put_device(&rx_chn->common.chan_dev);
+ rx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
+index aada84f40723c..3257b2f5157c3 100644
+--- a/drivers/dma/ti/k3-udma-private.c
++++ b/drivers/dma/ti/k3-udma-private.c
+@@ -31,14 +31,14 @@ struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property)
+ }
+
+ pdev = of_find_device_by_node(udma_node);
++ if (np != udma_node)
++ of_node_put(udma_node);
++
+ if (!pdev) {
+ pr_debug("UDMA device not found\n");
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+
+- if (np != udma_node)
+- of_node_put(udma_node);
+-
+ ud = platform_get_drvdata(pdev);
+ if (!ud) {
+ pr_debug("UDMA has not been probed\n");
+diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
+index a35858610780c..d796e50dfe992 100644
+--- a/drivers/dma/ti/k3-udma.c
++++ b/drivers/dma/ti/k3-udma.c
+@@ -300,8 +300,6 @@ struct udma_chan {
+
+ struct udma_tx_drain tx_drain;
+
+- u32 bcnt; /* number of bytes completed since the start of the channel */
+-
+ /* Channel configuration parameters */
+ struct udma_chan_config config;
+
+@@ -757,6 +755,21 @@ static void udma_reset_rings(struct udma_chan *uc)
+ }
+ }
+
++static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val)
++{
++ if (uc->desc->dir == DMA_DEV_TO_MEM) {
++ udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
++ udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
++ if (uc->config.ep_type != PSIL_EP_NATIVE)
++ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
++ } else {
++ udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
++ udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
++ if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE)
++ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
++ }
++}
++
+ static void udma_reset_counters(struct udma_chan *uc)
+ {
+ u32 val;
+@@ -790,8 +803,6 @@ static void udma_reset_counters(struct udma_chan *uc)
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ }
+-
+- uc->bcnt = 0;
+ }
+
+ static int udma_reset_chan(struct udma_chan *uc, bool hard)
+@@ -1115,7 +1126,7 @@ static void udma_check_tx_completion(struct work_struct *work)
+ if (uc->desc) {
+ struct udma_desc *d = uc->desc;
+
+- uc->bcnt += d->residue;
++ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ break;
+@@ -1168,7 +1179,7 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data)
+ vchan_cyclic_callback(&d->vd);
+ } else {
+ if (udma_is_desc_really_done(uc, d)) {
+- uc->bcnt += d->residue;
++ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ } else {
+@@ -1204,7 +1215,7 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data)
+ vchan_cyclic_callback(&d->vd);
+ } else {
+ /* TODO: figure out the real amount of data */
+- uc->bcnt += d->residue;
++ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ }
+@@ -1348,6 +1359,7 @@ static int bcdma_get_bchan(struct udma_chan *uc)
+ {
+ struct udma_dev *ud = uc->ud;
+ enum udma_tp_level tpl;
++ int ret;
+
+ if (uc->bchan) {
+ dev_dbg(ud->dev, "chan%d: already have bchan%d allocated\n",
+@@ -1365,8 +1377,11 @@ static int bcdma_get_bchan(struct udma_chan *uc)
+ tpl = ud->bchan_tpl.levels - 1;
+
+ uc->bchan = __udma_reserve_bchan(ud, tpl, -1);
+- if (IS_ERR(uc->bchan))
+- return PTR_ERR(uc->bchan);
++ if (IS_ERR(uc->bchan)) {
++ ret = PTR_ERR(uc->bchan);
++ uc->bchan = NULL;
++ return ret;
++ }
+
+ uc->tchan = uc->bchan;
+
+@@ -1376,6 +1391,7 @@ static int bcdma_get_bchan(struct udma_chan *uc)
+ static int udma_get_tchan(struct udma_chan *uc)
+ {
+ struct udma_dev *ud = uc->ud;
++ int ret;
+
+ if (uc->tchan) {
+ dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n",
+@@ -1390,8 +1406,11 @@ static int udma_get_tchan(struct udma_chan *uc)
+ */
+ uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl,
+ uc->config.mapped_channel_id);
+- if (IS_ERR(uc->tchan))
+- return PTR_ERR(uc->tchan);
++ if (IS_ERR(uc->tchan)) {
++ ret = PTR_ERR(uc->tchan);
++ uc->tchan = NULL;
++ return ret;
++ }
+
+ if (ud->tflow_cnt) {
+ int tflow_id;
+@@ -1421,6 +1440,7 @@ static int udma_get_tchan(struct udma_chan *uc)
+ static int udma_get_rchan(struct udma_chan *uc)
+ {
+ struct udma_dev *ud = uc->ud;
++ int ret;
+
+ if (uc->rchan) {
+ dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n",
+@@ -1435,8 +1455,13 @@ static int udma_get_rchan(struct udma_chan *uc)
+ */
+ uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl,
+ uc->config.mapped_channel_id);
++ if (IS_ERR(uc->rchan)) {
++ ret = PTR_ERR(uc->rchan);
++ uc->rchan = NULL;
++ return ret;
++ }
+
+- return PTR_ERR_OR_ZERO(uc->rchan);
++ return 0;
+ }
+
+ static int udma_get_chan_pair(struct udma_chan *uc)
+@@ -1490,6 +1515,7 @@ static int udma_get_chan_pair(struct udma_chan *uc)
+ static int udma_get_rflow(struct udma_chan *uc, int flow_id)
+ {
+ struct udma_dev *ud = uc->ud;
++ int ret;
+
+ if (!uc->rchan) {
+ dev_err(ud->dev, "chan%d: does not have rchan??\n", uc->id);
+@@ -1503,8 +1529,13 @@ static int udma_get_rflow(struct udma_chan *uc, int flow_id)
+ }
+
+ uc->rflow = __udma_get_rflow(ud, flow_id);
++ if (IS_ERR(uc->rflow)) {
++ ret = PTR_ERR(uc->rflow);
++ uc->rflow = NULL;
++ return ret;
++ }
+
+- return PTR_ERR_OR_ZERO(uc->rflow);
++ return 0;
+ }
+
+ static void bcdma_put_bchan(struct udma_chan *uc)
+@@ -3791,7 +3822,6 @@ static enum dma_status udma_tx_status(struct dma_chan *chan,
+ bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+ }
+
+- bcnt -= uc->bcnt;
+ if (bcnt && !(bcnt % uc->desc->residue))
+ residue = 0;
+ else
+diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
+index d6b8a202474f4..290836b7e1be2 100644
+--- a/drivers/dma/uniphier-xdmac.c
++++ b/drivers/dma/uniphier-xdmac.c
+@@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc)
+ static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc,
+ struct uniphier_xdmac_desc *xd)
+ {
+- u32 src_mode, src_addr, src_width;
+- u32 dst_mode, dst_addr, dst_width;
++ u32 src_mode, src_width;
++ u32 dst_mode, dst_width;
++ dma_addr_t src_addr, dst_addr;
+ u32 val, its, tnum;
+ enum dma_slave_buswidth buswidth;
+
+diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
+index a4450bc954665..edc2bb8f0523c 100644
+--- a/drivers/dma/xilinx/xilinx_dma.c
++++ b/drivers/dma/xilinx/xilinx_dma.c
+@@ -3037,9 +3037,10 @@ static int xilinx_dma_probe(struct platform_device *pdev)
+
+ /* Request and map I/O memory */
+ xdev->regs = devm_platform_ioremap_resource(pdev, 0);
+- if (IS_ERR(xdev->regs))
+- return PTR_ERR(xdev->regs);
+-
++ if (IS_ERR(xdev->regs)) {
++ err = PTR_ERR(xdev->regs);
++ goto disable_clks;
++ }
+ /* Retrieve the DMA engine properties from the device tree */
+ xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0);
+ xdev->s2mm_chan_id = xdev->dma_config->max_channels / 2;
+@@ -3067,7 +3068,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
+ if (err < 0) {
+ dev_err(xdev->dev,
+ "missing xlnx,num-fstores property\n");
+- return err;
++ goto disable_clks;
+ }
+
+ err = of_property_read_u32(node, "xlnx,flush-fsync",
+@@ -3087,7 +3088,11 @@ static int xilinx_dma_probe(struct platform_device *pdev)
+ xdev->ext_addr = false;
+
+ /* Set the dma mask bits */
+- dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
++ err = dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
++ if (err < 0) {
++ dev_err(xdev->dev, "DMA mask error %d\n", err);
++ goto disable_clks;
++ }
+
+ /* Initialize the DMA engine */
+ xdev->common.dev = &pdev->dev;
+@@ -3133,8 +3138,10 @@ static int xilinx_dma_probe(struct platform_device *pdev)
+ /* Initialize the channels */
+ for_each_child_of_node(node, child) {
+ err = xilinx_dma_child_probe(xdev, child);
+- if (err < 0)
+- goto disable_clks;
++ if (err < 0) {
++ of_node_put(child);
++ goto error;
++ }
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+@@ -3169,12 +3176,12 @@ static int xilinx_dma_probe(struct platform_device *pdev)
+
+ return 0;
+
+-disable_clks:
+- xdma_disable_allclks(xdev);
+ error:
+ for (i = 0; i < xdev->dma_config->max_channels; i++)
+ if (xdev->chan[i])
+ xilinx_dma_chan_remove(xdev->chan[i]);
++disable_clks:
++ xdma_disable_allclks(xdev);
+
+ return err;
+ }
+diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
+index b280a53e8570a..ce5c66e6897d2 100644
+--- a/drivers/dma/xilinx/xilinx_dpdma.c
++++ b/drivers/dma/xilinx/xilinx_dpdma.c
+@@ -271,9 +271,6 @@ struct xilinx_dpdma_device {
+ /* -----------------------------------------------------------------------------
+ * DebugFS
+ */
+-
+-#ifdef CONFIG_DEBUG_FS
+-
+ #define XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE 32
+ #define XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR "65535"
+
+@@ -299,7 +296,7 @@ struct xilinx_dpdma_debugfs_request {
+
+ static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
+ {
+- if (chan->id == dpdma_debugfs.chan_id)
++ if (IS_ENABLED(CONFIG_DEBUG_FS) && chan->id == dpdma_debugfs.chan_id)
+ dpdma_debugfs.xilinx_dpdma_irq_done_count++;
+ }
+
+@@ -462,16 +459,6 @@ static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
+ dev_err(xdev->dev, "Failed to create debugfs testcase file\n");
+ }
+
+-#else
+-static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
+-{
+-}
+-
+-static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
+-{
+-}
+-#endif /* CONFIG_DEBUG_FS */
+-
+ /* -----------------------------------------------------------------------------
+ * I/O Accessors
+ */
+diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
+index 97f02f8eb03a8..5257bdbf77fb0 100644
+--- a/drivers/dma/xilinx/zynqmp_dma.c
++++ b/drivers/dma/xilinx/zynqmp_dma.c
+@@ -232,7 +232,7 @@ struct zynqmp_dma_chan {
+ bool is_dmacoherent;
+ struct tasklet_struct tasklet;
+ bool idle;
+- u32 desc_size;
++ size_t desc_size;
+ bool err;
+ u32 bus_width;
+ u32 src_burst_len;
+@@ -489,7 +489,8 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
+ }
+
+ chan->desc_pool_v = dma_alloc_coherent(chan->dev,
+- (2 * chan->desc_size * ZYNQMP_DMA_NUM_DESCS),
++ (2 * ZYNQMP_DMA_DESC_SIZE(chan) *
++ ZYNQMP_DMA_NUM_DESCS),
+ &chan->desc_pool_p, GFP_KERNEL);
+ if (!chan->desc_pool_v)
+ return -ENOMEM;
+diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
+index 3a6d2416cb0f6..5dd29789f97d3 100644
+--- a/drivers/edac/altera_edac.c
++++ b/drivers/edac/altera_edac.c
+@@ -350,7 +350,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
+ if (irq < 0) {
+ edac_printk(KERN_ERR, EDAC_MC,
+ "No irq %d in DT\n", irq);
+- return -ENODEV;
++ return irq;
+ }
+
+ /* Arria10 has a 2nd IRQ */
+diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
+index 99b06a3e8fb12..4fce75013674f 100644
+--- a/drivers/edac/amd64_edac.c
++++ b/drivers/edac/amd64_edac.c
+@@ -1065,12 +1065,14 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
+ #define CS_ODD_PRIMARY BIT(1)
+ #define CS_EVEN_SECONDARY BIT(2)
+ #define CS_ODD_SECONDARY BIT(3)
++#define CS_3R_INTERLEAVE BIT(4)
+
+ #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
+ #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY)
+
+ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
+ {
++ u8 base, count = 0;
+ int cs_mode = 0;
+
+ if (csrow_enabled(2 * dimm, ctrl, pvt))
+@@ -1083,6 +1085,20 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
+ if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
+ cs_mode |= CS_ODD_SECONDARY;
+
++ /*
++ * 3 Rank interleaving support.
++ * There should be only three bases enabled and their two masks should
++ * be equal.
++ */
++ for_each_chip_select(base, ctrl, pvt)
++ count += csrow_enabled(base, ctrl, pvt);
++
++ if (count == 3 &&
++ pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) {
++ edac_dbg(1, "3R interleaving in use.\n");
++ cs_mode |= CS_3R_INTERLEAVE;
++ }
++
+ return cs_mode;
+ }
+
+@@ -1891,10 +1907,14 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
++ *
++ * In the special 3 Rank interleaving case, a single bit is flipped
++ * without swapping with the most significant bit. This can be handled
++ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+- num_zero_bits = msb - weight;
++ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c
+index b8a7d9594afd4..1fa5ca57e9ec1 100644
+--- a/drivers/edac/dmc520_edac.c
++++ b/drivers/edac/dmc520_edac.c
+@@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev)
+ dev = &pdev->dev;
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+- irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
++ irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name);
+ irqs[idx] = irq;
+ masks[idx] = dmc520_irq_configs[idx].mask;
+ if (irq >= 0) {
+diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
+index 8c4d947fb8486..85c229985f905 100644
+--- a/drivers/edac/edac_device.c
++++ b/drivers/edac/edac_device.c
+@@ -34,6 +34,9 @@
+ static DEFINE_MUTEX(device_ctls_mutex);
+ static LIST_HEAD(edac_device_list);
+
++/* Default workqueue processing interval on this instance, in msecs */
++#define DEFAULT_POLL_INTERVAL 1000
++
+ #ifdef CONFIG_EDAC_DEBUG
+ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
+ {
+@@ -366,7 +369,7 @@ static void edac_device_workq_function(struct work_struct *work_req)
+ * whole one second to save timers firing all over the period
+ * between integral seconds
+ */
+- if (edac_dev->poll_msec == 1000)
++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
+ edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
+ else
+ edac_queue_work(&edac_dev->work, edac_dev->delay);
+@@ -396,7 +399,7 @@ static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
+ * timers firing on sub-second basis, while they are happy
+ * to fire together on the 1 second exactly
+ */
+- if (edac_dev->poll_msec == 1000)
++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
+ edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
+ else
+ edac_queue_work(&edac_dev->work, edac_dev->delay);
+@@ -424,17 +427,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
+ * Then restart the workq on the new delay
+ */
+ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
+- unsigned long value)
++ unsigned long msec)
+ {
+- unsigned long jiffs = msecs_to_jiffies(value);
+-
+- if (value == 1000)
+- jiffs = round_jiffies_relative(value);
+-
+- edac_dev->poll_msec = value;
+- edac_dev->delay = jiffs;
++ edac_dev->poll_msec = msec;
++ edac_dev->delay = msecs_to_jiffies(msec);
+
+- edac_mod_work(&edac_dev->work, jiffs);
++ /* See comment in edac_device_workq_setup() above */
++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
++ edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
++ else
++ edac_mod_work(&edac_dev->work, edac_dev->delay);
+ }
+
+ int edac_device_alloc_index(void)
+@@ -473,11 +475,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
+ /* This instance is NOW RUNNING */
+ edac_dev->op_state = OP_RUNNING_POLL;
+
+- /*
+- * enable workq processing on this instance,
+- * default = 1000 msec
+- */
+- edac_device_workq_setup(edac_dev, 1000);
++ edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL);
+ } else {
+ edac_dev->op_state = OP_RUNNING_INTERRUPT;
+ }
+diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
+index 2c5975674723a..a859ddd9d4a13 100644
+--- a/drivers/edac/edac_mc.c
++++ b/drivers/edac/edac_mc.c
+@@ -215,7 +215,7 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
+ else
+ return (char *)ptr;
+
+- r = (unsigned long)p % align;
++ r = (unsigned long)ptr % align;
+
+ if (r == 0)
+ return (char *)ptr;
+diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
+index aa1f91688eb8e..841d238bc3f18 100644
+--- a/drivers/edac/edac_module.h
++++ b/drivers/edac/edac_module.h
+@@ -56,7 +56,7 @@ bool edac_stop_work(struct delayed_work *work);
+ bool edac_mod_work(struct delayed_work *work, unsigned long delay);
+
+ extern void edac_device_reset_delay_period(struct edac_device_ctl_info
+- *edac_dev, unsigned long value);
++ *edac_dev, unsigned long msec);
+ extern void edac_mc_reset_delay_period(unsigned long value);
+
+ extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
+diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
+index 6d1ddecbf0da3..d0a9ccf640c4b 100644
+--- a/drivers/edac/ghes_edac.c
++++ b/drivers/edac/ghes_edac.c
+@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
+
+ dmi_memdev_name(handle, &bank, &device);
+
+- /* both strings must be non-zero */
+- if (bank && *bank && device && *device)
+- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
++ /*
++ * Set to a NULL string when both bank and device are zero. In this case,
++ * the label assigned by default will be preserved.
++ */
++ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
++ (bank && *bank) ? bank : "",
++ (bank && *bank && device && *device) ? " " : "",
++ (device && *device) ? device : "");
+ }
+
+ static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
+diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
+index 61b76ec226af1..19fba258ae108 100644
+--- a/drivers/edac/highbank_mc_edac.c
++++ b/drivers/edac/highbank_mc_edac.c
+@@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev)
+ drvdata = mci->pvt_info;
+ platform_set_drvdata(pdev, mci);
+
+- if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
+- return -ENOMEM;
++ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
++ res = -ENOMEM;
++ goto free;
++ }
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+@@ -243,6 +245,7 @@ err2:
+ edac_mc_del_mc(&pdev->dev);
+ err:
+ devres_release_group(&pdev->dev, NULL);
++free:
+ edac_mc_free(mci);
+ return res;
+ }
+diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
+index 83345bfac246f..e0af60833d28c 100644
+--- a/drivers/edac/i10nm_base.c
++++ b/drivers/edac/i10nm_base.c
+@@ -198,11 +198,10 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
+ if (unlikely(pci_enable_device(pdev) < 0)) {
+ edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
+ bus, dev, fun);
++ pci_dev_put(pdev);
+ return NULL;
+ }
+
+- pci_dev_get(pdev);
+-
+ return pdev;
+ }
+
+@@ -358,6 +357,9 @@ static int i10nm_get_hbm_munits(void)
+
+ mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
+ if (!mbase) {
++ pci_dev_put(d->imc[lmc].mdev);
++ d->imc[lmc].mdev = NULL;
++
+ i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
+ base + off);
+ return -ENOMEM;
+@@ -368,6 +370,12 @@ static int i10nm_get_hbm_munits(void)
+
+ mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
+ if (!I10NM_IS_HBM_IMC(mcmtr)) {
++ iounmap(d->imc[lmc].mbase);
++ d->imc[lmc].mbase = NULL;
++ d->imc[lmc].hbm_mc = false;
++ pci_dev_put(d->imc[lmc].mdev);
++ d->imc[lmc].mdev = NULL;
++
+ i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
+ return -ENODEV;
+ }
+diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
+index a07bbfd075d06..8ec70da8d84fe 100644
+--- a/drivers/edac/igen6_edac.c
++++ b/drivers/edac/igen6_edac.c
+@@ -27,7 +27,7 @@
+ #include "edac_mc.h"
+ #include "edac_module.h"
+
+-#define IGEN6_REVISION "v2.5"
++#define IGEN6_REVISION "v2.5.1"
+
+ #define EDAC_MOD_STR "igen6_edac"
+ #define IGEN6_NMI_NAME "igen6_ibecc"
+@@ -1216,9 +1216,6 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ INIT_WORK(&ecclog_work, ecclog_work_cb);
+ init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
+
+- /* Check if any pending errors before registering the NMI handler */
+- ecclog_handler();
+-
+ rc = register_err_handler();
+ if (rc)
+ goto fail3;
+@@ -1230,6 +1227,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ goto fail4;
+ }
+
++ /* Check if any pending errors before/during the registration of the error handler */
++ ecclog_handler();
++
+ igen6_debug_setup();
+ return 0;
+ fail4:
+diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
+index 97a27e42dd610..c45519f59dc11 100644
+--- a/drivers/edac/qcom_edac.c
++++ b/drivers/edac/qcom_edac.c
+@@ -252,7 +252,7 @@ clear:
+ static int
+ dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
+ {
+- struct llcc_drv_data *drv = edev_ctl->pvt_info;
++ struct llcc_drv_data *drv = edev_ctl->dev->platform_data;
+ int ret;
+
+ ret = dump_syn_reg_values(drv, bank, err_type);
+@@ -289,7 +289,7 @@ static irqreturn_t
+ llcc_ecc_irq_handler(int irq, void *edev_ctl)
+ {
+ struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
+- struct llcc_drv_data *drv = edac_dev_ctl->pvt_info;
++ struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data;
+ irqreturn_t irq_rc = IRQ_NONE;
+ u32 drp_error, trp_error, i;
+ int ret;
+@@ -358,7 +358,6 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
+ edev_ctl->dev_name = dev_name(dev);
+ edev_ctl->ctl_name = "llcc";
+ edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
+- edev_ctl->pvt_info = llcc_driv_data;
+
+ rc = edac_device_add_device(edev_ctl);
+ if (rc)
+diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
+index 4c626fcd4dcbb..1522d4aa2ca62 100644
+--- a/drivers/edac/sb_edac.c
++++ b/drivers/edac/sb_edac.c
+@@ -1052,7 +1052,7 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt)
+ pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, &reg);
+ rc = ((reg << 6) | rc) << 26;
+
+- return rc | 0x1ffffff;
++ return rc | 0x3ffffff;
+ }
+
+ static u64 knl_get_tolm(struct sbridge_pvt *pvt)
+diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
+index 1abc020d49ab6..984c93c8825f0 100644
+--- a/drivers/edac/skx_base.c
++++ b/drivers/edac/skx_base.c
+@@ -510,7 +510,7 @@ rir_found:
+ }
+
+ static u8 skx_close_row[] = {
+- 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
++ 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34
+ };
+
+ static u8 skx_close_column[] = {
+@@ -518,7 +518,7 @@ static u8 skx_close_column[] = {
+ };
+
+ static u8 skx_open_row[] = {
+- 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
++ 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34
+ };
+
+ static u8 skx_open_column[] = {
+diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
+index 7d08627e738b3..8557781bb8dce 100644
+--- a/drivers/edac/synopsys_edac.c
++++ b/drivers/edac/synopsys_edac.c
+@@ -163,6 +163,11 @@
+ #define ECC_STAT_CECNT_SHIFT 8
+ #define ECC_STAT_BITNUM_MASK 0x7F
+
++/* ECC error count register definitions */
++#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000
++#define ECC_ERRCNT_UECNT_SHIFT 16
++#define ECC_ERRCNT_CECNT_MASK 0xFFFF
++
+ /* DDR QOS Interrupt register definitions */
+ #define DDR_QOS_IRQ_STAT_OFST 0x20200
+ #define DDR_QOSUE_MASK 0x4
+@@ -418,15 +423,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
+ base = priv->baseaddr;
+ p = &priv->stat;
+
++ regval = readl(base + ECC_ERRCNT_OFST);
++ p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
++ p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
++ if (!p->ce_cnt)
++ goto ue_err;
++
+ regval = readl(base + ECC_STAT_OFST);
+ if (!regval)
+ return 1;
+
+- p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
+- p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
+- if (!p->ce_cnt)
+- goto ue_err;
+-
+ p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
+
+ regval = readl(base + ECC_CEADDR0_OFST);
+@@ -1352,8 +1358,7 @@ static int mc_probe(struct platform_device *pdev)
+ }
+ }
+
+- if (of_device_is_compatible(pdev->dev.of_node,
+- "xlnx,zynqmp-ddrc-2.40a"))
++ if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT)
+ setup_address_map(priv);
+ #endif
+
+diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
+index 2ccd1db5e98ff..7197f9fa02457 100644
+--- a/drivers/edac/xgene_edac.c
++++ b/drivers/edac/xgene_edac.c
+@@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev)
+ irq = platform_get_irq_optional(pdev, i);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "No IRQ resource\n");
+- rc = -EINVAL;
++ rc = irq;
+ goto out_err;
+ }
+ rc = devm_request_irq(&pdev->dev, irq,
+diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
+index c69d40ae5619a..7684b3afa6304 100644
+--- a/drivers/extcon/Kconfig
++++ b/drivers/extcon/Kconfig
+@@ -180,7 +180,7 @@ config EXTCON_USBC_CROS_EC
+
+ config EXTCON_USBC_TUSB320
+ tristate "TI TUSB320 USB-C extcon support"
+- depends on I2C
++ depends on I2C && TYPEC
+ select REGMAP_I2C
+ help
+ Say Y here to enable support for USB Type C cable detection extcon
+diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c
+index fdb31954cf2b6..8073bc7d3e615 100644
+--- a/drivers/extcon/extcon-axp288.c
++++ b/drivers/extcon/extcon-axp288.c
+@@ -375,8 +375,8 @@ static int axp288_extcon_probe(struct platform_device *pdev)
+ if (adev) {
+ info->id_extcon = extcon_get_extcon_dev(acpi_dev_name(adev));
+ put_device(&adev->dev);
+- if (!info->id_extcon)
+- return -EPROBE_DEFER;
++ if (IS_ERR(info->id_extcon))
++ return PTR_ERR(info->id_extcon);
+
+ dev_info(dev, "controlling USB role\n");
+ } else {
+diff --git a/drivers/extcon/extcon-ptn5150.c b/drivers/extcon/extcon-ptn5150.c
+index 5b9a3cf8df268..2a7874108df87 100644
+--- a/drivers/extcon/extcon-ptn5150.c
++++ b/drivers/extcon/extcon-ptn5150.c
+@@ -194,6 +194,13 @@ static int ptn5150_init_dev_type(struct ptn5150_info *info)
+ return 0;
+ }
+
++static void ptn5150_work_sync_and_put(void *data)
++{
++ struct ptn5150_info *info = data;
++
++ cancel_work_sync(&info->irq_work);
++}
++
+ static int ptn5150_i2c_probe(struct i2c_client *i2c)
+ {
+ struct device *dev = &i2c->dev;
+@@ -284,6 +291,10 @@ static int ptn5150_i2c_probe(struct i2c_client *i2c)
+ if (ret)
+ return -EINVAL;
+
++ ret = devm_add_action_or_reset(dev, ptn5150_work_sync_and_put, info);
++ if (ret)
++ return ret;
++
+ /*
+ * Update current extcon state if for example OTG connection was there
+ * before the probe
+diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c
+index 805af73b41521..b408ce989c223 100644
+--- a/drivers/extcon/extcon-usbc-tusb320.c
++++ b/drivers/extcon/extcon-usbc-tusb320.c
+@@ -1,11 +1,12 @@
+ // SPDX-License-Identifier: GPL-2.0
+-/**
++/*
+ * drivers/extcon/extcon-tusb320.c - TUSB320 extcon driver
+ *
+ * Copyright (C) 2020 National Instruments Corporation
+ * Author: Michael Auchter <michael.auchter@ni.com>
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/extcon-provider.h>
+ #include <linux/i2c.h>
+ #include <linux/init.h>
+@@ -13,21 +14,70 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/regmap.h>
++#include <linux/usb/typec.h>
++
++#define TUSB320_REG8 0x8
++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE GENMASK(7, 6)
++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE_USB 0x0
++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE_15A 0x1
++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE_30A 0x2
++#define TUSB320_REG8_CURRENT_MODE_DETECT GENMASK(5, 4)
++#define TUSB320_REG8_CURRENT_MODE_DETECT_DEF 0x0
++#define TUSB320_REG8_CURRENT_MODE_DETECT_MED 0x1
++#define TUSB320_REG8_CURRENT_MODE_DETECT_ACC 0x2
++#define TUSB320_REG8_CURRENT_MODE_DETECT_HI 0x3
++#define TUSB320_REG8_ACCESSORY_CONNECTED GENMASK(3, 2)
++#define TUSB320_REG8_ACCESSORY_CONNECTED_NONE 0x0
++#define TUSB320_REG8_ACCESSORY_CONNECTED_AUDIO 0x4
++#define TUSB320_REG8_ACCESSORY_CONNECTED_ACC 0x5
++#define TUSB320_REG8_ACCESSORY_CONNECTED_DEBUG 0x6
++#define TUSB320_REG8_ACTIVE_CABLE_DETECTION BIT(0)
+
+ #define TUSB320_REG9 0x9
+ #define TUSB320_REG9_ATTACHED_STATE_SHIFT 6
+ #define TUSB320_REG9_ATTACHED_STATE_MASK 0x3
+ #define TUSB320_REG9_CABLE_DIRECTION BIT(5)
+ #define TUSB320_REG9_INTERRUPT_STATUS BIT(4)
+-#define TUSB320_ATTACHED_STATE_NONE 0x0
+-#define TUSB320_ATTACHED_STATE_DFP 0x1
+-#define TUSB320_ATTACHED_STATE_UFP 0x2
+-#define TUSB320_ATTACHED_STATE_ACC 0x3
++
++#define TUSB320_REGA 0xa
++#define TUSB320L_REGA_DISABLE_TERM BIT(0)
++#define TUSB320_REGA_I2C_SOFT_RESET BIT(3)
++#define TUSB320_REGA_MODE_SELECT_SHIFT 4
++#define TUSB320_REGA_MODE_SELECT_MASK 0x3
++
++#define TUSB320L_REGA0_REVISION 0xa0
++
++enum tusb320_attached_state {
++ TUSB320_ATTACHED_STATE_NONE,
++ TUSB320_ATTACHED_STATE_DFP,
++ TUSB320_ATTACHED_STATE_UFP,
++ TUSB320_ATTACHED_STATE_ACC,
++};
++
++enum tusb320_mode {
++ TUSB320_MODE_PORT,
++ TUSB320_MODE_UFP,
++ TUSB320_MODE_DFP,
++ TUSB320_MODE_DRP,
++};
++
++struct tusb320_priv;
++
++struct tusb320_ops {
++ int (*set_mode)(struct tusb320_priv *priv, enum tusb320_mode mode);
++ int (*get_revision)(struct tusb320_priv *priv, unsigned int *revision);
++};
+
+ struct tusb320_priv {
+ struct device *dev;
+ struct regmap *regmap;
+ struct extcon_dev *edev;
++ struct tusb320_ops *ops;
++ enum tusb320_attached_state state;
++ struct typec_port *port;
++ struct typec_capability cap;
++ enum typec_port_type port_type;
++ enum typec_pwr_opmode pwr_opmode;
+ };
+
+ static const char * const tusb_attached_states[] = {
+@@ -62,19 +112,142 @@ static int tusb320_check_signature(struct tusb320_priv *priv)
+ return 0;
+ }
+
+-static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
++static int tusb320_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode)
+ {
+- struct tusb320_priv *priv = dev_id;
+- int state, polarity;
+- unsigned reg;
++ int ret;
+
+- if (regmap_read(priv->regmap, TUSB320_REG9, &reg)) {
+- dev_err(priv->dev, "error during i2c read!\n");
+- return IRQ_NONE;
++ /* Mode cannot be changed while cable is attached */
++ if (priv->state != TUSB320_ATTACHED_STATE_NONE)
++ return -EBUSY;
++
++ /* Write mode */
++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA,
++ TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT,
++ mode << TUSB320_REGA_MODE_SELECT_SHIFT);
++ if (ret) {
++ dev_err(priv->dev, "failed to write mode: %d\n", ret);
++ return ret;
+ }
+
+- if (!(reg & TUSB320_REG9_INTERRUPT_STATUS))
+- return IRQ_NONE;
++ return 0;
++}
++
++static int tusb320l_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode)
++{
++ int ret;
++
++ /* Disable CC state machine */
++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA,
++ TUSB320L_REGA_DISABLE_TERM, 1);
++ if (ret) {
++ dev_err(priv->dev,
++ "failed to disable CC state machine: %d\n", ret);
++ return ret;
++ }
++
++ /* Write mode */
++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA,
++ TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT,
++ mode << TUSB320_REGA_MODE_SELECT_SHIFT);
++ if (ret) {
++ dev_err(priv->dev, "failed to write mode: %d\n", ret);
++ goto err;
++ }
++
++ msleep(5);
++err:
++ /* Re-enable CC state machine */
++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA,
++ TUSB320L_REGA_DISABLE_TERM, 0);
++ if (ret)
++ dev_err(priv->dev,
++ "failed to re-enable CC state machine: %d\n", ret);
++
++ return ret;
++}
++
++static int tusb320_reset(struct tusb320_priv *priv)
++{
++ int ret;
++
++ /* Set mode to default (follow PORT pin) */
++ ret = priv->ops->set_mode(priv, TUSB320_MODE_PORT);
++ if (ret && ret != -EBUSY) {
++ dev_err(priv->dev,
++ "failed to set mode to PORT: %d\n", ret);
++ return ret;
++ }
++
++ /* Perform soft reset */
++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA,
++ TUSB320_REGA_I2C_SOFT_RESET, 1);
++ if (ret) {
++ dev_err(priv->dev,
++ "failed to write soft reset bit: %d\n", ret);
++ return ret;
++ }
++
++ /* Wait for chip to go through reset */
++ msleep(95);
++
++ return 0;
++}
++
++static int tusb320l_get_revision(struct tusb320_priv *priv, unsigned int *revision)
++{
++ return regmap_read(priv->regmap, TUSB320L_REGA0_REVISION, revision);
++}
++
++static struct tusb320_ops tusb320_ops = {
++ .set_mode = tusb320_set_mode,
++};
++
++static struct tusb320_ops tusb320l_ops = {
++ .set_mode = tusb320l_set_mode,
++ .get_revision = tusb320l_get_revision,
++};
++
++static int tusb320_set_adv_pwr_mode(struct tusb320_priv *priv)
++{
++ u8 mode;
++
++ if (priv->pwr_opmode == TYPEC_PWR_MODE_USB)
++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_USB;
++ else if (priv->pwr_opmode == TYPEC_PWR_MODE_1_5A)
++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_15A;
++ else if (priv->pwr_opmode == TYPEC_PWR_MODE_3_0A)
++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_30A;
++ else /* No other mode is supported. */
++ return -EINVAL;
++
++ return regmap_write_bits(priv->regmap, TUSB320_REG8,
++ TUSB320_REG8_CURRENT_MODE_ADVERTISE,
++ FIELD_PREP(TUSB320_REG8_CURRENT_MODE_ADVERTISE,
++ mode));
++}
++
++static int tusb320_port_type_set(struct typec_port *port,
++ enum typec_port_type type)
++{
++ struct tusb320_priv *priv = typec_get_drvdata(port);
++
++ if (type == TYPEC_PORT_SRC)
++ return priv->ops->set_mode(priv, TUSB320_MODE_DFP);
++ else if (type == TYPEC_PORT_SNK)
++ return priv->ops->set_mode(priv, TUSB320_MODE_UFP);
++ else if (type == TYPEC_PORT_DRP)
++ return priv->ops->set_mode(priv, TUSB320_MODE_DRP);
++ else
++ return priv->ops->set_mode(priv, TUSB320_MODE_PORT);
++}
++
++static const struct typec_operations tusb320_typec_ops = {
++ .port_type_set = tusb320_port_type_set,
++};
++
++static void tusb320_extcon_irq_handler(struct tusb320_priv *priv, u8 reg)
++{
++ int state, polarity;
+
+ state = (reg >> TUSB320_REG9_ATTACHED_STATE_SHIFT) &
+ TUSB320_REG9_ATTACHED_STATE_MASK;
+@@ -96,20 +269,170 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
+ extcon_sync(priv->edev, EXTCON_USB);
+ extcon_sync(priv->edev, EXTCON_USB_HOST);
+
++ priv->state = state;
++}
++
++static void tusb320_typec_irq_handler(struct tusb320_priv *priv, u8 reg9)
++{
++ struct typec_port *port = priv->port;
++ struct device *dev = priv->dev;
++ u8 mode, role, state;
++ int ret, reg8;
++ bool ori;
++
++ ori = reg9 & TUSB320_REG9_CABLE_DIRECTION;
++ typec_set_orientation(port, ori ? TYPEC_ORIENTATION_REVERSE :
++ TYPEC_ORIENTATION_NORMAL);
++
++ state = (reg9 >> TUSB320_REG9_ATTACHED_STATE_SHIFT) &
++ TUSB320_REG9_ATTACHED_STATE_MASK;
++ if (state == TUSB320_ATTACHED_STATE_DFP)
++ role = TYPEC_SOURCE;
++ else
++ role = TYPEC_SINK;
++
++ typec_set_vconn_role(port, role);
++ typec_set_pwr_role(port, role);
++ typec_set_data_role(port, role == TYPEC_SOURCE ?
++ TYPEC_HOST : TYPEC_DEVICE);
++
++ ret = regmap_read(priv->regmap, TUSB320_REG8, &reg8);
++ if (ret) {
++ dev_err(dev, "error during reg8 i2c read, ret=%d!\n", ret);
++ return;
++ }
++
++ mode = FIELD_GET(TUSB320_REG8_CURRENT_MODE_DETECT, reg8);
++ if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_DEF)
++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB);
++ else if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_MED)
++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_1_5A);
++ else if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_HI)
++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_3_0A);
++ else /* Charge through accessory */
++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB);
++}
++
++static irqreturn_t tusb320_state_update_handler(struct tusb320_priv *priv,
++ bool force_update)
++{
++ unsigned int reg;
++
++ if (regmap_read(priv->regmap, TUSB320_REG9, &reg)) {
++ dev_err(priv->dev, "error during i2c read!\n");
++ return IRQ_NONE;
++ }
++
++ if (!force_update && !(reg & TUSB320_REG9_INTERRUPT_STATUS))
++ return IRQ_NONE;
++
++ tusb320_extcon_irq_handler(priv, reg);
++
++ /*
++ * Type-C support is optional. Only call the Type-C handler if a
++ * port had been registered previously.
++ */
++ if (priv->port)
++ tusb320_typec_irq_handler(priv, reg);
++
+ regmap_write(priv->regmap, TUSB320_REG9, reg);
+
+ return IRQ_HANDLED;
+ }
+
++static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
++{
++ struct tusb320_priv *priv = dev_id;
++
++ return tusb320_state_update_handler(priv, false);
++}
++
+ static const struct regmap_config tusb320_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ };
+
+-static int tusb320_extcon_probe(struct i2c_client *client,
+- const struct i2c_device_id *id)
++static int tusb320_extcon_probe(struct tusb320_priv *priv)
++{
++ int ret;
++
++ priv->edev = devm_extcon_dev_allocate(priv->dev, tusb320_extcon_cable);
++ if (IS_ERR(priv->edev)) {
++ dev_err(priv->dev, "failed to allocate extcon device\n");
++ return PTR_ERR(priv->edev);
++ }
++
++ ret = devm_extcon_dev_register(priv->dev, priv->edev);
++ if (ret < 0) {
++ dev_err(priv->dev, "failed to register extcon device\n");
++ return ret;
++ }
++
++ extcon_set_property_capability(priv->edev, EXTCON_USB,
++ EXTCON_PROP_USB_TYPEC_POLARITY);
++ extcon_set_property_capability(priv->edev, EXTCON_USB_HOST,
++ EXTCON_PROP_USB_TYPEC_POLARITY);
++
++ return 0;
++}
++
++static int tusb320_typec_probe(struct i2c_client *client,
++ struct tusb320_priv *priv)
++{
++ struct fwnode_handle *connector;
++ const char *cap_str;
++ int ret;
++
++ /* The Type-C connector is optional, for backward compatibility. */
++ connector = device_get_named_child_node(&client->dev, "connector");
++ if (!connector)
++ return 0;
++
++ /* Type-C connector found. */
++ ret = typec_get_fw_cap(&priv->cap, connector);
++ if (ret)
++ return ret;
++
++ priv->port_type = priv->cap.type;
++
++ /* This goes into register 0x8 field CURRENT_MODE_ADVERTISE */
++ ret = fwnode_property_read_string(connector, "typec-power-opmode", &cap_str);
++ if (ret)
++ return ret;
++
++ ret = typec_find_pwr_opmode(cap_str);
++ if (ret < 0)
++ return ret;
++ if (ret == TYPEC_PWR_MODE_PD)
++ return -EINVAL;
++
++ priv->pwr_opmode = ret;
++
++ /* Initialize the hardware with the devicetree settings. */
++ ret = tusb320_set_adv_pwr_mode(priv);
++ if (ret)
++ return ret;
++
++ priv->cap.revision = USB_TYPEC_REV_1_1;
++ priv->cap.accessory[0] = TYPEC_ACCESSORY_AUDIO;
++ priv->cap.accessory[1] = TYPEC_ACCESSORY_DEBUG;
++ priv->cap.orientation_aware = true;
++ priv->cap.driver_data = priv;
++ priv->cap.ops = &tusb320_typec_ops;
++ priv->cap.fwnode = connector;
++
++ priv->port = typec_register_port(&client->dev, &priv->cap);
++ if (IS_ERR(priv->port))
++ return PTR_ERR(priv->port);
++
++ return 0;
++}
++
++static int tusb320_probe(struct i2c_client *client)
+ {
+ struct tusb320_priv *priv;
++ const void *match_data;
++ unsigned int revision;
+ int ret;
+
+ priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL);
+@@ -125,25 +448,42 @@ static int tusb320_extcon_probe(struct i2c_client *client,
+ if (ret)
+ return ret;
+
+- priv->edev = devm_extcon_dev_allocate(priv->dev, tusb320_extcon_cable);
+- if (IS_ERR(priv->edev)) {
+- dev_err(priv->dev, "failed to allocate extcon device\n");
+- return PTR_ERR(priv->edev);
++ match_data = device_get_match_data(&client->dev);
++ if (!match_data)
++ return -EINVAL;
++
++ priv->ops = (struct tusb320_ops*)match_data;
++
++ if (priv->ops->get_revision) {
++ ret = priv->ops->get_revision(priv, &revision);
++ if (ret)
++ dev_warn(priv->dev,
++ "failed to read revision register: %d\n", ret);
++ else
++ dev_info(priv->dev, "chip revision %d\n", revision);
+ }
+
+- ret = devm_extcon_dev_register(priv->dev, priv->edev);
+- if (ret < 0) {
+- dev_err(priv->dev, "failed to register extcon device\n");
++ ret = tusb320_extcon_probe(priv);
++ if (ret)
+ return ret;
+- }
+
+- extcon_set_property_capability(priv->edev, EXTCON_USB,
+- EXTCON_PROP_USB_TYPEC_POLARITY);
+- extcon_set_property_capability(priv->edev, EXTCON_USB_HOST,
+- EXTCON_PROP_USB_TYPEC_POLARITY);
++ ret = tusb320_typec_probe(client, priv);
++ if (ret)
++ return ret;
+
+ /* update initial state */
+- tusb320_irq_handler(client->irq, priv);
++ tusb320_state_update_handler(priv, true);
++
++ /* Reset chip to its default state */
++ ret = tusb320_reset(priv);
++ if (ret)
++ dev_warn(priv->dev, "failed to reset chip: %d\n", ret);
++ else
++ /*
++ * State and polarity might change after a reset, so update
++ * them again and make sure the interrupt status bit is cleared.
++ */
++ tusb320_state_update_handler(priv, true);
+
+ ret = devm_request_threaded_irq(priv->dev, client->irq, NULL,
+ tusb320_irq_handler,
+@@ -154,13 +494,14 @@ static int tusb320_extcon_probe(struct i2c_client *client,
+ }
+
+ static const struct of_device_id tusb320_extcon_dt_match[] = {
+- { .compatible = "ti,tusb320", },
++ { .compatible = "ti,tusb320", .data = &tusb320_ops, },
++ { .compatible = "ti,tusb320l", .data = &tusb320l_ops, },
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, tusb320_extcon_dt_match);
+
+ static struct i2c_driver tusb320_extcon_driver = {
+- .probe = tusb320_extcon_probe,
++ .probe_new = tusb320_probe,
+ .driver = {
+ .name = "extcon-tusb320",
+ .of_match_table = tusb320_extcon_dt_match,
+diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
+index e7a9561a826d3..6a0d55d627ad0 100644
+--- a/drivers/extcon/extcon.c
++++ b/drivers/extcon/extcon.c
+@@ -196,6 +196,14 @@ static const struct __extcon_info {
+ * @attr_name: "name" sysfs entry
+ * @attr_state: "state" sysfs entry
+ * @attrs: the array pointing to attr_name and attr_state for attr_g
++ * @usb_propval: the array of USB connector properties
++ * @chg_propval: the array of charger connector properties
++ * @jack_propval: the array of jack connector properties
++ * @disp_propval: the array of display connector properties
++ * @usb_bits: the bit array of the USB connector property capabilities
++ * @chg_bits: the bit array of the charger connector property capabilities
++ * @jack_bits: the bit array of the jack connector property capabilities
++ * @disp_bits: the bit array of the display connector property capabilities
+ */
+ struct extcon_cable {
+ struct extcon_dev *edev;
+@@ -863,6 +871,8 @@ EXPORT_SYMBOL_GPL(extcon_set_property_capability);
+ * @extcon_name: the extcon name provided with extcon_dev_register()
+ *
+ * Return the pointer of extcon device if success or ERR_PTR(err) if fail.
++ * NOTE: This function returns -EPROBE_DEFER so it may only be called from
++ * probe() functions.
+ */
+ struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
+ {
+@@ -876,7 +886,7 @@ struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
+ if (!strcmp(sd->name, extcon_name))
+ goto out;
+ }
+- sd = NULL;
++ sd = ERR_PTR(-EPROBE_DEFER);
+ out:
+ mutex_unlock(&extcon_dev_list_lock);
+ return sd;
+@@ -1230,19 +1240,14 @@ int extcon_dev_register(struct extcon_dev *edev)
+ edev->dev.type = &edev->extcon_dev_type;
+ }
+
+- ret = device_register(&edev->dev);
+- if (ret) {
+- put_device(&edev->dev);
+- goto err_dev;
+- }
+-
+ spin_lock_init(&edev->lock);
+- edev->nh = devm_kcalloc(&edev->dev, edev->max_supported,
+- sizeof(*edev->nh), GFP_KERNEL);
+- if (!edev->nh) {
+- ret = -ENOMEM;
+- device_unregister(&edev->dev);
+- goto err_dev;
++ if (edev->max_supported) {
++ edev->nh = kcalloc(edev->max_supported, sizeof(*edev->nh),
++ GFP_KERNEL);
++ if (!edev->nh) {
++ ret = -ENOMEM;
++ goto err_alloc_nh;
++ }
+ }
+
+ for (index = 0; index < edev->max_supported; index++)
+@@ -1253,6 +1258,12 @@ int extcon_dev_register(struct extcon_dev *edev)
+ dev_set_drvdata(&edev->dev, edev);
+ edev->state = 0;
+
++ ret = device_register(&edev->dev);
++ if (ret) {
++ put_device(&edev->dev);
++ goto err_dev;
++ }
++
+ mutex_lock(&extcon_dev_list_lock);
+ list_add(&edev->entry, &extcon_dev_list);
+ mutex_unlock(&extcon_dev_list_lock);
+@@ -1260,6 +1271,9 @@ int extcon_dev_register(struct extcon_dev *edev)
+ return 0;
+
+ err_dev:
++ if (edev->max_supported)
++ kfree(edev->nh);
++err_alloc_nh:
+ if (edev->max_supported)
+ kfree(edev->extcon_dev_type.groups);
+ err_alloc_groups:
+@@ -1320,6 +1334,7 @@ void extcon_dev_unregister(struct extcon_dev *edev)
+ if (edev->max_supported) {
+ kfree(edev->extcon_dev_type.groups);
+ kfree(edev->cables);
++ kfree(edev->nh);
+ }
+
+ put_device(&edev->dev);
+diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
+index 54be88167c60b..f3b3953cac834 100644
+--- a/drivers/firewire/core-card.c
++++ b/drivers/firewire/core-card.c
+@@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release);
+ void fw_core_remove_card(struct fw_card *card)
+ {
+ struct fw_card_driver dummy_driver = dummy_driver_template;
++ unsigned long flags;
+
+ card->driver->update_phy_reg(card, 4,
+ PHY_LINK_ACTIVE | PHY_CONTENDER, 0);
+@@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card)
+ dummy_driver.stop_iso = card->driver->stop_iso;
+ card->driver = &dummy_driver;
+
++ spin_lock_irqsave(&card->lock, flags);
+ fw_destroy_nodes(card);
++ spin_unlock_irqrestore(&card->lock, flags);
+
+ /* Wait for all users, especially device workqueue jobs, to finish. */
+ fw_card_put(card);
+diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
+index fb6c651214f32..16ea847ade5fd 100644
+--- a/drivers/firewire/core-cdev.c
++++ b/drivers/firewire/core-cdev.c
+@@ -818,8 +818,10 @@ static int ioctl_send_response(struct client *client, union ioctl_arg *arg)
+
+ r = container_of(resource, struct inbound_transaction_resource,
+ resource);
+- if (is_fcp_request(r->request))
++ if (is_fcp_request(r->request)) {
++ kfree(r->data);
+ goto out;
++ }
+
+ if (a->length != fw_get_response_length(r->request)) {
+ ret = -EINVAL;
+@@ -1480,6 +1482,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
+ {
+ struct outbound_phy_packet_event *e =
+ container_of(packet, struct outbound_phy_packet_event, p);
++ struct client *e_client;
+
+ switch (status) {
+ /* expected: */
+@@ -1496,9 +1499,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
+ }
+ e->phy_packet.data[0] = packet->timestamp;
+
++ e_client = e->client;
+ queue_event(e->client, &e->event, &e->phy_packet,
+ sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0);
+- client_put(e->client);
++ client_put(e_client);
+ }
+
+ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
+diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
+index b63d55f5ebd33..f40c815343812 100644
+--- a/drivers/firewire/core-topology.c
++++ b/drivers/firewire/core-topology.c
+@@ -375,16 +375,13 @@ static void report_found_node(struct fw_card *card,
+ card->bm_retries = 0;
+ }
+
++/* Must be called with card->lock held */
+ void fw_destroy_nodes(struct fw_card *card)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&card->lock, flags);
+ card->color++;
+ if (card->local_node != NULL)
+ for_each_fw_node(card, card->local_node, report_lost_node);
+ card->local_node = NULL;
+- spin_unlock_irqrestore(&card->lock, flags);
+ }
+
+ static void move_tree(struct fw_node *node0, struct fw_node *node1, int port)
+@@ -510,6 +507,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
+ struct fw_node *local_node;
+ unsigned long flags;
+
++ spin_lock_irqsave(&card->lock, flags);
++
+ /*
+ * If the selfID buffer is not the immediate successor of the
+ * previously processed one, we cannot reliably compare the
+@@ -521,8 +520,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
+ card->bm_retries = 0;
+ }
+
+- spin_lock_irqsave(&card->lock, flags);
+-
+ card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated;
+ card->node_id = node_id;
+ /*
+diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
+index ac487c96bb717..6c20815cc8d16 100644
+--- a/drivers/firewire/core-transaction.c
++++ b/drivers/firewire/core-transaction.c
+@@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t)
+ static int close_transaction(struct fw_transaction *transaction,
+ struct fw_card *card, int rcode)
+ {
+- struct fw_transaction *t;
++ struct fw_transaction *t = NULL, *iter;
+ unsigned long flags;
+
+ spin_lock_irqsave(&card->lock, flags);
+- list_for_each_entry(t, &card->transaction_list, link) {
+- if (t == transaction) {
+- if (!try_cancel_split_timeout(t)) {
++ list_for_each_entry(iter, &card->transaction_list, link) {
++ if (iter == transaction) {
++ if (!try_cancel_split_timeout(iter)) {
+ spin_unlock_irqrestore(&card->lock, flags);
+ goto timed_out;
+ }
+- list_del_init(&t->link);
+- card->tlabel_mask &= ~(1ULL << t->tlabel);
++ list_del_init(&iter->link);
++ card->tlabel_mask &= ~(1ULL << iter->tlabel);
++ t = iter;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&card->lock, flags);
+
+- if (&t->link != &card->transaction_list) {
++ if (t) {
+ t->callback(card, rcode, NULL, 0, t->callback_data);
+ return 0;
+ }
+@@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request);
+
+ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
+ {
+- struct fw_transaction *t;
++ struct fw_transaction *t = NULL, *iter;
+ unsigned long flags;
+ u32 *data;
+ size_t data_length;
+@@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
+ rcode = HEADER_GET_RCODE(p->header[1]);
+
+ spin_lock_irqsave(&card->lock, flags);
+- list_for_each_entry(t, &card->transaction_list, link) {
+- if (t->node_id == source && t->tlabel == tlabel) {
+- if (!try_cancel_split_timeout(t)) {
++ list_for_each_entry(iter, &card->transaction_list, link) {
++ if (iter->node_id == source && iter->tlabel == tlabel) {
++ if (!try_cancel_split_timeout(iter)) {
+ spin_unlock_irqrestore(&card->lock, flags);
+ goto timed_out;
+ }
+- list_del_init(&t->link);
+- card->tlabel_mask &= ~(1ULL << t->tlabel);
++ list_del_init(&iter->link);
++ card->tlabel_mask &= ~(1ULL << iter->tlabel);
++ t = iter;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&card->lock, flags);
+
+- if (&t->link == &card->transaction_list) {
++ if (!t) {
+ timed_out:
+ fw_notice(card, "unsolicited response (source %x, tlabel %x)\n",
+ source, tlabel);
+diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
+index 4c3fd2eed1da4..beba0a56bb9ae 100644
+--- a/drivers/firewire/net.c
++++ b/drivers/firewire/net.c
+@@ -488,7 +488,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
+ struct sk_buff *skb, u16 source_node_id,
+ bool is_broadcast, u16 ether_type)
+ {
+- int status;
++ int status, len;
+
+ switch (ether_type) {
+ case ETH_P_ARP:
+@@ -542,13 +542,15 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
+ }
+ skb->protocol = protocol;
+ }
++
++ len = skb->len;
+ status = netif_rx(skb);
+ if (status == NET_RX_DROP) {
+ net->stats.rx_errors++;
+ net->stats.rx_dropped++;
+ } else {
+ net->stats.rx_packets++;
+- net->stats.rx_bytes += skb->len;
++ net->stats.rx_bytes += len;
+ }
+
+ return 0;
+diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
+index 4d5054211550b..2ceed9287435f 100644
+--- a/drivers/firewire/sbp2.c
++++ b/drivers/firewire/sbp2.c
+@@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
+ void *payload, size_t length, void *callback_data)
+ {
+ struct sbp2_logical_unit *lu = callback_data;
+- struct sbp2_orb *orb;
++ struct sbp2_orb *orb = NULL, *iter;
+ struct sbp2_status status;
+ unsigned long flags;
+
+@@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
+
+ /* Lookup the orb corresponding to this status write. */
+ spin_lock_irqsave(&lu->tgt->lock, flags);
+- list_for_each_entry(orb, &lu->orb_list, link) {
++ list_for_each_entry(iter, &lu->orb_list, link) {
+ if (STATUS_GET_ORB_HIGH(status) == 0 &&
+- STATUS_GET_ORB_LOW(status) == orb->request_bus) {
+- orb->rcode = RCODE_COMPLETE;
+- list_del(&orb->link);
++ STATUS_GET_ORB_LOW(status) == iter->request_bus) {
++ iter->rcode = RCODE_COMPLETE;
++ list_del(&iter->link);
++ orb = iter;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&lu->tgt->lock, flags);
+
+- if (&orb->link != &lu->orb_list) {
++ if (orb) {
+ orb->callback(orb, &status);
+ kref_put(&orb->kref, free_orb); /* orb callback reference */
+ } else {
+diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
+index cda7d7162cbbd..97ce31e667fca 100644
+--- a/drivers/firmware/Kconfig
++++ b/drivers/firmware/Kconfig
+@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
+ config ARM_SDE_INTERFACE
+ bool "ARM Software Delegated Exception Interface (SDEI)"
+ depends on ARM64
++ depends on ACPI_APEI_GHES
+ help
+ The Software Delegated Exception Interface (SDEI) is an ARM
+ standard for registering callbacks from the platform firmware
+diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
+index 641a918190880..edef31c413123 100644
+--- a/drivers/firmware/arm_ffa/bus.c
++++ b/drivers/firmware/arm_ffa/bus.c
+@@ -15,6 +15,8 @@
+
+ #include "common.h"
+
++static DEFINE_IDA(ffa_bus_id);
++
+ static int ffa_device_match(struct device *dev, struct device_driver *drv)
+ {
+ const struct ffa_device_id *id_table;
+@@ -53,7 +55,8 @@ static void ffa_device_remove(struct device *dev)
+ {
+ struct ffa_driver *ffa_drv = to_ffa_driver(dev->driver);
+
+- ffa_drv->remove(to_ffa_dev(dev));
++ if (ffa_drv->remove)
++ ffa_drv->remove(to_ffa_dev(dev));
+ }
+
+ static int ffa_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+@@ -130,6 +133,7 @@ static void ffa_release_device(struct device *dev)
+ {
+ struct ffa_device *ffa_dev = to_ffa_dev(dev);
+
++ ida_free(&ffa_bus_id, ffa_dev->id);
+ kfree(ffa_dev);
+ }
+
+@@ -169,18 +173,24 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev)
+
+ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id)
+ {
+- int ret;
++ int id, ret;
+ struct device *dev;
+ struct ffa_device *ffa_dev;
+
++ id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL);
++ if (id < 0)
++ return NULL;
++
+ ffa_dev = kzalloc(sizeof(*ffa_dev), GFP_KERNEL);
+- if (!ffa_dev)
++ if (!ffa_dev) {
++ ida_free(&ffa_bus_id, id);
+ return NULL;
++ }
+
+ dev = &ffa_dev->dev;
+ dev->bus = &ffa_bus_type;
+ dev->release = ffa_release_device;
+- dev_set_name(&ffa_dev->dev, "arm-ffa-%04x", vm_id);
++ dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id);
+
+ ffa_dev->vm_id = vm_id;
+ uuid_copy(&ffa_dev->uuid, uuid);
+@@ -215,4 +225,5 @@ void arm_ffa_bus_exit(void)
+ {
+ ffa_devices_unregister();
+ bus_unregister(&ffa_bus_type);
++ ida_destroy(&ffa_bus_id);
+ }
+diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
+index c9fb56afbcb49..e4fb0c1ae4869 100644
+--- a/drivers/firmware/arm_ffa/driver.c
++++ b/drivers/firmware/arm_ffa/driver.c
+@@ -451,12 +451,18 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize,
+ ep_mem_access->receiver = args->attrs[idx].receiver;
+ ep_mem_access->attrs = args->attrs[idx].attrs;
+ ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs);
++ ep_mem_access->flag = 0;
++ ep_mem_access->reserved = 0;
+ }
++ mem_region->handle = 0;
++ mem_region->reserved_0 = 0;
++ mem_region->reserved_1 = 0;
+ mem_region->ep_count = args->nattrs;
+
+ composite = buffer + COMPOSITE_OFFSET(args->nattrs);
+ composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg);
+ composite->addr_range_cnt = num_entries;
++ composite->reserved = 0;
+
+ length = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries);
+ frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0);
+@@ -491,6 +497,7 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize,
+
+ constituents->address = sg_phys(args->sg);
+ constituents->pg_cnt = args->sg->length / FFA_PAGE_SIZE;
++ constituents->reserved = 0;
+ constituents++;
+ frag_len += sizeof(struct ffa_mem_region_addr_range);
+ } while ((args->sg = sg_next(args->sg)));
+@@ -556,7 +563,7 @@ static int ffa_partition_info_get(const char *uuid_str,
+ return -ENODEV;
+ }
+
+- count = ffa_partition_probe(&uuid_null, &pbuf);
++ count = ffa_partition_probe(&uuid, &pbuf);
+ if (count <= 0)
+ return -ENOENT;
+
+@@ -645,8 +652,6 @@ static void ffa_setup_partitions(void)
+ __func__, tpbuf->id);
+ continue;
+ }
+-
+- ffa_dev_set_drvdata(ffa_dev, drv_info);
+ }
+ kfree(pbuf);
+ }
+diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c
+index de416f9e79213..3fe172c03c247 100644
+--- a/drivers/firmware/arm_scmi/base.c
++++ b/drivers/firmware/arm_scmi/base.c
+@@ -34,6 +34,12 @@ struct scmi_msg_resp_base_attributes {
+ __le16 reserved;
+ };
+
++struct scmi_msg_resp_base_discover_agent {
++ __le32 agent_id;
++ u8 name[SCMI_MAX_STR_SIZE];
++};
++
++
+ struct scmi_msg_base_error_notify {
+ __le32 event_control;
+ #define BASE_TP_NOTIFY_ALL BIT(0)
+@@ -191,7 +197,7 @@ scmi_base_implementation_list_get(const struct scmi_protocol_handle *ph,
+ break;
+
+ loop_num_ret = le32_to_cpu(*num_ret);
+- if (tot_num_ret + loop_num_ret > MAX_PROTOCOLS_IMP) {
++ if (loop_num_ret > MAX_PROTOCOLS_IMP - tot_num_ret) {
+ dev_err(dev, "No. of Protocol > MAX_PROTOCOLS_IMP");
+ break;
+ }
+@@ -225,18 +231,21 @@ static int scmi_base_discover_agent_get(const struct scmi_protocol_handle *ph,
+ int id, char *name)
+ {
+ int ret;
++ struct scmi_msg_resp_base_discover_agent *agent_info;
+ struct scmi_xfer *t;
+
+ ret = ph->xops->xfer_get_init(ph, BASE_DISCOVER_AGENT,
+- sizeof(__le32), SCMI_MAX_STR_SIZE, &t);
++ sizeof(__le32), sizeof(*agent_info), &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(id, t->tx.buf);
+
+ ret = ph->xops->do_xfer(ph, t);
+- if (!ret)
+- strlcpy(name, t->rx.buf, SCMI_MAX_STR_SIZE);
++ if (!ret) {
++ agent_info = t->rx.buf;
++ strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE);
++ }
+
+ ph->xops->xfer_put(ph, t);
+
+diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
+index f6fe723ab869e..7c1c0951e562d 100644
+--- a/drivers/firmware/arm_scmi/bus.c
++++ b/drivers/firmware/arm_scmi/bus.c
+@@ -216,9 +216,20 @@ void scmi_device_destroy(struct scmi_device *scmi_dev)
+ device_unregister(&scmi_dev->dev);
+ }
+
++void scmi_device_link_add(struct device *consumer, struct device *supplier)
++{
++ struct device_link *link;
++
++ link = device_link_add(consumer, supplier, DL_FLAG_AUTOREMOVE_CONSUMER);
++
++ WARN_ON(!link);
++}
++
+ void scmi_set_handle(struct scmi_device *scmi_dev)
+ {
+ scmi_dev->handle = scmi_handle_get(&scmi_dev->dev);
++ if (scmi_dev->handle)
++ scmi_device_link_add(&scmi_dev->dev, scmi_dev->handle->dev);
+ }
+
+ int scmi_protocol_register(const struct scmi_protocol *proto)
+diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c
+index 35b56c8ba0c0e..e76194a60edf9 100644
+--- a/drivers/firmware/arm_scmi/clock.c
++++ b/drivers/firmware/arm_scmi/clock.c
+@@ -204,7 +204,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id,
+
+ if (rate_discrete && rate) {
+ clk->list.num_rates = tot_rate_cnt;
+- sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL);
++ sort(clk->list.rates, tot_rate_cnt, sizeof(*rate),
++ rate_cmp_func, NULL);
+ }
+
+ clk->rate_discrete = rate_discrete;
+@@ -314,9 +315,13 @@ static int scmi_clock_count_get(const struct scmi_protocol_handle *ph)
+ static const struct scmi_clock_info *
+ scmi_clock_info_get(const struct scmi_protocol_handle *ph, u32 clk_id)
+ {
++ struct scmi_clock_info *clk;
+ struct clock_info *ci = ph->get_priv(ph);
+- struct scmi_clock_info *clk = ci->clk + clk_id;
+
++ if (clk_id >= ci->num_clocks)
++ return NULL;
++
++ clk = ci->clk + clk_id;
+ if (!clk->name[0])
+ return NULL;
+
+diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h
+index dea1bfbe10527..b9f5829c0c4dd 100644
+--- a/drivers/firmware/arm_scmi/common.h
++++ b/drivers/firmware/arm_scmi/common.h
+@@ -272,6 +272,7 @@ struct scmi_xfer_ops {
+ struct scmi_revision_info *
+ scmi_revision_area_get(const struct scmi_protocol_handle *ph);
+ int scmi_handle_put(const struct scmi_handle *handle);
++void scmi_device_link_add(struct device *consumer, struct device *supplier);
+ struct scmi_handle *scmi_handle_get(struct device *dev);
+ void scmi_set_handle(struct scmi_device *scmi_dev);
+ void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph,
+diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
+index b406b3f78f467..7ccda7d720a19 100644
+--- a/drivers/firmware/arm_scmi/driver.c
++++ b/drivers/firmware/arm_scmi/driver.c
+@@ -652,7 +652,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo,
+
+ xfer = scmi_xfer_command_acquire(cinfo, msg_hdr);
+ if (IS_ERR(xfer)) {
+- scmi_clear_channel(info, cinfo);
++ if (MSG_XTRACT_TYPE(msg_hdr) == MSG_TYPE_DELAYED_RESP)
++ scmi_clear_channel(info, cinfo);
+ return;
+ }
+
+@@ -782,6 +783,8 @@ static int do_xfer(const struct scmi_protocol_handle *ph,
+ xfer->hdr.protocol_id, xfer->hdr.seq,
+ xfer->hdr.poll_completion);
+
++ /* Clear any stale status */
++ xfer->hdr.status = SCMI_SUCCESS;
+ xfer->state = SCMI_XFER_SENT_OK;
+ /*
+ * Even though spinlocking is not needed here since no race is possible
+@@ -1460,7 +1463,7 @@ static int scmi_xfer_info_init(struct scmi_info *sinfo)
+ return ret;
+
+ ret = __scmi_xfer_info_init(sinfo, &sinfo->tx_minfo);
+- if (!ret && idr_find(&sinfo->rx_idr, SCMI_PROTOCOL_BASE))
++ if (!ret && !idr_is_empty(&sinfo->rx_idr))
+ ret = __scmi_xfer_info_init(sinfo, &sinfo->rx_minfo);
+
+ return ret;
+@@ -1515,8 +1518,12 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id)
+ {
+ int ret = scmi_chan_setup(info, dev, prot_id, true);
+
+- if (!ret) /* Rx is optional, hence no error check */
+- scmi_chan_setup(info, dev, prot_id, false);
++ if (!ret) {
++ /* Rx is optional, report only memory errors */
++ ret = scmi_chan_setup(info, dev, prot_id, false);
++ if (ret && ret != -ENOMEM)
++ ret = 0;
++ }
+
+ return ret;
+ }
+@@ -1726,10 +1733,16 @@ int scmi_protocol_device_request(const struct scmi_device_id *id_table)
+ sdev = scmi_get_protocol_device(child, info,
+ id_table->protocol_id,
+ id_table->name);
+- /* Set handle if not already set: device existed */
+- if (sdev && !sdev->handle)
+- sdev->handle =
+- scmi_handle_get_from_info_unlocked(info);
++ if (sdev) {
++ /* Set handle if not already set: device existed */
++ if (!sdev->handle)
++ sdev->handle =
++ scmi_handle_get_from_info_unlocked(info);
++ /* Relink consumer and suppliers */
++ if (sdev->handle)
++ scmi_device_link_add(&sdev->dev,
++ sdev->handle->dev);
++ }
+ } else {
+ dev_err(info->dev,
+ "Failed. SCMI protocol %d not active.\n",
+@@ -1915,20 +1928,17 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id)
+
+ static int scmi_remove(struct platform_device *pdev)
+ {
+- int ret = 0, id;
++ int ret, id;
+ struct scmi_info *info = platform_get_drvdata(pdev);
+ struct device_node *child;
+
+ mutex_lock(&scmi_list_mutex);
+ if (info->users)
+- ret = -EBUSY;
+- else
+- list_del(&info->node);
++ dev_warn(&pdev->dev,
++ "Still active SCMI users will be forcibly unbound.\n");
++ list_del(&info->node);
+ mutex_unlock(&scmi_list_mutex);
+
+- if (ret)
+- return ret;
+-
+ scmi_notification_exit(&info->handle);
+
+ mutex_lock(&info->protocols_mtx);
+@@ -1940,7 +1950,11 @@ static int scmi_remove(struct platform_device *pdev)
+ idr_destroy(&info->active_protocols);
+
+ /* Safe to free channels since no more users */
+- return scmi_cleanup_txrx_channels(info);
++ ret = scmi_cleanup_txrx_channels(info);
++ if (ret)
++ dev_warn(&pdev->dev, "Failed to cleanup SCMI channels.\n");
++
++ return 0;
+ }
+
+ static ssize_t protocol_version_show(struct device *dev,
+@@ -2008,6 +2022,7 @@ MODULE_DEVICE_TABLE(of, scmi_of_match);
+ static struct platform_driver scmi_driver = {
+ .driver = {
+ .name = "arm-scmi",
++ .suppress_bind_attrs = true,
+ .of_match_table = scmi_of_match,
+ .dev_groups = versions_groups,
+ },
+@@ -2112,7 +2127,7 @@ static void __exit scmi_driver_exit(void)
+ }
+ module_exit(scmi_driver_exit);
+
+-MODULE_ALIAS("platform: arm-scmi");
++MODULE_ALIAS("platform:arm-scmi");
+ MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+ MODULE_DESCRIPTION("ARM SCMI protocol driver");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
+index e09eb12bf4219..d1400de17eca7 100644
+--- a/drivers/firmware/arm_scmi/mailbox.c
++++ b/drivers/firmware/arm_scmi/mailbox.c
+@@ -52,6 +52,39 @@ static bool mailbox_chan_available(struct device *dev, int idx)
+ "#mbox-cells", idx, NULL);
+ }
+
++static int mailbox_chan_validate(struct device *cdev)
++{
++ int num_mb, num_sh, ret = 0;
++ struct device_node *np = cdev->of_node;
++
++ num_mb = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
++ num_sh = of_count_phandle_with_args(np, "shmem", NULL);
++ /* Bail out if mboxes and shmem descriptors are inconsistent */
++ if (num_mb <= 0 || num_sh > 2 || num_mb != num_sh) {
++ dev_warn(cdev, "Invalid channel descriptor for '%s'\n",
++ of_node_full_name(np));
++ return -EINVAL;
++ }
++
++ if (num_sh > 1) {
++ struct device_node *np_tx, *np_rx;
++
++ np_tx = of_parse_phandle(np, "shmem", 0);
++ np_rx = of_parse_phandle(np, "shmem", 1);
++ /* SCMI Tx and Rx shared mem areas have to be distinct */
++ if (!np_tx || !np_rx || np_tx == np_rx) {
++ dev_warn(cdev, "Invalid shmem descriptor for '%s'\n",
++ of_node_full_name(np));
++ ret = -EINVAL;
++ }
++
++ of_node_put(np_tx);
++ of_node_put(np_rx);
++ }
++
++ return ret;
++}
++
+ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+ bool tx)
+ {
+@@ -64,13 +97,19 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+ resource_size_t size;
+ struct resource res;
+
++ ret = mailbox_chan_validate(cdev);
++ if (ret)
++ return ret;
++
+ smbox = devm_kzalloc(dev, sizeof(*smbox), GFP_KERNEL);
+ if (!smbox)
+ return -ENOMEM;
+
+ shmem = of_parse_phandle(cdev->of_node, "shmem", idx);
+- if (!of_device_is_compatible(shmem, "arm,scmi-shmem"))
++ if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
++ of_node_put(shmem);
+ return -ENXIO;
++ }
+
+ ret = of_address_to_resource(shmem, 0, &res);
+ of_node_put(shmem);
+diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c
+index 9bf2478ec6d17..e80a782058458 100644
+--- a/drivers/firmware/arm_scmi/reset.c
++++ b/drivers/firmware/arm_scmi/reset.c
+@@ -152,9 +152,13 @@ static int scmi_domain_reset(const struct scmi_protocol_handle *ph, u32 domain,
+ struct scmi_xfer *t;
+ struct scmi_msg_reset_domain_reset *dom;
+ struct scmi_reset_info *pi = ph->get_priv(ph);
+- struct reset_dom_info *rdom = pi->dom_info + domain;
++ struct reset_dom_info *rdom;
+
+- if (rdom->async_reset)
++ if (domain >= pi->num_domains)
++ return -EINVAL;
++
++ rdom = pi->dom_info + domain;
++ if (rdom->async_reset && flags & AUTONOMOUS_RESET)
+ flags |= ASYNCHRONOUS_RESET;
+
+ ret = ph->xops->xfer_get_init(ph, RESET, sizeof(*dom), 0, &t);
+@@ -166,7 +170,7 @@ static int scmi_domain_reset(const struct scmi_protocol_handle *ph, u32 domain,
+ dom->flags = cpu_to_le32(flags);
+ dom->reset_state = cpu_to_le32(state);
+
+- if (rdom->async_reset)
++ if (flags & ASYNCHRONOUS_RESET)
+ ret = ph->xops->do_xfer_with_response(ph, t);
+ else
+ ret = ph->xops->do_xfer(ph, t);
+diff --git a/drivers/firmware/arm_scmi/scmi_pm_domain.c b/drivers/firmware/arm_scmi/scmi_pm_domain.c
+index 4371fdcd5a73f..0e05a79de82d8 100644
+--- a/drivers/firmware/arm_scmi/scmi_pm_domain.c
++++ b/drivers/firmware/arm_scmi/scmi_pm_domain.c
+@@ -8,7 +8,6 @@
+ #include <linux/err.h>
+ #include <linux/io.h>
+ #include <linux/module.h>
+-#include <linux/pm_clock.h>
+ #include <linux/pm_domain.h>
+ #include <linux/scmi_protocol.h>
+
+@@ -53,27 +52,6 @@ static int scmi_pd_power_off(struct generic_pm_domain *domain)
+ return scmi_pd_power(domain, false);
+ }
+
+-static int scmi_pd_attach_dev(struct generic_pm_domain *pd, struct device *dev)
+-{
+- int ret;
+-
+- ret = pm_clk_create(dev);
+- if (ret)
+- return ret;
+-
+- ret = of_pm_clk_add_clks(dev);
+- if (ret >= 0)
+- return 0;
+-
+- pm_clk_destroy(dev);
+- return ret;
+-}
+-
+-static void scmi_pd_detach_dev(struct generic_pm_domain *pd, struct device *dev)
+-{
+- pm_clk_destroy(dev);
+-}
+-
+ static int scmi_pm_domain_probe(struct scmi_device *sdev)
+ {
+ int num_domains, i;
+@@ -124,10 +102,6 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev)
+ scmi_pd->genpd.name = scmi_pd->name;
+ scmi_pd->genpd.power_off = scmi_pd_power_off;
+ scmi_pd->genpd.power_on = scmi_pd_power_on;
+- scmi_pd->genpd.attach_dev = scmi_pd_attach_dev;
+- scmi_pd->genpd.detach_dev = scmi_pd_detach_dev;
+- scmi_pd->genpd.flags = GENPD_FLAG_PM_CLK |
+- GENPD_FLAG_ACTIVE_WAKEUP;
+
+ pm_genpd_init(&scmi_pd->genpd, NULL,
+ state == SCMI_POWER_STATE_GENERIC_OFF);
+@@ -138,9 +112,26 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev)
+ scmi_pd_data->domains = domains;
+ scmi_pd_data->num_domains = num_domains;
+
+- of_genpd_add_provider_onecell(np, scmi_pd_data);
++ dev_set_drvdata(dev, scmi_pd_data);
+
+- return 0;
++ return of_genpd_add_provider_onecell(np, scmi_pd_data);
++}
++
++static void scmi_pm_domain_remove(struct scmi_device *sdev)
++{
++ int i;
++ struct genpd_onecell_data *scmi_pd_data;
++ struct device *dev = &sdev->dev;
++ struct device_node *np = dev->of_node;
++
++ of_genpd_del_provider(np);
++
++ scmi_pd_data = dev_get_drvdata(dev);
++ for (i = 0; i < scmi_pd_data->num_domains; i++) {
++ if (!scmi_pd_data->domains[i])
++ continue;
++ pm_genpd_remove(scmi_pd_data->domains[i]);
++ }
+ }
+
+ static const struct scmi_device_id scmi_id_table[] = {
+@@ -152,6 +143,7 @@ MODULE_DEVICE_TABLE(scmi, scmi_id_table);
+ static struct scmi_driver scmi_power_domain_driver = {
+ .name = "scmi-power-domain",
+ .probe = scmi_pm_domain_probe,
++ .remove = scmi_pm_domain_remove,
+ .id_table = scmi_id_table,
+ };
+ module_scmi_driver(scmi_power_domain_driver);
+diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
+index 308471586381f..1ed66d13c06c4 100644
+--- a/drivers/firmware/arm_scmi/sensors.c
++++ b/drivers/firmware/arm_scmi/sensors.c
+@@ -631,16 +631,19 @@ static int scmi_sensor_config_get(const struct scmi_protocol_handle *ph,
+ {
+ int ret;
+ struct scmi_xfer *t;
++ struct sensors_info *si = ph->get_priv(ph);
++
++ if (sensor_id >= si->num_sensors)
++ return -EINVAL;
+
+ ret = ph->xops->xfer_get_init(ph, SENSOR_CONFIG_GET,
+ sizeof(__le32), sizeof(__le32), &t);
+ if (ret)
+ return ret;
+
+- put_unaligned_le32(cpu_to_le32(sensor_id), t->tx.buf);
++ put_unaligned_le32(sensor_id, t->tx.buf);
+ ret = ph->xops->do_xfer(ph, t);
+ if (!ret) {
+- struct sensors_info *si = ph->get_priv(ph);
+ struct scmi_sensor_info *s = si->sensors + sensor_id;
+
+ *sensor_config = get_unaligned_le64(t->rx.buf);
+@@ -657,6 +660,10 @@ static int scmi_sensor_config_set(const struct scmi_protocol_handle *ph,
+ int ret;
+ struct scmi_xfer *t;
+ struct scmi_msg_sensor_config_set *msg;
++ struct sensors_info *si = ph->get_priv(ph);
++
++ if (sensor_id >= si->num_sensors)
++ return -EINVAL;
+
+ ret = ph->xops->xfer_get_init(ph, SENSOR_CONFIG_SET,
+ sizeof(*msg), 0, &t);
+@@ -669,7 +676,6 @@ static int scmi_sensor_config_set(const struct scmi_protocol_handle *ph,
+
+ ret = ph->xops->do_xfer(ph, t);
+ if (!ret) {
+- struct sensors_info *si = ph->get_priv(ph);
+ struct scmi_sensor_info *s = si->sensors + sensor_id;
+
+ s->sensor_config = sensor_config;
+@@ -700,8 +706,11 @@ static int scmi_sensor_reading_get(const struct scmi_protocol_handle *ph,
+ int ret;
+ struct scmi_xfer *t;
+ struct scmi_msg_sensor_reading_get *sensor;
++ struct scmi_sensor_info *s;
+ struct sensors_info *si = ph->get_priv(ph);
+- struct scmi_sensor_info *s = si->sensors + sensor_id;
++
++ if (sensor_id >= si->num_sensors)
++ return -EINVAL;
+
+ ret = ph->xops->xfer_get_init(ph, SENSOR_READING_GET,
+ sizeof(*sensor), 0, &t);
+@@ -710,6 +719,7 @@ static int scmi_sensor_reading_get(const struct scmi_protocol_handle *ph,
+
+ sensor = t->tx.buf;
+ sensor->id = cpu_to_le32(sensor_id);
++ s = si->sensors + sensor_id;
+ if (s->async) {
+ sensor->flags = cpu_to_le32(SENSOR_READ_ASYNC);
+ ret = ph->xops->do_xfer_with_response(ph, t);
+@@ -764,9 +774,13 @@ scmi_sensor_reading_get_timestamped(const struct scmi_protocol_handle *ph,
+ int ret;
+ struct scmi_xfer *t;
+ struct scmi_msg_sensor_reading_get *sensor;
++ struct scmi_sensor_info *s;
+ struct sensors_info *si = ph->get_priv(ph);
+- struct scmi_sensor_info *s = si->sensors + sensor_id;
+
++ if (sensor_id >= si->num_sensors)
++ return -EINVAL;
++
++ s = si->sensors + sensor_id;
+ if (!count || !readings ||
+ (!s->num_axis && count > 1) || (s->num_axis && count > s->num_axis))
+ return -EINVAL;
+@@ -817,6 +831,9 @@ scmi_sensor_info_get(const struct scmi_protocol_handle *ph, u32 sensor_id)
+ {
+ struct sensors_info *si = ph->get_priv(ph);
+
++ if (sensor_id >= si->num_sensors)
++ return NULL;
++
+ return si->sensors + sensor_id;
+ }
+
+diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c
+index 0e3eaea5d8526..56a1f61aa3ff2 100644
+--- a/drivers/firmware/arm_scmi/shmem.c
++++ b/drivers/firmware/arm_scmi/shmem.c
+@@ -58,10 +58,11 @@ u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem)
+ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem,
+ struct scmi_xfer *xfer)
+ {
++ size_t len = ioread32(&shmem->length);
++
+ xfer->hdr.status = ioread32(shmem->msg_payload);
+ /* Skip the length of header and status in shmem area i.e 8 bytes */
+- xfer->rx.len = min_t(size_t, xfer->rx.len,
+- ioread32(&shmem->length) - 8);
++ xfer->rx.len = min_t(size_t, xfer->rx.len, len > 8 ? len - 8 : 0);
+
+ /* Take a copy to the rx buffer.. */
+ memcpy_fromio(xfer->rx.buf, shmem->msg_payload + 4, xfer->rx.len);
+@@ -70,8 +71,10 @@ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem,
+ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem,
+ size_t max_len, struct scmi_xfer *xfer)
+ {
++ size_t len = ioread32(&shmem->length);
++
+ /* Skip only the length of header in shmem area i.e 4 bytes */
+- xfer->rx.len = min_t(size_t, max_len, ioread32(&shmem->length) - 4);
++ xfer->rx.len = min_t(size_t, max_len, len > 4 ? len - 4 : 0);
+
+ /* Take a copy to the rx buffer.. */
+ memcpy_fromio(xfer->rx.buf, shmem->msg_payload, xfer->rx.len);
+diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
+index 4effecc3bb463..ea1caf70e8df9 100644
+--- a/drivers/firmware/arm_scmi/smc.c
++++ b/drivers/firmware/arm_scmi/smc.c
+@@ -76,8 +76,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+ return -ENOMEM;
+
+ np = of_parse_phandle(cdev->of_node, "shmem", 0);
+- if (!of_device_is_compatible(np, "arm,scmi-shmem"))
++ if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
++ of_node_put(np);
+ return -ENXIO;
++ }
+
+ ret = of_address_to_resource(np, 0, &res);
+ of_node_put(np);
+diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c
+index 11e8efb713751..0c351eeee7463 100644
+--- a/drivers/firmware/arm_scmi/virtio.c
++++ b/drivers/firmware/arm_scmi/virtio.c
+@@ -82,7 +82,8 @@ static bool scmi_vio_have_vq_rx(struct virtio_device *vdev)
+ }
+
+ static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch,
+- struct scmi_vio_msg *msg)
++ struct scmi_vio_msg *msg,
++ struct device *dev)
+ {
+ struct scatterlist sg_in;
+ int rc;
+@@ -94,8 +95,7 @@ static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch,
+
+ rc = virtqueue_add_inbuf(vioch->vqueue, &sg_in, 1, msg, GFP_ATOMIC);
+ if (rc)
+- dev_err_once(vioch->cinfo->dev,
+- "failed to add to virtqueue (%d)\n", rc);
++ dev_err_once(dev, "failed to add to virtqueue (%d)\n", rc);
+ else
+ virtqueue_kick(vioch->vqueue);
+
+@@ -108,7 +108,7 @@ static void scmi_finalize_message(struct scmi_vio_channel *vioch,
+ struct scmi_vio_msg *msg)
+ {
+ if (vioch->is_rx) {
+- scmi_vio_feed_vq_rx(vioch, msg);
++ scmi_vio_feed_vq_rx(vioch, msg, vioch->cinfo->dev);
+ } else {
+ /* Here IRQs are assumed to be already disabled by the caller */
+ spin_lock(&vioch->lock);
+@@ -247,19 +247,19 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+ for (i = 0; i < vioch->max_msg; i++) {
+ struct scmi_vio_msg *msg;
+
+- msg = devm_kzalloc(cinfo->dev, sizeof(*msg), GFP_KERNEL);
++ msg = devm_kzalloc(dev, sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ if (tx) {
+- msg->request = devm_kzalloc(cinfo->dev,
++ msg->request = devm_kzalloc(dev,
+ VIRTIO_SCMI_MAX_PDU_SIZE,
+ GFP_KERNEL);
+ if (!msg->request)
+ return -ENOMEM;
+ }
+
+- msg->input = devm_kzalloc(cinfo->dev, VIRTIO_SCMI_MAX_PDU_SIZE,
++ msg->input = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE,
+ GFP_KERNEL);
+ if (!msg->input)
+ return -ENOMEM;
+@@ -269,7 +269,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+ list_add_tail(&msg->list, &vioch->free_list);
+ spin_unlock_irqrestore(&vioch->lock, flags);
+ } else {
+- scmi_vio_feed_vq_rx(vioch, msg);
++ scmi_vio_feed_vq_rx(vioch, msg, cinfo->dev);
+ }
+ }
+
+diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c
+index a5048956a0be9..ac08e819088bb 100644
+--- a/drivers/firmware/arm_scmi/voltage.c
++++ b/drivers/firmware/arm_scmi/voltage.c
+@@ -156,7 +156,7 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph,
+ int cnt;
+
+ cmd->domain_id = cpu_to_le32(v->id);
+- cmd->level_index = desc_index;
++ cmd->level_index = cpu_to_le32(desc_index);
+ ret = ph->xops->do_xfer(ph, tl);
+ if (ret)
+ break;
+diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
+index ddf0b9ff9e15c..435d0e2658a42 100644
+--- a/drivers/firmware/arm_scpi.c
++++ b/drivers/firmware/arm_scpi.c
+@@ -815,7 +815,7 @@ static int scpi_init_versions(struct scpi_drvinfo *info)
+ info->firmware_version = le32_to_cpu(caps.platform_version);
+ }
+ /* Ignore error if not implemented */
+- if (scpi_info->is_legacy && ret == -EOPNOTSUPP)
++ if (info->is_legacy && ret == -EOPNOTSUPP)
+ return 0;
+
+ return ret;
+@@ -913,13 +913,14 @@ static int scpi_probe(struct platform_device *pdev)
+ struct resource res;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
++ struct scpi_drvinfo *scpi_drvinfo;
+
+- scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
+- if (!scpi_info)
++ scpi_drvinfo = devm_kzalloc(dev, sizeof(*scpi_drvinfo), GFP_KERNEL);
++ if (!scpi_drvinfo)
+ return -ENOMEM;
+
+ if (of_match_device(legacy_scpi_of_match, &pdev->dev))
+- scpi_info->is_legacy = true;
++ scpi_drvinfo->is_legacy = true;
+
+ count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+ if (count < 0) {
+@@ -927,19 +928,19 @@ static int scpi_probe(struct platform_device *pdev)
+ return -ENODEV;
+ }
+
+- scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan),
+- GFP_KERNEL);
+- if (!scpi_info->channels)
++ scpi_drvinfo->channels =
++ devm_kcalloc(dev, count, sizeof(struct scpi_chan), GFP_KERNEL);
++ if (!scpi_drvinfo->channels)
+ return -ENOMEM;
+
+- ret = devm_add_action(dev, scpi_free_channels, scpi_info);
++ ret = devm_add_action(dev, scpi_free_channels, scpi_drvinfo);
+ if (ret)
+ return ret;
+
+- for (; scpi_info->num_chans < count; scpi_info->num_chans++) {
++ for (; scpi_drvinfo->num_chans < count; scpi_drvinfo->num_chans++) {
+ resource_size_t size;
+- int idx = scpi_info->num_chans;
+- struct scpi_chan *pchan = scpi_info->channels + idx;
++ int idx = scpi_drvinfo->num_chans;
++ struct scpi_chan *pchan = scpi_drvinfo->channels + idx;
+ struct mbox_client *cl = &pchan->cl;
+ struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
+
+@@ -986,45 +987,53 @@ static int scpi_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- scpi_info->commands = scpi_std_commands;
++ scpi_drvinfo->commands = scpi_std_commands;
+
+- platform_set_drvdata(pdev, scpi_info);
++ platform_set_drvdata(pdev, scpi_drvinfo);
+
+- if (scpi_info->is_legacy) {
++ if (scpi_drvinfo->is_legacy) {
+ /* Replace with legacy variants */
+ scpi_ops.clk_set_val = legacy_scpi_clk_set_val;
+- scpi_info->commands = scpi_legacy_commands;
++ scpi_drvinfo->commands = scpi_legacy_commands;
+
+ /* Fill priority bitmap */
+ for (idx = 0; idx < ARRAY_SIZE(legacy_hpriority_cmds); idx++)
+ set_bit(legacy_hpriority_cmds[idx],
+- scpi_info->cmd_priority);
++ scpi_drvinfo->cmd_priority);
+ }
+
+- ret = scpi_init_versions(scpi_info);
++ scpi_info = scpi_drvinfo;
++
++ ret = scpi_init_versions(scpi_drvinfo);
+ if (ret) {
+ dev_err(dev, "incorrect or no SCP firmware found\n");
++ scpi_info = NULL;
+ return ret;
+ }
+
+- if (scpi_info->is_legacy && !scpi_info->protocol_version &&
+- !scpi_info->firmware_version)
++ if (scpi_drvinfo->is_legacy && !scpi_drvinfo->protocol_version &&
++ !scpi_drvinfo->firmware_version)
+ dev_info(dev, "SCP Protocol legacy pre-1.0 firmware\n");
+ else
+ dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n",
+ FIELD_GET(PROTO_REV_MAJOR_MASK,
+- scpi_info->protocol_version),
++ scpi_drvinfo->protocol_version),
+ FIELD_GET(PROTO_REV_MINOR_MASK,
+- scpi_info->protocol_version),
++ scpi_drvinfo->protocol_version),
+ FIELD_GET(FW_REV_MAJOR_MASK,
+- scpi_info->firmware_version),
++ scpi_drvinfo->firmware_version),
+ FIELD_GET(FW_REV_MINOR_MASK,
+- scpi_info->firmware_version),
++ scpi_drvinfo->firmware_version),
+ FIELD_GET(FW_REV_PATCH_MASK,
+- scpi_info->firmware_version));
+- scpi_info->scpi_ops = &scpi_ops;
++ scpi_drvinfo->firmware_version));
++
++ scpi_drvinfo->scpi_ops = &scpi_ops;
+
+- return devm_of_platform_populate(dev);
++ ret = devm_of_platform_populate(dev);
++ if (ret)
++ scpi_info = NULL;
++
++ return ret;
+ }
+
+ static const struct of_device_id scpi_of_match[] = {
+diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
+index a7e762c352f95..285fe7ad490d1 100644
+--- a/drivers/firmware/arm_sdei.c
++++ b/drivers/firmware/arm_sdei.c
+@@ -43,6 +43,8 @@ static asmlinkage void (*sdei_firmware_call)(unsigned long function_id,
+ /* entry point from firmware to arch asm code */
+ static unsigned long sdei_entry_point;
+
++static int sdei_hp_state;
++
+ struct sdei_event {
+ /* These three are protected by the sdei_list_lock */
+ struct list_head list;
+@@ -301,8 +303,6 @@ int sdei_mask_local_cpu(void)
+ {
+ int err;
+
+- WARN_ON_ONCE(preemptible());
+-
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_MASK, 0, 0, 0, 0, 0, NULL);
+ if (err && err != -EIO) {
+ pr_warn_once("failed to mask CPU[%u]: %d\n",
+@@ -315,6 +315,7 @@ int sdei_mask_local_cpu(void)
+
+ static void _ipi_mask_cpu(void *ignored)
+ {
++ WARN_ON_ONCE(preemptible());
+ sdei_mask_local_cpu();
+ }
+
+@@ -322,8 +323,6 @@ int sdei_unmask_local_cpu(void)
+ {
+ int err;
+
+- WARN_ON_ONCE(preemptible());
+-
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_UNMASK, 0, 0, 0, 0, 0, NULL);
+ if (err && err != -EIO) {
+ pr_warn_once("failed to unmask CPU[%u]: %d\n",
+@@ -336,6 +335,7 @@ int sdei_unmask_local_cpu(void)
+
+ static void _ipi_unmask_cpu(void *ignored)
+ {
++ WARN_ON_ONCE(preemptible());
+ sdei_unmask_local_cpu();
+ }
+
+@@ -343,6 +343,8 @@ static void _ipi_private_reset(void *ignored)
+ {
+ int err;
+
++ WARN_ON_ONCE(preemptible());
++
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PRIVATE_RESET, 0, 0, 0, 0, 0,
+ NULL);
+ if (err && err != -EIO)
+@@ -389,8 +391,6 @@ static void _local_event_enable(void *data)
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+- WARN_ON_ONCE(preemptible());
+-
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+@@ -479,8 +479,6 @@ static void _local_event_unregister(void *data)
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+- WARN_ON_ONCE(preemptible());
+-
+ err = sdei_api_event_unregister(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+@@ -561,8 +559,6 @@ static void _local_event_register(void *data)
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+- WARN_ON(preemptible());
+-
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_register(arg->event->event_num, sdei_entry_point,
+ reg, 0, 0);
+@@ -717,6 +713,8 @@ static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action,
+ {
+ int rv;
+
++ WARN_ON_ONCE(preemptible());
++
+ switch (action) {
+ case CPU_PM_ENTER:
+ rv = sdei_mask_local_cpu();
+@@ -765,7 +763,7 @@ static int sdei_device_freeze(struct device *dev)
+ int err;
+
+ /* unregister private events */
+- cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
++ cpuhp_remove_state(sdei_entry_point);
+
+ err = sdei_unregister_shared();
+ if (err)
+@@ -786,12 +784,15 @@ static int sdei_device_thaw(struct device *dev)
+ return err;
+ }
+
+- err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
++ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI",
+ &sdei_cpuhp_up, &sdei_cpuhp_down);
+- if (err)
++ if (err < 0) {
+ pr_warn("Failed to re-register CPU hotplug notifier...\n");
++ return err;
++ }
+
+- return err;
++ sdei_hp_state = err;
++ return 0;
+ }
+
+ static int sdei_device_restore(struct device *dev)
+@@ -823,7 +824,7 @@ static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
+ * We are going to reset the interface, after this there is no point
+ * doing work when we take CPUs offline.
+ */
+- cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
++ cpuhp_remove_state(sdei_hp_state);
+
+ sdei_platform_reset();
+
+@@ -1003,13 +1004,15 @@ static int sdei_probe(struct platform_device *pdev)
+ goto remove_cpupm;
+ }
+
+- err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
++ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SDEI",
+ &sdei_cpuhp_up, &sdei_cpuhp_down);
+- if (err) {
++ if (err < 0) {
+ pr_warn("Failed to register CPU hotplug notifier...\n");
+ goto remove_reboot;
+ }
+
++ sdei_hp_state = err;
++
+ return 0;
+
+ remove_reboot:
+@@ -1059,14 +1062,14 @@ static bool __init sdei_present_acpi(void)
+ return true;
+ }
+
+-static int __init sdei_init(void)
++void __init sdei_init(void)
+ {
+ struct platform_device *pdev;
+ int ret;
+
+ ret = platform_driver_register(&sdei_driver);
+ if (ret || !sdei_present_acpi())
+- return ret;
++ return;
+
+ pdev = platform_device_register_simple(sdei_driver.driver.name,
+ 0, NULL, 0);
+@@ -1076,17 +1079,8 @@ static int __init sdei_init(void)
+ pr_info("Failed to register ACPI:SDEI platform device %d\n",
+ ret);
+ }
+-
+- return ret;
+ }
+
+-/*
+- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
+- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
+- * by device_initcall(). We want to be called in the middle.
+- */
+-subsys_initcall_sync(sdei_init);
+-
+ int sdei_event_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
+ {
+@@ -1101,3 +1095,22 @@ int sdei_event_handler(struct pt_regs *regs,
+ return err;
+ }
+ NOKPROBE_SYMBOL(sdei_event_handler);
++
++void sdei_handler_abort(void)
++{
++ /*
++ * If the crash happened in an SDEI event handler then we need to
++ * finish the handler with the firmware so that we can have working
++ * interrupts in the crash kernel.
++ */
++ if (__this_cpu_read(sdei_active_critical_event)) {
++ pr_warn("still in SDEI critical event context, attempting to finish handler.\n");
++ __sdei_handler_abort();
++ __this_cpu_write(sdei_active_critical_event, NULL);
++ }
++ if (__this_cpu_read(sdei_active_normal_event)) {
++ pr_warn("still in SDEI normal event context, attempting to finish handler.\n");
++ __sdei_handler_abort();
++ __this_cpu_write(sdei_active_normal_event, NULL);
++ }
++}
+diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c
+index 8b8127fa89553..3d57b08320df9 100644
+--- a/drivers/firmware/dmi-sysfs.c
++++ b/drivers/firmware/dmi-sysfs.c
+@@ -602,16 +602,16 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh,
+ *ret = kobject_init_and_add(&entry->kobj, &dmi_sysfs_entry_ktype, NULL,
+ "%d-%d", dh->type, entry->instance);
+
+- if (*ret) {
+- kfree(entry);
+- return;
+- }
+-
+ /* Thread on the global list for cleanup */
+ spin_lock(&entry_list_lock);
+ list_add_tail(&entry->list, &entry_list);
+ spin_unlock(&entry_list_lock);
+
++ if (*ret) {
++ kobject_put(&entry->kobj);
++ return;
++ }
++
+ /* Handle specializations by type */
+ switch (dh->type) {
+ case DMI_ENTRY_SYSTEM_EVENT_LOG:
+diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
+index 4c3201e290e29..ea84108035eb0 100644
+--- a/drivers/firmware/efi/apple-properties.c
++++ b/drivers/firmware/efi/apple-properties.c
+@@ -24,7 +24,7 @@ static bool dump_properties __initdata;
+ static int __init dump_properties_enable(char *arg)
+ {
+ dump_properties = true;
+- return 0;
++ return 1;
+ }
+
+ __setup("dump_apple_properties", dump_properties_enable);
+diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
+index 4dde8edd53b62..3e8d4b51a8140 100644
+--- a/drivers/firmware/efi/capsule-loader.c
++++ b/drivers/firmware/efi/capsule-loader.c
+@@ -242,29 +242,6 @@ failed:
+ return ret;
+ }
+
+-/**
+- * efi_capsule_flush - called by file close or file flush
+- * @file: file pointer
+- * @id: not used
+- *
+- * If a capsule is being partially uploaded then calling this function
+- * will be treated as upload termination and will free those completed
+- * buffer pages and -ECANCELED will be returned.
+- **/
+-static int efi_capsule_flush(struct file *file, fl_owner_t id)
+-{
+- int ret = 0;
+- struct capsule_info *cap_info = file->private_data;
+-
+- if (cap_info->index > 0) {
+- pr_err("capsule upload not complete\n");
+- efi_free_all_buff_pages(cap_info);
+- ret = -ECANCELED;
+- }
+-
+- return ret;
+-}
+-
+ /**
+ * efi_capsule_release - called by file close
+ * @inode: not used
+@@ -277,6 +254,13 @@ static int efi_capsule_release(struct inode *inode, struct file *file)
+ {
+ struct capsule_info *cap_info = file->private_data;
+
++ if (cap_info->index > 0 &&
++ (cap_info->header.headersize == 0 ||
++ cap_info->count < cap_info->total_size)) {
++ pr_err("capsule upload not complete\n");
++ efi_free_all_buff_pages(cap_info);
++ }
++
+ kfree(cap_info->pages);
+ kfree(cap_info->phys);
+ kfree(file->private_data);
+@@ -324,7 +308,6 @@ static const struct file_operations efi_capsule_fops = {
+ .owner = THIS_MODULE,
+ .open = efi_capsule_open,
+ .write = efi_capsule_write,
+- .flush = efi_capsule_flush,
+ .release = efi_capsule_release,
+ .llseek = no_llseek,
+ };
+diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c
+index b19ce1a83f91a..b2c829e95bd14 100644
+--- a/drivers/firmware/efi/efi-init.c
++++ b/drivers/firmware/efi/efi-init.c
+@@ -235,6 +235,11 @@ void __init efi_init(void)
+ }
+
+ reserve_regions();
++ /*
++ * For memblock manipulation, the cap should come after the memblock_add().
++ * And now, memblock is fully populated, it is time to do capping.
++ */
++ early_init_dt_check_for_usable_mem_range();
+ efi_esrt_init();
+ efi_mokvar_table_init();
+
+diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
+index 0ef086e43090b..7e771c56c13c6 100644
+--- a/drivers/firmware/efi/efi-pstore.c
++++ b/drivers/firmware/efi/efi-pstore.c
+@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
+ efi_name[i] = name[i];
+
+ ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
+- preemptible(), record->size, record->psi->buf);
++ false, record->size, record->psi->buf);
+
+ if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
+ if (!schedule_work(&efivar_work))
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index 847f33ffc4aed..332739f3eded5 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -209,7 +209,7 @@ static int __init efivar_ssdt_setup(char *str)
+ memcpy(efivar_ssdt, str, strlen(str));
+ else
+ pr_warn("efivar_ssdt: name too long: %s\n", str);
+- return 0;
++ return 1;
+ }
+ __setup("efivar_ssdt=", efivar_ssdt_setup);
+
+@@ -385,8 +385,8 @@ static int __init efisubsys_init(void)
+ efi_kobj = kobject_create_and_add("efi", firmware_kobj);
+ if (!efi_kobj) {
+ pr_err("efi: Firmware registration failed.\n");
+- destroy_workqueue(efi_rts_wq);
+- return -ENOMEM;
++ error = -ENOMEM;
++ goto err_destroy_wq;
+ }
+
+ if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE |
+@@ -429,7 +429,10 @@ err_unregister:
+ generic_ops_unregister();
+ err_put:
+ kobject_put(efi_kobj);
+- destroy_workqueue(efi_rts_wq);
++err_destroy_wq:
++ if (efi_rts_wq)
++ destroy_workqueue(efi_rts_wq);
++
+ return error;
+ }
+
+@@ -590,7 +593,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
+
+ seed = early_memremap(efi_rng_seed, sizeof(*seed));
+ if (seed != NULL) {
+- size = READ_ONCE(seed->size);
++ size = min_t(u32, seed->size, SZ_1K); // sanity check
+ early_memunmap(seed, sizeof(*seed));
+ } else {
+ pr_err("Could not map UEFI random seed!\n");
+@@ -599,8 +602,8 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
+ seed = early_memremap(efi_rng_seed,
+ sizeof(*seed) + size);
+ if (seed != NULL) {
+- pr_notice("seeding entropy pool\n");
+ add_bootloader_randomness(seed->bits, size);
++ memzero_explicit(seed->bits, size);
+ early_memunmap(seed, sizeof(*seed) + size);
+ } else {
+ pr_err("Could not map UEFI random seed!\n");
+@@ -719,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr,
+ systab_hdr->revision >> 16,
+ systab_hdr->revision & 0xffff,
+ vendor);
++
++ if (IS_ENABLED(CONFIG_X86_64) &&
++ systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION &&
++ !strcmp(vendor, "Apple")) {
++ pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n");
++ efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION;
++ }
+ }
+
+ static __initdata char memory_type_name[][13] = {
+@@ -940,6 +950,8 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
+ /* first try to find a slot in an existing linked list entry */
+ for (prsv = efi_memreserve_root->next; prsv; ) {
+ rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
++ if (!rsv)
++ return -ENOMEM;
+ index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
+ if (index < rsv->size) {
+ rsv->entry[index].base = addr;
+diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
+index d0537573501e9..2c67f71f23753 100644
+--- a/drivers/firmware/efi/libstub/Makefile
++++ b/drivers/firmware/efi/libstub/Makefile
+@@ -37,6 +37,13 @@ KBUILD_CFLAGS := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
+ $(call cc-option,-fno-addrsig) \
+ -D__DISABLE_EXPORTS
+
++#
++# struct randomization only makes sense for Linux internal types, which the EFI
++# stub code never touches, so let's turn off struct randomization for the stub
++# altogether
++#
++KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS), $(KBUILD_CFLAGS))
++
+ # remove SCS flags from all objects in this directory
+ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+ # disable LTO
+diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
+index 2363fee9211c9..9cc556013d085 100644
+--- a/drivers/firmware/efi/libstub/arm64-stub.c
++++ b/drivers/firmware/efi/libstub/arm64-stub.c
+@@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ if (image->image_base != _text)
+ efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
+
+- if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
+- efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
+- EFI_KIMG_ALIGN >> 10);
++ if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
++ efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
++ SEGMENT_ALIGN >> 10);
+
+ kernel_size = _edata - _text;
+ kernel_memsize = kernel_size + (_end - _edata);
+diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
+index d489bdc645fe1..2a00eb627c3c3 100644
+--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
+@@ -439,8 +439,10 @@ efi_status_t efi_exit_boot_services(void *handle,
+ {
+ efi_status_t status;
+
+- status = efi_get_memory_map(map);
++ if (efi_disable_pci_dma)
++ efi_pci_disable_bridge_busmaster();
+
++ status = efi_get_memory_map(map);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+@@ -448,9 +450,6 @@ efi_status_t efi_exit_boot_services(void *handle,
+ if (status != EFI_SUCCESS)
+ goto free_map;
+
+- if (efi_disable_pci_dma)
+- efi_pci_disable_bridge_busmaster();
+-
+ status = efi_bs_call(exit_boot_services, handle, *map->key_ptr);
+
+ if (status == EFI_INVALID_PARAMETER) {
+diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
+index cde0a2ef507d9..fbffdd7290a31 100644
+--- a/drivers/firmware/efi/libstub/efistub.h
++++ b/drivers/firmware/efi/libstub/efistub.h
+@@ -766,6 +766,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
+ efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long random_seed);
+
++efi_status_t efi_random_get_seed(void);
++
+ efi_status_t check_platform_features(void);
+
+ void *get_efi_config_table(efi_guid_t guid);
+diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
+index fe567be0f118b..804f542be3f28 100644
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -280,14 +280,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
+ goto fail;
+ }
+
+- /*
+- * Now that we have done our final memory allocation (and free)
+- * we can get the memory map key needed for exit_boot_services().
+- */
+- status = efi_get_memory_map(&map);
+- if (status != EFI_SUCCESS)
+- goto fail_free_new_fdt;
+-
+ status = update_fdt((void *)fdt_addr, fdt_size,
+ (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr,
+ initrd_addr, initrd_size);
+diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
+index 24aa375353724..f85d2c0668777 100644
+--- a/drivers/firmware/efi/libstub/random.c
++++ b/drivers/firmware/efi/libstub/random.c
+@@ -67,22 +67,43 @@ efi_status_t efi_random_get_seed(void)
+ efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
+ efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW;
+ efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID;
++ struct linux_efi_random_seed *prev_seed, *seed = NULL;
++ int prev_seed_size = 0, seed_size = EFI_RANDOM_SEED_SIZE;
+ efi_rng_protocol_t *rng = NULL;
+- struct linux_efi_random_seed *seed = NULL;
+ efi_status_t status;
+
+ status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng);
+ if (status != EFI_SUCCESS)
+ return status;
+
+- status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+- sizeof(*seed) + EFI_RANDOM_SEED_SIZE,
++ /*
++ * Check whether a seed was provided by a prior boot stage. In that
++ * case, instead of overwriting it, let's create a new buffer that can
++ * hold both, and concatenate the existing and the new seeds.
++ * Note that we should read the seed size with caution, in case the
++ * table got corrupted in memory somehow.
++ */
++ prev_seed = get_efi_config_table(LINUX_EFI_RANDOM_SEED_TABLE_GUID);
++ if (prev_seed && prev_seed->size <= 512U) {
++ prev_seed_size = prev_seed->size;
++ seed_size += prev_seed_size;
++ }
++
++ /*
++ * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the
++ * allocation will survive a kexec reboot (although we refresh the seed
++ * beforehand)
++ */
++ status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY,
++ struct_size(seed, bits, seed_size),
+ (void **)&seed);
+- if (status != EFI_SUCCESS)
+- return status;
++ if (status != EFI_SUCCESS) {
++ efi_warn("Failed to allocate memory for RNG seed.\n");
++ goto err_warn;
++ }
+
+ status = efi_call_proto(rng, get_rng, &rng_algo_raw,
+- EFI_RANDOM_SEED_SIZE, seed->bits);
++ EFI_RANDOM_SEED_SIZE, seed->bits);
+
+ if (status == EFI_UNSUPPORTED)
+ /*
+@@ -95,14 +116,28 @@ efi_status_t efi_random_get_seed(void)
+ if (status != EFI_SUCCESS)
+ goto err_freepool;
+
+- seed->size = EFI_RANDOM_SEED_SIZE;
++ seed->size = seed_size;
++ if (prev_seed_size)
++ memcpy(seed->bits + EFI_RANDOM_SEED_SIZE, prev_seed->bits,
++ prev_seed_size);
++
+ status = efi_bs_call(install_configuration_table, &rng_table_guid, seed);
+ if (status != EFI_SUCCESS)
+ goto err_freepool;
+
++ if (prev_seed_size) {
++ /* wipe and free the old seed if we managed to install the new one */
++ memzero_explicit(prev_seed->bits, prev_seed_size);
++ efi_bs_call(free_pool, prev_seed);
++ }
+ return EFI_SUCCESS;
+
+ err_freepool:
++ memzero_explicit(seed, struct_size(seed, bits, seed_size));
+ efi_bs_call(free_pool, seed);
++ efi_warn("Failed to obtain seed from EFI_RNG_PROTOCOL\n");
++err_warn:
++ if (prev_seed)
++ efi_warn("Retaining bootloader-supplied seed only");
+ return status;
+ }
+diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c
+index 380e4e2513994..9c460843442f5 100644
+--- a/drivers/firmware/efi/libstub/riscv-stub.c
++++ b/drivers/firmware/efi/libstub/riscv-stub.c
+@@ -25,7 +25,7 @@ typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+
+ static u32 hartid;
+
+-static u32 get_boot_hartid_from_fdt(void)
++static int get_boot_hartid_from_fdt(void)
+ {
+ const void *fdt;
+ int chosen_node, len;
+@@ -33,23 +33,26 @@ static u32 get_boot_hartid_from_fdt(void)
+
+ fdt = get_efi_config_table(DEVICE_TREE_GUID);
+ if (!fdt)
+- return U32_MAX;
++ return -EINVAL;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node < 0)
+- return U32_MAX;
++ return -EINVAL;
+
+ prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len);
+ if (!prop || len != sizeof(u32))
+- return U32_MAX;
++ return -EINVAL;
+
+- return fdt32_to_cpu(*prop);
++ hartid = fdt32_to_cpu(*prop);
++ return 0;
+ }
+
+ efi_status_t check_platform_features(void)
+ {
+- hartid = get_boot_hartid_from_fdt();
+- if (hartid == U32_MAX) {
++ int ret;
++
++ ret = get_boot_hartid_from_fdt();
++ if (ret) {
+ efi_err("/chosen/boot-hartid missing or invalid!\n");
+ return EFI_UNSUPPORTED;
+ }
+diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
+index 8a18930f3eb69..516f4f0069bd2 100644
+--- a/drivers/firmware/efi/libstub/secureboot.c
++++ b/drivers/firmware/efi/libstub/secureboot.c
+@@ -14,7 +14,7 @@
+
+ /* SHIM variables */
+ static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+-static const efi_char16_t shim_MokSBState_name[] = L"MokSBState";
++static const efi_char16_t shim_MokSBState_name[] = L"MokSBStateRT";
+
+ static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr,
+ unsigned long *data_size, void *data)
+@@ -43,8 +43,8 @@ enum efi_secureboot_mode efi_get_secureboot(void)
+
+ /*
+ * See if a user has put the shim into insecure mode. If so, and if the
+- * variable doesn't have the runtime attribute set, we might as well
+- * honor that.
++ * variable doesn't have the non-volatile attribute set, we might as
++ * well honor that.
+ */
+ size = sizeof(moksbstate);
+ status = get_efi_var(shim_MokSBState_name, &shim_guid,
+@@ -53,7 +53,7 @@ enum efi_secureboot_mode efi_get_secureboot(void)
+ /* If it fails, we don't care why. Default to secure */
+ if (status != EFI_SUCCESS)
+ goto secure_boot_enabled;
+- if (!(attr & EFI_VARIABLE_RUNTIME_ACCESS) && moksbstate == 1)
++ if (!(attr & EFI_VARIABLE_NON_VOLATILE) && moksbstate == 1)
+ return efi_secureboot_mode_disabled;
+
+ secure_boot_enabled:
+diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
+index f14c4ff5839f9..1cb4466e3c108 100644
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -60,7 +60,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
+ rom->data.type = SETUP_PCI;
+ rom->data.len = size - sizeof(struct setup_data);
+ rom->data.next = 0;
+- rom->pcilen = pci->romsize;
++ rom->pcilen = romsize;
+ *__rom = rom;
+
+ status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
+@@ -414,6 +414,13 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
+ hdr->ramdisk_image = 0;
+ hdr->ramdisk_size = 0;
+
++ /*
++ * Disregard any setup data that was provided by the bootloader:
++ * setup_data could be pointing anywhere, and we have no way of
++ * authenticating or validating the payload.
++ */
++ hdr->setup_data = 0;
++
+ efi_stub_entry(handle, sys_table_arg, boot_params);
+ /* not reached */
+
+diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
+index 0a9aba5f9ceff..f178b2984dfb2 100644
+--- a/drivers/firmware/efi/memattr.c
++++ b/drivers/firmware/efi/memattr.c
+@@ -33,7 +33,7 @@ int __init efi_memattr_init(void)
+ return -ENOMEM;
+ }
+
+- if (tbl->version > 1) {
++ if (tbl->version > 2) {
+ pr_warn("Unexpected EFI Memory Attributes table version %d\n",
+ tbl->version);
+ goto unmap;
+diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
+index f3e54f6616f02..60075e0e4943a 100644
+--- a/drivers/firmware/efi/runtime-wrappers.c
++++ b/drivers/firmware/efi/runtime-wrappers.c
+@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work;
+ \
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \
+ pr_warn_once("EFI Runtime Services are disabled!\n"); \
++ efi_rts_work.status = EFI_DEVICE_ERROR; \
+ goto exit; \
+ } \
+ \
+diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c
+index 4c7c9dd7733f9..24d6f6e08df8b 100644
+--- a/drivers/firmware/efi/sysfb_efi.c
++++ b/drivers/firmware/efi/sysfb_efi.c
+@@ -266,6 +266,22 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
+ "Lenovo ideapad D330-10IGM"),
+ },
+ },
++ {
++ /* Lenovo IdeaPad Duet 3 10IGL5 with 1200x1920 portrait screen */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
++ "IdeaPad Duet 3 10IGL5"),
++ },
++ },
++ {
++ /* Lenovo Yoga Book X91F / X91L */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ /* Non exact match to match F + L versions */
++ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
++ },
++ },
+ {},
+ };
+
+@@ -335,7 +351,7 @@ static const struct fwnode_operations efifb_fwnode_ops = {
+ #ifdef CONFIG_EFI
+ static struct fwnode_handle efifb_fwnode;
+
+-__init void sysfb_apply_efi_quirks(struct platform_device *pd)
++__init void sysfb_apply_efi_quirks(void)
+ {
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
+ !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
+@@ -349,7 +365,10 @@ __init void sysfb_apply_efi_quirks(struct platform_device *pd)
+ screen_info.lfb_height = temp;
+ screen_info.lfb_linelength = 4 * screen_info.lfb_width;
+ }
++}
+
++__init void sysfb_set_efifb_fwnode(struct platform_device *pd)
++{
+ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) {
+ fwnode_init(&efifb_fwnode, &efifb_fwnode_ops);
+ pd->dev.fwnode = &efifb_fwnode;
+diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
+index 8f665678e9e39..e8d69bd548f3f 100644
+--- a/drivers/firmware/efi/tpm.c
++++ b/drivers/firmware/efi/tpm.c
+@@ -97,7 +97,7 @@ int __init efi_tpm_eventlog_init(void)
+ goto out_calc;
+ }
+
+- memblock_reserve((unsigned long)final_tbl,
++ memblock_reserve(efi.tpm_final_log,
+ tbl_size + sizeof(*final_tbl));
+ efi_tpm_final_log_size = tbl_size;
+
+diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
+index abdc8a6a39631..cae590bd08f27 100644
+--- a/drivers/firmware/efi/vars.c
++++ b/drivers/firmware/efi/vars.c
+@@ -742,6 +742,7 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
+ {
+ const struct efivar_operations *ops;
+ efi_status_t status;
++ unsigned long varsize;
+
+ if (!__efivars)
+ return -EINVAL;
+@@ -764,15 +765,17 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
+ return efivar_entry_set_nonblocking(name, vendor, attributes,
+ size, data);
+
++ varsize = size + ucs2_strsize(name, 1024);
+ if (!block) {
+ if (down_trylock(&efivars_lock))
+ return -EBUSY;
++ status = check_var_size_nonblocking(attributes, varsize);
+ } else {
+ if (down_interruptible(&efivars_lock))
+ return -EINTR;
++ status = check_var_size(attributes, varsize);
+ }
+
+- status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
+ if (status != EFI_SUCCESS) {
+ up(&efivars_lock);
+ return -ENOSPC;
+diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
+index 97968aece54f8..983e07dc022ed 100644
+--- a/drivers/firmware/google/Kconfig
++++ b/drivers/firmware/google/Kconfig
+@@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE
+ bool "Google Firmware Drivers"
+ default n
+ help
+- These firmware drivers are used by Google's servers. They are
+- only useful if you are working directly on one of their
+- proprietary servers. If in doubt, say "N".
++ These firmware drivers are used by Google servers,
++ Chromebooks and other devices using coreboot firmware.
++ If in doubt, say "N".
+
+ if GOOGLE_FIRMWARE
+
+@@ -21,7 +21,7 @@ config GOOGLE_SMI
+
+ config GOOGLE_COREBOOT_TABLE
+ tristate "Coreboot Table Access"
+- depends on ACPI || OF
++ depends on HAS_IOMEM && (ACPI || OF)
+ help
+ This option enables the coreboot_table module, which provides other
+ firmware modules access to the coreboot table. The coreboot table
+diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
+index c52bcaa9def60..f3694d3478019 100644
+--- a/drivers/firmware/google/coreboot_table.c
++++ b/drivers/firmware/google/coreboot_table.c
+@@ -93,7 +93,12 @@ static int coreboot_table_populate(struct device *dev, void *ptr)
+ for (i = 0; i < header->table_entries; i++) {
+ entry = ptr_entry;
+
+- device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL);
++ if (entry->size < sizeof(*entry)) {
++ dev_warn(dev, "coreboot table entry too small!\n");
++ return -EINVAL;
++ }
++
++ device = kzalloc(sizeof(device->dev) + entry->size, GFP_KERNEL);
+ if (!device)
+ return -ENOMEM;
+
+@@ -101,7 +106,7 @@ static int coreboot_table_populate(struct device *dev, void *ptr)
+ device->dev.parent = dev;
+ device->dev.bus = &coreboot_bus_type;
+ device->dev.release = coreboot_device_release;
+- memcpy(&device->entry, ptr_entry, entry->size);
++ memcpy(device->raw, ptr_entry, entry->size);
+
+ ret = device_register(&device->dev);
+ if (ret) {
+@@ -149,12 +154,8 @@ static int coreboot_table_probe(struct platform_device *pdev)
+ if (!ptr)
+ return -ENOMEM;
+
+- ret = bus_register(&coreboot_bus_type);
+- if (!ret) {
+- ret = coreboot_table_populate(dev, ptr);
+- if (ret)
+- bus_unregister(&coreboot_bus_type);
+- }
++ ret = coreboot_table_populate(dev, ptr);
++
+ memunmap(ptr);
+
+ return ret;
+@@ -169,7 +170,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy)
+ static int coreboot_table_remove(struct platform_device *pdev)
+ {
+ bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister);
+- bus_unregister(&coreboot_bus_type);
+ return 0;
+ }
+
+@@ -199,6 +199,32 @@ static struct platform_driver coreboot_table_driver = {
+ .of_match_table = of_match_ptr(coreboot_of_match),
+ },
+ };
+-module_platform_driver(coreboot_table_driver);
++
++static int __init coreboot_table_driver_init(void)
++{
++ int ret;
++
++ ret = bus_register(&coreboot_bus_type);
++ if (ret)
++ return ret;
++
++ ret = platform_driver_register(&coreboot_table_driver);
++ if (ret) {
++ bus_unregister(&coreboot_bus_type);
++ return ret;
++ }
++
++ return 0;
++}
++
++static void __exit coreboot_table_driver_exit(void)
++{
++ platform_driver_unregister(&coreboot_table_driver);
++ bus_unregister(&coreboot_bus_type);
++}
++
++module_init(coreboot_table_driver_init);
++module_exit(coreboot_table_driver_exit);
++
+ MODULE_AUTHOR("Google, Inc.");
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
+index beb778674acdc..4a89277b99a39 100644
+--- a/drivers/firmware/google/coreboot_table.h
++++ b/drivers/firmware/google/coreboot_table.h
+@@ -66,6 +66,7 @@ struct coreboot_device {
+ struct coreboot_table_entry entry;
+ struct lb_cbmem_ref cbmem_ref;
+ struct lb_framebuffer framebuffer;
++ DECLARE_FLEX_ARRAY(u8, raw);
+ };
+ };
+
+diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c
+index c6dcc1ef93acf..c323a818805cc 100644
+--- a/drivers/firmware/google/framebuffer-coreboot.c
++++ b/drivers/firmware/google/framebuffer-coreboot.c
+@@ -43,9 +43,7 @@ static int framebuffer_probe(struct coreboot_device *dev)
+ fb->green_mask_pos == formats[i].green.offset &&
+ fb->green_mask_size == formats[i].green.length &&
+ fb->blue_mask_pos == formats[i].blue.offset &&
+- fb->blue_mask_size == formats[i].blue.length &&
+- fb->reserved_mask_pos == formats[i].transp.offset &&
+- fb->reserved_mask_size == formats[i].transp.length)
++ fb->blue_mask_size == formats[i].blue.length)
+ pdata.format = formats[i].name;
+ }
+ if (!pdata.format)
+diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
+index adaa492c3d2df..871bedf533a80 100644
+--- a/drivers/firmware/google/gsmi.c
++++ b/drivers/firmware/google/gsmi.c
+@@ -361,9 +361,10 @@ static efi_status_t gsmi_get_variable(efi_char16_t *name,
+ memcpy(data, gsmi_dev.data_buf->start, *data_size);
+
+ /* All variables are have the following attributes */
+- *attr = EFI_VARIABLE_NON_VOLATILE |
+- EFI_VARIABLE_BOOTSERVICE_ACCESS |
+- EFI_VARIABLE_RUNTIME_ACCESS;
++ if (attr)
++ *attr = EFI_VARIABLE_NON_VOLATILE |
++ EFI_VARIABLE_BOOTSERVICE_ACCESS |
++ EFI_VARIABLE_RUNTIME_ACCESS;
+ }
+
+ spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+@@ -681,6 +682,15 @@ static struct notifier_block gsmi_die_notifier = {
+ static int gsmi_panic_callback(struct notifier_block *nb,
+ unsigned long reason, void *arg)
+ {
++
++ /*
++ * Panic callbacks are executed with all other CPUs stopped,
++ * so we must not attempt to spin waiting for gsmi_dev.lock
++ * to be released.
++ */
++ if (spin_is_locked(&gsmi_dev.lock))
++ return NOTIFY_DONE;
++
+ gsmi_shutdown_reason(GSMI_SHUTDOWN_PANIC);
+ return NOTIFY_DONE;
+ }
+diff --git a/drivers/firmware/meson/meson_sm.c b/drivers/firmware/meson/meson_sm.c
+index 77aa5c6398aa6..d081a6312627b 100644
+--- a/drivers/firmware/meson/meson_sm.c
++++ b/drivers/firmware/meson/meson_sm.c
+@@ -292,6 +292,8 @@ static int __init meson_sm_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ chip = of_match_device(meson_sm_ids, dev)->data;
++ if (!chip)
++ return -EINVAL;
+
+ if (chip->cmd_shmem_in_base) {
+ fw->sm_shmem_in_base = meson_sm_map_shmem(chip->cmd_shmem_in_base,
+diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c
+index 9a369a2eda71d..116eb465cdb42 100644
+--- a/drivers/firmware/psci/psci_checker.c
++++ b/drivers/firmware/psci/psci_checker.c
+@@ -155,7 +155,7 @@ static int alloc_init_cpu_groups(cpumask_var_t **pcpu_groups)
+ if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
+ return -ENOMEM;
+
+- cpu_groups = kcalloc(nb_available_cpus, sizeof(cpu_groups),
++ cpu_groups = kcalloc(nb_available_cpus, sizeof(*cpu_groups),
+ GFP_KERNEL);
+ if (!cpu_groups) {
+ free_cpumask_var(tmp);
+diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
+index 2ee97bab74409..18e1a4b80401c 100644
+--- a/drivers/firmware/qcom_scm.c
++++ b/drivers/firmware/qcom_scm.c
+@@ -252,7 +252,7 @@ static bool __qcom_scm_is_call_available(struct device *dev, u32 svc_id,
+ break;
+ default:
+ pr_err("Unknown SMC convention being used\n");
+- return -EINVAL;
++ return false;
+ }
+
+ ret = qcom_scm_call(dev, &desc, &res);
+@@ -749,12 +749,6 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
+ };
+ int ret;
+
+- desc.args[0] = addr;
+- desc.args[1] = size;
+- desc.args[2] = spare;
+- desc.arginfo = QCOM_SCM_ARGS(3, QCOM_SCM_RW, QCOM_SCM_VAL,
+- QCOM_SCM_VAL);
+-
+ ret = qcom_scm_call(__scm->dev, &desc, NULL);
+
+ /* the pg table has been initialized already, ignore the error */
+@@ -1326,8 +1320,7 @@ static int qcom_scm_probe(struct platform_device *pdev)
+ static void qcom_scm_shutdown(struct platform_device *pdev)
+ {
+ /* Clean shutdown, disable download mode to allow normal restart */
+- if (download_mode)
+- qcom_scm_set_download_mode(false);
++ qcom_scm_set_download_mode(false);
+ }
+
+ static const struct of_device_id qcom_scm_dt_match[] = {
+diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
+index 172c751a4f6c2..f08e056ed0ae4 100644
+--- a/drivers/firmware/qemu_fw_cfg.c
++++ b/drivers/firmware/qemu_fw_cfg.c
+@@ -388,9 +388,7 @@ static void fw_cfg_sysfs_cache_cleanup(void)
+ struct fw_cfg_sysfs_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) {
+- /* will end up invoking fw_cfg_sysfs_cache_delist()
+- * via each object's release() method (i.e. destructor)
+- */
++ fw_cfg_sysfs_cache_delist(entry);
+ kobject_put(&entry->kobj);
+ }
+ }
+@@ -448,7 +446,6 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
+ {
+ struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
+
+- fw_cfg_sysfs_cache_delist(entry);
+ kfree(entry);
+ }
+
+@@ -601,20 +598,18 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
+ /* set file entry information */
+ entry->size = be32_to_cpu(f->size);
+ entry->select = be16_to_cpu(f->select);
+- memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
++ strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
+
+ /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
+ err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
+ fw_cfg_sel_ko, "%d", entry->select);
+- if (err) {
+- kobject_put(&entry->kobj);
+- return err;
+- }
++ if (err)
++ goto err_put_entry;
+
+ /* add raw binary content access */
+ err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
+ if (err)
+- goto err_add_raw;
++ goto err_del_entry;
+
+ /* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
+ fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
+@@ -623,9 +618,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
+ fw_cfg_sysfs_cache_enlist(entry);
+ return 0;
+
+-err_add_raw:
++err_del_entry:
+ kobject_del(&entry->kobj);
+- kfree(entry);
++err_put_entry:
++ kobject_put(&entry->kobj);
+ return err;
+ }
+
+diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c
+index 4b8978b254f9a..dba315f675bc7 100644
+--- a/drivers/firmware/raspberrypi.c
++++ b/drivers/firmware/raspberrypi.c
+@@ -272,6 +272,7 @@ static int rpi_firmware_probe(struct platform_device *pdev)
+ int ret = PTR_ERR(fw->chan);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get mbox channel: %d\n", ret);
++ kfree(fw);
+ return ret;
+ }
+
+diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c
+index 51201600d789b..800673910b511 100644
+--- a/drivers/firmware/scpi_pm_domain.c
++++ b/drivers/firmware/scpi_pm_domain.c
+@@ -16,7 +16,6 @@ struct scpi_pm_domain {
+ struct generic_pm_domain genpd;
+ struct scpi_ops *ops;
+ u32 domain;
+- char name[30];
+ };
+
+ /*
+@@ -110,8 +109,13 @@ static int scpi_pm_domain_probe(struct platform_device *pdev)
+
+ scpi_pd->domain = i;
+ scpi_pd->ops = scpi_ops;
+- sprintf(scpi_pd->name, "%pOFn.%d", np, i);
+- scpi_pd->genpd.name = scpi_pd->name;
++ scpi_pd->genpd.name = devm_kasprintf(dev, GFP_KERNEL,
++ "%pOFn.%d", np, i);
++ if (!scpi_pd->genpd.name) {
++ dev_err(dev, "Failed to allocate genpd name:%pOFn.%d\n",
++ np, i);
++ continue;
++ }
+ scpi_pd->genpd.power_off = scpi_pd_power_off;
+ scpi_pd->genpd.power_on = scpi_pd_power_on;
+
+diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c
+index 581aa5e9b0778..dd7c3d5e8b0bb 100644
+--- a/drivers/firmware/smccc/soc_id.c
++++ b/drivers/firmware/smccc/soc_id.c
+@@ -50,7 +50,7 @@ static int __init smccc_soc_init(void)
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_SOC_ID, &res);
+
+- if (res.a0 == SMCCC_RET_NOT_SUPPORTED) {
++ if ((int)res.a0 == SMCCC_RET_NOT_SUPPORTED) {
+ pr_info("ARCH_SOC_ID not implemented, skipping ....\n");
+ return 0;
+ }
+diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
+index 2a7687911c097..c61d55ed71a38 100644
+--- a/drivers/firmware/stratix10-svc.c
++++ b/drivers/firmware/stratix10-svc.c
+@@ -477,7 +477,7 @@ static int svc_normal_to_secure_thread(void *data)
+ case INTEL_SIP_SMC_RSU_ERROR:
+ pr_err("%s: STATUS_ERROR\n", __func__);
+ cbdata->status = BIT(SVC_STATUS_ERROR);
+- cbdata->kaddr1 = NULL;
++ cbdata->kaddr1 = &res.a1;
+ cbdata->kaddr2 = NULL;
+ cbdata->kaddr3 = NULL;
+ pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata);
+@@ -622,8 +622,8 @@ svc_create_memory_pool(struct platform_device *pdev,
+ end = rounddown(sh_memory->addr + sh_memory->size, PAGE_SIZE);
+ paddr = begin;
+ size = end - begin;
+- va = memremap(paddr, size, MEMREMAP_WC);
+- if (!va) {
++ va = devm_memremap(dev, paddr, size, MEMREMAP_WC);
++ if (IS_ERR(va)) {
+ dev_err(dev, "fail to remap shared memory\n");
+ return ERR_PTR(-EINVAL);
+ }
+@@ -941,17 +941,17 @@ EXPORT_SYMBOL_GPL(stratix10_svc_allocate_memory);
+ void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr)
+ {
+ struct stratix10_svc_data_mem *pmem;
+- size_t size = 0;
+
+ list_for_each_entry(pmem, &svc_data_mem, node)
+ if (pmem->vaddr == kaddr) {
+- size = pmem->size;
+- break;
++ gen_pool_free(chan->ctrl->genpool,
++ (unsigned long)kaddr, pmem->size);
++ pmem->vaddr = NULL;
++ list_del(&pmem->node);
++ return;
+ }
+
+- gen_pool_free(chan->ctrl->genpool, (unsigned long)kaddr, size);
+- pmem->vaddr = NULL;
+- list_del(&pmem->node);
++ list_del(&svc_data_mem);
+ }
+ EXPORT_SYMBOL_GPL(stratix10_svc_free_memory);
+
+@@ -989,18 +989,22 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
+ return ret;
+
+ genpool = svc_create_memory_pool(pdev, sh_memory);
+- if (!genpool)
+- return -ENOMEM;
++ if (IS_ERR(genpool))
++ return PTR_ERR(genpool);
+
+ /* allocate service controller and supporting channel */
+ controller = devm_kzalloc(dev, sizeof(*controller), GFP_KERNEL);
+- if (!controller)
+- return -ENOMEM;
++ if (!controller) {
++ ret = -ENOMEM;
++ goto err_destroy_pool;
++ }
+
+ chans = devm_kmalloc_array(dev, SVC_NUM_CHANNEL,
+ sizeof(*chans), GFP_KERNEL | __GFP_ZERO);
+- if (!chans)
+- return -ENOMEM;
++ if (!chans) {
++ ret = -ENOMEM;
++ goto err_destroy_pool;
++ }
+
+ controller->dev = dev;
+ controller->num_chans = SVC_NUM_CHANNEL;
+@@ -1015,7 +1019,7 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
+ ret = kfifo_alloc(&controller->svc_fifo, fifo_size, GFP_KERNEL);
+ if (ret) {
+ dev_err(dev, "failed to allocate FIFO\n");
+- return ret;
++ goto err_destroy_pool;
+ }
+ spin_lock_init(&controller->svc_fifo_lock);
+
+@@ -1060,6 +1064,8 @@ err_put_device:
+ platform_device_put(svc->stratix10_svc_rsu);
+ err_free_kfifo:
+ kfifo_free(&controller->svc_fifo);
++err_destroy_pool:
++ gen_pool_destroy(genpool);
+ return ret;
+ }
+
+diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
+index 2bfbb05f7d896..abc3279c706d1 100644
+--- a/drivers/firmware/sysfb.c
++++ b/drivers/firmware/sysfb.c
+@@ -34,21 +34,61 @@
+ #include <linux/screen_info.h>
+ #include <linux/sysfb.h>
+
++static struct platform_device *pd;
++static DEFINE_MUTEX(disable_lock);
++static bool disabled;
++
++static bool sysfb_unregister(void)
++{
++ if (IS_ERR_OR_NULL(pd))
++ return false;
++
++ platform_device_unregister(pd);
++ pd = NULL;
++
++ return true;
++}
++
++/**
++ * sysfb_disable() - disable the Generic System Framebuffers support
++ *
++ * This disables the registration of system framebuffer devices that match the
++ * generic drivers that make use of the system framebuffer set up by firmware.
++ *
++ * It also unregisters a device if this was already registered by sysfb_init().
++ *
++ * Context: The function can sleep. A @disable_lock mutex is acquired to serialize
++ * against sysfb_init(), that registers a system framebuffer device.
++ */
++void sysfb_disable(void)
++{
++ mutex_lock(&disable_lock);
++ sysfb_unregister();
++ disabled = true;
++ mutex_unlock(&disable_lock);
++}
++EXPORT_SYMBOL_GPL(sysfb_disable);
++
+ static __init int sysfb_init(void)
+ {
+ struct screen_info *si = &screen_info;
+ struct simplefb_platform_data mode;
+- struct platform_device *pd;
+ const char *name;
+ bool compatible;
+- int ret;
++ int ret = 0;
++
++ mutex_lock(&disable_lock);
++ if (disabled)
++ goto unlock_mutex;
++
++ sysfb_apply_efi_quirks();
+
+ /* try to create a simple-framebuffer device */
+ compatible = sysfb_parse_mode(si, &mode);
+ if (compatible) {
+- ret = sysfb_create_simplefb(si, &mode);
+- if (!ret)
+- return 0;
++ pd = sysfb_create_simplefb(si, &mode);
++ if (!IS_ERR(pd))
++ goto unlock_mutex;
+ }
+
+ /* if the FB is incompatible, create a legacy framebuffer device */
+@@ -60,10 +100,12 @@ static __init int sysfb_init(void)
+ name = "platform-framebuffer";
+
+ pd = platform_device_alloc(name, 0);
+- if (!pd)
+- return -ENOMEM;
++ if (!pd) {
++ ret = -ENOMEM;
++ goto unlock_mutex;
++ }
+
+- sysfb_apply_efi_quirks(pd);
++ sysfb_set_efifb_fwnode(pd);
+
+ ret = platform_device_add_data(pd, si, sizeof(*si));
+ if (ret)
+@@ -73,9 +115,11 @@ static __init int sysfb_init(void)
+ if (ret)
+ goto err;
+
+- return 0;
++ goto unlock_mutex;
+ err:
+ platform_device_put(pd);
++unlock_mutex:
++ mutex_unlock(&disable_lock);
+ return ret;
+ }
+
+diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c
+index b86761904949c..fd4fa923088af 100644
+--- a/drivers/firmware/sysfb_simplefb.c
++++ b/drivers/firmware/sysfb_simplefb.c
+@@ -57,8 +57,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si,
+ return false;
+ }
+
+-__init int sysfb_create_simplefb(const struct screen_info *si,
+- const struct simplefb_platform_data *mode)
++__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
++ const struct simplefb_platform_data *mode)
+ {
+ struct platform_device *pd;
+ struct resource res;
+@@ -76,7 +76,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si,
+ base |= (u64)si->ext_lfb_base << 32;
+ if (!base || (u64)(resource_size_t)base != base) {
+ printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
+- return -EINVAL;
++ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+@@ -93,7 +93,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si,
+ length = mode->height * mode->stride;
+ if (length > size) {
+ printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
+- return -EINVAL;
++ return ERR_PTR(-EINVAL);
+ }
+ length = PAGE_ALIGN(length);
+
+@@ -104,21 +104,30 @@ __init int sysfb_create_simplefb(const struct screen_info *si,
+ res.start = base;
+ res.end = res.start + length - 1;
+ if (res.end <= res.start)
+- return -EINVAL;
++ return ERR_PTR(-EINVAL);
+
+ pd = platform_device_alloc("simple-framebuffer", 0);
+ if (!pd)
+- return -ENOMEM;
++ return ERR_PTR(-ENOMEM);
+
+- sysfb_apply_efi_quirks(pd);
++ sysfb_set_efifb_fwnode(pd);
+
+ ret = platform_device_add_resources(pd, &res, 1);
+ if (ret)
+- return ret;
++ goto err_put_device;
+
+ ret = platform_device_add_data(pd, mode, sizeof(*mode));
+ if (ret)
+- return ret;
++ goto err_put_device;
+
+- return platform_device_add(pd);
++ ret = platform_device_add(pd);
++ if (ret)
++ goto err_put_device;
++
++ return pd;
++
++err_put_device:
++ platform_device_put(pd);
++
++ return ERR_PTR(ret);
+ }
+diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c
+index 3e9fa4b543588..1ed881a567d5c 100644
+--- a/drivers/firmware/tegra/bpmp-debugfs.c
++++ b/drivers/firmware/tegra/bpmp-debugfs.c
+@@ -465,7 +465,7 @@ static int bpmp_populate_debugfs_inband(struct tegra_bpmp *bpmp,
+ mode |= attrs & DEBUGFS_S_IWUSR ? 0200 : 0;
+ dentry = debugfs_create_file(name, mode, parent, bpmp,
+ &bpmp_debug_fops);
+- if (!dentry) {
++ if (IS_ERR(dentry)) {
+ err = -ENOMEM;
+ goto out;
+ }
+@@ -716,7 +716,7 @@ static int bpmp_populate_dir(struct tegra_bpmp *bpmp, struct seqbuf *seqbuf,
+
+ if (t & DEBUGFS_S_ISDIR) {
+ dentry = debugfs_create_dir(name, parent);
+- if (!dentry)
++ if (IS_ERR(dentry))
+ return -ENOMEM;
+ err = bpmp_populate_dir(bpmp, seqbuf, dentry, depth+1);
+ if (err < 0)
+@@ -729,7 +729,7 @@ static int bpmp_populate_dir(struct tegra_bpmp *bpmp, struct seqbuf *seqbuf,
+ dentry = debugfs_create_file(name, mode,
+ parent, bpmp,
+ &debugfs_fops);
+- if (!dentry)
++ if (IS_ERR(dentry))
+ return -ENOMEM;
+ }
+ }
+@@ -779,11 +779,11 @@ int tegra_bpmp_init_debugfs(struct tegra_bpmp *bpmp)
+ return 0;
+
+ root = debugfs_create_dir("bpmp", NULL);
+- if (!root)
++ if (IS_ERR(root))
+ return -ENOMEM;
+
+ bpmp->debugfs_mirror = debugfs_create_dir("debug", root);
+- if (!bpmp->debugfs_mirror) {
++ if (IS_ERR(bpmp->debugfs_mirror)) {
+ err = -ENOMEM;
+ goto out;
+ }
+diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
+index 5654c5e9862b1..037db21de510c 100644
+--- a/drivers/firmware/tegra/bpmp.c
++++ b/drivers/firmware/tegra/bpmp.c
+@@ -201,7 +201,7 @@ static ssize_t __tegra_bpmp_channel_read(struct tegra_bpmp_channel *channel,
+ int err;
+
+ if (data && size > 0)
+- memcpy(data, channel->ib->data, size);
++ memcpy_fromio(data, channel->ib->data, size);
+
+ err = tegra_bpmp_ack_response(channel);
+ if (err < 0)
+@@ -245,7 +245,7 @@ static ssize_t __tegra_bpmp_channel_write(struct tegra_bpmp_channel *channel,
+ channel->ob->flags = flags;
+
+ if (data && size > 0)
+- memcpy(channel->ob->data, data, size);
++ memcpy_toio(channel->ob->data, data, size);
+
+ return tegra_bpmp_post_request(channel);
+ }
+@@ -420,7 +420,7 @@ void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel, int code,
+ channel->ob->code = code;
+
+ if (data && size > 0)
+- memcpy(channel->ob->data, data, size);
++ memcpy_toio(channel->ob->data, data, size);
+
+ err = tegra_bpmp_post_response(channel);
+ if (WARN_ON(err < 0))
+diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
+index a3cadbaf3cba7..0dac35406a38d 100644
+--- a/drivers/firmware/xilinx/zynqmp.c
++++ b/drivers/firmware/xilinx/zynqmp.c
+@@ -171,7 +171,7 @@ static int zynqmp_pm_feature(u32 api_id)
+ }
+
+ /* Add new entry if not present */
+- feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL);
++ feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC);
+ if (!feature_data)
+ return -ENOMEM;
+
+diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
+index dfdf21ed34c4e..c24b6fb2d7c37 100644
+--- a/drivers/fpga/altera-pr-ip-core.c
++++ b/drivers/fpga/altera-pr-ip-core.c
+@@ -108,7 +108,7 @@ static int alt_pr_fpga_write(struct fpga_manager *mgr, const char *buf,
+ u32 *buffer_32 = (u32 *)buf;
+ size_t i = 0;
+
+- if (count <= 0)
++ if (!count)
+ return -EINVAL;
+
+ /* Write out the complete 32-bit chunks */
+diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
+index f86666cf2c6a8..c38143ef23c64 100644
+--- a/drivers/fpga/dfl.c
++++ b/drivers/fpga/dfl.c
+@@ -1864,7 +1864,7 @@ long dfl_feature_ioctl_set_irq(struct platform_device *pdev,
+ return -EINVAL;
+
+ fds = memdup_user((void __user *)(arg + sizeof(hdr)),
+- hdr.count * sizeof(s32));
++ array_size(hdr.count, sizeof(s32)));
+ if (IS_ERR(fds))
+ return PTR_ERR(fds);
+
+diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
+index 798f55670646c..75a24b0457243 100644
+--- a/drivers/fpga/fpga-bridge.c
++++ b/drivers/fpga/fpga-bridge.c
+@@ -115,7 +115,7 @@ static int fpga_bridge_dev_match(struct device *dev, const void *data)
+ /**
+ * fpga_bridge_get - get an exclusive reference to an fpga bridge
+ * @dev: parent device that fpga bridge was registered with
+- * @info: fpga manager info
++ * @info: fpga image specific information
+ *
+ * Given a device, get an exclusive reference to an fpga bridge.
+ *
+diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c
+index 047fd7f237069..91212bab58717 100644
+--- a/drivers/fpga/stratix10-soc.c
++++ b/drivers/fpga/stratix10-soc.c
+@@ -213,9 +213,9 @@ static int s10_ops_write_init(struct fpga_manager *mgr,
+ /* Allocate buffers from the service layer's pool. */
+ for (i = 0; i < NUM_SVC_BUFS; i++) {
+ kbuf = stratix10_svc_allocate_memory(priv->chan, SVC_BUF_SIZE);
+- if (!kbuf) {
++ if (IS_ERR(kbuf)) {
+ s10_free_buffers(mgr);
+- ret = -ENOMEM;
++ ret = PTR_ERR(kbuf);
+ goto init_done;
+ }
+
+diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
+index 59ddc9fd5bca4..92e6eebd1851e 100644
+--- a/drivers/fsi/fsi-core.c
++++ b/drivers/fsi/fsi-core.c
+@@ -1309,6 +1309,9 @@ int fsi_master_register(struct fsi_master *master)
+
+ mutex_init(&master->scan_lock);
+ master->idx = ida_simple_get(&master_ida, 0, INT_MAX, GFP_KERNEL);
++ if (master->idx < 0)
++ return master->idx;
++
+ dev_set_name(&master->dev, "fsi%d", master->idx);
+ master->dev.class = &fsi_master_class;
+
+diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c
+index 8606e55c1721c..a3645da1f1bf3 100644
+--- a/drivers/fsi/fsi-master-aspeed.c
++++ b/drivers/fsi/fsi-master-aspeed.c
+@@ -453,6 +453,8 @@ static ssize_t cfam_reset_store(struct device *dev, struct device_attribute *att
+ gpiod_set_value(aspeed->cfam_reset_gpio, 1);
+ usleep_range(900, 1000);
+ gpiod_set_value(aspeed->cfam_reset_gpio, 0);
++ usleep_range(900, 1000);
++ opb_writel(aspeed, ctrl_base + FSI_MRESP0, cpu_to_be32(FSI_MRESP_RST_ALL_MASTER));
+ mutex_unlock(&aspeed->lock);
+
+ return count;
+@@ -542,25 +544,28 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev)
+ return rc;
+ }
+
+- aspeed = devm_kzalloc(&pdev->dev, sizeof(*aspeed), GFP_KERNEL);
++ aspeed = kzalloc(sizeof(*aspeed), GFP_KERNEL);
+ if (!aspeed)
+ return -ENOMEM;
+
+ aspeed->dev = &pdev->dev;
+
+ aspeed->base = devm_platform_ioremap_resource(pdev, 0);
+- if (IS_ERR(aspeed->base))
+- return PTR_ERR(aspeed->base);
++ if (IS_ERR(aspeed->base)) {
++ rc = PTR_ERR(aspeed->base);
++ goto err_free_aspeed;
++ }
+
+ aspeed->clk = devm_clk_get(aspeed->dev, NULL);
+ if (IS_ERR(aspeed->clk)) {
+ dev_err(aspeed->dev, "couldn't get clock\n");
+- return PTR_ERR(aspeed->clk);
++ rc = PTR_ERR(aspeed->clk);
++ goto err_free_aspeed;
+ }
+ rc = clk_prepare_enable(aspeed->clk);
+ if (rc) {
+ dev_err(aspeed->dev, "couldn't enable clock\n");
+- return rc;
++ goto err_free_aspeed;
+ }
+
+ rc = setup_cfam_reset(aspeed);
+@@ -595,7 +600,7 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev)
+ rc = opb_readl(aspeed, ctrl_base + FSI_MVER, &raw);
+ if (rc) {
+ dev_err(&pdev->dev, "failed to read hub version\n");
+- return rc;
++ goto err_release;
+ }
+
+ reg = be32_to_cpu(raw);
+@@ -634,6 +639,8 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev)
+
+ err_release:
+ clk_disable_unprepare(aspeed->clk);
++err_free_aspeed:
++ kfree(aspeed);
+ return rc;
+ }
+
+diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c
+index 24292acdbaf84..a9abebb07f35a 100644
+--- a/drivers/fsi/fsi-master-ast-cf.c
++++ b/drivers/fsi/fsi-master-ast-cf.c
+@@ -1439,3 +1439,4 @@ static struct platform_driver fsi_master_acf = {
+
+ module_platform_driver(fsi_master_acf);
+ MODULE_LICENSE("GPL");
++MODULE_FIRMWARE(FW_FILE_NAME);
+diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c
+index b223f0ef337b9..ecf738411fe22 100644
+--- a/drivers/fsi/fsi-occ.c
++++ b/drivers/fsi/fsi-occ.c
+@@ -50,6 +50,7 @@ struct occ {
+ struct device *sbefifo;
+ char name[32];
+ int idx;
++ u8 sequence_number;
+ enum versions version;
+ struct miscdevice mdev;
+ struct mutex occ_lock;
+@@ -141,8 +142,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf,
+ {
+ struct occ_client *client = file->private_data;
+ size_t rlen, data_length;
+- u16 checksum = 0;
+- ssize_t rc, i;
++ ssize_t rc;
+ u8 *cmd;
+
+ if (!client)
+@@ -156,9 +156,6 @@ static ssize_t occ_write(struct file *file, const char __user *buf,
+ /* Construct the command */
+ cmd = client->buffer;
+
+- /* Sequence number (we could increment and compare with response) */
+- cmd[0] = 1;
+-
+ /*
+ * Copy the user command (assume user data follows the occ command
+ * format)
+@@ -178,14 +175,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf,
+ goto done;
+ }
+
+- /* Calculate checksum */
+- for (i = 0; i < data_length + 4; ++i)
+- checksum += cmd[i];
+-
+- cmd[data_length + 4] = checksum >> 8;
+- cmd[data_length + 5] = checksum & 0xFF;
+-
+- /* Submit command */
++ /* Submit command; 4 bytes before the data and 2 bytes after */
+ rlen = PAGE_SIZE;
+ rc = fsi_occ_submit(client->occ->dev, cmd, data_length + 6, cmd,
+ &rlen);
+@@ -314,11 +304,13 @@ free:
+ return rc;
+ }
+
+-static int occ_putsram(struct occ *occ, const void *data, ssize_t len)
++static int occ_putsram(struct occ *occ, const void *data, ssize_t len,
++ u8 seq_no, u16 checksum)
+ {
+ size_t cmd_len, buf_len, resp_len, resp_data_len;
+ u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */
+ __be32 *buf;
++ u8 *byte_buf;
+ int idx = 0, rc;
+
+ cmd_len = (occ->version == occ_p10) ? 6 : 5;
+@@ -358,6 +350,15 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len)
+ buf[4 + idx] = cpu_to_be32(data_len);
+ memcpy(&buf[5 + idx], data, len);
+
++ byte_buf = (u8 *)&buf[5 + idx];
++ /*
++ * Overwrite the first byte with our sequence number and the last two
++ * bytes with the checksum.
++ */
++ byte_buf[0] = seq_no;
++ byte_buf[len - 2] = checksum >> 8;
++ byte_buf[len - 1] = checksum & 0xff;
++
+ rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len);
+ if (rc)
+ goto free;
+@@ -467,9 +468,12 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len,
+ struct occ *occ = dev_get_drvdata(dev);
+ struct occ_response *resp = response;
+ u8 seq_no;
++ u16 checksum = 0;
+ u16 resp_data_length;
++ const u8 *byte_request = (const u8 *)request;
+ unsigned long start;
+ int rc;
++ size_t i;
+
+ if (!occ)
+ return -ENODEV;
+@@ -479,11 +483,26 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len,
+ return -EINVAL;
+ }
+
++ /* Checksum the request, ignoring first byte (sequence number). */
++ for (i = 1; i < req_len - 2; ++i)
++ checksum += byte_request[i];
++
+ mutex_lock(&occ->occ_lock);
+
+- /* Extract the seq_no from the command (first byte) */
+- seq_no = *(const u8 *)request;
+- rc = occ_putsram(occ, request, req_len);
++ /*
++ * Get a sequence number and update the counter. Avoid a sequence
++ * number of 0 which would pass the response check below even if the
++ * OCC response is uninitialized. Any sequence number the user is
++ * trying to send is overwritten since this function is the only common
++ * interface to the OCC and therefore the only place we can guarantee
++ * unique sequence numbers.
++ */
++ seq_no = occ->sequence_number++;
++ if (!occ->sequence_number)
++ occ->sequence_number = 1;
++ checksum += seq_no;
++
++ rc = occ_putsram(occ, request, req_len, seq_no, checksum);
+ if (rc)
+ goto done;
+
+@@ -574,6 +593,7 @@ static int occ_probe(struct platform_device *pdev)
+ occ->version = (uintptr_t)of_device_get_match_data(dev);
+ occ->dev = dev;
+ occ->sbefifo = dev->parent;
++ occ->sequence_number = 1;
+ mutex_init(&occ->occ_lock);
+
+ if (dev->of_node) {
+diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
+index 84cb965bfed5c..97045a8d94224 100644
+--- a/drivers/fsi/fsi-sbefifo.c
++++ b/drivers/fsi/fsi-sbefifo.c
+@@ -640,7 +640,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
+ }
+ ffdc_iov.iov_base = ffdc;
+ ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
+- iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
++ iov_iter_kvec(&ffdc_iter, READ, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+ cmd[0] = cpu_to_be32(2);
+ cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
+ rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
+@@ -737,7 +737,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
+ rbytes = (*resp_len) * sizeof(__be32);
+ resp_iov.iov_base = response;
+ resp_iov.iov_len = rbytes;
+- iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
++ iov_iter_kvec(&resp_iter, READ, &resp_iov, 1, rbytes);
+
+ /* Perform the command */
+ mutex_lock(&sbefifo->lock);
+@@ -817,7 +817,7 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf,
+ /* Prepare iov iterator */
+ resp_iov.iov_base = buf;
+ resp_iov.iov_len = len;
+- iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len);
++ iov_iter_init(&resp_iter, READ, &resp_iov, 1, len);
+
+ /* Perform the command */
+ mutex_lock(&sbefifo->lock);
+diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c
+index da1486bb6a144..bcb756dc98663 100644
+--- a/drivers/fsi/fsi-scom.c
++++ b/drivers/fsi/fsi-scom.c
+@@ -145,7 +145,7 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value,
+ uint64_t addr, uint32_t *status)
+ {
+ uint64_t ind_data, ind_addr;
+- int rc, retries, err = 0;
++ int rc, err;
+
+ if (value & ~XSCOM_DATA_IND_DATA)
+ return -EINVAL;
+@@ -156,19 +156,14 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value,
+ if (rc || (*status & SCOM_STATUS_ANY_ERR))
+ return rc;
+
+- for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) {
+- rc = __get_scom(scom, &ind_data, addr, status);
+- if (rc || (*status & SCOM_STATUS_ANY_ERR))
+- return rc;
++ rc = __get_scom(scom, &ind_data, addr, status);
++ if (rc || (*status & SCOM_STATUS_ANY_ERR))
++ return rc;
+
+- err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
+- *status = err << SCOM_STATUS_PIB_RESP_SHIFT;
+- if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED))
+- return 0;
++ err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
++ *status = err << SCOM_STATUS_PIB_RESP_SHIFT;
+
+- msleep(1);
+- }
+- return rc;
++ return 0;
+ }
+
+ static int put_indirect_scom_form1(struct scom_device *scom, uint64_t value,
+@@ -188,7 +183,7 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value,
+ uint64_t addr, uint32_t *status)
+ {
+ uint64_t ind_data, ind_addr;
+- int rc, retries, err = 0;
++ int rc, err;
+
+ ind_addr = addr & XSCOM_ADDR_DIRECT_PART;
+ ind_data = (addr & XSCOM_ADDR_INDIRECT_PART) | XSCOM_DATA_IND_READ;
+@@ -196,21 +191,15 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value,
+ if (rc || (*status & SCOM_STATUS_ANY_ERR))
+ return rc;
+
+- for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) {
+- rc = __get_scom(scom, &ind_data, addr, status);
+- if (rc || (*status & SCOM_STATUS_ANY_ERR))
+- return rc;
+-
+- err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
+- *status = err << SCOM_STATUS_PIB_RESP_SHIFT;
+- *value = ind_data & XSCOM_DATA_IND_DATA;
++ rc = __get_scom(scom, &ind_data, addr, status);
++ if (rc || (*status & SCOM_STATUS_ANY_ERR))
++ return rc;
+
+- if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED))
+- return 0;
++ err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
++ *status = err << SCOM_STATUS_PIB_RESP_SHIFT;
++ *value = ind_data & XSCOM_DATA_IND_DATA;
+
+- msleep(1);
+- }
+- return rc;
++ return 0;
+ }
+
+ static int raw_put_scom(struct scom_device *scom, uint64_t value,
+@@ -289,7 +278,7 @@ static int put_scom(struct scom_device *scom, uint64_t value,
+ int rc;
+
+ rc = raw_put_scom(scom, value, addr, &status);
+- if (rc == -ENODEV)
++ if (rc)
+ return rc;
+
+ rc = handle_fsi2pib_status(scom, status);
+@@ -308,7 +297,7 @@ static int get_scom(struct scom_device *scom, uint64_t *value,
+ int rc;
+
+ rc = raw_get_scom(scom, value, addr, &status);
+- if (rc == -ENODEV)
++ if (rc)
+ return rc;
+
+ rc = handle_fsi2pib_status(scom, status);
+diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
+index fae5141251e5d..7b9def6b10047 100644
+--- a/drivers/gpio/Kconfig
++++ b/drivers/gpio/Kconfig
+@@ -100,7 +100,7 @@ config GPIO_GENERIC
+ tristate
+
+ config GPIO_REGMAP
+- depends on REGMAP
++ select REGMAP
+ tristate
+
+ # put drivers in the right section, in alphabetical order
+@@ -523,6 +523,7 @@ config GPIO_REG
+ config GPIO_ROCKCHIP
+ tristate "Rockchip GPIO support"
+ depends on ARCH_ROCKCHIP || COMPILE_TEST
++ select GENERIC_IRQ_CHIP
+ select GPIOLIB_IRQCHIP
+ default ARCH_ROCKCHIP
+ help
+diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c
+index 34e35b64dcdc0..23047dc84ef1b 100644
+--- a/drivers/gpio/gpio-aggregator.c
++++ b/drivers/gpio/gpio-aggregator.c
+@@ -273,7 +273,8 @@ static int gpio_fwd_get(struct gpio_chip *chip, unsigned int offset)
+ {
+ struct gpiochip_fwd *fwd = gpiochip_get_data(chip);
+
+- return gpiod_get_value(fwd->descs[offset]);
++ return chip->can_sleep ? gpiod_get_value_cansleep(fwd->descs[offset])
++ : gpiod_get_value(fwd->descs[offset]);
+ }
+
+ static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
+@@ -292,7 +293,10 @@ static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
+ for_each_set_bit(i, mask, fwd->chip.ngpio)
+ descs[j++] = fwd->descs[i];
+
+- error = gpiod_get_array_value(j, descs, NULL, values);
++ if (fwd->chip.can_sleep)
++ error = gpiod_get_array_value_cansleep(j, descs, NULL, values);
++ else
++ error = gpiod_get_array_value(j, descs, NULL, values);
+ if (error)
+ return error;
+
+@@ -327,7 +331,10 @@ static void gpio_fwd_set(struct gpio_chip *chip, unsigned int offset, int value)
+ {
+ struct gpiochip_fwd *fwd = gpiochip_get_data(chip);
+
+- gpiod_set_value(fwd->descs[offset], value);
++ if (chip->can_sleep)
++ gpiod_set_value_cansleep(fwd->descs[offset], value);
++ else
++ gpiod_set_value(fwd->descs[offset], value);
+ }
+
+ static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
+@@ -346,7 +353,10 @@ static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask,
+ descs[j++] = fwd->descs[i];
+ }
+
+- gpiod_set_array_value(j, descs, NULL, values);
++ if (fwd->chip.can_sleep)
++ gpiod_set_array_value_cansleep(j, descs, NULL, values);
++ else
++ gpiod_set_array_value(j, descs, NULL, values);
+ }
+
+ static void gpio_fwd_set_multiple_locked(struct gpio_chip *chip,
+diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c
+index 14e6b3e64add5..6f3ded619c8b2 100644
+--- a/drivers/gpio/gpio-amd8111.c
++++ b/drivers/gpio/gpio-amd8111.c
+@@ -226,7 +226,10 @@ found:
+ ioport_unmap(gp.pm);
+ goto out;
+ }
++ return 0;
++
+ out:
++ pci_dev_put(pdev);
+ return err;
+ }
+
+@@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void)
+ {
+ gpiochip_remove(&gp.chip);
+ ioport_unmap(gp.pm);
++ pci_dev_put(gp.pdev);
+ }
+
+ module_init(amd_gpio_init);
+diff --git a/drivers/gpio/gpio-amdpt.c b/drivers/gpio/gpio-amdpt.c
+index 44398992ae15f..dba4836a18f80 100644
+--- a/drivers/gpio/gpio-amdpt.c
++++ b/drivers/gpio/gpio-amdpt.c
+@@ -35,19 +35,19 @@ static int pt_gpio_request(struct gpio_chip *gc, unsigned offset)
+
+ dev_dbg(gc->parent, "pt_gpio_request offset=%x\n", offset);
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ using_pins = readl(pt_gpio->reg_base + PT_SYNC_REG);
+ if (using_pins & BIT(offset)) {
+ dev_warn(gc->parent, "PT GPIO pin %x reconfigured\n",
+ offset);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ return -EINVAL;
+ }
+
+ writel(using_pins | BIT(offset), pt_gpio->reg_base + PT_SYNC_REG);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -58,13 +58,13 @@ static void pt_gpio_free(struct gpio_chip *gc, unsigned offset)
+ unsigned long flags;
+ u32 using_pins;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ using_pins = readl(pt_gpio->reg_base + PT_SYNC_REG);
+ using_pins &= ~BIT(offset);
+ writel(using_pins, pt_gpio->reg_base + PT_SYNC_REG);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ dev_dbg(gc->parent, "pt_gpio_free offset=%x\n", offset);
+ }
+diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
+index 3d6ef37a7702a..454cefbeecf0e 100644
+--- a/drivers/gpio/gpio-aspeed-sgpio.c
++++ b/drivers/gpio/gpio-aspeed-sgpio.c
+@@ -31,7 +31,7 @@ struct aspeed_sgpio {
+ struct gpio_chip chip;
+ struct irq_chip intc;
+ struct clk *pclk;
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ void __iomem *base;
+ int irq;
+ };
+@@ -173,12 +173,12 @@ static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
+ enum aspeed_sgpio_reg reg;
+ int rc = 0;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ reg = aspeed_sgpio_is_input(offset) ? reg_val : reg_rdata;
+ rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset));
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return rc;
+ }
+@@ -215,11 +215,11 @@ static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val)
+ struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ sgpio_set_value(gc, offset, val);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset)
+@@ -236,9 +236,9 @@ static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int v
+ /* No special action is required for setting the direction; we'll
+ * error-out in sgpio_set_value if this isn't an output GPIO */
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ rc = sgpio_set_value(gc, offset, val);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return rc;
+ }
+@@ -277,11 +277,11 @@ static void aspeed_sgpio_irq_ack(struct irq_data *d)
+
+ status_addr = bank_reg(gpio, bank, reg_irq_status);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ iowrite32(bit, status_addr);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static void aspeed_sgpio_irq_set_mask(struct irq_data *d, bool set)
+@@ -296,7 +296,7 @@ static void aspeed_sgpio_irq_set_mask(struct irq_data *d, bool set)
+ irqd_to_aspeed_sgpio_data(d, &gpio, &bank, &bit, &offset);
+ addr = bank_reg(gpio, bank, reg_irq_enable);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ reg = ioread32(addr);
+ if (set)
+@@ -306,7 +306,7 @@ static void aspeed_sgpio_irq_set_mask(struct irq_data *d, bool set)
+
+ iowrite32(reg, addr);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static void aspeed_sgpio_irq_mask(struct irq_data *d)
+@@ -355,7 +355,7 @@ static int aspeed_sgpio_set_type(struct irq_data *d, unsigned int type)
+ return -EINVAL;
+ }
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ addr = bank_reg(gpio, bank, reg_irq_type0);
+ reg = ioread32(addr);
+@@ -372,7 +372,7 @@ static int aspeed_sgpio_set_type(struct irq_data *d, unsigned int type)
+ reg = (reg & ~bit) | type2;
+ iowrite32(reg, addr);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ irq_set_handler_locked(d, handler);
+
+@@ -395,7 +395,7 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc)
+ reg = ioread32(bank_reg(data, bank, reg_irq_status));
+
+ for_each_set_bit(p, &reg, 32)
+- generic_handle_domain_irq(gc->irq.domain, i * 32 + p * 2);
++ generic_handle_domain_irq(gc->irq.domain, (i * 32 + p) * 2);
+ }
+
+ chained_irq_exit(ic, desc);
+@@ -467,7 +467,7 @@ static int aspeed_sgpio_reset_tolerance(struct gpio_chip *chip,
+
+ reg = bank_reg(gpio, to_bank(offset), reg_tolerance);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ val = readl(reg);
+
+@@ -478,7 +478,7 @@ static int aspeed_sgpio_reset_tolerance(struct gpio_chip *chip,
+
+ writel(val, reg);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return 0;
+ }
+@@ -575,7 +575,7 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
+ iowrite32(FIELD_PREP(ASPEED_SGPIO_CLK_DIV_MASK, sgpio_clk_div) | gpio_cnt_regval |
+ ASPEED_SGPIO_ENABLE, gpio->base + ASPEED_SGPIO_CTRL);
+
+- spin_lock_init(&gpio->lock);
++ raw_spin_lock_init(&gpio->lock);
+
+ gpio->chip.parent = &pdev->dev;
+ gpio->chip.ngpio = nr_gpios * 2;
+diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
+index 3c8f20c57695f..318a7d95a1a8b 100644
+--- a/drivers/gpio/gpio-aspeed.c
++++ b/drivers/gpio/gpio-aspeed.c
+@@ -53,7 +53,7 @@ struct aspeed_gpio_config {
+ struct aspeed_gpio {
+ struct gpio_chip chip;
+ struct irq_chip irqc;
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ void __iomem *base;
+ int irq;
+ const struct aspeed_gpio_config *config;
+@@ -413,14 +413,14 @@ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
+ unsigned long flags;
+ bool copro;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ copro = aspeed_gpio_copro_request(gpio, offset);
+
+ __aspeed_gpio_set(gc, offset, val);
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
+@@ -435,7 +435,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
+ if (!have_input(gpio, offset))
+ return -ENOTSUPP;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ reg = ioread32(addr);
+ reg &= ~GPIO_BIT(offset);
+@@ -445,7 +445,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return 0;
+ }
+@@ -463,7 +463,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
+ if (!have_output(gpio, offset))
+ return -ENOTSUPP;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ reg = ioread32(addr);
+ reg |= GPIO_BIT(offset);
+@@ -474,7 +474,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return 0;
+ }
+@@ -492,11 +492,11 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+ if (!have_output(gpio, offset))
+ return GPIO_LINE_DIRECTION_IN;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ val = ioread32(bank_reg(gpio, bank, reg_dir)) & GPIO_BIT(offset);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return val ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
+ }
+@@ -539,14 +539,14 @@ static void aspeed_gpio_irq_ack(struct irq_data *d)
+
+ status_addr = bank_reg(gpio, bank, reg_irq_status);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ copro = aspeed_gpio_copro_request(gpio, offset);
+
+ iowrite32(bit, status_addr);
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
+@@ -565,7 +565,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
+
+ addr = bank_reg(gpio, bank, reg_irq_enable);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ copro = aspeed_gpio_copro_request(gpio, offset);
+
+ reg = ioread32(addr);
+@@ -577,7 +577,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ }
+
+ static void aspeed_gpio_irq_mask(struct irq_data *d)
+@@ -629,7 +629,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
+ return -EINVAL;
+ }
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ copro = aspeed_gpio_copro_request(gpio, offset);
+
+ addr = bank_reg(gpio, bank, reg_irq_type0);
+@@ -649,7 +649,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ irq_set_handler_locked(d, handler);
+
+@@ -716,7 +716,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
+
+ treg = bank_reg(gpio, to_bank(offset), reg_tolerance);
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+ copro = aspeed_gpio_copro_request(gpio, offset);
+
+ val = readl(treg);
+@@ -730,7 +730,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
+
+ if (copro)
+ aspeed_gpio_copro_release(gpio, offset);
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return 0;
+ }
+@@ -856,7 +856,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset,
+ return rc;
+ }
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ if (timer_allocation_registered(gpio, offset)) {
+ rc = unregister_allocated_timer(gpio, offset);
+@@ -916,7 +916,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset,
+ configure_timer(gpio, offset, i);
+
+ out:
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return rc;
+ }
+@@ -927,13 +927,13 @@ static int disable_debounce(struct gpio_chip *chip, unsigned int offset)
+ unsigned long flags;
+ int rc;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ rc = unregister_allocated_timer(gpio, offset);
+ if (!rc)
+ configure_timer(gpio, offset, 0);
+
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+
+ return rc;
+ }
+@@ -1015,7 +1015,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+ return -EINVAL;
+ bindex = offset >> 3;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ /* Sanity check, this shouldn't happen */
+ if (gpio->cf_copro_bankmap[bindex] == 0xff) {
+@@ -1036,7 +1036,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+ if (bit)
+ *bit = GPIO_OFFSET(offset);
+ bail:
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(aspeed_gpio_copro_grab_gpio);
+@@ -1060,7 +1060,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc)
+ return -EINVAL;
+ bindex = offset >> 3;
+
+- spin_lock_irqsave(&gpio->lock, flags);
++ raw_spin_lock_irqsave(&gpio->lock, flags);
+
+ /* Sanity check, this shouldn't happen */
+ if (gpio->cf_copro_bankmap[bindex] == 0) {
+@@ -1074,7 +1074,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc)
+ aspeed_gpio_change_cmd_source(gpio, bank, bindex,
+ GPIO_CMDSRC_ARM);
+ bail:
+- spin_unlock_irqrestore(&gpio->lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->lock, flags);
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(aspeed_gpio_copro_release_gpio);
+@@ -1148,7 +1148,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
+ if (IS_ERR(gpio->base))
+ return PTR_ERR(gpio->base);
+
+- spin_lock_init(&gpio->lock);
++ raw_spin_lock_init(&gpio->lock);
+
+ gpio_id = of_match_node(aspeed_gpio_of_table, pdev->dev.of_node);
+ if (!gpio_id)
+diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
+index 895a79936248d..c5d85e931f2a9 100644
+--- a/drivers/gpio/gpio-brcmstb.c
++++ b/drivers/gpio/gpio-brcmstb.c
+@@ -92,9 +92,9 @@ brcmstb_gpio_get_active_irqs(struct brcmstb_gpio_bank *bank)
+ unsigned long status;
+ unsigned long flags;
+
+- spin_lock_irqsave(&bank->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&bank->gc.bgpio_lock, flags);
+ status = __brcmstb_gpio_get_active_irqs(bank);
+- spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags);
+
+ return status;
+ }
+@@ -114,14 +114,14 @@ static void brcmstb_gpio_set_imask(struct brcmstb_gpio_bank *bank,
+ u32 imask;
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ imask = gc->read_reg(priv->reg_base + GIO_MASK(bank->id));
+ if (enable)
+ imask |= mask;
+ else
+ imask &= ~mask;
+ gc->write_reg(priv->reg_base + GIO_MASK(bank->id), imask);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static int brcmstb_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+@@ -204,7 +204,7 @@ static int brcmstb_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ return -EINVAL;
+ }
+
+- spin_lock_irqsave(&bank->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&bank->gc.bgpio_lock, flags);
+
+ iedge_config = bank->gc.read_reg(priv->reg_base +
+ GIO_EC(bank->id)) & ~mask;
+@@ -220,7 +220,7 @@ static int brcmstb_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ bank->gc.write_reg(priv->reg_base + GIO_LEVEL(bank->id),
+ ilevel | level);
+
+- spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags);
+ return 0;
+ }
+
+diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c
+index 562f8f7e7d1fc..137aea49ba026 100644
+--- a/drivers/gpio/gpio-cadence.c
++++ b/drivers/gpio/gpio-cadence.c
+@@ -41,12 +41,12 @@ static int cdns_gpio_request(struct gpio_chip *chip, unsigned int offset)
+ struct cdns_gpio_chip *cgpio = gpiochip_get_data(chip);
+ unsigned long flags;
+
+- spin_lock_irqsave(&chip->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags);
+
+ iowrite32(ioread32(cgpio->regs + CDNS_GPIO_BYPASS_MODE) & ~BIT(offset),
+ cgpio->regs + CDNS_GPIO_BYPASS_MODE);
+
+- spin_unlock_irqrestore(&chip->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags);
+ return 0;
+ }
+
+@@ -55,13 +55,13 @@ static void cdns_gpio_free(struct gpio_chip *chip, unsigned int offset)
+ struct cdns_gpio_chip *cgpio = gpiochip_get_data(chip);
+ unsigned long flags;
+
+- spin_lock_irqsave(&chip->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags);
+
+ iowrite32(ioread32(cgpio->regs + CDNS_GPIO_BYPASS_MODE) |
+ (BIT(offset) & cgpio->bypass_orig),
+ cgpio->regs + CDNS_GPIO_BYPASS_MODE);
+
+- spin_unlock_irqrestore(&chip->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags);
+ }
+
+ static void cdns_gpio_irq_mask(struct irq_data *d)
+@@ -90,7 +90,7 @@ static int cdns_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ u32 mask = BIT(d->hwirq);
+ int ret = 0;
+
+- spin_lock_irqsave(&chip->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags);
+
+ int_value = ioread32(cgpio->regs + CDNS_GPIO_IRQ_VALUE) & ~mask;
+ int_type = ioread32(cgpio->regs + CDNS_GPIO_IRQ_TYPE) & ~mask;
+@@ -115,7 +115,7 @@ static int cdns_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ iowrite32(int_type, cgpio->regs + CDNS_GPIO_IRQ_TYPE);
+
+ err_irq_type:
+- spin_unlock_irqrestore(&chip->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags);
+ return ret;
+ }
+
+diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
+index cb5afaa7ed482..0214244e9f01f 100644
+--- a/drivers/gpio/gpio-davinci.c
++++ b/drivers/gpio/gpio-davinci.c
+@@ -326,7 +326,7 @@ static struct irq_chip gpio_irqchip = {
+ .irq_enable = gpio_irq_enable,
+ .irq_disable = gpio_irq_disable,
+ .irq_set_type = gpio_irq_type,
+- .flags = IRQCHIP_SET_TYPE_MASKED,
++ .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE,
+ };
+
+ static void gpio_irq_handler(struct irq_desc *desc)
+diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
+index 026903e3ef543..08b9e2cf4f2d6 100644
+--- a/drivers/gpio/gpio-dln2.c
++++ b/drivers/gpio/gpio-dln2.c
+@@ -46,6 +46,7 @@
+ struct dln2_gpio {
+ struct platform_device *pdev;
+ struct gpio_chip gpio;
++ struct irq_chip irqchip;
+
+ /*
+ * Cache pin direction to save us one transfer, since the hardware has
+@@ -383,15 +384,6 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd)
+ mutex_unlock(&dln2->irq_lock);
+ }
+
+-static struct irq_chip dln2_gpio_irqchip = {
+- .name = "dln2-irq",
+- .irq_mask = dln2_irq_mask,
+- .irq_unmask = dln2_irq_unmask,
+- .irq_set_type = dln2_irq_set_type,
+- .irq_bus_lock = dln2_irq_bus_lock,
+- .irq_bus_sync_unlock = dln2_irq_bus_unlock,
+-};
+-
+ static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
+ const void *data, int len)
+ {
+@@ -473,8 +465,15 @@ static int dln2_gpio_probe(struct platform_device *pdev)
+ dln2->gpio.direction_output = dln2_gpio_direction_output;
+ dln2->gpio.set_config = dln2_gpio_set_config;
+
++ dln2->irqchip.name = "dln2-irq",
++ dln2->irqchip.irq_mask = dln2_irq_mask,
++ dln2->irqchip.irq_unmask = dln2_irq_unmask,
++ dln2->irqchip.irq_set_type = dln2_irq_set_type,
++ dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock,
++ dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock,
++
+ girq = &dln2->gpio.irq;
+- girq->chip = &dln2_gpio_irqchip;
++ girq->chip = &dln2->irqchip;
+ /* The event comes from the outside so no parent handler */
+ girq->parent_handler = NULL;
+ girq->num_parents = 0;
+diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
+index f98fa33e16790..a503f37001ebb 100644
+--- a/drivers/gpio/gpio-dwapb.c
++++ b/drivers/gpio/gpio-dwapb.c
+@@ -242,9 +242,9 @@ static void dwapb_irq_ack(struct irq_data *d)
+ u32 val = BIT(irqd_to_hwirq(d));
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ dwapb_write(gpio, GPIO_PORTA_EOI, val);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void dwapb_irq_mask(struct irq_data *d)
+@@ -254,10 +254,10 @@ static void dwapb_irq_mask(struct irq_data *d)
+ unsigned long flags;
+ u32 val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ val = dwapb_read(gpio, GPIO_INTMASK) | BIT(irqd_to_hwirq(d));
+ dwapb_write(gpio, GPIO_INTMASK, val);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void dwapb_irq_unmask(struct irq_data *d)
+@@ -267,10 +267,10 @@ static void dwapb_irq_unmask(struct irq_data *d)
+ unsigned long flags;
+ u32 val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ val = dwapb_read(gpio, GPIO_INTMASK) & ~BIT(irqd_to_hwirq(d));
+ dwapb_write(gpio, GPIO_INTMASK, val);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void dwapb_irq_enable(struct irq_data *d)
+@@ -280,11 +280,11 @@ static void dwapb_irq_enable(struct irq_data *d)
+ unsigned long flags;
+ u32 val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ val = dwapb_read(gpio, GPIO_INTEN);
+ val |= BIT(irqd_to_hwirq(d));
+ dwapb_write(gpio, GPIO_INTEN, val);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void dwapb_irq_disable(struct irq_data *d)
+@@ -294,11 +294,11 @@ static void dwapb_irq_disable(struct irq_data *d)
+ unsigned long flags;
+ u32 val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ val = dwapb_read(gpio, GPIO_INTEN);
+ val &= ~BIT(irqd_to_hwirq(d));
+ dwapb_write(gpio, GPIO_INTEN, val);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static int dwapb_irq_set_type(struct irq_data *d, u32 type)
+@@ -308,7 +308,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type)
+ irq_hw_number_t bit = irqd_to_hwirq(d);
+ unsigned long level, polarity, flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ level = dwapb_read(gpio, GPIO_INTTYPE_LEVEL);
+ polarity = dwapb_read(gpio, GPIO_INT_POLARITY);
+
+@@ -343,7 +343,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type)
+ dwapb_write(gpio, GPIO_INTTYPE_LEVEL, level);
+ if (type != IRQ_TYPE_EDGE_BOTH)
+ dwapb_write(gpio, GPIO_INT_POLARITY, polarity);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -373,7 +373,7 @@ static int dwapb_gpio_set_debounce(struct gpio_chip *gc,
+ unsigned long flags, val_deb;
+ unsigned long mask = BIT(offset);
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ val_deb = dwapb_read(gpio, GPIO_PORTA_DEBOUNCE);
+ if (debounce)
+@@ -382,7 +382,7 @@ static int dwapb_gpio_set_debounce(struct gpio_chip *gc,
+ val_deb &= ~mask;
+ dwapb_write(gpio, GPIO_PORTA_DEBOUNCE, val_deb);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -653,10 +653,9 @@ static int dwapb_get_clks(struct dwapb_gpio *gpio)
+ gpio->clks[1].id = "db";
+ err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS,
+ gpio->clks);
+- if (err) {
+- dev_err(gpio->dev, "Cannot get APB/Debounce clocks\n");
+- return err;
+- }
++ if (err)
++ return dev_err_probe(gpio->dev, err,
++ "Cannot get APB/Debounce clocks\n");
+
+ err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks);
+ if (err) {
+@@ -739,7 +738,7 @@ static int dwapb_gpio_suspend(struct device *dev)
+ unsigned long flags;
+ int i;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ for (i = 0; i < gpio->nr_ports; i++) {
+ unsigned int offset;
+ unsigned int idx = gpio->ports[i].idx;
+@@ -766,7 +765,7 @@ static int dwapb_gpio_suspend(struct device *dev)
+ dwapb_write(gpio, GPIO_INTMASK, ~ctx->wake_en);
+ }
+ }
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ clk_bulk_disable_unprepare(DWAPB_NR_CLOCKS, gpio->clks);
+
+@@ -786,7 +785,7 @@ static int dwapb_gpio_resume(struct device *dev)
+ return err;
+ }
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ for (i = 0; i < gpio->nr_ports; i++) {
+ unsigned int offset;
+ unsigned int idx = gpio->ports[i].idx;
+@@ -813,7 +812,7 @@ static int dwapb_gpio_resume(struct device *dev)
+ dwapb_write(gpio, GPIO_PORTA_EOI, 0xffffffff);
+ }
+ }
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
+index f954359c9544e..21204a5dca3d4 100644
+--- a/drivers/gpio/gpio-grgpio.c
++++ b/drivers/gpio/gpio-grgpio.c
+@@ -145,7 +145,7 @@ static int grgpio_irq_set_type(struct irq_data *d, unsigned int type)
+ return -EINVAL;
+ }
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ ipol = priv->gc.read_reg(priv->regs + GRGPIO_IPOL) & ~mask;
+ iedge = priv->gc.read_reg(priv->regs + GRGPIO_IEDGE) & ~mask;
+@@ -153,7 +153,7 @@ static int grgpio_irq_set_type(struct irq_data *d, unsigned int type)
+ priv->gc.write_reg(priv->regs + GRGPIO_IPOL, ipol | pol);
+ priv->gc.write_reg(priv->regs + GRGPIO_IEDGE, iedge | edge);
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -164,11 +164,11 @@ static void grgpio_irq_mask(struct irq_data *d)
+ int offset = d->hwirq;
+ unsigned long flags;
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ grgpio_set_imask(priv, offset, 0);
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+ }
+
+ static void grgpio_irq_unmask(struct irq_data *d)
+@@ -177,11 +177,11 @@ static void grgpio_irq_unmask(struct irq_data *d)
+ int offset = d->hwirq;
+ unsigned long flags;
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ grgpio_set_imask(priv, offset, 1);
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+ }
+
+ static struct irq_chip grgpio_irq_chip = {
+@@ -199,7 +199,7 @@ static irqreturn_t grgpio_irq_handler(int irq, void *dev)
+ int i;
+ int match = 0;
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ /*
+ * For each gpio line, call its interrupt handler if it its underlying
+@@ -215,7 +215,7 @@ static irqreturn_t grgpio_irq_handler(int irq, void *dev)
+ }
+ }
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+
+ if (!match)
+ dev_warn(priv->dev, "No gpio line matched irq %d\n", irq);
+@@ -247,13 +247,13 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
+ dev_dbg(priv->dev, "Mapping irq %d for gpio line %d\n",
+ irq, offset);
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ /* Request underlying irq if not already requested */
+ lirq->irq = irq;
+ uirq = &priv->uirqs[lirq->index];
+ if (uirq->refcnt == 0) {
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+ ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
+ dev_name(priv->dev), priv);
+ if (ret) {
+@@ -262,11 +262,11 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
+ uirq->uirq);
+ return ret;
+ }
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+ }
+ uirq->refcnt++;
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+
+ /* Setup irq */
+ irq_set_chip_data(irq, priv);
+@@ -290,7 +290,7 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
+ irq_set_chip_and_handler(irq, NULL, NULL);
+ irq_set_chip_data(irq, NULL);
+
+- spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
+
+ /* Free underlying irq if last user unmapped */
+ index = -1;
+@@ -309,13 +309,13 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
+ uirq = &priv->uirqs[lirq->index];
+ uirq->refcnt--;
+ if (uirq->refcnt == 0) {
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+ free_irq(uirq->uirq, priv);
+ return;
+ }
+ }
+
+- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
+ }
+
+ static const struct irq_domain_ops grgpio_irq_domain_ops = {
+diff --git a/drivers/gpio/gpio-hlwd.c b/drivers/gpio/gpio-hlwd.c
+index 641719a96a1a9..4e13e937f8324 100644
+--- a/drivers/gpio/gpio-hlwd.c
++++ b/drivers/gpio/gpio-hlwd.c
+@@ -65,7 +65,7 @@ static void hlwd_gpio_irqhandler(struct irq_desc *desc)
+ int hwirq;
+ u32 emulated_pending;
+
+- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
+ pending = ioread32be(hlwd->regs + HW_GPIOB_INTFLAG);
+ pending &= ioread32be(hlwd->regs + HW_GPIOB_INTMASK);
+
+@@ -93,7 +93,7 @@ static void hlwd_gpio_irqhandler(struct irq_desc *desc)
+ /* Mark emulated interrupts as pending */
+ pending |= rising | falling;
+ }
+- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
+
+ chained_irq_enter(chip, desc);
+
+@@ -118,11 +118,11 @@ static void hlwd_gpio_irq_mask(struct irq_data *data)
+ unsigned long flags;
+ u32 mask;
+
+- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
+ mask = ioread32be(hlwd->regs + HW_GPIOB_INTMASK);
+ mask &= ~BIT(data->hwirq);
+ iowrite32be(mask, hlwd->regs + HW_GPIOB_INTMASK);
+- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
+ }
+
+ static void hlwd_gpio_irq_unmask(struct irq_data *data)
+@@ -132,11 +132,11 @@ static void hlwd_gpio_irq_unmask(struct irq_data *data)
+ unsigned long flags;
+ u32 mask;
+
+- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
+ mask = ioread32be(hlwd->regs + HW_GPIOB_INTMASK);
+ mask |= BIT(data->hwirq);
+ iowrite32be(mask, hlwd->regs + HW_GPIOB_INTMASK);
+- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
+ }
+
+ static void hlwd_gpio_irq_enable(struct irq_data *data)
+@@ -173,7 +173,7 @@ static int hlwd_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
+ unsigned long flags;
+ u32 level;
+
+- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags);
+
+ hlwd->edge_emulation &= ~BIT(data->hwirq);
+
+@@ -194,11 +194,11 @@ static int hlwd_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
+ hlwd_gpio_irq_setup_emulation(hlwd, data->hwirq, flow_type);
+ break;
+ default:
+- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
+ return -EINVAL;
+ }
+
+- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags);
+ return 0;
+ }
+
+diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c
+index 50003ad2e5898..1cafdf46f8756 100644
+--- a/drivers/gpio/gpio-idt3243x.c
++++ b/drivers/gpio/gpio-idt3243x.c
+@@ -57,7 +57,7 @@ static int idt_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type)
+ if (sense == IRQ_TYPE_NONE || (sense & IRQ_TYPE_EDGE_BOTH))
+ return -EINVAL;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ ilevel = readl(ctrl->gpio + IDT_GPIO_ILEVEL);
+ if (sense & IRQ_TYPE_LEVEL_HIGH)
+@@ -68,7 +68,7 @@ static int idt_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type)
+ writel(ilevel, ctrl->gpio + IDT_GPIO_ILEVEL);
+ irq_set_handler_locked(d, handle_level_irq);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ return 0;
+ }
+
+@@ -86,12 +86,12 @@ static void idt_gpio_mask(struct irq_data *d)
+ struct idt_gpio_ctrl *ctrl = gpiochip_get_data(gc);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ ctrl->mask_cache |= BIT(d->hwirq);
+ writel(ctrl->mask_cache, ctrl->pic + IDT_PIC_IRQ_MASK);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void idt_gpio_unmask(struct irq_data *d)
+@@ -100,12 +100,12 @@ static void idt_gpio_unmask(struct irq_data *d)
+ struct idt_gpio_ctrl *ctrl = gpiochip_get_data(gc);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ ctrl->mask_cache &= ~BIT(d->hwirq);
+ writel(ctrl->mask_cache, ctrl->pic + IDT_PIC_IRQ_MASK);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static int idt_gpio_irq_init_hw(struct gpio_chip *gc)
+@@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ struct gpio_irq_chip *girq;
+ struct idt_gpio_ctrl *ctrl;
+- unsigned int parent_irq;
++ int parent_irq;
+ int ngpios;
+ int ret;
+
+@@ -164,8 +164,8 @@ static int idt_gpio_probe(struct platform_device *pdev)
+ return PTR_ERR(ctrl->pic);
+
+ parent_irq = platform_get_irq(pdev, 0);
+- if (!parent_irq)
+- return -EINVAL;
++ if (parent_irq < 0)
++ return parent_irq;
+
+ girq = &ctrl->gc.irq;
+ girq->chip = &idt_gpio_irqchip;
+diff --git a/drivers/gpio/gpio-ixp4xx.c b/drivers/gpio/gpio-ixp4xx.c
+index b3b050604e0be..6b184502fa3f8 100644
+--- a/drivers/gpio/gpio-ixp4xx.c
++++ b/drivers/gpio/gpio-ixp4xx.c
+@@ -128,7 +128,7 @@ static int ixp4xx_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ int_reg = IXP4XX_REG_GPIT1;
+ }
+
+- spin_lock_irqsave(&g->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&g->gc.bgpio_lock, flags);
+
+ /* Clear the style for the appropriate pin */
+ val = __raw_readl(g->base + int_reg);
+@@ -147,7 +147,7 @@ static int ixp4xx_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+ val |= BIT(d->hwirq);
+ __raw_writel(val, g->base + IXP4XX_REG_GPOE);
+
+- spin_unlock_irqrestore(&g->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&g->gc.bgpio_lock, flags);
+
+ /* This parent only accept level high (asserted) */
+ return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH);
+diff --git a/drivers/gpio/gpio-loongson1.c b/drivers/gpio/gpio-loongson1.c
+index 1b1ee94eeab47..5d90b3bc5a256 100644
+--- a/drivers/gpio/gpio-loongson1.c
++++ b/drivers/gpio/gpio-loongson1.c
+@@ -25,10 +25,10 @@ static int ls1x_gpio_request(struct gpio_chip *gc, unsigned int offset)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ __raw_writel(__raw_readl(gpio_reg_base + GPIO_CFG) | BIT(offset),
+ gpio_reg_base + GPIO_CFG);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -37,10 +37,10 @@ static void ls1x_gpio_free(struct gpio_chip *gc, unsigned int offset)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ __raw_writel(__raw_readl(gpio_reg_base + GPIO_CFG) & ~BIT(offset),
+ gpio_reg_base + GPIO_CFG);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static int ls1x_gpio_probe(struct platform_device *pdev)
+diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c
+index 1e21c661d79d6..a035a9bcb57c6 100644
+--- a/drivers/gpio/gpio-menz127.c
++++ b/drivers/gpio/gpio-menz127.c
+@@ -64,7 +64,7 @@ static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio,
+ debounce /= 50;
+ }
+
+- spin_lock(&gc->bgpio_lock);
++ raw_spin_lock(&gc->bgpio_lock);
+
+ db_en = readl(priv->reg_base + MEN_Z127_DBER);
+
+@@ -79,7 +79,7 @@ static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio,
+ writel(db_en, priv->reg_base + MEN_Z127_DBER);
+ writel(db_cnt, priv->reg_base + GPIO_TO_DBCNT_REG(gpio));
+
+- spin_unlock(&gc->bgpio_lock);
++ raw_spin_unlock(&gc->bgpio_lock);
+
+ return 0;
+ }
+@@ -91,7 +91,7 @@ static int men_z127_set_single_ended(struct gpio_chip *gc,
+ struct men_z127_gpio *priv = gpiochip_get_data(gc);
+ u32 od_en;
+
+- spin_lock(&gc->bgpio_lock);
++ raw_spin_lock(&gc->bgpio_lock);
+ od_en = readl(priv->reg_base + MEN_Z127_ODER);
+
+ if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN)
+@@ -101,7 +101,7 @@ static int men_z127_set_single_ended(struct gpio_chip *gc,
+ od_en &= ~BIT(offset);
+
+ writel(od_en, priv->reg_base + MEN_Z127_ODER);
+- spin_unlock(&gc->bgpio_lock);
++ raw_spin_unlock(&gc->bgpio_lock);
+
+ return 0;
+ }
+diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c
+index 40a052bc67849..5a09070e5f78c 100644
+--- a/drivers/gpio/gpio-mlxbf2.c
++++ b/drivers/gpio/gpio-mlxbf2.c
+@@ -120,7 +120,7 @@ static int mlxbf2_gpio_lock_acquire(struct mlxbf2_gpio_context *gs)
+ u32 arm_gpio_lock_val;
+
+ mutex_lock(yu_arm_gpio_lock_param.lock);
+- spin_lock(&gs->gc.bgpio_lock);
++ raw_spin_lock(&gs->gc.bgpio_lock);
+
+ arm_gpio_lock_val = readl(yu_arm_gpio_lock_param.io);
+
+@@ -128,7 +128,7 @@ static int mlxbf2_gpio_lock_acquire(struct mlxbf2_gpio_context *gs)
+ * When lock active bit[31] is set, ModeX is write enabled
+ */
+ if (YU_LOCK_ACTIVE_BIT(arm_gpio_lock_val)) {
+- spin_unlock(&gs->gc.bgpio_lock);
++ raw_spin_unlock(&gs->gc.bgpio_lock);
+ mutex_unlock(yu_arm_gpio_lock_param.lock);
+ return -EINVAL;
+ }
+@@ -146,7 +146,7 @@ static void mlxbf2_gpio_lock_release(struct mlxbf2_gpio_context *gs)
+ __releases(yu_arm_gpio_lock_param.lock)
+ {
+ writel(YU_ARM_GPIO_LOCK_RELEASE, yu_arm_gpio_lock_param.io);
+- spin_unlock(&gs->gc.bgpio_lock);
++ raw_spin_unlock(&gs->gc.bgpio_lock);
+ mutex_unlock(yu_arm_gpio_lock_param.lock);
+ }
+
+diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
+index c335a0309ba31..d9dff3dc92ae5 100644
+--- a/drivers/gpio/gpio-mmio.c
++++ b/drivers/gpio/gpio-mmio.c
+@@ -220,7 +220,7 @@ static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+ unsigned long mask = bgpio_line2mask(gc, gpio);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ if (val)
+ gc->bgpio_data |= mask;
+@@ -229,7 +229,7 @@ static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+
+ gc->write_reg(gc->reg_dat, gc->bgpio_data);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void bgpio_set_with_clear(struct gpio_chip *gc, unsigned int gpio,
+@@ -248,7 +248,7 @@ static void bgpio_set_set(struct gpio_chip *gc, unsigned int gpio, int val)
+ unsigned long mask = bgpio_line2mask(gc, gpio);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ if (val)
+ gc->bgpio_data |= mask;
+@@ -257,7 +257,7 @@ static void bgpio_set_set(struct gpio_chip *gc, unsigned int gpio, int val)
+
+ gc->write_reg(gc->reg_set, gc->bgpio_data);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void bgpio_multiple_get_masks(struct gpio_chip *gc,
+@@ -286,7 +286,7 @@ static void bgpio_set_multiple_single_reg(struct gpio_chip *gc,
+ unsigned long flags;
+ unsigned long set_mask, clear_mask;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ bgpio_multiple_get_masks(gc, mask, bits, &set_mask, &clear_mask);
+
+@@ -295,7 +295,7 @@ static void bgpio_set_multiple_single_reg(struct gpio_chip *gc,
+
+ gc->write_reg(reg, gc->bgpio_data);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void bgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
+@@ -347,7 +347,7 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+
+@@ -356,7 +356,7 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+ if (gc->reg_dir_out)
+ gc->write_reg(gc->reg_dir_out, gc->bgpio_dir);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ return 0;
+ }
+@@ -387,7 +387,7 @@ static void bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
+
+@@ -396,7 +396,7 @@ static void bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+ if (gc->reg_dir_out)
+ gc->write_reg(gc->reg_dir_out, gc->bgpio_dir);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static int bgpio_dir_out_dir_first(struct gpio_chip *gc, unsigned int gpio,
+@@ -610,7 +610,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
+ if (gc->bgpio_bits > BITS_PER_LONG)
+ return -EINVAL;
+
+- spin_lock_init(&gc->bgpio_lock);
++ raw_spin_lock_init(&gc->bgpio_lock);
+ gc->parent = dev;
+ gc->label = dev_name(dev);
+ gc->base = -1;
+diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
+index d26bff29157b5..5c84dd7880a47 100644
+--- a/drivers/gpio/gpio-mockup.c
++++ b/drivers/gpio/gpio-mockup.c
+@@ -368,11 +368,18 @@ static void gpio_mockup_debugfs_setup(struct device *dev,
+ priv->offset = i;
+ priv->desc = gpiochip_get_desc(gc, i);
+
+- debugfs_create_file(name, 0200, chip->dbg_dir, priv,
++ debugfs_create_file(name, 0600, chip->dbg_dir, priv,
+ &gpio_mockup_debugfs_ops);
+ }
+ }
+
++static void gpio_mockup_debugfs_cleanup(void *data)
++{
++ struct gpio_mockup_chip *chip = data;
++
++ debugfs_remove_recursive(chip->dbg_dir);
++}
++
+ static void gpio_mockup_dispose_mappings(void *data)
+ {
+ struct gpio_mockup_chip *chip = data;
+@@ -455,7 +462,7 @@ static int gpio_mockup_probe(struct platform_device *pdev)
+
+ gpio_mockup_debugfs_setup(dev, chip);
+
+- return 0;
++ return devm_add_action_or_reset(dev, gpio_mockup_debugfs_cleanup, chip);
+ }
+
+ static const struct of_device_id gpio_mockup_of_match[] = {
+@@ -547,8 +554,10 @@ static int __init gpio_mockup_register_chip(int idx)
+ }
+
+ fwnode = fwnode_create_software_node(properties, NULL);
+- if (IS_ERR(fwnode))
++ if (IS_ERR(fwnode)) {
++ kfree_strarray(line_names, ngpio);
+ return PTR_ERR(fwnode);
++ }
+
+ pdevinfo.name = "gpio-mockup";
+ pdevinfo.id = idx;
+@@ -611,9 +620,9 @@ static int __init gpio_mockup_init(void)
+
+ static void __exit gpio_mockup_exit(void)
+ {
++ gpio_mockup_unregister_pdevs();
+ debugfs_remove_recursive(gpio_mockup_dbg_dir);
+ platform_driver_unregister(&gpio_mockup_driver);
+- gpio_mockup_unregister_pdevs();
+ }
+
+ module_init(gpio_mockup_init);
+diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
+index 70d6ae20b1da5..763256efddc2b 100644
+--- a/drivers/gpio/gpio-mpc8xxx.c
++++ b/drivers/gpio/gpio-mpc8xxx.c
+@@ -47,7 +47,7 @@ struct mpc8xxx_gpio_chip {
+ unsigned offset, int value);
+
+ struct irq_domain *irq;
+- unsigned int irqn;
++ int irqn;
+ };
+
+ /*
+@@ -172,6 +172,7 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
+
+ switch (flow_type) {
+ case IRQ_TYPE_EDGE_FALLING:
++ case IRQ_TYPE_LEVEL_LOW:
+ raw_spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
+ gc->write_reg(mpc8xxx_gc->regs + GPIO_ICR,
+ gc->read_reg(mpc8xxx_gc->regs + GPIO_ICR)
+@@ -388,8 +389,8 @@ static int mpc8xxx_probe(struct platform_device *pdev)
+ }
+
+ mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
+- if (!mpc8xxx_gc->irqn)
+- return 0;
++ if (mpc8xxx_gc->irqn < 0)
++ return mpc8xxx_gc->irqn;
+
+ mpc8xxx_gc->irq = irq_domain_create_linear(fwnode,
+ MPC8XXX_GPIO_PINS,
+diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
+index 8f429d9f36616..b965513f44fea 100644
+--- a/drivers/gpio/gpio-mvebu.c
++++ b/drivers/gpio/gpio-mvebu.c
+@@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ unsigned long flags;
+ unsigned int on, off;
+
++ if (state->polarity != PWM_POLARITY_NORMAL)
++ return -EINVAL;
++
+ val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle;
+ do_div(val, NSEC_PER_SEC);
+ if (val > UINT_MAX + 1ULL)
+@@ -790,8 +793,12 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
+ u32 offset;
+ u32 set;
+
+- if (of_device_is_compatible(mvchip->chip.of_node,
+- "marvell,armada-370-gpio")) {
++ if (mvchip->soc_variant == MVEBU_GPIO_SOC_VARIANT_A8K) {
++ int ret = of_property_read_u32(dev->of_node,
++ "marvell,pwm-offset", &offset);
++ if (ret < 0)
++ return 0;
++ } else {
+ /*
+ * There are only two sets of PWM configuration registers for
+ * all the GPIO lines on those SoCs which this driver reserves
+@@ -801,13 +808,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
+ if (!platform_get_resource_byname(pdev, IORESOURCE_MEM, "pwm"))
+ return 0;
+ offset = 0;
+- } else if (mvchip->soc_variant == MVEBU_GPIO_SOC_VARIANT_A8K) {
+- int ret = of_property_read_u32(dev->of_node,
+- "marvell,pwm-offset", &offset);
+- if (ret < 0)
+- return 0;
+- } else {
+- return 0;
+ }
+
+ if (IS_ERR(mvchip->clk))
+@@ -871,17 +871,10 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
+ mvpwm->chip.dev = dev;
+ mvpwm->chip.ops = &mvebu_pwm_ops;
+ mvpwm->chip.npwm = mvchip->chip.ngpio;
+- /*
+- * There may already be some PWM allocated, so we can't force
+- * mvpwm->chip.base to a fixed point like mvchip->chip.base.
+- * So, we let pwmchip_add() do the numbering and take the next free
+- * region.
+- */
+- mvpwm->chip.base = -1;
+
+ spin_lock_init(&mvpwm->lock);
+
+- return pwmchip_add(&mvpwm->chip);
++ return devm_pwmchip_add(dev, &mvpwm->chip);
+ }
+
+ #ifdef CONFIG_DEBUG_FS
+@@ -1119,6 +1112,13 @@ static int mvebu_gpio_probe_syscon(struct platform_device *pdev,
+ return 0;
+ }
+
++static void mvebu_gpio_remove_irq_domain(void *data)
++{
++ struct irq_domain *domain = data;
++
++ irq_domain_remove(domain);
++}
++
+ static int mvebu_gpio_probe(struct platform_device *pdev)
+ {
+ struct mvebu_gpio_chip *mvchip;
+@@ -1251,17 +1251,21 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ if (!mvchip->domain) {
+ dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
+ mvchip->chip.label);
+- err = -ENODEV;
+- goto err_pwm;
++ return -ENODEV;
+ }
+
++ err = devm_add_action_or_reset(&pdev->dev, mvebu_gpio_remove_irq_domain,
++ mvchip->domain);
++ if (err)
++ return err;
++
+ err = irq_alloc_domain_generic_chips(
+ mvchip->domain, ngpios, 2, np->name, handle_level_irq,
+ IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0);
+ if (err) {
+ dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n",
+ mvchip->chip.label);
+- goto err_domain;
++ return err;
+ }
+
+ /*
+@@ -1301,13 +1305,6 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
+ }
+
+ return 0;
+-
+-err_domain:
+- irq_domain_remove(mvchip->domain);
+-err_pwm:
+- pwmchip_remove(&mvchip->mvpwm->chip);
+-
+- return err;
+ }
+
+ static struct platform_driver mvebu_gpio_driver = {
+diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
+index c871602fc5ba9..853d9aa6b3b1f 100644
+--- a/drivers/gpio/gpio-mxc.c
++++ b/drivers/gpio/gpio-mxc.c
+@@ -18,6 +18,7 @@
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/slab.h>
++#include <linux/spinlock.h>
+ #include <linux/syscore_ops.h>
+ #include <linux/gpio/driver.h>
+ #include <linux/of.h>
+@@ -147,6 +148,7 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
+ {
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+ struct mxc_gpio_port *port = gc->private;
++ unsigned long flags;
+ u32 bit, val;
+ u32 gpio_idx = d->hwirq;
+ int edge;
+@@ -185,6 +187,8 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
+ return -EINVAL;
+ }
+
++ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags);
++
+ if (GPIO_EDGE_SEL >= 0) {
+ val = readl(port->base + GPIO_EDGE_SEL);
+ if (edge == GPIO_INT_BOTH_EDGES)
+@@ -204,15 +208,20 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type)
+
+ writel(1 << gpio_idx, port->base + GPIO_ISR);
+
+- return 0;
++ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags);
++
++ return port->gc.direction_input(&port->gc, gpio_idx);
+ }
+
+ static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio)
+ {
+ void __iomem *reg = port->base;
++ unsigned long flags;
+ u32 bit, val;
+ int edge;
+
++ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags);
++
+ reg += GPIO_ICR1 + ((gpio & 0x10) >> 2); /* lower or upper register */
+ bit = gpio & 0xf;
+ val = readl(reg);
+@@ -227,9 +236,12 @@ static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio)
+ } else {
+ pr_err("mxc: invalid configuration for GPIO %d: %x\n",
+ gpio, edge);
+- return;
++ goto unlock;
+ }
+ writel(val | (edge << (bit << 1)), reg);
++
++unlock:
++ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags);
+ }
+
+ /* handle 32 interrupts in one status register */
+diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
+index d2fe76f3f34fd..4860bf3b7e002 100644
+--- a/drivers/gpio/gpio-pca953x.c
++++ b/drivers/gpio/gpio-pca953x.c
+@@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
++ .use_single_read = true,
++ .use_single_write = true,
++
+ .readable_reg = pca953x_readable_register,
+ .writeable_reg = pca953x_writeable_register,
+ .volatile_reg = pca953x_volatile_register,
+@@ -762,11 +765,11 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
+ bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
+ bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
+
++ bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
++
+ if (bitmap_empty(trigger, gc->ngpio))
+ return false;
+
+- bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
+-
+ bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
+ bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
+ bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
+@@ -894,15 +897,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
+ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
+ {
+ DECLARE_BITMAP(val, MAX_LINE);
++ u8 regaddr;
+ int ret;
+
+- ret = regcache_sync_region(chip->regmap, chip->regs->output,
+- chip->regs->output + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr,
++ regaddr + NBANK(chip) - 1);
+ if (ret)
+ goto out;
+
+- ret = regcache_sync_region(chip->regmap, chip->regs->direction,
+- chip->regs->direction + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr,
++ regaddr + NBANK(chip) - 1);
+ if (ret)
+ goto out;
+
+@@ -1108,20 +1114,21 @@ static int pca953x_regcache_sync(struct device *dev)
+ {
+ struct pca953x_chip *chip = dev_get_drvdata(dev);
+ int ret;
++ u8 regaddr;
+
+ /*
+ * The ordering between direction and output is important,
+ * sync these registers first and only then sync the rest.
+ */
+- ret = regcache_sync_region(chip->regmap, chip->regs->direction,
+- chip->regs->direction + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
+ if (ret) {
+ dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret);
+ return ret;
+ }
+
+- ret = regcache_sync_region(chip->regmap, chip->regs->output,
+- chip->regs->output + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
+ if (ret) {
+ dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret);
+ return ret;
+@@ -1129,16 +1136,18 @@ static int pca953x_regcache_sync(struct device *dev)
+
+ #ifdef CONFIG_GPIO_PCA953X_IRQ
+ if (chip->driver_data & PCA_PCAL) {
+- ret = regcache_sync_region(chip->regmap, PCAL953X_IN_LATCH,
+- PCAL953X_IN_LATCH + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr,
++ regaddr + NBANK(chip) - 1);
+ if (ret) {
+ dev_err(dev, "Failed to sync INT latch registers: %d\n",
+ ret);
+ return ret;
+ }
+
+- ret = regcache_sync_region(chip->regmap, PCAL953X_INT_MASK,
+- PCAL953X_INT_MASK + NBANK(chip));
++ regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0);
++ ret = regcache_sync_region(chip->regmap, regaddr,
++ regaddr + NBANK(chip) - 1);
+ if (ret) {
+ dev_err(dev, "Failed to sync INT mask registers: %d\n",
+ ret);
+@@ -1154,7 +1163,9 @@ static int pca953x_suspend(struct device *dev)
+ {
+ struct pca953x_chip *chip = dev_get_drvdata(dev);
+
++ mutex_lock(&chip->i2c_lock);
+ regcache_cache_only(chip->regmap, true);
++ mutex_unlock(&chip->i2c_lock);
+
+ if (atomic_read(&chip->wakeup_path))
+ device_set_wakeup_path(dev);
+@@ -1177,13 +1188,17 @@ static int pca953x_resume(struct device *dev)
+ }
+ }
+
++ mutex_lock(&chip->i2c_lock);
+ regcache_cache_only(chip->regmap, false);
+ regcache_mark_dirty(chip->regmap);
+ ret = pca953x_regcache_sync(dev);
+- if (ret)
++ if (ret) {
++ mutex_unlock(&chip->i2c_lock);
+ return ret;
++ }
+
+ ret = regcache_sync(chip->regmap);
++ mutex_unlock(&chip->i2c_lock);
+ if (ret) {
+ dev_err(dev, "Failed to restore register map: %d\n", ret);
+ return ret;
+diff --git a/drivers/gpio/gpio-realtek-otto.c b/drivers/gpio/gpio-realtek-otto.c
+index eeeb39bc171dc..bd75401b549d1 100644
+--- a/drivers/gpio/gpio-realtek-otto.c
++++ b/drivers/gpio/gpio-realtek-otto.c
+@@ -205,7 +205,7 @@ static void realtek_gpio_irq_handler(struct irq_desc *desc)
+ status = realtek_gpio_read_isr(ctrl, lines_done / 8);
+ port_pin_count = min(gc->ngpio - lines_done, 8U);
+ for_each_set_bit(offset, &status, port_pin_count)
+- generic_handle_domain_irq(gc->irq.domain, offset);
++ generic_handle_domain_irq(gc->irq.domain, offset + lines_done);
+ }
+
+ chained_irq_exit(irq_chip, desc);
+diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
+index ce63cbd14d69a..a197f698efebb 100644
+--- a/drivers/gpio/gpio-rockchip.c
++++ b/drivers/gpio/gpio-rockchip.c
+@@ -19,6 +19,8 @@
+ #include <linux/of_address.h>
+ #include <linux/of_device.h>
+ #include <linux/of_irq.h>
++#include <linux/pinctrl/consumer.h>
++#include <linux/pinctrl/pinconf-generic.h>
+ #include <linux/regmap.h>
+
+ #include "../pinctrl/core.h"
+@@ -154,6 +156,12 @@ static int rockchip_gpio_set_direction(struct gpio_chip *chip,
+ unsigned long flags;
+ u32 data = input ? 0 : 1;
+
++
++ if (input)
++ pinctrl_gpio_direction_input(bank->pin_base + offset);
++ else
++ pinctrl_gpio_direction_output(bank->pin_base + offset);
++
+ raw_spin_lock_irqsave(&bank->slock, flags);
+ rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr);
+ raw_spin_unlock_irqrestore(&bank->slock, flags);
+@@ -410,20 +418,18 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
+ level = rockchip_gpio_readl(bank, bank->gpio_regs->int_type);
+ polarity = rockchip_gpio_readl(bank, bank->gpio_regs->int_polarity);
+
+- switch (type) {
+- case IRQ_TYPE_EDGE_BOTH:
++ if (type == IRQ_TYPE_EDGE_BOTH) {
+ if (bank->gpio_type == GPIO_TYPE_V2) {
+- bank->toggle_edge_mode &= ~mask;
+ rockchip_gpio_writel_bit(bank, d->hwirq, 1,
+ bank->gpio_regs->int_bothedge);
+ goto out;
+ } else {
+ bank->toggle_edge_mode |= mask;
+- level |= mask;
++ level &= ~mask;
+
+ /*
+ * Determine gpio state. If 1 next interrupt should be
+- * falling otherwise rising.
++ * low otherwise high.
+ */
+ data = readl(bank->reg_base + bank->gpio_regs->ext_port);
+ if (data & mask)
+@@ -431,30 +437,34 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
+ else
+ polarity |= mask;
+ }
+- break;
+- case IRQ_TYPE_EDGE_RISING:
+- bank->toggle_edge_mode &= ~mask;
+- level |= mask;
+- polarity |= mask;
+- break;
+- case IRQ_TYPE_EDGE_FALLING:
+- bank->toggle_edge_mode &= ~mask;
+- level |= mask;
+- polarity &= ~mask;
+- break;
+- case IRQ_TYPE_LEVEL_HIGH:
+- bank->toggle_edge_mode &= ~mask;
+- level &= ~mask;
+- polarity |= mask;
+- break;
+- case IRQ_TYPE_LEVEL_LOW:
+- bank->toggle_edge_mode &= ~mask;
+- level &= ~mask;
+- polarity &= ~mask;
+- break;
+- default:
+- ret = -EINVAL;
+- goto out;
++ } else {
++ if (bank->gpio_type == GPIO_TYPE_V2) {
++ rockchip_gpio_writel_bit(bank, d->hwirq, 0,
++ bank->gpio_regs->int_bothedge);
++ } else {
++ bank->toggle_edge_mode &= ~mask;
++ }
++ switch (type) {
++ case IRQ_TYPE_EDGE_RISING:
++ level |= mask;
++ polarity |= mask;
++ break;
++ case IRQ_TYPE_EDGE_FALLING:
++ level |= mask;
++ polarity &= ~mask;
++ break;
++ case IRQ_TYPE_LEVEL_HIGH:
++ level &= ~mask;
++ polarity |= mask;
++ break;
++ case IRQ_TYPE_LEVEL_LOW:
++ level &= ~mask;
++ polarity &= ~mask;
++ break;
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
+ }
+
+ rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type);
+@@ -595,6 +605,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank)
+ return -ENODATA;
+
+ pctldev = of_pinctrl_get(pctlnp);
++ of_node_put(pctlnp);
+ if (!pctldev)
+ return -ENODEV;
+
+@@ -689,7 +700,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev)
+ struct device_node *pctlnp = of_get_parent(np);
+ struct pinctrl_dev *pctldev = NULL;
+ struct rockchip_pin_bank *bank = NULL;
+- struct rockchip_pin_output_deferred *cfg;
++ struct rockchip_pin_deferred *cfg;
+ static int gpio;
+ int id, ret;
+
+@@ -730,15 +741,22 @@ static int rockchip_gpio_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- while (!list_empty(&bank->deferred_output)) {
+- cfg = list_first_entry(&bank->deferred_output,
+- struct rockchip_pin_output_deferred, head);
++ while (!list_empty(&bank->deferred_pins)) {
++ cfg = list_first_entry(&bank->deferred_pins,
++ struct rockchip_pin_deferred, head);
+ list_del(&cfg->head);
+
+- ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg);
+- if (ret)
+- dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin, cfg->arg);
+-
++ switch (cfg->param) {
++ case PIN_CONFIG_OUTPUT:
++ ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg);
++ if (ret)
++ dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin,
++ cfg->arg);
++ break;
++ default:
++ dev_warn(dev, "unknown deferred config param %d\n", cfg->param);
++ break;
++ }
+ kfree(cfg);
+ }
+
+diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
+index 403f9e833d6a3..5ffab0fc1b765 100644
+--- a/drivers/gpio/gpio-sifive.c
++++ b/drivers/gpio/gpio-sifive.c
+@@ -44,7 +44,7 @@ static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset)
+ unsigned long flags;
+ unsigned int trigger;
+
+- spin_lock_irqsave(&chip->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&chip->gc.bgpio_lock, flags);
+ trigger = (chip->irq_state & BIT(offset)) ? chip->trigger[offset] : 0;
+ regmap_update_bits(chip->regs, SIFIVE_GPIO_RISE_IE, BIT(offset),
+ (trigger & IRQ_TYPE_EDGE_RISING) ? BIT(offset) : 0);
+@@ -54,7 +54,7 @@ static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset)
+ (trigger & IRQ_TYPE_LEVEL_HIGH) ? BIT(offset) : 0);
+ regmap_update_bits(chip->regs, SIFIVE_GPIO_LOW_IE, BIT(offset),
+ (trigger & IRQ_TYPE_LEVEL_LOW) ? BIT(offset) : 0);
+- spin_unlock_irqrestore(&chip->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&chip->gc.bgpio_lock, flags);
+ }
+
+ static int sifive_gpio_irq_set_type(struct irq_data *d, unsigned int trigger)
+@@ -84,13 +84,13 @@ static void sifive_gpio_irq_enable(struct irq_data *d)
+ /* Switch to input */
+ gc->direction_input(gc, offset);
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ /* Clear any sticky pending interrupts */
+ regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ /* Enable interrupts */
+ assign_bit(offset, &chip->irq_state, 1);
+@@ -116,13 +116,13 @@ static void sifive_gpio_irq_eoi(struct irq_data *d)
+ u32 bit = BIT(offset);
+ unsigned long flags;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+ /* Clear all pending interrupts */
+ regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit);
+ regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit);
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+
+ irq_chip_eoi_parent(d);
+ }
+@@ -209,13 +209,18 @@ static int sifive_gpio_probe(struct platform_device *pdev)
+ return -ENODEV;
+ }
+ parent = irq_find_host(irq_parent);
++ of_node_put(irq_parent);
+ if (!parent) {
+ dev_err(dev, "no IRQ parent domain\n");
+ return -ENODEV;
+ }
+
+- for (i = 0; i < ngpio; i++)
+- chip->irq_number[i] = platform_get_irq(pdev, i);
++ for (i = 0; i < ngpio; i++) {
++ ret = platform_get_irq(pdev, i);
++ if (ret < 0)
++ return ret;
++ chip->irq_number[i] = ret;
++ }
+
+ ret = bgpio_init(&chip->gc, dev, 4,
+ chip->base + SIFIVE_GPIO_INPUT_VAL,
+@@ -223,7 +228,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
+ NULL,
+ chip->base + SIFIVE_GPIO_OUTPUT_EN,
+ chip->base + SIFIVE_GPIO_INPUT_EN,
+- 0);
++ BGPIOF_READ_OUTPUT_REG_SET);
+ if (ret) {
+ dev_err(dev, "unable to init generic GPIO\n");
+ return ret;
+diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c
+index 718a508d3b2f8..de6afa3f97168 100644
+--- a/drivers/gpio/gpio-tb10x.c
++++ b/drivers/gpio/gpio-tb10x.c
+@@ -62,14 +62,14 @@ static inline void tb10x_set_bits(struct tb10x_gpio *gpio, unsigned int offs,
+ u32 r;
+ unsigned long flags;
+
+- spin_lock_irqsave(&gpio->gc.bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gpio->gc.bgpio_lock, flags);
+
+ r = tb10x_reg_read(gpio, offs);
+ r = (r & ~mask) | (val & mask);
+
+ tb10x_reg_write(gpio, offs, r);
+
+- spin_unlock_irqrestore(&gpio->gc.bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gpio->gc.bgpio_lock, flags);
+ }
+
+ static int tb10x_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
+index c99858f40a27e..00762de3d4096 100644
+--- a/drivers/gpio/gpio-tegra186.c
++++ b/drivers/gpio/gpio-tegra186.c
+@@ -337,9 +337,12 @@ static int tegra186_gpio_of_xlate(struct gpio_chip *chip,
+ return offset + pin;
+ }
+
++#define to_tegra_gpio(x) container_of((x), struct tegra_gpio, gpio)
++
+ static void tegra186_irq_ack(struct irq_data *data)
+ {
+- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
++ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
++ struct tegra_gpio *gpio = to_tegra_gpio(gc);
+ void __iomem *base;
+
+ base = tegra186_gpio_get_base(gpio, data->hwirq);
+@@ -351,7 +354,8 @@ static void tegra186_irq_ack(struct irq_data *data)
+
+ static void tegra186_irq_mask(struct irq_data *data)
+ {
+- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
++ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
++ struct tegra_gpio *gpio = to_tegra_gpio(gc);
+ void __iomem *base;
+ u32 value;
+
+@@ -366,7 +370,8 @@ static void tegra186_irq_mask(struct irq_data *data)
+
+ static void tegra186_irq_unmask(struct irq_data *data)
+ {
+- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
++ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
++ struct tegra_gpio *gpio = to_tegra_gpio(gc);
+ void __iomem *base;
+ u32 value;
+
+@@ -381,7 +386,8 @@ static void tegra186_irq_unmask(struct irq_data *data)
+
+ static int tegra186_irq_set_type(struct irq_data *data, unsigned int type)
+ {
+- struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data);
++ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
++ struct tegra_gpio *gpio = to_tegra_gpio(gc);
+ void __iomem *base;
+ u32 value;
+
+diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c
+index 423b7bc30ae88..03a523a6d6fa4 100644
+--- a/drivers/gpio/gpio-tps68470.c
++++ b/drivers/gpio/gpio-tps68470.c
+@@ -91,13 +91,13 @@ static int tps68470_gpio_output(struct gpio_chip *gc, unsigned int offset,
+ struct tps68470_gpio_data *tps68470_gpio = gpiochip_get_data(gc);
+ struct regmap *regmap = tps68470_gpio->tps68470_regmap;
+
++ /* Set the initial value */
++ tps68470_gpio_set(gc, offset, value);
++
+ /* rest are always outputs */
+ if (offset >= TPS68470_N_REGULAR_GPIO)
+ return 0;
+
+- /* Set the initial value */
+- tps68470_gpio_set(gc, offset, value);
+-
+ return regmap_update_bits(regmap, TPS68470_GPIO_CTL_REG_A(offset),
+ TPS68470_GPIO_MODE_MASK,
+ TPS68470_GPIO_MODE_OUT_CMOS);
+diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
+index d885032cf814d..d918d2df4de2c 100644
+--- a/drivers/gpio/gpio-ts4900.c
++++ b/drivers/gpio/gpio-ts4900.c
+@@ -1,7 +1,7 @@
+ /*
+ * Digital I/O driver for Technologic Systems I2C FPGA Core
+ *
+- * Copyright (C) 2015 Technologic Systems
++ * Copyright (C) 2015, 2018 Technologic Systems
+ * Copyright (C) 2016 Savoir-Faire Linux
+ *
+ * This program is free software; you can redistribute it and/or
+@@ -55,19 +55,33 @@ static int ts4900_gpio_direction_input(struct gpio_chip *chip,
+ {
+ struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
+
+- /*
+- * This will clear the output enable bit, the other bits are
+- * dontcare when this is cleared
++ /* Only clear the OE bit here, requires a RMW. Prevents potential issue
++ * with OE and data getting to the physical pin at different times.
+ */
+- return regmap_write(priv->regmap, offset, 0);
++ return regmap_update_bits(priv->regmap, offset, TS4900_GPIO_OE, 0);
+ }
+
+ static int ts4900_gpio_direction_output(struct gpio_chip *chip,
+ unsigned int offset, int value)
+ {
+ struct ts4900_gpio_priv *priv = gpiochip_get_data(chip);
++ unsigned int reg;
+ int ret;
+
++ /* If changing from an input to an output, we need to first set the
++ * proper data bit to what is requested and then set OE bit. This
++ * prevents a glitch that can occur on the IO line
++ */
++ regmap_read(priv->regmap, offset, &reg);
++ if (!(reg & TS4900_GPIO_OE)) {
++ if (value)
++ reg = TS4900_GPIO_OUT;
++ else
++ reg &= ~TS4900_GPIO_OUT;
++
++ regmap_write(priv->regmap, offset, reg);
++ }
++
+ if (value)
+ ret = regmap_write(priv->regmap, offset, TS4900_GPIO_OE |
+ TS4900_GPIO_OUT);
+diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
+index e0f2b67558e74..edb28af7ba3b0 100644
+--- a/drivers/gpio/gpio-vf610.c
++++ b/drivers/gpio/gpio-vf610.c
+@@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
+ {
+ struct vf610_gpio_port *port = gpiochip_get_data(chip);
+ unsigned long mask = BIT(gpio);
++ u32 val;
+
+- if (port->sdata && port->sdata->have_paddr)
+- vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR);
++ if (port->sdata && port->sdata->have_paddr) {
++ val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR);
++ val |= mask;
++ vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR);
++ }
+
+ vf610_gpio_set(chip, gpio, value);
+
+@@ -300,7 +304,7 @@ static int vf610_gpio_probe(struct platform_device *pdev)
+ gc = &port->gc;
+ gc->of_node = np;
+ gc->parent = dev;
+- gc->label = "vf610-gpio";
++ gc->label = dev_name(dev);
+ gc->ngpio = VF610_GPIO_PER_PORT;
+ gc->base = of_alias_get_id(np, "gpio") * VF610_GPIO_PER_PORT;
+
+diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c
+index d24f1c9264bc9..dd3b23c9580b1 100644
+--- a/drivers/gpio/gpio-virtio.c
++++ b/drivers/gpio/gpio-virtio.c
+@@ -81,11 +81,7 @@ static int _virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio,
+ virtqueue_kick(vgpio->request_vq);
+ mutex_unlock(&vgpio->lock);
+
+- if (!wait_for_completion_timeout(&line->completion, HZ)) {
+- dev_err(dev, "GPIO operation timed out\n");
+- ret = -ETIMEDOUT;
+- goto out;
+- }
++ wait_for_completion(&line->completion);
+
+ if (unlikely(res->status != VIRTIO_GPIO_STATUS_OK)) {
+ dev_err(dev, "GPIO request failed: %d\n", gpio);
+diff --git a/drivers/gpio/gpio-visconti.c b/drivers/gpio/gpio-visconti.c
+index 47455810bdb91..e6534ea1eaa7a 100644
+--- a/drivers/gpio/gpio-visconti.c
++++ b/drivers/gpio/gpio-visconti.c
+@@ -130,7 +130,6 @@ static int visconti_gpio_probe(struct platform_device *pdev)
+ struct gpio_irq_chip *girq;
+ struct irq_domain *parent;
+ struct device_node *irq_parent;
+- struct fwnode_handle *fwnode;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+@@ -150,14 +149,12 @@ static int visconti_gpio_probe(struct platform_device *pdev)
+ }
+
+ parent = irq_find_host(irq_parent);
++ of_node_put(irq_parent);
+ if (!parent) {
+ dev_err(dev, "No IRQ parent domain\n");
+ return -ENODEV;
+ }
+
+- fwnode = of_node_to_fwnode(irq_parent);
+- of_node_put(irq_parent);
+-
+ ret = bgpio_init(&priv->gpio_chip, dev, 4,
+ priv->base + GPIO_IDATA,
+ priv->base + GPIO_OSET,
+@@ -180,7 +177,7 @@ static int visconti_gpio_probe(struct platform_device *pdev)
+
+ girq = &priv->gpio_chip.irq;
+ girq->chip = irq_chip;
+- girq->fwnode = fwnode;
++ girq->fwnode = of_node_to_fwnode(dev->of_node);
+ girq->parent_domain = parent;
+ girq->child_to_parent_hwirq = visconti_gpio_child_to_parent_hwirq;
+ girq->populate_parent_alloc_arg = visconti_gpio_populate_parent_fwspec;
+diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c
+index 98cd715ccc33c..8d09b619c1669 100644
+--- a/drivers/gpio/gpio-vr41xx.c
++++ b/drivers/gpio/gpio-vr41xx.c
+@@ -217,8 +217,6 @@ static int giu_get_irq(unsigned int irq)
+ printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n",
+ maskl, pendl, maskh, pendh);
+
+- atomic_inc(&irq_err_count);
+-
+ return -EINVAL;
+ }
+
+diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c
+index 7f8f5b02e31d5..4b61d975cc0ec 100644
+--- a/drivers/gpio/gpio-winbond.c
++++ b/drivers/gpio/gpio-winbond.c
+@@ -385,12 +385,13 @@ static int winbond_gpio_get(struct gpio_chip *gc, unsigned int offset)
+ unsigned long *base = gpiochip_get_data(gc);
+ const struct winbond_gpio_info *info;
+ bool val;
++ int ret;
+
+ winbond_gpio_get_info(&offset, &info);
+
+- val = winbond_sio_enter(*base);
+- if (val)
+- return val;
++ ret = winbond_sio_enter(*base);
++ if (ret)
++ return ret;
+
+ winbond_sio_select_logical(*base, info->dev);
+
+diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
+index a1b66338d077d..db616ae560a3c 100644
+--- a/drivers/gpio/gpio-xilinx.c
++++ b/drivers/gpio/gpio-xilinx.c
+@@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v)
+ const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5);
+
+ map[index] &= ~(0xFFFFFFFFul << offset);
+- map[index] |= v << offset;
++ map[index] |= (unsigned long)v << offset;
+ }
+
+ static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch)
+diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
+index 47712b6903b51..53be0bdf2bc38 100644
+--- a/drivers/gpio/gpiolib-acpi.c
++++ b/drivers/gpio/gpiolib-acpi.c
+@@ -311,7 +311,8 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
+ if (IS_ERR(desc))
+ return desc;
+
+- ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout);
++ /* ACPI uses hundredths of milliseconds units */
++ ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10);
+ if (ret)
+ dev_warn(chip->parent,
+ "Failed to set debounce-timeout for pin 0x%04X, err %d\n",
+@@ -391,8 +392,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
+ pin = agpio->pin_table[0];
+
+ if (pin <= 255) {
+- char ev_name[5];
+- sprintf(ev_name, "_%c%02hhX",
++ char ev_name[8];
++ sprintf(ev_name, "_%c%02X",
+ agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L',
+ pin);
+ if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle)))
+@@ -1052,17 +1053,25 @@ int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int ind
+ if (ret < 0)
+ return ret;
+
+- ret = gpio_set_debounce_timeout(desc, info.debounce);
++ /* ACPI uses hundredths of milliseconds units */
++ ret = gpio_set_debounce_timeout(desc, info.debounce * 10);
+ if (ret)
+ return ret;
+
+ irq_flags = acpi_dev_get_irq_type(info.triggering,
+ info.polarity);
+
+- /* Set type if specified and different than the current one */
+- if (irq_flags != IRQ_TYPE_NONE &&
+- irq_flags != irq_get_trigger_type(irq))
+- irq_set_irq_type(irq, irq_flags);
++ /*
++ * If the IRQ is not already in use then set type
++ * if specified and different than the current one.
++ */
++ if (can_request_irq(irq, irq_flags)) {
++ if (irq_flags != IRQ_TYPE_NONE &&
++ irq_flags != irq_get_trigger_type(irq))
++ irq_set_irq_type(irq, irq_flags);
++ } else {
++ dev_dbg(&adev->dev, "IRQ %d already in use\n", irq);
++ }
+
+ return irq;
+ }
+diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
+index c7b5446d01fd2..2a2e0691462bf 100644
+--- a/drivers/gpio/gpiolib-cdev.c
++++ b/drivers/gpio/gpiolib-cdev.c
+@@ -54,6 +54,50 @@ static_assert(IS_ALIGNED(sizeof(struct gpio_v2_line_values), 8));
+ * interface to gpiolib GPIOs via ioctl()s.
+ */
+
++typedef __poll_t (*poll_fn)(struct file *, struct poll_table_struct *);
++typedef long (*ioctl_fn)(struct file *, unsigned int, unsigned long);
++typedef ssize_t (*read_fn)(struct file *, char __user *,
++ size_t count, loff_t *);
++
++static __poll_t call_poll_locked(struct file *file,
++ struct poll_table_struct *wait,
++ struct gpio_device *gdev, poll_fn func)
++{
++ __poll_t ret;
++
++ down_read(&gdev->sem);
++ ret = func(file, wait);
++ up_read(&gdev->sem);
++
++ return ret;
++}
++
++static long call_ioctl_locked(struct file *file, unsigned int cmd,
++ unsigned long arg, struct gpio_device *gdev,
++ ioctl_fn func)
++{
++ long ret;
++
++ down_read(&gdev->sem);
++ ret = func(file, cmd, arg);
++ up_read(&gdev->sem);
++
++ return ret;
++}
++
++static ssize_t call_read_locked(struct file *file, char __user *buf,
++ size_t count, loff_t *f_ps,
++ struct gpio_device *gdev, read_fn func)
++{
++ ssize_t ret;
++
++ down_read(&gdev->sem);
++ ret = func(file, buf, count, f_ps);
++ up_read(&gdev->sem);
++
++ return ret;
++}
++
+ /*
+ * GPIO line handle management
+ */
+@@ -190,23 +234,25 @@ static long linehandle_set_config(struct linehandle_state *lh,
+ return 0;
+ }
+
+-static long linehandle_ioctl(struct file *file, unsigned int cmd,
+- unsigned long arg)
++static long linehandle_ioctl_unlocked(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ struct linehandle_state *lh = file->private_data;
+ void __user *ip = (void __user *)arg;
+ struct gpiohandle_data ghd;
+ DECLARE_BITMAP(vals, GPIOHANDLES_MAX);
+- int i;
++ unsigned int i;
++ int ret;
+
+- if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) {
+- /* NOTE: It's ok to read values of output lines. */
+- int ret = gpiod_get_array_value_complex(false,
+- true,
+- lh->num_descs,
+- lh->descs,
+- NULL,
+- vals);
++ if (!lh->gdev->chip)
++ return -ENODEV;
++
++ switch (cmd) {
++ case GPIOHANDLE_GET_LINE_VALUES_IOCTL:
++ /* NOTE: It's okay to read values of output lines */
++ ret = gpiod_get_array_value_complex(false, true,
++ lh->num_descs, lh->descs,
++ NULL, vals);
+ if (ret)
+ return ret;
+
+@@ -218,7 +264,7 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd,
+ return -EFAULT;
+
+ return 0;
+- } else if (cmd == GPIOHANDLE_SET_LINE_VALUES_IOCTL) {
++ case GPIOHANDLE_SET_LINE_VALUES_IOCTL:
+ /*
+ * All line descriptors were created at once with the same
+ * flags so just check if the first one is really output.
+@@ -240,10 +286,20 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd,
+ lh->descs,
+ NULL,
+ vals);
+- } else if (cmd == GPIOHANDLE_SET_CONFIG_IOCTL) {
++ case GPIOHANDLE_SET_CONFIG_IOCTL:
+ return linehandle_set_config(lh, ip);
++ default:
++ return -EINVAL;
+ }
+- return -EINVAL;
++}
++
++static long linehandle_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ struct linehandle_state *lh = file->private_data;
++
++ return call_ioctl_locked(file, cmd, arg, lh->gdev,
++ linehandle_ioctl_unlocked);
+ }
+
+ #ifdef CONFIG_COMPAT
+@@ -330,7 +386,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
+ goto out_free_lh;
+ }
+
+- ret = gpiod_request(desc, lh->label);
++ ret = gpiod_request_user(desc, lh->label);
+ if (ret)
+ goto out_free_lh;
+ lh->descs[i] = desc;
+@@ -1182,20 +1238,34 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
+ return ret;
+ }
+
+-static long linereq_ioctl(struct file *file, unsigned int cmd,
+- unsigned long arg)
++static long linereq_ioctl_unlocked(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ struct linereq *lr = file->private_data;
+ void __user *ip = (void __user *)arg;
+
+- if (cmd == GPIO_V2_LINE_GET_VALUES_IOCTL)
++ if (!lr->gdev->chip)
++ return -ENODEV;
++
++ switch (cmd) {
++ case GPIO_V2_LINE_GET_VALUES_IOCTL:
+ return linereq_get_values(lr, ip);
+- else if (cmd == GPIO_V2_LINE_SET_VALUES_IOCTL)
++ case GPIO_V2_LINE_SET_VALUES_IOCTL:
+ return linereq_set_values(lr, ip);
+- else if (cmd == GPIO_V2_LINE_SET_CONFIG_IOCTL)
++ case GPIO_V2_LINE_SET_CONFIG_IOCTL:
+ return linereq_set_config(lr, ip);
++ default:
++ return -EINVAL;
++ }
++}
+
+- return -EINVAL;
++static long linereq_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ struct linereq *lr = file->private_data;
++
++ return call_ioctl_locked(file, cmd, arg, lr->gdev,
++ linereq_ioctl_unlocked);
+ }
+
+ #ifdef CONFIG_COMPAT
+@@ -1206,12 +1276,15 @@ static long linereq_ioctl_compat(struct file *file, unsigned int cmd,
+ }
+ #endif
+
+-static __poll_t linereq_poll(struct file *file,
+- struct poll_table_struct *wait)
++static __poll_t linereq_poll_unlocked(struct file *file,
++ struct poll_table_struct *wait)
+ {
+ struct linereq *lr = file->private_data;
+ __poll_t events = 0;
+
++ if (!lr->gdev->chip)
++ return EPOLLHUP | EPOLLERR;
++
+ poll_wait(file, &lr->wait, wait);
+
+ if (!kfifo_is_empty_spinlocked_noirqsave(&lr->events,
+@@ -1221,16 +1294,25 @@ static __poll_t linereq_poll(struct file *file,
+ return events;
+ }
+
+-static ssize_t linereq_read(struct file *file,
+- char __user *buf,
+- size_t count,
+- loff_t *f_ps)
++static __poll_t linereq_poll(struct file *file,
++ struct poll_table_struct *wait)
++{
++ struct linereq *lr = file->private_data;
++
++ return call_poll_locked(file, wait, lr->gdev, linereq_poll_unlocked);
++}
++
++static ssize_t linereq_read_unlocked(struct file *file, char __user *buf,
++ size_t count, loff_t *f_ps)
+ {
+ struct linereq *lr = file->private_data;
+ struct gpio_v2_line_event le;
+ ssize_t bytes_read = 0;
+ int ret;
+
++ if (!lr->gdev->chip)
++ return -ENODEV;
++
+ if (count < sizeof(le))
+ return -EINVAL;
+
+@@ -1275,6 +1357,15 @@ static ssize_t linereq_read(struct file *file,
+ return bytes_read;
+ }
+
++static ssize_t linereq_read(struct file *file, char __user *buf,
++ size_t count, loff_t *f_ps)
++{
++ struct linereq *lr = file->private_data;
++
++ return call_read_locked(file, buf, count, f_ps, lr->gdev,
++ linereq_read_unlocked);
++}
++
+ static void linereq_free(struct linereq *lr)
+ {
+ unsigned int i;
+@@ -1378,7 +1469,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip)
+ goto out_free_linereq;
+ }
+
+- ret = gpiod_request(desc, lr->label);
++ ret = gpiod_request_user(desc, lr->label);
+ if (ret)
+ goto out_free_linereq;
+
+@@ -1490,12 +1581,15 @@ struct lineevent_state {
+ (GPIOEVENT_REQUEST_RISING_EDGE | \
+ GPIOEVENT_REQUEST_FALLING_EDGE)
+
+-static __poll_t lineevent_poll(struct file *file,
+- struct poll_table_struct *wait)
++static __poll_t lineevent_poll_unlocked(struct file *file,
++ struct poll_table_struct *wait)
+ {
+ struct lineevent_state *le = file->private_data;
+ __poll_t events = 0;
+
++ if (!le->gdev->chip)
++ return EPOLLHUP | EPOLLERR;
++
+ poll_wait(file, &le->wait, wait);
+
+ if (!kfifo_is_empty_spinlocked_noirqsave(&le->events, &le->wait.lock))
+@@ -1504,15 +1598,21 @@ static __poll_t lineevent_poll(struct file *file,
+ return events;
+ }
+
++static __poll_t lineevent_poll(struct file *file,
++ struct poll_table_struct *wait)
++{
++ struct lineevent_state *le = file->private_data;
++
++ return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked);
++}
++
+ struct compat_gpioeevent_data {
+ compat_u64 timestamp;
+ u32 id;
+ };
+
+-static ssize_t lineevent_read(struct file *file,
+- char __user *buf,
+- size_t count,
+- loff_t *f_ps)
++static ssize_t lineevent_read_unlocked(struct file *file, char __user *buf,
++ size_t count, loff_t *f_ps)
+ {
+ struct lineevent_state *le = file->private_data;
+ struct gpioevent_data ge;
+@@ -1520,6 +1620,9 @@ static ssize_t lineevent_read(struct file *file,
+ ssize_t ge_size;
+ int ret;
+
++ if (!le->gdev->chip)
++ return -ENODEV;
++
+ /*
+ * When compatible system call is being used the struct gpioevent_data,
+ * in case of at least ia32, has different size due to the alignment
+@@ -1577,6 +1680,15 @@ static ssize_t lineevent_read(struct file *file,
+ return bytes_read;
+ }
+
++static ssize_t lineevent_read(struct file *file, char __user *buf,
++ size_t count, loff_t *f_ps)
++{
++ struct lineevent_state *le = file->private_data;
++
++ return call_read_locked(file, buf, count, f_ps, le->gdev,
++ lineevent_read_unlocked);
++}
++
+ static void lineevent_free(struct lineevent_state *le)
+ {
+ if (le->irq)
+@@ -1594,13 +1706,16 @@ static int lineevent_release(struct inode *inode, struct file *file)
+ return 0;
+ }
+
+-static long lineevent_ioctl(struct file *file, unsigned int cmd,
+- unsigned long arg)
++static long lineevent_ioctl_unlocked(struct file *file, unsigned int cmd,
++ unsigned long arg)
+ {
+ struct lineevent_state *le = file->private_data;
+ void __user *ip = (void __user *)arg;
+ struct gpiohandle_data ghd;
+
++ if (!le->gdev->chip)
++ return -ENODEV;
++
+ /*
+ * We can get the value for an event line but not set it,
+ * because it is input by definition.
+@@ -1623,6 +1738,15 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd,
+ return -EINVAL;
+ }
+
++static long lineevent_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ struct lineevent_state *le = file->private_data;
++
++ return call_ioctl_locked(file, cmd, arg, le->gdev,
++ lineevent_ioctl_unlocked);
++}
++
+ #ifdef CONFIG_COMPAT
+ static long lineevent_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg)
+@@ -1764,7 +1888,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ }
+ }
+
+- ret = gpiod_request(desc, le->label);
++ ret = gpiod_request_user(desc, le->label);
+ if (ret)
+ goto out_free_le;
+ le->desc = desc;
+@@ -1784,7 +1908,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ ret = -ENODEV;
+ goto out_free_le;
+ }
+- le->irq = irq;
+
+ if (eflags & GPIOEVENT_REQUEST_RISING_EDGE)
+ irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ?
+@@ -1798,7 +1921,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ init_waitqueue_head(&le->wait);
+
+ /* Request a thread to read the events */
+- ret = request_threaded_irq(le->irq,
++ ret = request_threaded_irq(irq,
+ lineevent_irq_handler,
+ lineevent_irq_thread,
+ irqflags,
+@@ -1807,6 +1930,8 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ if (ret)
+ goto out_free_le;
+
++ le->irq = irq;
++
+ fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = fd;
+@@ -2113,28 +2238,30 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ return -ENODEV;
+
+ /* Fill in the struct and pass to userspace */
+- if (cmd == GPIO_GET_CHIPINFO_IOCTL) {
++ switch (cmd) {
++ case GPIO_GET_CHIPINFO_IOCTL:
+ return chipinfo_get(cdev, ip);
+ #ifdef CONFIG_GPIO_CDEV_V1
+- } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) {
++ case GPIO_GET_LINEHANDLE_IOCTL:
+ return linehandle_create(gdev, ip);
+- } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) {
++ case GPIO_GET_LINEEVENT_IOCTL:
+ return lineevent_create(gdev, ip);
+- } else if (cmd == GPIO_GET_LINEINFO_IOCTL ||
+- cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) {
+- return lineinfo_get_v1(cdev, ip,
+- cmd == GPIO_GET_LINEINFO_WATCH_IOCTL);
++ case GPIO_GET_LINEINFO_IOCTL:
++ return lineinfo_get_v1(cdev, ip, false);
++ case GPIO_GET_LINEINFO_WATCH_IOCTL:
++ return lineinfo_get_v1(cdev, ip, true);
+ #endif /* CONFIG_GPIO_CDEV_V1 */
+- } else if (cmd == GPIO_V2_GET_LINEINFO_IOCTL ||
+- cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL) {
+- return lineinfo_get(cdev, ip,
+- cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL);
+- } else if (cmd == GPIO_V2_GET_LINE_IOCTL) {
++ case GPIO_V2_GET_LINEINFO_IOCTL:
++ return lineinfo_get(cdev, ip, false);
++ case GPIO_V2_GET_LINEINFO_WATCH_IOCTL:
++ return lineinfo_get(cdev, ip, true);
++ case GPIO_V2_GET_LINE_IOCTL:
+ return linereq_create(gdev, ip);
+- } else if (cmd == GPIO_GET_LINEINFO_UNWATCH_IOCTL) {
++ case GPIO_GET_LINEINFO_UNWATCH_IOCTL:
+ return lineinfo_unwatch(cdev, ip);
++ default:
++ return -EINVAL;
+ }
+- return -EINVAL;
+ }
+
+ #ifdef CONFIG_COMPAT
+@@ -2176,12 +2303,15 @@ static int lineinfo_changed_notify(struct notifier_block *nb,
+ return NOTIFY_OK;
+ }
+
+-static __poll_t lineinfo_watch_poll(struct file *file,
+- struct poll_table_struct *pollt)
++static __poll_t lineinfo_watch_poll_unlocked(struct file *file,
++ struct poll_table_struct *pollt)
+ {
+ struct gpio_chardev_data *cdev = file->private_data;
+ __poll_t events = 0;
+
++ if (!cdev->gdev->chip)
++ return EPOLLHUP | EPOLLERR;
++
+ poll_wait(file, &cdev->wait, pollt);
+
+ if (!kfifo_is_empty_spinlocked_noirqsave(&cdev->events,
+@@ -2191,8 +2321,17 @@ static __poll_t lineinfo_watch_poll(struct file *file,
+ return events;
+ }
+
+-static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
+- size_t count, loff_t *off)
++static __poll_t lineinfo_watch_poll(struct file *file,
++ struct poll_table_struct *pollt)
++{
++ struct gpio_chardev_data *cdev = file->private_data;
++
++ return call_poll_locked(file, pollt, cdev->gdev,
++ lineinfo_watch_poll_unlocked);
++}
++
++static ssize_t lineinfo_watch_read_unlocked(struct file *file, char __user *buf,
++ size_t count, loff_t *off)
+ {
+ struct gpio_chardev_data *cdev = file->private_data;
+ struct gpio_v2_line_info_changed event;
+@@ -2200,6 +2339,9 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
+ int ret;
+ size_t event_size;
+
++ if (!cdev->gdev->chip)
++ return -ENODEV;
++
+ #ifndef CONFIG_GPIO_CDEV_V1
+ event_size = sizeof(struct gpio_v2_line_info_changed);
+ if (count < event_size)
+@@ -2267,6 +2409,15 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
+ return bytes_read;
+ }
+
++static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
++ size_t count, loff_t *off)
++{
++ struct gpio_chardev_data *cdev = file->private_data;
++
++ return call_read_locked(file, buf, count, off, cdev->gdev,
++ lineinfo_watch_read_unlocked);
++}
++
+ /**
+ * gpio_chrdev_open() - open the chardev for ioctl operations
+ * @inode: inode for this chardev
+@@ -2280,13 +2431,17 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
+ struct gpio_chardev_data *cdev;
+ int ret = -ENOMEM;
+
++ down_read(&gdev->sem);
++
+ /* Fail on open if the backing gpiochip is gone */
+- if (!gdev->chip)
+- return -ENODEV;
++ if (!gdev->chip) {
++ ret = -ENODEV;
++ goto out_unlock;
++ }
+
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+ if (!cdev)
+- return -ENOMEM;
++ goto out_unlock;
+
+ cdev->watched_lines = bitmap_zalloc(gdev->chip->ngpio, GFP_KERNEL);
+ if (!cdev->watched_lines)
+@@ -2309,6 +2464,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
+ if (ret)
+ goto out_unregister_notifier;
+
++ up_read(&gdev->sem);
++
+ return ret;
+
+ out_unregister_notifier:
+@@ -2318,6 +2475,8 @@ out_free_bitmap:
+ bitmap_free(cdev->watched_lines);
+ out_free_cdev:
+ kfree(cdev);
++out_unlock:
++ up_read(&gdev->sem);
+ return ret;
+ }
+
+diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
+index 0ad288ab6262d..7a96eb626a08b 100644
+--- a/drivers/gpio/gpiolib-of.c
++++ b/drivers/gpio/gpiolib-of.c
+@@ -863,7 +863,8 @@ int of_mm_gpiochip_add_data(struct device_node *np,
+ if (mm_gc->save_regs)
+ mm_gc->save_regs(mm_gc);
+
+- mm_gc->gc.of_node = np;
++ of_node_put(mm_gc->gc.of_node);
++ mm_gc->gc.of_node = of_node_get(np);
+
+ ret = gpiochip_add_data(gc, data);
+ if (ret)
+@@ -871,6 +872,7 @@ int of_mm_gpiochip_add_data(struct device_node *np,
+
+ return 0;
+ err2:
++ of_node_put(np);
+ iounmap(mm_gc->regs);
+ err1:
+ kfree(gc->label);
+@@ -912,7 +914,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip)
+ i, &start);
+ of_property_read_u32_index(np, "gpio-reserved-ranges",
+ i + 1, &count);
+- if (start >= chip->ngpio || start + count >= chip->ngpio)
++ if (start >= chip->ngpio || start + count > chip->ngpio)
+ continue;
+
+ bitmap_clear(chip->valid_mask, start, count);
+@@ -933,6 +935,11 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip)
+ if (!np)
+ return 0;
+
++ if (!of_property_read_bool(np, "gpio-ranges") &&
++ chip->of_gpio_ranges_fallback) {
++ return chip->of_gpio_ranges_fallback(chip, np);
++ }
++
+ group_names = of_find_property(np, group_names_propname, NULL);
+
+ for (;; index++) {
+diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
+index 4098bc7f88b7e..44c1ad51b3fe9 100644
+--- a/drivers/gpio/gpiolib-sysfs.c
++++ b/drivers/gpio/gpiolib-sysfs.c
+@@ -475,12 +475,9 @@ static ssize_t export_store(struct class *class,
+ * they may be undone on its behalf too.
+ */
+
+- status = gpiod_request(desc, "sysfs");
+- if (status) {
+- if (status == -EPROBE_DEFER)
+- status = -ENODEV;
++ status = gpiod_request_user(desc, "sysfs");
++ if (status)
+ goto done;
+- }
+
+ status = gpiod_set_transitory(desc, false);
+ if (!status) {
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index d1b9b721218f2..f9fdd117c654c 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -189,9 +189,8 @@ static int gpiochip_find_base(int ngpio)
+ /* found a free space? */
+ if (gdev->base + gdev->ngpio <= base)
+ break;
+- else
+- /* nope, check the space right before the chip */
+- base = gdev->base - ngpio;
++ /* nope, check the space right before the chip */
++ base = gdev->base - ngpio;
+ }
+
+ if (gpio_is_valid(base)) {
+@@ -525,12 +524,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
+ if (ret)
+ return ret;
+
++ /* From this point, the .release() function cleans up gpio_device */
++ gdev->dev.release = gpiodevice_release;
++
+ ret = gpiochip_sysfs_register(gdev);
+ if (ret)
+ goto err_remove_device;
+
+- /* From this point, the .release() function cleans up gpio_device */
+- gdev->dev.release = gpiodevice_release;
+ dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base,
+ gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic");
+
+@@ -594,11 +594,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ struct lock_class_key *request_key)
+ {
+ struct fwnode_handle *fwnode = gc->parent ? dev_fwnode(gc->parent) : NULL;
+- unsigned long flags;
+- int ret = 0;
+- unsigned i;
+- int base = gc->base;
+ struct gpio_device *gdev;
++ unsigned long flags;
++ unsigned int i;
++ u32 ngpios = 0;
++ int base = 0;
++ int ret = 0;
+
+ /*
+ * First: allocate and populate the internal stat container, and
+@@ -640,22 +641,43 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ else
+ gdev->owner = THIS_MODULE;
+
+- gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL);
+- if (!gdev->descs) {
+- ret = -ENOMEM;
+- goto err_free_dev_name;
++ /*
++ * Try the device properties if the driver didn't supply the number
++ * of GPIO lines.
++ */
++ ngpios = gc->ngpio;
++ if (ngpios == 0) {
++ ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios);
++ if (ret == -ENODATA)
++ /*
++ * -ENODATA means that there is no property found and
++ * we want to issue the error message to the user.
++ * Besides that, we want to return different error code
++ * to state that supplied value is not valid.
++ */
++ ngpios = 0;
++ else if (ret)
++ goto err_free_dev_name;
++
++ gc->ngpio = ngpios;
+ }
+
+ if (gc->ngpio == 0) {
+ chip_err(gc, "tried to insert a GPIO chip with zero lines\n");
+ ret = -EINVAL;
+- goto err_free_descs;
++ goto err_free_dev_name;
+ }
+
+ if (gc->ngpio > FASTPATH_NGPIO)
+ chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n",
+ gc->ngpio, FASTPATH_NGPIO);
+
++ gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
++ if (!gdev->descs) {
++ ret = -ENOMEM;
++ goto err_free_dev_name;
++ }
++
+ gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL);
+ if (!gdev->label) {
+ ret = -ENOMEM;
+@@ -674,11 +696,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ * it may be a pipe dream. It will not happen before we get rid
+ * of the sysfs interface anyways.
+ */
++ base = gc->base;
+ if (base < 0) {
+ base = gpiochip_find_base(gc->ngpio);
+ if (base < 0) {
+- ret = base;
+ spin_unlock_irqrestore(&gpio_lock, flags);
++ ret = base;
++ base = 0;
+ goto err_free_label;
+ }
+ /*
+@@ -703,6 +727,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+ BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier);
++ init_rwsem(&gdev->sem);
+
+ #ifdef CONFIG_PINCTRL
+ INIT_LIST_HEAD(&gdev->pin_ranges);
+@@ -786,6 +811,11 @@ err_remove_of_chip:
+ err_free_gpiochip_mask:
+ gpiochip_remove_pin_ranges(gc);
+ gpiochip_free_valid_mask(gc);
++ if (gdev->dev.release) {
++ /* release() has been registered by gpiochip_setup_dev() */
++ put_device(&gdev->dev);
++ goto err_print_message;
++ }
+ err_remove_from_list:
+ spin_lock_irqsave(&gpio_lock, flags);
+ list_del(&gdev->list);
+@@ -799,13 +829,14 @@ err_free_dev_name:
+ err_free_ida:
+ ida_free(&gpio_ida, gdev->id);
+ err_free_gdev:
++ kfree(gdev);
++err_print_message:
+ /* failures here can mean systems won't boot... */
+ if (ret != -EPROBE_DEFER) {
+ pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
+- gdev->base, gdev->base + gdev->ngpio - 1,
++ base, base + (int)ngpios - 1,
+ gc->label ? : "generic", ret);
+ }
+- kfree(gdev);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key);
+@@ -835,6 +866,8 @@ void gpiochip_remove(struct gpio_chip *gc)
+ unsigned long flags;
+ unsigned int i;
+
++ down_write(&gdev->sem);
++
+ /* FIXME: should the legacy sysfs handling be moved to gpio_device? */
+ gpiochip_sysfs_unregister(gdev);
+ gpiochip_free_hogs(gc);
+@@ -869,6 +902,7 @@ void gpiochip_remove(struct gpio_chip *gc)
+ * gone.
+ */
+ gcdev_unregister(gdev);
++ up_write(&gdev->sem);
+ put_device(&gdev->dev);
+ }
+ EXPORT_SYMBOL_GPL(gpiochip_remove);
+@@ -1368,6 +1402,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
+ {
+ struct irq_domain *domain = gc->irq.domain;
+
++#ifdef CONFIG_GPIOLIB_IRQCHIP
++ /*
++ * Avoid race condition with other code, which tries to lookup
++ * an IRQ before the irqchip has been properly registered,
++ * i.e. while gpiochip is still being brought up.
++ */
++ if (!gc->irq.initialized)
++ return -EPROBE_DEFER;
++#endif
++
+ if (!gpiochip_irqchip_irq_valid(gc, offset))
+ return -ENXIO;
+
+@@ -1534,9 +1578,14 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
+ }
+
+ if (gc->irq.parent_handler) {
+- void *data = gc->irq.parent_handler_data ?: gc;
+-
+ for (i = 0; i < gc->irq.num_parents; i++) {
++ void *data;
++
++ if (gc->irq.per_parent_data)
++ data = gc->irq.parent_handler_data_array[i];
++ else
++ data = gc->irq.parent_handler_data ?: gc;
++
+ /*
+ * The parent IRQ chip is already using the chip_data
+ * for this IRQ chip, so our callbacks simply use the
+@@ -1550,6 +1599,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
+
+ gpiochip_set_irq_hooks(gc);
+
++ /*
++ * Using barrier() here to prevent compiler from reordering
++ * gc->irq.initialized before initialization of above
++ * GPIO chip irq members.
++ */
++ barrier();
++
++ gc->irq.initialized = true;
++
+ acpi_gpiochip_request_interrupts(gc);
+
+ return 0;
+@@ -1625,6 +1683,14 @@ int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
+ gc->to_irq = gpiochip_to_irq;
+ gc->irq.domain = domain;
+
++ /*
++ * Using barrier() here to prevent compiler from reordering
++ * gc->irq.initialized before adding irqdomain.
++ */
++ barrier();
++
++ gc->irq.initialized = true;
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain);
+@@ -2186,6 +2252,16 @@ static int gpio_set_bias(struct gpio_desc *desc)
+ return gpio_set_config_with_argument_optional(desc, bias, arg);
+ }
+
++/**
++ * gpio_set_debounce_timeout() - Set debounce timeout
++ * @desc: GPIO descriptor to set the debounce timeout
++ * @debounce: Debounce timeout in microseconds
++ *
++ * The function calls the certain GPIO driver to set debounce timeout
++ * in the hardware.
++ *
++ * Returns 0 on success, or negative error code otherwise.
++ */
+ int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce)
+ {
+ return gpio_set_config_with_argument_optional(desc,
+@@ -2350,8 +2426,7 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
+ ret = gpiod_direction_input(desc);
+ goto set_output_flag;
+ }
+- }
+- else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) {
++ } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) {
+ ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_SOURCE);
+ if (!ret)
+ goto set_output_value;
+@@ -2508,9 +2583,9 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc)
+ static int gpio_chip_get_multiple(struct gpio_chip *gc,
+ unsigned long *mask, unsigned long *bits)
+ {
+- if (gc->get_multiple) {
++ if (gc->get_multiple)
+ return gc->get_multiple(gc, mask, bits);
+- } else if (gc->get) {
++ if (gc->get) {
+ int i, value;
+
+ for_each_set_bit(i, mask, gc->ngpio) {
+@@ -3106,6 +3181,16 @@ int gpiod_to_irq(const struct gpio_desc *desc)
+
+ return retirq;
+ }
++#ifdef CONFIG_GPIOLIB_IRQCHIP
++ if (gc->irq.chip) {
++ /*
++ * Avoid race condition with other code, which tries to lookup
++ * an IRQ before the irqchip has been properly registered,
++ * i.e. while gpiochip is still being brought up.
++ */
++ return -EPROBE_DEFER;
++ }
++#endif
+ return -ENXIO;
+ }
+ EXPORT_SYMBOL_GPL(gpiod_to_irq);
+diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
+index 30bc3f80f83e6..73b732a1d9c94 100644
+--- a/drivers/gpio/gpiolib.h
++++ b/drivers/gpio/gpiolib.h
+@@ -15,6 +15,7 @@
+ #include <linux/device.h>
+ #include <linux/module.h>
+ #include <linux/cdev.h>
++#include <linux/rwsem.h>
+
+ #define GPIOCHIP_NAME "gpiochip"
+
+@@ -37,6 +38,12 @@
+ * or name of the IP component in a System on Chip.
+ * @data: per-instance data assigned by the driver
+ * @list: links gpio_device:s together for traversal
++ * @notifier: used to notify subscribers about lines being requested, released
++ * or reconfigured
++ * @sem: protects the structure from a NULL-pointer dereference of @chip by
++ * user-space operations when the device gets unregistered during
++ * a hot-unplug event
++ * @pin_ranges: range of pins served by the GPIO driver
+ *
+ * This state container holds most of the runtime variable data
+ * for a GPIO device and can hold references and live on after the
+@@ -57,6 +64,7 @@ struct gpio_device {
+ void *data;
+ struct list_head list;
+ struct blocking_notifier_head notifier;
++ struct rw_semaphore sem;
+
+ #ifdef CONFIG_PINCTRL
+ /*
+@@ -72,6 +80,20 @@ struct gpio_device {
+ /* gpio suffixes used for ACPI and device tree lookup */
+ static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" };
+
++/**
++ * struct gpio_array - Opaque descriptor for a structure of GPIO array attributes
++ *
++ * @desc: Array of pointers to the GPIO descriptors
++ * @size: Number of elements in desc
++ * @chip: Parent GPIO chip
++ * @get_mask: Get mask used in fastpath
++ * @set_mask: Set mask used in fastpath
++ * @invert_mask: Invert mask used in fastpath
++ *
++ * This structure is attached to struct gpiod_descs obtained from
++ * gpiod_get_array() and can be passed back to get/set array functions in order
++ * to activate fast processing path if applicable.
++ */
+ struct gpio_array {
+ struct gpio_desc **desc;
+ unsigned int size;
+@@ -96,6 +118,23 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
+ extern spinlock_t gpio_lock;
+ extern struct list_head gpio_devices;
+
++
++/**
++ * struct gpio_desc - Opaque descriptor for a GPIO
++ *
++ * @gdev: Pointer to the parent GPIO device
++ * @flags: Binary descriptor flags
++ * @label: Name of the consumer
++ * @name: Line name
++ * @hog: Pointer to the device node that hogs this line (if any)
++ * @debounce_period_us: Debounce period in microseconds
++ *
++ * These are obtained using gpiod_get() and are preferable to the old
++ * integer-based handles.
++ *
++ * Contrary to integers, a pointer to a &struct gpio_desc is guaranteed to be
++ * valid until the GPIO is released.
++ */
+ struct gpio_desc {
+ struct gpio_device *gdev;
+ unsigned long flags;
+@@ -135,6 +174,18 @@ struct gpio_desc {
+
+ int gpiod_request(struct gpio_desc *desc, const char *label);
+ void gpiod_free(struct gpio_desc *desc);
++
++static inline int gpiod_request_user(struct gpio_desc *desc, const char *label)
++{
++ int ret;
++
++ ret = gpiod_request(desc, label);
++ if (ret == -EPROBE_DEFER)
++ ret = -ENODEV;
++
++ return ret;
++}
++
+ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
+ unsigned long lflags, enum gpiod_flags dflags);
+ int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce);
+diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+index 5b393622f5920..a0f0a17e224fe 100644
+--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
++++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+@@ -119,6 +119,7 @@
+ #define CONNECTOR_OBJECT_ID_eDP 0x14
+ #define CONNECTOR_OBJECT_ID_MXM 0x15
+ #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16
++#define CONNECTOR_OBJECT_ID_USBC 0x17
+
+ /* deleted */
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 269437b013280..d90da384d1851 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -312,7 +312,7 @@ enum amdgpu_kiq_irq {
+ AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
+ AMDGPU_CP_KIQ_IRQ_LAST
+ };
+-
++#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
+ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
+ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+ #define MAX_KIQ_REG_TRY 1000
+@@ -1069,6 +1069,7 @@ struct amdgpu_device {
+ bool runpm;
+ bool in_runpm;
+ bool has_pr3;
++ bool is_fw_fb;
+
+ bool pm_sysfs_en;
+ bool ucode_sysfs_en;
+@@ -1078,8 +1079,6 @@ struct amdgpu_device {
+ char product_name[32];
+ char serial[20];
+
+- struct amdgpu_autodump autodump;
+-
+ atomic_t throttling_logging_enabled;
+ struct ratelimit_state throttling_logging_rs;
+ uint32_t ras_hw_enabled;
+@@ -1286,6 +1285,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+ int amdgpu_device_pci_reset(struct amdgpu_device *adev);
+ bool amdgpu_device_need_post(struct amdgpu_device *adev);
++bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
++bool amdgpu_device_aspm_support_quirk(void);
+
+ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes);
+@@ -1398,12 +1399,10 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
+ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+
+ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
+-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+ void amdgpu_acpi_detect(void);
+ #else
+ static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+ static inline void amdgpu_acpi_detect(void) { }
+ static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
+ static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+@@ -1412,6 +1411,16 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
+ enum amdgpu_ss ss_state) { return 0; }
+ #endif
+
++#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
++bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
++bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
++bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
++#else
++static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
++static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
++static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
++#endif
++
+ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **mapping);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+index 4811b0faafd9a..6cded09d5878a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+@@ -1031,6 +1031,38 @@ void amdgpu_acpi_detect(void)
+ }
+ }
+
++#if IS_ENABLED(CONFIG_SUSPEND)
++/**
++ * amdgpu_acpi_is_s3_active
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * returns true if supported, false if not.
++ */
++bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
++{
++ return !(adev->flags & AMD_IS_APU) ||
++ (pm_suspend_target_state == PM_SUSPEND_MEM);
++}
++
++/**
++ * amdgpu_acpi_should_gpu_reset
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * returns true if should reset GPU, false if not
++ */
++bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
++{
++ if (adev->flags & AMD_IS_APU)
++ return false;
++
++ if (amdgpu_sriov_vf(adev))
++ return false;
++
++ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
++}
++
+ /**
+ * amdgpu_acpi_is_s0ix_active
+ *
+@@ -1040,11 +1072,24 @@ void amdgpu_acpi_detect(void)
+ */
+ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
+ {
+-#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND)
+- if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
+- if (adev->flags & AMD_IS_APU)
+- return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
++ if (!(adev->flags & AMD_IS_APU) ||
++ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
++ return false;
++
++ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
++ dev_warn_once(adev->dev,
++ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
++ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
++ return false;
+ }
+-#endif
++
++#if !IS_ENABLED(CONFIG_AMD_PMC)
++ dev_warn_once(adev->dev,
++ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
+ return false;
++#else
++ return true;
++#endif /* CONFIG_AMD_PMC */
+ }
++
++#endif /* CONFIG_SUSPEND */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+index 1d41c2c00623b..5690cb6d27fed 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+@@ -768,7 +768,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid,
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ bool all_hub = false;
+
+- if (adev->family == AMDGPU_FAMILY_AI)
++ if (adev->family == AMDGPU_FAMILY_AI ||
++ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+index 46cd4ee6bafb7..f3743089a1c99 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+@@ -44,5 +44,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
++ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index 054c1a224defb..00a8aef48a696 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -476,13 +476,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ int ret;
+
++ if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
++ return -EINVAL;
++
+ ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+ if (unlikely(!ttm->sg))
+ return -ENOMEM;
+
+- if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+- return -EINVAL;
+-
+ /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+ ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+ ttm->num_pages, 0,
+@@ -1318,16 +1318,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+ {
+ struct amdkfd_process_info *process_info = vm->process_info;
+- struct amdgpu_bo *pd = vm->root.bo;
+
+ if (!process_info)
+ return;
+
+- /* Release eviction fence from PD */
+- amdgpu_bo_reserve(pd, false);
+- amdgpu_bo_fence(pd, NULL, false);
+- amdgpu_bo_unreserve(pd);
+-
+ /* Update process info */
+ mutex_lock(&process_info->lock);
+ process_info->n_vms--;
+@@ -1393,7 +1387,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct sg_table *sg = NULL;
+ uint64_t user_addr = 0;
+ struct amdgpu_bo *bo;
+- struct drm_gem_object *gobj;
++ struct drm_gem_object *gobj = NULL;
+ u32 domain, alloc_domain;
+ u64 alloc_flags;
+ int ret;
+@@ -1503,14 +1497,16 @@ allocate_init_user_pages_failed:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+ err_node_allow:
+- amdgpu_bo_unref(&bo);
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_limit;
+ err_bo_create:
+ unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+ err_reserve_limit:
+ mutex_destroy(&(*mem)->lock);
+- kfree(*mem);
++ if (gobj)
++ drm_gem_object_put(gobj);
++ else
++ kfree(*mem);
+ err:
+ if (sg) {
+ sg_free_table(sg);
+@@ -1826,9 +1822,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+ return -EINVAL;
+ }
+
+- /* delete kgd_mem from kfd_bo_list to avoid re-validating
+- * this BO in BO's restoring after eviction.
+- */
+ mutex_lock(&mem->process_info->lock);
+
+ ret = amdgpu_bo_reserve(bo, true);
+@@ -1851,7 +1844,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, mem->process_info->eviction_fence);
+- list_del_init(&mem->validate_list.head);
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+@@ -1918,7 +1910,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+
+ ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+ if (ret) {
+- kfree(mem);
++ kfree(*mem);
+ return ret;
+ }
+
+@@ -2358,6 +2350,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+ if (!attachment->is_mapped)
+ continue;
+
++ if (attachment->bo_va->base.bo->tbo.pin_count)
++ continue;
++
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
+ if (ret) {
+@@ -2397,12 +2392,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+ process_info->eviction_fence = new_fence;
+ *ef = dma_fence_get(&new_fence->base);
+
+- /* Attach new eviction fence to all BOs */
++ /* Attach new eviction fence to all BOs except pinned ones */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+- validate_list.head)
++ validate_list.head) {
++ if (mem->bo->tbo.pin_count)
++ continue;
++
+ amdgpu_bo_fence(mem->bo,
+ &process_info->eviction_fence->base, true);
+-
++ }
+ /* Attach eviction fence to PD / PT BOs */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+index 27b19503773b9..71354f505b84b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+@@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
+
+ if (!found)
+ return false;
++ pci_dev_put(pdev);
+
+ adev->bios = kmalloc(size, GFP_KERNEL);
+ if (!adev->bios) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+index 15c45b2a39835..714178f1b6c6e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+@@ -61,7 +61,7 @@ static void amdgpu_bo_list_free(struct kref *ref)
+
+ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+- unsigned num_entries, struct amdgpu_bo_list **result)
++ size_t num_entries, struct amdgpu_bo_list **result)
+ {
+ unsigned last_entry = 0, first_userptr = num_entries;
+ struct amdgpu_bo_list_entry *array;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+index c905a4cfc173d..044b41f0bfd9c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+@@ -61,7 +61,7 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+- unsigned num_entries,
++ size_t num_entries,
+ struct amdgpu_bo_list **list);
+
+ static inline struct amdgpu_bo_list_entry *
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+index b9c11c2b2885a..c777aff164b76 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+@@ -175,7 +175,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
+
+ /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+- if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
++ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ (mode_clock * 5/4 <= max_tmds_clock))
+ bpc = 10;
+ else
+@@ -315,8 +315,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
+ if (!amdgpu_connector->edid) {
+ /* some laptops provide a hardcoded edid in rom for LCDs */
+ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
+- (connector->connector_type == DRM_MODE_CONNECTOR_eDP)))
++ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
+ amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
++ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
++ }
+ }
+ }
+
+@@ -387,6 +389,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
+ native_mode->vdisplay != 0 &&
+ native_mode->clock != 0) {
+ mode = drm_mode_duplicate(dev, native_mode);
++ if (!mode)
++ return NULL;
++
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ drm_mode_set_name(mode);
+
+@@ -401,6 +406,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
+ * simpler.
+ */
+ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
++ if (!mode)
++ return NULL;
++
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
+ }
+@@ -827,6 +835,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
+
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
++ amdgpu_get_native_mode(connector);
+
+ return ret;
+ }
+@@ -1664,10 +1673,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+
+- if (amdgpu_audio != 0)
++ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
++ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
++ }
+
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = true;
+@@ -1789,6 +1800,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
++ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+@@ -1842,6 +1854,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
++ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+@@ -1892,6 +1905,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
++ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index 913f9eaa9cd65..4b01188385b28 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -115,7 +115,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
+ int ret;
+
+ if (cs->in.num_chunks == 0)
+- return 0;
++ return -EINVAL;
+
+ chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ if (!chunk_array)
+@@ -1508,6 +1508,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+ return 0;
+
+ default:
++ dma_fence_put(fence);
+ return -EINVAL;
+ }
+ }
+@@ -1540,15 +1541,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
+ continue;
+
+ r = dma_fence_wait_timeout(fence, true, timeout);
++ if (r > 0 && fence->error)
++ r = fence->error;
++
+ dma_fence_put(fence);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+-
+- if (fence->error)
+- return fence->error;
+ }
+
+ memset(wait, 0, sizeof(*wait));
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+index 463b9c0283f7e..348629ea0e153 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+@@ -27,7 +27,6 @@
+ #include <linux/pci.h>
+ #include <linux/uaccess.h>
+ #include <linux/pm_runtime.h>
+-#include <linux/poll.h>
+
+ #include "amdgpu.h"
+ #include "amdgpu_pm.h"
+@@ -37,85 +36,7 @@
+ #include "amdgpu_securedisplay.h"
+ #include "amdgpu_fw_attestation.h"
+
+-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
+-{
+ #if defined(CONFIG_DEBUG_FS)
+- unsigned long timeout = 600 * HZ;
+- int ret;
+-
+- wake_up_interruptible(&adev->autodump.gpu_hang);
+-
+- ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
+- if (ret == 0) {
+- pr_err("autodump: timeout, move on to gpu recovery\n");
+- return -ETIMEDOUT;
+- }
+-#endif
+- return 0;
+-}
+-
+-#if defined(CONFIG_DEBUG_FS)
+-
+-static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
+-{
+- struct amdgpu_device *adev = inode->i_private;
+- int ret;
+-
+- file->private_data = adev;
+-
+- ret = down_read_killable(&adev->reset_sem);
+- if (ret)
+- return ret;
+-
+- if (adev->autodump.dumping.done) {
+- reinit_completion(&adev->autodump.dumping);
+- ret = 0;
+- } else {
+- ret = -EBUSY;
+- }
+-
+- up_read(&adev->reset_sem);
+-
+- return ret;
+-}
+-
+-static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
+-{
+- struct amdgpu_device *adev = file->private_data;
+-
+- complete_all(&adev->autodump.dumping);
+- return 0;
+-}
+-
+-static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
+-{
+- struct amdgpu_device *adev = file->private_data;
+-
+- poll_wait(file, &adev->autodump.gpu_hang, poll_table);
+-
+- if (amdgpu_in_reset(adev))
+- return POLLIN | POLLRDNORM | POLLWRNORM;
+-
+- return 0;
+-}
+-
+-static const struct file_operations autodump_debug_fops = {
+- .owner = THIS_MODULE,
+- .open = amdgpu_debugfs_autodump_open,
+- .poll = amdgpu_debugfs_autodump_poll,
+- .release = amdgpu_debugfs_autodump_release,
+-};
+-
+-static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
+-{
+- init_completion(&adev->autodump.dumping);
+- complete_all(&adev->autodump.dumping);
+- init_waitqueue_head(&adev->autodump.gpu_hang);
+-
+- debugfs_create_file("amdgpu_autodump", 0600,
+- adev_to_drm(adev)->primary->debugfs_root,
+- adev, &autodump_debug_fops);
+-}
+
+ /**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+@@ -1255,7 +1176,7 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
+ return r;
+ }
+
+- *val = amdgpu_bo_evict_vram(adev);
++ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+@@ -1268,17 +1189,15 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+- struct ttm_resource_manager *man;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
++ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+- *val = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
++ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+@@ -1588,7 +1507,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
+ }
+
+ amdgpu_ras_debugfs_create_all(adev);
+- amdgpu_debugfs_autodump_init(adev);
+ amdgpu_rap_debugfs_init(adev);
+ amdgpu_securedisplay_debugfs_init(adev);
+ amdgpu_fw_attestation_debugfs_init(adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+index 141a8474e24f2..8b641f40fdf66 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+@@ -26,10 +26,6 @@
+ /*
+ * Debugfs
+ */
+-struct amdgpu_autodump {
+- struct completion dumping;
+- struct wait_queue_head gpu_hang;
+-};
+
+ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
+ int amdgpu_debugfs_init(struct amdgpu_device *adev);
+@@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
+ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+ void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+ void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index af9bdf16eefd4..8b6b47fd9b880 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -30,7 +30,9 @@
+ #include <linux/module.h>
+ #include <linux/console.h>
+ #include <linux/slab.h>
++#include <linux/pci.h>
+
++#include <drm/drm_aperture.h>
+ #include <drm/drm_atomic_helper.h>
+ #include <drm/drm_probe_helper.h>
+ #include <drm/amdgpu_drm.h>
+@@ -73,6 +75,10 @@
+
+ #include <drm/drm_drv.h>
+
++#if IS_ENABLED(CONFIG_X86)
++#include <asm/intel-family.h>
++#endif
++
+ MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
+ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+@@ -88,6 +94,8 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
+
+ #define AMDGPU_RESUME_MS 2000
+
++static const struct drm_driver amdgpu_kms_driver;
++
+ const char *amdgpu_asic_name[] = {
+ "TAHITI",
+ "PITCAIRN",
+@@ -1187,6 +1195,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
+ u16 cmd;
+ int r;
+
++ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
++ return 0;
++
+ /* Bypass for VF */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+@@ -1308,6 +1319,42 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
+ return true;
+ }
+
++/**
++ * amdgpu_device_should_use_aspm - check if the device should program ASPM
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * Confirm whether the module parameter and pcie bridge agree that ASPM should
++ * be set for this device.
++ *
++ * Returns true if it should be used or false if not.
++ */
++bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
++{
++ switch (amdgpu_aspm) {
++ case -1:
++ break;
++ case 0:
++ return false;
++ case 1:
++ return true;
++ default:
++ return false;
++ }
++ return pcie_aspm_enabled(adev->pdev);
++}
++
++bool amdgpu_device_aspm_support_quirk(void)
++{
++#if IS_ENABLED(CONFIG_X86)
++ struct cpuinfo_x86 *c = &cpu_data(0);
++
++ return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
++#else
++ return true;
++#endif
++}
++
+ /* if we get transitioned to only one device, take VGA back */
+ /**
+ * amdgpu_device_vga_set_decode - enable/disable vga decode
+@@ -2069,6 +2116,8 @@ out:
+ */
+ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
+ {
++ struct drm_device *dev = adev_to_drm(adev);
++ struct pci_dev *parent;
+ int i, r;
+
+ amdgpu_device_enable_virtual_display(adev);
+@@ -2168,6 +2217,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
+ return -EINVAL;
+ }
+
++ if (amdgpu_has_atpx() &&
++ (amdgpu_is_atpx_hybrid() ||
++ amdgpu_has_atpx_dgpu_power_cntl()) &&
++ ((adev->flags & AMD_IS_APU) == 0) &&
++ !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
++ adev->flags |= AMD_IS_PX;
++
++ if (!(adev->flags & AMD_IS_APU)) {
++ parent = pci_upstream_bridge(adev->pdev);
++ adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
++ }
++
+ amdgpu_amdkfd_device_probe(adev);
+
+ adev->pm.pp_feature = amdgpu_pp_feature_mask;
+@@ -2348,8 +2409,20 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ }
+ adev->ip_blocks[i].status.sw = true;
+
+- /* need to do gmc hw init early so we can allocate gpu mem */
+- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
++ /* need to do common hw init early so everything is set up for gmc */
++ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
++ if (r) {
++ DRM_ERROR("hw_init %d failed %d\n", i, r);
++ goto init_failed;
++ }
++ adev->ip_blocks[i].status.hw = true;
++ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
++ /* need to do gmc hw init early so we can allocate gpu mem */
++ /* Try to reserve bad pages early */
++ if (amdgpu_sriov_vf(adev))
++ amdgpu_virt_exchange_data(adev);
++
+ r = amdgpu_device_vram_scratch_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+@@ -2394,6 +2467,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ if (r)
+ goto init_failed;
+
++ r = amdgpu_amdkfd_resume_iommu(adev);
++ if (r)
++ goto init_failed;
++
+ r = amdgpu_device_ip_hw_init_phase1(adev);
+ if (r)
+ goto init_failed;
+@@ -2432,15 +2509,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+ if (!adev->gmc.xgmi.pending_reset)
+ amdgpu_amdkfd_device_init(adev);
+
+- r = amdgpu_amdkfd_resume_iommu(adev);
+- if (r)
+- goto init_failed;
+-
+ amdgpu_fru_get_product_info(adev);
+
+ init_failed:
+- if (amdgpu_sriov_vf(adev))
+- amdgpu_virt_release_full_gpu(adev, true);
+
+ return r;
+ }
+@@ -2745,6 +2816,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+ adev->ip_blocks[i].status.hw = false;
+ }
+
++ if (amdgpu_sriov_vf(adev)) {
++ if (amdgpu_virt_release_full_gpu(adev, false))
++ DRM_ERROR("failed to release exclusive mode on fini\n");
++ }
++
+ return 0;
+ }
+
+@@ -2805,10 +2881,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+
+ amdgpu_ras_fini(adev);
+
+- if (amdgpu_sriov_vf(adev))
+- if (amdgpu_virt_release_full_gpu(adev, false))
+- DRM_ERROR("failed to release exclusive mode on fini\n");
+-
+ return 0;
+ }
+
+@@ -2992,8 +3064,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
+ int i, r;
+
+ static enum amd_ip_block_type ip_order[] = {
+- AMD_IP_BLOCK_TYPE_GMC,
+ AMD_IP_BLOCK_TYPE_COMMON,
++ AMD_IP_BLOCK_TYPE_GMC,
+ AMD_IP_BLOCK_TYPE_PSP,
+ AMD_IP_BLOCK_TYPE_IH,
+ };
+@@ -3084,7 +3156,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
++ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
++ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
+
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+@@ -3131,6 +3204,15 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
++
++ if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
++ /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
++ * amdgpu_device_resume() after IP resume.
++ */
++ amdgpu_gfx_off_ctrl(adev, false);
++ DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
++ }
++
+ }
+
+ return 0;
+@@ -3415,6 +3497,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ int r, i;
+ bool px = false;
+ u32 max_MBps;
++ int tmp;
+
+ adev->shutdown = false;
+ adev->flags = flags;
+@@ -3531,6 +3614,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ adev->rmmio_size = pci_resource_len(adev->pdev, 2);
+ }
+
++ for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
++ atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
++
+ adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
+ if (adev->rmmio == NULL) {
+ return -ENOMEM;
+@@ -3571,6 +3657,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ if (r)
+ return r;
+
++ /* Get rid of things like offb */
++ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
++ if (r)
++ return r;
++
+ /* doorbell bar mapping and doorbell index init*/
+ amdgpu_device_doorbell_init(adev);
+
+@@ -3606,7 +3697,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ }
+ }
+ } else {
++ tmp = amdgpu_reset_method;
++ /* It should do a default reset when loading or reloading the driver,
++ * regardless of the module parameter reset_method.
++ */
++ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
++ amdgpu_reset_method = tmp;
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
+@@ -3666,18 +3763,6 @@ fence_driver_init:
+
+ r = amdgpu_device_ip_init(adev);
+ if (r) {
+- /* failed in exclusive mode due to timeout */
+- if (amdgpu_sriov_vf(adev) &&
+- !amdgpu_sriov_runtime(adev) &&
+- amdgpu_virt_mmio_blocked(adev) &&
+- !amdgpu_virt_wait_reset(adev)) {
+- dev_err(adev->dev, "VF exclusive mode timeout\n");
+- /* Don't send request since VF is inactive. */
+- adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+- adev->virt.ops = NULL;
+- r = -EAGAIN;
+- goto release_ras_con;
+- }
+ dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+ goto release_ras_con;
+@@ -3756,8 +3841,10 @@ fence_driver_init:
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+ }
+
+- if (amdgpu_sriov_vf(adev))
++ if (amdgpu_sriov_vf(adev)) {
++ amdgpu_virt_release_full_gpu(adev, true);
+ flush_delayed_work(&adev->delayed_init_work);
++ }
+
+ r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ if (r)
+@@ -3792,6 +3879,20 @@ fence_driver_init:
+ return 0;
+
+ release_ras_con:
++ if (amdgpu_sriov_vf(adev))
++ amdgpu_virt_release_full_gpu(adev, true);
++
++ /* failed in exclusive mode due to timeout */
++ if (amdgpu_sriov_vf(adev) &&
++ !amdgpu_sriov_runtime(adev) &&
++ amdgpu_virt_mmio_blocked(adev) &&
++ !amdgpu_virt_wait_reset(adev)) {
++ dev_err(adev->dev, "VF exclusive mode timeout\n");
++ /* Don't send request since VF is inactive. */
++ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
++ adev->virt.ops = NULL;
++ r = -EAGAIN;
++ }
+ amdgpu_release_ras_context(adev);
+
+ failed:
+@@ -3850,7 +3951,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
+ /* disable all interrupts */
+ amdgpu_irq_disable_all(adev);
+ if (adev->mode_info.mode_config_initialized){
+- if (!amdgpu_device_has_dc_support(adev))
++ if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
+ drm_helper_force_disable_all(adev_to_drm(adev));
+ else
+ drm_atomic_helper_shutdown(adev_to_drm(adev));
+@@ -3876,8 +3977,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
+
+ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+ {
+- amdgpu_device_ip_fini(adev);
+ amdgpu_fence_driver_sw_fini(adev);
++ amdgpu_device_ip_fini(adev);
+ release_firmware(adev->firmware.gpu_info_fw);
+ adev->firmware.gpu_info_fw = NULL;
+ adev->accel_working = false;
+@@ -3909,6 +4010,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+
+ }
+
++/**
++ * amdgpu_device_evict_resources - evict device resources
++ * @adev: amdgpu device object
++ *
++ * Evicts all ttm device resources (vram BOs, gart table) from the lru list
++ * of the vram memory type. Mainly used for evicting device resources
++ * at suspend time.
++ *
++ */
++static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
++{
++ /* No need to evict vram on APUs for suspend to ram or s2idle */
++ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
++ return;
++
++ if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
++ DRM_WARN("evicting device resources failed\n");
++
++}
+
+ /*
+ * Suspend & resume.
+@@ -3926,12 +4046,20 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+ {
+ struct amdgpu_device *adev = drm_to_adev(dev);
++ int r = 0;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ adev->in_suspend = true;
+
++ if (amdgpu_sriov_vf(adev)) {
++ amdgpu_virt_fini_data_exchange(adev);
++ r = amdgpu_virt_request_full_gpu(adev, false);
++ if (r)
++ return r;
++ }
++
+ if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
+ DRM_WARN("smart shift update failed\n");
+
+@@ -3941,6 +4069,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+ amdgpu_fbdev_set_suspend(adev, 1);
+
+ cancel_delayed_work_sync(&adev->delayed_init_work);
++ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ amdgpu_ras_suspend(adev);
+
+@@ -3949,17 +4078,19 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+ if (!adev->in_s0ix)
+ amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+
+- /* evict vram memory */
+- amdgpu_bo_evict_vram(adev);
++ /* First evict vram memory */
++ amdgpu_device_evict_resources(adev);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ amdgpu_device_ip_suspend_phase2(adev);
+- /* evict remaining vram memory
+- * This second call to evict vram is to evict the gart page table
+- * using the CPU.
++ /* This second call to evict device resources is to evict
++ * the gart page table using the CPU.
+ */
+- amdgpu_bo_evict_vram(adev);
++ amdgpu_device_evict_resources(adev);
++
++ if (amdgpu_sriov_vf(adev))
++ amdgpu_virt_release_full_gpu(adev, false);
+
+ return 0;
+ }
+@@ -3979,6 +4110,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
+
++ if (amdgpu_sriov_vf(adev)) {
++ r = amdgpu_virt_request_full_gpu(adev, true);
++ if (r)
++ return r;
++ }
++
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+@@ -3993,6 +4130,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+ }
+
+ r = amdgpu_device_ip_resume(adev);
++
++ /* no matter what r is, always need to properly release full GPU */
++ if (amdgpu_sriov_vf(adev)) {
++ amdgpu_virt_init_data_exchange(adev);
++ amdgpu_virt_release_full_gpu(adev, true);
++ }
++
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ return r;
+@@ -4015,6 +4159,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+ /* Make sure IB tests flushed */
+ flush_delayed_work(&adev->delayed_init_work);
+
++ if (adev->in_s0ix) {
++ /* re-enable gfxoff after IP resume. This re-enables gfxoff after
++ * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
++ */
++ amdgpu_gfx_off_ctrl(adev, true);
++ DRM_DEBUG("will enable gfxoff for the mission mode\n");
++ }
+ if (fbcon)
+ amdgpu_fbdev_set_suspend(adev, 0);
+
+@@ -4230,7 +4381,11 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
+ dev_info(adev->dev, "recover vram bo from shadow start\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
+- shadow = &vmbo->bo;
++ /* If vm is compute context or adev is APU, shadow will be NULL */
++ if (!vmbo->shadow)
++ continue;
++ shadow = vmbo->shadow;
++
+ /* No need to recover an evicted BO */
+ if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
+ shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
+@@ -4466,10 +4621,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ if (reset_context->reset_req_dev == adev)
+ job = reset_context->job;
+
+- /* no need to dump if device is not in good state during probe period */
+- if (!adev->gmc.xgmi.pending_reset)
+- amdgpu_debugfs_wait_dump(adev);
+-
+ if (amdgpu_sriov_vf(adev)) {
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+@@ -4791,6 +4942,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+ pm_runtime_enable(&(p->dev));
+ pm_runtime_resume(&(p->dev));
+ }
++
++ pci_dev_put(p);
+ }
+
+ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+@@ -4829,6 +4982,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+
+ if (expires < ktime_get_mono_fast_ns()) {
+ dev_warn(adev->dev, "failed to suspend display audio\n");
++ pci_dev_put(p);
+ /* TODO: abort the succeeding gpu reset? */
+ return -ETIMEDOUT;
+ }
+@@ -4836,6 +4990,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+
+ pm_runtime_disable(&(p->dev));
+
++ pci_dev_put(p);
+ return 0;
+ }
+
+@@ -5130,7 +5285,7 @@ skip_hw_reset:
+ drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+ }
+
+- if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
++ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
+ drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
+ }
+
+@@ -5610,7 +5765,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+ #ifdef CONFIG_X86_64
+- if (adev->flags & AMD_IS_APU)
++ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+ #endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+@@ -5626,7 +5781,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+ #ifdef CONFIG_X86_64
+- if (adev->flags & AMD_IS_APU)
++ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+ #endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+index ada7bc19118ac..a919f5daacd91 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+@@ -415,10 +415,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+ }
+ }
+
++union gc_info {
++ struct gc_info_v1_0 v1;
++ struct gc_info_v2_0 v2;
++};
++
+ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+ {
+ struct binary_header *bhdr;
+- struct gc_info_v1_0 *gc_info;
++ union gc_info *gc_info;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+@@ -426,27 +431,54 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+- gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
++ gc_info = (union gc_info *)(adev->mman.discovery_bin +
+ le16_to_cpu(bhdr->table_list[GC].offset));
+-
+- adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
+- adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
+- le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
+- adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
+- adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
+- adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
+- adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
+- adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
+- adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
+- adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
+- adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
+- adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
+- adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
+- adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
+- adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
+- adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
+- le32_to_cpu(gc_info->gc_num_sa_per_se);
+- adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
+-
++ switch (gc_info->v1.header.version_major) {
++ case 1:
++ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
++ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
++ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
++ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
++ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
++ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
++ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
++ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
++ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
++ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
++ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
++ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
++ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
++ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
++ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
++ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
++ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
++ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
++ break;
++ case 2:
++ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
++ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
++ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
++ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
++ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
++ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
++ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
++ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
++ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
++ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
++ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
++ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
++ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
++ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
++ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
++ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
++ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
++ break;
++ default:
++ dev_err(adev->dev,
++ "Unhandled GC info table %d.%d\n",
++ gc_info->v1.header.version_major,
++ gc_info->v1.header.version_minor);
++ return -EINVAL;
++ }
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+index dc50c05f23fc2..d2286a83e302f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+@@ -1110,6 +1110,7 @@ int amdgpu_display_gem_fb_verify_and_init(
+ goto err;
+
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
++
+ if (ret)
+ goto err;
+
+@@ -1145,7 +1146,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ if (ret)
+ return ret;
+
+- if (!dev->mode_config.allow_fb_modifiers) {
++ if (!dev->mode_config.allow_fb_modifiers && !adev->enable_virtual_display) {
+ drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+index ae6ab93c868b8..7444484a12bf8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+@@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+ struct amdgpu_vm_bo_base *bo_base;
+ int r;
+
+- if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
++ if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ return;
+
+ r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index f18240f873878..deae92fde3b88 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -23,7 +23,6 @@
+ */
+
+ #include <drm/amdgpu_drm.h>
+-#include <drm/drm_aperture.h>
+ #include <drm/drm_drv.h>
+ #include <drm/drm_gem.h>
+ #include <drm/drm_vblank.h>
+@@ -38,6 +37,7 @@
+ #include <drm/drm_probe_helper.h>
+ #include <linux/mmu_notifier.h>
+ #include <linux/suspend.h>
++#include <linux/fb.h>
+
+ #include "amdgpu.h"
+ #include "amdgpu_irq.h"
+@@ -679,7 +679,7 @@ MODULE_PARM_DESC(sched_policy,
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
+ * number of VMIDs assigned to the HWS, which is also the default.
+ */
+-int hws_max_conc_proc = 8;
++int hws_max_conc_proc = -1;
+ module_param(hws_max_conc_proc, int, 0444);
+ MODULE_PARM_DESC(hws_max_conc_proc,
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+@@ -890,6 +890,718 @@ MODULE_PARM_DESC(smu_pptable_id,
+ "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
+ module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+
++/* These devices are not supported by amdgpu.
++ * They are supported by the mach64, r128, radeon drivers
++ */
++static const u16 amdgpu_unsupported_pciidlist[] = {
++ /* mach64 */
++ 0x4354,
++ 0x4358,
++ 0x4554,
++ 0x4742,
++ 0x4744,
++ 0x4749,
++ 0x474C,
++ 0x474D,
++ 0x474E,
++ 0x474F,
++ 0x4750,
++ 0x4751,
++ 0x4752,
++ 0x4753,
++ 0x4754,
++ 0x4755,
++ 0x4756,
++ 0x4757,
++ 0x4758,
++ 0x4759,
++ 0x475A,
++ 0x4C42,
++ 0x4C44,
++ 0x4C47,
++ 0x4C49,
++ 0x4C4D,
++ 0x4C4E,
++ 0x4C50,
++ 0x4C51,
++ 0x4C52,
++ 0x4C53,
++ 0x5654,
++ 0x5655,
++ 0x5656,
++ /* r128 */
++ 0x4c45,
++ 0x4c46,
++ 0x4d46,
++ 0x4d4c,
++ 0x5041,
++ 0x5042,
++ 0x5043,
++ 0x5044,
++ 0x5045,
++ 0x5046,
++ 0x5047,
++ 0x5048,
++ 0x5049,
++ 0x504A,
++ 0x504B,
++ 0x504C,
++ 0x504D,
++ 0x504E,
++ 0x504F,
++ 0x5050,
++ 0x5051,
++ 0x5052,
++ 0x5053,
++ 0x5054,
++ 0x5055,
++ 0x5056,
++ 0x5057,
++ 0x5058,
++ 0x5245,
++ 0x5246,
++ 0x5247,
++ 0x524b,
++ 0x524c,
++ 0x534d,
++ 0x5446,
++ 0x544C,
++ 0x5452,
++ /* radeon */
++ 0x3150,
++ 0x3151,
++ 0x3152,
++ 0x3154,
++ 0x3155,
++ 0x3E50,
++ 0x3E54,
++ 0x4136,
++ 0x4137,
++ 0x4144,
++ 0x4145,
++ 0x4146,
++ 0x4147,
++ 0x4148,
++ 0x4149,
++ 0x414A,
++ 0x414B,
++ 0x4150,
++ 0x4151,
++ 0x4152,
++ 0x4153,
++ 0x4154,
++ 0x4155,
++ 0x4156,
++ 0x4237,
++ 0x4242,
++ 0x4336,
++ 0x4337,
++ 0x4437,
++ 0x4966,
++ 0x4967,
++ 0x4A48,
++ 0x4A49,
++ 0x4A4A,
++ 0x4A4B,
++ 0x4A4C,
++ 0x4A4D,
++ 0x4A4E,
++ 0x4A4F,
++ 0x4A50,
++ 0x4A54,
++ 0x4B48,
++ 0x4B49,
++ 0x4B4A,
++ 0x4B4B,
++ 0x4B4C,
++ 0x4C57,
++ 0x4C58,
++ 0x4C59,
++ 0x4C5A,
++ 0x4C64,
++ 0x4C66,
++ 0x4C67,
++ 0x4E44,
++ 0x4E45,
++ 0x4E46,
++ 0x4E47,
++ 0x4E48,
++ 0x4E49,
++ 0x4E4A,
++ 0x4E4B,
++ 0x4E50,
++ 0x4E51,
++ 0x4E52,
++ 0x4E53,
++ 0x4E54,
++ 0x4E56,
++ 0x5144,
++ 0x5145,
++ 0x5146,
++ 0x5147,
++ 0x5148,
++ 0x514C,
++ 0x514D,
++ 0x5157,
++ 0x5158,
++ 0x5159,
++ 0x515A,
++ 0x515E,
++ 0x5460,
++ 0x5462,
++ 0x5464,
++ 0x5548,
++ 0x5549,
++ 0x554A,
++ 0x554B,
++ 0x554C,
++ 0x554D,
++ 0x554E,
++ 0x554F,
++ 0x5550,
++ 0x5551,
++ 0x5552,
++ 0x5554,
++ 0x564A,
++ 0x564B,
++ 0x564F,
++ 0x5652,
++ 0x5653,
++ 0x5657,
++ 0x5834,
++ 0x5835,
++ 0x5954,
++ 0x5955,
++ 0x5974,
++ 0x5975,
++ 0x5960,
++ 0x5961,
++ 0x5962,
++ 0x5964,
++ 0x5965,
++ 0x5969,
++ 0x5a41,
++ 0x5a42,
++ 0x5a61,
++ 0x5a62,
++ 0x5b60,
++ 0x5b62,
++ 0x5b63,
++ 0x5b64,
++ 0x5b65,
++ 0x5c61,
++ 0x5c63,
++ 0x5d48,
++ 0x5d49,
++ 0x5d4a,
++ 0x5d4c,
++ 0x5d4d,
++ 0x5d4e,
++ 0x5d4f,
++ 0x5d50,
++ 0x5d52,
++ 0x5d57,
++ 0x5e48,
++ 0x5e4a,
++ 0x5e4b,
++ 0x5e4c,
++ 0x5e4d,
++ 0x5e4f,
++ 0x6700,
++ 0x6701,
++ 0x6702,
++ 0x6703,
++ 0x6704,
++ 0x6705,
++ 0x6706,
++ 0x6707,
++ 0x6708,
++ 0x6709,
++ 0x6718,
++ 0x6719,
++ 0x671c,
++ 0x671d,
++ 0x671f,
++ 0x6720,
++ 0x6721,
++ 0x6722,
++ 0x6723,
++ 0x6724,
++ 0x6725,
++ 0x6726,
++ 0x6727,
++ 0x6728,
++ 0x6729,
++ 0x6738,
++ 0x6739,
++ 0x673e,
++ 0x6740,
++ 0x6741,
++ 0x6742,
++ 0x6743,
++ 0x6744,
++ 0x6745,
++ 0x6746,
++ 0x6747,
++ 0x6748,
++ 0x6749,
++ 0x674A,
++ 0x6750,
++ 0x6751,
++ 0x6758,
++ 0x6759,
++ 0x675B,
++ 0x675D,
++ 0x675F,
++ 0x6760,
++ 0x6761,
++ 0x6762,
++ 0x6763,
++ 0x6764,
++ 0x6765,
++ 0x6766,
++ 0x6767,
++ 0x6768,
++ 0x6770,
++ 0x6771,
++ 0x6772,
++ 0x6778,
++ 0x6779,
++ 0x677B,
++ 0x6840,
++ 0x6841,
++ 0x6842,
++ 0x6843,
++ 0x6849,
++ 0x684C,
++ 0x6850,
++ 0x6858,
++ 0x6859,
++ 0x6880,
++ 0x6888,
++ 0x6889,
++ 0x688A,
++ 0x688C,
++ 0x688D,
++ 0x6898,
++ 0x6899,
++ 0x689b,
++ 0x689c,
++ 0x689d,
++ 0x689e,
++ 0x68a0,
++ 0x68a1,
++ 0x68a8,
++ 0x68a9,
++ 0x68b0,
++ 0x68b8,
++ 0x68b9,
++ 0x68ba,
++ 0x68be,
++ 0x68bf,
++ 0x68c0,
++ 0x68c1,
++ 0x68c7,
++ 0x68c8,
++ 0x68c9,
++ 0x68d8,
++ 0x68d9,
++ 0x68da,
++ 0x68de,
++ 0x68e0,
++ 0x68e1,
++ 0x68e4,
++ 0x68e5,
++ 0x68e8,
++ 0x68e9,
++ 0x68f1,
++ 0x68f2,
++ 0x68f8,
++ 0x68f9,
++ 0x68fa,
++ 0x68fe,
++ 0x7100,
++ 0x7101,
++ 0x7102,
++ 0x7103,
++ 0x7104,
++ 0x7105,
++ 0x7106,
++ 0x7108,
++ 0x7109,
++ 0x710A,
++ 0x710B,
++ 0x710C,
++ 0x710E,
++ 0x710F,
++ 0x7140,
++ 0x7141,
++ 0x7142,
++ 0x7143,
++ 0x7144,
++ 0x7145,
++ 0x7146,
++ 0x7147,
++ 0x7149,
++ 0x714A,
++ 0x714B,
++ 0x714C,
++ 0x714D,
++ 0x714E,
++ 0x714F,
++ 0x7151,
++ 0x7152,
++ 0x7153,
++ 0x715E,
++ 0x715F,
++ 0x7180,
++ 0x7181,
++ 0x7183,
++ 0x7186,
++ 0x7187,
++ 0x7188,
++ 0x718A,
++ 0x718B,
++ 0x718C,
++ 0x718D,
++ 0x718F,
++ 0x7193,
++ 0x7196,
++ 0x719B,
++ 0x719F,
++ 0x71C0,
++ 0x71C1,
++ 0x71C2,
++ 0x71C3,
++ 0x71C4,
++ 0x71C5,
++ 0x71C6,
++ 0x71C7,
++ 0x71CD,
++ 0x71CE,
++ 0x71D2,
++ 0x71D4,
++ 0x71D5,
++ 0x71D6,
++ 0x71DA,
++ 0x71DE,
++ 0x7200,
++ 0x7210,
++ 0x7211,
++ 0x7240,
++ 0x7243,
++ 0x7244,
++ 0x7245,
++ 0x7246,
++ 0x7247,
++ 0x7248,
++ 0x7249,
++ 0x724A,
++ 0x724B,
++ 0x724C,
++ 0x724D,
++ 0x724E,
++ 0x724F,
++ 0x7280,
++ 0x7281,
++ 0x7283,
++ 0x7284,
++ 0x7287,
++ 0x7288,
++ 0x7289,
++ 0x728B,
++ 0x728C,
++ 0x7290,
++ 0x7291,
++ 0x7293,
++ 0x7297,
++ 0x7834,
++ 0x7835,
++ 0x791e,
++ 0x791f,
++ 0x793f,
++ 0x7941,
++ 0x7942,
++ 0x796c,
++ 0x796d,
++ 0x796e,
++ 0x796f,
++ 0x9400,
++ 0x9401,
++ 0x9402,
++ 0x9403,
++ 0x9405,
++ 0x940A,
++ 0x940B,
++ 0x940F,
++ 0x94A0,
++ 0x94A1,
++ 0x94A3,
++ 0x94B1,
++ 0x94B3,
++ 0x94B4,
++ 0x94B5,
++ 0x94B9,
++ 0x9440,
++ 0x9441,
++ 0x9442,
++ 0x9443,
++ 0x9444,
++ 0x9446,
++ 0x944A,
++ 0x944B,
++ 0x944C,
++ 0x944E,
++ 0x9450,
++ 0x9452,
++ 0x9456,
++ 0x945A,
++ 0x945B,
++ 0x945E,
++ 0x9460,
++ 0x9462,
++ 0x946A,
++ 0x946B,
++ 0x947A,
++ 0x947B,
++ 0x9480,
++ 0x9487,
++ 0x9488,
++ 0x9489,
++ 0x948A,
++ 0x948F,
++ 0x9490,
++ 0x9491,
++ 0x9495,
++ 0x9498,
++ 0x949C,
++ 0x949E,
++ 0x949F,
++ 0x94C0,
++ 0x94C1,
++ 0x94C3,
++ 0x94C4,
++ 0x94C5,
++ 0x94C6,
++ 0x94C7,
++ 0x94C8,
++ 0x94C9,
++ 0x94CB,
++ 0x94CC,
++ 0x94CD,
++ 0x9500,
++ 0x9501,
++ 0x9504,
++ 0x9505,
++ 0x9506,
++ 0x9507,
++ 0x9508,
++ 0x9509,
++ 0x950F,
++ 0x9511,
++ 0x9515,
++ 0x9517,
++ 0x9519,
++ 0x9540,
++ 0x9541,
++ 0x9542,
++ 0x954E,
++ 0x954F,
++ 0x9552,
++ 0x9553,
++ 0x9555,
++ 0x9557,
++ 0x955f,
++ 0x9580,
++ 0x9581,
++ 0x9583,
++ 0x9586,
++ 0x9587,
++ 0x9588,
++ 0x9589,
++ 0x958A,
++ 0x958B,
++ 0x958C,
++ 0x958D,
++ 0x958E,
++ 0x958F,
++ 0x9590,
++ 0x9591,
++ 0x9593,
++ 0x9595,
++ 0x9596,
++ 0x9597,
++ 0x9598,
++ 0x9599,
++ 0x959B,
++ 0x95C0,
++ 0x95C2,
++ 0x95C4,
++ 0x95C5,
++ 0x95C6,
++ 0x95C7,
++ 0x95C9,
++ 0x95CC,
++ 0x95CD,
++ 0x95CE,
++ 0x95CF,
++ 0x9610,
++ 0x9611,
++ 0x9612,
++ 0x9613,
++ 0x9614,
++ 0x9615,
++ 0x9616,
++ 0x9640,
++ 0x9641,
++ 0x9642,
++ 0x9643,
++ 0x9644,
++ 0x9645,
++ 0x9647,
++ 0x9648,
++ 0x9649,
++ 0x964a,
++ 0x964b,
++ 0x964c,
++ 0x964e,
++ 0x964f,
++ 0x9710,
++ 0x9711,
++ 0x9712,
++ 0x9713,
++ 0x9714,
++ 0x9715,
++ 0x9802,
++ 0x9803,
++ 0x9804,
++ 0x9805,
++ 0x9806,
++ 0x9807,
++ 0x9808,
++ 0x9809,
++ 0x980A,
++ 0x9900,
++ 0x9901,
++ 0x9903,
++ 0x9904,
++ 0x9905,
++ 0x9906,
++ 0x9907,
++ 0x9908,
++ 0x9909,
++ 0x990A,
++ 0x990B,
++ 0x990C,
++ 0x990D,
++ 0x990E,
++ 0x990F,
++ 0x9910,
++ 0x9913,
++ 0x9917,
++ 0x9918,
++ 0x9919,
++ 0x9990,
++ 0x9991,
++ 0x9992,
++ 0x9993,
++ 0x9994,
++ 0x9995,
++ 0x9996,
++ 0x9997,
++ 0x9998,
++ 0x9999,
++ 0x999A,
++ 0x999B,
++ 0x999C,
++ 0x999D,
++ 0x99A0,
++ 0x99A2,
++ 0x99A4,
++ /* radeon secondary ids */
++ 0x3171,
++ 0x3e70,
++ 0x4164,
++ 0x4165,
++ 0x4166,
++ 0x4168,
++ 0x4170,
++ 0x4171,
++ 0x4172,
++ 0x4173,
++ 0x496e,
++ 0x4a69,
++ 0x4a6a,
++ 0x4a6b,
++ 0x4a70,
++ 0x4a74,
++ 0x4b69,
++ 0x4b6b,
++ 0x4b6c,
++ 0x4c6e,
++ 0x4e64,
++ 0x4e65,
++ 0x4e66,
++ 0x4e67,
++ 0x4e68,
++ 0x4e69,
++ 0x4e6a,
++ 0x4e71,
++ 0x4f73,
++ 0x5569,
++ 0x556b,
++ 0x556d,
++ 0x556f,
++ 0x5571,
++ 0x5854,
++ 0x5874,
++ 0x5940,
++ 0x5941,
++ 0x5b70,
++ 0x5b72,
++ 0x5b73,
++ 0x5b74,
++ 0x5b75,
++ 0x5d44,
++ 0x5d45,
++ 0x5d6d,
++ 0x5d6f,
++ 0x5d72,
++ 0x5d77,
++ 0x5e6b,
++ 0x5e6d,
++ 0x7120,
++ 0x7124,
++ 0x7129,
++ 0x712e,
++ 0x712f,
++ 0x7162,
++ 0x7163,
++ 0x7166,
++ 0x7167,
++ 0x7172,
++ 0x7173,
++ 0x71a0,
++ 0x71a1,
++ 0x71a3,
++ 0x71a7,
++ 0x71bb,
++ 0x71e0,
++ 0x71e1,
++ 0x71e2,
++ 0x71e6,
++ 0x71e7,
++ 0x71f2,
++ 0x7269,
++ 0x726b,
++ 0x726e,
++ 0x72a0,
++ 0x72a8,
++ 0x72b1,
++ 0x72b3,
++ 0x793f,
++};
++
+ static const struct pci_device_id pciidlist[] = {
+ #ifdef CONFIG_DRM_AMDGPU_SI
+ {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+@@ -1224,10 +1936,10 @@ static const struct pci_device_id pciidlist[] = {
+ {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+
+ /* Aldebaran */
+- {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+- {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+- {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+- {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
++ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
++ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
++ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
++ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+
+ /* CYAN_SKILLFISH */
+ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+@@ -1237,6 +1949,7 @@ static const struct pci_device_id pciidlist[] = {
+ {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
++ {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+
+ {0, 0, 0}
+@@ -1246,14 +1959,45 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
+
+ static const struct drm_driver amdgpu_kms_driver;
+
++static bool amdgpu_is_fw_framebuffer(resource_size_t base,
++ resource_size_t size)
++{
++ bool found = false;
++#if IS_REACHABLE(CONFIG_FB)
++ struct apertures_struct *a;
++
++ a = alloc_apertures(1);
++ if (!a)
++ return false;
++
++ a->ranges[0].base = base;
++ a->ranges[0].size = size;
++
++ found = is_firmware_framebuffer(a);
++ kfree(a);
++#endif
++ return found;
++}
++
+ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+ {
+ struct drm_device *ddev;
+ struct amdgpu_device *adev;
+ unsigned long flags = ent->driver_data;
+- int ret, retry = 0;
++ int ret, retry = 0, i;
+ bool supports_atomic = false;
++ bool is_fw_fb;
++ resource_size_t base, size;
++
++ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
++ amdgpu_aspm = 0;
++
++ /* skip devices which are owned by radeon */
++ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
++ if (amdgpu_unsupported_pciidlist[i] == pdev->device)
++ return -ENODEV;
++ }
+
+ if (amdgpu_virtual_display ||
+ amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+@@ -1264,6 +2008,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ "See modparam exp_hw_support\n");
+ return -ENODEV;
+ }
++ /* differentiate between P10 and P11 asics with the same DID */
++ if (pdev->device == 0x67FF &&
++ (pdev->revision == 0xE3 ||
++ pdev->revision == 0xE7 ||
++ pdev->revision == 0xF3 ||
++ pdev->revision == 0xF7)) {
++ flags &= ~AMD_ASIC_MASK;
++ flags |= CHIP_POLARIS10;
++ }
+
+ /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
+ * however, SME requires an indirect IOMMU mapping because the encryption
+@@ -1310,10 +2063,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ }
+ #endif
+
+- /* Get rid of things like offb */
+- ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
+- if (ret)
+- return ret;
++ base = pci_resource_start(pdev, 0);
++ size = pci_resource_len(pdev, 0);
++ is_fw_fb = amdgpu_is_fw_framebuffer(base, size);
+
+ adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
+ if (IS_ERR(adev))
+@@ -1322,6 +2074,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+ adev->dev = &pdev->dev;
+ adev->pdev = pdev;
+ ddev = adev_to_drm(adev);
++ adev->is_fw_fb = is_fw_fb;
+
+ if (!supports_atomic)
+ ddev->driver_features &= ~DRIVER_ATOMIC;
+@@ -1332,12 +2085,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+
+ pci_set_drvdata(pdev, ddev);
+
+- ret = amdgpu_driver_load_kms(adev, ent->driver_data);
++ ret = amdgpu_driver_load_kms(adev, flags);
+ if (ret)
+ goto err_pci;
+
+ retry_init:
+- ret = drm_dev_register(ddev, ent->driver_data);
++ ret = drm_dev_register(ddev, flags);
+ if (ret == -EAGAIN && ++retry <= 3) {
+ DRM_INFO("retry init %d\n", retry);
+ /* Don't request EX mode too frequently which is attacking */
+@@ -1471,13 +2224,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
+ static int amdgpu_pmops_prepare(struct device *dev)
+ {
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
++ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+ if (amdgpu_device_supports_boco(drm_dev))
+- return pm_runtime_suspended(dev) &&
+- pm_suspend_via_firmware();
++ return pm_runtime_suspended(dev);
++
++ /* if we will not support s3 or s2i for the device
++ * then skip suspend
++ */
++ if (!amdgpu_acpi_is_s0ix_active(adev) &&
++ !amdgpu_acpi_is_s3_active(adev))
++ return 1;
+
+ return 0;
+ }
+@@ -1491,15 +2251,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
+ {
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+- int r;
+
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = true;
+- adev->in_s3 = true;
+- r = amdgpu_device_suspend(drm_dev, true);
+- adev->in_s3 = false;
++ else
++ adev->in_s3 = true;
++ return amdgpu_device_suspend(drm_dev, true);
++}
+
+- return r;
++static int amdgpu_pmops_suspend_noirq(struct device *dev)
++{
++ struct drm_device *drm_dev = dev_get_drvdata(dev);
++ struct amdgpu_device *adev = drm_to_adev(drm_dev);
++
++ if (amdgpu_acpi_should_gpu_reset(adev))
++ return amdgpu_asic_reset(adev);
++
++ return 0;
+ }
+
+ static int amdgpu_pmops_resume(struct device *dev)
+@@ -1511,6 +2279,8 @@ static int amdgpu_pmops_resume(struct device *dev)
+ r = amdgpu_device_resume(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = false;
++ else
++ adev->in_s3 = false;
+ return r;
+ }
+
+@@ -1575,12 +2345,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
+ if (amdgpu_device_supports_px(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
++ /*
++ * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
++ * proper cleanups and put itself into a state ready for PNP. That
++ * can address some random resuming failure observed on BOCO capable
++ * platforms.
++ * TODO: this may be also needed for PX capable platform.
++ */
++ if (amdgpu_device_supports_boco(drm_dev))
++ adev->mp1_state = PP_MP1_STATE_UNLOAD;
++
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (ret) {
+ adev->in_runpm = false;
++ if (amdgpu_device_supports_boco(drm_dev))
++ adev->mp1_state = PP_MP1_STATE_NONE;
+ return ret;
+ }
+
++ if (amdgpu_device_supports_boco(drm_dev))
++ adev->mp1_state = PP_MP1_STATE_NONE;
++
+ if (amdgpu_device_supports_px(drm_dev)) {
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+@@ -1634,8 +2419,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
+ amdgpu_device_baco_exit(drm_dev);
+ }
+ ret = amdgpu_device_resume(drm_dev, false);
+- if (ret)
++ if (ret) {
++ if (amdgpu_device_supports_px(drm_dev))
++ pci_disable_device(pdev);
+ return ret;
++ }
+
+ if (amdgpu_device_supports_px(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+@@ -1719,6 +2507,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
+ .prepare = amdgpu_pmops_prepare,
+ .complete = amdgpu_pmops_complete,
+ .suspend = amdgpu_pmops_suspend,
++ .suspend_noirq = amdgpu_pmops_suspend_noirq,
+ .resume = amdgpu_pmops_resume,
+ .freeze = amdgpu_pmops_freeze,
+ .thaw = amdgpu_pmops_thaw,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+index cd0acbea75da6..d58ab9deb0280 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+@@ -341,7 +341,8 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
+ }
+
+ /* disable all the possible outputs/crtcs before entering KMS mode */
+- if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display)
++ if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display &&
++ !amdgpu_sriov_vf(adev))
+ drm_helper_disable_unused_functions(adev_to_drm(adev));
+
+ drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index 8d682befe0d68..8599e0ffa8292 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -552,9 +552,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+- if (!ring->no_scheduler)
+- drm_sched_stop(&ring->sched, NULL);
+-
+ /* You can't wait for HW to signal if it's gone */
+ if (!drm_dev_is_unplugged(&adev->ddev))
+ r = amdgpu_fence_wait_empty(ring);
+@@ -564,7 +561,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
+ if (r)
+ amdgpu_fence_driver_force_completion(ring);
+
+- if (ring->fence_drv.irq_src)
++ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
++ ring->fence_drv.irq_src)
+ amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+
+@@ -582,7 +580,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+- if (!ring->no_scheduler)
++ /*
++ * Notice we check for sched.ops since there's some
++ * override on the meaning of sched.ready by amdgpu.
++ * The natural check would be sched.ready, which is
++ * set as drm_sched_init() finishes...
++ */
++ if (ring->sched.ops)
+ drm_sched_fini(&ring->sched);
+
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+@@ -614,11 +618,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+- if (!ring->no_scheduler) {
+- drm_sched_resubmit_jobs(&ring->sched);
+- drm_sched_start(&ring->sched, true);
+- }
+-
+ /* enable the interrupt */
+ if (ring->fence_drv.irq_src)
+ amdgpu_irq_get(adev, ring->fence_drv.irq_src,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+index d6aa032890ee8..13ca51ff8bd0b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+@@ -61,7 +61,7 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
+ }
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+- TTM_BO_VM_NUM_PREFAULT, 1);
++ TTM_BO_VM_NUM_PREFAULT);
+
+ drm_dev_exit(idx);
+ } else {
+@@ -419,11 +419,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ if (r)
+ goto release_object;
+
+- if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) {
+- r = amdgpu_mn_register(bo, args->addr);
+- if (r)
+- goto release_object;
+- }
++ r = amdgpu_mn_register(bo, args->addr);
++ if (r)
++ goto release_object;
+
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
+ r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+index 1916ec84dd71f..252712f930f4e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+@@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+
+- while (queue_bit-- >= 0) {
++ while (--queue_bit >= 0) {
+ if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+ continue;
+
+@@ -579,9 +579,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+
+ if (adev->gfx.gfx_off_req_count == 0 &&
+ !adev->gfx.gfx_off_state) {
+- /* If going to s2idle, no need to wait */
+- if (adev->in_s0ix)
+- delay = GFX_OFF_NO_DELAY;
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ delay);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+index f3d62e196901a..0c7963dfacad1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+@@ -223,7 +223,7 @@ int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+ */
+ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+ {
+- unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
++ unsigned int count;
+ u32 wptr;
+
+ if (!ih->enabled || adev->shutdown)
+@@ -232,6 +232,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+ wptr = amdgpu_ih_get_wptr(adev, ih);
+
+ restart_ih:
++ count = AMDGPU_IH_MAX_NUM_IVS;
+ DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+
+ /* Order reading of wptr vs. reading of IH ring data */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 7e45640fbee02..e8485b1f02ed6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -43,6 +43,17 @@
+ #include "amdgpu_display.h"
+ #include "amdgpu_ras.h"
+
++static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
++{
++ /*
++ * Add below quirk on several sienna_cichlid cards to disable
++ * runtime pm to fix EMI failures.
++ */
++ if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
++ ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
++ adev->runpm = false;
++}
++
+ void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+ {
+ struct amdgpu_gpu_instance *gpu_instance;
+@@ -152,21 +163,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev)
+ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
+ {
+ struct drm_device *dev;
+- struct pci_dev *parent;
+ int r, acpi_status;
+
+ dev = adev_to_drm(adev);
+
+- if (amdgpu_has_atpx() &&
+- (amdgpu_is_atpx_hybrid() ||
+- amdgpu_has_atpx_dgpu_power_cntl()) &&
+- ((flags & AMD_IS_APU) == 0) &&
+- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
+- flags |= AMD_IS_PX;
+-
+- parent = pci_upstream_bridge(adev->pdev);
+- adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+-
+ /* amdgpu_device_init should report only fatal error
+ * like memory allocation failure or iomapping failure,
+ * or memory manager initialization failure, it must
+@@ -206,6 +206,15 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
+ adev->runpm = true;
+ break;
+ }
++ /* XXX: disable runtime pm if we are the primary adapter
++ * to avoid displays being re-enabled after DPMS.
++ * This needs to be sorted out and fixed properly.
++ */
++ if (adev->is_fw_fb)
++ adev->runpm = false;
++
++ amdgpu_runtime_pm_quirk(adev);
++
+ if (adev->runpm)
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+@@ -573,6 +582,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ crtc = (struct drm_crtc *)minfo->crtcs[i];
+ if (crtc && crtc->base.id == info->mode_crtc.id) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
++
+ ui32 = amdgpu_crtc->crtc_id;
+ found = 1;
+ break;
+@@ -591,7 +601,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ if (ret)
+ return ret;
+
+- ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
++ ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip)));
+ return ret ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_HW_IP_COUNT: {
+@@ -739,17 +749,18 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_READ_MMR_REG: {
+- unsigned n, alloc_size;
++ unsigned int n, alloc_size;
+ uint32_t *regs;
+- unsigned se_num = (info->read_mmr_reg.instance >>
++ unsigned int se_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SE_INDEX_MASK;
+- unsigned sh_num = (info->read_mmr_reg.instance >>
++ unsigned int sh_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SH_INDEX_MASK;
+
+ /* set full masks if the userspace set all bits
+- * in the bitfields */
++ * in the bitfields
++ */
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+ se_num = 0xffffffff;
+ else if (se_num >= AMDGPU_GFX_MAX_SE)
+@@ -873,7 +884,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ return ret;
+ }
+ case AMDGPU_INFO_VCE_CLOCK_TABLE: {
+- unsigned i;
++ unsigned int i;
+ struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
+ struct amd_vce_state *vce_state;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 01a78c7865367..8a0b652da4f4b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -78,9 +78,10 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
+ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
+ {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
+- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
++ struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
+ struct amdgpu_bo_vm *vmbo;
+
++ bo = shadow_bo->parent;
+ vmbo = to_amdgpu_bo_vm(bo);
+ /* in case amdgpu_device_recover_vram got NULL of bo->parent */
+ if (!list_empty(&vmbo->shadow_list)) {
+@@ -684,13 +685,11 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ * num of amdgpu_vm_pt entries.
+ */
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
+- bp->destroy = &amdgpu_bo_vm_destroy;
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+
+ *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
+- INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
+ return r;
+ }
+
+@@ -741,6 +740,8 @@ void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
+
+ mutex_lock(&adev->shadow_list_lock);
+ list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
++ vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
++ vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
+ mutex_unlock(&adev->shadow_list_lock);
+ }
+
+@@ -912,6 +913,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+ if (WARN_ON_ONCE(min_offset > max_offset))
+ return -EINVAL;
+
++ /* Check domain to be pinned to against preferred domains */
++ if (bo->preferred_domains & domain)
++ domain = bo->preferred_domains & domain;
++
+ /* A shared bo cannot be migrated to VRAM */
+ if (bo->tbo.base.import_attach) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+@@ -1038,29 +1043,6 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
+ }
+ }
+
+-/**
+- * amdgpu_bo_evict_vram - evict VRAM buffers
+- * @adev: amdgpu device object
+- *
+- * Evicts all VRAM buffers on the lru list of the memory type.
+- * Mainly used for evicting vram at suspend time.
+- *
+- * Returns:
+- * 0 for success or a negative error code on failure.
+- */
+-int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
+-{
+- struct ttm_resource_manager *man;
+-
+- if (adev->in_s3 && (adev->flags & AMD_IS_APU)) {
+- /* No need to evict vram on APUs for suspend to ram */
+- return 0;
+- }
+-
+- man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+- return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+-}
+-
+ static const char *amdgpu_vram_names[] = {
+ "UNKNOWN",
+ "GDDR1",
+@@ -1343,7 +1325,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
+ return;
+
+- dma_resv_lock(bo->base.resv, NULL);
++ if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
++ return;
+
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+ if (!WARN_ON(r)) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+index 9d6c001c15f89..d8ef8a53a562d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+@@ -304,7 +304,6 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
+ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+ u64 min_offset, u64 max_offset);
+ void amdgpu_bo_unpin(struct amdgpu_bo *bo);
+-int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
+ int amdgpu_bo_init(struct amdgpu_device *adev);
+ void amdgpu_bo_fini(struct amdgpu_device *adev);
+ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+index 9b41cb8c3de54..f305a0f8e9b9a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+@@ -314,7 +314,39 @@ static int psp_sw_init(void *handle)
+ }
+ }
+
++ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
++ amdgpu_sriov_vf(adev) ?
++ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
++ &psp->fw_pri_bo,
++ &psp->fw_pri_mc_addr,
++ &psp->fw_pri_buf);
++ if (ret)
++ return ret;
++
++ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
++ AMDGPU_GEM_DOMAIN_VRAM,
++ &psp->fence_buf_bo,
++ &psp->fence_buf_mc_addr,
++ &psp->fence_buf);
++ if (ret)
++ goto failed1;
++
++ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
++ AMDGPU_GEM_DOMAIN_VRAM,
++ &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
++ (void **)&psp->cmd_buf_mem);
++ if (ret)
++ goto failed2;
++
+ return 0;
++
++failed2:
++ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
++ &psp->fence_buf_mc_addr, &psp->fence_buf);
++failed1:
++ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
++ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
++ return ret;
+ }
+
+ static int psp_sw_fini(void *handle)
+@@ -344,6 +376,13 @@ static int psp_sw_fini(void *handle)
+ kfree(cmd);
+ cmd = NULL;
+
++ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
++ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
++ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
++ &psp->fence_buf_mc_addr, &psp->fence_buf);
++ amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
++ (void **)&psp->cmd_buf_mem);
++
+ return 0;
+ }
+
+@@ -2207,12 +2246,16 @@ static int psp_hw_start(struct psp_context *psp)
+ return ret;
+ }
+
++ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
++ goto skip_pin_bo;
++
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ DRM_ERROR("PSP tmr init failed!\n");
+ return ret;
+ }
+
++skip_pin_bo:
+ /*
+ * For ASICs with DF Cstate management centralized
+ * to PMFW, TMR setup should be performed after PMFW
+@@ -2462,7 +2505,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
+ static bool fw_load_skip_check(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+ {
+- if (!ucode->fw)
++ if (!ucode->fw || !ucode->ucode_size)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+@@ -2576,51 +2619,18 @@ static int psp_load_fw(struct amdgpu_device *adev)
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+- psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
+- goto skip_memalloc;
+- }
+-
+- if (amdgpu_sriov_vf(adev)) {
+- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+- AMDGPU_GEM_DOMAIN_VRAM,
+- &psp->fw_pri_bo,
+- &psp->fw_pri_mc_addr,
+- &psp->fw_pri_buf);
++ /* should not destroy ring, only stop */
++ psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ } else {
+- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+- AMDGPU_GEM_DOMAIN_GTT,
+- &psp->fw_pri_bo,
+- &psp->fw_pri_mc_addr,
+- &psp->fw_pri_buf);
+- }
+-
+- if (ret)
+- goto failed;
++ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+
+- ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+- AMDGPU_GEM_DOMAIN_VRAM,
+- &psp->fence_buf_bo,
+- &psp->fence_buf_mc_addr,
+- &psp->fence_buf);
+- if (ret)
+- goto failed;
+-
+- ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+- AMDGPU_GEM_DOMAIN_VRAM,
+- &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+- (void **)&psp->cmd_buf_mem);
+- if (ret)
+- goto failed;
+-
+- memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+-
+- ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+- if (ret) {
+- DRM_ERROR("PSP ring init failed!\n");
+- goto failed;
++ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
++ if (ret) {
++ DRM_ERROR("PSP ring init failed!\n");
++ goto failed;
++ }
+ }
+
+-skip_memalloc:
+ ret = psp_hw_start(psp);
+ if (ret)
+ goto failed;
+@@ -2719,6 +2729,9 @@ static int psp_hw_fini(void *handle)
+ psp_rap_terminate(psp);
+ psp_dtm_terminate(psp);
+ psp_hdcp_terminate(psp);
++
++ if (adev->gmc.xgmi.num_physical_nodes > 1)
++ psp_xgmi_terminate(psp);
+ }
+
+ psp_asd_unload(psp);
+@@ -2726,13 +2739,6 @@ static int psp_hw_fini(void *handle)
+ psp_tmr_terminate(psp);
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
+
+- amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+- &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+- amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+- &psp->fence_buf_mc_addr, &psp->fence_buf);
+- amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+- (void **)&psp->cmd_buf_mem);
+-
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+index b7d861ed52849..88f986a61c93a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+@@ -66,6 +66,7 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
+ {
+ struct fd f = fdget(fd);
+ struct amdgpu_fpriv *fpriv;
++ struct amdgpu_ctx_mgr *mgr;
+ struct amdgpu_ctx *ctx;
+ uint32_t id;
+ int r;
+@@ -79,8 +80,11 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
+ return r;
+ }
+
+- idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
++ mgr = &fpriv->ctx_mgr;
++ mutex_lock(&mgr->lock);
++ idr_for_each_entry(&mgr->ctx_handles, ctx, id)
+ amdgpu_ctx_priority_override(ctx, priority);
++ mutex_unlock(&mgr->lock);
+
+ fdput(f);
+ return 0;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 94126dc396888..51c76d6322c94 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -1892,7 +1892,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ unsigned i;
+ int r;
+
+- if (direct_submit && !ring->sched.ready) {
++ if (!direct_submit && !ring->sched.ready) {
+ DRM_ERROR("Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+@@ -2036,6 +2036,36 @@ error_free:
+ return r;
+ }
+
++/**
++ * amdgpu_ttm_evict_resources - evict memory buffers
++ * @adev: amdgpu device object
++ * @mem_type: evicted BO's memory type
++ *
++ * Evicts all @mem_type buffers on the lru list of the memory type.
++ *
++ * Returns:
++ * 0 for success or a negative error code on failure.
++ */
++int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
++{
++ struct ttm_resource_manager *man;
++
++ switch (mem_type) {
++ case TTM_PL_VRAM:
++ case TTM_PL_TT:
++ case AMDGPU_PL_GWS:
++ case AMDGPU_PL_GDS:
++ case AMDGPU_PL_OA:
++ man = ttm_manager_type(&adev->mman.bdev, mem_type);
++ break;
++ default:
++ DRM_ERROR("Trying to evict invalid memory type\n");
++ return -EINVAL;
++ }
++
++ return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
++}
++
+ #if defined(CONFIG_DEBUG_FS)
+
+ static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+index 3205fd5200601..639c7b41e30b9 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+@@ -190,6 +190,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
+ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem);
+ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem);
++int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
+
+ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+index abd8469380e51..0ed0736d515aa 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+@@ -723,8 +723,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
+
+ void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
+ {
+- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+- amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
++ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+index 008a308a4ecaf..0c10222707902 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+@@ -149,6 +149,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+ break;
+ case CHIP_VANGOGH:
+ fw_name = FIRMWARE_VANGOGH;
++ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
++ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
++ adev->vcn.indirect_sram = true;
+ break;
+ case CHIP_DIMGREY_CAVEFISH:
+ fw_name = FIRMWARE_DIMGREY_CAVEFISH;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+index ca058fbcccd43..b508126a9738f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+@@ -24,6 +24,7 @@
+ #include <linux/module.h>
+
+ #include <drm/drm_drv.h>
++#include <xen/xen.h>
+
+ #include "amdgpu.h"
+ #include "amdgpu_ras.h"
+@@ -613,16 +614,34 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
+
+ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+ {
+- uint64_t bp_block_offset = 0;
+- uint32_t bp_block_size = 0;
+- struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+-
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ adev->virt.fw_reserve.p_vf2pf = NULL;
+ adev->virt.vf2pf_update_interval_ms = 0;
+
+ if (adev->mman.fw_vram_usage_va != NULL) {
+- adev->virt.vf2pf_update_interval_ms = 2000;
++ /* go through this logic in ip_init and reset to init workqueue*/
++ amdgpu_virt_exchange_data(adev);
++
++ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
++ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
++ } else if (adev->bios != NULL) {
++ /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
++ adev->virt.fw_reserve.p_pf2vf =
++ (struct amd_sriov_msg_pf2vf_info_header *)
++ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
++
++ amdgpu_virt_read_pf2vf_data(adev);
++ }
++}
++
++
++void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
++{
++ uint64_t bp_block_offset = 0;
++ uint32_t bp_block_size = 0;
++ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
++
++ if (adev->mman.fw_vram_usage_va != NULL) {
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+@@ -648,22 +667,10 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
+- } else if (adev->bios != NULL) {
+- adev->virt.fw_reserve.p_pf2vf =
+- (struct amd_sriov_msg_pf2vf_info_header *)
+- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+-
+- amdgpu_virt_read_pf2vf_data(adev);
+-
+- return;
+- }
+-
+- if (adev->virt.vf2pf_update_interval_ms != 0) {
+- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+- schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+ }
+ }
+
++
+ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+ {
+ uint32_t reg;
+@@ -694,10 +701,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
++ /* passthrough mode exclus sriov mod */
++ if (is_virtual_machine() && !xen_initial_domain())
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+
++ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
++ /* VF MMIO access (except mailbox range) from CPU
++ * will be blocked during sriov runtime
++ */
++ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
++
+ /* we have the ability to check now */
+ if (amdgpu_sriov_vf(adev)) {
+ switch (adev->asic_type) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+index 8d4c20bb71c59..4af3610f4a827 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+@@ -31,6 +31,7 @@
+ #define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
+ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */
+ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
++#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
+
+ /* all asic after AI use this offset */
+ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
+@@ -61,6 +62,8 @@ struct amdgpu_vf_error_buffer {
+ uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+ };
+
++enum idh_request;
++
+ /**
+ * struct amdgpu_virt_ops - amdgpu device virt operations
+ */
+@@ -70,7 +73,8 @@ struct amdgpu_virt_ops {
+ int (*req_init_data)(struct amdgpu_device *adev);
+ int (*reset_gpu)(struct amdgpu_device *adev);
+ int (*wait_reset)(struct amdgpu_device *adev);
+- void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
++ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
++ u32 data1, u32 data2, u32 data3);
+ };
+
+ /*
+@@ -278,6 +282,9 @@ struct amdgpu_video_codec_info;
+ #define amdgpu_passthrough(adev) \
+ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
+
++#define amdgpu_sriov_vf_mmio_access_protection(adev) \
++((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
++
+ static inline bool is_virtual_machine(void)
+ {
+ #ifdef CONFIG_X86
+@@ -308,6 +315,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
+ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
++void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
+ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
+ void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+index ce982afeff913..4e8274de8fc0c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+@@ -16,6 +16,8 @@
+ #include "ivsrcid/ivsrcid_vislands30.h"
+ #include "amdgpu_vkms.h"
+ #include "amdgpu_display.h"
++#include "atom.h"
++#include "amdgpu_irq.h"
+
+ /**
+ * DOC: amdgpu_vkms
+@@ -41,20 +43,20 @@ static const u32 amdgpu_vkms_formats[] = {
+
+ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
+ {
+- struct amdgpu_vkms_output *output = container_of(timer,
+- struct amdgpu_vkms_output,
+- vblank_hrtimer);
+- struct drm_crtc *crtc = &output->crtc;
++ struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer);
++ struct drm_crtc *crtc = &amdgpu_crtc->base;
++ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ u64 ret_overrun;
+ bool ret;
+
+- ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer,
++ ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
+ output->period_ns);
+ WARN_ON(ret_overrun != 1);
+
+ ret = drm_crtc_handle_vblank(crtc);
++ /* Don't queue timer again when vblank is disabled. */
+ if (!ret)
+- DRM_ERROR("amdgpu_vkms failure on handling vblank");
++ return HRTIMER_NORESTART;
+
+ return HRTIMER_RESTART;
+ }
+@@ -65,22 +67,21 @@ static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
+ unsigned int pipe = drm_crtc_index(crtc);
+ struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
++ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_calc_timestamping_constants(crtc, &crtc->mode);
+
+- hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+- out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate;
+ out->period_ns = ktime_set(0, vblank->framedur_ns);
+- hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL);
++ hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL);
+
+ return 0;
+ }
+
+ static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
+ {
+- struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
++ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+- hrtimer_cancel(&out->vblank_hrtimer);
++ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
+ }
+
+ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
+@@ -92,13 +93,14 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
+ unsigned int pipe = crtc->index;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
++ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!READ_ONCE(vblank->enabled)) {
+ *vblank_time = ktime_get();
+ return true;
+ }
+
+- *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires);
++ *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires);
+
+ if (WARN_ON(*vblank_time == vblank->time))
+ return true;
+@@ -142,15 +144,16 @@ static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
+ static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+ {
++ unsigned long flags;
+ if (crtc->state->event) {
+- spin_lock(&crtc->dev->event_lock);
++ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ else
+ drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+
+- spin_unlock(&crtc->dev->event_lock);
++ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ crtc->state->event = NULL;
+ }
+@@ -165,6 +168,8 @@ static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = {
+ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+ struct drm_plane *primary, struct drm_plane *cursor)
+ {
++ struct amdgpu_device *adev = drm_to_adev(dev);
++ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ int ret;
+
+ ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor,
+@@ -176,6 +181,17 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+
+ drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs);
+
++ amdgpu_crtc->crtc_id = drm_crtc_index(crtc);
++ adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc;
++
++ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
++ amdgpu_crtc->encoder = NULL;
++ amdgpu_crtc->connector = NULL;
++ amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
++
++ hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate;
++
+ return ret;
+ }
+
+@@ -401,7 +417,7 @@ int amdgpu_vkms_output_init(struct drm_device *dev,
+ {
+ struct drm_connector *connector = &output->connector;
+ struct drm_encoder *encoder = &output->encoder;
+- struct drm_crtc *crtc = &output->crtc;
++ struct drm_crtc *crtc = &output->crtc.base;
+ struct drm_plane *primary, *cursor = NULL;
+ int ret;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
+index 97f1b79c0724e..4f8722ff37c25 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
+@@ -10,15 +10,14 @@
+ #define YRES_MAX 16384
+
+ #define drm_crtc_to_amdgpu_vkms_output(target) \
+- container_of(target, struct amdgpu_vkms_output, crtc)
++ container_of(target, struct amdgpu_vkms_output, crtc.base)
+
+ extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block;
+
+ struct amdgpu_vkms_output {
+- struct drm_crtc crtc;
++ struct amdgpu_crtc crtc;
+ struct drm_encoder encoder;
+ struct drm_connector connector;
+- struct hrtimer vblank_hrtimer;
+ ktime_t period_ns;
+ struct drm_pending_vblank_event *event;
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 6b15cad78de9d..0e4554950e072 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -768,11 +768,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ * Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+- * True if eviction list is empty.
++ * True if VM is not evicting.
+ */
+ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+ {
+- return list_empty(&vm->evicted);
++ bool ret;
++
++ amdgpu_vm_eviction_lock(vm);
++ ret = !vm->evicting;
++ amdgpu_vm_eviction_unlock(vm);
++
++ return ret && list_empty(&vm->evicted);
+ }
+
+ /**
+@@ -977,7 +983,6 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
+ return r;
+ }
+
+- (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
+ amdgpu_bo_add_to_shadow_list(*vmbo);
+
+ return 0;
+@@ -2329,14 +2334,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ uint64_t eaddr;
+
+ /* validate the parameters */
+- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+- size == 0 || size & ~PAGE_MASK)
++ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
++ return -EINVAL;
++ if (saddr + size <= saddr || offset + size <= offset)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ eaddr = saddr + size - 1;
+- if (saddr >= eaddr ||
+- (bo && offset + size > amdgpu_bo_size(bo)) ||
++ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+
+@@ -2395,14 +2400,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ int r;
+
+ /* validate the parameters */
+- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+- size == 0 || size & ~PAGE_MASK)
++ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
++ return -EINVAL;
++ if (saddr + size <= saddr || offset + size <= offset)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ eaddr = saddr + size - 1;
+- if (saddr >= eaddr ||
+- (bo && offset + size > amdgpu_bo_size(bo)) ||
++ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+
+@@ -2570,18 +2575,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+
+ /* Insert partial mapping before the range */
+ if (!list_empty(&before->list)) {
++ struct amdgpu_bo *bo = before->bo_va->base.bo;
++
+ amdgpu_vm_it_insert(before, &vm->va);
+ if (before->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
++
++ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
++ !before->bo_va->base.moved)
++ amdgpu_vm_bo_moved(&before->bo_va->base);
+ } else {
+ kfree(before);
+ }
+
+ /* Insert partial mapping after the range */
+ if (!list_empty(&after->list)) {
++ struct amdgpu_bo *bo = after->bo_va->base.bo;
++
+ amdgpu_vm_it_insert(after, &vm->va);
+ if (after->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
++
++ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
++ !after->bo_va->base.moved)
++ amdgpu_vm_bo_moved(&after->bo_va->base);
+ } else {
+ kfree(after);
+ }
+@@ -3218,7 +3235,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+ */
+ #ifdef CONFIG_X86_64
+ if (amdgpu_vm_update_mode == -1) {
+- if (amdgpu_gmc_vram_full_visible(&adev->gmc))
++ /* For asic with VF MMIO access protection
++ * avoid using CPU for VM table updates
++ */
++ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
++ !amdgpu_sriov_vf_mmio_access_protection(adev))
+ adev->vm_manager.vm_update_mode =
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE;
+ else
+@@ -3265,6 +3286,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+ long timeout = msecs_to_jiffies(2000);
+ int r;
+
++ /* No valid flags defined yet */
++ if (args->in.flags)
++ return -EINVAL;
++
+ switch (args->in.op) {
+ case AMDGPU_VM_OP_RESERVE_VMID:
+ /* We only have requirement to reserve vmid from gfxhub */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+index 978ac927ac11d..ce0b9cb61f582 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+@@ -386,6 +386,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+ "%s", "xgmi_hive_info");
+ if (ret) {
+ dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
++ kobject_put(&hive->kobj);
+ kfree(hive);
+ hive = NULL;
+ goto pro_end;
+@@ -722,7 +723,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+- return psp_xgmi_terminate(&adev->psp);
++ return 0;
+ }
+
+ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
+index 54f28c075f214..9be6da37032a7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cik.c
++++ b/drivers/gpu/drm/amd/amdgpu/cik.c
+@@ -1428,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
+ {
+ int r;
+
++ /* APUs don't have full asic reset */
++ if (adev->flags & AMD_IS_APU)
++ return 0;
++
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "BACO reset\n");
+ r = amdgpu_dpm_baco_reset(adev);
+@@ -1570,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &bridge_cfg);
+- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+- &gpu_cfg);
+-
+- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
+-
+- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
+@@ -1633,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
+ msleep(100);
+
+ /* linkctl */
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+- tmp16);
+-
+- pcie_capability_read_word(adev->pdev,
+- PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(adev->pdev,
+- PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ bridge_cfg &
++ PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ gpu_cfg &
++ PCI_EXP_LNKCTL_HAWD);
+
+ /* linkctl2 */
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+@@ -1715,7 +1703,7 @@ static void cik_program_aspm(struct amdgpu_device *adev)
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+- if (amdgpu_aspm == 0)
++ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+index 16dbe593cba2e..938f13956aeef 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+@@ -7197,8 +7197,10 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+- if (unlikely(r != 0))
++ if (unlikely(r != 0)) {
++ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
++ }
+
+ gfx_v10_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+@@ -7729,8 +7731,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+ switch (adev->asic_type) {
+ case CHIP_VANGOGH:
+ case CHIP_YELLOW_CARP:
+- clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) |
+- ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
++ preempt_disable();
++ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
++ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
++ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
++ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
++ * roughly every 42 seconds.
++ */
++ if (hi_check != clock_hi) {
++ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
++ clock_hi = hi_check;
++ }
++ preempt_enable();
++ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ default:
+ preempt_disable();
+@@ -8411,8 +8424,14 @@ static int gfx_v10_0_set_powergating_state(void *handle,
+ break;
+ case CHIP_VANGOGH:
+ case CHIP_YELLOW_CARP:
++ if (!enable)
++ amdgpu_gfx_off_ctrl(adev, false);
++
+ gfx_v10_cntl_pg(adev, enable);
+- amdgpu_gfx_off_ctrl(adev, enable);
++
++ if (enable)
++ amdgpu_gfx_off_ctrl(adev, true);
++
+ break;
+ default:
+ break;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+index 025184a556ee6..de1fab165041f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -140,6 +140,11 @@ MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+ #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
+ #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
+
++#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
++#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
++#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
++#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
++
+ enum ta_ras_gfx_subblock {
+ /*CPC*/
+ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+@@ -1267,6 +1272,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
+ { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+ /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
++ /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
++ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ { 0, 0, 0, 0, 0 },
+ };
+
+@@ -2617,7 +2624,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
+
+ gfx_v9_0_tiling_mode_table_init(adev);
+
+- gfx_v9_0_setup_rb(adev);
++ if (adev->gfx.num_gfx_rings)
++ gfx_v9_0_setup_rb(adev);
+ gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
+
+@@ -3056,8 +3064,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+- WREG32(mmRLC_JUMP_TABLE_RESTORE,
+- adev->gfx.rlc.cp_table_gpu_addr >> 8);
++ WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
++ adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v9_0_init_gfx_power_gating(adev);
+ }
+ }
+@@ -3863,8 +3871,10 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+- if (unlikely(r != 0))
++ if (unlikely(r != 0)) {
++ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
++ }
+
+ gfx_v9_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+@@ -4010,7 +4020,8 @@ static int gfx_v9_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
++ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
++ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+@@ -4228,19 +4239,38 @@ failed_kiq_read:
+
+ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+ {
+- uint64_t clock;
++ uint64_t clock, clock_lo, clock_hi, hi_check;
+
+- amdgpu_gfx_off_ctrl(adev, false);
+- mutex_lock(&adev->gfx.gpu_clock_mutex);
+- if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+- clock = gfx_v9_0_kiq_read_clock(adev);
+- } else {
+- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
++ switch (adev->asic_type) {
++ case CHIP_RENOIR:
++ preempt_disable();
++ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
++ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
++ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
++ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
++ * roughly every 42 seconds.
++ */
++ if (hi_check != clock_hi) {
++ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
++ clock_hi = hi_check;
++ }
++ preempt_enable();
++ clock = clock_lo | (clock_hi << 32ULL);
++ break;
++ default:
++ amdgpu_gfx_off_ctrl(adev, false);
++ mutex_lock(&adev->gfx.gpu_clock_mutex);
++ if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
++ clock = gfx_v9_0_kiq_read_clock(adev);
++ } else {
++ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
++ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
++ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
++ }
++ mutex_unlock(&adev->gfx.gpu_clock_mutex);
++ amdgpu_gfx_off_ctrl(adev, true);
++ break;
+ }
+- mutex_unlock(&adev->gfx.gpu_clock_mutex);
+- amdgpu_gfx_off_ctrl(adev, true);
+ return clock;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+index bda1542ef1ddf..f51fd0688eca7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+@@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+index 14c1c1a297dd3..6e0ace2fbfab1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+@@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+index 1a374ec0514a5..9328991e8807f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+@@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+index e47104a1f5596..9c07ec8b97327 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+@@ -414,6 +414,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
++ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+@@ -432,7 +433,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
++ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+@@ -788,7 +789,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+ #ifdef CONFIG_X86_64
+- if (adev->flags & AMD_IS_APU) {
++ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+@@ -1021,10 +1022,14 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
+ return -EINVAL;
+ }
+
++ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
++ goto skip_pin_bo;
++
+ r = amdgpu_gart_table_vram_pin(adev);
+ if (r)
+ return r;
+
++skip_pin_bo:
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+index 0e81e03e9b498..0fe714f54cca9 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+@@ -841,12 +841,12 @@ static int gmc_v6_0_sw_init(void *handle)
+
+ adev->gmc.mc_mask = 0xffffffffffULL;
+
+- r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
++ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
+ if (r) {
+ dev_warn(adev->dev, "No suitable DMA available.\n");
+ return r;
+ }
+- adev->need_swiotlb = drm_need_swiotlb(44);
++ adev->need_swiotlb = drm_need_swiotlb(40);
+
+ r = gmc_v6_0_init_microcode(adev);
+ if (r) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+index 0a50fdaced7e5..63c47f61d0dfd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+@@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+ #ifdef CONFIG_X86_64
+- if (adev->flags & AMD_IS_APU &&
+- adev->gmc.real_vram_size > adev->gmc.aper_size) {
++ if ((adev->flags & AMD_IS_APU) &&
++ adev->gmc.real_vram_size > adev->gmc.aper_size &&
++ !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+index 492ebed2915be..bef9610084f10 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+@@ -515,10 +515,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
+ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
+ {
+ int r;
++ u32 tmp;
+
+ adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
+- u32 tmp;
+ int chansize, numchan;
+
+ /* Get VRAM informations */
+@@ -562,8 +562,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ /* size in MB on si */
+- adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+- adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
++ tmp = RREG32(mmCONFIG_MEMSIZE);
++ /* some boards may have garbage in the upper 16 bits */
++ if (tmp & 0xffff0000) {
++ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
++ if (tmp & 0xffff)
++ tmp &= 0xffff;
++ }
++ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
++ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+@@ -574,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+ #ifdef CONFIG_X86_64
+- if (adev->flags & AMD_IS_APU) {
++ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+index 5551359d5dfdc..342e540410b18 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+@@ -72,6 +72,9 @@
+ #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
+ #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
+
++#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
++#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
++
+
+ static const char *gfxhub_client_ids[] = {
+ "CB",
+@@ -860,6 +863,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
++ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+@@ -899,7 +903,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
++ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+ up_read(&adev->reset_sem);
+@@ -1103,6 +1107,8 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+ unsigned size;
+
++ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
++
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+@@ -1110,7 +1116,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+- case CHIP_RENOIR:
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+@@ -1118,6 +1123,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
++ case CHIP_RENOIR:
++ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
++ size = (REG_GET_FIELD(viewport,
++ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
++ REG_GET_FIELD(viewport,
++ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
++ 4);
++ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+@@ -1375,7 +1388,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
+ */
+
+ /* check whether both host-gpu and gpu-gpu xgmi links exist */
+- if ((adev->flags & AMD_IS_APU) ||
++ if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+ (adev->gmc.xgmi.supported &&
+ adev->gmc.xgmi.connected_to_cpu)) {
+ adev->gmc.aper_base =
+@@ -1640,7 +1653,7 @@ static int gmc_v9_0_sw_fini(void *handle)
+ amdgpu_gem_force_release(adev);
+ amdgpu_vm_manager_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
+- amdgpu_bo_unref(&adev->gmc.pdb0_bo);
++ amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+@@ -1708,10 +1721,14 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
+ return -EINVAL;
+ }
+
++ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
++ goto skip_pin_bo;
++
+ r = amdgpu_gart_table_vram_pin(adev);
+ if (r)
+ return r;
+
++skip_pin_bo:
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+@@ -1802,7 +1819,6 @@ static int gmc_v9_0_hw_fini(void *handle)
+ return 0;
+ }
+
+- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ return 0;
+diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+index a99953833820e..4259f623a9d7a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+@@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+@@ -177,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+
++ tmp = mmVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+index f80a14a1b82dc..f5f7181f9af5f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+@@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+index 7ded6b2f058ef..2e58ed2caa485 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+@@ -269,7 +269,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+index 88e457a150e02..c63b6b9349350 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+@@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+index c4ef822bbe8c5..ff49eeaf78824 100644
+--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+@@ -189,8 +189,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+- tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+- ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+index b184b656b9b6b..6f21154d4891f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+@@ -366,6 +366,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+ }
+
++#ifdef CONFIG_PCIEASPM
+ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
+ {
+ uint32_t def, data;
+@@ -387,9 +388,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
++#endif
+
+ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ {
++#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -445,7 +448,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+- nbio_v2_3_program_ltr(adev);
++ /* Don't bother about LTR if LTR is not enabled
++ * in the path */
++ if (adev->pdev->ltr_path)
++ nbio_v2_3_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -469,6 +475,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
++#endif
+ }
+
+ static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+index 0d2d629e2d6a2..be3f6c52c3ffd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+@@ -278,6 +278,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
+ WREG32_PCIE(smnPCIE_CI_CNTL, data);
+ }
+
++#ifdef CONFIG_PCIEASPM
+ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+ {
+ uint32_t def, data;
+@@ -299,9 +300,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
++#endif
+
+ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ {
++#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -357,7 +360,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+- nbio_v6_1_program_ltr(adev);
++ /* Don't bother about LTR if LTR is not enabled
++ * in the path */
++ if (adev->pdev->ltr_path)
++ nbio_v6_1_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -381,6 +387,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
++#endif
+ }
+
+ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
+diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+index f50045cebd44c..74cd7543729be 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+@@ -630,6 +630,7 @@ const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = {
+ .ras_fini = amdgpu_nbio_ras_fini,
+ };
+
++#ifdef CONFIG_PCIEASPM
+ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+ {
+ uint32_t def, data;
+@@ -651,9 +652,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+ }
++#endif
+
+ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ {
++#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+@@ -709,7 +712,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+- nbio_v7_4_program_ltr(adev);
++ /* Don't bother about LTR if LTR is not enabled
++ * in the path */
++ if (adev->pdev->ltr_path)
++ nbio_v7_4_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+@@ -733,6 +739,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
++#endif
+ }
+
+ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
+diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
+index 01efda4398e56..947e8c09493dc 100644
+--- a/drivers/gpu/drm/amd/amdgpu/nv.c
++++ b/drivers/gpu/drm/amd/amdgpu/nv.c
+@@ -170,6 +170,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
++ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+ };
+
+ static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+@@ -583,7 +584,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
+
+ static void nv_program_aspm(struct amdgpu_device *adev)
+ {
+- if (!amdgpu_aspm)
++ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+index 47a500f64db20..bcf356df1ef33 100644
+--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+@@ -101,14 +101,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+ int ret;
+ int retry_loop;
+
++ /* Wait for bootloader to signify that it is ready having bit 31 of
++ * C2PMSG_35 set to 1. All other bits are expected to be cleared.
++ * If there is an error in processing command, bits[7:0] will be set.
++ * This is applicable for PSP v13.0.6 and newer.
++ */
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+- /* Wait for bootloader to signify that is
+- ready having bit 31 of C2PMSG_35 set to 1 */
+- ret = psp_wait_for(psp,
+- SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+- 0x80000000,
+- 0x80000000,
+- false);
++ ret = psp_wait_for(
++ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
++ 0x80000000, 0xffffffff, false);
+
+ if (ret == 0)
+ return 0;
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+index 8931000dcd418..0fad9258e0960 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+@@ -770,8 +770,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
+
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
++ "lower_32_bits(ring->wptr << 2) == 0x%08x "
++ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+@@ -978,13 +978,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
+
+
+ /**
+- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
++ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+- *
+- * Stop the gfx async dma ring buffers (VEGA10).
++ * @enable: enable SDMA RB/IB
++ * control the gfx async dma ring buffers (VEGA10).
+ */
+-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
++static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
+ {
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 rb_cntl, ib_cntl;
+@@ -999,10 +999,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+ }
+
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
+- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
+- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
+ }
+ }
+@@ -1129,7 +1129,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
+ int i;
+
+ if (!enable) {
+- sdma_v4_0_gfx_stop(adev);
++ sdma_v4_0_gfx_enable(adev, enable);
+ sdma_v4_0_rlc_stop(adev);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_0_page_stop(adev);
+@@ -2044,9 +2044,11 @@ static int sdma_v4_0_hw_fini(void *handle)
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+- for (i = 0; i < adev->sdma.num_instances; i++) {
+- amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
++ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
++ for (i = 0; i < adev->sdma.num_instances; i++) {
++ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
++ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
++ }
+ }
+
+ sdma_v4_0_ctx_switch_enable(adev, false);
+@@ -2062,6 +2064,12 @@ static int sdma_v4_0_suspend(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ /* SMU saves SDMA state for us */
++ if (adev->in_s0ix) {
++ sdma_v4_0_gfx_enable(adev, false);
++ return 0;
++ }
++
+ return sdma_v4_0_hw_fini(adev);
+ }
+
+@@ -2069,6 +2077,14 @@ static int sdma_v4_0_resume(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ /* SMU restores SDMA state for us */
++ if (adev->in_s0ix) {
++ sdma_v4_0_enable(adev, true);
++ sdma_v4_0_gfx_enable(adev, true);
++ amdgpu_ttm_set_buffer_funcs_status(adev, true);
++ return 0;
++ }
++
+ return sdma_v4_0_hw_init(adev);
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+index 50bf3b71bc93c..0f75864365d61 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+@@ -400,8 +400,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
++ "lower_32_bits(ring->wptr << 2) == 0x%08x "
++ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+@@ -782,9 +782,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+- lower_32_bits(ring->wptr) << 2);
++ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+- upper_32_bits(ring->wptr) << 2);
++ upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+index e32efcfb0c8b1..f643b977b5f4c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+@@ -287,8 +287,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
++ "lower_32_bits(ring->wptr << 2) == 0x%08x "
++ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+@@ -660,8 +660,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
++ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
++ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
+index e6d2f74a79765..fd34c2100bd96 100644
+--- a/drivers/gpu/drm/amd/amdgpu/si.c
++++ b/drivers/gpu/drm/amd/amdgpu/si.c
+@@ -2276,17 +2276,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &bridge_cfg);
+- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+- &gpu_cfg);
+-
+- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
+-
+- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+@@ -2331,21 +2322,14 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
+
+ mdelay(100);
+
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+- tmp16);
+-
+- pcie_capability_read_word(adev->pdev,
+- PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(adev->pdev,
+- PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ bridge_cfg &
++ PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ gpu_cfg &
++ PCI_EXP_LNKCTL_HAWD);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+@@ -2453,7 +2437,7 @@ static void si_program_aspm(struct amdgpu_device *adev)
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+- if (amdgpu_aspm == 0)
++ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (adev->flags & AMD_IS_APU)
+diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
+index 0fc97c364fd76..529bb6c6ac6f5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
+@@ -461,8 +461,9 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
+ en = &soc15_allowed_read_registers[i];
+- if (adev->reg_offset[en->hwip][en->inst] &&
+- reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
++ if (!adev->reg_offset[en->hwip][en->inst])
++ continue;
++ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+@@ -607,8 +608,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
+ static int soc15_asic_reset(struct amdgpu_device *adev)
+ {
+ /* original raven doesn't have full asic reset */
+- if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
+- !(adev->apu_flags & AMD_APU_IS_RAVEN2))
++ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
++ (adev->apu_flags & AMD_APU_IS_RAVEN2))
+ return 0;
+
+ switch (soc15_asic_reset_method(adev)) {
+@@ -689,7 +690,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
+
+ static void soc15_program_aspm(struct amdgpu_device *adev)
+ {
+- if (!amdgpu_aspm)
++ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+@@ -1273,8 +1274,11 @@ static int soc15_common_early_init(void *handle)
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
++ /*
++ * MMHUB PG needs to be disabled for Picasso for
++ * stability reasons.
++ */
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+- AMD_PG_SUPPORT_MMHUB |
+ AMD_PG_SUPPORT_VCN;
+ } else {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+@@ -1413,22 +1417,17 @@ static int soc15_common_sw_fini(void *handle)
+ return 0;
+ }
+
+-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
++static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
+ {
+ int i;
+- struct amdgpu_ring *ring;
+
+- /* sdma/ih doorbell range are programed by hypervisor */
++ /* sdma doorbell range is programed by hypervisor */
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+- ring = &adev->sdma.instance[i].ring;
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
+- ring->use_doorbell, ring->doorbell_index,
++ true, adev->doorbell_index.sdma_engine[i] << 1,
+ adev->doorbell_index.sdma_doorbell_range);
+ }
+-
+- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+- adev->irq.ih.doorbell_index);
+ }
+ }
+
+@@ -1453,10 +1452,11 @@ static int soc15_common_hw_init(void *handle)
+ soc15_enable_doorbell_aperture(adev, true);
+ /* HW doorbell routing policy: doorbell writing not
+ * in SDMA/IH/MM/ACV range will be routed to CP. So
+- * we need to init SDMA/IH/MM/ACV doorbell range prior
+- * to CP ip block init and ring test.
++ * we need to init SDMA doorbell range prior
++ * to CP ip block init and ring test. IH already
++ * happens before CP.
+ */
+- soc15_doorbell_range_init(adev);
++ soc15_sdma_doorbell_range_init(adev);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+index 7232241e3bfb2..0fef925b66024 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+@@ -698,6 +698,19 @@ static int uvd_v3_1_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
++
++ if (RREG32(mmUVD_STATUS) != 0)
++ uvd_v3_1_stop(adev);
++
++ return 0;
++}
++
++static int uvd_v3_1_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -722,17 +735,6 @@ static int uvd_v3_1_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- if (RREG32(mmUVD_STATUS) != 0)
+- uvd_v3_1_stop(adev);
+-
+- return 0;
+-}
+-
+-static int uvd_v3_1_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = uvd_v3_1_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+index 52d6de969f462..c108b83817951 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+@@ -212,6 +212,19 @@ static int uvd_v4_2_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
++
++ if (RREG32(mmUVD_STATUS) != 0)
++ uvd_v4_2_stop(adev);
++
++ return 0;
++}
++
++static int uvd_v4_2_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -236,17 +249,6 @@ static int uvd_v4_2_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- if (RREG32(mmUVD_STATUS) != 0)
+- uvd_v4_2_stop(adev);
+-
+- return 0;
+-}
+-
+-static int uvd_v4_2_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = uvd_v4_2_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+index db6d06758e4d4..563493d1f8306 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+@@ -210,6 +210,19 @@ static int uvd_v5_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
++
++ if (RREG32(mmUVD_STATUS) != 0)
++ uvd_v5_0_stop(adev);
++
++ return 0;
++}
++
++static int uvd_v5_0_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -234,17 +247,6 @@ static int uvd_v5_0_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- if (RREG32(mmUVD_STATUS) != 0)
+- uvd_v5_0_stop(adev);
+-
+- return 0;
+-}
+-
+-static int uvd_v5_0_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = uvd_v5_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+index bc571833632ea..72f8762907681 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+@@ -543,6 +543,19 @@ static int uvd_v6_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
++
++ if (RREG32(mmUVD_STATUS) != 0)
++ uvd_v6_0_stop(adev);
++
++ return 0;
++}
++
++static int uvd_v6_0_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -567,17 +580,6 @@ static int uvd_v6_0_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- if (RREG32(mmUVD_STATUS) != 0)
+- uvd_v6_0_stop(adev);
+-
+- return 0;
+-}
+-
+-static int uvd_v6_0_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = uvd_v6_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+index b6e82d75561f6..1fd9ca21a091b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+@@ -606,6 +606,23 @@ static int uvd_v7_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->uvd.idle_work);
++
++ if (!amdgpu_sriov_vf(adev))
++ uvd_v7_0_stop(adev);
++ else {
++ /* full access mode, so don't touch any UVD register */
++ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
++ }
++
++ return 0;
++}
++
++static int uvd_v7_0_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -630,21 +647,6 @@ static int uvd_v7_0_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- if (!amdgpu_sriov_vf(adev))
+- uvd_v7_0_stop(adev);
+- else {
+- /* full access mode, so don't touch any UVD register */
+- DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+- }
+-
+- return 0;
+-}
+-
+-static int uvd_v7_0_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = uvd_v7_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+index b70c17f0c52e8..98952fd387e73 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+@@ -479,6 +479,17 @@ static int vce_v2_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->vce.idle_work);
++
++ return 0;
++}
++
++static int vce_v2_0_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -502,14 +513,6 @@ static int vce_v2_0_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- return 0;
+-}
+-
+-static int vce_v2_0_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = vce_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+index 9de66893ccd6d..8fb5df7181e09 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+@@ -490,6 +490,21 @@ static int vce_v3_0_hw_fini(void *handle)
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cancel_delayed_work_sync(&adev->vce.idle_work);
++
++ r = vce_v3_0_wait_for_idle(handle);
++ if (r)
++ return r;
++
++ vce_v3_0_stop(adev);
++ return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
++}
++
++static int vce_v3_0_suspend(void *handle)
++{
++ int r;
++ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+@@ -513,19 +528,6 @@ static int vce_v3_0_hw_fini(void *handle)
+ AMD_CG_STATE_GATE);
+ }
+
+- r = vce_v3_0_wait_for_idle(handle);
+- if (r)
+- return r;
+-
+- vce_v3_0_stop(adev);
+- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+-}
+-
+-static int vce_v3_0_suspend(void *handle)
+-{
+- int r;
+- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+-
+ r = vce_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+index fec902b800c28..70b8c88d30513 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+@@ -542,29 +542,8 @@ static int vce_v4_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- /*
+- * Proper cleanups before halting the HW engine:
+- * - cancel the delayed idle work
+- * - enable powergating
+- * - enable clockgating
+- * - disable dpm
+- *
+- * TODO: to align with the VCN implementation, move the
+- * jobs for clockgating/powergating/dpm setting to
+- * ->set_powergating_state().
+- */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+- if (adev->pm.dpm_enabled) {
+- amdgpu_dpm_enable_vce(adev, false);
+- } else {
+- amdgpu_asic_set_vce_clocks(adev, 0, 0);
+- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+- AMD_PG_STATE_GATE);
+- amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+- AMD_CG_STATE_GATE);
+- }
+-
+ if (!amdgpu_sriov_vf(adev)) {
+ /* vce_v4_0_wait_for_idle(handle); */
+ vce_v4_0_stop(adev);
+@@ -594,6 +573,29 @@ static int vce_v4_0_suspend(void *handle)
+ drm_dev_exit(idx);
+ }
+
++ /*
++ * Proper cleanups before halting the HW engine:
++ * - cancel the delayed idle work
++ * - enable powergating
++ * - enable clockgating
++ * - disable dpm
++ *
++ * TODO: to align with the VCN implementation, move the
++ * jobs for clockgating/powergating/dpm setting to
++ * ->set_powergating_state().
++ */
++ cancel_delayed_work_sync(&adev->vce.idle_work);
++
++ if (adev->pm.dpm_enabled) {
++ amdgpu_dpm_enable_vce(adev, false);
++ } else {
++ amdgpu_asic_set_vce_clocks(adev, 0, 0);
++ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
++ AMD_PG_STATE_GATE);
++ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
++ AMD_CG_STATE_GATE);
++ }
++
+ r = vce_v4_0_hw_fini(adev);
+ if (r)
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+index 121ee9f2b8d16..462008d506904 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+@@ -253,6 +253,13 @@ static int vcn_v1_0_suspend(void *handle)
+ {
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++ bool idle_work_unexecuted;
++
++ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
++ if (idle_work_unexecuted) {
++ if (adev->pm.dpm_enabled)
++ amdgpu_dpm_enable_uvd(adev, false);
++ }
+
+ r = vcn_v1_0_hw_fini(adev);
+ if (r)
+diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+index f4686e918e0d1..c405075a572c1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+@@ -22,6 +22,7 @@
+ */
+
+ #include <linux/firmware.h>
++#include <drm/drm_drv.h>
+
+ #include "amdgpu.h"
+ #include "amdgpu_vcn.h"
+@@ -192,11 +193,14 @@ static int vcn_v2_0_sw_init(void *handle)
+ */
+ static int vcn_v2_0_sw_fini(void *handle)
+ {
+- int r;
++ int r, idx;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+
+- fw_shared->present_flag_0 = 0;
++ if (drm_dev_enter(&adev->ddev, &idx)) {
++ fw_shared->present_flag_0 = 0;
++ drm_dev_exit(idx);
++ }
+
+ amdgpu_virt_free_mm_table(adev);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+index e0c0c3734432e..a0956d8623770 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+@@ -22,6 +22,7 @@
+ */
+
+ #include <linux/firmware.h>
++#include <drm/drm_drv.h>
+
+ #include "amdgpu.h"
+ #include "amdgpu_vcn.h"
+@@ -233,17 +234,21 @@ static int vcn_v2_5_sw_init(void *handle)
+ */
+ static int vcn_v2_5_sw_fini(void *handle)
+ {
+- int i, r;
++ int i, r, idx;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+- if (adev->vcn.harvest_config & (1 << i))
+- continue;
+- fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+- fw_shared->present_flag_0 = 0;
++ if (drm_dev_enter(&adev->ddev, &idx)) {
++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
++ if (adev->vcn.harvest_config & (1 << i))
++ continue;
++ fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
++ fw_shared->present_flag_0 = 0;
++ }
++ drm_dev_exit(idx);
+ }
+
++
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+index 3d18aab88b4e2..1310617f030f7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+@@ -601,8 +601,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+- WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+- UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
++ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
++ UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ }
+
+ static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+@@ -1508,8 +1508,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
+
+ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+ {
++ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ uint32_t tmp;
+
++ vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
++
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+index a9ca6988009e3..73728fa859970 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
+ }
+ }
+
++ if (!amdgpu_sriov_vf(adev))
++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
++ adev->irq.ih.doorbell_index);
++
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+index f51dfc38ac656..ac34af4cb178c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
+ }
+ }
+
++ if (!amdgpu_sriov_vf(adev))
++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
++ adev->irq.ih.doorbell_index);
++
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
+index fe9a7cc8d9eb0..b9555ba6d32fb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vi.c
++++ b/drivers/gpu/drm/amd/amdgpu/vi.c
+@@ -542,8 +542,15 @@ static u32 vi_get_xclk(struct amdgpu_device *adev)
+ u32 reference_clock = adev->clock.spll.reference_freq;
+ u32 tmp;
+
+- if (adev->flags & AMD_IS_APU)
+- return reference_clock;
++ if (adev->flags & AMD_IS_APU) {
++ switch (adev->asic_type) {
++ case CHIP_STONEY:
++ /* vbios says 48Mhz, but the actual freq is 100Mhz */
++ return 10000;
++ default:
++ return reference_clock;
++ }
++ }
+
+ tmp = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
+ if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK))
+@@ -956,6 +963,10 @@ static int vi_asic_reset(struct amdgpu_device *adev)
+ {
+ int r;
+
++ /* APUs don't have full asic reset */
++ if (adev->flags & AMD_IS_APU)
++ return 0;
++
+ if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "BACO reset\n");
+ r = amdgpu_dpm_baco_reset(adev);
+@@ -1136,7 +1147,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
+ bool bL1SS = false;
+ bool bClkReqSupport = true;
+
+- if (!amdgpu_aspm)
++ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ return;
+
+ if (adev->flags & AMD_IS_APU ||
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 86afd37b098d6..6688129df240e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -1807,13 +1807,9 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+ if (!args->start_addr || !args->size)
+ return -EINVAL;
+
+- mutex_lock(&p->mutex);
+-
+ r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+ args->attrs);
+
+- mutex_unlock(&p->mutex);
+-
+ return r;
+ }
+ #else
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+index cfedfb1e8596c..e574aa32a111d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -1060,6 +1060,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ return -ENODEV;
+ /* same everything but the other direction */
+ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
++ if (!props2)
++ return -ENOMEM;
++
+ props2->node_from = id_to;
+ props2->node_to = id_from;
+ props2->kobj = NULL;
+@@ -1560,7 +1563,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+ /* Fetch the CRAT table from ACPI */
+ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+ if (status == AE_NOT_FOUND) {
+- pr_warn("CRAT table not found\n");
++ pr_info("CRAT table not found\n");
+ return -ENODATA;
+ } else if (ACPI_FAILURE(status)) {
+ const char *err = acpi_format_exception(status);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 4a416231b24c8..660eb7097cfc0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -834,15 +834,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ }
+
+ /* Verify module parameters regarding mapped process number*/
+- if ((hws_max_conc_proc < 0)
+- || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
+- dev_err(kfd_device,
+- "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
+- hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
+- kfd->vm_info.vmid_num_kfd);
++ if (hws_max_conc_proc >= 0)
++ kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
++ else
+ kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+- } else
+- kfd->max_proc_per_quantum = hws_max_conc_proc;
+
+ /* calculate max size of mqds needed for queues */
+ size = max_num_of_queues_per_device *
+@@ -916,6 +911,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ kfd_double_confirm_iommu_support(kfd);
+
+ if (kfd_iommu_device_init(kfd)) {
++ kfd->use_iommu_v2 = false;
+ dev_err(kfd_device, "Error initializing iommuv2\n");
+ goto device_iommu_error;
+ }
+@@ -924,6 +920,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+
+ svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+
++ if(kgd2kfd_resume_iommu(kfd))
++ goto device_iommu_error;
++
+ if (kfd_resume(kfd))
+ goto kfd_resume_error;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index f8fce9d05f50c..442857f3bde77 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -138,19 +138,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
+ }
+
+ static void increment_queue_count(struct device_queue_manager *dqm,
+- enum kfd_queue_type type)
++ struct qcm_process_device *qpd,
++ struct queue *q)
+ {
+ dqm->active_queue_count++;
+- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
++ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
++ q->properties.type == KFD_QUEUE_TYPE_DIQ)
+ dqm->active_cp_queue_count++;
++
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count++;
++ qpd->mapped_gws_queue = true;
++ }
+ }
+
+ static void decrement_queue_count(struct device_queue_manager *dqm,
+- enum kfd_queue_type type)
++ struct qcm_process_device *qpd,
++ struct queue *q)
+ {
+ dqm->active_queue_count--;
+- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
++ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
++ q->properties.type == KFD_QUEUE_TYPE_DIQ)
+ dqm->active_cp_queue_count--;
++
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
+ }
+
+ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+@@ -390,7 +404,7 @@ add_queue_to_list:
+ list_add(&q->list, &qpd->queues_list);
+ qpd->queue_count++;
+ if (q->properties.is_active)
+- increment_queue_count(dqm, q->properties.type);
++ increment_queue_count(dqm, qpd, q);
+
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+@@ -515,13 +529,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
+ deallocate_vmid(dqm, qpd, q);
+ }
+ qpd->queue_count--;
+- if (q->properties.is_active) {
+- decrement_queue_count(dqm, q->properties.type);
+- if (q->properties.is_gws) {
+- dqm->gws_queue_count--;
+- qpd->mapped_gws_queue = false;
+- }
+- }
++ if (q->properties.is_active)
++ decrement_queue_count(dqm, qpd, q);
+
+ return retval;
+ }
+@@ -613,12 +622,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ * dqm->active_queue_count to determine whether a new runlist must be
+ * uploaded.
+ */
+- if (q->properties.is_active && !prev_active)
+- increment_queue_count(dqm, q->properties.type);
+- else if (!q->properties.is_active && prev_active)
+- decrement_queue_count(dqm, q->properties.type);
+-
+- if (q->gws && !q->properties.is_gws) {
++ if (q->properties.is_active && !prev_active) {
++ increment_queue_count(dqm, &pdd->qpd, q);
++ } else if (!q->properties.is_active && prev_active) {
++ decrement_queue_count(dqm, &pdd->qpd, q);
++ } else if (q->gws && !q->properties.is_gws) {
+ if (q->properties.is_active) {
+ dqm->gws_queue_count++;
+ pdd->qpd.mapped_gws_queue = true;
+@@ -680,11 +688,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
+ q->properties.is_active = false;
+- decrement_queue_count(dqm, q->properties.type);
+- if (q->properties.is_gws) {
+- dqm->gws_queue_count--;
+- qpd->mapped_gws_queue = false;
+- }
++ decrement_queue_count(dqm, qpd, q);
+
+ if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
+ continue;
+@@ -730,7 +734,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
+ continue;
+
+ q->properties.is_active = false;
+- decrement_queue_count(dqm, q->properties.type);
++ decrement_queue_count(dqm, qpd, q);
+ }
+ pdd->last_evict_timestamp = get_jiffies_64();
+ retval = execute_queues_cpsch(dqm,
+@@ -801,11 +805,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
+ q->properties.is_active = true;
+- increment_queue_count(dqm, q->properties.type);
+- if (q->properties.is_gws) {
+- dqm->gws_queue_count++;
+- qpd->mapped_gws_queue = true;
+- }
++ increment_queue_count(dqm, qpd, q);
+
+ if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
+ continue;
+@@ -863,7 +863,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
+ continue;
+
+ q->properties.is_active = true;
+- increment_queue_count(dqm, q->properties.type);
++ increment_queue_count(dqm, &pdd->qpd, q);
+ }
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+@@ -1225,6 +1225,11 @@ static int stop_cpsch(struct device_queue_manager *dqm)
+ bool hanging;
+
+ dqm_lock(dqm);
++ if (!dqm->sched_running) {
++ dqm_unlock(dqm);
++ return 0;
++ }
++
+ if (!dqm->is_hws_hang)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ hanging = dqm->is_hws_hang || dqm->is_resetting;
+@@ -1260,7 +1265,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ dqm->total_queue_count);
+
+ list_add(&kq->list, &qpd->priv_queue_list);
+- increment_queue_count(dqm, kq->queue->properties.type);
++ increment_queue_count(dqm, qpd, kq->queue);
+ qpd->is_debug = true;
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ dqm_unlock(dqm);
+@@ -1274,7 +1279,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
+ {
+ dqm_lock(dqm);
+ list_del(&kq->list);
+- decrement_queue_count(dqm, kq->queue->properties.type);
++ decrement_queue_count(dqm, qpd, kq->queue);
+ qpd->is_debug = false;
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ /*
+@@ -1341,7 +1346,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ qpd->queue_count++;
+
+ if (q->properties.is_active) {
+- increment_queue_count(dqm, q->properties.type);
++ increment_queue_count(dqm, qpd, q);
+
+ execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+@@ -1543,15 +1548,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ list_del(&q->list);
+ qpd->queue_count--;
+ if (q->properties.is_active) {
+- decrement_queue_count(dqm, q->properties.type);
++ decrement_queue_count(dqm, qpd, q);
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
+- if (q->properties.is_gws) {
+- dqm->gws_queue_count--;
+- qpd->mapped_gws_queue = false;
+- }
+ }
+
+ /*
+@@ -1742,7 +1743,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ /* Clean all kernel queues */
+ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
+ list_del(&kq->list);
+- decrement_queue_count(dqm, kq->queue->properties.type);
++ decrement_queue_count(dqm, qpd, kq->queue);
+ qpd->is_debug = false;
+ dqm->total_queue_count--;
+ filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
+@@ -1755,13 +1756,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ deallocate_sdma_queue(dqm, q);
+
+- if (q->properties.is_active) {
+- decrement_queue_count(dqm, q->properties.type);
+- if (q->properties.is_gws) {
+- dqm->gws_queue_count--;
+- qpd->mapped_gws_queue = false;
+- }
+- }
++ if (q->properties.is_active)
++ decrement_queue_count(dqm, qpd, q);
+
+ dqm->total_queue_count--;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index 3eea4edee355d..8b5c82af2acd7 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -528,14 +528,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
+ struct kfd_event_waiter *event_waiters;
+ uint32_t i;
+
+- event_waiters = kmalloc_array(num_events,
+- sizeof(struct kfd_event_waiter),
+- GFP_KERNEL);
++ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
++ GFP_KERNEL);
++ if (!event_waiters)
++ return NULL;
+
+- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
++ for (i = 0; i < num_events; i++)
+ init_wait(&event_waiters[i].wait);
+- event_waiters[i].activated = false;
+- }
+
+ return event_waiters;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+index 4a16e3c257b92..131d98c600eed 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+@@ -780,7 +780,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+ {
+ unsigned long addr = vmf->address;
+- struct vm_area_struct *vma;
++ struct svm_range_bo *svm_bo;
+ enum svm_work_list_ops op;
+ struct svm_range *parent;
+ struct svm_range *prange;
+@@ -788,24 +788,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+ struct mm_struct *mm;
+ int r = 0;
+
+- vma = vmf->vma;
+- mm = vma->vm_mm;
++ svm_bo = vmf->page->zone_device_data;
++ if (!svm_bo) {
++ pr_debug("failed get device page at addr 0x%lx\n", addr);
++ return VM_FAULT_SIGBUS;
++ }
++ if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
++ pr_debug("addr 0x%lx of process mm is detroyed\n", addr);
++ return VM_FAULT_SIGBUS;
++ }
+
+- p = kfd_lookup_process_by_mm(vma->vm_mm);
++ mm = svm_bo->eviction_fence->mm;
++ if (mm != vmf->vma->vm_mm)
++ pr_debug("addr 0x%lx is COW mapping in child process\n", addr);
++
++ p = kfd_lookup_process_by_mm(mm);
+ if (!p) {
+ pr_debug("failed find process at fault address 0x%lx\n", addr);
+- return VM_FAULT_SIGBUS;
++ r = VM_FAULT_SIGBUS;
++ goto out_mmput;
+ }
+- addr >>= PAGE_SHIFT;
++ if (READ_ONCE(p->svms.faulting_task) == current) {
++ pr_debug("skipping ram migration\n");
++ r = 0;
++ goto out_unref_process;
++ }
++
+ pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
++ addr >>= PAGE_SHIFT;
+
+ mutex_lock(&p->svms.lock);
+
+ prange = svm_range_from_addr(&p->svms, addr, &parent);
+ if (!prange) {
+- pr_debug("cannot find svm range at 0x%lx\n", addr);
++ pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
+ r = -EFAULT;
+- goto out;
++ goto out_unlock_svms;
+ }
+
+ mutex_lock(&parent->migrate_mutex);
+@@ -827,10 +845,10 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+ goto out_unlock_prange;
+ }
+
+- r = svm_migrate_vram_to_ram(prange, mm);
++ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm);
+ if (r)
+- pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
+- prange, prange->start, prange->last);
++ pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
++ r, prange->svms, prange, prange->start, prange->last);
+
+ /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+ if (p->xnack_enabled && parent == prange)
+@@ -844,12 +862,13 @@ out_unlock_prange:
+ if (prange != parent)
+ mutex_unlock(&prange->migrate_mutex);
+ mutex_unlock(&parent->migrate_mutex);
+-out:
++out_unlock_svms:
+ mutex_unlock(&p->svms.lock);
+- kfd_unref_process(p);
+-
++out_unref_process:
+ pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
+-
++ kfd_unref_process(p);
++out_mmput:
++ mmput(mm);
+ return r ? VM_FAULT_SIGBUS : 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+index 7f4e102ff4bd3..ddaafcd7b8256 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+@@ -113,18 +113,19 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ &(mqd_mem_obj->gtt_mem),
+ &(mqd_mem_obj->gpu_addr),
+ (void *)&(mqd_mem_obj->cpu_ptr), true);
++
++ if (retval) {
++ kfree(mqd_mem_obj);
++ return NULL;
++ }
+ } else {
+ retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+ &mqd_mem_obj);
+- }
+-
+- if (retval) {
+- kfree(mqd_mem_obj);
+- return NULL;
++ if (retval)
++ return NULL;
+ }
+
+ return mqd_mem_obj;
+-
+ }
+
+ static void init_mqd(struct mqd_manager *mm, void **mqd,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 6d8f9bb2d9057..47ec820cae72b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -755,6 +755,7 @@ struct svm_range_list {
+ atomic_t evicted_ranges;
+ struct delayed_work restore_work;
+ DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
++ struct task_struct *faulting_task;
+ };
+
+ /* Process data */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+index ed4bc5f844ce7..766b3660c8c86 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+@@ -270,15 +270,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
+ return ret;
+ }
+
+- ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+- O_RDWR);
+- if (ret < 0) {
+- kfifo_free(&client->fifo);
+- kfree(client);
+- return ret;
+- }
+- *fd = ret;
+-
+ init_waitqueue_head(&client->wait_queue);
+ spin_lock_init(&client->lock);
+ client->events = 0;
+@@ -288,5 +279,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
+ list_add_rcu(&client->list, &dev->smi_clients);
+ spin_unlock(&dev->smi_lock);
+
++ ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
++ O_RDWR);
++ if (ret < 0) {
++ spin_lock(&dev->smi_lock);
++ list_del_rcu(&client->list);
++ spin_unlock(&dev->smi_lock);
++
++ synchronize_rcu();
++
++ kfifo_free(&client->fifo);
++ kfree(client);
++ return ret;
++ }
++ *fd = ret;
++
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+index 9d0f65a90002d..22a70aaccf13c 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+@@ -936,7 +936,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
+ }
+
+ static int
+-svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
++svm_range_split_tail(struct svm_range *prange,
+ uint64_t new_last, struct list_head *insert_list)
+ {
+ struct svm_range *tail;
+@@ -948,7 +948,7 @@ svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+ }
+
+ static int
+-svm_range_split_head(struct svm_range *prange, struct svm_range *new,
++svm_range_split_head(struct svm_range *prange,
+ uint64_t new_start, struct list_head *insert_list)
+ {
+ struct svm_range *head;
+@@ -1307,7 +1307,7 @@ struct svm_validate_context {
+ struct svm_range *prange;
+ bool intr;
+ unsigned long bitmap[MAX_GPU_INSTANCE];
+- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
++ struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
+ struct list_head validate_list;
+ struct ww_acquire_ctx ticket;
+ };
+@@ -1334,11 +1334,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+ ctx->tv[gpuidx].num_shared = 4;
+ list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
+ }
+- if (ctx->prange->svm_bo && ctx->prange->ttm_res) {
+- ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo;
+- ctx->tv[MAX_GPU_INSTANCE].num_shared = 1;
+- list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list);
+- }
+
+ r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
+ ctx->intr, NULL);
+@@ -1494,9 +1489,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
+
+ next = min(vma->vm_end, end);
+ npages = (next - addr) >> PAGE_SHIFT;
++ WRITE_ONCE(p->svms.faulting_task, current);
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ addr, npages, &hmm_range,
+ readonly, true, owner);
++ WRITE_ONCE(p->svms.faulting_task, NULL);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+@@ -1570,7 +1567,6 @@ retry_flush_work:
+ static void svm_range_restore_work(struct work_struct *work)
+ {
+ struct delayed_work *dwork = to_delayed_work(work);
+- struct amdkfd_process_info *process_info;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct kfd_process *p;
+@@ -1590,12 +1586,10 @@ static void svm_range_restore_work(struct work_struct *work)
+ * the lifetime of this thread, kfd_process and mm will be valid.
+ */
+ p = container_of(svms, struct kfd_process, svms);
+- process_info = p->kgd_process_info;
+ mm = p->mm;
+ if (!mm)
+ return;
+
+- mutex_lock(&process_info->lock);
+ svm_range_list_lock_and_flush_work(svms, mm);
+ mutex_lock(&svms->lock);
+
+@@ -1648,7 +1642,6 @@ static void svm_range_restore_work(struct work_struct *work)
+ out_reschedule:
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+- mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_ranges) {
+@@ -1764,49 +1757,54 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
+ }
+
+ /**
+- * svm_range_handle_overlap - split overlap ranges
+- * @svms: svm range list header
+- * @new: range added with this attributes
+- * @start: range added start address, in pages
+- * @last: range last address, in pages
+- * @update_list: output, the ranges attributes are updated. For set_attr, this
+- * will do validation and map to GPUs. For unmap, this will be
+- * removed and unmap from GPUs
+- * @insert_list: output, the ranges will be inserted into svms, attributes are
+- * not changes. For set_attr, this will add into svms.
+- * @remove_list:output, the ranges will be removed from svms
+- * @left: the remaining range after overlap, For set_attr, this will be added
+- * as new range.
++ * svm_range_add - add svm range and handle overlap
++ * @p: the range add to this process svms
++ * @start: page size aligned
++ * @size: page size aligned
++ * @nattr: number of attributes
++ * @attrs: array of attributes
++ * @update_list: output, the ranges that need validation and GPU mapping updates
++ * @insert_list: output, the ranges that need to be inserted into svms
++ * @remove_list: output, the ranges that are replaced and need to be removed from svms
+ *
+- * Total have 5 overlap cases.
++ * Check if the virtual address range has overlap with any existing ranges,
++ * split partly overlapping ranges and add new ranges in the gaps. All changes
++ * should be applied to the range_list and interval tree transactionally. If
++ * any range split or allocation fails, the entire update fails. Therefore any
++ * existing overlapping svm_ranges are cloned and the original svm_ranges left
++ * unchanged.
+ *
+- * This function handles overlap of an address interval with existing
+- * struct svm_ranges for applying new attributes. This may require
+- * splitting existing struct svm_ranges. All changes should be applied to
+- * the range_list and interval tree transactionally. If any split operation
+- * fails, the entire update fails. Therefore the existing overlapping
+- * svm_ranges are cloned and the original svm_ranges left unchanged. If the
+- * transaction succeeds, the modified clones are added and the originals
+- * freed. Otherwise the clones are removed and the old svm_ranges remain.
++ * If the transaction succeeds, the caller can update and insert clones and
++ * new ranges, then free the originals.
+ *
+- * Context: The caller must hold svms->lock
++ * Otherwise the caller can free the clones and new ranges, while the old
++ * svm_ranges remain unchanged.
++ *
++ * Context: Process context, caller must hold svms->lock
++ *
++ * Return:
++ * 0 - OK, otherwise error code
+ */
+ static int
+-svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+- unsigned long start, unsigned long last,
+- struct list_head *update_list,
+- struct list_head *insert_list,
+- struct list_head *remove_list,
+- unsigned long *left)
++svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
++ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
++ struct list_head *update_list, struct list_head *insert_list,
++ struct list_head *remove_list)
+ {
++ unsigned long last = start + size - 1UL;
++ struct svm_range_list *svms = &p->svms;
+ struct interval_tree_node *node;
++ struct svm_range new = {0};
+ struct svm_range *prange;
+ struct svm_range *tmp;
+ int r = 0;
+
++ pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
++
+ INIT_LIST_HEAD(update_list);
+ INIT_LIST_HEAD(insert_list);
+ INIT_LIST_HEAD(remove_list);
++ svm_range_apply_attrs(p, &new, nattr, attrs);
+
+ node = interval_tree_iter_first(&svms->objects, start, last);
+ while (node) {
+@@ -1834,14 +1832,14 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+
+ if (node->start < start) {
+ pr_debug("change old range start\n");
+- r = svm_range_split_head(prange, new, start,
++ r = svm_range_split_head(prange, start,
+ insert_list);
+ if (r)
+ goto out;
+ }
+ if (node->last > last) {
+ pr_debug("change old range last\n");
+- r = svm_range_split_tail(prange, new, last,
++ r = svm_range_split_tail(prange, last,
+ insert_list);
+ if (r)
+ goto out;
+@@ -1853,7 +1851,7 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+ prange = old;
+ }
+
+- if (!svm_range_is_same_attrs(prange, new))
++ if (!svm_range_is_same_attrs(prange, &new))
+ list_add(&prange->update_list, update_list);
+
+ /* insert a new node if needed */
+@@ -1873,8 +1871,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+ start = next_start;
+ }
+
+- if (left && start <= last)
+- *left = last - start + 1;
++ /* add a final range at the end if needed */
++ if (start <= last) {
++ prange = svm_range_new(svms, start, last);
++ if (!prange) {
++ r = -ENOMEM;
++ goto out;
++ }
++ list_add(&prange->insert_list, insert_list);
++ list_add(&prange->update_list, update_list);
++ }
+
+ out:
+ if (r)
+@@ -2177,6 +2183,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+
+ if (range->event == MMU_NOTIFY_RELEASE)
+ return true;
++ if (!mmget_not_zero(mni->mm))
++ return true;
+
+ start = mni->interval_tree.start;
+ last = mni->interval_tree.last;
+@@ -2203,6 +2211,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+ }
+
+ svm_range_unlock(prange);
++ mmput(mni->mm);
+
+ return true;
+ }
+@@ -2702,59 +2711,6 @@ svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
+ return true;
+ }
+
+-/**
+- * svm_range_add - add svm range and handle overlap
+- * @p: the range add to this process svms
+- * @start: page size aligned
+- * @size: page size aligned
+- * @nattr: number of attributes
+- * @attrs: array of attributes
+- * @update_list: output, the ranges need validate and update GPU mapping
+- * @insert_list: output, the ranges need insert to svms
+- * @remove_list: output, the ranges are replaced and need remove from svms
+- *
+- * Check if the virtual address range has overlap with the registered ranges,
+- * split the overlapped range, copy and adjust pages address and vram nodes in
+- * old and new ranges.
+- *
+- * Context: Process context, caller must hold svms->lock
+- *
+- * Return:
+- * 0 - OK, otherwise error code
+- */
+-static int
+-svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+- struct list_head *update_list, struct list_head *insert_list,
+- struct list_head *remove_list)
+-{
+- uint64_t last = start + size - 1UL;
+- struct svm_range_list *svms;
+- struct svm_range new = {0};
+- struct svm_range *prange;
+- unsigned long left = 0;
+- int r = 0;
+-
+- pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
+-
+- svm_range_apply_attrs(p, &new, nattr, attrs);
+-
+- svms = &p->svms;
+-
+- r = svm_range_handle_overlap(svms, &new, start, last, update_list,
+- insert_list, remove_list, &left);
+- if (r)
+- return r;
+-
+- if (left) {
+- prange = svm_range_new(svms, last - left + 1, last);
+- list_add(&prange->insert_list, insert_list);
+- list_add(&prange->update_list, update_list);
+- }
+-
+- return 0;
+-}
+-
+ /**
+ * svm_range_best_prefetch_location - decide the best prefetch location
+ * @prange: svm range structure
+@@ -2979,7 +2935,6 @@ static int
+ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+ {
+- struct amdkfd_process_info *process_info = p->kgd_process_info;
+ struct mm_struct *mm = current->mm;
+ struct list_head update_list;
+ struct list_head insert_list;
+@@ -2998,8 +2953,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+
+ svms = &p->svms;
+
+- mutex_lock(&process_info->lock);
+-
+ svm_range_list_lock_and_flush_work(svms, mm);
+
+ if (!svm_range_is_valid(mm, start, size)) {
+@@ -3075,8 +3028,6 @@ out_unlock_range:
+ mutex_unlock(&svms->lock);
+ mmap_read_unlock(mm);
+ out:
+- mutex_unlock(&process_info->lock);
+-
+ pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
+ &p->svms, start, start + size - 1, r);
+
+diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
+index 127667e549c19..f25a2c80afcfd 100644
+--- a/drivers/gpu/drm/amd/display/Kconfig
++++ b/drivers/gpu/drm/amd/display/Kconfig
+@@ -5,6 +5,7 @@ menu "Display Engine Configuration"
+ config DRM_AMD_DC
+ bool "AMD DC - Enable new display engine"
+ default y
++ depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
+ select SND_HDA_COMPONENT if SND_HDA_CORE
+ select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+ help
+@@ -12,6 +13,12 @@ config DRM_AMD_DC
+ support for AMDGPU. This adds required support for Vega and
+ Raven ASICs.
+
++ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
++ architectures built with Clang (all released versions), whereby the stack
++ frame gets blown up to well over 5k. This would cause an immediate kernel
++ panic on most architectures. We'll revert this when the following bug report
++ has been resolved: https://github.com/llvm/llvm-project/issues/41896.
++
+ config DRM_AMD_DC_DCN
+ def_bool n
+ help
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 1ea31dcc7a8b0..4cf33abfb7cca 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -70,6 +70,7 @@
+ #include <linux/pci.h>
+ #include <linux/firmware.h>
+ #include <linux/component.h>
++#include <linux/dmi.h>
+
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_atomic_uapi.h>
+@@ -215,6 +216,8 @@ static void handle_cursor_update(struct drm_plane *plane,
+ static const struct drm_format_info *
+ amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
+
++static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
++
+ static bool
+ is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
+ struct drm_crtc_state *new_crtc_state);
+@@ -350,6 +353,35 @@ static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
+ return false;
+ }
+
++/**
++ * update_planes_and_stream_adapter() - Send planes to be updated in DC
++ *
++ * DC has a generic way to update planes and stream via the
++ * dc_update_planes_and_stream function; however, DM might need some
++ * adjustments and preparation before calling it. This function is a wrapper
++ * for dc_update_planes_and_stream that does any required configuration
++ * before passing control to DC.
++ */
++static inline bool update_planes_and_stream_adapter(struct dc *dc,
++ int update_type,
++ int planes_count,
++ struct dc_stream_state *stream,
++ struct dc_stream_update *stream_update,
++ struct dc_surface_update *array_of_surface_update)
++{
++ /*
++ * Previous frame finished and HW is ready for optimization.
++ */
++ if (update_type == UPDATE_TYPE_FAST)
++ dc_post_update_surfaces_to_stream(dc);
++
++ return dc_update_planes_and_stream(dc,
++ array_of_surface_update,
++ planes_count,
++ stream,
++ stream_update);
++}
++
+ /**
+ * dm_pflip_high_irq() - Handle pageflip interrupt
+ * @interrupt_params: ignored
+@@ -618,6 +650,113 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
+ }
+ #endif
+
++/**
++ * dmub_aux_setconfig_callback - Callback for AUX or SET_CONFIG command.
++ * @adev: amdgpu_device pointer
++ * @notify: dmub notification structure
++ *
++ * DMUB AUX or SET_CONFIG command completion processing callback.
++ * Copies the dmub notification to DM, which is to be read by the AUX command
++ * issuing thread, and also signals the event to wake up that thread.
++ */
++void dmub_aux_setconfig_callback(struct amdgpu_device *adev, struct dmub_notification *notify)
++{
++ if (adev->dm.dmub_notify)
++ memcpy(adev->dm.dmub_notify, notify, sizeof(struct dmub_notification));
++ if (notify->type == DMUB_NOTIFICATION_AUX_REPLY)
++ complete(&adev->dm.dmub_aux_transfer_done);
++}
++
++/**
++ * dmub_hpd_callback - DMUB HPD interrupt processing callback.
++ * @adev: amdgpu_device pointer
++ * @notify: dmub notification structure
++ *
++ * DMUB HPD interrupt processing callback. Gets the display index from the
++ * link index and calls the helper to do the processing.
++ */
++void dmub_hpd_callback(struct amdgpu_device *adev, struct dmub_notification *notify)
++{
++ struct amdgpu_dm_connector *aconnector;
++ struct drm_connector *connector;
++ struct drm_connector_list_iter iter;
++ struct dc_link *link;
++ uint8_t link_index = 0;
++ struct drm_device *dev;
++
++ if (adev == NULL)
++ return;
++
++ if (notify == NULL) {
++ DRM_ERROR("DMUB HPD callback notification was NULL");
++ return;
++ }
++
++ if (notify->link_index > adev->dm.dc->link_count) {
++		DRM_ERROR("DMUB HPD index (%u) is abnormal", notify->link_index);
++ return;
++ }
++
++ link_index = notify->link_index;
++ link = adev->dm.dc->links[link_index];
++ dev = adev->dm.ddev;
++
++ drm_connector_list_iter_begin(dev, &iter);
++ drm_for_each_connector_iter(connector, &iter) {
++ aconnector = to_amdgpu_dm_connector(connector);
++ if (link && aconnector->dc_link == link) {
++ DRM_INFO("DMUB HPD callback: link_index=%u\n", link_index);
++ handle_hpd_irq_helper(aconnector);
++ break;
++ }
++ }
++ drm_connector_list_iter_end(&iter);
++
++}
++
++/**
++ * register_dmub_notify_callback - Sets callback for DMUB notify
++ * @adev: amdgpu_device pointer
++ * @type: Type of dmub notification
++ * @callback: Dmub interrupt callback function
++ * @dmub_int_thread_offload: offload indicator
++ *
++ * API to register a dmub callback handler for a dmub notification
++ * Also sets an indicator whether the callback processing is to be offloaded
++ * to the dmub interrupt handling thread.
++ * Return: true if successfully registered, false if there is existing registration
++ */
++bool register_dmub_notify_callback(struct amdgpu_device *adev, enum dmub_notification_type type,
++dmub_notify_interrupt_callback_t callback, bool dmub_int_thread_offload)
++{
++ if (callback != NULL && type < ARRAY_SIZE(adev->dm.dmub_thread_offload)) {
++ adev->dm.dmub_callback[type] = callback;
++ adev->dm.dmub_thread_offload[type] = dmub_int_thread_offload;
++ } else
++ return false;
++
++ return true;
++}
++
++static void dm_handle_hpd_work(struct work_struct *work)
++{
++ struct dmub_hpd_work *dmub_hpd_wrk;
++
++ dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work);
++
++ if (!dmub_hpd_wrk->dmub_notify) {
++ DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL");
++ return;
++ }
++
++ if (dmub_hpd_wrk->dmub_notify->type < ARRAY_SIZE(dmub_hpd_wrk->adev->dm.dmub_callback)) {
++ dmub_hpd_wrk->adev->dm.dmub_callback[dmub_hpd_wrk->dmub_notify->type](dmub_hpd_wrk->adev,
++ dmub_hpd_wrk->dmub_notify);
++ }
++ kfree(dmub_hpd_wrk);
++
++}
++
+ #define DMUB_TRACE_MAX_READ 64
+ /**
+ * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
+@@ -634,18 +773,33 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct dmcub_trace_buf_entry entry = { 0 };
+ uint32_t count = 0;
++ struct dmub_hpd_work *dmub_hpd_wrk;
+
+ if (dc_enable_dmub_notifications(adev->dm.dc)) {
++ dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC);
++ if (!dmub_hpd_wrk) {
++ DRM_ERROR("Failed to allocate dmub_hpd_wrk");
++ return;
++ }
++ INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work);
++
+ if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
+ do {
+ dc_stat_get_dmub_notification(adev->dm.dc, &notify);
+- } while (notify.pending_notification);
++ if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
++ DRM_ERROR("DM: notify type %d larger than the array size %zu!", notify.type,
++ ARRAY_SIZE(dm->dmub_thread_offload));
++ continue;
++ }
++ if (dm->dmub_thread_offload[notify.type] == true) {
++ dmub_hpd_wrk->dmub_notify = &notify;
++ dmub_hpd_wrk->adev = adev;
++ queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work);
++ } else {
++ dm->dmub_callback[notify.type](adev, &notify);
++ }
+
+- if (adev->dm.dmub_notify)
+- memcpy(adev->dm.dmub_notify, &notify, sizeof(struct dmub_notification));
+- if (notify.type == DMUB_NOTIFICATION_AUX_REPLY)
+- complete(&adev->dm.dmub_aux_transfer_done);
+- // TODO : HPD Implementation
++ } while (notify.pending_notification);
+
+ } else {
+ DRM_ERROR("DM: Failed to receive correct outbox IRQ !");
+@@ -900,6 +1054,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
+ return 0;
+ }
+
++ /* Reset DMCUB if it was previously running - before we overwrite its memory. */
++ status = dmub_srv_hw_reset(dmub_srv);
++ if (status != DMUB_STATUS_OK)
++ DRM_WARN("Error resetting DMUB HW: %d\n", status);
++
+ hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data;
+
+ fw_inst_const = dmub_fw->data +
+@@ -989,6 +1148,32 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
+ return 0;
+ }
+
++static void dm_dmub_hw_resume(struct amdgpu_device *adev)
++{
++ struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
++ enum dmub_status status;
++ bool init;
++
++ if (!dmub_srv) {
++ /* DMUB isn't supported on the ASIC. */
++ return;
++ }
++
++ status = dmub_srv_is_hw_init(dmub_srv, &init);
++ if (status != DMUB_STATUS_OK)
++ DRM_WARN("DMUB hardware init check failed: %d\n", status);
++
++ if (status == DMUB_STATUS_OK && init) {
++ /* Wait for firmware load to finish. */
++ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
++ if (status != DMUB_STATUS_OK)
++ DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
++ } else {
++ /* Perform the full hardware initialization. */
++ dm_dmub_hw_init(adev);
++ }
++}
++
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
+ {
+@@ -1083,6 +1268,194 @@ static void vblank_control_worker(struct work_struct *work)
+ }
+
+ #endif
++
++static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
++{
++ struct hpd_rx_irq_offload_work *offload_work;
++ struct amdgpu_dm_connector *aconnector;
++ struct dc_link *dc_link;
++ struct amdgpu_device *adev;
++ enum dc_connection_type new_connection_type = dc_connection_none;
++ unsigned long flags;
++
++ offload_work = container_of(work, struct hpd_rx_irq_offload_work, work);
++ aconnector = offload_work->offload_wq->aconnector;
++
++ if (!aconnector) {
++ DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work");
++ goto skip;
++ }
++
++ adev = drm_to_adev(aconnector->base.dev);
++ dc_link = aconnector->dc_link;
++
++ mutex_lock(&aconnector->hpd_lock);
++ if (!dc_link_detect_sink(dc_link, &new_connection_type))
++ DRM_ERROR("KMS: Failed to detect connector\n");
++ mutex_unlock(&aconnector->hpd_lock);
++
++ if (new_connection_type == dc_connection_none)
++ goto skip;
++
++ if (amdgpu_in_reset(adev))
++ goto skip;
++
++ mutex_lock(&adev->dm.dc_lock);
++ if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST)
++ dc_link_dp_handle_automated_test(dc_link);
++ else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
++ hpd_rx_irq_check_link_loss_status(dc_link, &offload_work->data) &&
++ dc_link_dp_allow_hpd_rx_irq(dc_link)) {
++ dc_link_dp_handle_link_loss(dc_link);
++ spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags);
++ offload_work->offload_wq->is_handling_link_loss = false;
++ spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags);
++ }
++ mutex_unlock(&adev->dm.dc_lock);
++
++skip:
++ kfree(offload_work);
++
++}
++
++static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc)
++{
++ int max_caps = dc->caps.max_links;
++ int i = 0;
++ struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL;
++
++ hpd_rx_offload_wq = kcalloc(max_caps, sizeof(*hpd_rx_offload_wq), GFP_KERNEL);
++
++ if (!hpd_rx_offload_wq)
++ return NULL;
++
++
++ for (i = 0; i < max_caps; i++) {
++ hpd_rx_offload_wq[i].wq =
++ create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq");
++
++ if (hpd_rx_offload_wq[i].wq == NULL) {
++ DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!");
++ goto out_err;
++ }
++
++ spin_lock_init(&hpd_rx_offload_wq[i].offload_lock);
++ }
++
++ return hpd_rx_offload_wq;
++
++out_err:
++ for (i = 0; i < max_caps; i++) {
++ if (hpd_rx_offload_wq[i].wq)
++ destroy_workqueue(hpd_rx_offload_wq[i].wq);
++ }
++ kfree(hpd_rx_offload_wq);
++ return NULL;
++}
++
++struct amdgpu_stutter_quirk {
++ u16 chip_vendor;
++ u16 chip_device;
++ u16 subsys_vendor;
++ u16 subsys_device;
++ u8 revision;
++};
++
++static const struct amdgpu_stutter_quirk amdgpu_stutter_quirk_list[] = {
++ /* https://bugzilla.kernel.org/show_bug.cgi?id=214417 */
++ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
++ { 0, 0, 0, 0, 0 },
++};
++
++static bool dm_should_disable_stutter(struct pci_dev *pdev)
++{
++ const struct amdgpu_stutter_quirk *p = amdgpu_stutter_quirk_list;
++
++ while (p && p->chip_device != 0) {
++ if (pdev->vendor == p->chip_vendor &&
++ pdev->device == p->chip_device &&
++ pdev->subsystem_vendor == p->subsys_vendor &&
++ pdev->subsystem_device == p->subsys_device &&
++ pdev->revision == p->revision) {
++ return true;
++ }
++ ++p;
++ }
++ return false;
++}
++
++static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
++ },
++ },
++ {}
++ /* TODO: refactor this from a fixed table to a dynamic option */
++};
++
++static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
++{
++ const struct dmi_system_id *dmi_id;
++
++ dm->aux_hpd_discon_quirk = false;
++
++ dmi_id = dmi_first_match(hpd_disconnect_quirk_table);
++ if (dmi_id) {
++ dm->aux_hpd_discon_quirk = true;
++ DRM_INFO("aux_hpd_discon_quirk attached\n");
++ }
++}
++
+ static int amdgpu_dm_init(struct amdgpu_device *adev)
+ {
+ struct dc_init_data init_data;
+@@ -1141,8 +1514,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ init_data.flags.gpu_vm_support = true;
+- if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
++ switch (adev->dm.dmcub_fw_version) {
++ case 0: /* development */
++ case 0x1: /* linux-firmware.git hash 6d9f399 */
++ case 0x01000000: /* linux-firmware.git hash 9a0b0f4 */
++ init_data.flags.disable_dmcu = false;
++ break;
++ default:
+ init_data.flags.disable_dmcu = true;
++ }
+ break;
+ case CHIP_VANGOGH:
+ case CHIP_YELLOW_CARP:
+@@ -1167,6 +1547,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
+ init_data.flags.power_down_display_on_boot = true;
+
+ INIT_LIST_HEAD(&adev->dm.da_list);
++
++ retrieve_dmi_info(&adev->dm);
++
+ /* Display Core create. */
+ adev->dm.dc = dc_create(&init_data);
+
+@@ -1184,6 +1567,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
+
+ if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY)
+ adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true;
++ if (dm_should_disable_stutter(adev->pdev))
++ adev->dm.dc->debug.disable_stutter = true;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER)
+ adev->dm.dc->debug.disable_stutter = true;
+@@ -1202,6 +1587,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
+
+ dc_hardware_init(adev->dm.dc);
+
++ adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc);
++ if (!adev->dm.hpd_rx_offload_wq) {
++ DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n");
++ goto error;
++ }
++
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
+ if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) {
+ struct dc_phy_addr_space_config pa_config;
+@@ -1254,7 +1645,25 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
+ DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
+ goto error;
+ }
++
++ adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq");
++ if (!adev->dm.delayed_hpd_wq) {
++ DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n");
++ goto error;
++ }
++
+ amdgpu_dm_outbox_init(adev);
++#if defined(CONFIG_DRM_AMD_DC_DCN)
++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY,
++ dmub_aux_setconfig_callback, false)) {
++ DRM_ERROR("amdgpu: fail to register dmub aux callback");
++ goto error;
++ }
++ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
++ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
++ goto error;
++ }
++#endif
+ }
+
+ if (amdgpu_dm_initialize_drm_device(adev)) {
+@@ -1308,10 +1717,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
+ }
+ #endif
+
+- for (i = 0; i < adev->dm.display_indexes_num; i++) {
+- drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
+- }
+-
+ amdgpu_dm_destroy_drm_device(&adev->dm);
+
+ #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+@@ -1331,11 +1736,14 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
+ dc_deinit_callbacks(adev->dm.dc);
+ #endif
+
+- dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
++ if (adev->dm.dc)
++ dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
+
+ if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ kfree(adev->dm.dmub_notify);
+ adev->dm.dmub_notify = NULL;
++ destroy_workqueue(adev->dm.delayed_hpd_wq);
++ adev->dm.delayed_hpd_wq = NULL;
+ }
+
+ if (adev->dm.dmub_bo)
+@@ -1361,6 +1769,18 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
+ adev->dm.freesync_module = NULL;
+ }
+
++ if (adev->dm.hpd_rx_offload_wq) {
++ for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
++ if (adev->dm.hpd_rx_offload_wq[i].wq) {
++ destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
++ adev->dm.hpd_rx_offload_wq[i].wq = NULL;
++ }
++ }
++
++ kfree(adev->dm.hpd_rx_offload_wq);
++ adev->dm.hpd_rx_offload_wq = NULL;
++ }
++
+ mutex_destroy(&adev->dm.audio_lock);
+ mutex_destroy(&adev->dm.dc_lock);
+
+@@ -1980,6 +2400,16 @@ context_alloc_fail:
+ return res;
+ }
+
++static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm)
++{
++ int i;
++
++ if (dm->hpd_rx_offload_wq) {
++ for (i = 0; i < dm->dc->caps.max_links; i++)
++ flush_workqueue(dm->hpd_rx_offload_wq[i].wq);
++ }
++}
++
+ static int dm_suspend(void *handle)
+ {
+ struct amdgpu_device *adev = handle;
+@@ -2001,6 +2431,8 @@ static int dm_suspend(void *handle)
+
+ amdgpu_dm_irq_suspend(adev);
+
++ hpd_rx_irq_work_suspend(dm);
++
+ return ret;
+ }
+
+@@ -2011,6 +2443,8 @@ static int dm_suspend(void *handle)
+
+ amdgpu_dm_irq_suspend(adev);
+
++ hpd_rx_irq_work_suspend(dm);
++
+ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+
+ return 0;
+@@ -2145,10 +2579,13 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
+ bundle->surface_updates[m].surface->force_full_update =
+ true;
+ }
+- dc_commit_updates_for_stream(
+- dm->dc, bundle->surface_updates,
+- dc_state->stream_status->plane_count,
+- dc_state->streams[k], &bundle->stream_update, dc_state);
++
++ update_planes_and_stream_adapter(dm->dc,
++ UPDATE_TYPE_FULL,
++ dc_state->stream_status->plane_count,
++ dc_state->streams[k],
++ &bundle->stream_update,
++ bundle->surface_updates);
+ }
+
+ cleanup:
+@@ -2206,6 +2643,9 @@ static int dm_resume(void *handle)
+ if (amdgpu_in_reset(adev)) {
+ dc_state = dm->cached_dc_state;
+
++ if (dc_enable_dmub_notifications(adev->dm.dc))
++ amdgpu_dm_outbox_init(adev);
++
+ r = dm_dmub_hw_init(adev);
+ if (r)
+ DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+@@ -2217,8 +2657,8 @@ static int dm_resume(void *handle)
+
+ for (i = 0; i < dc_state->stream_count; i++) {
+ dc_state->streams[i]->mode_changed = true;
+- for (j = 0; j < dc_state->stream_status->plane_count; j++) {
+- dc_state->stream_status->plane_states[j]->update_flags.raw
++ for (j = 0; j < dc_state->stream_status[i].plane_count; j++) {
++ dc_state->stream_status[i].plane_states[j]->update_flags.raw
+ = 0xffffffff;
+ }
+ }
+@@ -2253,10 +2693,12 @@ static int dm_resume(void *handle)
+ /* TODO: Remove dc_state->dccg, use dc->dccg directly. */
+ dc_resource_state_construct(dm->dc, dm_state->context);
+
++ /* Re-enable outbox interrupts for DPIA. */
++ if (dc_enable_dmub_notifications(adev->dm.dc))
++ amdgpu_dm_outbox_init(adev);
++
+ /* Before powering on DC we need to re-initialize DMUB. */
+- r = dm_dmub_hw_init(adev);
+- if (r)
+- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
++ dm_dmub_hw_resume(adev);
+
+ /* power on hardware */
+ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+@@ -2278,11 +2720,14 @@ static int dm_resume(void *handle)
+ drm_for_each_connector_iter(connector, &iter) {
+ aconnector = to_amdgpu_dm_connector(connector);
+
++ if (!aconnector->dc_link)
++ continue;
++
+ /*
+ * this is the case when traversing through already created
+ * MST connectors, should be skipped
+ */
+- if (aconnector->mst_port)
++ if (aconnector->dc_link->type == dc_connection_mst_branch)
+ continue;
+
+ mutex_lock(&aconnector->hpd_lock);
+@@ -2402,7 +2847,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
+
+ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
+ {
+- u32 max_cll, min_cll, max, min, q, r;
++ u32 max_avg, min_cll, max, min, q, r;
+ struct amdgpu_dm_backlight_caps *caps;
+ struct amdgpu_display_manager *dm;
+ struct drm_connector *conn_base;
+@@ -2432,7 +2877,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
+ caps = &dm->backlight_caps[i];
+ caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
+ caps->aux_support = false;
+- max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
++ max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
+ min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
+
+ if (caps->ext_caps->bits.oled == 1 /*||
+@@ -2460,8 +2905,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
+ * The results of the above expressions can be verified at
+ * pre_computed_values.
+ */
+- q = max_cll >> 5;
+- r = max_cll % 32;
++ q = max_avg >> 5;
++ r = max_avg % 32;
+ max = (1 << q) * pre_computed_values[r];
+
+ // min luminance: maxLum * (CV/255)^2 / 100
+@@ -2583,13 +3028,12 @@ void amdgpu_dm_update_connector_after_detect(
+ aconnector->edid =
+ (struct edid *)sink->dc_edid.raw_edid;
+
+- drm_connector_update_edid_property(connector,
+- aconnector->edid);
+ if (aconnector->dc_link->aux_mode)
+ drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
+ aconnector->edid);
+ }
+
++ drm_connector_update_edid_property(connector, aconnector->edid);
+ amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
+ update_connector_ext_caps(aconnector);
+ } else {
+@@ -2615,9 +3059,8 @@ void amdgpu_dm_update_connector_after_detect(
+ dc_sink_release(sink);
+ }
+
+-static void handle_hpd_irq(void *param)
++static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
+ {
+- struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param;
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
+ enum dc_connection_type new_connection_type = dc_connection_none;
+@@ -2676,7 +3119,15 @@ static void handle_hpd_irq(void *param)
+
+ }
+
+-static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector)
++static void handle_hpd_irq(void *param)
++{
++ struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param;
++
++ handle_hpd_irq_helper(aconnector);
++
++}
++
++static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
+ {
+ uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
+ uint8_t dret;
+@@ -2754,6 +3205,25 @@ static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector)
+ DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
+ }
+
++static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq,
++ union hpd_irq_data hpd_irq_data)
++{
++ struct hpd_rx_irq_offload_work *offload_work =
++ kzalloc(sizeof(*offload_work), GFP_KERNEL);
++
++ if (!offload_work) {
++ DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n");
++ return;
++ }
++
++ INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work);
++ offload_work->data = hpd_irq_data;
++ offload_work->offload_wq = offload_wq;
++
++ queue_work(offload_wq->wq, &offload_work->work);
++ DRM_DEBUG_KMS("queue work to handle hpd_rx offload work");
++}
++
+ static void handle_hpd_rx_irq(void *param)
+ {
+ struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param;
+@@ -2765,14 +3235,16 @@ static void handle_hpd_rx_irq(void *param)
+ enum dc_connection_type new_connection_type = dc_connection_none;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union hpd_irq_data hpd_irq_data;
+- bool lock_flag = 0;
++ bool link_loss = false;
++ bool has_left_work = false;
++ int idx = aconnector->base.index;
++ struct hpd_rx_irq_offload_work_queue *offload_wq = &adev->dm.hpd_rx_offload_wq[idx];
+
+ memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
+
+ if (adev->dm.disable_hpd_irq)
+ return;
+
+-
+ /*
+ * TODO:Temporary add mutex to protect hpd interrupt not have a gpio
+ * conflict, after implement i2c helper, this mutex should be
+@@ -2780,43 +3252,41 @@ static void handle_hpd_rx_irq(void *param)
+ */
+ mutex_lock(&aconnector->hpd_lock);
+
+- read_hpd_rx_irq_data(dc_link, &hpd_irq_data);
++ result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data,
++ &link_loss, true, &has_left_work);
+
+- if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
+- (dc_link->type == dc_connection_mst_branch)) {
+- if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) {
+- result = true;
+- dm_handle_hpd_rx_irq(aconnector);
+- goto out;
+- } else if (hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
+- result = false;
+- dm_handle_hpd_rx_irq(aconnector);
++ if (!has_left_work)
++ goto out;
++
++ if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
++ schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
++ goto out;
++ }
++
++ if (dc_link_dp_allow_hpd_rx_irq(dc_link)) {
++ if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
++ hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
++ dm_handle_mst_sideband_msg(aconnector);
+ goto out;
+ }
+- }
+
+- /*
+- * TODO: We need the lock to avoid touching DC state while it's being
+- * modified during automated compliance testing, or when link loss
+- * happens. While this should be split into subhandlers and proper
+- * interfaces to avoid having to conditionally lock like this in the
+- * outer layer, we need this workaround temporarily to allow MST
+- * lightup in some scenarios to avoid timeout.
+- */
+- if (!amdgpu_in_reset(adev) &&
+- (hpd_rx_irq_check_link_loss_status(dc_link, &hpd_irq_data) ||
+- hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST)) {
+- mutex_lock(&adev->dm.dc_lock);
+- lock_flag = 1;
+- }
++ if (link_loss) {
++ bool skip = false;
+
+-#ifdef CONFIG_DRM_AMD_DC_HDCP
+- result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
+-#else
+- result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
+-#endif
+- if (!amdgpu_in_reset(adev) && lock_flag)
+- mutex_unlock(&adev->dm.dc_lock);
++ spin_lock(&offload_wq->offload_lock);
++ skip = offload_wq->is_handling_link_loss;
++
++ if (!skip)
++ offload_wq->is_handling_link_loss = true;
++
++ spin_unlock(&offload_wq->offload_lock);
++
++ if (!skip)
++ schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
++
++ goto out;
++ }
++ }
+
+ out:
+ if (result && !is_mst_root_connector) {
+@@ -2901,6 +3371,10 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
+ amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ handle_hpd_rx_irq,
+ (void *) aconnector);
++
++ if (adev->dm.hpd_rx_offload_wq)
++ adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
++ aconnector;
+ }
+ }
+ }
+@@ -3213,7 +3687,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
+
+ /* Use GRPH_PFLIP interrupt */
+ for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT;
+- i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1;
++ i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + dc->caps.max_otg_num - 1;
+ i++) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
+ if (r) {
+@@ -3508,7 +3982,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap
+ max - min);
+ }
+
+-static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
++static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+ int bl_idx,
+ u32 user_brightness)
+ {
+@@ -3536,7 +4010,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+ DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx);
+ }
+
+- return rc ? 0 : 1;
++ if (rc)
++ dm->actual_brightness[bl_idx] = user_brightness;
+ }
+
+ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
+@@ -3839,8 +4314,17 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
+ } else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
+ amdgpu_dm_update_connector_after_detect(aconnector);
+ register_backlight_device(dm, link);
++
++ if (dm->num_of_edps)
++ update_connector_ext_caps(aconnector);
+ if (amdgpu_dc_feature_mask & DC_PSR_MASK)
+ amdgpu_dm_set_psr_caps(link);
++
++ /* TODO: Fix vblank control helpers to delay PSR entry to allow this when
++ * PSR is also supported.
++ */
++ if (link->psr_settings.psr_feature_enabled)
++ adev_to_drm(adev)->vblank_disable_immediate = false;
+ }
+
+
+@@ -3979,6 +4463,17 @@ DEVICE_ATTR_WO(s3_debug);
+ static int dm_early_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++ struct amdgpu_mode_info *mode_info = &adev->mode_info;
++ struct atom_context *ctx = mode_info->atom_context;
++ int index = GetIndexIntoMasterTable(DATA, Object_Header);
++ u16 data_offset;
++
++ /* if there is no object header, skip DM */
++ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
++ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
++ dev_info(adev->dev, "No object header, skipping DM\n");
++ return -ENOENT;
++ }
+
+ switch (adev->asic_type) {
+ #if defined(CONFIG_DRM_AMD_DC_SI)
+@@ -5033,7 +5528,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
+ plane_info->visible = true;
+ plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE;
+
+- plane_info->layer_index = 0;
++ plane_info->layer_index = plane_state->normalized_zpos;
+
+ ret = fill_plane_color_attributes(plane_state, plane_info->format,
+ &plane_info->color_space);
+@@ -5100,7 +5595,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
+ dc_plane_state->global_alpha = plane_info.global_alpha;
+ dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
+ dc_plane_state->dcc = plane_info.dcc;
+- dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0
++ dc_plane_state->layer_index = plane_info.layer_index;
+ dc_plane_state->flip_int_enabled = true;
+
+ /*
+@@ -5402,8 +5897,6 @@ static void fill_stream_properties_from_drm_display_mode(
+
+ timing_out->aspect_ratio = get_aspect_ratio(mode_in);
+
+- stream->output_color_space = get_output_color_space(timing_out);
+-
+ stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
+ stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
+ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+@@ -5414,6 +5907,8 @@ static void fill_stream_properties_from_drm_display_mode(
+ adjust_colour_depth_from_display_info(timing_out, info);
+ }
+ }
++
++ stream->output_color_space = get_output_color_space(timing_out);
+ }
+
+ static void fill_audio_info(struct audio_info *audio_info,
+@@ -5587,6 +6082,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
+ struct dsc_dec_dpcd_caps *dsc_caps)
+ {
+ stream->timing.flags.DSC = 0;
++ dsc_caps->is_dsc_supported = false;
+
+ if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
+ dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
+@@ -7527,6 +8023,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
+ mode = amdgpu_dm_create_common_mode(encoder,
+ common_modes[i].name, common_modes[i].w,
+ common_modes[i].h);
++ if (!mode)
++ continue;
++
+ drm_mode_probed_add(connector, mode);
+ amdgpu_dm_connector->num_modes++;
+ }
+@@ -8067,27 +8566,55 @@ is_scaling_state_different(const struct dm_connector_state *dm_state,
+ }
+
+ #ifdef CONFIG_DRM_AMD_DC_HDCP
+-static bool is_content_protection_different(struct drm_connector_state *state,
+- const struct drm_connector_state *old_state,
+- const struct drm_connector *connector, struct hdcp_workqueue *hdcp_w)
++static bool is_content_protection_different(struct drm_crtc_state *new_crtc_state,
++ struct drm_crtc_state *old_crtc_state,
++ struct drm_connector_state *new_conn_state,
++ struct drm_connector_state *old_conn_state,
++ const struct drm_connector *connector,
++ struct hdcp_workqueue *hdcp_w)
+ {
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
+
+- /* Handle: Type0/1 change */
+- if (old_state->hdcp_content_type != state->hdcp_content_type &&
+- state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) {
+- state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
++ pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
++ connector->index, connector->status, connector->dpms);
++ pr_debug("[HDCP_DM] state protection old: %x new: %x\n",
++ old_conn_state->content_protection, new_conn_state->content_protection);
++
++ if (old_crtc_state)
++ pr_debug("[HDCP_DM] old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
++ old_crtc_state->enable,
++ old_crtc_state->active,
++ old_crtc_state->mode_changed,
++ old_crtc_state->active_changed,
++ old_crtc_state->connectors_changed);
++
++ if (new_crtc_state)
++ pr_debug("[HDCP_DM] NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
++ new_crtc_state->enable,
++ new_crtc_state->active,
++ new_crtc_state->mode_changed,
++ new_crtc_state->active_changed,
++ new_crtc_state->connectors_changed);
++
++ /* hdcp content type change */
++ if (old_conn_state->hdcp_content_type != new_conn_state->hdcp_content_type &&
++ new_conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) {
++ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
++ pr_debug("[HDCP_DM] Type0/1 change %s :true\n", __func__);
+ return true;
+ }
+
+- /* CP is being re enabled, ignore this
+- *
+- * Handles: ENABLED -> DESIRED
+- */
+- if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
+- state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
+- state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED;
++	/* CP is being re-enabled, ignore this */
++ if (old_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED &&
++ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
++ if (new_crtc_state && new_crtc_state->mode_changed) {
++ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
++ pr_debug("[HDCP_DM] ENABLED->DESIRED & mode_changed %s :true\n", __func__);
++ return true;
++		}
++ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_ENABLED;
++ pr_debug("[HDCP_DM] ENABLED -> DESIRED %s :false\n", __func__);
+ return false;
+ }
+
+@@ -8095,9 +8622,9 @@ static bool is_content_protection_different(struct drm_connector_state *state,
+ *
+ * Handles: UNDESIRED -> ENABLED
+ */
+- if (old_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED &&
+- state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
+- state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
++ if (old_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_UNDESIRED &&
++ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED)
++ new_conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+
+ /* Stream removed and re-enabled
+ *
+@@ -8107,10 +8634,12 @@ static bool is_content_protection_different(struct drm_connector_state *state,
+ *
+ * Handles: DESIRED -> DESIRED (Special case)
+ */
+- if (!(old_state->crtc && old_state->crtc->enabled) &&
+- state->crtc && state->crtc->enabled &&
++ if (!(old_conn_state->crtc && old_conn_state->crtc->enabled) &&
++ new_conn_state->crtc && new_conn_state->crtc->enabled &&
+ connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) {
+ dm_con_state->update_hdcp = false;
++ pr_debug("[HDCP_DM] DESIRED->DESIRED (Stream removed and re-enabled) %s :true\n",
++ __func__);
+ return true;
+ }
+
+@@ -8122,35 +8651,42 @@ static bool is_content_protection_different(struct drm_connector_state *state,
+ *
+ * Handles: DESIRED -> DESIRED (Special case)
+ */
+- if (dm_con_state->update_hdcp && state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
+- connector->dpms == DRM_MODE_DPMS_ON && aconnector->dc_sink != NULL) {
++ if (dm_con_state->update_hdcp &&
++ new_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED &&
++ connector->dpms == DRM_MODE_DPMS_ON && aconnector->dc_sink != NULL) {
+ dm_con_state->update_hdcp = false;
++ pr_debug("[HDCP_DM] DESIRED->DESIRED (Hot-plug, headless s3, dpms) %s :true\n",
++ __func__);
+ return true;
+ }
+
+- /*
+- * Handles: UNDESIRED -> UNDESIRED
+- * DESIRED -> DESIRED
+- * ENABLED -> ENABLED
+- */
+- if (old_state->content_protection == state->content_protection)
++ if (old_conn_state->content_protection == new_conn_state->content_protection) {
++ if (new_conn_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) {
++ if (new_crtc_state && new_crtc_state->mode_changed) {
++ pr_debug("[HDCP_DM] DESIRED->DESIRED or ENABLE->ENABLE mode_change %s :true\n",
++ __func__);
++ return true;
++			}
++ pr_debug("[HDCP_DM] DESIRED->DESIRED & ENABLE->ENABLE %s :false\n",
++ __func__);
++ return false;
++		}
++
++ pr_debug("[HDCP_DM] UNDESIRED->UNDESIRED %s :false\n", __func__);
+ return false;
++ }
+
+- /*
+- * Handles: UNDESIRED -> DESIRED
+- * DESIRED -> UNDESIRED
+- * ENABLED -> UNDESIRED
+- */
+- if (state->content_protection != DRM_MODE_CONTENT_PROTECTION_ENABLED)
++ if (new_conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_ENABLED) {
++ pr_debug("[HDCP_DM] UNDESIRED->DESIRED or DESIRED->UNDESIRED or ENABLED->UNDESIRED %s :true\n",
++ __func__);
+ return true;
++ }
+
+- /*
+- * Handles: DESIRED -> ENABLED
+- */
++ pr_debug("[HDCP_DM] DESIRED->ENABLED %s :false\n", __func__);
+ return false;
+ }
+-
+ #endif
++
+ static void remove_stream(struct amdgpu_device *adev,
+ struct amdgpu_crtc *acrtc,
+ struct dc_stream_state *stream)
+@@ -8447,15 +8983,15 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
+ * We also need vupdate irq for the actual core vblank handling
+ * at end of vblank.
+ */
+- dm_set_vupdate_irq(new_state->base.crtc, true);
+- drm_crtc_vblank_get(new_state->base.crtc);
++ WARN_ON(dm_set_vupdate_irq(new_state->base.crtc, true) != 0);
++ WARN_ON(drm_crtc_vblank_get(new_state->base.crtc) != 0);
+ DRM_DEBUG_DRIVER("%s: crtc=%u VRR off->on: Get vblank ref\n",
+ __func__, new_state->base.crtc->base.id);
+ } else if (old_vrr_active && !new_vrr_active) {
+ /* Transition VRR active -> inactive:
+ * Allow vblank irq disable again for fixed refresh rate.
+ */
+- dm_set_vupdate_irq(new_state->base.crtc, false);
++ WARN_ON(dm_set_vupdate_irq(new_state->base.crtc, false) != 0);
+ drm_crtc_vblank_put(new_state->base.crtc);
+ DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n",
+ __func__, new_state->base.crtc->base.id);
+@@ -8477,6 +9013,13 @@ static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
+ handle_cursor_update(plane, old_plane_state);
+ }
+
++static inline uint32_t get_mem_type(struct drm_framebuffer *fb)
++{
++ struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]);
++
++ return abo->tbo.resource ? abo->tbo.resource->mem_type : 0;
++}
++
+ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
+ struct dc_state *dc_state,
+ struct drm_device *dev,
+@@ -8546,6 +9089,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
+ continue;
+
+ dc_plane = dm_new_plane_state->dc_state;
++ if (!dc_plane)
++ continue;
+
+ bundle->surface_updates[planes_count].surface = dc_plane;
+ if (new_pcrtc_state->color_mgmt_changed) {
+@@ -8597,11 +9142,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
+
+ /*
+ * Only allow immediate flips for fast updates that don't
+- * change FB pitch, DCC state, rotation or mirroing.
++ * change memory domain, FB pitch, DCC state, rotation or
++ * mirroring.
+ */
+ bundle->flip_addrs[planes_count].flip_immediate =
+ crtc->state->async_flip &&
+- acrtc_state->update_type == UPDATE_TYPE_FAST;
++ acrtc_state->update_type == UPDATE_TYPE_FAST &&
++ get_mem_type(old_plane_state->fb) == get_mem_type(fb);
+
+ timestamp_ns = ktime_get_ns();
+ bundle->flip_addrs[planes_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000);
+@@ -8734,6 +9281,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
+ if (acrtc_state->abm_level != dm_old_crtc_state->abm_level)
+ bundle->stream_update.abm_level = &acrtc_state->abm_level;
+
++ mutex_lock(&dm->dc_lock);
++ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
++ acrtc_state->stream->link->psr_settings.psr_allow_active)
++ amdgpu_dm_psr_disable(acrtc_state->stream);
++ mutex_unlock(&dm->dc_lock);
++
+ /*
+ * If FreeSync state on the stream has changed then we need to
+ * re-adjust the min/max bounds now that DC doesn't handle this
+@@ -8747,16 +9300,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
+ spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
+ }
+ mutex_lock(&dm->dc_lock);
+- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
+- acrtc_state->stream->link->psr_settings.psr_allow_active)
+- amdgpu_dm_psr_disable(acrtc_state->stream);
+
+- dc_commit_updates_for_stream(dm->dc,
+- bundle->surface_updates,
+- planes_count,
+- acrtc_state->stream,
+- &bundle->stream_update,
+- dc_state);
++ update_planes_and_stream_adapter(dm->dc,
++ acrtc_state->update_type,
++ planes_count,
++ acrtc_state->stream,
++ &bundle->stream_update,
++ bundle->surface_updates);
+
+ /**
+ * Enable or disable the interrupts on the backend.
+@@ -9084,10 +9634,67 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
++ if (!adev->dm.hdcp_workqueue)
++ continue;
++
++ pr_debug("[HDCP_DM] -------------- i : %x ----------\n", i);
++
++ if (!connector)
++ continue;
++
++ pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
++ connector->index, connector->status, connector->dpms);
++ pr_debug("[HDCP_DM] state protection old: %x new: %x\n",
++ old_con_state->content_protection, new_con_state->content_protection);
++
++ if (aconnector->dc_sink) {
++ if (aconnector->dc_sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
++ aconnector->dc_sink->sink_signal != SIGNAL_TYPE_NONE) {
++ pr_debug("[HDCP_DM] pipe_ctx dispname=%s\n",
++ aconnector->dc_sink->edid_caps.display_name);
++ }
++ }
++
+ new_crtc_state = NULL;
++ old_crtc_state = NULL;
+
+- if (acrtc)
++ if (acrtc) {
+ new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
++ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
++ }
++
++ if (old_crtc_state)
++ pr_debug("old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
++ old_crtc_state->enable,
++ old_crtc_state->active,
++ old_crtc_state->mode_changed,
++ old_crtc_state->active_changed,
++ old_crtc_state->connectors_changed);
++
++ if (new_crtc_state)
++ pr_debug("NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
++ new_crtc_state->enable,
++ new_crtc_state->active,
++ new_crtc_state->mode_changed,
++ new_crtc_state->active_changed,
++ new_crtc_state->connectors_changed);
++ }
++
++ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
++ struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
++ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
++ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
++
++ if (!adev->dm.hdcp_workqueue)
++ continue;
++
++ new_crtc_state = NULL;
++ old_crtc_state = NULL;
++
++ if (acrtc) {
++ new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
++ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
++ }
+
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+@@ -9099,11 +9706,44 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ continue;
+ }
+
+- if (is_content_protection_different(new_con_state, old_con_state, connector, adev->dm.hdcp_workqueue))
++ if (is_content_protection_different(new_crtc_state, old_crtc_state, new_con_state,
++ old_con_state, connector, adev->dm.hdcp_workqueue)) {
++			 /* when display is unplugged from mst hub, connector will
++ * be destroyed within dm_dp_mst_connector_destroy. connector
++			 * hdcp properties, like type, undesired, desired, enabled,
++ * will be lost. So, save hdcp properties into hdcp_work within
++ * amdgpu_dm_atomic_commit_tail. if the same display is
++ * plugged back with same display index, its hdcp properties
++ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
++ */
++
++ bool enable_encryption = false;
++
++ if (new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED)
++ enable_encryption = true;
++
++ if (aconnector->dc_link && aconnector->dc_sink &&
++ aconnector->dc_link->type == dc_connection_mst_branch) {
++ struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue;
++ struct hdcp_workqueue *hdcp_w =
++ &hdcp_work[aconnector->dc_link->link_index];
++
++ hdcp_w->hdcp_content_type[connector->index] =
++ new_con_state->hdcp_content_type;
++ hdcp_w->content_protection[connector->index] =
++ new_con_state->content_protection;
++ }
++
++ if (new_crtc_state && new_crtc_state->mode_changed &&
++ new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED)
++ enable_encryption = true;
++
++ DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption);
++
+ hdcp_update_display(
+ adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
+- new_con_state->hdcp_content_type,
+- new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED);
++ new_con_state->hdcp_content_type, enable_encryption);
++ }
+ }
+ #endif
+
+@@ -9182,32 +9822,14 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+
+
+ mutex_lock(&dm->dc_lock);
+- dc_commit_updates_for_stream(dm->dc,
+- dummy_updates,
+- status->plane_count,
+- dm_new_crtc_state->stream,
+- &stream_update,
+- dc_state);
++ dc_update_planes_and_stream(dm->dc,
++ dummy_updates,
++ status->plane_count,
++ dm_new_crtc_state->stream,
++ &stream_update);
+ mutex_unlock(&dm->dc_lock);
+ }
+
+- /* Count number of newly disabled CRTCs for dropping PM refs later. */
+- for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+- new_crtc_state, i) {
+- if (old_crtc_state->active && !new_crtc_state->active)
+- crtc_disable_count++;
+-
+- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+- dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+-
+- /* For freesync config update on crtc state and params for irq */
+- update_stream_irq_parameters(dm, dm_new_crtc_state);
+-
+- /* Handle vrr on->off / off->on transitions */
+- amdgpu_dm_handle_vrr_transition(dm_old_crtc_state,
+- dm_new_crtc_state);
+- }
+-
+ /**
+ * Enable interrupts for CRTCs that are newly enabled or went through
+ * a modeset. It was intentionally deferred until after the front end
+@@ -9217,16 +9839,29 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ #ifdef CONFIG_DEBUG_FS
+- bool configure_crc = false;
+ enum amdgpu_dm_pipe_crc_source cur_crc_src;
+ #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+- struct crc_rd_work *crc_rd_wrk = dm->crc_rd_wrk;
++ struct crc_rd_work *crc_rd_wrk;
++#endif
++#endif
++ /* Count number of newly disabled CRTCs for dropping PM refs later. */
++ if (old_crtc_state->active && !new_crtc_state->active)
++ crtc_disable_count++;
++
++ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
++ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
++
++ /* For freesync config update on crtc state and params for irq */
++ update_stream_irq_parameters(dm, dm_new_crtc_state);
++
++#ifdef CONFIG_DEBUG_FS
++#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
++ crc_rd_wrk = dm->crc_rd_wrk;
+ #endif
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ cur_crc_src = acrtc->dm_irq_params.crc_src;
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ #endif
+- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ if (new_crtc_state->active &&
+ (!old_crtc_state->active ||
+@@ -9234,16 +9869,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ dc_stream_retain(dm_new_crtc_state->stream);
+ acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
+ manage_dm_interrupts(adev, acrtc, true);
++ }
++ /* Handle vrr on->off / off->on transitions */
++ amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state);
+
+ #ifdef CONFIG_DEBUG_FS
++ if (new_crtc_state->active &&
++ (!old_crtc_state->active ||
++ drm_atomic_crtc_needs_modeset(new_crtc_state))) {
+ /**
+ * Frontend may have changed so reapply the CRC capture
+ * settings for the stream.
+ */
+- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+-
+ if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) {
+- configure_crc = true;
+ #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ if (amdgpu_dm_crc_window_is_activated(crtc)) {
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+@@ -9255,14 +9893,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ }
+ #endif
+- }
+-
+- if (configure_crc)
+ if (amdgpu_dm_crtc_configure_crc_source(
+ crtc, dm_new_crtc_state, cur_crc_src))
+ DRM_DEBUG_DRIVER("Failed to configure crc source");
+-#endif
++ }
+ }
++#endif
+ }
+
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
+@@ -9286,7 +9922,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+ /* restore the backlight level */
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i] &&
+- (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i]))
++ (dm->actual_brightness[i] != dm->brightness[i]))
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+ #endif
+@@ -9686,7 +10322,16 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
+ if (!dm_old_crtc_state->stream)
+ goto skip_modeset;
+
++ /* Unset freesync video if it was active before */
++ if (dm_old_crtc_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) {
++ dm_new_crtc_state->freesync_config.state = VRR_STATE_INACTIVE;
++ dm_new_crtc_state->freesync_config.fixed_refresh_in_uhz = 0;
++ }
++
++ /* Now check if we should set freesync video mode */
+ if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream &&
++ dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
++ dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream) &&
+ is_timing_unchanged_for_freesync(new_crtc_state,
+ old_crtc_state)) {
+ new_crtc_state->mode_changed = false;
+@@ -10070,8 +10715,9 @@ static int dm_update_plane_state(struct dc *dc,
+ return -EINVAL;
+ }
+
++ if (dm_old_plane_state->dc_state)
++ dc_plane_state_release(dm_old_plane_state->dc_state);
+
+- dc_plane_state_release(dm_old_plane_state->dc_state);
+ dm_new_plane_state->dc_state = NULL;
+
+ *lock_and_validation_needed = true;
+@@ -10196,10 +10842,13 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state,
+ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc)
+ {
+ struct drm_connector *connector;
+- struct drm_connector_state *conn_state;
++ struct drm_connector_state *conn_state, *old_conn_state;
+ struct amdgpu_dm_connector *aconnector = NULL;
+ int i;
+- for_each_new_connector_in_state(state, connector, conn_state, i) {
++ for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) {
++ if (!conn_state->crtc)
++ conn_state = old_conn_state;
++
+ if (conn_state->crtc != crtc)
+ continue;
+
+@@ -10332,8 +10981,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
+ goto fail;
+ }
+
+- if (dm_old_con_state->abm_level !=
+- dm_new_con_state->abm_level)
++ if (dm_old_con_state->abm_level != dm_new_con_state->abm_level ||
++ dm_old_con_state->scaling != dm_new_con_state->scaling)
+ new_crtc_state->connectors_changed = true;
+ }
+
+@@ -10412,6 +11061,18 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
+ }
+ }
+
++ /*
++ * DC consults the zpos (layer_index in DC terminology) to determine the
++ * hw plane on which to enable the hw cursor (see
++ * `dcn10_can_pipe_disable_cursor`). By now, all modified planes are in
++ * atomic state, so call drm helper to normalize zpos.
++ */
++ ret = drm_atomic_normalize_zpos(dev, state);
++ if (ret) {
++ drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n");
++ goto fail;
++ }
++
+ /* Remove exiting planes if they are modified */
+ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ ret = dm_update_plane_state(dc, state, plane,
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+index d1d353a7c77d3..f9c3e5a417138 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+@@ -47,6 +47,8 @@
+ #define AMDGPU_DM_MAX_CRTC 6
+
+ #define AMDGPU_DM_MAX_NUM_EDP 2
++
++#define AMDGPU_DMUB_NOTIFICATION_MAX 5
+ /*
+ #include "include/amdgpu_dal_power_if.h"
+ #include "amdgpu_dm_irq.h"
+@@ -86,6 +88,21 @@ struct dm_compressor_info {
+ uint64_t gpu_addr;
+ };
+
++typedef void (*dmub_notify_interrupt_callback_t)(struct amdgpu_device *adev, struct dmub_notification *notify);
++
++/**
++ * struct dmub_hpd_work - Handle time consuming work in low priority outbox IRQ
++ *
++ * @handle_hpd_work: Work to be executed in a separate thread to handle hpd_low_irq
++ * @dmub_notify: notification for callback function
++ * @adev: amdgpu_device pointer
++ */
++struct dmub_hpd_work {
++ struct work_struct handle_hpd_work;
++ struct dmub_notification *dmub_notify;
++ struct amdgpu_device *adev;
++};
++
+ /**
+ * struct vblank_control_work - Work data for vblank control
+ * @work: Kernel work data for the work event
+@@ -154,6 +171,48 @@ struct dal_allocation {
+ u64 gpu_addr;
+ };
+
++/**
++ * struct hpd_rx_irq_offload_work_queue - Work queue to handle hpd_rx_irq
++ * offload work
++ */
++struct hpd_rx_irq_offload_work_queue {
++ /**
++ * @wq: workqueue structure to queue offload work.
++ */
++ struct workqueue_struct *wq;
++ /**
++ * @offload_lock: To protect fields of offload work queue.
++ */
++ spinlock_t offload_lock;
++ /**
++ * @is_handling_link_loss: Used to prevent inserting link loss event when
++ * we're handling link loss
++ */
++ bool is_handling_link_loss;
++ /**
++ * @aconnector: The aconnector that this work queue is attached to
++ */
++ struct amdgpu_dm_connector *aconnector;
++};
++
++/**
++ * struct hpd_rx_irq_offload_work - hpd_rx_irq offload work structure
++ */
++struct hpd_rx_irq_offload_work {
++ /**
++ * @work: offload work
++ */
++ struct work_struct work;
++ /**
++ * @data: reference irq data which is used while handling offload work
++ */
++ union hpd_irq_data data;
++ /**
++ * @offload_wq: offload work queue that this work is queued to
++ */
++ struct hpd_rx_irq_offload_work_queue *offload_wq;
++};
++
+ /**
+ * struct amdgpu_display_manager - Central amdgpu display manager device
+ *
+@@ -190,8 +249,30 @@ struct amdgpu_display_manager {
+ */
+ struct dmub_srv *dmub_srv;
+
++ /**
++ * @dmub_notify:
++ *
++ * Notification from DMUB.
++ */
++
+ struct dmub_notification *dmub_notify;
+
++ /**
++ * @dmub_callback:
++ *
++ * Callback functions to handle notification from DMUB.
++ */
++
++ dmub_notify_interrupt_callback_t dmub_callback[AMDGPU_DMUB_NOTIFICATION_MAX];
++
++ /**
++ * @dmub_thread_offload:
++ *
++	 * Flag to indicate if the callback is offloaded.
++ */
++
++ bool dmub_thread_offload[AMDGPU_DMUB_NOTIFICATION_MAX];
++
+ /**
+ * @dmub_fb_info:
+ *
+@@ -422,7 +503,12 @@ struct amdgpu_display_manager {
+ */
+ struct crc_rd_work *crc_rd_wrk;
+ #endif
+-
++ /**
++ * @hpd_rx_offload_wq:
++ *
++	 * Work queues used to offload hpd_rx_irq work
++ */
++ struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq;
+ /**
+ * @mst_encoders:
+ *
+@@ -439,6 +525,7 @@ struct amdgpu_display_manager {
+ */
+ struct list_head da_list;
+ struct completion dmub_aux_transfer_done;
++ struct workqueue_struct *delayed_hpd_wq;
+
+ /**
+ * @brightness:
+@@ -446,6 +533,20 @@ struct amdgpu_display_manager {
+ * cached backlight values.
+ */
+ u32 brightness[AMDGPU_DM_MAX_NUM_EDP];
++ /**
++ * @actual_brightness:
++ *
++ * last successfully applied backlight values.
++ */
++ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
++
++ /**
++ * @aux_hpd_discon_quirk:
++ *
++	 * quirk for hpd disconnect while an aux transaction is ongoing;
++	 * observed on certain Intel platforms
++ */
++ bool aux_hpd_discon_quirk;
+ };
+
+ enum dsc_clock_force_state {
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+index cce062adc4391..8a441a22c46ec 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+@@ -314,6 +314,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
+ ret = -EINVAL;
+ goto cleanup;
+ }
++
++ if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) &&
++ (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) {
++ DRM_DEBUG_DRIVER("No DP connector available for CRC source\n");
++ ret = -EINVAL;
++ goto cleanup;
++ }
++
+ }
+
+ #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+index 8080bba5b7a76..6d694cea24201 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+@@ -229,8 +229,10 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -247,6 +249,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
+ {
+ struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
+ struct dc_link *link = connector->dc_link;
++ struct dc *dc = (struct dc *)link->dc;
+ struct dc_link_settings prefer_link_settings;
+ char *wr_buf = NULL;
+ const uint32_t wr_buf_size = 40;
+@@ -313,7 +316,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
+ prefer_link_settings.lane_count = param[0];
+ prefer_link_settings.link_rate = param[1];
+
+- dp_retrain_link_dp_test(link, &prefer_link_settings, false);
++ dc_link_set_preferred_training_settings(dc, &prefer_link_settings, NULL, link, true);
+
+ kfree(wr_buf);
+ return size;
+@@ -387,8 +390,10 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user((*(rd_buf + result)), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -1315,8 +1320,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -1332,8 +1339,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -1502,8 +1511,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -1519,8 +1530,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -1687,8 +1700,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -1704,8 +1719,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -1868,8 +1885,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -1885,8 +1904,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -2044,8 +2065,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -2061,8 +2084,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -2101,8 +2126,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -2118,8 +2145,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -2173,8 +2202,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -2190,8 +2221,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -2245,8 +2278,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
+ break;
+ }
+
+- if (!pipe_ctx)
++ if (!pipe_ctx) {
++ kfree(rd_buf);
+ return -ENXIO;
++ }
+
+ dsc = pipe_ctx->stream_res.dsc;
+ if (dsc)
+@@ -2262,8 +2297,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
+ break;
+
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+
+ buf += 1;
+ size -= 1;
+@@ -2907,10 +2944,13 @@ static int crc_win_update_set(void *data, u64 val)
+ struct amdgpu_device *adev = drm_to_adev(new_crtc->dev);
+ struct crc_rd_work *crc_rd_wrk = adev->dm.crc_rd_wrk;
+
++ if (!crc_rd_wrk)
++ return 0;
++
+ if (val) {
+ spin_lock_irq(&adev_to_drm(adev)->event_lock);
+ spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
+- if (crc_rd_wrk && crc_rd_wrk->crtc) {
++ if (crc_rd_wrk->crtc) {
+ old_crtc = crc_rd_wrk->crtc;
+ old_acrtc = to_amdgpu_crtc(old_crtc);
+ }
+@@ -2967,7 +3007,7 @@ void crtc_debugfs_init(struct drm_crtc *crtc)
+ &crc_win_y_end_fops);
+ debugfs_create_file_unsafe("crc_win_update", 0644, dir, crtc,
+ &crc_win_update_fops);
+-
++ dput(dir);
+ }
+ #endif
+ /*
+@@ -3250,8 +3290,10 @@ static ssize_t dcc_en_bits_read(
+ dc->hwss.get_dcc_en_bits(dc, dcc_en_bits);
+
+ rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL);
+- if (!rd_buf)
++ if (!rd_buf) {
++ kfree(dcc_en_bits);
+ return -ENOMEM;
++ }
+
+ for (i = 0; i < num_pipes; i++)
+ offset += snprintf(rd_buf + offset, rd_buf_size - offset,
+@@ -3264,8 +3306,10 @@ static ssize_t dcc_en_bits_read(
+ if (*pos >= rd_buf_size)
+ break;
+ r = put_user(*(rd_buf + result), buf);
+- if (r)
++ if (r) {
++ kfree(rd_buf);
+ return r; /* r = -EFAULT */
++ }
+ buf += 1;
+ size -= 1;
+ *pos += 1;
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
+index 09294ff122fea..bbbf7d0eff82f 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
+@@ -52,6 +52,20 @@ struct hdcp_workqueue {
+ struct mod_hdcp_link link;
+
+ enum mod_hdcp_encryption_status encryption_status;
++
++	/* when display is unplugged from mst hub, connector will be
++ * destroyed within dm_dp_mst_connector_destroy. connector
++	 * hdcp properties, like type, undesired, desired, enabled,
++ * will be lost. So, save hdcp properties into hdcp_work within
++ * amdgpu_dm_atomic_commit_tail. if the same display is
++ * plugged back with same display index, its hdcp properties
++ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
++ */
++ /* un-desired, desired, enabled */
++ unsigned int content_protection[AMDGPU_DM_MAX_DISPLAY_INDEX];
++ /* hdcp1.x, hdcp2.x */
++ unsigned int hdcp_content_type[AMDGPU_DM_MAX_DISPLAY_INDEX];
++
+ uint8_t max_link;
+
+ uint8_t *srm;
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+index 7af0d58c231b6..0b58a93864490 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+@@ -32,10 +32,16 @@
+ #include "amdgpu_dm.h"
+ #include "amdgpu_dm_mst_types.h"
+
++#ifdef CONFIG_DRM_AMD_DC_HDCP
++#include "amdgpu_dm_hdcp.h"
++#endif
++
+ #include "dc.h"
+ #include "dm_helpers.h"
+
+ #include "dc_link_ddc.h"
++#include "ddc_service_types.h"
++#include "dpcd_defs.h"
+
+ #include "i2caux_interface.h"
+ #include "dmub_cmd.h"
+@@ -53,6 +59,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
+ ssize_t result = 0;
+ struct aux_payload payload;
+ enum aux_return_code_type operation_result;
++ struct amdgpu_device *adev;
++ struct ddc_service *ddc;
+
+ if (WARN_ON(msg->size > 16))
+ return -E2BIG;
+@@ -69,6 +77,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
+ result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
+ &operation_result);
+
++ /*
++	 * w/a for certain Intel platforms where hpd unexpectedly pulls low during the
++	 * 1st sideband message transaction and AUX_RET_ERROR_HPD_DISCON is returned;
++	 * the aux transaction is actually successful in such a case, so bypass the error
++ */
++ ddc = TO_DM_AUX(aux)->ddc_service;
++ adev = ddc->ctx->driver_context;
++ if (adev->dm.aux_hpd_discon_quirk) {
++ if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
++ operation_result == AUX_RET_ERROR_HPD_DISCON) {
++ result = 0;
++ operation_result = AUX_RET_SUCCESS;
++ }
++ }
++
+ if (payload.write && result >= 0)
+ result = msg->size;
+
+@@ -155,6 +178,31 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
+ };
+
+ #if defined(CONFIG_DRM_AMD_DC_DCN)
++static bool needs_dsc_aux_workaround(struct dc_link *link)
++{
++ if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
++ (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) &&
++ link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2)
++ return true;
++
++ return false;
++}
++
++bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port)
++{
++ u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F
++
++ if (drm_dp_dpcd_read(port->mgr->aux, DP_BRANCH_VENDOR_SPECIFIC_START, &branch_vendor_data, 4) == 4) {
++ if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
++ IS_SYNAPTICS_CASCADED_PANAMERA(link->dpcd_caps.branch_dev_name, branch_vendor_data)) {
++ DRM_INFO("Synaptics Cascaded MST hub\n");
++ return true;
++ }
++ }
++
++ return false;
++}
++
+ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector)
+ {
+ struct dc_sink *dc_sink = aconnector->dc_sink;
+@@ -164,7 +212,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
+ u8 *dsc_branch_dec_caps = NULL;
+
+ aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port);
+-#if defined(CONFIG_HP_HOOK_WORKAROUND)
++
+ /*
+ * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs
+ * because it only check the dsc/fec caps of the "port variable" and not the dock
+@@ -174,10 +222,14 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
+ * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux
+ *
+ */
+-
+- if (!aconnector->dsc_aux && !port->parent->port_parent)
++ if (!aconnector->dsc_aux && !port->parent->port_parent &&
++ needs_dsc_aux_workaround(aconnector->dc_link))
+ aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux;
+-#endif
++
++ /* synaptics cascaded MST hub case */
++ if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port))
++ aconnector->dsc_aux = port->mgr->aux;
++
+ if (!aconnector->dsc_aux)
+ return false;
+
+@@ -267,6 +319,32 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
+ /* dc_link_add_remote_sink returns a new reference */
+ aconnector->dc_sink = dc_sink;
+
++	/* when display is unplugged from mst hub, connector will be
++ * destroyed within dm_dp_mst_connector_destroy. connector
++	 * hdcp properties, like type, undesired, desired, enabled,
++ * will be lost. So, save hdcp properties into hdcp_work within
++ * amdgpu_dm_atomic_commit_tail. if the same display is
++ * plugged back with same display index, its hdcp properties
++ * will be retrieved from hdcp_work within dm_dp_mst_get_modes
++ */
++#ifdef CONFIG_DRM_AMD_DC_HDCP
++ if (aconnector->dc_sink && connector->state) {
++ struct drm_device *dev = connector->dev;
++ struct amdgpu_device *adev = drm_to_adev(dev);
++
++ if (adev->dm.hdcp_workqueue) {
++ struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue;
++ struct hdcp_workqueue *hdcp_w =
++ &hdcp_work[aconnector->dc_link->link_index];
++
++ connector->state->hdcp_content_type =
++ hdcp_w->hdcp_content_type[connector->index];
++ connector->state->content_protection =
++ hdcp_w->content_protection[connector->index];
++ }
++ }
++#endif
++
+ if (aconnector->dc_sink) {
+ amdgpu_dm_update_freesync_caps(
+ connector, aconnector->edid);
+@@ -356,7 +434,6 @@ static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs
+ static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder)
+ {
+ drm_encoder_cleanup(encoder);
+- kfree(encoder);
+ }
+
+ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = {
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+index 900d3f7a84989..f7523fd23f543 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+@@ -26,6 +26,18 @@
+ #ifndef __DAL_AMDGPU_DM_MST_TYPES_H__
+ #define __DAL_AMDGPU_DM_MST_TYPES_H__
+
++#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C
++
++/**
++ * Panamera MST Hub detection
++ * Offset DPCD 050Eh == 0x5A indicates cascaded MST hub case
++ * Check from beginning of branch device vendor specific field (050Ch)
++ */
++#define IS_SYNAPTICS_PANAMERA(branchDevName) (((int)branchDevName[4] & 0xF0) == 0x50 ? 1 : 0)
++#define BRANCH_HW_REVISION_PANAMERA_A2 0x10
++#define SYNAPTICS_CASCADED_HUB_ID 0x5A
++#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
++
+ struct amdgpu_display_manager;
+ struct amdgpu_dm_connector;
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+index 70a554f1e725a..278ff281a1bd5 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+@@ -36,10 +36,14 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link)
+ {
+ uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE];
+
+- if (!(link->connector_signal & SIGNAL_TYPE_EDP))
++ if (!(link->connector_signal & SIGNAL_TYPE_EDP)) {
++ link->psr_settings.psr_feature_enabled = false;
+ return;
+- if (link->type == dc_connection_none)
++ }
++ if (link->type == dc_connection_none) {
++ link->psr_settings.psr_feature_enabled = false;
+ return;
++ }
+ if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT,
+ dpcd_data, sizeof(dpcd_data))) {
+ link->dpcd_caps.psr_caps.psr_version = dpcd_data[0];
+@@ -74,10 +78,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
+
+ link = stream->link;
+
+- psr_config.psr_version = link->dpcd_caps.psr_caps.psr_version;
+-
+- if (psr_config.psr_version > 0) {
+- psr_config.psr_exit_link_training_required = 0x1;
++ if (link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
++ psr_config.psr_version = link->psr_settings.psr_version;
+ psr_config.psr_frame_capture_indication_req = 0;
+ psr_config.psr_rfb_setup_time = 0x37;
+ psr_config.psr_sdp_transmit_line_num_deadline = 0x20;
+diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+index 6dbde74c1e069..228f098e5d88f 100644
+--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+@@ -352,6 +352,7 @@ static enum bp_result get_gpio_i2c_info(
+ uint32_t count = 0;
+ unsigned int table_index = 0;
+ bool find_valid = false;
++ struct atom_gpio_pin_assignment *pin;
+
+ if (!info)
+ return BP_RESULT_BADINPUT;
+@@ -379,20 +380,17 @@ static enum bp_result get_gpio_i2c_info(
+ - sizeof(struct atom_common_table_header))
+ / sizeof(struct atom_gpio_pin_assignment);
+
++ pin = (struct atom_gpio_pin_assignment *) header->gpio_pin;
++
+ for (table_index = 0; table_index < count; table_index++) {
+- if (((record->i2c_id & I2C_HW_CAP) == (
+- header->gpio_pin[table_index].gpio_id &
+- I2C_HW_CAP)) &&
+- ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) ==
+- (header->gpio_pin[table_index].gpio_id &
+- I2C_HW_ENGINE_ID_MASK)) &&
+- ((record->i2c_id & I2C_HW_LANE_MUX) ==
+- (header->gpio_pin[table_index].gpio_id &
+- I2C_HW_LANE_MUX))) {
++ if (((record->i2c_id & I2C_HW_CAP) == (pin->gpio_id & I2C_HW_CAP)) &&
++ ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == (pin->gpio_id & I2C_HW_ENGINE_ID_MASK)) &&
++ ((record->i2c_id & I2C_HW_LANE_MUX) == (pin->gpio_id & I2C_HW_LANE_MUX))) {
+ /* still valid */
+ find_valid = true;
+ break;
+ }
++ pin = (struct atom_gpio_pin_assignment *)((uint8_t *)pin + sizeof(struct atom_gpio_pin_assignment));
+ }
+
+ /* If we don't find the entry that we are looking for then
+@@ -408,11 +406,8 @@ static enum bp_result get_gpio_i2c_info(
+ info->i2c_slave_address = record->i2c_slave_addr;
+
+ /* TODO: check how to get register offset for en, Y, etc. */
+- info->gpio_info.clk_a_register_index =
+- le16_to_cpu(
+- header->gpio_pin[table_index].data_a_reg_index);
+- info->gpio_info.clk_a_shift =
+- header->gpio_pin[table_index].gpio_bitshift;
++ info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index);
++ info->gpio_info.clk_a_shift = pin->gpio_bitshift;
+
+ return BP_RESULT_OK;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c
+index 6ca288fb5fb9e..2d46bc527b218 100644
+--- a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c
++++ b/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c
+@@ -26,12 +26,12 @@
+ #include "bw_fixed.h"
+
+
+-#define MIN_I64 \
+- (int64_t)(-(1LL << 63))
+-
+ #define MAX_I64 \
+ (int64_t)((1ULL << 63) - 1)
+
++#define MIN_I64 \
++ (-MAX_I64 - 1)
++
+ #define FRACTIONAL_PART_MASK \
+ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+index bb31541f80723..6420527fe476c 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+@@ -306,8 +306,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
+ case FAMILY_NV:
+ if (ASICREV_IS_SIENNA_CICHLID_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
+ dcn3_clk_mgr_destroy(clk_mgr);
+- }
+- if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
++ } else if (ASICREV_IS_DIMGREY_CAVEFISH_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
+ dcn3_clk_mgr_destroy(clk_mgr);
+ }
+ if (ASICREV_IS_BEIGE_GOBY_P(clk_mgr_base->ctx->asic_id.hw_internal_rev)) {
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+index 1861a147a7fa1..5c5cbeb59c4d9 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+@@ -437,8 +437,10 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
+ clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1;
+
+ /* Refresh bounding box */
++ DC_FP_START();
+ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
+ clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
++ DC_FP_END();
+ }
+
+ static bool dcn3_is_smu_present(struct clk_mgr *clk_mgr_base)
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+index 8ecc708bcd9ec..766759420eebb 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+@@ -302,6 +302,9 @@ void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, b
+ /* bits 8:7 for cache timer scale, bits 6:1 for cache timer delay, bit 0 = 1 for enable, = 0 for disable */
+ uint32_t param = (cache_timer_scale << 7) | (cache_timer_delay << 1) | (enable ? 1 : 0);
+
++ smu_print("SMU Set display refresh from mall: enable = %d, cache_timer_delay = %d, cache_timer_scale = %d\n",
++ enable, cache_timer_delay, cache_timer_scale);
++
+ dcn30_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+index 7046da14bb2a5..329ce4e84b83c 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+@@ -582,32 +582,32 @@ static struct wm_table lpddr5_wm_table = {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+- .sr_exit_time_us = 5.32,
+- .sr_enter_plus_exit_time_us = 6.38,
++ .sr_exit_time_us = 13.5,
++ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+- .sr_exit_time_us = 9.82,
+- .sr_enter_plus_exit_time_us = 11.196,
++ .sr_exit_time_us = 13.5,
++ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+- .sr_exit_time_us = 9.89,
+- .sr_enter_plus_exit_time_us = 11.24,
++ .sr_exit_time_us = 13.5,
++ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.65333,
+- .sr_exit_time_us = 9.748,
+- .sr_enter_plus_exit_time_us = 11.102,
++ .sr_exit_time_us = 13.5,
++ .sr_enter_plus_exit_time_us = 16.5,
+ .valid = true,
+ },
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+index 377c4e53a2b37..5357620627afc 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+@@ -81,6 +81,11 @@ int dcn31_get_active_display_cnt_wa(
+ stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+ stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ tmds_present = true;
++
++ /* Checking stream / link detection ensuring that PHY is active*/
++ if (dc_is_dp_signal(stream->signal) && !stream->dpms_off)
++ display_count++;
++
+ }
+
+ for (i = 0; i < dc->link_count; i++) {
+@@ -157,6 +162,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
+ union display_idle_optimization_u idle_info = { 0 };
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
++ idle_info.idle_info.s0i2_rdy = 1;
+ dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+@@ -323,38 +329,38 @@ static struct clk_bw_params dcn31_bw_params = {
+
+ };
+
+-static struct wm_table ddr4_wm_table = {
++static struct wm_table ddr5_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+- .sr_exit_time_us = 6.09,
+- .sr_enter_plus_exit_time_us = 7.14,
++ .sr_exit_time_us = 9,
++ .sr_enter_plus_exit_time_us = 11,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+- .sr_exit_time_us = 10.12,
+- .sr_enter_plus_exit_time_us = 11.48,
++ .sr_exit_time_us = 9,
++ .sr_enter_plus_exit_time_us = 11,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+- .sr_exit_time_us = 10.12,
+- .sr_enter_plus_exit_time_us = 11.48,
++ .sr_exit_time_us = 9,
++ .sr_enter_plus_exit_time_us = 11,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+- .sr_exit_time_us = 10.12,
+- .sr_enter_plus_exit_time_us = 11.48,
++ .sr_exit_time_us = 9,
++ .sr_enter_plus_exit_time_us = 11,
+ .valid = true,
+ },
+ }
+@@ -682,7 +688,7 @@ void dcn31_clk_mgr_construct(
+ if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
+ dcn31_bw_params.wm_table = lpddr5_wm_table;
+ } else {
+- dcn31_bw_params.wm_table = ddr4_wm_table;
++ dcn31_bw_params.wm_table = ddr5_wm_table;
+ }
+ /* Saved clocks configured at boot for debug purposes */
+ dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+index 8c2b77eb94593..21d2cbc3cbb20 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+@@ -119,6 +119,16 @@ int dcn31_smu_send_msg_with_param(
+
+ result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
+
++ if (result == VBIOSSMC_Result_Failed) {
++ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
++ param == TABLE_WATERMARKS)
++ DC_LOG_WARNING("Watermarks table not configured properly by SMU");
++ else
++ ASSERT(0);
++ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
++ return -1;
++ }
++
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
+index c798c65d42765..634640d5c0ff4 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
+@@ -771,6 +771,7 @@ static bool dc_construct_ctx(struct dc *dc,
+
+ dc_ctx->perf_trace = dc_perf_trace_create();
+ if (!dc_ctx->perf_trace) {
++ kfree(dc_ctx);
+ ASSERT_CRITICAL(false);
+ return false;
+ }
+@@ -891,10 +892,13 @@ static bool dc_construct(struct dc *dc,
+ goto fail;
+ #ifdef CONFIG_DRM_AMD_DC_DCN
+ dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present;
+-#endif
+
+- if (dc->res_pool->funcs->update_bw_bounding_box)
++ if (dc->res_pool->funcs->update_bw_bounding_box) {
++ DC_FP_START();
+ dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
++ DC_FP_END();
++ }
++#endif
+
+ /* Creation of current_state must occur after dc->dml
+ * is initialized in dc_create_resource_pool because
+@@ -1118,6 +1122,8 @@ struct dc *dc_create(const struct dc_init_data *init_params)
+
+ dc->caps.max_dp_protocol_version = DP_VERSION_1_4;
+
++ dc->caps.max_otg_num = dc->res_pool->res_cap->num_timing_generator;
++
+ if (dc->res_pool->dmcu != NULL)
+ dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version;
+ }
+@@ -1783,6 +1789,11 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
+
+ post_surface_trace(dc);
+
++ if (dc->ctx->dce_version >= DCE_VERSION_MAX)
++ TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk);
++ else
++ TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
++
+ if (is_flip_pending_in_pipes(dc, context))
+ return;
+
+@@ -2100,9 +2111,6 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
+ enum surface_update_type overall_type = UPDATE_TYPE_FAST;
+ union surface_update_flags *update_flags = &u->surface->update_flags;
+
+- if (u->flip_addr)
+- update_flags->bits.addr_update = 1;
+-
+ if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) {
+ update_flags->raw = 0xFFFFFFFF;
+ return UPDATE_TYPE_FULL;
+@@ -2452,11 +2460,8 @@ static void copy_stream_update_to_stream(struct dc *dc,
+ if (update->abm_level)
+ stream->abm_level = *update->abm_level;
+
+- if (update->periodic_interrupt0)
+- stream->periodic_interrupt0 = *update->periodic_interrupt0;
+-
+- if (update->periodic_interrupt1)
+- stream->periodic_interrupt1 = *update->periodic_interrupt1;
++ if (update->periodic_interrupt)
++ stream->periodic_interrupt = *update->periodic_interrupt;
+
+ if (update->gamut_remap)
+ stream->gamut_remap_matrix = *update->gamut_remap;
+@@ -2526,6 +2531,137 @@ static void copy_stream_update_to_stream(struct dc *dc,
+ }
+ }
+
++void dc_reset_state(struct dc *dc, struct dc_state *context)
++{
++ dc_resource_state_destruct(context);
++
++ /* clear the structure, but don't reset the reference count */
++ memset(context, 0, offsetof(struct dc_state, refcount));
++
++ init_state(dc, context);
++}
++
++static bool update_planes_and_stream_state(struct dc *dc,
++ struct dc_surface_update *srf_updates, int surface_count,
++ struct dc_stream_state *stream,
++ struct dc_stream_update *stream_update,
++ enum surface_update_type *new_update_type,
++ struct dc_state **new_context)
++{
++ struct dc_state *context;
++ int i, j;
++ enum surface_update_type update_type;
++ const struct dc_stream_status *stream_status;
++ struct dc_context *dc_ctx = dc->ctx;
++
++ stream_status = dc_stream_get_status(stream);
++
++ if (!stream_status) {
++ if (surface_count) /* Only an error condition if surf_count non-zero*/
++ ASSERT(false);
++
++ return false; /* Cannot commit surface to stream that is not committed */
++ }
++
++ context = dc->current_state;
++
++ update_type = dc_check_update_surfaces_for_stream(
++ dc, srf_updates, surface_count, stream_update, stream_status);
++
++ /* update current stream with the new updates */
++ copy_stream_update_to_stream(dc, context, stream, stream_update);
++
++ /* do not perform surface update if surface has invalid dimensions
++ * (all zero) and no scaling_info is provided
++ */
++ if (surface_count > 0) {
++ for (i = 0; i < surface_count; i++) {
++ if ((srf_updates[i].surface->src_rect.width == 0 ||
++ srf_updates[i].surface->src_rect.height == 0 ||
++ srf_updates[i].surface->dst_rect.width == 0 ||
++ srf_updates[i].surface->dst_rect.height == 0) &&
++ (!srf_updates[i].scaling_info ||
++ srf_updates[i].scaling_info->src_rect.width == 0 ||
++ srf_updates[i].scaling_info->src_rect.height == 0 ||
++ srf_updates[i].scaling_info->dst_rect.width == 0 ||
++ srf_updates[i].scaling_info->dst_rect.height == 0)) {
++ DC_ERROR("Invalid src/dst rects in surface update!\n");
++ return false;
++ }
++ }
++ }
++
++ if (update_type >= update_surface_trace_level)
++ update_surface_trace(dc, srf_updates, surface_count);
++
++ if (update_type >= UPDATE_TYPE_FULL) {
++ struct dc_plane_state *new_planes[MAX_SURFACES] = {0};
++
++ for (i = 0; i < surface_count; i++)
++ new_planes[i] = srf_updates[i].surface;
++
++ /* initialize scratch memory for building context */
++ context = dc_create_state(dc);
++ if (context == NULL) {
++ DC_ERROR("Failed to allocate new validate context!\n");
++ return false;
++ }
++
++ dc_resource_state_copy_construct(
++ dc->current_state, context);
++
++ /*remove old surfaces from context */
++ if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
++
++ BREAK_TO_DEBUGGER();
++ goto fail;
++ }
++
++ /* add surface to context */
++ if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
++
++ BREAK_TO_DEBUGGER();
++ goto fail;
++ }
++ }
++
++ /* save update parameters into surface */
++ for (i = 0; i < surface_count; i++) {
++ struct dc_plane_state *surface = srf_updates[i].surface;
++
++ copy_surface_update_to_plane(surface, &srf_updates[i]);
++
++ if (update_type >= UPDATE_TYPE_MED) {
++ for (j = 0; j < dc->res_pool->pipe_count; j++) {
++ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
++
++ if (pipe_ctx->plane_state != surface)
++ continue;
++
++ resource_build_scaling_params(pipe_ctx);
++ }
++ }
++ }
++
++ if (update_type == UPDATE_TYPE_FULL) {
++ if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
++ BREAK_TO_DEBUGGER();
++ goto fail;
++ }
++ }
++
++ *new_context = context;
++ *new_update_type = update_type;
++
++ return true;
++
++fail:
++ dc_release_state(context);
++
++ return false;
++
++}
++
+ static void commit_planes_do_stream_update(struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+@@ -2540,13 +2676,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
+
+ if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->stream == stream) {
+
+- if (stream_update->periodic_interrupt0 &&
+- dc->hwss.setup_periodic_interrupt)
+- dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE0);
+-
+- if (stream_update->periodic_interrupt1 &&
+- dc->hwss.setup_periodic_interrupt)
+- dc->hwss.setup_periodic_interrupt(dc, pipe_ctx, VLINE1);
++ if (stream_update->periodic_interrupt && dc->hwss.setup_periodic_interrupt)
++ dc->hwss.setup_periodic_interrupt(dc, pipe_ctx);
+
+ if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) ||
+ stream_update->vrr_infopacket ||
+@@ -2703,7 +2834,8 @@ static void commit_planes_for_stream(struct dc *dc,
+ #endif
+
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
+- if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
++ if (top_pipe_to_program &&
++ top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
+ if (should_use_dmub_lock(stream->link)) {
+ union dmub_hw_lock_flags hw_locks = { 0 };
+ struct dmub_hw_lock_inst_flags inst_flags = { 0 };
+@@ -2927,6 +3059,152 @@ static void commit_planes_for_stream(struct dc *dc,
+ }
+ }
+
++static bool commit_minimal_transition_state(struct dc *dc,
++ struct dc_state *transition_base_context)
++{
++ struct dc_state *transition_context = dc_create_state(dc);
++ enum pipe_split_policy tmp_policy;
++ enum dc_status ret = DC_ERROR_UNEXPECTED;
++ unsigned int i, j;
++
++ if (!transition_context)
++ return false;
++
++ tmp_policy = dc->debug.pipe_split_policy;
++ dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
++
++ dc_resource_state_copy_construct(transition_base_context, transition_context);
++
++ //commit minimal state
++ if (dc->res_pool->funcs->validate_bandwidth(dc, transition_context, false)) {
++ for (i = 0; i < transition_context->stream_count; i++) {
++ struct dc_stream_status *stream_status = &transition_context->stream_status[i];
++
++ for (j = 0; j < stream_status->plane_count; j++) {
++ struct dc_plane_state *plane_state = stream_status->plane_states[j];
++
++ /* force vsync flip when reconfiguring pipes to prevent underflow
++ * and corruption
++ */
++ plane_state->flip_immediate = false;
++ }
++ }
++
++ ret = dc_commit_state_no_check(dc, transition_context);
++ }
++
++ //always release as dc_commit_state_no_check retains in good case
++ dc_release_state(transition_context);
++
++ //restore previous pipe split policy
++ dc->debug.pipe_split_policy = tmp_policy;
++
++ if (ret != DC_OK) {
++ //this should never happen
++ BREAK_TO_DEBUGGER();
++ return false;
++ }
++
++ //force full surface update
++ for (i = 0; i < dc->current_state->stream_count; i++) {
++ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
++ dc->current_state->stream_status[i].plane_states[j]->update_flags.raw = 0xFFFFFFFF;
++ }
++ }
++
++ return true;
++}
++
++bool dc_update_planes_and_stream(struct dc *dc,
++ struct dc_surface_update *srf_updates, int surface_count,
++ struct dc_stream_state *stream,
++ struct dc_stream_update *stream_update)
++{
++ struct dc_state *context;
++ enum surface_update_type update_type;
++ int i;
++
++	/* In cases where MPO and split or ODM are used, transitions can
++ * cause underflow. Apply stream configuration with minimal pipe
++ * split first to avoid unsupported transitions for active pipes.
++ */
++ bool force_minimal_pipe_splitting = false;
++ bool is_plane_addition = false;
++
++ struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream);
++
++ if (cur_stream_status &&
++ dc->current_state->stream_count > 0 &&
++ dc->debug.pipe_split_policy != MPC_SPLIT_AVOID) {
++ /* determine if minimal transition is required */
++ if (cur_stream_status->plane_count > surface_count) {
++ force_minimal_pipe_splitting = true;
++ } else if (cur_stream_status->plane_count < surface_count) {
++ force_minimal_pipe_splitting = true;
++ is_plane_addition = true;
++ }
++ }
++
++ /* on plane addition, minimal state is the current one */
++ if (force_minimal_pipe_splitting && is_plane_addition &&
++ !commit_minimal_transition_state(dc, dc->current_state))
++ return false;
++
++ if (!update_planes_and_stream_state(
++ dc,
++ srf_updates,
++ surface_count,
++ stream,
++ stream_update,
++ &update_type,
++ &context))
++ return false;
++
++ /* on plane addition, minimal state is the new one */
++ if (force_minimal_pipe_splitting && !is_plane_addition) {
++ if (!commit_minimal_transition_state(dc, context)) {
++ dc_release_state(context);
++ return false;
++ }
++
++ update_type = UPDATE_TYPE_FULL;
++ }
++
++ commit_planes_for_stream(
++ dc,
++ srf_updates,
++ surface_count,
++ stream,
++ stream_update,
++ update_type,
++ context);
++
++ if (dc->current_state != context) {
++
++ /* Since memory free requires elevated IRQL, an interrupt
++ * request is generated by mem free. If this happens
++ * between freeing and reassigning the context, our vsync
++ * interrupt will call into dc and cause a memory
++ * corruption BSOD. Hence, we first reassign the context,
++ * then free the old context.
++ */
++
++ struct dc_state *old = dc->current_state;
++
++ dc->current_state = context;
++ dc_release_state(old);
++
++ // clear any forced full updates
++ for (i = 0; i < dc->res_pool->pipe_count; i++) {
++ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
++
++ if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
++ pipe_ctx->plane_state->force_full_update = false;
++ }
++ }
++ return true;
++}
++
+ void dc_commit_updates_for_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+@@ -2968,6 +3246,14 @@ void dc_commit_updates_for_stream(struct dc *dc,
+ if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
+ new_pipe->plane_state->force_full_update = true;
+ }
++ } else if (update_type == UPDATE_TYPE_FAST && dc_ctx->dce_version >= DCE_VERSION_MAX) {
++ /*
++ * Previous frame finished and HW is ready for optimization.
++ *
++ * Only relevant for DCN behavior where we can guarantee the optimization
++ * is safe to apply - retain the legacy behavior for DCE.
++ */
++ dc_post_update_surfaces_to_stream(dc);
+ }
+
+
+@@ -3024,14 +3310,11 @@ void dc_commit_updates_for_stream(struct dc *dc,
+ pipe_ctx->plane_state->force_full_update = false;
+ }
+ }
+- /*let's use current_state to update watermark etc*/
+- if (update_type >= UPDATE_TYPE_FULL) {
+- dc_post_update_surfaces_to_stream(dc);
+
+- if (dc_ctx->dce_version >= DCE_VERSION_MAX)
+- TRACE_DCN_CLOCK_STATE(&context->bw_ctx.bw.dcn.clk);
+- else
+- TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
++ /* Legacy optimization path for DCE. */
++ if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) {
++ dc_post_update_surfaces_to_stream(dc);
++ TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
+ }
+
+ return;
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+index 9039fb134db59..f858ae68aa5f6 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+@@ -92,8 +92,8 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
+ { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
+ 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
+ { COLOR_SPACE_YCBCR2020_TYPE,
+- { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
+- 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
++ { 0x1000, 0xF149, 0xFEB7, 0x1004, 0x0868, 0x15B2,
++ 0x01E6, 0x201, 0xFB88, 0xF478, 0x1000, 0x1004} },
+ { COLOR_SPACE_YCBCR709_BLACK_TYPE,
+ { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
+ 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+index 1e44b13c1c7de..b727bd7e039d7 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+@@ -1665,12 +1665,6 @@ struct dc_link *link_create(const struct link_init_data *init_params)
+ if (false == dc_link_construct(link, init_params))
+ goto construct_fail;
+
+- /*
+- * Must use preferred_link_setting, not reported_link_cap or verified_link_cap,
+- * since struct preferred_link_setting won't be reset after S3.
+- */
+- link->preferred_link_setting.dpcd_source_device_specific_field_support = true;
+-
+ return link;
+
+ construct_fail:
+@@ -1696,6 +1690,8 @@ static void enable_stream_features(struct pipe_ctx *pipe_ctx)
+ union down_spread_ctrl old_downspread;
+ union down_spread_ctrl new_downspread;
+
++ memset(&old_downspread, 0, sizeof(old_downspread));
++
+ core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
+ &old_downspread.raw, sizeof(old_downspread));
+
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+index 6d655e158267a..6777adb66f9d7 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+@@ -2075,7 +2075,7 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link)
+ return max_link_cap;
+ }
+
+-enum dc_status read_hpd_rx_irq_data(
++static enum dc_status read_hpd_rx_irq_data(
+ struct dc_link *link,
+ union hpd_irq_data *irq_data)
+ {
+@@ -2743,7 +2743,7 @@ void decide_link_settings(struct dc_stream_state *stream,
+ }
+
+ /*************************Short Pulse IRQ***************************/
+-static bool allow_hpd_rx_irq(const struct dc_link *link)
++bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link)
+ {
+ /*
+ * Don't handle RX IRQ unless one of following is met:
+@@ -3118,7 +3118,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video
+ &dpcd_pattern_type.value,
+ sizeof(dpcd_pattern_type));
+
+- channel_count = dpcd_test_mode.bits.channel_count + 1;
++ channel_count = min(dpcd_test_mode.bits.channel_count + 1, AUDIO_CHANNELS_COUNT);
+
+ // read pattern periods for requested channels when sawTooth pattern is requested
+ if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH ||
+@@ -3177,7 +3177,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video
+ }
+ }
+
+-static void handle_automated_test(struct dc_link *link)
++void dc_link_dp_handle_automated_test(struct dc_link *link)
+ {
+ union test_request test_request;
+ union test_response test_response;
+@@ -3226,17 +3226,50 @@ static void handle_automated_test(struct dc_link *link)
+ sizeof(test_response));
+ }
+
+-bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss)
++void dc_link_dp_handle_link_loss(struct dc_link *link)
++{
++ int i;
++ struct pipe_ctx *pipe_ctx;
++
++ for (i = 0; i < MAX_PIPES; i++) {
++ pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
++ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
++ break;
++ }
++
++ if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
++ return;
++
++ for (i = 0; i < MAX_PIPES; i++) {
++ pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
++ if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
++ pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) {
++ core_link_disable_stream(pipe_ctx);
++ }
++ }
++
++ for (i = 0; i < MAX_PIPES; i++) {
++ pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
++ if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
++ pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) {
++ core_link_enable_stream(link->dc->current_state, pipe_ctx);
++ }
++ }
++}
++
++bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd_irq_dpcd_data, bool *out_link_loss,
++ bool defer_handling, bool *has_left_work)
+ {
+ union hpd_irq_data hpd_irq_dpcd_data = { { { {0} } } };
+ union device_service_irq device_service_clear = { { 0 } };
+ enum dc_status result;
+ bool status = false;
+- struct pipe_ctx *pipe_ctx;
+- int i;
+
+ if (out_link_loss)
+ *out_link_loss = false;
++
++ if (has_left_work)
++ *has_left_work = false;
+ /* For use cases related to down stream connection status change,
+ * PSR and device auto test, refer to function handle_sst_hpd_irq
+ * in DAL2.1*/
+@@ -3268,11 +3301,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
+ &device_service_clear.raw,
+ sizeof(device_service_clear.raw));
+ device_service_clear.raw = 0;
+- handle_automated_test(link);
++ if (defer_handling && has_left_work)
++ *has_left_work = true;
++ else
++ dc_link_dp_handle_automated_test(link);
+ return false;
+ }
+
+- if (!allow_hpd_rx_irq(link)) {
++ if (!dc_link_dp_allow_hpd_rx_irq(link)) {
+ DC_LOG_HW_HPD_IRQ("%s: skipping HPD handling on %d\n",
+ __func__, link->link_index);
+ return false;
+@@ -3286,12 +3322,18 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
+ * so do not handle as a normal sink status change interrupt.
+ */
+
+- if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY)
++ if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY) {
++ if (defer_handling && has_left_work)
++ *has_left_work = true;
+ return true;
++ }
+
+ /* check if we have MST msg and return since we poll for it */
+- if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY)
++ if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
++ if (defer_handling && has_left_work)
++ *has_left_work = true;
+ return false;
++ }
+
+ /* For now we only handle 'Downstream port status' case.
+ * If we got sink count changed it means
+@@ -3308,29 +3350,10 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
+ sizeof(hpd_irq_dpcd_data),
+ "Status: ");
+
+- for (i = 0; i < MAX_PIPES; i++) {
+- pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+- if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link == link)
+- break;
+- }
+-
+- if (pipe_ctx == NULL || pipe_ctx->stream == NULL)
+- return false;
+-
+-
+- for (i = 0; i < MAX_PIPES; i++) {
+- pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+- if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
+- pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe)
+- core_link_disable_stream(pipe_ctx);
+- }
+-
+- for (i = 0; i < MAX_PIPES; i++) {
+- pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
+- if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
+- pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe)
+- core_link_enable_stream(link->dc->current_state, pipe_ctx);
+- }
++ if (defer_handling && has_left_work)
++ *has_left_work = true;
++ else
++ dc_link_dp_handle_link_loss(link);
+
+ status = false;
+ if (out_link_loss)
+@@ -3650,7 +3673,9 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link)
+ lttpr_dpcd_data,
+ sizeof(lttpr_dpcd_data));
+ if (status != DC_OK) {
+- dm_error("%s: Read LTTPR caps data failed.\n", __func__);
++#if defined(CONFIG_DRM_AMD_DC_DCN)
++ DC_LOG_DP2("%s: Read LTTPR caps data failed.\n", __func__);
++#endif
+ return false;
+ }
+
+@@ -3678,6 +3703,14 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link)
+ lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
+ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
++ /* If this chip cap is set, at least one retimer must exist in the chain
++ * Override count to 1 if we receive a known bad count (0 or an invalid value) */
++ if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN &&
++ (dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) {
++ ASSERT(0);
++ link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80;
++ }
++
+ /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
+ is_lttpr_present = (dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 &&
+ link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
+@@ -3913,6 +3946,26 @@ static bool retrieve_link_cap(struct dc_link *link)
+ dp_hw_fw_revision.ieee_fw_rev,
+ sizeof(dp_hw_fw_revision.ieee_fw_rev));
+
++ /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */
++ {
++ uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 };
++ uint8_t fwrev_mbp_2018[] = { 7, 4 };
++ uint8_t fwrev_mbp_2018_vega[] = { 8, 4 };
++
++ /* We also check for the firmware revision as 16,1 models have an
++ * identical device id and are incorrectly quirked otherwise.
++ */
++ if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
++ !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018,
++ sizeof(str_mbp_2018)) &&
++ (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018,
++ sizeof(fwrev_mbp_2018)) ||
++ !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega,
++ sizeof(fwrev_mbp_2018_vega)))) {
++ link->reported_link_cap.link_rate = LINK_RATE_RBR2;
++ }
++ }
++
+ memset(&link->dpcd_caps.dsc_caps, '\0',
+ sizeof(link->dpcd_caps.dsc_caps));
+ memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap));
+@@ -4690,7 +4743,7 @@ enum dc_status dp_set_fec_ready(struct dc_link *link, bool ready)
+ link_enc->funcs->fec_set_ready(link_enc, true);
+ link->fec_state = dc_link_fec_ready;
+ } else {
+- link_enc->funcs->fec_set_ready(link->link_enc, false);
++ link_enc->funcs->fec_set_ready(link_enc, false);
+ link->fec_state = dc_link_fec_not_ready;
+ dm_error("dpcd write failed to set fec_ready");
+ }
+@@ -4788,18 +4841,10 @@ void dpcd_set_source_specific_data(struct dc_link *link)
+
+ uint8_t hblank_size = (uint8_t)link->dc->caps.min_horizontal_blanking_period;
+
+- if (link->preferred_link_setting.dpcd_source_device_specific_field_support) {
+- result_write_min_hblank = core_link_write_dpcd(link,
+- DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size),
+- sizeof(hblank_size));
+-
+- if (result_write_min_hblank == DC_ERROR_UNEXPECTED)
+- link->preferred_link_setting.dpcd_source_device_specific_field_support = false;
+- } else {
+- DC_LOG_DC("Sink device does not support 00340h DPCD write. Skipping on purpose.\n");
+- }
++ result_write_min_hblank = core_link_write_dpcd(link,
++ DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size),
++ sizeof(hblank_size));
+ }
+-
+ DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
+ WPP_BIT_FLAG_DC_DETECTION_DP_CAPS,
+ "result=%u link_index=%u enum dce_version=%d DPCD=0x%04X min_hblank=%u branch_dev_id=0x%x branch_dev_name='%c%c%c%c%c%c'",
+diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+index a60396d5be445..fa4d671b5b2cc 100644
+--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+@@ -1062,12 +1062,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
+ * on certain displays, such as the Sharp 4k. 36bpp is needed
+ * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
+ * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
+- * precision on at least DCN display engines. However, at least
+- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
+- * did not show such problems, so this seems to be the exception.
++ * precision on DCN display engines, but apparently not for DCE, as
++ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
++ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
++ * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel
++ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
+ */
+- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
++ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
+ else
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
+@@ -1433,6 +1434,9 @@ bool dc_remove_plane_from_context(
+ struct dc_stream_status *stream_status = NULL;
+ struct resource_pool *pool = dc->res_pool;
+
++ if (!plane_state)
++ return true;
++
+ for (i = 0; i < context->stream_count; i++)
+ if (context->streams[i] == stream) {
+ stream_status = &context->stream_status[i];
+@@ -1599,6 +1603,9 @@ static bool are_stream_backends_same(
+ if (is_timing_changed(stream_a, stream_b))
+ return false;
+
++ if (stream_a->signal != stream_b->signal)
++ return false;
++
+ if (stream_a->dpms_off != stream_b->dpms_off)
+ return false;
+
+@@ -1623,6 +1630,10 @@ bool dc_is_stream_unchanged(
+ if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
+ return false;
+
++ /*compare audio info*/
++ if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0)
++ return false;
++
+ return true;
+ }
+
+@@ -1795,9 +1806,6 @@ enum dc_status dc_remove_stream_from_ctx(
+ dc->res_pool,
+ del_pipe->stream_res.stream_enc,
+ false);
+- /* Release link encoder from stream in new dc_state. */
+- if (dc->res_pool->funcs->link_enc_unassign)
+- dc->res_pool->funcs->link_enc_unassign(new_ctx, del_pipe->stream);
+
+ if (del_pipe->stream_res.audio)
+ update_audio_usage(
+diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
+index 3ab52d9a82cf6..e0f58fab5e8ed 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -185,6 +185,7 @@ struct dc_caps {
+ struct dc_color_caps color;
+ bool vbios_lttpr_aware;
+ bool vbios_lttpr_enable;
++ uint32_t max_otg_num;
+ };
+
+ struct dc_bug_wa {
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+index 4f54bde1bb1c7..1948cd9427d7e 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+@@ -109,7 +109,6 @@ struct dc_link_settings {
+ enum dc_link_spread link_spread;
+ bool use_link_rate_set;
+ uint8_t link_rate_set;
+- bool dpcd_source_device_specific_field_support;
+ };
+
+ struct dc_lane_settings {
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
+index 83845d006c54a..9b7c32f7fd86f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
+@@ -296,7 +296,8 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx);
+ * false - no change in Downstream port status. No further action required
+ * from DM. */
+ bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link,
+- union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss);
++ union hpd_irq_data *hpd_irq_dpcd_data, bool *out_link_loss,
++ bool defer_handling, bool *has_left_work);
+
+ /*
+ * On eDP links this function call will stall until T12 has elapsed.
+@@ -305,9 +306,9 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *dc_link,
+ */
+ bool dc_link_wait_for_t12(struct dc_link *link);
+
+-enum dc_status read_hpd_rx_irq_data(
+- struct dc_link *link,
+- union hpd_irq_data *irq_data);
++void dc_link_dp_handle_automated_test(struct dc_link *link);
++void dc_link_dp_handle_link_loss(struct dc_link *link);
++bool dc_link_dp_allow_hpd_rx_irq(const struct dc_link *link);
+
+ struct dc_sink_init_data;
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
+index b8ebc1f095389..3e606faff58f4 100644
+--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
++++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
+@@ -193,8 +193,7 @@ struct dc_stream_state {
+ /* DMCU info */
+ unsigned int abm_level;
+
+- struct periodic_interrupt_config periodic_interrupt0;
+- struct periodic_interrupt_config periodic_interrupt1;
++ struct periodic_interrupt_config periodic_interrupt;
+
+ /* from core_stream struct */
+ struct dc_context *ctx;
+@@ -260,8 +259,7 @@ struct dc_stream_update {
+ struct dc_info_packet *hdr_static_metadata;
+ unsigned int *abm_level;
+
+- struct periodic_interrupt_config *periodic_interrupt0;
+- struct periodic_interrupt_config *periodic_interrupt1;
++ struct periodic_interrupt_config *periodic_interrupt;
+
+ struct dc_info_packet *vrr_infopacket;
+ struct dc_info_packet *vsc_infopacket;
+@@ -290,6 +288,9 @@ bool dc_is_stream_scaling_unchanged(
+ struct dc_stream_state *old_stream, struct dc_stream_state *stream);
+
+ /*
++ * Setup stream attributes if no stream updates are provided
++ * there will be no impact on the stream parameters
++ *
+ * Set up surface attributes and associate to a stream
+ * The surfaces parameter is an absolute set of all surface active for the stream.
+ * If no surfaces are provided, the stream will be blanked; no memory read.
+@@ -298,8 +299,23 @@ bool dc_is_stream_scaling_unchanged(
+ * After this call:
+ * Surfaces attributes are programmed and configured to be composed into stream.
+ * This does not trigger a flip. No surface address is programmed.
++ *
+ */
++bool dc_update_planes_and_stream(struct dc *dc,
++ struct dc_surface_update *surface_updates, int surface_count,
++ struct dc_stream_state *dc_stream,
++ struct dc_stream_update *stream_update);
+
++/*
++ * Set up surface attributes and associate to a stream
++ * The surfaces parameter is an absolute set of all surface active for the stream.
++ * If no surfaces are provided, the stream will be blanked; no memory read.
++ * Any flip related attribute changes must be done through this interface.
++ *
++ * After this call:
++ * Surfaces attributes are programmed and configured to be composed into stream.
++ * This does not trigger a flip. No surface address is programmed.
++ */
+ void dc_commit_updates_for_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+index 2c7eb982eabca..5f1b735da5063 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
++++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+@@ -545,9 +545,11 @@ static void dce112_get_pix_clk_dividers_helper (
+ switch (pix_clk_params->color_depth) {
+ case COLOR_DEPTH_101010:
+ actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
++ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
+ break;
+ case COLOR_DEPTH_121212:
+ actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
++ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
+ break;
+ case COLOR_DEPTH_161616:
+ actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
+@@ -1013,9 +1015,12 @@ static bool get_pixel_clk_frequency_100hz(
+ * not be programmed equal to DPREFCLK
+ */
+ modulo_hz = REG_READ(MODULO[inst]);
+- *pixel_clk_khz = div_u64((uint64_t)clock_hz*
+- clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+- modulo_hz);
++ if (modulo_hz)
++ *pixel_clk_khz = div_u64((uint64_t)clock_hz*
++ clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
++ modulo_hz);
++ else
++ *pixel_clk_khz = 0;
+ } else {
+ /* NOTE: There is agreement with VBIOS here that MODULO is
+ * programmed equal to DPREFCLK, in which case PHASE will be
+diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+index d9fd4ec60588f..670d5ab9d9984 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
++++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+@@ -1009,7 +1009,7 @@ static void dce_transform_set_pixel_storage_depth(
+ color_depth = COLOR_DEPTH_101010;
+ pixel_depth = 0;
+ expan_mode = 1;
+- BREAK_TO_DEBUGGER();
++ DC_LOG_DC("The pixel depth %d is not valid, set COLOR_DEPTH_101010 instead.", depth);
+ break;
+ }
+
+@@ -1023,8 +1023,7 @@ static void dce_transform_set_pixel_storage_depth(
+ if (!(xfm_dce->lb_pixel_depth_supported & depth)) {
+ /*we should use unsupported capabilities
+ * unless it is required by w/a*/
+- DC_LOG_WARNING("%s: Capability not supported",
+- __func__);
++ DC_LOG_DC("%s: Capability not supported", __func__);
+ }
+ }
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+index 62d595ded8668..52142d272c868 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+@@ -1744,10 +1744,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
+ hws->funcs.edp_backlight_control(edp_link_with_sink, false);
+ }
+ /*resume from S3, no vbios posting, no need to power down again*/
++ clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
++
+ power_down_all_hw_blocks(dc);
+ disable_vga_and_power_gate_all_controllers(dc);
+ if (edp_link_with_sink && !keep_edp_vdd_on)
+ dc->hwss.edp_power_control(edp_link_with_sink, false);
++ clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
+ }
+ bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1);
+ }
+@@ -2108,7 +2111,8 @@ static void dce110_setup_audio_dto(
+ continue;
+ if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A)
+ continue;
+- if (pipe_ctx->stream_res.audio != NULL) {
++ if (pipe_ctx->stream_res.audio != NULL &&
++ pipe_ctx->stream_res.audio->enabled == false) {
+ struct audio_output audio_output;
+
+ build_audio_output(context, pipe_ctx, &audio_output);
+@@ -2156,7 +2160,8 @@ static void dce110_setup_audio_dto(
+ if (!dc_is_dp_signal(pipe_ctx->stream->signal))
+ continue;
+
+- if (pipe_ctx->stream_res.audio != NULL) {
++ if (pipe_ctx->stream_res.audio != NULL &&
++ pipe_ctx->stream_res.audio->enabled == false) {
+ struct audio_output audio_output;
+
+ build_audio_output(context, pipe_ctx, &audio_output);
+diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+index c65e4d125c8e2..013fca9b9c68c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+@@ -361,7 +361,8 @@ static const struct dce_audio_registers audio_regs[] = {
+ audio_regs(2),
+ audio_regs(3),
+ audio_regs(4),
+- audio_regs(5)
++ audio_regs(5),
++ audio_regs(6),
+ };
+
+ #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
+diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+index dda596fa1cd76..fee331accc0e7 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+@@ -23,7 +23,7 @@
+ # Makefile for the 'controller' sub-component of DAL.
+ # It provides the control and status of HW CRTC block.
+
+-CFLAGS_AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
++CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+
+ DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
+ dce60_resource.o
+diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+index dcfa0a3efa00d..bf72d3f60d7f4 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+@@ -1127,6 +1127,7 @@ struct resource_pool *dce60_create_resource_pool(
+ if (dce60_construct(num_virtual_links, dc, pool))
+ return &pool->base;
+
++ kfree(pool);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+@@ -1324,6 +1325,7 @@ struct resource_pool *dce61_create_resource_pool(
+ if (dce61_construct(num_virtual_links, dc, pool))
+ return &pool->base;
+
++ kfree(pool);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+@@ -1517,6 +1519,7 @@ struct resource_pool *dce64_create_resource_pool(
+ if (dce64_construct(num_virtual_links, dc, pool))
+ return &pool->base;
+
++ kfree(pool);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+index 725d92e40cd30..52d1f9746e8cb 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+@@ -1138,6 +1138,7 @@ struct resource_pool *dce80_create_resource_pool(
+ if (dce80_construct(num_virtual_links, dc, pool))
+ return &pool->base;
+
++ kfree(pool);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+@@ -1337,6 +1338,7 @@ struct resource_pool *dce81_create_resource_pool(
+ if (dce81_construct(num_virtual_links, dc, pool))
+ return &pool->base;
+
++ kfree(pool);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+index f4f423d0b8c3f..80595d7f060c3 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+@@ -940,6 +940,7 @@ static const struct hubbub_funcs hubbub1_funcs = {
+ .program_watermarks = hubbub1_program_watermarks,
+ .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
+ .allow_self_refresh_control = hubbub1_allow_self_refresh_control,
++ .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
+ };
+
+ void hubbub1_construct(struct hubbub *hubbub,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+index df8a7718a85fc..aa5a1fa68da05 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+@@ -804,6 +804,32 @@ static void false_optc_underflow_wa(
+ tg->funcs->clear_optc_underflow(tg);
+ }
+
++static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
++{
++ struct pipe_ctx *other_pipe;
++ int vready_offset = pipe->pipe_dlg_param.vready_offset;
++
++ /* Always use the largest vready_offset of all connected pipes */
++ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++
++ return vready_offset;
++}
++
+ enum dc_status dcn10_enable_stream_timing(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+@@ -838,7 +864,7 @@ enum dc_status dcn10_enable_stream_timing(
+ pipe_ctx->stream_res.tg->funcs->program_timing(
+ pipe_ctx->stream_res.tg,
+ &stream->timing,
+- pipe_ctx->pipe_dlg_param.vready_offset,
++ calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+@@ -1052,9 +1078,13 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc)
+
+ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
+ {
++ struct hubbub *hubbub = dc->res_pool->hubbub;
+ static bool should_log_hw_state; /* prevent hw state log by default */
+
+- if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub)) {
++ if (!hubbub->funcs->verify_allow_pstate_change_high)
++ return;
++
++ if (!hubbub->funcs->verify_allow_pstate_change_high(hubbub)) {
+ int i = 0;
+
+ if (should_log_hw_state)
+@@ -1063,8 +1093,8 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
+ TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES);
+ BREAK_TO_DEBUGGER();
+ if (dcn10_hw_wa_force_recovery(dc)) {
+- /*check again*/
+- if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub))
++ /*check again*/
++ if (!hubbub->funcs->verify_allow_pstate_change_high(hubbub))
+ BREAK_TO_DEBUGGER();
+ }
+ }
+@@ -1435,6 +1465,9 @@ void dcn10_init_hw(struct dc *dc)
+ }
+ }
+
++ if (hws->funcs.enable_power_gating_plane)
++ hws->funcs.enable_power_gating_plane(dc->hwseq, true);
++
+ /* If taking control over from VBIOS, we may want to optimize our first
+ * mode set, so we need to skip powering down pipes until we know which
+ * pipes we want to use.
+@@ -1487,8 +1520,6 @@ void dcn10_init_hw(struct dc *dc)
+
+ REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
+ }
+- if (hws->funcs.enable_power_gating_plane)
+- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
+ if (dc->clk_mgr->funcs->notify_wm_ranges)
+ dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
+@@ -1522,7 +1553,7 @@ void dcn10_power_down_on_boot(struct dc *dc)
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+- if (link->link_enc->funcs->is_dig_enabled &&
++ if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
+ dc->hwss.power_down) {
+ dc->hwss.power_down(dc);
+@@ -2455,14 +2486,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
+ struct mpc *mpc = dc->res_pool->mpc;
+ struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
+
+- if (per_pixel_alpha)
+- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+- else
+- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+-
+ blnd_cfg.overlap_only = false;
+ blnd_cfg.global_gain = 0xff;
+
++ if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
++ blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
++ } else if (per_pixel_alpha) {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
++ } else {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
++ }
++
+ if (pipe_ctx->plane_state->global_alpha)
+ blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
+ else
+@@ -2767,7 +2802,7 @@ void dcn10_program_pipe(
+
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+- pipe_ctx->pipe_dlg_param.vready_offset,
++ calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width);
+@@ -3107,7 +3142,9 @@ void dcn10_wait_for_mpcc_disconnect(
+ if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) {
+ struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst);
+
+- res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
++ if (pipe_ctx->stream_res.tg &&
++ pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg))
++ res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
+ pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
+ hubp->funcs->set_blank(hubp, true);
+ }
+@@ -3176,13 +3213,11 @@ void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
+
+ static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
+ {
+- struct pipe_ctx *test_pipe;
++ struct pipe_ctx *test_pipe, *split_pipe;
+ const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data;
+- const struct rect *r1 = &scl_data->recout, *r2;
+- int r1_r = r1->x + r1->width, r1_b = r1->y + r1->height, r2_r, r2_b;
++ struct rect r1 = scl_data->recout, r2, r2_half;
++ int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b;
+ int cur_layer = pipe_ctx->plane_state->layer_index;
+- bool upper_pipe_exists = false;
+- struct fixed31_32 one = dc_fixpt_from_int(1);
+
+ /**
+ * Disable the cursor if there's another pipe above this with a
+@@ -3191,26 +3226,35 @@ static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
+ */
+ for (test_pipe = pipe_ctx->top_pipe; test_pipe;
+ test_pipe = test_pipe->top_pipe) {
+- if (!test_pipe->plane_state->visible)
++ // Skip invisible layer and pipe-split plane on same layer
++ if (!test_pipe->plane_state ||
++ !test_pipe->plane_state->visible ||
++ test_pipe->plane_state->layer_index == cur_layer)
+ continue;
+
+- r2 = &test_pipe->plane_res.scl_data.recout;
+- r2_r = r2->x + r2->width;
+- r2_b = r2->y + r2->height;
++ r2 = test_pipe->plane_res.scl_data.recout;
++ r2_r = r2.x + r2.width;
++ r2_b = r2.y + r2.height;
++ split_pipe = test_pipe;
+
+- if (r1->x >= r2->x && r1->y >= r2->y && r1_r <= r2_r && r1_b <= r2_b)
+- return true;
++ /**
++ * There is another half plane on same layer because of
++ * pipe-split, merge together per same height.
++ */
++ for (split_pipe = pipe_ctx->top_pipe; split_pipe;
++ split_pipe = split_pipe->top_pipe)
++ if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) {
++ r2_half = split_pipe->plane_res.scl_data.recout;
++ r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x;
++ r2.width = r2.width + r2_half.width;
++ r2_r = r2.x + r2.width;
++ break;
++ }
+
+- if (test_pipe->plane_state->layer_index < cur_layer)
+- upper_pipe_exists = true;
++ if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b)
++ return true;
+ }
+
+- // if plane scaled, assume an upper plane can handle cursor if it exists.
+- if (upper_pipe_exists &&
+- (scl_data->ratios.horz.value != one.value ||
+- scl_data->ratios.vert.value != one.value))
+- return true;
+-
+ return false;
+ }
+
+@@ -3508,7 +3552,7 @@ void dcn10_calc_vupdate_position(
+ {
+ const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
+ int vline_int_offset_from_vupdate =
+- pipe_ctx->stream->periodic_interrupt0.lines_offset;
++ pipe_ctx->stream->periodic_interrupt.lines_offset;
+ int vupdate_offset_from_vsync = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx);
+ int start_position;
+
+@@ -3533,18 +3577,10 @@ void dcn10_calc_vupdate_position(
+ static void dcn10_cal_vline_position(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+- enum vline_select vline,
+ uint32_t *start_line,
+ uint32_t *end_line)
+ {
+- enum vertical_interrupt_ref_point ref_point = INVALID_POINT;
+-
+- if (vline == VLINE0)
+- ref_point = pipe_ctx->stream->periodic_interrupt0.ref_point;
+- else if (vline == VLINE1)
+- ref_point = pipe_ctx->stream->periodic_interrupt1.ref_point;
+-
+- switch (ref_point) {
++ switch (pipe_ctx->stream->periodic_interrupt.ref_point) {
+ case START_V_UPDATE:
+ dcn10_calc_vupdate_position(
+ dc,
+@@ -3553,7 +3589,9 @@ static void dcn10_cal_vline_position(
+ end_line);
+ break;
+ case START_V_SYNC:
+- // Suppose to do nothing because vsync is 0;
++ // vsync is line 0 so start_line is just the requested line offset
++ *start_line = pipe_ctx->stream->periodic_interrupt.lines_offset;
++ *end_line = *start_line + 2;
+ break;
+ default:
+ ASSERT(0);
+@@ -3563,24 +3601,15 @@ static void dcn10_cal_vline_position(
+
+ void dcn10_setup_periodic_interrupt(
+ struct dc *dc,
+- struct pipe_ctx *pipe_ctx,
+- enum vline_select vline)
++ struct pipe_ctx *pipe_ctx)
+ {
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
++ uint32_t start_line = 0;
++ uint32_t end_line = 0;
+
+- if (vline == VLINE0) {
+- uint32_t start_line = 0;
+- uint32_t end_line = 0;
+-
+- dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line);
++ dcn10_cal_vline_position(dc, pipe_ctx, &start_line, &end_line);
+
+- tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line);
+-
+- } else if (vline == VLINE1) {
+- pipe_ctx->stream_res.tg->funcs->setup_vertical_interrupt1(
+- tg,
+- pipe_ctx->stream->periodic_interrupt1.lines_offset);
+- }
++ tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line);
+ }
+
+ void dcn10_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+index 9ae07c77fdc01..0ef7bf7ddb75e 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+@@ -175,8 +175,7 @@ void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
+ void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx);
+ void dcn10_setup_periodic_interrupt(
+ struct dc *dc,
+- struct pipe_ctx *pipe_ctx,
+- enum vline_select vline);
++ struct pipe_ctx *pipe_ctx);
+ enum dc_status dcn10_set_clock(struct dc *dc,
+ enum dc_clock_type clock_type,
+ uint32_t clk_khz,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+index 34001a30d449a..10e613ec7d24f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+@@ -78,6 +78,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
+ .get_clock = dcn10_get_clock,
+ .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
++ .power_down = dce110_power_down,
+ .set_backlight_level = dce110_set_backlight_level,
+ .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
+ .set_pipe = dce110_set_pipe,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+index 11019c2c62ccb..d3681db36c30b 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+@@ -126,6 +126,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
+ while (tmp_mpcc != NULL) {
+ if (tmp_mpcc->dpp_id == dpp_id)
+ return tmp_mpcc;
++
++ /* avoid circular linked list */
++ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
++ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
++ break;
++
+ tmp_mpcc = tmp_mpcc->mpcc_bot;
+ }
+ return NULL;
+@@ -201,8 +207,9 @@ struct mpcc *mpc1_insert_plane(
+ /* check insert_above_mpcc exist in tree->opp_list */
+ struct mpcc *temp_mpcc = tree->opp_list;
+
+- while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc)
+- temp_mpcc = temp_mpcc->mpcc_bot;
++ if (temp_mpcc != insert_above_mpcc)
++ while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc)
++ temp_mpcc = temp_mpcc->mpcc_bot;
+ if (temp_mpcc == NULL)
+ return NULL;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+index 37848f4577b18..92fee47278e5a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+@@ -480,6 +480,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
+ OTG_CLOCK_ON, 1,
+ 1, 1000);
+ } else {
++
++ //last chance to clear underflow, otherwise, it will always there due to clock is off.
++ if (optc->funcs->is_optc_underflow_occurred(optc) == true)
++ optc->funcs->clear_optc_underflow(optc);
++
+ REG_UPDATE_2(OTG_CLOCK_CONTROL,
+ OTG_CLOCK_GATE_DIS, 0,
+ OTG_CLOCK_EN, 0);
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+index a47ba1d45be92..bf2a8f53694b4 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+@@ -1513,6 +1513,7 @@ static void dcn20_update_dchubp_dpp(
+ /* Any updates are handled in dc interface, just need
+ * to apply existing for plane enable / opp change */
+ if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed
++ || pipe_ctx->update_flags.bits.plane_changed
+ || pipe_ctx->stream->update_flags.bits.gamut_remap
+ || pipe_ctx->stream->update_flags.bits.out_csc) {
+ /* dpp/cm gamut remap*/
+@@ -1563,6 +1564,31 @@ static void dcn20_update_dchubp_dpp(
+ hubp->funcs->set_blank(hubp, false);
+ }
+
++static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
++{
++ struct pipe_ctx *other_pipe;
++ int vready_offset = pipe->pipe_dlg_param.vready_offset;
++
++ /* Always use the largest vready_offset of all connected pipes */
++ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
++ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
++ }
++
++ return vready_offset;
++}
+
+ static void dcn20_program_pipe(
+ struct dc *dc,
+@@ -1581,7 +1607,7 @@ static void dcn20_program_pipe(
+
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+- pipe_ctx->pipe_dlg_param.vready_offset,
++ calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width);
+@@ -1764,7 +1790,7 @@ void dcn20_post_unlock_program_front_end(
+
+ for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000
+ && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+- mdelay(1);
++ udelay(1);
+ }
+ }
+
+@@ -1874,7 +1900,7 @@ bool dcn20_update_bandwidth(
+
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+- pipe_ctx->pipe_dlg_param.vready_offset,
++ calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width);
+@@ -2297,14 +2323,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
+ struct mpc *mpc = dc->res_pool->mpc;
+ struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
+
+- if (per_pixel_alpha)
+- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
+- else
+- blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
+-
+ blnd_cfg.overlap_only = false;
+ blnd_cfg.global_gain = 0xff;
+
++ if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN;
++ blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value;
++ } else if (per_pixel_alpha) {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA;
++ } else {
++ blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA;
++ }
++
+ if (pipe_ctx->plane_state->global_alpha)
+ blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value;
+ else
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+index 947eb0df3f125..142fc0a3a536c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+@@ -532,6 +532,12 @@ struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
+ while (tmp_mpcc != NULL) {
+ if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id)
+ return tmp_mpcc;
++
++ /* avoid circular linked list */
++ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
++ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
++ break;
++
+ tmp_mpcc = tmp_mpcc->mpcc_bot;
+ }
+ return NULL;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+index e3e01b17c164e..ede11eb120d4f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+@@ -1854,7 +1854,9 @@ static void swizzle_to_dml_params(
+ case DC_SW_VAR_D_X:
+ *sw_mode = dm_sw_var_d_x;
+ break;
+-
++ case DC_SW_VAR_R_X:
++ *sw_mode = dm_sw_var_r_x;
++ break;
+ default:
+ ASSERT(0); /* Not supported */
+ break;
+@@ -3152,7 +3154,7 @@ void dcn20_calculate_dlg_params(
+
+ context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml,
+ &context->res_ctx.pipe_ctx[i].rq_regs,
+- pipes[pipe_idx].pipe);
++ &pipes[pipe_idx].pipe);
+ pipe_idx++;
+ }
+ }
+@@ -3668,16 +3670,22 @@ static bool init_soc_bounding_box(struct dc *dc,
+ clock_limits_available = (status == PP_SMU_RESULT_OK);
+ }
+
+- if (clock_limits_available && uclk_states_available && num_states)
++ if (clock_limits_available && uclk_states_available && num_states) {
++ DC_FP_START();
+ dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states);
+- else if (clock_limits_available)
++ DC_FP_END();
++ } else if (clock_limits_available) {
++ DC_FP_START();
+ dcn20_cap_soc_clocks(loaded_bb, max_clocks);
++ DC_FP_END();
++ }
+ }
+
+ loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator;
+ loaded_ip->max_num_dpp = pool->base.pipe_count;
++ DC_FP_START();
+ dcn20_patch_bounding_box(dc, loaded_bb);
+-
++ DC_FP_END();
+ return true;
+ }
+
+@@ -3697,8 +3705,6 @@ static bool dcn20_resource_construct(
+ enum dml_project dml_project_version =
+ get_dml_project_version(ctx->asic_id.hw_internal_rev);
+
+- DC_FP_START();
+-
+ ctx->dc_bios->regs = &bios_regs;
+ pool->base.funcs = &dcn20_res_pool_funcs;
+
+@@ -4047,12 +4053,10 @@ static bool dcn20_resource_construct(
+ pool->base.oem_device = NULL;
+ }
+
+- DC_FP_END();
+ return true;
+
+ create_fail:
+
+- DC_FP_END();
+ dcn20_resource_destruct(pool);
+
+ return false;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+index 36044cb8ec834..1c0f56d8ba8bb 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp(
+ void dcn21_dchvm_init(struct hubbub *hubbub)
+ {
+ struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+- uint32_t riommu_active;
++ uint32_t riommu_active, prefetch_done;
+ int i;
+
++ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done);
++
++ if (prefetch_done) {
++ hubbub->riommu_active = true;
++ return;
++ }
+ //Init DCHVM block
+ REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+index fbbdf99761838..5b8274b8c3845 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+@@ -1428,6 +1428,7 @@ static struct clock_source *dcn21_clock_source_create(
+ return &clk_src->base;
+ }
+
++ kfree(clk_src);
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+index 23a52d47e61c4..0601c17426af2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+@@ -355,8 +355,11 @@ void dpp3_set_cursor_attributes(
+ int cur_rom_en = 0;
+
+ if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
+- color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
+- cur_rom_en = 1;
++ color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
++ if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
++ cur_rom_en = 1;
++ }
++ }
+
+ REG_UPDATE_3(CURSOR0_CONTROL,
+ CUR0_MODE, color_format,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c
+index f4414de96acc5..152c9c5733f1c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c
+@@ -448,6 +448,7 @@ static const struct hubbub_funcs hubbub30_funcs = {
+ .program_watermarks = hubbub3_program_watermarks,
+ .allow_self_refresh_control = hubbub1_allow_self_refresh_control,
+ .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
++ .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
+ .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes,
+ .force_pstate_change_control = hubbub3_force_pstate_change_control,
+ .init_watermarks = hubbub3_init_watermarks,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+index f246125232482..33c2337c4edf3 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr(
+ VMID, address->vmid);
+
+ if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
+- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
++ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
+
+ } else {
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+index fafed1e4a998d..f834573758113 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+@@ -570,6 +570,9 @@ void dcn30_init_hw(struct dc *dc)
+ }
+ }
+
++ if (hws->funcs.enable_power_gating_plane)
++ hws->funcs.enable_power_gating_plane(dc->hwseq, true);
++
+ /* If taking control over from VBIOS, we may want to optimize our first
+ * mode set, so we need to skip powering down pipes until we know which
+ * pipes we want to use.
+@@ -647,8 +650,6 @@ void dcn30_init_hw(struct dc *dc)
+
+ REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
+ }
+- if (hws->funcs.enable_power_gating_plane)
+- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
+ if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
+ dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
+@@ -1002,7 +1003,8 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc,
+ /* turning off DPG */
+ pipe_ctx->plane_res.hubp->funcs->set_blank(pipe_ctx->plane_res.hubp, false);
+ for (mpcc_pipe = pipe_ctx->bottom_pipe; mpcc_pipe; mpcc_pipe = mpcc_pipe->bottom_pipe)
+- mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
++ if (mpcc_pipe->plane_res.hubp)
++ mpcc_pipe->plane_res.hubp->funcs->set_blank(mpcc_pipe->plane_res.hubp, false);
+
+ stream_res->opp->funcs->opp_set_disp_pattern_generator(stream_res->opp, test_pattern, color_space,
+ color_depth, solid_color, width, height, offset);
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+index a0de309475a97..735c92a5aa36a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+@@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .timing_trace = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = true,
+- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
++ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+@@ -1856,7 +1856,7 @@ static struct pipe_ctx *dcn30_find_split_pipe(
+ return pipe;
+ }
+
+-static noinline bool dcn30_internal_validate_bw(
++noinline bool dcn30_internal_validate_bw(
+ struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+@@ -1879,7 +1879,6 @@ static noinline bool dcn30_internal_validate_bw(
+ dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+
+- DC_FP_START();
+ if (!pipe_cnt) {
+ out = true;
+ goto validate_out;
+@@ -2103,7 +2102,6 @@ validate_fail:
+ out = false;
+
+ validate_out:
+- DC_FP_END();
+ return out;
+ }
+
+@@ -2304,7 +2302,9 @@ bool dcn30_validate_bandwidth(struct dc *dc,
+
+ BW_VAL_TRACE_COUNT();
+
++ DC_FP_START();
+ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
++ DC_FP_END();
+
+ if (pipe_cnt == 0)
+ goto validate_out;
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+index b754b89beadfb..b92e4cc0232f2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+@@ -55,6 +55,13 @@ unsigned int dcn30_calc_max_scaled_time(
+
+ bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ bool fast_validate);
++bool dcn30_internal_validate_bw(
++ struct dc *dc,
++ struct dc_state *context,
++ display_e2e_pipe_params_st *pipes,
++ int *pipe_cnt_out,
++ int *vlevel_out,
++ bool fast_validate);
+ void dcn30_calculate_wm_and_dlg(
+ struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c
+index 1e3bd2e9cdcc4..a046664e20316 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c
+@@ -60,6 +60,7 @@ static const struct hubbub_funcs hubbub301_funcs = {
+ .program_watermarks = hubbub3_program_watermarks,
+ .allow_self_refresh_control = hubbub1_allow_self_refresh_control,
+ .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
++ .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
+ .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes,
+ .force_pstate_change_control = hubbub3_force_pstate_change_control,
+ .hubbub_read_state = hubbub2_read_state,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+index 912285fdce18e..dea358b01791c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+@@ -863,7 +863,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .disable_clock_gate = true,
+ .disable_pplib_clock_request = true,
+ .disable_pplib_wm_range = true,
+- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
++ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+@@ -1622,12 +1622,106 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b
+ dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30);
+ }
+
++static void calculate_wm_set_for_vlevel(
++ int vlevel,
++ struct wm_range_table_entry *table_entry,
++ struct dcn_watermarks *wm_set,
++ struct display_mode_lib *dml,
++ display_e2e_pipe_params_st *pipes,
++ int pipe_cnt)
++{
++ double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
++
++ ASSERT(vlevel < dml->soc.num_states);
++ /* only pipe 0 is read for voltage and dcf/soc clocks */
++ pipes[0].clks_cfg.voltage = vlevel;
++ pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
++ pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
++
++ dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
++ dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
++ dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
++
++ wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
++ wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
++ wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
++ wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
++ wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
++ wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
++ wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
++ wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
++ dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
++
++}
++
++static void dcn301_calculate_wm_and_dlg(
++ struct dc *dc, struct dc_state *context,
++ display_e2e_pipe_params_st *pipes,
++ int pipe_cnt,
++ int vlevel_req)
++{
++ int i, pipe_idx;
++ int vlevel, vlevel_max;
++ struct wm_range_table_entry *table_entry;
++ struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
++
++ ASSERT(bw_params);
++
++ vlevel_max = bw_params->clk_table.num_entries - 1;
++
++ /* WM Set D */
++ table_entry = &bw_params->wm_table.entries[WM_D];
++ if (table_entry->wm_type == WM_TYPE_RETRAINING)
++ vlevel = 0;
++ else
++ vlevel = vlevel_max;
++ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
++ &context->bw_ctx.dml, pipes, pipe_cnt);
++ /* WM Set C */
++ table_entry = &bw_params->wm_table.entries[WM_C];
++ vlevel = min(max(vlevel_req, 2), vlevel_max);
++ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
++ &context->bw_ctx.dml, pipes, pipe_cnt);
++ /* WM Set B */
++ table_entry = &bw_params->wm_table.entries[WM_B];
++ vlevel = min(max(vlevel_req, 1), vlevel_max);
++ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
++ &context->bw_ctx.dml, pipes, pipe_cnt);
++
++ /* WM Set A */
++ table_entry = &bw_params->wm_table.entries[WM_A];
++ vlevel = min(vlevel_req, vlevel_max);
++ calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
++ &context->bw_ctx.dml, pipes, pipe_cnt);
++
++ for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
++ if (!context->res_ctx.pipe_ctx[i].stream)
++ continue;
++
++ pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
++ pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
++
++ if (dc->config.forced_clocks) {
++ pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
++ pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
++ }
++ if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
++ pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
++ if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
++ pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
++
++ pipe_idx++;
++ }
++
++ dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
++}
++
+ static struct resource_funcs dcn301_res_pool_funcs = {
+ .destroy = dcn301_destroy_resource_pool,
+ .link_enc_create = dcn301_link_encoder_create,
+ .panel_cntl_create = dcn301_panel_cntl_create,
+ .validate_bandwidth = dcn30_validate_bandwidth,
+- .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
++ .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg,
+ .update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
+ .populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
+ .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+index 7d3ff5d444023..2292bb82026e2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+@@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .timing_trace = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = true,
+- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
++ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+index dd38796ba30ad..9d9b0d343c6b3 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+@@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .timing_trace = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = true,
+- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
++ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+@@ -500,7 +500,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
+ int afmt_inst;
+
+ /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
+- if (eng_id <= ENGINE_ID_DIGE) {
++ if (eng_id <= ENGINE_ID_DIGB) {
+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+ } else
+@@ -1344,6 +1344,20 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
+ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
+ dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
+ }
++
++ // WA: patch strobe modes to compensate for DCN303 BW issue
++ if (dcn3_03_soc.num_chans <= 4) {
++ for (i = 0; i < dcn3_03_soc.num_states; i++) {
++ if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700)
++ break;
++
++ if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) {
++ dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100;
++ dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100;
++ }
++ }
++ }
++
+ /* re-init DML with updated bb */
+ dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30);
+ if (dc->current_state)
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+index b0892443fbd57..c7c27a605f159 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+@@ -168,9 +168,7 @@ void enc31_hw_init(struct link_encoder *enc)
+ AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
+ AUX_RX_DETECTION_THRESHOLD [30:28] = 1
+ */
+- AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
+-
+- AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
++ // dmub will read AUX_DPHY_RX_CONTROL0/AUX_DPHY_TX_CONTROL from vbios table in dp_aux_init
+
+ //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32;
+ // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+index 90c73a1cb9861..208d2dc8b1d1a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+@@ -24,6 +24,7 @@
+ */
+
+
++#include <linux/delay.h>
+ #include "dcn30/dcn30_hubbub.h"
+ #include "dcn31_hubbub.h"
+ #include "dm_services.h"
+@@ -138,8 +139,11 @@ static uint32_t convert_and_clamp(
+ ret_val = wm_ns * refclk_mhz;
+ ret_val /= 1000;
+
+- if (ret_val > clamp_value)
++ if (ret_val > clamp_value) {
++ /* clamping WMs is abnormal, unexpected and may lead to underflow*/
++ ASSERT(0);
+ ret_val = clamp_value;
++ }
+
+ return ret_val;
+ }
+@@ -159,7 +163,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->a.urgent_ns > hubbub2->watermarks.a.urgent_ns) {
+ hubbub2->watermarks.a.urgent_ns = watermarks->a.urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
+
+@@ -193,7 +197,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->a.urgent_latency_ns > hubbub2->watermarks.a.urgent_latency_ns) {
+ hubbub2->watermarks.a.urgent_latency_ns = watermarks->a.urgent_latency_ns;
+ prog_wm_value = convert_and_clamp(watermarks->a.urgent_latency_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, prog_wm_value);
+ } else if (watermarks->a.urgent_latency_ns < hubbub2->watermarks.a.urgent_latency_ns)
+@@ -203,7 +207,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->b.urgent_ns > hubbub2->watermarks.b.urgent_ns) {
+ hubbub2->watermarks.b.urgent_ns = watermarks->b.urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value);
+
+@@ -237,7 +241,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->b.urgent_latency_ns > hubbub2->watermarks.b.urgent_latency_ns) {
+ hubbub2->watermarks.b.urgent_latency_ns = watermarks->b.urgent_latency_ns;
+ prog_wm_value = convert_and_clamp(watermarks->b.urgent_latency_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, prog_wm_value);
+ } else if (watermarks->b.urgent_latency_ns < hubbub2->watermarks.b.urgent_latency_ns)
+@@ -247,7 +251,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->c.urgent_ns > hubbub2->watermarks.c.urgent_ns) {
+ hubbub2->watermarks.c.urgent_ns = watermarks->c.urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value);
+
+@@ -281,7 +285,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->c.urgent_latency_ns > hubbub2->watermarks.c.urgent_latency_ns) {
+ hubbub2->watermarks.c.urgent_latency_ns = watermarks->c.urgent_latency_ns;
+ prog_wm_value = convert_and_clamp(watermarks->c.urgent_latency_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, prog_wm_value);
+ } else if (watermarks->c.urgent_latency_ns < hubbub2->watermarks.c.urgent_latency_ns)
+@@ -291,7 +295,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->d.urgent_ns > hubbub2->watermarks.d.urgent_ns) {
+ hubbub2->watermarks.d.urgent_ns = watermarks->d.urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value);
+
+@@ -325,7 +329,7 @@ static bool hubbub31_program_urgent_watermarks(
+ if (safe_to_lower || watermarks->d.urgent_latency_ns > hubbub2->watermarks.d.urgent_latency_ns) {
+ hubbub2->watermarks.d.urgent_latency_ns = watermarks->d.urgent_latency_ns;
+ prog_wm_value = convert_and_clamp(watermarks->d.urgent_latency_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0x3fff);
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, prog_wm_value);
+ } else if (watermarks->d.urgent_latency_ns < hubbub2->watermarks.d.urgent_latency_ns)
+@@ -351,7 +355,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
+@@ -367,7 +371,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->a.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
+@@ -383,7 +387,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_A calculated =%d\n"
+@@ -399,7 +403,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->a.cstate_pstate.cstate_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_A calculated =%d\n"
+@@ -416,7 +420,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
+@@ -432,7 +436,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->b.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
+@@ -448,7 +452,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_B calculated =%d\n"
+@@ -464,7 +468,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->b.cstate_pstate.cstate_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_B calculated =%d\n"
+@@ -481,7 +485,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
+@@ -497,7 +501,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->c.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
+@@ -513,7 +517,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_C calculated =%d\n"
+@@ -529,7 +533,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->c.cstate_pstate.cstate_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_C calculated =%d\n"
+@@ -546,7 +550,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
+@@ -562,7 +566,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->d.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_exit_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
+@@ -578,7 +582,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_D calculated =%d\n"
+@@ -594,7 +598,7 @@ static bool hubbub31_program_stutter_watermarks(
+ watermarks->d.cstate_pstate.cstate_exit_z8_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_exit_z8_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_D calculated =%d\n"
+@@ -625,7 +629,7 @@ static bool hubbub31_program_pstate_watermarks(
+ watermarks->a.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.pstate_change_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
+@@ -642,7 +646,7 @@ static bool hubbub31_program_pstate_watermarks(
+ watermarks->b.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.pstate_change_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
+@@ -659,7 +663,7 @@ static bool hubbub31_program_pstate_watermarks(
+ watermarks->c.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.pstate_change_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
+@@ -676,7 +680,7 @@ static bool hubbub31_program_pstate_watermarks(
+ watermarks->d.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.pstate_change_ns,
+- refclk_mhz, 0x1fffff);
++ refclk_mhz, 0xffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_D calculated =%d\n"
+@@ -946,6 +950,65 @@ static void hubbub31_get_dchub_ref_freq(struct hubbub *hubbub,
+ }
+ }
+
++static bool hubbub31_verify_allow_pstate_change_high(struct hubbub *hubbub)
++{
++ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
++
++ /*
++ * Pstate latency is ~20us so if we wait over 40us and pstate allow
++ * still not asserted, we are probably stuck and going to hang
++ */
++ const unsigned int pstate_wait_timeout_us = 100;
++ const unsigned int pstate_wait_expected_timeout_us = 40;
++
++ static unsigned int max_sampled_pstate_wait_us; /* data collection */
++ static bool forced_pstate_allow; /* help with revert wa */
++
++ unsigned int debug_data = 0;
++ unsigned int i;
++
++ if (forced_pstate_allow) {
++ /* we hacked to force pstate allow to prevent hang last time
++ * we verify_allow_pstate_change_high. so disable force
++ * here so we can check status
++ */
++ REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL,
++ DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, 0,
++ DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, 0);
++ forced_pstate_allow = false;
++ }
++
++ REG_WRITE(DCHUBBUB_TEST_DEBUG_INDEX, hubbub2->debug_test_index_pstate);
++
++ for (i = 0; i < pstate_wait_timeout_us; i++) {
++ debug_data = REG_READ(DCHUBBUB_TEST_DEBUG_DATA);
++
++ /* Debug bit is specific to ASIC. */
++ if (debug_data & (1 << 26)) {
++ if (i > pstate_wait_expected_timeout_us)
++ DC_LOG_WARNING("pstate took longer than expected ~%dus\n", i);
++ return true;
++ }
++ if (max_sampled_pstate_wait_us < i)
++ max_sampled_pstate_wait_us = i;
++
++ udelay(1);
++ }
++
++ /* force pstate allow to prevent system hang
++ * and break to debugger to investigate
++ */
++ REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL,
++ DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, 1,
++ DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, 1);
++ forced_pstate_allow = true;
++
++ DC_LOG_WARNING("pstate TEST_DEBUG_DATA: 0x%X\n",
++ debug_data);
++
++ return false;
++}
++
+ static const struct hubbub_funcs hubbub31_funcs = {
+ .update_dchub = hubbub2_update_dchub,
+ .init_dchub_sys_ctx = hubbub31_init_dchub_sys_ctx,
+@@ -958,6 +1021,7 @@ static const struct hubbub_funcs hubbub31_funcs = {
+ .program_watermarks = hubbub31_program_watermarks,
+ .allow_self_refresh_control = hubbub1_allow_self_refresh_control,
+ .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
++ .verify_allow_pstate_change_high = hubbub31_verify_allow_pstate_change_high,
+ .program_det_size = dcn31_program_det_size,
+ .program_compbuf_size = dcn31_program_compbuf_size,
+ .init_crb = dcn31_init_crb,
+@@ -979,5 +1043,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31,
+ hubbub31->detile_buf_size = det_size_kb * 1024;
+ hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024;
+ hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB;
++
++ hubbub31->debug_test_index_pstate = 0x6;
+ }
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
+index 53b792b997b7e..127055044cf1a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
+@@ -79,6 +79,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
+ .hubp_init = hubp3_init,
+ .set_unbounded_requesting = hubp31_set_unbounded_requesting,
+ .hubp_soft_reset = hubp31_soft_reset,
++ .hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_in_blank = hubp1_in_blank,
+ };
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+index 3afa1159a5f7d..b72d080b302a1 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+@@ -204,6 +204,9 @@ void dcn31_init_hw(struct dc *dc)
+ }
+ }
+
++ if (hws->funcs.enable_power_gating_plane)
++ hws->funcs.enable_power_gating_plane(dc->hwseq, true);
++
+ /* If taking control over from VBIOS, we may want to optimize our first
+ * mode set, so we need to skip powering down pipes until we know which
+ * pipes we want to use.
+@@ -287,8 +290,6 @@ void dcn31_init_hw(struct dc *dc)
+
+ REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
+ }
+- if (hws->funcs.enable_power_gating_plane)
+- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
+ if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
+ dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+index 40011cd3c8ef0..4e9fe090b770a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+@@ -100,6 +100,8 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
+ .z10_save_init = dcn31_z10_save_init,
+ .is_abm_supported = dcn31_is_abm_supported,
+ .set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
++ .optimize_pwr_state = dcn21_optimize_pwr_state,
++ .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+ .update_visual_confirm_color = dcn20_update_visual_confirm_color,
+ };
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+index 79e92ecca96c1..e224c52132581 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+@@ -352,6 +352,14 @@ static const struct dce110_clk_src_regs clk_src_regs[] = {
+ clk_src_regs(3, D),
+ clk_src_regs(4, E)
+ };
+/*pll_id being remapped in dmub, in driver it is logical instance*/
++static const struct dce110_clk_src_regs clk_src_regs_b0[] = {
++ clk_src_regs(0, A),
++ clk_src_regs(1, B),
++ clk_src_regs(2, F),
++ clk_src_regs(3, G),
++ clk_src_regs(4, E)
++};
+
+ static const struct dce110_clk_src_shift cs_shift = {
+ CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT)
+@@ -462,7 +470,8 @@ static const struct dcn30_afmt_mask afmt_mask = {
+ SE_DCN3_REG_LIST(id)\
+ }
+
+-static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
++/* Some encoders won't be initialized here - but they're logical, not physical. */
++static const struct dcn10_stream_enc_registers stream_enc_regs[ENGINE_ID_COUNT] = {
+ stream_enc_regs(0),
+ stream_enc_regs(1),
+ stream_enc_regs(2),
+@@ -923,7 +932,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .timing_trace = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = false,
+- .pipe_split_policy = MPC_SPLIT_AVOID,
++ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+@@ -931,7 +940,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .max_downscale_src_width = 4096,/*upto true 4K*/
+ .disable_pplib_wm_range = false,
+ .scl_reset_length10 = true,
+- .sanity_checks = false,
++ .sanity_checks = true,
+ .underflow_assert_delay_us = 0xFFFFFFFF,
+ .dwb_fi_phase = -1, // -1 = disable,
+ .dmub_command_table = true,
+@@ -948,6 +957,7 @@ static const struct dc_debug_options debug_defaults_drv = {
+ .optc = false,
+ }
+ },
++ .disable_z10 = true,
+ .optimize_edp_link_rate = true,
+ .enable_sw_cntl_psr = true,
+ };
+@@ -1284,12 +1294,6 @@ static struct stream_encoder *dcn31_stream_encoder_create(
+ if (!enc1 || !vpg || !afmt)
+ return NULL;
+
+- if (ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
+- ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
+- if ((eng_id == ENGINE_ID_DIGC) || (eng_id == ENGINE_ID_DIGD))
+- eng_id = eng_id + 3; // For B0 only. C->F, D->G.
+- }
+-
+ dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
+ eng_id, vpg, afmt,
+ &stream_enc_regs[eng_id],
+@@ -1660,6 +1664,15 @@ static void dcn31_calculate_wm_and_dlg_fp(
+ if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
+ dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
+
++ /* We don't recalculate clocks for 0 pipe configs, which can block
++ * S0i3 as high clocks will block low power states
++ * Override any clocks that can block S0i3 to min here
++ */
++ if (pipe_cnt == 0) {
++ context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0
++ return;
++ }
++
+ pipes[0].clks_cfg.voltage = vlevel;
+ pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
+ pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+@@ -1785,6 +1798,60 @@ static void dcn31_calculate_wm_and_dlg(
+ DC_FP_END();
+ }
+
++bool dcn31_validate_bandwidth(struct dc *dc,
++ struct dc_state *context,
++ bool fast_validate)
++{
++ bool out = false;
++
++ BW_VAL_TRACE_SETUP();
++
++ int vlevel = 0;
++ int pipe_cnt = 0;
++ display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
++ DC_LOGGER_INIT(dc->ctx->logger);
++
++ BW_VAL_TRACE_COUNT();
++
++ DC_FP_START();
++ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
++ DC_FP_END();
++
++ // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg
++ if (pipe_cnt == 0)
++ fast_validate = false;
++
++ if (!out)
++ goto validate_fail;
++
++ BW_VAL_TRACE_END_VOLTAGE_LEVEL();
++
++ if (fast_validate) {
++ BW_VAL_TRACE_SKIP(fast);
++ goto validate_out;
++ }
++
++ dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
++
++ BW_VAL_TRACE_END_WATERMARKS();
++
++ goto validate_out;
++
++validate_fail:
++ DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n",
++ dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states]));
++
++ BW_VAL_TRACE_SKIP(fail);
++ out = false;
++
++validate_out:
++ kfree(pipes);
++
++ BW_VAL_TRACE_FINISH();
++
++ return out;
++}
++
+ static struct dc_cap_funcs cap_funcs = {
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+ };
+@@ -1867,7 +1934,7 @@ static struct resource_funcs dcn31_res_pool_funcs = {
+ .link_encs_assign = link_enc_cfg_link_encs_assign,
+ .link_enc_unassign = link_enc_cfg_link_enc_unassign,
+ .panel_cntl_create = dcn31_panel_cntl_create,
+- .validate_bandwidth = dcn30_validate_bandwidth,
++ .validate_bandwidth = dcn31_validate_bandwidth,
+ .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg,
+ .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
+ .populate_dml_pipes = dcn31_populate_dml_pipes_from_context,
+@@ -2019,14 +2086,27 @@ static bool dcn31_resource_construct(
+ dcn30_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL1,
+ &clk_src_regs[1], false);
+- pool->base.clock_sources[DCN31_CLK_SRC_PLL2] =
++ /*move phypllx_pixclk_resync to dmub next*/
++ if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
++ pool->base.clock_sources[DCN31_CLK_SRC_PLL2] =
++ dcn30_clock_source_create(ctx, ctx->dc_bios,
++ CLOCK_SOURCE_COMBO_PHY_PLL2,
++ &clk_src_regs_b0[2], false);
++ pool->base.clock_sources[DCN31_CLK_SRC_PLL3] =
++ dcn30_clock_source_create(ctx, ctx->dc_bios,
++ CLOCK_SOURCE_COMBO_PHY_PLL3,
++ &clk_src_regs_b0[3], false);
++ } else {
++ pool->base.clock_sources[DCN31_CLK_SRC_PLL2] =
+ dcn30_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL2,
+ &clk_src_regs[2], false);
+- pool->base.clock_sources[DCN31_CLK_SRC_PLL3] =
++ pool->base.clock_sources[DCN31_CLK_SRC_PLL3] =
+ dcn30_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL3,
+ &clk_src_regs[3], false);
++ }
++
+ pool->base.clock_sources[DCN31_CLK_SRC_PLL4] =
+ dcn30_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL4,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+index 93571c9769967..cc4bed675588c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+@@ -39,4 +39,35 @@ struct resource_pool *dcn31_create_resource_pool(
+ const struct dc_init_data *init_data,
+ struct dc *dc);
+
++/*temp: B0 specific before switch to dcn313 headers*/
++#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL
++#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e
++#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1
++#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f
++#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1
++
++//PHYPLLF_PIXCLK_RESYNC_CNTL
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L
++#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L
++
++//PHYPLLG_PIXCLK_RESYNC_CNTL
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L
++#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L
++#endif
+ #endif /* _DCN31_RESOURCE_H_ */
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+index 56055df2e8d2e..9009b92490f34 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
+@@ -70,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
+ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
+ CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
+ CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
++CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
+ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
+ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
+ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
+@@ -84,6 +85,7 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcfla
+ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
+ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
+ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
++CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
+ endif
+ CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
+ CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
+@@ -99,6 +101,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
+ DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
+ DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
+ DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
++DML += dsc/rc_calc_fpu.o
+ endif
+
+ AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML))
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+index d3b5b6fedf042..6266b0788387e 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+@@ -3897,14 +3897,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
+ if (mode_lib->vba.ODMCapability) {
+ if (locals->PlaneRequiredDISPCLKWithoutODMCombine > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ }
+ }
+@@ -3957,7 +3957,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ locals->RequiredDISPCLK[i][j] = 0.0;
+ locals->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
+ locals->NoOfDPP[i][j][k] = 1;
+ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+index 63bbdf8b8678b..0053a6d5178c9 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+@@ -4008,17 +4008,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
+ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
+ if (mode_lib->vba.ODMCapability) {
+ if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN20_MAX_DSC_IMAGE_WIDTH)) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ } else if (locals->HActive[k] > DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ }
+ }
+@@ -4071,7 +4071,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
+ locals->RequiredDISPCLK[i][j] = 0.0;
+ locals->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
+ locals->NoOfDPP[i][j][k] = 1;
+ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+index 2091dd8c252da..8c168f348a27f 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+@@ -768,12 +768,12 @@ static void dml20_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
+
+ void dml20_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ display_rq_params_st rq_param = {0};
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+- dml20_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src);
++ dml20_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param->src);
+ extract_rq_regs(mode_lib, rq_regs, rq_param);
+
+ print__rq_regs_st(mode_lib, *rq_regs);
+@@ -1549,7 +1549,7 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
+ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
+index d0b90947f5409..8b23867e97c18 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h
+@@ -43,7 +43,7 @@ struct display_mode_lib;
+ void dml20_rq_dlg_get_rq_reg(
+ struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+
+
+ // Function: dml_rq_dlg_get_dlg_reg
+@@ -61,7 +61,7 @@ void dml20_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+index 1a0c14e465faa..26ececfd40cdc 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+@@ -768,12 +768,12 @@ static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
+
+ void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ display_rq_params_st rq_param = {0};
+
+ memset(rq_regs, 0, sizeof(*rq_regs));
+- dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src);
++ dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param->src);
+ extract_rq_regs(mode_lib, rq_regs, rq_param);
+
+ print__rq_regs_st(mode_lib, *rq_regs);
+@@ -1550,7 +1550,7 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
+ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+index 27cf8bed9376f..2b4e46ea1c3df 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h
+@@ -43,7 +43,7 @@ struct display_mode_lib;
+ void dml20v2_rq_dlg_get_rq_reg(
+ struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+
+
+ // Function: dml_rq_dlg_get_dlg_reg
+@@ -61,7 +61,7 @@ void dml20v2_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+index 4136eb8256cb5..26f839ce710f5 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+@@ -3979,17 +3979,17 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
+ * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
+ if (mode_lib->vba.ODMCapability) {
+ if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
+- locals->ODMCombineEnablePerState[i][k] = true;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
+ mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
+ }
+ }
+@@ -4042,7 +4042,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ locals->RequiredDISPCLK[i][j] = 0.0;
+ locals->DISPCLK_DPPCLK_Support[i][j] = true;
+ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
+- locals->ODMCombineEnablePerState[i][k] = false;
++ locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
+ if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
+ locals->NoOfDPP[i][j][k] = 1;
+ locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
+@@ -5218,7 +5218,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ mode_lib->vba.ODMCombineEnabled[k] =
+ locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
+ } else {
+- mode_lib->vba.ODMCombineEnabled[k] = false;
++ mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
+ }
+ mode_lib->vba.DSCEnabled[k] =
+ locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+index 287e31052b307..736978c4d40a1 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+@@ -694,7 +694,7 @@ static void get_surf_rq_param(
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+- const display_pipe_params_st pipe_param,
++ const display_pipe_params_st *pipe_param,
+ bool is_chroma)
+ {
+ bool mode_422 = false;
+@@ -706,30 +706,30 @@ static void get_surf_rq_param(
+
+ // FIXME check if ppe apply for both luma and chroma in 422 case
+ if (is_chroma) {
+- vp_width = pipe_param.src.viewport_width_c / ppe;
+- vp_height = pipe_param.src.viewport_height_c;
+- data_pitch = pipe_param.src.data_pitch_c;
+- meta_pitch = pipe_param.src.meta_pitch_c;
++ vp_width = pipe_param->src.viewport_width_c / ppe;
++ vp_height = pipe_param->src.viewport_height_c;
++ data_pitch = pipe_param->src.data_pitch_c;
++ meta_pitch = pipe_param->src.meta_pitch_c;
+ } else {
+- vp_width = pipe_param.src.viewport_width / ppe;
+- vp_height = pipe_param.src.viewport_height;
+- data_pitch = pipe_param.src.data_pitch;
+- meta_pitch = pipe_param.src.meta_pitch;
++ vp_width = pipe_param->src.viewport_width / ppe;
++ vp_height = pipe_param->src.viewport_height;
++ data_pitch = pipe_param->src.data_pitch;
++ meta_pitch = pipe_param->src.meta_pitch;
+ }
+
+- if (pipe_param.dest.odm_combine) {
++ if (pipe_param->dest.odm_combine) {
+ unsigned int access_dir;
+ unsigned int full_src_vp_width;
+ unsigned int hactive_half;
+ unsigned int src_hactive_half;
+- access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+- hactive_half = pipe_param.dest.hactive / 2;
++ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
++ hactive_half = pipe_param->dest.hactive / 2;
+ if (is_chroma) {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
+- src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_half;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
++ src_hactive_half = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_half;
+ } else {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
+- src_hactive_half = pipe_param.scale_ratio_depth.hscl_ratio * hactive_half;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
++ src_hactive_half = pipe_param->scale_ratio_depth.hscl_ratio * hactive_half;
+ }
+
+ if (access_dir == 0) {
+@@ -754,7 +754,7 @@ static void get_surf_rq_param(
+ rq_sizing_param->meta_chunk_bytes = 2048;
+ rq_sizing_param->min_meta_chunk_bytes = 256;
+
+- if (pipe_param.src.hostvm)
++ if (pipe_param->src.hostvm)
+ rq_sizing_param->mpte_group_bytes = 512;
+ else
+ rq_sizing_param->mpte_group_bytes = 2048;
+@@ -768,23 +768,23 @@ static void get_surf_rq_param(
+ vp_height,
+ data_pitch,
+ meta_pitch,
+- pipe_param.src.source_format,
+- pipe_param.src.sw_mode,
+- pipe_param.src.macro_tile_size,
+- pipe_param.src.source_scan,
+- pipe_param.src.hostvm,
++ pipe_param->src.source_format,
++ pipe_param->src.sw_mode,
++ pipe_param->src.macro_tile_size,
++ pipe_param->src.source_scan,
++ pipe_param->src.hostvm,
+ is_chroma);
+ }
+
+ static void dml_rq_dlg_get_rq_params(
+ struct display_mode_lib *mode_lib,
+ display_rq_params_st *rq_param,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ // get param for luma surface
+- rq_param->yuv420 = pipe_param.src.source_format == dm_420_8
+- || pipe_param.src.source_format == dm_420_10;
+- rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10;
++ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8
++ || pipe_param->src.source_format == dm_420_10;
++ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;
+
+ get_surf_rq_param(
+ mode_lib,
+@@ -794,7 +794,7 @@ static void dml_rq_dlg_get_rq_params(
+ pipe_param,
+ 0);
+
+- if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) {
++ if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) {
+ // get param for chroma surface
+ get_surf_rq_param(
+ mode_lib,
+@@ -806,14 +806,14 @@ static void dml_rq_dlg_get_rq_params(
+ }
+
+ // calculate how to split the det buffer space between luma and chroma
+- handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
++ handle_det_buf_split(mode_lib, rq_param, pipe_param->src);
+ print__rq_params_st(mode_lib, *rq_param);
+ }
+
+ void dml21_rq_dlg_get_rq_reg(
+ struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ display_rq_params_st rq_param = {0};
+
+@@ -1658,7 +1658,7 @@ void dml21_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+@@ -1696,7 +1696,7 @@ void dml21_rq_dlg_get_dlg_reg(
+ // system parameter calculation done
+
+ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+- dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_dlg_params(
+ mode_lib,
+ e2e_pipe_param,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+index e8f7785e3fc63..af6ad0ca9cf8a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h
+@@ -44,7 +44,7 @@ struct display_mode_lib;
+ void dml21_rq_dlg_get_rq_reg(
+ struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+
+ // Function: dml_rq_dlg_get_dlg_reg
+ // Calculate and return DLG and TTU register struct given the system setting
+@@ -61,7 +61,7 @@ void dml21_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+index e3d9f1decdfc7..de0fa87b301a5 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+@@ -1868,7 +1868,10 @@ static unsigned int CalculateVMAndRowBytes(
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+- *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
++ if (PTEBufferSizeInRequests == 0)
++ *dpte_row_height = 1;
++ else
++ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else if (ScanDirection != dm_vert) {
+@@ -6658,8 +6661,7 @@ static double CalculateUrgentLatency(
+ return ret;
+ }
+
+-
+-static void UseMinimumDCFCLK(
++static noinline_for_stack void UseMinimumDCFCLK(
+ struct display_mode_lib *mode_lib,
+ int MaxInterDCNTileRepeaters,
+ int MaxPrefetchMode,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+index 0d934fae1c3a6..2120e0941a095 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+@@ -747,7 +747,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+- const display_pipe_params_st pipe_param,
++ const display_pipe_params_st *pipe_param,
+ bool is_chroma,
+ bool is_alpha)
+ {
+@@ -761,32 +761,32 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
+
+ // FIXME check if ppe apply for both luma and chroma in 422 case
+ if (is_chroma | is_alpha) {
+- vp_width = pipe_param.src.viewport_width_c / ppe;
+- vp_height = pipe_param.src.viewport_height_c;
+- data_pitch = pipe_param.src.data_pitch_c;
+- meta_pitch = pipe_param.src.meta_pitch_c;
+- surface_height = pipe_param.src.surface_height_y / 2.0;
++ vp_width = pipe_param->src.viewport_width_c / ppe;
++ vp_height = pipe_param->src.viewport_height_c;
++ data_pitch = pipe_param->src.data_pitch_c;
++ meta_pitch = pipe_param->src.meta_pitch_c;
++ surface_height = pipe_param->src.surface_height_y / 2.0;
+ } else {
+- vp_width = pipe_param.src.viewport_width / ppe;
+- vp_height = pipe_param.src.viewport_height;
+- data_pitch = pipe_param.src.data_pitch;
+- meta_pitch = pipe_param.src.meta_pitch;
+- surface_height = pipe_param.src.surface_height_y;
++ vp_width = pipe_param->src.viewport_width / ppe;
++ vp_height = pipe_param->src.viewport_height;
++ data_pitch = pipe_param->src.data_pitch;
++ meta_pitch = pipe_param->src.meta_pitch;
++ surface_height = pipe_param->src.surface_height_y;
+ }
+
+- if (pipe_param.dest.odm_combine) {
++ if (pipe_param->dest.odm_combine) {
+ unsigned int access_dir = 0;
+ unsigned int full_src_vp_width = 0;
+ unsigned int hactive_odm = 0;
+ unsigned int src_hactive_odm = 0;
+- access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+- hactive_odm = pipe_param.dest.hactive / ((unsigned int)pipe_param.dest.odm_combine*2);
++ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
++ hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine*2);
+ if (is_chroma) {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
+- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_odm;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
++ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
+ } else {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
+- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio * hactive_odm;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
++ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
+ }
+
+ if (access_dir == 0) {
+@@ -815,7 +815,7 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
+ rq_sizing_param->meta_chunk_bytes = 2048;
+ rq_sizing_param->min_meta_chunk_bytes = 256;
+
+- if (pipe_param.src.hostvm)
++ if (pipe_param->src.hostvm)
+ rq_sizing_param->mpte_group_bytes = 512;
+ else
+ rq_sizing_param->mpte_group_bytes = 2048;
+@@ -828,28 +828,28 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
+ vp_height,
+ data_pitch,
+ meta_pitch,
+- pipe_param.src.source_format,
+- pipe_param.src.sw_mode,
+- pipe_param.src.macro_tile_size,
+- pipe_param.src.source_scan,
+- pipe_param.src.hostvm,
++ pipe_param->src.source_format,
++ pipe_param->src.sw_mode,
++ pipe_param->src.macro_tile_size,
++ pipe_param->src.source_scan,
++ pipe_param->src.hostvm,
+ is_chroma,
+ surface_height);
+ }
+
+ static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
+ display_rq_params_st *rq_param,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ // get param for luma surface
+- rq_param->yuv420 = pipe_param.src.source_format == dm_420_8
+- || pipe_param.src.source_format == dm_420_10
+- || pipe_param.src.source_format == dm_rgbe_alpha
+- || pipe_param.src.source_format == dm_420_12;
++ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8
++ || pipe_param->src.source_format == dm_420_10
++ || pipe_param->src.source_format == dm_rgbe_alpha
++ || pipe_param->src.source_format == dm_420_12;
+
+- rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10;
++ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;
+
+- rq_param->rgbe_alpha = (pipe_param.src.source_format == dm_rgbe_alpha)?1:0;
++ rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha)?1:0;
+
+ get_surf_rq_param(mode_lib,
+ &(rq_param->sizing.rq_l),
+@@ -859,7 +859,7 @@ static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
+ 0,
+ 0);
+
+- if (is_dual_plane((enum source_format_class)(pipe_param.src.source_format))) {
++ if (is_dual_plane((enum source_format_class)(pipe_param->src.source_format))) {
+ // get param for chroma surface
+ get_surf_rq_param(mode_lib,
+ &(rq_param->sizing.rq_c),
+@@ -871,13 +871,13 @@ static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib,
+ }
+
+ // calculate how to split the det buffer space between luma and chroma
+- handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
++ handle_det_buf_split(mode_lib, rq_param, pipe_param->src);
+ print__rq_params_st(mode_lib, *rq_param);
+ }
+
+ void dml30_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param)
++ const display_pipe_params_st *pipe_param)
+ {
+ display_rq_params_st rq_param = { 0 };
+
+@@ -1831,7 +1831,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
+ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+@@ -1866,7 +1866,7 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ // system parameter calculation done
+
+ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+- dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_dlg_params(mode_lib,
+ e2e_pipe_param,
+ num_pipes,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
+index c04965cceff35..625e41f8d5751 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h
+@@ -41,7 +41,7 @@ struct display_mode_lib;
+ // See also: <display_rq_regs_st>
+ void dml30_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+
+ // Function: dml_rq_dlg_get_dlg_reg
+ // Calculate and return DLG and TTU register struct given the system setting
+@@ -57,7 +57,7 @@ void dml30_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+index d58925cff420e..aa0507e017926 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+@@ -259,33 +259,13 @@ static void CalculateRowBandwidth(
+
+ static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
++ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+- unsigned int GPUVMMaxPageTableLevels,
+- bool HostVMEnable,
+- unsigned int HostVMMaxNonCachedPageTableLevels,
+- bool GPUVMEnable,
+- double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+- double DPTEBytesPerRow,
+- double BandwidthAvailableForImmediateFlip,
+- unsigned int TotImmediateFlipBytes,
+- enum source_format_class SourcePixelFormat,
+- double LineTime,
+- double VRatio,
+- double VRatioChroma,
+- double Tno_bw,
+- bool DCCEnable,
+- unsigned int dpte_row_height,
+- unsigned int meta_row_height,
+- unsigned int dpte_row_height_chroma,
+- unsigned int meta_row_height_chroma,
+- double *DestinationLinesToRequestVMInImmediateFlip,
+- double *DestinationLinesToRequestRowInImmediateFlip,
+- double *final_flip_bw,
+- bool *ImmediateFlipSupportedForPipe);
++ double DPTEBytesPerRow);
+ static double CalculateWriteBackDelay(
+ enum source_format_class WritebackPixelFormat,
+ double WritebackHRatio,
+@@ -319,64 +299,28 @@ static void CalculateVupdateAndDynamicMetadataParameters(
+ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+- unsigned int NumberOfActivePlanes,
+- unsigned int MaxLineBufferLines,
+- unsigned int LineBufferSize,
+- unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+- bool SynchronizedVBlank,
+- unsigned int dpte_group_bytes[],
+- unsigned int MetaChunkSize,
+ double UrgentLatency,
+ double ExtraLatency,
+- double WritebackLatency,
+- double WritebackChunkSize,
+ double SOCCLK,
+- double DRAMClockChangeLatency,
+- double SRExitTime,
+- double SREnterPlusExitTime,
+- double SRExitZ8Time,
+- double SREnterPlusExitZ8Time,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+- unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+- double HRatio[],
+- double HRatioChroma[],
+- unsigned int vtaps[],
+- unsigned int VTAPsChroma[],
+- double VRatio[],
+- double VRatioChroma[],
+- unsigned int HTotal[],
+- double PixelClock[],
+- unsigned int BlendingAndTiming[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+- double DSTXAfterScaler[],
+- double DSTYAfterScaler[],
+- bool WritebackEnable[],
+- enum source_format_class WritebackPixelFormat[],
+- double WritebackDestinationWidth[],
+- double WritebackDestinationHeight[],
+- double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ int unsigned CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+- double *UrgentWatermark,
+- double *WritebackUrgentWatermark,
+- double *DRAMClockChangeWatermark,
+- double *WritebackDRAMClockChangeWatermark,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+- double *Z8StutterEnterPlusExitWatermark,
+- double *MinActiveDRAMClockChangeLatencySupported);
++ double *Z8StutterEnterPlusExitWatermark);
+
+ static void CalculateDCFCLKDeepSleep(
+ struct display_mode_lib *mode_lib,
+@@ -2959,33 +2903,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ CalculateFlipSchedule(
+ mode_lib,
++ k,
+ HostVMInefficiencyFactor,
+ v->UrgentExtraLatency,
+ v->UrgentLatency,
+- v->GPUVMMaxPageTableLevels,
+- v->HostVMEnable,
+- v->HostVMMaxNonCachedPageTableLevels,
+- v->GPUVMEnable,
+- v->HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesFrame[k],
+ v->MetaRowByte[k],
+- v->PixelPTEBytesPerRow[k],
+- v->BandwidthAvailableForImmediateFlip,
+- v->TotImmediateFlipBytes,
+- v->SourcePixelFormat[k],
+- v->HTotal[k] / v->PixelClock[k],
+- v->VRatio[k],
+- v->VRatioChroma[k],
+- v->Tno_bw[k],
+- v->DCCEnable[k],
+- v->dpte_row_height[k],
+- v->meta_row_height[k],
+- v->dpte_row_height_chroma[k],
+- v->meta_row_height_chroma[k],
+- &v->DestinationLinesToRequestVMInImmediateFlip[k],
+- &v->DestinationLinesToRequestRowInImmediateFlip[k],
+- &v->final_flip_bw[k],
+- &v->ImmediateFlipSupportedForPipe[k]);
++ v->PixelPTEBytesPerRow[k]);
+ }
+
+ v->total_dcn_read_bw_with_flip = 0.0;
+@@ -3072,64 +2996,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ PrefetchMode,
+- v->NumberOfActivePlanes,
+- v->MaxLineBufferLines,
+- v->LineBufferSize,
+- v->WritebackInterfaceBufferSize,
+ v->DCFCLK,
+ v->ReturnBW,
+- v->SynchronizedVBlank,
+- v->dpte_group_bytes,
+- v->MetaChunkSize,
+ v->UrgentLatency,
+ v->UrgentExtraLatency,
+- v->WritebackLatency,
+- v->WritebackChunkSize,
+ v->SOCCLK,
+- v->DRAMClockChangeLatency,
+- v->SRExitTime,
+- v->SREnterPlusExitTime,
+- v->SRExitZ8Time,
+- v->SREnterPlusExitZ8Time,
+ v->DCFCLKDeepSleep,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
+- v->LBBitPerPixel,
+ v->SwathWidthY,
+ v->SwathWidthC,
+- v->HRatio,
+- v->HRatioChroma,
+- v->vtaps,
+- v->VTAPsChroma,
+- v->VRatio,
+- v->VRatioChroma,
+- v->HTotal,
+- v->PixelClock,
+- v->BlendingAndTiming,
+ v->DPPPerPlane,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+- v->DSTXAfterScaler,
+- v->DSTYAfterScaler,
+- v->WritebackEnable,
+- v->WritebackPixelFormat,
+- v->WritebackDestinationWidth,
+- v->WritebackDestinationHeight,
+- v->WritebackSourceHeight,
+ v->UnboundedRequestEnabled,
+ v->CompressedBufferSizeInkByte,
+ &DRAMClockChangeSupport,
+- &v->UrgentWatermark,
+- &v->WritebackUrgentWatermark,
+- &v->DRAMClockChangeWatermark,
+- &v->WritebackDRAMClockChangeWatermark,
+ &v->StutterExitWatermark,
+ &v->StutterEnterPlusExitWatermark,
+ &v->Z8StutterExitWatermark,
+- &v->Z8StutterEnterPlusExitWatermark,
+- &v->MinActiveDRAMClockChangeLatencySupported);
++ &v->Z8StutterEnterPlusExitWatermark);
+
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
+@@ -3741,61 +3629,43 @@ static void CalculateRowBandwidth(
+
+ static void CalculateFlipSchedule(
+ struct display_mode_lib *mode_lib,
++ unsigned int k,
+ double HostVMInefficiencyFactor,
+ double UrgentExtraLatency,
+ double UrgentLatency,
+- unsigned int GPUVMMaxPageTableLevels,
+- bool HostVMEnable,
+- unsigned int HostVMMaxNonCachedPageTableLevels,
+- bool GPUVMEnable,
+- double HostVMMinPageSize,
+ double PDEAndMetaPTEBytesPerFrame,
+ double MetaRowBytes,
+- double DPTEBytesPerRow,
+- double BandwidthAvailableForImmediateFlip,
+- unsigned int TotImmediateFlipBytes,
+- enum source_format_class SourcePixelFormat,
+- double LineTime,
+- double VRatio,
+- double VRatioChroma,
+- double Tno_bw,
+- bool DCCEnable,
+- unsigned int dpte_row_height,
+- unsigned int meta_row_height,
+- unsigned int dpte_row_height_chroma,
+- unsigned int meta_row_height_chroma,
+- double *DestinationLinesToRequestVMInImmediateFlip,
+- double *DestinationLinesToRequestRowInImmediateFlip,
+- double *final_flip_bw,
+- bool *ImmediateFlipSupportedForPipe)
++ double DPTEBytesPerRow)
+ {
++ struct vba_vars_st *v = &mode_lib->vba;
+ double min_row_time = 0.0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double TimeForFetchingMetaPTEImmediateFlip;
+ double TimeForFetchingRowInVBlankImmediateFlip;
+ double ImmediateFlipBW;
++ double LineTime = v->HTotal[k] / v->PixelClock[k];
+
+- if (GPUVMEnable == true && HostVMEnable == true) {
+- HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
++ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
++ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
+ } else {
+ HostVMDynamicLevelsTrips = 0;
+ }
+
+- if (GPUVMEnable == true || DCCEnable == true) {
+- ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
++ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
++ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
+ }
+
+- if (GPUVMEnable == true) {
++ if (v->GPUVMEnable == true) {
+ TimeForFetchingMetaPTEImmediateFlip = dml_max3(
+- Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+- UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
++ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
++ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ LineTime / 4.0);
+ } else {
+ TimeForFetchingMetaPTEImmediateFlip = 0;
+ }
+
+- *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+- if ((GPUVMEnable == true || DCCEnable == true)) {
++ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
++ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
+ UrgentLatency * (HostVMDynamicLevelsTrips + 1),
+@@ -3804,54 +3674,54 @@ static void CalculateFlipSchedule(
+ TimeForFetchingRowInVBlankImmediateFlip = 0;
+ }
+
+- *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
++ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+
+- if (GPUVMEnable == true) {
+- *final_flip_bw = dml_max(
+- PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+- (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+- } else if ((GPUVMEnable == true || DCCEnable == true)) {
+- *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
++ if (v->GPUVMEnable == true) {
++ v->final_flip_bw[k] = dml_max(
++ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
++ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
++ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
++ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
+ } else {
+- *final_flip_bw = 0;
++ v->final_flip_bw[k] = 0;
+ }
+
+- if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
+- if (GPUVMEnable == true && DCCEnable != true) {
+- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+- } else if (GPUVMEnable != true && DCCEnable == true) {
+- min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
++ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
++ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
++ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
++ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
++ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else {
+ min_row_time = dml_min4(
+- dpte_row_height * LineTime / VRatio,
+- meta_row_height * LineTime / VRatio,
+- dpte_row_height_chroma * LineTime / VRatioChroma,
+- meta_row_height_chroma * LineTime / VRatioChroma);
++ v->dpte_row_height[k] * LineTime / v->VRatio[k],
++ v->meta_row_height[k] * LineTime / v->VRatio[k],
++ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
++ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ }
+ } else {
+- if (GPUVMEnable == true && DCCEnable != true) {
+- min_row_time = dpte_row_height * LineTime / VRatio;
+- } else if (GPUVMEnable != true && DCCEnable == true) {
+- min_row_time = meta_row_height * LineTime / VRatio;
++ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
++ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
++ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
++ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
+ } else {
+- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
++ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
+ }
+ }
+
+- if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
++ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
+ || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
+- *ImmediateFlipSupportedForPipe = false;
++ v->ImmediateFlipSupportedForPipe[k] = false;
+ } else {
+- *ImmediateFlipSupportedForPipe = true;
++ v->ImmediateFlipSupportedForPipe[k] = true;
+ }
+
+ #ifdef __DML_VBA_DEBUG__
+- dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
+- dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
++ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
++ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
+ dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
+ dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
+ dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+- dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
++ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
+ #endif
+
+ }
+@@ -5477,33 +5347,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+ CalculateFlipSchedule(
+ mode_lib,
++ k,
+ HostVMInefficiencyFactor,
+ v->ExtraLatency,
+ v->UrgLatency[i],
+- v->GPUVMMaxPageTableLevels,
+- v->HostVMEnable,
+- v->HostVMMaxNonCachedPageTableLevels,
+- v->GPUVMEnable,
+- v->HostVMMinPageSize,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+- v->DPTEBytesPerRow[i][j][k],
+- v->BandwidthAvailableForImmediateFlip,
+- v->TotImmediateFlipBytes,
+- v->SourcePixelFormat[k],
+- v->HTotal[k] / v->PixelClock[k],
+- v->VRatio[k],
+- v->VRatioChroma[k],
+- v->Tno_bw[k],
+- v->DCCEnable[k],
+- v->dpte_row_height[k],
+- v->meta_row_height[k],
+- v->dpte_row_height_chroma[k],
+- v->meta_row_height_chroma[k],
+- &v->DestinationLinesToRequestVMInImmediateFlip[k],
+- &v->DestinationLinesToRequestRowInImmediateFlip[k],
+- &v->final_flip_bw[k],
+- &v->ImmediateFlipSupportedForPipe[k]);
++ v->DPTEBytesPerRow[i][j][k]);
+ }
+ v->total_dcn_read_bw_with_flip = 0.0;
+ for (k = 0; k < v->NumberOfActivePlanes; k++) {
+@@ -5561,64 +5411,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ CalculateWatermarksAndDRAMSpeedChangeSupport(
+ mode_lib,
+ v->PrefetchModePerState[i][j],
+- v->NumberOfActivePlanes,
+- v->MaxLineBufferLines,
+- v->LineBufferSize,
+- v->WritebackInterfaceBufferSize,
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
+- v->SynchronizedVBlank,
+- v->dpte_group_bytes,
+- v->MetaChunkSize,
+ v->UrgLatency[i],
+ v->ExtraLatency,
+- v->WritebackLatency,
+- v->WritebackChunkSize,
+ v->SOCCLKPerState[i],
+- v->DRAMClockChangeLatency,
+- v->SRExitTime,
+- v->SREnterPlusExitTime,
+- v->SRExitZ8Time,
+- v->SREnterPlusExitZ8Time,
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+- v->LBBitPerPixel,
+ v->SwathWidthYThisState,
+ v->SwathWidthCThisState,
+- v->HRatio,
+- v->HRatioChroma,
+- v->vtaps,
+- v->VTAPsChroma,
+- v->VRatio,
+- v->VRatioChroma,
+- v->HTotal,
+- v->PixelClock,
+- v->BlendingAndTiming,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
+- v->DSTXAfterScaler,
+- v->DSTYAfterScaler,
+- v->WritebackEnable,
+- v->WritebackPixelFormat,
+- v->WritebackDestinationWidth,
+- v->WritebackDestinationHeight,
+- v->WritebackSourceHeight,
+ UnboundedRequestEnabledThisState,
+ CompressedBufferSizeInkByteThisState,
+ &v->DRAMClockChangeSupport[i][j],
+- &v->UrgentWatermark,
+- &v->WritebackUrgentWatermark,
+- &v->DRAMClockChangeWatermark,
+- &v->WritebackDRAMClockChangeWatermark,
+- &dummy,
+ &dummy,
+ &dummy,
+ &dummy,
+- &v->MinActiveDRAMClockChangeLatencySupported);
++ &dummy);
+ }
+ }
+
+@@ -5743,64 +5557,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
+ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ struct display_mode_lib *mode_lib,
+ unsigned int PrefetchMode,
+- unsigned int NumberOfActivePlanes,
+- unsigned int MaxLineBufferLines,
+- unsigned int LineBufferSize,
+- unsigned int WritebackInterfaceBufferSize,
+ double DCFCLK,
+ double ReturnBW,
+- bool SynchronizedVBlank,
+- unsigned int dpte_group_bytes[],
+- unsigned int MetaChunkSize,
+ double UrgentLatency,
+ double ExtraLatency,
+- double WritebackLatency,
+- double WritebackChunkSize,
+ double SOCCLK,
+- double DRAMClockChangeLatency,
+- double SRExitTime,
+- double SREnterPlusExitTime,
+- double SRExitZ8Time,
+- double SREnterPlusExitZ8Time,
+ double DCFCLKDeepSleep,
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+- unsigned int LBBitPerPixel[],
+ double SwathWidthY[],
+ double SwathWidthC[],
+- double HRatio[],
+- double HRatioChroma[],
+- unsigned int vtaps[],
+- unsigned int VTAPsChroma[],
+- double VRatio[],
+- double VRatioChroma[],
+- unsigned int HTotal[],
+- double PixelClock[],
+- unsigned int BlendingAndTiming[],
+ unsigned int DPPPerPlane[],
+ double BytePerPixelDETY[],
+ double BytePerPixelDETC[],
+- double DSTXAfterScaler[],
+- double DSTYAfterScaler[],
+- bool WritebackEnable[],
+- enum source_format_class WritebackPixelFormat[],
+- double WritebackDestinationWidth[],
+- double WritebackDestinationHeight[],
+- double WritebackSourceHeight[],
+ bool UnboundedRequestEnabled,
+ int unsigned CompressedBufferSizeInkByte,
+ enum clock_change_support *DRAMClockChangeSupport,
+- double *UrgentWatermark,
+- double *WritebackUrgentWatermark,
+- double *DRAMClockChangeWatermark,
+- double *WritebackDRAMClockChangeWatermark,
+ double *StutterExitWatermark,
+ double *StutterEnterPlusExitWatermark,
+ double *Z8StutterExitWatermark,
+- double *Z8StutterEnterPlusExitWatermark,
+- double *MinActiveDRAMClockChangeLatencySupported)
++ double *Z8StutterEnterPlusExitWatermark)
+ {
+ struct vba_vars_st *v = &mode_lib->vba;
+ double EffectiveLBLatencyHidingY;
+@@ -5820,103 +5598,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ double TotalPixelBW = 0.0;
+ int k, j;
+
+- *UrgentWatermark = UrgentLatency + ExtraLatency;
++ v->UrgentWatermark = UrgentLatency + ExtraLatency;
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+ dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
+- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
++ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
+ #endif
+
+- *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
++ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
+
+ #ifdef __DML_VBA_DEBUG__
+- dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
+- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
++ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
++ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
+ #endif
+
+ v->TotalActiveWriteback = 0;
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
+- if (WritebackEnable[k] == true) {
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
++ if (v->WritebackEnable[k] == true) {
+ v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
+ }
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+- *WritebackUrgentWatermark = WritebackLatency;
++ v->WritebackUrgentWatermark = v->WritebackLatency;
+ } else {
+- *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
++ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+ if (v->TotalActiveWriteback <= 1) {
+- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
++ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
+ } else {
+- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
++ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ }
+
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ TotalPixelBW = TotalPixelBW
+- + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
+- / (HTotal[k] / PixelClock[k]);
++ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
++ / (v->HTotal[k] / v->PixelClock[k]);
+ }
+
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ double EffectiveDETBufferSizeY = DETBufferSizeY[k];
+
+ v->LBLatencyHidingSourceLinesY = dml_min(
+- (double) MaxLineBufferLines,
+- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
++ (double) v->MaxLineBufferLines,
++ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+
+ v->LBLatencyHidingSourceLinesC = dml_min(
+- (double) MaxLineBufferLines,
+- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
++ (double) v->MaxLineBufferLines,
++ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
+
+- EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
++ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+- EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
++ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+
+ if (UnboundedRequestEnabled) {
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+- + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
++ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
+ }
+
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+- FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
++ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
+ if (BytePerPixelDETC[k] > 0) {
+ LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
+- FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
++ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
+ } else {
+ LinesInDETC = 0;
+ FullDETBufferingTimeC = 999999;
+ }
+
+ ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
++ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+- if (NumberOfActivePlanes > 1) {
++ if (v->NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
+- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
++ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
+ }
+
+ if (BytePerPixelDETC[k] > 0) {
+ ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
++ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
+
+- if (NumberOfActivePlanes > 1) {
++ if (v->NumberOfActivePlanes > 1) {
+ ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
+- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
++ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
+ }
+ v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
+ } else {
+ v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
+ }
+
+- if (WritebackEnable[k] == true) {
+- WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
+- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
+- if (WritebackPixelFormat[k] == dm_444_64) {
++ if (v->WritebackEnable[k] == true) {
++ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
++ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
++ if (v->WritebackPixelFormat[k] == dm_444_64) {
+ WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
+ }
+ WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
+@@ -5926,14 +5704,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+
+ v->MinActiveDRAMClockChangeMargin = 999999;
+ PlaneWithMinActiveDRAMClockChangeMargin = 0;
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
+ v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
+- if (BlendingAndTiming[k] == k) {
++ if (v->BlendingAndTiming[k] == k) {
+ PlaneWithMinActiveDRAMClockChangeMargin = k;
+ } else {
+- for (j = 0; j < NumberOfActivePlanes; ++j) {
+- if (BlendingAndTiming[k] == j) {
++ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
++ if (v->BlendingAndTiming[k] == j) {
+ PlaneWithMinActiveDRAMClockChangeMargin = j;
+ }
+ }
+@@ -5941,11 +5719,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+ }
+ }
+
+- *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
++ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
+
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
+- if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
++ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
+ SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
+ }
+@@ -5953,25 +5731,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
+
+ v->TotalNumberOfActiveOTG = 0;
+
+- for (k = 0; k < NumberOfActivePlanes; ++k) {
+- if (BlendingAndTiming[k] == k) {
++ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
++ if (v->BlendingAndTiming[k] == k) {
+ v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
+ }
+ }
+
+ if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
+- } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
++ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
+ *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
+ } else {
+ *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
+ }
+
+- *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+- *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+- *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+- *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
++ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
++ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
++ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
++ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+index c23905bc733ae..57bd4e3f8a823 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+@@ -738,7 +738,7 @@ static void get_surf_rq_param(
+ display_data_rq_sizing_params_st *rq_sizing_param,
+ display_data_rq_dlg_params_st *rq_dlg_param,
+ display_data_rq_misc_params_st *rq_misc_param,
+- const display_pipe_params_st pipe_param,
++ const display_pipe_params_st *pipe_param,
+ bool is_chroma,
+ bool is_alpha)
+ {
+@@ -752,33 +752,33 @@ static void get_surf_rq_param(
+
+ // FIXME check if ppe apply for both luma and chroma in 422 case
+ if (is_chroma | is_alpha) {
+- vp_width = pipe_param.src.viewport_width_c / ppe;
+- vp_height = pipe_param.src.viewport_height_c;
+- data_pitch = pipe_param.src.data_pitch_c;
+- meta_pitch = pipe_param.src.meta_pitch_c;
+- surface_height = pipe_param.src.surface_height_y / 2.0;
++ vp_width = pipe_param->src.viewport_width_c / ppe;
++ vp_height = pipe_param->src.viewport_height_c;
++ data_pitch = pipe_param->src.data_pitch_c;
++ meta_pitch = pipe_param->src.meta_pitch_c;
++ surface_height = pipe_param->src.surface_height_y / 2.0;
+ } else {
+- vp_width = pipe_param.src.viewport_width / ppe;
+- vp_height = pipe_param.src.viewport_height;
+- data_pitch = pipe_param.src.data_pitch;
+- meta_pitch = pipe_param.src.meta_pitch;
+- surface_height = pipe_param.src.surface_height_y;
++ vp_width = pipe_param->src.viewport_width / ppe;
++ vp_height = pipe_param->src.viewport_height;
++ data_pitch = pipe_param->src.data_pitch;
++ meta_pitch = pipe_param->src.meta_pitch;
++ surface_height = pipe_param->src.surface_height_y;
+ }
+
+- if (pipe_param.dest.odm_combine) {
++ if (pipe_param->dest.odm_combine) {
+ unsigned int access_dir;
+ unsigned int full_src_vp_width;
+ unsigned int hactive_odm;
+ unsigned int src_hactive_odm;
+
+- access_dir = (pipe_param.src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
+- hactive_odm = pipe_param.dest.hactive / ((unsigned int) pipe_param.dest.odm_combine * 2);
++ access_dir = (pipe_param->src.source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
++ hactive_odm = pipe_param->dest.hactive / ((unsigned int) pipe_param->dest.odm_combine * 2);
+ if (is_chroma) {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio_c * pipe_param.dest.full_recout_width;
+- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio_c * hactive_odm;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio_c * pipe_param->dest.full_recout_width;
++ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio_c * hactive_odm;
+ } else {
+- full_src_vp_width = pipe_param.scale_ratio_depth.hscl_ratio * pipe_param.dest.full_recout_width;
+- src_hactive_odm = pipe_param.scale_ratio_depth.hscl_ratio * hactive_odm;
++ full_src_vp_width = pipe_param->scale_ratio_depth.hscl_ratio * pipe_param->dest.full_recout_width;
++ src_hactive_odm = pipe_param->scale_ratio_depth.hscl_ratio * hactive_odm;
+ }
+
+ if (access_dir == 0) {
+@@ -808,7 +808,7 @@ static void get_surf_rq_param(
+ rq_sizing_param->meta_chunk_bytes = 2048;
+ rq_sizing_param->min_meta_chunk_bytes = 256;
+
+- if (pipe_param.src.hostvm)
++ if (pipe_param->src.hostvm)
+ rq_sizing_param->mpte_group_bytes = 512;
+ else
+ rq_sizing_param->mpte_group_bytes = 2048;
+@@ -822,38 +822,38 @@ static void get_surf_rq_param(
+ vp_height,
+ data_pitch,
+ meta_pitch,
+- pipe_param.src.source_format,
+- pipe_param.src.sw_mode,
+- pipe_param.src.macro_tile_size,
+- pipe_param.src.source_scan,
+- pipe_param.src.hostvm,
++ pipe_param->src.source_format,
++ pipe_param->src.sw_mode,
++ pipe_param->src.macro_tile_size,
++ pipe_param->src.source_scan,
++ pipe_param->src.hostvm,
+ is_chroma,
+ surface_height);
+ }
+
+-static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st pipe_param)
++static void dml_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, display_rq_params_st *rq_param, const display_pipe_params_st *pipe_param)
+ {
+ // get param for luma surface
+- rq_param->yuv420 = pipe_param.src.source_format == dm_420_8 || pipe_param.src.source_format == dm_420_10 || pipe_param.src.source_format == dm_rgbe_alpha
+- || pipe_param.src.source_format == dm_420_12;
++ rq_param->yuv420 = pipe_param->src.source_format == dm_420_8 || pipe_param->src.source_format == dm_420_10 || pipe_param->src.source_format == dm_rgbe_alpha
++ || pipe_param->src.source_format == dm_420_12;
+
+- rq_param->yuv420_10bpc = pipe_param.src.source_format == dm_420_10;
++ rq_param->yuv420_10bpc = pipe_param->src.source_format == dm_420_10;
+
+- rq_param->rgbe_alpha = (pipe_param.src.source_format == dm_rgbe_alpha) ? 1 : 0;
++ rq_param->rgbe_alpha = (pipe_param->src.source_format == dm_rgbe_alpha) ? 1 : 0;
+
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_l), &(rq_param->dlg.rq_l), &(rq_param->misc.rq_l), pipe_param, 0, 0);
+
+- if (is_dual_plane((enum source_format_class) (pipe_param.src.source_format))) {
++ if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format))) {
+ // get param for chroma surface
+ get_surf_rq_param(mode_lib, &(rq_param->sizing.rq_c), &(rq_param->dlg.rq_c), &(rq_param->misc.rq_c), pipe_param, 1, rq_param->rgbe_alpha);
+ }
+
+ // calculate how to split the det buffer space between luma and chroma
+- handle_det_buf_split(mode_lib, rq_param, pipe_param.src);
++ handle_det_buf_split(mode_lib, rq_param, pipe_param->src);
+ print__rq_params_st(mode_lib, *rq_param);
+ }
+
+-void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st pipe_param)
++void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, display_rq_regs_st *rq_regs, const display_pipe_params_st *pipe_param)
+ {
+ display_rq_params_st rq_param = {0};
+
+@@ -1677,7 +1677,7 @@ void dml31_rq_dlg_get_dlg_reg(
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+@@ -1704,7 +1704,7 @@ void dml31_rq_dlg_get_dlg_reg(
+ // system parameter calculation done
+
+ dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);
+- dml_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe);
++ dml_rq_dlg_get_rq_params(mode_lib, &rq_param, &e2e_pipe_param[pipe_idx].pipe);
+ dml_rq_dlg_get_dlg_params(
+ mode_lib,
+ e2e_pipe_param,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h
+index adf8518f761f9..8ee991351699d 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h
+@@ -41,7 +41,7 @@ struct display_mode_lib;
+ // See also: <display_rq_regs_st>
+ void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+
+ // Function: dml_rq_dlg_get_dlg_reg
+ // Calculate and return DLG and TTU register struct given the system setting
+@@ -57,7 +57,7 @@ void dml31_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib,
+ void dml31_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+index 1051ca1a23b8a..edb9f7567d6d9 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+@@ -80,11 +80,11 @@ enum dm_swizzle_mode {
+ dm_sw_SPARE_13 = 24,
+ dm_sw_64kb_s_x = 25,
+ dm_sw_64kb_d_x = 26,
+- dm_sw_SPARE_14 = 27,
++ dm_sw_64kb_r_x = 27,
+ dm_sw_SPARE_15 = 28,
+ dm_sw_var_s_x = 29,
+ dm_sw_var_d_x = 30,
+- dm_sw_64kb_r_x,
++ dm_sw_var_r_x = 31,
+ dm_sw_gfx7_2d_thin_l_vp,
+ dm_sw_gfx7_2d_thin_gl,
+ };
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+index d42a0aeca6be2..72b1957022aa2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
++++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+@@ -49,7 +49,7 @@ struct dml_funcs {
+ struct display_mode_lib *mode_lib,
+ display_dlg_regs_st *dlg_regs,
+ display_ttu_regs_st *ttu_regs,
+- display_e2e_pipe_params_st *e2e_pipe_param,
++ const display_e2e_pipe_params_st *e2e_pipe_param,
+ const unsigned int num_pipes,
+ const unsigned int pipe_idx,
+ const bool cstate_en,
+@@ -60,7 +60,7 @@ struct dml_funcs {
+ void (*rq_dlg_get_rq_reg)(
+ struct display_mode_lib *mode_lib,
+ display_rq_regs_st *rq_regs,
+- const display_pipe_params_st pipe_param);
++ const display_pipe_params_st *pipe_param);
+ void (*recalculate)(struct display_mode_lib *mode_lib);
+ void (*validate)(struct display_mode_lib *mode_lib);
+ };
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
+new file mode 100644
+index 0000000000000..e5fac9f4181d8
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
+@@ -0,0 +1,704 @@
++
++/*
++ * Copyright 2017 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++
++const qp_table qp_table_422_10bpc_min = {
++ { 6, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
++ { 6.5, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
++ { 7, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 11, 15} },
++ { 7.5, { 0, 2, 4, 6, 6, 6, 6, 7, 7, 7, 8, 9, 9, 11, 15} },
++ { 8, { 0, 2, 3, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 11, 14} },
++ { 8.5, { 0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 14} },
++ { 9, { 0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 13} },
++ { 9.5, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 13} },
++ { 10, { 0, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
++ {10.5, { 0, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
++ { 11, { 0, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11} },
++ {11.5, { 0, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 10, 11} },
++ { 12, { 0, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9, 10} },
++ {12.5, { 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
++ { 13, { 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 6, 6, 8, 8, 9} },
++ {13.5, { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 9} },
++ { 14, { 0, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 8} },
++ {14.5, { 0, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8} },
++ { 15, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6, 6, 8} },
++ {15.5, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
++ { 16, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 7} },
++ {16.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6} },
++ { 17, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 6} },
++ {17.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
++ { 18, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 5} },
++ {18.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
++ { 19, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 4} },
++ {19.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 4} },
++ { 20, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 3} }
++};
++
++
++const qp_table qp_table_444_8bpc_max = {
++ { 6, { 4, 6, 8, 8, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 15} },
++ { 6.5, { 4, 6, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15} },
++ { 7, { 4, 5, 7, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 13, 14} },
++ { 7.5, { 4, 5, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
++ { 8, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
++ { 8.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
++ { 9, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 13} },
++ { 9.5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 13} },
++ { 10, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
++ {10.5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 10, 11, 12} },
++ { 11, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11} },
++ {11.5, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
++ { 12, { 2, 3, 4, 5, 6, 6, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
++ {12.5, { 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
++ { 13, { 1, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 9, 10} },
++ {13.5, { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10} },
++ { 14, { 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
++ {14.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9} },
++ { 15, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
++ {15.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
++ { 16, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8} },
++ {16.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8} },
++ { 17, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 8} },
++ {17.5, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 8} },
++ { 18, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7} },
++ {18.5, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7} },
++ { 19, { 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6} },
++ {19.5, { 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6} },
++ { 20, { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 6} },
++ {20.5, { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 6} },
++ { 21, { 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
++ {21.5, { 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
++ { 22, { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} },
++ {22.5, { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} },
++ { 23, { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
++ {23.5, { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
++ { 24, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4} }
++};
++
++
++const qp_table qp_table_420_12bpc_max = {
++ { 4, {11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 21, 22} },
++ { 4.5, {10, 11, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
++ { 5, { 9, 11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21} },
++ { 5.5, { 8, 10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18, 19, 20} },
++ { 6, { 6, 9, 11, 12, 13, 14, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
++ { 6.5, { 6, 8, 10, 11, 11, 13, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
++ { 7, { 5, 7, 9, 10, 10, 12, 13, 14, 14, 15, 16, 16, 17, 17, 18} },
++ { 7.5, { 5, 7, 8, 9, 9, 11, 12, 13, 14, 14, 15, 15, 16, 16, 17} },
++ { 8, { 4, 6, 7, 8, 8, 10, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
++ { 8.5, { 3, 6, 6, 7, 7, 10, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
++ { 9, { 3, 5, 6, 7, 7, 10, 11, 12, 12, 13, 13, 14, 14, 14, 15} },
++ { 9.5, { 2, 5, 6, 6, 7, 9, 10, 11, 12, 12, 13, 13, 13, 14, 15} },
++ { 10, { 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 13, 13, 15} },
++ {10.5, { 2, 3, 5, 5, 6, 7, 8, 9, 11, 11, 12, 12, 12, 12, 14} },
++ { 11, { 1, 3, 4, 5, 6, 6, 7, 9, 10, 11, 11, 11, 12, 12, 13} },
++ {11.5, { 1, 2, 3, 4, 5, 6, 6, 8, 9, 10, 10, 11, 11, 11, 13} },
++ { 12, { 1, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 10, 12} },
++ {12.5, { 1, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 10, 11} },
++ { 13, { 1, 1, 1, 2, 4, 4, 6, 6, 7, 8, 8, 9, 9, 9, 11} },
++ {13.5, { 1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 8, 9, 11} },
++ { 14, { 1, 1, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
++ {14.5, { 0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
++ { 15, { 0, 1, 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 6, 7, 9} },
++ {15.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 8} },
++ { 16, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 7} },
++ {16.5, { 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 7} },
++ { 17, { 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
++ {17.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
++ { 18, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 5} }
++};
++
++
++const qp_table qp_table_444_10bpc_min = {
++ { 6, { 0, 4, 7, 7, 9, 9, 9, 9, 9, 10, 10, 10, 10, 12, 18} },
++ { 6.5, { 0, 4, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 12, 18} },
++ { 7, { 0, 4, 6, 6, 8, 8, 8, 8, 8, 9, 9, 10, 10, 12, 17} },
++ { 7.5, { 0, 4, 6, 6, 7, 8, 8, 8, 8, 8, 9, 9, 10, 12, 17} },
++ { 8, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 8, 9, 9, 9, 12, 16} },
++ { 8.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 8, 9, 9, 9, 12, 16} },
++ { 9, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
++ { 9.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
++ { 10, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 15} },
++ {10.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 15} },
++ { 11, { 0, 3, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
++ {11.5, { 0, 3, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
++ { 12, { 0, 2, 4, 4, 6, 6, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
++ {12.5, { 0, 2, 4, 4, 6, 6, 7, 7, 7, 7, 8, 9, 9, 11, 14} },
++ { 13, { 0, 2, 4, 4, 5, 6, 7, 7, 7, 7, 8, 9, 9, 11, 13} },
++ {13.5, { 0, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 11, 13} },
++ { 14, { 0, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 11, 13} },
++ {14.5, { 0, 2, 3, 4, 5, 5, 6, 6, 6, 7, 7, 8, 9, 11, 12} },
++ { 15, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 11, 12} },
++ {15.5, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 11, 12} },
++ { 16, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 8, 10, 11} },
++ {16.5, { 0, 1, 2, 3, 4, 5, 5, 6, 6, 6, 7, 8, 8, 10, 11} },
++ { 17, { 0, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 9, 11} },
++ {17.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 11} },
++ { 18, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
++ {18.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
++ { 19, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9} },
++ {19.5, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9} },
++ { 20, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 9} },
++ {20.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 9} },
++ { 21, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6, 7, 9} },
++ {21.5, { 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 8} },
++ { 22, { 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 8} },
++ {22.5, { 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
++ { 23, { 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 7} },
++ {23.5, { 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 7} },
++ { 24, { 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 5, 7} },
++ {24.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
++ { 25, { 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
++ {25.5, { 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
++ { 26, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 5} },
++ {26.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 5} },
++ { 27, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
++ {27.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
++ { 28, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 4} },
++ {28.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 4} },
++ { 29, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3} },
++ {29.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3} },
++ { 30, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} }
++};
++
++
++const qp_table qp_table_420_8bpc_max = {
++ { 4, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 13, 14} },
++ { 4.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
++ { 5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 13} },
++ { 5.5, { 3, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12} },
++ { 6, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
++ { 6.5, { 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
++ { 7, { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 9, 10} },
++ { 7.5, { 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9} },
++ { 8, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
++ { 8.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8} },
++ { 9, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7} },
++ { 9.5, { 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
++ { 10, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6} },
++ {10.5, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
++ { 11, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5} },
++ {11.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
++ { 12, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4} }
++};
++
++
++const qp_table qp_table_444_8bpc_min = {
++ { 6, { 0, 1, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 9, 14} },
++ { 6.5, { 0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 9, 14} },
++ { 7, { 0, 0, 2, 2, 4, 4, 4, 4, 4, 5, 5, 6, 6, 9, 13} },
++ { 7.5, { 0, 0, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 6, 9, 13} },
++ { 8, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 4, 5, 5, 5, 8, 12} },
++ { 8.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 4, 5, 5, 5, 8, 12} },
++ { 9, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 12} },
++ { 9.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 12} },
++ { 10, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
++ {10.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
++ { 11, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
++ {11.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
++ { 12, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
++ {12.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 4, 5, 5, 7, 10} },
++ { 13, { 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5, 7, 9} },
++ {13.5, { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
++ { 14, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
++ {14.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 5, 7, 8} },
++ { 15, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
++ {15.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
++ { 16, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
++ {16.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
++ { 17, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
++ {17.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
++ { 18, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
++ {18.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
++ { 19, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5} },
++ {19.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5} },
++ { 20, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
++ {20.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
++ { 21, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
++ {21.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
++ { 22, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} },
++ {22.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} },
++ { 23, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} },
++ {23.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} },
++ { 24, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3} }
++};
++
++
++const qp_table qp_table_444_12bpc_min = {
++ { 6, { 0, 5, 11, 11, 13, 13, 13, 13, 13, 14, 14, 14, 14, 17, 22} },
++ { 6.5, { 0, 5, 10, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 17, 22} },
++ { 7, { 0, 5, 10, 10, 12, 12, 12, 12, 12, 13, 13, 14, 14, 17, 21} },
++ { 7.5, { 0, 5, 9, 10, 11, 12, 12, 12, 12, 12, 13, 13, 14, 17, 21} },
++ { 8, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 12, 13, 13, 13, 16, 20} },
++ { 8.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 12, 13, 13, 13, 16, 20} },
++ { 9, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
++ { 9.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
++ { 10, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
++ {10.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
++ { 11, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ {11.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ { 12, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ {12.5, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ { 13, { 0, 4, 7, 8, 9, 11, 11, 11, 11, 11, 13, 13, 13, 15, 17} },
++ {13.5, { 0, 3, 6, 7, 9, 10, 10, 11, 11, 11, 12, 13, 13, 15, 17} },
++ { 14, { 0, 3, 5, 6, 9, 9, 9, 10, 11, 11, 12, 13, 13, 15, 17} },
++ {14.5, { 0, 2, 5, 6, 8, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ { 15, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ {15.5, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ { 16, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 11, 12, 12, 14, 15} },
++ {16.5, { 0, 2, 3, 5, 7, 8, 9, 10, 11, 11, 11, 12, 12, 14, 15} },
++ { 17, { 0, 2, 3, 5, 5, 6, 9, 9, 10, 10, 11, 11, 12, 13, 15} },
++ {17.5, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 15} },
++ { 18, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
++ {18.5, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
++ { 19, { 0, 1, 2, 4, 5, 5, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ {19.5, { 0, 1, 2, 4, 5, 5, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ { 20, { 0, 1, 2, 3, 4, 5, 7, 8, 8, 8, 9, 10, 10, 11, 13} },
++ {20.5, { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 13} },
++ { 21, { 0, 1, 2, 3, 4, 5, 5, 7, 7, 8, 9, 10, 10, 11, 13} },
++ {21.5, { 0, 1, 2, 3, 3, 4, 5, 7, 7, 8, 9, 10, 10, 11, 12} },
++ { 22, { 0, 0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 9, 10, 12} },
++ {22.5, { 0, 0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 9, 10, 11} },
++ { 23, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 7, 9, 9, 9, 9, 11} },
++ {23.5, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 7, 9, 9, 9, 9, 11} },
++ { 24, { 0, 0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 8, 8, 9, 11} },
++ {24.5, { 0, 0, 1, 2, 3, 4, 4, 6, 6, 7, 8, 8, 8, 9, 11} },
++ { 25, { 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 8, 8, 10} },
++ {25.5, { 0, 0, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
++ { 26, { 0, 0, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 9} },
++ {26.5, { 0, 0, 1, 2, 2, 3, 4, 5, 5, 5, 7, 7, 7, 7, 9} },
++ { 27, { 0, 0, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
++ {27.5, { 0, 0, 1, 1, 2, 2, 4, 4, 4, 5, 6, 7, 7, 7, 9} },
++ { 28, { 0, 0, 0, 1, 1, 2, 3, 4, 4, 4, 6, 6, 6, 7, 9} },
++ {28.5, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 6, 8} },
++ { 29, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 8} },
++ {29.5, { 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7} },
++ { 30, { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 5, 5, 5, 5, 7} },
++ {30.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 7} },
++ { 31, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 7} },
++ {31.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
++ { 32, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 6} },
++ {32.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 6} },
++ { 33, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
++ {33.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
++ { 34, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 5} },
++ {34.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 5} },
++ { 35, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} },
++ {35.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 4} },
++ { 36, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} }
++};
++
++
++const qp_table qp_table_420_12bpc_min = {
++ { 4, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 21} },
++ { 4.5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
++ { 5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
++ { 5.5, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
++ { 6, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ { 6.5, { 0, 4, 6, 8, 9, 10, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ { 7, { 0, 3, 5, 7, 9, 10, 10, 11, 11, 11, 13, 13, 13, 15, 17} },
++ { 7.5, { 0, 3, 5, 7, 8, 9, 10, 10, 11, 11, 12, 13, 13, 15, 16} },
++ { 8, { 0, 2, 4, 6, 7, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ { 8.5, { 0, 2, 4, 6, 6, 9, 9, 10, 11, 11, 12, 12, 13, 14, 15} },
++ { 9, { 0, 2, 4, 6, 6, 9, 9, 10, 10, 11, 11, 12, 13, 13, 14} },
++ { 9.5, { 0, 2, 4, 5, 6, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14} },
++ { 10, { 0, 2, 3, 5, 6, 7, 8, 8, 9, 10, 10, 12, 12, 12, 14} },
++ {10.5, { 0, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 11, 11, 13} },
++ { 11, { 0, 2, 3, 4, 5, 5, 6, 8, 8, 9, 9, 10, 11, 11, 12} },
++ {11.5, { 0, 1, 2, 3, 4, 5, 5, 7, 8, 8, 9, 10, 10, 10, 12} },
++ { 12, { 0, 0, 2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 11} },
++ {12.5, { 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 8, 8, 9, 10} },
++ { 13, { 0, 0, 0, 1, 3, 3, 5, 5, 6, 7, 7, 8, 8, 8, 10} },
++ {13.5, { 0, 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 7, 8, 10} },
++ { 14, { 0, 0, 0, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 7, 9} },
++ {14.5, { 0, 0, 0, 0, 1, 2, 3, 3, 4, 4, 5, 6, 6, 6, 8} },
++ { 15, { 0, 0, 0, 0, 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 8} },
++ {15.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
++ { 16, { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 6} },
++ {16.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
++ { 17, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
++ {17.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
++ { 18, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 4} }
++};
++
++
++const qp_table qp_table_422_12bpc_min = {
++ { 6, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
++ { 6.5, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
++ { 7, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
++ { 7.5, { 0, 4, 8, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
++ { 8, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
++ { 8.5, { 0, 3, 6, 8, 9, 10, 10, 11, 11, 11, 12, 13, 13, 15, 18} },
++ { 9, { 0, 3, 5, 8, 9, 10, 10, 10, 11, 11, 12, 13, 13, 15, 17} },
++ { 9.5, { 0, 3, 5, 7, 8, 9, 10, 10, 11, 11, 12, 13, 13, 15, 17} },
++ { 10, { 0, 2, 4, 6, 7, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ {10.5, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
++ { 11, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 12, 13, 14, 15} },
++ {11.5, { 0, 2, 4, 6, 7, 7, 9, 9, 10, 11, 11, 12, 12, 14, 15} },
++ { 12, { 0, 2, 4, 6, 6, 6, 8, 8, 9, 9, 11, 11, 12, 13, 14} },
++ {12.5, { 0, 1, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11, 11, 13, 14} },
++ { 13, { 0, 1, 3, 4, 5, 5, 7, 8, 8, 9, 10, 10, 11, 12, 13} },
++ {13.5, { 0, 1, 3, 3, 4, 5, 7, 7, 8, 8, 10, 10, 10, 12, 13} },
++ { 14, { 0, 0, 2, 3, 4, 5, 6, 6, 7, 7, 9, 10, 10, 11, 12} },
++ {14.5, { 0, 0, 1, 3, 4, 4, 6, 6, 6, 7, 9, 9, 9, 11, 12} },
++ { 15, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 6, 8, 9, 9, 10, 12} },
++ {15.5, { 0, 0, 1, 2, 3, 4, 5, 5, 6, 6, 8, 8, 8, 10, 11} },
++ { 16, { 0, 0, 1, 2, 3, 4, 5, 5, 6, 6, 8, 8, 8, 9, 11} },
++ {16.5, { 0, 0, 0, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7, 9, 10} },
++ { 17, { 0, 0, 0, 1, 2, 2, 4, 4, 4, 5, 6, 6, 6, 8, 10} },
++ {17.5, { 0, 0, 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 8, 9} },
++ { 18, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 5, 5, 6, 7, 9} },
++ {18.5, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 3, 5, 5, 5, 7, 9} },
++ { 19, { 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 8} },
++ {19.5, { 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 6, 8} },
++ { 20, { 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
++ {20.5, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 7} },
++ { 21, { 0, 0, 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
++ {21.5, { 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
++ { 22, { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 6} },
++ {22.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 5} },
++ { 23, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 5} },
++ {23.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 4} },
++ { 24, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 4} }
++};
++
++
++const qp_table qp_table_422_12bpc_max = {
++ { 6, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
++ { 6.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
++ { 7, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
++ { 7.5, { 9, 10, 12, 14, 15, 15, 15, 16, 16, 17, 17, 18, 18, 19, 20} },
++ { 8, { 6, 9, 10, 12, 14, 15, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
++ { 8.5, { 6, 8, 9, 11, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
++ { 9, { 5, 7, 8, 10, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 18} },
++ { 9.5, { 5, 7, 7, 9, 10, 12, 12, 13, 14, 14, 15, 15, 16, 17, 18} },
++ { 10, { 4, 6, 6, 8, 9, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
++ {10.5, { 4, 6, 6, 8, 9, 10, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
++ { 11, { 4, 5, 6, 8, 9, 10, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
++ {11.5, { 3, 5, 6, 8, 9, 9, 11, 11, 12, 13, 13, 14, 14, 15, 16} },
++ { 12, { 3, 5, 6, 8, 8, 8, 10, 10, 11, 11, 13, 13, 14, 14, 15} },
++ {12.5, { 3, 4, 6, 7, 8, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15} },
++ { 13, { 2, 4, 5, 6, 7, 7, 9, 10, 10, 11, 12, 12, 13, 13, 14} },
++ {13.5, { 2, 4, 5, 5, 6, 7, 9, 9, 10, 10, 12, 12, 12, 13, 14} },
++ { 14, { 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 11, 12, 12, 12, 13} },
++ {14.5, { 2, 3, 3, 5, 6, 6, 8, 8, 8, 9, 11, 11, 11, 12, 13} },
++ { 15, { 2, 3, 3, 5, 5, 6, 7, 8, 8, 8, 10, 11, 11, 11, 13} },
++ {15.5, { 2, 2, 3, 4, 5, 6, 7, 7, 8, 8, 10, 10, 10, 11, 12} },
++ { 16, { 2, 2, 3, 4, 5, 6, 7, 7, 8, 8, 10, 10, 10, 10, 12} },
++ {16.5, { 1, 2, 2, 4, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 11} },
++ { 17, { 1, 1, 2, 3, 4, 4, 6, 6, 6, 7, 8, 8, 8, 9, 11} },
++ {17.5, { 1, 1, 2, 3, 4, 4, 5, 6, 6, 6, 7, 8, 8, 9, 10} },
++ { 18, { 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 7, 7, 8, 8, 10} },
++ {18.5, { 1, 1, 1, 2, 3, 3, 5, 5, 5, 5, 7, 7, 7, 8, 10} },
++ { 19, { 1, 1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 7, 9} },
++ {19.5, { 1, 1, 1, 2, 2, 2, 4, 5, 5, 5, 6, 6, 6, 7, 9} },
++ { 20, { 1, 1, 1, 2, 2, 2, 4, 5, 5, 5, 6, 6, 6, 6, 8} },
++ {20.5, { 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 8} },
++ { 21, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 7} },
++ {21.5, { 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
++ { 22, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 7} },
++ {22.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
++ { 23, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
++ {23.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
++ { 24, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 5} }
++};
++
++
++const qp_table qp_table_444_12bpc_max = {
++ { 6, {12, 14, 16, 16, 17, 17, 17, 18, 19, 20, 20, 20, 20, 21, 23} },
++ { 6.5, {12, 14, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 20, 21, 23} },
++ { 7, {12, 13, 15, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 21, 22} },
++ { 7.5, {12, 13, 14, 15, 15, 16, 16, 17, 18, 18, 19, 19, 20, 21, 22} },
++ { 8, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
++ { 8.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
++ { 9, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 21} },
++ { 9.5, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 21} },
++ { 10, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
++ {10.5, {10, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 18, 19, 20} },
++ { 11, { 9, 11, 13, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 19} },
++ {11.5, { 9, 11, 13, 14, 15, 15, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
++ { 12, { 6, 9, 12, 13, 14, 14, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
++ {12.5, { 6, 9, 12, 13, 14, 14, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
++ { 13, { 5, 9, 12, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, 17, 18} },
++ {13.5, { 5, 8, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 18} },
++ { 14, { 5, 8, 10, 11, 12, 12, 12, 13, 14, 14, 15, 16, 16, 16, 18} },
++ {14.5, { 4, 7, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 17} },
++ { 15, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
++ {15.5, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
++ { 16, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 13, 14, 14, 15, 16} },
++ {16.5, { 4, 5, 7, 8, 10, 11, 11, 12, 13, 13, 13, 14, 14, 15, 16} },
++ { 17, { 4, 5, 7, 8, 8, 9, 11, 11, 12, 12, 12, 13, 13, 14, 16} },
++ {17.5, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 16} },
++ { 18, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 15} },
++ {18.5, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 15} },
++ { 19, { 3, 4, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 12, 13, 14} },
++ {19.5, { 3, 4, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 12, 13, 14} },
++ { 20, { 2, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11, 11, 12, 14} },
++ {20.5, { 2, 3, 5, 5, 7, 8, 8, 8, 9, 10, 10, 11, 11, 12, 14} },
++ { 21, { 2, 3, 5, 5, 7, 7, 7, 8, 8, 9, 10, 11, 11, 12, 14} },
++ {21.5, { 2, 3, 5, 5, 6, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13} },
++ { 22, { 2, 2, 4, 5, 6, 6, 7, 7, 8, 9, 10, 10, 10, 11, 13} },
++ {22.5, { 2, 2, 4, 5, 5, 6, 7, 7, 8, 9, 10, 10, 10, 11, 12} },
++ { 23, { 2, 2, 4, 5, 5, 6, 7, 7, 7, 8, 10, 10, 10, 10, 12} },
++ {23.5, { 2, 2, 3, 5, 5, 6, 7, 7, 7, 8, 10, 10, 10, 10, 12} },
++ { 24, { 2, 2, 3, 4, 4, 5, 7, 7, 7, 8, 9, 9, 9, 10, 12} },
++ {24.5, { 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10, 12} },
++ { 25, { 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 9, 9, 11} },
++ {25.5, { 1, 1, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9, 9, 9, 11} },
++ { 26, { 1, 1, 3, 3, 3, 4, 5, 6, 6, 7, 8, 8, 8, 8, 10} },
++ {26.5, { 1, 1, 2, 3, 3, 4, 5, 6, 6, 6, 8, 8, 8, 8, 10} },
++ { 27, { 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 10} },
++ {27.5, { 1, 1, 2, 2, 3, 3, 5, 5, 5, 6, 7, 8, 8, 8, 10} },
++ { 28, { 0, 1, 1, 2, 2, 3, 4, 5, 5, 5, 7, 7, 7, 8, 10} },
++ {28.5, { 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
++ { 29, { 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 9} },
++ {29.5, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8} },
++ { 30, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 6, 6, 6, 6, 8} },
++ {30.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8} },
++ { 31, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8} },
++ {31.5, { 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 8} },
++ { 32, { 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 7} },
++ {32.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 7} },
++ { 33, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
++ {33.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
++ { 34, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 6} },
++ {34.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 3, 4, 6} },
++ { 35, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} },
++ {35.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 5} },
++ { 36, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 4} }
++};
++
++
++const qp_table qp_table_420_8bpc_min = {
++ { 4, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 13} },
++ { 4.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
++ { 5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
++ { 5.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
++ { 6, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
++ { 6.5, { 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 5, 5, 7, 10} },
++ { 7, { 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
++ { 7.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 7, 8} },
++ { 8, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
++ { 8.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
++ { 9, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6} },
++ { 9.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
++ { 10, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5} },
++ {10.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 5} },
++ { 11, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4} },
++ {11.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
++ { 12, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3} }
++};
++
++
++const qp_table qp_table_422_8bpc_min = {
++ { 6, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
++ { 6.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
++ { 7, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
++ { 7.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
++ { 8, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
++ { 8.5, { 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 4, 5, 5, 7, 10} },
++ { 9, { 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
++ { 9.5, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 9} },
++ { 10, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
++ {10.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
++ { 11, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
++ {11.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
++ { 12, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6} },
++ {12.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
++ { 13, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5} },
++ {13.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5} },
++ { 14, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4} },
++ {14.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4} },
++ { 15, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 4} },
++ {15.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} },
++ { 16, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} }
++};
++
++
++const qp_table qp_table_422_10bpc_max = {
++ { 6, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
++ { 6.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
++ { 7, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
++ { 7.5, { 5, 6, 8, 10, 11, 11, 11, 12, 12, 13, 13, 14, 14, 15, 16} },
++ { 8, { 4, 6, 7, 9, 10, 11, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
++ { 8.5, { 4, 5, 6, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
++ { 9, { 3, 4, 5, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14} },
++ { 9.5, { 3, 4, 4, 6, 6, 8, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
++ { 10, { 2, 3, 3, 5, 5, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ {10.5, { 2, 3, 3, 5, 5, 6, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ { 11, { 2, 3, 3, 5, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
++ {11.5, { 2, 3, 3, 5, 5, 5, 7, 7, 8, 9, 9, 10, 10, 11, 12} },
++ { 12, { 2, 3, 3, 5, 5, 5, 7, 7, 8, 8, 9, 9, 10, 10, 11} },
++ {12.5, { 2, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
++ { 13, { 1, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 9, 9, 10} },
++ {13.5, { 1, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8, 8, 8, 9, 10} },
++ { 14, { 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9} },
++ {14.5, { 1, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 7, 7, 8, 9} },
++ { 15, { 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7, 7, 7, 9} },
++ {15.5, { 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 8} },
++ { 16, { 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 8} },
++ {16.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7} },
++ { 17, { 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 7} },
++ {17.5, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6} },
++ { 18, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 6} },
++ {18.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 6} },
++ { 19, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 5} },
++ {19.5, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 5} },
++ { 20, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 4} }
++};
++
++
++const qp_table qp_table_420_10bpc_max = {
++ { 4, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 17, 18} },
++ { 4.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
++ { 5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 16, 17} },
++ { 5.5, { 6, 7, 8, 9, 10, 10, 11, 12, 12, 13, 13, 14, 14, 15, 16} },
++ { 6, { 4, 6, 8, 9, 10, 10, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
++ { 6.5, { 4, 5, 7, 8, 8, 9, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
++ { 7, { 3, 4, 6, 7, 7, 8, 9, 10, 10, 11, 12, 12, 13, 13, 14} },
++ { 7.5, { 3, 4, 5, 6, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13} },
++ { 8, { 2, 3, 4, 5, 5, 6, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ { 8.5, { 1, 3, 3, 4, 4, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
++ { 9, { 1, 3, 3, 4, 4, 6, 7, 8, 8, 9, 9, 10, 10, 10, 11} },
++ { 9.5, { 1, 3, 3, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 10, 11} },
++ { 10, { 1, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8, 9, 9, 9, 11} },
++ {10.5, { 1, 1, 3, 3, 3, 4, 5, 5, 7, 7, 8, 8, 8, 8, 10} },
++ { 11, { 0, 1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 7, 8, 8, 9} },
++ {11.5, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 7, 9} },
++ { 12, { 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 8} },
++ {12.5, { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
++ { 13, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7} },
++ {13.5, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
++ { 14, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
++ {14.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 5} },
++ { 15, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} }
++};
++
++
++const qp_table qp_table_420_10bpc_min = {
++ { 4, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 17} },
++ { 4.5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
++ { 5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
++ { 5.5, { 0, 3, 3, 4, 6, 7, 7, 7, 7, 7, 9, 9, 9, 11, 15} },
++ { 6, { 0, 2, 3, 4, 6, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
++ { 6.5, { 0, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 11, 14} },
++ { 7, { 0, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 11, 13} },
++ { 7.5, { 0, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 9, 11, 12} },
++ { 8, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
++ { 8.5, { 0, 2, 2, 3, 3, 5, 5, 6, 6, 7, 8, 8, 9, 10, 11} },
++ { 9, { 0, 2, 2, 3, 3, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10} },
++ { 9.5, { 0, 2, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10} },
++ { 10, { 0, 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 8, 8, 8, 10} },
++ {10.5, { 0, 0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
++ { 11, { 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8} },
++ {11.5, { 0, 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 6, 6, 6, 8} },
++ { 12, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 5, 7} },
++ {12.5, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6} },
++ { 13, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
++ {13.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 5} },
++ { 14, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
++ {14.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
++ { 15, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} }
++};
++
++
++const qp_table qp_table_444_10bpc_max = {
++ { 6, { 8, 10, 12, 12, 13, 13, 13, 14, 15, 16, 16, 16, 16, 17, 19} },
++ { 6.5, { 8, 10, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 16, 17, 19} },
++ { 7, { 8, 9, 11, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 17, 18} },
++ { 7.5, { 8, 9, 10, 11, 11, 12, 12, 13, 14, 14, 15, 15, 16, 17, 18} },
++ { 8, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
++ { 8.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
++ { 9, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 17} },
++ { 9.5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 17} },
++ { 10, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
++ {10.5, { 6, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14, 15, 16} },
++ { 11, { 5, 7, 9, 10, 11, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15} },
++ {11.5, { 5, 7, 9, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
++ { 12, { 4, 6, 8, 9, 10, 10, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
++ {12.5, { 4, 6, 8, 9, 10, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
++ { 13, { 3, 6, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 14} },
++ {13.5, { 3, 5, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14} },
++ { 14, { 3, 5, 6, 7, 8, 8, 8, 9, 10, 10, 11, 12, 12, 12, 14} },
++ {14.5, { 2, 4, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13} },
++ { 15, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ {15.5, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
++ { 16, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 12} },
++ {16.5, { 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 12} },
++ { 17, { 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 9, 9, 10, 12} },
++ {17.5, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 12} },
++ { 18, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 11} },
++ {18.5, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 11} },
++ { 19, { 1, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 8, 9, 10} },
++ {19.5, { 1, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 8, 9, 10} },
++ { 20, { 1, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 10} },
++ {20.5, { 1, 2, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 7, 8, 10} },
++ { 21, { 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 7, 8, 10} },
++ {21.5, { 1, 2, 3, 3, 3, 3, 4, 5, 5, 5, 6, 7, 7, 8, 9} },
++ { 22, { 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 9} },
++ {22.5, { 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8} },
++ { 23, { 1, 1, 2, 3, 3, 3, 4, 4, 4, 4, 6, 6, 6, 6, 8} },
++ {23.5, { 1, 1, 1, 3, 3, 3, 4, 4, 4, 4, 6, 6, 6, 6, 8} },
++ { 24, { 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5, 6, 8} },
++ {24.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 8} },
++ { 25, { 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 7} },
++ {25.5, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 7} },
++ { 26, { 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 6} },
++ {26.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 6} },
++ { 27, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
++ {27.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
++ { 28, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 5} },
++ {28.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
++ { 29, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4} },
++ {29.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4} },
++ { 30, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4} }
++};
++
++
++const qp_table qp_table_422_8bpc_max = {
++ { 6, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
++ { 6.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
++ { 7, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
++ { 7.5, { 3, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12} },
++ { 8, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
++ { 8.5, { 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
++ { 9, { 1, 2, 3, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10} },
++ { 9.5, { 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 9, 10} },
++ { 10, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
++ {10.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
++ { 11, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8} },
++ {11.5, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8} },
++ { 12, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7} },
++ {12.5, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7} },
++ { 13, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6} },
++ {13.5, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 6} },
++ { 14, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5} },
++ {14.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 5} },
++ { 15, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 5} },
++ {15.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} },
++ { 16, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} }
++};
++
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
+new file mode 100644
+index 0000000000000..3ee858f311d12
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c
+@@ -0,0 +1,291 @@
++/*
++ * Copyright 2021 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#include "rc_calc_fpu.h"
++
++#include "qp_tables.h"
++#include "amdgpu_dm/dc_fpu.h"
++
++#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
++
++#define MODE_SELECT(val444, val422, val420) \
++ (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
++
++
++#define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \
++ table = qp_table_##mode##_##bpc##bpc_##max; \
++ table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
++ break
++
++static int median3(int a, int b, int c)
++{
++ if (a > b)
++ swap(a, b);
++ if (b > c)
++ swap(b, c);
++ if (a > b)
++ swap(b, c);
++
++ return b;
++}
++
++static double dsc_roundf(double num)
++{
++ if (num < 0.0)
++ num = num - 0.5;
++ else
++ num = num + 0.5;
++
++ return (int)(num);
++}
++
++static double dsc_ceil(double num)
++{
++ double retval = (int)num;
++
++ if (retval != num && num > 0)
++ retval = num + 1;
++
++ return (int)retval;
++}
++
++static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
++ enum max_min max_min, float bpp)
++{
++ int mode = MODE_SELECT(444, 422, 420);
++ int sel = table_hash(mode, bpc, max_min);
++ int table_size = 0;
++ int index;
++ const struct qp_entry *table = 0L;
++
++ // alias enum
++ enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
++ switch (sel) {
++ TABLE_CASE(444, 8, max);
++ TABLE_CASE(444, 8, min);
++ TABLE_CASE(444, 10, max);
++ TABLE_CASE(444, 10, min);
++ TABLE_CASE(444, 12, max);
++ TABLE_CASE(444, 12, min);
++ TABLE_CASE(422, 8, max);
++ TABLE_CASE(422, 8, min);
++ TABLE_CASE(422, 10, max);
++ TABLE_CASE(422, 10, min);
++ TABLE_CASE(422, 12, max);
++ TABLE_CASE(422, 12, min);
++ TABLE_CASE(420, 8, max);
++ TABLE_CASE(420, 8, min);
++ TABLE_CASE(420, 10, max);
++ TABLE_CASE(420, 10, min);
++ TABLE_CASE(420, 12, max);
++ TABLE_CASE(420, 12, min);
++ }
++
++ if (table == 0)
++ return;
++
++ index = (bpp - table[0].bpp) * 2;
++
++ /* requested size is bigger than the table */
++ if (index >= table_size) {
++ dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
++ return;
++ }
++
++ memcpy(qps, table[index].qps, sizeof(qp_set));
++}
++
++static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
++{
++ int *p = ofs;
++
++ if (mode == CM_444 || mode == CM_RGB) {
++ *p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
++ *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
++ *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
++ *p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
++ *p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
++ *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
++ *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
++ *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
++ *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
++ *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
++ *p++ = -10;
++ *p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0))));
++ *p++ = -12;
++ *p++ = -12;
++ *p++ = -12;
++ } else if (mode == CM_422) {
++ *p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0))));
++ *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0))));
++ *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0))));
++ *p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0))));
++ *p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0))));
++ *p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0))));
++ *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0))));
++ *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0))));
++ *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0))));
++ *p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0))));
++ *p++ = -10;
++ *p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1))));
++ *p++ = -12;
++ *p++ = -12;
++ *p++ = -12;
++ } else {
++ *p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0))));
++ *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0))));
++ *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0))));
++ *p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0))));
++ *p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0))));
++ *p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0))));
++ *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0))));
++ *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0))));
++ *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0))));
++ *p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0))));
++ *p++ = -10;
++ *p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0))));
++ *p++ = -12;
++ *p++ = -12;
++ *p++ = -12;
++ }
++}
++
++void _do_calc_rc_params(struct rc_params *rc,
++ enum colour_mode cm,
++ enum bits_per_comp bpc,
++ u16 drm_bpp,
++ bool is_navite_422_or_420,
++ int slice_width,
++ int slice_height,
++ int minor_version)
++{
++ float bpp;
++ float bpp_group;
++ float initial_xmit_delay_factor;
++ int padding_pixels;
++ int i;
++
++ dc_assert_fp_enabled();
++
++ bpp = ((float)drm_bpp / 16.0);
++ /* in native_422 or native_420 modes, the bits_per_pixel is double the
++ * target bpp (the latter is what calc_rc_params expects)
++ */
++ if (is_navite_422_or_420)
++ bpp /= 2.0;
++
++ rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
++ rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
++
++ bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
++
++ switch (cm) {
++ case CM_420:
++ rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584)))));
++ rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
++ rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
++ break;
++ case CM_422:
++ rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
++ rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
++ rc->second_line_bpg_offset = 0;
++ break;
++ case CM_444:
++ case CM_RGB:
++ rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
++ rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
++ rc->second_line_bpg_offset = 0;
++ break;
++ }
++
++ initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
++ rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
++
++ if (cm == CM_422 || cm == CM_420)
++ slice_width /= 2;
++
++ padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
++ if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
++ if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
++ rc->initial_xmit_delay++;
++ }
++
++ rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
++ rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
++ rc->flatness_det_thresh = 2 << (bpc - 8);
++
++ get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
++ get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
++ if (cm == CM_444 && minor_version == 1) {
++ for (i = 0; i < QP_SET_SIZE; ++i) {
++ rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
++ rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
++ }
++ }
++ get_ofs_set(rc->ofs, cm, bpp);
++
++ /* fixed parameters */
++ rc->rc_model_size = 8192;
++ rc->rc_edge_factor = 6;
++ rc->rc_tgt_offset_hi = 3;
++ rc->rc_tgt_offset_lo = 3;
++
++ rc->rc_buf_thresh[0] = 896;
++ rc->rc_buf_thresh[1] = 1792;
++ rc->rc_buf_thresh[2] = 2688;
++ rc->rc_buf_thresh[3] = 3584;
++ rc->rc_buf_thresh[4] = 4480;
++ rc->rc_buf_thresh[5] = 5376;
++ rc->rc_buf_thresh[6] = 6272;
++ rc->rc_buf_thresh[7] = 6720;
++ rc->rc_buf_thresh[8] = 7168;
++ rc->rc_buf_thresh[9] = 7616;
++ rc->rc_buf_thresh[10] = 7744;
++ rc->rc_buf_thresh[11] = 7872;
++ rc->rc_buf_thresh[12] = 8000;
++ rc->rc_buf_thresh[13] = 8064;
++}
++
++u32 _do_bytes_per_pixel_calc(int slice_width,
++ u16 drm_bpp,
++ bool is_navite_422_or_420)
++{
++ float bpp;
++ u32 bytes_per_pixel;
++ double d_bytes_per_pixel;
++
++ dc_assert_fp_enabled();
++
++ bpp = ((float)drm_bpp / 16.0);
++ d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
++ // TODO: Make sure the formula for calculating this is precise (ceiling
++ // vs. floor, and at what point they should be applied)
++ if (is_navite_422_or_420)
++ d_bytes_per_pixel /= 2;
++
++ bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
++
++ return bytes_per_pixel;
++}
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
+new file mode 100644
+index 0000000000000..b93b95409fbe2
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h
+@@ -0,0 +1,94 @@
++/*
++ * Copyright 2021 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#ifndef __RC_CALC_FPU_H__
++#define __RC_CALC_FPU_H__
++
++#include "os_types.h"
++#include <drm/drm_dsc.h>
++
++#define QP_SET_SIZE 15
++
++typedef int qp_set[QP_SET_SIZE];
++
++struct rc_params {
++ int rc_quant_incr_limit0;
++ int rc_quant_incr_limit1;
++ int initial_fullness_offset;
++ int initial_xmit_delay;
++ int first_line_bpg_offset;
++ int second_line_bpg_offset;
++ int flatness_min_qp;
++ int flatness_max_qp;
++ int flatness_det_thresh;
++ qp_set qp_min;
++ qp_set qp_max;
++ qp_set ofs;
++ int rc_model_size;
++ int rc_edge_factor;
++ int rc_tgt_offset_hi;
++ int rc_tgt_offset_lo;
++ int rc_buf_thresh[QP_SET_SIZE - 1];
++};
++
++enum colour_mode {
++ CM_RGB, /* 444 RGB */
++ CM_444, /* 444 YUV or simple 422 */
++ CM_422, /* native 422 */
++ CM_420 /* native 420 */
++};
++
++enum bits_per_comp {
++ BPC_8 = 8,
++ BPC_10 = 10,
++ BPC_12 = 12
++};
++
++enum max_min {
++ DAL_MM_MIN = 0,
++ DAL_MM_MAX = 1
++};
++
++struct qp_entry {
++ float bpp;
++ const qp_set qps;
++};
++
++typedef struct qp_entry qp_table[];
++
++u32 _do_bytes_per_pixel_calc(int slice_width,
++ u16 drm_bpp,
++ bool is_navite_422_or_420);
++
++void _do_calc_rc_params(struct rc_params *rc,
++ enum colour_mode cm,
++ enum bits_per_comp bpc,
++ u16 drm_bpp,
++ bool is_navite_422_or_420,
++ int slice_width,
++ int slice_height,
++ int minor_version);
++
++#endif
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+index 8d31eb75c6a6e..a2537229ee88b 100644
+--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+@@ -1,35 +1,6 @@
+ # SPDX-License-Identifier: MIT
+ #
+ # Makefile for the 'dsc' sub-component of DAL.
+-
+-ifdef CONFIG_X86
+-dsc_ccflags := -mhard-float -msse
+-endif
+-
+-ifdef CONFIG_PPC64
+-dsc_ccflags := -mhard-float -maltivec
+-endif
+-
+-ifdef CONFIG_CC_IS_GCC
+-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+-IS_OLD_GCC = 1
+-endif
+-endif
+-
+-ifdef CONFIG_X86
+-ifdef IS_OLD_GCC
+-# Stack alignment mismatch, proceed with caution.
+-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+-# (8B stack alignment).
+-dsc_ccflags += -mpreferred-stack-boundary=4
+-else
+-dsc_ccflags += -msse2
+-endif
+-endif
+-
+-CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
+-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_rcflags)
+-
+ DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
+
+ AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h
+deleted file mode 100644
+index e5fac9f4181d8..0000000000000
+--- a/drivers/gpu/drm/amd/display/dc/dsc/qp_tables.h
++++ /dev/null
+@@ -1,704 +0,0 @@
+-
+-/*
+- * Copyright 2017 Advanced Micro Devices, Inc.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a
+- * copy of this software and associated documentation files (the "Software"),
+- * to deal in the Software without restriction, including without limitation
+- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+- * and/or sell copies of the Software, and to permit persons to whom the
+- * Software is furnished to do so, subject to the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+- * OTHER DEALINGS IN THE SOFTWARE.
+- *
+- * Authors: AMD
+- *
+- */
+-
+-
+-const qp_table qp_table_422_10bpc_min = {
+- { 6, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
+- { 6.5, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
+- { 7, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 11, 15} },
+- { 7.5, { 0, 2, 4, 6, 6, 6, 6, 7, 7, 7, 8, 9, 9, 11, 15} },
+- { 8, { 0, 2, 3, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 11, 14} },
+- { 8.5, { 0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 14} },
+- { 9, { 0, 2, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 9, 11, 13} },
+- { 9.5, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 13} },
+- { 10, { 0, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
+- {10.5, { 0, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
+- { 11, { 0, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11} },
+- {11.5, { 0, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 10, 11} },
+- { 12, { 0, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9, 10} },
+- {12.5, { 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
+- { 13, { 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 6, 6, 8, 8, 9} },
+- {13.5, { 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 9} },
+- { 14, { 0, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 8} },
+- {14.5, { 0, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8} },
+- { 15, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6, 6, 8} },
+- {15.5, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
+- { 16, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 7} },
+- {16.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6} },
+- { 17, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 6} },
+- {17.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
+- { 18, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 5} },
+- {18.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
+- { 19, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 4} },
+- {19.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 4} },
+- { 20, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 3} }
+-};
+-
+-
+-const qp_table qp_table_444_8bpc_max = {
+- { 6, { 4, 6, 8, 8, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 15} },
+- { 6.5, { 4, 6, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15} },
+- { 7, { 4, 5, 7, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 13, 14} },
+- { 7.5, { 4, 5, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
+- { 8, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
+- { 8.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
+- { 9, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 13} },
+- { 9.5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 13} },
+- { 10, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
+- {10.5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 10, 11, 12} },
+- { 11, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11} },
+- {11.5, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
+- { 12, { 2, 3, 4, 5, 6, 6, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
+- {12.5, { 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
+- { 13, { 1, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 9, 10} },
+- {13.5, { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10} },
+- { 14, { 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
+- {14.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9} },
+- { 15, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
+- {15.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
+- { 16, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8} },
+- {16.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8} },
+- { 17, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 8} },
+- {17.5, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 8} },
+- { 18, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7} },
+- {18.5, { 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 7} },
+- { 19, { 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6} },
+- {19.5, { 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6} },
+- { 20, { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 6} },
+- {20.5, { 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 6} },
+- { 21, { 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
+- {21.5, { 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
+- { 22, { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} },
+- {22.5, { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} },
+- { 23, { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
+- {23.5, { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
+- { 24, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_420_12bpc_max = {
+- { 4, {11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 21, 22} },
+- { 4.5, {10, 11, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
+- { 5, { 9, 11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21} },
+- { 5.5, { 8, 10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18, 19, 20} },
+- { 6, { 6, 9, 11, 12, 13, 14, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
+- { 6.5, { 6, 8, 10, 11, 11, 13, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
+- { 7, { 5, 7, 9, 10, 10, 12, 13, 14, 14, 15, 16, 16, 17, 17, 18} },
+- { 7.5, { 5, 7, 8, 9, 9, 11, 12, 13, 14, 14, 15, 15, 16, 16, 17} },
+- { 8, { 4, 6, 7, 8, 8, 10, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
+- { 8.5, { 3, 6, 6, 7, 7, 10, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
+- { 9, { 3, 5, 6, 7, 7, 10, 11, 12, 12, 13, 13, 14, 14, 14, 15} },
+- { 9.5, { 2, 5, 6, 6, 7, 9, 10, 11, 12, 12, 13, 13, 13, 14, 15} },
+- { 10, { 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 13, 13, 15} },
+- {10.5, { 2, 3, 5, 5, 6, 7, 8, 9, 11, 11, 12, 12, 12, 12, 14} },
+- { 11, { 1, 3, 4, 5, 6, 6, 7, 9, 10, 11, 11, 11, 12, 12, 13} },
+- {11.5, { 1, 2, 3, 4, 5, 6, 6, 8, 9, 10, 10, 11, 11, 11, 13} },
+- { 12, { 1, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 10, 12} },
+- {12.5, { 1, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 10, 11} },
+- { 13, { 1, 1, 1, 2, 4, 4, 6, 6, 7, 8, 8, 9, 9, 9, 11} },
+- {13.5, { 1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 8, 9, 11} },
+- { 14, { 1, 1, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
+- {14.5, { 0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
+- { 15, { 0, 1, 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 6, 7, 9} },
+- {15.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 8} },
+- { 16, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 7} },
+- {16.5, { 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 7} },
+- { 17, { 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
+- {17.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
+- { 18, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 5} }
+-};
+-
+-
+-const qp_table qp_table_444_10bpc_min = {
+- { 6, { 0, 4, 7, 7, 9, 9, 9, 9, 9, 10, 10, 10, 10, 12, 18} },
+- { 6.5, { 0, 4, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 12, 18} },
+- { 7, { 0, 4, 6, 6, 8, 8, 8, 8, 8, 9, 9, 10, 10, 12, 17} },
+- { 7.5, { 0, 4, 6, 6, 7, 8, 8, 8, 8, 8, 9, 9, 10, 12, 17} },
+- { 8, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 8, 9, 9, 9, 12, 16} },
+- { 8.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 8, 9, 9, 9, 12, 16} },
+- { 9, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
+- { 9.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
+- { 10, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 15} },
+- {10.5, { 0, 4, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 15} },
+- { 11, { 0, 3, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
+- {11.5, { 0, 3, 5, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
+- { 12, { 0, 2, 4, 4, 6, 6, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
+- {12.5, { 0, 2, 4, 4, 6, 6, 7, 7, 7, 7, 8, 9, 9, 11, 14} },
+- { 13, { 0, 2, 4, 4, 5, 6, 7, 7, 7, 7, 8, 9, 9, 11, 13} },
+- {13.5, { 0, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 11, 13} },
+- { 14, { 0, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 11, 13} },
+- {14.5, { 0, 2, 3, 4, 5, 5, 6, 6, 6, 7, 7, 8, 9, 11, 12} },
+- { 15, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 11, 12} },
+- {15.5, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 11, 12} },
+- { 16, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 8, 10, 11} },
+- {16.5, { 0, 1, 2, 3, 4, 5, 5, 6, 6, 6, 7, 8, 8, 10, 11} },
+- { 17, { 0, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 9, 11} },
+- {17.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 11} },
+- { 18, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
+- {18.5, { 0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10} },
+- { 19, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9} },
+- {19.5, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9} },
+- { 20, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 9} },
+- {20.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 9} },
+- { 21, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6, 7, 9} },
+- {21.5, { 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 8} },
+- { 22, { 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 8} },
+- {22.5, { 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
+- { 23, { 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 7} },
+- {23.5, { 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 5, 7} },
+- { 24, { 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 5, 7} },
+- {24.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
+- { 25, { 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
+- {25.5, { 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
+- { 26, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 5} },
+- {26.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 5} },
+- { 27, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
+- {27.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
+- { 28, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 4} },
+- {28.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 4} },
+- { 29, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3} },
+- {29.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3} },
+- { 30, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} }
+-};
+-
+-
+-const qp_table qp_table_420_8bpc_max = {
+- { 4, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 13, 14} },
+- { 4.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
+- { 5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 13} },
+- { 5.5, { 3, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12} },
+- { 6, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
+- { 6.5, { 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
+- { 7, { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 9, 10} },
+- { 7.5, { 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9} },
+- { 8, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
+- { 8.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8} },
+- { 9, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7} },
+- { 9.5, { 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
+- { 10, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6} },
+- {10.5, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
+- { 11, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5} },
+- {11.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5} },
+- { 12, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4} }
+-};
+-
+-
+-const qp_table qp_table_444_8bpc_min = {
+- { 6, { 0, 1, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 9, 14} },
+- { 6.5, { 0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 9, 14} },
+- { 7, { 0, 0, 2, 2, 4, 4, 4, 4, 4, 5, 5, 6, 6, 9, 13} },
+- { 7.5, { 0, 0, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 6, 9, 13} },
+- { 8, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 4, 5, 5, 5, 8, 12} },
+- { 8.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 4, 5, 5, 5, 8, 12} },
+- { 9, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 12} },
+- { 9.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 12} },
+- { 10, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
+- {10.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
+- { 11, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
+- {11.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
+- { 12, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
+- {12.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 4, 5, 5, 7, 10} },
+- { 13, { 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5, 7, 9} },
+- {13.5, { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
+- { 14, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
+- {14.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 5, 7, 8} },
+- { 15, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
+- {15.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
+- { 16, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
+- {16.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
+- { 17, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
+- {17.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
+- { 18, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
+- {18.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
+- { 19, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5} },
+- {19.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5} },
+- { 20, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
+- {20.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
+- { 21, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
+- {21.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
+- { 22, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} },
+- {22.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} },
+- { 23, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} },
+- {23.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} },
+- { 24, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3} }
+-};
+-
+-
+-const qp_table qp_table_444_12bpc_min = {
+- { 6, { 0, 5, 11, 11, 13, 13, 13, 13, 13, 14, 14, 14, 14, 17, 22} },
+- { 6.5, { 0, 5, 10, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 17, 22} },
+- { 7, { 0, 5, 10, 10, 12, 12, 12, 12, 12, 13, 13, 14, 14, 17, 21} },
+- { 7.5, { 0, 5, 9, 10, 11, 12, 12, 12, 12, 12, 13, 13, 14, 17, 21} },
+- { 8, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 12, 13, 13, 13, 16, 20} },
+- { 8.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 12, 13, 13, 13, 16, 20} },
+- { 9, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
+- { 9.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
+- { 10, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
+- {10.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
+- { 11, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- {11.5, { 0, 4, 8, 9, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- { 12, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- {12.5, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- { 13, { 0, 4, 7, 8, 9, 11, 11, 11, 11, 11, 13, 13, 13, 15, 17} },
+- {13.5, { 0, 3, 6, 7, 9, 10, 10, 11, 11, 11, 12, 13, 13, 15, 17} },
+- { 14, { 0, 3, 5, 6, 9, 9, 9, 10, 11, 11, 12, 13, 13, 15, 17} },
+- {14.5, { 0, 2, 5, 6, 8, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- { 15, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- {15.5, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- { 16, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 11, 12, 12, 14, 15} },
+- {16.5, { 0, 2, 3, 5, 7, 8, 9, 10, 11, 11, 11, 12, 12, 14, 15} },
+- { 17, { 0, 2, 3, 5, 5, 6, 9, 9, 10, 10, 11, 11, 12, 13, 15} },
+- {17.5, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 15} },
+- { 18, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
+- {18.5, { 0, 2, 3, 5, 5, 6, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
+- { 19, { 0, 1, 2, 4, 5, 5, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- {19.5, { 0, 1, 2, 4, 5, 5, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- { 20, { 0, 1, 2, 3, 4, 5, 7, 8, 8, 8, 9, 10, 10, 11, 13} },
+- {20.5, { 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 13} },
+- { 21, { 0, 1, 2, 3, 4, 5, 5, 7, 7, 8, 9, 10, 10, 11, 13} },
+- {21.5, { 0, 1, 2, 3, 3, 4, 5, 7, 7, 8, 9, 10, 10, 11, 12} },
+- { 22, { 0, 0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 9, 10, 12} },
+- {22.5, { 0, 0, 1, 3, 3, 4, 5, 6, 7, 8, 9, 9, 9, 10, 11} },
+- { 23, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 7, 9, 9, 9, 9, 11} },
+- {23.5, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 7, 9, 9, 9, 9, 11} },
+- { 24, { 0, 0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 8, 8, 9, 11} },
+- {24.5, { 0, 0, 1, 2, 3, 4, 4, 6, 6, 7, 8, 8, 8, 9, 11} },
+- { 25, { 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 8, 8, 10} },
+- {25.5, { 0, 0, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 8, 10} },
+- { 26, { 0, 0, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 9} },
+- {26.5, { 0, 0, 1, 2, 2, 3, 4, 5, 5, 5, 7, 7, 7, 7, 9} },
+- { 27, { 0, 0, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
+- {27.5, { 0, 0, 1, 1, 2, 2, 4, 4, 4, 5, 6, 7, 7, 7, 9} },
+- { 28, { 0, 0, 0, 1, 1, 2, 3, 4, 4, 4, 6, 6, 6, 7, 9} },
+- {28.5, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 6, 8} },
+- { 29, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 8} },
+- {29.5, { 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7} },
+- { 30, { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 5, 5, 5, 5, 7} },
+- {30.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 7} },
+- { 31, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 7} },
+- {31.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
+- { 32, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 6} },
+- {32.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 6} },
+- { 33, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
+- {33.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 5} },
+- { 34, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 5} },
+- {34.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 5} },
+- { 35, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} },
+- {35.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 4} },
+- { 36, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3} }
+-};
+-
+-
+-const qp_table qp_table_420_12bpc_min = {
+- { 4, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 21} },
+- { 4.5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
+- { 5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
+- { 5.5, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
+- { 6, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- { 6.5, { 0, 4, 6, 8, 9, 10, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- { 7, { 0, 3, 5, 7, 9, 10, 10, 11, 11, 11, 13, 13, 13, 15, 17} },
+- { 7.5, { 0, 3, 5, 7, 8, 9, 10, 10, 11, 11, 12, 13, 13, 15, 16} },
+- { 8, { 0, 2, 4, 6, 7, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- { 8.5, { 0, 2, 4, 6, 6, 9, 9, 10, 11, 11, 12, 12, 13, 14, 15} },
+- { 9, { 0, 2, 4, 6, 6, 9, 9, 10, 10, 11, 11, 12, 13, 13, 14} },
+- { 9.5, { 0, 2, 4, 5, 6, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14} },
+- { 10, { 0, 2, 3, 5, 6, 7, 8, 8, 9, 10, 10, 12, 12, 12, 14} },
+- {10.5, { 0, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 11, 11, 13} },
+- { 11, { 0, 2, 3, 4, 5, 5, 6, 8, 8, 9, 9, 10, 11, 11, 12} },
+- {11.5, { 0, 1, 2, 3, 4, 5, 5, 7, 8, 8, 9, 10, 10, 10, 12} },
+- { 12, { 0, 0, 2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 11} },
+- {12.5, { 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 8, 8, 9, 10} },
+- { 13, { 0, 0, 0, 1, 3, 3, 5, 5, 6, 7, 7, 8, 8, 8, 10} },
+- {13.5, { 0, 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 7, 8, 10} },
+- { 14, { 0, 0, 0, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 7, 9} },
+- {14.5, { 0, 0, 0, 0, 1, 2, 3, 3, 4, 4, 5, 6, 6, 6, 8} },
+- { 15, { 0, 0, 0, 0, 0, 1, 2, 2, 4, 4, 4, 5, 5, 6, 8} },
+- {15.5, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7} },
+- { 16, { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 6} },
+- {16.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
+- { 17, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
+- {17.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 5} },
+- { 18, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_422_12bpc_min = {
+- { 6, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
+- { 6.5, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
+- { 7, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
+- { 7.5, { 0, 4, 8, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
+- { 8, { 0, 4, 7, 8, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 18} },
+- { 8.5, { 0, 3, 6, 8, 9, 10, 10, 11, 11, 11, 12, 13, 13, 15, 18} },
+- { 9, { 0, 3, 5, 8, 9, 10, 10, 10, 11, 11, 12, 13, 13, 15, 17} },
+- { 9.5, { 0, 3, 5, 7, 8, 9, 10, 10, 11, 11, 12, 13, 13, 15, 17} },
+- { 10, { 0, 2, 4, 6, 7, 9, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- {10.5, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13, 15, 16} },
+- { 11, { 0, 2, 4, 6, 7, 8, 9, 10, 11, 11, 12, 12, 13, 14, 15} },
+- {11.5, { 0, 2, 4, 6, 7, 7, 9, 9, 10, 11, 11, 12, 12, 14, 15} },
+- { 12, { 0, 2, 4, 6, 6, 6, 8, 8, 9, 9, 11, 11, 12, 13, 14} },
+- {12.5, { 0, 1, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11, 11, 13, 14} },
+- { 13, { 0, 1, 3, 4, 5, 5, 7, 8, 8, 9, 10, 10, 11, 12, 13} },
+- {13.5, { 0, 1, 3, 3, 4, 5, 7, 7, 8, 8, 10, 10, 10, 12, 13} },
+- { 14, { 0, 0, 2, 3, 4, 5, 6, 6, 7, 7, 9, 10, 10, 11, 12} },
+- {14.5, { 0, 0, 1, 3, 4, 4, 6, 6, 6, 7, 9, 9, 9, 11, 12} },
+- { 15, { 0, 0, 1, 3, 3, 4, 5, 6, 6, 6, 8, 9, 9, 10, 12} },
+- {15.5, { 0, 0, 1, 2, 3, 4, 5, 5, 6, 6, 8, 8, 8, 10, 11} },
+- { 16, { 0, 0, 1, 2, 3, 4, 5, 5, 6, 6, 8, 8, 8, 9, 11} },
+- {16.5, { 0, 0, 0, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7, 9, 10} },
+- { 17, { 0, 0, 0, 1, 2, 2, 4, 4, 4, 5, 6, 6, 6, 8, 10} },
+- {17.5, { 0, 0, 0, 1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 8, 9} },
+- { 18, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 5, 5, 6, 7, 9} },
+- {18.5, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 3, 5, 5, 5, 7, 9} },
+- { 19, { 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 8} },
+- {19.5, { 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 6, 8} },
+- { 20, { 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
+- {20.5, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 7} },
+- { 21, { 0, 0, 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
+- {21.5, { 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
+- { 22, { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 6} },
+- {22.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 5} },
+- { 23, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 5} },
+- {23.5, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 4} },
+- { 24, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_422_12bpc_max = {
+- { 6, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
+- { 6.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
+- { 7, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
+- { 7.5, { 9, 10, 12, 14, 15, 15, 15, 16, 16, 17, 17, 18, 18, 19, 20} },
+- { 8, { 6, 9, 10, 12, 14, 15, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
+- { 8.5, { 6, 8, 9, 11, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
+- { 9, { 5, 7, 8, 10, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 18} },
+- { 9.5, { 5, 7, 7, 9, 10, 12, 12, 13, 14, 14, 15, 15, 16, 17, 18} },
+- { 10, { 4, 6, 6, 8, 9, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
+- {10.5, { 4, 6, 6, 8, 9, 10, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
+- { 11, { 4, 5, 6, 8, 9, 10, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
+- {11.5, { 3, 5, 6, 8, 9, 9, 11, 11, 12, 13, 13, 14, 14, 15, 16} },
+- { 12, { 3, 5, 6, 8, 8, 8, 10, 10, 11, 11, 13, 13, 14, 14, 15} },
+- {12.5, { 3, 4, 6, 7, 8, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15} },
+- { 13, { 2, 4, 5, 6, 7, 7, 9, 10, 10, 11, 12, 12, 13, 13, 14} },
+- {13.5, { 2, 4, 5, 5, 6, 7, 9, 9, 10, 10, 12, 12, 12, 13, 14} },
+- { 14, { 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 11, 12, 12, 12, 13} },
+- {14.5, { 2, 3, 3, 5, 6, 6, 8, 8, 8, 9, 11, 11, 11, 12, 13} },
+- { 15, { 2, 3, 3, 5, 5, 6, 7, 8, 8, 8, 10, 11, 11, 11, 13} },
+- {15.5, { 2, 2, 3, 4, 5, 6, 7, 7, 8, 8, 10, 10, 10, 11, 12} },
+- { 16, { 2, 2, 3, 4, 5, 6, 7, 7, 8, 8, 10, 10, 10, 10, 12} },
+- {16.5, { 1, 2, 2, 4, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 11} },
+- { 17, { 1, 1, 2, 3, 4, 4, 6, 6, 6, 7, 8, 8, 8, 9, 11} },
+- {17.5, { 1, 1, 2, 3, 4, 4, 5, 6, 6, 6, 7, 8, 8, 9, 10} },
+- { 18, { 1, 1, 1, 2, 3, 3, 5, 5, 5, 6, 7, 7, 8, 8, 10} },
+- {18.5, { 1, 1, 1, 2, 3, 3, 5, 5, 5, 5, 7, 7, 7, 8, 10} },
+- { 19, { 1, 1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 7, 9} },
+- {19.5, { 1, 1, 1, 2, 2, 2, 4, 5, 5, 5, 6, 6, 6, 7, 9} },
+- { 20, { 1, 1, 1, 2, 2, 2, 4, 5, 5, 5, 6, 6, 6, 6, 8} },
+- {20.5, { 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 8} },
+- { 21, { 0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 7} },
+- {21.5, { 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 7} },
+- { 22, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 7} },
+- {22.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 6} },
+- { 23, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6} },
+- {23.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 5} },
+- { 24, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 5} }
+-};
+-
+-
+-const qp_table qp_table_444_12bpc_max = {
+- { 6, {12, 14, 16, 16, 17, 17, 17, 18, 19, 20, 20, 20, 20, 21, 23} },
+- { 6.5, {12, 14, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 20, 21, 23} },
+- { 7, {12, 13, 15, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 21, 22} },
+- { 7.5, {12, 13, 14, 15, 15, 16, 16, 17, 18, 18, 19, 19, 20, 21, 22} },
+- { 8, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
+- { 8.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
+- { 9, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 21} },
+- { 9.5, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 21} },
+- { 10, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
+- {10.5, {10, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 18, 19, 20} },
+- { 11, { 9, 11, 13, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 19} },
+- {11.5, { 9, 11, 13, 14, 15, 15, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
+- { 12, { 6, 9, 12, 13, 14, 14, 15, 16, 16, 17, 17, 17, 17, 18, 19} },
+- {12.5, { 6, 9, 12, 13, 14, 14, 14, 15, 15, 16, 16, 17, 17, 18, 19} },
+- { 13, { 5, 9, 12, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, 17, 18} },
+- {13.5, { 5, 8, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 18} },
+- { 14, { 5, 8, 10, 11, 12, 12, 12, 13, 14, 14, 15, 16, 16, 16, 18} },
+- {14.5, { 4, 7, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 17} },
+- { 15, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
+- {15.5, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17} },
+- { 16, { 4, 7, 9, 10, 10, 11, 11, 12, 13, 13, 13, 14, 14, 15, 16} },
+- {16.5, { 4, 5, 7, 8, 10, 11, 11, 12, 13, 13, 13, 14, 14, 15, 16} },
+- { 17, { 4, 5, 7, 8, 8, 9, 11, 11, 12, 12, 12, 13, 13, 14, 16} },
+- {17.5, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 16} },
+- { 18, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 15} },
+- {18.5, { 3, 5, 7, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 14, 15} },
+- { 19, { 3, 4, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 12, 13, 14} },
+- {19.5, { 3, 4, 6, 7, 8, 8, 9, 10, 11, 11, 11, 12, 12, 13, 14} },
+- { 20, { 2, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11, 11, 12, 14} },
+- {20.5, { 2, 3, 5, 5, 7, 8, 8, 8, 9, 10, 10, 11, 11, 12, 14} },
+- { 21, { 2, 3, 5, 5, 7, 7, 7, 8, 8, 9, 10, 11, 11, 12, 14} },
+- {21.5, { 2, 3, 5, 5, 6, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13} },
+- { 22, { 2, 2, 4, 5, 6, 6, 7, 7, 8, 9, 10, 10, 10, 11, 13} },
+- {22.5, { 2, 2, 4, 5, 5, 6, 7, 7, 8, 9, 10, 10, 10, 11, 12} },
+- { 23, { 2, 2, 4, 5, 5, 6, 7, 7, 7, 8, 10, 10, 10, 10, 12} },
+- {23.5, { 2, 2, 3, 5, 5, 6, 7, 7, 7, 8, 10, 10, 10, 10, 12} },
+- { 24, { 2, 2, 3, 4, 4, 5, 7, 7, 7, 8, 9, 9, 9, 10, 12} },
+- {24.5, { 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10, 12} },
+- { 25, { 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 9, 9, 11} },
+- {25.5, { 1, 1, 3, 3, 4, 5, 6, 6, 7, 7, 8, 9, 9, 9, 11} },
+- { 26, { 1, 1, 3, 3, 3, 4, 5, 6, 6, 7, 8, 8, 8, 8, 10} },
+- {26.5, { 1, 1, 2, 3, 3, 4, 5, 6, 6, 6, 8, 8, 8, 8, 10} },
+- { 27, { 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 10} },
+- {27.5, { 1, 1, 2, 2, 3, 3, 5, 5, 5, 6, 7, 8, 8, 8, 10} },
+- { 28, { 0, 1, 1, 2, 2, 3, 4, 5, 5, 5, 7, 7, 7, 8, 10} },
+- {28.5, { 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
+- { 29, { 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 9} },
+- {29.5, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8} },
+- { 30, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 6, 6, 6, 6, 8} },
+- {30.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8} },
+- { 31, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8} },
+- {31.5, { 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 8} },
+- { 32, { 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 7} },
+- {32.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 7} },
+- { 33, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
+- {33.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
+- { 34, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 6} },
+- {34.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 3, 4, 6} },
+- { 35, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} },
+- {35.5, { 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 5} },
+- { 36, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_420_8bpc_min = {
+- { 4, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 13} },
+- { 4.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
+- { 5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
+- { 5.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
+- { 6, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
+- { 6.5, { 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 4, 5, 5, 7, 10} },
+- { 7, { 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
+- { 7.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 7, 8} },
+- { 8, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
+- { 8.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
+- { 9, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6} },
+- { 9.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
+- { 10, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5} },
+- {10.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 5} },
+- { 11, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4} },
+- {11.5, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4} },
+- { 12, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3} }
+-};
+-
+-
+-const qp_table qp_table_422_8bpc_min = {
+- { 6, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
+- { 6.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
+- { 7, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
+- { 7.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
+- { 8, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 10} },
+- { 8.5, { 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 4, 5, 5, 7, 10} },
+- { 9, { 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 4, 5, 5, 7, 9} },
+- { 9.5, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 9} },
+- { 10, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
+- {10.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 7, 8} },
+- { 11, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
+- {11.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7} },
+- { 12, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6} },
+- {12.5, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6} },
+- { 13, { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5} },
+- {13.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5} },
+- { 14, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4} },
+- {14.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 4} },
+- { 15, { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 4} },
+- {15.5, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} },
+- { 16, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3} }
+-};
+-
+-
+-const qp_table qp_table_422_10bpc_max = {
+- { 6, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
+- { 6.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
+- { 7, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
+- { 7.5, { 5, 6, 8, 10, 11, 11, 11, 12, 12, 13, 13, 14, 14, 15, 16} },
+- { 8, { 4, 6, 7, 9, 10, 11, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
+- { 8.5, { 4, 5, 6, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
+- { 9, { 3, 4, 5, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14} },
+- { 9.5, { 3, 4, 4, 6, 6, 8, 8, 9, 10, 10, 11, 11, 12, 13, 14} },
+- { 10, { 2, 3, 3, 5, 5, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- {10.5, { 2, 3, 3, 5, 5, 6, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- { 11, { 2, 3, 3, 5, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
+- {11.5, { 2, 3, 3, 5, 5, 5, 7, 7, 8, 9, 9, 10, 10, 11, 12} },
+- { 12, { 2, 3, 3, 5, 5, 5, 7, 7, 8, 8, 9, 9, 10, 10, 11} },
+- {12.5, { 2, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
+- { 13, { 1, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 9, 9, 10} },
+- {13.5, { 1, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8, 8, 8, 9, 10} },
+- { 14, { 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9} },
+- {14.5, { 1, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 7, 7, 8, 9} },
+- { 15, { 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7, 7, 7, 9} },
+- {15.5, { 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 8} },
+- { 16, { 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 8} },
+- {16.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7} },
+- { 17, { 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 7} },
+- {17.5, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6} },
+- { 18, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 6} },
+- {18.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 6} },
+- { 19, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 5} },
+- {19.5, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 5} },
+- { 20, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_420_10bpc_max = {
+- { 4, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 17, 18} },
+- { 4.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
+- { 5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 16, 17} },
+- { 5.5, { 6, 7, 8, 9, 10, 10, 11, 12, 12, 13, 13, 14, 14, 15, 16} },
+- { 6, { 4, 6, 8, 9, 10, 10, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
+- { 6.5, { 4, 5, 7, 8, 8, 9, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
+- { 7, { 3, 4, 6, 7, 7, 8, 9, 10, 10, 11, 12, 12, 13, 13, 14} },
+- { 7.5, { 3, 4, 5, 6, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13} },
+- { 8, { 2, 3, 4, 5, 5, 6, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- { 8.5, { 1, 3, 3, 4, 4, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
+- { 9, { 1, 3, 3, 4, 4, 6, 7, 8, 8, 9, 9, 10, 10, 10, 11} },
+- { 9.5, { 1, 3, 3, 3, 4, 5, 6, 7, 8, 8, 9, 9, 9, 10, 11} },
+- { 10, { 1, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8, 9, 9, 9, 11} },
+- {10.5, { 1, 1, 3, 3, 3, 4, 5, 5, 7, 7, 8, 8, 8, 8, 10} },
+- { 11, { 0, 1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 7, 8, 8, 9} },
+- {11.5, { 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 7, 7, 7, 9} },
+- { 12, { 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 8} },
+- {12.5, { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 7} },
+- { 13, { 0, 0, 0, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7} },
+- {13.5, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 6} },
+- { 14, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
+- {14.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 5} },
+- { 15, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 5} }
+-};
+-
+-
+-const qp_table qp_table_420_10bpc_min = {
+- { 4, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 17} },
+- { 4.5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
+- { 5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
+- { 5.5, { 0, 3, 3, 4, 6, 7, 7, 7, 7, 7, 9, 9, 9, 11, 15} },
+- { 6, { 0, 2, 3, 4, 6, 7, 7, 7, 7, 7, 9, 9, 9, 11, 14} },
+- { 6.5, { 0, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 11, 14} },
+- { 7, { 0, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 11, 13} },
+- { 7.5, { 0, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 9, 11, 12} },
+- { 8, { 0, 2, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 9, 11, 12} },
+- { 8.5, { 0, 2, 2, 3, 3, 5, 5, 6, 6, 7, 8, 8, 9, 10, 11} },
+- { 9, { 0, 2, 2, 3, 3, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10} },
+- { 9.5, { 0, 2, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10} },
+- { 10, { 0, 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 8, 8, 8, 10} },
+- {10.5, { 0, 0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 7, 9} },
+- { 11, { 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8} },
+- {11.5, { 0, 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 6, 6, 6, 8} },
+- { 12, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 5, 7} },
+- {12.5, { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 6} },
+- { 13, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
+- {13.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 5} },
+- { 14, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
+- {14.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4} },
+- { 15, { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_444_10bpc_max = {
+- { 6, { 8, 10, 12, 12, 13, 13, 13, 14, 15, 16, 16, 16, 16, 17, 19} },
+- { 6.5, { 8, 10, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 16, 17, 19} },
+- { 7, { 8, 9, 11, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 17, 18} },
+- { 7.5, { 8, 9, 10, 11, 11, 12, 12, 13, 14, 14, 15, 15, 16, 17, 18} },
+- { 8, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
+- { 8.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
+- { 9, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 17} },
+- { 9.5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 17} },
+- { 10, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
+- {10.5, { 6, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 14, 15, 16} },
+- { 11, { 5, 7, 9, 10, 11, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15} },
+- {11.5, { 5, 7, 9, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
+- { 12, { 4, 6, 8, 9, 10, 10, 11, 12, 12, 13, 13, 13, 13, 14, 15} },
+- {12.5, { 4, 6, 8, 9, 10, 10, 10, 11, 11, 12, 12, 13, 13, 14, 15} },
+- { 13, { 3, 6, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 14} },
+- {13.5, { 3, 5, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14} },
+- { 14, { 3, 5, 6, 7, 8, 8, 8, 9, 10, 10, 11, 12, 12, 12, 14} },
+- {14.5, { 2, 4, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13} },
+- { 15, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- {15.5, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13} },
+- { 16, { 2, 4, 5, 6, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 12} },
+- {16.5, { 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10, 10, 11, 12} },
+- { 17, { 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 9, 9, 10, 12} },
+- {17.5, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 12} },
+- { 18, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 11} },
+- {18.5, { 1, 3, 4, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 10, 11} },
+- { 19, { 1, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 8, 9, 10} },
+- {19.5, { 1, 2, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 8, 9, 10} },
+- { 20, { 1, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 8, 10} },
+- {20.5, { 1, 2, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 7, 8, 10} },
+- { 21, { 1, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 7, 8, 10} },
+- {21.5, { 1, 2, 3, 3, 3, 3, 4, 5, 5, 5, 6, 7, 7, 8, 9} },
+- { 22, { 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 9} },
+- {22.5, { 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8} },
+- { 23, { 1, 1, 2, 3, 3, 3, 4, 4, 4, 4, 6, 6, 6, 6, 8} },
+- {23.5, { 1, 1, 1, 3, 3, 3, 4, 4, 4, 4, 6, 6, 6, 6, 8} },
+- { 24, { 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5, 6, 8} },
+- {24.5, { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 8} },
+- { 25, { 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 7} },
+- {25.5, { 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 7} },
+- { 26, { 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 6} },
+- {26.5, { 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 6} },
+- { 27, { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 6} },
+- {27.5, { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 6} },
+- { 28, { 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 5} },
+- {28.5, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 5} },
+- { 29, { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4} },
+- {29.5, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4} },
+- { 30, { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4} }
+-};
+-
+-
+-const qp_table qp_table_422_8bpc_max = {
+- { 6, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
+- { 6.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
+- { 7, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
+- { 7.5, { 3, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12} },
+- { 8, { 2, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 10, 11} },
+- { 8.5, { 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 10, 11} },
+- { 9, { 1, 2, 3, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10} },
+- { 9.5, { 1, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 9, 10} },
+- { 10, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
+- {10.5, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9} },
+- { 11, { 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8} },
+- {11.5, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8} },
+- { 12, { 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7} },
+- {12.5, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7} },
+- { 13, { 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6} },
+- {13.5, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 6} },
+- { 14, { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5} },
+- {14.5, { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 5} },
+- { 15, { 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 5} },
+- {15.5, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} },
+- { 16, { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4} }
+-};
+-
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
+index 7b294f637881a..b19d3aeb5962c 100644
+--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
++++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
+@@ -23,266 +23,7 @@
+ * Authors: AMD
+ *
+ */
+-#include <drm/drm_dsc.h>
+-
+-#include "os_types.h"
+ #include "rc_calc.h"
+-#include "qp_tables.h"
+-
+-#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
+-
+-#define MODE_SELECT(val444, val422, val420) \
+- (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
+-
+-
+-#define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \
+- table = qp_table_##mode##_##bpc##bpc_##max; \
+- table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
+- break
+-
+-
+-static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
+- enum max_min max_min, float bpp)
+-{
+- int mode = MODE_SELECT(444, 422, 420);
+- int sel = table_hash(mode, bpc, max_min);
+- int table_size = 0;
+- int index;
+- const struct qp_entry *table = 0L;
+-
+- // alias enum
+- enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
+- switch (sel) {
+- TABLE_CASE(444, 8, max);
+- TABLE_CASE(444, 8, min);
+- TABLE_CASE(444, 10, max);
+- TABLE_CASE(444, 10, min);
+- TABLE_CASE(444, 12, max);
+- TABLE_CASE(444, 12, min);
+- TABLE_CASE(422, 8, max);
+- TABLE_CASE(422, 8, min);
+- TABLE_CASE(422, 10, max);
+- TABLE_CASE(422, 10, min);
+- TABLE_CASE(422, 12, max);
+- TABLE_CASE(422, 12, min);
+- TABLE_CASE(420, 8, max);
+- TABLE_CASE(420, 8, min);
+- TABLE_CASE(420, 10, max);
+- TABLE_CASE(420, 10, min);
+- TABLE_CASE(420, 12, max);
+- TABLE_CASE(420, 12, min);
+- }
+-
+- if (table == 0)
+- return;
+-
+- index = (bpp - table[0].bpp) * 2;
+-
+- /* requested size is bigger than the table */
+- if (index >= table_size) {
+- dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
+- return;
+- }
+-
+- memcpy(qps, table[index].qps, sizeof(qp_set));
+-}
+-
+-static double dsc_roundf(double num)
+-{
+- if (num < 0.0)
+- num = num - 0.5;
+- else
+- num = num + 0.5;
+-
+- return (int)(num);
+-}
+-
+-static double dsc_ceil(double num)
+-{
+- double retval = (int)num;
+-
+- if (retval != num && num > 0)
+- retval = num + 1;
+-
+- return (int)retval;
+-}
+-
+-static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
+-{
+- int *p = ofs;
+-
+- if (mode == CM_444 || mode == CM_RGB) {
+- *p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
+- *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
+- *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
+- *p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
+- *p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
+- *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
+- *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
+- *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
+- *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
+- *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
+- *p++ = -10;
+- *p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0))));
+- *p++ = -12;
+- *p++ = -12;
+- *p++ = -12;
+- } else if (mode == CM_422) {
+- *p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0))));
+- *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0))));
+- *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0))));
+- *p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0))));
+- *p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0))));
+- *p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0))));
+- *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0))));
+- *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0))));
+- *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0))));
+- *p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0))));
+- *p++ = -10;
+- *p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1))));
+- *p++ = -12;
+- *p++ = -12;
+- *p++ = -12;
+- } else {
+- *p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0))));
+- *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0))));
+- *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0))));
+- *p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0))));
+- *p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0))));
+- *p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0))));
+- *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0))));
+- *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0))));
+- *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0))));
+- *p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0))));
+- *p++ = -10;
+- *p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0))));
+- *p++ = -12;
+- *p++ = -12;
+- *p++ = -12;
+- }
+-}
+-
+-static int median3(int a, int b, int c)
+-{
+- if (a > b)
+- swap(a, b);
+- if (b > c)
+- swap(b, c);
+- if (a > b)
+- swap(b, c);
+-
+- return b;
+-}
+-
+-static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
+- enum bits_per_comp bpc, u16 drm_bpp,
+- bool is_navite_422_or_420,
+- int slice_width, int slice_height,
+- int minor_version)
+-{
+- float bpp;
+- float bpp_group;
+- float initial_xmit_delay_factor;
+- int padding_pixels;
+- int i;
+-
+- bpp = ((float)drm_bpp / 16.0);
+- /* in native_422 or native_420 modes, the bits_per_pixel is double the
+- * target bpp (the latter is what calc_rc_params expects)
+- */
+- if (is_navite_422_or_420)
+- bpp /= 2.0;
+-
+- rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
+- rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
+-
+- bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
+-
+- switch (cm) {
+- case CM_420:
+- rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584)))));
+- rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
+- rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
+- break;
+- case CM_422:
+- rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
+- rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
+- rc->second_line_bpg_offset = 0;
+- break;
+- case CM_444:
+- case CM_RGB:
+- rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
+- rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
+- rc->second_line_bpg_offset = 0;
+- break;
+- }
+-
+- initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
+- rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
+-
+- if (cm == CM_422 || cm == CM_420)
+- slice_width /= 2;
+-
+- padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
+- if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
+- if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
+- rc->initial_xmit_delay++;
+- }
+-
+- rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
+- rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
+- rc->flatness_det_thresh = 2 << (bpc - 8);
+-
+- get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
+- get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
+- if (cm == CM_444 && minor_version == 1) {
+- for (i = 0; i < QP_SET_SIZE; ++i) {
+- rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
+- rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
+- }
+- }
+- get_ofs_set(rc->ofs, cm, bpp);
+-
+- /* fixed parameters */
+- rc->rc_model_size = 8192;
+- rc->rc_edge_factor = 6;
+- rc->rc_tgt_offset_hi = 3;
+- rc->rc_tgt_offset_lo = 3;
+-
+- rc->rc_buf_thresh[0] = 896;
+- rc->rc_buf_thresh[1] = 1792;
+- rc->rc_buf_thresh[2] = 2688;
+- rc->rc_buf_thresh[3] = 3584;
+- rc->rc_buf_thresh[4] = 4480;
+- rc->rc_buf_thresh[5] = 5376;
+- rc->rc_buf_thresh[6] = 6272;
+- rc->rc_buf_thresh[7] = 6720;
+- rc->rc_buf_thresh[8] = 7168;
+- rc->rc_buf_thresh[9] = 7616;
+- rc->rc_buf_thresh[10] = 7744;
+- rc->rc_buf_thresh[11] = 7872;
+- rc->rc_buf_thresh[12] = 8000;
+- rc->rc_buf_thresh[13] = 8064;
+-}
+-
+-static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp,
+- bool is_navite_422_or_420)
+-{
+- float bpp;
+- u32 bytes_per_pixel;
+- double d_bytes_per_pixel;
+-
+- bpp = ((float)drm_bpp / 16.0);
+- d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
+- // TODO: Make sure the formula for calculating this is precise (ceiling
+- // vs. floor, and at what point they should be applied)
+- if (is_navite_422_or_420)
+- d_bytes_per_pixel /= 2;
+-
+- bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
+-
+- return bytes_per_pixel;
+-}
+
+ /**
+ * calc_rc_params - reads the user's cmdline mode
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
+index 262f06afcbf95..c2340e001b578 100644
+--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
++++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
+@@ -27,55 +27,7 @@
+ #ifndef __RC_CALC_H__
+ #define __RC_CALC_H__
+
+-
+-#define QP_SET_SIZE 15
+-
+-typedef int qp_set[QP_SET_SIZE];
+-
+-struct rc_params {
+- int rc_quant_incr_limit0;
+- int rc_quant_incr_limit1;
+- int initial_fullness_offset;
+- int initial_xmit_delay;
+- int first_line_bpg_offset;
+- int second_line_bpg_offset;
+- int flatness_min_qp;
+- int flatness_max_qp;
+- int flatness_det_thresh;
+- qp_set qp_min;
+- qp_set qp_max;
+- qp_set ofs;
+- int rc_model_size;
+- int rc_edge_factor;
+- int rc_tgt_offset_hi;
+- int rc_tgt_offset_lo;
+- int rc_buf_thresh[QP_SET_SIZE - 1];
+-};
+-
+-enum colour_mode {
+- CM_RGB, /* 444 RGB */
+- CM_444, /* 444 YUV or simple 422 */
+- CM_422, /* native 422 */
+- CM_420 /* native 420 */
+-};
+-
+-enum bits_per_comp {
+- BPC_8 = 8,
+- BPC_10 = 10,
+- BPC_12 = 12
+-};
+-
+-enum max_min {
+- DAL_MM_MIN = 0,
+- DAL_MM_MAX = 1
+-};
+-
+-struct qp_entry {
+- float bpp;
+- const qp_set qps;
+-};
+-
+-typedef struct qp_entry qp_table[];
++#include "dml/dsc/rc_calc_fpu.h"
+
+ void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
+ u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
+diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
+index ef830aded5b1c..1e19dd674e5a2 100644
+--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
++++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
+@@ -22,7 +22,6 @@
+ * Authors: AMD
+ *
+ */
+-#include "os_types.h"
+ #include <drm/drm_dsc.h>
+ #include "dscc_types.h"
+ #include "rc_calc.h"
+diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+index 713f5558f5e17..9195dec294c2d 100644
+--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+@@ -154,6 +154,8 @@ struct hubbub_funcs {
+ bool (*is_allow_self_refresh_enabled)(struct hubbub *hubbub);
+ void (*allow_self_refresh_control)(struct hubbub *hubbub, bool allow);
+
++ bool (*verify_allow_pstate_change_high)(struct hubbub *hubbub);
++
+ void (*apply_DEDCN21_147_wa)(struct hubbub *hubbub);
+
+ void (*force_wm_propagate_to_pipes)(struct hubbub *hubbub);
+diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+index ad5f2adcc40d5..c8427d738c87e 100644
+--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
++++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+@@ -32,11 +32,6 @@
+ #include "inc/hw/link_encoder.h"
+ #include "core_status.h"
+
+-enum vline_select {
+- VLINE0,
+- VLINE1
+-};
+-
+ struct pipe_ctx;
+ struct dc_state;
+ struct dc_stream_status;
+@@ -115,8 +110,7 @@ struct hw_sequencer_funcs {
+ int group_index, int group_size,
+ struct pipe_ctx *grouped_pipes[]);
+ void (*setup_periodic_interrupt)(struct dc *dc,
+- struct pipe_ctx *pipe_ctx,
+- enum vline_select vline);
++ struct pipe_ctx *pipe_ctx);
+ void (*set_drr)(struct pipe_ctx **pipe_ctx, int num_pipes,
+ struct dc_crtc_timing_adjust adjust);
+ void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
+diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+index ed54e1c819bed..a728087b3f3d6 100644
+--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
++++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+@@ -266,14 +266,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+ .funcs = &pflip_irq_info_funcs\
+ }
+
+-#define vupdate_int_entry(reg_num)\
+- [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
+- IRQ_REG_ENTRY(OTG, reg_num,\
+- OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\
+- OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\
+- .funcs = &vblank_irq_info_funcs\
+- }
+-
+ /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic
+ * of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+@@ -402,12 +394,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = {
+ dc_underflow_int_entry(6),
+ [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(),
+- vupdate_int_entry(0),
+- vupdate_int_entry(1),
+- vupdate_int_entry(2),
+- vupdate_int_entry(3),
+- vupdate_int_entry(4),
+- vupdate_int_entry(5),
+ vupdate_no_lock_int_entry(0),
+ vupdate_no_lock_int_entry(1),
+ vupdate_no_lock_int_entry(2),
+diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+index caf961bb633f6..0fc4f90d9e3e9 100644
+--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
++++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+@@ -445,7 +445,7 @@ struct dmub_notification {
+ * of a firmware to know if feature or functionality is supported or present.
+ */
+ #define DMUB_FW_VERSION(major, minor, revision) \
+- ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | ((revision) & 0xFFFF))
++ ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | (((revision) & 0xFF) << 8))
+
+ /**
+ * dmub_srv_create() - creates the DMUB service.
+diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
+index 571fcf23cea92..a3a9ea077f505 100644
+--- a/drivers/gpu/drm/amd/display/include/logger_types.h
++++ b/drivers/gpu/drm/amd/display/include/logger_types.h
+@@ -72,6 +72,9 @@
+ #define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+ #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__)
+ #define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__)
++#if defined(CONFIG_DRM_AMD_DC_DCN)
++#define DC_LOG_DP2(...) DRM_DEBUG_KMS(__VA_ARGS__)
++#endif
+
+ struct dal_logger;
+
+diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+index ef742d95ef057..c707c9bfed433 100644
+--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
++++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+@@ -1597,6 +1597,7 @@ static void interpolate_user_regamma(uint32_t hw_points_num,
+ struct fixed31_32 lut2;
+ struct fixed31_32 delta_lut;
+ struct fixed31_32 delta_index;
++ const struct fixed31_32 one = dc_fixpt_from_int(1);
+
+ i = 0;
+ /* fixed_pt library has problems handling too small values */
+@@ -1625,6 +1626,9 @@ static void interpolate_user_regamma(uint32_t hw_points_num,
+ } else
+ hw_x = coordinates_x[i].x;
+
++ if (dc_fixpt_le(one, hw_x))
++ hw_x = one;
++
+ norm_x = dc_fixpt_mul(norm_factor, hw_x);
+ index = dc_fixpt_floor(norm_x);
+ if (index < 0 || index > 255)
+diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+index b99aa232bd8b1..6230861e78d10 100644
+--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
++++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+@@ -327,7 +327,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
+ * - Delta for CEIL: delta_from_mid_point_in_us_1
+ * - Delta for FLOOR: delta_from_mid_point_in_us_2
+ */
+- if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
++ if (mid_point_frames_ceil &&
++ (last_render_time_in_us / mid_point_frames_ceil) <
++ in_out_vrr->min_duration_in_us) {
+ /* Check for out of range.
+ * If using CEIL produces a value that is out of range,
+ * then we are forced to use FLOOR.
+@@ -374,8 +376,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
+ /* Either we've calculated the number of frames to insert,
+ * or we need to insert min duration frames
+ */
+- if (last_render_time_in_us / frames_to_insert <
+- in_out_vrr->min_duration_in_us){
++ if (frames_to_insert &&
++ (last_render_time_in_us / frames_to_insert) <
++ in_out_vrr->min_duration_in_us){
+ frames_to_insert -= (frames_to_insert > 1) ?
+ 1 : 0;
+ }
+@@ -567,10 +570,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
+ * Note: We should never go above the field rate of the mode timing set.
+ */
+ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
+-
+- /* FreeSync HDR */
+- infopacket->sb[9] = 0;
+- infopacket->sb[10] = 0;
+ }
+
+ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
+@@ -638,10 +637,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
+
+ /* PB16 : Reserved bits 7:1, FixedRate bit 0 */
+ infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0;
+-
+- //FreeSync HDR
+- infopacket->sb[9] = 0;
+- infopacket->sb[10] = 0;
+ }
+
+ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
+@@ -726,8 +721,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal,
+ /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */
+ infopacket->hb2 = 0x09;
+
+- *payload_size = 0x0A;
+-
++ *payload_size = 0x09;
+ } else if (dc_is_dp_signal(signal)) {
+
+ /* HEADER */
+@@ -776,9 +770,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal,
+ infopacket->hb1 = version;
+
+ /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */
+- *payload_size = 0x10;
+- infopacket->hb2 = *payload_size - 1; //-1 for checksum
++ infopacket->hb2 = 0x10;
+
++ *payload_size = 0x10;
+ } else if (dc_is_dp_signal(signal)) {
+
+ /* HEADER */
+diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+index 57f198de5e2cb..4e075b01d48bb 100644
+--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
++++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+@@ -100,7 +100,8 @@ enum vsc_packet_revision {
+ //PB7 = MD0
+ #define MASK_VTEM_MD0__VRR_EN 0x01
+ #define MASK_VTEM_MD0__M_CONST 0x02
+-#define MASK_VTEM_MD0__RESERVED2 0x0C
++#define MASK_VTEM_MD0__QMS_EN 0x04
++#define MASK_VTEM_MD0__RESERVED2 0x08
+ #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0
+
+ //MD1
+@@ -109,7 +110,7 @@ enum vsc_packet_revision {
+ //MD2
+ #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03
+ #define MASK_VTEM_MD2__RB 0x04
+-#define MASK_VTEM_MD2__RESERVED3 0xF8
++#define MASK_VTEM_MD2__NEXT_TFR 0xF8
+
+ //MD3
+ #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF
+diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
+index 257f280d3d53f..bd077ea224a40 100644
+--- a/drivers/gpu/drm/amd/include/amd_shared.h
++++ b/drivers/gpu/drm/amd/include/amd_shared.h
+@@ -98,7 +98,8 @@ enum amd_ip_block_type {
+ AMD_IP_BLOCK_TYPE_ACP,
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_IP_BLOCK_TYPE_MES,
+- AMD_IP_BLOCK_TYPE_JPEG
++ AMD_IP_BLOCK_TYPE_JPEG,
++ AMD_IP_BLOCK_TYPE_NUM,
+ };
+
+ enum amd_clockgating_state {
+diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h
+index 7ec4331e67f26..a486769b66c6a 100644
+--- a/drivers/gpu/drm/amd/include/discovery.h
++++ b/drivers/gpu/drm/amd/include/discovery.h
+@@ -143,6 +143,55 @@ struct gc_info_v1_0 {
+ uint32_t gc_num_gl2a;
+ };
+
++struct gc_info_v1_1 {
++ struct gpu_info_header header;
++
++ uint32_t gc_num_se;
++ uint32_t gc_num_wgp0_per_sa;
++ uint32_t gc_num_wgp1_per_sa;
++ uint32_t gc_num_rb_per_se;
++ uint32_t gc_num_gl2c;
++ uint32_t gc_num_gprs;
++ uint32_t gc_num_max_gs_thds;
++ uint32_t gc_gs_table_depth;
++ uint32_t gc_gsprim_buff_depth;
++ uint32_t gc_parameter_cache_depth;
++ uint32_t gc_double_offchip_lds_buffer;
++ uint32_t gc_wave_size;
++ uint32_t gc_max_waves_per_simd;
++ uint32_t gc_max_scratch_slots_per_cu;
++ uint32_t gc_lds_size;
++ uint32_t gc_num_sc_per_se;
++ uint32_t gc_num_sa_per_se;
++ uint32_t gc_num_packer_per_sc;
++ uint32_t gc_num_gl2a;
++ uint32_t gc_num_tcp_per_sa;
++ uint32_t gc_num_sdp_interface;
++ uint32_t gc_num_tcps;
++};
++
++struct gc_info_v2_0 {
++ struct gpu_info_header header;
++
++ uint32_t gc_num_se;
++ uint32_t gc_num_cu_per_sh;
++ uint32_t gc_num_sh_per_se;
++ uint32_t gc_num_rb_per_se;
++ uint32_t gc_num_tccs;
++ uint32_t gc_num_gprs;
++ uint32_t gc_num_max_gs_thds;
++ uint32_t gc_gs_table_depth;
++ uint32_t gc_gsprim_buff_depth;
++ uint32_t gc_parameter_cache_depth;
++ uint32_t gc_double_offchip_lds_buffer;
++ uint32_t gc_wave_size;
++ uint32_t gc_max_waves_per_simd;
++ uint32_t gc_max_scratch_slots_per_cu;
++ uint32_t gc_lds_size;
++ uint32_t gc_num_sc_per_se;
++ uint32_t gc_num_packer_per_sc;
++};
++
+ typedef struct harvest_info_header {
+ uint32_t signature; /* Table Signature */
+ uint32_t version; /* Table Version */
+diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+index bac15c466733d..6e27c8b16391f 100644
+--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+@@ -341,7 +341,8 @@ struct amd_pm_funcs {
+ int (*get_power_profile_mode)(void *handle, char *buf);
+ int (*set_power_profile_mode)(void *handle, long *input, uint32_t size);
+ int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size);
+- int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size);
++ int (*odn_edit_dpm_table)(void *handle, enum PP_OD_DPM_TABLE_COMMAND type,
++ long *input, uint32_t size);
+ int (*set_mp1_state)(void *handle, enum pp_mp1_state mp1_state);
+ int (*smu_i2c_bus_access)(void *handle, bool acquire);
+ int (*gfx_state_change_set)(void *handle, uint32_t state);
+diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+index 03581d5b18360..a68496b3f9296 100644
+--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+@@ -927,6 +927,13 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
+ {
+ int ret = 0;
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
++ enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON;
++
++ if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state) {
++ dev_dbg(adev->dev, "IP block%d already in the target %s state!",
++ block_type, gate ? "gate" : "ungate");
++ return 0;
++ }
+
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_UVD:
+@@ -979,6 +986,9 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
+ break;
+ }
+
++ if (!ret)
++ atomic_set(&adev->pm.pwr_state[block_type], pwr_state);
++
+ return ret;
+ }
+
+@@ -1035,6 +1045,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
+
+ if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
+ return false;
++ /* Don't use baco for reset in S3.
++ * This is a workaround for some platforms
++ * where entering BACO during suspend
++ * seems to cause reboots or hangs.
++ * This might be related to the fact that BACO controls
++ * power to the whole GPU including devices like audio and USB.
++ * Powering down/up everything may adversely affect these other
++ * devices. Needs more investigation.
++ */
++ if (adev->in_s3)
++ return false;
+
+ if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap))
+ return false;
+diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+index 249cb0aeb5ae4..73794c1c12082 100644
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -2117,6 +2117,12 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
+ }
+ }
+
++ /* setting should not be allowed from VF if not in one VF mode */
++ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) {
++ dev_attr->attr.mode &= ~S_IWUGO;
++ dev_attr->store = NULL;
++ }
++
+ #undef DEVICE_ATTR_IS
+
+ return 0;
+@@ -2128,15 +2134,19 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev,
+ uint32_t mask, struct list_head *attr_list)
+ {
+ int ret = 0;
+- struct device_attribute *dev_attr = &attr->dev_attr;
+- const char *name = dev_attr->attr.name;
+ enum amdgpu_device_attr_states attr_states = ATTR_STATE_SUPPORTED;
+ struct amdgpu_device_attr_entry *attr_entry;
++ struct device_attribute *dev_attr;
++ const char *name;
+
+ int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
+ uint32_t mask, enum amdgpu_device_attr_states *states) = default_attr_update;
+
+- BUG_ON(!attr);
++ if (!attr)
++ return -EINVAL;
++
++ dev_attr = &attr->dev_attr;
++ name = dev_attr->attr.name;
+
+ attr_update = attr->attr_update ? attr->attr_update : default_attr_update;
+
+@@ -3439,8 +3449,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
+ attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_power2_cap.dev_attr.attr ||
+ attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr ||
+- attr == &sensor_dev_attr_power2_label.dev_attr.attr ||
+- attr == &sensor_dev_attr_power1_label.dev_attr.attr))
++ attr == &sensor_dev_attr_power2_label.dev_attr.attr))
+ return 0;
+
+ return effective_mode;
+diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+index 98f1b3d8c1d59..16e3f72d31b9f 100644
+--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
++++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+@@ -417,6 +417,12 @@ struct amdgpu_dpm {
+ enum amd_dpm_forced_level forced_level;
+ };
+
++enum ip_power_state {
++ POWER_STATE_UNKNOWN,
++ POWER_STATE_ON,
++ POWER_STATE_OFF,
++};
++
+ struct amdgpu_pm {
+ struct mutex mutex;
+ u32 current_sclk;
+@@ -452,6 +458,8 @@ struct amdgpu_pm {
+ struct i2c_adapter smu_i2c;
+ struct mutex smu_i2c_mutex;
+ struct list_head pm_attr_list;
++
++ atomic_t pwr_state[AMD_IP_BLOCK_TYPE_NUM];
+ };
+
+ #define R600_SSTU_DFLT 0
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+index 321215003643b..0f5930e797bd5 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+@@ -924,7 +924,8 @@ static int pp_set_fine_grain_clk_vol(void *handle, uint32_t type, long *input, u
+ return hwmgr->hwmgr_func->set_fine_grain_clk_vol(hwmgr, type, input, size);
+ }
+
+-static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size)
++static int pp_odn_edit_dpm_table(void *handle, enum PP_OD_DPM_TABLE_COMMAND type,
++ long *input, uint32_t size)
+ {
+ struct pp_hwmgr *hwmgr = handle;
+
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
+index 67d7da0b6fed5..1d829402cd2e2 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
+@@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr)
+ for (i = 0; i < table_entries; i++) {
+ result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr, i, state);
+ if (result) {
++ kfree(hwmgr->current_ps);
+ kfree(hwmgr->request_ps);
+ kfree(hwmgr->ps);
++ hwmgr->current_ps = NULL;
+ hwmgr->request_ps = NULL;
+ hwmgr->ps = NULL;
+ return -EINVAL;
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+index 1de3ae77e03ed..cf74621f94a75 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+@@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinFclkByFreq,
+ hwmgr->display_config->num_display > 3 ?
+- data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk :
++ (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) :
+ min_mclk,
+ NULL);
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinSocclkByFreq,
+- data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk,
++ data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100,
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinVcn,
+@@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxFclkByFreq,
+- data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk,
++ data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100,
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxSocclkByFreq,
+- data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk,
++ data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100,
+ NULL);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxVcn,
+@@ -1036,13 +1036,13 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ else
+ i = 1;
+
+- size += sysfs_emit_at(buf, size, "0: %uMhz %s\n",
++ size += sprintf(buf + size, "0: %uMhz %s\n",
+ data->gfx_min_freq_limit/100,
+ i == 0 ? "*" : "");
+- size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
++ size += sprintf(buf + size, "1: %uMhz %s\n",
+ i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK,
+ i == 1 ? "*" : "");
+- size += sysfs_emit_at(buf, size, "2: %uMhz %s\n",
++ size += sprintf(buf + size, "2: %uMhz %s\n",
+ data->gfx_max_freq_limit/100,
+ i == 2 ? "*" : "");
+ break;
+@@ -1050,7 +1050,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now);
+
+ for (i = 0; i < mclk_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i,
+ mclk_table->entries[i].clk / 100,
+ ((mclk_table->entries[i].clk / 100)
+@@ -1065,10 +1065,10 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ if (ret)
+ return ret;
+
+- size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
+- size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_SCLK");
++ size += sprintf(buf + size, "0: %10uMhz\n",
+ (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq);
+- size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
++ size += sprintf(buf + size, "1: %10uMhz\n",
+ (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq);
+ }
+ break;
+@@ -1081,8 +1081,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ if (ret)
+ return ret;
+
+- size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
+- size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_RANGE");
++ size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+ min_freq, max_freq);
+ }
+ break;
+@@ -1456,6 +1456,8 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
+ if (!buf)
+ return -EINVAL;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0],
+ title[1], title[2], title[3], title[4], title[5]);
+
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+index e7803ce8f67aa..611969bf45207 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+@@ -4926,7 +4926,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
+ now = i;
+
+ for (i = 0; i < sclk_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -4941,7 +4941,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
+ now = i;
+
+ for (i = 0; i < mclk_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, mclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -4955,7 +4955,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
+ now = i;
+
+ for (i = 0; i < pcie_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %s %s\n", i,
++ size += sprintf(buf + size, "%d: %s %s\n", i,
+ (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x8" :
+ (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" :
+ (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "",
+@@ -4963,32 +4963,32 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
+ break;
+ case OD_SCLK:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
++ size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+ for (i = 0; i < odn_sclk_table->num_of_pl; i++)
+- size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
++ size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+ i, odn_sclk_table->entries[i].clock/100,
+ odn_sclk_table->entries[i].vddc);
+ }
+ break;
+ case OD_MCLK:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
++ size += sprintf(buf + size, "%s:\n", "OD_MCLK");
+ for (i = 0; i < odn_mclk_table->num_of_pl; i++)
+- size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
++ size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+ i, odn_mclk_table->entries[i].clock/100,
+ odn_mclk_table->entries[i].vddc);
+ }
+ break;
+ case OD_RANGE:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
+- size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_RANGE");
++ size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+- size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
++ size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+- size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
++ size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+ data->odn_dpm_table.min_vddc,
+ data->odn_dpm_table.max_vddc);
+ }
+@@ -5518,6 +5518,8 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
+ if (!buf)
+ return -EINVAL;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ size += sysfs_emit_at(buf, size, "%s %16s %16s %16s %16s %16s %16s %16s\n",
+ title[0], title[1], title[2], title[3],
+ title[4], title[5], title[6], title[7]);
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
+index b94a77e4e7147..03bf8f0692228 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
+@@ -1559,7 +1559,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr,
+ CURR_SCLK_INDEX);
+
+ for (i = 0; i < sclk_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->entries[i].clk / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -1571,7 +1571,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr,
+ CURR_MCLK_INDEX);
+
+ for (i = SMU8_NUM_NBPMEMORYCLOCK; i > 0; i--)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ SMU8_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100,
+ (SMU8_NUM_NBPMEMORYCLOCK-i == now) ? "*" : "");
+ break;
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
+index ad33983a8064e..2a75da1e9f035 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
+@@ -109,6 +109,19 @@ int phm_irq_process(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+
++/*
++ * Helper function to make sysfs_emit_at() happy. Align buf to
++ * the current page boundary and record the offset.
++ */
++static inline void phm_get_sysfs_buf(char **buf, int *offset)
++{
++ if (!*buf || !offset)
++ return;
++
++ *offset = offset_in_page(*buf);
++ *buf -= *offset;
++}
++
+ int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr);
+
+ void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+index c152a61ddd2c9..e6336654c5655 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+@@ -4548,6 +4548,8 @@ static int vega10_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf)
+ int ret = 0;
+ int size = 0;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ ret = vega10_get_enabled_smc_features(hwmgr, &features_enabled);
+ PP_ASSERT_WITH_CODE(!ret,
+ "[EnableAllSmuFeatures] Failed to get enabled smc features!",
+@@ -4650,7 +4652,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ else
+ count = sclk_table->count;
+ for (i = 0; i < count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -4661,7 +4663,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now);
+
+ for (i = 0; i < mclk_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, mclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -4672,7 +4674,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now);
+
+ for (i = 0; i < soc_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, soc_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+@@ -4684,7 +4686,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now);
+
+ for (i = 0; i < dcef_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, dcef_table->dpm_levels[i].value / 100,
+ (dcef_table->dpm_levels[i].value / 100 == now) ?
+ "*" : "");
+@@ -4698,7 +4700,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+ gen_speed = pptable->PcieGenSpeed[i];
+ lane_width = pptable->PcieLaneCount[i];
+
+- size += sysfs_emit_at(buf, size, "%d: %s %s %s\n", i,
++ size += sprintf(buf + size, "%d: %s %s %s\n", i,
+ (gen_speed == 0) ? "2.5GT/s," :
+ (gen_speed == 1) ? "5.0GT/s," :
+ (gen_speed == 2) ? "8.0GT/s," :
+@@ -4717,34 +4719,34 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
+
+ case OD_SCLK:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
++ size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
+ for (i = 0; i < podn_vdd_dep->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
++ size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
+ i, podn_vdd_dep->entries[i].clk / 100,
+ podn_vdd_dep->entries[i].vddc);
+ }
+ break;
+ case OD_MCLK:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
++ size += sprintf(buf + size, "%s:\n", "OD_MCLK");
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
+ for (i = 0; i < podn_vdd_dep->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
++ size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
+ i, podn_vdd_dep->entries[i].clk/100,
+ podn_vdd_dep->entries[i].vddc);
+ }
+ break;
+ case OD_RANGE:
+ if (hwmgr->od_enabled) {
+- size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
+- size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_RANGE");
++ size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.gfx_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+- size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
++ size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.mem_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+- size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
++ size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+ data->odn_dpm_table.min_vddc,
+ data->odn_dpm_table.max_vddc);
+ }
+@@ -5112,6 +5114,8 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
+ if (!buf)
+ return -EINVAL;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0],
+ title[1], title[2], title[3], title[4], title[5]);
+
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
+index dad3e3741a4e8..190af79f3236f 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
+@@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr,
+ int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
+ uint32_t *speed)
+ {
+- uint32_t current_rpm;
+- uint32_t percent = 0;
+-
+- if (hwmgr->thermal_controller.fanInfo.bNoFan)
+- return 0;
++ struct amdgpu_device *adev = hwmgr->adev;
++ uint32_t duty100, duty;
++ uint64_t tmp64;
+
+- if (vega10_get_current_rpm(hwmgr, &current_rpm))
+- return -1;
++ duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
++ CG_FDO_CTRL1, FMAX_DUTY100);
++ duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
++ CG_THERMAL_STATUS, FDO_PWM_DUTY);
+
+- if (hwmgr->thermal_controller.
+- advanceFanControlParameters.usMaxFanRPM != 0)
+- percent = current_rpm * 255 /
+- hwmgr->thermal_controller.
+- advanceFanControlParameters.usMaxFanRPM;
++ if (!duty100)
++ return -EINVAL;
+
+- *speed = MIN(percent, 255);
++ tmp64 = (uint64_t)duty * 255;
++ do_div(tmp64, duty100);
++ *speed = MIN((uint32_t)tmp64, 255);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+index 8558718e15a8f..a2f4d6773d458 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+@@ -2141,6 +2141,8 @@ static int vega12_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf)
+ int ret = 0;
+ int size = 0;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ ret = vega12_get_enabled_smc_features(hwmgr, &features_enabled);
+ PP_ASSERT_WITH_CODE(!ret,
+ "[EnableAllSmuFeatures] Failed to get enabled smc features!",
+@@ -2256,7 +2258,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
+ "Attempt to get gfx clk levels Failed!",
+ return -1);
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
+ break;
+@@ -2272,7 +2274,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
+ "Attempt to get memory clk levels Failed!",
+ return -1);
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
+ break;
+@@ -2290,7 +2292,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
+ "Attempt to get soc clk levels Failed!",
+ return -1);
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
+ break;
+@@ -2308,7 +2310,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
+ "Attempt to get dcef clk levels Failed!",
+ return -1);
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
+ break;
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+index 0cf39c1244b1c..299b5c838bf70 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+@@ -2961,7 +2961,8 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
+ data->od8_settings.od8_settings_array;
+ OverDriveTable_t *od_table =
+ &(data->smc_state_table.overdrive_table);
+- int32_t input_index, input_clk, input_vol, i;
++ int32_t input_clk, input_vol, i;
++ uint32_t input_index;
+ int od8_id;
+ int ret;
+
+@@ -3238,6 +3239,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf)
+ int ret = 0;
+ int size = 0;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled);
+ PP_ASSERT_WITH_CODE(!ret,
+ "[EnableAllSmuFeatures] Failed to get enabled smc features!",
+@@ -3372,13 +3375,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ return ret);
+
+ if (vega20_get_sclks(hwmgr, &clocks)) {
+- size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
++ size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+ now / 100);
+ break;
+ }
+
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
+ break;
+@@ -3390,13 +3393,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ return ret);
+
+ if (vega20_get_memclocks(hwmgr, &clocks)) {
+- size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
++ size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+ now / 100);
+ break;
+ }
+
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
+ break;
+@@ -3408,13 +3411,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ return ret);
+
+ if (vega20_get_socclocks(hwmgr, &clocks)) {
+- size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
++ size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+ now / 100);
+ break;
+ }
+
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
+ break;
+@@ -3426,7 +3429,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ return ret);
+
+ for (i = 0; i < fclk_dpm_table->count; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, fclk_dpm_table->dpm_levels[i].value,
+ fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : "");
+ break;
+@@ -3438,13 +3441,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ return ret);
+
+ if (vega20_get_dcefclocks(hwmgr, &clocks)) {
+- size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
++ size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
+ now / 100);
+ break;
+ }
+
+ for (i = 0; i < clocks.num_levels; i++)
+- size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
++ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, clocks.data[i].clocks_in_khz / 1000,
+ (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
+ break;
+@@ -3458,7 +3461,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ gen_speed = pptable->PcieGenSpeed[i];
+ lane_width = pptable->PcieLaneCount[i];
+
+- size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i,
++ size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
+ (gen_speed == 0) ? "2.5GT/s," :
+ (gen_speed == 1) ? "5.0GT/s," :
+ (gen_speed == 2) ? "8.0GT/s," :
+@@ -3479,18 +3482,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ case OD_SCLK:
+ if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
+- size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
+- size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_SCLK");
++ size += sprintf(buf + size, "0: %10uMhz\n",
+ od_table->GfxclkFmin);
+- size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
++ size += sprintf(buf + size, "1: %10uMhz\n",
+ od_table->GfxclkFmax);
+ }
+ break;
+
+ case OD_MCLK:
+ if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
+- size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
+- size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
++ size += sprintf(buf + size, "%s:\n", "OD_MCLK");
++ size += sprintf(buf + size, "1: %10uMhz\n",
+ od_table->UclkFmax);
+ }
+
+@@ -3503,14 +3506,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
+- size = sysfs_emit(buf, "%s:\n", "OD_VDDC_CURVE");
+- size += sysfs_emit_at(buf, size, "0: %10uMhz %10dmV\n",
++ size += sprintf(buf + size, "%s:\n", "OD_VDDC_CURVE");
++ size += sprintf(buf + size, "0: %10uMhz %10dmV\n",
+ od_table->GfxclkFreq1,
+ od_table->GfxclkVolt1 / VOLTAGE_SCALE);
+- size += sysfs_emit_at(buf, size, "1: %10uMhz %10dmV\n",
++ size += sprintf(buf + size, "1: %10uMhz %10dmV\n",
+ od_table->GfxclkFreq2,
+ od_table->GfxclkVolt2 / VOLTAGE_SCALE);
+- size += sysfs_emit_at(buf, size, "2: %10uMhz %10dmV\n",
++ size += sprintf(buf + size, "2: %10uMhz %10dmV\n",
+ od_table->GfxclkFreq3,
+ od_table->GfxclkVolt3 / VOLTAGE_SCALE);
+ }
+@@ -3518,17 +3521,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ break;
+
+ case OD_RANGE:
+- size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
++ size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+
+ if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
+- size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n",
++ size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n",
+ od8_settings[OD8_SETTING_GFXCLK_FMIN].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_FMAX].max_value);
+ }
+
+ if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
+- size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n",
++ size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
+ od8_settings[OD8_SETTING_UCLK_FMAX].min_value,
+ od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
+ }
+@@ -3539,22 +3542,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
++ size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
+ od8_settings[OD8_SETTING_GFXCLK_FREQ1].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_FREQ1].max_value);
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
++ size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].max_value);
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
++ size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
+ od8_settings[OD8_SETTING_GFXCLK_FREQ2].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_FREQ2].max_value);
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
++ size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].max_value);
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
++ size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
+ od8_settings[OD8_SETTING_GFXCLK_FREQ3].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_FREQ3].max_value);
+- size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
++ size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value,
+ od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value);
+ }
+@@ -4003,6 +4006,8 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
+ if (!buf)
+ return -EINVAL;
+
++ phm_get_sysfs_buf(&buf, &size);
++
+ size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
+ title[0], title[1], title[2], title[3], title[4], title[5],
+ title[6], title[7], title[8], title[9], title[10]);
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
+index bcae42cef3743..6ba4c2ae69a63 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c
+@@ -1609,19 +1609,7 @@ static int kv_update_samu_dpm(struct amdgpu_device *adev, bool gate)
+
+ static u8 kv_get_acp_boot_level(struct amdgpu_device *adev)
+ {
+- u8 i;
+- struct amdgpu_clock_voltage_dependency_table *table =
+- &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table;
+-
+- for (i = 0; i < table->count; i++) {
+- if (table->entries[i].clk >= 0) /* XXX */
+- break;
+- }
+-
+- if (i >= table->count)
+- i = table->count - 1;
+-
+- return i;
++ return 0;
+ }
+
+ static void kv_update_acp_boot_level(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+index 81f82aa05ec28..66fc63f1f1c17 100644
+--- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+@@ -7247,17 +7247,15 @@ static int si_parse_power_table(struct amdgpu_device *adev)
+ if (!adev->pm.dpm.ps)
+ return -ENOMEM;
+ power_state_offset = (u8 *)state_array->states;
+- for (i = 0; i < state_array->ucNumEntries; i++) {
++ for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) {
+ u8 *idx;
+ power_state = (union pplib_power_state *)power_state_offset;
+ non_clock_array_index = power_state->v2.nonClockInfoIndex;
+ non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
+ &non_clock_info_array->nonClockInfo[non_clock_array_index];
+ ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL);
+- if (ps == NULL) {
+- kfree(adev->pm.dpm.ps);
++ if (ps == NULL)
+ return -ENOMEM;
+- }
+ adev->pm.dpm.ps[i].ps_priv = ps;
+ si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i],
+ non_clock_info,
+@@ -7279,8 +7277,8 @@ static int si_parse_power_table(struct amdgpu_device *adev)
+ k++;
+ }
+ power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
++ adev->pm.dpm.num_ps++;
+ }
+- adev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+ /* fill in the vce power states */
+ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) {
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+index 04863a7971155..952a8aa69b9ee 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -138,7 +138,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
+ uint32_t *min,
+ uint32_t *max)
+ {
+- int ret = 0;
++ int ret = -ENOTSUPP;
+
+ if (!min && !max)
+ return -EINVAL;
+@@ -1536,9 +1536,7 @@ static int smu_suspend(void *handle)
+
+ smu->watermarks_bitmap &= ~(WATERMARKS_LOADED);
+
+- /* skip CGPG when in S0ix */
+- if (smu->is_apu && !adev->in_s0ix)
+- smu_set_gfx_cgpg(&adev->smu, false);
++ smu_set_gfx_cgpg(&adev->smu, false);
+
+ return 0;
+ }
+@@ -1569,8 +1567,7 @@ static int smu_resume(void *handle)
+ return ret;
+ }
+
+- if (smu->is_apu)
+- smu_set_gfx_cgpg(&adev->smu, true);
++ smu_set_gfx_cgpg(&adev->smu, true);
+
+ smu->disable_uclk_switch = 0;
+
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+index b1ad451af06bd..dfba0bc732073 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+@@ -1265,7 +1265,7 @@ static int navi10_print_clk_levels(struct smu_context *smu,
+ enum smu_clk_type clk_type, char *buf)
+ {
+ uint16_t *curve_settings;
+- int i, size = 0, ret = 0;
++ int i, levels, size = 0, ret = 0;
+ uint32_t cur_value = 0, value = 0, count = 0;
+ uint32_t freq_values[3] = {0};
+ uint32_t mark_index = 0;
+@@ -1319,14 +1319,17 @@ static int navi10_print_clk_levels(struct smu_context *smu,
+ freq_values[1] = cur_value;
+ mark_index = cur_value == freq_values[0] ? 0 :
+ cur_value == freq_values[2] ? 2 : 1;
+- if (mark_index != 1)
+- freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
+
+- for (i = 0; i < 3; i++) {
++ levels = 3;
++ if (mark_index != 1) {
++ levels = 2;
++ freq_values[1] = freq_values[2];
++ }
++
++ for (i = 0; i < levels; i++) {
+ size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i],
+ i == mark_index ? "*" : "");
+ }
+-
+ }
+ break;
+ case SMU_PCIE:
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+index ca57221e39629..d4fde146bd4c9 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+@@ -338,7 +338,7 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context *smu,
+ if (smu->dc_controlled_by_gpio)
+ *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT);
+
+- if (amdgpu_aspm)
++ if (amdgpu_device_should_use_aspm(adev))
+ *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT);
+
+ return 0;
+@@ -358,6 +358,23 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu)
+ smu_baco->platform_support =
+ (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true :
+ false;
++
++ /*
++ * Disable BACO entry/exit completely on the SKUs below to
++ * avoid intermittent hardware failures.
++ */
++ if (((adev->pdev->device == 0x73A1) &&
++ (adev->pdev->revision == 0x00)) ||
++ ((adev->pdev->device == 0x73BF) &&
++ (adev->pdev->revision == 0xCF)) ||
++ ((adev->pdev->device == 0x7422) &&
++ (adev->pdev->revision == 0x00)) ||
++ ((adev->pdev->device == 0x73A3) &&
++ (adev->pdev->revision == 0x00)) ||
++ ((adev->pdev->device == 0x73E3) &&
++ (adev->pdev->revision == 0x00)))
++ smu_baco->platform_support = false;
++
+ }
+ }
+
+@@ -418,6 +435,36 @@ static int sienna_cichlid_store_powerplay_table(struct smu_context *smu)
+ return 0;
+ }
+
++static int sienna_cichlid_patch_pptable_quirk(struct smu_context *smu)
++{
++ struct amdgpu_device *adev = smu->adev;
++ uint32_t *board_reserved;
++ uint16_t *freq_table_gfx;
++ uint32_t i;
++
++ /* Fix some OEM SKU specific stability issues */
++ GET_PPTABLE_MEMBER(BoardReserved, &board_reserved);
++ if ((adev->pdev->device == 0x73DF) &&
++ (adev->pdev->revision == 0XC3) &&
++ (adev->pdev->subsystem_device == 0x16C2) &&
++ (adev->pdev->subsystem_vendor == 0x1043))
++ board_reserved[0] = 1387;
++
++ GET_PPTABLE_MEMBER(FreqTableGfx, &freq_table_gfx);
++ if ((adev->pdev->device == 0x73DF) &&
++ (adev->pdev->revision == 0XC3) &&
++ ((adev->pdev->subsystem_device == 0x16C2) ||
++ (adev->pdev->subsystem_device == 0x133C)) &&
++ (adev->pdev->subsystem_vendor == 0x1043)) {
++ for (i = 0; i < NUM_GFXCLK_DPM_LEVELS; i++) {
++ if (freq_table_gfx[i] > 2500)
++ freq_table_gfx[i] = 2500;
++ }
++ }
++
++ return 0;
++}
++
+ static int sienna_cichlid_setup_pptable(struct smu_context *smu)
+ {
+ int ret = 0;
+@@ -438,7 +485,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context *smu)
+ if (ret)
+ return ret;
+
+- return ret;
++ return sienna_cichlid_patch_pptable_quirk(smu);
+ }
+
+ static int sienna_cichlid_tables_init(struct smu_context *smu)
+@@ -1278,21 +1325,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu)
+ &dpm_context->dpm_tables.soc_table;
+ struct smu_umd_pstate_table *pstate_table =
+ &smu->pstate_table;
++ struct amdgpu_device *adev = smu->adev;
+
+ pstate_table->gfxclk_pstate.min = gfx_table->min;
+ pstate_table->gfxclk_pstate.peak = gfx_table->max;
+- if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK)
+- pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
+
+ pstate_table->uclk_pstate.min = mem_table->min;
+ pstate_table->uclk_pstate.peak = mem_table->max;
+- if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK)
+- pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
+
+ pstate_table->socclk_pstate.min = soc_table->min;
+ pstate_table->socclk_pstate.peak = soc_table->max;
+- if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK)
++
++ switch (adev->asic_type) {
++ case CHIP_SIENNA_CICHLID:
++ case CHIP_NAVY_FLOUNDER:
++ pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
++ pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
+ pstate_table->socclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK;
++ break;
++ case CHIP_DIMGREY_CAVEFISH:
++ pstate_table->gfxclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK;
++ pstate_table->uclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK;
++ pstate_table->socclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK;
++ break;
++ case CHIP_BEIGE_GOBY:
++ pstate_table->gfxclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK;
++ pstate_table->uclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK;
++ pstate_table->socclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK;
++ break;
++ default:
++ break;
++ }
+
+ return 0;
+ }
+@@ -1865,33 +1928,94 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu,
+ return 0;
+ }
+
++static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu,
++ uint32_t *gen_speed_override,
++ uint32_t *lane_width_override)
++{
++ struct amdgpu_device *adev = smu->adev;
++
++ *gen_speed_override = 0xff;
++ *lane_width_override = 0xff;
++
++ switch (adev->pdev->device) {
++ case 0x73A0:
++ case 0x73A1:
++ case 0x73A2:
++ case 0x73A3:
++ case 0x73AB:
++ case 0x73AE:
++ /* Bit 7:0: PCIE lane width, 1 to 7 corresponds to x1 to x32 */
++ *lane_width_override = 6;
++ break;
++ case 0x73E0:
++ case 0x73E1:
++ case 0x73E3:
++ *lane_width_override = 4;
++ break;
++ case 0x7420:
++ case 0x7421:
++ case 0x7422:
++ case 0x7423:
++ case 0x7424:
++ *lane_width_override = 3;
++ break;
++ default:
++ break;
++ }
++}
++
++#define MAX(a, b) ((a) > (b) ? (a) : (b))
++
+ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
+ uint32_t pcie_gen_cap,
+ uint32_t pcie_width_cap)
+ {
+ struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
+-
+- uint32_t smu_pcie_arg;
++ struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
++ uint32_t gen_speed_override, lane_width_override;
+ uint8_t *table_member1, *table_member2;
++ uint32_t min_gen_speed, max_gen_speed;
++ uint32_t min_lane_width, max_lane_width;
++ uint32_t smu_pcie_arg;
+ int ret, i;
+
+ GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
+ GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
+
+- /* lclk dpm table setup */
+- for (i = 0; i < MAX_PCIE_CONF; i++) {
+- dpm_context->dpm_tables.pcie_table.pcie_gen[i] = table_member1[i];
+- dpm_context->dpm_tables.pcie_table.pcie_lane[i] = table_member2[i];
++ sienna_cichlid_get_override_pcie_settings(smu,
++ &gen_speed_override,
++ &lane_width_override);
++
++ /* PCIE gen speed override */
++ if (gen_speed_override != 0xff) {
++ min_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
++ max_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
++ } else {
++ min_gen_speed = MAX(0, table_member1[0]);
++ max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
++ min_gen_speed = min_gen_speed > max_gen_speed ?
++ max_gen_speed : min_gen_speed;
++ }
++ pcie_table->pcie_gen[0] = min_gen_speed;
++ pcie_table->pcie_gen[1] = max_gen_speed;
++
++ /* PCIE lane width override */
++ if (lane_width_override != 0xff) {
++ min_lane_width = MIN(pcie_width_cap, lane_width_override);
++ max_lane_width = MIN(pcie_width_cap, lane_width_override);
++ } else {
++ min_lane_width = MAX(1, table_member2[0]);
++ max_lane_width = MIN(pcie_width_cap, table_member2[1]);
++ min_lane_width = min_lane_width > max_lane_width ?
++ max_lane_width : min_lane_width;
+ }
++ pcie_table->pcie_lane[0] = min_lane_width;
++ pcie_table->pcie_lane[1] = max_lane_width;
+
+ for (i = 0; i < NUM_LINK_LEVELS; i++) {
+- smu_pcie_arg = (i << 16) |
+- ((table_member1[i] <= pcie_gen_cap) ?
+- (table_member1[i] << 8) :
+- (pcie_gen_cap << 8)) |
+- ((table_member2[i] <= pcie_width_cap) ?
+- table_member2[i] :
+- pcie_width_cap);
++ smu_pcie_arg = (i << 16 |
++ pcie_table->pcie_gen[i] << 8 |
++ pcie_table->pcie_lane[i]);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_OverridePcieParameters,
+@@ -1899,11 +2023,6 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
+ NULL);
+ if (ret)
+ return ret;
+-
+- if (table_member1[i] > pcie_gen_cap)
+- dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap;
+- if (table_member2[i] > pcie_width_cap)
+- dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap;
+ }
+
+ return 0;
+@@ -1950,16 +2069,9 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
+ (OverDriveTable_t *)smu->smu_table.boot_overdrive_table;
+ OverDriveTable_t *user_od_table =
+ (OverDriveTable_t *)smu->smu_table.user_overdrive_table;
++ OverDriveTable_t user_od_table_bak;
+ int ret = 0;
+
+- /*
+- * For S3/S4/Runpm resume, no need to setup those overdrive tables again as
+- * - either they already have the default OD settings got during cold bootup
+- * - or they have some user customized OD settings which cannot be overwritten
+- */
+- if (smu->adev->in_suspend)
+- return 0;
+-
+ ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE,
+ 0, (void *)boot_od_table, false);
+ if (ret) {
+@@ -1970,7 +2082,23 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
+ sienna_cichlid_dump_od_table(smu, boot_od_table);
+
+ memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t));
+- memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
++
++ /*
++ * For S3/S4/Runpm resume, we need to setup those overdrive tables again,
++ * but we have to preserve user defined values in "user_od_table".
++ */
++ if (!smu->adev->in_suspend) {
++ memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
++ smu->user_dpm_profile.user_od = false;
++ } else if (smu->user_dpm_profile.user_od) {
++ memcpy(&user_od_table_bak, user_od_table, sizeof(OverDriveTable_t));
++ memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
++ user_od_table->GfxclkFmin = user_od_table_bak.GfxclkFmin;
++ user_od_table->GfxclkFmax = user_od_table_bak.GfxclkFmax;
++ user_od_table->UclkFmin = user_od_table_bak.UclkFmin;
++ user_od_table->UclkFmax = user_od_table_bak.UclkFmax;
++ user_od_table->VddGfxOffset = user_od_table_bak.VddGfxOffset;
++ }
+
+ return 0;
+ }
+@@ -2180,6 +2308,20 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu,
+ return ret;
+ }
+
++static int sienna_cichlid_restore_user_od_settings(struct smu_context *smu)
++{
++ struct smu_table_context *table_context = &smu->smu_table;
++ OverDriveTable_t *od_table = table_context->overdrive_table;
++ OverDriveTable_t *user_od_table = table_context->user_overdrive_table;
++ int res;
++
++ res = smu_v11_0_restore_user_od_settings(smu);
++ if (res == 0)
++ memcpy(od_table, user_od_table, sizeof(OverDriveTable_t));
++
++ return res;
++}
++
+ static int sienna_cichlid_run_btc(struct smu_context *smu)
+ {
+ return smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL);
+@@ -3728,14 +3870,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
+
+ static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu)
+ {
+- struct smu_table_context *table_context = &smu->smu_table;
+- PPTable_t *smc_pptable = table_context->driver_pptable;
++ uint16_t *mgpu_fan_boost_limit_rpm;
+
++ GET_PPTABLE_MEMBER(MGpuFanBoostLimitRpm, &mgpu_fan_boost_limit_rpm);
+ /*
+ * Skip the MGpuFanBoost setting for those ASICs
+ * which do not support it
+ */
+- if (!smc_pptable->MGpuFanBoostLimitRpm)
++ if (*mgpu_fan_boost_limit_rpm == 0)
+ return 0;
+
+ return smu_cmn_send_smc_msg_with_param(smu,
+@@ -3869,6 +4011,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
+ .dump_pptable = sienna_cichlid_dump_pptable,
+ .init_microcode = smu_v11_0_init_microcode,
+ .load_microcode = smu_v11_0_load_microcode,
++ .fini_microcode = smu_v11_0_fini_microcode,
+ .init_smc_tables = sienna_cichlid_init_smc_tables,
+ .fini_smc_tables = smu_v11_0_fini_smc_tables,
+ .init_power = smu_v11_0_init_power,
+@@ -3916,7 +4059,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
+ .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
+ .set_default_od_settings = sienna_cichlid_set_default_od_settings,
+ .od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table,
+- .restore_user_od_settings = smu_v11_0_restore_user_od_settings,
++ .restore_user_od_settings = sienna_cichlid_restore_user_od_settings,
+ .run_btc = sienna_cichlid_run_btc,
+ .set_power_source = smu_v11_0_set_power_source,
+ .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
+index 38cd0ece24f6b..42f705c7a36f8 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
+@@ -33,6 +33,14 @@ typedef enum {
+ #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK 960
+ #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK 1000
+
++#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK 1950
++#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK 960
++#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK 676
++
++#define BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK 2200
++#define BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK 960
++#define BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK 1000
++
+ extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu);
+
+ #endif
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+index 87b055466a33f..83fa3d20a1d57 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+@@ -772,7 +772,7 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu)
+ goto failed;
+ }
+
+- bitmap_copy((unsigned long *)feature_mask, feature->allowed, 64);
++ bitmap_to_arr32(feature_mask, feature->allowed, 64);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskHigh,
+ feature_mask[1], NULL);
+@@ -1235,6 +1235,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
+ uint32_t crystal_clock_freq = 2500;
+ uint32_t tach_period;
+
++ if (speed == 0)
++ return -EINVAL;
+ /*
+ * To prevent from possible overheat, some ASICs may have requirement
+ * for minimum fan speed:
+@@ -1593,6 +1595,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu)
+ if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
+ return false;
+
++ /* return true if ASIC is in BACO state already */
++ if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
++ return true;
++
+ /* Arcturus does not support this bit mask */
+ if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
+ !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+index f6ef0ce6e9e2c..5a9b47133db12 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+@@ -579,7 +579,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu,
+ DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+ SmuMetrics_legacy_t metrics;
+ struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+- int i, size = 0, ret = 0;
++ int i, idx, size = 0, ret = 0;
+ uint32_t cur_value = 0, value = 0, count = 0;
+ bool cur_value_match_level = false;
+
+@@ -653,7 +653,8 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu,
+ case SMU_MCLK:
+ case SMU_FCLK:
+ for (i = 0; i < count; i++) {
+- ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value);
++ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
++ ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value);
+ if (ret)
+ return ret;
+ if (!value)
+@@ -680,7 +681,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu,
+ DpmClocks_t *clk_table = smu->smu_table.clocks_table;
+ SmuMetrics_t metrics;
+ struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+- int i, size = 0, ret = 0;
++ int i, idx, size = 0, ret = 0;
+ uint32_t cur_value = 0, value = 0, count = 0;
+ bool cur_value_match_level = false;
+
+@@ -754,7 +755,8 @@ static int vangogh_print_clk_levels(struct smu_context *smu,
+ case SMU_MCLK:
+ case SMU_FCLK:
+ for (i = 0; i < count; i++) {
+- ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value);
++ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
++ ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value);
+ if (ret)
+ return ret;
+ if (!value)
+@@ -1386,52 +1388,38 @@ static int vangogh_set_performance_level(struct smu_context *smu,
+ uint32_t soc_mask, mclk_mask, fclk_mask;
+ uint32_t vclk_mask = 0, dclk_mask = 0;
+
++ smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
++ smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
++
+ switch (level) {
+ case AMD_DPM_FORCED_LEVEL_HIGH:
+- smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
++ smu->gfx_actual_hard_min_freq = smu->gfx_default_soft_max_freq;
+ smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+
+ ret = vangogh_force_dpm_limit_value(smu, true);
++ if (ret)
++ return ret;
+ break;
+ case AMD_DPM_FORCED_LEVEL_LOW:
+ smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+- smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+-
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
++ smu->gfx_actual_soft_max_freq = smu->gfx_default_hard_min_freq;
+
+ ret = vangogh_force_dpm_limit_value(smu, false);
++ if (ret)
++ return ret;
+ break;
+ case AMD_DPM_FORCED_LEVEL_AUTO:
+ smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+ smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+-
+ ret = vangogh_unforce_dpm_levels(smu);
+- break;
+- case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+- smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+- smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+-
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_SetHardMinGfxClk,
+- VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL);
+- if (ret)
+- return ret;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_SetSoftMaxGfxClk,
+- VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL);
+ if (ret)
+ return ret;
++ break;
++ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
++ smu->gfx_actual_hard_min_freq = VANGOGH_UMD_PSTATE_STANDARD_GFXCLK;
++ smu->gfx_actual_soft_max_freq = VANGOGH_UMD_PSTATE_STANDARD_GFXCLK;
+
+ ret = vangogh_get_profiling_clk_mask(smu, level,
+ &vclk_mask,
+@@ -1446,32 +1434,15 @@ static int vangogh_set_performance_level(struct smu_context *smu,
+ vangogh_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
+ vangogh_force_clk_levels(smu, SMU_VCLK, 1 << vclk_mask);
+ vangogh_force_clk_levels(smu, SMU_DCLK, 1 << dclk_mask);
+-
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+- smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+-
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinVcn,
+- VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL);
+- if (ret)
+- return ret;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxVcn,
+- VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL);
+- if (ret)
+- return ret;
++ smu->gfx_actual_soft_max_freq = smu->gfx_default_hard_min_freq;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+ smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+-
+ ret = vangogh_get_profiling_clk_mask(smu, level,
+ NULL,
+ NULL,
+@@ -1484,29 +1455,29 @@ static int vangogh_set_performance_level(struct smu_context *smu,
+ vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask);
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+- smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq;
+- smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq;
+-
+- smu->cpu_actual_soft_min_freq = smu->cpu_default_soft_min_freq;
+- smu->cpu_actual_soft_max_freq = smu->cpu_default_soft_max_freq;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk,
+- VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL);
+- if (ret)
+- return ret;
++ smu->gfx_actual_hard_min_freq = VANGOGH_UMD_PSTATE_PEAK_GFXCLK;
++ smu->gfx_actual_soft_max_freq = VANGOGH_UMD_PSTATE_PEAK_GFXCLK;
+
+- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
+- VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL);
++ ret = vangogh_set_peak_clock_by_device(smu);
+ if (ret)
+ return ret;
+-
+- ret = vangogh_set_peak_clock_by_device(smu);
+ break;
+ case AMD_DPM_FORCED_LEVEL_MANUAL:
+ case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
+ default:
+- break;
++ return 0;
+ }
++
++ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk,
++ smu->gfx_actual_hard_min_freq, NULL);
++ if (ret)
++ return ret;
++
++ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk,
++ smu->gfx_actual_soft_max_freq, NULL);
++ if (ret)
++ return ret;
++
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+index 145f13b8c977d..9a2584b593531 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+@@ -485,7 +485,7 @@ static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+ static int renoir_print_clk_levels(struct smu_context *smu,
+ enum smu_clk_type clk_type, char *buf)
+ {
+- int i, size = 0, ret = 0;
++ int i, idx, size = 0, ret = 0;
+ uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
+ SmuMetrics_t metrics;
+ struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+@@ -585,7 +585,8 @@ static int renoir_print_clk_levels(struct smu_context *smu,
+ case SMU_VCLK:
+ case SMU_DCLK:
+ for (i = 0; i < count; i++) {
+- ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value);
++ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
++ ret = renoir_get_dpm_clk_limited(smu, clk_type, idx, &value);
+ if (ret)
+ return ret;
+ if (!value)
+@@ -1127,6 +1128,39 @@ static int renoir_get_power_profile_mode(struct smu_context *smu,
+ return size;
+ }
+
++static void renoir_get_ss_power_percent(SmuMetrics_t *metrics,
++ uint32_t *apu_percent, uint32_t *dgpu_percent)
++{
++ uint32_t apu_boost = 0;
++ uint32_t dgpu_boost = 0;
++ uint16_t apu_limit = 0;
++ uint16_t dgpu_limit = 0;
++ uint16_t apu_power = 0;
++ uint16_t dgpu_power = 0;
++
++ apu_power = metrics->ApuPower;
++ apu_limit = metrics->StapmOriginalLimit;
++ if (apu_power > apu_limit && apu_limit != 0)
++ apu_boost = ((apu_power - apu_limit) * 100) / apu_limit;
++ apu_boost = (apu_boost > 100) ? 100 : apu_boost;
++
++ dgpu_power = metrics->dGpuPower;
++ if (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit)
++ dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOriginalLimit;
++ if (dgpu_power > dgpu_limit && dgpu_limit != 0)
++ dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit;
++ dgpu_boost = (dgpu_boost > 100) ? 100 : dgpu_boost;
++
++ if (dgpu_boost >= apu_boost)
++ apu_boost = 0;
++ else
++ dgpu_boost = 0;
++
++ *apu_percent = apu_boost;
++ *dgpu_percent = dgpu_boost;
++}
++
++
+ static int renoir_get_smu_metrics_data(struct smu_context *smu,
+ MetricsMember_t member,
+ uint32_t *value)
+@@ -1135,6 +1169,9 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu,
+
+ SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table;
+ int ret = 0;
++ uint32_t apu_percent = 0;
++ uint32_t dgpu_percent = 0;
++
+
+ mutex_lock(&smu->metrics_lock);
+
+@@ -1183,26 +1220,18 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu,
+ *value = metrics->Voltage[1];
+ break;
+ case METRICS_SS_APU_SHARE:
+- /* return the percentage of APU power with respect to APU's power limit.
+- * percentage is reported, this isn't boost value. Smartshift power
+- * boost/shift is only when the percentage is more than 100.
++ /* return the percentage of APU power boost
++ * with respect to APU's power limit.
+ */
+- if (metrics->StapmOriginalLimit > 0)
+- *value = (metrics->ApuPower * 100) / metrics->StapmOriginalLimit;
+- else
+- *value = 0;
++ renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent);
++ *value = apu_percent;
+ break;
+ case METRICS_SS_DGPU_SHARE:
+- /* return the percentage of dGPU power with respect to dGPU's power limit.
+- * percentage is reported, this isn't boost value. Smartshift power
+- * boost/shift is only when the percentage is more than 100.
++ /* return the percentage of dGPU power boost
++ * with respect to dGPU's power limit.
+ */
+- if ((metrics->dGpuPower > 0) &&
+- (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit))
+- *value = (metrics->dGpuPower * 100) /
+- (metrics->StapmCurrentLimit - metrics->StapmOriginalLimit);
+- else
+- *value = 0;
++ renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent);
++ *value = dgpu_percent;
+ break;
+ default:
+ *value = UINT_MAX;
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
+index d60b8c5e87157..9c91e79c955fb 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
+@@ -120,7 +120,8 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate)
+
+ int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable)
+ {
+- if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
++ /* Until now SMU12 is only implemented for the Renoir series, so there is no need for an APU check here. */
++ if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix)
+ return 0;
+
+ return smu_cmn_send_smc_msg_with_param(smu,
+@@ -191,6 +192,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu)
+ kfree(smu_table->watermarks_table);
+ smu_table->watermarks_table = NULL;
+
++ kfree(smu_table->gpu_metrics_table);
++ smu_table->gpu_metrics_table = NULL;
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+index 5019903db492a..d0c6b864d00af 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+@@ -1619,7 +1619,7 @@ static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en)
+ {
+ return smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_GmiPwrDnControl,
+- en ? 1 : 0,
++ en ? 0 : 1,
+ NULL);
+ }
+
+@@ -1627,6 +1627,7 @@ static const struct throttling_logging_label {
+ uint32_t feature_mask;
+ const char *label;
+ } logging_label[] = {
++ {(1U << THROTTLER_TEMP_GPU_BIT), "GPU"},
+ {(1U << THROTTLER_TEMP_MEM_BIT), "HBM"},
+ {(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"},
+ {(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"},
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+index a0e50f23b1dd7..a3723ba359231 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+@@ -197,6 +197,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
+
+ int smu_v13_0_check_fw_version(struct smu_context *smu)
+ {
++ struct amdgpu_device *adev = smu->adev;
+ uint32_t if_version = 0xff, smu_version = 0xff;
+ uint16_t smu_major;
+ uint8_t smu_minor, smu_debug;
+@@ -209,6 +210,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
+ smu_major = (smu_version >> 16) & 0xffff;
+ smu_minor = (smu_version >> 8) & 0xff;
+ smu_debug = (smu_version >> 0) & 0xff;
++ if (smu->is_apu)
++ adev->pm.fw_version = smu_version;
+
+ switch (smu->adev->asic_type) {
+ case CHIP_ALDEBARAN:
+@@ -453,11 +456,11 @@ int smu_v13_0_init_power(struct smu_context *smu)
+ if (smu_power->power_context || smu_power->power_context_size != 0)
+ return -EINVAL;
+
+- smu_power->power_context = kzalloc(sizeof(struct smu_13_0_dpm_context),
++ smu_power->power_context = kzalloc(sizeof(struct smu_13_0_power_context),
+ GFP_KERNEL);
+ if (!smu_power->power_context)
+ return -ENOMEM;
+- smu_power->power_context_size = sizeof(struct smu_13_0_dpm_context);
++ smu_power->power_context_size = sizeof(struct smu_13_0_power_context);
+
+ return 0;
+ }
+@@ -718,7 +721,7 @@ int smu_v13_0_set_allowed_mask(struct smu_context *smu)
+ if (bitmap_empty(feature->allowed, SMU_FEATURE_MAX) || feature->feature_num < 64)
+ goto failed;
+
+- bitmap_copy((unsigned long *)feature_mask, feature->allowed, 64);
++ bitmap_to_arr32(feature_mask, feature->allowed, 64);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskHigh,
+ feature_mask[1], NULL);
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+index a403657151ba1..81b1d4ea8a96c 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+@@ -291,14 +291,9 @@ static int yellow_carp_post_smu_init(struct smu_context *smu)
+
+ static int yellow_carp_mode_reset(struct smu_context *smu, int type)
+ {
+- int ret = 0, index = 0;
+-
+- index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
+- SMU_MSG_GfxDeviceDriverReset);
+- if (index < 0)
+- return index == -EACCES ? 0 : index;
++ int ret = 0;
+
+- ret = smu_cmn_send_smc_msg_with_param(smu, (uint16_t)index, type, NULL);
++ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, type, NULL);
+ if (ret)
+ dev_err(smu->adev->dev, "Failed to mode reset!\n");
+
+@@ -310,6 +305,42 @@ static int yellow_carp_mode2_reset(struct smu_context *smu)
+ return yellow_carp_mode_reset(smu, SMU_RESET_MODE_2);
+ }
+
++
++static void yellow_carp_get_ss_power_percent(SmuMetrics_t *metrics,
++ uint32_t *apu_percent, uint32_t *dgpu_percent)
++{
++ uint32_t apu_boost = 0;
++ uint32_t dgpu_boost = 0;
++ uint16_t apu_limit = 0;
++ uint16_t dgpu_limit = 0;
++ uint16_t apu_power = 0;
++ uint16_t dgpu_power = 0;
++
++ /* APU and dGPU power values are reported in milliwatts
++ * and STAPM power limits are in watts. */
++ apu_power = metrics->ApuPower/1000;
++ apu_limit = metrics->StapmOpnLimit;
++ if (apu_power > apu_limit && apu_limit != 0)
++ apu_boost = ((apu_power - apu_limit) * 100) / apu_limit;
++ apu_boost = (apu_boost > 100) ? 100 : apu_boost;
++
++ dgpu_power = metrics->dGpuPower/1000;
++ if (metrics->StapmCurrentLimit > metrics->StapmOpnLimit)
++ dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOpnLimit;
++ if (dgpu_power > dgpu_limit && dgpu_limit != 0)
++ dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit;
++ dgpu_boost = (dgpu_boost > 100) ? 100 : dgpu_boost;
++
++ if (dgpu_boost >= apu_boost)
++ apu_boost = 0;
++ else
++ dgpu_boost = 0;
++
++ *apu_percent = apu_boost;
++ *dgpu_percent = dgpu_boost;
++
++}
++
+ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu,
+ MetricsMember_t member,
+ uint32_t *value)
+@@ -318,6 +349,8 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu,
+
+ SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table;
+ int ret = 0;
++ uint32_t apu_percent = 0;
++ uint32_t dgpu_percent = 0;
+
+ mutex_lock(&smu->metrics_lock);
+
+@@ -370,26 +403,18 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu,
+ *value = metrics->Voltage[1];
+ break;
+ case METRICS_SS_APU_SHARE:
+- /* return the percentage of APU power with respect to APU's power limit.
+- * percentage is reported, this isn't boost value. Smartshift power
+- * boost/shift is only when the percentage is more than 100.
++ /* return the percentage of APU power boost
++ * with respect to APU's power limit.
+ */
+- if (metrics->StapmOpnLimit > 0)
+- *value = (metrics->ApuPower * 100) / metrics->StapmOpnLimit;
+- else
+- *value = 0;
++ yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent);
++ *value = apu_percent;
+ break;
+ case METRICS_SS_DGPU_SHARE:
+- /* return the percentage of dGPU power with respect to dGPU's power limit.
+- * percentage is reported, this isn't boost value. Smartshift power
+- * boost/shift is only when the percentage is more than 100.
++ /* return the percentage of dGPU power boost
++ * with respect to dGPU's power limit.
+ */
+- if ((metrics->dGpuPower > 0) &&
+- (metrics->StapmCurrentLimit > metrics->StapmOpnLimit))
+- *value = (metrics->dGpuPower * 100) /
+- (metrics->StapmCurrentLimit - metrics->StapmOpnLimit);
+- else
+- *value = 0;
++ yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent);
++ *value = dgpu_percent;
+ break;
+ default:
+ *value = UINT_MAX;
+@@ -1049,7 +1074,7 @@ out:
+ static int yellow_carp_print_clk_levels(struct smu_context *smu,
+ enum smu_clk_type clk_type, char *buf)
+ {
+- int i, size = 0, ret = 0;
++ int i, idx, size = 0, ret = 0;
+ uint32_t cur_value = 0, value = 0, count = 0;
+
+ smu_cmn_get_sysfs_buf(&buf, &size);
+@@ -1081,7 +1106,8 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu,
+ goto print_clk_out;
+
+ for (i = 0; i < count; i++) {
+- ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, i, &value);
++ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
++ ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, idx, &value);
+ if (ret)
+ goto print_clk_out;
+
+diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
+index 59172acb97380..292f533d8cf0d 100644
+--- a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
++++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
+@@ -235,7 +235,7 @@ void komeda_crtc_handle_event(struct komeda_crtc *kcrtc,
+ crtc->state->event = NULL;
+ drm_crtc_send_vblank_event(crtc, event);
+ } else {
+- DRM_WARN("CRTC[%d]: FLIP happen but no pending commit.\n",
++ DRM_WARN("CRTC[%d]: FLIP happened but no pending commit.\n",
+ drm_crtc_index(&kcrtc->base));
+ }
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+@@ -286,7 +286,7 @@ komeda_crtc_atomic_enable(struct drm_crtc *crtc,
+ komeda_crtc_do_flush(crtc, old);
+ }
+
+-static void
++void
+ komeda_crtc_flush_and_wait_for_flip_done(struct komeda_crtc *kcrtc,
+ struct completion *input_flip_done)
+ {
+diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+index 93b7f09b96ca9..327051bba5b68 100644
+--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
++++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+@@ -69,6 +69,25 @@ static const struct drm_driver komeda_kms_driver = {
+ .minor = 1,
+ };
+
++static void komeda_kms_atomic_commit_hw_done(struct drm_atomic_state *state)
++{
++ struct drm_device *dev = state->dev;
++ struct komeda_kms_dev *kms = to_kdev(dev);
++ int i;
++
++ for (i = 0; i < kms->n_crtcs; i++) {
++ struct komeda_crtc *kcrtc = &kms->crtcs[i];
++
++ if (kcrtc->base.state->active) {
++ struct completion *flip_done = NULL;
++ if (kcrtc->base.state->event)
++ flip_done = kcrtc->base.state->event->base.completion;
++ komeda_crtc_flush_and_wait_for_flip_done(kcrtc, flip_done);
++ }
++ }
++ drm_atomic_helper_commit_hw_done(state);
++}
++
+ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state)
+ {
+ struct drm_device *dev = old_state->dev;
+@@ -81,7 +100,7 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state)
+
+ drm_atomic_helper_commit_modeset_enables(dev, old_state);
+
+- drm_atomic_helper_commit_hw_done(old_state);
++ komeda_kms_atomic_commit_hw_done(old_state);
+
+ drm_atomic_helper_wait_for_flip_done(dev, old_state);
+
+diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
+index 456f3c4357193..bf6e8fba50613 100644
+--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
++++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
+@@ -182,6 +182,8 @@ void komeda_kms_cleanup_private_objs(struct komeda_kms_dev *kms);
+
+ void komeda_crtc_handle_event(struct komeda_crtc *kcrtc,
+ struct komeda_events *evts);
++void komeda_crtc_flush_and_wait_for_flip_done(struct komeda_crtc *kcrtc,
++ struct completion *input_flip_done);
+
+ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev);
+ void komeda_kms_detach(struct komeda_kms_dev *kms);
+diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
+index d63d83800a8a3..517b94c3bcaf9 100644
+--- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
++++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
+@@ -265,6 +265,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms,
+
+ formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl,
+ layer->layer_type, &n_formats);
++ if (!formats) {
++ kfree(kplane);
++ return -ENOMEM;
++ }
+
+ err = drm_universal_plane_init(&kms->base, plane,
+ get_possible_crtcs(kms, c->pipeline),
+@@ -275,8 +279,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms,
+
+ komeda_put_fourcc_list(formats);
+
+- if (err)
+- goto cleanup;
++ if (err) {
++ kfree(kplane);
++ return err;
++ }
+
+ drm_plane_helper_add(plane, &komeda_plane_helper_funcs);
+
+diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
+index 494075ddbef68..b5928b52e2791 100644
+--- a/drivers/gpu/drm/arm/malidp_crtc.c
++++ b/drivers/gpu/drm/arm/malidp_crtc.c
+@@ -487,7 +487,10 @@ static void malidp_crtc_reset(struct drm_crtc *crtc)
+ if (crtc->state)
+ malidp_crtc_destroy_state(crtc, crtc->state);
+
+- __drm_atomic_helper_crtc_reset(crtc, &state->base);
++ if (state)
++ __drm_atomic_helper_crtc_reset(crtc, &state->base);
++ else
++ __drm_atomic_helper_crtc_reset(crtc, NULL);
+ }
+
+ static int malidp_crtc_enable_vblank(struct drm_crtc *crtc)
+diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
+index 8c2ab3d653b70..f67c816050f22 100644
+--- a/drivers/gpu/drm/arm/malidp_planes.c
++++ b/drivers/gpu/drm/arm/malidp_planes.c
+@@ -348,7 +348,7 @@ static bool malidp_check_pages_threshold(struct malidp_plane_state *ms,
+ else
+ sgt = obj->funcs->get_sg_table(obj);
+
+- if (!sgt)
++ if (IS_ERR(sgt))
+ return false;
+
+ sgl = sgt->sgl;
+diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
+index 8e3e98f13db49..54168134d9b93 100644
+--- a/drivers/gpu/drm/armada/armada_drv.c
++++ b/drivers/gpu/drm/armada/armada_drv.c
+@@ -99,7 +99,6 @@ static int armada_drm_bind(struct device *dev)
+ if (ret) {
+ dev_err(dev, "[" DRM_NAME ":%s] can't kick out simple-fb: %d\n",
+ __func__, ret);
+- kfree(priv);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
+index 21909642ee4ca..8fbb25913327c 100644
+--- a/drivers/gpu/drm/armada/armada_gem.c
++++ b/drivers/gpu/drm/armada/armada_gem.c
+@@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+ struct drm_armada_gem_pwrite *args = data;
+ struct armada_gem_object *dobj;
+ char __user *ptr;
+- int ret;
++ int ret = 0;
+
+ DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
+ args->handle, args->offset, args->size, args->ptr);
+@@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+ if (!access_ok(ptr, args->size))
+ return -EFAULT;
+
+- ret = fault_in_pages_readable(ptr, args->size);
+- if (ret)
+- return ret;
++ if (fault_in_readable(ptr, args->size))
++ return -EFAULT;
+
+ dobj = armada_gem_object_lookup(file, args->handle);
+ if (dobj == NULL)
+diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c
+index 424250535fed9..0383deb970bbb 100644
+--- a/drivers/gpu/drm/armada/armada_overlay.c
++++ b/drivers/gpu/drm/armada/armada_overlay.c
+@@ -4,6 +4,8 @@
+ * Rewritten from the dovefb driver, and Armada510 manuals.
+ */
+
++#include <linux/bitfield.h>
++
+ #include <drm/armada_drm.h>
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_atomic_helper.h>
+@@ -451,8 +453,8 @@ static int armada_overlay_get_property(struct drm_plane *plane,
+ drm_to_overlay_state(state)->colorkey_ug,
+ drm_to_overlay_state(state)->colorkey_vb, 0);
+ } else if (property == priv->colorkey_mode_prop) {
+- *val = (drm_to_overlay_state(state)->colorkey_mode &
+- CFG_CKMODE_MASK) >> ffs(CFG_CKMODE_MASK);
++ *val = FIELD_GET(CFG_CKMODE_MASK,
++ drm_to_overlay_state(state)->colorkey_mode);
+ } else if (property == priv->brightness_prop) {
+ *val = drm_to_overlay_state(state)->brightness + 256;
+ } else if (property == priv->contrast_prop) {
+diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+index b53fee6f1c170..65f172807a0d5 100644
+--- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
++++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+@@ -291,7 +291,7 @@ vga_pw_show(struct device *dev, struct device_attribute *attr, char *buf)
+ if (rc)
+ return rc;
+
+- return sprintf(buf, "%u\n", reg & 1);
++ return sprintf(buf, "%u\n", reg);
+ }
+ static DEVICE_ATTR_RO(vga_pw);
+
+diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
+index 79a3618679554..754a08c92d3d1 100644
+--- a/drivers/gpu/drm/ast/ast_main.c
++++ b/drivers/gpu/drm/ast/ast_main.c
+@@ -423,11 +423,12 @@ struct ast_private *ast_device_create(const struct drm_driver *drv,
+ return ERR_PTR(-EIO);
+
+ /*
+- * If we don't have IO space at all, use MMIO now and
+- * assume the chip has MMIO enabled by default (rev 0x20
+- * and higher).
++ * On AST2500 and later, MMIO is enabled by default, and it should be used
++ * to stay compatible with Arm platforms.
+ */
+- if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
++ if (pdev->revision >= 0x40) {
++ ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
++ } else if (!(pci_resource_flags(pdev, 2) & IORESOURCE_IO)) {
+ drm_info(dev, "platform has no IO space, trying MMIO\n");
+ ast->ioregs = ast->regs + AST_IO_MM_OFFSET;
+ }
+diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
+index 1e30eaeb0e1b3..08ed0d08d03b8 100644
+--- a/drivers/gpu/drm/ast/ast_mode.c
++++ b/drivers/gpu/drm/ast/ast_mode.c
+@@ -474,7 +474,10 @@ static void ast_set_color_reg(struct ast_private *ast,
+ static void ast_set_crtthd_reg(struct ast_private *ast)
+ {
+ /* Set Threshold */
+- if (ast->chip == AST2300 || ast->chip == AST2400 ||
++ if (ast->chip == AST2600) {
++ ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa7, 0xe0);
++ ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa6, 0xa0);
++ } else if (ast->chip == AST2300 || ast->chip == AST2400 ||
+ ast->chip == AST2500) {
+ ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa7, 0x78);
+ ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa6, 0x60);
+@@ -1121,7 +1124,10 @@ static void ast_crtc_reset(struct drm_crtc *crtc)
+ if (crtc->state)
+ crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+
+- __drm_atomic_helper_crtc_reset(crtc, &ast_state->base);
++ if (ast_state)
++ __drm_atomic_helper_crtc_reset(crtc, &ast_state->base);
++ else
++ __drm_atomic_helper_crtc_reset(crtc, NULL);
+ }
+
+ static struct drm_crtc_state *
+diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
+index b5d92f652fd85..1dac7f987a61d 100644
+--- a/drivers/gpu/drm/ast/ast_post.c
++++ b/drivers/gpu/drm/ast/ast_post.c
+@@ -291,7 +291,7 @@ static void ast_init_dram_reg(struct drm_device *dev)
+ ;
+ } while (ast_read32(ast, 0x10100) != 0xa8);
+ } else {/* AST2100/1100 */
+- if (ast->chip == AST2100 || ast->chip == 2200)
++ if (ast->chip == AST2100 || ast->chip == AST2200)
+ dram_reg_info = ast2100_dram_table_data;
+ else
+ dram_reg_info = ast1100_dram_table_data;
+diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
+index d9eb353a4bf09..dbe1cc620f6e6 100644
+--- a/drivers/gpu/drm/ast/ast_tables.h
++++ b/drivers/gpu/drm/ast/ast_tables.h
+@@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = {
+ };
+
+ static const struct ast_vbios_enhtable res_1600x900[] = {
+- {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */
+- (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A },
+ {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */
+ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+ AST2500PreCatchCRT), 60, 1, 0x3A },
+diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
+index 431b6e12a81fe..68ec45abc1fbf 100644
+--- a/drivers/gpu/drm/bridge/Kconfig
++++ b/drivers/gpu/drm/bridge/Kconfig
+@@ -8,7 +8,6 @@ config DRM_BRIDGE
+ config DRM_PANEL_BRIDGE
+ def_bool y
+ depends on DRM_BRIDGE
+- depends on DRM_KMS_HELPER
+ select DRM_PANEL
+ help
+ DRM bridge wrapper of DRM panels
+@@ -30,6 +29,7 @@ config DRM_CDNS_DSI
+ config DRM_CHIPONE_ICN6211
+ tristate "Chipone ICN6211 MIPI-DSI/RGB Converter bridge"
+ depends on OF
++ select DRM_KMS_HELPER
+ select DRM_MIPI_DSI
+ select DRM_PANEL_BRIDGE
+ help
+diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h
+index 05e3abb5a0c9a..fdd8e3d3232ec 100644
+--- a/drivers/gpu/drm/bridge/adv7511/adv7511.h
++++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h
+@@ -169,6 +169,7 @@
+ #define ADV7511_PACKET_ENABLE_SPARE2 BIT(1)
+ #define ADV7511_PACKET_ENABLE_SPARE1 BIT(0)
+
++#define ADV7535_REG_POWER2_HPD_OVERRIDE BIT(6)
+ #define ADV7511_REG_POWER2_HPD_SRC_MASK 0xc0
+ #define ADV7511_REG_POWER2_HPD_SRC_BOTH 0x00
+ #define ADV7511_REG_POWER2_HPD_SRC_HPD 0x40
+@@ -386,10 +387,7 @@ void adv7511_cec_irq_process(struct adv7511 *adv7511, unsigned int irq1);
+ #else
+ static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511)
+ {
+- unsigned int offset = adv7511->type == ADV7533 ?
+- ADV7533_REG_CEC_OFFSET : 0;
+-
+- regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset,
++ regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL,
+ ADV7511_CEC_CTRL_POWER_DOWN);
+ return 0;
+ }
+@@ -397,7 +395,8 @@ static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511)
+
+ void adv7533_dsi_power_on(struct adv7511 *adv);
+ void adv7533_dsi_power_off(struct adv7511 *adv);
+-void adv7533_mode_set(struct adv7511 *adv, const struct drm_display_mode *mode);
++enum drm_mode_status adv7533_mode_valid(struct adv7511 *adv,
++ const struct drm_display_mode *mode);
+ int adv7533_patch_registers(struct adv7511 *adv);
+ int adv7533_patch_cec_registers(struct adv7511 *adv);
+ int adv7533_attach_dsi(struct adv7511 *adv);
+diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
+index a20a45c0b353f..ddd1305b82b2c 100644
+--- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
++++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c
+@@ -316,7 +316,7 @@ int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511)
+ goto err_cec_alloc;
+ }
+
+- regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset, 0);
++ regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL, 0);
+ /* cec soft reset */
+ regmap_write(adv7511->regmap_cec,
+ ADV7511_REG_CEC_SOFT_RESET + offset, 0x01);
+@@ -343,7 +343,7 @@ err_cec_alloc:
+ dev_info(dev, "Initializing CEC failed with error %d, disabling CEC\n",
+ ret);
+ err_cec_parse_dt:
+- regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL + offset,
++ regmap_write(adv7511->regmap, ADV7511_REG_CEC_CTRL,
+ ADV7511_CEC_CTRL_POWER_DOWN);
+ return ret == -EPROBE_DEFER ? ret : 0;
+ }
+diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+index 76555ae64e9ce..ce40cd1ae1670 100644
+--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
++++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+@@ -351,11 +351,17 @@ static void __adv7511_power_on(struct adv7511 *adv7511)
+ * from standby or are enabled. When the HPD goes low the adv7511 is
+ * reset and the outputs are disabled which might cause the monitor to
+ * go to standby again. To avoid this we ignore the HPD pin for the
+- * first few seconds after enabling the output.
++ * first few seconds after enabling the output. On the other hand
++ * adv7535 require to enable HPD Override bit for proper HPD.
+ */
+- regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
+- ADV7511_REG_POWER2_HPD_SRC_MASK,
+- ADV7511_REG_POWER2_HPD_SRC_NONE);
++ if (adv7511->type == ADV7535)
++ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
++ ADV7535_REG_POWER2_HPD_OVERRIDE,
++ ADV7535_REG_POWER2_HPD_OVERRIDE);
++ else
++ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
++ ADV7511_REG_POWER2_HPD_SRC_MASK,
++ ADV7511_REG_POWER2_HPD_SRC_NONE);
+ }
+
+ static void adv7511_power_on(struct adv7511 *adv7511)
+@@ -375,6 +381,10 @@ static void adv7511_power_on(struct adv7511 *adv7511)
+ static void __adv7511_power_off(struct adv7511 *adv7511)
+ {
+ /* TODO: setup additional power down modes */
++ if (adv7511->type == ADV7535)
++ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
++ ADV7535_REG_POWER2_HPD_OVERRIDE, 0);
++
+ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER,
+ ADV7511_POWER_POWER_DOWN,
+ ADV7511_POWER_POWER_DOWN);
+@@ -672,9 +682,14 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector)
+ status = connector_status_disconnected;
+ } else {
+ /* Renable HPD sensing */
+- regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
+- ADV7511_REG_POWER2_HPD_SRC_MASK,
+- ADV7511_REG_POWER2_HPD_SRC_BOTH);
++ if (adv7511->type == ADV7535)
++ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
++ ADV7535_REG_POWER2_HPD_OVERRIDE,
++ ADV7535_REG_POWER2_HPD_OVERRIDE);
++ else
++ regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2,
++ ADV7511_REG_POWER2_HPD_SRC_MASK,
++ ADV7511_REG_POWER2_HPD_SRC_BOTH);
+ }
+
+ adv7511->status = status;
+@@ -682,7 +697,7 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector)
+ }
+
+ static enum drm_mode_status adv7511_mode_valid(struct adv7511 *adv7511,
+- struct drm_display_mode *mode)
++ const struct drm_display_mode *mode)
+ {
+ if (mode->clock > 165000)
+ return MODE_CLOCK_HIGH;
+@@ -771,14 +786,16 @@ static void adv7511_mode_set(struct adv7511 *adv7511,
+ else
+ low_refresh_rate = ADV7511_LOW_REFRESH_RATE_NONE;
+
+- regmap_update_bits(adv7511->regmap, 0xfb,
+- 0x6, low_refresh_rate << 1);
++ if (adv7511->type == ADV7511)
++ regmap_update_bits(adv7511->regmap, 0xfb,
++ 0x6, low_refresh_rate << 1);
++ else
++ regmap_update_bits(adv7511->regmap, 0x4a,
++ 0xc, low_refresh_rate << 2);
++
+ regmap_update_bits(adv7511->regmap, 0x17,
+ 0x60, (vsync_polarity << 6) | (hsync_polarity << 5));
+
+- if (adv7511->type == ADV7533 || adv7511->type == ADV7535)
+- adv7533_mode_set(adv7511, adj_mode);
+-
+ drm_mode_copy(&adv7511->curr_mode, adj_mode);
+
+ /*
+@@ -898,6 +915,18 @@ static void adv7511_bridge_mode_set(struct drm_bridge *bridge,
+ adv7511_mode_set(adv, mode, adj_mode);
+ }
+
++static enum drm_mode_status adv7511_bridge_mode_valid(struct drm_bridge *bridge,
++ const struct drm_display_info *info,
++ const struct drm_display_mode *mode)
++{
++ struct adv7511 *adv = bridge_to_adv7511(bridge);
++
++ if (adv->type == ADV7533 || adv->type == ADV7535)
++ return adv7533_mode_valid(adv, mode);
++ else
++ return adv7511_mode_valid(adv, mode);
++}
++
+ static int adv7511_bridge_attach(struct drm_bridge *bridge,
+ enum drm_bridge_attach_flags flags)
+ {
+@@ -948,6 +977,7 @@ static const struct drm_bridge_funcs adv7511_bridge_funcs = {
+ .enable = adv7511_bridge_enable,
+ .disable = adv7511_bridge_disable,
+ .mode_set = adv7511_bridge_mode_set,
++ .mode_valid = adv7511_bridge_mode_valid,
+ .attach = adv7511_bridge_attach,
+ .detect = adv7511_bridge_detect,
+ .get_edid = adv7511_bridge_get_edid,
+@@ -1048,6 +1078,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv)
+ ADV7511_CEC_I2C_ADDR_DEFAULT);
+ if (IS_ERR(adv->i2c_cec))
+ return PTR_ERR(adv->i2c_cec);
++
++ regmap_write(adv->regmap, ADV7511_REG_CEC_I2C_ADDR,
++ adv->i2c_cec->addr << 1);
++
+ i2c_set_clientdata(adv->i2c_cec, adv);
+
+ adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
+@@ -1252,9 +1286,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+ if (ret)
+ goto err_i2c_unregister_packet;
+
+- regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR,
+- adv7511->i2c_cec->addr << 1);
+-
+ INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work);
+
+ if (i2c->irq) {
+@@ -1291,6 +1322,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+ return 0;
+
+ err_unregister_cec:
++ cec_unregister_adapter(adv7511->cec_adap);
+ i2c_unregister_device(adv7511->i2c_cec);
+ clk_disable_unprepare(adv7511->cec_clk);
+ err_i2c_unregister_packet:
+@@ -1309,8 +1341,6 @@ static int adv7511_remove(struct i2c_client *i2c)
+
+ if (adv7511->type == ADV7533 || adv7511->type == ADV7535)
+ adv7533_detach_dsi(adv7511);
+- i2c_unregister_device(adv7511->i2c_cec);
+- clk_disable_unprepare(adv7511->cec_clk);
+
+ adv7511_uninit_regulators(adv7511);
+
+@@ -1319,6 +1349,8 @@ static int adv7511_remove(struct i2c_client *i2c)
+ adv7511_audio_exit(adv7511);
+
+ cec_unregister_adapter(adv7511->cec_adap);
++ i2c_unregister_device(adv7511->i2c_cec);
++ clk_disable_unprepare(adv7511->cec_clk);
+
+ i2c_unregister_device(adv7511->i2c_packet);
+ i2c_unregister_device(adv7511->i2c_edid);
+@@ -1362,10 +1394,21 @@ static struct i2c_driver adv7511_driver = {
+
+ static int __init adv7511_init(void)
+ {
+- if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
+- mipi_dsi_driver_register(&adv7533_dsi_driver);
++ int ret;
++
++ if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) {
++ ret = mipi_dsi_driver_register(&adv7533_dsi_driver);
++ if (ret)
++ return ret;
++ }
+
+- return i2c_add_driver(&adv7511_driver);
++ ret = i2c_add_driver(&adv7511_driver);
++ if (ret) {
++ if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
++ mipi_dsi_driver_unregister(&adv7533_dsi_driver);
++ }
++
++ return ret;
+ }
+ module_init(adv7511_init);
+
+diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c
+index 59d718bde8c41..babc0be0bbb56 100644
+--- a/drivers/gpu/drm/bridge/adv7511/adv7533.c
++++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
+@@ -100,26 +100,24 @@ void adv7533_dsi_power_off(struct adv7511 *adv)
+ regmap_write(adv->regmap_cec, 0x27, 0x0b);
+ }
+
+-void adv7533_mode_set(struct adv7511 *adv, const struct drm_display_mode *mode)
++enum drm_mode_status adv7533_mode_valid(struct adv7511 *adv,
++ const struct drm_display_mode *mode)
+ {
++ unsigned long max_lane_freq;
+ struct mipi_dsi_device *dsi = adv->dsi;
+- int lanes, ret;
+-
+- if (adv->num_dsi_lanes != 4)
+- return;
+-
+- if (mode->clock > 80000)
+- lanes = 4;
+- else
+- lanes = 3;
+-
+- if (lanes != dsi->lanes) {
+- mipi_dsi_detach(dsi);
+- dsi->lanes = lanes;
+- ret = mipi_dsi_attach(dsi);
+- if (ret)
+- dev_err(&dsi->dev, "failed to change host lanes\n");
+- }
++ u8 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
++
++ /* Check max clock for either 7533 or 7535 */
++ if (mode->clock > (adv->type == ADV7533 ? 80000 : 148500))
++ return MODE_CLOCK_HIGH;
++
++ /* Check max clock for each lane */
++ max_lane_freq = (adv->type == ADV7533 ? 800000 : 891000);
++
++ if (mode->clock * bpp > max_lane_freq * adv->num_dsi_lanes)
++ return MODE_CLOCK_HIGH;
++
++ return MODE_OK;
+ }
+
+ int adv7533_patch_registers(struct adv7511 *adv)
+diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+index b7d2e4449cfaa..f0305f833b6c0 100644
+--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+@@ -1268,6 +1268,25 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge,
+ return 0;
+ }
+
++static
++struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp,
++ struct drm_atomic_state *state)
++{
++ struct drm_encoder *encoder = dp->encoder;
++ struct drm_connector *connector;
++ struct drm_connector_state *conn_state;
++
++ connector = drm_atomic_get_old_connector_for_encoder(state, encoder);
++ if (!connector)
++ return NULL;
++
++ conn_state = drm_atomic_get_old_connector_state(state, connector);
++ if (!conn_state)
++ return NULL;
++
++ return conn_state->crtc;
++}
++
+ static
+ struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp,
+ struct drm_atomic_state *state)
+@@ -1448,14 +1467,16 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge,
+ {
+ struct drm_atomic_state *old_state = old_bridge_state->base.state;
+ struct analogix_dp_device *dp = bridge->driver_private;
+- struct drm_crtc *crtc;
++ struct drm_crtc *old_crtc, *new_crtc;
++ struct drm_crtc_state *old_crtc_state = NULL;
+ struct drm_crtc_state *new_crtc_state = NULL;
++ int ret;
+
+- crtc = analogix_dp_get_new_crtc(dp, old_state);
+- if (!crtc)
++ new_crtc = analogix_dp_get_new_crtc(dp, old_state);
++ if (!new_crtc)
+ goto out;
+
+- new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc);
++ new_crtc_state = drm_atomic_get_new_crtc_state(old_state, new_crtc);
+ if (!new_crtc_state)
+ goto out;
+
+@@ -1464,6 +1485,19 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge,
+ return;
+
+ out:
++ old_crtc = analogix_dp_get_old_crtc(dp, old_state);
++ if (old_crtc) {
++ old_crtc_state = drm_atomic_get_old_crtc_state(old_state,
++ old_crtc);
++
++ /* When moving from PSR to fully disabled, exit PSR first. */
++ if (old_crtc_state && old_crtc_state->self_refresh_active) {
++ ret = analogix_dp_disable_psr(dp);
++ if (ret)
++ DRM_ERROR("Failed to disable psr (%d)\n", ret);
++ }
++ }
++
+ analogix_dp_bridge_disable(bridge);
+ }
+
+@@ -1632,8 +1666,19 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux,
+ struct drm_dp_aux_msg *msg)
+ {
+ struct analogix_dp_device *dp = to_dp(aux);
++ int ret;
++
++ pm_runtime_get_sync(dp->dev);
++
++ ret = analogix_dp_detect_hpd(dp);
++ if (ret)
++ goto out;
++
++ ret = analogix_dp_transfer(dp, msg);
++out:
++ pm_runtime_put(dp->dev);
+
+- return analogix_dp_transfer(dp, msg);
++ return ret;
+ }
+
+ struct analogix_dp_device *
+@@ -1698,8 +1743,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ dp->reg_base = devm_ioremap_resource(&pdev->dev, res);
+- if (IS_ERR(dp->reg_base))
+- return ERR_CAST(dp->reg_base);
++ if (IS_ERR(dp->reg_base)) {
++ ret = PTR_ERR(dp->reg_base);
++ goto err_disable_clk;
++ }
+
+ dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd");
+
+@@ -1711,7 +1758,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data)
+ if (IS_ERR(dp->hpd_gpiod)) {
+ dev_err(dev, "error getting HDP GPIO: %ld\n",
+ PTR_ERR(dp->hpd_gpiod));
+- return ERR_CAST(dp->hpd_gpiod);
++ ret = PTR_ERR(dp->hpd_gpiod);
++ goto err_disable_clk;
+ }
+
+ if (dp->hpd_gpiod) {
+@@ -1731,7 +1779,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data)
+
+ if (dp->irq == -ENXIO) {
+ dev_err(&pdev->dev, "failed to get irq\n");
+- return ERR_PTR(-ENODEV);
++ ret = -ENODEV;
++ goto err_disable_clk;
+ }
+
+ ret = devm_request_threaded_irq(&pdev->dev, dp->irq,
+@@ -1740,11 +1789,15 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data)
+ irq_flags, "analogix-dp", dp);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request irq\n");
+- return ERR_PTR(ret);
++ goto err_disable_clk;
+ }
+ disable_irq(dp->irq);
+
+ return dp;
++
++err_disable_clk:
++ clk_disable_unprepare(dp->clock);
++ return ERR_PTR(ret);
+ }
+ EXPORT_SYMBOL_GPL(analogix_dp_probe);
+
+@@ -1807,12 +1860,6 @@ EXPORT_SYMBOL_GPL(analogix_dp_remove);
+ int analogix_dp_suspend(struct analogix_dp_device *dp)
+ {
+ clk_disable_unprepare(dp->clock);
+-
+- if (dp->plat_data->panel) {
+- if (drm_panel_unprepare(dp->plat_data->panel))
+- DRM_ERROR("failed to turnoff the panel\n");
+- }
+-
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(analogix_dp_suspend);
+@@ -1827,13 +1874,6 @@ int analogix_dp_resume(struct analogix_dp_device *dp)
+ return ret;
+ }
+
+- if (dp->plat_data->panel) {
+- if (drm_panel_prepare(dp->plat_data->panel)) {
+- DRM_ERROR("failed to setup the panel\n");
+- return -EBUSY;
+- }
+- }
+-
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(analogix_dp_resume);
+diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+index cab6c8b92efd4..6a4f20fccf841 100644
+--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
++++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+@@ -998,11 +998,21 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
+ if (!blocking)
+ return 0;
+
++ /*
++ * db[1]!=0: entering PSR, wait for fully active remote frame buffer.
++ * db[1]==0: exiting PSR, wait for either
++ * (a) ACTIVE_RESYNC - the sink "must display the
++ * incoming active frames from the Source device with no visible
++ * glitches and/or artifacts", even though timings may still be
++ * re-synchronizing; or
++ * (b) INACTIVE - the transition is fully complete.
++ */
+ ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status,
+ psr_status >= 0 &&
+ ((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) ||
+- (!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500,
+- DP_TIMEOUT_PSR_LOOP_MS * 1000);
++ (!vsc->db[1] && (psr_status == DP_PSR_SINK_ACTIVE_RESYNC ||
++ psr_status == DP_PSR_SINK_INACTIVE))),
++ 1500, DP_TIMEOUT_PSR_LOOP_MS * 1000);
+ if (ret) {
+ dev_warn(dp->dev, "Failed to apply PSR %d\n", ret);
+ return ret;
+diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
+index 14d73fb1dd15b..f895ef1939fa0 100644
+--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
++++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
+@@ -720,7 +720,7 @@ static int edid_read(struct anx7625_data *ctx,
+ ret = sp_tx_aux_rd(ctx, 0xf1);
+
+ if (ret) {
+- sp_tx_rst_aux(ctx);
++ ret = sp_tx_rst_aux(ctx);
+ DRM_DEV_DEBUG_DRIVER(dev, "edid read fail, reset!\n");
+ } else {
+ ret = anx7625_reg_block_read(ctx, ctx->i2c.rx_p0_client,
+@@ -735,7 +735,7 @@ static int edid_read(struct anx7625_data *ctx,
+ if (cnt > EDID_TRY_CNT)
+ return -EIO;
+
+- return 0;
++ return ret;
+ }
+
+ static int segments_edid_read(struct anx7625_data *ctx,
+@@ -785,17 +785,18 @@ static int segments_edid_read(struct anx7625_data *ctx,
+ if (cnt > EDID_TRY_CNT)
+ return -EIO;
+
+- return 0;
++ return ret;
+ }
+
+ static int sp_tx_edid_read(struct anx7625_data *ctx,
+ u8 *pedid_blocks_buf)
+ {
+- u8 offset, edid_pos;
++ u8 offset;
++ int edid_pos;
+ int count, blocks_num;
+ u8 pblock_buf[MAX_DPCD_BUFFER_SIZE];
+ u8 i, j;
+- u8 g_edid_break = 0;
++ int g_edid_break = 0;
+ int ret;
+ struct device *dev = &ctx->client->dev;
+
+@@ -826,7 +827,7 @@ static int sp_tx_edid_read(struct anx7625_data *ctx,
+ g_edid_break = edid_read(ctx, offset,
+ pblock_buf);
+
+- if (g_edid_break)
++ if (g_edid_break < 0)
+ break;
+
+ memcpy(&pedid_blocks_buf[offset],
+@@ -887,7 +888,11 @@ static int sp_tx_edid_read(struct anx7625_data *ctx,
+ }
+
+ /* Reset aux channel */
+- sp_tx_rst_aux(ctx);
++ ret = sp_tx_rst_aux(ctx);
++ if (ret < 0) {
++ DRM_DEV_ERROR(dev, "Failed to reset aux channel!\n");
++ return ret;
++ }
+
+ return (blocks_num + 1);
+ }
+diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c
+index e6e331071a00d..dd57b104aec3a 100644
+--- a/drivers/gpu/drm/bridge/cdns-dsi.c
++++ b/drivers/gpu/drm/bridge/cdns-dsi.c
+@@ -1286,6 +1286,7 @@ static const struct of_device_id cdns_dsi_of_match[] = {
+ { .compatible = "cdns,dsi" },
+ { },
+ };
++MODULE_DEVICE_TABLE(of, cdns_dsi_of_match);
+
+ static struct platform_driver cdns_dsi_platform_driver = {
+ .probe = cdns_dsi_drm_probe,
+diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c
+index a6151db955868..d7eedf35e8415 100644
+--- a/drivers/gpu/drm/bridge/chipone-icn6211.c
++++ b/drivers/gpu/drm/bridge/chipone-icn6211.c
+@@ -14,8 +14,19 @@
+ #include <linux/of_device.h>
+ #include <linux/regulator/consumer.h>
+
+-#include <video/mipi_display.h>
+-
++#define VENDOR_ID 0x00
++#define DEVICE_ID_H 0x01
++#define DEVICE_ID_L 0x02
++#define VERSION_ID 0x03
++#define FIRMWARE_VERSION 0x08
++#define CONFIG_FINISH 0x09
++#define PD_CTRL(n) (0x0a + ((n) & 0x3)) /* 0..3 */
++#define RST_CTRL(n) (0x0e + ((n) & 0x1)) /* 0..1 */
++#define SYS_CTRL(n) (0x10 + ((n) & 0x7)) /* 0..4 */
++#define RGB_DRV(n) (0x18 + ((n) & 0x3)) /* 0..3 */
++#define RGB_DLY(n) (0x1c + ((n) & 0x1)) /* 0..1 */
++#define RGB_TEST_CTRL 0x1e
++#define ATE_PLL_EN 0x1f
+ #define HACTIVE_LI 0x20
+ #define VACTIVE_LI 0x21
+ #define VACTIVE_HACTIVE_HI 0x22
+@@ -23,9 +34,101 @@
+ #define HSYNC_LI 0x24
+ #define HBP_LI 0x25
+ #define HFP_HSW_HBP_HI 0x26
++#define HFP_HSW_HBP_HI_HFP(n) (((n) & 0x300) >> 4)
++#define HFP_HSW_HBP_HI_HS(n) (((n) & 0x300) >> 6)
++#define HFP_HSW_HBP_HI_HBP(n) (((n) & 0x300) >> 8)
+ #define VFP 0x27
+ #define VSYNC 0x28
+ #define VBP 0x29
++#define BIST_POL 0x2a
++#define BIST_POL_BIST_MODE(n) (((n) & 0xf) << 4)
++#define BIST_POL_BIST_GEN BIT(3)
++#define BIST_POL_HSYNC_POL BIT(2)
++#define BIST_POL_VSYNC_POL BIT(1)
++#define BIST_POL_DE_POL BIT(0)
++#define BIST_RED 0x2b
++#define BIST_GREEN 0x2c
++#define BIST_BLUE 0x2d
++#define BIST_CHESS_X 0x2e
++#define BIST_CHESS_Y 0x2f
++#define BIST_CHESS_XY_H 0x30
++#define BIST_FRAME_TIME_L 0x31
++#define BIST_FRAME_TIME_H 0x32
++#define FIFO_MAX_ADDR_LOW 0x33
++#define SYNC_EVENT_DLY 0x34
++#define HSW_MIN 0x35
++#define HFP_MIN 0x36
++#define LOGIC_RST_NUM 0x37
++#define OSC_CTRL(n) (0x48 + ((n) & 0x7)) /* 0..5 */
++#define BG_CTRL 0x4e
++#define LDO_PLL 0x4f
++#define PLL_CTRL(n) (0x50 + ((n) & 0xf)) /* 0..15 */
++#define PLL_CTRL_6_EXTERNAL 0x90
++#define PLL_CTRL_6_MIPI_CLK 0x92
++#define PLL_CTRL_6_INTERNAL 0x93
++#define PLL_REM(n) (0x60 + ((n) & 0x3)) /* 0..2 */
++#define PLL_DIV(n) (0x63 + ((n) & 0x3)) /* 0..2 */
++#define PLL_FRAC(n) (0x66 + ((n) & 0x3)) /* 0..2 */
++#define PLL_INT(n) (0x69 + ((n) & 0x1)) /* 0..1 */
++#define PLL_REF_DIV 0x6b
++#define PLL_REF_DIV_P(n) ((n) & 0xf)
++#define PLL_REF_DIV_Pe BIT(4)
++#define PLL_REF_DIV_S(n) (((n) & 0x7) << 5)
++#define PLL_SSC_P(n) (0x6c + ((n) & 0x3)) /* 0..2 */
++#define PLL_SSC_STEP(n) (0x6f + ((n) & 0x3)) /* 0..2 */
++#define PLL_SSC_OFFSET(n) (0x72 + ((n) & 0x3)) /* 0..3 */
++#define GPIO_OEN 0x79
++#define MIPI_CFG_PW 0x7a
++#define MIPI_CFG_PW_CONFIG_DSI 0xc1
++#define MIPI_CFG_PW_CONFIG_I2C 0x3e
++#define GPIO_SEL(n) (0x7b + ((n) & 0x1)) /* 0..1 */
++#define IRQ_SEL 0x7d
++#define DBG_SEL 0x7e
++#define DBG_SIGNAL 0x7f
++#define MIPI_ERR_VECTOR_L 0x80
++#define MIPI_ERR_VECTOR_H 0x81
++#define MIPI_ERR_VECTOR_EN_L 0x82
++#define MIPI_ERR_VECTOR_EN_H 0x83
++#define MIPI_MAX_SIZE_L 0x84
++#define MIPI_MAX_SIZE_H 0x85
++#define DSI_CTRL 0x86
++#define DSI_CTRL_UNKNOWN 0x28
++#define DSI_CTRL_DSI_LANES(n) ((n) & 0x3)
++#define MIPI_PN_SWAP 0x87
++#define MIPI_PN_SWAP_CLK BIT(4)
++#define MIPI_PN_SWAP_D(n) BIT((n) & 0x3)
++#define MIPI_SOT_SYNC_BIT_(n) (0x88 + ((n) & 0x1)) /* 0..1 */
++#define MIPI_ULPS_CTRL 0x8a
++#define MIPI_CLK_CHK_VAR 0x8e
++#define MIPI_CLK_CHK_INI 0x8f
++#define MIPI_T_TERM_EN 0x90
++#define MIPI_T_HS_SETTLE 0x91
++#define MIPI_T_TA_SURE_PRE 0x92
++#define MIPI_T_LPX_SET 0x94
++#define MIPI_T_CLK_MISS 0x95
++#define MIPI_INIT_TIME_L 0x96
++#define MIPI_INIT_TIME_H 0x97
++#define MIPI_T_CLK_TERM_EN 0x99
++#define MIPI_T_CLK_SETTLE 0x9a
++#define MIPI_TO_HS_RX_L 0x9e
++#define MIPI_TO_HS_RX_H 0x9f
++#define MIPI_PHY_(n) (0xa0 + ((n) & 0x7)) /* 0..5 */
++#define MIPI_PD_RX 0xb0
++#define MIPI_PD_TERM 0xb1
++#define MIPI_PD_HSRX 0xb2
++#define MIPI_PD_LPTX 0xb3
++#define MIPI_PD_LPRX 0xb4
++#define MIPI_PD_CK_LANE 0xb5
++#define MIPI_FORCE_0 0xb6
++#define MIPI_RST_CTRL 0xb7
++#define MIPI_RST_NUM 0xb8
++#define MIPI_DBG_SET_(n) (0xc0 + ((n) & 0xf)) /* 0..9 */
++#define MIPI_DBG_SEL 0xe0
++#define MIPI_DBG_DATA 0xe1
++#define MIPI_ATE_TEST_SEL 0xe2
++#define MIPI_ATE_STATUS_(n) (0xe3 + ((n) & 0x1)) /* 0..1 */
++#define MIPI_ATE_STATUS_1 0xe4
++#define ICN6211_MAX_REGISTER MIPI_ATE_STATUS(1)
+
+ struct chipone {
+ struct device *dev;
+@@ -65,14 +168,15 @@ static void chipone_enable(struct drm_bridge *bridge)
+ {
+ struct chipone *icn = bridge_to_chipone(bridge);
+ struct drm_display_mode *mode = bridge_to_mode(bridge);
++ u16 hfp, hbp, hsync;
+
+- ICN6211_DSI(icn, 0x7a, 0xc1);
++ ICN6211_DSI(icn, MIPI_CFG_PW, MIPI_CFG_PW_CONFIG_DSI);
+
+ ICN6211_DSI(icn, HACTIVE_LI, mode->hdisplay & 0xff);
+
+ ICN6211_DSI(icn, VACTIVE_LI, mode->vdisplay & 0xff);
+
+- /**
++ /*
+ * lsb nibble: 2nd nibble of hdisplay
+ * msb nibble: 2nd nibble of vdisplay
+ */
+@@ -80,13 +184,18 @@ static void chipone_enable(struct drm_bridge *bridge)
+ ((mode->hdisplay >> 8) & 0xf) |
+ (((mode->vdisplay >> 8) & 0xf) << 4));
+
+- ICN6211_DSI(icn, HFP_LI, mode->hsync_start - mode->hdisplay);
+-
+- ICN6211_DSI(icn, HSYNC_LI, mode->hsync_end - mode->hsync_start);
+-
+- ICN6211_DSI(icn, HBP_LI, mode->htotal - mode->hsync_end);
++ hfp = mode->hsync_start - mode->hdisplay;
++ hsync = mode->hsync_end - mode->hsync_start;
++ hbp = mode->htotal - mode->hsync_end;
+
+- ICN6211_DSI(icn, HFP_HSW_HBP_HI, 0x00);
++ ICN6211_DSI(icn, HFP_LI, hfp & 0xff);
++ ICN6211_DSI(icn, HSYNC_LI, hsync & 0xff);
++ ICN6211_DSI(icn, HBP_LI, hbp & 0xff);
++ /* Top two bits of Horizontal Front porch/Sync/Back porch */
++ ICN6211_DSI(icn, HFP_HSW_HBP_HI,
++ HFP_HSW_HBP_HI_HFP(hfp) |
++ HFP_HSW_HBP_HI_HS(hsync) |
++ HFP_HSW_HBP_HI_HBP(hbp));
+
+ ICN6211_DSI(icn, VFP, mode->vsync_start - mode->vdisplay);
+
+@@ -95,21 +204,21 @@ static void chipone_enable(struct drm_bridge *bridge)
+ ICN6211_DSI(icn, VBP, mode->vtotal - mode->vsync_end);
+
+ /* dsi specific sequence */
+- ICN6211_DSI(icn, MIPI_DCS_SET_TEAR_OFF, 0x80);
+- ICN6211_DSI(icn, MIPI_DCS_SET_ADDRESS_MODE, 0x28);
+- ICN6211_DSI(icn, 0xb5, 0xa0);
+- ICN6211_DSI(icn, 0x5c, 0xff);
+- ICN6211_DSI(icn, MIPI_DCS_SET_COLUMN_ADDRESS, 0x01);
+- ICN6211_DSI(icn, MIPI_DCS_GET_POWER_SAVE, 0x92);
+- ICN6211_DSI(icn, 0x6b, 0x71);
+- ICN6211_DSI(icn, 0x69, 0x2b);
+- ICN6211_DSI(icn, MIPI_DCS_ENTER_SLEEP_MODE, 0x40);
+- ICN6211_DSI(icn, MIPI_DCS_EXIT_SLEEP_MODE, 0x98);
++ ICN6211_DSI(icn, SYNC_EVENT_DLY, 0x80);
++ ICN6211_DSI(icn, HFP_MIN, hfp & 0xff);
++ ICN6211_DSI(icn, MIPI_PD_CK_LANE, 0xa0);
++ ICN6211_DSI(icn, PLL_CTRL(12), 0xff);
++ ICN6211_DSI(icn, BIST_POL, BIST_POL_DE_POL);
++ ICN6211_DSI(icn, PLL_CTRL(6), PLL_CTRL_6_MIPI_CLK);
++ ICN6211_DSI(icn, PLL_REF_DIV, 0x71);
++ ICN6211_DSI(icn, PLL_INT(0), 0x2b);
++ ICN6211_DSI(icn, SYS_CTRL(0), 0x40);
++ ICN6211_DSI(icn, SYS_CTRL(1), 0x98);
+
+ /* icn6211 specific sequence */
+- ICN6211_DSI(icn, 0xb6, 0x20);
+- ICN6211_DSI(icn, 0x51, 0x20);
+- ICN6211_DSI(icn, 0x09, 0x10);
++ ICN6211_DSI(icn, MIPI_FORCE_0, 0x20);
++ ICN6211_DSI(icn, PLL_CTRL(1), 0x20);
++ ICN6211_DSI(icn, CONFIG_FINISH, 0x10);
+
+ usleep_range(10000, 11000);
+ }
+diff --git a/drivers/gpu/drm/bridge/display-connector.c b/drivers/gpu/drm/bridge/display-connector.c
+index 05eb759da6fc6..d24f5b90feabf 100644
+--- a/drivers/gpu/drm/bridge/display-connector.c
++++ b/drivers/gpu/drm/bridge/display-connector.c
+@@ -13,6 +13,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/regulator/consumer.h>
+
++#include <drm/drm_atomic_helper.h>
+ #include <drm/drm_bridge.h>
+ #include <drm/drm_edid.h>
+
+@@ -87,10 +88,95 @@ static struct edid *display_connector_get_edid(struct drm_bridge *bridge,
+ return drm_get_edid(connector, conn->bridge.ddc);
+ }
+
++/*
++ * Since this bridge is tied to the connector, it acts like a passthrough,
++ * so concerning the output bus formats, either pass the bus formats from the
++ * previous bridge or return fallback data, as is done in the bridge function:
++ * drm_atomic_bridge_chain_select_bus_fmts().
++ * This supports negotiation if the bridge chain has all bits in place.
++ */
++static u32 *display_connector_get_output_bus_fmts(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state,
++ unsigned int *num_output_fmts)
++{
++ struct drm_bridge *prev_bridge = drm_bridge_get_prev_bridge(bridge);
++ struct drm_bridge_state *prev_bridge_state;
++
++ if (!prev_bridge || !prev_bridge->funcs->atomic_get_output_bus_fmts) {
++ struct drm_connector *conn = conn_state->connector;
++ u32 *out_bus_fmts;
++
++ *num_output_fmts = 1;
++ out_bus_fmts = kmalloc(sizeof(*out_bus_fmts), GFP_KERNEL);
++ if (!out_bus_fmts)
++ return NULL;
++
++ if (conn->display_info.num_bus_formats &&
++ conn->display_info.bus_formats)
++ out_bus_fmts[0] = conn->display_info.bus_formats[0];
++ else
++ out_bus_fmts[0] = MEDIA_BUS_FMT_FIXED;
++
++ return out_bus_fmts;
++ }
++
++ prev_bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state,
++ prev_bridge);
++
++ return prev_bridge->funcs->atomic_get_output_bus_fmts(prev_bridge, prev_bridge_state,
++ crtc_state, conn_state,
++ num_output_fmts);
++}
++
++/*
++ * Since this bridge is tied to the connector, it acts like a passthrough,
++ * so concerning the input bus formats, either pass the bus formats from the
++ * previous bridge or MEDIA_BUS_FMT_FIXED (like select_bus_fmt_recursive())
++ * when atomic_get_input_bus_fmts is not supported.
++ * This supports negotiation if the bridge chain has all bits in place.
++ */
++static u32 *display_connector_get_input_bus_fmts(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state,
++ u32 output_fmt,
++ unsigned int *num_input_fmts)
++{
++ struct drm_bridge *prev_bridge = drm_bridge_get_prev_bridge(bridge);
++ struct drm_bridge_state *prev_bridge_state;
++
++ if (!prev_bridge || !prev_bridge->funcs->atomic_get_input_bus_fmts) {
++ u32 *in_bus_fmts;
++
++ *num_input_fmts = 1;
++ in_bus_fmts = kmalloc(sizeof(*in_bus_fmts), GFP_KERNEL);
++ if (!in_bus_fmts)
++ return NULL;
++
++ in_bus_fmts[0] = MEDIA_BUS_FMT_FIXED;
++
++ return in_bus_fmts;
++ }
++
++ prev_bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state,
++ prev_bridge);
++
++ return prev_bridge->funcs->atomic_get_input_bus_fmts(prev_bridge, prev_bridge_state,
++ crtc_state, conn_state, output_fmt,
++ num_input_fmts);
++}
++
+ static const struct drm_bridge_funcs display_connector_bridge_funcs = {
+ .attach = display_connector_attach,
+ .detect = display_connector_detect,
+ .get_edid = display_connector_get_edid,
++ .atomic_get_output_bus_fmts = display_connector_get_output_bus_fmts,
++ .atomic_get_input_bus_fmts = display_connector_get_input_bus_fmts,
++ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
++ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
++ .atomic_reset = drm_atomic_helper_bridge_reset,
+ };
+
+ static irqreturn_t display_connector_hpd_irq(int irq, void *arg)
+@@ -107,7 +193,7 @@ static int display_connector_probe(struct platform_device *pdev)
+ {
+ struct display_connector *conn;
+ unsigned int type;
+- const char *label;
++ const char *label = NULL;
+ int ret;
+
+ conn = devm_kzalloc(&pdev->dev, sizeof(*conn), GFP_KERNEL);
+diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c
+index 2f2a09adb4bc8..64912b770086f 100644
+--- a/drivers/gpu/drm/bridge/ite-it66121.c
++++ b/drivers/gpu/drm/bridge/ite-it66121.c
+@@ -227,7 +227,7 @@ static const struct regmap_range_cfg it66121_regmap_banks[] = {
+ .selector_mask = 0x1,
+ .selector_shift = 0,
+ .window_start = 0x00,
+- .window_len = 0x130,
++ .window_len = 0x100,
+ },
+ };
+
+@@ -889,7 +889,7 @@ unlock:
+ static int it66121_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+ {
+- u32 vendor_ids[2], device_ids[2], revision_id;
++ u32 revision_id, vendor_ids[2] = { 0 }, device_ids[2] = { 0 };
+ struct device_node *ep;
+ int ret;
+ struct it66121_ctx *ctx;
+@@ -918,11 +918,26 @@ static int it66121_probe(struct i2c_client *client,
+ return -EINVAL;
+
+ ep = of_graph_get_remote_node(dev->of_node, 1, -1);
+- if (!ep)
+- return -EPROBE_DEFER;
++ if (!ep) {
++ dev_err(ctx->dev, "The endpoint is unconnected\n");
++ return -EINVAL;
++ }
++
++ if (!of_device_is_available(ep)) {
++ of_node_put(ep);
++ dev_err(ctx->dev, "The remote device is disabled\n");
++ return -ENODEV;
++ }
+
+ ctx->next_bridge = of_drm_find_bridge(ep);
+ of_node_put(ep);
++ if (!ctx->next_bridge) {
++ dev_dbg(ctx->dev, "Next bridge not found, deferring probe\n");
++ return -EPROBE_DEFER;
++ }
++
++ if (!ctx->next_bridge)
++ return -EPROBE_DEFER;
+
+ i2c_set_clientdata(client, ctx);
+ mutex_init(&ctx->lock);
+diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
+index 1b0c7eaf6c849..d3fd76a0a34ae 100644
+--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
++++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
+@@ -186,7 +186,7 @@ static int lt8912_write_lvds_config(struct lt8912 *lt)
+ {0x03, 0xff},
+ };
+
+- return regmap_multi_reg_write(lt->regmap[I2C_CEC_DSI], seq, ARRAY_SIZE(seq));
++ return regmap_multi_reg_write(lt->regmap[I2C_MAIN], seq, ARRAY_SIZE(seq));
+ };
+
+ static inline struct lt8912 *bridge_to_lt8912(struct drm_bridge *b)
+@@ -266,7 +266,7 @@ static int lt8912_video_setup(struct lt8912 *lt)
+ u32 hactive, h_total, hpw, hfp, hbp;
+ u32 vactive, v_total, vpw, vfp, vbp;
+ u8 settle = 0x08;
+- int ret;
++ int ret, hsync_activehigh, vsync_activehigh;
+
+ if (!lt)
+ return -EINVAL;
+@@ -276,12 +276,14 @@ static int lt8912_video_setup(struct lt8912 *lt)
+ hpw = lt->mode.hsync_len;
+ hbp = lt->mode.hback_porch;
+ h_total = hactive + hfp + hpw + hbp;
++ hsync_activehigh = lt->mode.flags & DISPLAY_FLAGS_HSYNC_HIGH;
+
+ vactive = lt->mode.vactive;
+ vfp = lt->mode.vfront_porch;
+ vpw = lt->mode.vsync_len;
+ vbp = lt->mode.vback_porch;
+ v_total = vactive + vfp + vpw + vbp;
++ vsync_activehigh = lt->mode.flags & DISPLAY_FLAGS_VSYNC_HIGH;
+
+ if (vactive <= 600)
+ settle = 0x04;
+@@ -315,6 +317,13 @@ static int lt8912_video_setup(struct lt8912 *lt)
+ ret |= regmap_write(lt->regmap[I2C_CEC_DSI], 0x3e, hfp & 0xff);
+ ret |= regmap_write(lt->regmap[I2C_CEC_DSI], 0x3f, hfp >> 8);
+
++ ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xab, BIT(0),
++ vsync_activehigh ? BIT(0) : 0);
++ ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xab, BIT(1),
++ hsync_activehigh ? BIT(1) : 0);
++ ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xb2, BIT(0),
++ lt->connector.display_info.is_hdmi ? BIT(0) : 0);
++
+ return ret;
+ }
+
+@@ -485,7 +494,6 @@ static int lt8912_attach_dsi(struct lt8912 *lt)
+ dsi->format = MIPI_DSI_FMT_RGB888;
+
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
+- MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_LPM |
+ MIPI_DSI_MODE_NO_EOT_PACKET;
+
+@@ -661,8 +669,8 @@ static int lt8912_parse_dt(struct lt8912 *lt)
+
+ lt->hdmi_port = of_drm_find_bridge(port_node);
+ if (!lt->hdmi_port) {
+- dev_err(lt->dev, "%s: Failed to get hdmi port\n", __func__);
+- ret = -ENODEV;
++ ret = -EPROBE_DEFER;
++ dev_err_probe(lt->dev, ret, "%s: Failed to get hdmi port\n", __func__);
+ goto err_free_host_node;
+ }
+
+diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c
+index 29b1ce2140abc..660e05fa4a704 100644
+--- a/drivers/gpu/drm/bridge/lontium-lt9611.c
++++ b/drivers/gpu/drm/bridge/lontium-lt9611.c
+@@ -185,12 +185,14 @@ static void lt9611_mipi_video_setup(struct lt9611 *lt9611,
+
+ regmap_write(lt9611->regmap, 0x8319, (u8)(hfront_porch % 256));
+
+- regmap_write(lt9611->regmap, 0x831a, (u8)(hsync_porch / 256));
++ regmap_write(lt9611->regmap, 0x831a, (u8)(hsync_porch / 256) |
++ ((hfront_porch / 256) << 4));
+ regmap_write(lt9611->regmap, 0x831b, (u8)(hsync_porch % 256));
+ }
+
+-static void lt9611_pcr_setup(struct lt9611 *lt9611, const struct drm_display_mode *mode)
++static void lt9611_pcr_setup(struct lt9611 *lt9611, const struct drm_display_mode *mode, unsigned int postdiv)
+ {
++ unsigned int pcr_m = mode->clock * 5 * postdiv / 27000;
+ const struct reg_sequence reg_cfg[] = {
+ { 0x830b, 0x01 },
+ { 0x830c, 0x10 },
+@@ -205,7 +207,6 @@ static void lt9611_pcr_setup(struct lt9611 *lt9611, const struct drm_display_mod
+
+ /* stage 2 */
+ { 0x834a, 0x40 },
+- { 0x831d, 0x10 },
+
+ /* MK limit */
+ { 0x832d, 0x38 },
+@@ -220,30 +221,28 @@ static void lt9611_pcr_setup(struct lt9611 *lt9611, const struct drm_display_mod
+ { 0x8325, 0x00 },
+ { 0x832a, 0x01 },
+ { 0x834a, 0x10 },
+- { 0x831d, 0x10 },
+- { 0x8326, 0x37 },
+ };
++ u8 pol = 0x10;
+
+- regmap_multi_reg_write(lt9611->regmap, reg_cfg, ARRAY_SIZE(reg_cfg));
++ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
++ pol |= 0x2;
++ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
++ pol |= 0x1;
++ regmap_write(lt9611->regmap, 0x831d, pol);
+
+- switch (mode->hdisplay) {
+- case 640:
+- regmap_write(lt9611->regmap, 0x8326, 0x14);
+- break;
+- case 1920:
+- regmap_write(lt9611->regmap, 0x8326, 0x37);
+- break;
+- case 3840:
++ if (mode->hdisplay == 3840)
+ regmap_multi_reg_write(lt9611->regmap, reg_cfg2, ARRAY_SIZE(reg_cfg2));
+- break;
+- }
++ else
++ regmap_multi_reg_write(lt9611->regmap, reg_cfg, ARRAY_SIZE(reg_cfg));
++
++ regmap_write(lt9611->regmap, 0x8326, pcr_m);
+
+ /* pcr rst */
+ regmap_write(lt9611->regmap, 0x8011, 0x5a);
+ regmap_write(lt9611->regmap, 0x8011, 0xfa);
+ }
+
+-static int lt9611_pll_setup(struct lt9611 *lt9611, const struct drm_display_mode *mode)
++static int lt9611_pll_setup(struct lt9611 *lt9611, const struct drm_display_mode *mode, unsigned int *postdiv)
+ {
+ unsigned int pclk = mode->clock;
+ const struct reg_sequence reg_cfg[] = {
+@@ -257,16 +256,21 @@ static int lt9611_pll_setup(struct lt9611 *lt9611, const struct drm_display_mode
+ { 0x8126, 0x55 },
+ { 0x8127, 0x66 },
+ { 0x8128, 0x88 },
++ { 0x812a, 0x20 },
+ };
+
+ regmap_multi_reg_write(lt9611->regmap, reg_cfg, ARRAY_SIZE(reg_cfg));
+
+- if (pclk > 150000)
++ if (pclk > 150000) {
+ regmap_write(lt9611->regmap, 0x812d, 0x88);
+- else if (pclk > 70000)
++ *postdiv = 1;
++ } else if (pclk > 70000) {
+ regmap_write(lt9611->regmap, 0x812d, 0x99);
+- else
++ *postdiv = 2;
++ } else {
+ regmap_write(lt9611->regmap, 0x812d, 0xaa);
++ *postdiv = 4;
++ }
+
+ /*
+ * first divide pclk by 2 first
+@@ -446,12 +450,11 @@ static void lt9611_sleep_setup(struct lt9611 *lt9611)
+ { 0x8023, 0x01 },
+ { 0x8157, 0x03 }, /* set addr pin as output */
+ { 0x8149, 0x0b },
+- { 0x8151, 0x30 }, /* disable IRQ */
++
+ { 0x8102, 0x48 }, /* MIPI Rx power down */
+ { 0x8123, 0x80 },
+ { 0x8130, 0x00 },
+- { 0x8100, 0x01 }, /* bandgap power down */
+- { 0x8101, 0x00 }, /* system clk power down */
++ { 0x8011, 0x0a },
+ };
+
+ regmap_multi_reg_write(lt9611->regmap,
+@@ -757,7 +760,7 @@ static const struct drm_connector_funcs lt9611_bridge_connector_funcs = {
+ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611,
+ struct device_node *dsi_node)
+ {
+- const struct mipi_dsi_device_info info = { "lt9611", 0, NULL };
++ const struct mipi_dsi_device_info info = { "lt9611", 0, lt9611->dev->of_node};
+ struct mipi_dsi_device *dsi;
+ struct mipi_dsi_host *host;
+ int ret;
+@@ -816,13 +819,14 @@ static int lt9611_connector_init(struct drm_bridge *bridge, struct lt9611 *lt961
+
+ drm_connector_helper_add(&lt9611->connector,
+ &lt9611_bridge_connector_helper_funcs);
+- drm_connector_attach_encoder(&lt9611->connector, bridge->encoder);
+
+ if (!bridge->encoder) {
+ DRM_ERROR("Parent encoder object not found");
+ return -ENODEV;
+ }
+
++ drm_connector_attach_encoder(&lt9611->connector, bridge->encoder);
++
+ return 0;
+ }
+
+@@ -880,12 +884,18 @@ static enum drm_mode_status lt9611_bridge_mode_valid(struct drm_bridge *bridge,
+ static void lt9611_bridge_pre_enable(struct drm_bridge *bridge)
+ {
+ struct lt9611 *lt9611 = bridge_to_lt9611(bridge);
++ static const struct reg_sequence reg_cfg[] = {
++ { 0x8102, 0x12 },
++ { 0x8123, 0x40 },
++ { 0x8130, 0xea },
++ { 0x8011, 0xfa },
++ };
+
+ if (!lt9611->sleep)
+ return;
+
+- lt9611_reset(lt9611);
+- regmap_write(lt9611->regmap, 0x80ee, 0x01);
++ regmap_multi_reg_write(lt9611->regmap,
++ reg_cfg, ARRAY_SIZE(reg_cfg));
+
+ lt9611->sleep = false;
+ }
+@@ -903,14 +913,15 @@ static void lt9611_bridge_mode_set(struct drm_bridge *bridge,
+ {
+ struct lt9611 *lt9611 = bridge_to_lt9611(bridge);
+ struct hdmi_avi_infoframe avi_frame;
++ unsigned int postdiv;
+ int ret;
+
+ lt9611_bridge_pre_enable(bridge);
+
+ lt9611_mipi_input_digital(lt9611, mode);
+- lt9611_pll_setup(lt9611, mode);
++ lt9611_pll_setup(lt9611, mode, &postdiv);
+ lt9611_mipi_video_setup(lt9611, mode);
+- lt9611_pcr_setup(lt9611, mode);
++ lt9611_pcr_setup(lt9611, mode, postdiv);
+
+ ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame,
+ &lt9611->connector,
+diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
+index 3cac16db970f0..c4454d0f6cad5 100644
+--- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
++++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
+@@ -167,9 +167,10 @@ static void lt9611uxc_hpd_work(struct work_struct *work)
+ struct lt9611uxc *lt9611uxc = container_of(work, struct lt9611uxc, work);
+ bool connected;
+
+- if (lt9611uxc->connector.dev)
+- drm_kms_helper_hotplug_event(lt9611uxc->connector.dev);
+- else {
++ if (lt9611uxc->connector.dev) {
++ if (lt9611uxc->connector.dev->mode_config.funcs)
++ drm_kms_helper_hotplug_event(lt9611uxc->connector.dev);
++ } else {
+
+ mutex_lock(&lt9611uxc->ocm_lock);
+ connected = lt9611uxc->hdmi_connected;
+@@ -339,6 +340,8 @@ static int lt9611uxc_connector_init(struct drm_bridge *bridge, struct lt9611uxc
+ return -ENODEV;
+ }
+
++ lt9611uxc->connector.polled = DRM_CONNECTOR_POLL_HPD;
++
+ drm_connector_helper_add(&lt9611uxc->connector,
+ &lt9611uxc_bridge_connector_helper_funcs);
+ ret = drm_connector_init(bridge->dev, &lt9611uxc->connector,
+@@ -994,7 +997,7 @@ static int lt9611uxc_remove(struct i2c_client *client)
+ struct lt9611uxc *lt9611uxc = i2c_get_clientdata(client);
+
+ disable_irq(client->irq);
+- flush_scheduled_work();
++ cancel_work_sync(&lt9611uxc->work);
+ lt9611uxc_audio_exit(lt9611uxc);
+ drm_bridge_remove(&lt9611uxc->bridge);
+
+diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+index d2808c4a6fb1c..e41afcc5326b1 100644
+--- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
++++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+@@ -296,7 +296,9 @@ static void ge_b850v3_lvds_remove(void)
+ * This check is to avoid both the drivers
+ * removing the bridge in their remove() function
+ */
+- if (!ge_b850v3_lvds_ptr)
++ if (!ge_b850v3_lvds_ptr ||
++ !ge_b850v3_lvds_ptr->stdp2690_i2c ||
++ !ge_b850v3_lvds_ptr->stdp4028_i2c)
+ goto out;
+
+ drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge);
+@@ -306,19 +308,10 @@ out:
+ mutex_unlock(&ge_b850v3_lvds_dev_mutex);
+ }
+
+-static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c,
+- const struct i2c_device_id *id)
++static int ge_b850v3_register(void)
+ {
++ struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c;
+ struct device *dev = &stdp4028_i2c->dev;
+- int ret;
+-
+- ret = ge_b850v3_lvds_init(dev);
+-
+- if (ret)
+- return ret;
+-
+- ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c;
+- i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr);
+
+ /* drm bridge initialization */
+ ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs;
+@@ -343,6 +336,27 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c,
+ "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr);
+ }
+
++static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c,
++ const struct i2c_device_id *id)
++{
++ struct device *dev = &stdp4028_i2c->dev;
++ int ret;
++
++ ret = ge_b850v3_lvds_init(dev);
++
++ if (ret)
++ return ret;
++
++ ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c;
++ i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr);
++
++ /* Only register after both bridges are probed */
++ if (!ge_b850v3_lvds_ptr->stdp2690_i2c)
++ return 0;
++
++ return ge_b850v3_register();
++}
++
+ static int stdp4028_ge_b850v3_fw_remove(struct i2c_client *stdp4028_i2c)
+ {
+ ge_b850v3_lvds_remove();
+@@ -386,7 +400,11 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c,
+ ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c;
+ i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr);
+
+- return 0;
++ /* Only register after both bridges are probed */
++ if (!ge_b850v3_lvds_ptr->stdp4028_i2c)
++ return 0;
++
++ return ge_b850v3_register();
+ }
+
+ static int stdp2690_ge_b850v3_fw_remove(struct i2c_client *stdp2690_i2c)
+@@ -426,7 +444,11 @@ static int __init stdpxxxx_ge_b850v3_init(void)
+ if (ret)
+ return ret;
+
+- return i2c_add_driver(&stdp2690_ge_b850v3_fw_driver);
++ ret = i2c_add_driver(&stdp2690_ge_b850v3_fw_driver);
++ if (ret)
++ i2c_del_driver(&stdp4028_ge_b850v3_fw_driver);
++
++ return ret;
+ }
+ module_init(stdpxxxx_ge_b850v3_init);
+
+diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c
+index ed8ac5059cd26..691039aba87f4 100644
+--- a/drivers/gpu/drm/bridge/nwl-dsi.c
++++ b/drivers/gpu/drm/bridge/nwl-dsi.c
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/bitfield.h>
++#include <linux/bits.h>
+ #include <linux/clk.h>
+ #include <linux/irq.h>
+ #include <linux/math64.h>
+@@ -196,12 +197,9 @@ static u32 ps2bc(struct nwl_dsi *dsi, unsigned long long ps)
+ /*
+ * ui2bc - UI time periods to byte clock cycles
+ */
+-static u32 ui2bc(struct nwl_dsi *dsi, unsigned long long ui)
++static u32 ui2bc(unsigned int ui)
+ {
+- u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
+-
+- return DIV64_U64_ROUND_UP(ui * dsi->lanes,
+- dsi->mode.clock * 1000 * bpp);
++ return DIV_ROUND_UP(ui, BITS_PER_BYTE);
+ }
+
+ /*
+@@ -232,12 +230,12 @@ static int nwl_dsi_config_host(struct nwl_dsi *dsi)
+ }
+
+ /* values in byte clock cycles */
+- cycles = ui2bc(dsi, cfg->clk_pre);
++ cycles = ui2bc(cfg->clk_pre);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_pre: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_T_PRE, cycles);
+ cycles = ps2bc(dsi, cfg->lpx + cfg->clk_prepare + cfg->clk_zero);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_tx_gap (pre): 0x%x\n", cycles);
+- cycles += ui2bc(dsi, cfg->clk_pre);
++ cycles += ui2bc(cfg->clk_pre);
+ DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_post: 0x%x\n", cycles);
+ nwl_dsi_write(dsi, NWL_DSI_CFG_T_POST, cycles);
+ cycles = ps2bc(dsi, cfg->hs_exit);
+@@ -863,18 +861,19 @@ nwl_dsi_bridge_mode_set(struct drm_bridge *bridge,
+ memcpy(&dsi->mode, adjusted_mode, sizeof(dsi->mode));
+ drm_mode_debug_printmodeline(adjusted_mode);
+
+- pm_runtime_get_sync(dev);
++ if (pm_runtime_resume_and_get(dev) < 0)
++ return;
+
+ if (clk_prepare_enable(dsi->lcdif_clk) < 0)
+- return;
++ goto runtime_put;
+ if (clk_prepare_enable(dsi->core_clk) < 0)
+- return;
++ goto runtime_put;
+
+ /* Step 1 from DSI reset-out instructions */
+ ret = reset_control_deassert(dsi->rst_pclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to deassert PCLK: %d\n", ret);
+- return;
++ goto runtime_put;
+ }
+
+ /* Step 2 from DSI reset-out instructions */
+@@ -884,13 +883,18 @@ nwl_dsi_bridge_mode_set(struct drm_bridge *bridge,
+ ret = reset_control_deassert(dsi->rst_esc);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to deassert ESC: %d\n", ret);
+- return;
++ goto runtime_put;
+ }
+ ret = reset_control_deassert(dsi->rst_byte);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to deassert BYTE: %d\n", ret);
+- return;
++ goto runtime_put;
+ }
++
++ return;
++
++runtime_put:
++ pm_runtime_put_sync(dev);
+ }
+
+ static void
+@@ -939,6 +943,40 @@ static void nwl_dsi_bridge_detach(struct drm_bridge *bridge)
+ drm_of_panel_bridge_remove(dsi->dev->of_node, 1, 0);
+ }
+
++static u32 *nwl_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state,
++ u32 output_fmt,
++ unsigned int *num_input_fmts)
++{
++ u32 *input_fmts, input_fmt;
++
++ *num_input_fmts = 0;
++
++ switch (output_fmt) {
++ /* If MEDIA_BUS_FMT_FIXED is tested, return default bus format */
++ case MEDIA_BUS_FMT_FIXED:
++ input_fmt = MEDIA_BUS_FMT_RGB888_1X24;
++ break;
++ case MEDIA_BUS_FMT_RGB888_1X24:
++ case MEDIA_BUS_FMT_RGB666_1X18:
++ case MEDIA_BUS_FMT_RGB565_1X16:
++ input_fmt = output_fmt;
++ break;
++ default:
++ return NULL;
++ }
++
++ input_fmts = kcalloc(1, sizeof(*input_fmts), GFP_KERNEL);
++ if (!input_fmts)
++ return NULL;
++ input_fmts[0] = input_fmt;
++ *num_input_fmts = 1;
++
++ return input_fmts;
++}
++
+ static const struct drm_bridge_funcs nwl_dsi_bridge_funcs = {
+ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
+@@ -946,6 +984,7 @@ static const struct drm_bridge_funcs nwl_dsi_bridge_funcs = {
+ .atomic_check = nwl_dsi_bridge_atomic_check,
+ .atomic_enable = nwl_dsi_bridge_atomic_enable,
+ .atomic_disable = nwl_dsi_bridge_atomic_disable,
++ .atomic_get_input_bus_fmts = nwl_bridge_atomic_get_input_bus_fmts,
+ .mode_set = nwl_dsi_bridge_mode_set,
+ .mode_valid = nwl_dsi_bridge_mode_valid,
+ .attach = nwl_dsi_bridge_attach,
+@@ -1171,6 +1210,7 @@ static int nwl_dsi_probe(struct platform_device *pdev)
+
+ ret = nwl_dsi_select_input(dsi);
+ if (ret < 0) {
++ pm_runtime_disable(dev);
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+ return ret;
+ }
+diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
+index c916f4b8907ef..b32295abd9e75 100644
+--- a/drivers/gpu/drm/bridge/panel.c
++++ b/drivers/gpu/drm/bridge/panel.c
+@@ -9,6 +9,7 @@
+ #include <drm/drm_connector.h>
+ #include <drm/drm_encoder.h>
+ #include <drm/drm_modeset_helper_vtables.h>
++#include <drm/drm_of.h>
+ #include <drm/drm_panel.h>
+ #include <drm/drm_print.h>
+ #include <drm/drm_probe_helper.h>
+@@ -332,3 +333,39 @@ struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge)
+ return &panel_bridge->connector;
+ }
+ EXPORT_SYMBOL(drm_panel_bridge_connector);
++
++#ifdef CONFIG_OF
++/**
++ * devm_drm_of_get_bridge - Return next bridge in the chain
++ * @dev: device to tie the bridge lifetime to
++ * @np: device tree node containing encoder output ports
++ * @port: port in the device tree node
++ * @endpoint: endpoint in the device tree node
++ *
++ * Given a DT node's port and endpoint number, finds the connected node
++ * and returns the associated bridge if any, or creates and returns a
++ * drm panel bridge instance if a panel is connected.
++ *
++ * Returns a pointer to the bridge if successful, or an error pointer
++ * otherwise.
++ */
++struct drm_bridge *devm_drm_of_get_bridge(struct device *dev,
++ struct device_node *np,
++ u32 port, u32 endpoint)
++{
++ struct drm_bridge *bridge;
++ struct drm_panel *panel;
++ int ret;
++
++ ret = drm_of_find_panel_or_bridge(np, port, endpoint,
++ &panel, &bridge);
++ if (ret)
++ return ERR_PTR(ret);
++
++ if (panel)
++ bridge = devm_drm_panel_bridge_add(dev, panel);
++
++ return bridge;
++}
++EXPORT_SYMBOL(devm_drm_of_get_bridge);
++#endif
+diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
+index 7bd0affa057a5..9248510104005 100644
+--- a/drivers/gpu/drm/bridge/parade-ps8640.c
++++ b/drivers/gpu/drm/bridge/parade-ps8640.c
+@@ -333,8 +333,8 @@ static int ps8640_probe(struct i2c_client *client)
+ if (IS_ERR(ps_bridge->panel_bridge))
+ return PTR_ERR(ps_bridge->panel_bridge);
+
+- ps_bridge->supplies[0].supply = "vdd33";
+- ps_bridge->supplies[1].supply = "vdd12";
++ ps_bridge->supplies[0].supply = "vdd12";
++ ps_bridge->supplies[1].supply = "vdd33";
+ ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies),
+ ps_bridge->supplies);
+ if (ret)
+diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
+index 843265d7f1b12..ab0bce4a988c5 100644
+--- a/drivers/gpu/drm/bridge/sil-sii8620.c
++++ b/drivers/gpu/drm/bridge/sil-sii8620.c
+@@ -605,7 +605,7 @@ static void *sii8620_burst_get_tx_buf(struct sii8620 *ctx, int len)
+ u8 *buf = &ctx->burst.tx_buf[ctx->burst.tx_count];
+ int size = len + 2;
+
+- if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) {
++ if (ctx->burst.tx_count + size >= ARRAY_SIZE(ctx->burst.tx_buf)) {
+ dev_err(ctx->dev, "TX-BLK buffer exhausted\n");
+ ctx->error = -EINVAL;
+ return NULL;
+@@ -622,7 +622,7 @@ static u8 *sii8620_burst_get_rx_buf(struct sii8620 *ctx, int len)
+ u8 *buf = &ctx->burst.rx_buf[ctx->burst.rx_count];
+ int size = len + 1;
+
+- if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) {
++ if (ctx->burst.rx_count + size >= ARRAY_SIZE(ctx->burst.rx_buf)) {
+ dev_err(ctx->dev, "RX-BLK buffer exhausted\n");
+ ctx->error = -EINVAL;
+ return NULL;
+@@ -2120,7 +2120,7 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx)
+ if (ret) {
+ dev_err(ctx->dev, "Failed to register RC device\n");
+ ctx->error = ret;
+- rc_free_device(ctx->rc_dev);
++ rc_free_device(rc_dev);
+ return;
+ }
+ ctx->rc_dev = rc_dev;
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
+index d0db1acf11d73..7d2ed0ed2fe26 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
+@@ -320,13 +320,17 @@ static int dw_hdmi_open(struct snd_pcm_substream *substream)
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct snd_dw_hdmi *dw = substream->private_data;
+ void __iomem *base = dw->data.base;
++ u8 *eld;
+ int ret;
+
+ runtime->hw = dw_hdmi_hw;
+
+- ret = snd_pcm_hw_constraint_eld(runtime, dw->data.eld);
+- if (ret < 0)
+- return ret;
++ eld = dw->data.get_eld(dw->data.hdmi);
++ if (eld) {
++ ret = snd_pcm_hw_constraint_eld(runtime, eld);
++ if (ret < 0)
++ return ret;
++ }
+
+ ret = snd_pcm_limit_hw_rates(runtime);
+ if (ret < 0)
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h
+index cb07dc0da5a70..f72d27208ebef 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h
++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h
+@@ -9,15 +9,15 @@ struct dw_hdmi_audio_data {
+ void __iomem *base;
+ int irq;
+ struct dw_hdmi *hdmi;
+- u8 *eld;
++ u8 *(*get_eld)(struct dw_hdmi *hdmi);
+ };
+
+ struct dw_hdmi_i2s_audio_data {
+ struct dw_hdmi *hdmi;
+- u8 *eld;
+
+ void (*write)(struct dw_hdmi *hdmi, u8 val, int offset);
+ u8 (*read)(struct dw_hdmi *hdmi, int offset);
++ u8 *(*get_eld)(struct dw_hdmi *hdmi);
+ };
+
+ #endif
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+index feb04f127b550..f50b47ac11a82 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+@@ -135,8 +135,15 @@ static int dw_hdmi_i2s_get_eld(struct device *dev, void *data, uint8_t *buf,
+ size_t len)
+ {
+ struct dw_hdmi_i2s_audio_data *audio = data;
++ u8 *eld;
++
++ eld = audio->get_eld(audio->hdmi);
++ if (eld)
++ memcpy(buf, eld, min_t(size_t, MAX_ELD_BYTES, len));
++ else
++		/* Pass an empty ELD if connector not available */
++ memset(buf, 0, len);
+
+- memcpy(buf, audio->eld, min_t(size_t, MAX_ELD_BYTES, len));
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+index f08d0fded61f7..8bb403bc712a4 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
++++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+@@ -757,6 +757,14 @@ static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi, bool enable)
+ hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS);
+ }
+
++static u8 *hdmi_audio_get_eld(struct dw_hdmi *hdmi)
++{
++ if (!hdmi->curr_conn)
++ return NULL;
++
++ return hdmi->curr_conn->eld;
++}
++
+ static void dw_hdmi_ahb_audio_enable(struct dw_hdmi *hdmi)
+ {
+ hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n);
+@@ -2543,8 +2551,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
+ if (!output_fmts)
+ return NULL;
+
+- /* If dw-hdmi is the only bridge, avoid negociating with ourselves */
+- if (list_is_singular(&bridge->encoder->bridge_chain)) {
++	/* If dw-hdmi is the first or only bridge, avoid negotiating with ourselves */
++ if (list_is_singular(&bridge->encoder->bridge_chain) ||
++ list_is_first(&bridge->chain_node, &bridge->encoder->bridge_chain)) {
+ *num_output_fmts = 1;
+ output_fmts[0] = MEDIA_BUS_FMT_FIXED;
+
+@@ -2585,6 +2594,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
+ * if supported. In any case the default RGB888 format is added
+ */
+
++ /* Default 8bit RGB fallback */
++ output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
++
+ if (max_bpc >= 16 && info->bpc == 16) {
+ if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444)
+ output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48;
+@@ -2618,9 +2630,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
+ if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444)
+ output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24;
+
+- /* Default 8bit RGB fallback */
+- output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
+-
+ *num_output_fmts = i;
+
+ return output_fmts;
+@@ -2961,6 +2970,7 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
+ {
+ struct dw_hdmi *hdmi = dev_id;
+ u8 intr_stat, phy_int_pol, phy_pol_mask, phy_stat;
++ enum drm_connector_status status = connector_status_unknown;
+
+ intr_stat = hdmi_readb(hdmi, HDMI_IH_PHY_STAT0);
+ phy_int_pol = hdmi_readb(hdmi, HDMI_PHY_POL0);
+@@ -2999,13 +3009,15 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
+ cec_notifier_phys_addr_invalidate(hdmi->cec_notifier);
+ mutex_unlock(&hdmi->cec_notifier_mutex);
+ }
+- }
+
+- if (intr_stat & HDMI_IH_PHY_STAT0_HPD) {
+- enum drm_connector_status status = phy_int_pol & HDMI_PHY_HPD
+- ? connector_status_connected
+- : connector_status_disconnected;
++ if (phy_stat & HDMI_PHY_HPD)
++ status = connector_status_connected;
++
++ if (!(phy_stat & (HDMI_PHY_HPD | HDMI_PHY_RX_SENSE)))
++ status = connector_status_disconnected;
++ }
+
++ if (status != connector_status_unknown) {
+ dev_dbg(hdmi->dev, "EVENT=%s\n",
+ status == connector_status_connected ?
+ "plugin" : "plugout");
+@@ -3431,7 +3443,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev,
+ audio.base = hdmi->regs;
+ audio.irq = irq;
+ audio.hdmi = hdmi;
+- audio.eld = hdmi->connector.eld;
++ audio.get_eld = hdmi_audio_get_eld;
+ hdmi->enable_audio = dw_hdmi_ahb_audio_enable;
+ hdmi->disable_audio = dw_hdmi_ahb_audio_disable;
+
+@@ -3444,7 +3456,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev,
+ struct dw_hdmi_i2s_audio_data audio;
+
+ audio.hdmi = hdmi;
+- audio.eld = hdmi->connector.eld;
++ audio.get_eld = hdmi_audio_get_eld;
+ audio.write = hdmi_writeb;
+ audio.read = hdmi_readb;
+ hdmi->enable_audio = dw_hdmi_i2s_audio_enable;
+diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
+index e44e18a0112af..56c3fd08c6a0b 100644
+--- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
++++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
+@@ -1199,6 +1199,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev,
+ ret = mipi_dsi_host_register(&dsi->dsi_host);
+ if (ret) {
+ dev_err(dev, "Failed to register MIPI host: %d\n", ret);
++ pm_runtime_disable(dev);
+ dw_mipi_dsi_debugfs_remove(dsi);
+ return ERR_PTR(ret);
+ }
+diff --git a/drivers/gpu/drm/bridge/tc358764.c b/drivers/gpu/drm/bridge/tc358764.c
+index c1e35bdf9232a..ba4e869f58a4a 100644
+--- a/drivers/gpu/drm/bridge/tc358764.c
++++ b/drivers/gpu/drm/bridge/tc358764.c
+@@ -181,7 +181,7 @@ static void tc358764_read(struct tc358764 *ctx, u16 addr, u32 *val)
+ if (ret >= 0)
+ le32_to_cpus(val);
+
+- dev_dbg(ctx->dev, "read: %d, addr: %d\n", addr, *val);
++ dev_dbg(ctx->dev, "read: addr=0x%04x data=0x%08x\n", addr, *val);
+ }
+
+ static void tc358764_write(struct tc358764 *ctx, u16 addr, u32 val)
+diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
+index 23a6f90b694b3..14d6717429639 100644
+--- a/drivers/gpu/drm/bridge/tc358767.c
++++ b/drivers/gpu/drm/bridge/tc358767.c
+@@ -1549,19 +1549,12 @@ static irqreturn_t tc_irq_handler(int irq, void *arg)
+ return IRQ_HANDLED;
+ }
+
+-static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
++static int tc_probe_edp_bridge_endpoint(struct tc_data *tc)
+ {
+- struct device *dev = &client->dev;
++ struct device *dev = tc->dev;
+ struct drm_panel *panel;
+- struct tc_data *tc;
+ int ret;
+
+- tc = devm_kzalloc(dev, sizeof(*tc), GFP_KERNEL);
+- if (!tc)
+- return -ENOMEM;
+-
+- tc->dev = dev;
+-
+ /* port@2 is the output port */
+ ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &panel, NULL);
+ if (ret && ret != -ENODEV)
+@@ -1580,6 +1573,25 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ tc->bridge.type = DRM_MODE_CONNECTOR_DisplayPort;
+ }
+
++ return 0;
++}
++
++static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
++{
++ struct device *dev = &client->dev;
++ struct tc_data *tc;
++ int ret;
++
++ tc = devm_kzalloc(dev, sizeof(*tc), GFP_KERNEL);
++ if (!tc)
++ return -ENOMEM;
++
++ tc->dev = dev;
++
++ ret = tc_probe_edp_bridge_endpoint(tc);
++ if (ret)
++ return ret;
++
+ /* Shut down GPIO is optional */
+ tc->sd_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH);
+ if (IS_ERR(tc->sd_gpio))
+diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c
+index a3db532bbdd16..8b1bdffc5005d 100644
+--- a/drivers/gpu/drm/bridge/tc358768.c
++++ b/drivers/gpu/drm/bridge/tc358768.c
+@@ -9,6 +9,8 @@
+ #include <linux/gpio/consumer.h>
+ #include <linux/i2c.h>
+ #include <linux/kernel.h>
++#include <linux/media-bus-format.h>
++#include <linux/minmax.h>
+ #include <linux/module.h>
+ #include <linux/regmap.h>
+ #include <linux/regulator/consumer.h>
+@@ -147,6 +149,7 @@ struct tc358768_priv {
+
+ u32 pd_lines; /* number of Parallel Port Input Data Lines */
+ u32 dsi_lanes; /* number of DSI Lanes */
++ u32 dsi_bpp; /* number of Bits Per Pixel over DSI */
+
+ /* Parameters for PLL programming */
+ u32 fbd; /* PLL feedback divider */
+@@ -279,12 +282,12 @@ static void tc358768_hw_disable(struct tc358768_priv *priv)
+
+ static u32 tc358768_pll_to_pclk(struct tc358768_priv *priv, u32 pll_clk)
+ {
+- return (u32)div_u64((u64)pll_clk * priv->dsi_lanes, priv->pd_lines);
++ return (u32)div_u64((u64)pll_clk * priv->dsi_lanes, priv->dsi_bpp);
+ }
+
+ static u32 tc358768_pclk_to_pll(struct tc358768_priv *priv, u32 pclk)
+ {
+- return (u32)div_u64((u64)pclk * priv->pd_lines, priv->dsi_lanes);
++ return (u32)div_u64((u64)pclk * priv->dsi_bpp, priv->dsi_lanes);
+ }
+
+ static int tc358768_calc_pll(struct tc358768_priv *priv,
+@@ -329,13 +332,17 @@ static int tc358768_calc_pll(struct tc358768_priv *priv,
+ u32 fbd;
+
+ for (fbd = 0; fbd < 512; ++fbd) {
+- u32 pll, diff;
++ u32 pll, diff, pll_in;
+
+ pll = (u32)div_u64((u64)refclk * (fbd + 1), divisor);
+
+ if (pll >= max_pll || pll < min_pll)
+ continue;
+
++ pll_in = (u32)div_u64((u64)refclk, prd + 1);
++ if (pll_in < 4000000)
++ continue;
++
+ diff = max(pll, target_pll) - min(pll, target_pll);
+
+ if (diff < best_diff) {
+@@ -417,6 +424,7 @@ static int tc358768_dsi_host_attach(struct mipi_dsi_host *host,
+ priv->output.panel = panel;
+
+ priv->dsi_lanes = dev->lanes;
++ priv->dsi_bpp = mipi_dsi_pixel_format_to_bpp(dev->format);
+
+ /* get input ep (port0/endpoint0) */
+ ret = -EINVAL;
+@@ -428,7 +436,7 @@ static int tc358768_dsi_host_attach(struct mipi_dsi_host *host,
+ }
+
+ if (ret)
+- priv->pd_lines = mipi_dsi_pixel_format_to_bpp(dev->format);
++ priv->pd_lines = priv->dsi_bpp;
+
+ drm_bridge_add(&priv->bridge);
+
+@@ -626,6 +634,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
+ struct tc358768_priv *priv = bridge_to_tc358768(bridge);
+ struct mipi_dsi_device *dsi_dev = priv->output.dev;
+ u32 val, val2, lptxcnt, hact, data_type;
++ s32 raw_val;
+ const struct drm_display_mode *mode;
+ u32 dsibclk_nsk, dsiclk_nsk, ui_nsk, phy_delay_nsk;
+ u32 dsiclk, dsibclk;
+@@ -719,25 +728,26 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
+
+ /* 38ns < TCLK_PREPARE < 95ns */
+ val = tc358768_ns_to_cnt(65, dsibclk_nsk) - 1;
+- /* TCLK_PREPARE > 300ns */
+- val2 = tc358768_ns_to_cnt(300 + tc358768_to_ns(3 * ui_nsk),
+- dsibclk_nsk);
+- val |= (val2 - tc358768_to_ns(phy_delay_nsk - dsibclk_nsk)) << 8;
++ /* TCLK_PREPARE + TCLK_ZERO > 300ns */
++ val2 = tc358768_ns_to_cnt(300 - tc358768_to_ns(2 * ui_nsk),
++ dsibclk_nsk) - 2;
++ val |= val2 << 8;
+ dev_dbg(priv->dev, "TCLK_HEADERCNT: 0x%x\n", val);
+ tc358768_write(priv, TC358768_TCLK_HEADERCNT, val);
+
+- /* TCLK_TRAIL > 60ns + 3*UI */
+- val = 60 + tc358768_to_ns(3 * ui_nsk);
+- val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 5;
++ /* TCLK_TRAIL > 60ns AND TEOT <= 105 ns + 12*UI */
++ raw_val = tc358768_ns_to_cnt(60 + tc358768_to_ns(2 * ui_nsk), dsibclk_nsk) - 5;
++ val = clamp(raw_val, 0, 127);
+ dev_dbg(priv->dev, "TCLK_TRAILCNT: 0x%x\n", val);
+ tc358768_write(priv, TC358768_TCLK_TRAILCNT, val);
+
+ /* 40ns + 4*UI < THS_PREPARE < 85ns + 6*UI */
+ val = 50 + tc358768_to_ns(4 * ui_nsk);
+ val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 1;
+- /* THS_ZERO > 145ns + 10*UI */
+- val2 = tc358768_ns_to_cnt(145 - tc358768_to_ns(ui_nsk), dsibclk_nsk);
+- val |= (val2 - tc358768_to_ns(phy_delay_nsk)) << 8;
++ /* THS_PREPARE + THS_ZERO > 145ns + 10*UI */
++ raw_val = tc358768_ns_to_cnt(145 - tc358768_to_ns(3 * ui_nsk), dsibclk_nsk) - 10;
++ val2 = clamp(raw_val, 0, 127);
++ val |= val2 << 8;
+ dev_dbg(priv->dev, "THS_HEADERCNT: 0x%x\n", val);
+ tc358768_write(priv, TC358768_THS_HEADERCNT, val);
+
+@@ -753,9 +763,10 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
+ dev_dbg(priv->dev, "TCLK_POSTCNT: 0x%x\n", val);
+ tc358768_write(priv, TC358768_TCLK_POSTCNT, val);
+
+- /* 60ns + 4*UI < THS_PREPARE < 105ns + 12*UI */
+- val = tc358768_ns_to_cnt(60 + tc358768_to_ns(15 * ui_nsk),
+- dsibclk_nsk) - 5;
++ /* max(60ns + 4*UI, 8*UI) < THS_TRAILCNT < 105ns + 12*UI */
++ raw_val = tc358768_ns_to_cnt(60 + tc358768_to_ns(18 * ui_nsk),
++ dsibclk_nsk) - 4;
++ val = clamp(raw_val, 0, 15);
+ dev_dbg(priv->dev, "THS_TRAILCNT: 0x%x\n", val);
+ tc358768_write(priv, TC358768_THS_TRAILCNT, val);
+
+@@ -769,7 +780,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
+
+ /* TXTAGOCNT[26:16] RXTASURECNT[10:0] */
+ val = tc358768_to_ns((lptxcnt + 1) * dsibclk_nsk * 4);
+- val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 1;
++ val = tc358768_ns_to_cnt(val, dsibclk_nsk) / 4 - 1;
+ val2 = tc358768_ns_to_cnt(tc358768_to_ns((lptxcnt + 1) * dsibclk_nsk),
+ dsibclk_nsk) - 2;
+ val |= val2 << 16;
+@@ -819,8 +830,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge)
+ val = TC358768_DSI_CONFW_MODE_SET | TC358768_DSI_CONFW_ADDR_DSI_CONTROL;
+ val |= (dsi_dev->lanes - 1) << 1;
+
+- if (!(dsi_dev->mode_flags & MIPI_DSI_MODE_LPM))
+- val |= TC358768_DSI_CONTROL_TXMD;
++ val |= TC358768_DSI_CONTROL_TXMD;
+
+ if (!(dsi_dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
+ val |= TC358768_DSI_CONTROL_HSCKMD;
+@@ -866,6 +876,44 @@ static void tc358768_bridge_enable(struct drm_bridge *bridge)
+ }
+ }
+
++#define MAX_INPUT_SEL_FORMATS 1
++
++static u32 *
++tc358768_atomic_get_input_bus_fmts(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state,
++ u32 output_fmt,
++ unsigned int *num_input_fmts)
++{
++ struct tc358768_priv *priv = bridge_to_tc358768(bridge);
++ u32 *input_fmts;
++
++ *num_input_fmts = 0;
++
++ input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts),
++ GFP_KERNEL);
++ if (!input_fmts)
++ return NULL;
++
++ switch (priv->pd_lines) {
++ case 16:
++ input_fmts[0] = MEDIA_BUS_FMT_RGB565_1X16;
++ break;
++ case 18:
++ input_fmts[0] = MEDIA_BUS_FMT_RGB666_1X18;
++ break;
++ default:
++ case 24:
++ input_fmts[0] = MEDIA_BUS_FMT_RGB888_1X24;
++ break;
++ };
++
++ *num_input_fmts = MAX_INPUT_SEL_FORMATS;
++
++ return input_fmts;
++}
++
+ static const struct drm_bridge_funcs tc358768_bridge_funcs = {
+ .attach = tc358768_bridge_attach,
+ .mode_valid = tc358768_bridge_mode_valid,
+@@ -873,6 +921,11 @@ static const struct drm_bridge_funcs tc358768_bridge_funcs = {
+ .enable = tc358768_bridge_enable,
+ .disable = tc358768_bridge_disable,
+ .post_disable = tc358768_bridge_post_disable,
++
++ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
++ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
++ .atomic_reset = drm_atomic_helper_bridge_reset,
++ .atomic_get_input_bus_fmts = tc358768_atomic_get_input_bus_fmts,
+ };
+
+ static const struct drm_bridge_timings default_tc358768_timings = {
+diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+index a32f70bc68ea4..b3cb910b30852 100644
+--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
++++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+@@ -381,6 +381,8 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
+ u16 val;
+ int ret;
+
++ usleep_range(10000, 11000);
++
+ /* Get the LVDS format from the bridge state. */
+ bridge_state = drm_atomic_get_new_bridge_state(state, bridge);
+
+@@ -608,10 +610,14 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model)
+ ctx->host_node = of_graph_get_remote_port_parent(endpoint);
+ of_node_put(endpoint);
+
+- if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4)
+- return -EINVAL;
+- if (!ctx->host_node)
+- return -ENODEV;
++ if (ctx->dsi_lanes <= 0 || ctx->dsi_lanes > 4) {
++ ret = -EINVAL;
++ goto err_put_node;
++ }
++ if (!ctx->host_node) {
++ ret = -ENODEV;
++ goto err_put_node;
++ }
+
+ ctx->lvds_dual_link = false;
+ ctx->lvds_dual_link_even_odd_swap = false;
+@@ -638,16 +644,22 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model)
+
+ ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &panel, &panel_bridge);
+ if (ret < 0)
+- return ret;
++ goto err_put_node;
+ if (panel) {
+ panel_bridge = devm_drm_panel_bridge_add(dev, panel);
+- if (IS_ERR(panel_bridge))
+- return PTR_ERR(panel_bridge);
++ if (IS_ERR(panel_bridge)) {
++ ret = PTR_ERR(panel_bridge);
++ goto err_put_node;
++ }
+ }
+
+ ctx->panel_bridge = panel_bridge;
+
+ return 0;
++
++err_put_node:
++ of_node_put(ctx->host_node);
++ return ret;
+ }
+
+ static int sn65dsi83_probe(struct i2c_client *client,
+@@ -680,8 +692,10 @@ static int sn65dsi83_probe(struct i2c_client *client,
+ return ret;
+
+ ctx->regmap = devm_regmap_init_i2c(client, &sn65dsi83_regmap_config);
+- if (IS_ERR(ctx->regmap))
+- return PTR_ERR(ctx->regmap);
++ if (IS_ERR(ctx->regmap)) {
++ ret = PTR_ERR(ctx->regmap);
++ goto err_put_node;
++ }
+
+ dev_set_drvdata(dev, ctx);
+ i2c_set_clientdata(client, ctx);
+@@ -691,6 +705,10 @@ static int sn65dsi83_probe(struct i2c_client *client,
+ drm_bridge_add(&ctx->bridge);
+
+ return 0;
++
++err_put_node:
++ of_node_put(ctx->host_node);
++ return ret;
+ }
+
+ static int sn65dsi83_remove(struct i2c_client *client)
+diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+index 41d48a393e7f5..22c2ff5272c60 100644
+--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
++++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+@@ -147,9 +147,9 @@
+ * each other's read-modify-write.
+ */
+ struct ti_sn65dsi86 {
+- struct auxiliary_device bridge_aux;
+- struct auxiliary_device gpio_aux;
+- struct auxiliary_device aux_aux;
++ struct auxiliary_device *bridge_aux;
++ struct auxiliary_device *gpio_aux;
++ struct auxiliary_device *aux_aux;
+
+ struct device *dev;
+ struct regmap *regmap;
+@@ -188,6 +188,7 @@ static const struct regmap_config ti_sn65dsi86_regmap_config = {
+ .val_bits = 8,
+ .volatile_table = &ti_sn_bridge_volatile_table,
+ .cache_type = REGCACHE_NONE,
++ .max_register = 0xFF,
+ };
+
+ static void ti_sn65dsi86_write_u16(struct ti_sn65dsi86 *pdata,
+@@ -411,27 +412,34 @@ static void ti_sn65dsi86_delete_aux(void *data)
+ auxiliary_device_delete(data);
+ }
+
+-/*
+- * AUX bus docs say that a non-NULL release is mandatory, but it makes no
+- * sense for the model used here where all of the aux devices are allocated
+- * in the single shared structure. We'll use this noop as a workaround.
+- */
+-static void ti_sn65dsi86_noop(struct device *dev) {}
++static void ti_sn65dsi86_aux_device_release(struct device *dev)
++{
++ struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
++
++ kfree(aux);
++}
+
+ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
+- struct auxiliary_device *aux,
++ struct auxiliary_device **aux_out,
+ const char *name)
+ {
+ struct device *dev = pdata->dev;
++ struct auxiliary_device *aux;
+ int ret;
+
++ aux = kzalloc(sizeof(*aux), GFP_KERNEL);
++ if (!aux)
++ return -ENOMEM;
++
+ aux->name = name;
+ aux->dev.parent = dev;
+- aux->dev.release = ti_sn65dsi86_noop;
++ aux->dev.release = ti_sn65dsi86_aux_device_release;
+ device_set_of_node_from_dev(&aux->dev, dev);
+ ret = auxiliary_device_init(aux);
+- if (ret)
++ if (ret) {
++ kfree(aux);
+ return ret;
++ }
+ ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux);
+ if (ret)
+ return ret;
+@@ -440,6 +448,8 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
+ if (ret)
+ return ret;
+ ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux);
++ if (!ret)
++ *aux_out = aux;
+
+ return ret;
+ }
+@@ -919,9 +929,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata)
+ &pdata->bridge.encoder->crtc->state->adjusted_mode;
+ u8 hsync_polarity = 0, vsync_polarity = 0;
+
+- if (mode->flags & DRM_MODE_FLAG_PHSYNC)
++ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+ hsync_polarity = CHA_HSYNC_POLARITY;
+- if (mode->flags & DRM_MODE_FLAG_PVSYNC)
++ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ vsync_polarity = CHA_VSYNC_POLARITY;
+
+ ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG,
+@@ -1472,6 +1482,7 @@ static inline void ti_sn_gpio_unregister(void) {}
+
+ static void ti_sn65dsi86_runtime_disable(void *data)
+ {
++ pm_runtime_dont_use_autosuspend(data);
+ pm_runtime_disable(data);
+ }
+
+@@ -1531,11 +1542,11 @@ static int ti_sn65dsi86_probe(struct i2c_client *client,
+ "failed to get reference clock\n");
+
+ pm_runtime_enable(dev);
++ pm_runtime_set_autosuspend_delay(pdata->dev, 500);
++ pm_runtime_use_autosuspend(pdata->dev);
+ ret = devm_add_action_or_reset(dev, ti_sn65dsi86_runtime_disable, dev);
+ if (ret)
+ return ret;
+- pm_runtime_set_autosuspend_delay(pdata->dev, 500);
+- pm_runtime_use_autosuspend(pdata->dev);
+
+ ti_sn65dsi86_debugfs_init(pdata);
+
+diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c
+index 74bd4a76b253c..059fd71424f6b 100644
+--- a/drivers/gpu/drm/drm_aperture.c
++++ b/drivers/gpu/drm/drm_aperture.c
+@@ -329,7 +329,20 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
+ const struct drm_driver *req_driver)
+ {
+ resource_size_t base, size;
+- int bar, ret = 0;
++ int bar, ret;
++
++ /*
++ * WARNING: Apparently we must kick fbdev drivers before vgacon,
++ * otherwise the vga fbdev driver falls over.
++ */
++#if IS_REACHABLE(CONFIG_FB)
++ ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name);
++ if (ret)
++ return ret;
++#endif
++ ret = vga_remove_vgacon(pdev);
++ if (ret)
++ return ret;
+
+ for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) {
+ if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+@@ -339,15 +352,6 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
+ drm_aperture_detach_drivers(base, size);
+ }
+
+- /*
+- * WARNING: Apparently we must kick fbdev drivers before vgacon,
+- * otherwise the vga fbdev driver falls over.
+- */
+-#if IS_REACHABLE(CONFIG_FB)
+- ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name);
+-#endif
+- if (ret == 0)
+- ret = vga_remove_vgacon(pdev);
+- return ret;
++ return 0;
+ }
+ EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers);
+diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
+index ff1416cd609a5..166d329de9397 100644
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -138,6 +138,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
+ if (!state->planes)
+ goto fail;
+
++ /*
++ * Because drm_atomic_state can be committed asynchronously we need our
++	 * own reference and cannot rely on the one implied by drm_file in the
++ * ioctl call.
++ */
++ drm_dev_get(dev);
+ state->dev = dev;
+
+ DRM_DEBUG_ATOMIC("Allocated atomic state %p\n", state);
+@@ -297,7 +303,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear);
+ void __drm_atomic_state_free(struct kref *ref)
+ {
+ struct drm_atomic_state *state = container_of(ref, typeof(*state), ref);
+- struct drm_mode_config *config = &state->dev->mode_config;
++ struct drm_device *dev = state->dev;
++ struct drm_mode_config *config = &dev->mode_config;
+
+ drm_atomic_state_clear(state);
+
+@@ -309,6 +316,8 @@ void __drm_atomic_state_free(struct kref *ref)
+ drm_atomic_state_default_release(state);
+ kfree(state);
+ }
++
++ drm_dev_put(dev);
+ }
+ EXPORT_SYMBOL(__drm_atomic_state_free);
+
+@@ -1052,6 +1061,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p,
+ drm_printf(p, "connector[%u]: %s\n", connector->base.id, connector->name);
+ drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)");
+ drm_printf(p, "\tself_refresh_aware=%d\n", state->self_refresh_aware);
++ drm_printf(p, "\tmax_requested_bpc=%d\n", state->max_requested_bpc);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ if (state->writeback_job && state->writeback_job->fb)
+@@ -1310,8 +1320,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
+
+ DRM_DEBUG_ATOMIC("checking %p\n", state);
+
+- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
+- requested_crtc |= drm_crtc_mask(crtc);
++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
++ if (new_crtc_state->enable)
++ requested_crtc |= drm_crtc_mask(crtc);
++ }
+
+ for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
+ ret = drm_atomic_plane_check(old_plane_state, new_plane_state);
+@@ -1360,8 +1372,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
+ }
+ }
+
+- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
+- affected_crtc |= drm_crtc_mask(crtc);
++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
++ if (new_crtc_state->enable)
++ affected_crtc |= drm_crtc_mask(crtc);
++ }
+
+ /*
+ * For commits that allow modesets drivers can add other CRTCs to the
+diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
+index 2c0c6ec928200..5ba316391381d 100644
+--- a/drivers/gpu/drm/drm_atomic_helper.c
++++ b/drivers/gpu/drm/drm_atomic_helper.c
+@@ -996,12 +996,22 @@ crtc_needs_disable(struct drm_crtc_state *old_state,
+ return drm_atomic_crtc_effectively_active(old_state);
+
+ /*
+- * We need to run through the crtc_funcs->disable() function if the CRTC
+- * is currently on, if it's transitioning to self refresh mode, or if
+- * it's in self refresh mode and needs to be fully disabled.
++ * We need to disable bridge(s) and CRTC if we're transitioning out of
++ * self-refresh and changing CRTCs at the same time, because the
++ * bridge tracks self-refresh status via CRTC state.
++ */
++ if (old_state->self_refresh_active &&
++ old_state->crtc != new_state->crtc)
++ return true;
++
++ /*
++ * We also need to run through the crtc_funcs->disable() function if
++ * the CRTC is currently on, if it's transitioning to self refresh
++ * mode, or if it's in self refresh mode and needs to be fully
++ * disabled.
+ */
+ return old_state->active ||
+- (old_state->self_refresh_active && !new_state->enable) ||
++ (old_state->self_refresh_active && !new_state->active) ||
+ new_state->self_refresh_active;
+ }
+
+@@ -1105,7 +1115,16 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
+ continue;
+
+ ret = drm_crtc_vblank_get(crtc);
+- WARN_ONCE(ret != -EINVAL, "driver forgot to call drm_crtc_vblank_off()\n");
++ /*
++ * Self-refresh is not a true "disable"; ensure vblank remains
++ * enabled.
++ */
++ if (new_crtc_state->self_refresh_active)
++ WARN_ONCE(ret != 0,
++ "driver disabled vblank in self-refresh\n");
++ else
++ WARN_ONCE(ret != -EINVAL,
++ "driver forgot to call drm_crtc_vblank_off()\n");
+ if (ret == 0)
+ drm_crtc_vblank_put(crtc);
+ }
+diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
+index 909f318331816..f195c70131373 100644
+--- a/drivers/gpu/drm/drm_atomic_uapi.c
++++ b/drivers/gpu/drm/drm_atomic_uapi.c
+@@ -76,15 +76,17 @@ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state,
+ state->mode_blob = NULL;
+
+ if (mode) {
++ struct drm_property_blob *blob;
++
+ drm_mode_convert_to_umode(&umode, mode);
+- state->mode_blob =
+- drm_property_create_blob(state->crtc->dev,
+- sizeof(umode),
+- &umode);
+- if (IS_ERR(state->mode_blob))
+- return PTR_ERR(state->mode_blob);
++ blob = drm_property_create_blob(crtc->dev,
++ sizeof(umode), &umode);
++ if (IS_ERR(blob))
++ return PTR_ERR(blob);
+
+ drm_mode_copy(&state->mode, mode);
++
++ state->mode_blob = blob;
+ state->enable = true;
+ drm_dbg_atomic(crtc->dev,
+ "Set [MODE:%s] for [CRTC:%d:%s] state %p\n",
+diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
+index a8ed66751c2d7..78bc315b0b737 100644
+--- a/drivers/gpu/drm/drm_bridge.c
++++ b/drivers/gpu/drm/drm_bridge.c
+@@ -28,6 +28,7 @@
+ #include <drm/drm_atomic_state_helper.h>
+ #include <drm/drm_bridge.h>
+ #include <drm/drm_encoder.h>
++#include <drm/drm_of.h>
+ #include <drm/drm_print.h>
+
+ #include "drm_crtc_internal.h"
+@@ -51,10 +52,8 @@
+ *
+ * Display drivers are responsible for linking encoders with the first bridge
+ * in the chains. This is done by acquiring the appropriate bridge with
+- * of_drm_find_bridge() or drm_of_find_panel_or_bridge(), or creating it for a
+- * panel with drm_panel_bridge_add_typed() (or the managed version
+- * devm_drm_panel_bridge_add_typed()). Once acquired, the bridge shall be
+- * attached to the encoder with a call to drm_bridge_attach().
++ * devm_drm_of_get_bridge(). Once acquired, the bridge shall be attached to the
++ * encoder with a call to drm_bridge_attach().
+ *
+ * Bridges are responsible for linking themselves with the next bridge in the
+ * chain, if any. This is done the same way as for encoders, with the call to
+@@ -763,8 +762,8 @@ static int select_bus_fmt_recursive(struct drm_bridge *first_bridge,
+ struct drm_connector_state *conn_state,
+ u32 out_bus_fmt)
+ {
++ unsigned int i, num_in_bus_fmts = 0;
+ struct drm_bridge_state *cur_state;
+- unsigned int num_in_bus_fmts, i;
+ struct drm_bridge *prev_bridge;
+ u32 *in_bus_fmts;
+ int ret;
+@@ -885,7 +884,7 @@ drm_atomic_bridge_chain_select_bus_fmts(struct drm_bridge *bridge,
+ struct drm_connector *conn = conn_state->connector;
+ struct drm_encoder *encoder = bridge->encoder;
+ struct drm_bridge_state *last_bridge_state;
+- unsigned int i, num_out_bus_fmts;
++ unsigned int i, num_out_bus_fmts = 0;
+ struct drm_bridge *last_bridge;
+ u32 *out_bus_fmts;
+ int ret = 0;
+diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c
+index 791379816837d..4f20137ef21d5 100644
+--- a/drivers/gpu/drm/drm_bridge_connector.c
++++ b/drivers/gpu/drm/drm_bridge_connector.c
+@@ -369,8 +369,10 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm,
+ connector_type, ddc);
+ drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs);
+
+- if (bridge_connector->bridge_hpd)
++ if (bridge_connector->bridge_hpd) {
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
++ drm_bridge_connector_enable_hpd(connector);
++ }
+ else if (bridge_connector->bridge_detect)
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT
+ | DRM_CONNECTOR_POLL_DISCONNECT;
+diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
+index ced09c7c06f90..447ea279e6915 100644
+--- a/drivers/gpu/drm/drm_client_modeset.c
++++ b/drivers/gpu/drm/drm_client_modeset.c
+@@ -309,6 +309,9 @@ static bool drm_client_target_cloned(struct drm_device *dev,
+ can_clone = true;
+ dmt_mode = drm_mode_find_dmt(dev, 1024, 768, 60, false);
+
++ if (!dmt_mode)
++ goto fail;
++
+ for (i = 0; i < connector_count; i++) {
+ if (!enabled[i])
+ continue;
+@@ -324,11 +327,13 @@ static bool drm_client_target_cloned(struct drm_device *dev,
+ if (!modes[i])
+ can_clone = false;
+ }
++ kfree(dmt_mode);
+
+ if (can_clone) {
+ DRM_DEBUG_KMS("can clone using 1024x768\n");
+ return true;
+ }
++fail:
+ DRM_INFO("kms: can't enable cloning when we probably wanted to.\n");
+ return false;
+ }
+@@ -860,6 +865,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
+ break;
+ }
+
++ kfree(modeset->mode);
+ modeset->mode = drm_mode_duplicate(dev, mode);
+ drm_connector_get(connector);
+ modeset->connectors[modeset->num_connectors++] = connector;
+diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
+index 2ba257b1ae208..cfe163103cfd7 100644
+--- a/drivers/gpu/drm/drm_connector.c
++++ b/drivers/gpu/drm/drm_connector.c
+@@ -487,6 +487,9 @@ void drm_connector_cleanup(struct drm_connector *connector)
+ mutex_destroy(&connector->mutex);
+
+ memset(connector, 0, sizeof(*connector));
++
++ if (dev->registered)
++ drm_sysfs_hotplug_event(dev);
+ }
+ EXPORT_SYMBOL(drm_connector_cleanup);
+
+@@ -2233,6 +2236,9 @@ EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal);
+ void drm_connector_set_vrr_capable_property(
+ struct drm_connector *connector, bool capable)
+ {
++ if (!connector->vrr_capable_property)
++ return;
++
+ drm_object_property_set_value(&connector->base,
+ connector->vrr_capable_property,
+ capable);
+diff --git a/drivers/gpu/drm/drm_displayid.c b/drivers/gpu/drm/drm_displayid.c
+index 32da557b960fd..82b7f0bb44097 100644
+--- a/drivers/gpu/drm/drm_displayid.c
++++ b/drivers/gpu/drm/drm_displayid.c
+@@ -7,13 +7,28 @@
+ #include <drm/drm_edid.h>
+ #include <drm/drm_print.h>
+
++static const struct displayid_header *
++displayid_get_header(const u8 *displayid, int length, int index)
++{
++ const struct displayid_header *base;
++
++ if (sizeof(*base) > length - index)
++ return ERR_PTR(-EINVAL);
++
++ base = (const struct displayid_header *)&displayid[index];
++
++ return base;
++}
++
+ static int validate_displayid(const u8 *displayid, int length, int idx)
+ {
+ int i, dispid_length;
+ u8 csum = 0;
+ const struct displayid_header *base;
+
+- base = (const struct displayid_header *)&displayid[idx];
++ base = displayid_get_header(displayid, length, idx);
++ if (IS_ERR(base))
++ return PTR_ERR(base);
+
+ DRM_DEBUG_KMS("base revision 0x%x, length %d, %d %d\n",
+ base->rev, base->bytes, base->prod_id, base->ext_count);
+diff --git a/drivers/gpu/drm/drm_dp_aux_bus.c b/drivers/gpu/drm/drm_dp_aux_bus.c
+index 298ea7a495913..f7c03ad5a15a5 100644
+--- a/drivers/gpu/drm/drm_dp_aux_bus.c
++++ b/drivers/gpu/drm/drm_dp_aux_bus.c
+@@ -66,7 +66,6 @@ static int dp_aux_ep_probe(struct device *dev)
+ * @dev: The device to remove.
+ *
+ * Calls through to the endpoint driver remove.
+- *
+ */
+ static void dp_aux_ep_remove(struct device *dev)
+ {
+@@ -120,8 +119,6 @@ ATTRIBUTE_GROUPS(dp_aux_ep_dev);
+ /**
+ * dp_aux_ep_dev_release() - Free memory for the dp_aux_ep device
+ * @dev: The device to free.
+- *
+- * Return: 0 if no error or negative error code.
+ */
+ static void dp_aux_ep_dev_release(struct device *dev)
+ {
+@@ -256,6 +253,7 @@ int of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux)
+
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(of_dp_aux_populate_ep_devices);
+
+ static void of_dp_aux_depopulate_ep_devices_void(void *data)
+ {
+diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+index 9faf49354cabd..cb52a00ae1b11 100644
+--- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c
++++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+@@ -63,23 +63,45 @@
+ ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter,
+ u8 offset, void *buffer, size_t size)
+ {
++ u8 zero = 0;
++ char *tmpbuf = NULL;
++ /*
++ * As sub-addressing is not supported by all adaptors,
++ * always explicitly read from the start and discard
++ * any bytes that come before the requested offset.
++ * This way, no matter whether the adaptor supports it
++ * or not, we'll end up reading the proper data.
++ */
+ struct i2c_msg msgs[] = {
+ {
+ .addr = DP_DUAL_MODE_SLAVE_ADDRESS,
+ .flags = 0,
+ .len = 1,
+- .buf = &offset,
++ .buf = &zero,
+ },
+ {
+ .addr = DP_DUAL_MODE_SLAVE_ADDRESS,
+ .flags = I2C_M_RD,
+- .len = size,
++ .len = size + offset,
+ .buf = buffer,
+ },
+ };
+ int ret;
+
++ if (offset) {
++ tmpbuf = kmalloc(size + offset, GFP_KERNEL);
++ if (!tmpbuf)
++ return -ENOMEM;
++
++ msgs[1].buf = tmpbuf;
++ }
++
+ ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs));
++ if (tmpbuf)
++ memcpy(buffer, tmpbuf + offset, size);
++
++ kfree(tmpbuf);
++
+ if (ret < 0)
+ return ret;
+ if (ret != ARRAY_SIZE(msgs))
+@@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev,
+ if (ret)
+ return DRM_DP_DUAL_MODE_UNKNOWN;
+
+- /*
+- * Sigh. Some (maybe all?) type 1 adaptors are broken and ack
+- * the offset but ignore it, and instead they just always return
+- * data from the start of the HDMI ID buffer. So for a broken
+- * type 1 HDMI adaptor a single byte read will always give us
+- * 0x44, and for a type 1 DVI adaptor it should give 0x00
+- * (assuming it implements any registers). Fortunately neither
+- * of those values will match the type 2 signature of the
+- * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with
+- * the type 2 adaptor detection safely even in the presence
+- * of broken type 1 adaptors.
+- */
+ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID,
+ &adaptor_id, sizeof(adaptor_id));
+ drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret);
+@@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev,
+ return DRM_DP_DUAL_MODE_TYPE2_DVI;
+ }
+ /*
+- * If neither a proper type 1 ID nor a broken type 1 adaptor
+- * as described above, assume type 1, but let the user know
+- * that we may have misdetected the type.
++ * If not a proper type 1 ID, still assume type 1, but let
++ * the user know that we may have misdetected the type.
+ */
+- if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0])
++ if (!is_type1_adaptor(adaptor_id))
+ drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id);
+
+ }
+@@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output);
+ * @enable: enable (as opposed to disable) the TMDS output buffers
+ *
+ * Set the state of the TMDS output buffers in the adaptor. For
+- * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As
+- * some type 1 adaptors have problems with registers (see comments
+- * in drm_dp_dual_mode_detect()) we avoid touching the register,
+- * making this function a no-op on type 1 adaptors.
++ * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register.
++ * Type1 adaptors do not support any register writes.
+ *
+ * Returns:
+ * 0 on success, negative error code on failure
+diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
+index 6d0f2c447f3b9..b8815e7f5832e 100644
+--- a/drivers/gpu/drm/drm_dp_helper.c
++++ b/drivers/gpu/drm/drm_dp_helper.c
+@@ -2376,17 +2376,8 @@ int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux,
+ struct drm_dp_phy_test_params *data, u8 dp_rev)
+ {
+ int err, i;
+- u8 link_config[2];
+ u8 test_pattern;
+
+- link_config[0] = drm_dp_link_rate_to_bw_code(data->link_rate);
+- link_config[1] = data->num_lanes;
+- if (data->enhanced_frame_cap)
+- link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+- err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, link_config, 2);
+- if (err < 0)
+- return err;
+-
+ test_pattern = data->phy_pattern;
+ if (dp_rev < 0x12) {
+ test_pattern = (test_pattern << 2) &
+@@ -3214,27 +3205,13 @@ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backli
+ const u16 level)
+ {
+ int ret;
+- u8 dpcd_buf, new_dpcd_buf;
+-
+- ret = drm_dp_dpcd_readb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf);
+- if (ret != 1) {
+- drm_dbg_kms(aux->drm_dev,
+- "%s: Failed to read backlight mode: %d\n", aux->name, ret);
+- return ret < 0 ? ret : -EIO;
+- }
+-
+- new_dpcd_buf = dpcd_buf;
+-
+- if ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) != DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) {
+- new_dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+- new_dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
++ u8 dpcd_buf = DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+
+- if (bl->pwmgen_bit_count) {
+- ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count);
+- if (ret != 1)
+- drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n",
+- aux->name, ret);
+- }
++ if (bl->pwmgen_bit_count) {
++ ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count);
++ if (ret != 1)
++ drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n",
++ aux->name, ret);
+ }
+
+ if (bl->pwm_freq_pre_divider) {
+@@ -3244,16 +3221,14 @@ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backli
+ "%s: Failed to write aux backlight frequency: %d\n",
+ aux->name, ret);
+ else
+- new_dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE;
++ dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE;
+ }
+
+- if (new_dpcd_buf != dpcd_buf) {
+- ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, new_dpcd_buf);
+- if (ret != 1) {
+- drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux backlight mode: %d\n",
+- aux->name, ret);
+- return ret < 0 ? ret : -EIO;
+- }
++ ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf);
++ if (ret != 1) {
++ drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux backlight mode: %d\n",
++ aux->name, ret);
++ return ret < 0 ? ret : -EIO;
+ }
+
+ ret = drm_edp_backlight_set_level(aux, bl, level);
+diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
+index 86d13d6bc4631..d02e323a4ecde 100644
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -3781,6 +3781,9 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
+ set_bit(0, &mgr->payload_mask);
+ mgr->vcpi_mask = 0;
+ mgr->payload_id_table_cleared = false;
++
++ memset(&mgr->down_rep_recv, 0, sizeof(mgr->down_rep_recv));
++ memset(&mgr->up_req_recv, 0, sizeof(mgr->up_req_recv));
+ }
+
+ out_unlock:
+@@ -3860,9 +3863,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
+ if (!mgr->mst_primary)
+ goto out_fail;
+
+- ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
+- DP_RECEIVER_CAP_SIZE);
+- if (ret != DP_RECEIVER_CAP_SIZE) {
++ if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+@@ -3996,7 +3997,7 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
+ struct drm_dp_sideband_msg_rx *msg = &mgr->down_rep_recv;
+
+ if (!drm_dp_get_one_sb_msg(mgr, false, &mstb))
+- goto out;
++ goto out_clear_reply;
+
+ /* Multi-packet message transmission, don't clear the reply */
+ if (!msg->have_eomt)
+@@ -4834,6 +4835,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr,
+
+ mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
+ drm_edid_get_monitor_name(mst_edid, name, namelen);
++ kfree(mst_edid);
+ }
+
+ /**
+@@ -4893,22 +4895,21 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
+ u8 buf[DP_PAYLOAD_TABLE_SIZE];
+ int ret;
+
+- ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE);
+- if (ret) {
++ if (drm_dp_read_dpcd_caps(mgr->aux, buf) < 0) {
+ seq_printf(m, "dpcd read failed\n");
+ goto out;
+ }
+ seq_printf(m, "dpcd: %*ph\n", DP_RECEIVER_CAP_SIZE, buf);
+
+ ret = drm_dp_dpcd_read(mgr->aux, DP_FAUX_CAP, buf, 2);
+- if (ret) {
++ if (ret != 2) {
+ seq_printf(m, "faux/mst read failed\n");
+ goto out;
+ }
+ seq_printf(m, "faux/mst: %*ph\n", 2, buf);
+
+ ret = drm_dp_dpcd_read(mgr->aux, DP_MSTM_CTRL, buf, 1);
+- if (ret) {
++ if (ret != 1) {
+ seq_printf(m, "mst ctrl read failed\n");
+ goto out;
+ }
+@@ -4916,7 +4917,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
+
+ /* dump the standard OUI branch header */
+ ret = drm_dp_dpcd_read(mgr->aux, DP_BRANCH_OUI, buf, DP_BRANCH_OUI_HEADER_SIZE);
+- if (ret) {
++ if (ret != DP_BRANCH_OUI_HEADER_SIZE) {
+ seq_printf(m, "branch oui read failed\n");
+ goto out;
+ }
+@@ -5287,7 +5288,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm
+ mst_state = drm_atomic_get_mst_topology_state(state, mgr);
+
+ if (IS_ERR(mst_state))
+- return -EINVAL;
++ return PTR_ERR(mst_state);
+
+ list_for_each_entry(pos, &mst_state->vcpis, next) {
+
+diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
+index 7a5097467ba5c..6f1791613757b 100644
+--- a/drivers/gpu/drm/drm_drv.c
++++ b/drivers/gpu/drm/drm_drv.c
+@@ -581,6 +581,7 @@ static int drm_dev_init(struct drm_device *dev,
+ const struct drm_driver *driver,
+ struct device *parent)
+ {
++ struct inode *inode;
+ int ret;
+
+ if (!drm_core_init_complete) {
+@@ -613,17 +614,19 @@ static int drm_dev_init(struct drm_device *dev,
+ mutex_init(&dev->clientlist_mutex);
+ mutex_init(&dev->master_mutex);
+
+- ret = drmm_add_action(dev, drm_dev_init_release, NULL);
++ ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
+ if (ret)
+ return ret;
+
+- dev->anon_inode = drm_fs_inode_new();
+- if (IS_ERR(dev->anon_inode)) {
+- ret = PTR_ERR(dev->anon_inode);
++ inode = drm_fs_inode_new();
++ if (IS_ERR(inode)) {
++ ret = PTR_ERR(inode);
+ DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret);
+ goto err;
+ }
+
++ dev->anon_inode = inode;
++
+ if (drm_core_check_feature(dev, DRIVER_RENDER)) {
+ ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
+ if (ret)
+diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
+index ea9a79bc95839..720956893b56c 100644
+--- a/drivers/gpu/drm/drm_edid.c
++++ b/drivers/gpu/drm/drm_edid.c
+@@ -1994,9 +1994,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector,
+
+ connector_bad_edid(connector, edid, edid[0x7e] + 1);
+
+- edid[EDID_LENGTH-1] += edid[0x7e] - valid_extensions;
+- edid[0x7e] = valid_extensions;
+-
+ new = kmalloc_array(valid_extensions + 1, EDID_LENGTH,
+ GFP_KERNEL);
+ if (!new)
+@@ -2013,6 +2010,9 @@ struct edid *drm_do_get_edid(struct drm_connector *connector,
+ base += EDID_LENGTH;
+ }
+
++ new[EDID_LENGTH - 1] += new[0x7e] - valid_extensions;
++ new[0x7e] = valid_extensions;
++
+ kfree(edid);
+ edid = new;
+ }
+@@ -4776,7 +4776,8 @@ bool drm_detect_monitor_audio(struct edid *edid)
+ if (!edid_ext)
+ goto end;
+
+- has_audio = ((edid_ext[3] & EDID_BASIC_AUDIO) != 0);
++ has_audio = (edid_ext[0] == CEA_EXT &&
++ (edid_ext[3] & EDID_BASIC_AUDIO) != 0);
+
+ if (has_audio) {
+ DRM_DEBUG_KMS("Monitor has basic audio support\n");
+@@ -4941,7 +4942,8 @@ static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector,
+ else if (hf_vsdb[11] & DRM_EDID_DSC_10BPC)
+ hdmi_dsc->bpc_supported = 10;
+ else
+- hdmi_dsc->bpc_supported = 0;
++ /* Supports min 8 BPC if DSC 1.2 is supported*/
++ hdmi_dsc->bpc_supported = 8;
+
+ dsc_max_frl_rate = (hf_vsdb[12] & DRM_EDID_DSC_MAX_FRL_RATE_MASK) >> 4;
+ drm_get_max_frl_rate(dsc_max_frl_rate, &hdmi_dsc->max_lanes,
+@@ -5003,21 +5005,21 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector,
+
+ if (hdmi[6] & DRM_EDID_HDMI_DC_30) {
+ dc_bpc = 10;
+- info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30;
++ info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_30;
+ DRM_DEBUG("%s: HDMI sink does deep color 30.\n",
+ connector->name);
+ }
+
+ if (hdmi[6] & DRM_EDID_HDMI_DC_36) {
+ dc_bpc = 12;
+- info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36;
++ info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_36;
+ DRM_DEBUG("%s: HDMI sink does deep color 36.\n",
+ connector->name);
+ }
+
+ if (hdmi[6] & DRM_EDID_HDMI_DC_48) {
+ dc_bpc = 16;
+- info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48;
++ info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_48;
+ DRM_DEBUG("%s: HDMI sink does deep color 48.\n",
+ connector->name);
+ }
+@@ -5032,16 +5034,9 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector,
+ connector->name, dc_bpc);
+ info->bpc = dc_bpc;
+
+- /*
+- * Deep color support mandates RGB444 support for all video
+- * modes and forbids YCRCB422 support for all video modes per
+- * HDMI 1.3 spec.
+- */
+- info->color_formats = DRM_COLOR_FORMAT_RGB444;
+-
+ /* YCRCB444 is optional according to spec. */
+ if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) {
+- info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
++ info->edid_hdmi_ycbcr444_dc_modes = info->edid_hdmi_rgb444_dc_modes;
+ DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n",
+ connector->name);
+ }
+@@ -5205,6 +5200,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
+ if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
+ return quirks;
+
++ info->color_formats |= DRM_COLOR_FORMAT_RGB444;
+ drm_parse_cea_ext(connector, edid);
+
+ /*
+@@ -5253,7 +5249,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
+ DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n",
+ connector->name, info->bpc);
+
+- info->color_formats |= DRM_COLOR_FORMAT_RGB444;
+ if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444)
+ info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
+ if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422)
+@@ -5603,8 +5598,6 @@ static u8 drm_mode_hdmi_vic(const struct drm_connector *connector,
+ static u8 drm_mode_cea_vic(const struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+ {
+- u8 vic;
+-
+ /*
+ * HDMI spec says if a mode is found in HDMI 1.4b 4K modes
+ * we should send its VIC in vendor infoframes, else send the
+@@ -5614,13 +5607,18 @@ static u8 drm_mode_cea_vic(const struct drm_connector *connector,
+ if (drm_mode_hdmi_vic(connector, mode))
+ return 0;
+
+- vic = drm_match_cea_mode(mode);
++ return drm_match_cea_mode(mode);
++}
+
+- /*
+- * HDMI 1.4 VIC range: 1 <= VIC <= 64 (CEA-861-D) but
+- * HDMI 2.0 VIC range: 1 <= VIC <= 107 (CEA-861-F). So we
+- * have to make sure we dont break HDMI 1.4 sinks.
+- */
++/*
++ * Avoid sending VICs defined in HDMI 2.0 in AVI infoframes to sinks that
++ * conform to HDMI 1.4.
++ *
++ * HDMI 1.4 (CTA-861-D) VIC range: [1..64]
++ * HDMI 2.0 (CTA-861-F) VIC range: [1..107]
++ */
++static u8 vic_for_avi_infoframe(const struct drm_connector *connector, u8 vic)
++{
+ if (!is_hdmi2_sink(connector) && vic > 64)
+ return 0;
+
+@@ -5696,7 +5694,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
+ picture_aspect = HDMI_PICTURE_ASPECT_NONE;
+ }
+
+- frame->video_code = vic;
++ frame->video_code = vic_for_avi_infoframe(connector, vic);
+ frame->picture_aspect = picture_aspect;
+ frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
+ frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
+diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
+index 8e7a124d6c5a3..3d9c0444df406 100644
+--- a/drivers/gpu/drm/drm_fb_helper.c
++++ b/drivers/gpu/drm/drm_fb_helper.c
+@@ -1327,6 +1327,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
+ return -EINVAL;
+ }
+
++ var->xres_virtual = fb->width;
++ var->yres_virtual = fb->height;
++
+ /*
+ * Workaround for SDL 1.2, which is known to be setting all pixel format
+ * fields values to zero in some cases. We treat this situation as a
+@@ -1743,7 +1746,13 @@ void drm_fb_helper_fill_info(struct fb_info *info,
+ sizes->fb_width, sizes->fb_height);
+
+ info->par = fb_helper;
+- snprintf(info->fix.id, sizeof(info->fix.id), "%s",
++ /*
++ * The DRM drivers fbdev emulation device name can be confusing if the
++ * driver name also has a "drm" suffix on it. Leading to names such as
++ * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't
++ * be changed due user-space tools (e.g: pm-utils) matching against it.
++ */
++ snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb",
+ fb_helper->dev->driver->name);
+
+ }
+@@ -2340,6 +2349,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
+ fbi->fbops = &drm_fbdev_fb_ops;
+ fbi->screen_size = fb->height * fb->pitches[0];
+ fbi->fix.smem_len = fbi->screen_size;
++ fbi->flags = FBINFO_DEFAULT;
+
+ drm_fb_helper_fill_info(fbi, fb_helper, sizes);
+
+@@ -2347,19 +2357,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
+ fbi->screen_buffer = vzalloc(fbi->screen_size);
+ if (!fbi->screen_buffer)
+ return -ENOMEM;
++ fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
+
+ fbi->fbdefio = &drm_fbdev_defio;
+-
+ fb_deferred_io_init(fbi);
+ } else {
+ /* buffer is mapped for HW framebuffer */
+ ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
+ if (ret)
+ return ret;
+- if (map.is_iomem)
++ if (map.is_iomem) {
+ fbi->screen_base = map.vaddr_iomem;
+- else
++ } else {
+ fbi->screen_buffer = map.vaddr;
++ fbi->flags |= FBINFO_VIRTFB;
++ }
+
+ /*
+ * Shamelessly leak the physical address to user-space. As
+diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
+index eda832f9200db..7940d948ffdcb 100644
+--- a/drivers/gpu/drm/drm_fourcc.c
++++ b/drivers/gpu/drm/drm_fourcc.c
+@@ -153,6 +153,10 @@ const struct drm_format_info *__drm_format_info(u32 format)
+ { .format = DRM_FORMAT_BGRA5551, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true },
+ { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 },
+ { .format = DRM_FORMAT_BGR565, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 },
++#ifdef __BIG_ENDIAN
++ { .format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN, .depth = 15, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 },
++ { .format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN, .depth = 16, .num_planes = 1, .cpp = { 2, 0, 0 }, .hsub = 1, .vsub = 1 },
++#endif
+ { .format = DRM_FORMAT_RGB888, .depth = 24, .num_planes = 1, .cpp = { 3, 0, 0 }, .hsub = 1, .vsub = 1 },
+ { .format = DRM_FORMAT_BGR888, .depth = 24, .num_planes = 1, .cpp = { 3, 0, 0 }, .hsub = 1, .vsub = 1 },
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1 },
+@@ -260,12 +264,15 @@ const struct drm_format_info *__drm_format_info(u32 format)
+ .vsub = 2, .is_yuv = true },
+ { .format = DRM_FORMAT_Q410, .depth = 0,
+ .num_planes = 3, .char_per_block = { 2, 2, 2 },
+- .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0,
+- .vsub = 0, .is_yuv = true },
++ .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1,
++ .vsub = 1, .is_yuv = true },
+ { .format = DRM_FORMAT_Q401, .depth = 0,
+ .num_planes = 3, .char_per_block = { 2, 2, 2 },
+- .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0,
+- .vsub = 0, .is_yuv = true },
++ .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1,
++ .vsub = 1, .is_yuv = true },
++ { .format = DRM_FORMAT_P030, .depth = 0, .num_planes = 2,
++ .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 },
++ .hsub = 2, .vsub = 2, .is_yuv = true},
+ };
+
+ unsigned int i;
+diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
+index 09c8200458594..dbd19a34b517b 100644
+--- a/drivers/gpu/drm/drm_gem.c
++++ b/drivers/gpu/drm/drm_gem.c
+@@ -167,21 +167,6 @@ void drm_gem_private_object_init(struct drm_device *dev,
+ }
+ EXPORT_SYMBOL(drm_gem_private_object_init);
+
+-static void
+-drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp)
+-{
+- /*
+- * Note: obj->dma_buf can't disappear as long as we still hold a
+- * handle reference in obj->handle_count.
+- */
+- mutex_lock(&filp->prime.lock);
+- if (obj->dma_buf) {
+- drm_prime_remove_buf_handle_locked(&filp->prime,
+- obj->dma_buf);
+- }
+- mutex_unlock(&filp->prime.lock);
+-}
+-
+ /**
+ * drm_gem_object_handle_free - release resources bound to userspace handles
+ * @obj: GEM object to clean up.
+@@ -252,7 +237,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
+ if (obj->funcs->close)
+ obj->funcs->close(obj, file_priv);
+
+- drm_gem_remove_prime_handles(obj, file_priv);
++ drm_prime_remove_buf_handle(&file_priv->prime, id);
+ drm_vma_node_revoke(&obj->vma_node, file_priv);
+
+ drm_gem_object_handle_put_unlocked(obj);
+@@ -1224,7 +1209,7 @@ retry:
+ ret = dma_resv_lock_slow_interruptible(obj->resv,
+ acquire_ctx);
+ if (ret) {
+- ww_acquire_done(acquire_ctx);
++ ww_acquire_fini(acquire_ctx);
+ return ret;
+ }
+ }
+@@ -1249,7 +1234,7 @@ retry:
+ goto retry;
+ }
+
+- ww_acquire_done(acquire_ctx);
++ ww_acquire_fini(acquire_ctx);
+ return ret;
+ }
+ }
+diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
+index d53388199f34c..6533efa840204 100644
+--- a/drivers/gpu/drm/drm_gem_cma_helper.c
++++ b/drivers/gpu/drm/drm_gem_cma_helper.c
+@@ -210,8 +210,13 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
+ dma_buf_vunmap(gem_obj->import_attach->dmabuf, &map);
+ drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
+ } else if (cma_obj->vaddr) {
+- dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+- cma_obj->vaddr, cma_obj->paddr);
++ if (cma_obj->map_noncoherent)
++ dma_free_noncoherent(gem_obj->dev->dev, cma_obj->base.size,
++ cma_obj->vaddr, cma_obj->paddr,
++ DMA_TO_DEVICE);
++ else
++ dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
++ cma_obj->vaddr, cma_obj->paddr);
+ }
+
+ drm_gem_object_release(gem_obj);
+@@ -510,6 +515,7 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+ */
+ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node);
+ vma->vm_flags &= ~VM_PFNMAP;
++ vma->vm_flags |= VM_DONTEXPAND;
+
+ cma_obj = to_drm_gem_cma_obj(obj);
+
+diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
+index a61946374c826..54f1ab3071f98 100644
+--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
++++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
+@@ -22,17 +22,22 @@
+ *
+ * This library provides helpers for GEM objects backed by shmem buffers
+ * allocated using anonymous pageable memory.
++ *
++ * Functions that operate on the GEM object receive struct &drm_gem_shmem_object.
++ * For GEM callback helpers in struct &drm_gem_object functions, see likewise
++ * named functions with an _object_ infix (e.g., drm_gem_shmem_object_vmap() wraps
++ * drm_gem_shmem_vmap()). These helpers perform the necessary type conversion.
+ */
+
+ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = {
+- .free = drm_gem_shmem_free_object,
+- .print_info = drm_gem_shmem_print_info,
+- .pin = drm_gem_shmem_pin,
+- .unpin = drm_gem_shmem_unpin,
+- .get_sg_table = drm_gem_shmem_get_sg_table,
+- .vmap = drm_gem_shmem_vmap,
+- .vunmap = drm_gem_shmem_vunmap,
+- .mmap = drm_gem_shmem_mmap,
++ .free = drm_gem_shmem_object_free,
++ .print_info = drm_gem_shmem_object_print_info,
++ .pin = drm_gem_shmem_object_pin,
++ .unpin = drm_gem_shmem_object_unpin,
++ .get_sg_table = drm_gem_shmem_object_get_sg_table,
++ .vmap = drm_gem_shmem_object_vmap,
++ .vunmap = drm_gem_shmem_object_vunmap,
++ .mmap = drm_gem_shmem_object_mmap,
+ };
+
+ static struct drm_gem_shmem_object *
+@@ -112,16 +117,15 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
+ EXPORT_SYMBOL_GPL(drm_gem_shmem_create);
+
+ /**
+- * drm_gem_shmem_free_object - Free resources associated with a shmem GEM object
+- * @obj: GEM object to free
++ * drm_gem_shmem_free - Free resources associated with a shmem GEM object
++ * @shmem: shmem GEM object to free
+ *
+ * This function cleans up the GEM object state and frees the memory used to
+- * store the object itself. It should be used to implement
+- * &drm_gem_object_funcs.free.
++ * store the object itself.
+ */
+-void drm_gem_shmem_free_object(struct drm_gem_object *obj)
++void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++ struct drm_gem_object *obj = &shmem->base;
+
+ WARN_ON(shmem->vmap_use_count);
+
+@@ -145,7 +149,7 @@ void drm_gem_shmem_free_object(struct drm_gem_object *obj)
+ mutex_destroy(&shmem->vmap_lock);
+ kfree(shmem);
+ }
+-EXPORT_SYMBOL_GPL(drm_gem_shmem_free_object);
++EXPORT_SYMBOL_GPL(drm_gem_shmem_free);
+
+ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
+ {
+@@ -225,19 +229,16 @@ EXPORT_SYMBOL(drm_gem_shmem_put_pages);
+
+ /**
+ * drm_gem_shmem_pin - Pin backing pages for a shmem GEM object
+- * @obj: GEM object
++ * @shmem: shmem GEM object
+ *
+ * This function makes sure the backing pages are pinned in memory while the
+- * buffer is exported. It should only be used to implement
+- * &drm_gem_object_funcs.pin.
++ * buffer is exported.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+-int drm_gem_shmem_pin(struct drm_gem_object *obj)
++int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ WARN_ON(shmem->base.import_attach);
+
+ return drm_gem_shmem_get_pages(shmem);
+@@ -246,15 +247,13 @@ EXPORT_SYMBOL(drm_gem_shmem_pin);
+
+ /**
+ * drm_gem_shmem_unpin - Unpin backing pages for a shmem GEM object
+- * @obj: GEM object
++ * @shmem: shmem GEM object
+ *
+ * This function removes the requirement that the backing pages are pinned in
+- * memory. It should only be used to implement &drm_gem_object_funcs.unpin.
++ * memory.
+ */
+-void drm_gem_shmem_unpin(struct drm_gem_object *obj)
++void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ WARN_ON(shmem->base.import_attach);
+
+ drm_gem_shmem_put_pages(shmem);
+@@ -275,6 +274,7 @@ static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, struct
+ ret = dma_buf_vmap(obj->import_attach->dmabuf, map);
+ if (!ret) {
+ if (WARN_ON(map->is_iomem)) {
++ dma_buf_vunmap(obj->import_attach->dmabuf, map);
+ ret = -EIO;
+ goto err_put_pages;
+ }
+@@ -320,20 +320,16 @@ err_zero_use:
+ * store.
+ *
+ * This function makes sure that a contiguous kernel virtual address mapping
+- * exists for the buffer backing the shmem GEM object.
+- *
+- * This function can be used to implement &drm_gem_object_funcs.vmap. But it can
+- * also be called by drivers directly, in which case it will hide the
+- * differences between dma-buf imported and natively allocated objects.
++ * exists for the buffer backing the shmem GEM object. It hides the differences
++ * between dma-buf imported and natively allocated objects.
+ *
+ * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+-int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
++int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+ int ret;
+
+ ret = mutex_lock_interruptible(&shmem->vmap_lock);
+@@ -376,21 +372,18 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
+ * drm_gem_shmem_vmap(). The mapping is only removed when the use count drops to
+ * zero.
+ *
+- * This function can be used to implement &drm_gem_object_funcs.vmap. But it can
+- * also be called by drivers directly, in which case it will hide the
+- * differences between dma-buf imported and natively allocated objects.
++ * This function hides the differences between dma-buf imported and natively
++ * allocated objects.
+ */
+-void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
++void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ mutex_lock(&shmem->vmap_lock);
+ drm_gem_shmem_vunmap_locked(shmem, map);
+ mutex_unlock(&shmem->vmap_lock);
+ }
+ EXPORT_SYMBOL(drm_gem_shmem_vunmap);
+
+-struct drm_gem_shmem_object *
++static struct drm_gem_shmem_object *
+ drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
+ struct drm_device *dev, size_t size,
+ uint32_t *handle)
+@@ -414,15 +407,12 @@ drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
+
+ return shmem;
+ }
+-EXPORT_SYMBOL(drm_gem_shmem_create_with_handle);
+
+ /* Update madvise status, returns true if not purged, else
+ * false or -errno.
+ */
+-int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv)
++int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ mutex_lock(&shmem->pages_lock);
+
+ if (shmem->madv >= 0)
+@@ -436,14 +426,14 @@ int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv)
+ }
+ EXPORT_SYMBOL(drm_gem_shmem_madvise);
+
+-void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
++void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem)
+ {
++ struct drm_gem_object *obj = &shmem->base;
+ struct drm_device *dev = obj->dev;
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+ WARN_ON(!drm_gem_shmem_is_purgeable(shmem));
+
+- dma_unmap_sgtable(obj->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0);
++ dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0);
+ sg_free_table(shmem->sgt);
+ kfree(shmem->sgt);
+ shmem->sgt = NULL;
+@@ -462,18 +452,15 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
+ */
+ shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1);
+
+- invalidate_mapping_pages(file_inode(obj->filp)->i_mapping,
+- 0, (loff_t)-1);
++ invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1);
+ }
+ EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
+
+-bool drm_gem_shmem_purge(struct drm_gem_object *obj)
++bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ if (!mutex_trylock(&shmem->pages_lock))
+ return false;
+- drm_gem_shmem_purge_locked(obj);
++ drm_gem_shmem_purge_locked(shmem);
+ mutex_unlock(&shmem->pages_lock);
+
+ return true;
+@@ -554,12 +541,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
+ {
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+- int ret;
+
+ WARN_ON(shmem->base.import_attach);
+
+- ret = drm_gem_shmem_get_pages(shmem);
+- WARN_ON_ONCE(ret != 0);
++ mutex_lock(&shmem->pages_lock);
++
++ /*
++ * We should have already pinned the pages when the buffer was first
++ * mmap'd, vm_open() just grabs an additional reference for the new
++ * mm the vma is getting copied into (ie. on fork()).
++ */
++ if (!WARN_ON_ONCE(!shmem->pages_use_count))
++ shmem->pages_use_count++;
++
++ mutex_unlock(&shmem->pages_lock);
+
+ drm_gem_vm_open(vma);
+ }
+@@ -581,37 +576,41 @@ static const struct vm_operations_struct drm_gem_shmem_vm_ops = {
+
+ /**
+ * drm_gem_shmem_mmap - Memory-map a shmem GEM object
+- * @obj: gem object
++ * @shmem: shmem GEM object
+ * @vma: VMA for the area to be mapped
+ *
+ * This function implements an augmented version of the GEM DRM file mmap
+- * operation for shmem objects. Drivers which employ the shmem helpers should
+- * use this function as their &drm_gem_object_funcs.mmap handler.
++ * operation for shmem objects.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+-int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
++int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma)
+ {
+- struct drm_gem_shmem_object *shmem;
++ struct drm_gem_object *obj = &shmem->base;
+ int ret;
+
+ if (obj->import_attach) {
+- /* Drop the reference drm_gem_mmap_obj() acquired.*/
+- drm_gem_object_put(obj);
++ /* Reset both vm_ops and vm_private_data, so we don't end up with
++ * vm_ops pointing to our implementation if the dma-buf backend
++ * doesn't set those fields.
++ */
+ vma->vm_private_data = NULL;
++ vma->vm_ops = NULL;
+
+- return dma_buf_mmap(obj->dma_buf, vma, 0);
+- }
++ ret = dma_buf_mmap(obj->dma_buf, vma, 0);
+
+- shmem = to_drm_gem_shmem_obj(obj);
++ /* Drop the reference drm_gem_mmap_obj() acquired.*/
++ if (!ret)
++ drm_gem_object_put(obj);
+
+- ret = drm_gem_shmem_get_pages(shmem);
+- if (ret) {
+- drm_gem_vm_close(vma);
+ return ret;
+ }
+
++ ret = drm_gem_shmem_get_pages(shmem);
++ if (ret)
++ return ret;
++
+ vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ if (shmem->map_wc)
+@@ -624,17 +623,13 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap);
+
+ /**
+ * drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs
++ * @shmem: shmem GEM object
+ * @p: DRM printer
+ * @indent: Tab indentation level
+- * @obj: GEM object
+- *
+- * This implements the &drm_gem_object_funcs.info callback.
+ */
+-void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent,
+- const struct drm_gem_object *obj)
++void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem,
++ struct drm_printer *p, unsigned int indent)
+ {
+- const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+-
+ drm_printf_indent(p, indent, "pages_use_count=%u\n", shmem->pages_use_count);
+ drm_printf_indent(p, indent, "vmap_use_count=%u\n", shmem->vmap_use_count);
+ drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr);
+@@ -644,12 +639,10 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info);
+ /**
+ * drm_gem_shmem_get_sg_table - Provide a scatter/gather table of pinned
+ * pages for a shmem GEM object
+- * @obj: GEM object
++ * @shmem: shmem GEM object
+ *
+ * This function exports a scatter/gather table suitable for PRIME usage by
+- * calling the standard DMA mapping API. Drivers should not call this function
+- * directly, instead it should only be used as an implementation for
+- * &drm_gem_object_funcs.get_sg_table.
++ * calling the standard DMA mapping API.
+ *
+ * Drivers who need to acquire an scatter/gather table for objects need to call
+ * drm_gem_shmem_get_pages_sgt() instead.
+@@ -657,9 +650,9 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info);
+ * Returns:
+ * A pointer to the scatter/gather table of pinned pages or NULL on failure.
+ */
+-struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj)
++struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem)
+ {
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++ struct drm_gem_object *obj = &shmem->base;
+
+ WARN_ON(shmem->base.import_attach);
+
+@@ -667,26 +660,10 @@ struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj)
+ }
+ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
+
+-/**
+- * drm_gem_shmem_get_pages_sgt - Pin pages, dma map them, and return a
+- * scatter/gather table for a shmem GEM object.
+- * @obj: GEM object
+- *
+- * This function returns a scatter/gather table suitable for driver usage. If
+- * the sg table doesn't exist, the pages are pinned, dma-mapped, and a sg
+- * table created.
+- *
+- * This is the main function for drivers to get at backing storage, and it hides
+- * and difference between dma-buf imported and natively allocated objects.
+- * drm_gem_shmem_get_sg_table() should not be directly called by drivers.
+- *
+- * Returns:
+- * A pointer to the scatter/gather table of pinned pages or errno on failure.
+- */
+-struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj)
++static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
+ {
++ struct drm_gem_object *obj = &shmem->base;
+ int ret;
+- struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+ struct sg_table *sgt;
+
+ if (shmem->sgt)
+@@ -694,11 +671,11 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj)
+
+ WARN_ON(obj->import_attach);
+
+- ret = drm_gem_shmem_get_pages(shmem);
++ ret = drm_gem_shmem_get_pages_locked(shmem);
+ if (ret)
+ return ERR_PTR(ret);
+
+- sgt = drm_gem_shmem_get_sg_table(&shmem->base);
++ sgt = drm_gem_shmem_get_sg_table(shmem);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto err_put_pages;
+@@ -716,9 +693,39 @@ err_free_sgt:
+ sg_free_table(sgt);
+ kfree(sgt);
+ err_put_pages:
+- drm_gem_shmem_put_pages(shmem);
++ drm_gem_shmem_put_pages_locked(shmem);
+ return ERR_PTR(ret);
+ }
++
++/**
++ * drm_gem_shmem_get_pages_sgt - Pin pages, dma map them, and return a
++ * scatter/gather table for a shmem GEM object.
++ * @shmem: shmem GEM object
++ *
++ * This function returns a scatter/gather table suitable for driver usage. If
++ * the sg table doesn't exist, the pages are pinned, dma-mapped, and a sg
++ * table created.
++ *
++ * This is the main function for drivers to get at backing storage, and it hides
++ * and difference between dma-buf imported and natively allocated objects.
++ * drm_gem_shmem_get_sg_table() should not be directly called by drivers.
++ *
++ * Returns:
++ * A pointer to the scatter/gather table of pinned pages or errno on failure.
++ */
++struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem)
++{
++ int ret;
++ struct sg_table *sgt;
++
++ ret = mutex_lock_interruptible(&shmem->pages_lock);
++ if (ret)
++ return ERR_PTR(ret);
++ sgt = drm_gem_shmem_get_pages_sgt_locked(shmem);
++ mutex_unlock(&shmem->pages_lock);
++
++ return sgt;
++}
+ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_pages_sgt);
+
+ /**
+diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
+index ecf3d2a54a98c..759c65bfd2845 100644
+--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
++++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
+@@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem,
+ struct dma_buf_map *map)
+ {
+ struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
++ int ret;
++
++ dma_resv_lock(gem->resv, NULL);
++ ret = ttm_bo_vmap(bo, map);
++ dma_resv_unlock(gem->resv);
+
+- return ttm_bo_vmap(bo, map);
++ return ret;
+ }
+ EXPORT_SYMBOL(drm_gem_ttm_vmap);
+
+@@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem,
+ {
+ struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+
++ dma_resv_lock(gem->resv, NULL);
+ ttm_bo_vunmap(bo, map);
++ dma_resv_unlock(gem->resv);
+ }
+ EXPORT_SYMBOL(drm_gem_ttm_vunmap);
+
+diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
+index 43cf7e887d1a5..aaf4f7dcc581d 100644
+--- a/drivers/gpu/drm/drm_gem_vram_helper.c
++++ b/drivers/gpu/drm/drm_gem_vram_helper.c
+@@ -43,7 +43,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
+ * the frame's scanout buffer or the cursor image. If there's no more space
+ * left in VRAM, inactive GEM objects can be moved to system memory.
+ *
+- * To initialize the VRAM helper library call drmm_vram_helper_alloc_mm().
++ * To initialize the VRAM helper library call drmm_vram_helper_init().
+ * The function allocates and initializes an instance of &struct drm_vram_mm
+ * in &struct drm_device.vram_mm . Use &DRM_GEM_VRAM_DRIVER to initialize
+ * &struct drm_driver and &DRM_VRAM_MM_FILE_OPERATIONS to initialize
+@@ -71,7 +71,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
+ * // setup device, vram base and size
+ * // ...
+ *
+- * ret = drmm_vram_helper_alloc_mm(dev, vram_base, vram_size);
++ * ret = drmm_vram_helper_init(dev, vram_base, vram_size);
+ * if (ret)
+ * return ret;
+ * return 0;
+@@ -84,7 +84,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs;
+ * to userspace.
+ *
+ * You don't have to clean up the instance of VRAM MM.
+- * drmm_vram_helper_alloc_mm() is a managed interface that installs a
++ * drmm_vram_helper_init() is a managed interface that installs a
+ * clean-up handler to run during the DRM device's release.
+ *
+ * For drawing or scanout operations, rsp. buffer objects have to be pinned
+diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
+index 17f3548c8ed25..f97a0875b9a12 100644
+--- a/drivers/gpu/drm/drm_internal.h
++++ b/drivers/gpu/drm/drm_internal.h
+@@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
+
+ void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
+ void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
+-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
+- struct dma_buf *dma_buf);
++void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
++ uint32_t handle);
+
+ /* drm_drv.c */
+ struct drm_minor *drm_minor_acquire(unsigned int minor_id);
+@@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank)
+
+ static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank)
+ {
+- kthread_destroy_worker(vblank->worker);
++ if (vblank->worker)
++ kthread_destroy_worker(vblank->worker);
+ }
+
+ int drm_vblank_worker_init(struct drm_vblank_crtc *vblank);
+diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
+index be4a52dc4d6fa..fb5e6f86dea20 100644
+--- a/drivers/gpu/drm/drm_ioctl.c
++++ b/drivers/gpu/drm/drm_ioctl.c
+@@ -472,7 +472,13 @@ EXPORT_SYMBOL(drm_invalid_op);
+ */
+ static int drm_copy_field(char __user *buf, size_t *buf_len, const char *value)
+ {
+- int len;
++ size_t len;
++
++ /* don't attempt to copy a NULL pointer */
++ if (WARN_ONCE(!value, "BUG: the value to copy was not set!")) {
++ *buf_len = 0;
++ return 0;
++ }
+
+ /* don't overflow userbuf */
+ len = strlen(value);
+diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
+index 71b646c4131fc..00d470ff071d3 100644
+--- a/drivers/gpu/drm/drm_mipi_dbi.c
++++ b/drivers/gpu/drm/drm_mipi_dbi.c
+@@ -1183,6 +1183,13 @@ int mipi_dbi_spi_transfer(struct spi_device *spi, u32 speed_hz,
+ size_t chunk;
+ int ret;
+
++ /* In __spi_validate, there's a validation that no partial transfers
++ * are accepted (xfer->len % w_size must be zero).
++ * Here we align max_chunk to multiple of 2 (16bits),
++ * to prevent transfers from being rejected.
++ */
++ max_chunk = ALIGN_DOWN(max_chunk, 2);
++
+ spi_message_init_with_transfers(&m, &tr, 1);
+
+ while (len) {
+diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
+index 5dd475e829950..0c806e99e8690 100644
+--- a/drivers/gpu/drm/drm_mipi_dsi.c
++++ b/drivers/gpu/drm/drm_mipi_dsi.c
+@@ -221,7 +221,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
+ return dsi;
+ }
+
+- dsi->dev.of_node = info->node;
++ device_set_node(&dsi->dev, of_fwnode_handle(info->node));
+ dsi->channel = info->channel;
+ strlcpy(dsi->name, info->type, sizeof(dsi->name));
+
+@@ -300,6 +300,7 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
+ {
+ struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+
++ mipi_dsi_detach(dsi);
+ mipi_dsi_device_unregister(dsi);
+
+ return 0;
+@@ -1142,6 +1143,58 @@ int mipi_dsi_dcs_get_display_brightness(struct mipi_dsi_device *dsi,
+ }
+ EXPORT_SYMBOL(mipi_dsi_dcs_get_display_brightness);
+
++/**
++ * mipi_dsi_dcs_set_display_brightness_large() - sets the 16-bit brightness value
++ * of the display
++ * @dsi: DSI peripheral device
++ * @brightness: brightness value
++ *
++ * Return: 0 on success or a negative error code on failure.
++ */
++int mipi_dsi_dcs_set_display_brightness_large(struct mipi_dsi_device *dsi,
++ u16 brightness)
++{
++ u8 payload[2] = { brightness >> 8, brightness & 0xff };
++ ssize_t err;
++
++ err = mipi_dsi_dcs_write(dsi, MIPI_DCS_SET_DISPLAY_BRIGHTNESS,
++ payload, sizeof(payload));
++ if (err < 0)
++ return err;
++
++ return 0;
++}
++EXPORT_SYMBOL(mipi_dsi_dcs_set_display_brightness_large);
++
++/**
++ * mipi_dsi_dcs_get_display_brightness_large() - gets the current 16-bit
++ * brightness value of the display
++ * @dsi: DSI peripheral device
++ * @brightness: brightness value
++ *
++ * Return: 0 on success or a negative error code on failure.
++ */
++int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
++ u16 *brightness)
++{
++ u8 brightness_be[2];
++ ssize_t err;
++
++ err = mipi_dsi_dcs_read(dsi, MIPI_DCS_GET_DISPLAY_BRIGHTNESS,
++ brightness_be, sizeof(brightness_be));
++ if (err <= 0) {
++ if (err == 0)
++ err = -ENODATA;
++
++ return err;
++ }
++
++ *brightness = (brightness_be[0] << 8) | brightness_be[1];
++
++ return 0;
++}
++EXPORT_SYMBOL(mipi_dsi_dcs_get_display_brightness_large);
++
+ static int mipi_dsi_drv_probe(struct device *dev)
+ {
+ struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver);
+diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
+index 37b4b9f0e468a..1bd4f0b2cc4d3 100644
+--- a/drivers/gpu/drm/drm_mode_config.c
++++ b/drivers/gpu/drm/drm_mode_config.c
+@@ -398,6 +398,8 @@ static void drm_mode_config_init_release(struct drm_device *dev, void *ptr)
+ */
+ int drmm_mode_config_init(struct drm_device *dev)
+ {
++ int ret;
++
+ mutex_init(&dev->mode_config.mutex);
+ drm_modeset_lock_init(&dev->mode_config.connection_mutex);
+ mutex_init(&dev->mode_config.idr_mutex);
+@@ -419,7 +421,11 @@ int drmm_mode_config_init(struct drm_device *dev)
+ init_llist_head(&dev->mode_config.connector_free_list);
+ INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn);
+
+- drm_mode_create_standard_properties(dev);
++ ret = drm_mode_create_standard_properties(dev);
++ if (ret) {
++ drm_mode_config_cleanup(dev);
++ return ret;
++ }
+
+ /* Just to be sure */
+ dev->mode_config.num_fb = 0;
+diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
+index 997b8827fed27..37c34146eea83 100644
+--- a/drivers/gpu/drm/drm_of.c
++++ b/drivers/gpu/drm/drm_of.c
+@@ -231,6 +231,9 @@ EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint);
+ * return either the associated struct drm_panel or drm_bridge device. Either
+ * @panel or @bridge must not be NULL.
+ *
++ * This function is deprecated and should not be used in new drivers. Use
++ * devm_drm_of_get_bridge() instead.
++ *
+ * Returns zero if successful, or one of the standard error codes if it fails.
+ */
+ int drm_of_find_panel_or_bridge(const struct device_node *np,
+diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
+index e1b2ce4921ae7..6106fa7c43028 100644
+--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
++++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
+@@ -109,6 +109,18 @@ static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = {
+ .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
+ };
+
++static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = {
++ .width = 1280,
++ .height = 1920,
++ .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
++};
++
++static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = {
++ .width = 1600,
++ .height = 2560,
++ .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP,
++};
++
+ static const struct dmi_system_id orientation_data[] = {
+ { /* Acer One 10 (S1003) */
+ .matches = {
+@@ -116,6 +128,18 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
++ }, { /* Acer Switch V 10 (SW5-017) */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"),
++ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
++ }, { /* Anbernic Win600 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Win600"),
++ },
++ .driver_data = (void *)&lcd720x1280_rightside_up,
+ }, { /* Asus T100HA */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+@@ -154,6 +178,12 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"),
+ },
+ .driver_data = (void *)&lcd720x1280_rightside_up,
++ }, { /* GPD Win Max */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1619-01"),
++ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /*
+ * GPD Pocket, note that the the DMI data is less generic then
+ * it seems, devices with a board-vendor of "AMI Corporation"
+@@ -205,6 +235,13 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"),
+ },
+ .driver_data = (void *)&itworks_tw891,
++ }, { /* KD Kurio Smart C15200 2-in-1 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "KD Interactive"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Kurio Smart"),
++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "KDM960BCP"),
++ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /*
+ * Lenovo Ideapad Miix 310 laptop, only some production batches
+ * have a portrait screen, the resolution checks makes the quirk
+@@ -223,12 +260,42 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
+- }, { /* Lenovo Ideapad D330 */
++ }, { /* Lenovo Ideapad D330-10IGM (HD) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81H3"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"),
+ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
++ }, { /* Lenovo Ideapad D330-10IGM (FHD) */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"),
++ },
++ .driver_data = (void *)&lcd1200x1920_rightside_up,
++ }, { /* Lenovo Ideapad D330-10IGL (HD) */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"),
++ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
++ }, { /* Lenovo IdeaPad Duet 3 10IGL5 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
++ },
++ .driver_data = (void *)&lcd1200x1920_rightside_up,
++ }, { /* Lenovo Yoga Book X90F / X90L */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
++ },
++ .driver_data = (void *)&lcd1200x1920_rightside_up,
++ }, { /* Lenovo Yoga Book X91F / X91L */
++ .matches = {
++ /* Non exact match to match F + L versions */
++ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
++ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* OneGX1 Pro */
+ .matches = {
+@@ -237,6 +304,25 @@ static const struct dmi_system_id orientation_data[] = {
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"),
+ },
+ .driver_data = (void *)&onegx1_pro,
++ }, { /* OneXPlayer */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"),
++ },
++ .driver_data = (void *)&lcd1600x2560_leftside_up,
++ }, { /* Samsung GalaxyBook 10.6 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"),
++ },
++ .driver_data = (void *)&lcd1280x1920_rightside_up,
++ }, { /* Valve Steam Deck */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
++ },
++ .driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /* VIOS LTH17 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"),
+diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
+index 82afb854141b2..fd0bf90fb4c28 100644
+--- a/drivers/gpu/drm/drm_plane.c
++++ b/drivers/gpu/drm/drm_plane.c
+@@ -249,6 +249,13 @@ static int __drm_universal_plane_init(struct drm_device *dev,
+ if (WARN_ON(config->num_total_plane >= 32))
+ return -EINVAL;
+
++ /*
++ * First driver to need more than 64 formats needs to fix this. Each
++ * format is encoded as a bit and the current code only supports a u64.
++ */
++ if (WARN_ON(format_count > 64))
++ return -EINVAL;
++
+ WARN_ON(drm_drv_uses_atomic_modeset(dev) &&
+ (!funcs->atomic_destroy_state ||
+ !funcs->atomic_duplicate_state));
+@@ -270,13 +277,6 @@ static int __drm_universal_plane_init(struct drm_device *dev,
+ return -ENOMEM;
+ }
+
+- /*
+- * First driver to need more than 64 formats needs to fix this. Each
+- * format is encoded as a bit and the current code only supports a u64.
+- */
+- if (WARN_ON(format_count > 64))
+- return -EINVAL;
+-
+ if (format_modifiers) {
+ const uint64_t *temp_modifiers = format_modifiers;
+
+diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
+index 5b2d0ca03705c..838b32b70bce6 100644
+--- a/drivers/gpu/drm/drm_plane_helper.c
++++ b/drivers/gpu/drm/drm_plane_helper.c
+@@ -123,7 +123,6 @@ static int drm_plane_helper_check_update(struct drm_plane *plane,
+ .crtc_w = drm_rect_width(dst),
+ .crtc_h = drm_rect_height(dst),
+ .rotation = rotation,
+- .visible = *visible,
+ };
+ struct drm_crtc_state crtc_state = {
+ .crtc = crtc,
+diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
+index deb23dbec8b52..a350310b65d89 100644
+--- a/drivers/gpu/drm/drm_prime.c
++++ b/drivers/gpu/drm/drm_prime.c
+@@ -187,29 +187,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
+ return -ENOENT;
+ }
+
+-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
+- struct dma_buf *dma_buf)
++void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
++ uint32_t handle)
+ {
+ struct rb_node *rb;
+
+- rb = prime_fpriv->dmabufs.rb_node;
++ mutex_lock(&prime_fpriv->lock);
++
++ rb = prime_fpriv->handles.rb_node;
+ while (rb) {
+ struct drm_prime_member *member;
+
+- member = rb_entry(rb, struct drm_prime_member, dmabuf_rb);
+- if (member->dma_buf == dma_buf) {
++ member = rb_entry(rb, struct drm_prime_member, handle_rb);
++ if (member->handle == handle) {
+ rb_erase(&member->handle_rb, &prime_fpriv->handles);
+ rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs);
+
+- dma_buf_put(dma_buf);
++ dma_buf_put(member->dma_buf);
+ kfree(member);
+- return;
+- } else if (member->dma_buf < dma_buf) {
++ break;
++ } else if (member->handle < handle) {
+ rb = rb->rb_right;
+ } else {
+ rb = rb->rb_left;
+ }
+ }
++
++ mutex_unlock(&prime_fpriv->lock);
+ }
+
+ void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
+@@ -719,11 +723,13 @@ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+ if (obj->funcs && obj->funcs->mmap) {
+ vma->vm_ops = obj->funcs->vm_ops;
+
++ drm_gem_object_get(obj);
+ ret = obj->funcs->mmap(obj, vma);
+- if (ret)
++ if (ret) {
++ drm_gem_object_put(obj);
+ return ret;
++ }
+ vma->vm_private_data = obj;
+- drm_gem_object_get(obj);
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
+index 5606bca3caa83..f6b72e03688d4 100644
+--- a/drivers/gpu/drm/drm_probe_helper.c
++++ b/drivers/gpu/drm/drm_probe_helper.c
+@@ -488,8 +488,9 @@ retry:
+ */
+ dev->mode_config.delayed_event = true;
+ if (dev->mode_config.poll_enabled)
+- schedule_delayed_work(&dev->mode_config.output_poll_work,
+- 0);
++ mod_delayed_work(system_wq,
++ &dev->mode_config.output_poll_work,
++ 0);
+ }
+
+ /* Re-enable polling in case the global poll config changed. */
+diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
+index c9a9d74f338c1..7e48dcd1bee4d 100644
+--- a/drivers/gpu/drm/drm_syncobj.c
++++ b/drivers/gpu/drm/drm_syncobj.c
+@@ -404,8 +404,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
+
+ if (*fence) {
+ ret = dma_fence_chain_find_seqno(fence, point);
+- if (!ret)
++ if (!ret) {
++ /* If the requested seqno is already signaled
++ * drm_syncobj_find_fence may return a NULL
++ * fence. To make sure the recipient gets
++ * signalled, use a new fence instead.
++ */
++ if (!*fence)
++ *fence = dma_fence_get_stub();
++
+ goto out;
++ }
+ dma_fence_put(*fence);
+ } else {
+ ret = -EINVAL;
+@@ -844,12 +853,57 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
+ &args->handle);
+ }
+
++
++/*
++ * Try to flatten a dma_fence_chain into a dma_fence_array so that it can be
++ * added as timeline fence to a chain again.
++ */
++static int drm_syncobj_flatten_chain(struct dma_fence **f)
++{
++ struct dma_fence_chain *chain = to_dma_fence_chain(*f);
++ struct dma_fence *tmp, **fences;
++ struct dma_fence_array *array;
++ unsigned int count;
++
++ if (!chain)
++ return 0;
++
++ count = 0;
++ dma_fence_chain_for_each(tmp, &chain->base)
++ ++count;
++
++ fences = kmalloc_array(count, sizeof(*fences), GFP_KERNEL);
++ if (!fences)
++ return -ENOMEM;
++
++ count = 0;
++ dma_fence_chain_for_each(tmp, &chain->base)
++ fences[count++] = dma_fence_get(tmp);
++
++ array = dma_fence_array_create(count, fences,
++ dma_fence_context_alloc(1),
++ 1, false);
++ if (!array)
++ goto free_fences;
++
++ dma_fence_put(*f);
++ *f = &array->base;
++ return 0;
++
++free_fences:
++ while (count--)
++ dma_fence_put(fences[count]);
++
++ kfree(fences);
++ return -ENOMEM;
++}
++
+ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
+ struct drm_syncobj_transfer *args)
+ {
+ struct drm_syncobj *timeline_syncobj = NULL;
+- struct dma_fence *fence;
+ struct dma_fence_chain *chain;
++ struct dma_fence *fence;
+ int ret;
+
+ timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+@@ -860,16 +914,22 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
+ args->src_point, args->flags,
+ &fence);
+ if (ret)
+- goto err;
++ goto err_put_timeline;
++
++ ret = drm_syncobj_flatten_chain(&fence);
++ if (ret)
++ goto err_free_fence;
++
+ chain = dma_fence_chain_alloc();
+ if (!chain) {
+ ret = -ENOMEM;
+- goto err1;
++ goto err_free_fence;
+ }
++
+ drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point);
+-err1:
++err_free_fence:
+ dma_fence_put(fence);
+-err:
++err_put_timeline:
+ drm_syncobj_put(timeline_syncobj);
+
+ return ret;
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+index f418e0b75772e..0edcf8ceb4a78 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+@@ -125,9 +125,9 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
+ return;
+ etnaviv_dump_core = false;
+
+- mutex_lock(&gpu->mmu_context->lock);
++ mutex_lock(&submit->mmu_context->lock);
+
+- mmu_size = etnaviv_iommu_dump_size(gpu->mmu_context);
++ mmu_size = etnaviv_iommu_dump_size(submit->mmu_context);
+
+ /* We always dump registers, mmu, ring, hanging cmdbuf and end marker */
+ n_obj = 5;
+@@ -157,7 +157,7 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
+ iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (!iter.start) {
+- mutex_unlock(&gpu->mmu_context->lock);
++ mutex_unlock(&submit->mmu_context->lock);
+ dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
+ return;
+ }
+@@ -169,18 +169,18 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
+ memset(iter.hdr, 0, iter.data - iter.start);
+
+ etnaviv_core_dump_registers(&iter, gpu);
+- etnaviv_core_dump_mmu(&iter, gpu->mmu_context, mmu_size);
++ etnaviv_core_dump_mmu(&iter, submit->mmu_context, mmu_size);
+ etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
+ gpu->buffer.size,
+ etnaviv_cmdbuf_get_va(&gpu->buffer,
+- &gpu->mmu_context->cmdbuf_mapping));
++ &submit->mmu_context->cmdbuf_mapping));
+
+ etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
+ submit->cmdbuf.vaddr, submit->cmdbuf.size,
+ etnaviv_cmdbuf_get_va(&submit->cmdbuf,
+- &gpu->mmu_context->cmdbuf_mapping));
++ &submit->mmu_context->cmdbuf_mapping));
+
+- mutex_unlock(&gpu->mmu_context->lock);
++ mutex_unlock(&submit->mmu_context->lock);
+
+ /* Reserve space for the bomap */
+ if (n_bomap_pages) {
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+index 6d8bed9c739d5..aa7227bfb5c54 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+@@ -88,7 +88,15 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+ static int etnaviv_gem_prime_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
+ struct vm_area_struct *vma)
+ {
+- return dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0);
++ int ret;
++
++ ret = dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0);
++ if (!ret) {
++ /* Drop the reference acquired by drm_gem_mmap_obj(). */
++ drm_gem_object_put(&etnaviv_obj->base);
++ }
++
++ return ret;
+ }
+
+ static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+index 486259e154aff..90488ab8c6d8e 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+@@ -469,6 +469,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
+ return -EINVAL;
+ }
+
++ if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K ||
++ args->nr_bos > SZ_128K || args->nr_pmrs > 128) {
++ DRM_ERROR("submit arguments out of size limits\n");
++ return -EINVAL;
++ }
++
+ /*
+ * Copy the command submission and bo array to kernel space in
+ * one go, and do this outside of any locks.
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+index cc5b07f863463..e8ff70be449ac 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+@@ -416,6 +416,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
+ if (gpu->identity.model == chipModel_GC700)
+ gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
+
++ /* These models/revisions don't have the 2D pipe bit */
++ if ((gpu->identity.model == chipModel_GC500 &&
++ gpu->identity.revision <= 2) ||
++ gpu->identity.model == chipModel_GC300)
++ gpu->identity.features |= chipFeatures_PIPE_2D;
++
+ if ((gpu->identity.model == chipModel_GC500 &&
+ gpu->identity.revision < 2) ||
+ (gpu->identity.model == chipModel_GC300 &&
+@@ -449,8 +455,9 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
+ gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
+ }
+
+- /* GC600 idle register reports zero bits where modules aren't present */
+- if (gpu->identity.model == chipModel_GC600)
++ /* GC600/300 idle register reports zero bits where modules aren't present */
++ if (gpu->identity.model == chipModel_GC600 ||
++ gpu->identity.model == chipModel_GC300)
+ gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
+ VIVS_HI_IDLE_STATE_RA |
+ VIVS_HI_IDLE_STATE_SE |
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+index 1c75c8ed5bcea..85eddd492774d 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+@@ -130,6 +130,7 @@ struct etnaviv_gpu {
+
+ /* hang detection */
+ u32 hangcheck_dma_addr;
++ u32 hangcheck_fence;
+
+ void __iomem *mmio;
+ int irq;
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+index 9fb1a2aadbcb0..2de806173b3aa 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+@@ -80,10 +80,10 @@ static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, u32 iova,
+ return -EINVAL;
+
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+- u32 pa = sg_dma_address(sg) - sg->offset;
++ phys_addr_t pa = sg_dma_address(sg) - sg->offset;
+ size_t bytes = sg_dma_len(sg) + sg->offset;
+
+- VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
++ VERB("map[%d]: %08x %pap(%zx)", i, iova, &pa, bytes);
+
+ ret = etnaviv_context_map(context, da, pa, bytes, prot);
+ if (ret)
+@@ -286,6 +286,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
+
+ mutex_lock(&context->lock);
+
++ /* Bail if the mapping has been reaped by another thread */
++ if (!mapping->context) {
++ mutex_unlock(&context->lock);
++ return;
++ }
++
+ /* If the vram node is on the mm, unmap and remove the node */
+ if (mapping->vram_node.mm == &context->mm)
+ etnaviv_iommu_remove_mapping(context, mapping);
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+index feb6da1b6cebc..bbf391f48f949 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+@@ -107,8 +107,10 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
+ */
+ dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+ change = dma_addr - gpu->hangcheck_dma_addr;
+- if (change < 0 || change > 16) {
++ if (gpu->completed_fence != gpu->hangcheck_fence ||
++ change < 0 || change > 16) {
+ gpu->hangcheck_dma_addr = dma_addr;
++ gpu->hangcheck_fence = gpu->completed_fence;
+ goto out_no_timeout;
+ }
+
+diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+index 12571ac455404..12989a47eb66e 100644
+--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
++++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+@@ -806,31 +806,40 @@ static int exynos7_decon_resume(struct device *dev)
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to prepare_enable the pclk [%d]\n",
+ ret);
+- return ret;
++ goto err_pclk_enable;
+ }
+
+ ret = clk_prepare_enable(ctx->aclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to prepare_enable the aclk [%d]\n",
+ ret);
+- return ret;
++ goto err_aclk_enable;
+ }
+
+ ret = clk_prepare_enable(ctx->eclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to prepare_enable the eclk [%d]\n",
+ ret);
+- return ret;
++ goto err_eclk_enable;
+ }
+
+ ret = clk_prepare_enable(ctx->vclk);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev, "Failed to prepare_enable the vclk [%d]\n",
+ ret);
+- return ret;
++ goto err_vclk_enable;
+ }
+
+ return 0;
++
++err_vclk_enable:
++ clk_disable_unprepare(ctx->eclk);
++err_eclk_enable:
++ clk_disable_unprepare(ctx->aclk);
++err_aclk_enable:
++ clk_disable_unprepare(ctx->pclk);
++err_pclk_enable:
++ return ret;
+ }
+ #endif
+
+diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+index 8d137857818ca..e0465b604f210 100644
+--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
++++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+@@ -809,15 +809,15 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
+ reg |= DSIM_AUTO_MODE;
+ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HSE)
+ reg |= DSIM_HSE_MODE;
+- if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP))
++ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP)
+ reg |= DSIM_HFP_MODE;
+- if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP))
++ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP)
+ reg |= DSIM_HBP_MODE;
+- if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA))
++ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA)
+ reg |= DSIM_HSA_MODE;
+ }
+
+- if (!(dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET))
++ if (dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)
+ reg |= DSIM_EOT_DISABLE;
+
+ switch (dsi->format) {
+diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+index 471fd6c8135f2..27613abeed961 100644
+--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
++++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+@@ -1335,7 +1335,7 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data,
+ /* Let the runqueue know that there is work to do. */
+ queue_work(g2d->g2d_workq, &g2d->runqueue_work);
+
+- if (runqueue_node->async)
++ if (req->async)
+ goto out;
+
+ wait_for_completion(&runqueue_node->complete);
+diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.h b/drivers/gpu/drm/exynos/exynos_drm_g2d.h
+index 74ea3c26deadc..1a5ae781b56c6 100644
+--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.h
++++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.h
+@@ -34,11 +34,11 @@ static inline int exynos_g2d_exec_ioctl(struct drm_device *dev, void *data,
+ return -ENODEV;
+ }
+
+-int g2d_open(struct drm_device *drm_dev, struct drm_file *file)
++static inline int g2d_open(struct drm_device *drm_dev, struct drm_file *file)
+ {
+ return 0;
+ }
+
+-void g2d_close(struct drm_device *drm_dev, struct drm_file *file)
++static inline void g2d_close(struct drm_device *drm_dev, struct drm_file *file)
+ { }
+ #endif
+diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+index e5662bdcbbde3..e96436e11a36c 100644
+--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
++++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+@@ -468,8 +468,6 @@ static int vidi_remove(struct platform_device *pdev)
+ if (ctx->raw_edid != (struct edid *)fake_edid_info) {
+ kfree(ctx->raw_edid);
+ ctx->raw_edid = NULL;
+-
+- return -EINVAL;
+ }
+
+ component_del(&pdev->dev, &vidi_component_ops);
+diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+index 4d4a715b429d1..2c2b92324a2e9 100644
+--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
++++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+@@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector)
+ return drm_panel_get_modes(fsl_connector->panel, connector);
+ }
+
+-static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ if (mode->hdisplay & 0xf)
+ return MODE_ERROR;
+diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
+index b03f7b8241f2b..7162f4c946afe 100644
+--- a/drivers/gpu/drm/gma500/gma_display.c
++++ b/drivers/gpu/drm/gma500/gma_display.c
+@@ -529,15 +529,18 @@ int gma_crtc_page_flip(struct drm_crtc *crtc,
+ WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+ gma_crtc->page_flip_event = event;
++ spin_unlock_irqrestore(&dev->event_lock, flags);
+
+ /* Call this locked if we want an event at vblank interrupt. */
+ ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
+ if (ret) {
+- gma_crtc->page_flip_event = NULL;
+- drm_crtc_vblank_put(crtc);
++ spin_lock_irqsave(&dev->event_lock, flags);
++ if (gma_crtc->page_flip_event) {
++ gma_crtc->page_flip_event = NULL;
++ drm_crtc_vblank_put(crtc);
++ }
++ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+-
+- spin_unlock_irqrestore(&dev->event_lock, flags);
+ } else {
+ ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
+ }
+diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
+index 3596064293167..6a578078e02f2 100644
+--- a/drivers/gpu/drm/gma500/psb_intel_display.c
++++ b/drivers/gpu/drm/gma500/psb_intel_display.c
+@@ -536,14 +536,15 @@ void psb_intel_crtc_init(struct drm_device *dev, int pipe,
+
+ struct drm_crtc *psb_intel_get_crtc_from_pipe(struct drm_device *dev, int pipe)
+ {
+- struct drm_crtc *crtc = NULL;
++ struct drm_crtc *crtc;
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
++
+ if (gma_crtc->pipe == pipe)
+- break;
++ return crtc;
+ }
+- return crtc;
++ return NULL;
+ }
+
+ int gma_connector_clones(struct drm_device *dev, int type_mask)
+diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
+index 43943e9802036..4e41c144a2902 100644
+--- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig
++++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
+@@ -1,7 +1,8 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ config DRM_HISI_HIBMC
+ tristate "DRM Support for Hisilicon Hibmc"
+- depends on DRM && PCI && ARM64
++ depends on DRM && PCI && (ARM64 || COMPILE_TEST)
++ depends on MMU
+ select DRM_KMS_HELPER
+ select DRM_VRAM_HELPER
+ select DRM_TTM
+diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+index cd818a6291835..584d3a73db96c 100644
+--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
++++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+@@ -198,8 +198,6 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
+ if (ret)
+ drm_warn(dev, "Failed to update vram location.\n");
+
+- hv->dirt_needed = true;
+-
+ ret = hyperv_mode_config_init(hv);
+ if (ret)
+ goto err_vmbus_close;
+@@ -225,12 +223,29 @@ static int hyperv_vmbus_remove(struct hv_device *hdev)
+ {
+ struct drm_device *dev = hv_get_drvdata(hdev);
+ struct hyperv_drm_device *hv = to_hv(dev);
++ struct pci_dev *pdev;
+
+ drm_dev_unplug(dev);
+ drm_atomic_helper_shutdown(dev);
+ vmbus_close(hdev->channel);
+ hv_set_drvdata(hdev, NULL);
+- vmbus_free_mmio(hv->mem->start, hv->fb_size);
++
++ /*
++ * Free allocated MMIO memory only on Gen2 VMs.
++ * On Gen1 VMs, release the PCI device
++ */
++ if (efi_enabled(EFI_BOOT)) {
++ vmbus_free_mmio(hv->mem->start, hv->fb_size);
++ } else {
++ pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
++ PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
++ if (!pdev) {
++ drm_err(dev, "Unable to find PCI Hyper-V video\n");
++ return -ENODEV;
++ }
++ pci_release_region(pdev, 0);
++ pci_dev_put(pdev);
++ }
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
+index 8c97a20dfe231..22bebeb9c3ac0 100644
+--- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
++++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
+@@ -7,9 +7,11 @@
+
+ #include <drm/drm_damage_helper.h>
+ #include <drm/drm_drv.h>
++#include <drm/drm_edid.h>
+ #include <drm/drm_fb_helper.h>
+ #include <drm/drm_format_helper.h>
+ #include <drm/drm_fourcc.h>
++#include <drm/drm_framebuffer.h>
+ #include <drm/drm_gem_atomic_helper.h>
+ #include <drm/drm_gem_framebuffer_helper.h>
+ #include <drm/drm_gem_shmem_helper.h>
+diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
+index f960f5d7664e6..fe6b34774483f 100644
+--- a/drivers/gpu/drm/i915/Kconfig
++++ b/drivers/gpu/drm/i915/Kconfig
+@@ -101,6 +101,7 @@ config DRM_I915_USERPTR
+ config DRM_I915_GVT
+ bool "Enable Intel GVT-g graphics virtualization host support"
+ depends on DRM_I915
++ depends on X86
+ depends on 64BIT
+ default n
+ help
+diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
+index 335ba9f43d8f7..9d371be7dc5cd 100644
+--- a/drivers/gpu/drm/i915/Makefile
++++ b/drivers/gpu/drm/i915/Makefile
+@@ -211,6 +211,8 @@ i915-y += \
+ display/intel_dpio_phy.o \
+ display/intel_dpll.o \
+ display/intel_dpll_mgr.o \
++ display/intel_dpt.o \
++ display/intel_drrs.o \
+ display/intel_dsb.o \
+ display/intel_fb.o \
+ display/intel_fbc.o \
+@@ -247,6 +249,7 @@ i915-y += \
+ display/g4x_dp.o \
+ display/g4x_hdmi.o \
+ display/icl_dsi.o \
++ display/intel_backlight.o \
+ display/intel_crt.o \
+ display/intel_ddi.o \
+ display/intel_ddi_buf_trans.o \
+diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
+index de0f358184aa3..29c0eca647e34 100644
+--- a/drivers/gpu/drm/i915/display/g4x_dp.c
++++ b/drivers/gpu/drm/i915/display/g4x_dp.c
+@@ -7,6 +7,7 @@
+
+ #include "g4x_dp.h"
+ #include "intel_audio.h"
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_de.h"
+ #include "intel_display_types.h"
+@@ -16,7 +17,6 @@
+ #include "intel_fifo_underrun.h"
+ #include "intel_hdmi.h"
+ #include "intel_hotplug.h"
+-#include "intel_panel.h"
+ #include "intel_pps.h"
+ #include "intel_sideband.h"
+
+diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
+index be352e9f0afc1..63baaf6988ade 100644
+--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
++++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
+@@ -584,6 +584,7 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv,
+ else
+ intel_encoder->enable = g4x_enable_hdmi;
+ }
++ intel_encoder->shutdown = intel_hdmi_encoder_shutdown;
+
+ intel_encoder->type = INTEL_OUTPUT_HDMI;
+ intel_encoder->power_domain = intel_port_to_power_domain(port);
+diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
+index a3eae3f3eadce..26dd5a2bd5024 100644
+--- a/drivers/gpu/drm/i915/display/icl_dsi.c
++++ b/drivers/gpu/drm/i915/display/icl_dsi.c
+@@ -29,6 +29,7 @@
+ #include <drm/drm_mipi_dsi.h>
+
+ #include "intel_atomic.h"
++#include "intel_backlight.h"
+ #include "intel_combo_phy.h"
+ #include "intel_connector.h"
+ #include "intel_crtc.h"
+@@ -306,9 +307,21 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
+ {
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
++ i915_reg_t dss_ctl1_reg, dss_ctl2_reg;
+ u32 dss_ctl1;
+
+- dss_ctl1 = intel_de_read(dev_priv, DSS_CTL1);
++ /* FIXME: Move all DSS handling to intel_vdsc.c */
++ if (DISPLAY_VER(dev_priv) >= 12) {
++ struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc);
++
++ dss_ctl1_reg = ICL_PIPE_DSS_CTL1(crtc->pipe);
++ dss_ctl2_reg = ICL_PIPE_DSS_CTL2(crtc->pipe);
++ } else {
++ dss_ctl1_reg = DSS_CTL1;
++ dss_ctl2_reg = DSS_CTL2;
++ }
++
++ dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg);
+ dss_ctl1 |= SPLITTER_ENABLE;
+ dss_ctl1 &= ~OVERLAP_PIXELS_MASK;
+ dss_ctl1 |= OVERLAP_PIXELS(intel_dsi->pixel_overlap);
+@@ -329,16 +342,16 @@ static void configure_dual_link_mode(struct intel_encoder *encoder,
+
+ dss_ctl1 &= ~LEFT_DL_BUF_TARGET_DEPTH_MASK;
+ dss_ctl1 |= LEFT_DL_BUF_TARGET_DEPTH(dl_buffer_depth);
+- dss_ctl2 = intel_de_read(dev_priv, DSS_CTL2);
++ dss_ctl2 = intel_de_read(dev_priv, dss_ctl2_reg);
+ dss_ctl2 &= ~RIGHT_DL_BUF_TARGET_DEPTH_MASK;
+ dss_ctl2 |= RIGHT_DL_BUF_TARGET_DEPTH(dl_buffer_depth);
+- intel_de_write(dev_priv, DSS_CTL2, dss_ctl2);
++ intel_de_write(dev_priv, dss_ctl2_reg, dss_ctl2);
+ } else {
+ /* Interleave */
+ dss_ctl1 |= DUAL_LINK_MODE_INTERLEAVE;
+ }
+
+- intel_de_write(dev_priv, DSS_CTL1, dss_ctl1);
++ intel_de_write(dev_priv, dss_ctl1_reg, dss_ctl1);
+ }
+
+ /* aka DSI 8X clock */
+@@ -711,10 +724,7 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder,
+ intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+
+ for_each_dsi_phy(phy, intel_dsi->phys) {
+- if (DISPLAY_VER(dev_priv) >= 12)
+- val |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+- else
+- val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
++ val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+ }
+ intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+
+@@ -1150,8 +1160,6 @@ static void
+ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+ {
+- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+-
+ /* step 4a: power up all lanes of the DDI used by DSI */
+ gen11_dsi_power_up_lanes(encoder);
+
+@@ -1177,8 +1185,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
+ gen11_dsi_configure_transcoder(encoder, crtc_state);
+
+ /* Step 4l: Gate DDI clocks */
+- if (DISPLAY_VER(dev_priv) == 11)
+- gen11_dsi_gate_clocks(encoder);
++ gen11_dsi_gate_clocks(encoder);
+ }
+
+ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
+diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
+new file mode 100644
+index 0000000000000..60f91ac7d1427
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_backlight.c
+@@ -0,0 +1,1776 @@
++// SPDX-License-Identifier: MIT
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#include <linux/kernel.h>
++#include <linux/pwm.h>
++
++#include "intel_backlight.h"
++#include "intel_connector.h"
++#include "intel_de.h"
++#include "intel_display_types.h"
++#include "intel_dp_aux_backlight.h"
++#include "intel_dsi_dcs_backlight.h"
++#include "intel_panel.h"
++
++/**
++ * scale - scale values from one range to another
++ * @source_val: value in range [@source_min..@source_max]
++ * @source_min: minimum legal value for @source_val
++ * @source_max: maximum legal value for @source_val
++ * @target_min: corresponding target value for @source_min
++ * @target_max: corresponding target value for @source_max
++ *
++ * Return @source_val in range [@source_min..@source_max] scaled to range
++ * [@target_min..@target_max].
++ */
++static u32 scale(u32 source_val,
++ u32 source_min, u32 source_max,
++ u32 target_min, u32 target_max)
++{
++ u64 target_val;
++
++ WARN_ON(source_min > source_max);
++ WARN_ON(target_min > target_max);
++
++ /* defensive */
++ source_val = clamp(source_val, source_min, source_max);
++
++ /* avoid overflows */
++ target_val = mul_u32_u32(source_val - source_min,
++ target_max - target_min);
++ target_val = DIV_ROUND_CLOSEST_ULL(target_val, source_max - source_min);
++ target_val += target_min;
++
++ return target_val;
++}
++
++/*
++ * Scale user_level in range [0..user_max] to [0..hw_max], clamping the result
++ * to [hw_min..hw_max].
++ */
++static u32 clamp_user_to_hw(struct intel_connector *connector,
++ u32 user_level, u32 user_max)
++{
++ struct intel_panel *panel = &connector->panel;
++ u32 hw_level;
++
++ hw_level = scale(user_level, 0, user_max, 0, panel->backlight.max);
++ hw_level = clamp(hw_level, panel->backlight.min, panel->backlight.max);
++
++ return hw_level;
++}
++
++/* Scale hw_level in range [hw_min..hw_max] to [0..user_max]. */
++static u32 scale_hw_to_user(struct intel_connector *connector,
++ u32 hw_level, u32 user_max)
++{
++ struct intel_panel *panel = &connector->panel;
++
++ return scale(hw_level, panel->backlight.min, panel->backlight.max,
++ 0, user_max);
++}
++
++u32 intel_panel_invert_pwm_level(struct intel_connector *connector, u32 val)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
++
++ if (dev_priv->params.invert_brightness < 0)
++ return val;
++
++ if (dev_priv->params.invert_brightness > 0 ||
++ dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) {
++ return panel->backlight.pwm_level_max - val + panel->backlight.pwm_level_min;
++ }
++
++ return val;
++}
++
++void intel_panel_set_pwm_level(const struct drm_connector_state *conn_state, u32 val)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *i915 = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ drm_dbg_kms(&i915->drm, "set backlight PWM = %d\n", val);
++ panel->backlight.pwm_funcs->set(conn_state, val);
++}
++
++u32 intel_panel_backlight_level_to_pwm(struct intel_connector *connector, u32 val)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ drm_WARN_ON_ONCE(&dev_priv->drm,
++ panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0);
++
++ val = scale(val, panel->backlight.min, panel->backlight.max,
++ panel->backlight.pwm_level_min, panel->backlight.pwm_level_max);
++
++ return intel_panel_invert_pwm_level(connector, val);
++}
++
++u32 intel_panel_backlight_level_from_pwm(struct intel_connector *connector, u32 val)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ drm_WARN_ON_ONCE(&dev_priv->drm,
++ panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0);
++
++ if (dev_priv->params.invert_brightness > 0 ||
++ (dev_priv->params.invert_brightness == 0 && dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS))
++ val = panel->backlight.pwm_level_max - (val - panel->backlight.pwm_level_min);
++
++ return scale(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max,
++ panel->backlight.min, panel->backlight.max);
++}
++
++static u32 lpt_get_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ return intel_de_read(dev_priv, BLC_PWM_PCH_CTL2) & BACKLIGHT_DUTY_CYCLE_MASK;
++}
++
++static u32 pch_get_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ return intel_de_read(dev_priv, BLC_PWM_CPU_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
++}
++
++static u32 i9xx_get_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 val;
++
++ val = intel_de_read(dev_priv, BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
++ if (DISPLAY_VER(dev_priv) < 4)
++ val >>= 1;
++
++ if (panel->backlight.combination_mode) {
++ u8 lbpc;
++
++ pci_read_config_byte(to_pci_dev(dev_priv->drm.dev), LBPC, &lbpc);
++ val *= lbpc;
++ }
++
++ return val;
++}
++
++static u32 vlv_get_backlight(struct intel_connector *connector, enum pipe pipe)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ if (drm_WARN_ON(&dev_priv->drm, pipe != PIPE_A && pipe != PIPE_B))
++ return 0;
++
++ return intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe)) & BACKLIGHT_DUTY_CYCLE_MASK;
++}
++
++static u32 bxt_get_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ return intel_de_read(dev_priv,
++ BXT_BLC_PWM_DUTY(panel->backlight.controller));
++}
++
++static u32 ext_pwm_get_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct intel_panel *panel = &connector->panel;
++ struct pwm_state state;
++
++ pwm_get_state(panel->backlight.pwm, &state);
++ return pwm_get_relative_duty_cycle(&state, 100);
++}
++
++static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ u32 val = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2) & ~BACKLIGHT_DUTY_CYCLE_MASK;
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, val | level);
++}
++
++static void pch_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ u32 tmp;
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL) & ~BACKLIGHT_DUTY_CYCLE_MASK;
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL, tmp | level);
++}
++
++static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 tmp, mask;
++
++ drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
++
++ if (panel->backlight.combination_mode) {
++ u8 lbpc;
++
++ lbpc = level * 0xfe / panel->backlight.pwm_level_max + 1;
++ level /= lbpc;
++ pci_write_config_byte(to_pci_dev(dev_priv->drm.dev), LBPC, lbpc);
++ }
++
++ if (DISPLAY_VER(dev_priv) == 4) {
++ mask = BACKLIGHT_DUTY_CYCLE_MASK;
++ } else {
++ level <<= 1;
++ mask = BACKLIGHT_DUTY_CYCLE_MASK_PNV;
++ }
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_CTL) & ~mask;
++ intel_de_write(dev_priv, BLC_PWM_CTL, tmp | level);
++}
++
++static void vlv_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe;
++ u32 tmp;
++
++ tmp = intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe)) & ~BACKLIGHT_DUTY_CYCLE_MASK;
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL(pipe), tmp | level);
++}
++
++static void bxt_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ intel_de_write(dev_priv,
++ BXT_BLC_PWM_DUTY(panel->backlight.controller), level);
++}
++
++static void ext_pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel;
++
++ pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
++ pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
++}
++
++static void
++intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *i915 = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ drm_dbg_kms(&i915->drm, "set backlight level = %d\n", level);
++
++ panel->backlight.funcs->set(conn_state, level);
++}
++
++/* set backlight brightness to level in range [0..max], assuming hw min is
++ * respected.
++ */
++void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state,
++ u32 user_level, u32 user_max)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 hw_level;
++
++ /*
++ * Lack of crtc may occur during driver init because
++ * connection_mutex isn't held across the entire backlight
++ * setup + modeset readout, and the BIOS can issue the
++ * requests at any time.
++ */
++ if (!panel->backlight.present || !conn_state->crtc)
++ return;
++
++ mutex_lock(&dev_priv->backlight_lock);
++
++ drm_WARN_ON(&dev_priv->drm, panel->backlight.max == 0);
++
++ hw_level = clamp_user_to_hw(connector, user_level, user_max);
++ panel->backlight.level = hw_level;
++
++ if (panel->backlight.device)
++ panel->backlight.device->props.brightness =
++ scale_hw_to_user(connector,
++ panel->backlight.level,
++ panel->backlight.device->props.max_brightness);
++
++ if (panel->backlight.enabled)
++ intel_panel_actually_set_backlight(conn_state, hw_level);
++
++ mutex_unlock(&dev_priv->backlight_lock);
++}
++
++static void lpt_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, level);
++
++ /*
++ * Although we don't support or enable CPU PWM with LPT/SPT based
++ * systems, it may have been enabled prior to loading the
++ * driver. Disable to avoid warnings on LCPLL disable.
++ *
++ * This needs rework if we need to add support for CPU PWM on PCH split
++ * platforms.
++ */
++ tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
++ if (tmp & BLM_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm,
++ "cpu backlight was enabled, disabling\n");
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2,
++ tmp & ~BLM_PWM_ENABLE);
++ }
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE);
++}
++
++static void pch_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, val);
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, tmp & ~BLM_PWM_ENABLE);
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE);
++}
++
++static void i9xx_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ intel_panel_set_pwm_level(old_conn_state, val);
++}
++
++static void i965_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ struct drm_i915_private *dev_priv = to_i915(old_conn_state->connector->dev);
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, val);
++
++ tmp = intel_de_read(dev_priv, BLC_PWM_CTL2);
++ intel_de_write(dev_priv, BLC_PWM_CTL2, tmp & ~BLM_PWM_ENABLE);
++}
++
++static void vlv_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ enum pipe pipe = to_intel_crtc(old_conn_state->crtc)->pipe;
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, val);
++
++ tmp = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe),
++ tmp & ~BLM_PWM_ENABLE);
++}
++
++static void bxt_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, val);
++
++ tmp = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ tmp & ~BXT_BLC_PWM_ENABLE);
++
++ if (panel->backlight.controller == 1) {
++ val = intel_de_read(dev_priv, UTIL_PIN_CTL);
++ val &= ~UTIL_PIN_ENABLE;
++ intel_de_write(dev_priv, UTIL_PIN_CTL, val);
++ }
++}
++
++static void cnp_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 tmp;
++
++ intel_panel_set_pwm_level(old_conn_state, val);
++
++ tmp = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ tmp & ~BXT_BLC_PWM_ENABLE);
++}
++
++static void ext_pwm_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ panel->backlight.pwm_state.enabled = false;
++ pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
++}
++
++void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state)
++{
++ struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ if (!panel->backlight.present)
++ return;
++
++ /*
++ * Do not disable backlight on the vga_switcheroo path. When switching
++ * away from i915, the other client may depend on i915 to handle the
++ * backlight. This will leave the backlight on unnecessarily when
++ * another client is not activated.
++ */
++ if (dev_priv->drm.switch_power_state == DRM_SWITCH_POWER_CHANGING) {
++ drm_dbg_kms(&dev_priv->drm,
++ "Skipping backlight disable on vga switch\n");
++ return;
++ }
++
++ mutex_lock(&dev_priv->backlight_lock);
++
++ if (panel->backlight.device)
++ panel->backlight.device->props.power = FB_BLANK_POWERDOWN;
++ panel->backlight.enabled = false;
++ panel->backlight.funcs->disable(old_conn_state, 0);
++
++ mutex_unlock(&dev_priv->backlight_lock);
++}
++
++static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 pch_ctl1, pch_ctl2, schicken;
++
++ pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ if (pch_ctl1 & BLM_PCH_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "pch backlight already enabled\n");
++ pch_ctl1 &= ~BLM_PCH_PWM_ENABLE;
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
++ }
++
++ if (HAS_PCH_LPT(dev_priv)) {
++ schicken = intel_de_read(dev_priv, SOUTH_CHICKEN2);
++ if (panel->backlight.alternate_pwm_increment)
++ schicken |= LPT_PWM_GRANULARITY;
++ else
++ schicken &= ~LPT_PWM_GRANULARITY;
++ intel_de_write(dev_priv, SOUTH_CHICKEN2, schicken);
++ } else {
++ schicken = intel_de_read(dev_priv, SOUTH_CHICKEN1);
++ if (panel->backlight.alternate_pwm_increment)
++ schicken |= SPT_PWM_GRANULARITY;
++ else
++ schicken &= ~SPT_PWM_GRANULARITY;
++ intel_de_write(dev_priv, SOUTH_CHICKEN1, schicken);
++ }
++
++ pch_ctl2 = panel->backlight.pwm_level_max << 16;
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, pch_ctl2);
++
++ pch_ctl1 = 0;
++ if (panel->backlight.active_low_pwm)
++ pch_ctl1 |= BLM_PCH_POLARITY;
++
++ /* After LPT, override is the default. */
++ if (HAS_PCH_LPT(dev_priv))
++ pch_ctl1 |= BLM_PCH_OVERRIDE_ENABLE;
++
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
++ intel_de_posting_read(dev_priv, BLC_PWM_PCH_CTL1);
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
++ pch_ctl1 | BLM_PCH_PWM_ENABLE);
++
++ /* This won't stick until the above enable. */
++ intel_panel_set_pwm_level(conn_state, level);
++}
++
++static void pch_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
++ u32 cpu_ctl2, pch_ctl1, pch_ctl2;
++
++ cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
++ if (cpu_ctl2 & BLM_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "cpu backlight already enabled\n");
++ cpu_ctl2 &= ~BLM_PWM_ENABLE;
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2);
++ }
++
++ pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ if (pch_ctl1 & BLM_PCH_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "pch backlight already enabled\n");
++ pch_ctl1 &= ~BLM_PCH_PWM_ENABLE;
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
++ }
++
++ if (cpu_transcoder == TRANSCODER_EDP)
++ cpu_ctl2 = BLM_TRANSCODER_EDP;
++ else
++ cpu_ctl2 = BLM_PIPE(cpu_transcoder);
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2);
++ intel_de_posting_read(dev_priv, BLC_PWM_CPU_CTL2);
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2 | BLM_PWM_ENABLE);
++
++ /* This won't stick until the above enable. */
++ intel_panel_set_pwm_level(conn_state, level);
++
++ pch_ctl2 = panel->backlight.pwm_level_max << 16;
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, pch_ctl2);
++
++ pch_ctl1 = 0;
++ if (panel->backlight.active_low_pwm)
++ pch_ctl1 |= BLM_PCH_POLARITY;
++
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
++ intel_de_posting_read(dev_priv, BLC_PWM_PCH_CTL1);
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
++ pch_ctl1 | BLM_PCH_PWM_ENABLE);
++}
++
++static void i9xx_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 ctl, freq;
++
++ ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
++ if (ctl & BACKLIGHT_DUTY_CYCLE_MASK_PNV) {
++ drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
++ intel_de_write(dev_priv, BLC_PWM_CTL, 0);
++ }
++
++ freq = panel->backlight.pwm_level_max;
++ if (panel->backlight.combination_mode)
++ freq /= 0xff;
++
++ ctl = freq << 17;
++ if (panel->backlight.combination_mode)
++ ctl |= BLM_LEGACY_MODE;
++ if (IS_PINEVIEW(dev_priv) && panel->backlight.active_low_pwm)
++ ctl |= BLM_POLARITY_PNV;
++
++ intel_de_write(dev_priv, BLC_PWM_CTL, ctl);
++ intel_de_posting_read(dev_priv, BLC_PWM_CTL);
++
++ /* XXX: combine this into above write? */
++ intel_panel_set_pwm_level(conn_state, level);
++
++ /*
++ * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is
++ * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2
++ * that has backlight.
++ */
++ if (DISPLAY_VER(dev_priv) == 2)
++ intel_de_write(dev_priv, BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE);
++}
++
++static void i965_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe;
++ u32 ctl, ctl2, freq;
++
++ ctl2 = intel_de_read(dev_priv, BLC_PWM_CTL2);
++ if (ctl2 & BLM_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
++ ctl2 &= ~BLM_PWM_ENABLE;
++ intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2);
++ }
++
++ freq = panel->backlight.pwm_level_max;
++ if (panel->backlight.combination_mode)
++ freq /= 0xff;
++
++ ctl = freq << 16;
++ intel_de_write(dev_priv, BLC_PWM_CTL, ctl);
++
++ ctl2 = BLM_PIPE(pipe);
++ if (panel->backlight.combination_mode)
++ ctl2 |= BLM_COMBINATION_MODE;
++ if (panel->backlight.active_low_pwm)
++ ctl2 |= BLM_POLARITY_I965;
++ intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2);
++ intel_de_posting_read(dev_priv, BLC_PWM_CTL2);
++ intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2 | BLM_PWM_ENABLE);
++
++ intel_panel_set_pwm_level(conn_state, level);
++}
++
++static void vlv_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
++ u32 ctl, ctl2;
++
++ ctl2 = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
++ if (ctl2 & BLM_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
++ ctl2 &= ~BLM_PWM_ENABLE;
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe), ctl2);
++ }
++
++ ctl = panel->backlight.pwm_level_max << 16;
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL(pipe), ctl);
++
++ /* XXX: combine this into above write? */
++ intel_panel_set_pwm_level(conn_state, level);
++
++ ctl2 = 0;
++ if (panel->backlight.active_low_pwm)
++ ctl2 |= BLM_POLARITY_I965;
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe), ctl2);
++ intel_de_posting_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
++ intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe),
++ ctl2 | BLM_PWM_ENABLE);
++}
++
++static void bxt_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
++ u32 pwm_ctl, val;
++
++ /* Controller 1 uses the utility pin. */
++ if (panel->backlight.controller == 1) {
++ val = intel_de_read(dev_priv, UTIL_PIN_CTL);
++ if (val & UTIL_PIN_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm,
++ "util pin already enabled\n");
++ val &= ~UTIL_PIN_ENABLE;
++ intel_de_write(dev_priv, UTIL_PIN_CTL, val);
++ }
++
++ val = 0;
++ if (panel->backlight.util_pin_active_low)
++ val |= UTIL_PIN_POLARITY;
++ intel_de_write(dev_priv, UTIL_PIN_CTL,
++ val | UTIL_PIN_PIPE(pipe) | UTIL_PIN_MODE_PWM | UTIL_PIN_ENABLE);
++ }
++
++ pwm_ctl = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ if (pwm_ctl & BXT_BLC_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
++ pwm_ctl &= ~BXT_BLC_PWM_ENABLE;
++ intel_de_write(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl);
++ }
++
++ intel_de_write(dev_priv,
++ BXT_BLC_PWM_FREQ(panel->backlight.controller),
++ panel->backlight.pwm_level_max);
++
++ intel_panel_set_pwm_level(conn_state, level);
++
++ pwm_ctl = 0;
++ if (panel->backlight.active_low_pwm)
++ pwm_ctl |= BXT_BLC_PWM_POLARITY;
++
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl);
++ intel_de_posting_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl | BXT_BLC_PWM_ENABLE);
++}
++
++static void cnp_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 pwm_ctl;
++
++ pwm_ctl = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ if (pwm_ctl & BXT_BLC_PWM_ENABLE) {
++ drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
++ pwm_ctl &= ~BXT_BLC_PWM_ENABLE;
++ intel_de_write(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl);
++ }
++
++ intel_de_write(dev_priv,
++ BXT_BLC_PWM_FREQ(panel->backlight.controller),
++ panel->backlight.pwm_level_max);
++
++ intel_panel_set_pwm_level(conn_state, level);
++
++ pwm_ctl = 0;
++ if (panel->backlight.active_low_pwm)
++ pwm_ctl |= BXT_BLC_PWM_POLARITY;
++
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl);
++ intel_de_posting_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++ intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
++ pwm_ctl | BXT_BLC_PWM_ENABLE);
++}
++
++static void ext_pwm_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
++ panel->backlight.pwm_state.enabled = true;
++ pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
++}
++
++static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ WARN_ON(panel->backlight.max == 0);
++
++ if (panel->backlight.level <= panel->backlight.min) {
++ panel->backlight.level = panel->backlight.max;
++ if (panel->backlight.device)
++ panel->backlight.device->props.brightness =
++ scale_hw_to_user(connector,
++ panel->backlight.level,
++ panel->backlight.device->props.max_brightness);
++ }
++
++ panel->backlight.funcs->enable(crtc_state, conn_state, panel->backlight.level);
++ panel->backlight.enabled = true;
++ if (panel->backlight.device)
++ panel->backlight.device->props.power = FB_BLANK_UNBLANK;
++}
++
++void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
++
++ if (!panel->backlight.present)
++ return;
++
++ drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(pipe));
++
++ mutex_lock(&dev_priv->backlight_lock);
++
++ __intel_panel_enable_backlight(crtc_state, conn_state);
++
++ mutex_unlock(&dev_priv->backlight_lock);
++}
++
++#if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
++static u32 intel_panel_get_backlight(struct intel_connector *connector)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 val = 0;
++
++ mutex_lock(&dev_priv->backlight_lock);
++
++ if (panel->backlight.enabled)
++ val = panel->backlight.funcs->get(connector, intel_connector_get_pipe(connector));
++
++ mutex_unlock(&dev_priv->backlight_lock);
++
++ drm_dbg_kms(&dev_priv->drm, "get backlight PWM = %d\n", val);
++ return val;
++}
++
++/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */
++static u32 scale_user_to_hw(struct intel_connector *connector,
++ u32 user_level, u32 user_max)
++{
++ struct intel_panel *panel = &connector->panel;
++
++ return scale(user_level, 0, user_max,
++ panel->backlight.min, panel->backlight.max);
++}
++
++/* set backlight brightness to level in range [0..max], scaling wrt hw min */
++static void intel_panel_set_backlight(const struct drm_connector_state *conn_state,
++ u32 user_level, u32 user_max)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 hw_level;
++
++ if (!panel->backlight.present)
++ return;
++
++ mutex_lock(&dev_priv->backlight_lock);
++
++ drm_WARN_ON(&dev_priv->drm, panel->backlight.max == 0);
++
++ hw_level = scale_user_to_hw(connector, user_level, user_max);
++ panel->backlight.level = hw_level;
++
++ if (panel->backlight.enabled)
++ intel_panel_actually_set_backlight(conn_state, hw_level);
++
++ mutex_unlock(&dev_priv->backlight_lock);
++}
++
++static int intel_backlight_device_update_status(struct backlight_device *bd)
++{
++ struct intel_connector *connector = bl_get_data(bd);
++ struct intel_panel *panel = &connector->panel;
++ struct drm_device *dev = connector->base.dev;
++
++ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
++ DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n",
++ bd->props.brightness, bd->props.max_brightness);
++ intel_panel_set_backlight(connector->base.state, bd->props.brightness,
++ bd->props.max_brightness);
++
++ /*
++ * Allow flipping bl_power as a sub-state of enabled. Sadly the
++ * backlight class device does not make it easy to differentiate
++ * between callbacks for brightness and bl_power, so our backlight_power
++ * callback needs to take this into account.
++ */
++ if (panel->backlight.enabled) {
++ if (panel->backlight.power) {
++ bool enable = bd->props.power == FB_BLANK_UNBLANK &&
++ bd->props.brightness != 0;
++ panel->backlight.power(connector, enable);
++ }
++ } else {
++ bd->props.power = FB_BLANK_POWERDOWN;
++ }
++
++ drm_modeset_unlock(&dev->mode_config.connection_mutex);
++ return 0;
++}
++
++static int intel_backlight_device_get_brightness(struct backlight_device *bd)
++{
++ struct intel_connector *connector = bl_get_data(bd);
++ struct drm_device *dev = connector->base.dev;
++ struct drm_i915_private *dev_priv = to_i915(dev);
++ intel_wakeref_t wakeref;
++ int ret = 0;
++
++ with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
++ u32 hw_level;
++
++ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
++
++ hw_level = intel_panel_get_backlight(connector);
++ ret = scale_hw_to_user(connector,
++ hw_level, bd->props.max_brightness);
++
++ drm_modeset_unlock(&dev->mode_config.connection_mutex);
++ }
++
++ return ret;
++}
++
++static const struct backlight_ops intel_backlight_device_ops = {
++ .update_status = intel_backlight_device_update_status,
++ .get_brightness = intel_backlight_device_get_brightness,
++};
++
++int intel_backlight_device_register(struct intel_connector *connector)
++{
++ struct drm_i915_private *i915 = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ struct backlight_properties props;
++ struct backlight_device *bd;
++ const char *name;
++ int ret = 0;
++
++ if (WARN_ON(panel->backlight.device))
++ return -ENODEV;
++
++ if (!panel->backlight.present)
++ return 0;
++
++ WARN_ON(panel->backlight.max == 0);
++
++ memset(&props, 0, sizeof(props));
++ props.type = BACKLIGHT_RAW;
++
++ /*
++ * Note: Everything should work even if the backlight device max
++ * presented to the userspace is arbitrarily chosen.
++ */
++ props.max_brightness = panel->backlight.max;
++ props.brightness = scale_hw_to_user(connector,
++ panel->backlight.level,
++ props.max_brightness);
++
++ if (panel->backlight.enabled)
++ props.power = FB_BLANK_UNBLANK;
++ else
++ props.power = FB_BLANK_POWERDOWN;
++
++ name = kstrdup("intel_backlight", GFP_KERNEL);
++ if (!name)
++ return -ENOMEM;
++
++ bd = backlight_device_get_by_name(name);
++ if (bd) {
++ put_device(&bd->dev);
++ /*
++ * Using the same name independent of the drm device or connector
++ * prevents registration of multiple backlight devices in the
++ * driver. However, we need to use the default name for backward
++ * compatibility. Use unique names for subsequent backlight devices as a
++ * fallback when the default name already exists.
++ */
++ kfree(name);
++ name = kasprintf(GFP_KERNEL, "card%d-%s-backlight",
++ i915->drm.primary->index, connector->base.name);
++ if (!name)
++ return -ENOMEM;
++ }
++ bd = backlight_device_register(name, connector->base.kdev, connector,
++ &intel_backlight_device_ops, &props);
++
++ if (IS_ERR(bd)) {
++ drm_err(&i915->drm,
++ "[CONNECTOR:%d:%s] backlight device %s register failed: %ld\n",
++ connector->base.base.id, connector->base.name, name, PTR_ERR(bd));
++ ret = PTR_ERR(bd);
++ goto out;
++ }
++
++ panel->backlight.device = bd;
++
++ drm_dbg_kms(&i915->drm,
++ "[CONNECTOR:%d:%s] backlight device %s registered\n",
++ connector->base.base.id, connector->base.name, name);
++
++out:
++ kfree(name);
++
++ return ret;
++}
++
++void intel_backlight_device_unregister(struct intel_connector *connector)
++{
++ struct intel_panel *panel = &connector->panel;
++
++ if (panel->backlight.device) {
++ backlight_device_unregister(panel->backlight.device);
++ panel->backlight.device = NULL;
++ }
++}
++#endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */
++
++/*
++ * CNP: PWM clock frequency is 19.2 MHz or 24 MHz.
++ * PWM increment = 1
++ */
++static u32 cnp_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(dev_priv)->rawclk_freq),
++ pwm_freq_hz);
++}
++
++/*
++ * BXT: PWM clock frequency = 19.2 MHz.
++ */
++static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ return DIV_ROUND_CLOSEST(KHz(19200), pwm_freq_hz);
++}
++
++/*
++ * SPT: This value represents the period of the PWM stream in clock periods
++ * multiplied by 16 (default increment) or 128 (alternate increment selected in
++ * SCHICKEN_1 bit 0). PWM clock is 24 MHz.
++ */
++static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct intel_panel *panel = &connector->panel;
++ u32 mul;
++
++ if (panel->backlight.alternate_pwm_increment)
++ mul = 128;
++ else
++ mul = 16;
++
++ return DIV_ROUND_CLOSEST(MHz(24), pwm_freq_hz * mul);
++}
++
++/*
++ * LPT: This value represents the period of the PWM stream in clock periods
++ * multiplied by 128 (default increment) or 16 (alternate increment, selected in
++ * LPT SOUTH_CHICKEN2 register bit 5).
++ */
++static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 mul, clock;
++
++ if (panel->backlight.alternate_pwm_increment)
++ mul = 16;
++ else
++ mul = 128;
++
++ if (HAS_PCH_LPT_H(dev_priv))
++ clock = MHz(135); /* LPT:H */
++ else
++ clock = MHz(24); /* LPT:LP */
++
++ return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul);
++}
++
++/*
++ * ILK/SNB/IVB: This value represents the period of the PWM stream in PCH
++ * display raw clocks multiplied by 128.
++ */
++static u32 pch_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(dev_priv)->rawclk_freq),
++ pwm_freq_hz * 128);
++}
++
++/*
++ * Gen2: This field determines the number of time base events (display core
++ * clock frequency/32) in total for a complete cycle of modulated backlight
++ * control.
++ *
++ * Gen3: A time base event equals the display core clock ([DevPNV] HRAW clock)
++ * divided by 32.
++ */
++static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ int clock;
++
++ if (IS_PINEVIEW(dev_priv))
++ clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
++ else
++ clock = KHz(dev_priv->cdclk.hw.cdclk);
++
++ return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32);
++}
++
++/*
++ * Gen4: This value represents the period of the PWM stream in display core
++ * clocks ([DevCTG] HRAW clocks) multiplied by 128.
++ *
++ */
++static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ int clock;
++
++ if (IS_G4X(dev_priv))
++ clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
++ else
++ clock = KHz(dev_priv->cdclk.hw.cdclk);
++
++ return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128);
++}
++
++/*
++ * VLV: This value represents the period of the PWM stream in display core
++ * clocks ([DevCTG] 200MHz HRAW clocks) multiplied by 128 or 25MHz S0IX clocks
++ * multiplied by 16. CHV uses a 19.2MHz S0IX clock.
++ */
++static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ int mul, clock;
++
++ if ((intel_de_read(dev_priv, CBR1_VLV) & CBR_PWM_CLOCK_MUX_SELECT) == 0) {
++ if (IS_CHERRYVIEW(dev_priv))
++ clock = KHz(19200);
++ else
++ clock = MHz(25);
++ mul = 16;
++ } else {
++ clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
++ mul = 128;
++ }
++
++ return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul);
++}
++
++static u16 get_vbt_pwm_freq(struct drm_i915_private *dev_priv)
++{
++ u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz;
++
++ if (pwm_freq_hz) {
++ drm_dbg_kms(&dev_priv->drm,
++ "VBT defined backlight frequency %u Hz\n",
++ pwm_freq_hz);
++ } else {
++ pwm_freq_hz = 200;
++ drm_dbg_kms(&dev_priv->drm,
++ "default backlight frequency %u Hz\n",
++ pwm_freq_hz);
++ }
++
++ return pwm_freq_hz;
++}
++
++static u32 get_backlight_max_vbt(struct intel_connector *connector)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u16 pwm_freq_hz = get_vbt_pwm_freq(dev_priv);
++ u32 pwm;
++
++ if (!panel->backlight.pwm_funcs->hz_to_pwm) {
++ drm_dbg_kms(&dev_priv->drm,
++ "backlight frequency conversion not supported\n");
++ return 0;
++ }
++
++ pwm = panel->backlight.pwm_funcs->hz_to_pwm(connector, pwm_freq_hz);
++ if (!pwm) {
++ drm_dbg_kms(&dev_priv->drm,
++ "backlight frequency conversion failed\n");
++ return 0;
++ }
++
++ return pwm;
++}
++
++/*
++ * Note: The setup hooks can't assume pipe is set!
++ */
++static u32 get_backlight_min_vbt(struct intel_connector *connector)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ int min;
++
++ drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
++
++ /*
++ * XXX: If the vbt value is 255, it makes min equal to max, which leads
++ * to problems. There are such machines out there. Either our
++ * interpretation is wrong or the vbt has bogus data. Or both. Safeguard
++ * against this by letting the minimum be at most (arbitrarily chosen)
++ * 25% of the max.
++ */
++ min = clamp_t(int, dev_priv->vbt.backlight.min_brightness, 0, 64);
++ if (min != dev_priv->vbt.backlight.min_brightness) {
++ drm_dbg_kms(&dev_priv->drm,
++ "clamping VBT min backlight %d/255 to %d/255\n",
++ dev_priv->vbt.backlight.min_brightness, min);
++ }
++
++ /* vbt value is a coefficient in range [0..255] */
++ return scale(min, 0, 255, 0, panel->backlight.pwm_level_max);
++}
++
++static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 cpu_ctl2, pch_ctl1, pch_ctl2, val;
++ bool alt, cpu_mode;
++
++ if (HAS_PCH_LPT(dev_priv))
++ alt = intel_de_read(dev_priv, SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY;
++ else
++ alt = intel_de_read(dev_priv, SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY;
++ panel->backlight.alternate_pwm_increment = alt;
++
++ pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY;
++
++ pch_ctl2 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2);
++ panel->backlight.pwm_level_max = pch_ctl2 >> 16;
++
++ cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ panel->backlight.pwm_enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE;
++
++ cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(dev_priv) &&
++ !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) &&
++ (cpu_ctl2 & BLM_PWM_ENABLE);
++
++ if (cpu_mode) {
++ val = pch_get_backlight(connector, unused);
++
++ drm_dbg_kms(&dev_priv->drm,
++ "CPU backlight register was enabled, switching to PCH override\n");
++
++ /* Write converted CPU PWM value to PCH override register */
++ lpt_set_backlight(connector->base.state, val);
++ intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
++ pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE);
++
++ intel_de_write(dev_priv, BLC_PWM_CPU_CTL2,
++ cpu_ctl2 & ~BLM_PWM_ENABLE);
++ }
++
++ return 0;
++}
++
++static int pch_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 cpu_ctl2, pch_ctl1, pch_ctl2;
++
++ pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
++ panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY;
++
++ pch_ctl2 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2);
++ panel->backlight.pwm_level_max = pch_ctl2 >> 16;
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
++ panel->backlight.pwm_enabled = (cpu_ctl2 & BLM_PWM_ENABLE) &&
++ (pch_ctl1 & BLM_PCH_PWM_ENABLE);
++
++ return 0;
++}
++
++static int i9xx_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 ctl, val;
++
++ ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
++
++ if (DISPLAY_VER(dev_priv) == 2 || IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
++ panel->backlight.combination_mode = ctl & BLM_LEGACY_MODE;
++
++ if (IS_PINEVIEW(dev_priv))
++ panel->backlight.active_low_pwm = ctl & BLM_POLARITY_PNV;
++
++ panel->backlight.pwm_level_max = ctl >> 17;
++
++ if (!panel->backlight.pwm_level_max) {
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++ panel->backlight.pwm_level_max >>= 1;
++ }
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ if (panel->backlight.combination_mode)
++ panel->backlight.pwm_level_max *= 0xff;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ val = i9xx_get_backlight(connector, unused);
++ val = intel_panel_invert_pwm_level(connector, val);
++ val = clamp(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max);
++
++ panel->backlight.pwm_enabled = val != 0;
++
++ return 0;
++}
++
++static int i965_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 ctl, ctl2;
++
++ ctl2 = intel_de_read(dev_priv, BLC_PWM_CTL2);
++ panel->backlight.combination_mode = ctl2 & BLM_COMBINATION_MODE;
++ panel->backlight.active_low_pwm = ctl2 & BLM_POLARITY_I965;
++
++ ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
++ panel->backlight.pwm_level_max = ctl >> 16;
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ if (panel->backlight.combination_mode)
++ panel->backlight.pwm_level_max *= 0xff;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ panel->backlight.pwm_enabled = ctl2 & BLM_PWM_ENABLE;
++
++ return 0;
++}
++
++static int vlv_setup_backlight(struct intel_connector *connector, enum pipe pipe)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 ctl, ctl2;
++
++ if (drm_WARN_ON(&dev_priv->drm, pipe != PIPE_A && pipe != PIPE_B))
++ return -ENODEV;
++
++ ctl2 = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
++ panel->backlight.active_low_pwm = ctl2 & BLM_POLARITY_I965;
++
++ ctl = intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe));
++ panel->backlight.pwm_level_max = ctl >> 16;
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ panel->backlight.pwm_enabled = ctl2 & BLM_PWM_ENABLE;
++
++ return 0;
++}
++
++static int
++bxt_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 pwm_ctl, val;
++
++ panel->backlight.controller = dev_priv->vbt.backlight.controller;
++
++ pwm_ctl = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++
++ /* Controller 1 uses the utility pin. */
++ if (panel->backlight.controller == 1) {
++ val = intel_de_read(dev_priv, UTIL_PIN_CTL);
++ panel->backlight.util_pin_active_low =
++ val & UTIL_PIN_POLARITY;
++ }
++
++ panel->backlight.active_low_pwm = pwm_ctl & BXT_BLC_PWM_POLARITY;
++ panel->backlight.pwm_level_max =
++ intel_de_read(dev_priv, BXT_BLC_PWM_FREQ(panel->backlight.controller));
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ panel->backlight.pwm_enabled = pwm_ctl & BXT_BLC_PWM_ENABLE;
++
++ return 0;
++}
++
++static int
++cnp_setup_backlight(struct intel_connector *connector, enum pipe unused)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++ u32 pwm_ctl;
++
++ /*
++ * CNP has the BXT implementation of backlight, but with only one
++ * controller. TODO: ICP has multiple controllers but we only use
++ * controller 0 for now.
++ */
++ panel->backlight.controller = 0;
++
++ pwm_ctl = intel_de_read(dev_priv,
++ BXT_BLC_PWM_CTL(panel->backlight.controller));
++
++ panel->backlight.active_low_pwm = pwm_ctl & BXT_BLC_PWM_POLARITY;
++ panel->backlight.pwm_level_max =
++ intel_de_read(dev_priv, BXT_BLC_PWM_FREQ(panel->backlight.controller));
++
++ if (!panel->backlight.pwm_level_max)
++ panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
++
++ if (!panel->backlight.pwm_level_max)
++ return -ENODEV;
++
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ panel->backlight.pwm_enabled = pwm_ctl & BXT_BLC_PWM_ENABLE;
++
++ return 0;
++}
++
++static int ext_pwm_setup_backlight(struct intel_connector *connector,
++ enum pipe pipe)
++{
++ struct drm_device *dev = connector->base.dev;
++ struct drm_i915_private *dev_priv = to_i915(dev);
++ struct intel_panel *panel = &connector->panel;
++ const char *desc;
++ u32 level;
++
++ /* Get the right PWM chip for DSI backlight according to VBT */
++ if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) {
++ panel->backlight.pwm = pwm_get(dev->dev, "pwm_pmic_backlight");
++ desc = "PMIC";
++ } else {
++ panel->backlight.pwm = pwm_get(dev->dev, "pwm_soc_backlight");
++ desc = "SoC";
++ }
++
++ if (IS_ERR(panel->backlight.pwm)) {
++ drm_err(&dev_priv->drm, "Failed to get the %s PWM chip\n",
++ desc);
++ panel->backlight.pwm = NULL;
++ return -ENODEV;
++ }
++
++ panel->backlight.pwm_level_max = 100; /* 100% */
++ panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
++
++ if (pwm_is_enabled(panel->backlight.pwm)) {
++ /* PWM is already enabled, use existing settings */
++ pwm_get_state(panel->backlight.pwm, &panel->backlight.pwm_state);
++
++ level = pwm_get_relative_duty_cycle(&panel->backlight.pwm_state,
++ 100);
++ level = intel_panel_invert_pwm_level(connector, level);
++ panel->backlight.pwm_enabled = true;
++
++ drm_dbg_kms(&dev_priv->drm, "PWM already enabled at freq %ld, VBT freq %d, level %d\n",
++ NSEC_PER_SEC / (unsigned long)panel->backlight.pwm_state.period,
++ get_vbt_pwm_freq(dev_priv), level);
++ } else {
++ /* Set period from VBT frequency, leave other settings at 0. */
++ panel->backlight.pwm_state.period =
++ NSEC_PER_SEC / get_vbt_pwm_freq(dev_priv);
++ }
++
++ drm_info(&dev_priv->drm, "Using %s PWM for LCD backlight control\n",
++ desc);
++ return 0;
++}
++
++static void intel_pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ panel->backlight.pwm_funcs->set(conn_state,
++ intel_panel_invert_pwm_level(connector, level));
++}
++
++static u32 intel_pwm_get_backlight(struct intel_connector *connector, enum pipe pipe)
++{
++ struct intel_panel *panel = &connector->panel;
++
++ return intel_panel_invert_pwm_level(connector,
++ panel->backlight.pwm_funcs->get(connector, pipe));
++}
++
++static void intel_pwm_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ panel->backlight.pwm_funcs->enable(crtc_state, conn_state,
++ intel_panel_invert_pwm_level(connector, level));
++}
++
++static void intel_pwm_disable_backlight(const struct drm_connector_state *conn_state, u32 level)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct intel_panel *panel = &connector->panel;
++
++ panel->backlight.pwm_funcs->disable(conn_state,
++ intel_panel_invert_pwm_level(connector, level));
++}
++
++static int intel_pwm_setup_backlight(struct intel_connector *connector, enum pipe pipe)
++{
++ struct intel_panel *panel = &connector->panel;
++ int ret = panel->backlight.pwm_funcs->setup(connector, pipe);
++
++ if (ret < 0)
++ return ret;
++
++ panel->backlight.min = panel->backlight.pwm_level_min;
++ panel->backlight.max = panel->backlight.pwm_level_max;
++ panel->backlight.level = intel_pwm_get_backlight(connector, pipe);
++ panel->backlight.enabled = panel->backlight.pwm_enabled;
++
++ return 0;
++}
++
++void intel_panel_update_backlight(struct intel_atomic_state *state,
++ struct intel_encoder *encoder,
++ const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state)
++{
++ struct intel_connector *connector = to_intel_connector(conn_state->connector);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_panel *panel = &connector->panel;
++
++ if (!panel->backlight.present)
++ return;
++
++ mutex_lock(&dev_priv->backlight_lock);
++ if (!panel->backlight.enabled)
++ __intel_panel_enable_backlight(crtc_state, conn_state);
++
++ mutex_unlock(&dev_priv->backlight_lock);
++}
++
++int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->dev);
++ struct intel_connector *intel_connector = to_intel_connector(connector);
++ struct intel_panel *panel = &intel_connector->panel;
++ int ret;
++
++ if (!dev_priv->vbt.backlight.present) {
++ if (dev_priv->quirks & QUIRK_BACKLIGHT_PRESENT) {
++ drm_dbg_kms(&dev_priv->drm,
++ "no backlight present per VBT, but present per quirk\n");
++ } else {
++ drm_dbg_kms(&dev_priv->drm,
++ "no backlight present per VBT\n");
++ return 0;
++ }
++ }
++
++ /* ensure intel_panel has been initialized first */
++ if (drm_WARN_ON(&dev_priv->drm, !panel->backlight.funcs))
++ return -ENODEV;
++
++ /* set level and max in panel struct */
++ mutex_lock(&dev_priv->backlight_lock);
++ ret = panel->backlight.funcs->setup(intel_connector, pipe);
++ mutex_unlock(&dev_priv->backlight_lock);
++
++ if (ret) {
++ drm_dbg_kms(&dev_priv->drm,
++ "failed to setup backlight for connector %s\n",
++ connector->name);
++ return ret;
++ }
++
++ panel->backlight.present = true;
++
++ drm_dbg_kms(&dev_priv->drm,
++ "Connector %s backlight initialized, %s, brightness %u/%u\n",
++ connector->name,
++ enableddisabled(panel->backlight.enabled),
++ panel->backlight.level, panel->backlight.max);
++
++ return 0;
++}
++
++void intel_panel_destroy_backlight(struct intel_panel *panel)
++{
++ /* dispose of the pwm */
++ if (panel->backlight.pwm)
++ pwm_put(panel->backlight.pwm);
++
++ panel->backlight.present = false;
++}
++
++static const struct intel_panel_bl_funcs bxt_pwm_funcs = {
++ .setup = bxt_setup_backlight,
++ .enable = bxt_enable_backlight,
++ .disable = bxt_disable_backlight,
++ .set = bxt_set_backlight,
++ .get = bxt_get_backlight,
++ .hz_to_pwm = bxt_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs cnp_pwm_funcs = {
++ .setup = cnp_setup_backlight,
++ .enable = cnp_enable_backlight,
++ .disable = cnp_disable_backlight,
++ .set = bxt_set_backlight,
++ .get = bxt_get_backlight,
++ .hz_to_pwm = cnp_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs lpt_pwm_funcs = {
++ .setup = lpt_setup_backlight,
++ .enable = lpt_enable_backlight,
++ .disable = lpt_disable_backlight,
++ .set = lpt_set_backlight,
++ .get = lpt_get_backlight,
++ .hz_to_pwm = lpt_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs spt_pwm_funcs = {
++ .setup = lpt_setup_backlight,
++ .enable = lpt_enable_backlight,
++ .disable = lpt_disable_backlight,
++ .set = lpt_set_backlight,
++ .get = lpt_get_backlight,
++ .hz_to_pwm = spt_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs pch_pwm_funcs = {
++ .setup = pch_setup_backlight,
++ .enable = pch_enable_backlight,
++ .disable = pch_disable_backlight,
++ .set = pch_set_backlight,
++ .get = pch_get_backlight,
++ .hz_to_pwm = pch_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs ext_pwm_funcs = {
++ .setup = ext_pwm_setup_backlight,
++ .enable = ext_pwm_enable_backlight,
++ .disable = ext_pwm_disable_backlight,
++ .set = ext_pwm_set_backlight,
++ .get = ext_pwm_get_backlight,
++};
++
++static const struct intel_panel_bl_funcs vlv_pwm_funcs = {
++ .setup = vlv_setup_backlight,
++ .enable = vlv_enable_backlight,
++ .disable = vlv_disable_backlight,
++ .set = vlv_set_backlight,
++ .get = vlv_get_backlight,
++ .hz_to_pwm = vlv_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs i965_pwm_funcs = {
++ .setup = i965_setup_backlight,
++ .enable = i965_enable_backlight,
++ .disable = i965_disable_backlight,
++ .set = i9xx_set_backlight,
++ .get = i9xx_get_backlight,
++ .hz_to_pwm = i965_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs i9xx_pwm_funcs = {
++ .setup = i9xx_setup_backlight,
++ .enable = i9xx_enable_backlight,
++ .disable = i9xx_disable_backlight,
++ .set = i9xx_set_backlight,
++ .get = i9xx_get_backlight,
++ .hz_to_pwm = i9xx_hz_to_pwm,
++};
++
++static const struct intel_panel_bl_funcs pwm_bl_funcs = {
++ .setup = intel_pwm_setup_backlight,
++ .enable = intel_pwm_enable_backlight,
++ .disable = intel_pwm_disable_backlight,
++ .set = intel_pwm_set_backlight,
++ .get = intel_pwm_get_backlight,
++};
++
++/* Set up chip specific backlight functions */
++void
++intel_panel_init_backlight_funcs(struct intel_panel *panel)
++{
++ struct intel_connector *connector =
++ container_of(panel, struct intel_connector, panel);
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++
++ if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI &&
++ intel_dsi_dcs_init_backlight_funcs(connector) == 0)
++ return;
++
++ if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) {
++ panel->backlight.pwm_funcs = &bxt_pwm_funcs;
++ } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) {
++ panel->backlight.pwm_funcs = &cnp_pwm_funcs;
++ } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_LPT) {
++ if (HAS_PCH_LPT(dev_priv))
++ panel->backlight.pwm_funcs = &lpt_pwm_funcs;
++ else
++ panel->backlight.pwm_funcs = &spt_pwm_funcs;
++ } else if (HAS_PCH_SPLIT(dev_priv)) {
++ panel->backlight.pwm_funcs = &pch_pwm_funcs;
++ } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
++ if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI) {
++ panel->backlight.pwm_funcs = &ext_pwm_funcs;
++ } else {
++ panel->backlight.pwm_funcs = &vlv_pwm_funcs;
++ }
++ } else if (DISPLAY_VER(dev_priv) == 4) {
++ panel->backlight.pwm_funcs = &i965_pwm_funcs;
++ } else {
++ panel->backlight.pwm_funcs = &i9xx_pwm_funcs;
++ }
++
++ if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP &&
++ intel_dp_aux_init_backlight_funcs(connector) == 0)
++ return;
++
++ /* We're using a standard PWM backlight interface */
++ panel->backlight.funcs = &pwm_bl_funcs;
++}
+diff --git a/drivers/gpu/drm/i915/display/intel_backlight.h b/drivers/gpu/drm/i915/display/intel_backlight.h
+new file mode 100644
+index 0000000000000..282020cb47d5b
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_backlight.h
+@@ -0,0 +1,51 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#ifndef __INTEL_BACKLIGHT_H__
++#define __INTEL_BACKLIGHT_H__
++
++#include <linux/types.h>
++
++struct drm_connector;
++struct drm_connector_state;
++struct intel_atomic_state;
++struct intel_connector;
++struct intel_crtc_state;
++struct intel_encoder;
++struct intel_panel;
++enum pipe;
++
++void intel_panel_init_backlight_funcs(struct intel_panel *panel);
++void intel_panel_destroy_backlight(struct intel_panel *panel);
++void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state,
++ u32 level, u32 max);
++int intel_panel_setup_backlight(struct drm_connector *connector,
++ enum pipe pipe);
++void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state);
++void intel_panel_update_backlight(struct intel_atomic_state *state,
++ struct intel_encoder *encoder,
++ const struct intel_crtc_state *crtc_state,
++ const struct drm_connector_state *conn_state);
++void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state);
++void intel_panel_set_pwm_level(const struct drm_connector_state *conn_state, u32 level);
++u32 intel_panel_invert_pwm_level(struct intel_connector *connector, u32 level);
++u32 intel_panel_backlight_level_to_pwm(struct intel_connector *connector, u32 level);
++u32 intel_panel_backlight_level_from_pwm(struct intel_connector *connector, u32 val);
++
++#if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
++int intel_backlight_device_register(struct intel_connector *connector);
++void intel_backlight_device_unregister(struct intel_connector *connector);
++#else /* CONFIG_BACKLIGHT_CLASS_DEVICE */
++static inline int intel_backlight_device_register(struct intel_connector *connector)
++{
++ return 0;
++}
++static inline void intel_backlight_device_unregister(struct intel_connector *connector)
++{
++}
++#endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */
++
++#endif /* __INTEL_BACKLIGHT_H__ */
+diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
+index fd71346aac7bc..aa0b936075597 100644
+--- a/drivers/gpu/drm/i915/display/intel_bios.c
++++ b/drivers/gpu/drm/i915/display/intel_bios.c
+@@ -1692,6 +1692,39 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin)
+ return 0;
+ }
+
++static u8 dvo_port_type(u8 dvo_port)
++{
++ switch (dvo_port) {
++ case DVO_PORT_HDMIA:
++ case DVO_PORT_HDMIB:
++ case DVO_PORT_HDMIC:
++ case DVO_PORT_HDMID:
++ case DVO_PORT_HDMIE:
++ case DVO_PORT_HDMIF:
++ case DVO_PORT_HDMIG:
++ case DVO_PORT_HDMIH:
++ case DVO_PORT_HDMII:
++ return DVO_PORT_HDMIA;
++ case DVO_PORT_DPA:
++ case DVO_PORT_DPB:
++ case DVO_PORT_DPC:
++ case DVO_PORT_DPD:
++ case DVO_PORT_DPE:
++ case DVO_PORT_DPF:
++ case DVO_PORT_DPG:
++ case DVO_PORT_DPH:
++ case DVO_PORT_DPI:
++ return DVO_PORT_DPA;
++ case DVO_PORT_MIPIA:
++ case DVO_PORT_MIPIB:
++ case DVO_PORT_MIPIC:
++ case DVO_PORT_MIPID:
++ return DVO_PORT_MIPIA;
++ default:
++ return dvo_port;
++ }
++}
++
+ static enum port __dvo_port_to_port(int n_ports, int n_dvo,
+ const int port_mapping[][3], u8 dvo_port)
+ {
+@@ -1787,6 +1820,22 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915,
+ dvo_port);
+ }
+
++static enum port
++dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port)
++{
++ switch (dvo_port) {
++ case DVO_PORT_MIPIA:
++ return PORT_A;
++ case DVO_PORT_MIPIC:
++ if (DISPLAY_VER(i915) >= 11)
++ return PORT_B;
++ else
++ return PORT_C;
++ default:
++ return PORT_NONE;
++ }
++}
++
+ static int parse_bdb_230_dp_max_link_rate(const int vbt_max_link_rate)
+ {
+ switch (vbt_max_link_rate) {
+@@ -2622,35 +2671,17 @@ bool intel_bios_is_port_edp(struct drm_i915_private *i915, enum port port)
+ return false;
+ }
+
+-static bool child_dev_is_dp_dual_mode(const struct child_device_config *child,
+- enum port port)
++static bool child_dev_is_dp_dual_mode(const struct child_device_config *child)
+ {
+- static const struct {
+- u16 dp, hdmi;
+- } port_mapping[] = {
+- /*
+- * Buggy VBTs may declare DP ports as having
+- * HDMI type dvo_port :( So let's check both.
+- */
+- [PORT_B] = { DVO_PORT_DPB, DVO_PORT_HDMIB, },
+- [PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, },
+- [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, },
+- [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
+- [PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, },
+- };
+-
+- if (port == PORT_A || port >= ARRAY_SIZE(port_mapping))
+- return false;
+-
+ if ((child->device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) !=
+ (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS))
+ return false;
+
+- if (child->dvo_port == port_mapping[port].dp)
++ if (dvo_port_type(child->dvo_port) == DVO_PORT_DPA)
+ return true;
+
+ /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */
+- if (child->dvo_port == port_mapping[port].hdmi &&
++ if (dvo_port_type(child->dvo_port) == DVO_PORT_HDMIA &&
+ child->aux_channel != 0)
+ return true;
+
+@@ -2660,10 +2691,36 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child,
+ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *i915,
+ enum port port)
+ {
++ static const struct {
++ u16 dp, hdmi;
++ } port_mapping[] = {
++ /*
++ * Buggy VBTs may declare DP ports as having
++ * HDMI type dvo_port :( So let's check both.
++ */
++ [PORT_B] = { DVO_PORT_DPB, DVO_PORT_HDMIB, },
++ [PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, },
++ [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, },
++ [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
++ [PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, },
++ };
+ const struct intel_bios_encoder_data *devdata;
+
++ if (HAS_DDI(i915)) {
++ const struct intel_bios_encoder_data *devdata;
++
++ devdata = intel_bios_encoder_data_lookup(i915, port);
++
++ return devdata && child_dev_is_dp_dual_mode(&devdata->child);
++ }
++
++ if (port == PORT_A || port >= ARRAY_SIZE(port_mapping))
++ return false;
++
+ list_for_each_entry(devdata, &i915->vbt.display_devices, node) {
+- if (child_dev_is_dp_dual_mode(&devdata->child, port))
++ if ((devdata->child.dvo_port == port_mapping[port].dp ||
++ devdata->child.dvo_port == port_mapping[port].hdmi) &&
++ child_dev_is_dp_dual_mode(&devdata->child))
+ return true;
+ }
+
+@@ -2692,19 +2749,16 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *i915,
+
+ dvo_port = child->dvo_port;
+
+- if (dvo_port == DVO_PORT_MIPIA ||
+- (dvo_port == DVO_PORT_MIPIB && DISPLAY_VER(i915) >= 11) ||
+- (dvo_port == DVO_PORT_MIPIC && DISPLAY_VER(i915) < 11)) {
+- if (port)
+- *port = dvo_port - DVO_PORT_MIPIA;
+- return true;
+- } else if (dvo_port == DVO_PORT_MIPIB ||
+- dvo_port == DVO_PORT_MIPIC ||
+- dvo_port == DVO_PORT_MIPID) {
++ if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) {
+ drm_dbg_kms(&i915->drm,
+ "VBT has unsupported DSI port %c\n",
+ port_name(dvo_port - DVO_PORT_MIPIA));
++ continue;
+ }
++
++ if (port)
++ *port = dsi_dvo_port_to_port(i915, dvo_port);
++ return true;
+ }
+
+ return false;
+@@ -2789,7 +2843,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder,
+ if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT))
+ continue;
+
+- if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) {
++ if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) {
+ if (!devdata->dsc)
+ return false;
+
+diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
+index 4b94256d73197..ea48620f76d9c 100644
+--- a/drivers/gpu/drm/i915/display/intel_bw.c
++++ b/drivers/gpu/drm/i915/display/intel_bw.c
+@@ -681,6 +681,7 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
+ unsigned int max_bw_point = 0, max_bw = 0;
+ unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
+ unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
++ bool changed = false;
+ u32 mask = 0;
+
+ /* FIXME earlier gens need some checks too */
+@@ -724,6 +725,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
+ new_bw_state->data_rate[crtc->pipe] = new_data_rate;
+ new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+
++ changed = true;
++
+ drm_dbg_kms(&dev_priv->drm,
+ "pipe %c data rate %u num active planes %u\n",
+ pipe_name(crtc->pipe),
+@@ -731,7 +734,19 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
+ new_bw_state->num_active_planes[crtc->pipe]);
+ }
+
+- if (!new_bw_state)
++ old_bw_state = intel_atomic_get_old_bw_state(state);
++ new_bw_state = intel_atomic_get_new_bw_state(state);
++
++ if (new_bw_state &&
++ intel_can_enable_sagv(dev_priv, old_bw_state) !=
++ intel_can_enable_sagv(dev_priv, new_bw_state))
++ changed = true;
++
++ /*
++ * If none of our inputs (data rates, number of active
++ * planes, SAGV yes/no) changed then nothing to do here.
++ */
++ if (!changed)
+ return 0;
+
+ ret = intel_atomic_lock_global_state(&new_bw_state->base);
+@@ -804,7 +819,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
+ * cause.
+ */
+ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
+- allowed_points = BIT(max_bw_point);
++ allowed_points &= ADLS_PSF_PT_MASK;
++ allowed_points |= BIT(max_bw_point);
+ drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
+ max_bw_point);
+ }
+@@ -814,7 +830,6 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
+ */
+ new_bw_state->qgv_points_mask = ~allowed_points & mask;
+
+- old_bw_state = intel_atomic_get_old_bw_state(state);
+ /*
+ * If the actual mask had changed we need to make sure that
+ * the commits are serialized(in case this is a nomodeset, nonblocking)
+diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
+index 46c6eecbd9175..0ceaed1c96562 100644
+--- a/drivers/gpu/drm/i915/display/intel_bw.h
++++ b/drivers/gpu/drm/i915/display/intel_bw.h
+@@ -30,19 +30,19 @@ struct intel_bw_state {
+ */
+ u8 pipe_sagv_reject;
+
++ /* bitmask of active pipes */
++ u8 active_pipes;
++
+ /*
+ * Current QGV points mask, which restricts
+ * some particular SAGV states, not to confuse
+ * with pipe_sagv_mask.
+ */
+- u8 qgv_points_mask;
++ u16 qgv_points_mask;
+
+ unsigned int data_rate[I915_MAX_PIPES];
+ u8 num_active_planes[I915_MAX_PIPES];
+
+- /* bitmask of active pipes */
+- u8 active_pipes;
+-
+ int min_cdclk;
+ };
+
+diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
+index 34fa4130d5c4f..745ffa7572e85 100644
+--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
++++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
+@@ -1269,7 +1269,7 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = {
+ { .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 },
+ { .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 },
+ { .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 },
+- { .refclk = 24400, .cdclk = 648000, .divider = 2, .ratio = 54 },
++ { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 },
+
+ { .refclk = 38400, .cdclk = 179200, .divider = 3, .ratio = 14 },
+ { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 },
+diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c
+index 9bed1ccecea0d..4f49d782eca23 100644
+--- a/drivers/gpu/drm/i915/display/intel_connector.c
++++ b/drivers/gpu/drm/i915/display/intel_connector.c
+@@ -29,13 +29,13 @@
+ #include <drm/drm_atomic_helper.h>
+ #include <drm/drm_edid.h>
+
+-#include "display/intel_panel.h"
+-
+ #include "i915_drv.h"
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_display_debugfs.h"
+ #include "intel_display_types.h"
+ #include "intel_hdcp.h"
++#include "intel_panel.h"
+
+ int intel_connector_init(struct intel_connector *connector)
+ {
+diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
+index bd184325d0c75..68489c7298302 100644
+--- a/drivers/gpu/drm/i915/display/intel_ddi.c
++++ b/drivers/gpu/drm/i915/display/intel_ddi.c
+@@ -29,6 +29,7 @@
+
+ #include "i915_drv.h"
+ #include "intel_audio.h"
++#include "intel_backlight.h"
+ #include "intel_combo_phy.h"
+ #include "intel_connector.h"
+ #include "intel_crtc.h"
+@@ -40,6 +41,7 @@
+ #include "intel_dp_link_training.h"
+ #include "intel_dp_mst.h"
+ #include "intel_dpio_phy.h"
++#include "intel_drrs.h"
+ #include "intel_dsi.h"
+ #include "intel_fdi.h"
+ #include "intel_fifo_underrun.h"
+@@ -48,7 +50,6 @@
+ #include "intel_hdmi.h"
+ #include "intel_hotplug.h"
+ #include "intel_lspcon.h"
+-#include "intel_panel.h"
+ #include "intel_pps.h"
+ #include "intel_psr.h"
+ #include "intel_snps_phy.h"
+@@ -4432,6 +4433,7 @@ static void intel_ddi_encoder_shutdown(struct intel_encoder *encoder)
+ enum phy phy = intel_port_to_phy(i915, encoder->port);
+
+ intel_dp_encoder_shutdown(encoder);
++ intel_hdmi_encoder_shutdown(encoder);
+
+ if (!intel_phy_is_tc(i915, phy))
+ return;
+diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
+index ba2c08f1a797c..876620455ed31 100644
+--- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
++++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
+@@ -476,14 +476,14 @@ static const struct intel_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi = {
+ static const union intel_ddi_buf_trans_entry _ehl_combo_phy_ddi_translations_dp[] = {
+ /* NT mV Trans mV db */
+ { .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */
+- { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */
+- { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } }, /* 350 700 6.0 */
+- { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 350 900 8.2 */
++ { .icl = { 0xA, 0x47, 0x38, 0x00, 0x07 } }, /* 350 500 3.1 */
++ { .icl = { 0xC, 0x64, 0x33, 0x00, 0x0C } }, /* 350 700 6.0 */
++ { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 350 900 8.2 */
+ { .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */
+- { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */
++ { .icl = { 0xC, 0x64, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */
+ { .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */
+ { .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */
+- { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */
++ { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 600 900 3.5 */
+ { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */
+ };
+
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index 17f44ffea5866..26811176846dc 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -84,6 +84,7 @@
+ #include "intel_display_types.h"
+ #include "intel_dmc.h"
+ #include "intel_dp_link_training.h"
++#include "intel_dpt.h"
+ #include "intel_fbc.h"
+ #include "intel_fdi.h"
+ #include "intel_fbdev.h"
+@@ -126,182 +127,6 @@ static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state);
+ static void intel_modeset_setup_hw_state(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx);
+
+-struct i915_dpt {
+- struct i915_address_space vm;
+-
+- struct drm_i915_gem_object *obj;
+- struct i915_vma *vma;
+- void __iomem *iomem;
+-};
+-
+-#define i915_is_dpt(vm) ((vm)->is_dpt)
+-
+-static inline struct i915_dpt *
+-i915_vm_to_dpt(struct i915_address_space *vm)
+-{
+- BUILD_BUG_ON(offsetof(struct i915_dpt, vm));
+- GEM_BUG_ON(!i915_is_dpt(vm));
+- return container_of(vm, struct i915_dpt, vm);
+-}
+-
+-#define dpt_total_entries(dpt) ((dpt)->vm.total >> PAGE_SHIFT)
+-
+-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+-{
+- writeq(pte, addr);
+-}
+-
+-static void dpt_insert_page(struct i915_address_space *vm,
+- dma_addr_t addr,
+- u64 offset,
+- enum i915_cache_level level,
+- u32 flags)
+-{
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+- gen8_pte_t __iomem *base = dpt->iomem;
+-
+- gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
+- vm->pte_encode(addr, level, flags));
+-}
+-
+-static void dpt_insert_entries(struct i915_address_space *vm,
+- struct i915_vma *vma,
+- enum i915_cache_level level,
+- u32 flags)
+-{
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+- gen8_pte_t __iomem *base = dpt->iomem;
+- const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+- struct sgt_iter sgt_iter;
+- dma_addr_t addr;
+- int i;
+-
+- /*
+- * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+- * not to allow the user to override access to a read only page.
+- */
+-
+- i = vma->node.start / I915_GTT_PAGE_SIZE;
+- for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+- gen8_set_pte(&base[i++], pte_encode | addr);
+-}
+-
+-static void dpt_clear_range(struct i915_address_space *vm,
+- u64 start, u64 length)
+-{
+-}
+-
+-static void dpt_bind_vma(struct i915_address_space *vm,
+- struct i915_vm_pt_stash *stash,
+- struct i915_vma *vma,
+- enum i915_cache_level cache_level,
+- u32 flags)
+-{
+- struct drm_i915_gem_object *obj = vma->obj;
+- u32 pte_flags;
+-
+- /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+- pte_flags = 0;
+- if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj))
+- pte_flags |= PTE_READ_ONLY;
+- if (i915_gem_object_is_lmem(obj))
+- pte_flags |= PTE_LM;
+-
+- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+-
+- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+-
+- /*
+- * Without aliasing PPGTT there's no difference between
+- * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+- * upgrade to both bound if we bind either to avoid double-binding.
+- */
+- atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+-}
+-
+-static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+-{
+- vm->clear_range(vm, vma->node.start, vma->size);
+-}
+-
+-static void dpt_cleanup(struct i915_address_space *vm)
+-{
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+-
+- i915_gem_object_put(dpt->obj);
+-}
+-
+-static struct i915_address_space *
+-intel_dpt_create(struct intel_framebuffer *fb)
+-{
+- struct drm_gem_object *obj = &intel_fb_obj(&fb->base)->base;
+- struct drm_i915_private *i915 = to_i915(obj->dev);
+- struct drm_i915_gem_object *dpt_obj;
+- struct i915_address_space *vm;
+- struct i915_dpt *dpt;
+- size_t size;
+- int ret;
+-
+- if (intel_fb_needs_pot_stride_remap(fb))
+- size = intel_remapped_info_size(&fb->remapped_view.gtt.remapped);
+- else
+- size = DIV_ROUND_UP_ULL(obj->size, I915_GTT_PAGE_SIZE);
+-
+- size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE);
+-
+- if (HAS_LMEM(i915))
+- dpt_obj = i915_gem_object_create_lmem(i915, size, 0);
+- else
+- dpt_obj = i915_gem_object_create_stolen(i915, size);
+- if (IS_ERR(dpt_obj))
+- return ERR_CAST(dpt_obj);
+-
+- ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
+- if (ret) {
+- i915_gem_object_put(dpt_obj);
+- return ERR_PTR(ret);
+- }
+-
+- dpt = kzalloc(sizeof(*dpt), GFP_KERNEL);
+- if (!dpt) {
+- i915_gem_object_put(dpt_obj);
+- return ERR_PTR(-ENOMEM);
+- }
+-
+- vm = &dpt->vm;
+-
+- vm->gt = &i915->gt;
+- vm->i915 = i915;
+- vm->dma = i915->drm.dev;
+- vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+- vm->is_dpt = true;
+-
+- i915_address_space_init(vm, VM_CLASS_DPT);
+-
+- vm->insert_page = dpt_insert_page;
+- vm->clear_range = dpt_clear_range;
+- vm->insert_entries = dpt_insert_entries;
+- vm->cleanup = dpt_cleanup;
+-
+- vm->vma_ops.bind_vma = dpt_bind_vma;
+- vm->vma_ops.unbind_vma = dpt_unbind_vma;
+- vm->vma_ops.set_pages = ggtt_set_pages;
+- vm->vma_ops.clear_pages = clear_pages;
+-
+- vm->pte_encode = gen8_ggtt_pte_encode;
+-
+- dpt->obj = dpt_obj;
+-
+- return &dpt->vm;
+-}
+-
+-static void intel_dpt_destroy(struct i915_address_space *vm)
+-{
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+-
+- i915_vm_close(&dpt->vm);
+-}
+-
+ /* returns HPLL frequency in kHz */
+ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
+ {
+@@ -1833,8 +1658,8 @@ static void fixup_plane_bitmasks(struct intel_crtc_state *crtc_state)
+ }
+ }
+
+-static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
+- struct intel_plane *plane)
++void intel_plane_disable_noatomic(struct intel_crtc *crtc,
++ struct intel_plane *plane)
+ {
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ struct intel_crtc_state *crtc_state =
+@@ -1879,49 +1704,6 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
+ intel_wait_for_vblank(dev_priv, crtc->pipe);
+ }
+
+-static struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
+-{
+- struct drm_i915_private *i915 = vm->i915;
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+- intel_wakeref_t wakeref;
+- struct i915_vma *vma;
+- void __iomem *iomem;
+-
+- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+- atomic_inc(&i915->gpu_error.pending_fb_pin);
+-
+- vma = i915_gem_object_ggtt_pin(dpt->obj, NULL, 0, 4096,
+- HAS_LMEM(i915) ? 0 : PIN_MAPPABLE);
+- if (IS_ERR(vma))
+- goto err;
+-
+- iomem = i915_vma_pin_iomap(vma);
+- i915_vma_unpin(vma);
+- if (IS_ERR(iomem)) {
+- vma = iomem;
+- goto err;
+- }
+-
+- dpt->vma = vma;
+- dpt->iomem = iomem;
+-
+- i915_vma_get(vma);
+-
+-err:
+- atomic_dec(&i915->gpu_error.pending_fb_pin);
+- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+-
+- return vma;
+-}
+-
+-static void intel_dpt_unpin(struct i915_address_space *vm)
+-{
+- struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+-
+- i915_vma_unpin_iomap(dpt->vma);
+- i915_vma_put(dpt->vma);
+-}
+-
+ static bool
+ intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
+ const struct intel_initial_plane_config *plane_config,
+@@ -2521,7 +2303,7 @@ intel_get_crtc_new_encoder(const struct intel_atomic_state *state,
+ num_encoders++;
+ }
+
+- drm_WARN(encoder->base.dev, num_encoders != 1,
++ drm_WARN(state->base.dev, num_encoders != 1,
+ "%d encoders for pipe %c\n",
+ num_encoders, pipe_name(crtc->pipe));
+
+@@ -8042,6 +7824,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
+ * only fields that are know to not cause problems are preserved. */
+
+ saved_state->uapi = crtc_state->uapi;
++ saved_state->inherited = crtc_state->inherited;
+ saved_state->scaler_state = crtc_state->scaler_state;
+ saved_state->shared_dpll = crtc_state->shared_dpll;
+ saved_state->dpll_hw_state = crtc_state->dpll_hw_state;
+@@ -13435,6 +13218,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
+ vlv_wm_sanitize(dev_priv);
+ } else if (DISPLAY_VER(dev_priv) >= 9) {
+ skl_wm_get_hw_state(dev_priv);
++ skl_wm_sanitize(dev_priv);
+ } else if (HAS_PCH_SPLIT(dev_priv)) {
+ ilk_wm_get_hw_state(dev_priv);
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
+index 284936f0ddab2..6a7a91b380805 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.h
++++ b/drivers/gpu/drm/i915/display/intel_display.h
+@@ -629,6 +629,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state);
+ struct intel_encoder *
+ intel_get_crtc_new_encoder(const struct intel_atomic_state *state,
+ const struct intel_crtc_state *crtc_state);
++void intel_plane_disable_noatomic(struct intel_crtc *crtc,
++ struct intel_plane *plane);
+
+ unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
+ int color_plane);
+diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+index 8fdacb252bb19..b136a0fc0963b 100644
+--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
++++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+@@ -13,6 +13,7 @@
+ #include "intel_display_types.h"
+ #include "intel_dmc.h"
+ #include "intel_dp.h"
++#include "intel_drrs.h"
+ #include "intel_fbc.h"
+ #include "intel_hdcp.h"
+ #include "intel_hdmi.h"
+diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
+index 6beeeeba1bed2..90e055f056994 100644
+--- a/drivers/gpu/drm/i915/display/intel_display_types.h
++++ b/drivers/gpu/drm/i915/display/intel_display_types.h
+@@ -1520,6 +1520,8 @@ struct intel_psr {
+ bool psr2_sel_fetch_enabled;
+ bool req_psr2_sdp_prior_scanline;
+ u8 sink_sync_latency;
++ u8 io_wake_lines;
++ u8 fast_wake_lines;
+ ktime_t last_entry_attempt;
+ ktime_t last_exit;
+ bool sink_not_reliable;
+@@ -1639,6 +1641,9 @@ struct intel_dp {
+ struct intel_dp_pcon_frl frl;
+
+ struct intel_psr psr;
++
++ /* When we last wrote the OUI for eDP */
++ unsigned long last_oui_write;
+ };
+
+ enum lspcon_vendor {
+diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
+index b3c8e1c450efb..0e04d4dd1c132 100644
+--- a/drivers/gpu/drm/i915/display/intel_dmc.c
++++ b/drivers/gpu/drm/i915/display/intel_dmc.c
+@@ -375,6 +375,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc,
+ }
+ }
+
++static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc,
++ const u32 *mmioaddr, u32 mmio_count,
++ int header_ver, u8 dmc_id)
++{
++ struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc);
++ u32 start_range, end_range;
++ int i;
++
++ if (dmc_id >= DMC_FW_MAX) {
++ drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id);
++ return false;
++ }
++
++ if (header_ver == 1) {
++ start_range = DMC_MMIO_START_RANGE;
++ end_range = DMC_MMIO_END_RANGE;
++ } else if (dmc_id == DMC_FW_MAIN) {
++ start_range = TGL_MAIN_MMIO_START;
++ end_range = TGL_MAIN_MMIO_END;
++ } else if (DISPLAY_VER(i915) >= 13) {
++ start_range = ADLP_PIPE_MMIO_START;
++ end_range = ADLP_PIPE_MMIO_END;
++ } else if (DISPLAY_VER(i915) >= 12) {
++ start_range = TGL_PIPE_MMIO_START(dmc_id);
++ end_range = TGL_PIPE_MMIO_END(dmc_id);
++ } else {
++ drm_warn(&i915->drm, "Unknown mmio range for sanity check");
++ return false;
++ }
++
++ for (i = 0; i < mmio_count; i++) {
++ if (mmioaddr[i] < start_range || mmioaddr[i] > end_range)
++ return false;
++ }
++
++ return true;
++}
++
+ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
+ const struct intel_dmc_header_base *dmc_header,
+ size_t rem_size, u8 dmc_id)
+@@ -444,6 +482,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
+ return 0;
+ }
+
++ if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count,
++ dmc_header->header_ver, dmc_id)) {
++ drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n");
++ return 0;
++ }
++
+ for (i = 0; i < mmio_count; i++) {
+ dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
+ dmc_info->mmiodata[i] = mmiodata[i];
+@@ -606,7 +650,7 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv,
+ continue;
+
+ offset = readcount + dmc->dmc_info[id].dmc_offset * 4;
+- if (fw->size - offset < 0) {
++ if (offset > fw->size) {
+ drm_err(&dev_priv->drm, "Reading beyond the fw_size\n");
+ continue;
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
+index 5cf152be44877..6cc1258578088 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp.c
++++ b/drivers/gpu/drm/i915/display/intel_dp.c
+@@ -29,6 +29,7 @@
+ #include <linux/i2c.h>
+ #include <linux/notifier.h>
+ #include <linux/slab.h>
++#include <linux/timekeeping.h>
+ #include <linux/types.h>
+
+ #include <asm/byteorder.h>
+@@ -44,6 +45,7 @@
+ #include "i915_drv.h"
+ #include "intel_atomic.h"
+ #include "intel_audio.h"
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_ddi.h"
+ #include "intel_de.h"
+@@ -55,6 +57,7 @@
+ #include "intel_dp_mst.h"
+ #include "intel_dpio_phy.h"
+ #include "intel_dpll.h"
++#include "intel_drrs.h"
+ #include "intel_fifo_underrun.h"
+ #include "intel_hdcp.h"
+ #include "intel_hdmi.h"
+@@ -111,6 +114,12 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp)
+ static void intel_dp_unset_edid(struct intel_dp *intel_dp);
+ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc);
+
++static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp)
++{
++ intel_dp->sink_rates[0] = 162000;
++ intel_dp->num_sink_rates = 1;
++}
++
+ /* update sink rates from dpcd */
+ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
+ {
+@@ -1222,6 +1231,11 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
+ pipe_config->dsc.slice_count =
+ drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd,
+ true);
++ if (!pipe_config->dsc.slice_count) {
++ drm_dbg_kms(&dev_priv->drm, "Unsupported Slice Count %d\n",
++ pipe_config->dsc.slice_count);
++ return -EINVAL;
++ }
+ } else {
+ u16 dsc_max_output_bpp;
+ u8 dsc_dp_slice_count;
+@@ -1603,46 +1617,6 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
+ intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA);
+ }
+
+-static void
+-intel_dp_drrs_compute_config(struct intel_dp *intel_dp,
+- struct intel_crtc_state *pipe_config,
+- int output_bpp, bool constant_n)
+-{
+- struct intel_connector *intel_connector = intel_dp->attached_connector;
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+- int pixel_clock;
+-
+- if (pipe_config->vrr.enable)
+- return;
+-
+- /*
+- * DRRS and PSR can't be enable together, so giving preference to PSR
+- * as it allows more power-savings by complete shutting down display,
+- * so to guarantee this, intel_dp_drrs_compute_config() must be called
+- * after intel_psr_compute_config().
+- */
+- if (pipe_config->has_psr)
+- return;
+-
+- if (!intel_connector->panel.downclock_mode ||
+- dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT)
+- return;
+-
+- pipe_config->has_drrs = true;
+-
+- pixel_clock = intel_connector->panel.downclock_mode->clock;
+- if (pipe_config->splitter.enable)
+- pixel_clock /= pipe_config->splitter.link_count;
+-
+- intel_link_compute_m_n(output_bpp, pipe_config->lane_count, pixel_clock,
+- pipe_config->port_clock, &pipe_config->dp_m2_n2,
+- constant_n, pipe_config->fec_enable);
+-
+- /* FIXME: abstract this better */
+- if (pipe_config->splitter.enable)
+- pipe_config->dp_m2_n2.gmch_m *= pipe_config->splitter.link_count;
+-}
+-
+ int
+ intel_dp_compute_config(struct intel_encoder *encoder,
+ struct intel_crtc_state *pipe_config,
+@@ -1767,6 +1741,12 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp,
+ intel_dp->lane_count = lane_count;
+ }
+
++static void intel_dp_reset_max_link_params(struct intel_dp *intel_dp)
++{
++ intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
++ intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
++}
++
+ /* Enable backlight PWM and backlight PP control. */
+ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state)
+@@ -1852,6 +1832,16 @@ intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful)
+
+ if (drm_dp_dpcd_write(&intel_dp->aux, DP_SOURCE_OUI, oui, sizeof(oui)) < 0)
+ drm_err(&i915->drm, "Failed to write source OUI\n");
++
++ intel_dp->last_oui_write = jiffies;
++}
++
++void intel_dp_wait_source_oui(struct intel_dp *intel_dp)
++{
++ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
++
++ drm_dbg_kms(&i915->drm, "Performing OUI wait\n");
++ wait_remaining_ms_from_jiffies(intel_dp->last_oui_write, 30);
+ }
+
+ /* If the device supports it, try to set the power state appropriately */
+@@ -1926,8 +1916,7 @@ void intel_dp_sync_state(struct intel_encoder *encoder,
+ if (intel_dp->dpcd[DP_DPCD_REV] == 0)
+ intel_dp_get_dpcd(intel_dp);
+
+- intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
+- intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
++ intel_dp_reset_max_link_params(intel_dp);
+ }
+
+ bool intel_dp_initial_fastset_check(struct intel_encoder *encoder,
+@@ -2462,6 +2451,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
+ */
+ intel_psr_init_dpcd(intel_dp);
+
++ /* Clear the default sink rates */
++ intel_dp->num_sink_rates = 0;
++
+ /* Read the eDP 1.4+ supported link rates. */
+ if (intel_dp->edp_dpcd[0] >= DP_EDP_14) {
+ __le16 sink_rates[DP_MAX_SUPPORTED_RATES];
+@@ -2497,6 +2489,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
+ intel_dp_set_sink_rates(intel_dp);
+
+ intel_dp_set_common_rates(intel_dp);
++ intel_dp_reset_max_link_params(intel_dp);
+
+ /* Read the eDP DSC DPCD registers */
+ if (DISPLAY_VER(dev_priv) >= 10)
+@@ -3257,61 +3250,6 @@ static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp,
+ }
+ }
+
+-static void
+-intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state)
+-{
+- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+- struct drm_device *dev = dig_port->base.base.dev;
+- struct drm_i915_private *dev_priv = to_i915(dev);
+- struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
+- enum pipe pipe = crtc->pipe;
+- u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
+-
+- trans_ddi_func_ctl_value = intel_de_read(dev_priv,
+- TRANS_DDI_FUNC_CTL(pipe));
+- trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
+- dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
+-
+- trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE |
+- TGL_TRANS_DDI_PORT_MASK);
+- trans_conf_value &= ~PIPECONF_ENABLE;
+- dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE;
+-
+- intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
+- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
+- trans_ddi_func_ctl_value);
+- intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
+-}
+-
+-static void
+-intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state)
+-{
+- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+- struct drm_device *dev = dig_port->base.base.dev;
+- struct drm_i915_private *dev_priv = to_i915(dev);
+- enum port port = dig_port->base.port;
+- struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
+- enum pipe pipe = crtc->pipe;
+- u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
+-
+- trans_ddi_func_ctl_value = intel_de_read(dev_priv,
+- TRANS_DDI_FUNC_CTL(pipe));
+- trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
+- dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
+-
+- trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE |
+- TGL_TRANS_DDI_SELECT_PORT(port);
+- trans_conf_value |= PIPECONF_ENABLE;
+- dp_tp_ctl_value |= DP_TP_CTL_ENABLE;
+-
+- intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
+- intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
+- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
+- trans_ddi_func_ctl_value);
+-}
+-
+ static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state)
+ {
+@@ -3329,14 +3267,10 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
+ intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX,
+ link_status);
+
+- intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state);
+-
+ intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX);
+
+ intel_dp_phy_pattern_update(intel_dp, crtc_state);
+
+- intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state);
+-
+ drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
+ intel_dp->train_set, crtc_state->lane_count);
+
+@@ -3509,6 +3443,8 @@ intel_dp_handle_hdmi_link_status_change(struct intel_dp *intel_dp)
+
+ drm_dp_pcon_hdmi_frl_link_error_count(&intel_dp->aux, &intel_dp->attached_connector->base);
+
++ intel_dp->frl.is_trained = false;
++
+ /* Restart FRL training or fall back to TMDS mode */
+ intel_dp_check_frl_training(intel_dp);
+ }
+@@ -4240,12 +4176,7 @@ intel_dp_detect(struct drm_connector *connector,
+ * supports link training fallback params.
+ */
+ if (intel_dp->reset_link_params || intel_dp->is_mst) {
+- /* Initial max link lane count */
+- intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
+-
+- /* Initial max link rate */
+- intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+-
++ intel_dp_reset_max_link_params(intel_dp);
+ intel_dp->reset_link_params = false;
+ }
+
+@@ -4617,7 +4548,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd)
+ struct intel_dp *intel_dp = &dig_port->dp;
+
+ if (dig_port->base.type == INTEL_OUTPUT_EDP &&
+- (long_hpd || !intel_pps_have_power(intel_dp))) {
++ (long_hpd || !intel_pps_have_panel_power_or_vdd(intel_dp))) {
+ /*
+ * vdd off can generate a long/short pulse on eDP which
+ * would require vdd on to handle it, and thus we
+@@ -4716,432 +4647,6 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect
+ drm_connector_attach_vrr_capable_property(connector);
+ }
+
+-/**
+- * intel_dp_set_drrs_state - program registers for RR switch to take effect
+- * @dev_priv: i915 device
+- * @crtc_state: a pointer to the active intel_crtc_state
+- * @refresh_rate: RR to be programmed
+- *
+- * This function gets called when refresh rate (RR) has to be changed from
+- * one frequency to another. Switches can be between high and low RR
+- * supported by the panel or to any other RR based on media playback (in
+- * this case, RR value needs to be passed from user space).
+- *
+- * The caller of this function needs to take a lock on dev_priv->drrs.
+- */
+-static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv,
+- const struct intel_crtc_state *crtc_state,
+- int refresh_rate)
+-{
+- struct intel_dp *intel_dp = dev_priv->drrs.dp;
+- struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+- enum drrs_refresh_rate_type index = DRRS_HIGH_RR;
+-
+- if (refresh_rate <= 0) {
+- drm_dbg_kms(&dev_priv->drm,
+- "Refresh rate should be positive non-zero.\n");
+- return;
+- }
+-
+- if (intel_dp == NULL) {
+- drm_dbg_kms(&dev_priv->drm, "DRRS not supported.\n");
+- return;
+- }
+-
+- if (!crtc) {
+- drm_dbg_kms(&dev_priv->drm,
+- "DRRS: intel_crtc not initialized\n");
+- return;
+- }
+-
+- if (dev_priv->drrs.type < SEAMLESS_DRRS_SUPPORT) {
+- drm_dbg_kms(&dev_priv->drm, "Only Seamless DRRS supported.\n");
+- return;
+- }
+-
+- if (drm_mode_vrefresh(intel_dp->attached_connector->panel.downclock_mode) ==
+- refresh_rate)
+- index = DRRS_LOW_RR;
+-
+- if (index == dev_priv->drrs.refresh_rate_type) {
+- drm_dbg_kms(&dev_priv->drm,
+- "DRRS requested for previously set RR...ignoring\n");
+- return;
+- }
+-
+- if (!crtc_state->hw.active) {
+- drm_dbg_kms(&dev_priv->drm,
+- "eDP encoder disabled. CRTC not Active\n");
+- return;
+- }
+-
+- if (DISPLAY_VER(dev_priv) >= 8 && !IS_CHERRYVIEW(dev_priv)) {
+- switch (index) {
+- case DRRS_HIGH_RR:
+- intel_dp_set_m_n(crtc_state, M1_N1);
+- break;
+- case DRRS_LOW_RR:
+- intel_dp_set_m_n(crtc_state, M2_N2);
+- break;
+- case DRRS_MAX_RR:
+- default:
+- drm_err(&dev_priv->drm,
+- "Unsupported refreshrate type\n");
+- }
+- } else if (DISPLAY_VER(dev_priv) > 6) {
+- i915_reg_t reg = PIPECONF(crtc_state->cpu_transcoder);
+- u32 val;
+-
+- val = intel_de_read(dev_priv, reg);
+- if (index > DRRS_HIGH_RR) {
+- if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+- val |= PIPECONF_EDP_RR_MODE_SWITCH_VLV;
+- else
+- val |= PIPECONF_EDP_RR_MODE_SWITCH;
+- } else {
+- if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+- val &= ~PIPECONF_EDP_RR_MODE_SWITCH_VLV;
+- else
+- val &= ~PIPECONF_EDP_RR_MODE_SWITCH;
+- }
+- intel_de_write(dev_priv, reg, val);
+- }
+-
+- dev_priv->drrs.refresh_rate_type = index;
+-
+- drm_dbg_kms(&dev_priv->drm, "eDP Refresh Rate set to : %dHz\n",
+- refresh_rate);
+-}
+-
+-static void
+-intel_edp_drrs_enable_locked(struct intel_dp *intel_dp)
+-{
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+-
+- dev_priv->drrs.busy_frontbuffer_bits = 0;
+- dev_priv->drrs.dp = intel_dp;
+-}
+-
+-/**
+- * intel_edp_drrs_enable - init drrs struct if supported
+- * @intel_dp: DP struct
+- * @crtc_state: A pointer to the active crtc state.
+- *
+- * Initializes frontbuffer_bits and drrs.dp
+- */
+-void intel_edp_drrs_enable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state)
+-{
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+-
+- if (!crtc_state->has_drrs)
+- return;
+-
+- drm_dbg_kms(&dev_priv->drm, "Enabling DRRS\n");
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+-
+- if (dev_priv->drrs.dp) {
+- drm_warn(&dev_priv->drm, "DRRS already enabled\n");
+- goto unlock;
+- }
+-
+- intel_edp_drrs_enable_locked(intel_dp);
+-
+-unlock:
+- mutex_unlock(&dev_priv->drrs.mutex);
+-}
+-
+-static void
+-intel_edp_drrs_disable_locked(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state)
+-{
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+-
+- if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) {
+- int refresh;
+-
+- refresh = drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode);
+- intel_dp_set_drrs_state(dev_priv, crtc_state, refresh);
+- }
+-
+- dev_priv->drrs.dp = NULL;
+-}
+-
+-/**
+- * intel_edp_drrs_disable - Disable DRRS
+- * @intel_dp: DP struct
+- * @old_crtc_state: Pointer to old crtc_state.
+- *
+- */
+-void intel_edp_drrs_disable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *old_crtc_state)
+-{
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+-
+- if (!old_crtc_state->has_drrs)
+- return;
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+- if (!dev_priv->drrs.dp) {
+- mutex_unlock(&dev_priv->drrs.mutex);
+- return;
+- }
+-
+- intel_edp_drrs_disable_locked(intel_dp, old_crtc_state);
+- mutex_unlock(&dev_priv->drrs.mutex);
+-
+- cancel_delayed_work_sync(&dev_priv->drrs.work);
+-}
+-
+-/**
+- * intel_edp_drrs_update - Update DRRS state
+- * @intel_dp: Intel DP
+- * @crtc_state: new CRTC state
+- *
+- * This function will update DRRS states, disabling or enabling DRRS when
+- * executing fastsets. For full modeset, intel_edp_drrs_disable() and
+- * intel_edp_drrs_enable() should be called instead.
+- */
+-void
+-intel_edp_drrs_update(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state)
+-{
+- struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+-
+- if (dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT)
+- return;
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+-
+- /* New state matches current one? */
+- if (crtc_state->has_drrs == !!dev_priv->drrs.dp)
+- goto unlock;
+-
+- if (crtc_state->has_drrs)
+- intel_edp_drrs_enable_locked(intel_dp);
+- else
+- intel_edp_drrs_disable_locked(intel_dp, crtc_state);
+-
+-unlock:
+- mutex_unlock(&dev_priv->drrs.mutex);
+-}
+-
+-static void intel_edp_drrs_downclock_work(struct work_struct *work)
+-{
+- struct drm_i915_private *dev_priv =
+- container_of(work, typeof(*dev_priv), drrs.work.work);
+- struct intel_dp *intel_dp;
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+-
+- intel_dp = dev_priv->drrs.dp;
+-
+- if (!intel_dp)
+- goto unlock;
+-
+- /*
+- * The delayed work can race with an invalidate hence we need to
+- * recheck.
+- */
+-
+- if (dev_priv->drrs.busy_frontbuffer_bits)
+- goto unlock;
+-
+- if (dev_priv->drrs.refresh_rate_type != DRRS_LOW_RR) {
+- struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
+-
+- intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
+- drm_mode_vrefresh(intel_dp->attached_connector->panel.downclock_mode));
+- }
+-
+-unlock:
+- mutex_unlock(&dev_priv->drrs.mutex);
+-}
+-
+-/**
+- * intel_edp_drrs_invalidate - Disable Idleness DRRS
+- * @dev_priv: i915 device
+- * @frontbuffer_bits: frontbuffer plane tracking bits
+- *
+- * This function gets called everytime rendering on the given planes start.
+- * Hence DRRS needs to be Upclocked, i.e. (LOW_RR -> HIGH_RR).
+- *
+- * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
+- */
+-void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
+- unsigned int frontbuffer_bits)
+-{
+- struct intel_dp *intel_dp;
+- struct drm_crtc *crtc;
+- enum pipe pipe;
+-
+- if (dev_priv->drrs.type == DRRS_NOT_SUPPORTED)
+- return;
+-
+- cancel_delayed_work(&dev_priv->drrs.work);
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+-
+- intel_dp = dev_priv->drrs.dp;
+- if (!intel_dp) {
+- mutex_unlock(&dev_priv->drrs.mutex);
+- return;
+- }
+-
+- crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
+- pipe = to_intel_crtc(crtc)->pipe;
+-
+- frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe);
+- dev_priv->drrs.busy_frontbuffer_bits |= frontbuffer_bits;
+-
+- /* invalidate means busy screen hence upclock */
+- if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR)
+- intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
+- drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode));
+-
+- mutex_unlock(&dev_priv->drrs.mutex);
+-}
+-
+-/**
+- * intel_edp_drrs_flush - Restart Idleness DRRS
+- * @dev_priv: i915 device
+- * @frontbuffer_bits: frontbuffer plane tracking bits
+- *
+- * This function gets called every time rendering on the given planes has
+- * completed or flip on a crtc is completed. So DRRS should be upclocked
+- * (LOW_RR -> HIGH_RR). And also Idleness detection should be started again,
+- * if no other planes are dirty.
+- *
+- * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
+- */
+-void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
+- unsigned int frontbuffer_bits)
+-{
+- struct intel_dp *intel_dp;
+- struct drm_crtc *crtc;
+- enum pipe pipe;
+-
+- if (dev_priv->drrs.type == DRRS_NOT_SUPPORTED)
+- return;
+-
+- cancel_delayed_work(&dev_priv->drrs.work);
+-
+- mutex_lock(&dev_priv->drrs.mutex);
+-
+- intel_dp = dev_priv->drrs.dp;
+- if (!intel_dp) {
+- mutex_unlock(&dev_priv->drrs.mutex);
+- return;
+- }
+-
+- crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
+- pipe = to_intel_crtc(crtc)->pipe;
+-
+- frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe);
+- dev_priv->drrs.busy_frontbuffer_bits &= ~frontbuffer_bits;
+-
+- /* flush means busy screen hence upclock */
+- if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR)
+- intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
+- drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode));
+-
+- /*
+- * flush also means no more activity hence schedule downclock, if all
+- * other fbs are quiescent too
+- */
+- if (!dev_priv->drrs.busy_frontbuffer_bits)
+- schedule_delayed_work(&dev_priv->drrs.work,
+- msecs_to_jiffies(1000));
+- mutex_unlock(&dev_priv->drrs.mutex);
+-}
+-
+-/**
+- * DOC: Display Refresh Rate Switching (DRRS)
+- *
+- * Display Refresh Rate Switching (DRRS) is a power conservation feature
+- * which enables swtching between low and high refresh rates,
+- * dynamically, based on the usage scenario. This feature is applicable
+- * for internal panels.
+- *
+- * Indication that the panel supports DRRS is given by the panel EDID, which
+- * would list multiple refresh rates for one resolution.
+- *
+- * DRRS is of 2 types - static and seamless.
+- * Static DRRS involves changing refresh rate (RR) by doing a full modeset
+- * (may appear as a blink on screen) and is used in dock-undock scenario.
+- * Seamless DRRS involves changing RR without any visual effect to the user
+- * and can be used during normal system usage. This is done by programming
+- * certain registers.
+- *
+- * Support for static/seamless DRRS may be indicated in the VBT based on
+- * inputs from the panel spec.
+- *
+- * DRRS saves power by switching to low RR based on usage scenarios.
+- *
+- * The implementation is based on frontbuffer tracking implementation. When
+- * there is a disturbance on the screen triggered by user activity or a periodic
+- * system activity, DRRS is disabled (RR is changed to high RR). When there is
+- * no movement on screen, after a timeout of 1 second, a switch to low RR is
+- * made.
+- *
+- * For integration with frontbuffer tracking code, intel_edp_drrs_invalidate()
+- * and intel_edp_drrs_flush() are called.
+- *
+- * DRRS can be further extended to support other internal panels and also
+- * the scenario of video playback wherein RR is set based on the rate
+- * requested by userspace.
+- */
+-
+-/**
+- * intel_dp_drrs_init - Init basic DRRS work and mutex.
+- * @connector: eDP connector
+- * @fixed_mode: preferred mode of panel
+- *
+- * This function is called only once at driver load to initialize basic
+- * DRRS stuff.
+- *
+- * Returns:
+- * Downclock mode if panel supports it, else return NULL.
+- * DRRS support is determined by the presence of downclock mode (apart
+- * from VBT setting).
+- */
+-static struct drm_display_mode *
+-intel_dp_drrs_init(struct intel_connector *connector,
+- struct drm_display_mode *fixed_mode)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct drm_display_mode *downclock_mode = NULL;
+-
+- INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_edp_drrs_downclock_work);
+- mutex_init(&dev_priv->drrs.mutex);
+-
+- if (DISPLAY_VER(dev_priv) <= 6) {
+- drm_dbg_kms(&dev_priv->drm,
+- "DRRS supported for Gen7 and above\n");
+- return NULL;
+- }
+-
+- if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) {
+- drm_dbg_kms(&dev_priv->drm, "VBT doesn't support DRRS\n");
+- return NULL;
+- }
+-
+- downclock_mode = intel_panel_edid_downclock_mode(connector, fixed_mode);
+- if (!downclock_mode) {
+- drm_dbg_kms(&dev_priv->drm,
+- "Downclock mode is not found. DRRS not supported\n");
+- return NULL;
+- }
+-
+- dev_priv->drrs.type = dev_priv->vbt.drrs_type;
+-
+- dev_priv->drrs.refresh_rate_type = DRRS_HIGH_RR;
+- drm_dbg_kms(&dev_priv->drm,
+- "seamless DRRS supported for eDP panel.\n");
+- return downclock_mode;
+-}
+-
+ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
+ struct intel_connector *intel_connector)
+ {
+@@ -5296,6 +4801,9 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
+ return false;
+
+ intel_dp_set_source_rates(intel_dp);
++ intel_dp_set_default_sink_rates(intel_dp);
++ intel_dp_set_common_rates(intel_dp);
++ intel_dp_reset_max_link_params(intel_dp);
+
+ intel_dp->reset_link_params = true;
+ intel_dp->pps.pps_pipe = INVALID_PIPE;
+diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
+index 680631b5b4378..3dd6ebc2f6b14 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp.h
++++ b/drivers/gpu/drm/i915/display/intel_dp.h
+@@ -70,17 +70,6 @@ int intel_dp_max_link_rate(struct intel_dp *intel_dp);
+ int intel_dp_max_lane_count(struct intel_dp *intel_dp);
+ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate);
+
+-void intel_edp_drrs_enable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state);
+-void intel_edp_drrs_disable(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state);
+-void intel_edp_drrs_update(struct intel_dp *intel_dp,
+- const struct intel_crtc_state *crtc_state);
+-void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
+- unsigned int frontbuffer_bits);
+-void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
+- unsigned int frontbuffer_bits);
+-
+ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
+ u8 *link_bw, u8 *rate_select);
+ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp);
+@@ -129,4 +118,6 @@ void intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp,
+ const struct intel_crtc_state *crtc_state);
+ void intel_dp_phy_test(struct intel_encoder *encoder);
+
++void intel_dp_wait_source_oui(struct intel_dp *intel_dp);
++
+ #endif /* __INTEL_DP_H__ */
+diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
+index f483f479dd0b2..d507a20822db1 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
+@@ -119,6 +119,32 @@ static u32 skl_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
+ return index ? 0 : 1;
+ }
+
++static int intel_dp_aux_sync_len(void)
++{
++ int precharge = 16; /* 10-16 */
++ int preamble = 16;
++
++ return precharge + preamble;
++}
++
++static int intel_dp_aux_fw_sync_len(void)
++{
++ int precharge = 10; /* 10-16 */
++ int preamble = 8;
++
++ return precharge + preamble;
++}
++
++static int g4x_dp_aux_precharge_len(void)
++{
++ int precharge_min = 10;
++ int preamble = 16;
++
++ /* HW wants the length of the extra precharge in 2us units */
++ return (intel_dp_aux_sync_len() -
++ precharge_min - preamble) / 2;
++}
++
+ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
+ int send_bytes,
+ u32 aux_clock_divider)
+@@ -141,7 +167,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
+ timeout |
+ DP_AUX_CH_CTL_RECEIVE_ERROR |
+ (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
+- (3 << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
++ (g4x_dp_aux_precharge_len() << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
+ (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT);
+ }
+
+@@ -167,8 +193,8 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
+ DP_AUX_CH_CTL_TIME_OUT_MAX |
+ DP_AUX_CH_CTL_RECEIVE_ERROR |
+ (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
+- DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) |
+- DP_AUX_CH_CTL_SYNC_PULSE_SKL(32);
++ DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(intel_dp_aux_fw_sync_len()) |
++ DP_AUX_CH_CTL_SYNC_PULSE_SKL(intel_dp_aux_sync_len());
+
+ if (intel_phy_is_tc(i915, phy) &&
+ dig_port->tc_mode == TC_PORT_TBT_ALT)
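A quick worked check of the intel_dp_aux_sync_len(), intel_dp_aux_fw_sync_len() and g4x_dp_aux_precharge_len() helpers added above, using only the numbers visible in this hunk:

    sync_len      = 16 precharge + 16 preamble = 32
    fw_sync_len   = 10 precharge +  8 preamble = 18
    g4x precharge = (32 - 10 - 16) / 2         = 3   (programmed in 2 us units)

So the g4x path still writes the same 3 that was previously hard-coded, and on skl+ the normal SYNC pulse length stays at 32 while the fast-wake SYNC pulse drops from the old hard-coded 32 to 18.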
+diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+index 6ac568617ef37..0a77f0e48aa11 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+@@ -34,9 +34,10 @@
+ * for some reason.
+ */
+
++#include "intel_backlight.h"
+ #include "intel_display_types.h"
++#include "intel_dp.h"
+ #include "intel_dp_aux_backlight.h"
+-#include "intel_panel.h"
+
+ /* TODO:
+ * Implement HDR, right now we just implement the bare minimum to bring us back into SDR mode so we
+@@ -95,6 +96,14 @@
+
+ #define INTEL_EDP_BRIGHTNESS_OPTIMIZATION_1 0x359
+
++enum intel_dp_aux_backlight_modparam {
++ INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
++ INTEL_DP_AUX_BACKLIGHT_OFF = 0,
++ INTEL_DP_AUX_BACKLIGHT_ON = 1,
++ INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
++ INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
++};
++
+ /* Intel EDP backlight callbacks */
+ static bool
+ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
+@@ -106,6 +115,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
+ int ret;
+ u8 tcon_cap[4];
+
++ intel_dp_wait_source_oui(intel_dp);
++
+ ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap));
+ if (ret != sizeof(tcon_cap))
+ return false;
+@@ -122,6 +133,24 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
+ return false;
+ }
+
++ /*
++ * If we don't have HDR static metadata there is no way to
++ * runtime detect used range for nits based control. For now
++ * do not use Intel proprietary eDP backlight control if we
++ * don't have this data in panel EDID. In case we find panel
++ * which supports only nits based control, but doesn't provide
++ * HDR static metadata we need to start maintaining table of
++ * ranges for such panels.
++ */
++ if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL &&
++ !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type &
++ BIT(HDMI_STATIC_METADATA_TYPE1))) {
++ drm_info(&i915->drm,
++ "Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n",
++ INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL);
++ return false;
++ }
++
+ panel->backlight.edp.intel.sdr_uses_aux =
+ tcon_cap[2] & INTEL_EDP_SDR_TCON_BRIGHTNESS_AUX_CAP;
+
+@@ -204,6 +233,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state,
+ int ret;
+ u8 old_ctrl, ctrl;
+
++ intel_dp_wait_source_oui(intel_dp);
++
+ ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl);
+ if (ret != 1) {
+ drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret);
+@@ -368,14 +399,6 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = {
+ .get = intel_dp_aux_vesa_get_backlight,
+ };
+
+-enum intel_dp_aux_backlight_modparam {
+- INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
+- INTEL_DP_AUX_BACKLIGHT_OFF = 0,
+- INTEL_DP_AUX_BACKLIGHT_ON = 1,
+- INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
+- INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
+-};
+-
+ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector)
+ {
+ struct drm_device *dev = connector->base.dev;
+diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+index 508a514c5e37d..d77d91c0a03af 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+@@ -475,6 +475,28 @@ intel_dp_prepare_link_train(struct intel_dp *intel_dp,
+ intel_dp_compute_rate(intel_dp, crtc_state->port_clock,
+ &link_bw, &rate_select);
+
++ /*
++ * WaEdpLinkRateDataReload
++ *
++ * Parade PS8461E MUX (used on various TGL+ laptops) needs
++ * to snoop the link rates reported by the sink when we
++ * use LINK_RATE_SET in order to operate in jitter cleaning
++ * mode (as opposed to redriver mode). Unfortunately it
++ * loses track of the snooped link rates when powered down,
++ * so we need to make it re-snoop often. Without this high
++ * link rates are not stable.
++ */
++ if (!link_bw) {
++ struct intel_connector *connector = intel_dp->attached_connector;
++ __le16 sink_rates[DP_MAX_SUPPORTED_RATES];
++
++ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Reloading eDP link rates\n",
++ connector->base.base.id, connector->base.name);
++
++ drm_dp_dpcd_read(&intel_dp->aux, DP_SUPPORTED_LINK_RATES,
++ sink_rates, sizeof(sink_rates));
++ }
++
+ if (link_bw)
+ drm_dbg_kms(&i915->drm,
+ "Using LINK_BW_SET value %02x\n", link_bw);
+diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
+index 8d13d7b26a25b..2a20487effccd 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
+@@ -817,6 +817,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
+ ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
+ DRM_MODE_CONNECTOR_DisplayPort);
+ if (ret) {
++ drm_dp_mst_put_port_malloc(port);
+ intel_connector_free(intel_connector);
+ return NULL;
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+index 5c91d125a3371..3dfa600fb86d6 100644
+--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
++++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+@@ -2434,7 +2434,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
+ }
+
+ /*
+- * Display WA #22010492432: ehl, tgl, adl-p
++ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
+ * Program half of the nominal DCO divider fraction value.
+ */
+ static bool
+@@ -2442,7 +2442,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
+ {
+ return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
+ IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
+- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
++ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
+ i915->dpll.ref_clks.nssc == 38400;
+ }
+
+diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
+new file mode 100644
+index 0000000000000..22acd945a9e47
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_dpt.c
+@@ -0,0 +1,229 @@
++// SPDX-License-Identifier: MIT
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#include "i915_drv.h"
++#include "intel_display_types.h"
++#include "intel_dpt.h"
++#include "intel_fb.h"
++#include "gt/gen8_ppgtt.h"
++
++struct i915_dpt {
++ struct i915_address_space vm;
++
++ struct drm_i915_gem_object *obj;
++ struct i915_vma *vma;
++ void __iomem *iomem;
++};
++
++#define i915_is_dpt(vm) ((vm)->is_dpt)
++
++static inline struct i915_dpt *
++i915_vm_to_dpt(struct i915_address_space *vm)
++{
++ BUILD_BUG_ON(offsetof(struct i915_dpt, vm));
++ GEM_BUG_ON(!i915_is_dpt(vm));
++ return container_of(vm, struct i915_dpt, vm);
++}
++
++#define dpt_total_entries(dpt) ((dpt)->vm.total >> PAGE_SHIFT)
++
++static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
++{
++ writeq(pte, addr);
++}
++
++static void dpt_insert_page(struct i915_address_space *vm,
++ dma_addr_t addr,
++ u64 offset,
++ enum i915_cache_level level,
++ u32 flags)
++{
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++ gen8_pte_t __iomem *base = dpt->iomem;
++
++ gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
++ vm->pte_encode(addr, level, flags));
++}
++
++static void dpt_insert_entries(struct i915_address_space *vm,
++ struct i915_vma *vma,
++ enum i915_cache_level level,
++ u32 flags)
++{
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++ gen8_pte_t __iomem *base = dpt->iomem;
++ const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
++ struct sgt_iter sgt_iter;
++ dma_addr_t addr;
++ int i;
++
++ /*
++ * Note that we ignore PTE_READ_ONLY here. The caller must be careful
++ * not to allow the user to override access to a read only page.
++ */
++
++ i = vma->node.start / I915_GTT_PAGE_SIZE;
++ for_each_sgt_daddr(addr, sgt_iter, vma->pages)
++ gen8_set_pte(&base[i++], pte_encode | addr);
++}
++
++static void dpt_clear_range(struct i915_address_space *vm,
++ u64 start, u64 length)
++{
++}
++
++static void dpt_bind_vma(struct i915_address_space *vm,
++ struct i915_vm_pt_stash *stash,
++ struct i915_vma *vma,
++ enum i915_cache_level cache_level,
++ u32 flags)
++{
++ struct drm_i915_gem_object *obj = vma->obj;
++ u32 pte_flags;
++
++ /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
++ pte_flags = 0;
++ if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj))
++ pte_flags |= PTE_READ_ONLY;
++ if (i915_gem_object_is_lmem(obj))
++ pte_flags |= PTE_LM;
++
++ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
++
++ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
++
++ /*
++ * Without aliasing PPGTT there's no difference between
++ * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
++ * upgrade to both bound if we bind either to avoid double-binding.
++ */
++ atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
++}
++
++static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
++{
++ vm->clear_range(vm, vma->node.start, vma->size);
++}
++
++static void dpt_cleanup(struct i915_address_space *vm)
++{
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++
++ i915_gem_object_put(dpt->obj);
++}
++
++struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
++{
++ struct drm_i915_private *i915 = vm->i915;
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++ intel_wakeref_t wakeref;
++ struct i915_vma *vma;
++ void __iomem *iomem;
++
++ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
++ atomic_inc(&i915->gpu_error.pending_fb_pin);
++
++ vma = i915_gem_object_ggtt_pin(dpt->obj, NULL, 0, 4096,
++ HAS_LMEM(i915) ? 0 : PIN_MAPPABLE);
++ if (IS_ERR(vma))
++ goto err;
++
++ iomem = i915_vma_pin_iomap(vma);
++ i915_vma_unpin(vma);
++ if (IS_ERR(iomem)) {
++ vma = iomem;
++ goto err;
++ }
++
++ dpt->vma = vma;
++ dpt->iomem = iomem;
++
++ i915_vma_get(vma);
++
++err:
++ atomic_dec(&i915->gpu_error.pending_fb_pin);
++ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
++
++ return vma;
++}
++
++void intel_dpt_unpin(struct i915_address_space *vm)
++{
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++
++ i915_vma_unpin_iomap(dpt->vma);
++ i915_vma_put(dpt->vma);
++}
++
++struct i915_address_space *
++intel_dpt_create(struct intel_framebuffer *fb)
++{
++ struct drm_gem_object *obj = &intel_fb_obj(&fb->base)->base;
++ struct drm_i915_private *i915 = to_i915(obj->dev);
++ struct drm_i915_gem_object *dpt_obj;
++ struct i915_address_space *vm;
++ struct i915_dpt *dpt;
++ size_t size;
++ int ret;
++
++ if (intel_fb_needs_pot_stride_remap(fb))
++ size = intel_remapped_info_size(&fb->remapped_view.gtt.remapped);
++ else
++ size = DIV_ROUND_UP_ULL(obj->size, I915_GTT_PAGE_SIZE);
++
++ size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE);
++
++ if (HAS_LMEM(i915))
++ dpt_obj = i915_gem_object_create_lmem(i915, size, 0);
++ else
++ dpt_obj = i915_gem_object_create_stolen(i915, size);
++ if (IS_ERR(dpt_obj))
++ return ERR_CAST(dpt_obj);
++
++ ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
++ if (ret) {
++ i915_gem_object_put(dpt_obj);
++ return ERR_PTR(ret);
++ }
++
++ dpt = kzalloc(sizeof(*dpt), GFP_KERNEL);
++ if (!dpt) {
++ i915_gem_object_put(dpt_obj);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ vm = &dpt->vm;
++
++ vm->gt = &i915->gt;
++ vm->i915 = i915;
++ vm->dma = i915->drm.dev;
++ vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
++ vm->is_dpt = true;
++
++ i915_address_space_init(vm, VM_CLASS_DPT);
++
++ vm->insert_page = dpt_insert_page;
++ vm->clear_range = dpt_clear_range;
++ vm->insert_entries = dpt_insert_entries;
++ vm->cleanup = dpt_cleanup;
++
++ vm->vma_ops.bind_vma = dpt_bind_vma;
++ vm->vma_ops.unbind_vma = dpt_unbind_vma;
++ vm->vma_ops.set_pages = ggtt_set_pages;
++ vm->vma_ops.clear_pages = clear_pages;
++
++ vm->pte_encode = gen8_ggtt_pte_encode;
++
++ dpt->obj = dpt_obj;
++
++ return &dpt->vm;
++}
++
++void intel_dpt_destroy(struct i915_address_space *vm)
++{
++ struct i915_dpt *dpt = i915_vm_to_dpt(vm);
++
++ i915_vm_close(&dpt->vm);
++}
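A quick size check for intel_dpt_create() above, assuming the usual 8-byte gen8_pte_t and 4 KiB I915_GTT_PAGE_SIZE: one 4 KiB page of the DPT object holds 512 PTEs and therefore maps 2 MiB of framebuffer. A linear 32 MiB framebuffer, for example, needs 8192 PTEs, i.e. a 64 KiB DPT object, and vm->total = (65536 / 8) * 4096 recovers the original 32 MiB.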
+diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h
+new file mode 100644
+index 0000000000000..45142b8f849f6
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_dpt.h
+@@ -0,0 +1,19 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#ifndef __INTEL_DPT_H__
++#define __INTEL_DPT_H__
++
++struct i915_address_space;
++struct i915_vma;
++struct intel_framebuffer;
++
++void intel_dpt_destroy(struct i915_address_space *vm);
++struct i915_vma *intel_dpt_pin(struct i915_address_space *vm);
++void intel_dpt_unpin(struct i915_address_space *vm);
++struct i915_address_space *
++intel_dpt_create(struct intel_framebuffer *fb);
++
++#endif /* __INTEL_DPT_H__ */
+diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
+new file mode 100644
+index 0000000000000..3c7d6bf579484
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_drrs.c
+@@ -0,0 +1,485 @@
++// SPDX-License-Identifier: MIT
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#include "i915_drv.h"
++#include "intel_atomic.h"
++#include "intel_de.h"
++#include "intel_display_types.h"
++#include "intel_drrs.h"
++#include "intel_panel.h"
++
++/**
++ * DOC: Display Refresh Rate Switching (DRRS)
++ *
++ * Display Refresh Rate Switching (DRRS) is a power conservation feature
++ * which enables switching between low and high refresh rates,
++ * dynamically, based on the usage scenario. This feature is applicable
++ * for internal panels.
++ *
++ * Indication that the panel supports DRRS is given by the panel EDID, which
++ * would list multiple refresh rates for one resolution.
++ *
++ * DRRS is of 2 types - static and seamless.
++ * Static DRRS involves changing refresh rate (RR) by doing a full modeset
++ * (may appear as a blink on screen) and is used in dock-undock scenario.
++ * Seamless DRRS involves changing RR without any visual effect to the user
++ * and can be used during normal system usage. This is done by programming
++ * certain registers.
++ *
++ * Support for static/seamless DRRS may be indicated in the VBT based on
++ * inputs from the panel spec.
++ *
++ * DRRS saves power by switching to low RR based on usage scenarios.
++ *
++ * The implementation is based on frontbuffer tracking implementation. When
++ * there is a disturbance on the screen triggered by user activity or a periodic
++ * system activity, DRRS is disabled (RR is changed to high RR). When there is
++ * no movement on screen, after a timeout of 1 second, a switch to low RR is
++ * made.
++ *
++ * For integration with frontbuffer tracking code, intel_edp_drrs_invalidate()
++ * and intel_edp_drrs_flush() are called.
++ *
++ * DRRS can be further extended to support other internal panels and also
++ * the scenario of video playback wherein RR is set based on the rate
++ * requested by userspace.
++ */
++
++void
++intel_dp_drrs_compute_config(struct intel_dp *intel_dp,
++ struct intel_crtc_state *pipe_config,
++ int output_bpp, bool constant_n)
++{
++ struct intel_connector *intel_connector = intel_dp->attached_connector;
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++ int pixel_clock;
++
++ if (pipe_config->vrr.enable)
++ return;
++
++ /*
++ * DRRS and PSR can't be enabled together, so give preference to PSR
++ * as it allows more power savings by completely shutting down the display;
++ * to guarantee this, intel_dp_drrs_compute_config() must be called
++ * after intel_psr_compute_config().
++ */
++ if (pipe_config->has_psr)
++ return;
++
++ if (!intel_connector->panel.downclock_mode ||
++ dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT)
++ return;
++
++ pipe_config->has_drrs = true;
++
++ pixel_clock = intel_connector->panel.downclock_mode->clock;
++ if (pipe_config->splitter.enable)
++ pixel_clock /= pipe_config->splitter.link_count;
++
++ intel_link_compute_m_n(output_bpp, pipe_config->lane_count, pixel_clock,
++ pipe_config->port_clock, &pipe_config->dp_m2_n2,
++ constant_n, pipe_config->fec_enable);
++
++ /* FIXME: abstract this better */
++ if (pipe_config->splitter.enable)
++ pipe_config->dp_m2_n2.gmch_m *= pipe_config->splitter.link_count;
++}
++
++/**
++ * intel_dp_set_drrs_state - program registers for RR switch to take effect
++ * @dev_priv: i915 device
++ * @crtc_state: a pointer to the active intel_crtc_state
++ * @refresh_rate: RR to be programmed
++ *
++ * This function gets called when refresh rate (RR) has to be changed from
++ * one frequency to another. Switches can be between high and low RR
++ * supported by the panel or to any other RR based on media playback (in
++ * this case, RR value needs to be passed from user space).
++ *
++ * The caller of this function needs to take a lock on dev_priv->drrs.
++ */
++static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv,
++ const struct intel_crtc_state *crtc_state,
++ int refresh_rate)
++{
++ struct intel_dp *intel_dp = dev_priv->drrs.dp;
++ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
++ enum drrs_refresh_rate_type index = DRRS_HIGH_RR;
++
++ if (refresh_rate <= 0) {
++ drm_dbg_kms(&dev_priv->drm,
++ "Refresh rate should be positive non-zero.\n");
++ return;
++ }
++
++ if (intel_dp == NULL) {
++ drm_dbg_kms(&dev_priv->drm, "DRRS not supported.\n");
++ return;
++ }
++
++ if (!crtc) {
++ drm_dbg_kms(&dev_priv->drm,
++ "DRRS: intel_crtc not initialized\n");
++ return;
++ }
++
++ if (dev_priv->drrs.type < SEAMLESS_DRRS_SUPPORT) {
++ drm_dbg_kms(&dev_priv->drm, "Only Seamless DRRS supported.\n");
++ return;
++ }
++
++ if (drm_mode_vrefresh(intel_dp->attached_connector->panel.downclock_mode) ==
++ refresh_rate)
++ index = DRRS_LOW_RR;
++
++ if (index == dev_priv->drrs.refresh_rate_type) {
++ drm_dbg_kms(&dev_priv->drm,
++ "DRRS requested for previously set RR...ignoring\n");
++ return;
++ }
++
++ if (!crtc_state->hw.active) {
++ drm_dbg_kms(&dev_priv->drm,
++ "eDP encoder disabled. CRTC not Active\n");
++ return;
++ }
++
++ if (DISPLAY_VER(dev_priv) >= 8 && !IS_CHERRYVIEW(dev_priv)) {
++ switch (index) {
++ case DRRS_HIGH_RR:
++ intel_dp_set_m_n(crtc_state, M1_N1);
++ break;
++ case DRRS_LOW_RR:
++ intel_dp_set_m_n(crtc_state, M2_N2);
++ break;
++ case DRRS_MAX_RR:
++ default:
++ drm_err(&dev_priv->drm,
++ "Unsupported refreshrate type\n");
++ }
++ } else if (DISPLAY_VER(dev_priv) > 6) {
++ i915_reg_t reg = PIPECONF(crtc_state->cpu_transcoder);
++ u32 val;
++
++ val = intel_de_read(dev_priv, reg);
++ if (index > DRRS_HIGH_RR) {
++ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
++ val |= PIPECONF_EDP_RR_MODE_SWITCH_VLV;
++ else
++ val |= PIPECONF_EDP_RR_MODE_SWITCH;
++ } else {
++ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
++ val &= ~PIPECONF_EDP_RR_MODE_SWITCH_VLV;
++ else
++ val &= ~PIPECONF_EDP_RR_MODE_SWITCH;
++ }
++ intel_de_write(dev_priv, reg, val);
++ }
++
++ dev_priv->drrs.refresh_rate_type = index;
++
++ drm_dbg_kms(&dev_priv->drm, "eDP Refresh Rate set to : %dHz\n",
++ refresh_rate);
++}
++
++static void
++intel_edp_drrs_enable_locked(struct intel_dp *intel_dp)
++{
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++
++ dev_priv->drrs.busy_frontbuffer_bits = 0;
++ dev_priv->drrs.dp = intel_dp;
++}
++
++/**
++ * intel_edp_drrs_enable - init drrs struct if supported
++ * @intel_dp: DP struct
++ * @crtc_state: A pointer to the active crtc state.
++ *
++ * Initializes frontbuffer_bits and drrs.dp
++ */
++void intel_edp_drrs_enable(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state)
++{
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++
++ if (!crtc_state->has_drrs)
++ return;
++
++ drm_dbg_kms(&dev_priv->drm, "Enabling DRRS\n");
++
++ mutex_lock(&dev_priv->drrs.mutex);
++
++ if (dev_priv->drrs.dp) {
++ drm_warn(&dev_priv->drm, "DRRS already enabled\n");
++ goto unlock;
++ }
++
++ intel_edp_drrs_enable_locked(intel_dp);
++
++unlock:
++ mutex_unlock(&dev_priv->drrs.mutex);
++}
++
++static void
++intel_edp_drrs_disable_locked(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state)
++{
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++
++ if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) {
++ int refresh;
++
++ refresh = drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode);
++ intel_dp_set_drrs_state(dev_priv, crtc_state, refresh);
++ }
++
++ dev_priv->drrs.dp = NULL;
++}
++
++/**
++ * intel_edp_drrs_disable - Disable DRRS
++ * @intel_dp: DP struct
++ * @old_crtc_state: Pointer to old crtc_state.
++ *
++ */
++void intel_edp_drrs_disable(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *old_crtc_state)
++{
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++
++ if (!old_crtc_state->has_drrs)
++ return;
++
++ mutex_lock(&dev_priv->drrs.mutex);
++ if (!dev_priv->drrs.dp) {
++ mutex_unlock(&dev_priv->drrs.mutex);
++ return;
++ }
++
++ intel_edp_drrs_disable_locked(intel_dp, old_crtc_state);
++ mutex_unlock(&dev_priv->drrs.mutex);
++
++ cancel_delayed_work_sync(&dev_priv->drrs.work);
++}
++
++/**
++ * intel_edp_drrs_update - Update DRRS state
++ * @intel_dp: Intel DP
++ * @crtc_state: new CRTC state
++ *
++ * This function will update DRRS states, disabling or enabling DRRS when
++ * executing fastsets. For full modeset, intel_edp_drrs_disable() and
++ * intel_edp_drrs_enable() should be called instead.
++ */
++void
++intel_edp_drrs_update(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state)
++{
++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
++
++ if (dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT)
++ return;
++
++ mutex_lock(&dev_priv->drrs.mutex);
++
++ /* New state matches current one? */
++ if (crtc_state->has_drrs == !!dev_priv->drrs.dp)
++ goto unlock;
++
++ if (crtc_state->has_drrs)
++ intel_edp_drrs_enable_locked(intel_dp);
++ else
++ intel_edp_drrs_disable_locked(intel_dp, crtc_state);
++
++unlock:
++ mutex_unlock(&dev_priv->drrs.mutex);
++}
++
++static void intel_edp_drrs_downclock_work(struct work_struct *work)
++{
++ struct drm_i915_private *dev_priv =
++ container_of(work, typeof(*dev_priv), drrs.work.work);
++ struct intel_dp *intel_dp;
++
++ mutex_lock(&dev_priv->drrs.mutex);
++
++ intel_dp = dev_priv->drrs.dp;
++
++ if (!intel_dp)
++ goto unlock;
++
++ /*
++ * The delayed work can race with an invalidate hence we need to
++ * recheck.
++ */
++
++ if (dev_priv->drrs.busy_frontbuffer_bits)
++ goto unlock;
++
++ if (dev_priv->drrs.refresh_rate_type != DRRS_LOW_RR) {
++ struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
++
++ intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
++ drm_mode_vrefresh(intel_dp->attached_connector->panel.downclock_mode));
++ }
++
++unlock:
++ mutex_unlock(&dev_priv->drrs.mutex);
++}
++
++/**
++ * intel_edp_drrs_invalidate - Disable Idleness DRRS
++ * @dev_priv: i915 device
++ * @frontbuffer_bits: frontbuffer plane tracking bits
++ *
++ * This function gets called every time rendering on the given planes starts.
++ * Hence DRRS needs to be upclocked, i.e. (LOW_RR -> HIGH_RR).
++ *
++ * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
++ */
++void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
++ unsigned int frontbuffer_bits)
++{
++ struct intel_dp *intel_dp;
++ struct drm_crtc *crtc;
++ enum pipe pipe;
++
++ if (dev_priv->drrs.type == DRRS_NOT_SUPPORTED)
++ return;
++
++ cancel_delayed_work(&dev_priv->drrs.work);
++
++ mutex_lock(&dev_priv->drrs.mutex);
++
++ intel_dp = dev_priv->drrs.dp;
++ if (!intel_dp) {
++ mutex_unlock(&dev_priv->drrs.mutex);
++ return;
++ }
++
++ crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
++ pipe = to_intel_crtc(crtc)->pipe;
++
++ frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe);
++ dev_priv->drrs.busy_frontbuffer_bits |= frontbuffer_bits;
++
++ /* invalidate means busy screen hence upclock */
++ if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR)
++ intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
++ drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode));
++
++ mutex_unlock(&dev_priv->drrs.mutex);
++}
++
++/**
++ * intel_edp_drrs_flush - Restart Idleness DRRS
++ * @dev_priv: i915 device
++ * @frontbuffer_bits: frontbuffer plane tracking bits
++ *
++ * This function gets called every time rendering on the given planes has
++ * completed or flip on a crtc is completed. So DRRS should be upclocked
++ * (LOW_RR -> HIGH_RR). And also Idleness detection should be started again,
++ * if no other planes are dirty.
++ *
++ * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits.
++ */
++void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
++ unsigned int frontbuffer_bits)
++{
++ struct intel_dp *intel_dp;
++ struct drm_crtc *crtc;
++ enum pipe pipe;
++
++ if (dev_priv->drrs.type == DRRS_NOT_SUPPORTED)
++ return;
++
++ cancel_delayed_work(&dev_priv->drrs.work);
++
++ mutex_lock(&dev_priv->drrs.mutex);
++
++ intel_dp = dev_priv->drrs.dp;
++ if (!intel_dp) {
++ mutex_unlock(&dev_priv->drrs.mutex);
++ return;
++ }
++
++ crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
++ pipe = to_intel_crtc(crtc)->pipe;
++
++ frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe);
++ dev_priv->drrs.busy_frontbuffer_bits &= ~frontbuffer_bits;
++
++ /* flush means busy screen hence upclock */
++ if (frontbuffer_bits && dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR)
++ intel_dp_set_drrs_state(dev_priv, to_intel_crtc(crtc)->config,
++ drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode));
++
++ /*
++ * flush also means no more activity hence schedule downclock, if all
++ * other fbs are quiescent too
++ */
++ if (!dev_priv->drrs.busy_frontbuffer_bits)
++ schedule_delayed_work(&dev_priv->drrs.work,
++ msecs_to_jiffies(1000));
++ mutex_unlock(&dev_priv->drrs.mutex);
++}
++
++/**
++ * intel_dp_drrs_init - Init basic DRRS work and mutex.
++ * @connector: eDP connector
++ * @fixed_mode: preferred mode of panel
++ *
++ * This function is called only once at driver load to initialize basic
++ * DRRS stuff.
++ *
++ * Returns:
++ * Downclock mode if panel supports it, else return NULL.
++ * DRRS support is determined by the presence of downclock mode (apart
++ * from VBT setting).
++ */
++struct drm_display_mode *
++intel_dp_drrs_init(struct intel_connector *connector,
++ struct drm_display_mode *fixed_mode)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
++ struct intel_encoder *encoder = connector->encoder;
++ struct drm_display_mode *downclock_mode = NULL;
++
++ INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_edp_drrs_downclock_work);
++ mutex_init(&dev_priv->drrs.mutex);
++
++ if (DISPLAY_VER(dev_priv) <= 6) {
++ drm_dbg_kms(&dev_priv->drm,
++ "DRRS supported for Gen7 and above\n");
++ return NULL;
++ }
++
++ if ((DISPLAY_VER(dev_priv) < 8 && !HAS_GMCH(dev_priv)) &&
++ encoder->port != PORT_A) {
++ drm_dbg_kms(&dev_priv->drm,
++ "DRRS only supported on eDP port A\n");
++ return NULL;
++ }
++
++ if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) {
++ drm_dbg_kms(&dev_priv->drm, "VBT doesn't support DRRS\n");
++ return NULL;
++ }
++
++ downclock_mode = intel_panel_edid_downclock_mode(connector, fixed_mode);
++ if (!downclock_mode) {
++ drm_dbg_kms(&dev_priv->drm,
++ "Downclock mode is not found. DRRS not supported\n");
++ return NULL;
++ }
++
++ dev_priv->drrs.type = dev_priv->vbt.drrs_type;
++
++ dev_priv->drrs.refresh_rate_type = DRRS_HIGH_RR;
++ drm_dbg_kms(&dev_priv->drm,
++ "seamless DRRS supported for eDP panel.\n");
++ return downclock_mode;
++}
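To make the intended call pattern of the helpers above concrete, here is a minimal caller sketch; the two wrapper functions are hypothetical and only stand in for the real frontbuffer tracking hooks (intel_frontbuffer.c gains an intel_drrs.h include later in this patch).

    #include "i915_drv.h"
    #include "intel_drrs.h"

    /* Hypothetical wrappers illustrating the expected call pattern. */
    static void example_frontbuffer_render_start(struct drm_i915_private *i915,
                                                 unsigned int frontbuffer_bits)
    {
            /* rendering started on these planes: switch back to the high RR */
            intel_edp_drrs_invalidate(i915, frontbuffer_bits);
    }

    static void example_frontbuffer_flush_done(struct drm_i915_private *i915,
                                               unsigned int frontbuffer_bits)
    {
            /*
             * rendering finished: upclock if still in LOW_RR and, once no
             * frontbuffer bits remain busy, arm the 1 second idle timer
             * that drops back to the panel's downclock mode
             */
            intel_edp_drrs_flush(i915, frontbuffer_bits);
    }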
+diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h
+new file mode 100644
+index 0000000000000..ffa175b4cf4f4
+--- /dev/null
++++ b/drivers/gpu/drm/i915/display/intel_drrs.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * Copyright © 2021 Intel Corporation
++ */
++
++#ifndef __INTEL_DRRS_H__
++#define __INTEL_DRRS_H__
++
++#include <linux/types.h>
++
++struct drm_i915_private;
++struct intel_crtc_state;
++struct intel_connector;
++struct intel_dp;
++
++void intel_edp_drrs_enable(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state);
++void intel_edp_drrs_disable(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state);
++void intel_edp_drrs_update(struct intel_dp *intel_dp,
++ const struct intel_crtc_state *crtc_state);
++void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv,
++ unsigned int frontbuffer_bits);
++void intel_edp_drrs_flush(struct drm_i915_private *dev_priv,
++ unsigned int frontbuffer_bits);
++void intel_dp_drrs_compute_config(struct intel_dp *intel_dp,
++ struct intel_crtc_state *pipe_config,
++ int output_bpp, bool constant_n);
++struct drm_display_mode *intel_dp_drrs_init(struct intel_connector *connector,
++ struct drm_display_mode *fixed_mode);
++
++#endif /* __INTEL_DRRS_H__ */
+diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+index c2a2cd1f84dc5..55dd02a01f1ac 100644
+--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
++++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+@@ -121,9 +121,25 @@ struct i2c_adapter_lookup {
+ #define ICL_GPIO_DDPA_CTRLCLK_2 8
+ #define ICL_GPIO_DDPA_CTRLDATA_2 9
+
+-static enum port intel_dsi_seq_port_to_port(u8 port)
++static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
++ u8 seq_port)
+ {
+- return port ? PORT_C : PORT_A;
++ /*
++ * If single link DSI is being used on any port, the VBT sequence block
++ * send packet apparently always has 0 for the port. Just use the port
++ * we have configured, and ignore the sequence block port.
++ */
++ if (hweight8(intel_dsi->ports) == 1)
++ return ffs(intel_dsi->ports) - 1;
++
++ if (seq_port) {
++ if (intel_dsi->ports & BIT(PORT_B))
++ return PORT_B;
++ else if (intel_dsi->ports & BIT(PORT_C))
++ return PORT_C;
++ }
++
++ return PORT_A;
+ }
+
+ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
+@@ -145,15 +161,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
+
+ seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
+
+- /* For DSI single link on Port A & C, the seq_port value which is
+- * parsed from Sequence Block#53 of VBT has been set to 0
+- * Now, read/write of packets for the DSI single link on Port A and
+- * Port C will based on the DVO port from VBT block 2.
+- */
+- if (intel_dsi->ports == (1 << PORT_C))
+- port = PORT_C;
+- else
+- port = intel_dsi_seq_port_to_port(seq_port);
++ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
++
++ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
++ goto out;
+
+ dsi_device = intel_dsi->dsi_hosts[port]->device;
+ if (!dsi_device) {
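Worked examples for the reworked intel_dsi_seq_port_to_port() above, assuming the usual i915 enum port values (PORT_A = 0, PORT_B = 1, PORT_C = 2):

    single-link panel, ports == BIT(PORT_C): any seq_port -> PORT_C  (ffs(BIT(PORT_C)) - 1 == 2)
    single-link panel, ports == BIT(PORT_B): any seq_port -> PORT_B
    dual-link panel,   ports == BIT(PORT_A) | BIT(PORT_C): seq_port 0 -> PORT_A, seq_port 1 -> PORT_C

The single-link PORT_B case is the one the old "return port ? PORT_C : PORT_A" mapping could not express.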
+diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
+index c60a81a81c09c..c6413c5409420 100644
+--- a/drivers/gpu/drm/i915/display/intel_fb.c
++++ b/drivers/gpu/drm/i915/display/intel_fb.c
+@@ -172,8 +172,9 @@ static void intel_fb_plane_dims(const struct intel_framebuffer *fb, int color_pl
+
+ intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, &fb->base, main_plane);
+ intel_fb_plane_get_subsampling(&hsub, &vsub, &fb->base, color_plane);
+- *w = fb->base.width / main_hsub / hsub;
+- *h = fb->base.height / main_vsub / vsub;
++
++ *w = DIV_ROUND_UP(fb->base.width, main_hsub * hsub);
++ *h = DIV_ROUND_UP(fb->base.height, main_vsub * vsub);
+ }
+
+ static u32 intel_adjust_tile_offset(int *x, int *y,
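The switch to DIV_ROUND_UP() in intel_fb_plane_dims() above matters for subsampled planes with odd framebuffer dimensions. As a worked example, for a 2x2 subsampled chroma plane (such as the second plane of an NV12 framebuffer) with fb->base.height == 1081, plain integer division yields 540 rows while DIV_ROUND_UP(1081, 2) yields the 541 rows actually needed to cover the plane.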
+diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+index 8e75debcce1a9..e4834d84ce5e3 100644
+--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
++++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+@@ -62,6 +62,7 @@
+ #include "intel_display_types.h"
+ #include "intel_fbc.h"
+ #include "intel_frontbuffer.h"
++#include "intel_drrs.h"
+ #include "intel_psr.h"
+
+ /**
+diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
+index b04685bb6439c..926ddc6599f5e 100644
+--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
++++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
+@@ -53,21 +53,20 @@
+ #include "intel_panel.h"
+ #include "intel_snps_phy.h"
+
+-static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
++static struct drm_i915_private *intel_hdmi_to_i915(struct intel_hdmi *intel_hdmi)
+ {
+- return hdmi_to_dig_port(intel_hdmi)->base.base.dev;
++ return to_i915(hdmi_to_dig_port(intel_hdmi)->base.base.dev);
+ }
+
+ static void
+ assert_hdmi_port_disabled(struct intel_hdmi *intel_hdmi)
+ {
+- struct drm_device *dev = intel_hdmi_to_dev(intel_hdmi);
+- struct drm_i915_private *dev_priv = to_i915(dev);
++ struct drm_i915_private *dev_priv = intel_hdmi_to_i915(intel_hdmi);
+ u32 enabled_bits;
+
+ enabled_bits = HAS_DDI(dev_priv) ? DDI_BUF_CTL_ENABLE : SDVO_ENABLE;
+
+- drm_WARN(dev,
++ drm_WARN(&dev_priv->drm,
+ intel_de_read(dev_priv, intel_hdmi->hdmi_reg) & enabled_bits,
+ "HDMI port enabled, expecting disabled\n");
+ }
+@@ -1246,13 +1245,14 @@ static void hsw_set_infoframes(struct intel_encoder *encoder,
+
+ void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable)
+ {
+- struct drm_i915_private *dev_priv = to_i915(intel_hdmi_to_dev(hdmi));
+- struct i2c_adapter *adapter =
+- intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
++ struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi);
++ struct i2c_adapter *adapter;
+
+ if (hdmi->dp_dual_mode.type < DRM_DP_DUAL_MODE_TYPE2_DVI)
+ return;
+
++ adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
++
+ drm_dbg_kms(&dev_priv->drm, "%s DP dual mode adaptor TMDS output\n",
+ enable ? "Enabling" : "Disabling");
+
+@@ -1830,7 +1830,8 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi,
+ int clock, bool respect_downstream_limits,
+ bool has_hdmi_sink)
+ {
+- struct drm_i915_private *dev_priv = to_i915(intel_hdmi_to_dev(hdmi));
++ struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi);
++ enum phy phy = intel_port_to_phy(dev_priv, hdmi_to_dig_port(hdmi)->base.port);
+
+ if (clock < 25000)
+ return MODE_CLOCK_LOW;
+@@ -1851,6 +1852,14 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi,
+ if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000)
+ return MODE_CLOCK_RANGE;
+
++ /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */
++ if (intel_phy_is_combo(dev_priv, phy) && clock > 500000 && clock < 533200)
++ return MODE_CLOCK_RANGE;
++
++ /* ICL+ TC PHY PLL can't generate 500-532.8 MHz */
++ if (intel_phy_is_tc(dev_priv, phy) && clock > 500000 && clock < 532800)
++ return MODE_CLOCK_RANGE;
++
+ /*
+ * SNPS PHYs' MPLLB table-based programming can only handle a fixed
+ * set of link rates.
+@@ -1892,7 +1901,7 @@ static bool intel_hdmi_bpc_possible(struct drm_connector *connector,
+ if (ycbcr420_output)
+ return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_36;
+ else
+- return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36;
++ return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_36;
+ case 10:
+ if (DISPLAY_VER(i915) < 11)
+ return false;
+@@ -1903,7 +1912,7 @@ static bool intel_hdmi_bpc_possible(struct drm_connector *connector,
+ if (ycbcr420_output)
+ return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_30;
+ else
+- return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30;
++ return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30;
+ case 8:
+ return true;
+ default:
+@@ -1946,8 +1955,7 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+ {
+ struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
+- struct drm_device *dev = intel_hdmi_to_dev(hdmi);
+- struct drm_i915_private *dev_priv = to_i915(dev);
++ struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi);
+ enum drm_mode_status status;
+ int clock = mode->clock;
+ int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+@@ -2260,6 +2268,17 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
+ return 0;
+ }
+
++void intel_hdmi_encoder_shutdown(struct intel_encoder *encoder)
++{
++ struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
++
++ /*
++ * Give a hand to buggy BIOSen which forget to turn
++ * the TMDS output buffers back on after a reboot.
++ */
++ intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
++}
++
+ static void
+ intel_hdmi_unset_edid(struct drm_connector *connector)
+ {
+diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
+index b43a180d007e0..2bf440eb400ab 100644
+--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
++++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
+@@ -28,6 +28,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port,
+ int intel_hdmi_compute_config(struct intel_encoder *encoder,
+ struct intel_crtc_state *pipe_config,
+ struct drm_connector_state *conn_state);
++void intel_hdmi_encoder_shutdown(struct intel_encoder *encoder);
+ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
+ struct drm_connector *connector,
+ bool high_tmds_clock_ratio,
+diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
+index e0381b0fce914..8f5741ebd58dd 100644
+--- a/drivers/gpu/drm/i915/display/intel_lvds.c
++++ b/drivers/gpu/drm/i915/display/intel_lvds.c
+@@ -40,6 +40,7 @@
+
+ #include "i915_drv.h"
+ #include "intel_atomic.h"
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_de.h"
+ #include "intel_display_types.h"
+diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
+index 3855fba709807..aad5c1cd3898e 100644
+--- a/drivers/gpu/drm/i915/display/intel_opregion.c
++++ b/drivers/gpu/drm/i915/display/intel_opregion.c
+@@ -30,10 +30,9 @@
+ #include <linux/firmware.h>
+ #include <acpi/video.h>
+
+-#include "display/intel_panel.h"
+-
+ #include "i915_drv.h"
+ #include "intel_acpi.h"
++#include "intel_backlight.h"
+ #include "intel_display_types.h"
+ #include "intel_opregion.h"
+
+@@ -361,6 +360,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
+ port++;
+ }
+
++ /*
++ * The port numbering and mapping here is bizarre. The now-obsolete
++ * swsci spec supports ports numbered [0..4]. Port E is handled as a
++ * special case, but port F and beyond are not. The functionality is
++ * supposed to be obsolete for new platforms. Just bail out if the port
++ * number is out of bounds after mapping.
++ */
++ if (port > 4) {
++ drm_dbg_kms(&dev_priv->drm,
++ "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
++ intel_encoder->base.base.id, intel_encoder->base.name,
++ port_name(intel_encoder->port), port);
++ return -EINVAL;
++ }
++
+ if (!enable)
+ parm |= 4 << 8;
+
+diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
+index 7e3f5c6ca4846..dfa5f18171e3b 100644
+--- a/drivers/gpu/drm/i915/display/intel_overlay.c
++++ b/drivers/gpu/drm/i915/display/intel_overlay.c
+@@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay,
+ const struct intel_crtc_state *pipe_config =
+ overlay->crtc->config;
+
++ if (rec->dst_height == 0 || rec->dst_width == 0)
++ return -EINVAL;
++
+ if (rec->dst_x < pipe_config->pipe_src_w &&
+ rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w &&
+ rec->dst_y < pipe_config->pipe_src_h &&
+diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
+index 7d7a60b4d2de7..ad54767440c15 100644
+--- a/drivers/gpu/drm/i915/display/intel_panel.c
++++ b/drivers/gpu/drm/i915/display/intel_panel.c
+@@ -28,17 +28,13 @@
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+ #include <linux/kernel.h>
+-#include <linux/moduleparam.h>
+ #include <linux/pwm.h>
+
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_de.h"
+ #include "intel_display_types.h"
+-#include "intel_dp_aux_backlight.h"
+-#include "intel_dsi_dcs_backlight.h"
+ #include "intel_panel.h"
+
+ void
+@@ -456,1767 +452,6 @@ out:
+ return 0;
+ }
+
+-/**
+- * scale - scale values from one range to another
+- * @source_val: value in range [@source_min..@source_max]
+- * @source_min: minimum legal value for @source_val
+- * @source_max: maximum legal value for @source_val
+- * @target_min: corresponding target value for @source_min
+- * @target_max: corresponding target value for @source_max
+- *
+- * Return @source_val in range [@source_min..@source_max] scaled to range
+- * [@target_min..@target_max].
+- */
+-static u32 scale(u32 source_val,
+- u32 source_min, u32 source_max,
+- u32 target_min, u32 target_max)
+-{
+- u64 target_val;
+-
+- WARN_ON(source_min > source_max);
+- WARN_ON(target_min > target_max);
+-
+- /* defensive */
+- source_val = clamp(source_val, source_min, source_max);
+-
+- /* avoid overflows */
+- target_val = mul_u32_u32(source_val - source_min,
+- target_max - target_min);
+- target_val = DIV_ROUND_CLOSEST_ULL(target_val, source_max - source_min);
+- target_val += target_min;
+-
+- return target_val;
+-}
+-
+-/* Scale user_level in range [0..user_max] to [0..hw_max], clamping the result
+- * to [hw_min..hw_max]. */
+-static u32 clamp_user_to_hw(struct intel_connector *connector,
+- u32 user_level, u32 user_max)
+-{
+- struct intel_panel *panel = &connector->panel;
+- u32 hw_level;
+-
+- hw_level = scale(user_level, 0, user_max, 0, panel->backlight.max);
+- hw_level = clamp(hw_level, panel->backlight.min, panel->backlight.max);
+-
+- return hw_level;
+-}
+-
+-/* Scale hw_level in range [hw_min..hw_max] to [0..user_max]. */
+-static u32 scale_hw_to_user(struct intel_connector *connector,
+- u32 hw_level, u32 user_max)
+-{
+- struct intel_panel *panel = &connector->panel;
+-
+- return scale(hw_level, panel->backlight.min, panel->backlight.max,
+- 0, user_max);
+-}
+-
+-u32 intel_panel_invert_pwm_level(struct intel_connector *connector, u32 val)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
+-
+- if (dev_priv->params.invert_brightness < 0)
+- return val;
+-
+- if (dev_priv->params.invert_brightness > 0 ||
+- dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) {
+- return panel->backlight.pwm_level_max - val + panel->backlight.pwm_level_min;
+- }
+-
+- return val;
+-}
+-
+-void intel_panel_set_pwm_level(const struct drm_connector_state *conn_state, u32 val)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *i915 = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- drm_dbg_kms(&i915->drm, "set backlight PWM = %d\n", val);
+- panel->backlight.pwm_funcs->set(conn_state, val);
+-}
+-
+-u32 intel_panel_backlight_level_to_pwm(struct intel_connector *connector, u32 val)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- drm_WARN_ON_ONCE(&dev_priv->drm,
+- panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0);
+-
+- val = scale(val, panel->backlight.min, panel->backlight.max,
+- panel->backlight.pwm_level_min, panel->backlight.pwm_level_max);
+-
+- return intel_panel_invert_pwm_level(connector, val);
+-}
+-
+-u32 intel_panel_backlight_level_from_pwm(struct intel_connector *connector, u32 val)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- drm_WARN_ON_ONCE(&dev_priv->drm,
+- panel->backlight.max == 0 || panel->backlight.pwm_level_max == 0);
+-
+- if (dev_priv->params.invert_brightness > 0 ||
+- (dev_priv->params.invert_brightness == 0 && dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS))
+- val = panel->backlight.pwm_level_max - (val - panel->backlight.pwm_level_min);
+-
+- return scale(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max,
+- panel->backlight.min, panel->backlight.max);
+-}
+-
+-static u32 lpt_get_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- return intel_de_read(dev_priv, BLC_PWM_PCH_CTL2) & BACKLIGHT_DUTY_CYCLE_MASK;
+-}
+-
+-static u32 pch_get_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- return intel_de_read(dev_priv, BLC_PWM_CPU_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
+-}
+-
+-static u32 i9xx_get_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 val;
+-
+- val = intel_de_read(dev_priv, BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
+- if (DISPLAY_VER(dev_priv) < 4)
+- val >>= 1;
+-
+- if (panel->backlight.combination_mode) {
+- u8 lbpc;
+-
+- pci_read_config_byte(to_pci_dev(dev_priv->drm.dev), LBPC, &lbpc);
+- val *= lbpc;
+- }
+-
+- return val;
+-}
+-
+-static u32 vlv_get_backlight(struct intel_connector *connector, enum pipe pipe)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- if (drm_WARN_ON(&dev_priv->drm, pipe != PIPE_A && pipe != PIPE_B))
+- return 0;
+-
+- return intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe)) & BACKLIGHT_DUTY_CYCLE_MASK;
+-}
+-
+-static u32 bxt_get_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- return intel_de_read(dev_priv,
+- BXT_BLC_PWM_DUTY(panel->backlight.controller));
+-}
+-
+-static u32 ext_pwm_get_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct intel_panel *panel = &connector->panel;
+- struct pwm_state state;
+-
+- pwm_get_state(panel->backlight.pwm, &state);
+- return pwm_get_relative_duty_cycle(&state, 100);
+-}
+-
+-static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- u32 val = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2) & ~BACKLIGHT_DUTY_CYCLE_MASK;
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, val | level);
+-}
+-
+-static void pch_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- u32 tmp;
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL) & ~BACKLIGHT_DUTY_CYCLE_MASK;
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL, tmp | level);
+-}
+-
+-static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 tmp, mask;
+-
+- drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
+-
+- if (panel->backlight.combination_mode) {
+- u8 lbpc;
+-
+- lbpc = level * 0xfe / panel->backlight.pwm_level_max + 1;
+- level /= lbpc;
+- pci_write_config_byte(to_pci_dev(dev_priv->drm.dev), LBPC, lbpc);
+- }
+-
+- if (DISPLAY_VER(dev_priv) == 4) {
+- mask = BACKLIGHT_DUTY_CYCLE_MASK;
+- } else {
+- level <<= 1;
+- mask = BACKLIGHT_DUTY_CYCLE_MASK_PNV;
+- }
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_CTL) & ~mask;
+- intel_de_write(dev_priv, BLC_PWM_CTL, tmp | level);
+-}
+-
+-static void vlv_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe;
+- u32 tmp;
+-
+- tmp = intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe)) & ~BACKLIGHT_DUTY_CYCLE_MASK;
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL(pipe), tmp | level);
+-}
+-
+-static void bxt_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- intel_de_write(dev_priv,
+- BXT_BLC_PWM_DUTY(panel->backlight.controller), level);
+-}
+-
+-static void ext_pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel;
+-
+- pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
+- pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+-}
+-
+-static void
+-intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *i915 = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- drm_dbg_kms(&i915->drm, "set backlight level = %d\n", level);
+-
+- panel->backlight.funcs->set(conn_state, level);
+-}
+-
+-/* set backlight brightness to level in range [0..max], assuming hw min is
+- * respected.
+- */
+-void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state,
+- u32 user_level, u32 user_max)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 hw_level;
+-
+- /*
+- * Lack of crtc may occur during driver init because
+- * connection_mutex isn't held across the entire backlight
+- * setup + modeset readout, and the BIOS can issue the
+- * requests at any time.
+- */
+- if (!panel->backlight.present || !conn_state->crtc)
+- return;
+-
+- mutex_lock(&dev_priv->backlight_lock);
+-
+- drm_WARN_ON(&dev_priv->drm, panel->backlight.max == 0);
+-
+- hw_level = clamp_user_to_hw(connector, user_level, user_max);
+- panel->backlight.level = hw_level;
+-
+- if (panel->backlight.device)
+- panel->backlight.device->props.brightness =
+- scale_hw_to_user(connector,
+- panel->backlight.level,
+- panel->backlight.device->props.max_brightness);
+-
+- if (panel->backlight.enabled)
+- intel_panel_actually_set_backlight(conn_state, hw_level);
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-}
+-
+-static void lpt_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, level);
+-
+- /*
+- * Although we don't support or enable CPU PWM with LPT/SPT based
+- * systems, it may have been enabled prior to loading the
+- * driver. Disable to avoid warnings on LCPLL disable.
+- *
+- * This needs rework if we need to add support for CPU PWM on PCH split
+- * platforms.
+- */
+- tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
+- if (tmp & BLM_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm,
+- "cpu backlight was enabled, disabling\n");
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2,
+- tmp & ~BLM_PWM_ENABLE);
+- }
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE);
+-}
+-
+-static void pch_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, val);
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, tmp & ~BLM_PWM_ENABLE);
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, tmp & ~BLM_PCH_PWM_ENABLE);
+-}
+-
+-static void i9xx_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- intel_panel_set_pwm_level(old_conn_state, val);
+-}
+-
+-static void i965_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- struct drm_i915_private *dev_priv = to_i915(old_conn_state->connector->dev);
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, val);
+-
+- tmp = intel_de_read(dev_priv, BLC_PWM_CTL2);
+- intel_de_write(dev_priv, BLC_PWM_CTL2, tmp & ~BLM_PWM_ENABLE);
+-}
+-
+-static void vlv_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- enum pipe pipe = to_intel_crtc(old_conn_state->crtc)->pipe;
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, val);
+-
+- tmp = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe),
+- tmp & ~BLM_PWM_ENABLE);
+-}
+-
+-static void bxt_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, val);
+-
+- tmp = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- tmp & ~BXT_BLC_PWM_ENABLE);
+-
+- if (panel->backlight.controller == 1) {
+- val = intel_de_read(dev_priv, UTIL_PIN_CTL);
+- val &= ~UTIL_PIN_ENABLE;
+- intel_de_write(dev_priv, UTIL_PIN_CTL, val);
+- }
+-}
+-
+-static void cnp_disable_backlight(const struct drm_connector_state *old_conn_state, u32 val)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 tmp;
+-
+- intel_panel_set_pwm_level(old_conn_state, val);
+-
+- tmp = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- tmp & ~BXT_BLC_PWM_ENABLE);
+-}
+-
+-static void ext_pwm_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- panel->backlight.pwm_state.enabled = false;
+- pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+-}
+-
+-void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state)
+-{
+- struct intel_connector *connector = to_intel_connector(old_conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- if (!panel->backlight.present)
+- return;
+-
+- /*
+- * Do not disable backlight on the vga_switcheroo path. When switching
+- * away from i915, the other client may depend on i915 to handle the
+- * backlight. This will leave the backlight on unnecessarily when
+- * another client is not activated.
+- */
+- if (dev_priv->drm.switch_power_state == DRM_SWITCH_POWER_CHANGING) {
+- drm_dbg_kms(&dev_priv->drm,
+- "Skipping backlight disable on vga switch\n");
+- return;
+- }
+-
+- mutex_lock(&dev_priv->backlight_lock);
+-
+- if (panel->backlight.device)
+- panel->backlight.device->props.power = FB_BLANK_POWERDOWN;
+- panel->backlight.enabled = false;
+- panel->backlight.funcs->disable(old_conn_state, 0);
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-}
+-
+-static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 pch_ctl1, pch_ctl2, schicken;
+-
+- pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- if (pch_ctl1 & BLM_PCH_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "pch backlight already enabled\n");
+- pch_ctl1 &= ~BLM_PCH_PWM_ENABLE;
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
+- }
+-
+- if (HAS_PCH_LPT(dev_priv)) {
+- schicken = intel_de_read(dev_priv, SOUTH_CHICKEN2);
+- if (panel->backlight.alternate_pwm_increment)
+- schicken |= LPT_PWM_GRANULARITY;
+- else
+- schicken &= ~LPT_PWM_GRANULARITY;
+- intel_de_write(dev_priv, SOUTH_CHICKEN2, schicken);
+- } else {
+- schicken = intel_de_read(dev_priv, SOUTH_CHICKEN1);
+- if (panel->backlight.alternate_pwm_increment)
+- schicken |= SPT_PWM_GRANULARITY;
+- else
+- schicken &= ~SPT_PWM_GRANULARITY;
+- intel_de_write(dev_priv, SOUTH_CHICKEN1, schicken);
+- }
+-
+- pch_ctl2 = panel->backlight.pwm_level_max << 16;
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, pch_ctl2);
+-
+- pch_ctl1 = 0;
+- if (panel->backlight.active_low_pwm)
+- pch_ctl1 |= BLM_PCH_POLARITY;
+-
+- /* After LPT, override is the default. */
+- if (HAS_PCH_LPT(dev_priv))
+- pch_ctl1 |= BLM_PCH_OVERRIDE_ENABLE;
+-
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
+- intel_de_posting_read(dev_priv, BLC_PWM_PCH_CTL1);
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
+- pch_ctl1 | BLM_PCH_PWM_ENABLE);
+-
+- /* This won't stick until the above enable. */
+- intel_panel_set_pwm_level(conn_state, level);
+-}
+-
+-static void pch_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
+- u32 cpu_ctl2, pch_ctl1, pch_ctl2;
+-
+- cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
+- if (cpu_ctl2 & BLM_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "cpu backlight already enabled\n");
+- cpu_ctl2 &= ~BLM_PWM_ENABLE;
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2);
+- }
+-
+- pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- if (pch_ctl1 & BLM_PCH_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "pch backlight already enabled\n");
+- pch_ctl1 &= ~BLM_PCH_PWM_ENABLE;
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
+- }
+-
+- if (cpu_transcoder == TRANSCODER_EDP)
+- cpu_ctl2 = BLM_TRANSCODER_EDP;
+- else
+- cpu_ctl2 = BLM_PIPE(cpu_transcoder);
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2);
+- intel_de_posting_read(dev_priv, BLC_PWM_CPU_CTL2);
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2, cpu_ctl2 | BLM_PWM_ENABLE);
+-
+- /* This won't stick until the above enable. */
+- intel_panel_set_pwm_level(conn_state, level);
+-
+- pch_ctl2 = panel->backlight.pwm_level_max << 16;
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL2, pch_ctl2);
+-
+- pch_ctl1 = 0;
+- if (panel->backlight.active_low_pwm)
+- pch_ctl1 |= BLM_PCH_POLARITY;
+-
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1);
+- intel_de_posting_read(dev_priv, BLC_PWM_PCH_CTL1);
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
+- pch_ctl1 | BLM_PCH_PWM_ENABLE);
+-}
+-
+-static void i9xx_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 ctl, freq;
+-
+- ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
+- if (ctl & BACKLIGHT_DUTY_CYCLE_MASK_PNV) {
+- drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
+- intel_de_write(dev_priv, BLC_PWM_CTL, 0);
+- }
+-
+- freq = panel->backlight.pwm_level_max;
+- if (panel->backlight.combination_mode)
+- freq /= 0xff;
+-
+- ctl = freq << 17;
+- if (panel->backlight.combination_mode)
+- ctl |= BLM_LEGACY_MODE;
+- if (IS_PINEVIEW(dev_priv) && panel->backlight.active_low_pwm)
+- ctl |= BLM_POLARITY_PNV;
+-
+- intel_de_write(dev_priv, BLC_PWM_CTL, ctl);
+- intel_de_posting_read(dev_priv, BLC_PWM_CTL);
+-
+- /* XXX: combine this into above write? */
+- intel_panel_set_pwm_level(conn_state, level);
+-
+- /*
+- * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is
+- * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2
+- * that has backlight.
+- */
+- if (DISPLAY_VER(dev_priv) == 2)
+- intel_de_write(dev_priv, BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE);
+-}
+-
+-static void i965_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- enum pipe pipe = to_intel_crtc(conn_state->crtc)->pipe;
+- u32 ctl, ctl2, freq;
+-
+- ctl2 = intel_de_read(dev_priv, BLC_PWM_CTL2);
+- if (ctl2 & BLM_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
+- ctl2 &= ~BLM_PWM_ENABLE;
+- intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2);
+- }
+-
+- freq = panel->backlight.pwm_level_max;
+- if (panel->backlight.combination_mode)
+- freq /= 0xff;
+-
+- ctl = freq << 16;
+- intel_de_write(dev_priv, BLC_PWM_CTL, ctl);
+-
+- ctl2 = BLM_PIPE(pipe);
+- if (panel->backlight.combination_mode)
+- ctl2 |= BLM_COMBINATION_MODE;
+- if (panel->backlight.active_low_pwm)
+- ctl2 |= BLM_POLARITY_I965;
+- intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2);
+- intel_de_posting_read(dev_priv, BLC_PWM_CTL2);
+- intel_de_write(dev_priv, BLC_PWM_CTL2, ctl2 | BLM_PWM_ENABLE);
+-
+- intel_panel_set_pwm_level(conn_state, level);
+-}
+-
+-static void vlv_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
+- u32 ctl, ctl2;
+-
+- ctl2 = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
+- if (ctl2 & BLM_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
+- ctl2 &= ~BLM_PWM_ENABLE;
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe), ctl2);
+- }
+-
+- ctl = panel->backlight.pwm_level_max << 16;
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL(pipe), ctl);
+-
+- /* XXX: combine this into above write? */
+- intel_panel_set_pwm_level(conn_state, level);
+-
+- ctl2 = 0;
+- if (panel->backlight.active_low_pwm)
+- ctl2 |= BLM_POLARITY_I965;
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe), ctl2);
+- intel_de_posting_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
+- intel_de_write(dev_priv, VLV_BLC_PWM_CTL2(pipe),
+- ctl2 | BLM_PWM_ENABLE);
+-}
+-
+-static void bxt_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
+- u32 pwm_ctl, val;
+-
+- /* Controller 1 uses the utility pin. */
+- if (panel->backlight.controller == 1) {
+- val = intel_de_read(dev_priv, UTIL_PIN_CTL);
+- if (val & UTIL_PIN_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm,
+- "util pin already enabled\n");
+- val &= ~UTIL_PIN_ENABLE;
+- intel_de_write(dev_priv, UTIL_PIN_CTL, val);
+- }
+-
+- val = 0;
+- if (panel->backlight.util_pin_active_low)
+- val |= UTIL_PIN_POLARITY;
+- intel_de_write(dev_priv, UTIL_PIN_CTL,
+- val | UTIL_PIN_PIPE(pipe) | UTIL_PIN_MODE_PWM | UTIL_PIN_ENABLE);
+- }
+-
+- pwm_ctl = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- if (pwm_ctl & BXT_BLC_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
+- pwm_ctl &= ~BXT_BLC_PWM_ENABLE;
+- intel_de_write(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl);
+- }
+-
+- intel_de_write(dev_priv,
+- BXT_BLC_PWM_FREQ(panel->backlight.controller),
+- panel->backlight.pwm_level_max);
+-
+- intel_panel_set_pwm_level(conn_state, level);
+-
+- pwm_ctl = 0;
+- if (panel->backlight.active_low_pwm)
+- pwm_ctl |= BXT_BLC_PWM_POLARITY;
+-
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl);
+- intel_de_posting_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl | BXT_BLC_PWM_ENABLE);
+-}
+-
+-static void cnp_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 pwm_ctl;
+-
+- pwm_ctl = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- if (pwm_ctl & BXT_BLC_PWM_ENABLE) {
+- drm_dbg_kms(&dev_priv->drm, "backlight already enabled\n");
+- pwm_ctl &= ~BXT_BLC_PWM_ENABLE;
+- intel_de_write(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl);
+- }
+-
+- intel_de_write(dev_priv,
+- BXT_BLC_PWM_FREQ(panel->backlight.controller),
+- panel->backlight.pwm_level_max);
+-
+- intel_panel_set_pwm_level(conn_state, level);
+-
+- pwm_ctl = 0;
+- if (panel->backlight.active_low_pwm)
+- pwm_ctl |= BXT_BLC_PWM_POLARITY;
+-
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl);
+- intel_de_posting_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+- intel_de_write(dev_priv, BXT_BLC_PWM_CTL(panel->backlight.controller),
+- pwm_ctl | BXT_BLC_PWM_ENABLE);
+-}
+-
+-static void ext_pwm_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100);
+- panel->backlight.pwm_state.enabled = true;
+- pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+-}
+-
+-static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- WARN_ON(panel->backlight.max == 0);
+-
+- if (panel->backlight.level <= panel->backlight.min) {
+- panel->backlight.level = panel->backlight.max;
+- if (panel->backlight.device)
+- panel->backlight.device->props.brightness =
+- scale_hw_to_user(connector,
+- panel->backlight.level,
+- panel->backlight.device->props.max_brightness);
+- }
+-
+- panel->backlight.funcs->enable(crtc_state, conn_state, panel->backlight.level);
+- panel->backlight.enabled = true;
+- if (panel->backlight.device)
+- panel->backlight.device->props.power = FB_BLANK_UNBLANK;
+-}
+-
+-void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
+-
+- if (!panel->backlight.present)
+- return;
+-
+- drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(pipe));
+-
+- mutex_lock(&dev_priv->backlight_lock);
+-
+- __intel_panel_enable_backlight(crtc_state, conn_state);
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-}
+-
+-#if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
+-static u32 intel_panel_get_backlight(struct intel_connector *connector)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 val = 0;
+-
+- mutex_lock(&dev_priv->backlight_lock);
+-
+- if (panel->backlight.enabled)
+- val = panel->backlight.funcs->get(connector, intel_connector_get_pipe(connector));
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-
+- drm_dbg_kms(&dev_priv->drm, "get backlight PWM = %d\n", val);
+- return val;
+-}
+-
+-/* Scale user_level in range [0..user_max] to [hw_min..hw_max]. */
+-static u32 scale_user_to_hw(struct intel_connector *connector,
+- u32 user_level, u32 user_max)
+-{
+- struct intel_panel *panel = &connector->panel;
+-
+- return scale(user_level, 0, user_max,
+- panel->backlight.min, panel->backlight.max);
+-}
+-
+-/* set backlight brightness to level in range [0..max], scaling wrt hw min */
+-static void intel_panel_set_backlight(const struct drm_connector_state *conn_state,
+- u32 user_level, u32 user_max)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 hw_level;
+-
+- if (!panel->backlight.present)
+- return;
+-
+- mutex_lock(&dev_priv->backlight_lock);
+-
+- drm_WARN_ON(&dev_priv->drm, panel->backlight.max == 0);
+-
+- hw_level = scale_user_to_hw(connector, user_level, user_max);
+- panel->backlight.level = hw_level;
+-
+- if (panel->backlight.enabled)
+- intel_panel_actually_set_backlight(conn_state, hw_level);
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-}
+-
+-static int intel_backlight_device_update_status(struct backlight_device *bd)
+-{
+- struct intel_connector *connector = bl_get_data(bd);
+- struct intel_panel *panel = &connector->panel;
+- struct drm_device *dev = connector->base.dev;
+-
+- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+- DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n",
+- bd->props.brightness, bd->props.max_brightness);
+- intel_panel_set_backlight(connector->base.state, bd->props.brightness,
+- bd->props.max_brightness);
+-
+- /*
+- * Allow flipping bl_power as a sub-state of enabled. Sadly the
+- * backlight class device does not make it easy to to differentiate
+- * between callbacks for brightness and bl_power, so our backlight_power
+- * callback needs to take this into account.
+- */
+- if (panel->backlight.enabled) {
+- if (panel->backlight.power) {
+- bool enable = bd->props.power == FB_BLANK_UNBLANK &&
+- bd->props.brightness != 0;
+- panel->backlight.power(connector, enable);
+- }
+- } else {
+- bd->props.power = FB_BLANK_POWERDOWN;
+- }
+-
+- drm_modeset_unlock(&dev->mode_config.connection_mutex);
+- return 0;
+-}
+-
+-static int intel_backlight_device_get_brightness(struct backlight_device *bd)
+-{
+- struct intel_connector *connector = bl_get_data(bd);
+- struct drm_device *dev = connector->base.dev;
+- struct drm_i915_private *dev_priv = to_i915(dev);
+- intel_wakeref_t wakeref;
+- int ret = 0;
+-
+- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
+- u32 hw_level;
+-
+- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+-
+- hw_level = intel_panel_get_backlight(connector);
+- ret = scale_hw_to_user(connector,
+- hw_level, bd->props.max_brightness);
+-
+- drm_modeset_unlock(&dev->mode_config.connection_mutex);
+- }
+-
+- return ret;
+-}
+-
+-static const struct backlight_ops intel_backlight_device_ops = {
+- .update_status = intel_backlight_device_update_status,
+- .get_brightness = intel_backlight_device_get_brightness,
+-};
+-
+-int intel_backlight_device_register(struct intel_connector *connector)
+-{
+- struct drm_i915_private *i915 = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- struct backlight_properties props;
+- struct backlight_device *bd;
+- const char *name;
+- int ret = 0;
+-
+- if (WARN_ON(panel->backlight.device))
+- return -ENODEV;
+-
+- if (!panel->backlight.present)
+- return 0;
+-
+- WARN_ON(panel->backlight.max == 0);
+-
+- memset(&props, 0, sizeof(props));
+- props.type = BACKLIGHT_RAW;
+-
+- /*
+- * Note: Everything should work even if the backlight device max
+- * presented to the userspace is arbitrarily chosen.
+- */
+- props.max_brightness = panel->backlight.max;
+- props.brightness = scale_hw_to_user(connector,
+- panel->backlight.level,
+- props.max_brightness);
+-
+- if (panel->backlight.enabled)
+- props.power = FB_BLANK_UNBLANK;
+- else
+- props.power = FB_BLANK_POWERDOWN;
+-
+- name = kstrdup("intel_backlight", GFP_KERNEL);
+- if (!name)
+- return -ENOMEM;
+-
+- bd = backlight_device_register(name, connector->base.kdev, connector,
+- &intel_backlight_device_ops, &props);
+-
+- /*
+- * Using the same name independent of the drm device or connector
+- * prevents registration of multiple backlight devices in the
+- * driver. However, we need to use the default name for backward
+- * compatibility. Use unique names for subsequent backlight devices as a
+- * fallback when the default name already exists.
+- */
+- if (IS_ERR(bd) && PTR_ERR(bd) == -EEXIST) {
+- kfree(name);
+- name = kasprintf(GFP_KERNEL, "card%d-%s-backlight",
+- i915->drm.primary->index, connector->base.name);
+- if (!name)
+- return -ENOMEM;
+-
+- bd = backlight_device_register(name, connector->base.kdev, connector,
+- &intel_backlight_device_ops, &props);
+- }
+-
+- if (IS_ERR(bd)) {
+- drm_err(&i915->drm,
+- "[CONNECTOR:%d:%s] backlight device %s register failed: %ld\n",
+- connector->base.base.id, connector->base.name, name, PTR_ERR(bd));
+- ret = PTR_ERR(bd);
+- goto out;
+- }
+-
+- panel->backlight.device = bd;
+-
+- drm_dbg_kms(&i915->drm,
+- "[CONNECTOR:%d:%s] backlight device %s registered\n",
+- connector->base.base.id, connector->base.name, name);
+-
+-out:
+- kfree(name);
+-
+- return ret;
+-}
+-
+-void intel_backlight_device_unregister(struct intel_connector *connector)
+-{
+- struct intel_panel *panel = &connector->panel;
+-
+- if (panel->backlight.device) {
+- backlight_device_unregister(panel->backlight.device);
+- panel->backlight.device = NULL;
+- }
+-}
+-#endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */
+-
+-/*
+- * CNP: PWM clock frequency is 19.2 MHz or 24 MHz.
+- * PWM increment = 1
+- */
+-static u32 cnp_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(dev_priv)->rawclk_freq),
+- pwm_freq_hz);
+-}
+-
+-/*
+- * BXT: PWM clock frequency = 19.2 MHz.
+- */
+-static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- return DIV_ROUND_CLOSEST(KHz(19200), pwm_freq_hz);
+-}
+-
+-/*
+- * SPT: This value represents the period of the PWM stream in clock periods
+- * multiplied by 16 (default increment) or 128 (alternate increment selected in
+- * SCHICKEN_1 bit 0). PWM clock is 24 MHz.
+- */
+-static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct intel_panel *panel = &connector->panel;
+- u32 mul;
+-
+- if (panel->backlight.alternate_pwm_increment)
+- mul = 128;
+- else
+- mul = 16;
+-
+- return DIV_ROUND_CLOSEST(MHz(24), pwm_freq_hz * mul);
+-}
+-
+-/*
+- * LPT: This value represents the period of the PWM stream in clock periods
+- * multiplied by 128 (default increment) or 16 (alternate increment, selected in
+- * LPT SOUTH_CHICKEN2 register bit 5).
+- */
+-static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 mul, clock;
+-
+- if (panel->backlight.alternate_pwm_increment)
+- mul = 16;
+- else
+- mul = 128;
+-
+- if (HAS_PCH_LPT_H(dev_priv))
+- clock = MHz(135); /* LPT:H */
+- else
+- clock = MHz(24); /* LPT:LP */
+-
+- return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul);
+-}
+-
+-/*
+- * ILK/SNB/IVB: This value represents the period of the PWM stream in PCH
+- * display raw clocks multiplied by 128.
+- */
+-static u32 pch_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(dev_priv)->rawclk_freq),
+- pwm_freq_hz * 128);
+-}
+-
+-/*
+- * Gen2: This field determines the number of time base events (display core
+- * clock frequency/32) in total for a complete cycle of modulated backlight
+- * control.
+- *
+- * Gen3: A time base event equals the display core clock ([DevPNV] HRAW clock)
+- * divided by 32.
+- */
+-static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- int clock;
+-
+- if (IS_PINEVIEW(dev_priv))
+- clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
+- else
+- clock = KHz(dev_priv->cdclk.hw.cdclk);
+-
+- return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32);
+-}
+-
+-/*
+- * Gen4: This value represents the period of the PWM stream in display core
+- * clocks ([DevCTG] HRAW clocks) multiplied by 128.
+- *
+- */
+-static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- int clock;
+-
+- if (IS_G4X(dev_priv))
+- clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
+- else
+- clock = KHz(dev_priv->cdclk.hw.cdclk);
+-
+- return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128);
+-}
+-
+-/*
+- * VLV: This value represents the period of the PWM stream in display core
+- * clocks ([DevCTG] 200MHz HRAW clocks) multiplied by 128 or 25MHz S0IX clocks
+- * multiplied by 16. CHV uses a 19.2MHz S0IX clock.
+- */
+-static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- int mul, clock;
+-
+- if ((intel_de_read(dev_priv, CBR1_VLV) & CBR_PWM_CLOCK_MUX_SELECT) == 0) {
+- if (IS_CHERRYVIEW(dev_priv))
+- clock = KHz(19200);
+- else
+- clock = MHz(25);
+- mul = 16;
+- } else {
+- clock = KHz(RUNTIME_INFO(dev_priv)->rawclk_freq);
+- mul = 128;
+- }
+-
+- return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul);
+-}
+-
+-static u16 get_vbt_pwm_freq(struct drm_i915_private *dev_priv)
+-{
+- u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz;
+-
+- if (pwm_freq_hz) {
+- drm_dbg_kms(&dev_priv->drm,
+- "VBT defined backlight frequency %u Hz\n",
+- pwm_freq_hz);
+- } else {
+- pwm_freq_hz = 200;
+- drm_dbg_kms(&dev_priv->drm,
+- "default backlight frequency %u Hz\n",
+- pwm_freq_hz);
+- }
+-
+- return pwm_freq_hz;
+-}
+-
+-static u32 get_backlight_max_vbt(struct intel_connector *connector)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u16 pwm_freq_hz = get_vbt_pwm_freq(dev_priv);
+- u32 pwm;
+-
+- if (!panel->backlight.pwm_funcs->hz_to_pwm) {
+- drm_dbg_kms(&dev_priv->drm,
+- "backlight frequency conversion not supported\n");
+- return 0;
+- }
+-
+- pwm = panel->backlight.pwm_funcs->hz_to_pwm(connector, pwm_freq_hz);
+- if (!pwm) {
+- drm_dbg_kms(&dev_priv->drm,
+- "backlight frequency conversion failed\n");
+- return 0;
+- }
+-
+- return pwm;
+-}
+-
+-/*
+- * Note: The setup hooks can't assume pipe is set!
+- */
+-static u32 get_backlight_min_vbt(struct intel_connector *connector)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- int min;
+-
+- drm_WARN_ON(&dev_priv->drm, panel->backlight.pwm_level_max == 0);
+-
+- /*
+- * XXX: If the vbt value is 255, it makes min equal to max, which leads
+- * to problems. There are such machines out there. Either our
+- * interpretation is wrong or the vbt has bogus data. Or both. Safeguard
+- * against this by letting the minimum be at most (arbitrarily chosen)
+- * 25% of the max.
+- */
+- min = clamp_t(int, dev_priv->vbt.backlight.min_brightness, 0, 64);
+- if (min != dev_priv->vbt.backlight.min_brightness) {
+- drm_dbg_kms(&dev_priv->drm,
+- "clamping VBT min backlight %d/255 to %d/255\n",
+- dev_priv->vbt.backlight.min_brightness, min);
+- }
+-
+- /* vbt value is a coefficient in range [0..255] */
+- return scale(min, 0, 255, 0, panel->backlight.pwm_level_max);
+-}
+-
+-static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 cpu_ctl2, pch_ctl1, pch_ctl2, val;
+- bool alt, cpu_mode;
+-
+- if (HAS_PCH_LPT(dev_priv))
+- alt = intel_de_read(dev_priv, SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY;
+- else
+- alt = intel_de_read(dev_priv, SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY;
+- panel->backlight.alternate_pwm_increment = alt;
+-
+- pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY;
+-
+- pch_ctl2 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2);
+- panel->backlight.pwm_level_max = pch_ctl2 >> 16;
+-
+- cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- panel->backlight.pwm_enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE;
+-
+- cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(dev_priv) &&
+- !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) &&
+- (cpu_ctl2 & BLM_PWM_ENABLE);
+-
+- if (cpu_mode) {
+- val = pch_get_backlight(connector, unused);
+-
+- drm_dbg_kms(&dev_priv->drm,
+- "CPU backlight register was enabled, switching to PCH override\n");
+-
+- /* Write converted CPU PWM value to PCH override register */
+- lpt_set_backlight(connector->base.state, val);
+- intel_de_write(dev_priv, BLC_PWM_PCH_CTL1,
+- pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE);
+-
+- intel_de_write(dev_priv, BLC_PWM_CPU_CTL2,
+- cpu_ctl2 & ~BLM_PWM_ENABLE);
+- }
+-
+- return 0;
+-}
+-
+-static int pch_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 cpu_ctl2, pch_ctl1, pch_ctl2;
+-
+- pch_ctl1 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL1);
+- panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY;
+-
+- pch_ctl2 = intel_de_read(dev_priv, BLC_PWM_PCH_CTL2);
+- panel->backlight.pwm_level_max = pch_ctl2 >> 16;
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- cpu_ctl2 = intel_de_read(dev_priv, BLC_PWM_CPU_CTL2);
+- panel->backlight.pwm_enabled = (cpu_ctl2 & BLM_PWM_ENABLE) &&
+- (pch_ctl1 & BLM_PCH_PWM_ENABLE);
+-
+- return 0;
+-}
+-
+-static int i9xx_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 ctl, val;
+-
+- ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
+-
+- if (DISPLAY_VER(dev_priv) == 2 || IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
+- panel->backlight.combination_mode = ctl & BLM_LEGACY_MODE;
+-
+- if (IS_PINEVIEW(dev_priv))
+- panel->backlight.active_low_pwm = ctl & BLM_POLARITY_PNV;
+-
+- panel->backlight.pwm_level_max = ctl >> 17;
+-
+- if (!panel->backlight.pwm_level_max) {
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+- panel->backlight.pwm_level_max >>= 1;
+- }
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- if (panel->backlight.combination_mode)
+- panel->backlight.pwm_level_max *= 0xff;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- val = i9xx_get_backlight(connector, unused);
+- val = intel_panel_invert_pwm_level(connector, val);
+- val = clamp(val, panel->backlight.pwm_level_min, panel->backlight.pwm_level_max);
+-
+- panel->backlight.pwm_enabled = val != 0;
+-
+- return 0;
+-}
+-
+-static int i965_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 ctl, ctl2;
+-
+- ctl2 = intel_de_read(dev_priv, BLC_PWM_CTL2);
+- panel->backlight.combination_mode = ctl2 & BLM_COMBINATION_MODE;
+- panel->backlight.active_low_pwm = ctl2 & BLM_POLARITY_I965;
+-
+- ctl = intel_de_read(dev_priv, BLC_PWM_CTL);
+- panel->backlight.pwm_level_max = ctl >> 16;
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- if (panel->backlight.combination_mode)
+- panel->backlight.pwm_level_max *= 0xff;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- panel->backlight.pwm_enabled = ctl2 & BLM_PWM_ENABLE;
+-
+- return 0;
+-}
+-
+-static int vlv_setup_backlight(struct intel_connector *connector, enum pipe pipe)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 ctl, ctl2;
+-
+- if (drm_WARN_ON(&dev_priv->drm, pipe != PIPE_A && pipe != PIPE_B))
+- return -ENODEV;
+-
+- ctl2 = intel_de_read(dev_priv, VLV_BLC_PWM_CTL2(pipe));
+- panel->backlight.active_low_pwm = ctl2 & BLM_POLARITY_I965;
+-
+- ctl = intel_de_read(dev_priv, VLV_BLC_PWM_CTL(pipe));
+- panel->backlight.pwm_level_max = ctl >> 16;
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- panel->backlight.pwm_enabled = ctl2 & BLM_PWM_ENABLE;
+-
+- return 0;
+-}
+-
+-static int
+-bxt_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 pwm_ctl, val;
+-
+- panel->backlight.controller = dev_priv->vbt.backlight.controller;
+-
+- pwm_ctl = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+-
+- /* Controller 1 uses the utility pin. */
+- if (panel->backlight.controller == 1) {
+- val = intel_de_read(dev_priv, UTIL_PIN_CTL);
+- panel->backlight.util_pin_active_low =
+- val & UTIL_PIN_POLARITY;
+- }
+-
+- panel->backlight.active_low_pwm = pwm_ctl & BXT_BLC_PWM_POLARITY;
+- panel->backlight.pwm_level_max =
+- intel_de_read(dev_priv, BXT_BLC_PWM_FREQ(panel->backlight.controller));
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- panel->backlight.pwm_enabled = pwm_ctl & BXT_BLC_PWM_ENABLE;
+-
+- return 0;
+-}
+-
+-static int
+-cnp_setup_backlight(struct intel_connector *connector, enum pipe unused)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+- u32 pwm_ctl;
+-
+- /*
+- * CNP has the BXT implementation of backlight, but with only one
+- * controller. TODO: ICP has multiple controllers but we only use
+- * controller 0 for now.
+- */
+- panel->backlight.controller = 0;
+-
+- pwm_ctl = intel_de_read(dev_priv,
+- BXT_BLC_PWM_CTL(panel->backlight.controller));
+-
+- panel->backlight.active_low_pwm = pwm_ctl & BXT_BLC_PWM_POLARITY;
+- panel->backlight.pwm_level_max =
+- intel_de_read(dev_priv, BXT_BLC_PWM_FREQ(panel->backlight.controller));
+-
+- if (!panel->backlight.pwm_level_max)
+- panel->backlight.pwm_level_max = get_backlight_max_vbt(connector);
+-
+- if (!panel->backlight.pwm_level_max)
+- return -ENODEV;
+-
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- panel->backlight.pwm_enabled = pwm_ctl & BXT_BLC_PWM_ENABLE;
+-
+- return 0;
+-}
+-
+-static int ext_pwm_setup_backlight(struct intel_connector *connector,
+- enum pipe pipe)
+-{
+- struct drm_device *dev = connector->base.dev;
+- struct drm_i915_private *dev_priv = to_i915(dev);
+- struct intel_panel *panel = &connector->panel;
+- const char *desc;
+- u32 level;
+-
+- /* Get the right PWM chip for DSI backlight according to VBT */
+- if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) {
+- panel->backlight.pwm = pwm_get(dev->dev, "pwm_pmic_backlight");
+- desc = "PMIC";
+- } else {
+- panel->backlight.pwm = pwm_get(dev->dev, "pwm_soc_backlight");
+- desc = "SoC";
+- }
+-
+- if (IS_ERR(panel->backlight.pwm)) {
+- drm_err(&dev_priv->drm, "Failed to get the %s PWM chip\n",
+- desc);
+- panel->backlight.pwm = NULL;
+- return -ENODEV;
+- }
+-
+- panel->backlight.pwm_level_max = 100; /* 100% */
+- panel->backlight.pwm_level_min = get_backlight_min_vbt(connector);
+-
+- if (pwm_is_enabled(panel->backlight.pwm)) {
+- /* PWM is already enabled, use existing settings */
+- pwm_get_state(panel->backlight.pwm, &panel->backlight.pwm_state);
+-
+- level = pwm_get_relative_duty_cycle(&panel->backlight.pwm_state,
+- 100);
+- level = intel_panel_invert_pwm_level(connector, level);
+- panel->backlight.pwm_enabled = true;
+-
+- drm_dbg_kms(&dev_priv->drm, "PWM already enabled at freq %ld, VBT freq %d, level %d\n",
+- NSEC_PER_SEC / (unsigned long)panel->backlight.pwm_state.period,
+- get_vbt_pwm_freq(dev_priv), level);
+- } else {
+- /* Set period from VBT frequency, leave other settings at 0. */
+- panel->backlight.pwm_state.period =
+- NSEC_PER_SEC / get_vbt_pwm_freq(dev_priv);
+- }
+-
+- drm_info(&dev_priv->drm, "Using %s PWM for LCD backlight control\n",
+- desc);
+- return 0;
+-}
+-
+-static void intel_pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- panel->backlight.pwm_funcs->set(conn_state,
+- intel_panel_invert_pwm_level(connector, level));
+-}
+-
+-static u32 intel_pwm_get_backlight(struct intel_connector *connector, enum pipe pipe)
+-{
+- struct intel_panel *panel = &connector->panel;
+-
+- return intel_panel_invert_pwm_level(connector,
+- panel->backlight.pwm_funcs->get(connector, pipe));
+-}
+-
+-static void intel_pwm_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- panel->backlight.pwm_funcs->enable(crtc_state, conn_state,
+- intel_panel_invert_pwm_level(connector, level));
+-}
+-
+-static void intel_pwm_disable_backlight(const struct drm_connector_state *conn_state, u32 level)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct intel_panel *panel = &connector->panel;
+-
+- panel->backlight.pwm_funcs->disable(conn_state,
+- intel_panel_invert_pwm_level(connector, level));
+-}
+-
+-static int intel_pwm_setup_backlight(struct intel_connector *connector, enum pipe pipe)
+-{
+- struct intel_panel *panel = &connector->panel;
+- int ret = panel->backlight.pwm_funcs->setup(connector, pipe);
+-
+- if (ret < 0)
+- return ret;
+-
+- panel->backlight.min = panel->backlight.pwm_level_min;
+- panel->backlight.max = panel->backlight.pwm_level_max;
+- panel->backlight.level = intel_pwm_get_backlight(connector, pipe);
+- panel->backlight.enabled = panel->backlight.pwm_enabled;
+-
+- return 0;
+-}
+-
+-void intel_panel_update_backlight(struct intel_atomic_state *state,
+- struct intel_encoder *encoder,
+- const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state)
+-{
+- struct intel_connector *connector = to_intel_connector(conn_state->connector);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+- struct intel_panel *panel = &connector->panel;
+-
+- if (!panel->backlight.present)
+- return;
+-
+- mutex_lock(&dev_priv->backlight_lock);
+- if (!panel->backlight.enabled)
+- __intel_panel_enable_backlight(crtc_state, conn_state);
+-
+- mutex_unlock(&dev_priv->backlight_lock);
+-}
+-
+-int intel_panel_setup_backlight(struct drm_connector *connector, enum pipe pipe)
+-{
+- struct drm_i915_private *dev_priv = to_i915(connector->dev);
+- struct intel_connector *intel_connector = to_intel_connector(connector);
+- struct intel_panel *panel = &intel_connector->panel;
+- int ret;
+-
+- if (!dev_priv->vbt.backlight.present) {
+- if (dev_priv->quirks & QUIRK_BACKLIGHT_PRESENT) {
+- drm_dbg_kms(&dev_priv->drm,
+- "no backlight present per VBT, but present per quirk\n");
+- } else {
+- drm_dbg_kms(&dev_priv->drm,
+- "no backlight present per VBT\n");
+- return 0;
+- }
+- }
+-
+- /* ensure intel_panel has been initialized first */
+- if (drm_WARN_ON(&dev_priv->drm, !panel->backlight.funcs))
+- return -ENODEV;
+-
+- /* set level and max in panel struct */
+- mutex_lock(&dev_priv->backlight_lock);
+- ret = panel->backlight.funcs->setup(intel_connector, pipe);
+- mutex_unlock(&dev_priv->backlight_lock);
+-
+- if (ret) {
+- drm_dbg_kms(&dev_priv->drm,
+- "failed to setup backlight for connector %s\n",
+- connector->name);
+- return ret;
+- }
+-
+- panel->backlight.present = true;
+-
+- drm_dbg_kms(&dev_priv->drm,
+- "Connector %s backlight initialized, %s, brightness %u/%u\n",
+- connector->name,
+- enableddisabled(panel->backlight.enabled),
+- panel->backlight.level, panel->backlight.max);
+-
+- return 0;
+-}
+-
+-static void intel_panel_destroy_backlight(struct intel_panel *panel)
+-{
+- /* dispose of the pwm */
+- if (panel->backlight.pwm)
+- pwm_put(panel->backlight.pwm);
+-
+- panel->backlight.present = false;
+-}
+-
+-static const struct intel_panel_bl_funcs bxt_pwm_funcs = {
+- .setup = bxt_setup_backlight,
+- .enable = bxt_enable_backlight,
+- .disable = bxt_disable_backlight,
+- .set = bxt_set_backlight,
+- .get = bxt_get_backlight,
+- .hz_to_pwm = bxt_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs cnp_pwm_funcs = {
+- .setup = cnp_setup_backlight,
+- .enable = cnp_enable_backlight,
+- .disable = cnp_disable_backlight,
+- .set = bxt_set_backlight,
+- .get = bxt_get_backlight,
+- .hz_to_pwm = cnp_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs lpt_pwm_funcs = {
+- .setup = lpt_setup_backlight,
+- .enable = lpt_enable_backlight,
+- .disable = lpt_disable_backlight,
+- .set = lpt_set_backlight,
+- .get = lpt_get_backlight,
+- .hz_to_pwm = lpt_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs spt_pwm_funcs = {
+- .setup = lpt_setup_backlight,
+- .enable = lpt_enable_backlight,
+- .disable = lpt_disable_backlight,
+- .set = lpt_set_backlight,
+- .get = lpt_get_backlight,
+- .hz_to_pwm = spt_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs pch_pwm_funcs = {
+- .setup = pch_setup_backlight,
+- .enable = pch_enable_backlight,
+- .disable = pch_disable_backlight,
+- .set = pch_set_backlight,
+- .get = pch_get_backlight,
+- .hz_to_pwm = pch_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs ext_pwm_funcs = {
+- .setup = ext_pwm_setup_backlight,
+- .enable = ext_pwm_enable_backlight,
+- .disable = ext_pwm_disable_backlight,
+- .set = ext_pwm_set_backlight,
+- .get = ext_pwm_get_backlight,
+-};
+-
+-static const struct intel_panel_bl_funcs vlv_pwm_funcs = {
+- .setup = vlv_setup_backlight,
+- .enable = vlv_enable_backlight,
+- .disable = vlv_disable_backlight,
+- .set = vlv_set_backlight,
+- .get = vlv_get_backlight,
+- .hz_to_pwm = vlv_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs i965_pwm_funcs = {
+- .setup = i965_setup_backlight,
+- .enable = i965_enable_backlight,
+- .disable = i965_disable_backlight,
+- .set = i9xx_set_backlight,
+- .get = i9xx_get_backlight,
+- .hz_to_pwm = i965_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs i9xx_pwm_funcs = {
+- .setup = i9xx_setup_backlight,
+- .enable = i9xx_enable_backlight,
+- .disable = i9xx_disable_backlight,
+- .set = i9xx_set_backlight,
+- .get = i9xx_get_backlight,
+- .hz_to_pwm = i9xx_hz_to_pwm,
+-};
+-
+-static const struct intel_panel_bl_funcs pwm_bl_funcs = {
+- .setup = intel_pwm_setup_backlight,
+- .enable = intel_pwm_enable_backlight,
+- .disable = intel_pwm_disable_backlight,
+- .set = intel_pwm_set_backlight,
+- .get = intel_pwm_get_backlight,
+-};
+-
+-/* Set up chip specific backlight functions */
+-static void
+-intel_panel_init_backlight_funcs(struct intel_panel *panel)
+-{
+- struct intel_connector *connector =
+- container_of(panel, struct intel_connector, panel);
+- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+-
+- if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI &&
+- intel_dsi_dcs_init_backlight_funcs(connector) == 0)
+- return;
+-
+- if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) {
+- panel->backlight.pwm_funcs = &bxt_pwm_funcs;
+- } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) {
+- panel->backlight.pwm_funcs = &cnp_pwm_funcs;
+- } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_LPT) {
+- if (HAS_PCH_LPT(dev_priv))
+- panel->backlight.pwm_funcs = &lpt_pwm_funcs;
+- else
+- panel->backlight.pwm_funcs = &spt_pwm_funcs;
+- } else if (HAS_PCH_SPLIT(dev_priv)) {
+- panel->backlight.pwm_funcs = &pch_pwm_funcs;
+- } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+- if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI) {
+- panel->backlight.pwm_funcs = &ext_pwm_funcs;
+- } else {
+- panel->backlight.pwm_funcs = &vlv_pwm_funcs;
+- }
+- } else if (DISPLAY_VER(dev_priv) == 4) {
+- panel->backlight.pwm_funcs = &i965_pwm_funcs;
+- } else {
+- panel->backlight.pwm_funcs = &i9xx_pwm_funcs;
+- }
+-
+- if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP &&
+- intel_dp_aux_init_backlight_funcs(connector) == 0)
+- return;
+-
+- /* We're using a standard PWM backlight interface */
+- panel->backlight.funcs = &pwm_bl_funcs;
+-}
+-
+ enum drm_connector_status
+ intel_panel_detect(struct drm_connector *connector, bool force)
+ {
+diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h
+index 1d340f77bffc7..67dbb15026bf1 100644
+--- a/drivers/gpu/drm/i915/display/intel_panel.h
++++ b/drivers/gpu/drm/i915/display/intel_panel.h
+@@ -8,15 +8,13 @@
+
+ #include <linux/types.h>
+
+-#include "intel_display.h"
+-
++enum drm_connector_status;
+ struct drm_connector;
+ struct drm_connector_state;
+ struct drm_display_mode;
++struct drm_i915_private;
+ struct intel_connector;
+-struct intel_crtc;
+ struct intel_crtc_state;
+-struct intel_encoder;
+ struct intel_panel;
+
+ int intel_panel_init(struct intel_panel *panel,
+@@ -31,17 +29,6 @@ int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+ int intel_gmch_panel_fitting(struct intel_crtc_state *crtc_state,
+ const struct drm_connector_state *conn_state);
+-void intel_panel_set_backlight_acpi(const struct drm_connector_state *conn_state,
+- u32 level, u32 max);
+-int intel_panel_setup_backlight(struct drm_connector *connector,
+- enum pipe pipe);
+-void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state);
+-void intel_panel_update_backlight(struct intel_atomic_state *state,
+- struct intel_encoder *encoder,
+- const struct intel_crtc_state *crtc_state,
+- const struct drm_connector_state *conn_state);
+-void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state);
+ struct drm_display_mode *
+ intel_panel_edid_downclock_mode(struct intel_connector *connector,
+ const struct drm_display_mode *fixed_mode);
+@@ -49,22 +36,5 @@ struct drm_display_mode *
+ intel_panel_edid_fixed_mode(struct intel_connector *connector);
+ struct drm_display_mode *
+ intel_panel_vbt_fixed_mode(struct intel_connector *connector);
+-void intel_panel_set_pwm_level(const struct drm_connector_state *conn_state, u32 level);
+-u32 intel_panel_invert_pwm_level(struct intel_connector *connector, u32 level);
+-u32 intel_panel_backlight_level_to_pwm(struct intel_connector *connector, u32 level);
+-u32 intel_panel_backlight_level_from_pwm(struct intel_connector *connector, u32 val);
+-
+-#if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
+-int intel_backlight_device_register(struct intel_connector *connector);
+-void intel_backlight_device_unregister(struct intel_connector *connector);
+-#else /* CONFIG_BACKLIGHT_CLASS_DEVICE */
+-static inline int intel_backlight_device_register(struct intel_connector *connector)
+-{
+- return 0;
+-}
+-static inline void intel_backlight_device_unregister(struct intel_connector *connector)
+-{
+-}
+-#endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */
+
+ #endif /* __INTEL_PANEL_H__ */
+diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c
+index a36ec4a818ff5..466bf6820641d 100644
+--- a/drivers/gpu/drm/i915/display/intel_pps.c
++++ b/drivers/gpu/drm/i915/display/intel_pps.c
+@@ -1074,14 +1074,14 @@ static void intel_pps_vdd_sanitize(struct intel_dp *intel_dp)
+ edp_panel_vdd_schedule_off(intel_dp);
+ }
+
+-bool intel_pps_have_power(struct intel_dp *intel_dp)
++bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp)
+ {
+ intel_wakeref_t wakeref;
+ bool have_power = false;
+
+ with_intel_pps_lock(intel_dp, wakeref) {
+- have_power = edp_have_panel_power(intel_dp) &&
+- edp_have_panel_vdd(intel_dp);
++ have_power = edp_have_panel_power(intel_dp) ||
++ edp_have_panel_vdd(intel_dp);
+ }
+
+ return have_power;
+diff --git a/drivers/gpu/drm/i915/display/intel_pps.h b/drivers/gpu/drm/i915/display/intel_pps.h
+index fbbcca782e7b6..9fe7be4fe867e 100644
+--- a/drivers/gpu/drm/i915/display/intel_pps.h
++++ b/drivers/gpu/drm/i915/display/intel_pps.h
+@@ -36,7 +36,7 @@ void intel_pps_vdd_on(struct intel_dp *intel_dp);
+ void intel_pps_on(struct intel_dp *intel_dp);
+ void intel_pps_off(struct intel_dp *intel_dp);
+ void intel_pps_vdd_off_sync(struct intel_dp *intel_dp);
+-bool intel_pps_have_power(struct intel_dp *intel_dp);
++bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp);
+ void intel_pps_wait_power_cycle(struct intel_dp *intel_dp);
+
+ void intel_pps_init(struct intel_dp *intel_dp);
+diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
+index 1b0daf649e823..93d2fd4cd16b7 100644
+--- a/drivers/gpu/drm/i915/display/intel_psr.c
++++ b/drivers/gpu/drm/i915/display/intel_psr.c
+@@ -22,6 +22,7 @@
+ */
+
+ #include <drm/drm_atomic_helper.h>
++#include <drm/drm_damage_helper.h>
+
+ #include "display/intel_dp.h"
+
+@@ -548,6 +549,14 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
+ val |= EDP_PSR2_FRAME_BEFORE_SU(intel_dp->psr.sink_sync_latency + 1);
+ val |= intel_psr2_get_tp_time(intel_dp);
+
++ if (DISPLAY_VER(dev_priv) >= 12) {
++ if (intel_dp->psr.io_wake_lines < 9 &&
++ intel_dp->psr.fast_wake_lines < 9)
++ val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
++ else
++ val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_3;
++ }
++
+ /* Wa_22012278275:adl-p */
+ if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) {
+ static const u8 map[] = {
+@@ -564,31 +573,21 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
+ * Still using the default IO_BUFFER_WAKE and FAST_WAKE, see
+ * comments bellow for more information
+ */
+- u32 tmp, lines = 7;
++ u32 tmp;
+
+- val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
+-
+- tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
++ tmp = map[intel_dp->psr.io_wake_lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
+ tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT;
+ val |= tmp;
+
+- tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
++ tmp = map[intel_dp->psr.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
+ tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT;
+ val |= tmp;
+ } else if (DISPLAY_VER(dev_priv) >= 12) {
+- /*
+- * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default
+- * values from BSpec. In order to setting an optimal power
+- * consumption, lower than 4k resoluition mode needs to decrese
+- * IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution
+- * mode needs to increase IO_BUFFER_WAKE and FAST_WAKE.
+- */
+- val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
+- val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(7);
+- val |= TGL_EDP_PSR2_FAST_WAKE(7);
++ val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
++ val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
+ } else if (DISPLAY_VER(dev_priv) >= 9) {
+- val |= EDP_PSR2_IO_BUFFER_WAKE(7);
+- val |= EDP_PSR2_FAST_WAKE(7);
++ val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
++ val |= EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
+ }
+
+ if (intel_dp->psr.req_psr2_sdp_prior_scanline)
+@@ -755,11 +754,7 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp,
+ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp,
+ struct intel_crtc_state *crtc_state)
+ {
+- struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state);
+ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+- struct intel_plane_state *plane_state;
+- struct intel_plane *plane;
+- int i;
+
+ if (!dev_priv->params.enable_psr2_sel_fetch &&
+ intel_dp->psr.debug != I915_PSR_DEBUG_ENABLE_SEL_FETCH) {
+@@ -774,14 +769,6 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp,
+ return false;
+ }
+
+- for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+- if (plane_state->uapi.rotation != DRM_MODE_ROTATE_0) {
+- drm_dbg_kms(&dev_priv->drm,
+- "PSR2 sel fetch not enabled, plane rotated\n");
+- return false;
+- }
+- }
+-
+ /* Wa_14010254185 Wa_14010103792 */
+ if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) {
+ drm_dbg_kms(&dev_priv->drm,
+@@ -853,6 +840,46 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
+ return true;
+ }
+
++static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
++ struct intel_crtc_state *crtc_state)
++{
++ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
++ int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time;
++ u8 max_wake_lines;
++
++ if (DISPLAY_VER(i915) >= 12) {
++ io_wake_time = 42;
++ /*
++ * According to Bspec it's 42us, but based on testing
++ * it is not enough -> use 45 us.
++ */
++ fast_wake_time = 45;
++ max_wake_lines = 12;
++ } else {
++ io_wake_time = 50;
++ fast_wake_time = 32;
++ max_wake_lines = 8;
++ }
++
++ io_wake_lines = intel_usecs_to_scanlines(
++ &crtc_state->hw.adjusted_mode, io_wake_time);
++ fast_wake_lines = intel_usecs_to_scanlines(
++ &crtc_state->hw.adjusted_mode, fast_wake_time);
++
++ if (io_wake_lines > max_wake_lines ||
++ fast_wake_lines > max_wake_lines)
++ return false;
++
++ if (i915->params.psr_safest_params)
++ io_wake_lines = fast_wake_lines = max_wake_lines;
++
++ /* According to Bspec lower limit should be set as 7 lines. */
++ intel_dp->psr.io_wake_lines = max(io_wake_lines, 7);
++ intel_dp->psr.fast_wake_lines = max(fast_wake_lines, 7);
++
++ return true;
++}
++
+ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
+ struct intel_crtc_state *crtc_state)
+ {
+@@ -936,6 +963,26 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
+ return false;
+ }
+
++ /* Wa_16011303918:adl-p */
++ if (crtc_state->vrr.enable &&
++ IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
++ drm_dbg_kms(&dev_priv->drm,
++ "PSR2 not enabled, not compatible with HW stepping + VRR\n");
++ return false;
++ }
++
++ if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
++ drm_dbg_kms(&dev_priv->drm,
++ "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n");
++ return false;
++ }
++
++ if (!_compute_psr2_wake_times(intel_dp, crtc_state)) {
++ drm_dbg_kms(&dev_priv->drm,
++ "PSR2 not enabled, Unable to use long enough wake times\n");
++ return false;
++ }
++
+ if (HAS_PSR2_SEL_FETCH(dev_priv)) {
+ if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) &&
+ !HAS_PSR_HW_TRACKING(dev_priv)) {
+@@ -949,12 +996,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
+ if (!crtc_state->enable_psr2_sel_fetch &&
+ IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) {
+ drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported this Display stepping\n");
+- return false;
++ goto unsupported;
+ }
+
+ if (!psr2_granularity_check(intel_dp, crtc_state)) {
+ drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n");
+- return false;
++ goto unsupported;
+ }
+
+ if (!crtc_state->enable_psr2_sel_fetch &&
+@@ -963,25 +1010,15 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
+ "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n",
+ crtc_hdisplay, crtc_vdisplay,
+ psr_max_h, psr_max_v);
+- return false;
+- }
+-
+- if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
+- drm_dbg_kms(&dev_priv->drm,
+- "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n");
+- return false;
+- }
+-
+- /* Wa_16011303918:adl-p */
+- if (crtc_state->vrr.enable &&
+- IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
+- drm_dbg_kms(&dev_priv->drm,
+- "PSR2 not enabled, not compatible with HW stepping + VRR\n");
+- return false;
++ goto unsupported;
+ }
+
+ tgl_dc3co_exitline_compute_config(intel_dp, crtc_state);
+ return true;
++
++unsupported:
++ crtc_state->enable_psr2_sel_fetch = false;
++ return false;
+ }
+
+ void intel_psr_compute_config(struct intel_dp *intel_dp,
+@@ -993,7 +1030,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
+ int psr_setup_time;
+
+ /*
+- * Current PSR panels dont work reliably with VRR enabled
++ * Current PSR panels don't work reliably with VRR enabled
+ * So if VRR is enabled, do not enable PSR.
+ */
+ if (crtc_state->vrr.enable)
+@@ -1597,6 +1634,63 @@ static void intel_psr2_sel_fetch_pipe_alignment(const struct intel_crtc_state *c
+ drm_warn(&dev_priv->drm, "Missing PSR2 sel fetch alignment with DSC\n");
+ }
+
++/*
++ * FIXME: Not sure why but when moving the cursor fast it causes some artifacts
++ * of the cursor to be left in the cursor path, adding some pixels above the
++ * cursor to the damaged area fixes the issue.
++ */
++static void cursor_area_workaround(const struct intel_plane_state *new_plane_state,
++ struct drm_rect *damaged_area,
++ struct drm_rect *pipe_clip)
++{
++ const struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
++ int height;
++
++ if (plane->id != PLANE_CURSOR)
++ return;
++
++ height = drm_rect_height(&new_plane_state->uapi.dst) / 2;
++ damaged_area->y1 -= height;
++ damaged_area->y1 = max(damaged_area->y1, 0);
++
++ clip_area_update(pipe_clip, damaged_area);
++}
++
++/*
++ * TODO: Not clear how to handle planes with negative position,
++ * also planes are not updated if they have a negative X
++ * position so for now doing a full update in this cases
++ *
++ * Plane scaling and rotation is not supported by selective fetch and both
++ * properties can change without a modeset, so need to be check at every
++ * atomic commit.
++ */
++static bool psr2_sel_fetch_plane_state_supported(const struct intel_plane_state *plane_state)
++{
++ if (plane_state->uapi.dst.y1 < 0 ||
++ plane_state->uapi.dst.x1 < 0 ||
++ plane_state->scaler_id >= 0 ||
++ plane_state->uapi.rotation != DRM_MODE_ROTATE_0)
++ return false;
++
++ return true;
++}
++
++/*
++ * Check for pipe properties that is not supported by selective fetch.
++ *
++ * TODO: pipe scaling causes a modeset but skl_update_scaler_crtc() is executed
++ * after intel_psr_compute_config(), so for now keeping PSR2 selective fetch
++ * enabled and going to the full update path.
++ */
++static bool psr2_sel_fetch_pipe_state_supported(const struct intel_crtc_state *crtc_state)
++{
++ if (crtc_state->scaler_state.scaler_id >= 0)
++ return false;
++
++ return true;
++}
++
+ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
+ {
+@@ -1610,9 +1704,10 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ if (!crtc_state->enable_psr2_sel_fetch)
+ return 0;
+
+- ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
+- if (ret)
+- return ret;
++ if (!psr2_sel_fetch_pipe_state_supported(crtc_state)) {
++ full_update = true;
++ goto skip_sel_fetch_set_loop;
++ }
+
+ /*
+ * Calculate minimal selective fetch area of each plane and calculate
+@@ -1623,8 +1718,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
+ new_plane_state, i) {
+ struct drm_rect src, damaged_area = { .y1 = -1 };
+- struct drm_mode_rect *damaged_clips;
+- u32 num_clips, j;
++ struct drm_atomic_helper_damage_iter iter;
++ struct drm_rect clip;
+
+ if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
+ continue;
+@@ -1633,19 +1728,11 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ !old_plane_state->uapi.visible)
+ continue;
+
+- /*
+- * TODO: Not clear how to handle planes with negative position,
+- * also planes are not updated if they have a negative X
+- * position so for now doing a full update in this cases
+- */
+- if (new_plane_state->uapi.dst.y1 < 0 ||
+- new_plane_state->uapi.dst.x1 < 0) {
++ if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) {
+ full_update = true;
+ break;
+ }
+
+- num_clips = drm_plane_get_damage_clips_count(&new_plane_state->uapi);
+-
+ /*
+ * If visibility or plane moved, mark the whole plane area as
+ * damaged as it needs to be complete redraw in the new and old
+@@ -1665,15 +1752,12 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ damaged_area.y2 = new_plane_state->uapi.dst.y2;
+ clip_area_update(&pipe_clip, &damaged_area);
+ }
++
++ cursor_area_workaround(new_plane_state, &damaged_area,
++ &pipe_clip);
+ continue;
+- } else if (new_plane_state->uapi.alpha != old_plane_state->uapi.alpha ||
+- (!num_clips &&
+- new_plane_state->uapi.fb != old_plane_state->uapi.fb)) {
+- /*
+- * If the plane don't have damaged areas but the
+- * framebuffer changed or alpha changed, mark the whole
+- * plane area as damaged.
+- */
++ } else if (new_plane_state->uapi.alpha != old_plane_state->uapi.alpha) {
++ /* If alpha changed mark the whole plane area as damaged */
+ damaged_area.y1 = new_plane_state->uapi.dst.y1;
+ damaged_area.y2 = new_plane_state->uapi.dst.y2;
+ clip_area_update(&pipe_clip, &damaged_area);
+@@ -1681,15 +1765,11 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ }
+
+ drm_rect_fp_to_int(&src, &new_plane_state->uapi.src);
+- damaged_clips = drm_plane_get_damage_clips(&new_plane_state->uapi);
+-
+- for (j = 0; j < num_clips; j++) {
+- struct drm_rect clip;
+
+- clip.x1 = damaged_clips[j].x1;
+- clip.y1 = damaged_clips[j].y1;
+- clip.x2 = damaged_clips[j].x2;
+- clip.y2 = damaged_clips[j].y2;
++ drm_atomic_helper_damage_iter_init(&iter,
++ &old_plane_state->uapi,
++ &new_plane_state->uapi);
++ drm_atomic_for_each_plane_damage(&iter, &clip) {
+ if (drm_rect_intersect(&clip, &src))
+ clip_area_update(&damaged_area, &clip);
+ }
+@@ -1705,6 +1785,10 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ if (full_update)
+ goto skip_sel_fetch_set_loop;
+
++ ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
++ if (ret)
++ return ret;
++
+ intel_psr2_sel_fetch_pipe_alignment(crtc_state, &pipe_clip);
+
+ /*
+@@ -1723,6 +1807,11 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
+ if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+ continue;
+
++ if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) {
++ full_update = true;
++ break;
++ }
++
+ sel_fetch_area = &new_plane_state->psr2_sel_fetch_area;
+ sel_fetch_area->y1 = inter.y1 - new_plane_state->uapi.dst.y1;
+ sel_fetch_area->y2 = inter.y2 - new_plane_state->uapi.dst.y1;
+@@ -2022,7 +2111,7 @@ unlock:
+ }
+
+ /**
+- * intel_psr_invalidate - Invalidade PSR
++ * intel_psr_invalidate - Invalidate PSR
+ * @dev_priv: i915 device
+ * @frontbuffer_bits: frontbuffer plane tracking bits
+ * @origin: which operation caused the invalidate
+diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
+index 8a52b7a167746..015e5b806b6d5 100644
+--- a/drivers/gpu/drm/i915/display/intel_quirks.c
++++ b/drivers/gpu/drm/i915/display/intel_quirks.c
+@@ -190,6 +190,11 @@ static struct intel_quirk intel_quirks[] = {
+ /* ASRock ITX*/
+ { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time },
+ { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time },
++ /* ECS Liva Q2 */
++ { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
++ { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
++ /* HP Notebook - 14-r206nv */
++ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness },
+ };
+
+ void intel_init_quirks(struct drm_i915_private *i915)
+diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
+index 6cb27599ea030..adb1693b15758 100644
+--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
++++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
+@@ -2762,13 +2762,10 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
+ if (!intel_sdvo_connector)
+ return false;
+
+- if (device == 0) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS0;
++ if (device == 0)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS0;
+- } else if (device == 1) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS1;
++ else if (device == 1)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS1;
+- }
+
+ intel_connector = &intel_sdvo_connector->base;
+ connector = &intel_connector->base;
+@@ -2823,7 +2820,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type)
+ encoder->encoder_type = DRM_MODE_ENCODER_TVDAC;
+ connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO;
+
+- intel_sdvo->controlled_output |= type;
+ intel_sdvo_connector->output_flag = type;
+
+ if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) {
+@@ -2864,13 +2860,10 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device)
+ encoder->encoder_type = DRM_MODE_ENCODER_DAC;
+ connector->connector_type = DRM_MODE_CONNECTOR_VGA;
+
+- if (device == 0) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB0;
++ if (device == 0)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB0;
+- } else if (device == 1) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB1;
++ else if (device == 1)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1;
+- }
+
+ if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) {
+ kfree(intel_sdvo_connector);
+@@ -2900,13 +2893,10 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device)
+ encoder->encoder_type = DRM_MODE_ENCODER_LVDS;
+ connector->connector_type = DRM_MODE_CONNECTOR_LVDS;
+
+- if (device == 0) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS0;
++ if (device == 0)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS0;
+- } else if (device == 1) {
+- intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS1;
++ else if (device == 1)
+ intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1;
+- }
+
+ if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) {
+ kfree(intel_sdvo_connector);
+@@ -2939,16 +2929,39 @@ err:
+ return false;
+ }
+
++static u16 intel_sdvo_filter_output_flags(u16 flags)
++{
++ flags &= SDVO_OUTPUT_MASK;
++
++ /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/
++ if (!(flags & SDVO_OUTPUT_TMDS0))
++ flags &= ~SDVO_OUTPUT_TMDS1;
++
++ if (!(flags & SDVO_OUTPUT_RGB0))
++ flags &= ~SDVO_OUTPUT_RGB1;
++
++ if (!(flags & SDVO_OUTPUT_LVDS0))
++ flags &= ~SDVO_OUTPUT_LVDS1;
++
++ return flags;
++}
++
+ static bool
+ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags)
+ {
+- /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/
++ struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev);
++
++ flags = intel_sdvo_filter_output_flags(flags);
++
++ intel_sdvo->controlled_output = flags;
++
++ intel_sdvo_select_ddc_bus(i915, intel_sdvo);
+
+ if (flags & SDVO_OUTPUT_TMDS0)
+ if (!intel_sdvo_dvi_init(intel_sdvo, 0))
+ return false;
+
+- if ((flags & SDVO_TMDS_MASK) == SDVO_TMDS_MASK)
++ if (flags & SDVO_OUTPUT_TMDS1)
+ if (!intel_sdvo_dvi_init(intel_sdvo, 1))
+ return false;
+
+@@ -2969,7 +2982,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags)
+ if (!intel_sdvo_analog_init(intel_sdvo, 0))
+ return false;
+
+- if ((flags & SDVO_RGB_MASK) == SDVO_RGB_MASK)
++ if (flags & SDVO_OUTPUT_RGB1)
+ if (!intel_sdvo_analog_init(intel_sdvo, 1))
+ return false;
+
+@@ -2977,14 +2990,13 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags)
+ if (!intel_sdvo_lvds_init(intel_sdvo, 0))
+ return false;
+
+- if ((flags & SDVO_LVDS_MASK) == SDVO_LVDS_MASK)
++ if (flags & SDVO_OUTPUT_LVDS1)
+ if (!intel_sdvo_lvds_init(intel_sdvo, 1))
+ return false;
+
+- if ((flags & SDVO_OUTPUT_MASK) == 0) {
++ if (flags == 0) {
+ unsigned char bytes[2];
+
+- intel_sdvo->controlled_output = 0;
+ memcpy(bytes, &intel_sdvo->caps.output_flags, 2);
+ DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n",
+ SDVO_NAME(intel_sdvo),
+@@ -3396,8 +3408,6 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv,
+ */
+ intel_sdvo->base.cloneable = 0;
+
+- intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo);
+-
+ /* Set the input timing to the screen. Assume always input 0. */
+ if (!intel_sdvo_set_target_input(intel_sdvo))
+ goto err_output;
+diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
+index 18b52b64af955..170690fc735b5 100644
+--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
++++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
+@@ -32,7 +32,7 @@ void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv)
+ if (intel_de_wait_for_clear(dev_priv, ICL_PHY_MISC(phy),
+ DG2_PHY_DP_TX_ACK_MASK, 25))
+ DRM_ERROR("SNPS PHY %c failed to calibrate after 25ms.\n",
+- phy);
++ phy_name(phy));
+ }
+ }
+
+@@ -582,6 +582,1177 @@ static const struct intel_mpllb_state dg2_hdmi_148_5 = {
+ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
+ };
+
++/* values in the below table are calculated using the algorithm */
++static const struct intel_mpllb_state dg2_hdmi_25200 = {
++ .clock = 25200,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 128) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 41943) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2621),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_27027 = {
++ .clock = 27027,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 140) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 31876) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 46555),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_28320 = {
++ .clock = 28320,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 148) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40894) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 30408),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_30240 = {
++ .clock = 30240,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 160) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 50331) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 42466),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_31500 = {
++ .clock = 31500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 68) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_36000 = {
++ .clock = 36000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 82) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 39320),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_40000 = {
++ .clock = 40000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 96) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_49500 = {
++ .clock = 49500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 1),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 126) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 13107) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13107),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_50000 = {
++ .clock = 50000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 1),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 128) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_57284 = {
++ .clock = 57284,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 150) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 42886) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 49701),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_58000 = {
++ .clock = 58000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 4) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 152) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 52427),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_65000 = {
++ .clock = 65000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 72) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_71000 = {
++ .clock = 71000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 80) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 52427),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_74176 = {
++ .clock = 74176,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 22334) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 43829),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_75000 = {
++ .clock = 75000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 88) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_78750 = {
++ .clock = 78750,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 94) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_85500 = {
++ .clock = 85500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 104) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_88750 = {
++ .clock = 88750,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 1),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 110) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_106500 = {
++ .clock = 106500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 138) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 13107) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13107),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_108000 = {
++ .clock = 108000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 140) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_115500 = {
++ .clock = 115500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 152) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_119000 = {
++ .clock = 119000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 158) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 13107) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13107),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_135000 = {
++ .clock = 135000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 76) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 0),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_138500 = {
++ .clock = 138500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 78) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_147160 = {
++ .clock = 147160,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 84) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 56623) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 6815),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_148352 = {
++ .clock = 148352,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 22334) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 43829),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_154000 = {
++ .clock = 154000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 13) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 90) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 39320),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_162000 = {
++ .clock = 162000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 96) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 52427),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_209800 = {
++ .clock = 209800,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 134) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 60293) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 7864),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_262750 = {
++ .clock = 262750,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 72) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 36044) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 52427),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_267300 = {
++ .clock = 267300,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 74) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 30146) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 36699),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_268500 = {
++ .clock = 268500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 74) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 45875) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13107),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_296703 = {
++ .clock = 296703,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 22321) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 36804),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_241500 = {
++ .clock = 241500,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 160) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 39320),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_319890 = {
++ .clock = 319890,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 94) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 64094) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13631),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_497750 = {
++ .clock = 497750,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 0),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 166) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 36044) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 52427),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_592000 = {
++ .clock = 592000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 13107) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13107),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_593407 = {
++ .clock = 593407,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 0) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 22328) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 7549),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
++static const struct intel_mpllb_state dg2_hdmi_297 = {
++ .clock = 297000,
++ .ref_control =
++ REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
++ .mpllb_cp =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
++ .mpllb_div =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
++ .mpllb_div2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
++ .mpllb_fracn1 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
++ .mpllb_fracn2 =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) |
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 26214),
++ .mpllb_sscen =
++ REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
++};
++
+ static const struct intel_mpllb_state dg2_hdmi_594 = {
+ .clock = 594000,
+ .ref_control =
+@@ -616,7 +1787,46 @@ static const struct intel_mpllb_state *dg2_hdmi_tables[] = {
+ &dg2_hdmi_27_0,
+ &dg2_hdmi_74_25,
+ &dg2_hdmi_148_5,
++ &dg2_hdmi_297,
+ &dg2_hdmi_594,
++ &dg2_hdmi_25200,
++ &dg2_hdmi_27027,
++ &dg2_hdmi_28320,
++ &dg2_hdmi_30240,
++ &dg2_hdmi_31500,
++ &dg2_hdmi_36000,
++ &dg2_hdmi_40000,
++ &dg2_hdmi_49500,
++ &dg2_hdmi_50000,
++ &dg2_hdmi_57284,
++ &dg2_hdmi_58000,
++ &dg2_hdmi_65000,
++ &dg2_hdmi_71000,
++ &dg2_hdmi_74176,
++ &dg2_hdmi_75000,
++ &dg2_hdmi_78750,
++ &dg2_hdmi_85500,
++ &dg2_hdmi_88750,
++ &dg2_hdmi_106500,
++ &dg2_hdmi_108000,
++ &dg2_hdmi_115500,
++ &dg2_hdmi_119000,
++ &dg2_hdmi_135000,
++ &dg2_hdmi_138500,
++ &dg2_hdmi_147160,
++ &dg2_hdmi_148352,
++ &dg2_hdmi_154000,
++ &dg2_hdmi_162000,
++ &dg2_hdmi_209800,
++ &dg2_hdmi_241500,
++ &dg2_hdmi_262750,
++ &dg2_hdmi_267300,
++ &dg2_hdmi_268500,
++ &dg2_hdmi_296703,
++ &dg2_hdmi_319890,
++ &dg2_hdmi_497750,
++ &dg2_hdmi_592000,
++ &dg2_hdmi_593407,
+ NULL,
+ };
+
+diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
+index 3ffece568ed98..1b5b4d252d5b8 100644
+--- a/drivers/gpu/drm/i915/display/intel_tc.c
++++ b/drivers/gpu/drm/i915/display/intel_tc.c
+@@ -291,10 +291,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
+ static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port)
+ {
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
++ enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
+ struct intel_uncore *uncore = &i915->uncore;
+ u32 val;
+
+- val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx));
++ val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port));
+ if (val == 0xffffffff) {
+ drm_dbg_kms(&i915->drm,
+ "Port %s: PHY in TCCOLD, assuming not complete\n",
+@@ -385,9 +386,9 @@ static bool icl_tc_phy_is_owned(struct intel_digital_port *dig_port)
+ PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia));
+ if (val == 0xffffffff) {
+ drm_dbg_kms(&i915->drm,
+- "Port %s: PHY in TCCOLD, assume safe mode\n",
++ "Port %s: PHY in TCCOLD, assume not owned\n",
+ dig_port->tc_port_name);
+- return true;
++ return false;
+ }
+
+ return val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx);
+diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
+index 724e7b04f3b63..b97b4b3b85e07 100644
+--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
++++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
+@@ -1473,7 +1473,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
+ u32 offset;
+ int ret;
+
+- if (w > max_width || w < min_width || h > max_height) {
++ if (w > max_width || w < min_width || h > max_height || h < 1) {
+ drm_dbg_kms(&dev_priv->drm,
+ "requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n",
+ w, h, min_width, max_width, max_height);
+diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
+index 0ee4ff341e25d..b27738df447d0 100644
+--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
++++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
+@@ -32,6 +32,7 @@
+
+ #include "i915_drv.h"
+ #include "intel_atomic.h"
++#include "intel_backlight.h"
+ #include "intel_connector.h"
+ #include "intel_crtc.h"
+ #include "intel_de.h"
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+index 166bb46408a9b..60f6a731f1bf6 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
+@@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
+ u16 idx, num_bonds;
+ int err, n;
+
++ if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) &&
++ !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) {
++ drm_dbg(&i915->drm,
++ "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n");
++ return -ENODEV;
++ }
++
+ if (get_user(idx, &ext->virtual_index))
+ return -EFAULT;
+
+@@ -720,8 +727,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ if (args->size)
+ ret = -EINVAL;
+- ret = proto_context_set_persistence(fpriv->dev_priv, pc,
+- args->value);
++ else
++ ret = proto_context_set_persistence(fpriv->dev_priv, pc,
++ args->value);
+ break;
+
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
+@@ -989,6 +997,10 @@ void i915_gem_context_release(struct kref *ref)
+ trace_i915_context_free(ctx);
+ GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+
++ spin_lock(&ctx->i915->gem.contexts.lock);
++ list_del(&ctx->link);
++ spin_unlock(&ctx->i915->gem.contexts.lock);
++
+ if (ctx->syncobj)
+ drm_syncobj_put(ctx->syncobj);
+
+@@ -1220,10 +1232,6 @@ static void context_close(struct i915_gem_context *ctx)
+ */
+ lut_close(ctx);
+
+- spin_lock(&ctx->i915->gem.contexts.lock);
+- list_del(&ctx->link);
+- spin_unlock(&ctx->i915->gem.contexts.lock);
+-
+ mutex_unlock(&ctx->mutex);
+
+ /*
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+index afa34111de02e..af74c9c37c9cc 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+@@ -34,13 +34,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
+ goto err;
+ }
+
+- ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
++ ret = sg_alloc_table(st, obj->mm.pages->orig_nents, GFP_KERNEL);
+ if (ret)
+ goto err_free;
+
+ src = obj->mm.pages->sgl;
+ dst = st->sgl;
+- for (i = 0; i < obj->mm.pages->nents; i++) {
++ for (i = 0; i < obj->mm.pages->orig_nents; i++) {
+ sg_set_page(dst, sg_page(src), src->length, 0);
+ dst = sg_next(dst);
+ src = sg_next(src);
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+index 1aa249908b645..0d480867fc0c2 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+@@ -1060,6 +1060,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+ return &i915->ggtt;
+ }
+
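++/*
++ * Drop the atomic kmap/io-mapping of the relocation cache so that the
++ * caller may sleep (e.g. to take vm->mutex); reloc_cache_remap() rebuilds
++ * the mapping for the given object afterwards.
++ */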
++static void reloc_cache_unmap(struct reloc_cache *cache)
++{
++ void *vaddr;
++
++ if (!cache->vaddr)
++ return;
++
++ vaddr = unmask_page(cache->vaddr);
++ if (cache->vaddr & KMAP)
++ kunmap_atomic(vaddr);
++ else
++ io_mapping_unmap_atomic((void __iomem *)vaddr);
++}
++
++static void reloc_cache_remap(struct reloc_cache *cache,
++ struct drm_i915_gem_object *obj)
++{
++ void *vaddr;
++
++ if (!cache->vaddr)
++ return;
++
++ if (cache->vaddr & KMAP) {
++ struct page *page = i915_gem_object_get_page(obj, cache->page);
++
++ vaddr = kmap_atomic(page);
++ cache->vaddr = unmask_flags(cache->vaddr) |
++ (unsigned long)vaddr;
++ } else {
++ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
++ unsigned long offset;
++
++ offset = cache->node.start;
++ if (!drm_mm_node_allocated(&cache->node))
++ offset += cache->page << PAGE_SHIFT;
++
++ cache->vaddr = (unsigned long)
++ io_mapping_map_atomic_wc(&ggtt->iomap, offset);
++ }
++}
++
+ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
+ {
+ void *vaddr;
+@@ -1324,10 +1365,17 @@ eb_relocate_entry(struct i915_execbuffer *eb,
+ * batchbuffers.
+ */
+ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+- GRAPHICS_VER(eb->i915) == 6) {
++ GRAPHICS_VER(eb->i915) == 6 &&
++ !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
++ struct i915_vma *vma = target->vma;
++
++ reloc_cache_unmap(&eb->reloc_cache);
++ mutex_lock(&vma->vm->mutex);
+ err = i915_vma_bind(target->vma,
+ target->vma->obj->cache_level,
+ PIN_GLOBAL, NULL);
++ mutex_unlock(&vma->vm->mutex);
++ reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
+ if (err)
+ return err;
+ }
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+index e5ae9c06510cc..3c8de65bfb393 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+@@ -143,24 +143,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+ .put_pages = i915_gem_object_put_pages_internal,
+ };
+
+-/**
+- * i915_gem_object_create_internal: create an object with volatile pages
+- * @i915: the i915 device
+- * @size: the size in bytes of backing storage to allocate for the object
+- *
+- * Creates a new object that wraps some internal memory for private use.
+- * This object is not backed by swappable storage, and as such its contents
+- * are volatile and only valid whilst pinned. If the object is reaped by the
+- * shrinker, its pages and data will be discarded. Equally, it is not a full
+- * GEM object and so not valid for access from userspace. This makes it useful
+- * for hardware interfaces like ringbuffers (which are pinned from the time
+- * the request is written to the time the hardware stops accessing it), but
+- * not for contexts (which need to be preserved when not active for later
+- * reuse). Note that it is not cleared upon allocation.
+- */
+ struct drm_i915_gem_object *
+-i915_gem_object_create_internal(struct drm_i915_private *i915,
+- phys_addr_t size)
++__i915_gem_object_create_internal(struct drm_i915_private *i915,
++ const struct drm_i915_gem_object_ops *ops,
++ phys_addr_t size)
+ {
+ static struct lock_class_key lock_class;
+ struct drm_i915_gem_object *obj;
+@@ -177,7 +163,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+- i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
++ i915_gem_object_init(obj, ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+ /*
+@@ -197,3 +183,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
+
+ return obj;
+ }
++
++/**
++ * i915_gem_object_create_internal: create an object with volatile pages
++ * @i915: the i915 device
++ * @size: the size in bytes of backing storage to allocate for the object
++ *
++ * Creates a new object that wraps some internal memory for private use.
++ * This object is not backed by swappable storage, and as such its contents
++ * are volatile and only valid whilst pinned. If the object is reaped by the
++ * shrinker, its pages and data will be discarded. Equally, it is not a full
++ * GEM object and so not valid for access from userspace. This makes it useful
++ * for hardware interfaces like ringbuffers (which are pinned from the time
++ * the request is written to the time the hardware stops accessing it), but
++ * not for contexts (which need to be preserved when not active for later
++ * reuse). Note that it is not cleared upon allocation.
++ */
++struct drm_i915_gem_object *
++i915_gem_object_create_internal(struct drm_i915_private *i915,
++ phys_addr_t size)
++{
++ return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
++}
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+index 5130e8ed95647..28e07040cf47a 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+@@ -66,7 +66,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ * mmap ioctl is disallowed for all discrete platforms,
+ * and for all platforms with GRAPHICS_VER > 12.
+ */
+- if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12)
++ if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
+ return -EOPNOTSUPP;
+
+ if (args->flags & ~(I915_MMAP_WC))
+@@ -438,7 +438,7 @@ vm_access(struct vm_area_struct *area, unsigned long addr,
+ return -EACCES;
+
+ addr -= area->vm_start;
+- if (addr >= obj->base.size)
++ if (range_overflows_t(u64, addr, len, obj->base.size))
+ return -EINVAL;
+
+ i915_gem_ww_ctx_init(&ww, true);
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
+index 6fb9afb65034b..5f48d5ea5c158 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
+@@ -224,6 +224,12 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj)
+ GEM_BUG_ON(vma->obj != obj);
+ spin_unlock(&obj->vma.lock);
+
++ /* Verify that the vma is unbound under the vm mutex. */
++ mutex_lock(&vma->vm->mutex);
++ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
++ __i915_vma_unbind(vma);
++ mutex_unlock(&vma->vm->mutex);
++
+ __i915_vma_put(vma);
+
+ spin_lock(&obj->vma.lock);
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+index 2471f36aaff38..3012cbe5b0b7c 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+@@ -298,6 +298,7 @@ struct drm_i915_gem_object {
+ I915_BO_ALLOC_USER)
+ #define I915_BO_READONLY BIT(4)
+ #define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */
++#define I915_BO_WAS_BOUND_BIT 6
+
+ /**
+ * @mem_flags - Mutable placement-related flags
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+index 8eb1c3a6fc9cd..9053cea3395a6 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+@@ -10,6 +10,8 @@
+ #include "i915_gem_lmem.h"
+ #include "i915_gem_mman.h"
+
++#include "gt/intel_gt.h"
++
+ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages,
+ unsigned int sg_page_sizes)
+@@ -160,7 +162,6 @@ retry:
+ /* Immediately discard the backing storage */
+ void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+ {
+- drm_gem_free_mmap_offset(&obj->base);
+ if (obj->ops->truncate)
+ obj->ops->truncate(obj);
+ }
+@@ -218,6 +219,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+ __i915_gem_object_reset_page_iter(obj);
+ obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
++ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
++ struct drm_i915_private *i915 = to_i915(obj->base.dev);
++ intel_wakeref_t wakeref;
++
++ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
++ intel_gt_invalidate_tlbs(&i915->gt);
++ }
++
+ return pages;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+index 11f072193f3b1..827f2f9dcda6a 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+@@ -533,7 +533,7 @@ static int shmem_object_init(struct intel_memory_region *mem,
+ mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+- i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0);
++ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+index ef4d0f7dc1186..d4897ce0ad0c0 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+@@ -294,10 +294,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+ spin_unlock(&obj->vma.lock);
+
+ obj->tiling_and_stride = tiling | stride;
+- i915_gem_object_unlock(obj);
+-
+- /* Force the fence to be reacquired for GTT access */
+- i915_gem_object_release_mmap_gtt(obj);
+
+ /* Try to preallocate memory required to save swizzling on put-pages */
+ if (i915_gem_object_needs_bit17_swizzle(obj)) {
+@@ -310,6 +306,11 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+ obj->bit_17 = NULL;
+ }
+
++ i915_gem_object_unlock(obj);
++
++ /* Force the fence to be reacquired for GTT access */
++ i915_gem_object_release_mmap_gtt(obj);
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+index 6ea13159bffcc..4b823fbfe76a1 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+@@ -759,11 +759,9 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ bo->priority = I915_TTM_PRIO_PURGE;
+ } else if (!i915_gem_object_has_pages(obj)) {
+- if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
+- bo->priority = I915_TTM_PRIO_HAS_PAGES;
++ bo->priority = I915_TTM_PRIO_NO_PAGES;
+ } else {
+- if (bo->priority > I915_TTM_PRIO_NO_PAGES)
+- bo->priority = I915_TTM_PRIO_NO_PAGES;
++ bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ }
+
+ ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
+diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+index 8eb5050f8cb3e..907e02d4085cc 100644
+--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
++++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+@@ -182,97 +182,108 @@ out_file:
+ }
+
+ struct parallel_switch {
+- struct task_struct *tsk;
++ struct kthread_worker *worker;
++ struct kthread_work work;
+ struct intel_context *ce[2];
++ int result;
+ };
+
+-static int __live_parallel_switch1(void *data)
++static void __live_parallel_switch1(struct kthread_work *work)
+ {
+- struct parallel_switch *arg = data;
++ struct parallel_switch *arg =
++ container_of(work, typeof(*arg), work);
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+
+ count = 0;
++ arg->result = 0;
+ do {
+ struct i915_request *rq = NULL;
+- int err, n;
++ int n;
+
+- err = 0;
+- for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
++ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+- return PTR_ERR(rq);
++ arg->result = PTR_ERR(rq);
++ break;
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+- err = i915_request_await_dma_fence(rq, &prev->fence);
++ arg->result =
++ i915_request_await_dma_fence(rq,
++ &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+ }
+- if (i915_request_wait(rq, 0, HZ / 5) < 0)
+- err = -ETIME;
++
++ if (IS_ERR_OR_NULL(rq))
++ break;
++
++ if (i915_request_wait(rq, 0, HZ) < 0)
++ arg->result = -ETIME;
++
+ i915_request_put(rq);
+- if (err)
+- return err;
+
+ count++;
+- } while (!__igt_timeout(end_time, NULL));
++ } while (!arg->result && !__igt_timeout(end_time, NULL));
+
+- pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
+- return 0;
++ pr_info("%s: %lu switches (sync) <%d>\n",
++ arg->ce[0]->engine->name, count, arg->result);
+ }
+
+-static int __live_parallel_switchN(void *data)
++static void __live_parallel_switchN(struct kthread_work *work)
+ {
+- struct parallel_switch *arg = data;
++ struct parallel_switch *arg =
++ container_of(work, typeof(*arg), work);
+ struct i915_request *rq = NULL;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int n;
+
+ count = 0;
++ arg->result = 0;
+ do {
+- for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
++ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+- int err = 0;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+- return PTR_ERR(rq);
++ arg->result = PTR_ERR(rq);
++ break;
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+- err = i915_request_await_dma_fence(rq, &prev->fence);
++ arg->result =
++ i915_request_await_dma_fence(rq,
++ &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+- if (err) {
+- i915_request_put(rq);
+- return err;
+- }
+ }
+
+ count++;
+- } while (!__igt_timeout(end_time, NULL));
+- i915_request_put(rq);
++ } while (!arg->result && !__igt_timeout(end_time, NULL));
+
+- pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
+- return 0;
++ if (!IS_ERR_OR_NULL(rq))
++ i915_request_put(rq);
++
++ pr_info("%s: %lu switches (many) <%d>\n",
++ arg->ce[0]->engine->name, count, arg->result);
+ }
+
+ static int live_parallel_switch(void *arg)
+ {
+ struct drm_i915_private *i915 = arg;
+- static int (* const func[])(void *arg) = {
++ static void (* const func[])(struct kthread_work *) = {
+ __live_parallel_switch1,
+ __live_parallel_switchN,
+ NULL,
+@@ -280,7 +291,7 @@ static int live_parallel_switch(void *arg)
+ struct parallel_switch *data = NULL;
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+- int (* const *fn)(void *arg);
++ void (* const *fn)(struct kthread_work *);
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct file *file;
+@@ -338,8 +349,10 @@ static int live_parallel_switch(void *arg)
+ continue;
+
+ ce = intel_context_create(data[m].ce[0]->engine);
+- if (IS_ERR(ce))
++ if (IS_ERR(ce)) {
++ err = PTR_ERR(ce);
+ goto out;
++ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+@@ -351,9 +364,24 @@ static int live_parallel_switch(void *arg)
+ }
+ }
+
++ for (n = 0; n < count; n++) {
++ struct kthread_worker *worker;
++
++ if (!data[n].ce[0])
++ continue;
++
++ worker = kthread_create_worker(0, "igt/parallel:%s",
++ data[n].ce[0]->engine->name);
++ if (IS_ERR(worker)) {
++ err = PTR_ERR(worker);
++ goto out;
++ }
++
++ data[n].worker = worker;
++ }
++
+ for (fn = func; !err && *fn; fn++) {
+ struct igt_live_test t;
+- int n;
+
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+@@ -363,34 +391,23 @@ static int live_parallel_switch(void *arg)
+ if (!data[n].ce[0])
+ continue;
+
+- data[n].tsk = kthread_run(*fn, &data[n],
+- "igt/parallel:%s",
+- data[n].ce[0]->engine->name);
+- if (IS_ERR(data[n].tsk)) {
+- err = PTR_ERR(data[n].tsk);
+- break;
+- }
+- get_task_struct(data[n].tsk);
++ data[n].result = 0;
++ kthread_init_work(&data[n].work, *fn);
++ kthread_queue_work(data[n].worker, &data[n].work);
+ }
+
+- yield(); /* start all threads before we kthread_stop() */
+-
+ for (n = 0; n < count; n++) {
+- int status;
+-
+- if (IS_ERR_OR_NULL(data[n].tsk))
+- continue;
+-
+- status = kthread_stop(data[n].tsk);
+- if (status && !err)
+- err = status;
+-
+- put_task_struct(data[n].tsk);
+- data[n].tsk = NULL;
++ if (data[n].ce[0]) {
++ kthread_flush_work(&data[n].work);
++ if (data[n].result && !err)
++ err = data[n].result;
++ }
+ }
+
+- if (igt_live_test_end(&t))
+- err = -EIO;
++ if (igt_live_test_end(&t)) {
++ err = err ?: -EIO;
++ break;
++ }
+ }
+
+ out:
+@@ -402,6 +419,9 @@ out:
+ intel_context_unpin(data[n].ce[m]);
+ intel_context_put(data[n].ce[m]);
+ }
++
++ if (data[n].worker)
++ kthread_destroy_worker(data[n].worker);
+ }
+ kfree(data);
+ out_file:
+diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+index 1aee5e6b1b23f..b257666a26fc2 100644
+--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
++++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+@@ -244,6 +244,7 @@ err_scratch1:
+ i915_gem_object_put(vm->scratch[1]);
+ err_scratch0:
+ i915_gem_object_put(vm->scratch[0]);
++ vm->scratch[0] = NULL;
+ return ret;
+ }
+
+@@ -262,15 +263,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
+ {
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+
+- __i915_vma_put(ppgtt->vma);
+-
+ gen6_ppgtt_free_pd(ppgtt);
+ free_scratch(vm);
+
+- mutex_destroy(&ppgtt->flush);
+- mutex_destroy(&ppgtt->pin_mutex);
++ if (ppgtt->base.pd)
++ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
+
+- free_pd(&ppgtt->base.vm, ppgtt->base.pd);
++ mutex_destroy(&ppgtt->flush);
+ }
+
+ static int pd_vma_set_pages(struct i915_vma *vma)
+@@ -331,37 +330,6 @@ static const struct i915_vma_ops pd_vma_ops = {
+ .unbind_vma = pd_vma_unbind,
+ };
+
+-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
+-{
+- struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
+- struct i915_vma *vma;
+-
+- GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+- GEM_BUG_ON(size > ggtt->vm.total);
+-
+- vma = i915_vma_alloc();
+- if (!vma)
+- return ERR_PTR(-ENOMEM);
+-
+- i915_active_init(&vma->active, NULL, NULL, 0);
+-
+- kref_init(&vma->ref);
+- mutex_init(&vma->pages_mutex);
+- vma->vm = i915_vm_get(&ggtt->vm);
+- vma->ops = &pd_vma_ops;
+- vma->private = ppgtt;
+-
+- vma->size = size;
+- vma->fence_size = size;
+- atomic_set(&vma->flags, I915_VMA_GGTT);
+- vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+-
+- INIT_LIST_HEAD(&vma->obj_link);
+- INIT_LIST_HEAD(&vma->closed_link);
+-
+- return vma;
+-}
+-
+ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
+ {
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+@@ -378,24 +346,85 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
+ if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
+ return 0;
+
+- if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+- return -EINTR;
++ /* grab the ppgtt resv to pin the object */
++ err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
++ if (err)
++ return err;
+
+ /*
+ * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
+ * allocator works in address space sizes, so it's multiplied by page
+ * size. We allocate at the top of the GTT to avoid fragmentation.
+ */
+- err = 0;
+- if (!atomic_read(&ppgtt->pin_count))
++ if (!atomic_read(&ppgtt->pin_count)) {
+ err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
++
++ GEM_BUG_ON(ppgtt->vma->fence);
++ clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
++ }
+ if (!err)
+ atomic_inc(&ppgtt->pin_count);
+- mutex_unlock(&ppgtt->pin_mutex);
+
+ return err;
+ }
+
++static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
++{
++ obj->mm.pages = ZERO_SIZE_PTR;
++ return 0;
++}
++
++static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
++ struct sg_table *pages)
++{
++}
++
++static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
++ .name = "pd_dummy_obj",
++ .get_pages = pd_dummy_obj_get_pages,
++ .put_pages = pd_dummy_obj_put_pages,
++};
++
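++/*
++ * Allocate the top-level page directory and back it with a dummy internal
++ * object (no real backing pages) so that the PD can be wrapped in a regular
++ * i915_vma in the GGTT, sharing the ppgtt's reservation object.
++ */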
++static struct i915_page_directory *
++gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
++{
++ struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
++ struct i915_page_directory *pd;
++ int err;
++
++ pd = __alloc_pd(I915_PDES);
++ if (unlikely(!pd))
++ return ERR_PTR(-ENOMEM);
++
++ pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
++ &pd_dummy_obj_ops,
++ I915_PDES * SZ_4K);
++ if (IS_ERR(pd->pt.base)) {
++ err = PTR_ERR(pd->pt.base);
++ pd->pt.base = NULL;
++ goto err_pd;
++ }
++
++ pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
++ pd->pt.base->shares_resv_from = &ppgtt->base.vm;
++
++ ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
++ if (IS_ERR(ppgtt->vma)) {
++ err = PTR_ERR(ppgtt->vma);
++ ppgtt->vma = NULL;
++ goto err_pd;
++ }
++
++ /* The dummy object we create is special, override ops.. */
++ ppgtt->vma->ops = &pd_vma_ops;
++ ppgtt->vma->private = ppgtt;
++ return pd;
++
++err_pd:
++ free_pd(&ppgtt->base.vm, pd);
++ return ERR_PTR(err);
++}
++
+ void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+ {
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+@@ -427,7 +456,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&ppgtt->flush);
+- mutex_init(&ppgtt->pin_mutex);
+
+ ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
+@@ -442,30 +470,19 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+ ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
+ ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+
+- ppgtt->base.pd = __alloc_pd(I915_PDES);
+- if (!ppgtt->base.pd) {
+- err = -ENOMEM;
+- goto err_free;
+- }
+-
+ err = gen6_ppgtt_init_scratch(ppgtt);
+ if (err)
+- goto err_pd;
++ goto err_put;
+
+- ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+- if (IS_ERR(ppgtt->vma)) {
+- err = PTR_ERR(ppgtt->vma);
+- goto err_scratch;
++ ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
++ if (IS_ERR(ppgtt->base.pd)) {
++ err = PTR_ERR(ppgtt->base.pd);
++ goto err_put;
+ }
+
+ return &ppgtt->base;
+
+-err_scratch:
+- free_scratch(&ppgtt->base.vm);
+-err_pd:
+- free_pd(&ppgtt->base.vm, ppgtt->base.pd);
+-err_free:
+- mutex_destroy(&ppgtt->pin_mutex);
+- kfree(ppgtt);
++err_put:
++ i915_vm_put(&ppgtt->base.vm);
+ return ERR_PTR(err);
+ }
+diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+index 6a61a5c3a85a6..9b498ca76ac6b 100644
+--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
++++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+@@ -19,7 +19,6 @@ struct gen6_ppgtt {
+ u32 pp_dir;
+
+ atomic_t pin_count;
+- struct mutex pin_mutex;
+
+ bool scan_for_unused_pt;
+ };
+diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+index 6e0e52eeb87a6..0cf604c5a6c24 100644
+--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
++++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+@@ -196,7 +196,10 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+ if (intel_vgpu_active(vm->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, false);
+
+- __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
++ if (ppgtt->pd)
++ __gen8_ppgtt_cleanup(vm, ppgtt->pd,
++ gen8_pd_top_count(vm), vm->top);
++
+ free_scratch(vm);
+ }
+
+@@ -656,8 +659,10 @@ static int gen8_init_scratch(struct i915_address_space *vm)
+ struct drm_i915_gem_object *obj;
+
+ obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+- if (IS_ERR(obj))
++ if (IS_ERR(obj)) {
++ ret = PTR_ERR(obj);
+ goto free_scratch;
++ }
+
+ ret = map_pt_dma(vm, obj);
+ if (ret) {
+@@ -676,7 +681,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
+ free_scratch:
+ while (i--)
+ i915_gem_object_put(vm->scratch[i]);
+- return -ENOMEM;
++ vm->scratch[0] = NULL;
++ return ret;
+ }
+
+ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
+@@ -753,6 +759,7 @@ err_pd:
+ */
+ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+ {
++ struct i915_page_directory *pd;
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+@@ -779,44 +786,39 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+ else
+ ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+
++ ppgtt->vm.pte_encode = gen8_pte_encode;
++
++ ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
++ ppgtt->vm.insert_entries = gen8_ppgtt_insert;
++ ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
++ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
++ ppgtt->vm.clear_range = gen8_ppgtt_clear;
++ ppgtt->vm.foreach = gen8_ppgtt_foreach;
++ ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
++
+ err = gen8_init_scratch(&ppgtt->vm);
+ if (err)
+- goto err_free;
++ goto err_put;
+
+- ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+- if (IS_ERR(ppgtt->pd)) {
+- err = PTR_ERR(ppgtt->pd);
+- goto err_free_scratch;
++ pd = gen8_alloc_top_pd(&ppgtt->vm);
++ if (IS_ERR(pd)) {
++ err = PTR_ERR(pd);
++ goto err_put;
+ }
++ ppgtt->pd = pd;
+
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ err = gen8_preallocate_top_level_pdp(ppgtt);
+ if (err)
+- goto err_free_pd;
++ goto err_put;
+ }
+
+- ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+- ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+- ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
+- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+- ppgtt->vm.clear_range = gen8_ppgtt_clear;
+- ppgtt->vm.foreach = gen8_ppgtt_foreach;
+-
+- ppgtt->vm.pte_encode = gen8_pte_encode;
+-
+ if (intel_vgpu_active(gt->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, true);
+
+- ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+-
+ return ppgtt;
+
+-err_free_pd:
+- __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+- gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+-err_free_scratch:
+- free_scratch(&ppgtt->vm);
+-err_free:
+- kfree(ppgtt);
++err_put:
++ i915_vm_put(&ppgtt->vm);
+ return ERR_PTR(err);
+ }
+diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
+index e54351a170e2c..a63631ea0ec47 100644
+--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
+@@ -152,6 +152,14 @@ struct intel_context {
+ /** sseu: Control eu/slice partitioning */
+ struct intel_sseu sseu;
+
++ /**
++ * pinned_contexts_link: List link for the engine's pinned contexts.
++ * This is only used if this is a perma-pinned kernel context and
++ * the list is assumed to be manipulated only during driver load
++ * or unload time, so there is currently no mutex protection.
++ */
++ struct list_head pinned_contexts_link;
++
+ u8 wa_bb_page; /* if set, page num reserved for context workarounds */
+
+ struct {
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+index 0d9105a31d84e..eb99441e0ada0 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+@@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+
+ BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
+
++ INIT_LIST_HEAD(&engine->pinned_contexts_list);
+ engine->id = id;
+ engine->legacy_idx = INVALID_ENGINE;
+ engine->mask = BIT(id);
+@@ -875,6 +876,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
+ return ERR_PTR(err);
+ }
+
++ list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
++
+ /*
+ * Give our perma-pinned kernel timelines a separate lockdep class,
+ * so that we can use them from within the normal user timelines
+@@ -897,6 +900,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
+ list_del(&ce->timeline->engine_link);
+ mutex_unlock(&hwsp->vm->mutex);
+
++ list_del(&ce->pinned_contexts_link);
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+ }
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+index 1f07ac4e0672a..dacd627737359 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
+ intel_engine_init_heartbeat(engine);
+ }
+
++/**
++ * intel_engine_reset_pinned_contexts - Reset the pinned contexts of
++ * an engine.
++ * @engine: The engine whose pinned contexts we want to reset.
++ *
++ * Typically the pinned context LMEM images lose or get their content
++ * corrupted on suspend. This function resets their images.
++ */
++void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
++{
++ struct intel_context *ce;
++
++ list_for_each_entry(ce, &engine->pinned_contexts_list,
++ pinned_contexts_link) {
++ /* kernel context gets reset at __engine_unpark() */
++ if (ce == engine->kernel_context)
++ continue;
++
++ dbg_poison_ce(ce);
++ ce->ops->reset(ce);
++ }
++}
++
+ #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+ #include "selftest_engine_pm.c"
+ #endif
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+index 70ea46d6cfb00..8520c595f5e18 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+@@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine)
+
+ void intel_engine_init__pm(struct intel_engine_cs *engine);
+
++void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine);
++
+ #endif /* INTEL_ENGINE_PM_H */
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
+index ed91bcff20eb5..bf5aeb97a4586 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
+@@ -143,6 +143,21 @@ struct intel_engine_execlists {
+ */
+ struct timer_list preempt;
+
++ /**
++ * @preempt_target: active request at the time of the preemption request
++ *
++ * We force a preemption to occur if the pending contexts have not
++ * been promoted to active upon receipt of the CS ack event within
++ * the timeout. This timeout may be chosen based on the target,
++ * using a very short timeout if the context is no longer schedulable.
++ * That short timeout may not be applicable to other contexts, so
++ * if a context switch should happen before the preemption
++ * timeout, we may shoot early at an innocent context. To prevent this,
++ * we record which context was active at the time of the preemption
++ * request and only reset that context upon the timeout.
++ */
++ const struct i915_request *preempt_target;
++
+ /**
+ * @ccid: identifier for contexts submitted to this engine
+ */
+@@ -304,6 +319,13 @@ struct intel_engine_cs {
+
+ struct intel_context *kernel_context; /* pinned */
+
++ /**
++ * pinned_contexts_list: List of pinned contexts. This list is
++ * assumed to be manipulated only during driver load or unload time,
++ * and therefore has no additional protection.
++ */
++ struct list_head pinned_contexts_list;
++
+ intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
+ struct {
+diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+index de5f9c86b9a44..773ff51218335 100644
+--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+@@ -1225,6 +1225,9 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
+ if (!rq)
+ return 0;
+
++ /* Only allow ourselves to force reset the currently active context */
++ engine->execlists.preempt_target = rq;
++
+ /* Force a fast reset for terminated contexts (ignoring sysfs!) */
+ if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+ return 1;
+@@ -2140,10 +2143,6 @@ static void __execlists_unhold(struct i915_request *rq)
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
+- /* Propagate any change in error status */
+- if (rq->fence.error)
+- i915_request_set_error_once(w, rq->fence.error);
+-
+ if (w->engine != rq->engine)
+ continue;
+
+@@ -2405,8 +2404,24 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
+ GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
+
+ if (unlikely(preempt_timeout(engine))) {
++ const struct i915_request *rq = *engine->execlists.active;
++
++ /*
++ * If after the preempt-timeout expired, we are still on the
++ * same active request/context as before we initiated the
++ * preemption, reset the engine.
++ *
++ * However, if we have processed a CS event to switch contexts,
++ * but not yet processed the CS event for the pending
++ * preemption, reset the timer allowing the new context to
++ * gracefully exit.
++ */
+ cancel_timer(&engine->execlists.preempt);
+- engine->execlists.error_interrupt |= ERROR_PREEMPT;
++ if (rq == engine->execlists.preempt_target)
++ engine->execlists.error_interrupt |= ERROR_PREEMPT;
++ else
++ set_timer_ms(&engine->execlists.preempt,
++ active_preempt_timeout(engine, rq));
+ }
+
+ if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+@@ -2791,6 +2806,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
+
+ /* And scrub the dirty cachelines for the HWSP */
+ clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
++
++ intel_engine_reset_pinned_contexts(engine);
+ }
+
+ static void enable_error_interrupt(struct intel_engine_cs *engine)
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
+index 62d40c9866427..b2a003127d319 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+
+ spin_lock_init(&gt->irq_lock);
+
++ mutex_init(&gt->tlb_invalidate_lock);
++
+ INIT_LIST_HEAD(&gt->closed_vma);
+ spin_lock_init(&gt->closed_lock);
+
+@@ -648,8 +650,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+ return -EINTR;
+ }
+
+- return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
+- remaining_timeout);
++ if (timeout)
++ return timeout;
++
++ if (remaining_timeout < 0)
++ remaining_timeout = 0;
++
++ return intel_uc_wait_for_idle(&gt->uc, remaining_timeout);
+ }
+
+ int intel_gt_init(struct intel_gt *gt)
+@@ -702,12 +709,12 @@ int intel_gt_init(struct intel_gt *gt)
+ if (err)
+ goto err_gt;
+
+- intel_uc_init_late(&gt->uc);
+-
+ err = i915_inject_probe_error(gt->i915, -EIO);
+ if (err)
+ goto err_gt;
+
++ intel_uc_init_late(&gt->uc);
++
+ intel_migrate_init(&gt->migrate, gt);
+
+ goto out_fw;
+@@ -895,3 +902,123 @@ void intel_gt_info_print(const struct intel_gt_info *info,
+
+ intel_sseu_dump(&info->sseu, p);
+ }
++
++struct reg_and_bit {
++ i915_reg_t reg;
++ u32 bit;
++};
++
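++/*
++ * Look up the TLB invalidation register and bit for an engine. On gen8 the
++ * video decode engines use per-instance registers (GEN8_M2TCR follows
++ * GEN8_M1TCR) rather than separate bits within one register.
++ */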
++static struct reg_and_bit
++get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
++ const i915_reg_t *regs, const unsigned int num)
++{
++ const unsigned int class = engine->class;
++ struct reg_and_bit rb = { };
++
++ if (drm_WARN_ON_ONCE(&engine->i915->drm,
++ class >= num || !regs[class].reg))
++ return rb;
++
++ rb.reg = regs[class];
++ if (gen8 && class == VIDEO_DECODE_CLASS)
++ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
++ else
++ rb.bit = engine->instance;
++
++ rb.bit = BIT(rb.bit);
++
++ return rb;
++}
++
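++/*
++ * Request a TLB invalidation on every engine and wait for the hardware to
++ * acknowledge it. Register writes are serialised against GT reset via
++ * uncore->lock, and concurrent invalidations via gt->tlb_invalidate_lock.
++ */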
++void intel_gt_invalidate_tlbs(struct intel_gt *gt)
++{
++ static const i915_reg_t gen8_regs[] = {
++ [RENDER_CLASS] = GEN8_RTCR,
++ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
++ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
++ [COPY_ENGINE_CLASS] = GEN8_BTCR,
++ };
++ static const i915_reg_t gen12_regs[] = {
++ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
++ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
++ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
++ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
++ };
++ struct drm_i915_private *i915 = gt->i915;
++ struct intel_uncore *uncore = gt->uncore;
++ struct intel_engine_cs *engine;
++ enum intel_engine_id id;
++ const i915_reg_t *regs;
++ unsigned int num = 0;
++
++ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
++ return;
++
++ if (intel_gt_is_wedged(gt))
++ return;
++
++ if (GRAPHICS_VER(i915) == 12) {
++ regs = gen12_regs;
++ num = ARRAY_SIZE(gen12_regs);
++ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
++ regs = gen8_regs;
++ num = ARRAY_SIZE(gen8_regs);
++ } else if (GRAPHICS_VER(i915) < 8) {
++ return;
++ }
++
++ if (drm_WARN_ONCE(&i915->drm, !num,
++ "Platform does not implement TLB invalidation!"))
++ return;
++
++ GEM_TRACE("\n");
++
++ assert_rpm_wakelock_held(&i915->runtime_pm);
++
++ mutex_lock(&gt->tlb_invalidate_lock);
++ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
++
++ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
++
++ for_each_engine(engine, gt, id) {
++ struct reg_and_bit rb;
++
++ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
++ if (!i915_mmio_reg_offset(rb.reg))
++ continue;
++
++ if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
++ engine->class == VIDEO_ENHANCEMENT_CLASS))
++ rb.bit = _MASKED_BIT_ENABLE(rb.bit);
++
++ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
++ }
++
++ spin_unlock_irq(&uncore->lock);
++
++ for_each_engine(engine, gt, id) {
++ /*
++		 * HW architecture suggests typical invalidation time at 40us,
++ * with pessimistic cases up to 100us and a recommendation to
++ * cap at 1ms. We go a bit higher just in case.
++ */
++ const unsigned int timeout_us = 100;
++ const unsigned int timeout_ms = 4;
++ struct reg_and_bit rb;
++
++ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
++ if (!i915_mmio_reg_offset(rb.reg))
++ continue;
++
++ if (__intel_wait_for_register_fw(uncore,
++ rb.reg, rb.bit, 0,
++ timeout_us, timeout_ms,
++ NULL))
++ drm_err_ratelimited(&gt->i915->drm,
++ "%s TLB invalidation did not complete in %ums!\n",
++ engine->name, timeout_ms);
++ }
++
++ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
++ mutex_unlock(&gt->tlb_invalidate_lock);
++}
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
+index 74e771871a9bd..c0169d6017c2d 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
+@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
+
+ void intel_gt_watchdog_work(struct work_struct *work);
+
++void intel_gt_invalidate_tlbs(struct intel_gt *gt);
++
+ #endif /* __INTEL_GT_H__ */
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+index edb881d756309..1dfd01668c79c 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+@@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock);
+ if (remaining_timeout)
+ *remaining_timeout = timeout;
+
+- return active_count ? timeout : 0;
++ return active_count ? timeout ?: -ETIME : 0;
+ }
+
+ static void retire_work_handler(struct work_struct *work)
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+index a81e21bf1bd1a..9fbcbcc6c35db 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+@@ -72,6 +72,8 @@ struct intel_gt {
+
+ struct intel_uc uc;
+
++ struct mutex tlb_invalidate_lock;
++
+ struct intel_gt_timelines {
+ spinlock_t lock; /* protects active_list */
+ struct list_head active_list;
+diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
+index e137dd32b5b8b..f9d4094916e3d 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
+@@ -341,6 +341,9 @@ void free_scratch(struct i915_address_space *vm)
+ {
+ int i;
+
++ if (!vm->scratch[0])
++ return;
++
+ for (i = 0; i <= vm->top; i++)
+ i915_gem_object_put(vm->scratch[i]);
+ }
+@@ -544,7 +547,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+- i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
++ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
+index 1dac21aa7e5c3..5b59a6effc207 100644
+--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
++++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
+@@ -13,7 +13,6 @@
+
+ struct insert_pte_data {
+ u64 offset;
+- bool is_lmem;
+ };
+
+ #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+@@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm,
+ struct insert_pte_data *d = data;
+
+ vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
+- d->is_lmem ? PTE_LM : 0);
++ i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
+ d->offset += PAGE_SIZE;
+ }
+
+@@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
+ goto err_vm;
+
+ /* Now allow the GPU to rewrite the PTE via its own ppGTT */
+- d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]);
+- vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d);
++ vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d);
+ }
+
+ return &vm->vm;
+@@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq,
+ GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
+
+ /* Compute the page directory offset for the target address range */
+- offset += (u64)rq->engine->instance << 32;
+ offset >>= 12;
+ offset *= sizeof(u64);
+ offset += 2 * CHUNK_SZ;
++ offset += (u64)rq->engine->instance << 32;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
+index 91200c43951f7..9dc244b70ce4b 100644
+--- a/drivers/gpu/drm/i915/gt/intel_reset.c
++++ b/drivers/gpu/drm/i915/gt/intel_reset.c
+@@ -271,6 +271,7 @@ out:
+ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+ {
+ struct intel_uncore *uncore = gt->uncore;
++ int loops = 2;
+ int err;
+
+ /*
+@@ -278,24 +279,45 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+ * for fifo space for the write or forcewake the chip for
+ * the read
+ */
+- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
++ do {
++ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+
+- /* Wait for the device to ack the reset requests */
+- err = __intel_wait_for_register_fw(uncore,
+- GEN6_GDRST, hw_domain_mask, 0,
+- 500, 0,
+- NULL);
++ /*
++ * Wait for the device to ack the reset requests.
++ *
++ * On some platforms, e.g. Jasperlake, we see that the
++ * engine register state is not cleared until shortly after
++ * GDRST reports completion, causing a failure as we try
++ * to immediately resume while the internal state is still
++ * in flux. If we immediately repeat the reset, the second
++ * reset appears to serialise with the first, and since
++ * it is a no-op, the registers should retain their reset
++ * value. However, there is still a concern that upon
++ * leaving the second reset, the internal engine state
++ * is still in flux and not ready for resuming.
++ */
++ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
++ hw_domain_mask, 0,
++ 2000, 0,
++ NULL);
++ } while (err == 0 && --loops);
+ if (err)
+ GT_TRACE(gt,
+ "Wait for 0x%08x engines reset failed\n",
+ hw_domain_mask);
+
++ /*
++ * As we have observed that the engine state is still volatile
++ * after GDRST is acked, impose a small delay to let everything settle.
++ */
++ udelay(50);
++
+ return err;
+ }
+
+-static int gen6_reset_engines(struct intel_gt *gt,
+- intel_engine_mask_t engine_mask,
+- unsigned int retry)
++static int __gen6_reset_engines(struct intel_gt *gt,
++ intel_engine_mask_t engine_mask,
++ unsigned int retry)
+ {
+ static const u32 hw_engine_mask[] = {
+ [RCS0] = GEN6_GRDOM_RENDER,
+@@ -322,6 +344,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
+ return gen6_hw_domain_reset(gt, hw_mask);
+ }
+
++static int gen6_reset_engines(struct intel_gt *gt,
++ intel_engine_mask_t engine_mask,
++ unsigned int retry)
++{
++ unsigned long flags;
++ int ret;
++
++ spin_lock_irqsave(&gt->uncore->lock, flags);
++ ret = __gen6_reset_engines(gt, engine_mask, retry);
++ spin_unlock_irqrestore(&gt->uncore->lock, flags);
++
++ return ret;
++}
++
+ static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
+ {
+ int vecs_id;
+@@ -488,9 +524,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
+ rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
+ }
+
+-static int gen11_reset_engines(struct intel_gt *gt,
+- intel_engine_mask_t engine_mask,
+- unsigned int retry)
++static int __gen11_reset_engines(struct intel_gt *gt,
++ intel_engine_mask_t engine_mask,
++ unsigned int retry)
+ {
+ static const u32 hw_engine_mask[] = {
+ [RCS0] = GEN11_GRDOM_RENDER,
+@@ -601,8 +637,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
+ struct intel_engine_cs *engine;
+ const bool reset_non_ready = retry >= 1;
+ intel_engine_mask_t tmp;
++ unsigned long flags;
+ int ret;
+
++ spin_lock_irqsave(&gt->uncore->lock, flags);
++
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
+ ret = gen8_engine_reset_prepare(engine);
+ if (ret && !reset_non_ready)
+@@ -623,15 +662,26 @@ static int gen8_reset_engines(struct intel_gt *gt,
+ */
+ }
+
++ /*
++ * Wa_22011100796:dg2, whenever Full soft reset is required,
++ * reset all individual engines firstly, and then do a full soft reset.
++ *
++ * This is best effort, so ignore any error from the initial reset.
++ */
++ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
++ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
++
+ if (GRAPHICS_VER(gt->i915) >= 11)
+- ret = gen11_reset_engines(gt, engine_mask, retry);
++ ret = __gen11_reset_engines(gt, engine_mask, retry);
+ else
+- ret = gen6_reset_engines(gt, engine_mask, retry);
++ ret = __gen6_reset_engines(gt, engine_mask, retry);
+
+ skip_reset:
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
+ gen8_engine_reset_cancel(engine);
+
++ spin_unlock_irqrestore(&gt->uncore->lock, flags);
++
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
+index 7c4d5158e03bb..7d82545d15e5c 100644
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -113,7 +113,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+- if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
++ if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+index 2958e2fae3800..02e18e70c78ea 100644
+--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+@@ -17,6 +17,7 @@
+ #include "intel_ring.h"
+ #include "shmem_utils.h"
+ #include "intel_engine_heartbeat.h"
++#include "intel_engine_pm.h"
+
+ /* Rough estimate of the typical request size, performing a flush,
+ * set-context and then emitting the batch.
+@@ -291,7 +292,9 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
+ sanitize_hwsp(engine);
+
+ /* And scrub the dirty cachelines for the HWSP */
+- clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
++ drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
++
++ intel_engine_reset_pinned_contexts(engine);
+ }
+
+ static void reset_prepare(struct intel_engine_cs *engine)
+diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+index aae609d7d85dd..de93a1e988f29 100644
+--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+@@ -621,13 +621,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
+ FF_MODE2_GS_TIMER_MASK,
+ FF_MODE2_GS_TIMER_224,
+ 0, false);
+-
+- /*
+- * Wa_14012131227:dg1
+- * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
+- */
+- wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
+- GEN9_RHWO_OPTIMIZATION_DISABLE);
+ }
+
+ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
+@@ -1056,6 +1049,22 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+ GAMT_CHKN_BIT_REG,
+ GAMT_CHKN_DISABLE_L3_COH_PIPE);
+
++ /*
++ * Wa_1408615072:icl,ehl (vsunit)
++ * Wa_1407596294:icl,ehl (hsunit)
++ */
++ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
++ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
++
++ /* Wa_1407352427:icl,ehl */
++ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
++ PSDUNIT_CLKGATE_DIS);
++
++ /* Wa_1406680159:icl,ehl */
++ wa_write_or(wal,
++ SUBSLICE_UNIT_LEVEL_CLKGATE,
++ GWUNIT_CLKGATE_DIS);
++
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
+@@ -1745,22 +1754,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
+ GEN11_ENABLE_32_PLANE_MODE);
+
+- /*
+- * Wa_1408615072:icl,ehl (vsunit)
+- * Wa_1407596294:icl,ehl (hsunit)
+- */
+- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+- VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+-
+- /* Wa_1407352427:icl,ehl */
+- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+- PSDUNIT_CLKGATE_DIS);
+-
+- /* Wa_1406680159:icl,ehl */
+- wa_write_or(wal,
+- SUBSLICE_UNIT_LEVEL_CLKGATE,
+- GWUNIT_CLKGATE_DIS);
+-
+ /*
+ * Wa_1408767742:icl[a2..forever],ehl[all]
+ * Wa_1605460711:icl[a0..c0]
+diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
+index 2c1af030310c0..8b89215afe46b 100644
+--- a/drivers/gpu/drm/i915/gt/mock_engine.c
++++ b/drivers/gpu/drm/i915/gt/mock_engine.c
+@@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
+ {
+ struct intel_context *ce;
+
++ INIT_LIST_HEAD(&engine->pinned_contexts_list);
++
+ engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
+ if (!engine->sched_engine)
+ return -ENOMEM;
+diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
+index f12ffe7976394..5d541bbcfeff3 100644
+--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
++++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
+@@ -1531,8 +1531,8 @@ static int live_busywait_preempt(void *arg)
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ enum intel_engine_id id;
+- int err = -ENOMEM;
+ u32 *map;
++ int err;
+
+ /*
+ * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
+@@ -1540,13 +1540,17 @@ static int live_busywait_preempt(void *arg)
+ */
+
+ ctx_hi = kernel_context(gt->i915, NULL);
+- if (!ctx_hi)
+- return -ENOMEM;
++ if (IS_ERR(ctx_hi))
++ return PTR_ERR(ctx_hi);
++
+ ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+
+ ctx_lo = kernel_context(gt->i915, NULL);
+- if (!ctx_lo)
++ if (IS_ERR(ctx_lo)) {
++ err = PTR_ERR(ctx_lo);
+ goto err_ctx_hi;
++ }
++
+ ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+@@ -3468,12 +3472,14 @@ static int random_priority(struct rnd_state *rnd)
+
+ struct preempt_smoke {
+ struct intel_gt *gt;
++ struct kthread_work work;
+ struct i915_gem_context **contexts;
+ struct intel_engine_cs *engine;
+ struct drm_i915_gem_object *batch;
+ unsigned int ncontext;
+ struct rnd_state prng;
+ unsigned long count;
++ int result;
+ };
+
+ static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
+@@ -3533,34 +3539,31 @@ unpin:
+ return err;
+ }
+
+-static int smoke_crescendo_thread(void *arg)
++static void smoke_crescendo_work(struct kthread_work *work)
+ {
+- struct preempt_smoke *smoke = arg;
++ struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+
+ count = 0;
+ do {
+ struct i915_gem_context *ctx = smoke_context(smoke);
+- int err;
+
+- err = smoke_submit(smoke,
+- ctx, count % I915_PRIORITY_MAX,
+- smoke->batch);
+- if (err)
+- return err;
++ smoke->result = smoke_submit(smoke, ctx,
++ count % I915_PRIORITY_MAX,
++ smoke->batch);
+
+ count++;
+- } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
++ } while (!smoke->result && count < smoke->ncontext &&
++ !__igt_timeout(end_time, NULL));
+
+ smoke->count = count;
+- return 0;
+ }
+
+ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
+ #define BATCH BIT(0)
+ {
+- struct task_struct *tsk[I915_NUM_ENGINES] = {};
++ struct kthread_worker *worker[I915_NUM_ENGINES] = {};
+ struct preempt_smoke *arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+@@ -3571,6 +3574,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
+ if (!arg)
+ return -ENOMEM;
+
++ memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
++
+ for_each_engine(engine, smoke->gt, id) {
+ arg[id] = *smoke;
+ arg[id].engine = engine;
+@@ -3578,31 +3583,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
+ arg[id].batch = NULL;
+ arg[id].count = 0;
+
+- tsk[id] = kthread_run(smoke_crescendo_thread, arg,
+- "igt/smoke:%d", id);
+- if (IS_ERR(tsk[id])) {
+- err = PTR_ERR(tsk[id]);
++ worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
++ if (IS_ERR(worker[id])) {
++ err = PTR_ERR(worker[id]);
+ break;
+ }
+- get_task_struct(tsk[id]);
+- }
+
+- yield(); /* start all threads before we kthread_stop() */
++ kthread_init_work(&arg[id].work, smoke_crescendo_work);
++ kthread_queue_work(worker[id], &arg[id].work);
++ }
+
+ count = 0;
+ for_each_engine(engine, smoke->gt, id) {
+- int status;
+-
+- if (IS_ERR_OR_NULL(tsk[id]))
++ if (IS_ERR_OR_NULL(worker[id]))
+ continue;
+
+- status = kthread_stop(tsk[id]);
+- if (status && !err)
+- err = status;
++ kthread_flush_work(&arg[id].work);
++ if (arg[id].result && !err)
++ err = arg[id].result;
+
+ count += arg[id].count;
+
+- put_task_struct(tsk[id]);
++ kthread_destroy_worker(worker[id]);
+ }
+
+ pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
+diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+index 2c1ed32ca5acd..f164912cea30f 100644
+--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
++++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+@@ -861,10 +861,13 @@ static int igt_reset_active_engine(void *arg)
+ }
+
+ struct active_engine {
+- struct task_struct *task;
++ struct kthread_worker *worker;
++ struct kthread_work work;
+ struct intel_engine_cs *engine;
+ unsigned long resets;
+ unsigned int flags;
++ bool stop;
++ int result;
+ };
+
+ #define TEST_ACTIVE BIT(0)
+@@ -895,10 +898,10 @@ static int active_request_put(struct i915_request *rq)
+ return err;
+ }
+
+-static int active_engine(void *data)
++static void active_engine(struct kthread_work *work)
+ {
+ I915_RND_STATE(prng);
+- struct active_engine *arg = data;
++ struct active_engine *arg = container_of(work, typeof(*arg), work);
+ struct intel_engine_cs *engine = arg->engine;
+ struct i915_request *rq[8] = {};
+ struct intel_context *ce[ARRAY_SIZE(rq)];
+@@ -908,16 +911,17 @@ static int active_engine(void *data)
+ for (count = 0; count < ARRAY_SIZE(ce); count++) {
+ ce[count] = intel_context_create(engine);
+ if (IS_ERR(ce[count])) {
+- err = PTR_ERR(ce[count]);
+- pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
++ arg->result = PTR_ERR(ce[count]);
++ pr_err("[%s] Create context #%ld failed: %d!\n",
++ engine->name, count, arg->result);
+ while (--count)
+ intel_context_put(ce[count]);
+- return err;
++ return;
+ }
+ }
+
+ count = 0;
+- while (!kthread_should_stop()) {
++ while (!READ_ONCE(arg->stop)) {
+ unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
+ struct i915_request *old = rq[idx];
+ struct i915_request *new;
+@@ -962,7 +966,7 @@ static int active_engine(void *data)
+ intel_context_put(ce[count]);
+ }
+
+- return err;
++ arg->result = err;
+ }
+
+ static int __igt_reset_engines(struct intel_gt *gt,
+@@ -1013,7 +1017,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
+
+ memset(threads, 0, sizeof(threads));
+ for_each_engine(other, gt, tmp) {
+- struct task_struct *tsk;
++ struct kthread_worker *worker;
+
+ threads[tmp].resets =
+ i915_reset_engine_count(global, other);
+@@ -1027,19 +1031,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
+ threads[tmp].engine = other;
+ threads[tmp].flags = flags;
+
+- tsk = kthread_run(active_engine, &threads[tmp],
+- "igt/%s", other->name);
+- if (IS_ERR(tsk)) {
+- err = PTR_ERR(tsk);
+- pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
++ worker = kthread_create_worker(0, "igt/%s",
++ other->name);
++ if (IS_ERR(worker)) {
++ err = PTR_ERR(worker);
++ pr_err("[%s] Worker create failed: %d!\n",
++ engine->name, err);
+ goto unwind;
+ }
+
+- threads[tmp].task = tsk;
+- get_task_struct(tsk);
+- }
++ threads[tmp].worker = worker;
+
+- yield(); /* start all threads before we begin */
++ kthread_init_work(&threads[tmp].work, active_engine);
++ kthread_queue_work(threads[tmp].worker,
++ &threads[tmp].work);
++ }
+
+ st_engine_heartbeat_disable_no_pm(engine);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+@@ -1187,17 +1193,20 @@ unwind:
+ for_each_engine(other, gt, tmp) {
+ int ret;
+
+- if (!threads[tmp].task)
++ if (!threads[tmp].worker)
+ continue;
+
+- ret = kthread_stop(threads[tmp].task);
++ WRITE_ONCE(threads[tmp].stop, true);
++ kthread_flush_work(&threads[tmp].work);
++ ret = READ_ONCE(threads[tmp].result);
+ if (ret) {
+ pr_err("kthread for other engine %s failed, err=%d\n",
+ other->name, ret);
+ if (!err)
+ err = ret;
+ }
+- put_task_struct(threads[tmp].task);
++
++ kthread_destroy_worker(threads[tmp].worker);
+
+ /* GuC based resets are not logged per engine */
+ if (!using_guc) {
+diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
+index b0977a3b699b8..bc2950fbbaf93 100644
+--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
++++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
+@@ -153,8 +153,8 @@ static int live_lrc_layout(void *arg)
+ continue;
+
+ hw = shmem_pin_map(engine->default_state);
+- if (IS_ERR(hw)) {
+- err = PTR_ERR(hw);
++ if (!hw) {
++ err = -ENOMEM;
+ break;
+ }
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+@@ -329,8 +329,8 @@ static int live_lrc_fixed(void *arg)
+ continue;
+
+ hw = shmem_pin_map(engine->default_state);
+- if (IS_ERR(hw)) {
+- err = PTR_ERR(hw);
++ if (!hw) {
++ err = -ENOMEM;
+ break;
+ }
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+index 76fe766ad1bc6..bb951b8d52033 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+@@ -159,6 +159,6 @@ int intel_guc_fw_upload(struct intel_guc *guc)
+ return 0;
+
+ out:
+- intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL);
++ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ return ret;
+ }
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+index 65a3e7fdb2b2c..95ff630157b9c 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+@@ -133,7 +133,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
+ {
+ u32 request[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
++ SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
+ id,
+ };
+
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+index 87d8dc8f51b96..97b5ba2fc834f 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+@@ -148,11 +148,12 @@ static inline void clr_context_registered(struct intel_context *ce)
+ #define SCHED_STATE_BLOCKED_SHIFT 4
+ #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
+ #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
++
+ static inline void init_sched_state(struct intel_context *ce)
+ {
+ /* Only should be called from guc_lrc_desc_pin() */
+ atomic_set(&ce->guc_sched_state_no_lock, 0);
+- ce->guc_state.sched_state = 0;
++ ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
+ }
+
+ static inline bool
+@@ -352,20 +353,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+ }
+
++static void decr_outstanding_submission_g2h(struct intel_guc *guc)
++{
++ if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
++ wake_up_all(&guc->ct.wq);
++}
++
+ static int guc_submission_send_busy_loop(struct intel_guc *guc,
+ const u32 *action,
+ u32 len,
+ u32 g2h_len_dw,
+ bool loop)
+ {
+- int err;
+-
+- err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
++ /*
++ * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
++ * so we don't handle the case where we don't get a reply because we
++ * aborted the send due to the channel being busy.
++ */
++ GEM_BUG_ON(g2h_len_dw && !loop);
+
+- if (!err && g2h_len_dw)
++ if (g2h_len_dw)
+ atomic_inc(&guc->outstanding_submission_g2h);
+
+- return err;
++ return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ }
+
+ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
+@@ -616,7 +626,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+ init_sched_state(ce);
+
+ if (pending_enable || destroyed || deregister) {
+- atomic_dec(&guc->outstanding_submission_g2h);
++ decr_outstanding_submission_g2h(guc);
+ if (deregister)
+ guc_signal_context_fence(ce);
+ if (destroyed) {
+@@ -635,7 +645,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
+ intel_engine_signal_breadcrumbs(ce->engine);
+ }
+ intel_context_sched_disable_unpin(ce);
+- atomic_dec(&guc->outstanding_submission_g2h);
++ decr_outstanding_submission_g2h(guc);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ guc_blocked_fence_complete(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+@@ -797,15 +807,13 @@ __unwind_incomplete_requests(struct intel_context *ce)
+
+ spin_lock_irqsave(&sched_engine->lock, flags);
+ spin_lock(&ce->guc_active.lock);
+- list_for_each_entry_safe(rq, rn,
+- &ce->guc_active.requests,
+- sched.link) {
++ list_for_each_entry_safe_reverse(rq, rn,
++ &ce->guc_active.requests,
++ sched.link) {
+ if (i915_request_completed(rq))
+ continue;
+
+ list_del_init(&rq->sched.link);
+- spin_unlock(&ce->guc_active.lock);
+-
+ __i915_request_unsubmit(rq);
+
+ /* Push the request back into the queue for later resubmission. */
+@@ -816,10 +824,8 @@ __unwind_incomplete_requests(struct intel_context *ce)
+ }
+ GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
+
+- list_add_tail(&rq->sched.link, pl);
++ list_add(&rq->sched.link, pl);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+-
+- spin_lock(&ce->guc_active.lock);
+ }
+ spin_unlock(&ce->guc_active.lock);
+ spin_unlock_irqrestore(&sched_engine->lock, flags);
+@@ -828,17 +834,33 @@ __unwind_incomplete_requests(struct intel_context *ce)
+ static void __guc_reset_context(struct intel_context *ce, bool stalled)
+ {
+ struct i915_request *rq;
++ unsigned long flags;
+ u32 head;
++ bool skip = false;
+
+ intel_context_get(ce);
+
+ /*
+- * GuC will implicitly mark the context as non-schedulable
+- * when it sends the reset notification. Make sure our state
+- * reflects this change. The context will be marked enabled
+- * on resubmission.
++ * GuC will implicitly mark the context as non-schedulable when it sends
++ * the reset notification. Make sure our state reflects this change. The
++ * context will be marked enabled on resubmission.
++ *
++ * XXX: If the context is reset as a result of the request cancellation
++ * this G2H is received after the schedule disable complete G2H which is
++ * wrong as this creates a race between the request cancellation code
++ * re-submitting the context and this G2H handler. This is a bug in the
++	 * GuC but can be worked around in the meantime by converting this to a
++ * NOP if a pending enable is in flight as this indicates that a request
++ * cancellation has occurred.
+ */
+- clr_context_enabled(ce);
++ spin_lock_irqsave(&ce->guc_state.lock, flags);
++ if (likely(!context_pending_enable(ce)))
++ clr_context_enabled(ce);
++ else
++ skip = true;
++ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
++ if (unlikely(skip))
++ goto out_put;
+
+ rq = intel_context_find_active_request(ce);
+ if (!rq) {
+@@ -857,6 +879,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
+ out_replay:
+ guc_reset_state(ce, head, stalled);
+ __unwind_incomplete_requests(ce);
++out_put:
+ intel_context_put(ce);
+ }
+
+@@ -1233,8 +1256,7 @@ static int register_context(struct intel_context *ce, bool loop)
+ }
+
+ static int __guc_action_deregister_context(struct intel_guc *guc,
+- u32 guc_id,
+- bool loop)
++ u32 guc_id)
+ {
+ u32 action[] = {
+ INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
+@@ -1243,16 +1265,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc,
+
+ return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_DEREGISTER_CONTEXT,
+- loop);
++ true);
+ }
+
+-static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
++static int deregister_context(struct intel_context *ce, u32 guc_id)
+ {
+ struct intel_guc *guc = ce_to_guc(ce);
+
+ trace_intel_context_deregister(ce);
+
+- return __guc_action_deregister_context(guc, guc_id, loop);
++ return __guc_action_deregister_context(guc, guc_id);
+ }
+
+ static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
+@@ -1340,26 +1362,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
+ * registering this context.
+ */
+ if (context_registered) {
++ bool disabled;
++ unsigned long flags;
++
+ trace_intel_context_steal_guc_id(ce);
+- if (!loop) {
++ GEM_BUG_ON(!loop);
++
++ /* Seal race with Reset */
++ spin_lock_irqsave(&ce->guc_state.lock, flags);
++ disabled = submission_disabled(guc);
++ if (likely(!disabled)) {
+ set_context_wait_for_deregister_to_register(ce);
+ intel_context_get(ce);
+- } else {
+- bool disabled;
+- unsigned long flags;
+-
+- /* Seal race with Reset */
+- spin_lock_irqsave(&ce->guc_state.lock, flags);
+- disabled = submission_disabled(guc);
+- if (likely(!disabled)) {
+- set_context_wait_for_deregister_to_register(ce);
+- intel_context_get(ce);
+- }
+- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+- if (unlikely(disabled)) {
+- reset_lrc_desc(guc, desc_idx);
+- return 0; /* Will get registered later */
+- }
++ }
++ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
++ if (unlikely(disabled)) {
++ reset_lrc_desc(guc, desc_idx);
++ return 0; /* Will get registered later */
+ }
+
+ /*
+@@ -1367,13 +1386,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
+ * context whose guc_id was stolen.
+ */
+ with_intel_runtime_pm(runtime_pm, wakeref)
+- ret = deregister_context(ce, ce->guc_id, loop);
+- if (unlikely(ret == -EBUSY)) {
+- clr_context_wait_for_deregister_to_register(ce);
+- intel_context_put(ce);
+- } else if (unlikely(ret == -ENODEV)) {
++ ret = deregister_context(ce, ce->guc_id);
++ if (unlikely(ret == -ENODEV))
+ ret = 0; /* Will get registered later */
+- }
+ } else {
+ with_intel_runtime_pm(runtime_pm, wakeref)
+ ret = register_context(ce, loop);
+@@ -1548,6 +1563,23 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
+ return &ce->guc_blocked;
+ }
+
++#define SCHED_STATE_MULTI_BLOCKED_MASK \
++ (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
++#define SCHED_STATE_NO_UNBLOCK \
++ (SCHED_STATE_MULTI_BLOCKED_MASK | \
++ SCHED_STATE_PENDING_DISABLE | \
++ SCHED_STATE_BANNED)
++
++static bool context_cant_unblock(struct intel_context *ce)
++{
++ lockdep_assert_held(&ce->guc_state.lock);
++
++ return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
++ context_guc_id_invalid(ce) ||
++ !lrc_desc_registered(ce_to_guc(ce), ce->guc_id) ||
++ !intel_context_is_pinned(ce);
++}
++
+ static void guc_context_unblock(struct intel_context *ce)
+ {
+ struct intel_guc *guc = ce_to_guc(ce);
+@@ -1562,9 +1594,7 @@ static void guc_context_unblock(struct intel_context *ce)
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+
+ if (unlikely(submission_disabled(guc) ||
+- !intel_context_is_pinned(ce) ||
+- context_pending_disable(ce) ||
+- context_blocked(ce) > 1)) {
++ context_cant_unblock(ce))) {
+ enable = false;
+ } else {
+ enable = true;
+@@ -1601,6 +1631,13 @@ static void guc_context_cancel_request(struct intel_context *ce,
+ guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
+ true);
+ }
++
++ /*
++ * XXX: Racey if context is reset, see comment in
++ * __guc_reset_context().
++ */
++ flush_work(&ce_to_guc(ce)->ct.requests.worker);
++
+ guc_context_unblock(ce);
+ }
+ }
+@@ -1730,7 +1767,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+ GEM_BUG_ON(context_enabled(ce));
+
+ clr_context_registered(ce);
+- deregister_context(ce, ce->guc_id, true);
++ deregister_context(ce, ce->guc_id);
+ }
+
+ static void __guc_context_destroy(struct intel_context *ce)
+@@ -2310,6 +2347,8 @@ static void guc_sanitize(struct intel_engine_cs *engine)
+
+ /* And scrub the dirty cachelines for the HWSP */
+ clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
++
++ intel_engine_reset_pinned_contexts(engine);
+ }
+
+ static void setup_hwsp(struct intel_engine_cs *engine)
+@@ -2385,9 +2424,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+ * and even it did this code would be run again.
+ */
+
+- for_each_engine(engine, gt, id)
+- if (engine->kernel_context)
+- guc_kernel_context_pin(guc, engine->kernel_context);
++ for_each_engine(engine, gt, id) {
++ struct intel_context *ce;
++
++ list_for_each_entry(ce, &engine->pinned_contexts_list,
++ pinned_contexts_link)
++ guc_kernel_context_pin(guc, ce);
++ }
+ }
+
+ static void guc_release(struct intel_engine_cs *engine)
+@@ -2583,12 +2626,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
+ return ce;
+ }
+
+-static void decr_outstanding_submission_g2h(struct intel_guc *guc)
+-{
+- if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
+- wake_up_all(&guc->ct.wq);
+-}
+-
+ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
+ const u32 *msg,
+ u32 len)
+@@ -2721,7 +2758,12 @@ static void guc_handle_context_reset(struct intel_guc *guc,
+ {
+ trace_intel_context_reset(ce);
+
+- if (likely(!intel_context_is_banned(ce))) {
++ /*
++ * XXX: Racey if request cancellation has occurred, see comment in
++ * __guc_reset_context().
++ */
++ if (likely(!intel_context_is_banned(ce) &&
++ !context_blocked(ce))) {
+ capture_error_state(guc, ce);
+ guc_context_replay(ce);
+ }
+@@ -2803,6 +2845,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
+ return;
+
+ xa_for_each(&guc->context_lookup, index, ce) {
++ bool found;
++
+ if (!intel_context_is_pinned(ce))
+ continue;
+
+@@ -2814,10 +2858,18 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
+ continue;
+ }
+
++ found = false;
++ spin_lock(&ce->guc_state.lock);
+ list_for_each_entry(rq, &ce->guc_active.requests, sched.link) {
+ if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
+ continue;
+
++ found = true;
++ break;
++ }
++ spin_unlock(&ce->guc_state.lock);
++
++ if (found) {
+ intel_engine_set_hung_context(engine, ce);
+
+ /* Can only cope with one hang at a time... */
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+index fc5387b410a2b..9ee22ac925409 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+@@ -191,7 +191,7 @@ int intel_huc_auth(struct intel_huc *huc)
+
+ fail:
+ i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
+- intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL);
++ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+index 3a16d08608a54..6be7fbf9d18a8 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+@@ -526,7 +526,7 @@ fail:
+ i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ err);
+- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
++ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+ return err;
+ }
+
+@@ -544,7 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
+ if (err) {
+ DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
+ intel_uc_fw_type_repr(uc_fw->type), err);
+- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
++ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
+ }
+
+ return err;
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+index 99bb1fe1af668..c1a7246fb7d6e 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
++++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+@@ -31,11 +31,12 @@ struct intel_gt;
+ * | | MISSING <--/ | \--> ERROR |
+ * | fetch | V |
+ * | | AVAILABLE |
+- * +------------+- | -+
++ * +------------+- | \ -+
++ * | | | \--> INIT FAIL |
+ * | init | V |
+ * | | /------> LOADABLE <----<-----------\ |
+ * +------------+- \ / \ \ \ -+
+- * | | FAIL <--< \--> TRANSFERRED \ |
++ * | | LOAD FAIL <--< \--> TRANSFERRED \ |
+ * | upload | \ / \ / |
+ * | | \---------/ \--> RUNNING |
+ * +------------+---------------------------------------------------+
+@@ -49,8 +50,9 @@ enum intel_uc_fw_status {
+ INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
+ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
+ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
++ INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+- INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
++ INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
+ };
+@@ -121,10 +123,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
+ return "ERROR";
+ case INTEL_UC_FIRMWARE_AVAILABLE:
+ return "AVAILABLE";
++ case INTEL_UC_FIRMWARE_INIT_FAIL:
++ return "INIT FAIL";
+ case INTEL_UC_FIRMWARE_LOADABLE:
+ return "LOADABLE";
+- case INTEL_UC_FIRMWARE_FAIL:
+- return "FAIL";
++ case INTEL_UC_FIRMWARE_LOAD_FAIL:
++ return "LOAD FAIL";
+ case INTEL_UC_FIRMWARE_TRANSFERRED:
+ return "TRANSFERRED";
+ case INTEL_UC_FIRMWARE_RUNNING:
+@@ -146,7 +150,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
+ return -ENOENT;
+ case INTEL_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+- case INTEL_UC_FIRMWARE_FAIL:
++ case INTEL_UC_FIRMWARE_INIT_FAIL:
++ case INTEL_UC_FIRMWARE_LOAD_FAIL:
+ return -EIO;
+ case INTEL_UC_FIRMWARE_SELECTED:
+ return -ESTALE;
+diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
+index c4118b8082682..11971ee929f89 100644
+--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
++++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
+@@ -3115,9 +3115,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
+ continue;
+
+ vaddr = shmem_pin_map(engine->default_state);
+- if (IS_ERR(vaddr)) {
+- gvt_err("failed to map %s->default state, err:%zd\n",
+- engine->name, PTR_ERR(vaddr));
++ if (!vaddr) {
++ gvt_err("failed to map %s->default state\n",
++ engine->name);
+ return;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
+index 9f1c209d92511..e08ed0e9f1653 100644
+--- a/drivers/gpu/drm/i915/gvt/debugfs.c
++++ b/drivers/gpu/drm/i915/gvt/debugfs.c
+@@ -175,8 +175,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
+ */
+ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu)
+ {
+- debugfs_remove_recursive(vgpu->debugfs);
+- vgpu->debugfs = NULL;
++ struct intel_gvt *gvt = vgpu->gvt;
++ struct drm_minor *minor = gvt->gt->i915->drm.primary;
++
++ if (minor->debugfs_root && gvt->debugfs_root) {
++ debugfs_remove_recursive(vgpu->debugfs);
++ vgpu->debugfs = NULL;
++ }
+ }
+
+ /**
+@@ -199,6 +204,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt)
+ */
+ void intel_gvt_debugfs_clean(struct intel_gvt *gvt)
+ {
+- debugfs_remove_recursive(gvt->debugfs_root);
+- gvt->debugfs_root = NULL;
++ struct drm_minor *minor = gvt->gt->i915->drm.primary;
++
++ if (minor->debugfs_root) {
++ debugfs_remove_recursive(gvt->debugfs_root);
++ gvt->debugfs_root = NULL;
++ }
+ }
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
+index e5c2fdfc20e33..0344a0eef95c0 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.c
++++ b/drivers/gpu/drm/i915/gvt/gtt.c
+@@ -1195,10 +1195,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+ for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
+ ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
+ start_gfn + sub_index, PAGE_SIZE, &dma_addr);
+- if (ret) {
+- ppgtt_invalidate_spt(spt);
+- return ret;
+- }
++ if (ret)
++ goto err;
+ sub_se.val64 = se->val64;
+
+ /* Copy the PAT field from PDE. */
+@@ -1217,6 +1215,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+ ops->set_pfn(se, sub_spt->shadow_page.mfn);
+ ppgtt_set_shadow_entry(spt, se, index);
+ return 0;
++err:
++	/* Cancel the existing address mappings of DMA addr. */
++ for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
++ gvt_vdbg_mm("invalidate 4K entry\n");
++ ppgtt_invalidate_pte(sub_spt, &sub_se);
++ }
++ /* Release the new allocated spt. */
++ trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
++ sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
++ ppgtt_free_spt(sub_spt);
++ return ret;
+ }
+
+ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
+@@ -2855,24 +2864,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
+ ggtt_invalidate(gvt->gt);
+ }
+
+-/**
+- * intel_vgpu_reset_gtt - reset the all GTT related status
+- * @vgpu: a vGPU
+- *
+- * This function is called from vfio core to reset reset all
+- * GTT related status, including GGTT, PPGTT, scratch page.
+- *
+- */
+-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
+-{
+- /* Shadow pages are only created when there is no page
+- * table tracking data, so remove page tracking data after
+- * removing the shadow pages.
+- */
+- intel_vgpu_destroy_all_ppgtt_mm(vgpu);
+- intel_vgpu_reset_ggtt(vgpu, true);
+-}
+-
+ /**
+ * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
+ * @gvt: intel gvt device
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
+index 3bf45672ef987..8e8fe21186243 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.h
++++ b/drivers/gpu/drm/i915/gvt/gtt.h
+@@ -224,7 +224,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
+ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
+
+ int intel_gvt_init_gtt(struct intel_gvt *gvt);
+-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
+ void intel_gvt_clean_gtt(struct intel_gvt *gvt);
+
+ struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
+diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
+index cde0a477fb497..7ed7dba42c834 100644
+--- a/drivers/gpu/drm/i915/gvt/handlers.c
++++ b/drivers/gpu/drm/i915/gvt/handlers.c
+@@ -909,7 +909,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
+ else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX)
+ index = FDI_RX_IMR_TO_PIPE(offset);
+ else {
+- gvt_vgpu_err("Unsupport registers %x\n", offset);
++ gvt_vgpu_err("Unsupported registers %x\n", offset);
+ return -EINVAL;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
+index 1bb1be5c48c84..0291d42cfba8d 100644
+--- a/drivers/gpu/drm/i915/gvt/scheduler.c
++++ b/drivers/gpu/drm/i915/gvt/scheduler.c
+@@ -694,6 +694,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
+
+ if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+ !workload->shadow_mm->ppgtt_mm.shadowed) {
++ intel_vgpu_unpin_mm(workload->shadow_mm);
+ gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+ return -EINVAL;
+ }
+diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
+index 3103c1e1fd148..3034ce392ac1f 100644
+--- a/drivers/gpu/drm/i915/i915_active.c
++++ b/drivers/gpu/drm/i915/i915_active.c
+@@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref)
+ static void debug_active_activate(struct i915_active *ref)
+ {
+ lockdep_assert_held(&ref->tree_lock);
+- if (!atomic_read(&ref->count)) /* before the first inc */
+- debug_object_activate(ref, &active_debug_desc);
++ debug_object_activate(ref, &active_debug_desc);
+ }
+
+ static void debug_active_deactivate(struct i915_active *ref)
+@@ -422,8 +421,7 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
+ * we can use it to substitute for the pending idle-barrer
+ * request that we want to emit on the kernel_context.
+ */
+- __active_del_barrier(ref, node_from_active(active));
+- return true;
++ return __active_del_barrier(ref, node_from_active(active));
+ }
+
+ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+@@ -436,18 +434,24 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+ if (err)
+ return err;
+
+- active = active_instance(ref, idx);
+- if (!active) {
+- err = -ENOMEM;
+- goto out;
+- }
++ do {
++ active = active_instance(ref, idx);
++ if (!active) {
++ err = -ENOMEM;
++ goto out;
++ }
+
+- if (replace_barrier(ref, active)) {
+- RCU_INIT_POINTER(active->fence, NULL);
+- atomic_dec(&ref->count);
+- }
+- if (!__i915_active_fence_set(active, fence))
++ if (replace_barrier(ref, active)) {
++ RCU_INIT_POINTER(active->fence, NULL);
++ atomic_dec(&ref->count);
++ }
++ } while (unlikely(is_barrier(active)));
++
++ fence = __i915_active_fence_set(active, fence);
++ if (!fence)
+ __i915_active_acquire(ref);
++ else
++ dma_fence_put(fence);
+
+ out:
+ i915_active_release(ref);
+@@ -466,13 +470,9 @@ __i915_active_set_fence(struct i915_active *ref,
+ return NULL;
+ }
+
+- rcu_read_lock();
+ prev = __i915_active_fence_set(active, fence);
+- if (prev)
+- prev = dma_fence_get_rcu(prev);
+- else
++ if (!prev)
+ __i915_active_acquire(ref);
+- rcu_read_unlock();
+
+ return prev;
+ }
+@@ -1039,10 +1039,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
+ *
+ * Records the new @fence as the last active fence along its timeline in
+ * this active tracker, moving the tracking callbacks from the previous
+- * fence onto this one. Returns the previous fence (if not already completed),
+- * which the caller must ensure is executed before the new fence. To ensure
+- * that the order of fences within the timeline of the i915_active_fence is
+- * understood, it should be locked by the caller.
++ * fence onto this one. Gets and returns a reference to the previous fence
++ * (if not already completed), which the caller must put after making sure
++ * that it is executed before the new fence. To ensure that the order of
++ * fences within the timeline of the i915_active_fence is understood, it
++ * should be locked by the caller.
+ */
+ struct dma_fence *
+ __i915_active_fence_set(struct i915_active_fence *active,
+@@ -1051,7 +1052,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
+ struct dma_fence *prev;
+ unsigned long flags;
+
+- if (fence == rcu_access_pointer(active->fence))
++ /*
++ * In case of fences embedded in i915_requests, their memory is
++	 * SLAB_TYPESAFE_BY_RCU, then it can be reused right after release
++ * by new requests. Then, there is a risk of passing back a pointer
++ * to a new, completely unrelated fence that reuses the same memory
++ * while tracked under a different active tracker. Combined with i915
++ * perf open/close operations that build await dependencies between
++ * engine kernel context requests and user requests from different
++ * timelines, this can lead to dependency loops and infinite waits.
++ *
++ * As a countermeasure, we try to get a reference to the active->fence
++ * first, so if we succeed and pass it back to our user then it is not
++ * released and potentially reused by an unrelated request before the
++ * user has a chance to set up an await dependency on it.
++ */
++ prev = i915_active_fence_get(active);
++ if (fence == prev)
+ return fence;
+
+ GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+@@ -1060,27 +1077,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
+ * Consider that we have two threads arriving (A and B), with
+ * C already resident as the active->fence.
+ *
+- * A does the xchg first, and so it sees C or NULL depending
+- * on the timing of the interrupt handler. If it is NULL, the
+- * previous fence must have been signaled and we know that
+- * we are first on the timeline. If it is still present,
+- * we acquire the lock on that fence and serialise with the interrupt
+- * handler, in the process removing it from any future interrupt
+- * callback. A will then wait on C before executing (if present).
+- *
+- * As B is second, it sees A as the previous fence and so waits for
+- * it to complete its transition and takes over the occupancy for
+- * itself -- remembering that it needs to wait on A before executing.
++ * Both A and B have got a reference to C or NULL, depending on the
++ * timing of the interrupt handler. Let's assume that if A has got C
++ * then it has locked C first (before B).
+ *
+ * Note the strong ordering of the timeline also provides consistent
+ * nesting rules for the fence->lock; the inner lock is always the
+ * older lock.
+ */
+ spin_lock_irqsave(fence->lock, flags);
+- prev = xchg(__active_fence_slot(active), fence);
+- if (prev) {
+- GEM_BUG_ON(prev == fence);
++ if (prev)
+ spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
++
++ /*
++ * A does the cmpxchg first, and so it sees C or NULL, as before, or
++ * something else, depending on the timing of other threads and/or
++ * interrupt handler. If not the same as before then A unlocks C if
++ * applicable and retries, starting from an attempt to get a new
++ * active->fence. Meanwhile, B follows the same path as A.
++	 * Once A succeeds with cmpxchg, B fails again, retries, gets A from
++ * active->fence, locks it as soon as A completes, and possibly
++ * succeeds with cmpxchg.
++ */
++ while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
++ if (prev) {
++ spin_unlock(prev->lock);
++ dma_fence_put(prev);
++ }
++ spin_unlock_irqrestore(fence->lock, flags);
++
++ prev = i915_active_fence_get(active);
++ GEM_BUG_ON(prev == fence);
++
++ spin_lock_irqsave(fence->lock, flags);
++ if (prev)
++ spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
++ }
++
++ /*
++ * If prev is NULL then the previous fence must have been signaled
++ * and we know that we are first on the timeline. If it is still
++ * present then, having the lock on that fence already acquired, we
++ * serialise with the interrupt handler, in the process of removing it
++ * from any future interrupt callback. A will then wait on C before
++ * executing (if present).
++ *
++ * As B is second, it sees A as the previous fence and so waits for
++ * it to complete its transition and takes over the occupancy for
++ * itself -- remembering that it needs to wait on A before executing.
++ */
++ if (prev) {
+ __list_del_entry(&active->cb.node);
+ spin_unlock(prev->lock); /* serialise with prev->cb_list */
+ }
+@@ -1097,11 +1143,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
+ int err = 0;
+
+ /* Must maintain timeline ordering wrt previous active requests */
+- rcu_read_lock();
+ fence = __i915_active_fence_set(active, &rq->fence);
+- if (fence) /* but the previous fence may not belong to that timeline! */
+- fence = dma_fence_get_rcu(fence);
+- rcu_read_unlock();
+ if (fence) {
+ err = i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
+index 59fb4c710c8ca..41094d51fc6fd 100644
+--- a/drivers/gpu/drm/i915/i915_drv.c
++++ b/drivers/gpu/drm/i915/i915_drv.c
+@@ -986,12 +986,9 @@ static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
+ */
+ static void i915_driver_lastclose(struct drm_device *dev)
+ {
+- struct drm_i915_private *i915 = to_i915(dev);
+-
+ intel_fbdev_restore_mode(dev);
+
+- if (HAS_DISPLAY(i915))
+- vga_switcheroo_process_delayed_switch();
++ vga_switcheroo_process_delayed_switch();
+ }
+
+ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index 005b1cec70075..236cfee1cbf0a 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -1905,6 +1905,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm);
+ struct drm_i915_gem_object *
+ i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
+ phys_addr_t size);
++struct drm_i915_gem_object *
++__i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
++ const struct drm_i915_gem_object_ops *ops,
++ phys_addr_t size);
+
+ /* i915_gem_tiling.c */
+ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
+diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
+index 1bbd09ad52873..0bba1c5baca0b 100644
+--- a/drivers/gpu/drm/i915/i915_pci.c
++++ b/drivers/gpu/drm/i915/i915_pci.c
+@@ -401,7 +401,8 @@ static const struct intel_device_info ilk_m_info = {
+ .has_coherent_ggtt = true, \
+ .has_llc = 1, \
+ .has_rc6 = 1, \
+- .has_rc6p = 1, \
++ /* snb does support rc6p, but enabling it causes various issues */ \
++ .has_rc6p = 0, \
+ .has_rps = true, \
+ .dma_mask_size = 40, \
+ .ppgtt_type = INTEL_PPGTT_ALIASING, \
+@@ -865,7 +866,7 @@ static const struct intel_device_info jsl_info = {
+ }, \
+ TGL_CURSOR_OFFSETS, \
+ .has_global_mocs = 1, \
+- .display.has_dsb = 1
++ .display.has_dsb = 0 /* FIXME: LUT load is broken with DSB */
+
+ static const struct intel_device_info tgl_info = {
+ GEN12_FEATURES,
+diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
+index 2f01b8c0284c8..774d45142091b 100644
+--- a/drivers/gpu/drm/i915/i915_perf.c
++++ b/drivers/gpu/drm/i915/i915_perf.c
+@@ -879,8 +879,9 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
+ if (ret)
+ return ret;
+
+- DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
+- stream->period_exponent);
++ drm_dbg(&stream->perf->i915->drm,
++ "OA buffer overflow (exponent = %d): force restart\n",
++ stream->period_exponent);
+
+ stream->perf->ops.oa_disable(stream);
+ stream->perf->ops.oa_enable(stream);
+@@ -1102,8 +1103,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
+ if (ret)
+ return ret;
+
+- DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
+- stream->period_exponent);
++ drm_dbg(&stream->perf->i915->drm,
++ "OA buffer overflow (exponent = %d): force restart\n",
++ stream->period_exponent);
+
+ stream->perf->ops.oa_disable(stream);
+ stream->perf->ops.oa_enable(stream);
+@@ -2857,7 +2859,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+ int ret;
+
+ if (!props->engine) {
+- DRM_DEBUG("OA engine not specified\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "OA engine not specified\n");
+ return -EINVAL;
+ }
+
+@@ -2867,18 +2870,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+ * IDs
+ */
+ if (!perf->metrics_kobj) {
+- DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "OA metrics weren't advertised via sysfs\n");
+ return -EINVAL;
+ }
+
+ if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+ (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
+- DRM_DEBUG("Only OA report sampling supported\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "Only OA report sampling supported\n");
+ return -EINVAL;
+ }
+
+ if (!perf->ops.enable_metric_set) {
+- DRM_DEBUG("OA unit not supported\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "OA unit not supported\n");
+ return -ENODEV;
+ }
+
+@@ -2888,12 +2894,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+ * we currently only allow exclusive access
+ */
+ if (perf->exclusive_stream) {
+- DRM_DEBUG("OA unit already in use\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "OA unit already in use\n");
+ return -EBUSY;
+ }
+
+ if (!props->oa_format) {
+- DRM_DEBUG("OA report format not specified\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "OA report format not specified\n");
+ return -EINVAL;
+ }
+
+@@ -2923,20 +2931,23 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+ if (stream->ctx) {
+ ret = oa_get_render_ctx_id(stream);
+ if (ret) {
+- DRM_DEBUG("Invalid context id to filter with\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "Invalid context id to filter with\n");
+ return ret;
+ }
+ }
+
+ ret = alloc_noa_wait(stream);
+ if (ret) {
+- DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "Unable to allocate NOA wait batch buffer\n");
+ goto err_noa_wait_alloc;
+ }
+
+ stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+ if (!stream->oa_config) {
+- DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
++ drm_dbg(&stream->perf->i915->drm,
++ "Invalid OA config id=%i\n", props->metrics_set);
+ ret = -EINVAL;
+ goto err_config;
+ }
+@@ -2967,11 +2978,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+
+ ret = i915_perf_stream_enable_sync(stream);
+ if (ret) {
+- DRM_DEBUG("Unable to enable metric set\n");
++ drm_dbg(&stream->perf->i915->drm,
++ "Unable to enable metric set\n");
+ goto err_enable;
+ }
+
+- DRM_DEBUG("opening stream oa config uuid=%s\n",
++ drm_dbg(&stream->perf->i915->drm,
++ "opening stream oa config uuid=%s\n",
+ stream->oa_config->uuid);
+
+ hrtimer_init(&stream->poll_check_timer,
+@@ -3423,7 +3436,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
+
+ specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
+ if (IS_ERR(specific_ctx)) {
+- DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
++ drm_dbg(&perf->i915->drm,
++ "Failed to look up context with ID %u for opening perf stream\n",
+ ctx_handle);
+ ret = PTR_ERR(specific_ctx);
+ goto err;
+@@ -3457,7 +3471,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
+
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+- DRM_DEBUG("preemption disable with no context\n");
++ drm_dbg(&perf->i915->drm,
++ "preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+@@ -3479,7 +3494,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
+ */
+ if (privileged_op &&
+ i915_perf_stream_paranoid && !perfmon_capable()) {
+- DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
++ drm_dbg(&perf->i915->drm,
++ "Insufficient privileges to open i915 perf stream\n");
+ ret = -EACCES;
+ goto err_ctx;
+ }
+@@ -3586,7 +3602,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
+
+ if (!n_props) {
+- DRM_DEBUG("No i915 perf properties given\n");
++ drm_dbg(&perf->i915->drm,
++ "No i915 perf properties given\n");
+ return -EINVAL;
+ }
+
+@@ -3595,7 +3612,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ I915_ENGINE_CLASS_RENDER,
+ 0);
+ if (!props->engine) {
+- DRM_DEBUG("No RENDER-capable engines\n");
++ drm_dbg(&perf->i915->drm,
++ "No RENDER-capable engines\n");
+ return -EINVAL;
+ }
+
+@@ -3606,7 +3624,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ * from userspace.
+ */
+ if (n_props >= DRM_I915_PERF_PROP_MAX) {
+- DRM_DEBUG("More i915 perf properties specified than exist\n");
++ drm_dbg(&perf->i915->drm,
++ "More i915 perf properties specified than exist\n");
+ return -EINVAL;
+ }
+
+@@ -3623,7 +3642,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ return ret;
+
+ if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
+- DRM_DEBUG("Unknown i915 perf property ID\n");
++ drm_dbg(&perf->i915->drm,
++ "Unknown i915 perf property ID\n");
+ return -EINVAL;
+ }
+
+@@ -3638,19 +3658,22 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ break;
+ case DRM_I915_PERF_PROP_OA_METRICS_SET:
+ if (value == 0) {
+- DRM_DEBUG("Unknown OA metric set ID\n");
++ drm_dbg(&perf->i915->drm,
++ "Unknown OA metric set ID\n");
+ return -EINVAL;
+ }
+ props->metrics_set = value;
+ break;
+ case DRM_I915_PERF_PROP_OA_FORMAT:
+ if (value == 0 || value >= I915_OA_FORMAT_MAX) {
+- DRM_DEBUG("Out-of-range OA report format %llu\n",
++ drm_dbg(&perf->i915->drm,
++ "Out-of-range OA report format %llu\n",
+ value);
+ return -EINVAL;
+ }
+ if (!oa_format_valid(perf, value)) {
+- DRM_DEBUG("Unsupported OA report format %llu\n",
++ drm_dbg(&perf->i915->drm,
++ "Unsupported OA report format %llu\n",
+ value);
+ return -EINVAL;
+ }
+@@ -3658,7 +3681,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ break;
+ case DRM_I915_PERF_PROP_OA_EXPONENT:
+ if (value > OA_EXPONENT_MAX) {
+- DRM_DEBUG("OA timer exponent too high (> %u)\n",
++ drm_dbg(&perf->i915->drm,
++ "OA timer exponent too high (> %u)\n",
+ OA_EXPONENT_MAX);
+ return -EINVAL;
+ }
+@@ -3686,7 +3710,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ oa_freq_hz = 0;
+
+ if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
+- DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
++ drm_dbg(&perf->i915->drm,
++ "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
+ i915_oa_max_sample_rate);
+ return -EACCES;
+ }
+@@ -3703,13 +3728,15 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ if (copy_from_user(&user_sseu,
+ u64_to_user_ptr(value),
+ sizeof(user_sseu))) {
+- DRM_DEBUG("Unable to copy global sseu parameter\n");
++ drm_dbg(&perf->i915->drm,
++ "Unable to copy global sseu parameter\n");
+ return -EFAULT;
+ }
+
+ ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+ if (ret) {
+- DRM_DEBUG("Invalid SSEU configuration\n");
++ drm_dbg(&perf->i915->drm,
++ "Invalid SSEU configuration\n");
+ return ret;
+ }
+ props->has_sseu = true;
+@@ -3717,7 +3744,8 @@ static int read_properties_unlocked(struct i915_perf *perf,
+ }
+ case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
+ if (value < 100000 /* 100us */) {
+- DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
++ drm_dbg(&perf->i915->drm,
++ "OA availability timer too small (%lluns < 100us)\n",
+ value);
+ return -EINVAL;
+ }
+@@ -3768,7 +3796,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
+ int ret;
+
+ if (!perf->i915) {
+- DRM_DEBUG("i915 perf interface not available for this system\n");
++ drm_dbg(&perf->i915->drm,
++ "i915 perf interface not available for this system\n");
+ return -ENOTSUPP;
+ }
+
+@@ -3776,7 +3805,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
+ I915_PERF_FLAG_FD_NONBLOCK |
+ I915_PERF_FLAG_DISABLED;
+ if (param->flags & ~known_open_flags) {
+- DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
++ drm_dbg(&perf->i915->drm,
++ "Unknown drm_i915_perf_open_param flag\n");
+ return -EINVAL;
+ }
+
+@@ -3986,7 +4016,8 @@ static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
+ goto addr_err;
+
+ if (!is_valid(perf, addr)) {
+- DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
++ drm_dbg(&perf->i915->drm,
++ "Invalid oa_reg address: %X\n", addr);
+ err = -EINVAL;
+ goto addr_err;
+ }
+@@ -4008,8 +4039,8 @@ addr_err:
+ return ERR_PTR(err);
+ }
+
+-static ssize_t show_dynamic_id(struct device *dev,
+- struct device_attribute *attr,
++static ssize_t show_dynamic_id(struct kobject *kobj,
++ struct kobj_attribute *attr,
+ char *buf)
+ {
+ struct i915_oa_config *oa_config =
+@@ -4060,30 +4091,35 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ int err, id;
+
+ if (!perf->i915) {
+- DRM_DEBUG("i915 perf interface not available for this system\n");
++ drm_dbg(&perf->i915->drm,
++ "i915 perf interface not available for this system\n");
+ return -ENOTSUPP;
+ }
+
+ if (!perf->metrics_kobj) {
+- DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
++ drm_dbg(&perf->i915->drm,
++ "OA metrics weren't advertised via sysfs\n");
+ return -EINVAL;
+ }
+
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
+- DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Insufficient privileges to add i915 OA config\n");
+ return -EACCES;
+ }
+
+ if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
+ (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
+ (!args->flex_regs_ptr || !args->n_flex_regs)) {
+- DRM_DEBUG("No OA registers given\n");
++ drm_dbg(&perf->i915->drm,
++ "No OA registers given\n");
+ return -EINVAL;
+ }
+
+ oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
+ if (!oa_config) {
+- DRM_DEBUG("Failed to allocate memory for the OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to allocate memory for the OA config\n");
+ return -ENOMEM;
+ }
+
+@@ -4091,7 +4127,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ kref_init(&oa_config->ref);
+
+ if (!uuid_is_valid(args->uuid)) {
+- DRM_DEBUG("Invalid uuid format for OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Invalid uuid format for OA config\n");
+ err = -EINVAL;
+ goto reg_err;
+ }
+@@ -4108,7 +4145,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ args->n_mux_regs);
+
+ if (IS_ERR(regs)) {
+- DRM_DEBUG("Failed to create OA config for mux_regs\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to create OA config for mux_regs\n");
+ err = PTR_ERR(regs);
+ goto reg_err;
+ }
+@@ -4121,7 +4159,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ args->n_boolean_regs);
+
+ if (IS_ERR(regs)) {
+- DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to create OA config for b_counter_regs\n");
+ err = PTR_ERR(regs);
+ goto reg_err;
+ }
+@@ -4140,7 +4179,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ args->n_flex_regs);
+
+ if (IS_ERR(regs)) {
+- DRM_DEBUG("Failed to create OA config for flex_regs\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to create OA config for flex_regs\n");
+ err = PTR_ERR(regs);
+ goto reg_err;
+ }
+@@ -4156,7 +4196,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ */
+ idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+ if (!strcmp(tmp->uuid, oa_config->uuid)) {
+- DRM_DEBUG("OA config already exists with this uuid\n");
++ drm_dbg(&perf->i915->drm,
++ "OA config already exists with this uuid\n");
+ err = -EADDRINUSE;
+ goto sysfs_err;
+ }
+@@ -4164,7 +4205,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+
+ err = create_dynamic_oa_sysfs_entry(perf, oa_config);
+ if (err) {
+- DRM_DEBUG("Failed to create sysfs entry for OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to create sysfs entry for OA config\n");
+ goto sysfs_err;
+ }
+
+@@ -4173,22 +4215,25 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
+ oa_config, 2,
+ 0, GFP_KERNEL);
+ if (oa_config->id < 0) {
+- DRM_DEBUG("Failed to create sysfs entry for OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to create sysfs entry for OA config\n");
+ err = oa_config->id;
+ goto sysfs_err;
+ }
++ id = oa_config->id;
+
++ drm_dbg(&perf->i915->drm,
++ "Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+ mutex_unlock(&perf->metrics_lock);
+
+- DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+-
+- return oa_config->id;
++ return id;
+
+ sysfs_err:
+ mutex_unlock(&perf->metrics_lock);
+ reg_err:
+ i915_oa_config_put(oa_config);
+- DRM_DEBUG("Failed to add new OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to add new OA config\n");
+ return err;
+ }
+
+@@ -4212,12 +4257,14 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
+ int ret;
+
+ if (!perf->i915) {
+- DRM_DEBUG("i915 perf interface not available for this system\n");
++ drm_dbg(&perf->i915->drm,
++ "i915 perf interface not available for this system\n");
+ return -ENOTSUPP;
+ }
+
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
+- DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Insufficient privileges to remove i915 OA config\n");
+ return -EACCES;
+ }
+
+@@ -4227,7 +4274,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
+
+ oa_config = idr_find(&perf->metrics_idr, *arg);
+ if (!oa_config) {
+- DRM_DEBUG("Failed to remove unknown OA config\n");
++ drm_dbg(&perf->i915->drm,
++ "Failed to remove unknown OA config\n");
+ ret = -ENOENT;
+ goto err_unlock;
+ }
+@@ -4240,7 +4288,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
+
+ mutex_unlock(&perf->metrics_lock);
+
+- DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
++ drm_dbg(&perf->i915->drm,
++ "Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
+
+ i915_oa_config_put(oa_config);
+
+diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
+index aa14354a51203..f682c7a6474d2 100644
+--- a/drivers/gpu/drm/i915/i915_perf_types.h
++++ b/drivers/gpu/drm/i915/i915_perf_types.h
+@@ -55,7 +55,7 @@ struct i915_oa_config {
+
+ struct attribute_group sysfs_metric;
+ struct attribute *attrs[2];
+- struct device_attribute sysfs_metric_id;
++ struct kobj_attribute sysfs_metric_id;
+
+ struct kref ref;
+ struct rcu_head rcu;
+diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
+index 9023d4ecf3b37..3c70aa5229e5a 100644
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -2669,6 +2669,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+ #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
+ #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+
++#define GEN8_RTCR _MMIO(0x4260)
++#define GEN8_M1TCR _MMIO(0x4264)
++#define GEN8_M2TCR _MMIO(0x4268)
++#define GEN8_BTCR _MMIO(0x426c)
++#define GEN8_VTCR _MMIO(0x4270)
++
+ #if 0
+ #define PRB0_TAIL _MMIO(0x2030)
+ #define PRB0_HEAD _MMIO(0x2034)
+@@ -2763,6 +2769,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+ #define FAULT_VA_HIGH_BITS (0xf << 0)
+ #define FAULT_GTT_SEL (1 << 4)
+
++#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
++#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
++#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
++#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
++
+ #define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
+
+ #define FPGA_DBG _MMIO(0x42300)
+@@ -7447,7 +7458,7 @@ enum {
+ #define _SEL_FETCH_PLANE_BASE_6_A 0x70940
+ #define _SEL_FETCH_PLANE_BASE_7_A 0x70960
+ #define _SEL_FETCH_PLANE_BASE_CUR_A 0x70880
+-#define _SEL_FETCH_PLANE_BASE_1_B 0x70990
++#define _SEL_FETCH_PLANE_BASE_1_B 0x71890
+
+ #define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \
+ _SEL_FETCH_PLANE_BASE_1_A, \
+@@ -7807,6 +7818,22 @@ enum {
+ /* MMIO address range for DMC program (0x80000 - 0x82FFF) */
+ #define DMC_MMIO_START_RANGE 0x80000
+ #define DMC_MMIO_END_RANGE 0x8FFFF
++#define DMC_V1_MMIO_START_RANGE 0x80000
++#define TGL_MAIN_MMIO_START 0x8F000
++#define TGL_MAIN_MMIO_END 0x8FFFF
++#define _TGL_PIPEA_MMIO_START 0x92000
++#define _TGL_PIPEA_MMIO_END 0x93FFF
++#define _TGL_PIPEB_MMIO_START 0x96000
++#define _TGL_PIPEB_MMIO_END 0x97FFF
++#define ADLP_PIPE_MMIO_START 0x5F000
++#define ADLP_PIPE_MMIO_END 0x5FFFF
++
++#define TGL_PIPE_MMIO_START(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\
++ _TGL_PIPEB_MMIO_START)
++
++#define TGL_PIPE_MMIO_END(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\
++ _TGL_PIPEB_MMIO_END)
++
+ #define SKL_DMC_DC3_DC5_COUNT _MMIO(0x80030)
+ #define SKL_DMC_DC5_DC6_COUNT _MMIO(0x8002C)
+ #define BXT_DMC_DC3_DC5_COUNT _MMIO(0x80038)
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+index 79da5eca60af5..bd85113ad150c 100644
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -1596,6 +1596,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
+ &rq->dep,
+ 0);
+ }
++ if (prev)
++ i915_request_put(prev);
+
+ /*
+ * Make sure that no request gazumped us - if it was allocated after
+diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c
+index de0e224b56ce3..f1ce9f591efaf 100644
+--- a/drivers/gpu/drm/i915/i915_switcheroo.c
++++ b/drivers/gpu/drm/i915/i915_switcheroo.c
+@@ -18,6 +18,10 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev,
+ dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n");
+ return;
+ }
++ if (!HAS_DISPLAY(i915)) {
++ dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n");
++ return;
++ }
+
+ if (state == VGA_SWITCHEROO_ON) {
+ drm_info(&i915->drm, "switched on\n");
+@@ -43,7 +47,7 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
+ * locking inversion with the driver load path. And the access here is
+ * completely racy anyway. So don't bother with locking for now.
+ */
+- return i915 && atomic_read(&i915->drm.open_count) == 0;
++ return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0;
+ }
+
+ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
+diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
+index cdf0e9c6fd73e..313c0000a814e 100644
+--- a/drivers/gpu/drm/i915/i915_sysfs.c
++++ b/drivers/gpu/drm/i915/i915_sysfs.c
+@@ -445,7 +445,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+ struct device *kdev = kobj_to_dev(kobj);
+ struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
+ struct i915_gpu_coredump *gpu;
+- ssize_t ret;
++ ssize_t ret = 0;
++
++ /*
++ * FIXME: Concurrent clients triggering resets and reading + clearing
++ * dumps can cause inconsistent sysfs reads when a user calls in with a
++ * non-zero offset to complete a prior partial read but the
++ * gpu_coredump has been cleared or replaced.
++ */
+
+ gpu = i915_first_error_state(i915);
+ if (IS_ERR(gpu)) {
+@@ -457,8 +464,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+ const char *str = "No error state collected\n";
+ size_t len = strlen(str);
+
+- ret = min_t(size_t, count, len - off);
+- memcpy(buf, str + off, ret);
++ if (off < len) {
++ ret = min_t(size_t, count, len - off);
++ memcpy(buf, str + off, ret);
++ }
+ }
+
+ return ret;
+diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
+index 4b7fc4647e460..3df304edabc79 100644
+--- a/drivers/gpu/drm/i915/i915_vma.c
++++ b/drivers/gpu/drm/i915/i915_vma.c
+@@ -376,6 +376,7 @@ int i915_vma_bind(struct i915_vma *vma,
+ u32 bind_flags;
+ u32 vma_flags;
+
++ lockdep_assert_held(&vma->vm->mutex);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(vma->size > vma->node.size);
+
+@@ -434,6 +435,9 @@ int i915_vma_bind(struct i915_vma *vma,
+ vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
+ }
+
++ if (vma->obj)
++ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
++
+ atomic_or(bind_flags, &vma->flags);
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
+index d1d4b97b86f59..287f5a3d0b354 100644
+--- a/drivers/gpu/drm/i915/intel_pch.c
++++ b/drivers/gpu/drm/i915/intel_pch.c
+@@ -108,6 +108,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
+ /* Comet Lake V PCH is based on KBP, which is SPT compatible */
+ return PCH_SPT;
+ case INTEL_PCH_ICP_DEVICE_ID_TYPE:
++ case INTEL_PCH_ICP2_DEVICE_ID_TYPE:
+ drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n");
+ drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv));
+ return PCH_ICP;
+@@ -123,7 +124,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
+ !IS_GEN9_BC(dev_priv));
+ return PCH_TGP;
+ case INTEL_PCH_JSP_DEVICE_ID_TYPE:
+- case INTEL_PCH_JSP2_DEVICE_ID_TYPE:
+ drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n");
+ drm_WARN_ON(&dev_priv->drm, !IS_JSL_EHL(dev_priv));
+ return PCH_JSP;
+diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
+index 7c0d83d292dcc..994c56fcb1991 100644
+--- a/drivers/gpu/drm/i915/intel_pch.h
++++ b/drivers/gpu/drm/i915/intel_pch.h
+@@ -50,11 +50,11 @@ enum intel_pch {
+ #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
+ #define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380
+ #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
++#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880
+ #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
+ #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080
+ #define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380
+ #define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80
+-#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880
+ #define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80
+ #define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180
+ #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100
+diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
+index a725792d5248b..614cb84f2ceec 100644
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -2863,7 +2863,7 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
+ }
+
+ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
+- u16 wm[8])
++ u16 wm[])
+ {
+ struct intel_uncore *uncore = &dev_priv->uncore;
+
+@@ -3063,9 +3063,9 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
+ * The BIOS provided WM memory latency values are often
+ * inadequate for high resolution displays. Adjust them.
+ */
+- changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
+- ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
+- ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
++ changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12);
++ changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12);
++ changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
+
+ if (!changed)
+ return;
+@@ -3713,8 +3713,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv)
+ MISSING_CASE(DISPLAY_VER(dev_priv));
+ }
+
+- /* Default to an unusable block time */
+- dev_priv->sagv_block_time_us = -1;
++ dev_priv->sagv_block_time_us = 0;
+ }
+
+ /*
+@@ -4020,6 +4019,17 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
+ return ret;
+ }
+
++ if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
++ intel_can_enable_sagv(dev_priv, old_bw_state)) {
++ ret = intel_atomic_serialize_global_state(&new_bw_state->base);
++ if (ret)
++ return ret;
++ } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
++ ret = intel_atomic_lock_global_state(&new_bw_state->base);
++ if (ret)
++ return ret;
++ }
++
+ for_each_new_intel_crtc_in_state(state, crtc,
+ new_crtc_state, i) {
+ struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal;
+@@ -4035,17 +4045,6 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
+ intel_can_enable_sagv(dev_priv, new_bw_state);
+ }
+
+- if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
+- intel_can_enable_sagv(dev_priv, old_bw_state)) {
+- ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+- if (ret)
+- return ret;
+- } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
+- ret = intel_atomic_lock_global_state(&new_bw_state->base);
+- if (ret)
+- return ret;
+- }
+-
+ return 0;
+ }
+
+@@ -4708,6 +4707,10 @@ static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = {
+ };
+
+ static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = {
++ /*
++ * Keep the join_mbus cases first so check_mbus_joined()
++ * will prefer them over the !join_mbus cases.
++ */
+ {
+ .active_pipes = BIT(PIPE_A),
+ .dbuf_mask = {
+@@ -4722,6 +4725,20 @@ static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = {
+ },
+ .join_mbus = true,
+ },
++ {
++ .active_pipes = BIT(PIPE_A),
++ .dbuf_mask = {
++ [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2),
++ },
++ .join_mbus = false,
++ },
++ {
++ .active_pipes = BIT(PIPE_B),
++ .dbuf_mask = {
++ [PIPE_B] = BIT(DBUF_S3) | BIT(DBUF_S4),
++ },
++ .join_mbus = false,
++ },
+ {
+ .active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
+ .dbuf_mask = {
+@@ -4826,7 +4843,7 @@ static bool check_mbus_joined(u8 active_pipes,
+ {
+ int i;
+
+- for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
++ for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
+ if (dbuf_slices[i].active_pipes == active_pipes)
+ return dbuf_slices[i].join_mbus;
+ }
+@@ -4838,13 +4855,14 @@ static bool adlp_check_mbus_joined(u8 active_pipes)
+ return check_mbus_joined(active_pipes, adlp_allowed_dbufs);
+ }
+
+-static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes,
++static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus,
+ const struct dbuf_slice_conf_entry *dbuf_slices)
+ {
+ int i;
+
+- for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
+- if (dbuf_slices[i].active_pipes == active_pipes)
++ for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
++ if (dbuf_slices[i].active_pipes == active_pipes &&
++ dbuf_slices[i].join_mbus == join_mbus)
+ return dbuf_slices[i].dbuf_mask[pipe];
+ }
+ return 0;
+@@ -4855,7 +4873,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes,
+ * returns correspondent DBuf slice mask as stated in BSpec for particular
+ * platform.
+ */
+-static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
++static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
+ {
+ /*
+ * FIXME: For ICL this is still a bit unclear as prev BSpec revision
+@@ -4869,37 +4887,41 @@ static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
+ * still here - we will need it once those additional constraints
+ * pop up.
+ */
+- return compute_dbuf_slices(pipe, active_pipes, icl_allowed_dbufs);
++ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
++ icl_allowed_dbufs);
+ }
+
+-static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes)
++static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
+ {
+- return compute_dbuf_slices(pipe, active_pipes, tgl_allowed_dbufs);
++ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
++ tgl_allowed_dbufs);
+ }
+
+-static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes)
++static u8 adlp_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
+ {
+- return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs);
++ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
++ adlp_allowed_dbufs);
+ }
+
+-static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes)
++static u8 dg2_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus)
+ {
+- return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs);
++ return compute_dbuf_slices(pipe, active_pipes, join_mbus,
++ dg2_allowed_dbufs);
+ }
+
+-static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes)
++static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes, bool join_mbus)
+ {
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ enum pipe pipe = crtc->pipe;
+
+ if (IS_DG2(dev_priv))
+- return dg2_compute_dbuf_slices(pipe, active_pipes);
++ return dg2_compute_dbuf_slices(pipe, active_pipes, join_mbus);
+ else if (IS_ALDERLAKE_P(dev_priv))
+- return adlp_compute_dbuf_slices(pipe, active_pipes);
++ return adlp_compute_dbuf_slices(pipe, active_pipes, join_mbus);
+ else if (DISPLAY_VER(dev_priv) == 12)
+- return tgl_compute_dbuf_slices(pipe, active_pipes);
++ return tgl_compute_dbuf_slices(pipe, active_pipes, join_mbus);
+ else if (DISPLAY_VER(dev_priv) == 11)
+- return icl_compute_dbuf_slices(pipe, active_pipes);
++ return icl_compute_dbuf_slices(pipe, active_pipes, join_mbus);
+ /*
+ * For anything else just return one slice yet.
+ * Should be extended for other platforms.
+@@ -5380,10 +5402,16 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
+ wp->y_tiled = modifier == I915_FORMAT_MOD_Y_TILED ||
+ modifier == I915_FORMAT_MOD_Yf_TILED ||
+ modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
+- modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
++ modifier == I915_FORMAT_MOD_Yf_TILED_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC;
+ wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED;
+ wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
+- modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
++ modifier == I915_FORMAT_MOD_Yf_TILED_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
++ modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC;
+ wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier);
+
+ wp->width = width;
+@@ -5612,7 +5640,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
+ result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1;
+ result->enable = true;
+
+- if (DISPLAY_VER(dev_priv) < 12)
++ if (DISPLAY_VER(dev_priv) < 12 && dev_priv->sagv_block_time_us)
+ result->can_sagv = latency >= dev_priv->sagv_block_time_us;
+ }
+
+@@ -5643,7 +5671,10 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state,
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+ struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0;
+ struct skl_wm_level *levels = plane_wm->wm;
+- unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us;
++ unsigned int latency = 0;
++
++ if (dev_priv->sagv_block_time_us)
++ latency = dev_priv->sagv_block_time_us + dev_priv->wm.skl_latency[0];
+
+ skl_compute_plane_wm(crtc_state, 0, latency,
+ wm_params, &levels[0],
+@@ -6110,11 +6141,16 @@ skl_compute_ddb(struct intel_atomic_state *state)
+ return ret;
+ }
+
++ if (IS_ALDERLAKE_P(dev_priv))
++ new_dbuf_state->joined_mbus =
++ adlp_check_mbus_joined(new_dbuf_state->active_pipes);
++
+ for_each_intel_crtc(&dev_priv->drm, crtc) {
+ enum pipe pipe = crtc->pipe;
+
+ new_dbuf_state->slices[pipe] =
+- skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes);
++ skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes,
++ new_dbuf_state->joined_mbus);
+
+ if (old_dbuf_state->slices[pipe] == new_dbuf_state->slices[pipe])
+ continue;
+@@ -6126,9 +6162,6 @@ skl_compute_ddb(struct intel_atomic_state *state)
+
+ new_dbuf_state->enabled_slices = intel_dbuf_enabled_slices(new_dbuf_state);
+
+- if (IS_ALDERLAKE_P(dev_priv))
+- new_dbuf_state->joined_mbus = adlp_check_mbus_joined(new_dbuf_state->active_pipes);
+-
+ if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices ||
+ old_dbuf_state->joined_mbus != new_dbuf_state->joined_mbus) {
+ ret = intel_atomic_serialize_global_state(&new_dbuf_state->base);
+@@ -6609,8 +6642,12 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
+ enum pipe pipe = crtc->pipe;
+ unsigned int mbus_offset;
+ enum plane_id plane_id;
++ u8 slices;
+
+- skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
++ memset(&crtc_state->wm.skl.optimal, 0,
++ sizeof(crtc_state->wm.skl.optimal));
++ if (crtc_state->hw.active)
++ skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
+ crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal;
+
+ memset(&dbuf_state->ddb[pipe], 0, sizeof(dbuf_state->ddb[pipe]));
+@@ -6621,6 +6658,9 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
+ struct skl_ddb_entry *ddb_uv =
+ &crtc_state->wm.skl.plane_ddb_uv[plane_id];
+
++ if (!crtc_state->hw.active)
++ continue;
++
+ skl_ddb_get_hw_plane_state(dev_priv, crtc->pipe,
+ plane_id, ddb_y, ddb_uv);
+
+@@ -6628,19 +6668,22 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
+ skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_uv);
+ }
+
+- dbuf_state->slices[pipe] =
+- skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes);
+-
+ dbuf_state->weight[pipe] = intel_crtc_ddb_weight(crtc_state);
+
+ /*
+ * Used for checking overlaps, so we need absolute
+ * offsets instead of MBUS relative offsets.
+ */
+- mbus_offset = mbus_ddb_offset(dev_priv, dbuf_state->slices[pipe]);
++ slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes,
++ dbuf_state->joined_mbus);
++ mbus_offset = mbus_ddb_offset(dev_priv, slices);
+ crtc_state->wm.skl.ddb.start = mbus_offset + dbuf_state->ddb[pipe].start;
+ crtc_state->wm.skl.ddb.end = mbus_offset + dbuf_state->ddb[pipe].end;
+
++ /* The slices actually used by the planes on the pipe */
++ dbuf_state->slices[pipe] =
++ skl_ddb_dbuf_slice_mask(dev_priv, &crtc_state->wm.skl.ddb);
++
+ drm_dbg_kms(&dev_priv->drm,
+ "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x, mbus joined: %s\n",
+ crtc->base.base.id, crtc->base.name,
+@@ -6652,6 +6695,74 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
+ dbuf_state->enabled_slices = dev_priv->dbuf.enabled_slices;
+ }
+
++static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915)
++{
++ const struct intel_dbuf_state *dbuf_state =
++ to_intel_dbuf_state(i915->dbuf.obj.state);
++ struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
++ struct intel_crtc *crtc;
++
++ for_each_intel_crtc(&i915->drm, crtc) {
++ const struct intel_crtc_state *crtc_state =
++ to_intel_crtc_state(crtc->base.state);
++
++ entries[crtc->pipe] = crtc_state->wm.skl.ddb;
++ }
++
++ for_each_intel_crtc(&i915->drm, crtc) {
++ const struct intel_crtc_state *crtc_state =
++ to_intel_crtc_state(crtc->base.state);
++ u8 slices;
++
++ slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes,
++ dbuf_state->joined_mbus);
++ if (dbuf_state->slices[crtc->pipe] & ~slices)
++ return true;
++
++ if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries,
++ I915_MAX_PIPES, crtc->pipe))
++ return true;
++ }
++
++ return false;
++}
++
++void skl_wm_sanitize(struct drm_i915_private *i915)
++{
++ struct intel_crtc *crtc;
++
++ /*
++ * On TGL/RKL (at least) the BIOS likes to assign the planes
++ * to the wrong DBUF slices. This will cause an infinite loop
++ * in skl_commit_modeset_enables() as it can't find a way to
++ * transition between the old bogus DBUF layout to the new
++ * proper DBUF layout without DBUF allocation overlaps between
++ * the planes (which cannot be allowed or else the hardware
++ * may hang). If we detect a bogus DBUF layout just turn off
++ * all the planes so that skl_commit_modeset_enables() can
++ * simply ignore them.
++ */
++ if (!skl_dbuf_is_misconfigured(i915))
++ return;
++
++ drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n");
++
++ for_each_intel_crtc(&i915->drm, crtc) {
++ struct intel_plane *plane = to_intel_plane(crtc->base.primary);
++ const struct intel_plane_state *plane_state =
++ to_intel_plane_state(plane->base.state);
++ struct intel_crtc_state *crtc_state =
++ to_intel_crtc_state(crtc->base.state);
++
++ if (plane_state->uapi.visible)
++ intel_plane_disable_noatomic(crtc, plane);
++
++ drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0);
++
++ memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb));
++ }
++}
++
+ static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
+ {
+ struct drm_device *dev = crtc->base.dev;
+diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
+index 91f23b7f0af2e..79d89fe22d8c8 100644
+--- a/drivers/gpu/drm/i915/intel_pm.h
++++ b/drivers/gpu/drm/i915/intel_pm.h
+@@ -48,6 +48,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
+ struct skl_pipe_wm *out);
+ void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
+ void vlv_wm_sanitize(struct drm_i915_private *dev_priv);
++void skl_wm_sanitize(struct drm_i915_private *dev_priv);
+ bool intel_can_enable_sagv(struct drm_i915_private *dev_priv,
+ const struct intel_bw_state *bw_state);
+ void intel_sagv_pre_plane_update(struct intel_atomic_state *state);
+diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
+index 6b38bc2811c1b..de8d0558389c4 100644
+--- a/drivers/gpu/drm/i915/intel_uncore.c
++++ b/drivers/gpu/drm/i915/intel_uncore.c
+@@ -718,7 +718,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
+ }
+
+ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
+- enum forcewake_domains fw_domains)
++ enum forcewake_domains fw_domains,
++ bool delayed)
+ {
+ struct intel_uncore_forcewake_domain *domain;
+ unsigned int tmp;
+@@ -733,7 +734,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ continue;
+ }
+
+- uncore->funcs.force_wake_put(uncore, domain->mask);
++ if (delayed &&
++ !(domain->uncore->fw_domains_timer & domain->mask))
++ fw_domain_arm_timer(domain);
++ else
++ uncore->funcs.force_wake_put(uncore, domain->mask);
+ }
+ }
+
+@@ -754,7 +759,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+- __intel_uncore_forcewake_put(uncore, fw_domains);
++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
++ spin_unlock_irqrestore(&uncore->lock, irqflags);
++}
++
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++ enum forcewake_domains fw_domains)
++{
++ unsigned long irqflags;
++
++ if (!uncore->funcs.force_wake_put)
++ return;
++
++ spin_lock_irqsave(&uncore->lock, irqflags);
++ __intel_uncore_forcewake_put(uncore, fw_domains, true);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+ }
+
+@@ -796,7 +814,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
+ if (!uncore->funcs.force_wake_put)
+ return;
+
+- __intel_uncore_forcewake_put(uncore, fw_domains);
++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ }
+
+ void assert_forcewakes_inactive(struct intel_uncore *uncore)
+diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
+index 3c0b0a8b5250d..4c63209dcf530 100644
+--- a/drivers/gpu/drm/i915/intel_uncore.h
++++ b/drivers/gpu/drm/i915/intel_uncore.h
+@@ -229,6 +229,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
+ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++ enum forcewake_domains domains);
+ void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains);
+
+diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
+index d67710d10615d..0e1a64b179a55 100644
+--- a/drivers/gpu/drm/i915/selftests/i915_request.c
++++ b/drivers/gpu/drm/i915/selftests/i915_request.c
+@@ -288,9 +288,18 @@ __live_request_alloc(struct intel_context *ce)
+ return intel_context_create_request(ce);
+ }
+
+-static int __igt_breadcrumbs_smoketest(void *arg)
++struct smoke_thread {
++ struct kthread_worker *worker;
++ struct kthread_work work;
++ struct smoketest *t;
++ bool stop;
++ int result;
++};
++
++static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
+ {
+- struct smoketest *t = arg;
++ struct smoke_thread *thread = container_of(work, typeof(*thread), work);
++ struct smoketest *t = thread->t;
+ const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
+ const unsigned int total = 4 * t->ncontexts + 1;
+ unsigned int num_waits = 0, num_fences = 0;
+@@ -309,8 +318,10 @@ static int __igt_breadcrumbs_smoketest(void *arg)
+ */
+
+ requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
+- if (!requests)
+- return -ENOMEM;
++ if (!requests) {
++ thread->result = -ENOMEM;
++ return;
++ }
+
+ order = i915_random_order(total, &prng);
+ if (!order) {
+@@ -318,7 +329,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
+ goto out_requests;
+ }
+
+- while (!kthread_should_stop()) {
++ while (!READ_ONCE(thread->stop)) {
+ struct i915_sw_fence *submit, *wait;
+ unsigned int n, count;
+
+@@ -426,7 +437,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
+ kfree(order);
+ out_requests:
+ kfree(requests);
+- return err;
++ thread->result = err;
+ }
+
+ static int mock_breadcrumbs_smoketest(void *arg)
+@@ -439,7 +450,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
+ .request_alloc = __mock_request_alloc
+ };
+ unsigned int ncpus = num_online_cpus();
+- struct task_struct **threads;
++ struct smoke_thread *threads;
+ unsigned int n;
+ int ret = 0;
+
+@@ -468,28 +479,37 @@ static int mock_breadcrumbs_smoketest(void *arg)
+ }
+
+ for (n = 0; n < ncpus; n++) {
+- threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
+- &t, "igt/%d", n);
+- if (IS_ERR(threads[n])) {
+- ret = PTR_ERR(threads[n]);
++ struct kthread_worker *worker;
++
++ worker = kthread_create_worker(0, "igt/%d", n);
++ if (IS_ERR(worker)) {
++ ret = PTR_ERR(worker);
+ ncpus = n;
+ break;
+ }
+
+- get_task_struct(threads[n]);
++ threads[n].worker = worker;
++ threads[n].t = &t;
++ threads[n].stop = false;
++ threads[n].result = 0;
++
++ kthread_init_work(&threads[n].work,
++ __igt_breadcrumbs_smoketest);
++ kthread_queue_work(worker, &threads[n].work);
+ }
+
+- yield(); /* start all threads before we begin */
+ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+ for (n = 0; n < ncpus; n++) {
+ int err;
+
+- err = kthread_stop(threads[n]);
++ WRITE_ONCE(threads[n].stop, true);
++ kthread_flush_work(&threads[n].work);
++ err = READ_ONCE(threads[n].result);
+ if (err < 0 && !ret)
+ ret = err;
+
+- put_task_struct(threads[n]);
++ kthread_destroy_worker(threads[n].worker);
+ }
+ pr_info("Completed %lu waits for %lu fence across %d cpus\n",
+ atomic_long_read(&t.num_waits),
+@@ -1291,9 +1311,18 @@ out_free:
+ return err;
+ }
+
+-static int __live_parallel_engine1(void *arg)
++struct parallel_thread {
++ struct kthread_worker *worker;
++ struct kthread_work work;
++ struct intel_engine_cs *engine;
++ int result;
++};
++
++static void __live_parallel_engine1(struct kthread_work *work)
+ {
+- struct intel_engine_cs *engine = arg;
++ struct parallel_thread *thread =
++ container_of(work, typeof(*thread), work);
++ struct intel_engine_cs *engine = thread->engine;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int err = 0;
+@@ -1324,12 +1353,14 @@ static int __live_parallel_engine1(void *arg)
+ intel_engine_pm_put(engine);
+
+ pr_info("%s: %lu request + sync\n", engine->name, count);
+- return err;
++ thread->result = err;
+ }
+
+-static int __live_parallel_engineN(void *arg)
++static void __live_parallel_engineN(struct kthread_work *work)
+ {
+- struct intel_engine_cs *engine = arg;
++ struct parallel_thread *thread =
++ container_of(work, typeof(*thread), work);
++ struct intel_engine_cs *engine = thread->engine;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int err = 0;
+@@ -1351,7 +1382,7 @@ static int __live_parallel_engineN(void *arg)
+ intel_engine_pm_put(engine);
+
+ pr_info("%s: %lu requests\n", engine->name, count);
+- return err;
++ thread->result = err;
+ }
+
+ static bool wake_all(struct drm_i915_private *i915)
+@@ -1377,9 +1408,11 @@ static int wait_for_all(struct drm_i915_private *i915)
+ return -ETIME;
+ }
+
+-static int __live_parallel_spin(void *arg)
++static void __live_parallel_spin(struct kthread_work *work)
+ {
+- struct intel_engine_cs *engine = arg;
++ struct parallel_thread *thread =
++ container_of(work, typeof(*thread), work);
++ struct intel_engine_cs *engine = thread->engine;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err = 0;
+@@ -1392,7 +1425,8 @@ static int __live_parallel_spin(void *arg)
+
+ if (igt_spinner_init(&spin, engine->gt)) {
+ wake_all(engine->i915);
+- return -ENOMEM;
++ thread->result = -ENOMEM;
++ return;
+ }
+
+ intel_engine_pm_get(engine);
+@@ -1425,22 +1459,22 @@ static int __live_parallel_spin(void *arg)
+
+ out_spin:
+ igt_spinner_fini(&spin);
+- return err;
++ thread->result = err;
+ }
+
+ static int live_parallel_engines(void *arg)
+ {
+ struct drm_i915_private *i915 = arg;
+- static int (* const func[])(void *arg) = {
++ static void (* const func[])(struct kthread_work *) = {
+ __live_parallel_engine1,
+ __live_parallel_engineN,
+ __live_parallel_spin,
+ NULL,
+ };
+ const unsigned int nengines = num_uabi_engines(i915);
++ struct parallel_thread *threads;
+ struct intel_engine_cs *engine;
+- int (* const *fn)(void *arg);
+- struct task_struct **tsk;
++ void (* const *fn)(struct kthread_work *);
+ int err = 0;
+
+ /*
+@@ -1448,8 +1482,8 @@ static int live_parallel_engines(void *arg)
+ * tests that we load up the system maximally.
+ */
+
+- tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
+- if (!tsk)
++ threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
++ if (!threads)
+ return -ENOMEM;
+
+ for (fn = func; !err && *fn; fn++) {
+@@ -1466,37 +1500,44 @@ static int live_parallel_engines(void *arg)
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+- tsk[idx] = kthread_run(*fn, engine,
+- "igt/parallel:%s",
+- engine->name);
+- if (IS_ERR(tsk[idx])) {
+- err = PTR_ERR(tsk[idx]);
++ struct kthread_worker *worker;
++
++ worker = kthread_create_worker(0, "igt/parallel:%s",
++ engine->name);
++ if (IS_ERR(worker)) {
++ err = PTR_ERR(worker);
+ break;
+ }
+- get_task_struct(tsk[idx++]);
+- }
+
+- yield(); /* start all threads before we kthread_stop() */
++ threads[idx].worker = worker;
++ threads[idx].result = 0;
++ threads[idx].engine = engine;
++
++ kthread_init_work(&threads[idx].work, *fn);
++ kthread_queue_work(worker, &threads[idx].work);
++ idx++;
++ }
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ int status;
+
+- if (IS_ERR(tsk[idx]))
++ if (!threads[idx].worker)
+ break;
+
+- status = kthread_stop(tsk[idx]);
++ kthread_flush_work(&threads[idx].work);
++ status = READ_ONCE(threads[idx].result);
+ if (status && !err)
+ err = status;
+
+- put_task_struct(tsk[idx++]);
++ kthread_destroy_worker(threads[idx++].worker);
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ }
+
+- kfree(tsk);
++ kfree(threads);
+ return err;
+ }
+
+@@ -1544,7 +1585,7 @@ static int live_breadcrumbs_smoketest(void *arg)
+ const unsigned int ncpus = num_online_cpus();
+ unsigned long num_waits, num_fences;
+ struct intel_engine_cs *engine;
+- struct task_struct **threads;
++ struct smoke_thread *threads;
+ struct igt_live_test live;
+ intel_wakeref_t wakeref;
+ struct smoketest *smoke;
+@@ -1618,23 +1659,26 @@ static int live_breadcrumbs_smoketest(void *arg)
+ smoke[idx].max_batch, engine->name);
+
+ for (n = 0; n < ncpus; n++) {
+- struct task_struct *tsk;
++ unsigned int i = idx * ncpus + n;
++ struct kthread_worker *worker;
+
+- tsk = kthread_run(__igt_breadcrumbs_smoketest,
+- &smoke[idx], "igt/%d.%d", idx, n);
+- if (IS_ERR(tsk)) {
+- ret = PTR_ERR(tsk);
++ worker = kthread_create_worker(0, "igt/%d.%d", idx, n);
++ if (IS_ERR(worker)) {
++ ret = PTR_ERR(worker);
+ goto out_flush;
+ }
+
+- get_task_struct(tsk);
+- threads[idx * ncpus + n] = tsk;
++ threads[i].worker = worker;
++ threads[i].t = &smoke[idx];
++
++ kthread_init_work(&threads[i].work,
++ __igt_breadcrumbs_smoketest);
++ kthread_queue_work(worker, &threads[i].work);
+ }
+
+ idx++;
+ }
+
+- yield(); /* start all threads before we begin */
+ msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+ out_flush:
+@@ -1643,17 +1687,19 @@ out_flush:
+ num_fences = 0;
+ for_each_uabi_engine(engine, i915) {
+ for (n = 0; n < ncpus; n++) {
+- struct task_struct *tsk = threads[idx * ncpus + n];
++ unsigned int i = idx * ncpus + n;
+ int err;
+
+- if (!tsk)
++ if (!threads[i].worker)
+ continue;
+
+- err = kthread_stop(tsk);
++ WRITE_ONCE(threads[i].stop, true);
++ kthread_flush_work(&threads[i].work);
++ err = READ_ONCE(threads[i].result);
+ if (err < 0 && !ret)
+ ret = err;
+
+- put_task_struct(tsk);
++ kthread_destroy_worker(threads[i].worker);
+ }
+
+ num_waits += atomic_long_read(&smoke[idx].num_waits);
+@@ -2763,9 +2809,18 @@ out:
+ return err;
+ }
+
+-static int p_sync0(void *arg)
++struct p_thread {
++ struct perf_stats p;
++ struct kthread_worker *worker;
++ struct kthread_work work;
++ struct intel_engine_cs *engine;
++ int result;
++};
++
++static void p_sync0(struct kthread_work *work)
+ {
+- struct perf_stats *p = arg;
++ struct p_thread *thread = container_of(work, typeof(*thread), work);
++ struct perf_stats *p = &thread->p;
+ struct intel_engine_cs *engine = p->engine;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+@@ -2774,13 +2829,16 @@ static int p_sync0(void *arg)
+ int err = 0;
+
+ ce = intel_context_create(engine);
+- if (IS_ERR(ce))
+- return PTR_ERR(ce);
++ if (IS_ERR(ce)) {
++ thread->result = PTR_ERR(ce);
++ return;
++ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
++ return;
+ }
+
+ if (intel_engine_supports_stats(engine)) {
+@@ -2830,12 +2888,13 @@ static int p_sync0(void *arg)
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
+ }
+
+-static int p_sync1(void *arg)
++static void p_sync1(struct kthread_work *work)
+ {
+- struct perf_stats *p = arg;
++ struct p_thread *thread = container_of(work, typeof(*thread), work);
++ struct perf_stats *p = &thread->p;
+ struct intel_engine_cs *engine = p->engine;
+ struct i915_request *prev = NULL;
+ struct intel_context *ce;
+@@ -2845,13 +2904,16 @@ static int p_sync1(void *arg)
+ int err = 0;
+
+ ce = intel_context_create(engine);
+- if (IS_ERR(ce))
+- return PTR_ERR(ce);
++ if (IS_ERR(ce)) {
++ thread->result = PTR_ERR(ce);
++ return;
++ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
++ return;
+ }
+
+ if (intel_engine_supports_stats(engine)) {
+@@ -2903,12 +2965,13 @@ static int p_sync1(void *arg)
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
+ }
+
+-static int p_many(void *arg)
++static void p_many(struct kthread_work *work)
+ {
+- struct perf_stats *p = arg;
++ struct p_thread *thread = container_of(work, typeof(*thread), work);
++ struct perf_stats *p = &thread->p;
+ struct intel_engine_cs *engine = p->engine;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+@@ -2917,13 +2980,16 @@ static int p_many(void *arg)
+ bool busy;
+
+ ce = intel_context_create(engine);
+- if (IS_ERR(ce))
+- return PTR_ERR(ce);
++ if (IS_ERR(ce)) {
++ thread->result = PTR_ERR(ce);
++ return;
++ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
++ return;
+ }
+
+ if (intel_engine_supports_stats(engine)) {
+@@ -2964,26 +3030,23 @@ static int p_many(void *arg)
+
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+- return err;
++ thread->result = err;
+ }
+
+ static int perf_parallel_engines(void *arg)
+ {
+ struct drm_i915_private *i915 = arg;
+- static int (* const func[])(void *arg) = {
++ static void (* const func[])(struct kthread_work *) = {
+ p_sync0,
+ p_sync1,
+ p_many,
+ NULL,
+ };
+ const unsigned int nengines = num_uabi_engines(i915);
++ void (* const *fn)(struct kthread_work *);
+ struct intel_engine_cs *engine;
+- int (* const *fn)(void *arg);
+ struct pm_qos_request qos;
+- struct {
+- struct perf_stats p;
+- struct task_struct *tsk;
+- } *engines;
++ struct p_thread *engines;
+ int err = 0;
+
+ engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+@@ -3006,36 +3069,45 @@ static int perf_parallel_engines(void *arg)
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
++ struct kthread_worker *worker;
++
+ intel_engine_pm_get(engine);
+
+ memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+- engines[idx].p.engine = engine;
+
+- engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+- "igt:%s", engine->name);
+- if (IS_ERR(engines[idx].tsk)) {
+- err = PTR_ERR(engines[idx].tsk);
++ worker = kthread_create_worker(0, "igt:%s",
++ engine->name);
++ if (IS_ERR(worker)) {
++ err = PTR_ERR(worker);
+ intel_engine_pm_put(engine);
+ break;
+ }
+- get_task_struct(engines[idx++].tsk);
+- }
++ engines[idx].worker = worker;
++ engines[idx].result = 0;
++ engines[idx].p.engine = engine;
++ engines[idx].engine = engine;
+
+- yield(); /* start all threads before we kthread_stop() */
++ kthread_init_work(&engines[idx].work, *fn);
++ kthread_queue_work(worker, &engines[idx].work);
++ idx++;
++ }
+
+ idx = 0;
+ for_each_uabi_engine(engine, i915) {
+ int status;
+
+- if (IS_ERR(engines[idx].tsk))
++ if (!engines[idx].worker)
+ break;
+
+- status = kthread_stop(engines[idx].tsk);
++ kthread_flush_work(&engines[idx].work);
++ status = READ_ONCE(engines[idx].result);
+ if (status && !err)
+ err = status;
+
+ intel_engine_pm_put(engine);
+- put_task_struct(engines[idx++].tsk);
++
++ kthread_destroy_worker(engines[idx].worker);
++ idx++;
+ }
+
+ if (igt_live_test_end(&t))
+diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
+index 4b328346b48a2..83ffd175ca894 100644
+--- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
++++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
+@@ -16,8 +16,7 @@
+
+ int intel_selftest_modify_policy(struct intel_engine_cs *engine,
+ struct intel_selftest_saved_policy *saved,
+- u32 modify_type)
+-
++ enum selftest_scheduler_modify modify_type)
+ {
+ int err;
+
+diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c
+index c849533ca83e3..3f5750cc2673e 100644
+--- a/drivers/gpu/drm/imx/dcss/dcss-dev.c
++++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c
+@@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output)
+
+ ret = dcss_submodules_init(dcss);
+ if (ret) {
++ of_node_put(dcss->of_port);
+ dev_err(dev, "submodules initialization failed\n");
+ goto clks_err;
+ }
+@@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss)
+ dcss_clocks_disable(dcss);
+ }
+
++ of_node_put(dcss->of_port);
++
+ pm_runtime_disable(dcss->dev);
+
+ dcss_submodules_stop(dcss);
+diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c
+index 9b84df34a6a12..8cf3352d88582 100644
+--- a/drivers/gpu/drm/imx/dcss/dcss-kms.c
++++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c
+@@ -142,8 +142,6 @@ struct dcss_kms_dev *dcss_kms_attach(struct dcss_dev *dcss)
+
+ drm_kms_helper_poll_init(drm);
+
+- drm_bridge_connector_enable_hpd(kms->connector);
+-
+ ret = drm_dev_register(drm, 0);
+ if (ret)
+ goto cleanup_crtc;
+diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
+index 87428fb23d9ff..a2277a0d6d06f 100644
+--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
++++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
+@@ -222,6 +222,7 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev)
+ struct device_node *np = pdev->dev.of_node;
+ const struct of_device_id *match = of_match_node(dw_hdmi_imx_dt_ids, np);
+ struct imx_hdmi *hdmi;
++ int ret;
+
+ hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
+ if (!hdmi)
+@@ -243,10 +244,15 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev)
+ hdmi->bridge = of_drm_find_bridge(np);
+ if (!hdmi->bridge) {
+ dev_err(hdmi->dev, "Unable to find bridge\n");
++ dw_hdmi_remove(hdmi->hdmi);
+ return -ENODEV;
+ }
+
+- return component_add(&pdev->dev, &dw_hdmi_imx_ops);
++ ret = component_add(&pdev->dev, &dw_hdmi_imx_ops);
++ if (ret)
++ dw_hdmi_remove(hdmi->hdmi);
++
++ return ret;
+ }
+
+ static int dw_hdmi_imx_remove(struct platform_device *pdev)
+diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
+index 9558e9e1b431b..cb685fe2039b4 100644
+--- a/drivers/gpu/drm/imx/imx-drm-core.c
++++ b/drivers/gpu/drm/imx/imx-drm-core.c
+@@ -81,7 +81,6 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state)
+ struct drm_plane_state *old_plane_state, *new_plane_state;
+ bool plane_disabling = false;
+ int i;
+- bool fence_cookie = dma_fence_begin_signalling();
+
+ drm_atomic_helper_commit_modeset_disables(dev, state);
+
+@@ -112,7 +111,6 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state)
+ }
+
+ drm_atomic_helper_commit_hw_done(state);
+- dma_fence_end_signalling(fence_cookie);
+ }
+
+ static const struct drm_mode_config_helper_funcs imx_drm_mode_config_helpers = {
+diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
+index e5078d03020d9..fb0e951248f68 100644
+--- a/drivers/gpu/drm/imx/imx-ldb.c
++++ b/drivers/gpu/drm/imx/imx-ldb.c
+@@ -572,6 +572,8 @@ static int imx_ldb_panel_ddc(struct device *dev,
+ edidp = of_get_property(child, "edid", &edid_len);
+ if (edidp) {
+ channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL);
++ if (!channel->edid)
++ return -ENOMEM;
+ } else if (!channel->panel) {
+ /* fallback to display-timings node */
+ ret = of_get_drm_display_mode(child,
+diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
+index bc8c3f802a152..fbfb7adead0b3 100644
+--- a/drivers/gpu/drm/imx/imx-tve.c
++++ b/drivers/gpu/drm/imx/imx-tve.c
+@@ -217,8 +217,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector)
+ return ret;
+ }
+
+-static int imx_tve_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++imx_tve_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ struct imx_tve *tve = con_to_tve(connector);
+ unsigned long rate;
+diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
+index 9c8829f945b23..ba5b16618c237 100644
+--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
++++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
+@@ -69,7 +69,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc,
+ drm_atomic_crtc_state_for_each_plane(plane, old_crtc_state) {
+ if (plane == &ipu_crtc->plane[0]->base)
+ disable_full = true;
+- if (&ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base)
++ if (ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base)
+ disable_partial = true;
+ }
+
+@@ -311,7 +311,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
+ dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n",
+ sig_cfg.mode.hactive, new_hactive);
+
+- sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive;
++ sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive;
+ sig_cfg.mode.hactive = new_hactive;
+ }
+
+diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
+index 846c1aae69c82..924a66f539511 100644
+--- a/drivers/gpu/drm/imx/ipuv3-plane.c
++++ b/drivers/gpu/drm/imx/ipuv3-plane.c
+@@ -619,6 +619,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+ break;
+ }
+
++ if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG)
++ width = ipu_src_rect_width(new_state);
++ else
++ width = drm_rect_width(&new_state->src) >> 16;
++
+ eba = drm_plane_state_to_eba(new_state, 0);
+
+ /*
+@@ -627,8 +632,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+ */
+ if (ipu_state->use_pre) {
+ axi_id = ipu_chan_assign_axi_id(ipu_plane->dma);
+- ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id,
+- ipu_src_rect_width(new_state),
++ ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width,
+ drm_rect_height(&new_state->src) >> 16,
+ fb->pitches[0], fb->format->format,
+ fb->modifier, &eba);
+@@ -683,9 +687,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+ break;
+ }
+
+- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8));
++ ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width);
+
+- width = ipu_src_rect_width(new_state);
+ height = drm_rect_height(&new_state->src) >> 16;
+ info = drm_format_info(fb->format->format);
+ ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0],
+@@ -749,8 +752,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
+ ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16);
+
+ ipu_cpmem_zero(ipu_plane->alpha_ch);
+- ipu_cpmem_set_resolution(ipu_plane->alpha_ch,
+- ipu_src_rect_width(new_state),
++ ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width,
+ drm_rect_height(&new_state->src) >> 16);
+ ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8);
+ ipu_cpmem_set_high_priority(ipu_plane->alpha_ch);
+diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
+index a8aba0141ce71..63ba2ad846791 100644
+--- a/drivers/gpu/drm/imx/parallel-display.c
++++ b/drivers/gpu/drm/imx/parallel-display.c
+@@ -75,8 +75,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
+ ret = of_get_drm_display_mode(np, &imxpd->mode,
+ &imxpd->bus_flags,
+ OF_USE_NATIVE_MODE);
+- if (ret)
++ if (ret) {
++ drm_mode_destroy(connector->dev, mode);
+ return ret;
++ }
+
+ drm_mode_copy(mode, &imxpd->mode);
+ mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+@@ -217,14 +219,6 @@ static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge,
+ if (!imx_pd_format_supported(bus_fmt))
+ return -EINVAL;
+
+- if (bus_flags &
+- ~(DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_DE_HIGH |
+- DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE |
+- DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)) {
+- dev_warn(imxpd->dev, "invalid bus_flags (%x)\n", bus_flags);
+- return -EINVAL;
+- }
+-
+ bridge_state->output_bus_cfg.flags = bus_flags;
+ bridge_state->input_bus_cfg.flags = bus_flags;
+ imx_crtc_state->bus_flags = bus_flags;
+diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+index a5df1c8d34cde..d9231b89d73e8 100644
+--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
++++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+@@ -1326,7 +1326,11 @@ static int ingenic_drm_init(void)
+ return err;
+ }
+
+- return platform_driver_register(&ingenic_drm_driver);
++ err = platform_driver_register(&ingenic_drm_driver);
++ if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && err)
++ platform_driver_unregister(ingenic_ipu_driver_ptr);
++
++ return err;
+ }
+ module_init(ingenic_drm_init);
+
+diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c
+index 00404ba4126dd..2735b8eb35376 100644
+--- a/drivers/gpu/drm/kmb/kmb_plane.c
++++ b/drivers/gpu/drm/kmb/kmb_plane.c
+@@ -158,12 +158,6 @@ static void kmb_plane_atomic_disable(struct drm_plane *plane,
+ case LAYER_1:
+ kmb->plane_status[plane_id].ctrl = LCD_CTRL_VL2_ENABLE;
+ break;
+- case LAYER_2:
+- kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL1_ENABLE;
+- break;
+- case LAYER_3:
+- kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL2_ENABLE;
+- break;
+ }
+
+ kmb->plane_status[plane_id].disable = true;
+diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
+index 65fdca366e41f..36c9905894278 100644
+--- a/drivers/gpu/drm/lima/lima_device.c
++++ b/drivers/gpu/drm/lima/lima_device.c
+@@ -357,6 +357,7 @@ int lima_device_init(struct lima_device *ldev)
+ int err, i;
+
+ dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
++ dma_set_max_seg_size(ldev->dev, UINT_MAX);
+
+ err = lima_clk_init(ldev);
+ if (err)
+diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
+index 7b8d7178d09aa..39cab4a55f572 100644
+--- a/drivers/gpu/drm/lima/lima_drv.c
++++ b/drivers/gpu/drm/lima/lima_drv.c
+@@ -392,8 +392,10 @@ static int lima_pdev_probe(struct platform_device *pdev)
+
+ /* Allocate and initialize the DRM device. */
+ ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
+- if (IS_ERR(ddev))
+- return PTR_ERR(ddev);
++ if (IS_ERR(ddev)) {
++ err = PTR_ERR(ddev);
++ goto err_out0;
++ }
+
+ ddev->dev_private = ldev;
+ ldev->ddev = ddev;
+diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
+index de62966243cd5..09ea621a4806c 100644
+--- a/drivers/gpu/drm/lima/lima_gem.c
++++ b/drivers/gpu/drm/lima/lima_gem.c
+@@ -127,7 +127,7 @@ int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
+ if (err)
+ goto out;
+ } else {
+- struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(obj);
++ struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(shmem);
+
+ if (IS_ERR(sgt)) {
+ err = PTR_ERR(sgt);
+@@ -151,7 +151,7 @@ static void lima_gem_free_object(struct drm_gem_object *obj)
+ if (!list_empty(&bo->va))
+ dev_err(obj->dev->dev, "lima gem free bo still has va\n");
+
+- drm_gem_shmem_free_object(obj);
++ drm_gem_shmem_free(&bo->base);
+ }
+
+ static int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
+@@ -179,7 +179,7 @@ static int lima_gem_pin(struct drm_gem_object *obj)
+ if (bo->heap_size)
+ return -EINVAL;
+
+- return drm_gem_shmem_pin(obj);
++ return drm_gem_shmem_pin(&bo->base);
+ }
+
+ static int lima_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+@@ -189,7 +189,7 @@ static int lima_gem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+ if (bo->heap_size)
+ return -EINVAL;
+
+- return drm_gem_shmem_vmap(obj, map);
++ return drm_gem_shmem_vmap(&bo->base, map);
+ }
+
+ static int lima_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+@@ -199,19 +199,19 @@ static int lima_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+ if (bo->heap_size)
+ return -EINVAL;
+
+- return drm_gem_shmem_mmap(obj, vma);
++ return drm_gem_shmem_mmap(&bo->base, vma);
+ }
+
+ static const struct drm_gem_object_funcs lima_gem_funcs = {
+ .free = lima_gem_free_object,
+ .open = lima_gem_object_open,
+ .close = lima_gem_object_close,
+- .print_info = drm_gem_shmem_print_info,
++ .print_info = drm_gem_shmem_object_print_info,
+ .pin = lima_gem_pin,
+- .unpin = drm_gem_shmem_unpin,
+- .get_sg_table = drm_gem_shmem_get_sg_table,
++ .unpin = drm_gem_shmem_object_unpin,
++ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = lima_gem_vmap,
+- .vunmap = drm_gem_shmem_vunmap,
++ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = lima_gem_mmap,
+ };
+
+diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
+index dba8329937a32..2e817dbdcad75 100644
+--- a/drivers/gpu/drm/lima/lima_sched.c
++++ b/drivers/gpu/drm/lima/lima_sched.c
+@@ -390,7 +390,7 @@ static void lima_sched_build_error_task_list(struct lima_sched_task *task)
+ } else {
+ buffer_chunk->size = lima_bo_size(bo);
+
+- ret = drm_gem_shmem_vmap(&bo->base.base, &map);
++ ret = drm_gem_shmem_vmap(&bo->base, &map);
+ if (ret) {
+ kvfree(et);
+ goto out;
+@@ -398,7 +398,7 @@ static void lima_sched_build_error_task_list(struct lima_sched_task *task)
+
+ memcpy(buffer_chunk + 1, map.vaddr, buffer_chunk->size);
+
+- drm_gem_shmem_vunmap(&bo->base.base, &map);
++ drm_gem_shmem_vunmap(&bo->base, &map);
+ }
+
+ buffer_chunk = (void *)(buffer_chunk + 1) + buffer_chunk->size;
+diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
+index 180ebbccbedaf..0b58d7f4ba780 100644
+--- a/drivers/gpu/drm/mcde/mcde_dsi.c
++++ b/drivers/gpu/drm/mcde/mcde_dsi.c
+@@ -1111,6 +1111,7 @@ static int mcde_dsi_bind(struct device *dev, struct device *master,
+ bridge = of_drm_find_bridge(child);
+ if (!bridge) {
+ dev_err(dev, "failed to find bridge\n");
++ of_node_put(child);
+ return -EINVAL;
+ }
+ }
+diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
+index e9cef5c0c8f7e..cdfa648910b23 100644
+--- a/drivers/gpu/drm/mediatek/mtk_cec.c
++++ b/drivers/gpu/drm/mediatek/mtk_cec.c
+@@ -85,7 +85,7 @@ static void mtk_cec_mask(struct mtk_cec *cec, unsigned int offset,
+ u32 tmp = readl(cec->regs + offset) & ~mask;
+
+ tmp |= val & mask;
+- writel(val, cec->regs + offset);
++ writel(tmp, cec->regs + offset);
+ }
+
+ void mtk_cec_set_hpd_event(struct device *dev,
+diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+index 86c3068894b11..974462831133b 100644
+--- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h
++++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h
+@@ -76,9 +76,11 @@ void mtk_ovl_layer_off(struct device *dev, unsigned int idx,
+ void mtk_ovl_start(struct device *dev);
+ void mtk_ovl_stop(struct device *dev);
+ unsigned int mtk_ovl_supported_rotations(struct device *dev);
+-void mtk_ovl_enable_vblank(struct device *dev,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data);
++void mtk_ovl_register_vblank_cb(struct device *dev,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data);
++void mtk_ovl_unregister_vblank_cb(struct device *dev);
++void mtk_ovl_enable_vblank(struct device *dev);
+ void mtk_ovl_disable_vblank(struct device *dev);
+
+ void mtk_rdma_bypass_shadow(struct device *dev);
+@@ -93,9 +95,11 @@ void mtk_rdma_layer_config(struct device *dev, unsigned int idx,
+ struct cmdq_pkt *cmdq_pkt);
+ void mtk_rdma_start(struct device *dev);
+ void mtk_rdma_stop(struct device *dev);
+-void mtk_rdma_enable_vblank(struct device *dev,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data);
++void mtk_rdma_register_vblank_cb(struct device *dev,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data);
++void mtk_rdma_unregister_vblank_cb(struct device *dev);
++void mtk_rdma_enable_vblank(struct device *dev);
+ void mtk_rdma_disable_vblank(struct device *dev);
+
+ #endif
+diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+index 5326989d52061..411cf0f216611 100644
+--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
++++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+@@ -96,14 +96,28 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-void mtk_ovl_enable_vblank(struct device *dev,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data)
++void mtk_ovl_register_vblank_cb(struct device *dev,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data)
+ {
+ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+
+ ovl->vblank_cb = vblank_cb;
+ ovl->vblank_cb_data = vblank_cb_data;
++}
++
++void mtk_ovl_unregister_vblank_cb(struct device *dev)
++{
++ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
++
++ ovl->vblank_cb = NULL;
++ ovl->vblank_cb_data = NULL;
++}
++
++void mtk_ovl_enable_vblank(struct device *dev)
++{
++ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
++
+ writel(0x0, ovl->regs + DISP_REG_OVL_INTSTA);
+ writel_relaxed(OVL_FME_CPL_INT, ovl->regs + DISP_REG_OVL_INTEN);
+ }
+@@ -112,8 +126,6 @@ void mtk_ovl_disable_vblank(struct device *dev)
+ {
+ struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+
+- ovl->vblank_cb = NULL;
+- ovl->vblank_cb_data = NULL;
+ writel_relaxed(0x0, ovl->regs + DISP_REG_OVL_INTEN);
+ }
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
+index 75d7f45579e26..a6a6cb5f75af7 100644
+--- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
++++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
+@@ -94,24 +94,32 @@ static void rdma_update_bits(struct device *dev, unsigned int reg,
+ writel(tmp, rdma->regs + reg);
+ }
+
+-void mtk_rdma_enable_vblank(struct device *dev,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data)
++void mtk_rdma_register_vblank_cb(struct device *dev,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data)
+ {
+ struct mtk_disp_rdma *rdma = dev_get_drvdata(dev);
+
+ rdma->vblank_cb = vblank_cb;
+ rdma->vblank_cb_data = vblank_cb_data;
+- rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT,
+- RDMA_FRAME_END_INT);
+ }
+
+-void mtk_rdma_disable_vblank(struct device *dev)
++void mtk_rdma_unregister_vblank_cb(struct device *dev)
+ {
+ struct mtk_disp_rdma *rdma = dev_get_drvdata(dev);
+
+ rdma->vblank_cb = NULL;
+ rdma->vblank_cb_data = NULL;
++}
++
++void mtk_rdma_enable_vblank(struct device *dev)
++{
++ rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT,
++ RDMA_FRAME_END_INT);
++}
++
++void mtk_rdma_disable_vblank(struct device *dev)
++{
+ rdma_update_bits(dev, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT, 0);
+ }
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
+index 4554e2de14309..94c6bd3b00823 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
+@@ -54,13 +54,7 @@ enum mtk_dpi_out_channel_swap {
+ };
+
+ enum mtk_dpi_out_color_format {
+- MTK_DPI_COLOR_FORMAT_RGB,
+- MTK_DPI_COLOR_FORMAT_RGB_FULL,
+- MTK_DPI_COLOR_FORMAT_YCBCR_444,
+- MTK_DPI_COLOR_FORMAT_YCBCR_422,
+- MTK_DPI_COLOR_FORMAT_XV_YCC,
+- MTK_DPI_COLOR_FORMAT_YCBCR_444_FULL,
+- MTK_DPI_COLOR_FORMAT_YCBCR_422_FULL
++ MTK_DPI_COLOR_FORMAT_RGB
+ };
+
+ struct mtk_dpi {
+@@ -364,24 +358,11 @@ static void mtk_dpi_config_disable_edge(struct mtk_dpi *dpi)
+ static void mtk_dpi_config_color_format(struct mtk_dpi *dpi,
+ enum mtk_dpi_out_color_format format)
+ {
+- if ((format == MTK_DPI_COLOR_FORMAT_YCBCR_444) ||
+- (format == MTK_DPI_COLOR_FORMAT_YCBCR_444_FULL)) {
+- mtk_dpi_config_yuv422_enable(dpi, false);
+- mtk_dpi_config_csc_enable(dpi, true);
+- mtk_dpi_config_swap_input(dpi, false);
+- mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_BGR);
+- } else if ((format == MTK_DPI_COLOR_FORMAT_YCBCR_422) ||
+- (format == MTK_DPI_COLOR_FORMAT_YCBCR_422_FULL)) {
+- mtk_dpi_config_yuv422_enable(dpi, true);
+- mtk_dpi_config_csc_enable(dpi, true);
+- mtk_dpi_config_swap_input(dpi, true);
+- mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB);
+- } else {
+- mtk_dpi_config_yuv422_enable(dpi, false);
+- mtk_dpi_config_csc_enable(dpi, false);
+- mtk_dpi_config_swap_input(dpi, false);
+- mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB);
+- }
++ /* only support RGB888 */
++ mtk_dpi_config_yuv422_enable(dpi, false);
++ mtk_dpi_config_csc_enable(dpi, false);
++ mtk_dpi_config_swap_input(dpi, false);
++ mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB);
+ }
+
+ static void mtk_dpi_dual_edge(struct mtk_dpi *dpi)
+@@ -406,9 +387,6 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi)
+ if (--dpi->refcount != 0)
+ return;
+
+- if (dpi->pinctrl && dpi->pins_gpio)
+- pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
+-
+ mtk_dpi_disable(dpi);
+ clk_disable_unprepare(dpi->pixel_clk);
+ clk_disable_unprepare(dpi->engine_clk);
+@@ -433,10 +411,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi)
+ goto err_pixel;
+ }
+
+- if (dpi->pinctrl && dpi->pins_dpi)
+- pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi);
+-
+- mtk_dpi_enable(dpi);
+ return 0;
+
+ err_pixel:
+@@ -650,14 +624,21 @@ static void mtk_dpi_bridge_disable(struct drm_bridge *bridge)
+ struct mtk_dpi *dpi = bridge_to_dpi(bridge);
+
+ mtk_dpi_power_off(dpi);
++
++ if (dpi->pinctrl && dpi->pins_gpio)
++ pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
+ }
+
+ static void mtk_dpi_bridge_enable(struct drm_bridge *bridge)
+ {
+ struct mtk_dpi *dpi = bridge_to_dpi(bridge);
+
++ if (dpi->pinctrl && dpi->pins_dpi)
++ pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi);
++
+ mtk_dpi_power_on(dpi);
+ mtk_dpi_set_display_mode(dpi, &dpi->mode);
++ mtk_dpi_enable(dpi);
+ }
+
+ static enum drm_mode_status
+@@ -819,8 +800,8 @@ static const struct mtk_dpi_conf mt8192_conf = {
+ .cal_factor = mt8183_calculate_factor,
+ .reg_h_fre_con = 0xe0,
+ .max_clock_khz = 150000,
+- .output_fmts = mt8173_output_fmts,
+- .num_output_fmts = ARRAY_SIZE(mt8173_output_fmts),
++ .output_fmts = mt8183_output_fmts,
++ .num_output_fmts = ARRAY_SIZE(mt8183_output_fmts),
+ };
+
+ static int mtk_dpi_probe(struct platform_device *pdev)
+diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+index a4e80e4996748..0b93013061e6a 100644
+--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
++++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+@@ -4,6 +4,8 @@
+ */
+
+ #include <linux/clk.h>
++#include <linux/dma-mapping.h>
++#include <linux/mailbox_controller.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/soc/mediatek/mtk-cmdq.h>
+ #include <linux/soc/mediatek/mtk-mmsys.h>
+@@ -50,8 +52,10 @@ struct mtk_drm_crtc {
+ bool pending_async_planes;
+
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+- struct cmdq_client *cmdq_client;
++ struct cmdq_client cmdq_client;
++ struct cmdq_pkt cmdq_handle;
+ u32 cmdq_event;
++ u32 cmdq_vblank_cnt;
+ #endif
+
+ struct device *mmsys_dev;
+@@ -104,11 +108,60 @@ static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc)
+ }
+ }
+
++#if IS_REACHABLE(CONFIG_MTK_CMDQ)
++static int mtk_drm_cmdq_pkt_create(struct cmdq_client *client, struct cmdq_pkt *pkt,
++ size_t size)
++{
++ struct device *dev;
++ dma_addr_t dma_addr;
++
++ pkt->va_base = kzalloc(size, GFP_KERNEL);
++ if (!pkt->va_base)
++ return -ENOMEM;
++
++ pkt->buf_size = size;
++ pkt->cl = (void *)client;
++
++ dev = client->chan->mbox->dev;
++ dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size,
++ DMA_TO_DEVICE);
++ if (dma_mapping_error(dev, dma_addr)) {
++ dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size);
++ kfree(pkt->va_base);
++ return -ENOMEM;
++ }
++
++ pkt->pa_base = dma_addr;
++
++ return 0;
++}
++
++static void mtk_drm_cmdq_pkt_destroy(struct cmdq_pkt *pkt)
++{
++ struct cmdq_client *client = (struct cmdq_client *)pkt->cl;
++
++ dma_unmap_single(client->chan->mbox->dev, pkt->pa_base, pkt->buf_size,
++ DMA_TO_DEVICE);
++ kfree(pkt->va_base);
++}
++#endif
++
+ static void mtk_drm_crtc_destroy(struct drm_crtc *crtc)
+ {
+ struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
++ int i;
+
+ mtk_mutex_put(mtk_crtc->mutex);
++#if IS_REACHABLE(CONFIG_MTK_CMDQ)
++ mtk_drm_cmdq_pkt_destroy(&mtk_crtc->cmdq_handle);
++#endif
++
++ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
++ struct mtk_ddp_comp *comp;
++
++ comp = mtk_crtc->ddp_comp[i];
++ mtk_ddp_comp_unregister_vblank_cb(comp);
++ }
+
+ drm_crtc_cleanup(crtc);
+ }
+@@ -222,9 +275,12 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
+ }
+
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+-static void ddp_cmdq_cb(struct cmdq_cb_data data)
++static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg)
+ {
+- cmdq_pkt_destroy(data.data);
++ struct cmdq_client *cmdq_cl = container_of(cl, struct cmdq_client, client);
++ struct mtk_drm_crtc *mtk_crtc = container_of(cmdq_cl, struct mtk_drm_crtc, cmdq_client);
++
++ mtk_crtc->cmdq_vblank_cnt = 0;
+ }
+ #endif
+
+@@ -430,7 +486,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc,
+ bool needs_vblank)
+ {
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+- struct cmdq_pkt *cmdq_handle;
++ struct cmdq_pkt *cmdq_handle = &mtk_crtc->cmdq_handle;
+ #endif
+ struct drm_crtc *crtc = &mtk_crtc->base;
+ struct mtk_drm_private *priv = crtc->dev->dev_private;
+@@ -468,14 +524,28 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc,
+ mtk_mutex_release(mtk_crtc->mutex);
+ }
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+- if (mtk_crtc->cmdq_client) {
+- mbox_flush(mtk_crtc->cmdq_client->chan, 2000);
+- cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE);
++ if (mtk_crtc->cmdq_client.chan) {
++ mbox_flush(mtk_crtc->cmdq_client.chan, 2000);
++ cmdq_handle->cmd_buf_size = 0;
+ cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event);
+ cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false);
+ mtk_crtc_ddp_config(crtc, cmdq_handle);
+ cmdq_pkt_finalize(cmdq_handle);
+- cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle);
++ dma_sync_single_for_device(mtk_crtc->cmdq_client.chan->mbox->dev,
++ cmdq_handle->pa_base,
++ cmdq_handle->cmd_buf_size,
++ DMA_TO_DEVICE);
++ /*
++ * The CMDQ commands should execute within the next 3 vblanks:
++ * one vblank interrupt may fire before the message is sent
++ * (occasionally) and one more fires after the CMDQ work is done,
++ * so the timeout is counted as 3 vblank interrupts.
++ * If the commands fail to execute within 3 vblanks, a timeout is reported.
++ */
++ mtk_crtc->cmdq_vblank_cnt = 3;
++
++ mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle);
++ mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0);
+ }
+ #endif
+ mtk_crtc->config_updating = false;
+@@ -489,12 +559,15 @@ static void mtk_crtc_ddp_irq(void *data)
+ struct mtk_drm_private *priv = crtc->dev->dev_private;
+
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+- if (!priv->data->shadow_register && !mtk_crtc->cmdq_client)
++ if (!priv->data->shadow_register && !mtk_crtc->cmdq_client.chan)
++ mtk_crtc_ddp_config(crtc, NULL);
++ else if (mtk_crtc->cmdq_vblank_cnt > 0 && --mtk_crtc->cmdq_vblank_cnt == 0)
++ DRM_ERROR("mtk_crtc %d CMDQ execute command timeout!\n",
++ drm_crtc_index(&mtk_crtc->base));
+ #else
+ if (!priv->data->shadow_register)
+-#endif
+ mtk_crtc_ddp_config(crtc, NULL);
+-
++#endif
+ mtk_drm_finish_page_flip(mtk_crtc);
+ }
+
+@@ -503,7 +576,7 @@ static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc)
+ struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+ struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0];
+
+- mtk_ddp_comp_enable_vblank(comp, mtk_crtc_ddp_irq, &mtk_crtc->base);
++ mtk_ddp_comp_enable_vblank(comp);
+
+ return 0;
+ }
+@@ -803,6 +876,9 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
+ if (comp->funcs->ctm_set)
+ has_ctm = true;
+ }
++
++ mtk_ddp_comp_register_vblank_cb(comp, mtk_crtc_ddp_irq,
++ &mtk_crtc->base);
+ }
+
+ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
+@@ -810,6 +886,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
+
+ mtk_crtc->planes = devm_kcalloc(dev, num_comp_planes,
+ sizeof(struct drm_plane), GFP_KERNEL);
++ if (!mtk_crtc->planes)
++ return -ENOMEM;
+
+ for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
+ ret = mtk_drm_crtc_init_comp_planes(drm_dev, mtk_crtc, i,
+@@ -829,16 +907,20 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
+ mutex_init(&mtk_crtc->hw_lock);
+
+ #if IS_REACHABLE(CONFIG_MTK_CMDQ)
+- mtk_crtc->cmdq_client =
+- cmdq_mbox_create(mtk_crtc->mmsys_dev,
+- drm_crtc_index(&mtk_crtc->base));
+- if (IS_ERR(mtk_crtc->cmdq_client)) {
++ mtk_crtc->cmdq_client.client.dev = mtk_crtc->mmsys_dev;
++ mtk_crtc->cmdq_client.client.tx_block = false;
++ mtk_crtc->cmdq_client.client.knows_txdone = true;
++ mtk_crtc->cmdq_client.client.rx_callback = ddp_cmdq_cb;
++ mtk_crtc->cmdq_client.chan =
++ mbox_request_channel(&mtk_crtc->cmdq_client.client,
++ drm_crtc_index(&mtk_crtc->base));
++ if (IS_ERR(mtk_crtc->cmdq_client.chan)) {
+ dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n",
+ drm_crtc_index(&mtk_crtc->base));
+- mtk_crtc->cmdq_client = NULL;
++ mtk_crtc->cmdq_client.chan = NULL;
+ }
+
+- if (mtk_crtc->cmdq_client) {
++ if (mtk_crtc->cmdq_client.chan) {
+ ret = of_property_read_u32_index(priv->mutex_node,
+ "mediatek,gce-events",
+ drm_crtc_index(&mtk_crtc->base),
+@@ -846,8 +928,18 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
+ if (ret) {
+ dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n",
+ drm_crtc_index(&mtk_crtc->base));
+- cmdq_mbox_destroy(mtk_crtc->cmdq_client);
+- mtk_crtc->cmdq_client = NULL;
++ mbox_free_channel(mtk_crtc->cmdq_client.chan);
++ mtk_crtc->cmdq_client.chan = NULL;
++ } else {
++ ret = mtk_drm_cmdq_pkt_create(&mtk_crtc->cmdq_client,
++ &mtk_crtc->cmdq_handle,
++ PAGE_SIZE);
++ if (ret) {
++ dev_dbg(dev, "mtk_crtc %d failed to create cmdq packet\n",
++ drm_crtc_index(&mtk_crtc->base));
++ mbox_free_channel(mtk_crtc->cmdq_client.chan);
++ mtk_crtc->cmdq_client.chan = NULL;
++ }
+ }
+ }
+ #endif
+diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+index 99cbf44463e40..22d23668b4840 100644
+--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
++++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+@@ -276,6 +276,8 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = {
+ .config = mtk_ovl_config,
+ .start = mtk_ovl_start,
+ .stop = mtk_ovl_stop,
++ .register_vblank_cb = mtk_ovl_register_vblank_cb,
++ .unregister_vblank_cb = mtk_ovl_unregister_vblank_cb,
+ .enable_vblank = mtk_ovl_enable_vblank,
+ .disable_vblank = mtk_ovl_disable_vblank,
+ .supported_rotations = mtk_ovl_supported_rotations,
+@@ -292,6 +294,8 @@ static const struct mtk_ddp_comp_funcs ddp_rdma = {
+ .config = mtk_rdma_config,
+ .start = mtk_rdma_start,
+ .stop = mtk_rdma_stop,
++ .register_vblank_cb = mtk_rdma_register_vblank_cb,
++ .unregister_vblank_cb = mtk_rdma_unregister_vblank_cb,
+ .enable_vblank = mtk_rdma_enable_vblank,
+ .disable_vblank = mtk_rdma_disable_vblank,
+ .layer_nr = mtk_rdma_layer_nr,
+diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+index bb914d976cf5d..25cb50f2391fa 100644
+--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
++++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+@@ -47,9 +47,11 @@ struct mtk_ddp_comp_funcs {
+ unsigned int bpc, struct cmdq_pkt *cmdq_pkt);
+ void (*start)(struct device *dev);
+ void (*stop)(struct device *dev);
+- void (*enable_vblank)(struct device *dev,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data);
++ void (*register_vblank_cb)(struct device *dev,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data);
++ void (*unregister_vblank_cb)(struct device *dev);
++ void (*enable_vblank)(struct device *dev);
+ void (*disable_vblank)(struct device *dev);
+ unsigned int (*supported_rotations)(struct device *dev);
+ unsigned int (*layer_nr)(struct device *dev);
+@@ -110,12 +112,25 @@ static inline void mtk_ddp_comp_stop(struct mtk_ddp_comp *comp)
+ comp->funcs->stop(comp->dev);
+ }
+
+-static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp,
+- void (*vblank_cb)(void *),
+- void *vblank_cb_data)
++static inline void mtk_ddp_comp_register_vblank_cb(struct mtk_ddp_comp *comp,
++ void (*vblank_cb)(void *),
++ void *vblank_cb_data)
++{
++ if (comp->funcs && comp->funcs->register_vblank_cb)
++ comp->funcs->register_vblank_cb(comp->dev, vblank_cb,
++ vblank_cb_data);
++}
++
++static inline void mtk_ddp_comp_unregister_vblank_cb(struct mtk_ddp_comp *comp)
++{
++ if (comp->funcs && comp->funcs->unregister_vblank_cb)
++ comp->funcs->unregister_vblank_cb(comp->dev);
++}
++
++static inline void mtk_ddp_comp_enable_vblank(struct mtk_ddp_comp *comp)
+ {
+ if (comp->funcs && comp->funcs->enable_vblank)
+- comp->funcs->enable_vblank(comp->dev, vblank_cb, vblank_cb_data);
++ comp->funcs->enable_vblank(comp->dev);
+ }
+
+ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp)
+diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+index aec39724ebeb6..8b3928c2c7d78 100644
+--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
++++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+@@ -376,6 +376,7 @@ static int mtk_drm_bind(struct device *dev)
+ err_deinit:
+ mtk_drm_kms_deinit(drm);
+ err_free:
++ private->drm = NULL;
+ drm_dev_put(drm);
+ return ret;
+ }
+diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+index d0544962cfc1a..b983adffa3929 100644
+--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
++++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+@@ -164,8 +164,6 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj,
+
+ ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie,
+ mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs);
+- if (ret)
+- drm_gem_vm_close(vma);
+
+ return ret;
+ }
+@@ -244,7 +242,11 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
+
+ mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP,
+ pgprot_writecombine(PAGE_KERNEL));
+-
++ if (!mtk_gem->kvaddr) {
++ kfree(sgt);
++ kfree(mtk_gem->pages);
++ return -ENOMEM;
++ }
+ out:
+ kfree(sgt);
+ dma_buf_map_set_vaddr(map, mtk_gem->kvaddr);
+@@ -261,6 +263,6 @@ void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *ma
+ return;
+
+ vunmap(vaddr);
+- mtk_gem->kvaddr = 0;
++ mtk_gem->kvaddr = NULL;
+ kfree(mtk_gem->pages);
+ }
+diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
+index 93b40c245f007..98b1204c92906 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
+@@ -11,6 +11,7 @@
+ #include <linux/of_platform.h>
+ #include <linux/phy/phy.h>
+ #include <linux/platform_device.h>
++#include <linux/reset.h>
+
+ #include <video/mipi_display.h>
+ #include <video/videomode.h>
+@@ -202,6 +203,7 @@ struct mtk_dsi {
+ struct mtk_phy_timing phy_timing;
+ int refcount;
+ bool enabled;
++ bool lanes_ready;
+ u32 irq_data;
+ wait_queue_head_t irq_wait_queue;
+ const struct mtk_dsi_driver_data *driver_data;
+@@ -648,18 +650,11 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
+ mtk_dsi_reset_engine(dsi);
+ mtk_dsi_phy_timconfig(dsi);
+
+- mtk_dsi_rxtx_control(dsi);
+- usleep_range(30, 100);
+- mtk_dsi_reset_dphy(dsi);
+ mtk_dsi_ps_control_vact(dsi);
+ mtk_dsi_set_vm_cmd(dsi);
+ mtk_dsi_config_vdo_timing(dsi);
+ mtk_dsi_set_interrupt_enable(dsi);
+
+- mtk_dsi_clk_ulp_mode_leave(dsi);
+- mtk_dsi_lane0_ulp_mode_leave(dsi);
+- mtk_dsi_clk_hs_mode(dsi, 0);
+-
+ return 0;
+ err_disable_engine_clk:
+ clk_disable_unprepare(dsi->engine_clk);
+@@ -691,6 +686,8 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+ mtk_dsi_reset_engine(dsi);
+ mtk_dsi_lane0_ulp_mode_enter(dsi);
+ mtk_dsi_clk_ulp_mode_enter(dsi);
++ /* Set the lane number to 0 to pull the MIPI lanes down */
++ writel(0, dsi->regs + DSI_TXRX_CTRL);
+
+ mtk_dsi_disable(dsi);
+
+@@ -698,21 +695,31 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+ clk_disable_unprepare(dsi->digital_clk);
+
+ phy_power_off(dsi->phy);
++
++ dsi->lanes_ready = false;
+ }
+
+-static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
++static void mtk_dsi_lane_ready(struct mtk_dsi *dsi)
+ {
+- int ret;
++ if (!dsi->lanes_ready) {
++ dsi->lanes_ready = true;
++ mtk_dsi_rxtx_control(dsi);
++ usleep_range(30, 100);
++ mtk_dsi_reset_dphy(dsi);
++ mtk_dsi_clk_ulp_mode_leave(dsi);
++ mtk_dsi_lane0_ulp_mode_leave(dsi);
++ mtk_dsi_clk_hs_mode(dsi, 0);
++ usleep_range(1000, 3000);
++ /* Give the DSI receiver time to react after the MIPI signals are pulled up */
++ }
++}
+
++static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
++{
+ if (dsi->enabled)
+ return;
+
+- ret = mtk_dsi_poweron(dsi);
+- if (ret < 0) {
+- DRM_ERROR("failed to power on dsi\n");
+- return;
+- }
+-
++ mtk_dsi_lane_ready(dsi);
+ mtk_dsi_set_mode(dsi);
+ mtk_dsi_clk_hs_mode(dsi, 1);
+
+@@ -726,8 +733,6 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
+ if (!dsi->enabled)
+ return;
+
+- mtk_dsi_poweroff(dsi);
+-
+ dsi->enabled = false;
+ }
+
+@@ -750,24 +755,53 @@ static void mtk_dsi_bridge_mode_set(struct drm_bridge *bridge,
+ drm_display_mode_to_videomode(adjusted, &dsi->vm);
+ }
+
+-static void mtk_dsi_bridge_disable(struct drm_bridge *bridge)
++static void mtk_dsi_bridge_atomic_disable(struct drm_bridge *bridge,
++ struct drm_bridge_state *old_bridge_state)
+ {
+ struct mtk_dsi *dsi = bridge_to_dsi(bridge);
+
+ mtk_output_dsi_disable(dsi);
+ }
+
+-static void mtk_dsi_bridge_enable(struct drm_bridge *bridge)
++static void mtk_dsi_bridge_atomic_enable(struct drm_bridge *bridge,
++ struct drm_bridge_state *old_bridge_state)
+ {
+ struct mtk_dsi *dsi = bridge_to_dsi(bridge);
+
++ if (dsi->refcount == 0)
++ return;
++
+ mtk_output_dsi_enable(dsi);
+ }
+
++static void mtk_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge,
++ struct drm_bridge_state *old_bridge_state)
++{
++ struct mtk_dsi *dsi = bridge_to_dsi(bridge);
++ int ret;
++
++ ret = mtk_dsi_poweron(dsi);
++ if (ret < 0)
++ DRM_ERROR("failed to power on dsi\n");
++}
++
++static void mtk_dsi_bridge_atomic_post_disable(struct drm_bridge *bridge,
++ struct drm_bridge_state *old_bridge_state)
++{
++ struct mtk_dsi *dsi = bridge_to_dsi(bridge);
++
++ mtk_dsi_poweroff(dsi);
++}
++
+ static const struct drm_bridge_funcs mtk_dsi_bridge_funcs = {
+ .attach = mtk_dsi_bridge_attach,
+- .disable = mtk_dsi_bridge_disable,
+- .enable = mtk_dsi_bridge_enable,
++ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
++ .atomic_disable = mtk_dsi_bridge_atomic_disable,
++ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
++ .atomic_enable = mtk_dsi_bridge_atomic_enable,
++ .atomic_pre_enable = mtk_dsi_bridge_atomic_pre_enable,
++ .atomic_post_disable = mtk_dsi_bridge_atomic_post_disable,
++ .atomic_reset = drm_atomic_helper_bridge_reset,
+ .mode_set = mtk_dsi_bridge_mode_set,
+ };
+
+@@ -890,24 +924,35 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
+ u8 read_data[16];
+ void *src_addr;
+ u8 irq_flag = CMD_DONE_INT_FLAG;
++ u32 dsi_mode;
++ int ret;
+
+- if (readl(dsi->regs + DSI_MODE_CTRL) & MODE) {
+- DRM_ERROR("dsi engine is not command mode\n");
+- return -EINVAL;
++ dsi_mode = readl(dsi->regs + DSI_MODE_CTRL);
++ if (dsi_mode & MODE) {
++ mtk_dsi_stop(dsi);
++ ret = mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500);
++ if (ret)
++ goto restore_dsi_mode;
+ }
+
+ if (MTK_DSI_HOST_IS_READ(msg->type))
+ irq_flag |= LPRX_RD_RDY_INT_FLAG;
+
+- if (mtk_dsi_host_send_cmd(dsi, msg, irq_flag) < 0)
+- return -ETIME;
++ mtk_dsi_lane_ready(dsi);
+
+- if (!MTK_DSI_HOST_IS_READ(msg->type))
+- return 0;
++ ret = mtk_dsi_host_send_cmd(dsi, msg, irq_flag);
++ if (ret)
++ goto restore_dsi_mode;
++
++ if (!MTK_DSI_HOST_IS_READ(msg->type)) {
++ recv_cnt = 0;
++ goto restore_dsi_mode;
++ }
+
+ if (!msg->rx_buf) {
+ DRM_ERROR("dsi receive buffer size may be NULL\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto restore_dsi_mode;
+ }
+
+ for (i = 0; i < 16; i++)
+@@ -932,7 +977,13 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
+ DRM_INFO("dsi get %d byte data from the panel address(0x%x)\n",
+ recv_cnt, *((u8 *)(msg->tx_buf)));
+
+- return recv_cnt;
++restore_dsi_mode:
++ if (dsi_mode & MODE) {
++ mtk_dsi_set_mode(dsi);
++ mtk_dsi_start(dsi);
++ }
++
++ return ret < 0 ? ret : recv_cnt;
+ }
+
+ static const struct mipi_dsi_host_ops mtk_dsi_ops = {
+@@ -980,8 +1031,10 @@ static int mtk_dsi_bind(struct device *dev, struct device *master, void *data)
+ struct mtk_dsi *dsi = dev_get_drvdata(dev);
+
+ ret = mtk_dsi_encoder_init(drm, dsi);
++ if (ret)
++ return ret;
+
+- return ret;
++ return device_reset_optional(dev);
+ }
+
+ static void mtk_dsi_unbind(struct device *dev, struct device *master,
+diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
+index 5838c44cbf6f0..7613b0fa2be6e 100644
+--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
++++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
+@@ -1203,9 +1203,10 @@ static enum drm_connector_status mtk_hdmi_detect(struct mtk_hdmi *hdmi)
+ return mtk_hdmi_update_plugged_status(hdmi);
+ }
+
+-static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
+- const struct drm_display_info *info,
+- const struct drm_display_mode *mode)
++static enum drm_mode_status
++mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
++ const struct drm_display_info *info,
++ const struct drm_display_mode *mode)
+ {
+ struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge);
+ struct drm_bridge *next_bridge;
+@@ -1224,12 +1225,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
+ return MODE_BAD;
+ }
+
+- if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode))
+- return MODE_BAD;
++ if (hdmi->conf) {
++ if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode))
++ return MODE_BAD;
+
+- if (hdmi->conf->max_mode_clock &&
+- mode->clock > hdmi->conf->max_mode_clock)
+- return MODE_CLOCK_HIGH;
++ if (hdmi->conf->max_mode_clock &&
++ mode->clock > hdmi->conf->max_mode_clock)
++ return MODE_CLOCK_HIGH;
++ }
+
+ if (mode->clock < 27000)
+ return MODE_CLOCK_LOW;
+diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig
+index 9f9281dd49f84..a4e1ed96e5e8b 100644
+--- a/drivers/gpu/drm/meson/Kconfig
++++ b/drivers/gpu/drm/meson/Kconfig
+@@ -6,9 +6,11 @@ config DRM_MESON
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+ select DRM_GEM_CMA_HELPER
++ select DRM_DISPLAY_CONNECTOR
+ select VIDEOMODE_HELPERS
+ select REGMAP_MMIO
+ select MESON_CANVAS
++ select CEC_CORE if CEC_NOTIFIER
+
+ config DRM_MESON_DW_HDMI
+ tristate "HDMI Synopsys Controller support for Amlogic Meson Display"
+diff --git a/drivers/gpu/drm/meson/Makefile b/drivers/gpu/drm/meson/Makefile
+index 28a519cdf66b8..523fce45f16ba 100644
+--- a/drivers/gpu/drm/meson/Makefile
++++ b/drivers/gpu/drm/meson/Makefile
+@@ -2,6 +2,7 @@
+ meson-drm-y := meson_drv.o meson_plane.o meson_crtc.o meson_venc_cvbs.o
+ meson-drm-y += meson_viu.o meson_vpp.o meson_venc.o meson_vclk.o meson_overlay.o
+ meson-drm-y += meson_rdma.o meson_osd_afbcd.o
++meson-drm-y += meson_encoder_hdmi.o
+
+ obj-$(CONFIG_DRM_MESON) += meson-drm.o
+ obj-$(CONFIG_DRM_MESON_DW_HDMI) += meson_dw_hdmi.o
+diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
+index bc0d60df04ae4..207b309a21c07 100644
+--- a/drivers/gpu/drm/meson/meson_drv.c
++++ b/drivers/gpu/drm/meson/meson_drv.c
+@@ -32,6 +32,7 @@
+ #include "meson_osd_afbcd.h"
+ #include "meson_registers.h"
+ #include "meson_venc_cvbs.h"
++#include "meson_encoder_hdmi.h"
+ #include "meson_viu.h"
+ #include "meson_vpp.h"
+ #include "meson_rdma.h"
+@@ -113,8 +114,11 @@ static bool meson_vpu_has_available_connectors(struct device *dev)
+ for_each_endpoint_of_node(dev->of_node, ep) {
+ /* If the endpoint node exists, consider it enabled */
+ remote = of_graph_get_remote_port(ep);
+- if (remote)
++ if (remote) {
++ of_node_put(remote);
++ of_node_put(ep);
+ return true;
++ }
+ }
+
+ return false;
+@@ -206,8 +210,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
+ priv->compat = match->compat;
+ priv->afbcd.ops = match->afbcd_ops;
+
+- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vpu");
+- regs = devm_ioremap_resource(dev, res);
++ regs = devm_platform_ioremap_resource_byname(pdev, "vpu");
+ if (IS_ERR(regs)) {
+ ret = PTR_ERR(regs);
+ goto free_drm;
+@@ -302,38 +305,42 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
+ if (priv->afbcd.ops) {
+ ret = priv->afbcd.ops->init(priv);
+ if (ret)
+- return ret;
++ goto free_drm;
+ }
+
+ /* Encoder Initialization */
+
+ ret = meson_venc_cvbs_create(priv);
+ if (ret)
+- goto free_drm;
++ goto exit_afbcd;
+
+ if (has_components) {
+ ret = component_bind_all(drm->dev, drm);
+ if (ret) {
+ dev_err(drm->dev, "Couldn't bind all components\n");
+- goto free_drm;
++ goto exit_afbcd;
+ }
+ }
+
++ ret = meson_encoder_hdmi_init(priv);
++ if (ret)
++ goto unbind_all;
++
+ ret = meson_plane_create(priv);
+ if (ret)
+- goto free_drm;
++ goto unbind_all;
+
+ ret = meson_overlay_create(priv);
+ if (ret)
+- goto free_drm;
++ goto unbind_all;
+
+ ret = meson_crtc_create(priv);
+ if (ret)
+- goto free_drm;
++ goto unbind_all;
+
+ ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm);
+ if (ret)
+- goto free_drm;
++ goto unbind_all;
+
+ drm_mode_config_reset(drm);
+
+@@ -351,6 +358,12 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
+
+ uninstall_irq:
+ free_irq(priv->vsync_irq, drm);
++unbind_all:
++ if (has_components)
++ component_unbind_all(drm->dev, drm);
++exit_afbcd:
++ if (priv->afbcd.ops)
++ priv->afbcd.ops->exit(priv);
+ free_drm:
+ drm_dev_put(drm);
+
+@@ -377,14 +390,12 @@ static void meson_drv_unbind(struct device *dev)
+ drm_dev_unregister(drm);
+ drm_kms_helper_poll_fini(drm);
+ drm_atomic_helper_shutdown(drm);
+- component_unbind_all(dev, drm);
+ free_irq(priv->vsync_irq, drm);
+ drm_dev_put(drm);
++ component_unbind_all(dev, drm);
+
+- if (priv->afbcd.ops) {
+- priv->afbcd.ops->reset(priv);
+- meson_rdma_free(priv);
+- }
++ if (priv->afbcd.ops)
++ priv->afbcd.ops->exit(priv);
+ }
+
+ static const struct component_master_ops meson_drv_master_ops = {
+@@ -512,6 +523,13 @@ static int meson_drv_probe(struct platform_device *pdev)
+ return 0;
+ };
+
++static int meson_drv_remove(struct platform_device *pdev)
++{
++ component_master_del(&pdev->dev, &meson_drv_master_ops);
++
++ return 0;
++}
++
+ static struct meson_drm_match_data meson_drm_gxbb_data = {
+ .compat = VPU_COMPATIBLE_GXBB,
+ };
+@@ -549,6 +567,7 @@ static const struct dev_pm_ops meson_drv_pm_ops = {
+
+ static struct platform_driver meson_drm_platform_driver = {
+ .probe = meson_drv_probe,
++ .remove = meson_drv_remove,
+ .shutdown = meson_drv_shutdown,
+ .driver = {
+ .name = "meson-drm",
+diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
+index 2ed87cfdd7353..5cd2b2ebbbd33 100644
+--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
++++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
+@@ -22,14 +22,11 @@
+ #include <drm/drm_probe_helper.h>
+ #include <drm/drm_print.h>
+
+-#include <linux/media-bus-format.h>
+ #include <linux/videodev2.h>
+
+ #include "meson_drv.h"
+ #include "meson_dw_hdmi.h"
+ #include "meson_registers.h"
+-#include "meson_vclk.h"
+-#include "meson_venc.h"
+
+ #define DRIVER_NAME "meson-dw-hdmi"
+ #define DRIVER_DESC "Amlogic Meson HDMI-TX DRM driver"
+@@ -135,8 +132,6 @@ struct meson_dw_hdmi_data {
+ };
+
+ struct meson_dw_hdmi {
+- struct drm_encoder encoder;
+- struct drm_bridge bridge;
+ struct dw_hdmi_plat_data dw_plat_data;
+ struct meson_drm *priv;
+ struct device *dev;
+@@ -148,12 +143,8 @@ struct meson_dw_hdmi {
+ struct regulator *hdmi_supply;
+ u32 irq_stat;
+ struct dw_hdmi *hdmi;
+- unsigned long output_bus_fmt;
++ struct drm_bridge *bridge;
+ };
+-#define encoder_to_meson_dw_hdmi(x) \
+- container_of(x, struct meson_dw_hdmi, encoder)
+-#define bridge_to_meson_dw_hdmi(x) \
+- container_of(x, struct meson_dw_hdmi, bridge)
+
+ static inline int dw_hdmi_is_compatible(struct meson_dw_hdmi *dw_hdmi,
+ const char *compat)
+@@ -295,14 +286,14 @@ static inline void dw_hdmi_dwc_write_bits(struct meson_dw_hdmi *dw_hdmi,
+
+ /* Setup PHY bandwidth modes */
+ static void meson_hdmi_phy_setup_mode(struct meson_dw_hdmi *dw_hdmi,
+- const struct drm_display_mode *mode)
++ const struct drm_display_mode *mode,
++ bool mode_is_420)
+ {
+ struct meson_drm *priv = dw_hdmi->priv;
+ unsigned int pixel_clock = mode->clock;
+
+ /* For 420, pixel clock is half unlike venc clock */
+- if (dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
+- pixel_clock /= 2;
++ if (mode_is_420) pixel_clock /= 2;
+
+ if (dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxl-dw-hdmi") ||
+ dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxm-dw-hdmi")) {
+@@ -374,68 +365,25 @@ static inline void meson_dw_hdmi_phy_reset(struct meson_dw_hdmi *dw_hdmi)
+ mdelay(2);
+ }
+
+-static void dw_hdmi_set_vclk(struct meson_dw_hdmi *dw_hdmi,
+- const struct drm_display_mode *mode)
+-{
+- struct meson_drm *priv = dw_hdmi->priv;
+- int vic = drm_match_cea_mode(mode);
+- unsigned int phy_freq;
+- unsigned int vclk_freq;
+- unsigned int venc_freq;
+- unsigned int hdmi_freq;
+-
+- vclk_freq = mode->clock;
+-
+- /* For 420, pixel clock is half unlike venc clock */
+- if (dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
+- vclk_freq /= 2;
+-
+- /* TMDS clock is pixel_clock * 10 */
+- phy_freq = vclk_freq * 10;
+-
+- if (!vic) {
+- meson_vclk_setup(priv, MESON_VCLK_TARGET_DMT, phy_freq,
+- vclk_freq, vclk_freq, vclk_freq, false);
+- return;
+- }
+-
+- /* 480i/576i needs global pixel doubling */
+- if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+- vclk_freq *= 2;
+-
+- venc_freq = vclk_freq;
+- hdmi_freq = vclk_freq;
+-
+- /* VENC double pixels for 1080i, 720p and YUV420 modes */
+- if (meson_venc_hdmi_venc_repeat(vic) ||
+- dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
+- venc_freq *= 2;
+-
+- vclk_freq = max(venc_freq, hdmi_freq);
+-
+- if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+- venc_freq /= 2;
+-
+- DRM_DEBUG_DRIVER("vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n",
+- phy_freq, vclk_freq, venc_freq, hdmi_freq,
+- priv->venc.hdmi_use_enci);
+-
+- meson_vclk_setup(priv, MESON_VCLK_TARGET_HDMI, phy_freq, vclk_freq,
+- venc_freq, hdmi_freq, priv->venc.hdmi_use_enci);
+-}
+-
+ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data,
+ const struct drm_display_info *display,
+ const struct drm_display_mode *mode)
+ {
+ struct meson_dw_hdmi *dw_hdmi = (struct meson_dw_hdmi *)data;
++ bool is_hdmi2_sink = display->hdmi.scdc.supported;
+ struct meson_drm *priv = dw_hdmi->priv;
+ unsigned int wr_clk =
+ readl_relaxed(priv->io_base + _REG(VPU_HDMI_SETTING));
++ bool mode_is_420 = false;
+
+ DRM_DEBUG_DRIVER("\"%s\" div%d\n", mode->name,
+ mode->clock > 340000 ? 40 : 10);
+
++ if (drm_mode_is_420_only(display, mode) ||
++ (!is_hdmi2_sink &&
++ drm_mode_is_420_also(display, mode)))
++ mode_is_420 = true;
++
+ /* Enable clocks */
+ regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL, 0xffff, 0x100);
+
+@@ -457,8 +405,7 @@ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data,
+ dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12));
+
+ /* TMDS pattern setup */
+- if (mode->clock > 340000 &&
+- dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_YUV8_1X24) {
++ if (mode->clock > 340000 && !mode_is_420) {
+ dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_01,
+ 0);
+ dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_23,
+@@ -476,7 +423,7 @@ static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data,
+ dw_hdmi->data->top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_CNTL, 0x2);
+
+ /* Setup PHY parameters */
+- meson_hdmi_phy_setup_mode(dw_hdmi, mode);
++ meson_hdmi_phy_setup_mode(dw_hdmi, mode, mode_is_420);
+
+ /* Setup PHY */
+ regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1,
+@@ -622,214 +569,15 @@ static irqreturn_t dw_hdmi_top_thread_irq(int irq, void *dev_id)
+ dw_hdmi_setup_rx_sense(dw_hdmi->hdmi, hpd_connected,
+ hpd_connected);
+
+- drm_helper_hpd_irq_event(dw_hdmi->encoder.dev);
++ drm_helper_hpd_irq_event(dw_hdmi->bridge->dev);
++ drm_bridge_hpd_notify(dw_hdmi->bridge,
++ hpd_connected ? connector_status_connected
++ : connector_status_disconnected);
+ }
+
+ return IRQ_HANDLED;
+ }
+
+-static enum drm_mode_status
+-dw_hdmi_mode_valid(struct dw_hdmi *hdmi, void *data,
+- const struct drm_display_info *display_info,
+- const struct drm_display_mode *mode)
+-{
+- struct meson_dw_hdmi *dw_hdmi = data;
+- struct meson_drm *priv = dw_hdmi->priv;
+- bool is_hdmi2_sink = display_info->hdmi.scdc.supported;
+- unsigned int phy_freq;
+- unsigned int vclk_freq;
+- unsigned int venc_freq;
+- unsigned int hdmi_freq;
+- int vic = drm_match_cea_mode(mode);
+- enum drm_mode_status status;
+-
+- DRM_DEBUG_DRIVER("Modeline " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode));
+-
+- /* If sink does not support 540MHz, reject the non-420 HDMI2 modes */
+- if (display_info->max_tmds_clock &&
+- mode->clock > display_info->max_tmds_clock &&
+- !drm_mode_is_420_only(display_info, mode) &&
+- !drm_mode_is_420_also(display_info, mode))
+- return MODE_BAD;
+-
+- /* Check against non-VIC supported modes */
+- if (!vic) {
+- status = meson_venc_hdmi_supported_mode(mode);
+- if (status != MODE_OK)
+- return status;
+-
+- return meson_vclk_dmt_supported_freq(priv, mode->clock);
+- /* Check against supported VIC modes */
+- } else if (!meson_venc_hdmi_supported_vic(vic))
+- return MODE_BAD;
+-
+- vclk_freq = mode->clock;
+-
+- /* For 420, pixel clock is half unlike venc clock */
+- if (drm_mode_is_420_only(display_info, mode) ||
+- (!is_hdmi2_sink &&
+- drm_mode_is_420_also(display_info, mode)))
+- vclk_freq /= 2;
+-
+- /* TMDS clock is pixel_clock * 10 */
+- phy_freq = vclk_freq * 10;
+-
+- /* 480i/576i needs global pixel doubling */
+- if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+- vclk_freq *= 2;
+-
+- venc_freq = vclk_freq;
+- hdmi_freq = vclk_freq;
+-
+- /* VENC double pixels for 1080i, 720p and YUV420 modes */
+- if (meson_venc_hdmi_venc_repeat(vic) ||
+- drm_mode_is_420_only(display_info, mode) ||
+- (!is_hdmi2_sink &&
+- drm_mode_is_420_also(display_info, mode)))
+- venc_freq *= 2;
+-
+- vclk_freq = max(venc_freq, hdmi_freq);
+-
+- if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+- venc_freq /= 2;
+-
+- dev_dbg(dw_hdmi->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n",
+- __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq);
+-
+- return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq);
+-}
+-
+-/* Encoder */
+-
+-static const u32 meson_dw_hdmi_out_bus_fmts[] = {
+- MEDIA_BUS_FMT_YUV8_1X24,
+- MEDIA_BUS_FMT_UYYVYY8_0_5X24,
+-};
+-
+-static void meson_venc_hdmi_encoder_destroy(struct drm_encoder *encoder)
+-{
+- drm_encoder_cleanup(encoder);
+-}
+-
+-static const struct drm_encoder_funcs meson_venc_hdmi_encoder_funcs = {
+- .destroy = meson_venc_hdmi_encoder_destroy,
+-};
+-
+-static u32 *
+-meson_venc_hdmi_encoder_get_inp_bus_fmts(struct drm_bridge *bridge,
+- struct drm_bridge_state *bridge_state,
+- struct drm_crtc_state *crtc_state,
+- struct drm_connector_state *conn_state,
+- u32 output_fmt,
+- unsigned int *num_input_fmts)
+-{
+- u32 *input_fmts = NULL;
+- int i;
+-
+- *num_input_fmts = 0;
+-
+- for (i = 0 ; i < ARRAY_SIZE(meson_dw_hdmi_out_bus_fmts) ; ++i) {
+- if (output_fmt == meson_dw_hdmi_out_bus_fmts[i]) {
+- *num_input_fmts = 1;
+- input_fmts = kcalloc(*num_input_fmts,
+- sizeof(*input_fmts),
+- GFP_KERNEL);
+- if (!input_fmts)
+- return NULL;
+-
+- input_fmts[0] = output_fmt;
+-
+- break;
+- }
+- }
+-
+- return input_fmts;
+-}
+-
+-static int meson_venc_hdmi_encoder_atomic_check(struct drm_bridge *bridge,
+- struct drm_bridge_state *bridge_state,
+- struct drm_crtc_state *crtc_state,
+- struct drm_connector_state *conn_state)
+-{
+- struct meson_dw_hdmi *dw_hdmi = bridge_to_meson_dw_hdmi(bridge);
+-
+- dw_hdmi->output_bus_fmt = bridge_state->output_bus_cfg.format;
+-
+- DRM_DEBUG_DRIVER("output_bus_fmt %lx\n", dw_hdmi->output_bus_fmt);
+-
+- return 0;
+-}
+-
+-static void meson_venc_hdmi_encoder_disable(struct drm_bridge *bridge)
+-{
+- struct meson_dw_hdmi *dw_hdmi = bridge_to_meson_dw_hdmi(bridge);
+- struct meson_drm *priv = dw_hdmi->priv;
+-
+- DRM_DEBUG_DRIVER("\n");
+-
+- writel_bits_relaxed(0x3, 0,
+- priv->io_base + _REG(VPU_HDMI_SETTING));
+-
+- writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
+- writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));
+-}
+-
+-static void meson_venc_hdmi_encoder_enable(struct drm_bridge *bridge)
+-{
+- struct meson_dw_hdmi *dw_hdmi = bridge_to_meson_dw_hdmi(bridge);
+- struct meson_drm *priv = dw_hdmi->priv;
+-
+- DRM_DEBUG_DRIVER("%s\n", priv->venc.hdmi_use_enci ? "VENCI" : "VENCP");
+-
+- if (priv->venc.hdmi_use_enci)
+- writel_relaxed(1, priv->io_base + _REG(ENCI_VIDEO_EN));
+- else
+- writel_relaxed(1, priv->io_base + _REG(ENCP_VIDEO_EN));
+-}
+-
+-static void meson_venc_hdmi_encoder_mode_set(struct drm_bridge *bridge,
+- const struct drm_display_mode *mode,
+- const struct drm_display_mode *adjusted_mode)
+-{
+- struct meson_dw_hdmi *dw_hdmi = bridge_to_meson_dw_hdmi(bridge);
+- struct meson_drm *priv = dw_hdmi->priv;
+- int vic = drm_match_cea_mode(mode);
+- unsigned int ycrcb_map = VPU_HDMI_OUTPUT_CBYCR;
+- bool yuv420_mode = false;
+-
+- DRM_DEBUG_DRIVER("\"%s\" vic %d\n", mode->name, vic);
+-
+- if (dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) {
+- ycrcb_map = VPU_HDMI_OUTPUT_CRYCB;
+- yuv420_mode = true;
+- }
+-
+- /* VENC + VENC-DVI Mode setup */
+- meson_venc_hdmi_mode_set(priv, vic, ycrcb_map, yuv420_mode, mode);
+-
+- /* VCLK Set clock */
+- dw_hdmi_set_vclk(dw_hdmi, mode);
+-
+- if (dw_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
+- /* Setup YUV420 to HDMI-TX, no 10bit diphering */
+- writel_relaxed(2 | (2 << 2),
+- priv->io_base + _REG(VPU_HDMI_FMT_CTRL));
+- else
+- /* Setup YUV444 to HDMI-TX, no 10bit diphering */
+- writel_relaxed(0, priv->io_base + _REG(VPU_HDMI_FMT_CTRL));
+-}
+-
+-static const struct drm_bridge_funcs meson_venc_hdmi_encoder_bridge_funcs = {
+- .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+- .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
+- .atomic_get_input_bus_fmts = meson_venc_hdmi_encoder_get_inp_bus_fmts,
+- .atomic_reset = drm_atomic_helper_bridge_reset,
+- .atomic_check = meson_venc_hdmi_encoder_atomic_check,
+- .enable = meson_venc_hdmi_encoder_enable,
+- .disable = meson_venc_hdmi_encoder_disable,
+- .mode_set = meson_venc_hdmi_encoder_mode_set,
+-};
+-
+ /* DW HDMI Regmap */
+
+ static int meson_dw_hdmi_reg_read(void *context, unsigned int reg,
+@@ -876,28 +624,6 @@ static const struct meson_dw_hdmi_data meson_dw_hdmi_g12a_data = {
+ .dwc_write = dw_hdmi_g12a_dwc_write,
+ };
+
+-static bool meson_hdmi_connector_is_available(struct device *dev)
+-{
+- struct device_node *ep, *remote;
+-
+- /* HDMI Connector is on the second port, first endpoint */
+- ep = of_graph_get_endpoint_by_regs(dev->of_node, 1, 0);
+- if (!ep)
+- return false;
+-
+- /* If the endpoint node exists, consider it enabled */
+- remote = of_graph_get_remote_port(ep);
+- if (remote) {
+- of_node_put(ep);
+- return true;
+- }
+-
+- of_node_put(ep);
+- of_node_put(remote);
+-
+- return false;
+-}
+-
+ static void meson_dw_hdmi_init(struct meson_dw_hdmi *meson_dw_hdmi)
+ {
+ struct meson_drm *priv = meson_dw_hdmi->priv;
+@@ -976,19 +702,11 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+ struct drm_device *drm = data;
+ struct meson_drm *priv = drm->dev_private;
+ struct dw_hdmi_plat_data *dw_plat_data;
+- struct drm_bridge *next_bridge;
+- struct drm_encoder *encoder;
+- struct resource *res;
+ int irq;
+ int ret;
+
+ DRM_DEBUG_DRIVER("\n");
+
+- if (!meson_hdmi_connector_is_available(dev)) {
+- dev_info(drm->dev, "HDMI Output connector not available\n");
+- return -ENODEV;
+- }
+-
+ match = of_device_get_match_data(&pdev->dev);
+ if (!match) {
+ dev_err(&pdev->dev, "failed to get match data\n");
+@@ -1004,7 +722,6 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+ meson_dw_hdmi->dev = dev;
+ meson_dw_hdmi->data = match;
+ dw_plat_data = &meson_dw_hdmi->dw_plat_data;
+- encoder = &meson_dw_hdmi->encoder;
+
+ meson_dw_hdmi->hdmi_supply = devm_regulator_get_optional(dev, "hdmi");
+ if (IS_ERR(meson_dw_hdmi->hdmi_supply)) {
+@@ -1042,8 +759,7 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+ return PTR_ERR(meson_dw_hdmi->hdmitx_phy);
+ }
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- meson_dw_hdmi->hdmitx = devm_ioremap_resource(dev, res);
++ meson_dw_hdmi->hdmitx = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(meson_dw_hdmi->hdmitx))
+ return PTR_ERR(meson_dw_hdmi->hdmitx);
+
+@@ -1076,34 +792,18 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+ return ret;
+ }
+
+- /* Encoder */
+-
+- ret = drm_encoder_init(drm, encoder, &meson_venc_hdmi_encoder_funcs,
+- DRM_MODE_ENCODER_TMDS, "meson_hdmi");
+- if (ret) {
+- dev_err(priv->dev, "Failed to init HDMI encoder\n");
+- return ret;
+- }
+-
+- meson_dw_hdmi->bridge.funcs = &meson_venc_hdmi_encoder_bridge_funcs;
+- drm_bridge_attach(encoder, &meson_dw_hdmi->bridge, NULL, 0);
+-
+- encoder->possible_crtcs = BIT(0);
+-
+ meson_dw_hdmi_init(meson_dw_hdmi);
+
+- DRM_DEBUG_DRIVER("encoder initialized\n");
+-
+ /* Bridge / Connector */
+
+ dw_plat_data->priv_data = meson_dw_hdmi;
+- dw_plat_data->mode_valid = dw_hdmi_mode_valid;
+ dw_plat_data->phy_ops = &meson_dw_hdmi_phy_ops;
+ dw_plat_data->phy_name = "meson_dw_hdmi_phy";
+ dw_plat_data->phy_data = meson_dw_hdmi;
+ dw_plat_data->input_bus_encoding = V4L2_YCBCR_ENC_709;
+ dw_plat_data->ycbcr_420_allowed = true;
+ dw_plat_data->disable_cec = true;
++ dw_plat_data->output_port = 1;
+
+ if (dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxl-dw-hdmi") ||
+ dw_hdmi_is_compatible(meson_dw_hdmi, "amlogic,meson-gxm-dw-hdmi") ||
+@@ -1112,15 +812,11 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+
+ platform_set_drvdata(pdev, meson_dw_hdmi);
+
+- meson_dw_hdmi->hdmi = dw_hdmi_probe(pdev,
+- &meson_dw_hdmi->dw_plat_data);
++ meson_dw_hdmi->hdmi = dw_hdmi_probe(pdev, &meson_dw_hdmi->dw_plat_data);
+ if (IS_ERR(meson_dw_hdmi->hdmi))
+ return PTR_ERR(meson_dw_hdmi->hdmi);
+
+- next_bridge = of_drm_find_bridge(pdev->dev.of_node);
+- if (next_bridge)
+- drm_bridge_attach(encoder, next_bridge,
+- &meson_dw_hdmi->bridge, 0);
++ meson_dw_hdmi->bridge = of_drm_find_bridge(pdev->dev.of_node);
+
+ DRM_DEBUG_DRIVER("HDMI controller initialized\n");
+
+diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+new file mode 100644
+index 0000000000000..a7692584487cc
+--- /dev/null
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+@@ -0,0 +1,456 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * Copyright (C) 2016 BayLibre, SAS
++ * Author: Neil Armstrong <narmstrong@baylibre.com>
++ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
++ */
++
++#include <linux/clk.h>
++#include <linux/component.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/of_device.h>
++#include <linux/of_graph.h>
++#include <linux/regulator/consumer.h>
++#include <linux/reset.h>
++
++#include <media/cec-notifier.h>
++
++#include <drm/drm_atomic_helper.h>
++#include <drm/drm_bridge.h>
++#include <drm/drm_bridge_connector.h>
++#include <drm/drm_device.h>
++#include <drm/drm_edid.h>
++#include <drm/drm_probe_helper.h>
++#include <drm/drm_simple_kms_helper.h>
++
++#include <linux/media-bus-format.h>
++#include <linux/videodev2.h>
++
++#include "meson_drv.h"
++#include "meson_registers.h"
++#include "meson_vclk.h"
++#include "meson_venc.h"
++#include "meson_encoder_hdmi.h"
++
++struct meson_encoder_hdmi {
++ struct drm_encoder encoder;
++ struct drm_bridge bridge;
++ struct drm_bridge *next_bridge;
++ struct drm_connector *connector;
++ struct meson_drm *priv;
++ unsigned long output_bus_fmt;
++ struct cec_notifier *cec_notifier;
++};
++
++#define bridge_to_meson_encoder_hdmi(x) \
++ container_of(x, struct meson_encoder_hdmi, bridge)
++
++static int meson_encoder_hdmi_attach(struct drm_bridge *bridge,
++ enum drm_bridge_attach_flags flags)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++
++ return drm_bridge_attach(bridge->encoder, encoder_hdmi->next_bridge,
++ &encoder_hdmi->bridge, flags);
++}
++
++static void meson_encoder_hdmi_detach(struct drm_bridge *bridge)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++
++ cec_notifier_conn_unregister(encoder_hdmi->cec_notifier);
++ encoder_hdmi->cec_notifier = NULL;
++}
++
++static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
++ const struct drm_display_mode *mode)
++{
++ struct meson_drm *priv = encoder_hdmi->priv;
++ int vic = drm_match_cea_mode(mode);
++ unsigned int phy_freq;
++ unsigned int vclk_freq;
++ unsigned int venc_freq;
++ unsigned int hdmi_freq;
++
++ vclk_freq = mode->clock;
++
++ /* For 420, pixel clock is half unlike venc clock */
++ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
++ vclk_freq /= 2;
++
++ /* TMDS clock is pixel_clock * 10 */
++ phy_freq = vclk_freq * 10;
++
++ if (!vic) {
++ meson_vclk_setup(priv, MESON_VCLK_TARGET_DMT, phy_freq,
++ vclk_freq, vclk_freq, vclk_freq, false);
++ return;
++ }
++
++ /* 480i/576i needs global pixel doubling */
++ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
++ vclk_freq *= 2;
++
++ venc_freq = vclk_freq;
++ hdmi_freq = vclk_freq;
++
++ /* VENC double pixels for 1080i, 720p and YUV420 modes */
++ if (meson_venc_hdmi_venc_repeat(vic) ||
++ encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
++ venc_freq *= 2;
++
++ vclk_freq = max(venc_freq, hdmi_freq);
++
++ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
++ venc_freq /= 2;
++
++ dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n",
++ phy_freq, vclk_freq, venc_freq, hdmi_freq,
++ priv->venc.hdmi_use_enci);
++
++ meson_vclk_setup(priv, MESON_VCLK_TARGET_HDMI, phy_freq, vclk_freq,
++ venc_freq, hdmi_freq, priv->venc.hdmi_use_enci);
++}
++
++static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bridge,
++ const struct drm_display_info *display_info,
++ const struct drm_display_mode *mode)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++ struct meson_drm *priv = encoder_hdmi->priv;
++ bool is_hdmi2_sink = display_info->hdmi.scdc.supported;
++ unsigned int phy_freq;
++ unsigned int vclk_freq;
++ unsigned int venc_freq;
++ unsigned int hdmi_freq;
++ int vic = drm_match_cea_mode(mode);
++ enum drm_mode_status status;
++
++ dev_dbg(priv->dev, "Modeline " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode));
++
++ /* If sink does not support 540MHz, reject the non-420 HDMI2 modes */
++ if (display_info->max_tmds_clock &&
++ mode->clock > display_info->max_tmds_clock &&
++ !drm_mode_is_420_only(display_info, mode) &&
++ !drm_mode_is_420_also(display_info, mode))
++ return MODE_BAD;
++
++ /* Check against non-VIC supported modes */
++ if (!vic) {
++ status = meson_venc_hdmi_supported_mode(mode);
++ if (status != MODE_OK)
++ return status;
++
++ return meson_vclk_dmt_supported_freq(priv, mode->clock);
++ /* Check against supported VIC modes */
++ } else if (!meson_venc_hdmi_supported_vic(vic))
++ return MODE_BAD;
++
++ vclk_freq = mode->clock;
++
++ /* For 420, pixel clock is half unlike venc clock */
++ if (drm_mode_is_420_only(display_info, mode) ||
++ (!is_hdmi2_sink &&
++ drm_mode_is_420_also(display_info, mode)))
++ vclk_freq /= 2;
++
++ /* TMDS clock is pixel_clock * 10 */
++ phy_freq = vclk_freq * 10;
++
++ /* 480i/576i needs global pixel doubling */
++ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
++ vclk_freq *= 2;
++
++ venc_freq = vclk_freq;
++ hdmi_freq = vclk_freq;
++
++ /* VENC double pixels for 1080i, 720p and YUV420 modes */
++ if (meson_venc_hdmi_venc_repeat(vic) ||
++ drm_mode_is_420_only(display_info, mode) ||
++ (!is_hdmi2_sink &&
++ drm_mode_is_420_also(display_info, mode)))
++ venc_freq *= 2;
++
++ vclk_freq = max(venc_freq, hdmi_freq);
++
++ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
++ venc_freq /= 2;
++
++ dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n",
++ __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq);
++
++ return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq);
++}
++
++static void meson_encoder_hdmi_atomic_enable(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++ struct drm_atomic_state *state = bridge_state->base.state;
++ unsigned int ycrcb_map = VPU_HDMI_OUTPUT_CBYCR;
++ struct meson_drm *priv = encoder_hdmi->priv;
++ struct drm_connector_state *conn_state;
++ const struct drm_display_mode *mode;
++ struct drm_crtc_state *crtc_state;
++ struct drm_connector *connector;
++ bool yuv420_mode = false;
++ int vic;
++
++ connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder);
++ if (WARN_ON(!connector))
++ return;
++
++ conn_state = drm_atomic_get_new_connector_state(state, connector);
++ if (WARN_ON(!conn_state))
++ return;
++
++ crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
++ if (WARN_ON(!crtc_state))
++ return;
++
++ mode = &crtc_state->adjusted_mode;
++
++ vic = drm_match_cea_mode(mode);
++
++ dev_dbg(priv->dev, "\"%s\" vic %d\n", mode->name, vic);
++
++ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) {
++ ycrcb_map = VPU_HDMI_OUTPUT_CRYCB;
++ yuv420_mode = true;
++ }
++
++ /* VENC + VENC-DVI Mode setup */
++ meson_venc_hdmi_mode_set(priv, vic, ycrcb_map, yuv420_mode, mode);
++
++ /* VCLK Set clock */
++ meson_encoder_hdmi_set_vclk(encoder_hdmi, mode);
++
++ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
++		/* Setup YUV420 to HDMI-TX, no 10bit dithering */
++ writel_relaxed(2 | (2 << 2),
++ priv->io_base + _REG(VPU_HDMI_FMT_CTRL));
++ else
++		/* Setup YUV444 to HDMI-TX, no 10bit dithering */
++ writel_relaxed(0, priv->io_base + _REG(VPU_HDMI_FMT_CTRL));
++
++ dev_dbg(priv->dev, "%s\n", priv->venc.hdmi_use_enci ? "VENCI" : "VENCP");
++
++ if (priv->venc.hdmi_use_enci)
++ writel_relaxed(1, priv->io_base + _REG(ENCI_VIDEO_EN));
++ else
++ writel_relaxed(1, priv->io_base + _REG(ENCP_VIDEO_EN));
++}
++
++static void meson_encoder_hdmi_atomic_disable(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++ struct meson_drm *priv = encoder_hdmi->priv;
++
++ writel_bits_relaxed(0x3, 0,
++ priv->io_base + _REG(VPU_HDMI_SETTING));
++
++ writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
++ writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));
++}
++
++static const u32 meson_encoder_hdmi_out_bus_fmts[] = {
++ MEDIA_BUS_FMT_YUV8_1X24,
++ MEDIA_BUS_FMT_UYYVYY8_0_5X24,
++};
++
++static u32 *
++meson_encoder_hdmi_get_inp_bus_fmts(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state,
++ u32 output_fmt,
++ unsigned int *num_input_fmts)
++{
++ u32 *input_fmts = NULL;
++ int i;
++
++ *num_input_fmts = 0;
++
++ for (i = 0 ; i < ARRAY_SIZE(meson_encoder_hdmi_out_bus_fmts) ; ++i) {
++ if (output_fmt == meson_encoder_hdmi_out_bus_fmts[i]) {
++ *num_input_fmts = 1;
++ input_fmts = kcalloc(*num_input_fmts,
++ sizeof(*input_fmts),
++ GFP_KERNEL);
++ if (!input_fmts)
++ return NULL;
++
++ input_fmts[0] = output_fmt;
++
++ break;
++ }
++ }
++
++ return input_fmts;
++}
++
++static int meson_encoder_hdmi_atomic_check(struct drm_bridge *bridge,
++ struct drm_bridge_state *bridge_state,
++ struct drm_crtc_state *crtc_state,
++ struct drm_connector_state *conn_state)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++ struct drm_connector_state *old_conn_state =
++ drm_atomic_get_old_connector_state(conn_state->state, conn_state->connector);
++ struct meson_drm *priv = encoder_hdmi->priv;
++
++ encoder_hdmi->output_bus_fmt = bridge_state->output_bus_cfg.format;
++
++ dev_dbg(priv->dev, "output_bus_fmt %lx\n", encoder_hdmi->output_bus_fmt);
++
++ if (!drm_connector_atomic_hdr_metadata_equal(old_conn_state, conn_state))
++ crtc_state->mode_changed = true;
++
++ return 0;
++}
++
++static void meson_encoder_hdmi_hpd_notify(struct drm_bridge *bridge,
++ enum drm_connector_status status)
++{
++ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
++ struct edid *edid;
++
++ if (!encoder_hdmi->cec_notifier)
++ return;
++
++ if (status == connector_status_connected) {
++ edid = drm_bridge_get_edid(encoder_hdmi->next_bridge, encoder_hdmi->connector);
++ if (!edid)
++ return;
++
++ cec_notifier_set_phys_addr_from_edid(encoder_hdmi->cec_notifier, edid);
++ } else
++ cec_notifier_phys_addr_invalidate(encoder_hdmi->cec_notifier);
++}
++
++static const struct drm_bridge_funcs meson_encoder_hdmi_bridge_funcs = {
++ .attach = meson_encoder_hdmi_attach,
++ .detach = meson_encoder_hdmi_detach,
++ .mode_valid = meson_encoder_hdmi_mode_valid,
++ .hpd_notify = meson_encoder_hdmi_hpd_notify,
++ .atomic_enable = meson_encoder_hdmi_atomic_enable,
++ .atomic_disable = meson_encoder_hdmi_atomic_disable,
++ .atomic_get_input_bus_fmts = meson_encoder_hdmi_get_inp_bus_fmts,
++ .atomic_check = meson_encoder_hdmi_atomic_check,
++ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
++ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
++ .atomic_reset = drm_atomic_helper_bridge_reset,
++};
++
++int meson_encoder_hdmi_init(struct meson_drm *priv)
++{
++ struct meson_encoder_hdmi *meson_encoder_hdmi;
++ struct platform_device *pdev;
++ struct device_node *remote;
++ int ret;
++
++ meson_encoder_hdmi = devm_kzalloc(priv->dev, sizeof(*meson_encoder_hdmi), GFP_KERNEL);
++ if (!meson_encoder_hdmi)
++ return -ENOMEM;
++
++ /* HDMI Transceiver Bridge */
++ remote = of_graph_get_remote_node(priv->dev->of_node, 1, 0);
++ if (!remote) {
++ dev_err(priv->dev, "HDMI transceiver device is disabled");
++ return 0;
++ }
++
++ meson_encoder_hdmi->next_bridge = of_drm_find_bridge(remote);
++ if (!meson_encoder_hdmi->next_bridge) {
++ dev_err(priv->dev, "Failed to find HDMI transceiver bridge\n");
++ ret = -EPROBE_DEFER;
++ goto err_put_node;
++ }
++
++ /* HDMI Encoder Bridge */
++ meson_encoder_hdmi->bridge.funcs = &meson_encoder_hdmi_bridge_funcs;
++ meson_encoder_hdmi->bridge.of_node = priv->dev->of_node;
++ meson_encoder_hdmi->bridge.type = DRM_MODE_CONNECTOR_HDMIA;
++ meson_encoder_hdmi->bridge.interlace_allowed = true;
++
++ drm_bridge_add(&meson_encoder_hdmi->bridge);
++
++ meson_encoder_hdmi->priv = priv;
++
++ /* Encoder */
++ ret = drm_simple_encoder_init(priv->drm, &meson_encoder_hdmi->encoder,
++ DRM_MODE_ENCODER_TMDS);
++ if (ret) {
++ dev_err(priv->dev, "Failed to init HDMI encoder: %d\n", ret);
++ goto err_put_node;
++ }
++
++ meson_encoder_hdmi->encoder.possible_crtcs = BIT(0);
++
++ /* Attach HDMI Encoder Bridge to Encoder */
++ ret = drm_bridge_attach(&meson_encoder_hdmi->encoder, &meson_encoder_hdmi->bridge, NULL,
++ DRM_BRIDGE_ATTACH_NO_CONNECTOR);
++ if (ret) {
++ dev_err(priv->dev, "Failed to attach bridge: %d\n", ret);
++ goto err_put_node;
++ }
++
++ /* Initialize & attach Bridge Connector */
++ meson_encoder_hdmi->connector = drm_bridge_connector_init(priv->drm,
++ &meson_encoder_hdmi->encoder);
++ if (IS_ERR(meson_encoder_hdmi->connector)) {
++ dev_err(priv->dev, "Unable to create HDMI bridge connector\n");
++ ret = PTR_ERR(meson_encoder_hdmi->connector);
++ goto err_put_node;
++ }
++ drm_connector_attach_encoder(meson_encoder_hdmi->connector,
++ &meson_encoder_hdmi->encoder);
++
++ /*
++ * We should have now in place:
++ * encoder->[hdmi encoder bridge]->[dw-hdmi bridge]->[display connector bridge]->[display connector]
++ */
++
++ /*
++ * drm_connector_attach_max_bpc_property() requires the
++ * connector to have a state.
++ */
++ drm_atomic_helper_connector_reset(meson_encoder_hdmi->connector);
++
++ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL) ||
++ meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
++ meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
++ drm_connector_attach_hdr_output_metadata_property(meson_encoder_hdmi->connector);
++
++ drm_connector_attach_max_bpc_property(meson_encoder_hdmi->connector, 8, 8);
++
++ /* Handle this here until handled by drm_bridge_connector_init() */
++ meson_encoder_hdmi->connector->ycbcr_420_allowed = true;
++
++ pdev = of_find_device_by_node(remote);
++ of_node_put(remote);
++ if (pdev) {
++ struct cec_connector_info conn_info;
++ struct cec_notifier *notifier;
++
++ cec_fill_conn_info_from_drm(&conn_info, meson_encoder_hdmi->connector);
++
++ notifier = cec_notifier_conn_register(&pdev->dev, NULL, &conn_info);
++ if (!notifier) {
++ put_device(&pdev->dev);
++ return -ENOMEM;
++ }
++
++ meson_encoder_hdmi->cec_notifier = notifier;
++ }
++
++ dev_dbg(priv->dev, "HDMI encoder initialized\n");
++
++ return 0;
++
++err_put_node:
++ of_node_put(remote);
++ return ret;
++}
+diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.h b/drivers/gpu/drm/meson/meson_encoder_hdmi.h
+new file mode 100644
+index 0000000000000..ed19494f09563
+--- /dev/null
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.h
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++/*
++ * Copyright (C) 2021 BayLibre, SAS
++ * Author: Neil Armstrong <narmstrong@baylibre.com>
++ */
++
++#ifndef __MESON_ENCODER_HDMI_H
++#define __MESON_ENCODER_HDMI_H
++
++int meson_encoder_hdmi_init(struct meson_drm *priv);
++
++#endif /* __MESON_ENCODER_HDMI_H */
+diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.c b/drivers/gpu/drm/meson/meson_osd_afbcd.c
+index ffc6b584dbf85..0cdbe899402f8 100644
+--- a/drivers/gpu/drm/meson/meson_osd_afbcd.c
++++ b/drivers/gpu/drm/meson/meson_osd_afbcd.c
+@@ -79,11 +79,6 @@ static bool meson_gxm_afbcd_supported_fmt(u64 modifier, uint32_t format)
+ return meson_gxm_afbcd_pixel_fmt(modifier, format) >= 0;
+ }
+
+-static int meson_gxm_afbcd_init(struct meson_drm *priv)
+-{
+- return 0;
+-}
+-
+ static int meson_gxm_afbcd_reset(struct meson_drm *priv)
+ {
+ writel_relaxed(VIU_SW_RESET_OSD1_AFBCD,
+@@ -93,6 +88,16 @@ static int meson_gxm_afbcd_reset(struct meson_drm *priv)
+ return 0;
+ }
+
++static int meson_gxm_afbcd_init(struct meson_drm *priv)
++{
++ return 0;
++}
++
++static void meson_gxm_afbcd_exit(struct meson_drm *priv)
++{
++ meson_gxm_afbcd_reset(priv);
++}
++
+ static int meson_gxm_afbcd_enable(struct meson_drm *priv)
+ {
+ writel_relaxed(FIELD_PREP(OSD1_AFBCD_ID_FIFO_THRD, 0x40) |
+@@ -172,6 +177,7 @@ static int meson_gxm_afbcd_setup(struct meson_drm *priv)
+
+ struct meson_afbcd_ops meson_afbcd_gxm_ops = {
+ .init = meson_gxm_afbcd_init,
++ .exit = meson_gxm_afbcd_exit,
+ .reset = meson_gxm_afbcd_reset,
+ .enable = meson_gxm_afbcd_enable,
+ .disable = meson_gxm_afbcd_disable,
+@@ -269,6 +275,18 @@ static bool meson_g12a_afbcd_supported_fmt(u64 modifier, uint32_t format)
+ return meson_g12a_afbcd_pixel_fmt(modifier, format) >= 0;
+ }
+
++static int meson_g12a_afbcd_reset(struct meson_drm *priv)
++{
++ meson_rdma_reset(priv);
++
++ meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB |
++ VIU_SW_RESET_G12A_OSD1_AFBCD,
++ VIU_SW_RESET);
++ meson_rdma_writel_sync(priv, 0, VIU_SW_RESET);
++
++ return 0;
++}
++
+ static int meson_g12a_afbcd_init(struct meson_drm *priv)
+ {
+ int ret;
+@@ -286,16 +304,10 @@ static int meson_g12a_afbcd_init(struct meson_drm *priv)
+ return 0;
+ }
+
+-static int meson_g12a_afbcd_reset(struct meson_drm *priv)
++static void meson_g12a_afbcd_exit(struct meson_drm *priv)
+ {
+- meson_rdma_reset(priv);
+-
+- meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB |
+- VIU_SW_RESET_G12A_OSD1_AFBCD,
+- VIU_SW_RESET);
+- meson_rdma_writel_sync(priv, 0, VIU_SW_RESET);
+-
+- return 0;
++ meson_g12a_afbcd_reset(priv);
++ meson_rdma_free(priv);
+ }
+
+ static int meson_g12a_afbcd_enable(struct meson_drm *priv)
+@@ -380,6 +392,7 @@ static int meson_g12a_afbcd_setup(struct meson_drm *priv)
+
+ struct meson_afbcd_ops meson_afbcd_g12a_ops = {
+ .init = meson_g12a_afbcd_init,
++ .exit = meson_g12a_afbcd_exit,
+ .reset = meson_g12a_afbcd_reset,
+ .enable = meson_g12a_afbcd_enable,
+ .disable = meson_g12a_afbcd_disable,
+diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.h b/drivers/gpu/drm/meson/meson_osd_afbcd.h
+index 5e5523304f42f..e77ddeb6416f3 100644
+--- a/drivers/gpu/drm/meson/meson_osd_afbcd.h
++++ b/drivers/gpu/drm/meson/meson_osd_afbcd.h
+@@ -14,6 +14,7 @@
+
+ struct meson_afbcd_ops {
+ int (*init)(struct meson_drm *priv);
++ void (*exit)(struct meson_drm *priv);
+ int (*reset)(struct meson_drm *priv);
+ int (*enable)(struct meson_drm *priv);
+ int (*disable)(struct meson_drm *priv);
+diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
+index 8640a8a8a4691..44aa526294439 100644
+--- a/drivers/gpu/drm/meson/meson_plane.c
++++ b/drivers/gpu/drm/meson/meson_plane.c
+@@ -168,7 +168,7 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
+
+ /* Enable OSD and BLK0, set max global alpha */
+ priv->viu.osd1_ctrl_stat = OSD_ENABLE |
+- (0xFF << OSD_GLOBAL_ALPHA_SHIFT) |
++ (0x100 << OSD_GLOBAL_ALPHA_SHIFT) |
+ OSD_BLK0_ENABLE;
+
+ priv->viu.osd1_ctrl_stat2 = readl(priv->io_base +
+diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
+index 259f3e6bec90a..cd399b0b71814 100644
+--- a/drivers/gpu/drm/meson/meson_viu.c
++++ b/drivers/gpu/drm/meson/meson_viu.c
+@@ -94,7 +94,7 @@ static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv,
+ priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF11_12));
+ writel(((m[9] & 0x1fff) << 16) | (m[10] & 0x1fff),
+ priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF20_21));
+- writel((m[11] & 0x1fff) << 16,
++ writel((m[11] & 0x1fff),
+ priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF22));
+
+ writel(((m[18] & 0xfff) << 16) | (m[19] & 0xfff),
+@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv)
+
+ /* Initialize OSD1 fifo control register */
+ reg = VIU_OSD_DDR_PRIORITY_URGENT |
+- VIU_OSD_HOLD_FIFO_LINES(31) |
+ VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */
+ VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */
+ VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */
+
+ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+- reg |= VIU_OSD_BURST_LENGTH_32;
++ reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31));
+ else
+- reg |= VIU_OSD_BURST_LENGTH_64;
++ reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4));
+
+ writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT));
+ writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT));
+@@ -469,17 +468,17 @@ void meson_viu_init(struct meson_drm *priv)
+ priv->io_base + _REG(VD2_IF0_LUMA_FIFO_SIZE));
+
+ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
+- writel_relaxed(VIU_OSD_BLEND_REORDER(0, 1) |
+- VIU_OSD_BLEND_REORDER(1, 0) |
+- VIU_OSD_BLEND_REORDER(2, 0) |
+- VIU_OSD_BLEND_REORDER(3, 0) |
+- VIU_OSD_BLEND_DIN_EN(1) |
+- VIU_OSD_BLEND1_DIN3_BYPASS_TO_DOUT1 |
+- VIU_OSD_BLEND1_DOUT_BYPASS_TO_BLEND2 |
+- VIU_OSD_BLEND_DIN0_BYPASS_TO_DOUT0 |
+- VIU_OSD_BLEND_BLEN2_PREMULT_EN(1) |
+- VIU_OSD_BLEND_HOLD_LINES(4),
+- priv->io_base + _REG(VIU_OSD_BLEND_CTRL));
++ u32 val = (u32)VIU_OSD_BLEND_REORDER(0, 1) |
++ (u32)VIU_OSD_BLEND_REORDER(1, 0) |
++ (u32)VIU_OSD_BLEND_REORDER(2, 0) |
++ (u32)VIU_OSD_BLEND_REORDER(3, 0) |
++ (u32)VIU_OSD_BLEND_DIN_EN(1) |
++ (u32)VIU_OSD_BLEND1_DIN3_BYPASS_TO_DOUT1 |
++ (u32)VIU_OSD_BLEND1_DOUT_BYPASS_TO_BLEND2 |
++ (u32)VIU_OSD_BLEND_DIN0_BYPASS_TO_DOUT0 |
++ (u32)VIU_OSD_BLEND_BLEN2_PREMULT_EN(1) |
++ (u32)VIU_OSD_BLEND_HOLD_LINES(4);
++ writel_relaxed(val, priv->io_base + _REG(VIU_OSD_BLEND_CTRL));
+
+ writel_relaxed(OSD_BLEND_PATH_SEL_ENABLE,
+ priv->io_base + _REG(OSD1_BLEND_SRC_CTRL));
+diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c
+index 154837688ab0d..5df1957c8e41f 100644
+--- a/drivers/gpu/drm/meson/meson_vpp.c
++++ b/drivers/gpu/drm/meson/meson_vpp.c
+@@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv)
+ priv->io_base + _REG(VPP_DOLBY_CTRL));
+ writel_relaxed(0x1020080,
+ priv->io_base + _REG(VPP_DUMMY_DATA1));
++ writel_relaxed(0x42020,
++ priv->io_base + _REG(VPP_DUMMY_DATA));
+ } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+ writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL));
+
+diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
+index fd98e8bbc5500..2c7271f545dcc 100644
+--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
++++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
+@@ -529,7 +529,10 @@ static void mgag200_set_format_regs(struct mga_device *mdev,
+ WREG_GFX(3, 0x00);
+ WREG_GFX(4, 0x00);
+ WREG_GFX(5, 0x40);
+- WREG_GFX(6, 0x05);
++ /* GCTL6 should be 0x05, but we configure memmapsl to 0xb8000 (text mode),
++ * so that it doesn't hang when running kexec/kdump on G200_SE rev42.
++ */
++ WREG_GFX(6, 0x0d);
+ WREG_GFX(7, 0x0f);
+ WREG_GFX(8, 0x0f);
+
+diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c
+index e9ae22b4f8138..87f9846b9b4ff 100644
+--- a/drivers/gpu/drm/mgag200/mgag200_pll.c
++++ b/drivers/gpu/drm/mgag200/mgag200_pll.c
+@@ -268,7 +268,8 @@ static void mgag200_pixpll_update_g200se_04(struct mgag200_pll *pixpll,
+ pixpllcp = pixpllc->p - 1;
+ pixpllcs = pixpllc->s;
+
+- xpixpllcm = pixpllcm | ((pixpllcn & BIT(8)) >> 1);
++ // For G200SE A, BIT(7) should be set unconditionally.
++ xpixpllcm = BIT(7) | pixpllcm;
+ xpixpllcn = pixpllcn;
+ xpixpllcp = (pixpllcs << 3) | pixpllcp;
+
+@@ -404,9 +405,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl
+ udelay(50);
+
+ /* program pixel pll register */
+- WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn);
+- WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm);
+- WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp);
++ WREG_DAC(MGA1064_WB_PIX_PLLC_N, xpixpllcn);
++ WREG_DAC(MGA1064_WB_PIX_PLLC_M, xpixpllcm);
++ WREG_DAC(MGA1064_WB_PIX_PLLC_P, xpixpllcp);
+
+ udelay(50);
+
+diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
+index 3ddf739a6f9b8..c49b239231190 100644
+--- a/drivers/gpu/drm/msm/Kconfig
++++ b/drivers/gpu/drm/msm/Kconfig
+@@ -63,6 +63,7 @@ config DRM_MSM_HDMI_HDCP
+ config DRM_MSM_DP
+ bool "Enable DisplayPort support in MSM DRM driver"
+ depends on DRM_MSM
++ select RATIONAL
+ default y
+ help
+ Compile in support for DP driver in MSM DRM driver. DP external
+diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
+index 904535eda0c4c..91b09cda8a9c2 100644
+--- a/drivers/gpu/drm/msm/Makefile
++++ b/drivers/gpu/drm/msm/Makefile
+@@ -19,7 +19,7 @@ msm-y := \
+ hdmi/hdmi.o \
+ hdmi/hdmi_audio.o \
+ hdmi/hdmi_bridge.o \
+- hdmi/hdmi_connector.o \
++ hdmi/hdmi_hpd.o \
+ hdmi/hdmi_i2c.o \
+ hdmi/hdmi_phy.o \
+ hdmi/hdmi_phy_8960.o \
+diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+index bdc989183c648..17d6a1ecb1110 100644
+--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+@@ -521,6 +521,10 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
+ gpu->perfcntrs = perfcntrs;
+ gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
+
++ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
++ if (ret)
++ goto fail;
++
+ if (adreno_is_a20x(adreno_gpu))
+ adreno_gpu->registers = a200_registers;
+ else if (adreno_is_a225(adreno_gpu))
+@@ -528,10 +532,6 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
+ else
+ adreno_gpu->registers = a220_registers;
+
+- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
+- if (ret)
+- goto fail;
+-
+ if (!gpu->aspace) {
+ dev_err(dev->dev, "No memory protection without MMU\n");
+ if (!allow_vram_carveout) {
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
+index c9d11d57aed66..1aa39aa73e745 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
+@@ -107,7 +107,7 @@ reset_set(void *data, u64 val)
+ * try to reset an active GPU.
+ */
+
+- mutex_lock(&dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ release_firmware(adreno_gpu->fw[ADRENO_FW_PM4]);
+ adreno_gpu->fw[ADRENO_FW_PM4] = NULL;
+@@ -133,7 +133,7 @@ reset_set(void *data, u64 val)
+ gpu->funcs->recover(gpu);
+
+ pm_runtime_put_sync(&gpu->pdev->dev);
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+index 5e2750eb3810c..e9c8111122bd6 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+@@ -90,7 +90,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
+ * since we've already mapped it once in
+ * submit_reloc()
+ */
+- if (WARN_ON(!ptr))
++ if (WARN_ON(IS_ERR_OR_NULL(ptr)))
+ return;
+
+ for (i = 0; i < dwords; i++) {
+@@ -153,8 +153,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+ OUT_RING(ring, 1);
+
+ /* Enable local preemption for finegrain preemption */
+- OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+- OUT_RING(ring, 0x02);
++ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
++ OUT_RING(ring, 0x1);
+
+ /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+@@ -801,7 +801,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
+ gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
+
+ /* Set the highest bank bit */
+- if (adreno_is_a540(adreno_gpu))
++ if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu))
+ regbit = 2;
+ else
+ regbit = 1;
+@@ -1746,9 +1746,11 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
+ {
+ struct msm_drm_private *priv = dev->dev_private;
+ struct platform_device *pdev = priv->gpu_pdev;
++ struct adreno_platform_config *config = pdev->dev.platform_data;
+ struct a5xx_gpu *a5xx_gpu = NULL;
+ struct adreno_gpu *adreno_gpu;
+ struct msm_gpu *gpu;
++ unsigned int nr_rings;
+ int ret;
+
+ if (!pdev) {
+@@ -1769,7 +1771,12 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
+
+ check_speed_bin(&pdev->dev);
+
+- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
++ nr_rings = 4;
++
++ if (adreno_cmp_rev(ADRENO_REV(5, 1, 0, ANY_ID), config->rev))
++ nr_rings = 1;
++
++ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
+ if (ret) {
+ a5xx_destroy(&(a5xx_gpu->base.base));
+ return ERR_PTR(ret);
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 8abc9a2b114a2..e0eef47dae632 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -63,7 +63,7 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ spin_lock_irqsave(&ring->preempt_lock, flags);
+- empty = (get_wptr(ring) == ring->memptrs->rptr);
++ empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ if (!empty)
+@@ -208,6 +208,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
+ a5xx_gpu->preempt[i]->wptr = 0;
+ a5xx_gpu->preempt[i]->rptr = 0;
+ a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
++ a5xx_gpu->preempt[i]->rptr_addr = shadowptr(a5xx_gpu, gpu->rb[i]);
+ }
+
+ /* Write a 0 to signal that we aren't switching pagetables */
+@@ -259,7 +260,6 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
+ ptr->data = 0;
+ ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE;
+
+- ptr->rptr_addr = shadowptr(a5xx_gpu, ring);
+ ptr->counter = counters_iova;
+
+ return 0;
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+index 8b73f70766a47..4347a104755a9 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+@@ -516,11 +516,11 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct platform_device *pdev = to_platform_device(gmu->dev);
+ void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc");
+- void __iomem *seqptr;
++ void __iomem *seqptr = NULL;
+ uint32_t pdc_address_offset;
+ bool pdc_in_aop = false;
+
+- if (!pdcptr)
++ if (IS_ERR(pdcptr))
+ goto err;
+
+ if (adreno_is_a650(adreno_gpu) || adreno_is_a660_family(adreno_gpu))
+@@ -532,7 +532,7 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
+
+ if (!pdc_in_aop) {
+ seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq");
+- if (!seqptr)
++ if (IS_ERR(seqptr))
+ goto err;
+ }
+
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+index 267a880811d65..2d07c02c59f14 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+@@ -658,19 +658,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
+ {
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ const u32 *regs = a6xx_protect;
+- unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
+-
+- BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
+- BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
++ unsigned i, count, count_max;
+
+ if (adreno_is_a650(adreno_gpu)) {
+ regs = a650_protect;
+ count = ARRAY_SIZE(a650_protect);
+ count_max = 48;
++ BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+ } else if (adreno_is_a660_family(adreno_gpu)) {
+ regs = a660_protect;
+ count = ARRAY_SIZE(a660_protect);
+ count_max = 48;
++ BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
++ } else {
++ regs = a6xx_protect;
++ count = ARRAY_SIZE(a6xx_protect);
++ count_max = 32;
++ BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
+ }
+
+ /*
+@@ -1424,17 +1428,24 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+ {
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+- u32 gpu_scid, cntl1_regval = 0;
++ u32 cntl1_regval = 0;
+
+ if (IS_ERR(a6xx_gpu->llc_mmio))
+ return;
+
+ if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
+- gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
++ u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+
+ gpu_scid &= 0x1f;
+ cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
+ (gpu_scid << 15) | (gpu_scid << 20);
++
++ /* On A660, the SCID programming for UCHE traffic is done in
++ * A6XX_GBIF_SCACHE_CNTL0[14:10]
++ */
++ if (adreno_is_a660_family(adreno_gpu))
++ gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
++ (1 << 8), (gpu_scid << 10) | (1 << 8));
+ }
+
+ /*
+@@ -1471,13 +1482,6 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+ }
+
+ gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
+-
+- /* On A660, the SCID programming for UCHE traffic is done in
+- * A6XX_GBIF_SCACHE_CNTL0[14:10]
+- */
+- if (adreno_is_a660_family(adreno_gpu))
+- gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
+- (1 << 8), (gpu_scid << 10) | (1 << 8));
+ }
+
+ static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
+@@ -1557,6 +1561,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
+ for (i = 0; i < gpu->nr_rings; i++)
+ a6xx_gpu->shadow[i] = 0;
+
++ gpu->suspend_count++;
++
+ return 0;
+ }
+
+@@ -1705,7 +1711,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu)
+ return ERR_CAST(mmu);
+
+ return msm_gem_address_space_create(mmu,
+- "gpu", 0x100000000ULL, 0x1ffffffffULL);
++ "gpu", 0x100000000ULL, SZ_4G);
+ }
+
+ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+@@ -1740,7 +1746,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
+
+ if (val == UINT_MAX) {
+ DRM_DEV_ERROR(dev,
+- "missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
++ "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
+ fuse);
+ return UINT_MAX;
+ }
+@@ -1750,7 +1756,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
+
+ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
+ {
+- u32 supp_hw = UINT_MAX;
++ u32 supp_hw;
+ u32 speedbin;
+ int ret;
+
+@@ -1762,15 +1768,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
+ if (ret == -ENOENT) {
+ return 0;
+ } else if (ret) {
+- DRM_DEV_ERROR(dev,
+- "failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
+- ret);
+- goto done;
++ dev_err_probe(dev, ret,
++ "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
++ return ret;
+ }
+
+ supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
+
+-done:
+ ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
+ if (ret)
+ return ret;
+@@ -1866,6 +1870,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
+ BUG_ON(!node);
+
+ ret = a6xx_gmu_init(a6xx_gpu, node);
++ of_node_put(node);
+ if (ret) {
+ a6xx_destroy(&(a6xx_gpu->base.base));
+ return ERR_PTR(ret);
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+index e8f65cd8eca6e..bfac7e47cb396 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+@@ -777,12 +777,12 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+ a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
+- 2, sizeof(*a6xx_state->gmu_registers));
++ 3, sizeof(*a6xx_state->gmu_registers));
+
+ if (!a6xx_state->gmu_registers)
+ return;
+
+- a6xx_state->nr_gmu_registers = 2;
++ a6xx_state->nr_gmu_registers = 3;
+
+ /* Get the CX GMU registers from AHB */
+ _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
+index 2fb58b7098e4b..3bd2065a9d30e 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
+@@ -200,7 +200,7 @@ static const struct a6xx_shader_block {
+ SHADER(A6XX_SP_LB_3_DATA, 0x800),
+ SHADER(A6XX_SP_LB_4_DATA, 0x800),
+ SHADER(A6XX_SP_LB_5_DATA, 0x200),
+- SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000),
++ SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x800),
+ SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280),
+ SHADER(A6XX_SP_UAV_DATA, 0x80),
+ SHADER(A6XX_SP_INST_TAG, 0x80),
+diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
+index 2a6ce76656aa2..6749ea7669b9e 100644
+--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
++++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
+@@ -398,25 +398,29 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
+ if (ret)
+ return NULL;
+
+- /* Make sure pm runtime is active and reset any previous errors */
+- pm_runtime_set_active(&pdev->dev);
++ /*
++ * Now that we have firmware loaded, and are ready to begin
++ * booting the gpu, go ahead and enable runpm:
++ */
++ pm_runtime_enable(&pdev->dev);
+
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0) {
+- pm_runtime_put_sync(&pdev->dev);
++ pm_runtime_put_noidle(&pdev->dev);
+ DRM_DEV_ERROR(dev->dev, "Couldn't power up the GPU: %d\n", ret);
+- return NULL;
++ goto err_disable_rpm;
+ }
+
+- mutex_lock(&dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+ ret = msm_gpu_hw_init(gpu);
+- mutex_unlock(&dev->struct_mutex);
+- pm_runtime_put_autosuspend(&pdev->dev);
++ mutex_unlock(&gpu->lock);
+ if (ret) {
+ DRM_DEV_ERROR(dev->dev, "gpu hw init failed: %d\n", ret);
+- return NULL;
++ goto err_put_rpm;
+ }
+
++ pm_runtime_put_autosuspend(&pdev->dev);
++
+ #ifdef CONFIG_DEBUG_FS
+ if (gpu->funcs->debugfs_init) {
+ gpu->funcs->debugfs_init(gpu, dev->primary);
+@@ -425,6 +429,13 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
+ #endif
+
+ return gpu;
++
++err_put_rpm:
++ pm_runtime_put_sync_suspend(&pdev->dev);
++err_disable_rpm:
++ pm_runtime_disable(&pdev->dev);
++
++ return NULL;
+ }
+
+ static void set_gpu_pdev(struct drm_device *dev,
+diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+index 748665232d296..47a260715a89c 100644
+--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+@@ -943,7 +943,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+ pm_runtime_set_autosuspend_delay(dev,
+ adreno_gpu->info->inactive_period);
+ pm_runtime_use_autosuspend(dev);
+- pm_runtime_enable(dev);
+
+ return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
+ adreno_gpu->info->name, &adreno_gpu_config);
+@@ -952,13 +951,14 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
+ {
+ struct msm_gpu *gpu = &adreno_gpu->base;
+- struct msm_drm_private *priv = gpu->dev->dev_private;
++ struct msm_drm_private *priv = gpu->dev ? gpu->dev->dev_private : NULL;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
+ release_firmware(adreno_gpu->fw[i]);
+
+- pm_runtime_disable(&priv->gpu_pdev->dev);
++ if (priv && pm_runtime_enabled(&priv->gpu_pdev->dev))
++ pm_runtime_disable(&priv->gpu_pdev->dev);
+
+ msm_gpu_cleanup(&adreno_gpu->base);
+ }
+diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+index 225c277a6223e..588722e824f6f 100644
+--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
++++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+@@ -29,11 +29,9 @@ enum {
+ ADRENO_FW_MAX,
+ };
+
+-enum adreno_quirks {
+- ADRENO_QUIRK_TWO_PASS_USE_WFI = 1,
+- ADRENO_QUIRK_FAULT_DETECT_MASK = 2,
+- ADRENO_QUIRK_LMLOADKILL_DISABLE = 3,
+-};
++#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0)
++#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1)
++#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2)
+
+ struct adreno_rev {
+ uint8_t core;
+@@ -65,7 +63,7 @@ struct adreno_info {
+ const char *name;
+ const char *fw[ADRENO_FW_MAX];
+ uint32_t gmem;
+- enum adreno_quirks quirks;
++ u64 quirks;
+ struct msm_gpu *(*init)(struct drm_device *dev);
+ const char *zapfw;
+ u32 inactive_period;
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
+index cf4b9b5964c6c..cd6c3518ba021 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
+@@ -14,19 +14,6 @@
+
+ #define DPU_PERF_DEFAULT_MAX_CORE_CLK_RATE 412500000
+
+-/**
+- * enum dpu_core_perf_data_bus_id - data bus identifier
+- * @DPU_CORE_PERF_DATA_BUS_ID_MNOC: DPU/MNOC data bus
+- * @DPU_CORE_PERF_DATA_BUS_ID_LLCC: MNOC/LLCC data bus
+- * @DPU_CORE_PERF_DATA_BUS_ID_EBI: LLCC/EBI data bus
+- */
+-enum dpu_core_perf_data_bus_id {
+- DPU_CORE_PERF_DATA_BUS_ID_MNOC,
+- DPU_CORE_PERF_DATA_BUS_ID_LLCC,
+- DPU_CORE_PERF_DATA_BUS_ID_EBI,
+- DPU_CORE_PERF_DATA_BUS_ID_MAX,
+-};
+-
+ /**
+ * struct dpu_core_perf_params - definition of performance parameters
+ * @max_per_pipe_ib: maximum instantaneous bandwidth request
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+index 768012243b440..8be941c9b6a94 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+@@ -230,6 +230,9 @@ static void _dpu_crtc_blend_setup_mixer(struct drm_crtc *crtc,
+ if (!state)
+ continue;
+
++ if (!state->visible)
++ continue;
++
+ pstate = to_dpu_plane_state(state);
+ fb = state->fb;
+
+@@ -767,7 +770,10 @@ static void dpu_crtc_reset(struct drm_crtc *crtc)
+ if (crtc->state)
+ dpu_crtc_destroy_state(crtc, crtc->state);
+
+- __drm_atomic_helper_crtc_reset(crtc, &cstate->base);
++ if (cstate)
++ __drm_atomic_helper_crtc_reset(crtc, &cstate->base);
++ else
++ __drm_atomic_helper_crtc_reset(crtc, NULL);
+ }
+
+ /**
+@@ -898,6 +904,20 @@ struct plane_state {
+ u32 pipe_id;
+ };
+
++static bool dpu_crtc_needs_dirtyfb(struct drm_crtc_state *cstate)
++{
++ struct drm_crtc *crtc = cstate->crtc;
++ struct drm_encoder *encoder;
++
++ drm_for_each_encoder_mask (encoder, crtc->dev, cstate->encoder_mask) {
++ if (dpu_encoder_get_intf_mode(encoder) == INTF_MODE_CMD) {
++ return true;
++ }
++ }
++
++ return false;
++}
++
+ static int dpu_crtc_atomic_check(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+ {
+@@ -918,8 +938,11 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc,
+ const struct drm_plane_state *pipe_staged[SSPP_MAX];
+ int left_zpos_cnt = 0, right_zpos_cnt = 0;
+ struct drm_rect crtc_rect = { 0 };
++ bool needs_dirtyfb = dpu_crtc_needs_dirtyfb(crtc_state);
+
+ pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL);
++ if (!pstates)
++ return -ENOMEM;
+
+ if (!crtc_state->enable || !crtc_state->active) {
+ DRM_DEBUG_ATOMIC("crtc%d -> enable %d, active %d, skip atomic_check\n",
+@@ -949,6 +972,7 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc,
+
+ /* get plane state for all drm planes associated with crtc state */
+ drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
++ struct dpu_plane_state *dpu_pstate = to_dpu_plane_state(pstate);
+ struct drm_rect dst, clip = crtc_rect;
+
+ if (IS_ERR_OR_NULL(pstate)) {
+@@ -960,11 +984,16 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc,
+ if (cnt >= DPU_STAGE_MAX * 4)
+ continue;
+
+- pstates[cnt].dpu_pstate = to_dpu_plane_state(pstate);
++ if (!pstate->visible)
++ continue;
++
++ pstates[cnt].dpu_pstate = dpu_pstate;
+ pstates[cnt].drm_pstate = pstate;
+ pstates[cnt].stage = pstate->normalized_zpos;
+ pstates[cnt].pipe_id = dpu_plane_pipe(plane);
+
++ dpu_pstate->needs_dirtyfb = needs_dirtyfb;
++
+ if (pipe_staged[pstates[cnt].pipe_id]) {
+ multirect_plane[multirect_count].r0 =
+ pipe_staged[pstates[cnt].pipe_id];
+@@ -1351,6 +1380,8 @@ static const struct drm_crtc_helper_funcs dpu_crtc_helper_funcs = {
+ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
+ struct drm_plane *cursor)
+ {
++ struct msm_drm_private *priv = dev->dev_private;
++ struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
+ struct drm_crtc *crtc = NULL;
+ struct dpu_crtc *dpu_crtc = NULL;
+ int i;
+@@ -1382,7 +1413,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
+
+ drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs);
+
+- drm_crtc_enable_color_mgmt(crtc, 0, true, 0);
++ if (dpu_kms->catalog->dspp_count)
++ drm_crtc_enable_color_mgmt(crtc, 0, true, 0);
+
+ /* save user friendly CRTC name for later */
+ snprintf(dpu_crtc->name, DPU_CRTC_NAME_SIZE, "crtc%u", crtc->base.id);
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+index 0e9d3fa1544be..03bddd904d1a1 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+@@ -168,6 +168,7 @@ enum dpu_enc_rc_states {
+ * @vsync_event_work: worker to handle vsync event for autorefresh
+ * @topology: topology of the display
+ * @idle_timeout: idle timeout duration in milliseconds
++ * @dp: msm_dp pointer, for DP encoders
+ */
+ struct dpu_encoder_virt {
+ struct drm_encoder base;
+@@ -206,6 +207,8 @@ struct dpu_encoder_virt {
+ struct msm_display_topology topology;
+
+ u32 idle_timeout;
++
++ struct msm_dp *dp;
+ };
+
+ #define to_dpu_encoder_virt(x) container_of(x, struct dpu_encoder_virt, base)
+@@ -634,7 +637,7 @@ static int dpu_encoder_virt_atomic_check(
+ if (drm_atomic_crtc_needs_modeset(crtc_state)) {
+ dpu_rm_release(global_state, drm_enc);
+
+- if (!crtc_state->active_changed || crtc_state->active)
++ if (!crtc_state->active_changed || crtc_state->enable)
+ ret = dpu_rm_reserve(&dpu_kms->rm, global_state,
+ drm_enc, crtc_state, topology);
+ }
+@@ -1000,8 +1003,8 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc,
+
+ trace_dpu_enc_mode_set(DRMID(drm_enc));
+
+- if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp)
+- msm_dp_display_mode_set(priv->dp, drm_enc, mode, adj_mode);
++ if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS)
++ msm_dp_display_mode_set(dpu_enc->dp, drm_enc, mode, adj_mode);
+
+ list_for_each_entry(conn_iter, connector_list, head)
+ if (conn_iter->encoder == drm_enc)
+@@ -1107,7 +1110,7 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc)
+ }
+
+
+- if (dpu_enc->disp_info.intf_type == DRM_MODE_CONNECTOR_DisplayPort &&
++ if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_TMDS &&
+ dpu_enc->cur_master->hw_mdptop &&
+ dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select)
+ dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select(
+@@ -1182,9 +1185,8 @@ static void dpu_encoder_virt_enable(struct drm_encoder *drm_enc)
+
+ _dpu_encoder_virt_enable_helper(drm_enc);
+
+- if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) {
+- ret = msm_dp_display_enable(priv->dp,
+- drm_enc);
++ if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) {
++ ret = msm_dp_display_enable(dpu_enc->dp, drm_enc);
+ if (ret) {
+ DPU_ERROR_ENC(dpu_enc, "dp display enable failed: %d\n",
+ ret);
+@@ -1224,8 +1226,8 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
+ /* wait for idle */
+ dpu_encoder_wait_for_event(drm_enc, MSM_ENC_TX_COMPLETE);
+
+- if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) {
+- if (msm_dp_display_pre_disable(priv->dp, drm_enc))
++ if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) {
++ if (msm_dp_display_pre_disable(dpu_enc->dp, drm_enc))
+ DPU_ERROR_ENC(dpu_enc, "dp display push idle failed\n");
+ }
+
+@@ -1253,8 +1255,8 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
+
+ DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n");
+
+- if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS && priv->dp) {
+- if (msm_dp_display_disable(priv->dp, drm_enc))
++ if (drm_enc->encoder_type == DRM_MODE_ENCODER_TMDS) {
++ if (msm_dp_display_disable(dpu_enc->dp, drm_enc))
+ DPU_ERROR_ENC(dpu_enc, "dp display disable failed\n");
+ }
+
+@@ -2170,7 +2172,8 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc,
+ timer_setup(&dpu_enc->vsync_event_timer,
+ dpu_encoder_vsync_event_handler,
+ 0);
+-
++ else if (disp_info->intf_type == DRM_MODE_ENCODER_TMDS)
++ dpu_enc->dp = priv->dp;
+
+ INIT_DELAYED_WORK(&dpu_enc->delayed_off_work,
+ dpu_encoder_off_work);
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+index 700d65e39feb0..272a3d7e1aef2 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+@@ -53,9 +53,13 @@
+
+ #define INTF_SDM845_MASK (0)
+
+-#define INTF_SC7180_MASK BIT(DPU_INTF_INPUT_CTRL) | BIT(DPU_INTF_TE)
++#define INTF_SC7180_MASK \
++ (BIT(DPU_INTF_INPUT_CTRL) | \
++ BIT(DPU_INTF_TE) | \
++ BIT(DPU_INTF_STATUS_SUPPORTED) | \
++ BIT(DPU_DATA_HCTL_EN))
+
+-#define INTF_SC7280_MASK INTF_SC7180_MASK | BIT(DPU_DATA_HCTL_EN)
++#define INTF_SC7280_MASK (INTF_SC7180_MASK)
+
+ #define IRQ_SDM845_MASK (BIT(MDP_SSPP_TOP0_INTR) | \
+ BIT(MDP_SSPP_TOP0_INTR2) | \
+@@ -375,19 +379,19 @@ static const struct dpu_ctl_cfg sdm845_ctl[] = {
+ static const struct dpu_ctl_cfg sc7180_ctl[] = {
+ {
+ .name = "ctl_0", .id = CTL_0,
+- .base = 0x1000, .len = 0xE4,
++ .base = 0x1000, .len = 0x1dc,
+ .features = BIT(DPU_CTL_ACTIVE_CFG),
+ .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9),
+ },
+ {
+ .name = "ctl_1", .id = CTL_1,
+- .base = 0x1200, .len = 0xE4,
++ .base = 0x1200, .len = 0x1dc,
+ .features = BIT(DPU_CTL_ACTIVE_CFG),
+ .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10),
+ },
+ {
+ .name = "ctl_2", .id = CTL_2,
+- .base = 0x1400, .len = 0xE4,
++ .base = 0x1400, .len = 0x1dc,
+ .features = BIT(DPU_CTL_ACTIVE_CFG),
+ .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11),
+ },
+@@ -804,7 +808,7 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = {
+ #define MERGE_3D_BLK(_name, _id, _base) \
+ {\
+ .name = _name, .id = _id, \
+- .base = _base, .len = 0x100, \
++ .base = _base, .len = 0x8, \
+ .features = MERGE_3D_SM8150_MASK, \
+ .sblk = NULL \
+ }
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+index d2a945a27cfaa..321b7599fe2d0 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+@@ -191,17 +191,19 @@ enum {
+
+ /**
+ * INTF sub-blocks
+- * @DPU_INTF_INPUT_CTRL Supports the setting of pp block from which
+- * pixel data arrives to this INTF
+- * @DPU_INTF_TE INTF block has TE configuration support
+- * @DPU_DATA_HCTL_EN Allows data to be transferred at different rate
+- than video timing
++ * @DPU_INTF_INPUT_CTRL Supports the setting of pp block from which
++ * pixel data arrives to this INTF
++ * @DPU_INTF_TE INTF block has TE configuration support
++ * @DPU_DATA_HCTL_EN Allows data to be transferred at different rate
++ * than video timing
++ * @DPU_INTF_STATUS_SUPPORTED INTF block has INTF_STATUS register
+ * @DPU_INTF_MAX
+ */
+ enum {
+ DPU_INTF_INPUT_CTRL = 0x1,
+ DPU_INTF_TE,
+ DPU_DATA_HCTL_EN,
++ DPU_INTF_STATUS_SUPPORTED,
+ DPU_INTF_MAX
+ };
+
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+index a98e964c3b6fa..355894a3b48c3 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
+@@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx,
+ struct dpu_hw_pcc_cfg *cfg)
+ {
+
+- u32 base = ctx->cap->sblk->pcc.base;
++ u32 base;
+
+- if (!ctx || !base) {
++ if (!ctx) {
++ DRM_ERROR("invalid ctx %pK\n", ctx);
++ return;
++ }
++
++ base = ctx->cap->sblk->pcc.base;
++
++ if (!base) {
+ DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base);
+ return;
+ }
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+index 2e816f232e859..ac0c221f8aa19 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+@@ -12,7 +12,7 @@
+
+ /**
+ * Register offsets in MDSS register file for the interrupt registers
+- * w.r.t. to the MDP base
++ * w.r.t. the MDP base
+ */
+ #define MDP_SSPP_TOP0_OFF 0x0
+ #define MDP_INTF_0_OFF 0x6A000
+@@ -20,6 +20,10 @@
+ #define MDP_INTF_2_OFF 0x6B000
+ #define MDP_INTF_3_OFF 0x6B800
+ #define MDP_INTF_4_OFF 0x6C000
++#define MDP_INTF_5_OFF 0x6C800
++#define INTF_INTR_EN 0x1c0
++#define INTF_INTR_STATUS 0x1c4
++#define INTF_INTR_CLEAR 0x1c8
+ #define MDP_AD4_0_OFF 0x7C000
+ #define MDP_AD4_1_OFF 0x7D000
+ #define MDP_AD4_INTR_EN_OFF 0x41c
+@@ -87,6 +91,11 @@ static const struct dpu_intr_reg dpu_intr_set[] = {
+ MDP_INTF_4_OFF+INTF_INTR_EN,
+ MDP_INTF_4_OFF+INTF_INTR_STATUS
+ },
++ {
++ MDP_INTF_5_OFF+INTF_INTR_CLEAR,
++ MDP_INTF_5_OFF+INTF_INTR_EN,
++ MDP_INTF_5_OFF+INTF_INTR_STATUS
++ },
+ {
+ MDP_AD4_0_OFF + MDP_AD4_INTR_CLEAR_OFF,
+ MDP_AD4_0_OFF + MDP_AD4_INTR_EN_OFF,
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
+index ac83c1159815f..d90dac77c26fe 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h
+@@ -22,6 +22,7 @@ enum dpu_hw_intr_reg {
+ MDP_INTF2_INTR,
+ MDP_INTF3_INTR,
+ MDP_INTF4_INTR,
++ MDP_INTF5_INTR,
+ MDP_AD4_0_INTR,
+ MDP_AD4_1_INTR,
+ MDP_INTF0_7xxx_INTR,
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+index 116e2b5b1a90f..7c09d30a62b8d 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+@@ -53,12 +53,8 @@
+ #define INTF_TPG_RGB_MAPPING 0x11C
+ #define INTF_PROG_FETCH_START 0x170
+ #define INTF_PROG_ROT_START 0x174
+-
+-#define INTF_FRAME_LINE_COUNT_EN 0x0A8
+-#define INTF_FRAME_COUNT 0x0AC
+-#define INTF_LINE_COUNT 0x0B0
+-
+ #define INTF_MUX 0x25C
++#define INTF_STATUS 0x26C
+
+ static const struct dpu_intf_cfg *_intf_offset(enum dpu_intf intf,
+ const struct dpu_mdss_cfg *m,
+@@ -148,6 +144,7 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx,
+ active_v_end = active_v_start + (p->yres * hsync_period) - 1;
+
+ display_v_start += p->hsync_pulse_width + p->h_back_porch;
++ display_v_end -= p->h_front_porch;
+
+ active_hctl = (active_h_end << 16) | active_h_start;
+ display_hctl = active_hctl;
+@@ -263,8 +260,13 @@ static void dpu_hw_intf_get_status(
+ struct intf_status *s)
+ {
+ struct dpu_hw_blk_reg_map *c = &intf->hw;
++ unsigned long cap = intf->cap->features;
++
++ if (cap & BIT(DPU_INTF_STATUS_SUPPORTED))
++ s->is_en = DPU_REG_READ(c, INTF_STATUS) & BIT(0);
++ else
++ s->is_en = DPU_REG_READ(c, INTF_TIMING_ENGINE_EN);
+
+- s->is_en = DPU_REG_READ(c, INTF_TIMING_ENGINE_EN);
+ s->is_prog_fetch_en = !!(DPU_REG_READ(c, INTF_CONFIG) & BIT(31));
+ if (s->is_en) {
+ s->frame_count = DPU_REG_READ(c, INTF_FRAME_COUNT);
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+index 69eed79324865..f9460672176aa 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+@@ -138,11 +138,13 @@ static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx,
+ u32 *idx)
+ {
+ int rc = 0;
+- const struct dpu_sspp_sub_blks *sblk = ctx->cap->sblk;
++ const struct dpu_sspp_sub_blks *sblk;
+
+- if (!ctx)
++ if (!ctx || !ctx->cap || !ctx->cap->sblk)
+ return -EINVAL;
+
++ sblk = ctx->cap->sblk;
++
+ switch (s_id) {
+ case DPU_SSPP_SRC:
+ *idx = sblk->src_blk.base;
+@@ -419,7 +421,7 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_pipe *ctx,
+
+ (void)pe;
+ if (_sspp_subblk_offset(ctx, DPU_SSPP_SCALER_QSEED3, &idx) || !sspp
+- || !scaler3_cfg || !ctx || !ctx->cap || !ctx->cap->sblk)
++ || !scaler3_cfg)
+ return;
+
+ dpu_hw_setup_scaler3(&ctx->hw, scaler3_cfg, idx,
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
+index c8156ed4b7fb8..93081e82c6d74 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
+@@ -20,9 +20,6 @@
+ #define HIST_INTR_EN 0x01c
+ #define HIST_INTR_STATUS 0x020
+ #define HIST_INTR_CLEAR 0x024
+-#define INTF_INTR_EN 0x1C0
+-#define INTF_INTR_STATUS 0x1C4
+-#define INTF_INTR_CLEAR 0x1C8
+ #define SPLIT_DISPLAY_EN 0x2F4
+ #define SPLIT_DISPLAY_UPPER_PIPE_CTRL 0x2F8
+ #define DSPP_IGC_COLOR0_RAM_LUTN 0x300
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+index ae48f41821cfe..6d36622977af4 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+@@ -73,8 +73,8 @@ static int _dpu_danger_signal_status(struct seq_file *s,
+ &status);
+ } else {
+ seq_puts(s, "\nSafe signal status:\n");
+- if (kms->hw_mdp->ops.get_danger_status)
+- kms->hw_mdp->ops.get_danger_status(kms->hw_mdp,
++ if (kms->hw_mdp->ops.get_safe_status)
++ kms->hw_mdp->ops.get_safe_status(kms->hw_mdp,
+ &status);
+ }
+ pm_runtime_put_sync(&kms->pdev->dev);
+@@ -725,11 +725,11 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
+ _dpu_kms_mmu_destroy(dpu_kms);
+
+ if (dpu_kms->catalog) {
+- for (i = 0; i < dpu_kms->catalog->vbif_count; i++) {
+- u32 vbif_idx = dpu_kms->catalog->vbif[i].id;
+-
+- if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx])
+- dpu_hw_vbif_destroy(dpu_kms->hw_vbif[vbif_idx]);
++ for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
++ if (dpu_kms->hw_vbif[i]) {
++ dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]);
++ dpu_kms->hw_vbif[i] = NULL;
++ }
+ }
+ }
+
+@@ -908,6 +908,10 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
+ return 0;
+
+ mmu = msm_iommu_new(dpu_kms->dev->dev, domain);
++ if (IS_ERR(mmu)) {
++ iommu_domain_free(domain);
++ return PTR_ERR(mmu);
++ }
+ aspace = msm_gem_address_space_create(mmu, "dpu1",
+ 0x1000, 0x100000000 - 0x1000);
+
+@@ -995,7 +999,9 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
+
+ dpu_kms_parse_data_bus_icc_path(dpu_kms);
+
+- pm_runtime_get_sync(&dpu_kms->pdev->dev);
++ rc = pm_runtime_resume_and_get(&dpu_kms->pdev->dev);
++ if (rc < 0)
++ goto error;
+
+ dpu_kms->core_rev = readl_relaxed(dpu_kms->mmio + 0x0);
+
+@@ -1041,7 +1047,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
+ for (i = 0; i < dpu_kms->catalog->vbif_count; i++) {
+ u32 vbif_idx = dpu_kms->catalog->vbif[i].id;
+
+- dpu_kms->hw_vbif[i] = dpu_hw_vbif_init(vbif_idx,
++ dpu_kms->hw_vbif[vbif_idx] = dpu_hw_vbif_init(vbif_idx,
+ dpu_kms->vbif[vbif_idx], dpu_kms->catalog);
+ if (IS_ERR_OR_NULL(dpu_kms->hw_vbif[vbif_idx])) {
+ rc = PTR_ERR(dpu_kms->hw_vbif[vbif_idx]);
+@@ -1180,7 +1186,7 @@ static int dpu_bind(struct device *dev, struct device *master, void *data)
+
+ priv->kms = &dpu_kms->base;
+
+- return ret;
++ return 0;
+ }
+
+ static void dpu_unbind(struct device *dev, struct device *master, void *data)
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+index c989621209aa7..59390dc3d1b8c 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+@@ -894,7 +894,7 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane,
+
+ if (pstate->aspace) {
+ ret = msm_framebuffer_prepare(new_state->fb,
+- pstate->aspace);
++ pstate->aspace, pstate->needs_dirtyfb);
+ if (ret) {
+ DPU_ERROR("failed to prepare framebuffer\n");
+ return ret;
+@@ -925,7 +925,8 @@ static void dpu_plane_cleanup_fb(struct drm_plane *plane,
+
+ DPU_DEBUG_PLANE(pdpu, "FB[%u]\n", old_state->fb->base.id);
+
+- msm_framebuffer_cleanup(old_state->fb, old_pstate->aspace);
++ msm_framebuffer_cleanup(old_state->fb, old_pstate->aspace,
++ old_pstate->needs_dirtyfb);
+ }
+
+ static bool dpu_plane_validate_src(struct drm_rect *src,
+@@ -1088,7 +1089,7 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane)
+ struct dpu_plane_state *pstate = to_dpu_plane_state(state);
+ struct drm_crtc *crtc = state->crtc;
+ struct drm_framebuffer *fb = state->fb;
+- bool is_rt_pipe, update_qos_remap;
++ bool is_rt_pipe;
+ const struct dpu_format *fmt =
+ to_dpu_format(msm_framebuffer_format(fb));
+
+@@ -1099,6 +1100,9 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane)
+ pstate->pending = true;
+
+ is_rt_pipe = (dpu_crtc_get_client_type(crtc) != NRT_CLIENT);
++ pstate->needs_qos_remap |= (is_rt_pipe != pdpu->is_rt_pipe);
++ pdpu->is_rt_pipe = is_rt_pipe;
++
+ _dpu_plane_set_qos_ctrl(plane, false, DPU_PLANE_QOS_PANIC_CTRL);
+
+ DPU_DEBUG_PLANE(pdpu, "FB[%u] " DRM_RECT_FP_FMT "->crtc%u " DRM_RECT_FMT
+@@ -1204,14 +1208,8 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane)
+ _dpu_plane_set_ot_limit(plane, crtc);
+ }
+
+- update_qos_remap = (is_rt_pipe != pdpu->is_rt_pipe) ||
+- pstate->needs_qos_remap;
+-
+- if (update_qos_remap) {
+- if (is_rt_pipe != pdpu->is_rt_pipe)
+- pdpu->is_rt_pipe = is_rt_pipe;
+- else if (pstate->needs_qos_remap)
+- pstate->needs_qos_remap = false;
++ if (pstate->needs_qos_remap) {
++ pstate->needs_qos_remap = false;
+ _dpu_plane_set_qos_remap(plane);
+ }
+
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
+index 34e03ac05f4a8..17ff48564c8a7 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
+@@ -28,6 +28,7 @@
+ * @cdp_cfg: CDP configuration
+ * @plane_fetch_bw: calculated BW per plane
+ * @plane_clk: calculated clk per plane
++ * @needs_dirtyfb: whether attached CRTC needs pixel data explicitly flushed
+ */
+ struct dpu_plane_state {
+ struct drm_plane_state base;
+@@ -45,6 +46,8 @@ struct dpu_plane_state {
+ struct dpu_hw_pipe_cdp_cfg cdp_cfg;
+ u64 plane_fetch_bw;
+ u64 plane_clk;
++
++ bool needs_dirtyfb;
+ };
+
+ /**
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+index f9c83d6e427ad..932275b2dfe74 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+@@ -35,6 +35,14 @@ int dpu_rm_destroy(struct dpu_rm *rm)
+ {
+ int i;
+
++ for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) {
++ struct dpu_hw_dspp *hw;
++
++ if (rm->dspp_blks[i]) {
++ hw = to_dpu_hw_dspp(rm->dspp_blks[i]);
++ dpu_hw_dspp_destroy(hw);
++ }
++ }
+ for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) {
+ struct dpu_hw_pingpong *hw;
+
+@@ -655,6 +663,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm,
+ blks_size, enc_id);
+ break;
+ }
++ if (!hw_blks[i]) {
++ DPU_ERROR("Allocated resource %d unavailable to assign to enc %d\n",
++ type, enc_id);
++ break;
++ }
+ blks[num_blks++] = hw_blks[i];
+ }
+
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
+index 21d20373eb8b3..a18fb649301c9 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
+@@ -11,6 +11,14 @@
+ #include "dpu_hw_vbif.h"
+ #include "dpu_trace.h"
+
++static struct dpu_hw_vbif *dpu_get_vbif(struct dpu_kms *dpu_kms, enum dpu_vbif vbif_idx)
++{
++ if (vbif_idx < ARRAY_SIZE(dpu_kms->hw_vbif))
++ return dpu_kms->hw_vbif[vbif_idx];
++
++ return NULL;
++}
++
+ /**
+ * _dpu_vbif_wait_for_xin_halt - wait for the xin to halt
+ * @vbif: Pointer to hardware vbif driver
+@@ -148,20 +156,15 @@ exit:
+ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms,
+ struct dpu_vbif_set_ot_params *params)
+ {
+- struct dpu_hw_vbif *vbif = NULL;
++ struct dpu_hw_vbif *vbif;
+ struct dpu_hw_mdp *mdp;
+ bool forced_on = false;
+ u32 ot_lim;
+- int ret, i;
++ int ret;
+
+ mdp = dpu_kms->hw_mdp;
+
+- for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
+- if (dpu_kms->hw_vbif[i] &&
+- dpu_kms->hw_vbif[i]->idx == params->vbif_idx)
+- vbif = dpu_kms->hw_vbif[i];
+- }
+-
++ vbif = dpu_get_vbif(dpu_kms, params->vbif_idx);
+ if (!vbif || !mdp) {
+ DRM_DEBUG_ATOMIC("invalid arguments vbif %d mdp %d\n",
+ vbif != NULL, mdp != NULL);
+@@ -204,7 +207,7 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms,
+ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms,
+ struct dpu_vbif_set_qos_params *params)
+ {
+- struct dpu_hw_vbif *vbif = NULL;
++ struct dpu_hw_vbif *vbif;
+ struct dpu_hw_mdp *mdp;
+ bool forced_on = false;
+ const struct dpu_vbif_qos_tbl *qos_tbl;
+@@ -216,13 +219,7 @@ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms,
+ }
+ mdp = dpu_kms->hw_mdp;
+
+- for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
+- if (dpu_kms->hw_vbif[i] &&
+- dpu_kms->hw_vbif[i]->idx == params->vbif_idx) {
+- vbif = dpu_kms->hw_vbif[i];
+- break;
+- }
+- }
++ vbif = dpu_get_vbif(dpu_kms, params->vbif_idx);
+
+ if (!vbif || !vbif->cap) {
+ DPU_ERROR("invalid vbif %d\n", params->vbif_idx);
+diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+index cdcaf470f1480..97ae68182f3ed 100644
+--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+@@ -223,6 +223,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
+ encoder = mdp4_lcdc_encoder_init(dev, panel_node);
+ if (IS_ERR(encoder)) {
+ DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n");
++ of_node_put(panel_node);
+ return PTR_ERR(encoder);
+ }
+
+@@ -232,6 +233,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
+ connector = mdp4_lvds_connector_init(dev, panel_node, encoder);
+ if (IS_ERR(connector)) {
+ DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n");
++ of_node_put(panel_node);
+ return PTR_ERR(connector);
+ }
+
+diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
+index 7288041dd86ad..7444b75c42157 100644
+--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
+@@ -56,8 +56,9 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector)
+ return ret;
+ }
+
+-static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++mdp4_lvds_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ struct mdp4_lvds_connector *mdp4_lvds_connector =
+ to_mdp4_lvds_connector(connector);
+diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
+index 49bdabea8ed59..3e20f72d75efd 100644
+--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
+@@ -7,6 +7,7 @@
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_damage_helper.h>
+ #include <drm/drm_fourcc.h>
++#include <drm/drm_gem_atomic_helper.h>
+
+ #include "mdp4_kms.h"
+
+@@ -90,6 +91,20 @@ static const struct drm_plane_funcs mdp4_plane_funcs = {
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+ };
+
++static int mdp4_plane_prepare_fb(struct drm_plane *plane,
++ struct drm_plane_state *new_state)
++{
++ struct msm_drm_private *priv = plane->dev->dev_private;
++ struct msm_kms *kms = priv->kms;
++
++ if (!new_state->fb)
++ return 0;
++
++ drm_gem_plane_helper_prepare_fb(plane, new_state);
++
++ return msm_framebuffer_prepare(new_state->fb, kms->aspace, false);
++}
++
+ static void mdp4_plane_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+ {
+@@ -102,7 +117,7 @@ static void mdp4_plane_cleanup_fb(struct drm_plane *plane,
+ return;
+
+ DBG("%s: cleanup: FB[%u]", mdp4_plane->name, fb->base.id);
+- msm_framebuffer_cleanup(fb, kms->aspace);
++ msm_framebuffer_cleanup(fb, kms->aspace, false);
+ }
+
+
+@@ -130,7 +145,7 @@ static void mdp4_plane_atomic_update(struct drm_plane *plane,
+ }
+
+ static const struct drm_plane_helper_funcs mdp4_plane_helper_funcs = {
+- .prepare_fb = msm_atomic_prepare_fb,
++ .prepare_fb = mdp4_plane_prepare_fb,
+ .cleanup_fb = mdp4_plane_cleanup_fb,
+ .atomic_check = mdp4_plane_atomic_check,
+ .atomic_update = mdp4_plane_atomic_update,
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+index bb7d066618e64..2b15f10eeae02 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+@@ -612,9 +612,15 @@ static int mdp5_crtc_setup_pipeline(struct drm_crtc *crtc,
+ if (ret)
+ return ret;
+
+- mdp5_mixer_release(new_crtc_state->state, old_mixer);
++ ret = mdp5_mixer_release(new_crtc_state->state, old_mixer);
++ if (ret)
++ return ret;
++
+ if (old_r_mixer) {
+- mdp5_mixer_release(new_crtc_state->state, old_r_mixer);
++ ret = mdp5_mixer_release(new_crtc_state->state, old_r_mixer);
++ if (ret)
++ return ret;
++
+ if (!need_right_mixer)
+ pipeline->r_mixer = NULL;
+ }
+@@ -690,6 +696,8 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
+ {
+ struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
+ crtc);
++ struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc_state);
++ struct mdp5_interface *intf = mdp5_cstate->pipeline.intf;
+ struct mdp5_kms *mdp5_kms = get_kms(crtc);
+ struct drm_plane *plane;
+ struct drm_device *dev = crtc->dev;
+@@ -706,12 +714,18 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
+ DBG("%s: check", crtc->name);
+
+ drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
++ struct mdp5_plane_state *mdp5_pstate =
++ to_mdp5_plane_state(pstate);
++
+ if (!pstate->visible)
+ continue;
+
+ pstates[cnt].plane = plane;
+ pstates[cnt].state = to_mdp5_plane_state(pstate);
+
++ mdp5_pstate->needs_dirtyfb =
++ intf->mode == MDP5_INTF_DSI_MODE_COMMAND;
++
+ /*
+ * if any plane on this crtc uses 2 hwpipes, then we need
+ * the crtc to have a right hwmixer.
+@@ -983,8 +997,10 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
+
+ ret = msm_gem_get_and_pin_iova(cursor_bo, kms->aspace,
+ &mdp5_crtc->cursor.iova);
+- if (ret)
++ if (ret) {
++ drm_gem_object_put(cursor_bo);
+ return -EINVAL;
++ }
+
+ pm_runtime_get_sync(&pdev->dev);
+
+@@ -1122,7 +1138,10 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc)
+ if (crtc->state)
+ mdp5_crtc_destroy_state(crtc, crtc->state);
+
+- __drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base);
++ if (mdp5_cstate)
++ __drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base);
++ else
++ __drm_atomic_helper_crtc_reset(crtc, NULL);
+ }
+
+ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+index b3b42672b2d47..a2b276ae96733 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+@@ -598,9 +598,9 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
+ }
+
+ irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+- if (irq < 0) {
+- ret = irq;
+- DRM_DEV_ERROR(&pdev->dev, "failed to get irq: %d\n", ret);
++ if (!irq) {
++ ret = -EINVAL;
++ DRM_DEV_ERROR(&pdev->dev, "failed to get irq\n");
+ goto fail;
+ }
+
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
+index ac269a6802df2..29bf11f086011 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
+@@ -100,6 +100,11 @@ struct mdp5_plane_state {
+
+ /* assigned by crtc blender */
+ enum mdp_mixer_stage_id stage;
++
++ /* whether attached CRTC needs pixel data explicitly flushed to
++ * display (ex. DSI command mode display)
++ */
++ bool needs_dirtyfb;
+ };
+ #define to_mdp5_plane_state(x) \
+ container_of(x, struct mdp5_plane_state, base)
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
+index 954db683ae444..2536def2a0005 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c
+@@ -116,21 +116,28 @@ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
+ return 0;
+ }
+
+-void mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer)
++int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer)
+ {
+ struct mdp5_global_state *global_state = mdp5_get_global_state(s);
+- struct mdp5_hw_mixer_state *new_state = &global_state->hwmixer;
++ struct mdp5_hw_mixer_state *new_state;
+
+ if (!mixer)
+- return;
++ return 0;
++
++ if (IS_ERR(global_state))
++ return PTR_ERR(global_state);
++
++ new_state = &global_state->hwmixer;
+
+ if (WARN_ON(!new_state->hwmixer_to_crtc[mixer->idx]))
+- return;
++ return -EINVAL;
+
+ DBG("%s: release from crtc %s", mixer->name,
+ new_state->hwmixer_to_crtc[mixer->idx]->name);
+
+ new_state->hwmixer_to_crtc[mixer->idx] = NULL;
++
++ return 0;
+ }
+
+ void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer)
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
+index 43c9ba43ce185..545ee223b9d74 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h
+@@ -30,7 +30,7 @@ void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm);
+ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
+ uint32_t caps, struct mdp5_hw_mixer **mixer,
+ struct mdp5_hw_mixer **r_mixer);
+-void mdp5_mixer_release(struct drm_atomic_state *s,
+- struct mdp5_hw_mixer *mixer);
++int mdp5_mixer_release(struct drm_atomic_state *s,
++ struct mdp5_hw_mixer *mixer);
+
+ #endif /* __MDP5_LM_H__ */
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
+index ba6695963aa66..e4b8a789835a4 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c
+@@ -119,18 +119,24 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane,
+ return 0;
+ }
+
+-void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe)
++int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe)
+ {
+ struct msm_drm_private *priv = s->dev->dev_private;
+ struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms));
+- struct mdp5_global_state *state = mdp5_get_global_state(s);
+- struct mdp5_hw_pipe_state *new_state = &state->hwpipe;
++ struct mdp5_global_state *state;
++ struct mdp5_hw_pipe_state *new_state;
+
+ if (!hwpipe)
+- return;
++ return 0;
++
++ state = mdp5_get_global_state(s);
++ if (IS_ERR(state))
++ return PTR_ERR(state);
++
++ new_state = &state->hwpipe;
+
+ if (WARN_ON(!new_state->hwpipe_to_plane[hwpipe->idx]))
+- return;
++ return -EINVAL;
+
+ DBG("%s: release from plane %s", hwpipe->name,
+ new_state->hwpipe_to_plane[hwpipe->idx]->name);
+@@ -141,6 +147,8 @@ void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe)
+ }
+
+ new_state->hwpipe_to_plane[hwpipe->idx] = NULL;
++
++ return 0;
+ }
+
+ void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe)
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
+index 9b26d0761bd4f..cca67938cab21 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h
+@@ -37,7 +37,7 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane,
+ uint32_t caps, uint32_t blkcfg,
+ struct mdp5_hw_pipe **hwpipe,
+ struct mdp5_hw_pipe **r_hwpipe);
+-void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe);
++int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe);
+
+ struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
+ uint32_t reg_offset, uint32_t caps);
+diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+index c6b69afcbac89..f9cae6460c3be 100644
+--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
++++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+@@ -8,6 +8,7 @@
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_damage_helper.h>
+ #include <drm/drm_fourcc.h>
++#include <drm/drm_gem_atomic_helper.h>
+ #include <drm/drm_print.h>
+
+ #include "mdp5_kms.h"
+@@ -90,7 +91,10 @@ static void mdp5_plane_reset(struct drm_plane *plane)
+ __drm_atomic_helper_plane_destroy_state(plane->state);
+
+ kfree(to_mdp5_plane_state(plane->state));
++ plane->state = NULL;
+ mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL);
++ if (!mdp5_state)
++ return;
+
+ if (plane->type == DRM_PLANE_TYPE_PRIMARY)
+ mdp5_state->base.zpos = STAGE_BASE;
+@@ -124,8 +128,7 @@ static void mdp5_plane_destroy_state(struct drm_plane *plane,
+ {
+ struct mdp5_plane_state *pstate = to_mdp5_plane_state(state);
+
+- if (state->fb)
+- drm_framebuffer_put(state->fb);
++ __drm_atomic_helper_plane_destroy_state(state);
+
+ kfree(pstate);
+ }
+@@ -140,18 +143,34 @@ static const struct drm_plane_funcs mdp5_plane_funcs = {
+ .atomic_print_state = mdp5_plane_atomic_print_state,
+ };
+
++static int mdp5_plane_prepare_fb(struct drm_plane *plane,
++ struct drm_plane_state *new_state)
++{
++ struct msm_drm_private *priv = plane->dev->dev_private;
++ struct msm_kms *kms = priv->kms;
++ bool needs_dirtyfb = to_mdp5_plane_state(new_state)->needs_dirtyfb;
++
++ if (!new_state->fb)
++ return 0;
++
++ drm_gem_plane_helper_prepare_fb(plane, new_state);
++
++ return msm_framebuffer_prepare(new_state->fb, kms->aspace, needs_dirtyfb);
++}
++
+ static void mdp5_plane_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+ {
+ struct mdp5_kms *mdp5_kms = get_kms(plane);
+ struct msm_kms *kms = &mdp5_kms->base.base;
+ struct drm_framebuffer *fb = old_state->fb;
++ bool needed_dirtyfb = to_mdp5_plane_state(old_state)->needs_dirtyfb;
+
+ if (!fb)
+ return;
+
+ DBG("%s: cleanup: FB[%u]", plane->name, fb->base.id);
+- msm_framebuffer_cleanup(fb, kms->aspace);
++ msm_framebuffer_cleanup(fb, kms->aspace, needed_dirtyfb);
+ }
+
+ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
+@@ -294,12 +313,24 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
+ mdp5_state->r_hwpipe = NULL;
+
+
+- mdp5_pipe_release(state->state, old_hwpipe);
+- mdp5_pipe_release(state->state, old_right_hwpipe);
++ ret = mdp5_pipe_release(state->state, old_hwpipe);
++ if (ret)
++ return ret;
++
++ ret = mdp5_pipe_release(state->state, old_right_hwpipe);
++ if (ret)
++ return ret;
++
+ }
+ } else {
+- mdp5_pipe_release(state->state, mdp5_state->hwpipe);
+- mdp5_pipe_release(state->state, mdp5_state->r_hwpipe);
++ ret = mdp5_pipe_release(state->state, mdp5_state->hwpipe);
++ if (ret)
++ return ret;
++
++ ret = mdp5_pipe_release(state->state, mdp5_state->r_hwpipe);
++ if (ret)
++ return ret;
++
+ mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL;
+ }
+
+@@ -437,7 +468,7 @@ static void mdp5_plane_atomic_async_update(struct drm_plane *plane,
+ }
+
+ static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = {
+- .prepare_fb = msm_atomic_prepare_fb,
++ .prepare_fb = mdp5_plane_prepare_fb,
+ .cleanup_fb = mdp5_plane_cleanup_fb,
+ .atomic_check = mdp5_plane_atomic_check,
+ .atomic_update = mdp5_plane_atomic_update,
+diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
+index cabe15190ec18..8746ceae8fca9 100644
+--- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
++++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
+@@ -169,6 +169,8 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len,
+ va_list va;
+
+ new_blk = kzalloc(sizeof(struct msm_disp_state_block), GFP_KERNEL);
++ if (!new_blk)
++ return;
+
+ va_start(va, fmt);
+
+@@ -183,5 +185,5 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len,
+ new_blk->base_addr = base_addr;
+
+ msm_disp_state_dump_regs(&new_blk->state, new_blk->size, base_addr);
+- list_add(&new_blk->node, &disp_state->blocks);
++ list_add_tail(&new_blk->node, &disp_state->blocks);
+ }
+diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c
+index d7e4a39a904e2..0eaaaa94563a3 100644
+--- a/drivers/gpu/drm/msm/dp/dp_audio.c
++++ b/drivers/gpu/drm/msm/dp/dp_audio.c
+@@ -577,6 +577,18 @@ static struct hdmi_codec_pdata codec_data = {
+ .i2s = 1,
+ };
+
++void dp_unregister_audio_driver(struct device *dev, struct dp_audio *dp_audio)
++{
++ struct dp_audio_private *audio_priv;
++
++ audio_priv = container_of(dp_audio, struct dp_audio_private, dp_audio);
++
++ if (audio_priv->audio_pdev) {
++ platform_device_unregister(audio_priv->audio_pdev);
++ audio_priv->audio_pdev = NULL;
++ }
++}
++
+ int dp_register_audio_driver(struct device *dev,
+ struct dp_audio *dp_audio)
+ {
+diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h
+index 84e5f4a5d26ba..4ab78880af829 100644
+--- a/drivers/gpu/drm/msm/dp/dp_audio.h
++++ b/drivers/gpu/drm/msm/dp/dp_audio.h
+@@ -53,6 +53,8 @@ struct dp_audio *dp_audio_get(struct platform_device *pdev,
+ int dp_register_audio_driver(struct device *dev,
+ struct dp_audio *dp_audio);
+
++void dp_unregister_audio_driver(struct device *dev, struct dp_audio *dp_audio);
++
+ /**
+ * dp_audio_put()
+ *
+diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
+index eb40d8413bca9..4742aca2af482 100644
+--- a/drivers/gpu/drm/msm/dp/dp_aux.c
++++ b/drivers/gpu/drm/msm/dp/dp_aux.c
+@@ -33,6 +33,7 @@ struct dp_aux_private {
+ bool read;
+ bool no_send_addr;
+ bool no_send_stop;
++ bool initted;
+ u32 offset;
+ u32 segment;
+
+@@ -160,47 +161,6 @@ static ssize_t dp_aux_cmd_fifo_rx(struct dp_aux_private *aux,
+ return i;
+ }
+
+-static void dp_aux_native_handler(struct dp_aux_private *aux, u32 isr)
+-{
+- if (isr & DP_INTR_AUX_I2C_DONE)
+- aux->aux_error_num = DP_AUX_ERR_NONE;
+- else if (isr & DP_INTR_WRONG_ADDR)
+- aux->aux_error_num = DP_AUX_ERR_ADDR;
+- else if (isr & DP_INTR_TIMEOUT)
+- aux->aux_error_num = DP_AUX_ERR_TOUT;
+- if (isr & DP_INTR_NACK_DEFER)
+- aux->aux_error_num = DP_AUX_ERR_NACK;
+- if (isr & DP_INTR_AUX_ERROR) {
+- aux->aux_error_num = DP_AUX_ERR_PHY;
+- dp_catalog_aux_clear_hw_interrupts(aux->catalog);
+- }
+-}
+-
+-static void dp_aux_i2c_handler(struct dp_aux_private *aux, u32 isr)
+-{
+- if (isr & DP_INTR_AUX_I2C_DONE) {
+- if (isr & (DP_INTR_I2C_NACK | DP_INTR_I2C_DEFER))
+- aux->aux_error_num = DP_AUX_ERR_NACK;
+- else
+- aux->aux_error_num = DP_AUX_ERR_NONE;
+- } else {
+- if (isr & DP_INTR_WRONG_ADDR)
+- aux->aux_error_num = DP_AUX_ERR_ADDR;
+- else if (isr & DP_INTR_TIMEOUT)
+- aux->aux_error_num = DP_AUX_ERR_TOUT;
+- if (isr & DP_INTR_NACK_DEFER)
+- aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
+- if (isr & DP_INTR_I2C_NACK)
+- aux->aux_error_num = DP_AUX_ERR_NACK;
+- if (isr & DP_INTR_I2C_DEFER)
+- aux->aux_error_num = DP_AUX_ERR_DEFER;
+- if (isr & DP_INTR_AUX_ERROR) {
+- aux->aux_error_num = DP_AUX_ERR_PHY;
+- dp_catalog_aux_clear_hw_interrupts(aux->catalog);
+- }
+- }
+-}
+-
+ static void dp_aux_update_offset_and_segment(struct dp_aux_private *aux,
+ struct drm_dp_aux_msg *input_msg)
+ {
+@@ -331,6 +291,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
+ }
+
+ mutex_lock(&aux->mutex);
++ if (!aux->initted) {
++ ret = -EIO;
++ goto exit;
++ }
+
+ dp_aux_update_offset_and_segment(aux, msg);
+ dp_aux_transfer_helper(aux, msg, true);
+@@ -380,6 +344,8 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
+ }
+
+ aux->cmd_busy = false;
++
++exit:
+ mutex_unlock(&aux->mutex);
+
+ return ret;
+@@ -399,13 +365,46 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux)
+
+ isr = dp_catalog_aux_get_irq(aux->catalog);
+
+- if (!aux->cmd_busy)
++ /* no interrupts pending, return immediately */
++ if (!isr)
+ return;
+
+- if (aux->native)
+- dp_aux_native_handler(aux, isr);
+- else
+- dp_aux_i2c_handler(aux, isr);
++ if (!aux->cmd_busy) {
++ DRM_ERROR("Unexpected DP AUX IRQ %#010x when not busy\n", isr);
++ return;
++ }
++
++ /*
++ * The logic below assumes only one error bit is set (other than "done"
++ * which can apparently be set at the same time as some of the other
++ * bits). Warn if more than one get set so we know we need to improve
++ * the logic.
++ */
++ if (hweight32(isr & ~DP_INTR_AUX_XFER_DONE) > 1)
++ DRM_WARN("Some DP AUX interrupts unhandled: %#010x\n", isr);
++
++ if (isr & DP_INTR_AUX_ERROR) {
++ aux->aux_error_num = DP_AUX_ERR_PHY;
++ dp_catalog_aux_clear_hw_interrupts(aux->catalog);
++ } else if (isr & DP_INTR_NACK_DEFER) {
++ aux->aux_error_num = DP_AUX_ERR_NACK_DEFER;
++ } else if (isr & DP_INTR_WRONG_ADDR) {
++ aux->aux_error_num = DP_AUX_ERR_ADDR;
++ } else if (isr & DP_INTR_TIMEOUT) {
++ aux->aux_error_num = DP_AUX_ERR_TOUT;
++ } else if (!aux->native && (isr & DP_INTR_I2C_NACK)) {
++ aux->aux_error_num = DP_AUX_ERR_NACK;
++ } else if (!aux->native && (isr & DP_INTR_I2C_DEFER)) {
++ if (isr & DP_INTR_AUX_XFER_DONE)
++ aux->aux_error_num = DP_AUX_ERR_NACK;
++ else
++ aux->aux_error_num = DP_AUX_ERR_DEFER;
++ } else if (isr & DP_INTR_AUX_XFER_DONE) {
++ aux->aux_error_num = DP_AUX_ERR_NONE;
++ } else {
++ DRM_WARN("Unexpected interrupt: %#010x\n", isr);
++ return;
++ }
+
+ complete(&aux->comp);
+ }
+@@ -431,8 +430,13 @@ void dp_aux_init(struct drm_dp_aux *dp_aux)
+
+ aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
+
++ mutex_lock(&aux->mutex);
++
+ dp_catalog_aux_enable(aux->catalog, true);
+ aux->retry_cnt = 0;
++ aux->initted = true;
++
++ mutex_unlock(&aux->mutex);
+ }
+
+ void dp_aux_deinit(struct drm_dp_aux *dp_aux)
+@@ -441,7 +445,12 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux)
+
+ aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
+
++ mutex_lock(&aux->mutex);
++
++ aux->initted = false;
+ dp_catalog_aux_enable(aux->catalog, false);
++
++ mutex_unlock(&aux->mutex);
+ }
+
+ int dp_aux_register(struct drm_dp_aux *dp_aux)
+diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c
+index cc2bb82953296..8df5dfd6ad17f 100644
+--- a/drivers/gpu/drm/msm/dp/dp_catalog.c
++++ b/drivers/gpu/drm/msm/dp/dp_catalog.c
+@@ -34,7 +34,7 @@
+ #define MSM_DP_CONTROLLER_P0_SIZE 0x0400
+
+ #define DP_INTERRUPT_STATUS1 \
+- (DP_INTR_AUX_I2C_DONE| \
++ (DP_INTR_AUX_XFER_DONE| \
+ DP_INTR_WRONG_ADDR | DP_INTR_TIMEOUT | \
+ DP_INTR_NACK_DEFER | DP_INTR_WRONG_DATA_CNT | \
+ DP_INTR_I2C_NACK | DP_INTR_I2C_DEFER | \
+@@ -437,7 +437,7 @@ void dp_catalog_ctrl_config_msa(struct dp_catalog *dp_catalog,
+
+ if (rate == link_rate_hbr3)
+ pixel_div = 6;
+- else if (rate == 1620000 || rate == 270000)
++ else if (rate == 162000 || rate == 270000)
+ pixel_div = 2;
+ else if (rate == link_rate_hbr2)
+ pixel_div = 4;
+diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h
+index 6965afa81aad2..32d3e14c98f7f 100644
+--- a/drivers/gpu/drm/msm/dp/dp_catalog.h
++++ b/drivers/gpu/drm/msm/dp/dp_catalog.h
+@@ -13,7 +13,7 @@
+
+ /* interrupts */
+ #define DP_INTR_HPD BIT(0)
+-#define DP_INTR_AUX_I2C_DONE BIT(3)
++#define DP_INTR_AUX_XFER_DONE BIT(3)
+ #define DP_INTR_WRONG_ADDR BIT(6)
+ #define DP_INTR_TIMEOUT BIT(9)
+ #define DP_INTR_NACK_DEFER BIT(12)
+diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
+index 62e75dc8afc63..6d9eec98e0d38 100644
+--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
++++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
+@@ -1198,7 +1198,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl,
+ if (ret)
+ return ret;
+
+- dp_ctrl_train_pattern_set(ctrl, pattern | DP_RECOVERED_CLOCK_OUT_EN);
++ dp_ctrl_train_pattern_set(ctrl, pattern);
+
+ for (tries = 0; tries <= maximum_retries; tries++) {
+ drm_dp_link_train_channel_eq_delay(ctrl->aux, ctrl->panel->dpcd);
+@@ -1348,60 +1348,49 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl)
+ return ret;
+ }
+
+-int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset)
++void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable)
++{
++ struct dp_ctrl_private *ctrl;
++
++ ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
++
++ dp_catalog_ctrl_reset(ctrl->catalog);
++
++ /*
++ * all dp controller programmable registers will not
++ * be reset to default value after DP_SW_RESET
++ * therefore interrupt mask bits have to be updated
++ * to enable/disable interrupts
++ */
++ dp_catalog_ctrl_enable_irq(ctrl->catalog, enable);
++}
++
++void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl)
+ {
+ struct dp_ctrl_private *ctrl;
+ struct dp_io *dp_io;
+ struct phy *phy;
+
+- if (!dp_ctrl) {
+- DRM_ERROR("Invalid input data\n");
+- return -EINVAL;
+- }
+-
+ ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
+ dp_io = &ctrl->parser->io;
+ phy = dp_io->phy;
+
+- ctrl->dp_ctrl.orientation = flip;
+-
+- if (reset)
+- dp_catalog_ctrl_reset(ctrl->catalog);
+-
+- DRM_DEBUG_DP("flip=%d\n", flip);
+ dp_catalog_ctrl_phy_reset(ctrl->catalog);
+ phy_init(phy);
+- dp_catalog_ctrl_enable_irq(ctrl->catalog, true);
+-
+- return 0;
+ }
+
+-/**
+- * dp_ctrl_host_deinit() - Uninitialize DP controller
+- * @dp_ctrl: Display Port Driver data
+- *
+- * Perform required steps to uninitialize DP controller
+- * and its resources.
+- */
+-void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl)
++void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl)
+ {
+ struct dp_ctrl_private *ctrl;
+ struct dp_io *dp_io;
+ struct phy *phy;
+
+- if (!dp_ctrl) {
+- DRM_ERROR("Invalid input data\n");
+- return;
+- }
+-
+ ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
+ dp_io = &ctrl->parser->io;
+ phy = dp_io->phy;
+
+- dp_catalog_ctrl_enable_irq(ctrl->catalog, false);
++ dp_catalog_ctrl_phy_reset(ctrl->catalog);
+ phy_exit(phy);
+-
+- DRM_DEBUG_DP("Host deinitialized successfully\n");
+ }
+
+ static bool dp_ctrl_use_fixed_nvid(struct dp_ctrl_private *ctrl)
+@@ -1471,7 +1460,10 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl)
+ }
+
+ phy_power_off(phy);
++
++ /* aux channel down, reinit phy */
+ phy_exit(phy);
++ phy_init(phy);
+
+ return 0;
+ }
+@@ -1501,6 +1493,8 @@ end:
+ return ret;
+ }
+
++static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl);
++
+ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
+ {
+ int ret = 0;
+@@ -1515,7 +1509,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
+ * running. Add the global reset just before disabling the
+ * link clocks and core clocks.
+ */
+- ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
++ ret = dp_ctrl_off(&ctrl->dp_ctrl);
+ if (ret) {
+ DRM_ERROR("failed to disable DP controller\n");
+ return ret;
+@@ -1523,7 +1517,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
+
+ ret = dp_ctrl_on_link(&ctrl->dp_ctrl);
+ if (!ret)
+- ret = dp_ctrl_on_stream(&ctrl->dp_ctrl);
++ ret = dp_ctrl_on_stream_phy_test_report(&ctrl->dp_ctrl);
+ else
+ DRM_ERROR("failed to enable DP link controller\n");
+
+@@ -1682,8 +1676,6 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
+ ctrl->link->link_params.rate,
+ ctrl->link->link_params.num_lanes, ctrl->dp_ctrl.pixel_rate);
+
+- ctrl->link->phy_params.p_level = 0;
+- ctrl->link->phy_params.v_level = 0;
+
+ rc = dp_ctrl_enable_mainlink_clocks(ctrl);
+ if (rc)
+@@ -1744,6 +1736,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
+ /* end with failure */
+ break; /* lane == 1 already */
+ }
++
++ /* stop link training before start re training */
++ dp_ctrl_clear_training_pattern(ctrl);
+ }
+ }
+
+@@ -1777,7 +1772,27 @@ static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl)
+ return dp_ctrl_setup_main_link(ctrl, &training_step);
+ }
+
+-int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
++static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl)
++{
++ int ret;
++ struct dp_ctrl_private *ctrl;
++
++ ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
++
++ ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
++
++ ret = dp_ctrl_enable_stream_clocks(ctrl);
++ if (ret) {
++ DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret);
++ return ret;
++ }
++
++ dp_ctrl_send_phy_test_pattern(ctrl);
++
++ return 0;
++}
++
++int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train)
+ {
+ int ret = 0;
+ bool mainlink_ready = false;
+@@ -1802,22 +1817,17 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
+ }
+ }
+
+- if (!dp_ctrl_channel_eq_ok(ctrl))
+- dp_ctrl_link_retrain(ctrl);
+-
+- /* stop txing train pattern to end link training */
+- dp_ctrl_clear_training_pattern(ctrl);
+-
+ ret = dp_ctrl_enable_stream_clocks(ctrl);
+ if (ret) {
+ DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret);
+ goto end;
+ }
+
+- if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) {
+- dp_ctrl_send_phy_test_pattern(ctrl);
+- return 0;
+- }
++ if (force_link_train || !dp_ctrl_channel_eq_ok(ctrl))
++ dp_ctrl_link_retrain(ctrl);
++
++ /* stop txing train pattern to end link training */
++ dp_ctrl_clear_training_pattern(ctrl);
+
+ /*
+ * Set up transfer unit values and set controller state to send
+@@ -1876,8 +1886,14 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl)
+ return ret;
+ }
+
++ DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n",
++ (u32)(uintptr_t)phy, phy->init_count, phy->power_count);
++
+ phy_power_off(phy);
+
++ DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n",
++ (u32)(uintptr_t)phy, phy->init_count, phy->power_count);
++
+ /* aux channel down, reinit phy */
+ phy_exit(phy);
+ phy_init(phy);
+@@ -1886,23 +1902,6 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl)
+ return ret;
+ }
+
+-void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl)
+-{
+- struct dp_ctrl_private *ctrl;
+- struct dp_io *dp_io;
+- struct phy *phy;
+-
+- ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
+- dp_io = &ctrl->parser->io;
+- phy = dp_io->phy;
+-
+- dp_catalog_ctrl_reset(ctrl->catalog);
+-
+- phy_exit(phy);
+-
+- DRM_DEBUG_DP("DP off phy done\n");
+-}
+-
+ int dp_ctrl_off(struct dp_ctrl *dp_ctrl)
+ {
+ struct dp_ctrl_private *ctrl;
+@@ -1930,10 +1929,14 @@ int dp_ctrl_off(struct dp_ctrl *dp_ctrl)
+ DRM_ERROR("Failed to disable link clocks. ret=%d\n", ret);
+ }
+
++ DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n",
++ (u32)(uintptr_t)phy, phy->init_count, phy->power_count);
++
+ phy_power_off(phy);
+- phy_exit(phy);
+
+- DRM_DEBUG_DP("DP off done\n");
++ DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n",
++ (u32)(uintptr_t)phy, phy->init_count, phy->power_count);
++
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h
+index 2363a2df9597b..dcc7af21a5f05 100644
+--- a/drivers/gpu/drm/msm/dp/dp_ctrl.h
++++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h
+@@ -19,12 +19,9 @@ struct dp_ctrl {
+ u32 pixel_rate;
+ };
+
+-int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset);
+-void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl);
+ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl);
+-int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl);
++int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train);
+ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl);
+-void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl);
+ int dp_ctrl_off(struct dp_ctrl *dp_ctrl);
+ void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl);
+ void dp_ctrl_isr(struct dp_ctrl *dp_ctrl);
+@@ -34,4 +31,9 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link,
+ struct dp_power *power, struct dp_catalog *catalog,
+ struct dp_parser *parser);
+
++void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable);
++void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl);
++void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl);
++void dp_ctrl_irq_phy_exit(struct dp_ctrl *dp_ctrl);
++
+ #endif /* _DP_CTRL_H_ */
+diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
+index a0392e4d8134c..bbe350fab797c 100644
+--- a/drivers/gpu/drm/msm/dp/dp_display.c
++++ b/drivers/gpu/drm/msm/dp/dp_display.c
+@@ -81,6 +81,7 @@ struct dp_display_private {
+
+ /* state variables */
+ bool core_initialized;
++ bool phy_initialized;
+ bool hpd_irq_on;
+ bool audio_supported;
+
+@@ -110,6 +111,7 @@ struct dp_display_private {
+ u32 hpd_state;
+ u32 event_pndx;
+ u32 event_gndx;
++ struct task_struct *ev_tsk;
+ struct dp_event event_list[DP_EVENT_Q_MAX];
+ spinlock_t event_lock;
+
+@@ -193,6 +195,8 @@ void dp_display_signal_audio_complete(struct msm_dp *dp_display)
+ complete_all(&dp->audio_comp);
+ }
+
++static int dp_hpd_event_thread_start(struct dp_display_private *dp_priv);
++
+ static int dp_display_bind(struct device *dev, struct device *master,
+ void *data)
+ {
+@@ -230,9 +234,18 @@ static int dp_display_bind(struct device *dev, struct device *master,
+ }
+
+ rc = dp_register_audio_driver(dev, dp->audio);
+- if (rc)
++ if (rc) {
+ DRM_ERROR("Audio registration Dp failed\n");
++ goto end;
++ }
++
++ rc = dp_hpd_event_thread_start(dp);
++ if (rc) {
++ DRM_ERROR("Event thread create failed\n");
++ goto end;
++ }
+
++ return 0;
+ end:
+ return rc;
+ }
+@@ -247,7 +260,14 @@ static void dp_display_unbind(struct device *dev, struct device *master,
+ dp = container_of(g_dp_display,
+ struct dp_display_private, dp_display);
+
++ /* disable all HPD interrupts */
++ if (dp->core_initialized)
++ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
++
++ kthread_stop(dp->ev_tsk);
++
+ dp_power_client_deinit(dp->power);
++ dp_unregister_audio_driver(dev, dp->audio);
+ dp_aux_unregister(dp->aux);
+ priv->dp = NULL;
+ }
+@@ -344,36 +364,45 @@ end:
+ return rc;
+ }
+
+-static void dp_display_host_init(struct dp_display_private *dp, int reset)
++static void dp_display_host_phy_init(struct dp_display_private *dp)
+ {
+- bool flip = false;
++ DRM_DEBUG_DP("core_init=%d phy_init=%d\n",
++ dp->core_initialized, dp->phy_initialized);
+
+- DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized);
+- if (dp->core_initialized) {
+- DRM_DEBUG_DP("DP core already initialized\n");
+- return;
++ if (!dp->phy_initialized) {
++ dp_ctrl_phy_init(dp->ctrl);
++ dp->phy_initialized = true;
+ }
++}
+
+- if (dp->usbpd->orientation == ORIENTATION_CC2)
+- flip = true;
++static void dp_display_host_phy_exit(struct dp_display_private *dp)
++{
++ DRM_DEBUG_DP("core_init=%d phy_init=%d\n",
++ dp->core_initialized, dp->phy_initialized);
+
+- dp_power_init(dp->power, flip);
+- dp_ctrl_host_init(dp->ctrl, flip, reset);
++ if (dp->phy_initialized) {
++ dp_ctrl_phy_exit(dp->ctrl);
++ dp->phy_initialized = false;
++ }
++}
++
++static void dp_display_host_init(struct dp_display_private *dp)
++{
++ DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized);
++
++ dp_power_init(dp->power, false);
++ dp_ctrl_reset_irq_ctrl(dp->ctrl, true);
+ dp_aux_init(dp->aux);
+ dp->core_initialized = true;
+ }
+
+ static void dp_display_host_deinit(struct dp_display_private *dp)
+ {
+- if (!dp->core_initialized) {
+- DRM_DEBUG_DP("DP core not initialized\n");
+- return;
+- }
++ DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized);
+
+- dp_ctrl_host_deinit(dp->ctrl);
++ dp_ctrl_reset_irq_ctrl(dp->ctrl, false);
+ dp_aux_deinit(dp->aux);
+ dp_power_deinit(dp->power);
+-
+ dp->core_initialized = false;
+ }
+
+@@ -391,7 +420,7 @@ static int dp_display_usbpd_configure_cb(struct device *dev)
+ dp = container_of(g_dp_display,
+ struct dp_display_private, dp_display);
+
+- dp_display_host_init(dp, false);
++ dp_display_host_phy_init(dp);
+
+ rc = dp_display_process_hpd_high(dp);
+ end:
+@@ -529,17 +558,9 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
+
+ dp->hpd_state = ST_CONNECT_PENDING;
+
+- hpd->hpd_high = 1;
+-
+ ret = dp_display_usbpd_configure_cb(&dp->pdev->dev);
+ if (ret) { /* link train failed */
+- hpd->hpd_high = 0;
+ dp->hpd_state = ST_DISCONNECTED;
+-
+- if (ret == -ECONNRESET) { /* cable unplugged */
+- dp->core_initialized = false;
+- }
+-
+ } else {
+ /* start sentinel checking in case of missing uevent */
+ dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout);
+@@ -609,9 +630,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
+ if (state == ST_DISCONNECTED) {
+ /* triggered by irq_hdp with sink_count = 0 */
+ if (dp->link->sink_count == 0) {
+- dp_ctrl_off_phy(dp->ctrl);
+- hpd->hpd_high = 0;
+- dp->core_initialized = false;
++ dp_display_host_phy_exit(dp);
+ }
+ mutex_unlock(&dp->event_mutex);
+ return 0;
+@@ -634,8 +653,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
+ /* disable HPD plug interrupts */
+ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, false);
+
+- hpd->hpd_high = 0;
+-
+ /*
+ * We don't need separate work for disconnect as
+ * connect/attention interrupts are disabled
+@@ -675,7 +692,6 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data
+ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
+ {
+ u32 state;
+- int ret;
+
+ mutex_lock(&dp->event_mutex);
+
+@@ -700,10 +716,8 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
+ return 0;
+ }
+
+- ret = dp_display_usbpd_attention_cb(&dp->pdev->dev);
+- if (ret == -ECONNRESET) { /* cable unplugged */
+- dp->core_initialized = false;
+- }
++ dp_display_usbpd_attention_cb(&dp->pdev->dev);
++
+ DRM_DEBUG_DP("hpd_state=%d\n", state);
+
+ mutex_unlock(&dp->event_mutex);
+@@ -827,7 +841,7 @@ static int dp_display_set_mode(struct msm_dp *dp_display,
+
+ dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+- dp->panel->dp_mode.drm_mode = mode->drm_mode;
++ drm_mode_copy(&dp->panel->dp_mode.drm_mode, &mode->drm_mode);
+ dp->panel->dp_mode.bpp = mode->bpp;
+ dp->panel->dp_mode.capabilities = mode->capabilities;
+ dp_panel_init_panel_info(dp->panel);
+@@ -852,7 +866,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data)
+ return 0;
+ }
+
+- rc = dp_ctrl_on_stream(dp->ctrl);
++ rc = dp_ctrl_on_stream(dp->ctrl, data);
+ if (!rc)
+ dp_display->power_on = true;
+
+@@ -898,12 +912,19 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data)
+
+ dp_display->audio_enabled = false;
+
+- /* triggered by irq_hpd with sink_count = 0 */
+ if (dp->link->sink_count == 0) {
++ /*
++ * irq_hpd with sink_count = 0
++ * hdmi unplugged out of dongle
++ */
+ dp_ctrl_off_link_stream(dp->ctrl);
+ } else {
++ /*
++ * unplugged interrupt
++ * dongle unplugged out of DUT
++ */
+ dp_ctrl_off(dp->ctrl);
+- dp->core_initialized = false;
++ dp_display_host_phy_exit(dp);
+ }
+
+ dp_display->power_on = false;
+@@ -1033,7 +1054,7 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp)
+ static void dp_display_config_hpd(struct dp_display_private *dp)
+ {
+
+- dp_display_host_init(dp, true);
++ dp_display_host_init(dp);
+ dp_catalog_ctrl_hpd_config(dp->catalog);
+
+ /* Enable interrupt first time
+@@ -1055,12 +1076,17 @@ static int hpd_event_thread(void *data)
+ while (1) {
+ if (timeout_mode) {
+ wait_event_timeout(dp_priv->event_q,
+- (dp_priv->event_pndx == dp_priv->event_gndx),
+- EVENT_TIMEOUT);
++ (dp_priv->event_pndx == dp_priv->event_gndx) ||
++ kthread_should_stop(), EVENT_TIMEOUT);
+ } else {
+ wait_event_interruptible(dp_priv->event_q,
+- (dp_priv->event_pndx != dp_priv->event_gndx));
++ (dp_priv->event_pndx != dp_priv->event_gndx) ||
++ kthread_should_stop());
+ }
++
++ if (kthread_should_stop())
++ break;
++
+ spin_lock_irqsave(&dp_priv->event_lock, flag);
+ todo = &dp_priv->event_list[dp_priv->event_gndx];
+ if (todo->delay) {
+@@ -1130,12 +1156,17 @@ static int hpd_event_thread(void *data)
+ return 0;
+ }
+
+-static void dp_hpd_event_setup(struct dp_display_private *dp_priv)
++static int dp_hpd_event_thread_start(struct dp_display_private *dp_priv)
+ {
+- init_waitqueue_head(&dp_priv->event_q);
+- spin_lock_init(&dp_priv->event_lock);
++ /* set event q to empty */
++ dp_priv->event_gndx = 0;
++ dp_priv->event_pndx = 0;
+
+- kthread_run(hpd_event_thread, dp_priv, "dp_hpd_handler");
++ dp_priv->ev_tsk = kthread_run(hpd_event_thread, dp_priv, "dp_hpd_handler");
++ if (IS_ERR(dp_priv->ev_tsk))
++ return PTR_ERR(dp_priv->ev_tsk);
++
++ return 0;
+ }
+
+ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id)
+@@ -1194,13 +1225,12 @@ int dp_display_request_irq(struct msm_dp *dp_display)
+ dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+ dp->irq = irq_of_parse_and_map(dp->pdev->dev.of_node, 0);
+- if (dp->irq < 0) {
+- rc = dp->irq;
+- DRM_ERROR("failed to get irq: %d\n", rc);
+- return rc;
++ if (!dp->irq) {
++ DRM_ERROR("failed to get irq\n");
++ return -EINVAL;
+ }
+
+- rc = devm_request_irq(&dp->pdev->dev, dp->irq,
++ rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq,
+ dp_display_irq_handler,
+ IRQF_TRIGGER_HIGH, "dp_display_isr", dp);
+ if (rc < 0) {
+@@ -1236,8 +1266,11 @@ static int dp_display_probe(struct platform_device *pdev)
+ return -EPROBE_DEFER;
+ }
+
++ /* setup event q */
+ mutex_init(&dp->event_mutex);
+ g_dp_display = &dp->dp_display;
++ init_waitqueue_head(&dp->event_q);
++ spin_lock_init(&dp->event_lock);
+
+ /* Store DP audio handle inside DP display */
+ g_dp_display->dp_audio = dp->audio;
+@@ -1262,9 +1295,9 @@ static int dp_display_remove(struct platform_device *pdev)
+ dp = container_of(g_dp_display,
+ struct dp_display_private, dp_display);
+
++ component_del(&pdev->dev, &dp_display_comp_ops);
+ dp_display_deinit_sub_modules(dp);
+
+- component_del(&pdev->dev, &dp_display_comp_ops);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+@@ -1288,20 +1321,23 @@ static int dp_pm_resume(struct device *dev)
+ dp->hpd_state = ST_DISCONNECTED;
+
+ /* turn on dp ctrl/phy */
+- dp_display_host_init(dp, true);
++ dp_display_host_init(dp);
+
+ dp_catalog_ctrl_hpd_config(dp->catalog);
+
+- /*
+- * set sink to normal operation mode -- D0
+- * before dpcd read
+- */
+- dp_link_psm_config(dp->link, &dp->panel->link_info, false);
+
+ if (dp_catalog_link_is_connected(dp->catalog)) {
++ /*
++ * set sink to normal operation mode -- D0
++ * before dpcd read
++ */
++ dp_display_host_phy_init(dp);
++ dp_link_psm_config(dp->link, &dp->panel->link_info, false);
+ sink_count = drm_dp_read_sink_count(dp->aux);
+ if (sink_count < 0)
+ sink_count = 0;
++
++ dp_display_host_phy_exit(dp);
+ }
+
+ dp->link->sink_count = sink_count;
+@@ -1340,18 +1376,16 @@ static int dp_pm_suspend(struct device *dev)
+ DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n",
+ dp->core_initialized, dp_display->power_on);
+
+- if (dp->core_initialized == true) {
+- /* mainlink enabled */
+- if (dp_power_clk_status(dp->power, DP_CTRL_PM))
+- dp_ctrl_off_link_stream(dp->ctrl);
+-
+- dp_display_host_deinit(dp);
+- }
++ /* mainlink enabled */
++ if (dp_power_clk_status(dp->power, DP_CTRL_PM))
++ dp_ctrl_off_link_stream(dp->ctrl);
+
+- dp->hpd_state = ST_SUSPENDED;
++ dp_display_host_phy_exit(dp);
+
+ /* host_init will be called at pm_resume */
+- dp->core_initialized = false;
++ dp_display_host_deinit(dp);
++
++ dp->hpd_state = ST_SUSPENDED;
+
+ DRM_DEBUG_DP("After, core_inited=%d power_on=%d\n",
+ dp->core_initialized, dp_display->power_on);
+@@ -1414,8 +1448,6 @@ void msm_dp_irq_postinstall(struct msm_dp *dp_display)
+
+ dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+- dp_hpd_event_setup(dp);
+-
+ dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 100);
+ }
+
+@@ -1442,6 +1474,7 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
+ struct drm_encoder *encoder)
+ {
+ struct msm_drm_private *priv;
++ struct dp_display_private *dp_priv;
+ int ret;
+
+ if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev))
+@@ -1450,6 +1483,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
+ priv = dev->dev_private;
+ dp_display->drm_dev = dev;
+
++ dp_priv = container_of(dp_display, struct dp_display_private, dp_display);
++
+ ret = dp_display_request_irq(dp_display);
+ if (ret) {
+ DRM_ERROR("request_irq failed, ret=%d\n", ret);
+@@ -1467,6 +1502,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
+ return ret;
+ }
+
++ dp_priv->panel->connector = dp_display->connector;
++
+ priv->connectors[priv->num_connectors++] = dp_display->connector;
+ return 0;
+ }
+@@ -1476,6 +1513,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder)
+ int rc = 0;
+ struct dp_display_private *dp_display;
+ u32 state;
++ bool force_link_train = false;
+
+ dp_display = container_of(dp, struct dp_display_private, dp_display);
+ if (!dp_display->dp_mode.drm_mode.clock) {
+@@ -1504,10 +1542,12 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder)
+
+ state = dp_display->hpd_state;
+
+- if (state == ST_DISPLAY_OFF)
+- dp_display_host_init(dp_display, true);
++ if (state == ST_DISPLAY_OFF) {
++ dp_display_host_phy_init(dp_display);
++ force_link_train = true;
++ }
+
+- dp_display_enable(dp_display, 0);
++ dp_display_enable(dp_display, force_link_train);
+
+ rc = dp_display_post_enable(dp);
+ if (rc) {
+@@ -1516,10 +1556,6 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder)
+ dp_display_unprepare(dp);
+ }
+
+- /* manual kick off plug event to train link */
+- if (state == ST_DISPLAY_OFF)
+- dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0);
+-
+ /* completed connection */
+ dp_display->hpd_state = ST_CONNECTED;
+
+diff --git a/drivers/gpu/drm/msm/dp/dp_hpd.c b/drivers/gpu/drm/msm/dp/dp_hpd.c
+index e1c90fa47411f..db98a1d431eb6 100644
+--- a/drivers/gpu/drm/msm/dp/dp_hpd.c
++++ b/drivers/gpu/drm/msm/dp/dp_hpd.c
+@@ -32,8 +32,6 @@ int dp_hpd_connect(struct dp_usbpd *dp_usbpd, bool hpd)
+ hpd_priv = container_of(dp_usbpd, struct dp_hpd_private,
+ dp_usbpd);
+
+- dp_usbpd->hpd_high = hpd;
+-
+ if (!hpd_priv->dp_cb || !hpd_priv->dp_cb->configure
+ || !hpd_priv->dp_cb->disconnect) {
+ pr_err("hpd dp_cb not initialized\n");
+diff --git a/drivers/gpu/drm/msm/dp/dp_hpd.h b/drivers/gpu/drm/msm/dp/dp_hpd.h
+index 5bc5bb64680fb..8feec5aa50271 100644
+--- a/drivers/gpu/drm/msm/dp/dp_hpd.h
++++ b/drivers/gpu/drm/msm/dp/dp_hpd.h
+@@ -26,7 +26,6 @@ enum plug_orientation {
+ * @multi_func: multi-function preferred
+ * @usb_config_req: request to switch to usb
+ * @exit_dp_mode: request exit from displayport mode
+- * @hpd_high: Hot Plug Detect signal is high.
+ * @hpd_irq: Change in the status since last message
+ * @alt_mode_cfg_done: bool to specify alt mode status
+ * @debug_en: bool to specify debug mode
+@@ -39,7 +38,6 @@ struct dp_usbpd {
+ bool multi_func;
+ bool usb_config_req;
+ bool exit_dp_mode;
+- bool hpd_high;
+ bool hpd_irq;
+ bool alt_mode_cfg_done;
+ bool debug_en;
+diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c
+index a5bdfc5029dee..d4d31e5bda070 100644
+--- a/drivers/gpu/drm/msm/dp/dp_link.c
++++ b/drivers/gpu/drm/msm/dp/dp_link.c
+@@ -737,18 +737,25 @@ static int dp_link_parse_sink_count(struct dp_link *dp_link)
+ return 0;
+ }
+
+-static void dp_link_parse_sink_status_field(struct dp_link_private *link)
++static int dp_link_parse_sink_status_field(struct dp_link_private *link)
+ {
+ int len = 0;
+
+ link->prev_sink_count = link->dp_link.sink_count;
+- dp_link_parse_sink_count(&link->dp_link);
++ len = dp_link_parse_sink_count(&link->dp_link);
++ if (len < 0) {
++ DRM_ERROR("DP parse sink count failed\n");
++ return len;
++ }
+
+ len = drm_dp_dpcd_read_link_status(link->aux,
+ link->link_status);
+- if (len < DP_LINK_STATUS_SIZE)
++ if (len < DP_LINK_STATUS_SIZE) {
+ DRM_ERROR("DP link status read failed\n");
+- dp_link_parse_request(link);
++ return len;
++ }
++
++ return dp_link_parse_request(link);
+ }
+
+ /**
+@@ -1023,7 +1030,9 @@ int dp_link_process_request(struct dp_link *dp_link)
+
+ dp_link_reset_data(link);
+
+- dp_link_parse_sink_status_field(link);
++ ret = dp_link_parse_sink_status_field(link);
++ if (ret)
++ return ret;
+
+ if (link->request.test_requested == DP_TEST_LINK_EDID_READ) {
+ dp_link->sink_request |= DP_TEST_LINK_EDID_READ;
+diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
+index 2181b60e1d1d8..62b742e701d2c 100644
+--- a/drivers/gpu/drm/msm/dp/dp_panel.c
++++ b/drivers/gpu/drm/msm/dp/dp_panel.c
+@@ -206,12 +206,6 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
+ rc = -ETIMEDOUT;
+ goto end;
+ }
+-
+- /* fail safe edid */
+- mutex_lock(&connector->dev->mode_config.mutex);
+- if (drm_add_modes_noedid(connector, 640, 480))
+- drm_set_preferred_mode(connector, 640, 480);
+- mutex_unlock(&connector->dev->mode_config.mutex);
+ }
+
+ if (panel->aux_cfg_update_done) {
+diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
+index 75ae3008b68f4..fb8b21837c296 100644
+--- a/drivers/gpu/drm/msm/dsi/dsi.c
++++ b/drivers/gpu/drm/msm/dsi/dsi.c
+@@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi)
+
+ of_node_put(phy_node);
+
+- if (!phy_pdev || !msm_dsi->phy) {
++ if (!phy_pdev) {
++ DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
++ return -EPROBE_DEFER;
++ }
++ if (!msm_dsi->phy) {
++ put_device(&phy_pdev->dev);
+ DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__);
+ return -EPROBE_DEFER;
+ }
+@@ -207,6 +212,12 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
+ return -EINVAL;
+
+ priv = dev->dev_private;
++
++ if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) {
++ DRM_DEV_ERROR(dev->dev, "too many bridges\n");
++ return -ENOSPC;
++ }
++
+ msm_dsi->dev = dev;
+
+ ret = msm_dsi_host_modeset_init(msm_dsi->host, dev);
+@@ -215,9 +226,13 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
+ goto fail;
+ }
+
+- if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) {
+- ret = -EINVAL;
+- goto fail;
++ if (msm_dsi_is_bonded_dsi(msm_dsi) &&
++ !msm_dsi_is_master_dsi(msm_dsi)) {
++ /*
++ * Do not return an eror here,
++ * Just skip creating encoder/connector for the slave-DSI.
++ */
++ return 0;
+ }
+
+ msm_dsi->encoder = encoder;
+diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
+index b50db91cb8a7e..a63666e59d19e 100644
+--- a/drivers/gpu/drm/msm/dsi/dsi.h
++++ b/drivers/gpu/drm/msm/dsi/dsi.h
+@@ -82,7 +82,6 @@ int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg);
+ bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len);
+ int msm_dsi_manager_register(struct msm_dsi *msm_dsi);
+ void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi);
+-bool msm_dsi_manager_validate_current_config(u8 id);
+ void msm_dsi_manager_tpg_enable(void);
+
+ /* msm dsi */
+@@ -107,6 +106,8 @@ void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host,
+ u32 dma_base, u32 len);
+ int msm_dsi_host_enable(struct mipi_dsi_host *host);
+ int msm_dsi_host_disable(struct mipi_dsi_host *host);
++void msm_dsi_host_enable_irq(struct mipi_dsi_host *host);
++void msm_dsi_host_disable_irq(struct mipi_dsi_host *host);
+ int msm_dsi_host_power_on(struct mipi_dsi_host *host,
+ struct msm_dsi_phy_shared_timings *phy_shared_timings,
+ bool is_bonded_dsi, struct msm_dsi_phy *phy);
+diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+index 96bbc8b6d0092..68a3f8fea9fe6 100644
+--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
++++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+@@ -109,7 +109,7 @@ static const char * const dsi_8996_bus_clk_names[] = {
+ static const struct msm_dsi_config msm8996_dsi_cfg = {
+ .io_offset = DSI_6G_REG_SHIFT,
+ .reg_cfg = {
+- .num = 2,
++ .num = 3,
+ .regs = {
+ {"vdda", 18160, 1 }, /* 1.25 V */
+ {"vcca", 17000, 32 }, /* 0.925 V */
+@@ -148,7 +148,7 @@ static const char * const dsi_sdm660_bus_clk_names[] = {
+ static const struct msm_dsi_config sdm660_dsi_cfg = {
+ .io_offset = DSI_6G_REG_SHIFT,
+ .reg_cfg = {
+- .num = 2,
++ .num = 1,
+ .regs = {
+ {"vdda", 12560, 4 }, /* 1.2 V */
+ },
+@@ -209,8 +209,8 @@ static const struct msm_dsi_config sc7280_dsi_cfg = {
+ },
+ .bus_clk_names = dsi_sc7280_bus_clk_names,
+ .num_bus_clks = ARRAY_SIZE(dsi_sc7280_bus_clk_names),
+- .io_start = { 0xae94000 },
+- .num_dsi = 1,
++ .io_start = { 0xae94000, 0xae96000 },
++ .num_dsi = 2,
+ };
+
+ static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
+diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
+index c86b5090fae60..d3ec4d67a9a35 100644
+--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
++++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
+@@ -115,16 +115,16 @@ struct msm_dsi_host {
+ struct clk *pixel_clk_src;
+ struct clk *byte_intf_clk;
+
+- u32 byte_clk_rate;
+- u32 pixel_clk_rate;
+- u32 esc_clk_rate;
++ unsigned long byte_clk_rate;
++ unsigned long pixel_clk_rate;
++ unsigned long esc_clk_rate;
+
+ /* DSI v2 specific clocks */
+ struct clk *src_clk;
+ struct clk *esc_clk_src;
+ struct clk *dsi_clk_src;
+
+- u32 src_clk_rate;
++ unsigned long src_clk_rate;
+
+ struct gpio_desc *disp_en_gpio;
+ struct gpio_desc *te_gpio;
+@@ -498,10 +498,10 @@ int msm_dsi_runtime_resume(struct device *dev)
+
+ int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host)
+ {
+- u32 byte_intf_rate;
++ unsigned long byte_intf_rate;
+ int ret;
+
+- DBG("Set clk rates: pclk=%d, byteclk=%d",
++ DBG("Set clk rates: pclk=%d, byteclk=%lu",
+ msm_host->mode->clock, msm_host->byte_clk_rate);
+
+ ret = dev_pm_opp_set_rate(&msm_host->pdev->dev,
+@@ -583,7 +583,7 @@ int dsi_link_clk_set_rate_v2(struct msm_dsi_host *msm_host)
+ {
+ int ret;
+
+- DBG("Set clk rates: pclk=%d, byteclk=%d, esc_clk=%d, dsi_src_clk=%d",
++ DBG("Set clk rates: pclk=%d, byteclk=%lu, esc_clk=%lu, dsi_src_clk=%lu",
+ msm_host->mode->clock, msm_host->byte_clk_rate,
+ msm_host->esc_clk_rate, msm_host->src_clk_rate);
+
+@@ -673,10 +673,10 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host)
+ clk_disable_unprepare(msm_host->byte_clk);
+ }
+
+-static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
++static unsigned long dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
+ {
+ struct drm_display_mode *mode = msm_host->mode;
+- u32 pclk_rate;
++ unsigned long pclk_rate;
+
+ pclk_rate = mode->clock * 1000;
+
+@@ -696,7 +696,7 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
+ {
+ u8 lanes = msm_host->lanes;
+ u32 bpp = dsi_get_bpp(msm_host->format);
+- u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi);
++ unsigned long pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi);
+ u64 pclk_bpp = (u64)pclk_rate * bpp;
+
+ if (lanes == 0) {
+@@ -713,7 +713,7 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
+ msm_host->pixel_clk_rate = pclk_rate;
+ msm_host->byte_clk_rate = pclk_bpp;
+
+- DBG("pclk=%d, bclk=%d", msm_host->pixel_clk_rate,
++ DBG("pclk=%lu, bclk=%lu", msm_host->pixel_clk_rate,
+ msm_host->byte_clk_rate);
+
+ }
+@@ -772,7 +772,7 @@ int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
+
+ msm_host->esc_clk_rate = msm_host->byte_clk_rate / esc_div;
+
+- DBG("esc=%d, src=%d", msm_host->esc_clk_rate,
++ DBG("esc=%lu, src=%lu", msm_host->esc_clk_rate,
+ msm_host->src_clk_rate);
+
+ return 0;
+@@ -1375,10 +1375,10 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host,
+ dsi_get_bpp(msm_host->format) / 8;
+
+ len = dsi_cmd_dma_add(msm_host, msg);
+- if (!len) {
++ if (len < 0) {
+ pr_err("%s: failed to add cmd type = 0x%x\n",
+ __func__, msg->type);
+- return -EINVAL;
++ return len;
+ }
+
+ /* for video mode, do not send cmds more than
+@@ -1397,10 +1397,14 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host,
+ }
+
+ ret = dsi_cmd_dma_tx(msm_host, len);
+- if (ret < len) {
+- pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d\n",
+- __func__, msg->type, (*(u8 *)(msg->tx_buf)), len);
+- return -ECOMM;
++ if (ret < 0) {
++ pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d, ret=%d\n",
++ __func__, msg->type, (*(u8 *)(msg->tx_buf)), len, ret);
++ return ret;
++ } else if (ret < len) {
++ pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, ret=%d len=%d\n",
++ __func__, msg->type, (*(u8 *)(msg->tx_buf)), ret, len);
++ return -EIO;
+ }
+
+ return len;
+@@ -1696,6 +1700,8 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
+ if (!prop) {
+ DRM_DEV_DEBUG(dev,
+ "failed to find data lane mapping, using default\n");
++		/* Set the number of data lanes to 4 by default. */
++ msm_host->num_data_lanes = 4;
+ return 0;
+ }
+
+@@ -1898,6 +1904,23 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
+ return ret;
+ }
+
++ msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
++ if (msm_host->irq < 0) {
++ ret = msm_host->irq;
++ dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
++ return ret;
++ }
++
++ /* do not autoenable, will be enabled later */
++ ret = devm_request_irq(&pdev->dev, msm_host->irq, dsi_host_irq,
++ IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN,
++ "dsi_isr", msm_host);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "failed to request IRQ%u: %d\n",
++ msm_host->irq, ret);
++ return ret;
++ }
++
+ init_completion(&msm_host->dma_comp);
+ init_completion(&msm_host->video_comp);
+ mutex_init(&msm_host->dev_mutex);
+@@ -1906,6 +1929,9 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
+
+ /* setup workqueue */
+ msm_host->workqueue = alloc_ordered_workqueue("dsi_drm_work", 0);
++ if (!msm_host->workqueue)
++ return -ENOMEM;
++
+ INIT_WORK(&msm_host->err_work, dsi_err_worker);
+ INIT_WORK(&msm_host->hpd_work, dsi_hpd_worker);
+
+@@ -1941,25 +1967,8 @@ int msm_dsi_host_modeset_init(struct mipi_dsi_host *host,
+ {
+ struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+ const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
+- struct platform_device *pdev = msm_host->pdev;
+ int ret;
+
+- msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+- if (msm_host->irq < 0) {
+- ret = msm_host->irq;
+- DRM_DEV_ERROR(dev->dev, "failed to get irq: %d\n", ret);
+- return ret;
+- }
+-
+- ret = devm_request_irq(&pdev->dev, msm_host->irq,
+- dsi_host_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+- "dsi_isr", msm_host);
+- if (ret < 0) {
+- DRM_DEV_ERROR(&pdev->dev, "failed to request IRQ%u: %d\n",
+- msm_host->irq, ret);
+- return ret;
+- }
+-
+ msm_host->dev = dev;
+ ret = cfg_hnd->ops->tx_buf_alloc(msm_host, SZ_4K);
+ if (ret) {
+@@ -2133,9 +2142,12 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host,
+ }
+
+ ret = dsi_cmds2buf_tx(msm_host, msg);
+- if (ret < msg->tx_len) {
++ if (ret < 0) {
+ pr_err("%s: Read cmd Tx failed, %d\n", __func__, ret);
+ return ret;
++ } else if (ret < msg->tx_len) {
++ pr_err("%s: Read cmd Tx failed, too short: %d\n", __func__, ret);
++ return -ECOMM;
+ }
+
+ /*
+@@ -2315,6 +2327,20 @@ void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
+ clk_req->escclk_rate = msm_host->esc_clk_rate;
+ }
+
++void msm_dsi_host_enable_irq(struct mipi_dsi_host *host)
++{
++ struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
++
++ enable_irq(msm_host->irq);
++}
++
++void msm_dsi_host_disable_irq(struct mipi_dsi_host *host)
++{
++ struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
++
++ disable_irq(msm_host->irq);
++}
++
+ int msm_dsi_host_enable(struct mipi_dsi_host *host)
+ {
+ struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
+index c41d39f5b7cf4..6e43672f58078 100644
+--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
++++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
+@@ -377,6 +377,14 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge)
+ }
+ }
+
++ /*
++ * Enable before preparing the panel, disable after unpreparing, so
++ * that the panel can communicate over the DSI link.
++ */
++ msm_dsi_host_enable_irq(host);
++ if (is_bonded_dsi && msm_dsi1)
++ msm_dsi_host_enable_irq(msm_dsi1->host);
++
+ /* Always call panel functions once, because even for dual panels,
+ * there is only one drm_panel instance.
+ */
+@@ -411,6 +419,10 @@ host_en_fail:
+ if (panel)
+ drm_panel_unprepare(panel);
+ panel_prep_fail:
++ msm_dsi_host_disable_irq(host);
++ if (is_bonded_dsi && msm_dsi1)
++ msm_dsi_host_disable_irq(msm_dsi1->host);
++
+ if (is_bonded_dsi && msm_dsi1)
+ msm_dsi_host_power_off(msm_dsi1->host);
+ host1_on_fail:
+@@ -523,6 +535,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
+ id, ret);
+ }
+
++ msm_dsi_host_disable_irq(host);
++ if (is_bonded_dsi && msm_dsi1)
++ msm_dsi_host_disable_irq(msm_dsi1->host);
++
+ /* Save PHY status if it is a clock source */
+ msm_dsi_phy_pll_save_state(msm_dsi->phy);
+
+@@ -627,27 +643,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
+ return connector;
+
+ fail:
+- connector->funcs->destroy(msm_dsi->connector);
++ connector->funcs->destroy(connector);
+ return ERR_PTR(ret);
+ }
+
+-bool msm_dsi_manager_validate_current_config(u8 id)
+-{
+- bool is_bonded_dsi = IS_BONDED_DSI();
+-
+- /*
+- * For bonded DSI, we only have one drm panel. For this
+- * use case, we register only one bridge/connector.
+- * Skip bridge/connector initialisation if it is
+- * slave-DSI for bonded DSI configuration.
+- */
+- if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) {
+- DBG("Skip bridge registration for slave DSI->id: %d\n", id);
+- return false;
+- }
+- return true;
+-}
+-
+ /* initialize bridge */
+ struct drm_bridge *msm_dsi_manager_bridge_init(u8 id)
+ {
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+index 8c65ef6968caf..6a917fe69a833 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+@@ -347,7 +347,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
+ } else {
+ timing->shared_timings.clk_pre =
+ linear_inter(tmax, tmin, pcnt2, 0, false);
+- timing->shared_timings.clk_pre_inc_by_2 = 0;
++ timing->shared_timings.clk_pre_inc_by_2 = 0;
+ }
+
+ timing->ta_go = 3;
+@@ -806,12 +806,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy,
+ struct msm_dsi_phy_clk_request *clk_req,
+ struct msm_dsi_phy_shared_timings *shared_timings)
+ {
+- struct device *dev = &phy->pdev->dev;
++ struct device *dev;
+ int ret;
+
+ if (!phy || !phy->cfg->ops.enable)
+ return -EINVAL;
+
++ dev = &phy->pdev->dev;
++
+ ret = dsi_phy_enable_resource(phy);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n",
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+index d8128f50b0dd5..0b782cc18b3f4 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+@@ -562,7 +562,9 @@ static int pll_10nm_register(struct dsi_pll_10nm *pll_10nm, struct clk_hw **prov
+ char clk_name[32], parent[32], vco_name[32];
+ char parent2[32], parent3[32], parent4[32];
+ struct clk_init_data vco_init = {
+- .parent_names = (const char *[]){ "xo" },
++ .parent_data = &(const struct clk_parent_data) {
++ .fw_name = "ref",
++ },
+ .num_parents = 1,
+ .name = vco_name,
+ .flags = CLK_IGNORE_UNUSED,
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+index 5b4e991f220d6..66507eb713048 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+@@ -541,6 +541,9 @@ static int dsi_pll_14nm_vco_prepare(struct clk_hw *hw)
+ if (unlikely(pll_14nm->phy->pll_on))
+ return 0;
+
++ if (dsi_pll_14nm_vco_recalc_rate(hw, VCO_REF_CLK_RATE) == 0)
++ dsi_pll_14nm_vco_set_rate(hw, pll_14nm->phy->cfg->min_pll_rate, VCO_REF_CLK_RATE);
++
+ dsi_phy_write(base + REG_DSI_14nm_PHY_PLL_VREF_CFG1, 0x10);
+ dsi_phy_write(cmn_base + REG_DSI_14nm_PHY_CMN_PLL_CNTRL, 1);
+
+@@ -804,7 +807,9 @@ static int pll_14nm_register(struct dsi_pll_14nm *pll_14nm, struct clk_hw **prov
+ {
+ char clk_name[32], parent[32], vco_name[32];
+ struct clk_init_data vco_init = {
+- .parent_names = (const char *[]){ "xo" },
++ .parent_data = &(const struct clk_parent_data) {
++ .fw_name = "ref",
++ },
+ .num_parents = 1,
+ .name = vco_name,
+ .flags = CLK_IGNORE_UNUSED,
+@@ -1062,6 +1067,6 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs = {
+ },
+ .min_pll_rate = VCO_MIN_RATE,
+ .max_pll_rate = VCO_MAX_RATE,
+- .io_start = { 0xc994400, 0xc996000 },
++ .io_start = { 0xc994400, 0xc996400 },
+ .num_dsi_phy = 2,
+ };
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+index 2da673a2add69..48eab80b548e1 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+@@ -521,7 +521,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov
+ {
+ char clk_name[32], parent1[32], parent2[32], vco_name[32];
+ struct clk_init_data vco_init = {
+- .parent_names = (const char *[]){ "xo" },
++ .parent_data = &(const struct clk_parent_data) {
++ .fw_name = "ref", .name = "xo",
++ },
+ .num_parents = 1,
+ .name = vco_name,
+ .flags = CLK_IGNORE_UNUSED,
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+index 71ed4aa0dc67e..fc56cdcc9ad64 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+@@ -385,7 +385,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov
+ {
+ char *clk_name, *parent_name, *vco_name;
+ struct clk_init_data vco_init = {
+- .parent_names = (const char *[]){ "pxo" },
++ .parent_data = &(const struct clk_parent_data) {
++ .fw_name = "ref",
++ },
+ .num_parents = 1,
+ .flags = CLK_IGNORE_UNUSED,
+ .ops = &clk_ops_dsi_pll_28nm_vco,
+diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+index cb297b08458e4..8cc1ef8199ac9 100644
+--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
++++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+@@ -590,7 +590,9 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide
+ char clk_name[32], parent[32], vco_name[32];
+ char parent2[32], parent3[32], parent4[32];
+ struct clk_init_data vco_init = {
+- .parent_names = (const char *[]){ "bi_tcxo" },
++ .parent_data = &(const struct clk_parent_data) {
++ .fw_name = "ref",
++ },
+ .num_parents = 1,
+ .name = vco_name,
+ .flags = CLK_IGNORE_UNUSED,
+@@ -864,20 +866,26 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy,
+ /* Alter PHY configurations if data rate less than 1.5GHZ*/
+ less_than_1500_mhz = (clk_req->bitclk_rate <= 1500000000);
+
+- /* For C-PHY, no low power settings for lower clk rate */
+- if (phy->cphy_mode)
+- less_than_1500_mhz = false;
+-
+ if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1) {
+ vreg_ctrl_0 = less_than_1500_mhz ? 0x53 : 0x52;
+- glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00;
+- glbl_rescode_bot_ctrl = less_than_1500_mhz ? 0x39 : 0x3c;
++ if (phy->cphy_mode) {
++ glbl_rescode_top_ctrl = 0x00;
++ glbl_rescode_bot_ctrl = 0x3c;
++ } else {
++ glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00;
++ glbl_rescode_bot_ctrl = less_than_1500_mhz ? 0x39 : 0x3c;
++ }
+ glbl_str_swi_cal_sel_ctrl = 0x00;
+ glbl_hstx_str_ctrl_0 = 0x88;
+ } else {
+ vreg_ctrl_0 = less_than_1500_mhz ? 0x5B : 0x59;
+- glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00;
+- glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 0x66 : 0x88;
++ if (phy->cphy_mode) {
++ glbl_str_swi_cal_sel_ctrl = 0x03;
++ glbl_hstx_str_ctrl_0 = 0x66;
++ } else {
++ glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00;
++ glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 0x66 : 0x88;
++ }
+ glbl_rescode_top_ctrl = 0x03;
+ glbl_rescode_bot_ctrl = 0x3c;
+ }
+diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
+index 737453b6e5966..2c944419e1758 100644
+--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
++++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
+@@ -8,6 +8,8 @@
+ #include <linux/of_irq.h>
+ #include <linux/of_gpio.h>
+
++#include <drm/drm_bridge_connector.h>
++
+ #include <sound/hdmi-codec.h>
+ #include "hdmi.h"
+
+@@ -41,7 +43,7 @@ static irqreturn_t msm_hdmi_irq(int irq, void *dev_id)
+ struct hdmi *hdmi = dev_id;
+
+ /* Process HPD: */
+- msm_hdmi_connector_irq(hdmi->connector);
++ msm_hdmi_hpd_irq(hdmi->bridge);
+
+ /* Process DDC: */
+ msm_hdmi_i2c_irq(hdmi->i2c);
+@@ -97,8 +99,13 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi)
+
+ of_node_put(phy_node);
+
+- if (!phy_pdev || !hdmi->phy) {
++ if (!phy_pdev) {
++ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
++ return -EPROBE_DEFER;
++ }
++ if (!hdmi->phy) {
+ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
++ put_device(&phy_pdev->dev);
+ return -EPROBE_DEFER;
+ }
+
+@@ -137,6 +144,10 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev)
+ /* HDCP needs physical address of hdmi register */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ config->mmio_name);
++ if (!res) {
++ ret = -EINVAL;
++ goto fail;
++ }
+ hdmi->mmio_phy_addr = res->start;
+
+ hdmi->qfprom_mmio = msm_ioremap(pdev,
+@@ -236,9 +247,27 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev)
+ hdmi->pwr_clks[i] = clk;
+ }
+
+- pm_runtime_enable(&pdev->dev);
++ hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN);
++ /* This will catch e.g. -EPROBE_DEFER */
++ if (IS_ERR(hdmi->hpd_gpiod)) {
++ ret = PTR_ERR(hdmi->hpd_gpiod);
++ DRM_DEV_ERROR(&pdev->dev, "failed to get hpd gpio: (%d)\n", ret);
++ goto fail;
++ }
++
++ if (!hdmi->hpd_gpiod)
++ DBG("failed to get HPD gpio");
++
++ if (hdmi->hpd_gpiod)
++ gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD");
++
++ devm_pm_runtime_enable(&pdev->dev);
+
+ hdmi->workq = alloc_ordered_workqueue("msm_hdmi", 0);
++ if (!hdmi->workq) {
++ ret = -ENOMEM;
++ goto fail;
++ }
+
+ hdmi->i2c = msm_hdmi_i2c_init(hdmi);
+ if (IS_ERR(hdmi->i2c)) {
+@@ -284,6 +313,11 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
+ struct platform_device *pdev = hdmi->pdev;
+ int ret;
+
++ if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) {
++ DRM_DEV_ERROR(dev->dev, "too many bridges\n");
++ return -ENOSPC;
++ }
++
+ hdmi->dev = dev;
+ hdmi->encoder = encoder;
+
+@@ -297,7 +331,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
+ goto fail;
+ }
+
+- hdmi->connector = msm_hdmi_connector_init(hdmi);
++ hdmi->connector = drm_bridge_connector_init(hdmi->dev, encoder);
+ if (IS_ERR(hdmi->connector)) {
+ ret = PTR_ERR(hdmi->connector);
+ DRM_DEV_ERROR(dev->dev, "failed to create HDMI connector: %d\n", ret);
+@@ -305,15 +339,17 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
+ goto fail;
+ }
+
++ drm_connector_attach_encoder(hdmi->connector, hdmi->encoder);
++
+ hdmi->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+- if (hdmi->irq < 0) {
+- ret = hdmi->irq;
+- DRM_DEV_ERROR(dev->dev, "failed to get irq: %d\n", ret);
++ if (!hdmi->irq) {
++ ret = -EINVAL;
++ DRM_DEV_ERROR(dev->dev, "failed to get irq\n");
+ goto fail;
+ }
+
+- ret = devm_request_irq(&pdev->dev, hdmi->irq,
+- msm_hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
++ ret = devm_request_irq(dev->dev, hdmi->irq,
++ msm_hdmi_irq, IRQF_TRIGGER_HIGH,
+ "hdmi_isr", hdmi);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev->dev, "failed to request IRQ%u: %d\n",
+@@ -321,7 +357,9 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
+ goto fail;
+ }
+
+- ret = msm_hdmi_hpd_enable(hdmi->connector);
++ drm_bridge_connector_enable_hpd(hdmi->connector);
++
++ ret = msm_hdmi_hpd_enable(hdmi->bridge);
+ if (ret < 0) {
+ DRM_DEV_ERROR(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret);
+ goto fail;
+@@ -409,20 +447,6 @@ static struct hdmi_platform_config hdmi_tx_8996_config = {
+ .hpd_freq = hpd_clk_freq_8x74,
+ };
+
+-static const struct {
+- const char *name;
+- const bool output;
+- const int value;
+- const char *label;
+-} msm_hdmi_gpio_pdata[] = {
+- { "qcom,hdmi-tx-ddc-clk", true, 1, "HDMI_DDC_CLK" },
+- { "qcom,hdmi-tx-ddc-data", true, 1, "HDMI_DDC_DATA" },
+- { "qcom,hdmi-tx-hpd", false, 1, "HDMI_HPD" },
+- { "qcom,hdmi-tx-mux-en", true, 1, "HDMI_MUX_EN" },
+- { "qcom,hdmi-tx-mux-sel", true, 0, "HDMI_MUX_SEL" },
+- { "qcom,hdmi-tx-mux-lpm", true, 1, "HDMI_MUX_LPM" },
+-};
+-
+ /*
+ * HDMI audio codec callbacks
+ */
+@@ -535,7 +559,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
+ struct hdmi_platform_config *hdmi_cfg;
+ struct hdmi *hdmi;
+ struct device_node *of_node = dev->of_node;
+- int i, err;
++ int err;
+
+ hdmi_cfg = (struct hdmi_platform_config *)
+ of_device_get_match_data(dev);
+@@ -547,42 +571,6 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
+ hdmi_cfg->mmio_name = "core_physical";
+ hdmi_cfg->qfprom_mmio_name = "qfprom_physical";
+
+- for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+- const char *name = msm_hdmi_gpio_pdata[i].name;
+- struct gpio_desc *gpiod;
+-
+- /*
+- * We are fetching the GPIO lines "as is" since the connector
+- * code is enabling and disabling the lines. Until that point
+- * the power-on default value will be kept.
+- */
+- gpiod = devm_gpiod_get_optional(dev, name, GPIOD_ASIS);
+- /* This will catch e.g. -PROBE_DEFER */
+- if (IS_ERR(gpiod))
+- return PTR_ERR(gpiod);
+- if (!gpiod) {
+- /* Try a second time, stripping down the name */
+- char name3[32];
+-
+- /*
+- * Try again after stripping out the "qcom,hdmi-tx"
+- * prefix. This is mainly to match "hpd-gpios" used
+- * in the upstream bindings.
+- */
+- if (sscanf(name, "qcom,hdmi-tx-%s", name3))
+- gpiod = devm_gpiod_get_optional(dev, name3, GPIOD_ASIS);
+- if (IS_ERR(gpiod))
+- return PTR_ERR(gpiod);
+- if (!gpiod)
+- DBG("failed to get gpio: %s", name);
+- }
+- hdmi_cfg->gpios[i].gpiod = gpiod;
+- if (gpiod)
+- gpiod_set_consumer_name(gpiod, msm_hdmi_gpio_pdata[i].label);
+- hdmi_cfg->gpios[i].output = msm_hdmi_gpio_pdata[i].output;
+- hdmi_cfg->gpios[i].value = msm_hdmi_gpio_pdata[i].value;
+- }
+-
+ dev->platform_data = hdmi_cfg;
+
+ hdmi = msm_hdmi_init(to_platform_device(dev));
+diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
+index d0b84f0abee17..20f554312b17c 100644
+--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
++++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
+@@ -19,17 +19,9 @@
+ #include "msm_drv.h"
+ #include "hdmi.xml.h"
+
+-#define HDMI_MAX_NUM_GPIO 6
+-
+ struct hdmi_phy;
+ struct hdmi_platform_config;
+
+-struct hdmi_gpio_data {
+- struct gpio_desc *gpiod;
+- bool output;
+- int value;
+-};
+-
+ struct hdmi_audio {
+ bool enabled;
+ struct hdmi_audio_infoframe infoframe;
+@@ -61,6 +53,8 @@ struct hdmi {
+ struct clk **hpd_clks;
+ struct clk **pwr_clks;
+
++ struct gpio_desc *hpd_gpiod;
++
+ struct hdmi_phy *phy;
+ struct device *phy_dev;
+
+@@ -109,10 +103,14 @@ struct hdmi_platform_config {
+ /* clks that need to be on for screen pwr (ie pixel clk): */
+ const char **pwr_clk_names;
+ int pwr_clk_cnt;
++};
+
+- /* gpio's: */
+- struct hdmi_gpio_data gpios[HDMI_MAX_NUM_GPIO];
++struct hdmi_bridge {
++ struct drm_bridge base;
++ struct hdmi *hdmi;
++ struct work_struct hpd_work;
+ };
++#define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base)
+
+ void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on);
+
+@@ -230,13 +228,11 @@ void msm_hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate);
+ struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi);
+ void msm_hdmi_bridge_destroy(struct drm_bridge *bridge);
+
+-/*
+- * hdmi connector:
+- */
+-
+-void msm_hdmi_connector_irq(struct drm_connector *connector);
+-struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi);
+-int msm_hdmi_hpd_enable(struct drm_connector *connector);
++void msm_hdmi_hpd_irq(struct drm_bridge *bridge);
++enum drm_connector_status msm_hdmi_bridge_detect(
++ struct drm_bridge *bridge);
++int msm_hdmi_hpd_enable(struct drm_bridge *bridge);
++void msm_hdmi_hpd_disable(struct hdmi_bridge *hdmi_bridge);
+
+ /*
+ * i2c adapter for ddc:
+diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
+index 6e380db9287ba..efcfdd70a02e0 100644
+--- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
++++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
+@@ -5,17 +5,16 @@
+ */
+
+ #include <linux/delay.h>
++#include <drm/drm_bridge_connector.h>
+
++#include "msm_kms.h"
+ #include "hdmi.h"
+
+-struct hdmi_bridge {
+- struct drm_bridge base;
+- struct hdmi *hdmi;
+-};
+-#define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base)
+-
+ void msm_hdmi_bridge_destroy(struct drm_bridge *bridge)
+ {
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++
++ msm_hdmi_hpd_disable(hdmi_bridge);
+ }
+
+ static void msm_hdmi_power_on(struct drm_bridge *bridge)
+@@ -259,14 +258,76 @@ static void msm_hdmi_bridge_mode_set(struct drm_bridge *bridge,
+ msm_hdmi_audio_update(hdmi);
+ }
+
++static struct edid *msm_hdmi_bridge_get_edid(struct drm_bridge *bridge,
++ struct drm_connector *connector)
++{
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ struct edid *edid;
++ uint32_t hdmi_ctrl;
++
++ hdmi_ctrl = hdmi_read(hdmi, REG_HDMI_CTRL);
++ hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl | HDMI_CTRL_ENABLE);
++
++ edid = drm_get_edid(connector, hdmi->i2c);
++
++ hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl);
++
++ hdmi->hdmi_mode = drm_detect_hdmi_monitor(edid);
++
++ return edid;
++}
++
++static enum drm_mode_status msm_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
++ const struct drm_display_info *info,
++ const struct drm_display_mode *mode)
++{
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ const struct hdmi_platform_config *config = hdmi->config;
++ struct msm_drm_private *priv = bridge->dev->dev_private;
++ struct msm_kms *kms = priv->kms;
++ long actual, requested;
++
++ requested = 1000 * mode->clock;
++ actual = kms->funcs->round_pixclk(kms,
++ requested, hdmi_bridge->hdmi->encoder);
++
++ /* for mdp5/apq8074, we manage our own pixel clk (as opposed to
++ * mdp4/dtv stuff where pixel clk is assigned to mdp/encoder
++ * instead):
++ */
++ if (config->pwr_clk_cnt > 0)
++ actual = clk_round_rate(hdmi->pwr_clks[0], actual);
++
++ DBG("requested=%ld, actual=%ld", requested, actual);
++
++ if (actual != requested)
++ return MODE_CLOCK_RANGE;
++
++ return 0;
++}
++
+ static const struct drm_bridge_funcs msm_hdmi_bridge_funcs = {
+ .pre_enable = msm_hdmi_bridge_pre_enable,
+ .enable = msm_hdmi_bridge_enable,
+ .disable = msm_hdmi_bridge_disable,
+ .post_disable = msm_hdmi_bridge_post_disable,
+ .mode_set = msm_hdmi_bridge_mode_set,
++ .mode_valid = msm_hdmi_bridge_mode_valid,
++ .get_edid = msm_hdmi_bridge_get_edid,
++ .detect = msm_hdmi_bridge_detect,
+ };
+
++static void
++msm_hdmi_hotplug_work(struct work_struct *work)
++{
++ struct hdmi_bridge *hdmi_bridge =
++ container_of(work, struct hdmi_bridge, hpd_work);
++ struct drm_bridge *bridge = &hdmi_bridge->base;
++
++ drm_bridge_hpd_notify(bridge, drm_bridge_detect(bridge));
++}
+
+ /* initialize bridge */
+ struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi)
+@@ -283,11 +344,17 @@ struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi)
+ }
+
+ hdmi_bridge->hdmi = hdmi;
++ INIT_WORK(&hdmi_bridge->hpd_work, msm_hdmi_hotplug_work);
+
+ bridge = &hdmi_bridge->base;
+ bridge->funcs = &msm_hdmi_bridge_funcs;
++ bridge->ddc = hdmi->i2c;
++ bridge->type = DRM_MODE_CONNECTOR_HDMIA;
++ bridge->ops = DRM_BRIDGE_OP_HPD |
++ DRM_BRIDGE_OP_DETECT |
++ DRM_BRIDGE_OP_EDID;
+
+- ret = drm_bridge_attach(hdmi->encoder, bridge, NULL, 0);
++ ret = drm_bridge_attach(hdmi->encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret)
+ goto fail;
+
+diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+deleted file mode 100644
+index 58707a1f3878f..0000000000000
+--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
++++ /dev/null
+@@ -1,451 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (C) 2013 Red Hat
+- * Author: Rob Clark <robdclark@gmail.com>
+- */
+-
+-#include <linux/delay.h>
+-#include <linux/gpio/consumer.h>
+-#include <linux/pinctrl/consumer.h>
+-
+-#include "msm_kms.h"
+-#include "hdmi.h"
+-
+-struct hdmi_connector {
+- struct drm_connector base;
+- struct hdmi *hdmi;
+- struct work_struct hpd_work;
+-};
+-#define to_hdmi_connector(x) container_of(x, struct hdmi_connector, base)
+-
+-static void msm_hdmi_phy_reset(struct hdmi *hdmi)
+-{
+- unsigned int val;
+-
+- val = hdmi_read(hdmi, REG_HDMI_PHY_CTRL);
+-
+- if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+- /* pull low */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val & ~HDMI_PHY_CTRL_SW_RESET);
+- } else {
+- /* pull high */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val | HDMI_PHY_CTRL_SW_RESET);
+- }
+-
+- if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
+- /* pull low */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
+- } else {
+- /* pull high */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val | HDMI_PHY_CTRL_SW_RESET_PLL);
+- }
+-
+- msleep(100);
+-
+- if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+- /* pull high */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val | HDMI_PHY_CTRL_SW_RESET);
+- } else {
+- /* pull low */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val & ~HDMI_PHY_CTRL_SW_RESET);
+- }
+-
+- if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
+- /* pull high */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val | HDMI_PHY_CTRL_SW_RESET_PLL);
+- } else {
+- /* pull low */
+- hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+- val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
+- }
+-}
+-
+-static int gpio_config(struct hdmi *hdmi, bool on)
+-{
+- const struct hdmi_platform_config *config = hdmi->config;
+- int i;
+-
+- if (on) {
+- for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+- struct hdmi_gpio_data gpio = config->gpios[i];
+-
+- if (gpio.gpiod) {
+- if (gpio.output) {
+- gpiod_direction_output(gpio.gpiod,
+- gpio.value);
+- } else {
+- gpiod_direction_input(gpio.gpiod);
+- gpiod_set_value_cansleep(gpio.gpiod,
+- gpio.value);
+- }
+- }
+- }
+-
+- DBG("gpio on");
+- } else {
+- for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+- struct hdmi_gpio_data gpio = config->gpios[i];
+-
+- if (!gpio.gpiod)
+- continue;
+-
+- if (gpio.output) {
+- int value = gpio.value ? 0 : 1;
+-
+- gpiod_set_value_cansleep(gpio.gpiod, value);
+- }
+- }
+-
+- DBG("gpio off");
+- }
+-
+- return 0;
+-}
+-
+-static void enable_hpd_clocks(struct hdmi *hdmi, bool enable)
+-{
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct device *dev = &hdmi->pdev->dev;
+- int i, ret;
+-
+- if (enable) {
+- for (i = 0; i < config->hpd_clk_cnt; i++) {
+- if (config->hpd_freq && config->hpd_freq[i]) {
+- ret = clk_set_rate(hdmi->hpd_clks[i],
+- config->hpd_freq[i]);
+- if (ret)
+- dev_warn(dev,
+- "failed to set clk %s (%d)\n",
+- config->hpd_clk_names[i], ret);
+- }
+-
+- ret = clk_prepare_enable(hdmi->hpd_clks[i]);
+- if (ret) {
+- DRM_DEV_ERROR(dev,
+- "failed to enable hpd clk: %s (%d)\n",
+- config->hpd_clk_names[i], ret);
+- }
+- }
+- } else {
+- for (i = config->hpd_clk_cnt - 1; i >= 0; i--)
+- clk_disable_unprepare(hdmi->hpd_clks[i]);
+- }
+-}
+-
+-int msm_hdmi_hpd_enable(struct drm_connector *connector)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct device *dev = &hdmi->pdev->dev;
+- uint32_t hpd_ctrl;
+- int i, ret;
+- unsigned long flags;
+-
+- for (i = 0; i < config->hpd_reg_cnt; i++) {
+- ret = regulator_enable(hdmi->hpd_regs[i]);
+- if (ret) {
+- DRM_DEV_ERROR(dev, "failed to enable hpd regulator: %s (%d)\n",
+- config->hpd_reg_names[i], ret);
+- goto fail;
+- }
+- }
+-
+- ret = pinctrl_pm_select_default_state(dev);
+- if (ret) {
+- DRM_DEV_ERROR(dev, "pinctrl state chg failed: %d\n", ret);
+- goto fail;
+- }
+-
+- ret = gpio_config(hdmi, true);
+- if (ret) {
+- DRM_DEV_ERROR(dev, "failed to configure GPIOs: %d\n", ret);
+- goto fail;
+- }
+-
+- pm_runtime_get_sync(dev);
+- enable_hpd_clocks(hdmi, true);
+-
+- msm_hdmi_set_mode(hdmi, false);
+- msm_hdmi_phy_reset(hdmi);
+- msm_hdmi_set_mode(hdmi, true);
+-
+- hdmi_write(hdmi, REG_HDMI_USEC_REFTIMER, 0x0001001b);
+-
+- /* enable HPD events: */
+- hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
+- HDMI_HPD_INT_CTRL_INT_CONNECT |
+- HDMI_HPD_INT_CTRL_INT_EN);
+-
+- /* set timeout to 4.1ms (max) for hardware debounce */
+- spin_lock_irqsave(&hdmi->reg_lock, flags);
+- hpd_ctrl = hdmi_read(hdmi, REG_HDMI_HPD_CTRL);
+- hpd_ctrl |= HDMI_HPD_CTRL_TIMEOUT(0x1fff);
+-
+- /* Toggle HPD circuit to trigger HPD sense */
+- hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
+- ~HDMI_HPD_CTRL_ENABLE & hpd_ctrl);
+- hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
+- HDMI_HPD_CTRL_ENABLE | hpd_ctrl);
+- spin_unlock_irqrestore(&hdmi->reg_lock, flags);
+-
+- return 0;
+-
+-fail:
+- return ret;
+-}
+-
+-static void hdp_disable(struct hdmi_connector *hdmi_connector)
+-{
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct device *dev = &hdmi->pdev->dev;
+- int i, ret = 0;
+-
+- /* Disable HPD interrupt */
+- hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, 0);
+-
+- msm_hdmi_set_mode(hdmi, false);
+-
+- enable_hpd_clocks(hdmi, false);
+- pm_runtime_put_autosuspend(dev);
+-
+- ret = gpio_config(hdmi, false);
+- if (ret)
+- dev_warn(dev, "failed to unconfigure GPIOs: %d\n", ret);
+-
+- ret = pinctrl_pm_select_sleep_state(dev);
+- if (ret)
+- dev_warn(dev, "pinctrl state chg failed: %d\n", ret);
+-
+- for (i = 0; i < config->hpd_reg_cnt; i++) {
+- ret = regulator_disable(hdmi->hpd_regs[i]);
+- if (ret)
+- dev_warn(dev, "failed to disable hpd regulator: %s (%d)\n",
+- config->hpd_reg_names[i], ret);
+- }
+-}
+-
+-static void
+-msm_hdmi_hotplug_work(struct work_struct *work)
+-{
+- struct hdmi_connector *hdmi_connector =
+- container_of(work, struct hdmi_connector, hpd_work);
+- struct drm_connector *connector = &hdmi_connector->base;
+- drm_helper_hpd_irq_event(connector->dev);
+-}
+-
+-void msm_hdmi_connector_irq(struct drm_connector *connector)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- uint32_t hpd_int_status, hpd_int_ctrl;
+-
+- /* Process HPD: */
+- hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+- hpd_int_ctrl = hdmi_read(hdmi, REG_HDMI_HPD_INT_CTRL);
+-
+- if ((hpd_int_ctrl & HDMI_HPD_INT_CTRL_INT_EN) &&
+- (hpd_int_status & HDMI_HPD_INT_STATUS_INT)) {
+- bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED);
+-
+- /* ack & disable (temporarily) HPD events: */
+- hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
+- HDMI_HPD_INT_CTRL_INT_ACK);
+-
+- DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl);
+-
+- /* detect disconnect if we are connected or visa versa: */
+- hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN;
+- if (!detected)
+- hpd_int_ctrl |= HDMI_HPD_INT_CTRL_INT_CONNECT;
+- hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, hpd_int_ctrl);
+-
+- queue_work(hdmi->workq, &hdmi_connector->hpd_work);
+- }
+-}
+-
+-static enum drm_connector_status detect_reg(struct hdmi *hdmi)
+-{
+- uint32_t hpd_int_status;
+-
+- pm_runtime_get_sync(&hdmi->pdev->dev);
+- enable_hpd_clocks(hdmi, true);
+-
+- hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+-
+- enable_hpd_clocks(hdmi, false);
+- pm_runtime_put_autosuspend(&hdmi->pdev->dev);
+-
+- return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ?
+- connector_status_connected : connector_status_disconnected;
+-}
+-
+-#define HPD_GPIO_INDEX 2
+-static enum drm_connector_status detect_gpio(struct hdmi *hdmi)
+-{
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX];
+-
+- return gpiod_get_value(hpd_gpio.gpiod) ?
+- connector_status_connected :
+- connector_status_disconnected;
+-}
+-
+-static enum drm_connector_status hdmi_connector_detect(
+- struct drm_connector *connector, bool force)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX];
+- enum drm_connector_status stat_gpio, stat_reg;
+- int retry = 20;
+-
+- /*
+- * some platforms may not have hpd gpio. Rely only on the status
+- * provided by REG_HDMI_HPD_INT_STATUS in this case.
+- */
+- if (!hpd_gpio.gpiod)
+- return detect_reg(hdmi);
+-
+- do {
+- stat_gpio = detect_gpio(hdmi);
+- stat_reg = detect_reg(hdmi);
+-
+- if (stat_gpio == stat_reg)
+- break;
+-
+- mdelay(10);
+- } while (--retry);
+-
+- /* the status we get from reading gpio seems to be more reliable,
+- * so trust that one the most if we didn't manage to get hdmi and
+- * gpio status to agree:
+- */
+- if (stat_gpio != stat_reg) {
+- DBG("HDMI_HPD_INT_STATUS tells us: %d", stat_reg);
+- DBG("hpd gpio tells us: %d", stat_gpio);
+- }
+-
+- return stat_gpio;
+-}
+-
+-static void hdmi_connector_destroy(struct drm_connector *connector)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+-
+- hdp_disable(hdmi_connector);
+-
+- drm_connector_cleanup(connector);
+-
+- kfree(hdmi_connector);
+-}
+-
+-static int msm_hdmi_connector_get_modes(struct drm_connector *connector)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- struct edid *edid;
+- uint32_t hdmi_ctrl;
+- int ret = 0;
+-
+- hdmi_ctrl = hdmi_read(hdmi, REG_HDMI_CTRL);
+- hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl | HDMI_CTRL_ENABLE);
+-
+- edid = drm_get_edid(connector, hdmi->i2c);
+-
+- hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl);
+-
+- hdmi->hdmi_mode = drm_detect_hdmi_monitor(edid);
+- drm_connector_update_edid_property(connector, edid);
+-
+- if (edid) {
+- ret = drm_add_edid_modes(connector, edid);
+- kfree(edid);
+- }
+-
+- return ret;
+-}
+-
+-static int msm_hdmi_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
+-{
+- struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+- struct hdmi *hdmi = hdmi_connector->hdmi;
+- const struct hdmi_platform_config *config = hdmi->config;
+- struct msm_drm_private *priv = connector->dev->dev_private;
+- struct msm_kms *kms = priv->kms;
+- long actual, requested;
+-
+- requested = 1000 * mode->clock;
+- actual = kms->funcs->round_pixclk(kms,
+- requested, hdmi_connector->hdmi->encoder);
+-
+- /* for mdp5/apq8074, we manage our own pixel clk (as opposed to
+- * mdp4/dtv stuff where pixel clk is assigned to mdp/encoder
+- * instead):
+- */
+- if (config->pwr_clk_cnt > 0)
+- actual = clk_round_rate(hdmi->pwr_clks[0], actual);
+-
+- DBG("requested=%ld, actual=%ld", requested, actual);
+-
+- if (actual != requested)
+- return MODE_CLOCK_RANGE;
+-
+- return 0;
+-}
+-
+-static const struct drm_connector_funcs hdmi_connector_funcs = {
+- .detect = hdmi_connector_detect,
+- .fill_modes = drm_helper_probe_single_connector_modes,
+- .destroy = hdmi_connector_destroy,
+- .reset = drm_atomic_helper_connector_reset,
+- .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+- .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+-};
+-
+-static const struct drm_connector_helper_funcs msm_hdmi_connector_helper_funcs = {
+- .get_modes = msm_hdmi_connector_get_modes,
+- .mode_valid = msm_hdmi_connector_mode_valid,
+-};
+-
+-/* initialize connector */
+-struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
+-{
+- struct drm_connector *connector = NULL;
+- struct hdmi_connector *hdmi_connector;
+-
+- hdmi_connector = kzalloc(sizeof(*hdmi_connector), GFP_KERNEL);
+- if (!hdmi_connector)
+- return ERR_PTR(-ENOMEM);
+-
+- hdmi_connector->hdmi = hdmi;
+- INIT_WORK(&hdmi_connector->hpd_work, msm_hdmi_hotplug_work);
+-
+- connector = &hdmi_connector->base;
+-
+- drm_connector_init_with_ddc(hdmi->dev, connector,
+- &hdmi_connector_funcs,
+- DRM_MODE_CONNECTOR_HDMIA,
+- hdmi->i2c);
+- drm_connector_helper_add(connector, &msm_hdmi_connector_helper_funcs);
+-
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+- DRM_CONNECTOR_POLL_DISCONNECT;
+-
+- connector->interlace_allowed = 0;
+- connector->doublescan_allowed = 0;
+-
+- drm_connector_attach_encoder(connector, hdmi->encoder);
+-
+- return connector;
+-}
+diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c
+new file mode 100644
+index 0000000000000..52ebe562ca9be
+--- /dev/null
++++ b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c
+@@ -0,0 +1,269 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2013 Red Hat
++ * Author: Rob Clark <robdclark@gmail.com>
++ */
++
++#include <linux/delay.h>
++#include <linux/gpio/consumer.h>
++#include <linux/pinctrl/consumer.h>
++
++#include "msm_kms.h"
++#include "hdmi.h"
++
++static void msm_hdmi_phy_reset(struct hdmi *hdmi)
++{
++ unsigned int val;
++
++ val = hdmi_read(hdmi, REG_HDMI_PHY_CTRL);
++
++ if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
++ /* pull low */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val & ~HDMI_PHY_CTRL_SW_RESET);
++ } else {
++ /* pull high */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val | HDMI_PHY_CTRL_SW_RESET);
++ }
++
++ if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
++ /* pull low */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
++ } else {
++ /* pull high */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val | HDMI_PHY_CTRL_SW_RESET_PLL);
++ }
++
++ msleep(100);
++
++ if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
++ /* pull high */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val | HDMI_PHY_CTRL_SW_RESET);
++ } else {
++ /* pull low */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val & ~HDMI_PHY_CTRL_SW_RESET);
++ }
++
++ if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
++ /* pull high */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val | HDMI_PHY_CTRL_SW_RESET_PLL);
++ } else {
++ /* pull low */
++ hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
++ val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
++ }
++}
++
++static void enable_hpd_clocks(struct hdmi *hdmi, bool enable)
++{
++ const struct hdmi_platform_config *config = hdmi->config;
++ struct device *dev = &hdmi->pdev->dev;
++ int i, ret;
++
++ if (enable) {
++ for (i = 0; i < config->hpd_clk_cnt; i++) {
++ if (config->hpd_freq && config->hpd_freq[i]) {
++ ret = clk_set_rate(hdmi->hpd_clks[i],
++ config->hpd_freq[i]);
++ if (ret)
++ dev_warn(dev,
++ "failed to set clk %s (%d)\n",
++ config->hpd_clk_names[i], ret);
++ }
++
++ ret = clk_prepare_enable(hdmi->hpd_clks[i]);
++ if (ret) {
++ DRM_DEV_ERROR(dev,
++ "failed to enable hpd clk: %s (%d)\n",
++ config->hpd_clk_names[i], ret);
++ }
++ }
++ } else {
++ for (i = config->hpd_clk_cnt - 1; i >= 0; i--)
++ clk_disable_unprepare(hdmi->hpd_clks[i]);
++ }
++}
++
++int msm_hdmi_hpd_enable(struct drm_bridge *bridge)
++{
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ const struct hdmi_platform_config *config = hdmi->config;
++ struct device *dev = &hdmi->pdev->dev;
++ uint32_t hpd_ctrl;
++ int i, ret;
++ unsigned long flags;
++
++ for (i = 0; i < config->hpd_reg_cnt; i++) {
++ ret = regulator_enable(hdmi->hpd_regs[i]);
++ if (ret) {
++ DRM_DEV_ERROR(dev, "failed to enable hpd regulator: %s (%d)\n",
++ config->hpd_reg_names[i], ret);
++ goto fail;
++ }
++ }
++
++ ret = pinctrl_pm_select_default_state(dev);
++ if (ret) {
++ DRM_DEV_ERROR(dev, "pinctrl state chg failed: %d\n", ret);
++ goto fail;
++ }
++
++ if (hdmi->hpd_gpiod)
++ gpiod_set_value_cansleep(hdmi->hpd_gpiod, 1);
++
++ pm_runtime_get_sync(dev);
++ enable_hpd_clocks(hdmi, true);
++
++ msm_hdmi_set_mode(hdmi, false);
++ msm_hdmi_phy_reset(hdmi);
++ msm_hdmi_set_mode(hdmi, true);
++
++ hdmi_write(hdmi, REG_HDMI_USEC_REFTIMER, 0x0001001b);
++
++ /* enable HPD events: */
++ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
++ HDMI_HPD_INT_CTRL_INT_CONNECT |
++ HDMI_HPD_INT_CTRL_INT_EN);
++
++ /* set timeout to 4.1ms (max) for hardware debounce */
++ spin_lock_irqsave(&hdmi->reg_lock, flags);
++ hpd_ctrl = hdmi_read(hdmi, REG_HDMI_HPD_CTRL);
++ hpd_ctrl |= HDMI_HPD_CTRL_TIMEOUT(0x1fff);
++
++ /* Toggle HPD circuit to trigger HPD sense */
++ hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
++ ~HDMI_HPD_CTRL_ENABLE & hpd_ctrl);
++ hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
++ HDMI_HPD_CTRL_ENABLE | hpd_ctrl);
++ spin_unlock_irqrestore(&hdmi->reg_lock, flags);
++
++ return 0;
++
++fail:
++ return ret;
++}
++
++void msm_hdmi_hpd_disable(struct hdmi_bridge *hdmi_bridge)
++{
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ const struct hdmi_platform_config *config = hdmi->config;
++ struct device *dev = &hdmi->pdev->dev;
++ int i, ret = 0;
++
++ /* Disable HPD interrupt */
++ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, 0);
++
++ msm_hdmi_set_mode(hdmi, false);
++
++ enable_hpd_clocks(hdmi, false);
++ pm_runtime_put_autosuspend(dev);
++
++ ret = pinctrl_pm_select_sleep_state(dev);
++ if (ret)
++ dev_warn(dev, "pinctrl state chg failed: %d\n", ret);
++
++ for (i = 0; i < config->hpd_reg_cnt; i++) {
++ ret = regulator_disable(hdmi->hpd_regs[i]);
++ if (ret)
++ dev_warn(dev, "failed to disable hpd regulator: %s (%d)\n",
++ config->hpd_reg_names[i], ret);
++ }
++}
++
++void msm_hdmi_hpd_irq(struct drm_bridge *bridge)
++{
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ uint32_t hpd_int_status, hpd_int_ctrl;
++
++ /* Process HPD: */
++ hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
++ hpd_int_ctrl = hdmi_read(hdmi, REG_HDMI_HPD_INT_CTRL);
++
++ if ((hpd_int_ctrl & HDMI_HPD_INT_CTRL_INT_EN) &&
++ (hpd_int_status & HDMI_HPD_INT_STATUS_INT)) {
++ bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED);
++
++ /* ack & disable (temporarily) HPD events: */
++ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
++ HDMI_HPD_INT_CTRL_INT_ACK);
++
++ DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl);
++
++		/* detect disconnect if we are connected or vice versa: */
++ hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN;
++ if (!detected)
++ hpd_int_ctrl |= HDMI_HPD_INT_CTRL_INT_CONNECT;
++ hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, hpd_int_ctrl);
++
++ queue_work(hdmi->workq, &hdmi_bridge->hpd_work);
++ }
++}
++
++static enum drm_connector_status detect_reg(struct hdmi *hdmi)
++{
++ uint32_t hpd_int_status;
++
++ pm_runtime_get_sync(&hdmi->pdev->dev);
++ enable_hpd_clocks(hdmi, true);
++
++ hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
++
++ enable_hpd_clocks(hdmi, false);
++ pm_runtime_put_autosuspend(&hdmi->pdev->dev);
++
++ return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ?
++ connector_status_connected : connector_status_disconnected;
++}
++
++#define HPD_GPIO_INDEX 2
++static enum drm_connector_status detect_gpio(struct hdmi *hdmi)
++{
++ return gpiod_get_value(hdmi->hpd_gpiod) ?
++ connector_status_connected :
++ connector_status_disconnected;
++}
++
++enum drm_connector_status msm_hdmi_bridge_detect(
++ struct drm_bridge *bridge)
++{
++ struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
++ struct hdmi *hdmi = hdmi_bridge->hdmi;
++ enum drm_connector_status stat_gpio, stat_reg;
++ int retry = 20;
++
++ /*
++ * some platforms may not have hpd gpio. Rely only on the status
++ * provided by REG_HDMI_HPD_INT_STATUS in this case.
++ */
++ if (!hdmi->hpd_gpiod)
++ return detect_reg(hdmi);
++
++ do {
++ stat_gpio = detect_gpio(hdmi);
++ stat_reg = detect_reg(hdmi);
++
++ if (stat_gpio == stat_reg)
++ break;
++
++ mdelay(10);
++ } while (--retry);
++
++ /* the status we get from reading gpio seems to be more reliable,
++	 * so trust it most if we didn't manage to get hdmi and
++ * gpio status to agree:
++ */
++ if (stat_gpio != stat_reg) {
++ DBG("HDMI_HPD_INT_STATUS tells us: %d", stat_reg);
++ DBG("hpd gpio tells us: %d", stat_gpio);
++ }
++
++ return stat_gpio;
++}
+diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
+index fab09e7c6efc3..458f4e4316dd4 100644
+--- a/drivers/gpu/drm/msm/msm_atomic.c
++++ b/drivers/gpu/drm/msm/msm_atomic.c
+@@ -5,7 +5,6 @@
+ */
+
+ #include <drm/drm_atomic_uapi.h>
+-#include <drm/drm_gem_atomic_helper.h>
+ #include <drm/drm_vblank.h>
+
+ #include "msm_atomic_trace.h"
+@@ -13,20 +12,6 @@
+ #include "msm_gem.h"
+ #include "msm_kms.h"
+
+-int msm_atomic_prepare_fb(struct drm_plane *plane,
+- struct drm_plane_state *new_state)
+-{
+- struct msm_drm_private *priv = plane->dev->dev_private;
+- struct msm_kms *kms = priv->kms;
+-
+- if (!new_state->fb)
+- return 0;
+-
+- drm_gem_plane_helper_prepare_fb(plane, new_state);
+-
+- return msm_framebuffer_prepare(new_state->fb, kms->aspace);
+-}
+-
+ /*
+ * Helpers to control vblanks while we flush.. basically just to ensure
+ * that vblank accounting is switched on, so we get valid seqn/timestamp
+diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
+index 09d2d279c30ae..f970a14b66336 100644
+--- a/drivers/gpu/drm/msm/msm_debugfs.c
++++ b/drivers/gpu/drm/msm/msm_debugfs.c
+@@ -29,14 +29,14 @@ static int msm_gpu_show(struct seq_file *m, void *arg)
+ struct msm_gpu *gpu = priv->gpu;
+ int ret;
+
+- ret = mutex_lock_interruptible(&show_priv->dev->struct_mutex);
++ ret = mutex_lock_interruptible(&gpu->lock);
+ if (ret)
+ return ret;
+
+ drm_printf(&p, "%s Status:\n", gpu->name);
+ gpu->funcs->show(gpu, show_priv->state, &p);
+
+- mutex_unlock(&show_priv->dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ return 0;
+ }
+@@ -48,9 +48,9 @@ static int msm_gpu_release(struct inode *inode, struct file *file)
+ struct msm_drm_private *priv = show_priv->dev->dev_private;
+ struct msm_gpu *gpu = priv->gpu;
+
+- mutex_lock(&show_priv->dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+ gpu->funcs->gpu_state_put(show_priv->state);
+- mutex_unlock(&show_priv->dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ kfree(show_priv);
+
+@@ -72,15 +72,16 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
+ if (!show_priv)
+ return -ENOMEM;
+
+- ret = mutex_lock_interruptible(&dev->struct_mutex);
++ ret = mutex_lock_interruptible(&gpu->lock);
+ if (ret)
+ goto free_priv;
+
+ pm_runtime_get_sync(&gpu->pdev->dev);
++ msm_gpu_hw_init(gpu);
+ show_priv->state = gpu->funcs->gpu_state_get(gpu);
+ pm_runtime_put_sync(&gpu->pdev->dev);
+
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ if (IS_ERR(show_priv->state)) {
+ ret = PTR_ERR(show_priv->state);
+diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
+index d4e09703a87db..e238d2beb7abe 100644
+--- a/drivers/gpu/drm/msm/msm_drv.c
++++ b/drivers/gpu/drm/msm/msm_drv.c
+@@ -11,6 +11,7 @@
+ #include <linux/uaccess.h>
+ #include <uapi/linux/sched/types.h>
+
++#include <drm/drm_bridge.h>
+ #include <drm/drm_drv.h>
+ #include <drm/drm_file.h>
+ #include <drm/drm_ioctl.h>
+@@ -236,6 +237,8 @@ static int msm_irq_postinstall(struct drm_device *dev)
+
+ static int msm_irq_install(struct drm_device *dev, unsigned int irq)
+ {
++ struct msm_drm_private *priv = dev->dev_private;
++ struct msm_kms *kms = priv->kms;
+ int ret;
+
+ if (irq == IRQ_NOTCONNECTED)
+@@ -247,6 +250,8 @@ static int msm_irq_install(struct drm_device *dev, unsigned int irq)
+ if (ret)
+ return ret;
+
++ kms->irq_requested = true;
++
+ ret = msm_irq_postinstall(dev);
+ if (ret) {
+ free_irq(irq, dev);
+@@ -262,7 +267,8 @@ static void msm_irq_uninstall(struct drm_device *dev)
+ struct msm_kms *kms = priv->kms;
+
+ kms->funcs->irq_uninstall(kms);
+- free_irq(kms->irq, dev);
++ if (kms->irq_requested)
++ free_irq(kms->irq, dev);
+ }
+
+ struct msm_vblank_work {
+@@ -353,13 +359,16 @@ static int msm_drm_uninit(struct device *dev)
+ msm_fbdev_free(ddev);
+ #endif
+
+- msm_disp_snapshot_destroy(ddev);
++ if (kms)
++ msm_disp_snapshot_destroy(ddev);
+
+ drm_mode_config_cleanup(ddev);
+
+- pm_runtime_get_sync(dev);
+- msm_irq_uninstall(ddev);
+- pm_runtime_put_sync(dev);
++ if (kms) {
++ pm_runtime_get_sync(dev);
++ msm_irq_uninstall(ddev);
++ pm_runtime_put_sync(dev);
++ }
+
+ if (kms && kms->funcs)
+ kms->funcs->destroy(kms);
+@@ -437,7 +446,7 @@ static int msm_init_vram(struct drm_device *dev)
+ of_node_put(node);
+ if (ret)
+ return ret;
+- size = r.end - r.start;
++ size = r.end - r.start + 1;
+ DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start);
+
+ /* if we have no IOMMU, then we need to use carveout allocator.
+@@ -603,7 +612,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
+ if (IS_ERR(priv->event_thread[i].worker)) {
+ ret = PTR_ERR(priv->event_thread[i].worker);
+ DRM_DEV_ERROR(dev, "failed to create crtc_event kthread\n");
+- ret = PTR_ERR(priv->event_thread[i].worker);
++ priv->event_thread[i].worker = NULL;
+ goto err_msm_uninit;
+ }
+
+@@ -938,29 +947,18 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
+ return ret;
+ }
+
+-static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+- struct drm_file *file)
++static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
++ ktime_t timeout)
+ {
+- struct msm_drm_private *priv = dev->dev_private;
+- struct drm_msm_wait_fence *args = data;
+- ktime_t timeout = to_ktime(args->timeout);
+- struct msm_gpu_submitqueue *queue;
+- struct msm_gpu *gpu = priv->gpu;
+ struct dma_fence *fence;
+ int ret;
+
+- if (args->pad) {
+- DRM_ERROR("invalid pad: %08x\n", args->pad);
++ if (fence_id > queue->last_fence) {
++ DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
++ fence_id, queue->last_fence);
+ return -EINVAL;
+ }
+
+- if (!gpu)
+- return 0;
+-
+- queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+- if (!queue)
+- return -ENOENT;
+-
+ /*
+ * Map submitqueue scoped "seqno" (which is actually an idr key)
+ * back to underlying dma-fence
+@@ -972,7 +970,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+ ret = mutex_lock_interruptible(&queue->lock);
+ if (ret)
+ return ret;
+- fence = idr_find(&queue->fence_idr, args->fence);
++ fence = idr_find(&queue->fence_idr, fence_id);
+ if (fence)
+ fence = dma_fence_get_rcu(fence);
+ mutex_unlock(&queue->lock);
+@@ -988,6 +986,32 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+ }
+
+ dma_fence_put(fence);
++
++ return ret;
++}
++
++static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
++ struct drm_file *file)
++{
++ struct msm_drm_private *priv = dev->dev_private;
++ struct drm_msm_wait_fence *args = data;
++ struct msm_gpu_submitqueue *queue;
++ int ret;
++
++ if (args->pad) {
++ DRM_ERROR("invalid pad: %08x\n", args->pad);
++ return -EINVAL;
++ }
++
++ if (!priv->gpu)
++ return 0;
++
++ queue = msm_submitqueue_get(file->driver_priv, args->queueid);
++ if (!queue)
++ return -ENOENT;
++
++ ret = wait_fence(queue, args->fence, to_ktime(args->timeout));
++
+ msm_submitqueue_put(queue);
+
+ return ret;
+@@ -1081,7 +1105,7 @@ static const struct drm_driver msm_driver = {
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import_sg_table = msm_gem_prime_import_sg_table,
+- .gem_prime_mmap = drm_gem_prime_mmap,
++ .gem_prime_mmap = msm_gem_prime_mmap,
+ #ifdef CONFIG_DEBUG_FS
+ .debugfs_init = msm_debugfs_init,
+ #endif
+diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
+index c552f0c3890c1..8488e49817e1e 100644
+--- a/drivers/gpu/drm/msm/msm_drv.h
++++ b/drivers/gpu/drm/msm/msm_drv.h
+@@ -247,8 +247,6 @@ struct msm_format {
+
+ struct msm_pending_timer;
+
+-int msm_atomic_prepare_fb(struct drm_plane *plane,
+- struct drm_plane_state *new_state);
+ int msm_atomic_init_pending_timer(struct msm_pending_timer *timer,
+ struct msm_kms *kms, int crtc_idx);
+ void msm_atomic_destroy_pending_timer(struct msm_pending_timer *timer);
+@@ -298,6 +296,7 @@ unsigned long msm_gem_shrinker_shrink(struct drm_device *dev, unsigned long nr_t
+ void msm_gem_shrinker_init(struct drm_device *dev);
+ void msm_gem_shrinker_cleanup(struct drm_device *dev);
+
++int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
+ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+ void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+@@ -307,9 +306,9 @@ int msm_gem_prime_pin(struct drm_gem_object *obj);
+ void msm_gem_prime_unpin(struct drm_gem_object *obj);
+
+ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
+- struct msm_gem_address_space *aspace);
++ struct msm_gem_address_space *aspace, bool needs_dirtyfb);
+ void msm_framebuffer_cleanup(struct drm_framebuffer *fb,
+- struct msm_gem_address_space *aspace);
++ struct msm_gem_address_space *aspace, bool needed_dirtyfb);
+ uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb,
+ struct msm_gem_address_space *aspace, int plane);
+ struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane);
+diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
+index 4d34df5354e07..7137492fe78e2 100644
+--- a/drivers/gpu/drm/msm/msm_fb.c
++++ b/drivers/gpu/drm/msm/msm_fb.c
+@@ -18,16 +18,36 @@
+ struct msm_framebuffer {
+ struct drm_framebuffer base;
+ const struct msm_format *format;
++
++ /* Count of # of attached planes which need dirtyfb: */
++ refcount_t dirtyfb;
+ };
+ #define to_msm_framebuffer(x) container_of(x, struct msm_framebuffer, base)
+
+ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
+
++static int msm_framebuffer_dirtyfb(struct drm_framebuffer *fb,
++ struct drm_file *file_priv, unsigned int flags,
++ unsigned int color, struct drm_clip_rect *clips,
++ unsigned int num_clips)
++{
++ struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
++
++ /* If this fb is not used on any display requiring pixel data to be
++ * flushed, then skip dirtyfb
++ */
++ if (refcount_read(&msm_fb->dirtyfb) == 1)
++ return 0;
++
++ return drm_atomic_helper_dirtyfb(fb, file_priv, flags, color,
++ clips, num_clips);
++}
++
+ static const struct drm_framebuffer_funcs msm_framebuffer_funcs = {
+ .create_handle = drm_gem_fb_create_handle,
+ .destroy = drm_gem_fb_destroy,
+- .dirty = drm_atomic_helper_dirtyfb,
++ .dirty = msm_framebuffer_dirtyfb,
+ };
+
+ #ifdef CONFIG_DEBUG_FS
+@@ -48,17 +68,19 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
+ }
+ #endif
+
+-/* prepare/pin all the fb's bo's for scanout. Note that it is not valid
+- * to prepare an fb more multiple different initiator 'id's. But that
+- * should be fine, since only the scanout (mdpN) side of things needs
+- * this, the gpu doesn't care about fb's.
++/* prepare/pin all the fb's bo's for scanout.
+ */
+ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
+- struct msm_gem_address_space *aspace)
++ struct msm_gem_address_space *aspace,
++ bool needs_dirtyfb)
+ {
++ struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+ int ret, i, n = fb->format->num_planes;
+ uint64_t iova;
+
++ if (needs_dirtyfb)
++ refcount_inc(&msm_fb->dirtyfb);
++
+ for (i = 0; i < n; i++) {
+ ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &iova);
+ drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)", fb->base.id, i, iova, ret);
+@@ -70,10 +92,15 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
+ }
+
+ void msm_framebuffer_cleanup(struct drm_framebuffer *fb,
+- struct msm_gem_address_space *aspace)
++ struct msm_gem_address_space *aspace,
++ bool needed_dirtyfb)
+ {
++ struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+ int i, n = fb->format->num_planes;
+
++ if (needed_dirtyfb)
++ refcount_dec(&msm_fb->dirtyfb);
++
+ for (i = 0; i < n; i++)
+ msm_gem_unpin_iova(fb->obj[i], aspace);
+ }
+@@ -194,6 +221,8 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
+ goto fail;
+ }
+
++ refcount_set(&msm_fb->dirtyfb, 1);
++
+ drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+
+ return fb;
+diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
+index f2cece542c3f7..76439678919c2 100644
+--- a/drivers/gpu/drm/msm/msm_fence.c
++++ b/drivers/gpu/drm/msm/msm_fence.c
+@@ -21,7 +21,7 @@ msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
+ return ERR_PTR(-ENOMEM);
+
+ fctx->dev = dev;
+- strncpy(fctx->name, name, sizeof(fctx->name));
++ strscpy(fctx->name, name, sizeof(fctx->name));
+ fctx->context = dma_fence_context_alloc(1);
+ fctx->fenceptr = fenceptr;
+ spin_lock_init(&fctx->spinlock);
+diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
+index 22308a1b66fc3..d280dd64744de 100644
+--- a/drivers/gpu/drm/msm/msm_gem.c
++++ b/drivers/gpu/drm/msm/msm_gem.c
+@@ -937,6 +937,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
+ get_pid_task(aspace->pid, PIDTYPE_PID);
+ if (task) {
+ comm = kstrdup(task->comm, GFP_KERNEL);
++ put_task_struct(task);
+ } else {
+ comm = NULL;
+ }
+@@ -1055,8 +1056,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct
+ {
+ struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+- vma->vm_flags &= ~VM_PFNMAP;
+- vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
++ vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags));
+
+ return 0;
+@@ -1132,6 +1132,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
+ msm_obj->flags = flags;
+ msm_obj->madv = MSM_MADV_WILLNEED;
+
++ INIT_LIST_HEAD(&msm_obj->node);
+ INIT_LIST_HEAD(&msm_obj->vmas);
+
+ *obj = &msm_obj->base;
+@@ -1166,7 +1167,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32
+
+ ret = msm_gem_new_impl(dev, size, flags, &obj);
+ if (ret)
+- goto fail;
++ return ERR_PTR(ret);
+
+ msm_obj = to_msm_bo(obj);
+
+@@ -1250,7 +1251,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
+
+ ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj);
+ if (ret)
+- goto fail;
++ return ERR_PTR(ret);
+
+ drm_gem_private_object_init(dev, obj, size);
+
+diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
+index fc94e061d6a7c..02c70a0b2a036 100644
+--- a/drivers/gpu/drm/msm/msm_gem_prime.c
++++ b/drivers/gpu/drm/msm/msm_gem_prime.c
+@@ -11,13 +11,28 @@
+ #include "msm_drv.h"
+ #include "msm_gem.h"
+
++int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
++{
++ int ret;
++
++ /* Ensure the mmap offset is initialized. We lazily initialize it,
++ * so if it has not been first mmap'd directly as a GEM object, the
++ * mmap offset will not be already initialized.
++ */
++ ret = drm_gem_create_mmap_offset(obj);
++ if (ret)
++ return ret;
++
++ return drm_gem_prime_mmap(obj, vma);
++}
++
+ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
+ {
+ struct msm_gem_object *msm_obj = to_msm_bo(obj);
+ int npages = obj->size >> PAGE_SHIFT;
+
+ if (WARN_ON(!msm_obj->pages)) /* should have already pinned! */
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ return drm_prime_pages_to_sg(obj->dev, msm_obj->pages, npages);
+ }
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index 151d19e4453cd..fc2fb1019ea1c 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -220,6 +220,10 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
+ goto out;
+ }
+ submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL);
++ if (!submit->cmd[i].relocs) {
++ ret = -ENOMEM;
++ goto out;
++ }
+ ret = copy_from_user(submit->cmd[i].relocs, userptr, sz);
+ if (ret) {
+ ret = -EFAULT;
+@@ -636,8 +640,8 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+ int ret = 0;
+ uint32_t i, j;
+
+- post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps),
+- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
++ post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps),
++ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (!post_deps)
+ return ERR_PTR(-ENOMEM);
+
+@@ -652,7 +656,6 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+ }
+
+ post_deps[i].point = syncobj_desc.point;
+- post_deps[i].chain = NULL;
+
+ if (syncobj_desc.flags) {
+ ret = -EINVAL;
+@@ -780,6 +783,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+ args->nr_cmds);
+ if (IS_ERR(submit)) {
+ ret = PTR_ERR(submit);
++ submit = NULL;
+ goto out_unlock;
+ }
+
+@@ -886,9 +890,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+ * to the underlying fence.
+ */
+ submit->fence_id = idr_alloc_cyclic(&queue->fence_idr,
+- submit->user_fence, 0, INT_MAX, GFP_KERNEL);
++ submit->user_fence, 1, INT_MAX, GFP_KERNEL);
+ if (submit->fence_id < 0) {
+- ret = submit->fence_id = 0;
++ ret = submit->fence_id;
+ submit->fence_id = 0;
+ goto out;
+ }
+@@ -911,6 +915,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+ drm_sched_entity_push_job(&submit->base, queue->entity);
+
+ args->fence = submit->fence_id;
++ queue->last_fence = submit->fence_id;
+
+ msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
+ msm_process_post_deps(post_deps, args->nr_out_syncobjs,
+diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
+index 8a3a592da3a4d..a2f21b89d077c 100644
+--- a/drivers/gpu/drm/msm/msm_gpu.c
++++ b/drivers/gpu/drm/msm/msm_gpu.c
+@@ -150,7 +150,7 @@ int msm_gpu_hw_init(struct msm_gpu *gpu)
+ {
+ int ret;
+
+- WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));
++ WARN_ON(!mutex_is_locked(&gpu->lock));
+
+ if (!gpu->needs_hw_init)
+ return 0;
+@@ -296,7 +296,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
+ state->bos = kcalloc(nr,
+ sizeof(struct msm_gpu_state_bo), GFP_KERNEL);
+
+- for (i = 0; i < submit->nr_bos; i++) {
++ for (i = 0; state->bos && i < submit->nr_bos; i++) {
+ if (should_dump(submit, i)) {
+ msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
+ submit->bos[i].iova, submit->bos[i].flags);
+@@ -361,7 +361,7 @@ static void recover_worker(struct kthread_work *work)
+ char *comm = NULL, *cmd = NULL;
+ int i;
+
+- mutex_lock(&dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
+
+@@ -442,7 +442,7 @@ static void recover_worker(struct kthread_work *work)
+ }
+ }
+
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ msm_gpu_retire(gpu);
+ }
+@@ -450,12 +450,11 @@ static void recover_worker(struct kthread_work *work)
+ static void fault_worker(struct kthread_work *work)
+ {
+ struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work);
+- struct drm_device *dev = gpu->dev;
+ struct msm_gem_submit *submit;
+ struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
+ char *comm = NULL, *cmd = NULL;
+
+- mutex_lock(&dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
+ if (submit && submit->fault_dumped)
+@@ -490,7 +489,7 @@ resume_smmu:
+ memset(&gpu->fault_info, 0, sizeof(gpu->fault_info));
+ gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
+
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+ }
+
+ static void hangcheck_timer_reset(struct msm_gpu *gpu)
+@@ -658,7 +657,6 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ msm_submit_retire(submit);
+
+ pm_runtime_mark_last_busy(&gpu->pdev->dev);
+- pm_runtime_put_autosuspend(&gpu->pdev->dev);
+
+ spin_lock_irqsave(&ring->submit_lock, flags);
+ list_del(&submit->node);
+@@ -672,6 +670,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ msm_devfreq_idle(gpu);
+ mutex_unlock(&gpu->active_lock);
+
++ pm_runtime_put_autosuspend(&gpu->pdev->dev);
++
+ msm_gem_submit_put(submit);
+ }
+
+@@ -733,7 +733,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+ struct msm_ringbuffer *ring = submit->ring;
+ unsigned long flags;
+
+- WARN_ON(!mutex_is_locked(&dev->struct_mutex));
++ WARN_ON(!mutex_is_locked(&gpu->lock));
+
+ pm_runtime_get_sync(&gpu->pdev->dev);
+
+@@ -848,6 +848,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+
+ INIT_LIST_HEAD(&gpu->active_list);
+ mutex_init(&gpu->active_lock);
++ mutex_init(&gpu->lock);
+ kthread_init_work(&gpu->retire_work, retire_worker);
+ kthread_init_work(&gpu->recover_work, recover_worker);
+ kthread_init_work(&gpu->fault_work, fault_worker);
+diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
+index ee25d556c8a10..461ff5a5aa5bb 100644
+--- a/drivers/gpu/drm/msm/msm_gpu.h
++++ b/drivers/gpu/drm/msm/msm_gpu.h
+@@ -143,13 +143,23 @@ struct msm_gpu {
+ */
+ struct list_head active_list;
+
++ /**
++ * lock:
++ *
++ * General lock for serializing all the gpu things.
++ *
++ * TODO move to per-ring locking where feasible (ie. submit/retire
++ * path, etc)
++ */
++ struct mutex lock;
++
+ /**
+ * active_submits:
+ *
+ * The number of submitted but not yet retired submits, used to
+ * determine transitions between active and idle.
+ *
+- * Protected by lock
++ * Protected by active_lock
+ */
+ int active_submits;
+
+@@ -352,6 +362,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
+ * @ring_nr: the ringbuffer used by this submitqueue, which is determined
+ * by the submitqueue's priority
+ * @faults: the number of GPU hangs associated with this submitqueue
++ * @last_fence: the sequence number of the last allocated fence (for error
++ * checking)
+ * @ctx: the per-drm_file context associated with the submitqueue (ie.
+ * which set of pgtables do submits jobs associated with the
+ * submitqueue use)
+@@ -367,6 +379,7 @@ struct msm_gpu_submitqueue {
+ u32 flags;
+ u32 ring_nr;
+ int faults;
++ uint32_t last_fence;
+ struct msm_file_private *ctx;
+ struct list_head node;
+ struct idr fence_idr;
+@@ -527,28 +540,28 @@ static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
+ {
+ struct msm_gpu_state *state = NULL;
+
+- mutex_lock(&gpu->dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ if (gpu->crashstate) {
+ kref_get(&gpu->crashstate->ref);
+ state = gpu->crashstate;
+ }
+
+- mutex_unlock(&gpu->dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ return state;
+ }
+
+ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
+ {
+- mutex_lock(&gpu->dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ if (gpu->crashstate) {
+ if (gpu->funcs->gpu_state_put(gpu->crashstate))
+ gpu->crashstate = NULL;
+ }
+
+- mutex_unlock(&gpu->dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+ }
+
+ /*
+diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+index 20006d060b5b5..4ac2a4eb984d8 100644
+--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
++++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+@@ -20,6 +20,10 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+ struct msm_gpu *gpu = dev_to_gpu(dev);
+ struct dev_pm_opp *opp;
+
++ /*
++ * Note that devfreq_recommended_opp() can modify the freq
++ * to something that actually is in the opp table:
++ */
+ opp = devfreq_recommended_opp(dev, freq, flags);
+
+ /*
+@@ -28,6 +32,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
+ */
+ if (gpu->devfreq.idle_freq) {
+ gpu->devfreq.idle_freq = *freq;
++ dev_pm_opp_put(opp);
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
+index bcaddbba564df..ef4da3f0cd22d 100644
+--- a/drivers/gpu/drm/msm/msm_iommu.c
++++ b/drivers/gpu/drm/msm/msm_iommu.c
+@@ -58,7 +58,7 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
+ u64 addr = iova;
+ unsigned int i;
+
+- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
++ for_each_sgtable_sg(sgt, sg, i) {
+ size_t size = sg->length;
+ phys_addr_t phys = sg_phys(sg);
+
+@@ -157,7 +157,12 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
+ /* Get the pagetable configuration from the domain */
+ if (adreno_smmu->cookie)
+ ttbr1_cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie);
+- if (!ttbr1_cfg)
++
++ /*
++ * If you hit this WARN_ONCE() you are probably missing an entry in
++ * qcom_smmu_impl_of_match[] in arm-smmu-qcom.c
++ */
++ if (WARN_ONCE(!ttbr1_cfg, "No per-process page tables"))
+ return ERR_PTR(-ENODEV);
+
+ /*
+diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
+index de2bc3467bb53..afa30e2ba1f14 100644
+--- a/drivers/gpu/drm/msm/msm_kms.h
++++ b/drivers/gpu/drm/msm/msm_kms.h
+@@ -149,6 +149,7 @@ struct msm_kms {
+
+ /* irq number to be passed on to msm_irq_install */
+ int irq;
++ bool irq_requested;
+
+ /* mapper-id used to request GEM buffer mapped for scanout: */
+ struct msm_gem_address_space *aspace;
+diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
+index 3a27153eef084..3d3da79fec2aa 100644
+--- a/drivers/gpu/drm/msm/msm_perf.c
++++ b/drivers/gpu/drm/msm/msm_perf.c
+@@ -155,9 +155,12 @@ static int perf_open(struct inode *inode, struct file *file)
+ struct msm_gpu *gpu = priv->gpu;
+ int ret = 0;
+
+- mutex_lock(&dev->struct_mutex);
++ if (!gpu)
++ return -ENODEV;
+
+- if (perf->open || !gpu) {
++ mutex_lock(&gpu->lock);
++
++ if (perf->open) {
+ ret = -EBUSY;
+ goto out;
+ }
+@@ -171,7 +174,7 @@ static int perf_open(struct inode *inode, struct file *file)
+ perf->next_jiffies = jiffies + SAMPLE_TIME;
+
+ out:
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
+index b55398a34fa48..15a44491a42c3 100644
+--- a/drivers/gpu/drm/msm/msm_rd.c
++++ b/drivers/gpu/drm/msm/msm_rd.c
+@@ -86,7 +86,7 @@ struct msm_rd_state {
+ struct msm_gem_submit *submit;
+
+ /* fifo access is synchronized on the producer side by
+- * struct_mutex held by submit code (otherwise we could
++ * gpu->lock held by submit code (otherwise we could
+ * end up w/ cmds logged in different order than they
+ * were executed). And read_lock synchronizes the reads
+ */
+@@ -181,9 +181,12 @@ static int rd_open(struct inode *inode, struct file *file)
+ uint32_t gpu_id;
+ int ret = 0;
+
+- mutex_lock(&dev->struct_mutex);
++ if (!gpu)
++ return -ENODEV;
+
+- if (rd->open || !gpu) {
++ mutex_lock(&gpu->lock);
++
++ if (rd->open) {
+ ret = -EBUSY;
+ goto out;
+ }
+@@ -191,6 +194,9 @@ static int rd_open(struct inode *inode, struct file *file)
+ file->private_data = rd;
+ rd->open = true;
+
++ /* Reset fifo to clear any previously unread data: */
++ rd->fifo.head = rd->fifo.tail = 0;
++
+ /* the parsing tools need to know gpu-id to know which
+ * register database to load.
+ */
+@@ -200,7 +206,7 @@ static int rd_open(struct inode *inode, struct file *file)
+ rd_write_section(rd, RD_GPU_ID, &gpu_id, sizeof(gpu_id));
+
+ out:
+- mutex_unlock(&dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+ return ret;
+ }
+
+@@ -340,11 +346,10 @@ out_unlock:
+ msm_gem_unlock(&obj->base);
+ }
+
+-/* called under struct_mutex */
++/* called under gpu->lock */
+ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+ const char *fmt, ...)
+ {
+- struct drm_device *dev = submit->dev;
+ struct task_struct *task;
+ char msg[256];
+ int i, n;
+@@ -355,7 +360,7 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+ /* writing into fifo is serialized by caller, and
+ * rd->read_lock is used to serialize the reads
+ */
+- WARN_ON(!mutex_is_locked(&dev->struct_mutex));
++ WARN_ON(!mutex_is_locked(&submit->gpu->lock));
+
+ if (fmt) {
+ va_list args;
+diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
+index bd54c14126497..a2314b75962fd 100644
+--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
+@@ -32,11 +32,11 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
+ pm_runtime_get_sync(&gpu->pdev->dev);
+
+ /* TODO move submit path over to using a per-ring lock.. */
+- mutex_lock(&gpu->dev->struct_mutex);
++ mutex_lock(&gpu->lock);
+
+ msm_gpu_submit(gpu, submit);
+
+- mutex_unlock(&gpu->dev->struct_mutex);
++ mutex_unlock(&gpu->lock);
+
+ pm_runtime_put(&gpu->pdev->dev);
+
+diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
+index b8621c6e05546..7cb158bcbcf67 100644
+--- a/drivers/gpu/drm/msm/msm_submitqueue.c
++++ b/drivers/gpu/drm/msm/msm_submitqueue.c
+@@ -101,6 +101,7 @@ get_sched_entity(struct msm_file_private *ctx, struct msm_ringbuffer *ring,
+
+ ret = drm_sched_entity_init(entity, sched_prio, &sched, 1, NULL);
+ if (ret) {
++ mutex_unlock(&entity_lock);
+ kfree(entity);
+ return ERR_PTR(ret);
+ }
+diff --git a/drivers/gpu/drm/mxsfb/Kconfig b/drivers/gpu/drm/mxsfb/Kconfig
+index ee22cd25d3e3d..e7201e16119a4 100644
+--- a/drivers/gpu/drm/mxsfb/Kconfig
++++ b/drivers/gpu/drm/mxsfb/Kconfig
+@@ -8,6 +8,7 @@ config DRM_MXSFB
+ tristate "i.MX (e)LCDIF LCD controller"
+ depends on DRM && OF
+ depends on COMMON_CLK
++ depends on ARCH_MXS || ARCH_MXC || COMPILE_TEST
+ select DRM_MXS
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+index 89dd618d78f31..988bc4fbd78df 100644
+--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
++++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+@@ -361,7 +361,17 @@ static void mxsfb_crtc_atomic_enable(struct drm_crtc *crtc,
+ bridge_state =
+ drm_atomic_get_new_bridge_state(state,
+ mxsfb->bridge);
+- bus_format = bridge_state->input_bus_cfg.format;
++ if (!bridge_state)
++ bus_format = MEDIA_BUS_FMT_FIXED;
++ else
++ bus_format = bridge_state->input_bus_cfg.format;
++
++ if (bus_format == MEDIA_BUS_FMT_FIXED) {
++ dev_warn_once(drm->dev,
++ "Bridge does not provide bus format, assuming MEDIA_BUS_FMT_RGB888_1X24.\n"
++ "Please fix bridge driver by handling atomic_get_input_bus_fmts.\n");
++ bus_format = MEDIA_BUS_FMT_RGB888_1X24;
++ }
+ }
+
+ /* If there is no bridge, use bus format from connector */
+diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
+index 7739f46470d3e..99fee4d8cd318 100644
+--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
++++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
+@@ -205,7 +205,7 @@ nv04_display_destroy(struct drm_device *dev)
+ nvif_notify_dtor(&disp->flip);
+
+ nouveau_display(dev)->priv = NULL;
+- kfree(disp);
++ vfree(disp);
+
+ nvif_object_unmap(&drm->client.device.object);
+ }
+@@ -223,7 +223,7 @@ nv04_display_create(struct drm_device *dev)
+ struct nv04_display *disp;
+ int i, ret;
+
+- disp = kzalloc(sizeof(*disp), GFP_KERNEL);
++ disp = vzalloc(sizeof(*disp));
+ if (!disp)
+ return -ENOMEM;
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h
+index 3d82b3c67decc..93f8f4f645784 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/atom.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h
+@@ -160,14 +160,14 @@ nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc)
+ static inline struct drm_encoder *
+ nv50_head_atom_get_encoder(struct nv50_head_atom *atom)
+ {
+- struct drm_encoder *encoder = NULL;
++ struct drm_encoder *encoder;
+
+ /* We only ever have a single encoder */
+ drm_for_each_encoder_mask(encoder, atom->state.crtc->dev,
+ atom->state.encoder_mask)
+- break;
++ return encoder;
+
+- return encoder;
++ return NULL;
+ }
+
+ #define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state)
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c
+index 66f32d965c723..5624a716e11c1 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/crc.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c
+@@ -411,9 +411,18 @@ void nv50_crc_atomic_check_outp(struct nv50_atom *atom)
+ struct nv50_head_atom *armh = nv50_head_atom(old_crtc_state);
+ struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
+ struct nv50_outp_atom *outp_atom;
+- struct nouveau_encoder *outp =
+- nv50_real_outp(nv50_head_atom_get_encoder(armh));
+- struct drm_encoder *encoder = &outp->base.base;
++ struct nouveau_encoder *outp;
++ struct drm_encoder *encoder, *enc;
++
++ enc = nv50_head_atom_get_encoder(armh);
++ if (!enc)
++ continue;
++
++ outp = nv50_real_outp(enc);
++ if (!outp)
++ continue;
++
++ encoder = &outp->base.base;
+
+ if (!asyh->clr.crc)
+ continue;
+@@ -464,8 +473,16 @@ void nv50_crc_atomic_set(struct nv50_head *head,
+ struct drm_device *dev = crtc->dev;
+ struct nv50_crc *crc = &head->crc;
+ const struct nv50_crc_func *func = nv50_disp(dev)->core->func->crc;
+- struct nouveau_encoder *outp =
+- nv50_real_outp(nv50_head_atom_get_encoder(asyh));
++ struct nouveau_encoder *outp;
++ struct drm_encoder *encoder;
++
++ encoder = nv50_head_atom_get_encoder(asyh);
++ if (!encoder)
++ return;
++
++ outp = nv50_real_outp(encoder);
++ if (!outp)
++ return;
+
+ func->set_src(head, outp->or,
+ nv50_crc_source_type(outp, asyh->crc.src),
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+index d7b9f7f8c9e31..73e24e0c98976 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+@@ -411,6 +411,35 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder,
+ return 0;
+ }
+
++static void
++nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state)
++{
++ struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
++ struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
++ struct drm_display_mode *mode = &asyh->state.adjusted_mode;
++ unsigned int max_rate, mode_rate;
++
++ switch (nv_encoder->dcb->type) {
++ case DCB_OUTPUT_DP:
++ max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw;
++
++ /* we don't support more than 10 anyway */
++ asyh->or.bpc = min_t(u8, asyh->or.bpc, 10);
++
++ /* reduce the bpc until it works out */
++ while (asyh->or.bpc > 6) {
++ mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8);
++ if (mode_rate <= max_rate)
++ break;
++
++ asyh->or.bpc -= 2;
++ }
++ break;
++ default:
++ break;
++ }
++}
++
+ static int
+ nv50_outp_atomic_check(struct drm_encoder *encoder,
+ struct drm_crtc_state *crtc_state,
+@@ -429,6 +458,9 @@ nv50_outp_atomic_check(struct drm_encoder *encoder,
+ if (crtc_state->mode_changed || crtc_state->connectors_changed)
+ asyh->or.bpc = connector->display_info.bpc;
+
++ /* We might have to reduce the bpc */
++ nv50_outp_atomic_fix_depth(encoder, crtc_state);
++
+ return 0;
+ }
+
+@@ -2622,14 +2654,6 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend)
+ {
+ struct nouveau_drm *drm = nouveau_drm(dev);
+ struct drm_encoder *encoder;
+- struct drm_plane *plane;
+-
+- drm_for_each_plane(plane, dev) {
+- struct nv50_wndw *wndw = nv50_wndw(plane);
+- if (plane->funcs != &nv50_wndw)
+- continue;
+- nv50_wndw_fini(wndw);
+- }
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST)
+@@ -2645,7 +2669,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+ {
+ struct nv50_core *core = nv50_disp(dev)->core;
+ struct drm_encoder *encoder;
+- struct drm_plane *plane;
+
+ if (resume || runtime)
+ core->func->init(core);
+@@ -2658,13 +2681,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+ }
+ }
+
+- drm_for_each_plane(plane, dev) {
+- struct nv50_wndw *wndw = nv50_wndw(plane);
+- if (plane->funcs != &nv50_wndw)
+- continue;
+- nv50_wndw_init(wndw);
+- }
+-
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+index 8d048bacd6f02..e1e62674e82d3 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+@@ -694,18 +694,6 @@ nv50_wndw_notify(struct nvif_notify *notify)
+ return NVIF_NOTIFY_KEEP;
+ }
+
+-void
+-nv50_wndw_fini(struct nv50_wndw *wndw)
+-{
+- nvif_notify_put(&wndw->notify);
+-}
+-
+-void
+-nv50_wndw_init(struct nv50_wndw *wndw)
+-{
+- nvif_notify_get(&wndw->notify);
+-}
+-
+ static const u64 nv50_cursor_format_modifiers[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID,
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+index f4e0c50800344..6c64864da4550 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+@@ -38,10 +38,9 @@ struct nv50_wndw {
+
+ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+ enum drm_plane_type, const char *name, int index,
+- const u32 *format, enum nv50_disp_interlock_type,
+- u32 interlock_data, u32 heads, struct nv50_wndw **);
+-void nv50_wndw_init(struct nv50_wndw *);
+-void nv50_wndw_fini(struct nv50_wndw *);
++ const u32 *format, u32 heads,
++ enum nv50_disp_interlock_type, u32 interlock_data,
++ struct nv50_wndw **);
+ void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+ struct nv50_wndw_atom *);
+ void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+index 1665738948fb4..96113c8bee8c5 100644
+--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
++++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+@@ -62,4 +62,6 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
+ #define nvkm_debug(s,f,a...) nvkm_printk((s), DEBUG, info, f, ##a)
+ #define nvkm_trace(s,f,a...) nvkm_printk((s), TRACE, info, f, ##a)
+ #define nvkm_spam(s,f,a...) nvkm_printk((s), SPAM, dbg, f, ##a)
++
++#define nvkm_error_ratelimited(s,f,a...) nvkm_printk((s), ERROR, err_ratelimited, f, ##a)
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
+index 7c15f64484281..9c55f205ab663 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
++++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
+@@ -220,6 +220,9 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
+ int optimus_funcs;
+ struct pci_dev *parent_pdev;
+
++ if (pdev->vendor != PCI_VENDOR_ID_NVIDIA)
++ return;
++
+ *has_pr3 = false;
+ parent_pdev = pci_upstream_bridge(pdev);
+ if (parent_pdev) {
+diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
+index 1cbd71abc80aa..aa8ed08fe9a7c 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
++++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
+@@ -46,8 +46,9 @@ static bool
+ nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
+ struct nouveau_backlight *bl)
+ {
+- const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
+- if (nb < 0 || nb >= 100)
++ const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL);
++
++ if (nb < 0)
+ return false;
+ if (nb > 0)
+ snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
+@@ -101,7 +102,6 @@ nv40_backlight_init(struct nouveau_encoder *encoder,
+ if (!(nvif_rd32(device, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK))
+ return -ENODEV;
+
+- props->type = BACKLIGHT_RAW;
+ props->max_brightness = 31;
+ *ops = &nv40_bl_ops;
+ return 0;
+@@ -294,7 +294,8 @@ nv50_backlight_init(struct nouveau_backlight *bl,
+ struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
+ struct nvif_object *device = &drm->client.device.object;
+
+- if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)))
++ if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) ||
++ nv_conn->base.status != connector_status_connected)
+ return -ENODEV;
+
+ if (nv_conn->type == DCB_CONNECTOR_eDP) {
+@@ -339,7 +340,6 @@ nv50_backlight_init(struct nouveau_backlight *bl,
+ else
+ *ops = &nva3_bl_ops;
+
+- props->type = BACKLIGHT_RAW;
+ props->max_brightness = 100;
+
+ return 0;
+@@ -407,11 +407,12 @@ nouveau_backlight_init(struct drm_connector *connector)
+ goto fail_alloc;
+ }
+
++ props.type = BACKLIGHT_RAW;
+ bl->dev = backlight_device_register(backlight_name, connector->kdev,
+ nv_encoder, ops, &props);
+ if (IS_ERR(bl->dev)) {
+ if (bl->id >= 0)
+- ida_simple_remove(&bl_ida, bl->id);
++ ida_free(&bl_ida, bl->id);
+ ret = PTR_ERR(bl->dev);
+ goto fail_alloc;
+ }
+@@ -439,7 +440,7 @@ nouveau_backlight_fini(struct drm_connector *connector)
+ return;
+
+ if (bl->id >= 0)
+- ida_simple_remove(&bl_ida, bl->id);
++ ida_free(&bl_ida, bl->id);
+
+ backlight_device_unregister(bl->dev);
+ nv_conn->backlight = NULL;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
+index c58bcdba2c7aa..da58230bcb1fc 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -281,8 +281,10 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
+ break;
+ }
+
+- if (WARN_ON(pi < 0))
++ if (WARN_ON(pi < 0)) {
++ kfree(nvbo);
+ return ERR_PTR(-EINVAL);
++ }
+
+ /* Disable compression if suitable settings couldn't be found. */
+ if (nvbo->comp && !vmm->page[pi].comp) {
+@@ -820,6 +822,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
+ if (ret == 0) {
+ ret = nouveau_fence_new(chan, false, &fence);
+ if (ret == 0) {
++ /* TODO: figure out a better solution here
++ *
++ * wait on the fence here explicitly as going through
++ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
++ *
++	 * Without this the operation can time out and we'll fall back to a
++ * software copy, which might take several minutes to finish.
++ */
++ nouveau_fence_wait(fence, false, false);
+ ret = ttm_bo_move_accel_cleanup(bo,
+ &fence->base,
+ evict, false,
+diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
+index 22b83a6577eb0..fe6c650d23ce0 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
++++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
+@@ -503,7 +503,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
+ connector->interlace_allowed =
+ nv_encoder->caps.dp_interlace;
+ else
+- connector->interlace_allowed = true;
++ connector->interlace_allowed =
++ drm->client.device.info.family < NV_DEVICE_INFO_V0_VOLTA;
+ connector->doublescan_allowed = true;
+ } else
+ if (nv_encoder->dcb->type == DCB_OUTPUT_LVDS ||
+@@ -728,7 +729,8 @@ out:
+ #endif
+
+ nouveau_connector_set_edid(nv_connector, edid);
+- nouveau_connector_set_encoder(connector, nv_encoder);
++ if (nv_encoder)
++ nouveau_connector_set_encoder(connector, nv_encoder);
+ return status;
+ }
+
+@@ -985,7 +987,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
+ * "native" mode as some VBIOS tables require us to use the
+ * pixel clock as part of the lookup...
+ */
+- if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
++ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+ nouveau_connector_detect_depth(connector);
+
+ if (nv_encoder->dcb->type == DCB_OUTPUT_TV)
+@@ -1361,13 +1363,11 @@ nouveau_connector_create(struct drm_device *dev,
+ snprintf(aux_name, sizeof(aux_name), "sor-%04x-%04x",
+ dcbe->hasht, dcbe->hashm);
+ nv_connector->aux.name = kstrdup(aux_name, GFP_KERNEL);
+- drm_dp_aux_init(&nv_connector->aux);
+- if (ret) {
+- NV_ERROR(drm, "Failed to init AUX adapter for sor-%04x-%04x: %d\n",
+- dcbe->hasht, dcbe->hashm, ret);
++ if (!nv_connector->aux.name) {
+ kfree(nv_connector);
+- return ERR_PTR(ret);
++ return ERR_PTR(-ENOMEM);
+ }
++ drm_dp_aux_init(&nv_connector->aux);
+ fallthrough;
+ default:
+ funcs = &nouveau_connector_funcs;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
+index 929de41c281f2..b8667bdc04ba0 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_display.c
++++ b/drivers/gpu/drm/nouveau/nouveau_display.c
+@@ -518,7 +518,7 @@ nouveau_display_hpd_work(struct work_struct *work)
+
+ pm_runtime_mark_last_busy(drm->dev->dev);
+ noop:
+- pm_runtime_put_sync(drm->dev->dev);
++ pm_runtime_put_autosuspend(dev->dev);
+ }
+
+ #ifdef CONFIG_ACPI
+@@ -540,7 +540,7 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val,
+ * it's own hotplug events.
+ */
+ pm_runtime_put_autosuspend(drm->dev->dev);
+- } else if (ret == 0) {
++ } else if (ret == 0 || ret == -EINPROGRESS) {
+ /* We've started resuming the GPU already, so
+ * it will handle scheduling a full reprobe
+ * itself
+diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+index 92987daa5e17d..5e72e6cb2f840 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
+ goto out_free_dma;
+
+ for (i = 0; i < npages; i += max) {
+- args.end = start + (max << PAGE_SHIFT);
++ if (args.start + (max << PAGE_SHIFT) > end)
++ args.end = end;
++ else
++ args.end = args.start + (max << PAGE_SHIFT);
++
+ ret = migrate_vma_setup(&args);
+ if (ret)
+ goto out_free_pfns;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
+index 040ed88d362d7..447b7594b35ae 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
+@@ -220,8 +220,6 @@ void nouveau_dp_irq(struct nouveau_drm *drm,
+ }
+
+ /* TODO:
+- * - Use the minimum possible BPC here, once we add support for the max bpc
+- * property.
+ * - Validate against the DP caps advertised by the GPU (we don't check these
+ * yet)
+ */
+@@ -233,7 +231,11 @@ nv50_dp_mode_valid(struct drm_connector *connector,
+ {
+ const unsigned int min_clock = 25000;
+ unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock;
+- const u8 bpp = connector->display_info.bpc * 3;
++	/* Check with the minimum bpc always, so we can advertise better modes.
++	 * In particular not doing this causes modes to be dropped on HDR
++ * displays as we might check with a bpc of 16 even.
++ */
++ const u8 bpp = 6 * 3;
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace)
+ return MODE_NO_INTERLACE;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
+index 6109cd9e33991..ae00a18bd45d9 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
+@@ -126,10 +126,16 @@ nouveau_name(struct drm_device *dev)
+ static inline bool
+ nouveau_cli_work_ready(struct dma_fence *fence)
+ {
+- if (!dma_fence_is_signaled(fence))
+- return false;
+- dma_fence_put(fence);
+- return true;
++ bool ret = true;
++
++ spin_lock_irq(fence->lock);
++ if (!dma_fence_is_signaled_locked(fence))
++ ret = false;
++ spin_unlock_irq(fence->lock);
++
++ if (ret == true)
++ dma_fence_put(fence);
++ return ret;
+ }
+
+ static void
+@@ -562,6 +568,7 @@ nouveau_drm_device_init(struct drm_device *dev)
+ nvkm_dbgopt(nouveau_debug, "DRM");
+
+ INIT_LIST_HEAD(&drm->clients);
++ mutex_init(&drm->clients_lock);
+ spin_lock_init(&drm->tile.lock);
+
+ /* workaround an odd issue on nvc1 by disabling the device's
+@@ -632,6 +639,7 @@ fail_alloc:
+ static void
+ nouveau_drm_device_fini(struct drm_device *dev)
+ {
++ struct nouveau_cli *cli, *temp_cli;
+ struct nouveau_drm *drm = nouveau_drm(dev);
+
+ if (nouveau_pmops_runtime()) {
+@@ -656,9 +664,28 @@ nouveau_drm_device_fini(struct drm_device *dev)
+ nouveau_ttm_fini(drm);
+ nouveau_vga_fini(drm);
+
++ /*
++ * There may be existing clients from as-yet unclosed files. For now,
++ * clean them up here rather than deferring until the file is closed,
++	 * but this is likely not correct if we want to support hot-unplugging
++ * properly.
++ */
++ mutex_lock(&drm->clients_lock);
++ list_for_each_entry_safe(cli, temp_cli, &drm->clients, head) {
++ list_del(&cli->head);
++ mutex_lock(&cli->mutex);
++ if (cli->abi16)
++ nouveau_abi16_fini(cli->abi16);
++ mutex_unlock(&cli->mutex);
++ nouveau_cli_fini(cli);
++ kfree(cli);
++ }
++ mutex_unlock(&drm->clients_lock);
++
+ nouveau_cli_fini(&drm->client);
+ nouveau_cli_fini(&drm->master);
+ nvif_parent_dtor(&drm->parent);
++ mutex_destroy(&drm->clients_lock);
+ kfree(drm);
+ }
+
+@@ -796,7 +823,7 @@ nouveau_drm_device_remove(struct drm_device *dev)
+ struct nvkm_client *client;
+ struct nvkm_device *device;
+
+- drm_dev_unregister(dev);
++ drm_dev_unplug(dev);
+
+ client = nvxx_client(&drm->client.base);
+ device = nvkm_device_find(client->device);
+@@ -1090,9 +1117,9 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
+
+ fpriv->driver_priv = cli;
+
+- mutex_lock(&drm->client.mutex);
++ mutex_lock(&drm->clients_lock);
+ list_add(&cli->head, &drm->clients);
+- mutex_unlock(&drm->client.mutex);
++ mutex_unlock(&drm->clients_lock);
+
+ done:
+ if (ret && cli) {
+@@ -1110,6 +1137,16 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
+ {
+ struct nouveau_cli *cli = nouveau_cli(fpriv);
+ struct nouveau_drm *drm = nouveau_drm(dev);
++ int dev_index;
++
++ /*
++ * The device is gone, and as it currently stands all clients are
++ * cleaned up in the removal codepath. In the future this may change
++ * so that we can support hot-unplugging, but for now we immediately
++ * return to avoid a double-free situation.
++ */
++ if (!drm_dev_enter(dev, &dev_index))
++ return;
+
+ pm_runtime_get_sync(dev->dev);
+
+@@ -1118,14 +1155,15 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
+ nouveau_abi16_fini(cli->abi16);
+ mutex_unlock(&cli->mutex);
+
+- mutex_lock(&drm->client.mutex);
++ mutex_lock(&drm->clients_lock);
+ list_del(&cli->head);
+- mutex_unlock(&drm->client.mutex);
++ mutex_unlock(&drm->clients_lock);
+
+ nouveau_cli_fini(cli);
+ kfree(cli);
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
++ drm_dev_exit(dev_index);
+ }
+
+ static const struct drm_ioctl_desc
+diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
+index ba65f136cf481..b2a970aa9bf4b 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
++++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
+@@ -139,6 +139,11 @@ struct nouveau_drm {
+
+ struct list_head clients;
+
++ /**
++ * @clients_lock: Protects access to the @clients list of &struct nouveau_cli.
++ */
++ struct mutex clients_lock;
++
+ u8 old_pm_cap;
+
+ struct {
+diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+index 4f9b3aa5deda9..20ac1ce2c0f14 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+@@ -466,7 +466,7 @@ nouveau_fbcon_set_suspend_work(struct work_struct *work)
+ if (state == FBINFO_STATE_RUNNING) {
+ nouveau_fbcon_hotplug_resume(drm->fbcon);
+ pm_runtime_mark_last_busy(drm->dev->dev);
+- pm_runtime_put_sync(drm->dev->dev);
++ pm_runtime_put_autosuspend(drm->dev->dev);
+ }
+ }
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
+index 05d0b3eb36904..0ae416aa76dcb 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
++++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
+@@ -353,15 +353,22 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
+
+ if (ret)
+ return ret;
+- }
+
+- fobj = dma_resv_shared_list(resv);
+- fence = dma_resv_excl_fence(resv);
++ fobj = NULL;
++ } else {
++ fobj = dma_resv_shared_list(resv);
++ }
+
+- if (fence) {
++ /* Waiting for the exclusive fence first causes performance regressions
++ * under some circumstances. So manually wait for the shared ones first.
++ */
++ for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) {
+ struct nouveau_channel *prev = NULL;
+ bool must_wait = true;
+
++ fence = rcu_dereference_protected(fobj->shared[i],
++ dma_resv_held(resv));
++
+ f = nouveau_local_fence(fence, chan->drm);
+ if (f) {
+ rcu_read_lock();
+@@ -373,20 +380,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
+
+ if (must_wait)
+ ret = dma_fence_wait(fence, intr);
+-
+- return ret;
+ }
+
+- if (!exclusive || !fobj)
+- return ret;
+-
+- for (i = 0; i < fobj->shared_count && !ret; ++i) {
++ fence = dma_resv_excl_fence(resv);
++ if (fence) {
+ struct nouveau_channel *prev = NULL;
+ bool must_wait = true;
+
+- fence = rcu_dereference_protected(fobj->shared[i],
+- dma_resv_held(resv));
+-
+ f = nouveau_local_fence(fence, chan->drm);
+ if (f) {
+ rcu_read_lock();
+@@ -398,6 +398,8 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
+
+ if (must_wait)
+ ret = dma_fence_wait(fence, intr);
++
++ return ret;
+ }
+
+ return ret;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
+index 8c2ecc2827232..c89d5964148fd 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
+@@ -56,7 +56,7 @@ static vm_fault_t nouveau_ttm_fault(struct vm_fault *vmf)
+
+ nouveau_bo_del_io_reserve_lru(bo);
+ prot = vm_get_page_prot(vma->vm_flags);
+- ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
++ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ nouveau_bo_add_io_reserve_lru(bo);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
+index 60019d0532fcf..531615719f6da 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
++++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
+@@ -71,7 +71,6 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
+ ret = nouveau_bo_init(nvbo, size, align, NOUVEAU_GEM_DOMAIN_GART,
+ sg, robj);
+ if (ret) {
+- nouveau_bo_ref(NULL, &nvbo);
+ obj = ERR_PTR(ret);
+ goto unlock;
+ }
+diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
+index b0c3422cb01fa..9985bfde015a6 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
+@@ -162,10 +162,14 @@ nouveau_svmm_bind(struct drm_device *dev, void *data,
+ */
+
+ mm = get_task_mm(current);
++ if (!mm) {
++ return -EINVAL;
++ }
+ mmap_read_lock(mm);
+
+ if (!cli->svm.svmm) {
+ mmap_read_unlock(mm);
++ mmput(mm);
+ return -EINVAL;
+ }
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
+index 704df0f2d1f16..09a112af2f893 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
+@@ -78,6 +78,6 @@ int
+ gt215_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+ struct nvkm_engine **pengine)
+ {
+- return nvkm_falcon_new_(&gt215_ce, device, type, inst,
++ return nvkm_falcon_new_(&gt215_ce, device, type, -1,
+ (device->chipset != 0xaf), 0x104000, pengine);
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+index ca75c5f6ecaf8..76156833a832a 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+@@ -2605,6 +2605,27 @@ nv172_chipset = {
+ .fifo = { 0x00000001, ga102_fifo_new },
+ };
+
++static const struct nvkm_device_chip
++nv173_chipset = {
++ .name = "GA103",
++ .bar = { 0x00000001, tu102_bar_new },
++ .bios = { 0x00000001, nvkm_bios_new },
++ .devinit = { 0x00000001, ga100_devinit_new },
++ .fb = { 0x00000001, ga102_fb_new },
++ .gpio = { 0x00000001, ga102_gpio_new },
++ .i2c = { 0x00000001, gm200_i2c_new },
++ .imem = { 0x00000001, nv50_instmem_new },
++ .mc = { 0x00000001, ga100_mc_new },
++ .mmu = { 0x00000001, tu102_mmu_new },
++ .pci = { 0x00000001, gp100_pci_new },
++ .privring = { 0x00000001, gm200_privring_new },
++ .timer = { 0x00000001, gk20a_timer_new },
++ .top = { 0x00000001, ga100_top_new },
++ .disp = { 0x00000001, ga102_disp_new },
++ .dma = { 0x00000001, gv100_dma_new },
++ .fifo = { 0x00000001, ga102_fifo_new },
++};
++
+ static const struct nvkm_device_chip
+ nv174_chipset = {
+ .name = "GA104",
+@@ -2626,6 +2647,27 @@ nv174_chipset = {
+ .fifo = { 0x00000001, ga102_fifo_new },
+ };
+
++static const struct nvkm_device_chip
++nv176_chipset = {
++ .name = "GA106",
++ .bar = { 0x00000001, tu102_bar_new },
++ .bios = { 0x00000001, nvkm_bios_new },
++ .devinit = { 0x00000001, ga100_devinit_new },
++ .fb = { 0x00000001, ga102_fb_new },
++ .gpio = { 0x00000001, ga102_gpio_new },
++ .i2c = { 0x00000001, gm200_i2c_new },
++ .imem = { 0x00000001, nv50_instmem_new },
++ .mc = { 0x00000001, ga100_mc_new },
++ .mmu = { 0x00000001, tu102_mmu_new },
++ .pci = { 0x00000001, gp100_pci_new },
++ .privring = { 0x00000001, gm200_privring_new },
++ .timer = { 0x00000001, gk20a_timer_new },
++ .top = { 0x00000001, ga100_top_new },
++ .disp = { 0x00000001, ga102_disp_new },
++ .dma = { 0x00000001, gv100_dma_new },
++ .fifo = { 0x00000001, ga102_fifo_new },
++};
++
+ static const struct nvkm_device_chip
+ nv177_chipset = {
+ .name = "GA107",
+@@ -3071,7 +3113,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
+ case 0x167: device->chip = &nv167_chipset; break;
+ case 0x168: device->chip = &nv168_chipset; break;
+ case 0x172: device->chip = &nv172_chipset; break;
++ case 0x173: device->chip = &nv173_chipset; break;
+ case 0x174: device->chip = &nv174_chipset; break;
++ case 0x176: device->chip = &nv176_chipset; break;
+ case 0x177: device->chip = &nv177_chipset; break;
+ default:
+ if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
+@@ -3147,8 +3191,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
+ WARN_ON(device->chip->ptr.inst & ~((1 << ARRAY_SIZE(device->ptr)) - 1)); \
+ for (j = 0; device->chip->ptr.inst && j < ARRAY_SIZE(device->ptr); j++) { \
+ if ((device->chip->ptr.inst & BIT(j)) && (subdev_mask & BIT_ULL(type))) { \
+- int inst = (device->chip->ptr.inst == 1) ? -1 : (j); \
+- ret = device->chip->ptr.ctor(device, (type), inst, &device->ptr[j]); \
++ ret = device->chip->ptr.ctor(device, (type), (j), &device->ptr[j]); \
+ subdev = nvkm_device_subdev(device, (type), (j)); \
+ if (ret) { \
+ nvkm_subdev_del(&subdev); \
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+index d0d52c1d4aee0..950a3de3e1166 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+@@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
+
+ mutex_init(&tdev->iommu.mutex);
+
+- if (iommu_present(&platform_bus_type)) {
++ if (device_iommu_mapped(dev)) {
+ tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
+ if (!tdev->iommu.domain)
+ goto error;
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
+index 6e3c450eaacef..3ff49344abc77 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
+@@ -62,7 +62,6 @@ gv100_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
+ nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header);
+ nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low);
+ nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high);
+- nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000);
+ nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000);
+ nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000);
+ nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000);
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+index 32bbddc0993e8..679aff79f4d6b 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+@@ -123,6 +123,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
+
+ extern const struct gf100_grctx_func gk110_grctx;
+ void gk110_grctx_generate_r419eb0(struct gf100_gr *);
++void gk110_grctx_generate_r419f78(struct gf100_gr *);
+
+ extern const struct gf100_grctx_func gk110b_grctx;
+ extern const struct gf100_grctx_func gk208_grctx;
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+index 304e9d268bad4..f894f82548242 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+@@ -916,7 +916,9 @@ static void
+ gk104_grctx_generate_r419f78(struct gf100_gr *gr)
+ {
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+- nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
++
++ /* bit 3 set disables loads in fp helper invocations, we need it enabled */
++ nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
+ }
+
+ void
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+index 86547cfc38dce..e88740d4e54d4 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
+ nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
+ }
+
++void
++gk110_grctx_generate_r419f78(struct gf100_gr *gr)
++{
++ struct nvkm_device *device = gr->base.engine.subdev.device;
++
++ /* bit 3 set disables loads in fp helper invocations, we need it enabled */
++ nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
++}
++
+ const struct gf100_grctx_func
+ gk110_grctx = {
+ .main = gf100_grctx_generate_main,
+@@ -852,4 +861,5 @@ gk110_grctx = {
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
+ .r419eb0 = gk110_grctx_generate_r419eb0,
++ .r419f78 = gk110_grctx_generate_r419f78,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+index ebb947bd1446b..086e4d49e1121 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+@@ -101,4 +101,5 @@ gk110b_grctx = {
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
+ .r419eb0 = gk110_grctx_generate_r419eb0,
++ .r419f78 = gk110_grctx_generate_r419f78,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+index 4d40512b5c998..0bf438c3f7cbc 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+@@ -566,4 +566,5 @@ gk208_grctx = {
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
++ .r419f78 = gk110_grctx_generate_r419f78,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+index 0b3964e6b36e2..acdf0932a99e1 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+@@ -991,4 +991,5 @@ gm107_grctx = {
+ .r406500 = gm107_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r419e00 = gm107_grctx_generate_r419e00,
++ .r419f78 = gk110_grctx_generate_r419f78,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
+index 262641a014b06..c91130a6be2a1 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
+@@ -117,8 +117,12 @@ nvkm_falcon_disable(struct nvkm_falcon *falcon)
+ int
+ nvkm_falcon_reset(struct nvkm_falcon *falcon)
+ {
+- nvkm_falcon_disable(falcon);
+- return nvkm_falcon_enable(falcon);
++ if (!falcon->func->reset) {
++ nvkm_falcon_disable(falcon);
++ return nvkm_falcon_enable(falcon);
++ }
++
++ return falcon->func->reset(falcon);
+ }
+
+ int
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
+index cdb1ead26d84f..82b4c8e1457c2 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
+@@ -207,11 +207,13 @@ int
+ gm200_acr_wpr_parse(struct nvkm_acr *acr)
+ {
+ const struct wpr_header *hdr = (void *)acr->wpr_fw->data;
++ struct nvkm_acr_lsfw *lsfw;
+
+ while (hdr->falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID) {
+ wpr_header_dump(&acr->subdev, hdr);
+- if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
+- return -ENOMEM;
++ lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
++ if (IS_ERR(lsfw))
++ return PTR_ERR(lsfw);
+ }
+
+ return 0;
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
+index fb9132a39bb1a..fd97a935a380e 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
+@@ -161,11 +161,13 @@ int
+ gp102_acr_wpr_parse(struct nvkm_acr *acr)
+ {
+ const struct wpr_header_v1 *hdr = (void *)acr->wpr_fw->data;
++ struct nvkm_acr_lsfw *lsfw;
+
+ while (hdr->falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) {
+ wpr_header_v1_dump(&acr->subdev, hdr);
+- if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
+- return -ENOMEM;
++ lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
++ if (IS_ERR(lsfw))
++ return PTR_ERR(lsfw);
+ }
+
+ return 0;
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
+index 667fa016496ee..a6ea89a5d51ab 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
+@@ -142,11 +142,12 @@ nvkm_acr_hsfw_load_bl(struct nvkm_acr *acr, const char *name, int ver,
+
+ hsfw->imem_size = desc->code_size;
+ hsfw->imem_tag = desc->start_tag;
+- hsfw->imem = kmalloc(desc->code_size, GFP_KERNEL);
+- memcpy(hsfw->imem, data + desc->code_off, desc->code_size);
+-
++ hsfw->imem = kmemdup(data + desc->code_off, desc->code_size, GFP_KERNEL);
+ nvkm_firmware_put(fw);
+- return 0;
++ if (!hsfw->imem)
++ return -ENOMEM;
++ else
++ return 0;
+ }
+
+ int
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+index d0f52d59fc2f9..6c318e41bde04 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+@@ -33,12 +33,12 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
+ {
+ u32 p = *addr;
+
+- if (*addr > bios->image0_size && bios->imaged_addr) {
++ if (*addr >= bios->image0_size && bios->imaged_addr) {
+ *addr -= bios->image0_size;
+ *addr += bios->imaged_addr;
+ }
+
+- if (unlikely(*addr + size >= bios->size)) {
++ if (unlikely(*addr + size > bios->size)) {
+ nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
+ return false;
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c
+index 53a6651ac2258..80b5aaceeaad1 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c
+@@ -35,13 +35,13 @@ gf100_bus_intr(struct nvkm_bus *bus)
+ u32 addr = nvkm_rd32(device, 0x009084);
+ u32 data = nvkm_rd32(device, 0x009088);
+
+- nvkm_error(subdev,
+- "MMIO %s of %08x FAULT at %06x [ %s%s%s]\n",
+- (addr & 0x00000002) ? "write" : "read", data,
+- (addr & 0x00fffffc),
+- (stat & 0x00000002) ? "!ENGINE " : "",
+- (stat & 0x00000004) ? "PRIVRING " : "",
+- (stat & 0x00000008) ? "TIMEOUT " : "");
++ nvkm_error_ratelimited(subdev,
++ "MMIO %s of %08x FAULT at %06x [ %s%s%s]\n",
++ (addr & 0x00000002) ? "write" : "read", data,
++ (addr & 0x00fffffc),
++ (stat & 0x00000002) ? "!ENGINE " : "",
++ (stat & 0x00000004) ? "PRIVRING " : "",
++ (stat & 0x00000008) ? "TIMEOUT " : "");
+
+ nvkm_wr32(device, 0x009084, 0x00000000);
+ nvkm_wr32(device, 0x001100, (stat & 0x0000000e));
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c
+index ad8da523bb22e..c75e463f35013 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c
+@@ -45,9 +45,9 @@ nv31_bus_intr(struct nvkm_bus *bus)
+ u32 addr = nvkm_rd32(device, 0x009084);
+ u32 data = nvkm_rd32(device, 0x009088);
+
+- nvkm_error(subdev, "MMIO %s of %08x FAULT at %06x\n",
+- (addr & 0x00000002) ? "write" : "read", data,
+- (addr & 0x00fffffc));
++ nvkm_error_ratelimited(subdev, "MMIO %s of %08x FAULT at %06x\n",
++ (addr & 0x00000002) ? "write" : "read", data,
++ (addr & 0x00fffffc));
+
+ stat &= ~0x00000008;
+ nvkm_wr32(device, 0x001100, 0x00000008);
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c
+index 3a1e45adeedc1..2055d0b100d3f 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c
+@@ -60,9 +60,9 @@ nv50_bus_intr(struct nvkm_bus *bus)
+ u32 addr = nvkm_rd32(device, 0x009084);
+ u32 data = nvkm_rd32(device, 0x009088);
+
+- nvkm_error(subdev, "MMIO %s of %08x FAULT at %06x\n",
+- (addr & 0x00000002) ? "write" : "read", data,
+- (addr & 0x00fffffc));
++ nvkm_error_ratelimited(subdev, "MMIO %s of %08x FAULT at %06x\n",
++ (addr & 0x00000002) ? "write" : "read", data,
++ (addr & 0x00fffffc));
+
+ stat &= ~0x00000008;
+ nvkm_wr32(device, 0x001100, 0x00000008);
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
+index 57199be082fd3..c2b5cc5f97eda 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
+@@ -135,10 +135,10 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate,
+
+ list_for_each_entry_from_reverse(cstate, &pstate->list, head) {
+ if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp))
+- break;
++ return cstate;
+ }
+
+- return cstate;
++ return NULL;
+ }
+
+ static struct nvkm_cstate *
+@@ -169,6 +169,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
+ if (!list_empty(&pstate->list)) {
+ cstate = nvkm_cstate_get(clk, pstate, cstatei);
+ cstate = nvkm_cstate_find_best(clk, pstate, cstate);
++ if (!cstate)
++ return -EINVAL;
+ } else {
+ cstate = &pstate->base;
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
+index 634f64f88fc8b..81a1ad2c88a7e 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c
+@@ -65,10 +65,33 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
+ return ret;
+ }
+
++static int
++tu102_devinit_wait(struct nvkm_device *device)
++{
++ unsigned timeout = 50 + 2000;
++
++ do {
++ if (nvkm_rd32(device, 0x118128) & 0x00000001) {
++ if ((nvkm_rd32(device, 0x118234) & 0x000000ff) == 0xff)
++ return 0;
++ }
++
++ usleep_range(1000, 2000);
++ } while (timeout--);
++
++ return -ETIMEDOUT;
++}
++
+ int
+ tu102_devinit_post(struct nvkm_devinit *base, bool post)
+ {
+ struct nv50_devinit *init = nv50_devinit(base);
++ int ret;
++
++ ret = tu102_devinit_wait(init->base.subdev.device);
++ if (ret)
++ return ret;
++
+ gm200_devinit_preos(init, post);
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+index 24382875fb4f3..455e95a89259f 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+@@ -94,20 +94,13 @@ nvkm_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
+ return 0;
+ }
+
+-static int
++static void
+ nvkm_pmu_reset(struct nvkm_pmu *pmu)
+ {
+ struct nvkm_device *device = pmu->subdev.device;
+
+ if (!pmu->func->enabled(pmu))
+- return 0;
+-
+- /* Inhibit interrupts, and wait for idle. */
+- nvkm_wr32(device, 0x10a014, 0x0000ffff);
+- nvkm_msec(device, 2000,
+- if (!nvkm_rd32(device, 0x10a04c))
+- break;
+- );
++ return;
+
+ /* Reset. */
+ if (pmu->func->reset)
+@@ -118,25 +111,37 @@ nvkm_pmu_reset(struct nvkm_pmu *pmu)
+ if (!(nvkm_rd32(device, 0x10a10c) & 0x00000006))
+ break;
+ );
+-
+- return 0;
+ }
+
+ static int
+ nvkm_pmu_preinit(struct nvkm_subdev *subdev)
+ {
+ struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+- return nvkm_pmu_reset(pmu);
++ nvkm_pmu_reset(pmu);
++ return 0;
+ }
+
+ static int
+ nvkm_pmu_init(struct nvkm_subdev *subdev)
+ {
+ struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+- int ret = nvkm_pmu_reset(pmu);
+- if (ret == 0 && pmu->func->init)
+- ret = pmu->func->init(pmu);
+- return ret;
++ struct nvkm_device *device = pmu->subdev.device;
++
++ if (!pmu->func->init)
++ return 0;
++
++ if (pmu->func->enabled(pmu)) {
++ /* Inhibit interrupts, and wait for idle. */
++ nvkm_wr32(device, 0x10a014, 0x0000ffff);
++ nvkm_msec(device, 2000,
++ if (!nvkm_rd32(device, 0x10a04c))
++ break;
++ );
++
++ nvkm_pmu_reset(pmu);
++ }
++
++ return pmu->func->init(pmu);
+ }
+
+ static void *
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
+index 5968c7696596c..40439e329aa9f 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
+@@ -23,9 +23,38 @@
+ */
+ #include "priv.h"
+
++static int
++gm200_pmu_flcn_reset(struct nvkm_falcon *falcon)
++{
++ struct nvkm_pmu *pmu = container_of(falcon, typeof(*pmu), falcon);
++
++ nvkm_falcon_wr32(falcon, 0x014, 0x0000ffff);
++ pmu->func->reset(pmu);
++ return nvkm_falcon_enable(falcon);
++}
++
++const struct nvkm_falcon_func
++gm200_pmu_flcn = {
++ .debug = 0xc08,
++ .fbif = 0xe00,
++ .load_imem = nvkm_falcon_v1_load_imem,
++ .load_dmem = nvkm_falcon_v1_load_dmem,
++ .read_dmem = nvkm_falcon_v1_read_dmem,
++ .bind_context = nvkm_falcon_v1_bind_context,
++ .wait_for_halt = nvkm_falcon_v1_wait_for_halt,
++ .clear_interrupt = nvkm_falcon_v1_clear_interrupt,
++ .set_start_addr = nvkm_falcon_v1_set_start_addr,
++ .start = nvkm_falcon_v1_start,
++ .enable = nvkm_falcon_v1_enable,
++ .disable = nvkm_falcon_v1_disable,
++ .reset = gm200_pmu_flcn_reset,
++ .cmdq = { 0x4a0, 0x4b0, 4 },
++ .msgq = { 0x4c8, 0x4cc, 0 },
++};
++
+ static const struct nvkm_pmu_func
+ gm200_pmu = {
+- .flcn = &gt215_pmu_flcn,
++ .flcn = &gm200_pmu_flcn,
+ .enabled = gf100_pmu_enabled,
+ .reset = gf100_pmu_reset,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
+index 148706977eec7..612310d5d4812 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
+@@ -211,11 +211,12 @@ gm20b_pmu_recv(struct nvkm_pmu *pmu)
+
+ static const struct nvkm_pmu_func
+ gm20b_pmu = {
+- .flcn = &gt215_pmu_flcn,
++ .flcn = &gm200_pmu_flcn,
+ .enabled = gf100_pmu_enabled,
+ .intr = gt215_pmu_intr,
+ .recv = gm20b_pmu_recv,
+ .initmsg = gm20b_pmu_initmsg,
++ .reset = gf100_pmu_reset,
+ };
+
+ #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
+index 00da1b873ce81..1a6f9c3af5ecd 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
+@@ -23,7 +23,7 @@
+ */
+ #include "priv.h"
+
+-static void
++void
+ gp102_pmu_reset(struct nvkm_pmu *pmu)
+ {
+ struct nvkm_device *device = pmu->subdev.device;
+@@ -39,7 +39,7 @@ gp102_pmu_enabled(struct nvkm_pmu *pmu)
+
+ static const struct nvkm_pmu_func
+ gp102_pmu = {
+- .flcn = &gt215_pmu_flcn,
++ .flcn = &gm200_pmu_flcn,
+ .enabled = gp102_pmu_enabled,
+ .reset = gp102_pmu_reset,
+ };
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
+index 461f722656e24..94cfb1791af6e 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
+@@ -78,11 +78,12 @@ gp10b_pmu_acr = {
+
+ static const struct nvkm_pmu_func
+ gp10b_pmu = {
+- .flcn = &gt215_pmu_flcn,
++ .flcn = &gm200_pmu_flcn,
+ .enabled = gf100_pmu_enabled,
+ .intr = gt215_pmu_intr,
+ .recv = gm20b_pmu_recv,
+ .initmsg = gm20b_pmu_initmsg,
++ .reset = gp102_pmu_reset,
+ };
+
+ #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+index e7860d1773539..21abf31f44420 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+@@ -41,9 +41,12 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
+
+ bool gf100_pmu_enabled(struct nvkm_pmu *);
+ void gf100_pmu_reset(struct nvkm_pmu *);
++void gp102_pmu_reset(struct nvkm_pmu *pmu);
+
+ void gk110_pmu_pgob(struct nvkm_pmu *, bool);
+
++extern const struct nvkm_falcon_func gm200_pmu_flcn;
++
+ void gm20b_pmu_acr_bld_patch(struct nvkm_acr *, u32, s64);
+ void gm20b_pmu_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *);
+ int gm20b_pmu_acr_boot(struct nvkm_falcon *);
+diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
+index 5f1722b040f46..41da86cd8b64c 100644
+--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
++++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
+@@ -1039,22 +1039,26 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
+ {
+ struct dsi_data *dsi = s->private;
+ unsigned long flags;
+- struct dsi_irq_stats stats;
++ struct dsi_irq_stats *stats;
++
++ stats = kmalloc(sizeof(*stats), GFP_KERNEL);
++ if (!stats)
++ return -ENOMEM;
+
+ spin_lock_irqsave(&dsi->irq_stats_lock, flags);
+
+- stats = dsi->irq_stats;
++ *stats = dsi->irq_stats;
+ memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
+ dsi->irq_stats.last_reset = jiffies;
+
+ spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
+
+ seq_printf(s, "period %u ms\n",
+- jiffies_to_msecs(jiffies - stats.last_reset));
++ jiffies_to_msecs(jiffies - stats->last_reset));
+
+- seq_printf(s, "irqs %d\n", stats.irq_count);
++ seq_printf(s, "irqs %d\n", stats->irq_count);
+ #define PIS(x) \
+- seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]);
++ seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]);
+
+ seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
+ PIS(VC0);
+@@ -1078,10 +1082,10 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
+
+ #define PIS(x) \
+ seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
+- stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
++ stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
+
+ seq_printf(s, "-- VC interrupts --\n");
+ PIS(CS);
+@@ -1097,7 +1101,7 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
+
+ #define PIS(x) \
+ seq_printf(s, "%-20s %10d\n", #x, \
+- stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
++ stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
+
+ seq_printf(s, "-- CIO interrupts --\n");
+ PIS(ERRSYNCESC1);
+@@ -1122,6 +1126,8 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p)
+ PIS(ULPSACTIVENOT_ALL1);
+ #undef PIS
+
++ kfree(stats);
++
+ return 0;
+ }
+ #endif
+diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c
+index d6a5862b4dbf5..7567e2265aa33 100644
+--- a/drivers/gpu/drm/omapdrm/dss/dss.c
++++ b/drivers/gpu/drm/omapdrm/dss/dss.c
+@@ -1176,6 +1176,7 @@ static void __dss_uninit_ports(struct dss_device *dss, unsigned int num_ports)
+ default:
+ break;
+ }
++ of_node_put(port);
+ }
+ }
+
+@@ -1208,11 +1209,13 @@ static int dss_init_ports(struct dss_device *dss)
+ default:
+ break;
+ }
++ of_node_put(port);
+ }
+
+ return 0;
+
+ error:
++ of_node_put(port);
+ __dss_uninit_ports(dss, i);
+ return r;
+ }
+diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
+index 418638e6e3b0a..479ffdb64486e 100644
+--- a/drivers/gpu/drm/panel/Kconfig
++++ b/drivers/gpu/drm/panel/Kconfig
+@@ -83,6 +83,8 @@ config DRM_PANEL_SIMPLE
+ depends on PM
+ select VIDEOMODE_HELPERS
+ select DRM_DP_AUX_BUS
++ select DRM_DP_HELPER
++ select DRM_KMS_HELPER
+ help
+ DRM panel driver for dumb panels that need at most a regulator and
+ a GPIO to be powered up. Optionally a backlight can be attached so
+@@ -371,6 +373,8 @@ config DRM_PANEL_SAMSUNG_ATNA33XC20
+ depends on OF
+ depends on BACKLIGHT_CLASS_DEVICE
+ depends on PM
++ select DRM_DISPLAY_DP_HELPER
++ select DRM_DISPLAY_HELPER
+ select DRM_DP_AUX_BUS
+ help
+ DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't
+diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
+index 581661b506f81..f9c1f7bc8218c 100644
+--- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
++++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
+@@ -227,7 +227,13 @@ static int feiyang_dsi_probe(struct mipi_dsi_device *dsi)
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->lanes = 4;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret < 0) {
++ drm_panel_remove(&ctx->panel);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int feiyang_dsi_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
+index 2c3378a259b1e..e1542451ef9d0 100644
+--- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
++++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
+@@ -612,8 +612,10 @@ static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc,
+ int ret;
+
+ vcc = devm_regulator_get_optional(dev, "vcc");
+- if (IS_ERR(vcc))
++ if (IS_ERR(vcc)) {
+ dev_err(dev, "get optional vcc failed\n");
++ vcc = NULL;
++ }
+
+ dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver,
+ struct mipi_dbi_dev, drm);
+diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c
+index aea3162253914..f194b62e290ca 100644
+--- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c
++++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c
+@@ -484,6 +484,7 @@ static void innolux_panel_del(struct innolux_panel *innolux)
+ static int innolux_panel_probe(struct mipi_dsi_device *dsi)
+ {
+ const struct panel_desc *desc;
++ struct innolux_panel *innolux;
+ int err;
+
+ desc = of_device_get_match_data(&dsi->dev);
+@@ -495,7 +496,14 @@ static int innolux_panel_probe(struct mipi_dsi_device *dsi)
+ if (err < 0)
+ return err;
+
+- return mipi_dsi_attach(dsi);
++ err = mipi_dsi_attach(dsi);
++ if (err < 0) {
++ innolux = mipi_dsi_get_drvdata(dsi);
++ innolux_panel_del(innolux);
++ return err;
++ }
++
++ return 0;
+ }
+
+ static int innolux_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
+index 733010b5e4f53..3c86ad262d5e0 100644
+--- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
++++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
+@@ -473,7 +473,13 @@ static int jdi_panel_probe(struct mipi_dsi_device *dsi)
+ if (ret < 0)
+ return ret;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret < 0) {
++ jdi_panel_del(jdi);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int jdi_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
+index 86e4213e8bb13..daccb1fd5fdad 100644
+--- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
++++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
+@@ -406,7 +406,13 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi)
+ if (err < 0)
+ return err;
+
+- return mipi_dsi_attach(dsi);
++ err = mipi_dsi_attach(dsi);
++ if (err < 0) {
++ kingdisplay_panel_del(kingdisplay);
++ return err;
++ }
++
++ return 0;
+ }
+
+ static int kingdisplay_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c
+index 533cd3934b8b7..839b263fb3c0f 100644
+--- a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c
++++ b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c
+@@ -656,7 +656,13 @@ static int nt36672a_panel_probe(struct mipi_dsi_device *dsi)
+ if (err < 0)
+ return err;
+
+- return mipi_dsi_attach(dsi);
++ err = mipi_dsi_attach(dsi);
++ if (err < 0) {
++ drm_panel_remove(&pinfo->base);
++ return err;
++ }
++
++ return 0;
+ }
+
+ static int nt36672a_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
+index f80b44a8a7003..f8dbccd55033a 100644
+--- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
++++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
+@@ -444,7 +444,7 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi)
+ DRM_MODE_CONNECTOR_DSI);
+
+ ctx->bl_dev = devm_backlight_device_register(dev, dev_name(dev),
+- dsi->host->dev, ctx,
++ dev, ctx,
+ &otm8009a_backlight_ops,
+ NULL);
+ if (IS_ERR(ctx->bl_dev)) {
+diff --git a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
+index 3c20beeb17819..3991f5d950af4 100644
+--- a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
++++ b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
+@@ -241,7 +241,13 @@ static int wuxga_nt_panel_probe(struct mipi_dsi_device *dsi)
+ if (ret < 0)
+ return ret;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret < 0) {
++ wuxga_nt_panel_del(wuxga_nt);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int wuxga_nt_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+index 46029c5610c80..145047e193946 100644
+--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
++++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+@@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts,
+
+ ret = i2c_smbus_write_byte_data(ts->i2c, reg, val);
+ if (ret)
+- dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret);
++ dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret);
+ }
+
+ static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
+@@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel)
+ return 0;
+ }
+
+-static int rpi_touchscreen_enable(struct drm_panel *panel)
++static int rpi_touchscreen_prepare(struct drm_panel *panel)
+ {
+ struct rpi_touchscreen *ts = panel_to_ts(panel);
+ int i;
+@@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel)
+ rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01);
+ msleep(100);
+
++ return 0;
++}
++
++static int rpi_touchscreen_enable(struct drm_panel *panel)
++{
++ struct rpi_touchscreen *ts = panel_to_ts(panel);
++
+ /* Turn on the backlight. */
+ rpi_touchscreen_i2c_write(ts, REG_PWM, 255);
+
+@@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel,
+ static const struct drm_panel_funcs rpi_touchscreen_funcs = {
+ .disable = rpi_touchscreen_disable,
+ .unprepare = rpi_touchscreen_noop,
+- .prepare = rpi_touchscreen_noop,
++ .prepare = rpi_touchscreen_prepare,
+ .enable = rpi_touchscreen_enable,
+ .get_modes = rpi_touchscreen_get_modes,
+ };
+diff --git a/drivers/gpu/drm/panel/panel-ronbo-rb070d30.c b/drivers/gpu/drm/panel/panel-ronbo-rb070d30.c
+index a3782830ae3c4..1fb579a574d9f 100644
+--- a/drivers/gpu/drm/panel/panel-ronbo-rb070d30.c
++++ b/drivers/gpu/drm/panel/panel-ronbo-rb070d30.c
+@@ -199,7 +199,13 @@ static int rb070d30_panel_dsi_probe(struct mipi_dsi_device *dsi)
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->lanes = 4;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret < 0) {
++ drm_panel_remove(&ctx->panel);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int rb070d30_panel_dsi_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
+index 0ab1b7ec84cda..166d7d41cd9b5 100644
+--- a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
++++ b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
+@@ -692,7 +692,9 @@ static int s6e3ha2_probe(struct mipi_dsi_device *dsi)
+
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+- dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS;
++ dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS |
++ MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
++ MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET;
+
+ ctx->supplies[0].supply = "vdd3";
+ ctx->supplies[1].supply = "vci";
+diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
+index ccc8ed6fe3aed..2fc46fdd0e7a0 100644
+--- a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
++++ b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
+@@ -446,7 +446,8 @@ static int s6e63j0x03_probe(struct mipi_dsi_device *dsi)
+
+ dsi->lanes = 1;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+- dsi->mode_flags = MIPI_DSI_MODE_NO_EOT_PACKET;
++ dsi->mode_flags = MIPI_DSI_MODE_VIDEO_NO_HFP |
++ MIPI_DSI_MODE_VIDEO_NO_HBP | MIPI_DSI_MODE_VIDEO_NO_HSA;
+
+ ctx->supplies[0].supply = "vdd3";
+ ctx->supplies[1].supply = "vci";
+diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c b/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c
+index ea63799ff2a1e..29fde3823212b 100644
+--- a/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c
++++ b/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c
+@@ -247,6 +247,7 @@ static int s6e88a0_ams452ef01_probe(struct mipi_dsi_device *dsi)
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ dev_err(dev, "Failed to attach to DSI host: %d\n", ret);
++ drm_panel_remove(&ctx->panel);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
+index 9b3599d6d2dea..737b8ca22b374 100644
+--- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
++++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
+@@ -990,8 +990,6 @@ static int s6e8aa0_probe(struct mipi_dsi_device *dsi)
+ dsi->lanes = 4;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST
+- | MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP
+- | MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET
+ | MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_AUTO_VERT;
+
+ ret = s6e8aa0_parse_dt(ctx);
+diff --git a/drivers/gpu/drm/panel/panel-samsung-sofef00.c b/drivers/gpu/drm/panel/panel-samsung-sofef00.c
+index 8cb1853574bb8..6d107e14fcc55 100644
+--- a/drivers/gpu/drm/panel/panel-samsung-sofef00.c
++++ b/drivers/gpu/drm/panel/panel-samsung-sofef00.c
+@@ -302,6 +302,7 @@ static int sofef00_panel_probe(struct mipi_dsi_device *dsi)
+ ret = mipi_dsi_attach(dsi);
+ if (ret < 0) {
+ dev_err(dev, "Failed to attach to DSI host: %d\n", ret);
++ drm_panel_remove(&ctx->panel);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
+index b937e24dac8e0..e2f1e983ef7eb 100644
+--- a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
++++ b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
+@@ -192,15 +192,15 @@ static int sharp_nt_panel_enable(struct drm_panel *panel)
+ }
+
+ static const struct drm_display_mode default_mode = {
+- .clock = 41118,
++ .clock = (540 + 48 + 32 + 80) * (960 + 3 + 10 + 15) * 60 / 1000,
+ .hdisplay = 540,
+ .hsync_start = 540 + 48,
+- .hsync_end = 540 + 48 + 80,
+- .htotal = 540 + 48 + 80 + 32,
++ .hsync_end = 540 + 48 + 32,
++ .htotal = 540 + 48 + 32 + 80,
+ .vdisplay = 960,
+ .vsync_start = 960 + 3,
+- .vsync_end = 960 + 3 + 15,
+- .vtotal = 960 + 3 + 15 + 1,
++ .vsync_end = 960 + 3 + 10,
++ .vtotal = 960 + 3 + 10 + 15,
+ };
+
+ static int sharp_nt_panel_get_modes(struct drm_panel *panel,
+@@ -280,6 +280,7 @@ static int sharp_nt_panel_probe(struct mipi_dsi_device *dsi)
+ dsi->lanes = 2;
+ dsi->format = MIPI_DSI_FMT_RGB888;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
++ MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+ MIPI_DSI_MODE_VIDEO_HSE |
+ MIPI_DSI_CLOCK_NON_CONTINUOUS |
+ MIPI_DSI_MODE_NO_EOT_PACKET;
+@@ -296,7 +297,13 @@ static int sharp_nt_panel_probe(struct mipi_dsi_device *dsi)
+ if (ret < 0)
+ return ret;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret < 0) {
++ sharp_nt_panel_del(sharp_nt);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int sharp_nt_panel_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
+index 9b6c4e6c38a1b..e58eb93e9bc9e 100644
+--- a/drivers/gpu/drm/panel/panel-simple.c
++++ b/drivers/gpu/drm/panel/panel-simple.c
+@@ -721,6 +721,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc,
+ err = panel_dpi_probe(dev, panel);
+ if (err)
+ goto free_ddc;
++ desc = panel->desc;
+ } else {
+ if (!of_get_display_timing(dev->of_node, "panel-timing", &dt))
+ panel_simple_parse_panel_timing_node(dev, panel, &dt);
+@@ -861,7 +862,7 @@ static const struct drm_display_mode ampire_am_1280800n3tzqw_t00h_mode = {
+ static const struct panel_desc ampire_am_1280800n3tzqw_t00h = {
+ .modes = &ampire_am_1280800n3tzqw_t00h_mode,
+ .num_modes = 1,
+- .bpc = 6,
++ .bpc = 8,
+ .size = {
+ .width = 217,
+ .height = 136,
+@@ -889,8 +890,8 @@ static const struct panel_desc ampire_am_480272h3tmqw_t01h = {
+ .num_modes = 1,
+ .bpc = 8,
+ .size = {
+- .width = 105,
+- .height = 67,
++ .width = 99,
++ .height = 58,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+ };
+@@ -1257,21 +1258,21 @@ static const struct panel_desc auo_g104sn02 = {
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+ };
+
+-static const struct drm_display_mode auo_g121ean01_mode = {
+- .clock = 66700,
+- .hdisplay = 1280,
+- .hsync_start = 1280 + 58,
+- .hsync_end = 1280 + 58 + 8,
+- .htotal = 1280 + 58 + 8 + 70,
+- .vdisplay = 800,
+- .vsync_start = 800 + 6,
+- .vsync_end = 800 + 6 + 4,
+- .vtotal = 800 + 6 + 4 + 10,
++static const struct display_timing auo_g121ean01_timing = {
++ .pixelclock = { 60000000, 74400000, 90000000 },
++ .hactive = { 1280, 1280, 1280 },
++ .hfront_porch = { 20, 50, 100 },
++ .hback_porch = { 20, 50, 100 },
++ .hsync_len = { 30, 100, 200 },
++ .vactive = { 800, 800, 800 },
++ .vfront_porch = { 2, 10, 25 },
++ .vback_porch = { 2, 10, 25 },
++ .vsync_len = { 4, 18, 50 },
+ };
+
+ static const struct panel_desc auo_g121ean01 = {
+- .modes = &auo_g121ean01_mode,
+- .num_modes = 1,
++ .timings = &auo_g121ean01_timing,
++ .num_timings = 1,
+ .bpc = 8,
+ .size = {
+ .width = 261,
+@@ -1447,7 +1448,9 @@ static const struct panel_desc auo_t215hvn01 = {
+ .delay = {
+ .disable = 5,
+ .unprepare = 1000,
+- }
++ },
++ .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
++ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+ };
+
+ static const struct drm_display_mode avic_tm070ddh03_mode = {
+@@ -2468,6 +2471,7 @@ static const struct panel_desc innolux_at043tn24 = {
+ .height = 54,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
++ .connector_type = DRM_MODE_CONNECTOR_DPI,
+ .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
+ };
+
+@@ -2509,7 +2513,7 @@ static const struct display_timing innolux_g070y2_l01_timing = {
+ static const struct panel_desc innolux_g070y2_l01 = {
+ .timings = &innolux_g070y2_l01_timing,
+ .num_timings = 1,
+- .bpc = 6,
++ .bpc = 8,
+ .size = {
+ .width = 152,
+ .height = 91,
+@@ -2521,6 +2525,7 @@ static const struct panel_desc innolux_g070y2_l01 = {
+ .unprepare = 800,
+ },
+ .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
++ .bus_flags = DRM_BUS_FLAG_DE_HIGH,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+ };
+
+@@ -2577,7 +2582,7 @@ static const struct panel_desc innolux_g121i1_l01 = {
+ .enable = 200,
+ .disable = 20,
+ },
+- .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
++ .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG,
+ .connector_type = DRM_MODE_CONNECTOR_LVDS,
+ };
+
+@@ -3088,6 +3093,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = {
+ static const struct panel_desc logictechno_lt161010_2nh = {
+ .timings = &logictechno_lt161010_2nh_timing,
+ .num_timings = 1,
++ .bpc = 6,
+ .size = {
+ .width = 154,
+ .height = 86,
+@@ -3117,6 +3123,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = {
+ static const struct panel_desc logictechno_lt170410_2whc = {
+ .timings = &logictechno_lt170410_2whc_timing,
+ .num_timings = 1,
++ .bpc = 8,
+ .size = {
+ .width = 217,
+ .height = 136,
+@@ -3646,6 +3653,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = {
+ .vsync_start = 480 + 49,
+ .vsync_end = 480 + 49 + 2,
+ .vtotal = 480 + 49 + 2 + 22,
++ .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+ };
+
+ static const struct panel_desc powertip_ph800480t013_idf02 = {
+diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+index 320a2a8fd4592..098955526b687 100644
+--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
++++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+@@ -384,7 +384,15 @@ static int st7701_dsi_probe(struct mipi_dsi_device *dsi)
+ st7701->dsi = dsi;
+ st7701->desc = desc;
+
+- return mipi_dsi_attach(dsi);
++ ret = mipi_dsi_attach(dsi);
++ if (ret)
++ goto err_attach;
++
++ return 0;
++
++err_attach:
++ drm_panel_remove(&st7701->panel);
++ return ret;
+ }
+
+ static int st7701_dsi_remove(struct mipi_dsi_device *dsi)
+diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
+index 86cdc0ce79e65..77f4d32e52045 100644
+--- a/drivers/gpu/drm/panfrost/Kconfig
++++ b/drivers/gpu/drm/panfrost/Kconfig
+@@ -3,7 +3,8 @@
+ config DRM_PANFROST
+ tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)"
+ depends on DRM
+- depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
++ depends on ARM || ARM64 || COMPILE_TEST
++ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+ depends on MMU
+ select DRM_SCHED
+ select IOMMU_SUPPORT
+diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+index 194af7f607a6e..be36dd060a2b4 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
++++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+@@ -132,6 +132,17 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
+ return PTR_ERR(opp);
+
+ panfrost_devfreq_profile.initial_freq = cur_freq;
++
++ /*
++ * Set the recommend OPP this will enable and configure the regulator
++ * if any and will avoid a switch off by regulator_late_cleanup()
++ */
++ ret = dev_pm_opp_set_opp(dev, opp);
++ if (ret) {
++ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
++ return ret;
++ }
++
+ dev_pm_opp_put(opp);
+
+ /*
+diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
+index 1ffaef5ec5ff5..4c271244092b4 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
+@@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+ struct panfrost_gem_object *bo;
+ struct drm_panfrost_create_bo *args = data;
+ struct panfrost_gem_mapping *mapping;
++ int ret;
+
+ if (!args->size || args->pad ||
+ (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
+@@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+ !(args->flags & PANFROST_BO_NOEXEC))
+ return -EINVAL;
+
+- bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
+- &args->handle);
++ bo = panfrost_gem_create(dev, args->size, args->flags);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
++ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
++ if (ret)
++ goto out;
++
+ mapping = panfrost_gem_mapping_get(bo, priv);
+- if (!mapping) {
+- drm_gem_object_put(&bo->base.base);
+- return -EINVAL;
++ if (mapping) {
++ args->offset = mapping->mmnode.start << PAGE_SHIFT;
++ panfrost_gem_mapping_put(mapping);
++ } else {
++ /* This can only happen if the handle from
++ * drm_gem_handle_create() has already been guessed and freed
++ * by user space
++ */
++ ret = -EINVAL;
+ }
+
+- args->offset = mapping->mmnode.start << PAGE_SHIFT;
+- panfrost_gem_mapping_put(mapping);
+-
+- return 0;
++out:
++ drm_gem_object_put(&bo->base.base);
++ return ret;
+ }
+
+ /**
+@@ -418,12 +427,12 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
+ }
+ }
+
+- args->retained = drm_gem_shmem_madvise(gem_obj, args->madv);
++ args->retained = drm_gem_shmem_madvise(&bo->base, args->madv);
+
+ if (args->retained) {
+ if (args->madv == PANFROST_MADV_DONTNEED)
+- list_add_tail(&bo->base.madv_list,
+- &pfdev->shrinker_list);
++ list_move_tail(&bo->base.madv_list,
++ &pfdev->shrinker_list);
+ else if (args->madv == PANFROST_MADV_WILLNEED)
+ list_del_init(&bo->base.madv_list);
+ }
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
+index 23377481f4e31..55e3a68ed28a4 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
+@@ -49,7 +49,7 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj)
+ kvfree(bo->sgts);
+ }
+
+- drm_gem_shmem_free_object(obj);
++ drm_gem_shmem_free(&bo->base);
+ }
+
+ struct panfrost_gem_mapping *
+@@ -187,23 +187,25 @@ void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv)
+
+ static int panfrost_gem_pin(struct drm_gem_object *obj)
+ {
+- if (to_panfrost_bo(obj)->is_heap)
++ struct panfrost_gem_object *bo = to_panfrost_bo(obj);
++
++ if (bo->is_heap)
+ return -EINVAL;
+
+- return drm_gem_shmem_pin(obj);
++ return drm_gem_shmem_pin(&bo->base);
+ }
+
+ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
+ .free = panfrost_gem_free_object,
+ .open = panfrost_gem_open,
+ .close = panfrost_gem_close,
+- .print_info = drm_gem_shmem_print_info,
++ .print_info = drm_gem_shmem_object_print_info,
+ .pin = panfrost_gem_pin,
+- .unpin = drm_gem_shmem_unpin,
+- .get_sg_table = drm_gem_shmem_get_sg_table,
+- .vmap = drm_gem_shmem_vmap,
+- .vunmap = drm_gem_shmem_vunmap,
+- .mmap = drm_gem_shmem_mmap,
++ .unpin = drm_gem_shmem_object_unpin,
++ .get_sg_table = drm_gem_shmem_object_get_sg_table,
++ .vmap = drm_gem_shmem_object_vmap,
++ .vunmap = drm_gem_shmem_object_vunmap,
++ .mmap = drm_gem_shmem_object_mmap,
+ };
+
+ /**
+@@ -232,12 +234,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
+ }
+
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+- struct drm_device *dev, size_t size,
+- u32 flags,
+- uint32_t *handle)
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
+ {
+- int ret;
+ struct drm_gem_shmem_object *shmem;
+ struct panfrost_gem_object *bo;
+
+@@ -253,16 +251,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
+ bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
+ bo->is_heap = !!(flags & PANFROST_BO_HEAP);
+
+- /*
+- * Allocate an id of idr table where the obj is registered
+- * and handle has the id what user can see.
+- */
+- ret = drm_gem_handle_create(file_priv, &shmem->base, handle);
+- /* drop reference from allocate - handle holds it now. */
+- drm_gem_object_put(&shmem->base);
+- if (ret)
+- return ERR_PTR(ret);
+-
+ return bo;
+ }
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
+index 8088d5fd8480e..ad2877eeeccdf 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
+@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+ struct sg_table *sgt);
+
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+- struct drm_device *dev, size_t size,
+- u32 flags,
+- uint32_t *handle);
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags);
+
+ int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+ void panfrost_gem_close(struct drm_gem_object *obj,
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+index 1b9f68d8e9aa6..b0142341e2235 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+@@ -52,7 +52,7 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj)
+ goto unlock_mappings;
+
+ panfrost_gem_teardown_mappings_locked(bo);
+- drm_gem_shmem_purge_locked(obj);
++ drm_gem_shmem_purge_locked(&bo->base);
+ ret = true;
+
+ mutex_unlock(&shmem->pages_lock);
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
+index bbe628b306ee3..f8355de6e335d 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
+@@ -360,8 +360,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev)
+
+ panfrost_gpu_init_features(pfdev);
+
+- dma_set_mask_and_coherent(pfdev->dev,
++ err = dma_set_mask_and_coherent(pfdev->dev,
+ DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features)));
++ if (err)
++ return err;
++
+ dma_set_max_seg_size(pfdev->dev, UINT_MAX);
+
+ irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
+diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+index dfe5f1d297636..b5ee076c2841f 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -253,7 +253,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+ if (pm_runtime_active(pfdev->dev))
+ mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
+
+- pm_runtime_put_sync_autosuspend(pfdev->dev);
++ pm_runtime_put_autosuspend(pfdev->dev);
+ }
+
+ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
+@@ -288,7 +288,8 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
+ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
+ {
+ struct panfrost_gem_object *bo = mapping->obj;
+- struct drm_gem_object *obj = &bo->base.base;
++ struct drm_gem_shmem_object *shmem = &bo->base;
++ struct drm_gem_object *obj = &shmem->base;
+ struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
+ struct sg_table *sgt;
+ int prot = IOMMU_READ | IOMMU_WRITE;
+@@ -299,7 +300,7 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
+ if (bo->noexec)
+ prot |= IOMMU_NOEXEC;
+
+- sgt = drm_gem_shmem_get_pages_sgt(obj);
++ sgt = drm_gem_shmem_get_pages_sgt(shmem);
+ if (WARN_ON(IS_ERR(sgt)))
+ return PTR_ERR(sgt);
+
+@@ -468,6 +469,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
+ if (IS_ERR(pages[i])) {
+ mutex_unlock(&bo->base.pages_lock);
+ ret = PTR_ERR(pages[i]);
++ pages[i] = NULL;
+ goto err_pages;
+ }
+ }
+@@ -501,7 +503,7 @@ err_map:
+ err_pages:
+ drm_gem_shmem_put_pages(&bo->base);
+ err_bo:
+- drm_gem_object_put(&bo->base.base);
++ panfrost_gem_mapping_put(bomapping);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+index 5ab03d605f57b..9d9c067c1d705 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
++++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+@@ -105,7 +105,7 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
+ goto err_close_bo;
+ }
+
+- ret = drm_gem_shmem_vmap(&bo->base, &map);
++ ret = drm_gem_shmem_vmap(bo, &map);
+ if (ret)
+ goto err_put_mapping;
+ perfcnt->buf = map.vaddr;
+@@ -164,7 +164,7 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
+ return 0;
+
+ err_vunmap:
+- drm_gem_shmem_vunmap(&bo->base, &map);
++ drm_gem_shmem_vunmap(bo, &map);
+ err_put_mapping:
+ panfrost_gem_mapping_put(perfcnt->mapping);
+ err_close_bo:
+@@ -194,7 +194,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
+ GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+
+ perfcnt->user = NULL;
+- drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, &map);
++ drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base, &map);
+ perfcnt->buf = NULL;
+ panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
+ panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
+diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c
+index bdd883f4f0da5..963a5d5e6987a 100644
+--- a/drivers/gpu/drm/pl111/pl111_versatile.c
++++ b/drivers/gpu/drm/pl111/pl111_versatile.c
+@@ -402,6 +402,7 @@ static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np,
+ if (of_device_is_compatible(child, "arm,pl111")) {
+ has_coretile_clcd = true;
+ ct_clcd = child;
++ of_node_put(child);
+ break;
+ }
+ if (of_device_is_compatible(child, "arm,hdlcd")) {
+diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
+index 359266d9e8604..f0f512d584976 100644
+--- a/drivers/gpu/drm/qxl/qxl_drv.h
++++ b/drivers/gpu/drm/qxl/qxl_drv.h
+@@ -318,7 +318,7 @@ int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
+ u32 domain,
+ size_t size,
+ struct qxl_surface *surf,
+- struct qxl_bo **qobj,
++ struct drm_gem_object **gobj,
+ uint32_t *handle);
+ void qxl_gem_object_free(struct drm_gem_object *gobj);
+ int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
+index d636ba6854513..17df5c7ccf691 100644
+--- a/drivers/gpu/drm/qxl/qxl_dumb.c
++++ b/drivers/gpu/drm/qxl/qxl_dumb.c
+@@ -34,6 +34,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
+ {
+ struct qxl_device *qdev = to_qxl(dev);
+ struct qxl_bo *qobj;
++ struct drm_gem_object *gobj;
+ uint32_t handle;
+ int r;
+ struct qxl_surface surf;
+@@ -62,11 +63,13 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
+
+ r = qxl_gem_object_create_with_handle(qdev, file_priv,
+ QXL_GEM_DOMAIN_CPU,
+- args->size, &surf, &qobj,
++ args->size, &surf, &gobj,
+ &handle);
+ if (r)
+ return r;
++ qobj = gem_to_qxl_bo(gobj);
+ qobj->is_dumb = true;
++ drm_gem_object_put(gobj);
+ args->pitch = pitch;
+ args->handle = handle;
+ return 0;
+diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c
+index a08da0bd9098b..fc5e3763c3595 100644
+--- a/drivers/gpu/drm/qxl/qxl_gem.c
++++ b/drivers/gpu/drm/qxl/qxl_gem.c
+@@ -72,32 +72,41 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size,
+ return 0;
+ }
+
++/*
++ * If the caller passed a valid gobj pointer, it is responsible to call
++ * drm_gem_object_put() when it no longer needs to acess the object.
++ *
++ * If gobj is NULL, it is handled internally.
++ */
+ int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
+ struct drm_file *file_priv,
+ u32 domain,
+ size_t size,
+ struct qxl_surface *surf,
+- struct qxl_bo **qobj,
++ struct drm_gem_object **gobj,
+ uint32_t *handle)
+ {
+- struct drm_gem_object *gobj;
+ int r;
++ struct drm_gem_object *local_gobj;
+
+- BUG_ON(!qobj);
+ BUG_ON(!handle);
+
+ r = qxl_gem_object_create(qdev, size, 0,
+ domain,
+ false, false, surf,
+- &gobj);
++ &local_gobj);
+ if (r)
+ return -ENOMEM;
+- r = drm_gem_handle_create(file_priv, gobj, handle);
++ r = drm_gem_handle_create(file_priv, local_gobj, handle);
+ if (r)
+ return r;
+- /* drop reference from allocate - handle holds it now */
+- *qobj = gem_to_qxl_bo(gobj);
+- drm_gem_object_put(gobj);
++
++ if (gobj)
++ *gobj = local_gobj;
++ else
++ /* drop reference from allocate - handle holds it now */
++ drm_gem_object_put(local_gobj);
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
+index 38aabcbe22382..4066499ca79e0 100644
+--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
++++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
+@@ -39,7 +39,6 @@ static int qxl_alloc_ioctl(struct drm_device *dev, void *data,
+ struct qxl_device *qdev = to_qxl(dev);
+ struct drm_qxl_alloc *qxl_alloc = data;
+ int ret;
+- struct qxl_bo *qobj;
+ uint32_t handle;
+ u32 domain = QXL_GEM_DOMAIN_VRAM;
+
+@@ -51,7 +50,7 @@ static int qxl_alloc_ioctl(struct drm_device *dev, void *data,
+ domain,
+ qxl_alloc->size,
+ NULL,
+- &qobj, &handle);
++ NULL, &handle);
+ if (ret) {
+ DRM_ERROR("%s: failed to create gem ret=%d\n",
+ __func__, ret);
+@@ -393,7 +392,6 @@ static int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data,
+ {
+ struct qxl_device *qdev = to_qxl(dev);
+ struct drm_qxl_alloc_surf *param = data;
+- struct qxl_bo *qobj;
+ int handle;
+ int ret;
+ int size, actual_stride;
+@@ -413,7 +411,7 @@ static int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data,
+ QXL_GEM_DOMAIN_SURFACE,
+ size,
+ &surf,
+- &qobj, &handle);
++ NULL, &handle);
+ if (ret) {
+ DRM_ERROR("%s: failed to create gem ret=%d\n",
+ __func__, ret);
+diff --git a/drivers/gpu/drm/radeon/.gitignore b/drivers/gpu/drm/radeon/.gitignore
+index 9c1a941539836..d8777383a64aa 100644
+--- a/drivers/gpu/drm/radeon/.gitignore
++++ b/drivers/gpu/drm/radeon/.gitignore
+@@ -1,4 +1,4 @@
+-# SPDX-License-Identifier: GPL-2.0-only
++# SPDX-License-Identifier: MIT
+ mkregtable
+ *_reg_safe.h
+
+diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
+index 6f60f4840cc58..52819e7f1fca1 100644
+--- a/drivers/gpu/drm/radeon/Kconfig
++++ b/drivers/gpu/drm/radeon/Kconfig
+@@ -1,4 +1,4 @@
+-# SPDX-License-Identifier: GPL-2.0-only
++# SPDX-License-Identifier: MIT
+ config DRM_RADEON_USERPTR
+ bool "Always enable userptr support"
+ depends on DRM_RADEON
+diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
+index 11c97edde54dd..3d502f1bbfcbe 100644
+--- a/drivers/gpu/drm/radeon/Makefile
++++ b/drivers/gpu/drm/radeon/Makefile
+@@ -1,4 +1,4 @@
+-# SPDX-License-Identifier: GPL-2.0
++# SPDX-License-Identifier: MIT
+ #
+ # Makefile for the drm device driver. This driver provides support for the
+ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
+index 0fce73b9a6469..2b44911372178 100644
+--- a/drivers/gpu/drm/radeon/atombios_encoders.c
++++ b/drivers/gpu/drm/radeon/atombios_encoders.c
+@@ -198,7 +198,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder,
+ * so don't register a backlight device
+ */
+ if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
+- (rdev->pdev->device == 0x6741))
++ (rdev->pdev->device == 0x6741) &&
++ !dmi_match(DMI_PRODUCT_NAME, "iMac12,1"))
+ return;
+
+ if (!radeon_encoder->enc_priv)
+@@ -2187,11 +2188,12 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx)
+
+ /*
+ * On DCE32 any encoder can drive any block so usually just use crtc id,
+- * but Apple thinks different at least on iMac10,1, so there use linkb,
++ * but Apple thinks different at least on iMac10,1 and iMac11,2, so there use linkb,
+ * otherwise the internal eDP panel will stay dark.
+ */
+ if (ASIC_IS_DCE32(rdev)) {
+- if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
++ if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1") ||
++ dmi_match(DMI_PRODUCT_NAME, "iMac11,2"))
+ enc_idx = (dig->linkb) ? 1 : 0;
+ else
+ enc_idx = radeon_crtc->crtc_id;
+diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
+index f0cfb58da4672..4f93cc81ca7a0 100644
+--- a/drivers/gpu/drm/radeon/ci_dpm.c
++++ b/drivers/gpu/drm/radeon/ci_dpm.c
+@@ -5520,6 +5520,7 @@ static int ci_parse_power_table(struct radeon_device *rdev)
+ u8 frev, crev;
+ u8 *power_state_offset;
+ struct ci_ps *ps;
++ int ret;
+
+ if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset))
+@@ -5549,11 +5550,15 @@ static int ci_parse_power_table(struct radeon_device *rdev)
+ non_clock_array_index = power_state->v2.nonClockInfoIndex;
+ non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
+ &non_clock_info_array->nonClockInfo[non_clock_array_index];
+- if (!rdev->pm.power_state[i].clock_info)
+- return -EINVAL;
++ if (!rdev->pm.power_state[i].clock_info) {
++ ret = -EINVAL;
++ goto err_free_ps;
++ }
+ ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL);
+- if (ps == NULL)
+- return -ENOMEM;
++ if (ps == NULL) {
++ ret = -ENOMEM;
++ goto err_free_ps;
++ }
+ rdev->pm.dpm.ps[i].ps_priv = ps;
+ ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
+ non_clock_info,
+@@ -5593,6 +5598,12 @@ static int ci_parse_power_table(struct radeon_device *rdev)
+ }
+
+ return 0;
++
++err_free_ps:
++ for (i = 0; i < rdev->pm.dpm.num_ps; i++)
++ kfree(rdev->pm.dpm.ps[i].ps_priv);
++ kfree(rdev->pm.dpm.ps);
++ return ret;
+ }
+
+ static int ci_get_vbios_boot_values(struct radeon_device *rdev,
+@@ -5681,25 +5692,26 @@ int ci_dpm_init(struct radeon_device *rdev)
+
+ ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state);
+ if (ret) {
+- ci_dpm_fini(rdev);
++ kfree(rdev->pm.dpm.priv);
+ return ret;
+ }
+
+ ret = r600_get_platform_caps(rdev);
+ if (ret) {
+- ci_dpm_fini(rdev);
++ kfree(rdev->pm.dpm.priv);
+ return ret;
+ }
+
+ ret = r600_parse_extended_power_table(rdev);
+ if (ret) {
+- ci_dpm_fini(rdev);
++ kfree(rdev->pm.dpm.priv);
+ return ret;
+ }
+
+ ret = ci_parse_power_table(rdev);
+ if (ret) {
+- ci_dpm_fini(rdev);
++ kfree(rdev->pm.dpm.priv);
++ r600_free_extended_power_table(rdev);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
+index 81b4de7be9f2b..a42f29b6ed7cf 100644
+--- a/drivers/gpu/drm/radeon/cik.c
++++ b/drivers/gpu/drm/radeon/cik.c
+@@ -9534,17 +9534,8 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &bridge_cfg);
+- pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+- &gpu_cfg);
+-
+- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
+-
+- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+@@ -9591,21 +9582,14 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
+ msleep(100);
+
+ /* linkctl */
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+- tmp16);
+-
+- pcie_capability_read_word(rdev->pdev,
+- PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(rdev->pdev,
+- PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ bridge_cfg &
++ PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ gpu_cfg &
++ PCI_EXP_LNKCTL_HAWD);
+
+ /* linkctl2 */
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
+index fdddbbaecbb74..72a0768df00f7 100644
+--- a/drivers/gpu/drm/radeon/cypress_dpm.c
++++ b/drivers/gpu/drm/radeon/cypress_dpm.c
+@@ -557,8 +557,12 @@ static int cypress_populate_mclk_value(struct radeon_device *rdev,
+ ASIC_INTERNAL_MEMORY_SS, vco_freq)) {
+ u32 reference_clock = rdev->clock.mpll.reference_freq;
+ u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div);
+- u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
+- u32 clk_v = ss.percentage *
++ u32 clk_s, clk_v;
++
++ if (!decoded_ref)
++ return -EINVAL;
++ clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
++ clk_v = ss.percentage *
+ (0x4000 * dividers.whole_fb_div + 0x800 * dividers.frac_fb_div) / (clk_s * 625);
+
+ mpll_ss1 &= ~CLKV_MASK;
+diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
+index 769f666335ac4..3e1c1a392fb7b 100644
+--- a/drivers/gpu/drm/radeon/ni_dpm.c
++++ b/drivers/gpu/drm/radeon/ni_dpm.c
+@@ -2241,8 +2241,12 @@ static int ni_populate_mclk_value(struct radeon_device *rdev,
+ ASIC_INTERNAL_MEMORY_SS, vco_freq)) {
+ u32 reference_clock = rdev->clock.mpll.reference_freq;
+ u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div);
+- u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
+- u32 clk_v = ss.percentage *
++ u32 clk_s, clk_v;
++
++ if (!decoded_ref)
++ return -EINVAL;
++ clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
++ clk_v = ss.percentage *
+ (0x4000 * dividers.whole_fb_div + 0x800 * dividers.frac_fb_div) / (clk_s * 625);
+
+ mpll_ss1 &= ~CLKV_MASK;
+@@ -2741,10 +2745,10 @@ static int ni_set_mc_special_registers(struct radeon_device *rdev,
+ table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
+ }
+ j++;
+- if (j > SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE)
+- return -EINVAL;
+ break;
+ case MC_SEQ_RESERVE_M >> 2:
++ if (j >= SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE)
++ return -EINVAL;
+ temp_reg = RREG32(MC_PMG_CMD_MRS1);
+ table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS1 >> 2;
+ table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
+@@ -2753,8 +2757,6 @@ static int ni_set_mc_special_registers(struct radeon_device *rdev,
+ (temp_reg & 0xffff0000) |
+ (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
+ j++;
+- if (j > SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE)
+- return -EINVAL;
+ break;
+ default:
+ break;
+diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
+index 33121655d50bb..63bdc9f6fc243 100644
+--- a/drivers/gpu/drm/radeon/radeon_bios.c
++++ b/drivers/gpu/drm/radeon/radeon_bios.c
+@@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
+
+ if (!found)
+ return false;
++ pci_dev_put(pdev);
+
+ rdev->bios = kmalloc(size, GFP_KERNEL);
+ if (!rdev->bios) {
+@@ -612,13 +613,14 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
+ acpi_size tbl_size;
+ UEFI_ACPI_VFCT *vfct;
+ unsigned offset;
++ bool r = false;
+
+ if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
+ return false;
+ tbl_size = hdr->length;
+ if (tbl_size < sizeof(UEFI_ACPI_VFCT)) {
+ DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n");
+- return false;
++ goto out;
+ }
+
+ vfct = (UEFI_ACPI_VFCT *)hdr;
+@@ -631,13 +633,13 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
+ offset += sizeof(VFCT_IMAGE_HEADER);
+ if (offset > tbl_size) {
+ DRM_ERROR("ACPI VFCT image header truncated\n");
+- return false;
++ goto out;
+ }
+
+ offset += vhdr->ImageLength;
+ if (offset > tbl_size) {
+ DRM_ERROR("ACPI VFCT image truncated\n");
+- return false;
++ goto out;
+ }
+
+ if (vhdr->ImageLength &&
+@@ -649,15 +651,18 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
+ rdev->bios = kmemdup(&vbios->VbiosContent,
+ vhdr->ImageLength,
+ GFP_KERNEL);
++ if (rdev->bios)
++ r = true;
+
+- if (!rdev->bios)
+- return false;
+- return true;
++ goto out;
+ }
+ }
+
+ DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n");
+- return false;
++
++out:
++ acpi_put_table(hdr);
++ return r;
+ }
+ #else
+ static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
+diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
+index 607ad5620bd99..d157bb9072e86 100644
+--- a/drivers/gpu/drm/radeon/radeon_connectors.c
++++ b/drivers/gpu/drm/radeon/radeon_connectors.c
+@@ -204,7 +204,7 @@ int radeon_get_monitor_bpc(struct drm_connector *connector)
+
+ /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+- if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
++ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ (mode_clock * 5/4 <= max_tmds_clock))
+ bpc = 10;
+ else
+@@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
+ native_mode->vdisplay != 0 &&
+ native_mode->clock != 0) {
+ mode = drm_mode_duplicate(dev, native_mode);
++ if (!mode)
++ return NULL;
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ drm_mode_set_name(mode);
+
+@@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode
+ * simpler.
+ */
+ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
++ if (!mode)
++ return NULL;
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
+ }
+diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
+index 9ed2b2700e0a5..e5fbe851ed930 100644
+--- a/drivers/gpu/drm/radeon/radeon_cs.c
++++ b/drivers/gpu/drm/radeon/radeon_cs.c
+@@ -270,7 +270,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
+ {
+ struct drm_radeon_cs *cs = data;
+ uint64_t *chunk_array_ptr;
+- unsigned size, i;
++ u64 size;
++ unsigned i;
+ u32 ring = RADEON_CS_RING_GFX;
+ s32 priority = 0;
+
+diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
+index 4f0fbf6674316..1c005e0ddd388 100644
+--- a/drivers/gpu/drm/radeon/radeon_device.c
++++ b/drivers/gpu/drm/radeon/radeon_device.c
+@@ -1022,6 +1022,7 @@ void radeon_atombios_fini(struct radeon_device *rdev)
+ {
+ if (rdev->mode_info.atom_context) {
+ kfree(rdev->mode_info.atom_context->scratch);
++ kfree(rdev->mode_info.atom_context->iio);
+ }
+ kfree(rdev->mode_info.atom_context);
+ rdev->mode_info.atom_context = NULL;
+@@ -1617,6 +1618,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
+ if (r) {
+ /* delay GPU reset to resume */
+ radeon_fence_driver_force_completion(rdev, i);
++ } else {
++ /* finish executing delayed work */
++ flush_delayed_work(&rdev->fence_drv[i].lockup_work);
+ }
+ }
+
+diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
+index 458f92a708879..57218263ef3b1 100644
+--- a/drivers/gpu/drm/radeon/radeon_gem.c
++++ b/drivers/gpu/drm/radeon/radeon_gem.c
+@@ -61,7 +61,7 @@ static vm_fault_t radeon_gem_fault(struct vm_fault *vmf)
+ goto unlock_resv;
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+- TTM_BO_VM_NUM_PREFAULT, 1);
++ TTM_BO_VM_NUM_PREFAULT);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ goto unlock_mclk;
+
+@@ -456,7 +456,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ struct radeon_device *rdev = dev->dev_private;
+ struct drm_radeon_gem_set_domain *args = data;
+ struct drm_gem_object *gobj;
+- struct radeon_bo *robj;
+ int r;
+
+ /* for now if someone requests domain CPU -
+@@ -469,13 +468,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ up_read(&rdev->exclusive_lock);
+ return -ENOENT;
+ }
+- robj = gem_to_radeon_bo(gobj);
+
+ r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
+
+ drm_gem_object_put(gobj);
+ up_read(&rdev->exclusive_lock);
+- r = radeon_gem_handle_lockup(robj->rdev, r);
++ r = radeon_gem_handle_lockup(rdev, r);
+ return r;
+ }
+
+diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
+index 482fb0ae6cb5d..0e14907f2043e 100644
+--- a/drivers/gpu/drm/radeon/radeon_kms.c
++++ b/drivers/gpu/drm/radeon/radeon_kms.c
+@@ -648,6 +648,8 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
+ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+ {
+ struct radeon_device *rdev = dev->dev_private;
++ struct radeon_fpriv *fpriv;
++ struct radeon_vm *vm;
+ int r;
+
+ file_priv->driver_priv = NULL;
+@@ -660,48 +662,52 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+
+ /* new gpu have virtual address space support */
+ if (rdev->family >= CHIP_CAYMAN) {
+- struct radeon_fpriv *fpriv;
+- struct radeon_vm *vm;
+
+ fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+ if (unlikely(!fpriv)) {
+ r = -ENOMEM;
+- goto out_suspend;
++ goto err_suspend;
+ }
+
+ if (rdev->accel_working) {
+ vm = &fpriv->vm;
+ r = radeon_vm_init(rdev, vm);
+- if (r) {
+- kfree(fpriv);
+- goto out_suspend;
+- }
++ if (r)
++ goto err_fpriv;
+
+ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
+- if (r) {
+- radeon_vm_fini(rdev, vm);
+- kfree(fpriv);
+- goto out_suspend;
+- }
++ if (r)
++ goto err_vm_fini;
+
+ /* map the ib pool buffer read only into
+ * virtual address space */
+ vm->ib_bo_va = radeon_vm_bo_add(rdev, vm,
+ rdev->ring_tmp_bo.bo);
++ if (!vm->ib_bo_va) {
++ r = -ENOMEM;
++ goto err_vm_fini;
++ }
++
+ r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va,
+ RADEON_VA_IB_OFFSET,
+ RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_SNOOPED);
+- if (r) {
+- radeon_vm_fini(rdev, vm);
+- kfree(fpriv);
+- goto out_suspend;
+- }
++ if (r)
++ goto err_vm_fini;
+ }
+ file_priv->driver_priv = fpriv;
+ }
+
+-out_suspend:
++ pm_runtime_mark_last_busy(dev->dev);
++ pm_runtime_put_autosuspend(dev->dev);
++ return 0;
++
++err_vm_fini:
++ radeon_vm_fini(rdev, vm);
++err_fpriv:
++ kfree(fpriv);
++
++err_suspend:
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c
+index d57a3e1df8d63..4464fd21a3029 100644
+--- a/drivers/gpu/drm/radeon/rv740_dpm.c
++++ b/drivers/gpu/drm/radeon/rv740_dpm.c
+@@ -249,8 +249,12 @@ int rv740_populate_mclk_value(struct radeon_device *rdev,
+ ASIC_INTERNAL_MEMORY_SS, vco_freq)) {
+ u32 reference_clock = rdev->clock.mpll.reference_freq;
+ u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div);
+- u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
+- u32 clk_v = 0x40000 * ss.percentage *
++ u32 clk_s, clk_v;
++
++ if (!decoded_ref)
++ return -EINVAL;
++ clk_s = reference_clock * 5 / (decoded_ref * ss.rate);
++ clk_v = 0x40000 * ss.percentage *
+ (dividers.whole_fb_div + (dividers.frac_fb_div / 8)) / (clk_s * 10000);
+
+ mpll_ss1 &= ~CLKV_MASK;
+diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
+index 013e44ed0f39a..4679b798a0384 100644
+--- a/drivers/gpu/drm/radeon/si.c
++++ b/drivers/gpu/drm/radeon/si.c
+@@ -7131,17 +7131,8 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &bridge_cfg);
+- pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+- &gpu_cfg);
+-
+- tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
+-
+- tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+- pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+@@ -7188,22 +7179,14 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
+ msleep(100);
+
+ /* linkctl */
+- pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(root,
+- PCI_EXP_LNKCTL,
+- tmp16);
+-
+- pcie_capability_read_word(rdev->pdev,
+- PCI_EXP_LNKCTL,
+- &tmp16);
+- tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+- tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+- pcie_capability_write_word(rdev->pdev,
+- PCI_EXP_LNKCTL,
+- tmp16);
++ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ bridge_cfg &
++ PCI_EXP_LNKCTL_HAWD);
++ pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_HAWD,
++ gpu_cfg &
++ PCI_EXP_LNKCTL_HAWD);
+
+ /* linkctl2 */
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
+index b47e74421e347..3e588ddba2457 100644
+--- a/drivers/gpu/drm/rcar-du/Kconfig
++++ b/drivers/gpu/drm/rcar-du/Kconfig
+@@ -4,8 +4,6 @@ config DRM_RCAR_DU
+ depends on DRM && OF
+ depends on ARM || ARM64
+ depends on ARCH_RENESAS || COMPILE_TEST
+- imply DRM_RCAR_CMM
+- imply DRM_RCAR_LVDS
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+ select DRM_GEM_CMA_HELPER
+@@ -14,13 +12,17 @@ config DRM_RCAR_DU
+ Choose this option if you have an R-Car chipset.
+ If M is selected the module will be called rcar-du-drm.
+
+-config DRM_RCAR_CMM
+- tristate "R-Car DU Color Management Module (CMM) Support"
+- depends on DRM && OF
++config DRM_RCAR_USE_CMM
++ bool "R-Car DU Color Management Module (CMM) Support"
+ depends on DRM_RCAR_DU
++ default DRM_RCAR_DU
+ help
+ Enable support for R-Car Color Management Module (CMM).
+
++config DRM_RCAR_CMM
++ def_tristate DRM_RCAR_DU
++ depends on DRM_RCAR_USE_CMM
++
+ config DRM_RCAR_DW_HDMI
+ tristate "R-Car Gen3 and RZ/G2 DU HDMI Encoder Support"
+ depends on DRM && OF
+@@ -28,15 +30,20 @@ config DRM_RCAR_DW_HDMI
+ help
+ Enable support for R-Car Gen3 or RZ/G2 internal HDMI encoder.
+
++config DRM_RCAR_USE_LVDS
++ bool "R-Car DU LVDS Encoder Support"
++ depends on DRM_BRIDGE && OF
++ default DRM_RCAR_DU
++ help
++ Enable support for the R-Car Display Unit embedded LVDS encoders.
++
+ config DRM_RCAR_LVDS
+- tristate "R-Car DU LVDS Encoder Support"
+- depends on DRM && DRM_BRIDGE && OF
++ def_tristate DRM_RCAR_DU
++ depends on DRM_RCAR_USE_LVDS
+ select DRM_KMS_HELPER
+ select DRM_PANEL
+ select OF_FLATTREE
+ select OF_OVERLAY
+- help
+- Enable support for the R-Car Display Unit embedded LVDS encoders.
+
+ config DRM_RCAR_VSP
+ bool "R-Car DU VSP Compositor Support" if ARM
+diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+index ea7e39d035457..ee7e375ee6724 100644
+--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
++++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+@@ -215,6 +215,7 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc)
+ const struct drm_display_mode *mode = &rcrtc->crtc.state->adjusted_mode;
+ struct rcar_du_device *rcdu = rcrtc->dev;
+ unsigned long mode_clock = mode->clock * 1000;
++ unsigned int hdse_offset;
+ u32 dsmr;
+ u32 escr;
+
+@@ -298,10 +299,15 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc)
+ | DSMR_DIPM_DISP | DSMR_CSPM;
+ rcar_du_crtc_write(rcrtc, DSMR, dsmr);
+
++ hdse_offset = 19;
++ if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2))
++ hdse_offset += 25;
++
+ /* Display timings */
+- rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - 19);
++ rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start -
++ hdse_offset);
+ rcar_du_crtc_write(rcrtc, HDER, mode->htotal - mode->hsync_start +
+- mode->hdisplay - 19);
++ mode->hdisplay - hdse_offset);
+ rcar_du_crtc_write(rcrtc, HSWR, mode->hsync_end -
+ mode->hsync_start - 1);
+ rcar_du_crtc_write(rcrtc, HCR, mode->htotal - 1);
+@@ -836,6 +842,7 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc,
+ struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
+ struct rcar_du_device *rcdu = rcrtc->dev;
+ bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
++ unsigned int min_sync_porch;
+ unsigned int vbp;
+
+ if (interlaced && !rcar_du_has(rcdu, RCAR_DU_FEATURE_INTERLACED))
+@@ -843,9 +850,14 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc,
+
+ /*
+ * The hardware requires a minimum combined horizontal sync and back
+- * porch of 20 pixels and a minimum vertical back porch of 3 lines.
++ * porch of 20 pixels (when CMM isn't used) or 45 pixels (when CMM is
++ * used), and a minimum vertical back porch of 3 lines.
+ */
+- if (mode->htotal - mode->hsync_start < 20)
++ min_sync_porch = 20;
++ if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2))
++ min_sync_porch += 25;
++
++ if (mode->htotal - mode->hsync_start < min_sync_porch)
+ return MODE_HBLANK_NARROW;
+
+ vbp = (mode->vtotal - mode->vsync_end) / (interlaced ? 2 : 1);
+diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+index 4bf4e25d7f011..6bc0c4e6cd965 100644
+--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
++++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+@@ -109,8 +109,8 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
+ renc = drmm_encoder_alloc(&rcdu->ddev, struct rcar_du_encoder, base,
+ &rcar_du_encoder_funcs, DRM_MODE_ENCODER_NONE,
+ NULL);
+- if (!renc)
+- return -ENOMEM;
++ if (IS_ERR(renc))
++ return PTR_ERR(renc);
+
+ renc->output = output;
+
+diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+index ade2327a10e2c..512581698a1e0 100644
+--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
++++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+@@ -398,7 +398,15 @@ static int rockchip_dp_probe(struct platform_device *pdev)
+ if (IS_ERR(dp->adp))
+ return PTR_ERR(dp->adp);
+
+- return component_add(dev, &rockchip_dp_component_ops);
++ ret = component_add(dev, &rockchip_dp_component_ops);
++ if (ret)
++ goto err_dp_remove;
++
++ return 0;
++
++err_dp_remove:
++ analogix_dp_remove(dp->adp);
++ return ret;
+ }
+
+ static int rockchip_dp_remove(struct platform_device *pdev)
+diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
+index 13c6b857158fc..20e63cadec8c7 100644
+--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
+@@ -277,8 +277,9 @@ static int cdn_dp_connector_get_modes(struct drm_connector *connector)
+ return ret;
+ }
+
+-static int cdn_dp_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++cdn_dp_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ struct cdn_dp_device *dp = connector_to_dp(connector);
+ struct drm_display_info *display_info = &dp->connector.display_info;
+@@ -564,7 +565,7 @@ static void cdn_dp_encoder_mode_set(struct drm_encoder *encoder,
+ video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC);
+ video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC);
+
+- memcpy(&dp->mode, adjusted, sizeof(*mode));
++ drm_mode_copy(&dp->mode, adjusted);
+ }
+
+ static bool cdn_dp_check_link_status(struct cdn_dp_device *dp)
+diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+index a2262bee5aa47..1129f98fe7f94 100644
+--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+@@ -268,6 +268,8 @@ struct dw_mipi_dsi_rockchip {
+ struct dw_mipi_dsi *dmd;
+ const struct rockchip_dw_dsi_chip_data *cdata;
+ struct dw_mipi_dsi_plat_data pdata;
++
++ bool dsi_bound;
+ };
+
+ struct dphy_pll_parameter_map {
+@@ -773,10 +775,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
+ if (mux < 0)
+ return;
+
+- pm_runtime_get_sync(dsi->dev);
+- if (dsi->slave)
+- pm_runtime_get_sync(dsi->slave->dev);
+-
+ /*
+ * For the RK3399, the clk of grf must be enabled before writing grf
+ * register. And for RK3288 or other soc, this grf_clk must be NULL,
+@@ -795,20 +793,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
+ clk_disable_unprepare(dsi->grf_clk);
+ }
+
+-static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
+-{
+- struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
+-
+- if (dsi->slave)
+- pm_runtime_put(dsi->slave->dev);
+- pm_runtime_put(dsi->dev);
+-}
+-
+ static const struct drm_encoder_helper_funcs
+ dw_mipi_dsi_encoder_helper_funcs = {
+ .atomic_check = dw_mipi_dsi_encoder_atomic_check,
+ .enable = dw_mipi_dsi_encoder_enable,
+- .disable = dw_mipi_dsi_encoder_disable,
+ };
+
+ static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
+@@ -938,10 +926,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
+ put_device(second);
+ }
+
++ pm_runtime_get_sync(dsi->dev);
++ if (dsi->slave)
++ pm_runtime_get_sync(dsi->slave->dev);
++
+ ret = clk_prepare_enable(dsi->pllref_clk);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
+- return ret;
++ goto out_pm_runtime;
+ }
+
+ /*
+@@ -953,7 +945,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
+ ret = clk_prepare_enable(dsi->grf_clk);
+ if (ret) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
+- return ret;
++ goto out_pll_clk;
+ }
+
+ dw_mipi_dsi_rockchip_config(dsi);
+@@ -965,16 +957,27 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
+ ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
+- return ret;
++ goto out_pll_clk;
+ }
+
+ ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
+- return ret;
++ goto out_pll_clk;
+ }
+
++ dsi->dsi_bound = true;
++
+ return 0;
++
++out_pll_clk:
++ clk_disable_unprepare(dsi->pllref_clk);
++out_pm_runtime:
++ pm_runtime_put(dsi->dev);
++ if (dsi->slave)
++ pm_runtime_put(dsi->slave->dev);
++
++ return ret;
+ }
+
+ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
+@@ -986,9 +989,15 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
+ if (dsi->is_slave)
+ return;
+
++ dsi->dsi_bound = false;
++
+ dw_mipi_dsi_unbind(dsi->dmd);
+
+ clk_disable_unprepare(dsi->pllref_clk);
++
++ pm_runtime_put(dsi->dev);
++ if (dsi->slave)
++ pm_runtime_put(dsi->slave->dev);
+ }
+
+ static const struct component_ops dw_mipi_dsi_rockchip_ops = {
+@@ -1018,23 +1027,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data,
+ if (ret) {
+ DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n",
+ ret);
+- return ret;
++ goto out;
+ }
+
+ second = dw_mipi_dsi_rockchip_find_second(dsi);
+- if (IS_ERR(second))
+- return PTR_ERR(second);
++ if (IS_ERR(second)) {
++ ret = PTR_ERR(second);
++ goto out;
++ }
+ if (second) {
+ ret = component_add(second, &dw_mipi_dsi_rockchip_ops);
+ if (ret) {
+ DRM_DEV_ERROR(second,
+ "Failed to register component: %d\n",
+ ret);
+- return ret;
++ goto out;
+ }
+ }
+
+ return 0;
++
++out:
++ mutex_lock(&dsi->usage_mutex);
++ dsi->usage_mode = DW_DSI_USAGE_IDLE;
++ mutex_unlock(&dsi->usage_mutex);
++ return ret;
+ }
+
+ static int dw_mipi_dsi_rockchip_host_detach(void *priv_data,
+@@ -1276,6 +1293,36 @@ static const struct phy_ops dw_mipi_dsi_dphy_ops = {
+ .exit = dw_mipi_dsi_dphy_exit,
+ };
+
++static int __maybe_unused dw_mipi_dsi_rockchip_resume(struct device *dev)
++{
++ struct dw_mipi_dsi_rockchip *dsi = dev_get_drvdata(dev);
++ int ret;
++
++ /*
++ * Re-configure DSI state, if we were previously initialized. We need
++ * to do this before rockchip_drm_drv tries to re-enable() any panels.
++ */
++ if (dsi->dsi_bound) {
++ ret = clk_prepare_enable(dsi->grf_clk);
++ if (ret) {
++ DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
++ return ret;
++ }
++
++ dw_mipi_dsi_rockchip_config(dsi);
++ if (dsi->slave)
++ dw_mipi_dsi_rockchip_config(dsi->slave);
++
++ clk_disable_unprepare(dsi->grf_clk);
++ }
++
++ return 0;
++}
++
++static const struct dev_pm_ops dw_mipi_dsi_rockchip_pm_ops = {
++ SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, dw_mipi_dsi_rockchip_resume)
++};
++
+ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev)
+ {
+ struct device *dev = &pdev->dev;
+@@ -1397,14 +1444,10 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev)
+ if (ret != -EPROBE_DEFER)
+ DRM_DEV_ERROR(dev,
+ "Failed to probe dw_mipi_dsi: %d\n", ret);
+- goto err_clkdisable;
++ return ret;
+ }
+
+ return 0;
+-
+-err_clkdisable:
+- clk_disable_unprepare(dsi->pllref_clk);
+- return ret;
+ }
+
+ static int dw_mipi_dsi_rockchip_remove(struct platform_device *pdev)
+@@ -1593,6 +1636,13 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = {
+ .remove = dw_mipi_dsi_rockchip_remove,
+ .driver = {
+ .of_match_table = dw_mipi_dsi_rockchip_dt_ids,
++ .pm = &dw_mipi_dsi_rockchip_pm_ops,
+ .name = "dw-mipi-dsi-rockchip",
++ /*
++ * For dual-DSI display, one DSI pokes at the other DSI's
++ * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not
++ * safe for asynchronous probe.
++ */
++ .probe_type = PROBE_FORCE_SYNCHRONOUS,
+ },
+ };
+diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+index 830bdd5e9b7ce..8677c82716784 100644
+--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
++++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+@@ -529,13 +529,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
+ return ret;
+ }
+
+- ret = clk_prepare_enable(hdmi->vpll_clk);
+- if (ret) {
+- DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n",
+- ret);
+- return ret;
+- }
+-
+ hdmi->phy = devm_phy_optional_get(dev, "hdmi");
+ if (IS_ERR(hdmi->phy)) {
+ ret = PTR_ERR(hdmi->phy);
+@@ -544,6 +537,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
+ return ret;
+ }
+
++ ret = clk_prepare_enable(hdmi->vpll_clk);
++ if (ret) {
++ DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n",
++ ret);
++ return ret;
++ }
++
+ drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
+
+diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
+index 7afdc54eb3ec1..78120da5e63aa 100644
+--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
++++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
+@@ -488,7 +488,7 @@ static void inno_hdmi_encoder_mode_set(struct drm_encoder *encoder,
+ inno_hdmi_setup(hdmi, adj_mode);
+
+ /* Store the display mode for plugin/DPMS poweron events */
+- memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
++ drm_mode_copy(&hdmi->previous_mode, adj_mode);
+ }
+
+ static void inno_hdmi_encoder_enable(struct drm_encoder *encoder)
+diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+index 1c546c3a89984..17e7c40a9e7b9 100644
+--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+@@ -383,7 +383,7 @@ rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder,
+ struct rk3066_hdmi *hdmi = to_rk3066_hdmi(encoder);
+
+ /* Store the display mode for plugin/DPMS poweron events. */
+- memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
++ drm_mode_copy(&hdmi->previous_mode, adj_mode);
+ }
+
+ static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
+diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+index 7971f57436dd7..3b18b6a7acd3e 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+@@ -251,9 +251,6 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj,
+ else
+ ret = rockchip_drm_gem_object_mmap_dma(obj, vma);
+
+- if (ret)
+- drm_gem_vm_close(vma);
+-
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+index a25b98b7f5bd7..cfe13b203b891 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+@@ -703,13 +703,13 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc,
+ if (crtc->state->self_refresh_active)
+ rockchip_drm_set_win_enabled(crtc, false);
+
++ if (crtc->state->self_refresh_active)
++ goto out;
++
+ mutex_lock(&vop->vop_lock);
+
+ drm_crtc_vblank_off(crtc);
+
+- if (crtc->state->self_refresh_active)
+- goto out;
+-
+ /*
+ * Vop standby will take effect at end of current frame,
+ * if dsp hold valid irq happen, it means standby complete.
+@@ -741,9 +741,9 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc,
+ vop_core_clks_disable(vop);
+ pm_runtime_put(vop->dev);
+
+-out:
+ mutex_unlock(&vop->vop_lock);
+
++out:
+ if (crtc->state->event && !crtc->state->active) {
+ spin_lock_irq(&crtc->dev->event_lock);
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+@@ -820,12 +820,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
+ * need align with 2 pixel.
+ */
+ if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) {
+- DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
++ DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n");
+ return -EINVAL;
+ }
+
+ if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) {
+- DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n");
++ DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n");
+ return -EINVAL;
+ }
+
+@@ -833,7 +833,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
+ struct vop *vop = to_vop(crtc);
+
+ if (!vop->data->afbc) {
+- DRM_ERROR("vop does not support AFBC\n");
++ DRM_DEBUG_KMS("vop does not support AFBC\n");
+ return -EINVAL;
+ }
+
+@@ -842,15 +842,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
+ return ret;
+
+ if (new_plane_state->src.x1 || new_plane_state->src.y1) {
+- DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n",
+- new_plane_state->src.x1,
+- new_plane_state->src.y1, fb->offsets[0]);
++ DRM_DEBUG_KMS("AFBC does not support offset display, " \
++ "xpos=%d, ypos=%d, offset=%d\n",
++ new_plane_state->src.x1, new_plane_state->src.y1,
++ fb->offsets[0]);
+ return -EINVAL;
+ }
+
+ if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) {
+- DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
+- new_plane_state->rotation);
++ DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n",
++ new_plane_state->rotation);
+ return -EINVAL;
+ }
+ }
+@@ -1550,6 +1551,9 @@ static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
+ {
+ struct rockchip_crtc_state *rockchip_state;
+
++ if (WARN_ON(!crtc->state))
++ return NULL;
++
+ rockchip_state = kzalloc(sizeof(*rockchip_state), GFP_KERNEL);
+ if (!rockchip_state)
+ return NULL;
+@@ -2116,10 +2120,10 @@ static int vop_bind(struct device *dev, struct device *master, void *data)
+ vop_win_init(vop);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- vop->len = resource_size(res);
+ vop->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(vop->regs))
+ return PTR_ERR(vop->regs);
++ vop->len = resource_size(res);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (res) {
+diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c
+index 551653940e396..2550429df49fe 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
+@@ -145,7 +145,7 @@ static int rk3288_lvds_poweron(struct rockchip_lvds *lvds)
+ DRM_DEV_ERROR(lvds->dev, "failed to enable lvds pclk %d\n", ret);
+ return ret;
+ }
+- ret = pm_runtime_get_sync(lvds->dev);
++ ret = pm_runtime_resume_and_get(lvds->dev);
+ if (ret < 0) {
+ DRM_DEV_ERROR(lvds->dev, "failed to get pm runtime: %d\n", ret);
+ clk_disable(lvds->pclk);
+@@ -329,16 +329,20 @@ static int px30_lvds_poweron(struct rockchip_lvds *lvds)
+ {
+ int ret;
+
+- ret = pm_runtime_get_sync(lvds->dev);
++ ret = pm_runtime_resume_and_get(lvds->dev);
+ if (ret < 0) {
+ DRM_DEV_ERROR(lvds->dev, "failed to get pm runtime: %d\n", ret);
+ return ret;
+ }
+
+ /* Enable LVDS mode */
+- return regmap_update_bits(lvds->grf, PX30_LVDS_GRF_PD_VO_CON1,
++ ret = regmap_update_bits(lvds->grf, PX30_LVDS_GRF_PD_VO_CON1,
+ PX30_LVDS_MODE_EN(1) | PX30_LVDS_P2S_EN(1),
+ PX30_LVDS_MODE_EN(1) | PX30_LVDS_P2S_EN(1));
++ if (ret)
++ pm_runtime_put(lvds->dev);
++
++ return ret;
+ }
+
+ static void px30_lvds_poweroff(struct rockchip_lvds *lvds)
+diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+index ca7cc82125cbc..8c873fcd0e99f 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
++++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+@@ -902,6 +902,7 @@ static const struct vop_win_phy rk3399_win01_data = {
+ .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0),
+ .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1),
+ .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12),
++ .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21),
+ .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22),
+ .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0),
+ .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0),
+@@ -912,6 +913,7 @@ static const struct vop_win_phy rk3399_win01_data = {
+ .uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16),
+ .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0),
+ .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0),
++ .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0),
+ };
+
+ /*
+@@ -922,11 +924,11 @@ static const struct vop_win_phy rk3399_win01_data = {
+ static const struct vop_win_data rk3399_vop_win_data[] = {
+ { .base = 0x00, .phy = &rk3399_win01_data,
+ .type = DRM_PLANE_TYPE_PRIMARY },
+- { .base = 0x40, .phy = &rk3288_win01_data,
++ { .base = 0x40, .phy = &rk3368_win01_data,
+ .type = DRM_PLANE_TYPE_OVERLAY },
+- { .base = 0x00, .phy = &rk3288_win23_data,
++ { .base = 0x00, .phy = &rk3368_win23_data,
+ .type = DRM_PLANE_TYPE_OVERLAY },
+- { .base = 0x50, .phy = &rk3288_win23_data,
++ { .base = 0x50, .phy = &rk3368_win23_data,
+ .type = DRM_PLANE_TYPE_CURSOR },
+ };
+
+diff --git a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c
+index 6b4759ed6bfd4..c491429f1a029 100644
+--- a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c
++++ b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c
+@@ -131,8 +131,10 @@ sideband_msg_req_encode_decode(struct drm_dp_sideband_msg_req_body *in)
+ return false;
+
+ txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
+- if (!txmsg)
++ if (!txmsg) {
++ kfree(out);
+ return false;
++ }
+
+ drm_dp_encode_sideband_req(in, txmsg);
+ ret = drm_dp_decode_sideband_req(txmsg, out);
+diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
+index b6ee8a82e656c..577c477b5f467 100644
+--- a/drivers/gpu/drm/sti/sti_dvo.c
++++ b/drivers/gpu/drm/sti/sti_dvo.c
+@@ -288,7 +288,7 @@ static void sti_dvo_set_mode(struct drm_bridge *bridge,
+
+ DRM_DEBUG_DRIVER("\n");
+
+- memcpy(&dvo->mode, mode, sizeof(struct drm_display_mode));
++ drm_mode_copy(&dvo->mode, mode);
+
+ /* According to the path used (main or aux), the dvo clocks should
+ * have a different parent clock. */
+@@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector)
+
+ #define CLK_TOLERANCE_HZ 50
+
+-static int sti_dvo_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++sti_dvo_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ int target = mode->clock * 1000;
+ int target_min = target - CLK_TOLERANCE_HZ;
+diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
+index 03f3377f918c0..aa54a6400ab84 100644
+--- a/drivers/gpu/drm/sti/sti_hda.c
++++ b/drivers/gpu/drm/sti/sti_hda.c
+@@ -523,7 +523,7 @@ static void sti_hda_set_mode(struct drm_bridge *bridge,
+
+ DRM_DEBUG_DRIVER("\n");
+
+- memcpy(&hda->mode, mode, sizeof(struct drm_display_mode));
++ drm_mode_copy(&hda->mode, mode);
+
+ if (!hda_get_mode_idx(hda->mode, &mode_idx)) {
+ DRM_ERROR("Undefined mode\n");
+@@ -600,8 +600,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector)
+
+ #define CLK_TOLERANCE_HZ 50
+
+-static int sti_hda_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++sti_hda_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ int target = mode->clock * 1000;
+ int target_min = target - CLK_TOLERANCE_HZ;
+diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
+index f3ace11209dd7..36bea1551ef84 100644
+--- a/drivers/gpu/drm/sti/sti_hdmi.c
++++ b/drivers/gpu/drm/sti/sti_hdmi.c
+@@ -940,7 +940,7 @@ static void sti_hdmi_set_mode(struct drm_bridge *bridge,
+ DRM_DEBUG_DRIVER("\n");
+
+ /* Copy the drm display mode in the connector local structure */
+- memcpy(&hdmi->mode, mode, sizeof(struct drm_display_mode));
++ drm_mode_copy(&hdmi->mode, mode);
+
+ /* Update clock framerate according to the selected mode */
+ ret = clk_set_rate(hdmi->clk_pix, mode->clock * 1000);
+@@ -1003,8 +1003,9 @@ fail:
+
+ #define CLK_TOLERANCE_HZ 50
+
+-static int sti_hdmi_connector_mode_valid(struct drm_connector *connector,
+- struct drm_display_mode *mode)
++static enum drm_mode_status
++sti_hdmi_connector_mode_valid(struct drm_connector *connector,
++ struct drm_display_mode *mode)
+ {
+ int target = mode->clock * 1000;
+ int target_min = target - CLK_TOLERANCE_HZ;
+diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
+index 195de30eb90c7..9d235b60b4286 100644
+--- a/drivers/gpu/drm/stm/ltdc.c
++++ b/drivers/gpu/drm/stm/ltdc.c
+@@ -528,8 +528,8 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc)
+ struct drm_device *ddev = crtc->dev;
+ struct drm_connector_list_iter iter;
+ struct drm_connector *connector = NULL;
+- struct drm_encoder *encoder = NULL;
+- struct drm_bridge *bridge = NULL;
++ struct drm_encoder *encoder = NULL, *en_iter;
++ struct drm_bridge *bridge = NULL, *br_iter;
+ struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+ u32 hsync, vsync, accum_hbp, accum_vbp, accum_act_w, accum_act_h;
+ u32 total_width, total_height;
+@@ -538,15 +538,19 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc)
+ int ret;
+
+ /* get encoder from crtc */
+- drm_for_each_encoder(encoder, ddev)
+- if (encoder->crtc == crtc)
++ drm_for_each_encoder(en_iter, ddev)
++ if (en_iter->crtc == crtc) {
++ encoder = en_iter;
+ break;
++ }
+
+ if (encoder) {
+ /* get bridge from encoder */
+- list_for_each_entry(bridge, &encoder->bridge_chain, chain_node)
+- if (bridge->encoder == encoder)
++ list_for_each_entry(br_iter, &encoder->bridge_chain, chain_node)
++ if (br_iter->encoder == encoder) {
++ bridge = br_iter;
+ break;
++ }
+
+ /* Get the connector from encoder */
+ drm_connector_list_iter_begin(ddev, &iter);
+diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
+index 5755f0432e774..8c796de53222c 100644
+--- a/drivers/gpu/drm/sun4i/Kconfig
++++ b/drivers/gpu/drm/sun4i/Kconfig
+@@ -46,6 +46,7 @@ config DRM_SUN6I_DSI
+ default MACH_SUN8I
+ select CRC_CCITT
+ select DRM_MIPI_DSI
++ select RESET_CONTROLLER
+ select PHY_SUN6I_MIPI_DPHY
+ help
+ Choose this option if you want have an Allwinner SoC with
+diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
+index 54dd562e294ce..0cc64c97385e0 100644
+--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
++++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
+@@ -72,7 +72,6 @@ static int sun4i_drv_bind(struct device *dev)
+ goto free_drm;
+ }
+
+- dev_set_drvdata(dev, drm);
+ drm->dev_private = drv;
+ INIT_LIST_HEAD(&drv->frontend_list);
+ INIT_LIST_HEAD(&drv->engine_list);
+@@ -95,12 +94,12 @@ static int sun4i_drv_bind(struct device *dev)
+ /* drm_vblank_init calls kcalloc, which can fail */
+ ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
+ if (ret)
+- goto cleanup_mode_config;
++ goto unbind_all;
+
+ /* Remove early framebuffers (ie. simplefb) */
+ ret = drm_aperture_remove_framebuffers(false, &sun4i_drv_driver);
+ if (ret)
+- goto cleanup_mode_config;
++ goto unbind_all;
+
+ sun4i_framebuffer_init(drm);
+
+@@ -113,10 +112,14 @@ static int sun4i_drv_bind(struct device *dev)
+
+ drm_fbdev_generic_setup(drm, 32);
+
++ dev_set_drvdata(dev, drm);
++
+ return 0;
+
+ finish_poll:
+ drm_kms_helper_poll_fini(drm);
++unbind_all:
++ component_unbind_all(dev, NULL);
+ cleanup_mode_config:
+ drm_mode_config_cleanup(drm);
+ of_reserved_mem_device_release(dev);
+@@ -129,6 +132,7 @@ static void sun4i_drv_unbind(struct device *dev)
+ {
+ struct drm_device *drm = dev_get_drvdata(dev);
+
++ dev_set_drvdata(dev, NULL);
+ drm_dev_unregister(drm);
+ drm_kms_helper_poll_fini(drm);
+ drm_atomic_helper_shutdown(drm);
+diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c
+index edb60ae0a9b75..faecc2935039b 100644
+--- a/drivers/gpu/drm/sun4i/sun4i_frontend.c
++++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c
+@@ -222,13 +222,11 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
+
+ /* Set the physical address of the buffer in memory */
+ paddr = drm_fb_cma_get_gem_addr(fb, state, 0);
+- paddr -= PHYS_OFFSET;
+ DRM_DEBUG_DRIVER("Setting buffer #0 address to %pad\n", &paddr);
+ regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR0_REG, paddr);
+
+ if (fb->format->num_planes > 1) {
+ paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 2 : 1);
+- paddr -= PHYS_OFFSET;
+ DRM_DEBUG_DRIVER("Setting buffer #1 address to %pad\n", &paddr);
+ regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR1_REG,
+ paddr);
+@@ -236,7 +234,6 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
+
+ if (fb->format->num_planes > 2) {
+ paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 1 : 2);
+- paddr -= PHYS_OFFSET;
+ DRM_DEBUG_DRIVER("Setting buffer #2 address to %pad\n", &paddr);
+ regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR2_REG,
+ paddr);
+diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
+index 9f06dec0fc61d..bb43196d5d83e 100644
+--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
++++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
+@@ -777,21 +777,19 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private)
+ static int sun4i_tcon_init_clocks(struct device *dev,
+ struct sun4i_tcon *tcon)
+ {
+- tcon->clk = devm_clk_get(dev, "ahb");
++ tcon->clk = devm_clk_get_enabled(dev, "ahb");
+ if (IS_ERR(tcon->clk)) {
+ dev_err(dev, "Couldn't get the TCON bus clock\n");
+ return PTR_ERR(tcon->clk);
+ }
+- clk_prepare_enable(tcon->clk);
+
+ if (tcon->quirks->has_channel_0) {
+- tcon->sclk0 = devm_clk_get(dev, "tcon-ch0");
++ tcon->sclk0 = devm_clk_get_enabled(dev, "tcon-ch0");
+ if (IS_ERR(tcon->sclk0)) {
+ dev_err(dev, "Couldn't get the TCON channel 0 clock\n");
+ return PTR_ERR(tcon->sclk0);
+ }
+ }
+- clk_prepare_enable(tcon->sclk0);
+
+ if (tcon->quirks->has_channel_1) {
+ tcon->sclk1 = devm_clk_get(dev, "tcon-ch1");
+@@ -804,12 +802,6 @@ static int sun4i_tcon_init_clocks(struct device *dev,
+ return 0;
+ }
+
+-static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon)
+-{
+- clk_disable_unprepare(tcon->sclk0);
+- clk_disable_unprepare(tcon->clk);
+-}
+-
+ static int sun4i_tcon_init_irq(struct device *dev,
+ struct sun4i_tcon *tcon)
+ {
+@@ -1224,14 +1216,14 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
+ ret = sun4i_tcon_init_regmap(dev, tcon);
+ if (ret) {
+ dev_err(dev, "Couldn't init our TCON regmap\n");
+- goto err_free_clocks;
++ goto err_assert_reset;
+ }
+
+ if (tcon->quirks->has_channel_0) {
+ ret = sun4i_dclk_create(dev, tcon);
+ if (ret) {
+ dev_err(dev, "Couldn't create our TCON dot clock\n");
+- goto err_free_clocks;
++ goto err_assert_reset;
+ }
+ }
+
+@@ -1294,8 +1286,6 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
+ err_free_dotclock:
+ if (tcon->quirks->has_channel_0)
+ sun4i_dclk_free(tcon);
+-err_free_clocks:
+- sun4i_tcon_free_clocks(tcon);
+ err_assert_reset:
+ reset_control_assert(tcon->lcd_rst);
+ return ret;
+@@ -1309,7 +1299,6 @@ static void sun4i_tcon_unbind(struct device *dev, struct device *master,
+ list_del(&tcon->list);
+ if (tcon->quirks->has_channel_0)
+ sun4i_dclk_free(tcon);
+- sun4i_tcon_free_clocks(tcon);
+ }
+
+ static const struct component_ops sun4i_tcon_ops = {
+diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+index 4f5efcace68ea..51edb4244af7c 100644
+--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
++++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+@@ -531,7 +531,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+ struct drm_display_mode *mode)
+ {
+ struct mipi_dsi_device *device = dsi->device;
+- unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
++ int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
+ u16 hbp = 0, hfp = 0, hsa = 0, hblk = 0, vblk = 0;
+ u32 basic_ctl = 0;
+ size_t bytes;
+@@ -555,7 +555,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+ * (4 bytes). Its minimal size is therefore 10 bytes
+ */
+ #define HSA_PACKET_OVERHEAD 10
+- hsa = max((unsigned int)HSA_PACKET_OVERHEAD,
++ hsa = max(HSA_PACKET_OVERHEAD,
+ (mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD);
+
+ /*
+@@ -564,7 +564,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+ * therefore 6 bytes
+ */
+ #define HBP_PACKET_OVERHEAD 6
+- hbp = max((unsigned int)HBP_PACKET_OVERHEAD,
++ hbp = max(HBP_PACKET_OVERHEAD,
+ (mode->htotal - mode->hsync_end) * Bpp - HBP_PACKET_OVERHEAD);
+
+ /*
+@@ -574,7 +574,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+ * 16 bytes
+ */
+ #define HFP_PACKET_OVERHEAD 16
+- hfp = max((unsigned int)HFP_PACKET_OVERHEAD,
++ hfp = max(HFP_PACKET_OVERHEAD,
+ (mode->hsync_start - mode->hdisplay) * Bpp - HFP_PACKET_OVERHEAD);
+
+ /*
+@@ -583,7 +583,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+ * bytes). Its minimal size is therefore 10 bytes.
+ */
+ #define HBLK_PACKET_OVERHEAD 10
+- hblk = max((unsigned int)HBLK_PACKET_OVERHEAD,
++ hblk = max(HBLK_PACKET_OVERHEAD,
+ (mode->htotal - (mode->hsync_end - mode->hsync_start)) * Bpp -
+ HBLK_PACKET_OVERHEAD);
+
+diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.h b/drivers/gpu/drm/sun4i/sun8i_csc.h
+index a55a38ad849c1..022cafa6c06cb 100644
+--- a/drivers/gpu/drm/sun4i/sun8i_csc.h
++++ b/drivers/gpu/drm/sun4i/sun8i_csc.h
+@@ -16,8 +16,8 @@ struct sun8i_mixer;
+ #define CCSC10_OFFSET 0xA0000
+ #define CCSC11_OFFSET 0xF0000
+
+-#define SUN8I_CSC_CTRL(base) (base + 0x0)
+-#define SUN8I_CSC_COEFF(base, i) (base + 0x10 + 4 * i)
++#define SUN8I_CSC_CTRL(base) ((base) + 0x0)
++#define SUN8I_CSC_COEFF(base, i) ((base) + 0x10 + 4 * (i))
+
+ #define SUN8I_CSC_CTRL_EN BIT(0)
+
+diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
+index b64d93da651d2..5e2b0175df36f 100644
+--- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
++++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
+@@ -658,8 +658,10 @@ int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node)
+ return -EPROBE_DEFER;
+
+ phy = platform_get_drvdata(pdev);
+- if (!phy)
++ if (!phy) {
++ put_device(&pdev->dev);
+ return -EPROBE_DEFER;
++ }
+
+ hdmi->phy = phy;
+
+diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h
+index 145833a9d82d4..5b3fbee186713 100644
+--- a/drivers/gpu/drm/sun4i/sun8i_mixer.h
++++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h
+@@ -111,10 +111,10 @@
+ /* format 13 is semi-planar YUV411 VUVU */
+ #define SUN8I_MIXER_FBFMT_YUV411 14
+ /* format 15 doesn't exist */
+-/* format 16 is P010 YVU */
+-#define SUN8I_MIXER_FBFMT_P010_YUV 17
+-/* format 18 is P210 YVU */
+-#define SUN8I_MIXER_FBFMT_P210_YUV 19
++#define SUN8I_MIXER_FBFMT_P010_YUV 16
++/* format 17 is P010 YVU */
++#define SUN8I_MIXER_FBFMT_P210_YUV 18
++/* format 19 is P210 YVU */
+ /* format 20 is packed YVU444 10-bit */
+ /* format 21 is packed YUV444 10-bit */
+
+diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
+index a29d64f875635..abb409b08bc64 100644
+--- a/drivers/gpu/drm/tegra/dc.c
++++ b/drivers/gpu/drm/tegra/dc.c
+@@ -3022,8 +3022,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
+ usleep_range(2000, 4000);
+
+ err = reset_control_assert(dc->rst);
+- if (err < 0)
++ if (err < 0) {
++ clk_disable_unprepare(dc->clk);
+ return err;
++ }
+
+ usleep_range(2000, 4000);
+
+diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
+index 1f96e416fa082..c96c07c6458c3 100644
+--- a/drivers/gpu/drm/tegra/dpaux.c
++++ b/drivers/gpu/drm/tegra/dpaux.c
+@@ -468,7 +468,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
+
+ dpaux->irq = platform_get_irq(pdev, 0);
+ if (dpaux->irq < 0)
+- return -ENXIO;
++ return dpaux->irq;
+
+ if (!pdev->dev.pm_domain) {
+ dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
+diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
+index 8d37d6b00562a..4f5affdc60807 100644
+--- a/drivers/gpu/drm/tegra/drm.c
++++ b/drivers/gpu/drm/tegra/drm.c
+@@ -21,6 +21,10 @@
+ #include <drm/drm_prime.h>
+ #include <drm/drm_vblank.h>
+
++#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
++#include <asm/dma-iommu.h>
++#endif
++
+ #include "dc.h"
+ #include "drm.h"
+ #include "gem.h"
+@@ -936,6 +940,17 @@ int host1x_client_iommu_attach(struct host1x_client *client)
+ struct iommu_group *group = NULL;
+ int err;
+
++#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
++ if (client->dev->archdata.mapping) {
++ struct dma_iommu_mapping *mapping =
++ to_dma_iommu_mapping(client->dev);
++ arm_iommu_detach_device(client->dev);
++ arm_iommu_release_mapping(mapping);
++
++ domain = iommu_get_domain_for_dev(client->dev);
++ }
++#endif
++
+ /*
+ * If the host1x client is already attached to an IOMMU domain that is
+ * not the shared IOMMU domain, don't try to attach it to a different
+@@ -1068,6 +1083,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
+ struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
+ struct iommu_domain *domain;
+
++ /* Our IOMMU usage policy doesn't currently play well with GART */
++ if (of_machine_is_compatible("nvidia,tegra20"))
++ return false;
++
+ /*
+ * If the Tegra DRM clients are backed by an IOMMU, push buffers are
+ * likely to be allocated beyond the 32-bit boundary if sufficient
+diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
+index f46d377f0c304..de1333dc0d867 100644
+--- a/drivers/gpu/drm/tegra/dsi.c
++++ b/drivers/gpu/drm/tegra/dsi.c
+@@ -1538,8 +1538,10 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi)
+ dsi->slave = platform_get_drvdata(gangster);
+ of_node_put(np);
+
+- if (!dsi->slave)
++ if (!dsi->slave) {
++ put_device(&gangster->dev);
+ return -EPROBE_DEFER;
++ }
+
+ dsi->slave->master = dsi;
+ }
+diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c
+index 1824d2db0e2ce..d53f890fa6893 100644
+--- a/drivers/gpu/drm/tegra/firewall.c
++++ b/drivers/gpu/drm/tegra/firewall.c
+@@ -97,6 +97,9 @@ static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset)
+ {
+ bool is_addr;
+
++ if (!fw->client->ops->is_addr_reg)
++ return 0;
++
+ is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class,
+ offset);
+ if (is_addr)
+diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
+index de288cba39055..ba3722f1b8651 100644
+--- a/drivers/gpu/drm/tegra/gr2d.c
++++ b/drivers/gpu/drm/tegra/gr2d.c
+@@ -4,9 +4,11 @@
+ */
+
+ #include <linux/clk.h>
++#include <linux/delay.h>
+ #include <linux/iommu.h>
+ #include <linux/module.h>
+ #include <linux/of_device.h>
++#include <linux/reset.h>
+
+ #include "drm.h"
+ #include "gem.h"
+@@ -19,6 +21,7 @@ struct gr2d_soc {
+ struct gr2d {
+ struct tegra_drm_client client;
+ struct host1x_channel *channel;
++ struct reset_control *rst;
+ struct clk *clk;
+
+ const struct gr2d_soc *soc;
+@@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev)
+ if (!syncpts)
+ return -ENOMEM;
+
++ gr2d->rst = devm_reset_control_get(dev, NULL);
++ if (IS_ERR(gr2d->rst)) {
++ dev_err(dev, "cannot get reset\n");
++ return PTR_ERR(gr2d->rst);
++ }
++
+ gr2d->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(gr2d->clk)) {
+ dev_err(dev, "cannot get clock\n");
+@@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev)
+ return err;
+ }
+
++ usleep_range(2000, 4000);
++
++ err = reset_control_deassert(gr2d->rst);
++ if (err < 0) {
++ dev_err(dev, "failed to deassert reset: %d\n", err);
++ goto disable_clk;
++ }
++
+ INIT_LIST_HEAD(&gr2d->client.base.list);
+ gr2d->client.base.ops = &gr2d_client_ops;
+ gr2d->client.base.dev = dev;
+@@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev)
+ err = host1x_client_register(&gr2d->client.base);
+ if (err < 0) {
+ dev_err(dev, "failed to register host1x client: %d\n", err);
+- clk_disable_unprepare(gr2d->clk);
+- return err;
++ goto assert_rst;
+ }
+
+ /* initialize address register map */
+@@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, gr2d);
+
+ return 0;
++
++assert_rst:
++ (void)reset_control_assert(gr2d->rst);
++disable_clk:
++ clk_disable_unprepare(gr2d->clk);
++
++ return err;
+ }
+
+ static int gr2d_remove(struct platform_device *pdev)
+@@ -259,6 +282,12 @@ static int gr2d_remove(struct platform_device *pdev)
+ return err;
+ }
+
++ err = reset_control_assert(gr2d->rst);
++ if (err < 0)
++ dev_err(&pdev->dev, "failed to assert reset: %d\n", err);
++
++ usleep_range(2000, 4000);
++
+ clk_disable_unprepare(gr2d->clk);
+
+ return 0;
+diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
+index 0ea320c1092bd..f2f76a0897a80 100644
+--- a/drivers/gpu/drm/tegra/sor.c
++++ b/drivers/gpu/drm/tegra/sor.c
+@@ -1153,7 +1153,7 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
+ struct drm_dp_link *link)
+ {
+ const u64 f = 100000, link_rate = link->rate * 1000;
+- const u64 pclk = mode->clock * 1000;
++ const u64 pclk = (u64)mode->clock * 1000;
+ u64 input, output, watermark, num;
+ struct tegra_sor_params params;
+ u32 num_syms_per_line;
+diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
+index 776f825df52fa..aba9d0c9d9031 100644
+--- a/drivers/gpu/drm/tegra/submit.c
++++ b/drivers/gpu/drm/tegra/submit.c
+@@ -475,8 +475,10 @@ static void release_job(struct host1x_job *job)
+ kfree(job_data->used_mappings);
+ kfree(job_data);
+
+- if (pm_runtime_enabled(client->base.dev))
++ if (pm_runtime_enabled(client->base.dev)) {
++ pm_runtime_mark_last_busy(client->base.dev);
+ pm_runtime_put_autosuspend(client->base.dev);
++ }
+ }
+
+ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
+index c02010ff2b7f2..d3e2fab910860 100644
+--- a/drivers/gpu/drm/tegra/vic.c
++++ b/drivers/gpu/drm/tegra/vic.c
+@@ -5,6 +5,7 @@
+
+ #include <linux/clk.h>
+ #include <linux/delay.h>
++#include <linux/dma-mapping.h>
+ #include <linux/host1x.h>
+ #include <linux/iommu.h>
+ #include <linux/module.h>
+@@ -232,10 +233,8 @@ static int vic_load_firmware(struct vic *vic)
+
+ if (!client->group) {
+ virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL);
+-
+- err = dma_mapping_error(vic->dev, iova);
+- if (err < 0)
+- return err;
++ if (!virt)
++ return -ENOMEM;
+ } else {
+ virt = tegra_drm_alloc(tegra, size, &iova);
+ }
+@@ -276,7 +275,7 @@ cleanup:
+ }
+
+
+-static int vic_runtime_resume(struct device *dev)
++static int __maybe_unused vic_runtime_resume(struct device *dev)
+ {
+ struct vic *vic = dev_get_drvdata(dev);
+ int err;
+@@ -310,7 +309,7 @@ disable:
+ return err;
+ }
+
+-static int vic_runtime_suspend(struct device *dev)
++static int __maybe_unused vic_runtime_suspend(struct device *dev)
+ {
+ struct vic *vic = dev_get_drvdata(dev);
+ int err;
+diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
+index 60b92df615aa5..f54517698710f 100644
+--- a/drivers/gpu/drm/tidss/tidss_dispc.c
++++ b/drivers/gpu/drm/tidss/tidss_dispc.c
+@@ -1855,8 +1855,8 @@ static const struct {
+ { DRM_FORMAT_XBGR4444, 0x21, },
+ { DRM_FORMAT_RGBX4444, 0x22, },
+
+- { DRM_FORMAT_ARGB1555, 0x25, },
+- { DRM_FORMAT_ABGR1555, 0x26, },
++ { DRM_FORMAT_XRGB1555, 0x25, },
++ { DRM_FORMAT_XBGR1555, 0x26, },
+
+ { DRM_FORMAT_XRGB8888, 0x27, },
+ { DRM_FORMAT_XBGR8888, 0x28, },
+diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c
+index 7594cf6e186eb..3b86d002ef62e 100644
+--- a/drivers/gpu/drm/tilcdc/tilcdc_external.c
++++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c
+@@ -60,11 +60,13 @@ struct drm_connector *tilcdc_encoder_find_connector(struct drm_device *ddev,
+ int tilcdc_add_component_encoder(struct drm_device *ddev)
+ {
+ struct tilcdc_drm_private *priv = ddev->dev_private;
+- struct drm_encoder *encoder;
++ struct drm_encoder *encoder = NULL, *iter;
+
+- list_for_each_entry(encoder, &ddev->mode_config.encoder_list, head)
+- if (encoder->possible_crtcs & (1 << priv->crtc->index))
++ list_for_each_entry(iter, &ddev->mode_config.encoder_list, head)
++ if (iter->possible_crtcs & (1 << priv->crtc->index)) {
++ encoder = iter;
+ break;
++ }
+
+ if (!encoder) {
+ dev_err(ddev->dev, "%s: No suitable encoder found\n", __func__);
+diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c
+index 73415fa9ae0f5..eb8116ff0d902 100644
+--- a/drivers/gpu/drm/tiny/bochs.c
++++ b/drivers/gpu/drm/tiny/bochs.c
+@@ -305,6 +305,8 @@ static void bochs_hw_fini(struct drm_device *dev)
+ static void bochs_hw_blank(struct bochs_device *bochs, bool blank)
+ {
+ DRM_DEBUG_DRIVER("hw_blank %d\n", blank);
++ /* enable color bit (so VGA_IS1_RC access works) */
++ bochs_vga_writeb(bochs, VGA_MIS_W, VGA_MIS_COLOR);
+ /* discard ar_flip_flop */
+ (void)bochs_vga_readb(bochs, VGA_IS1_RC);
+ /* blank or unblank; we need only update index and set 0x20 */
+diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
+index 4611ec408506b..2a81311b22172 100644
+--- a/drivers/gpu/drm/tiny/cirrus.c
++++ b/drivers/gpu/drm/tiny/cirrus.c
+@@ -450,7 +450,7 @@ static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe,
+ if (state->fb && cirrus->cpp != cirrus_cpp(state->fb))
+ cirrus_mode_set(cirrus, &crtc->mode, state->fb);
+
+- if (drm_atomic_helper_damage_merged(old_state, state, &rect))
++ if (state->fb && drm_atomic_helper_damage_merged(old_state, state, &rect))
+ cirrus_fb_blit_rect(state->fb, &shadow_plane_state->data[0], &rect);
+ }
+
+diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c
+index e9a63f4b2993c..e159dfb5f7fe5 100644
+--- a/drivers/gpu/drm/tiny/ili9486.c
++++ b/drivers/gpu/drm/tiny/ili9486.c
+@@ -43,6 +43,7 @@ static int waveshare_command(struct mipi_dbi *mipi, u8 *cmd, u8 *par,
+ size_t num)
+ {
+ struct spi_device *spi = mipi->spi;
++ unsigned int bpw = 8;
+ void *data = par;
+ u32 speed_hz;
+ int i, ret;
+@@ -56,8 +57,6 @@ static int waveshare_command(struct mipi_dbi *mipi, u8 *cmd, u8 *par,
+ * The displays are Raspberry Pi HATs and connected to the 8-bit only
+ * SPI controller, so 16-bit command and parameters need byte swapping
+ * before being transferred as 8-bit on the big endian SPI bus.
+- * Pixel data bytes have already been swapped before this function is
+- * called.
+ */
+ buf[0] = cpu_to_be16(*cmd);
+ gpiod_set_value_cansleep(mipi->dc, 0);
+@@ -71,12 +70,18 @@ static int waveshare_command(struct mipi_dbi *mipi, u8 *cmd, u8 *par,
+ for (i = 0; i < num; i++)
+ buf[i] = cpu_to_be16(par[i]);
+ num *= 2;
+- speed_hz = mipi_dbi_spi_cmd_max_speed(spi, num);
+ data = buf;
+ }
+
++ /*
++ * Check whether pixel data bytes needs to be swapped or not
++ */
++ if (*cmd == MIPI_DCS_WRITE_MEMORY_START && !mipi->swap_bytes)
++ bpw = 16;
++
+ gpiod_set_value_cansleep(mipi->dc, 1);
+- ret = mipi_dbi_spi_transfer(spi, speed_hz, 8, data, num);
++ speed_hz = mipi_dbi_spi_cmd_max_speed(spi, num);
++ ret = mipi_dbi_spi_transfer(spi, speed_hz, bpw, data, num);
+ free:
+ kfree(buf);
+
+diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
+index 481b48bde0473..f3c2c173ca4b5 100644
+--- a/drivers/gpu/drm/tiny/simpledrm.c
++++ b/drivers/gpu/drm/tiny/simpledrm.c
+@@ -458,7 +458,7 @@ static struct drm_display_mode simpledrm_mode(unsigned int width,
+ {
+ struct drm_display_mode mode = { SIMPLEDRM_MODE(width, height) };
+
+- mode.clock = 60 /* Hz */ * mode.hdisplay * mode.vdisplay;
++ mode.clock = mode.hdisplay * mode.vdisplay * 60 / 1000 /* kHz */;
+ drm_mode_set_name(&mode);
+
+ return mode;
+@@ -614,7 +614,7 @@ static const struct drm_connector_funcs simpledrm_connector_funcs = {
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+ };
+
+-static int
++static enum drm_mode_status
+ simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
+ const struct drm_display_mode *mode)
+ {
+@@ -779,6 +779,9 @@ static int simpledrm_device_init_modeset(struct simpledrm_device *sdev)
+ if (ret)
+ return ret;
+ drm_connector_helper_add(connector, &simpledrm_connector_helper_funcs);
++ drm_connector_set_panel_orientation_with_quirk(connector,
++ DRM_MODE_PANEL_ORIENTATION_UNKNOWN,
++ mode->hdisplay, mode->vdisplay);
+
+ formats = simpledrm_device_formats(sdev, &nformats);
+
+diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c
+index fc40dd10efa8f..713e4b2862105 100644
+--- a/drivers/gpu/drm/tiny/st7735r.c
++++ b/drivers/gpu/drm/tiny/st7735r.c
+@@ -174,6 +174,7 @@ MODULE_DEVICE_TABLE(of, st7735r_of_match);
+
+ static const struct spi_device_id st7735r_id[] = {
+ { "jd-t18003-t01", (uintptr_t)&jd_t18003_t01_cfg },
++ { "rh128128t", (uintptr_t)&rh128128t_cfg },
+ { },
+ };
+ MODULE_DEVICE_TABLE(spi, st7735r_id);
+diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
+index bb9e02c31946e..391ed462f7fbb 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo.c
++++ b/drivers/gpu/drm/ttm/ttm_bo.c
+@@ -552,17 +552,18 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
+ goto out;
+ }
+
+-bounce:
+- ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
+- if (ret == -EMULTIHOP) {
++ do {
++ ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
++ if (ret != -EMULTIHOP)
++ break;
++
+ ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop);
+- if (ret) {
++ } while (!ret);
++
++ if (ret) {
++ ttm_resource_free(bo, &evict_mem);
++ if (ret != -ERESTARTSYS && ret != -EINTR)
+ pr_err("Buffer eviction failed\n");
+- ttm_resource_free(bo, &evict_mem);
+- goto out;
+- }
+- /* try and move to final place now. */
+- goto bounce;
+ }
+ out:
+ return ret;
+@@ -603,6 +604,13 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
+ {
+ bool ret = false;
+
++ if (bo->pin_count) {
++ *locked = false;
++ if (busy)
++ *busy = false;
++ return false;
++ }
++
+ if (bo->base.resv == ctx->resv) {
+ dma_resv_assert_held(bo->base.resv);
+ if (ctx->allow_res_evict)
+@@ -724,6 +732,8 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
+ ret = ttm_bo_evict(bo, ctx);
+ if (locked)
+ ttm_bo_unreserve(bo);
++ else
++ ttm_bo_move_to_lru_tail_unlocked(bo);
+
+ ttm_bo_put(bo);
+ return ret;
+@@ -985,7 +995,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
+ /*
+ * We might need to add a TTM.
+ */
+- if (bo->resource->mem_type == TTM_PL_SYSTEM) {
++ if (!bo->resource || bo->resource->mem_type == TTM_PL_SYSTEM) {
+ ret = ttm_tt_create(bo, true);
+ if (ret)
+ return ret;
+@@ -1185,6 +1195,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ ret = ttm_bo_handle_move_mem(bo, evict_mem, true, &ctx, &hop);
+ if (unlikely(ret != 0)) {
+ WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n");
++ ttm_resource_free(bo, &evict_mem);
+ goto out;
+ }
+ }
+diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+index f56be5bc0861e..4a655ab23c89d 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+@@ -171,89 +171,6 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ }
+ EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-/**
+- * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
+- * @vmf: Fault data
+- * @bo: The buffer object
+- * @page_offset: Page offset from bo start
+- * @fault_page_size: The size of the fault in pages.
+- * @pgprot: The page protections.
+- * Does additional checking whether it's possible to insert a PUD or PMD
+- * pfn and performs the insertion.
+- *
+- * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
+- * a huge fault was not possible, or on insertion error.
+- */
+-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+- struct ttm_buffer_object *bo,
+- pgoff_t page_offset,
+- pgoff_t fault_page_size,
+- pgprot_t pgprot)
+-{
+- pgoff_t i;
+- vm_fault_t ret;
+- unsigned long pfn;
+- pfn_t pfnt;
+- struct ttm_tt *ttm = bo->ttm;
+- bool write = vmf->flags & FAULT_FLAG_WRITE;
+-
+- /* Fault should not cross bo boundary. */
+- page_offset &= ~(fault_page_size - 1);
+- if (page_offset + fault_page_size > bo->resource->num_pages)
+- goto out_fallback;
+-
+- if (bo->resource->bus.is_iomem)
+- pfn = ttm_bo_io_mem_pfn(bo, page_offset);
+- else
+- pfn = page_to_pfn(ttm->pages[page_offset]);
+-
+- /* pfn must be fault_page_size aligned. */
+- if ((pfn & (fault_page_size - 1)) != 0)
+- goto out_fallback;
+-
+- /* Check that memory is contiguous. */
+- if (!bo->resource->bus.is_iomem) {
+- for (i = 1; i < fault_page_size; ++i) {
+- if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
+- goto out_fallback;
+- }
+- } else if (bo->bdev->funcs->io_mem_pfn) {
+- for (i = 1; i < fault_page_size; ++i) {
+- if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
+- goto out_fallback;
+- }
+- }
+-
+- pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
+- if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
+- ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
+-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+- else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
+- ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
+-#endif
+- else
+- WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
+-
+- if (ret != VM_FAULT_NOPAGE)
+- goto out_fallback;
+-
+- return VM_FAULT_NOPAGE;
+-out_fallback:
+- count_vm_event(THP_FAULT_FALLBACK);
+- return VM_FAULT_FALLBACK;
+-}
+-#else
+-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+- struct ttm_buffer_object *bo,
+- pgoff_t page_offset,
+- pgoff_t fault_page_size,
+- pgprot_t pgprot)
+-{
+- return VM_FAULT_FALLBACK;
+-}
+-#endif
+-
+ /**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+@@ -261,7 +178,6 @@ static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+- * @fault_page_size: The size of the fault in pages.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+@@ -275,8 +191,7 @@ static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+ */
+ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+- pgoff_t num_prefault,
+- pgoff_t fault_page_size)
++ pgoff_t num_prefault)
+ {
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+@@ -327,11 +242,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ prot = pgprot_decrypted(prot);
+ }
+
+- /* We don't prefault on huge faults. Yet. */
+- if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
+- return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
+- fault_page_size, prot);
+-
+ /*
+ * Speculatively prefault a number of pages. Only error on
+ * first page.
+@@ -429,7 +339,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+
+ prot = vma->vm_page_prot;
+ if (drm_dev_enter(ddev, &idx)) {
+- ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
++ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, prot);
+@@ -519,11 +429,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+
+ switch (bo->resource->mem_type) {
+ case TTM_PL_SYSTEM:
+- if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+- ret = ttm_tt_swapin(bo->ttm);
+- if (unlikely(ret != 0))
+- return ret;
+- }
+ fallthrough;
+ case TTM_PL_TT:
+ ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
+diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
+index 82cbb29a05aa3..aa3512af051ad 100644
+--- a/drivers/gpu/drm/ttm/ttm_pool.c
++++ b/drivers/gpu/drm/ttm/ttm_pool.c
+@@ -345,6 +345,65 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
+ return p->private;
+ }
+
++/* Called when we got a page, either from a pool or newly allocated */
++static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
++ struct page *p, dma_addr_t **dma_addr,
++ unsigned long *num_pages,
++ struct page ***pages)
++{
++ unsigned int i;
++ int r;
++
++ if (*dma_addr) {
++ r = ttm_pool_map(pool, order, p, dma_addr);
++ if (r)
++ return r;
++ }
++
++ *num_pages -= 1 << order;
++ for (i = 1 << order; i; --i, ++(*pages), ++p)
++ **pages = p;
++
++ return 0;
++}
++
++/**
++ * ttm_pool_free_range() - Free a range of TTM pages
++ * @pool: The pool used for allocating.
++ * @tt: The struct ttm_tt holding the page pointers.
++ * @caching: The page caching mode used by the range.
++ * @start_page: index for first page to free.
++ * @end_page: index for last page to free + 1.
++ *
++ * During allocation the ttm_tt page-vector may be populated with ranges of
++ * pages with different attributes if allocation hit an error without being
++ * able to completely fulfill the allocation. This function can be used
++ * to free these individual ranges.
++ */
++static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
++ enum ttm_caching caching,
++ pgoff_t start_page, pgoff_t end_page)
++{
++ struct page **pages = tt->pages;
++ unsigned int order;
++ pgoff_t i, nr;
++
++ for (i = start_page; i < end_page; i += nr, pages += nr) {
++ struct ttm_pool_type *pt = NULL;
++
++ order = ttm_pool_page_order(pool, *pages);
++ nr = (1UL << order);
++ if (tt->dma_address)
++ ttm_pool_unmap(pool, tt->dma_address[i], nr);
++
++ pt = ttm_pool_select_type(pool, caching, order);
++ if (pt)
++ ttm_pool_type_give(pt, *pages);
++ else
++ ttm_pool_free_page(pool, caching, order, *pages);
++ }
++}
++
+ /**
+ * ttm_pool_alloc - Fill a ttm_tt object
+ *
+@@ -360,12 +419,14 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
+ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
+ struct ttm_operation_ctx *ctx)
+ {
+- unsigned long num_pages = tt->num_pages;
++ pgoff_t num_pages = tt->num_pages;
+ dma_addr_t *dma_addr = tt->dma_address;
+ struct page **caching = tt->pages;
+ struct page **pages = tt->pages;
++ enum ttm_caching page_caching;
+ gfp_t gfp_flags = GFP_USER;
+- unsigned int i, order;
++ pgoff_t caching_divide;
++ unsigned int order;
+ struct page *p;
+ int r;
+
+@@ -386,45 +447,61 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
+ for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+ num_pages;
+ order = min_t(unsigned int, order, __fls(num_pages))) {
+- bool apply_caching = false;
+ struct ttm_pool_type *pt;
+
++ page_caching = tt->caching;
+ pt = ttm_pool_select_type(pool, tt->caching, order);
+ p = pt ? ttm_pool_type_take(pt) : NULL;
+ if (p) {
+- apply_caching = true;
+- } else {
+- p = ttm_pool_alloc_page(pool, gfp_flags, order);
+- if (p && PageHighMem(p))
+- apply_caching = true;
+- }
+-
+- if (!p) {
+- if (order) {
+- --order;
+- continue;
+- }
+- r = -ENOMEM;
+- goto error_free_all;
+- }
+-
+- if (apply_caching) {
+ r = ttm_pool_apply_caching(caching, pages,
+ tt->caching);
+ if (r)
+ goto error_free_page;
+- caching = pages + (1 << order);
++
++ caching = pages;
++ do {
++ r = ttm_pool_page_allocated(pool, order, p,
++ &dma_addr,
++ &num_pages,
++ &pages);
++ if (r)
++ goto error_free_page;
++
++ caching = pages;
++ if (num_pages < (1 << order))
++ break;
++
++ p = ttm_pool_type_take(pt);
++ } while (p);
+ }
+
+- if (dma_addr) {
+- r = ttm_pool_map(pool, order, p, &dma_addr);
++ page_caching = ttm_cached;
++ while (num_pages >= (1 << order) &&
++ (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
++
++ if (PageHighMem(p)) {
++ r = ttm_pool_apply_caching(caching, pages,
++ tt->caching);
++ if (r)
++ goto error_free_page;
++ caching = pages;
++ }
++ r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
++ &num_pages, &pages);
+ if (r)
+ goto error_free_page;
++ if (PageHighMem(p))
++ caching = pages;
+ }
+
+- num_pages -= 1 << order;
+- for (i = 1 << order; i; --i)
+- *(pages++) = p++;
++ if (!p) {
++ if (order) {
++ --order;
++ continue;
++ }
++ r = -ENOMEM;
++ goto error_free_all;
++ }
+ }
+
+ r = ttm_pool_apply_caching(caching, pages, tt->caching);
+@@ -434,15 +511,13 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
+ return 0;
+
+ error_free_page:
+- ttm_pool_free_page(pool, tt->caching, order, p);
++ ttm_pool_free_page(pool, page_caching, order, p);
+
+ error_free_all:
+ num_pages = tt->num_pages - num_pages;
+- for (i = 0; i < num_pages; ) {
+- order = ttm_pool_page_order(pool, tt->pages[i]);
+- ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
+- i += 1 << order;
+- }
++ caching_divide = caching - tt->pages;
++ ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
++ ttm_pool_free_range(pool, tt, ttm_cached, caching_divide, num_pages);
+
+ return r;
+ }
+@@ -458,27 +533,7 @@ EXPORT_SYMBOL(ttm_pool_alloc);
+ */
+ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
+ {
+- unsigned int i;
+-
+- for (i = 0; i < tt->num_pages; ) {
+- struct page *p = tt->pages[i];
+- unsigned int order, num_pages;
+- struct ttm_pool_type *pt;
+-
+- order = ttm_pool_page_order(pool, p);
+- num_pages = 1ULL << order;
+- if (tt->dma_address)
+- ttm_pool_unmap(pool, tt->dma_address[i], num_pages);
+-
+- pt = ttm_pool_select_type(pool, tt->caching, order);
+- if (pt)
+- ttm_pool_type_give(pt, tt->pages[i]);
+- else
+- ttm_pool_free_page(pool, tt->caching, order,
+- tt->pages[i]);
+-
+- i += num_pages;
+- }
++ ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);
+
+ while (atomic_long_read(&allocated_pages) > page_pool_size)
+ ttm_pool_shrink();
+diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
+index 3750fd2161317..930574ad2bca9 100644
+--- a/drivers/gpu/drm/udl/udl_connector.c
++++ b/drivers/gpu/drm/udl/udl_connector.c
+@@ -30,7 +30,7 @@ static int udl_get_edid_block(void *data, u8 *buf, unsigned int block,
+ int bval = (i + block * EDID_LENGTH) << 8;
+ ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ 0x02, (0x80 | (0x02 << 5)), bval,
+- 0xA1, read_buff, 2, HZ);
++ 0xA1, read_buff, 2, 1000);
+ if (ret < 1) {
+ DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret);
+ kfree(read_buff);
+diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
+index 32232228dae9d..50753d83fbddd 100644
+--- a/drivers/gpu/drm/udl/udl_modeset.c
++++ b/drivers/gpu/drm/udl/udl_modeset.c
+@@ -381,9 +381,6 @@ udl_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe,
+
+ udl_handle_damage(fb, &shadow_plane_state->data[0], 0, 0, fb->width, fb->height);
+
+- if (!crtc_state->mode_changed)
+- return;
+-
+ /* enable display */
+ udl_crtc_write_mode_to_hw(crtc);
+ }
+diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
+index 6a8731ab9d7d0..0d9af62f69ad1 100644
+--- a/drivers/gpu/drm/v3d/v3d_bo.c
++++ b/drivers/gpu/drm/v3d/v3d_bo.c
+@@ -47,18 +47,18 @@ void v3d_free_object(struct drm_gem_object *obj)
+ /* GPU execution may have dirtied any pages in the BO. */
+ bo->base.pages_mark_dirty_on_put = true;
+
+- drm_gem_shmem_free_object(obj);
++ drm_gem_shmem_free(&bo->base);
+ }
+
+ static const struct drm_gem_object_funcs v3d_gem_funcs = {
+ .free = v3d_free_object,
+- .print_info = drm_gem_shmem_print_info,
+- .pin = drm_gem_shmem_pin,
+- .unpin = drm_gem_shmem_unpin,
+- .get_sg_table = drm_gem_shmem_get_sg_table,
+- .vmap = drm_gem_shmem_vmap,
+- .vunmap = drm_gem_shmem_vunmap,
+- .mmap = drm_gem_shmem_mmap,
++ .print_info = drm_gem_shmem_object_print_info,
++ .pin = drm_gem_shmem_object_pin,
++ .unpin = drm_gem_shmem_object_unpin,
++ .get_sg_table = drm_gem_shmem_object_get_sg_table,
++ .vmap = drm_gem_shmem_object_vmap,
++ .vunmap = drm_gem_shmem_object_vunmap,
++ .mmap = drm_gem_shmem_object_mmap,
+ };
+
+ /* gem_create_object function for allocating a BO struct and doing
+@@ -95,7 +95,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj)
+ /* So far we pin the BO in the MMU for its lifetime, so use
+ * shmem's helper for getting a lifetime sgt.
+ */
+- sgt = drm_gem_shmem_get_pages_sgt(&bo->base.base);
++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+@@ -141,7 +141,7 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+ return bo;
+
+ free_obj:
+- drm_gem_shmem_free_object(&shmem_obj->base);
++ drm_gem_shmem_free(shmem_obj);
+ return ERR_PTR(ret);
+ }
+
+@@ -159,7 +159,7 @@ v3d_prime_import_sg_table(struct drm_device *dev,
+
+ ret = v3d_bo_create_finish(obj);
+ if (ret) {
+- drm_gem_shmem_free_object(obj);
++ drm_gem_shmem_free(&to_v3d_bo(obj)->base);
+ return ERR_PTR(ret);
+ }
+
+diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
+index 9403c3b36aca6..6407a006d6ec4 100644
+--- a/drivers/gpu/drm/v3d/v3d_drv.c
++++ b/drivers/gpu/drm/v3d/v3d_drv.c
+@@ -221,6 +221,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
+ int ret;
+ u32 mmu_debug;
+ u32 ident1;
++ u64 mask;
+
+
+ v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm);
+@@ -240,8 +241,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
+ return ret;
+
+ mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
+- dma_set_mask_and_coherent(dev,
+- DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH)));
++ mask = DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH));
++ ret = dma_set_mask_and_coherent(dev, mask);
++ if (ret)
++ return ret;
++
+ v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH);
+
+ ident1 = V3D_READ(V3D_HUB_IDENT1);
+diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
+index 5689da118197e..805d6f6cba0e2 100644
+--- a/drivers/gpu/drm/v3d/v3d_gem.c
++++ b/drivers/gpu/drm/v3d/v3d_gem.c
+@@ -197,8 +197,8 @@ v3d_clean_caches(struct v3d_dev *v3d)
+
+ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+- V3D_L2TCACTL_L2TFLS), 100)) {
+- DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
++ V3D_L2TCACTL_TMUWCF), 100)) {
++ DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
+ }
+
+ mutex_lock(&v3d->cache_clean_lock);
+@@ -625,7 +625,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+
+ if (!render->base.perfmon) {
+ ret = -ENOENT;
+- goto fail;
++ goto fail_perfmon;
+ }
+ }
+
+@@ -678,6 +678,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+
+ fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
++fail_perfmon:
+ drm_gem_unlock_reservations(last_job->bo,
+ last_job->bo_count, &acquire_ctx);
+ fail:
+@@ -854,7 +855,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+ args->perfmon_id);
+ if (!job->base.perfmon) {
+ ret = -ENOENT;
+- goto fail;
++ goto fail_perfmon;
+ }
+ }
+
+@@ -886,6 +887,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+
+ fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
++fail_perfmon:
+ drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
+ &acquire_ctx);
+ fail:
+diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
+index 0288ef063513e..f6a88abccc7d9 100644
+--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
++++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
+@@ -25,11 +25,12 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon)
+ {
+ unsigned int i;
+ u32 mask;
+- u8 ncounters = perfmon->ncounters;
++ u8 ncounters;
+
+ if (WARN_ON_ONCE(!perfmon || v3d->active_perfmon))
+ return;
+
++ ncounters = perfmon->ncounters;
+ mask = GENMASK(ncounters - 1, 0);
+
+ for (i = 0; i < ncounters; i++) {
+diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c
+index f28779715ccda..c9e8b3a63c621 100644
+--- a/drivers/gpu/drm/vboxvideo/vbox_main.c
++++ b/drivers/gpu/drm/vboxvideo/vbox_main.c
+@@ -127,8 +127,8 @@ int vbox_hw_init(struct vbox_private *vbox)
+ /* Create guest-heap mem-pool use 2^4 = 16 byte chunks */
+ vbox->guest_pool = devm_gen_pool_create(vbox->ddev.dev, 4, -1,
+ "vboxvideo-accel");
+- if (!vbox->guest_pool)
+- return -ENOMEM;
++ if (IS_ERR(vbox->guest_pool))
++ return PTR_ERR(vbox->guest_pool);
+
+ ret = gen_pool_add_virt(vbox->guest_pool,
+ (unsigned long)vbox->guest_heap,
+diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
+index 345a5570a3da7..e2c147d4015ef 100644
+--- a/drivers/gpu/drm/vc4/Kconfig
++++ b/drivers/gpu/drm/vc4/Kconfig
+@@ -5,6 +5,7 @@ config DRM_VC4
+ depends on DRM
+ depends on SND && SND_SOC
+ depends on COMMON_CLK
++ depends on PM
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+ select DRM_GEM_CMA_HELPER
+diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
+index fddaeb0b09c11..f642bd6e71ff4 100644
+--- a/drivers/gpu/drm/vc4/vc4_bo.c
++++ b/drivers/gpu/drm/vc4/vc4_bo.c
+@@ -391,7 +391,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+- return ERR_PTR(-ENOMEM);
++ return NULL;
+
+ bo->madv = VC4_MADV_WILLNEED;
+ refcount_set(&bo->usecnt, 0);
+diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
+index 18f5009ce90e3..3b8576f193214 100644
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -32,6 +32,7 @@
+ #include <linux/clk.h>
+ #include <linux/component.h>
+ #include <linux/of_device.h>
++#include <linux/pm_runtime.h>
+
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_atomic_helper.h>
+@@ -42,6 +43,7 @@
+ #include <drm/drm_vblank.h>
+
+ #include "vc4_drv.h"
++#include "vc4_hdmi.h"
+ #include "vc4_regs.h"
+
+ #define HVS_FIFO_LATENCY_PIX 6
+@@ -121,7 +123,7 @@ static bool vc4_crtc_get_scanout_position(struct drm_crtc *crtc,
+ *vpos /= 2;
+
+ /* Use hpos to correct for field offset in interlaced mode. */
+- if (VC4_GET_FIELD(val, SCALER_DISPSTATX_FRAME_COUNT) % 2)
++ if (vc4_hvs_get_fifo_frame_count(dev, vc4_crtc_state->assigned_channel) % 2)
+ *hpos += mode->crtc_htotal / 2;
+ }
+
+@@ -328,7 +330,8 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_atomic_state *s
+ u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
+ bool is_dsi = (vc4_encoder->type == VC4_ENCODER_TYPE_DSI0 ||
+ vc4_encoder->type == VC4_ENCODER_TYPE_DSI1);
+- u32 format = is_dsi ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24;
++ bool is_dsi1 = vc4_encoder->type == VC4_ENCODER_TYPE_DSI1;
++ u32 format = is_dsi1 ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24;
+ u8 ppc = pv_data->pixels_per_clock;
+ bool debug_dump_regs = false;
+
+@@ -354,7 +357,8 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_atomic_state *s
+ PV_HORZB_HACTIVE));
+
+ CRTC_WRITE(PV_VERTA,
+- VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
++ VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end +
++ interlace,
+ PV_VERTA_VBP) |
+ VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start,
+ PV_VERTA_VSYNC));
+@@ -366,7 +370,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_atomic_state *s
+ if (interlace) {
+ CRTC_WRITE(PV_VERTA_EVEN,
+ VC4_SET_FIELD(mode->crtc_vtotal -
+- mode->crtc_vsync_end - 1,
++ mode->crtc_vsync_end,
+ PV_VERTA_VBP) |
+ VC4_SET_FIELD(mode->crtc_vsync_end -
+ mode->crtc_vsync_start,
+@@ -386,7 +390,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_atomic_state *s
+ PV_VCONTROL_CONTINUOUS |
+ (is_dsi ? PV_VCONTROL_DSI : 0) |
+ PV_VCONTROL_INTERLACE |
+- VC4_SET_FIELD(mode->htotal * pixel_rep / 2,
++ VC4_SET_FIELD(mode->htotal * pixel_rep / (2 * ppc),
+ PV_VCONTROL_ODD_DELAY));
+ CRTC_WRITE(PV_VSYNCD_EVEN, 0);
+ } else {
+@@ -496,8 +500,10 @@ int vc4_crtc_disable_at_boot(struct drm_crtc *crtc)
+ enum vc4_encoder_type encoder_type;
+ const struct vc4_pv_data *pv_data;
+ struct drm_encoder *encoder;
++ struct vc4_hdmi *vc4_hdmi;
+ unsigned encoder_sel;
+ int channel;
++ int ret;
+
+ if (!(of_device_is_compatible(vc4_crtc->pdev->dev.of_node,
+ "brcm,bcm2711-pixelvalve2") ||
+@@ -525,7 +531,22 @@ int vc4_crtc_disable_at_boot(struct drm_crtc *crtc)
+ if (WARN_ON(!encoder))
+ return 0;
+
+- return vc4_crtc_disable(crtc, encoder, NULL, channel);
++ vc4_hdmi = encoder_to_vc4_hdmi(encoder);
++ ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
++ if (ret)
++ return ret;
++
++ ret = vc4_crtc_disable(crtc, encoder, NULL, channel);
++ if (ret)
++ return ret;
++
++ /*
++ * post_crtc_powerdown will have called pm_runtime_put, so we
++ * don't need it here otherwise we'll get the reference counting
++ * wrong.
++ */
++
++ return 0;
+ }
+
+ static void vc4_crtc_atomic_disable(struct drm_crtc *crtc,
+@@ -691,14 +712,14 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
+ struct drm_crtc *crtc = &vc4_crtc->base;
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+- u32 chan = vc4_state->assigned_channel;
++ u32 chan = vc4_crtc->current_hvs_channel;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
++ spin_lock(&vc4_crtc->irq_lock);
+ if (vc4_crtc->event &&
+- (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)) ||
+- vc4_state->feed_txp)) {
++ (vc4_crtc->current_dlist == HVS_READ(SCALER_DISPLACTX(chan)) ||
++ vc4_crtc->feeds_txp)) {
+ drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
+ vc4_crtc->event = NULL;
+ drm_crtc_vblank_put(crtc);
+@@ -711,6 +732,7 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
+ */
+ vc4_hvs_unmask_underrun(dev, chan);
+ }
++ spin_unlock(&vc4_crtc->irq_lock);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+
+@@ -876,7 +898,6 @@ struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
+ return NULL;
+
+ old_vc4_state = to_vc4_crtc_state(crtc->state);
+- vc4_state->feed_txp = old_vc4_state->feed_txp;
+ vc4_state->margins = old_vc4_state->margins;
+ vc4_state->assigned_channel = old_vc4_state->assigned_channel;
+
+@@ -937,6 +958,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
+ static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
+ .mode_valid = vc4_crtc_mode_valid,
+ .atomic_check = vc4_crtc_atomic_check,
++ .atomic_begin = vc4_hvs_atomic_begin,
+ .atomic_flush = vc4_hvs_atomic_flush,
+ .atomic_enable = vc4_crtc_atomic_enable,
+ .atomic_disable = vc4_crtc_atomic_disable,
+@@ -1111,6 +1133,7 @@ int vc4_crtc_init(struct drm_device *drm, struct vc4_crtc *vc4_crtc,
+ return PTR_ERR(primary_plane);
+ }
+
++ spin_lock_init(&vc4_crtc->irq_lock);
+ drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
+ crtc_funcs, NULL);
+ drm_crtc_helper_add(crtc, crtc_helper_funcs);
+diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
+index a90f2545baee0..9c8a71d7426a0 100644
+--- a/drivers/gpu/drm/vc4/vc4_dpi.c
++++ b/drivers/gpu/drm/vc4/vc4_dpi.c
+@@ -148,35 +148,45 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+- if (connector && connector->display_info.num_bus_formats) {
+- u32 bus_format = connector->display_info.bus_formats[0];
+-
+- switch (bus_format) {
+- case MEDIA_BUS_FMT_RGB888_1X24:
+- dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
+- DPI_FORMAT);
+- break;
+- case MEDIA_BUS_FMT_BGR888_1X24:
+- dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
+- DPI_FORMAT);
+- dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, DPI_ORDER);
+- break;
+- case MEDIA_BUS_FMT_RGB666_1X24_CPADHI:
+- dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_2,
+- DPI_FORMAT);
+- break;
+- case MEDIA_BUS_FMT_RGB666_1X18:
+- dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1,
+- DPI_FORMAT);
+- break;
+- case MEDIA_BUS_FMT_RGB565_1X16:
+- dpi_c |= VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_3,
+- DPI_FORMAT);
+- break;
+- default:
+- DRM_ERROR("Unknown media bus format %d\n", bus_format);
+- break;
++ if (connector) {
++ if (connector->display_info.num_bus_formats) {
++ u32 bus_format = connector->display_info.bus_formats[0];
++
++ switch (bus_format) {
++ case MEDIA_BUS_FMT_RGB888_1X24:
++ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
++ DPI_FORMAT);
++ break;
++ case MEDIA_BUS_FMT_BGR888_1X24:
++ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
++ DPI_FORMAT);
++ dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR,
++ DPI_ORDER);
++ break;
++ case MEDIA_BUS_FMT_RGB666_1X24_CPADHI:
++ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_2,
++ DPI_FORMAT);
++ break;
++ case MEDIA_BUS_FMT_RGB666_1X18:
++ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1,
++ DPI_FORMAT);
++ break;
++ case MEDIA_BUS_FMT_RGB565_1X16:
++ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_1,
++ DPI_FORMAT);
++ break;
++ default:
++ DRM_ERROR("Unknown media bus format %d\n",
++ bus_format);
++ break;
++ }
+ }
++
++ if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
++ dpi_c |= DPI_PIXEL_CLK_INVERT;
++
++ if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW)
++ dpi_c |= DPI_OUTPUT_ENABLE_INVERT;
+ } else {
+ /* Default to 24bit if no connector found. */
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
+index f6c16c5aee683..099df15e1a61c 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -214,6 +214,15 @@ static void vc4_match_add_drivers(struct device *dev,
+ }
+ }
+
++static const struct of_device_id vc4_dma_range_matches[] = {
++ { .compatible = "brcm,bcm2711-hvs" },
++ { .compatible = "brcm,bcm2835-hvs" },
++ { .compatible = "brcm,bcm2835-v3d" },
++ { .compatible = "brcm,cygnus-v3d" },
++ { .compatible = "brcm,vc4-v3d" },
++ {}
++};
++
+ static int vc4_drm_bind(struct device *dev)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+@@ -231,6 +240,16 @@ static int vc4_drm_bind(struct device *dev)
+ vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
+ of_node_put(node);
+
++ node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches,
++ NULL);
++ if (node) {
++ ret = of_dma_configure(dev, node, true);
++ of_node_put(node);
++
++ if (ret)
++ return ret;
++ }
++
+ vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base);
+ if (IS_ERR(vc4))
+ return PTR_ERR(vc4);
+@@ -364,7 +383,12 @@ static int __init vc4_drm_register(void)
+ if (ret)
+ return ret;
+
+- return platform_driver_register(&vc4_platform_driver);
++ ret = platform_driver_register(&vc4_platform_driver);
++ if (ret)
++ platform_unregister_drivers(component_drivers,
++ ARRAY_SIZE(component_drivers));
++
++ return ret;
+ }
+
+ static void __exit vc4_drm_unregister(void)
+@@ -378,6 +402,7 @@ module_init(vc4_drm_register);
+ module_exit(vc4_drm_unregister);
+
+ MODULE_ALIAS("platform:vc4-drm");
++MODULE_SOFTDEP("pre: snd-soc-hdmi-codec");
+ MODULE_DESCRIPTION("Broadcom VC4 DRM Driver");
+ MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
+index ef73e0aaf7261..94c178738fc19 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -495,6 +495,33 @@ struct vc4_crtc {
+ struct drm_pending_vblank_event *event;
+
+ struct debugfs_regset32 regset;
++
++ /**
++ * @feeds_txp: True if the CRTC feeds our writeback controller.
++ */
++ bool feeds_txp;
++
++ /**
++ * @irq_lock: Spinlock protecting the resources shared between
++ * the atomic code and our vblank handler.
++ */
++ spinlock_t irq_lock;
++
++ /**
++ * @current_dlist: Start offset of the display list currently
++ * set in the HVS for that CRTC. Protected by @irq_lock, and
++ * copied in vc4_hvs_update_dlist() for the CRTC interrupt
++ * handler to have access to that value.
++ */
++ unsigned int current_dlist;
++
++ /**
++ * @current_hvs_channel: HVS channel currently assigned to the
++ * CRTC. Protected by @irq_lock, and copied in
++ * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have
++ * access to that value.
++ */
++ unsigned int current_hvs_channel;
+ };
+
+ static inline struct vc4_crtc *
+@@ -521,7 +548,6 @@ struct vc4_crtc_state {
+ struct drm_crtc_state base;
+ /* Dlist area for this CRTC configuration. */
+ struct drm_mm_node mm;
+- bool feed_txp;
+ bool txp_armed;
+ unsigned int assigned_channel;
+
+@@ -907,7 +933,9 @@ void vc4_irq_reset(struct drm_device *dev);
+ extern struct platform_driver vc4_hvs_driver;
+ void vc4_hvs_stop_channel(struct drm_device *dev, unsigned int output);
+ int vc4_hvs_get_fifo_from_output(struct drm_device *dev, unsigned int output);
++u8 vc4_hvs_get_fifo_frame_count(struct drm_device *dev, unsigned int fifo);
+ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
++void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
+ void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
+ void vc4_hvs_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state);
+ void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state);
+diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
+index a185027911ce5..b7b2c76770dc6 100644
+--- a/drivers/gpu/drm/vc4/vc4_dsi.c
++++ b/drivers/gpu/drm/vc4/vc4_dsi.c
+@@ -181,8 +181,50 @@
+
+ #define DSI0_TXPKT_PIX_FIFO 0x20 /* AKA PIX_FIFO */
+
+-#define DSI0_INT_STAT 0x24
+-#define DSI0_INT_EN 0x28
++#define DSI0_INT_STAT 0x24
++#define DSI0_INT_EN 0x28
++# define DSI0_INT_FIFO_ERR BIT(25)
++# define DSI0_INT_CMDC_DONE_MASK VC4_MASK(24, 23)
++# define DSI0_INT_CMDC_DONE_SHIFT 23
++# define DSI0_INT_CMDC_DONE_NO_REPEAT 1
++# define DSI0_INT_CMDC_DONE_REPEAT 3
++# define DSI0_INT_PHY_DIR_RTF BIT(22)
++# define DSI0_INT_PHY_D1_ULPS BIT(21)
++# define DSI0_INT_PHY_D1_STOP BIT(20)
++# define DSI0_INT_PHY_RXLPDT BIT(19)
++# define DSI0_INT_PHY_RXTRIG BIT(18)
++# define DSI0_INT_PHY_D0_ULPS BIT(17)
++# define DSI0_INT_PHY_D0_LPDT BIT(16)
++# define DSI0_INT_PHY_D0_FTR BIT(15)
++# define DSI0_INT_PHY_D0_STOP BIT(14)
++/* Signaled when the clock lane enters the given state. */
++# define DSI0_INT_PHY_CLK_ULPS BIT(13)
++# define DSI0_INT_PHY_CLK_HS BIT(12)
++# define DSI0_INT_PHY_CLK_FTR BIT(11)
++/* Signaled on timeouts */
++# define DSI0_INT_PR_TO BIT(10)
++# define DSI0_INT_TA_TO BIT(9)
++# define DSI0_INT_LPRX_TO BIT(8)
++# define DSI0_INT_HSTX_TO BIT(7)
++/* Contention on a line when trying to drive the line low */
++# define DSI0_INT_ERR_CONT_LP1 BIT(6)
++# define DSI0_INT_ERR_CONT_LP0 BIT(5)
++/* Control error: incorrect line state sequence on data lane 0. */
++# define DSI0_INT_ERR_CONTROL BIT(4)
++# define DSI0_INT_ERR_SYNC_ESC BIT(3)
++# define DSI0_INT_RX2_PKT BIT(2)
++# define DSI0_INT_RX1_PKT BIT(1)
++# define DSI0_INT_CMD_PKT BIT(0)
++
++#define DSI0_INTERRUPTS_ALWAYS_ENABLED (DSI0_INT_ERR_SYNC_ESC | \
++ DSI0_INT_ERR_CONTROL | \
++ DSI0_INT_ERR_CONT_LP0 | \
++ DSI0_INT_ERR_CONT_LP1 | \
++ DSI0_INT_HSTX_TO | \
++ DSI0_INT_LPRX_TO | \
++ DSI0_INT_TA_TO | \
++ DSI0_INT_PR_TO)
++
+ # define DSI1_INT_PHY_D3_ULPS BIT(30)
+ # define DSI1_INT_PHY_D3_STOP BIT(29)
+ # define DSI1_INT_PHY_D2_ULPS BIT(28)
+@@ -761,6 +803,9 @@ static void vc4_dsi_encoder_disable(struct drm_encoder *encoder)
+ list_for_each_entry_reverse(iter, &dsi->bridge_chain, chain_node) {
+ if (iter->funcs->disable)
+ iter->funcs->disable(iter);
++
++ if (iter == dsi->bridge)
++ break;
+ }
+
+ vc4_dsi_ulps(dsi, true);
+@@ -805,11 +850,9 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
+ /* Find what divider gets us a faster clock than the requested
+ * pixel clock.
+ */
+- for (divider = 1; divider < 8; divider++) {
+- if (parent_rate / divider < pll_clock) {
+- divider--;
++ for (divider = 1; divider < 255; divider++) {
++ if (parent_rate / (divider + 1) < pll_clock)
+ break;
+- }
+ }
+
+ /* Now that we've picked a PLL divider, calculate back to its
+@@ -846,7 +889,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
+ unsigned long phy_clock;
+ int ret;
+
+- ret = pm_runtime_get_sync(dev);
++ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->variant->port);
+ return;
+@@ -894,6 +937,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
+
+ DSI_PORT_WRITE(PHY_AFEC0, afec0);
+
++ /* AFEC reset hold time */
++ mdelay(1);
++
+ DSI_PORT_WRITE(PHY_AFEC1,
+ VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE1) |
+ VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE0) |
+@@ -1060,12 +1106,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
+ DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI1_CTRL_EN);
+
+ /* Bring AFE out of reset. */
+- if (dsi->variant->port == 0) {
+- } else {
+- DSI_PORT_WRITE(PHY_AFEC0,
+- DSI_PORT_READ(PHY_AFEC0) &
+- ~DSI1_PHY_AFEC0_RESET);
+- }
++ DSI_PORT_WRITE(PHY_AFEC0,
++ DSI_PORT_READ(PHY_AFEC0) &
++ ~DSI_PORT_BIT(PHY_AFEC0_RESET));
+
+ vc4_dsi_ulps(dsi, false);
+
+@@ -1184,13 +1227,28 @@ static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host,
+ /* Enable the appropriate interrupt for the transfer completion. */
+ dsi->xfer_result = 0;
+ reinit_completion(&dsi->xfer_completion);
+- DSI_PORT_WRITE(INT_STAT, DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF);
+- if (msg->rx_len) {
+- DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+- DSI1_INT_PHY_DIR_RTF));
++ if (dsi->variant->port == 0) {
++ DSI_PORT_WRITE(INT_STAT,
++ DSI0_INT_CMDC_DONE_MASK | DSI1_INT_PHY_DIR_RTF);
++ if (msg->rx_len) {
++ DSI_PORT_WRITE(INT_EN, (DSI0_INTERRUPTS_ALWAYS_ENABLED |
++ DSI0_INT_PHY_DIR_RTF));
++ } else {
++ DSI_PORT_WRITE(INT_EN,
++ (DSI0_INTERRUPTS_ALWAYS_ENABLED |
++ VC4_SET_FIELD(DSI0_INT_CMDC_DONE_NO_REPEAT,
++ DSI0_INT_CMDC_DONE)));
++ }
+ } else {
+- DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+- DSI1_INT_TXPKT1_DONE));
++ DSI_PORT_WRITE(INT_STAT,
++ DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF);
++ if (msg->rx_len) {
++ DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
++ DSI1_INT_PHY_DIR_RTF));
++ } else {
++ DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
++ DSI1_INT_TXPKT1_DONE));
++ }
+ }
+
+ /* Send the packet. */
+@@ -1207,7 +1265,7 @@ static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host,
+ ret = dsi->xfer_result;
+ }
+
+- DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
++ DSI_PORT_WRITE(INT_EN, DSI_PORT_BIT(INTERRUPTS_ALWAYS_ENABLED));
+
+ if (ret)
+ goto reset_fifo_and_return;
+@@ -1253,7 +1311,7 @@ reset_fifo_and_return:
+ DSI_PORT_BIT(CTRL_RESET_FIFOS));
+
+ DSI_PORT_WRITE(TXPKT1C, 0);
+- DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
++ DSI_PORT_WRITE(INT_EN, DSI_PORT_BIT(INTERRUPTS_ALWAYS_ENABLED));
+ return ret;
+ }
+
+@@ -1262,7 +1320,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *device)
+ {
+ struct vc4_dsi *dsi = host_to_dsi(host);
+- int ret;
+
+ dsi->lanes = device->lanes;
+ dsi->channel = device->channel;
+@@ -1297,18 +1354,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
+ return 0;
+ }
+
+- ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops);
+- if (ret) {
+- mipi_dsi_host_unregister(&dsi->dsi_host);
+- return ret;
+- }
+-
+- return 0;
++ return component_add(&dsi->pdev->dev, &vc4_dsi_ops);
+ }
+
+ static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *device)
+ {
++ struct vc4_dsi *dsi = host_to_dsi(host);
++
++ component_del(&dsi->pdev->dev, &vc4_dsi_ops);
+ return 0;
+ }
+
+@@ -1394,26 +1448,28 @@ static irqreturn_t vc4_dsi_irq_handler(int irq, void *data)
+ DSI_PORT_WRITE(INT_STAT, stat);
+
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_ERR_SYNC_ESC, "LPDT sync");
++ DSI_PORT_BIT(INT_ERR_SYNC_ESC), "LPDT sync");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_ERR_CONTROL, "data lane 0 sequence");
++ DSI_PORT_BIT(INT_ERR_CONTROL), "data lane 0 sequence");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_ERR_CONT_LP0, "LP0 contention");
++ DSI_PORT_BIT(INT_ERR_CONT_LP0), "LP0 contention");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_ERR_CONT_LP1, "LP1 contention");
++ DSI_PORT_BIT(INT_ERR_CONT_LP1), "LP1 contention");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_HSTX_TO, "HSTX timeout");
++ DSI_PORT_BIT(INT_HSTX_TO), "HSTX timeout");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_LPRX_TO, "LPRX timeout");
++ DSI_PORT_BIT(INT_LPRX_TO), "LPRX timeout");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_TA_TO, "turnaround timeout");
++ DSI_PORT_BIT(INT_TA_TO), "turnaround timeout");
+ dsi_handle_error(dsi, &ret, stat,
+- DSI1_INT_PR_TO, "peripheral reset timeout");
++ DSI_PORT_BIT(INT_PR_TO), "peripheral reset timeout");
+
+- if (stat & (DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF)) {
++ if (stat & ((dsi->variant->port ? DSI1_INT_TXPKT1_DONE :
++ DSI0_INT_CMDC_DONE_MASK) |
++ DSI_PORT_BIT(INT_PHY_DIR_RTF))) {
+ complete(&dsi->xfer_completion);
+ ret = IRQ_HANDLED;
+- } else if (stat & DSI1_INT_HSTX_TO) {
++ } else if (stat & DSI_PORT_BIT(INT_HSTX_TO)) {
+ complete(&dsi->xfer_completion);
+ dsi->xfer_result = -ETIMEDOUT;
+ ret = IRQ_HANDLED;
+@@ -1491,22 +1547,32 @@ vc4_dsi_init_phy_clocks(struct vc4_dsi *dsi)
+ dsi->clk_onecell);
+ }
+
++static void vc4_dsi_dma_mem_release(void *ptr)
++{
++ struct vc4_dsi *dsi = ptr;
++ struct device *dev = &dsi->pdev->dev;
++
++ dma_free_coherent(dev, 4, dsi->reg_dma_mem, dsi->reg_dma_paddr);
++ dsi->reg_dma_mem = NULL;
++}
++
++static void vc4_dsi_dma_chan_release(void *ptr)
++{
++ struct vc4_dsi *dsi = ptr;
++
++ dma_release_channel(dsi->reg_dma_chan);
++ dsi->reg_dma_chan = NULL;
++}
++
+ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+ struct vc4_dsi_encoder *vc4_dsi_encoder;
+- struct drm_panel *panel;
+- const struct of_device_id *match;
+- dma_cap_mask_t dma_mask;
+ int ret;
+
+- match = of_match_device(vc4_dsi_dt_match, dev);
+- if (!match)
+- return -ENODEV;
+-
+- dsi->variant = match->data;
++ dsi->variant = of_device_get_match_data(dev);
+
+ vc4_dsi_encoder = devm_kzalloc(dev, sizeof(*vc4_dsi_encoder),
+ GFP_KERNEL);
+@@ -1514,7 +1580,8 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&dsi->bridge_chain);
+- vc4_dsi_encoder->base.type = VC4_ENCODER_TYPE_DSI1;
++ vc4_dsi_encoder->base.type = dsi->variant->port ?
++ VC4_ENCODER_TYPE_DSI1 : VC4_ENCODER_TYPE_DSI0;
+ vc4_dsi_encoder->dsi = dsi;
+ dsi->encoder = &vc4_dsi_encoder->base.base;
+
+@@ -1537,6 +1604,8 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ * so set up a channel for talking to it.
+ */
+ if (dsi->variant->broken_axi_workaround) {
++ dma_cap_mask_t dma_mask;
++
+ dsi->reg_dma_mem = dma_alloc_coherent(dev, 4,
+ &dsi->reg_dma_paddr,
+ GFP_KERNEL);
+@@ -1545,8 +1614,13 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ return -ENOMEM;
+ }
+
++ ret = devm_add_action_or_reset(dev, vc4_dsi_dma_mem_release, dsi);
++ if (ret)
++ return ret;
++
+ dma_cap_zero(dma_mask);
+ dma_cap_set(DMA_MEMCPY, dma_mask);
++
+ dsi->reg_dma_chan = dma_request_chan_by_mask(&dma_mask);
+ if (IS_ERR(dsi->reg_dma_chan)) {
+ ret = PTR_ERR(dsi->reg_dma_chan);
+@@ -1556,6 +1630,10 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ return ret;
+ }
+
++ ret = devm_add_action_or_reset(dev, vc4_dsi_dma_chan_release, dsi);
++ if (ret)
++ return ret;
++
+ /* Get the physical address of the device's registers. The
+ * struct resource for the regs gives us the bus address
+ * instead.
+@@ -1609,27 +1687,9 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+ return ret;
+ }
+
+- ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+- &panel, &dsi->bridge);
+- if (ret) {
+- /* If the bridge or panel pointed by dev->of_node is not
+- * enabled, just return 0 here so that we don't prevent the DRM
+- * dev from being registered. Of course that means the DSI
+- * encoder won't be exposed, but that's not a problem since
+- * nothing is connected to it.
+- */
+- if (ret == -ENODEV)
+- return 0;
+-
+- return ret;
+- }
+-
+- if (panel) {
+- dsi->bridge = devm_drm_panel_bridge_add_typed(dev, panel,
+- DRM_MODE_CONNECTOR_DSI);
+- if (IS_ERR(dsi->bridge))
+- return PTR_ERR(dsi->bridge);
+- }
++ dsi->bridge = devm_drm_of_get_bridge(dev, dev->of_node, 0, 0);
++ if (IS_ERR(dsi->bridge))
++ return PTR_ERR(dsi->bridge);
+
+ /* The esc clock rate is supposed to always be 100Mhz. */
+ ret = clk_set_rate(dsi->escape_clock, 100 * 1000000);
+@@ -1667,8 +1727,7 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master,
+ {
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
+- if (dsi->bridge)
+- pm_runtime_disable(dev);
++ pm_runtime_disable(dev);
+
+ /*
+ * Restore the bridge_chain so the bridge detach procedure can happen
+@@ -1706,9 +1765,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
+- component_del(&pdev->dev, &vc4_dsi_ops);
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+-
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
+index ed8a4b7f8b6e2..7a8353d7ab36a 100644
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -38,6 +38,7 @@
+ #include <drm/drm_scdc_helper.h>
+ #include <linux/clk.h>
+ #include <linux/component.h>
++#include <linux/gpio/consumer.h>
+ #include <linux/i2c.h>
+ #include <linux/of_address.h>
+ #include <linux/of_gpio.h>
+@@ -78,6 +79,9 @@
+ #define VC5_HDMI_VERTB_VSPO_SHIFT 16
+ #define VC5_HDMI_VERTB_VSPO_MASK VC4_MASK(29, 16)
+
++#define VC5_HDMI_MISC_CONTROL_PIXEL_REP_SHIFT 0
++#define VC5_HDMI_MISC_CONTROL_PIXEL_REP_MASK VC4_MASK(3, 0)
++
+ #define VC5_HDMI_SCRAMBLER_CTL_ENABLE BIT(0)
+
+ #define VC5_HDMI_DEEP_COLOR_CONFIG_1_INIT_PACK_PHASE_SHIFT 8
+@@ -94,6 +98,7 @@
+ # define VC4_HD_M_SW_RST BIT(2)
+ # define VC4_HD_M_ENABLE BIT(0)
+
++#define HSM_MIN_CLOCK_FREQ 120000000
+ #define CEC_CLOCK_FREQ 40000
+
+ #define HDMI_14_MAX_TMDS_CLK (340 * 1000 * 1000)
+@@ -161,15 +166,19 @@ static void vc4_hdmi_cec_update_clk_div(struct vc4_hdmi *vc4_hdmi)
+ static void vc4_hdmi_cec_update_clk_div(struct vc4_hdmi *vc4_hdmi) {}
+ #endif
+
++static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder);
++
+ static enum drm_connector_status
+ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
+ {
+ struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector);
+ bool connected = false;
+
+- if (vc4_hdmi->hpd_gpio &&
+- gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) {
+- connected = true;
++ WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev));
++
++ if (vc4_hdmi->hpd_gpio) {
++ if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio))
++ connected = true;
+ } else if (drm_probe_ddc(vc4_hdmi->ddc)) {
+ connected = true;
+ } else if (HDMI_READ(HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED) {
+@@ -187,10 +196,13 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
+ }
+ }
+
++ vc4_hdmi_enable_scrambling(&vc4_hdmi->encoder.base.base);
++ pm_runtime_put(&vc4_hdmi->pdev->dev);
+ return connector_status_connected;
+ }
+
+ cec_phys_addr_invalidate(vc4_hdmi->cec_adap);
++ pm_runtime_put(&vc4_hdmi->pdev->dev);
+ return connector_status_disconnected;
+ }
+
+@@ -627,7 +639,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder,
+ vc4_hdmi->variant->phy_disable(vc4_hdmi);
+
+ clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock);
+- clk_disable_unprepare(vc4_hdmi->hsm_clock);
+ clk_disable_unprepare(vc4_hdmi->pixel_clock);
+
+ ret = pm_runtime_put(&vc4_hdmi->pdev->dev);
+@@ -727,12 +738,12 @@ static void vc4_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
+ VC4_HDMI_VERTA_VFP) |
+ VC4_SET_FIELD(mode->crtc_vdisplay, VC4_HDMI_VERTA_VAL));
+ u32 vertb = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
+- VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
++ VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end +
++ interlaced,
+ VC4_HDMI_VERTB_VBP));
+ u32 vertb_even = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
+ VC4_SET_FIELD(mode->crtc_vtotal -
+- mode->crtc_vsync_end -
+- interlaced,
++ mode->crtc_vsync_end,
+ VC4_HDMI_VERTB_VBP));
+
+ HDMI_WRITE(HDMI_HORZA,
+@@ -772,13 +783,14 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
+ VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay,
+ VC5_HDMI_VERTA_VFP) |
+ VC4_SET_FIELD(mode->crtc_vdisplay, VC5_HDMI_VERTA_VAL));
+- u32 vertb = (VC4_SET_FIELD(0, VC5_HDMI_VERTB_VSPO) |
+- VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
++ u32 vertb = (VC4_SET_FIELD(mode->htotal >> (2 - pixel_rep),
++ VC5_HDMI_VERTB_VSPO) |
++ VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end +
++ interlaced,
+ VC4_HDMI_VERTB_VBP));
+ u32 vertb_even = (VC4_SET_FIELD(0, VC5_HDMI_VERTB_VSPO) |
+ VC4_SET_FIELD(mode->crtc_vtotal -
+- mode->crtc_vsync_end -
+- interlaced,
++ mode->crtc_vsync_end,
+ VC4_HDMI_VERTB_VBP));
+ unsigned char gcp;
+ bool gcp_en;
+@@ -841,6 +853,11 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi,
+ reg |= gcp_en ? VC5_HDMI_GCP_CONFIG_GCP_ENABLE : 0;
+ HDMI_WRITE(HDMI_GCP_CONFIG, reg);
+
++ reg = HDMI_READ(HDMI_MISC_CONTROL);
++ reg &= ~VC5_HDMI_MISC_CONTROL_PIXEL_REP_MASK;
++ reg |= VC4_SET_FIELD(0, VC5_HDMI_MISC_CONTROL_PIXEL_REP);
++ HDMI_WRITE(HDMI_MISC_CONTROL, reg);
++
+ HDMI_WRITE(HDMI_CLOCK_STOP, 0);
+ }
+
+@@ -893,28 +910,10 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
+ conn_state_to_vc4_hdmi_conn_state(conn_state);
+ struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
+ struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
+- unsigned long bvb_rate, pixel_rate, hsm_rate;
++ unsigned long pixel_rate = vc4_conn_state->pixel_rate;
++ unsigned long bvb_rate, hsm_rate;
+ int ret;
+
+- ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
+- if (ret < 0) {
+- DRM_ERROR("Failed to retain power domain: %d\n", ret);
+- return;
+- }
+-
+- pixel_rate = vc4_conn_state->pixel_rate;
+- ret = clk_set_rate(vc4_hdmi->pixel_clock, pixel_rate);
+- if (ret) {
+- DRM_ERROR("Failed to set pixel clock rate: %d\n", ret);
+- return;
+- }
+-
+- ret = clk_prepare_enable(vc4_hdmi->pixel_clock);
+- if (ret) {
+- DRM_ERROR("Failed to turn on pixel clock: %d\n", ret);
+- return;
+- }
+-
+ /*
+ * As stated in RPi's vc4 firmware "HDMI state machine (HSM) clock must
+ * be faster than pixel clock, infinitesimally faster, tested in
+@@ -938,13 +937,25 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
+ return;
+ }
+
+- ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
+- if (ret) {
+- DRM_ERROR("Failed to turn on HSM clock: %d\n", ret);
+- clk_disable_unprepare(vc4_hdmi->pixel_clock);
++ ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
++ if (ret < 0) {
++ DRM_ERROR("Failed to retain power domain: %d\n", ret);
+ return;
+ }
+
++ ret = clk_set_rate(vc4_hdmi->pixel_clock, pixel_rate);
++ if (ret) {
++ DRM_ERROR("Failed to set pixel clock rate: %d\n", ret);
++ goto err_put_runtime_pm;
++ }
++
++ ret = clk_prepare_enable(vc4_hdmi->pixel_clock);
++ if (ret) {
++ DRM_ERROR("Failed to turn on pixel clock: %d\n", ret);
++ goto err_put_runtime_pm;
++ }
++
++
+ vc4_hdmi_cec_update_clk_div(vc4_hdmi);
+
+ if (pixel_rate > 297000000)
+@@ -957,17 +968,13 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
+ ret = clk_set_min_rate(vc4_hdmi->pixel_bvb_clock, bvb_rate);
+ if (ret) {
+ DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret);
+- clk_disable_unprepare(vc4_hdmi->hsm_clock);
+- clk_disable_unprepare(vc4_hdmi->pixel_clock);
+- return;
++ goto err_disable_pixel_clock;
+ }
+
+ ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret);
+- clk_disable_unprepare(vc4_hdmi->hsm_clock);
+- clk_disable_unprepare(vc4_hdmi->pixel_clock);
+- return;
++ goto err_disable_pixel_clock;
+ }
+
+ if (vc4_hdmi->variant->phy_init)
+@@ -980,6 +987,15 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
+
+ if (vc4_hdmi->variant->set_timings)
+ vc4_hdmi->variant->set_timings(vc4_hdmi, conn_state, mode);
++
++ return;
++
++err_disable_pixel_clock:
++ clk_disable_unprepare(vc4_hdmi->pixel_clock);
++err_put_runtime_pm:
++ pm_runtime_put(&vc4_hdmi->pdev->dev);
++
++ return;
+ }
+
+ static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder,
+@@ -1084,6 +1100,7 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
+ unsigned long long tmds_rate;
+
+ if (vc4_hdmi->variant->unsupported_odd_h_timings &&
++ !(mode->flags & DRM_MODE_FLAG_DBLCLK) &&
+ ((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
+ (mode->hsync_end % 2) || (mode->htotal % 2)))
+ return -EINVAL;
+@@ -1131,6 +1148,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder,
+ struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
+
+ if (vc4_hdmi->variant->unsupported_odd_h_timings &&
++ !(mode->flags & DRM_MODE_FLAG_DBLCLK) &&
+ ((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
+ (mode->hsync_end % 2) || (mode->htotal % 2)))
+ return MODE_H_ILLEGAL;
+@@ -1374,10 +1392,10 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data,
+
+ /* Set the MAI threshold */
+ HDMI_WRITE(HDMI_MAI_THR,
+- VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) |
+- VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) |
+- VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) |
+- VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW));
++ VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_PANICHIGH) |
++ VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_PANICLOW) |
++ VC4_SET_FIELD(0x06, VC4_HD_MAI_THR_DREQHIGH) |
++ VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_DREQLOW));
+
+ HDMI_WRITE(HDMI_MAI_CONFIG,
+ VC4_HDMI_MAI_CONFIG_BIT_REVERSE |
+@@ -1461,12 +1479,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
+ struct device *dev = &vc4_hdmi->pdev->dev;
+ struct platform_device *codec_pdev;
+ const __be32 *addr;
+- int index;
++ int index, len;
+ int ret;
+
+- if (!of_find_property(dev->of_node, "dmas", NULL)) {
++ if (!of_find_property(dev->of_node, "dmas", &len) || !len) {
+ dev_warn(dev,
+- "'dmas' DT property is missing, no HDMI audio\n");
++ "'dmas' DT property is missing or empty, no HDMI audio\n");
+ return 0;
+ }
+
+@@ -1514,6 +1532,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
+ dev_err(dev, "Couldn't register the HDMI codec: %ld\n", PTR_ERR(codec_pdev));
+ return PTR_ERR(codec_pdev);
+ }
++ vc4_hdmi->audio.codec_pdev = codec_pdev;
+
+ dai_link->cpus = &vc4_hdmi->audio.cpu;
+ dai_link->codecs = &vc4_hdmi->audio.codec;
+@@ -1553,6 +1572,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
+
+ }
+
++static void vc4_hdmi_audio_exit(struct vc4_hdmi *vc4_hdmi)
++{
++ platform_device_unregister(vc4_hdmi->audio.codec_pdev);
++ vc4_hdmi->audio.codec_pdev = NULL;
++}
++
+ static irqreturn_t vc4_hdmi_hpd_irq_thread(int irq, void *priv)
+ {
+ struct vc4_hdmi *vc4_hdmi = priv;
+@@ -1830,7 +1855,8 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
+ }
+
+ vc4_hdmi->cec_adap = cec_allocate_adapter(&vc4_hdmi_cec_adap_ops,
+- vc4_hdmi, "vc4",
++ vc4_hdmi,
++ vc4_hdmi->variant->card_name,
+ CEC_CAP_DEFAULTS |
+ CEC_CAP_CONNECTOR_INFO, 1);
+ ret = PTR_ERR_OR_ZERO(vc4_hdmi->cec_adap);
+@@ -2098,6 +2124,27 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
+ return 0;
+ }
+
++static int vc4_hdmi_runtime_suspend(struct device *dev)
++{
++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
++
++ clk_disable_unprepare(vc4_hdmi->hsm_clock);
++
++ return 0;
++}
++
++static int vc4_hdmi_runtime_resume(struct device *dev)
++{
++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
++ int ret;
++
++ ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
+ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
+ {
+ const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev);
+@@ -2161,6 +2208,29 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
+ vc4_hdmi->disable_4kp60 = true;
+ }
+
++ /*
++ * If we boot without any cable connected to the HDMI connector,
++ * the firmware will skip the HSM initialization and leave it
++ * with a rate of 0, resulting in a bus lockup when we're
++ * accessing the registers even if it's enabled.
++ *
++ * Let's put a sensible default at runtime_resume so that we
++ * don't end up in this situation.
++ */
++ ret = clk_set_min_rate(vc4_hdmi->hsm_clock, HSM_MIN_CLOCK_FREQ);
++ if (ret)
++ goto err_put_ddc;
++
++ pm_runtime_enable(dev);
++
++ /*
++ * We need to have the device powered up at this point to call
++ * our reset hook and for the CEC init.
++ */
++ ret = pm_runtime_resume_and_get(dev);
++ if (ret)
++ goto err_disable_runtime_pm;
++
+ if (vc4_hdmi->variant->reset)
+ vc4_hdmi->variant->reset(vc4_hdmi);
+
+@@ -2172,8 +2242,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
+ clk_prepare_enable(vc4_hdmi->pixel_bvb_clock);
+ }
+
+- pm_runtime_enable(dev);
+-
+ drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
+ drm_encoder_helper_add(encoder, &vc4_hdmi_encoder_helper_funcs);
+
+@@ -2197,6 +2265,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
+ vc4_hdmi_debugfs_regs,
+ vc4_hdmi);
+
++ pm_runtime_put_sync(dev);
++
+ return 0;
+
+ err_free_cec:
+@@ -2207,6 +2277,8 @@ err_destroy_conn:
+ vc4_hdmi_connector_destroy(&vc4_hdmi->connector);
+ err_destroy_encoder:
+ drm_encoder_cleanup(encoder);
++ pm_runtime_put_sync(dev);
++err_disable_runtime_pm:
+ pm_runtime_disable(dev);
+ err_put_ddc:
+ put_device(&vc4_hdmi->ddc->dev);
+@@ -2243,6 +2315,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master,
+ kfree(vc4_hdmi->hdmi_regset.regs);
+ kfree(vc4_hdmi->hd_regset.regs);
+
++ vc4_hdmi_audio_exit(vc4_hdmi);
+ vc4_hdmi_cec_exit(vc4_hdmi);
+ vc4_hdmi_hotplug_exit(vc4_hdmi);
+ vc4_hdmi_connector_destroy(&vc4_hdmi->connector);
+@@ -2352,11 +2425,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = {
+ {}
+ };
+
++static const struct dev_pm_ops vc4_hdmi_pm_ops = {
++ SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend,
++ vc4_hdmi_runtime_resume,
++ NULL)
++};
++
+ struct platform_driver vc4_hdmi_driver = {
+ .probe = vc4_hdmi_dev_probe,
+ .remove = vc4_hdmi_dev_remove,
+ .driver = {
+ .name = "vc4_hdmi",
+ .of_match_table = vc4_hdmi_dt_match,
++ .pm = &vc4_hdmi_pm_ops,
+ },
+ };
+diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h
+index 33e9f665ab8e4..c0492da736833 100644
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.h
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.h
+@@ -113,6 +113,7 @@ struct vc4_hdmi_audio {
+ struct snd_soc_dai_link_component platform;
+ struct snd_dmaengine_dai_dma_data dma_data;
+ struct hdmi_audio_infoframe infoframe;
++ struct platform_device *codec_pdev;
+ bool streaming;
+ };
+
+diff --git a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h
+index 19d2fdc446bca..f126fa425a1d8 100644
+--- a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h
++++ b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h
+@@ -123,6 +123,7 @@ enum vc4_hdmi_field {
+ HDMI_VERTB0,
+ HDMI_VERTB1,
+ HDMI_VID_CTL,
++ HDMI_MISC_CONTROL,
+ };
+
+ struct vc4_hdmi_register {
+@@ -233,6 +234,7 @@ static const struct vc4_hdmi_register __maybe_unused vc5_hdmi_hdmi0_fields[] = {
+ VC4_HDMI_REG(HDMI_VERTB0, 0x0f0),
+ VC4_HDMI_REG(HDMI_VERTA1, 0x0f4),
+ VC4_HDMI_REG(HDMI_VERTB1, 0x0f8),
++ VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x100),
+ VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x09c),
+ VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a0),
+ VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x170),
+@@ -313,6 +315,7 @@ static const struct vc4_hdmi_register __maybe_unused vc5_hdmi_hdmi1_fields[] = {
+ VC4_HDMI_REG(HDMI_VERTB0, 0x0f0),
+ VC4_HDMI_REG(HDMI_VERTA1, 0x0f4),
+ VC4_HDMI_REG(HDMI_VERTB1, 0x0f8),
++ VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x100),
+ VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x09c),
+ VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a0),
+ VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x170),
+diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
+index c239045e05d6f..3856ac289d380 100644
+--- a/drivers/gpu/drm/vc4/vc4_hvs.c
++++ b/drivers/gpu/drm/vc4/vc4_hvs.c
+@@ -197,6 +197,29 @@ static void vc4_hvs_update_gamma_lut(struct drm_crtc *crtc)
+ vc4_hvs_lut_load(crtc);
+ }
+
++u8 vc4_hvs_get_fifo_frame_count(struct drm_device *dev, unsigned int fifo)
++{
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ u8 field = 0;
++
++ switch (fifo) {
++ case 0:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
++ SCALER_DISPSTAT1_FRCNT0);
++ break;
++ case 1:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
++ SCALER_DISPSTAT1_FRCNT1);
++ break;
++ case 2:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
++ SCALER_DISPSTAT2_FRCNT2);
++ break;
++ }
++
++ return field;
++}
++
+ int vc4_hvs_get_fifo_from_output(struct drm_device *dev, unsigned int output)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+@@ -365,17 +388,16 @@ static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
++ unsigned long flags;
+
+ if (crtc->state->event) {
+- unsigned long flags;
+-
+ crtc->state->event->pipe = drm_crtc_index(crtc);
+
+ WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+- if (!vc4_state->feed_txp || vc4_state->txp_armed) {
++ if (!vc4_crtc->feeds_txp || vc4_state->txp_armed) {
+ vc4_crtc->event = crtc->state->event;
+ crtc->state->event = NULL;
+ }
+@@ -388,6 +410,22 @@ static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
+ HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
+ vc4_state->mm.start);
+ }
++
++ spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
++ vc4_crtc->current_dlist = vc4_state->mm.start;
++ spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
++}
++
++void vc4_hvs_atomic_begin(struct drm_crtc *crtc,
++ struct drm_atomic_state *state)
++{
++ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
++ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
++ unsigned long flags;
++
++ spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
++ vc4_crtc->current_hvs_channel = vc4_state->assigned_channel;
++ spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
+ }
+
+ void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
+@@ -395,10 +433,9 @@ void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
+ {
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+- struct drm_crtc_state *new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
+- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(new_crtc_state);
+ struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+- bool oneshot = vc4_state->feed_txp;
++ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
++ bool oneshot = vc4_crtc->feeds_txp;
+
+ vc4_hvs_update_dlist(crtc);
+ vc4_hvs_init_channel(vc4, crtc, mode, oneshot);
+@@ -568,6 +605,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+ struct vc4_hvs *hvs = NULL;
+ int ret;
+ u32 dispctrl;
++ u32 reg;
+
+ hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
+ if (!hvs)
+@@ -639,6 +677,26 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+
+ vc4->hvs = hvs;
+
++ reg = HVS_READ(SCALER_DISPECTRL);
++ reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK;
++ HVS_WRITE(SCALER_DISPECTRL,
++ reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX));
++
++ reg = HVS_READ(SCALER_DISPCTRL);
++ reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
++ HVS_WRITE(SCALER_DISPCTRL,
++ reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX));
++
++ reg = HVS_READ(SCALER_DISPEOLN);
++ reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK;
++ HVS_WRITE(SCALER_DISPEOLN,
++ reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX));
++
++ reg = HVS_READ(SCALER_DISPDITHER);
++ reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK;
++ HVS_WRITE(SCALER_DISPDITHER,
++ reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX));
++
+ dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+ dispctrl |= SCALER_DISPCTRL_ENABLE;
+@@ -646,10 +704,6 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+ SCALER_DISPCTRL_DISPEIRQ(1) |
+ SCALER_DISPCTRL_DISPEIRQ(2);
+
+- /* Set DSP3 (PV1) to use HVS channel 2, which would otherwise
+- * be unused.
+- */
+- dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+ dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
+ SCALER_DISPCTRL_SLVWREIRQ |
+ SCALER_DISPCTRL_SLVRDEIRQ |
+@@ -663,7 +717,17 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+ SCALER_DISPCTRL_DSPEISLUR(1) |
+ SCALER_DISPCTRL_DSPEISLUR(2) |
+ SCALER_DISPCTRL_SCLEIRQ);
+- dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
++
++ /* Set AXI panic mode.
++ * VC4 panics when < 2 lines in FIFO.
++ * VC5 panics when less than 1 line in the FIFO.
++ */
++ dispctrl &= ~(SCALER_DISPCTRL_PANIC0_MASK |
++ SCALER_DISPCTRL_PANIC1_MASK |
++ SCALER_DISPCTRL_PANIC2_MASK);
++ dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC0);
++ dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC1);
++ dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC2);
+
+ HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+
+diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
+index f0b3e4cf5bceb..1bb8bcc45d71d 100644
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -193,8 +193,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state)
+ struct drm_private_state *priv_state;
+
+ priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels);
+- if (IS_ERR(priv_state))
+- return ERR_CAST(priv_state);
++ if (!priv_state)
++ return ERR_PTR(-EINVAL);
+
+ return to_vc4_hvs_state(priv_state);
+ }
+@@ -206,8 +206,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state)
+ struct drm_private_state *priv_state;
+
+ priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels);
+- if (IS_ERR(priv_state))
+- return ERR_CAST(priv_state);
++ if (!priv_state)
++ return ERR_PTR(-EINVAL);
+
+ return to_vc4_hvs_state(priv_state);
+ }
+@@ -233,6 +233,7 @@ static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4,
+ unsigned int i;
+
+ for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
++ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
+ u32 dispctrl;
+ u32 dsp3_mux;
+@@ -253,7 +254,7 @@ static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4,
+ * TXP IP, and we need to disable the FIFO2 -> pixelvalve1
+ * route.
+ */
+- if (vc4_state->feed_txp)
++ if (vc4_crtc->feeds_txp)
+ dsp3_mux = VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX);
+ else
+ dsp3_mux = VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
+@@ -337,10 +338,10 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
+ struct drm_device *dev = state->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hvs *hvs = vc4->hvs;
+- struct drm_crtc_state *old_crtc_state;
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_crtc *crtc;
+ struct vc4_hvs_state *old_hvs_state;
++ unsigned int channel;
+ int i;
+
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+@@ -353,30 +354,32 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
+ vc4_hvs_mask_underrun(dev, vc4_crtc_state->assigned_channel);
+ }
+
+- if (vc4->hvs->hvs5)
+- clk_set_min_rate(hvs->core_clk, 500000000);
+-
+ old_hvs_state = vc4_hvs_get_old_global_state(state);
+- if (!old_hvs_state)
++ if (IS_ERR(old_hvs_state))
+ return;
+
+- for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
+- struct vc4_crtc_state *vc4_crtc_state =
+- to_vc4_crtc_state(old_crtc_state);
+- unsigned int channel = vc4_crtc_state->assigned_channel;
++ for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) {
++ struct drm_crtc_commit *commit;
+ int ret;
+
+- if (channel == VC4_HVS_CHANNEL_DISABLED)
++ if (!old_hvs_state->fifo_state[channel].in_use)
+ continue;
+
+- if (!old_hvs_state->fifo_state[channel].in_use)
++ commit = old_hvs_state->fifo_state[channel].pending_commit;
++ if (!commit)
+ continue;
+
+- ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[channel].pending_commit);
++ ret = drm_crtc_commit_wait(commit);
+ if (ret)
+ drm_err(dev, "Timed out waiting for commit\n");
++
++ drm_crtc_commit_put(commit);
++ old_hvs_state->fifo_state[channel].pending_commit = NULL;
+ }
+
++ if (vc4->hvs->hvs5)
++ clk_set_min_rate(hvs->core_clk, 500000000);
++
+ drm_atomic_helper_commit_modeset_disables(dev, state);
+
+ vc4_ctm_commit(vc4, state);
+@@ -410,8 +413,8 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state)
+ unsigned int i;
+
+ hvs_state = vc4_hvs_get_new_global_state(state);
+- if (!hvs_state)
+- return -EINVAL;
++ if (WARN_ON(IS_ERR(hvs_state)))
++ return PTR_ERR(hvs_state);
+
+ for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+ struct vc4_crtc_state *vc4_crtc_state =
+@@ -668,12 +671,6 @@ vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj)
+
+ for (i = 0; i < HVS_NUM_CHANNELS; i++) {
+ state->fifo_state[i].in_use = old_state->fifo_state[i].in_use;
+-
+- if (!old_state->fifo_state[i].pending_commit)
+- continue;
+-
+- state->fifo_state[i].pending_commit =
+- drm_crtc_commit_get(old_state->fifo_state[i].pending_commit);
+ }
+
+ return &state->base;
+@@ -762,8 +759,8 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev,
+ unsigned int i;
+
+ hvs_new_state = vc4_hvs_get_global_state(state);
+- if (!hvs_new_state)
+- return -EINVAL;
++ if (IS_ERR(hvs_new_state))
++ return PTR_ERR(hvs_new_state);
+
+ for (i = 0; i < ARRAY_SIZE(hvs_new_state->fifo_state); i++)
+ if (!hvs_new_state->fifo_state[i].in_use)
+diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
+index 19161b6ab27fa..4404059810d0a 100644
+--- a/drivers/gpu/drm/vc4/vc4_plane.c
++++ b/drivers/gpu/drm/vc4/vc4_plane.c
+@@ -72,11 +72,13 @@ static const struct hvs_format {
+ .drm = DRM_FORMAT_ARGB1555,
+ .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
++ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
+ },
+ {
+ .drm = DRM_FORMAT_XRGB1555,
+ .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
++ .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
+ },
+ {
+ .drm = DRM_FORMAT_RGB888,
+@@ -303,16 +305,16 @@ static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
+ adjhdisplay,
+ crtc_state->mode.hdisplay);
+ vc4_pstate->crtc_x += left;
+- if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left)
+- vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left;
++ if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
++ vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
+
+ adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
+ vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
+ adjvdisplay,
+ crtc_state->mode.vdisplay);
+ vc4_pstate->crtc_y += top;
+- if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top)
+- vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top;
++ if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
++ vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
+
+ vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
+ adjhdisplay,
+@@ -332,7 +334,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct drm_framebuffer *fb = state->fb;
+ struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+- u32 subpixel_src_mask = (1 << 16) - 1;
+ int num_planes = fb->format->num_planes;
+ struct drm_crtc_state *crtc_state;
+ u32 h_subsample = fb->format->hsub;
+@@ -354,18 +355,15 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
+ for (i = 0; i < num_planes; i++)
+ vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
+
+- /* We don't support subpixel source positioning for scaling. */
+- if ((state->src.x1 & subpixel_src_mask) ||
+- (state->src.x2 & subpixel_src_mask) ||
+- (state->src.y1 & subpixel_src_mask) ||
+- (state->src.y2 & subpixel_src_mask)) {
+- return -EINVAL;
+- }
+-
+- vc4_state->src_x = state->src.x1 >> 16;
+- vc4_state->src_y = state->src.y1 >> 16;
+- vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
+- vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;
++ /*
++ * We don't support subpixel source positioning for scaling,
++ * but fractional coordinates can be generated by clipping
++ * so just round for now
++ */
++ vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
++ vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
++ vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
++ vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;
+
+ vc4_state->crtc_x = state->dst.x1;
+ vc4_state->crtc_y = state->dst.y1;
+diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
+index 489f921ef44d2..fe6d0e21ddd8d 100644
+--- a/drivers/gpu/drm/vc4/vc4_regs.h
++++ b/drivers/gpu/drm/vc4/vc4_regs.h
+@@ -220,6 +220,12 @@
+ #define SCALER_DISPCTRL 0x00000000
+ /* Global register for clock gating the HVS */
+ # define SCALER_DISPCTRL_ENABLE BIT(31)
++# define SCALER_DISPCTRL_PANIC0_MASK VC4_MASK(25, 24)
++# define SCALER_DISPCTRL_PANIC0_SHIFT 24
++# define SCALER_DISPCTRL_PANIC1_MASK VC4_MASK(27, 26)
++# define SCALER_DISPCTRL_PANIC1_SHIFT 26
++# define SCALER_DISPCTRL_PANIC2_MASK VC4_MASK(29, 28)
++# define SCALER_DISPCTRL_PANIC2_SHIFT 28
+ # define SCALER_DISPCTRL_DSP3_MUX_MASK VC4_MASK(19, 18)
+ # define SCALER_DISPCTRL_DSP3_MUX_SHIFT 18
+
+@@ -379,8 +385,6 @@
+ # define SCALER_DISPSTATX_MODE_EOF 3
+ # define SCALER_DISPSTATX_FULL BIT(29)
+ # define SCALER_DISPSTATX_EMPTY BIT(28)
+-# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
+-# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
+ # define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
+ # define SCALER_DISPSTATX_LINE_SHIFT 0
+
+@@ -403,9 +407,15 @@
+ (x) * (SCALER_DISPBKGND1 - \
+ SCALER_DISPBKGND0))
+ #define SCALER_DISPSTAT1 0x00000058
++# define SCALER_DISPSTAT1_FRCNT0_MASK VC4_MASK(23, 18)
++# define SCALER_DISPSTAT1_FRCNT0_SHIFT 18
++# define SCALER_DISPSTAT1_FRCNT1_MASK VC4_MASK(17, 12)
++# define SCALER_DISPSTAT1_FRCNT1_SHIFT 12
++
+ #define SCALER_DISPSTATX(x) (SCALER_DISPSTAT0 + \
+ (x) * (SCALER_DISPSTAT1 - \
+ SCALER_DISPSTAT0))
++
+ #define SCALER_DISPBASE1 0x0000005c
+ #define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
+ (x) * (SCALER_DISPBASE1 - \
+@@ -415,7 +425,11 @@
+ (x) * (SCALER_DISPCTRL1 - \
+ SCALER_DISPCTRL0))
+ #define SCALER_DISPBKGND2 0x00000064
++
+ #define SCALER_DISPSTAT2 0x00000068
++# define SCALER_DISPSTAT2_FRCNT2_MASK VC4_MASK(17, 12)
++# define SCALER_DISPSTAT2_FRCNT2_SHIFT 12
++
+ #define SCALER_DISPBASE2 0x0000006c
+ #define SCALER_DISPALPHA2 0x00000070
+ #define SCALER_GAMADDR 0x00000078
+diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
+index 2fc7f4b5fa098..82beb8c159f28 100644
+--- a/drivers/gpu/drm/vc4/vc4_txp.c
++++ b/drivers/gpu/drm/vc4/vc4_txp.c
+@@ -298,12 +298,18 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn,
+ if (WARN_ON(i == ARRAY_SIZE(drm_fmts)))
+ return;
+
+- ctrl = TXP_GO | TXP_VSTART_AT_EOF | TXP_EI |
++ ctrl = TXP_GO | TXP_EI |
+ VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE) |
+ VC4_SET_FIELD(txp_fmts[i], TXP_FORMAT);
+
+ if (fb->format->has_alpha)
+ ctrl |= TXP_ALPHA_ENABLE;
++ else
++ /*
++ * If TXP_ALPHA_ENABLE isn't set and TXP_ALPHA_INVERT is, the
++ * hardware will force the output padding to be 0xff.
++ */
++ ctrl |= TXP_ALPHA_INVERT;
+
+ gem = drm_fb_cma_get_gem_obj(fb, 0);
+ TXP_WRITE(TXP_DST_PTR, gem->paddr + fb->offsets[0]);
+@@ -391,7 +397,6 @@ static int vc4_txp_atomic_check(struct drm_crtc *crtc,
+ {
+ struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
+ crtc);
+- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
+ int ret;
+
+ ret = vc4_hvs_atomic_check(crtc, state);
+@@ -399,7 +404,6 @@ static int vc4_txp_atomic_check(struct drm_crtc *crtc,
+ return ret;
+
+ crtc_state->no_vblank = true;
+- vc4_state->feed_txp = true;
+
+ return 0;
+ }
+@@ -437,6 +441,7 @@ static void vc4_txp_atomic_disable(struct drm_crtc *crtc,
+
+ static const struct drm_crtc_helper_funcs vc4_txp_crtc_helper_funcs = {
+ .atomic_check = vc4_txp_atomic_check,
++ .atomic_begin = vc4_hvs_atomic_begin,
+ .atomic_flush = vc4_hvs_atomic_flush,
+ .atomic_enable = vc4_txp_atomic_enable,
+ .atomic_disable = vc4_txp_atomic_disable,
+@@ -482,6 +487,7 @@ static int vc4_txp_bind(struct device *dev, struct device *master, void *data)
+
+ vc4_crtc->pdev = pdev;
+ vc4_crtc->data = &vc4_txp_crtc_data;
++ vc4_crtc->feeds_txp = true;
+
+ txp->pdev = pdev;
+
+diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
+index 11fc3d6f66b1e..4e2250b8fa23e 100644
+--- a/drivers/gpu/drm/vc4/vc4_vec.c
++++ b/drivers/gpu/drm/vc4/vc4_vec.c
+@@ -256,7 +256,7 @@ static void vc4_vec_ntsc_j_mode_set(struct vc4_vec *vec)
+ static const struct drm_display_mode ntsc_mode = {
+ DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 13500,
+ 720, 720 + 14, 720 + 14 + 64, 720 + 14 + 64 + 60, 0,
+- 480, 480 + 3, 480 + 3 + 3, 480 + 3 + 3 + 16, 0,
++ 480, 480 + 7, 480 + 7 + 6, 525, 0,
+ DRM_MODE_FLAG_INTERLACE)
+ };
+
+@@ -278,7 +278,7 @@ static void vc4_vec_pal_m_mode_set(struct vc4_vec *vec)
+ static const struct drm_display_mode pal_mode = {
+ DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 13500,
+ 720, 720 + 20, 720 + 20 + 64, 720 + 20 + 64 + 60, 0,
+- 576, 576 + 2, 576 + 2 + 3, 576 + 2 + 3 + 20, 0,
++ 576, 576 + 4, 576 + 4 + 6, 625, 0,
+ DRM_MODE_FLAG_INTERLACE)
+ };
+
+diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
+index bd6f75285fd95..43fc56d0c4a06 100644
+--- a/drivers/gpu/drm/vgem/vgem_fence.c
++++ b/drivers/gpu/drm/vgem/vgem_fence.c
+@@ -248,4 +248,5 @@ void vgem_fence_close(struct vgem_file *vfile)
+ {
+ idr_for_each(&vfile->fence_idr, __vgem_fence_idr_fini, vfile);
+ idr_destroy(&vfile->fence_idr);
++ mutex_destroy(&vfile->fence_mutex);
+ }
+diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
+index a6caebd4a0dd6..ef1f19083cd31 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_display.c
++++ b/drivers/gpu/drm/virtio/virtgpu_display.c
+@@ -179,6 +179,8 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector)
+ DRM_DEBUG("add mode: %dx%d\n", width, height);
+ mode = drm_cvt_mode(connector->dev, width, height, 60,
+ false, false, false);
++ if (!mode)
++ return count;
+ mode->type |= DRM_MODE_TYPE_PREFERRED;
+ drm_mode_probed_add(connector, mode);
+ count++;
+diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
+index 2de61b63ef91d..48d3c9955f0dd 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
++++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
+@@ -248,6 +248,9 @@ void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs)
+ {
+ u32 i;
+
++ if (!objs)
++ return;
++
+ for (i = 0; i < objs->nents; i++)
+ drm_gem_object_put(objs->objs[i]);
+ virtio_gpu_array_free(objs);
+diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+index 5c1ad1596889b..3c750ba6ba1fe 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
++++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+@@ -292,10 +292,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data,
+ drm_gem_object_release(obj);
+ return ret;
+ }
+- drm_gem_object_put(obj);
+
+ rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */
+ rc->bo_handle = handle;
++
++ /*
++ * The handle owns the reference now. But we must drop our
++ * remaining reference *after* we no longer need to dereference
++ * the obj. Otherwise userspace could guess the handle and
++ * race closing it from another thread.
++ */
++ drm_gem_object_put(obj);
++
+ return 0;
+ }
+
+@@ -512,8 +520,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
+ spin_unlock(&vgdev->display_info_lock);
+
+ /* not in cache - need to talk to hw */
+- virtio_gpu_cmd_get_capset(vgdev, found_valid, args->cap_set_ver,
+- &cache_ent);
++ ret = virtio_gpu_cmd_get_capset(vgdev, found_valid, args->cap_set_ver,
++ &cache_ent);
++ if (ret)
++ return ret;
+ virtio_gpu_notify(vgdev);
+
+ copy_exit:
+@@ -654,11 +664,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev,
+ drm_gem_object_release(obj);
+ return ret;
+ }
+- drm_gem_object_put(obj);
+
+ rc_blob->res_handle = bo->hw_res_handle;
+ rc_blob->bo_handle = handle;
+
++ /*
++ * The handle owns the reference now. But we must drop our
++ * remaining reference *after* we no longer need to dereference
++ * the obj. Otherwise userspace could guess the handle and
++ * race closing it from another thread.
++ */
++ drm_gem_object_put(obj);
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
+index f648b0e24447b..25d399b00404c 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_object.c
++++ b/drivers/gpu/drm/virtio/virtgpu_object.c
+@@ -79,10 +79,10 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo)
+ sg_free_table(shmem->pages);
+ kfree(shmem->pages);
+ shmem->pages = NULL;
+- drm_gem_shmem_unpin(&bo->base.base);
++ drm_gem_shmem_unpin(&bo->base);
+ }
+
+- drm_gem_shmem_free_object(&bo->base.base);
++ drm_gem_shmem_free(&bo->base);
+ } else if (virtio_gpu_is_vram(bo)) {
+ struct virtio_gpu_object_vram *vram = to_virtio_gpu_vram(bo);
+
+@@ -116,15 +116,14 @@ static const struct drm_gem_object_funcs virtio_gpu_shmem_funcs = {
+ .free = virtio_gpu_free_object,
+ .open = virtio_gpu_gem_object_open,
+ .close = virtio_gpu_gem_object_close,
+-
+- .print_info = drm_gem_shmem_print_info,
++ .print_info = drm_gem_shmem_object_print_info,
+ .export = virtgpu_gem_prime_export,
+- .pin = drm_gem_shmem_pin,
+- .unpin = drm_gem_shmem_unpin,
+- .get_sg_table = drm_gem_shmem_get_sg_table,
+- .vmap = drm_gem_shmem_vmap,
+- .vunmap = drm_gem_shmem_vunmap,
+- .mmap = drm_gem_shmem_mmap,
++ .pin = drm_gem_shmem_object_pin,
++ .unpin = drm_gem_shmem_object_unpin,
++ .get_sg_table = drm_gem_shmem_object_get_sg_table,
++ .vmap = drm_gem_shmem_object_vmap,
++ .vunmap = drm_gem_shmem_object_vunmap,
++ .mmap = drm_gem_shmem_object_mmap,
+ };
+
+ bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo)
+@@ -157,7 +156,7 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev,
+ struct scatterlist *sg;
+ int si, ret;
+
+- ret = drm_gem_shmem_pin(&bo->base.base);
++ ret = drm_gem_shmem_pin(&bo->base);
+ if (ret < 0)
+ return -EINVAL;
+
+@@ -167,10 +166,12 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev,
+ * dma-ops. This is discouraged for other drivers, but should be fine
+ * since virtio_gpu doesn't support dma-buf import from other devices.
+ */
+- shmem->pages = drm_gem_shmem_get_sg_table(&bo->base.base);
+- if (!shmem->pages) {
+- drm_gem_shmem_unpin(&bo->base.base);
+- return -EINVAL;
++ shmem->pages = drm_gem_shmem_get_sg_table(&bo->base);
++ if (IS_ERR(shmem->pages)) {
++ drm_gem_shmem_unpin(&bo->base);
++ ret = PTR_ERR(shmem->pages);
++ shmem->pages = NULL;
++ return ret;
+ }
+
+ if (use_dma_api) {
+@@ -248,6 +249,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
+
+ ret = virtio_gpu_object_shmem_init(vgdev, bo, &ents, &nents);
+ if (ret != 0) {
++ if (fence)
++ virtio_gpu_array_unlock_resv(objs);
+ virtio_gpu_array_put_free(objs);
+ virtio_gpu_free_object(&shmem_obj->base);
+ return ret;
+@@ -277,6 +280,6 @@ err_put_objs:
+ err_put_id:
+ virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle);
+ err_free_gem:
+- drm_gem_shmem_free_object(&shmem_obj->base);
++ drm_gem_shmem_free(shmem_obj);
+ return ret;
+ }
+diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
+index a49fd9480381d..7e8cbcee1722a 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
++++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
+@@ -265,14 +265,14 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane,
+ }
+
+ static void virtio_gpu_plane_cleanup_fb(struct drm_plane *plane,
+- struct drm_plane_state *old_state)
++ struct drm_plane_state *state)
+ {
+ struct virtio_gpu_framebuffer *vgfb;
+
+- if (!plane->state->fb)
++ if (!state->fb)
+ return;
+
+- vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
++ vgfb = to_virtio_gpu_framebuffer(state->fb);
+ if (vgfb->fence) {
+ dma_fence_put(&vgfb->fence->f);
+ vgfb->fence = NULL;
+diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
+index 2e71e91278b45..19a196b48a383 100644
+--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
++++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
+@@ -91,9 +91,7 @@ virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev,
+ {
+ struct virtio_gpu_vbuffer *vbuf;
+
+- vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL);
+- if (!vbuf)
+- return ERR_PTR(-ENOMEM);
++ vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL | __GFP_NOFAIL);
+
+ BUG_ON(size > MAX_INLINE_CMD_SIZE ||
+ size < sizeof(struct virtio_gpu_ctrl_hdr));
+@@ -147,10 +145,6 @@ static void *virtio_gpu_alloc_cmd_resp(struct virtio_gpu_device *vgdev,
+
+ vbuf = virtio_gpu_get_vbuf(vgdev, cmd_size,
+ resp_size, resp_buf, cb);
+- if (IS_ERR(vbuf)) {
+- *vbuffer_p = NULL;
+- return ERR_CAST(vbuf);
+- }
+ *vbuffer_p = vbuf;
+ return (struct virtio_gpu_command *)vbuf->buf;
+ }
+@@ -610,7 +604,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
+ bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
+ struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
+
+- if (use_dma_api)
++ if (virtio_gpu_is_shmem(bo) && use_dma_api)
+ dma_sync_sgtable_for_device(vgdev->vdev->dev.parent,
+ shmem->pages, DMA_TO_DEVICE);
+
+diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
+index 0ffe5f0e33f75..f716c5796f5fc 100644
+--- a/drivers/gpu/drm/vkms/vkms_drv.c
++++ b/drivers/gpu/drm/vkms/vkms_drv.c
+@@ -57,7 +57,8 @@ static void vkms_release(struct drm_device *dev)
+ {
+ struct vkms_device *vkms = drm_device_to_vkms_device(dev);
+
+- destroy_workqueue(vkms->output.composer_workq);
++ if (vkms->output.composer_workq)
++ destroy_workqueue(vkms->output.composer_workq);
+ }
+
+ static void vkms_atomic_commit_tail(struct drm_atomic_state *old_state)
+@@ -218,6 +219,7 @@ out_unregister:
+
+ static int __init vkms_init(void)
+ {
++ int ret;
+ struct vkms_config *config;
+
+ config = kmalloc(sizeof(*config), GFP_KERNEL);
+@@ -230,7 +232,11 @@ static int __init vkms_init(void)
+ config->writeback = enable_writeback;
+ config->overlay = enable_overlay;
+
+- return vkms_create(config);
++ ret = vkms_create(config);
++ if (ret)
++ kfree(config);
++
++ return ret;
+ }
+
+ static void vkms_destroy(struct vkms_config *config)
+diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
+index bc323f7d40321..18edc7ca5b454 100644
+--- a/drivers/gpu/drm/vmwgfx/Makefile
++++ b/drivers/gpu/drm/vmwgfx/Makefile
+@@ -9,9 +9,8 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
+ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
+ vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
+ vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
+- vmwgfx_devcaps.o ttm_object.o ttm_memory.o
++ vmwgfx_devcaps.o ttm_object.o ttm_memory.o vmwgfx_system_manager.o
+
+ vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o
+-vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
+
+ obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
+diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
+index edd17c30d5a51..2ced4c06ca451 100644
+--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
++++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
+@@ -34,7 +34,6 @@
+ #include <linux/mm.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
+-#include <linux/swap.h>
+
+ #include <drm/drm_device.h>
+ #include <drm/drm_file.h>
+@@ -173,69 +172,7 @@ static struct kobj_type ttm_mem_zone_kobj_type = {
+ .sysfs_ops = &ttm_mem_zone_ops,
+ .default_attrs = ttm_mem_zone_attrs,
+ };
+-
+-static struct attribute ttm_mem_global_lower_mem_limit = {
+- .name = "lower_mem_limit",
+- .mode = S_IRUGO | S_IWUSR
+-};
+-
+-static ssize_t ttm_mem_global_show(struct kobject *kobj,
+- struct attribute *attr,
+- char *buffer)
+-{
+- struct ttm_mem_global *glob =
+- container_of(kobj, struct ttm_mem_global, kobj);
+- uint64_t val = 0;
+-
+- spin_lock(&glob->lock);
+- val = glob->lower_mem_limit;
+- spin_unlock(&glob->lock);
+- /* convert from number of pages to KB */
+- val <<= (PAGE_SHIFT - 10);
+- return snprintf(buffer, PAGE_SIZE, "%llu\n",
+- (unsigned long long) val);
+-}
+-
+-static ssize_t ttm_mem_global_store(struct kobject *kobj,
+- struct attribute *attr,
+- const char *buffer,
+- size_t size)
+-{
+- int chars;
+- uint64_t val64;
+- unsigned long val;
+- struct ttm_mem_global *glob =
+- container_of(kobj, struct ttm_mem_global, kobj);
+-
+- chars = sscanf(buffer, "%lu", &val);
+- if (chars == 0)
+- return size;
+-
+- val64 = val;
+- /* convert from KB to number of pages */
+- val64 >>= (PAGE_SHIFT - 10);
+-
+- spin_lock(&glob->lock);
+- glob->lower_mem_limit = val64;
+- spin_unlock(&glob->lock);
+-
+- return size;
+-}
+-
+-static struct attribute *ttm_mem_global_attrs[] = {
+- &ttm_mem_global_lower_mem_limit,
+- NULL
+-};
+-
+-static const struct sysfs_ops ttm_mem_global_ops = {
+- .show = &ttm_mem_global_show,
+- .store = &ttm_mem_global_store,
+-};
+-
+-static struct kobj_type ttm_mem_glob_kobj_type = {
+- .sysfs_ops = &ttm_mem_global_ops,
+- .default_attrs = ttm_mem_global_attrs,
+-};
++static struct kobj_type ttm_mem_glob_kobj_type = {0};
+
+ static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
+ bool from_wq, uint64_t extra)
+@@ -435,11 +372,6 @@ int ttm_mem_global_init(struct ttm_mem_global *glob, struct device *dev)
+
+ si_meminfo(&si);
+
+- spin_lock(&glob->lock);
+- /* set it as 0 by default to keep original behavior of OOM */
+- glob->lower_mem_limit = 0;
+- spin_unlock(&glob->lock);
+-
+ ret = ttm_mem_init_kernel_zone(glob, &si);
+ if (unlikely(ret != 0))
+ goto out_no_zone;
+@@ -527,35 +459,6 @@ void ttm_mem_global_free(struct ttm_mem_global *glob,
+ }
+ EXPORT_SYMBOL(ttm_mem_global_free);
+
+-/*
+- * check if the available mem is under lower memory limit
+- *
+- * a. if no swap disk at all or free swap space is under swap_mem_limit
+- * but available system mem is bigger than sys_mem_limit, allow TTM
+- * allocation;
+- *
+- * b. if the available system mem is less than sys_mem_limit but free
+- * swap disk is bigger than swap_mem_limit, allow TTM allocation.
+- */
+-bool
+-ttm_check_under_lowerlimit(struct ttm_mem_global *glob,
+- uint64_t num_pages,
+- struct ttm_operation_ctx *ctx)
+-{
+- int64_t available;
+-
+- /* We allow over commit during suspend */
+- if (ctx->force_alloc)
+- return false;
+-
+- available = get_nr_swap_pages() + si_mem_available();
+- available -= num_pages;
+- if (available < glob->lower_mem_limit)
+- return true;
+-
+- return false;
+-}
+-
+ static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
+ struct ttm_mem_zone *single_zone,
+ uint64_t amount, bool reserve)
+diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.h b/drivers/gpu/drm/vmwgfx/ttm_memory.h
+index c50dba7744854..7b0d617ebcb1e 100644
+--- a/drivers/gpu/drm/vmwgfx/ttm_memory.h
++++ b/drivers/gpu/drm/vmwgfx/ttm_memory.h
+@@ -50,8 +50,6 @@
+ * @work: The workqueue callback for the shrink queue.
+ * @lock: Lock to protect the @shrink - and the memory accounting members,
+ * that is, essentially the whole structure with some exceptions.
+- * @lower_mem_limit: include lower limit of swap space and lower limit of
+- * system memory.
+ * @zones: Array of pointers to accounting zones.
+ * @num_zones: Number of populated entries in the @zones array.
+ * @zone_kernel: Pointer to the kernel zone.
+@@ -69,7 +67,6 @@ extern struct ttm_mem_global {
+ struct workqueue_struct *swap_queue;
+ struct work_struct work;
+ spinlock_t lock;
+- uint64_t lower_mem_limit;
+ struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES];
+ unsigned int num_zones;
+ struct ttm_mem_zone *zone_kernel;
+@@ -91,6 +88,5 @@ int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
+ void ttm_mem_global_free_page(struct ttm_mem_global *glob,
+ struct page *page, uint64_t size);
+ size_t ttm_round_pot(size_t size);
+-bool ttm_check_under_lowerlimit(struct ttm_mem_global *glob, uint64_t num_pages,
+- struct ttm_operation_ctx *ctx);
++
+ #endif
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
+index 67db472d3493c..162dfeb1cc5ad 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
+@@ -145,6 +145,13 @@ struct vmw_fifo_state *vmw_fifo_create(struct vmw_private *dev_priv)
+ (unsigned int) max,
+ (unsigned int) min,
+ (unsigned int) fifo->capabilities);
++
++ if (unlikely(min >= max)) {
++ drm_warn(&dev_priv->drm,
++ "FIFO memory is not usable. Driver failed to initialize.");
++ return ERR_PTR(-ENXIO);
++ }
++
+ return fifo;
+ }
+
+@@ -521,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
+ *seqno = atomic_add_return(1, &dev_priv->marker_seq);
+ } while (*seqno == 0);
+
+- if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) {
++ if (!vmw_has_fences(dev_priv)) {
+
+ /*
+ * Don't request hardware to send a fence. The
+@@ -668,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv,
+ */
+ bool vmw_cmd_supported(struct vmw_private *vmw)
+ {
+- if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+- SVGA_CAP_CMD_BUFFERS_2)) != 0)
+- return true;
++ bool has_cmdbufs =
++ (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
++ SVGA_CAP_CMD_BUFFERS_2)) != 0;
++ if (vmw_is_svga_v3(vmw))
++ return (has_cmdbufs &&
++ (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0);
+ /*
+ * We have FIFO cmd's
+ */
+- return vmw->fifo_mem != NULL;
++ return has_cmdbufs || vmw->fifo_mem != NULL;
+ }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+index ab9a1750e1dff..8449d09c06f7a 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -707,23 +707,15 @@ static int vmw_dma_masks(struct vmw_private *dev_priv)
+ static int vmw_vram_manager_init(struct vmw_private *dev_priv)
+ {
+ int ret;
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- ret = vmw_thp_init(dev_priv);
+-#else
+ ret = ttm_range_man_init(&dev_priv->bdev, TTM_PL_VRAM, false,
+ dev_priv->vram_size >> PAGE_SHIFT);
+-#endif
+ ttm_resource_manager_set_used(ttm_manager_type(&dev_priv->bdev, TTM_PL_VRAM), false);
+ return ret;
+ }
+
+ static void vmw_vram_manager_fini(struct vmw_private *dev_priv)
+ {
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- vmw_thp_fini(dev_priv);
+-#else
+ ttm_range_man_fini(&dev_priv->bdev, TTM_PL_VRAM);
+-#endif
+ }
+
+ static int vmw_setup_pci_resources(struct vmw_private *dev,
+@@ -1071,6 +1063,12 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
+ "3D will be disabled.\n");
+ dev_priv->has_mob = false;
+ }
++ if (vmw_sys_man_init(dev_priv) != 0) {
++ drm_info(&dev_priv->drm,
++ "No MOB page table memory available. "
++ "3D will be disabled.\n");
++ dev_priv->has_mob = false;
++ }
+ }
+
+ if (dev_priv->has_mob && (dev_priv->capabilities & SVGA_CAP_DX)) {
+@@ -1121,8 +1119,10 @@ out_no_fifo:
+ vmw_overlay_close(dev_priv);
+ vmw_kms_close(dev_priv);
+ out_no_kms:
+- if (dev_priv->has_mob)
++ if (dev_priv->has_mob) {
+ vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
++ vmw_sys_man_fini(dev_priv);
++ }
+ if (dev_priv->has_gmr)
+ vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
+ vmw_devcaps_destroy(dev_priv);
+@@ -1172,8 +1172,10 @@ static void vmw_driver_unload(struct drm_device *dev)
+ vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
+
+ vmw_release_device_early(dev_priv);
+- if (dev_priv->has_mob)
++ if (dev_priv->has_mob) {
+ vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
++ vmw_sys_man_fini(dev_priv);
++ }
+ vmw_devcaps_destroy(dev_priv);
+ vmw_vram_manager_fini(dev_priv);
+ ttm_device_fini(&dev_priv->bdev);
+@@ -1617,34 +1619,40 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ if (ret)
+- return ret;
++ goto out_error;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+- return ret;
++ goto out_error;
+
+ vmw = devm_drm_dev_alloc(&pdev->dev, &driver,
+ struct vmw_private, drm);
+- if (IS_ERR(vmw))
+- return PTR_ERR(vmw);
++ if (IS_ERR(vmw)) {
++ ret = PTR_ERR(vmw);
++ goto out_error;
++ }
+
+ pci_set_drvdata(pdev, &vmw->drm);
+
+ ret = ttm_mem_global_init(&ttm_mem_glob, &pdev->dev);
+ if (ret)
+- return ret;
++ goto out_error;
+
+ ret = vmw_driver_load(vmw, ent->device);
+ if (ret)
+- return ret;
++ goto out_release;
+
+ ret = drm_dev_register(&vmw->drm, 0);
+- if (ret) {
+- vmw_driver_unload(&vmw->drm);
+- return ret;
+- }
++ if (ret)
++ goto out_unload;
+
+ return 0;
++out_unload:
++ vmw_driver_unload(&vmw->drm);
++out_release:
++ ttm_mem_global_release(&ttm_mem_glob);
++out_error:
++ return ret;
+ }
+
+ static int __init vmwgfx_init(void)
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+index a833751099b55..7bb7a69321d30 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -59,11 +59,8 @@
+ #define VMWGFX_DRIVER_MINOR 19
+ #define VMWGFX_DRIVER_PATCHLEVEL 0
+ #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
+-#define VMWGFX_MAX_RELOCATIONS 2048
+-#define VMWGFX_MAX_VALIDATIONS 2048
+ #define VMWGFX_MAX_DISPLAYS 16
+ #define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768
+-#define VMWGFX_ENABLE_SCREEN_TARGET_OTABLE 1
+
+ #define VMWGFX_PCI_ID_SVGA2 0x0405
+ #define VMWGFX_PCI_ID_SVGA3 0x0406
+@@ -82,8 +79,9 @@
+ VMWGFX_NUM_GB_SURFACE +\
+ VMWGFX_NUM_GB_SCREEN_TARGET)
+
+-#define VMW_PL_GMR (TTM_PL_PRIV + 0)
+-#define VMW_PL_MOB (TTM_PL_PRIV + 1)
++#define VMW_PL_GMR (TTM_PL_PRIV + 0)
++#define VMW_PL_MOB (TTM_PL_PRIV + 1)
++#define VMW_PL_SYSTEM (TTM_PL_PRIV + 2)
+
+ #define VMW_RES_CONTEXT ttm_driver_type0
+ #define VMW_RES_SURFACE ttm_driver_type1
+@@ -1039,7 +1037,6 @@ extern struct ttm_placement vmw_vram_placement;
+ extern struct ttm_placement vmw_vram_sys_placement;
+ extern struct ttm_placement vmw_vram_gmr_placement;
+ extern struct ttm_placement vmw_sys_placement;
+-extern struct ttm_placement vmw_evictable_placement;
+ extern struct ttm_placement vmw_srf_placement;
+ extern struct ttm_placement vmw_mob_placement;
+ extern struct ttm_placement vmw_nonfixed_placement;
+@@ -1115,15 +1112,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+ struct vmw_private *dev_priv,
+ struct vmw_fence_obj **p_fence,
+ uint32_t *p_handle);
+-extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
++extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ struct vmw_fpriv *vmw_fp,
+ int ret,
+ struct drm_vmw_fence_rep __user
+ *user_fence_rep,
+ struct vmw_fence_obj *fence,
+ uint32_t fence_handle,
+- int32_t out_fence_fd,
+- struct sync_file *sync_file);
++ int32_t out_fence_fd);
+ bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd);
+
+ /**
+@@ -1251,6 +1247,12 @@ int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv);
+ int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type);
+ void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type);
+
++/**
++ * System memory manager
++ */
++int vmw_sys_man_init(struct vmw_private *dev_priv);
++void vmw_sys_man_fini(struct vmw_private *dev_priv);
++
+ /**
+ * Prime - vmwgfx_prime.c
+ */
+@@ -1550,16 +1552,7 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end);
+ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+ vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+- enum page_entry_size pe_size);
+-#endif
+
+-/* Transparent hugepage support - vmwgfx_thp.c */
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-extern int vmw_thp_init(struct vmw_private *dev_priv);
+-void vmw_thp_fini(struct vmw_private *dev_priv);
+-#endif
+
+ /**
+ * VMW_DEBUG_KMS - Debug output for kernel mode-setting
+@@ -1684,4 +1677,24 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw,
+ outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT);
+ }
+
++static inline bool vmw_has_fences(struct vmw_private *vmw)
++{
++ if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
++ SVGA_CAP_CMD_BUFFERS_2)) != 0)
++ return true;
++ return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
++}
++
++static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
++ u32 shader_type)
++{
++ SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
++
++ if (shader_model >= VMW_SM_5)
++ max_allowed = SVGA3D_SHADERTYPE_MAX;
++ else if (shader_model >= VMW_SM_4)
++ max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
++ return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
++}
++
+ #endif
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+index 5f2ffa9de5c8f..ed75622bf7082 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -2003,7 +2003,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
+
+ cmd = container_of(header, typeof(*cmd), header);
+
+- if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
++ if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
+ VMW_DEBUG_USER("Illegal shader type %u.\n",
+ (unsigned int) cmd->body.type);
+ return -EINVAL;
+@@ -2125,8 +2125,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
+ SVGA3dCmdHeader *header)
+ {
+ VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
+- SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
+- SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
+
+ struct vmw_resource *res = NULL;
+ struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
+@@ -2143,6 +2141,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
+ if (unlikely(ret != 0))
+ return ret;
+
++ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
++ cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
++ VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
++ (unsigned int) cmd->body.type,
++ (unsigned int) cmd->body.slot);
++ return -EINVAL;
++ }
++
+ binding.bi.ctx = ctx_node->ctx;
+ binding.bi.res = res;
+ binding.bi.bt = vmw_ctx_binding_cb;
+@@ -2151,14 +2157,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
+ binding.size = cmd->body.sizeInBytes;
+ binding.slot = cmd->body.slot;
+
+- if (binding.shader_slot >= max_shader_num ||
+- binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+- VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+- (unsigned int) cmd->body.type,
+- (unsigned int) binding.slot);
+- return -EINVAL;
+- }
+-
+ vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
+ binding.slot);
+
+@@ -2179,15 +2177,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv,
+ {
+ VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
+ container_of(header, typeof(*cmd), header);
+- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
+- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
+
+ u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
+ sizeof(SVGA3dShaderResourceViewId);
+
+ if ((u64) cmd->body.startView + (u64) num_sr_view >
+ (u64) SVGA3D_DX_MAX_SRVIEWS ||
+- cmd->body.type >= max_allowed) {
++ !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
+ VMW_DEBUG_USER("Invalid shader binding.\n");
+ return -EINVAL;
+ }
+@@ -2211,8 +2207,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
+ SVGA3dCmdHeader *header)
+ {
+ VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
+- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
+- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
+ struct vmw_resource *res = NULL;
+ struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
+ struct vmw_ctx_bindinfo_shader binding;
+@@ -2223,8 +2217,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
+
+ cmd = container_of(header, typeof(*cmd), header);
+
+- if (cmd->body.type >= max_allowed ||
+- cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
++ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
+ VMW_DEBUG_USER("Illegal shader type %u.\n",
+ (unsigned int) cmd->body.type);
+ return -EINVAL;
+@@ -3823,17 +3816,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+ * Also if copying fails, user-space will be unable to signal the fence object
+ * so we wait for it immediately, and then unreference the user-space reference.
+ */
+-void
++int
+ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ struct vmw_fpriv *vmw_fp, int ret,
+ struct drm_vmw_fence_rep __user *user_fence_rep,
+ struct vmw_fence_obj *fence, uint32_t fence_handle,
+- int32_t out_fence_fd, struct sync_file *sync_file)
++ int32_t out_fence_fd)
+ {
+ struct drm_vmw_fence_rep fence_rep;
+
+ if (user_fence_rep == NULL)
+- return;
++ return 0;
+
+ memset(&fence_rep, 0, sizeof(fence_rep));
+
+@@ -3861,20 +3854,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ * handle.
+ */
+ if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+- if (sync_file)
+- fput(sync_file->file);
+-
+- if (fence_rep.fd != -1) {
+- put_unused_fd(fence_rep.fd);
+- fence_rep.fd = -1;
+- }
+-
+ ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle,
+ TTM_REF_USAGE);
+ VMW_DEBUG_USER("Fence copy error. Syncing.\n");
+ (void) vmw_fence_obj_wait(fence, false, false,
+ VMW_FENCE_WAIT_TIMEOUT);
+ }
++
++ return ret ? -EFAULT : 0;
+ }
+
+ /**
+@@ -4212,16 +4199,23 @@ int vmw_execbuf_process(struct drm_file *file_priv,
+
+ (void) vmw_fence_obj_wait(fence, false, false,
+ VMW_FENCE_WAIT_TIMEOUT);
++ }
++ }
++
++ ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
++ user_fence_rep, fence, handle, out_fence_fd);
++
++ if (sync_file) {
++ if (ret) {
++ /* usercopy of fence failed, put the file object */
++ fput(sync_file->file);
++ put_unused_fd(out_fence_fd);
+ } else {
+ /* Link the fence with the FD created earlier */
+ fd_install(out_fence_fd, sync_file->file);
+ }
+ }
+
+- vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+- user_fence_rep, fence, handle, out_fence_fd,
+- sync_file);
+-
+ /* Don't unreference when handing fence out */
+ if (unlikely(out_fence != NULL)) {
+ *out_fence = fence;
+@@ -4239,7 +4233,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
+ */
+ vmw_validation_unref_lists(&val_ctx);
+
+- return 0;
++ return ret;
+
+ out_unlock_binding:
+ mutex_unlock(&dev_priv->binding_mutex);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+index d18c6a56e3dca..f18ed03a8b2d6 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+@@ -490,7 +490,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
+
+ static int vmw_fb_kms_framebuffer(struct fb_info *info)
+ {
+- struct drm_mode_fb_cmd2 mode_cmd;
++ struct drm_mode_fb_cmd2 mode_cmd = {0};
+ struct vmw_fb_par *par = info->par;
+ struct fb_var_screeninfo *var = &info->var;
+ struct drm_framebuffer *cur_fb;
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+index 9fe12329a4d58..b32ddbb992de2 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+@@ -85,6 +85,22 @@ fman_from_fence(struct vmw_fence_obj *fence)
+ return container_of(fence->base.lock, struct vmw_fence_manager, lock);
+ }
+
++static u32 vmw_fence_goal_read(struct vmw_private *vmw)
++{
++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
++ return vmw_read(vmw, SVGA_REG_FENCE_GOAL);
++ else
++ return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL);
++}
++
++static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
++{
++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
++ vmw_write(vmw, SVGA_REG_FENCE_GOAL, value);
++ else
++ vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value);
++}
++
+ /*
+ * Note on fencing subsystem usage of irqs:
+ * Typically the vmw_fences_update function is called
+@@ -400,7 +416,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
+ if (likely(!fman->seqno_valid))
+ return false;
+
+- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
++ goal_seqno = vmw_fence_goal_read(fman->dev_priv);
+ if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
+ return false;
+
+@@ -408,9 +424,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
+ list_for_each_entry(fence, &fman->fence_list, head) {
+ if (!list_empty(&fence->seq_passed_actions)) {
+ fman->seqno_valid = true;
+- vmw_fifo_mem_write(fman->dev_priv,
+- SVGA_FIFO_FENCE_GOAL,
+- fence->base.seqno);
++ vmw_fence_goal_write(fman->dev_priv,
++ fence->base.seqno);
+ break;
+ }
+ }
+@@ -442,13 +457,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
+ if (dma_fence_is_signaled_locked(&fence->base))
+ return false;
+
+- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
++ goal_seqno = vmw_fence_goal_read(fman->dev_priv);
+ if (likely(fman->seqno_valid &&
+ goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
+ return false;
+
+- vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL,
+- fence->base.seqno);
++ vmw_fence_goal_write(fman->dev_priv, fence->base.seqno);
+ fman->seqno_valid = true;
+
+ return true;
+@@ -1159,7 +1173,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+ }
+
+ vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
+- handle, -1, NULL);
++ handle, -1);
+ vmw_fence_obj_unreference(&fence);
+ return 0;
+ out_no_create:
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+index c5191de365ca1..fe4732bf2c9d2 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+@@ -32,6 +32,14 @@
+
+ #define VMW_FENCE_WRAP (1 << 24)
+
++static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw)
++{
++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
++ return SVGA_IRQFLAG_REG_FENCE_GOAL;
++ else
++ return SVGA_IRQFLAG_FENCE_GOAL;
++}
++
+ /**
+ * vmw_thread_fn - Deferred (process context) irq handler
+ *
+@@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg)
+ wake_up_all(&dev_priv->fifo_queue);
+
+ if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE |
+- SVGA_IRQFLAG_FENCE_GOAL)) &&
++ vmw_irqflag_fence_goal(dev_priv))) &&
+ !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending))
+ ret = IRQ_WAKE_THREAD;
+
+@@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv,
+ if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
+ return true;
+
+- if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) &&
+- vmw_fifo_idle(dev_priv, seqno))
++ if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno))
+ return true;
+
+ /**
+@@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
+ unsigned long timeout)
+ {
+ struct vmw_fifo_state *fifo_state = dev_priv->fifo;
++ bool fifo_down = false;
+
+ uint32_t count = 0;
+ uint32_t signal_seq;
+@@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
+ */
+
+ if (fifo_idle) {
+- down_read(&fifo_state->rwsem);
+ if (dev_priv->cman) {
+ ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible,
+ 10*HZ);
+ if (ret)
+ goto out_err;
++ } else if (fifo_state) {
++ down_read(&fifo_state->rwsem);
++ fifo_down = true;
+ }
+ }
+
+@@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
+ }
+ }
+ finish_wait(&dev_priv->fence_queue, &__wait);
+- if (ret == 0 && fifo_idle)
++ if (ret == 0 && fifo_idle && fifo_state)
+ vmw_fence_write(dev_priv, signal_seq);
+
+ wake_up_all(&dev_priv->fence_queue);
+ out_err:
+- if (fifo_idle)
++ if (fifo_down)
+ up_read(&fifo_state->rwsem);
+
+ return ret;
+@@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+
+ void vmw_goal_waiter_add(struct vmw_private *dev_priv)
+ {
+- vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
++ vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv),
+ &dev_priv->goal_queue_waiters);
+ }
+
+ void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
+ {
+- vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
++ vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv),
+ &dev_priv->goal_queue_waiters);
+ }
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+index 74fa419092138..01d5a01af2594 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+@@ -186,7 +186,8 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf,
+ if (cmd->dma.guest.ptr.offset % PAGE_SIZE ||
+ box->x != 0 || box->y != 0 || box->z != 0 ||
+ box->srcx != 0 || box->srcy != 0 || box->srcz != 0 ||
+- box->d != 1 || box_count != 1) {
++ box->d != 1 || box_count != 1 ||
++ box->w > 64 || box->h > 64) {
+ /* TODO handle none page aligned offsets */
+ /* TODO handle more dst & src != 0 */
+ /* TODO handle more then one copy */
+@@ -916,6 +917,15 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
+ * Sanity checks.
+ */
+
++ if (!drm_any_plane_has_format(&dev_priv->drm,
++ mode_cmd->pixel_format,
++ mode_cmd->modifier[0])) {
++ drm_dbg(&dev_priv->drm,
++ "unsupported pixel format %p4cc / modifier 0x%llx\n",
++ &mode_cmd->pixel_format, mode_cmd->modifier[0]);
++ return -EINVAL;
++ }
++
+ /* Surface must be marked as a scanout. */
+ if (unlikely(!surface->metadata.scanout))
+ return -EINVAL;
+@@ -1229,20 +1239,13 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv,
+ return -EINVAL;
+ }
+
+- /* Limited framebuffer color depth support for screen objects */
+- if (dev_priv->active_display_unit == vmw_du_screen_object) {
+- switch (mode_cmd->pixel_format) {
+- case DRM_FORMAT_XRGB8888:
+- case DRM_FORMAT_ARGB8888:
+- break;
+- case DRM_FORMAT_XRGB1555:
+- case DRM_FORMAT_RGB565:
+- break;
+- default:
+- DRM_ERROR("Invalid pixel format: %p4cc\n",
+- &mode_cmd->pixel_format);
+- return -EINVAL;
+- }
++ if (!drm_any_plane_has_format(&dev_priv->drm,
++ mode_cmd->pixel_format,
++ mode_cmd->modifier[0])) {
++ drm_dbg(&dev_priv->drm,
++ "unsupported pixel format %p4cc / modifier 0x%llx\n",
++ &mode_cmd->pixel_format, mode_cmd->modifier[0]);
++ return -EINVAL;
+ }
+
+ vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL);
+@@ -1336,7 +1339,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
+ ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
+ mode_cmd,
+ is_bo_proxy);
+-
+ /*
+ * vmw_create_bo_proxy() adds a reference that is no longer
+ * needed
+@@ -1398,13 +1400,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
+ ret = vmw_user_lookup_handle(dev_priv, tfile,
+ mode_cmd->handles[0],
+ &surface, &bo);
+- if (ret)
++ if (ret) {
++ DRM_ERROR("Invalid buffer object handle %u (0x%x).\n",
++ mode_cmd->handles[0], mode_cmd->handles[0]);
+ goto err_out;
++ }
+
+
+ if (!bo &&
+ !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) {
+- DRM_ERROR("Surface size cannot exceed %dx%d",
++ DRM_ERROR("Surface size cannot exceed %dx%d\n",
+ dev_priv->texture_max_width,
+ dev_priv->texture_max_height);
+ goto err_out;
+@@ -2516,7 +2521,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
+ if (file_priv)
+ vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv),
+ ret, user_fence_rep, fence,
+- handle, -1, NULL);
++ handle, -1);
+ if (out_fence)
+ *out_fence = fence;
+ else
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+index bbc809f7bd8a9..8c8ee87fd3ac7 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+@@ -248,7 +248,6 @@ struct vmw_framebuffer_bo {
+ static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
+ DRM_FORMAT_XRGB1555,
+ DRM_FORMAT_RGB565,
+- DRM_FORMAT_RGB888,
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_ARGB8888,
+ };
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+index f9394207dd3cc..632e587519722 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0 OR MIT
+ /**************************************************************************
+ *
+- * Copyright 2012-2015 VMware, Inc., Palo Alto, CA., USA
++ * Copyright 2012-2021 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+@@ -29,12 +29,6 @@
+
+ #include "vmwgfx_drv.h"
+
+-/*
+- * If we set up the screen target otable, screen objects stop working.
+- */
+-
+-#define VMW_OTABLE_SETUP_SUB ((VMWGFX_ENABLE_SCREEN_TARGET_OTABLE ? 0 : 1))
+-
+ #ifdef CONFIG_64BIT
+ #define VMW_PPN_SIZE 8
+ #define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PT64_0
+@@ -75,7 +69,7 @@ static const struct vmw_otable pre_dx_tables[] = {
+ {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true},
+ {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true},
+ {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry),
+- NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE}
++ NULL, true}
+ };
+
+ static const struct vmw_otable dx_tables[] = {
+@@ -84,7 +78,7 @@ static const struct vmw_otable dx_tables[] = {
+ {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true},
+ {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true},
+ {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry),
+- NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE},
++ NULL, true},
+ {VMWGFX_NUM_DXCONTEXT * sizeof(SVGAOTableDXContextEntry), NULL, true},
+ };
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+index e50fb82a30300..8d2437fa6894b 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+@@ -1076,6 +1076,7 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data,
+
+ if (desc_len < 0) {
+ atomic_set(&dev_priv->mksstat_user_pids[slot], 0);
++ __free_page(page);
+ return -EFAULT;
+ }
+
+@@ -1084,21 +1085,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data,
+ reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs));
+
+ /* Pin mksGuestStat user pages and store those in the instance descriptor */
+- nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL);
++ nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat);
+ if (num_pages_stat != nr_pinned_stat)
+ goto err_pin_stat;
+
+ for (i = 0; i < num_pages_stat; ++i)
+ pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]);
+
+- nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL);
++ nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info);
+ if (num_pages_info != nr_pinned_info)
+ goto err_pin_info;
+
+ for (i = 0; i < num_pages_info; ++i)
+ pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]);
+
+- nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL);
++ nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs);
+ if (num_pages_strs != nr_pinned_strs)
+ goto err_pin_strs;
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h
+old mode 100755
+new mode 100644
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+index e5a9a5cbd01a7..922317d1acc8a 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+@@ -477,7 +477,7 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
+ else
+ prot = vm_get_page_prot(vma->vm_flags);
+
+- ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
++ ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+@@ -486,73 +486,3 @@ out_unlock:
+
+ return ret;
+ }
+-
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+- enum page_entry_size pe_size)
+-{
+- struct vm_area_struct *vma = vmf->vma;
+- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+- vma->vm_private_data;
+- struct vmw_buffer_object *vbo =
+- container_of(bo, struct vmw_buffer_object, base);
+- pgprot_t prot;
+- vm_fault_t ret;
+- pgoff_t fault_page_size;
+- bool write = vmf->flags & FAULT_FLAG_WRITE;
+-
+- switch (pe_size) {
+- case PE_SIZE_PMD:
+- fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
+- break;
+-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+- case PE_SIZE_PUD:
+- fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
+- break;
+-#endif
+- default:
+- WARN_ON_ONCE(1);
+- return VM_FAULT_FALLBACK;
+- }
+-
+- /* Always do write dirty-tracking and COW on PTE level. */
+- if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
+- return VM_FAULT_FALLBACK;
+-
+- ret = ttm_bo_vm_reserve(bo, vmf);
+- if (ret)
+- return ret;
+-
+- if (vbo->dirty) {
+- pgoff_t allowed_prefault;
+- unsigned long page_offset;
+-
+- page_offset = vmf->pgoff -
+- drm_vma_node_start(&bo->base.vma_node);
+- if (page_offset >= bo->resource->num_pages ||
+- vmw_resources_clean(vbo, page_offset,
+- page_offset + PAGE_SIZE,
+- &allowed_prefault)) {
+- ret = VM_FAULT_SIGBUS;
+- goto out_unlock;
+- }
+-
+- /*
+- * Write protect, so we get a new fault on write, and can
+- * split.
+- */
+- prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
+- } else {
+- prot = vm_get_page_prot(vma->vm_flags);
+- }
+-
+- ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
+- if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+- return ret;
+-
+-out_unlock:
+- dma_resv_unlock(bo->base.resv);
+-
+- return ret;
+-}
+-#endif
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+index 8d1e869cc1964..34ab08369e043 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+@@ -862,22 +862,21 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo,
+ struct ttm_device *bdev = bo->bdev;
+ struct vmw_private *dev_priv;
+
+-
+ dev_priv = container_of(bdev, struct vmw_private, bdev);
+
+ mutex_lock(&dev_priv->binding_mutex);
+
+- dx_query_mob = container_of(bo, struct vmw_buffer_object, base);
+- if (!dx_query_mob || !dx_query_mob->dx_query_ctx) {
+- mutex_unlock(&dev_priv->binding_mutex);
+- return;
+- }
+-
+ /* If BO is being moved from MOB to system memory */
+ if (new_mem->mem_type == TTM_PL_SYSTEM &&
+ old_mem->mem_type == VMW_PL_MOB) {
+ struct vmw_fence_obj *fence;
+
++ dx_query_mob = container_of(bo, struct vmw_buffer_object, base);
++ if (!dx_query_mob || !dx_query_mob->dx_query_ctx) {
++ mutex_unlock(&dev_priv->binding_mutex);
++ return;
++ }
++
+ (void) vmw_query_readback_all(dx_query_mob);
+ mutex_unlock(&dev_priv->binding_mutex);
+
+@@ -891,7 +890,6 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo,
+ (void) ttm_bo_wait(bo, false, false);
+ } else
+ mutex_unlock(&dev_priv->binding_mutex);
+-
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+index bd157fb21b450..605ff05d449fc 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+@@ -953,6 +953,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
+ struct drm_device *dev = &dev_priv->drm;
+ int i, ret;
+
++ /* Screen objects won't work if GMR's aren't available */
++ if (!dev_priv->has_gmr)
++ return -ENOSYS;
++
+ if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
+ return -ENOSYS;
+ }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+index d85310b2608dd..f5e90d0e2d0f8 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+@@ -1872,8 +1872,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv)
+ int i, ret;
+
+
+- /* Do nothing if Screen Target support is turned off */
+- if (!VMWGFX_ENABLE_SCREEN_TARGET_OTABLE || !dev_priv->has_mob)
++ /* Do nothing if there's no support for MOBs */
++ if (!dev_priv->has_mob)
+ return -ENOSYS;
+
+ if (!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS))
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
+new file mode 100644
+index 0000000000000..b0005b03a6174
+--- /dev/null
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
+@@ -0,0 +1,90 @@
++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
++/*
++ * Copyright 2021 VMware, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person
++ * obtaining a copy of this software and associated documentation
++ * files (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy,
++ * modify, merge, publish, distribute, sublicense, and/or sell copies
++ * of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be
++ * included in all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ */
++
++#include "vmwgfx_drv.h"
++
++#include <drm/ttm/ttm_bo_driver.h>
++#include <drm/ttm/ttm_device.h>
++#include <drm/ttm/ttm_placement.h>
++#include <drm/ttm/ttm_resource.h>
++#include <linux/slab.h>
++
++
++static int vmw_sys_man_alloc(struct ttm_resource_manager *man,
++ struct ttm_buffer_object *bo,
++ const struct ttm_place *place,
++ struct ttm_resource **res)
++{
++ *res = kzalloc(sizeof(**res), GFP_KERNEL);
++ if (!*res)
++ return -ENOMEM;
++
++ ttm_resource_init(bo, place, *res);
++ return 0;
++}
++
++static void vmw_sys_man_free(struct ttm_resource_manager *man,
++ struct ttm_resource *res)
++{
++ kfree(res);
++}
++
++static const struct ttm_resource_manager_func vmw_sys_manager_func = {
++ .alloc = vmw_sys_man_alloc,
++ .free = vmw_sys_man_free,
++};
++
++int vmw_sys_man_init(struct vmw_private *dev_priv)
++{
++ struct ttm_device *bdev = &dev_priv->bdev;
++ struct ttm_resource_manager *man =
++ kzalloc(sizeof(*man), GFP_KERNEL);
++
++ if (!man)
++ return -ENOMEM;
++
++ man->use_tt = true;
++ man->func = &vmw_sys_manager_func;
++
++ ttm_resource_manager_init(man, 0);
++ ttm_set_driver_manager(bdev, VMW_PL_SYSTEM, man);
++ ttm_resource_manager_set_used(man, true);
++ return 0;
++}
++
++void vmw_sys_man_fini(struct vmw_private *dev_priv)
++{
++ struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev,
++ VMW_PL_SYSTEM);
++
++ ttm_resource_manager_evict_all(&dev_priv->bdev, man);
++
++ ttm_resource_manager_set_used(man, false);
++ ttm_resource_manager_cleanup(man);
++
++ ttm_set_driver_manager(&dev_priv->bdev, VMW_PL_SYSTEM, NULL);
++ kfree(man);
++}
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
+deleted file mode 100644
+index 2a3d3468e4e0a..0000000000000
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
++++ /dev/null
+@@ -1,184 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * Huge page-table-entry support for IO memory.
+- *
+- * Copyright (C) 2007-2019 Vmware, Inc. All rights reservedd.
+- */
+-#include "vmwgfx_drv.h"
+-#include <drm/ttm/ttm_bo_driver.h>
+-#include <drm/ttm/ttm_placement.h>
+-#include <drm/ttm/ttm_range_manager.h>
+-
+-/**
+- * struct vmw_thp_manager - Range manager implementing huge page alignment
+- *
+- * @manager: TTM resource manager.
+- * @mm: The underlying range manager. Protected by @lock.
+- * @lock: Manager lock.
+- */
+-struct vmw_thp_manager {
+- struct ttm_resource_manager manager;
+- struct drm_mm mm;
+- spinlock_t lock;
+-};
+-
+-static struct vmw_thp_manager *to_thp_manager(struct ttm_resource_manager *man)
+-{
+- return container_of(man, struct vmw_thp_manager, manager);
+-}
+-
+-static const struct ttm_resource_manager_func vmw_thp_func;
+-
+-static int vmw_thp_insert_aligned(struct ttm_buffer_object *bo,
+- struct drm_mm *mm, struct drm_mm_node *node,
+- unsigned long align_pages,
+- const struct ttm_place *place,
+- struct ttm_resource *mem,
+- unsigned long lpfn,
+- enum drm_mm_insert_mode mode)
+-{
+- if (align_pages >= bo->page_alignment &&
+- (!bo->page_alignment || align_pages % bo->page_alignment == 0)) {
+- return drm_mm_insert_node_in_range(mm, node,
+- mem->num_pages,
+- align_pages, 0,
+- place->fpfn, lpfn, mode);
+- }
+-
+- return -ENOSPC;
+-}
+-
+-static int vmw_thp_get_node(struct ttm_resource_manager *man,
+- struct ttm_buffer_object *bo,
+- const struct ttm_place *place,
+- struct ttm_resource **res)
+-{
+- struct vmw_thp_manager *rman = to_thp_manager(man);
+- struct drm_mm *mm = &rman->mm;
+- struct ttm_range_mgr_node *node;
+- unsigned long align_pages;
+- unsigned long lpfn;
+- enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
+- int ret;
+-
+- node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
+- if (!node)
+- return -ENOMEM;
+-
+- ttm_resource_init(bo, place, &node->base);
+-
+- lpfn = place->lpfn;
+- if (!lpfn)
+- lpfn = man->size;
+-
+- mode = DRM_MM_INSERT_BEST;
+- if (place->flags & TTM_PL_FLAG_TOPDOWN)
+- mode = DRM_MM_INSERT_HIGH;
+-
+- spin_lock(&rman->lock);
+- if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
+- align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
+- if (node->base.num_pages >= align_pages) {
+- ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0],
+- align_pages, place,
+- &node->base, lpfn, mode);
+- if (!ret)
+- goto found_unlock;
+- }
+- }
+-
+- align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
+- if (node->base.num_pages >= align_pages) {
+- ret = vmw_thp_insert_aligned(bo, mm, &node->mm_nodes[0],
+- align_pages, place, &node->base,
+- lpfn, mode);
+- if (!ret)
+- goto found_unlock;
+- }
+-
+- ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0],
+- node->base.num_pages,
+- bo->page_alignment, 0,
+- place->fpfn, lpfn, mode);
+-found_unlock:
+- spin_unlock(&rman->lock);
+-
+- if (unlikely(ret)) {
+- kfree(node);
+- } else {
+- node->base.start = node->mm_nodes[0].start;
+- *res = &node->base;
+- }
+-
+- return ret;
+-}
+-
+-static void vmw_thp_put_node(struct ttm_resource_manager *man,
+- struct ttm_resource *res)
+-{
+- struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+- struct vmw_thp_manager *rman = to_thp_manager(man);
+-
+- spin_lock(&rman->lock);
+- drm_mm_remove_node(&node->mm_nodes[0]);
+- spin_unlock(&rman->lock);
+-
+- kfree(node);
+-}
+-
+-int vmw_thp_init(struct vmw_private *dev_priv)
+-{
+- struct vmw_thp_manager *rman;
+-
+- rman = kzalloc(sizeof(*rman), GFP_KERNEL);
+- if (!rman)
+- return -ENOMEM;
+-
+- ttm_resource_manager_init(&rman->manager,
+- dev_priv->vram_size >> PAGE_SHIFT);
+-
+- rman->manager.func = &vmw_thp_func;
+- drm_mm_init(&rman->mm, 0, rman->manager.size);
+- spin_lock_init(&rman->lock);
+-
+- ttm_set_driver_manager(&dev_priv->bdev, TTM_PL_VRAM, &rman->manager);
+- ttm_resource_manager_set_used(&rman->manager, true);
+- return 0;
+-}
+-
+-void vmw_thp_fini(struct vmw_private *dev_priv)
+-{
+- struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev, TTM_PL_VRAM);
+- struct vmw_thp_manager *rman = to_thp_manager(man);
+- struct drm_mm *mm = &rman->mm;
+- int ret;
+-
+- ttm_resource_manager_set_used(man, false);
+-
+- ret = ttm_resource_manager_evict_all(&dev_priv->bdev, man);
+- if (ret)
+- return;
+- spin_lock(&rman->lock);
+- drm_mm_clean(mm);
+- drm_mm_takedown(mm);
+- spin_unlock(&rman->lock);
+- ttm_resource_manager_cleanup(man);
+- ttm_set_driver_manager(&dev_priv->bdev, TTM_PL_VRAM, NULL);
+- kfree(rman);
+-}
+-
+-static void vmw_thp_debug(struct ttm_resource_manager *man,
+- struct drm_printer *printer)
+-{
+- struct vmw_thp_manager *rman = to_thp_manager(man);
+-
+- spin_lock(&rman->lock);
+- drm_mm_print(&rman->mm, printer);
+- spin_unlock(&rman->lock);
+-}
+-
+-static const struct ttm_resource_manager_func vmw_thp_func = {
+- .alloc = vmw_thp_get_node,
+- .free = vmw_thp_put_node,
+- .debug = vmw_thp_debug
+-};
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+index 8b8991e3ed2d0..450bb1e9626f7 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+@@ -92,6 +92,13 @@ static const struct ttm_place gmr_vram_placement_flags[] = {
+ }
+ };
+
++static const struct ttm_place vmw_sys_placement_flags = {
++ .fpfn = 0,
++ .lpfn = 0,
++ .mem_type = VMW_PL_SYSTEM,
++ .flags = 0
++};
++
+ struct ttm_placement vmw_vram_gmr_placement = {
+ .num_placement = 2,
+ .placement = vram_gmr_placement_flags,
+@@ -113,28 +120,11 @@ struct ttm_placement vmw_sys_placement = {
+ .busy_placement = &sys_placement_flags
+ };
+
+-static const struct ttm_place evictable_placement_flags[] = {
+- {
+- .fpfn = 0,
+- .lpfn = 0,
+- .mem_type = TTM_PL_SYSTEM,
+- .flags = 0
+- }, {
+- .fpfn = 0,
+- .lpfn = 0,
+- .mem_type = TTM_PL_VRAM,
+- .flags = 0
+- }, {
+- .fpfn = 0,
+- .lpfn = 0,
+- .mem_type = VMW_PL_GMR,
+- .flags = 0
+- }, {
+- .fpfn = 0,
+- .lpfn = 0,
+- .mem_type = VMW_PL_MOB,
+- .flags = 0
+- }
++struct ttm_placement vmw_pt_sys_placement = {
++ .num_placement = 1,
++ .placement = &vmw_sys_placement_flags,
++ .num_busy_placement = 1,
++ .busy_placement = &vmw_sys_placement_flags
+ };
+
+ static const struct ttm_place nonfixed_placement_flags[] = {
+@@ -156,13 +146,6 @@ static const struct ttm_place nonfixed_placement_flags[] = {
+ }
+ };
+
+-struct ttm_placement vmw_evictable_placement = {
+- .num_placement = 4,
+- .placement = evictable_placement_flags,
+- .num_busy_placement = 1,
+- .busy_placement = &sys_placement_flags
+-};
+-
+ struct ttm_placement vmw_srf_placement = {
+ .num_placement = 1,
+ .num_busy_placement = 2,
+@@ -484,6 +467,9 @@ static int vmw_ttm_bind(struct ttm_device *bdev,
+ &vmw_be->vsgt, ttm->num_pages,
+ vmw_be->gmr_id);
+ break;
++ case VMW_PL_SYSTEM:
++ /* Nothing to be done for a system bind */
++ break;
+ default:
+ BUG();
+ }
+@@ -507,6 +493,8 @@ static void vmw_ttm_unbind(struct ttm_device *bdev,
+ case VMW_PL_MOB:
+ vmw_mob_unbind(vmw_be->dev_priv, vmw_be->mob);
+ break;
++ case VMW_PL_SYSTEM:
++ break;
+ default:
+ BUG();
+ }
+@@ -628,6 +616,7 @@ static int vmw_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *
+
+ switch (mem->mem_type) {
+ case TTM_PL_SYSTEM:
++ case VMW_PL_SYSTEM:
+ case VMW_PL_GMR:
+ case VMW_PL_MOB:
+ return 0;
+@@ -674,6 +663,11 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
+ (void) ttm_bo_wait(bo, false, false);
+ }
+
++static bool vmw_memtype_is_system(uint32_t mem_type)
++{
++ return mem_type == TTM_PL_SYSTEM || mem_type == VMW_PL_SYSTEM;
++}
++
+ static int vmw_move(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_operation_ctx *ctx,
+@@ -684,7 +678,7 @@ static int vmw_move(struct ttm_buffer_object *bo,
+ struct ttm_resource_manager *new_man = ttm_manager_type(bo->bdev, new_mem->mem_type);
+ int ret;
+
+- if (new_man->use_tt && new_mem->mem_type != TTM_PL_SYSTEM) {
++ if (new_man->use_tt && !vmw_memtype_is_system(new_mem->mem_type)) {
+ ret = vmw_ttm_bind(bo->bdev, bo->ttm, new_mem);
+ if (ret)
+ return ret;
+@@ -693,7 +687,7 @@ static int vmw_move(struct ttm_buffer_object *bo,
+ vmw_move_notify(bo, bo->resource, new_mem);
+
+ if (old_man->use_tt && new_man->use_tt) {
+- if (bo->resource->mem_type == TTM_PL_SYSTEM) {
++ if (vmw_memtype_is_system(bo->resource->mem_type)) {
+ ttm_bo_move_null(bo, new_mem);
+ return 0;
+ }
+@@ -740,7 +734,7 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv,
+ int ret;
+
+ ret = vmw_bo_create_kernel(dev_priv, bo_size,
+- &vmw_sys_placement,
++ &vmw_pt_sys_placement,
+ &bo);
+ if (unlikely(ret != 0))
+ return ret;
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+index e6b1f98ec99f0..0a4c340252ec4 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+@@ -61,9 +61,6 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
+ .fault = vmw_bo_vm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- .huge_fault = vmw_bo_vm_huge_fault,
+-#endif
+ };
+ struct drm_file *file_priv = filp->private_data;
+ struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c
+index ac37053412a13..5bb42d0a2de98 100644
+--- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c
++++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c
+@@ -200,7 +200,9 @@ static int zynqmp_dpsub_probe(struct platform_device *pdev)
+ dpsub->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dpsub);
+
+- dma_set_mask(dpsub->dev, DMA_BIT_MASK(ZYNQMP_DISP_MAX_DMA_BIT));
++ ret = dma_set_mask(dpsub->dev, DMA_BIT_MASK(ZYNQMP_DISP_MAX_DMA_BIT));
++ if (ret)
++ return ret;
+
+ /* Try the reserved memory. Proceed if there's none. */
+ of_reserved_mem_device_init(&pdev->dev);
+diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
+index 6dab94adf25e5..6815b4db17c1b 100644
+--- a/drivers/gpu/host1x/Kconfig
++++ b/drivers/gpu/host1x/Kconfig
+@@ -2,6 +2,7 @@
+ config TEGRA_HOST1X
+ tristate "NVIDIA Tegra host1x driver"
+ depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
++ select DMA_SHARED_BUFFER
+ select IOMMU_IOVA
+ help
+ Driver for the NVIDIA Tegra host1x hardware.
+diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
+index fbb6447b8659e..c2a4bf2aae615 100644
+--- a/drivers/gpu/host1x/dev.c
++++ b/drivers/gpu/host1x/dev.c
+@@ -18,6 +18,10 @@
+ #include <trace/events/host1x.h>
+ #undef CREATE_TRACE_POINTS
+
++#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
++#include <asm/dma-iommu.h>
++#endif
++
+ #include "bus.h"
+ #include "channel.h"
+ #include "debug.h"
+@@ -200,6 +204,10 @@ static void host1x_setup_sid_table(struct host1x *host)
+
+ static bool host1x_wants_iommu(struct host1x *host1x)
+ {
++ /* Our IOMMU usage policy doesn't currently play well with GART */
++ if (of_machine_is_compatible("nvidia,tegra20"))
++ return false;
++
+ /*
+ * If we support addressing a maximum of 32 bits of physical memory
+ * and if the host1x firewall is enabled, there's no need to enable
+@@ -238,6 +246,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+ struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
+ int err;
+
++#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
++ if (host->dev->archdata.mapping) {
++ struct dma_iommu_mapping *mapping =
++ to_dma_iommu_mapping(host->dev);
++ arm_iommu_detach_device(host->dev);
++ arm_iommu_release_mapping(mapping);
++
++ domain = iommu_get_domain_for_dev(host->dev);
++ }
++#endif
++
+ /*
+ * We may not always want to enable IOMMU support (for example if the
+ * host1x firewall is already enabled and we don't support addressing
+@@ -511,6 +530,7 @@ static int host1x_remove(struct platform_device *pdev)
+ host1x_syncpt_deinit(host);
+ reset_control_assert(host->rst);
+ clk_disable_unprepare(host->clk);
++ host1x_channel_list_free(&host->channel_list);
+ host1x_iommu_exit(host);
+
+ return 0;
+diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
+index dd39d67ccec36..8cf35b2eff3db 100644
+--- a/drivers/gpu/host1x/hw/syncpt_hw.c
++++ b/drivers/gpu/host1x/hw/syncpt_hw.c
+@@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
+ #if HOST1X_HW >= 6
+ struct host1x *host = sp->host;
+
+- if (!host->hv_regs)
+- return;
+-
+ host1x_sync_writel(host,
+ HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+ HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
+index d198a10848c6b..a89a408182e60 100644
+--- a/drivers/gpu/host1x/syncpt.c
++++ b/drivers/gpu/host1x/syncpt.c
+@@ -225,27 +225,12 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
+ void *ref;
+ struct host1x_waitlist *waiter;
+ int err = 0, check_count = 0;
+- u32 val;
+
+ if (value)
+- *value = 0;
+-
+- /* first check cache */
+- if (host1x_syncpt_is_expired(sp, thresh)) {
+- if (value)
+- *value = host1x_syncpt_load(sp);
++ *value = host1x_syncpt_load(sp);
+
++ if (host1x_syncpt_is_expired(sp, thresh))
+ return 0;
+- }
+-
+- /* try to read from register */
+- val = host1x_hw_syncpt_load(sp->host, sp);
+- if (host1x_syncpt_is_expired(sp, thresh)) {
+- if (value)
+- *value = val;
+-
+- goto done;
+- }
+
+ if (!timeout) {
+ err = -EAGAIN;
+diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
+index 118318513e2d2..c35eac1116f5f 100644
+--- a/drivers/gpu/ipu-v3/ipu-common.c
++++ b/drivers/gpu/ipu-v3/ipu-common.c
+@@ -1165,6 +1165,7 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
+ pdev = platform_device_alloc(reg->name, id++);
+ if (!pdev) {
+ ret = -ENOMEM;
++ of_node_put(of_node);
+ goto err_register;
+ }
+
+diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
+index 666223c6bec4d..0a34e0ab4fe60 100644
+--- a/drivers/gpu/ipu-v3/ipu-di.c
++++ b/drivers/gpu/ipu-v3/ipu-di.c
+@@ -447,8 +447,9 @@ static void ipu_di_config_clock(struct ipu_di *di,
+
+ error = rate / (sig->mode.pixelclock / 1000);
+
+- dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n",
+- rate, div, (signed)(error - 1000) / 10, error % 10);
++ dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n",
++ rate, div, error < 1000 ? '-' : '+',
++ abs(error - 1000) / 10, abs(error - 1000) % 10);
+
+ /* Allow a 1% error */
+ if (error < 1010 && error >= 990) {
+diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c
+index ce7740ef449ba..51d0875a34800 100644
+--- a/drivers/greybus/svc.c
++++ b/drivers/greybus/svc.c
+@@ -866,8 +866,14 @@ static int gb_svc_hello(struct gb_operation *op)
+
+ gb_svc_debugfs_init(svc);
+
+- return gb_svc_queue_deferred_request(op);
++ ret = gb_svc_queue_deferred_request(op);
++ if (ret)
++ goto err_remove_debugfs;
++
++ return 0;
+
++err_remove_debugfs:
++ gb_svc_debugfs_exit(svc);
+ err_unregister_device:
+ gb_svc_watchdog_destroy(svc);
+ device_del(&svc->dev);
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index 3c33bf572d6d3..9235ab7161e3a 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -207,14 +207,14 @@ config HID_CHERRY
+
+ config HID_CHICONY
+ tristate "Chicony devices"
+- depends on HID
++ depends on USB_HID
+ default !EXPERT
+ help
+ Support for Chicony Tactical pad and special keys on Chicony keyboards.
+
+ config HID_CORSAIR
+ tristate "Corsair devices"
+- depends on HID && USB && LEDS_CLASS
++ depends on USB_HID && LEDS_CLASS
+ help
+ Support for Corsair devices that are not fully compliant with the
+ HID standard.
+@@ -245,7 +245,7 @@ config HID_MACALLY
+
+ config HID_PRODIKEYS
+ tristate "Prodikeys PC-MIDI Keyboard support"
+- depends on HID && SND
++ depends on USB_HID && SND
+ select SND_RAWMIDI
+ help
+ Support for Prodikeys PC-MIDI Keyboard device support.
+@@ -553,7 +553,7 @@ config HID_LENOVO
+
+ config HID_LOGITECH
+ tristate "Logitech devices"
+- depends on HID
++ depends on USB_HID
+ depends on LEDS_CLASS
+ default !EXPERT
+ help
+@@ -919,7 +919,7 @@ config HID_SAITEK
+
+ config HID_SAMSUNG
+ tristate "Samsung InfraRed remote control or keyboards"
+- depends on HID
++ depends on USB_HID
+ help
+ Support for Samsung InfraRed remote control or keyboards.
+
+diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+index 840fd075c56f1..911a23a9bcd1b 100644
+--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
++++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+@@ -154,6 +154,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
+ dev = &privdata->pdev->dev;
+
+ cl_data->num_hid_devices = amd_mp2_get_sensor_num(privdata, &cl_data->sensor_idx[0]);
++ if (cl_data->num_hid_devices == 0)
++ return -ENODEV;
+
+ INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work);
+ INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer);
+@@ -164,6 +166,10 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
+ in_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8,
+ &cl_data->sensor_dma_addr[i],
+ GFP_KERNEL);
++ if (!in_data->sensor_virt_addr[i]) {
++ rc = -ENOMEM;
++ goto cleanup;
++ }
+ cl_data->sensor_sts[i] = SENSOR_DISABLED;
+ cl_data->sensor_requested_cnt[i] = 0;
+ cl_data->cur_hid_dev = i;
+@@ -226,6 +232,17 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
+ dev_dbg(dev, "sid 0x%x status 0x%x\n",
+ cl_data->sensor_idx[i], cl_data->sensor_sts[i]);
+ }
++ if (privdata->mp2_ops->discovery_status &&
++ privdata->mp2_ops->discovery_status(privdata) == 0) {
++ amd_sfh_hid_client_deinit(privdata);
++ for (i = 0; i < cl_data->num_hid_devices; i++) {
++ devm_kfree(dev, cl_data->feature_report[i]);
++ devm_kfree(dev, in_data->input_report[i]);
++ devm_kfree(dev, cl_data->report_descr[i]);
++ }
++ dev_warn(dev, "Failed to discover, sensors not enabled\n");
++ return -EOPNOTSUPP;
++ }
+ schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
+ return 0;
+
+diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
+index 5ad1e7acd294e..3b0615c6aecff 100644
+--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
++++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
+@@ -98,11 +98,15 @@ static int amdtp_wait_for_response(struct hid_device *hid)
+
+ void amdtp_hid_wakeup(struct hid_device *hid)
+ {
+- struct amdtp_hid_data *hid_data = hid->driver_data;
+- struct amdtp_cl_data *cli_data = hid_data->cli_data;
++ struct amdtp_hid_data *hid_data;
++ struct amdtp_cl_data *cli_data;
+
+- cli_data->request_done[cli_data->cur_hid_dev] = true;
+- wake_up_interruptible(&hid_data->hid_wait);
++ if (hid) {
++ hid_data = hid->driver_data;
++ cli_data = hid_data->cli_data;
++ cli_data->request_done[cli_data->cur_hid_dev] = true;
++ wake_up_interruptible(&hid_data->hid_wait);
++ }
+ }
+
+ static struct hid_ll_driver amdtp_hid_ll_driver = {
+@@ -139,10 +143,10 @@ int amdtp_hid_probe(u32 cur_hid_dev, struct amdtp_cl_data *cli_data)
+
+ hid->driver_data = hid_data;
+ cli_data->hid_sensor_hubs[cur_hid_dev] = hid;
+- hid->bus = BUS_AMD_AMDTP;
++ hid->bus = BUS_AMD_SFH;
+ hid->vendor = AMD_SFH_HID_VENDOR;
+ hid->product = AMD_SFH_HID_PRODUCT;
+- snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "hid-amdtp",
++ snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "hid-amdsfh",
+ hid->vendor, hid->product);
+
+ rc = hid_add_device(hid);
+diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
+index ae2ac9191ba77..741cff350589b 100644
+--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
++++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
+@@ -10,7 +10,7 @@
+ #define AMDSFH_HID_H
+
+ #define MAX_HID_DEVICES 5
+-#define BUS_AMD_AMDTP 0x20
++#define BUS_AMD_SFH 0x20
+ #define AMD_SFH_HID_VENDOR 0x1022
+ #define AMD_SFH_HID_PRODUCT 0x0001
+
+diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+index 05c007b213f24..6ff8f254dc840 100644
+--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
++++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+@@ -36,11 +36,11 @@ static int amd_sfh_wait_response_v2(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_
+ {
+ union cmd_response cmd_resp;
+
+- /* Get response with status within a max of 800 ms timeout */
++ /* Get response with status within a max of 1600 ms timeout */
+ if (!readl_poll_timeout(mp2->mmio + AMD_P2C_MSG(0), cmd_resp.resp,
+ (cmd_resp.response_v2.response == sensor_sts &&
+ cmd_resp.response_v2.status == 0 && (sid == 0xff ||
+- cmd_resp.response_v2.sensor_id == sid)), 500, 800000))
++ cmd_resp.response_v2.sensor_id == sid)), 500, 1600000))
+ return cmd_resp.response_v2.response;
+
+ return SENSOR_DISABLED;
+@@ -88,6 +88,50 @@ static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata)
+ writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
+ }
+
++static void amd_sfh_clear_intr_v2(struct amd_mp2_dev *privdata)
++{
++ if (readl(privdata->mmio + AMD_P2C_MSG(4))) {
++ writel(0, privdata->mmio + AMD_P2C_MSG(4));
++ writel(0xf, privdata->mmio + AMD_P2C_MSG(5));
++ }
++}
++
++static void amd_sfh_clear_intr(struct amd_mp2_dev *privdata)
++{
++ if (privdata->mp2_ops->clear_intr)
++ privdata->mp2_ops->clear_intr(privdata);
++}
++
++static irqreturn_t amd_sfh_irq_handler(int irq, void *data)
++{
++ amd_sfh_clear_intr(data);
++
++ return IRQ_HANDLED;
++}
++
++static int amd_sfh_irq_init_v2(struct amd_mp2_dev *privdata)
++{
++ int rc;
++
++ pci_intx(privdata->pdev, true);
++
++ rc = devm_request_irq(&privdata->pdev->dev, privdata->pdev->irq,
++ amd_sfh_irq_handler, 0, DRIVER_NAME, privdata);
++ if (rc) {
++ dev_err(&privdata->pdev->dev, "failed to request irq %d err=%d\n",
++ privdata->pdev->irq, rc);
++ return rc;
++ }
++
++ return 0;
++}
++
++static int amd_sfh_dis_sts_v2(struct amd_mp2_dev *privdata)
++{
++ return (readl(privdata->mmio + AMD_P2C_MSG(1)) &
++ SENSOR_DISCOVERY_STATUS_MASK) >> SENSOR_DISCOVERY_STATUS_SHIFT;
++}
++
+ void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
+ {
+ union sfh_cmd_param cmd_param;
+@@ -192,6 +236,8 @@ static void amd_mp2_pci_remove(void *privdata)
+ struct amd_mp2_dev *mp2 = privdata;
+ amd_sfh_hid_client_deinit(privdata);
+ mp2->mp2_ops->stop_all(mp2);
++ pci_intx(mp2->pdev, false);
++ amd_sfh_clear_intr(mp2);
+ }
+
+ static const struct amd_mp2_ops amd_sfh_ops_v2 = {
+@@ -199,6 +245,9 @@ static const struct amd_mp2_ops amd_sfh_ops_v2 = {
+ .stop = amd_stop_sensor_v2,
+ .stop_all = amd_stop_all_sensor_v2,
+ .response = amd_sfh_wait_response_v2,
++ .clear_intr = amd_sfh_clear_intr_v2,
++ .init_intr = amd_sfh_irq_init_v2,
++ .discovery_status = amd_sfh_dis_sts_v2,
+ };
+
+ static const struct amd_mp2_ops amd_sfh_ops = {
+@@ -224,11 +273,37 @@ static void mp2_select_ops(struct amd_mp2_dev *privdata)
+ }
+ }
+
++static int amd_sfh_irq_init(struct amd_mp2_dev *privdata)
++{
++ if (privdata->mp2_ops->init_intr)
++ return privdata->mp2_ops->init_intr(privdata);
++
++ return 0;
++}
++
++static const struct dmi_system_id dmi_nodevs[] = {
++ {
++ /*
++ * Google Chromebooks use Chrome OS Embedded Controller Sensor
++ * Hub instead of Sensor Hub Fusion and leaves MP2
++ * uninitialized, which disables all functionalities, even
++ * including the registers necessary for feature detections.
++ */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Google"),
++ },
++ },
++ { }
++};
++
+ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ {
+ struct amd_mp2_dev *privdata;
+ int rc;
+
++ if (dmi_first_match(dmi_nodevs))
++ return -ENODEV;
++
+ privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL);
+ if (!privdata)
+ return -ENOMEM;
+@@ -257,13 +332,33 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
+
+ mp2_select_ops(privdata);
+
++ rc = amd_sfh_irq_init(privdata);
++ if (rc) {
++ dev_err(&pdev->dev, "amd_sfh_irq_init failed\n");
++ return rc;
++ }
++
+ rc = amd_sfh_hid_client_init(privdata);
+- if (rc)
++ if (rc) {
++ amd_sfh_clear_intr(privdata);
++ if (rc != -EOPNOTSUPP)
++ dev_err(&pdev->dev, "amd_sfh_hid_client_init failed\n");
+ return rc;
++ }
++
++ amd_sfh_clear_intr(privdata);
+
+ return devm_add_action_or_reset(&pdev->dev, amd_mp2_pci_remove, privdata);
+ }
+
++static void amd_sfh_shutdown(struct pci_dev *pdev)
++{
++ struct amd_mp2_dev *mp2 = pci_get_drvdata(pdev);
++
++ if (mp2 && mp2->mp2_ops)
++ mp2->mp2_ops->stop_all(mp2);
++}
++
+ static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+@@ -287,6 +382,9 @@ static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
+ }
+ }
+
++ schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
++ amd_sfh_clear_intr(mp2);
++
+ return 0;
+ }
+
+@@ -310,6 +408,9 @@ static int __maybe_unused amd_mp2_pci_suspend(struct device *dev)
+ }
+ }
+
++ cancel_delayed_work_sync(&cl_data->work_buffer);
++ amd_sfh_clear_intr(mp2);
++
+ return 0;
+ }
+
+@@ -327,6 +428,7 @@ static struct pci_driver amd_mp2_pci_driver = {
+ .id_table = amd_mp2_pci_tbl,
+ .probe = amd_mp2_pci_probe,
+ .driver.pm = &amd_mp2_pm_ops,
++ .shutdown = amd_sfh_shutdown,
+ };
+ module_pci_driver(amd_mp2_pci_driver);
+
+diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+index 1ff6f83cb6fd1..2d3203d3daeb3 100644
+--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
++++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+@@ -38,6 +38,9 @@
+
+ #define AMD_SFH_IDLE_LOOP 200
+
++#define SENSOR_DISCOVERY_STATUS_MASK GENMASK(5, 3)
++#define SENSOR_DISCOVERY_STATUS_SHIFT 3
++
+ /* SFH Command register */
+ union sfh_cmd_base {
+ u32 ul;
+@@ -48,7 +51,7 @@ union sfh_cmd_base {
+ } s;
+ struct {
+ u32 cmd_id : 4;
+- u32 intr_enable : 1;
++ u32 intr_disable : 1;
+ u32 rsvd1 : 3;
+ u32 length : 7;
+ u32 mem_type : 1;
+@@ -140,5 +143,8 @@ struct amd_mp2_ops {
+ void (*stop)(struct amd_mp2_dev *privdata, u16 sensor_idx);
+ void (*stop_all)(struct amd_mp2_dev *privdata);
+ int (*response)(struct amd_mp2_dev *mp2, u8 sid, u32 sensor_sts);
++ void (*clear_intr)(struct amd_mp2_dev *privdata);
++ int (*init_intr)(struct amd_mp2_dev *privdata);
++ int (*discovery_status)(struct amd_mp2_dev *privdata);
+ };
+ #endif
+diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
+index 0c36972193821..07eb3281b88db 100644
+--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
++++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
+@@ -26,6 +26,7 @@
+ #define HID_USAGE_SENSOR_STATE_READY_ENUM 0x02
+ #define HID_USAGE_SENSOR_STATE_INITIALIZING_ENUM 0x05
+ #define HID_USAGE_SENSOR_EVENT_DATA_UPDATED_ENUM 0x04
++#define ILLUMINANCE_MASK GENMASK(14, 0)
+
+ int get_report_descriptor(int sensor_idx, u8 *rep_desc)
+ {
+@@ -245,7 +246,8 @@ u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct amd_
+ get_common_inputs(&als_input.common_property, report_id);
+ /* For ALS ,V2 Platforms uses C2P_MSG5 register instead of DRAM access method */
+ if (supported_input == V2_STATUS)
+- als_input.illuminance_value = (int)readl(privdata->mmio + AMD_C2P_MSG(5));
++ als_input.illuminance_value =
++ readl(privdata->mmio + AMD_C2P_MSG(5)) & ILLUMINANCE_MASK;
+ else
+ als_input.illuminance_value =
+ (int)sensor_virt_addr[0] / AMD_SFH_FW_MULTIPLIER;
+diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c
+index 2b986d0dbde46..db146d0f7937e 100644
+--- a/drivers/hid/hid-alps.c
++++ b/drivers/hid/hid-alps.c
+@@ -830,6 +830,8 @@ static const struct hid_device_id alps_id[] = {
+ USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
+ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
+ USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1) },
++ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
++ USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_UNICORN_LEGACY) },
+ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY,
+ USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_T4_BTNLESS) },
+ { }
+diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
+index 6ccfa0cb997ab..b683c0e8557d4 100644
+--- a/drivers/hid/hid-apple.c
++++ b/drivers/hid/hid-apple.c
+@@ -429,7 +429,7 @@ static int apple_input_configured(struct hid_device *hdev,
+
+ if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) {
+ hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n");
+- asc->quirks = 0;
++ asc->quirks &= ~APPLE_HAS_FN;
+ }
+
+ return 0;
+diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
+index f3ecddc519ee8..16832e79f6a87 100644
+--- a/drivers/hid/hid-asus.c
++++ b/drivers/hid/hid-asus.c
+@@ -98,6 +98,7 @@ struct asus_kbd_leds {
+ struct hid_device *hdev;
+ struct work_struct work;
+ unsigned int brightness;
++ spinlock_t lock;
+ bool removed;
+ };
+
+@@ -492,21 +493,42 @@ static int rog_nkey_led_init(struct hid_device *hdev)
+ return ret;
+ }
+
++static void asus_schedule_work(struct asus_kbd_leds *led)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&led->lock, flags);
++ if (!led->removed)
++ schedule_work(&led->work);
++ spin_unlock_irqrestore(&led->lock, flags);
++}
++
+ static void asus_kbd_backlight_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+ {
+ struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
+ cdev);
++ unsigned long flags;
++
++ spin_lock_irqsave(&led->lock, flags);
+ led->brightness = brightness;
+- schedule_work(&led->work);
++ spin_unlock_irqrestore(&led->lock, flags);
++
++ asus_schedule_work(led);
+ }
+
+ static enum led_brightness asus_kbd_backlight_get(struct led_classdev *led_cdev)
+ {
+ struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
+ cdev);
++ enum led_brightness brightness;
++ unsigned long flags;
++
++ spin_lock_irqsave(&led->lock, flags);
++ brightness = led->brightness;
++ spin_unlock_irqrestore(&led->lock, flags);
+
+- return led->brightness;
++ return brightness;
+ }
+
+ static void asus_kbd_backlight_work(struct work_struct *work)
+@@ -514,11 +536,11 @@ static void asus_kbd_backlight_work(struct work_struct *work)
+ struct asus_kbd_leds *led = container_of(work, struct asus_kbd_leds, work);
+ u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4, 0x00 };
+ int ret;
++ unsigned long flags;
+
+- if (led->removed)
+- return;
+-
++ spin_lock_irqsave(&led->lock, flags);
+ buf[4] = led->brightness;
++ spin_unlock_irqrestore(&led->lock, flags);
+
+ ret = asus_kbd_set_report(led->hdev, buf, sizeof(buf));
+ if (ret < 0)
+@@ -586,6 +608,7 @@ static int asus_kbd_register_leds(struct hid_device *hdev)
+ drvdata->kbd_backlight->cdev.brightness_set = asus_kbd_backlight_set;
+ drvdata->kbd_backlight->cdev.brightness_get = asus_kbd_backlight_get;
+ INIT_WORK(&drvdata->kbd_backlight->work, asus_kbd_backlight_work);
++ spin_lock_init(&drvdata->kbd_backlight->lock);
+
+ ret = devm_led_classdev_register(&hdev->dev, &drvdata->kbd_backlight->cdev);
+ if (ret < 0) {
+@@ -1028,8 +1051,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ if (drvdata->quirks & QUIRK_IS_MULTITOUCH)
+ drvdata->tp = &asus_i2c_tp;
+
+- if ((drvdata->quirks & QUIRK_T100_KEYBOARD) &&
+- hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
++ if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) {
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+ if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) {
+@@ -1057,8 +1079,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ drvdata->tp = &asus_t100chi_tp;
+ }
+
+- if ((drvdata->quirks & QUIRK_MEDION_E1239T) &&
+- hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
++ if ((drvdata->quirks & QUIRK_MEDION_E1239T) && hid_is_usb(hdev)) {
+ struct usb_host_interface *alt =
+ to_usb_interface(hdev->dev.parent)->altsetting;
+
+@@ -1123,9 +1144,13 @@ err_stop_hw:
+ static void asus_remove(struct hid_device *hdev)
+ {
+ struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
++ unsigned long flags;
+
+ if (drvdata->kbd_backlight) {
++ spin_lock_irqsave(&drvdata->kbd_backlight->lock, flags);
+ drvdata->kbd_backlight->removed = true;
++ spin_unlock_irqrestore(&drvdata->kbd_backlight->lock, flags);
++
+ cancel_work_sync(&drvdata->kbd_backlight->work);
+ }
+
+@@ -1214,6 +1239,13 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ rdesc = new_rdesc;
+ }
+
++ if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD &&
++ *rsize == 331 && rdesc[190] == 0x85 && rdesc[191] == 0x5a &&
++ rdesc[204] == 0x95 && rdesc[205] == 0x05) {
++ hid_info(hdev, "Fixing up Asus N-KEY keyb report descriptor\n");
++ rdesc[205] = 0x01;
++ }
++
+ return rdesc;
+ }
+
+diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c
+index 467d789f9bc2d..25ed7b9a917e4 100644
+--- a/drivers/hid/hid-betopff.c
++++ b/drivers/hid/hid-betopff.c
+@@ -60,7 +60,6 @@ static int betopff_init(struct hid_device *hid)
+ struct list_head *report_list =
+ &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+ struct input_dev *dev;
+- int field_count = 0;
+ int error;
+ int i, j;
+
+@@ -86,19 +85,21 @@ static int betopff_init(struct hid_device *hid)
+ * -----------------------------------------
+ * Do init them with default value.
+ */
++ if (report->maxfield < 4) {
++ hid_err(hid, "not enough fields in the report: %d\n",
++ report->maxfield);
++ return -ENODEV;
++ }
+ for (i = 0; i < report->maxfield; i++) {
++ if (report->field[i]->report_count < 1) {
++ hid_err(hid, "no values in the field\n");
++ return -ENODEV;
++ }
+ for (j = 0; j < report->field[i]->report_count; j++) {
+ report->field[i]->value[j] = 0x00;
+- field_count++;
+ }
+ }
+
+- if (field_count < 4) {
+- hid_err(hid, "not enough fields in the report: %d\n",
+- field_count);
+- return -ENODEV;
+- }
+-
+ betopff = kzalloc(sizeof(*betopff), GFP_KERNEL);
+ if (!betopff)
+ return -ENOMEM;
+diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c
+index db6da21ade063..a02cb517b4c47 100644
+--- a/drivers/hid/hid-bigbenff.c
++++ b/drivers/hid/hid-bigbenff.c
+@@ -174,6 +174,7 @@ static __u8 pid0902_rdesc_fixed[] = {
+ struct bigben_device {
+ struct hid_device *hid;
+ struct hid_report *report;
++ spinlock_t lock;
+ bool removed;
+ u8 led_state; /* LED1 = 1 .. LED4 = 8 */
+ u8 right_motor_on; /* right motor off/on 0/1 */
+@@ -184,18 +185,39 @@ struct bigben_device {
+ struct work_struct worker;
+ };
+
++static inline void bigben_schedule_work(struct bigben_device *bigben)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&bigben->lock, flags);
++ if (!bigben->removed)
++ schedule_work(&bigben->worker);
++ spin_unlock_irqrestore(&bigben->lock, flags);
++}
+
+ static void bigben_worker(struct work_struct *work)
+ {
+ struct bigben_device *bigben = container_of(work,
+ struct bigben_device, worker);
+ struct hid_field *report_field = bigben->report->field[0];
+-
+- if (bigben->removed)
++ bool do_work_led = false;
++ bool do_work_ff = false;
++ u8 *buf;
++ u32 len;
++ unsigned long flags;
++
++ buf = hid_alloc_report_buf(bigben->report, GFP_KERNEL);
++ if (!buf)
+ return;
+
++ len = hid_report_len(bigben->report);
++
++ /* LED work */
++ spin_lock_irqsave(&bigben->lock, flags);
++
+ if (bigben->work_led) {
+ bigben->work_led = false;
++ do_work_led = true;
+ report_field->value[0] = 0x01; /* 1 = led message */
+ report_field->value[1] = 0x08; /* reserved value, always 8 */
+ report_field->value[2] = bigben->led_state;
+@@ -204,11 +226,22 @@ static void bigben_worker(struct work_struct *work)
+ report_field->value[5] = 0x00; /* padding */
+ report_field->value[6] = 0x00; /* padding */
+ report_field->value[7] = 0x00; /* padding */
+- hid_hw_request(bigben->hid, bigben->report, HID_REQ_SET_REPORT);
++ hid_output_report(bigben->report, buf);
++ }
++
++ spin_unlock_irqrestore(&bigben->lock, flags);
++
++ if (do_work_led) {
++ hid_hw_raw_request(bigben->hid, bigben->report->id, buf, len,
++ bigben->report->type, HID_REQ_SET_REPORT);
+ }
+
++ /* FF work */
++ spin_lock_irqsave(&bigben->lock, flags);
++
+ if (bigben->work_ff) {
+ bigben->work_ff = false;
++ do_work_ff = true;
+ report_field->value[0] = 0x02; /* 2 = rumble effect message */
+ report_field->value[1] = 0x08; /* reserved value, always 8 */
+ report_field->value[2] = bigben->right_motor_on;
+@@ -217,8 +250,17 @@ static void bigben_worker(struct work_struct *work)
+ report_field->value[5] = 0x00; /* padding */
+ report_field->value[6] = 0x00; /* padding */
+ report_field->value[7] = 0x00; /* padding */
+- hid_hw_request(bigben->hid, bigben->report, HID_REQ_SET_REPORT);
++ hid_output_report(bigben->report, buf);
+ }
++
++ spin_unlock_irqrestore(&bigben->lock, flags);
++
++ if (do_work_ff) {
++ hid_hw_raw_request(bigben->hid, bigben->report->id, buf, len,
++ bigben->report->type, HID_REQ_SET_REPORT);
++ }
++
++ kfree(buf);
+ }
+
+ static int hid_bigben_play_effect(struct input_dev *dev, void *data,
+@@ -228,6 +270,7 @@ static int hid_bigben_play_effect(struct input_dev *dev, void *data,
+ struct bigben_device *bigben = hid_get_drvdata(hid);
+ u8 right_motor_on;
+ u8 left_motor_force;
++ unsigned long flags;
+
+ if (!bigben) {
+ hid_err(hid, "no device data\n");
+@@ -242,10 +285,13 @@ static int hid_bigben_play_effect(struct input_dev *dev, void *data,
+
+ if (right_motor_on != bigben->right_motor_on ||
+ left_motor_force != bigben->left_motor_force) {
++ spin_lock_irqsave(&bigben->lock, flags);
+ bigben->right_motor_on = right_motor_on;
+ bigben->left_motor_force = left_motor_force;
+ bigben->work_ff = true;
+- schedule_work(&bigben->worker);
++ spin_unlock_irqrestore(&bigben->lock, flags);
++
++ bigben_schedule_work(bigben);
+ }
+
+ return 0;
+@@ -259,6 +305,7 @@ static void bigben_set_led(struct led_classdev *led,
+ struct bigben_device *bigben = hid_get_drvdata(hid);
+ int n;
+ bool work;
++ unsigned long flags;
+
+ if (!bigben) {
+ hid_err(hid, "no device data\n");
+@@ -267,6 +314,7 @@ static void bigben_set_led(struct led_classdev *led,
+
+ for (n = 0; n < NUM_LEDS; n++) {
+ if (led == bigben->leds[n]) {
++ spin_lock_irqsave(&bigben->lock, flags);
+ if (value == LED_OFF) {
+ work = (bigben->led_state & BIT(n));
+ bigben->led_state &= ~BIT(n);
+@@ -274,10 +322,11 @@ static void bigben_set_led(struct led_classdev *led,
+ work = !(bigben->led_state & BIT(n));
+ bigben->led_state |= BIT(n);
+ }
++ spin_unlock_irqrestore(&bigben->lock, flags);
+
+ if (work) {
+ bigben->work_led = true;
+- schedule_work(&bigben->worker);
++ bigben_schedule_work(bigben);
+ }
+ return;
+ }
+@@ -307,8 +356,12 @@ static enum led_brightness bigben_get_led(struct led_classdev *led)
+ static void bigben_remove(struct hid_device *hid)
+ {
+ struct bigben_device *bigben = hid_get_drvdata(hid);
++ unsigned long flags;
+
++ spin_lock_irqsave(&bigben->lock, flags);
+ bigben->removed = true;
++ spin_unlock_irqrestore(&bigben->lock, flags);
++
+ cancel_work_sync(&bigben->worker);
+ hid_hw_stop(hid);
+ }
+@@ -318,7 +371,6 @@ static int bigben_probe(struct hid_device *hid,
+ {
+ struct bigben_device *bigben;
+ struct hid_input *hidinput;
+- struct list_head *report_list;
+ struct led_classdev *led;
+ char *name;
+ size_t name_sz;
+@@ -343,14 +395,24 @@ static int bigben_probe(struct hid_device *hid,
+ return error;
+ }
+
+- report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
+- bigben->report = list_entry(report_list->next,
+- struct hid_report, list);
++ bigben->report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 8);
++ if (!bigben->report) {
++ hid_err(hid, "no output report found\n");
++ error = -ENODEV;
++ goto error_hw_stop;
++ }
++
++ if (list_empty(&hid->inputs)) {
++ hid_err(hid, "no inputs found\n");
++ error = -ENODEV;
++ goto error_hw_stop;
++ }
+
+ hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+ set_bit(FF_RUMBLE, hidinput->input->ffbit);
+
+ INIT_WORK(&bigben->worker, bigben_worker);
++ spin_lock_init(&bigben->lock);
+
+ error = input_ff_create_memless(hidinput->input, NULL,
+ hid_bigben_play_effect);
+@@ -391,7 +453,7 @@ static int bigben_probe(struct hid_device *hid,
+ bigben->left_motor_force = 0;
+ bigben->work_led = true;
+ bigben->work_ff = true;
+- schedule_work(&bigben->worker);
++ bigben_schedule_work(bigben);
+
+ hid_info(hid, "LED and force feedback support for BigBen gamepad\n");
+
+diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
+index ca556d39da2ae..f04d2aa23efe4 100644
+--- a/drivers/hid/hid-chicony.c
++++ b/drivers/hid/hid-chicony.c
+@@ -114,6 +114,9 @@ static int ch_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+ int ret;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ hdev->quirks |= HID_QUIRK_INPUT_PER_APP;
+ ret = hid_parse(hdev);
+ if (ret) {
+diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
+index dbed2524fd47b..d941023c56289 100644
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -258,6 +258,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
+ {
+ struct hid_report *report;
+ struct hid_field *field;
++ unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+ unsigned int usages;
+ unsigned int offset;
+ unsigned int i;
+@@ -288,8 +289,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
+ offset = report->size;
+ report->size += parser->global.report_size * parser->global.report_count;
+
++ if (parser->device->ll_driver->max_buffer_size)
++ max_buffer_size = parser->device->ll_driver->max_buffer_size;
++
+ /* Total size check: Allow for possible report index byte */
+- if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
++ if (report->size > (max_buffer_size - 1) << 3) {
+ hid_err(parser->device, "report is too long\n");
+ return -1;
+ }
+@@ -988,8 +992,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid,
+ * Validating on id 0 means we should examine the first
+ * report in the list.
+ */
+- report = list_entry(
+- hid->report_enum[type].report_list.next,
++ report = list_first_entry_or_null(
++ &hid->report_enum[type].report_list,
+ struct hid_report, list);
+ } else {
+ report = hid->report_enum[type].report_id_hash[id];
+@@ -1197,6 +1201,7 @@ int hid_open_report(struct hid_device *device)
+ __u8 *end;
+ __u8 *next;
+ int ret;
++ int i;
+ static int (*dispatch_type[])(struct hid_parser *parser,
+ struct hid_item *item) = {
+ hid_parser_main,
+@@ -1247,6 +1252,8 @@ int hid_open_report(struct hid_device *device)
+ goto err;
+ }
+ device->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
++ for (i = 0; i < HID_DEFAULT_NUM_COLLECTIONS; i++)
++ device->collection[i].parent_idx = -1;
+
+ ret = -EINVAL;
+ while ((next = fetch_item(start, end, &item)) != NULL) {
+@@ -1310,6 +1317,9 @@ static s32 snto32(__u32 value, unsigned n)
+ if (!value || !n)
+ return 0;
+
++ if (n > 32)
++ n = 32;
++
+ switch (n) {
+ case 8: return ((__s8)value);
+ case 16: return ((__s16)value);
+@@ -1746,6 +1756,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
+ struct hid_report_enum *report_enum = hid->report_enum + type;
+ struct hid_report *report;
+ struct hid_driver *hdrv;
++ int max_buffer_size = HID_MAX_BUFFER_SIZE;
+ unsigned int a;
+ u32 rsize, csize = size;
+ u8 *cdata = data;
+@@ -1762,10 +1773,13 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
+
+ rsize = hid_compute_report_size(report);
+
+- if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
+- rsize = HID_MAX_BUFFER_SIZE - 1;
+- else if (rsize > HID_MAX_BUFFER_SIZE)
+- rsize = HID_MAX_BUFFER_SIZE;
++ if (hid->ll_driver->max_buffer_size)
++ max_buffer_size = hid->ll_driver->max_buffer_size;
++
++ if (report_enum->numbered && rsize >= max_buffer_size)
++ rsize = max_buffer_size - 1;
++ else if (rsize > max_buffer_size)
++ rsize = max_buffer_size;
+
+ if (csize < rsize) {
+ dbg_hid("report %d is too short, (%d < %d)\n", report->id,
+diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
+index 902a60e249ed2..8c895c820b672 100644
+--- a/drivers/hid/hid-corsair.c
++++ b/drivers/hid/hid-corsair.c
+@@ -553,7 +553,12 @@ static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id)
+ int ret;
+ unsigned long quirks = id->driver_data;
+ struct corsair_drvdata *drvdata;
+- struct usb_interface *usbif = to_usb_interface(dev->dev.parent);
++ struct usb_interface *usbif;
++
++ if (!hid_is_usb(dev))
++ return -EINVAL;
++
++ usbif = to_usb_interface(dev->dev.parent);
+
+ drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata),
+ GFP_KERNEL);
+diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
+index 477baa30889cc..d902fe43cb818 100644
+--- a/drivers/hid/hid-cp2112.c
++++ b/drivers/hid/hid-cp2112.c
+@@ -788,6 +788,11 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
+ data->word = le16_to_cpup((__le16 *)buf);
+ break;
+ case I2C_SMBUS_I2C_BLOCK_DATA:
++ if (read_length > I2C_SMBUS_BLOCK_MAX) {
++ ret = -EINVAL;
++ goto power_normal;
++ }
++
+ memcpy(data->block + 1, buf, read_length);
+ break;
+ case I2C_SMBUS_BLOCK_DATA:
+@@ -1347,6 +1352,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ girq->parents = NULL;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_simple_irq;
++ girq->threaded = true;
+
+ ret = gpiochip_add_data(&dev->gc, dev);
+ if (ret < 0) {
+diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
+index fa57d05badf70..03da865e423c7 100644
+--- a/drivers/hid/hid-debug.c
++++ b/drivers/hid/hid-debug.c
+@@ -825,7 +825,9 @@ static const char *keys[KEY_MAX + 1] = {
+ [KEY_F22] = "F22", [KEY_F23] = "F23",
+ [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD",
+ [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3",
+- [KEY_PROG4] = "Prog4", [KEY_SUSPEND] = "Suspend",
++ [KEY_PROG4] = "Prog4",
++ [KEY_ALL_APPLICATIONS] = "AllApplications",
++ [KEY_SUSPEND] = "Suspend",
+ [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play",
+ [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost",
+ [KEY_PRINT] = "Print", [KEY_HP] = "HP",
+@@ -934,6 +936,8 @@ static const char *keys[KEY_MAX + 1] = {
+ [KEY_ASSISTANT] = "Assistant",
+ [KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext",
+ [KEY_EMOJI_PICKER] = "EmojiPicker",
++ [KEY_DICTATE] = "Dictate",
++ [KEY_MICMUTE] = "MicrophoneMute",
+ [KEY_BRIGHTNESS_MIN] = "BrightnessMin",
+ [KEY_BRIGHTNESS_MAX] = "BrightnessMax",
+ [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto",
+diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c
+index 021049805bb71..8e4a5528e25df 100644
+--- a/drivers/hid/hid-elan.c
++++ b/drivers/hid/hid-elan.c
+@@ -50,7 +50,7 @@ struct elan_drvdata {
+
+ static int is_not_elan_touchpad(struct hid_device *hdev)
+ {
+- if (hdev->bus == BUS_USB) {
++ if (hid_is_usb(hdev)) {
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+ return (intf->altsetting->desc.bInterfaceNumber !=
+@@ -188,7 +188,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi)
+ ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER);
+ if (ret) {
+ hid_err(hdev, "Failed to init elan MT slots: %d\n", ret);
+- input_free_device(input);
+ return ret;
+ }
+
+@@ -200,7 +199,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi)
+ hid_err(hdev, "Failed to register elan input device: %d\n",
+ ret);
+ input_mt_destroy_slots(input);
+- input_free_device(input);
+ return ret;
+ }
+
+diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
+index e59e9911fc370..4fa45ee77503b 100644
+--- a/drivers/hid/hid-elecom.c
++++ b/drivers/hid/hid-elecom.c
+@@ -12,6 +12,7 @@
+ * Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org>
+ * Copyright (c) 2017 Tomasz Kramkowski <tk@the-tk.com>
+ * Copyright (c) 2020 YOSHIOKA Takuma <lo48576@hard-wi.red>
++ * Copyright (c) 2022 Takahiro Fujii <fujii@xaxxi.net>
+ */
+
+ /*
+@@ -89,7 +90,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ case USB_DEVICE_ID_ELECOM_M_DT1URBK:
+ case USB_DEVICE_ID_ELECOM_M_DT1DRBK:
+ case USB_DEVICE_ID_ELECOM_M_HT1URBK:
+- case USB_DEVICE_ID_ELECOM_M_HT1DRBK:
++ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D:
+ /*
+ * Report descriptor format:
+ * 12: button bit count
+@@ -99,6 +100,16 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ */
+ mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8);
+ break;
++ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C:
++ /*
++ * Report descriptor format:
++ * 22: button bit count
++ * 30: padding bit count
++ * 24: button report size
++ * 16: button usage maximum
++ */
++ mouse_button_fixup(hdev, rdesc, *rsize, 22, 30, 24, 16, 8);
++ break;
+ }
+ return rdesc;
+ }
+@@ -112,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
+- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
+ { }
+ };
+ MODULE_DEVICE_TABLE(hid, elecom_devices);
+diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
+index 383dfda8c12fc..2876cb6a7dcab 100644
+--- a/drivers/hid/hid-elo.c
++++ b/drivers/hid/hid-elo.c
+@@ -228,15 +228,16 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+ struct elo_priv *priv;
+ int ret;
+- struct usb_device *udev;
++
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&priv->work, elo_work);
+- udev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
+- priv->usbdev = usb_get_dev(udev);
++ priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
+
+ hid_set_drvdata(hdev, priv);
+
+@@ -267,8 +268,6 @@ static void elo_remove(struct hid_device *hdev)
+ {
+ struct elo_priv *priv = hid_get_drvdata(hdev);
+
+- usb_put_dev(priv->usbdev);
+-
+ hid_hw_stop(hdev);
+ cancel_delayed_work_sync(&priv->work);
+ kfree(priv);
+diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
+index 4ef1c3b8094ea..183eeb3863b38 100644
+--- a/drivers/hid/hid-ft260.c
++++ b/drivers/hid/hid-ft260.c
+@@ -915,6 +915,9 @@ static int ft260_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ struct ft260_get_chip_version_report version;
+ int ret;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
+index 8123b871a3ebf..6a227e07f8943 100644
+--- a/drivers/hid/hid-google-hammer.c
++++ b/drivers/hid/hid-google-hammer.c
+@@ -585,8 +585,12 @@ static void hammer_remove(struct hid_device *hdev)
+ static const struct hid_device_id hammer_devices[] = {
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) },
++ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
++ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) },
++ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
++ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_JEWEL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c
+index 0a38e8e9bc783..403506b9697e7 100644
+--- a/drivers/hid/hid-holtek-kbd.c
++++ b/drivers/hid/hid-holtek-kbd.c
+@@ -140,12 +140,17 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type,
+ static int holtek_kbd_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+- struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+- int ret = hid_parse(hdev);
++ struct usb_interface *intf;
++ int ret;
++
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
+
++ ret = hid_parse(hdev);
+ if (!ret)
+ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+
++ intf = to_usb_interface(hdev->dev.parent);
+ if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+ struct hid_input *hidinput;
+ list_for_each_entry(hidinput, &hdev->inputs, list) {
+diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c
+index 195b735b001d0..7c907939bfae1 100644
+--- a/drivers/hid/hid-holtek-mouse.c
++++ b/drivers/hid/hid-holtek-mouse.c
+@@ -62,6 +62,29 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ return rdesc;
+ }
+
++static int holtek_mouse_probe(struct hid_device *hdev,
++ const struct hid_device_id *id)
++{
++ int ret;
++
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
++ ret = hid_parse(hdev);
++ if (ret) {
++ hid_err(hdev, "hid parse failed: %d\n", ret);
++ return ret;
++ }
++
++ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
++ if (ret) {
++ hid_err(hdev, "hw start failed: %d\n", ret);
++ return ret;
++ }
++
++ return 0;
++}
++
+ static const struct hid_device_id holtek_mouse_devices[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT,
+ USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) },
+@@ -83,6 +106,7 @@ static struct hid_driver holtek_mouse_driver = {
+ .name = "holtek_mouse",
+ .id_table = holtek_mouse_devices,
+ .report_fixup = holtek_mouse_report_fixup,
++ .probe = holtek_mouse_probe,
+ };
+
+ module_hid_driver(holtek_mouse_driver);
+diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
+index 978ee2aab2d40..b7704dd6809dc 100644
+--- a/drivers/hid/hid-hyperv.c
++++ b/drivers/hid/hid-hyperv.c
+@@ -498,7 +498,7 @@ static int mousevsc_probe(struct hv_device *device,
+
+ ret = hid_add_device(hid_dev);
+ if (ret)
+- goto probe_err1;
++ goto probe_err2;
+
+
+ ret = hid_parse(hid_dev);
+diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
+index 29564b370341e..5fceefb3c707e 100644
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -394,8 +394,12 @@
+ #define USB_DEVICE_ID_HP_X2 0x074d
+ #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755
+ #define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817
++#define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100 0x29CF
++#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544
+ #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706
+ #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A
++#define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C
++#define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F
+
+ #define USB_VENDOR_ID_ELECOM 0x056e
+ #define USB_DEVICE_ID_ELECOM_BM084 0x0061
+@@ -406,7 +410,8 @@
+ #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe
+ #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff
+ #define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c
+-#define USB_DEVICE_ID_ELECOM_M_HT1DRBK 0x010d
++#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d
++#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c
+
+ #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34
+ #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004
+@@ -496,6 +501,8 @@
+ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d
+ #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044
+ #define USB_DEVICE_ID_GOOGLE_DON 0x5050
++#define USB_DEVICE_ID_GOOGLE_EEL 0x5057
++#define USB_DEVICE_ID_GOOGLE_JEWEL 0x5061
+
+ #define USB_VENDOR_ID_GOTOP 0x08f2
+ #define USB_DEVICE_ID_SUPER_Q2 0x007f
+@@ -586,6 +593,7 @@
+ #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030
+
+ #define USB_VENDOR_ID_HP 0x03f0
++#define USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A 0x464a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a
+ #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a
+@@ -750,6 +758,7 @@
+ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085
+ #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3
+ #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5
++#define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe
+ #define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e
+ #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d
+ #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019
+@@ -839,6 +848,7 @@
+ #define USB_DEVICE_ID_MADCATZ_BEATPAD 0x4540
+ #define USB_DEVICE_ID_MADCATZ_RAT5 0x1705
+ #define USB_DEVICE_ID_MADCATZ_RAT9 0x1709
++#define USB_DEVICE_ID_MADCATZ_MMO7 0x1713
+
+ #define USB_VENDOR_ID_MCC 0x09db
+ #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076
+@@ -881,9 +891,11 @@
+ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7
+ #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9
+ #define USB_DEVICE_ID_MS_POWER_COVER 0x07da
++#define USB_DEVICE_ID_MS_SURFACE3_COVER 0x07de
+ #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd
+ #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb
+ #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0
++#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783
+
+ #define USB_VENDOR_ID_MOJO 0x8282
+ #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201
+@@ -956,7 +968,10 @@
+ #define USB_DEVICE_ID_ORTEK_IHOME_IMAC_A210S 0x8003
+
+ #define USB_VENDOR_ID_PLANTRONICS 0x047f
++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3210_SERIES 0xc055
+ #define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3220_SERIES 0xc056
++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3215_SERIES 0xc057
++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3225_SERIES 0xc058
+
+ #define USB_VENDOR_ID_PANASONIC 0x04da
+ #define USB_DEVICE_ID_PANABOARD_UBT780 0x1044
+@@ -1102,6 +1117,7 @@
+ #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc
+ #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0
+ #define USB_DEVICE_ID_SONY_PS5_CONTROLLER 0x0ce6
++#define USB_DEVICE_ID_SONY_PS5_CONTROLLER_2 0x0df2
+ #define USB_DEVICE_ID_SONY_MOTION_CONTROLLER 0x03d5
+ #define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f
+ #define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER 0x0002
+@@ -1173,6 +1189,7 @@
+ #define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21
+ #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4
+ #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5
++#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6
+ #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7
+
+ #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047
+@@ -1276,6 +1293,9 @@
+ #define USB_DEVICE_ID_WEIDA_8752 0xC300
+ #define USB_DEVICE_ID_WEIDA_8755 0xC301
+
++#define USB_VENDOR_ID_WINBOND 0x0416
++#define USB_DEVICE_ID_TSTP_MTOUCH 0xc168
++
+ #define USB_VENDOR_ID_WISEGROUP 0x0925
+ #define USB_DEVICE_ID_SMARTJOY_PLUS 0x0005
+ #define USB_DEVICE_ID_SUPER_JOY_BOX_3 0x8888
+@@ -1326,6 +1346,7 @@
+
+ #define USB_VENDOR_ID_PRIMAX 0x0461
+ #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22
++#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a
+ #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05
+ #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72
+ #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f
+@@ -1346,6 +1367,7 @@
+ #define USB_VENDOR_ID_UGTIZER 0x2179
+ #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053
+ #define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077
++#define USB_DEVICE_ID_UGTIZER_TABLET_WP5540 0x0004
+
+ #define USB_VENDOR_ID_VIEWSONIC 0x0543
+ #define USB_DEVICE_ID_VIEWSONIC_PD1011 0xe621
+diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
+index 4b5ebeacd2836..0ae959e54462b 100644
+--- a/drivers/hid/hid-input.c
++++ b/drivers/hid/hid-input.c
+@@ -160,6 +160,7 @@ static int hidinput_setkeycode(struct input_dev *dev,
+ if (usage) {
+ *old_keycode = usage->type == EV_KEY ?
+ usage->code : KEY_RESERVED;
++ usage->type = EV_KEY;
+ usage->code = ke->keycode;
+
+ clear_bit(*old_keycode, dev->keybit);
+@@ -324,10 +325,18 @@ static const struct hid_device_id hid_battery_quirks[] = {
+ HID_BATTERY_QUIRK_IGNORE },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
+ HID_BATTERY_QUIRK_IGNORE },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN),
++ HID_BATTERY_QUIRK_IGNORE },
++ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100),
++ HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
+ HID_BATTERY_QUIRK_IGNORE },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
+ HID_BATTERY_QUIRK_IGNORE },
++ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN),
++ HID_BATTERY_QUIRK_IGNORE },
++ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN),
++ HID_BATTERY_QUIRK_IGNORE },
+ {}
+ };
+
+@@ -650,10 +659,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
+ code += KEY_MACRO1;
+ else
+ code += BTN_TRIGGER_HAPPY - 0x1e;
+- } else {
+- goto ignore;
++ break;
+ }
+- break;
++ fallthrough;
+ default:
+ switch (field->physical) {
+ case HID_GD_MOUSE:
+@@ -701,6 +709,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
+ break;
+ }
+
++ if ((usage->hid & 0xf0) == 0xa0) { /* SystemControl */
++ switch (usage->hid & 0xf) {
++ case 0x9: map_key_clear(KEY_MICMUTE); break;
++ default: goto ignore;
++ }
++ break;
++ }
++
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */
+ switch (usage->hid & 0xf) {
+ case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break;
+@@ -987,6 +1003,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
+ case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break;
+ case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break;
+
++ case 0x0d8: map_key_clear(KEY_DICTATE); break;
+ case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break;
+
+ case 0x0e0: map_abs_clear(ABS_VOLUME); break;
+@@ -1078,6 +1095,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
+
+ case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break;
+
++ case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break;
++
+ case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break;
+ case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break;
+ case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break;
+@@ -1328,6 +1347,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
+
+ input = field->hidinput->input;
+
++ if (usage->type == EV_ABS &&
++ (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) ||
++ ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y))) {
++ value = field->logical_maximum - value;
++ }
++
+ if (usage->hat_min < usage->hat_max || usage->hat_dir) {
+ int hat_dir = usage->hat_dir;
+ if (!hat_dir)
+diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
+index 430fa4f52ed3b..75ebfcf318896 100644
+--- a/drivers/hid/hid-ite.c
++++ b/drivers/hid/hid-ite.c
+@@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = {
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003),
+ .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
++ /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */
++ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
++ USB_VENDOR_ID_SYNAPTICS,
++ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017),
++ .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
+ { }
+ };
+ MODULE_DEVICE_TABLE(hid, ite_devices);
+diff --git a/drivers/hid/hid-led.c b/drivers/hid/hid-led.c
+index c2c66ceca1327..7d82f8d426bbc 100644
+--- a/drivers/hid/hid-led.c
++++ b/drivers/hid/hid-led.c
+@@ -366,7 +366,7 @@ static const struct hidled_config hidled_configs[] = {
+ .type = DREAM_CHEEKY,
+ .name = "Dream Cheeky Webmail Notifier",
+ .short_name = "dream_cheeky",
+- .max_brightness = 31,
++ .max_brightness = 63,
+ .num_leds = 1,
+ .report_size = 9,
+ .report_type = RAW_REQUEST,
+diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
+index d40af911df635..fb3f7258009c2 100644
+--- a/drivers/hid/hid-lg.c
++++ b/drivers/hid/hid-lg.c
+@@ -749,12 +749,18 @@ static int lg_raw_event(struct hid_device *hdev, struct hid_report *report,
+
+ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+- struct usb_interface *iface = to_usb_interface(hdev->dev.parent);
+- __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber;
++ struct usb_interface *iface;
++ __u8 iface_num;
+ unsigned int connect_mask = HID_CONNECT_DEFAULT;
+ struct lg_drv_data *drv_data;
+ int ret;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
++ iface = to_usb_interface(hdev->dev.parent);
++ iface_num = iface->cur_altsetting->desc.bInterfaceNumber;
++
+ /* G29 only work with the 1st interface */
+ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) &&
+ (iface_num != 0)) {
+diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
+index 5e6a0cef2a06d..e3fcf1353fb3b 100644
+--- a/drivers/hid/hid-lg4ff.c
++++ b/drivers/hid/hid-lg4ff.c
+@@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att
+ return -ENOMEM;
+
+ i = strlen(lbuf);
++
++ if (i == 0) {
++ kfree(lbuf);
++ return -EINVAL;
++ }
++
+ if (lbuf[i-1] == '\n') {
+ if (i == 1) {
+ kfree(lbuf);
+diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
+index a0017b010c342..08768e5accedc 100644
+--- a/drivers/hid/hid-logitech-dj.c
++++ b/drivers/hid/hid-logitech-dj.c
+@@ -1068,6 +1068,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
+ workitem.reports_supported |= STD_KEYBOARD;
+ break;
+ case 0x0f:
++ case 0x11:
+ device_type = "eQUAD Lightspeed 1.2";
+ logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
+ workitem.reports_supported |= STD_KEYBOARD;
+@@ -1284,6 +1285,9 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
+ * 50 msec should gives enough time to the receiver to be ready.
+ */
+ msleep(50);
++
++ if (retval)
++ return retval;
+ }
+
+ /*
+@@ -1305,7 +1309,7 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
+ buf[5] = 0x09;
+ buf[6] = 0x00;
+
+- hid_hw_raw_request(hdev, REPORT_ID_HIDPP_SHORT, buf,
++ retval = hid_hw_raw_request(hdev, REPORT_ID_HIDPP_SHORT, buf,
+ HIDPP_REPORT_SHORT_LENGTH, HID_OUTPUT_REPORT,
+ HID_REQ_SET_REPORT);
+
+@@ -1777,7 +1781,7 @@ static int logi_dj_probe(struct hid_device *hdev,
+ case recvr_type_bluetooth: no_dj_interfaces = 2; break;
+ case recvr_type_dinovo: no_dj_interfaces = 2; break;
+ }
+- if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
++ if (hid_is_usb(hdev)) {
+ intf = to_usb_interface(hdev->dev.parent);
+ if (intf && intf->altsetting->desc.bInterfaceNumber >=
+ no_dj_interfaces) {
+diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
+index 81de88ab2ecc7..0ac67dd76574e 100644
+--- a/drivers/hid/hid-logitech-hidpp.c
++++ b/drivers/hid/hid-logitech-hidpp.c
+@@ -834,8 +834,7 @@ static int hidpp_unifying_init(struct hidpp_device *hidpp)
+ if (ret)
+ return ret;
+
+- snprintf(hdev->uniq, sizeof(hdev->uniq), "%04x-%4phD",
+- hdev->product, &serial);
++ snprintf(hdev->uniq, sizeof(hdev->uniq), "%4phD", &serial);
+ dbg_hid("HID++ Unifying: Got serial: %s\n", hdev->uniq);
+
+ name = hidpp_unifying_get_name(hidpp);
+@@ -928,6 +927,54 @@ print_version:
+ return 0;
+ }
+
++/* -------------------------------------------------------------------------- */
++/* 0x0003: Device Information */
++/* -------------------------------------------------------------------------- */
++
++#define HIDPP_PAGE_DEVICE_INFORMATION 0x0003
++
++#define CMD_GET_DEVICE_INFO 0x00
++
++static int hidpp_get_serial(struct hidpp_device *hidpp, u32 *serial)
++{
++ struct hidpp_report response;
++ u8 feature_type;
++ u8 feature_index;
++ int ret;
++
++ ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_DEVICE_INFORMATION,
++ &feature_index,
++ &feature_type);
++ if (ret)
++ return ret;
++
++ ret = hidpp_send_fap_command_sync(hidpp, feature_index,
++ CMD_GET_DEVICE_INFO,
++ NULL, 0, &response);
++ if (ret)
++ return ret;
++
++ /* See hidpp_unifying_get_serial() */
++ *serial = *((u32 *)&response.rap.params[1]);
++ return 0;
++}
++
++static int hidpp_serial_init(struct hidpp_device *hidpp)
++{
++ struct hid_device *hdev = hidpp->hid_dev;
++ u32 serial;
++ int ret;
++
++ ret = hidpp_get_serial(hidpp, &serial);
++ if (ret)
++ return ret;
++
++ snprintf(hdev->uniq, sizeof(hdev->uniq), "%4phD", &serial);
++ dbg_hid("HID++ DeviceInformation: Got serial: %s\n", hdev->uniq);
++
++ return 0;
++}
++
+ /* -------------------------------------------------------------------------- */
+ /* 0x0005: GetDeviceNameType */
+ /* -------------------------------------------------------------------------- */
+@@ -4049,6 +4096,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ bool connected;
+ unsigned int connect_mask = HID_CONNECT_DEFAULT;
+ struct hidpp_ff_private_data data;
++ bool will_restart = false;
+
+ /* report_fixup needs drvdata to be set before we call hid_parse */
+ hidpp = devm_kzalloc(&hdev->dev, sizeof(*hidpp), GFP_KERNEL);
+@@ -4104,6 +4152,10 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ return ret;
+ }
+
++ if (hidpp->quirks & HIDPP_QUIRK_DELAYED_INIT ||
++ hidpp->quirks & HIDPP_QUIRK_UNIFYING)
++ will_restart = true;
++
+ INIT_WORK(&hidpp->work, delayed_work_cb);
+ mutex_init(&hidpp->send_mutex);
+ init_waitqueue_head(&hidpp->wait);
+@@ -4118,7 +4170,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ * Plain USB connections need to actually call start and open
+ * on the transport driver to allow incoming data.
+ */
+- ret = hid_hw_start(hdev, 0);
++ ret = hid_hw_start(hdev, will_restart ? 0 : connect_mask);
+ if (ret) {
+ hid_err(hdev, "hw start failed\n");
+ goto hid_hw_start_fail;
+@@ -4136,6 +4188,8 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+
+ if (hidpp->quirks & HIDPP_QUIRK_UNIFYING)
+ hidpp_unifying_init(hidpp);
++ else if (hid_is_usb(hidpp->hid_dev))
++ hidpp_serial_init(hidpp);
+
+ connected = hidpp_root_get_protocol_version(hidpp) == 0;
+ atomic_set(&hidpp->connected, connected);
+@@ -4155,6 +4209,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ hidpp->wireless_feature_index = 0;
+ else if (ret)
+ goto hid_hw_init_fail;
++ ret = 0;
+ }
+
+ if (connected && (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP)) {
+@@ -4169,19 +4224,21 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
+
+ hidpp_connect_event(hidpp);
+
+- /* Reset the HID node state */
+- hid_device_io_stop(hdev);
+- hid_hw_close(hdev);
+- hid_hw_stop(hdev);
++ if (will_restart) {
++ /* Reset the HID node state */
++ hid_device_io_stop(hdev);
++ hid_hw_close(hdev);
++ hid_hw_stop(hdev);
+
+- if (hidpp->quirks & HIDPP_QUIRK_NO_HIDINPUT)
+- connect_mask &= ~HID_CONNECT_HIDINPUT;
++ if (hidpp->quirks & HIDPP_QUIRK_NO_HIDINPUT)
++ connect_mask &= ~HID_CONNECT_HIDINPUT;
+
+- /* Now export the actual inputs and hidraw nodes to the world */
+- ret = hid_hw_start(hdev, connect_mask);
+- if (ret) {
+- hid_err(hdev, "%s:hid_hw_start returned error\n", __func__);
+- goto hid_hw_start_fail;
++ /* Now export the actual inputs and hidraw nodes to the world */
++ ret = hid_hw_start(hdev, connect_mask);
++ if (ret) {
++ hid_err(hdev, "%s:hid_hw_start returned error\n", __func__);
++ goto hid_hw_start_fail;
++ }
+ }
+
+ if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) {
+@@ -4238,7 +4295,7 @@ static const struct hid_device_id hidpp_devices[] = {
+ { /* wireless touchpad T651 */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
+ USB_DEVICE_ID_LOGITECH_T651),
+- .driver_data = HIDPP_QUIRK_CLASS_WTP },
++ .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT },
+ { /* Mouse Logitech Anywhere MX */
+ LDJ_DEVICE(0x1017), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 },
+ { /* Mouse Logitech Cube */
+@@ -4320,6 +4377,8 @@ static const struct hid_device_id hidpp_devices[] = {
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC086) },
+ { /* Logitech G903 Hero Gaming Mouse over USB */
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC091) },
++ { /* Logitech G915 TKL Keyboard over USB */
++ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC343) },
+ { /* Logitech G920 Wheel over USB */
+ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL),
+ .driver_data = HIDPP_QUIRK_CLASS_G920 | HIDPP_QUIRK_FORCE_OUTPUT_REPORTS},
+@@ -4335,6 +4394,8 @@ static const struct hid_device_id hidpp_devices[] = {
+ { /* MX5500 keyboard over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b),
+ .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
++ { /* Logitech G915 TKL keyboard over Bluetooth */
++ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb35f) },
+ { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) },
+ { /* MX Master mouse over Bluetooth */
+diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
+index 686788ebf3e1e..c6b8da7160021 100644
+--- a/drivers/hid/hid-magicmouse.c
++++ b/drivers/hid/hid-magicmouse.c
+@@ -57,6 +57,8 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie
+ #define MOUSE_REPORT_ID 0x29
+ #define MOUSE2_REPORT_ID 0x12
+ #define DOUBLE_REPORT_ID 0xf7
++#define USB_BATTERY_TIMEOUT_MS 60000
++
+ /* These definitions are not precise, but they're close enough. (Bits
+ * 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem
+ * to be some kind of bit mask -- 0x20 may be a near-field reading,
+@@ -140,6 +142,7 @@ struct magicmouse_sc {
+
+ struct hid_device *hdev;
+ struct delayed_work work;
++ struct timer_list battery_timer;
+ };
+
+ static int magicmouse_firm_touch(struct magicmouse_sc *msc)
+@@ -256,8 +259,11 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
+ unsigned long now = jiffies;
+ int step_x = msc->touches[id].scroll_x - x;
+ int step_y = msc->touches[id].scroll_y - y;
+- int step_hr = ((64 - (int)scroll_speed) * msc->scroll_accel) /
+- SCROLL_HR_STEPS;
++ int step_hr =
++ max_t(int,
++ ((64 - (int)scroll_speed) * msc->scroll_accel) /
++ SCROLL_HR_STEPS,
++ 1);
+ int step_x_hr = msc->touches[id].scroll_x_hr - x;
+ int step_y_hr = msc->touches[id].scroll_y_hr - y;
+
+@@ -472,7 +478,7 @@ static int magicmouse_raw_event(struct hid_device *hdev,
+ magicmouse_raw_event(hdev, report, data + 2, data[1]);
+ magicmouse_raw_event(hdev, report, data + 2 + data[1],
+ size - 2 - data[1]);
+- break;
++ return 0;
+ default:
+ return 0;
+ }
+@@ -735,6 +741,44 @@ static void magicmouse_enable_mt_work(struct work_struct *work)
+ hid_err(msc->hdev, "unable to request touch data (%d)\n", ret);
+ }
+
++static int magicmouse_fetch_battery(struct hid_device *hdev)
++{
++#ifdef CONFIG_HID_BATTERY_STRENGTH
++ struct hid_report_enum *report_enum;
++ struct hid_report *report;
++
++ if (!hdev->battery || hdev->vendor != USB_VENDOR_ID_APPLE ||
++ (hdev->product != USB_DEVICE_ID_APPLE_MAGICMOUSE2 &&
++ hdev->product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2))
++ return -1;
++
++ report_enum = &hdev->report_enum[hdev->battery_report_type];
++ report = report_enum->report_id_hash[hdev->battery_report_id];
++
++ if (!report || report->maxfield < 1)
++ return -1;
++
++ if (hdev->battery_capacity == hdev->battery_max)
++ return -1;
++
++ hid_hw_request(hdev, report, HID_REQ_GET_REPORT);
++ return 0;
++#else
++ return -1;
++#endif
++}
++
++static void magicmouse_battery_timer_tick(struct timer_list *t)
++{
++ struct magicmouse_sc *msc = from_timer(msc, t, battery_timer);
++ struct hid_device *hdev = msc->hdev;
++
++ if (magicmouse_fetch_battery(hdev) == 0) {
++ mod_timer(&msc->battery_timer,
++ jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS));
++ }
++}
++
+ static int magicmouse_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+@@ -742,11 +786,6 @@ static int magicmouse_probe(struct hid_device *hdev,
+ struct hid_report *report;
+ int ret;
+
+- if (id->vendor == USB_VENDOR_ID_APPLE &&
+- id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 &&
+- hdev->type != HID_TYPE_USBMOUSE)
+- return -ENODEV;
+-
+ msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL);
+ if (msc == NULL) {
+ hid_err(hdev, "can't alloc magicmouse descriptor\n");
+@@ -772,6 +811,16 @@ static int magicmouse_probe(struct hid_device *hdev,
+ return ret;
+ }
+
++ timer_setup(&msc->battery_timer, magicmouse_battery_timer_tick, 0);
++ mod_timer(&msc->battery_timer,
++ jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS));
++ magicmouse_fetch_battery(hdev);
++
++ if (id->vendor == USB_VENDOR_ID_APPLE &&
++ (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 ||
++ (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && hdev->type != HID_TYPE_USBMOUSE)))
++ return 0;
++
+ if (!msc->input) {
+ hid_err(hdev, "magicmouse input not registered\n");
+ ret = -ENOMEM;
+@@ -824,6 +873,7 @@ static int magicmouse_probe(struct hid_device *hdev,
+
+ return 0;
+ err_stop_hw:
++ del_timer_sync(&msc->battery_timer);
+ hid_hw_stop(hdev);
+ return ret;
+ }
+@@ -832,17 +882,52 @@ static void magicmouse_remove(struct hid_device *hdev)
+ {
+ struct magicmouse_sc *msc = hid_get_drvdata(hdev);
+
+- if (msc)
++ if (msc) {
+ cancel_delayed_work_sync(&msc->work);
++ del_timer_sync(&msc->battery_timer);
++ }
+
+ hid_hw_stop(hdev);
+ }
+
++static __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
++ unsigned int *rsize)
++{
++ /*
++ * Change the usage from:
++ * 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 0
++ * 0x09, 0x0b, // Usage (Vendor Usage 0x0b) 3
++ * To:
++ * 0x05, 0x01, // Usage Page (Generic Desktop) 0
++ * 0x09, 0x02, // Usage (Mouse) 2
++ */
++ if (hdev->vendor == USB_VENDOR_ID_APPLE &&
++ (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 ||
++ hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) &&
++ *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) {
++ hid_info(hdev,
++ "fixing up magicmouse battery report descriptor\n");
++ *rsize = *rsize - 1;
++ rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL);
++ if (!rdesc)
++ return NULL;
++
++ rdesc[0] = 0x05;
++ rdesc[1] = 0x01;
++ rdesc[2] = 0x09;
++ rdesc[3] = 0x02;
++ }
++
++ return rdesc;
++}
++
+ static const struct hid_device_id magic_mice[] = {
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 },
++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
++ USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
+@@ -858,6 +943,7 @@ static struct hid_driver magicmouse_driver = {
+ .id_table = magic_mice,
+ .probe = magicmouse_probe,
+ .remove = magicmouse_remove,
++ .report_fixup = magicmouse_report_fixup,
+ .raw_event = magicmouse_raw_event,
+ .event = magicmouse_event,
+ .input_mapping = magicmouse_input_mapping,
+diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c
+index 4211b9839209b..560eeec4035aa 100644
+--- a/drivers/hid/hid-mcp2221.c
++++ b/drivers/hid/hid-mcp2221.c
+@@ -385,6 +385,9 @@ static int mcp_smbus_write(struct mcp2221 *mcp, u16 addr,
+ data_len = 7;
+ break;
+ default:
++ if (len > I2C_SMBUS_BLOCK_MAX)
++ return -EINVAL;
++
+ memcpy(&mcp->txbuf[5], buf, len);
+ data_len = len + 5;
+ }
+@@ -837,12 +840,19 @@ static int mcp2221_probe(struct hid_device *hdev,
+ return ret;
+ }
+
+- ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
++ /*
++ * This driver uses the .raw_event callback and therefore does not need any
++ * HID_CONNECT_xxx flags.
++ */
++ ret = hid_hw_start(hdev, 0);
+ if (ret) {
+ hid_err(hdev, "can't start hardware\n");
+ return ret;
+ }
+
++ hid_info(hdev, "USB HID v%x.%02x Device [%s] on %s\n", hdev->version >> 8,
++ hdev->version & 0xff, hdev->name, hdev->phys);
++
+ ret = hid_hw_open(hdev);
+ if (ret) {
+ hid_err(hdev, "can't open device\n");
+@@ -867,8 +877,7 @@ static int mcp2221_probe(struct hid_device *hdev,
+ mcp->adapter.retries = 1;
+ mcp->adapter.dev.parent = &hdev->dev;
+ snprintf(mcp->adapter.name, sizeof(mcp->adapter.name),
+- "MCP2221 usb-i2c bridge on hidraw%d",
+- ((struct hidraw *)hdev->hidraw)->minor);
++ "MCP2221 usb-i2c bridge");
+
+ ret = i2c_add_adapter(&mcp->adapter);
+ if (ret) {
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 3ea7cb1cda84c..a5bc11750bdc1 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -71,6 +71,7 @@ MODULE_LICENSE("GPL");
+ #define MT_QUIRK_SEPARATE_APP_REPORT BIT(19)
+ #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20)
+ #define MT_QUIRK_DISABLE_WAKEUP BIT(21)
++#define MT_QUIRK_ORIENTATION_INVERT BIT(22)
+
+ #define MT_INPUTMODE_TOUCHSCREEN 0x02
+ #define MT_INPUTMODE_TOUCHPAD 0x03
+@@ -193,6 +194,8 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
+ /* reserved 0x0014 */
+ #define MT_CLS_WIN_8_FORCE_MULTI_INPUT 0x0015
+ #define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016
++#define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017
++#define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018
+
+ /* vendor specific classes */
+ #define MT_CLS_3M 0x0101
+@@ -285,6 +288,15 @@ static const struct mt_class mt_classes[] = {
+ MT_QUIRK_WIN8_PTP_BUTTONS |
+ MT_QUIRK_FORCE_MULTI_INPUT,
+ .export_all_inputs = true },
++ { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU,
++ .quirks = MT_QUIRK_IGNORE_DUPLICATES |
++ MT_QUIRK_HOVERING |
++ MT_QUIRK_CONTACT_CNT_ACCURATE |
++ MT_QUIRK_STICKY_FINGERS |
++ MT_QUIRK_WIN8_PTP_BUTTONS |
++ MT_QUIRK_FORCE_MULTI_INPUT |
++ MT_QUIRK_NOT_SEEN_MEANS_UP,
++ .export_all_inputs = true },
+ { .name = MT_CLS_WIN_8_DISABLE_WAKEUP,
+ .quirks = MT_QUIRK_ALWAYS_VALID |
+ MT_QUIRK_IGNORE_DUPLICATES |
+@@ -294,6 +306,13 @@ static const struct mt_class mt_classes[] = {
+ MT_QUIRK_WIN8_PTP_BUTTONS |
+ MT_QUIRK_DISABLE_WAKEUP,
+ .export_all_inputs = true },
++ { .name = MT_CLS_WIN_8_NO_STICKY_FINGERS,
++ .quirks = MT_QUIRK_ALWAYS_VALID |
++ MT_QUIRK_IGNORE_DUPLICATES |
++ MT_QUIRK_HOVERING |
++ MT_QUIRK_CONTACT_CNT_ACCURATE |
++ MT_QUIRK_WIN8_PTP_BUTTONS,
++ .export_all_inputs = true },
+
+ /*
+ * vendor specific classes
+@@ -775,6 +794,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ case HID_DG_CONFIDENCE:
+ if ((cls->name == MT_CLS_WIN_8 ||
+ cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT ||
++ cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU ||
+ cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) &&
+ (field->application == HID_DG_TOUCHPAD ||
+ field->application == HID_DG_TOUCHSCREEN))
+@@ -990,6 +1010,7 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
+ struct mt_usages *slot)
+ {
+ struct input_mt *mt = input->mt;
++ struct hid_device *hdev = td->hdev;
+ __s32 quirks = app->quirks;
+ bool valid = true;
+ bool confidence_state = true;
+@@ -1067,6 +1088,10 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
+ int orientation = wide;
+ int max_azimuth;
+ int azimuth;
++ int x;
++ int y;
++ int cx;
++ int cy;
+
+ if (slot->a != DEFAULT_ZERO) {
+ /*
+@@ -1085,6 +1110,9 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
+ if (azimuth > max_azimuth * 2)
+ azimuth -= max_azimuth * 4;
+ orientation = -azimuth;
++ if (quirks & MT_QUIRK_ORIENTATION_INVERT)
++ orientation = -orientation;
++
+ }
+
+ if (quirks & MT_QUIRK_TOUCH_SIZE_SCALING) {
+@@ -1096,10 +1124,23 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
+ minor = minor >> 1;
+ }
+
+- input_event(input, EV_ABS, ABS_MT_POSITION_X, *slot->x);
+- input_event(input, EV_ABS, ABS_MT_POSITION_Y, *slot->y);
+- input_event(input, EV_ABS, ABS_MT_TOOL_X, *slot->cx);
+- input_event(input, EV_ABS, ABS_MT_TOOL_Y, *slot->cy);
++ x = hdev->quirks & HID_QUIRK_X_INVERT ?
++ input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->x :
++ *slot->x;
++ y = hdev->quirks & HID_QUIRK_Y_INVERT ?
++ input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->y :
++ *slot->y;
++ cx = hdev->quirks & HID_QUIRK_X_INVERT ?
++ input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->cx :
++ *slot->cx;
++ cy = hdev->quirks & HID_QUIRK_Y_INVERT ?
++ input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->cy :
++ *slot->cy;
++
++ input_event(input, EV_ABS, ABS_MT_POSITION_X, x);
++ input_event(input, EV_ABS, ABS_MT_POSITION_Y, y);
++ input_event(input, EV_ABS, ABS_MT_TOOL_X, cx);
++ input_event(input, EV_ABS, ABS_MT_TOOL_Y, cy);
+ input_event(input, EV_ABS, ABS_MT_DISTANCE, !*slot->tip_state);
+ input_event(input, EV_ABS, ABS_MT_ORIENTATION, orientation);
+ input_event(input, EV_ABS, ABS_MT_PRESSURE, *slot->p);
+@@ -1167,7 +1208,7 @@ static void mt_touch_report(struct hid_device *hid,
+ int contact_count = -1;
+
+ /* sticky fingers release in progress, abort */
+- if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
++ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
+ return;
+
+ scantime = *app->scantime;
+@@ -1248,7 +1289,7 @@ static void mt_touch_report(struct hid_device *hid,
+ del_timer(&td->release_timer);
+ }
+
+- clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
++ clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
+ }
+
+ static int mt_touch_input_configured(struct hid_device *hdev,
+@@ -1553,7 +1594,6 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app)
+ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
+ {
+ struct mt_device *td = hid_get_drvdata(hdev);
+- char *name;
+ const char *suffix = NULL;
+ struct mt_report_data *rdata;
+ struct mt_application *mt_application = NULL;
+@@ -1607,15 +1647,9 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
+ break;
+ }
+
+- if (suffix) {
+- name = devm_kzalloc(&hi->input->dev,
+- strlen(hdev->name) + strlen(suffix) + 2,
+- GFP_KERNEL);
+- if (name) {
+- sprintf(name, "%s %s", hdev->name, suffix);
+- hi->input->name = name;
+- }
+- }
++ if (suffix)
++ hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
++ "%s %s", hdev->name, suffix);
+
+ return 0;
+ }
+@@ -1683,11 +1717,11 @@ static void mt_expired_timeout(struct timer_list *t)
+ * An input report came in just before we release the sticky fingers,
+ * it will take care of the sticky fingers.
+ */
+- if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
++ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
+ return;
+ if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags))
+ mt_release_contacts(hdev);
+- clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
++ clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
+ }
+
+ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+@@ -1719,6 +1753,15 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ if (id->vendor == HID_ANY_ID && id->product == HID_ANY_ID)
+ td->serial_maybe = true;
+
++
++ /* Orientation is inverted if the X or Y axes are
++ * flipped, but normalized if both are inverted.
++ */
++ if (hdev->quirks & (HID_QUIRK_X_INVERT | HID_QUIRK_Y_INVERT) &&
++ !((hdev->quirks & HID_QUIRK_X_INVERT)
++ && (hdev->quirks & HID_QUIRK_Y_INVERT)))
++ td->mtclass.quirks = MT_QUIRK_ORIENTATION_INVERT;
++
+ /* This allows the driver to correctly support devices
+ * that emit events over several HID messages.
+ */
+@@ -1946,6 +1989,10 @@ static const struct hid_device_id mt_devices[] = {
+ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_ELAN, 0x313a) },
+
++ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
++ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
++ USB_VENDOR_ID_ELAN, 0x3148) },
++
+ /* Elitegroup panel */
+ { .driver_data = MT_CLS_SERIAL,
+ MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP,
+@@ -2024,6 +2071,12 @@ static const struct hid_device_id mt_devices[] = {
+ USB_VENDOR_ID_LENOVO,
+ USB_DEVICE_ID_LENOVO_X1_TAB3) },
+
++ /* Lenovo X12 TAB Gen 1 */
++ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU,
++ HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
++ USB_VENDOR_ID_LENOVO,
++ USB_DEVICE_ID_LENOVO_X12_TAB) },
++
+ /* MosArt panels */
+ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE,
+ MT_USB_DEVICE(USB_VENDOR_ID_ASUS,
+@@ -2120,6 +2173,11 @@ static const struct hid_device_id mt_devices[] = {
+ MT_USB_DEVICE(USB_VENDOR_ID_VTL,
+ USB_DEVICE_ID_VTL_MULTITOUCH_FF3F) },
+
++ /* Winbond Electronics Corp. */
++ { .driver_data = MT_CLS_WIN_8_NO_STICKY_FINGERS,
++ HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8,
++ USB_VENDOR_ID_WINBOND, USB_DEVICE_ID_TSTP_MTOUCH) },
++
+ /* Wistron panels */
+ { .driver_data = MT_CLS_NSMU,
+ MT_USB_DEVICE(USB_VENDOR_ID_WISTRON,
+@@ -2163,6 +2221,9 @@ static const struct hid_device_id mt_devices[] = {
+ { .driver_data = MT_CLS_GOOGLE,
+ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
+ USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) },
++ { .driver_data = MT_CLS_GOOGLE,
++ HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE,
++ USB_DEVICE_ID_GOOGLE_WHISKERS) },
+
+ /* Generic MT device */
+ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) },
+diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c
+index e81b7cec2d124..3d414ae194acb 100644
+--- a/drivers/hid/hid-plantronics.c
++++ b/drivers/hid/hid-plantronics.c
+@@ -198,9 +198,18 @@ err:
+ }
+
+ static const struct hid_device_id plantronics_devices[] = {
++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS,
++ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3210_SERIES),
++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS,
+ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3220_SERIES),
+ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS },
++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS,
++ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3215_SERIES),
++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS },
++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS,
++ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3225_SERIES),
++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
+ { }
+ };
+diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
+index ab7c82c2e8867..944e5e5ff1348 100644
+--- a/drivers/hid/hid-playstation.c
++++ b/drivers/hid/hid-playstation.c
+@@ -626,6 +626,7 @@ static const struct attribute_group ps_device_attribute_group = {
+
+ static int dualsense_get_calibration_data(struct dualsense *ds)
+ {
++ struct hid_device *hdev = ds->base.hdev;
+ short gyro_pitch_bias, gyro_pitch_plus, gyro_pitch_minus;
+ short gyro_yaw_bias, gyro_yaw_plus, gyro_yaw_minus;
+ short gyro_roll_bias, gyro_roll_plus, gyro_roll_minus;
+@@ -636,6 +637,7 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
+ int speed_2x;
+ int range_2g;
+ int ret = 0;
++ int i;
+ uint8_t *buf;
+
+ buf = kzalloc(DS_FEATURE_REPORT_CALIBRATION_SIZE, GFP_KERNEL);
+@@ -687,6 +689,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
+ ds->gyro_calib_data[2].sens_numer = speed_2x*DS_GYRO_RES_PER_DEG_S;
+ ds->gyro_calib_data[2].sens_denom = gyro_roll_plus - gyro_roll_minus;
+
++ /*
++ * Sanity check gyro calibration data. This is needed to prevent crashes
++ * during report handling of virtual, clone or broken devices not implementing
++ * calibration data properly.
++ */
++ for (i = 0; i < ARRAY_SIZE(ds->gyro_calib_data); i++) {
++ if (ds->gyro_calib_data[i].sens_denom == 0) {
++ hid_warn(hdev, "Invalid gyro calibration data for axis (%d), disabling calibration.",
++ ds->gyro_calib_data[i].abs_code);
++ ds->gyro_calib_data[i].bias = 0;
++ ds->gyro_calib_data[i].sens_numer = DS_GYRO_RANGE;
++ ds->gyro_calib_data[i].sens_denom = S16_MAX;
++ }
++ }
++
+ /*
+ * Set accelerometer calibration and normalization parameters.
+ * Data values will be normalized to 1/DS_ACC_RES_PER_G g.
+@@ -709,6 +726,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds)
+ ds->accel_calib_data[2].sens_numer = 2*DS_ACC_RES_PER_G;
+ ds->accel_calib_data[2].sens_denom = range_2g;
+
++ /*
++ * Sanity check accelerometer calibration data. This is needed to prevent crashes
++ * during report handling of virtual, clone or broken devices not implementing calibration
++ * data properly.
++ */
++ for (i = 0; i < ARRAY_SIZE(ds->accel_calib_data); i++) {
++ if (ds->accel_calib_data[i].sens_denom == 0) {
++ hid_warn(hdev, "Invalid accelerometer calibration data for axis (%d), disabling calibration.",
++ ds->accel_calib_data[i].abs_code);
++ ds->accel_calib_data[i].bias = 0;
++ ds->accel_calib_data[i].sens_numer = DS_ACC_RANGE;
++ ds->accel_calib_data[i].sens_denom = S16_MAX;
++ }
++ }
++
+ err_free:
+ kfree(buf);
+ return ret;
+@@ -1282,7 +1314,8 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ goto err_stop;
+ }
+
+- if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) {
++ if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER ||
++ hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) {
+ dev = dualsense_create(hdev);
+ if (IS_ERR(dev)) {
+ hid_err(hdev, "Failed to create dualsense.\n");
+@@ -1320,6 +1353,8 @@ static void ps_remove(struct hid_device *hdev)
+ static const struct hid_device_id ps_devices[] = {
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
++ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) },
+ { }
+ };
+ MODULE_DEVICE_TABLE(hid, ps_devices);
+diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
+index 2666af02d5c1a..e4e9471d0f1e9 100644
+--- a/drivers/hid/hid-prodikeys.c
++++ b/drivers/hid/hid-prodikeys.c
+@@ -798,12 +798,18 @@ static int pk_raw_event(struct hid_device *hdev, struct hid_report *report,
+ static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+ int ret;
+- struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+- unsigned short ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
++ struct usb_interface *intf;
++ unsigned short ifnum;
+ unsigned long quirks = id->driver_data;
+ struct pk_device *pk;
+ struct pcmidi_snd *pm = NULL;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
++ intf = to_usb_interface(hdev->dev.parent);
++ ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
++
+ pk = kzalloc(sizeof(*pk), GFP_KERNEL);
+ if (pk == NULL) {
+ hid_err(hdev, "can't alloc descriptor\n");
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index 2e104682c22b9..96ca7d981ee20 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -96,6 +96,7 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A293), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A), HID_QUIRK_ALWAYS_POLL },
++ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL },
+@@ -122,8 +123,10 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT },
++ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS },
++ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS },
+@@ -145,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL },
++ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL },
+@@ -186,6 +190,7 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT },
++ { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT },
+@@ -377,7 +382,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
+- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
+ #endif
+ #if IS_ENABLED(CONFIG_HID_ELO)
+ { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
+@@ -606,6 +612,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
++ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7) },
+ #endif
+ #if IS_ENABLED(CONFIG_HID_SAMSUNG)
+ { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
+@@ -1211,7 +1218,7 @@ EXPORT_SYMBOL_GPL(hid_quirks_exit);
+ static unsigned long hid_gets_squirk(const struct hid_device *hdev)
+ {
+ const struct hid_device_id *bl_entry;
+- unsigned long quirks = 0;
++ unsigned long quirks = hdev->initial_quirks;
+
+ if (hid_match_id(hdev, hid_ignore_list))
+ quirks |= HID_QUIRK_IGNORE;
+diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c
+index 4556d2a50f754..d94ee0539421e 100644
+--- a/drivers/hid/hid-roccat-arvo.c
++++ b/drivers/hid/hid-roccat-arvo.c
+@@ -344,6 +344,9 @@ static int arvo_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-isku.c b/drivers/hid/hid-roccat-isku.c
+index ce5f22519956a..e95d59cd8d075 100644
+--- a/drivers/hid/hid-roccat-isku.c
++++ b/drivers/hid/hid-roccat-isku.c
+@@ -324,6 +324,9 @@ static int isku_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c
+index 1ca64481145ee..e8522eacf7973 100644
+--- a/drivers/hid/hid-roccat-kone.c
++++ b/drivers/hid/hid-roccat-kone.c
+@@ -749,6 +749,9 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c
+index 0316edf8c5bb4..1896c69ea512f 100644
+--- a/drivers/hid/hid-roccat-koneplus.c
++++ b/drivers/hid/hid-roccat-koneplus.c
+@@ -431,6 +431,9 @@ static int koneplus_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-konepure.c b/drivers/hid/hid-roccat-konepure.c
+index 5248b3c7cf785..cf8eeb33a1257 100644
+--- a/drivers/hid/hid-roccat-konepure.c
++++ b/drivers/hid/hid-roccat-konepure.c
+@@ -133,6 +133,9 @@ static int konepure_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c
+index 9600128815705..6fb9b9563769d 100644
+--- a/drivers/hid/hid-roccat-kovaplus.c
++++ b/drivers/hid/hid-roccat-kovaplus.c
+@@ -501,6 +501,9 @@ static int kovaplus_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-lua.c b/drivers/hid/hid-roccat-lua.c
+index 4a88a76d5c622..d5ddf0d68346b 100644
+--- a/drivers/hid/hid-roccat-lua.c
++++ b/drivers/hid/hid-roccat-lua.c
+@@ -160,6 +160,9 @@ static int lua_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c
+index 989927defe8db..4fcc8e7d276f2 100644
+--- a/drivers/hid/hid-roccat-pyra.c
++++ b/drivers/hid/hid-roccat-pyra.c
+@@ -449,6 +449,9 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-ryos.c b/drivers/hid/hid-roccat-ryos.c
+index 3956a6c9c5217..5bf1971a2b14d 100644
+--- a/drivers/hid/hid-roccat-ryos.c
++++ b/drivers/hid/hid-roccat-ryos.c
+@@ -141,6 +141,9 @@ static int ryos_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat-savu.c b/drivers/hid/hid-roccat-savu.c
+index 818701f7a0281..a784bb4ee6512 100644
+--- a/drivers/hid/hid-roccat-savu.c
++++ b/drivers/hid/hid-roccat-savu.c
+@@ -113,6 +113,9 @@ static int savu_probe(struct hid_device *hdev,
+ {
+ int retval;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ retval = hid_parse(hdev);
+ if (retval) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c
+index 26373b82fe812..6da80e442fdd1 100644
+--- a/drivers/hid/hid-roccat.c
++++ b/drivers/hid/hid-roccat.c
+@@ -257,6 +257,8 @@ int roccat_report_event(int minor, u8 const *data)
+ if (!new_value)
+ return -ENOMEM;
+
++ mutex_lock(&device->cbuf_lock);
++
+ report = &device->cbuf[device->cbuf_end];
+
+ /* passing NULL is safe */
+@@ -276,6 +278,8 @@ int roccat_report_event(int minor, u8 const *data)
+ reader->cbuf_start = (reader->cbuf_start + 1) % ROCCAT_CBUF_SIZE;
+ }
+
++ mutex_unlock(&device->cbuf_lock);
++
+ wake_up_interruptible(&device->wait);
+ return 0;
+ }
+diff --git a/drivers/hid/hid-saitek.c b/drivers/hid/hid-saitek.c
+index c7bf14c019605..b84e975977c42 100644
+--- a/drivers/hid/hid-saitek.c
++++ b/drivers/hid/hid-saitek.c
+@@ -187,6 +187,8 @@ static const struct hid_device_id saitek_devices[] = {
+ .driver_data = SAITEK_RELEASE_MODE_RAT7 },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7),
+ .driver_data = SAITEK_RELEASE_MODE_MMO7 },
++ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7),
++ .driver_data = SAITEK_RELEASE_MODE_MMO7 },
+ { }
+ };
+
+diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c
+index 2e1c31156eca0..cf5992e970940 100644
+--- a/drivers/hid/hid-samsung.c
++++ b/drivers/hid/hid-samsung.c
+@@ -152,6 +152,9 @@ static int samsung_probe(struct hid_device *hdev,
+ int ret;
+ unsigned int cmask = HID_CONNECT_DEFAULT;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ ret = hid_parse(hdev);
+ if (ret) {
+ hid_err(hdev, "parse failed\n");
+diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
+index 32c2306e240d6..602465ad27458 100644
+--- a/drivers/hid/hid-sensor-custom.c
++++ b/drivers/hid/hid-sensor-custom.c
+@@ -62,7 +62,7 @@ struct hid_sensor_sample {
+ u32 raw_len;
+ } __packed;
+
+-static struct attribute hid_custom_attrs[] = {
++static struct attribute hid_custom_attrs[HID_CUSTOM_TOTAL_ATTRS] = {
+ {.name = "name", .mode = S_IRUGO},
+ {.name = "units", .mode = S_IRUGO},
+ {.name = "unit-expo", .mode = S_IRUGO},
+diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
+index d1b107d547f54..60ec2b29d54de 100644
+--- a/drivers/hid/hid-sony.c
++++ b/drivers/hid/hid-sony.c
+@@ -3000,7 +3000,6 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ sc->quirks = quirks;
+ hid_set_drvdata(hdev, sc);
+ sc->hdev = hdev;
+- usbdev = to_usb_device(sc->hdev->dev.parent->parent);
+
+ ret = hid_parse(hdev);
+ if (ret) {
+@@ -3038,14 +3037,23 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ */
+ if (!(hdev->claimed & HID_CLAIMED_INPUT)) {
+ hid_err(hdev, "failed to claim input\n");
+- hid_hw_stop(hdev);
+- return -ENODEV;
++ ret = -ENODEV;
++ goto err;
+ }
+
+ if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) {
++ if (!hid_is_usb(hdev)) {
++ ret = -EINVAL;
++ goto err;
++ }
++
++ usbdev = to_usb_device(sc->hdev->dev.parent->parent);
++
+ sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC);
+- if (!sc->ghl_urb)
+- return -ENOMEM;
++ if (!sc->ghl_urb) {
++ ret = -ENOMEM;
++ goto err;
++ }
+
+ if (sc->quirks & GHL_GUITAR_PS3WIIU)
+ ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data,
+@@ -3055,7 +3063,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ ARRAY_SIZE(ghl_ps4_magic_data));
+ if (ret) {
+ hid_err(hdev, "error preparing URB\n");
+- return ret;
++ goto err;
+ }
+
+ timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0);
+@@ -3064,6 +3072,10 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
+ }
+
+ return ret;
++
++err:
++ hid_hw_stop(hdev);
++ return ret;
+ }
+
+ static void sony_remove(struct hid_device *hdev)
+diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
+index a3b151b29bd71..fc616db4231bb 100644
+--- a/drivers/hid/hid-steam.c
++++ b/drivers/hid/hid-steam.c
+@@ -134,6 +134,11 @@ static int steam_recv_report(struct steam_device *steam,
+ int ret;
+
+ r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0];
++ if (!r) {
++ hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n");
++ return -EINVAL;
++ }
++
+ if (hid_report_len(r) < 64)
+ return -EINVAL;
+
+@@ -165,6 +170,11 @@ static int steam_send_report(struct steam_device *steam,
+ int ret;
+
+ r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0];
++ if (!r) {
++ hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to send\n");
++ return -EINVAL;
++ }
++
+ if (hid_report_len(r) < 64)
+ return -EINVAL;
+
+diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
+index d44550aa88057..2221bc26e611a 100644
+--- a/drivers/hid/hid-thrustmaster.c
++++ b/drivers/hid/hid-thrustmaster.c
+@@ -64,13 +64,16 @@ struct tm_wheel_info {
+ */
+ static const struct tm_wheel_info tm_wheels_infos[] = {
+ {0x0306, 0x0006, "Thrustmaster T150RS"},
++ {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"},
+ {0x0206, 0x0005, "Thrustmaster T300RS"},
++ {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"},
++ {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"},
+ {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"},
+ {0x0002, 0x0002, "Thrustmaster T500RS"}
+ //{0x0407, 0x0001, "Thrustmaster TMX"}
+ };
+
+-static const uint8_t tm_wheels_infos_length = 4;
++static const uint8_t tm_wheels_infos_length = 7;
+
+ /*
+ * This structs contains (in little endian) the response data
+@@ -158,6 +161,12 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
+ return;
+ }
+
++ if (usbif->cur_altsetting->desc.bNumEndpoints < 2) {
++ kfree(send_buf);
++ hid_err(hdev, "Wrong number of endpoints?\n");
++ return;
++ }
++
+ ep = &usbif->cur_altsetting->endpoint[1];
+ b_ep = ep->desc.bEndpointAddress;
+
+@@ -274,6 +283,9 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i
+ int ret = 0;
+ struct tm_wheel *tm_wheel = 0;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ ret = hid_parse(hdev);
+ if (ret) {
+ hid_err(hdev, "parse failed with error %d\n", ret);
+diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c
+index d70cd3d7f583b..ac3fd870673d2 100644
+--- a/drivers/hid/hid-u2fzero.c
++++ b/drivers/hid/hid-u2fzero.c
+@@ -132,7 +132,7 @@ static int u2fzero_recv(struct u2fzero_device *dev,
+
+ ret = (wait_for_completion_timeout(
+ &ctx.done, msecs_to_jiffies(USB_CTRL_SET_TIMEOUT)));
+- if (ret < 0) {
++ if (ret == 0) {
+ usb_kill_urb(dev->urb);
+ hid_err(hdev, "urb submission timed out");
+ } else {
+@@ -191,6 +191,8 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data,
+ struct u2f_hid_msg resp;
+ int ret;
+ size_t actual_length;
++ /* valid packets must have a correct header */
++ int min_length = offsetof(struct u2f_hid_msg, init.data);
+
+ if (!dev->present) {
+ hid_dbg(dev->hdev, "device not present");
+@@ -200,12 +202,12 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data,
+ ret = u2fzero_recv(dev, &req, &resp);
+
+ /* ignore errors or packets without data */
+- if (ret < offsetof(struct u2f_hid_msg, init.data))
++ if (ret < min_length)
+ return 0;
+
+ /* only take the minimum amount of data it is safe to take */
+- actual_length = min3((size_t)ret - offsetof(struct u2f_hid_msg,
+- init.data), U2F_HID_MSG_LEN(resp), max);
++ actual_length = min3((size_t)ret - min_length,
++ U2F_HID_MSG_LEN(resp), max);
+
+ memcpy(data, resp.init.data, actual_length);
+
+@@ -288,7 +290,7 @@ static int u2fzero_probe(struct hid_device *hdev,
+ unsigned int minor;
+ int ret;
+
+- if (!hid_is_using_ll_driver(hdev, &usb_hid_driver))
++ if (!hid_is_usb(hdev))
+ return -EINVAL;
+
+ dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL);
+diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
+index 6a9865dd703c0..785d81d61ba48 100644
+--- a/drivers/hid/hid-uclogic-core.c
++++ b/drivers/hid/hid-uclogic-core.c
+@@ -164,11 +164,15 @@ static int uclogic_probe(struct hid_device *hdev,
+ struct uclogic_drvdata *drvdata = NULL;
+ bool params_initialized = false;
+
++ if (!hid_is_usb(hdev))
++ return -EINVAL;
++
+ /*
+ * libinput requires the pad interface to be on a different node
+ * than the pen, so use QUIRK_MULTI_INPUT for all tablets.
+ */
+ hdev->quirks |= HID_QUIRK_MULTI_INPUT;
++ hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE;
+
+ /* Allocate and assign driver data */
+ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
+diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
+index 3d67b748a3b95..3e70f969fb849 100644
+--- a/drivers/hid/hid-uclogic-params.c
++++ b/drivers/hid/hid-uclogic-params.c
+@@ -66,7 +66,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev,
+ __u8 idx, size_t len)
+ {
+ int rc;
+- struct usb_device *udev = hid_to_usb_dev(hdev);
++ struct usb_device *udev;
+ __u8 *buf = NULL;
+
+ /* Check arguments */
+@@ -75,6 +75,8 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev,
+ goto cleanup;
+ }
+
++ udev = hid_to_usb_dev(hdev);
++
+ buf = kmalloc(len, GFP_KERNEL);
+ if (buf == NULL) {
+ rc = -ENOMEM;
+@@ -450,7 +452,7 @@ static int uclogic_params_frame_init_v1_buttonpad(
+ {
+ int rc;
+ bool found = false;
+- struct usb_device *usb_dev = hid_to_usb_dev(hdev);
++ struct usb_device *usb_dev;
+ char *str_buf = NULL;
+ const size_t str_len = 16;
+
+@@ -460,6 +462,8 @@ static int uclogic_params_frame_init_v1_buttonpad(
+ goto cleanup;
+ }
+
++ usb_dev = hid_to_usb_dev(hdev);
++
+ /*
+ * Enable generic button mode
+ */
+@@ -707,9 +711,9 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
+ struct hid_device *hdev)
+ {
+ int rc;
+- struct usb_device *udev = hid_to_usb_dev(hdev);
+- struct usb_interface *iface = to_usb_interface(hdev->dev.parent);
+- __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;
++ struct usb_device *udev;
++ struct usb_interface *iface;
++ __u8 bInterfaceNumber;
+ bool found;
+ /* The resulting parameters (noop) */
+ struct uclogic_params p = {0, };
+@@ -723,6 +727,10 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
+ goto cleanup;
+ }
+
++ udev = hid_to_usb_dev(hdev);
++ iface = to_usb_interface(hdev->dev.parent);
++ bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;
++
+ /* If it's not a pen interface */
+ if (bInterfaceNumber != 0) {
+ /* TODO: Consider marking the interface invalid */
+@@ -834,21 +842,25 @@ int uclogic_params_init(struct uclogic_params *params,
+ struct hid_device *hdev)
+ {
+ int rc;
+- struct usb_device *udev = hid_to_usb_dev(hdev);
+- __u8 bNumInterfaces = udev->config->desc.bNumInterfaces;
+- struct usb_interface *iface = to_usb_interface(hdev->dev.parent);
+- __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;
++ struct usb_device *udev;
++ __u8 bNumInterfaces;
++ struct usb_interface *iface;
++ __u8 bInterfaceNumber;
+ bool found;
+ /* The resulting parameters (noop) */
+ struct uclogic_params p = {0, };
+
+ /* Check arguments */
+- if (params == NULL || hdev == NULL ||
+- !hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
++ if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) {
+ rc = -EINVAL;
+ goto cleanup;
+ }
+
++ udev = hid_to_usb_dev(hdev);
++ bNumInterfaces = udev->config->desc.bNumInterfaces;
++ iface = to_usb_interface(hdev->dev.parent);
++ bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;
++
+ /*
+ * Set replacement report descriptor if the original matches the
+ * specified size. Otherwise keep interface unchanged.
+diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
+index cd7ada48b1d9f..d57ec17670379 100644
+--- a/drivers/hid/hid-vivaldi.c
++++ b/drivers/hid/hid-vivaldi.c
+@@ -57,6 +57,9 @@ static int vivaldi_probe(struct hid_device *hdev,
+ int ret;
+
+ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
++ if (!drvdata)
++ return -ENOMEM;
++
+ hid_set_drvdata(hdev, drvdata);
+
+ ret = hid_parse(hdev);
+@@ -71,10 +74,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
+ struct hid_usage *usage)
+ {
+ struct vivaldi_data *drvdata = hid_get_drvdata(hdev);
++ struct hid_report *report = field->report;
+ int fn_key;
+ int ret;
+ u32 report_len;
+- u8 *buf;
++ u8 *report_data, *buf;
+
+ if (field->logical != HID_USAGE_FN_ROW_PHYSMAP ||
+ (usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL)
+@@ -86,12 +90,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
+ if (fn_key > drvdata->max_function_row_key)
+ drvdata->max_function_row_key = fn_key;
+
+- buf = hid_alloc_report_buf(field->report, GFP_KERNEL);
+- if (!buf)
++ report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL);
++ if (!report_data)
+ return;
+
+- report_len = hid_report_len(field->report);
+- ret = hid_hw_raw_request(hdev, field->report->id, buf,
++ report_len = hid_report_len(report);
++ if (!report->id) {
++ /*
++ * hid_hw_raw_request() will stuff report ID (which will be 0)
++ * into the first byte of the buffer even for unnumbered
++ * reports, so we need to account for this to avoid getting
++ * -EOVERFLOW in return.
++ * Note that hid_alloc_report_buf() adds 7 bytes to the size
++ * so we can safely say that we have space for an extra byte.
++ */
++ report_len++;
++ }
++
++ ret = hid_hw_raw_request(hdev, report->id, report_data,
+ report_len, HID_FEATURE_REPORT,
+ HID_REQ_GET_REPORT);
+ if (ret < 0) {
+@@ -100,7 +116,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
+ goto out;
+ }
+
+- ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
++ if (!report->id) {
++ /*
++ * Undo the damage from hid_hw_raw_request() for unnumbered
++ * reports.
++ */
++ report_data++;
++ report_len--;
++ }
++
++ ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
+ report_len, 0);
+ if (ret) {
+ dev_warn(&hdev->dev, "failed to report feature %d\n",
+@@ -118,7 +143,7 @@ out:
+ static int vivaldi_input_configured(struct hid_device *hdev,
+ struct hid_input *hidinput)
+ {
+- return sysfs_create_group(&hdev->dev.kobj, &input_attribute_group);
++ return devm_device_add_group(&hdev->dev, &input_attribute_group);
+ }
+
+ static const struct hid_device_id vivaldi_table[] = {
+diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
+index 79faac87a06ff..11b0ed4f3f8cc 100644
+--- a/drivers/hid/hidraw.c
++++ b/drivers/hid/hidraw.c
+@@ -346,10 +346,13 @@ static int hidraw_release(struct inode * inode, struct file * file)
+ unsigned int minor = iminor(inode);
+ struct hidraw_list *list = file->private_data;
+ unsigned long flags;
++ int i;
+
+ mutex_lock(&minors_lock);
+
+ spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags);
++ for (i = list->tail; i < list->head; i++)
++ kfree(list->buffer[i].value);
+ list_del(&list->node);
+ spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags);
+ kfree(list);
+diff --git a/drivers/hid/i2c-hid/i2c-hid-acpi.c b/drivers/hid/i2c-hid/i2c-hid-acpi.c
+index a6f0257a26de3..b96ae15e0ad91 100644
+--- a/drivers/hid/i2c-hid/i2c-hid-acpi.c
++++ b/drivers/hid/i2c-hid/i2c-hid-acpi.c
+@@ -111,7 +111,7 @@ static int i2c_hid_acpi_probe(struct i2c_client *client)
+ }
+
+ return i2c_hid_core_probe(client, &ihid_acpi->ops,
+- hid_descriptor_address);
++ hid_descriptor_address, 0);
+ }
+
+ static const struct acpi_device_id i2c_hid_acpi_match[] = {
+diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
+index 517141138b007..7c61bb9291e4e 100644
+--- a/drivers/hid/i2c-hid/i2c-hid-core.c
++++ b/drivers/hid/i2c-hid/i2c-hid-core.c
+@@ -615,6 +615,17 @@ static int i2c_hid_get_raw_report(struct hid_device *hid,
+ if (report_type == HID_OUTPUT_REPORT)
+ return -EINVAL;
+
++ /*
++ * In case of unnumbered reports the response from the device will
++ * not have the report ID that the upper layers expect, so we need
++ * to stash it in the buffer ourselves and adjust the data size.
++ */
++ if (!report_number) {
++ buf[0] = 0;
++ buf++;
++ count--;
++ }
++
+ /* +2 bytes to include the size of the reply in the query buffer */
+ ask_count = min(count + 2, (size_t)ihid->bufsize);
+
+@@ -636,6 +647,9 @@ static int i2c_hid_get_raw_report(struct hid_device *hid,
+ count = min(count, ret_count - 2);
+ memcpy(buf, ihid->rawbuf + 2, count);
+
++ if (!report_number)
++ count++;
++
+ return count;
+ }
+
+@@ -652,17 +666,19 @@ static int i2c_hid_output_raw_report(struct hid_device *hid, __u8 *buf,
+
+ mutex_lock(&ihid->reset_lock);
+
+- if (report_id) {
+- buf++;
+- count--;
+- }
+-
++ /*
++ * Note that both numbered and unnumbered reports passed here
++ * are supposed to have report ID stored in the 1st byte of the
++ * buffer, so we strip it off unconditionally before passing payload
++ * to i2c_hid_set_or_send_report which takes care of encoding
++ * everything properly.
++ */
+ ret = i2c_hid_set_or_send_report(client,
+ report_type == HID_FEATURE_REPORT ? 0x03 : 0x02,
+- report_id, buf, count, use_data);
++ report_id, buf + 1, count - 1, use_data);
+
+- if (report_id && ret >= 0)
+- ret++; /* add report_id to the number of transfered bytes */
++ if (ret >= 0)
++ ret++; /* add report_id to the number of transferred bytes */
+
+ mutex_unlock(&ihid->reset_lock);
+
+@@ -912,7 +928,7 @@ static void i2c_hid_core_shutdown_tail(struct i2c_hid *ihid)
+ }
+
+ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
+- u16 hid_descriptor_address)
++ u16 hid_descriptor_address, u32 quirks)
+ {
+ int ret;
+ struct i2c_hid *ihid;
+@@ -996,6 +1012,10 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
+ hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
+ hid->product = le16_to_cpu(ihid->hdesc.wProductID);
+
++ hid->initial_quirks = quirks;
++ hid->initial_quirks |= i2c_hid_get_dmi_quirks(hid->vendor,
++ hid->product);
++
+ snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X",
+ client->name, (u16)hid->vendor, (u16)hid->product);
+ strlcpy(hid->phys, dev_name(&client->dev), sizeof(hid->phys));
+diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+index 8e0f67455c098..210f17c3a0be0 100644
+--- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
++++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+@@ -10,8 +10,10 @@
+ #include <linux/types.h>
+ #include <linux/dmi.h>
+ #include <linux/mod_devicetable.h>
++#include <linux/hid.h>
+
+ #include "i2c-hid.h"
++#include "../hid-ids.h"
+
+
+ struct i2c_hid_desc_override {
+@@ -416,6 +418,28 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
+ { } /* Terminate list */
+ };
+
++static const struct hid_device_id i2c_hid_elan_flipped_quirks = {
++ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x2dcd),
++ HID_QUIRK_X_INVERT | HID_QUIRK_Y_INVERT
++};
++
++/*
++ * This list contains devices which have specific issues based on the system
++ * they're on and not just the device itself. The driver_data will have a
++ * specific hid device to match against.
++ */
++static const struct dmi_system_id i2c_hid_dmi_quirk_table[] = {
++ {
++ .ident = "DynaBook K50/FR",
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dynabook Inc."),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "dynabook K50/FR"),
++ },
++ .driver_data = (void *)&i2c_hid_elan_flipped_quirks,
++ },
++ { } /* Terminate list */
++};
++
+
+ struct i2c_hid_desc *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name)
+ {
+@@ -450,3 +474,21 @@ char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name,
+ *size = override->hid_report_desc_size;
+ return override->hid_report_desc;
+ }
++
++u32 i2c_hid_get_dmi_quirks(const u16 vendor, const u16 product)
++{
++ u32 quirks = 0;
++ const struct dmi_system_id *system_id =
++ dmi_first_match(i2c_hid_dmi_quirk_table);
++
++ if (system_id) {
++ const struct hid_device_id *device_id =
++ (struct hid_device_id *)(system_id->driver_data);
++
++ if (device_id && device_id->vendor == vendor &&
++ device_id->product == product)
++ quirks = device_id->driver_data;
++ }
++
++ return quirks;
++}
+diff --git a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
+index 52674149a2750..ec6c73f75ffe0 100644
+--- a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
++++ b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
+@@ -27,7 +27,6 @@ struct i2c_hid_of_goodix {
+
+ struct regulator *vdd;
+ struct notifier_block nb;
+- struct mutex regulator_mutex;
+ struct gpio_desc *reset_gpio;
+ const struct goodix_i2c_hid_timing_data *timings;
+ };
+@@ -67,8 +66,6 @@ static int ihid_goodix_vdd_notify(struct notifier_block *nb,
+ container_of(nb, struct i2c_hid_of_goodix, nb);
+ int ret = NOTIFY_OK;
+
+- mutex_lock(&ihid_goodix->regulator_mutex);
+-
+ switch (event) {
+ case REGULATOR_EVENT_PRE_DISABLE:
+ gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 1);
+@@ -87,8 +84,6 @@ static int ihid_goodix_vdd_notify(struct notifier_block *nb,
+ break;
+ }
+
+- mutex_unlock(&ihid_goodix->regulator_mutex);
+-
+ return ret;
+ }
+
+@@ -102,8 +97,6 @@ static int i2c_hid_of_goodix_probe(struct i2c_client *client,
+ if (!ihid_goodix)
+ return -ENOMEM;
+
+- mutex_init(&ihid_goodix->regulator_mutex);
+-
+ ihid_goodix->ops.power_up = goodix_i2c_hid_power_up;
+ ihid_goodix->ops.power_down = goodix_i2c_hid_power_down;
+
+@@ -130,27 +123,30 @@ static int i2c_hid_of_goodix_probe(struct i2c_client *client,
+ * long. Holding the controller in reset apparently draws extra
+ * power.
+ */
+- mutex_lock(&ihid_goodix->regulator_mutex);
+ ihid_goodix->nb.notifier_call = ihid_goodix_vdd_notify;
+ ret = devm_regulator_register_notifier(ihid_goodix->vdd, &ihid_goodix->nb);
+- if (ret) {
+- mutex_unlock(&ihid_goodix->regulator_mutex);
++ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "regulator notifier request failed\n");
+- }
+
+ /*
+ * If someone else is holding the regulator on (or the regulator is
+ * an always-on one) we might never be told to deassert reset. Do it
+- * now. Here we'll assume that someone else might have _just
+- * barely_ turned the regulator on so we'll do the full
+- * "post_power_delay" just in case.
++ * now... and temporarily bump the regulator reference count just to
++ * make sure it is impossible for this to race with our own notifier!
++ * We also assume that someone else might have _just barely_ turned
++ * the regulator on so we'll do the full "post_power_delay" just in
++ * case.
+ */
+- if (ihid_goodix->reset_gpio && regulator_is_enabled(ihid_goodix->vdd))
++ if (ihid_goodix->reset_gpio && regulator_is_enabled(ihid_goodix->vdd)) {
++ ret = regulator_enable(ihid_goodix->vdd);
++ if (ret)
++ return ret;
+ goodix_i2c_hid_deassert_reset(ihid_goodix, true);
+- mutex_unlock(&ihid_goodix->regulator_mutex);
++ regulator_disable(ihid_goodix->vdd);
++ }
+
+- return i2c_hid_core_probe(client, &ihid_goodix->ops, 0x0001);
++ return i2c_hid_core_probe(client, &ihid_goodix->ops, 0x0001, 0);
+ }
+
+ static const struct goodix_i2c_hid_timing_data goodix_gt7375p_timing_data = {
+diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c
+index 4bf7cea926379..97a27a803f58d 100644
+--- a/drivers/hid/i2c-hid/i2c-hid-of.c
++++ b/drivers/hid/i2c-hid/i2c-hid-of.c
+@@ -21,6 +21,7 @@
+
+ #include <linux/delay.h>
+ #include <linux/device.h>
++#include <linux/hid.h>
+ #include <linux/i2c.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+@@ -71,6 +72,7 @@ static int i2c_hid_of_probe(struct i2c_client *client,
+ struct device *dev = &client->dev;
+ struct i2c_hid_of *ihid_of;
+ u16 hid_descriptor_address;
++ u32 quirks = 0;
+ int ret;
+ u32 val;
+
+@@ -105,8 +107,14 @@ static int i2c_hid_of_probe(struct i2c_client *client,
+ if (ret)
+ return ret;
+
++ if (device_property_read_bool(dev, "touchscreen-inverted-x"))
++ quirks |= HID_QUIRK_X_INVERT;
++
++ if (device_property_read_bool(dev, "touchscreen-inverted-y"))
++ quirks |= HID_QUIRK_Y_INVERT;
++
+ return i2c_hid_core_probe(client, &ihid_of->ops,
+- hid_descriptor_address);
++ hid_descriptor_address, quirks);
+ }
+
+ static const struct of_device_id i2c_hid_of_match[] = {
+diff --git a/drivers/hid/i2c-hid/i2c-hid.h b/drivers/hid/i2c-hid/i2c-hid.h
+index 05a7827d211af..7b93b6c21f126 100644
+--- a/drivers/hid/i2c-hid/i2c-hid.h
++++ b/drivers/hid/i2c-hid/i2c-hid.h
+@@ -9,6 +9,7 @@
+ struct i2c_hid_desc *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name);
+ char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name,
+ unsigned int *size);
++u32 i2c_hid_get_dmi_quirks(const u16 vendor, const u16 product);
+ #else
+ static inline struct i2c_hid_desc
+ *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name)
+@@ -16,6 +17,8 @@ static inline struct i2c_hid_desc
+ static inline char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name,
+ unsigned int *size)
+ { return NULL; }
++static inline u32 i2c_hid_get_dmi_quirks(const u16 vendor, const u16 product)
++{ return 0; }
+ #endif
+
+ /**
+@@ -32,7 +35,7 @@ struct i2chid_ops {
+ };
+
+ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
+- u16 hid_descriptor_address);
++ u16 hid_descriptor_address, u32 quirks);
+ int i2c_hid_core_remove(struct i2c_client *client);
+
+ void i2c_hid_core_shutdown(struct i2c_client *client);
+diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
+index 45e0c7b1c9ec6..6c942dd1abca2 100644
+--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
++++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
+@@ -5,6 +5,7 @@
+ * Copyright (c) 2014-2016, Intel Corporation.
+ */
+
++#include <linux/devm-helpers.h>
+ #include <linux/sched.h>
+ #include <linux/spinlock.h>
+ #include <linux/delay.h>
+@@ -621,7 +622,6 @@ static void recv_ipc(struct ishtp_device *dev, uint32_t doorbell_val)
+ case MNG_RESET_NOTIFY:
+ if (!ishtp_dev) {
+ ishtp_dev = dev;
+- INIT_WORK(&fw_reset_work, fw_reset_work_fn);
+ }
+ schedule_work(&fw_reset_work);
+ break;
+@@ -936,6 +936,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
+ {
+ struct ishtp_device *dev;
+ int i;
++ int ret;
+
+ dev = devm_kzalloc(&pdev->dev,
+ sizeof(struct ishtp_device) + sizeof(struct ish_hw),
+@@ -971,6 +972,12 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
+ list_add_tail(&tx_buf->link, &dev->wr_free_list);
+ }
+
++ ret = devm_work_autocancel(&pdev->dev, &fw_reset_work, fw_reset_work_fn);
++ if (ret) {
++ dev_err(dev->devc, "Failed to initialise FW reset work\n");
++ return NULL;
++ }
++
+ dev->ops = &ish_hw_ops;
+ dev->devc = &pdev->dev;
+ dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr);
+diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+index 1c5039081db27..8e9d9450cb835 100644
+--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
++++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+@@ -266,7 +266,8 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
+
+ if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag
+ && IPC_IS_ISH_ILUP(fwsts)) {
+- disable_irq_wake(pdev->irq);
++ if (device_may_wakeup(&pdev->dev))
++ disable_irq_wake(pdev->irq);
+
+ ish_set_host_ready(dev);
+
+@@ -337,7 +338,8 @@ static int __maybe_unused ish_suspend(struct device *device)
+ */
+ pci_save_state(pdev);
+
+- enable_irq_wake(pdev->irq);
++ if (device_may_wakeup(&pdev->dev))
++ enable_irq_wake(pdev->irq);
+ }
+ } else {
+ /*
+diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+index 1b486f2627477..6b511fadf7ad2 100644
+--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
++++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+@@ -657,21 +657,12 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
+ */
+ payload_max_size &= ~(L1_CACHE_BYTES - 1);
+
+- dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
++ dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL);
+ if (!dma_buf) {
+ client_data->flag_retry = true;
+ return -ENOMEM;
+ }
+
+- dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
+- DMA_TO_DEVICE);
+- if (dma_mapping_error(devc, dma_buf_phy)) {
+- dev_err(cl_data_to_dev(client_data), "DMA map failed\n");
+- client_data->flag_retry = true;
+- rv = -ENOMEM;
+- goto end_err_dma_buf_release;
+- }
+-
+ ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT;
+ ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA;
+ ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
+@@ -691,14 +682,7 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
+ ldr_xfer_dma_frag.fragment.size = fragment_size;
+ memcpy(dma_buf, &fw->data[fragment_offset], fragment_size);
+
+- dma_sync_single_for_device(devc, dma_buf_phy,
+- payload_max_size,
+- DMA_TO_DEVICE);
+-
+- /*
+- * Flush cache here because the dma_sync_single_for_device()
+- * does not do for x86.
+- */
++ /* Flush cache to be sure the data is in main memory. */
+ clflush_cache_range(dma_buf, payload_max_size);
+
+ dev_dbg(cl_data_to_dev(client_data),
+@@ -721,15 +705,8 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
+ fragment_offset += fragment_size;
+ }
+
+- dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
+- kfree(dma_buf);
+- return 0;
+-
+ end_err_resp_buf_release:
+- /* Free ISH buffer if not done already, in error case */
+- dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
+-end_err_dma_buf_release:
+- kfree(dma_buf);
++ dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy);
+ return rv;
+ }
+
+diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h
+index 6a5cc11aefd89..35dddc5015b37 100644
+--- a/drivers/hid/intel-ish-hid/ishtp-hid.h
++++ b/drivers/hid/intel-ish-hid/ishtp-hid.h
+@@ -105,7 +105,7 @@ struct report_list {
+ * @multi_packet_cnt: Count of fragmented packet count
+ *
+ * This structure is used to store completion flags and per client data like
+- * like report description, number of HID devices etc.
++ * report description, number of HID devices etc.
+ */
+ struct ishtp_cl_data {
+ /* completion flags */
+diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
+index 405e0d5212cc8..df0a825694f52 100644
+--- a/drivers/hid/intel-ish-hid/ishtp/client.c
++++ b/drivers/hid/intel-ish-hid/ishtp/client.c
+@@ -626,13 +626,14 @@ static void ishtp_cl_read_complete(struct ishtp_cl_rb *rb)
+ }
+
+ /**
+- * ipc_tx_callback() - IPC tx callback function
++ * ipc_tx_send() - IPC tx send function
+ * @prm: Pointer to client device instance
+ *
+- * Send message over IPC either first time or on callback on previous message
+- * completion
++ * Send message over IPC. Message will be split into fragments
++ * if message size is bigger than IPC FIFO size, and all
++ * fragments will be sent one by one.
+ */
+-static void ipc_tx_callback(void *prm)
++static void ipc_tx_send(void *prm)
+ {
+ struct ishtp_cl *cl = prm;
+ struct ishtp_cl_tx_ring *cl_msg;
+@@ -677,32 +678,41 @@ static void ipc_tx_callback(void *prm)
+ list);
+ rem = cl_msg->send_buf.size - cl->tx_offs;
+
+- ishtp_hdr.host_addr = cl->host_client_id;
+- ishtp_hdr.fw_addr = cl->fw_client_id;
+- ishtp_hdr.reserved = 0;
+- pmsg = cl_msg->send_buf.data + cl->tx_offs;
++ while (rem > 0) {
++ ishtp_hdr.host_addr = cl->host_client_id;
++ ishtp_hdr.fw_addr = cl->fw_client_id;
++ ishtp_hdr.reserved = 0;
++ pmsg = cl_msg->send_buf.data + cl->tx_offs;
++
++ if (rem <= dev->mtu) {
++ /* Last fragment or only one packet */
++ ishtp_hdr.length = rem;
++ ishtp_hdr.msg_complete = 1;
++ /* Submit to IPC queue with no callback */
++ ishtp_write_message(dev, &ishtp_hdr, pmsg);
++ cl->tx_offs = 0;
++ cl->sending = 0;
+
+- if (rem <= dev->mtu) {
+- ishtp_hdr.length = rem;
+- ishtp_hdr.msg_complete = 1;
+- cl->sending = 0;
+- list_del_init(&cl_msg->list); /* Must be before write */
+- spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags);
+- /* Submit to IPC queue with no callback */
+- ishtp_write_message(dev, &ishtp_hdr, pmsg);
+- spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags);
+- list_add_tail(&cl_msg->list, &cl->tx_free_list.list);
+- ++cl->tx_ring_free_size;
+- spin_unlock_irqrestore(&cl->tx_free_list_spinlock,
+- tx_free_flags);
+- } else {
+- /* Send IPC fragment */
+- spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags);
+- cl->tx_offs += dev->mtu;
+- ishtp_hdr.length = dev->mtu;
+- ishtp_hdr.msg_complete = 0;
+- ishtp_send_msg(dev, &ishtp_hdr, pmsg, ipc_tx_callback, cl);
++ break;
++ } else {
++ /* Send ipc fragment */
++ ishtp_hdr.length = dev->mtu;
++ ishtp_hdr.msg_complete = 0;
++ /* All fragments submitted to IPC queue with no callback */
++ ishtp_write_message(dev, &ishtp_hdr, pmsg);
++ cl->tx_offs += dev->mtu;
++ rem = cl_msg->send_buf.size - cl->tx_offs;
++ }
+ }
++
++ list_del_init(&cl_msg->list);
++ spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags);
++
++ spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags);
++ list_add_tail(&cl_msg->list, &cl->tx_free_list.list);
++ ++cl->tx_ring_free_size;
++ spin_unlock_irqrestore(&cl->tx_free_list_spinlock,
++ tx_free_flags);
+ }
+
+ /**
+@@ -720,7 +730,7 @@ static void ishtp_cl_send_msg_ipc(struct ishtp_device *dev,
+ return;
+
+ cl->tx_offs = 0;
+- ipc_tx_callback(cl);
++ ipc_tx_send(cl);
+ ++cl->send_msg_cnt_ipc;
+ }
+
+diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c
+index 40554c8daca07..00046cbfd4ed0 100644
+--- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c
++++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c
+@@ -104,6 +104,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev,
+ int required_slots = (size / DMA_SLOT_SIZE)
+ + 1 * (size % DMA_SLOT_SIZE != 0);
+
++ if (!dev->ishtp_dma_tx_map) {
++ dev_err(dev->devc, "Failed to allocate Tx map\n");
++ return NULL;
++ }
++
+ spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags);
+ for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) {
+ free = 1;
+@@ -150,6 +155,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev,
+ return;
+ }
+
++ if (!dev->ishtp_dma_tx_map) {
++ dev_err(dev->devc, "Failed to allocate Tx map\n");
++ return;
++ }
++
+ i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE;
+ spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags);
+ for (j = 0; j < acked_slots; j++) {
+diff --git a/drivers/hid/surface-hid/surface_hid.c b/drivers/hid/surface-hid/surface_hid.c
+index a3a70e4f3f6c9..d4aa8c81903ae 100644
+--- a/drivers/hid/surface-hid/surface_hid.c
++++ b/drivers/hid/surface-hid/surface_hid.c
+@@ -209,7 +209,7 @@ static int surface_hid_probe(struct ssam_device *sdev)
+
+ shid->notif.base.priority = 1;
+ shid->notif.base.fn = ssam_hid_event_fn;
+- shid->notif.event.reg = SSAM_EVENT_REGISTRY_REG;
++ shid->notif.event.reg = SSAM_EVENT_REGISTRY_REG(sdev->uid.target);
+ shid->notif.event.id.target_category = sdev->uid.category;
+ shid->notif.event.id.instance = sdev->uid.instance;
+ shid->notif.event.mask = SSAM_EVENT_MASK_STRICT;
+@@ -230,7 +230,7 @@ static void surface_hid_remove(struct ssam_device *sdev)
+ }
+
+ static const struct ssam_device_id surface_hid_match[] = {
+- { SSAM_SDEV(HID, 0x02, SSAM_ANY_IID, 0x00) },
++ { SSAM_SDEV(HID, SSAM_ANY_TID, SSAM_ANY_IID, 0x00) },
+ { },
+ };
+ MODULE_DEVICE_TABLE(ssam, surface_hid_match);
+diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
+index 8fe3efcb83271..ba0ca652b9dab 100644
+--- a/drivers/hid/uhid.c
++++ b/drivers/hid/uhid.c
+@@ -28,11 +28,22 @@
+
+ struct uhid_device {
+ struct mutex devlock;
++
++ /* This flag tracks whether the HID device is usable for commands from
++ * userspace. The flag is already set before hid_add_device(), which
++ * runs in workqueue context, to allow hid_add_device() to communicate
++ * with userspace.
++ * However, if hid_add_device() fails, the flag is cleared without
++ * holding devlock.
++ * We guarantee that if @running changes from true to false while you're
++ * holding @devlock, it's still fine to access @hid.
++ */
+ bool running;
+
+ __u8 *rd_data;
+ uint rd_size;
+
++ /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */
+ struct hid_device *hid;
+ struct uhid_event input_buf;
+
+@@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work)
+ if (ret) {
+ hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret);
+
+- hid_destroy_device(uhid->hid);
+- uhid->hid = NULL;
++ /* We used to call hid_destroy_device() here, but that's really
++ * messy to get right because we have to coordinate with
++ * concurrent writes from userspace that might be in the middle
++ * of using uhid->hid.
++ * Just leave uhid->hid as-is for now, and clean it up when
++ * userspace tries to close or reinitialize the uhid instance.
++ *
++ * However, we do have to clear the ->running flag and do a
++ * wakeup to make sure userspace knows that the device is gone.
++ */
+ uhid->running = false;
++ wake_up_interruptible(&uhid->report_wait);
+ }
+ }
+
+@@ -375,6 +395,7 @@ struct hid_ll_driver uhid_hid_driver = {
+ .parse = uhid_hid_parse,
+ .raw_request = uhid_hid_raw_request,
+ .output_report = uhid_hid_output_report,
++ .max_buffer_size = UHID_DATA_MAX,
+ };
+ EXPORT_SYMBOL_GPL(uhid_hid_driver);
+
+@@ -474,7 +495,7 @@ static int uhid_dev_create2(struct uhid_device *uhid,
+ void *rd_data;
+ int ret;
+
+- if (uhid->running)
++ if (uhid->hid)
+ return -EALREADY;
+
+ rd_size = ev->u.create2.rd_size;
+@@ -556,7 +577,7 @@ static int uhid_dev_create(struct uhid_device *uhid,
+
+ static int uhid_dev_destroy(struct uhid_device *uhid)
+ {
+- if (!uhid->running)
++ if (!uhid->hid)
+ return -EINVAL;
+
+ uhid->running = false;
+@@ -565,6 +586,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid)
+ cancel_work_sync(&uhid->worker);
+
+ hid_destroy_device(uhid->hid);
++ uhid->hid = NULL;
+ kfree(uhid->rd_data);
+
+ return 0;
+diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
+index 203d27d198b81..c034a1e850e45 100644
+--- a/drivers/hid/wacom.h
++++ b/drivers/hid/wacom.h
+@@ -91,6 +91,7 @@
+ #include <linux/leds.h>
+ #include <linux/usb/input.h>
+ #include <linux/power_supply.h>
++#include <linux/timer.h>
+ #include <asm/unaligned.h>
+
+ /*
+@@ -152,6 +153,7 @@ struct wacom_remote {
+ struct input_dev *input;
+ bool registered;
+ struct wacom_battery battery;
++ ktime_t active_time;
+ } remotes[WACOM_MAX_REMOTES];
+ };
+
+@@ -167,6 +169,7 @@ struct wacom {
+ struct delayed_work init_work;
+ struct wacom_remote *remote;
+ struct work_struct mode_change_work;
++ struct timer_list idleprox_timer;
+ bool generic_has_leds;
+ struct wacom_leds {
+ struct wacom_group_leds *groups;
+@@ -239,4 +242,5 @@ struct wacom_led *wacom_led_find(struct wacom *wacom, unsigned int group,
+ struct wacom_led *wacom_led_next(struct wacom *wacom, struct wacom_led *cur);
+ int wacom_equivalent_usage(int usage);
+ int wacom_initialize_leds(struct wacom *wacom);
++void wacom_idleprox_timeout(struct timer_list *list);
+ #endif
+diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
+index 93f49b766376e..76561f538eda3 100644
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -160,6 +160,9 @@ static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report,
+ {
+ struct wacom *wacom = hid_get_drvdata(hdev);
+
++ if (wacom->wacom_wac.features.type == BOOTLOADER)
++ return 0;
++
+ if (size > WACOM_PKGLEN_MAX)
+ return 1;
+
+@@ -726,7 +729,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev,
+ * Skip the query for this type and modify defaults based on
+ * interface number.
+ */
+- if (features->type == WIRELESS) {
++ if (features->type == WIRELESS && intf) {
+ if (intf->cur_altsetting->desc.bInterfaceNumber == 0)
+ features->device_type = WACOM_DEVICETYPE_WL_MONITOR;
+ else
+@@ -2124,7 +2127,7 @@ static int wacom_register_inputs(struct wacom *wacom)
+
+ error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
+ if (error) {
+- /* no pad in use on this interface */
++ /* no pad events using this interface */
+ input_free_device(pad_input_dev);
+ wacom_wac->pad_input = NULL;
+ pad_input_dev = NULL;
+@@ -2217,7 +2220,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix)
+ if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) {
+ char *product_name = wacom->hdev->name;
+
+- if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) {
++ if (hid_is_usb(wacom->hdev)) {
+ struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent);
+ struct usb_device *dev = interface_to_usbdev(intf);
+ product_name = dev->product;
+@@ -2422,8 +2425,13 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
+ goto fail_quirks;
+ }
+
+- if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
++ if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) {
+ error = hid_hw_open(hdev);
++ if (error) {
++ hid_err(hdev, "hw open failed\n");
++ goto fail_quirks;
++ }
++ }
+
+ wacom_set_shared_values(wacom_wac);
+ devres_close_group(&hdev->dev, wacom);
+@@ -2454,6 +2462,9 @@ static void wacom_wireless_work(struct work_struct *work)
+
+ wacom_destroy_battery(wacom);
+
++ if (!usbdev)
++ return;
++
+ /* Stylus interface */
+ hdev1 = usb_get_intfdata(usbdev->config->interface[1]);
+ wacom1 = hid_get_drvdata(hdev1);
+@@ -2524,6 +2535,18 @@ fail:
+ return;
+ }
+
++static void wacom_remote_destroy_battery(struct wacom *wacom, int index)
++{
++ struct wacom_remote *remote = wacom->remote;
++
++ if (remote->remotes[index].battery.battery) {
++ devres_release_group(&wacom->hdev->dev,
++ &remote->remotes[index].battery.bat_desc);
++ remote->remotes[index].battery.battery = NULL;
++ remote->remotes[index].active_time = 0;
++ }
++}
++
+ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index)
+ {
+ struct wacom_remote *remote = wacom->remote;
+@@ -2538,9 +2561,7 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index)
+ remote->remotes[i].registered = false;
+ spin_unlock_irqrestore(&remote->remote_lock, flags);
+
+- if (remote->remotes[i].battery.battery)
+- devres_release_group(&wacom->hdev->dev,
+- &remote->remotes[i].battery.bat_desc);
++ wacom_remote_destroy_battery(wacom, i);
+
+ if (remote->remotes[i].group.name)
+ devres_release_group(&wacom->hdev->dev,
+@@ -2548,7 +2569,6 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index)
+
+ remote->remotes[i].serial = 0;
+ remote->remotes[i].group.name = NULL;
+- remote->remotes[i].battery.battery = NULL;
+ wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN;
+ }
+ }
+@@ -2633,6 +2653,9 @@ static int wacom_remote_attach_battery(struct wacom *wacom, int index)
+ if (remote->remotes[index].battery.battery)
+ return 0;
+
++ if (!remote->remotes[index].active_time)
++ return 0;
++
+ if (wacom->led.groups[index].select == WACOM_STATUS_UNKNOWN)
+ return 0;
+
+@@ -2648,6 +2671,7 @@ static void wacom_remote_work(struct work_struct *work)
+ {
+ struct wacom *wacom = container_of(work, struct wacom, remote_work);
+ struct wacom_remote *remote = wacom->remote;
++ ktime_t kt = ktime_get();
+ struct wacom_remote_data data;
+ unsigned long flags;
+ unsigned int count;
+@@ -2674,6 +2698,10 @@ static void wacom_remote_work(struct work_struct *work)
+ serial = data.remote[i].serial;
+ if (data.remote[i].connected) {
+
++ if (kt - remote->remotes[i].active_time > WACOM_REMOTE_BATTERY_TIMEOUT
++ && remote->remotes[i].active_time != 0)
++ wacom_remote_destroy_battery(wacom, i);
++
+ if (remote->remotes[i].serial == serial) {
+ wacom_remote_attach_battery(wacom, i);
+ continue;
+@@ -2733,8 +2761,6 @@ static void wacom_mode_change_work(struct work_struct *work)
+ static int wacom_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+ {
+- struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+- struct usb_device *dev = interface_to_usbdev(intf);
+ struct wacom *wacom;
+ struct wacom_wac *wacom_wac;
+ struct wacom_features *features;
+@@ -2769,14 +2795,21 @@ static int wacom_probe(struct hid_device *hdev,
+ wacom_wac->hid_data.inputmode = -1;
+ wacom_wac->mode_report = -1;
+
+- wacom->usbdev = dev;
+- wacom->intf = intf;
++ if (hid_is_usb(hdev)) {
++ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
++ struct usb_device *dev = interface_to_usbdev(intf);
++
++ wacom->usbdev = dev;
++ wacom->intf = intf;
++ }
++
+ mutex_init(&wacom->lock);
+ INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work);
+ INIT_WORK(&wacom->wireless_work, wacom_wireless_work);
+ INIT_WORK(&wacom->battery_work, wacom_battery_work);
+ INIT_WORK(&wacom->remote_work, wacom_remote_work);
+ INIT_WORK(&wacom->mode_change_work, wacom_mode_change_work);
++ timer_setup(&wacom->idleprox_timer, &wacom_idleprox_timeout, TIMER_DEFERRABLE);
+
+ /* ask for the report descriptor to be loaded by HID */
+ error = hid_parse(hdev);
+@@ -2785,6 +2818,11 @@ static int wacom_probe(struct hid_device *hdev,
+ return error;
+ }
+
++ if (features->type == BOOTLOADER) {
++ hid_warn(hdev, "Using device in hidraw-only mode");
++ return hid_hw_start(hdev, HID_CONNECT_HIDRAW);
++ }
++
+ error = wacom_parse_and_register(wacom, false);
+ if (error)
+ return error;
+@@ -2817,6 +2855,7 @@ static void wacom_remove(struct hid_device *hdev)
+ cancel_work_sync(&wacom->battery_work);
+ cancel_work_sync(&wacom->remote_work);
+ cancel_work_sync(&wacom->mode_change_work);
++ del_timer_sync(&wacom->idleprox_timer);
+ if (hdev->bus == BUS_BLUETOOTH)
+ device_remove_file(&hdev->dev, &dev_attr_speed);
+
+diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
+index 33a6908995b1b..51a8e8d10519f 100644
+--- a/drivers/hid/wacom_wac.c
++++ b/drivers/hid/wacom_wac.c
+@@ -11,6 +11,7 @@
+ #include "wacom_wac.h"
+ #include "wacom.h"
+ #include <linux/input/mt.h>
++#include <linux/jiffies.h>
+
+ /* resolution for penabled devices */
+ #define WACOM_PL_RES 20
+@@ -41,6 +42,43 @@ static int wacom_numbered_button_to_key(int n);
+
+ static void wacom_update_led(struct wacom *wacom, int button_count, int mask,
+ int group);
++
++static void wacom_force_proxout(struct wacom_wac *wacom_wac)
++{
++ struct input_dev *input = wacom_wac->pen_input;
++
++ wacom_wac->shared->stylus_in_proximity = 0;
++
++ input_report_key(input, BTN_TOUCH, 0);
++ input_report_key(input, BTN_STYLUS, 0);
++ input_report_key(input, BTN_STYLUS2, 0);
++ input_report_key(input, BTN_STYLUS3, 0);
++ input_report_key(input, wacom_wac->tool[0], 0);
++ if (wacom_wac->serial[0]) {
++ input_report_abs(input, ABS_MISC, 0);
++ }
++ input_report_abs(input, ABS_PRESSURE, 0);
++
++ wacom_wac->tool[0] = 0;
++ wacom_wac->id[0] = 0;
++ wacom_wac->serial[0] = 0;
++
++ input_sync(input);
++}
++
++void wacom_idleprox_timeout(struct timer_list *list)
++{
++ struct wacom *wacom = from_timer(wacom, list, idleprox_timer);
++ struct wacom_wac *wacom_wac = &wacom->wacom_wac;
++
++ if (!wacom_wac->hid_data.sense_state) {
++ return;
++ }
++
++ hid_warn(wacom->hdev, "%s: tool appears to be hung in-prox. forcing it out.\n", __func__);
++ wacom_force_proxout(wacom_wac);
++}
++
+ /*
+ * Percent of battery capacity for Graphire.
+ * 8th value means AC online and show 100% capacity.
+@@ -638,9 +676,26 @@ static int wacom_intuos_id_mangle(int tool_id)
+ return (tool_id & ~0xFFF) << 4 | (tool_id & 0xFFF);
+ }
+
++static bool wacom_is_art_pen(int tool_id)
++{
++ bool is_art_pen = false;
++
++ switch (tool_id) {
++ case 0x885: /* Intuos3 Marker Pen */
++ case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */
++ case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */
++ is_art_pen = true;
++ break;
++ }
++ return is_art_pen;
++}
++
+ static int wacom_intuos_get_tool_type(int tool_id)
+ {
+- int tool_type;
++ int tool_type = BTN_TOOL_PEN;
++
++ if (wacom_is_art_pen(tool_id))
++ return tool_type;
+
+ switch (tool_id) {
+ case 0x812: /* Inking pen */
+@@ -655,17 +710,17 @@ static int wacom_intuos_get_tool_type(int tool_id)
+ case 0x852:
+ case 0x823: /* Intuos3 Grip Pen */
+ case 0x813: /* Intuos3 Classic Pen */
+- case 0x885: /* Intuos3 Marker Pen */
+ case 0x802: /* Intuos4/5 13HD/24HD General Pen */
+- case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */
+ case 0x8e2: /* IntuosHT2 pen */
+ case 0x022:
+- case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */
++ case 0x200: /* Pro Pen 3 */
++ case 0x04200: /* Pro Pen 3 */
+ case 0x10842: /* MobileStudio Pro Pro Pen slim */
+ case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */
+ case 0x16802: /* Cintiq 13HD Pro Pen */
+ case 0x18802: /* DTH2242 Pen */
+ case 0x10802: /* Intuos4/5 13HD/24HD General Pen */
++ case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */
+ tool_type = BTN_TOOL_PEN;
+ break;
+
+@@ -718,10 +773,6 @@ static int wacom_intuos_get_tool_type(int tool_id)
+ case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */
+ tool_type = BTN_TOOL_AIRBRUSH;
+ break;
+-
+- default: /* Unknown tool */
+- tool_type = BTN_TOOL_PEN;
+- break;
+ }
+ return tool_type;
+ }
+@@ -780,7 +831,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom)
+ /* Enter report */
+ if ((data[1] & 0xfc) == 0xc0) {
+ /* serial number of the tool */
+- wacom->serial[idx] = ((data[3] & 0x0f) << 28) +
++ wacom->serial[idx] = ((__u64)(data[3] & 0x0f) << 28) +
+ (data[4] << 20) + (data[5] << 12) +
+ (data[6] << 4) + (data[7] >> 4);
+
+@@ -1083,6 +1134,7 @@ static int wacom_remote_irq(struct wacom_wac *wacom_wac, size_t len)
+ if (index < 0 || !remote->remotes[index].registered)
+ goto out;
+
++ remote->remotes[index].active_time = ktime_get();
+ input = remote->remotes[index].input;
+
+ input_report_key(input, BTN_0, (data[9] & 0x01));
+@@ -1262,6 +1314,9 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
+
+ struct input_dev *pen_input = wacom->pen_input;
+ unsigned char *data = wacom->data;
++ int number_of_valid_frames = 0;
++ ktime_t time_interval = 15000000;
++ ktime_t time_packet_received = ktime_get();
+ int i;
+
+ if (wacom->features.type == INTUOSP2_BT ||
+@@ -1282,12 +1337,30 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
+ wacom->id[0] |= (wacom->serial[0] >> 32) & 0xFFFFF;
+ }
+
++ /* number of valid frames */
+ for (i = 0; i < pen_frames; i++) {
+ unsigned char *frame = &data[i*pen_frame_len + 1];
+ bool valid = frame[0] & 0x80;
++
++ if (valid)
++ number_of_valid_frames++;
++ }
++
++ if (number_of_valid_frames) {
++ if (wacom->hid_data.time_delayed)
++ time_interval = ktime_get() - wacom->hid_data.time_delayed;
++ time_interval = div_u64(time_interval, number_of_valid_frames);
++ wacom->hid_data.time_delayed = time_packet_received;
++ }
++
++ for (i = 0; i < number_of_valid_frames; i++) {
++ unsigned char *frame = &data[i*pen_frame_len + 1];
++ bool valid = frame[0] & 0x80;
+ bool prox = frame[0] & 0x40;
+ bool range = frame[0] & 0x20;
+ bool invert = frame[0] & 0x10;
++ int frames_number_reversed = number_of_valid_frames - i - 1;
++ ktime_t event_timestamp = time_packet_received - frames_number_reversed * time_interval;
+
+ if (!valid)
+ continue;
+@@ -1300,6 +1373,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
+ wacom->tool[0] = 0;
+ wacom->id[0] = 0;
+ wacom->serial[0] = 0;
++ wacom->hid_data.time_delayed = 0;
+ return;
+ }
+
+@@ -1336,6 +1410,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
+ get_unaligned_le16(&frame[11]));
+ }
+ }
++
+ if (wacom->tool[0]) {
+ input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5]));
+ if (wacom->features.type == INTUOSP2_BT ||
+@@ -1359,6 +1434,9 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
+
+ wacom->shared->stylus_in_proximity = prox;
+
++ /* add timestamp to unpack the frames */
++ input_set_timestamp(pen_input, event_timestamp);
++
+ input_sync(pen_input);
+ }
+ }
+@@ -1847,6 +1925,7 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
+ int fmax = field->logical_maximum;
+ unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid);
+ int resolution_code = code;
++ int resolution = hidinput_calc_abs_res(field, resolution_code);
+
+ if (equivalent_usage == HID_DG_TWIST) {
+ resolution_code = ABS_RZ;
+@@ -1867,8 +1946,15 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
+ switch (type) {
+ case EV_ABS:
+ input_set_abs_params(input, code, fmin, fmax, fuzz, 0);
+- input_abs_set_res(input, code,
+- hidinput_calc_abs_res(field, resolution_code));
++
++ /* older tablet may miss physical usage */
++ if ((code == ABS_X || code == ABS_Y) && !resolution) {
++ resolution = WACOM_INTUOS_RES;
++ hid_warn(input,
++ "Wacom usage (%d) missing resolution \n",
++ code);
++ }
++ input_abs_set_res(input, code, resolution);
+ break;
+ case EV_KEY:
+ case EV_MSC:
+@@ -1881,18 +1967,7 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
+ static void wacom_wac_battery_usage_mapping(struct hid_device *hdev,
+ struct hid_field *field, struct hid_usage *usage)
+ {
+- struct wacom *wacom = hid_get_drvdata(hdev);
+- struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+- struct wacom_features *features = &wacom_wac->features;
+- unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
+-
+- switch (equivalent_usage) {
+- case HID_DG_BATTERYSTRENGTH:
+- case WACOM_HID_WD_BATTERY_LEVEL:
+- case WACOM_HID_WD_BATTERY_CHARGING:
+- features->quirks |= WACOM_QUIRK_BATTERY;
+- break;
+- }
++ return;
+ }
+
+ static void wacom_wac_battery_event(struct hid_device *hdev, struct hid_field *field,
+@@ -1913,18 +1988,21 @@ static void wacom_wac_battery_event(struct hid_device *hdev, struct hid_field *f
+ wacom_wac->hid_data.bat_connected = 1;
+ wacom_wac->hid_data.bat_status = WACOM_POWER_SUPPLY_STATUS_AUTO;
+ }
++ wacom_wac->features.quirks |= WACOM_QUIRK_BATTERY;
+ break;
+ case WACOM_HID_WD_BATTERY_LEVEL:
+ value = value * 100 / (field->logical_maximum - field->logical_minimum);
+ wacom_wac->hid_data.battery_capacity = value;
+ wacom_wac->hid_data.bat_connected = 1;
+ wacom_wac->hid_data.bat_status = WACOM_POWER_SUPPLY_STATUS_AUTO;
++ wacom_wac->features.quirks |= WACOM_QUIRK_BATTERY;
+ break;
+ case WACOM_HID_WD_BATTERY_CHARGING:
+ wacom_wac->hid_data.bat_charging = value;
+ wacom_wac->hid_data.ps_connected = value;
+ wacom_wac->hid_data.bat_connected = 1;
+ wacom_wac->hid_data.bat_status = WACOM_POWER_SUPPLY_STATUS_AUTO;
++ wacom_wac->features.quirks |= WACOM_QUIRK_BATTERY;
+ break;
+ }
+ }
+@@ -1940,18 +2018,15 @@ static void wacom_wac_battery_report(struct hid_device *hdev,
+ {
+ struct wacom *wacom = hid_get_drvdata(hdev);
+ struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+- struct wacom_features *features = &wacom_wac->features;
+
+- if (features->quirks & WACOM_QUIRK_BATTERY) {
+- int status = wacom_wac->hid_data.bat_status;
+- int capacity = wacom_wac->hid_data.battery_capacity;
+- bool charging = wacom_wac->hid_data.bat_charging;
+- bool connected = wacom_wac->hid_data.bat_connected;
+- bool powered = wacom_wac->hid_data.ps_connected;
++ int status = wacom_wac->hid_data.bat_status;
++ int capacity = wacom_wac->hid_data.battery_capacity;
++ bool charging = wacom_wac->hid_data.bat_charging;
++ bool connected = wacom_wac->hid_data.bat_connected;
++ bool powered = wacom_wac->hid_data.ps_connected;
+
+- wacom_notify_battery(wacom_wac, status, capacity, charging,
+- connected, powered);
+- }
++ wacom_notify_battery(wacom_wac, status, capacity, charging,
++ connected, powered);
+ }
+
+ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
+@@ -2007,7 +2082,6 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
+ wacom_wac->has_mute_touch_switch = true;
+ usage->type = EV_SW;
+ usage->code = SW_MUTE_DEVICE;
+- features->device_type |= WACOM_DEVICETYPE_PAD;
+ break;
+ case WACOM_HID_WD_TOUCHSTRIP:
+ wacom_map_usage(input, usage, field, EV_ABS, ABS_RX, 0);
+@@ -2087,6 +2161,30 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
+ wacom_wac->hid_data.inrange_state |= value;
+ }
+
++ /* Process touch switch state first since it is reported through touch interface,
++ * which is independent of pad interface. In the case when there are no other pad
++ * events, the pad interface will not even be created.
++ */
++ if ((equivalent_usage == WACOM_HID_WD_MUTE_DEVICE) ||
++ (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)) {
++ if (wacom_wac->shared->touch_input) {
++ bool *is_touch_on = &wacom_wac->shared->is_touch_on;
++
++ if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value)
++ *is_touch_on = !(*is_touch_on);
++ else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)
++ *is_touch_on = value;
++
++ input_report_switch(wacom_wac->shared->touch_input,
++ SW_MUTE_DEVICE, !(*is_touch_on));
++ input_sync(wacom_wac->shared->touch_input);
++ }
++ return;
++ }
++
++ if (!input)
++ return;
++
+ switch (equivalent_usage) {
+ case WACOM_HID_WD_TOUCHRING:
+ /*
+@@ -2122,22 +2220,6 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
+ input_event(input, usage->type, usage->code, 0);
+ break;
+
+- case WACOM_HID_WD_MUTE_DEVICE:
+- case WACOM_HID_WD_TOUCHONOFF:
+- if (wacom_wac->shared->touch_input) {
+- bool *is_touch_on = &wacom_wac->shared->is_touch_on;
+-
+- if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value)
+- *is_touch_on = !(*is_touch_on);
+- else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)
+- *is_touch_on = value;
+-
+- input_report_switch(wacom_wac->shared->touch_input,
+- SW_MUTE_DEVICE, !(*is_touch_on));
+- input_sync(wacom_wac->shared->touch_input);
+- }
+- break;
+-
+ case WACOM_HID_WD_MODE_CHANGE:
+ if (wacom_wac->is_direct_mode != value) {
+ wacom_wac->is_direct_mode = value;
+@@ -2299,6 +2381,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
+ value = field->logical_maximum - value;
+ break;
+ case HID_DG_INRANGE:
++ mod_timer(&wacom->idleprox_timer, jiffies + msecs_to_jiffies(100));
+ wacom_wac->hid_data.inrange_state = value;
+ if (!(features->quirks & WACOM_QUIRK_SENSE))
+ wacom_wac->hid_data.sense_state = value;
+@@ -2323,6 +2406,9 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
+ }
+ return;
+ case HID_DG_TWIST:
++ /* don't modify the value if the pen doesn't support the feature */
++ if (!wacom_is_art_pen(wacom_wac->id[0])) return;
++
+ /*
+ * Userspace expects pen twist to have its zero point when
+ * the buttons/finger is on the tablet's left. HID values
+@@ -2588,6 +2674,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
+ }
+ }
+
++static bool wacom_wac_slot_is_active(struct input_dev *dev, int key)
++{
++ struct input_mt *mt = dev->mt;
++ struct input_mt_slot *s;
++
++ if (!mt)
++ return false;
++
++ for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
++ if (s->key == key &&
++ input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) {
++ return true;
++ }
++ }
++
++ return false;
++}
++
+ static void wacom_wac_finger_event(struct hid_device *hdev,
+ struct hid_field *field, struct hid_usage *usage, __s32 value)
+ {
+@@ -2603,6 +2707,9 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
+ return;
+
+ switch (equivalent_usage) {
++ case HID_DG_CONFIDENCE:
++ wacom_wac->hid_data.confidence = value;
++ break;
+ case HID_GD_X:
+ wacom_wac->hid_data.x = value;
+ break;
+@@ -2635,8 +2742,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
+ }
+
+ if (usage->usage_index + 1 == field->report_count) {
+- if (equivalent_usage == wacom_wac->hid_data.last_slot_field)
+- wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
++ if (equivalent_usage == wacom_wac->hid_data.last_slot_field) {
++ bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input,
++ wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch;
++
++ if (wacom_wac->hid_data.confidence || touch_removed) {
++ wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
++ }
++ }
+ }
+ }
+
+@@ -2653,6 +2766,12 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
+
+ wacom_wac->is_invalid_bt_frame = false;
+
++ hid_data->confidence = true;
++
++ hid_data->cc_report = 0;
++ hid_data->cc_index = -1;
++ hid_data->cc_value_index = -1;
++
+ for (i = 0; i < report->maxfield; i++) {
+ struct hid_field *field = report->field[i];
+ int j;
+@@ -2686,11 +2805,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
+ hid_data->cc_index >= 0) {
+ struct hid_field *field = report->field[hid_data->cc_index];
+ int value = field->value[hid_data->cc_value_index];
+- if (value)
++ if (value) {
+ hid_data->num_expected = value;
++ hid_data->num_received = 0;
++ }
+ }
+ else {
+ hid_data->num_expected = wacom_wac->features.touch_max;
++ hid_data->num_received = 0;
+ }
+ }
+
+@@ -2718,6 +2840,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev,
+
+ input_sync(input);
+ wacom_wac->hid_data.num_received = 0;
++ wacom_wac->hid_data.num_expected = 0;
+
+ /* keep touch state for pen event */
+ wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac);
+@@ -2758,7 +2881,7 @@ void wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
+ /* usage tests must precede field tests */
+ if (WACOM_BATTERY_USAGE(usage))
+ wacom_wac_battery_event(hdev, field, usage, value);
+- else if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input)
++ else if (WACOM_PAD_FIELD(field))
+ wacom_wac_pad_event(hdev, field, usage, value);
+ else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
+ wacom_wac_pen_event(hdev, field, usage, value);
+@@ -4752,10 +4875,17 @@ static const struct wacom_features wacom_features_0x3c6 =
+ static const struct wacom_features wacom_features_0x3c8 =
+ { "Wacom Intuos BT M", 21600, 13500, 4095, 63,
+ INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 };
++static const struct wacom_features wacom_features_0x3dd =
++ { "Wacom Intuos Pro S", 31920, 19950, 8191, 63,
++ INTUOSP2S_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 7,
++ .touch_max = 10 };
+
+ static const struct wacom_features wacom_features_HID_ANY_ID =
+ { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
+
++static const struct wacom_features wacom_features_0x94 =
++ { "Wacom Bootloader", .type = BOOTLOADER };
++
+ #define USB_DEVICE_WACOM(prod) \
+ HID_DEVICE(BUS_USB, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\
+ .driver_data = (kernel_ulong_t)&wacom_features_##prod
+@@ -4829,6 +4959,7 @@ const struct hid_device_id wacom_ids[] = {
+ { USB_DEVICE_WACOM(0x84) },
+ { USB_DEVICE_WACOM(0x90) },
+ { USB_DEVICE_WACOM(0x93) },
++ { USB_DEVICE_WACOM(0x94) },
+ { USB_DEVICE_WACOM(0x97) },
+ { USB_DEVICE_WACOM(0x9A) },
+ { USB_DEVICE_WACOM(0x9F) },
+@@ -4927,6 +5058,7 @@ const struct hid_device_id wacom_ids[] = {
+ { BT_DEVICE_WACOM(0x393) },
+ { BT_DEVICE_WACOM(0x3c6) },
+ { BT_DEVICE_WACOM(0x3c8) },
++ { BT_DEVICE_WACOM(0x3dd) },
+ { USB_DEVICE_WACOM(0x4001) },
+ { USB_DEVICE_WACOM(0x4004) },
+ { USB_DEVICE_WACOM(0x5000) },
+diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
+index 8b2d4e5b2303c..4ea1910ec3faa 100644
+--- a/drivers/hid/wacom_wac.h
++++ b/drivers/hid/wacom_wac.h
+@@ -15,6 +15,7 @@
+ #define WACOM_NAME_MAX 64
+ #define WACOM_MAX_REMOTES 5
+ #define WACOM_STATUS_UNKNOWN 255
++#define WACOM_REMOTE_BATTERY_TIMEOUT 21000000000ll
+
+ /* packet length for individual models */
+ #define WACOM_PKGLEN_BBFUN 9
+@@ -242,6 +243,7 @@ enum {
+ MTTPC,
+ MTTPC_B,
+ HID_GENERIC,
++ BOOTLOADER,
+ MAX_TYPE
+ };
+
+@@ -301,6 +303,7 @@ struct hid_data {
+ bool barrelswitch;
+ bool barrelswitch2;
+ bool serialhi;
++ bool confidence;
+ int x;
+ int y;
+ int pressure;
+@@ -319,6 +322,7 @@ struct hid_data {
+ int bat_connected;
+ int ps_connected;
+ bool pad_input_event_flag;
++ ktime_t time_delayed;
+ };
+
+ struct wacom_remote_data {
+diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c
+index 44a3f5660c109..26f2c3c012978 100644
+--- a/drivers/hsi/controllers/omap_ssi_core.c
++++ b/drivers/hsi/controllers/omap_ssi_core.c
+@@ -502,8 +502,10 @@ static int ssi_probe(struct platform_device *pd)
+ platform_set_drvdata(pd, ssi);
+
+ err = ssi_add_controller(ssi, pd);
+- if (err < 0)
++ if (err < 0) {
++ hsi_put_controller(ssi);
+ goto out1;
++ }
+
+ pm_runtime_enable(&pd->dev);
+
+@@ -524,6 +526,7 @@ static int ssi_probe(struct platform_device *pd)
+ if (!childpdev) {
+ err = -ENODEV;
+ dev_err(&pd->dev, "failed to create ssi controller port\n");
++ of_node_put(child);
+ goto out3;
+ }
+ }
+@@ -535,9 +538,9 @@ out3:
+ device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
+ out2:
+ ssi_remove_controller(ssi);
++ pm_runtime_disable(&pd->dev);
+ out1:
+ platform_set_drvdata(pd, NULL);
+- pm_runtime_disable(&pd->dev);
+
+ return err;
+ }
+@@ -628,7 +631,13 @@ static int __init ssi_init(void) {
+ if (ret)
+ return ret;
+
+- return platform_driver_register(&ssi_port_pdriver);
++ ret = platform_driver_register(&ssi_port_pdriver);
++ if (ret) {
++ platform_driver_unregister(&ssi_pdriver);
++ return ret;
++ }
++
++ return 0;
+ }
+ module_init(ssi_init);
+
+diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c
+index a0cb5be246e1c..b9495b720f1bd 100644
+--- a/drivers/hsi/controllers/omap_ssi_port.c
++++ b/drivers/hsi/controllers/omap_ssi_port.c
+@@ -230,10 +230,10 @@ static int ssi_start_dma(struct hsi_msg *msg, int lch)
+ if (msg->ttype == HSI_MSG_READ) {
+ err = dma_map_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents,
+ DMA_FROM_DEVICE);
+- if (err < 0) {
++ if (!err) {
+ dev_dbg(&ssi->device, "DMA map SG failed !\n");
+ pm_runtime_put_autosuspend(omap_port->pdev);
+- return err;
++ return -EIO;
+ }
+ csdp = SSI_DST_BURST_4x32_BIT | SSI_DST_MEMORY_PORT |
+ SSI_SRC_SINGLE_ACCESS0 | SSI_SRC_PERIPHERAL_PORT |
+@@ -247,10 +247,10 @@ static int ssi_start_dma(struct hsi_msg *msg, int lch)
+ } else {
+ err = dma_map_sg(&ssi->device, msg->sgt.sgl, msg->sgt.nents,
+ DMA_TO_DEVICE);
+- if (err < 0) {
++ if (!err) {
+ dev_dbg(&ssi->device, "DMA map SG failed !\n");
+ pm_runtime_put_autosuspend(omap_port->pdev);
+- return err;
++ return -EIO;
+ }
+ csdp = SSI_SRC_BURST_4x32_BIT | SSI_SRC_MEMORY_PORT |
+ SSI_DST_SINGLE_ACCESS0 | SSI_DST_PERIPHERAL_PORT |
+diff --git a/drivers/hsi/hsi_core.c b/drivers/hsi/hsi_core.c
+index ec90713564e32..884066109699c 100644
+--- a/drivers/hsi/hsi_core.c
++++ b/drivers/hsi/hsi_core.c
+@@ -102,6 +102,7 @@ struct hsi_client *hsi_new_client(struct hsi_port *port,
+ if (device_register(&cl->device) < 0) {
+ pr_err("hsi: failed to register client: %s\n", info->name);
+ put_device(&cl->device);
++ goto err;
+ }
+
+ return cl;
+diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
+index d1123ceb38f3f..9a074cbdef78c 100644
+--- a/drivers/hv/Kconfig
++++ b/drivers/hv/Kconfig
+@@ -18,6 +18,7 @@ config HYPERV_TIMER
+ config HYPERV_UTILS
+ tristate "Microsoft Hyper-V Utilities driver"
+ depends on HYPERV && CONNECTOR && NLS
++ depends on PTP_1588_CLOCK_OPTIONAL
+ help
+ Select this option to enable the Hyper-V Utilities.
+
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
+index f3761c73b0742..6b967bb386907 100644
+--- a/drivers/hv/channel.c
++++ b/drivers/hv/channel.c
+@@ -1221,7 +1221,9 @@ u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
+
+ /*
+ * Cannot return an ID of 0, which is reserved for an unsolicited
+- * message from Hyper-V.
++ * message from Hyper-V; Hyper-V does not acknowledge (respond to)
++ * VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED requests with ID of
++ * 0 sent by the guest.
+ */
+ return current_id + 1;
+ }
+@@ -1246,7 +1248,7 @@ u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id)
+
+ /* Hyper-V can send an unsolicited message with ID of 0 */
+ if (!trans_id)
+- return trans_id;
++ return VMBUS_RQST_ERROR;
+
+ spin_lock_irqsave(&rqstor->req_lock, flags);
+
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
+index 142308526ec6a..62c864f8d991b 100644
+--- a/drivers/hv/channel_mgmt.c
++++ b/drivers/hv/channel_mgmt.c
+@@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
+ * execute:
+ *
+ * (a) In the "normal (i.e., not resuming from hibernation)" path,
+- * the full barrier in smp_store_mb() guarantees that the store
++ * the full barrier in virt_store_mb() guarantees that the store
+ * is propagated to all CPUs before the add_channel_work work
+ * is queued. In turn, add_channel_work is queued before the
+ * channel's ring buffer is allocated/initialized and the
+@@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel)
+ * recv_int_page before retrieving the channel pointer from the
+ * array of channels.
+ *
+- * (b) In the "resuming from hibernation" path, the smp_store_mb()
++ * (b) In the "resuming from hibernation" path, the virt_store_mb()
+ * guarantees that the store is propagated to all CPUs before
+ * the VMBus connection is marked as ready for the resume event
+ * (cf. check_ready_for_resume_event()). The interrupt handler
+ * of the VMBus driver and vmbus_chan_sched() can not run before
+ * vmbus_bus_resume() has completed execution (cf. resume_noirq).
+ */
+- smp_store_mb(
++ virt_store_mb(
+ vmbus_connection.channels[channel->offermsg.child_relid],
+ channel);
+ }
+@@ -531,13 +531,17 @@ static void vmbus_add_channel_work(struct work_struct *work)
+ * Add the new device to the bus. This will kick off device-driver
+ * binding which eventually invokes the device driver's AddDevice()
+ * method.
++ *
++ * If vmbus_device_register() fails, the 'device_obj' is freed in
++ * vmbus_device_release() as called by device_unregister() in the
++ * error path of vmbus_device_register(). In the outside error
++ * path, there's no need to free it.
+ */
+ ret = vmbus_device_register(newchannel->device_obj);
+
+ if (ret != 0) {
+ pr_err("unable to add child device object (relid %d)\n",
+ newchannel->offermsg.child_relid);
+- kfree(newchannel->device_obj);
+ goto err_deq_chan;
+ }
+
+@@ -637,6 +641,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
+ */
+ if (newchannel->offermsg.offer.sub_channel_index == 0) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
++ cpus_read_unlock();
+ /*
+ * Don't call free_channel(), because newchannel->kobj
+ * is not initialized yet.
+@@ -822,11 +827,22 @@ static void vmbus_wait_for_unload(void)
+ if (completion_done(&vmbus_connection.unload_event))
+ goto completed;
+
+- for_each_online_cpu(cpu) {
++ for_each_present_cpu(cpu) {
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
++ /*
++ * In a CoCo VM the synic_message_page is not allocated
++ * in hv_synic_alloc(). Instead it is set/cleared in
++ * hv_synic_enable_regs() and hv_synic_disable_regs()
++ * such that it is set only when the CPU is online. If
++ * not all present CPUs are online, the message page
++ * might be NULL, so skip such CPUs.
++ */
+ page_addr = hv_cpu->synic_message_page;
++ if (!page_addr)
++ continue;
++
+ msg = (struct hv_message *)page_addr
+ + VMBUS_MESSAGE_SINT;
+
+@@ -860,11 +876,14 @@ completed:
+ * maybe-pending messages on all CPUs to be able to receive new
+ * messages after we reconnect.
+ */
+- for_each_online_cpu(cpu) {
++ for_each_present_cpu(cpu) {
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
+ page_addr = hv_cpu->synic_message_page;
++ if (!page_addr)
++ continue;
++
+ msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ msg->header.message_type = HVMSG_NONE;
+ }
+diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
+index 5e479d54918cf..47fb412eafd35 100644
+--- a/drivers/hv/connection.c
++++ b/drivers/hv/connection.c
+@@ -315,6 +315,10 @@ void vmbus_disconnect(void)
+ */
+ struct vmbus_channel *relid2channel(u32 relid)
+ {
++ if (vmbus_connection.channels == NULL) {
++ pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid);
++ return NULL;
++ }
+ if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
+ return NULL;
+ return READ_ONCE(vmbus_connection.channels[relid]);
+diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
+index 7f11ea07d698f..3248b48f37f61 100644
+--- a/drivers/hv/hv_balloon.c
++++ b/drivers/hv/hv_balloon.c
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/kthread.h>
+ #include <linux/completion.h>
++#include <linux/count_zeros.h>
+ #include <linux/memory_hotplug.h>
+ #include <linux/memory.h>
+ #include <linux/notifier.h>
+@@ -480,7 +481,7 @@ module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
+ MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
+ static atomic_t trans_id = ATOMIC_INIT(0);
+
+-static int dm_ring_size = 20 * 1024;
++static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);
+
+ /*
+ * Driver specific state.
+@@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm)
+ struct dm_status status;
+ unsigned long now = jiffies;
+ unsigned long last_post = last_post_time;
++ unsigned long num_pages_avail, num_pages_committed;
+
+ if (pressure_report_delay > 0) {
+ --pressure_report_delay;
+@@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm)
+ * num_pages_onlined) as committed to the host, otherwise it can try
+ * asking us to balloon them out.
+ */
+- status.num_avail = si_mem_available();
+- status.num_committed = vm_memory_committed() +
++ num_pages_avail = si_mem_available();
++ num_pages_committed = vm_memory_committed() +
+ dm->num_pages_ballooned +
+ (dm->num_pages_added > dm->num_pages_onlined ?
+ dm->num_pages_added - dm->num_pages_onlined : 0) +
+ compute_balloon_floor();
+
+- trace_balloon_status(status.num_avail, status.num_committed,
++ trace_balloon_status(num_pages_avail, num_pages_committed,
+ vm_memory_committed(), dm->num_pages_ballooned,
+ dm->num_pages_added, dm->num_pages_onlined);
++
++ /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
++ status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
++ status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;
++
+ /*
+ * If our transaction ID is no longer current, just don't
+ * send the status. This can happen if we were interrupted
+@@ -1563,7 +1570,7 @@ static void balloon_onchannelcallback(void *context)
+ break;
+
+ default:
+- pr_warn("Unhandled message: type: %d\n", dm_hdr->type);
++ pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type);
+
+ }
+ }
+@@ -1653,6 +1660,38 @@ static void disable_page_reporting(void)
+ }
+ }
+
++static int ballooning_enabled(void)
++{
++ /*
++ * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE),
++ * since currently it's unclear to us whether an unballoon request can
++ * make sure all page ranges are guest page size aligned.
++ */
++ if (PAGE_SIZE != HV_HYP_PAGE_SIZE) {
++ pr_info("Ballooning disabled because page size is not 4096 bytes\n");
++ return 0;
++ }
++
++ return 1;
++}
++
++static int hot_add_enabled(void)
++{
++ /*
++ * Disable hot add on ARM64, because we currently rely on
++ * memory_add_physaddr_to_nid() to get a node id of a hot add range,
++ * however ARM64's memory_add_physaddr_to_nid() always return 0 and
++ * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for
++ * add_memory().
++ */
++ if (IS_ENABLED(CONFIG_ARM64)) {
++ pr_info("Memory hot add disabled on ARM64\n");
++ return 0;
++ }
++
++ return 1;
++}
++
+ static int balloon_connect_vsp(struct hv_device *dev)
+ {
+ struct dm_version_request version_req;
+@@ -1660,6 +1699,13 @@ static int balloon_connect_vsp(struct hv_device *dev)
+ unsigned long t;
+ int ret;
+
++ /*
++ * max_pkt_size should be large enough for one vmbus packet header plus
++ * our receive buffer size. Hyper-V sends messages up to
++ * HV_HYP_PAGE_SIZE bytes long on balloon channel.
++ */
++ dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
++
+ ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
+ balloon_onchannelcallback, dev);
+ if (ret)
+@@ -1717,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev)
+ * currently still requires the bits to be set, so we have to add code
+ * to fail the host's hot-add and balloon up/down requests, if any.
+ */
+- cap_msg.caps.cap_bits.balloon = 1;
+- cap_msg.caps.cap_bits.hot_add = 1;
++ cap_msg.caps.cap_bits.balloon = ballooning_enabled();
++ cap_msg.caps.cap_bits.hot_add = hot_add_enabled();
+
+ /*
+ * Specify our alignment requirements as it relates
+diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
+index 314015d9e912d..1475ea77351ef 100644
+--- a/drivers/hv/ring_buffer.c
++++ b/drivers/hv/ring_buffer.c
+@@ -249,6 +249,19 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
+ ring_info->pkt_buffer_size = 0;
+ }
+
++/*
++ * Check if the ring buffer spinlock is available to take or not; used on
++ * atomic contexts, like panic path (see the Hyper-V framebuffer driver).
++ */
++
++bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel)
++{
++ struct hv_ring_buffer_info *rinfo = &channel->outbound;
++
++ return spin_is_locked(&rinfo->ring_lock);
++}
++EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy);
++
+ /* Write to the ring buffer. */
+ int hv_ringbuffer_write(struct vmbus_channel *channel,
+ const struct kvec *kv_list, u32 kv_count,
+@@ -408,7 +421,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
+ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
+ {
+ u32 priv_read_loc = rbi->priv_read_index;
+- u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
++ u32 write_loc;
++
++ /*
++ * The Hyper-V host writes the packet data, then uses
++ * store_release() to update the write_index. Use load_acquire()
++ * here to prevent loads of the packet data from being re-ordered
++ * before the read of the write_index and potentially getting
++ * stale data.
++ */
++ write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
+
+ if (write_loc >= priv_read_loc)
+ return write_loc - priv_read_loc;
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 392c1ac4f8193..115835bd562c7 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -76,8 +76,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
+
+ /*
+ * Hyper-V should be notified only once about a panic. If we will be
+- * doing hyperv_report_panic_msg() later with kmsg data, don't do
+- * the notification here.
++ * doing hv_kmsg_dump() with kmsg data later, don't do the notification
++ * here.
+ */
+ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
+ && hyperv_report_reg()) {
+@@ -99,8 +99,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
+
+ /*
+ * Hyper-V should be notified only once about a panic. If we will be
+- * doing hyperv_report_panic_msg() later with kmsg data, don't do
+- * the notification here.
++ * doing hv_kmsg_dump() with kmsg data later, don't do the notification
++ * here.
+ */
+ if (hyperv_report_reg())
+ hyperv_report_panic(regs, val, true);
+@@ -1381,7 +1381,7 @@ static void vmbus_isr(void)
+ tasklet_schedule(&hv_cpu->msg_dpc);
+ }
+
+- add_interrupt_randomness(vmbus_interrupt, 0);
++ add_interrupt_randomness(vmbus_interrupt);
+ }
+
+ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
+@@ -1538,21 +1538,27 @@ static int vmbus_bus_init(void)
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
+ hv_synic_init, hv_synic_cleanup);
+ if (ret < 0)
+- goto err_cpuhp;
++ goto err_alloc;
+ hyperv_cpuhp_online = ret;
+
+ ret = vmbus_connect();
+ if (ret)
+ goto err_connect;
+
++ if (hv_is_isolation_supported())
++ sysctl_record_panic_msg = 0;
++
+ /*
+ * Only register if the crash MSRs are available
+ */
+ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
+ u64 hyperv_crash_ctl;
+ /*
+- * Sysctl registration is not fatal, since by default
+- * reporting is enabled.
++ * Panic message recording (sysctl_record_panic_msg)
++ * is enabled by default in non-isolated guests and
++ * disabled by default in isolated guests; the panic
++ * message recording won't be available in isolated
++ * guests should the following registration fail.
+ */
+ hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
+ if (!hv_ctl_table_hdr)
+@@ -1583,9 +1589,8 @@ static int vmbus_bus_init(void)
+
+ err_connect:
+ cpuhp_remove_state(hyperv_cpuhp_online);
+-err_cpuhp:
+- hv_synic_free();
+ err_alloc:
++ hv_synic_free();
+ if (vmbus_irq == -1) {
+ hv_remove_vmbus_handler();
+ } else {
+@@ -2027,8 +2032,10 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
+ kobj->kset = dev->channels_kset;
+ ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
+ "%u", relid);
+- if (ret)
++ if (ret) {
++ kobject_put(kobj);
+ return ret;
++ }
+
+ ret = sysfs_create_group(kobj, &vmbus_chan_group);
+
+@@ -2037,6 +2044,7 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
+ * The calling functions' error handling paths will cleanup the
+ * empty channel directory.
+ */
++ kobject_put(kobj);
+ dev_err(device, "Unable to set up channel sysfs files\n");
+ return ret;
+ }
+@@ -2100,6 +2108,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
+ ret = device_register(&child_device_obj->device);
+ if (ret) {
+ pr_err("Unable to register child device\n");
++ put_device(&child_device_obj->device);
+ return ret;
+ }
+
+@@ -2331,7 +2340,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
+ bool fb_overlap_ok)
+ {
+ struct resource *iter, *shadow;
+- resource_size_t range_min, range_max, start;
++ resource_size_t range_min, range_max, start, end;
+ const char *dev_n = dev_name(&device_obj->device);
+ int retval;
+
+@@ -2366,6 +2375,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
+ range_max = iter->end;
+ start = (range_min + align - 1) & ~(align - 1);
+ for (; start + size - 1 <= range_max; start += align) {
++ end = start + size - 1;
++
++ /* Skip the whole fb_mmio region if not fb_overlap_ok */
++ if (!fb_overlap_ok && fb_mmio &&
++ (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
++ ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
++ continue;
++
+ shadow = __request_region(iter, start, size, NULL,
+ IORESOURCE_BUSY);
+ if (!shadow)
+@@ -2773,10 +2790,15 @@ static void __exit vmbus_exit(void)
+ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
+ kmsg_dump_unregister(&hv_kmsg_dumper);
+ unregister_die_notifier(&hyperv_die_block);
+- atomic_notifier_chain_unregister(&panic_notifier_list,
+- &hyperv_panic_block);
+ }
+
++ /*
++ * The panic notifier is always registered, hence we should
++ * also unconditionally unregister it here as well.
++ */
++ atomic_notifier_chain_unregister(&panic_notifier_list,
++ &hyperv_panic_block);
++
+ free_page((unsigned long)hv_panic_page);
+ unregister_sysctl_table(hv_ctl_table_hdr);
+ hv_ctl_table_hdr = NULL;
+diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
+index c4578e8f34bb5..17ba1d9ff0751 100644
+--- a/drivers/hwmon/Kconfig
++++ b/drivers/hwmon/Kconfig
+@@ -776,6 +776,7 @@ config SENSORS_IT87
+ config SENSORS_JC42
+ tristate "JEDEC JC42.4 compliant memory module temperature sensors"
+ depends on I2C
++ select REGMAP_I2C
+ help
+ If you say yes here, you get support for JEDEC JC42.4 compliant
+ temperature sensors, which are used on many DDR3 memory modules for
+@@ -944,7 +945,7 @@ config SENSORS_LTC4261
+
+ config SENSORS_LTQ_CPUTEMP
+ bool "Lantiq cpu temperature sensor driver"
+- depends on LANTIQ
++ depends on SOC_XWAY
+ help
+ If you say yes here you get support for the temperature
+ sensor inside your CPU.
+@@ -1317,7 +1318,7 @@ config SENSORS_LM90
+ Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6654, MAX6657, MAX6658,
+ MAX6659, MAX6680, MAX6681, MAX6692, MAX6695, MAX6696,
+ ON Semiconductor NCT1008, Winbond/Nuvoton W83L771W/G/AWG/ASG,
+- Philips SA56004, GMT G781, and Texas Instruments TMP451
++ Philips SA56004, GMT G781, Texas Instruments TMP451 and TMP461
+ sensor chips.
+
+ This driver can also be built as a module. If so, the module
+diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
+index d519aca4a9d64..c67cd037a93fd 100644
+--- a/drivers/hwmon/adt7470.c
++++ b/drivers/hwmon/adt7470.c
+@@ -19,6 +19,7 @@
+ #include <linux/log2.h>
+ #include <linux/kthread.h>
+ #include <linux/regmap.h>
++#include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/util_macros.h>
+
+@@ -294,11 +295,10 @@ static int adt7470_update_thread(void *p)
+ adt7470_read_temperatures(data);
+ mutex_unlock(&data->lock);
+
+- set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+
+- schedule_timeout(msecs_to_jiffies(data->auto_update_interval));
++ schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval));
+ }
+
+ return 0;
+@@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val
+ struct adt7470_data *data = dev_get_drvdata(dev);
+ int err;
+
++ if (val <= 0)
++ return -EINVAL;
++
+ val = FAN_RPM_TO_PERIOD(val);
+ val = clamp_val(val, 1, 65534);
+
+diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
+index 9d5b019651f2d..22e314725def0 100644
+--- a/drivers/hwmon/adt7475.c
++++ b/drivers/hwmon/adt7475.c
+@@ -486,10 +486,10 @@ static ssize_t temp_store(struct device *dev, struct device_attribute *attr,
+ val = (temp - val) / 1000;
+
+ if (sattr->index != 1) {
+- data->temp[HYSTERSIS][sattr->index] &= 0xF0;
++ data->temp[HYSTERSIS][sattr->index] &= 0x0F;
+ data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4;
+ } else {
+- data->temp[HYSTERSIS][sattr->index] &= 0x0F;
++ data->temp[HYSTERSIS][sattr->index] &= 0xF0;
+ data->temp[HYSTERSIS][sattr->index] |= (val & 0xF);
+ }
+
+@@ -554,11 +554,11 @@ static ssize_t temp_st_show(struct device *dev, struct device_attribute *attr,
+ val = data->enh_acoustics[0] & 0xf;
+ break;
+ case 1:
+- val = (data->enh_acoustics[1] >> 4) & 0xf;
++ val = data->enh_acoustics[1] & 0xf;
+ break;
+ case 2:
+ default:
+- val = data->enh_acoustics[1] & 0xf;
++ val = (data->enh_acoustics[1] >> 4) & 0xf;
+ break;
+ }
+
+@@ -1515,9 +1515,9 @@ static int adt7475_set_pwm_polarity(struct i2c_client *client)
+ int ret, i;
+ u8 val;
+
+- ret = of_property_read_u32_array(client->dev.of_node,
+- "adi,pwm-active-state", states,
+- ARRAY_SIZE(states));
++ ret = device_property_read_u32_array(&client->dev,
++ "adi,pwm-active-state", states,
++ ARRAY_SIZE(states));
+ if (ret)
+ return ret;
+
+diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
+index bb9211215a688..eaae5de2ab616 100644
+--- a/drivers/hwmon/coretemp.c
++++ b/drivers/hwmon/coretemp.c
+@@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
+ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
+ #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
+
+-#define TO_CORE_ID(cpu) (cpu_data(cpu).cpu_core_id)
+-#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
+-
+ #ifdef CONFIG_SMP
+ #define for_each_sibling(i, cpu) \
+ for_each_cpu(i, topology_sibling_cpumask(cpu))
+@@ -91,6 +88,8 @@ struct temp_data {
+ struct platform_data {
+ struct device *hwmon_dev;
+ u16 pkg_id;
++ u16 cpu_map[NUM_REAL_CORES];
++ struct ida ida;
+ struct cpumask cpumask;
+ struct temp_data *core_data[MAX_CORE_DATA];
+ struct device_attribute name_attr;
+@@ -243,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
+ */
+ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) {
+ for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) {
+- if (host_bridge->device == tjmax_pci_table[i].device)
++ if (host_bridge->device == tjmax_pci_table[i].device) {
++ pci_dev_put(host_bridge);
+ return tjmax_pci_table[i].tjmax;
++ }
+ }
+ }
++ pci_dev_put(host_bridge);
+
+ for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) {
+ if (strstr(c->x86_model_id, tjmax_table[i].id))
+@@ -441,7 +443,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
+ MSR_IA32_THERM_STATUS;
+ tdata->is_pkg_data = pkg_flag;
+ tdata->cpu = cpu;
+- tdata->cpu_core_id = TO_CORE_ID(cpu);
++ tdata->cpu_core_id = topology_core_id(cpu);
+ tdata->attr_size = MAX_CORE_ATTRS;
+ mutex_init(&tdata->update_lock);
+ return tdata;
+@@ -454,7 +456,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
+ struct platform_data *pdata = platform_get_drvdata(pdev);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u32 eax, edx;
+- int err, attr_no;
++ int err, index, attr_no;
+
+ /*
+ * Find attr number for sysfs:
+@@ -462,14 +464,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
+ * The attr number is always core id + 2
+ * The Pkgtemp will always show up as temp1_*, if available
+ */
+- attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
++ if (pkg_flag) {
++ attr_no = PKG_SYSFS_ATTR_NO;
++ } else {
++ index = ida_alloc(&pdata->ida, GFP_KERNEL);
++ if (index < 0)
++ return index;
++ pdata->cpu_map[index] = topology_core_id(cpu);
++ attr_no = index + BASE_SYSFS_ATTR_NO;
++ }
+
+- if (attr_no > MAX_CORE_DATA - 1)
+- return -ERANGE;
++ if (attr_no > MAX_CORE_DATA - 1) {
++ err = -ERANGE;
++ goto ida_free;
++ }
+
+ tdata = init_temp_data(cpu, pkg_flag);
+- if (!tdata)
+- return -ENOMEM;
++ if (!tdata) {
++ err = -ENOMEM;
++ goto ida_free;
++ }
+
+ /* Test if we can access the status register */
+ err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
+@@ -505,6 +519,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
+ exit_free:
+ pdata->core_data[attr_no] = NULL;
+ kfree(tdata);
++ida_free:
++ if (!pkg_flag)
++ ida_free(&pdata->ida, index);
+ return err;
+ }
+
+@@ -519,71 +536,63 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
+ {
+ struct temp_data *tdata = pdata->core_data[indx];
+
++ /* if we errored on add then this is already gone */
++ if (!tdata)
++ return;
++
+ /* Remove the sysfs attributes */
+ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
+
+ kfree(pdata->core_data[indx]);
+ pdata->core_data[indx] = NULL;
++
++ if (indx >= BASE_SYSFS_ATTR_NO)
++ ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
+ }
+
+-static int coretemp_probe(struct platform_device *pdev)
++static int coretemp_device_add(int zoneid)
+ {
+- struct device *dev = &pdev->dev;
++ struct platform_device *pdev;
+ struct platform_data *pdata;
++ int err;
+
+ /* Initialize the per-zone data structures */
+- pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
++ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+- pdata->pkg_id = pdev->id;
+- platform_set_drvdata(pdev, pdata);
+-
+- pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
+- pdata, NULL);
+- return PTR_ERR_OR_ZERO(pdata->hwmon_dev);
+-}
++ pdata->pkg_id = zoneid;
++ ida_init(&pdata->ida);
+
+-static int coretemp_remove(struct platform_device *pdev)
+-{
+- struct platform_data *pdata = platform_get_drvdata(pdev);
+- int i;
++ pdev = platform_device_alloc(DRVNAME, zoneid);
++ if (!pdev) {
++ err = -ENOMEM;
++ goto err_free_pdata;
++ }
+
+- for (i = MAX_CORE_DATA - 1; i >= 0; --i)
+- if (pdata->core_data[i])
+- coretemp_remove_core(pdata, i);
++ err = platform_device_add(pdev);
++ if (err)
++ goto err_put_dev;
+
++ platform_set_drvdata(pdev, pdata);
++ zone_devices[zoneid] = pdev;
+ return 0;
+-}
+
+-static struct platform_driver coretemp_driver = {
+- .driver = {
+- .name = DRVNAME,
+- },
+- .probe = coretemp_probe,
+- .remove = coretemp_remove,
+-};
++err_put_dev:
++ platform_device_put(pdev);
++err_free_pdata:
++ kfree(pdata);
++ return err;
++}
+
+-static struct platform_device *coretemp_device_add(unsigned int cpu)
++static void coretemp_device_remove(int zoneid)
+ {
+- int err, zoneid = topology_logical_die_id(cpu);
+- struct platform_device *pdev;
+-
+- if (zoneid < 0)
+- return ERR_PTR(-ENOMEM);
+-
+- pdev = platform_device_alloc(DRVNAME, zoneid);
+- if (!pdev)
+- return ERR_PTR(-ENOMEM);
+-
+- err = platform_device_add(pdev);
+- if (err) {
+- platform_device_put(pdev);
+- return ERR_PTR(err);
+- }
++ struct platform_device *pdev = zone_devices[zoneid];
++ struct platform_data *pdata = platform_get_drvdata(pdev);
+
+- zone_devices[zoneid] = pdev;
+- return pdev;
++ ida_destroy(&pdata->ida);
++ kfree(pdata);
++ platform_device_unregister(pdev);
+ }
+
+ static int coretemp_cpu_online(unsigned int cpu)
+@@ -607,7 +616,10 @@ static int coretemp_cpu_online(unsigned int cpu)
+ if (!cpu_has(c, X86_FEATURE_DTHERM))
+ return -ENODEV;
+
+- if (!pdev) {
++ pdata = platform_get_drvdata(pdev);
++ if (!pdata->hwmon_dev) {
++ struct device *hwmon;
++
+ /* Check the microcode version of the CPU */
+ if (chk_ucode_version(cpu))
+ return -EINVAL;
+@@ -618,9 +630,11 @@ static int coretemp_cpu_online(unsigned int cpu)
+ * online. So, initialize per-pkg data structures and
+ * then bring this core online.
+ */
+- pdev = coretemp_device_add(cpu);
+- if (IS_ERR(pdev))
+- return PTR_ERR(pdev);
++ hwmon = hwmon_device_register_with_groups(&pdev->dev, DRVNAME,
++ pdata, NULL);
++ if (IS_ERR(hwmon))
++ return PTR_ERR(hwmon);
++ pdata->hwmon_dev = hwmon;
+
+ /*
+ * Check whether pkgtemp support is available.
+@@ -630,7 +644,6 @@ static int coretemp_cpu_online(unsigned int cpu)
+ coretemp_add_core(pdev, cpu, 1);
+ }
+
+- pdata = platform_get_drvdata(pdev);
+ /*
+ * Check whether a thread sibling is already online. If not add the
+ * interface for this CPU core.
+@@ -647,25 +660,28 @@ static int coretemp_cpu_offline(unsigned int cpu)
+ struct platform_device *pdev = coretemp_get_pdev(cpu);
+ struct platform_data *pd;
+ struct temp_data *tdata;
+- int indx, target;
++ int i, indx = -1, target;
+
+- /*
+- * Don't execute this on suspend as the device remove locks
+- * up the machine.
+- */
++ /* No need to tear down any interfaces for suspend */
+ if (cpuhp_tasks_frozen)
+ return 0;
+
+ /* If the physical CPU device does not exist, just return */
+- if (!pdev)
++ pd = platform_get_drvdata(pdev);
++ if (!pd->hwmon_dev)
+ return 0;
+
+- /* The core id is too big, just return */
+- indx = TO_ATTR_NO(cpu);
+- if (indx > MAX_CORE_DATA - 1)
++ for (i = 0; i < NUM_REAL_CORES; i++) {
++ if (pd->cpu_map[i] == topology_core_id(cpu)) {
++ indx = i + BASE_SYSFS_ATTR_NO;
++ break;
++ }
++ }
++
++ /* Too many cores and this core is not populated, just return */
++ if (indx < 0)
+ return 0;
+
+- pd = platform_get_drvdata(pdev);
+ tdata = pd->core_data[indx];
+
+ cpumask_clear_cpu(cpu, &pd->cpumask);
+@@ -685,13 +701,14 @@ static int coretemp_cpu_offline(unsigned int cpu)
+ }
+
+ /*
+- * If all cores in this pkg are offline, remove the device. This
+- * will invoke the platform driver remove function, which cleans up
+- * the rest.
++ * If all cores in this pkg are offline, remove the interface.
+ */
++ tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
+ if (cpumask_empty(&pd->cpumask)) {
+- zone_devices[topology_logical_die_id(cpu)] = NULL;
+- platform_device_unregister(pdev);
++ if (tdata)
++ coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
++ hwmon_device_unregister(pd->hwmon_dev);
++ pd->hwmon_dev = NULL;
+ return 0;
+ }
+
+@@ -699,7 +716,6 @@ static int coretemp_cpu_offline(unsigned int cpu)
+ * Check whether this core is the target for the package
+ * interface. We need to assign it to some other cpu.
+ */
+- tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
+ if (tdata && tdata->cpu == cpu) {
+ target = cpumask_first(&pd->cpumask);
+ mutex_lock(&tdata->update_lock);
+@@ -718,7 +734,7 @@ static enum cpuhp_state coretemp_hp_online;
+
+ static int __init coretemp_init(void)
+ {
+- int err;
++ int i, err;
+
+ /*
+ * CPUID.06H.EAX[0] indicates whether the CPU has thermal
+@@ -734,20 +750,22 @@ static int __init coretemp_init(void)
+ if (!zone_devices)
+ return -ENOMEM;
+
+- err = platform_driver_register(&coretemp_driver);
+- if (err)
+- goto outzone;
++ for (i = 0; i < max_zones; i++) {
++ err = coretemp_device_add(i);
++ if (err)
++ goto outzone;
++ }
+
+ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/coretemp:online",
+ coretemp_cpu_online, coretemp_cpu_offline);
+ if (err < 0)
+- goto outdrv;
++ goto outzone;
+ coretemp_hp_online = err;
+ return 0;
+
+-outdrv:
+- platform_driver_unregister(&coretemp_driver);
+ outzone:
++ while (i--)
++ coretemp_device_remove(i);
+ kfree(zone_devices);
+ return err;
+ }
+@@ -755,8 +773,11 @@ module_init(coretemp_init)
+
+ static void __exit coretemp_exit(void)
+ {
++ int i;
++
+ cpuhp_remove_state(coretemp_hp_online);
+- platform_driver_unregister(&coretemp_driver);
++ for (i = 0; i < max_zones; i++)
++ coretemp_device_remove(i);
+ kfree(zone_devices);
+ }
+ module_exit(coretemp_exit)
+diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
+index 731d5117f9f10..14389fd7afb89 100644
+--- a/drivers/hwmon/corsair-psu.c
++++ b/drivers/hwmon/corsair-psu.c
+@@ -729,7 +729,7 @@ static int corsairpsu_probe(struct hid_device *hdev, const struct hid_device_id
+ corsairpsu_check_cmd_support(priv);
+
+ priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "corsairpsu", priv,
+- &corsairpsu_chip_info, 0);
++ &corsairpsu_chip_info, NULL);
+
+ if (IS_ERR(priv->hwmon_dev)) {
+ ret = PTR_ERR(priv->hwmon_dev);
+diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
+index 774c1b0715d91..597cbb4391bdc 100644
+--- a/drivers/hwmon/dell-smm-hwmon.c
++++ b/drivers/hwmon/dell-smm-hwmon.c
+@@ -326,7 +326,7 @@ static int i8k_enable_fan_auto_mode(const struct dell_smm_data *data, bool enabl
+ }
+
+ /*
+- * Set the fan speed (off, low, high). Returns the new fan status.
++ * Set the fan speed (off, low, high, ...).
+ */
+ static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
+ {
+@@ -338,7 +338,7 @@ static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
+ speed = (speed < 0) ? 0 : ((speed > data->i8k_fan_max) ? data->i8k_fan_max : speed);
+ regs.ebx = (fan & 0xff) | (speed << 8);
+
+- return i8k_smm(&regs) ? : i8k_get_fan_status(data, fan);
++ return i8k_smm(&regs);
+ }
+
+ static int __init i8k_get_temp_type(int sensor)
+@@ -452,7 +452,7 @@ static int
+ i8k_ioctl_unlocked(struct file *fp, struct dell_smm_data *data, unsigned int cmd, unsigned long arg)
+ {
+ int val = 0;
+- int speed;
++ int speed, err;
+ unsigned char buff[16];
+ int __user *argp = (int __user *)arg;
+
+@@ -513,7 +513,11 @@ i8k_ioctl_unlocked(struct file *fp, struct dell_smm_data *data, unsigned int cmd
+ if (copy_from_user(&speed, argp + 1, sizeof(int)))
+ return -EFAULT;
+
+- val = i8k_set_fan(data, val, speed);
++ err = i8k_set_fan(data, val, speed);
++ if (err < 0)
++ return err;
++
++ val = i8k_get_fan_status(data, val);
+ break;
+
+ default:
+@@ -623,10 +627,9 @@ static void __init i8k_init_procfs(struct device *dev)
+ {
+ struct dell_smm_data *data = dev_get_drvdata(dev);
+
+- /* Register the proc entry */
+- proc_create_data("i8k", 0, NULL, &i8k_proc_ops, data);
+-
+- devm_add_action_or_reset(dev, i8k_exit_procfs, NULL);
++ /* Only register exit function if creation was successful */
++ if (proc_create_data("i8k", 0, NULL, &i8k_proc_ops, data))
++ devm_add_action_or_reset(dev, i8k_exit_procfs, NULL);
+ }
+
+ #else
+@@ -1195,6 +1198,14 @@ static const struct dmi_system_id i8k_whitelist_fan_control[] __initconst = {
+ },
+ .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
+ },
++ {
++ .ident = "Dell XPS 13 7390",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS 13 7390"),
++ },
++ .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3],
++ },
+ { }
+ };
+
+diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c
+index 1eb37106a220b..5bac2b0fc7bb6 100644
+--- a/drivers/hwmon/drivetemp.c
++++ b/drivers/hwmon/drivetemp.c
+@@ -621,3 +621,4 @@ module_exit(drivetemp_exit);
+ MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>");
+ MODULE_DESCRIPTION("Hard drive temperature monitor");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS("platform:drivetemp");
+diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
+index 4dec793fd07d5..94b35723ee7ad 100644
+--- a/drivers/hwmon/f71882fg.c
++++ b/drivers/hwmon/f71882fg.c
+@@ -1577,8 +1577,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr,
+ temp *= 125;
+ if (sign)
+ temp -= 128000;
+- } else
+- temp = data->temp[nr] * 1000;
++ } else {
++ temp = ((s8)data->temp[nr]) * 1000;
++ }
+
+ return sprintf(buf, "%d\n", temp);
+ }
+diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c
+index ceffc76a0c515..2998d8cdce006 100644
+--- a/drivers/hwmon/ftsteutates.c
++++ b/drivers/hwmon/ftsteutates.c
+@@ -12,6 +12,7 @@
+ #include <linux/i2c.h>
+ #include <linux/init.h>
+ #include <linux/jiffies.h>
++#include <linux/math.h>
+ #include <linux/module.h>
+ #include <linux/mutex.h>
+ #include <linux/slab.h>
+@@ -347,13 +348,15 @@ static ssize_t in_value_show(struct device *dev,
+ {
+ struct fts_data *data = dev_get_drvdata(dev);
+ int index = to_sensor_dev_attr(devattr)->index;
+- int err;
++ int value, err;
+
+ err = fts_update_device(data);
+ if (err < 0)
+ return err;
+
+- return sprintf(buf, "%u\n", data->volt[index]);
++ value = DIV_ROUND_CLOSEST(data->volt[index] * 3300, 255);
++
++ return sprintf(buf, "%d\n", value);
+ }
+
+ static ssize_t temp_value_show(struct device *dev,
+@@ -361,13 +364,15 @@ static ssize_t temp_value_show(struct device *dev,
+ {
+ struct fts_data *data = dev_get_drvdata(dev);
+ int index = to_sensor_dev_attr(devattr)->index;
+- int err;
++ int value, err;
+
+ err = fts_update_device(data);
+ if (err < 0)
+ return err;
+
+- return sprintf(buf, "%u\n", data->temp_input[index]);
++ value = (data->temp_input[index] - 64) * 1000;
++
++ return sprintf(buf, "%d\n", value);
+ }
+
+ static ssize_t temp_fault_show(struct device *dev,
+@@ -436,13 +441,15 @@ static ssize_t fan_value_show(struct device *dev,
+ {
+ struct fts_data *data = dev_get_drvdata(dev);
+ int index = to_sensor_dev_attr(devattr)->index;
+- int err;
++ int value, err;
+
+ err = fts_update_device(data);
+ if (err < 0)
+ return err;
+
+- return sprintf(buf, "%u\n", data->fan_input[index]);
++ value = data->fan_input[index] * 60;
++
++ return sprintf(buf, "%d\n", value);
+ }
+
+ static ssize_t fan_source_show(struct device *dev,
+diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
+index befe989ca7b94..fbf3f5a4ecb67 100644
+--- a/drivers/hwmon/gpio-fan.c
++++ b/drivers/hwmon/gpio-fan.c
+@@ -391,6 +391,9 @@ static int gpio_fan_set_cur_state(struct thermal_cooling_device *cdev,
+ if (!fan_data)
+ return -EINVAL;
+
++ if (state >= fan_data->num_speed)
++ return -EINVAL;
++
+ set_fan_speed(fan_data, state);
+ return 0;
+ }
+diff --git a/drivers/hwmon/gsc-hwmon.c b/drivers/hwmon/gsc-hwmon.c
+index 1fe37418ff46c..89d036bf88df7 100644
+--- a/drivers/hwmon/gsc-hwmon.c
++++ b/drivers/hwmon/gsc-hwmon.c
+@@ -82,8 +82,8 @@ static ssize_t pwm_auto_point_temp_store(struct device *dev,
+ if (kstrtol(buf, 10, &temp))
+ return -EINVAL;
+
+- temp = clamp_val(temp, 0, 10000);
+- temp = DIV_ROUND_CLOSEST(temp, 10);
++ temp = clamp_val(temp, 0, 100000);
++ temp = DIV_ROUND_CLOSEST(temp, 100);
+
+ regs[0] = temp & 0xff;
+ regs[1] = (temp >> 8) & 0xff;
+@@ -100,7 +100,7 @@ static ssize_t pwm_auto_point_pwm_show(struct device *dev,
+ {
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+
+- return sprintf(buf, "%d\n", 255 * (50 + (attr->index * 10)) / 100);
++ return sprintf(buf, "%d\n", 255 * (50 + (attr->index * 10)));
+ }
+
+ static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point1_pwm, pwm_auto_point_pwm, 0);
+@@ -267,6 +267,7 @@ gsc_hwmon_get_devtree_pdata(struct device *dev)
+ pdata->nchannels = nchannels;
+
+ /* fan controller base address */
++ of_node_get(dev->parent->of_node);
+ fan = of_find_compatible_node(dev->parent->of_node, NULL, "gw,gsc-fan");
+ if (fan && of_property_read_u32(fan, "reg", &pdata->fan_base)) {
+ dev_err(dev, "fan node without base\n");
+diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
+index 8d3b1dae31df1..fd3b277d340a9 100644
+--- a/drivers/hwmon/hwmon.c
++++ b/drivers/hwmon/hwmon.c
+@@ -214,12 +214,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index)
+
+ tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata,
+ &hwmon_thermal_ops);
+- /*
+- * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+- * so ignore that error but forward any other error.
+- */
+- if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+- return PTR_ERR(tzd);
++ if (IS_ERR(tzd)) {
++ if (PTR_ERR(tzd) != -ENODEV)
++ return PTR_ERR(tzd);
++ dev_info(dev, "temp%d_input not attached to any thermal zone\n",
++ index + 1);
++ devm_kfree(dev, tdata);
++ return 0;
++ }
+
+ err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node);
+ if (err)
+@@ -734,6 +736,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
+ {
+ struct hwmon_device *hwdev;
+ struct device *hdev;
++ struct device *tdev = dev;
+ int i, err, id;
+
+ /* Complain about invalid characters in hwmon name attribute */
+@@ -791,17 +794,21 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
+ hwdev->name = name;
+ hdev->class = &hwmon_class;
+ hdev->parent = dev;
+- hdev->of_node = dev ? dev->of_node : NULL;
++ while (tdev && !tdev->of_node)
++ tdev = tdev->parent;
++ hdev->of_node = tdev ? tdev->of_node : NULL;
+ hwdev->chip = chip;
+ dev_set_drvdata(hdev, drvdata);
+ dev_set_name(hdev, HWMON_ID_FORMAT, id);
+ err = device_register(hdev);
+- if (err)
+- goto free_hwmon;
++ if (err) {
++ put_device(hdev);
++ goto ida_remove;
++ }
+
+ INIT_LIST_HEAD(&hwdev->tzdata);
+
+- if (dev && dev->of_node && chip && chip->ops->read &&
++ if (hdev->of_node && chip && chip->ops->read &&
+ chip->info[0]->type == hwmon_chip &&
+ (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) {
+ err = hwmon_thermal_register_sensors(hdev);
+diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c
+index 360f5aee13947..d4be03f43fb45 100644
+--- a/drivers/hwmon/i5500_temp.c
++++ b/drivers/hwmon/i5500_temp.c
+@@ -108,7 +108,7 @@ static int i5500_temp_probe(struct pci_dev *pdev,
+ u32 tstimer;
+ s8 tsfsc;
+
+- err = pci_enable_device(pdev);
++ err = pcim_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable device\n");
+ return err;
+diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c
+index a4ec85207782d..2e6d6a5cffa16 100644
+--- a/drivers/hwmon/ibmaem.c
++++ b/drivers/hwmon/ibmaem.c
+@@ -550,7 +550,7 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle)
+
+ res = platform_device_add(data->pdev);
+ if (res)
+- goto ipmi_err;
++ goto dev_add_err;
+
+ platform_set_drvdata(data->pdev, data);
+
+@@ -598,7 +598,9 @@ hwmon_reg_err:
+ ipmi_destroy_user(data->ipmi.user);
+ ipmi_err:
+ platform_set_drvdata(data->pdev, NULL);
+- platform_device_unregister(data->pdev);
++ platform_device_del(data->pdev);
++dev_add_err:
++ platform_device_put(data->pdev);
+ dev_err:
+ ida_simple_remove(&aem_ida, data->id);
+ id_err:
+@@ -690,7 +692,7 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe,
+
+ res = platform_device_add(data->pdev);
+ if (res)
+- goto ipmi_err;
++ goto dev_add_err;
+
+ platform_set_drvdata(data->pdev, data);
+
+@@ -738,7 +740,9 @@ hwmon_reg_err:
+ ipmi_destroy_user(data->ipmi.user);
+ ipmi_err:
+ platform_set_drvdata(data->pdev, NULL);
+- platform_device_unregister(data->pdev);
++ platform_device_del(data->pdev);
++dev_add_err:
++ platform_device_put(data->pdev);
+ dev_err:
+ ida_simple_remove(&aem_ida, data->id);
+ id_err:
+diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
+index b2ab83c9fd9a8..fe90f0536d76c 100644
+--- a/drivers/hwmon/ibmpex.c
++++ b/drivers/hwmon/ibmpex.c
+@@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev)
+ return;
+
+ out_register:
++ list_del(&data->list);
+ hwmon_device_unregister(data->hwmon_dev);
+ out_user:
+ ipmi_destroy_user(data->user);
+diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
+index 58d3828e2ec0c..bc90631148ea4 100644
+--- a/drivers/hwmon/ina3221.c
++++ b/drivers/hwmon/ina3221.c
+@@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg,
+ * Shunt Voltage Sum register has 14-bit value with 1-bit shift
+ * Other Shunt Voltage registers have 12 bits with 3-bit shift
+ */
+- if (reg == INA3221_SHUNT_SUM)
++ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
+ *val = sign_extend32(regval >> 1, 14);
+ else
+ *val = sign_extend32(regval >> 3, 12);
+@@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr,
+ * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV
+ * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV
+ */
+- if (reg == INA3221_SHUNT_SUM)
++ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
+ regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe;
+ else
+ regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8;
+@@ -772,7 +772,7 @@ static int ina3221_probe_child_from_dt(struct device *dev,
+ return ret;
+ } else if (val > INA3221_CHANNEL3) {
+ dev_err(dev, "invalid reg %d of %pOFn\n", val, child);
+- return ret;
++ return -EINVAL;
+ }
+
+ input = &ina->inputs[val];
+diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
+index 1f93134afcb9f..485d68ab79e17 100644
+--- a/drivers/hwmon/it87.c
++++ b/drivers/hwmon/it87.c
+@@ -486,6 +486,8 @@ static const struct it87_devices it87_devices[] = {
+ #define has_pwm_freq2(data) ((data)->features & FEAT_PWM_FREQ2)
+ #define has_six_temp(data) ((data)->features & FEAT_SIX_TEMP)
+ #define has_vin3_5v(data) ((data)->features & FEAT_VIN3_5V)
++#define has_scaling(data) ((data)->features & (FEAT_12MV_ADC | \
++ FEAT_10_9MV_ADC))
+
+ struct it87_sio_data {
+ int sioaddr;
+@@ -3098,7 +3100,7 @@ static int it87_probe(struct platform_device *pdev)
+ "Detected broken BIOS defaults, disabling PWM interface\n");
+
+ /* Starting with IT8721F, we handle scaling of internal voltages */
+- if (has_12mv_adc(data)) {
++ if (has_scaling(data)) {
+ if (sio_data->internal & BIT(0))
+ data->in_scaled |= BIT(3); /* in3 is AVCC */
+ if (sio_data->internal & BIT(1))
+diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
+index 4a03d010ec5a8..52f341d46029b 100644
+--- a/drivers/hwmon/jc42.c
++++ b/drivers/hwmon/jc42.c
+@@ -19,6 +19,7 @@
+ #include <linux/err.h>
+ #include <linux/mutex.h>
+ #include <linux/of.h>
++#include <linux/regmap.h>
+
+ /* Addresses to scan */
+ static const unsigned short normal_i2c[] = {
+@@ -189,31 +190,14 @@ static struct jc42_chips jc42_chips[] = {
+ { STM_MANID, STTS3000_DEVID, STTS3000_DEVID_MASK },
+ };
+
+-enum temp_index {
+- t_input = 0,
+- t_crit,
+- t_min,
+- t_max,
+- t_num_temp
+-};
+-
+-static const u8 temp_regs[t_num_temp] = {
+- [t_input] = JC42_REG_TEMP,
+- [t_crit] = JC42_REG_TEMP_CRITICAL,
+- [t_min] = JC42_REG_TEMP_LOWER,
+- [t_max] = JC42_REG_TEMP_UPPER,
+-};
+-
+ /* Each client has this additional data */
+ struct jc42_data {
+- struct i2c_client *client;
+ struct mutex update_lock; /* protect register access */
++ struct regmap *regmap;
+ bool extended; /* true if extended range supported */
+ bool valid;
+- unsigned long last_updated; /* In jiffies */
+ u16 orig_config; /* original configuration */
+ u16 config; /* current configuration */
+- u16 temp[t_num_temp];/* Temperatures */
+ };
+
+ #define JC42_TEMP_MIN_EXTENDED (-40000)
+@@ -238,85 +222,102 @@ static int jc42_temp_from_reg(s16 reg)
+ return reg * 125 / 2;
+ }
+
+-static struct jc42_data *jc42_update_device(struct device *dev)
+-{
+- struct jc42_data *data = dev_get_drvdata(dev);
+- struct i2c_client *client = data->client;
+- struct jc42_data *ret = data;
+- int i, val;
+-
+- mutex_lock(&data->update_lock);
+-
+- if (time_after(jiffies, data->last_updated + HZ) || !data->valid) {
+- for (i = 0; i < t_num_temp; i++) {
+- val = i2c_smbus_read_word_swapped(client, temp_regs[i]);
+- if (val < 0) {
+- ret = ERR_PTR(val);
+- goto abort;
+- }
+- data->temp[i] = val;
+- }
+- data->last_updated = jiffies;
+- data->valid = true;
+- }
+-abort:
+- mutex_unlock(&data->update_lock);
+- return ret;
+-}
+-
+ static int jc42_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+ {
+- struct jc42_data *data = jc42_update_device(dev);
+- int temp, hyst;
++ struct jc42_data *data = dev_get_drvdata(dev);
++ unsigned int regval;
++ int ret, temp, hyst;
+
+- if (IS_ERR(data))
+- return PTR_ERR(data);
++ mutex_lock(&data->update_lock);
+
+ switch (attr) {
+ case hwmon_temp_input:
+- *val = jc42_temp_from_reg(data->temp[t_input]);
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP, &regval);
++ if (ret)
++ break;
++
++ *val = jc42_temp_from_reg(regval);
++ break;
+ case hwmon_temp_min:
+- *val = jc42_temp_from_reg(data->temp[t_min]);
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_LOWER, &regval);
++ if (ret)
++ break;
++
++ *val = jc42_temp_from_reg(regval);
++ break;
+ case hwmon_temp_max:
+- *val = jc42_temp_from_reg(data->temp[t_max]);
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_UPPER, &regval);
++ if (ret)
++ break;
++
++ *val = jc42_temp_from_reg(regval);
++ break;
+ case hwmon_temp_crit:
+- *val = jc42_temp_from_reg(data->temp[t_crit]);
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL,
++ &regval);
++ if (ret)
++ break;
++
++ *val = jc42_temp_from_reg(regval);
++ break;
+ case hwmon_temp_max_hyst:
+- temp = jc42_temp_from_reg(data->temp[t_max]);
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_UPPER, &regval);
++ if (ret)
++ break;
++
++ temp = jc42_temp_from_reg(regval);
+ hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK)
+ >> JC42_CFG_HYST_SHIFT];
+ *val = temp - hyst;
+- return 0;
++ break;
+ case hwmon_temp_crit_hyst:
+- temp = jc42_temp_from_reg(data->temp[t_crit]);
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL,
++ &regval);
++ if (ret)
++ break;
++
++ temp = jc42_temp_from_reg(regval);
+ hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK)
+ >> JC42_CFG_HYST_SHIFT];
+ *val = temp - hyst;
+- return 0;
++ break;
+ case hwmon_temp_min_alarm:
+- *val = (data->temp[t_input] >> JC42_ALARM_MIN_BIT) & 1;
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP, &regval);
++ if (ret)
++ break;
++
++ *val = (regval >> JC42_ALARM_MIN_BIT) & 1;
++ break;
+ case hwmon_temp_max_alarm:
+- *val = (data->temp[t_input] >> JC42_ALARM_MAX_BIT) & 1;
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP, &regval);
++ if (ret)
++ break;
++
++ *val = (regval >> JC42_ALARM_MAX_BIT) & 1;
++ break;
+ case hwmon_temp_crit_alarm:
+- *val = (data->temp[t_input] >> JC42_ALARM_CRIT_BIT) & 1;
+- return 0;
++ ret = regmap_read(data->regmap, JC42_REG_TEMP, &regval);
++ if (ret)
++ break;
++
++ *val = (regval >> JC42_ALARM_CRIT_BIT) & 1;
++ break;
+ default:
+- return -EOPNOTSUPP;
++ ret = -EOPNOTSUPP;
++ break;
+ }
++
++ mutex_unlock(&data->update_lock);
++
++ return ret;
+ }
+
+ static int jc42_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+ {
+ struct jc42_data *data = dev_get_drvdata(dev);
+- struct i2c_client *client = data->client;
++ unsigned int regval;
+ int diff, hyst;
+ int ret;
+
+@@ -324,21 +325,23 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type,
+
+ switch (attr) {
+ case hwmon_temp_min:
+- data->temp[t_min] = jc42_temp_to_reg(val, data->extended);
+- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_min],
+- data->temp[t_min]);
++ ret = regmap_write(data->regmap, JC42_REG_TEMP_LOWER,
++ jc42_temp_to_reg(val, data->extended));
+ break;
+ case hwmon_temp_max:
+- data->temp[t_max] = jc42_temp_to_reg(val, data->extended);
+- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_max],
+- data->temp[t_max]);
++ ret = regmap_write(data->regmap, JC42_REG_TEMP_UPPER,
++ jc42_temp_to_reg(val, data->extended));
+ break;
+ case hwmon_temp_crit:
+- data->temp[t_crit] = jc42_temp_to_reg(val, data->extended);
+- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_crit],
+- data->temp[t_crit]);
++ ret = regmap_write(data->regmap, JC42_REG_TEMP_CRITICAL,
++ jc42_temp_to_reg(val, data->extended));
+ break;
+ case hwmon_temp_crit_hyst:
++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL,
++ &regval);
++ if (ret)
++ break;
++
+ /*
+ * JC42.4 compliant chips only support four hysteresis values.
+ * Pick best choice and go from there.
+@@ -346,7 +349,7 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type,
+ val = clamp_val(val, (data->extended ? JC42_TEMP_MIN_EXTENDED
+ : JC42_TEMP_MIN) - 6000,
+ JC42_TEMP_MAX);
+- diff = jc42_temp_from_reg(data->temp[t_crit]) - val;
++ diff = jc42_temp_from_reg(regval) - val;
+ hyst = 0;
+ if (diff > 0) {
+ if (diff < 2250)
+@@ -358,9 +361,8 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type,
+ }
+ data->config = (data->config & ~JC42_CFG_HYST_MASK) |
+ (hyst << JC42_CFG_HYST_SHIFT);
+- ret = i2c_smbus_write_word_swapped(data->client,
+- JC42_REG_CONFIG,
+- data->config);
++ ret = regmap_write(data->regmap, JC42_REG_CONFIG,
++ data->config);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+@@ -458,51 +460,80 @@ static const struct hwmon_chip_info jc42_chip_info = {
+ .info = jc42_info,
+ };
+
++static bool jc42_readable_reg(struct device *dev, unsigned int reg)
++{
++ return (reg >= JC42_REG_CAP && reg <= JC42_REG_DEVICEID) ||
++ reg == JC42_REG_SMBUS;
++}
++
++static bool jc42_writable_reg(struct device *dev, unsigned int reg)
++{
++ return (reg >= JC42_REG_CONFIG && reg <= JC42_REG_TEMP_CRITICAL) ||
++ reg == JC42_REG_SMBUS;
++}
++
++static bool jc42_volatile_reg(struct device *dev, unsigned int reg)
++{
++ return reg == JC42_REG_CONFIG || reg == JC42_REG_TEMP;
++}
++
++static const struct regmap_config jc42_regmap_config = {
++ .reg_bits = 8,
++ .val_bits = 16,
++ .val_format_endian = REGMAP_ENDIAN_BIG,
++ .max_register = JC42_REG_SMBUS,
++ .writeable_reg = jc42_writable_reg,
++ .readable_reg = jc42_readable_reg,
++ .volatile_reg = jc42_volatile_reg,
++ .cache_type = REGCACHE_RBTREE,
++};
++
+ static int jc42_probe(struct i2c_client *client)
+ {
+ struct device *dev = &client->dev;
+ struct device *hwmon_dev;
++ unsigned int config, cap;
+ struct jc42_data *data;
+- int config, cap;
++ int ret;
+
+ data = devm_kzalloc(dev, sizeof(struct jc42_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+- data->client = client;
++ data->regmap = devm_regmap_init_i2c(client, &jc42_regmap_config);
++ if (IS_ERR(data->regmap))
++ return PTR_ERR(data->regmap);
++
+ i2c_set_clientdata(client, data);
+ mutex_init(&data->update_lock);
+
+- cap = i2c_smbus_read_word_swapped(client, JC42_REG_CAP);
+- if (cap < 0)
+- return cap;
++ ret = regmap_read(data->regmap, JC42_REG_CAP, &cap);
++ if (ret)
++ return ret;
+
+ data->extended = !!(cap & JC42_CAP_RANGE);
+
+ if (device_property_read_bool(dev, "smbus-timeout-disable")) {
+- int smbus;
+-
+ /*
+ * Not all chips support this register, but from a
+ * quick read of various datasheets no chip appears
+ * incompatible with the below attempt to disable
+ * the timeout. And the whole thing is opt-in...
+ */
+- smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS);
+- if (smbus < 0)
+- return smbus;
+- i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS,
+- smbus | SMBUS_STMOUT);
++ ret = regmap_set_bits(data->regmap, JC42_REG_SMBUS,
++ SMBUS_STMOUT);
++ if (ret)
++ return ret;
+ }
+
+- config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG);
+- if (config < 0)
+- return config;
++ ret = regmap_read(data->regmap, JC42_REG_CONFIG, &config);
++ if (ret)
++ return ret;
+
+ data->orig_config = config;
+ if (config & JC42_CFG_SHUTDOWN) {
+ config &= ~JC42_CFG_SHUTDOWN;
+- i2c_smbus_write_word_swapped(client, JC42_REG_CONFIG, config);
++ regmap_write(data->regmap, JC42_REG_CONFIG, config);
+ }
+ data->config = config;
+
+@@ -523,7 +554,7 @@ static int jc42_remove(struct i2c_client *client)
+
+ config = (data->orig_config & ~JC42_CFG_HYST_MASK)
+ | (data->config & JC42_CFG_HYST_MASK);
+- i2c_smbus_write_word_swapped(client, JC42_REG_CONFIG, config);
++ regmap_write(data->regmap, JC42_REG_CONFIG, config);
+ }
+ return 0;
+ }
+@@ -535,8 +566,11 @@ static int jc42_suspend(struct device *dev)
+ struct jc42_data *data = dev_get_drvdata(dev);
+
+ data->config |= JC42_CFG_SHUTDOWN;
+- i2c_smbus_write_word_swapped(data->client, JC42_REG_CONFIG,
+- data->config);
++ regmap_write(data->regmap, JC42_REG_CONFIG, data->config);
++
++ regcache_cache_only(data->regmap, true);
++ regcache_mark_dirty(data->regmap);
++
+ return 0;
+ }
+
+@@ -544,10 +578,13 @@ static int jc42_resume(struct device *dev)
+ {
+ struct jc42_data *data = dev_get_drvdata(dev);
+
++ regcache_cache_only(data->regmap, false);
++
+ data->config &= ~JC42_CFG_SHUTDOWN;
+- i2c_smbus_write_word_swapped(data->client, JC42_REG_CONFIG,
+- data->config);
+- return 0;
++ regmap_write(data->regmap, JC42_REG_CONFIG, data->config);
++
++ /* Restore cached register values to hardware */
++ return regcache_sync(data->regmap);
+ }
+
+ static const struct dev_pm_ops jc42_dev_pm_ops = {
+diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
+index 3618a924e78e4..f3cff6c9f7457 100644
+--- a/drivers/hwmon/k10temp.c
++++ b/drivers/hwmon/k10temp.c
+@@ -75,6 +75,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
+
+ #define ZEN_CUR_TEMP_SHIFT 21
+ #define ZEN_CUR_TEMP_RANGE_SEL_MASK BIT(19)
++#define ZEN_CUR_TEMP_TJ_SEL_MASK GENMASK(17, 16)
+
+ #define ZEN_SVI_BASE 0x0005A000
+
+@@ -96,6 +97,13 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
+ #define F19H_M01H_CFACTOR_ICORE 1000000 /* 1A / LSB */
+ #define F19H_M01H_CFACTOR_ISOC 310000 /* 0.31A / LSB */
+
++/*
++ * AMD's Industrial processor 3255 supports temperature from -40 deg to 105 deg Celsius.
++ * Use the model name to identify 3255 CPUs and set a flag to display negative temperature.
++ * Do not round off to zero for negative Tctl or Tdie values if the flag is set
++ */
++#define AMD_I3255_STR "3255"
++
+ struct k10temp_data {
+ struct pci_dev *pdev;
+ void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
+@@ -105,6 +113,7 @@ struct k10temp_data {
+ u32 show_temp;
+ bool is_zen;
+ u32 ccd_offset;
++ bool disp_negative;
+ };
+
+ #define TCTL_BIT 0
+@@ -175,7 +184,8 @@ static long get_raw_temp(struct k10temp_data *data)
+
+ data->read_tempreg(data->pdev, &regval);
+ temp = (regval >> ZEN_CUR_TEMP_SHIFT) * 125;
+- if (regval & data->temp_adjust_mask)
++ if ((regval & data->temp_adjust_mask) ||
++ (regval & ZEN_CUR_TEMP_TJ_SEL_MASK) == ZEN_CUR_TEMP_TJ_SEL_MASK)
+ temp -= 49000;
+ return temp;
+ }
+@@ -218,12 +228,12 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
+ switch (channel) {
+ case 0: /* Tctl */
+ *val = get_raw_temp(data);
+- if (*val < 0)
++ if (*val < 0 && !data->disp_negative)
+ *val = 0;
+ break;
+ case 1: /* Tdie */
+ *val = get_raw_temp(data) - data->temp_offset;
+- if (*val < 0)
++ if (*val < 0 && !data->disp_negative)
+ *val = 0;
+ break;
+ case 2 ... 9: /* Tccd{1-8} */
+@@ -415,6 +425,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ data->pdev = pdev;
+ data->show_temp |= BIT(TCTL_BIT); /* Always show Tctl */
+
++ if (boot_cpu_data.x86 == 0x17 &&
++ strstr(boot_cpu_data.x86_model_id, AMD_I3255_STR)) {
++ data->disp_negative = true;
++ }
++
+ if (boot_cpu_data.x86 == 0x15 &&
+ ((boot_cpu_data.x86_model & 0xf0) == 0x60 ||
+ (boot_cpu_data.x86_model & 0xf0) == 0x70)) {
+diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
+index 567b7c521f388..1c9493c708132 100644
+--- a/drivers/hwmon/lm90.c
++++ b/drivers/hwmon/lm90.c
+@@ -35,13 +35,14 @@
+ * explicitly as max6659, or if its address is not 0x4c.
+ * These chips lack the remote temperature offset feature.
+ *
+- * This driver also supports the MAX6654 chip made by Maxim. This chip can
+- * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is
+- * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available
+- * by setting the configuration register accordingly, and is done during
+- * initialization. Extended precision is only available at conversion rates
+- * of 1 Hz and slower. Note that extended precision is not enabled by
+- * default, as this driver initializes all chips to 2 Hz by design.
++ * This driver also supports the MAX6654 chip made by Maxim. This chip can be
++ * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar
++ * to MAX6657/MAX6658/MAX6659, but does not support critical temperature
++ * limits. Extended range is available by setting the configuration register
++ * accordingly, and is done during initialization. Extended precision is only
++ * available at conversion rates of 1 Hz and slower. Note that extended
++ * precision is not enabled by default, as this driver initializes all chips
++ * to 2 Hz by design.
+ *
+ * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and
+ * MAX6692 chips made by Maxim. These are again similar to the LM86,
+@@ -69,10 +70,10 @@
+ * This driver also supports the G781 from GMT. This device is compatible
+ * with the ADM1032.
+ *
+- * This driver also supports TMP451 from Texas Instruments. This device is
+- * supported in both compatibility and extended mode. It's mostly compatible
+- * with ADT7461 except for local temperature low byte register and max
+- * conversion rate.
++ * This driver also supports TMP451 and TMP461 from Texas Instruments.
++ * Those devices are supported in both compatibility and extended mode.
++ * They are mostly compatible with ADT7461 except for local temperature
++ * low byte register and max conversion rate.
+ *
+ * Since the LM90 was the first chipset supported by this driver, most
+ * comments will refer to this chipset, but are actually general and
+@@ -112,7 +113,7 @@ static const unsigned short normal_i2c[] = {
+ 0x4d, 0x4e, 0x4f, I2C_CLIENT_END };
+
+ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
+- max6646, w83l771, max6696, sa56004, g781, tmp451, max6654 };
++ max6646, w83l771, max6696, sa56004, g781, tmp451, tmp461, max6654 };
+
+ /*
+ * The LM90 registers
+@@ -168,8 +169,12 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
+
+ #define LM90_MAX_CONVRATE_MS 16000 /* Maximum conversion rate in ms */
+
+-/* TMP451 registers */
++/* TMP451/TMP461 registers */
+ #define TMP451_REG_R_LOCAL_TEMPL 0x15
++#define TMP451_REG_CONALERT 0x22
++
++#define TMP461_REG_CHEN 0x16
++#define TMP461_REG_DFC 0x24
+
+ /*
+ * Device flags
+@@ -182,7 +187,10 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
+ #define LM90_HAVE_EMERGENCY_ALARM (1 << 5)/* emergency alarm */
+ #define LM90_HAVE_TEMP3 (1 << 6) /* 3rd temperature sensor */
+ #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */
+-#define LM90_PAUSE_FOR_CONFIG (1 << 8) /* Pause conversion for config */
++#define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/
++#define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */
++#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */
++#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */
+
+ /* LM90 status */
+ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */
+@@ -192,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
+ #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */
+ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */
+ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */
++#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */
+
+ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */
+ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */
+@@ -229,6 +238,7 @@ static const struct i2c_device_id lm90_id[] = {
+ { "w83l771", w83l771 },
+ { "sa56004", sa56004 },
+ { "tmp451", tmp451 },
++ { "tmp461", tmp461 },
+ { }
+ };
+ MODULE_DEVICE_TABLE(i2c, lm90_id);
+@@ -326,6 +336,10 @@ static const struct of_device_id __maybe_unused lm90_of_match[] = {
+ .compatible = "ti,tmp451",
+ .data = (void *)tmp451
+ },
++ {
++ .compatible = "ti,tmp461",
++ .data = (void *)tmp461
++ },
+ { },
+ };
+ MODULE_DEVICE_TABLE(of, lm90_of_match);
+@@ -344,85 +358,99 @@ struct lm90_params {
+ static const struct lm90_params lm90_params[] = {
+ [adm1032] = {
+ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
+- | LM90_HAVE_BROKEN_ALERT,
++ | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 10,
+ },
+ [adt7461] = {
+ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
+- | LM90_HAVE_BROKEN_ALERT,
++ | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP
++ | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 10,
+ },
+ [g781] = {
+ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
+- | LM90_HAVE_BROKEN_ALERT,
++ | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+- .max_convrate = 8,
++ .max_convrate = 7,
+ },
+ [lm86] = {
+- .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
++ | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7b,
+ .max_convrate = 9,
+ },
+ [lm90] = {
+- .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
++ | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7b,
+ .max_convrate = 9,
+ },
+ [lm99] = {
+- .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
++ | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7b,
+ .max_convrate = 9,
+ },
+ [max6646] = {
++ .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 6,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
+ [max6654] = {
++ .flags = LM90_HAVE_BROKEN_ALERT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 7,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
+ [max6657] = {
+- .flags = LM90_PAUSE_FOR_CONFIG,
++ .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 8,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
+ [max6659] = {
+- .flags = LM90_HAVE_EMERGENCY,
++ .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 8,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
+ [max6680] = {
+- .flags = LM90_HAVE_OFFSET,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT
++ | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 7,
+ },
+ [max6696] = {
+ .flags = LM90_HAVE_EMERGENCY
+- | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3,
++ | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT,
+ .alert_alarms = 0x1c7c,
+ .max_convrate = 6,
+ .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL,
+ },
+ [w83l771] = {
+- .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 8,
+ },
+ [sa56004] = {
+- .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT,
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7b,
+ .max_convrate = 9,
+ .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL,
+ },
+ [tmp451] = {
+ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
+- | LM90_HAVE_BROKEN_ALERT,
++ | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT,
++ .alert_alarms = 0x7c,
++ .max_convrate = 9,
++ .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL,
++ },
++ [tmp461] = {
++ .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT
++ | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT,
+ .alert_alarms = 0x7c,
+ .max_convrate = 9,
+ .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL,
+@@ -651,20 +679,22 @@ static int lm90_update_limits(struct device *dev)
+ struct i2c_client *client = data->client;
+ int val;
+
+- val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT);
+- if (val < 0)
+- return val;
+- data->temp8[LOCAL_CRIT] = val;
++ if (data->flags & LM90_HAVE_CRIT) {
++ val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT);
++ if (val < 0)
++ return val;
++ data->temp8[LOCAL_CRIT] = val;
+
+- val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT);
+- if (val < 0)
+- return val;
+- data->temp8[REMOTE_CRIT] = val;
++ val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT);
++ if (val < 0)
++ return val;
++ data->temp8[REMOTE_CRIT] = val;
+
+- val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST);
+- if (val < 0)
+- return val;
+- data->temp_hyst = val;
++ val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST);
++ if (val < 0)
++ return val;
++ data->temp_hyst = val;
++ }
+
+ val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH);
+ if (val < 0)
+@@ -792,7 +822,7 @@ static int lm90_update_device(struct device *dev)
+ val = lm90_read_reg(client, LM90_REG_R_STATUS);
+ if (val < 0)
+ return val;
+- data->alarms = val; /* lower 8 bit of alarms */
++ data->alarms = val & ~LM90_STATUS_BUSY;
+
+ if (data->kind == max6696) {
+ val = lm90_select_remote_channel(data, 1);
+@@ -819,7 +849,7 @@ static int lm90_update_device(struct device *dev)
+ * Re-enable ALERT# output if it was originally enabled and
+ * relevant alarms are all clear
+ */
+- if (!(data->config_orig & 0x80) &&
++ if ((client->irq || !(data->config_orig & 0x80)) &&
+ !(data->alarms & data->alert_alarms)) {
+ if (data->config & 0x80) {
+ dev_dbg(&client->dev, "Re-enabling ALERT#\n");
+@@ -998,7 +1028,7 @@ static int lm90_get_temp11(struct lm90_data *data, int index)
+ s16 temp11 = data->temp11[index];
+ int temp;
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ temp = temp_from_u16_adt7461(data, temp11);
+ else if (data->kind == max6646)
+ temp = temp_from_u16(temp11);
+@@ -1035,7 +1065,7 @@ static int lm90_set_temp11(struct lm90_data *data, int index, long val)
+ val -= 16000;
+ }
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ data->temp11[index] = temp_to_u16_adt7461(data, val);
+ else if (data->kind == max6646)
+ data->temp11[index] = temp_to_u8(val) << 8;
+@@ -1062,7 +1092,7 @@ static int lm90_get_temp8(struct lm90_data *data, int index)
+ s8 temp8 = data->temp8[index];
+ int temp;
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ temp = temp_from_u8_adt7461(data, temp8);
+ else if (data->kind == max6646)
+ temp = temp_from_u8(temp8);
+@@ -1098,7 +1128,7 @@ static int lm90_set_temp8(struct lm90_data *data, int index, long val)
+ val -= 16000;
+ }
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ data->temp8[index] = temp_to_u8_adt7461(data, val);
+ else if (data->kind == max6646)
+ data->temp8[index] = temp_to_u8(val);
+@@ -1116,7 +1146,7 @@ static int lm90_get_temphyst(struct lm90_data *data, int index)
+ {
+ int temp;
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ temp = temp_from_u8_adt7461(data, data->temp8[index]);
+ else if (data->kind == max6646)
+ temp = temp_from_u8(data->temp8[index]);
+@@ -1136,15 +1166,15 @@ static int lm90_set_temphyst(struct lm90_data *data, long val)
+ int temp;
+ int err;
+
+- if (data->kind == adt7461 || data->kind == tmp451)
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP)
+ temp = temp_from_u8_adt7461(data, data->temp8[LOCAL_CRIT]);
+ else if (data->kind == max6646)
+ temp = temp_from_u8(data->temp8[LOCAL_CRIT]);
+ else
+ temp = temp_from_s8(data->temp8[LOCAL_CRIT]);
+
+- /* prevent integer underflow */
+- val = max(val, -128000l);
++ /* prevent integer overflow/underflow */
++ val = clamp_val(val, -128000l, 255000l);
+
+ data->temp_hyst = hyst_to_reg(temp - val);
+ err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST,
+@@ -1175,6 +1205,7 @@ static const u8 lm90_temp_emerg_index[3] = {
+ static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 };
+ static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 };
+ static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 };
++static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 };
+ static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 };
+ static const u8 lm90_fault_bits[3] = { 0, 2, 10 };
+
+@@ -1200,7 +1231,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val)
+ *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1;
+ break;
+ case hwmon_temp_crit_alarm:
+- *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1;
++ if (data->flags & LM90_HAVE_CRIT_ALRM_SWP)
++ *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1;
++ else
++ *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1;
+ break;
+ case hwmon_temp_emergency_alarm:
+ *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1;
+@@ -1448,12 +1482,11 @@ static int lm90_detect(struct i2c_client *client,
+ if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0)
+ return -ENODEV;
+
+- if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) {
++ if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) {
+ config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2);
+ if (config2 < 0)
+ return -ENODEV;
+- } else
+- config2 = 0; /* Make compiler happy */
++ }
+
+ if ((address == 0x4C || address == 0x4D)
+ && man_id == 0x01) { /* National Semiconductor */
+@@ -1627,18 +1660,26 @@ static int lm90_detect(struct i2c_client *client,
+ && convrate <= 0x08)
+ name = "g781";
+ } else
+- if (address == 0x4C
+- && man_id == 0x55) { /* Texas Instruments */
+- int local_ext;
++ if (man_id == 0x55 && chip_id == 0x00 &&
++ (config1 & 0x1B) == 0x00 && convrate <= 0x09) {
++ int local_ext, conalert, chen, dfc;
+
+ local_ext = i2c_smbus_read_byte_data(client,
+ TMP451_REG_R_LOCAL_TEMPL);
+-
+- if (chip_id == 0x00 /* TMP451 */
+- && (config1 & 0x1B) == 0x00
+- && convrate <= 0x09
+- && (local_ext & 0x0F) == 0x00)
+- name = "tmp451";
++ conalert = i2c_smbus_read_byte_data(client,
++ TMP451_REG_CONALERT);
++ chen = i2c_smbus_read_byte_data(client, TMP461_REG_CHEN);
++ dfc = i2c_smbus_read_byte_data(client, TMP461_REG_DFC);
++
++ if ((local_ext & 0x0F) == 0x00 &&
++ (conalert & 0xf1) == 0x01 &&
++ (chen & 0xfc) == 0x00 &&
++ (dfc & 0xfc) == 0x00) {
++ if (address == 0x4c && !(chen & 0x03))
++ name = "tmp451";
++ else if (address >= 0x48 && address <= 0x4f)
++ name = "tmp461";
++ }
+ }
+
+ if (!name) { /* identification failed */
+@@ -1685,7 +1726,7 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data)
+ lm90_set_convrate(client, data, 500); /* 500ms; 2Hz conversion rate */
+
+ /* Check Temperature Range Select */
+- if (data->kind == adt7461 || data->kind == tmp451) {
++ if (data->flags & LM90_HAVE_EXTENDED_TEMP) {
+ if (config & 0x04)
+ data->flags |= LM90_FLAG_ADT7461_EXT;
+ }
+@@ -1767,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status)
+
+ if (st & LM90_STATUS_LLOW)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_min, 0);
++ hwmon_temp_min_alarm, 0);
+ if (st & LM90_STATUS_RLOW)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_min, 1);
++ hwmon_temp_min_alarm, 1);
+ if (st2 & MAX6696_STATUS2_R2LOW)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_min, 2);
++ hwmon_temp_min_alarm, 2);
+ if (st & LM90_STATUS_LHIGH)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_max, 0);
++ hwmon_temp_max_alarm, 0);
+ if (st & LM90_STATUS_RHIGH)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_max, 1);
++ hwmon_temp_max_alarm, 1);
+ if (st2 & MAX6696_STATUS2_R2HIGH)
+ hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+- hwmon_temp_max, 2);
++ hwmon_temp_max_alarm, 2);
+
+ return true;
+ }
+@@ -1878,11 +1919,14 @@ static int lm90_probe(struct i2c_client *client)
+ info->config = data->channel_config;
+
+ data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
+- HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+- HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM;
++ HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM;
+ data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
+- HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+- HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT;
++ HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT;
++
++ if (data->flags & LM90_HAVE_CRIT) {
++ data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST;
++ data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST;
++ }
+
+ if (data->flags & LM90_HAVE_OFFSET)
+ data->channel_config[1] |= HWMON_T_OFFSET;
+diff --git a/drivers/hwmon/ltc2945.c b/drivers/hwmon/ltc2945.c
+index 9adebb59f6042..c06ab7317431f 100644
+--- a/drivers/hwmon/ltc2945.c
++++ b/drivers/hwmon/ltc2945.c
+@@ -248,6 +248,8 @@ static ssize_t ltc2945_value_store(struct device *dev,
+
+ /* convert to register value, then clamp and write result */
+ regval = ltc2945_val_to_reg(dev, reg, val);
++ if (regval < 0)
++ return regval;
+ if (is_power_reg(reg)) {
+ regval = clamp_val(regval, 0, 0xffffff);
+ regbuf[0] = regval >> 16;
+diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c
+index 5423466de697a..e918490f3ff75 100644
+--- a/drivers/hwmon/ltc2947-core.c
++++ b/drivers/hwmon/ltc2947-core.c
+@@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val,
+ return ret;
+
+ /* in milidegrees celcius, temp is given by: */
+- *val = (__val * 204) + 550;
++ *val = (__val * 204) + 5500;
+
+ return 0;
+ }
+diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
+index 2a4bed0ab226b..009a0a5af9236 100644
+--- a/drivers/hwmon/ltc2992.c
++++ b/drivers/hwmon/ltc2992.c
+@@ -324,6 +324,7 @@ static int ltc2992_config_gpio(struct ltc2992_state *st)
+ st->gc.label = name;
+ st->gc.parent = &st->client->dev;
+ st->gc.owner = THIS_MODULE;
++ st->gc.can_sleep = true;
+ st->gc.base = -1;
+ st->gc.names = st->gpio_names;
+ st->gc.ngpio = ARRAY_SIZE(st->gpio_names);
+diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c
+index 89fe7b9fe26be..6ecc45c06849c 100644
+--- a/drivers/hwmon/mlxreg-fan.c
++++ b/drivers/hwmon/mlxreg-fan.c
+@@ -151,6 +151,12 @@ mlxreg_fan_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ if (err)
+ return err;
+
++ if (MLXREG_FAN_GET_FAULT(regval, tacho->mask)) {
++ /* FAN is broken - return zero for FAN speed. */
++ *val = 0;
++ return 0;
++ }
++
+ *val = MLXREG_FAN_GET_RPM(regval, fan->divider,
+ fan->samples);
+ break;
+diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c
+index 868243dba1ee0..05da83841536f 100644
+--- a/drivers/hwmon/mr75203.c
++++ b/drivers/hwmon/mr75203.c
+@@ -68,8 +68,9 @@
+
+ /* VM Individual Macro Register */
+ #define VM_COM_REG_SIZE 0x200
+-#define VM_SDIF_DONE(n) (VM_COM_REG_SIZE + 0x34 + 0x200 * (n))
+-#define VM_SDIF_DATA(n) (VM_COM_REG_SIZE + 0x40 + 0x200 * (n))
++#define VM_SDIF_DONE(vm) (VM_COM_REG_SIZE + 0x34 + 0x200 * (vm))
++#define VM_SDIF_DATA(vm, ch) \
++ (VM_COM_REG_SIZE + 0x40 + 0x200 * (vm) + 0x4 * (ch))
+
+ /* SDA Slave Register */
+ #define IP_CTRL 0x00
+@@ -93,7 +94,7 @@
+ #define VM_CH_REQ BIT(21)
+
+ #define IP_TMR 0x05
+-#define POWER_DELAY_CYCLE_256 0x80
++#define POWER_DELAY_CYCLE_256 0x100
+ #define POWER_DELAY_CYCLE_64 0x40
+
+ #define PVT_POLL_DELAY_US 20
+@@ -115,6 +116,7 @@ struct pvt_device {
+ u32 t_num;
+ u32 p_num;
+ u32 v_num;
++ u32 c_num;
+ u32 ip_freq;
+ u8 *vm_idx;
+ };
+@@ -178,14 +180,15 @@ static int pvt_read_in(struct device *dev, u32 attr, int channel, long *val)
+ {
+ struct pvt_device *pvt = dev_get_drvdata(dev);
+ struct regmap *v_map = pvt->v_map;
++ u8 vm_idx, ch_idx;
+ u32 n, stat;
+- u8 vm_idx;
+ int ret;
+
+- if (channel >= pvt->v_num)
++ if (channel >= pvt->v_num * pvt->c_num)
+ return -EINVAL;
+
+- vm_idx = pvt->vm_idx[channel];
++ vm_idx = pvt->vm_idx[channel / pvt->c_num];
++ ch_idx = channel % pvt->c_num;
+
+ switch (attr) {
+ case hwmon_in_input:
+@@ -196,13 +199,23 @@ static int pvt_read_in(struct device *dev, u32 attr, int channel, long *val)
+ if (ret)
+ return ret;
+
+- ret = regmap_read(v_map, VM_SDIF_DATA(vm_idx), &n);
++ ret = regmap_read(v_map, VM_SDIF_DATA(vm_idx, ch_idx), &n);
+ if(ret < 0)
+ return ret;
+
+ n &= SAMPLE_DATA_MSK;
+- /* Convert the N bitstream count into voltage */
+- *val = (PVT_N_CONST * n - PVT_R_CONST) >> PVT_CONV_BITS;
++ /*
++ * Convert the N bitstream count into voltage.
++ * To support negative voltage calculation for 64bit machines
++ * n must be cast to long, since n and *val differ both in
++ * signedness and in size.
++ * Division is used instead of right shift, because for signed
++ * numbers, the sign bit is used to fill the vacated bit
++ * positions, and if the number is negative, 1 is used.
++ * BIT(x) may not be used instead of (1 << x) because it's
++ * unsigned.
++ */
++ *val = (PVT_N_CONST * (long)n - PVT_R_CONST) / (1 << PVT_CONV_BITS);
+
+ return 0;
+ default:
+@@ -385,6 +398,19 @@ static int pvt_init(struct pvt_device *pvt)
+ if (ret)
+ return ret;
+
++ val = (BIT(pvt->c_num) - 1) | VM_CH_INIT |
++ IP_POLL << SDIF_ADDR_SFT | SDIF_WRN_W | SDIF_PROG;
++ ret = regmap_write(v_map, SDIF_W, val);
++ if (ret < 0)
++ return ret;
++
++ ret = regmap_read_poll_timeout(v_map, SDIF_STAT,
++ val, !(val & SDIF_BUSY),
++ PVT_POLL_DELAY_US,
++ PVT_POLL_TIMEOUT_US);
++ if (ret)
++ return ret;
++
+ val = CFG1_VOL_MEAS_MODE | CFG1_PARALLEL_OUT |
+ CFG1_14_BIT | IP_CFG << SDIF_ADDR_SFT |
+ SDIF_WRN_W | SDIF_PROG;
+@@ -499,8 +525,8 @@ static int pvt_reset_control_deassert(struct device *dev, struct pvt_device *pvt
+
+ static int mr75203_probe(struct platform_device *pdev)
+ {
++ u32 ts_num, vm_num, pd_num, ch_num, val, index, i;
+ const struct hwmon_channel_info **pvt_info;
+- u32 ts_num, vm_num, pd_num, val, index, i;
+ struct device *dev = &pdev->dev;
+ u32 *temp_config, *in_config;
+ struct device *hwmon_dev;
+@@ -541,9 +567,11 @@ static int mr75203_probe(struct platform_device *pdev)
+ ts_num = (val & TS_NUM_MSK) >> TS_NUM_SFT;
+ pd_num = (val & PD_NUM_MSK) >> PD_NUM_SFT;
+ vm_num = (val & VM_NUM_MSK) >> VM_NUM_SFT;
++ ch_num = (val & CH_NUM_MSK) >> CH_NUM_SFT;
+ pvt->t_num = ts_num;
+ pvt->p_num = pd_num;
+ pvt->v_num = vm_num;
++ pvt->c_num = ch_num;
+ val = 0;
+ if (ts_num)
+ val++;
+@@ -580,7 +608,7 @@ static int mr75203_probe(struct platform_device *pdev)
+ }
+
+ if (vm_num) {
+- u32 num = vm_num;
++ u32 total_ch;
+
+ ret = pvt_get_regmap(pdev, "vm", pvt);
+ if (ret)
+@@ -594,30 +622,30 @@ static int mr75203_probe(struct platform_device *pdev)
+ ret = device_property_read_u8_array(dev, "intel,vm-map",
+ pvt->vm_idx, vm_num);
+ if (ret) {
+- num = 0;
++ /*
++ * Incase intel,vm-map property is not defined, we
++ * assume incremental channel numbers.
++ */
++ for (i = 0; i < vm_num; i++)
++ pvt->vm_idx[i] = i;
+ } else {
+ for (i = 0; i < vm_num; i++)
+ if (pvt->vm_idx[i] >= vm_num ||
+ pvt->vm_idx[i] == 0xff) {
+- num = i;
++ pvt->v_num = i;
++ vm_num = i;
+ break;
+ }
+ }
+
+- /*
+- * Incase intel,vm-map property is not defined, we assume
+- * incremental channel numbers.
+- */
+- for (i = num; i < vm_num; i++)
+- pvt->vm_idx[i] = i;
+-
+- in_config = devm_kcalloc(dev, num + 1,
++ total_ch = ch_num * vm_num;
++ in_config = devm_kcalloc(dev, total_ch + 1,
+ sizeof(*in_config), GFP_KERNEL);
+ if (!in_config)
+ return -ENOMEM;
+
+- memset32(in_config, HWMON_I_INPUT, num);
+- in_config[num] = 0;
++ memset32(in_config, HWMON_I_INPUT, total_ch);
++ in_config[total_ch] = 0;
+ pvt_in.config = in_config;
+
+ pvt_info[index++] = &pvt_in;
+diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c
+index 604af2f6103a3..88eddb8d61d37 100644
+--- a/drivers/hwmon/nct7802.c
++++ b/drivers/hwmon/nct7802.c
+@@ -708,7 +708,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj,
+ if (index >= 38 && index < 46 && !(reg & 0x01)) /* PECI 0 */
+ return 0;
+
+- if (index >= 0x46 && (!(reg & 0x02))) /* PECI 1 */
++ if (index >= 46 && !(reg & 0x02)) /* PECI 1 */
+ return 0;
+
+ return attr->mode;
+diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
+index ae664613289c4..bbe5e4ef4113c 100644
+--- a/drivers/hwmon/occ/common.c
++++ b/drivers/hwmon/occ/common.c
+@@ -132,22 +132,20 @@ struct extended_sensor {
+ static int occ_poll(struct occ *occ)
+ {
+ int rc;
+- u16 checksum = occ->poll_cmd_data + occ->seq_no + 1;
+- u8 cmd[8];
++ u8 cmd[7];
+ struct occ_poll_response_header *header;
+
+ /* big endian */
+- cmd[0] = occ->seq_no++; /* sequence number */
++ cmd[0] = 0; /* sequence number */
+ cmd[1] = 0; /* cmd type */
+ cmd[2] = 0; /* data length msb */
+ cmd[3] = 1; /* data length lsb */
+ cmd[4] = occ->poll_cmd_data; /* data */
+- cmd[5] = checksum >> 8; /* checksum msb */
+- cmd[6] = checksum & 0xFF; /* checksum lsb */
+- cmd[7] = 0;
++ cmd[5] = 0; /* checksum msb */
++ cmd[6] = 0; /* checksum lsb */
+
+ /* mutex should already be locked if necessary */
+- rc = occ->send_cmd(occ, cmd);
++ rc = occ->send_cmd(occ, cmd, sizeof(cmd), &occ->resp, sizeof(occ->resp));
+ if (rc) {
+ occ->last_error = rc;
+ if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
+@@ -184,25 +182,24 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
+ {
+ int rc;
+ u8 cmd[8];
+- u16 checksum = 0x24;
++ u8 resp[8];
+ __be16 user_power_cap_be = cpu_to_be16(user_power_cap);
+
+- cmd[0] = 0;
+- cmd[1] = 0x22;
+- cmd[2] = 0;
+- cmd[3] = 2;
++ cmd[0] = 0; /* sequence number */
++ cmd[1] = 0x22; /* cmd type */
++ cmd[2] = 0; /* data length msb */
++ cmd[3] = 2; /* data length lsb */
+
+ memcpy(&cmd[4], &user_power_cap_be, 2);
+
+- checksum += cmd[4] + cmd[5];
+- cmd[6] = checksum >> 8;
+- cmd[7] = checksum & 0xFF;
++ cmd[6] = 0; /* checksum msb */
++ cmd[7] = 0; /* checksum lsb */
+
+ rc = mutex_lock_interruptible(&occ->lock);
+ if (rc)
+ return rc;
+
+- rc = occ->send_cmd(occ, cmd);
++ rc = occ->send_cmd(occ, cmd, sizeof(cmd), resp, sizeof(resp));
+
+ mutex_unlock(&occ->lock);
+
+@@ -1144,8 +1141,6 @@ int occ_setup(struct occ *occ, const char *name)
+ {
+ int rc;
+
+- /* start with 1 to avoid false match with zero-initialized SRAM buffer */
+- occ->seq_no = 1;
+ mutex_init(&occ->lock);
+ occ->groups[0] = &occ->group;
+
+diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
+index e6df719770e81..7abf191020628 100644
+--- a/drivers/hwmon/occ/common.h
++++ b/drivers/hwmon/occ/common.h
+@@ -95,9 +95,9 @@ struct occ {
+ struct occ_sensors sensors;
+
+ int powr_sample_time_us; /* average power sample time */
+- u8 seq_no;
+ u8 poll_cmd_data; /* to perform OCC poll command */
+- int (*send_cmd)(struct occ *occ, u8 *cmd);
++ int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len, void *resp,
++ size_t resp_len);
+
+ unsigned long next_update;
+ struct mutex lock; /* lock OCC access */
+diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c
+index 0cf8588be35ac..c35c07964d856 100644
+--- a/drivers/hwmon/occ/p8_i2c.c
++++ b/drivers/hwmon/occ/p8_i2c.c
+@@ -97,18 +97,22 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address,
+ }
+
+ static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address,
+- u8 *data)
++ u8 *data, size_t len)
+ {
+- __be32 data0, data1;
++ __be32 data0 = 0, data1 = 0;
+
+- memcpy(&data0, data, 4);
+- memcpy(&data1, data + 4, 4);
++ memcpy(&data0, data, min_t(size_t, len, 4));
++ if (len > 4) {
++ len -= 4;
++ memcpy(&data1, data + 4, min_t(size_t, len, 4));
++ }
+
+ return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0),
+ be32_to_cpu(data1));
+ }
+
+-static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
++static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len,
++ void *resp, size_t resp_len)
+ {
+ int i, rc;
+ unsigned long start;
+@@ -117,7 +121,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
+ const long wait_time = msecs_to_jiffies(OCC_CMD_IN_PRG_WAIT_MS);
+ struct p8_i2c_occ *ctx = to_p8_i2c_occ(occ);
+ struct i2c_client *client = ctx->client;
+- struct occ_response *resp = &occ->resp;
++ struct occ_response *or = (struct occ_response *)resp;
+
+ start = jiffies;
+
+@@ -127,7 +131,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
+ return rc;
+
+ /* write command (expected to already be BE), we need bus-endian... */
+- rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd);
++ rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd, len);
+ if (rc)
+ return rc;
+
+@@ -148,7 +152,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
+ return rc;
+
+ /* wait for OCC */
+- if (resp->return_status == OCC_RESP_CMD_IN_PRG) {
++ if (or->return_status == OCC_RESP_CMD_IN_PRG) {
+ rc = -EALREADY;
+
+ if (time_after(jiffies, start + timeout))
+@@ -160,7 +164,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
+ } while (rc);
+
+ /* check the OCC response */
+- switch (resp->return_status) {
++ switch (or->return_status) {
+ case OCC_RESP_CMD_IN_PRG:
+ rc = -ETIMEDOUT;
+ break;
+@@ -189,8 +193,8 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
+ if (rc < 0)
+ return rc;
+
+- data_length = get_unaligned_be16(&resp->data_length);
+- if (data_length > OCC_RESP_DATA_BYTES)
++ data_length = get_unaligned_be16(&or->data_length);
++ if ((data_length + 7) > resp_len)
+ return -EMSGSIZE;
+
+ /* fetch the rest of the response data */
+diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
+index f6387cc0b7540..14923e78e1f32 100644
+--- a/drivers/hwmon/occ/p9_sbe.c
++++ b/drivers/hwmon/occ/p9_sbe.c
+@@ -16,18 +16,17 @@ struct p9_sbe_occ {
+
+ #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ)
+
+-static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd)
++static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len,
++ void *resp, size_t resp_len)
+ {
+- struct occ_response *resp = &occ->resp;
+ struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
+- size_t resp_len = sizeof(*resp);
+ int rc;
+
+- rc = fsi_occ_submit(ctx->sbe, cmd, 8, resp, &resp_len);
++ rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
+ if (rc < 0)
+ return rc;
+
+- switch (resp->return_status) {
++ switch (((struct occ_response *)resp)->return_status) {
+ case OCC_RESP_CMD_IN_PRG:
+ rc = -ETIMEDOUT;
+ break;
+diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c
+index ec5f932fc6f0f..1ac2b2f4c5705 100644
+--- a/drivers/hwmon/pmbus/adm1266.c
++++ b/drivers/hwmon/pmbus/adm1266.c
+@@ -301,6 +301,7 @@ static int adm1266_config_gpio(struct adm1266_data *data)
+ data->gc.label = name;
+ data->gc.parent = &data->client->dev;
+ data->gc.owner = THIS_MODULE;
++ data->gc.can_sleep = true;
+ data->gc.base = -1;
+ data->gc.names = data->gpio_names;
+ data->gc.ngpio = ARRAY_SIZE(data->gpio_names);
+diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
+index d311e0557401c..b8543c06d022a 100644
+--- a/drivers/hwmon/pmbus/adm1275.c
++++ b/drivers/hwmon/pmbus/adm1275.c
+@@ -37,10 +37,13 @@ enum chips { adm1075, adm1272, adm1275, adm1276, adm1278, adm1293, adm1294 };
+
+ #define ADM1272_IRANGE BIT(0)
+
++#define ADM1278_TSFILT BIT(15)
+ #define ADM1278_TEMP1_EN BIT(3)
+ #define ADM1278_VIN_EN BIT(2)
+ #define ADM1278_VOUT_EN BIT(1)
+
++#define ADM1278_PMON_DEFCONFIG (ADM1278_VOUT_EN | ADM1278_TEMP1_EN | ADM1278_TSFILT)
++
+ #define ADM1293_IRANGE_25 0
+ #define ADM1293_IRANGE_50 BIT(6)
+ #define ADM1293_IRANGE_100 BIT(7)
+@@ -462,6 +465,22 @@ static const struct i2c_device_id adm1275_id[] = {
+ };
+ MODULE_DEVICE_TABLE(i2c, adm1275_id);
+
++/* Enable VOUT & TEMP1 if not enabled (disabled by default) */
++static int adm1275_enable_vout_temp(struct i2c_client *client, int config)
++{
++ int ret;
++
++ if ((config & ADM1278_PMON_DEFCONFIG) != ADM1278_PMON_DEFCONFIG) {
++ config |= ADM1278_PMON_DEFCONFIG;
++ ret = i2c_smbus_write_word_data(client, ADM1275_PMON_CONFIG, config);
++ if (ret < 0) {
++ dev_err(&client->dev, "Failed to enable VOUT/TEMP1 monitoring\n");
++ return ret;
++ }
++ }
++ return 0;
++}
++
+ static int adm1275_probe(struct i2c_client *client)
+ {
+ s32 (*config_read_fn)(const struct i2c_client *client, u8 reg);
+@@ -475,6 +494,7 @@ static int adm1275_probe(struct i2c_client *client)
+ int vindex = -1, voindex = -1, cindex = -1, pindex = -1;
+ int tindex = -1;
+ u32 shunt;
++ u32 avg;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_SMBUS_READ_BYTE_DATA
+@@ -614,19 +634,10 @@ static int adm1275_probe(struct i2c_client *client)
+ PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+ PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
+
+- /* Enable VOUT & TEMP1 if not enabled (disabled by default) */
+- if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) !=
+- (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) {
+- config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN;
+- ret = i2c_smbus_write_byte_data(client,
+- ADM1275_PMON_CONFIG,
+- config);
+- if (ret < 0) {
+- dev_err(&client->dev,
+- "Failed to enable VOUT monitoring\n");
+- return -ENODEV;
+- }
+- }
++ ret = adm1275_enable_vout_temp(client, config);
++ if (ret)
++ return ret;
++
+ if (config & ADM1278_VIN_EN)
+ info->func[0] |= PMBUS_HAVE_VIN;
+ break;
+@@ -683,19 +694,9 @@ static int adm1275_probe(struct i2c_client *client)
+ PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+ PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
+
+- /* Enable VOUT & TEMP1 if not enabled (disabled by default) */
+- if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) !=
+- (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) {
+- config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN;
+- ret = i2c_smbus_write_byte_data(client,
+- ADM1275_PMON_CONFIG,
+- config);
+- if (ret < 0) {
+- dev_err(&client->dev,
+- "Failed to enable VOUT monitoring\n");
+- return -ENODEV;
+- }
+- }
++ ret = adm1275_enable_vout_temp(client, config);
++ if (ret)
++ return ret;
+
+ if (config & ADM1278_VIN_EN)
+ info->func[0] |= PMBUS_HAVE_VIN;
+@@ -756,6 +757,43 @@ static int adm1275_probe(struct i2c_client *client)
+ return -ENODEV;
+ }
+
++ if (data->have_power_sampling &&
++ of_property_read_u32(client->dev.of_node,
++ "adi,power-sample-average", &avg) == 0) {
++ if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
++ BIT(__fls(avg)) != avg) {
++ dev_err(&client->dev,
++ "Invalid number of power samples");
++ return -EINVAL;
++ }
++ ret = adm1275_write_pmon_config(data, client, true,
++ ilog2(avg));
++ if (ret < 0) {
++ dev_err(&client->dev,
++ "Setting power sample averaging failed with error %d",
++ ret);
++ return ret;
++ }
++ }
++
++ if (of_property_read_u32(client->dev.of_node,
++ "adi,volt-curr-sample-average", &avg) == 0) {
++ if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
++ BIT(__fls(avg)) != avg) {
++ dev_err(&client->dev,
++ "Invalid number of voltage/current samples");
++ return -EINVAL;
++ }
++ ret = adm1275_write_pmon_config(data, client, false,
++ ilog2(avg));
++ if (ret < 0) {
++ dev_err(&client->dev,
++ "Setting voltage and current sample averaging failed with error %d",
++ ret);
++ return ret;
++ }
++ }
++
+ if (voindex < 0)
+ voindex = vindex;
+ if (vindex >= 0) {
+diff --git a/drivers/hwmon/pmbus/bel-pfe.c b/drivers/hwmon/pmbus/bel-pfe.c
+index 4100eefb7ac32..61c195f8fd3b8 100644
+--- a/drivers/hwmon/pmbus/bel-pfe.c
++++ b/drivers/hwmon/pmbus/bel-pfe.c
+@@ -17,12 +17,13 @@
+ enum chips {pfe1100, pfe3000};
+
+ /*
+- * Disable status check for pfe3000 devices, because some devices report
+- * communication error (invalid command) for VOUT_MODE command (0x20)
+- * although correct VOUT_MODE (0x16) is returned: it leads to incorrect
+- * exponent in linear mode.
++ * Disable status check because some devices report communication error
++ * (invalid command) for VOUT_MODE command (0x20) although the correct
++ * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear
++ * mode.
++ * This affects both pfe3000 and pfe1100.
+ */
+-static struct pmbus_platform_data pfe3000_plat_data = {
++static struct pmbus_platform_data pfe_plat_data = {
+ .flags = PMBUS_SKIP_STATUS_CHECK,
+ };
+
+@@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client)
+ int model;
+
+ model = (int)i2c_match_id(pfe_device_id, client)->driver_data;
++ client->dev.platform_data = &pfe_plat_data;
+
+ /*
+ * PFE3000-12-069RA devices may not stay in page 0 during device
+ * probe which leads to probe failure (read status word failed).
+ * So let's set the device to page 0 at the beginning.
+ */
+- if (model == pfe3000) {
+- client->dev.platform_data = &pfe3000_plat_data;
++ if (model == pfe3000)
+ i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
+- }
+
+ return pmbus_do_probe(client, &pfe_driver_info[model]);
+ }
+diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c
+index aec294cc72d1f..c7469d2cdedcf 100644
+--- a/drivers/hwmon/pmbus/fsp-3y.c
++++ b/drivers/hwmon/pmbus/fsp-3y.c
+@@ -180,7 +180,6 @@ static struct pmbus_driver_info fsp3y_info[] = {
+ PMBUS_HAVE_FAN12,
+ .func[YM2151_PAGE_5VSB_LOG] =
+ PMBUS_HAVE_VOUT | PMBUS_HAVE_IOUT,
+- PMBUS_HAVE_IIN,
+ .read_word_data = fsp3y_read_word_data,
+ .read_byte_data = fsp3y_read_byte_data,
+ },
+diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c
+index d209e0afc2caa..66d3e88b54172 100644
+--- a/drivers/hwmon/pmbus/lm25066.c
++++ b/drivers/hwmon/pmbus/lm25066.c
+@@ -51,26 +51,31 @@ struct __coeff {
+ #define PSC_CURRENT_IN_L (PSC_NUM_CLASSES)
+ #define PSC_POWER_L (PSC_NUM_CLASSES + 1)
+
+-static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = {
++static struct __coeff lm25066_coeff[][PSC_NUM_CLASSES + 2] = {
+ [lm25056] = {
+ [PSC_VOLTAGE_IN] = {
+ .m = 16296,
++ .b = 1343,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN] = {
+ .m = 13797,
++ .b = -1833,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN_L] = {
+ .m = 6726,
++ .b = -537,
+ .R = -2,
+ },
+ [PSC_POWER] = {
+ .m = 5501,
++ .b = -2908,
+ .R = -3,
+ },
+ [PSC_POWER_L] = {
+ .m = 26882,
++ .b = -5646,
+ .R = -4,
+ },
+ [PSC_TEMPERATURE] = {
+@@ -82,26 +87,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = {
+ [lm25066] = {
+ [PSC_VOLTAGE_IN] = {
+ .m = 22070,
++ .b = -1800,
+ .R = -2,
+ },
+ [PSC_VOLTAGE_OUT] = {
+ .m = 22070,
++ .b = -1800,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN] = {
+ .m = 13661,
++ .b = -5200,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN_L] = {
+ .m = 6852,
++ .b = -3100,
+ .R = -2,
+ },
+ [PSC_POWER] = {
+ .m = 736,
++ .b = -3300,
+ .R = -2,
+ },
+ [PSC_POWER_L] = {
+ .m = 369,
++ .b = -1900,
+ .R = -2,
+ },
+ [PSC_TEMPERATURE] = {
+@@ -111,26 +122,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = {
+ [lm5064] = {
+ [PSC_VOLTAGE_IN] = {
+ .m = 4611,
++ .b = -642,
+ .R = -2,
+ },
+ [PSC_VOLTAGE_OUT] = {
+ .m = 4621,
++ .b = 423,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN] = {
+ .m = 10742,
++ .b = 1552,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN_L] = {
+ .m = 5456,
++ .b = 2118,
+ .R = -2,
+ },
+ [PSC_POWER] = {
+ .m = 1204,
++ .b = 8524,
+ .R = -3,
+ },
+ [PSC_POWER_L] = {
+ .m = 612,
++ .b = 11202,
+ .R = -3,
+ },
+ [PSC_TEMPERATURE] = {
+@@ -140,26 +157,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = {
+ [lm5066] = {
+ [PSC_VOLTAGE_IN] = {
+ .m = 4587,
++ .b = -1200,
+ .R = -2,
+ },
+ [PSC_VOLTAGE_OUT] = {
+ .m = 4587,
++ .b = -2400,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN] = {
+ .m = 10753,
++ .b = -1200,
+ .R = -2,
+ },
+ [PSC_CURRENT_IN_L] = {
+ .m = 5405,
++ .b = -600,
+ .R = -2,
+ },
+ [PSC_POWER] = {
+ .m = 1204,
++ .b = -6000,
+ .R = -3,
+ },
+ [PSC_POWER_L] = {
+ .m = 605,
++ .b = -8000,
+ .R = -3,
+ },
+ [PSC_TEMPERATURE] = {
+diff --git a/drivers/hwmon/pmbus/mp2888.c b/drivers/hwmon/pmbus/mp2888.c
+index 8ecd4adfef40e..24e5194706cf6 100644
+--- a/drivers/hwmon/pmbus/mp2888.c
++++ b/drivers/hwmon/pmbus/mp2888.c
+@@ -34,7 +34,7 @@ struct mp2888_data {
+ int curr_sense_gain;
+ };
+
+-#define to_mp2888_data(x) container_of(x, struct mp2888_data, info)
++#define to_mp2888_data(x) container_of(x, struct mp2888_data, info)
+
+ static int mp2888_read_byte_data(struct i2c_client *client, int page, int reg)
+ {
+@@ -109,7 +109,7 @@ mp2888_read_phase(struct i2c_client *client, struct mp2888_data *data, int page,
+ * - Kcs is the DrMOS current sense gain of power stage, which is obtained from the
+ * register MP2888_MFR_VR_CONFIG1, bits 13-12 with the following selection of DrMOS
+ * (data->curr_sense_gain):
+- * 00b - 5µA/A, 01b - 8.5µA/A, 10b - 9.7µA/A, 11b - 10µA/A.
++ * 00b - 8.5µA/A, 01b - 9.7µA/A, 1b - 10µA/A, 11b - 5µA/A.
+ * - Rcs is the internal phase current sense resistor. This parameter depends on hardware
+ * assembly. By default it is set to 1kΩ. In case of different assembly, user should
+ * scale this parameter by dividing it by Rcs.
+@@ -118,10 +118,9 @@ mp2888_read_phase(struct i2c_client *client, struct mp2888_data *data, int page,
+ * because sampling of current occurrence of bit weight has a big deviation, especially for
+ * light load.
+ */
+- ret = DIV_ROUND_CLOSEST(ret * 100 - 9800, data->curr_sense_gain);
+- ret = (data->phase_curr_resolution) ? ret * 2 : ret;
++ ret = DIV_ROUND_CLOSEST(ret * 200 - 19600, data->curr_sense_gain);
+ /* Scale according to total current resolution. */
+- ret = (data->total_curr_resolution) ? ret * 8 : ret * 4;
++ ret = (data->total_curr_resolution) ? ret * 2 : ret;
+ return ret;
+ }
+
+@@ -212,7 +211,7 @@ static int mp2888_read_word_data(struct i2c_client *client, int page, int phase,
+ ret = pmbus_read_word_data(client, page, phase, reg);
+ if (ret < 0)
+ return ret;
+- ret = data->total_curr_resolution ? ret * 2 : ret;
++ ret = data->total_curr_resolution ? ret : DIV_ROUND_CLOSEST(ret, 2);
+ break;
+ case PMBUS_POUT_OP_WARN_LIMIT:
+ ret = pmbus_read_word_data(client, page, phase, reg);
+@@ -223,7 +222,7 @@ static int mp2888_read_word_data(struct i2c_client *client, int page, int phase,
+ * set 1. Actual power is reported with 0.5W or 1W respectively resolution. Scaling
+ * is needed to match both.
+ */
+- ret = data->total_curr_resolution ? ret * 4 : ret * 2;
++ ret = data->total_curr_resolution ? ret * 2 : ret;
+ break;
+ /*
+ * The below registers are not implemented by device or implemented not according to the
+diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
+index e0aa8aa46d8c4..ef3a8ecde4dfc 100644
+--- a/drivers/hwmon/pmbus/pmbus.h
++++ b/drivers/hwmon/pmbus/pmbus.h
+@@ -319,6 +319,7 @@ enum pmbus_fan_mode { percent = 0, rpm };
+ /*
+ * STATUS_VOUT, STATUS_INPUT
+ */
++#define PB_VOLTAGE_VIN_OFF BIT(3)
+ #define PB_VOLTAGE_UV_FAULT BIT(4)
+ #define PB_VOLTAGE_UV_WARNING BIT(5)
+ #define PB_VOLTAGE_OV_WARNING BIT(6)
+diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
+index 776ee2237be20..63b616ce3a6e9 100644
+--- a/drivers/hwmon/pmbus/pmbus_core.c
++++ b/drivers/hwmon/pmbus/pmbus_core.c
+@@ -911,6 +911,11 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b,
+ pmbus_update_sensor_data(client, s2);
+
+ regval = status & mask;
++ if (regval) {
++ ret = pmbus_write_byte_data(client, page, reg, regval);
++ if (ret)
++ goto unlock;
++ }
+ if (s1 && s2) {
+ s64 v1, v2;
+
+@@ -1368,7 +1373,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = {
+ .reg = PMBUS_VIN_UV_FAULT_LIMIT,
+ .attr = "lcrit",
+ .alarm = "lcrit_alarm",
+- .sbit = PB_VOLTAGE_UV_FAULT,
++ .sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF,
+ }, {
+ .reg = PMBUS_VIN_OV_WARN_LIMIT,
+ .attr = "max",
+@@ -2303,6 +2308,21 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
+ struct device *dev = &client->dev;
+ int page, ret;
+
++ /*
++ * Figure out if PEC is enabled before accessing any other register.
++ * Make sure PEC is disabled, will be enabled later if needed.
++ */
++ client->flags &= ~I2C_CLIENT_PEC;
++
++ /* Enable PEC if the controller and bus supports it */
++ if (!(data->flags & PMBUS_NO_CAPABILITY)) {
++ ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
++ if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) {
++ if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC))
++ client->flags |= I2C_CLIENT_PEC;
++ }
++ }
++
+ /*
+ * Some PMBus chips don't support PMBUS_STATUS_WORD, so try
+ * to use PMBUS_STATUS_BYTE instead if that is the case.
+@@ -2321,16 +2341,6 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
+ data->has_status_word = true;
+ }
+
+- /* Enable PEC if the controller and bus supports it */
+- if (!(data->flags & PMBUS_NO_CAPABILITY)) {
+- ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
+- if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) {
+- if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) {
+- client->flags |= I2C_CLIENT_PEC;
+- }
+- }
+- }
+-
+ /*
+ * Check if the chip is write protected. If it is, we can not clear
+ * faults, and we should not try it. Also, in that case, writes into
+@@ -2386,10 +2396,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
+ {
+ struct device *dev = rdev_get_dev(rdev);
+ struct i2c_client *client = to_i2c_client(dev->parent);
++ struct pmbus_data *data = i2c_get_clientdata(client);
+ u8 page = rdev_get_id(rdev);
+ int ret;
+
++ mutex_lock(&data->update_lock);
+ ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
++ mutex_unlock(&data->update_lock);
++
+ if (ret < 0)
+ return ret;
+
+@@ -2400,11 +2414,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable)
+ {
+ struct device *dev = rdev_get_dev(rdev);
+ struct i2c_client *client = to_i2c_client(dev->parent);
++ struct pmbus_data *data = i2c_get_clientdata(client);
+ u8 page = rdev_get_id(rdev);
++ int ret;
+
+- return pmbus_update_byte_data(client, page, PMBUS_OPERATION,
+- PB_OPERATION_CONTROL_ON,
+- enable ? PB_OPERATION_CONTROL_ON : 0);
++ mutex_lock(&data->update_lock);
++ ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION,
++ PB_OPERATION_CONTROL_ON,
++ enable ? PB_OPERATION_CONTROL_ON : 0);
++ mutex_unlock(&data->update_lock);
++
++ return ret;
+ }
+
+ static int pmbus_regulator_enable(struct regulator_dev *rdev)
+diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
+index 75fc770c9e403..3daaf22378322 100644
+--- a/drivers/hwmon/pmbus/ucd9000.c
++++ b/drivers/hwmon/pmbus/ucd9000.c
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/debugfs.h>
++#include <linux/delay.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/of_device.h>
+@@ -16,6 +17,7 @@
+ #include <linux/i2c.h>
+ #include <linux/pmbus.h>
+ #include <linux/gpio/driver.h>
++#include <linux/timekeeping.h>
+ #include "pmbus.h"
+
+ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd90320, ucd9090,
+@@ -65,6 +67,7 @@ struct ucd9000_data {
+ struct gpio_chip gpio;
+ #endif
+ struct dentry *debugfs;
++ ktime_t write_time;
+ };
+ #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
+
+@@ -73,6 +76,73 @@ struct ucd9000_debugfs_entry {
+ u8 index;
+ };
+
++/*
++ * It has been observed that the UCD90320 randomly fails register access when
++ * doing another access right on the back of a register write. To mitigate this
++ * make sure that there is a minimum delay between a write access and the
++ * following access. The 250us is based on experimental data. At a delay of
++ * 200us the issue seems to go away. Add a bit of extra margin to allow for
++ * system to system differences.
++ */
++#define UCD90320_WAIT_DELAY_US 250
++
++static inline void ucd90320_wait(const struct ucd9000_data *data)
++{
++ s64 delta = ktime_us_delta(ktime_get(), data->write_time);
++
++ if (delta < UCD90320_WAIT_DELAY_US)
++ udelay(UCD90320_WAIT_DELAY_US - delta);
++}
++
++static int ucd90320_read_word_data(struct i2c_client *client, int page,
++ int phase, int reg)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++
++ if (reg >= PMBUS_VIRT_BASE)
++ return -ENXIO;
++
++ ucd90320_wait(data);
++ return pmbus_read_word_data(client, page, phase, reg);
++}
++
++static int ucd90320_read_byte_data(struct i2c_client *client, int page, int reg)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++
++ ucd90320_wait(data);
++ return pmbus_read_byte_data(client, page, reg);
++}
++
++static int ucd90320_write_word_data(struct i2c_client *client, int page,
++ int reg, u16 word)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++ int ret;
++
++ ucd90320_wait(data);
++ ret = pmbus_write_word_data(client, page, reg, word);
++ data->write_time = ktime_get();
++
++ return ret;
++}
++
++static int ucd90320_write_byte(struct i2c_client *client, int page, u8 value)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++ int ret;
++
++ ucd90320_wait(data);
++ ret = pmbus_write_byte(client, page, value);
++ data->write_time = ktime_get();
++
++ return ret;
++}
++
+ static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
+ {
+ int fan_config = 0;
+@@ -598,6 +668,11 @@ static int ucd9000_probe(struct i2c_client *client)
+ info->read_byte_data = ucd9000_read_byte_data;
+ info->func[0] |= PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12
+ | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
++ } else if (mid->driver_data == ucd90320) {
++ info->read_byte_data = ucd90320_read_byte_data;
++ info->read_word_data = ucd90320_read_word_data;
++ info->write_byte = ucd90320_write_byte;
++ info->write_word_data = ucd90320_write_word_data;
+ }
+
+ ucd9000_probe_gpio(client, mid, data);
+diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c
+index 17518b4cab1b0..f12b9a28a232d 100644
+--- a/drivers/hwmon/pwm-fan.c
++++ b/drivers/hwmon/pwm-fan.c
+@@ -336,8 +336,6 @@ static int pwm_fan_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- ctx->pwm_value = MAX_PWM;
+-
+ pwm_init_state(ctx->pwm, &ctx->pwm_state);
+
+ /*
+diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
+index 40cdadad35e52..f85eede6d7663 100644
+--- a/drivers/hwmon/sch56xx-common.c
++++ b/drivers/hwmon/sch56xx-common.c
+@@ -422,7 +422,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
+ data->wddev.max_timeout = 255 * 60;
+ watchdog_set_nowayout(&data->wddev, nowayout);
+ if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)
+- set_bit(WDOG_ACTIVE, &data->wddev.status);
++ set_bit(WDOG_HW_RUNNING, &data->wddev.status);
+
+ /* Since the watchdog uses a downcounter there is no register to read
+ the BIOS set timeout from (if any was set at all) ->
+diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c
+index 7f4a639597306..ae4d14257a11d 100644
+--- a/drivers/hwmon/sht15.c
++++ b/drivers/hwmon/sht15.c
+@@ -1020,25 +1020,20 @@ err_release_reg:
+ static int sht15_remove(struct platform_device *pdev)
+ {
+ struct sht15_data *data = platform_get_drvdata(pdev);
++ int ret;
+
+- /*
+- * Make sure any reads from the device are done and
+- * prevent new ones beginning
+- */
+- mutex_lock(&data->read_lock);
+- if (sht15_soft_reset(data)) {
+- mutex_unlock(&data->read_lock);
+- return -EFAULT;
+- }
+ hwmon_device_unregister(data->hwmon_dev);
+ sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group);
++
++ ret = sht15_soft_reset(data);
++ if (ret)
++ dev_err(&pdev->dev, "Failed to reset device (%pe)\n", ERR_PTR(ret));
++
+ if (!IS_ERR(data->reg)) {
+ regulator_unregister_notifier(data->reg, &data->nb);
+ regulator_disable(data->reg);
+ }
+
+- mutex_unlock(&data->read_lock);
+-
+ return 0;
+ }
+
+diff --git a/drivers/hwmon/sht4x.c b/drivers/hwmon/sht4x.c
+index 09c2a0b064444..9aeb3dbf6c208 100644
+--- a/drivers/hwmon/sht4x.c
++++ b/drivers/hwmon/sht4x.c
+@@ -129,7 +129,7 @@ unlock:
+
+ static ssize_t sht4x_interval_write(struct sht4x_data *data, long val)
+ {
+- data->update_interval = clamp_val(val, SHT4X_MIN_POLL_INTERVAL, UINT_MAX);
++ data->update_interval = clamp_val(val, SHT4X_MIN_POLL_INTERVAL, INT_MAX);
+
+ return 0;
+ }
+diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
+index 9dc210b55e69b..48466b0a4bb05 100644
+--- a/drivers/hwmon/tmp401.c
++++ b/drivers/hwmon/tmp401.c
+@@ -730,10 +730,21 @@ static int tmp401_probe(struct i2c_client *client)
+ return 0;
+ }
+
++static const struct of_device_id __maybe_unused tmp4xx_of_match[] = {
++ { .compatible = "ti,tmp401", },
++ { .compatible = "ti,tmp411", },
++ { .compatible = "ti,tmp431", },
++ { .compatible = "ti,tmp432", },
++ { .compatible = "ti,tmp435", },
++ { },
++};
++MODULE_DEVICE_TABLE(of, tmp4xx_of_match);
++
+ static struct i2c_driver tmp401_driver = {
+ .class = I2C_CLASS_HWMON,
+ .driver = {
+ .name = "tmp401",
++ .of_match_table = of_match_ptr(tmp4xx_of_match),
+ },
+ .probe_new = tmp401_probe,
+ .id_table = tmp401_id,
+diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
+index 47bbe47e062fd..b9a93ee9c2364 100644
+--- a/drivers/hwmon/tmp513.c
++++ b/drivers/hwmon/tmp513.c
+@@ -434,7 +434,7 @@ static umode_t tmp51x_is_visible(const void *_data,
+
+ switch (type) {
+ case hwmon_temp:
+- if (data->id == tmp512 && channel == 4)
++ if (data->id == tmp512 && channel == 3)
+ return 0;
+ switch (attr) {
+ case hwmon_temp_input:
+@@ -758,7 +758,7 @@ static int tmp51x_probe(struct i2c_client *client)
+ static struct i2c_driver tmp51x_driver = {
+ .driver = {
+ .name = "tmp51x",
+- .of_match_table = of_match_ptr(tmp51x_of_match),
++ .of_match_table = tmp51x_of_match,
+ },
+ .probe_new = tmp51x_probe,
+ .id_table = tmp51x_id,
+diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c
+index 8bd6435c13e82..2148fd543bb4b 100644
+--- a/drivers/hwmon/tps23861.c
++++ b/drivers/hwmon/tps23861.c
+@@ -489,18 +489,20 @@ static char *tps23861_port_poe_plus_status(struct tps23861_data *data, int port)
+
+ static int tps23861_port_resistance(struct tps23861_data *data, int port)
+ {
+- u16 regval;
++ unsigned int raw_val;
++ __le16 regval;
+
+ regmap_bulk_read(data->regmap,
+ PORT_1_RESISTANCE_LSB + PORT_N_RESISTANCE_LSB_OFFSET * (port - 1),
+ &regval,
+ 2);
+
+- switch (FIELD_GET(PORT_RESISTANCE_RSN_MASK, regval)) {
++ raw_val = le16_to_cpu(regval);
++ switch (FIELD_GET(PORT_RESISTANCE_RSN_MASK, raw_val)) {
+ case PORT_RESISTANCE_RSN_OTHER:
+- return (FIELD_GET(PORT_RESISTANCE_MASK, regval) * RESISTANCE_LSB) / 10000;
++ return (FIELD_GET(PORT_RESISTANCE_MASK, raw_val) * RESISTANCE_LSB) / 10000;
+ case PORT_RESISTANCE_RSN_LOW:
+- return (FIELD_GET(PORT_RESISTANCE_MASK, regval) * RESISTANCE_LSB_LOW) / 10000;
++ return (FIELD_GET(PORT_RESISTANCE_MASK, raw_val) * RESISTANCE_LSB_LOW) / 10000;
+ case PORT_RESISTANCE_RSN_SHORT:
+ case PORT_RESISTANCE_RSN_OPEN:
+ default:
+diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
+index 382ef0395d8ee..a64f768bf1818 100644
+--- a/drivers/hwmon/xgene-hwmon.c
++++ b/drivers/hwmon/xgene-hwmon.c
+@@ -768,6 +768,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev)
+ {
+ struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev);
+
++ cancel_work_sync(&ctx->workq);
+ hwmon_device_unregister(ctx->hwmon_dev);
+ kfifo_free(&ctx->async_msg_fifo);
+ if (acpi_disabled)
+diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c
+index 3647109666658..e499146648639 100644
+--- a/drivers/hwspinlock/qcom_hwspinlock.c
++++ b/drivers/hwspinlock/qcom_hwspinlock.c
+@@ -105,7 +105,7 @@ static const struct regmap_config tcsr_mutex_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+- .max_register = 0x40000,
++ .max_register = 0x20000,
+ .fast_io = true,
+ };
+
+diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
+index 8a18c71df37a8..3ea6900542223 100644
+--- a/drivers/hwtracing/coresight/coresight-core.c
++++ b/drivers/hwtracing/coresight/coresight-core.c
+@@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev)
+ continue;
+ conn->child_dev =
+ coresight_find_csdev_by_fwnode(conn->child_fwnode);
+- if (conn->child_dev) {
++ if (conn->child_dev && conn->child_dev->has_conns_grp) {
+ ret = coresight_make_links(csdev, conn,
+ conn->child_dev);
+ if (ret)
+@@ -1421,12 +1421,8 @@ static int coresight_remove_match(struct device *dev, void *data)
+ if (csdev->dev.fwnode == conn->child_fwnode) {
+ iterator->orphan = true;
+ coresight_remove_links(iterator, conn);
+- /*
+- * Drop the reference to the handle for the remote
+- * device acquired in parsing the connections from
+- * platform data.
+- */
+- fwnode_handle_put(conn->child_fwnode);
++
++ conn->child_dev = NULL;
+ /* No need to continue */
+ break;
+ }
+@@ -1574,6 +1570,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ int nr_refcnts = 1;
+ atomic_t *refcnts = NULL;
+ struct coresight_device *csdev;
++ bool registered = false;
+
+ csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
+ if (!csdev) {
+@@ -1594,7 +1591,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
+ if (!refcnts) {
+ ret = -ENOMEM;
+- goto err_free_csdev;
++ kfree(csdev);
++ goto err_out;
+ }
+
+ csdev->refcnt = refcnts;
+@@ -1619,6 +1617,13 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
+ dev_set_name(&csdev->dev, "%s", desc->name);
+
++ /*
++ * Make sure the device registration and the connection fixup
++ * are synchronised, so that we don't see uninitialised devices
++ * on the coresight bus while trying to resolve the connections.
++ */
++ mutex_lock(&coresight_mutex);
++
+ ret = device_register(&csdev->dev);
+ if (ret) {
+ put_device(&csdev->dev);
+@@ -1626,7 +1631,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ * All resources are free'd explicitly via
+ * coresight_device_release(), triggered from put_device().
+ */
+- goto err_out;
++ goto out_unlock;
+ }
+
+ if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+@@ -1641,11 +1646,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ * from put_device(), which is in turn called from
+ * function device_unregister().
+ */
+- goto err_out;
++ goto out_unlock;
+ }
+ }
+-
+- mutex_lock(&coresight_mutex);
++ /* Device is now registered */
++ registered = true;
+
+ ret = coresight_create_conns_sysfs_group(csdev);
+ if (!ret)
+@@ -1655,16 +1660,18 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
+ if (!ret && cti_assoc_ops && cti_assoc_ops->add)
+ cti_assoc_ops->add(csdev);
+
++out_unlock:
+ mutex_unlock(&coresight_mutex);
+- if (ret) {
++ /* Success */
++ if (!ret)
++ return csdev;
++
++ /* Unregister the device if needed */
++ if (registered) {
+ coresight_unregister(csdev);
+ return ERR_PTR(ret);
+ }
+
+- return csdev;
+-
+-err_free_csdev:
+- kfree(csdev);
+ err_out:
+ /* Cleanup the connection information */
+ coresight_release_platform_data(NULL, desc->pdata);
+diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
+index 00de46565bc40..c60442970c2a4 100644
+--- a/drivers/hwtracing/coresight/coresight-cpu-debug.c
++++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
+@@ -380,9 +380,10 @@ static int debug_notifier_call(struct notifier_block *self,
+ int cpu;
+ struct debug_drvdata *drvdata;
+
+- mutex_lock(&debug_lock);
++ /* Bail out if we can't acquire the mutex or the functionality is off */
++ if (!mutex_trylock(&debug_lock))
++ return NOTIFY_DONE;
+
+- /* Bail out if the functionality is disabled */
+ if (!debug_enable)
+ goto skip_dump;
+
+@@ -401,7 +402,7 @@ static int debug_notifier_call(struct notifier_block *self,
+
+ skip_dump:
+ mutex_unlock(&debug_lock);
+- return 0;
++ return NOTIFY_DONE;
+ }
+
+ static struct notifier_block debug_notifier = {
+diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c
+index e2a3620cbf489..932e17f00c0ba 100644
+--- a/drivers/hwtracing/coresight/coresight-cti-core.c
++++ b/drivers/hwtracing/coresight/coresight-cti-core.c
+@@ -90,11 +90,9 @@ void cti_write_all_hw_regs(struct cti_drvdata *drvdata)
+ static int cti_enable_hw(struct cti_drvdata *drvdata)
+ {
+ struct cti_config *config = &drvdata->config;
+- struct device *dev = &drvdata->csdev->dev;
+ unsigned long flags;
+ int rc = 0;
+
+- pm_runtime_get_sync(dev->parent);
+ spin_lock_irqsave(&drvdata->spinlock, flags);
+
+ /* no need to do anything if enabled or unpowered*/
+@@ -119,7 +117,6 @@ cti_state_unchanged:
+ /* cannot enable due to error */
+ cti_err_not_enabled:
+ spin_unlock_irqrestore(&drvdata->spinlock, flags);
+- pm_runtime_put(dev->parent);
+ return rc;
+ }
+
+@@ -153,11 +150,17 @@ cti_hp_not_enabled:
+ static int cti_disable_hw(struct cti_drvdata *drvdata)
+ {
+ struct cti_config *config = &drvdata->config;
+- struct device *dev = &drvdata->csdev->dev;
+ struct coresight_device *csdev = drvdata->csdev;
++ int ret = 0;
+
+ spin_lock(&drvdata->spinlock);
+
++ /* don't allow negative refcounts, return an error */
++ if (!atomic_read(&drvdata->config.enable_req_count)) {
++ ret = -EINVAL;
++ goto cti_not_disabled;
++ }
++
+ /* check refcount - disable on 0 */
+ if (atomic_dec_return(&drvdata->config.enable_req_count) > 0)
+ goto cti_not_disabled;
+@@ -175,13 +178,12 @@ static int cti_disable_hw(struct cti_drvdata *drvdata)
+ coresight_disclaim_device_unlocked(csdev);
+ CS_LOCK(drvdata->base);
+ spin_unlock(&drvdata->spinlock);
+- pm_runtime_put(dev);
+- return 0;
++ return ret;
+
+ /* not disabled this call */
+ cti_not_disabled:
+ spin_unlock(&drvdata->spinlock);
+- return 0;
++ return ret;
+ }
+
+ void cti_write_single_reg(struct cti_drvdata *drvdata, int offset, u32 value)
+diff --git a/drivers/hwtracing/coresight/coresight-cti-sysfs.c b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
+index 7ff7e7780bbfb..92fc3000872a1 100644
+--- a/drivers/hwtracing/coresight/coresight-cti-sysfs.c
++++ b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
+@@ -108,10 +108,19 @@ static ssize_t enable_store(struct device *dev,
+ if (ret)
+ return ret;
+
+- if (val)
++ if (val) {
++ ret = pm_runtime_resume_and_get(dev->parent);
++ if (ret)
++ return ret;
+ ret = cti_enable(drvdata->csdev);
+- else
++ if (ret)
++ pm_runtime_put(dev->parent);
++ } else {
+ ret = cti_disable(drvdata->csdev);
++ if (!ret)
++ pm_runtime_put(dev->parent);
++ }
++
+ if (ret)
+ return ret;
+ return size;
+diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
+index 8ebd728d3a800..1feb8f0e6556a 100644
+--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
++++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
+@@ -830,6 +830,7 @@ int __init etm_perf_init(void)
+ etm_pmu.addr_filters_sync = etm_addr_filters_sync;
+ etm_pmu.addr_filters_validate = etm_addr_filters_validate;
+ etm_pmu.nr_addr_filters = ETM_ADDR_CMP_MAX;
++ etm_pmu.module = THIS_MODULE;
+
+ ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
+ if (ret == 0)
+diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
+index e24252eaf8e40..2b22343918d69 100644
+--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
++++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
+@@ -384,8 +384,10 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
+ etm4x_relaxed_write32(csa, config->vipcssctlr, TRCVIPCSSCTLR);
+ for (i = 0; i < drvdata->nrseqstate - 1; i++)
+ etm4x_relaxed_write32(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
+- etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
+- etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
++ if (drvdata->nrseqstate) {
++ etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
++ etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
++ }
+ etm4x_relaxed_write32(csa, config->ext_inp, TRCEXTINSELR);
+ for (i = 0; i < drvdata->nr_cntr; i++) {
+ etm4x_relaxed_write32(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
+@@ -409,7 +411,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
+ if (etm4x_sspcicrn_present(drvdata, i))
+ etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
+ }
+- for (i = 0; i < drvdata->nr_addr_cmp; i++) {
++ for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+ etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
+ etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
+ }
+@@ -949,25 +951,21 @@ static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
+ struct csdev_access *csa)
+ {
+ u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
+- u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1);
+
+ /*
+ * All ETMs must implement TRCDEVARCH to indicate that
+- * the component is an ETMv4. To support any broken
+- * implementations we fall back to TRCIDR1 check, which
+- * is not really reliable.
++ * the component is an ETMv4. Even though TRCIDR1 also
++ * contains the information, it is part of the "Trace"
++ * register and must be accessed with the OSLK cleared,
++ * with MMIO. But we cannot touch the OSLK until we are
++ * sure this is an ETM. So rely only on the TRCDEVARCH.
+ */
+- if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) {
+- drvdata->arch = etm_devarch_to_arch(devarch);
+- } else {
+- pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back to TRCIDR1\n",
+- smp_processor_id(), devarch);
+-
+- if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4)
+- return false;
+- drvdata->arch = etm_trcidr_to_arch(idr1);
++ if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH) {
++ pr_warn_once("TRCDEVARCH doesn't match ETMv4 architecture\n");
++ return false;
+ }
+
++ drvdata->arch = etm_devarch_to_arch(devarch);
+ *csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+ return true;
+ }
+@@ -1618,8 +1616,10 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
+ for (i = 0; i < drvdata->nrseqstate - 1; i++)
+ state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
+
+- state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
+- state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
++ if (drvdata->nrseqstate) {
++ state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
++ state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
++ }
+ state->trcextinselr = etm4x_read32(csa, TRCEXTINSELR);
+
+ for (i = 0; i < drvdata->nr_cntr; i++) {
+@@ -1731,8 +1731,10 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
+ for (i = 0; i < drvdata->nrseqstate - 1; i++)
+ etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
+
+- etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
+- etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
++ if (drvdata->nrseqstate) {
++ etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
++ etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
++ }
+ etm4x_relaxed_write32(csa, state->trcextinselr, TRCEXTINSELR);
+
+ for (i = 0; i < drvdata->nr_cntr; i++) {
+diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+index a0640fa5c55bd..57e94424a8d65 100644
+--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
++++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+@@ -367,8 +367,12 @@ static ssize_t mode_store(struct device *dev,
+ mode = ETM_MODE_QELEM(config->mode);
+ /* start by clearing QE bits */
+ config->cfg &= ~(BIT(13) | BIT(14));
+- /* if supported, Q elements with instruction counts are enabled */
+- if ((mode & BIT(0)) && (drvdata->q_support & BIT(0)))
++ /*
++ * if supported, Q elements with instruction counts are enabled.
++ * Always set the low bit for any requested mode. Valid combos are
++ * 0b00, 0b01 and 0b11.
++ */
++ if (mode && drvdata->q_support)
+ config->cfg |= BIT(13);
+ /*
+ * if supported, Q elements with and without instruction
+diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
+index e5b79bdb9851c..2305f32fedf68 100644
+--- a/drivers/hwtracing/coresight/coresight-etm4x.h
++++ b/drivers/hwtracing/coresight/coresight-etm4x.h
+@@ -7,6 +7,7 @@
+ #define _CORESIGHT_CORESIGHT_ETM_H
+
+ #include <asm/local.h>
++#include <linux/const.h>
+ #include <linux/spinlock.h>
+ #include <linux/types.h>
+ #include "coresight-priv.h"
+@@ -417,7 +418,7 @@
+ ({ \
+ u64 __val; \
+ \
+- if (__builtin_constant_p((offset))) \
++ if (__is_constexpr((offset))) \
+ __val = read_etm4x_sysreg_const_offset((offset)); \
+ else \
+ __val = etm4x_sysreg_read((offset), true, (_64bit)); \
+@@ -668,14 +669,12 @@
+ * TRCDEVARCH - CoreSight architected register
+ * - Bits[15:12] - Major version
+ * - Bits[19:16] - Minor version
+- * TRCIDR1 - ETM architected register
+- * - Bits[11:8] - Major version
+- * - Bits[7:4] - Minor version
+- * We must rely on TRCDEVARCH for the version information,
+- * however we don't want to break the support for potential
+- * old implementations which might not implement it. Thus
+- * we fall back to TRCIDR1 if TRCDEVARCH is not implemented
+- * for memory mapped components.
++ *
++ * We must rely only on TRCDEVARCH for the version information. Even though,
++ * TRCIDR1 also provides the architecture version, it is a "Trace" register
++ * and as such must be accessed only with Trace power domain ON. This may
++ * not be available at probe time.
++ *
+ * Now to make certain decisions easier based on the version
+ * we use an internal representation of the version in the
+ * driver, as follows :
+@@ -701,12 +700,6 @@ static inline u8 etm_devarch_to_arch(u32 devarch)
+ ETM_DEVARCH_REVISION(devarch));
+ }
+
+-static inline u8 etm_trcidr_to_arch(u32 trcidr1)
+-{
+- return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1),
+- ETM_TRCIDR1_ARCH_MINOR(trcidr1));
+-}
+-
+ enum etm_impdef_type {
+ ETM4_IMPDEF_HISI_CORE_COMMIT,
+ ETM4_IMPDEF_FEATURE_MAX,
+diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c
+index 43054568430f2..c30989e0675f5 100644
+--- a/drivers/hwtracing/coresight/coresight-syscfg.c
++++ b/drivers/hwtracing/coresight/coresight-syscfg.c
+@@ -791,7 +791,7 @@ static int cscfg_create_device(void)
+
+ err = device_register(dev);
+ if (err)
+- cscfg_dev_release(dev);
++ put_device(dev);
+
+ create_dev_exit_unlock:
+ mutex_unlock(&cscfg_mutex);
+diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+index cd0fb7bfba684..e9c2b0796f372 100644
+--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
++++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+@@ -428,7 +428,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
+ return -EINVAL;
+
+ /* wrap head around to the amount of space we have */
+- head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
++ head = handle->head & (((unsigned long)buf->nr_pages << PAGE_SHIFT) - 1);
+
+ /* find the page to write to */
+ buf->cur = head / PAGE_SIZE;
+diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
+index acdb59e0e6614..b9cd1f9555523 100644
+--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
++++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
+@@ -47,7 +47,8 @@ struct etr_perf_buffer {
+ };
+
+ /* Convert the perf index to an offset within the ETR buffer */
+-#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
++#define PERF_IDX2OFF(idx, buf) \
++ ((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
+
+ /* Lower limit for ETR hardware buffer */
+ #define TMC_ETR_PERF_MIN_BUF_SIZE SZ_1M
+@@ -926,7 +927,7 @@ tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset)
+
+ len = tmc_etr_buf_get_data(etr_buf, offset,
+ CORESIGHT_BARRIER_PKT_SIZE, &bufp);
+- if (WARN_ON(len < CORESIGHT_BARRIER_PKT_SIZE))
++ if (WARN_ON(len < 0 || len < CORESIGHT_BARRIER_PKT_SIZE))
+ return -EINVAL;
+ coresight_insert_barrier_packet(bufp);
+ return offset + CORESIGHT_BARRIER_PKT_SIZE;
+@@ -1232,7 +1233,7 @@ alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+ * than the size requested via sysfs.
+ */
+ if ((nr_pages << PAGE_SHIFT) > drvdata->size) {
+- etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT),
++ etr_buf = tmc_alloc_etr_buf(drvdata, ((ssize_t)nr_pages << PAGE_SHIFT),
+ 0, node, NULL);
+ if (!IS_ERR(etr_buf))
+ goto done;
+diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
+index b91ec7dde7bc9..3655b3bfb2e32 100644
+--- a/drivers/hwtracing/coresight/coresight-tmc.h
++++ b/drivers/hwtracing/coresight/coresight-tmc.h
+@@ -321,7 +321,7 @@ ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table,
+ static inline unsigned long
+ tmc_sg_table_buf_size(struct tmc_sg_table *sg_table)
+ {
+- return sg_table->data_pages.nr_pages << PAGE_SHIFT;
++ return (unsigned long)sg_table->data_pages.nr_pages << PAGE_SHIFT;
+ }
+
+ struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata);
+diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
+index 1768684968797..fac63d092c7be 100644
+--- a/drivers/hwtracing/coresight/coresight-trbe.c
++++ b/drivers/hwtracing/coresight/coresight-trbe.c
+@@ -366,7 +366,7 @@ static unsigned long __trbe_normal_offset(struct perf_output_handle *handle)
+
+ static unsigned long trbe_normal_offset(struct perf_output_handle *handle)
+ {
+- struct trbe_buf *buf = perf_get_aux(handle);
++ struct trbe_buf *buf = etm_perf_sink_config(handle);
+ u64 limit = __trbe_normal_offset(handle);
+ u64 head = PERF_IDX2OFF(handle->head, buf);
+
+@@ -869,6 +869,10 @@ static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cp
+ if (WARN_ON(trbe_csdev))
+ return;
+
++ /* If the TRBE was not probed on the CPU, we shouldn't be here */
++ if (WARN_ON(!cpudata->drvdata))
++ return;
++
+ dev = &cpudata->drvdata->pdev->dev;
+ desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu);
+ if (!desc.name)
+@@ -950,7 +954,9 @@ static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata)
+ return -ENOMEM;
+
+ for_each_cpu(cpu, &drvdata->supported_cpus) {
+- smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1);
++ /* If we fail to probe the CPU, let us defer it to hotplug callbacks */
++ if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1))
++ continue;
+ if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+ arm_trbe_register_coresight_cpu(drvdata, cpu);
+ if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+@@ -1024,6 +1030,7 @@ static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
+
+ static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
+ {
++ cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node);
+ cpuhp_remove_multi_state(drvdata->trbe_online);
+ }
+
+diff --git a/drivers/hwtracing/intel_th/msu-sink.c b/drivers/hwtracing/intel_th/msu-sink.c
+index 2c7f5116be126..891b28ea25fe6 100644
+--- a/drivers/hwtracing/intel_th/msu-sink.c
++++ b/drivers/hwtracing/intel_th/msu-sink.c
+@@ -71,6 +71,9 @@ static int msu_sink_alloc_window(void *data, struct sg_table **sgt, size_t size)
+ block = dma_alloc_coherent(priv->dev->parent->parent,
+ PAGE_SIZE, &sg_dma_address(sg_ptr),
+ GFP_KERNEL);
++ if (!block)
++ return -ENOMEM;
++
+ sg_set_buf(sg_ptr, block, PAGE_SIZE);
+ }
+
+diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
+index 432ade0842f68..d95d916b4682b 100644
+--- a/drivers/hwtracing/intel_th/msu.c
++++ b/drivers/hwtracing/intel_th/msu.c
+@@ -1069,6 +1069,16 @@ msc_buffer_set_uc(struct msc *msc) {}
+ static inline void msc_buffer_set_wb(struct msc *msc) {}
+ #endif /* CONFIG_X86 */
+
++static struct page *msc_sg_page(struct scatterlist *sg)
++{
++ void *addr = sg_virt(sg);
++
++ if (is_vmalloc_addr(addr))
++ return vmalloc_to_page(addr);
++
++ return sg_page(sg);
++}
++
+ /**
+ * msc_buffer_win_alloc() - alloc a window for a multiblock mode
+ * @msc: MSC device
+@@ -1139,7 +1149,7 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win)
+ int i;
+
+ for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) {
+- struct page *page = sg_page(sg);
++ struct page *page = msc_sg_page(sg);
+
+ page->mapping = NULL;
+ dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+@@ -1403,7 +1413,7 @@ found:
+ pgoff -= win->pgoff;
+
+ for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+- struct page *page = sg_page(sg);
++ struct page *page = msc_sg_page(sg);
+ size_t pgsz = PFN_DOWN(sg->length);
+
+ if (pgoff < pgsz)
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index 7da4f298ed01e..147d338c191e7 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -100,8 +100,10 @@ static int intel_th_pci_probe(struct pci_dev *pdev,
+ }
+
+ th = intel_th_alloc(&pdev->dev, drvdata, resource, r);
+- if (IS_ERR(th))
+- return PTR_ERR(th);
++ if (IS_ERR(th)) {
++ err = PTR_ERR(th);
++ goto err_free_irq;
++ }
+
+ th->activate = intel_th_pci_activate;
+ th->deactivate = intel_th_pci_deactivate;
+@@ -109,6 +111,10 @@ static int intel_th_pci_probe(struct pci_dev *pdev,
+ pci_set_master(pdev);
+
+ return 0;
++
++err_free_irq:
++ pci_free_irq_vectors(pdev);
++ return err;
+ }
+
+ static void intel_th_pci_remove(struct pci_dev *pdev)
+@@ -278,6 +284,21 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x54a6),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
++ {
++ /* Meteor Lake-P */
++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
++ .driver_data = (kernel_ulong_t)&intel_th_2x,
++ },
++ {
++ /* Raptor Lake-S */
++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
++ .driver_data = (kernel_ulong_t)&intel_th_2x,
++ },
++ {
++ /* Raptor Lake-S CPU */
++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f),
++ .driver_data = (kernel_ulong_t)&intel_th_2x,
++ },
+ {
+ /* Alder Lake CPU */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
+index e17790fe35a74..fea403431f228 100644
+--- a/drivers/i2c/busses/Kconfig
++++ b/drivers/i2c/busses/Kconfig
+@@ -488,7 +488,7 @@ config I2C_BRCMSTB
+
+ config I2C_CADENCE
+ tristate "Cadence I2C Controller"
+- depends on ARCH_ZYNQ || ARM64 || XTENSA
++ depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST
+ help
+ Say yes here to select Cadence I2C Host Controller. This controller is
+ e.g. used by Xilinx Zynq.
+@@ -677,7 +677,7 @@ config I2C_IMG
+
+ config I2C_IMX
+ tristate "IMX I2C interface"
+- depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE
++ depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE || COMPILE_TEST
+ select I2C_SLAVE
+ help
+ Say Y here if you want to use the IIC bus controller on
+@@ -921,7 +921,7 @@ config I2C_QCOM_GENI
+
+ config I2C_QUP
+ tristate "Qualcomm QUP based I2C controller"
+- depends on ARCH_QCOM
++ depends on ARCH_QCOM || COMPILE_TEST
+ help
+ If you say yes to this option, support will be included for the
+ built-in I2C interface on the Qualcomm SoCs.
+diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c
+index b0eae94909f44..c0c35785a0dc4 100644
+--- a/drivers/i2c/busses/i2c-at91-master.c
++++ b/drivers/i2c/busses/i2c-at91-master.c
+@@ -656,6 +656,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
+ unsigned int_addr_flag = 0;
+ struct i2c_msg *m_start = msg;
+ bool is_read;
++ u8 *dma_buf = NULL;
+
+ dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num);
+
+@@ -703,7 +704,17 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
+ dev->msg = m_start;
+ dev->recv_len_abort = false;
+
++ if (dev->use_dma) {
++ dma_buf = i2c_get_dma_safe_msg_buf(m_start, 1);
++ if (!dma_buf) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ dev->buf = dma_buf;
++ }
++
+ ret = at91_do_twi_transfer(dev);
++ i2c_put_dma_safe_msg_buf(dma_buf, m_start, !ret);
+
+ ret = (ret < 0) ? ret : num;
+ out:
+diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
+index 6304d1dd2dd6f..ec6571b82fff4 100644
+--- a/drivers/i2c/busses/i2c-bcm-iproc.c
++++ b/drivers/i2c/busses/i2c-bcm-iproc.c
+@@ -243,13 +243,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
+ u32 offset)
+ {
+ u32 val;
++ unsigned long flags;
+
+ if (iproc_i2c->idm_base) {
+- spin_lock(&iproc_i2c->idm_lock);
++ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
+ writel(iproc_i2c->ape_addr_mask,
+ iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
+ val = readl(iproc_i2c->base + offset);
+- spin_unlock(&iproc_i2c->idm_lock);
++ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
+ } else {
+ val = readl(iproc_i2c->base + offset);
+ }
+@@ -260,12 +261,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
+ static inline void iproc_i2c_wr_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
+ u32 offset, u32 val)
+ {
++ unsigned long flags;
++
+ if (iproc_i2c->idm_base) {
+- spin_lock(&iproc_i2c->idm_lock);
++ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
+ writel(iproc_i2c->ape_addr_mask,
+ iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
+ writel(val, iproc_i2c->base + offset);
+- spin_unlock(&iproc_i2c->idm_lock);
++ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
+ } else {
+ writel(val, iproc_i2c->base + offset);
+ }
+diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c
+index 37443edbf7546..f72c6576d8a36 100644
+--- a/drivers/i2c/busses/i2c-bcm2835.c
++++ b/drivers/i2c/busses/i2c-bcm2835.c
+@@ -23,6 +23,11 @@
+ #define BCM2835_I2C_FIFO 0x10
+ #define BCM2835_I2C_DIV 0x14
+ #define BCM2835_I2C_DEL 0x18
++/*
++ * 16-bit field for the number of SCL cycles to wait after rising SCL
++ * before deciding the slave is not responding. 0 disables the
++ * timeout detection.
++ */
+ #define BCM2835_I2C_CLKT 0x1c
+
+ #define BCM2835_I2C_C_READ BIT(0)
+@@ -402,7 +407,7 @@ static const struct i2c_adapter_quirks bcm2835_i2c_quirks = {
+ static int bcm2835_i2c_probe(struct platform_device *pdev)
+ {
+ struct bcm2835_i2c_dev *i2c_dev;
+- struct resource *mem, *irq;
++ struct resource *mem;
+ int ret;
+ struct i2c_adapter *adap;
+ struct clk *mclk;
+@@ -449,21 +454,20 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(i2c_dev->bus_clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Couldn't prepare clock");
+- return ret;
++ goto err_put_exclusive_rate;
+ }
+
+- irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+- if (!irq) {
+- dev_err(&pdev->dev, "No IRQ resource\n");
+- return -ENODEV;
++ i2c_dev->irq = platform_get_irq(pdev, 0);
++ if (i2c_dev->irq < 0) {
++ ret = i2c_dev->irq;
++ goto err_disable_unprepare_clk;
+ }
+- i2c_dev->irq = irq->start;
+
+ ret = request_irq(i2c_dev->irq, bcm2835_i2c_isr, IRQF_SHARED,
+ dev_name(&pdev->dev), i2c_dev);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not request IRQ\n");
+- return -ENODEV;
++ goto err_disable_unprepare_clk;
+ }
+
+ adap = &i2c_dev->adapter;
+@@ -477,11 +481,26 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
+ adap->dev.of_node = pdev->dev.of_node;
+ adap->quirks = of_device_get_match_data(&pdev->dev);
+
++ /*
++ * Disable the hardware clock stretching timeout. SMBUS
++ * specifies a limit for how long the device can stretch the
++ * clock, but core I2C doesn't.
++ */
++ bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0);
+ bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0);
+
+ ret = i2c_add_adapter(adap);
+ if (ret)
+- free_irq(i2c_dev->irq, i2c_dev);
++ goto err_free_irq;
++
++ return 0;
++
++err_free_irq:
++ free_irq(i2c_dev->irq, i2c_dev);
++err_disable_unprepare_clk:
++ clk_disable_unprepare(i2c_dev->bus_clk);
++err_put_exclusive_rate:
++ clk_rate_exclusive_put(i2c_dev->bus_clk);
+
+ return ret;
+ }
+diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
+index 490ee3962645d..b00f35c0b0662 100644
+--- a/drivers/i2c/busses/i2c-brcmstb.c
++++ b/drivers/i2c/busses/i2c-brcmstb.c
+@@ -673,7 +673,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev)
+
+ /* set the data in/out register size for compatible SoCs */
+ if (of_device_is_compatible(dev->device->of_node,
+- "brcmstb,brcmper-i2c"))
++ "brcm,brcmper-i2c"))
+ dev->data_regsz = sizeof(u8);
+ else
+ dev->data_regsz = sizeof(u32);
+diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
+index 805c77143a0f9..5ea92dc97f0c5 100644
+--- a/drivers/i2c/busses/i2c-cadence.c
++++ b/drivers/i2c/busses/i2c-cadence.c
+@@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr)
+ */
+ static irqreturn_t cdns_i2c_master_isr(void *ptr)
+ {
+- unsigned int isr_status, avail_bytes, updatetx;
++ unsigned int isr_status, avail_bytes;
+ unsigned int bytes_to_send;
+- bool hold_quirk;
++ bool updatetx;
+ struct cdns_i2c *id = ptr;
+ /* Signal completion only after everything is updated */
+ int done_flag = 0;
+@@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
+ * Check if transfer size register needs to be updated again for a
+ * large data receive operation.
+ */
+- updatetx = 0;
+- if (id->recv_count > id->curr_recv_count)
+- updatetx = 1;
+-
+- hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx;
++ updatetx = id->recv_count > id->curr_recv_count;
+
+ /* When receiving, handle data interrupt and completion interrupt */
+ if (id->p_recv_buf &&
+@@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
+ break;
+ }
+
+- if (cdns_is_holdquirk(id, hold_quirk))
++ if (cdns_is_holdquirk(id, updatetx))
+ break;
+ }
+
+@@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
+ * maintain transfer size non-zero while performing a large
+ * receive operation.
+ */
+- if (cdns_is_holdquirk(id, hold_quirk)) {
++ if (cdns_is_holdquirk(id, updatetx)) {
+ /* wait while fifo is full */
+ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) !=
+ (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH))
+@@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
+ CDNS_I2C_XFER_SIZE_OFFSET);
+ id->curr_recv_count = id->recv_count;
+ }
+- } else if (id->recv_count && !hold_quirk &&
+- !id->curr_recv_count) {
+-
+- /* Set the slave address in address register*/
+- cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
+- CDNS_I2C_ADDR_OFFSET);
+-
+- if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) {
+- cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE,
+- CDNS_I2C_XFER_SIZE_OFFSET);
+- id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE;
+- } else {
+- cdns_i2c_writereg(id->recv_count,
+- CDNS_I2C_XFER_SIZE_OFFSET);
+- id->curr_recv_count = id->recv_count;
+- }
+ }
+
+ /* Clear hold (if not repeated start) and signal completion */
+@@ -593,8 +573,13 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id)
+ ctrl_reg = cdns_i2c_readreg(CDNS_I2C_CR_OFFSET);
+ ctrl_reg |= CDNS_I2C_CR_RW | CDNS_I2C_CR_CLR_FIFO;
+
++ /*
++ * Receive up to I2C_SMBUS_BLOCK_MAX data bytes, plus one message length
++ * byte, plus one checksum byte if PEC is enabled. p_msg->len will be 2 if
++ * PEC is enabled, otherwise 1.
++ */
+ if (id->p_msg->flags & I2C_M_RECV_LEN)
+- id->recv_count = I2C_SMBUS_BLOCK_MAX + 1;
++ id->recv_count = I2C_SMBUS_BLOCK_MAX + id->p_msg->len;
+
+ id->curr_recv_count = id->recv_count;
+
+@@ -760,7 +745,7 @@ static void cdns_i2c_master_reset(struct i2c_adapter *adap)
+ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg,
+ struct i2c_adapter *adap)
+ {
+- unsigned long time_left;
++ unsigned long time_left, msg_timeout;
+ u32 reg;
+
+ id->p_msg = msg;
+@@ -785,8 +770,16 @@ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg,
+ else
+ cdns_i2c_msend(id);
+
++ /* Minimal time to execute this message */
++ msg_timeout = msecs_to_jiffies((1000 * msg->len * BITS_PER_BYTE) / id->i2c_clk);
++ /* Plus some wiggle room */
++ msg_timeout += msecs_to_jiffies(500);
++
++ if (msg_timeout < adap->timeout)
++ msg_timeout = adap->timeout;
++
+ /* Wait for the signal of completion */
+- time_left = wait_for_completion_timeout(&id->xfer_done, adap->timeout);
++ time_left = wait_for_completion_timeout(&id->xfer_done, msg_timeout);
+ if (time_left == 0) {
+ cdns_i2c_master_reset(adap);
+ dev_err(id->adap.dev.parent,
+@@ -801,6 +794,9 @@ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg,
+ if (id->err_status & CDNS_I2C_IXR_ARB_LOST)
+ return -EAGAIN;
+
++ if (msg->flags & I2C_M_RECV_LEN)
++ msg->len += min_t(unsigned int, msg->buf[0], I2C_SMBUS_BLOCK_MAX);
++
+ return 0;
+ }
+
+@@ -832,8 +828,10 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ #if IS_ENABLED(CONFIG_I2C_SLAVE)
+ /* Check i2c operating mode and switch if possible */
+ if (id->dev_mode == CDNS_I2C_MODE_SLAVE) {
+- if (id->slave_state != CDNS_I2C_SLAVE_STATE_IDLE)
+- return -EAGAIN;
++ if (id->slave_state != CDNS_I2C_SLAVE_STATE_IDLE) {
++ ret = -EAGAIN;
++ goto out;
++ }
+
+ /* Set mode to master */
+ cdns_i2c_set_mode(CDNS_I2C_MODE_MASTER, id);
+@@ -1330,6 +1328,7 @@ static int cdns_i2c_probe(struct platform_device *pdev)
+ return 0;
+
+ err_clk_dis:
++ clk_notifier_unregister(id->clk, &id->clk_rate_change_nb);
+ clk_disable_unprepare(id->clk);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
+diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c
+index 72df563477b1c..f8639a4457d23 100644
+--- a/drivers/i2c/busses/i2c-cbus-gpio.c
++++ b/drivers/i2c/busses/i2c-cbus-gpio.c
+@@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adapter *adapter)
+ }
+
+ static const struct i2c_algorithm cbus_i2c_algo = {
+- .smbus_xfer = cbus_i2c_smbus_xfer,
+- .functionality = cbus_i2c_func,
++ .smbus_xfer = cbus_i2c_smbus_xfer,
++ .smbus_xfer_atomic = cbus_i2c_smbus_xfer,
++ .functionality = cbus_i2c_func,
+ };
+
+ static int cbus_i2c_remove(struct platform_device *pdev)
+diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
+index bf2a4920638ab..4e752321b95e0 100644
+--- a/drivers/i2c/busses/i2c-designware-common.c
++++ b/drivers/i2c/busses/i2c-designware-common.c
+@@ -351,7 +351,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
+ *
+ * If your hardware is free from tHD;STA issue, try this one.
+ */
+- return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset;
++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * tSYMBOL, MICRO) -
++ 8 + offset;
+ else
+ /*
+ * Conditional expression:
+@@ -367,7 +368,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
+ * The reason why we need to take into account "tf" here,
+ * is the same as described in i2c_dw_scl_lcnt().
+ */
+- return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset;
++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tSYMBOL + tf), MICRO) -
++ 3 + offset;
+ }
+
+ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
+@@ -383,7 +385,8 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
+ * account the fall time of SCL signal (tf). Default tf value
+ * should be 0.3 us, for safety.
+ */
+- return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset;
++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tLOW + tf), MICRO) -
++ 1 + offset;
+ }
+
+ int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev)
+@@ -462,7 +465,7 @@ void __i2c_dw_disable(struct dw_i2c_dev *dev)
+ dev_warn(dev->dev, "timeout in disabling adapter\n");
+ }
+
+-unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
++u32 i2c_dw_clk_rate(struct dw_i2c_dev *dev)
+ {
+ /*
+ * Clock is not necessary if we got LCNT/HCNT values directly from
+@@ -477,9 +480,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare)
+ {
+ int ret;
+
+- if (IS_ERR(dev->clk))
+- return PTR_ERR(dev->clk);
+-
+ if (prepare) {
+ /* Optional interface clock */
+ ret = clk_prepare_enable(dev->pclk);
+diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
+index 60a2e750cee96..59b36e0644f31 100644
+--- a/drivers/i2c/busses/i2c-designware-core.h
++++ b/drivers/i2c/busses/i2c-designware-core.h
+@@ -126,8 +126,9 @@
+ * status codes
+ */
+ #define STATUS_IDLE 0x0
+-#define STATUS_WRITE_IN_PROGRESS 0x1
+-#define STATUS_READ_IN_PROGRESS 0x2
++#define STATUS_ACTIVE 0x1
++#define STATUS_WRITE_IN_PROGRESS 0x2
++#define STATUS_READ_IN_PROGRESS 0x4
+
+ /*
+ * operation modes
+@@ -309,7 +310,7 @@ int i2c_dw_init_regmap(struct dw_i2c_dev *dev);
+ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset);
+ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset);
+ int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev);
+-unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev);
++u32 i2c_dw_clk_rate(struct dw_i2c_dev *dev);
+ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare);
+ int i2c_dw_acquire_lock(struct dw_i2c_dev *dev);
+ void i2c_dw_release_lock(struct dw_i2c_dev *dev);
+@@ -322,12 +323,14 @@ void i2c_dw_disable_int(struct dw_i2c_dev *dev);
+
+ static inline void __i2c_dw_enable(struct dw_i2c_dev *dev)
+ {
++ dev->status |= STATUS_ACTIVE;
+ regmap_write(dev->map, DW_IC_ENABLE, 1);
+ }
+
+ static inline void __i2c_dw_disable_nowait(struct dw_i2c_dev *dev)
+ {
+ regmap_write(dev->map, DW_IC_ENABLE, 0);
++ dev->status &= ~STATUS_ACTIVE;
+ }
+
+ void __i2c_dw_disable(struct dw_i2c_dev *dev);
+diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
+index 9b08bb5df38d2..b79e1380ff68d 100644
+--- a/drivers/i2c/busses/i2c-designware-master.c
++++ b/drivers/i2c/busses/i2c-designware-master.c
+@@ -525,9 +525,21 @@ i2c_dw_read(struct dw_i2c_dev *dev)
+ u32 flags = msgs[dev->msg_read_idx].flags;
+
+ regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
++ tmp &= DW_IC_DATA_CMD_DAT;
+ /* Ensure length byte is a valid value */
+- if (flags & I2C_M_RECV_LEN &&
+- (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
++ if (flags & I2C_M_RECV_LEN) {
++ /*
++ * if IC_EMPTYFIFO_HOLD_MASTER_EN is set, which cannot be
++ * detected from the registers, the controller can be
++ * disabled if the STOP bit is set. But it is only set
++ * after receiving block data response length in
++ * I2C_FUNC_SMBUS_BLOCK_DATA case. That needs to read
++ * another byte with STOP bit set when the block data
++ * response length is invalid to complete the transaction.
++ */
++ if (!tmp || tmp > I2C_SMBUS_BLOCK_MAX)
++ tmp = 1;
++
+ len = i2c_dw_recv_len(dev, tmp);
+ }
+ *buf++ = tmp;
+@@ -720,6 +732,19 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
+ u32 stat;
+
+ stat = i2c_dw_read_clear_intrbits(dev);
++
++ if (!(dev->status & STATUS_ACTIVE)) {
++ /*
++ * Unexpected interrupt in driver point of view. State
++ * variables are either unset or stale so acknowledge and
++ * disable interrupts for suppressing further interrupts if
++ * interrupt really came from this HW (E.g. firmware has left
++ * the HW active).
++ */
++ regmap_write(dev->map, DW_IC_INTR_MASK, 0);
++ return 0;
++ }
++
+ if (stat & DW_IC_INTR_TX_ABRT) {
+ dev->cmd_err |= DW_IC_ERR_TX_ABRT;
+ dev->status = STATUS_IDLE;
+diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
+index 0f409a4c2da0d..de8dd3e3333ed 100644
+--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
++++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
+@@ -39,10 +39,10 @@ enum dw_pci_ctl_id_t {
+ };
+
+ struct dw_scl_sda_cfg {
+- u32 ss_hcnt;
+- u32 fs_hcnt;
+- u32 ss_lcnt;
+- u32 fs_lcnt;
++ u16 ss_hcnt;
++ u16 fs_hcnt;
++ u16 ss_lcnt;
++ u16 fs_lcnt;
+ u32 sda_hold;
+ };
+
+@@ -398,6 +398,8 @@ static const struct pci_device_id i2_designware_pci_ids[] = {
+ { PCI_VDEVICE(ATI, 0x73a4), navi_amd },
+ { PCI_VDEVICE(ATI, 0x73e4), navi_amd },
+ { PCI_VDEVICE(ATI, 0x73c4), navi_amd },
++ { PCI_VDEVICE(ATI, 0x7444), navi_amd },
++ { PCI_VDEVICE(ATI, 0x7464), navi_amd },
+ { 0,}
+ };
+ MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids);
+diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
+index 21113665ddeac..718bebe4fb877 100644
+--- a/drivers/i2c/busses/i2c-designware-platdrv.c
++++ b/drivers/i2c/busses/i2c-designware-platdrv.c
+@@ -262,8 +262,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
+ goto exit_reset;
+ }
+
+- dev->clk = devm_clk_get(&pdev->dev, NULL);
+- if (!i2c_dw_prepare_clk(dev, true)) {
++ dev->clk = devm_clk_get_optional(&pdev->dev, NULL);
++ if (IS_ERR(dev->clk)) {
++ ret = PTR_ERR(dev->clk);
++ goto exit_reset;
++ }
++
++ ret = i2c_dw_prepare_clk(dev, true);
++ if (ret)
++ goto exit_reset;
++
++ if (dev->clk) {
+ u64 clk_khz;
+
+ dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
+diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c
+index acf3948120613..6bdebe51ea119 100644
+--- a/drivers/i2c/busses/i2c-hisi.c
++++ b/drivers/i2c/busses/i2c-hisi.c
+@@ -315,6 +315,13 @@ static void hisi_i2c_xfer_msg(struct hisi_i2c_controller *ctlr)
+ max_write == 0)
+ break;
+ }
++
++ /*
++ * Disable the TX_EMPTY interrupt after finishing all the messages to
++ * avoid overwhelming the CPU.
++ */
++ if (ctlr->msg_tx_idx == ctlr->msg_num)
++ hisi_i2c_disable_int(ctlr, HISI_I2C_INT_TX_EMPTY);
+ }
+
+ static irqreturn_t hisi_i2c_irq(int irq, void *context)
+@@ -322,6 +329,14 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context)
+ struct hisi_i2c_controller *ctlr = context;
+ u32 int_stat;
+
++ /*
++ * Don't handle the interrupt if cltr->completion is NULL. We may
++ * reach here because the interrupt is spurious or the transfer is
++ * started by another port (e.g. firmware) rather than us.
++ */
++ if (!ctlr->completion)
++ return IRQ_NONE;
++
+ int_stat = readl(ctlr->iobase + HISI_I2C_INT_MSTAT);
+ hisi_i2c_clear_int(ctlr, int_stat);
+ if (!(int_stat & HISI_I2C_INT_ALL))
+@@ -340,7 +355,11 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context)
+ hisi_i2c_read_rx_fifo(ctlr);
+
+ out:
+- if (int_stat & HISI_I2C_INT_TRANS_CPLT || ctlr->xfer_err) {
++ /*
++ * Only use TRANS_CPLT to indicate the completion. On error cases we'll
++ * get two interrupts, INT_ERR first then TRANS_CPLT.
++ */
++ if (int_stat & HISI_I2C_INT_TRANS_CPLT) {
+ hisi_i2c_disable_int(ctlr, HISI_I2C_INT_ALL);
+ hisi_i2c_clear_int(ctlr, HISI_I2C_INT_ALL);
+ complete(ctlr->completion);
+diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
+index 89ae78ef1a1cc..74d343d1a36b8 100644
+--- a/drivers/i2c/busses/i2c-i801.c
++++ b/drivers/i2c/busses/i2c-i801.c
+@@ -763,6 +763,11 @@ static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *
+ int result = 0;
+ unsigned char hostc;
+
++ if (read_write == I2C_SMBUS_READ && command == I2C_SMBUS_BLOCK_DATA)
++ data->block[0] = I2C_SMBUS_BLOCK_MAX;
++ else if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
++ return -EPROTO;
++
+ if (command == I2C_SMBUS_I2C_BLOCK_DATA) {
+ if (read_write == I2C_SMBUS_WRITE) {
+ /* set I2C_EN bit in configuration register */
+@@ -776,16 +781,6 @@ static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *
+ }
+ }
+
+- if (read_write == I2C_SMBUS_WRITE
+- || command == I2C_SMBUS_I2C_BLOCK_DATA) {
+- if (data->block[0] < 1)
+- data->block[0] = 1;
+- if (data->block[0] > I2C_SMBUS_BLOCK_MAX)
+- data->block[0] = I2C_SMBUS_BLOCK_MAX;
+- } else {
+- data->block[0] = 32; /* max for SMBus block reads */
+- }
+-
+ /* Experience has shown that the block buffer can only be used for
+ SMBus (not I2C) block transactions, even though the datasheet
+ doesn't mention this limitation. */
+@@ -1247,6 +1242,7 @@ static const struct {
+ */
+ { "Latitude 5480", 0x29 },
+ { "Vostro V131", 0x1d },
++ { "Vostro 5568", 0x29 },
+ };
+
+ static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv)
+@@ -1493,7 +1489,6 @@ static struct platform_device *
+ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+ struct resource *tco_res)
+ {
+- static DEFINE_MUTEX(p2sb_mutex);
+ struct resource *res;
+ unsigned int devfn;
+ u64 base64_addr;
+@@ -1506,7 +1501,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+ * enumerated by the PCI subsystem, so we need to unhide/hide it
+ * to lookup the P2SB BAR.
+ */
+- mutex_lock(&p2sb_mutex);
++ pci_lock_rescan_remove();
+
+ devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 1);
+
+@@ -1524,7 +1519,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+ /* Hide the P2SB device, if it was hidden before */
+ if (hidden)
+ pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden);
+- mutex_unlock(&p2sb_mutex);
++ pci_unlock_rescan_remove();
+
+ res = &tco_res[1];
+ if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
+diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
+index 9f71daf6db64b..c073f5b8833a2 100644
+--- a/drivers/i2c/busses/i2c-ibm_iic.c
++++ b/drivers/i2c/busses/i2c-ibm_iic.c
+@@ -694,10 +694,8 @@ static int iic_probe(struct platform_device *ofdev)
+ int ret;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+- if (!dev) {
+- dev_err(&ofdev->dev, "failed to allocate device data\n");
++ if (!dev)
+ return -ENOMEM;
+- }
+
+ platform_set_drvdata(ofdev, dev);
+
+diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
+index 8b9ba055c4186..c688f11ae5c9f 100644
+--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
++++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
+@@ -200,8 +200,8 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx)
+ /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
+ static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
+ {
+- u8 prescale, filt, sethold, clkhi, clklo, datavd;
+- unsigned int clk_rate, clk_cycle;
++ u8 prescale, filt, sethold, datavd;
++ unsigned int clk_rate, clk_cycle, clkhi, clklo;
+ enum lpi2c_imx_pincfg pincfg;
+ unsigned int temp;
+
+@@ -462,6 +462,8 @@ static int lpi2c_imx_xfer(struct i2c_adapter *adapter,
+ if (num == 1 && msgs[0].len == 0)
+ goto stop;
+
++ lpi2c_imx->rx_buf = NULL;
++ lpi2c_imx->tx_buf = NULL;
+ lpi2c_imx->delivered = 0;
+ lpi2c_imx->msglen = msgs[i].len;
+ init_completion(&lpi2c_imx->complete);
+@@ -502,10 +504,14 @@ disable:
+ static irqreturn_t lpi2c_imx_isr(int irq, void *dev_id)
+ {
+ struct lpi2c_imx_struct *lpi2c_imx = dev_id;
++ unsigned int enabled;
+ unsigned int temp;
+
++ enabled = readl(lpi2c_imx->base + LPI2C_MIER);
++
+ lpi2c_imx_intctrl(lpi2c_imx, 0);
+ temp = readl(lpi2c_imx->base + LPI2C_MSR);
++ temp &= enabled;
+
+ if (temp & MSR_RDF)
+ lpi2c_imx_read_rxfifo(lpi2c_imx);
+diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
+index 3576b63a6c037..5e8853d3f8da7 100644
+--- a/drivers/i2c/busses/i2c-imx.c
++++ b/drivers/i2c/busses/i2c-imx.c
+@@ -1051,7 +1051,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
+ int i, result;
+ unsigned int temp;
+ int block_data = msgs->flags & I2C_M_RECV_LEN;
+- int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data;
++ int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE &&
++ msgs->len >= DMA_THRESHOLD && !block_data;
+
+ dev_dbg(&i2c_imx->adapter.dev,
+ "<%s> write slave address: addr=0x%x\n",
+@@ -1217,7 +1218,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter,
+ result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic);
+ } else {
+ if (!atomic &&
+- i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD)
++ i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD &&
++ msgs[i].flags & I2C_M_DMA_SAFE)
+ result = i2c_imx_dma_write(i2c_imx, &msgs[i]);
+ else
+ result = i2c_imx_write(i2c_imx, &msgs[i], atomic);
+@@ -1487,9 +1489,7 @@ static int i2c_imx_remove(struct platform_device *pdev)
+ struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev);
+ int irq, ret;
+
+- ret = pm_runtime_resume_and_get(&pdev->dev);
+- if (ret < 0)
+- return ret;
++ ret = pm_runtime_get_sync(&pdev->dev);
+
+ /* remove adapter */
+ dev_dbg(&i2c_imx->adapter.dev, "adapter removed\n");
+@@ -1498,17 +1498,21 @@ static int i2c_imx_remove(struct platform_device *pdev)
+ if (i2c_imx->dma)
+ i2c_imx_dma_free(i2c_imx);
+
+- /* setup chip registers to defaults */
+- imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IADR);
+- imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IFDR);
+- imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2CR);
+- imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR);
++ if (ret >= 0) {
++ /* setup chip registers to defaults */
++ imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IADR);
++ imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IFDR);
++ imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2CR);
++ imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR);
++ clk_disable(i2c_imx->clk);
++ }
+
+ clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
+ irq = platform_get_irq(pdev, 0);
+ if (irq >= 0)
+ free_irq(irq, i2c_imx);
+- clk_disable_unprepare(i2c_imx->clk);
++
++ clk_unprepare(i2c_imx->clk);
+
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
+index a6187cbec2c94..10cdd501d4c52 100644
+--- a/drivers/i2c/busses/i2c-ismt.c
++++ b/drivers/i2c/busses/i2c-ismt.c
+@@ -82,6 +82,7 @@
+
+ #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */
+ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */
++#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */
+
+ /* Hardware Descriptor Constants - Control Field */
+ #define ISMT_DESC_CWRL 0x01 /* Command/Write Length */
+@@ -175,6 +176,8 @@ struct ismt_priv {
+ u8 head; /* ring buffer head pointer */
+ struct completion cmp; /* interrupt completion */
+ u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */
++ dma_addr_t log_dma;
++ u32 *log;
+ };
+
+ static const struct pci_device_id ismt_ids[] = {
+@@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
+ memset(desc, 0, sizeof(struct ismt_desc));
+ desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write);
+
++ /* Always clear the log entries */
++ memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32));
++
+ /* Initialize common control bits */
+ if (likely(pci_dev_msi_enabled(priv->pci_dev)))
+ desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR;
+@@ -503,6 +509,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
+ if (read_write == I2C_SMBUS_WRITE) {
+ /* Block Write */
+ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n");
++ if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
++ return -EINVAL;
++
+ dma_size = data->block[0] + 1;
+ dma_direction = DMA_TO_DEVICE;
+ desc->wr_len_cmd = dma_size;
+@@ -522,6 +531,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
+
+ case I2C_SMBUS_BLOCK_PROC_CALL:
+ dev_dbg(dev, "I2C_SMBUS_BLOCK_PROC_CALL\n");
++ if (data->block[0] > I2C_SMBUS_BLOCK_MAX)
++ return -EINVAL;
++
+ dma_size = I2C_SMBUS_BLOCK_MAX;
+ desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, 1);
+ desc->wr_len_cmd = data->block[0] + 1;
+@@ -708,6 +720,8 @@ static void ismt_hw_init(struct ismt_priv *priv)
+ /* initialize the Master Descriptor Base Address (MDBA) */
+ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA);
+
++ writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL);
++
+ /* initialize the Master Control Register (MCTRL) */
+ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL);
+
+@@ -795,6 +809,12 @@ static int ismt_dev_init(struct ismt_priv *priv)
+ priv->head = 0;
+ init_completion(&priv->cmp);
+
++ priv->log = dmam_alloc_coherent(&priv->pci_dev->dev,
++ ISMT_LOG_ENTRIES * sizeof(u32),
++ &priv->log_dma, GFP_KERNEL);
++ if (!priv->log)
++ return -ENOMEM;
++
+ return 0;
+ }
+
+diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c
+index ef73a42577cc7..07eb819072c4f 100644
+--- a/drivers/i2c/busses/i2c-meson.c
++++ b/drivers/i2c/busses/i2c-meson.c
+@@ -465,18 +465,18 @@ static int meson_i2c_probe(struct platform_device *pdev)
+ */
+ meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_START, 0);
+
+- ret = i2c_add_adapter(&i2c->adap);
+- if (ret < 0) {
+- clk_disable_unprepare(i2c->clk);
+- return ret;
+- }
+-
+ /* Disable filtering */
+ meson_i2c_set_mask(i2c, REG_SLAVE_ADDR,
+ REG_SLV_SDA_FILTER | REG_SLV_SCL_FILTER, 0);
+
+ meson_i2c_set_clk_div(i2c, timings.bus_freq_hz);
+
++ ret = i2c_add_adapter(&i2c->adap);
++ if (ret < 0) {
++ clk_disable_unprepare(i2c->clk);
++ return ret;
++ }
++
+ return 0;
+ }
+
+diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c
+index 8716032f030a0..0e840eba4fd64 100644
+--- a/drivers/i2c/busses/i2c-mlxbf.c
++++ b/drivers/i2c/busses/i2c-mlxbf.c
+@@ -6,6 +6,7 @@
+ */
+
+ #include <linux/acpi.h>
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+ #include <linux/err.h>
+ #include <linux/interrupt.h>
+@@ -63,13 +64,14 @@
+ */
+ #define MLXBF_I2C_TYU_PLL_OUT_FREQ (400 * 1000 * 1000)
+ /* Reference clock for Bluefield - 156 MHz. */
+-#define MLXBF_I2C_PLL_IN_FREQ (156 * 1000 * 1000)
++#define MLXBF_I2C_PLL_IN_FREQ 156250000ULL
+
+ /* Constant used to determine the PLL frequency. */
+-#define MLNXBF_I2C_COREPLL_CONST 16384
++#define MLNXBF_I2C_COREPLL_CONST 16384ULL
++
++#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000ULL
+
+ /* PLL registers. */
+-#define MLXBF_I2C_CORE_PLL_REG0 0x0
+ #define MLXBF_I2C_CORE_PLL_REG1 0x4
+ #define MLXBF_I2C_CORE_PLL_REG2 0x8
+
+@@ -181,22 +183,15 @@
+ #define MLXBF_I2C_COREPLL_FREQ MLXBF_I2C_TYU_PLL_OUT_FREQ
+
+ /* Core PLL TYU configuration. */
+-#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(12, 0)
+-#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(3, 0)
+-#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(5, 0)
+-
+-#define MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT 3
+-#define MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT 16
+-#define MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT 20
++#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(15, 3)
++#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(19, 16)
++#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(25, 20)
+
+ /* Core PLL YU configuration. */
+ #define MLXBF_I2C_COREPLL_CORE_F_YU_MASK GENMASK(25, 0)
+ #define MLXBF_I2C_COREPLL_CORE_OD_YU_MASK GENMASK(3, 0)
+-#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(5, 0)
++#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(31, 26)
+
+-#define MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT 0
+-#define MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT 1
+-#define MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT 26
+
+ /* Core PLL frequency. */
+ static u64 mlxbf_i2c_corepll_frequency;
+@@ -311,6 +306,7 @@ static u64 mlxbf_i2c_corepll_frequency;
+ * exact.
+ */
+ #define MLXBF_I2C_SMBUS_TIMEOUT (300 * 1000) /* 300ms */
++#define MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT (300 * 1000) /* 300ms */
+
+ /* Encapsulates timing parameters. */
+ struct mlxbf_i2c_timings {
+@@ -479,8 +475,6 @@ static struct mutex mlxbf_i2c_bus_lock;
+ #define MLXBF_I2C_MASK_8 GENMASK(7, 0)
+ #define MLXBF_I2C_MASK_16 GENMASK(15, 0)
+
+-#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000
+-
+ /*
+ * Function to poll a set of bits at a specific address; it checks whether
+ * the bits are equal to zero when eq_zero is set to 'true', and not equal
+@@ -521,6 +515,25 @@ static bool mlxbf_smbus_master_wait_for_idle(struct mlxbf_i2c_priv *priv)
+ return false;
+ }
+
++/*
++ * wait for the lock to be released before acquiring it.
++ */
++static bool mlxbf_i2c_smbus_master_lock(struct mlxbf_i2c_priv *priv)
++{
++ if (mlxbf_smbus_poll(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW,
++ MLXBF_I2C_MASTER_LOCK_BIT, true,
++ MLXBF_I2C_SMBUS_LOCK_POLL_TIMEOUT))
++ return true;
++
++ return false;
++}
++
++static void mlxbf_i2c_smbus_master_unlock(struct mlxbf_i2c_priv *priv)
++{
++ /* Clear the gw to clear the lock */
++ writel(0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_GW);
++}
++
+ static bool mlxbf_i2c_smbus_transaction_success(u32 master_status,
+ u32 cause_status)
+ {
+@@ -669,7 +682,7 @@ static int mlxbf_i2c_smbus_enable(struct mlxbf_i2c_priv *priv, u8 slave,
+ /* Clear status bits. */
+ writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_STATUS);
+ /* Set the cause data. */
+- writel(~0x0, priv->smbus->io + MLXBF_I2C_CAUSE_OR_CLEAR);
++ writel(~0x0, priv->mst_cause->io + MLXBF_I2C_CAUSE_OR_CLEAR);
+ /* Zero PEC byte. */
+ writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_PEC);
+ /* Zero byte count. */
+@@ -712,10 +725,19 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv,
+ slave = request->slave & GENMASK(6, 0);
+ addr = slave << 1;
+
+- /* First of all, check whether the HW is idle. */
+- if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv)))
++ /*
++ * Try to acquire the smbus gw lock before any reads of the GW register since
++ * a read sets the lock.
++ */
++ if (WARN_ON(!mlxbf_i2c_smbus_master_lock(priv)))
+ return -EBUSY;
+
++ /* Check whether the HW is idle */
++ if (WARN_ON(!mlxbf_smbus_master_wait_for_idle(priv))) {
++ ret = -EBUSY;
++ goto out_unlock;
++ }
++
+ /* Set first byte. */
+ data_desc[data_idx++] = addr;
+
+@@ -738,6 +760,11 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv,
+ if (flags & MLXBF_I2C_F_WRITE) {
+ write_en = 1;
+ write_len += operation->length;
++ if (data_idx + operation->length >
++ MLXBF_I2C_MASTER_DATA_DESC_SIZE) {
++ ret = -ENOBUFS;
++ goto out_unlock;
++ }
+ memcpy(data_desc + data_idx,
+ operation->buffer, operation->length);
+ data_idx += operation->length;
+@@ -769,7 +796,7 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv,
+ ret = mlxbf_i2c_smbus_enable(priv, slave, write_len, block_en,
+ pec_en, 0);
+ if (ret)
+- return ret;
++ goto out_unlock;
+ }
+
+ if (read_en) {
+@@ -796,6 +823,9 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv,
+ priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_FSM);
+ }
+
++out_unlock:
++ mlxbf_i2c_smbus_master_unlock(priv);
++
+ return ret;
+ }
+
+@@ -1407,24 +1437,19 @@ static int mlxbf_i2c_init_master(struct platform_device *pdev,
+ return 0;
+ }
+
+-static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res)
++static u64 mlxbf_i2c_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res)
+ {
+- u64 core_frequency, pad_frequency;
++ u64 core_frequency;
+ u8 core_od, core_r;
+ u32 corepll_val;
+ u16 core_f;
+
+- pad_frequency = MLXBF_I2C_PLL_IN_FREQ;
+-
+ corepll_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1);
+
+ /* Get Core PLL configuration bits. */
+- core_f = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_F_TYU_MASK;
+- core_od = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK;
+- core_r = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_R_TYU_MASK;
++ core_f = FIELD_GET(MLXBF_I2C_COREPLL_CORE_F_TYU_MASK, corepll_val);
++ core_od = FIELD_GET(MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK, corepll_val);
++ core_r = FIELD_GET(MLXBF_I2C_COREPLL_CORE_R_TYU_MASK, corepll_val);
+
+ /*
+ * Compute PLL output frequency as follow:
+@@ -1436,31 +1461,26 @@ static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res)
+ * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency
+ * and PadFrequency, respectively.
+ */
+- core_frequency = pad_frequency * (++core_f);
++ core_frequency = MLXBF_I2C_PLL_IN_FREQ * (++core_f);
+ core_frequency /= (++core_r) * (++core_od);
+
+ return core_frequency;
+ }
+
+-static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res)
++static u64 mlxbf_i2c_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res)
+ {
+ u32 corepll_reg1_val, corepll_reg2_val;
+- u64 corepll_frequency, pad_frequency;
++ u64 corepll_frequency;
+ u8 core_od, core_r;
+ u32 core_f;
+
+- pad_frequency = MLXBF_I2C_PLL_IN_FREQ;
+-
+ corepll_reg1_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1);
+ corepll_reg2_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG2);
+
+ /* Get Core PLL configuration bits */
+- core_f = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_F_YU_MASK;
+- core_r = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_R_YU_MASK;
+- core_od = rol32(corepll_reg2_val, MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT) &
+- MLXBF_I2C_COREPLL_CORE_OD_YU_MASK;
++ core_f = FIELD_GET(MLXBF_I2C_COREPLL_CORE_F_YU_MASK, corepll_reg1_val);
++ core_r = FIELD_GET(MLXBF_I2C_COREPLL_CORE_R_YU_MASK, corepll_reg1_val);
++ core_od = FIELD_GET(MLXBF_I2C_COREPLL_CORE_OD_YU_MASK, corepll_reg2_val);
+
+ /*
+ * Compute PLL output frequency as follow:
+@@ -1472,7 +1492,7 @@ static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res)
+ * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency
+ * and PadFrequency, respectively.
+ */
+- corepll_frequency = (pad_frequency * core_f) / MLNXBF_I2C_COREPLL_CONST;
++ corepll_frequency = (MLXBF_I2C_PLL_IN_FREQ * core_f) / MLNXBF_I2C_COREPLL_CONST;
+ corepll_frequency /= (++core_r) * (++core_od);
+
+ return corepll_frequency;
+@@ -2180,14 +2200,14 @@ static struct mlxbf_i2c_chip_info mlxbf_i2c_chip[] = {
+ [1] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_1],
+ [2] = &mlxbf_i2c_gpio_res[MLXBF_I2C_CHIP_TYPE_1]
+ },
+- .calculate_freq = mlxbf_calculate_freq_from_tyu
++ .calculate_freq = mlxbf_i2c_calculate_freq_from_tyu
+ },
+ [MLXBF_I2C_CHIP_TYPE_2] = {
+ .type = MLXBF_I2C_CHIP_TYPE_2,
+ .shared_res = {
+ [0] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_2]
+ },
+- .calculate_freq = mlxbf_calculate_freq_from_yu
++ .calculate_freq = mlxbf_i2c_calculate_freq_from_yu
+ }
+ };
+
+diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c
+index 015e11c4663f3..077d716c73caa 100644
+--- a/drivers/i2c/busses/i2c-mlxcpld.c
++++ b/drivers/i2c/busses/i2c-mlxcpld.c
+@@ -49,7 +49,7 @@
+ #define MLXCPLD_LPCI2C_NACK_IND 2
+
+ #define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04
+-#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c
++#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e
+ #define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42
+
+ enum mlxcpld_i2c_frequency {
+diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
+index a6ea1eb1394e1..6c698c10d3cdb 100644
+--- a/drivers/i2c/busses/i2c-mpc.c
++++ b/drivers/i2c/busses/i2c-mpc.c
+@@ -119,23 +119,30 @@ static inline void writeccr(struct mpc_i2c *i2c, u32 x)
+ /* Sometimes 9th clock pulse isn't generated, and slave doesn't release
+ * the bus, because it wants to send ACK.
+ * Following sequence of enabling/disabling and sending start/stop generates
+- * the 9 pulses, so it's all OK.
++ * the 9 pulses, each with a START then ending with STOP, so it's all OK.
+ */
+ static void mpc_i2c_fixup(struct mpc_i2c *i2c)
+ {
+ int k;
+- u32 delay_val = 1000000 / i2c->real_clk + 1;
+-
+- if (delay_val < 2)
+- delay_val = 2;
++ unsigned long flags;
+
+ for (k = 9; k; k--) {
+ writeccr(i2c, 0);
+- writeccr(i2c, CCR_MSTA | CCR_MTX | CCR_MEN);
++ writeb(0, i2c->base + MPC_I2C_SR); /* clear any status bits */
++ writeccr(i2c, CCR_MEN | CCR_MSTA); /* START */
++ readb(i2c->base + MPC_I2C_DR); /* init xfer */
++ udelay(15); /* let it hit the bus */
++ local_irq_save(flags); /* should not be delayed further */
++ writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSTA); /* delay SDA */
+ readb(i2c->base + MPC_I2C_DR);
+- writeccr(i2c, CCR_MEN);
+- udelay(delay_val << 1);
++ if (k != 1)
++ udelay(5);
++ local_irq_restore(flags);
+ }
++ writeccr(i2c, CCR_MEN); /* Initiate STOP */
++ readb(i2c->base + MPC_I2C_DR);
++ udelay(15); /* Let STOP propagate */
++ writeccr(i2c, 0);
+ }
+
+ static int i2c_mpc_wait_sr(struct mpc_i2c *i2c, int mask)
+@@ -492,7 +499,7 @@ static void mpc_i2c_finish(struct mpc_i2c *i2c, int rc)
+
+ static void mpc_i2c_do_action(struct mpc_i2c *i2c)
+ {
+- struct i2c_msg *msg = &i2c->msgs[i2c->curr_msg];
++ struct i2c_msg *msg = NULL;
+ int dir = 0;
+ int recv_len = 0;
+ u8 byte;
+@@ -501,10 +508,13 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c)
+
+ i2c->cntl_bits &= ~(CCR_RSTA | CCR_MTX | CCR_TXAK);
+
+- if (msg->flags & I2C_M_RD)
+- dir = 1;
+- if (msg->flags & I2C_M_RECV_LEN)
+- recv_len = 1;
++ if (i2c->action != MPC_I2C_ACTION_STOP) {
++ msg = &i2c->msgs[i2c->curr_msg];
++ if (msg->flags & I2C_M_RD)
++ dir = 1;
++ if (msg->flags & I2C_M_RECV_LEN)
++ recv_len = 1;
++ }
+
+ switch (i2c->action) {
+ case MPC_I2C_ACTION_RESTART:
+@@ -581,7 +591,7 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c)
+ break;
+ }
+
+- if (msg->len == i2c->byte_posn) {
++ if (msg && msg->len == i2c->byte_posn) {
+ i2c->curr_msg++;
+ i2c->byte_posn = 0;
+
+@@ -636,7 +646,7 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id)
+ status = readb(i2c->base + MPC_I2C_SR);
+ if (status & CSR_MIF) {
+ /* Wait up to 100us for transfer to properly complete */
+- readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100);
++ readb_poll_timeout_atomic(i2c->base + MPC_I2C_SR, status, status & CSR_MCF, 0, 100);
+ writeb(0, i2c->base + MPC_I2C_SR);
+ mpc_i2c_do_intr(i2c, status);
+ return IRQ_HANDLED;
+diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
+index 7d4b3eb7077ad..72acda59eb399 100644
+--- a/drivers/i2c/busses/i2c-mt65xx.c
++++ b/drivers/i2c/busses/i2c-mt65xx.c
+@@ -195,7 +195,7 @@ static const u16 mt_i2c_regs_v2[] = {
+ [OFFSET_CLOCK_DIV] = 0x48,
+ [OFFSET_SOFTRESET] = 0x50,
+ [OFFSET_SCL_MIS_COMP_POINT] = 0x90,
+- [OFFSET_DEBUGSTAT] = 0xe0,
++ [OFFSET_DEBUGSTAT] = 0xe4,
+ [OFFSET_DEBUGCTRL] = 0xe8,
+ [OFFSET_FIFO_STAT] = 0xf4,
+ [OFFSET_FIFO_THRESH] = 0xf8,
+diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c
+index 45fe4a7fe0c03..901f0fb04fee4 100644
+--- a/drivers/i2c/busses/i2c-mt7621.c
++++ b/drivers/i2c/busses/i2c-mt7621.c
+@@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev)
+
+ if (i2c->bus_freq == 0) {
+ dev_warn(i2c->dev, "clock-frequency 0 not supported\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_disable_clk;
+ }
+
+ adap = &i2c->adap;
+@@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev)
+
+ ret = i2c_add_adapter(adap);
+ if (ret < 0)
+- return ret;
++ goto err_disable_clk;
+
+ dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000);
+
++ return 0;
++
++err_disable_clk:
++ clk_disable_unprepare(i2c->clk);
++
+ return ret;
+ }
+
+diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
+index 5c8e94b6cdb5a..9729a71b25672 100644
+--- a/drivers/i2c/busses/i2c-mv64xxx.c
++++ b/drivers/i2c/busses/i2c-mv64xxx.c
+@@ -150,6 +150,7 @@ struct mv64xxx_i2c_data {
+ /* Clk div is 2 to the power n, not 2 to the power n + 1 */
+ bool clk_n_base_0;
+ struct i2c_bus_recovery_info rinfo;
++ bool atomic;
+ };
+
+ static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = {
+@@ -179,7 +180,10 @@ mv64xxx_i2c_prepare_for_io(struct mv64xxx_i2c_data *drv_data,
+ u32 dir = 0;
+
+ drv_data->cntl_bits = MV64XXX_I2C_REG_CONTROL_ACK |
+- MV64XXX_I2C_REG_CONTROL_INTEN | MV64XXX_I2C_REG_CONTROL_TWSIEN;
++ MV64XXX_I2C_REG_CONTROL_TWSIEN;
++
++ if (!drv_data->atomic)
++ drv_data->cntl_bits |= MV64XXX_I2C_REG_CONTROL_INTEN;
+
+ if (msg->flags & I2C_M_RD)
+ dir = 1;
+@@ -409,7 +413,8 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data)
+ case MV64XXX_I2C_ACTION_RCV_DATA_STOP:
+ drv_data->msg->buf[drv_data->byte_posn++] =
+ readl(drv_data->reg_base + drv_data->reg_offsets.data);
+- drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN;
++ if (!drv_data->atomic)
++ drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN;
+ writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP,
+ drv_data->reg_base + drv_data->reg_offsets.control);
+ drv_data->block = 0;
+@@ -427,7 +432,8 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data)
+ drv_data->rc = -EIO;
+ fallthrough;
+ case MV64XXX_I2C_ACTION_SEND_STOP:
+- drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN;
++ if (!drv_data->atomic)
++ drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN;
+ writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP,
+ drv_data->reg_base + drv_data->reg_offsets.control);
+ drv_data->block = 0;
+@@ -514,6 +520,17 @@ mv64xxx_i2c_intr(int irq, void *dev_id)
+
+ while (readl(drv_data->reg_base + drv_data->reg_offsets.control) &
+ MV64XXX_I2C_REG_CONTROL_IFLG) {
++ /*
++ * It seems that sometimes the controller updates the status
++ * register only after it asserts IFLG in the control register.
++ * This may result in weird bugs when in atomic mode. A delay
++ * of 100 ns before reading the status register solves this
++ * issue. This bug does not seem to appear when using
++ * interrupts.
++ */
++ if (drv_data->atomic)
++ ndelay(100);
++
+ status = readl(drv_data->reg_base + drv_data->reg_offsets.status);
+ mv64xxx_i2c_fsm(drv_data, status);
+ mv64xxx_i2c_do_action(drv_data);
+@@ -575,6 +592,17 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data *drv_data)
+ spin_unlock_irqrestore(&drv_data->lock, flags);
+ }
+
++static void mv64xxx_i2c_wait_polling(struct mv64xxx_i2c_data *drv_data)
++{
++ ktime_t timeout = ktime_add_ms(ktime_get(), drv_data->adapter.timeout);
++
++ while (READ_ONCE(drv_data->block) &&
++ ktime_compare(ktime_get(), timeout) < 0) {
++ udelay(5);
++ mv64xxx_i2c_intr(0, drv_data);
++ }
++}
++
+ static int
+ mv64xxx_i2c_execute_msg(struct mv64xxx_i2c_data *drv_data, struct i2c_msg *msg,
+ int is_last)
+@@ -590,7 +618,11 @@ mv64xxx_i2c_execute_msg(struct mv64xxx_i2c_data *drv_data, struct i2c_msg *msg,
+ mv64xxx_i2c_send_start(drv_data);
+ spin_unlock_irqrestore(&drv_data->lock, flags);
+
+- mv64xxx_i2c_wait_for_completion(drv_data);
++ if (!drv_data->atomic)
++ mv64xxx_i2c_wait_for_completion(drv_data);
++ else
++ mv64xxx_i2c_wait_polling(drv_data);
++
+ return drv_data->rc;
+ }
+
+@@ -717,7 +749,7 @@ mv64xxx_i2c_functionality(struct i2c_adapter *adap)
+ }
+
+ static int
+-mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
++mv64xxx_i2c_xfer_core(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+ {
+ struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap);
+ int rc, ret = num;
+@@ -730,7 +762,7 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+ drv_data->msgs = msgs;
+ drv_data->num_msgs = num;
+
+- if (mv64xxx_i2c_can_offload(drv_data))
++ if (mv64xxx_i2c_can_offload(drv_data) && !drv_data->atomic)
+ rc = mv64xxx_i2c_offload_xfer(drv_data);
+ else
+ rc = mv64xxx_i2c_execute_msg(drv_data, &msgs[0], num == 1);
+@@ -747,8 +779,27 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+ return ret;
+ }
+
++static int
++mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
++{
++ struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap);
++
++ drv_data->atomic = 0;
++ return mv64xxx_i2c_xfer_core(adap, msgs, num);
++}
++
++static int mv64xxx_i2c_xfer_atomic(struct i2c_adapter *adap,
++ struct i2c_msg msgs[], int num)
++{
++ struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap);
++
++ drv_data->atomic = 1;
++ return mv64xxx_i2c_xfer_core(adap, msgs, num);
++}
++
+ static const struct i2c_algorithm mv64xxx_i2c_algo = {
+ .master_xfer = mv64xxx_i2c_xfer,
++ .master_xfer_atomic = mv64xxx_i2c_xfer_atomic,
+ .functionality = mv64xxx_i2c_functionality,
+ };
+
+@@ -1047,14 +1098,6 @@ mv64xxx_i2c_remove(struct platform_device *pd)
+ return 0;
+ }
+
+-static void
+-mv64xxx_i2c_shutdown(struct platform_device *pd)
+-{
+- pm_runtime_disable(&pd->dev);
+- if (!pm_runtime_status_suspended(&pd->dev))
+- mv64xxx_i2c_runtime_suspend(&pd->dev);
+-}
+-
+ static const struct dev_pm_ops mv64xxx_i2c_pm_ops = {
+ SET_RUNTIME_PM_OPS(mv64xxx_i2c_runtime_suspend,
+ mv64xxx_i2c_runtime_resume, NULL)
+@@ -1065,7 +1108,6 @@ static const struct dev_pm_ops mv64xxx_i2c_pm_ops = {
+ static struct platform_driver mv64xxx_i2c_driver = {
+ .probe = mv64xxx_i2c_probe,
+ .remove = mv64xxx_i2c_remove,
+- .shutdown = mv64xxx_i2c_shutdown,
+ .driver = {
+ .name = MV64XXX_I2C_CTLR_NAME,
+ .pm = &mv64xxx_i2c_pm_ops,
+diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
+index 864a3f1bd4e14..b353732f593b1 100644
+--- a/drivers/i2c/busses/i2c-mxs.c
++++ b/drivers/i2c/busses/i2c-mxs.c
+@@ -799,7 +799,7 @@ static int mxs_i2c_probe(struct platform_device *pdev)
+ if (!i2c)
+ return -ENOMEM;
+
+- i2c->dev_type = (enum mxs_i2c_devtype)of_device_get_match_data(&pdev->dev);
++ i2c->dev_type = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+ i2c->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(i2c->regs))
+@@ -826,8 +826,8 @@ static int mxs_i2c_probe(struct platform_device *pdev)
+ /* Setup the DMA */
+ i2c->dmach = dma_request_chan(dev, "rx-tx");
+ if (IS_ERR(i2c->dmach)) {
+- dev_err(dev, "Failed to request dma\n");
+- return PTR_ERR(i2c->dmach);
++ return dev_err_probe(dev, PTR_ERR(i2c->dmach),
++ "Failed to request dma\n");
+ }
+
+ platform_set_drvdata(pdev, i2c);
+diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
+index a2d12a5b1c34c..9c5d66bd6dc1c 100644
+--- a/drivers/i2c/busses/i2c-nomadik.c
++++ b/drivers/i2c/busses/i2c-nomadik.c
+@@ -970,12 +970,10 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id)
+ struct i2c_vendor_data *vendor = id->data;
+ u32 max_fifo_threshold = (vendor->fifodepth / 2) - 1;
+
+- dev = devm_kzalloc(&adev->dev, sizeof(struct nmk_i2c_dev), GFP_KERNEL);
+- if (!dev) {
+- dev_err(&adev->dev, "cannot allocate memory\n");
+- ret = -ENOMEM;
+- goto err_no_mem;
+- }
++ dev = devm_kzalloc(&adev->dev, sizeof(*dev), GFP_KERNEL);
++ if (!dev)
++ return -ENOMEM;
++
+ dev->vendor = vendor;
+ dev->adev = adev;
+ nmk_i2c_of_probe(np, dev);
+@@ -996,30 +994,21 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id)
+
+ dev->virtbase = devm_ioremap(&adev->dev, adev->res.start,
+ resource_size(&adev->res));
+- if (!dev->virtbase) {
+- ret = -ENOMEM;
+- goto err_no_mem;
+- }
++ if (!dev->virtbase)
++ return -ENOMEM;
+
+ dev->irq = adev->irq[0];
+ ret = devm_request_irq(&adev->dev, dev->irq, i2c_irq_handler, 0,
+ DRIVER_NAME, dev);
+ if (ret) {
+ dev_err(&adev->dev, "cannot claim the irq %d\n", dev->irq);
+- goto err_no_mem;
++ return ret;
+ }
+
+- dev->clk = devm_clk_get(&adev->dev, NULL);
++ dev->clk = devm_clk_get_enabled(&adev->dev, NULL);
+ if (IS_ERR(dev->clk)) {
+- dev_err(&adev->dev, "could not get i2c clock\n");
+- ret = PTR_ERR(dev->clk);
+- goto err_no_mem;
+- }
+-
+- ret = clk_prepare_enable(dev->clk);
+- if (ret) {
+- dev_err(&adev->dev, "can't prepare_enable clock\n");
+- goto err_no_mem;
++ dev_err(&adev->dev, "could enable i2c clock\n");
++ return PTR_ERR(dev->clk);
+ }
+
+ init_hw(dev);
+@@ -1042,22 +1031,15 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id)
+
+ ret = i2c_add_adapter(adap);
+ if (ret)
+- goto err_no_adap;
++ return ret;
+
+ pm_runtime_put(&adev->dev);
+
+ return 0;
+-
+- err_no_adap:
+- clk_disable_unprepare(dev->clk);
+- err_no_mem:
+-
+- return ret;
+ }
+
+ static void nmk_i2c_remove(struct amba_device *adev)
+ {
+- struct resource *res = &adev->res;
+ struct nmk_i2c_dev *dev = amba_get_drvdata(adev);
+
+ i2c_del_adapter(&dev->adap);
+@@ -1066,8 +1048,6 @@ static void nmk_i2c_remove(struct amba_device *adev)
+ clear_all_interrupts(dev);
+ /* disable the controller */
+ i2c_clr_bit(dev->virtbase + I2C_CR, I2C_CR_PE);
+- clk_disable_unprepare(dev->clk);
+- release_mem_region(res->start, resource_size(res));
+ }
+
+ static struct i2c_vendor_data vendor_stn8815 = {
+diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
+index 2ad166355ec9b..c1b6797372409 100644
+--- a/drivers/i2c/busses/i2c-npcm7xx.c
++++ b/drivers/i2c/busses/i2c-npcm7xx.c
+@@ -123,11 +123,11 @@ enum i2c_addr {
+ * Since the addr regs are sprinkled all over the address space,
+ * use this array to get the address or each register.
+ */
+-#define I2C_NUM_OWN_ADDR 10
++#define I2C_NUM_OWN_ADDR 2
++#define I2C_NUM_OWN_ADDR_SUPPORTED 2
++
+ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = {
+- NPCM_I2CADDR1, NPCM_I2CADDR2, NPCM_I2CADDR3, NPCM_I2CADDR4,
+- NPCM_I2CADDR5, NPCM_I2CADDR6, NPCM_I2CADDR7, NPCM_I2CADDR8,
+- NPCM_I2CADDR9, NPCM_I2CADDR10,
++ NPCM_I2CADDR1, NPCM_I2CADDR2,
+ };
+ #endif
+
+@@ -359,14 +359,14 @@ static int npcm_i2c_get_SCL(struct i2c_adapter *_adap)
+ {
+ struct npcm_i2c *bus = container_of(_adap, struct npcm_i2c, adap);
+
+- return !!(I2CCTL3_SCL_LVL & ioread32(bus->reg + NPCM_I2CCTL3));
++ return !!(I2CCTL3_SCL_LVL & ioread8(bus->reg + NPCM_I2CCTL3));
+ }
+
+ static int npcm_i2c_get_SDA(struct i2c_adapter *_adap)
+ {
+ struct npcm_i2c *bus = container_of(_adap, struct npcm_i2c, adap);
+
+- return !!(I2CCTL3_SDA_LVL & ioread32(bus->reg + NPCM_I2CCTL3));
++ return !!(I2CCTL3_SDA_LVL & ioread8(bus->reg + NPCM_I2CCTL3));
+ }
+
+ static inline u16 npcm_i2c_get_index(struct npcm_i2c *bus)
+@@ -391,14 +391,10 @@ static void npcm_i2c_disable(struct npcm_i2c *bus)
+ #if IS_ENABLED(CONFIG_I2C_SLAVE)
+ int i;
+
+- /* select bank 0 for I2C addresses */
+- npcm_i2c_select_bank(bus, I2C_BANK_0);
+-
+ /* Slave addresses removal */
+- for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR; i++)
++ for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR_SUPPORTED; i++)
+ iowrite8(0, bus->reg + npcm_i2caddr[i]);
+
+- npcm_i2c_select_bank(bus, I2C_BANK_1);
+ #endif
+ /* Disable module */
+ i2cctl2 = ioread8(bus->reg + NPCM_I2CCTL2);
+@@ -563,6 +559,15 @@ static inline void npcm_i2c_nack(struct npcm_i2c *bus)
+ iowrite8(val, bus->reg + NPCM_I2CCTL1);
+ }
+
++static inline void npcm_i2c_clear_master_status(struct npcm_i2c *bus)
++{
++ u8 val;
++
++ /* Clear NEGACK, STASTR and BER bits */
++ val = NPCM_I2CST_BER | NPCM_I2CST_NEGACK | NPCM_I2CST_STASTR;
++ iowrite8(val, bus->reg + NPCM_I2CST);
++}
++
+ #if IS_ENABLED(CONFIG_I2C_SLAVE)
+ static void npcm_i2c_slave_int_enable(struct npcm_i2c *bus, bool enable)
+ {
+@@ -594,8 +599,7 @@ static int npcm_i2c_slave_enable(struct npcm_i2c *bus, enum i2c_addr addr_type,
+ i2cctl1 &= ~NPCM_I2CCTL1_GCMEN;
+ iowrite8(i2cctl1, bus->reg + NPCM_I2CCTL1);
+ return 0;
+- }
+- if (addr_type == I2C_ARP_ADDR) {
++ } else if (addr_type == I2C_ARP_ADDR) {
+ i2cctl3 = ioread8(bus->reg + NPCM_I2CCTL3);
+ if (enable)
+ i2cctl3 |= I2CCTL3_ARPMEN;
+@@ -604,16 +608,16 @@ static int npcm_i2c_slave_enable(struct npcm_i2c *bus, enum i2c_addr addr_type,
+ iowrite8(i2cctl3, bus->reg + NPCM_I2CCTL3);
+ return 0;
+ }
++ if (addr_type > I2C_SLAVE_ADDR2 && addr_type <= I2C_SLAVE_ADDR10)
++ dev_err(bus->dev, "try to enable more than 2 SA not supported\n");
++
+ if (addr_type >= I2C_ARP_ADDR)
+ return -EFAULT;
+- /* select bank 0 for address 3 to 10 */
+- if (addr_type > I2C_SLAVE_ADDR2)
+- npcm_i2c_select_bank(bus, I2C_BANK_0);
++
+ /* Set and enable the address */
+ iowrite8(sa_reg, bus->reg + npcm_i2caddr[addr_type]);
+ npcm_i2c_slave_int_enable(bus, enable);
+- if (addr_type > I2C_SLAVE_ADDR2)
+- npcm_i2c_select_bank(bus, I2C_BANK_1);
++
+ return 0;
+ }
+ #endif
+@@ -642,8 +646,8 @@ static void npcm_i2c_reset(struct npcm_i2c *bus)
+ iowrite8(NPCM_I2CCST_BB, bus->reg + NPCM_I2CCST);
+ iowrite8(0xFF, bus->reg + NPCM_I2CST);
+
+- /* Clear EOB bit */
+- iowrite8(NPCM_I2CCST3_EO_BUSY, bus->reg + NPCM_I2CCST3);
++ /* Clear and disable EOB */
++ npcm_i2c_eob_int(bus, false);
+
+ /* Clear all fifo bits: */
+ iowrite8(NPCM_I2CFIF_CTS_CLR_FIFO, bus->reg + NPCM_I2CFIF_CTS);
+@@ -655,6 +659,9 @@ static void npcm_i2c_reset(struct npcm_i2c *bus)
+ }
+ #endif
+
++ /* clear status bits for spurious interrupts */
++ npcm_i2c_clear_master_status(bus);
++
+ bus->state = I2C_IDLE;
+ }
+
+@@ -815,15 +822,6 @@ static void npcm_i2c_read_fifo(struct npcm_i2c *bus, u8 bytes_in_fifo)
+ }
+ }
+
+-static inline void npcm_i2c_clear_master_status(struct npcm_i2c *bus)
+-{
+- u8 val;
+-
+- /* Clear NEGACK, STASTR and BER bits */
+- val = NPCM_I2CST_BER | NPCM_I2CST_NEGACK | NPCM_I2CST_STASTR;
+- iowrite8(val, bus->reg + NPCM_I2CST);
+-}
+-
+ static void npcm_i2c_master_abort(struct npcm_i2c *bus)
+ {
+ /* Only current master is allowed to issue a stop condition */
+@@ -840,15 +838,11 @@ static u8 npcm_i2c_get_slave_addr(struct npcm_i2c *bus, enum i2c_addr addr_type)
+ {
+ u8 slave_add;
+
+- /* select bank 0 for address 3 to 10 */
+- if (addr_type > I2C_SLAVE_ADDR2)
+- npcm_i2c_select_bank(bus, I2C_BANK_0);
++ if (addr_type > I2C_SLAVE_ADDR2 && addr_type <= I2C_SLAVE_ADDR10)
++ dev_err(bus->dev, "get slave: try to use more than 2 SA not supported\n");
+
+ slave_add = ioread8(bus->reg + npcm_i2caddr[(int)addr_type]);
+
+- if (addr_type > I2C_SLAVE_ADDR2)
+- npcm_i2c_select_bank(bus, I2C_BANK_1);
+-
+ return slave_add;
+ }
+
+@@ -858,12 +852,12 @@ static int npcm_i2c_remove_slave_addr(struct npcm_i2c *bus, u8 slave_add)
+
+ /* Set the enable bit */
+ slave_add |= 0x80;
+- npcm_i2c_select_bank(bus, I2C_BANK_0);
+- for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR; i++) {
++
++ for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR_SUPPORTED; i++) {
+ if (ioread8(bus->reg + npcm_i2caddr[i]) == slave_add)
+ iowrite8(0, bus->reg + npcm_i2caddr[i]);
+ }
+- npcm_i2c_select_bank(bus, I2C_BANK_1);
++
+ return 0;
+ }
+
+@@ -918,11 +912,15 @@ static int npcm_i2c_slave_get_wr_buf(struct npcm_i2c *bus)
+ for (i = 0; i < I2C_HW_FIFO_SIZE; i++) {
+ if (bus->slv_wr_size >= I2C_HW_FIFO_SIZE)
+ break;
+- i2c_slave_event(bus->slave, I2C_SLAVE_READ_REQUESTED, &value);
++ if (bus->state == I2C_SLAVE_MATCH) {
++ i2c_slave_event(bus->slave, I2C_SLAVE_READ_REQUESTED, &value);
++ bus->state = I2C_OPER_STARTED;
++ } else {
++ i2c_slave_event(bus->slave, I2C_SLAVE_READ_PROCESSED, &value);
++ }
+ ind = (bus->slv_wr_ind + bus->slv_wr_size) % I2C_HW_FIFO_SIZE;
+ bus->slv_wr_buf[ind] = value;
+ bus->slv_wr_size++;
+- i2c_slave_event(bus->slave, I2C_SLAVE_READ_PROCESSED, &value);
+ }
+ return I2C_HW_FIFO_SIZE - ret;
+ }
+@@ -970,7 +968,6 @@ static void npcm_i2c_slave_xmit(struct npcm_i2c *bus, u16 nwrite,
+ if (nwrite == 0)
+ return;
+
+- bus->state = I2C_OPER_STARTED;
+ bus->operation = I2C_WRITE_OPER;
+
+ /* get the next buffer */
+@@ -1231,7 +1228,16 @@ static irqreturn_t npcm_i2c_int_slave_handler(struct npcm_i2c *bus)
+ ret = IRQ_HANDLED;
+ } /* SDAST */
+
+- return ret;
++ /*
++ * if irq is not one of the above, make sure EOB is disabled and all
++ * status bits are cleared.
++ */
++ if (ret == IRQ_NONE) {
++ npcm_i2c_eob_int(bus, false);
++ npcm_i2c_clear_master_status(bus);
++ }
++
++ return IRQ_HANDLED;
+ }
+
+ static int npcm_i2c_reg_slave(struct i2c_client *client)
+@@ -1467,6 +1473,9 @@ static void npcm_i2c_irq_handle_nack(struct npcm_i2c *bus)
+ npcm_i2c_eob_int(bus, false);
+ npcm_i2c_master_stop(bus);
+
++ /* Clear SDA Status bit (by reading dummy byte) */
++ npcm_i2c_rd_byte(bus);
++
+ /*
+ * The bus is released from stall only after the SW clears
+ * NEGACK bit. Then a Stop condition is sent.
+@@ -1474,6 +1483,8 @@ static void npcm_i2c_irq_handle_nack(struct npcm_i2c *bus)
+ npcm_i2c_clear_master_status(bus);
+ readx_poll_timeout_atomic(ioread8, bus->reg + NPCM_I2CCST, val,
+ !(val & NPCM_I2CCST_BUSY), 10, 200);
++ /* verify no status bits are still set after bus is released */
++ npcm_i2c_clear_master_status(bus);
+ }
+ bus->state = I2C_IDLE;
+
+@@ -1672,10 +1683,10 @@ static int npcm_i2c_recovery_tgclk(struct i2c_adapter *_adap)
+ int iter = 27;
+
+ if ((npcm_i2c_get_SDA(_adap) == 1) && (npcm_i2c_get_SCL(_adap) == 1)) {
+- dev_dbg(bus->dev, "bus%d recovery skipped, bus not stuck",
+- bus->num);
++ dev_dbg(bus->dev, "bus%d-0x%x recovery skipped, bus not stuck",
++ bus->num, bus->dest_addr);
+ npcm_i2c_reset(bus);
+- return status;
++ return 0;
+ }
+
+ npcm_i2c_int_enable(bus, false);
+@@ -1909,6 +1920,7 @@ static int npcm_i2c_init_module(struct npcm_i2c *bus, enum i2c_mode mode,
+ bus_freq_hz < I2C_FREQ_MIN_HZ || bus_freq_hz > I2C_FREQ_MAX_HZ)
+ return -EINVAL;
+
++ npcm_i2c_int_enable(bus, false);
+ npcm_i2c_disable(bus);
+
+ /* Configure FIFO mode : */
+@@ -1937,10 +1949,17 @@ static int npcm_i2c_init_module(struct npcm_i2c *bus, enum i2c_mode mode,
+ val = (val | NPCM_I2CCTL1_NMINTE) & ~NPCM_I2CCTL1_RWS;
+ iowrite8(val, bus->reg + NPCM_I2CCTL1);
+
+- npcm_i2c_int_enable(bus, true);
+-
+ npcm_i2c_reset(bus);
+
++ /* check HW is OK: SDA and SCL should be high at this point. */
++ if ((npcm_i2c_get_SDA(&bus->adap) == 0) || (npcm_i2c_get_SCL(&bus->adap) == 0)) {
++ dev_err(bus->dev, "I2C%d init fail: lines are low\n", bus->num);
++ dev_err(bus->dev, "SDA=%d SCL=%d\n", npcm_i2c_get_SDA(&bus->adap),
++ npcm_i2c_get_SCL(&bus->adap));
++ return -ENXIO;
++ }
++
++ npcm_i2c_int_enable(bus, true);
+ return 0;
+ }
+
+@@ -1988,10 +2007,14 @@ static irqreturn_t npcm_i2c_bus_irq(int irq, void *dev_id)
+ #if IS_ENABLED(CONFIG_I2C_SLAVE)
+ if (bus->slave) {
+ bus->master_or_slave = I2C_SLAVE;
+- return npcm_i2c_int_slave_handler(bus);
++ if (npcm_i2c_int_slave_handler(bus))
++ return IRQ_HANDLED;
+ }
+ #endif
+- return IRQ_NONE;
++ /* clear status bits for spurious interrupts */
++ npcm_i2c_clear_master_status(bus);
++
++ return IRQ_HANDLED;
+ }
+
+ static bool npcm_i2c_master_start_xmit(struct npcm_i2c *bus,
+@@ -2047,8 +2070,7 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ u16 nwrite, nread;
+ u8 *write_data, *read_data;
+ u8 slave_addr;
+- int timeout;
+- int ret = 0;
++ unsigned long timeout;
+ bool read_block = false;
+ bool read_PEC = false;
+ u8 bus_busy;
+@@ -2099,13 +2121,13 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ * 9: bits per transaction (including the ack/nack)
+ */
+ timeout_usec = (2 * 9 * USEC_PER_SEC / bus->bus_freq) * (2 + nread + nwrite);
+- timeout = max(msecs_to_jiffies(35), usecs_to_jiffies(timeout_usec));
++ timeout = max_t(unsigned long, bus->adap.timeout, usecs_to_jiffies(timeout_usec));
+ if (nwrite >= 32 * 1024 || nread >= 32 * 1024) {
+ dev_err(bus->dev, "i2c%d buffer too big\n", bus->num);
+ return -EINVAL;
+ }
+
+- time_left = jiffies + msecs_to_jiffies(DEFAULT_STALL_COUNT) + 1;
++ time_left = jiffies + timeout + 1;
+ do {
+ /*
+ * we must clear slave address immediately when the bus is not
+@@ -2138,12 +2160,12 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ bus->read_block_use = read_block;
+
+ reinit_completion(&bus->cmd_complete);
+- if (!npcm_i2c_master_start_xmit(bus, slave_addr, nwrite, nread,
+- write_data, read_data, read_PEC,
+- read_block))
+- ret = -EBUSY;
+
+- if (ret != -EBUSY) {
++ npcm_i2c_int_enable(bus, true);
++
++ if (npcm_i2c_master_start_xmit(bus, slave_addr, nwrite, nread,
++ write_data, read_data, read_PEC,
++ read_block)) {
+ time_left = wait_for_completion_timeout(&bus->cmd_complete,
+ timeout);
+
+@@ -2157,26 +2179,31 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ }
+ }
+ }
+- ret = bus->cmd_err;
+
+ /* if there was BER, check if need to recover the bus: */
+ if (bus->cmd_err == -EAGAIN)
+- ret = i2c_recover_bus(adap);
++ bus->cmd_err = i2c_recover_bus(adap);
+
+ /*
+ * After any type of error, check if LAST bit is still set,
+ * due to a HW issue.
+ * It cannot be cleared without resetting the module.
+ */
+- if (bus->cmd_err &&
+- (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL)))
++ else if (bus->cmd_err &&
++ (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL)))
+ npcm_i2c_reset(bus);
+
++ /* after any xfer, successful or not, stall and EOB must be disabled */
++ npcm_i2c_stall_after_start(bus, false);
++ npcm_i2c_eob_int(bus, false);
++
+ #if IS_ENABLED(CONFIG_I2C_SLAVE)
+ /* reenable slave if it was enabled */
+ if (bus->slave)
+ iowrite8((bus->slave->addr & 0x7F) | NPCM_I2CADDR_SAEN,
+ bus->reg + NPCM_I2CADDR1);
++#else
++ npcm_i2c_int_enable(bus, false);
+ #endif
+ return bus->cmd_err;
+ }
+@@ -2269,7 +2296,7 @@ static int npcm_i2c_probe_bus(struct platform_device *pdev)
+ adap = &bus->adap;
+ adap->owner = THIS_MODULE;
+ adap->retries = 3;
+- adap->timeout = HZ;
++ adap->timeout = msecs_to_jiffies(35);
+ adap->algo = &npcm_i2c_algo;
+ adap->quirks = &npcm_i2c_quirks;
+ adap->algo_data = bus;
+@@ -2335,8 +2362,16 @@ static struct platform_driver npcm_i2c_bus_driver = {
+
+ static int __init npcm_i2c_init(void)
+ {
++ int ret;
++
+ npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL);
+- platform_driver_register(&npcm_i2c_bus_driver);
++
++ ret = platform_driver_register(&npcm_i2c_bus_driver);
++ if (ret) {
++ debugfs_remove_recursive(npcm_i2c_debugfs_dir);
++ return ret;
++ }
++
+ return 0;
+ }
+ module_init(npcm_i2c_init);
+diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
+index a0af027db04c1..2e575856c5cd5 100644
+--- a/drivers/i2c/busses/i2c-ocores.c
++++ b/drivers/i2c/busses/i2c-ocores.c
+@@ -342,18 +342,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c)
+ * ocores_isr(), we just add our polling code around it.
+ *
+ * It can run in atomic context
++ *
++ * Return: 0 on success, -ETIMEDOUT on timeout
+ */
+-static void ocores_process_polling(struct ocores_i2c *i2c)
++static int ocores_process_polling(struct ocores_i2c *i2c)
+ {
+- while (1) {
+- irqreturn_t ret;
+- int err;
++ irqreturn_t ret;
++ int err = 0;
+
++ while (1) {
+ err = ocores_poll_wait(i2c);
+- if (err) {
+- i2c->state = STATE_ERROR;
++ if (err)
+ break; /* timeout */
+- }
+
+ ret = ocores_isr(-1, i2c);
+ if (ret == IRQ_NONE)
+@@ -364,13 +364,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c)
+ break;
+ }
+ }
++
++ return err;
+ }
+
+ static int ocores_xfer_core(struct ocores_i2c *i2c,
+ struct i2c_msg *msgs, int num,
+ bool polling)
+ {
+- int ret;
++ int ret = 0;
+ u8 ctrl;
+
+ ctrl = oc_getreg(i2c, OCI2C_CONTROL);
+@@ -388,15 +390,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c,
+ oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START);
+
+ if (polling) {
+- ocores_process_polling(i2c);
++ ret = ocores_process_polling(i2c);
+ } else {
+- ret = wait_event_timeout(i2c->wait,
+- (i2c->state == STATE_ERROR) ||
+- (i2c->state == STATE_DONE), HZ);
+- if (ret == 0) {
+- ocores_process_timeout(i2c);
+- return -ETIMEDOUT;
+- }
++ if (wait_event_timeout(i2c->wait,
++ (i2c->state == STATE_ERROR) ||
++ (i2c->state == STATE_DONE), HZ) == 0)
++ ret = -ETIMEDOUT;
++ }
++ if (ret) {
++ ocores_process_timeout(i2c);
++ return ret;
+ }
+
+ return (i2c->state == STATE_DONE) ? num : -EIO;
+diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
+index d4f6c6d60683a..8955f62b497e6 100644
+--- a/drivers/i2c/busses/i2c-omap.c
++++ b/drivers/i2c/busses/i2c-omap.c
+@@ -1058,7 +1058,7 @@ omap_i2c_isr(int irq, void *dev_id)
+ u16 stat;
+
+ stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
+- mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
++ mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG) & ~OMAP_I2C_STAT_NACK;
+
+ if (stat & mask)
+ ret = IRQ_WAKE_THREAD;
+diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c
+index 20f2772c0e79b..2c909522f0f38 100644
+--- a/drivers/i2c/busses/i2c-pasemi.c
++++ b/drivers/i2c/busses/i2c-pasemi.c
+@@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter,
+
+ TXFIFO_WR(smbus, msg->buf[msg->len-1] |
+ (stop ? MTXFIFO_STOP : 0));
++
++ if (stop) {
++ err = pasemi_smb_waitready(smbus);
++ if (err)
++ goto reset_out;
++ }
+ }
+
+ return 0;
+diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
+index 8c1b31ed0c429..809fbd014cd68 100644
+--- a/drivers/i2c/busses/i2c-piix4.c
++++ b/drivers/i2c/busses/i2c-piix4.c
+@@ -77,6 +77,7 @@
+
+ /* SB800 constants */
+ #define SB800_PIIX4_SMB_IDX 0xcd6
++#define SB800_PIIX4_SMB_MAP_SIZE 2
+
+ #define KERNCZ_IMC_IDX 0x3e
+ #define KERNCZ_IMC_DATA 0x3f
+@@ -97,6 +98,9 @@
+ #define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18
+ #define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3
+
++#define SB800_PIIX4_FCH_PM_ADDR 0xFED80300
++#define SB800_PIIX4_FCH_PM_SIZE 8
++
+ /* insmod parameters */
+
+ /* If force is set to anything different from 0, we forcibly enable the
+@@ -155,6 +159,11 @@ static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
+ };
+ static const char *piix4_aux_port_name_sb800 = " port 1";
+
++struct sb800_mmio_cfg {
++ void __iomem *addr;
++ bool use_mmio;
++};
++
+ struct i2c_piix4_adapdata {
+ unsigned short smba;
+
+@@ -162,8 +171,74 @@ struct i2c_piix4_adapdata {
+ bool sb800_main;
+ bool notify_imc;
+ u8 port; /* Port number, shifted */
++ struct sb800_mmio_cfg mmio_cfg;
+ };
+
++static int piix4_sb800_region_request(struct device *dev,
++ struct sb800_mmio_cfg *mmio_cfg)
++{
++ if (mmio_cfg->use_mmio) {
++ void __iomem *addr;
++
++ if (!request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR,
++ SB800_PIIX4_FCH_PM_SIZE,
++ "sb800_piix4_smb")) {
++ dev_err(dev,
++ "SMBus base address memory region 0x%x already in use.\n",
++ SB800_PIIX4_FCH_PM_ADDR);
++ return -EBUSY;
++ }
++
++ addr = ioremap(SB800_PIIX4_FCH_PM_ADDR,
++ SB800_PIIX4_FCH_PM_SIZE);
++ if (!addr) {
++ release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
++ SB800_PIIX4_FCH_PM_SIZE);
++ dev_err(dev, "SMBus base address mapping failed.\n");
++ return -ENOMEM;
++ }
++
++ mmio_cfg->addr = addr;
++
++ return 0;
++ }
++
++ if (!request_muxed_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE,
++ "sb800_piix4_smb")) {
++ dev_err(dev,
++ "SMBus base address index region 0x%x already in use.\n",
++ SB800_PIIX4_SMB_IDX);
++ return -EBUSY;
++ }
++
++ return 0;
++}
++
++static void piix4_sb800_region_release(struct device *dev,
++ struct sb800_mmio_cfg *mmio_cfg)
++{
++ if (mmio_cfg->use_mmio) {
++ iounmap(mmio_cfg->addr);
++ release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
++ SB800_PIIX4_FCH_PM_SIZE);
++ return;
++ }
++
++ release_region(SB800_PIIX4_SMB_IDX, SB800_PIIX4_SMB_MAP_SIZE);
++}
++
++static bool piix4_sb800_use_mmio(struct pci_dev *PIIX4_dev)
++{
++ /*
++ * cd6h/cd7h port I/O accesses can be disabled on AMD processors
++ * w/ SMBus PCI revision ID 0x51 or greater. MMIO is supported on
++ * the same processors and is the recommended access method.
++ */
++ return (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD &&
++ PIIX4_dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS &&
++ PIIX4_dev->revision >= 0x51);
++}
++
+ static int piix4_setup(struct pci_dev *PIIX4_dev,
+ const struct pci_device_id *id)
+ {
+@@ -263,12 +338,61 @@ static int piix4_setup(struct pci_dev *PIIX4_dev,
+ return piix4_smba;
+ }
+
++static int piix4_setup_sb800_smba(struct pci_dev *PIIX4_dev,
++ u8 smb_en,
++ u8 aux,
++ u8 *smb_en_status,
++ unsigned short *piix4_smba)
++{
++ struct sb800_mmio_cfg mmio_cfg;
++ u8 smba_en_lo;
++ u8 smba_en_hi;
++ int retval;
++
++ mmio_cfg.use_mmio = piix4_sb800_use_mmio(PIIX4_dev);
++ retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg);
++ if (retval)
++ return retval;
++
++ if (mmio_cfg.use_mmio) {
++ smba_en_lo = ioread8(mmio_cfg.addr);
++ smba_en_hi = ioread8(mmio_cfg.addr + 1);
++ } else {
++ outb_p(smb_en, SB800_PIIX4_SMB_IDX);
++ smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
++ outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
++ smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
++ }
++
++ piix4_sb800_region_release(&PIIX4_dev->dev, &mmio_cfg);
++
++ if (!smb_en) {
++ *smb_en_status = smba_en_lo & 0x10;
++ *piix4_smba = smba_en_hi << 8;
++ if (aux)
++ *piix4_smba |= 0x20;
++ } else {
++ *smb_en_status = smba_en_lo & 0x01;
++ *piix4_smba = ((smba_en_hi << 8) | smba_en_lo) & 0xffe0;
++ }
++
++ if (!*smb_en_status) {
++ dev_err(&PIIX4_dev->dev,
++ "SMBus Host Controller not enabled!\n");
++ return -ENODEV;
++ }
++
++ return 0;
++}
++
+ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
+ const struct pci_device_id *id, u8 aux)
+ {
+ unsigned short piix4_smba;
+- u8 smba_en_lo, smba_en_hi, smb_en, smb_en_status, port_sel;
++ u8 smb_en, smb_en_status, port_sel;
+ u8 i2ccfg, i2ccfg_offset = 0x10;
++ struct sb800_mmio_cfg mmio_cfg;
++ int retval;
+
+ /* SB800 and later SMBus does not support forcing address */
+ if (force || force_addr) {
+@@ -290,35 +414,11 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
+ else
+ smb_en = (aux) ? 0x28 : 0x2c;
+
+- if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb")) {
+- dev_err(&PIIX4_dev->dev,
+- "SMB base address index region 0x%x already in use.\n",
+- SB800_PIIX4_SMB_IDX);
+- return -EBUSY;
+- }
+-
+- outb_p(smb_en, SB800_PIIX4_SMB_IDX);
+- smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
+- outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
+- smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
++ retval = piix4_setup_sb800_smba(PIIX4_dev, smb_en, aux, &smb_en_status,
++ &piix4_smba);
+
+- release_region(SB800_PIIX4_SMB_IDX, 2);
+-
+- if (!smb_en) {
+- smb_en_status = smba_en_lo & 0x10;
+- piix4_smba = smba_en_hi << 8;
+- if (aux)
+- piix4_smba |= 0x20;
+- } else {
+- smb_en_status = smba_en_lo & 0x01;
+- piix4_smba = ((smba_en_hi << 8) | smba_en_lo) & 0xffe0;
+- }
+-
+- if (!smb_en_status) {
+- dev_err(&PIIX4_dev->dev,
+- "SMBus Host Controller not enabled!\n");
+- return -ENODEV;
+- }
++ if (retval)
++ return retval;
+
+ if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name))
+ return -ENODEV;
+@@ -371,10 +471,11 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
+ piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT;
+ }
+ } else {
+- if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2,
+- "sb800_piix4_smb")) {
++ mmio_cfg.use_mmio = piix4_sb800_use_mmio(PIIX4_dev);
++ retval = piix4_sb800_region_request(&PIIX4_dev->dev, &mmio_cfg);
++ if (retval) {
+ release_region(piix4_smba, SMBIOSIZE);
+- return -EBUSY;
++ return retval;
+ }
+
+ outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX);
+@@ -384,7 +485,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
+ SB800_PIIX4_PORT_IDX;
+ piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK;
+ piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT;
+- release_region(SB800_PIIX4_SMB_IDX, 2);
++ piix4_sb800_region_release(&PIIX4_dev->dev, &mmio_cfg);
+ }
+
+ dev_info(&PIIX4_dev->dev,
+@@ -662,6 +763,29 @@ static void piix4_imc_wakeup(void)
+ release_region(KERNCZ_IMC_IDX, 2);
+ }
+
++static int piix4_sb800_port_sel(u8 port, struct sb800_mmio_cfg *mmio_cfg)
++{
++ u8 smba_en_lo, val;
++
++ if (mmio_cfg->use_mmio) {
++ smba_en_lo = ioread8(mmio_cfg->addr + piix4_port_sel_sb800);
++ val = (smba_en_lo & ~piix4_port_mask_sb800) | port;
++ if (smba_en_lo != val)
++ iowrite8(val, mmio_cfg->addr + piix4_port_sel_sb800);
++
++ return (smba_en_lo & piix4_port_mask_sb800);
++ }
++
++ outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
++ smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
++
++ val = (smba_en_lo & ~piix4_port_mask_sb800) | port;
++ if (smba_en_lo != val)
++ outb_p(val, SB800_PIIX4_SMB_IDX + 1);
++
++ return (smba_en_lo & piix4_port_mask_sb800);
++}
++
+ /*
+ * Handles access to multiple SMBus ports on the SB800.
+ * The port is selected by bits 2:1 of the smb_en register (0x2c).
+@@ -678,12 +802,12 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
+ unsigned short piix4_smba = adapdata->smba;
+ int retries = MAX_TIMEOUT;
+ int smbslvcnt;
+- u8 smba_en_lo;
+- u8 port;
++ u8 prev_port;
+ int retval;
+
+- if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb"))
+- return -EBUSY;
++ retval = piix4_sb800_region_request(&adap->dev, &adapdata->mmio_cfg);
++ if (retval)
++ return retval;
+
+ /* Request the SMBUS semaphore, avoid conflicts with the IMC */
+ smbslvcnt = inb_p(SMBSLVCNT);
+@@ -738,18 +862,12 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
+ }
+ }
+
+- outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
+- smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
+-
+- port = adapdata->port;
+- if ((smba_en_lo & piix4_port_mask_sb800) != port)
+- outb_p((smba_en_lo & ~piix4_port_mask_sb800) | port,
+- SB800_PIIX4_SMB_IDX + 1);
++ prev_port = piix4_sb800_port_sel(adapdata->port, &adapdata->mmio_cfg);
+
+ retval = piix4_access(adap, addr, flags, read_write,
+ command, size, data);
+
+- outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
++ piix4_sb800_port_sel(prev_port, &adapdata->mmio_cfg);
+
+ /* Release the semaphore */
+ outb_p(smbslvcnt | 0x20, SMBSLVCNT);
+@@ -758,7 +876,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
+ piix4_imc_wakeup();
+
+ release:
+- release_region(SB800_PIIX4_SMB_IDX, 2);
++ piix4_sb800_region_release(&adap->dev, &adapdata->mmio_cfg);
+ return retval;
+ }
+
+@@ -836,6 +954,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+ return -ENOMEM;
+ }
+
++ adapdata->mmio_cfg.use_mmio = piix4_sb800_use_mmio(dev);
+ adapdata->smba = smba;
+ adapdata->sb800_main = sb800_main;
+ adapdata->port = port << piix4_port_shift_sb800;
+@@ -961,6 +1080,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ "", &piix4_main_adapters[0]);
+ if (retval < 0)
+ return retval;
++ piix4_adapter_count = 1;
+ }
+
+ /* Check for auxiliary SMBus on some AMD chipsets */
+diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c
+index f614cade432bb..30e38bc8b6db8 100644
+--- a/drivers/i2c/busses/i2c-pxa-pci.c
++++ b/drivers/i2c/busses/i2c-pxa-pci.c
+@@ -105,7 +105,7 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
+ int i;
+ struct ce4100_devices *sds;
+
+- ret = pci_enable_device_mem(dev);
++ ret = pcim_enable_device(dev);
+ if (ret)
+ return ret;
+
+@@ -114,10 +114,8 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
+ return -EINVAL;
+ }
+ sds = kzalloc(sizeof(*sds), GFP_KERNEL);
+- if (!sds) {
+- ret = -ENOMEM;
+- goto err_mem;
+- }
++ if (!sds)
++ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) {
+ sds->pdev[i] = add_i2c_device(dev, i);
+@@ -133,8 +131,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
+
+ err_dev_add:
+ kfree(sds);
+-err_mem:
+- pci_disable_device(dev);
+ return ret;
+ }
+
+diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c
+index c1de8eb66169f..2bdb86ab2ea81 100644
+--- a/drivers/i2c/busses/i2c-qcom-cci.c
++++ b/drivers/i2c/busses/i2c-qcom-cci.c
+@@ -558,7 +558,7 @@ static int cci_probe(struct platform_device *pdev)
+ cci->master[idx].adap.quirks = &cci->data->quirks;
+ cci->master[idx].adap.algo = &cci_algo;
+ cci->master[idx].adap.dev.parent = dev;
+- cci->master[idx].adap.dev.of_node = child;
++ cci->master[idx].adap.dev.of_node = of_node_get(child);
+ cci->master[idx].master = idx;
+ cci->master[idx].cci = cci;
+
+@@ -638,26 +638,33 @@ static int cci_probe(struct platform_device *pdev)
+ if (ret < 0)
+ goto error;
+
++ pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
++ pm_runtime_use_autosuspend(dev);
++ pm_runtime_set_active(dev);
++ pm_runtime_enable(dev);
++
+ for (i = 0; i < cci->data->num_masters; i++) {
+ if (!cci->master[i].cci)
+ continue;
+
+ ret = i2c_add_adapter(&cci->master[i].adap);
+- if (ret < 0)
++ if (ret < 0) {
++ of_node_put(cci->master[i].adap.dev.of_node);
+ goto error_i2c;
++ }
+ }
+
+- pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
+- pm_runtime_use_autosuspend(dev);
+- pm_runtime_set_active(dev);
+- pm_runtime_enable(dev);
+-
+ return 0;
+
+ error_i2c:
+- for (; i >= 0; i--) {
+- if (cci->master[i].cci)
++ pm_runtime_disable(dev);
++ pm_runtime_dont_use_autosuspend(dev);
++
++ for (--i ; i >= 0; i--) {
++ if (cci->master[i].cci) {
+ i2c_del_adapter(&cci->master[i].adap);
++ of_node_put(cci->master[i].adap.dev.of_node);
++ }
+ }
+ error:
+ disable_irq(cci->irq);
+@@ -673,8 +680,10 @@ static int cci_remove(struct platform_device *pdev)
+ int i;
+
+ for (i = 0; i < cci->data->num_masters; i++) {
+- if (cci->master[i].cci)
++ if (cci->master[i].cci) {
+ i2c_del_adapter(&cci->master[i].adap);
++ of_node_put(cci->master[i].adap.dev.of_node);
++ }
+ cci_halt(cci, i);
+ }
+
+diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
+index fcd35e8de83c9..b89eca2398d90 100644
+--- a/drivers/i2c/busses/i2c-qup.c
++++ b/drivers/i2c/busses/i2c-qup.c
+@@ -1752,16 +1752,21 @@ nodma:
+ if (!clk_freq || clk_freq > I2C_MAX_FAST_MODE_PLUS_FREQ) {
+ dev_err(qup->dev, "clock frequency not supported %d\n",
+ clk_freq);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto fail_dma;
+ }
+
+ qup->base = devm_platform_ioremap_resource(pdev, 0);
+- if (IS_ERR(qup->base))
+- return PTR_ERR(qup->base);
++ if (IS_ERR(qup->base)) {
++ ret = PTR_ERR(qup->base);
++ goto fail_dma;
++ }
+
+ qup->irq = platform_get_irq(pdev, 0);
+- if (qup->irq < 0)
+- return qup->irq;
++ if (qup->irq < 0) {
++ ret = qup->irq;
++ goto fail_dma;
++ }
+
+ if (has_acpi_companion(qup->dev)) {
+ ret = device_property_read_u32(qup->dev,
+@@ -1775,13 +1780,15 @@ nodma:
+ qup->clk = devm_clk_get(qup->dev, "core");
+ if (IS_ERR(qup->clk)) {
+ dev_err(qup->dev, "Could not get core clock\n");
+- return PTR_ERR(qup->clk);
++ ret = PTR_ERR(qup->clk);
++ goto fail_dma;
+ }
+
+ qup->pclk = devm_clk_get(qup->dev, "iface");
+ if (IS_ERR(qup->pclk)) {
+ dev_err(qup->dev, "Could not get iface clock\n");
+- return PTR_ERR(qup->pclk);
++ ret = PTR_ERR(qup->pclk);
++ goto fail_dma;
+ }
+ qup_i2c_enable_clocks(qup);
+ src_clk_freq = clk_get_rate(qup->clk);
+diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
+index bff9913c37b8b..2c016f0299fce 100644
+--- a/drivers/i2c/busses/i2c-rcar.c
++++ b/drivers/i2c/busses/i2c-rcar.c
+@@ -1070,8 +1070,10 @@ static int rcar_i2c_probe(struct platform_device *pdev)
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
+ ret = rcar_i2c_clock_calculate(priv);
+- if (ret < 0)
+- goto out_pm_put;
++ if (ret < 0) {
++ pm_runtime_put(dev);
++ goto out_pm_disable;
++ }
+
+ rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */
+
+@@ -1100,19 +1102,19 @@ static int rcar_i2c_probe(struct platform_device *pdev)
+
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+- goto out_pm_disable;
++ goto out_pm_put;
+ priv->irq = ret;
+ ret = devm_request_irq(dev, priv->irq, irqhandler, irqflags, dev_name(dev), priv);
+ if (ret < 0) {
+ dev_err(dev, "cannot get irq %d\n", priv->irq);
+- goto out_pm_disable;
++ goto out_pm_put;
+ }
+
+ platform_set_drvdata(pdev, priv);
+
+ ret = i2c_add_numbered_adapter(adap);
+ if (ret < 0)
+- goto out_pm_disable;
++ goto out_pm_put;
+
+ if (priv->flags & ID_P_HOST_NOTIFY) {
+ priv->host_notify_client = i2c_new_slave_host_notify_device(adap);
+@@ -1129,7 +1131,8 @@ static int rcar_i2c_probe(struct platform_device *pdev)
+ out_del_device:
+ i2c_del_adapter(&priv->adap);
+ out_pm_put:
+- pm_runtime_put(dev);
++ if (priv->flags & ID_P_PM_BLOCKED)
++ pm_runtime_put(dev);
+ out_pm_disable:
+ pm_runtime_disable(dev);
+ return ret;
+diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
+index 819ab4ee517e1..13c14eb175e94 100644
+--- a/drivers/i2c/busses/i2c-rk3x.c
++++ b/drivers/i2c/busses/i2c-rk3x.c
+@@ -80,7 +80,7 @@ enum {
+ #define DEFAULT_SCL_RATE (100 * 1000) /* Hz */
+
+ /**
+- * struct i2c_spec_values:
++ * struct i2c_spec_values - I2C specification values for various modes
+ * @min_hold_start_ns: min hold time (repeated) START condition
+ * @min_low_ns: min LOW period of the SCL clock
+ * @min_high_ns: min HIGH period of the SCL cloc
+@@ -136,7 +136,7 @@ static const struct i2c_spec_values fast_mode_plus_spec = {
+ };
+
+ /**
+- * struct rk3x_i2c_calced_timings:
++ * struct rk3x_i2c_calced_timings - calculated V1 timings
+ * @div_low: Divider output for low
+ * @div_high: Divider output for high
+ * @tuning: Used to adjust setup/hold data time,
+@@ -159,7 +159,7 @@ enum rk3x_i2c_state {
+ };
+
+ /**
+- * struct rk3x_i2c_soc_data:
++ * struct rk3x_i2c_soc_data - SOC-specific data
+ * @grf_offset: offset inside the grf regmap for setting the i2c type
+ * @calc_timings: Callback function for i2c timing information calculated
+ */
+@@ -239,7 +239,8 @@ static inline void rk3x_i2c_clean_ipd(struct rk3x_i2c *i2c)
+ }
+
+ /**
+- * Generate a START condition, which triggers a REG_INT_START interrupt.
++ * rk3x_i2c_start - Generate a START condition, which triggers a REG_INT_START interrupt.
++ * @i2c: target controller data
+ */
+ static void rk3x_i2c_start(struct rk3x_i2c *i2c)
+ {
+@@ -258,8 +259,8 @@ static void rk3x_i2c_start(struct rk3x_i2c *i2c)
+ }
+
+ /**
+- * Generate a STOP condition, which triggers a REG_INT_STOP interrupt.
+- *
++ * rk3x_i2c_stop - Generate a STOP condition, which triggers a REG_INT_STOP interrupt.
++ * @i2c: target controller data
+ * @error: Error code to return in rk3x_i2c_xfer
+ */
+ static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error)
+@@ -298,7 +299,8 @@ static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error)
+ }
+
+ /**
+- * Setup a read according to i2c->msg
++ * rk3x_i2c_prepare_read - Setup a read according to i2c->msg
++ * @i2c: target controller data
+ */
+ static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c)
+ {
+@@ -329,7 +331,8 @@ static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c)
+ }
+
+ /**
+- * Fill the transmit buffer with data from i2c->msg
++ * rk3x_i2c_fill_transmit_buf - Fill the transmit buffer with data from i2c->msg
++ * @i2c: target controller data
+ */
+ static void rk3x_i2c_fill_transmit_buf(struct rk3x_i2c *i2c)
+ {
+@@ -423,8 +426,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd)
+ if (!(ipd & REG_INT_MBRF))
+ return;
+
+- /* ack interrupt */
+- i2c_writel(i2c, REG_INT_MBRF, REG_IPD);
++ /* ack interrupt (read also produces a spurious START flag, clear it too) */
++ i2c_writel(i2c, REG_INT_MBRF | REG_INT_START, REG_IPD);
+
+ /* Can only handle a maximum of 32 bytes at a time */
+ if (len > 32)
+@@ -532,11 +535,10 @@ out:
+ }
+
+ /**
+- * Get timing values of I2C specification
+- *
++ * rk3x_i2c_get_spec - Get timing values of I2C specification
+ * @speed: Desired SCL frequency
+ *
+- * Returns: Matched i2c spec values.
++ * Return: Matched i2c_spec_values.
+ */
+ static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed)
+ {
+@@ -549,13 +551,12 @@ static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed)
+ }
+
+ /**
+- * Calculate divider values for desired SCL frequency
+- *
++ * rk3x_i2c_v0_calc_timings - Calculate divider values for desired SCL frequency
+ * @clk_rate: I2C input clock rate
+ * @t: Known I2C timing information
+ * @t_calc: Caculated rk3x private timings that would be written into regs
+ *
+- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case
++ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case
+ * a best-effort divider value is returned in divs. If the target rate is
+ * too high, we silently use the highest possible rate.
+ */
+@@ -710,13 +711,12 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate,
+ }
+
+ /**
+- * Calculate timing values for desired SCL frequency
+- *
++ * rk3x_i2c_v1_calc_timings - Calculate timing values for desired SCL frequency
+ * @clk_rate: I2C input clock rate
+ * @t: Known I2C timing information
+ * @t_calc: Caculated rk3x private timings that would be written into regs
+ *
+- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case
++ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case
+ * a best-effort divider value is returned in divs. If the target rate is
+ * too high, we silently use the highest possible rate.
+ * The following formulas are v1's method to calculate timings.
+@@ -960,14 +960,14 @@ static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
+ }
+
+ /**
+- * Setup I2C registers for an I2C operation specified by msgs, num.
+- *
+- * Must be called with i2c->lock held.
+- *
++ * rk3x_i2c_setup - Setup I2C registers for an I2C operation specified by msgs, num.
++ * @i2c: target controller data
+ * @msgs: I2C msgs to process
+ * @num: Number of msgs
+ *
+- * returns: Number of I2C msgs processed or negative in case of error
++ * Must be called with i2c->lock held.
++ *
++ * Return: Number of I2C msgs processed or negative in case of error
+ */
+ static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num)
+ {
+diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
+index 319d1fa617c88..051b904cb35f6 100644
+--- a/drivers/i2c/busses/i2c-sh7760.c
++++ b/drivers/i2c/busses/i2c-sh7760.c
+@@ -443,9 +443,8 @@ static int sh7760_i2c_probe(struct platform_device *pdev)
+ goto out0;
+ }
+
+- id = kzalloc(sizeof(struct cami2c), GFP_KERNEL);
++ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id) {
+- dev_err(&pdev->dev, "no mem for private data\n");
+ ret = -ENOMEM;
+ goto out0;
+ }
+diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
+index 4fe15cd78907e..ffc54fbf814dd 100644
+--- a/drivers/i2c/busses/i2c-sprd.c
++++ b/drivers/i2c/busses/i2c-sprd.c
+@@ -576,12 +576,14 @@ static int sprd_i2c_remove(struct platform_device *pdev)
+ struct sprd_i2c *i2c_dev = platform_get_drvdata(pdev);
+ int ret;
+
+- ret = pm_runtime_resume_and_get(i2c_dev->dev);
++ ret = pm_runtime_get_sync(i2c_dev->dev);
+ if (ret < 0)
+- return ret;
++ dev_err(&pdev->dev, "Failed to resume device (%pe)\n", ERR_PTR(ret));
+
+ i2c_del_adapter(&i2c_dev->adap);
+- clk_disable_unprepare(i2c_dev->clk);
++
++ if (ret >= 0)
++ clk_disable_unprepare(i2c_dev->clk);
+
+ pm_runtime_put_noidle(i2c_dev->dev);
+ pm_runtime_disable(i2c_dev->dev);
+diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
+index b9b19a2a2ffa0..50d5ae81d2271 100644
+--- a/drivers/i2c/busses/i2c-stm32f7.c
++++ b/drivers/i2c/busses/i2c-stm32f7.c
+@@ -1493,6 +1493,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
+ {
+ struct stm32f7_i2c_dev *i2c_dev = data;
+ struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
++ struct stm32_i2c_dma *dma = i2c_dev->dma;
+ void __iomem *base = i2c_dev->base;
+ u32 status, mask;
+ int ret = IRQ_HANDLED;
+@@ -1518,6 +1519,10 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
+ dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n",
+ __func__, f7_msg->addr);
+ writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR);
++ if (i2c_dev->use_dma) {
++ stm32f7_i2c_disable_dma_req(i2c_dev);
++ dmaengine_terminate_all(dma->chan_using);
++ }
+ f7_msg->result = -ENXIO;
+ }
+
+@@ -1533,7 +1538,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
+ /* Clear STOP flag */
+ writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR);
+
+- if (i2c_dev->use_dma) {
++ if (i2c_dev->use_dma && !f7_msg->result) {
+ ret = IRQ_WAKE_THREAD;
+ } else {
+ i2c_dev->master_mode = false;
+@@ -1546,7 +1551,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
+ if (f7_msg->stop) {
+ mask = STM32F7_I2C_CR2_STOP;
+ stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask);
+- } else if (i2c_dev->use_dma) {
++ } else if (i2c_dev->use_dma && !f7_msg->result) {
+ ret = IRQ_WAKE_THREAD;
+ } else if (f7_msg->smbus) {
+ stm32f7_i2c_smbus_rep_start(i2c_dev);
+@@ -1696,12 +1701,23 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap,
+ time_left = wait_for_completion_timeout(&i2c_dev->complete,
+ i2c_dev->adap.timeout);
+ ret = f7_msg->result;
++ if (ret) {
++ /*
++ * It is possible that some unsent data have already been
++ * written into TXDR. To avoid sending old data in a
++ * further transfer, flush TXDR in case of any error
++ */
++ writel_relaxed(STM32F7_I2C_ISR_TXE,
++ i2c_dev->base + STM32F7_I2C_ISR);
++ goto pm_free;
++ }
+
+ if (!time_left) {
+ dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n",
+ i2c_dev->msg->addr);
+ if (i2c_dev->use_dma)
+ dmaengine_terminate_all(dma->chan_using);
++ stm32f7_i2c_wait_free_bus(i2c_dev);
+ ret = -ETIMEDOUT;
+ }
+
+@@ -1744,13 +1760,22 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+ timeout = wait_for_completion_timeout(&i2c_dev->complete,
+ i2c_dev->adap.timeout);
+ ret = f7_msg->result;
+- if (ret)
++ if (ret) {
++ /*
++ * It is possible that some unsent data have already been
++ * written into TXDR. To avoid sending old data in a
++ * further transfer, flush TXDR in case of any error
++ */
++ writel_relaxed(STM32F7_I2C_ISR_TXE,
++ i2c_dev->base + STM32F7_I2C_ISR);
+ goto pm_free;
++ }
+
+ if (!timeout) {
+ dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr);
+ if (i2c_dev->use_dma)
+ dmaengine_terminate_all(dma->chan_using);
++ stm32f7_i2c_wait_free_bus(i2c_dev);
+ ret = -ETIMEDOUT;
+ goto pm_free;
+ }
+diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
+index c883044715f3b..444867cef682f 100644
+--- a/drivers/i2c/busses/i2c-tegra.c
++++ b/drivers/i2c/busses/i2c-tegra.c
+@@ -283,6 +283,7 @@ struct tegra_i2c_dev {
+ struct dma_chan *tx_dma_chan;
+ struct dma_chan *rx_dma_chan;
+ unsigned int dma_buf_size;
++ struct device *dma_dev;
+ dma_addr_t dma_phys;
+ void *dma_buf;
+
+@@ -419,7 +420,7 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len)
+ static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev)
+ {
+ if (i2c_dev->dma_buf) {
+- dma_free_coherent(i2c_dev->dev, i2c_dev->dma_buf_size,
++ dma_free_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size,
+ i2c_dev->dma_buf, i2c_dev->dma_phys);
+ i2c_dev->dma_buf = NULL;
+ }
+@@ -466,10 +467,13 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
+
+ i2c_dev->tx_dma_chan = chan;
+
++ WARN_ON(i2c_dev->tx_dma_chan->device != i2c_dev->rx_dma_chan->device);
++ i2c_dev->dma_dev = chan->device->dev;
++
+ i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len +
+ I2C_PACKET_HEADER_SIZE;
+
+- dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size,
++ dma_buf = dma_alloc_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size,
+ &dma_phys, GFP_KERNEL | __GFP_NOWARN);
+ if (!dma_buf) {
+ dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n");
+@@ -1255,7 +1259,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
+
+ if (i2c_dev->dma_mode) {
+ if (i2c_dev->msg_read) {
+- dma_sync_single_for_device(i2c_dev->dev,
++ dma_sync_single_for_device(i2c_dev->dma_dev,
+ i2c_dev->dma_phys,
+ xfer_size, DMA_FROM_DEVICE);
+
+@@ -1263,7 +1267,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
+ if (err)
+ return err;
+ } else {
+- dma_sync_single_for_cpu(i2c_dev->dev,
++ dma_sync_single_for_cpu(i2c_dev->dma_dev,
+ i2c_dev->dma_phys,
+ xfer_size, DMA_TO_DEVICE);
+ }
+@@ -1276,7 +1280,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
+ memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE,
+ msg->buf, msg->len);
+
+- dma_sync_single_for_device(i2c_dev->dev,
++ dma_sync_single_for_device(i2c_dev->dma_dev,
+ i2c_dev->dma_phys,
+ xfer_size, DMA_TO_DEVICE);
+
+@@ -1327,7 +1331,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
+ }
+
+ if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) {
+- dma_sync_single_for_cpu(i2c_dev->dev,
++ dma_sync_single_for_cpu(i2c_dev->dma_dev,
+ i2c_dev->dma_phys,
+ xfer_size, DMA_FROM_DEVICE);
+
+diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c
+index 12c90aa0900e6..a77cd86fe75ed 100644
+--- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c
++++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c
+@@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev,
+ i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info;
+ i2c->adap.dev.parent = dev;
+ i2c->adap.dev.of_node = pdev->dev.of_node;
++ i2c->adap.dev.fwnode = dev->fwnode;
+ snprintf(i2c->adap.name, sizeof(i2c->adap.name),
+ "Cavium ThunderX i2c adapter at %s", dev_name(dev));
+ i2c_set_adapdata(&i2c->adap, i2c);
+diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
+index 7279ca0eaa2d0..d1fa9ff5aeab4 100644
+--- a/drivers/i2c/busses/i2c-tiny-usb.c
++++ b/drivers/i2c/busses/i2c-tiny-usb.c
+@@ -226,10 +226,8 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface,
+
+ /* allocate memory for our device state and initialize it */
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+- if (dev == NULL) {
+- dev_err(&interface->dev, "Out of memory\n");
++ if (!dev)
+ goto error;
+- }
+
+ dev->usb_dev = usb_get_dev(interface_to_usbdev(interface));
+ dev->interface = interface;
+diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
+index f10a603b13fb0..5cb21d7da05b6 100644
+--- a/drivers/i2c/busses/i2c-virtio.c
++++ b/drivers/i2c/busses/i2c-virtio.c
+@@ -22,24 +22,24 @@
+ /**
+ * struct virtio_i2c - virtio I2C data
+ * @vdev: virtio device for this controller
+- * @completion: completion of virtio I2C message
+ * @adap: I2C adapter for this controller
+ * @vq: the virtio virtqueue for communication
+ */
+ struct virtio_i2c {
+ struct virtio_device *vdev;
+- struct completion completion;
+ struct i2c_adapter adap;
+ struct virtqueue *vq;
+ };
+
+ /**
+ * struct virtio_i2c_req - the virtio I2C request structure
++ * @completion: completion of virtio I2C message
+ * @out_hdr: the OUT header of the virtio I2C message
+ * @buf: the buffer into which data is read, or from which it's written
+ * @in_hdr: the IN header of the virtio I2C message
+ */
+ struct virtio_i2c_req {
++ struct completion completion;
+ struct virtio_i2c_out_hdr out_hdr ____cacheline_aligned;
+ uint8_t *buf ____cacheline_aligned;
+ struct virtio_i2c_in_hdr in_hdr ____cacheline_aligned;
+@@ -47,9 +47,11 @@ struct virtio_i2c_req {
+
+ static void virtio_i2c_msg_done(struct virtqueue *vq)
+ {
+- struct virtio_i2c *vi = vq->vdev->priv;
++ struct virtio_i2c_req *req;
++ unsigned int len;
+
+- complete(&vi->completion);
++ while ((req = virtqueue_get_buf(vq, &len)))
++ complete(&req->completion);
+ }
+
+ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
+@@ -62,6 +64,8 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
+ for (i = 0; i < num; i++) {
+ int outcnt = 0, incnt = 0;
+
++ init_completion(&reqs[i].completion);
++
+ /*
+ * We don't support 0 length messages and so filter out
+ * 0 length transfers by using i2c_adapter_quirks.
+@@ -106,24 +110,17 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
+
+ static int virtio_i2c_complete_reqs(struct virtqueue *vq,
+ struct virtio_i2c_req *reqs,
+- struct i2c_msg *msgs, int num,
+- bool timedout)
++ struct i2c_msg *msgs, int num)
+ {
+- struct virtio_i2c_req *req;
+- bool failed = timedout;
+- unsigned int len;
++ bool failed = false;
+ int i, j = 0;
+
+ for (i = 0; i < num; i++) {
+- /* Detach the ith request from the vq */
+- req = virtqueue_get_buf(vq, &len);
++ struct virtio_i2c_req *req = &reqs[i];
+
+- /*
+- * Condition req == &reqs[i] should always meet since we have
+- * total num requests in the vq. reqs[i] can never be NULL here.
+- */
+- if (!failed && (WARN_ON(req != &reqs[i]) ||
+- req->in_hdr.status != VIRTIO_I2C_MSG_OK))
++ wait_for_completion(&req->completion);
++
++ if (!failed && req->in_hdr.status != VIRTIO_I2C_MSG_OK)
+ failed = true;
+
+ i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], !failed);
+@@ -132,7 +129,7 @@ static int virtio_i2c_complete_reqs(struct virtqueue *vq,
+ j++;
+ }
+
+- return timedout ? -ETIMEDOUT : j;
++ return j;
+ }
+
+ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+@@ -141,7 +138,6 @@ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ struct virtio_i2c *vi = i2c_get_adapdata(adap);
+ struct virtqueue *vq = vi->vq;
+ struct virtio_i2c_req *reqs;
+- unsigned long time_left;
+ int count;
+
+ reqs = kcalloc(num, sizeof(*reqs), GFP_KERNEL);
+@@ -160,15 +156,9 @@ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ * remote here to clear the virtqueue, so we can try another set of
+ * messages later on.
+ */
+-
+- reinit_completion(&vi->completion);
+ virtqueue_kick(vq);
+
+- time_left = wait_for_completion_timeout(&vi->completion, adap->timeout);
+- if (!time_left)
+- dev_err(&adap->dev, "virtio i2c backend timeout.\n");
+-
+- count = virtio_i2c_complete_reqs(vq, reqs, msgs, count, !time_left);
++ count = virtio_i2c_complete_reqs(vq, reqs, msgs, count);
+
+ err_free:
+ kfree(reqs);
+@@ -215,8 +205,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev)
+ vdev->priv = vi;
+ vi->vdev = vdev;
+
+- init_completion(&vi->completion);
+-
+ ret = virtio_i2c_setup_vqs(vi);
+ if (ret)
+ return ret;
+diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
+index bba08cbce6e18..6c39881d9e0fe 100644
+--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
++++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
+@@ -307,6 +307,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip,
+ u32 msg[3];
+ int rc;
+
++ if (writelen > I2C_SMBUS_BLOCK_MAX)
++ return -EINVAL;
++
+ memcpy(ctx->dma_buffer, data, writelen);
+ paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen,
+ DMA_TO_DEVICE);
+diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
+index bb93db98404ef..9652e8bea2d0b 100644
+--- a/drivers/i2c/busses/i2c-xiic.c
++++ b/drivers/i2c/busses/i2c-xiic.c
+@@ -375,6 +375,9 @@ static irqreturn_t xiic_process(int irq, void *dev_id)
+ struct xiic_i2c *i2c = dev_id;
+ u32 pend, isr, ier;
+ u32 clr = 0;
++ int xfer_more = 0;
++ int wakeup_req = 0;
++ int wakeup_code = 0;
+
+ /* Get the interrupt Status from the IPIF. There is no clearing of
+ * interrupts in the IPIF. Interrupts must be cleared at the source.
+@@ -411,10 +414,16 @@ static irqreturn_t xiic_process(int irq, void *dev_id)
+ */
+ xiic_reinit(i2c);
+
+- if (i2c->rx_msg)
+- xiic_wakeup(i2c, STATE_ERROR);
+- if (i2c->tx_msg)
+- xiic_wakeup(i2c, STATE_ERROR);
++ if (i2c->rx_msg) {
++ wakeup_req = 1;
++ wakeup_code = STATE_ERROR;
++ }
++ if (i2c->tx_msg) {
++ wakeup_req = 1;
++ wakeup_code = STATE_ERROR;
++ }
++ /* don't try to handle other events */
++ goto out;
+ }
+ if (pend & XIIC_INTR_RX_FULL_MASK) {
+ /* Receive register/FIFO is full */
+@@ -448,8 +457,7 @@ static irqreturn_t xiic_process(int irq, void *dev_id)
+ i2c->tx_msg++;
+ dev_dbg(i2c->adap.dev.parent,
+ "%s will start next...\n", __func__);
+-
+- __xiic_start_xfer(i2c);
++ xfer_more = 1;
+ }
+ }
+ }
+@@ -463,11 +471,13 @@ static irqreturn_t xiic_process(int irq, void *dev_id)
+ if (!i2c->tx_msg)
+ goto out;
+
+- if ((i2c->nmsgs == 1) && !i2c->rx_msg &&
+- xiic_tx_space(i2c) == 0)
+- xiic_wakeup(i2c, STATE_DONE);
++ wakeup_req = 1;
++
++ if (i2c->nmsgs == 1 && !i2c->rx_msg &&
++ xiic_tx_space(i2c) == 0)
++ wakeup_code = STATE_DONE;
+ else
+- xiic_wakeup(i2c, STATE_ERROR);
++ wakeup_code = STATE_ERROR;
+ }
+ if (pend & (XIIC_INTR_TX_EMPTY_MASK | XIIC_INTR_TX_HALF_MASK)) {
+ /* Transmit register/FIFO is empty or ½ empty */
+@@ -491,7 +501,7 @@ static irqreturn_t xiic_process(int irq, void *dev_id)
+ if (i2c->nmsgs > 1) {
+ i2c->nmsgs--;
+ i2c->tx_msg++;
+- __xiic_start_xfer(i2c);
++ xfer_more = 1;
+ } else {
+ xiic_irq_dis(i2c, XIIC_INTR_TX_HALF_MASK);
+
+@@ -509,6 +519,13 @@ out:
+ dev_dbg(i2c->adap.dev.parent, "%s clr: 0x%x\n", __func__, clr);
+
+ xiic_setreg32(i2c, XIIC_IISR_OFFSET, clr);
++ if (xfer_more)
++ __xiic_start_xfer(i2c);
++ if (wakeup_req)
++ xiic_wakeup(i2c, wakeup_code);
++
++ WARN_ON(xfer_more && wakeup_req);
++
+ mutex_unlock(&i2c->lock);
+ return IRQ_HANDLED;
+ }
+@@ -756,7 +773,6 @@ static const struct i2c_adapter_quirks xiic_quirks = {
+
+ static const struct i2c_adapter xiic_adapter = {
+ .owner = THIS_MODULE,
+- .name = DRIVER_NAME,
+ .class = I2C_CLASS_DEPRECATED,
+ .algo = &xiic_algorithm,
+ .quirks = &xiic_quirks,
+@@ -793,6 +809,8 @@ static int xiic_i2c_probe(struct platform_device *pdev)
+ i2c_set_adapdata(&i2c->adap, i2c);
+ i2c->adap.dev.parent = &pdev->dev;
+ i2c->adap.dev.of_node = pdev->dev.of_node;
++ snprintf(i2c->adap.name, sizeof(i2c->adap.name),
++ DRIVER_NAME " %s", pdev->name);
+
+ mutex_init(&i2c->lock);
+ init_waitqueue_head(&i2c->wait);
+@@ -933,6 +951,7 @@ static struct platform_driver xiic_i2c_driver = {
+
+ module_platform_driver(xiic_i2c_driver);
+
++MODULE_ALIAS("platform:" DRIVER_NAME);
+ MODULE_AUTHOR("info@mocean-labs.com");
+ MODULE_DESCRIPTION("Xilinx I2C bus driver");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c
+index 126d1393e548b..9ce20652d4942 100644
+--- a/drivers/i2c/busses/i2c-xlr.c
++++ b/drivers/i2c/busses/i2c-xlr.c
+@@ -431,11 +431,15 @@ static int xlr_i2c_probe(struct platform_device *pdev)
+ i2c_set_adapdata(&priv->adap, priv);
+ ret = i2c_add_numbered_adapter(&priv->adap);
+ if (ret < 0)
+- return ret;
++ goto err_unprepare_clk;
+
+ platform_set_drvdata(pdev, priv);
+ dev_info(&priv->adap.dev, "Added I2C Bus.\n");
+ return 0;
++
++err_unprepare_clk:
++ clk_unprepare(clk);
++ return ret;
+ }
+
+ static int xlr_i2c_remove(struct platform_device *pdev)
+diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
+index 54964fbe3f033..8fb065caf30b4 100644
+--- a/drivers/i2c/i2c-core-base.c
++++ b/drivers/i2c/i2c-core-base.c
+@@ -466,14 +466,12 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
+ static int i2c_device_probe(struct device *dev)
+ {
+ struct i2c_client *client = i2c_verify_client(dev);
+- struct i2c_adapter *adap;
+ struct i2c_driver *driver;
+ int status;
+
+ if (!client)
+ return 0;
+
+- adap = client->adapter;
+ client->irq = client->init_irq;
+
+ if (!client->irq) {
+@@ -539,14 +537,6 @@ static int i2c_device_probe(struct device *dev)
+
+ dev_dbg(dev, "probe\n");
+
+- if (adap->bus_regulator) {
+- status = regulator_enable(adap->bus_regulator);
+- if (status < 0) {
+- dev_err(&adap->dev, "Failed to enable bus regulator\n");
+- goto err_clear_wakeup_irq;
+- }
+- }
+-
+ status = of_clk_set_defaults(dev->of_node, false);
+ if (status < 0)
+ goto err_clear_wakeup_irq;
+@@ -604,10 +594,8 @@ put_sync_adapter:
+ static void i2c_device_remove(struct device *dev)
+ {
+ struct i2c_client *client = to_i2c_client(dev);
+- struct i2c_adapter *adap;
+ struct i2c_driver *driver;
+
+- adap = client->adapter;
+ driver = to_i2c_driver(dev->driver);
+ if (driver->remove) {
+ int status;
+@@ -622,8 +610,6 @@ static void i2c_device_remove(struct device *dev)
+ devres_release_group(&client->dev, client->devres_group_id);
+
+ dev_pm_domain_detach(&client->dev, true);
+- if (!pm_runtime_status_suspended(&client->dev) && adap->bus_regulator)
+- regulator_disable(adap->bus_regulator);
+
+ dev_pm_clear_wake_irq(&client->dev);
+ device_init_wakeup(&client->dev, false);
+@@ -633,86 +619,6 @@ static void i2c_device_remove(struct device *dev)
+ pm_runtime_put(&client->adapter->dev);
+ }
+
+-#ifdef CONFIG_PM_SLEEP
+-static int i2c_resume_early(struct device *dev)
+-{
+- struct i2c_client *client = i2c_verify_client(dev);
+- int err;
+-
+- if (!client)
+- return 0;
+-
+- if (pm_runtime_status_suspended(&client->dev) &&
+- client->adapter->bus_regulator) {
+- err = regulator_enable(client->adapter->bus_regulator);
+- if (err)
+- return err;
+- }
+-
+- return pm_generic_resume_early(&client->dev);
+-}
+-
+-static int i2c_suspend_late(struct device *dev)
+-{
+- struct i2c_client *client = i2c_verify_client(dev);
+- int err;
+-
+- if (!client)
+- return 0;
+-
+- err = pm_generic_suspend_late(&client->dev);
+- if (err)
+- return err;
+-
+- if (!pm_runtime_status_suspended(&client->dev) &&
+- client->adapter->bus_regulator)
+- return regulator_disable(client->adapter->bus_regulator);
+-
+- return 0;
+-}
+-#endif
+-
+-#ifdef CONFIG_PM
+-static int i2c_runtime_resume(struct device *dev)
+-{
+- struct i2c_client *client = i2c_verify_client(dev);
+- int err;
+-
+- if (!client)
+- return 0;
+-
+- if (client->adapter->bus_regulator) {
+- err = regulator_enable(client->adapter->bus_regulator);
+- if (err)
+- return err;
+- }
+-
+- return pm_generic_runtime_resume(&client->dev);
+-}
+-
+-static int i2c_runtime_suspend(struct device *dev)
+-{
+- struct i2c_client *client = i2c_verify_client(dev);
+- int err;
+-
+- if (!client)
+- return 0;
+-
+- err = pm_generic_runtime_suspend(&client->dev);
+- if (err)
+- return err;
+-
+- if (client->adapter->bus_regulator)
+- return regulator_disable(client->adapter->bus_regulator);
+- return 0;
+-}
+-#endif
+-
+-static const struct dev_pm_ops i2c_device_pm = {
+- SET_LATE_SYSTEM_SLEEP_PM_OPS(i2c_suspend_late, i2c_resume_early)
+- SET_RUNTIME_PM_OPS(i2c_runtime_suspend, i2c_runtime_resume, NULL)
+-};
+-
+ static void i2c_device_shutdown(struct device *dev)
+ {
+ struct i2c_client *client = i2c_verify_client(dev);
+@@ -772,7 +678,6 @@ struct bus_type i2c_bus_type = {
+ .probe = i2c_device_probe,
+ .remove = i2c_device_remove,
+ .shutdown = i2c_device_shutdown,
+- .pm = &i2c_device_pm,
+ };
+ EXPORT_SYMBOL_GPL(i2c_bus_type);
+
+@@ -2559,8 +2464,9 @@ void i2c_put_adapter(struct i2c_adapter *adap)
+ if (!adap)
+ return;
+
+- put_device(&adap->dev);
+ module_put(adap->owner);
++ /* Should be last, otherwise we risk use-after-free with 'adap' */
++ put_device(&adap->dev);
+ }
+ EXPORT_SYMBOL(i2c_put_adapter);
+
+diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
+index bce0e8bb78520..6fd2b6718b086 100644
+--- a/drivers/i2c/i2c-dev.c
++++ b/drivers/i2c/i2c-dev.c
+@@ -535,6 +535,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
+ sizeof(rdwr_arg)))
+ return -EFAULT;
+
++ if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0)
++ return -EINVAL;
++
+ if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
+ return -EINVAL;
+
+@@ -665,16 +668,21 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy)
+ i2c_dev->dev.class = i2c_dev_class;
+ i2c_dev->dev.parent = &adap->dev;
+ i2c_dev->dev.release = i2cdev_dev_release;
+- dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr);
++
++ res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr);
++ if (res)
++ goto err_put_i2c_dev;
+
+ res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev);
+- if (res) {
+- put_i2c_dev(i2c_dev, false);
+- return res;
+- }
++ if (res)
++ goto err_put_i2c_dev;
+
+ pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr);
+ return 0;
++
++err_put_i2c_dev:
++ put_i2c_dev(i2c_dev, false);
++ return res;
+ }
+
+ static int i2cdev_detach_adapter(struct device *dev, void *dummy)
+diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+index 5365199a31f41..f7a7405d4350a 100644
+--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
++++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+@@ -261,7 +261,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+
+ err = device_create_file(&pdev->dev, &dev_attr_available_masters);
+ if (err)
+- goto err_rollback;
++ goto err_rollback_activation;
+
+ err = device_create_file(&pdev->dev, &dev_attr_current_master);
+ if (err)
+@@ -271,8 +271,9 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+
+ err_rollback_available:
+ device_remove_file(&pdev->dev, &dev_attr_available_masters);
+-err_rollback:
++err_rollback_activation:
+ i2c_demux_deactivate_master(priv);
++err_rollback:
+ for (j = 0; j < i; j++) {
+ of_node_put(priv->chan[j].parent_np);
+ of_changeset_destroy(&priv->chan[j].chgset);
+diff --git a/drivers/i2c/muxes/i2c-mux-gpmux.c b/drivers/i2c/muxes/i2c-mux-gpmux.c
+index d3acd8d66c323..33024acaac02b 100644
+--- a/drivers/i2c/muxes/i2c-mux-gpmux.c
++++ b/drivers/i2c/muxes/i2c-mux-gpmux.c
+@@ -134,6 +134,7 @@ static int i2c_mux_probe(struct platform_device *pdev)
+ return 0;
+
+ err_children:
++ of_node_put(child);
+ i2c_mux_del_adapters(muxc);
+ err_parent:
+ i2c_put_adapter(parent);
+diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
+index 0e0679f65cf77..30a6de1694e07 100644
+--- a/drivers/i2c/muxes/i2c-mux-reg.c
++++ b/drivers/i2c/muxes/i2c-mux-reg.c
+@@ -183,13 +183,12 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
+ if (!mux->data.reg) {
+ dev_info(&pdev->dev,
+ "Register not set, using platform resource\n");
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- mux->data.reg_size = resource_size(res);
+- mux->data.reg = devm_ioremap_resource(&pdev->dev, res);
++ mux->data.reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(mux->data.reg)) {
+ ret = PTR_ERR(mux->data.reg);
+ goto err_put_parent;
+ }
++ mux->data.reg_size = resource_size(res);
+ }
+
+ if (mux->data.reg_size != 4 && mux->data.reg_size != 2 &&
+diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
+index c3b4c677b4429..dfe18dcd008d4 100644
+--- a/drivers/i3c/master.c
++++ b/drivers/i3c/master.c
+@@ -343,7 +343,8 @@ struct bus_type i3c_bus_type = {
+ static enum i3c_addr_slot_status
+ i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr)
+ {
+- int status, bitpos = addr * 2;
++ unsigned long status;
++ int bitpos = addr * 2;
+
+ if (addr > I2C_MAX_ADDR)
+ return I3C_ADDR_SLOT_RSVD;
+diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
+index 03a368da51b95..51a8608203de7 100644
+--- a/drivers/i3c/master/dw-i3c-master.c
++++ b/drivers/i3c/master/dw-i3c-master.c
+@@ -793,6 +793,10 @@ static int dw_i3c_master_daa(struct i3c_master_controller *m)
+ return -ENOMEM;
+
+ pos = dw_i3c_master_get_free_pos(master);
++ if (pos < 0) {
++ dw_i3c_master_free_xfer(xfer);
++ return pos;
++ }
+ cmd = &xfer->cmds[0];
+ cmd->cmd_hi = 0x1;
+ cmd->cmd_lo = COMMAND_PORT_DEV_COUNT(master->maxdevs - pos) |
+diff --git a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
+index 783e551a2c85a..97bb49ff5b53b 100644
+--- a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
++++ b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
+@@ -160,9 +160,7 @@ static int hci_dat_v1_get_index(struct i3c_hci *hci, u8 dev_addr)
+ unsigned int dat_idx;
+ u32 dat_w0;
+
+- for (dat_idx = find_first_bit(hci->DAT_data, hci->DAT_entries);
+- dat_idx < hci->DAT_entries;
+- dat_idx = find_next_bit(hci->DAT_data, hci->DAT_entries, dat_idx)) {
++ for_each_set_bit(dat_idx, hci->DAT_data, hci->DAT_entries) {
+ dat_w0 = dat_w0_read(dat_idx);
+ if (FIELD_GET(DAT_0_DYNAMIC_ADDRESS, dat_w0) == dev_addr)
+ return dat_idx;
+diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c
+index 879e5a64acaf4..d495c61863817 100644
+--- a/drivers/i3c/master/svc-i3c-master.c
++++ b/drivers/i3c/master/svc-i3c-master.c
+@@ -723,6 +723,10 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master,
+ */
+ break;
+ } else if (SVC_I3C_MSTATUS_NACKED(reg)) {
++ /* No I3C devices attached */
++ if (dev_nb == 0)
++ break;
++
+ /*
+ * A slave device nacked the address, this is
+ * allowed only once, DAA will be stopped and
+@@ -1152,11 +1156,17 @@ static int svc_i3c_master_send_ccc_cmd(struct i3c_master_controller *m,
+ {
+ struct svc_i3c_master *master = to_svc_i3c_master(m);
+ bool broadcast = cmd->id < 0x80;
++ int ret;
+
+ if (broadcast)
+- return svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
++ ret = svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
+ else
+- return svc_i3c_master_send_direct_ccc_cmd(master, cmd);
++ ret = svc_i3c_master_send_direct_ccc_cmd(master, cmd);
++
++ if (ret)
++ cmd->err = I3C_ERROR_M2;
++
++ return ret;
+ }
+
+ static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev,
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index e6c543b5ee1dd..376e631e80d69 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -47,11 +47,13 @@
+ #include <linux/tick.h>
+ #include <trace/events/power.h>
+ #include <linux/sched.h>
++#include <linux/sched/smt.h>
+ #include <linux/notifier.h>
+ #include <linux/cpu.h>
+ #include <linux/moduleparam.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
++#include <asm/nospec-branch.h>
+ #include <asm/mwait.h>
+ #include <asm/msr.h>
+
+@@ -93,6 +95,12 @@ static unsigned int mwait_substates __initdata;
+ */
+ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
+
++/*
++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
++ * above.
++ */
++#define CPUIDLE_FLAG_IBRS BIT(16)
++
+ /*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+@@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
+ return index;
+ }
+
++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
++ struct cpuidle_driver *drv, int index)
++{
++ bool smt_active = sched_smt_active();
++ u64 spec_ctrl = spec_ctrl_current();
++ int ret;
++
++ if (smt_active)
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
++
++ ret = intel_idle(dev, drv, index);
++
++ if (smt_active)
++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
++
++ return ret;
++}
++
+ /**
+ * intel_idle_s2idle - Ask the processor to enter the given idle state.
+ * @dev: cpuidle device of the target CPU.
+@@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 85,
+ .target_residency = 200,
+ .enter = &intel_idle,
+@@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
+ {
+ .name = "C7s",
+ .desc = "MWAIT 0x33",
+- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 124,
+ .target_residency = 800,
+ .enter = &intel_idle,
+@@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 200,
+ .target_residency = 800,
+ .enter = &intel_idle,
+@@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
+ {
+ .name = "C9",
+ .desc = "MWAIT 0x50",
+- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 480,
+ .target_residency = 5000,
+ .enter = &intel_idle,
+@@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 890,
+ .target_residency = 5000,
+ .enter = &intel_idle,
+@@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
+ .exit_latency = 133,
+ .target_residency = 600,
+ .enter = &intel_idle,
+@@ -1574,6 +1600,11 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
+ /* Structure copy. */
+ drv->states[drv->state_count] = cpuidle_state_table[cstate];
+
++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
++ drv->states[drv->state_count].enter = intel_idle_ibrs;
++ }
++
+ if ((disabled_states_mask & BIT(drv->state_count)) ||
+ ((icpu->use_acpi || force_use_acpi) &&
+ intel_idle_off_by_default(mwait_hint) &&
+diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
+index 2334ad249b462..4fb4321a72cb1 100644
+--- a/drivers/iio/Kconfig
++++ b/drivers/iio/Kconfig
+@@ -70,6 +70,7 @@ config IIO_TRIGGERED_EVENT
+
+ source "drivers/iio/accel/Kconfig"
+ source "drivers/iio/adc/Kconfig"
++source "drivers/iio/addac/Kconfig"
+ source "drivers/iio/afe/Kconfig"
+ source "drivers/iio/amplifiers/Kconfig"
+ source "drivers/iio/cdc/Kconfig"
+diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
+index 65e39bd4f9346..8d48c70fee4d3 100644
+--- a/drivers/iio/Makefile
++++ b/drivers/iio/Makefile
+@@ -15,6 +15,7 @@ obj-$(CONFIG_IIO_TRIGGERED_EVENT) += industrialio-triggered-event.o
+
+ obj-y += accel/
+ obj-y += adc/
++obj-y += addac/
+ obj-y += afe/
+ obj-y += amplifiers/
+ obj-y += buffer/
+diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c
+index 7a434e2884d43..dfb8e2e5bdf58 100644
+--- a/drivers/iio/accel/adis16201.c
++++ b/drivers/iio/accel/adis16201.c
+@@ -300,3 +300,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16201 Dual-Axis Digital Inclinometer and Accelerometer");
+ MODULE_LICENSE("GPL v2");
+ MODULE_ALIAS("spi:adis16201");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/accel/adis16209.c b/drivers/iio/accel/adis16209.c
+index ac08e866d6128..5a9c6e2296f1d 100644
+--- a/drivers/iio/accel/adis16209.c
++++ b/drivers/iio/accel/adis16209.c
+@@ -310,3 +310,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16209 Dual-Axis Digital Inclinometer and Accelerometer");
+ MODULE_LICENSE("GPL v2");
+ MODULE_ALIAS("spi:adis16209");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
+index fc95924077176..4a358f8c27f36 100644
+--- a/drivers/iio/accel/adxl372.c
++++ b/drivers/iio/accel/adxl372.c
+@@ -998,17 +998,30 @@ static ssize_t adxl372_get_fifo_watermark(struct device *dev,
+ return sprintf(buf, "%d\n", st->watermark);
+ }
+
+-static IIO_CONST_ATTR(hwfifo_watermark_min, "1");
+-static IIO_CONST_ATTR(hwfifo_watermark_max,
+- __stringify(ADXL372_FIFO_SIZE));
++static ssize_t hwfifo_watermark_min_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ return sysfs_emit(buf, "%s\n", "1");
++}
++
++static ssize_t hwfifo_watermark_max_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ return sysfs_emit(buf, "%s\n", __stringify(ADXL372_FIFO_SIZE));
++}
++
++static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
++static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
+ adxl372_get_fifo_watermark, NULL, 0);
+ static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
+ adxl372_get_fifo_enabled, NULL, 0);
+
+ static const struct attribute *adxl372_fifo_attributes[] = {
+- &iio_const_attr_hwfifo_watermark_min.dev_attr.attr,
+- &iio_const_attr_hwfifo_watermark_max.dev_attr.attr,
++ &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
++ &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
+ &iio_dev_attr_hwfifo_watermark.dev_attr.attr,
+ &iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+ NULL,
+diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c
+index 2edfcb4819b7d..3a1f47c7288ff 100644
+--- a/drivers/iio/accel/bma180.c
++++ b/drivers/iio/accel/bma180.c
+@@ -1006,11 +1006,12 @@ static int bma180_probe(struct i2c_client *client,
+
+ data->trig->ops = &bma180_trigger_ops;
+ iio_trigger_set_drvdata(data->trig, indio_dev);
+- indio_dev->trig = iio_trigger_get(data->trig);
+
+ ret = iio_trigger_register(data->trig);
+ if (ret)
+ goto err_trigger_free;
++
++ indio_dev->trig = iio_trigger_get(data->trig);
+ }
+
+ ret = iio_triggered_buffer_setup(indio_dev, NULL,
+diff --git a/drivers/iio/accel/bma400.h b/drivers/iio/accel/bma400.h
+index 5ad10db9819fe..416090c6b1e81 100644
+--- a/drivers/iio/accel/bma400.h
++++ b/drivers/iio/accel/bma400.h
+@@ -83,8 +83,27 @@
+ #define BMA400_ACC_ODR_MIN_WHOLE_HZ 25
+ #define BMA400_ACC_ODR_MIN_HZ 12
+
+-#define BMA400_SCALE_MIN 38357
+-#define BMA400_SCALE_MAX 306864
++/*
++ * BMA400_SCALE_MIN macro value represents m/s^2 for 1 LSB before
++ * converting to micro values for +-2g range.
++ *
++ * For +-2g - 1 LSB = 0.976562 milli g = 0.009576 m/s^2
++ * For +-4g - 1 LSB = 1.953125 milli g = 0.019153 m/s^2
++ * For +-16g - 1 LSB = 7.8125 milli g = 0.076614 m/s^2
++ *
++ * The raw value which is used to select the different ranges is determined
++ * by the first bit set position from the scale value, so BMA400_SCALE_MIN
++ * should be odd.
++ *
++ * Scale values for +-2g, +-4g, +-8g and +-16g are populated into bma400_scales
++ * array by left shifting BMA400_SCALE_MIN.
++ * e.g.:
++ * To select +-2g = 9577 << 0 = raw value to write is 0.
++ * To select +-8g = 9577 << 2 = raw value to write is 2.
++ * To select +-16g = 9577 << 3 = raw value to write is 3.
++ */
++#define BMA400_SCALE_MIN 9577
++#define BMA400_SCALE_MAX 76617
+
+ #define BMA400_NUM_REGULATORS 2
+ #define BMA400_VDD_REGULATOR 0
+diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c
+index 21520e022a212..7e65985346504 100644
+--- a/drivers/iio/accel/bma400_core.c
++++ b/drivers/iio/accel/bma400_core.c
+@@ -13,14 +13,14 @@
+
+ #include <linux/bitops.h>
+ #include <linux/device.h>
+-#include <linux/iio/iio.h>
+-#include <linux/iio/sysfs.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/mutex.h>
+ #include <linux/regmap.h>
+ #include <linux/regulator/consumer.h>
+
++#include <linux/iio/iio.h>
++
+ #include "bma400.h"
+
+ /*
+diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
+index e8693a42ad464..3af763b4a9737 100644
+--- a/drivers/iio/accel/bmc150-accel-core.c
++++ b/drivers/iio/accel/bmc150-accel-core.c
+@@ -1782,11 +1782,14 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(dev, "Unable to register iio device\n");
+- goto err_trigger_unregister;
++ goto err_pm_cleanup;
+ }
+
+ return 0;
+
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(dev);
++ pm_runtime_disable(dev);
+ err_trigger_unregister:
+ bmc150_accel_unregister_triggers(data, BMC150_ACCEL_TRIGGERS - 1);
+ err_buffer_cleanup:
+diff --git a/drivers/iio/accel/cros_ec_accel_legacy.c b/drivers/iio/accel/cros_ec_accel_legacy.c
+index b6f3471b62dcf..3b77fded2dc07 100644
+--- a/drivers/iio/accel/cros_ec_accel_legacy.c
++++ b/drivers/iio/accel/cros_ec_accel_legacy.c
+@@ -215,7 +215,7 @@ static int cros_ec_accel_legacy_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ ret = cros_ec_sensors_core_init(pdev, indio_dev, true,
+- cros_ec_sensors_capture, NULL);
++ cros_ec_sensors_capture);
+ if (ret)
+ return ret;
+
+@@ -235,7 +235,7 @@ static int cros_ec_accel_legacy_probe(struct platform_device *pdev)
+ state->sign[CROS_EC_SENSOR_Z] = -1;
+ }
+
+- return devm_iio_device_register(dev, indio_dev);
++ return cros_ec_sensors_core_register(dev, indio_dev, NULL);
+ }
+
+ static struct platform_driver cros_ec_accel_platform_driver = {
+diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
+index f41db9e0249a7..548a8c4269e70 100644
+--- a/drivers/iio/accel/fxls8962af-core.c
++++ b/drivers/iio/accel/fxls8962af-core.c
+@@ -154,12 +154,20 @@ struct fxls8962af_data {
+ u8 watermark;
+ };
+
+-const struct regmap_config fxls8962af_regmap_conf = {
++const struct regmap_config fxls8962af_i2c_regmap_conf = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = FXLS8962AF_MAX_REG,
+ };
+-EXPORT_SYMBOL_GPL(fxls8962af_regmap_conf);
++EXPORT_SYMBOL_GPL(fxls8962af_i2c_regmap_conf);
++
++const struct regmap_config fxls8962af_spi_regmap_conf = {
++ .reg_bits = 8,
++ .pad_bits = 8,
++ .val_bits = 8,
++ .max_register = FXLS8962AF_MAX_REG,
++};
++EXPORT_SYMBOL_GPL(fxls8962af_spi_regmap_conf);
+
+ enum {
+ fxls8962af_idx_x,
+@@ -478,8 +486,7 @@ static int fxls8962af_set_watermark(struct iio_dev *indio_dev, unsigned val)
+ .sign = 's', \
+ .realbits = 12, \
+ .storagebits = 16, \
+- .shift = 4, \
+- .endianness = IIO_BE, \
++ .endianness = IIO_LE, \
+ }, \
+ }
+
+@@ -648,9 +655,10 @@ static int fxls8962af_fifo_transfer(struct fxls8962af_data *data,
+ int total_length = samples * sample_length;
+ int ret;
+
+- if (i2c_verify_client(dev))
++ if (i2c_verify_client(dev) &&
++ data->chip_info->chip_id == FXLS8962AF_DEVICE_ID)
+ /*
+- * Due to errata bug:
++ * Due to errata bug (only applicable on fxls8962af):
+ * E3: FIFO burst read operation error using I2C interface
+ * We have to avoid burst reads on I2C..
+ */
+diff --git a/drivers/iio/accel/fxls8962af-i2c.c b/drivers/iio/accel/fxls8962af-i2c.c
+index cfb004b204559..6bde9891effbf 100644
+--- a/drivers/iio/accel/fxls8962af-i2c.c
++++ b/drivers/iio/accel/fxls8962af-i2c.c
+@@ -18,7 +18,7 @@ static int fxls8962af_probe(struct i2c_client *client)
+ {
+ struct regmap *regmap;
+
+- regmap = devm_regmap_init_i2c(client, &fxls8962af_regmap_conf);
++ regmap = devm_regmap_init_i2c(client, &fxls8962af_i2c_regmap_conf);
+ if (IS_ERR(regmap)) {
+ dev_err(&client->dev, "Failed to initialize i2c regmap\n");
+ return PTR_ERR(regmap);
+diff --git a/drivers/iio/accel/fxls8962af-spi.c b/drivers/iio/accel/fxls8962af-spi.c
+index 57108d3d480b6..6f4dff3238d3c 100644
+--- a/drivers/iio/accel/fxls8962af-spi.c
++++ b/drivers/iio/accel/fxls8962af-spi.c
+@@ -18,7 +18,7 @@ static int fxls8962af_probe(struct spi_device *spi)
+ {
+ struct regmap *regmap;
+
+- regmap = devm_regmap_init_spi(spi, &fxls8962af_regmap_conf);
++ regmap = devm_regmap_init_spi(spi, &fxls8962af_spi_regmap_conf);
+ if (IS_ERR(regmap)) {
+ dev_err(&spi->dev, "Failed to initialize spi regmap\n");
+ return PTR_ERR(regmap);
+diff --git a/drivers/iio/accel/fxls8962af.h b/drivers/iio/accel/fxls8962af.h
+index b67572c3ef069..9cbe98c3ba9a2 100644
+--- a/drivers/iio/accel/fxls8962af.h
++++ b/drivers/iio/accel/fxls8962af.h
+@@ -17,6 +17,7 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq);
+ int fxls8962af_core_remove(struct device *dev);
+
+ extern const struct dev_pm_ops fxls8962af_pm_ops;
+-extern const struct regmap_config fxls8962af_regmap_conf;
++extern const struct regmap_config fxls8962af_i2c_regmap_conf;
++extern const struct regmap_config fxls8962af_spi_regmap_conf;
+
+ #endif /* _FXLS8962AF_H_ */
+diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
+index a2def6f9380a3..5eac7ea199931 100644
+--- a/drivers/iio/accel/hid-sensor-accel-3d.c
++++ b/drivers/iio/accel/hid-sensor-accel-3d.c
+@@ -280,6 +280,7 @@ static int accel_3d_capture_sample(struct hid_sensor_hub_device *hsdev,
+ hid_sensor_convert_timestamp(
+ &accel_state->common_attributes,
+ *(int64_t *)raw_data);
++ ret = 0;
+ break;
+ default:
+ break;
+diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
+index a51fdd3c9b5b5..594a383169c75 100644
+--- a/drivers/iio/accel/kxcjk-1013.c
++++ b/drivers/iio/accel/kxcjk-1013.c
+@@ -1553,12 +1553,12 @@ static int kxcjk1013_probe(struct i2c_client *client,
+
+ data->dready_trig->ops = &kxcjk1013_trigger_ops;
+ iio_trigger_set_drvdata(data->dready_trig, indio_dev);
+- indio_dev->trig = data->dready_trig;
+- iio_trigger_get(indio_dev->trig);
+ ret = iio_trigger_register(data->dready_trig);
+ if (ret)
+ goto err_poweroff;
+
++ indio_dev->trig = iio_trigger_get(data->dready_trig);
++
+ data->motion_trig->ops = &kxcjk1013_trigger_ops;
+ iio_trigger_set_drvdata(data->motion_trig, indio_dev);
+ ret = iio_trigger_register(data->motion_trig);
+@@ -1589,14 +1589,16 @@ static int kxcjk1013_probe(struct i2c_client *client,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(&client->dev, "unable to register iio device\n");
+- goto err_buffer_cleanup;
++ goto err_pm_cleanup;
+ }
+
+ return 0;
+
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(&client->dev);
++ pm_runtime_disable(&client->dev);
+ err_buffer_cleanup:
+- if (data->dready_trig)
+- iio_triggered_buffer_cleanup(indio_dev);
++ iio_triggered_buffer_cleanup(indio_dev);
+ err_trigger_unregister:
+ if (data->dready_trig)
+ iio_trigger_unregister(data->dready_trig);
+@@ -1618,8 +1620,8 @@ static int kxcjk1013_remove(struct i2c_client *client)
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
+
++ iio_triggered_buffer_cleanup(indio_dev);
+ if (data->dready_trig) {
+- iio_triggered_buffer_cleanup(indio_dev);
+ iio_trigger_unregister(data->dready_trig);
+ iio_trigger_unregister(data->motion_trig);
+ }
+diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
+index bf7ed9e7d00f4..e56ecc075014e 100644
+--- a/drivers/iio/accel/kxsd9.c
++++ b/drivers/iio/accel/kxsd9.c
+@@ -224,14 +224,14 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p)
+ hw_values.chan,
+ sizeof(hw_values.chan));
+ if (ret) {
+- dev_err(st->dev,
+- "error reading data\n");
+- return ret;
++ dev_err(st->dev, "error reading data: %d\n", ret);
++ goto out;
+ }
+
+ iio_push_to_buffers_with_timestamp(indio_dev,
+ &hw_values,
+ iio_get_time_ns(indio_dev));
++out:
+ iio_trigger_notify_done(indio_dev->trig);
+
+ return IRQ_HANDLED;
+diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c
+index 715b8138fb715..1f46a73aafeac 100644
+--- a/drivers/iio/accel/mma8452.c
++++ b/drivers/iio/accel/mma8452.c
+@@ -176,6 +176,7 @@ static const struct mma8452_event_regs trans_ev_regs = {
+ * @enabled_events: event flags enabled and handled by this driver
+ */
+ struct mma_chip_info {
++ const char *name;
+ u8 chip_id;
+ const struct iio_chan_spec *channels;
+ int num_channels;
+@@ -379,8 +380,8 @@ static ssize_t mma8452_show_scale_avail(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+- struct mma8452_data *data = iio_priv(i2c_get_clientdata(
+- to_i2c_client(dev)));
++ struct iio_dev *indio_dev = dev_to_iio_dev(dev);
++ struct mma8452_data *data = iio_priv(indio_dev);
+
+ return mma8452_show_int_plus_micros(buf, data->chip_info->mma_scales,
+ ARRAY_SIZE(data->chip_info->mma_scales));
+@@ -1301,6 +1302,7 @@ enum {
+
+ static const struct mma_chip_info mma_chip_info_table[] = {
+ [mma8451] = {
++ .name = "mma8451",
+ .chip_id = MMA8451_DEVICE_ID,
+ .channels = mma8451_channels,
+ .num_channels = ARRAY_SIZE(mma8451_channels),
+@@ -1325,6 +1327,7 @@ static const struct mma_chip_info mma_chip_info_table[] = {
+ MMA8452_INT_FF_MT,
+ },
+ [mma8452] = {
++ .name = "mma8452",
+ .chip_id = MMA8452_DEVICE_ID,
+ .channels = mma8452_channels,
+ .num_channels = ARRAY_SIZE(mma8452_channels),
+@@ -1341,6 +1344,7 @@ static const struct mma_chip_info mma_chip_info_table[] = {
+ MMA8452_INT_FF_MT,
+ },
+ [mma8453] = {
++ .name = "mma8453",
+ .chip_id = MMA8453_DEVICE_ID,
+ .channels = mma8453_channels,
+ .num_channels = ARRAY_SIZE(mma8453_channels),
+@@ -1357,6 +1361,7 @@ static const struct mma_chip_info mma_chip_info_table[] = {
+ MMA8452_INT_FF_MT,
+ },
+ [mma8652] = {
++ .name = "mma8652",
+ .chip_id = MMA8652_DEVICE_ID,
+ .channels = mma8652_channels,
+ .num_channels = ARRAY_SIZE(mma8652_channels),
+@@ -1366,6 +1371,7 @@ static const struct mma_chip_info mma_chip_info_table[] = {
+ .enabled_events = MMA8452_INT_FF_MT,
+ },
+ [mma8653] = {
++ .name = "mma8653",
+ .chip_id = MMA8653_DEVICE_ID,
+ .channels = mma8653_channels,
+ .num_channels = ARRAY_SIZE(mma8653_channels),
+@@ -1380,6 +1386,7 @@ static const struct mma_chip_info mma_chip_info_table[] = {
+ .enabled_events = MMA8452_INT_FF_MT,
+ },
+ [fxls8471] = {
++ .name = "fxls8471",
+ .chip_id = FXLS8471_DEVICE_ID,
+ .channels = mma8451_channels,
+ .num_channels = ARRAY_SIZE(mma8451_channels),
+@@ -1470,7 +1477,7 @@ static int mma8452_trigger_setup(struct iio_dev *indio_dev)
+ if (ret)
+ return ret;
+
+- indio_dev->trig = trig;
++ indio_dev->trig = iio_trigger_get(trig);
+
+ return 0;
+ }
+@@ -1486,10 +1493,14 @@ static int mma8452_reset(struct i2c_client *client)
+ int i;
+ int ret;
+
+- ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2,
++ /*
++	 * On fxls8471, the chip resets immediately after the reset bit is
++	 * configured and does not ACK the transfer, so do not check the
++	 * return value here. The code below reads the reset register to
++	 * check whether the reset actually took effect.
++ */
++ i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2,
+ MMA8452_CTRL_REG2_RST);
+- if (ret < 0)
+- return ret;
+
+ for (i = 0; i < 10; i++) {
+ usleep_range(100, 200);
+@@ -1522,13 +1533,6 @@ static int mma8452_probe(struct i2c_client *client,
+ struct mma8452_data *data;
+ struct iio_dev *indio_dev;
+ int ret;
+- const struct of_device_id *match;
+-
+- match = of_match_device(mma8452_dt_ids, &client->dev);
+- if (!match) {
+- dev_err(&client->dev, "unknown device model\n");
+- return -ENODEV;
+- }
+
+ indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data));
+ if (!indio_dev)
+@@ -1537,7 +1541,16 @@ static int mma8452_probe(struct i2c_client *client,
+ data = iio_priv(indio_dev);
+ data->client = client;
+ mutex_init(&data->lock);
+- data->chip_info = match->data;
++
++ data->chip_info = device_get_match_data(&client->dev);
++ if (!data->chip_info) {
++ if (id) {
++ data->chip_info = &mma_chip_info_table[id->driver_data];
++ } else {
++ dev_err(&client->dev, "unknown device model\n");
++ return -ENODEV;
++ }
++ }
+
+ data->vdd_reg = devm_regulator_get(&client->dev, "vdd");
+ if (IS_ERR(data->vdd_reg))
+@@ -1581,11 +1594,11 @@ static int mma8452_probe(struct i2c_client *client,
+ }
+
+ dev_info(&client->dev, "registering %s accelerometer; ID 0x%x\n",
+- match->compatible, data->chip_info->chip_id);
++ data->chip_info->name, data->chip_info->chip_id);
+
+ i2c_set_clientdata(client, indio_dev);
+ indio_dev->info = &mma8452_info;
+- indio_dev->name = id->name;
++ indio_dev->name = data->chip_info->name;
+ indio_dev->modes = INDIO_DIRECT_MODE;
+ indio_dev->channels = data->chip_info->channels;
+ indio_dev->num_channels = data->chip_info->num_channels;
+@@ -1810,7 +1823,7 @@ MODULE_DEVICE_TABLE(i2c, mma8452_id);
+ static struct i2c_driver mma8452_driver = {
+ .driver = {
+ .name = "mma8452",
+- .of_match_table = of_match_ptr(mma8452_dt_ids),
++ .of_match_table = mma8452_dt_ids,
+ .pm = &mma8452_pm_ops,
+ },
+ .probe = mma8452_probe,
+diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c
+index 4c359fb054801..c53a3398b14c4 100644
+--- a/drivers/iio/accel/mma9551.c
++++ b/drivers/iio/accel/mma9551.c
+@@ -495,11 +495,14 @@ static int mma9551_probe(struct i2c_client *client,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(&client->dev, "unable to register iio device\n");
+- goto out_poweroff;
++ goto err_pm_cleanup;
+ }
+
+ return 0;
+
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(&client->dev);
++ pm_runtime_disable(&client->dev);
+ out_poweroff:
+ mma9551_set_device_state(client, false);
+
+diff --git a/drivers/iio/accel/mma9551_core.c b/drivers/iio/accel/mma9551_core.c
+index fbf2e2c45678b..9023c07bb57b4 100644
+--- a/drivers/iio/accel/mma9551_core.c
++++ b/drivers/iio/accel/mma9551_core.c
+@@ -296,9 +296,12 @@ int mma9551_read_config_word(struct i2c_client *client, u8 app_id,
+
+ ret = mma9551_transfer(client, app_id, MMA9551_CMD_READ_CONFIG,
+ reg, NULL, 0, (u8 *)&v, 2);
++ if (ret < 0)
++ return ret;
++
+ *val = be16_to_cpu(v);
+
+- return ret;
++ return 0;
+ }
+ EXPORT_SYMBOL(mma9551_read_config_word);
+
+@@ -354,9 +357,12 @@ int mma9551_read_status_word(struct i2c_client *client, u8 app_id,
+
+ ret = mma9551_transfer(client, app_id, MMA9551_CMD_READ_STATUS,
+ reg, NULL, 0, (u8 *)&v, 2);
++ if (ret < 0)
++ return ret;
++
+ *val = be16_to_cpu(v);
+
+- return ret;
++ return 0;
+ }
+ EXPORT_SYMBOL(mma9551_read_status_word);
+
+diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c
+index ba3ecb3b57dcd..1599b75724d4f 100644
+--- a/drivers/iio/accel/mma9553.c
++++ b/drivers/iio/accel/mma9553.c
+@@ -1134,12 +1134,15 @@ static int mma9553_probe(struct i2c_client *client,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(&client->dev, "unable to register iio device\n");
+- goto out_poweroff;
++ goto err_pm_cleanup;
+ }
+
+ dev_dbg(&indio_dev->dev, "Registered device %s\n", name);
+ return 0;
+
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(&client->dev);
++ pm_runtime_disable(&client->dev);
+ out_poweroff:
+ mma9551_set_device_state(client, false);
+ return ret;
+diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c
+index b3afbf0649152..df600d2917c0a 100644
+--- a/drivers/iio/accel/mxc4005.c
++++ b/drivers/iio/accel/mxc4005.c
+@@ -456,8 +456,6 @@ static int mxc4005_probe(struct i2c_client *client,
+
+ data->dready_trig->ops = &mxc4005_trigger_ops;
+ iio_trigger_set_drvdata(data->dready_trig, indio_dev);
+- indio_dev->trig = data->dready_trig;
+- iio_trigger_get(indio_dev->trig);
+ ret = devm_iio_trigger_register(&client->dev,
+ data->dready_trig);
+ if (ret) {
+@@ -465,6 +463,8 @@ static int mxc4005_probe(struct i2c_client *client,
+ "failed to register trigger\n");
+ return ret;
+ }
++
++ indio_dev->trig = iio_trigger_get(data->dready_trig);
+ }
+
+ return devm_iio_device_register(&client->dev, indio_dev);
+diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
+index f1e6ec380667c..e845d133b809d 100644
+--- a/drivers/iio/accel/st_accel_core.c
++++ b/drivers/iio/accel/st_accel_core.c
+@@ -1212,12 +1212,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
+
+ adev = ACPI_COMPANION(adata->dev);
+ if (!adev)
+- return 0;
++ return -ENXIO;
+
+ /* Read _ONT data, which should be a package of 6 integers. */
+ status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer);
+ if (status == AE_NOT_FOUND) {
+- return 0;
++ return -ENXIO;
+ } else if (ACPI_FAILURE(status)) {
+ dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
+ status);
+diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
+index f711756e41e3d..cba57459e90ab 100644
+--- a/drivers/iio/accel/st_accel_i2c.c
++++ b/drivers/iio/accel/st_accel_i2c.c
+@@ -193,10 +193,10 @@ static int st_accel_i2c_remove(struct i2c_client *client)
+ {
+ struct iio_dev *indio_dev = i2c_get_clientdata(client);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_accel_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c
+index bb45d9ff95b85..5167fae1ee8ec 100644
+--- a/drivers/iio/accel/st_accel_spi.c
++++ b/drivers/iio/accel/st_accel_spi.c
+@@ -143,10 +143,10 @@ static int st_accel_spi_remove(struct spi_device *spi)
+ {
+ struct iio_dev *indio_dev = spi_get_drvdata(spi);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_accel_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
+index af168e1c9fdb5..86b83dc7b7d99 100644
+--- a/drivers/iio/adc/Kconfig
++++ b/drivers/iio/adc/Kconfig
+@@ -991,22 +991,6 @@ config STMPE_ADC
+ Say yes here to build support for ST Microelectronics STMPE
+ built-in ADC block (stmpe811).
+
+-config STX104
+- tristate "Apex Embedded Systems STX104 driver"
+- depends on PC104 && X86
+- select ISA_BUS_API
+- select GPIOLIB
+- help
+- Say yes here to build support for the Apex Embedded Systems STX104
+- integrated analog PC/104 card.
+-
+- This driver supports the 16 channels of single-ended (8 channels of
+- differential) analog inputs, 2 channels of analog output, 4 digital
+- inputs, and 4 digital outputs provided by the STX104.
+-
+- The base port addresses for the devices may be configured via the base
+- array module parameter.
+-
+ config SUN4I_GPADC
+ tristate "Support for the Allwinner SoCs GPADC"
+ depends on IIO
+diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
+index d68550f493e34..13668c4cfbaf6 100644
+--- a/drivers/iio/adc/Makefile
++++ b/drivers/iio/adc/Makefile
+@@ -85,7 +85,6 @@ obj-$(CONFIG_ROCKCHIP_SARADC) += rockchip_saradc.o
+ obj-$(CONFIG_RZG2L_ADC) += rzg2l_adc.o
+ obj-$(CONFIG_SC27XX_ADC) += sc27xx_adc.o
+ obj-$(CONFIG_SPEAR_ADC) += spear_adc.o
+-obj-$(CONFIG_STX104) += stx104.o
+ obj-$(CONFIG_SUN4I_GPADC) += sun4i-gpadc-iio.o
+ obj-$(CONFIG_STM32_ADC_CORE) += stm32-adc-core.o
+ obj-$(CONFIG_STM32_ADC) += stm32-adc.o
+diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
+index e45c600fccc0b..101f2da2811b2 100644
+--- a/drivers/iio/adc/ad7124.c
++++ b/drivers/iio/adc/ad7124.c
+@@ -76,7 +76,7 @@
+ #define AD7124_CONFIG_REF_SEL(x) FIELD_PREP(AD7124_CONFIG_REF_SEL_MSK, x)
+ #define AD7124_CONFIG_PGA_MSK GENMASK(2, 0)
+ #define AD7124_CONFIG_PGA(x) FIELD_PREP(AD7124_CONFIG_PGA_MSK, x)
+-#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(7, 6)
++#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(6, 5)
+ #define AD7124_CONFIG_IN_BUFF(x) FIELD_PREP(AD7124_CONFIG_IN_BUFF_MSK, x)
+
+ /* AD7124_FILTER_X */
+@@ -188,7 +188,6 @@ static const struct iio_chan_spec ad7124_channel_template = {
+ .sign = 'u',
+ .realbits = 24,
+ .storagebits = 32,
+- .shift = 8,
+ .endianness = IIO_BE,
+ },
+ };
+diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
+index 2121a812b0c31..6df435e3c4218 100644
+--- a/drivers/iio/adc/ad7192.c
++++ b/drivers/iio/adc/ad7192.c
+@@ -327,7 +327,7 @@ static int ad7192_of_clock_select(struct ad7192_state *st)
+ clock_sel = AD7192_CLK_INT;
+
+ /* use internal clock */
+- if (st->mclk) {
++ if (!st->mclk) {
+ if (of_property_read_bool(np, "adi,int-clock-output-enable"))
+ clock_sel = AD7192_CLK_INT_CO;
+ } else {
+@@ -340,9 +340,9 @@ static int ad7192_of_clock_select(struct ad7192_state *st)
+ return clock_sel;
+ }
+
+-static int ad7192_setup(struct ad7192_state *st, struct device_node *np)
++static int ad7192_setup(struct iio_dev *indio_dev, struct device_node *np)
+ {
+- struct iio_dev *indio_dev = spi_get_drvdata(st->sd.spi);
++ struct ad7192_state *st = iio_priv(indio_dev);
+ bool rej60_en, refin2_en;
+ bool buf_en, bipolar, burnout_curr_en;
+ unsigned long long scale_uv;
+@@ -835,10 +835,6 @@ static const struct iio_info ad7195_info = {
+ __AD719x_CHANNEL(_si, _channel1, -1, _address, NULL, IIO_VOLTAGE, \
+ BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info)
+
+-#define AD719x_SHORTED_CHANNEL(_si, _channel1, _address) \
+- __AD719x_CHANNEL(_si, _channel1, -1, _address, "shorted", IIO_VOLTAGE, \
+- BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info)
+-
+ #define AD719x_TEMP_CHANNEL(_si, _address) \
+ __AD719x_CHANNEL(_si, 0, -1, _address, NULL, IIO_TEMP, 0, NULL)
+
+@@ -846,7 +842,7 @@ static const struct iio_chan_spec ad7192_channels[] = {
+ AD719x_DIFF_CHANNEL(0, 1, 2, AD7192_CH_AIN1P_AIN2M),
+ AD719x_DIFF_CHANNEL(1, 3, 4, AD7192_CH_AIN3P_AIN4M),
+ AD719x_TEMP_CHANNEL(2, AD7192_CH_TEMP),
+- AD719x_SHORTED_CHANNEL(3, 2, AD7192_CH_AIN2P_AIN2M),
++ AD719x_DIFF_CHANNEL(3, 2, 2, AD7192_CH_AIN2P_AIN2M),
+ AD719x_CHANNEL(4, 1, AD7192_CH_AIN1),
+ AD719x_CHANNEL(5, 2, AD7192_CH_AIN2),
+ AD719x_CHANNEL(6, 3, AD7192_CH_AIN3),
+@@ -860,7 +856,7 @@ static const struct iio_chan_spec ad7193_channels[] = {
+ AD719x_DIFF_CHANNEL(2, 5, 6, AD7193_CH_AIN5P_AIN6M),
+ AD719x_DIFF_CHANNEL(3, 7, 8, AD7193_CH_AIN7P_AIN8M),
+ AD719x_TEMP_CHANNEL(4, AD7193_CH_TEMP),
+- AD719x_SHORTED_CHANNEL(5, 2, AD7193_CH_AIN2P_AIN2M),
++ AD719x_DIFF_CHANNEL(5, 2, 2, AD7193_CH_AIN2P_AIN2M),
+ AD719x_CHANNEL(6, 1, AD7193_CH_AIN1),
+ AD719x_CHANNEL(7, 2, AD7193_CH_AIN2),
+ AD719x_CHANNEL(8, 3, AD7193_CH_AIN3),
+@@ -1019,7 +1015,7 @@ static int ad7192_probe(struct spi_device *spi)
+ }
+ }
+
+- ret = ad7192_setup(st, spi->dev.of_node);
++ ret = ad7192_setup(indio_dev, spi->dev.of_node);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/iio/adc/ad7292.c b/drivers/iio/adc/ad7292.c
+index 3271a31afde1c..e3e14a1253e89 100644
+--- a/drivers/iio/adc/ad7292.c
++++ b/drivers/iio/adc/ad7292.c
+@@ -287,10 +287,8 @@ static int ad7292_probe(struct spi_device *spi)
+
+ ret = devm_add_action_or_reset(&spi->dev,
+ ad7292_regulator_disable, st);
+- if (ret) {
+- regulator_disable(st->reg);
++ if (ret)
+ return ret;
+- }
+
+ ret = regulator_get_voltage(st->reg);
+ if (ret < 0)
+diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c
+index 2c5c8a3672b2d..aa42ba759fa1a 100644
+--- a/drivers/iio/adc/ad7768-1.c
++++ b/drivers/iio/adc/ad7768-1.c
+@@ -480,8 +480,8 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p)
+ iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan,
+ iio_get_time_ns(indio_dev));
+
+- iio_trigger_notify_done(indio_dev->trig);
+ err_unlock:
++ iio_trigger_notify_done(indio_dev->trig);
+ mutex_unlock(&st->lock);
+
+ return IRQ_HANDLED;
+diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c
+index cb579aa89f39c..f7d7bc1e44455 100644
+--- a/drivers/iio/adc/ad7791.c
++++ b/drivers/iio/adc/ad7791.c
+@@ -253,7 +253,7 @@ static const struct ad_sigma_delta_info ad7791_sigma_delta_info = {
+ .has_registers = true,
+ .addr_shift = 4,
+ .read_mask = BIT(3),
+- .irq_flags = IRQF_TRIGGER_LOW,
++ .irq_flags = IRQF_TRIGGER_FALLING,
+ };
+
+ static int ad7791_read_raw(struct iio_dev *indio_dev,
+diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c
+index 069b561ee7689..b8cc94b7dd80a 100644
+--- a/drivers/iio/adc/ad7923.c
++++ b/drivers/iio/adc/ad7923.c
+@@ -93,6 +93,7 @@ enum ad7923_id {
+ .sign = 'u', \
+ .realbits = (bits), \
+ .storagebits = 16, \
++ .shift = 12 - (bits), \
+ .endianness = IIO_BE, \
+ }, \
+ }
+@@ -268,7 +269,8 @@ static int ad7923_read_raw(struct iio_dev *indio_dev,
+ return ret;
+
+ if (chan->address == EXTRACT(ret, 12, 4))
+- *val = EXTRACT(ret, 0, 12);
++ *val = EXTRACT(ret, chan->scan_type.shift,
++ chan->scan_type.realbits);
+ else
+ return -EIO;
+
+diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
+index 1d652d9b2f5cd..ed8aa0599b393 100644
+--- a/drivers/iio/adc/ad_sigma_delta.c
++++ b/drivers/iio/adc/ad_sigma_delta.c
+@@ -280,10 +280,10 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev,
+ unsigned int data_reg;
+ int ret = 0;
+
+- if (iio_buffer_enabled(indio_dev))
+- return -EBUSY;
++ ret = iio_device_claim_direct_mode(indio_dev);
++ if (ret)
++ return ret;
+
+- mutex_lock(&indio_dev->mlock);
+ ad_sigma_delta_set_channel(sigma_delta, chan->address);
+
+ spi_bus_lock(sigma_delta->spi->master);
+@@ -322,7 +322,7 @@ out:
+ ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE);
+ sigma_delta->bus_locked = false;
+ spi_bus_unlock(sigma_delta->spi->master);
+- mutex_unlock(&indio_dev->mlock);
++ iio_device_release_direct_mode(indio_dev);
+
+ if (ret)
+ return ret;
+@@ -490,6 +490,10 @@ static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_de
+ init_completion(&sigma_delta->completion);
+
+ sigma_delta->irq_dis = true;
++
++ /* the IRQ core clears IRQ_DISABLE_UNLAZY flag when freeing an IRQ */
++ irq_set_status_flags(sigma_delta->spi->irq, IRQ_DISABLE_UNLAZY);
++
+ ret = devm_request_irq(dev, sigma_delta->spi->irq,
+ ad_sd_data_rdy_trig_poll,
+ sigma_delta->info->irq_flags | IRQF_NO_AUTOEN,
+diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c
+index a73e3c2d212fa..a9e655e69eaa2 100644
+--- a/drivers/iio/adc/adi-axi-adc.c
++++ b/drivers/iio/adc/adi-axi-adc.c
+@@ -322,16 +322,19 @@ static struct adi_axi_adc_client *adi_axi_adc_attach_client(struct device *dev)
+
+ if (!try_module_get(cl->dev->driver->owner)) {
+ mutex_unlock(&registered_clients_lock);
++ of_node_put(cln);
+ return ERR_PTR(-ENODEV);
+ }
+
+ get_device(cl->dev);
+ cl->info = info;
+ mutex_unlock(&registered_clients_lock);
++ of_node_put(cln);
+ return cl;
+ }
+
+ mutex_unlock(&registered_clients_lock);
++ of_node_put(cln);
+
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
+index ea5ca163d8796..ecb49bc452ae6 100644
+--- a/drivers/iio/adc/at91-sama5d2_adc.c
++++ b/drivers/iio/adc/at91-sama5d2_adc.c
+@@ -74,7 +74,7 @@
+ #define AT91_SAMA5D2_MR_ANACH BIT(23)
+ /* Tracking Time */
+ #define AT91_SAMA5D2_MR_TRACKTIM(v) ((v) << 24)
+-#define AT91_SAMA5D2_MR_TRACKTIM_MAX 0xff
++#define AT91_SAMA5D2_MR_TRACKTIM_MAX 0xf
+ /* Transfer Time */
+ #define AT91_SAMA5D2_MR_TRANSFER(v) ((v) << 28)
+ #define AT91_SAMA5D2_MR_TRANSFER_MAX 0x3
+@@ -1000,7 +1000,7 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *indio,
+ trig = devm_iio_trigger_alloc(&indio->dev, "%s-dev%d-%s", indio->name,
+ iio_device_id(indio), trigger_name);
+ if (!trig)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ trig->dev.parent = indio->dev.parent;
+ iio_trigger_set_drvdata(trig, indio);
+@@ -1329,10 +1329,12 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev,
+ ret = at91_adc_read_position(st, chan->channel,
+ &tmp_val);
+ *val = tmp_val;
++ if (ret > 0)
++ ret = at91_adc_adjust_val_osr(st, val);
+ mutex_unlock(&st->lock);
+ iio_device_release_direct_mode(indio_dev);
+
+- return at91_adc_adjust_val_osr(st, val);
++ return ret;
+ }
+ if (chan->type == IIO_PRESSURE) {
+ ret = iio_device_claim_direct_mode(indio_dev);
+@@ -1343,10 +1345,12 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev,
+ ret = at91_adc_read_pressure(st, chan->channel,
+ &tmp_val);
+ *val = tmp_val;
++ if (ret > 0)
++ ret = at91_adc_adjust_val_osr(st, val);
+ mutex_unlock(&st->lock);
+ iio_device_release_direct_mode(indio_dev);
+
+- return at91_adc_adjust_val_osr(st, val);
++ return ret;
+ }
+
+ /* in this case we have a voltage channel */
+@@ -1377,7 +1381,8 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev,
+ *val = st->conversion_value;
+ ret = at91_adc_adjust_val_osr(st, val);
+ if (chan->scan_type.sign == 's')
+- *val = sign_extend32(*val, 11);
++ *val = sign_extend32(*val,
++ chan->scan_type.realbits - 1);
+ st->conversion_done = false;
+ }
+
+@@ -1436,16 +1441,20 @@ static int at91_adc_write_raw(struct iio_dev *indio_dev,
+ /* if no change, optimize out */
+ if (val == st->oversampling_ratio)
+ return 0;
++ mutex_lock(&st->lock);
+ st->oversampling_ratio = val;
+ /* update ratio */
+ at91_adc_config_emr(st);
++ mutex_unlock(&st->lock);
+ return 0;
+ case IIO_CHAN_INFO_SAMP_FREQ:
+ if (val < st->soc_info.min_sample_rate ||
+ val > st->soc_info.max_sample_rate)
+ return -EINVAL;
+
++ mutex_lock(&st->lock);
+ at91_adc_setup_samp_freq(indio_dev, val);
++ mutex_unlock(&st->lock);
+ return 0;
+ default:
+ return -EINVAL;
+@@ -1894,6 +1903,9 @@ static __maybe_unused int at91_adc_suspend(struct device *dev)
+ struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct at91_adc_state *st = iio_priv(indio_dev);
+
++ if (iio_buffer_enabled(indio_dev))
++ at91_adc_buffer_postdisable(indio_dev);
++
+ /*
+ 	 * Do a software reset of the ADC before we go to suspend.
+ 	 * This will ensure that all pins are free from being muxed by the ADC
+@@ -1937,14 +1949,11 @@ static __maybe_unused int at91_adc_resume(struct device *dev)
+ if (!iio_buffer_enabled(indio_dev))
+ return 0;
+
+- /* check if we are enabling triggered buffer or the touchscreen */
+- if (at91_adc_current_chan_is_touch(indio_dev))
+- return at91_adc_configure_touch(st, true);
+- else
+- return at91_adc_configure_trigger(st->trig, true);
++ ret = at91_adc_buffer_prepare(indio_dev);
++ if (ret)
++ goto vref_disable_resume;
+
+- /* not needed but more explicit */
+- return 0;
++ return at91_adc_configure_trigger(st->trig, true);
+
+ vref_disable_resume:
+ regulator_disable(st->vref);
+diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
+index 5a7d3a3a5fa82..d61b8ce643a80 100644
+--- a/drivers/iio/adc/at91_adc.c
++++ b/drivers/iio/adc/at91_adc.c
+@@ -634,8 +634,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev,
+ trig->ops = &at91_adc_trigger_ops;
+
+ ret = iio_trigger_register(trig);
+- if (ret)
++ if (ret) {
++ iio_trigger_free(trig);
+ return NULL;
++ }
+
+ return trig;
+ }
+diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c
+index 3e0c0233b4315..df99f1365c398 100644
+--- a/drivers/iio/adc/axp20x_adc.c
++++ b/drivers/iio/adc/axp20x_adc.c
+@@ -251,19 +251,8 @@ static int axp22x_adc_raw(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan, int *val)
+ {
+ struct axp20x_adc_iio *info = iio_priv(indio_dev);
+- int size;
+
+- /*
+- * N.B.: Unlike the Chinese datasheets tell, the charging current is
+- * stored on 12 bits, not 13 bits. Only discharging current is on 13
+- * bits.
+- */
+- if (chan->type == IIO_CURRENT && chan->channel == AXP22X_BATT_DISCHRG_I)
+- size = 13;
+- else
+- size = 12;
+-
+- *val = axp20x_read_variable_width(info->regmap, chan->address, size);
++ *val = axp20x_read_variable_width(info->regmap, chan->address, 12);
+ if (*val < 0)
+ return *val;
+
+@@ -386,9 +375,8 @@ static int axp22x_adc_scale(struct iio_chan_spec const *chan, int *val,
+ return IIO_VAL_INT_PLUS_MICRO;
+
+ case IIO_CURRENT:
+- *val = 0;
+- *val2 = 500000;
+- return IIO_VAL_INT_PLUS_MICRO;
++ *val = 1;
++ return IIO_VAL_INT;
+
+ case IIO_TEMP:
+ *val = 100;
+diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c
+index 5f5e8b39e4d22..84dbe9e2f0eff 100644
+--- a/drivers/iio/adc/axp288_adc.c
++++ b/drivers/iio/adc/axp288_adc.c
+@@ -196,6 +196,14 @@ static const struct dmi_system_id axp288_adc_ts_bias_override[] = {
+ },
+ .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
+ },
++ {
++ /* Nuvision Solo 10 Draw */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TMAX"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"),
++ },
++ .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
++ },
+ {}
+ };
+
+diff --git a/drivers/iio/adc/berlin2-adc.c b/drivers/iio/adc/berlin2-adc.c
+index 8b04b95b7b7ae..fa2c87946e16f 100644
+--- a/drivers/iio/adc/berlin2-adc.c
++++ b/drivers/iio/adc/berlin2-adc.c
+@@ -289,8 +289,10 @@ static int berlin2_adc_probe(struct platform_device *pdev)
+ int ret;
+
+ indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*priv));
+- if (!indio_dev)
++ if (!indio_dev) {
++ of_node_put(parent_np);
+ return -ENOMEM;
++ }
+
+ priv = iio_priv(indio_dev);
+ platform_set_drvdata(pdev, indio_dev);
+diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c
+index 16407664182ce..97d162a3cba4e 100644
+--- a/drivers/iio/adc/dln2-adc.c
++++ b/drivers/iio/adc/dln2-adc.c
+@@ -248,7 +248,6 @@ static int dln2_adc_set_chan_period(struct dln2_adc *dln2,
+ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel)
+ {
+ int ret, i;
+- struct iio_dev *indio_dev = platform_get_drvdata(dln2->pdev);
+ u16 conflict;
+ __le16 value;
+ int olen = sizeof(value);
+@@ -257,13 +256,9 @@ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel)
+ .chan = channel,
+ };
+
+- ret = iio_device_claim_direct_mode(indio_dev);
+- if (ret < 0)
+- return ret;
+-
+ ret = dln2_adc_set_chan_enabled(dln2, channel, true);
+ if (ret < 0)
+- goto release_direct;
++ return ret;
+
+ ret = dln2_adc_set_port_enabled(dln2, true, &conflict);
+ if (ret < 0) {
+@@ -300,8 +295,6 @@ disable_port:
+ dln2_adc_set_port_enabled(dln2, false, NULL);
+ disable_chan:
+ dln2_adc_set_chan_enabled(dln2, channel, false);
+-release_direct:
+- iio_device_release_direct_mode(indio_dev);
+
+ return ret;
+ }
+@@ -337,10 +330,16 @@ static int dln2_adc_read_raw(struct iio_dev *indio_dev,
+
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ ret = iio_device_claim_direct_mode(indio_dev);
++ if (ret < 0)
++ return ret;
++
+ mutex_lock(&dln2->mutex);
+ ret = dln2_adc_read(dln2, chan->channel);
+ mutex_unlock(&dln2->mutex);
+
++ iio_device_release_direct_mode(indio_dev);
++
+ if (ret < 0)
+ return ret;
+
+@@ -656,7 +655,11 @@ static int dln2_adc_probe(struct platform_device *pdev)
+ return -ENOMEM;
+ }
+ iio_trigger_set_drvdata(dln2->trig, dln2);
+- devm_iio_trigger_register(dev, dln2->trig);
++ ret = devm_iio_trigger_register(dev, dln2->trig);
++ if (ret) {
++ dev_err(dev, "failed to register trigger: %d\n", ret);
++ return ret;
++ }
+ iio_trigger_set_immutable(indio_dev, dln2->trig);
+
+ ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL,
+diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
+index a4b2ff9e0dd5e..9403c26040661 100644
+--- a/drivers/iio/adc/ina2xx-adc.c
++++ b/drivers/iio/adc/ina2xx-adc.c
+@@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = {
+ enum ina2xx_ids { ina219, ina226 };
+
+ struct ina2xx_config {
++ const char *name;
+ u16 config_default;
+ int calibration_value;
+ int shunt_voltage_lsb; /* nV */
+@@ -155,6 +156,7 @@ struct ina2xx_chip_info {
+
+ static const struct ina2xx_config ina2xx_config[] = {
+ [ina219] = {
++ .name = "ina219",
+ .config_default = INA219_CONFIG_DEFAULT,
+ .calibration_value = 4096,
+ .shunt_voltage_lsb = 10000,
+@@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = {
+ .chip_id = ina219,
+ },
+ [ina226] = {
++ .name = "ina226",
+ .config_default = INA226_CONFIG_DEFAULT,
+ .calibration_value = 2048,
+ .shunt_voltage_lsb = 2500,
+@@ -999,7 +1002,7 @@ static int ina2xx_probe(struct i2c_client *client,
+ /* Patch the current config register with default. */
+ val = chip->config->config_default;
+
+- if (id->driver_data == ina226) {
++ if (type == ina226) {
+ ina226_set_average(chip, INA226_DEFAULT_AVG, &val);
+ ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val);
+ ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val);
+@@ -1018,7 +1021,7 @@ static int ina2xx_probe(struct i2c_client *client,
+ }
+
+ indio_dev->modes = INDIO_DIRECT_MODE;
+- if (id->driver_data == ina226) {
++ if (type == ina226) {
+ indio_dev->channels = ina226_channels;
+ indio_dev->num_channels = ARRAY_SIZE(ina226_channels);
+ indio_dev->info = &ina226_info;
+@@ -1027,7 +1030,7 @@ static int ina2xx_probe(struct i2c_client *client,
+ indio_dev->num_channels = ARRAY_SIZE(ina219_channels);
+ indio_dev->info = &ina219_info;
+ }
+- indio_dev->name = id->name;
++ indio_dev->name = id ? id->name : chip->config->name;
+
+ ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev,
+ INDIO_BUFFER_SOFTWARE,
+diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c
+index 1adddf5a88a94..61f373fab9a11 100644
+--- a/drivers/iio/adc/ltc2497.c
++++ b/drivers/iio/adc/ltc2497.c
+@@ -41,6 +41,19 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata,
+ }
+
+ *val = (be32_to_cpu(st->buf) >> 14) - (1 << 17);
++
++ /*
++ * The part started a new conversion at the end of the above i2c
++ * transfer, so if the address didn't change since the last call
++ * everything is fine and we can return early.
++ * If not (which should only happen when some sort of bulk
++ * conversion is implemented) we have to program the new
++ * address. Note that this probably fails as the conversion that
++	 * was triggered above is likely not complete yet and the two
++ * operations have to be done in a single transfer.
++ */
++ if (ddata->addr_prev == address)
++ return 0;
+ }
+
+ ret = i2c_smbus_write_byte(st->client,
+diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c
+index e573da5397bb3..65278270a75ce 100644
+--- a/drivers/iio/adc/mcp3911.c
++++ b/drivers/iio/adc/mcp3911.c
+@@ -38,8 +38,8 @@
+ #define MCP3911_CHANNEL(x) (MCP3911_REG_CHANNEL0 + x * 3)
+ #define MCP3911_OFFCAL(x) (MCP3911_REG_OFFCAL_CH0 + x * 6)
+
+-/* Internal voltage reference in uV */
+-#define MCP3911_INT_VREF_UV 1200000
++/* Internal voltage reference in mV */
++#define MCP3911_INT_VREF_MV 1200
+
+ #define MCP3911_REG_READ(reg, id) ((((reg) << 1) | ((id) << 5) | (1 << 0)) & 0xff)
+ #define MCP3911_REG_WRITE(reg, id) ((((reg) << 1) | ((id) << 5) | (0 << 0)) & 0xff)
+@@ -111,6 +111,8 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev,
+ if (ret)
+ goto out;
+
++ *val = sign_extend32(*val, 23);
++
+ ret = IIO_VAL_INT;
+ break;
+
+@@ -135,11 +137,18 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev,
+
+ *val = ret / 1000;
+ } else {
+- *val = MCP3911_INT_VREF_UV;
++ *val = MCP3911_INT_VREF_MV;
+ }
+
+- *val2 = 24;
+- ret = IIO_VAL_FRACTIONAL_LOG2;
++ /*
++ * For 24bit Conversion
++	 * Raw = (Voltage)/(Vref) * 2^23 * Gain * 1.5
++ * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5)
++ */
++
++ /* val2 = (2^23 * 1.5) */
++ *val2 = 12582912;
++ ret = IIO_VAL_FRACTIONAL;
+ break;
+ }
+
+diff --git a/drivers/iio/adc/men_z188_adc.c b/drivers/iio/adc/men_z188_adc.c
+index 42ea8bc7e7805..adc5ceaef8c93 100644
+--- a/drivers/iio/adc/men_z188_adc.c
++++ b/drivers/iio/adc/men_z188_adc.c
+@@ -103,6 +103,7 @@ static int men_z188_probe(struct mcb_device *dev,
+ struct z188_adc *adc;
+ struct iio_dev *indio_dev;
+ struct resource *mem;
++ int ret;
+
+ indio_dev = devm_iio_device_alloc(&dev->dev, sizeof(struct z188_adc));
+ if (!indio_dev)
+@@ -128,8 +129,14 @@ static int men_z188_probe(struct mcb_device *dev,
+ adc->mem = mem;
+ mcb_set_drvdata(dev, indio_dev);
+
+- return iio_device_register(indio_dev);
++ ret = iio_device_register(indio_dev);
++ if (ret)
++ goto err_unmap;
++
++ return 0;
+
++err_unmap:
++ iounmap(adc->base);
+ err:
+ mcb_release_mem(mem);
+ return -ENXIO;
+diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
+index 705d5e11a54be..36ec07ad70178 100644
+--- a/drivers/iio/adc/meson_saradc.c
++++ b/drivers/iio/adc/meson_saradc.c
+@@ -71,7 +71,7 @@
+ #define MESON_SAR_ADC_REG3_PANEL_DETECT_COUNT_MASK GENMASK(20, 18)
+ #define MESON_SAR_ADC_REG3_PANEL_DETECT_FILTER_TB_MASK GENMASK(17, 16)
+ #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_SHIFT 10
+- #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 5
++ #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 6
+ #define MESON_SAR_ADC_REG3_BLOCK_DLY_SEL_MASK GENMASK(9, 8)
+ #define MESON_SAR_ADC_REG3_BLOCK_DLY_MASK GENMASK(7, 0)
+
+diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c
+index aca084f1e78a5..79d8fd79b0549 100644
+--- a/drivers/iio/adc/mp2629_adc.c
++++ b/drivers/iio/adc/mp2629_adc.c
+@@ -56,7 +56,8 @@ static struct iio_map mp2629_adc_maps[] = {
+ MP2629_MAP(SYSTEM_VOLT, "system-volt"),
+ MP2629_MAP(INPUT_VOLT, "input-volt"),
+ MP2629_MAP(BATT_CURRENT, "batt-current"),
+- MP2629_MAP(INPUT_CURRENT, "input-current")
++ MP2629_MAP(INPUT_CURRENT, "input-current"),
++ { }
+ };
+
+ static int mp2629_read_raw(struct iio_dev *indio_dev,
+@@ -73,7 +74,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev,
+ if (ret)
+ return ret;
+
+- if (chan->address == MP2629_INPUT_VOLT)
++ if (chan->channel == MP2629_INPUT_VOLT)
+ rval &= GENMASK(6, 0);
+ *val = rval;
+ return IIO_VAL_INT;
+diff --git a/drivers/iio/adc/mxs-lradc-adc.c b/drivers/iio/adc/mxs-lradc-adc.c
+index bca79a93cbe43..a50f39143d3ea 100644
+--- a/drivers/iio/adc/mxs-lradc-adc.c
++++ b/drivers/iio/adc/mxs-lradc-adc.c
+@@ -757,13 +757,13 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
+
+ ret = mxs_lradc_adc_trigger_init(iio);
+ if (ret)
+- goto err_trig;
++ return ret;
+
+ ret = iio_triggered_buffer_setup(iio, &iio_pollfunc_store_time,
+ &mxs_lradc_adc_trigger_handler,
+ &mxs_lradc_adc_buffer_ops);
+ if (ret)
+- return ret;
++ goto err_trig;
+
+ adc->vref_mv = mxs_lradc_adc_vref_mv[lradc->soc];
+
+@@ -801,9 +801,9 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
+
+ err_dev:
+ mxs_lradc_adc_hw_stop(adc);
+- mxs_lradc_adc_trigger_remove(iio);
+-err_trig:
+ iio_triggered_buffer_cleanup(iio);
++err_trig:
++ mxs_lradc_adc_trigger_remove(iio);
+ return ret;
+ }
+
+@@ -814,8 +814,8 @@ static int mxs_lradc_adc_remove(struct platform_device *pdev)
+
+ iio_device_unregister(iio);
+ mxs_lradc_adc_hw_stop(adc);
+- mxs_lradc_adc_trigger_remove(iio);
+ iio_triggered_buffer_cleanup(iio);
++ mxs_lradc_adc_trigger_remove(iio);
+
+ return 0;
+ }
+diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c
+index f9c8385c72d3d..496aab94570a1 100644
+--- a/drivers/iio/adc/palmas_gpadc.c
++++ b/drivers/iio/adc/palmas_gpadc.c
+@@ -638,7 +638,7 @@ out:
+
+ static int palmas_gpadc_remove(struct platform_device *pdev)
+ {
+- struct iio_dev *indio_dev = dev_to_iio_dev(&pdev->dev);
++ struct iio_dev *indio_dev = dev_get_drvdata(&pdev->dev);
+ struct palmas_gpadc *adc = iio_priv(indio_dev);
+
+ if (adc->wakeup1_enable || adc->wakeup2_enable)
+diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c
+index 32fbf57c362fa..2fa41b90bcfa9 100644
+--- a/drivers/iio/adc/rzg2l_adc.c
++++ b/drivers/iio/adc/rzg2l_adc.c
+@@ -334,11 +334,15 @@ static int rzg2l_adc_parse_properties(struct platform_device *pdev, struct rzg2l
+ i = 0;
+ device_for_each_child_node(&pdev->dev, fwnode) {
+ ret = fwnode_property_read_u32(fwnode, "reg", &channel);
+- if (ret)
++ if (ret) {
++ fwnode_handle_put(fwnode);
+ return ret;
++ }
+
+- if (channel >= RZG2L_ADC_MAX_CHANNELS)
++ if (channel >= RZG2L_ADC_MAX_CHANNELS) {
++ fwnode_handle_put(fwnode);
+ return -EINVAL;
++ }
+
+ chan_array[i].type = IIO_VOLTAGE;
+ chan_array[i].indexed = 1;
+diff --git a/drivers/iio/adc/sc27xx_adc.c b/drivers/iio/adc/sc27xx_adc.c
+index 00098caf6d9ee..cfe003cc4f0b6 100644
+--- a/drivers/iio/adc/sc27xx_adc.c
++++ b/drivers/iio/adc/sc27xx_adc.c
+@@ -36,8 +36,8 @@
+
+ /* Bits and mask definition for SC27XX_ADC_CH_CFG register */
+ #define SC27XX_ADC_CHN_ID_MASK GENMASK(4, 0)
+-#define SC27XX_ADC_SCALE_MASK GENMASK(10, 8)
+-#define SC27XX_ADC_SCALE_SHIFT 8
++#define SC27XX_ADC_SCALE_MASK GENMASK(10, 9)
++#define SC27XX_ADC_SCALE_SHIFT 9
+
+ /* Bits definitions for SC27XX_ADC_INT_EN registers */
+ #define SC27XX_ADC_IRQ_EN BIT(0)
+@@ -103,14 +103,14 @@ static struct sc27xx_adc_linear_graph small_scale_graph = {
+ 100, 341,
+ };
+
+-static const struct sc27xx_adc_linear_graph big_scale_graph_calib = {
+- 4200, 856,
+- 3600, 733,
++static const struct sc27xx_adc_linear_graph sc2731_big_scale_graph_calib = {
++ 4200, 850,
++ 3600, 728,
+ };
+
+-static const struct sc27xx_adc_linear_graph small_scale_graph_calib = {
+- 1000, 833,
+- 100, 80,
++static const struct sc27xx_adc_linear_graph sc2731_small_scale_graph_calib = {
++ 1000, 838,
++ 100, 84,
+ };
+
+ static int sc27xx_adc_get_calib_data(u32 calib_data, int calib_adc)
+@@ -130,11 +130,11 @@ static int sc27xx_adc_scale_calibration(struct sc27xx_adc_data *data,
+ size_t len;
+
+ if (big_scale) {
+- calib_graph = &big_scale_graph_calib;
++ calib_graph = &sc2731_big_scale_graph_calib;
+ graph = &big_scale_graph;
+ cell_name = "big_scale_calib";
+ } else {
+- calib_graph = &small_scale_graph_calib;
++ calib_graph = &sc2731_small_scale_graph_calib;
+ graph = &small_scale_graph;
+ cell_name = "small_scale_calib";
+ }
+diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
+index c088cb990193c..42faca457ace8 100644
+--- a/drivers/iio/adc/stm32-adc-core.c
++++ b/drivers/iio/adc/stm32-adc-core.c
+@@ -64,6 +64,7 @@ struct stm32_adc_priv;
+ * @max_clk_rate_hz: maximum analog clock rate (Hz, from datasheet)
+ * @has_syscfg: SYSCFG capability flags
+ * @num_irqs: number of interrupt lines
++ * @num_adcs: maximum number of ADC instances in the common registers
+ */
+ struct stm32_adc_priv_cfg {
+ const struct stm32_adc_common_regs *regs;
+@@ -71,6 +72,7 @@ struct stm32_adc_priv_cfg {
+ u32 max_clk_rate_hz;
+ unsigned int has_syscfg;
+ unsigned int num_irqs;
++ unsigned int num_adcs;
+ };
+
+ /**
+@@ -352,7 +354,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc)
+ * before invoking the interrupt handler (e.g. call ISR only for
+ * IRQ-enabled ADCs).
+ */
+- for (i = 0; i < priv->cfg->num_irqs; i++) {
++ for (i = 0; i < priv->cfg->num_adcs; i++) {
+ if ((status & priv->cfg->regs->eoc_msk[i] &&
+ stm32_adc_eoc_enabled(priv, i)) ||
+ (status & priv->cfg->regs->ovr_msk[i]))
+@@ -796,6 +798,7 @@ static const struct stm32_adc_priv_cfg stm32f4_adc_priv_cfg = {
+ .clk_sel = stm32f4_adc_clk_sel,
+ .max_clk_rate_hz = 36000000,
+ .num_irqs = 1,
++ .num_adcs = 3,
+ };
+
+ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = {
+@@ -804,14 +807,16 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = {
+ .max_clk_rate_hz = 36000000,
+ .has_syscfg = HAS_VBOOSTER,
+ .num_irqs = 1,
++ .num_adcs = 2,
+ };
+
+ static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = {
+ .regs = &stm32h7_adc_common_regs,
+ .clk_sel = stm32h7_adc_clk_sel,
+- .max_clk_rate_hz = 40000000,
++ .max_clk_rate_hz = 36000000,
+ .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD,
+ .num_irqs = 2,
++ .num_adcs = 2,
+ };
+
+ static const struct of_device_id stm32_adc_of_match[] = {
+diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
+index 5088de835bb15..ef5b54ed96614 100644
+--- a/drivers/iio/adc/stm32-adc.c
++++ b/drivers/iio/adc/stm32-adc.c
+@@ -975,6 +975,7 @@ static void stm32h7_adc_unprepare(struct iio_dev *indio_dev)
+ {
+ struct stm32_adc *adc = iio_priv(indio_dev);
+
++ stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0);
+ stm32h7_adc_disable(indio_dev);
+ stm32h7_adc_enter_pwr_down(adc);
+ }
+@@ -1258,7 +1259,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data)
+ struct stm32_adc *adc = iio_priv(indio_dev);
+ const struct stm32_adc_regspec *regs = adc->cfg->regs;
+ u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
+- u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
+
+ /* Check ovr status right now, as ovr mask should be already disabled */
+ if (status & regs->isr_ovr.mask) {
+@@ -1273,11 +1273,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data)
+ return IRQ_HANDLED;
+ }
+
+- if (!(status & mask))
+- dev_err_ratelimited(&indio_dev->dev,
+- "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n",
+- mask, status);
+-
+ return IRQ_NONE;
+ }
+
+@@ -1287,10 +1282,6 @@ static irqreturn_t stm32_adc_isr(int irq, void *data)
+ struct stm32_adc *adc = iio_priv(indio_dev);
+ const struct stm32_adc_regspec *regs = adc->cfg->regs;
+ u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
+- u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
+-
+- if (!(status & mask))
+- return IRQ_WAKE_THREAD;
+
+ if (status & regs->isr_ovr.mask) {
+ /*
+diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
+index 1cfefb3b5e56c..6592221cbe21d 100644
+--- a/drivers/iio/adc/stm32-dfsdm-adc.c
++++ b/drivers/iio/adc/stm32-dfsdm-adc.c
+@@ -1521,6 +1521,7 @@ static const struct of_device_id stm32_dfsdm_adc_match[] = {
+ },
+ {}
+ };
++MODULE_DEVICE_TABLE(of, stm32_dfsdm_adc_match);
+
+ static int stm32_dfsdm_adc_probe(struct platform_device *pdev)
+ {
+diff --git a/drivers/iio/adc/stmpe-adc.c b/drivers/iio/adc/stmpe-adc.c
+index fba659bfdb40a..64305d9fa5602 100644
+--- a/drivers/iio/adc/stmpe-adc.c
++++ b/drivers/iio/adc/stmpe-adc.c
+@@ -61,7 +61,7 @@ struct stmpe_adc {
+ static int stmpe_read_voltage(struct stmpe_adc *info,
+ struct iio_chan_spec const *chan, int *val)
+ {
+- long ret;
++ unsigned long ret;
+
+ mutex_lock(&info->lock);
+
+@@ -79,7 +79,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info,
+
+ ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT);
+
+- if (ret <= 0) {
++ if (ret == 0) {
+ stmpe_reg_write(info->stmpe, STMPE_REG_ADC_INT_STA,
+ STMPE_ADC_CH(info->channel));
+ mutex_unlock(&info->lock);
+@@ -96,7 +96,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info,
+ static int stmpe_read_temp(struct stmpe_adc *info,
+ struct iio_chan_spec const *chan, int *val)
+ {
+- long ret;
++ unsigned long ret;
+
+ mutex_lock(&info->lock);
+
+@@ -114,7 +114,7 @@ static int stmpe_read_temp(struct stmpe_adc *info,
+
+ ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT);
+
+- if (ret <= 0) {
++ if (ret == 0) {
+ mutex_unlock(&info->lock);
+ return -ETIMEDOUT;
+ }
+diff --git a/drivers/iio/adc/stx104.c b/drivers/iio/adc/stx104.c
+deleted file mode 100644
+index 55bd2dc514e93..0000000000000
+--- a/drivers/iio/adc/stx104.c
++++ /dev/null
+@@ -1,374 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * IIO driver for the Apex Embedded Systems STX104
+- * Copyright (C) 2016 William Breathitt Gray
+- */
+-#include <linux/bitops.h>
+-#include <linux/device.h>
+-#include <linux/errno.h>
+-#include <linux/gpio/driver.h>
+-#include <linux/iio/iio.h>
+-#include <linux/iio/types.h>
+-#include <linux/io.h>
+-#include <linux/ioport.h>
+-#include <linux/isa.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/moduleparam.h>
+-#include <linux/spinlock.h>
+-
+-#define STX104_OUT_CHAN(chan) { \
+- .type = IIO_VOLTAGE, \
+- .channel = chan, \
+- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
+- .indexed = 1, \
+- .output = 1 \
+-}
+-#define STX104_IN_CHAN(chan, diff) { \
+- .type = IIO_VOLTAGE, \
+- .channel = chan, \
+- .channel2 = chan, \
+- .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_HARDWAREGAIN) | \
+- BIT(IIO_CHAN_INFO_OFFSET) | BIT(IIO_CHAN_INFO_SCALE), \
+- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
+- .indexed = 1, \
+- .differential = diff \
+-}
+-
+-#define STX104_NUM_OUT_CHAN 2
+-
+-#define STX104_EXTENT 16
+-
+-static unsigned int base[max_num_isa_dev(STX104_EXTENT)];
+-static unsigned int num_stx104;
+-module_param_hw_array(base, uint, ioport, &num_stx104, 0);
+-MODULE_PARM_DESC(base, "Apex Embedded Systems STX104 base addresses");
+-
+-/**
+- * struct stx104_iio - IIO device private data structure
+- * @chan_out_states: channels' output states
+- * @base: base port address of the IIO device
+- */
+-struct stx104_iio {
+- unsigned int chan_out_states[STX104_NUM_OUT_CHAN];
+- unsigned int base;
+-};
+-
+-/**
+- * struct stx104_gpio - GPIO device private data structure
+- * @chip: instance of the gpio_chip
+- * @lock: synchronization lock to prevent I/O race conditions
+- * @base: base port address of the GPIO device
+- * @out_state: output bits state
+- */
+-struct stx104_gpio {
+- struct gpio_chip chip;
+- spinlock_t lock;
+- unsigned int base;
+- unsigned int out_state;
+-};
+-
+-static int stx104_read_raw(struct iio_dev *indio_dev,
+- struct iio_chan_spec const *chan, int *val, int *val2, long mask)
+-{
+- struct stx104_iio *const priv = iio_priv(indio_dev);
+- unsigned int adc_config;
+- int adbu;
+- int gain;
+-
+- switch (mask) {
+- case IIO_CHAN_INFO_HARDWAREGAIN:
+- /* get gain configuration */
+- adc_config = inb(priv->base + 11);
+- gain = adc_config & 0x3;
+-
+- *val = 1 << gain;
+- return IIO_VAL_INT;
+- case IIO_CHAN_INFO_RAW:
+- if (chan->output) {
+- *val = priv->chan_out_states[chan->channel];
+- return IIO_VAL_INT;
+- }
+-
+- /* select ADC channel */
+- outb(chan->channel | (chan->channel << 4), priv->base + 2);
+-
+- /* trigger ADC sample capture and wait for completion */
+- outb(0, priv->base);
+- while (inb(priv->base + 8) & BIT(7));
+-
+- *val = inw(priv->base);
+- return IIO_VAL_INT;
+- case IIO_CHAN_INFO_OFFSET:
+- /* get ADC bipolar/unipolar configuration */
+- adc_config = inb(priv->base + 11);
+- adbu = !(adc_config & BIT(2));
+-
+- *val = -32768 * adbu;
+- return IIO_VAL_INT;
+- case IIO_CHAN_INFO_SCALE:
+- /* get ADC bipolar/unipolar and gain configuration */
+- adc_config = inb(priv->base + 11);
+- adbu = !(adc_config & BIT(2));
+- gain = adc_config & 0x3;
+-
+- *val = 5;
+- *val2 = 15 - adbu + gain;
+- return IIO_VAL_FRACTIONAL_LOG2;
+- }
+-
+- return -EINVAL;
+-}
+-
+-static int stx104_write_raw(struct iio_dev *indio_dev,
+- struct iio_chan_spec const *chan, int val, int val2, long mask)
+-{
+- struct stx104_iio *const priv = iio_priv(indio_dev);
+-
+- switch (mask) {
+- case IIO_CHAN_INFO_HARDWAREGAIN:
+- /* Only four gain states (x1, x2, x4, x8) */
+- switch (val) {
+- case 1:
+- outb(0, priv->base + 11);
+- break;
+- case 2:
+- outb(1, priv->base + 11);
+- break;
+- case 4:
+- outb(2, priv->base + 11);
+- break;
+- case 8:
+- outb(3, priv->base + 11);
+- break;
+- default:
+- return -EINVAL;
+- }
+-
+- return 0;
+- case IIO_CHAN_INFO_RAW:
+- if (chan->output) {
+- /* DAC can only accept up to a 16-bit value */
+- if ((unsigned int)val > 65535)
+- return -EINVAL;
+-
+- priv->chan_out_states[chan->channel] = val;
+- outw(val, priv->base + 4 + 2 * chan->channel);
+-
+- return 0;
+- }
+- return -EINVAL;
+- }
+-
+- return -EINVAL;
+-}
+-
+-static const struct iio_info stx104_info = {
+- .read_raw = stx104_read_raw,
+- .write_raw = stx104_write_raw
+-};
+-
+-/* single-ended input channels configuration */
+-static const struct iio_chan_spec stx104_channels_sing[] = {
+- STX104_OUT_CHAN(0), STX104_OUT_CHAN(1),
+- STX104_IN_CHAN(0, 0), STX104_IN_CHAN(1, 0), STX104_IN_CHAN(2, 0),
+- STX104_IN_CHAN(3, 0), STX104_IN_CHAN(4, 0), STX104_IN_CHAN(5, 0),
+- STX104_IN_CHAN(6, 0), STX104_IN_CHAN(7, 0), STX104_IN_CHAN(8, 0),
+- STX104_IN_CHAN(9, 0), STX104_IN_CHAN(10, 0), STX104_IN_CHAN(11, 0),
+- STX104_IN_CHAN(12, 0), STX104_IN_CHAN(13, 0), STX104_IN_CHAN(14, 0),
+- STX104_IN_CHAN(15, 0)
+-};
+-/* differential input channels configuration */
+-static const struct iio_chan_spec stx104_channels_diff[] = {
+- STX104_OUT_CHAN(0), STX104_OUT_CHAN(1),
+- STX104_IN_CHAN(0, 1), STX104_IN_CHAN(1, 1), STX104_IN_CHAN(2, 1),
+- STX104_IN_CHAN(3, 1), STX104_IN_CHAN(4, 1), STX104_IN_CHAN(5, 1),
+- STX104_IN_CHAN(6, 1), STX104_IN_CHAN(7, 1)
+-};
+-
+-static int stx104_gpio_get_direction(struct gpio_chip *chip,
+- unsigned int offset)
+-{
+- /* GPIO 0-3 are input only, while the rest are output only */
+- if (offset < 4)
+- return 1;
+-
+- return 0;
+-}
+-
+-static int stx104_gpio_direction_input(struct gpio_chip *chip,
+- unsigned int offset)
+-{
+- if (offset >= 4)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int stx104_gpio_direction_output(struct gpio_chip *chip,
+- unsigned int offset, int value)
+-{
+- if (offset < 4)
+- return -EINVAL;
+-
+- chip->set(chip, offset, value);
+- return 0;
+-}
+-
+-static int stx104_gpio_get(struct gpio_chip *chip, unsigned int offset)
+-{
+- struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
+-
+- if (offset >= 4)
+- return -EINVAL;
+-
+- return !!(inb(stx104gpio->base) & BIT(offset));
+-}
+-
+-static int stx104_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
+- unsigned long *bits)
+-{
+- struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
+-
+- *bits = inb(stx104gpio->base);
+-
+- return 0;
+-}
+-
+-static void stx104_gpio_set(struct gpio_chip *chip, unsigned int offset,
+- int value)
+-{
+- struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
+- const unsigned int mask = BIT(offset) >> 4;
+- unsigned long flags;
+-
+- if (offset < 4)
+- return;
+-
+- spin_lock_irqsave(&stx104gpio->lock, flags);
+-
+- if (value)
+- stx104gpio->out_state |= mask;
+- else
+- stx104gpio->out_state &= ~mask;
+-
+- outb(stx104gpio->out_state, stx104gpio->base);
+-
+- spin_unlock_irqrestore(&stx104gpio->lock, flags);
+-}
+-
+-#define STX104_NGPIO 8
+-static const char *stx104_names[STX104_NGPIO] = {
+- "DIN0", "DIN1", "DIN2", "DIN3", "DOUT0", "DOUT1", "DOUT2", "DOUT3"
+-};
+-
+-static void stx104_gpio_set_multiple(struct gpio_chip *chip,
+- unsigned long *mask, unsigned long *bits)
+-{
+- struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
+- unsigned long flags;
+-
+- /* verify masked GPIO are output */
+- if (!(*mask & 0xF0))
+- return;
+-
+- *mask >>= 4;
+- *bits >>= 4;
+-
+- spin_lock_irqsave(&stx104gpio->lock, flags);
+-
+- stx104gpio->out_state &= ~*mask;
+- stx104gpio->out_state |= *mask & *bits;
+- outb(stx104gpio->out_state, stx104gpio->base);
+-
+- spin_unlock_irqrestore(&stx104gpio->lock, flags);
+-}
+-
+-static int stx104_probe(struct device *dev, unsigned int id)
+-{
+- struct iio_dev *indio_dev;
+- struct stx104_iio *priv;
+- struct stx104_gpio *stx104gpio;
+- int err;
+-
+- indio_dev = devm_iio_device_alloc(dev, sizeof(*priv));
+- if (!indio_dev)
+- return -ENOMEM;
+-
+- stx104gpio = devm_kzalloc(dev, sizeof(*stx104gpio), GFP_KERNEL);
+- if (!stx104gpio)
+- return -ENOMEM;
+-
+- if (!devm_request_region(dev, base[id], STX104_EXTENT,
+- dev_name(dev))) {
+- dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
+- base[id], base[id] + STX104_EXTENT);
+- return -EBUSY;
+- }
+-
+- indio_dev->info = &stx104_info;
+- indio_dev->modes = INDIO_DIRECT_MODE;
+-
+- /* determine if differential inputs */
+- if (inb(base[id] + 8) & BIT(5)) {
+- indio_dev->num_channels = ARRAY_SIZE(stx104_channels_diff);
+- indio_dev->channels = stx104_channels_diff;
+- } else {
+- indio_dev->num_channels = ARRAY_SIZE(stx104_channels_sing);
+- indio_dev->channels = stx104_channels_sing;
+- }
+-
+- indio_dev->name = dev_name(dev);
+-
+- priv = iio_priv(indio_dev);
+- priv->base = base[id];
+-
+- /* configure device for software trigger operation */
+- outb(0, base[id] + 9);
+-
+- /* initialize gain setting to x1 */
+- outb(0, base[id] + 11);
+-
+- /* initialize DAC output to 0V */
+- outw(0, base[id] + 4);
+- outw(0, base[id] + 6);
+-
+- stx104gpio->chip.label = dev_name(dev);
+- stx104gpio->chip.parent = dev;
+- stx104gpio->chip.owner = THIS_MODULE;
+- stx104gpio->chip.base = -1;
+- stx104gpio->chip.ngpio = STX104_NGPIO;
+- stx104gpio->chip.names = stx104_names;
+- stx104gpio->chip.get_direction = stx104_gpio_get_direction;
+- stx104gpio->chip.direction_input = stx104_gpio_direction_input;
+- stx104gpio->chip.direction_output = stx104_gpio_direction_output;
+- stx104gpio->chip.get = stx104_gpio_get;
+- stx104gpio->chip.get_multiple = stx104_gpio_get_multiple;
+- stx104gpio->chip.set = stx104_gpio_set;
+- stx104gpio->chip.set_multiple = stx104_gpio_set_multiple;
+- stx104gpio->base = base[id] + 3;
+- stx104gpio->out_state = 0x0;
+-
+- spin_lock_init(&stx104gpio->lock);
+-
+- err = devm_gpiochip_add_data(dev, &stx104gpio->chip, stx104gpio);
+- if (err) {
+- dev_err(dev, "GPIO registering failed (%d)\n", err);
+- return err;
+- }
+-
+- return devm_iio_device_register(dev, indio_dev);
+-}
+-
+-static struct isa_driver stx104_driver = {
+- .probe = stx104_probe,
+- .driver = {
+- .name = "stx104"
+- },
+-};
+-
+-module_isa_driver(stx104_driver, num_stx104);
+-
+-MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
+-MODULE_DESCRIPTION("Apex Embedded Systems STX104 IIO driver");
+-MODULE_LICENSE("GPL v2");
+diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c
+index 16fc608db36a5..bd48b073e7200 100644
+--- a/drivers/iio/adc/ti-adc081c.c
++++ b/drivers/iio/adc/ti-adc081c.c
+@@ -19,6 +19,7 @@
+ #include <linux/i2c.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
++#include <linux/property.h>
+
+ #include <linux/iio/iio.h>
+ #include <linux/iio/buffer.h>
+@@ -156,13 +157,16 @@ static int adc081c_probe(struct i2c_client *client,
+ {
+ struct iio_dev *iio;
+ struct adc081c *adc;
+- struct adcxx1c_model *model;
++ const struct adcxx1c_model *model;
+ int err;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA))
+ return -EOPNOTSUPP;
+
+- model = &adcxx1c_models[id->driver_data];
++ if (dev_fwnode(&client->dev))
++ model = device_get_match_data(&client->dev);
++ else
++ model = &adcxx1c_models[id->driver_data];
+
+ iio = devm_iio_device_alloc(&client->dev, sizeof(*adc));
+ if (!iio)
+@@ -210,10 +214,17 @@ static const struct i2c_device_id adc081c_id[] = {
+ };
+ MODULE_DEVICE_TABLE(i2c, adc081c_id);
+
++static const struct acpi_device_id adc081c_acpi_match[] = {
++ /* Used on some AAEON boards */
++ { "ADC081C", (kernel_ulong_t)&adcxx1c_models[ADC081C] },
++ { }
++};
++MODULE_DEVICE_TABLE(acpi, adc081c_acpi_match);
++
+ static const struct of_device_id adc081c_of_match[] = {
+- { .compatible = "ti,adc081c" },
+- { .compatible = "ti,adc101c" },
+- { .compatible = "ti,adc121c" },
++ { .compatible = "ti,adc081c", .data = &adcxx1c_models[ADC081C] },
++ { .compatible = "ti,adc101c", .data = &adcxx1c_models[ADC101C] },
++ { .compatible = "ti,adc121c", .data = &adcxx1c_models[ADC121C] },
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, adc081c_of_match);
+@@ -222,6 +233,7 @@ static struct i2c_driver adc081c_driver = {
+ .driver = {
+ .name = "adc081c",
+ .of_match_table = adc081c_of_match,
++ .acpi_match_table = adc081c_acpi_match,
+ },
+ .probe = adc081c_probe,
+ .id_table = adc081c_id,
+diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c
+index 83c1ae07b3e9a..8618ae7bc0671 100644
+--- a/drivers/iio/adc/ti-adc128s052.c
++++ b/drivers/iio/adc/ti-adc128s052.c
+@@ -193,13 +193,13 @@ static int adc128_remove(struct spi_device *spi)
+ }
+
+ static const struct of_device_id adc128_of_match[] = {
+- { .compatible = "ti,adc128s052", },
+- { .compatible = "ti,adc122s021", },
+- { .compatible = "ti,adc122s051", },
+- { .compatible = "ti,adc122s101", },
+- { .compatible = "ti,adc124s021", },
+- { .compatible = "ti,adc124s051", },
+- { .compatible = "ti,adc124s101", },
++ { .compatible = "ti,adc128s052", .data = (void*)0L, },
++ { .compatible = "ti,adc122s021", .data = (void*)1L, },
++ { .compatible = "ti,adc122s051", .data = (void*)1L, },
++ { .compatible = "ti,adc122s101", .data = (void*)1L, },
++ { .compatible = "ti,adc124s021", .data = (void*)2L, },
++ { .compatible = "ti,adc124s051", .data = (void*)2L, },
++ { .compatible = "ti,adc124s101", .data = (void*)2L, },
+ { /* sentinel */ },
+ };
+ MODULE_DEVICE_TABLE(of, adc128_of_match);
+diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c
+index 0c2025a225750..80a09817c1194 100644
+--- a/drivers/iio/adc/ti-ads131e08.c
++++ b/drivers/iio/adc/ti-ads131e08.c
+@@ -739,7 +739,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev)
+ device_for_each_child_node(dev, node) {
+ ret = fwnode_property_read_u32(node, "reg", &channel);
+ if (ret)
+- return ret;
++ goto err_child_out;
+
+ ret = fwnode_property_read_u32(node, "ti,gain", &tmp);
+ if (ret) {
+@@ -747,7 +747,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev)
+ } else {
+ ret = ads131e08_pga_gain_to_field_value(st, tmp);
+ if (ret < 0)
+- return ret;
++ goto err_child_out;
+
+ channel_config[i].pga_gain = tmp;
+ }
+@@ -758,7 +758,7 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev)
+ } else {
+ ret = ads131e08_validate_channel_mux(st, tmp);
+ if (ret)
+- return ret;
++ goto err_child_out;
+
+ channel_config[i].mux = tmp;
+ }
+@@ -784,6 +784,10 @@ static int ads131e08_alloc_channels(struct iio_dev *indio_dev)
+ st->channel_config = channel_config;
+
+ return 0;
++
++err_child_out:
++ fwnode_handle_put(node);
++ return ret;
+ }
+
+ static void ads131e08_regulator_disable(void *data)
+diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c
+index a2b83f0bd5260..d4583b76f1fe3 100644
+--- a/drivers/iio/adc/ti-ads7950.c
++++ b/drivers/iio/adc/ti-ads7950.c
+@@ -634,6 +634,7 @@ static int ti_ads7950_probe(struct spi_device *spi)
+ st->chip.label = dev_name(&st->spi->dev);
+ st->chip.parent = &st->spi->dev;
+ st->chip.owner = THIS_MODULE;
++ st->chip.can_sleep = true;
+ st->chip.base = -1;
+ st->chip.ngpio = TI_ADS7950_NUM_GPIOS;
+ st->chip.get_direction = ti_ads7950_get_direction;
+diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c
+index 170950d5dd499..e8fc4d01f30b6 100644
+--- a/drivers/iio/adc/ti-tsc2046.c
++++ b/drivers/iio/adc/ti-tsc2046.c
+@@ -388,7 +388,7 @@ static int tsc2046_adc_update_scan_mode(struct iio_dev *indio_dev,
+ mutex_lock(&priv->slock);
+
+ size = 0;
+- for_each_set_bit(ch_idx, active_scan_mask, indio_dev->num_channels) {
++ for_each_set_bit(ch_idx, active_scan_mask, ARRAY_SIZE(priv->l)) {
+ size += tsc2046_adc_group_set_layout(priv, group, ch_idx);
+ tsc2046_adc_group_set_cmd(priv, group, ch_idx);
+ group++;
+@@ -398,7 +398,7 @@ static int tsc2046_adc_update_scan_mode(struct iio_dev *indio_dev,
+ priv->xfer.len = size;
+ priv->time_per_scan_us = size * 8 * priv->time_per_bit_ns / NSEC_PER_USEC;
+
+- if (priv->scan_interval_us > priv->time_per_scan_us)
++ if (priv->scan_interval_us < priv->time_per_scan_us)
+ dev_warn(&priv->spi->dev, "The scan interval (%d) is less then calculated scan time (%d)\n",
+ priv->scan_interval_us, priv->time_per_scan_us);
+
+@@ -548,7 +548,7 @@ static int tsc2046_adc_setup_spi_msg(struct tsc2046_adc_priv *priv)
+ * enabled.
+ */
+ size = 0;
+- for (ch_idx = 0; ch_idx < priv->dcfg->num_channels; ch_idx++)
++ for (ch_idx = 0; ch_idx < ARRAY_SIZE(priv->l); ch_idx++)
+ size += tsc2046_adc_group_set_layout(priv, ch_idx, ch_idx);
+
+ priv->tx = devm_kzalloc(&priv->spi->dev, size, GFP_KERNEL);
+diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
+index c6416ad795ca4..024bdc1ef77e6 100644
+--- a/drivers/iio/adc/twl6030-gpadc.c
++++ b/drivers/iio/adc/twl6030-gpadc.c
+@@ -57,6 +57,18 @@
+ #define TWL6030_GPADCS BIT(1)
+ #define TWL6030_GPADCR BIT(0)
+
++#define USB_VBUS_CTRL_SET 0x04
++#define USB_ID_CTRL_SET 0x06
++
++#define TWL6030_MISC1 0xE4
++#define VBUS_MEAS 0x01
++#define ID_MEAS 0x01
++
++#define VAC_MEAS 0x04
++#define VBAT_MEAS 0x02
++#define BB_MEAS 0x01
++
++
+ /**
+ * struct twl6030_chnl_calib - channel calibration
+ * @gain: slope coefficient for ideal curve
+@@ -911,6 +923,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ twl6030_gpadc_irq_handler,
+ IRQF_ONESHOT, "twl6030_gpadc", indio_dev);
++ if (ret)
++ return ret;
+
+ ret = twl6030_gpadc_enable_irq(TWL6030_GPADC_RT_SW1_EOC_MASK);
+ if (ret < 0) {
+@@ -925,6 +939,26 @@ static int twl6030_gpadc_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ ret = twl_i2c_write_u8(TWL_MODULE_USB, VBUS_MEAS, USB_VBUS_CTRL_SET);
++ if (ret < 0) {
++ dev_err(dev, "failed to wire up inputs\n");
++ return ret;
++ }
++
++ ret = twl_i2c_write_u8(TWL_MODULE_USB, ID_MEAS, USB_ID_CTRL_SET);
++ if (ret < 0) {
++ dev_err(dev, "failed to wire up inputs\n");
++ return ret;
++ }
++
++ ret = twl_i2c_write_u8(TWL6030_MODULE_ID0,
++ VBAT_MEAS | BB_MEAS | VAC_MEAS,
++ TWL6030_MISC1);
++ if (ret < 0) {
++ dev_err(dev, "failed to wire up inputs\n");
++ return ret;
++ }
++
+ indio_dev->name = DRIVER_NAME;
+ indio_dev->info = &twl6030_gpadc_iio_info;
+ indio_dev->modes = INDIO_DIRECT_MODE;
+diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig
+new file mode 100644
+index 0000000000000..1f598670e84fb
+--- /dev/null
++++ b/drivers/iio/addac/Kconfig
+@@ -0,0 +1,24 @@
++#
++# ADC DAC drivers
++#
++# When adding new entries keep the list in alphabetical order
++
++menu "Analog to digital and digital to analog converters"
++
++config STX104
++ tristate "Apex Embedded Systems STX104 driver"
++ depends on PC104 && X86
++ select ISA_BUS_API
++ select GPIOLIB
++ help
++ Say yes here to build support for the Apex Embedded Systems STX104
++ integrated analog PC/104 card.
++
++	  This driver supports the STX104's 16 single-ended (8 differential)
++	  analog input channels, 2 analog output channels, 4 digital inputs,
++	  and 4 digital outputs.
++
++ The base port addresses for the devices may be configured via the base
++ array module parameter.
++
++endmenu
+diff --git a/drivers/iio/addac/Makefile b/drivers/iio/addac/Makefile
+new file mode 100644
+index 0000000000000..8629145233544
+--- /dev/null
++++ b/drivers/iio/addac/Makefile
+@@ -0,0 +1,7 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Makefile for industrial I/O ADDAC drivers
++#
++
++# When adding new entries keep the list in alphabetical order
++obj-$(CONFIG_STX104) += stx104.o
+diff --git a/drivers/iio/addac/stx104.c b/drivers/iio/addac/stx104.c
+new file mode 100644
+index 0000000000000..b658a75d4e3a8
+--- /dev/null
++++ b/drivers/iio/addac/stx104.c
+@@ -0,0 +1,414 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * IIO driver for the Apex Embedded Systems STX104
++ * Copyright (C) 2016 William Breathitt Gray
++ */
++#include <linux/bitops.h>
++#include <linux/device.h>
++#include <linux/errno.h>
++#include <linux/gpio/driver.h>
++#include <linux/iio/iio.h>
++#include <linux/iio/types.h>
++#include <linux/io.h>
++#include <linux/ioport.h>
++#include <linux/isa.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++#include <linux/mutex.h>
++#include <linux/spinlock.h>
++#include <linux/types.h>
++
++#define STX104_OUT_CHAN(chan) { \
++ .type = IIO_VOLTAGE, \
++ .channel = chan, \
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
++ .indexed = 1, \
++ .output = 1 \
++}
++#define STX104_IN_CHAN(chan, diff) { \
++ .type = IIO_VOLTAGE, \
++ .channel = chan, \
++ .channel2 = chan, \
++ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_HARDWAREGAIN) | \
++ BIT(IIO_CHAN_INFO_OFFSET) | BIT(IIO_CHAN_INFO_SCALE), \
++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
++ .indexed = 1, \
++ .differential = diff \
++}
++
++#define STX104_NUM_OUT_CHAN 2
++
++#define STX104_EXTENT 16
++
++static unsigned int base[max_num_isa_dev(STX104_EXTENT)];
++static unsigned int num_stx104;
++module_param_hw_array(base, uint, ioport, &num_stx104, 0);
++MODULE_PARM_DESC(base, "Apex Embedded Systems STX104 base addresses");
++
++/**
++ * struct stx104_reg - device register structure
++ * @ssr_ad: Software Strobe Register and ADC Data
++ * @achan: ADC Channel
++ * @dio: Digital I/O
++ * @dac: DAC Channels
++ * @cir_asr: Clear Interrupts and ADC Status
++ * @acr: ADC Control
++ * @pccr_fsh: Pacer Clock Control and FIFO Status MSB
++ * @acfg: ADC Configuration
++ */
++struct stx104_reg {
++ u16 ssr_ad;
++ u8 achan;
++ u8 dio;
++ u16 dac[2];
++ u8 cir_asr;
++ u8 acr;
++ u8 pccr_fsh;
++ u8 acfg;
++};
++
++/**
++ * struct stx104_iio - IIO device private data structure
++ * @lock: synchronization lock to prevent I/O race conditions
++ * @chan_out_states: channels' output states
++ * @reg: I/O address offset for the device registers
++ */
++struct stx104_iio {
++ struct mutex lock;
++ unsigned int chan_out_states[STX104_NUM_OUT_CHAN];
++ struct stx104_reg __iomem *reg;
++};
++
++/**
++ * struct stx104_gpio - GPIO device private data structure
++ * @chip: instance of the gpio_chip
++ * @lock: synchronization lock to prevent I/O race conditions
++ * @base: base port address of the GPIO device
++ * @out_state: output bits state
++ */
++struct stx104_gpio {
++ struct gpio_chip chip;
++ spinlock_t lock;
++ u8 __iomem *base;
++ unsigned int out_state;
++};
++
++static int stx104_read_raw(struct iio_dev *indio_dev,
++ struct iio_chan_spec const *chan, int *val, int *val2, long mask)
++{
++ struct stx104_iio *const priv = iio_priv(indio_dev);
++ struct stx104_reg __iomem *const reg = priv->reg;
++ unsigned int adc_config;
++ int adbu;
++ int gain;
++
++ switch (mask) {
++ case IIO_CHAN_INFO_HARDWAREGAIN:
++ /* get gain configuration */
++ adc_config = ioread8(&reg->acfg);
++ gain = adc_config & 0x3;
++
++ *val = 1 << gain;
++ return IIO_VAL_INT;
++ case IIO_CHAN_INFO_RAW:
++ if (chan->output) {
++ *val = priv->chan_out_states[chan->channel];
++ return IIO_VAL_INT;
++ }
++
++ mutex_lock(&priv->lock);
++
++ /* select ADC channel */
++ iowrite8(chan->channel | (chan->channel << 4), &reg->achan);
++
++ /* trigger ADC sample capture by writing to the 8-bit
++ * Software Strobe Register and wait for completion
++ */
++ iowrite8(0, &reg->ssr_ad);
++ while (ioread8(&reg->cir_asr) & BIT(7));
++
++ *val = ioread16(&reg->ssr_ad);
++
++ mutex_unlock(&priv->lock);
++ return IIO_VAL_INT;
++ case IIO_CHAN_INFO_OFFSET:
++ /* get ADC bipolar/unipolar configuration */
++ adc_config = ioread8(&reg->acfg);
++ adbu = !(adc_config & BIT(2));
++
++ *val = -32768 * adbu;
++ return IIO_VAL_INT;
++ case IIO_CHAN_INFO_SCALE:
++ /* get ADC bipolar/unipolar and gain configuration */
++ adc_config = ioread8(&reg->acfg);
++ adbu = !(adc_config & BIT(2));
++ gain = adc_config & 0x3;
++
++ *val = 5;
++ *val2 = 15 - adbu + gain;
++ return IIO_VAL_FRACTIONAL_LOG2;
++ }
++
++ return -EINVAL;
++}
++
++static int stx104_write_raw(struct iio_dev *indio_dev,
++ struct iio_chan_spec const *chan, int val, int val2, long mask)
++{
++ struct stx104_iio *const priv = iio_priv(indio_dev);
++
++ switch (mask) {
++ case IIO_CHAN_INFO_HARDWAREGAIN:
++ /* Only four gain states (x1, x2, x4, x8) */
++ switch (val) {
++ case 1:
++ iowrite8(0, &priv->reg->acfg);
++ break;
++ case 2:
++ iowrite8(1, &priv->reg->acfg);
++ break;
++ case 4:
++ iowrite8(2, &priv->reg->acfg);
++ break;
++ case 8:
++ iowrite8(3, &priv->reg->acfg);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return 0;
++ case IIO_CHAN_INFO_RAW:
++ if (chan->output) {
++ /* DAC can only accept up to a 16-bit value */
++ if ((unsigned int)val > 65535)
++ return -EINVAL;
++
++ mutex_lock(&priv->lock);
++
++ priv->chan_out_states[chan->channel] = val;
++ iowrite16(val, &priv->reg->dac[chan->channel]);
++
++ mutex_unlock(&priv->lock);
++ return 0;
++ }
++ return -EINVAL;
++ }
++
++ return -EINVAL;
++}
++
++static const struct iio_info stx104_info = {
++ .read_raw = stx104_read_raw,
++ .write_raw = stx104_write_raw
++};
++
++/* single-ended input channels configuration */
++static const struct iio_chan_spec stx104_channels_sing[] = {
++ STX104_OUT_CHAN(0), STX104_OUT_CHAN(1),
++ STX104_IN_CHAN(0, 0), STX104_IN_CHAN(1, 0), STX104_IN_CHAN(2, 0),
++ STX104_IN_CHAN(3, 0), STX104_IN_CHAN(4, 0), STX104_IN_CHAN(5, 0),
++ STX104_IN_CHAN(6, 0), STX104_IN_CHAN(7, 0), STX104_IN_CHAN(8, 0),
++ STX104_IN_CHAN(9, 0), STX104_IN_CHAN(10, 0), STX104_IN_CHAN(11, 0),
++ STX104_IN_CHAN(12, 0), STX104_IN_CHAN(13, 0), STX104_IN_CHAN(14, 0),
++ STX104_IN_CHAN(15, 0)
++};
++/* differential input channels configuration */
++static const struct iio_chan_spec stx104_channels_diff[] = {
++ STX104_OUT_CHAN(0), STX104_OUT_CHAN(1),
++ STX104_IN_CHAN(0, 1), STX104_IN_CHAN(1, 1), STX104_IN_CHAN(2, 1),
++ STX104_IN_CHAN(3, 1), STX104_IN_CHAN(4, 1), STX104_IN_CHAN(5, 1),
++ STX104_IN_CHAN(6, 1), STX104_IN_CHAN(7, 1)
++};
++
++static int stx104_gpio_get_direction(struct gpio_chip *chip,
++ unsigned int offset)
++{
++ /* GPIO 0-3 are input only, while the rest are output only */
++ if (offset < 4)
++ return 1;
++
++ return 0;
++}
++
++static int stx104_gpio_direction_input(struct gpio_chip *chip,
++ unsigned int offset)
++{
++ if (offset >= 4)
++ return -EINVAL;
++
++ return 0;
++}
++
++static int stx104_gpio_direction_output(struct gpio_chip *chip,
++ unsigned int offset, int value)
++{
++ if (offset < 4)
++ return -EINVAL;
++
++ chip->set(chip, offset, value);
++ return 0;
++}
++
++static int stx104_gpio_get(struct gpio_chip *chip, unsigned int offset)
++{
++ struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
++
++ if (offset >= 4)
++ return -EINVAL;
++
++ return !!(ioread8(stx104gpio->base) & BIT(offset));
++}
++
++static int stx104_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
++ unsigned long *bits)
++{
++ struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
++
++ *bits = ioread8(stx104gpio->base);
++
++ return 0;
++}
++
++static void stx104_gpio_set(struct gpio_chip *chip, unsigned int offset,
++ int value)
++{
++ struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
++ const unsigned int mask = BIT(offset) >> 4;
++ unsigned long flags;
++
++ if (offset < 4)
++ return;
++
++ spin_lock_irqsave(&stx104gpio->lock, flags);
++
++ if (value)
++ stx104gpio->out_state |= mask;
++ else
++ stx104gpio->out_state &= ~mask;
++
++ iowrite8(stx104gpio->out_state, stx104gpio->base);
++
++ spin_unlock_irqrestore(&stx104gpio->lock, flags);
++}
++
++#define STX104_NGPIO 8
++static const char *stx104_names[STX104_NGPIO] = {
++ "DIN0", "DIN1", "DIN2", "DIN3", "DOUT0", "DOUT1", "DOUT2", "DOUT3"
++};
++
++static void stx104_gpio_set_multiple(struct gpio_chip *chip,
++ unsigned long *mask, unsigned long *bits)
++{
++ struct stx104_gpio *const stx104gpio = gpiochip_get_data(chip);
++ unsigned long flags;
++
++	/* verify that the masked GPIO lines are outputs */
++ if (!(*mask & 0xF0))
++ return;
++
++ *mask >>= 4;
++ *bits >>= 4;
++
++ spin_lock_irqsave(&stx104gpio->lock, flags);
++
++ stx104gpio->out_state &= ~*mask;
++ stx104gpio->out_state |= *mask & *bits;
++ iowrite8(stx104gpio->out_state, stx104gpio->base);
++
++ spin_unlock_irqrestore(&stx104gpio->lock, flags);
++}
++
++static int stx104_probe(struct device *dev, unsigned int id)
++{
++ struct iio_dev *indio_dev;
++ struct stx104_iio *priv;
++ struct stx104_gpio *stx104gpio;
++ int err;
++
++ indio_dev = devm_iio_device_alloc(dev, sizeof(*priv));
++ if (!indio_dev)
++ return -ENOMEM;
++
++ stx104gpio = devm_kzalloc(dev, sizeof(*stx104gpio), GFP_KERNEL);
++ if (!stx104gpio)
++ return -ENOMEM;
++
++ if (!devm_request_region(dev, base[id], STX104_EXTENT,
++ dev_name(dev))) {
++ dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
++ base[id], base[id] + STX104_EXTENT);
++ return -EBUSY;
++ }
++
++ priv = iio_priv(indio_dev);
++ priv->reg = devm_ioport_map(dev, base[id], STX104_EXTENT);
++ if (!priv->reg)
++ return -ENOMEM;
++
++ indio_dev->info = &stx104_info;
++ indio_dev->modes = INDIO_DIRECT_MODE;
++
++ /* determine if differential inputs */
++ if (ioread8(&priv->reg->cir_asr) & BIT(5)) {
++ indio_dev->num_channels = ARRAY_SIZE(stx104_channels_diff);
++ indio_dev->channels = stx104_channels_diff;
++ } else {
++ indio_dev->num_channels = ARRAY_SIZE(stx104_channels_sing);
++ indio_dev->channels = stx104_channels_sing;
++ }
++
++ indio_dev->name = dev_name(dev);
++
++ mutex_init(&priv->lock);
++
++ /* configure device for software trigger operation */
++ iowrite8(0, &priv->reg->acr);
++
++ /* initialize gain setting to x1 */
++ iowrite8(0, &priv->reg->acfg);
++
++ /* initialize DAC output to 0V */
++ iowrite16(0, &priv->reg->dac[0]);
++ iowrite16(0, &priv->reg->dac[1]);
++
++ stx104gpio->chip.label = dev_name(dev);
++ stx104gpio->chip.parent = dev;
++ stx104gpio->chip.owner = THIS_MODULE;
++ stx104gpio->chip.base = -1;
++ stx104gpio->chip.ngpio = STX104_NGPIO;
++ stx104gpio->chip.names = stx104_names;
++ stx104gpio->chip.get_direction = stx104_gpio_get_direction;
++ stx104gpio->chip.direction_input = stx104_gpio_direction_input;
++ stx104gpio->chip.direction_output = stx104_gpio_direction_output;
++ stx104gpio->chip.get = stx104_gpio_get;
++ stx104gpio->chip.get_multiple = stx104_gpio_get_multiple;
++ stx104gpio->chip.set = stx104_gpio_set;
++ stx104gpio->chip.set_multiple = stx104_gpio_set_multiple;
++ stx104gpio->base = &priv->reg->dio;
++ stx104gpio->out_state = 0x0;
++
++ spin_lock_init(&stx104gpio->lock);
++
++ err = devm_gpiochip_add_data(dev, &stx104gpio->chip, stx104gpio);
++ if (err) {
++ dev_err(dev, "GPIO registering failed (%d)\n", err);
++ return err;
++ }
++
++ return devm_iio_device_register(dev, indio_dev);
++}
++
++static struct isa_driver stx104_driver = {
++ .probe = stx104_probe,
++ .driver = {
++ .name = "stx104"
++ },
++};
++
++module_isa_driver(stx104_driver, num_stx104);
++
++MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
++MODULE_DESCRIPTION("Apex Embedded Systems STX104 IIO driver");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c
+index 774eb3044edd8..cc28713b0dc8b 100644
+--- a/drivers/iio/afe/iio-rescale.c
++++ b/drivers/iio/afe/iio-rescale.c
+@@ -39,7 +39,7 @@ static int rescale_read_raw(struct iio_dev *indio_dev,
+ int *val, int *val2, long mask)
+ {
+ struct rescale *rescale = iio_priv(indio_dev);
+- unsigned long long tmp;
++ s64 tmp;
+ int ret;
+
+ switch (mask) {
+@@ -77,10 +77,10 @@ static int rescale_read_raw(struct iio_dev *indio_dev,
+ *val2 = rescale->denominator;
+ return IIO_VAL_FRACTIONAL;
+ case IIO_VAL_FRACTIONAL_LOG2:
+- tmp = *val * 1000000000LL;
+- do_div(tmp, rescale->denominator);
++ tmp = (s64)*val * 1000000000LL;
++ tmp = div_s64(tmp, rescale->denominator);
+ tmp *= rescale->numerator;
+- do_div(tmp, 1000000000LL);
++ tmp = div_s64(tmp, 1000000000LL);
+ *val = tmp;
+ return ret;
+ default:
+@@ -148,7 +148,7 @@ static int rescale_configure_channel(struct device *dev,
+ chan->ext_info = rescale->ext_info;
+ chan->type = rescale->cfg->type;
+
+- if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) ||
++ if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) &&
+ iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) {
+ dev_info(dev, "using raw+scale source channel\n");
+ } else if (iio_channel_has_info(schan, IIO_CHAN_INFO_PROCESSED)) {
+diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c
+index 847194fa1e464..80ef1aa9aae3b 100644
+--- a/drivers/iio/chemical/ccs811.c
++++ b/drivers/iio/chemical/ccs811.c
+@@ -499,11 +499,11 @@ static int ccs811_probe(struct i2c_client *client,
+
+ data->drdy_trig->ops = &ccs811_trigger_ops;
+ iio_trigger_set_drvdata(data->drdy_trig, indio_dev);
+- indio_dev->trig = data->drdy_trig;
+- iio_trigger_get(indio_dev->trig);
+ ret = iio_trigger_register(data->drdy_trig);
+ if (ret)
+ goto err_poweroff;
++
++ indio_dev->trig = iio_trigger_get(data->drdy_trig);
+ }
+
+ ret = iio_triggered_buffer_setup(indio_dev, NULL,
+diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
+index af801e203623e..02d3cf36acb0c 100644
+--- a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
++++ b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
+@@ -97,7 +97,7 @@ static int cros_ec_lid_angle_probe(struct platform_device *pdev)
+ if (!indio_dev)
+ return -ENOMEM;
+
+- ret = cros_ec_sensors_core_init(pdev, indio_dev, false, NULL, NULL);
++ ret = cros_ec_sensors_core_init(pdev, indio_dev, false, NULL);
+ if (ret)
+ return ret;
+
+@@ -113,7 +113,7 @@ static int cros_ec_lid_angle_probe(struct platform_device *pdev)
+ if (ret)
+ return ret;
+
+- return devm_iio_device_register(dev, indio_dev);
++ return cros_ec_sensors_core_register(dev, indio_dev, NULL);
+ }
+
+ static const struct platform_device_id cros_ec_lid_angle_ids[] = {
+diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+index 376a5b30010ae..5cce34fdff022 100644
+--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
++++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+@@ -235,8 +235,7 @@ static int cros_ec_sensors_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ ret = cros_ec_sensors_core_init(pdev, indio_dev, true,
+- cros_ec_sensors_capture,
+- cros_ec_sensors_push_data);
++ cros_ec_sensors_capture);
+ if (ret)
+ return ret;
+
+@@ -297,7 +296,8 @@ static int cros_ec_sensors_probe(struct platform_device *pdev)
+ else
+ state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd;
+
+- return devm_iio_device_register(dev, indio_dev);
++ return cros_ec_sensors_core_register(dev, indio_dev,
++ cros_ec_sensors_push_data);
+ }
+
+ static const struct platform_device_id cros_ec_sensors_ids[] = {
+diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+index 28bde13003b74..f529c01ac66b2 100644
+--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
++++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+@@ -234,21 +234,18 @@ static void cros_ec_sensors_core_clean(void *arg)
+
+ /**
+ * cros_ec_sensors_core_init() - basic initialization of the core structure
+- * @pdev: platform device created for the sensors
++ * @pdev: platform device created for the sensor
+ * @indio_dev: iio device structure of the device
+ * @physical_device: true if the device refers to a physical device
+ * @trigger_capture: function pointer to call buffer is triggered,
+ * for backward compatibility.
+- * @push_data: function to call when cros_ec_sensorhub receives
+- * a sample for that sensor.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+ int cros_ec_sensors_core_init(struct platform_device *pdev,
+ struct iio_dev *indio_dev,
+ bool physical_device,
+- cros_ec_sensors_capture_t trigger_capture,
+- cros_ec_sensorhub_push_data_cb_t push_data)
++ cros_ec_sensors_capture_t trigger_capture)
+ {
+ struct device *dev = &pdev->dev;
+ struct cros_ec_sensors_core_state *state = iio_priv(indio_dev);
+@@ -262,7 +259,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
+ platform_set_drvdata(pdev, indio_dev);
+
+ state->ec = ec->ec_dev;
+- state->msg = devm_kzalloc(&pdev->dev,
++ state->msg = devm_kzalloc(&pdev->dev, sizeof(*state->msg) +
+ max((u16)sizeof(struct ec_params_motion_sense),
+ state->ec->max_response), GFP_KERNEL);
+ if (!state->msg)
+@@ -339,17 +336,6 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
+ if (ret)
+ return ret;
+
+- ret = cros_ec_sensorhub_register_push_data(
+- sensor_hub, sensor_platform->sensor_num,
+- indio_dev, push_data);
+- if (ret)
+- return ret;
+-
+- ret = devm_add_action_or_reset(
+- dev, cros_ec_sensors_core_clean, pdev);
+- if (ret)
+- return ret;
+-
+ /* Timestamp coming from FIFO are in ns since boot. */
+ ret = iio_device_set_clock(indio_dev, CLOCK_BOOTTIME);
+ if (ret)
+@@ -371,6 +357,46 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
+ }
+ EXPORT_SYMBOL_GPL(cros_ec_sensors_core_init);
+
++/**
++ * cros_ec_sensors_core_register() - Register the IIO device and the FIFO
++ * push-data callback once the sensor is ready.
++ * It must be called at the end of the sensor probe routine.
++ * @dev: device created for the sensor
++ * @indio_dev: iio device structure of the device
++ * @push_data: function to call when cros_ec_sensorhub receives
++ * a sample for that sensor.
++ *
++ * Return: 0 on success, -errno on failure.
++ */
++int cros_ec_sensors_core_register(struct device *dev,
++ struct iio_dev *indio_dev,
++ cros_ec_sensorhub_push_data_cb_t push_data)
++{
++ struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev);
++ struct cros_ec_sensorhub *sensor_hub = dev_get_drvdata(dev->parent);
++ struct platform_device *pdev = to_platform_device(dev);
++ struct cros_ec_dev *ec = sensor_hub->ec;
++ int ret;
++
++ ret = devm_iio_device_register(dev, indio_dev);
++ if (ret)
++ return ret;
++
++ if (!push_data ||
++ !cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO))
++ return 0;
++
++ ret = cros_ec_sensorhub_register_push_data(
++ sensor_hub, sensor_platform->sensor_num,
++ indio_dev, push_data);
++ if (ret)
++ return ret;
++
++ return devm_add_action_or_reset(
++ dev, cros_ec_sensors_core_clean, pdev);
++}
++EXPORT_SYMBOL_GPL(cros_ec_sensors_core_register);
++
+ /**
+ * cros_ec_motion_send_host_cmd() - send motion sense host command
+ * @state: pointer to state information for device
+diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
+index 0bbb090b108c7..aff981551617e 100644
+--- a/drivers/iio/common/st_sensors/st_sensors_core.c
++++ b/drivers/iio/common/st_sensors/st_sensors_core.c
+@@ -71,16 +71,18 @@ st_sensors_match_odr_error:
+
+ int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr)
+ {
+- int err;
++ int err = 0;
+ struct st_sensor_odr_avl odr_out = {0, 0};
+ struct st_sensor_data *sdata = iio_priv(indio_dev);
+
++ mutex_lock(&sdata->odr_lock);
++
+ if (!sdata->sensor_settings->odr.mask)
+- return 0;
++ goto unlock_mutex;
+
+ err = st_sensors_match_odr(sdata->sensor_settings, odr, &odr_out);
+ if (err < 0)
+- goto st_sensors_match_odr_error;
++ goto unlock_mutex;
+
+ if ((sdata->sensor_settings->odr.addr ==
+ sdata->sensor_settings->pw.addr) &&
+@@ -103,7 +105,9 @@ int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr)
+ if (err >= 0)
+ sdata->odr = odr_out.hz;
+
+-st_sensors_match_odr_error:
++unlock_mutex:
++ mutex_unlock(&sdata->odr_lock);
++
+ return err;
+ }
+ EXPORT_SYMBOL(st_sensors_set_odr);
+@@ -365,6 +369,8 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
+ struct st_sensors_platform_data *of_pdata;
+ int err = 0;
+
++ mutex_init(&sdata->odr_lock);
++
+ /* If OF/DT pdata exists, it will take precedence of anything else */
+ of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata);
+ if (IS_ERR(of_pdata))
+@@ -558,18 +564,24 @@ int st_sensors_read_info_raw(struct iio_dev *indio_dev,
+ err = -EBUSY;
+ goto out;
+ } else {
++ mutex_lock(&sdata->odr_lock);
+ err = st_sensors_set_enable(indio_dev, true);
+- if (err < 0)
++ if (err < 0) {
++ mutex_unlock(&sdata->odr_lock);
+ goto out;
++ }
+
+ msleep((sdata->sensor_settings->bootime * 1000) / sdata->odr);
+ err = st_sensors_read_axis_data(indio_dev, ch, val);
+- if (err < 0)
++ if (err < 0) {
++ mutex_unlock(&sdata->odr_lock);
+ goto out;
++ }
+
+ *val = *val >> ch->scan_type.shift;
+
+ err = st_sensors_set_enable(indio_dev, false);
++ mutex_unlock(&sdata->odr_lock);
+ }
+ out:
+ mutex_unlock(&indio_dev->mlock);
+diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile
+index 33e16f14902a4..7974a1eff16c7 100644
+--- a/drivers/iio/dac/Makefile
++++ b/drivers/iio/dac/Makefile
+@@ -16,7 +16,7 @@ obj-$(CONFIG_AD5592R_BASE) += ad5592r-base.o
+ obj-$(CONFIG_AD5592R) += ad5592r.o
+ obj-$(CONFIG_AD5593R) += ad5593r.o
+ obj-$(CONFIG_AD5755) += ad5755.o
+-obj-$(CONFIG_AD5755) += ad5758.o
++obj-$(CONFIG_AD5758) += ad5758.o
+ obj-$(CONFIG_AD5761) += ad5761.o
+ obj-$(CONFIG_AD5764) += ad5764.o
+ obj-$(CONFIG_AD5766) += ad5766.o
+diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
+index 488ec69967d67..cafb8c7790154 100644
+--- a/drivers/iio/dac/ad5446.c
++++ b/drivers/iio/dac/ad5446.c
+@@ -178,7 +178,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev,
+
+ switch (m) {
+ case IIO_CHAN_INFO_RAW:
+- *val = st->cached_val;
++ *val = st->cached_val >> chan->scan_type.shift;
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SCALE:
+ *val = st->vref_mv;
+@@ -531,8 +531,15 @@ static int ad5622_write(struct ad5446_state *st, unsigned val)
+ {
+ struct i2c_client *client = to_i2c_client(st->dev);
+ __be16 data = cpu_to_be16(val);
++ int ret;
++
++ ret = i2c_master_send(client, (char *)&data, sizeof(data));
++ if (ret < 0)
++ return ret;
++ if (ret != sizeof(data))
++ return -EIO;
+
+- return i2c_master_send(client, (char *)&data, sizeof(data));
++ return 0;
+ }
+
+ /*
+diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
+index 0405e92b9e8c3..987264410278c 100644
+--- a/drivers/iio/dac/ad5592r-base.c
++++ b/drivers/iio/dac/ad5592r-base.c
+@@ -523,7 +523,7 @@ static int ad5592r_alloc_channels(struct iio_dev *iio_dev)
+ if (!ret)
+ st->channel_modes[reg] = tmp;
+
+- fwnode_property_read_u32(child, "adi,off-state", &tmp);
++ ret = fwnode_property_read_u32(child, "adi,off-state", &tmp);
+ if (!ret)
+ st->channel_offstate[reg] = tmp;
+ }
+diff --git a/drivers/iio/dac/ad5593r.c b/drivers/iio/dac/ad5593r.c
+index 5b4df36fdc2ad..4cc855c781218 100644
+--- a/drivers/iio/dac/ad5593r.c
++++ b/drivers/iio/dac/ad5593r.c
+@@ -13,6 +13,8 @@
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+
++#include <asm/unaligned.h>
++
+ #define AD5593R_MODE_CONF (0 << 4)
+ #define AD5593R_MODE_DAC_WRITE (1 << 4)
+ #define AD5593R_MODE_ADC_READBACK (4 << 4)
+@@ -20,6 +22,24 @@
+ #define AD5593R_MODE_GPIO_READBACK (6 << 4)
+ #define AD5593R_MODE_REG_READBACK (7 << 4)
+
++static int ad5593r_read_word(struct i2c_client *i2c, u8 reg, u16 *value)
++{
++ int ret;
++ u8 buf[2];
++
++ ret = i2c_smbus_write_byte(i2c, reg);
++ if (ret < 0)
++ return ret;
++
++ ret = i2c_master_recv(i2c, buf, sizeof(buf));
++ if (ret < 0)
++ return ret;
++
++ *value = get_unaligned_be16(buf);
++
++ return 0;
++}
++
+ static int ad5593r_write_dac(struct ad5592r_state *st, unsigned chan, u16 value)
+ {
+ struct i2c_client *i2c = to_i2c_client(st->dev);
+@@ -38,13 +58,7 @@ static int ad5593r_read_adc(struct ad5592r_state *st, unsigned chan, u16 *value)
+ if (val < 0)
+ return (int) val;
+
+- val = i2c_smbus_read_word_swapped(i2c, AD5593R_MODE_ADC_READBACK);
+- if (val < 0)
+- return (int) val;
+-
+- *value = (u16) val;
+-
+- return 0;
++ return ad5593r_read_word(i2c, AD5593R_MODE_ADC_READBACK, value);
+ }
+
+ static int ad5593r_reg_write(struct ad5592r_state *st, u8 reg, u16 value)
+@@ -58,25 +72,19 @@ static int ad5593r_reg_write(struct ad5592r_state *st, u8 reg, u16 value)
+ static int ad5593r_reg_read(struct ad5592r_state *st, u8 reg, u16 *value)
+ {
+ struct i2c_client *i2c = to_i2c_client(st->dev);
+- s32 val;
+-
+- val = i2c_smbus_read_word_swapped(i2c, AD5593R_MODE_REG_READBACK | reg);
+- if (val < 0)
+- return (int) val;
+
+- *value = (u16) val;
+-
+- return 0;
++ return ad5593r_read_word(i2c, AD5593R_MODE_REG_READBACK | reg, value);
+ }
+
+ static int ad5593r_gpio_read(struct ad5592r_state *st, u8 *value)
+ {
+ struct i2c_client *i2c = to_i2c_client(st->dev);
+- s32 val;
++ u16 val;
++ int ret;
+
+- val = i2c_smbus_read_word_swapped(i2c, AD5593R_MODE_GPIO_READBACK);
+- if (val < 0)
+- return (int) val;
++ ret = ad5593r_read_word(i2c, AD5593R_MODE_GPIO_READBACK, &val);
++ if (ret)
++ return ret;
+
+ *value = (u8) val;
+
+diff --git a/drivers/iio/dac/ad5766.c b/drivers/iio/dac/ad5766.c
+index 3104ec32dfaca..dafda84fdea35 100644
+--- a/drivers/iio/dac/ad5766.c
++++ b/drivers/iio/dac/ad5766.c
+@@ -503,13 +503,13 @@ static int ad5766_get_output_range(struct ad5766_state *st)
+ int i, ret, min, max, tmp[2];
+
+ ret = device_property_read_u32_array(&st->spi->dev,
+- "output-range-voltage",
++ "output-range-microvolts",
+ tmp, 2);
+ if (ret)
+ return ret;
+
+- min = tmp[0] / 1000;
+- max = tmp[1] / 1000;
++ min = tmp[0] / 1000000;
++ max = tmp[1] / 1000000;
+ for (i = 0; i < ARRAY_SIZE(ad5766_span_tbl); i++) {
+ if (ad5766_span_tbl[i].min != min ||
+ ad5766_span_tbl[i].max != max)
+diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c
+index 8107f7bbbe3c5..7e2fd32e993a6 100644
+--- a/drivers/iio/dac/ad5770r.c
++++ b/drivers/iio/dac/ad5770r.c
+@@ -522,7 +522,7 @@ static int ad5770r_channel_config(struct ad5770r_state *st)
+ return -EINVAL;
+
+ device_for_each_child_node(&st->spi->dev, child) {
+- ret = fwnode_property_read_u32(child, "num", &num);
++ ret = fwnode_property_read_u32(child, "reg", &num);
+ if (ret)
+ goto err_child_out;
+ if (num >= AD5770R_MAX_CHANNELS) {
+diff --git a/drivers/iio/dac/cio-dac.c b/drivers/iio/dac/cio-dac.c
+index 95813569f3940..77a6916b3d6c6 100644
+--- a/drivers/iio/dac/cio-dac.c
++++ b/drivers/iio/dac/cio-dac.c
+@@ -66,8 +66,8 @@ static int cio_dac_write_raw(struct iio_dev *indio_dev,
+ if (mask != IIO_CHAN_INFO_RAW)
+ return -EINVAL;
+
+- /* DAC can only accept up to a 16-bit value */
+- if ((unsigned int)val > 65535)
++ /* DAC can only accept up to a 12-bit value */
++ if ((unsigned int)val > 4095)
+ return -EINVAL;
+
+ priv->chan_out_states[chan->channel] = val;
+diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
+index 34b14aafb6301..795b5b9e67726 100644
+--- a/drivers/iio/dac/mcp4725.c
++++ b/drivers/iio/dac/mcp4725.c
+@@ -47,12 +47,18 @@ static int __maybe_unused mcp4725_suspend(struct device *dev)
+ struct mcp4725_data *data = iio_priv(i2c_get_clientdata(
+ to_i2c_client(dev)));
+ u8 outbuf[2];
++ int ret;
+
+ outbuf[0] = (data->powerdown_mode + 1) << 4;
+ outbuf[1] = 0;
+ data->powerdown = true;
+
+- return i2c_master_send(data->client, outbuf, 2);
++ ret = i2c_master_send(data->client, outbuf, 2);
++ if (ret < 0)
++ return ret;
++ else if (ret != 2)
++ return -EIO;
++ return 0;
+ }
+
+ static int __maybe_unused mcp4725_resume(struct device *dev)
+@@ -60,13 +66,19 @@ static int __maybe_unused mcp4725_resume(struct device *dev)
+ struct mcp4725_data *data = iio_priv(i2c_get_clientdata(
+ to_i2c_client(dev)));
+ u8 outbuf[2];
++ int ret;
+
+ /* restore previous DAC value */
+ outbuf[0] = (data->dac_value >> 8) & 0xf;
+ outbuf[1] = data->dac_value & 0xff;
+ data->powerdown = false;
+
+- return i2c_master_send(data->client, outbuf, 2);
++ ret = i2c_master_send(data->client, outbuf, 2);
++ if (ret < 0)
++ return ret;
++ else if (ret != 2)
++ return -EIO;
++ return 0;
+ }
+ static SIMPLE_DEV_PM_OPS(mcp4725_pm_ops, mcp4725_suspend, mcp4725_resume);
+
+diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c
+index c0b7ef9007354..c24f609c2ade6 100644
+--- a/drivers/iio/dummy/iio_simple_dummy.c
++++ b/drivers/iio/dummy/iio_simple_dummy.c
+@@ -575,10 +575,9 @@ static struct iio_sw_device *iio_dummy_probe(const char *name)
+ */
+
+ swd = kzalloc(sizeof(*swd), GFP_KERNEL);
+- if (!swd) {
+- ret = -ENOMEM;
+- goto error_kzalloc;
+- }
++ if (!swd)
++ return ERR_PTR(-ENOMEM);
++
+ /*
+ * Allocate an IIO device.
+ *
+@@ -590,7 +589,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name)
+ indio_dev = iio_device_alloc(parent, sizeof(*st));
+ if (!indio_dev) {
+ ret = -ENOMEM;
+- goto error_ret;
++ goto error_free_swd;
+ }
+
+ st = iio_priv(indio_dev);
+@@ -616,6 +615,10 @@ static struct iio_sw_device *iio_dummy_probe(const char *name)
+ * indio_dev->name = spi_get_device_id(spi)->name;
+ */
+ indio_dev->name = kstrdup(name, GFP_KERNEL);
++ if (!indio_dev->name) {
++ ret = -ENOMEM;
++ goto error_free_device;
++ }
+
+ /* Provide description of available channels */
+ indio_dev->channels = iio_dummy_channels;
+@@ -632,7 +635,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name)
+
+ ret = iio_simple_dummy_events_register(indio_dev);
+ if (ret < 0)
+- goto error_free_device;
++ goto error_free_name;
+
+ ret = iio_simple_dummy_configure_buffer(indio_dev);
+ if (ret < 0)
+@@ -649,11 +652,12 @@ error_unconfigure_buffer:
+ iio_simple_dummy_unconfigure_buffer(indio_dev);
+ error_unregister_events:
+ iio_simple_dummy_events_unregister(indio_dev);
++error_free_name:
++ kfree(indio_dev->name);
+ error_free_device:
+ iio_device_free(indio_dev);
+-error_ret:
++error_free_swd:
+ kfree(swd);
+-error_kzalloc:
+ return ERR_PTR(ret);
+ }
+
+diff --git a/drivers/iio/gyro/adis16136.c b/drivers/iio/gyro/adis16136.c
+index 36879f01e28ca..71295709f2b96 100644
+--- a/drivers/iio/gyro/adis16136.c
++++ b/drivers/iio/gyro/adis16136.c
+@@ -591,3 +591,4 @@ module_spi_driver(adis16136_driver);
+ MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16133/ADIS16135/ADIS16136 gyroscope driver");
+ MODULE_LICENSE("GPL v2");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c
+index 66b6b7bd5e1bc..eaf57bd339edd 100644
+--- a/drivers/iio/gyro/adis16260.c
++++ b/drivers/iio/gyro/adis16260.c
+@@ -433,3 +433,4 @@ module_spi_driver(adis16260_driver);
+ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16260/5 Digital Gyroscope Sensor");
+ MODULE_LICENSE("GPL v2");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/gyro/adxrs290.c b/drivers/iio/gyro/adxrs290.c
+index 3e0734ddafe36..600e9725da788 100644
+--- a/drivers/iio/gyro/adxrs290.c
++++ b/drivers/iio/gyro/adxrs290.c
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/bitfield.h>
++#include <linux/bitops.h>
+ #include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/kernel.h>
+@@ -124,7 +125,7 @@ static int adxrs290_get_rate_data(struct iio_dev *indio_dev, const u8 cmd, int *
+ goto err_unlock;
+ }
+
+- *val = temp;
++ *val = sign_extend32(temp, 15);
+
+ err_unlock:
+ mutex_unlock(&st->lock);
+@@ -146,7 +147,7 @@ static int adxrs290_get_temp_data(struct iio_dev *indio_dev, int *val)
+ }
+
+ /* extract lower 12 bits temperature reading */
+- *val = temp & 0x0FFF;
++ *val = sign_extend32(temp, 11);
+
+ err_unlock:
+ mutex_unlock(&st->lock);
+diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
+index 17b939a367ad0..81a6d09788bd7 100644
+--- a/drivers/iio/gyro/bmg160_core.c
++++ b/drivers/iio/gyro/bmg160_core.c
+@@ -1188,11 +1188,14 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(dev, "unable to register iio device\n");
+- goto err_buffer_cleanup;
++ goto err_pm_cleanup;
+ }
+
+ return 0;
+
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(dev);
++ pm_runtime_disable(dev);
+ err_buffer_cleanup:
+ iio_triggered_buffer_cleanup(indio_dev);
+ err_trigger_unregister:
+diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
+index 8f0ad022c7f1b..698c50da1f109 100644
+--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
++++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
+@@ -231,6 +231,7 @@ static int gyro_3d_capture_sample(struct hid_sensor_hub_device *hsdev,
+ gyro_state->timestamp =
+ hid_sensor_convert_timestamp(&gyro_state->common_attributes,
+ *(s64 *)raw_data);
++ ret = 0;
+ break;
+ default:
+ break;
+diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c
+index 04dd6a7969ea7..4cfa0d4395605 100644
+--- a/drivers/iio/gyro/itg3200_buffer.c
++++ b/drivers/iio/gyro/itg3200_buffer.c
+@@ -61,9 +61,9 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p)
+
+ iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp);
+
++error_ret:
+ iio_trigger_notify_done(indio_dev->trig);
+
+-error_ret:
+ return IRQ_HANDLED;
+ }
+
+diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c
+index 3225de1f023b3..5311bee5475ff 100644
+--- a/drivers/iio/gyro/mpu3050-core.c
++++ b/drivers/iio/gyro/mpu3050-core.c
+@@ -876,6 +876,7 @@ static int mpu3050_power_up(struct mpu3050 *mpu3050)
+ ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM,
+ MPU3050_PWR_MGM_SLEEP, 0);
+ if (ret) {
++ regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs);
+ dev_err(mpu3050->dev, "error setting power mode\n");
+ return ret;
+ }
+diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c
+index 3ef86e16ee656..a8164fe48b857 100644
+--- a/drivers/iio/gyro/st_gyro_i2c.c
++++ b/drivers/iio/gyro/st_gyro_i2c.c
+@@ -106,10 +106,10 @@ static int st_gyro_i2c_remove(struct i2c_client *client)
+ {
+ struct iio_dev *indio_dev = i2c_get_clientdata(client);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_gyro_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c
+index 41d835493347c..9d8916871b4bf 100644
+--- a/drivers/iio/gyro/st_gyro_spi.c
++++ b/drivers/iio/gyro/st_gyro_spi.c
+@@ -110,10 +110,10 @@ static int st_gyro_spi_remove(struct spi_device *spi)
+ {
+ struct iio_dev *indio_dev = spi_get_drvdata(spi);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_gyro_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c
+index d4921385aaf7d..b5f959bba4229 100644
+--- a/drivers/iio/health/afe4403.c
++++ b/drivers/iio/health/afe4403.c
+@@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev,
+ int *val, int *val2, long mask)
+ {
+ struct afe4403_data *afe = iio_priv(indio_dev);
+- unsigned int reg = afe4403_channel_values[chan->address];
+- unsigned int field = afe4403_channel_leds[chan->address];
++ unsigned int reg, field;
+ int ret;
+
+ switch (chan->type) {
+ case IIO_INTENSITY:
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ reg = afe4403_channel_values[chan->address];
+ ret = afe4403_read(afe, reg, val);
+ if (ret)
+ return ret;
+@@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev,
+ case IIO_CURRENT:
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ field = afe4403_channel_leds[chan->address];
+ ret = regmap_field_read(afe->fields[field], val);
+ if (ret)
+ return ret;
+diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c
+index d8a27dfe074a3..70f0f6f6351cd 100644
+--- a/drivers/iio/health/afe4404.c
++++ b/drivers/iio/health/afe4404.c
+@@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev,
+ int *val, int *val2, long mask)
+ {
+ struct afe4404_data *afe = iio_priv(indio_dev);
+- unsigned int value_reg = afe4404_channel_values[chan->address];
+- unsigned int led_field = afe4404_channel_leds[chan->address];
+- unsigned int offdac_field = afe4404_channel_offdacs[chan->address];
++ unsigned int value_reg, led_field, offdac_field;
+ int ret;
+
+ switch (chan->type) {
+ case IIO_INTENSITY:
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ value_reg = afe4404_channel_values[chan->address];
+ ret = regmap_read(afe->regmap, value_reg, val);
+ if (ret)
+ return ret;
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_OFFSET:
++ offdac_field = afe4404_channel_offdacs[chan->address];
+ ret = regmap_field_read(afe->fields[offdac_field], val);
+ if (ret)
+ return ret;
+@@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev,
+ case IIO_CURRENT:
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ led_field = afe4404_channel_leds[chan->address];
+ ret = regmap_field_read(afe->fields[led_field], val);
+ if (ret)
+ return ret;
+@@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev,
+ int val, int val2, long mask)
+ {
+ struct afe4404_data *afe = iio_priv(indio_dev);
+- unsigned int led_field = afe4404_channel_leds[chan->address];
+- unsigned int offdac_field = afe4404_channel_offdacs[chan->address];
++ unsigned int led_field, offdac_field;
+
+ switch (chan->type) {
+ case IIO_INTENSITY:
+ switch (mask) {
+ case IIO_CHAN_INFO_OFFSET:
++ offdac_field = afe4404_channel_offdacs[chan->address];
+ return regmap_field_write(afe->fields[offdac_field], val);
+ }
+ break;
+ case IIO_CURRENT:
+ switch (mask) {
+ case IIO_CHAN_INFO_RAW:
++ led_field = afe4404_channel_leds[chan->address];
+ return regmap_field_write(afe->fields[led_field], val);
+ }
+ break;
+diff --git a/drivers/iio/humidity/hts221_buffer.c b/drivers/iio/humidity/hts221_buffer.c
+index f29692b9d2db0..66b32413cf5e2 100644
+--- a/drivers/iio/humidity/hts221_buffer.c
++++ b/drivers/iio/humidity/hts221_buffer.c
+@@ -135,9 +135,12 @@ int hts221_allocate_trigger(struct iio_dev *iio_dev)
+
+ iio_trigger_set_drvdata(hw->trig, iio_dev);
+ hw->trig->ops = &hts221_trigger_ops;
++
++ err = devm_iio_trigger_register(hw->dev, hw->trig);
++
+ iio_dev->trig = iio_trigger_get(hw->trig);
+
+- return devm_iio_trigger_register(hw->dev, hw->trig);
++ return err;
+ }
+
+ static int hts221_buffer_preenable(struct iio_dev *iio_dev)
+diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
+index 001ca2c3ff953..1f8ed7b1ae848 100644
+--- a/drivers/iio/imu/Kconfig
++++ b/drivers/iio/imu/Kconfig
+@@ -47,6 +47,7 @@ config ADIS16480
+ depends on SPI
+ select IIO_ADIS_LIB
+ select IIO_ADIS_LIB_BUFFER if IIO_BUFFER
++ select CRC32
+ help
+ Say yes here to build support for Analog Devices ADIS16375, ADIS16480,
+ ADIS16485, ADIS16488 inertial sensors.
+diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
+index b9a06ca29beec..bc40240b29e26 100644
+--- a/drivers/iio/imu/adis.c
++++ b/drivers/iio/imu/adis.c
+@@ -30,8 +30,8 @@
+ * @value: The value to write to device (up to 4 bytes)
+ * @size: The size of the @value (in bytes)
+ */
+-int __adis_write_reg(struct adis *adis, unsigned int reg,
+- unsigned int value, unsigned int size)
++int __adis_write_reg(struct adis *adis, unsigned int reg, unsigned int value,
++ unsigned int size)
+ {
+ unsigned int page = reg / ADIS_PAGE_SIZE;
+ int ret, i;
+@@ -114,14 +114,14 @@ int __adis_write_reg(struct adis *adis, unsigned int reg,
+ ret = spi_sync(adis->spi, &msg);
+ if (ret) {
+ dev_err(&adis->spi->dev, "Failed to write register 0x%02X: %d\n",
+- reg, ret);
++ reg, ret);
+ } else {
+ adis->current_page = page;
+ }
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(__adis_write_reg);
++EXPORT_SYMBOL_NS_GPL(__adis_write_reg, IIO_ADISLIB);
+
+ /**
+ * __adis_read_reg() - read N bytes from register (unlocked version)
+@@ -130,8 +130,8 @@ EXPORT_SYMBOL_GPL(__adis_write_reg);
+ * @val: The value read back from the device
+ * @size: The size of the @val buffer
+ */
+-int __adis_read_reg(struct adis *adis, unsigned int reg,
+- unsigned int *val, unsigned int size)
++int __adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val,
++ unsigned int size)
+ {
+ unsigned int page = reg / ADIS_PAGE_SIZE;
+ struct spi_message msg;
+@@ -201,12 +201,12 @@ int __adis_read_reg(struct adis *adis, unsigned int reg,
+ ret = spi_sync(adis->spi, &msg);
+ if (ret) {
+ dev_err(&adis->spi->dev, "Failed to read register 0x%02X: %d\n",
+- reg, ret);
++ reg, ret);
+ return ret;
+- } else {
+- adis->current_page = page;
+ }
+
++ adis->current_page = page;
++
+ switch (size) {
+ case 4:
+ *val = get_unaligned_be32(adis->rx);
+@@ -218,7 +218,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg,
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(__adis_read_reg);
++EXPORT_SYMBOL_NS_GPL(__adis_read_reg, IIO_ADISLIB);
+ /**
+ * __adis_update_bits_base() - ADIS Update bits function - Unlocked version
+ * @adis: The adis device
+@@ -243,17 +243,17 @@ int __adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask,
+
+ return __adis_write_reg(adis, reg, __val, size);
+ }
+-EXPORT_SYMBOL_GPL(__adis_update_bits_base);
++EXPORT_SYMBOL_NS_GPL(__adis_update_bits_base, IIO_ADISLIB);
+
+ #ifdef CONFIG_DEBUG_FS
+
+-int adis_debugfs_reg_access(struct iio_dev *indio_dev,
+- unsigned int reg, unsigned int writeval, unsigned int *readval)
++int adis_debugfs_reg_access(struct iio_dev *indio_dev, unsigned int reg,
++ unsigned int writeval, unsigned int *readval)
+ {
+ struct adis *adis = iio_device_get_drvdata(indio_dev);
+
+ if (readval) {
+- uint16_t val16;
++ u16 val16;
+ int ret;
+
+ ret = adis_read_reg_16(adis, reg, &val16);
+@@ -261,36 +261,41 @@ int adis_debugfs_reg_access(struct iio_dev *indio_dev,
+ *readval = val16;
+
+ return ret;
+- } else {
+- return adis_write_reg_16(adis, reg, writeval);
+ }
++
++ return adis_write_reg_16(adis, reg, writeval);
+ }
+-EXPORT_SYMBOL(adis_debugfs_reg_access);
++EXPORT_SYMBOL_NS(adis_debugfs_reg_access, IIO_ADISLIB);
+
+ #endif
+
+ /**
+- * adis_enable_irq() - Enable or disable data ready IRQ
++ * __adis_enable_irq() - Enable or disable data ready IRQ (unlocked)
+ * @adis: The adis device
+ * @enable: Whether to enable the IRQ
+ *
+ * Returns 0 on success, negative error code otherwise
+ */
+-int adis_enable_irq(struct adis *adis, bool enable)
++int __adis_enable_irq(struct adis *adis, bool enable)
+ {
+- int ret = 0;
+- uint16_t msc;
++ int ret;
++ u16 msc;
+
+- mutex_lock(&adis->state_lock);
++ if (adis->data->enable_irq)
++ return adis->data->enable_irq(adis, enable);
++
++ if (adis->data->unmasked_drdy) {
++ if (enable)
++ enable_irq(adis->spi->irq);
++ else
++ disable_irq(adis->spi->irq);
+
+- if (adis->data->enable_irq) {
+- ret = adis->data->enable_irq(adis, enable);
+- goto out_unlock;
++ return 0;
+ }
+
+ ret = __adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
+ if (ret)
+- goto out_unlock;
++ return ret;
+
+ msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH;
+ msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2;
+@@ -299,13 +304,9 @@ int adis_enable_irq(struct adis *adis, bool enable)
+ else
+ msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN;
+
+- ret = __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
+-
+-out_unlock:
+- mutex_unlock(&adis->state_lock);
+- return ret;
++ return __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
+ }
+-EXPORT_SYMBOL(adis_enable_irq);
++EXPORT_SYMBOL_NS(__adis_enable_irq, IIO_ADISLIB);
+
+ /**
+ * __adis_check_status() - Check the device for error conditions (unlocked)
+@@ -315,7 +316,7 @@ EXPORT_SYMBOL(adis_enable_irq);
+ */
+ int __adis_check_status(struct adis *adis)
+ {
+- uint16_t status;
++ u16 status;
+ int ret;
+ int i;
+
+@@ -337,7 +338,7 @@ int __adis_check_status(struct adis *adis)
+
+ return -EIO;
+ }
+-EXPORT_SYMBOL_GPL(__adis_check_status);
++EXPORT_SYMBOL_NS_GPL(__adis_check_status, IIO_ADISLIB);
+
+ /**
+ * __adis_reset() - Reset the device (unlocked version)
+@@ -351,7 +352,7 @@ int __adis_reset(struct adis *adis)
+ const struct adis_timeout *timeouts = adis->data->timeouts;
+
+ ret = __adis_write_reg_8(adis, adis->data->glob_cmd_reg,
+- ADIS_GLOB_CMD_SW_RESET);
++ ADIS_GLOB_CMD_SW_RESET);
+ if (ret) {
+ dev_err(&adis->spi->dev, "Failed to reset device: %d\n", ret);
+ return ret;
+@@ -361,7 +362,7 @@ int __adis_reset(struct adis *adis)
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(__adis_reset);
++EXPORT_SYMBOL_NS_GPL(__adis_reset, IIO_ADIS_LIB);
+
+ static int adis_self_test(struct adis *adis)
+ {
+@@ -407,7 +408,7 @@ int __adis_initial_startup(struct adis *adis)
+ {
+ const struct adis_timeout *timeouts = adis->data->timeouts;
+ struct gpio_desc *gpio;
+- uint16_t prod_id;
++ u16 prod_id;
+ int ret;
+
+ /* check if the device has rst pin low */
+@@ -416,7 +417,7 @@ int __adis_initial_startup(struct adis *adis)
+ return PTR_ERR(gpio);
+
+ if (gpio) {
+- msleep(10);
++ usleep_range(10, 12);
+ /* bring device out of reset */
+ gpiod_set_value_cansleep(gpio, 0);
+ msleep(timeouts->reset_ms);
+@@ -430,6 +431,14 @@ int __adis_initial_startup(struct adis *adis)
+ if (ret)
+ return ret;
+
++ /*
++ * don't bother calling this if we can't unmask the IRQ as in this case
++ * the IRQ is most likely not yet requested and we will request it
++	 * with 'IRQF_NO_AUTOEN' anyway.
++ */
++ if (!adis->data->unmasked_drdy)
++ __adis_enable_irq(adis, false);
++
+ if (!adis->data->prod_id_reg)
+ return 0;
+
+@@ -444,7 +453,7 @@ int __adis_initial_startup(struct adis *adis)
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(__adis_initial_startup);
++EXPORT_SYMBOL_NS_GPL(__adis_initial_startup, IIO_ADISLIB);
+
+ /**
+ * adis_single_conversion() - Performs a single sample conversion
+@@ -462,7 +471,8 @@ EXPORT_SYMBOL_GPL(__adis_initial_startup);
+ * a error bit in the channels raw value set error_mask to 0.
+ */
+ int adis_single_conversion(struct iio_dev *indio_dev,
+- const struct iio_chan_spec *chan, unsigned int error_mask, int *val)
++ const struct iio_chan_spec *chan,
++ unsigned int error_mask, int *val)
+ {
+ struct adis *adis = iio_device_get_drvdata(indio_dev);
+ unsigned int uval;
+@@ -471,7 +481,7 @@ int adis_single_conversion(struct iio_dev *indio_dev,
+ mutex_lock(&adis->state_lock);
+
+ ret = __adis_read_reg(adis, chan->address, &uval,
+- chan->scan_type.storagebits / 8);
++ chan->scan_type.storagebits / 8);
+ if (ret)
+ goto err_unlock;
+
+@@ -491,7 +501,7 @@ err_unlock:
+ mutex_unlock(&adis->state_lock);
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(adis_single_conversion);
++EXPORT_SYMBOL_NS_GPL(adis_single_conversion, IIO_ADISLIB);
+
+ /**
+ * adis_init() - Initialize adis device structure
+@@ -506,7 +516,7 @@ EXPORT_SYMBOL_GPL(adis_single_conversion);
+ * called.
+ */
+ int adis_init(struct adis *adis, struct iio_dev *indio_dev,
+- struct spi_device *spi, const struct adis_data *data)
++ struct spi_device *spi, const struct adis_data *data)
+ {
+ if (!data || !data->timeouts) {
+ dev_err(&spi->dev, "No config data or timeouts not defined!\n");
+@@ -526,9 +536,9 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev,
+ adis->current_page = 0;
+ }
+
+- return adis_enable_irq(adis, false);
++ return 0;
+ }
+-EXPORT_SYMBOL_GPL(adis_init);
++EXPORT_SYMBOL_NS_GPL(adis_init, IIO_ADISLIB);
+
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c
+index b12917a7cb602..9bcd9a9261b92 100644
+--- a/drivers/iio/imu/adis16400.c
++++ b/drivers/iio/imu/adis16400.c
+@@ -1230,3 +1230,4 @@ module_spi_driver(adis16400_driver);
+ MODULE_AUTHOR("Manuel Stahl <manuel.stahl@iis.fraunhofer.de>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16400/5 IMU SPI driver");
+ MODULE_LICENSE("GPL v2");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/imu/adis16460.c b/drivers/iio/imu/adis16460.c
+index a6f9fba3e03f4..40fc0e582a9fd 100644
+--- a/drivers/iio/imu/adis16460.c
++++ b/drivers/iio/imu/adis16460.c
+@@ -444,3 +444,4 @@ module_spi_driver(adis16460_driver);
+ MODULE_AUTHOR("Dragos Bogdan <dragos.bogdan@analog.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16460 IMU driver");
+ MODULE_LICENSE("GPL");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c
+index 287fff39a927a..9d28534db3b08 100644
+--- a/drivers/iio/imu/adis16475.c
++++ b/drivers/iio/imu/adis16475.c
+@@ -1382,3 +1382,4 @@ module_spi_driver(adis16475_driver);
+ MODULE_AUTHOR("Nuno Sa <nuno.sa@analog.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16475 IMU driver");
+ MODULE_LICENSE("GPL");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
+index ed129321a14da..44bbe3d199073 100644
+--- a/drivers/iio/imu/adis16480.c
++++ b/drivers/iio/imu/adis16480.c
+@@ -1403,6 +1403,7 @@ static int adis16480_probe(struct spi_device *spi)
+ {
+ const struct spi_device_id *id = spi_get_device_id(spi);
+ const struct adis_data *adis16480_data;
++ irq_handler_t trigger_handler = NULL;
+ struct iio_dev *indio_dev;
+ struct adis16480 *st;
+ int ret;
+@@ -1474,8 +1475,12 @@ static int adis16480_probe(struct spi_device *spi)
+ st->clk_freq = st->chip_info->int_clk;
+ }
+
++ /* Only use our trigger handler if burst mode is supported */
++ if (adis16480_data->burst_len)
++ trigger_handler = adis16480_trigger_handler;
++
+ ret = devm_adis_setup_buffer_and_trigger(&st->adis, indio_dev,
+- adis16480_trigger_handler);
++ trigger_handler);
+ if (ret)
+ return ret;
+
+@@ -1533,3 +1538,4 @@ module_spi_driver(adis16480_driver);
+ MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16480 IMU driver");
+ MODULE_LICENSE("GPL v2");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
+index 351c303c8a8c0..928933027ae34 100644
+--- a/drivers/iio/imu/adis_buffer.c
++++ b/drivers/iio/imu/adis_buffer.c
+@@ -20,7 +20,7 @@
+ #include <linux/iio/imu/adis.h>
+
+ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev,
+- const unsigned long *scan_mask)
++ const unsigned long *scan_mask)
+ {
+ struct adis *adis = iio_device_get_drvdata(indio_dev);
+ unsigned int burst_length, burst_max_length;
+@@ -67,7 +67,7 @@ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev,
+ }
+
+ int adis_update_scan_mode(struct iio_dev *indio_dev,
+- const unsigned long *scan_mask)
++ const unsigned long *scan_mask)
+ {
+ struct adis *adis = iio_device_get_drvdata(indio_dev);
+ const struct iio_chan_spec *chan;
+@@ -124,7 +124,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(adis_update_scan_mode);
++EXPORT_SYMBOL_NS_GPL(adis_update_scan_mode, IIO_ADISLIB);
+
+ static irqreturn_t adis_trigger_handler(int irq, void *p)
+ {
+@@ -158,7 +158,7 @@ static irqreturn_t adis_trigger_handler(int irq, void *p)
+ }
+
+ iio_push_to_buffers_with_timestamp(indio_dev, adis->buffer,
+- pf->timestamp);
++ pf->timestamp);
+
+ irq_done:
+ iio_trigger_notify_done(indio_dev->trig);
+@@ -212,5 +212,5 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
+ return devm_add_action_or_reset(&adis->spi->dev, adis_buffer_cleanup,
+ adis);
+ }
+-EXPORT_SYMBOL_GPL(devm_adis_setup_buffer_and_trigger);
++EXPORT_SYMBOL_NS_GPL(devm_adis_setup_buffer_and_trigger, IIO_ADISLIB);
+
+diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c
+index 48eedc29b28a8..f890bf842db86 100644
+--- a/drivers/iio/imu/adis_trigger.c
++++ b/drivers/iio/imu/adis_trigger.c
+@@ -15,8 +15,7 @@
+ #include <linux/iio/trigger.h>
+ #include <linux/iio/imu/adis.h>
+
+-static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig,
+- bool state)
++static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state)
+ {
+ struct adis *adis = iio_trigger_get_drvdata(trig);
+
+@@ -30,6 +29,10 @@ static const struct iio_trigger_ops adis_trigger_ops = {
+ static int adis_validate_irq_flag(struct adis *adis)
+ {
+ unsigned long direction = adis->irq_flag & IRQF_TRIGGER_MASK;
++
++ /* We cannot mask the interrupt so ensure it's not enabled at request */
++ if (adis->data->unmasked_drdy)
++ adis->irq_flag |= IRQF_NO_AUTOEN;
+ /*
+ * Typically this devices have data ready either on the rising edge or
+ * on the falling edge of the data ready pin. This checks enforces that
+@@ -84,5 +87,5 @@ int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
+
+ return devm_iio_trigger_register(&adis->spi->dev, adis->trig);
+ }
+-EXPORT_SYMBOL_GPL(devm_adis_probe_trigger);
++EXPORT_SYMBOL_NS_GPL(devm_adis_probe_trigger, IIO_ADISLIB);
+
+diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c
+index 824b5124a5f55..01336105792ee 100644
+--- a/drivers/iio/imu/bmi160/bmi160_core.c
++++ b/drivers/iio/imu/bmi160/bmi160_core.c
+@@ -730,7 +730,7 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
+
+ ret = regmap_write(data->regmap, BMI160_REG_CMD, BMI160_CMD_SOFTRESET);
+ if (ret)
+- return ret;
++ goto disable_regulator;
+
+ usleep_range(BMI160_SOFTRESET_USLEEP, BMI160_SOFTRESET_USLEEP + 1);
+
+@@ -741,29 +741,37 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
+ if (use_spi) {
+ ret = regmap_read(data->regmap, BMI160_REG_DUMMY, &val);
+ if (ret)
+- return ret;
++ goto disable_regulator;
+ }
+
+ ret = regmap_read(data->regmap, BMI160_REG_CHIP_ID, &val);
+ if (ret) {
+ dev_err(dev, "Error reading chip id\n");
+- return ret;
++ goto disable_regulator;
+ }
+ if (val != BMI160_CHIP_ID_VAL) {
+ dev_err(dev, "Wrong chip id, got %x expected %x\n",
+ val, BMI160_CHIP_ID_VAL);
+- return -ENODEV;
++ ret = -ENODEV;
++ goto disable_regulator;
+ }
+
+ ret = bmi160_set_mode(data, BMI160_ACCEL, true);
+ if (ret)
+- return ret;
++ goto disable_regulator;
+
+ ret = bmi160_set_mode(data, BMI160_GYRO, true);
+ if (ret)
+- return ret;
++ goto disable_accel;
+
+ return 0;
++
++disable_accel:
++ bmi160_set_mode(data, BMI160_ACCEL, false);
++
++disable_regulator:
++ regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies);
++ return ret;
+ }
+
+ static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig,
+diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c
+index ab288186f36e4..04d3778fcc153 100644
+--- a/drivers/iio/imu/fxos8700_core.c
++++ b/drivers/iio/imu/fxos8700_core.c
+@@ -10,6 +10,7 @@
+ #include <linux/regmap.h>
+ #include <linux/acpi.h>
+ #include <linux/bitops.h>
++#include <linux/bitfield.h>
+
+ #include <linux/iio/iio.h>
+ #include <linux/iio/sysfs.h>
+@@ -144,9 +145,8 @@
+ #define FXOS8700_NVM_DATA_BNK0 0xa7
+
+ /* Bit definitions for FXOS8700_CTRL_REG1 */
+-#define FXOS8700_CTRL_ODR_MSK 0x38
+ #define FXOS8700_CTRL_ODR_MAX 0x00
+-#define FXOS8700_CTRL_ODR_MIN GENMASK(4, 3)
++#define FXOS8700_CTRL_ODR_MSK GENMASK(5, 3)
+
+ /* Bit definitions for FXOS8700_M_CTRL_REG1 */
+ #define FXOS8700_HMS_MASK GENMASK(1, 0)
+@@ -320,7 +320,7 @@ static enum fxos8700_sensor fxos8700_to_sensor(enum iio_chan_type iio_type)
+ switch (iio_type) {
+ case IIO_ACCEL:
+ return FXOS8700_ACCEL;
+- case IIO_ANGL_VEL:
++ case IIO_MAGN:
+ return FXOS8700_MAGN;
+ default:
+ return -EINVAL;
+@@ -345,15 +345,35 @@ static int fxos8700_set_active_mode(struct fxos8700_data *data,
+ static int fxos8700_set_scale(struct fxos8700_data *data,
+ enum fxos8700_sensor t, int uscale)
+ {
+- int i;
++ int i, ret, val;
++ bool active_mode;
+ static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale);
+ struct device *dev = regmap_get_device(data->regmap);
+
+ if (t == FXOS8700_MAGN) {
+- dev_err(dev, "Magnetometer scale is locked at 1200uT\n");
++ dev_err(dev, "Magnetometer scale is locked at 0.001Gs\n");
+ return -EINVAL;
+ }
+
++ /*
++	 * When the device is in active mode, it fails to set an ACCEL
++	 * full-scale range (2g/4g/8g) in FXOS8700_XYZ_DATA_CFG.
++	 * This does not align with the datasheet, but it is how the
++	 * fxos8700 chip behaves. Set the device in standby mode before
++	 * setting an ACCEL full-scale range.
++ */
++ ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val);
++ if (ret)
++ return ret;
++
++ active_mode = val & FXOS8700_ACTIVE;
++ if (active_mode) {
++ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1,
++ val & ~FXOS8700_ACTIVE);
++ if (ret)
++ return ret;
++ }
++
+ for (i = 0; i < scale_num; i++)
+ if (fxos8700_accel_scale[i].uscale == uscale)
+ break;
+@@ -361,8 +381,12 @@ static int fxos8700_set_scale(struct fxos8700_data *data,
+ if (i == scale_num)
+ return -EINVAL;
+
+- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG,
++ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG,
+ fxos8700_accel_scale[i].bits);
++ if (ret)
++ return ret;
++ return regmap_write(data->regmap, FXOS8700_CTRL_REG1,
++ active_mode);
+ }
+
+ static int fxos8700_get_scale(struct fxos8700_data *data,
+@@ -372,7 +396,7 @@ static int fxos8700_get_scale(struct fxos8700_data *data,
+ static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale);
+
+ if (t == FXOS8700_MAGN) {
+- *uscale = 1200; /* Magnetometer is locked at 1200uT */
++ *uscale = 1000; /* Magnetometer is locked at 0.001Gs */
+ return 0;
+ }
+
+@@ -394,22 +418,61 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type,
+ int axis, int *val)
+ {
+ u8 base, reg;
++ s16 tmp;
+ int ret;
+- enum fxos8700_sensor type = fxos8700_to_sensor(chan_type);
+
+- base = type ? FXOS8700_OUT_X_MSB : FXOS8700_M_OUT_X_MSB;
++ /*
++	 * The register base address varies with the channel type.
++	 * This bug hadn't been noticed before because the enum-based code
++	 * was hard to read. Use a switch statement instead.
++ */
++ switch (chan_type) {
++ case IIO_ACCEL:
++ base = FXOS8700_OUT_X_MSB;
++ break;
++ case IIO_MAGN:
++ base = FXOS8700_M_OUT_X_MSB;
++ break;
++ default:
++ return -EINVAL;
++ }
+
+ /* Block read 6 bytes of device output registers to avoid data loss */
+ ret = regmap_bulk_read(data->regmap, base, data->buf,
+- FXOS8700_DATA_BUF_SIZE);
++ sizeof(data->buf));
+ if (ret)
+ return ret;
+
+ /* Convert axis to buffer index */
+ reg = axis - IIO_MOD_X;
+
++ /*
++ * Convert to native endianness. The accel data and magn data
++ * are signed, so a forced type conversion is needed.
++ */
++ tmp = be16_to_cpu(data->buf[reg]);
++
++ /*
++	 * The ACCEL output data registers contain the X-axis, Y-axis, and
++	 * Z-axis 14-bit left-justified sample data, while the MAGN output
++	 * data registers contain 16-bit sample data. Apply an arithmetic
++	 * 2-bit right shift to the raw data read back from the ACCEL
++	 * output data registers and leave the MAGN data unchanged.
++	 * The value is then sign-extended to 32 bits.
++ */
++ switch (chan_type) {
++ case IIO_ACCEL:
++ tmp = tmp >> 2;
++ break;
++ case IIO_MAGN:
++ /* Nothing to do */
++ break;
++ default:
++ return -EINVAL;
++ }
++
+ /* Convert to native endianness */
+- *val = sign_extend32(be16_to_cpu(data->buf[reg]), 15);
++ *val = sign_extend32(tmp, 15);
+
+ return 0;
+ }
+@@ -445,10 +508,9 @@ static int fxos8700_set_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
+ if (i >= odr_num)
+ return -EINVAL;
+
+- return regmap_update_bits(data->regmap,
+- FXOS8700_CTRL_REG1,
+- FXOS8700_CTRL_ODR_MSK + FXOS8700_ACTIVE,
+- fxos8700_odr[i].bits << 3 | active_mode);
++ val &= ~FXOS8700_CTRL_ODR_MSK;
++ val |= FIELD_PREP(FXOS8700_CTRL_ODR_MSK, fxos8700_odr[i].bits) | FXOS8700_ACTIVE;
++ return regmap_write(data->regmap, FXOS8700_CTRL_REG1, val);
+ }
+
+ static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
+@@ -461,7 +523,7 @@ static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t,
+ if (ret)
+ return ret;
+
+- val &= FXOS8700_CTRL_ODR_MSK;
++ val = FIELD_GET(FXOS8700_CTRL_ODR_MSK, val);
+
+ for (i = 0; i < odr_num; i++)
+ if (val == fxos8700_odr[i].bits)
+@@ -526,7 +588,7 @@ static IIO_CONST_ATTR(in_accel_sampling_frequency_available,
+ static IIO_CONST_ATTR(in_magn_sampling_frequency_available,
+ "1.5625 6.25 12.5 50 100 200 400 800");
+ static IIO_CONST_ATTR(in_accel_scale_available, "0.000244 0.000488 0.000976");
+-static IIO_CONST_ATTR(in_magn_scale_available, "0.000001200");
++static IIO_CONST_ATTR(in_magn_scale_available, "0.001000");
+
+ static struct attribute *fxos8700_attrs[] = {
+ &iio_const_attr_in_accel_sampling_frequency_available.dev_attr.attr,
+@@ -592,14 +654,19 @@ static int fxos8700_chip_init(struct fxos8700_data *data, bool use_spi)
+ if (ret)
+ return ret;
+
+- /* Max ODR (800Hz individual or 400Hz hybrid), active mode */
+- ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1,
+- FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE);
++ /*
++ * Set max full-scale range (+/-8G) for ACCEL sensor in chip
++ * initialization then activate the device.
++ */
++ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G);
+ if (ret)
+ return ret;
+
+- /* Set for max full-scale range (+/-8G) */
+- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G);
++ /* Max ODR (800Hz individual or 400Hz hybrid), active mode */
++ return regmap_update_bits(data->regmap, FXOS8700_CTRL_REG1,
++ FXOS8700_CTRL_ODR_MSK | FXOS8700_ACTIVE,
++ FIELD_PREP(FXOS8700_CTRL_ODR_MSK, FXOS8700_CTRL_ODR_MAX) |
++ FXOS8700_ACTIVE);
+ }
+
+ static void fxos8700_chip_uninit(void *data)
+diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h
+index c0f5059b13b31..995a9dc06521d 100644
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h
+@@ -17,6 +17,7 @@
+ #include "inv_icm42600_buffer.h"
+
+ enum inv_icm42600_chip {
++ INV_CHIP_INVALID,
+ INV_CHIP_ICM42600,
+ INV_CHIP_ICM42602,
+ INV_CHIP_ICM42605,
+diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
+index 99576b2c171f4..32d7f83642303 100644
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
+@@ -275,9 +275,14 @@ static int inv_icm42600_buffer_preenable(struct iio_dev *indio_dev)
+ {
+ struct inv_icm42600_state *st = iio_device_get_drvdata(indio_dev);
+ struct device *dev = regmap_get_device(st->map);
++ struct inv_icm42600_timestamp *ts = iio_priv(indio_dev);
+
+ pm_runtime_get_sync(dev);
+
++ mutex_lock(&st->lock);
++ inv_icm42600_timestamp_reset(ts);
++ mutex_unlock(&st->lock);
++
+ return 0;
+ }
+
+@@ -375,7 +380,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
+ struct device *dev = regmap_get_device(st->map);
+ unsigned int sensor;
+ unsigned int *watermark;
+- struct inv_icm42600_timestamp *ts;
+ struct inv_icm42600_sensor_conf conf = INV_ICM42600_SENSOR_CONF_INIT;
+ unsigned int sleep_temp = 0;
+ unsigned int sleep_sensor = 0;
+@@ -385,11 +389,9 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
+ if (indio_dev == st->indio_gyro) {
+ sensor = INV_ICM42600_SENSOR_GYRO;
+ watermark = &st->fifo.watermark.gyro;
+- ts = iio_priv(st->indio_gyro);
+ } else if (indio_dev == st->indio_accel) {
+ sensor = INV_ICM42600_SENSOR_ACCEL;
+ watermark = &st->fifo.watermark.accel;
+- ts = iio_priv(st->indio_accel);
+ } else {
+ return -EINVAL;
+ }
+@@ -417,8 +419,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
+ if (!st->fifo.on)
+ ret = inv_icm42600_set_temp_conf(st, false, &sleep_temp);
+
+- inv_icm42600_timestamp_reset(ts);
+-
+ out_unlock:
+ mutex_unlock(&st->lock);
+
+diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+index 86858da9cc38f..ca85fccc98393 100644
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+@@ -565,7 +565,7 @@ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq,
+ bool open_drain;
+ int ret;
+
+- if (chip < 0 || chip >= INV_CHIP_NB) {
++ if (chip <= INV_CHIP_INVALID || chip >= INV_CHIP_NB) {
+ dev_err(dev, "invalid chip = %d\n", chip);
+ return -ENODEV;
+ }
+diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+index 85b1934cec60e..53891010a91de 100644
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+@@ -18,12 +18,15 @@ static int inv_icm42600_i2c_bus_setup(struct inv_icm42600_state *st)
+ unsigned int mask, val;
+ int ret;
+
+- /* setup interface registers */
+- ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
+- INV_ICM42600_INTF_CONFIG6_MASK,
+- INV_ICM42600_INTF_CONFIG6_I3C_EN);
+- if (ret)
+- return ret;
++ /*
++ * setup interface registers
++	 * This register write to REG_INTF_CONFIG6 enables a spike filter that
++	 * impacts the line and can prevent the I2C ACK from being seen by the
++	 * controller, so we don't test the return value.
++ */
++ regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
++ INV_ICM42600_INTF_CONFIG6_MASK,
++ INV_ICM42600_INTF_CONFIG6_I3C_EN);
+
+ ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG4,
+ INV_ICM42600_INTF_CONFIG4_I3C_BUS_ONLY, 0);
+diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_magn.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_magn.c
+index f282e9cc34c53..6aee6c989485e 100644
+--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_magn.c
++++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_magn.c
+@@ -261,6 +261,7 @@ int inv_mpu_magn_set_rate(const struct inv_mpu6050_state *st, int fifo_rate)
+ */
+ int inv_mpu_magn_set_orient(struct inv_mpu6050_state *st)
+ {
++ struct device *dev = regmap_get_device(st->map);
+ const char *orient;
+ char *str;
+ int i;
+@@ -279,22 +280,27 @@ int inv_mpu_magn_set_orient(struct inv_mpu6050_state *st)
+ st->magn_orient.rotation[4] = st->orientation.rotation[1];
+ st->magn_orient.rotation[5] = st->orientation.rotation[2];
+ /* z <- -z */
+- for (i = 0; i < 3; ++i) {
+- orient = st->orientation.rotation[6 + i];
+- /* use length + 2 for adding minus sign if needed */
+- str = devm_kzalloc(regmap_get_device(st->map),
+- strlen(orient) + 2, GFP_KERNEL);
+- if (str == NULL)
++ for (i = 6; i < 9; ++i) {
++ orient = st->orientation.rotation[i];
++
++ /*
++ * The value is negated according to one of the following
++ * rules:
++ *
++ * 1) Drop leading minus.
++ * 2) Leave 0 as is.
++ * 3) Add leading minus.
++ */
++ if (orient[0] == '-')
++ str = devm_kstrdup(dev, orient + 1, GFP_KERNEL);
++ else if (!strcmp(orient, "0"))
++ str = devm_kstrdup(dev, orient, GFP_KERNEL);
++ else
++ str = devm_kasprintf(dev, GFP_KERNEL, "-%s", orient);
++ if (!str)
+ return -ENOMEM;
+- if (strcmp(orient, "0") == 0) {
+- strcpy(str, orient);
+- } else if (orient[0] == '-') {
+- strcpy(str, &orient[1]);
+- } else {
+- str[0] = '-';
+- strcpy(&str[1], orient);
+- }
+- st->magn_orient.rotation[6 + i] = str;
++
++ st->magn_orient.rotation[i] = str;
+ }
+ break;
+ default:
+diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c
+index 1dabfd615dabf..f89724481df93 100644
+--- a/drivers/iio/imu/kmx61.c
++++ b/drivers/iio/imu/kmx61.c
+@@ -1385,7 +1385,7 @@ static int kmx61_probe(struct i2c_client *client,
+ ret = iio_device_register(data->acc_indio_dev);
+ if (ret < 0) {
+ dev_err(&client->dev, "Failed to register acc iio device\n");
+- goto err_buffer_cleanup_mag;
++ goto err_pm_cleanup;
+ }
+
+ ret = iio_device_register(data->mag_indio_dev);
+@@ -1398,6 +1398,9 @@ static int kmx61_probe(struct i2c_client *client,
+
+ err_iio_unregister_acc:
+ iio_device_unregister(data->acc_indio_dev);
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(&client->dev);
++ pm_runtime_disable(&client->dev);
+ err_buffer_cleanup_mag:
+ if (client->irq > 0)
+ iio_triggered_buffer_cleanup(data->mag_indio_dev);
+diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+index db45f1fc0b817..a778aceba3b10 100644
+--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+@@ -1279,6 +1279,8 @@ st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u32 req_odr)
+ int err;
+
+ switch (sensor->id) {
++ case ST_LSM6DSX_ID_GYRO:
++ break;
+ case ST_LSM6DSX_ID_EXT0:
+ case ST_LSM6DSX_ID_EXT1:
+ case ST_LSM6DSX_ID_EXT2:
+@@ -1304,8 +1306,8 @@ st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u32 req_odr)
+ }
+ break;
+ }
+- default:
+- break;
++ default: /* should never occur */
++ return -EINVAL;
+ }
+
+ if (req_odr > 0) {
+@@ -1370,8 +1372,12 @@ static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor,
+ if (err < 0)
+ return err;
+
++ /*
++ * we need to wait for sensor settling time before
++ * reading data in order to avoid corrupted samples
++ */
+ delay = 1000000000 / sensor->odr;
+- usleep_range(delay, 2 * delay);
++ usleep_range(3 * delay, 4 * delay);
+
+ err = st_lsm6dsx_read_locked(hw, addr, &data, sizeof(data));
+ if (err < 0)
+diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
+index a95cc2da56be6..c81dbd2f09727 100644
+--- a/drivers/iio/industrialio-buffer.c
++++ b/drivers/iio/industrialio-buffer.c
+@@ -1312,6 +1312,11 @@ static struct attribute *iio_buffer_wrap_attr(struct iio_buffer *buffer,
+ iio_attr->buffer = buffer;
+ memcpy(&iio_attr->dev_attr, dattr, sizeof(iio_attr->dev_attr));
+ iio_attr->dev_attr.attr.name = kstrdup_const(attr->name, GFP_KERNEL);
++ if (!iio_attr->dev_attr.attr.name) {
++ kfree(iio_attr);
++ return NULL;
++ }
++
+ sysfs_attr_init(&iio_attr->dev_attr.attr);
+
+ list_add(&iio_attr->l, &buffer->buffer_attr_list);
+@@ -1362,10 +1367,10 @@ static int iio_buffer_register_legacy_sysfs_groups(struct iio_dev *indio_dev,
+
+ return 0;
+
+-error_free_buffer_attrs:
+- kfree(iio_dev_opaque->legacy_buffer_group.attrs);
+ error_free_scan_el_attrs:
+ kfree(iio_dev_opaque->legacy_scan_el_group.attrs);
++error_free_buffer_attrs:
++ kfree(iio_dev_opaque->legacy_buffer_group.attrs);
+
+ return ret;
+ }
+@@ -1441,9 +1446,17 @@ static long iio_device_buffer_getfd(struct iio_dev *indio_dev, unsigned long arg
+ }
+
+ if (copy_to_user(ival, &fd, sizeof(fd))) {
+- put_unused_fd(fd);
+- ret = -EFAULT;
+- goto error_free_ib;
++ /*
++ * "Leak" the fd, as there's not much we can do about this
++ * anyway. 'fd' might have been closed already, as
++ * anon_inode_getfd() called fd_install() on it, which made
++ * it reachable by userland.
++ *
++ * Instead of allowing a malicious user to play tricks with
++ * us, rely on the process exit path to do any necessary
++ * cleanup, as in releasing the file, if still needed.
++ */
++ return -EFAULT;
+ }
+
+ return 0;
+@@ -1531,6 +1544,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
+ sizeof(struct attribute *) * buffer_attrcount);
+
+ buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs);
++ buffer->buffer_group.attrs = attr;
+
+ for (i = 0; i < buffer_attrcount; i++) {
+ struct attribute *wrapped;
+@@ -1538,7 +1552,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
+ wrapped = iio_buffer_wrap_attr(buffer, attr[i]);
+ if (!wrapped) {
+ ret = -ENOMEM;
+- goto error_free_scan_mask;
++ goto error_free_buffer_attrs;
+ }
+ attr[i] = wrapped;
+ }
+@@ -1553,8 +1567,6 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
+ goto error_free_buffer_attrs;
+ }
+
+- buffer->buffer_group.attrs = attr;
+-
+ ret = iio_device_register_sysfs_group(indio_dev, &buffer->buffer_group);
+ if (ret)
+ goto error_free_buffer_attr_group_name;
+@@ -1583,8 +1595,12 @@ error_cleanup_dynamic:
+ return ret;
+ }
+
+-static void __iio_buffer_free_sysfs_and_mask(struct iio_buffer *buffer)
++static void __iio_buffer_free_sysfs_and_mask(struct iio_buffer *buffer,
++ struct iio_dev *indio_dev,
++ int index)
+ {
++ if (index == 0)
++ iio_buffer_unregister_legacy_sysfs_groups(indio_dev);
+ bitmap_free(buffer->scan_mask);
+ kfree(buffer->buffer_group.name);
+ kfree(buffer->buffer_group.attrs);
+@@ -1616,7 +1632,7 @@ int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
+ buffer = iio_dev_opaque->attached_buffers[i];
+ ret = __iio_buffer_alloc_sysfs_and_mask(buffer, indio_dev, i);
+ if (ret) {
+- unwind_idx = i;
++ unwind_idx = i - 1;
+ goto error_unwind_sysfs_and_mask;
+ }
+ }
+@@ -1638,7 +1654,7 @@ int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
+ error_unwind_sysfs_and_mask:
+ for (; unwind_idx >= 0; unwind_idx--) {
+ buffer = iio_dev_opaque->attached_buffers[unwind_idx];
+- __iio_buffer_free_sysfs_and_mask(buffer);
++ __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, unwind_idx);
+ }
+ return ret;
+ }
+@@ -1655,11 +1671,9 @@ void iio_buffers_free_sysfs_and_mask(struct iio_dev *indio_dev)
+ iio_device_ioctl_handler_unregister(iio_dev_opaque->buffer_ioctl_handler);
+ kfree(iio_dev_opaque->buffer_ioctl_handler);
+
+- iio_buffer_unregister_legacy_sysfs_groups(indio_dev);
+-
+ for (i = iio_dev_opaque->attached_buffers_cnt - 1; i >= 0; i--) {
+ buffer = iio_dev_opaque->attached_buffers[i];
+- __iio_buffer_free_sysfs_and_mask(buffer);
++ __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, i);
+ }
+ }
+
+diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
+index 2dbb37e09b8cf..a7f5d432c95d9 100644
+--- a/drivers/iio/industrialio-core.c
++++ b/drivers/iio/industrialio-core.c
+@@ -818,7 +818,23 @@ static ssize_t iio_format_avail_list(char *buf, const int *vals,
+
+ static ssize_t iio_format_avail_range(char *buf, const int *vals, int type)
+ {
+- return iio_format_list(buf, vals, type, 3, "[", "]");
++ int length;
++
++ /*
++	 * length refers to the array size, not the number of elements.
++	 * The purpose is to print the range [min, step, max], so length should
++	 * be 3 in the case of int, and 6 for other types.
++ */
++ switch (type) {
++ case IIO_VAL_INT:
++ length = 3;
++ break;
++ default:
++ length = 6;
++ break;
++ }
++
++ return iio_format_list(buf, vals, type, length, "[", "]");
+ }
+
+ static ssize_t iio_read_channel_info_avail(struct device *dev,
+@@ -1600,6 +1616,7 @@ static void iio_device_unregister_sysfs(struct iio_dev *indio_dev)
+ kfree(iio_dev_opaque->chan_attr_group.attrs);
+ iio_dev_opaque->chan_attr_group.attrs = NULL;
+ kfree(iio_dev_opaque->groups);
++ iio_dev_opaque->groups = NULL;
+ }
+
+ static void iio_dev_release(struct device *device)
+@@ -1664,7 +1681,13 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv)
+ kfree(iio_dev_opaque);
+ return NULL;
+ }
+- dev_set_name(&indio_dev->dev, "iio:device%d", iio_dev_opaque->id);
++
++ if (dev_set_name(&indio_dev->dev, "iio:device%d", iio_dev_opaque->id)) {
++ ida_simple_remove(&iio_ida, iio_dev_opaque->id);
++ kfree(iio_dev_opaque);
++ return NULL;
++ }
++
+ INIT_LIST_HEAD(&iio_dev_opaque->buffer_list);
+ INIT_LIST_HEAD(&iio_dev_opaque->ioctl_handlers);
+
+diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
+index d0732eac0f0ac..07bf47a1a3567 100644
+--- a/drivers/iio/industrialio-event.c
++++ b/drivers/iio/industrialio-event.c
+@@ -549,7 +549,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
+
+ ret = iio_device_register_sysfs_group(indio_dev, &ev_int->group);
+ if (ret)
+- goto error_free_setup_event_lines;
++ goto error_free_group_attrs;
+
+ ev_int->ioctl_handler.ioctl = iio_event_ioctl;
+ iio_device_ioctl_handler_register(&iio_dev_opaque->indio_dev,
+@@ -557,6 +557,8 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
+
+ return 0;
+
++error_free_group_attrs:
++ kfree(ev_int->group.attrs);
+ error_free_setup_event_lines:
+ iio_free_chan_devattr_list(&ev_int->dev_attr_list);
+ kfree(ev_int);
+diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c
+index 9ae793a70b8bf..a7714d32a6418 100644
+--- a/drivers/iio/industrialio-sw-trigger.c
++++ b/drivers/iio/industrialio-sw-trigger.c
+@@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t)
+
+ t->group = configfs_register_default_group(iio_triggers_group, t->name,
+ &iio_trigger_type_group_type);
+- if (IS_ERR(t->group))
++ if (IS_ERR(t->group)) {
++ mutex_lock(&iio_trigger_types_lock);
++ list_del(&t->list);
++ mutex_unlock(&iio_trigger_types_lock);
+ ret = PTR_ERR(t->group);
++ }
+
+ return ret;
+ }
+diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
+index b23caa2f2aa1f..f504ed351b3e2 100644
+--- a/drivers/iio/industrialio-trigger.c
++++ b/drivers/iio/industrialio-trigger.c
+@@ -162,6 +162,39 @@ static struct iio_trigger *iio_trigger_acquire_by_name(const char *name)
+ return trig;
+ }
+
++static void iio_reenable_work_fn(struct work_struct *work)
++{
++ struct iio_trigger *trig = container_of(work, struct iio_trigger,
++ reenable_work);
++
++ /*
++ * This 'might' occur after the trigger state is set to disabled -
++ * in that case the driver should skip reenabling.
++ */
++ trig->ops->reenable(trig);
++}
++
++/*
++ * In general, reenable callbacks may need to sleep and this path is
++ * not performance sensitive, so just queue up a work item
++ * to reenable the trigger for us.
++ *
++ * Races that can cause this:
++ * 1) A handler occurs entirely in interrupt context, so the final
++ * decrement of the use count still happens in this interrupt.
++ * 2) The trigger has been removed, but one last interrupt gets through.
++ *
++ * For (1) we must call reenable, but not in atomic context.
++ * For (2) it should be safe to call reenable, if drivers never blindly
++ * reenable after state is off.
++ */
++static void iio_trigger_notify_done_atomic(struct iio_trigger *trig)
++{
++ if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
++ trig->ops->reenable)
++ schedule_work(&trig->reenable_work);
++}
++
+ void iio_trigger_poll(struct iio_trigger *trig)
+ {
+ int i;
+@@ -173,7 +206,7 @@ void iio_trigger_poll(struct iio_trigger *trig)
+ if (trig->subirqs[i].enabled)
+ generic_handle_irq(trig->subirq_base + i);
+ else
+- iio_trigger_notify_done(trig);
++ iio_trigger_notify_done_atomic(trig);
+ }
+ }
+ }
+@@ -535,6 +568,7 @@ struct iio_trigger *viio_trigger_alloc(struct device *parent,
+ trig->dev.type = &iio_trig_type;
+ trig->dev.bus = &iio_bus_type;
+ device_initialize(&trig->dev);
++ INIT_WORK(&trig->reenable_work, iio_reenable_work_fn);
+
+ mutex_init(&trig->pool_lock);
+ trig->subirq_base = irq_alloc_descs(-1, 0,
+@@ -556,7 +590,6 @@ struct iio_trigger *viio_trigger_alloc(struct device *parent,
+ irq_modify_status(trig->subirq_base + i,
+ IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
+ }
+- get_device(&trig->dev);
+
+ return trig;
+
+diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
+index 391a3380a1d10..bf9ce01c854bb 100644
+--- a/drivers/iio/inkern.c
++++ b/drivers/iio/inkern.c
+@@ -148,9 +148,10 @@ static int __of_iio_channel_get(struct iio_channel *channel,
+
+ idev = bus_find_device(&iio_bus_type, NULL, iiospec.np,
+ iio_dev_node_match);
+- of_node_put(iiospec.np);
+- if (idev == NULL)
++ if (idev == NULL) {
++ of_node_put(iiospec.np);
+ return -EPROBE_DEFER;
++ }
+
+ indio_dev = dev_to_iio_dev(idev);
+ channel->indio_dev = indio_dev;
+@@ -158,6 +159,7 @@ static int __of_iio_channel_get(struct iio_channel *channel,
+ index = indio_dev->info->of_xlate(indio_dev, &iiospec);
+ else
+ index = __of_iio_simple_xlate(indio_dev, &iiospec);
++ of_node_put(iiospec.np);
+ if (index < 0)
+ goto err_put;
+ channel->channel = &indio_dev->channels[index];
+@@ -393,6 +395,8 @@ struct iio_channel *devm_of_iio_channel_get_by_name(struct device *dev,
+ channel = of_iio_channel_get_by_name(np, channel_name);
+ if (IS_ERR(channel))
+ return channel;
++ if (!channel)
++ return ERR_PTR(-ENODEV);
+
+ ret = devm_add_action_or_reset(dev, devm_iio_channel_free, channel);
+ if (ret)
+@@ -578,28 +582,50 @@ EXPORT_SYMBOL_GPL(iio_read_channel_average_raw);
+ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan,
+ int raw, int *processed, unsigned int scale)
+ {
+- int scale_type, scale_val, scale_val2, offset;
++ int scale_type, scale_val, scale_val2;
++ int offset_type, offset_val, offset_val2;
+ s64 raw64 = raw;
+- int ret;
+
+- ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET);
+- if (ret >= 0)
+- raw64 += offset;
++ offset_type = iio_channel_read(chan, &offset_val, &offset_val2,
++ IIO_CHAN_INFO_OFFSET);
++ if (offset_type >= 0) {
++ switch (offset_type) {
++ case IIO_VAL_INT:
++ break;
++ case IIO_VAL_INT_PLUS_MICRO:
++ case IIO_VAL_INT_PLUS_NANO:
++ /*
++ * Both IIO_VAL_INT_PLUS_MICRO and IIO_VAL_INT_PLUS_NANO
++ * implicitely truncate the offset to it's integer form.
++		 * implicitly truncate the offset to its integer form.
++ break;
++ case IIO_VAL_FRACTIONAL:
++ offset_val /= offset_val2;
++ break;
++ case IIO_VAL_FRACTIONAL_LOG2:
++ offset_val >>= offset_val2;
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ raw64 += offset_val;
++ }
+
+ scale_type = iio_channel_read(chan, &scale_val, &scale_val2,
+ IIO_CHAN_INFO_SCALE);
+ if (scale_type < 0) {
+ /*
+- * Just pass raw values as processed if no scaling is
+- * available.
++		 * If no channel scaling is available, apply the consumer scale
++		 * to the raw value and return.
+ */
+- *processed = raw;
++ *processed = raw * scale;
+ return 0;
+ }
+
+ switch (scale_type) {
+ case IIO_VAL_INT:
+- *processed = raw64 * scale_val;
++ *processed = raw64 * scale_val * scale;
+ break;
+ case IIO_VAL_INT_PLUS_MICRO:
+ if (scale_val2 < 0)
+diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
+index a62c7b4b86784..b46eac71941c9 100644
+--- a/drivers/iio/light/Kconfig
++++ b/drivers/iio/light/Kconfig
+@@ -294,6 +294,8 @@ config RPR0521
+ tristate "ROHM RPR0521 ALS and proximity sensor driver"
+ depends on I2C
+ select REGMAP_I2C
++ select IIO_BUFFER
++ select IIO_TRIGGERED_BUFFER
+ help
+ Say Y here if you want to build support for ROHM's RPR0521
+ ambient light and proximity sensor device.
+diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
+index 4141c0fa7bc46..7c4353317337e 100644
+--- a/drivers/iio/light/apds9960.c
++++ b/drivers/iio/light/apds9960.c
+@@ -54,9 +54,6 @@
+ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2
+
+ #define APDS9960_REG_CONFIG_2 0x90
+-#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60
+-#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5
+-
+ #define APDS9960_REG_ID 0x92
+
+ #define APDS9960_REG_STATUS 0x93
+@@ -77,6 +74,9 @@
+ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6
+
+ #define APDS9960_REG_GCONF_2 0xa3
++#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60
++#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5
++
+ #define APDS9960_REG_GOFFSET_U 0xa4
+ #define APDS9960_REG_GOFFSET_D 0xa5
+ #define APDS9960_REG_GPULSE 0xa6
+@@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val)
+ }
+
+ ret = regmap_update_bits(data->regmap,
+- APDS9960_REG_CONFIG_2,
+- APDS9960_REG_CONFIG_2_GGAIN_MASK,
+- idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT);
++ APDS9960_REG_GCONF_2,
++ APDS9960_REG_GCONF_2_GGAIN_MASK,
++ idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT);
+ if (!ret)
+ data->pxs_gain = idx;
+ mutex_unlock(&data->lock);
+diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c
+index 97649944f1df6..c14a630dd683b 100644
+--- a/drivers/iio/light/cm32181.c
++++ b/drivers/iio/light/cm32181.c
+@@ -429,6 +429,14 @@ static const struct iio_info cm32181_info = {
+ .attrs = &cm32181_attribute_group,
+ };
+
++static void cm32181_unregister_dummy_client(void *data)
++{
++ struct i2c_client *client = data;
++
++ /* Unregister the dummy client */
++ i2c_unregister_device(client);
++}
++
+ static int cm32181_probe(struct i2c_client *client)
+ {
+ struct device *dev = &client->dev;
+@@ -458,6 +466,10 @@ static int cm32181_probe(struct i2c_client *client)
+ client = i2c_acpi_new_device(dev, 1, &board_info);
+ if (IS_ERR(client))
+ return PTR_ERR(client);
++
++ ret = devm_add_action_or_reset(dev, cm32181_unregister_dummy_client, client);
++ if (ret)
++ return ret;
+ }
+
+ cm32181 = iio_priv(indio_dev);
+diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c
+index de472f23d1cba..16b893bae3881 100644
+--- a/drivers/iio/light/cros_ec_light_prox.c
++++ b/drivers/iio/light/cros_ec_light_prox.c
+@@ -181,8 +181,7 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ ret = cros_ec_sensors_core_init(pdev, indio_dev, true,
+- cros_ec_sensors_capture,
+- cros_ec_sensors_push_data);
++ cros_ec_sensors_capture);
+ if (ret)
+ return ret;
+
+@@ -240,7 +239,8 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev)
+
+ state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd;
+
+- return devm_iio_device_register(dev, indio_dev);
++ return cros_ec_sensors_core_register(dev, indio_dev,
++ cros_ec_sensors_push_data);
+ }
+
+ static const struct platform_device_id cros_ec_light_prox_ids[] = {
+diff --git a/drivers/iio/light/isl29028.c b/drivers/iio/light/isl29028.c
+index 9de3262aa6883..a62787f5d5e7b 100644
+--- a/drivers/iio/light/isl29028.c
++++ b/drivers/iio/light/isl29028.c
+@@ -625,7 +625,7 @@ static int isl29028_probe(struct i2c_client *client,
+ ISL29028_POWER_OFF_DELAY_MS);
+ pm_runtime_use_autosuspend(&client->dev);
+
+- ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev);
++ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(&client->dev,
+ "%s(): iio registration failed with error %d\n",
+diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
+index 1830221da48d2..f0bd0ad34f222 100644
+--- a/drivers/iio/light/ltr501.c
++++ b/drivers/iio/light/ltr501.c
+@@ -1273,7 +1273,7 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p)
+ ret = regmap_bulk_read(data->regmap, LTR501_ALS_DATA1,
+ als_buf, sizeof(als_buf));
+ if (ret < 0)
+- return ret;
++ goto done;
+ if (test_bit(0, indio_dev->active_scan_mask))
+ scan.channels[j++] = le16_to_cpu(als_buf[1]);
+ if (test_bit(1, indio_dev->active_scan_mask))
+diff --git a/drivers/iio/light/max44009.c b/drivers/iio/light/max44009.c
+index 801e5a0ad496b..f3648f20ef2c0 100644
+--- a/drivers/iio/light/max44009.c
++++ b/drivers/iio/light/max44009.c
+@@ -528,6 +528,12 @@ static int max44009_probe(struct i2c_client *client,
+ return devm_iio_device_register(&client->dev, indio_dev);
+ }
+
++static const struct of_device_id max44009_of_match[] = {
++ { .compatible = "maxim,max44009" },
++ { }
++};
++MODULE_DEVICE_TABLE(of, max44009_of_match);
++
+ static const struct i2c_device_id max44009_id[] = {
+ { "max44009", 0 },
+ { }
+@@ -537,18 +543,13 @@ MODULE_DEVICE_TABLE(i2c, max44009_id);
+ static struct i2c_driver max44009_driver = {
+ .driver = {
+ .name = MAX44009_DRV_NAME,
++ .of_match_table = max44009_of_match,
+ },
+ .probe = max44009_probe,
+ .id_table = max44009_id,
+ };
+ module_i2c_driver(max44009_driver);
+
+-static const struct of_device_id max44009_of_match[] = {
+- { .compatible = "maxim,max44009" },
+- { }
+-};
+-MODULE_DEVICE_TABLE(of, max44009_of_match);
+-
+ MODULE_AUTHOR("Robert Eshleman <bobbyeshleman@gmail.com>");
+ MODULE_LICENSE("GPL v2");
+ MODULE_DESCRIPTION("MAX44009 ambient light sensor driver");
+diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c
+index 07e91846307c7..fc63856ed54de 100644
+--- a/drivers/iio/light/stk3310.c
++++ b/drivers/iio/light/stk3310.c
+@@ -546,9 +546,8 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private)
+ mutex_lock(&data->lock);
+ ret = regmap_field_read(data->reg_flag_nf, &dir);
+ if (ret < 0) {
+- dev_err(&data->client->dev, "register read failed\n");
+- mutex_unlock(&data->lock);
+- return ret;
++ dev_err(&data->client->dev, "register read failed: %d\n", ret);
++ goto out;
+ }
+ event = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 1,
+ IIO_EV_TYPE_THRESH,
+@@ -560,6 +559,7 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private)
+ ret = regmap_field_write(data->reg_flag_psint, 0);
+ if (ret < 0)
+ dev_err(&data->client->dev, "failed to reset interrupts\n");
++out:
+ mutex_unlock(&data->lock);
+
+ return IRQ_HANDLED;
+diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c
+index 5bf2bfbc5379e..af616352fe715 100644
+--- a/drivers/iio/light/tsl2563.c
++++ b/drivers/iio/light/tsl2563.c
+@@ -705,6 +705,7 @@ static int tsl2563_probe(struct i2c_client *client,
+ struct iio_dev *indio_dev;
+ struct tsl2563_chip *chip;
+ struct tsl2563_platform_data *pdata = client->dev.platform_data;
++ unsigned long irq_flags;
+ int err = 0;
+ u8 id = 0;
+
+@@ -760,10 +761,15 @@ static int tsl2563_probe(struct i2c_client *client,
+ indio_dev->info = &tsl2563_info_no_irq;
+
+ if (client->irq) {
++ irq_flags = irq_get_trigger_type(client->irq);
++ if (irq_flags == IRQF_TRIGGER_NONE)
++ irq_flags = IRQF_TRIGGER_RISING;
++ irq_flags |= IRQF_ONESHOT;
++
+ err = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL,
+ &tsl2563_event_handler,
+- IRQF_TRIGGER_RISING | IRQF_ONESHOT,
++ irq_flags,
+ "tsl2563_event",
+ indio_dev);
+ if (err) {
+diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c
+index 7e101d5f72eea..d696d19e2e8e9 100644
+--- a/drivers/iio/light/tsl2583.c
++++ b/drivers/iio/light/tsl2583.c
+@@ -858,7 +858,7 @@ static int tsl2583_probe(struct i2c_client *clientp,
+ TSL2583_POWER_OFF_DELAY_MS);
+ pm_runtime_use_autosuspend(&clientp->dev);
+
+- ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev);
++ ret = iio_device_register(indio_dev);
+ if (ret) {
+ dev_err(&clientp->dev, "%s: iio registration failed\n",
+ __func__);
+diff --git a/drivers/iio/light/tsl2772.c b/drivers/iio/light/tsl2772.c
+index d79205361dfac..ff33ad3714206 100644
+--- a/drivers/iio/light/tsl2772.c
++++ b/drivers/iio/light/tsl2772.c
+@@ -606,6 +606,7 @@ static int tsl2772_read_prox_diodes(struct tsl2772_chip *chip)
+ return -EINVAL;
+ }
+ }
++ chip->settings.prox_diode = prox_diode_mask;
+
+ return 0;
+ }
+diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c
+index 0db306ee910e0..2c439610ddb91 100644
+--- a/drivers/iio/light/vcnl4035.c
++++ b/drivers/iio/light/vcnl4035.c
+@@ -8,6 +8,7 @@
+ * TODO: Proximity
+ */
+ #include <linux/bitops.h>
++#include <linux/bitfield.h>
+ #include <linux/i2c.h>
+ #include <linux/module.h>
+ #include <linux/pm_runtime.h>
+@@ -42,6 +43,7 @@
+ #define VCNL4035_ALS_PERS_MASK GENMASK(3, 2)
+ #define VCNL4035_INT_ALS_IF_H_MASK BIT(12)
+ #define VCNL4035_INT_ALS_IF_L_MASK BIT(13)
++#define VCNL4035_DEV_ID_MASK GENMASK(7, 0)
+
+ /* Default values */
+ #define VCNL4035_MODE_ALS_ENABLE BIT(0)
+@@ -413,6 +415,7 @@ static int vcnl4035_init(struct vcnl4035_data *data)
+ return ret;
+ }
+
++ id = FIELD_GET(VCNL4035_DEV_ID_MASK, id);
+ if (id != VCNL4035_DEV_ID_VAL) {
+ dev_err(&data->client->dev, "Wrong id, got %x, expected %x\n",
+ id, VCNL4035_DEV_ID_VAL);
+diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
+index 42b8a2680e3aa..1509fd0cbb50f 100644
+--- a/drivers/iio/magnetometer/ak8975.c
++++ b/drivers/iio/magnetometer/ak8975.c
+@@ -389,6 +389,7 @@ static int ak8975_power_on(const struct ak8975_data *data)
+ if (ret) {
+ dev_warn(&data->client->dev,
+ "Failed to enable specified Vid supply\n");
++ regulator_disable(data->vdd);
+ return ret;
+ }
+
+diff --git a/drivers/iio/magnetometer/bmc150_magn.c b/drivers/iio/magnetometer/bmc150_magn.c
+index f96f531753495..3d4d21f979fab 100644
+--- a/drivers/iio/magnetometer/bmc150_magn.c
++++ b/drivers/iio/magnetometer/bmc150_magn.c
+@@ -962,13 +962,14 @@ int bmc150_magn_probe(struct device *dev, struct regmap *regmap,
+ ret = iio_device_register(indio_dev);
+ if (ret < 0) {
+ dev_err(dev, "unable to register iio device\n");
+- goto err_disable_runtime_pm;
++ goto err_pm_cleanup;
+ }
+
+ dev_dbg(dev, "Registered device %s\n", name);
+ return 0;
+
+-err_disable_runtime_pm:
++err_pm_cleanup:
++ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
+ err_buffer_cleanup:
+ iio_triggered_buffer_cleanup(indio_dev);
+diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c
+index 2dfe4ee99591b..fa78f0a3b53ea 100644
+--- a/drivers/iio/magnetometer/st_magn_i2c.c
++++ b/drivers/iio/magnetometer/st_magn_i2c.c
+@@ -102,10 +102,10 @@ static int st_magn_i2c_remove(struct i2c_client *client)
+ {
+ struct iio_dev *indio_dev = i2c_get_clientdata(client);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_magn_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c
+index fba9787963952..ff43cbf61b056 100644
+--- a/drivers/iio/magnetometer/st_magn_spi.c
++++ b/drivers/iio/magnetometer/st_magn_spi.c
+@@ -96,10 +96,10 @@ static int st_magn_spi_remove(struct spi_device *spi)
+ {
+ struct iio_dev *indio_dev = spi_get_drvdata(spi);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_magn_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c
+index 9ff7b0e56cf67..40192aa46b048 100644
+--- a/drivers/iio/magnetometer/yamaha-yas530.c
++++ b/drivers/iio/magnetometer/yamaha-yas530.c
+@@ -132,7 +132,7 @@ struct yas5xx {
+ unsigned int version;
+ char name[16];
+ struct yas5xx_calibration calibration;
+- u8 hard_offsets[3];
++ s8 hard_offsets[3];
+ struct iio_mount_matrix orientation;
+ struct regmap *map;
+ struct regulator_bulk_data regs[2];
+@@ -639,7 +639,7 @@ static int yas532_get_calibration_data(struct yas5xx *yas5xx)
+ dev_dbg(yas5xx->dev, "calibration data: %*ph\n", 14, data);
+
+ /* Sanity check, is this all zeroes? */
+- if (memchr_inv(data, 0x00, 13)) {
++ if (memchr_inv(data, 0x00, 13) == NULL) {
+ if (!(data[13] & BIT(7)))
+ dev_warn(yas5xx->dev, "calibration is blank!\n");
+ }
+diff --git a/drivers/iio/pressure/cros_ec_baro.c b/drivers/iio/pressure/cros_ec_baro.c
+index 2f882e1094232..0511edbf868d7 100644
+--- a/drivers/iio/pressure/cros_ec_baro.c
++++ b/drivers/iio/pressure/cros_ec_baro.c
+@@ -138,8 +138,7 @@ static int cros_ec_baro_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ ret = cros_ec_sensors_core_init(pdev, indio_dev, true,
+- cros_ec_sensors_capture,
+- cros_ec_sensors_push_data);
++ cros_ec_sensors_capture);
+ if (ret)
+ return ret;
+
+@@ -186,7 +185,8 @@ static int cros_ec_baro_probe(struct platform_device *pdev)
+
+ state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd;
+
+- return devm_iio_device_register(dev, indio_dev);
++ return cros_ec_sensors_core_register(dev, indio_dev,
++ cros_ec_sensors_push_data);
+ }
+
+ static const struct platform_device_id cros_ec_baro_ids[] = {
+diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
+index 0730380ceb692..cf8b92fae1b3d 100644
+--- a/drivers/iio/pressure/dps310.c
++++ b/drivers/iio/pressure/dps310.c
+@@ -89,6 +89,7 @@ struct dps310_data {
+ s32 c00, c10, c20, c30, c01, c11, c21;
+ s32 pressure_raw;
+ s32 temp_raw;
++ bool timeout_recovery_failed;
+ };
+
+ static const struct iio_chan_spec dps310_channels[] = {
+@@ -159,6 +160,102 @@ static int dps310_get_coefs(struct dps310_data *data)
+ return 0;
+ }
+
++/*
++ * Some versions of the chip will read temperatures in the ~60C range when
++ * it's actually ~20C. This is the manufacturer recommended workaround
++ * to correct the issue. The registers used below are undocumented.
++ */
++static int dps310_temp_workaround(struct dps310_data *data)
++{
++ int rc;
++ int reg;
++
++ rc = regmap_read(data->regmap, 0x32, &reg);
++ if (rc)
++ return rc;
++
++ /*
++ * If bit 1 is set then the device is okay, and the workaround does not
++ * need to be applied
++ */
++ if (reg & BIT(1))
++ return 0;
++
++ rc = regmap_write(data->regmap, 0x0e, 0xA5);
++ if (rc)
++ return rc;
++
++ rc = regmap_write(data->regmap, 0x0f, 0x96);
++ if (rc)
++ return rc;
++
++ rc = regmap_write(data->regmap, 0x62, 0x02);
++ if (rc)
++ return rc;
++
++ rc = regmap_write(data->regmap, 0x0e, 0x00);
++ if (rc)
++ return rc;
++
++ return regmap_write(data->regmap, 0x0f, 0x00);
++}
++
++static int dps310_startup(struct dps310_data *data)
++{
++ int rc;
++ int ready;
++
++ /*
++ * Set up pressure sensor in single sample, one measurement per second
++ * mode
++ */
++ rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0);
++ if (rc)
++ return rc;
++
++ /*
++ * Set up external (MEMS) temperature sensor in single sample, one
++ * measurement per second mode
++ */
++ rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT);
++ if (rc)
++ return rc;
++
++ /* Temp and pressure shifts are disabled when PRC <= 8 */
++ rc = regmap_write_bits(data->regmap, DPS310_CFG_REG,
++ DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0);
++ if (rc)
++ return rc;
++
++ /* MEAS_CFG doesn't update correctly unless first written with 0 */
++ rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
++ DPS310_MEAS_CTRL_BITS, 0);
++ if (rc)
++ return rc;
++
++ /* Turn on temperature and pressure measurement in the background */
++ rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
++ DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN |
++ DPS310_TEMP_EN | DPS310_BACKGROUND);
++ if (rc)
++ return rc;
++
++ /*
++ * Calibration coefficients required for reporting temperature.
++ * They are available 40ms after the device has started
++ */
++ rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
++ ready & DPS310_COEF_RDY, 10000, 40000);
++ if (rc)
++ return rc;
++
++ rc = dps310_get_coefs(data);
++ if (rc)
++ return rc;
++
++ return dps310_temp_workaround(data);
++}
++
+ static int dps310_get_pres_precision(struct dps310_data *data)
+ {
+ int rc;
+@@ -297,11 +394,69 @@ static int dps310_get_temp_k(struct dps310_data *data)
+ return scale_factors[ilog2(rc)];
+ }
+
++static int dps310_reset_wait(struct dps310_data *data)
++{
++ int rc;
++
++ rc = regmap_write(data->regmap, DPS310_RESET, DPS310_RESET_MAGIC);
++ if (rc)
++ return rc;
++
++ /* Wait for device chip access: 2.5ms in specification */
++ usleep_range(2500, 12000);
++ return 0;
++}
++
++static int dps310_reset_reinit(struct dps310_data *data)
++{
++ int rc;
++
++ rc = dps310_reset_wait(data);
++ if (rc)
++ return rc;
++
++ return dps310_startup(data);
++}
++
++static int dps310_ready_status(struct dps310_data *data, int ready_bit, int timeout)
++{
++ int sleep = DPS310_POLL_SLEEP_US(timeout);
++ int ready;
++
++ return regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready, ready & ready_bit,
++ sleep, timeout);
++}
++
++static int dps310_ready(struct dps310_data *data, int ready_bit, int timeout)
++{
++ int rc;
++
++ rc = dps310_ready_status(data, ready_bit, timeout);
++ if (rc) {
++ if (rc == -ETIMEDOUT && !data->timeout_recovery_failed) {
++ /* Reset and reinitialize the chip. */
++ if (dps310_reset_reinit(data)) {
++ data->timeout_recovery_failed = true;
++ } else {
++ /* Try again to get sensor ready status. */
++ if (dps310_ready_status(data, ready_bit, timeout))
++ data->timeout_recovery_failed = true;
++ else
++ return 0;
++ }
++ }
++
++ return rc;
++ }
++
++ data->timeout_recovery_failed = false;
++ return 0;
++}
++
+ static int dps310_read_pres_raw(struct dps310_data *data)
+ {
+ int rc;
+ int rate;
+- int ready;
+ int timeout;
+ s32 raw;
+ u8 val[3];
+@@ -313,9 +468,7 @@ static int dps310_read_pres_raw(struct dps310_data *data)
+ timeout = DPS310_POLL_TIMEOUT_US(rate);
+
+ /* Poll for sensor readiness; base the timeout upon the sample rate. */
+- rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
+- ready & DPS310_PRS_RDY,
+- DPS310_POLL_SLEEP_US(timeout), timeout);
++ rc = dps310_ready(data, DPS310_PRS_RDY, timeout);
+ if (rc)
+ goto done;
+
+@@ -352,7 +505,6 @@ static int dps310_read_temp_raw(struct dps310_data *data)
+ {
+ int rc;
+ int rate;
+- int ready;
+ int timeout;
+
+ if (mutex_lock_interruptible(&data->lock))
+@@ -362,10 +514,8 @@ static int dps310_read_temp_raw(struct dps310_data *data)
+ timeout = DPS310_POLL_TIMEOUT_US(rate);
+
+ /* Poll for sensor readiness; base the timeout upon the sample rate. */
+- rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
+- ready & DPS310_TMP_RDY,
+- DPS310_POLL_SLEEP_US(timeout), timeout);
+- if (rc < 0)
++ rc = dps310_ready(data, DPS310_TMP_RDY, timeout);
++ if (rc)
+ goto done;
+
+ rc = dps310_read_temp_ready(data);
+@@ -660,7 +810,7 @@ static void dps310_reset(void *action_data)
+ {
+ struct dps310_data *data = action_data;
+
+- regmap_write(data->regmap, DPS310_RESET, DPS310_RESET_MAGIC);
++ dps310_reset_wait(data);
+ }
+
+ static const struct regmap_config dps310_regmap_config = {
+@@ -677,52 +827,12 @@ static const struct iio_info dps310_info = {
+ .write_raw = dps310_write_raw,
+ };
+
+-/*
+- * Some verions of chip will read temperatures in the ~60C range when
+- * its actually ~20C. This is the manufacturer recommended workaround
+- * to correct the issue. The registers used below are undocumented.
+- */
+-static int dps310_temp_workaround(struct dps310_data *data)
+-{
+- int rc;
+- int reg;
+-
+- rc = regmap_read(data->regmap, 0x32, &reg);
+- if (rc < 0)
+- return rc;
+-
+- /*
+- * If bit 1 is set then the device is okay, and the workaround does not
+- * need to be applied
+- */
+- if (reg & BIT(1))
+- return 0;
+-
+- rc = regmap_write(data->regmap, 0x0e, 0xA5);
+- if (rc < 0)
+- return rc;
+-
+- rc = regmap_write(data->regmap, 0x0f, 0x96);
+- if (rc < 0)
+- return rc;
+-
+- rc = regmap_write(data->regmap, 0x62, 0x02);
+- if (rc < 0)
+- return rc;
+-
+- rc = regmap_write(data->regmap, 0x0e, 0x00);
+- if (rc < 0)
+- return rc;
+-
+- return regmap_write(data->regmap, 0x0f, 0x00);
+-}
+-
+ static int dps310_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+ {
+ struct dps310_data *data;
+ struct iio_dev *iio;
+- int rc, ready;
++ int rc;
+
+ iio = devm_iio_device_alloc(&client->dev, sizeof(*data));
+ if (!iio)
+@@ -747,54 +857,8 @@ static int dps310_probe(struct i2c_client *client,
+ if (rc)
+ return rc;
+
+- /*
+- * Set up pressure sensor in single sample, one measurement per second
+- * mode
+- */
+- rc = regmap_write(data->regmap, DPS310_PRS_CFG, 0);
+-
+- /*
+- * Set up external (MEMS) temperature sensor in single sample, one
+- * measurement per second mode
+- */
+- rc = regmap_write(data->regmap, DPS310_TMP_CFG, DPS310_TMP_EXT);
+- if (rc < 0)
+- return rc;
+-
+- /* Temp and pressure shifts are disabled when PRC <= 8 */
+- rc = regmap_write_bits(data->regmap, DPS310_CFG_REG,
+- DPS310_PRS_SHIFT_EN | DPS310_TMP_SHIFT_EN, 0);
+- if (rc < 0)
+- return rc;
+-
+- /* MEAS_CFG doesn't update correctly unless first written with 0 */
+- rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
+- DPS310_MEAS_CTRL_BITS, 0);
+- if (rc < 0)
+- return rc;
+-
+- /* Turn on temperature and pressure measurement in the background */
+- rc = regmap_write_bits(data->regmap, DPS310_MEAS_CFG,
+- DPS310_MEAS_CTRL_BITS, DPS310_PRS_EN |
+- DPS310_TEMP_EN | DPS310_BACKGROUND);
+- if (rc < 0)
+- return rc;
+-
+- /*
+- * Calibration coefficients required for reporting temperature.
+- * They are available 40ms after the device has started
+- */
+- rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
+- ready & DPS310_COEF_RDY, 10000, 40000);
+- if (rc < 0)
+- return rc;
+-
+- rc = dps310_get_coefs(data);
+- if (rc < 0)
+- return rc;
+-
+- rc = dps310_temp_workaround(data);
+- if (rc < 0)
++ rc = dps310_startup(data);
++ if (rc)
+ return rc;
+
+ rc = devm_iio_device_register(&client->dev, iio);
+diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h
+index bc06271fa38bc..5e2d2d4d87b56 100644
+--- a/drivers/iio/pressure/ms5611.h
++++ b/drivers/iio/pressure/ms5611.h
+@@ -25,13 +25,6 @@ enum {
+ MS5607,
+ };
+
+-struct ms5611_chip_info {
+- u16 prom[MS5611_PROM_WORDS_NB];
+-
+- int (*temp_and_pressure_compensate)(struct ms5611_chip_info *chip_info,
+- s32 *temp, s32 *pressure);
+-};
+-
+ /*
+ * OverSampling Rate descriptor.
+ * Warning: cmd MUST be kept aligned on a word boundary (see
+@@ -50,12 +43,15 @@ struct ms5611_state {
+ const struct ms5611_osr *pressure_osr;
+ const struct ms5611_osr *temp_osr;
+
+- int (*reset)(struct device *dev);
+- int (*read_prom_word)(struct device *dev, int index, u16 *word);
+- int (*read_adc_temp_and_pressure)(struct device *dev,
++ u16 prom[MS5611_PROM_WORDS_NB];
++
++ int (*reset)(struct ms5611_state *st);
++ int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word);
++ int (*read_adc_temp_and_pressure)(struct ms5611_state *st,
+ s32 *temp, s32 *pressure);
+
+- struct ms5611_chip_info *chip_info;
++ int (*compensate_temp_and_pressure)(struct ms5611_state *st, s32 *temp,
++ s32 *pressure);
+ struct regulator *vdd;
+ };
+
+diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c
+index 214b0d25f5980..874a73b3ea9d6 100644
+--- a/drivers/iio/pressure/ms5611_core.c
++++ b/drivers/iio/pressure/ms5611_core.c
+@@ -85,8 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev)
+ struct ms5611_state *st = iio_priv(indio_dev);
+
+ for (i = 0; i < MS5611_PROM_WORDS_NB; i++) {
+- ret = st->read_prom_word(&indio_dev->dev,
+- i, &st->chip_info->prom[i]);
++ ret = st->read_prom_word(st, i, &st->prom[i]);
+ if (ret < 0) {
+ dev_err(&indio_dev->dev,
+ "failed to read prom at %d\n", i);
+@@ -94,7 +93,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev)
+ }
+ }
+
+- if (!ms5611_prom_is_valid(st->chip_info->prom, MS5611_PROM_WORDS_NB)) {
++ if (!ms5611_prom_is_valid(st->prom, MS5611_PROM_WORDS_NB)) {
+ dev_err(&indio_dev->dev, "PROM integrity check failed\n");
+ return -ENODEV;
+ }
+@@ -108,28 +107,27 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev,
+ int ret;
+ struct ms5611_state *st = iio_priv(indio_dev);
+
+- ret = st->read_adc_temp_and_pressure(&indio_dev->dev, temp, pressure);
++ ret = st->read_adc_temp_and_pressure(st, temp, pressure);
+ if (ret < 0) {
+ dev_err(&indio_dev->dev,
+ "failed to read temperature and pressure\n");
+ return ret;
+ }
+
+- return st->chip_info->temp_and_pressure_compensate(st->chip_info,
+- temp, pressure);
++ return st->compensate_temp_and_pressure(st, temp, pressure);
+ }
+
+-static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info,
++static int ms5611_temp_and_pressure_compensate(struct ms5611_state *st,
+ s32 *temp, s32 *pressure)
+ {
+ s32 t = *temp, p = *pressure;
+ s64 off, sens, dt;
+
+- dt = t - (chip_info->prom[5] << 8);
+- off = ((s64)chip_info->prom[2] << 16) + ((chip_info->prom[4] * dt) >> 7);
+- sens = ((s64)chip_info->prom[1] << 15) + ((chip_info->prom[3] * dt) >> 8);
++ dt = t - (st->prom[5] << 8);
++ off = ((s64)st->prom[2] << 16) + ((st->prom[4] * dt) >> 7);
++ sens = ((s64)st->prom[1] << 15) + ((st->prom[3] * dt) >> 8);
+
+- t = 2000 + ((chip_info->prom[6] * dt) >> 23);
++ t = 2000 + ((st->prom[6] * dt) >> 23);
+ if (t < 2000) {
+ s64 off2, sens2, t2;
+
+@@ -155,17 +153,17 @@ static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_inf
+ return 0;
+ }
+
+-static int ms5607_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info,
++static int ms5607_temp_and_pressure_compensate(struct ms5611_state *st,
+ s32 *temp, s32 *pressure)
+ {
+ s32 t = *temp, p = *pressure;
+ s64 off, sens, dt;
+
+- dt = t - (chip_info->prom[5] << 8);
+- off = ((s64)chip_info->prom[2] << 17) + ((chip_info->prom[4] * dt) >> 6);
+- sens = ((s64)chip_info->prom[1] << 16) + ((chip_info->prom[3] * dt) >> 7);
++ dt = t - (st->prom[5] << 8);
++ off = ((s64)st->prom[2] << 17) + ((st->prom[4] * dt) >> 6);
++ sens = ((s64)st->prom[1] << 16) + ((st->prom[3] * dt) >> 7);
+
+- t = 2000 + ((chip_info->prom[6] * dt) >> 23);
++ t = 2000 + ((st->prom[6] * dt) >> 23);
+ if (t < 2000) {
+ s64 off2, sens2, t2, tmp;
+
+@@ -196,7 +194,7 @@ static int ms5611_reset(struct iio_dev *indio_dev)
+ int ret;
+ struct ms5611_state *st = iio_priv(indio_dev);
+
+- ret = st->reset(&indio_dev->dev);
++ ret = st->reset(st);
+ if (ret < 0) {
+ dev_err(&indio_dev->dev, "failed to reset device\n");
+ return ret;
+@@ -343,15 +341,6 @@ static int ms5611_write_raw(struct iio_dev *indio_dev,
+
+ static const unsigned long ms5611_scan_masks[] = {0x3, 0};
+
+-static struct ms5611_chip_info chip_info_tbl[] = {
+- [MS5611] = {
+- .temp_and_pressure_compensate = ms5611_temp_and_pressure_compensate,
+- },
+- [MS5607] = {
+- .temp_and_pressure_compensate = ms5607_temp_and_pressure_compensate,
+- }
+-};
+-
+ static const struct iio_chan_spec ms5611_channels[] = {
+ {
+ .type = IIO_PRESSURE,
+@@ -434,7 +423,20 @@ int ms5611_probe(struct iio_dev *indio_dev, struct device *dev,
+ struct ms5611_state *st = iio_priv(indio_dev);
+
+ mutex_init(&st->lock);
+- st->chip_info = &chip_info_tbl[type];
++
++ switch (type) {
++ case MS5611:
++ st->compensate_temp_and_pressure =
++ ms5611_temp_and_pressure_compensate;
++ break;
++ case MS5607:
++ st->compensate_temp_and_pressure =
++ ms5607_temp_and_pressure_compensate;
++ break;
++ default:
++ return -EINVAL;
++ }
++
+ st->temp_osr =
+ &ms5611_avail_temp_osr[ARRAY_SIZE(ms5611_avail_temp_osr) - 1];
+ st->pressure_osr =
+diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c
+index 7c04f730430c7..cccc40f7df0b9 100644
+--- a/drivers/iio/pressure/ms5611_i2c.c
++++ b/drivers/iio/pressure/ms5611_i2c.c
+@@ -20,17 +20,15 @@
+
+ #include "ms5611.h"
+
+-static int ms5611_i2c_reset(struct device *dev)
++static int ms5611_i2c_reset(struct ms5611_state *st)
+ {
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+-
+ return i2c_smbus_write_byte(st->client, MS5611_RESET);
+ }
+
+-static int ms5611_i2c_read_prom_word(struct device *dev, int index, u16 *word)
++static int ms5611_i2c_read_prom_word(struct ms5611_state *st, int index,
++ u16 *word)
+ {
+ int ret;
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+
+ ret = i2c_smbus_read_word_swapped(st->client,
+ MS5611_READ_PROM_WORD + (index << 1));
+@@ -57,11 +55,10 @@ static int ms5611_i2c_read_adc(struct ms5611_state *st, s32 *val)
+ return 0;
+ }
+
+-static int ms5611_i2c_read_adc_temp_and_pressure(struct device *dev,
++static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st,
+ s32 *temp, s32 *pressure)
+ {
+ int ret;
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+ const struct ms5611_osr *osr = st->temp_osr;
+
+ ret = i2c_smbus_write_byte(st->client, osr->cmd);
+diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c
+index 45d3a7d5be8e4..3039fe8aa2a2d 100644
+--- a/drivers/iio/pressure/ms5611_spi.c
++++ b/drivers/iio/pressure/ms5611_spi.c
+@@ -15,18 +15,17 @@
+
+ #include "ms5611.h"
+
+-static int ms5611_spi_reset(struct device *dev)
++static int ms5611_spi_reset(struct ms5611_state *st)
+ {
+ u8 cmd = MS5611_RESET;
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+
+ return spi_write_then_read(st->client, &cmd, 1, NULL, 0);
+ }
+
+-static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word)
++static int ms5611_spi_read_prom_word(struct ms5611_state *st, int index,
++ u16 *word)
+ {
+ int ret;
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+
+ ret = spi_w8r16be(st->client, MS5611_READ_PROM_WORD + (index << 1));
+ if (ret < 0)
+@@ -37,11 +36,10 @@ static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word)
+ return 0;
+ }
+
+-static int ms5611_spi_read_adc(struct device *dev, s32 *val)
++static int ms5611_spi_read_adc(struct ms5611_state *st, s32 *val)
+ {
+ int ret;
+ u8 buf[3] = { MS5611_READ_ADC };
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+
+ ret = spi_write_then_read(st->client, buf, 1, buf, 3);
+ if (ret < 0)
+@@ -52,11 +50,10 @@ static int ms5611_spi_read_adc(struct device *dev, s32 *val)
+ return 0;
+ }
+
+-static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
++static int ms5611_spi_read_adc_temp_and_pressure(struct ms5611_state *st,
+ s32 *temp, s32 *pressure)
+ {
+ int ret;
+- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev));
+ const struct ms5611_osr *osr = st->temp_osr;
+
+ /*
+@@ -68,7 +65,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
+ return ret;
+
+ usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL));
+- ret = ms5611_spi_read_adc(dev, temp);
++ ret = ms5611_spi_read_adc(st, temp);
+ if (ret < 0)
+ return ret;
+
+@@ -78,7 +75,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev,
+ return ret;
+
+ usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL));
+- return ms5611_spi_read_adc(dev, pressure);
++ return ms5611_spi_read_adc(st, pressure);
+ }
+
+ static int ms5611_spi_probe(struct spi_device *spi)
+@@ -94,7 +91,7 @@ static int ms5611_spi_probe(struct spi_device *spi)
+ spi_set_drvdata(spi, indio_dev);
+
+ spi->mode = SPI_MODE_0;
+- spi->max_speed_hz = 20000000;
++ spi->max_speed_hz = min(spi->max_speed_hz, 20000000U);
+ spi->bits_per_word = 8;
+ ret = spi_setup(spi);
+ if (ret < 0)
+diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c
+index 52fa98f24478d..6215de677017e 100644
+--- a/drivers/iio/pressure/st_pressure_i2c.c
++++ b/drivers/iio/pressure/st_pressure_i2c.c
+@@ -119,10 +119,10 @@ static int st_press_i2c_remove(struct i2c_client *client)
+ {
+ struct iio_dev *indio_dev = i2c_get_clientdata(client);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_press_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c
+index ee393df54cee8..e220cf0b125f1 100644
+--- a/drivers/iio/pressure/st_pressure_spi.c
++++ b/drivers/iio/pressure/st_pressure_spi.c
+@@ -102,10 +102,10 @@ static int st_press_spi_remove(struct spi_device *spi)
+ {
+ struct iio_dev *indio_dev = spi_get_drvdata(spi);
+
+- st_sensors_power_disable(indio_dev);
+-
+ st_press_common_remove(indio_dev);
+
++ st_sensors_power_disable(indio_dev);
++
+ return 0;
+ }
+
+@@ -117,6 +117,10 @@ static const struct spi_device_id st_press_id_table[] = {
+ { LPS33HW_PRESS_DEV_NAME },
+ { LPS35HW_PRESS_DEV_NAME },
+ { LPS22HH_PRESS_DEV_NAME },
++ { "lps001wp-press" },
++ { "lps25h-press", },
++ { "lps331ap-press" },
++ { "lps22hb-press" },
+ {},
+ };
+ MODULE_DEVICE_TABLE(spi, st_press_id_table);
+diff --git a/drivers/iio/proximity/vl53l0x-i2c.c b/drivers/iio/proximity/vl53l0x-i2c.c
+index cf38144b6f954..13a87d3e3544f 100644
+--- a/drivers/iio/proximity/vl53l0x-i2c.c
++++ b/drivers/iio/proximity/vl53l0x-i2c.c
+@@ -104,6 +104,7 @@ static int vl53l0x_read_proximity(struct vl53l0x_data *data,
+ u16 tries = 20;
+ u8 buffer[12];
+ int ret;
++ unsigned long time_left;
+
+ ret = i2c_smbus_write_byte_data(client, VL_REG_SYSRANGE_START, 1);
+ if (ret < 0)
+@@ -112,10 +113,8 @@ static int vl53l0x_read_proximity(struct vl53l0x_data *data,
+ if (data->client->irq) {
+ reinit_completion(&data->completion);
+
+- ret = wait_for_completion_timeout(&data->completion, HZ/10);
+- if (ret < 0)
+- return ret;
+- else if (ret == 0)
++ time_left = wait_for_completion_timeout(&data->completion, HZ/10);
++ if (time_left == 0)
+ return -ETIMEDOUT;
+
+ vl53l0x_clear_irq(data);
+diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c
+index 3b4a0e60e6059..b2ae2d2c7eefc 100644
+--- a/drivers/iio/temperature/ltc2983.c
++++ b/drivers/iio/temperature/ltc2983.c
+@@ -205,6 +205,7 @@ struct ltc2983_data {
+ * Holds the converted temperature
+ */
+ __be32 temp ____cacheline_aligned;
++ __be32 chan_val;
+ };
+
+ struct ltc2983_sensor {
+@@ -309,19 +310,18 @@ static int __ltc2983_fault_handler(const struct ltc2983_data *st,
+ return 0;
+ }
+
+-static int __ltc2983_chan_assign_common(const struct ltc2983_data *st,
++static int __ltc2983_chan_assign_common(struct ltc2983_data *st,
+ const struct ltc2983_sensor *sensor,
+ u32 chan_val)
+ {
+ u32 reg = LTC2983_CHAN_START_ADDR(sensor->chan);
+- __be32 __chan_val;
+
+ chan_val |= LTC2983_CHAN_TYPE(sensor->type);
+ dev_dbg(&st->spi->dev, "Assign reg:0x%04X, val:0x%08X\n", reg,
+ chan_val);
+- __chan_val = cpu_to_be32(chan_val);
+- return regmap_bulk_write(st->regmap, reg, &__chan_val,
+- sizeof(__chan_val));
++ st->chan_val = cpu_to_be32(chan_val);
++ return regmap_bulk_write(st->regmap, reg, &st->chan_val,
++ sizeof(st->chan_val));
+ }
+
+ static int __ltc2983_chan_custom_sensor_assign(struct ltc2983_data *st,
+@@ -1376,13 +1376,6 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio)
+ return ret;
+ }
+
+- st->iio_chan = devm_kzalloc(&st->spi->dev,
+- st->iio_channels * sizeof(*st->iio_chan),
+- GFP_KERNEL);
+-
+- if (!st->iio_chan)
+- return -ENOMEM;
+-
+ ret = regmap_update_bits(st->regmap, LTC2983_GLOBAL_CONFIG_REG,
+ LTC2983_NOTCH_FREQ_MASK,
+ LTC2983_NOTCH_FREQ(st->filter_notch_freq));
+@@ -1494,6 +1487,12 @@ static int ltc2983_probe(struct spi_device *spi)
+ if (ret)
+ return ret;
+
++ st->iio_chan = devm_kzalloc(&spi->dev,
++ st->iio_channels * sizeof(*st->iio_chan),
++ GFP_KERNEL);
++ if (!st->iio_chan)
++ return -ENOMEM;
++
+ ret = ltc2983_setup(st, true);
+ if (ret)
+ return ret;
+diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c
+index e9adfff45b39b..33986e9963a5a 100644
+--- a/drivers/iio/trigger/iio-trig-sysfs.c
++++ b/drivers/iio/trigger/iio-trig-sysfs.c
+@@ -195,6 +195,7 @@ static int iio_sysfs_trigger_remove(int id)
+ }
+
+ iio_trigger_unregister(t->trig);
++ irq_work_sync(&t->work);
+ iio_trigger_free(t->trig);
+
+ list_del(&t->l);
+@@ -207,9 +208,13 @@ static int iio_sysfs_trigger_remove(int id)
+
+ static int __init iio_sysfs_trig_init(void)
+ {
++ int ret;
+ device_initialize(&iio_sysfs_trig_dev);
+ dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger");
+- return device_add(&iio_sysfs_trig_dev);
++ ret = device_add(&iio_sysfs_trig_dev);
++ if (ret)
++ put_device(&iio_sysfs_trig_dev);
++ return ret;
+ }
+ module_init(iio_sysfs_trig_init);
+
+diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
+index 33083877cd19d..4353b749ecef2 100644
+--- a/drivers/iio/trigger/stm32-timer-trigger.c
++++ b/drivers/iio/trigger/stm32-timer-trigger.c
+@@ -912,6 +912,6 @@ static struct platform_driver stm32_timer_trigger_driver = {
+ };
+ module_platform_driver(stm32_timer_trigger_driver);
+
+-MODULE_ALIAS("platform: stm32-timer-trigger");
++MODULE_ALIAS("platform:stm32-timer-trigger");
+ MODULE_DESCRIPTION("STMicroelectronics STM32 Timer Trigger driver");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
+index c903b74f46a46..680c3ac8cd4c0 100644
+--- a/drivers/infiniband/core/cm.c
++++ b/drivers/infiniband/core/cm.c
+@@ -1252,8 +1252,10 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
+ return ERR_CAST(cm_id_priv);
+
+ err = cm_init_listen(cm_id_priv, service_id, 0);
+- if (err)
++ if (err) {
++ ib_destroy_cm_id(&cm_id_priv->id);
+ return ERR_PTR(err);
++ }
+
+ spin_lock_irq(&cm_id_priv->lock);
+ listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+@@ -1630,14 +1632,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
+
+ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
+ struct sa_path_rec *primary_path,
+- struct sa_path_rec *alt_path)
++ struct sa_path_rec *alt_path,
++ struct ib_wc *wc)
+ {
+ u32 lid;
+
+ if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
+- sa_path_set_dlid(primary_path,
+- IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+- req_msg));
++ sa_path_set_dlid(primary_path, wc->slid);
+ sa_path_set_slid(primary_path,
+ IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg));
+@@ -1674,7 +1675,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
+
+ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
+ struct sa_path_rec *primary_path,
+- struct sa_path_rec *alt_path)
++ struct sa_path_rec *alt_path,
++ struct ib_wc *wc)
+ {
+ primary_path->dgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
+@@ -1732,7 +1734,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
+ if (sa_path_is_roce(alt_path))
+ alt_path->roce.route_resolved = false;
+ }
+- cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
++ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
+ }
+
+ static u16 cm_get_bth_pkey(struct cm_work *work)
+@@ -2146,7 +2148,7 @@ static int cm_req_handler(struct cm_work *work)
+ if (cm_req_has_alt_path(req_msg))
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+- &work->path[1]);
++ &work->path[1], work->mad_recv_wc->wc);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+@@ -2824,6 +2826,7 @@ static int cm_dreq_handler(struct cm_work *work)
+ switch (cm_id_priv->id.state) {
+ case IB_CM_REP_SENT:
+ case IB_CM_DREQ_SENT:
++ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
+ break;
+ case IB_CM_ESTABLISHED:
+@@ -2831,8 +2834,6 @@ static int cm_dreq_handler(struct cm_work *work)
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->msg);
+ break;
+- case IB_CM_MRA_REP_RCVD:
+- break;
+ case IB_CM_TIMEWAIT:
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
+@@ -2913,6 +2914,8 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
+ return -EINVAL;
+
++ trace_icm_send_rej(&cm_id_priv->id, reason);
++
+ switch (state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+@@ -2943,7 +2946,6 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ return -EINVAL;
+ }
+
+- trace_icm_send_rej(&cm_id_priv->id, reason);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ cm_free_msg(msg);
+@@ -3322,7 +3324,7 @@ static int cm_lap_handler(struct cm_work *work)
+ ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
+ if (ret) {
+ rdma_destroy_ah_attr(&ah_attr);
+- return -EINVAL;
++ goto deref;
+ }
+
+ spin_lock_irq(&cm_id_priv->lock);
+diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
+index 704ce595542c5..044f9d44001bb 100644
+--- a/drivers/infiniband/core/cma.c
++++ b/drivers/infiniband/core/cma.c
+@@ -67,8 +67,8 @@ static const char * const cma_events[] = {
+ [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
+ };
+
+-static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
+- union ib_gid *mgid);
++static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
++ enum ib_gid_type gid_type);
+
+ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
+ {
+@@ -496,22 +496,11 @@ static inline unsigned short cma_family(struct rdma_id_private *id_priv)
+ return id_priv->id.route.addr.src_addr.ss_family;
+ }
+
+-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
++static int cma_set_default_qkey(struct rdma_id_private *id_priv)
+ {
+ struct ib_sa_mcmember_rec rec;
+ int ret = 0;
+
+- if (id_priv->qkey) {
+- if (qkey && id_priv->qkey != qkey)
+- return -EINVAL;
+- return 0;
+- }
+-
+- if (qkey) {
+- id_priv->qkey = qkey;
+- return 0;
+- }
+-
+ switch (id_priv->id.ps) {
+ case RDMA_PS_UDP:
+ case RDMA_PS_IB:
+@@ -531,6 +520,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+ return ret;
+ }
+
++static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
++{
++ if (!qkey ||
++ (id_priv->qkey && (id_priv->qkey != qkey)))
++ return -EINVAL;
++
++ id_priv->qkey = qkey;
++ return 0;
++}
++
+ static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
+ {
+ dev_addr->dev_type = ARPHRD_INFINIBAND;
+@@ -766,6 +765,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+ unsigned int p;
+ u16 pkey, index;
+ enum ib_port_state port_state;
++ int ret;
+ int i;
+
+ cma_dev = NULL;
+@@ -784,9 +784,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
+- for (i = 0; !rdma_query_gid(cur_dev->device,
+- p, i, &gid);
+- i++) {
++
++ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
++ ++i) {
++ ret = rdma_query_gid(cur_dev->device, p, i,
++ &gid);
++ if (ret)
++ continue;
++
+ if (!memcmp(&gid, dgid, sizeof(gid))) {
+ cma_dev = cur_dev;
+ sgid = gid;
+@@ -1093,7 +1098,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+ *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ if (id_priv->id.qp_type == IB_QPT_UD) {
+- ret = cma_set_qkey(id_priv, 0);
++ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+
+@@ -1427,7 +1432,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
+ return false;
+
+ memset(&fl4, 0, sizeof(fl4));
+- fl4.flowi4_iif = net_dev->ifindex;
++ fl4.flowi4_oif = net_dev->ifindex;
+ fl4.daddr = daddr;
+ fl4.saddr = saddr;
+
+@@ -1712,8 +1717,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
+ }
+
+ if (!validate_net_dev(*net_dev,
+- (struct sockaddr *)&req->listen_addr_storage,
+- (struct sockaddr *)&req->src_addr_storage)) {
++ (struct sockaddr *)&req->src_addr_storage,
++ (struct sockaddr *)&req->listen_addr_storage)) {
+ id_priv = ERR_PTR(-EHOSTUNREACH);
+ goto err;
+ }
+@@ -1838,17 +1843,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+- if (ndev) {
++ if (ndev && !send_only) {
++ enum ib_gid_type gid_type;
+ union ib_gid mgid;
+
+- cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
+- &mgid);
+-
+- if (!send_only)
+- cma_igmp_send(ndev, &mgid, false);
+-
+- dev_put(ndev);
++ gid_type = id_priv->cma_dev->default_gid_type
++ [id_priv->id.port_num -
++ rdma_start_port(
++ id_priv->cma_dev->device)];
++ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
++ gid_type);
++ cma_igmp_send(ndev, &mgid, false);
+ }
++ dev_put(ndev);
+
+ cancel_work_sync(&mc->iboe_join.work);
+ }
+@@ -2632,7 +2639,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
+ {
+ struct rdma_id_private *id_priv;
+
+- if (id->qp_type != IB_QPT_RC)
++ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+@@ -3106,7 +3113,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+ route->path_rec->traffic_class = tos;
+ route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
+ route->path_rec->rate_selector = IB_SA_EQ;
+- route->path_rec->rate = iboe_get_rate(ndev);
++ route->path_rec->rate = IB_RATE_PORT_CURRENT;
+ dev_put(ndev);
+ route->path_rec->packet_life_time_selector = IB_SA_EQ;
+ /* In case ACK timeout is set, use this value to calculate
+@@ -3360,22 +3367,30 @@ err:
+ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+ {
+- if (!src_addr || !src_addr->sa_family) {
+- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+- src_addr->sa_family = dst_addr->sa_family;
+- if (IS_ENABLED(CONFIG_IPV6) &&
+- dst_addr->sa_family == AF_INET6) {
+- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
+- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
+- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
+- } else if (dst_addr->sa_family == AF_IB) {
+- ((struct sockaddr_ib *) src_addr)->sib_pkey =
+- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
+- }
++ struct sockaddr_storage zero_sock = {};
++
++ if (src_addr && src_addr->sa_family)
++ return rdma_bind_addr(id, src_addr);
++
++ /*
++ * When the src_addr is not specified, automatically supply an any addr
++ */
++ zero_sock.ss_family = dst_addr->sa_family;
++ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
++ struct sockaddr_in6 *src_addr6 =
++ (struct sockaddr_in6 *)&zero_sock;
++ struct sockaddr_in6 *dst_addr6 =
++ (struct sockaddr_in6 *)dst_addr;
++
++ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
++ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
++ id->route.addr.dev_addr.bound_dev_if =
++ dst_addr6->sin6_scope_id;
++ } else if (dst_addr->sa_family == AF_IB) {
++ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
++ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+- return rdma_bind_addr(id, src_addr);
++ return rdma_bind_addr(id, (struct sockaddr *)&zero_sock);
+ }
+
+ /*
+@@ -4031,8 +4046,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
+
+ memset(&req, 0, sizeof req);
+ offset = cma_user_data_offset(id_priv);
+- req.private_data_len = offset + conn_param->private_data_len;
+- if (req.private_data_len < conn_param->private_data_len)
++ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
+ return -EINVAL;
+
+ if (req.private_data_len) {
+@@ -4091,8 +4105,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
+
+ memset(&req, 0, sizeof req);
+ offset = cma_user_data_offset(id_priv);
+- req.private_data_len = offset + conn_param->private_data_len;
+- if (req.private_data_len < conn_param->private_data_len)
++ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
+ return -EINVAL;
+
+ if (req.private_data_len) {
+@@ -4359,7 +4372,10 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
+ memset(&rep, 0, sizeof rep);
+ rep.status = status;
+ if (status == IB_SIDR_SUCCESS) {
+- ret = cma_set_qkey(id_priv, qkey);
++ if (qkey)
++ ret = cma_set_qkey(id_priv, qkey);
++ else
++ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+ rep.qp_num = id_priv->qp_num;
+@@ -4564,9 +4580,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
+
+- if (!status)
+- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+- else
++ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
+
+@@ -4594,7 +4608,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ }
+
+ event->param.ud.qp_num = 0xFFFFFF;
+- event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
++ event->param.ud.qkey = id_priv->qkey;
+
+ out:
+ if (ndev)
+@@ -4613,8 +4627,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+ goto out;
+
+- cma_make_mc_event(status, id_priv, multicast, &event, mc);
+- ret = cma_cm_event_handler(id_priv, &event);
++ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
++ if (!ret) {
++ cma_make_mc_event(status, id_priv, multicast, &event, mc);
++ ret = cma_cm_event_handler(id_priv, &event);
++ }
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
+ WARN_ON(ret);
+
+@@ -4667,9 +4684,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ if (ret)
+ return ret;
+
+- ret = cma_set_qkey(id_priv, 0);
+- if (ret)
+- return ret;
++ if (!id_priv->qkey) {
++ ret = cma_set_default_qkey(id_priv);
++ if (ret)
++ return ret;
++ }
+
+ cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
+ rec.qkey = cpu_to_be32(id_priv->qkey);
+@@ -4746,15 +4765,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
+
+ ib.rec.pkey = cpu_to_be16(0xffff);
+- if (id_priv->id.ps == RDMA_PS_UDP)
+- ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+-
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (!ndev)
+ return -ENODEV;
+
+- ib.rec.rate = iboe_get_rate(ndev);
++ ib.rec.rate = IB_RATE_PORT_CURRENT;
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
+
+@@ -4774,6 +4790,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
+
++ if (!id_priv->qkey)
++ cma_set_default_qkey(id_priv);
++
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &ib.rec.port_gid);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+@@ -4799,6 +4818,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
+ return -EINVAL;
+
++ if (id_priv->id.qp_type != IB_QPT_UD)
++ return -EINVAL;
++
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
+index f4814bb7f082f..ab2106a09f9c6 100644
+--- a/drivers/infiniband/core/device.c
++++ b/drivers/infiniband/core/device.c
+@@ -2461,7 +2461,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+ ++i) {
+ ret = rdma_query_gid(device, port, i, &tmp_gid);
+ if (ret)
+- return ret;
++ continue;
++
+ if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+ *port_num = port;
+ if (index)
+@@ -2813,10 +2814,18 @@ static int __init ib_core_init(void)
+
+ nldev_init();
+ rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
+- roce_gid_mgmt_init();
++ ret = roce_gid_mgmt_init();
++ if (ret) {
++ pr_warn("Couldn't init RoCE GID management\n");
++ goto err_parent;
++ }
+
+ return 0;
+
++err_parent:
++ rdma_nl_unregister(RDMA_NL_LS);
++ nldev_exit();
++ unregister_pernet_device(&rdma_dev_net_ops);
+ err_compat:
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
+ err_sa:
+@@ -2839,8 +2848,8 @@ err:
+ static void __exit ib_core_cleanup(void)
+ {
+ roce_gid_mgmt_cleanup();
+- nldev_exit();
+ rdma_nl_unregister(RDMA_NL_LS);
++ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
+ ib_sa_cleanup();
+diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
+index 1893aa613ad73..674344eb8e2f4 100644
+--- a/drivers/infiniband/core/mad.c
++++ b/drivers/infiniband/core/mad.c
+@@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_qp_info *qp_info,
+ struct trace_event_raw_ib_mad_send_template *entry)
+ {
+- u16 pkey;
+- struct ib_device *dev = qp_info->port_priv->device;
+- u32 pnum = qp_info->port_priv->port_num;
+ struct ib_ud_wr *wr = &mad_send_wr->send_wr;
+ struct rdma_ah_attr attr = {};
+
+@@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
+
+ /* These are common */
+ entry->sl = attr.sl;
+- ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
+- entry->pkey = pkey;
+ entry->rqpn = wr->remote_qpn;
+ entry->rqkey = wr->remote_qkey;
+ entry->dlid = rdma_ah_get_dlid(&attr);
+diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
+index e9b4b2cccaa0f..7ad3ba7d5a0a1 100644
+--- a/drivers/infiniband/core/nldev.c
++++ b/drivers/infiniband/core/nldev.c
+@@ -511,7 +511,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+- return -EINVAL;
++ return -EMSGSIZE;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+@@ -550,7 +550,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_cm_id *cm_id = &id_priv->id;
+
+ if (port && port != cm_id->port_num)
+- return 0;
++ return -EAGAIN;
+
+ if (cm_id->port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+@@ -892,6 +892,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
+ int ret = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
++ if (!table_attr)
++ return -EMSGSIZE;
+
+ rt = &counter->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+@@ -2349,7 +2351,7 @@ void __init nldev_init(void)
+ rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
+ }
+
+-void __exit nldev_exit(void)
++void nldev_exit(void)
+ {
+ rdma_nl_unregister(RDMA_NL_NLDEV);
+ }
+diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
+index 1f935d9f61785..01a499a8b88db 100644
+--- a/drivers/infiniband/core/restrack.c
++++ b/drivers/infiniband/core/restrack.c
+@@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
+ rt = &dev->res[res->type];
+
+ old = xa_erase(&rt->xa, res->id);
+- if (res->type == RDMA_RESTRACK_MR)
+- return;
+ WARN_ON(old != res);
+
+ out:
+diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
+index 6146c3c1cbe5c..253ccaf343f69 100644
+--- a/drivers/infiniband/core/sysfs.c
++++ b/drivers/infiniband/core/sysfs.c
+@@ -757,7 +757,7 @@ static void ib_port_release(struct kobject *kobj)
+ if (port->hw_stats_data)
+ kfree(port->hw_stats_data->stats);
+ kfree(port->hw_stats_data);
+- kfree(port);
++ kvfree(port);
+ }
+
+ static void ib_port_gid_attr_release(struct kobject *kobj)
+@@ -1189,7 +1189,7 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ struct ib_port *p;
+ int ret;
+
+- p = kzalloc(struct_size(p, attrs_list,
++ p = kvzalloc(struct_size(p, attrs_list,
+ attr->gid_tbl_len + attr->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!p)
+@@ -1198,6 +1198,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ p->port_num = port_num;
+ kobject_init(&p->kobj, &port_type);
+
++ if (device->port_data && is_full_dev)
++ device->port_data[port_num].sysfs = p;
++
+ cur_group = p->groups_list;
+ ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
+ attr->gid_tbl_len, show_port_gid);
+@@ -1243,9 +1246,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ }
+
+ list_add_tail(&p->kobj.entry, &coredev->port_list);
+- if (device->port_data && is_full_dev)
+- device->port_data[port_num].sysfs = p;
+-
+ return p;
+
+ err_groups:
+@@ -1253,6 +1253,8 @@ err_groups:
+ err_del:
+ kobject_del(&p->kobj);
+ err_put:
++ if (device->port_data && is_full_dev)
++ device->port_data[port_num].sysfs = NULL;
+ kobject_put(&p->kobj);
+ return ERR_PTR(ret);
+ }
+@@ -1261,14 +1263,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
+ {
+ bool is_full_dev = &port->ibdev->coredev == coredev;
+
+- if (port->ibdev->port_data &&
+- port->ibdev->port_data[port->port_num].sysfs == port)
+- port->ibdev->port_data[port->port_num].sysfs = NULL;
+ list_del(&port->kobj.entry);
+ if (is_full_dev)
+ sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
++
+ sysfs_remove_groups(&port->kobj, port->groups_list);
+ kobject_del(&port->kobj);
++
++ if (port->ibdev->port_data &&
++ port->ibdev->port_data[port->port_num].sysfs == port)
++ port->ibdev->port_data[port->port_num].sysfs = NULL;
++
+ kobject_put(&port->kobj);
+ }
+
+diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
+index 2b72c4fa95506..9d6ac9dff39a2 100644
+--- a/drivers/infiniband/core/ucma.c
++++ b/drivers/infiniband/core/ucma.c
+@@ -95,6 +95,7 @@ struct ucma_context {
+ u64 uid;
+
+ struct list_head list;
++ struct list_head mc_list;
+ struct work_struct close_work;
+ };
+
+@@ -105,6 +106,7 @@ struct ucma_multicast {
+
+ u64 uid;
+ u8 join_state;
++ struct list_head list;
+ struct sockaddr_storage addr;
+ };
+
+@@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
+
+ INIT_WORK(&ctx->close_work, ucma_close_id);
+ init_completion(&ctx->comp);
++ INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
+ ctx->file = file;
+@@ -484,19 +487,19 @@ err1:
+
+ static void ucma_cleanup_multicast(struct ucma_context *ctx)
+ {
+- struct ucma_multicast *mc;
+- unsigned long index;
++ struct ucma_multicast *mc, *tmp;
+
+- xa_for_each(&multicast_table, index, mc) {
+- if (mc->ctx != ctx)
+- continue;
++ xa_lock(&multicast_table);
++ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
++ list_del(&mc->list);
+ /*
+ * At this point mc->ctx->ref is 0 so the mc cannot leave the
+ * lock on the reader and this is enough serialization
+ */
+- xa_erase(&multicast_table, index);
++ __xa_erase(&multicast_table, mc->id);
+ kfree(mc);
+ }
++ xa_unlock(&multicast_table);
+ }
+
+ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+@@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file,
+ mc->uid = cmd->uid;
+ memcpy(&mc->addr, addr, cmd->addr_size);
+
+- if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
++ xa_lock(&multicast_table);
++ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
++ list_add_tail(&mc->list, &ctx->mc_list);
++ xa_unlock(&multicast_table);
++
+ mutex_lock(&ctx->mutex);
+ ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
+ join_state, mc);
+@@ -1500,8 +1507,11 @@ err_leave_multicast:
+ mutex_unlock(&ctx->mutex);
+ ucma_cleanup_mc_events(mc);
+ err_xa_erase:
+- xa_erase(&multicast_table, mc->id);
++ xa_lock(&multicast_table);
++ list_del(&mc->list);
++ __xa_erase(&multicast_table, mc->id);
+ err_free_mc:
++ xa_unlock(&multicast_table);
+ kfree(mc);
+ err_put_ctx:
+ ucma_put_ctx(ctx);
+@@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
+ mc = ERR_PTR(-EINVAL);
+ else if (!refcount_inc_not_zero(&mc->ctx->ref))
+ mc = ERR_PTR(-ENXIO);
+- else
+- __xa_erase(&multicast_table, mc->id);
+- xa_unlock(&multicast_table);
+
+ if (IS_ERR(mc)) {
++ xa_unlock(&multicast_table);
+ ret = PTR_ERR(mc);
+ goto out;
+ }
+
++ list_del(&mc->list);
++ __xa_erase(&multicast_table, mc->id);
++ xa_unlock(&multicast_table);
++
+ mutex_lock(&mc->ctx->mutex);
+ rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&mc->ctx->mutex);
+diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
+index 86d479772fbc6..957634eceba8f 100644
+--- a/drivers/infiniband/core/umem.c
++++ b/drivers/infiniband/core/umem.c
+@@ -85,6 +85,8 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ dma_addr_t mask;
+ int i;
+
++ umem->iova = va = virt;
++
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+@@ -100,7 +102,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ */
+ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
+- umem->iova = va = virt;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
+index 7a47343d11f9f..b052de1b9ccb9 100644
+--- a/drivers/infiniband/core/umem_odp.c
++++ b/drivers/infiniband/core/umem_odp.c
+@@ -463,7 +463,7 @@ retry:
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ out_put_mm:
+- mmput(owning_mm);
++ mmput_async(owning_mm);
+ out_put_task:
+ if (owning_process)
+ put_task_struct(owning_process);
+diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
+index 98cb594cd9a69..a61c9ede43387 100644
+--- a/drivers/infiniband/core/user_mad.c
++++ b/drivers/infiniband/core/user_mad.c
+@@ -131,6 +131,11 @@ struct ib_umad_packet {
+ struct ib_user_mad mad;
+ };
+
++struct ib_rmpp_mad_hdr {
++ struct ib_mad_hdr mad_hdr;
++ struct ib_rmpp_hdr rmpp_hdr;
++} __packed;
++
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/ib_umad.h>
+
+@@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+ {
+ struct ib_umad_file *file = filp->private_data;
++ struct ib_rmpp_mad_hdr *rmpp_mad_hdr;
+ struct ib_umad_packet *packet;
+ struct ib_mad_agent *agent;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+- struct ib_rmpp_mad *rmpp_mad;
+ __be64 *tid;
+ int ret, data_len, hdr_len, copy_offset, rmpp_active;
+ u8 base_version;
+@@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
+ return -EINVAL;
+
+- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
++ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ if (!packet)
+ return -ENOMEM;
+
+@@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ goto err_up;
+ }
+
+- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
+- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
++ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data;
++ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class);
+
+- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
++ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
+ && ib_mad_kernel_rmpp_agent(agent)) {
+ copy_offset = IB_MGMT_RMPP_HDR;
+- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
++ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ } else {
+ copy_offset = IB_MGMT_MAD_HDR;
+@@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
+ *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
+ (be64_to_cpup(tid) & 0xffffffff));
+- rmpp_mad->mad_hdr.tid = *tid;
++ rmpp_mad_hdr->mad_hdr.tid = *tid;
+ }
+
+ if (!ib_mad_kernel_rmpp_agent(agent)
+- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
++ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
++ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ spin_lock_irq(&file->send_lock);
+ list_add_tail(&packet->list, &file->send_list);
+ spin_unlock_irq(&file->send_lock);
+diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
+index 740e6b2efe0e7..e9a5fa4daa3e7 100644
+--- a/drivers/infiniband/core/uverbs_cmd.c
++++ b/drivers/infiniband/core/uverbs_cmd.c
+@@ -739,6 +739,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ mr->iova = cmd.hca_va;
++ mr->length = cmd.length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+@@ -837,11 +838,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+- new_mr->dm = NULL;
+- new_mr->sig_attrs = NULL;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+- new_mr->iova = cmd.hca_va;
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+@@ -864,8 +862,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+- if (cmd.flags & IB_MR_REREG_TRANS)
++ if (cmd.flags & IB_MR_REREG_TRANS) {
+ mr->iova = cmd.hca_va;
++ mr->length = cmd.length;
++ }
+ }
+
+ memset(&resp, 0, sizeof(resp));
+@@ -1851,8 +1851,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
+ attr->path_mtu = cmd->base.path_mtu;
+ if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
+ attr->path_mig_state = cmd->base.path_mig_state;
+- if (cmd->base.attr_mask & IB_QP_QKEY)
++ if (cmd->base.attr_mask & IB_QP_QKEY) {
++ if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
++ ret = -EPERM;
++ goto release_qp;
++ }
+ attr->qkey = cmd->base.qkey;
++ }
+ if (cmd->base.attr_mask & IB_QP_RQ_PSN)
+ attr->rq_psn = cmd->base.rq_psn;
+ if (cmd->base.attr_mask & IB_QP_SQ_PSN)
+diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
+index d544340887277..fa937cd268219 100644
+--- a/drivers/infiniband/core/uverbs_main.c
++++ b/drivers/infiniband/core/uverbs_main.c
+@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ spin_lock_irq(&ev_queue->lock);
+
+ while (list_empty(&ev_queue->event_list)) {
+- spin_unlock_irq(&ev_queue->lock);
++ if (ev_queue->is_closed) {
++ spin_unlock_irq(&ev_queue->lock);
++ return -EIO;
++ }
+
++ spin_unlock_irq(&ev_queue->lock);
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ return -ERESTARTSYS;
+
+ spin_lock_irq(&ev_queue->lock);
+-
+- /* If device was disassociated and no event exists set an error */
+- if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
+- spin_unlock_irq(&ev_queue->lock);
+- return -EIO;
+- }
+ }
+
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
+diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
+index b8d715c68ca44..11a0806469162 100644
+--- a/drivers/infiniband/core/uverbs_marshall.c
++++ b/drivers/infiniband/core/uverbs_marshall.c
+@@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device,
+ struct rdma_ah_attr *src = ah_attr;
+ struct rdma_ah_attr conv_ah;
+
+- memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
++ memset(&dst->grh, 0, sizeof(dst->grh));
+
+ if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
+ (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) &&
+diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
+index 999da9c798668..381aa57976417 100644
+--- a/drivers/infiniband/core/uverbs_std_types_counters.c
++++ b/drivers/infiniband/core/uverbs_std_types_counters.c
+@@ -107,6 +107,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+ return ret;
+
+ uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
++ if (IS_ERR(uattr))
++ return PTR_ERR(uattr);
+ read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
+ read_attr.counters_buff = uverbs_zalloc(
+ attrs, array_size(read_attr.ncounters, sizeof(u64)));
+diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
+index 2f2c7646fce17..a02916a3a79ce 100644
+--- a/drivers/infiniband/core/uverbs_uapi.c
++++ b/drivers/infiniband/core/uverbs_uapi.c
+@@ -447,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi)
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
++ if (!data)
++ return -ENOMEM;
++
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
+index 89a2b21976d63..cae013130eb1d 100644
+--- a/drivers/infiniband/core/verbs.c
++++ b/drivers/infiniband/core/verbs.c
+@@ -540,6 +540,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
+ else
+ ret = device->ops.create_ah(ah, &init_attr, NULL);
+ if (ret) {
++ if (ah->sgid_attr)
++ rdma_put_gid_attr(ah->sgid_attr);
+ kfree(ah);
+ return ERR_PTR(ret);
+ }
+@@ -1232,6 +1234,9 @@ static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
++ qp->send_cq = attr->send_cq;
++ qp->recv_cq = attr->recv_cq;
++
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
+ WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+ rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+@@ -2150,9 +2155,12 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ return mr;
+
+ mr->device = pd->device;
++ mr->type = IB_MR_TYPE_USER;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
++ mr->iova = virt_addr;
++ mr->length = length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+@@ -2959,15 +2967,18 @@ EXPORT_SYMBOL(__rdma_block_iter_start);
+ bool __rdma_block_iter_next(struct ib_block_iter *biter)
+ {
+ unsigned int block_offset;
++ unsigned int sg_delta;
+
+ if (!biter->__sg_nents || !biter->__sg)
+ return false;
+
+ biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
+ block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
+- biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset;
++ sg_delta = BIT_ULL(biter->__pg_bit) - block_offset;
+
+- if (biter->__sg_advance >= sg_dma_len(biter->__sg)) {
++ if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) {
++ biter->__sg_advance += sg_delta;
++ } else {
+ biter->__sg_advance = 0;
+ biter->__sg = sg_next(biter->__sg);
+ biter->__sg_nents--;
+diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+index 408dfbcc47b5e..87ee616e69384 100644
+--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+@@ -792,7 +792,10 @@ fail:
+ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
+ {
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
++ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
+ struct bnxt_re_dev *rdev = qp->rdev;
++ struct bnxt_qplib_nq *scq_nq = NULL;
++ struct bnxt_qplib_nq *rcq_nq = NULL;
+ unsigned int flags;
+ int rc;
+
+@@ -826,6 +829,15 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
+
++ /* Flush all the entries of notification queue associated with
++ * given qp.
++ */
++ scq_nq = qplib_qp->scq->nq;
++ rcq_nq = qplib_qp->rcq->nq;
++ bnxt_re_synchronize_nq(scq_nq);
++ if (scq_nq != rcq_nq)
++ bnxt_re_synchronize_nq(rcq_nq);
++
+ return 0;
+ }
+
+@@ -3235,9 +3247,7 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
+ udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
+
+ /* post data received in the send queue */
+- rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
+-
+- return 0;
++ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
+ }
+
+ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
+@@ -3354,8 +3364,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
+ struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+ {
++ struct bnxt_re_dev *rdev;
++ u16 vlan_id = 0;
+ u8 nw_type;
+
++ rdev = qp->rdev;
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+@@ -3367,9 +3380,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->wc_flags |= IB_WC_WITH_SMAC;
+ if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
+- wc->vlan_id = (cqe->cfa_meta & 0xFFF);
+- if (wc->vlan_id < 0x1000)
+- wc->wc_flags |= IB_WC_WITH_VLAN;
++ vlan_id = (cqe->cfa_meta & 0xFFF);
++ }
++ /* Mark only if vlan_id is non zero */
++ if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
++ wc->vlan_id = vlan_id;
++ wc->wc_flags |= IB_WC_WITH_VLAN;
+ }
+ nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
+ CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
+diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
+index 66268e41b470e..7b85eef113fc0 100644
+--- a/drivers/infiniband/hw/bnxt_re/main.c
++++ b/drivers/infiniband/hw/bnxt_re/main.c
+@@ -331,15 +331,21 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
+ for (indx = 0; indx < rdev->num_msix; indx++)
+ rdev->msix_entries[indx].vector = ent[indx].vector;
+
+- bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
+- false);
++ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
++ false);
++ if (rc) {
++ ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
++ return;
++ }
+ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
+ nq = &rdev->nq[indx - 1];
+ rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
+ msix_ent[indx].vector, false);
+- if (rc)
++ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
+ indx - 1);
++ return;
++ }
+ }
+ }
+
+@@ -1173,12 +1179,6 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
+ if (!ib_device_try_get(&rdev->ibdev))
+ return 0;
+
+- if (!sgid_tbl) {
+- ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated");
+- rc = -EINVAL;
+- goto out;
+- }
+-
+ for (index = 0; index < sgid_tbl->active; index++) {
+ gid_idx = sgid_tbl->hw_id[index];
+
+@@ -1196,7 +1196,7 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
+ rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
+ rdev->qplib_res.netdev->dev_addr);
+ }
+-out:
++
+ ib_device_put(&rdev->ibdev);
+ return rc;
+ }
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+index d4d4959c2434c..f1aa3e19b6de6 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+@@ -386,6 +386,24 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
+ spin_unlock_bh(&hwq->lock);
+ }
+
++/* bnxt_re_synchronize_nq - self polling notification queue.
++ * @nq - notification queue pointer
++ *
++ * This function will start polling entries of a given notification queue
++ * for all pending entries.
++ * This function is useful to synchronize notification entries while resources
++ * are going away.
++ */
++
++void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq)
++{
++ int budget = nq->budget;
++
++ nq->budget = nq->hwq.max_elements;
++ bnxt_qplib_service_nq(&nq->nq_tasklet);
++ nq->budget = budget;
++}
++
+ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+ {
+ struct bnxt_qplib_nq *nq = dev_instance;
+@@ -404,6 +422,9 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+
+ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
+ {
++ if (!nq->requested)
++ return;
++
+ tasklet_disable(&nq->nq_tasklet);
+ /* Mask h/w interrupt */
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
+@@ -411,11 +432,12 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
+ synchronize_irq(nq->msix_vec);
+ if (kill)
+ tasklet_kill(&nq->nq_tasklet);
+- if (nq->requested) {
+- irq_set_affinity_hint(nq->msix_vec, NULL);
+- free_irq(nq->msix_vec, nq);
+- nq->requested = false;
+- }
++
++ irq_set_affinity_hint(nq->msix_vec, NULL);
++ free_irq(nq->msix_vec, nq);
++ kfree(nq->name);
++ nq->name = NULL;
++ nq->requested = false;
+ }
+
+ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+@@ -441,6 +463,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ int msix_vector, bool need_init)
+ {
++ struct bnxt_qplib_res *res = nq->res;
+ int rc;
+
+ if (nq->requested)
+@@ -452,10 +475,17 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ else
+ tasklet_enable(&nq->nq_tasklet);
+
+- snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx);
++ nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s",
++ nq_indx, pci_name(res->pdev));
++ if (!nq->name)
++ return -ENOMEM;
+ rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq);
+- if (rc)
++ if (rc) {
++ kfree(nq->name);
++ nq->name = NULL;
++ tasklet_disable(&nq->nq_tasklet);
+ return rc;
++ }
+
+ cpumask_clear(&nq->mask);
+ cpumask_set_cpu(nq_indx, &nq->mask);
+@@ -466,7 +496,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ nq->msix_vec, nq_indx);
+ }
+ nq->requested = true;
+- bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
++ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true);
+
+ return rc;
+ }
+@@ -707,12 +737,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags);
+- req.srq_cid = cpu_to_le32(srq->id);
+
+ /* Configure the request */
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf)
+ return -ENOMEM;
++ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
++ req.srq_cid = cpu_to_le32(srq->id);
+ sb = sbuf->sb;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+@@ -1598,7 +1629,7 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ il_src = (void *)wqe->sg_list[indx].addr;
+ t_len += len;
+ if (t_len > qp->max_inline_data)
+- goto bad;
++ return -ENOMEM;
+ while (len) {
+ if (pull_dst) {
+ pull_dst = false;
+@@ -1622,8 +1653,6 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ }
+
+ return t_len;
+-bad:
+- return -ENOMEM;
+ }
+
+ static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
+@@ -2040,6 +2069,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+ u32 pg_sz_lvl;
+ int rc;
+
++ if (!cq->dpi) {
++ dev_err(&rcfw->pdev->dev,
++ "FP: CREATE_CQ failed due to NULL DPI\n");
++ return -EINVAL;
++ }
++
+ hwq_attr.res = res;
+ hwq_attr.depth = cq->max_wqe;
+ hwq_attr.stride = sizeof(struct cq_base);
+@@ -2047,15 +2082,10 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+ hwq_attr.sginfo = &cq->sg_info;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr);
+ if (rc)
+- goto exit;
++ return rc;
+
+ RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags);
+
+- if (!cq->dpi) {
+- dev_err(&rcfw->pdev->dev,
+- "FP: CREATE_CQ failed due to NULL DPI\n");
+- return -EINVAL;
+- }
+ req.dpi = cpu_to_le32(cq->dpi->dpi);
+ req.cq_handle = cpu_to_le64(cq->cq_handle);
+ req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+@@ -2093,7 +2123,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+
+ fail:
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+-exit:
+ return rc;
+ }
+
+@@ -2721,11 +2750,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+- if (!qp) {
+- dev_err(&cq->hwq.pdev->dev,
+- "FP: CQ Process terminal qp is NULL\n");
++ if (!qp)
+ return -EINVAL;
+- }
+
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+index 0375019525431..49d89c0808275 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
++++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+@@ -471,7 +471,7 @@ typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_nq {
+ struct pci_dev *pdev;
+ struct bnxt_qplib_res *res;
+- char name[32];
++ char *name;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_nq_db nq_db;
+ u16 ring_id;
+@@ -548,6 +548,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
+ struct bnxt_qplib_cqe *cqe,
+ int num_cqes);
+ void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
++void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq);
+
+ static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
+ {
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+index 5d384def5e5fe..3b8cb46551bf2 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+@@ -181,7 +181,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+ } while (size > 0);
+ cmdq->seq_num++;
+
+- cmdq_prod = hwq->prod;
++ cmdq_prod = hwq->prod & 0xFFFF;
+ if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
+ /* The very first doorbell write
+ * is required to set this flag
+@@ -299,7 +299,8 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
+ }
+
+ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+- struct creq_qp_event *qp_event)
++ struct creq_qp_event *qp_event,
++ u32 *num_wait)
+ {
+ struct creq_qp_error_notification *err_event;
+ struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
+@@ -308,6 +309,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+ u16 cbit, blocked = 0;
+ struct pci_dev *pdev;
+ unsigned long flags;
++ u32 wait_cmds = 0;
+ __le16 mcookie;
+ u16 cookie;
+ int rc = 0;
+@@ -367,9 +369,10 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+ crsqe->req_size = 0;
+
+ if (!blocked)
+- wake_up(&rcfw->cmdq.waitq);
++ wait_cmds++;
+ spin_unlock_irqrestore(&hwq->lock, flags);
+ }
++ *num_wait += wait_cmds;
+ return rc;
+ }
+
+@@ -383,6 +386,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
+ struct creq_base *creqe;
+ u32 sw_cons, raw_cons;
+ unsigned long flags;
++ u32 num_wakeup = 0;
+
+ /* Service the CREQ until budget is over */
+ spin_lock_irqsave(&hwq->lock, flags);
+@@ -401,7 +405,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ bnxt_qplib_process_qp_event
+- (rcfw, (struct creq_qp_event *)creqe);
++ (rcfw, (struct creq_qp_event *)creqe,
++ &num_wakeup);
+ creq->stats.creq_qp_event_processed++;
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+@@ -429,6 +434,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
+ rcfw->res->cctx, true);
+ }
+ spin_unlock_irqrestore(&hwq->lock, flags);
++ if (num_wakeup)
++ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
+ }
+
+ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
+@@ -599,7 +606,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
+
+ sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth);
+- hwq_attr.depth = rcfw->cmdq_depth;
++ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
+ hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
+@@ -618,8 +625,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
+ if (!cmdq->cmdq_bitmap)
+ goto fail;
+
+- cmdq->bmap_size = bmap_size;
+-
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = qp_tbl_sz + 1;
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+@@ -639,6 +644,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
+ struct bnxt_qplib_creq_ctx *creq;
+
+ creq = &rcfw->creq;
++
++ if (!creq->requested)
++ return;
++
+ tasklet_disable(&creq->creq_tasklet);
+ /* Mask h/w interrupts */
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
+@@ -647,10 +656,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
+ if (kill)
+ tasklet_kill(&creq->creq_tasklet);
+
+- if (creq->requested) {
+- free_irq(creq->msix_vec, rcfw);
+- creq->requested = false;
+- }
++ free_irq(creq->msix_vec, rcfw);
++ kfree(creq->irq_name);
++ creq->irq_name = NULL;
++ creq->requested = false;
+ }
+
+ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+@@ -667,8 +676,8 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+ iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+ iounmap(creq->creq_db.reg.bar_reg);
+
+- indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size);
+- if (indx != cmdq->bmap_size)
++ indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth);
++ if (indx != rcfw->cmdq_depth)
+ dev_err(&rcfw->pdev->dev,
+ "disabling RCFW with pending cmd-bit %lx\n", indx);
+
+@@ -682,9 +691,11 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ bool need_init)
+ {
+ struct bnxt_qplib_creq_ctx *creq;
++ struct bnxt_qplib_res *res;
+ int rc;
+
+ creq = &rcfw->creq;
++ res = rcfw->res;
+
+ if (creq->requested)
+ return -EFAULT;
+@@ -694,13 +705,22 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
+ else
+ tasklet_enable(&creq->creq_tasklet);
++
++ creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s",
++ pci_name(res->pdev));
++ if (!creq->irq_name)
++ return -ENOMEM;
+ rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
+- "bnxt_qplib_creq", rcfw);
+- if (rc)
++ creq->irq_name, rcfw);
++ if (rc) {
++ kfree(creq->irq_name);
++ creq->irq_name = NULL;
++ tasklet_disable(&creq->creq_tasklet);
+ return rc;
++ }
+ creq->requested = true;
+
+- bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true);
++ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+
+ return 0;
+ }
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+index 9474c00465821..2acdec55a667e 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
++++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+@@ -152,7 +152,6 @@ struct bnxt_qplib_cmdq_ctx {
+ wait_queue_head_t waitq;
+ unsigned long flags;
+ unsigned long *cmdq_bitmap;
+- u32 bmap_size;
+ u32 seq_num;
+ };
+
+@@ -175,6 +174,7 @@ struct bnxt_qplib_creq_ctx {
+ u16 ring_id;
+ int msix_vec;
+ bool requested; /*irq handler installed */
++ char *irq_name;
+ };
+
+ /* RCFW Communication Channels */
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
+index 44282a8cdd4f2..384d41072c63c 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
+@@ -215,17 +215,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+ return -EINVAL;
+ hwq_attr->sginfo->npages = npages;
+ } else {
+- unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
+- hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
+-
++ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
++ hwq_attr->sginfo->pgsize);
+ hwq->is_user = true;
+- npages = sginfo_num_pages;
+- npages = (npages * PAGE_SIZE) /
+- BIT_ULL(hwq_attr->sginfo->pgshft);
+- if ((sginfo_num_pages * PAGE_SIZE) %
+- BIT_ULL(hwq_attr->sginfo->pgshft))
+- if (!npages)
+- npages++;
+ }
+
+ if (npages == MAX_PBL_LVL_0_PGS) {
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+index 3d9259632eb3d..a161e0d3cb444 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+@@ -680,16 +680,15 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ /* Free the hwq if it already exist, must be a rereg */
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res, &mr->hwq);
+- /* Use system PAGE_SIZE */
+ hwq_attr.res = res;
+ hwq_attr.depth = pages;
+- hwq_attr.stride = buf_pg_size;
++ hwq_attr.stride = sizeof(dma_addr_t);
+ hwq_attr.type = HWQ_TYPE_MR;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.sginfo->umem = umem;
+ hwq_attr.sginfo->npages = pages;
+- hwq_attr.sginfo->pgsize = PAGE_SIZE;
+- hwq_attr.sginfo->pgshft = PAGE_SHIFT;
++ hwq_attr.sginfo->pgsize = buf_pg_size;
++ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
+ rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
+ if (rc) {
+ dev_err(&res->pdev->dev,
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index 291471d12197f..a3e4913904b75 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -2682,6 +2682,9 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
+ u16 tcp_opt = ntohs(req->tcp_opt);
+
+ ep = get_ep_from_tid(dev, tid);
++ if (!ep)
++ return 0;
++
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
+ ep->snd_seq = be32_to_cpu(req->snd_isn);
+ ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+@@ -4150,6 +4153,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
+
+ if (neigh->dev->flags & IFF_LOOPBACK) {
+ pdev = ip_dev_find(&init_net, iph->daddr);
++ if (!pdev) {
++ pr_err("%s - failed to find device!\n", __func__);
++ goto free_dst;
++ }
+ e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
+ pdev, 0);
+ pi = (struct port_info *)netdev_priv(pdev);
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index d20b4ef2c853d..ffbd9a89981e7 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -2460,6 +2460,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
+ attr->qp_state = to_ib_qp_state(qhp->attr.state);
++ attr->cur_qp_state = to_ib_qp_state(qhp->attr.state);
+ init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
+ init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
+ init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
+diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
+index ff645b955a082..fd22c85d35f4f 100644
+--- a/drivers/infiniband/hw/cxgb4/restrack.c
++++ b/drivers/infiniband/hw/cxgb4/restrack.c
+@@ -238,7 +238,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
+ if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history))
+ goto err_cancel_table;
+
+- if (epcp->state == LISTEN) {
++ if (listen_ep) {
+ if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog))
+diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
+index 417dea5f90cfe..d6d48db866814 100644
+--- a/drivers/infiniband/hw/efa/efa_main.c
++++ b/drivers/infiniband/hw/efa/efa_main.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+ /*
+- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
++ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+ #include <linux/module.h>
+@@ -14,10 +14,12 @@
+
+ #define PCI_DEV_ID_EFA0_VF 0xefa0
+ #define PCI_DEV_ID_EFA1_VF 0xefa1
++#define PCI_DEV_ID_EFA2_VF 0xefa2
+
+ static const struct pci_device_id efa_pci_tbl[] = {
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
++ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
+ { }
+ };
+
+diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
+index e5f9d90aad5ee..1aab6c3e9f539 100644
+--- a/drivers/infiniband/hw/efa/efa_verbs.c
++++ b/drivers/infiniband/hw/efa/efa_verbs.c
+@@ -1334,7 +1334,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev,
+ */
+ static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
+ {
+- u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
++ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
+ struct scatterlist *sgl;
+ int sg_dma_cnt, err;
+
+diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
+index 98c813ba4304c..4c403d9e90cb3 100644
+--- a/drivers/infiniband/hw/hfi1/affinity.c
++++ b/drivers/infiniband/hw/hfi1/affinity.c
+@@ -178,6 +178,8 @@ out:
+ for (node = 0; node < node_affinity.num_possible_nodes; node++)
+ hfi1_per_node_cntr[node] = 1;
+
++ pci_dev_put(dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
+index 37273dc0c03ca..b69dd618146ef 100644
+--- a/drivers/infiniband/hw/hfi1/chip.c
++++ b/drivers/infiniband/hw/hfi1/chip.c
+@@ -1055,7 +1055,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
+ static void handle_temp_err(struct hfi1_devdata *dd);
+ static void dc_shutdown(struct hfi1_devdata *dd);
+ static void dc_start(struct hfi1_devdata *dd);
+-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
++static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
+ unsigned int *np);
+ static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+ static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+@@ -8414,6 +8414,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+ */
+ static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+ {
++ if (!rcd->rcvhdrq)
++ return;
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+@@ -12304,6 +12306,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
+
+ if (dd->synth_stats_timer.function)
+ del_timer_sync(&dd->synth_stats_timer);
++ cancel_work_sync(&dd->update_cntr_work);
+ ppd = (struct hfi1_pportdata *)(dd + 1);
+ for (i = 0; i < dd->num_pports; i++, ppd++) {
+ kfree(ppd->cntrs);
+@@ -13359,7 +13362,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
+ int ret;
+ unsigned ngroups;
+ int rmt_count;
+- int user_rmt_reduced;
+ u32 n_usr_ctxts;
+ u32 send_contexts = chip_send_contexts(dd);
+ u32 rcv_contexts = chip_rcv_contexts(dd);
+@@ -13418,28 +13420,34 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
+ (num_kernel_contexts + n_usr_ctxts),
+ &node_affinity.real_cpu_mask);
+ /*
+- * The RMT entries are currently allocated as shown below:
+- * 1. QOS (0 to 128 entries);
+- * 2. FECN (num_kernel_context - 1 + num_user_contexts +
+- * num_netdev_contexts);
+- * 3. netdev (num_netdev_contexts).
+- * It should be noted that FECN oversubscribe num_netdev_contexts
+- * entries of RMT because both netdev and PSM could allocate any receive
+- * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
+- * and PSM FECN must reserve an RMT entry for each possible PSM receive
+- * context.
++ * RMT entries are allocated as follows:
++ * 1. QOS (0 to 128 entries)
++ * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts +
++ * num_netdev_contexts [b])
++ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
++ *
++ * Notes:
++ * [a] Kernel contexts (except control) are included in FECN if kernel
++ * TID_RDMA is active.
++ * [b] Netdev and user contexts are randomly allocated from the same
++ * context pool, so FECN must cover all contexts in the pool.
+ */
+- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
+- if (HFI1_CAP_IS_KSET(TID_RDMA))
+- rmt_count += num_kernel_contexts - 1;
+- if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+- user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
+- dd_dev_err(dd,
+- "RMT size is reducing the number of user receive contexts from %u to %d\n",
+- n_usr_ctxts,
+- user_rmt_reduced);
+- /* recalculate */
+- n_usr_ctxts = user_rmt_reduced;
++ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
++ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
++ : 0)
++ + n_usr_ctxts
++ + num_netdev_contexts
++ + NUM_NETDEV_MAP_ENTRIES;
++ if (rmt_count > NUM_MAP_ENTRIES) {
++ int over = rmt_count - NUM_MAP_ENTRIES;
++ /* try to squish user contexts, minimum of 1 */
++ if (over >= n_usr_ctxts) {
++ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
++ return -EINVAL;
++ }
++ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
++ n_usr_ctxts, n_usr_ctxts - over);
++ n_usr_ctxts -= over;
+ }
+
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
+@@ -14296,15 +14304,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+ }
+
+ /* return the number of RSM map table entries that will be used for QOS */
+-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
++static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
+ unsigned int *np)
+ {
+ int i;
+ unsigned int m, n;
+- u8 max_by_vl = 0;
++ uint max_by_vl = 0;
+
+ /* is QOS active at all? */
+- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
++ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
+ num_vls == 1 ||
+ krcvqsset <= 1)
+ goto no_qos;
+@@ -14362,7 +14370,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
+
+ if (!rmt)
+ goto bail;
+- rmt_entries = qos_rmt_entries(dd, &m, &n);
++ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
+ if (rmt_entries == 0)
+ goto bail;
+ qpns_per_vl = 1 << m;
+diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
+index de411884386bf..385e6cff0d279 100644
+--- a/drivers/infiniband/hw/hfi1/driver.c
++++ b/drivers/infiniband/hw/hfi1/driver.c
+@@ -1011,6 +1011,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
+ struct hfi1_packet packet;
+ int skip_pkt = 0;
+
++ if (!rcd->rcvhdrq)
++ return RCV_PKT_OK;
+ /* Control context will always use the slow path interrupt handler */
+ needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
+
+diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
+index 1783a6ea5427b..1c1172aeb36e9 100644
+--- a/drivers/infiniband/hw/hfi1/file_ops.c
++++ b/drivers/infiniband/hw/hfi1/file_ops.c
+@@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
+ unsigned long dim = from->nr_segs;
+ int idx;
+
++ if (!HFI1_CAP_IS_KSET(SDMA))
++ return -EINVAL;
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+@@ -1177,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
+ goto done;
+
+ ret = init_user_ctxt(fd, uctxt);
+- if (ret)
++ if (ret) {
++ hfi1_free_ctxt_rcv_groups(uctxt);
+ goto done;
++ }
+
+ user_init(uctxt);
+
+@@ -1314,12 +1318,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+- return -EFAULT;
++ ret = -EFAULT;
+
+ addr = arg + offsetof(struct hfi1_tid_info, length);
+- if (copy_to_user((void __user *)addr, &tinfo.length,
++ if (!ret && copy_to_user((void __user *)addr, &tinfo.length,
+ sizeof(tinfo.length)))
+ ret = -EFAULT;
++
++ if (ret)
++ hfi1_user_exp_rcv_invalid(fd, &tinfo);
+ }
+
+ return ret;
+diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
+index 31e63e245ea92..ddf3217893f86 100644
+--- a/drivers/infiniband/hw/hfi1/firmware.c
++++ b/drivers/infiniband/hw/hfi1/firmware.c
+@@ -1744,6 +1744,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+
+ if (!dd->platform_config.data) {
+ dd_dev_err(dd, "%s: Missing config file\n", __func__);
++ ret = -EINVAL;
+ goto bail;
+ }
+ ptr = (u32 *)dd->platform_config.data;
+@@ -1752,6 +1753,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+ ptr++;
+ if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
+ dd_dev_err(dd, "%s: Bad config file\n", __func__);
++ ret = -EINVAL;
+ goto bail;
+ }
+
+@@ -1775,6 +1777,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+ if (file_length > dd->platform_config.size) {
+ dd_dev_info(dd, "%s:File claims to be larger than read size\n",
+ __func__);
++ ret = -EINVAL;
+ goto bail;
+ } else if (file_length < dd->platform_config.size) {
+ dd_dev_info(dd,
+@@ -1795,6 +1798,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+ dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
+ __func__, (ptr - (u32 *)
+ dd->platform_config.data));
++ ret = -EINVAL;
+ goto bail;
+ }
+
+@@ -1838,6 +1842,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+ __func__, table_type,
+ (ptr - (u32 *)
+ dd->platform_config.data));
++ ret = -EINVAL;
+ goto bail; /* We don't trust this file now */
+ }
+ pcfgcache->config_tables[table_type].table = ptr;
+@@ -1857,6 +1862,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
+ __func__, table_type,
+ (ptr -
+ (u32 *)dd->platform_config.data));
++ ret = -EINVAL;
+ goto bail; /* We don't trust this file now */
+ }
+ pcfgcache->config_tables[table_type].table_metadata =
+diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
+index e3679d076eaaf..7facc04cc36c3 100644
+--- a/drivers/infiniband/hw/hfi1/init.c
++++ b/drivers/infiniband/hw/hfi1/init.c
+@@ -112,7 +112,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+- rcd->slow_handler = handle_receive_interrupt;
+
+ hfi1_set_seq_cnt(rcd, 1);
+
+@@ -333,6 +332,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
+ rcd->numa_id = numa;
+ rcd->rcv_array_groups = dd->rcv_entries.ngroups;
+ rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
++ rcd->slow_handler = handle_receive_interrupt;
++ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
+
+ mutex_init(&rcd->exp_mutex);
+@@ -487,7 +488,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
+ u16 shift, mult;
+ u64 src;
+ u32 current_egress_rate; /* Mbits /sec */
+- u32 max_pkt_time;
++ u64 max_pkt_time;
+ /*
+ * max_pkt_time is the maximum packet egress time in units
+ * of the fabric clock period 1/(805 MHz).
+@@ -873,18 +874,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
+ if (ret)
+ goto done;
+
+- /* allocate dummy tail memory for all receive contexts */
+- dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
+- sizeof(u64),
+- &dd->rcvhdrtail_dummy_dma,
+- GFP_KERNEL);
+-
+- if (!dd->rcvhdrtail_dummy_kvaddr) {
+- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
+- ret = -ENOMEM;
+- goto done;
+- }
+-
+ /* dd->rcd can be NULL if early initialization failed */
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
+ /*
+@@ -897,8 +886,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
+ if (!rcd)
+ continue;
+
+- rcd->do_interrupt = &handle_receive_interrupt;
+-
+ lastfail = hfi1_create_rcvhdrq(dd, rcd);
+ if (!lastfail)
+ lastfail = hfi1_setup_eagerbufs(rcd);
+@@ -1119,7 +1106,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
+ rcd->egrbufs.rcvtids = NULL;
+
+ for (e = 0; e < rcd->egrbufs.alloced; e++) {
+- if (rcd->egrbufs.buffers[e].dma)
++ if (rcd->egrbufs.buffers[e].addr)
+ dma_free_coherent(&dd->pcidev->dev,
+ rcd->egrbufs.buffers[e].len,
+ rcd->egrbufs.buffers[e].addr,
+@@ -1200,6 +1187,11 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
+ dd->tx_opstats = NULL;
+ kfree(dd->comp_vect);
+ dd->comp_vect = NULL;
++ if (dd->rcvhdrtail_dummy_kvaddr)
++ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
++ (void *)dd->rcvhdrtail_dummy_kvaddr,
++ dd->rcvhdrtail_dummy_dma);
++ dd->rcvhdrtail_dummy_kvaddr = NULL;
+ sdma_clean(dd, dd->num_sdma);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
+ }
+@@ -1297,6 +1289,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ goto bail;
+ }
+
++ /* allocate dummy tail memory for all receive contexts */
++ dd->rcvhdrtail_dummy_kvaddr =
++ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
++ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
++ if (!dd->rcvhdrtail_dummy_kvaddr) {
++ ret = -ENOMEM;
++ goto bail;
++ }
++
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
+ return dd;
+
+@@ -1504,13 +1505,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
+
+ free_credit_return(dd);
+
+- if (dd->rcvhdrtail_dummy_kvaddr) {
+- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+- (void *)dd->rcvhdrtail_dummy_kvaddr,
+- dd->rcvhdrtail_dummy_dma);
+- dd->rcvhdrtail_dummy_kvaddr = NULL;
+- }
+-
+ /*
+ * Free any resources still in use (usually just kernel contexts)
+ * at unload; we do for ctxtcnt, because that's what we allocate.
+diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
+index e594a961f513e..3e475814b6fa6 100644
+--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
++++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
+@@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
+ int ret;
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ if (!dev->tstats)
++ return -ENOMEM;
+
+ ret = priv->netdev_ops->ndo_init(dev);
+ if (ret)
+- return ret;
++ goto out_ret;
+
+ ret = hfi1_netdev_add_data(priv->dd,
+ qpn_from_mac(priv->netdev->dev_addr),
+ dev);
+ if (ret < 0) {
+ priv->netdev_ops->ndo_uninit(dev);
+- return ret;
++ goto out_ret;
+ }
+
+ return 0;
++out_ret:
++ free_percpu(dev->tstats);
++ dev->tstats = NULL;
++ return ret;
+ }
+
+ static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+ {
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
++ free_percpu(dev->tstats);
++ dev->tstats = NULL;
++
+ hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+ priv->netdev_ops->ndo_uninit(dev);
+@@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+ hfi1_ipoib_rxq_deinit(priv->netdev);
+
+ free_percpu(dev->tstats);
+-}
+-
+-static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
+-{
+- hfi1_ipoib_netdev_dtor(dev);
+- free_netdev(dev);
++ dev->tstats = NULL;
+ }
+
+ static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+@@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
+ priv->port_num = port_num;
+ priv->netdev_ops = netdev->netdev_ops;
+
+- netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+-
+ ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+ rc = hfi1_ipoib_txreq_init(priv);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+- hfi1_ipoib_free_rdma_netdev(netdev);
+ return rc;
+ }
+
+ rc = hfi1_ipoib_rxq_init(netdev);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+- hfi1_ipoib_free_rdma_netdev(netdev);
++ hfi1_ipoib_txreq_deinit(priv);
+ return rc;
+ }
+
++ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
++
+ netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+ netdev->needs_free_netdev = true;
+
+diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
+index 15b0cb0f363f4..a89d1bd99a332 100644
+--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
++++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
+@@ -254,7 +254,8 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+ txreq,
+ skb_frag_page(frag),
+ frag->bv_offset,
+- skb_frag_size(frag));
++ skb_frag_size(frag),
++ NULL, NULL, NULL);
+ if (unlikely(ret))
+ break;
+ }
+diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
+index 876cc78a22cca..94f1701667301 100644
+--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
++++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
+@@ -19,8 +19,7 @@ static int mmu_notifier_range_start(struct mmu_notifier *,
+ const struct mmu_notifier_range *);
+ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+ unsigned long, unsigned long);
+-static void do_remove(struct mmu_rb_handler *handler,
+- struct list_head *del_list);
++static void release_immediate(struct kref *refcount);
+ static void handle_remove(struct work_struct *work);
+
+ static const struct mmu_notifier_ops mn_opts = {
+@@ -80,6 +79,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
+ unsigned long flags;
+ struct list_head del_list;
+
++ /* Prevent freeing of mm until we are completely finished. */
++ mmgrab(handler->mn.mm);
++
+ /* Unregister first so we don't get any more notifications. */
+ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
+
+@@ -100,7 +102,14 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
+ }
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+- do_remove(handler, &del_list);
++ while (!list_empty(&del_list)) {
++ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
++ list_del(&rbnode->list);
++ kref_put(&rbnode->refcount, release_immediate);
++ }
++
++ /* Now the mm may be freed. */
++ mmdrop(handler->mn.mm);
+
+ kfree(handler);
+ }
+@@ -120,23 +129,30 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ spin_lock_irqsave(&handler->lock, flags);
+ node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+ if (node) {
+- ret = -EINVAL;
++ ret = -EEXIST;
+ goto unlock;
+ }
+ __mmu_int_rb_insert(mnode, &handler->root);
+- list_add(&mnode->list, &handler->lru_list);
+-
+- ret = handler->ops->insert(handler->ops_arg, mnode);
+- if (ret) {
+- __mmu_int_rb_remove(mnode, &handler->root);
+- list_del(&mnode->list); /* remove from LRU list */
+- }
++ list_add_tail(&mnode->list, &handler->lru_list);
+ mnode->handler = handler;
+ unlock:
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return ret;
+ }
+
++/* Caller must hold handler lock */
++struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
++ unsigned long addr, unsigned long len)
++{
++ struct mmu_rb_node *node;
++
++ trace_hfi1_mmu_rb_search(addr, len);
++ node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
++ if (node)
++ list_move_tail(&node->list, &handler->lru_list);
++ return node;
++}
++
+ /* Caller must hold handler lock */
+ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+ unsigned long addr,
+@@ -161,30 +177,46 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+ return node;
+ }
+
+-bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+- unsigned long addr, unsigned long len,
+- struct mmu_rb_node **rb_node)
++/*
++ * Must NOT call while holding mnode->handler->lock.
++ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
++ * spinlock.
++ */
++static void release_immediate(struct kref *refcount)
+ {
+- struct mmu_rb_node *node;
+- unsigned long flags;
+- bool ret = false;
++ struct mmu_rb_node *mnode =
++ container_of(refcount, struct mmu_rb_node, refcount);
++ mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
++}
+
+- if (current->mm != handler->mn.mm)
+- return ret;
++/* Caller must hold mnode->handler->lock */
++static void release_nolock(struct kref *refcount)
++{
++ struct mmu_rb_node *mnode =
++ container_of(refcount, struct mmu_rb_node, refcount);
++ list_move(&mnode->list, &mnode->handler->del_list);
++ queue_work(mnode->handler->wq, &mnode->handler->del_work);
++}
++
++/*
++ * struct mmu_rb_node->refcount kref_put() callback.
++ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
++ * handler->del_work on handler->wq.
++ * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
++ * Acquires mmu_rb_node->handler->lock; do not call while already holding
++ * handler->lock.
++ */
++void hfi1_mmu_rb_release(struct kref *refcount)
++{
++ struct mmu_rb_node *mnode =
++ container_of(refcount, struct mmu_rb_node, refcount);
++ struct mmu_rb_handler *handler = mnode->handler;
++ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+- node = __mmu_rb_search(handler, addr, len);
+- if (node) {
+- if (node->addr == addr && node->len == len)
+- goto unlock;
+- __mmu_int_rb_remove(node, &handler->root);
+- list_del(&node->list); /* remove from LRU list */
+- ret = true;
+- }
+-unlock:
++ list_move(&mnode->list, &mnode->handler->del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+- *rb_node = node;
+- return ret;
++ queue_work(handler->wq, &handler->del_work);
+ }
+
+ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
+@@ -200,8 +232,11 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
+ INIT_LIST_HEAD(&del_list);
+
+ spin_lock_irqsave(&handler->lock, flags);
+- list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
+- list) {
++ list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
++ /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
++ if (kref_read(&rbnode->refcount) > 1)
++ continue;
++
+ if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
+ &stop)) {
+ __mmu_int_rb_remove(rbnode, &handler->root);
+@@ -213,36 +248,11 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
+ }
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+- while (!list_empty(&del_list)) {
+- rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+- list_del(&rbnode->list);
+- handler->ops->remove(handler->ops_arg, rbnode);
++ list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
++ kref_put(&rbnode->refcount, release_immediate);
+ }
+ }
+
+-/*
+- * It is up to the caller to ensure that this function does not race with the
+- * mmu invalidate notifier which may be calling the users remove callback on
+- * 'node'.
+- */
+-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+- struct mmu_rb_node *node)
+-{
+- unsigned long flags;
+-
+- if (current->mm != handler->mn.mm)
+- return;
+-
+- /* Validity of handler and node pointers has been checked by caller. */
+- trace_hfi1_mmu_rb_remove(node->addr, node->len);
+- spin_lock_irqsave(&handler->lock, flags);
+- __mmu_int_rb_remove(node, &handler->root);
+- list_del(&node->list); /* remove from LRU list */
+- spin_unlock_irqrestore(&handler->lock, flags);
+-
+- handler->ops->remove(handler->ops_arg, node);
+-}
+-
+ static int mmu_notifier_range_start(struct mmu_notifier *mn,
+ const struct mmu_notifier_range *range)
+ {
+@@ -251,7 +261,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
+ struct rb_root_cached *root = &handler->root;
+ struct mmu_rb_node *node, *ptr = NULL;
+ unsigned long flags;
+- bool added = false;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
+@@ -260,38 +269,16 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
+ ptr = __mmu_int_rb_iter_next(node, range->start,
+ range->end - 1);
+ trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
+- if (handler->ops->invalidate(handler->ops_arg, node)) {
+- __mmu_int_rb_remove(node, root);
+- /* move from LRU list to delete list */
+- list_move(&node->list, &handler->del_list);
+- added = true;
+- }
++ /* Remove from rb tree and lru_list. */
++ __mmu_int_rb_remove(node, root);
++ list_del_init(&node->list);
++ kref_put(&node->refcount, release_nolock);
+ }
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+- if (added)
+- queue_work(handler->wq, &handler->del_work);
+-
+ return 0;
+ }
+
+-/*
+- * Call the remove function for the given handler and the list. This
+- * is expected to be called with a delete list extracted from handler.
+- * The caller should not be holding the handler lock.
+- */
+-static void do_remove(struct mmu_rb_handler *handler,
+- struct list_head *del_list)
+-{
+- struct mmu_rb_node *node;
+-
+- while (!list_empty(del_list)) {
+- node = list_first_entry(del_list, struct mmu_rb_node, list);
+- list_del(&node->list);
+- handler->ops->remove(handler->ops_arg, node);
+- }
+-}
+-
+ /*
+ * Work queue function to remove all nodes that have been queued up to
+ * be removed. The key feature is that mm->mmap_lock is not being held
+@@ -304,11 +291,16 @@ static void handle_remove(struct work_struct *work)
+ del_work);
+ struct list_head del_list;
+ unsigned long flags;
++ struct mmu_rb_node *node;
+
+ /* remove anything that is queued to get removed */
+ spin_lock_irqsave(&handler->lock, flags);
+ list_replace_init(&handler->del_list, &del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+- do_remove(handler, &del_list);
++ while (!list_empty(&del_list)) {
++ node = list_first_entry(&del_list, struct mmu_rb_node, list);
++ list_del(&node->list);
++ handler->ops->remove(handler->ops_arg, node);
++ }
+ }
+diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
+index 7417be2b9dc8a..dd2c4a0ae95b1 100644
+--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
++++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
+@@ -16,6 +16,7 @@ struct mmu_rb_node {
+ struct rb_node node;
+ struct mmu_rb_handler *handler;
+ struct list_head list;
++ struct kref refcount;
+ };
+
+ /*
+@@ -51,11 +52,11 @@ int hfi1_mmu_rb_register(void *ops_arg,
+ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
+ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode);
++void hfi1_mmu_rb_release(struct kref *refcount);
++
+ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
+-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+- struct mmu_rb_node *mnode);
+-bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+- unsigned long addr, unsigned long len,
+- struct mmu_rb_node **rb_node);
++struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
++ unsigned long addr,
++ unsigned long len);
+
+ #endif /* _HFI1_MMU_RB_H */
+diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
+index 3d42bd2b36bd4..51ae58c02b15c 100644
+--- a/drivers/infiniband/hw/hfi1/pio.c
++++ b/drivers/infiniband/hw/hfi1/pio.c
+@@ -913,8 +913,7 @@ void sc_disable(struct send_context *sc)
+ spin_unlock(&sc->release_lock);
+
+ write_seqlock(&sc->waitlock);
+- if (!list_empty(&sc->piowait))
+- list_move(&sc->piowait, &wake_list);
++ list_splice_init(&sc->piowait, &wake_list);
+ write_sequnlock(&sc->waitlock);
+ while (!list_empty(&wake_list)) {
+ struct iowait *wait;
+diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
+index 2b6c24b7b5865..26c62162759ba 100644
+--- a/drivers/infiniband/hw/hfi1/sdma.c
++++ b/drivers/infiniband/hw/hfi1/sdma.c
+@@ -838,8 +838,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ if (current->nr_cpus_allowed != 1)
+ goto out;
+
+- cpu_id = smp_processor_id();
+ rcu_read_lock();
++ cpu_id = smp_processor_id();
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
+
+@@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
+ kvfree(sde->tx_ring);
+ sde->tx_ring = NULL;
+ }
+- spin_lock_irq(&dd->sde_map_lock);
+- sdma_map_free(rcu_access_pointer(dd->sdma_map));
+- RCU_INIT_POINTER(dd->sdma_map, NULL);
+- spin_unlock_irq(&dd->sde_map_lock);
+- synchronize_rcu();
++ if (rcu_access_pointer(dd->sdma_map)) {
++ spin_lock_irq(&dd->sde_map_lock);
++ sdma_map_free(rcu_access_pointer(dd->sdma_map));
++ RCU_INIT_POINTER(dd->sdma_map, NULL);
++ spin_unlock_irq(&dd->sde_map_lock);
++ synchronize_rcu();
++ }
+ kfree(dd->per_sdma);
+ dd->per_sdma = NULL;
+
+@@ -1593,20 +1595,18 @@ static inline void sdma_unmap_desc(
+ {
+ switch (sdma_mapping_type(descp)) {
+ case SDMA_MAP_SINGLE:
+- dma_unmap_single(
+- &dd->pcidev->dev,
+- sdma_mapping_addr(descp),
+- sdma_mapping_len(descp),
+- DMA_TO_DEVICE);
++ dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
++ sdma_mapping_len(descp), DMA_TO_DEVICE);
+ break;
+ case SDMA_MAP_PAGE:
+- dma_unmap_page(
+- &dd->pcidev->dev,
+- sdma_mapping_addr(descp),
+- sdma_mapping_len(descp),
+- DMA_TO_DEVICE);
++ dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
++ sdma_mapping_len(descp), DMA_TO_DEVICE);
+ break;
+ }
++
++ if (descp->pinning_ctx && descp->ctx_put)
++ descp->ctx_put(descp->pinning_ctx);
++ descp->pinning_ctx = NULL;
+ }
+
+ /*
+@@ -3127,7 +3127,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
+ /* Add descriptor for coalesce buffer */
+ tx->desc_limit = MAX_DESC;
+ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
+- addr, tx->tlen);
++ addr, tx->tlen, NULL, NULL, NULL);
+ }
+
+ return 1;
+@@ -3158,20 +3158,22 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+ {
+ int rval = 0;
+
+- tx->num_desc++;
+- if ((unlikely(tx->num_desc == tx->desc_limit))) {
++ if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) {
+ rval = _extend_sdma_tx_descs(dd, tx);
+ if (rval) {
+ __sdma_txclean(dd, tx);
+ return rval;
+ }
+ }
++
+ /* finish the one just added */
+ make_tx_sdma_desc(
+ tx,
+ SDMA_MAP_NONE,
+ dd->sdma_pad_phys,
+- sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
++ sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)),
++ NULL, NULL, NULL);
++ tx->num_desc++;
+ _sdma_close_tx(dd, tx);
+ return rval;
+ }
+diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
+index d8170fcbfbdd5..7fdebab202c4f 100644
+--- a/drivers/infiniband/hw/hfi1/sdma.h
++++ b/drivers/infiniband/hw/hfi1/sdma.h
+@@ -595,7 +595,10 @@ static inline void make_tx_sdma_desc(
+ struct sdma_txreq *tx,
+ int type,
+ dma_addr_t addr,
+- size_t len)
++ size_t len,
++ void *pinning_ctx,
++ void (*ctx_get)(void *),
++ void (*ctx_put)(void *))
+ {
+ struct sdma_desc *desc = &tx->descp[tx->num_desc];
+
+@@ -612,6 +615,11 @@ static inline void make_tx_sdma_desc(
+ << SDMA_DESC0_PHY_ADDR_SHIFT) |
+ (((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
+ << SDMA_DESC0_BYTE_COUNT_SHIFT);
++
++ desc->pinning_ctx = pinning_ctx;
++ desc->ctx_put = ctx_put;
++ if (pinning_ctx && ctx_get)
++ ctx_get(pinning_ctx);
+ }
+
+ /* helper to extend txreq */
+@@ -631,14 +639,13 @@ static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
+ struct sdma_txreq *tx)
+ {
+- tx->descp[tx->num_desc].qw[0] |=
+- SDMA_DESC0_LAST_DESC_FLAG;
+- tx->descp[tx->num_desc].qw[1] |=
+- dd->default_desc1;
++ u16 last_desc = tx->num_desc - 1;
++
++ tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG;
++ tx->descp[last_desc].qw[1] |= dd->default_desc1;
+ if (tx->flags & SDMA_TXREQ_F_URGENT)
+- tx->descp[tx->num_desc].qw[1] |=
+- (SDMA_DESC1_HEAD_TO_HOST_FLAG |
+- SDMA_DESC1_INT_REQ_FLAG);
++ tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG |
++ SDMA_DESC1_INT_REQ_FLAG);
+ }
+
+ static inline int _sdma_txadd_daddr(
+@@ -646,15 +653,20 @@ static inline int _sdma_txadd_daddr(
+ int type,
+ struct sdma_txreq *tx,
+ dma_addr_t addr,
+- u16 len)
++ u16 len,
++ void *pinning_ctx,
++ void (*ctx_get)(void *),
++ void (*ctx_put)(void *))
+ {
+ int rval = 0;
+
+ make_tx_sdma_desc(
+ tx,
+ type,
+- addr, len);
++ addr, len,
++ pinning_ctx, ctx_get, ctx_put);
+ WARN_ON(len > tx->tlen);
++ tx->num_desc++;
+ tx->tlen -= len;
+ /* special cases for last */
+ if (!tx->tlen) {
+@@ -666,7 +678,6 @@ static inline int _sdma_txadd_daddr(
+ _sdma_close_tx(dd, tx);
+ }
+ }
+- tx->num_desc++;
+ return rval;
+ }
+
+@@ -677,6 +688,14 @@ static inline int _sdma_txadd_daddr(
+ * @page: page to map
+ * @offset: offset within the page
+ * @len: length in bytes
++ * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not
++ * added if coalesce buffer is used. E.g. pointer to pinned-page
++ * cache entry for the sdma_desc.
++ * @ctx_get: optional function to take reference to @pinning_ctx. Not called if
++ * @pinning_ctx is NULL.
++ * @ctx_put: optional function to release reference to @pinning_ctx after
++ * sdma_desc completes. May be called in interrupt context so must
++ * not sleep. Not called if @pinning_ctx is NULL.
+ *
+ * This is used to add a page/offset/length descriptor.
+ *
+@@ -691,7 +710,10 @@ static inline int sdma_txadd_page(
+ struct sdma_txreq *tx,
+ struct page *page,
+ unsigned long offset,
+- u16 len)
++ u16 len,
++ void *pinning_ctx,
++ void (*ctx_get)(void *),
++ void (*ctx_put)(void *))
+ {
+ dma_addr_t addr;
+ int rval;
+@@ -715,8 +737,8 @@ static inline int sdma_txadd_page(
+ return -ENOSPC;
+ }
+
+- return _sdma_txadd_daddr(
+- dd, SDMA_MAP_PAGE, tx, addr, len);
++ return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len,
++ pinning_ctx, ctx_get, ctx_put);
+ }
+
+ /**
+@@ -750,7 +772,8 @@ static inline int sdma_txadd_daddr(
+ return rval;
+ }
+
+- return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
++ return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len,
++ NULL, NULL, NULL);
+ }
+
+ /**
+@@ -796,8 +819,8 @@ static inline int sdma_txadd_kvaddr(
+ return -ENOSPC;
+ }
+
+- return _sdma_txadd_daddr(
+- dd, SDMA_MAP_SINGLE, tx, addr, len);
++ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len,
++ NULL, NULL, NULL);
+ }
+
+ struct iowait_work;
+@@ -1030,5 +1053,4 @@ u16 sdma_get_descq_cnt(void);
+ extern uint mod_num_sdma;
+
+ void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
+-
+ #endif
+diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
+index e262fb5c5ec61..85ae7293c2741 100644
+--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
++++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
+@@ -19,6 +19,9 @@
+ struct sdma_desc {
+ /* private: don't use directly */
+ u64 qw[2];
++ void *pinning_ctx;
++ /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */
++ void (*ctx_put)(void *ctx);
+ };
+
+ /**
+diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
+index 187e9244fe5ed..57900ebb7702e 100644
+--- a/drivers/infiniband/hw/hfi1/trace_mmu.h
++++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
+@@ -37,10 +37,6 @@ DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
+ TP_PROTO(unsigned long addr, unsigned long len),
+ TP_ARGS(addr, len));
+
+-DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
+- TP_PROTO(unsigned long addr, unsigned long len),
+- TP_ARGS(addr, len));
+-
+ DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
+ TP_PROTO(unsigned long addr, unsigned long len),
+ TP_ARGS(addr, len));
+diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+index 0c86e9d354f8e..1d2020c30ef3b 100644
+--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+@@ -23,18 +23,25 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
++static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
++ const struct mmu_notifier_range *range,
++ unsigned long cur_seq);
+ static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
+ struct tid_group *grp,
+ unsigned int start, u16 count,
+ u32 *tidlist, unsigned int *tididx,
+ unsigned int *pmapped);
+-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+- struct tid_group **grp);
++static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
++static void __clear_tid_node(struct hfi1_filedata *fd,
++ struct tid_rb_node *node);
+ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
+
+ static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
+ };
++static const struct mmu_interval_notifier_ops tid_cover_ops = {
++ .invalidate = tid_cover_invalidate,
++};
+
+ /*
+ * Initialize context and file private data needed for Expected
+@@ -153,16 +160,11 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
+ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
+ {
+ int pinned;
+- unsigned int npages;
++ unsigned int npages = tidbuf->npages;
+ unsigned long vaddr = tidbuf->vaddr;
+ struct page **pages = NULL;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
+
+- /* Get the number of pages the user buffer spans */
+- npages = num_user_pages(vaddr, tidbuf->length);
+- if (!npages)
+- return -EINVAL;
+-
+ if (npages > fd->uctxt->expected_count) {
+ dd_dev_err(dd, "Expected buffer too big\n");
+ return -EINVAL;
+@@ -189,7 +191,6 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
+ return pinned;
+ }
+ tidbuf->pages = pages;
+- tidbuf->npages = npages;
+ fd->tid_n_pinned += pinned;
+ return pinned;
+ }
+@@ -253,53 +254,66 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ tididx = 0, mapped, mapped_pages = 0;
+ u32 *tidlist = NULL;
+ struct tid_user_buf *tidbuf;
++ unsigned long mmu_seq = 0;
+
+ if (!PAGE_ALIGNED(tinfo->vaddr))
+ return -EINVAL;
++ if (tinfo->length == 0)
++ return -EINVAL;
+
+ tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
+ if (!tidbuf)
+ return -ENOMEM;
+
++ mutex_init(&tidbuf->cover_mutex);
+ tidbuf->vaddr = tinfo->vaddr;
+ tidbuf->length = tinfo->length;
++ tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
+ tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
+ GFP_KERNEL);
+ if (!tidbuf->psets) {
+- kfree(tidbuf);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto fail_release_mem;
++ }
++
++ if (fd->use_mn) {
++ ret = mmu_interval_notifier_insert(
++ &tidbuf->notifier, current->mm,
++ tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
++ &tid_cover_ops);
++ if (ret)
++ goto fail_release_mem;
++ mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
+ }
+
+ pinned = pin_rcv_pages(fd, tidbuf);
+ if (pinned <= 0) {
+- kfree(tidbuf->psets);
+- kfree(tidbuf);
+- return pinned;
++ ret = (pinned < 0) ? pinned : -ENOSPC;
++ goto fail_unpin;
+ }
+
+ /* Find sets of physically contiguous pages */
+ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
+
+- /*
+- * We don't need to access this under a lock since tid_used is per
+- * process and the same process cannot be in hfi1_user_exp_rcv_clear()
+- * and hfi1_user_exp_rcv_setup() at the same time.
+- */
++ /* Reserve the number of expected tids to be used. */
+ spin_lock(&fd->tid_lock);
+ if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
+ pageset_count = fd->tid_limit - fd->tid_used;
+ else
+ pageset_count = tidbuf->n_psets;
++ fd->tid_used += pageset_count;
+ spin_unlock(&fd->tid_lock);
+
+- if (!pageset_count)
+- goto bail;
++ if (!pageset_count) {
++ ret = -ENOSPC;
++ goto fail_unreserve;
++ }
+
+ ngroups = pageset_count / dd->rcv_entries.group_size;
+ tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
+ if (!tidlist) {
+ ret = -ENOMEM;
+- goto nomem;
++ goto fail_unreserve;
+ }
+
+ tididx = 0;
+@@ -395,43 +409,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ }
+ unlock:
+ mutex_unlock(&uctxt->exp_mutex);
+-nomem:
+ hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
+ mapped_pages, ret);
+- if (tididx) {
+- spin_lock(&fd->tid_lock);
+- fd->tid_used += tididx;
+- spin_unlock(&fd->tid_lock);
+- tinfo->tidcnt = tididx;
+- tinfo->length = mapped_pages * PAGE_SIZE;
+-
+- if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+- tidlist, sizeof(tidlist[0]) * tididx)) {
+- /*
+- * On failure to copy to the user level, we need to undo
+- * everything done so far so we don't leak resources.
+- */
+- tinfo->tidlist = (unsigned long)&tidlist;
+- hfi1_user_exp_rcv_clear(fd, tinfo);
+- tinfo->tidlist = 0;
+- ret = -EFAULT;
+- goto bail;
++
++ /* fail if nothing was programmed, set error if none provided */
++ if (tididx == 0) {
++ if (ret >= 0)
++ ret = -ENOSPC;
++ goto fail_unreserve;
++ }
++
++ /* adjust reserved tid_used to actual count */
++ spin_lock(&fd->tid_lock);
++ fd->tid_used -= pageset_count - tididx;
++ spin_unlock(&fd->tid_lock);
++
++ /* unpin all pages not covered by a TID */
++ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
++ false);
++
++ if (fd->use_mn) {
++ /* check for an invalidate during setup */
++ bool fail = false;
++
++ mutex_lock(&tidbuf->cover_mutex);
++ fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
++ mutex_unlock(&tidbuf->cover_mutex);
++
++ if (fail) {
++ ret = -EBUSY;
++ goto fail_unprogram;
+ }
+ }
+
+- /*
+- * If not everything was mapped (due to insufficient RcvArray entries,
+- * for example), unpin all unmapped pages so we can pin them nex time.
+- */
+- if (mapped_pages != pinned)
+- unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
+- (pinned - mapped_pages), false);
+-bail:
++ tinfo->tidcnt = tididx;
++ tinfo->length = mapped_pages * PAGE_SIZE;
++
++ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
++ tidlist, sizeof(tidlist[0]) * tididx)) {
++ ret = -EFAULT;
++ goto fail_unprogram;
++ }
++
++ if (fd->use_mn)
++ mmu_interval_notifier_remove(&tidbuf->notifier);
++ kfree(tidbuf->pages);
+ kfree(tidbuf->psets);
++ kfree(tidbuf);
+ kfree(tidlist);
++ return 0;
++
++fail_unprogram:
++ /* unprogram, unmap, and unpin all allocated TIDs */
++ tinfo->tidlist = (unsigned long)tidlist;
++ hfi1_user_exp_rcv_clear(fd, tinfo);
++ tinfo->tidlist = 0;
++ pinned = 0; /* nothing left to unpin */
++ pageset_count = 0; /* nothing left reserved */
++fail_unreserve:
++ spin_lock(&fd->tid_lock);
++ fd->tid_used -= pageset_count;
++ spin_unlock(&fd->tid_lock);
++fail_unpin:
++ if (fd->use_mn)
++ mmu_interval_notifier_remove(&tidbuf->notifier);
++ if (pinned > 0)
++ unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
++fail_release_mem:
+ kfree(tidbuf->pages);
++ kfree(tidbuf->psets);
+ kfree(tidbuf);
+- return ret > 0 ? 0 : ret;
++ kfree(tidlist);
++ return ret;
+ }
+
+ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+@@ -452,7 +501,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+
+ mutex_lock(&uctxt->exp_mutex);
+ for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
+- ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
++ ret = unprogram_rcvarray(fd, tidinfo[tididx]);
+ if (ret) {
+ hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
+ ret);
+@@ -707,6 +756,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ }
+
+ node->fdata = fd;
++ mutex_init(&node->invalidate_mutex);
+ node->phys = page_to_phys(pages[0]);
+ node->npages = npages;
+ node->rcventry = rcventry;
+@@ -722,11 +772,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
+- /*
+- * FIXME: This is in the wrong order, the notifier should be
+- * established before the pages are pinned by pin_rcv_pages.
+- */
+- mmu_interval_read_begin(&node->notifier);
+ }
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
+@@ -746,8 +791,7 @@ out_unmap:
+ return -EFAULT;
+ }
+
+-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+- struct tid_group **grp)
++static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
+ {
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+@@ -770,9 +814,6 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+ if (!node || node->rcventry != (uctxt->expected_base + rcventry))
+ return -EBADF;
+
+- if (grp)
+- *grp = node->grp;
+-
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
+@@ -780,23 +821,34 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+ return 0;
+ }
+
+-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
++static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+ {
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = uctxt->dd;
+
++ mutex_lock(&node->invalidate_mutex);
++ if (node->freed)
++ goto done;
++ node->freed = true;
++
+ trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
+ node->dma_addr);
+
+- /*
+- * Make sure device has seen the write before we unpin the
+- * pages.
+- */
++ /* Make sure device has seen the write before pages are unpinned */
+ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
+
+ unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
++done:
++ mutex_unlock(&node->invalidate_mutex);
++}
++
++static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
++{
++ struct hfi1_ctxtdata *uctxt = fd->uctxt;
++
++ __clear_tid_node(fd, node);
+
+ node->grp->used--;
+ node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
+@@ -855,10 +907,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ if (node->freed)
+ return true;
+
++ /* take action only if unmapping */
++ if (range->event != MMU_NOTIFY_UNMAP)
++ return true;
++
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
+ node->rcventry, node->npages, node->dma_addr);
+- node->freed = true;
++
++ /* clear the hardware rcvarray entry */
++ __clear_tid_node(fdata, node);
+
+ spin_lock(&fdata->invalid_lock);
+ if (fdata->invalid_tid_idx < uctxt->expected_count) {
+@@ -888,6 +946,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ return true;
+ }
+
++static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
++ const struct mmu_notifier_range *range,
++ unsigned long cur_seq)
++{
++ struct tid_user_buf *tidbuf =
++ container_of(mni, struct tid_user_buf, notifier);
++
++ /* take action only if unmapping */
++ if (range->event == MMU_NOTIFY_UNMAP) {
++ mutex_lock(&tidbuf->cover_mutex);
++ mmu_interval_set_seq(mni, cur_seq);
++ mutex_unlock(&tidbuf->cover_mutex);
++ }
++
++ return true;
++}
++
+ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ struct tid_rb_node *tnode)
+ {
+diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+index 8c53e416bf843..f8ee997d0050e 100644
+--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+@@ -16,6 +16,8 @@ struct tid_pageset {
+ };
+
+ struct tid_user_buf {
++ struct mmu_interval_notifier notifier;
++ struct mutex cover_mutex;
+ unsigned long vaddr;
+ unsigned long length;
+ unsigned int npages;
+@@ -27,6 +29,7 @@ struct tid_user_buf {
+ struct tid_rb_node {
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
++ struct mutex invalidate_mutex; /* covers hw removal */
+ unsigned long phys;
+ struct tid_group *grp;
+ u32 rcventry;
+diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
+index 7bce963e2ae69..36aaedc651456 100644
+--- a/drivers/infiniband/hw/hfi1/user_pages.c
++++ b/drivers/infiniband/hw/hfi1/user_pages.c
+@@ -29,33 +29,52 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
+ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
+ u32 nlocked, u32 npages)
+ {
+- unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
+- size = (cache_size * (1UL << 20)); /* convert to bytes */
+- unsigned int usr_ctxts =
+- dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+- bool can_lock = capable(CAP_IPC_LOCK);
++ unsigned long ulimit_pages;
++ unsigned long cache_limit_pages;
++ unsigned int usr_ctxts;
+
+ /*
+- * Calculate per-cache size. The calculation below uses only a quarter
+- * of the available per-context limit. This leaves space for other
+- * pinning. Should we worry about shared ctxts?
++ * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
+ */
+- cache_limit = (ulimit / usr_ctxts) / 4;
+-
+- /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
+- if (ulimit != (-1UL) && size > cache_limit)
+- size = cache_limit;
+-
+- /* Convert to number of pages */
+- size = DIV_ROUND_UP(size, PAGE_SIZE);
+-
+- pinned = atomic64_read(&mm->pinned_vm);
++ if (!capable(CAP_IPC_LOCK)) {
++ ulimit_pages =
++ DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);
++
++ /*
++ * Pinning these pages would exceed this process's locked memory
++ * limit.
++ */
++ if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
++ return false;
++
++ /*
++ * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
++ * caches. This fraction is then equally distributed among all
++ * existing user contexts. Note that if RLIMIT_MEMLOCK is
++ * 'unlimited' (-1), the value of this limit will be > 2^42 pages
++ * (2^64 / 2^12 / 2^8 / 2^2).
++ *
++ * The effectiveness of this check may be reduced if I/O occurs on
++ * some user contexts before all user contexts are created. This
++ * check assumes that this process is the only one using this
++ * context (e.g., the corresponding fd was not passed to another
++ * process for concurrent access) as there is no per-context,
++ * per-process tracking of pinned pages. It also assumes that each
++ * user context has only one cache to limit.
++ */
++ usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
++ if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
++ return false;
++ }
+
+- /* First, check the absolute limit against all pinned pages. */
+- if (pinned + npages >= ulimit && !can_lock)
++ /*
++ * Pinning these pages would exceed the size limit for this cache.
++ */
++ cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
++ if (nlocked + npages > cache_limit_pages)
+ return false;
+
+- return ((nlocked + npages) <= size) || can_lock;
++ return true;
+ }
+
+ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
+index 5b11c82827445..02bd62b857b75 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.c
++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
+@@ -24,7 +24,6 @@
+
+ #include "hfi.h"
+ #include "sdma.h"
+-#include "mmu_rb.h"
+ #include "user_sdma.h"
+ #include "verbs.h" /* for the headers */
+ #include "common.h" /* for struct hfi1_tid_info */
+@@ -39,11 +38,7 @@ static unsigned initial_pkt_count = 8;
+ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
+ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+-static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
+-static int pin_vector_pages(struct user_sdma_request *req,
+- struct user_sdma_iovec *iovec);
+-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+- unsigned start, unsigned npages);
++static void user_sdma_free_request(struct user_sdma_request *req);
+ static int check_header_template(struct user_sdma_request *req,
+ struct hfi1_pkt_header *hdr, u32 lrhlen,
+ u32 datalen);
+@@ -67,20 +62,21 @@ static int defer_packet_queue(
+ static void activate_packet_queue(struct iowait *wait, int reason);
+ static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
+ static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *arg2, bool *stop);
+ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+
+ static struct mmu_rb_ops sdma_rb_ops = {
+ .filter = sdma_rb_filter,
+- .insert = sdma_rb_insert,
+ .evict = sdma_rb_evict,
+ .remove = sdma_rb_remove,
+- .invalidate = sdma_rb_invalidate
+ };
+
++static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
++ struct user_sdma_txreq *tx,
++ struct user_sdma_iovec *iovec,
++ u32 *pkt_remaining);
++
+ static int defer_packet_queue(
+ struct sdma_engine *sde,
+ struct iowait_work *wait,
+@@ -161,9 +157,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ if (!pq->reqs)
+ goto pq_reqs_nomem;
+
+- pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+- sizeof(*pq->req_in_use),
+- GFP_KERNEL);
++ pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
+@@ -210,7 +204,7 @@ cq_comps_nomem:
+ cq_nomem:
+ kmem_cache_destroy(pq->txreq_cache);
+ pq_txreq_nomem:
+- kfree(pq->req_in_use);
++ bitmap_free(pq->req_in_use);
+ pq_reqs_no_in_use:
+ kfree(pq->reqs);
+ pq_reqs_nomem:
+@@ -249,15 +243,15 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
+- if (pq->handler)
+- hfi1_mmu_rb_unregister(pq->handler);
+ iowait_sdma_drain(&pq->busy);
+ /* Wait until all requests have been freed. */
+ wait_event_interruptible(
+ pq->wait,
+ !atomic_read(&pq->n_reqs));
+ kfree(pq->reqs);
+- kfree(pq->req_in_use);
++ if (pq->handler)
++ hfi1_mmu_rb_unregister(pq->handler);
++ bitmap_free(pq->req_in_use);
+ kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
+ kfree(pq);
+@@ -412,6 +406,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ ret = -EINVAL;
+ goto free_req;
+ }
++
+ /* Copy the header from the user buffer */
+ ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
+ sizeof(req->hdr));
+@@ -486,9 +481,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
+- ret = pin_vector_pages(req, &req->iovs[i]);
+- if (ret) {
+- req->data_iovs = i;
++ if (req->iovs[i].iov.iov_len == 0) {
++ ret = -EINVAL;
+ goto free_req;
+ }
+ req->data_len += req->iovs[i].iov.iov_len;
+@@ -586,7 +580,7 @@ free_req:
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+- user_sdma_free_request(req, true);
++ user_sdma_free_request(req);
+ pq_update(pq);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
+@@ -698,48 +692,6 @@ static int user_sdma_txadd_ahg(struct user_sdma_request *req,
+ return ret;
+ }
+
+-static int user_sdma_txadd(struct user_sdma_request *req,
+- struct user_sdma_txreq *tx,
+- struct user_sdma_iovec *iovec, u32 datalen,
+- u32 *queued_ptr, u32 *data_sent_ptr,
+- u64 *iov_offset_ptr)
+-{
+- int ret;
+- unsigned int pageidx, len;
+- unsigned long base, offset;
+- u64 iov_offset = *iov_offset_ptr;
+- u32 queued = *queued_ptr, data_sent = *data_sent_ptr;
+- struct hfi1_user_sdma_pkt_q *pq = req->pq;
+-
+- base = (unsigned long)iovec->iov.iov_base;
+- offset = offset_in_page(base + iovec->offset + iov_offset);
+- pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >>
+- PAGE_SHIFT);
+- len = offset + req->info.fragsize > PAGE_SIZE ?
+- PAGE_SIZE - offset : req->info.fragsize;
+- len = min((datalen - queued), len);
+- ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx],
+- offset, len);
+- if (ret) {
+- SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret);
+- return ret;
+- }
+- iov_offset += len;
+- queued += len;
+- data_sent += len;
+- if (unlikely(queued < datalen && pageidx == iovec->npages &&
+- req->iov_idx < req->data_iovs - 1)) {
+- iovec->offset += iov_offset;
+- iovec = &req->iovs[++req->iov_idx];
+- iov_offset = 0;
+- }
+-
+- *queued_ptr = queued;
+- *data_sent_ptr = data_sent;
+- *iov_offset_ptr = iov_offset;
+- return ret;
+-}
+-
+ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
+ {
+ int ret = 0;
+@@ -771,8 +723,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
+ maxpkts = req->info.npkts - req->seqnum;
+
+ while (npkts < maxpkts) {
+- u32 datalen = 0, queued = 0, data_sent = 0;
+- u64 iov_offset = 0;
++ u32 datalen = 0;
+
+ /*
+ * Check whether any of the completions have come back
+@@ -865,27 +816,17 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
+ goto free_txreq;
+ }
+
+- /*
+- * If the request contains any data vectors, add up to
+- * fragsize bytes to the descriptor.
+- */
+- while (queued < datalen &&
+- (req->sent + data_sent) < req->data_len) {
+- ret = user_sdma_txadd(req, tx, iovec, datalen,
+- &queued, &data_sent, &iov_offset);
+- if (ret)
+- goto free_txreq;
+- }
+- /*
+- * The txreq was submitted successfully so we can update
+- * the counters.
+- */
+ req->koffset += datalen;
+ if (req_opcode(req->info.ctrl) == EXPECTED)
+ req->tidoffset += datalen;
+- req->sent += data_sent;
+- if (req->data_len)
+- iovec->offset += iov_offset;
++ req->sent += datalen;
++ while (datalen) {
++ ret = add_system_pages_to_sdma_packet(req, tx, iovec,
++ &datalen);
++ if (ret)
++ goto free_txreq;
++ iovec = &req->iovs[req->iov_idx];
++ }
+ list_add_tail(&tx->txreq.list, &req->txps);
+ /*
+ * It is important to increment this here as it is used to
+@@ -922,133 +863,14 @@ free_tx:
+ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+ {
+ struct evict_data evict_data;
++ struct mmu_rb_handler *handler = pq->handler;
+
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+- hfi1_mmu_rb_evict(pq->handler, &evict_data);
++ hfi1_mmu_rb_evict(handler, &evict_data);
+ return evict_data.cleared;
+ }
+
+-static int pin_sdma_pages(struct user_sdma_request *req,
+- struct user_sdma_iovec *iovec,
+- struct sdma_mmu_node *node,
+- int npages)
+-{
+- int pinned, cleared;
+- struct page **pages;
+- struct hfi1_user_sdma_pkt_q *pq = req->pq;
+-
+- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+- if (!pages)
+- return -ENOMEM;
+- memcpy(pages, node->pages, node->npages * sizeof(*pages));
+-
+- npages -= node->npages;
+-retry:
+- if (!hfi1_can_pin_pages(pq->dd, current->mm,
+- atomic_read(&pq->n_locked), npages)) {
+- cleared = sdma_cache_evict(pq, npages);
+- if (cleared >= npages)
+- goto retry;
+- }
+- pinned = hfi1_acquire_user_pages(current->mm,
+- ((unsigned long)iovec->iov.iov_base +
+- (node->npages * PAGE_SIZE)), npages, 0,
+- pages + node->npages);
+- if (pinned < 0) {
+- kfree(pages);
+- return pinned;
+- }
+- if (pinned != npages) {
+- unpin_vector_pages(current->mm, pages, node->npages, pinned);
+- return -EFAULT;
+- }
+- kfree(node->pages);
+- node->rb.len = iovec->iov.iov_len;
+- node->pages = pages;
+- atomic_add(pinned, &pq->n_locked);
+- return pinned;
+-}
+-
+-static void unpin_sdma_pages(struct sdma_mmu_node *node)
+-{
+- if (node->npages) {
+- unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+- node->npages);
+- atomic_sub(node->npages, &node->pq->n_locked);
+- }
+-}
+-
+-static int pin_vector_pages(struct user_sdma_request *req,
+- struct user_sdma_iovec *iovec)
+-{
+- int ret = 0, pinned, npages;
+- struct hfi1_user_sdma_pkt_q *pq = req->pq;
+- struct sdma_mmu_node *node = NULL;
+- struct mmu_rb_node *rb_node;
+- struct iovec *iov;
+- bool extracted;
+-
+- extracted =
+- hfi1_mmu_rb_remove_unless_exact(pq->handler,
+- (unsigned long)
+- iovec->iov.iov_base,
+- iovec->iov.iov_len, &rb_node);
+- if (rb_node) {
+- node = container_of(rb_node, struct sdma_mmu_node, rb);
+- if (!extracted) {
+- atomic_inc(&node->refcount);
+- iovec->pages = node->pages;
+- iovec->npages = node->npages;
+- iovec->node = node;
+- return 0;
+- }
+- }
+-
+- if (!node) {
+- node = kzalloc(sizeof(*node), GFP_KERNEL);
+- if (!node)
+- return -ENOMEM;
+-
+- node->rb.addr = (unsigned long)iovec->iov.iov_base;
+- node->pq = pq;
+- atomic_set(&node->refcount, 0);
+- }
+-
+- iov = &iovec->iov;
+- npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len);
+- if (node->npages < npages) {
+- pinned = pin_sdma_pages(req, iovec, node, npages);
+- if (pinned < 0) {
+- ret = pinned;
+- goto bail;
+- }
+- node->npages += pinned;
+- npages = node->npages;
+- }
+- iovec->pages = node->pages;
+- iovec->npages = npages;
+- iovec->node = node;
+-
+- ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
+- if (ret) {
+- iovec->node = NULL;
+- goto bail;
+- }
+- return 0;
+-bail:
+- unpin_sdma_pages(node);
+- kfree(node);
+- return ret;
+-}
+-
+-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+- unsigned start, unsigned npages)
+-{
+- hfi1_release_user_pages(mm, pages + start, npages, false);
+- kfree(pages);
+-}
+-
+ static int check_header_template(struct user_sdma_request *req,
+ struct hfi1_pkt_header *hdr, u32 lrhlen,
+ u32 datalen)
+@@ -1390,7 +1212,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+- user_sdma_free_request(req, false);
++ user_sdma_free_request(req);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
+ }
+@@ -1401,10 +1223,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
+ wake_up(&pq->wait);
+ }
+
+-static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
++static void user_sdma_free_request(struct user_sdma_request *req)
+ {
+- int i;
+-
+ if (!list_empty(&req->txps)) {
+ struct sdma_txreq *t, *p;
+
+@@ -1417,21 +1237,6 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
+ }
+ }
+
+- for (i = 0; i < req->data_iovs; i++) {
+- struct sdma_mmu_node *node = req->iovs[i].node;
+-
+- if (!node)
+- continue;
+-
+- req->iovs[i].node = NULL;
+-
+- if (unpin)
+- hfi1_mmu_rb_remove(req->pq->handler,
+- &node->rb);
+- else
+- atomic_dec(&node->refcount);
+- }
+-
+ kfree(req->tids);
+ clear_bit(req->info.comp_idx, req->pq->req_in_use);
+ }
+@@ -1449,21 +1254,374 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ idx, state, ret);
+ }
+
+-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+- unsigned long len)
++static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
++ unsigned int start, unsigned int npages)
+ {
+- return (bool)(node->addr == addr);
++ hfi1_release_user_pages(mm, pages + start, npages, false);
++ kfree(pages);
+ }
+
+-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
++static void free_system_node(struct sdma_mmu_node *node)
+ {
+- struct sdma_mmu_node *node =
+- container_of(mnode, struct sdma_mmu_node, rb);
++ if (node->npages) {
++ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
++ node->npages);
++ atomic_sub(node->npages, &node->pq->n_locked);
++ }
++ kfree(node);
++}
++
++/*
++ * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
++ * from being released until after rb_node is assigned to an SDMA descriptor
++ * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
++ * virtual address range for rb_node is invalidated between now and then.
++ */
++static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
++ unsigned long start,
++ unsigned long end)
++{
++ struct mmu_rb_node *rb_node;
++ unsigned long flags;
++
++ spin_lock_irqsave(&handler->lock, flags);
++ rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
++ if (!rb_node) {
++ spin_unlock_irqrestore(&handler->lock, flags);
++ return NULL;
++ }
++
++ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
++ kref_get(&rb_node->refcount);
++ spin_unlock_irqrestore(&handler->lock, flags);
++
++ return container_of(rb_node, struct sdma_mmu_node, rb);
++}
++
++static int pin_system_pages(struct user_sdma_request *req,
++ uintptr_t start_address, size_t length,
++ struct sdma_mmu_node *node, int npages)
++{
++ struct hfi1_user_sdma_pkt_q *pq = req->pq;
++ int pinned, cleared;
++ struct page **pages;
++
++ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
++ if (!pages)
++ return -ENOMEM;
++
++retry:
++ if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
++ npages)) {
++ SDMA_DBG(req, "Evicting: nlocked %u npages %u",
++ atomic_read(&pq->n_locked), npages);
++ cleared = sdma_cache_evict(pq, npages);
++ if (cleared >= npages)
++ goto retry;
++ }
++
++ SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
++ start_address, node->npages, npages);
++ pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
++ pages);
+
+- atomic_inc(&node->refcount);
++ if (pinned < 0) {
++ kfree(pages);
++ SDMA_DBG(req, "pinned %d", pinned);
++ return pinned;
++ }
++ if (pinned != npages) {
++ unpin_vector_pages(current->mm, pages, node->npages, pinned);
++ SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
++ return -EFAULT;
++ }
++ node->rb.addr = start_address;
++ node->rb.len = length;
++ node->pages = pages;
++ node->npages = npages;
++ atomic_add(pinned, &pq->n_locked);
++ SDMA_DBG(req, "done. pinned %d", pinned);
+ return 0;
+ }
+
++/*
++ * kref refcount on *node_p will be 2 on successful addition: one kref from
++ * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
++ * released until after *node_p is assigned to an SDMA descriptor (struct
++ * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
++ * address range for *node_p is invalidated between now and then.
++ */
++static int add_system_pinning(struct user_sdma_request *req,
++ struct sdma_mmu_node **node_p,
++ unsigned long start, unsigned long len)
++
++{
++ struct hfi1_user_sdma_pkt_q *pq = req->pq;
++ struct sdma_mmu_node *node;
++ int ret;
++
++ node = kzalloc(sizeof(*node), GFP_KERNEL);
++ if (!node)
++ return -ENOMEM;
++
++ /* First kref "moves" to mmu_rb_handler */
++ kref_init(&node->rb.refcount);
++
++ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
++ kref_get(&node->rb.refcount);
++
++ node->pq = pq;
++ ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
++ if (ret == 0) {
++ ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
++ if (ret)
++ free_system_node(node);
++ else
++ *node_p = node;
++
++ return ret;
++ }
++
++ kfree(node);
++ return ret;
++}
++
++static int get_system_cache_entry(struct user_sdma_request *req,
++ struct sdma_mmu_node **node_p,
++ size_t req_start, size_t req_len)
++{
++ struct hfi1_user_sdma_pkt_q *pq = req->pq;
++ u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
++ u64 end = PFN_ALIGN(req_start + req_len);
++ struct mmu_rb_handler *handler = pq->handler;
++ int ret;
++
++ if ((end - start) == 0) {
++ SDMA_DBG(req,
++ "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
++ req_start, req_len, start, end);
++ return -EINVAL;
++ }
++
++ SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
++
++ while (1) {
++ struct sdma_mmu_node *node =
++ find_system_node(handler, start, end);
++ u64 prepend_len = 0;
++
++ SDMA_DBG(req, "node %p start %llx end %llu", node, start, end);
++ if (!node) {
++ ret = add_system_pinning(req, node_p, start,
++ end - start);
++ if (ret == -EEXIST) {
++ /*
++ * Another execution context has inserted a
++				 * conflicting entry first.
++ */
++ continue;
++ }
++ return ret;
++ }
++
++ if (node->rb.addr <= start) {
++ /*
++ * This entry covers at least part of the region. If it doesn't extend
++ * to the end, then this will be called again for the next segment.
++ */
++ *node_p = node;
++ return 0;
++ }
++
++ SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
++ node->rb.addr, kref_read(&node->rb.refcount));
++ prepend_len = node->rb.addr - start;
++
++ /*
++ * This node will not be returned, instead a new node
++ * will be. So release the reference.
++ */
++ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
++
++ /* Prepend a node to cover the beginning of the allocation */
++ ret = add_system_pinning(req, node_p, start, prepend_len);
++ if (ret == -EEXIST) {
++		/* Another execution context has inserted a conflicting entry first. */
++ continue;
++ }
++ return ret;
++ }
++}
++
++static void sdma_mmu_rb_node_get(void *ctx)
++{
++ struct mmu_rb_node *node = ctx;
++
++ kref_get(&node->refcount);
++}
++
++static void sdma_mmu_rb_node_put(void *ctx)
++{
++ struct sdma_mmu_node *node = ctx;
++
++ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
++}
++
++static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
++ struct user_sdma_txreq *tx,
++ struct sdma_mmu_node *cache_entry,
++ size_t start,
++ size_t from_this_cache_entry)
++{
++ struct hfi1_user_sdma_pkt_q *pq = req->pq;
++ unsigned int page_offset;
++ unsigned int from_this_page;
++ size_t page_index;
++ void *ctx;
++ int ret;
++
++ /*
++ * Because the cache may be more fragmented than the memory that is being accessed,
++ * it's not strictly necessary to have a descriptor per cache entry.
++ */
++
++ while (from_this_cache_entry) {
++ page_index = PFN_DOWN(start - cache_entry->rb.addr);
++
++ if (page_index >= cache_entry->npages) {
++ SDMA_DBG(req,
++ "Request for page_index %zu >= cache_entry->npages %u",
++ page_index, cache_entry->npages);
++ return -EINVAL;
++ }
++
++ page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
++ from_this_page = PAGE_SIZE - page_offset;
++
++ if (from_this_page < from_this_cache_entry) {
++ ctx = NULL;
++ } else {
++ /*
++ * In the case they are equal the next line has no practical effect,
++ * but it's better to do a register to register copy than a conditional
++ * branch.
++ */
++ from_this_page = from_this_cache_entry;
++ ctx = cache_entry;
++ }
++
++ ret = sdma_txadd_page(pq->dd, &tx->txreq,
++ cache_entry->pages[page_index],
++ page_offset, from_this_page,
++ ctx,
++ sdma_mmu_rb_node_get,
++ sdma_mmu_rb_node_put);
++ if (ret) {
++ /*
++ * When there's a failure, the entire request is freed by
++ * user_sdma_send_pkts().
++ */
++ SDMA_DBG(req,
++ "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
++ ret, page_index, page_offset, from_this_page);
++ return ret;
++ }
++ start += from_this_page;
++ from_this_cache_entry -= from_this_page;
++ }
++ return 0;
++}
++
++static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
++ struct user_sdma_txreq *tx,
++ struct user_sdma_iovec *iovec,
++ size_t from_this_iovec)
++{
++ while (from_this_iovec > 0) {
++ struct sdma_mmu_node *cache_entry;
++ size_t from_this_cache_entry;
++ size_t start;
++ int ret;
++
++ start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
++ ret = get_system_cache_entry(req, &cache_entry, start,
++ from_this_iovec);
++ if (ret) {
++ SDMA_DBG(req, "pin system segment failed %d", ret);
++ return ret;
++ }
++
++ from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
++ if (from_this_cache_entry > from_this_iovec)
++ from_this_cache_entry = from_this_iovec;
++
++ ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
++ from_this_cache_entry);
++
++ /*
++ * Done adding cache_entry to zero or more sdma_desc. Can
++ * kref_put() the "safety" kref taken under
++ * get_system_cache_entry().
++ */
++ kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
++
++ if (ret) {
++ SDMA_DBG(req, "add system segment failed %d", ret);
++ return ret;
++ }
++
++ iovec->offset += from_this_cache_entry;
++ from_this_iovec -= from_this_cache_entry;
++ }
++
++ return 0;
++}
++
++static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
++ struct user_sdma_txreq *tx,
++ struct user_sdma_iovec *iovec,
++ u32 *pkt_data_remaining)
++{
++ size_t remaining_to_add = *pkt_data_remaining;
++ /*
++ * Walk through iovec entries, ensure the associated pages
++ * are pinned and mapped, add data to the packet until no more
++ * data remains to be added.
++ */
++ while (remaining_to_add > 0) {
++ struct user_sdma_iovec *cur_iovec;
++ size_t from_this_iovec;
++ int ret;
++
++ cur_iovec = iovec;
++ from_this_iovec = iovec->iov.iov_len - iovec->offset;
++
++ if (from_this_iovec > remaining_to_add) {
++ from_this_iovec = remaining_to_add;
++ } else {
++ /* The current iovec entry will be consumed by this pass. */
++ req->iov_idx++;
++ iovec++;
++ }
++
++ ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
++ from_this_iovec);
++ if (ret)
++ return ret;
++
++ remaining_to_add -= from_this_iovec;
++ }
++ *pkt_data_remaining = remaining_to_add;
++
++ return 0;
++}
++
++static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
++ unsigned long len)
++{
++ return (bool)(node->addr == addr);
++}
++
+ /*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+@@ -1476,10 +1634,6 @@ static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+- /* is this node still being used? */
+- if (atomic_read(&node->refcount))
+- return 0; /* keep this node */
+-
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+@@ -1495,16 +1649,5 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+- unpin_sdma_pages(node);
+- kfree(node);
+-}
+-
+-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+-{
+- struct sdma_mmu_node *node =
+- container_of(mnode, struct sdma_mmu_node, rb);
+-
+- if (!atomic_read(&node->refcount))
+- return 1;
+- return 0;
++ free_system_node(node);
+ }
+diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
+index ea56eb57e6568..548347d4c5bc2 100644
+--- a/drivers/infiniband/hw/hfi1/user_sdma.h
++++ b/drivers/infiniband/hw/hfi1/user_sdma.h
+@@ -104,7 +104,6 @@ struct hfi1_user_sdma_comp_q {
+ struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct hfi1_user_sdma_pkt_q *pq;
+- atomic_t refcount;
+ struct page **pages;
+ unsigned int npages;
+ };
+@@ -112,16 +111,11 @@ struct sdma_mmu_node {
+ struct user_sdma_iovec {
+ struct list_head list;
+ struct iovec iov;
+- /* number of pages in this vector */
+- unsigned int npages;
+- /* array of pinned pages for this vector */
+- struct page **pages;
+ /*
+ * offset into the virtual address space of the vector at
+ * which we last left off.
+ */
+ u64 offset;
+- struct sdma_mmu_node *node;
+ };
+
+ /* evict operation argument */
+diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
+index 26bea51869bf0..dcc167dcfc61b 100644
+--- a/drivers/infiniband/hw/hfi1/verbs.c
++++ b/drivers/infiniband/hw/hfi1/verbs.c
+@@ -778,8 +778,8 @@ static int build_verbs_tx_desc(
+
+ /* add icrc, lt byte, and padding to flit */
+ if (extra_bytes)
+- ret = sdma_txadd_daddr(sde->dd, &tx->txreq,
+- sde->dd->sdma_pad_phys, extra_bytes);
++ ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys,
++ extra_bytes);
+
+ bail_txadd:
+ return ret;
+@@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num,
+ 4096 : hfi1_max_mtu), IB_MTU_4096);
+ props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
+ mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+- props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
+- ib_mtu_enum_to_int(props->max_mtu);
++ props->phys_mtu = hfi1_max_mtu;
+
+ return 0;
+ }
+diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
+index c3f0f8d877c37..cc6324d2d1ddc 100644
+--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
++++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
+@@ -67,7 +67,8 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ &tx->txreq,
+ skb_frag_page(frag),
+ skb_frag_off(frag),
+- skb_frag_size(frag));
++ skb_frag_size(frag),
++ NULL, NULL, NULL);
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
+index 9467c39e3d288..e02107123c970 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_device.h
++++ b/drivers/infiniband/hw/hns/hns_roce_device.h
+@@ -121,6 +121,7 @@
+ #define HNS_ROCE_CQ_BANK_NUM 4
+
+ #define CQ_BANKID_SHIFT 2
++#define CQ_BANKID_MASK GENMASK(1, 0)
+
+ /* The chip implementation of the consumer index is calculated
+ * according to twice the actual EQ depth
+@@ -559,6 +560,11 @@ struct hns_roce_cmd_context {
+ u16 busy;
+ };
+
++enum hns_roce_cmdq_state {
++ HNS_ROCE_CMDQ_STATE_NORMAL,
++ HNS_ROCE_CMDQ_STATE_FATAL_ERR,
++};
++
+ struct hns_roce_cmdq {
+ struct dma_pool *pool;
+ struct semaphore poll_sem;
+@@ -578,6 +584,7 @@ struct hns_roce_cmdq {
+ * close device, switch into poll mode(non event mode)
+ */
+ u8 use_events;
++ enum hns_roce_cmdq_state state;
+ };
+
+ struct hns_roce_cmd_mailbox {
+@@ -753,7 +760,6 @@ struct hns_roce_caps {
+ u32 num_pi_qps;
+ u32 reserved_qps;
+ int num_qpc_timer;
+- int num_cqc_timer;
+ int num_srqs;
+ u32 max_wqes;
+ u32 max_srq_wrs;
+diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
+index fa15d79eabb36..267474070f271 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
++++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
+@@ -597,11 +597,12 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
+ }
+
+ /* Set HEM base address(128K/page, pa) to Hardware */
+- if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) {
++ ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
++ if (ret) {
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
+- ret = -ENODEV;
+- dev_err(dev, "set HEM base address to HW failed.\n");
++ dev_err(dev, "set HEM base address to HW failed, ret = %d.\n",
++ ret);
+ goto out;
+ }
+
+diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+index d5f3faa1627a4..4554d3e78b37b 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+@@ -33,6 +33,7 @@
+ #include <linux/acpi.h>
+ #include <linux/etherdevice.h>
+ #include <linux/interrupt.h>
++#include <linux/iopoll.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <net/addrconf.h>
+@@ -81,7 +82,6 @@ static const u32 hns_roce_op_code[] = {
+ HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
+ HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
+ HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
+- HR_OPC_MAP(LOCAL_INV, LOCAL_INV),
+ HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
+ HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
+ HR_OPC_MAP(REG_MR, FAST_REG_PMR),
+@@ -148,8 +148,15 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
+ aseg->cmp_data = 0;
+ }
+
+- roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
++}
++
++static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
++{
++ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
++ return 0;
++
++ return HNS_ROCE_SGE_IN_WQE;
+ }
+
+ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+@@ -157,16 +164,16 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ unsigned int *sge_idx, u32 msg_len)
+ {
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+- unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg);
+- unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
++ unsigned int std_sge_num;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+- if (msg_len > ext_sge_sz) {
++ std_sge_num = get_std_sge_num(qp);
++ if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+@@ -186,7 +193,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+- idx += len / dseg_len;
++ idx += len / HNS_ROCE_SGE_SIZE;
+
+ i++;
+ if (i >= wr->num_sge)
+@@ -201,7 +208,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+- idx += left_len_in_pg / dseg_len;
++ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+@@ -270,8 +277,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+- roce_set_bit(rc_sq_wqe->byte_20,
+- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
++ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+@@ -279,17 +285,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ dseg += wr->sg_list[i].length;
+ }
+ } else {
+- roce_set_bit(rc_sq_wqe->byte_20,
+- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
++ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+- roce_set_field(rc_sq_wqe->byte_16,
+- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+- curr_idx - *sge_idx);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
+ }
+
+ *sge_idx = curr_idx;
+@@ -308,12 +310,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ int j = 0;
+ int i;
+
+- roce_set_field(rc_sq_wqe->byte_20,
+- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+- (*sge_ind) & (qp->sge.sge_cnt - 1));
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
++ (*sge_ind) & (qp->sge.sge_cnt - 1));
+
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
+ !!(wr->send_flags & IB_SEND_INLINE));
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+@@ -338,9 +338,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ valid_num_sge - HNS_ROCE_SGE_IN_WQE);
+ }
+
+- roce_set_field(rc_sq_wqe->byte_16,
+- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+
+ return 0;
+ }
+@@ -411,8 +409,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+- roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+- V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return 0;
+ }
+@@ -423,21 +420,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ struct ib_device *ib_dev = ah->ibah.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+- roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
+-
+- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
+- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
+- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+- V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
+- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
+- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
+
+ if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
+ return -EINVAL;
+
+- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
+- V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
+
+ ud_sq_wqe->sgid_index = ah->av.gid_index;
+
+@@ -447,10 +438,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+- roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+- ah->av.vlan_en);
+- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
+- V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
+
+ return 0;
+ }
+@@ -475,27 +464,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
+
+ ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
+ !!(wr->send_flags & IB_SEND_SIGNALED));
+-
+- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
+ !!(wr->send_flags & IB_SEND_SOLICITED));
+
+- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
+- V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
+-
+- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+- V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
+-
+- roce_set_field(ud_sq_wqe->byte_20,
+- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+- curr_idx & (qp->sge.sge_cnt - 1));
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
++ curr_idx & (qp->sge.sge_cnt - 1));
+
+ ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey);
+- roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
+- V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
+
+ ret = fill_ud_av(ud_sq_wqe, ah);
+ if (ret)
+@@ -515,8 +496,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
+- owner_bit);
++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
+
+ return 0;
+ }
+@@ -551,9 +531,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ else
+ ret = -EOPNOTSUPP;
+ break;
+- case IB_WR_LOCAL_INV:
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+- fallthrough;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+@@ -564,11 +541,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ if (unlikely(ret))
+ return ret;
+
+- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+- V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return ret;
+ }
++
+ static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+@@ -589,13 +566,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ if (WARN_ON(ret))
+ return ret;
+
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
+ (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+@@ -615,8 +592,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
+- owner_bit);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
+
+ return ret;
+ }
+@@ -677,16 +653,15 @@ static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+ {
++#define HNS_ROCE_SL_SHIFT 2
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
+- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
+- V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
+- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
+- V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2);
+- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
+- V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
++ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
++ qp->sl >> HNS_ROCE_SL_SHIFT);
++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
+ }
+@@ -756,7 +731,8 @@ out:
+ qp->sq.head += nreq;
+ qp->next_sge = sge_idx;
+
+- if (nreq == 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
++ if (nreq == 1 && !ret &&
++ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
+@@ -1050,9 +1026,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
+ unsigned long instance_stage,
+ unsigned long reset_stage)
+ {
++#define HW_RESET_TIMEOUT_US 1000000
++#define HW_RESET_SLEEP_US 1000
++
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
++ unsigned long val;
++ int ret;
+
+ /* When hardware reset is detected, we should stop sending mailbox&cmq&
+ * doorbell to hardware. If now in .init_instance() function, we should
+@@ -1064,7 +1045,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
+ * again.
+ */
+ hr_dev->dis_db = true;
+- if (!ops->get_hw_reset_stat(handle))
++
++ ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
++ val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
++ HW_RESET_TIMEOUT_US, false, handle);
++ if (!ret)
+ hr_dev->is_reset = true;
+
+ if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
+@@ -1263,6 +1248,40 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
+ return tail == priv->cmq.csq.head;
+ }
+
++static void update_cmdq_status(struct hns_roce_dev *hr_dev)
++{
++ struct hns_roce_v2_priv *priv = hr_dev->priv;
++ struct hnae3_handle *handle = priv->handle;
++
++ if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
++ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
++ hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
++}
++
++static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
++{
++ struct hns_roce_cmd_errcode errcode_table[] = {
++ {CMD_EXEC_SUCCESS, 0},
++ {CMD_NO_AUTH, -EPERM},
++ {CMD_NOT_EXIST, -EOPNOTSUPP},
++ {CMD_CRQ_FULL, -EXFULL},
++ {CMD_NEXT_ERR, -ENOSR},
++ {CMD_NOT_EXEC, -ENOTBLK},
++ {CMD_PARA_ERR, -EINVAL},
++ {CMD_RESULT_ERR, -ERANGE},
++ {CMD_TIMEOUT, -ETIME},
++ {CMD_HILINK_ERR, -ENOLINK},
++ {CMD_INFO_ILLEGAL, -ENXIO},
++ {CMD_INVALID, -EBADR},
++ };
++ u16 i;
++
++ for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
++ if (desc_ret == errcode_table[i].return_status)
++ return errcode_table[i].errno;
++ return -EIO;
++}
++
+ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+ {
+@@ -1307,7 +1326,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = %x, return = %x\n",
+ desc->opcode, desc_ret);
+- ret = -EIO;
++ ret = hns_roce_cmd_err_convert_errno(desc_ret);
+ }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+@@ -1316,6 +1335,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ csq->head, tail);
+ csq->head = tail;
+
++ update_cmdq_status(hr_dev);
++
+ ret = -EAGAIN;
+ }
+
+@@ -1330,6 +1351,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ bool busy;
+ int ret;
+
++ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
++ return -EIO;
++
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
+
+@@ -1526,6 +1550,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+ {
+ int i;
+
++ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
++ return;
++
+ for (i = hr_dev->func_num - 1; i >= 0; i--) {
+ __hns_roce_function_clear(hr_dev, i);
+ if (i != 0)
+@@ -1594,11 +1621,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
+ {
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
++ u32 clock_cycles_of_1us;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
+ false);
+
+- hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8);
++ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
++ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
++ else
++ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
++
++ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
+ hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+@@ -1749,17 +1782,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+- roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+- VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
++ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
+- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
+- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
++ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
++ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
++ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+ }
+@@ -1939,7 +1971,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
+ caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
+ caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
+ caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
+- caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
++ caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
+
+ caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
+ caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
+@@ -2233,7 +2265,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
+ caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
+ caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
+- caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
+ caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
+ caps->num_aeq_vectors = resp_a->num_aeq_vectors;
+@@ -2365,6 +2396,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
+ V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
+ V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
+
++ if (!(caps->page_size_cap & PAGE_SIZE))
++ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
++
+ return 0;
+ }
+
+@@ -2802,6 +2836,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
+ mb_st = (struct hns_roce_mbox_status *)desc.data;
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
++ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
++ return -EIO;
++
+ status = 0;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
+ true);
+@@ -2906,10 +2943,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev,
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
+
+- roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
+- CFG_SGID_TB_TABLE_IDX_S, gid_index);
+- roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
+- CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
++ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
++ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
+
+ copy_gid(&sgid_tb->vf_sgid_l, gid);
+
+@@ -2944,19 +2979,14 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev,
+
+ copy_gid(&tb_a->vf_sgid_l, gid);
+
+- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M,
+- CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type);
+- roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S,
+- vlan_id < VLAN_CFI_MASK);
+- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M,
+- CFG_GMV_TB_VF_VLAN_ID_S, vlan_id);
++ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
++ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
++ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
+
+ tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
+- roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M,
+- CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]);
+
+- roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M,
+- CFG_GMV_TB_SGID_IDX_S, gid_index);
++ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
++ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+ }
+@@ -3005,10 +3035,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
+ reg_smac_l = *(u32 *)(&addr[0]);
+ reg_smac_h = *(u16 *)(&addr[4]);
+
+- roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
+- CFG_SMAC_TB_IDX_S, phy_port);
+- roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
+- CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
++ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
++ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
+ smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+@@ -3024,7 +3052,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+ int i, count;
+
+ count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+- ARRAY_SIZE(pages), &pbl_ba);
++ min_t(int, ARRAY_SIZE(pages), mr->npages),
++ &pbl_ba);
+ if (count < 1) {
+ ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
+ count);
+@@ -3037,21 +3066,15 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
+- roce_set_field(mpt_entry->byte_48_mode_ba,
+- V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
+- upper_32_bits(pbl_ba >> 3));
++ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+
+ mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
+- roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
+- V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
++ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
+
+ mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
+- roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
+- V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
+- roce_set_field(mpt_entry->byte_64_buf_pa1,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
++ hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ return 0;
+ }
+@@ -3068,7 +3091,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+- hr_reg_enable(mpt_entry, MPT_L_INV_EN);
+
+ hr_reg_write_bool(mpt_entry, MPT_BIND_EN,
+ mr->access & IB_ACCESS_MW_BIND);
+@@ -3113,24 +3135,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
+ u32 mr_access_flags = mr->access;
+ int ret = 0;
+
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
+-
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+- V2_MPT_BYTE_4_PD_S, mr->pd);
++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
++ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ if (flags & IB_MR_REREG_ACCESS) {
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
+- V2_MPT_BYTE_8_BIND_EN_S,
++ hr_reg_write(mpt_entry, MPT_BIND_EN,
+ (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
+- V2_MPT_BYTE_8_ATOMIC_EN_S,
++ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
++ hr_reg_write(mpt_entry, MPT_RR_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
++ hr_reg_write(mpt_entry, MPT_RW_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
++ hr_reg_write(mpt_entry, MPT_LW_EN,
+ mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ }
+
+@@ -3161,37 +3178,27 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
+ return -ENOBUFS;
+ }
+
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+- V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+- roce_set_field(mpt_entry->byte_4_pd_hop_st,
+- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+- V2_MPT_BYTE_4_PD_S, mr->pd);
++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
++ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
++
++ hr_reg_enable(mpt_entry, MPT_RA_EN);
++ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
++ hr_reg_enable(mpt_entry, MPT_FRE);
++ hr_reg_clear(mpt_entry, MPT_MR_MW);
++ hr_reg_enable(mpt_entry, MPT_BPD);
++ hr_reg_clear(mpt_entry, MPT_PA);
+
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
++ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
++ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
+- roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+- V2_MPT_BYTE_48_PBL_BA_H_S,
+- upper_32_bits(pbl_ba >> 3));
+-
+- roce_set_field(mpt_entry->byte_64_buf_pa1,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
++ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+
+ return 0;
+ }
+@@ -3203,36 +3210,28 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+- V2_MPT_BYTE_4_PD_S, mw->pdn);
+- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+- V2_MPT_BYTE_4_PBL_HOP_NUM_S,
+- mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+- mw->pbl_hop_num);
+- roce_set_field(mpt_entry->byte_4_pd_hop_st,
+- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+- mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+-
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1);
+-
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
+- mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
++ hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
++
++ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
++ hr_reg_enable(mpt_entry, MPT_LW_EN);
+
+- roce_set_field(mpt_entry->byte_64_buf_pa1,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+- mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
++ hr_reg_enable(mpt_entry, MPT_MR_MW);
++ hr_reg_enable(mpt_entry, MPT_BPD);
++ hr_reg_clear(mpt_entry, MPT_PA);
++ hr_reg_write(mpt_entry, MPT_BQP,
++ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+
+ mpt_entry->lkey = cpu_to_le32(mw->rkey);
+
++ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM,
++ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
++ mw->pbl_hop_num);
++ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
++ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
++ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
++
+ return 0;
+ }
+
+@@ -3328,7 +3327,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
+ memset(cq_context, 0, sizeof(*cq_context));
+
+ hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID);
+- hr_reg_write(cq_context, CQC_ARM_ST, REG_NXT_CEQE);
++ hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED);
+ hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth));
+ hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector);
+ hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn);
+@@ -3571,7 +3570,6 @@ static const u32 wc_send_op_map[] = {
+ HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
+ HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
+- HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV),
+ HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
+ HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
+@@ -3621,9 +3619,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+- case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
+- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+- break;
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
+@@ -4399,11 +4394,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
+ mtu = ib_mtu_enum_to_int(ib_mtu);
+ if (WARN_ON(mtu <= 0))
+ return -EINVAL;
+-#define MAX_LP_MSG_LEN 65536
+- /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
+- lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
+- if (WARN_ON(lp_pktn_ini >= 0xF))
+- return -EINVAL;
++#define MIN_LP_MSG_LEN 1024
++ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
++ lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ hr_reg_write(context, QPC_MTU, ib_mtu);
+@@ -4802,6 +4795,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ return ret;
+ }
+
++static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
++{
++#define QP_ACK_TIMEOUT_MAX_HIP08 20
++#define QP_ACK_TIMEOUT_OFFSET 10
++#define QP_ACK_TIMEOUT_MAX 31
++
++ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
++ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
++ ibdev_warn(&hr_dev->ib_dev,
++ "Local ACK timeout shall be 0 to 20.\n");
++ return false;
++ }
++ *timeout += QP_ACK_TIMEOUT_OFFSET;
++ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
++ if (*timeout > QP_ACK_TIMEOUT_MAX) {
++ ibdev_warn(&hr_dev->ib_dev,
++ "Local ACK timeout shall be 0 to 31.\n");
++ return false;
++ }
++ }
++
++ return true;
++}
++
+ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+@@ -4811,6 +4828,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
++ u8 timeout;
+
+ if (attr_mask & IB_QP_AV) {
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+@@ -4820,12 +4838,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+- if (attr->timeout < 31) {
+- hr_reg_write(context, QPC_AT, attr->timeout);
++ timeout = attr->timeout;
++ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
++ hr_reg_write(context, QPC_AT, timeout);
+ hr_reg_clear(qpc_mask, QPC_AT);
+- } else {
+- ibdev_warn(&hr_dev->ib_dev,
+- "Local ACK timeout shall be 0 to 30.\n");
+ }
+ }
+
+@@ -4882,7 +4898,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+- hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer);
++ hr_reg_write(context, QPC_MIN_RNR_TIME,
++ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
++ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
+ hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
+ }
+
+@@ -5138,6 +5156,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+
+ rdma_ah_set_sl(&qp_attr->ah_attr,
+ hr_reg_read(&context, QPC_SL));
++ rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
++ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label = hr_reg_read(&context, QPC_FL);
+ grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
+ grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
+@@ -5499,6 +5519,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
+ hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
++
++ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
++ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
++ dev_info(hr_dev->dev,
++ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
++ cq_period);
++ cq_period = HNS_ROCE_MAX_CQ_PERIOD;
++ }
++ cq_period *= HNS_ROCE_CLOCK_ADJUST;
++ }
+ hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
+ hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
+
+@@ -5783,8 +5813,8 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+
+ dev_err(dev, "AEQ overflow!\n");
+
+- int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S;
+- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
++ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
++ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+
+ /* Set reset level for reset_event() */
+ if (ops->set_default_reset_request)
+@@ -5894,6 +5924,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+ hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
+ hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+
++ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
++ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
++ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
++ eq->eq_period);
++ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
++ }
++ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
++ }
++
+ hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
+ hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
+ hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
+@@ -6397,10 +6436,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
+ if (!hr_dev)
+ return 0;
+
+- hr_dev->is_reset = true;
+ hr_dev->active = false;
+ hr_dev->dis_db = true;
+-
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
+
+ return 0;
+diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+index 4d904d5e82be4..67f5b6fcfa1b1 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+@@ -51,7 +51,7 @@
+ #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
+ #define HNS_ROCE_V2_MAX_SRQ_SGE 64
+ #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
+-#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
++#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
+ #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
+ #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
+ #define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
+@@ -98,7 +98,7 @@
+
+ #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
+-#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
++#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
+ #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
+ #define HNS_ROCE_INVALID_LKEY 0x0
+ #define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
+@@ -184,7 +184,6 @@ enum {
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
+ HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
+- HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
+ HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
+ };
+@@ -277,6 +276,11 @@ enum hns_roce_cmd_return_status {
+ CMD_OTHER_ERR = 0xff
+ };
+
++struct hns_roce_cmd_errcode {
++ enum hns_roce_cmd_return_status return_status;
++ int errno;
++};
++
+ enum hns_roce_sgid_type {
+ GID_TYPE_FLAG_ROCE_V1 = 0,
+ GID_TYPE_FLAG_ROCE_V2_IPV4,
+@@ -790,12 +794,15 @@ struct hns_roce_v2_mpt_entry {
+ #define MPT_LKEY MPT_FIELD_LOC(223, 192)
+ #define MPT_VA MPT_FIELD_LOC(287, 224)
+ #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+-#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
++#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
++#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
+ #define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+ #define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+-#define MPT_PA0 MPT_FIELD_LOC(441, 384)
++#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
++#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
+ #define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+-#define MPT_PA1 MPT_FIELD_LOC(505, 448)
++#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
++#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
+ #define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+ #define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+ #define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+@@ -901,48 +908,24 @@ struct hns_roce_v2_ud_send_wqe {
+ u8 dgid[GID_LEN_V2];
+ };
+
+-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
+-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
+-
+-#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
+-
+-#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
+-
+-#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
+-#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
+-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
+-
+-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
+-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
+-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
+-
+-#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
+-#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
+-#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
+-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
+-
+-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
+-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
+-
+-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
+-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
+-
+-#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
+-#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
+-
+-#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
+-
+-#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
++#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
++
++#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
++#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
++#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
++#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
++#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
++#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
++#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
++#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
++#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
++#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
++#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
++#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
++#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
++#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
++#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
++#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
+
+ struct hns_roce_v2_rc_send_wqe {
+ __le32 byte_4;
+@@ -957,42 +940,22 @@ struct hns_roce_v2_rc_send_wqe {
+ __le64 va;
+ };
+
+-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
+-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+-
+-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
+-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
+-
+-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
+-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
+-
+-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
+-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
+-
+-#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
+-
+-#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
+-
+-#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9
+-
+-#define V2_RC_SEND_WQE_BYTE_4_SO_S 10
+-
+-#define V2_RC_SEND_WQE_BYTE_4_SE_S 11
+-
+-#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
+-
+-#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
+-
+-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
+-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
+-
+-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24
+-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
+-
+-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
+-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+-
+-#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
++#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
++
++#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
++#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
++#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
++#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
++#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
++#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
++#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
++#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
++#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
++#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
++#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
++#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
++#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
++#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
+
+ struct hns_roce_wqe_frmr_seg {
+ __le32 pbl_size;
+@@ -1114,12 +1077,12 @@ struct hns_roce_vf_switch {
+ __le32 resv3;
+ };
+
+-#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
+-#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
++#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
+
+-#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
+-#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
+-#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
++#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
++#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
++#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
++#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
+
+ struct hns_roce_post_mbox {
+ __le32 in_param_l;
+@@ -1182,11 +1145,10 @@ struct hns_roce_cfg_sgid_tb {
+ __le32 vf_sgid_type_rsv;
+ };
+
+-#define CFG_SGID_TB_TABLE_IDX_S 0
+-#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
++#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
+
+-#define CFG_SGID_TB_VF_SGID_TYPE_S 0
+-#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
++#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
++#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
+
+ struct hns_roce_cfg_smac_tb {
+ __le32 tb_idx_rsv;
+@@ -1194,11 +1156,11 @@ struct hns_roce_cfg_smac_tb {
+ __le32 vf_smac_h_rsv;
+ __le32 rsv[3];
+ };
+-#define CFG_SMAC_TB_IDX_S 0
+-#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
+
+-#define CFG_SMAC_TB_VF_SMAC_H_S 0
+-#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
++#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
++
++#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
++#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
+
+ struct hns_roce_cfg_gmv_tb_a {
+ __le32 vf_sgid_l;
+@@ -1209,16 +1171,11 @@ struct hns_roce_cfg_gmv_tb_a {
+ __le32 resv;
+ };
+
+-#define CFG_GMV_TB_SGID_IDX_S 0
+-#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0)
++#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
+
+-#define CFG_GMV_TB_VF_SGID_TYPE_S 0
+-#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+-
+-#define CFG_GMV_TB_VF_VLAN_EN_S 2
+-
+-#define CFG_GMV_TB_VF_VLAN_ID_S 16
+-#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16)
++#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
++#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
++#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
+
+ struct hns_roce_cfg_gmv_tb_b {
+ __le32 vf_smac_l;
+@@ -1227,8 +1184,10 @@ struct hns_roce_cfg_gmv_tb_b {
+ __le32 resv[3];
+ };
+
+-#define CFG_GMV_TB_SMAC_H_S 0
+-#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0)
++#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
++
++#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
++#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
+
+ #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+ struct hns_roce_query_pf_caps_a {
+@@ -1444,6 +1403,14 @@ struct hns_roce_dip {
+ struct list_head node; /* all dips are on a list */
+ };
+
++/* only for RNR timeout issue of HIP08 */
++#define HNS_ROCE_CLOCK_ADJUST 1000
++#define HNS_ROCE_MAX_CQ_PERIOD 65
++#define HNS_ROCE_MAX_EQ_PERIOD 65
++#define HNS_ROCE_RNR_TIMER_10NS 1
++#define HNS_ROCE_1US_CFG 999
++#define HNS_ROCE_1NS_CFG 0
++
+ #define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
+ #define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
+ #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
+diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
+index 5d39bd08582af..80b9a9a45c68e 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_main.c
++++ b/drivers/infiniband/hw/hns/hns_roce_main.c
+@@ -222,6 +222,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
+ unsigned long flags;
+ enum ib_mtu mtu;
+ u32 port;
++ int ret;
+
+ port = port_num - 1;
+
+@@ -234,8 +235,10 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
+ IB_PORT_BOOT_MGMT_SUP;
+ props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
+ props->pkey_tbl_len = 1;
+- props->active_width = IB_WIDTH_4X;
+- props->active_speed = 1;
++ ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
++ &props->active_width);
++ if (ret)
++ ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+@@ -269,6 +272,9 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
+ static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
+ u16 *pkey)
+ {
++ if (index > 0)
++ return -EINVAL;
++
+ *pkey = PKEY_ID;
+
+ return 0;
+@@ -349,7 +355,7 @@ static int hns_roce_mmap(struct ib_ucontext *context,
+ return rdma_user_mmap_io(context, vma,
+ to_hr_ucontext(context)->uar.pfn,
+ PAGE_SIZE,
+- pgprot_noncached(vma->vm_page_prot),
++ pgprot_device(vma->vm_page_prot),
+ NULL);
+
+ /* vm_pgoff: 1 -- TPTR */
+@@ -660,7 +666,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
+ HEM_TYPE_CQC_TIMER,
+ hr_dev->caps.cqc_timer_entry_sz,
+- hr_dev->caps.num_cqc_timer, 1);
++ hr_dev->caps.cqc_timer_bt_num, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init CQC timer memory, aborting.\n");
+diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
+index 7089ac7802913..12c482f4a1c48 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
++++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
+@@ -34,6 +34,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/vmalloc.h>
+ #include <rdma/ib_umem.h>
++#include <linux/math.h>
+ #include "hns_roce_device.h"
+ #include "hns_roce_cmd.h"
+ #include "hns_roce_hem.h"
+@@ -272,7 +273,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ goto err_alloc_pbl;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+- mr->ibmr.length = length;
+
+ return &mr->ibmr;
+
+@@ -416,10 +416,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+
+ return &mr->ibmr;
+
+-err_key:
+- free_mr_key(hr_dev, mr);
+ err_pbl:
+ free_mr_pbl(hr_dev, mr);
++err_key:
++ free_mr_key(hr_dev, mr);
+ err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+@@ -939,6 +939,44 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ return page_cnt;
+ }
+
++static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
++{
++ return int_pow(ba_per_bt, hopnum - 1);
++}
++
++static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
++ struct hns_roce_mtr *mtr,
++ unsigned int pg_shift)
++{
++ unsigned long cap = hr_dev->caps.page_size_cap;
++ struct hns_roce_buf_region *re;
++ unsigned int pgs_per_l1ba;
++ unsigned int ba_per_bt;
++ unsigned int ba_num;
++ int i;
++
++ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
++ if (!(BIT(pg_shift) & cap))
++ continue;
++
++ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
++ ba_num = 0;
++ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
++ re = &mtr->hem_cfg.region[i];
++ if (re->hopnum == 0)
++ continue;
++
++ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
++ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
++ }
++
++ if (ba_num <= ba_per_bt)
++ return pg_shift;
++ }
++
++ return 0;
++}
++
+ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
+ {
+@@ -947,6 +985,10 @@ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
++ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
++ if (!ba_page_shift)
++ return -ERANGE;
++
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
+index 9af4509894e68..00dade1cfff20 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
++++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
+@@ -172,14 +172,29 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
+ }
+ }
+
+-static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank)
++static u8 get_affinity_cq_bank(u8 qp_bank)
+ {
+- u32 least_load = bank[0].inuse;
++ return (qp_bank >> 1) & CQ_BANKID_MASK;
++}
++
++static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
++ struct hns_roce_bank *bank)
++{
++#define INVALID_LOAD_QPNUM 0xFFFFFFFF
++ struct ib_cq *scq = init_attr->send_cq;
++ u32 least_load = INVALID_LOAD_QPNUM;
++ unsigned long cqn = 0;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+- for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) {
++ if (scq)
++ cqn = to_hr_cq(scq)->cqn;
++
++ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
++ if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
++ continue;
++
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+@@ -211,7 +226,8 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
+
+ return 0;
+ }
+-static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
++static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
++ struct ib_qp_init_attr *init_attr)
+ {
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned long num = 0;
+@@ -229,7 +245,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+ hr_qp->doorbell_qpn = 1;
+ } else {
+ mutex_lock(&qp_table->bank_mutex);
+- bankid = get_least_load_bankid_for_qp(qp_table->bank);
++ bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
+
+ ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
+ &num);
+@@ -495,11 +511,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
+
+- if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
+- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
+- else
+- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+- hr_qp->rq.max_gs);
++ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
++ hr_qp->rq.max_gs);
+
+ hr_qp->rq.wqe_cnt = cnt;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
+@@ -1070,7 +1083,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+ goto err_buf;
+ }
+
+- ret = alloc_qpn(hr_dev, hr_qp);
++ ret = alloc_qpn(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
+ goto err_qpn;
+diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
+index 6eee9deadd122..e64ef6903fb4f 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
++++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
+@@ -259,7 +259,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+
+ static void free_srq_wrid(struct hns_roce_srq *srq)
+ {
+- kfree(srq->wrid);
++ kvfree(srq->wrid);
+ srq->wrid = NULL;
+ }
+
+diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
+index 6dea0a49d1718..64d4bb0e9a12f 100644
+--- a/drivers/infiniband/hw/irdma/cm.c
++++ b/drivers/infiniband/hw/irdma/cm.c
+@@ -1458,13 +1458,15 @@ static int irdma_send_fin(struct irdma_cm_node *cm_node)
+ * irdma_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_addr: listener ip addr
++ * @ipv4: flag indicating IPv4 when true
+ * @dst_port: listener tcp port num
+ * @vlan_id: virtual LAN ID
+ * @listener_state: state to match with listen node's
+ */
+ static struct irdma_cm_listener *
+-irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
+- u16 vlan_id, enum irdma_cm_listener_state listener_state)
++irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4,
++ u16 dst_port, u16 vlan_id,
++ enum irdma_cm_listener_state listener_state)
+ {
+ struct irdma_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+@@ -1477,12 +1479,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port,
+ list_for_each_entry (listen_node, &cm_core->listen_list, list) {
+ memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+ listen_port = listen_node->loc_port;
++ if (listen_node->ipv4 != ipv4 || listen_port != dst_port ||
++ !(listener_state & listen_node->listener_state))
++ continue;
+ /* compare node pair, return node handle if a match */
+- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
+- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
+- listen_port == dst_port &&
+- vlan_id == listen_node->vlan_id &&
+- (listener_state & listen_node->listener_state)) {
++ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
++ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
++ vlan_id == listen_node->vlan_id)) {
+ refcount_inc(&listen_node->refcnt);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock,
+ flags);
+@@ -1722,6 +1725,9 @@ irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info,
+ continue;
+
+ idev = in_dev_get(ip_dev);
++ if (!idev)
++ continue;
++
+ in_dev_for_each_ifa_rtnl(ifa, idev) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+@@ -2305,10 +2311,8 @@ err:
+ return NULL;
+ }
+
+-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
++static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
+ {
+- struct irdma_cm_node *cm_node =
+- container_of(rcu_head, struct irdma_cm_node, rcu_head);
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_qp *iwqp;
+ struct irdma_cm_info nfo;
+@@ -2356,7 +2360,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+ }
+
+ cm_core->cm_free_ah(cm_node);
+- kfree(cm_node);
+ }
+
+ /**
+@@ -2384,8 +2387,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
+
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+- /* wait for all list walkers to exit their grace period */
+- call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
++ irdma_destroy_connection(cm_node);
++
++ kfree_rcu(cm_node, rcu_head);
+ }
+
+ /**
+@@ -2897,9 +2901,10 @@ irdma_make_listen_node(struct irdma_cm_core *cm_core,
+ unsigned long flags;
+
+ /* cannot have multiple matching listeners */
+- listener = irdma_find_listener(cm_core, cm_info->loc_addr,
+- cm_info->loc_port, cm_info->vlan_id,
+- IRDMA_CM_LISTENER_EITHER_STATE);
++ listener =
++ irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4,
++ cm_info->loc_port, cm_info->vlan_id,
++ IRDMA_CM_LISTENER_EITHER_STATE);
+ if (listener &&
+ listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
+ refcount_dec(&listener->refcnt);
+@@ -3148,6 +3153,7 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
+
+ listener = irdma_find_listener(cm_core,
+ cm_info.loc_addr,
++ cm_info.ipv4,
+ cm_info.loc_port,
+ cm_info.vlan_id,
+ IRDMA_CM_LISTENER_ACTIVE_STATE);
+@@ -3244,15 +3250,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev,
+ */
+ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
+ {
+- unsigned long flags;
+-
+ if (!cm_core)
+ return;
+
+- spin_lock_irqsave(&cm_core->ht_lock, flags);
+- if (timer_pending(&cm_core->tcp_timer))
+- del_timer_sync(&cm_core->tcp_timer);
+- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
++ del_timer_sync(&cm_core->tcp_timer);
+
+ destroy_workqueue(cm_core->event_wq);
+ cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
+@@ -3465,12 +3466,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
+ }
+
+ cm_id = iwqp->cm_id;
+- /* make sure we havent already closed this connection */
+- if (!cm_id) {
+- spin_unlock_irqrestore(&iwqp->lock, flags);
+- return;
+- }
+-
+ original_hw_tcp_state = iwqp->hw_tcp_state;
+ original_ibqp_state = iwqp->ibqp_state;
+ last_ae = iwqp->last_aeq;
+@@ -3492,11 +3487,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
+ disconn_status = -ECONNRESET;
+ }
+
+- if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+- original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+- last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+- last_ae == IRDMA_AE_BAD_CLOSE ||
+- last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
++ if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
++ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
++ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
++ last_ae == IRDMA_AE_BAD_CLOSE ||
++ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ qp->term_flags = 0;
+@@ -4234,10 +4229,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ struct irdma_cm_node *cm_node;
+ struct list_head teardown_list;
+ struct ib_qp_attr attr;
+- struct irdma_sc_vsi *vsi = &iwdev->vsi;
+- struct irdma_sc_qp *sc_qp;
+- struct irdma_qp *qp;
+- int i;
+
+ INIT_LIST_HEAD(&teardown_list);
+
+@@ -4254,52 +4245,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ irdma_cm_disconn(cm_node->iwqp);
+ irdma_rem_ref_cm_node(cm_node);
+ }
+- if (!iwdev->roce_mode)
+- return;
+-
+- INIT_LIST_HEAD(&teardown_list);
+- for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+- mutex_lock(&vsi->qos[i].qos_mutex);
+- list_for_each_safe (list_node, list_core_temp,
+- &vsi->qos[i].qplist) {
+- u32 qp_ip[4];
+-
+- sc_qp = container_of(list_node, struct irdma_sc_qp,
+- list);
+- if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
+- continue;
+-
+- qp = sc_qp->qp_uk.back_qp;
+- if (!disconnect_all) {
+- if (nfo->ipv4)
+- qp_ip[0] = qp->udp_info.local_ipaddr[3];
+- else
+- memcpy(qp_ip,
+- &qp->udp_info.local_ipaddr[0],
+- sizeof(qp_ip));
+- }
+-
+- if (disconnect_all ||
+- (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
+- !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
+- spin_lock(&iwdev->rf->qptable_lock);
+- if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
+- irdma_qp_add_ref(&qp->ibqp);
+- list_add(&qp->teardown_entry,
+- &teardown_list);
+- }
+- spin_unlock(&iwdev->rf->qptable_lock);
+- }
+- }
+- mutex_unlock(&vsi->qos[i].qos_mutex);
+- }
+-
+- list_for_each_safe (list_node, list_core_temp, &teardown_list) {
+- qp = container_of(list_node, struct irdma_qp, teardown_entry);
+- attr.qp_state = IB_QPS_ERR;
+- irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+- irdma_qp_rem_ref(&qp->ibqp);
+- }
+ }
+
+ /**
+diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
+index d03cd29333eab..2b0fb5a6b3001 100644
+--- a/drivers/infiniband/hw/irdma/cm.h
++++ b/drivers/infiniband/hw/irdma/cm.h
+@@ -41,7 +41,7 @@
+ #define TCP_OPTIONS_PADDING 3
+
+ #define IRDMA_DEFAULT_RETRYS 64
+-#define IRDMA_DEFAULT_RETRANS 8
++#define IRDMA_DEFAULT_RETRANS 32
+ #define IRDMA_DEFAULT_TTL 0x40
+ #define IRDMA_DEFAULT_RTT_VAR 6
+ #define IRDMA_DEFAULT_SS_THRESH 0x3fffffff
+diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
+index f1e5515256e0a..ad14c2404e94c 100644
+--- a/drivers/infiniband/hw/irdma/ctrl.c
++++ b/drivers/infiniband/hw/irdma/ctrl.c
+@@ -431,7 +431,7 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c
+
+ cqp = qp->dev->cqp;
+ if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id ||
+- qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1))
++ qp->qp_uk.qp_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt))
+ return IRDMA_ERR_INVALID_QP_ID;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+@@ -2551,10 +2551,10 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq,
+ enum irdma_status_code ret_code = 0;
+
+ cqp = cq->dev->cqp;
+- if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1))
++ if (cq->cq_uk.cq_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt))
+ return IRDMA_ERR_INVALID_CQ_ID;
+
+- if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1))
++ if (cq->ceq_id >= (cq->dev->hmc_fpm_misc.max_ceqs))
+ return IRDMA_ERR_INVALID_CEQ_ID;
+
+ ceq = cq->dev->ceq[cq->ceq_id];
+@@ -2741,13 +2741,13 @@ irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info,
+ */
+ void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev)
+ {
+- if (timeout->compl_cqp_cmds != dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]) {
+- timeout->compl_cqp_cmds = dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
++ u64 completed_ops = atomic64_read(&dev->cqp->completed_ops);
++
++ if (timeout->compl_cqp_cmds != completed_ops) {
++ timeout->compl_cqp_cmds = completed_ops;
+ timeout->count = 0;
+- } else {
+- if (dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] !=
+- timeout->compl_cqp_cmds)
+- timeout->count++;
++ } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) {
++ timeout->count++;
+ }
+ }
+
+@@ -2790,7 +2790,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp,
+ if (newtail != tail) {
+ /* SUCCESS */
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+- cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
++ atomic64_inc(&cqp->completed_ops);
+ return 0;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+@@ -3152,8 +3152,8 @@ enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ info->dev->cqp = cqp;
+
+ IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
+- cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] = 0;
+- cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS] = 0;
++ cqp->requested_ops = 0;
++ atomic64_set(&cqp->completed_ops, 0);
+ /* for the cqp commands backlog. */
+ INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
+
+@@ -3306,7 +3306,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch
+ if (ret_code)
+ return NULL;
+
+- cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS]++;
++ cqp->requested_ops++;
+ if (!*wqe_idx)
+ cqp->polarity = !cqp->polarity;
+ wqe = cqp->sq_base[*wqe_idx].elem;
+@@ -3395,6 +3395,9 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ if (polarity != ccq->cq_uk.polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
++ /* Ensure CEQE contents are read after valid bit is checked */
++ dma_rmb();
++
+ get_64bit_val(cqe, 8, &qp_ctx);
+ cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
+ info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp);
+@@ -3429,7 +3432,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ dma_wmb(); /* make sure shadow area is updated before moving tail */
+
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+- ccq->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
++ atomic64_inc(&cqp->completed_ops);
+
+ return ret_code;
+ }
+@@ -3656,7 +3659,7 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size)
+ return IRDMA_ERR_INVALID_SIZE;
+
+- if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1))
++ if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs))
+ return IRDMA_ERR_INVALID_CEQ_ID;
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+@@ -4046,13 +4049,17 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ u8 polarity;
+
+ aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq);
+- get_64bit_val(aeqe, 0, &compl_ctx);
+ get_64bit_val(aeqe, 8, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
+
+ if (aeq->polarity != polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
++ /* Ensure AEQE contents are read after valid bit is checked */
++ dma_rmb();
++
++ get_64bit_val(aeqe, 0, &compl_ctx);
++
+ print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ aeqe, 16, false);
+
+@@ -4205,7 +4212,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq,
+ info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size)
+ return IRDMA_ERR_INVALID_SIZE;
+
+- if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1))
++	if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs))
+ return IRDMA_ERR_INVALID_CEQ_ID;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
+index cc3d9a365b35a..afd16a93ac69c 100644
+--- a/drivers/infiniband/hw/irdma/defs.h
++++ b/drivers/infiniband/hw/irdma/defs.h
+@@ -190,32 +190,30 @@ enum irdma_cqp_op_type {
+ IRDMA_OP_MANAGE_VF_PBLE_BP = 25,
+ IRDMA_OP_QUERY_FPM_VAL = 26,
+ IRDMA_OP_COMMIT_FPM_VAL = 27,
+- IRDMA_OP_REQ_CMDS = 28,
+- IRDMA_OP_CMPL_CMDS = 29,
+- IRDMA_OP_AH_CREATE = 30,
+- IRDMA_OP_AH_MODIFY = 31,
+- IRDMA_OP_AH_DESTROY = 32,
+- IRDMA_OP_MC_CREATE = 33,
+- IRDMA_OP_MC_DESTROY = 34,
+- IRDMA_OP_MC_MODIFY = 35,
+- IRDMA_OP_STATS_ALLOCATE = 36,
+- IRDMA_OP_STATS_FREE = 37,
+- IRDMA_OP_STATS_GATHER = 38,
+- IRDMA_OP_WS_ADD_NODE = 39,
+- IRDMA_OP_WS_MODIFY_NODE = 40,
+- IRDMA_OP_WS_DELETE_NODE = 41,
+- IRDMA_OP_WS_FAILOVER_START = 42,
+- IRDMA_OP_WS_FAILOVER_COMPLETE = 43,
+- IRDMA_OP_SET_UP_MAP = 44,
+- IRDMA_OP_GEN_AE = 45,
+- IRDMA_OP_QUERY_RDMA_FEATURES = 46,
+- IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 47,
+- IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 48,
+- IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 49,
+- IRDMA_OP_CQ_MODIFY = 50,
++ IRDMA_OP_AH_CREATE = 28,
++ IRDMA_OP_AH_MODIFY = 29,
++ IRDMA_OP_AH_DESTROY = 30,
++ IRDMA_OP_MC_CREATE = 31,
++ IRDMA_OP_MC_DESTROY = 32,
++ IRDMA_OP_MC_MODIFY = 33,
++ IRDMA_OP_STATS_ALLOCATE = 34,
++ IRDMA_OP_STATS_FREE = 35,
++ IRDMA_OP_STATS_GATHER = 36,
++ IRDMA_OP_WS_ADD_NODE = 37,
++ IRDMA_OP_WS_MODIFY_NODE = 38,
++ IRDMA_OP_WS_DELETE_NODE = 39,
++ IRDMA_OP_WS_FAILOVER_START = 40,
++ IRDMA_OP_WS_FAILOVER_COMPLETE = 41,
++ IRDMA_OP_SET_UP_MAP = 42,
++ IRDMA_OP_GEN_AE = 43,
++ IRDMA_OP_QUERY_RDMA_FEATURES = 44,
++ IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 45,
++ IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46,
++ IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47,
++ IRDMA_OP_CQ_MODIFY = 48,
+
+ /* Must be last entry*/
+- IRDMA_MAX_CQP_OPS = 51,
++ IRDMA_MAX_CQP_OPS = 49,
+ };
+
+ /* CQP SQ WQES */
+@@ -314,6 +312,7 @@ enum irdma_cqp_op_type {
+ #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
+ #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
+ #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
++#define IRDMA_AE_INVALID_REQUEST 0x0223
+ #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
+ #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
+ #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
+diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
+index 7de525a5ccf8c..70dffa9a9f674 100644
+--- a/drivers/infiniband/hw/irdma/hw.c
++++ b/drivers/infiniband/hw/irdma/hw.c
+@@ -41,6 +41,7 @@ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = {
+ IRDMA_HMC_IW_XFFL,
+ IRDMA_HMC_IW_Q1,
+ IRDMA_HMC_IW_Q1FL,
++ IRDMA_HMC_IW_PBLE,
+ IRDMA_HMC_IW_TIMER,
+ IRDMA_HMC_IW_FSIMC,
+ IRDMA_HMC_IW_FSIAV,
+@@ -60,6 +61,8 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
+ {
+ struct irdma_cq *cq = iwcq->back_cq;
+
++ if (!cq->user_mode)
++ atomic_set(&cq->armed, 0);
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ }
+@@ -136,56 +139,69 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+
+ switch (info->ae_id) {
+- case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+ case IRDMA_AE_AMP_INVALID_STAG:
+- qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+- fallthrough;
++ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
++ case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ case IRDMA_AE_AMP_BAD_PD:
+- case IRDMA_AE_UDA_XMIT_BAD_PD:
+- qp->flush_code = FLUSH_PROT_ERR;
+- break;
+ case IRDMA_AE_AMP_BAD_QP:
+- qp->flush_code = FLUSH_LOC_QP_OP_ERR;
+- break;
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ case IRDMA_AE_AMP_TO_WRAP:
+- case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+- case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+- case IRDMA_AE_IB_INVALID_REQUEST:
+- case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+- case IRDMA_AE_IB_REMOTE_OP_ERROR:
+- qp->flush_code = FLUSH_REM_ACCESS_ERR;
++ qp->flush_code = FLUSH_PROT_ERR;
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+- case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+- case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
++ case IRDMA_AE_UDA_XMIT_BAD_PD:
++ case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
++ qp->flush_code = FLUSH_LOC_QP_OP_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
++ break;
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+- case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
++ case IRDMA_AE_DDP_UBE_INVALID_MO:
++ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp->flush_code = FLUSH_LOC_LEN_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
++ break;
++ case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
++ case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
++ qp->flush_code = FLUSH_REM_ACCESS_ERR;
++ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
++ break;
++ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
++ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
++ case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
++ case IRDMA_AE_IB_REMOTE_OP_ERROR:
++ qp->flush_code = FLUSH_REM_OP_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ qp->flush_code = FLUSH_FATAL_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+- case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
+- case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ qp->flush_code = FLUSH_GENERAL_ERR;
+ break;
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ qp->flush_code = FLUSH_RETRY_EXC_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+ case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+ case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
++ case IRDMA_AE_AMP_MWBIND_VALID_STAG:
+ qp->flush_code = FLUSH_MW_BIND_ERR;
++ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
++ break;
++ case IRDMA_AE_IB_INVALID_REQUEST:
++ qp->flush_code = FLUSH_REM_INV_REQ_ERR;
++ qp->event_type = IRDMA_QP_EVENT_REQ_ERR;
+ break;
+ default:
+- qp->flush_code = FLUSH_FATAL_ERR;
++ qp->flush_code = FLUSH_GENERAL_ERR;
++ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ }
+ }
+@@ -252,10 +268,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
+ iwqp->last_aeq = info->ae_id;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ ctx_info = &iwqp->ctx_info;
+- if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1))
+- ctx_info->roce_info->err_rq_idx_valid = true;
+- else
+- ctx_info->iwarp_info->err_rq_idx_valid = true;
+ } else {
+ if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR)
+ continue;
+@@ -365,16 +377,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
+ case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+ case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+- if (rdma_protocol_roce(&iwdev->ibdev, 1))
+- ctx_info->roce_info->err_rq_idx_valid = false;
+- else
+- ctx_info->iwarp_info->err_rq_idx_valid = false;
+- fallthrough;
+ default:
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n",
+ info->ae_id, info->qp, info->qp_cq_id);
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+- if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) {
++ ctx_info->roce_info->err_rq_idx_valid = info->rq;
++ if (info->rq) {
+ ctx_info->roce_info->err_rq_idx = info->wqe_idx;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
+ ctx_info);
+@@ -383,7 +391,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
+ irdma_cm_disconn(iwqp);
+ break;
+ }
+- if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) {
++ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
++ if (info->rq) {
+ ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = true;
+@@ -476,6 +485,8 @@ static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf)
+ iw_qvlist->num_vectors = rf->msix_count;
+ if (rf->msix_count <= num_online_cpus())
+ rf->msix_shared = true;
++ else if (rf->msix_count > num_online_cpus() + 1)
++ rf->msix_count = num_online_cpus() + 1;
+
+ pmsix = rf->msix_entries;
+ for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
+@@ -820,6 +831,8 @@ irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers v
+ info.entry_type = rf->sd_type;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
++ if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE)
++ continue;
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) {
+ info.rsrc_type = iw_hmc_obj_types[i];
+ info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+@@ -1603,7 +1616,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf)
+ info.fpm_commit_buf = mem.va;
+
+ info.bar0 = rf->hw.hw_addr;
+- info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn);
++ info.hmc_fn_id = rf->pf_id;
+ info.hw = &rf->hw;
+ status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info);
+ if (status)
+@@ -2072,7 +2085,7 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+ cqp_request->compl_info.error = info.error;
+
+ if (cqp_request->waiting) {
+- cqp_request->request_done = true;
++ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ } else {
+@@ -2699,24 +2712,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
+ info.sq = flush_mask & IRDMA_FLUSH_SQ;
+ info.rq = flush_mask & IRDMA_FLUSH_RQ;
+
+- if (flush_mask & IRDMA_REFLUSH) {
+- if (info.sq)
+- iwqp->sc_qp.flush_sq = false;
+- if (info.rq)
+- iwqp->sc_qp.flush_rq = false;
+- }
+-
+ /* Generate userflush errors in CQE */
+ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.sq_minor_code = FLUSH_GENERAL_ERR;
+ info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.rq_minor_code = FLUSH_GENERAL_ERR;
+ info.userflushcode = true;
+- if (flush_code) {
+- if (info.sq && iwqp->sc_qp.sq_flush_code)
+- info.sq_minor_code = flush_code;
+- if (info.rq && iwqp->sc_qp.rq_flush_code)
+- info.rq_minor_code = flush_code;
++
++ if (flush_mask & IRDMA_REFLUSH) {
++ if (info.sq)
++ iwqp->sc_qp.flush_sq = false;
++ if (info.rq)
++ iwqp->sc_qp.flush_rq = false;
++ } else {
++ if (flush_code) {
++ if (info.sq && iwqp->sc_qp.sq_flush_code)
++ info.sq_minor_code = flush_code;
++ if (info.rq && iwqp->sc_qp.rq_flush_code)
++ info.rq_minor_code = flush_code;
++ }
++ if (!iwqp->user_mode)
++ queue_delayed_work(iwqp->iwdev->cleanup_wq,
++ &iwqp->dwork_flush,
++ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ /* Issue flush */
+diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
+index 64148ad8a604e..040d4e2b97676 100644
+--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
++++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
+@@ -202,6 +202,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
+ dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
++ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
+ dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
+ dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
+diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
+index d219f64b2c3d5..a6f758b61b0c4 100644
+--- a/drivers/infiniband/hw/irdma/i40iw_if.c
++++ b/drivers/infiniband/hw/irdma/i40iw_if.c
+@@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info
+ rf->rdma_ver = IRDMA_GEN_1;
+ rf->gen_ops.request_reset = i40iw_request_reset;
+ rf->pcidev = cdev_info->pcidev;
++ rf->pf_id = cdev_info->fid;
+ rf->hw.hw_addr = cdev_info->hw_addr;
+ rf->cdev = cdev_info;
+ rf->msix_count = cdev_info->msix_count;
+diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
+index cf53b17510cdb..5986fd906308c 100644
+--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
++++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
+@@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+ dev->irq_ops = &icrdma_irq_ops;
++ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
+diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
+index 46c12334c7354..4789e85d717b3 100644
+--- a/drivers/infiniband/hw/irdma/irdma.h
++++ b/drivers/infiniband/hw/irdma/irdma.h
+@@ -127,6 +127,7 @@ struct irdma_hw_attrs {
+ u64 max_hw_outbound_msg_size;
+ u64 max_hw_inbound_msg_size;
+ u64 max_mr_size;
++ u64 page_size_cap;
+ u32 min_hw_qp_id;
+ u32 min_hw_aeq_size;
+ u32 max_hw_aeq_size;
+diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
+index 51a41359e0b41..c556a36e76703 100644
+--- a/drivers/infiniband/hw/irdma/main.c
++++ b/drivers/infiniband/hw/irdma/main.c
+@@ -226,6 +226,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
+ rf->hw.hw_addr = pf->hw.hw_addr;
+ rf->pcidev = pf->pdev;
+ rf->msix_count = pf->num_rdma_msix;
++ rf->pf_id = pf->hw.pf_id;
+ rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
+ rf->default_vsi.vsi_idx = vsi->vsi_num;
+ rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
+diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
+index b678fe712447e..bd13cc38e5ae1 100644
+--- a/drivers/infiniband/hw/irdma/main.h
++++ b/drivers/infiniband/hw/irdma/main.h
+@@ -160,8 +160,8 @@ struct irdma_cqp_request {
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request);
+ void *param;
+ struct irdma_cqp_compl_info compl_info;
++ bool request_done; /* READ/WRITE_ONCE macros operate on it */
+ bool waiting:1;
+- bool request_done:1;
+ bool dynamic:1;
+ };
+
+@@ -237,7 +237,7 @@ struct irdma_qv_info {
+
+ struct irdma_qvlist_info {
+ u32 num_vectors;
+- struct irdma_qv_info qv_info[1];
++ struct irdma_qv_info qv_info[];
+ };
+
+ struct irdma_gen_ops {
+@@ -257,6 +257,7 @@ struct irdma_pci_f {
+ u8 *mem_rsrc;
+ u8 rdma_ver;
+ u8 rst_to;
++ u8 pf_id;
+ enum irdma_protocol_used protocol_used;
+ u32 sd_type;
+ u32 msix_count;
+@@ -541,6 +542,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
+ void *cb_param);
+ void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
++bool irdma_cq_empty(struct irdma_cq *iwcq);
+ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
+index aeeb1c310965d..fed49da770f3b 100644
+--- a/drivers/infiniband/hw/irdma/pble.c
++++ b/drivers/infiniband/hw/irdma/pble.c
+@@ -25,8 +25,7 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+ list_del(&chunk->list);
+ if (chunk->type == PBLE_SD_PAGED)
+ irdma_pble_free_paged_mem(chunk);
+- if (chunk->bitmapbuf)
+- kfree(chunk->bitmapmem.va);
++ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+ }
+ }
+@@ -283,7 +282,6 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+ "PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
+ pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+ pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
+- list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+ sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+@@ -295,12 +293,12 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+ goto error;
+ }
+
++ list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+ sd_entry->valid = true;
+ return 0;
+
+ error:
+- if (chunk->bitmapbuf)
+- kfree(chunk->bitmapmem.va);
++ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+
+ return ret_code;
+diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
+index e1b3b8118a2ca..aa20827dcc9de 100644
+--- a/drivers/infiniband/hw/irdma/pble.h
++++ b/drivers/infiniband/hw/irdma/pble.h
+@@ -78,7 +78,6 @@ struct irdma_chunk {
+ u32 pg_cnt;
+ enum irdma_alloc_type type;
+ struct irdma_sc_dev *dev;
+- struct irdma_virt_mem bitmapmem;
+ struct irdma_virt_mem chunkmem;
+ };
+
+diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
+index 58e7d875643b8..197eba5eb78fa 100644
+--- a/drivers/infiniband/hw/irdma/puda.c
++++ b/drivers/infiniband/hw/irdma/puda.c
+@@ -235,6 +235,9 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
+ if (valid_bit != cq_uk->polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
++ /* Ensure CQE contents are read after valid bit is checked */
++ dma_rmb();
++
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
+
+@@ -248,6 +251,9 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info)
+ if (polarity != cq_uk->polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
++ /* Ensure ext CQE contents are read after ext valid bit is checked */
++ dma_rmb();
++
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
+ cq_uk->polarity = !cq_uk->polarity;
+diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
+index 874bc25a938b6..8b75e2610e5ba 100644
+--- a/drivers/infiniband/hw/irdma/type.h
++++ b/drivers/infiniband/hw/irdma/type.h
+@@ -99,6 +99,7 @@ enum irdma_term_mpa_errors {
+ enum irdma_qp_event_type {
+ IRDMA_QP_EVENT_CATASTROPHIC,
+ IRDMA_QP_EVENT_ACCESS_ERR,
++ IRDMA_QP_EVENT_REQ_ERR,
+ };
+
+ enum irdma_hw_stats_index_32b {
+@@ -410,6 +411,8 @@ struct irdma_sc_cqp {
+ struct irdma_dcqcn_cc_params dcqcn_params;
+ __le64 *host_ctx;
+ u64 *scratch_array;
++ u64 requested_ops;
++ atomic64_t completed_ops;
+ u32 cqp_id;
+ u32 sq_size;
+ u32 hw_sq_size;
+diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
+index 9b544a3b12886..4b00a9adbe3a5 100644
+--- a/drivers/infiniband/hw/irdma/uk.c
++++ b/drivers/infiniband/hw/irdma/uk.c
+@@ -94,16 +94,18 @@ static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp)
+ */
+ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
+ {
+- __le64 *wqe;
++ struct irdma_qp_quanta *sq;
+ u32 wqe_idx;
+
+ if (!(qp_wqe_idx & 0x7F)) {
+ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
+- wqe = qp->sq_base[wqe_idx].elem;
++ sq = qp->sq_base + wqe_idx;
+ if (wqe_idx)
+- memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000);
++ memset(sq, qp->swqe_polarity ? 0 : 0xFF,
++ 128 * sizeof(*sq));
+ else
+- memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000);
++ memset(sq, qp->swqe_polarity ? 0xFF : 0,
++ 128 * sizeof(*sq));
+ }
+ }
+
+@@ -501,7 +503,8 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ i = 0;
+ } else {
+- qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list,
++ qp->wqe_ops.iw_set_fragment(wqe, 0,
++ frag_cnt ? op_info->sg_list : NULL,
+ qp->swqe_polarity);
+ i = 1;
+ }
+@@ -1068,6 +1071,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
+ enum irdma_status_code ret_code;
+ bool move_cq_head = true;
+ u8 polarity;
++ u8 op_type;
+ bool ext_valid;
+ __le64 *ext_cqe;
+
+@@ -1250,7 +1254,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
+ do {
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+- u8 op_type;
+ u32 tail;
+
+ tail = qp->sq_ring.tail;
+@@ -1267,6 +1270,8 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
+ break;
+ }
+ } while (1);
++ if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
++ info->minor_err = FLUSH_MW_BIND_ERR;
+ qp->sq_flush_seen = true;
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
+ qp->sq_flush_complete = true;
+@@ -1544,6 +1549,9 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
+ if (polarity != temp)
+ break;
+
++ /* Ensure CQE contents are read after valid bit is checked */
++ dma_rmb();
++
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if ((void *)(unsigned long)comp_ctx == q)
+ set_64bit_val(cqe, 8, 0);
+diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
+index 3dcbb1fbf2c66..7c3cb42889694 100644
+--- a/drivers/infiniband/hw/irdma/user.h
++++ b/drivers/infiniband/hw/irdma/user.h
+@@ -104,6 +104,7 @@ enum irdma_flush_opcode {
+ FLUSH_FATAL_ERR,
+ FLUSH_RETRY_EXC_ERR,
+ FLUSH_MW_BIND_ERR,
++ FLUSH_REM_INV_REQ_ERR,
+ };
+
+ enum irdma_cmpl_status {
+diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
+index ac91ea5296db9..a47eedb6df82f 100644
+--- a/drivers/infiniband/hw/irdma/utils.c
++++ b/drivers/infiniband/hw/irdma/utils.c
+@@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+ {
+ struct in_ifaddr *ifa = ptr;
+- struct net_device *netdev = ifa->ifa_dev->dev;
++ struct net_device *real_dev, *netdev = ifa->ifa_dev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr;
+
+- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
++ real_dev = rdma_vlan_dev_real_dev(netdev);
++ if (!real_dev)
++ real_dev = netdev;
++
++ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ local_ipaddr = ntohl(ifa->ifa_address);
+ ibdev_dbg(&iwdev->ibdev,
+- "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev,
+- event, &local_ipaddr, netdev->dev_addr);
++ "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev,
++ event, &local_ipaddr, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+- irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr,
++ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ &local_ipaddr, true, IRDMA_ARP_DELETE);
+- irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false);
++ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+- irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr);
+- irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true);
++ irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr);
++ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+@@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+ {
+ struct inet6_ifaddr *ifa = ptr;
+- struct net_device *netdev = ifa->idev->dev;
++ struct net_device *real_dev, *netdev = ifa->idev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr6[4];
+
+- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
++ real_dev = rdma_vlan_dev_real_dev(netdev);
++ if (!real_dev)
++ real_dev = netdev;
++
++ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+ ibdev_dbg(&iwdev->ibdev,
+- "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev,
+- event, local_ipaddr6, netdev->dev_addr);
++ "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev,
++ event, local_ipaddr6, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+- irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr,
++ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ local_ipaddr6, false, IRDMA_ARP_DELETE);
+- irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false);
++ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+ irdma_add_arp(iwdev->rf, local_ipaddr6, false,
+- netdev->dev_addr);
+- irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true);
++ real_dev->dev_addr);
++ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+@@ -243,21 +251,23 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+ {
+ struct neighbour *neigh = ptr;
++ struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ __be32 *p;
+ u32 local_ipaddr[4] = {};
+ bool ipv4 = true;
+
+- ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev,
+- RDMA_DRIVER_IRDMA);
+- if (!ibdev)
+- return NOTIFY_DONE;
+-
+- iwdev = to_iwdev(ibdev);
+-
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
++ real_dev = rdma_vlan_dev_real_dev(netdev);
++ if (!real_dev)
++ real_dev = netdev;
++ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
++ if (!ibdev)
++ return NOTIFY_DONE;
++
++ iwdev = to_iwdev(ibdev);
+ p = (__be32 *)neigh->primary_key;
+ if (neigh->tbl->family == AF_INET6) {
+ ipv4 = false;
+@@ -278,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ irdma_manage_arp_cache(iwdev->rf, neigh->ha,
+ local_ipaddr, ipv4,
+ IRDMA_ARP_DELETE);
++ ib_device_put(ibdev);
+ break;
+ default:
+ break;
+ }
+
+- ib_device_put(ibdev);
+-
+ return NOTIFY_DONE;
+ }
+
+@@ -472,7 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp,
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+- cqp_request->request_done = false;
++ WRITE_ONCE(cqp_request->request_done, false);
+ cqp_request->callback_fcn = NULL;
+ cqp_request->waiting = false;
+
+@@ -506,7 +515,7 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
+ {
+ if (cqp_request->waiting) {
+ cqp_request->compl_info.error = true;
+- cqp_request->request_done = true;
++ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ }
+ wait_event_timeout(cqp->remove_wq,
+@@ -558,11 +567,11 @@ static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf,
+ bool cqp_error = false;
+ enum irdma_status_code err_code = 0;
+
+- cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
++ cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops);
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ if (wait_event_timeout(cqp_request->waitq,
+- cqp_request->request_done,
++ READ_ONCE(cqp_request->request_done),
+ msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
+ break;
+
+@@ -2284,15 +2293,10 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+
+ sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
+
+- pchunk->bitmapmem.size = sizeofbitmap >> 3;
+- pchunk->bitmapmem.va = kzalloc(pchunk->bitmapmem.size, GFP_KERNEL);
+-
+- if (!pchunk->bitmapmem.va)
++ pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL);
++ if (!pchunk->bitmapbuf)
+ return IRDMA_ERR_NO_MEMORY;
+
+- pchunk->bitmapbuf = pchunk->bitmapmem.va;
+- bitmap_zero(pchunk->bitmapbuf, sizeofbitmap);
+-
+ pchunk->sizeofbitmap = sizeofbitmap;
+ /* each pble is 8 bytes hence shift by 3 */
+ pprm->total_pble_alloc += pchunk->size >> 3;
+@@ -2531,8 +2535,176 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
+ case IRDMA_QP_EVENT_ACCESS_ERR:
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
++ case IRDMA_QP_EVENT_REQ_ERR:
++ ibevent.event = IB_EVENT_QP_REQ_ERR;
++ break;
+ }
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+ }
++
++bool irdma_cq_empty(struct irdma_cq *iwcq)
++{
++ struct irdma_cq_uk *ukcq;
++ u64 qword3;
++ __le64 *cqe;
++ u8 polarity;
++
++ ukcq = &iwcq->sc_cq.cq_uk;
++ cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
++ get_64bit_val(cqe, 24, &qword3);
++ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
++
++ return polarity != ukcq->polarity;
++}
++
++void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
++{
++ struct irdma_cmpl_gen *cmpl_node;
++ struct list_head *tmp_node, *list_node;
++
++ list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
++ cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
++ list_del(&cmpl_node->list);
++ kfree(cmpl_node);
++ }
++}
++
++int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
++{
++ struct irdma_cmpl_gen *cmpl;
++
++ if (list_empty(&iwcq->cmpl_generated))
++ return -ENOENT;
++ cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
++ list_del(&cmpl->list);
++ memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
++ kfree(cmpl);
++
++ ibdev_dbg(iwcq->ibcq.device,
++ "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
++ __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
++ cq_poll_info->wr_id);
++
++ return 0;
++}
++
++/**
++ * irdma_set_cpi_common_values - fill in values for polling info struct
++ * @cpi: resulting structure of cq_poll_info type
++ * @qp: QPair
++ * @qp_num: id of the QP
++ */
++static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
++ struct irdma_qp_uk *qp, u32 qp_num)
++{
++ cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
++ cpi->error = true;
++ cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
++ cpi->minor_err = FLUSH_GENERAL_ERR;
++ cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
++ cpi->qp_id = qp_num;
++}
++
++static inline void irdma_comp_handler(struct irdma_cq *cq)
++{
++ if (!cq->ibcq.comp_handler)
++ return;
++ if (atomic_cmpxchg(&cq->armed, 1, 0))
++ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
++}
++
++void irdma_generate_flush_completions(struct irdma_qp *iwqp)
++{
++ struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
++ struct irdma_ring *sq_ring = &qp->sq_ring;
++ struct irdma_ring *rq_ring = &qp->rq_ring;
++ struct irdma_cmpl_gen *cmpl;
++ __le64 *sw_wqe;
++ u64 wqe_qword;
++ u32 wqe_idx;
++ bool compl_generated = false;
++ unsigned long flags1;
++
++ spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
++ if (irdma_cq_empty(iwqp->iwscq)) {
++ unsigned long flags2;
++
++ spin_lock_irqsave(&iwqp->lock, flags2);
++ while (IRDMA_RING_MORE_WORK(*sq_ring)) {
++ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
++ if (!cmpl) {
++ spin_unlock_irqrestore(&iwqp->lock, flags2);
++ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
++ return;
++ }
++
++ wqe_idx = sq_ring->tail;
++ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
++
++ cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
++ sw_wqe = qp->sq_base[wqe_idx].elem;
++ get_64bit_val(sw_wqe, 24, &wqe_qword);
++ cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
++ /* remove the SQ WR by moving SQ tail*/
++ IRDMA_RING_SET_TAIL(*sq_ring,
++ sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
++ if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) {
++ kfree(cmpl);
++ continue;
++ }
++ ibdev_dbg(iwqp->iwscq->ibcq.device,
++ "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
++ __func__, cmpl->cpi.wr_id, qp->qp_id);
++ list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
++ compl_generated = true;
++ }
++ spin_unlock_irqrestore(&iwqp->lock, flags2);
++ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
++ if (compl_generated)
++ irdma_comp_handler(iwqp->iwscq);
++ } else {
++ spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
++ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
++ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
++ }
++
++ spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
++ if (irdma_cq_empty(iwqp->iwrcq)) {
++ unsigned long flags2;
++
++ spin_lock_irqsave(&iwqp->lock, flags2);
++ while (IRDMA_RING_MORE_WORK(*rq_ring)) {
++ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
++ if (!cmpl) {
++ spin_unlock_irqrestore(&iwqp->lock, flags2);
++ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
++ return;
++ }
++
++ wqe_idx = rq_ring->tail;
++ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
++
++ cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
++ cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
++ /* remove the RQ WR by moving RQ tail */
++ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
++ ibdev_dbg(iwqp->iwrcq->ibcq.device,
++ "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
++ __func__, cmpl->cpi.wr_id, qp->qp_id,
++ wqe_idx);
++ list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
++
++ compl_generated = true;
++ }
++ spin_unlock_irqrestore(&iwqp->lock, flags2);
++ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
++ if (compl_generated)
++ irdma_comp_handler(iwqp->iwrcq);
++ } else {
++ spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
++ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
++ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
++ }
++}
+diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
+index 102dc9342f2a2..8ccbe761b8607 100644
+--- a/drivers/infiniband/hw/irdma/verbs.c
++++ b/drivers/infiniband/hw/irdma/verbs.c
+@@ -29,22 +29,25 @@ static int irdma_query_device(struct ib_device *ibdev,
+ props->vendor_part_id = pcidev->device;
+
+ props->hw_ver = rf->pcidev->revision;
+- props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
++ props->page_size_cap = hw_attrs->page_size_cap;
+ props->max_mr_size = hw_attrs->max_mr_size;
+ props->max_qp = rf->max_qp - rf->used_qps;
+ props->max_qp_wr = hw_attrs->max_qp_wr;
+ props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_cq = rf->max_cq - rf->used_cqs;
+- props->max_cqe = rf->max_cqe;
++ props->max_cqe = rf->max_cqe - 1;
+ props->max_mr = rf->max_mr - rf->used_mrs;
+ props->max_mw = props->max_mr;
+ props->max_pd = rf->max_pd - rf->used_pds;
+ props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
+ props->max_qp_rd_atom = hw_attrs->max_hw_ird;
+ props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
+- if (rdma_protocol_roce(ibdev, 1))
++ if (rdma_protocol_roce(ibdev, 1)) {
++ props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_pkeys = IRDMA_PKEY_TBL_SZ;
++ }
++
+ props->max_ah = rf->max_ah;
+ props->max_mcast_grp = rf->max_mcg;
+ props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
+@@ -57,36 +60,6 @@ static int irdma_query_device(struct ib_device *ibdev,
+ return 0;
+ }
+
+-/**
+- * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
+- * @link_speed: netdev phy link speed
+- * @active_speed: IB port speed
+- * @active_width: IB port width
+- */
+-static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
+- u8 *active_width)
+-{
+- if (link_speed <= SPEED_1000) {
+- *active_width = IB_WIDTH_1X;
+- *active_speed = IB_SPEED_SDR;
+- } else if (link_speed <= SPEED_10000) {
+- *active_width = IB_WIDTH_1X;
+- *active_speed = IB_SPEED_FDR10;
+- } else if (link_speed <= SPEED_20000) {
+- *active_width = IB_WIDTH_4X;
+- *active_speed = IB_SPEED_DDR;
+- } else if (link_speed <= SPEED_25000) {
+- *active_width = IB_WIDTH_1X;
+- *active_speed = IB_SPEED_EDR;
+- } else if (link_speed <= SPEED_40000) {
+- *active_width = IB_WIDTH_4X;
+- *active_speed = IB_SPEED_FDR10;
+- } else {
+- *active_width = IB_WIDTH_4X;
+- *active_speed = IB_SPEED_EDR;
+- }
+-}
+-
+ /**
+ * irdma_query_port - get port attributes
+ * @ibdev: device pointer from stack
+@@ -114,8 +87,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+- irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
+- &props->active_width);
++
++ ib_get_eth_speed(ibdev, port, &props->active_speed,
++ &props->active_width);
+
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->gid_tbl_len = 32;
+@@ -532,10 +506,8 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
+ irdma_modify_qp_to_err(&iwqp->sc_qp);
+
+- irdma_qp_rem_ref(&iwqp->ibqp);
+- wait_for_completion(&iwqp->free_qp);
+- irdma_free_lsmm_rsrc(iwqp);
+- irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
++ if (!iwqp->user_mode)
++ cancel_delayed_work_sync(&iwqp->dwork_flush);
+
+ if (!iwqp->user_mode) {
+ if (iwqp->iwscq) {
+@@ -544,6 +516,12 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+ irdma_clean_cqes(iwqp, iwqp->iwrcq);
+ }
+ }
++
++ irdma_qp_rem_ref(&iwqp->ibqp);
++ wait_for_completion(&iwqp->free_qp);
++ irdma_free_lsmm_rsrc(iwqp);
++ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
++
+ irdma_remove_push_mmap_entries(iwqp);
+ irdma_free_qp_rsrc(iwqp);
+
+@@ -787,6 +765,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
+ return 0;
+ }
+
++static void irdma_flush_worker(struct work_struct *work)
++{
++ struct delayed_work *dwork = to_delayed_work(work);
++ struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
++
++ irdma_generate_flush_completions(iwqp);
++}
++
+ /**
+ * irdma_create_qp - create qp
+ * @ibqp: ptr of qp
+@@ -908,6 +894,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
+ init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
+ irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+ } else {
++ INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
+ init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
+ err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
+ }
+@@ -1397,11 +1384,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ }
+ if (iwqp->ibqp_state > IB_QPS_RTS &&
+ !iwqp->flush_issued) {
+- iwqp->flush_issued = 1;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
+ IRDMA_FLUSH_RQ |
+ IRDMA_FLUSH_WAIT);
++ iwqp->flush_issued = 1;
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+@@ -1617,13 +1604,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+
+ if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
+ if (dont_wait) {
+- if (iwqp->cm_id && iwqp->hw_tcp_state) {
++ if (iwqp->hw_tcp_state) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+- irdma_cm_disconn(iwqp);
+ }
++ irdma_cm_disconn(iwqp);
+ } else {
+ int close_timer_started;
+
+@@ -1754,16 +1741,18 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
++ if (!list_empty(&iwcq->cmpl_generated))
++ irdma_remove_cmpls_list(iwcq);
+ if (!list_empty(&iwcq->resize_list))
+ irdma_process_resize_list(iwcq, iwdev, NULL);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ irdma_cq_wq_destroy(iwdev->rf, cq);
+- irdma_cq_free_rsrc(iwdev->rf, iwcq);
+
+ spin_lock_irqsave(&iwceq->ce_lock, flags);
+ irdma_sc_cleanup_ceqes(cq, ceq);
+ spin_unlock_irqrestore(&iwceq->ce_lock, flags);
++ irdma_cq_free_rsrc(iwdev->rf, iwcq);
+
+ return 0;
+ }
+@@ -1962,6 +1951,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
+ cq->back_cq = iwcq;
+ spin_lock_init(&iwcq->lock);
+ INIT_LIST_HEAD(&iwcq->resize_list);
++ INIT_LIST_HEAD(&iwcq->cmpl_generated);
+ info.dev = dev;
+ ukinfo->cq_size = max(entries, 4);
+ ukinfo->cq_id = cq_num;
+@@ -2298,9 +2288,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
+ * @rf: RDMA PCI function
+ * @iwmr: mr pointer for this memory registration
+ * @use_pbles: flag if to use pble's
++ * @lvl_1_only: request only level 1 pble if true
+ */
+ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
+- bool use_pbles)
++ bool use_pbles, bool lvl_1_only)
+ {
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+@@ -2311,7 +2302,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
+
+ if (use_pbles) {
+ status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
+- false);
++ lvl_1_only);
+ if (status)
+ return -ENOMEM;
+
+@@ -2354,16 +2345,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
+ bool ret = true;
+
+ pg_size = iwmr->page_size;
+- err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
++ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true);
+ if (err)
+ return err;
+
+- if (use_pbles && palloc->level != PBLE_LEVEL_1) {
+- irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+- iwpbl->pbl_allocated = false;
+- return -ENOMEM;
+- }
+-
+ if (use_pbles)
+ arr = palloc->level1.addr;
+
+@@ -2506,7 +2491,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw)
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+- info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
++ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = false;
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+@@ -2776,7 +2761,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
+
+ if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
+ iwmr->page_size = ib_umem_find_best_pgsz(region,
+- SZ_4K | SZ_2M | SZ_1G,
++ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+ virt);
+ if (unlikely(!iwmr->page_size)) {
+ kfree(iwmr);
+@@ -2834,7 +2819,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
+ case IRDMA_MEMREG_TYPE_MEM:
+ use_pbles = (iwmr->page_cnt != 1);
+
+- err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
++ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+ if (err)
+ goto error;
+
+@@ -3018,7 +3003,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+- info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
++ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = true;
+ if (iwpbl->pbl_allocated)
+@@ -3077,15 +3062,12 @@ static int irdma_post_send(struct ib_qp *ibqp,
+ unsigned long flags;
+ bool inv_stag;
+ struct irdma_ah *ah;
+- bool reflush = false;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+ dev = &iwqp->iwdev->rf->sc_dev;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+- if (iwqp->flush_issued && ukqp->sq_flush_complete)
+- reflush = true;
+ while (ib_wr) {
+ memset(&info, 0, sizeof(info));
+ inv_stag = false;
+@@ -3214,6 +3196,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
+ break;
+ case IB_WR_LOCAL_INV:
+ info.op_type = IRDMA_OP_TYPE_INV_STAG;
++ info.local_fence = info.read_fence;
+ info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+ ret = irdma_uk_stag_local_invalidate(ukqp, &info, true);
+ if (ret)
+@@ -3258,15 +3241,14 @@ static int irdma_post_send(struct ib_qp *ibqp,
+ ib_wr = ib_wr->next;
+ }
+
+- if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
+- irdma_uk_qp_post_wr(ukqp);
+- spin_unlock_irqrestore(&iwqp->lock, flags);
+- } else if (reflush) {
+- ukqp->sq_flush_complete = false;
++ if (!iwqp->flush_issued) {
++ if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
++ irdma_uk_qp_post_wr(ukqp);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+- irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
++ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
++ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+ if (err)
+ *bad_wr = ib_wr;
+@@ -3291,14 +3273,11 @@ static int irdma_post_recv(struct ib_qp *ibqp,
+ enum irdma_status_code ret = 0;
+ unsigned long flags;
+ int err = 0;
+- bool reflush = false;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+- if (iwqp->flush_issued && ukqp->rq_flush_complete)
+- reflush = true;
+ while (ib_wr) {
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+@@ -3319,13 +3298,10 @@ static int irdma_post_recv(struct ib_qp *ibqp,
+ }
+
+ out:
+- if (reflush) {
+- ukqp->rq_flush_complete = false;
+- spin_unlock_irqrestore(&iwqp->lock, flags);
+- irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
+- } else {
+- spin_unlock_irqrestore(&iwqp->lock, flags);
+- }
++ spin_unlock_irqrestore(&iwqp->lock, flags);
++ if (iwqp->flush_issued)
++ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
++ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+
+ if (err)
+ *bad_wr = ib_wr;
+@@ -3356,6 +3332,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
+ return IB_WC_RETRY_EXC_ERR;
+ case FLUSH_MW_BIND_ERR:
+ return IB_WC_MW_BIND_ERR;
++ case FLUSH_REM_INV_REQ_ERR:
++ return IB_WC_REM_INV_REQ_ERR;
+ case FLUSH_FATAL_ERR:
+ default:
+ return IB_WC_FATAL_ERR;
+@@ -3537,6 +3515,11 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
+ /* check the current CQ for new cqes */
+ while (npolled < num_entries) {
+ ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
++ if (ret == -ENOENT) {
++ ret = irdma_generated_cmpls(iwcq, cur_cqe);
++ if (!ret)
++ irdma_process_cqe(entry + npolled, cur_cqe);
++ }
+ if (!ret) {
+ ++npolled;
+ cq_new_cqe = true;
+@@ -3604,18 +3587,31 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
+ struct irdma_cq *iwcq;
+ struct irdma_cq_uk *ukcq;
+ unsigned long flags;
+- enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
++ enum irdma_cmpl_notify cq_notify;
++ bool promo_event = false;
++ int ret = 0;
+
++ cq_notify = notify_flags == IB_CQ_SOLICITED ?
++ IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT;
+ iwcq = to_iwcq(ibcq);
+ ukcq = &iwcq->sc_cq.cq_uk;
+- if (notify_flags == IB_CQ_SOLICITED)
+- cq_notify = IRDMA_CQ_COMPL_SOLICITED;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+- irdma_uk_cq_request_notification(ukcq, cq_notify);
++ /* Only promote to arm the CQ for any event if the last arm event was solicited. */
++ if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
++ promo_event = true;
++
++ if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
++ iwcq->last_notify = cq_notify;
++ irdma_uk_cq_request_notification(ukcq, cq_notify);
++ }
++
++ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
++ (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
++ ret = 1;
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+- return 0;
++ return ret;
+ }
+
+ static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+@@ -4314,7 +4310,6 @@ static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+ ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
+ ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
+ ah_attr->grh.sgid_index = ah->sgid_index;
+- ah_attr->grh.sgid_index = ah->sgid_index;
+ memcpy(&ah_attr->grh.dgid, &ah->dgid,
+ sizeof(ah_attr->grh.dgid));
+ }
+diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
+index 5c244cd321a3a..5af3c8e9b3941 100644
+--- a/drivers/infiniband/hw/irdma/verbs.h
++++ b/drivers/infiniband/hw/irdma/verbs.h
+@@ -4,6 +4,7 @@
+ #define IRDMA_VERBS_H
+
+ #define IRDMA_MAX_SAVED_PHY_PGADDR 4
++#define IRDMA_FLUSH_DELAY_MS 20
+
+ #define IRDMA_PKEY_TBL_SZ 1
+ #define IRDMA_DEFAULT_PKEY 0xFFFF
+@@ -110,6 +111,8 @@ struct irdma_cq {
+ u16 cq_size;
+ u16 cq_num;
+ bool user_mode;
++ atomic_t armed;
++ enum irdma_cmpl_notify last_notify;
+ u32 polled_cmpls;
+ u32 cq_mem_size;
+ struct irdma_dma_mem kmem;
+@@ -119,6 +122,12 @@ struct irdma_cq {
+ struct irdma_pbl *iwpbl_shadow;
+ struct list_head resize_list;
+ struct irdma_cq_poll_info cur_cqe;
++ struct list_head cmpl_generated;
++};
++
++struct irdma_cmpl_gen {
++ struct list_head list;
++ struct irdma_cq_poll_info cpi;
+ };
+
+ struct disconn_work {
+@@ -159,6 +168,7 @@ struct irdma_qp {
+ refcount_t refcnt;
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_node *cm_node;
++ struct delayed_work dwork_flush;
+ struct ib_mr *lsmm_mr;
+ atomic_t hw_mod_qp_pend;
+ enum ib_qp_state ibqp_state;
+@@ -222,4 +232,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev);
+ void irdma_ib_unregister_device(struct irdma_device *iwdev);
+ void irdma_ib_dealloc_device(struct ib_device *ibdev);
+ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
++void irdma_generate_flush_completions(struct irdma_qp *iwqp);
++void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
++int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
+ #endif /* IRDMA_VERBS_H */
+diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
+index f367f4a4abffc..53d83212cda81 100644
+--- a/drivers/infiniband/hw/mlx4/main.c
++++ b/drivers/infiniband/hw/mlx4/main.c
+@@ -2217,6 +2217,11 @@ static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+ };
+
++static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
++ .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
++ .get_hw_stats = mlx4_ib_get_hw_stats,
++};
++
+ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
+ {
+ struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
+@@ -2229,9 +2234,16 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
+ return 0;
+
+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
+- /* i == 1 means we are building port counters */
+- if (i && !per_port)
+- continue;
++ /*
++ * i == 1 means we are building port counters, set a different
++ * stats ops without port stats callback.
++ */
++ if (i && !per_port) {
++ ib_set_device_ops(&ibdev->ib_dev,
++ &mlx4_ib_hw_stats_ops1);
++
++ return 0;
++ }
+
+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+ &diag[i].offset,
+@@ -3237,7 +3249,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+ ew = kmalloc(sizeof *ew, GFP_ATOMIC);
+ if (!ew)
+- break;
++ return;
+
+ INIT_WORK(&ew->work, handle_port_mgmt_change_event);
+ memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
+index 04a67b4816086..a40bf58bcdd3a 100644
+--- a/drivers/infiniband/hw/mlx4/mr.c
++++ b/drivers/infiniband/hw/mlx4/mr.c
+@@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+- mr->ibmr.length = length;
+ mr->ibmr.page_size = 1U << shift;
+
+ return &mr->ibmr;
+diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
+index 8662f462e2a5f..43b2aad845917 100644
+--- a/drivers/infiniband/hw/mlx4/qp.c
++++ b/drivers/infiniband/hw/mlx4/qp.c
+@@ -412,9 +412,13 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp,
+ struct mlx4_ib_create_qp *ucmd)
+ {
++ u32 cnt;
++
+ /* Sanity check SQ size before proceeding */
+- if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
+- ucmd->log_sq_stride >
++ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
++ cnt > dev->dev->caps.max_wqes)
++ return -EINVAL;
++ if (ucmd->log_sq_stride >
+ ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+ return -EINVAL;
+@@ -526,15 +530,15 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
+ return (-EOPNOTSUPP);
+ }
+
+- if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 |
+- MLX4_IB_RX_HASH_DST_IPV4 |
+- MLX4_IB_RX_HASH_SRC_IPV6 |
+- MLX4_IB_RX_HASH_DST_IPV6 |
+- MLX4_IB_RX_HASH_SRC_PORT_TCP |
+- MLX4_IB_RX_HASH_DST_PORT_TCP |
+- MLX4_IB_RX_HASH_SRC_PORT_UDP |
+- MLX4_IB_RX_HASH_DST_PORT_UDP |
+- MLX4_IB_RX_HASH_INNER)) {
++ if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 |
++ MLX4_IB_RX_HASH_DST_IPV4 |
++ MLX4_IB_RX_HASH_SRC_IPV6 |
++ MLX4_IB_RX_HASH_DST_IPV6 |
++ MLX4_IB_RX_HASH_SRC_PORT_TCP |
++ MLX4_IB_RX_HASH_DST_PORT_TCP |
++ MLX4_IB_RX_HASH_SRC_PORT_UDP |
++ MLX4_IB_RX_HASH_DST_PORT_UDP |
++ MLX4_IB_RX_HASH_INNER)) {
+ pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
+ ucmd->rx_hash_fields_mask);
+ return (-EOPNOTSUPP);
+@@ -1099,8 +1103,10 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ if (dev->steering_support ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ qp->flags |= MLX4_IB_QP_NETIF;
+- else
++ else {
++ err = -EINVAL;
+ goto err;
++ }
+ }
+
+ err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
+diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
+index 224ba36f2946c..1a0ecf439c099 100644
+--- a/drivers/infiniband/hw/mlx5/counters.c
++++ b/drivers/infiniband/hw/mlx5/counters.c
+@@ -249,7 +249,6 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+- u32 mdev_port_num;
+
+ if (!stats)
+ return -EINVAL;
+@@ -270,8 +269,9 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+- &mdev_port_num);
++ if (!port_num)
++ port_num = 1;
++ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
+index e95967aefe788..104e5cbba066b 100644
+--- a/drivers/infiniband/hw/mlx5/devx.c
++++ b/drivers/infiniband/hw/mlx5/devx.c
+@@ -666,7 +666,21 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+- return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
++ {
++ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
++ struct devx_obj *devx_uobj = uobj->object;
++
++ if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER &&
++ devx_uobj->flow_counter_bulk_size) {
++ u64 end;
++
++ end = devx_uobj->obj_id +
++ devx_uobj->flow_counter_bulk_size;
++ return devx_uobj->obj_id <= obj_id && end > obj_id;
++ }
++
++ return devx_uobj->obj_id == obj_id;
++ }
+
+ default:
+ return false;
+@@ -1515,10 +1529,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
+ goto obj_free;
+
+ if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+- u8 bulk = MLX5_GET(alloc_flow_counter_in,
+- cmd_in,
+- flow_counter_bulk);
+- obj->flow_counter_bulk_size = 128UL * bulk;
++ u32 bulk = MLX5_GET(alloc_flow_counter_in,
++ cmd_in,
++ flow_counter_bulk_log_size);
++
++ if (bulk)
++ bulk = 1 << bulk;
++ else
++ bulk = 128UL * MLX5_GET(alloc_flow_counter_in,
++ cmd_in,
++ flow_counter_bulk);
++ obj->flow_counter_bulk_size = bulk;
+ }
+
+ uobj->object = obj;
+@@ -1891,8 +1912,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+- if (err)
++ if (err) {
++ kfree(obj_event);
+ return err;
++ }
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ }
+
+diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
+index 5fbc0a8454b91..8a7e182af5303 100644
+--- a/drivers/infiniband/hw/mlx5/fs.c
++++ b/drivers/infiniband/hw/mlx5/fs.c
+@@ -2078,12 +2078,10 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ if (err)
+ return err;
+
+- if (flags) {
+- mlx5_ib_ft_type_to_namespace(
++ if (flags)
++ return mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+- return 0;
+- }
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
+index ec242a5a17a35..f6f2df855c2ed 100644
+--- a/drivers/infiniband/hw/mlx5/mad.c
++++ b/drivers/infiniband/hw/mlx5/mad.c
+@@ -166,6 +166,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
++ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
++ /* set local port to one for Function-Per-Port HCA. */
++ mdev = dev->mdev;
++ mdev_port_num = 1;
++ }
++
+ /* Declaring support of extended counters */
+ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
+ struct ib_class_port_info cpi = {};
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index 8664bcf6d3f59..0ebd3c7b2d2a3 100644
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -443,6 +443,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
++ case MLX5E_PROT_MASK(MLX5E_400GAUI_8):
++ *active_width = IB_WIDTH_8X;
++ *active_speed = IB_SPEED_HDR;
++ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_NDR;
+@@ -1847,6 +1851,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
+ if (MLX5_CAP_GEN(dev->mdev, drain_sigerr))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS;
+
++ resp->comp_mask |=
++ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG;
++
+ return 0;
+ }
+
+@@ -4369,6 +4376,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
++ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
++ mlx5_ib_stage_delay_drop_init,
++ mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index 22e2f4d79743d..cf203f879d340 100644
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -536,8 +536,10 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
+ spin_lock_irq(&ent->lock);
+ if (ent->disabled)
+ goto out;
+- if (need_delay)
++ if (need_delay) {
+ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
++ goto out;
++ }
+ remove_cache_mr_locked(ent);
+ queue_adjust_cache_locked(ent);
+ }
+@@ -580,6 +582,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ ent = &cache->ent[entry];
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
++ queue_adjust_cache_locked(ent);
++ ent->miss++;
+ spin_unlock_irq(&ent->lock);
+ mr = create_cache_mr(ent);
+ if (IS_ERR(mr))
+@@ -631,6 +635,7 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+ {
+ struct mlx5_cache_ent *ent = mr->cache_ent;
+
++ WRITE_ONCE(dev->cache.last_add, jiffies);
+ spin_lock_irq(&ent->lock);
+ list_add_tail(&mr->list, &ent->head);
+ ent->available_mrs++;
+diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
+index d0d98e584ebcc..fcf6447b4a4e0 100644
+--- a/drivers/infiniband/hw/mlx5/odp.c
++++ b/drivers/infiniband/hw/mlx5/odp.c
+@@ -792,7 +792,8 @@ static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
+ {
+ if (!mmkey)
+ return false;
+- if (mmkey->type == MLX5_MKEY_MW)
++ if (mmkey->type == MLX5_MKEY_MW ||
++ mmkey->type == MLX5_MKEY_INDIRECT_DEVX)
+ return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key);
+ return mmkey->key == key;
+ }
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index e5abbcfc1d574..1080daf3a546f 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -4406,7 +4406,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ return -EINVAL;
+
+ if (attr->port_num == 0 ||
+- attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
++ attr->port_num > dev->num_ports) {
+ mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+ attr->port_num, dev->num_ports);
+ return -EINVAL;
+@@ -4499,6 +4499,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+ return false;
+ }
+
++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
++ int attr_mask, enum ib_qp_type qp_type)
++{
++ int log_max_ra_res;
++ int log_max_ra_req;
++
++ if (qp_type == MLX5_IB_QPT_DCI) {
++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_res_dc);
++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_req_dc);
++ } else {
++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_res_qp);
++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_req_qp);
++ }
++
++ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
++ attr->max_rd_atomic > log_max_ra_res) {
++ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
++ attr->max_rd_atomic);
++ return false;
++ }
++
++ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
++ attr->max_dest_rd_atomic > log_max_ra_req) {
++ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
++ attr->max_dest_rd_atomic);
++ return false;
++ }
++ return true;
++}
++
+ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+ {
+@@ -4586,21 +4620,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ goto out;
+ }
+
+- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+- attr->max_rd_atomic >
+- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+- attr->max_rd_atomic);
+- goto out;
+- }
+-
+- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+- attr->max_dest_rd_atomic >
+- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+- attr->max_dest_rd_atomic);
++ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
+ goto out;
+- }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
+index 8844eacf2380e..e508c0753dd37 100644
+--- a/drivers/infiniband/hw/mlx5/qpc.c
++++ b/drivers/infiniband/hw/mlx5/qpc.c
+@@ -297,8 +297,7 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, in, uid, qp->uid);
+- mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
+- return 0;
++ return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
+ }
+
+ int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev,
+@@ -548,14 +547,14 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn)
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, in);
+ }
+
+-static void destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
++static int destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
+ {
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ MLX5_SET(destroy_rq_in, in, uid, uid);
+- mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
++ return mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
+ }
+
+ int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+@@ -586,8 +585,7 @@ int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq)
+ {
+ destroy_resource_common(dev, rq);
+- destroy_rq_tracked(dev, rq->qpn, rq->uid);
+- return 0;
++ return destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ }
+
+ static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
+diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
+index 69bba0ef4a5df..53f43649f7d08 100644
+--- a/drivers/infiniband/hw/mthca/mthca_qp.c
++++ b/drivers/infiniband/hw/mthca/mthca_qp.c
+@@ -1393,7 +1393,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
+ if (mthca_array_get(&dev->qp_table.qp, mqpn))
+ err = -EBUSY;
+ else
+- mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
++ mthca_array_set(&dev->qp_table.qp, mqpn, qp);
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ if (err)
+diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
+index 755930be01b8e..6b59f97a182b0 100644
+--- a/drivers/infiniband/hw/qedr/main.c
++++ b/drivers/infiniband/hw/qedr/main.c
+@@ -345,6 +345,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
+ if (IS_IWARP(dev)) {
+ xa_init(&dev->qps);
+ dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
++ if (!dev->iwarp_wq) {
++ rc = -ENOMEM;
++ goto err1;
++ }
+ }
+
+ /* Allocate Status blocks for CNQ */
+@@ -352,7 +356,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
+ GFP_KERNEL);
+ if (!dev->sb_array) {
+ rc = -ENOMEM;
+- goto err1;
++ goto err_destroy_wq;
+ }
+
+ dev->cnq_array = kcalloc(dev->num_cnq,
+@@ -403,6 +407,9 @@ err3:
+ kfree(dev->cnq_array);
+ err2:
+ kfree(dev->sb_array);
++err_destroy_wq:
++ if (IS_IWARP(dev))
++ destroy_workqueue(dev->iwarp_wq);
+ err1:
+ kfree(dev->sgid_tbl);
+ return rc;
+diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
+index 8def88cfa3009..db9ef3e1eb97c 100644
+--- a/drivers/infiniband/hw/qedr/qedr.h
++++ b/drivers/infiniband/hw/qedr/qedr.h
+@@ -418,6 +418,7 @@ struct qedr_qp {
+ u32 sq_psn;
+ u32 qkey;
+ u32 dest_qp_num;
++ u8 timeout;
+
+ /* Relevant to qps created from kernel space only (ULPs) */
+ u8 prev_wqe_size;
+diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
+index dcb3653db72d7..bb0c2b93a34d8 100644
+--- a/drivers/infiniband/hw/qedr/verbs.c
++++ b/drivers/infiniband/hw/qedr/verbs.c
+@@ -1941,6 +1941,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
+ /* db offset was calculated in copy_qp_uresp, now set in the user q */
+ if (qedr_qp_has_sq(qp)) {
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
++ qp->sq.max_wr = attrs->cap.max_send_wr;
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+@@ -1951,6 +1952,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
+
+ if (qedr_qp_has_rq(qp)) {
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
++ qp->rq.max_wr = attrs->cap.max_recv_wr;
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+@@ -2620,6 +2622,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ 1 << max_t(int, attr->timeout - 8, 0);
+ else
+ qp_params.ack_timeout = 0;
++
++ qp->timeout = attr->timeout;
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+@@ -2744,15 +2748,18 @@ int qedr_query_qp(struct ib_qp *ibqp,
+ int rc = 0;
+
+ memset(&params, 0, sizeof(params));
+-
+- rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
+- if (rc)
+- goto err;
+-
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+- qp_attr->qp_state = qedr_get_ibqp_state(params.state);
++ if (qp->qp_type != IB_QPT_GSI) {
++ rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
++ if (rc)
++ goto err;
++ qp_attr->qp_state = qedr_get_ibqp_state(params.state);
++ } else {
++ qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS);
++ }
++
+ qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
+ qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
+ qp_attr->path_mig_state = IB_MIG_MIGRATED;
+@@ -2776,7 +2783,7 @@ int qedr_query_qp(struct ib_qp *ibqp,
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, 0);
+- qp_attr->timeout = params.timeout;
++ qp_attr->timeout = qp->timeout;
+ qp_attr->rnr_retry = params.rnr_retry;
+ qp_attr->retry_cnt = params.retry_cnt;
+ qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
+@@ -3086,7 +3093,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
+ else
+ DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
+- goto err0;
++ goto err1;
+ }
+
+ /* Index only, 18 bit long, lkey = itid << 8 | key */
+@@ -3110,7 +3117,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
+ rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+ if (rc) {
+ DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+- goto err1;
++ goto err2;
+ }
+
+ mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+@@ -3119,8 +3126,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
+ DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
+ return mr;
+
+-err1:
++err2:
+ dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
++err1:
++ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+ err0:
+ kfree(mr);
+ return ERR_PTR(rc);
+diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
+index 0a3b28142c05b..41c272980f91c 100644
+--- a/drivers/infiniband/hw/qib/qib_sysfs.c
++++ b/drivers/infiniband/hw/qib/qib_sysfs.c
+@@ -541,7 +541,7 @@ static struct attribute *port_diagc_attributes[] = {
+ };
+
+ static const struct attribute_group port_diagc_group = {
+- .name = "linkcontrol",
++ .name = "diag_counters",
+ .attrs = port_diagc_attributes,
+ };
+
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
+index ac11943a5ddb0..bf2f30d67949d 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ &addrlimit) ||
+ addrlimit > type_max(typeof(pkt->addrlimit))) {
+ ret = -EINVAL;
+- goto free_pbc;
++ goto free_pkt;
+ }
+ pkt->addrlimit = addrlimit;
+
+diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
+index 760b254ba42d6..48a57568cad69 100644
+--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
++++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
+@@ -281,8 +281,8 @@ iter_chunk:
+ size = pa_end - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+ va_start, &pa_start, size, flags);
+- err = iommu_map(pd->domain, va_start, pa_start,
+- size, flags);
++ err = iommu_map_atomic(pd->domain, va_start,
++ pa_start, size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+@@ -298,8 +298,8 @@ iter_chunk:
+ size = pa - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+ va_start, &pa_start, size, flags);
+- err = iommu_map(pd->domain, va_start, pa_start,
+- size, flags);
++ err = iommu_map_atomic(pd->domain, va_start,
++ pa_start, size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
+index 3305f2744bfaa..2bdc4486c3daa 100644
+--- a/drivers/infiniband/sw/rdmavt/qp.c
++++ b/drivers/infiniband/sw/rdmavt/qp.c
+@@ -464,8 +464,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi)
+ if (qps_inuse)
+ rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
+ qps_inuse);
+- if (!rdi->qp_dev)
+- return;
+
+ kfree(rdi->qp_dev->qp_table);
+ free_qpn_table(&rdi->qp_dev->qpn_table);
+@@ -2775,7 +2773,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
+ EXPORT_SYMBOL(rvt_qp_iter);
+
+ /*
+- * This should be called with s_lock held.
++ * This should be called with s_lock and r_lock held.
+ */
+ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status)
+@@ -3073,6 +3071,8 @@ do_write:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
++ if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
++ goto inv_err;
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
+@@ -3132,7 +3132,9 @@ send_comp:
+ rvp->n_loop_pkts++;
+ flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
++ spin_lock(&sqp->r_lock);
+ rvt_send_complete(sqp, wqe, send_status);
++ spin_unlock(&sqp->r_lock);
+ if (local_ops) {
+ atomic_dec(&sqp->local_ops_pending);
+ local_ops = 0;
+@@ -3186,9 +3188,15 @@ serr:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+ serr_no_r_lock:
+ spin_lock_irqsave(&sqp->s_lock, flags);
++ spin_lock(&sqp->r_lock);
+ rvt_send_complete(sqp, wqe, send_status);
++ spin_unlock(&sqp->r_lock);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
++ int lastwqe;
++
++ spin_lock(&sqp->r_lock);
++ lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
++ spin_unlock(&sqp->r_lock);
+
+ sqp->s_flags &= ~RVT_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
+index d2d802c776fdc..48a3864ada29a 100644
+--- a/drivers/infiniband/sw/rxe/rxe_comp.c
++++ b/drivers/infiniband/sw/rxe/rxe_comp.c
+@@ -142,10 +142,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
+ /* we come here whether or not we found a response packet to see if
+ * there are any posted WQEs
+ */
+- if (qp->is_user)
+- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+- else
+- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
++ wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
+ *wqe_p = wqe;
+
+ /* no WQE or requester has not started it yet */
+@@ -432,10 +429,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+ if (post)
+ make_send_cqe(qp, wqe, &cqe);
+
+- if (qp->is_user)
+- advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+- else
+- advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
++ queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
+
+ if (post)
+ rxe_cq_post(qp->scq, &cqe, 0);
+@@ -539,7 +533,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
+ wqe->status = IB_WC_WR_FLUSH_ERR;
+ do_complete(qp, wqe);
+ } else {
+- advance_consumer(q, q->type);
++ queue_advance_consumer(q, q->type);
+ }
+ }
+ }
+diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
+index aef288f164fdd..4eedaa0244b39 100644
+--- a/drivers/infiniband/sw/rxe/rxe_cq.c
++++ b/drivers/infiniband/sw/rxe/rxe_cq.c
+@@ -25,11 +25,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
+ }
+
+ if (cq) {
+- if (cq->is_user)
+- count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
+-
++ count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
+ if (cqe < count) {
+ pr_warn("cqe(%d) < current # elements in queue (%d)",
+ cqe, count);
+@@ -65,7 +61,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
+ int err;
+ enum queue_type type;
+
+- type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL;
++ type = QUEUE_TYPE_TO_CLIENT;
+ cq->queue = rxe_queue_init(rxe, &cqe,
+ sizeof(struct rxe_cqe), type);
+ if (!cq->queue) {
+@@ -117,11 +113,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+
+- if (cq->is_user)
+- full = queue_full(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- full = queue_full(cq->queue, QUEUE_TYPE_KERNEL);
+-
++ full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
+ if (unlikely(full)) {
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ if (cq->ibcq.event_handler) {
+@@ -134,17 +126,10 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
+ return -EBUSY;
+ }
+
+- if (cq->is_user)
+- addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL);
+-
++ addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
+ memcpy(addr, cqe, sizeof(*cqe));
+
+- if (cq->is_user)
+- advance_producer(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- advance_producer(cq->queue, QUEUE_TYPE_KERNEL);
++ queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
+
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
+index f0c954575bdec..21bd969718bd7 100644
+--- a/drivers/infiniband/sw/rxe/rxe_loc.h
++++ b/drivers/infiniband/sw/rxe/rxe_loc.h
+@@ -85,7 +85,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
+ enum rxe_mr_lookup_type type);
+ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
+ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
++int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
++int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+ void rxe_mr_cleanup(struct rxe_pool_entry *arg);
+
+diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
+index 5890a82462161..7c2e7b291b653 100644
+--- a/drivers/infiniband/sw/rxe/rxe_mr.c
++++ b/drivers/infiniband/sw/rxe/rxe_mr.c
+@@ -48,8 +48,14 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
+ u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
+ u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+
+- mr->ibmr.lkey = lkey;
+- mr->ibmr.rkey = rkey;
++ /* set ibmr->l/rkey and also copy into private l/rkey
++ * for user MRs these will always be the same
++ * for cases where caller 'owns' the key portion
++ * they may be different until REG_MR WQE is executed.
++ */
++ mr->lkey = mr->ibmr.lkey = lkey;
++ mr->rkey = mr->ibmr.rkey = rkey;
++
+ mr->state = RXE_MR_STATE_INVALID;
+ mr->type = RXE_MR_TYPE_NONE;
+ mr->map_shift = ilog2(RXE_BUF_PER_MAP);
+@@ -191,10 +197,8 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
+ {
+ int err;
+
+- rxe_mr_init(0, mr);
+-
+- /* In fastreg, we also set the rkey */
+- mr->ibmr.rkey = mr->ibmr.lkey;
++ /* always allow remote access for FMRs */
++ rxe_mr_init(IB_ACCESS_REMOTE, mr);
+
+ err = rxe_mr_alloc(mr, max_pages);
+ if (err)
+@@ -507,8 +511,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
+ if (!mr)
+ return NULL;
+
+- if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
+- (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
++ if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
++ (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
+ mr_pd(mr) != pd || (access && !(access & mr->access)) ||
+ mr->state != RXE_MR_STATE_VALID)) {
+ rxe_drop_ref(mr);
+@@ -518,22 +522,22 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
+ return mr;
+ }
+
+-int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
++int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
+ {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *mr;
+ int ret;
+
+- mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
++ mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
+ if (!mr) {
+- pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
++ pr_err("%s: No MR for key %#x\n", __func__, key);
+ ret = -EINVAL;
+ goto err;
+ }
+
+- if (rkey != mr->ibmr.rkey) {
+- pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
+- __func__, rkey, mr->ibmr.rkey);
++ if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
++ pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
++ __func__, key, (mr->rkey ? mr->rkey : mr->lkey));
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+@@ -554,6 +558,49 @@ err:
+ return ret;
+ }
+
++/* user can (re)register fast MR by executing a REG_MR WQE.
++ * user is expected to hold a reference on the ib mr until the
++ * WQE completes.
++ * Once a fast MR is created this is the only way to change the
++ * private keys. It is the responsibility of the user to maintain
++ * the ib mr keys in sync with rxe mr keys.
++ */
++int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
++{
++ struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
++ u32 key = wqe->wr.wr.reg.key;
++ u32 access = wqe->wr.wr.reg.access;
++
++ /* user can only register MR in free state */
++ if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
++ pr_warn("%s: mr->lkey = 0x%x not free\n",
++ __func__, mr->lkey);
++ return -EINVAL;
++ }
++
++ /* user can only register mr with qp in same protection domain */
++ if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
++ pr_warn("%s: qp->pd and mr->pd don't match\n",
++ __func__);
++ return -EINVAL;
++ }
++
++ /* user is only allowed to change key portion of l/rkey */
++ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
++ pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
++ __func__, key, mr->lkey);
++ return -EINVAL;
++ }
++
++ mr->access = access;
++ mr->lkey = key;
++ mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
++ mr->iova = wqe->wr.wr.reg.mr->iova;
++ mr->state = RXE_MR_STATE_VALID;
++
++ return 0;
++}
++
+ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+ {
+ struct rxe_mr *mr = to_rmr(ibmr);
+diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
+index 5ba77df7598ed..933a0b29275b9 100644
+--- a/drivers/infiniband/sw/rxe/rxe_mw.c
++++ b/drivers/infiniband/sw/rxe/rxe_mw.c
+@@ -21,7 +21,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+ }
+
+ rxe_add_index(mw);
+- ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
++ mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
+ mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
+ RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
+ spin_lock_init(&mw->lock);
+@@ -108,11 +108,6 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ }
+ }
+
+- if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
+- pr_err_once("attempt to bind MW with same key\n");
+- return -EINVAL;
+- }
+-
+ /* remaining checks only apply to a nonzero MR */
+ if (!mr)
+ return 0;
+@@ -161,13 +156,9 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_mw *mw, struct rxe_mr *mr)
+ {
+- u32 rkey;
+- u32 new_rkey;
+-
+- rkey = mw->ibmw.rkey;
+- new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
++ u32 key = wqe->wr.wr.mw.rkey & 0xff;
+
+- mw->ibmw.rkey = new_rkey;
++ mw->rkey = (mw->rkey & ~0xff) | key;
+ mw->access = wqe->wr.wr.mw.access;
+ mw->state = RXE_MW_STATE_VALID;
+ mw->addr = wqe->wr.wr.mw.addr;
+@@ -197,29 +188,29 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+ struct rxe_mw *mw;
+ struct rxe_mr *mr;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
++ u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
++ u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
+ unsigned long flags;
+
+- mw = rxe_pool_get_index(&rxe->mw_pool,
+- wqe->wr.wr.mw.mw_rkey >> 8);
++ mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
+ if (unlikely(!mw)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+- if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) {
++ if (unlikely(mw->rkey != mw_rkey)) {
+ ret = -EINVAL;
+ goto err_drop_mw;
+ }
+
+ if (likely(wqe->wr.wr.mw.length)) {
+- mr = rxe_pool_get_index(&rxe->mr_pool,
+- wqe->wr.wr.mw.mr_lkey >> 8);
++ mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8);
+ if (unlikely(!mr)) {
+ ret = -EINVAL;
+ goto err_drop_mw;
+ }
+
+- if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) {
++ if (unlikely(mr->lkey != mr_lkey)) {
+ ret = -EINVAL;
+ goto err_drop_mr;
+ }
+@@ -292,7 +283,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
+ goto err;
+ }
+
+- if (rkey != mw->ibmw.rkey) {
++ if (rkey != mw->rkey) {
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+@@ -323,7 +314,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
+ if (!mw)
+ return NULL;
+
+- if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd ||
++ if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
+ (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
+ (mw->length == 0) ||
+ (access && !(access & mw->access)) ||
+diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
+index 2cb810cb890a5..be86b879a0d53 100644
+--- a/drivers/infiniband/sw/rxe/rxe_net.c
++++ b/drivers/infiniband/sw/rxe/rxe_net.c
+@@ -179,6 +179,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+ pkt->mask = RXE_GRH_MASK;
+ pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
+
++ /* remove udp header */
++ skb_pull(skb, sizeof(struct udphdr));
++
+ rxe_rcv(skb);
+
+ return 0;
+@@ -419,6 +422,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
+ return -EIO;
+ }
+
++ /* remove udp header */
++ skb_pull(skb, sizeof(struct udphdr));
++
+ rxe_rcv(skb);
+
+ return 0;
+diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
+index 3ef5a10a6efd8..47ebaac8f4754 100644
+--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
++++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
+@@ -117,7 +117,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
+ }
+ },
+ [IB_OPCODE_RC_SEND_MIDDLE] = {
+- .name = "IB_OPCODE_RC_SEND_MIDDLE]",
++ .name = "IB_OPCODE_RC_SEND_MIDDLE",
+ .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
+ | RXE_MIDDLE_MASK,
+ .length = RXE_BTH_BYTES,
+diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
+index 742e6ec93686c..8723898701063 100644
+--- a/drivers/infiniband/sw/rxe/rxe_param.h
++++ b/drivers/infiniband/sw/rxe/rxe_param.h
+@@ -103,6 +103,12 @@ enum rxe_device_param {
+ RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
+ RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,
+
++ /* Max number of interations of each tasklet
++ * before yielding the cpu to let other
++ * work make progress
++ */
++ RXE_MAX_ITERATIONS = 1024,
++
+ /* Delay before calling arbiter timer */
+ RXE_NSEC_ARB_TIMER_DELAY = 200,
+
+@@ -113,7 +119,7 @@ enum rxe_device_param {
+ /* default/initial rxe port parameters */
+ enum rxe_port_param {
+ RXE_PORT_GID_TBL_LEN = 1024,
+- RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
++ RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP,
+ RXE_PORT_MAX_MSG_SZ = 0x800000,
+ RXE_PORT_BAD_PKEY_CNTR = 0,
+ RXE_PORT_QKEY_VIOL_CNTR = 0,
+diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
+index 1ab6af7ddb254..13b237d93a616 100644
+--- a/drivers/infiniband/sw/rxe/rxe_qp.c
++++ b/drivers/infiniband/sw/rxe/rxe_qp.c
+@@ -195,6 +195,17 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
+ spin_lock_init(&qp->grp_lock);
+ spin_lock_init(&qp->state_lock);
+
++ spin_lock_init(&qp->req.task.state_lock);
++ spin_lock_init(&qp->resp.task.state_lock);
++ spin_lock_init(&qp->comp.task.state_lock);
++
++ spin_lock_init(&qp->sq.sq_lock);
++ spin_lock_init(&qp->rq.producer_lock);
++ spin_lock_init(&qp->rq.consumer_lock);
++
++ skb_queue_head_init(&qp->req_pkts);
++ skb_queue_head_init(&qp->resp_pkts);
++
+ atomic_set(&qp->ssn, 0);
+ atomic_set(&qp->skb_out, 0);
+ }
+@@ -231,7 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
+ qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
+ wqe_size += sizeof(struct rxe_send_wqe);
+
+- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
++ type = QUEUE_TYPE_FROM_CLIENT;
+ qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
+ wqe_size, type);
+ if (!qp->sq.queue)
+@@ -248,24 +259,15 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
+ return err;
+ }
+
+- if (qp->is_user)
+- qp->req.wqe_index = producer_index(qp->sq.queue,
+- QUEUE_TYPE_FROM_USER);
+- else
+- qp->req.wqe_index = producer_index(qp->sq.queue,
+- QUEUE_TYPE_KERNEL);
++ qp->req.wqe_index = queue_get_producer(qp->sq.queue,
++ QUEUE_TYPE_FROM_CLIENT);
+
+ qp->req.state = QP_STATE_RESET;
+ qp->req.opcode = -1;
+ qp->comp.opcode = -1;
+
+- spin_lock_init(&qp->sq.sq_lock);
+- skb_queue_head_init(&qp->req_pkts);
+-
+- rxe_init_task(rxe, &qp->req.task, qp,
+- rxe_requester, "req");
+- rxe_init_task(rxe, &qp->comp.task, qp,
+- rxe_completer, "comp");
++ rxe_init_task(&qp->req.task, qp, rxe_requester);
++ rxe_init_task(&qp->comp.task, qp, rxe_completer);
+
+ qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
+ if (init->qp_type == IB_QPT_RC) {
+@@ -293,7 +295,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
+ pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
+ qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
+
+- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
++ type = QUEUE_TYPE_FROM_CLIENT;
+ qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
+ wqe_size, type);
+ if (!qp->rq.queue)
+@@ -310,15 +312,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
+ }
+ }
+
+- spin_lock_init(&qp->rq.producer_lock);
+- spin_lock_init(&qp->rq.consumer_lock);
+-
+- qp->rq.is_user = qp->is_user;
+-
+- skb_queue_head_init(&qp->resp_pkts);
+-
+- rxe_init_task(rxe, &qp->resp.task, qp,
+- rxe_responder, "resp");
++ rxe_init_task(&qp->resp.task, qp, rxe_responder);
+
+ qp->resp.opcode = OPCODE_NONE;
+ qp->resp.msn = 0;
+@@ -367,6 +361,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
+
+ err2:
+ rxe_queue_cleanup(qp->sq.queue);
++ qp->sq.queue = NULL;
+ err1:
+ qp->pd = NULL;
+ qp->rcq = NULL;
+@@ -794,7 +789,9 @@ void rxe_qp_destroy(struct rxe_qp *qp)
+ rxe_cleanup_task(&qp->comp.task);
+
+ /* flush out any receive wr's or pending requests */
+- __rxe_do_task(&qp->req.task);
++ if (qp->req.task.func)
++ __rxe_do_task(&qp->req.task);
++
+ if (qp->sq.queue) {
+ __rxe_do_task(&qp->comp.task);
+ __rxe_do_task(&qp->req.task);
+@@ -829,13 +826,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
+ qp->resp.mr = NULL;
+ }
+
+- if (qp_type(qp) == IB_QPT_RC)
+- sk_dst_reset(qp->sk->sk);
+-
+ free_rd_atomic_resources(qp);
+
+- kernel_sock_shutdown(qp->sk, SHUT_RDWR);
+- sock_release(qp->sk);
++ if (qp->sk) {
++ if (qp_type(qp) == IB_QPT_RC)
++ sk_dst_reset(qp->sk->sk);
++
++ kernel_sock_shutdown(qp->sk, SHUT_RDWR);
++ sock_release(qp->sk);
++ }
+ }
+
+ /* called when the last reference to the qp is dropped */
+diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
+index 72d95398e6041..03157de52f5fe 100644
+--- a/drivers/infiniband/sw/rxe/rxe_queue.c
++++ b/drivers/infiniband/sw/rxe/rxe_queue.c
+@@ -111,17 +111,35 @@ err1:
+ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
+ unsigned int num_elem)
+ {
+- if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type)))
++ enum queue_type type = q->type;
++ u32 new_prod;
++ u32 prod;
++ u32 cons;
++
++ if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type)))
+ return -EINVAL;
+
+- while (!queue_empty(q, q->type)) {
+- memcpy(producer_addr(new_q, new_q->type),
+- consumer_addr(q, q->type),
+- new_q->elem_size);
+- advance_producer(new_q, new_q->type);
+- advance_consumer(q, q->type);
++ new_prod = queue_get_producer(new_q, type);
++ prod = queue_get_producer(q, type);
++ cons = queue_get_consumer(q, type);
++
++ while ((prod - cons) & q->index_mask) {
++ memcpy(queue_addr_from_index(new_q, new_prod),
++ queue_addr_from_index(q, cons), new_q->elem_size);
++ new_prod = queue_next_index(new_q, new_prod);
++ cons = queue_next_index(q, cons);
+ }
+
++ new_q->buf->producer_index = new_prod;
++ q->buf->consumer_index = cons;
++
++ /* update private index copies */
++ if (type == QUEUE_TYPE_TO_CLIENT)
++ new_q->index = new_q->buf->producer_index;
++ else
++ q->index = q->buf->consumer_index;
++
++ /* exchange rxe_queue headers */
+ swap(*q, *new_q);
+
+ return 0;
+diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
+index 2702b0e55fc33..6227112ef7a2f 100644
+--- a/drivers/infiniband/sw/rxe/rxe_queue.h
++++ b/drivers/infiniband/sw/rxe/rxe_queue.h
+@@ -10,34 +10,47 @@
+ /* for definition of shared struct rxe_queue_buf */
+ #include <uapi/rdma/rdma_user_rxe.h>
+
+-/* implements a simple circular buffer that can optionally be
+- * shared between user space and the kernel and can be resized
+- * the requested element size is rounded up to a power of 2
+- * and the number of elements in the buffer is also rounded
+- * up to a power of 2. Since the queue is empty when the
+- * producer and consumer indices match the maximum capacity
+- * of the queue is one less than the number of element slots
++/* Implements a simple circular buffer that is shared between user
++ * and the driver and can be resized. The requested element size is
++ * rounded up to a power of 2 and the number of elements in the buffer
++ * is also rounded up to a power of 2. Since the queue is empty when
++ * the producer and consumer indices match the maximum capacity of the
++ * queue is one less than the number of element slots.
+ *
+ * Notes:
+- * - Kernel space indices are always masked off to q->index_mask
+- * before storing so do not need to be checked on reads.
+- * - User space indices may be out of range and must be
+- * masked before use when read.
+- * - The kernel indices for shared queues must not be written
+- * by user space so a local copy is used and a shared copy is
+- * stored when the local copy changes.
++ * - The driver indices are always masked off to q->index_mask
++ * before storing so do not need to be checked on reads.
++ * - The user whether user space or kernel is generally
++ * not trusted so its parameters are masked to make sure
++ * they do not access the queue out of bounds on reads.
++ * - The driver indices for queues must not be written
++ * by user so a local copy is used and a shared copy is
++ * stored when the local copy is changed.
+ * - By passing the type in the parameter list separate from q
+- * the compiler can eliminate the switch statement when the
+- * actual queue type is known when the function is called.
+- * In the performance path this is done. In less critical
+- * paths just q->type is passed.
++ * the compiler can eliminate the switch statement when the
++ * actual queue type is known when the function is called at
++ * compile time.
++ * - These queues are lock free. The user and driver must protect
++ * changes to their end of the queues with locks if more than one
++ * CPU can be accessing it at the same time.
+ */
+
+-/* type of queue */
++/**
++ * enum queue_type - type of queue
++ * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and
++ * read by client. Used by rxe driver only.
++ * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and
++ * read by rxe driver. Used by rxe driver only.
++ * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and
++ * read by rxe driver. Used by kernel client only.
++ * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and
++ * read by client. Used by kernel client only.
++ */
+ enum queue_type {
+- QUEUE_TYPE_KERNEL,
+- QUEUE_TYPE_TO_USER,
+- QUEUE_TYPE_FROM_USER,
++ QUEUE_TYPE_TO_CLIENT,
++ QUEUE_TYPE_FROM_CLIENT,
++ QUEUE_TYPE_TO_DRIVER,
++ QUEUE_TYPE_FROM_DRIVER,
+ };
+
+ struct rxe_queue {
+@@ -69,238 +82,171 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
+ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
+ unsigned int elem_size, struct ib_udata *udata,
+ struct mminfo __user *outbuf,
+- /* Protect producers while resizing queue */
+- spinlock_t *producer_lock,
+- /* Protect consumers while resizing queue */
+- spinlock_t *consumer_lock);
++ spinlock_t *producer_lock, spinlock_t *consumer_lock);
+
+ void rxe_queue_cleanup(struct rxe_queue *queue);
+
+-static inline int next_index(struct rxe_queue *q, int index)
++static inline u32 queue_next_index(struct rxe_queue *q, int index)
+ {
+- return (index + 1) & q->buf->index_mask;
++ return (index + 1) & q->index_mask;
+ }
+
+-static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
++static inline u32 queue_get_producer(const struct rxe_queue *q,
++ enum queue_type type)
+ {
+ u32 prod;
+- u32 cons;
+
+ switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- /* protect user space index */
++ case QUEUE_TYPE_FROM_CLIENT:
++ /* protect user index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+- cons = q->index;
+ break;
+- case QUEUE_TYPE_TO_USER:
++ case QUEUE_TYPE_TO_CLIENT:
+ prod = q->index;
+- /* protect user space index */
+- cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
+- case QUEUE_TYPE_KERNEL:
++ case QUEUE_TYPE_FROM_DRIVER:
++ /* protect driver index */
++ prod = smp_load_acquire(&q->buf->producer_index);
++ break;
++ case QUEUE_TYPE_TO_DRIVER:
+ prod = q->buf->producer_index;
+- cons = q->buf->consumer_index;
+ break;
+ }
+
+- return ((prod - cons) & q->index_mask) == 0;
++ return prod;
+ }
+
+-static inline int queue_full(struct rxe_queue *q, enum queue_type type)
++static inline u32 queue_get_consumer(const struct rxe_queue *q,
++ enum queue_type type)
+ {
+- u32 prod;
+ u32 cons;
+
+ switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- /* protect user space index */
+- prod = smp_load_acquire(&q->buf->producer_index);
++ case QUEUE_TYPE_FROM_CLIENT:
+ cons = q->index;
+ break;
+- case QUEUE_TYPE_TO_USER:
+- prod = q->index;
+- /* protect user space index */
++ case QUEUE_TYPE_TO_CLIENT:
++ /* protect user index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
+- case QUEUE_TYPE_KERNEL:
+- prod = q->buf->producer_index;
++ case QUEUE_TYPE_FROM_DRIVER:
+ cons = q->buf->consumer_index;
+ break;
++ case QUEUE_TYPE_TO_DRIVER:
++ /* protect driver index */
++ cons = smp_load_acquire(&q->buf->consumer_index);
++ break;
+ }
+
+- return ((prod + 1 - cons) & q->index_mask) == 0;
++ return cons;
+ }
+
+-static inline unsigned int queue_count(const struct rxe_queue *q,
+- enum queue_type type)
++static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
+ {
+- u32 prod;
+- u32 cons;
+-
+- switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- /* protect user space index */
+- prod = smp_load_acquire(&q->buf->producer_index);
+- cons = q->index;
+- break;
+- case QUEUE_TYPE_TO_USER:
+- prod = q->index;
+- /* protect user space index */
+- cons = smp_load_acquire(&q->buf->consumer_index);
+- break;
+- case QUEUE_TYPE_KERNEL:
+- prod = q->buf->producer_index;
+- cons = q->buf->consumer_index;
+- break;
+- }
++ u32 prod = queue_get_producer(q, type);
++ u32 cons = queue_get_consumer(q, type);
+
+- return (prod - cons) & q->index_mask;
++ return ((prod - cons) & q->index_mask) == 0;
+ }
+
+-static inline void advance_producer(struct rxe_queue *q, enum queue_type type)
++static inline int queue_full(struct rxe_queue *q, enum queue_type type)
+ {
+- u32 prod;
++ u32 prod = queue_get_producer(q, type);
++ u32 cons = queue_get_consumer(q, type);
+
+- switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- pr_warn_once("Normally kernel should not write user space index\n");
+- /* protect user space index */
+- prod = smp_load_acquire(&q->buf->producer_index);
+- prod = (prod + 1) & q->index_mask;
+- /* same */
+- smp_store_release(&q->buf->producer_index, prod);
+- break;
+- case QUEUE_TYPE_TO_USER:
+- prod = q->index;
+- q->index = (prod + 1) & q->index_mask;
+- q->buf->producer_index = q->index;
+- break;
+- case QUEUE_TYPE_KERNEL:
+- prod = q->buf->producer_index;
+- q->buf->producer_index = (prod + 1) & q->index_mask;
+- break;
+- }
++ return ((prod + 1 - cons) & q->index_mask) == 0;
+ }
+
+-static inline void advance_consumer(struct rxe_queue *q, enum queue_type type)
++static inline u32 queue_count(const struct rxe_queue *q,
++ enum queue_type type)
+ {
+- u32 cons;
++ u32 prod = queue_get_producer(q, type);
++ u32 cons = queue_get_consumer(q, type);
+
+- switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- cons = q->index;
+- q->index = (cons + 1) & q->index_mask;
+- q->buf->consumer_index = q->index;
+- break;
+- case QUEUE_TYPE_TO_USER:
+- pr_warn_once("Normally kernel should not write user space index\n");
+- /* protect user space index */
+- cons = smp_load_acquire(&q->buf->consumer_index);
+- cons = (cons + 1) & q->index_mask;
+- /* same */
+- smp_store_release(&q->buf->consumer_index, cons);
+- break;
+- case QUEUE_TYPE_KERNEL:
+- cons = q->buf->consumer_index;
+- q->buf->consumer_index = (cons + 1) & q->index_mask;
+- break;
+- }
++ return (prod - cons) & q->index_mask;
+ }
+
+-static inline void *producer_addr(struct rxe_queue *q, enum queue_type type)
++static inline void queue_advance_producer(struct rxe_queue *q,
++ enum queue_type type)
+ {
+ u32 prod;
+
+ switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- /* protect user space index */
+- prod = smp_load_acquire(&q->buf->producer_index);
+- prod &= q->index_mask;
++ case QUEUE_TYPE_FROM_CLIENT:
++ pr_warn("%s: attempt to advance client index\n",
++ __func__);
+ break;
+- case QUEUE_TYPE_TO_USER:
++ case QUEUE_TYPE_TO_CLIENT:
+ prod = q->index;
++ prod = (prod + 1) & q->index_mask;
++ q->index = prod;
++ /* protect user index */
++ smp_store_release(&q->buf->producer_index, prod);
++ break;
++ case QUEUE_TYPE_FROM_DRIVER:
++ pr_warn("%s: attempt to advance driver index\n",
++ __func__);
+ break;
+- case QUEUE_TYPE_KERNEL:
++ case QUEUE_TYPE_TO_DRIVER:
+ prod = q->buf->producer_index;
++ prod = (prod + 1) & q->index_mask;
++ q->buf->producer_index = prod;
+ break;
+ }
+-
+- return q->buf->data + (prod << q->log2_elem_size);
+ }
+
+-static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type)
++static inline void queue_advance_consumer(struct rxe_queue *q,
++ enum queue_type type)
+ {
+ u32 cons;
+
+ switch (type) {
+- case QUEUE_TYPE_FROM_USER:
++ case QUEUE_TYPE_FROM_CLIENT:
+ cons = q->index;
++ cons = (cons + 1) & q->index_mask;
++ q->index = cons;
++ /* protect user index */
++ smp_store_release(&q->buf->consumer_index, cons);
+ break;
+- case QUEUE_TYPE_TO_USER:
+- /* protect user space index */
+- cons = smp_load_acquire(&q->buf->consumer_index);
+- cons &= q->index_mask;
++ case QUEUE_TYPE_TO_CLIENT:
++ pr_warn("%s: attempt to advance client index\n",
++ __func__);
+ break;
+- case QUEUE_TYPE_KERNEL:
++ case QUEUE_TYPE_FROM_DRIVER:
+ cons = q->buf->consumer_index;
++ cons = (cons + 1) & q->index_mask;
++ q->buf->consumer_index = cons;
++ break;
++ case QUEUE_TYPE_TO_DRIVER:
++ pr_warn("%s: attempt to advance driver index\n",
++ __func__);
+ break;
+ }
+-
+- return q->buf->data + (cons << q->log2_elem_size);
+ }
+
+-static inline unsigned int producer_index(struct rxe_queue *q,
+- enum queue_type type)
++static inline void *queue_producer_addr(struct rxe_queue *q,
++ enum queue_type type)
+ {
+- u32 prod;
++ u32 prod = queue_get_producer(q, type);
+
+- switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- /* protect user space index */
+- prod = smp_load_acquire(&q->buf->producer_index);
+- prod &= q->index_mask;
+- break;
+- case QUEUE_TYPE_TO_USER:
+- prod = q->index;
+- break;
+- case QUEUE_TYPE_KERNEL:
+- prod = q->buf->producer_index;
+- break;
+- }
+-
+- return prod;
++ return q->buf->data + (prod << q->log2_elem_size);
+ }
+
+-static inline unsigned int consumer_index(struct rxe_queue *q,
+- enum queue_type type)
++static inline void *queue_consumer_addr(struct rxe_queue *q,
++ enum queue_type type)
+ {
+- u32 cons;
+-
+- switch (type) {
+- case QUEUE_TYPE_FROM_USER:
+- cons = q->index;
+- break;
+- case QUEUE_TYPE_TO_USER:
+- /* protect user space index */
+- cons = smp_load_acquire(&q->buf->consumer_index);
+- cons &= q->index_mask;
+- break;
+- case QUEUE_TYPE_KERNEL:
+- cons = q->buf->consumer_index;
+- break;
+- }
++ u32 cons = queue_get_consumer(q, type);
+
+- return cons;
++ return q->buf->data + (cons << q->log2_elem_size);
+ }
+
+-static inline void *addr_from_index(struct rxe_queue *q,
+- unsigned int index)
++static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index)
+ {
+ return q->buf->data + ((index & q->index_mask)
+- << q->buf->log2_elem_size);
++ << q->log2_elem_size);
+ }
+
+-static inline unsigned int index_from_addr(const struct rxe_queue *q,
++static inline u32 queue_index_from_addr(const struct rxe_queue *q,
+ const void *addr)
+ {
+ return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
+@@ -309,7 +255,7 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q,
+
+ static inline void *queue_head(struct rxe_queue *q, enum queue_type type)
+ {
+- return queue_empty(q, type) ? NULL : consumer_addr(q, type);
++ return queue_empty(q, type) ? NULL : queue_consumer_addr(q, type);
+ }
+
+ #endif /* RXE_QUEUE_H */
+diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
+index 3894197a82f62..8c0e7ecd41414 100644
+--- a/drivers/infiniband/sw/rxe/rxe_req.c
++++ b/drivers/infiniband/sw/rxe/rxe_req.c
+@@ -49,21 +49,16 @@ static void req_retry(struct rxe_qp *qp)
+ unsigned int cons;
+ unsigned int prod;
+
+- if (qp->is_user) {
+- cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
+- prod = producer_index(q, QUEUE_TYPE_FROM_USER);
+- } else {
+- cons = consumer_index(q, QUEUE_TYPE_KERNEL);
+- prod = producer_index(q, QUEUE_TYPE_KERNEL);
+- }
++ cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
++ prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
+
+ qp->req.wqe_index = cons;
+ qp->req.psn = qp->comp.psn;
+ qp->req.opcode = -1;
+
+ for (wqe_index = cons; wqe_index != prod;
+- wqe_index = next_index(q, wqe_index)) {
+- wqe = addr_from_index(qp->sq.queue, wqe_index);
++ wqe_index = queue_next_index(q, wqe_index)) {
++ wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
+ mask = wr_opcode_mask(wqe->wr.opcode, qp);
+
+ if (wqe->state == wqe_state_posted)
+@@ -121,15 +116,9 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+ unsigned int cons;
+ unsigned int prod;
+
+- if (qp->is_user) {
+- wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
+- cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
+- prod = producer_index(q, QUEUE_TYPE_FROM_USER);
+- } else {
+- wqe = queue_head(q, QUEUE_TYPE_KERNEL);
+- cons = consumer_index(q, QUEUE_TYPE_KERNEL);
+- prod = producer_index(q, QUEUE_TYPE_KERNEL);
+- }
++ wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
++ cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
++ prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
+
+ if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
+ /* check to see if we are drained;
+@@ -170,7 +159,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+ if (index == prod)
+ return NULL;
+
+- wqe = addr_from_index(q, index);
++ wqe = queue_addr_from_index(q, index);
+
+ if (unlikely((qp->req.state == QP_STATE_DRAIN ||
+ qp->req.state == QP_STATE_DRAINED) &&
+@@ -560,7 +549,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ qp->req.opcode = pkt->opcode;
+
+ if (pkt->mask & RXE_END_MASK)
+- qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
++ qp->req.wqe_index = queue_next_index(qp->sq.queue,
++ qp->req.wqe_index);
+
+ qp->need_req_skb = 0;
+
+@@ -572,7 +562,6 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+ {
+ u8 opcode = wqe->wr.opcode;
+- struct rxe_mr *mr;
+ u32 rkey;
+ int ret;
+
+@@ -590,14 +579,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+ }
+ break;
+ case IB_WR_REG_MR:
+- mr = to_rmr(wqe->wr.wr.reg.mr);
+- rxe_add_ref(mr);
+- mr->state = RXE_MR_STATE_VALID;
+- mr->access = wqe->wr.wr.reg.access;
+- mr->ibmr.lkey = wqe->wr.wr.reg.key;
+- mr->ibmr.rkey = wqe->wr.wr.reg.key;
+- mr->iova = wqe->wr.wr.reg.mr->iova;
+- rxe_drop_ref(mr);
++ ret = rxe_reg_fast_mr(qp, wqe);
++ if (unlikely(ret)) {
++ wqe->status = IB_WC_LOC_QP_OP_ERR;
++ return ret;
++ }
+ break;
+ case IB_WR_BIND_MW:
+ ret = rxe_bind_mw(qp, wqe);
+@@ -614,11 +600,13 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+- qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
++ qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
+
+- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
+- rxe_run_task(&qp->comp.task, 1);
++ /* There is no ack coming for local work requests
++ * which can lead to a deadlock. So go ahead and complete
++ * it now.
++ */
++ rxe_run_task(&qp->comp.task, 1);
+
+ return 0;
+ }
+@@ -645,7 +633,8 @@ next_wqe:
+ goto exit;
+
+ if (unlikely(qp->req.state == QP_STATE_RESET)) {
+- qp->req.wqe_index = consumer_index(q, q->type);
++ qp->req.wqe_index = queue_get_consumer(q,
++ QUEUE_TYPE_FROM_CLIENT);
+ qp->req.opcode = -1;
+ qp->req.need_rd_atomic = 0;
+ qp->req.wait_psn = 0;
+@@ -687,7 +676,7 @@ next_wqe:
+ opcode = next_opcode(qp, wqe, wqe->wr.opcode);
+ if (unlikely(opcode < 0)) {
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+- goto exit;
++ goto err;
+ }
+
+ mask = rxe_opcode[opcode].mask;
+@@ -711,7 +700,7 @@ next_wqe:
+ wqe->last_psn = qp->req.psn;
+ qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+ qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
+- qp->req.wqe_index = next_index(qp->sq.queue,
++ qp->req.wqe_index = queue_next_index(qp->sq.queue,
+ qp->req.wqe_index);
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
+index 5501227ddc650..e7dec84810614 100644
+--- a/drivers/infiniband/sw/rxe/rxe_resp.c
++++ b/drivers/infiniband/sw/rxe/rxe_resp.c
+@@ -303,10 +303,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
+
+ spin_lock_bh(&srq->rq.consumer_lock);
+
+- if (qp->is_user)
+- wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
+- else
+- wqe = queue_head(q, QUEUE_TYPE_KERNEL);
++ wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
+ if (!wqe) {
+ spin_unlock_bh(&srq->rq.consumer_lock);
+ return RESPST_ERR_RNR;
+@@ -322,13 +319,8 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
+ memcpy(&qp->resp.srq_wqe, wqe, size);
+
+ qp->resp.wqe = &qp->resp.srq_wqe.wqe;
+- if (qp->is_user) {
+- advance_consumer(q, QUEUE_TYPE_FROM_USER);
+- count = queue_count(q, QUEUE_TYPE_FROM_USER);
+- } else {
+- advance_consumer(q, QUEUE_TYPE_KERNEL);
+- count = queue_count(q, QUEUE_TYPE_KERNEL);
+- }
++ queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
++ count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
+
+ if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
+ srq->limit = 0;
+@@ -357,12 +349,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
+ qp->resp.status = IB_WC_WR_FLUSH_ERR;
+ return RESPST_COMPLETE;
+ } else if (!srq) {
+- if (qp->is_user)
+- qp->resp.wqe = queue_head(qp->rq.queue,
+- QUEUE_TYPE_FROM_USER);
+- else
+- qp->resp.wqe = queue_head(qp->rq.queue,
+- QUEUE_TYPE_KERNEL);
++ qp->resp.wqe = queue_head(qp->rq.queue,
++ QUEUE_TYPE_FROM_CLIENT);
+ if (qp->resp.wqe) {
+ qp->resp.status = IB_WC_WR_FLUSH_ERR;
+ return RESPST_COMPLETE;
+@@ -389,12 +377,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
+ if (srq)
+ return get_srq_wqe(qp);
+
+- if (qp->is_user)
+- qp->resp.wqe = queue_head(qp->rq.queue,
+- QUEUE_TYPE_FROM_USER);
+- else
+- qp->resp.wqe = queue_head(qp->rq.queue,
+- QUEUE_TYPE_KERNEL);
++ qp->resp.wqe = queue_head(qp->rq.queue,
++ QUEUE_TYPE_FROM_CLIENT);
+ return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
+ }
+
+@@ -830,6 +814,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+ return RESPST_ERR_INVALIDATE_RKEY;
+ }
+
++ if (pkt->mask & RXE_END_MASK)
++ /* We successfully processed this new request. */
++ qp->resp.msn++;
++
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+@@ -837,11 +825,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+- if (pkt->mask & RXE_COMP_MASK) {
+- /* We successfully processed this new request. */
+- qp->resp.msn++;
++ if (pkt->mask & RXE_COMP_MASK)
+ return RESPST_COMPLETE;
+- } else if (qp_type(qp) == IB_QPT_RC)
++ else if (qp_type(qp) == IB_QPT_RC)
+ return RESPST_ACKNOWLEDGE;
+ else
+ return RESPST_CLEANUP;
+@@ -936,12 +922,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
+ }
+
+ /* have copy for srq and reference for !srq */
+- if (!qp->srq) {
+- if (qp->is_user)
+- advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER);
+- else
+- advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL);
+- }
++ if (!qp->srq)
++ queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
+
+ qp->resp.wqe = NULL;
+
+@@ -1213,7 +1195,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+ return;
+
+ while (!qp->srq && q && queue_head(q, q->type))
+- advance_consumer(q, q->type);
++ queue_advance_consumer(q, q->type);
+ }
+
+ int rxe_responder(void *arg)
+diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
+index 610c98d24b5cc..eb1c4c3b3a786 100644
+--- a/drivers/infiniband/sw/rxe/rxe_srq.c
++++ b/drivers/infiniband/sw/rxe/rxe_srq.c
+@@ -86,14 +86,13 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
+ srq->srq_num = srq->pelem.index;
+ srq->rq.max_wr = init->attr.max_wr;
+ srq->rq.max_sge = init->attr.max_sge;
+- srq->rq.is_user = srq->is_user;
+
+ srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
+
+ spin_lock_init(&srq->rq.producer_lock);
+ spin_lock_init(&srq->rq.consumer_lock);
+
+- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
++ type = QUEUE_TYPE_FROM_CLIENT;
+ q = rxe_queue_init(rxe, &srq->rq.max_wr,
+ srq_wqe_size, type);
+ if (!q) {
+diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
+index 6951fdcb31bf5..5aa69947a9791 100644
+--- a/drivers/infiniband/sw/rxe/rxe_task.c
++++ b/drivers/infiniband/sw/rxe/rxe_task.c
+@@ -8,7 +8,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/hardirq.h>
+
+-#include "rxe_task.h"
++#include "rxe.h"
+
+ int __rxe_do_task(struct rxe_task *task)
+
+@@ -34,6 +34,7 @@ void rxe_do_task(struct tasklet_struct *t)
+ int ret;
+ unsigned long flags;
+ struct rxe_task *task = from_tasklet(task, t, tasklet);
++ unsigned int iterations = RXE_MAX_ITERATIONS;
+
+ spin_lock_irqsave(&task->state_lock, flags);
+ switch (task->state) {
+@@ -62,13 +63,20 @@ void rxe_do_task(struct tasklet_struct *t)
+ spin_lock_irqsave(&task->state_lock, flags);
+ switch (task->state) {
+ case TASK_STATE_BUSY:
+- if (ret)
++ if (ret) {
+ task->state = TASK_STATE_START;
+- else
++ } else if (iterations--) {
+ cont = 1;
++ } else {
++ /* reschedule the tasklet and exit
++ * the loop to give up the cpu
++ */
++ tasklet_schedule(&task->tasklet);
++ task->state = TASK_STATE_START;
++ }
+ break;
+
+- /* soneone tried to run the task since the last time we called
++ /* someone tried to run the task since the last time we called
+ * func, so we will call one more time regardless of the
+ * return value
+ */
+@@ -87,13 +95,10 @@ void rxe_do_task(struct tasklet_struct *t)
+ task->ret = ret;
+ }
+
+-int rxe_init_task(void *obj, struct rxe_task *task,
+- void *arg, int (*func)(void *), char *name)
++int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
+ {
+- task->obj = obj;
+ task->arg = arg;
+ task->func = func;
+- snprintf(task->name, sizeof(task->name), "%s", name);
+ task->destroyed = false;
+
+ tasklet_setup(&task->tasklet, rxe_do_task);
+diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
+index 11d183fd33386..b3dfd970d1dc6 100644
+--- a/drivers/infiniband/sw/rxe/rxe_task.h
++++ b/drivers/infiniband/sw/rxe/rxe_task.h
+@@ -19,14 +19,12 @@ enum {
+ * called again.
+ */
+ struct rxe_task {
+- void *obj;
+ struct tasklet_struct tasklet;
+ int state;
+ spinlock_t state_lock; /* spinlock for task state */
+ void *arg;
+ int (*func)(void *arg);
+ int ret;
+- char name[16];
+ bool destroyed;
+ };
+
+@@ -35,8 +33,7 @@ struct rxe_task {
+ * arg => parameter to pass to fcn
+ * func => function to call until it returns != 0
+ */
+-int rxe_init_task(void *obj, struct rxe_task *task,
+- void *arg, int (*func)(void *), char *name);
++int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
+
+ /* cleanup task */
+ void rxe_cleanup_task(struct rxe_task *task);
+diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
+index 267b5a9c345d0..e40927cf5772e 100644
+--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
++++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
+@@ -218,11 +218,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
+ int num_sge = ibwr->num_sge;
+ int full;
+
+- if (rq->is_user)
+- full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);
+-
++ full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
+ if (unlikely(full)) {
+ err = -ENOMEM;
+ goto err1;
+@@ -237,11 +233,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
+ for (i = 0; i < num_sge; i++)
+ length += ibwr->sg_list[i].length;
+
+- if (rq->is_user)
+- recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);
+-
++ recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
+ recv_wqe->wr_id = ibwr->wr_id;
+ recv_wqe->num_sge = num_sge;
+
+@@ -254,10 +246,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
+ recv_wqe->dma.cur_sge = 0;
+ recv_wqe->dma.sge_offset = 0;
+
+- if (rq->is_user)
+- advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- advance_producer(rq->queue, QUEUE_TYPE_KERNEL);
++ queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
+
+ return 0;
+
+@@ -281,9 +270,6 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
+ if (udata->outlen < sizeof(*uresp))
+ return -EINVAL;
+ uresp = udata->outbuf;
+- srq->is_user = true;
+- } else {
+- srq->is_user = false;
+ }
+
+ err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
+@@ -633,27 +619,17 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
+
+ spin_lock_irqsave(&qp->sq.sq_lock, flags);
+
+- if (qp->is_user)
+- full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
++ full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
+
+ if (unlikely(full)) {
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+ return -ENOMEM;
+ }
+
+- if (qp->is_user)
+- send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
+-
++ send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER);
+ init_send_wqe(qp, ibwr, mask, length, send_wqe);
+
+- if (qp->is_user)
+- advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
+- else
+- advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
++ queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
+
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+
+@@ -845,18 +821,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+- if (cq->is_user)
+- cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
++ cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
+ if (!cqe)
+ break;
+
+ memcpy(wc++, &cqe->ibwc, sizeof(*wc));
+- if (cq->is_user)
+- advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
++ queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+@@ -868,10 +838,7 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
+ struct rxe_cq *cq = to_rcq(ibcq);
+ int count;
+
+- if (cq->is_user)
+- count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
++ count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER);
+
+ return (count > wc_cnt) ? wc_cnt : count;
+ }
+@@ -887,10 +854,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+ if (cq->notify != IB_CQ_NEXT_COMP)
+ cq->notify = flags & IB_CQ_SOLICITED_MASK;
+
+- if (cq->is_user)
+- empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
+- else
+- empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);
++ empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER);
+
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
+ ret = 1;
+diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
+index ac2a2148027f4..c852a9907bad4 100644
+--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
++++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
+@@ -77,7 +77,6 @@ enum wqe_state {
+ };
+
+ struct rxe_sq {
+- bool is_user;
+ int max_wr;
+ int max_sge;
+ int max_inline;
+@@ -86,7 +85,6 @@ struct rxe_sq {
+ };
+
+ struct rxe_rq {
+- bool is_user;
+ int max_wr;
+ int max_sge;
+ spinlock_t producer_lock; /* guard queue producer */
+@@ -100,7 +98,6 @@ struct rxe_srq {
+ struct rxe_pd *pd;
+ struct rxe_rq rq;
+ u32 srq_num;
+- bool is_user;
+
+ int limit;
+ int error;
+@@ -313,6 +310,8 @@ struct rxe_mr {
+
+ struct ib_umem *umem;
+
++ u32 lkey;
++ u32 rkey;
+ enum rxe_mr_state state;
+ enum rxe_mr_type type;
+ u64 va;
+@@ -350,6 +349,7 @@ struct rxe_mw {
+ enum rxe_mw_state state;
+ struct rxe_qp *qp; /* Type 2 only */
+ struct rxe_mr *mr;
++ u32 rkey;
+ int access;
+ u64 addr;
+ u64 length;
+@@ -474,26 +474,11 @@ static inline struct rxe_pd *mr_pd(struct rxe_mr *mr)
+ return to_rpd(mr->ibmr.pd);
+ }
+
+-static inline u32 mr_lkey(struct rxe_mr *mr)
+-{
+- return mr->ibmr.lkey;
+-}
+-
+-static inline u32 mr_rkey(struct rxe_mr *mr)
+-{
+- return mr->ibmr.rkey;
+-}
+-
+ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
+ {
+ return to_rpd(mw->ibmw.pd);
+ }
+
+-static inline u32 rxe_mw_rkey(struct rxe_mw *mw)
+-{
+- return mw->ibmw.rkey;
+-}
+-
+ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
+
+ void rxe_mc_cleanup(struct rxe_pool_entry *arg);
+diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
+index 368959ae9a8cc..2f3a9cda3850f 100644
+--- a/drivers/infiniband/sw/siw/siw.h
++++ b/drivers/infiniband/sw/siw/siw.h
+@@ -418,6 +418,7 @@ struct siw_qp {
+ struct ib_qp base_qp;
+ struct siw_device *sdev;
+ struct kref ref;
++ struct completion qp_free;
+ struct list_head devq;
+ int tx_cpu;
+ struct siw_qp_attrs attrs;
+@@ -644,14 +645,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
+ return &qp->orq[qp->orq_get % qp->attrs.orq_size];
+ }
+
+-static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
+-{
+- return &qp->orq[qp->orq_put % qp->attrs.orq_size];
+-}
+-
+ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
+ {
+- struct siw_sqe *orq_e = orq_get_tail(qp);
++ struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
+
+ if (READ_ONCE(orq_e->flags) == 0)
+ return orq_e;
+diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
+index 7a5ed86ffc9f9..69d639cab8985 100644
+--- a/drivers/infiniband/sw/siw/siw_cm.c
++++ b/drivers/infiniband/sw/siw/siw_cm.c
+@@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep)
+ enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
+
+ rv = siw_recv_mpa_rr(cep);
+- if (rv != -EAGAIN)
+- siw_cancel_mpatimer(cep);
+ if (rv)
+ goto out_err;
+
++ siw_cancel_mpatimer(cep);
++
+ rep = &cep->mpa.hdr;
+
+ if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
+@@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep)
+ }
+
+ out_err:
+- siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
++ if (rv != -EAGAIN)
++ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+
+ return rv;
+ }
+@@ -968,14 +969,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
+
+ siw_cep_set_inuse(new_cep);
+ rv = siw_proc_mpareq(new_cep);
+- siw_cep_set_free(new_cep);
+-
+ if (rv != -EAGAIN) {
+ siw_cep_put(cep);
+ new_cep->listen_cep = NULL;
+- if (rv)
++ if (rv) {
++ siw_cep_set_free(new_cep);
+ goto error;
++ }
+ }
++ siw_cep_set_free(new_cep);
+ }
+ return;
+
+diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
+index d68e37859e73b..403029de6b92d 100644
+--- a/drivers/infiniband/sw/siw/siw_cq.c
++++ b/drivers/infiniband/sw/siw/siw_cq.c
+@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
+ if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
+ memset(wc, 0, sizeof(*wc));
+ wc->wr_id = cqe->id;
+- wc->status = map_cqe_status[cqe->status].ib;
+- wc->opcode = map_wc_opcode[cqe->opcode];
+ wc->byte_len = cqe->bytes;
+
+ /*
+@@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ }
+ wc->qp = cqe->base_qp;
++ wc->opcode = map_wc_opcode[cqe->opcode];
++ wc->status = map_cqe_status[cqe->status].ib;
+ siw_dbg_cq(cq,
+ "idx %u, type %d, flags %2x, id 0x%pK\n",
+ cq->cq_get % cq->num_cqe, cqe->opcode,
+ cqe->flags, (void *)(uintptr_t)cqe->id);
++ } else {
++ /*
++ * A malicious user may set invalid opcode or
++ * status in the user mmapped CQE array.
++ * Sanity check and correct values in that case
++ * to avoid out-of-bounds access to global arrays
++ * for opcode and status mapping.
++ */
++ u8 opcode = cqe->opcode;
++ u16 status = cqe->status;
++
++ if (opcode >= SIW_NUM_OPCODES) {
++ opcode = 0;
++ status = SIW_WC_GENERAL_ERR;
++ } else if (status >= SIW_NUM_WC_STATUS) {
++ status = SIW_WC_GENERAL_ERR;
++ }
++ wc->opcode = map_wc_opcode[opcode];
++ wc->status = map_cqe_status[status].ib;
++
+ }
+ WRITE_ONCE(cqe->flags, 0);
+ cq->cq_get++;
+diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
+index 9093e6a80b260..f853f3c23540a 100644
+--- a/drivers/infiniband/sw/siw/siw_main.c
++++ b/drivers/infiniband/sw/siw/siw_main.c
+@@ -437,9 +437,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
+
+ dev_dbg(&netdev->dev, "siw: event %lu\n", event);
+
+- if (dev_net(netdev) != &init_net)
+- return NOTIFY_OK;
+-
+ base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
+ if (!base_dev)
+ return NOTIFY_OK;
+diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
+index 61c17db70d658..bf69566e2eb63 100644
+--- a/drivers/infiniband/sw/siw/siw_mem.c
++++ b/drivers/infiniband/sw/siw/siw_mem.c
+@@ -398,7 +398,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+
+ mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+- if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
++ if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
+ rv = -ENOMEM;
+ goto out_sem_up;
+ }
+@@ -411,18 +411,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+ goto out_sem_up;
+ }
+ for (i = 0; num_pages; i++) {
+- int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
+-
+- umem->page_chunk[i].plist =
++ int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
++ struct page **plist =
+ kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
+- if (!umem->page_chunk[i].plist) {
++
++ if (!plist) {
+ rv = -ENOMEM;
+ goto out_sem_up;
+ }
+- got = 0;
++ umem->page_chunk[i].plist = plist;
+ while (nents) {
+- struct page **plist = &umem->page_chunk[i].plist[got];
+-
+ rv = pin_user_pages(first_page_va, nents,
+ foll_flags | FOLL_LONGTERM,
+ plist, NULL);
+@@ -430,12 +428,11 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+ goto out_sem_up;
+
+ umem->num_pages += rv;
+- atomic64_add(rv, &mm_s->pinned_vm);
+ first_page_va += rv * PAGE_SIZE;
++ plist += rv;
+ nents -= rv;
+- got += rv;
++ num_pages -= rv;
+ }
+- num_pages -= got;
+ }
+ out_sem_up:
+ mmap_read_unlock(mm_s);
+@@ -443,6 +440,10 @@ out_sem_up:
+ if (rv > 0)
+ return umem;
+
++ /* Adjust accounting for pages not pinned */
++ if (num_pages)
++ atomic64_sub(num_pages, &mm_s->pinned_vm);
++
+ siw_umem_release(umem, false);
+
+ return ERR_PTR(rv);
+diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
+index 7e01f2438afc5..e6f634971228e 100644
+--- a/drivers/infiniband/sw/siw/siw_qp.c
++++ b/drivers/infiniband/sw/siw/siw_qp.c
+@@ -1342,6 +1342,6 @@ void siw_free_qp(struct kref *ref)
+ vfree(qp->orq);
+
+ siw_put_tx_cpu(qp->tx_cpu);
+-
++ complete(&qp->qp_free);
+ atomic_dec(&sdev->num_qp);
+ }
+diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
+index 60116f20653c7..fd721cc19682e 100644
+--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
++++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
+@@ -961,27 +961,28 @@ out:
+ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
+ {
+ struct sk_buff *skb = srx->skb;
++ int avail = min(srx->skb_new, srx->fpdu_part_rem);
+ u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
+ __wsum crc_in, crc_own = 0;
+
+ siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
+ srx->fpdu_part_rem, srx->skb_new, srx->pad);
+
+- if (srx->skb_new < srx->fpdu_part_rem)
+- return -EAGAIN;
+-
+- skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
++ skb_copy_bits(skb, srx->skb_offset, tbuf, avail);
+
+- if (srx->mpa_crc_hd && srx->pad)
+- crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
++ srx->skb_new -= avail;
++ srx->skb_offset += avail;
++ srx->skb_copied += avail;
++ srx->fpdu_part_rem -= avail;
+
+- srx->skb_new -= srx->fpdu_part_rem;
+- srx->skb_offset += srx->fpdu_part_rem;
+- srx->skb_copied += srx->fpdu_part_rem;
++ if (srx->fpdu_part_rem)
++ return -EAGAIN;
+
+ if (!srx->mpa_crc_hd)
+ return 0;
+
++ if (srx->pad)
++ crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
+ /*
+ * CRC32 is computed, transmitted and received directly in NBO,
+ * so there's never a reason to convert byte order.
+@@ -1083,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
+ * completely received.
+ */
+ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
+- bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
++ int hdrlen = iwarp_pktinfo[opcode].hdr_len;
+
+- if (srx->skb_new < bytes)
+- return -EAGAIN;
++ bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new);
+
+ skb_copy_bits(skb, srx->skb_offset,
+ (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
+@@ -1096,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
+ srx->skb_new -= bytes;
+ srx->skb_offset += bytes;
+ srx->skb_copied += bytes;
++
++ if (srx->fpdu_part_rcvd < hdrlen)
++ return -EAGAIN;
+ }
+
+ /*
+@@ -1153,11 +1156,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
+
+ spin_lock_irqsave(&qp->orq_lock, flags);
+
+- rreq = orq_get_current(qp);
+-
+ /* free current orq entry */
++ rreq = orq_get_current(qp);
+ WRITE_ONCE(rreq->flags, 0);
+
++ qp->orq_get++;
++
+ if (qp->tx_ctx.orq_fence) {
+ if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
+ pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
+@@ -1165,10 +1169,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
+ rv = -EPROTO;
+ goto out;
+ }
+- /* resume SQ processing */
++ /* resume SQ processing, if possible */
+ if (tx_waiting->sqe.opcode == SIW_OP_READ ||
+ tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+- rreq = orq_get_tail(qp);
++
++ /* SQ processing was stopped because of a full ORQ */
++ rreq = orq_get_free(qp);
+ if (unlikely(!rreq)) {
+ pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
+ rv = -EPROTO;
+@@ -1181,15 +1187,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
+ resume_tx = 1;
+
+ } else if (siw_orq_empty(qp)) {
++ /*
++ * SQ processing was stopped by fenced work request.
++ * Resume since all previous Read's are now completed.
++ */
+ qp->tx_ctx.orq_fence = 0;
+ resume_tx = 1;
+- } else {
+- pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
+- qp_id(qp), qp->orq_get, qp->orq_put);
+- rv = -EPROTO;
+ }
+ }
+- qp->orq_get++;
+ out:
+ spin_unlock_irqrestore(&qp->orq_lock, flags);
+
+diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
+index 1f4e60257700e..6bb9e9e81ff4c 100644
+--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
++++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
+@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
+ dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
+
+ if (paddr)
+- return virt_to_page(paddr);
++ return virt_to_page((void *)(uintptr_t)paddr);
+
+ return NULL;
+ }
+@@ -533,13 +533,23 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
+ kunmap_local(kaddr);
+ }
+ } else {
+- u64 va = sge->laddr + sge_off;
++ /*
++ * Cast to an uintptr_t to preserve all 64 bits
++ * in sge->laddr.
++ */
++ uintptr_t va = (uintptr_t)(sge->laddr + sge_off);
+
+- page_array[seg] = virt_to_page(va & PAGE_MASK);
++ /*
++ * virt_to_page() takes a (void *) pointer
++ * so cast to a (void *) meaning it will be 64
++ * bits on a 64 bit platform and 32 bits on a
++ * 32 bit platform.
++ */
++ page_array[seg] = virt_to_page((void *)(va & PAGE_MASK));
+ if (do_crc)
+ crypto_shash_update(
+ c_tx->mpa_crc_hd,
+- (void *)(uintptr_t)va,
++ (void *)va,
+ plen);
+ }
+
+@@ -548,7 +558,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
+ data_len -= plen;
+ fp_off = 0;
+
+- if (++seg > (int)MAX_ARRAY) {
++ if (++seg >= (int)MAX_ARRAY) {
+ siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
+ siw_unmap_pages(iov, kmap_mask, seg-1);
+ wqe->processed -= c_tx->bytes_unsent;
+diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
+index 1b36350601faa..9c7fbda9e068a 100644
+--- a/drivers/infiniband/sw/siw/siw_verbs.c
++++ b/drivers/infiniband/sw/siw/siw_verbs.c
+@@ -311,7 +311,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+
+ if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
+ siw_dbg(base_dev, "too many QP's\n");
+- return -ENOMEM;
++ rv = -ENOMEM;
++ goto err_atomic;
+ }
+ if (attrs->qp_type != IB_QPT_RC) {
+ siw_dbg(base_dev, "only RC QP's supported\n");
+@@ -477,6 +478,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ list_add_tail(&qp->devq, &sdev->qp_list);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
++ init_completion(&qp->qp_free);
++
+ return 0;
+
+ err_out_xa:
+@@ -621,6 +624,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
+ qp->scq = qp->rcq = NULL;
+
+ siw_qp_put(qp);
++ wait_for_completion(&qp->qp_free);
+
+ return 0;
+ }
+@@ -670,13 +674,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
+ static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+ {
+- struct siw_sqe sqe = {};
+ int rv = 0;
+
+ while (wr) {
+- sqe.id = wr->wr_id;
+- sqe.opcode = wr->opcode;
+- rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
++ struct siw_sqe sqe = {};
++
++ switch (wr->opcode) {
++ case IB_WR_RDMA_WRITE:
++ sqe.opcode = SIW_OP_WRITE;
++ break;
++ case IB_WR_RDMA_READ:
++ sqe.opcode = SIW_OP_READ;
++ break;
++ case IB_WR_RDMA_READ_WITH_INV:
++ sqe.opcode = SIW_OP_READ_LOCAL_INV;
++ break;
++ case IB_WR_SEND:
++ sqe.opcode = SIW_OP_SEND;
++ break;
++ case IB_WR_SEND_WITH_IMM:
++ sqe.opcode = SIW_OP_SEND_WITH_IMM;
++ break;
++ case IB_WR_SEND_WITH_INV:
++ sqe.opcode = SIW_OP_SEND_REMOTE_INV;
++ break;
++ case IB_WR_LOCAL_INV:
++ sqe.opcode = SIW_OP_INVAL_STAG;
++ break;
++ case IB_WR_REG_MR:
++ sqe.opcode = SIW_OP_REG_MR;
++ break;
++ default:
++ rv = -EINVAL;
++ break;
++ }
++ if (!rv) {
++ sqe.id = wr->wr_id;
++ rv = siw_sqe_complete(qp, &sqe, 0,
++ SIW_WC_WR_FLUSH_ERR);
++ }
+ if (rv) {
+ if (bad_wr)
+ *bad_wr = wr;
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+index 0aa8629fdf62e..1ea95f8009b82 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -2197,6 +2197,14 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->hca = hca;
++
++ rc = netif_set_real_num_tx_queues(dev, 1);
++ if (rc)
++ goto out;
++
++ rc = netif_set_real_num_rx_queues(dev, 1);
++ if (rc)
++ goto out;
+ }
+
+ priv->rn_ops = dev->netdev_ops;
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+index 5b05cf3837da1..28e9b70844e44 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+@@ -42,6 +42,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
+ [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
+ };
+
++static unsigned int ipoib_get_max_num_queues(void)
++{
++ return min_t(unsigned int, num_possible_cpus(), 128);
++}
++
+ static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
+ {
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+@@ -173,6 +178,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
+ .changelink = ipoib_changelink,
+ .get_size = ipoib_get_size,
+ .fill_info = ipoib_fill_info,
++ .get_num_rx_queues = ipoib_get_max_num_queues,
++ .get_num_tx_queues = ipoib_get_max_num_queues,
+ };
+
+ struct rtnl_link_ops *ipoib_get_link_ops(void)
+diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
+index 776e46ee95dad..ef2d165d15a8b 100644
+--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
++++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
+@@ -584,7 +584,7 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
+ struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+
+ iscsi_session_teardown(cls_session);
+- iscsi_host_remove(shost);
++ iscsi_host_remove(shost, false);
+ iscsi_host_free(shost);
+ }
+
+@@ -702,7 +702,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
+ return cls_session;
+
+ remove_host:
+- iscsi_host_remove(shost);
++ iscsi_host_remove(shost, false);
+ free_host:
+ iscsi_host_free(shost);
+ return NULL;
+diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
+index 636d590765f95..6082695a02d88 100644
+--- a/drivers/infiniband/ulp/isert/ib_isert.c
++++ b/drivers/infiniband/ulp/isert/ib_isert.c
+@@ -656,9 +656,13 @@ static int
+ isert_connect_error(struct rdma_cm_id *cma_id)
+ {
+ struct isert_conn *isert_conn = cma_id->qp->qp_context;
++ struct isert_np *isert_np = cma_id->context;
+
+ ib_drain_qp(isert_conn->qp);
++
++ mutex_lock(&isert_np->mutex);
+ list_del_init(&isert_conn->node);
++ mutex_unlock(&isert_np->mutex);
+ isert_conn->cm_id = NULL;
+ isert_put_conn(isert_conn);
+
+@@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np)
+ {
+ struct isert_np *isert_np = np->np_context;
+ struct isert_conn *isert_conn, *n;
++ LIST_HEAD(drop_conn_list);
+
+ if (isert_np->cm_id)
+ rdma_destroy_id(isert_np->cm_id);
+@@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np)
+ node) {
+ isert_info("cleaning isert_conn %p state (%d)\n",
+ isert_conn, isert_conn->state);
+- isert_connect_release(isert_conn);
++ list_move_tail(&isert_conn->node, &drop_conn_list);
+ }
+ }
+
+@@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np)
+ node) {
+ isert_info("cleaning isert_conn %p state (%d)\n",
+ isert_conn, isert_conn->state);
+- isert_connect_release(isert_conn);
++ list_move_tail(&isert_conn->node, &drop_conn_list);
+ }
+ }
+ mutex_unlock(&isert_np->mutex);
+
++ list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
++ list_del_init(&isert_conn->node);
++ isert_connect_release(isert_conn);
++ }
++
+ np->np_context = NULL;
+ kfree(isert_np);
+ }
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+index 5e780bdd763d3..e7b57bdfe3ea0 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
+@@ -13,8 +13,8 @@
+
+ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+- struct rtrs_clt_stats *stats = sess->stats;
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
++ struct rtrs_clt_stats *stats = clt_path->stats;
+ struct rtrs_clt_stats_pcpu *s;
+ int cpu;
+
+@@ -174,8 +174,8 @@ static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats,
+ void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
+ {
+ struct rtrs_clt_con *con = req->con;
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+- struct rtrs_clt_stats *stats = sess->stats;
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
++ struct rtrs_clt_stats *stats = clt_path->stats;
+ unsigned int len;
+
+ len = req->usr_len + req->data_len;
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+index 4ee592ccf979b..dbf9a778c3bd7 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+@@ -16,21 +16,21 @@
+ #define MIN_MAX_RECONN_ATT -1
+ #define MAX_MAX_RECONN_ATT 9999
+
+-static void rtrs_clt_sess_release(struct kobject *kobj)
++static void rtrs_clt_path_release(struct kobject *kobj)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+
+- free_sess(sess);
++ free_path(clt_path);
+ }
+
+ static struct kobj_type ktype_sess = {
+ .sysfs_ops = &kobj_sysfs_ops,
+- .release = rtrs_clt_sess_release
++ .release = rtrs_clt_path_release
+ };
+
+-static void rtrs_clt_sess_stats_release(struct kobject *kobj)
++static void rtrs_clt_path_stats_release(struct kobject *kobj)
+ {
+ struct rtrs_clt_stats *stats;
+
+@@ -43,7 +43,7 @@ static void rtrs_clt_sess_stats_release(struct kobject *kobj)
+
+ static struct kobj_type ktype_stats = {
+ .sysfs_ops = &kobj_sysfs_ops,
+- .release = rtrs_clt_sess_stats_release,
++ .release = rtrs_clt_path_stats_release,
+ };
+
+ static ssize_t max_reconnect_attempts_show(struct device *dev,
+@@ -197,10 +197,10 @@ static DEVICE_ATTR_RW(add_path);
+ static ssize_t rtrs_clt_state_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+- if (sess->state == RTRS_CLT_CONNECTED)
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
++ if (clt_path->state == RTRS_CLT_CONNECTED)
+ return sysfs_emit(page, "connected\n");
+
+ return sysfs_emit(page, "disconnected\n");
+@@ -219,16 +219,16 @@ static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int ret;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ if (!sysfs_streq(buf, "1")) {
+- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
++ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
+ attr->attr.name, buf);
+ return -EINVAL;
+ }
+- ret = rtrs_clt_reconnect_from_sysfs(sess);
++ ret = rtrs_clt_reconnect_from_sysfs(clt_path);
+ if (ret)
+ return ret;
+
+@@ -249,15 +249,15 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ if (!sysfs_streq(buf, "1")) {
+- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
++ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
+ attr->attr.name, buf);
+ return -EINVAL;
+ }
+- rtrs_clt_close_conns(sess, true);
++ rtrs_clt_close_conns(clt_path, true);
+
+ return count;
+ }
+@@ -276,16 +276,16 @@ static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int ret;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+ if (!sysfs_streq(buf, "1")) {
+- rtrs_err(sess->clt, "%s: unknown value: '%s'\n",
++ rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n",
+ attr->attr.name, buf);
+ return -EINVAL;
+ }
+- ret = rtrs_clt_remove_path_from_sysfs(sess, &attr->attr);
++ ret = rtrs_clt_remove_path_from_sysfs(clt_path, &attr->attr);
+ if (ret)
+ return ret;
+
+@@ -328,11 +328,11 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, typeof(*sess), kobj);
++ clt_path = container_of(kobj, typeof(*clt_path), kobj);
+
+- return sysfs_emit(page, "%u\n", sess->hca_port);
++ return sysfs_emit(page, "%u\n", clt_path->hca_port);
+ }
+
+ static struct kobj_attribute rtrs_clt_hca_port_attr =
+@@ -342,11 +342,11 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+
+- return sysfs_emit(page, "%s\n", sess->hca_name);
++ return sysfs_emit(page, "%s\n", clt_path->hca_name);
+ }
+
+ static struct kobj_attribute rtrs_clt_hca_name_attr =
+@@ -356,12 +356,12 @@ static ssize_t rtrs_clt_cur_latency_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
+
+ return sysfs_emit(page, "%lld ns\n",
+- ktime_to_ns(sess->s.hb_cur_latency));
++ ktime_to_ns(clt_path->s.hb_cur_latency));
+ }
+
+ static struct kobj_attribute rtrs_clt_cur_latency_attr =
+@@ -371,11 +371,11 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int len;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+- len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
++ len = sockaddr_to_str((struct sockaddr *)&clt_path->s.src_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
+@@ -388,11 +388,11 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int len;
+
+- sess = container_of(kobj, struct rtrs_clt_sess, kobj);
+- len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page,
++ clt_path = container_of(kobj, struct rtrs_clt_path, kobj);
++ len = sockaddr_to_str((struct sockaddr *)&clt_path->s.dst_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
+@@ -401,7 +401,7 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj,
+ static struct kobj_attribute rtrs_clt_dst_addr_attr =
+ __ATTR(dst_addr, 0444, rtrs_clt_dst_addr_show, NULL);
+
+-static struct attribute *rtrs_clt_sess_attrs[] = {
++static struct attribute *rtrs_clt_path_attrs[] = {
+ &rtrs_clt_hca_name_attr.attr,
+ &rtrs_clt_hca_port_attr.attr,
+ &rtrs_clt_src_addr_attr.attr,
+@@ -414,42 +414,43 @@ static struct attribute *rtrs_clt_sess_attrs[] = {
+ NULL,
+ };
+
+-static const struct attribute_group rtrs_clt_sess_attr_group = {
+- .attrs = rtrs_clt_sess_attrs,
++static const struct attribute_group rtrs_clt_path_attr_group = {
++ .attrs = rtrs_clt_path_attrs,
+ };
+
+-int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
++int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+ char str[NAME_MAX];
+ int err;
+ struct rtrs_addr path = {
+- .src = &sess->s.src_addr,
+- .dst = &sess->s.dst_addr,
++ .src = &clt_path->s.src_addr,
++ .dst = &clt_path->s.dst_addr,
+ };
+
+ rtrs_addr_to_str(&path, str, sizeof(str));
+- err = kobject_init_and_add(&sess->kobj, &ktype_sess, clt->kobj_paths,
++ err = kobject_init_and_add(&clt_path->kobj, &ktype_sess,
++ clt->kobj_paths,
+ "%s", str);
+ if (err) {
+ pr_err("kobject_init_and_add: %d\n", err);
+- kobject_put(&sess->kobj);
++ kobject_put(&clt_path->kobj);
+ return err;
+ }
+- err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group);
++ err = sysfs_create_group(&clt_path->kobj, &rtrs_clt_path_attr_group);
+ if (err) {
+ pr_err("sysfs_create_group(): %d\n", err);
+ goto put_kobj;
+ }
+- err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats,
+- &sess->kobj, "stats");
++ err = kobject_init_and_add(&clt_path->stats->kobj_stats, &ktype_stats,
++ &clt_path->kobj, "stats");
+ if (err) {
+ pr_err("kobject_init_and_add: %d\n", err);
+- kobject_put(&sess->stats->kobj_stats);
++ kobject_put(&clt_path->stats->kobj_stats);
+ goto remove_group;
+ }
+
+- err = sysfs_create_group(&sess->stats->kobj_stats,
++ err = sysfs_create_group(&clt_path->stats->kobj_stats,
+ &rtrs_clt_stats_attr_group);
+ if (err) {
+ pr_err("failed to create stats sysfs group, err: %d\n", err);
+@@ -459,25 +460,25 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess)
+ return 0;
+
+ put_kobj_stats:
+- kobject_del(&sess->stats->kobj_stats);
+- kobject_put(&sess->stats->kobj_stats);
++ kobject_del(&clt_path->stats->kobj_stats);
++ kobject_put(&clt_path->stats->kobj_stats);
+ remove_group:
+- sysfs_remove_group(&sess->kobj, &rtrs_clt_sess_attr_group);
++ sysfs_remove_group(&clt_path->kobj, &rtrs_clt_path_attr_group);
+ put_kobj:
+- kobject_del(&sess->kobj);
+- kobject_put(&sess->kobj);
++ kobject_del(&clt_path->kobj);
++ kobject_put(&clt_path->kobj);
+
+ return err;
+ }
+
+-void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
++void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path,
+ const struct attribute *sysfs_self)
+ {
+- kobject_del(&sess->stats->kobj_stats);
+- kobject_put(&sess->stats->kobj_stats);
++ kobject_del(&clt_path->stats->kobj_stats);
++ kobject_put(&clt_path->stats->kobj_stats);
+ if (sysfs_self)
+- sysfs_remove_file_self(&sess->kobj, sysfs_self);
+- kobject_del(&sess->kobj);
++ sysfs_remove_file_self(&clt_path->kobj, sysfs_self);
++ kobject_del(&clt_path->kobj);
+ }
+
+ static struct attribute *rtrs_clt_attrs[] = {
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+index bc8824b4ee0d4..afe8670f9e555 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+@@ -48,12 +48,12 @@ static struct class *rtrs_clt_dev_class;
+
+ static inline bool rtrs_clt_is_connected(const struct rtrs_clt *clt)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ bool connected = false;
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry)
+- connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED;
++ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry)
++ connected |= READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED;
+ rcu_read_unlock();
+
+ return connected;
+@@ -163,29 +163,29 @@ EXPORT_SYMBOL(rtrs_clt_put_permit);
+
+ /**
+ * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit
+- * @sess: client session pointer
++ * @clt_path: client path pointer
+ * @permit: permit for the allocation of the RDMA buffer
+ * Note:
+ * IO connection starts from 1.
+ * 0 connection is for user messages.
+ */
+ static
+-struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
++struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_path *clt_path,
+ struct rtrs_permit *permit)
+ {
+ int id = 0;
+
+ if (permit->con_type == RTRS_IO_CON)
+- id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
++ id = (permit->cpu_id % (clt_path->s.irq_con_num - 1)) + 1;
+
+- return to_clt_con(sess->s.con[id]);
++ return to_clt_con(clt_path->s.con[id]);
+ }
+
+ /**
+ * rtrs_clt_change_state() - change the session state through session state
+ * machine.
+ *
+- * @sess: client session to change the state of.
++ * @clt_path: client path to change the state of.
+ * @new_state: state to change to.
+ *
+ * returns true if sess's state is changed to new state, otherwise return false.
+@@ -193,15 +193,15 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
+ * Locks:
+ * state_wq lock must be hold.
+ */
+-static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
++static bool rtrs_clt_change_state(struct rtrs_clt_path *clt_path,
+ enum rtrs_clt_state new_state)
+ {
+ enum rtrs_clt_state old_state;
+ bool changed = false;
+
+- lockdep_assert_held(&sess->state_wq.lock);
++ lockdep_assert_held(&clt_path->state_wq.lock);
+
+- old_state = sess->state;
++ old_state = clt_path->state;
+ switch (new_state) {
+ case RTRS_CLT_CONNECTING:
+ switch (old_state) {
+@@ -275,42 +275,42 @@ static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess,
+ break;
+ }
+ if (changed) {
+- sess->state = new_state;
+- wake_up_locked(&sess->state_wq);
++ clt_path->state = new_state;
++ wake_up_locked(&clt_path->state_wq);
+ }
+
+ return changed;
+ }
+
+-static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess,
++static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path,
+ enum rtrs_clt_state old_state,
+ enum rtrs_clt_state new_state)
+ {
+ bool changed = false;
+
+- spin_lock_irq(&sess->state_wq.lock);
+- if (sess->state == old_state)
+- changed = rtrs_clt_change_state(sess, new_state);
+- spin_unlock_irq(&sess->state_wq.lock);
++ spin_lock_irq(&clt_path->state_wq.lock);
++ if (clt_path->state == old_state)
++ changed = rtrs_clt_change_state(clt_path, new_state);
++ spin_unlock_irq(&clt_path->state_wq.lock);
+
+ return changed;
+ }
+
+ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+- if (rtrs_clt_change_state_from_to(sess,
++ if (rtrs_clt_change_state_from_to(clt_path,
+ RTRS_CLT_CONNECTED,
+ RTRS_CLT_RECONNECTING)) {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+ unsigned int delay_ms;
+
+ /*
+ * Normal scenario, reconnect if we were successfully connected
+ */
+ delay_ms = clt->reconnect_delay_sec * 1000;
+- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
++ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
+ msecs_to_jiffies(delay_ms +
+ prandom_u32() % RTRS_RECONNECT_SEED));
+ } else {
+@@ -319,7 +319,7 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
+ * so notify waiter with error state, waiter is responsible
+ * for cleaning the rest and reconnect if needed.
+ */
+- rtrs_clt_change_state_from_to(sess,
++ rtrs_clt_change_state_from_to(clt_path,
+ RTRS_CLT_CONNECTING,
+ RTRS_CLT_CONNECTING_ERR);
+ }
+@@ -330,7 +330,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc)
+ struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
+
+ if (wc->status != IB_WC_SUCCESS) {
+- rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n",
++ rtrs_err(con->c.path, "Failed IB_WR_REG_MR: %s\n",
+ ib_wc_status_msg(wc->status));
+ rtrs_rdma_error_recovery(con);
+ }
+@@ -350,7 +350,7 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
+ struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
+
+ if (wc->status != IB_WC_SUCCESS) {
+- rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n",
++ rtrs_err(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n",
+ ib_wc_status_msg(wc->status));
+ rtrs_rdma_error_recovery(con);
+ }
+@@ -380,14 +380,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
+ bool notify, bool can_wait)
+ {
+ struct rtrs_clt_con *con = req->con;
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int err;
+
+ if (WARN_ON(!req->in_use))
+ return;
+ if (WARN_ON(!req->con))
+ return;
+- sess = to_clt_sess(con->c.sess);
++ clt_path = to_clt_path(con->c.path);
+
+ if (req->sg_cnt) {
+ if (req->dir == DMA_FROM_DEVICE && req->need_inv) {
+@@ -417,7 +417,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
+ refcount_inc(&req->ref);
+ err = rtrs_inv_rkey(req);
+ if (err) {
+- rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n",
++ rtrs_err(con->c.path, "Send INV WR key=%#x: %d\n",
+ req->mr->rkey, err);
+ } else if (can_wait) {
+ wait_for_completion(&req->inv_comp);
+@@ -433,21 +433,21 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
+ if (!refcount_dec_and_test(&req->ref))
+ return;
+ }
+- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
++ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
+ req->sg_cnt, req->dir);
+ }
+ if (!refcount_dec_and_test(&req->ref))
+ return;
+ if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
+- atomic_dec(&sess->stats->inflight);
++ atomic_dec(&clt_path->stats->inflight);
+
+ req->in_use = false;
+ req->con = NULL;
+
+ if (errno) {
+- rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n",
+- errno, kobject_name(&sess->kobj), sess->hca_name,
+- sess->hca_port, notify);
++ rtrs_err_rl(con->c.path, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n",
++ errno, kobject_name(&clt_path->kobj), clt_path->hca_name,
++ clt_path->hca_port, notify);
+ }
+
+ if (notify)
+@@ -459,12 +459,12 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
+ struct rtrs_rbuf *rbuf, u32 off,
+ u32 imm, struct ib_send_wr *wr)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ enum ib_send_flags flags;
+ struct ib_sge sge;
+
+ if (!req->sg_size) {
+- rtrs_wrn(con->c.sess,
++ rtrs_wrn(con->c.path,
+ "Doing RDMA Write failed, no data supplied\n");
+ return -EINVAL;
+ }
+@@ -472,16 +472,17 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
+ /* user data and user message in the first list element */
+ sge.addr = req->iu->dma_addr;
+ sge.length = req->sg_size;
+- sge.lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ sge.lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
+
+ /*
+ * From time to time we have to post signalled sends,
+ * or send queue will fill up and only QP reset can help.
+ */
+- flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
++ flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ?
+ 0 : IB_SEND_SIGNALED;
+
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
++ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
++ req->iu->dma_addr,
+ req->sg_size, DMA_TO_DEVICE);
+
+ return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1,
+@@ -489,15 +490,15 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con,
+ imm, flags, wr, NULL);
+ }
+
+-static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id,
++static void process_io_rsp(struct rtrs_clt_path *clt_path, u32 msg_id,
+ s16 errno, bool w_inval)
+ {
+ struct rtrs_clt_io_req *req;
+
+- if (WARN_ON(msg_id >= sess->queue_depth))
++ if (WARN_ON(msg_id >= clt_path->queue_depth))
+ return;
+
+- req = &sess->reqs[msg_id];
++ req = &clt_path->reqs[msg_id];
+ /* Drop need_inv if server responded with send with invalidation */
+ req->need_inv &= !w_inval;
+ complete_rdma_req(req, errno, true, false);
+@@ -507,21 +508,21 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
+ {
+ struct rtrs_iu *iu;
+ int err;
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+- WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
++ WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0);
+ iu = container_of(wc->wr_cqe, struct rtrs_iu,
+ cqe);
+ err = rtrs_iu_post_recv(&con->c, iu);
+ if (err) {
+- rtrs_err(con->c.sess, "post iu failed %d\n", err);
++ rtrs_err(con->c.path, "post iu failed %d\n", err);
+ rtrs_rdma_error_recovery(con);
+ }
+ }
+
+ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_msg_rkey_rsp *msg;
+ u32 imm_type, imm_payload;
+ bool w_inval = false;
+@@ -529,25 +530,26 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
+ u32 buf_id;
+ int err;
+
+- WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0);
++ WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0);
+
+ iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
+
+ if (wc->byte_len < sizeof(*msg)) {
+- rtrs_err(con->c.sess, "rkey response is malformed: size %d\n",
++ rtrs_err(con->c.path, "rkey response is malformed: size %d\n",
+ wc->byte_len);
+ goto out;
+ }
+- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
++ ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr,
+ iu->size, DMA_FROM_DEVICE);
+ msg = iu->buf;
+ if (le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP) {
+- rtrs_err(sess->clt, "rkey response is malformed: type %d\n",
++ rtrs_err(clt_path->clt,
++ "rkey response is malformed: type %d\n",
+ le16_to_cpu(msg->type));
+ goto out;
+ }
+ buf_id = le16_to_cpu(msg->buf_id);
+- if (WARN_ON(buf_id >= sess->queue_depth))
++ if (WARN_ON(buf_id >= clt_path->queue_depth))
+ goto out;
+
+ rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload);
+@@ -560,10 +562,10 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc)
+
+ if (WARN_ON(buf_id != msg_id))
+ goto out;
+- sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey);
+- process_io_rsp(sess, msg_id, err, w_inval);
++ clt_path->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey);
++ process_io_rsp(clt_path, msg_id, err, w_inval);
+ }
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr,
++ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, iu->dma_addr,
+ iu->size, DMA_FROM_DEVICE);
+ return rtrs_clt_recv_done(con, wc);
+ out:
+@@ -600,14 +602,14 @@ static int rtrs_post_recv_empty_x2(struct rtrs_con *con, struct ib_cqe *cqe)
+ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ u32 imm_type, imm_payload;
+ bool w_inval = false;
+ int err;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+- rtrs_err(sess->clt, "RDMA failed: %s\n",
++ rtrs_err(clt_path->clt, "RDMA failed: %s\n",
+ ib_wc_status_msg(wc->status));
+ rtrs_rdma_error_recovery(con);
+ }
+@@ -632,21 +634,21 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM);
+ rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err);
+
+- process_io_rsp(sess, msg_id, err, w_inval);
++ process_io_rsp(clt_path, msg_id, err, w_inval);
+ } else if (imm_type == RTRS_HB_MSG_IMM) {
+ WARN_ON(con->c.cid);
+- rtrs_send_hb_ack(&sess->s);
+- if (sess->flags & RTRS_MSG_NEW_RKEY_F)
++ rtrs_send_hb_ack(&clt_path->s);
++ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F)
+ return rtrs_clt_recv_done(con, wc);
+ } else if (imm_type == RTRS_HB_ACK_IMM) {
+ WARN_ON(con->c.cid);
+- sess->s.hb_missed_cnt = 0;
+- sess->s.hb_cur_latency =
+- ktime_sub(ktime_get(), sess->s.hb_last_sent);
+- if (sess->flags & RTRS_MSG_NEW_RKEY_F)
++ clt_path->s.hb_missed_cnt = 0;
++ clt_path->s.hb_cur_latency =
++ ktime_sub(ktime_get(), clt_path->s.hb_last_sent);
++ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F)
+ return rtrs_clt_recv_done(con, wc);
+ } else {
+- rtrs_wrn(con->c.sess, "Unknown IMM type %u\n",
++ rtrs_wrn(con->c.path, "Unknown IMM type %u\n",
+ imm_type);
+ }
+ if (w_inval)
+@@ -658,7 +660,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ else
+ err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
+ if (err) {
+- rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n",
++ rtrs_err(con->c.path, "rtrs_post_recv_empty(): %d\n",
+ err);
+ rtrs_rdma_error_recovery(con);
+ }
+@@ -670,7 +672,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE ||
+ wc->wc_flags & IB_WC_WITH_IMM));
+ WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done);
+- if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
++ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) {
+ if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
+ return rtrs_clt_recv_done(con, wc);
+
+@@ -685,7 +687,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ break;
+
+ default:
+- rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode);
++ rtrs_wrn(clt_path->clt, "Unexpected WC type: %d\n", wc->opcode);
+ return;
+ }
+ }
+@@ -693,10 +695,10 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
+ {
+ int err, i;
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+ for (i = 0; i < q_size; i++) {
+- if (sess->flags & RTRS_MSG_NEW_RKEY_F) {
++ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) {
+ struct rtrs_iu *iu = &con->rsp_ius[i];
+
+ err = rtrs_iu_post_recv(&con->c, iu);
+@@ -710,16 +712,16 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size)
+ return 0;
+ }
+
+-static int post_recv_sess(struct rtrs_clt_sess *sess)
++static int post_recv_path(struct rtrs_clt_path *clt_path)
+ {
+ size_t q_size = 0;
+ int err, cid;
+
+- for (cid = 0; cid < sess->s.con_num; cid++) {
++ for (cid = 0; cid < clt_path->s.con_num; cid++) {
+ if (cid == 0)
+ q_size = SERVICE_CON_QUEUE_DEPTH;
+ else
+- q_size = sess->queue_depth;
++ q_size = clt_path->queue_depth;
+
+ /*
+ * x2 for RDMA read responses + FR key invalidations,
+@@ -727,9 +729,10 @@ static int post_recv_sess(struct rtrs_clt_sess *sess)
+ */
+ q_size *= 2;
+
+- err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size);
++ err = post_recv_io(to_clt_con(clt_path->s.con[cid]), q_size);
+ if (err) {
+- rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err);
++ rtrs_err(clt_path->clt, "post_recv_io(), err: %d\n",
++ err);
+ return err;
+ }
+ }
+@@ -741,28 +744,28 @@ struct path_it {
+ int i;
+ struct list_head skip_list;
+ struct rtrs_clt *clt;
+- struct rtrs_clt_sess *(*next_path)(struct path_it *it);
++ struct rtrs_clt_path *(*next_path)(struct path_it *it);
+ };
+
+-/**
+- * list_next_or_null_rr_rcu - get next list element in round-robin fashion.
++/*
++ * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL
+ * @head: the head for the list.
+- * @ptr: the list head to take the next element from.
+- * @type: the type of the struct this is embedded in.
+- * @memb: the name of the list_head within the struct.
++ * @clt_path: The element to take the next clt_path from.
+ *
+- * Next element returned in round-robin fashion, i.e. head will be skipped,
++ * Next clt path returned in round-robin fashion, i.e. head will be skipped,
+ * but if list is observed as empty, NULL will be returned.
+ *
+- * This primitive may safely run concurrently with the _rcu list-mutation
++ * This function may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+-#define list_next_or_null_rr_rcu(head, ptr, type, memb) \
+-({ \
+- list_next_or_null_rcu(head, ptr, type, memb) ?: \
+- list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \
+- type, memb); \
+-})
++static inline struct rtrs_clt_path *
++rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path)
++{
++ return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?:
++ list_next_or_null_rcu(head,
++ READ_ONCE((&clt_path->s.entry)->next),
++ typeof(*clt_path), s.entry);
++}
+
+ /**
+ * get_next_path_rr() - Returns path in round-robin fashion.
+@@ -773,10 +776,10 @@ struct path_it {
+ * Locks:
+ * rcu_read_lock() must be hold.
+ */
+-static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
++static struct rtrs_clt_path *get_next_path_rr(struct path_it *it)
+ {
+- struct rtrs_clt_sess __rcu **ppcpu_path;
+- struct rtrs_clt_sess *path;
++ struct rtrs_clt_path __rcu **ppcpu_path;
++ struct rtrs_clt_path *path;
+ struct rtrs_clt *clt;
+
+ clt = it->clt;
+@@ -793,10 +796,8 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
+ path = list_first_or_null_rcu(&clt->paths_list,
+ typeof(*path), s.entry);
+ else
+- path = list_next_or_null_rr_rcu(&clt->paths_list,
+- &path->s.entry,
+- typeof(*path),
+- s.entry);
++ path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path);
++
+ rcu_assign_pointer(*ppcpu_path, path);
+
+ return path;
+@@ -811,26 +812,26 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it)
+ * Locks:
+ * rcu_read_lock() must be hold.
+ */
+-static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
++static struct rtrs_clt_path *get_next_path_min_inflight(struct path_it *it)
+ {
+- struct rtrs_clt_sess *min_path = NULL;
++ struct rtrs_clt_path *min_path = NULL;
+ struct rtrs_clt *clt = it->clt;
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int min_inflight = INT_MAX;
+ int inflight;
+
+- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
++ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) {
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
+ continue;
+
+- if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
++ if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry)))
+ continue;
+
+- inflight = atomic_read(&sess->stats->inflight);
++ inflight = atomic_read(&clt_path->stats->inflight);
+
+ if (inflight < min_inflight) {
+ min_inflight = inflight;
+- min_path = sess;
++ min_path = clt_path;
+ }
+ }
+
+@@ -862,26 +863,26 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
+ * Therefore the caller MUST check the returned
+ * path is NULL and trigger the IO error.
+ */
+-static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it)
++static struct rtrs_clt_path *get_next_path_min_latency(struct path_it *it)
+ {
+- struct rtrs_clt_sess *min_path = NULL;
++ struct rtrs_clt_path *min_path = NULL;
+ struct rtrs_clt *clt = it->clt;
+- struct rtrs_clt_sess *sess;
+- ktime_t min_latency = INT_MAX;
++ struct rtrs_clt_path *clt_path;
++ ktime_t min_latency = KTIME_MAX;
+ ktime_t latency;
+
+- list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
++ list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) {
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
+ continue;
+
+- if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))
++ if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry)))
+ continue;
+
+- latency = sess->s.hb_cur_latency;
++ latency = clt_path->s.hb_cur_latency;
+
+ if (latency < min_latency) {
+ min_latency = latency;
+- min_path = sess;
++ min_path = clt_path;
+ }
+ }
+
+@@ -928,7 +929,7 @@ static inline void path_it_deinit(struct path_it *it)
+ * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will
+ * also hold the control message of rtrs.
+ * @req: an io request holding information about IO.
+- * @sess: client session
++ * @clt_path: client path
+ * @conf: conformation callback function to notify upper layer.
+ * @permit: permit for allocation of RDMA remote buffer
+ * @priv: private pointer
+@@ -940,7 +941,7 @@ static inline void path_it_deinit(struct path_it *it)
+ * @dir: direction of the IO.
+ */
+ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
+- struct rtrs_clt_sess *sess,
++ struct rtrs_clt_path *clt_path,
+ void (*conf)(void *priv, int errno),
+ struct rtrs_permit *permit, void *priv,
+ const struct kvec *vec, size_t usr_len,
+@@ -958,15 +959,15 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
+ req->sg_cnt = sg_cnt;
+ req->priv = priv;
+ req->dir = dir;
+- req->con = rtrs_permit_to_clt_con(sess, permit);
++ req->con = rtrs_permit_to_clt_con(clt_path, permit);
+ req->conf = conf;
+ req->need_inv = false;
+ req->need_inv_comp = false;
+ req->inv_errno = 0;
+ refcount_set(&req->ref, 1);
+- req->mp_policy = sess->clt->mp_policy;
++ req->mp_policy = clt_path->clt->mp_policy;
+
+- iov_iter_kvec(&iter, READ, vec, 1, usr_len);
++ iov_iter_kvec(&iter, WRITE, vec, 1, usr_len);
+ len = _copy_from_iter(req->iu->buf, usr_len, &iter);
+ WARN_ON(len != usr_len);
+
+@@ -974,7 +975,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
+ }
+
+ static struct rtrs_clt_io_req *
+-rtrs_clt_get_req(struct rtrs_clt_sess *sess,
++rtrs_clt_get_req(struct rtrs_clt_path *clt_path,
+ void (*conf)(void *priv, int errno),
+ struct rtrs_permit *permit, void *priv,
+ const struct kvec *vec, size_t usr_len,
+@@ -983,14 +984,14 @@ rtrs_clt_get_req(struct rtrs_clt_sess *sess,
+ {
+ struct rtrs_clt_io_req *req;
+
+- req = &sess->reqs[permit->mem_id];
+- rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len,
++ req = &clt_path->reqs[permit->mem_id];
++ rtrs_clt_init_req(req, clt_path, conf, permit, priv, vec, usr_len,
+ sg, sg_cnt, data_len, dir);
+ return req;
+ }
+
+ static struct rtrs_clt_io_req *
+-rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
++rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path,
+ struct rtrs_clt_io_req *fail_req)
+ {
+ struct rtrs_clt_io_req *req;
+@@ -999,8 +1000,8 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
+ .iov_len = fail_req->usr_len
+ };
+
+- req = &alive_sess->reqs[fail_req->permit->mem_id];
+- rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit,
++ req = &alive_path->reqs[fail_req->permit->mem_id];
++ rtrs_clt_init_req(req, alive_path, fail_req->conf, fail_req->permit,
+ fail_req->priv, &vec, fail_req->usr_len,
+ fail_req->sglist, fail_req->sg_cnt,
+ fail_req->data_len, fail_req->dir);
+@@ -1010,10 +1011,11 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
+ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
+ struct rtrs_clt_io_req *req,
+ struct rtrs_rbuf *rbuf, bool fr_en,
+- u32 size, u32 imm, struct ib_send_wr *wr,
++ u32 count, u32 size, u32 imm,
++ struct ib_send_wr *wr,
+ struct ib_send_wr *tail)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct ib_sge *sge = req->sge;
+ enum ib_send_flags flags;
+ struct scatterlist *sg;
+@@ -1030,25 +1032,26 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
+ num_sge = 2;
+ ptail = tail;
+ } else {
+- for_each_sg(req->sglist, sg, req->sg_cnt, i) {
++ for_each_sg(req->sglist, sg, count, i) {
+ sge[i].addr = sg_dma_address(sg);
+ sge[i].length = sg_dma_len(sg);
+- sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
+ }
+- num_sge = 1 + req->sg_cnt;
++ num_sge = 1 + count;
+ }
+ sge[i].addr = req->iu->dma_addr;
+ sge[i].length = size;
+- sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
+
+ /*
+ * From time to time we have to post signalled sends,
+ * or send queue will fill up and only QP reset can help.
+ */
+- flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ?
++ flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ?
+ 0 : IB_SEND_SIGNALED;
+
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr,
++ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
++ req->iu->dma_addr,
+ size, DMA_TO_DEVICE);
+
+ return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge,
+@@ -1074,8 +1077,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
+ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+ {
+ struct rtrs_clt_con *con = req->con;
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_clt_sess *sess = to_clt_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_clt_path *clt_path = to_clt_path(s);
+ struct rtrs_msg_rdma_write *msg;
+
+ struct rtrs_rbuf *rbuf;
+@@ -1088,13 +1091,13 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+
+ const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
+
+- if (tsize > sess->chunk_size) {
++ if (tsize > clt_path->chunk_size) {
+ rtrs_wrn(s, "Write request failed, size too big %zu > %d\n",
+- tsize, sess->chunk_size);
++ tsize, clt_path->chunk_size);
+ return -EMSGSIZE;
+ }
+ if (req->sg_cnt) {
+- count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist,
++ count = ib_dma_map_sg(clt_path->s.dev->ib_dev, req->sglist,
+ req->sg_cnt, req->dir);
+ if (!count) {
+ rtrs_wrn(s, "Write request failed, map failed\n");
+@@ -1111,7 +1114,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+ imm = rtrs_to_io_req_imm(imm);
+ buf_id = req->permit->mem_id;
+ req->sg_size = tsize;
+- rbuf = &sess->rbufs[buf_id];
++ rbuf = &clt_path->rbufs[buf_id];
+
+ if (count) {
+ ret = rtrs_map_sg_fr(req, count);
+@@ -1119,7 +1122,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+ rtrs_err_rl(s,
+ "Write request failed, failed to map fast reg. data, err: %d\n",
+ ret);
+- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
++ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
+ req->sg_cnt, req->dir);
+ return ret;
+ }
+@@ -1147,18 +1150,18 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+ */
+ rtrs_clt_update_all_stats(req, WRITE);
+
+- ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en,
++ ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count,
+ req->usr_len + sizeof(*msg),
+ imm, wr, &inv_wr);
+ if (ret) {
+ rtrs_err_rl(s,
+ "Write request failed: error=%d path=%s [%s:%u]\n",
+- ret, kobject_name(&sess->kobj), sess->hca_name,
+- sess->hca_port);
++ ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
++ clt_path->hca_port);
+ if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
+- atomic_dec(&sess->stats->inflight);
++ atomic_dec(&clt_path->stats->inflight);
+ if (req->sg_cnt)
+- ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
++ ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
+ req->sg_cnt, req->dir);
+ }
+
+@@ -1168,10 +1171,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
+ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
+ {
+ struct rtrs_clt_con *con = req->con;
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_clt_sess *sess = to_clt_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_clt_path *clt_path = to_clt_path(s);
+ struct rtrs_msg_rdma_read *msg;
+- struct rtrs_ib_dev *dev = sess->s.dev;
++ struct rtrs_ib_dev *dev = clt_path->s.dev;
+
+ struct ib_reg_wr rwr;
+ struct ib_send_wr *wr = NULL;
+@@ -1181,10 +1184,10 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
+
+ const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
+
+- if (tsize > sess->chunk_size) {
++ if (tsize > clt_path->chunk_size) {
+ rtrs_wrn(s,
+ "Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n",
+- tsize, sess->chunk_size);
++ tsize, clt_path->chunk_size);
+ return -EMSGSIZE;
+ }
+
+@@ -1254,15 +1257,15 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
+ */
+ rtrs_clt_update_all_stats(req, READ);
+
+- ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id],
++ ret = rtrs_post_send_rdma(req->con, req, &clt_path->rbufs[buf_id],
+ req->data_len, imm, wr);
+ if (ret) {
+ rtrs_err_rl(s,
+ "Read request failed: error=%d path=%s [%s:%u]\n",
+- ret, kobject_name(&sess->kobj), sess->hca_name,
+- sess->hca_port);
++ ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
++ clt_path->hca_port);
+ if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
+- atomic_dec(&sess->stats->inflight);
++ atomic_dec(&clt_path->stats->inflight);
+ req->need_inv = false;
+ if (req->sg_cnt)
+ ib_dma_unmap_sg(dev->ib_dev, req->sglist,
+@@ -1280,18 +1283,18 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
+ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
+ struct rtrs_clt_io_req *fail_req)
+ {
+- struct rtrs_clt_sess *alive_sess;
++ struct rtrs_clt_path *alive_path;
+ struct rtrs_clt_io_req *req;
+ int err = -ECONNABORTED;
+ struct path_it it;
+
+ rcu_read_lock();
+ for (path_it_init(&it, clt);
+- (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num;
++ (alive_path = it.next_path(&it)) && it.i < it.clt->paths_num;
+ it.i++) {
+- if (READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED)
++ if (READ_ONCE(alive_path->state) != RTRS_CLT_CONNECTED)
+ continue;
+- req = rtrs_clt_get_copy_req(alive_sess, fail_req);
++ req = rtrs_clt_get_copy_req(alive_path, fail_req);
+ if (req->dir == DMA_TO_DEVICE)
+ err = rtrs_clt_write_req(req);
+ else
+@@ -1301,7 +1304,7 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
+ continue;
+ }
+ /* Success path */
+- rtrs_clt_inc_failover_cnt(alive_sess->stats);
++ rtrs_clt_inc_failover_cnt(alive_path->stats);
+ break;
+ }
+ path_it_deinit(&it);
+@@ -1310,16 +1313,16 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt,
+ return err;
+ }
+
+-static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess)
++static void fail_all_outstanding_reqs(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+ struct rtrs_clt_io_req *req;
+ int i, err;
+
+- if (!sess->reqs)
++ if (!clt_path->reqs)
+ return;
+- for (i = 0; i < sess->queue_depth; ++i) {
+- req = &sess->reqs[i];
++ for (i = 0; i < clt_path->queue_depth; ++i) {
++ req = &clt_path->reqs[i];
+ if (!req->in_use)
+ continue;
+
+@@ -1337,38 +1340,39 @@ static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess)
+ }
+ }
+
+-static void free_sess_reqs(struct rtrs_clt_sess *sess)
++static void free_path_reqs(struct rtrs_clt_path *clt_path)
+ {
+ struct rtrs_clt_io_req *req;
+ int i;
+
+- if (!sess->reqs)
++ if (!clt_path->reqs)
+ return;
+- for (i = 0; i < sess->queue_depth; ++i) {
+- req = &sess->reqs[i];
++ for (i = 0; i < clt_path->queue_depth; ++i) {
++ req = &clt_path->reqs[i];
+ if (req->mr)
+ ib_dereg_mr(req->mr);
+ kfree(req->sge);
+- rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(req->iu, clt_path->s.dev->ib_dev, 1);
+ }
+- kfree(sess->reqs);
+- sess->reqs = NULL;
++ kfree(clt_path->reqs);
++ clt_path->reqs = NULL;
+ }
+
+-static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
++static int alloc_path_reqs(struct rtrs_clt_path *clt_path)
+ {
+ struct rtrs_clt_io_req *req;
+ int i, err = -ENOMEM;
+
+- sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs),
+- GFP_KERNEL);
+- if (!sess->reqs)
++ clt_path->reqs = kcalloc(clt_path->queue_depth,
++ sizeof(*clt_path->reqs),
++ GFP_KERNEL);
++ if (!clt_path->reqs)
+ return -ENOMEM;
+
+- for (i = 0; i < sess->queue_depth; ++i) {
+- req = &sess->reqs[i];
+- req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL,
+- sess->s.dev->ib_dev,
++ for (i = 0; i < clt_path->queue_depth; ++i) {
++ req = &clt_path->reqs[i];
++ req->iu = rtrs_iu_alloc(1, clt_path->max_hdr_size, GFP_KERNEL,
++ clt_path->s.dev->ib_dev,
+ DMA_TO_DEVICE,
+ rtrs_clt_rdma_done);
+ if (!req->iu)
+@@ -1378,13 +1382,14 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
+ if (!req->sge)
+ goto out;
+
+- req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
+- sess->max_pages_per_mr);
++ req->mr = ib_alloc_mr(clt_path->s.dev->ib_pd,
++ IB_MR_TYPE_MEM_REG,
++ clt_path->max_pages_per_mr);
+ if (IS_ERR(req->mr)) {
+ err = PTR_ERR(req->mr);
+ req->mr = NULL;
+- pr_err("Failed to alloc sess->max_pages_per_mr %d\n",
+- sess->max_pages_per_mr);
++ pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n",
++ clt_path->max_pages_per_mr);
+ goto out;
+ }
+
+@@ -1394,7 +1399,7 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess)
+ return 0;
+
+ out:
+- free_sess_reqs(sess);
++ free_path_reqs(clt_path);
+
+ return err;
+ }
+@@ -1447,13 +1452,13 @@ static void free_permits(struct rtrs_clt *clt)
+ clt->permits = NULL;
+ }
+
+-static void query_fast_reg_mode(struct rtrs_clt_sess *sess)
++static void query_fast_reg_mode(struct rtrs_clt_path *clt_path)
+ {
+ struct ib_device *ib_dev;
+ u64 max_pages_per_mr;
+ int mr_page_shift;
+
+- ib_dev = sess->s.dev->ib_dev;
++ ib_dev = clt_path->s.dev->ib_dev;
+
+ /*
+ * Use the smallest page size supported by the HCA, down to a
+@@ -1463,24 +1468,24 @@ static void query_fast_reg_mode(struct rtrs_clt_sess *sess)
+ mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1);
+ max_pages_per_mr = ib_dev->attrs.max_mr_size;
+ do_div(max_pages_per_mr, (1ull << mr_page_shift));
+- sess->max_pages_per_mr =
+- min3(sess->max_pages_per_mr, (u32)max_pages_per_mr,
++ clt_path->max_pages_per_mr =
++ min3(clt_path->max_pages_per_mr, (u32)max_pages_per_mr,
+ ib_dev->attrs.max_fast_reg_page_list_len);
+- sess->clt->max_segments =
+- min(sess->max_pages_per_mr, sess->clt->max_segments);
++ clt_path->clt->max_segments =
++ min(clt_path->max_pages_per_mr, clt_path->clt->max_segments);
+ }
+
+-static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess,
++static bool rtrs_clt_change_state_get_old(struct rtrs_clt_path *clt_path,
+ enum rtrs_clt_state new_state,
+ enum rtrs_clt_state *old_state)
+ {
+ bool changed;
+
+- spin_lock_irq(&sess->state_wq.lock);
++ spin_lock_irq(&clt_path->state_wq.lock);
+ if (old_state)
+- *old_state = sess->state;
+- changed = rtrs_clt_change_state(sess, new_state);
+- spin_unlock_irq(&sess->state_wq.lock);
++ *old_state = clt_path->state;
++ changed = rtrs_clt_change_state(clt_path, new_state);
++ spin_unlock_irq(&clt_path->state_wq.lock);
+
+ return changed;
+ }
+@@ -1492,9 +1497,9 @@ static void rtrs_clt_hb_err_handler(struct rtrs_con *c)
+ rtrs_rdma_error_recovery(con);
+ }
+
+-static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess)
++static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path)
+ {
+- rtrs_init_hb(&sess->s, &io_comp_cqe,
++ rtrs_init_hb(&clt_path->s, &io_comp_cqe,
+ RTRS_HB_INTERVAL_MS,
+ RTRS_HB_MISSED_MAX,
+ rtrs_clt_hb_err_handler,
+@@ -1504,17 +1509,17 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess)
+ static void rtrs_clt_reconnect_work(struct work_struct *work);
+ static void rtrs_clt_close_work(struct work_struct *work);
+
+-static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
++static struct rtrs_clt_path *alloc_path(struct rtrs_clt *clt,
+ const struct rtrs_addr *path,
+ size_t con_num, u32 nr_poll_queues)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int err = -ENOMEM;
+ int cpu;
+ size_t total_con;
+
+- sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+- if (!sess)
++ clt_path = kzalloc(sizeof(*clt_path), GFP_KERNEL);
++ if (!clt_path)
+ goto err;
+
+ /*
+@@ -1522,20 +1527,21 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
+ * +1: Extra connection for user messages
+ */
+ total_con = con_num + nr_poll_queues + 1;
+- sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
+- if (!sess->s.con)
+- goto err_free_sess;
++ clt_path->s.con = kcalloc(total_con, sizeof(*clt_path->s.con),
++ GFP_KERNEL);
++ if (!clt_path->s.con)
++ goto err_free_path;
+
+- sess->s.con_num = total_con;
+- sess->s.irq_con_num = con_num + 1;
++ clt_path->s.con_num = total_con;
++ clt_path->s.irq_con_num = con_num + 1;
+
+- sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
+- if (!sess->stats)
++ clt_path->stats = kzalloc(sizeof(*clt_path->stats), GFP_KERNEL);
++ if (!clt_path->stats)
+ goto err_free_con;
+
+- mutex_init(&sess->init_mutex);
+- uuid_gen(&sess->s.uuid);
+- memcpy(&sess->s.dst_addr, path->dst,
++ mutex_init(&clt_path->init_mutex);
++ uuid_gen(&clt_path->s.uuid);
++ memcpy(&clt_path->s.dst_addr, path->dst,
+ rdma_addr_size((struct sockaddr *)path->dst));
+
+ /*
+@@ -1544,53 +1550,54 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
+ * the sess->src_addr will contain only zeros, which is then fine.
+ */
+ if (path->src)
+- memcpy(&sess->s.src_addr, path->src,
++ memcpy(&clt_path->s.src_addr, path->src,
+ rdma_addr_size((struct sockaddr *)path->src));
+- strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
+- sess->clt = clt;
+- sess->max_pages_per_mr = RTRS_MAX_SEGMENTS;
+- init_waitqueue_head(&sess->state_wq);
+- sess->state = RTRS_CLT_CONNECTING;
+- atomic_set(&sess->connected_cnt, 0);
+- INIT_WORK(&sess->close_work, rtrs_clt_close_work);
+- INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work);
+- rtrs_clt_init_hb(sess);
+-
+- sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry));
+- if (!sess->mp_skip_entry)
++ strscpy(clt_path->s.sessname, clt->sessname,
++ sizeof(clt_path->s.sessname));
++ clt_path->clt = clt;
++ clt_path->max_pages_per_mr = RTRS_MAX_SEGMENTS;
++ init_waitqueue_head(&clt_path->state_wq);
++ clt_path->state = RTRS_CLT_CONNECTING;
++ atomic_set(&clt_path->connected_cnt, 0);
++ INIT_WORK(&clt_path->close_work, rtrs_clt_close_work);
++ INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work);
++ rtrs_clt_init_hb(clt_path);
++
++ clt_path->mp_skip_entry = alloc_percpu(typeof(*clt_path->mp_skip_entry));
++ if (!clt_path->mp_skip_entry)
+ goto err_free_stats;
+
+ for_each_possible_cpu(cpu)
+- INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu));
++ INIT_LIST_HEAD(per_cpu_ptr(clt_path->mp_skip_entry, cpu));
+
+- err = rtrs_clt_init_stats(sess->stats);
++ err = rtrs_clt_init_stats(clt_path->stats);
+ if (err)
+ goto err_free_percpu;
+
+- return sess;
++ return clt_path;
+
+ err_free_percpu:
+- free_percpu(sess->mp_skip_entry);
++ free_percpu(clt_path->mp_skip_entry);
+ err_free_stats:
+- kfree(sess->stats);
++ kfree(clt_path->stats);
+ err_free_con:
+- kfree(sess->s.con);
+-err_free_sess:
+- kfree(sess);
++ kfree(clt_path->s.con);
++err_free_path:
++ kfree(clt_path);
+ err:
+ return ERR_PTR(err);
+ }
+
+-void free_sess(struct rtrs_clt_sess *sess)
++void free_path(struct rtrs_clt_path *clt_path)
+ {
+- free_percpu(sess->mp_skip_entry);
+- mutex_destroy(&sess->init_mutex);
+- kfree(sess->s.con);
+- kfree(sess->rbufs);
+- kfree(sess);
++ free_percpu(clt_path->mp_skip_entry);
++ mutex_destroy(&clt_path->init_mutex);
++ kfree(clt_path->s.con);
++ kfree(clt_path->rbufs);
++ kfree(clt_path);
+ }
+
+-static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
++static int create_con(struct rtrs_clt_path *clt_path, unsigned int cid)
+ {
+ struct rtrs_clt_con *con;
+
+@@ -1601,28 +1608,28 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
+ /* Map first two connections to the first CPU */
+ con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids;
+ con->c.cid = cid;
+- con->c.sess = &sess->s;
++ con->c.path = &clt_path->s;
+ /* Align with srv, init as 1 */
+ atomic_set(&con->c.wr_cnt, 1);
+ mutex_init(&con->con_mutex);
+
+- sess->s.con[cid] = &con->c;
++ clt_path->s.con[cid] = &con->c;
+
+ return 0;
+ }
+
+ static void destroy_con(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+- sess->s.con[con->c.cid] = NULL;
++ clt_path->s.con[con->c.cid] = NULL;
+ mutex_destroy(&con->con_mutex);
+ kfree(con);
+ }
+
+ static int create_con_cq_qp(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ u32 max_send_wr, max_recv_wr, cq_num, max_send_sge, wr_limit;
+ int err, cq_vector;
+ struct rtrs_msg_rkey_rsp *rsp;
+@@ -1631,7 +1638,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
+ if (con->c.cid == 0) {
+ max_send_sge = 1;
+ /* We must be the first here */
+- if (WARN_ON(sess->s.dev))
++ if (WARN_ON(clt_path->s.dev))
+ return -EINVAL;
+
+ /*
+@@ -1639,16 +1646,16 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
+ * Be careful not to close user connection before ib dev
+ * is gracefully put.
+ */
+- sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device,
++ clt_path->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device,
+ &dev_pd);
+- if (!sess->s.dev) {
+- rtrs_wrn(sess->clt,
++ if (!clt_path->s.dev) {
++ rtrs_wrn(clt_path->clt,
+ "rtrs_ib_dev_find_get_or_add(): no memory\n");
+ return -ENOMEM;
+ }
+- sess->s.dev_ref = 1;
+- query_fast_reg_mode(sess);
+- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
++ clt_path->s.dev_ref = 1;
++ query_fast_reg_mode(clt_path);
++ wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr;
+ /*
+ * Two (request + registration) completion for send
+ * Two for recv if always_invalidate is set on server
+@@ -1665,27 +1672,28 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
+ * This is always true if user connection (cid == 0) is
+ * established first.
+ */
+- if (WARN_ON(!sess->s.dev))
++ if (WARN_ON(!clt_path->s.dev))
+ return -EINVAL;
+- if (WARN_ON(!sess->queue_depth))
++ if (WARN_ON(!clt_path->queue_depth))
+ return -EINVAL;
+
+- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
++ wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr;
+ /* Shared between connections */
+- sess->s.dev_ref++;
++ clt_path->s.dev_ref++;
+ max_send_wr = min_t(int, wr_limit,
+ /* QD * (REQ + RSP + FR REGS or INVS) + drain */
+- sess->queue_depth * 3 + 1);
++ clt_path->queue_depth * 3 + 1);
+ max_recv_wr = min_t(int, wr_limit,
+- sess->queue_depth * 3 + 1);
++ clt_path->queue_depth * 3 + 1);
+ max_send_sge = 2;
+ }
+ atomic_set(&con->c.sq_wr_avail, max_send_wr);
+ cq_num = max_send_wr + max_recv_wr;
+ /* alloc iu to recv new rkey reply when server reports flags set */
+- if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
++ if (clt_path->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) {
+ con->rsp_ius = rtrs_iu_alloc(cq_num, sizeof(*rsp),
+- GFP_KERNEL, sess->s.dev->ib_dev,
++ GFP_KERNEL,
++ clt_path->s.dev->ib_dev,
+ DMA_FROM_DEVICE,
+ rtrs_clt_rdma_done);
+ if (!con->rsp_ius)
+@@ -1693,13 +1701,13 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
+ con->queue_num = cq_num;
+ }
+ cq_num = max_send_wr + max_recv_wr;
+- cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
+- if (con->c.cid >= sess->s.irq_con_num)
+- err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge,
++ cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors;
++ if (con->c.cid >= clt_path->s.irq_con_num)
++ err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge,
+ cq_vector, cq_num, max_send_wr,
+ max_recv_wr, IB_POLL_DIRECT);
+ else
+- err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge,
++ err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge,
+ cq_vector, cq_num, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
+ /*
+@@ -1711,7 +1719,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
+
+ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+ /*
+ * Be careful here: destroy_con_cq_qp() can be called even
+@@ -1720,13 +1728,14 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
+ lockdep_assert_held(&con->con_mutex);
+ rtrs_cq_qp_destroy(&con->c);
+ if (con->rsp_ius) {
+- rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_num);
++ rtrs_iu_free(con->rsp_ius, clt_path->s.dev->ib_dev,
++ con->queue_num);
+ con->rsp_ius = NULL;
+ con->queue_num = 0;
+ }
+- if (sess->s.dev_ref && !--sess->s.dev_ref) {
+- rtrs_ib_dev_put(sess->s.dev);
+- sess->s.dev = NULL;
++ if (clt_path->s.dev_ref && !--clt_path->s.dev_ref) {
++ rtrs_ib_dev_put(clt_path->s.dev);
++ clt_path->s.dev = NULL;
+ }
+ }
+
+@@ -1745,7 +1754,7 @@ static void destroy_cm(struct rtrs_clt_con *con)
+
+ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
+ {
+- struct rtrs_sess *s = con->c.sess;
++ struct rtrs_path *s = con->c.path;
+ int err;
+
+ mutex_lock(&con->con_mutex);
+@@ -1764,8 +1773,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
+
+ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
++ struct rtrs_clt *clt = clt_path->clt;
+ struct rtrs_msg_conn_req msg;
+ struct rdma_conn_param param;
+
+@@ -1782,11 +1791,11 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
+ .magic = cpu_to_le16(RTRS_MAGIC),
+ .version = cpu_to_le16(RTRS_PROTO_VER),
+ .cid = cpu_to_le16(con->c.cid),
+- .cid_num = cpu_to_le16(sess->s.con_num),
+- .recon_cnt = cpu_to_le16(sess->s.recon_cnt),
++ .cid_num = cpu_to_le16(clt_path->s.con_num),
++ .recon_cnt = cpu_to_le16(clt_path->s.recon_cnt),
+ };
+- msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0;
+- uuid_copy(&msg.sess_uuid, &sess->s.uuid);
++ msg.first_conn = clt_path->for_new_clt ? FIRST_CONN : 0;
++ uuid_copy(&msg.sess_uuid, &clt_path->s.uuid);
+ uuid_copy(&msg.paths_uuid, &clt->paths_uuid);
+
+ err = rdma_connect_locked(con->c.cm_id, &param);
+@@ -1799,8 +1808,8 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
+ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
+ struct rdma_cm_event *ev)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
++ struct rtrs_clt *clt = clt_path->clt;
+ const struct rtrs_msg_conn_rsp *msg;
+ u16 version, queue_depth;
+ int errno;
+@@ -1831,31 +1840,32 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
+ if (con->c.cid == 0) {
+ queue_depth = le16_to_cpu(msg->queue_depth);
+
+- if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) {
++ if (clt_path->queue_depth > 0 && queue_depth != clt_path->queue_depth) {
+ rtrs_err(clt, "Error: queue depth changed\n");
+
+ /*
+ * Stop any more reconnection attempts
+ */
+- sess->reconnect_attempts = -1;
++ clt_path->reconnect_attempts = -1;
+ rtrs_err(clt,
+ "Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n");
+ return -ECONNRESET;
+ }
+
+- if (!sess->rbufs) {
+- sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs),
+- GFP_KERNEL);
+- if (!sess->rbufs)
++ if (!clt_path->rbufs) {
++ clt_path->rbufs = kcalloc(queue_depth,
++ sizeof(*clt_path->rbufs),
++ GFP_KERNEL);
++ if (!clt_path->rbufs)
+ return -ENOMEM;
+ }
+- sess->queue_depth = queue_depth;
+- sess->s.signal_interval = min_not_zero(queue_depth,
++ clt_path->queue_depth = queue_depth;
++ clt_path->s.signal_interval = min_not_zero(queue_depth,
+ (unsigned short) SERVICE_CON_QUEUE_DEPTH);
+- sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size);
+- sess->max_io_size = le32_to_cpu(msg->max_io_size);
+- sess->flags = le32_to_cpu(msg->flags);
+- sess->chunk_size = sess->max_io_size + sess->max_hdr_size;
++ clt_path->max_hdr_size = le32_to_cpu(msg->max_hdr_size);
++ clt_path->max_io_size = le32_to_cpu(msg->max_io_size);
++ clt_path->flags = le32_to_cpu(msg->flags);
++ clt_path->chunk_size = clt_path->max_io_size + clt_path->max_hdr_size;
+
+ /*
+ * Global IO size is always a minimum.
+@@ -1866,20 +1876,20 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
+ * connections in parallel, use lock.
+ */
+ mutex_lock(&clt->paths_mutex);
+- clt->queue_depth = sess->queue_depth;
+- clt->max_io_size = min_not_zero(sess->max_io_size,
++ clt->queue_depth = clt_path->queue_depth;
++ clt->max_io_size = min_not_zero(clt_path->max_io_size,
+ clt->max_io_size);
+ mutex_unlock(&clt->paths_mutex);
+
+ /*
+ * Cache the hca_port and hca_name for sysfs
+ */
+- sess->hca_port = con->c.cm_id->port_num;
+- scnprintf(sess->hca_name, sizeof(sess->hca_name),
+- sess->s.dev->ib_dev->name);
+- sess->s.src_addr = con->c.cm_id->route.addr.src_addr;
++ clt_path->hca_port = con->c.cm_id->port_num;
++ scnprintf(clt_path->hca_name, sizeof(clt_path->hca_name),
++ clt_path->s.dev->ib_dev->name);
++ clt_path->s.src_addr = con->c.cm_id->route.addr.src_addr;
+ /* set for_new_clt, to allow future reconnect on any path */
+- sess->for_new_clt = 1;
++ clt_path->for_new_clt = 1;
+ }
+
+ return 0;
+@@ -1887,16 +1897,16 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
+
+ static inline void flag_success_on_conn(struct rtrs_clt_con *con)
+ {
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+
+- atomic_inc(&sess->connected_cnt);
++ atomic_inc(&clt_path->connected_cnt);
+ con->cm_err = 1;
+ }
+
+ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
+ struct rdma_cm_event *ev)
+ {
+- struct rtrs_sess *s = con->c.sess;
++ struct rtrs_path *s = con->c.path;
+ const struct rtrs_msg_conn_rsp *msg;
+ const char *rej_msg;
+ int status, errno;
+@@ -1924,23 +1934,23 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
+ return -ECONNRESET;
+ }
+
+-void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait)
++void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait)
+ {
+- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL))
+- queue_work(rtrs_wq, &sess->close_work);
++ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSING, NULL))
++ queue_work(rtrs_wq, &clt_path->close_work);
+ if (wait)
+- flush_work(&sess->close_work);
++ flush_work(&clt_path->close_work);
+ }
+
+ static inline void flag_error_on_conn(struct rtrs_clt_con *con, int cm_err)
+ {
+ if (con->cm_err == 1) {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = to_clt_sess(con->c.sess);
+- if (atomic_dec_and_test(&sess->connected_cnt))
++ clt_path = to_clt_path(con->c.path);
++ if (atomic_dec_and_test(&clt_path->connected_cnt))
+
+- wake_up(&sess->state_wq);
++ wake_up(&clt_path->state_wq);
+ }
+ con->cm_err = cm_err;
+ }
+@@ -1949,8 +1959,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *ev)
+ {
+ struct rtrs_clt_con *con = cm_id->context;
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_clt_sess *sess = to_clt_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_clt_path *clt_path = to_clt_path(s);
+ int cm_err = 0;
+
+ switch (ev->event) {
+@@ -1968,7 +1978,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ * i.e. wake up without state change, but we set cm_err.
+ */
+ flag_success_on_conn(con);
+- wake_up(&sess->state_wq);
++ wake_up(&clt_path->state_wq);
+ return 0;
+ }
+ break;
+@@ -1997,7 +2007,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ /*
+ * Device removal is a special case. Queue close and return 0.
+ */
+- rtrs_clt_close_conns(sess, false);
++ rtrs_clt_close_conns(clt_path, false);
+ return 0;
+ default:
+ rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %d)\n",
+@@ -2018,15 +2028,16 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ return 0;
+ }
+
++/* The caller should do the cleanup in case of error */
+ static int create_cm(struct rtrs_clt_con *con)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_clt_sess *sess = to_clt_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_clt_path *clt_path = to_clt_path(s);
+ struct rdma_cm_id *cm_id;
+ int err;
+
+ cm_id = rdma_create_id(&init_net, rtrs_clt_rdma_cm_handler, con,
+- sess->s.dst_addr.ss_family == AF_IB ?
++ clt_path->s.dst_addr.ss_family == AF_IB ?
+ RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(cm_id)) {
+ err = PTR_ERR(cm_id);
+@@ -2040,14 +2051,14 @@ static int create_cm(struct rtrs_clt_con *con)
+ err = rdma_set_reuseaddr(cm_id, 1);
+ if (err != 0) {
+ rtrs_err(s, "Set address reuse failed, err: %d\n", err);
+- goto destroy_cm;
++ return err;
+ }
+- err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr,
+- (struct sockaddr *)&sess->s.dst_addr,
++ err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
++ (struct sockaddr *)&clt_path->s.dst_addr,
+ RTRS_CONNECT_TIMEOUT_MS);
+ if (err) {
+ rtrs_err(s, "Failed to resolve address, err: %d\n", err);
+- goto destroy_cm;
++ return err;
+ }
+ /*
+ * Combine connection status and session events. This is needed
+@@ -2055,41 +2066,27 @@ static int create_cm(struct rtrs_clt_con *con)
+ * or session state was really changed to error by device removal.
+ */
+ err = wait_event_interruptible_timeout(
+- sess->state_wq,
+- con->cm_err || sess->state != RTRS_CLT_CONNECTING,
++ clt_path->state_wq,
++ con->cm_err || clt_path->state != RTRS_CLT_CONNECTING,
+ msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS));
+ if (err == 0 || err == -ERESTARTSYS) {
+ if (err == 0)
+ err = -ETIMEDOUT;
+ /* Timedout or interrupted */
+- goto errr;
+- }
+- if (con->cm_err < 0) {
+- err = con->cm_err;
+- goto errr;
++ return err;
+ }
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) {
++ if (con->cm_err < 0)
++ return con->cm_err;
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING)
+ /* Device removal */
+- err = -ECONNABORTED;
+- goto errr;
+- }
++ return -ECONNABORTED;
+
+ return 0;
+-
+-errr:
+- stop_cm(con);
+- mutex_lock(&con->con_mutex);
+- destroy_con_cq_qp(con);
+- mutex_unlock(&con->con_mutex);
+-destroy_cm:
+- destroy_cm(con);
+-
+- return err;
+ }
+
+-static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess)
++static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+ int up;
+
+ /*
+@@ -2113,19 +2110,19 @@ static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess)
+ mutex_unlock(&clt->paths_ev_mutex);
+
+ /* Mark session as established */
+- sess->established = true;
+- sess->reconnect_attempts = 0;
+- sess->stats->reconnects.successful_cnt++;
++ clt_path->established = true;
++ clt_path->reconnect_attempts = 0;
++ clt_path->stats->reconnects.successful_cnt++;
+ }
+
+-static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess)
++static void rtrs_clt_path_down(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+
+- if (!sess->established)
++ if (!clt_path->established)
+ return;
+
+- sess->established = false;
++ clt_path->established = false;
+ mutex_lock(&clt->paths_ev_mutex);
+ WARN_ON(!clt->paths_up);
+ if (--clt->paths_up == 0)
+@@ -2133,19 +2130,19 @@ static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess)
+ mutex_unlock(&clt->paths_ev_mutex);
+ }
+
+-static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
++static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path)
+ {
+ struct rtrs_clt_con *con;
+ unsigned int cid;
+
+- WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED);
++ WARN_ON(READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED);
+
+ /*
+ * Possible race with rtrs_clt_open(), when DEVICE_REMOVAL comes
+ * exactly in between. Start destroying after it finishes.
+ */
+- mutex_lock(&sess->init_mutex);
+- mutex_unlock(&sess->init_mutex);
++ mutex_lock(&clt_path->init_mutex);
++ mutex_unlock(&clt_path->init_mutex);
+
+ /*
+ * All IO paths must observe !CONNECTED state before we
+@@ -2153,7 +2150,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
+ */
+ synchronize_rcu();
+
+- rtrs_stop_hb(&sess->s);
++ rtrs_stop_hb(&clt_path->s);
+
+ /*
+ * The order it utterly crucial: firstly disconnect and complete all
+@@ -2162,15 +2159,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
+ * eventually notify upper layer about session disconnection.
+ */
+
+- for (cid = 0; cid < sess->s.con_num; cid++) {
+- if (!sess->s.con[cid])
++ for (cid = 0; cid < clt_path->s.con_num; cid++) {
++ if (!clt_path->s.con[cid])
+ break;
+- con = to_clt_con(sess->s.con[cid]);
++ con = to_clt_con(clt_path->s.con[cid]);
+ stop_cm(con);
+ }
+- fail_all_outstanding_reqs(sess);
+- free_sess_reqs(sess);
+- rtrs_clt_sess_down(sess);
++ fail_all_outstanding_reqs(clt_path);
++ free_path_reqs(clt_path);
++ rtrs_clt_path_down(clt_path);
+
+ /*
+ * Wait for graceful shutdown, namely when peer side invokes
+@@ -2180,13 +2177,14 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
+ * since CM does not fire anything. That is fine, we are not in
+ * hurry.
+ */
+- wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt),
++ wait_event_timeout(clt_path->state_wq,
++ !atomic_read(&clt_path->connected_cnt),
+ msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS));
+
+- for (cid = 0; cid < sess->s.con_num; cid++) {
+- if (!sess->s.con[cid])
++ for (cid = 0; cid < clt_path->s.con_num; cid++) {
++ if (!clt_path->s.con[cid])
+ break;
+- con = to_clt_con(sess->s.con[cid]);
++ con = to_clt_con(clt_path->s.con[cid]);
+ mutex_lock(&con->con_mutex);
+ destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
+@@ -2195,26 +2193,26 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
+ }
+ }
+
+-static inline bool xchg_sessions(struct rtrs_clt_sess __rcu **rcu_ppcpu_path,
+- struct rtrs_clt_sess *sess,
+- struct rtrs_clt_sess *next)
++static inline bool xchg_paths(struct rtrs_clt_path __rcu **rcu_ppcpu_path,
++ struct rtrs_clt_path *clt_path,
++ struct rtrs_clt_path *next)
+ {
+- struct rtrs_clt_sess **ppcpu_path;
++ struct rtrs_clt_path **ppcpu_path;
+
+ /* Call cmpxchg() without sparse warnings */
+ ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path;
+- return sess == cmpxchg(ppcpu_path, sess, next);
++ return clt_path == cmpxchg(ppcpu_path, clt_path, next);
+ }
+
+-static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
++static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
+- struct rtrs_clt_sess *next;
++ struct rtrs_clt *clt = clt_path->clt;
++ struct rtrs_clt_path *next;
+ bool wait_for_grace = false;
+ int cpu;
+
+ mutex_lock(&clt->paths_mutex);
+- list_del_rcu(&sess->s.entry);
++ list_del_rcu(&clt_path->s.entry);
+
+ /* Make sure everybody observes path removal. */
+ synchronize_rcu();
+@@ -2255,8 +2253,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
+ * removed. If @sess is the last element, then @next is NULL.
+ */
+ rcu_read_lock();
+- next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry,
+- typeof(*next), s.entry);
++ next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path);
+ rcu_read_unlock();
+
+ /*
+@@ -2264,11 +2261,11 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
+ * removed, so change the pointer manually.
+ */
+ for_each_possible_cpu(cpu) {
+- struct rtrs_clt_sess __rcu **ppcpu_path;
++ struct rtrs_clt_path __rcu **ppcpu_path;
+
+ ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu);
+ if (rcu_dereference_protected(*ppcpu_path,
+- lockdep_is_held(&clt->paths_mutex)) != sess)
++ lockdep_is_held(&clt->paths_mutex)) != clt_path)
+ /*
+ * synchronize_rcu() was called just after deleting
+ * entry from the list, thus IO code path cannot
+@@ -2281,7 +2278,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
+ * We race with IO code path, which also changes pointer,
+ * thus we have to be careful not to overwrite it.
+ */
+- if (xchg_sessions(ppcpu_path, sess, next))
++ if (xchg_paths(ppcpu_path, clt_path, next))
+ /*
+ * @ppcpu_path was successfully replaced with @next,
+ * that means that someone could also pick up the
+@@ -2296,70 +2293,74 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
+ mutex_unlock(&clt->paths_mutex);
+ }
+
+-static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess)
++static void rtrs_clt_add_path_to_arr(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt *clt = sess->clt;
++ struct rtrs_clt *clt = clt_path->clt;
+
+ mutex_lock(&clt->paths_mutex);
+ clt->paths_num++;
+
+- list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
++ list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list);
+ mutex_unlock(&clt->paths_mutex);
+ }
+
+ static void rtrs_clt_close_work(struct work_struct *work)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = container_of(work, struct rtrs_clt_sess, close_work);
++ clt_path = container_of(work, struct rtrs_clt_path, close_work);
+
+- cancel_delayed_work_sync(&sess->reconnect_dwork);
+- rtrs_clt_stop_and_destroy_conns(sess);
+- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL);
++ cancel_delayed_work_sync(&clt_path->reconnect_dwork);
++ rtrs_clt_stop_and_destroy_conns(clt_path);
++ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL);
+ }
+
+-static int init_conns(struct rtrs_clt_sess *sess)
++static int init_conns(struct rtrs_clt_path *clt_path)
+ {
+ unsigned int cid;
+- int err;
++ int err, i;
+
+ /*
+ * On every new session connections increase reconnect counter
+ * to avoid clashes with previous sessions not yet closed
+ * sessions on a server side.
+ */
+- sess->s.recon_cnt++;
++ clt_path->s.recon_cnt++;
+
+ /* Establish all RDMA connections */
+- for (cid = 0; cid < sess->s.con_num; cid++) {
+- err = create_con(sess, cid);
++ for (cid = 0; cid < clt_path->s.con_num; cid++) {
++ err = create_con(clt_path, cid);
+ if (err)
+ goto destroy;
+
+- err = create_cm(to_clt_con(sess->s.con[cid]));
+- if (err) {
+- destroy_con(to_clt_con(sess->s.con[cid]));
++ err = create_cm(to_clt_con(clt_path->s.con[cid]));
++ if (err)
+ goto destroy;
+- }
+ }
+- err = alloc_sess_reqs(sess);
++ err = alloc_path_reqs(clt_path);
+ if (err)
+ goto destroy;
+
+- rtrs_start_hb(&sess->s);
++ rtrs_start_hb(&clt_path->s);
+
+ return 0;
+
+ destroy:
+- while (cid--) {
+- struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
++ /* Make sure we do the cleanup in the order they are created */
++ for (i = 0; i <= cid; i++) {
++ struct rtrs_clt_con *con;
+
+- stop_cm(con);
++ if (!clt_path->s.con[i])
++ break;
+
+- mutex_lock(&con->con_mutex);
+- destroy_con_cq_qp(con);
+- mutex_unlock(&con->con_mutex);
+- destroy_cm(con);
++ con = to_clt_con(clt_path->s.con[i]);
++ if (con->c.cm_id) {
++ stop_cm(con);
++ mutex_lock(&con->con_mutex);
++ destroy_con_cq_qp(con);
++ mutex_unlock(&con->con_mutex);
++ destroy_cm(con);
++ }
+ destroy_con(con);
+ }
+ /*
+@@ -2367,7 +2368,7 @@ destroy:
+ * doing rdma_resolve_addr(), switch to CONNECTION_ERR state
+ * manually to keep reconnecting.
+ */
+- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
++ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL);
+
+ return err;
+ }
+@@ -2375,31 +2376,32 @@ destroy:
+ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_iu *iu;
+
+ iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
+- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1);
+
+ if (wc->status != IB_WC_SUCCESS) {
+- rtrs_err(sess->clt, "Sess info request send failed: %s\n",
++ rtrs_err(clt_path->clt, "Path info request send failed: %s\n",
+ ib_wc_status_msg(wc->status));
+- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
++ rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL);
+ return;
+ }
+
+ rtrs_clt_update_wc_stats(con);
+ }
+
+-static int process_info_rsp(struct rtrs_clt_sess *sess,
++static int process_info_rsp(struct rtrs_clt_path *clt_path,
+ const struct rtrs_msg_info_rsp *msg)
+ {
+ unsigned int sg_cnt, total_len;
+ int i, sgi;
+
+ sg_cnt = le16_to_cpu(msg->sg_cnt);
+- if (!sg_cnt || (sess->queue_depth % sg_cnt)) {
+- rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n",
++ if (!sg_cnt || (clt_path->queue_depth % sg_cnt)) {
++ rtrs_err(clt_path->clt,
++ "Incorrect sg_cnt %d, is not multiple\n",
+ sg_cnt);
+ return -EINVAL;
+ }
+@@ -2408,15 +2410,15 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
+ * Check if IB immediate data size is enough to hold the mem_id and
+ * the offset inside the memory chunk.
+ */
+- if ((ilog2(sg_cnt - 1) + 1) + (ilog2(sess->chunk_size - 1) + 1) >
++ if ((ilog2(sg_cnt - 1) + 1) + (ilog2(clt_path->chunk_size - 1) + 1) >
+ MAX_IMM_PAYL_BITS) {
+- rtrs_err(sess->clt,
++ rtrs_err(clt_path->clt,
+ "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n",
+- MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size);
++ MAX_IMM_PAYL_BITS, sg_cnt, clt_path->chunk_size);
+ return -EINVAL;
+ }
+ total_len = 0;
+- for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) {
++ for (sgi = 0, i = 0; sgi < sg_cnt && i < clt_path->queue_depth; sgi++) {
+ const struct rtrs_sg_desc *desc = &msg->desc[sgi];
+ u32 len, rkey;
+ u64 addr;
+@@ -2427,26 +2429,28 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
+
+ total_len += len;
+
+- if (!len || (len % sess->chunk_size)) {
+- rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi,
++ if (!len || (len % clt_path->chunk_size)) {
++ rtrs_err(clt_path->clt, "Incorrect [%d].len %d\n",
++ sgi,
+ len);
+ return -EINVAL;
+ }
+- for ( ; len && i < sess->queue_depth; i++) {
+- sess->rbufs[i].addr = addr;
+- sess->rbufs[i].rkey = rkey;
++ for ( ; len && i < clt_path->queue_depth; i++) {
++ clt_path->rbufs[i].addr = addr;
++ clt_path->rbufs[i].rkey = rkey;
+
+- len -= sess->chunk_size;
+- addr += sess->chunk_size;
++ len -= clt_path->chunk_size;
++ addr += clt_path->chunk_size;
+ }
+ }
+ /* Sanity check */
+- if (sgi != sg_cnt || i != sess->queue_depth) {
+- rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n");
++ if (sgi != sg_cnt || i != clt_path->queue_depth) {
++ rtrs_err(clt_path->clt,
++ "Incorrect sg vector, not fully mapped\n");
+ return -EINVAL;
+ }
+- if (total_len != sess->chunk_size * sess->queue_depth) {
+- rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len);
++ if (total_len != clt_path->chunk_size * clt_path->queue_depth) {
++ rtrs_err(clt_path->clt, "Incorrect total_len %d\n", total_len);
+ return -EINVAL;
+ }
+
+@@ -2456,7 +2460,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
+ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context);
+- struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
++ struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
+ struct rtrs_msg_info_rsp *msg;
+ enum rtrs_clt_state state;
+ struct rtrs_iu *iu;
+@@ -2468,37 +2472,37 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
+ WARN_ON(con->c.cid);
+ iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
+ if (wc->status != IB_WC_SUCCESS) {
+- rtrs_err(sess->clt, "Sess info response recv failed: %s\n",
++ rtrs_err(clt_path->clt, "Path info response recv failed: %s\n",
+ ib_wc_status_msg(wc->status));
+ goto out;
+ }
+ WARN_ON(wc->opcode != IB_WC_RECV);
+
+ if (wc->byte_len < sizeof(*msg)) {
+- rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
++ rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n",
+ wc->byte_len);
+ goto out;
+ }
+- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
++ ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr,
+ iu->size, DMA_FROM_DEVICE);
+ msg = iu->buf;
+ if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP) {
+- rtrs_err(sess->clt, "Sess info response is malformed: type %d\n",
++ rtrs_err(clt_path->clt, "Path info response is malformed: type %d\n",
+ le16_to_cpu(msg->type));
+ goto out;
+ }
+ rx_sz = sizeof(*msg);
+ rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt);
+ if (wc->byte_len < rx_sz) {
+- rtrs_err(sess->clt, "Sess info response is malformed: size %d\n",
++ rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n",
+ wc->byte_len);
+ goto out;
+ }
+- err = process_info_rsp(sess, msg);
++ err = process_info_rsp(clt_path, msg);
+ if (err)
+ goto out;
+
+- err = post_recv_sess(sess);
++ err = post_recv_path(clt_path);
+ if (err)
+ goto out;
+
+@@ -2506,25 +2510,25 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
+
+ out:
+ rtrs_clt_update_wc_stats(con);
+- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
+- rtrs_clt_change_state_get_old(sess, state, NULL);
++ rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1);
++ rtrs_clt_change_state_get_old(clt_path, state, NULL);
+ }
+
+-static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
++static int rtrs_send_path_info(struct rtrs_clt_path *clt_path)
+ {
+- struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]);
++ struct rtrs_clt_con *usr_con = to_clt_con(clt_path->s.con[0]);
+ struct rtrs_msg_info_req *msg;
+ struct rtrs_iu *tx_iu, *rx_iu;
+ size_t rx_sz;
+ int err;
+
+ rx_sz = sizeof(struct rtrs_msg_info_rsp);
+- rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth;
++ rx_sz += sizeof(struct rtrs_sg_desc) * clt_path->queue_depth;
+
+ tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL,
+- sess->s.dev->ib_dev, DMA_TO_DEVICE,
++ clt_path->s.dev->ib_dev, DMA_TO_DEVICE,
+ rtrs_clt_info_req_done);
+- rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
++ rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, clt_path->s.dev->ib_dev,
+ DMA_FROM_DEVICE, rtrs_clt_info_rsp_done);
+ if (!tx_iu || !rx_iu) {
+ err = -ENOMEM;
+@@ -2533,33 +2537,34 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
+ /* Prepare for getting info response */
+ err = rtrs_iu_post_recv(&usr_con->c, rx_iu);
+ if (err) {
+- rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err);
++ rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %d\n", err);
+ goto out;
+ }
+ rx_iu = NULL;
+
+ msg = tx_iu->buf;
+ msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ);
+- memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname));
++ memcpy(msg->pathname, clt_path->s.sessname, sizeof(msg->pathname));
+
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr,
++ ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev,
++ tx_iu->dma_addr,
+ tx_iu->size, DMA_TO_DEVICE);
+
+ /* Send info request */
+ err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL);
+ if (err) {
+- rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err);
++ rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %d\n", err);
+ goto out;
+ }
+ tx_iu = NULL;
+
+ /* Wait for state change */
+- wait_event_interruptible_timeout(sess->state_wq,
+- sess->state != RTRS_CLT_CONNECTING,
++ wait_event_interruptible_timeout(clt_path->state_wq,
++ clt_path->state != RTRS_CLT_CONNECTING,
+ msecs_to_jiffies(
+ RTRS_CONNECT_TIMEOUT_MS));
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) {
+- if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR)
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) {
++ if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTING_ERR)
+ err = -ECONNRESET;
+ else
+ err = -ETIMEDOUT;
+@@ -2567,82 +2572,82 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
+
+ out:
+ if (tx_iu)
+- rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(tx_iu, clt_path->s.dev->ib_dev, 1);
+ if (rx_iu)
+- rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(rx_iu, clt_path->s.dev->ib_dev, 1);
+ if (err)
+ /* If we've never taken async path because of malloc problems */
+- rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL);
++ rtrs_clt_change_state_get_old(clt_path,
++ RTRS_CLT_CONNECTING_ERR, NULL);
+
+ return err;
+ }
+
+ /**
+- * init_sess() - establishes all session connections and does handshake
+- * @sess: client session.
++ * init_path() - establishes all path connections and does handshake
++ * @clt_path: client path.
+ * In case of error full close or reconnect procedure should be taken,
+ * because reconnect or close async works can be started.
+ */
+-static int init_sess(struct rtrs_clt_sess *sess)
++static int init_path(struct rtrs_clt_path *clt_path)
+ {
+ int err;
+ char str[NAME_MAX];
+ struct rtrs_addr path = {
+- .src = &sess->s.src_addr,
+- .dst = &sess->s.dst_addr,
++ .src = &clt_path->s.src_addr,
++ .dst = &clt_path->s.dst_addr,
+ };
+
+ rtrs_addr_to_str(&path, str, sizeof(str));
+
+- mutex_lock(&sess->init_mutex);
+- err = init_conns(sess);
++ mutex_lock(&clt_path->init_mutex);
++ err = init_conns(clt_path);
+ if (err) {
+- rtrs_err(sess->clt,
++ rtrs_err(clt_path->clt,
+ "init_conns() failed: err=%d path=%s [%s:%u]\n", err,
+- str, sess->hca_name, sess->hca_port);
++ str, clt_path->hca_name, clt_path->hca_port);
+ goto out;
+ }
+- err = rtrs_send_sess_info(sess);
++ err = rtrs_send_path_info(clt_path);
+ if (err) {
+- rtrs_err(
+- sess->clt,
+- "rtrs_send_sess_info() failed: err=%d path=%s [%s:%u]\n",
+- err, str, sess->hca_name, sess->hca_port);
++ rtrs_err(clt_path->clt,
++ "rtrs_send_path_info() failed: err=%d path=%s [%s:%u]\n",
++ err, str, clt_path->hca_name, clt_path->hca_port);
+ goto out;
+ }
+- rtrs_clt_sess_up(sess);
++ rtrs_clt_path_up(clt_path);
+ out:
+- mutex_unlock(&sess->init_mutex);
++ mutex_unlock(&clt_path->init_mutex);
+
+ return err;
+ }
+
+ static void rtrs_clt_reconnect_work(struct work_struct *work)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ struct rtrs_clt *clt;
+ unsigned int delay_ms;
+ int err;
+
+- sess = container_of(to_delayed_work(work), struct rtrs_clt_sess,
+- reconnect_dwork);
+- clt = sess->clt;
++ clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path,
++ reconnect_dwork);
++ clt = clt_path->clt;
+
+- if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING)
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_RECONNECTING)
+ return;
+
+- if (sess->reconnect_attempts >= clt->max_reconnect_attempts) {
+- /* Close a session completely if max attempts is reached */
+- rtrs_clt_close_conns(sess, false);
++ if (clt_path->reconnect_attempts >= clt->max_reconnect_attempts) {
++ /* Close a path completely if max attempts is reached */
++ rtrs_clt_close_conns(clt_path, false);
+ return;
+ }
+- sess->reconnect_attempts++;
++ clt_path->reconnect_attempts++;
+
+ /* Stop everything */
+- rtrs_clt_stop_and_destroy_conns(sess);
++ rtrs_clt_stop_and_destroy_conns(clt_path);
+ msleep(RTRS_RECONNECT_BACKOFF);
+- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) {
+- err = init_sess(sess);
++ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) {
++ err = init_path(clt_path);
+ if (err)
+ goto reconnect_again;
+ }
+@@ -2650,10 +2655,10 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
+ return;
+
+ reconnect_again:
+- if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) {
+- sess->stats->reconnects.fail_cnt++;
++ if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) {
++ clt_path->stats->reconnects.fail_cnt++;
+ delay_ms = clt->reconnect_delay_sec * 1000;
+- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
++ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork,
+ msecs_to_jiffies(delay_ms +
+ prandom_u32() %
+ RTRS_RECONNECT_SEED));
+@@ -2664,6 +2669,8 @@ static void rtrs_clt_dev_release(struct device *dev)
+ {
+ struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev);
+
++ mutex_destroy(&clt->paths_ev_mutex);
++ mutex_destroy(&clt->paths_mutex);
+ kfree(clt);
+ }
+
+@@ -2693,6 +2700,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
+ return ERR_PTR(-ENOMEM);
+ }
+
++ clt->dev.class = rtrs_clt_dev_class;
++ clt->dev.release = rtrs_clt_dev_release;
+ uuid_gen(&clt->paths_uuid);
+ INIT_LIST_HEAD_RCU(&clt->paths_list);
+ clt->paths_num = paths_num;
+@@ -2709,58 +2718,56 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
+ init_waitqueue_head(&clt->permits_wait);
+ mutex_init(&clt->paths_ev_mutex);
+ mutex_init(&clt->paths_mutex);
++ device_initialize(&clt->dev);
+
+- clt->dev.class = rtrs_clt_dev_class;
+- clt->dev.release = rtrs_clt_dev_release;
+ err = dev_set_name(&clt->dev, "%s", sessname);
+ if (err)
+- goto err;
++ goto err_put;
++
+ /*
+ * Suppress user space notification until
+ * sysfs files are created
+ */
+ dev_set_uevent_suppress(&clt->dev, true);
+- err = device_register(&clt->dev);
+- if (err) {
+- put_device(&clt->dev);
+- goto err;
+- }
++ err = device_add(&clt->dev);
++ if (err)
++ goto err_put;
+
+ clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj);
+ if (!clt->kobj_paths) {
+ err = -ENOMEM;
+- goto err_dev;
++ goto err_del;
+ }
+ err = rtrs_clt_create_sysfs_root_files(clt);
+ if (err) {
+ kobject_del(clt->kobj_paths);
+ kobject_put(clt->kobj_paths);
+- goto err_dev;
++ goto err_del;
+ }
+ dev_set_uevent_suppress(&clt->dev, false);
+ kobject_uevent(&clt->dev.kobj, KOBJ_ADD);
+
+ return clt;
+-err_dev:
+- device_unregister(&clt->dev);
+-err:
++err_del:
++ device_del(&clt->dev);
++err_put:
+ free_percpu(clt->pcpu_path);
+- kfree(clt);
++ put_device(&clt->dev);
+ return ERR_PTR(err);
+ }
+
+ static void free_clt(struct rtrs_clt *clt)
+ {
+- free_permits(clt);
+ free_percpu(clt->pcpu_path);
+- mutex_destroy(&clt->paths_ev_mutex);
+- mutex_destroy(&clt->paths_mutex);
+- /* release callback will free clt in last put */
++
++ /*
++ * release callback will free clt and destroy mutexes in last put
++ */
+ device_unregister(&clt->dev);
+ }
+
+ /**
+- * rtrs_clt_open() - Open a session to an RTRS server
++ * rtrs_clt_open() - Open a path to an RTRS server
+ * @ops: holds the link event callback and the private pointer.
+ * @sessname: name of the session
+ * @paths: Paths to be established defined by their src and dst addresses
+@@ -2778,17 +2785,23 @@ static void free_clt(struct rtrs_clt *clt)
+ * Return a valid pointer on success otherwise PTR_ERR.
+ */
+ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
+- const char *sessname,
++ const char *pathname,
+ const struct rtrs_addr *paths,
+ size_t paths_num, u16 port,
+ size_t pdu_sz, u8 reconnect_delay_sec,
+ s16 max_reconnect_attempts, u32 nr_poll_queues)
+ {
+- struct rtrs_clt_sess *sess, *tmp;
++ struct rtrs_clt_path *clt_path, *tmp;
+ struct rtrs_clt *clt;
+ int err, i;
+
+- clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv,
++ if (strchr(pathname, '/') || strchr(pathname, '.')) {
++ pr_err("pathname cannot contain / and .\n");
++ err = -EINVAL;
++ goto out;
++ }
++
++ clt = alloc_clt(pathname, paths_num, port, pdu_sz, ops->priv,
+ ops->link_ev,
+ reconnect_delay_sec,
+ max_reconnect_attempts);
+@@ -2797,49 +2810,49 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
+ goto out;
+ }
+ for (i = 0; i < paths_num; i++) {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+- sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
++ clt_path = alloc_path(clt, &paths[i], nr_cpu_ids,
+ nr_poll_queues);
+- if (IS_ERR(sess)) {
+- err = PTR_ERR(sess);
+- goto close_all_sess;
++ if (IS_ERR(clt_path)) {
++ err = PTR_ERR(clt_path);
++ goto close_all_path;
+ }
+ if (!i)
+- sess->for_new_clt = 1;
+- list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
++ clt_path->for_new_clt = 1;
++ list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list);
+
+- err = init_sess(sess);
++ err = init_path(clt_path);
+ if (err) {
+- list_del_rcu(&sess->s.entry);
+- rtrs_clt_close_conns(sess, true);
+- free_percpu(sess->stats->pcpu_stats);
+- kfree(sess->stats);
+- free_sess(sess);
+- goto close_all_sess;
++ list_del_rcu(&clt_path->s.entry);
++ rtrs_clt_close_conns(clt_path, true);
++ free_percpu(clt_path->stats->pcpu_stats);
++ kfree(clt_path->stats);
++ free_path(clt_path);
++ goto close_all_path;
+ }
+
+- err = rtrs_clt_create_sess_files(sess);
++ err = rtrs_clt_create_path_files(clt_path);
+ if (err) {
+- list_del_rcu(&sess->s.entry);
+- rtrs_clt_close_conns(sess, true);
+- free_percpu(sess->stats->pcpu_stats);
+- kfree(sess->stats);
+- free_sess(sess);
+- goto close_all_sess;
++ list_del_rcu(&clt_path->s.entry);
++ rtrs_clt_close_conns(clt_path, true);
++ free_percpu(clt_path->stats->pcpu_stats);
++ kfree(clt_path->stats);
++ free_path(clt_path);
++ goto close_all_path;
+ }
+ }
+ err = alloc_permits(clt);
+ if (err)
+- goto close_all_sess;
++ goto close_all_path;
+
+ return clt;
+
+-close_all_sess:
+- list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
+- rtrs_clt_destroy_sess_files(sess, NULL);
+- rtrs_clt_close_conns(sess, true);
+- kobject_put(&sess->kobj);
++close_all_path:
++ list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) {
++ rtrs_clt_destroy_path_files(clt_path, NULL);
++ rtrs_clt_close_conns(clt_path, true);
++ kobject_put(&clt_path->kobj);
+ }
+ rtrs_clt_destroy_sysfs_root(clt);
+ free_clt(clt);
+@@ -2850,37 +2863,39 @@ out:
+ EXPORT_SYMBOL(rtrs_clt_open);
+
+ /**
+- * rtrs_clt_close() - Close a session
++ * rtrs_clt_close() - Close a path
+ * @clt: Session handle. Session is freed upon return.
+ */
+ void rtrs_clt_close(struct rtrs_clt *clt)
+ {
+- struct rtrs_clt_sess *sess, *tmp;
++ struct rtrs_clt_path *clt_path, *tmp;
+
+ /* Firstly forbid sysfs access */
+ rtrs_clt_destroy_sysfs_root(clt);
+
+ /* Now it is safe to iterate over all paths without locks */
+- list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
+- rtrs_clt_close_conns(sess, true);
+- rtrs_clt_destroy_sess_files(sess, NULL);
+- kobject_put(&sess->kobj);
++ list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) {
++ rtrs_clt_close_conns(clt_path, true);
++ rtrs_clt_destroy_path_files(clt_path, NULL);
++ kobject_put(&clt_path->kobj);
+ }
++ free_permits(clt);
+ free_clt(clt);
+ }
+ EXPORT_SYMBOL(rtrs_clt_close);
+
+-int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess)
++int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path)
+ {
+ enum rtrs_clt_state old_state;
+ int err = -EBUSY;
+ bool changed;
+
+- changed = rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING,
++ changed = rtrs_clt_change_state_get_old(clt_path,
++ RTRS_CLT_RECONNECTING,
+ &old_state);
+ if (changed) {
+- sess->reconnect_attempts = 0;
+- queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0);
++ clt_path->reconnect_attempts = 0;
++ queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0);
+ }
+ if (changed || old_state == RTRS_CLT_RECONNECTING) {
+ /*
+@@ -2888,15 +2903,15 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess)
+ * execution, so do the flush if we have queued something
+ * right now or work is pending.
+ */
+- flush_delayed_work(&sess->reconnect_dwork);
+- err = (READ_ONCE(sess->state) ==
++ flush_delayed_work(&clt_path->reconnect_dwork);
++ err = (READ_ONCE(clt_path->state) ==
+ RTRS_CLT_CONNECTED ? 0 : -ENOTCONN);
+ }
+
+ return err;
+ }
+
+-int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
++int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *clt_path,
+ const struct attribute *sysfs_self)
+ {
+ enum rtrs_clt_state old_state;
+@@ -2912,16 +2927,16 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
+ * removing the path.
+ */
+ do {
+- rtrs_clt_close_conns(sess, true);
+- changed = rtrs_clt_change_state_get_old(sess,
++ rtrs_clt_close_conns(clt_path, true);
++ changed = rtrs_clt_change_state_get_old(clt_path,
+ RTRS_CLT_DEAD,
+ &old_state);
+ } while (!changed && old_state != RTRS_CLT_DEAD);
+
+ if (changed) {
+- rtrs_clt_remove_path_from_arr(sess);
+- rtrs_clt_destroy_sess_files(sess, sysfs_self);
+- kobject_put(&sess->kobj);
++ rtrs_clt_remove_path_from_arr(clt_path);
++ rtrs_clt_destroy_path_files(clt_path, sysfs_self);
++ kobject_put(&clt_path->kobj);
+ }
+
+ return 0;
+@@ -2967,7 +2982,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
+ struct scatterlist *sg, unsigned int sg_cnt)
+ {
+ struct rtrs_clt_io_req *req;
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+
+ enum dma_data_direction dma_dir;
+ int err = -ECONNABORTED, i;
+@@ -2989,19 +3004,19 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
+
+ rcu_read_lock();
+ for (path_it_init(&it, clt);
+- (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
++ (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
+ continue;
+
+- if (usr_len + hdr_len > sess->max_hdr_size) {
+- rtrs_wrn_rl(sess->clt,
++ if (usr_len + hdr_len > clt_path->max_hdr_size) {
++ rtrs_wrn_rl(clt_path->clt,
+ "%s request failed, user message size is %zu and header length %zu, but max size is %u\n",
+ dir == READ ? "Read" : "Write",
+- usr_len, hdr_len, sess->max_hdr_size);
++ usr_len, hdr_len, clt_path->max_hdr_size);
+ err = -EMSGSIZE;
+ break;
+ }
+- req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv,
++ req = rtrs_clt_get_req(clt_path, ops->conf_fn, permit, ops->priv,
+ vec, usr_len, sg, sg_cnt, data_len,
+ dma_dir);
+ if (dir == READ)
+@@ -3027,16 +3042,16 @@ int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+ /* If no path, return -1 for block layer not to try again */
+ int cnt = -1;
+ struct rtrs_con *con;
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ struct path_it it;
+
+ rcu_read_lock();
+ for (path_it_init(&it, clt);
+- (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+- if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
++ (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
++ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED)
+ continue;
+
+- con = sess->s.con[index + 1];
++ con = clt_path->s.con[index + 1];
+ cnt = ib_process_cq_direct(con->cq, -1);
+ if (cnt)
+ break;
+@@ -3074,12 +3089,12 @@ EXPORT_SYMBOL(rtrs_clt_query);
+ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+ struct rtrs_addr *addr)
+ {
+- struct rtrs_clt_sess *sess;
++ struct rtrs_clt_path *clt_path;
+ int err;
+
+- sess = alloc_sess(clt, addr, nr_cpu_ids, 0);
+- if (IS_ERR(sess))
+- return PTR_ERR(sess);
++ clt_path = alloc_path(clt, addr, nr_cpu_ids, 0);
++ if (IS_ERR(clt_path))
++ return PTR_ERR(clt_path);
+
+ mutex_lock(&clt->paths_mutex);
+ if (clt->paths_num == 0) {
+@@ -3088,7 +3103,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+ * the addition of the first path is like a new session for
+ * the storage server
+ */
+- sess->for_new_clt = 1;
++ clt_path->for_new_clt = 1;
+ }
+
+ mutex_unlock(&clt->paths_mutex);
+@@ -3098,24 +3113,24 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+ * IO will never grab it. Also it is very important to add
+ * path before init, since init fires LINK_CONNECTED event.
+ */
+- rtrs_clt_add_path_to_arr(sess);
++ rtrs_clt_add_path_to_arr(clt_path);
+
+- err = init_sess(sess);
++ err = init_path(clt_path);
+ if (err)
+- goto close_sess;
++ goto close_path;
+
+- err = rtrs_clt_create_sess_files(sess);
++ err = rtrs_clt_create_path_files(clt_path);
+ if (err)
+- goto close_sess;
++ goto close_path;
+
+ return 0;
+
+-close_sess:
+- rtrs_clt_remove_path_from_arr(sess);
+- rtrs_clt_close_conns(sess, true);
+- free_percpu(sess->stats->pcpu_stats);
+- kfree(sess->stats);
+- free_sess(sess);
++close_path:
++ rtrs_clt_remove_path_from_arr(clt_path);
++ rtrs_clt_close_conns(clt_path, true);
++ free_percpu(clt_path->stats->pcpu_stats);
++ kfree(clt_path->stats);
++ free_path(clt_path);
+
+ return err;
+ }
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+index 9dc819885ec71..7f2a64995fb61 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+@@ -124,8 +124,8 @@ struct rtrs_rbuf {
+ u32 rkey;
+ };
+
+-struct rtrs_clt_sess {
+- struct rtrs_sess s;
++struct rtrs_clt_path {
++ struct rtrs_path s;
+ struct rtrs_clt *clt;
+ wait_queue_head_t state_wq;
+ enum rtrs_clt_state state;
+@@ -156,7 +156,7 @@ struct rtrs_clt_sess {
+ struct rtrs_clt {
+ struct list_head paths_list; /* rcu protected list */
+ size_t paths_num;
+- struct rtrs_clt_sess
++ struct rtrs_clt_path
+ __rcu * __percpu *pcpu_path;
+ uuid_t paths_uuid;
+ int paths_up;
+@@ -186,9 +186,9 @@ static inline struct rtrs_clt_con *to_clt_con(struct rtrs_con *c)
+ return container_of(c, struct rtrs_clt_con, c);
+ }
+
+-static inline struct rtrs_clt_sess *to_clt_sess(struct rtrs_sess *s)
++static inline struct rtrs_clt_path *to_clt_path(struct rtrs_path *s)
+ {
+- return container_of(s, struct rtrs_clt_sess, s);
++ return container_of(s, struct rtrs_clt_path, s);
+ }
+
+ static inline int permit_size(struct rtrs_clt *clt)
+@@ -201,16 +201,16 @@ static inline struct rtrs_permit *get_permit(struct rtrs_clt *clt, int idx)
+ return (struct rtrs_permit *)(clt->permits + permit_size(clt) * idx);
+ }
+
+-int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess);
+-void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait);
++int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *path);
++void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait);
+ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
+ struct rtrs_addr *addr);
+-int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess,
++int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *path,
+ const struct attribute *sysfs_self);
+
+ void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value);
+ int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt);
+-void free_sess(struct rtrs_clt_sess *sess);
++void free_path(struct rtrs_clt_path *clt_path);
+
+ /* rtrs-clt-stats.c */
+
+@@ -243,8 +243,8 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats,
+ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt);
+ void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt);
+
+-int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess);
+-void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess,
++int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path);
++void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path,
+ const struct attribute *sysfs_self);
+
+ #endif /* RTRS_CLT_H */
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+index d12ddfa507479..b69fa1fe9a707 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
++++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+@@ -23,6 +23,17 @@
+ #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
+ __stringify(RTRS_PROTO_VER_MINOR)
+
++/*
++ * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
++ * and the minimum chunk size is 4096 (2^12).
++ * So the maximum sess_queue_depth is 65536 (2^16) in theory.
++ * But mempool_create, create_qp and ib_post_send fail with
++ * "cannot allocate memory" error if sess_queue_depth is too big.
++ * Therefore the pratical max value of sess_queue_depth is
++ * somewhere between 1 and 65534 and it depends on the system.
++ */
++#define MAX_SESS_QUEUE_DEPTH 65535
++
+ enum rtrs_imm_const {
+ MAX_IMM_TYPE_BITS = 4,
+ MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
+@@ -46,16 +57,6 @@ enum {
+
+ MAX_PATHS_NUM = 128,
+
+- /*
+- * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
+- * and the minimum chunk size is 4096 (2^12).
+- * So the maximum sess_queue_depth is 65536 (2^16) in theory.
+- * But mempool_create, create_qp and ib_post_send fail with
+- * "cannot allocate memory" error if sess_queue_depth is too big.
+- * Therefore the pratical max value of sess_queue_depth is
+- * somewhere between 1 and 65534 and it depends on the system.
+- */
+- MAX_SESS_QUEUE_DEPTH = 65535,
+ MIN_CHUNK_SIZE = 8192,
+
+ RTRS_HB_INTERVAL_MS = 5000,
+@@ -90,7 +91,7 @@ struct rtrs_ib_dev {
+ };
+
+ struct rtrs_con {
+- struct rtrs_sess *sess;
++ struct rtrs_path *path;
+ struct ib_qp *qp;
+ struct ib_cq *cq;
+ struct rdma_cm_id *cm_id;
+@@ -100,7 +101,7 @@ struct rtrs_con {
+ atomic_t sq_wr_avail;
+ };
+
+-struct rtrs_sess {
++struct rtrs_path {
+ struct list_head entry;
+ struct sockaddr_storage dst_addr;
+ struct sockaddr_storage src_addr;
+@@ -229,11 +230,11 @@ struct rtrs_msg_conn_rsp {
+ /**
+ * struct rtrs_msg_info_req
+ * @type: @RTRS_MSG_INFO_REQ
+- * @sessname: Session name chosen by client
++ * @pathname: Path name chosen by client
+ */
+ struct rtrs_msg_info_req {
+ __le16 type;
+- u8 sessname[NAME_MAX];
++ u8 pathname[NAME_MAX];
+ u8 reserved[15];
+ };
+
+@@ -313,19 +314,19 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
+
+ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
+
+-int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
++int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
+ u32 max_send_sge, int cq_vector, int nr_cqe,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx);
+ void rtrs_cq_qp_destroy(struct rtrs_con *con);
+
+-void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
++void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
+ unsigned int interval_ms, unsigned int missed_max,
+ void (*err_handler)(struct rtrs_con *con),
+ struct workqueue_struct *wq);
+-void rtrs_start_hb(struct rtrs_sess *sess);
+-void rtrs_stop_hb(struct rtrs_sess *sess);
+-void rtrs_send_hb_ack(struct rtrs_sess *sess);
++void rtrs_start_hb(struct rtrs_path *path);
++void rtrs_stop_hb(struct rtrs_path *path);
++void rtrs_send_hb_ack(struct rtrs_path *path);
+
+ void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
+ struct rtrs_rdma_dev_pd *pool);
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+index 20efd44297fbb..309080184aac7 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+@@ -15,10 +15,10 @@
+
+ static void rtrs_srv_release(struct kobject *kobj)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+
+- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
+- kfree(sess);
++ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
++ kfree(srv_path);
+ }
+
+ static struct kobj_type ktype = {
+@@ -36,24 +36,25 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+ {
+- struct rtrs_srv_sess *sess;
+- struct rtrs_sess *s;
++ struct rtrs_srv_path *srv_path;
++ struct rtrs_path *s;
+ char str[MAXHOSTNAMELEN];
+
+- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
+- s = &sess->s;
++ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
++ s = &srv_path->s;
+ if (!sysfs_streq(buf, "1")) {
+ rtrs_err(s, "%s: invalid value: '%s'\n",
+ attr->attr.name, buf);
+ return -EINVAL;
+ }
+
+- sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str));
++ sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr, str,
++ sizeof(str));
+
+ rtrs_info(s, "disconnect for path %s requested\n", str);
+ /* first remove sysfs itself to avoid deadlock */
+- sysfs_remove_file_self(&sess->kobj, &attr->attr);
+- close_sess(sess);
++ sysfs_remove_file_self(&srv_path->kobj, &attr->attr);
++ close_path(srv_path);
+
+ return count;
+ }
+@@ -66,11 +67,11 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ struct rtrs_con *usr_con;
+
+- sess = container_of(kobj, typeof(*sess), kobj);
+- usr_con = sess->s.con[0];
++ srv_path = container_of(kobj, typeof(*srv_path), kobj);
++ usr_con = srv_path->s.con[0];
+
+ return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num);
+ }
+@@ -82,11 +83,11 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+
+- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
++ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
+
+- return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name);
++ return sysfs_emit(page, "%s\n", srv_path->s.dev->ib_dev->name);
+ }
+
+ static struct kobj_attribute rtrs_srv_hca_name_attr =
+@@ -96,11 +97,11 @@ static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ int cnt;
+
+- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
+- cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr,
++ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
++ cnt = sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr,
+ page, PAGE_SIZE);
+ return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n");
+ }
+@@ -112,11 +113,11 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *page)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ int len;
+
+- sess = container_of(kobj, struct rtrs_srv_sess, kobj);
+- len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
++ srv_path = container_of(kobj, struct rtrs_srv_path, kobj);
++ len = sockaddr_to_str((struct sockaddr *)&srv_path->s.src_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
+@@ -125,7 +126,7 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj,
+ static struct kobj_attribute rtrs_srv_dst_addr_attr =
+ __ATTR(dst_addr, 0444, rtrs_srv_dst_addr_show, NULL);
+
+-static struct attribute *rtrs_srv_sess_attrs[] = {
++static struct attribute *rtrs_srv_path_attrs[] = {
+ &rtrs_srv_hca_name_attr.attr,
+ &rtrs_srv_hca_port_attr.attr,
+ &rtrs_srv_src_addr_attr.attr,
+@@ -134,8 +135,8 @@ static struct attribute *rtrs_srv_sess_attrs[] = {
+ NULL,
+ };
+
+-static const struct attribute_group rtrs_srv_sess_attr_group = {
+- .attrs = rtrs_srv_sess_attrs,
++static const struct attribute_group rtrs_srv_path_attr_group = {
++ .attrs = rtrs_srv_path_attrs,
+ };
+
+ STAT_ATTR(struct rtrs_srv_stats, rdma,
+@@ -151,9 +152,9 @@ static const struct attribute_group rtrs_srv_stats_attr_group = {
+ .attrs = rtrs_srv_stats_attrs,
+ };
+
+-static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
++static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ int err = 0;
+
+ mutex_lock(&srv->paths_mutex);
+@@ -164,7 +165,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
+ goto unlock;
+ }
+ srv->dev.class = rtrs_dev_class;
+- err = dev_set_name(&srv->dev, "%s", sess->s.sessname);
++ err = dev_set_name(&srv->dev, "%s", srv_path->s.sessname);
+ if (err)
+ goto unlock;
+
+@@ -196,9 +197,9 @@ unlock:
+ }
+
+ static void
+-rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
++rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+
+ mutex_lock(&srv->paths_mutex);
+ if (!--srv->dev_ref) {
+@@ -213,7 +214,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess)
+ }
+ }
+
+-static void rtrs_srv_sess_stats_release(struct kobject *kobj)
++static void rtrs_srv_path_stats_release(struct kobject *kobj)
+ {
+ struct rtrs_srv_stats *stats;
+
+@@ -224,22 +225,22 @@ static void rtrs_srv_sess_stats_release(struct kobject *kobj)
+
+ static struct kobj_type ktype_stats = {
+ .sysfs_ops = &kobj_sysfs_ops,
+- .release = rtrs_srv_sess_stats_release,
++ .release = rtrs_srv_path_stats_release,
+ };
+
+-static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
++static int rtrs_srv_create_stats_files(struct rtrs_srv_path *srv_path)
+ {
+ int err;
+- struct rtrs_sess *s = &sess->s;
++ struct rtrs_path *s = &srv_path->s;
+
+- err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats,
+- &sess->kobj, "stats");
++ err = kobject_init_and_add(&srv_path->stats->kobj_stats, &ktype_stats,
++ &srv_path->kobj, "stats");
+ if (err) {
+ rtrs_err(s, "kobject_init_and_add(): %d\n", err);
+- kobject_put(&sess->stats->kobj_stats);
++ kobject_put(&srv_path->stats->kobj_stats);
+ return err;
+ }
+- err = sysfs_create_group(&sess->stats->kobj_stats,
++ err = sysfs_create_group(&srv_path->stats->kobj_stats,
+ &rtrs_srv_stats_attr_group);
+ if (err) {
+ rtrs_err(s, "sysfs_create_group(): %d\n", err);
+@@ -249,64 +250,64 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess)
+ return 0;
+
+ err:
+- kobject_del(&sess->stats->kobj_stats);
+- kobject_put(&sess->stats->kobj_stats);
++ kobject_del(&srv_path->stats->kobj_stats);
++ kobject_put(&srv_path->stats->kobj_stats);
+
+ return err;
+ }
+
+-int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess)
++int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
+- struct rtrs_sess *s = &sess->s;
++ struct rtrs_srv *srv = srv_path->srv;
++ struct rtrs_path *s = &srv_path->s;
+ char str[NAME_MAX];
+ int err;
+ struct rtrs_addr path = {
+- .src = &sess->s.dst_addr,
+- .dst = &sess->s.src_addr,
++ .src = &srv_path->s.dst_addr,
++ .dst = &srv_path->s.src_addr,
+ };
+
+ rtrs_addr_to_str(&path, str, sizeof(str));
+- err = rtrs_srv_create_once_sysfs_root_folders(sess);
++ err = rtrs_srv_create_once_sysfs_root_folders(srv_path);
+ if (err)
+ return err;
+
+- err = kobject_init_and_add(&sess->kobj, &ktype, srv->kobj_paths,
++ err = kobject_init_and_add(&srv_path->kobj, &ktype, srv->kobj_paths,
+ "%s", str);
+ if (err) {
+ rtrs_err(s, "kobject_init_and_add(): %d\n", err);
+ goto destroy_root;
+ }
+- err = sysfs_create_group(&sess->kobj, &rtrs_srv_sess_attr_group);
++ err = sysfs_create_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
+ if (err) {
+ rtrs_err(s, "sysfs_create_group(): %d\n", err);
+ goto put_kobj;
+ }
+- err = rtrs_srv_create_stats_files(sess);
++ err = rtrs_srv_create_stats_files(srv_path);
+ if (err)
+ goto remove_group;
+
+ return 0;
+
+ remove_group:
+- sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
++ sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
+ put_kobj:
+- kobject_del(&sess->kobj);
++ kobject_del(&srv_path->kobj);
+ destroy_root:
+- kobject_put(&sess->kobj);
+- rtrs_srv_destroy_once_sysfs_root_folders(sess);
++ kobject_put(&srv_path->kobj);
++ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
+
+ return err;
+ }
+
+-void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess)
++void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
+ {
+- if (sess->kobj.state_in_sysfs) {
+- kobject_del(&sess->stats->kobj_stats);
+- kobject_put(&sess->stats->kobj_stats);
+- sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group);
+- kobject_put(&sess->kobj);
++ if (srv_path->kobj.state_in_sysfs) {
++ kobject_del(&srv_path->stats->kobj_stats);
++ kobject_put(&srv_path->stats->kobj_stats);
++ sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
++ kobject_put(&srv_path->kobj);
+
+- rtrs_srv_destroy_once_sysfs_root_folders(sess);
++ rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
+ }
+ }
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+index 716ef7b235587..733116554e0bc 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+@@ -62,19 +62,19 @@ static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c)
+ return container_of(c, struct rtrs_srv_con, c);
+ }
+
+-static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s)
++static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s)
+ {
+- return container_of(s, struct rtrs_srv_sess, s);
++ return container_of(s, struct rtrs_srv_path, s);
+ }
+
+-static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
++static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
+ enum rtrs_srv_state new_state)
+ {
+ enum rtrs_srv_state old_state;
+ bool changed = false;
+
+- spin_lock_irq(&sess->state_lock);
+- old_state = sess->state;
++ spin_lock_irq(&srv_path->state_lock);
++ old_state = srv_path->state;
+ switch (new_state) {
+ case RTRS_SRV_CONNECTED:
+ if (old_state == RTRS_SRV_CONNECTING)
+@@ -93,8 +93,8 @@ static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
+ break;
+ }
+ if (changed)
+- sess->state = new_state;
+- spin_unlock_irq(&sess->state_lock);
++ srv_path->state = new_state;
++ spin_unlock_irq(&srv_path->state_lock);
+
+ return changed;
+ }
+@@ -106,16 +106,16 @@ static void free_id(struct rtrs_srv_op *id)
+ kfree(id);
+ }
+
+-static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess)
++static void rtrs_srv_free_ops_ids(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ int i;
+
+- if (sess->ops_ids) {
++ if (srv_path->ops_ids) {
+ for (i = 0; i < srv->queue_depth; i++)
+- free_id(sess->ops_ids[i]);
+- kfree(sess->ops_ids);
+- sess->ops_ids = NULL;
++ free_id(srv_path->ops_ids[i]);
++ kfree(srv_path->ops_ids);
++ srv_path->ops_ids = NULL;
+ }
+ }
+
+@@ -127,21 +127,24 @@ static struct ib_cqe io_comp_cqe = {
+
+ static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref)
+ {
+- struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref);
++ struct rtrs_srv_path *srv_path = container_of(ref,
++ struct rtrs_srv_path,
++ ids_inflight_ref);
+
+- percpu_ref_exit(&sess->ids_inflight_ref);
+- complete(&sess->complete_done);
++ percpu_ref_exit(&srv_path->ids_inflight_ref);
++ complete(&srv_path->complete_done);
+ }
+
+-static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess)
++static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_srv_op *id;
+ int i, ret;
+
+- sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids),
+- GFP_KERNEL);
+- if (!sess->ops_ids)
++ srv_path->ops_ids = kcalloc(srv->queue_depth,
++ sizeof(*srv_path->ops_ids),
++ GFP_KERNEL);
++ if (!srv_path->ops_ids)
+ goto err;
+
+ for (i = 0; i < srv->queue_depth; ++i) {
+@@ -149,44 +152,44 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess)
+ if (!id)
+ goto err;
+
+- sess->ops_ids[i] = id;
++ srv_path->ops_ids[i] = id;
+ }
+
+- ret = percpu_ref_init(&sess->ids_inflight_ref,
++ ret = percpu_ref_init(&srv_path->ids_inflight_ref,
+ rtrs_srv_inflight_ref_release, 0, GFP_KERNEL);
+ if (ret) {
+ pr_err("Percpu reference init failed\n");
+ goto err;
+ }
+- init_completion(&sess->complete_done);
++ init_completion(&srv_path->complete_done);
+
+ return 0;
+
+ err:
+- rtrs_srv_free_ops_ids(sess);
++ rtrs_srv_free_ops_ids(srv_path);
+ return -ENOMEM;
+ }
+
+-static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess)
++static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_path *srv_path)
+ {
+- percpu_ref_get(&sess->ids_inflight_ref);
++ percpu_ref_get(&srv_path->ids_inflight_ref);
+ }
+
+-static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess)
++static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_path *srv_path)
+ {
+- percpu_ref_put(&sess->ids_inflight_ref);
++ percpu_ref_put(&srv_path->ids_inflight_ref);
+ }
+
+ static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rtrs_err(s, "REG MR failed: %s\n",
+ ib_wc_status_msg(wc->status));
+- close_sess(sess);
++ close_path(srv_path);
+ return;
+ }
+ }
+@@ -197,9 +200,9 @@ static struct ib_cqe local_reg_cqe = {
+
+ static int rdma_write_sg(struct rtrs_srv_op *id)
+ {
+- struct rtrs_sess *s = id->con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
+- dma_addr_t dma_addr = sess->dma_addr[id->msg_id];
++ struct rtrs_path *s = id->con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
++ dma_addr_t dma_addr = srv_path->dma_addr[id->msg_id];
+ struct rtrs_srv_mr *srv_mr;
+ struct ib_send_wr inv_wr;
+ struct ib_rdma_wr imm_wr;
+@@ -233,7 +236,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
+ return -EINVAL;
+ }
+
+- plist->lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ plist->lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
+ offset += plist->length;
+
+ wr->wr.sg_list = plist;
+@@ -284,7 +287,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
+ if (always_invalidate) {
+ struct rtrs_msg_rkey_rsp *msg;
+
+- srv_mr = &sess->mrs[id->msg_id];
++ srv_mr = &srv_path->mrs[id->msg_id];
+ rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
+ rwr.wr.num_sge = 0;
+@@ -300,11 +303,11 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
+
+ list.addr = srv_mr->iu->dma_addr;
+ list.length = sizeof(*msg);
+- list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
+ imm_wr.wr.sg_list = &list;
+ imm_wr.wr.num_sge = 1;
+ imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
++ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
+ srv_mr->iu->dma_addr,
+ srv_mr->iu->size, DMA_TO_DEVICE);
+ } else {
+@@ -317,7 +320,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
+ 0, need_inval));
+
+ imm_wr.wr.wr_cqe = &io_comp_cqe;
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr,
++ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, dma_addr,
+ offset, DMA_BIDIRECTIONAL);
+
+ err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL);
+@@ -341,8 +344,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
+ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
+ int errno)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct ib_send_wr inv_wr, *wr = NULL;
+ struct ib_rdma_wr imm_wr;
+ struct ib_reg_wr rwr;
+@@ -402,7 +405,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
+ struct ib_sge list;
+ struct rtrs_msg_rkey_rsp *msg;
+
+- srv_mr = &sess->mrs[id->msg_id];
++ srv_mr = &srv_path->mrs[id->msg_id];
+ rwr.wr.next = &imm_wr.wr;
+ rwr.wr.opcode = IB_WR_REG_MR;
+ rwr.wr.wr_cqe = &local_reg_cqe;
+@@ -419,11 +422,11 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
+
+ list.addr = srv_mr->iu->dma_addr;
+ list.length = sizeof(*msg);
+- list.lkey = sess->s.dev->ib_pd->local_dma_lkey;
++ list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey;
+ imm_wr.wr.sg_list = &list;
+ imm_wr.wr.num_sge = 1;
+ imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM;
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev,
++ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
+ srv_mr->iu->dma_addr,
+ srv_mr->iu->size, DMA_TO_DEVICE);
+ } else {
+@@ -444,11 +447,11 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
+ return err;
+ }
+
+-void close_sess(struct rtrs_srv_sess *sess)
++void close_path(struct rtrs_srv_path *srv_path)
+ {
+- if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING))
+- queue_work(rtrs_wq, &sess->close_work);
+- WARN_ON(sess->state != RTRS_SRV_CLOSING);
++ if (rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSING))
++ queue_work(rtrs_wq, &srv_path->close_work);
++ WARN_ON(srv_path->state != RTRS_SRV_CLOSING);
+ }
+
+ static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state)
+@@ -480,35 +483,35 @@ static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state)
+ */
+ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ struct rtrs_srv_con *con;
+- struct rtrs_sess *s;
++ struct rtrs_path *s;
+ int err;
+
+ if (WARN_ON(!id))
+ return true;
+
+ con = id->con;
+- s = con->c.sess;
+- sess = to_srv_sess(s);
++ s = con->c.path;
++ srv_path = to_srv_path(s);
+
+ id->status = status;
+
+- if (sess->state != RTRS_SRV_CONNECTED) {
++ if (srv_path->state != RTRS_SRV_CONNECTED) {
+ rtrs_err_rl(s,
+- "Sending I/O response failed, session %s is disconnected, sess state %s\n",
+- kobject_name(&sess->kobj),
+- rtrs_srv_state_str(sess->state));
++ "Sending I/O response failed, server path %s is disconnected, path state %s\n",
++ kobject_name(&srv_path->kobj),
++ rtrs_srv_state_str(srv_path->state));
+ goto out;
+ }
+ if (always_invalidate) {
+- struct rtrs_srv_mr *mr = &sess->mrs[id->msg_id];
++ struct rtrs_srv_mr *mr = &srv_path->mrs[id->msg_id];
+
+ ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
+ }
+ if (atomic_sub_return(1, &con->c.sq_wr_avail) < 0) {
+- rtrs_err(s, "IB send queue full: sess=%s cid=%d\n",
+- kobject_name(&sess->kobj),
++ rtrs_err(s, "IB send queue full: srv_path=%s cid=%d\n",
++ kobject_name(&srv_path->kobj),
+ con->c.cid);
+ atomic_add(1, &con->c.sq_wr_avail);
+ spin_lock(&con->rsp_wr_wait_lock);
+@@ -523,12 +526,12 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
+ err = rdma_write_sg(id);
+
+ if (err) {
+- rtrs_err_rl(s, "IO response failed: %d: sess=%s\n", err,
+- kobject_name(&sess->kobj));
+- close_sess(sess);
++ rtrs_err_rl(s, "IO response failed: %d: srv_path=%s\n", err,
++ kobject_name(&srv_path->kobj));
++ close_path(srv_path);
+ }
+ out:
+- rtrs_srv_put_ops_ids(sess);
++ rtrs_srv_put_ops_ids(srv_path);
+ return true;
+ }
+ EXPORT_SYMBOL(rtrs_srv_resp_rdma);
+@@ -544,27 +547,27 @@ void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv)
+ }
+ EXPORT_SYMBOL(rtrs_srv_set_sess_priv);
+
+-static void unmap_cont_bufs(struct rtrs_srv_sess *sess)
++static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
+ {
+ int i;
+
+- for (i = 0; i < sess->mrs_num; i++) {
++ for (i = 0; i < srv_path->mrs_num; i++) {
+ struct rtrs_srv_mr *srv_mr;
+
+- srv_mr = &sess->mrs[i];
+- rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
++ srv_mr = &srv_path->mrs[i];
++ rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+ ib_dereg_mr(srv_mr->mr);
+- ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl,
++ ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl,
+ srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
+ sg_free_table(&srv_mr->sgt);
+ }
+- kfree(sess->mrs);
++ kfree(srv_path->mrs);
+ }
+
+-static int map_cont_bufs(struct rtrs_srv_sess *sess)
++static int map_cont_bufs(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
+- struct rtrs_sess *ss = &sess->s;
++ struct rtrs_srv *srv = srv_path->srv;
++ struct rtrs_path *ss = &srv_path->s;
+ int i, mri, err, mrs_num;
+ unsigned int chunk_bits;
+ int chunks_per_mr = 1;
+@@ -581,23 +584,23 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
+ mrs_num = srv->queue_depth;
+ } else {
+ chunks_per_mr =
+- sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len;
++ srv_path->s.dev->ib_dev->attrs.max_fast_reg_page_list_len;
+ mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr);
+ chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num);
+ }
+
+- sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL);
+- if (!sess->mrs)
++ srv_path->mrs = kcalloc(mrs_num, sizeof(*srv_path->mrs), GFP_KERNEL);
++ if (!srv_path->mrs)
+ return -ENOMEM;
+
+- sess->mrs_num = mrs_num;
++ srv_path->mrs_num = mrs_num;
+
+ for (mri = 0; mri < mrs_num; mri++) {
+- struct rtrs_srv_mr *srv_mr = &sess->mrs[mri];
++ struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri];
+ struct sg_table *sgt = &srv_mr->sgt;
+ struct scatterlist *s;
+ struct ib_mr *mr;
+- int nr, chunks;
++ int nr, nr_sgt, chunks;
+
+ chunks = chunks_per_mr * mri;
+ if (!always_invalidate)
+@@ -612,19 +615,19 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
+ sg_set_page(s, srv->chunks[chunks + i],
+ max_chunk_size, 0);
+
+- nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl,
++ nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl,
+ sgt->nents, DMA_BIDIRECTIONAL);
+- if (nr < sgt->nents) {
+- err = nr < 0 ? nr : -EINVAL;
++ if (!nr_sgt) {
++ err = -EINVAL;
+ goto free_sg;
+ }
+- mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
+- sgt->nents);
++ mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
++ nr_sgt);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto unmap_sg;
+ }
+- nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents,
++ nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
+ NULL, max_chunk_size);
+ if (nr < 0 || nr < sgt->nents) {
+ err = nr < 0 ? nr : -EINVAL;
+@@ -634,7 +637,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
+ if (always_invalidate) {
+ srv_mr->iu = rtrs_iu_alloc(1,
+ sizeof(struct rtrs_msg_rkey_rsp),
+- GFP_KERNEL, sess->s.dev->ib_dev,
++ GFP_KERNEL, srv_path->s.dev->ib_dev,
+ DMA_TO_DEVICE, rtrs_srv_rdma_done);
+ if (!srv_mr->iu) {
+ err = -ENOMEM;
+@@ -643,8 +646,8 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
+ }
+ }
+ /* Eventually dma addr for each chunk can be cached */
+- for_each_sg(sgt->sgl, s, sgt->orig_nents, i)
+- sess->dma_addr[chunks + i] = sg_dma_address(s);
++ for_each_sg(sgt->sgl, s, nr_sgt, i)
++ srv_path->dma_addr[chunks + i] = sg_dma_address(s);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
+ srv_mr->mr = mr;
+@@ -652,75 +655,75 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess)
+ continue;
+ err:
+ while (mri--) {
+- srv_mr = &sess->mrs[mri];
++ srv_mr = &srv_path->mrs[mri];
+ sgt = &srv_mr->sgt;
+ mr = srv_mr->mr;
+- rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+ dereg_mr:
+ ib_dereg_mr(mr);
+ unmap_sg:
+- ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl,
++ ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
+ sgt->nents, DMA_BIDIRECTIONAL);
+ free_sg:
+ sg_free_table(sgt);
+ }
+- kfree(sess->mrs);
++ kfree(srv_path->mrs);
+
+ return err;
+ }
+
+ chunk_bits = ilog2(srv->queue_depth - 1) + 1;
+- sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
++ srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
+
+ return 0;
+ }
+
+ static void rtrs_srv_hb_err_handler(struct rtrs_con *c)
+ {
+- close_sess(to_srv_sess(c->sess));
++ close_path(to_srv_path(c->path));
+ }
+
+-static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess)
++static void rtrs_srv_init_hb(struct rtrs_srv_path *srv_path)
+ {
+- rtrs_init_hb(&sess->s, &io_comp_cqe,
++ rtrs_init_hb(&srv_path->s, &io_comp_cqe,
+ RTRS_HB_INTERVAL_MS,
+ RTRS_HB_MISSED_MAX,
+ rtrs_srv_hb_err_handler,
+ rtrs_wq);
+ }
+
+-static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess)
++static void rtrs_srv_start_hb(struct rtrs_srv_path *srv_path)
+ {
+- rtrs_start_hb(&sess->s);
++ rtrs_start_hb(&srv_path->s);
+ }
+
+-static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess)
++static void rtrs_srv_stop_hb(struct rtrs_srv_path *srv_path)
+ {
+- rtrs_stop_hb(&sess->s);
++ rtrs_stop_hb(&srv_path->s);
+ }
+
+ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_iu *iu;
+
+ iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
+- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rtrs_err(s, "Sess info response send failed: %s\n",
+ ib_wc_status_msg(wc->status));
+- close_sess(sess);
++ close_path(srv_path);
+ return;
+ }
+ WARN_ON(wc->opcode != IB_WC_SEND);
+ }
+
+-static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess)
++static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_srv_ctx *ctx = srv->ctx;
+ int up;
+
+@@ -731,18 +734,18 @@ static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess)
+ mutex_unlock(&srv->paths_ev_mutex);
+
+ /* Mark session as established */
+- sess->established = true;
++ srv_path->established = true;
+ }
+
+-static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess)
++static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_srv_ctx *ctx = srv->ctx;
+
+- if (!sess->established)
++ if (!srv_path->established)
+ return;
+
+- sess->established = false;
++ srv_path->established = false;
+ mutex_lock(&srv->paths_ev_mutex);
+ WARN_ON(!srv->paths_up);
+ if (--srv->paths_up == 0)
+@@ -750,11 +753,11 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess)
+ mutex_unlock(&srv->paths_ev_mutex);
+ }
+
+-static bool exist_sessname(struct rtrs_srv_ctx *ctx,
+- const char *sessname, const uuid_t *path_uuid)
++static bool exist_pathname(struct rtrs_srv_ctx *ctx,
++ const char *pathname, const uuid_t *path_uuid)
+ {
+ struct rtrs_srv *srv;
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ bool found = false;
+
+ mutex_lock(&ctx->srv_mutex);
+@@ -767,9 +770,9 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx,
+ continue;
+ }
+
+- list_for_each_entry(sess, &srv->paths_list, s.entry) {
+- if (strlen(sess->s.sessname) == strlen(sessname) &&
+- !strcmp(sess->s.sessname, sessname)) {
++ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
++ if (strlen(srv_path->s.sessname) == strlen(pathname) &&
++ !strcmp(srv_path->s.sessname, pathname)) {
+ found = true;
+ break;
+ }
+@@ -782,14 +785,14 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx,
+ return found;
+ }
+
+-static int post_recv_sess(struct rtrs_srv_sess *sess);
++static int post_recv_path(struct rtrs_srv_path *srv_path);
+ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno);
+
+ static int process_info_req(struct rtrs_srv_con *con,
+ struct rtrs_msg_info_req *msg)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct ib_send_wr *reg_wr = NULL;
+ struct rtrs_msg_info_rsp *rsp;
+ struct rtrs_iu *tx_iu;
+@@ -797,26 +800,32 @@ static int process_info_req(struct rtrs_srv_con *con,
+ int mri, err;
+ size_t tx_sz;
+
+- err = post_recv_sess(sess);
++ err = post_recv_path(srv_path);
+ if (err) {
+- rtrs_err(s, "post_recv_sess(), err: %d\n", err);
++ rtrs_err(s, "post_recv_path(), err: %d\n", err);
+ return err;
+ }
+
+- if (exist_sessname(sess->srv->ctx,
+- msg->sessname, &sess->srv->paths_uuid)) {
+- rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname);
++ if (strchr(msg->pathname, '/') || strchr(msg->pathname, '.')) {
++ rtrs_err(s, "pathname cannot contain / and .\n");
++ return -EINVAL;
++ }
++
++ if (exist_pathname(srv_path->srv->ctx,
++ msg->pathname, &srv_path->srv->paths_uuid)) {
++ rtrs_err(s, "pathname is duplicated: %s\n", msg->pathname);
+ return -EPERM;
+ }
+- strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname));
++ strscpy(srv_path->s.sessname, msg->pathname,
++ sizeof(srv_path->s.sessname));
+
+- rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL);
++ rwr = kcalloc(srv_path->mrs_num, sizeof(*rwr), GFP_KERNEL);
+ if (!rwr)
+ return -ENOMEM;
+
+ tx_sz = sizeof(*rsp);
+- tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num;
+- tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev,
++ tx_sz += sizeof(rsp->desc[0]) * srv_path->mrs_num;
++ tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, srv_path->s.dev->ib_dev,
+ DMA_TO_DEVICE, rtrs_srv_info_rsp_done);
+ if (!tx_iu) {
+ err = -ENOMEM;
+@@ -825,10 +834,10 @@ static int process_info_req(struct rtrs_srv_con *con,
+
+ rsp = tx_iu->buf;
+ rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP);
+- rsp->sg_cnt = cpu_to_le16(sess->mrs_num);
++ rsp->sg_cnt = cpu_to_le16(srv_path->mrs_num);
+
+- for (mri = 0; mri < sess->mrs_num; mri++) {
+- struct ib_mr *mr = sess->mrs[mri].mr;
++ for (mri = 0; mri < srv_path->mrs_num; mri++) {
++ struct ib_mr *mr = srv_path->mrs[mri].mr;
+
+ rsp->desc[mri].addr = cpu_to_le64(mr->iova);
+ rsp->desc[mri].key = cpu_to_le32(mr->rkey);
+@@ -849,13 +858,13 @@ static int process_info_req(struct rtrs_srv_con *con,
+ reg_wr = &rwr[mri].wr;
+ }
+
+- err = rtrs_srv_create_sess_files(sess);
++ err = rtrs_srv_create_path_files(srv_path);
+ if (err)
+ goto iu_free;
+- kobject_get(&sess->kobj);
+- get_device(&sess->srv->dev);
+- rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED);
+- rtrs_srv_start_hb(sess);
++ kobject_get(&srv_path->kobj);
++ get_device(&srv_path->srv->dev);
++ rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
++ rtrs_srv_start_hb(srv_path);
+
+ /*
+ * We do not account number of established connections at the current
+@@ -863,9 +872,10 @@ static int process_info_req(struct rtrs_srv_con *con,
+ * all connections are successfully established. Thus, simply notify
+ * listener with a proper event if we are the first path.
+ */
+- rtrs_srv_sess_up(sess);
++ rtrs_srv_path_up(srv_path);
+
+- ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr,
++ ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
++ tx_iu->dma_addr,
+ tx_iu->size, DMA_TO_DEVICE);
+
+ /* Send info response */
+@@ -873,7 +883,7 @@ static int process_info_req(struct rtrs_srv_con *con,
+ if (err) {
+ rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err);
+ iu_free:
+- rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(tx_iu, srv_path->s.dev->ib_dev, 1);
+ }
+ rwr_free:
+ kfree(rwr);
+@@ -884,8 +894,8 @@ rwr_free:
+ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_msg_info_req *msg;
+ struct rtrs_iu *iu;
+ int err;
+@@ -905,7 +915,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
+ wc->byte_len);
+ goto close;
+ }
+- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr,
++ ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, iu->dma_addr,
+ iu->size, DMA_FROM_DEVICE);
+ msg = iu->buf;
+ if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ) {
+@@ -918,22 +928,22 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
+ goto close;
+
+ out:
+- rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1);
+ return;
+ close:
+- close_sess(sess);
++ close_path(srv_path);
+ goto out;
+ }
+
+ static int post_recv_info_req(struct rtrs_srv_con *con)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_iu *rx_iu;
+ int err;
+
+ rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req),
+- GFP_KERNEL, sess->s.dev->ib_dev,
++ GFP_KERNEL, srv_path->s.dev->ib_dev,
+ DMA_FROM_DEVICE, rtrs_srv_info_req_done);
+ if (!rx_iu)
+ return -ENOMEM;
+@@ -941,7 +951,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con)
+ err = rtrs_iu_post_recv(&con->c, rx_iu);
+ if (err) {
+ rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err);
+- rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
++ rtrs_iu_free(rx_iu, srv_path->s.dev->ib_dev, 1);
+ return err;
+ }
+
+@@ -961,20 +971,20 @@ static int post_recv_io(struct rtrs_srv_con *con, size_t q_size)
+ return 0;
+ }
+
+-static int post_recv_sess(struct rtrs_srv_sess *sess)
++static int post_recv_path(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
+- struct rtrs_sess *s = &sess->s;
++ struct rtrs_srv *srv = srv_path->srv;
++ struct rtrs_path *s = &srv_path->s;
+ size_t q_size;
+ int err, cid;
+
+- for (cid = 0; cid < sess->s.con_num; cid++) {
++ for (cid = 0; cid < srv_path->s.con_num; cid++) {
+ if (cid == 0)
+ q_size = SERVICE_CON_QUEUE_DEPTH;
+ else
+ q_size = srv->queue_depth;
+
+- err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size);
++ err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size);
+ if (err) {
+ rtrs_err(s, "post_recv_io(), err: %d\n", err);
+ return err;
+@@ -988,9 +998,9 @@ static void process_read(struct rtrs_srv_con *con,
+ struct rtrs_msg_rdma_read *msg,
+ u32 buf_id, u32 off)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_srv_ctx *ctx = srv->ctx;
+ struct rtrs_srv_op *id;
+
+@@ -998,10 +1008,10 @@ static void process_read(struct rtrs_srv_con *con,
+ void *data;
+ int ret;
+
+- if (sess->state != RTRS_SRV_CONNECTED) {
++ if (srv_path->state != RTRS_SRV_CONNECTED) {
+ rtrs_err_rl(s,
+ "Processing read request failed, session is disconnected, sess state %s\n",
+- rtrs_srv_state_str(sess->state));
++ rtrs_srv_state_str(srv_path->state));
+ return;
+ }
+ if (msg->sg_cnt != 1 && msg->sg_cnt != 0) {
+@@ -1009,9 +1019,9 @@ static void process_read(struct rtrs_srv_con *con,
+ "Processing read request failed, invalid message\n");
+ return;
+ }
+- rtrs_srv_get_ops_ids(sess);
+- rtrs_srv_update_rdma_stats(sess->stats, off, READ);
+- id = sess->ops_ids[buf_id];
++ rtrs_srv_get_ops_ids(srv_path);
++ rtrs_srv_update_rdma_stats(srv_path->stats, off, READ);
++ id = srv_path->ops_ids[buf_id];
+ id->con = con;
+ id->dir = READ;
+ id->msg_id = buf_id;
+@@ -1037,18 +1047,18 @@ send_err_msg:
+ rtrs_err_rl(s,
+ "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n",
+ buf_id, ret);
+- close_sess(sess);
++ close_path(srv_path);
+ }
+- rtrs_srv_put_ops_ids(sess);
++ rtrs_srv_put_ops_ids(srv_path);
+ }
+
+ static void process_write(struct rtrs_srv_con *con,
+ struct rtrs_msg_rdma_write *req,
+ u32 buf_id, u32 off)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_srv_ctx *ctx = srv->ctx;
+ struct rtrs_srv_op *id;
+
+@@ -1056,15 +1066,15 @@ static void process_write(struct rtrs_srv_con *con,
+ void *data;
+ int ret;
+
+- if (sess->state != RTRS_SRV_CONNECTED) {
++ if (srv_path->state != RTRS_SRV_CONNECTED) {
+ rtrs_err_rl(s,
+ "Processing write request failed, session is disconnected, sess state %s\n",
+- rtrs_srv_state_str(sess->state));
++ rtrs_srv_state_str(srv_path->state));
+ return;
+ }
+- rtrs_srv_get_ops_ids(sess);
+- rtrs_srv_update_rdma_stats(sess->stats, off, WRITE);
+- id = sess->ops_ids[buf_id];
++ rtrs_srv_get_ops_ids(srv_path);
++ rtrs_srv_update_rdma_stats(srv_path->stats, off, WRITE);
++ id = srv_path->ops_ids[buf_id];
+ id->con = con;
+ id->dir = WRITE;
+ id->msg_id = buf_id;
+@@ -1089,20 +1099,21 @@ send_err_msg:
+ rtrs_err_rl(s,
+ "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n",
+ buf_id, ret);
+- close_sess(sess);
++ close_path(srv_path);
+ }
+- rtrs_srv_put_ops_ids(sess);
++ rtrs_srv_put_ops_ids(srv_path);
+ }
+
+ static void process_io_req(struct rtrs_srv_con *con, void *msg,
+ u32 id, u32 off)
+ {
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
+ struct rtrs_msg_rdma_hdr *hdr;
+ unsigned int type;
+
+- ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id],
++ ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev,
++ srv_path->dma_addr[id],
+ max_chunk_size, DMA_BIDIRECTIONAL);
+ hdr = msg;
+ type = le16_to_cpu(hdr->type);
+@@ -1124,7 +1135,7 @@ static void process_io_req(struct rtrs_srv_con *con, void *msg,
+ return;
+
+ err:
+- close_sess(sess);
++ close_path(srv_path);
+ }
+
+ static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
+@@ -1132,16 +1143,16 @@ static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
+ struct rtrs_srv_mr *mr =
+ container_of(wc->wr_cqe, typeof(*mr), inv_cqe);
+ struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
++ struct rtrs_srv *srv = srv_path->srv;
+ u32 msg_id, off;
+ void *data;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n",
+ ib_wc_status_msg(wc->status));
+- close_sess(sess);
++ close_path(srv_path);
+ }
+ msg_id = mr->msg_id;
+ off = mr->msg_off;
+@@ -1189,9 +1200,9 @@ static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con)
+ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+ struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context);
+- struct rtrs_sess *s = con->c.sess;
+- struct rtrs_srv_sess *sess = to_srv_sess(s);
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_path *s = con->c.path;
++ struct rtrs_srv_path *srv_path = to_srv_path(s);
++ struct rtrs_srv *srv = srv_path->srv;
+ u32 imm_type, imm_payload;
+ int err;
+
+@@ -1201,7 +1212,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n",
+ ib_wc_status_msg(wc->status), wc->wr_cqe,
+ wc->opcode, wc->vendor_err, wc->byte_len);
+- close_sess(sess);
++ close_path(srv_path);
+ }
+ return;
+ }
+@@ -1217,7 +1228,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
+ if (err) {
+ rtrs_err(s, "rtrs_post_recv(), err: %d\n", err);
+- close_sess(sess);
++ close_path(srv_path);
+ break;
+ }
+ rtrs_from_imm(be32_to_cpu(wc->ex.imm_data),
+@@ -1226,16 +1237,16 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ u32 msg_id, off;
+ void *data;
+
+- msg_id = imm_payload >> sess->mem_bits;
+- off = imm_payload & ((1 << sess->mem_bits) - 1);
++ msg_id = imm_payload >> srv_path->mem_bits;
++ off = imm_payload & ((1 << srv_path->mem_bits) - 1);
+ if (msg_id >= srv->queue_depth || off >= max_chunk_size) {
+ rtrs_err(s, "Wrong msg_id %u, off %u\n",
+ msg_id, off);
+- close_sess(sess);
++ close_path(srv_path);
+ return;
+ }
+ if (always_invalidate) {
+- struct rtrs_srv_mr *mr = &sess->mrs[msg_id];
++ struct rtrs_srv_mr *mr = &srv_path->mrs[msg_id];
+
+ mr->msg_off = off;
+ mr->msg_id = msg_id;
+@@ -1243,7 +1254,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ if (err) {
+ rtrs_err(s, "rtrs_post_recv(), err: %d\n",
+ err);
+- close_sess(sess);
++ close_path(srv_path);
+ break;
+ }
+ } else {
+@@ -1252,10 +1263,10 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ }
+ } else if (imm_type == RTRS_HB_MSG_IMM) {
+ WARN_ON(con->c.cid);
+- rtrs_send_hb_ack(&sess->s);
++ rtrs_send_hb_ack(&srv_path->s);
+ } else if (imm_type == RTRS_HB_ACK_IMM) {
+ WARN_ON(con->c.cid);
+- sess->s.hb_missed_cnt = 0;
++ srv_path->s.hb_missed_cnt = 0;
+ } else {
+ rtrs_wrn(s, "Unknown IMM type %u\n", imm_type);
+ }
+@@ -1279,22 +1290,23 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
+ }
+
+ /**
+- * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname.
++ * rtrs_srv_get_path_name() - Get rtrs_srv peer hostname.
+ * @srv: Session
+- * @sessname: Sessname buffer
++ * @pathname: Pathname buffer
+ * @len: Length of sessname buffer
+ */
+-int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len)
++int rtrs_srv_get_path_name(struct rtrs_srv *srv, char *pathname,
++ size_t len)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ int err = -ENOTCONN;
+
+ mutex_lock(&srv->paths_mutex);
+- list_for_each_entry(sess, &srv->paths_list, s.entry) {
+- if (sess->state != RTRS_SRV_CONNECTED)
++ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
++ if (srv_path->state != RTRS_SRV_CONNECTED)
+ continue;
+- strscpy(sessname, sess->s.sessname,
+- min_t(size_t, sizeof(sess->s.sessname), len));
++ strscpy(pathname, srv_path->s.sessname,
++ min_t(size_t, sizeof(srv_path->s.sessname), len));
+ err = 0;
+ break;
+ }
+@@ -1302,7 +1314,7 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len)
+
+ return err;
+ }
+-EXPORT_SYMBOL(rtrs_srv_get_sess_name);
++EXPORT_SYMBOL(rtrs_srv_get_path_name);
+
+ /**
+ * rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth.
+@@ -1314,22 +1326,22 @@ int rtrs_srv_get_queue_depth(struct rtrs_srv *srv)
+ }
+ EXPORT_SYMBOL(rtrs_srv_get_queue_depth);
+
+-static int find_next_bit_ring(struct rtrs_srv_sess *sess)
++static int find_next_bit_ring(struct rtrs_srv_path *srv_path)
+ {
+- struct ib_device *ib_dev = sess->s.dev->ib_dev;
++ struct ib_device *ib_dev = srv_path->s.dev->ib_dev;
+ int v;
+
+- v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask);
++ v = cpumask_next(srv_path->cur_cq_vector, &cq_affinity_mask);
+ if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors)
+ v = cpumask_first(&cq_affinity_mask);
+ return v;
+ }
+
+-static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess)
++static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_path *srv_path)
+ {
+- sess->cur_cq_vector = find_next_bit_ring(sess);
++ srv_path->cur_cq_vector = find_next_bit_ring(srv_path);
+
+- return sess->cur_cq_vector;
++ return srv_path->cur_cq_vector;
+ }
+
+ static void rtrs_srv_dev_release(struct device *dev)
+@@ -1434,22 +1446,22 @@ static void put_srv(struct rtrs_srv *srv)
+ }
+
+ static void __add_path_to_srv(struct rtrs_srv *srv,
+- struct rtrs_srv_sess *sess)
++ struct rtrs_srv_path *srv_path)
+ {
+- list_add_tail(&sess->s.entry, &srv->paths_list);
++ list_add_tail(&srv_path->s.entry, &srv->paths_list);
+ srv->paths_num++;
+ WARN_ON(srv->paths_num >= MAX_PATHS_NUM);
+ }
+
+-static void del_path_from_srv(struct rtrs_srv_sess *sess)
++static void del_path_from_srv(struct rtrs_srv_path *srv_path)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+
+ if (WARN_ON(!srv))
+ return;
+
+ mutex_lock(&srv->paths_mutex);
+- list_del(&sess->s.entry);
++ list_del(&srv_path->s.entry);
+ WARN_ON(!srv->paths_num);
+ srv->paths_num--;
+ mutex_unlock(&srv->paths_mutex);
+@@ -1482,44 +1494,44 @@ static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b)
+ static bool __is_path_w_addr_exists(struct rtrs_srv *srv,
+ struct rdma_addr *addr)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+
+- list_for_each_entry(sess, &srv->paths_list, s.entry)
+- if (!sockaddr_cmp((struct sockaddr *)&sess->s.dst_addr,
++ list_for_each_entry(srv_path, &srv->paths_list, s.entry)
++ if (!sockaddr_cmp((struct sockaddr *)&srv_path->s.dst_addr,
+ (struct sockaddr *)&addr->dst_addr) &&
+- !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr,
++ !sockaddr_cmp((struct sockaddr *)&srv_path->s.src_addr,
+ (struct sockaddr *)&addr->src_addr))
+ return true;
+
+ return false;
+ }
+
+-static void free_sess(struct rtrs_srv_sess *sess)
++static void free_path(struct rtrs_srv_path *srv_path)
+ {
+- if (sess->kobj.state_in_sysfs) {
+- kobject_del(&sess->kobj);
+- kobject_put(&sess->kobj);
++ if (srv_path->kobj.state_in_sysfs) {
++ kobject_del(&srv_path->kobj);
++ kobject_put(&srv_path->kobj);
+ } else {
+- kfree(sess->stats);
+- kfree(sess);
++ kfree(srv_path->stats);
++ kfree(srv_path);
+ }
+ }
+
+ static void rtrs_srv_close_work(struct work_struct *work)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ struct rtrs_srv_con *con;
+ int i;
+
+- sess = container_of(work, typeof(*sess), close_work);
++ srv_path = container_of(work, typeof(*srv_path), close_work);
+
+- rtrs_srv_destroy_sess_files(sess);
+- rtrs_srv_stop_hb(sess);
++ rtrs_srv_destroy_path_files(srv_path);
++ rtrs_srv_stop_hb(srv_path);
+
+- for (i = 0; i < sess->s.con_num; i++) {
+- if (!sess->s.con[i])
++ for (i = 0; i < srv_path->s.con_num; i++) {
++ if (!srv_path->s.con[i])
+ continue;
+- con = to_srv_con(sess->s.con[i]);
++ con = to_srv_con(srv_path->s.con[i]);
+ rdma_disconnect(con->c.cm_id);
+ ib_drain_qp(con->c.qp);
+ }
+@@ -1528,41 +1540,41 @@ static void rtrs_srv_close_work(struct work_struct *work)
+ * Degrade ref count to the usual model with a single shared
+ * atomic_t counter
+ */
+- percpu_ref_kill(&sess->ids_inflight_ref);
++ percpu_ref_kill(&srv_path->ids_inflight_ref);
+
+ /* Wait for all completion */
+- wait_for_completion(&sess->complete_done);
++ wait_for_completion(&srv_path->complete_done);
+
+ /* Notify upper layer if we are the last path */
+- rtrs_srv_sess_down(sess);
++ rtrs_srv_path_down(srv_path);
+
+- unmap_cont_bufs(sess);
+- rtrs_srv_free_ops_ids(sess);
++ unmap_cont_bufs(srv_path);
++ rtrs_srv_free_ops_ids(srv_path);
+
+- for (i = 0; i < sess->s.con_num; i++) {
+- if (!sess->s.con[i])
++ for (i = 0; i < srv_path->s.con_num; i++) {
++ if (!srv_path->s.con[i])
+ continue;
+- con = to_srv_con(sess->s.con[i]);
++ con = to_srv_con(srv_path->s.con[i]);
+ rtrs_cq_qp_destroy(&con->c);
+ rdma_destroy_id(con->c.cm_id);
+ kfree(con);
+ }
+- rtrs_ib_dev_put(sess->s.dev);
++ rtrs_ib_dev_put(srv_path->s.dev);
+
+- del_path_from_srv(sess);
+- put_srv(sess->srv);
+- sess->srv = NULL;
+- rtrs_srv_change_state(sess, RTRS_SRV_CLOSED);
++ del_path_from_srv(srv_path);
++ put_srv(srv_path->srv);
++ srv_path->srv = NULL;
++ rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSED);
+
+- kfree(sess->dma_addr);
+- kfree(sess->s.con);
+- free_sess(sess);
++ kfree(srv_path->dma_addr);
++ kfree(srv_path->s.con);
++ free_path(srv_path);
+ }
+
+-static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess,
++static int rtrs_rdma_do_accept(struct rtrs_srv_path *srv_path,
+ struct rdma_cm_id *cm_id)
+ {
+- struct rtrs_srv *srv = sess->srv;
++ struct rtrs_srv *srv = srv_path->srv;
+ struct rtrs_msg_conn_rsp msg;
+ struct rdma_conn_param param;
+ int err;
+@@ -1610,25 +1622,25 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno)
+ return errno;
+ }
+
+-static struct rtrs_srv_sess *
+-__find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid)
++static struct rtrs_srv_path *
++__find_path(struct rtrs_srv *srv, const uuid_t *sess_uuid)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+
+- list_for_each_entry(sess, &srv->paths_list, s.entry) {
+- if (uuid_equal(&sess->s.uuid, sess_uuid))
+- return sess;
++ list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
++ if (uuid_equal(&srv_path->s.uuid, sess_uuid))
++ return srv_path;
+ }
+
+ return NULL;
+ }
+
+-static int create_con(struct rtrs_srv_sess *sess,
++static int create_con(struct rtrs_srv_path *srv_path,
+ struct rdma_cm_id *cm_id,
+ unsigned int cid)
+ {
+- struct rtrs_srv *srv = sess->srv;
+- struct rtrs_sess *s = &sess->s;
++ struct rtrs_srv *srv = srv_path->srv;
++ struct rtrs_path *s = &srv_path->s;
+ struct rtrs_srv_con *con;
+
+ u32 cq_num, max_send_wr, max_recv_wr, wr_limit;
+@@ -1643,10 +1655,10 @@ static int create_con(struct rtrs_srv_sess *sess,
+ spin_lock_init(&con->rsp_wr_wait_lock);
+ INIT_LIST_HEAD(&con->rsp_wr_wait_list);
+ con->c.cm_id = cm_id;
+- con->c.sess = &sess->s;
++ con->c.path = &srv_path->s;
+ con->c.cid = cid;
+ atomic_set(&con->c.wr_cnt, 1);
+- wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr;
++ wr_limit = srv_path->s.dev->ib_dev->attrs.max_qp_wr;
+
+ if (con->c.cid == 0) {
+ /*
+@@ -1679,10 +1691,10 @@ static int create_con(struct rtrs_srv_sess *sess,
+ }
+ cq_num = max_send_wr + max_recv_wr;
+ atomic_set(&con->c.sq_wr_avail, max_send_wr);
+- cq_vector = rtrs_srv_get_next_cq_vector(sess);
++ cq_vector = rtrs_srv_get_next_cq_vector(srv_path);
+
+ /* TODO: SOFTIRQ can be faster, but be careful with softirq context */
+- err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_num,
++ err = rtrs_cq_qp_create(&srv_path->s, &con->c, 1, cq_vector, cq_num,
+ max_send_wr, max_recv_wr,
+ IB_POLL_WORKQUEUE);
+ if (err) {
+@@ -1694,8 +1706,8 @@ static int create_con(struct rtrs_srv_sess *sess,
+ if (err)
+ goto free_cqqp;
+ }
+- WARN_ON(sess->s.con[cid]);
+- sess->s.con[cid] = &con->c;
++ WARN_ON(srv_path->s.con[cid]);
++ srv_path->s.con[cid] = &con->c;
+
+ /*
+ * Change context from server to current connection. The other
+@@ -1714,13 +1726,13 @@ err:
+ return err;
+ }
+
+-static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv,
++static struct rtrs_srv_path *__alloc_path(struct rtrs_srv *srv,
+ struct rdma_cm_id *cm_id,
+ unsigned int con_num,
+ unsigned int recon_cnt,
+ const uuid_t *uuid)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ int err = -ENOMEM;
+ char str[NAME_MAX];
+ struct rtrs_addr path;
+@@ -1734,73 +1746,76 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv,
+ pr_err("Path with same addr exists\n");
+ goto err;
+ }
+- sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+- if (!sess)
++ srv_path = kzalloc(sizeof(*srv_path), GFP_KERNEL);
++ if (!srv_path)
+ goto err;
+
+- sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
+- if (!sess->stats)
++ srv_path->stats = kzalloc(sizeof(*srv_path->stats), GFP_KERNEL);
++ if (!srv_path->stats)
+ goto err_free_sess;
+
+- sess->stats->sess = sess;
++ srv_path->stats->srv_path = srv_path;
+
+- sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr),
+- GFP_KERNEL);
+- if (!sess->dma_addr)
++ srv_path->dma_addr = kcalloc(srv->queue_depth,
++ sizeof(*srv_path->dma_addr),
++ GFP_KERNEL);
++ if (!srv_path->dma_addr)
+ goto err_free_stats;
+
+- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
+- if (!sess->s.con)
++ srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con),
++ GFP_KERNEL);
++ if (!srv_path->s.con)
+ goto err_free_dma_addr;
+
+- sess->state = RTRS_SRV_CONNECTING;
+- sess->srv = srv;
+- sess->cur_cq_vector = -1;
+- sess->s.dst_addr = cm_id->route.addr.dst_addr;
+- sess->s.src_addr = cm_id->route.addr.src_addr;
++ srv_path->state = RTRS_SRV_CONNECTING;
++ srv_path->srv = srv;
++ srv_path->cur_cq_vector = -1;
++ srv_path->s.dst_addr = cm_id->route.addr.dst_addr;
++ srv_path->s.src_addr = cm_id->route.addr.src_addr;
+
+ /* temporary until receiving session-name from client */
+- path.src = &sess->s.src_addr;
+- path.dst = &sess->s.dst_addr;
++ path.src = &srv_path->s.src_addr;
++ path.dst = &srv_path->s.dst_addr;
+ rtrs_addr_to_str(&path, str, sizeof(str));
+- strscpy(sess->s.sessname, str, sizeof(sess->s.sessname));
+-
+- sess->s.con_num = con_num;
+- sess->s.recon_cnt = recon_cnt;
+- uuid_copy(&sess->s.uuid, uuid);
+- spin_lock_init(&sess->state_lock);
+- INIT_WORK(&sess->close_work, rtrs_srv_close_work);
+- rtrs_srv_init_hb(sess);
+-
+- sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd);
+- if (!sess->s.dev) {
++ strscpy(srv_path->s.sessname, str, sizeof(srv_path->s.sessname));
++
++ srv_path->s.con_num = con_num;
++ srv_path->s.irq_con_num = con_num;
++ srv_path->s.recon_cnt = recon_cnt;
++ uuid_copy(&srv_path->s.uuid, uuid);
++ spin_lock_init(&srv_path->state_lock);
++ INIT_WORK(&srv_path->close_work, rtrs_srv_close_work);
++ rtrs_srv_init_hb(srv_path);
++
++ srv_path->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd);
++ if (!srv_path->s.dev) {
+ err = -ENOMEM;
+ goto err_free_con;
+ }
+- err = map_cont_bufs(sess);
++ err = map_cont_bufs(srv_path);
+ if (err)
+ goto err_put_dev;
+
+- err = rtrs_srv_alloc_ops_ids(sess);
++ err = rtrs_srv_alloc_ops_ids(srv_path);
+ if (err)
+ goto err_unmap_bufs;
+
+- __add_path_to_srv(srv, sess);
++ __add_path_to_srv(srv, srv_path);
+
+- return sess;
++ return srv_path;
+
+ err_unmap_bufs:
+- unmap_cont_bufs(sess);
++ unmap_cont_bufs(srv_path);
+ err_put_dev:
+- rtrs_ib_dev_put(sess->s.dev);
++ rtrs_ib_dev_put(srv_path->s.dev);
+ err_free_con:
+- kfree(sess->s.con);
++ kfree(srv_path->s.con);
+ err_free_dma_addr:
+- kfree(sess->dma_addr);
++ kfree(srv_path->dma_addr);
+ err_free_stats:
+- kfree(sess->stats);
++ kfree(srv_path->stats);
+ err_free_sess:
+- kfree(sess);
++ kfree(srv_path);
+ err:
+ return ERR_PTR(err);
+ }
+@@ -1810,7 +1825,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
+ size_t len)
+ {
+ struct rtrs_srv_ctx *ctx = cm_id->context;
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ struct rtrs_srv *srv;
+
+ u16 version, con_num, cid;
+@@ -1851,16 +1866,16 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
+ goto reject_w_err;
+ }
+ mutex_lock(&srv->paths_mutex);
+- sess = __find_sess(srv, &msg->sess_uuid);
+- if (sess) {
+- struct rtrs_sess *s = &sess->s;
++ srv_path = __find_path(srv, &msg->sess_uuid);
++ if (srv_path) {
++ struct rtrs_path *s = &srv_path->s;
+
+ /* Session already holds a reference */
+ put_srv(srv);
+
+- if (sess->state != RTRS_SRV_CONNECTING) {
++ if (srv_path->state != RTRS_SRV_CONNECTING) {
+ rtrs_err(s, "Session in wrong state: %s\n",
+- rtrs_srv_state_str(sess->state));
++ rtrs_srv_state_str(srv_path->state));
+ mutex_unlock(&srv->paths_mutex);
+ goto reject_w_err;
+ }
+@@ -1880,19 +1895,19 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
+ goto reject_w_err;
+ }
+ } else {
+- sess = __alloc_sess(srv, cm_id, con_num, recon_cnt,
++ srv_path = __alloc_path(srv, cm_id, con_num, recon_cnt,
+ &msg->sess_uuid);
+- if (IS_ERR(sess)) {
++ if (IS_ERR(srv_path)) {
+ mutex_unlock(&srv->paths_mutex);
+ put_srv(srv);
+- err = PTR_ERR(sess);
++ err = PTR_ERR(srv_path);
+ pr_err("RTRS server session allocation failed: %d\n", err);
+ goto reject_w_err;
+ }
+ }
+- err = create_con(sess, cm_id, cid);
++ err = create_con(srv_path, cm_id, cid);
+ if (err) {
+- rtrs_err((&sess->s), "create_con(), error %d\n", err);
++ rtrs_err((&srv_path->s), "create_con(), error %d\n", err);
+ rtrs_rdma_do_reject(cm_id, err);
+ /*
+ * Since session has other connections we follow normal way
+@@ -1901,9 +1916,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
+ */
+ goto close_and_return_err;
+ }
+- err = rtrs_rdma_do_accept(sess, cm_id);
++ err = rtrs_rdma_do_accept(srv_path, cm_id);
+ if (err) {
+- rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err);
++ rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %d\n", err);
+ rtrs_rdma_do_reject(cm_id, err);
+ /*
+ * Since current connection was successfully added to the
+@@ -1923,7 +1938,7 @@ reject_w_err:
+
+ close_and_return_err:
+ mutex_unlock(&srv->paths_mutex);
+- close_sess(sess);
++ close_path(srv_path);
+
+ return err;
+ }
+@@ -1931,14 +1946,14 @@ close_and_return_err:
+ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *ev)
+ {
+- struct rtrs_srv_sess *sess = NULL;
+- struct rtrs_sess *s = NULL;
++ struct rtrs_srv_path *srv_path = NULL;
++ struct rtrs_path *s = NULL;
+
+ if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
+ struct rtrs_con *c = cm_id->context;
+
+- s = c->sess;
+- sess = to_srv_sess(s);
++ s = c->path;
++ srv_path = to_srv_path(s);
+ }
+
+ switch (ev->event) {
+@@ -1962,7 +1977,7 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+- close_sess(sess);
++ close_path(srv_path);
+ break;
+ default:
+ pr_err("Ignoring unexpected CM event %s, err %d\n",
+@@ -2170,13 +2185,13 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port)
+ }
+ EXPORT_SYMBOL(rtrs_srv_open);
+
+-static void close_sessions(struct rtrs_srv *srv)
++static void close_paths(struct rtrs_srv *srv)
+ {
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+
+ mutex_lock(&srv->paths_mutex);
+- list_for_each_entry(sess, &srv->paths_list, s.entry)
+- close_sess(sess);
++ list_for_each_entry(srv_path, &srv->paths_list, s.entry)
++ close_path(srv_path);
+ mutex_unlock(&srv->paths_mutex);
+ }
+
+@@ -2186,7 +2201,7 @@ static void close_ctx(struct rtrs_srv_ctx *ctx)
+
+ mutex_lock(&ctx->srv_mutex);
+ list_for_each_entry(srv, &ctx->srv_list, ctx_list)
+- close_sessions(srv);
++ close_paths(srv);
+ mutex_unlock(&ctx->srv_mutex);
+ flush_workqueue(rtrs_wq);
+ }
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+index 9d8d2a91a235b..ee3578b9aa01f 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
++++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+@@ -37,7 +37,7 @@ struct rtrs_srv_stats_rdma_stats {
+ struct rtrs_srv_stats {
+ struct kobject kobj_stats;
+ struct rtrs_srv_stats_rdma_stats rdma_stats;
+- struct rtrs_srv_sess *sess;
++ struct rtrs_srv_path *srv_path;
+ };
+
+ struct rtrs_srv_con {
+@@ -71,8 +71,8 @@ struct rtrs_srv_mr {
+ struct rtrs_iu *iu; /* send buffer for new rkey msg */
+ };
+
+-struct rtrs_srv_sess {
+- struct rtrs_sess s;
++struct rtrs_srv_path {
++ struct rtrs_path s;
+ struct rtrs_srv *srv;
+ struct work_struct close_work;
+ enum rtrs_srv_state state;
+@@ -125,7 +125,7 @@ struct rtrs_srv_ib_ctx {
+
+ extern struct class *rtrs_dev_class;
+
+-void close_sess(struct rtrs_srv_sess *sess);
++void close_path(struct rtrs_srv_path *srv_path);
+
+ static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s,
+ size_t size, int d)
+@@ -143,7 +143,7 @@ ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats,
+ char *page, size_t len);
+
+ /* functions which are implemented in rtrs-srv-sysfs.c */
+-int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess);
+-void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess);
++int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path);
++void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path);
+
+ #endif /* RTRS_SRV_H */
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
+index ca542e477d384..4745f33d7104a 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs.c
++++ b/drivers/infiniband/ulp/rtrs/rtrs.c
+@@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
+ goto err;
+
+ iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
+- if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
++ if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
++ kfree(iu->buf);
+ goto err;
++ }
+
+ iu->cqe.done = done;
+ iu->size = size;
+@@ -69,16 +71,16 @@ EXPORT_SYMBOL_GPL(rtrs_iu_free);
+
+ int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu)
+ {
+- struct rtrs_sess *sess = con->sess;
++ struct rtrs_path *path = con->path;
+ struct ib_recv_wr wr;
+ struct ib_sge list;
+
+ list.addr = iu->dma_addr;
+ list.length = iu->size;
+- list.lkey = sess->dev->ib_pd->local_dma_lkey;
++ list.lkey = path->dev->ib_pd->local_dma_lkey;
+
+ if (list.length == 0) {
+- rtrs_wrn(con->sess,
++ rtrs_wrn(con->path,
+ "Posting receive work request failed, sg list is empty\n");
+ return -EINVAL;
+ }
+@@ -126,7 +128,7 @@ static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
+ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
+ struct ib_send_wr *head)
+ {
+- struct rtrs_sess *sess = con->sess;
++ struct rtrs_path *path = con->path;
+ struct ib_send_wr wr;
+ struct ib_sge list;
+
+@@ -135,7 +137,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
+
+ list.addr = iu->dma_addr;
+ list.length = size;
+- list.lkey = sess->dev->ib_pd->local_dma_lkey;
++ list.lkey = path->dev->ib_pd->local_dma_lkey;
+
+ wr = (struct ib_send_wr) {
+ .wr_cqe = &iu->cqe,
+@@ -188,11 +190,11 @@ static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con,
+ struct ib_send_wr *head)
+ {
+ struct ib_rdma_wr wr;
+- struct rtrs_sess *sess = con->sess;
++ struct rtrs_path *path = con->path;
+ enum ib_send_flags sflags;
+
+ atomic_dec_if_positive(&con->sq_wr_avail);
+- sflags = (atomic_inc_return(&con->wr_cnt) % sess->signal_interval) ?
++ sflags = (atomic_inc_return(&con->wr_cnt) % path->signal_interval) ?
+ 0 : IB_SEND_SIGNALED;
+
+ wr = (struct ib_rdma_wr) {
+@@ -211,26 +213,36 @@ static void qp_event_handler(struct ib_event *ev, void *ctx)
+
+ switch (ev->event) {
+ case IB_EVENT_COMM_EST:
+- rtrs_info(con->sess, "QP event %s (%d) received\n",
++ rtrs_info(con->path, "QP event %s (%d) received\n",
+ ib_event_msg(ev->event), ev->event);
+ rdma_notify(con->cm_id, IB_EVENT_COMM_EST);
+ break;
+ default:
+- rtrs_info(con->sess, "Unhandled QP event %s (%d) received\n",
++ rtrs_info(con->path, "Unhandled QP event %s (%d) received\n",
+ ib_event_msg(ev->event), ev->event);
+ break;
+ }
+ }
+
++static bool is_pollqueue(struct rtrs_con *con)
++{
++ return con->cid >= con->path->irq_con_num;
++}
++
+ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+ {
+ struct rdma_cm_id *cm_id = con->cm_id;
+ struct ib_cq *cq;
+
+- cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);
++ if (is_pollqueue(con))
++ cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector,
++ poll_ctx);
++ else
++ cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);
++
+ if (IS_ERR(cq)) {
+- rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n",
++ rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n",
+ PTR_ERR(cq));
+ return PTR_ERR(cq);
+ }
+@@ -261,7 +273,7 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
+
+ ret = rdma_create_qp(cm_id, pd, &init_attr);
+ if (ret) {
+- rtrs_err(con->sess, "Creating QP failed, err: %d\n", ret);
++ rtrs_err(con->path, "Creating QP failed, err: %d\n", ret);
+ return ret;
+ }
+ con->qp = cm_id->qp;
+@@ -269,7 +281,18 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
+ return ret;
+ }
+
+-int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
++static void destroy_cq(struct rtrs_con *con)
++{
++ if (con->cq) {
++ if (is_pollqueue(con))
++ ib_free_cq(con->cq);
++ else
++ ib_cq_pool_put(con->cq, con->nr_cqe);
++ }
++ con->cq = NULL;
++}
++
++int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
+ u32 max_send_sge, int cq_vector, int nr_cqe,
+ u32 max_send_wr, u32 max_recv_wr,
+ enum ib_poll_context poll_ctx)
+@@ -280,14 +303,13 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
+ if (err)
+ return err;
+
+- err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr,
++ err = create_qp(con, path->dev->ib_pd, max_send_wr, max_recv_wr,
+ max_send_sge);
+ if (err) {
+- ib_cq_pool_put(con->cq, con->nr_cqe);
+- con->cq = NULL;
++ destroy_cq(con);
+ return err;
+ }
+- con->sess = sess;
++ con->path = path;
+
+ return 0;
+ }
+@@ -299,31 +321,28 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con)
+ rdma_destroy_qp(con->cm_id);
+ con->qp = NULL;
+ }
+- if (con->cq) {
+- ib_cq_pool_put(con->cq, con->nr_cqe);
+- con->cq = NULL;
+- }
++ destroy_cq(con);
+ }
+ EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy);
+
+-static void schedule_hb(struct rtrs_sess *sess)
++static void schedule_hb(struct rtrs_path *path)
+ {
+- queue_delayed_work(sess->hb_wq, &sess->hb_dwork,
+- msecs_to_jiffies(sess->hb_interval_ms));
++ queue_delayed_work(path->hb_wq, &path->hb_dwork,
++ msecs_to_jiffies(path->hb_interval_ms));
+ }
+
+-void rtrs_send_hb_ack(struct rtrs_sess *sess)
++void rtrs_send_hb_ack(struct rtrs_path *path)
+ {
+- struct rtrs_con *usr_con = sess->con[0];
++ struct rtrs_con *usr_con = path->con[0];
+ u32 imm;
+ int err;
+
+ imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
+- err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
++ err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
+ NULL);
+ if (err) {
+- rtrs_err(sess, "send HB ACK failed, errno: %d\n", err);
+- sess->hb_err_handler(usr_con);
++ rtrs_err(path, "send HB ACK failed, errno: %d\n", err);
++ path->hb_err_handler(usr_con);
+ return;
+ }
+ }
+@@ -332,63 +351,63 @@ EXPORT_SYMBOL_GPL(rtrs_send_hb_ack);
+ static void hb_work(struct work_struct *work)
+ {
+ struct rtrs_con *usr_con;
+- struct rtrs_sess *sess;
++ struct rtrs_path *path;
+ u32 imm;
+ int err;
+
+- sess = container_of(to_delayed_work(work), typeof(*sess), hb_dwork);
+- usr_con = sess->con[0];
++ path = container_of(to_delayed_work(work), typeof(*path), hb_dwork);
++ usr_con = path->con[0];
+
+- if (sess->hb_missed_cnt > sess->hb_missed_max) {
+- rtrs_err(sess, "HB missed max reached.\n");
+- sess->hb_err_handler(usr_con);
++ if (path->hb_missed_cnt > path->hb_missed_max) {
++ rtrs_err(path, "HB missed max reached.\n");
++ path->hb_err_handler(usr_con);
+ return;
+ }
+- if (sess->hb_missed_cnt++) {
++ if (path->hb_missed_cnt++) {
+ /* Reschedule work without sending hb */
+- schedule_hb(sess);
++ schedule_hb(path);
+ return;
+ }
+
+- sess->hb_last_sent = ktime_get();
++ path->hb_last_sent = ktime_get();
+
+ imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
+- err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
++ err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
+ NULL);
+ if (err) {
+- rtrs_err(sess, "HB send failed, errno: %d\n", err);
+- sess->hb_err_handler(usr_con);
++ rtrs_err(path, "HB send failed, errno: %d\n", err);
++ path->hb_err_handler(usr_con);
+ return;
+ }
+
+- schedule_hb(sess);
++ schedule_hb(path);
+ }
+
+-void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
++void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
+ unsigned int interval_ms, unsigned int missed_max,
+ void (*err_handler)(struct rtrs_con *con),
+ struct workqueue_struct *wq)
+ {
+- sess->hb_cqe = cqe;
+- sess->hb_interval_ms = interval_ms;
+- sess->hb_err_handler = err_handler;
+- sess->hb_wq = wq;
+- sess->hb_missed_max = missed_max;
+- sess->hb_missed_cnt = 0;
+- INIT_DELAYED_WORK(&sess->hb_dwork, hb_work);
++ path->hb_cqe = cqe;
++ path->hb_interval_ms = interval_ms;
++ path->hb_err_handler = err_handler;
++ path->hb_wq = wq;
++ path->hb_missed_max = missed_max;
++ path->hb_missed_cnt = 0;
++ INIT_DELAYED_WORK(&path->hb_dwork, hb_work);
+ }
+ EXPORT_SYMBOL_GPL(rtrs_init_hb);
+
+-void rtrs_start_hb(struct rtrs_sess *sess)
++void rtrs_start_hb(struct rtrs_path *path)
+ {
+- schedule_hb(sess);
++ schedule_hb(path);
+ }
+ EXPORT_SYMBOL_GPL(rtrs_start_hb);
+
+-void rtrs_stop_hb(struct rtrs_sess *sess)
++void rtrs_stop_hb(struct rtrs_path *path)
+ {
+- cancel_delayed_work_sync(&sess->hb_dwork);
+- sess->hb_missed_cnt = 0;
++ cancel_delayed_work_sync(&path->hb_dwork);
++ path->hb_missed_cnt = 0;
+ }
+ EXPORT_SYMBOL_GPL(rtrs_stop_hb);
+
+diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
+index 859c79685daf3..c529b6d63c9a0 100644
+--- a/drivers/infiniband/ulp/rtrs/rtrs.h
++++ b/drivers/infiniband/ulp/rtrs/rtrs.h
+@@ -53,13 +53,13 @@ struct rtrs_clt_ops {
+ };
+
+ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
+- const char *sessname,
++ const char *pathname,
+ const struct rtrs_addr *paths,
+ size_t path_cnt, u16 port,
+ size_t pdu_sz, u8 reconnect_delay_sec,
+ s16 max_reconnect_attempts, u32 nr_poll_queues);
+
+-void rtrs_clt_close(struct rtrs_clt *sess);
++void rtrs_clt_close(struct rtrs_clt *clt_path);
+
+ enum wait_type {
+ RTRS_PERMIT_NOWAIT = 0,
+@@ -175,7 +175,8 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int errno);
+
+ void rtrs_srv_set_sess_priv(struct rtrs_srv *sess, void *priv);
+
+-int rtrs_srv_get_sess_name(struct rtrs_srv *sess, char *sessname, size_t len);
++int rtrs_srv_get_path_name(struct rtrs_srv *sess, char *pathname,
++ size_t len);
+
+ int rtrs_srv_get_queue_depth(struct rtrs_srv *sess);
+
+diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
+index 71eda91e810cf..7701204fe5423 100644
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -1955,7 +1955,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
+ if (scmnd) {
+ req = scsi_cmd_priv(scmnd);
+ scmnd = srp_claim_req(ch, req, NULL, scmnd);
+- } else {
++ }
++ if (!scmnd) {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
+ rsp->tag, ch - target->ch, ch->qp->qp_num);
+@@ -1977,12 +1978,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
+
+ if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
+ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+- else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
+- scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
+ else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
+ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+- else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
+- scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
+
+ srp_free_req(ch, req, scmnd,
+ be32_to_cpu(rsp->req_lim_delta));
+@@ -2782,7 +2779,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
+ static int srp_abort(struct scsi_cmnd *scmnd)
+ {
+ struct srp_target_port *target = host_to_target(scmnd->device->host);
+- struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
++ struct srp_request *req = scsi_cmd_priv(scmnd);
+ u32 tag;
+ u16 ch_idx;
+ struct srp_rdma_ch *ch;
+@@ -2790,8 +2787,6 @@ static int srp_abort(struct scsi_cmnd *scmnd)
+
+ shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
+
+- if (!req)
+- return SUCCESS;
+ tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd));
+ ch_idx = blk_mq_unique_tag_to_hwq(tag);
+ if (WARN_ON_ONCE(ch_idx >= target->ch_count))
+@@ -3398,7 +3393,8 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_PKEY:
+- if (match_hex(args, &token)) {
++ ret = match_hex(args, &token);
++ if (ret) {
+ pr_warn("bad P_Key parameter '%s'\n", p);
+ goto out;
+ }
+@@ -3458,7 +3454,8 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_MAX_SECT:
+- if (match_int(args, &token)) {
++ ret = match_int(args, &token);
++ if (ret) {
+ pr_warn("bad max sect parameter '%s'\n", p);
+ goto out;
+ }
+@@ -3466,8 +3463,15 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_QUEUE_SIZE:
+- if (match_int(args, &token) || token < 1) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1) {
+ pr_warn("bad queue_size parameter '%s'\n", p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->scsi_host->can_queue = token;
+@@ -3478,25 +3482,40 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_MAX_CMD_PER_LUN:
+- if (match_int(args, &token) || token < 1) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1) {
+ pr_warn("bad max cmd_per_lun parameter '%s'\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->scsi_host->cmd_per_lun = token;
+ break;
+
+ case SRP_OPT_TARGET_CAN_QUEUE:
+- if (match_int(args, &token) || token < 1) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1) {
+ pr_warn("bad max target_can_queue parameter '%s'\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->target_can_queue = token;
+ break;
+
+ case SRP_OPT_IO_CLASS:
+- if (match_hex(args, &token)) {
++ ret = match_hex(args, &token);
++ if (ret) {
+ pr_warn("bad IO class parameter '%s'\n", p);
+ goto out;
+ }
+@@ -3505,6 +3524,7 @@ static int srp_parse_options(struct net *net, const char *buf,
+ pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
+ token, SRP_REV10_IB_IO_CLASS,
+ SRP_REV16A_IB_IO_CLASS);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->io_class = token;
+@@ -3527,16 +3547,24 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_CMD_SG_ENTRIES:
+- if (match_int(args, &token) || token < 1 || token > 255) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1 || token > 255) {
+ pr_warn("bad max cmd_sg_entries parameter '%s'\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->cmd_sg_cnt = token;
+ break;
+
+ case SRP_OPT_ALLOW_EXT_SG:
+- if (match_int(args, &token)) {
++ ret = match_int(args, &token);
++ if (ret) {
+ pr_warn("bad allow_ext_sg parameter '%s'\n", p);
+ goto out;
+ }
+@@ -3544,43 +3572,77 @@ static int srp_parse_options(struct net *net, const char *buf,
+ break;
+
+ case SRP_OPT_SG_TABLESIZE:
+- if (match_int(args, &token) || token < 1 ||
+- token > SG_MAX_SEGMENTS) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1 || token > SG_MAX_SEGMENTS) {
+ pr_warn("bad max sg_tablesize parameter '%s'\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->sg_tablesize = token;
+ break;
+
+ case SRP_OPT_COMP_VECTOR:
+- if (match_int(args, &token) || token < 0) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 0) {
+ pr_warn("bad comp_vector parameter '%s'\n", p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->comp_vector = token;
+ break;
+
+ case SRP_OPT_TL_RETRY_COUNT:
+- if (match_int(args, &token) || token < 2 || token > 7) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 2 || token > 7) {
+ pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->tl_retry_count = token;
+ break;
+
+ case SRP_OPT_MAX_IT_IU_SIZE:
+- if (match_int(args, &token) || token < 0) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 0) {
+ pr_warn("bad maximum initiator to target IU size '%s'\n", p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->max_it_iu_size = token;
+ break;
+
+ case SRP_OPT_CH_COUNT:
+- if (match_int(args, &token) || token < 1) {
++ ret = match_int(args, &token);
++ if (ret) {
++ pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
++ p, ret);
++ goto out;
++ }
++ if (token < 1) {
+ pr_warn("bad channel count %s\n", p);
++ ret = -EINVAL;
+ goto out;
+ }
+ target->ch_count = token;
+@@ -3589,6 +3651,7 @@ static int srp_parse_options(struct net *net, const char *buf,
+ default:
+ pr_warn("unknown parameter or missing value '%s' in target creation request\n",
+ p);
++ ret = -EINVAL;
+ goto out;
+ }
+ }
+@@ -4038,9 +4101,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data)
+ spin_unlock(&host->target_lock);
+
+ /*
+- * Wait for tl_err and target port removal tasks.
++ * srp_queue_remove_work() queues a call to
++ * srp_remove_target(). The latter function cancels
++ * target->tl_err_work so waiting for the remove works to
++ * finish is sufficient.
+ */
+- flush_workqueue(system_long_wq);
+ flush_workqueue(srp_remove_wq);
+
+ kfree(host);
+diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
+index abccddeea1e32..152242e8f733d 100644
+--- a/drivers/infiniband/ulp/srp/ib_srp.h
++++ b/drivers/infiniband/ulp/srp/ib_srp.h
+@@ -62,9 +62,6 @@ enum {
+ SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
+ SRP_TSK_MGMT_SQ_SIZE,
+
+- SRP_TAG_NO_REQ = ~0U,
+- SRP_TAG_TSK_MGMT = 1U << 31,
+-
+ SRP_MAX_PAGES_PER_MR = 512,
+
+ SRP_MAX_ADD_CDB_LEN = 16,
+@@ -79,6 +76,11 @@ enum {
+ sizeof(struct srp_imm_buf),
+ };
+
++enum {
++ SRP_TAG_NO_REQ = ~0U,
++ SRP_TAG_TSK_MGMT = BIT(31),
++};
++
+ enum srp_target_state {
+ SRP_TARGET_SCANNING,
+ SRP_TARGET_LIVE,
+diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
+index 3cadf12954172..38494943bd748 100644
+--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
++++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
+@@ -549,6 +549,7 @@ static int srpt_format_guid(char *buf, unsigned int size, const __be64 *guid)
+ */
+ static int srpt_refresh_port(struct srpt_port *sport)
+ {
++ struct ib_mad_agent *mad_agent;
+ struct ib_mad_reg_req reg_req;
+ struct ib_port_modify port_modify;
+ struct ib_port_attr port_attr;
+@@ -565,12 +566,9 @@ static int srpt_refresh_port(struct srpt_port *sport)
+ if (ret)
+ return ret;
+
+- sport->port_guid_id.wwn.priv = sport;
+- srpt_format_guid(sport->port_guid_id.name,
+- sizeof(sport->port_guid_id.name),
++ srpt_format_guid(sport->guid_name, ARRAY_SIZE(sport->guid_name),
+ &sport->gid.global.interface_id);
+- sport->port_gid_id.wwn.priv = sport;
+- snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name),
++ snprintf(sport->gid_name, ARRAY_SIZE(sport->gid_name),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
+@@ -596,24 +594,26 @@ static int srpt_refresh_port(struct srpt_port *sport)
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+- sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
+- sport->port,
+- IB_QPT_GSI,
+- &reg_req, 0,
+- srpt_mad_send_handler,
+- srpt_mad_recv_handler,
+- sport, 0);
+- if (IS_ERR(sport->mad_agent)) {
++ mad_agent = ib_register_mad_agent(sport->sdev->device,
++ sport->port,
++ IB_QPT_GSI,
++ &reg_req, 0,
++ srpt_mad_send_handler,
++ srpt_mad_recv_handler,
++ sport, 0);
++ if (IS_ERR(mad_agent)) {
+ pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n",
+ dev_name(&sport->sdev->device->dev), sport->port,
+- PTR_ERR(sport->mad_agent));
++ PTR_ERR(mad_agent));
+ sport->mad_agent = NULL;
+ memset(&port_modify, 0, sizeof(port_modify));
+ port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
+ ib_modify_port(sport->sdev->device, sport->port, 0,
+ &port_modify);
+-
++ return 0;
+ }
++
++ sport->mad_agent = mad_agent;
+ }
+
+ return 0;
+@@ -2314,31 +2314,35 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
+ tag_num = ch->rq_size;
+ tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */
+
+- mutex_lock(&sport->port_guid_id.mutex);
+- list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) {
+- if (!IS_ERR_OR_NULL(ch->sess))
+- break;
+- ch->sess = target_setup_session(&stpg->tpg, tag_num,
++ if (sport->guid_id) {
++ mutex_lock(&sport->guid_id->mutex);
++ list_for_each_entry(stpg, &sport->guid_id->tpg_list, entry) {
++ if (!IS_ERR_OR_NULL(ch->sess))
++ break;
++ ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ tag_size, TARGET_PROT_NORMAL,
+ ch->sess_name, ch, NULL);
++ }
++ mutex_unlock(&sport->guid_id->mutex);
+ }
+- mutex_unlock(&sport->port_guid_id.mutex);
+
+- mutex_lock(&sport->port_gid_id.mutex);
+- list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) {
+- if (!IS_ERR_OR_NULL(ch->sess))
+- break;
+- ch->sess = target_setup_session(&stpg->tpg, tag_num,
++ if (sport->gid_id) {
++ mutex_lock(&sport->gid_id->mutex);
++ list_for_each_entry(stpg, &sport->gid_id->tpg_list, entry) {
++ if (!IS_ERR_OR_NULL(ch->sess))
++ break;
++ ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ tag_size, TARGET_PROT_NORMAL, i_port_id,
+ ch, NULL);
+- if (!IS_ERR_OR_NULL(ch->sess))
+- break;
+- /* Retry without leading "0x" */
+- ch->sess = target_setup_session(&stpg->tpg, tag_num,
++ if (!IS_ERR_OR_NULL(ch->sess))
++ break;
++ /* Retry without leading "0x" */
++ ch->sess = target_setup_session(&stpg->tpg, tag_num,
+ tag_size, TARGET_PROT_NORMAL,
+ i_port_id + 2, ch, NULL);
++ }
++ mutex_unlock(&sport->gid_id->mutex);
+ }
+- mutex_unlock(&sport->port_gid_id.mutex);
+
+ if (IS_ERR_OR_NULL(ch->sess)) {
+ WARN_ON_ONCE(ch->sess == NULL);
+@@ -2983,7 +2987,12 @@ static int srpt_release_sport(struct srpt_port *sport)
+ return 0;
+ }
+
+-static struct se_wwn *__srpt_lookup_wwn(const char *name)
++struct port_and_port_id {
++ struct srpt_port *sport;
++ struct srpt_port_id **port_id;
++};
++
++static struct port_and_port_id __srpt_lookup_port(const char *name)
+ {
+ struct ib_device *dev;
+ struct srpt_device *sdev;
+@@ -2998,25 +3007,38 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name)
+ for (i = 0; i < dev->phys_port_cnt; i++) {
+ sport = &sdev->port[i];
+
+- if (strcmp(sport->port_guid_id.name, name) == 0)
+- return &sport->port_guid_id.wwn;
+- if (strcmp(sport->port_gid_id.name, name) == 0)
+- return &sport->port_gid_id.wwn;
++ if (strcmp(sport->guid_name, name) == 0) {
++ kref_get(&sdev->refcnt);
++ return (struct port_and_port_id){
++ sport, &sport->guid_id};
++ }
++ if (strcmp(sport->gid_name, name) == 0) {
++ kref_get(&sdev->refcnt);
++ return (struct port_and_port_id){
++ sport, &sport->gid_id};
++ }
+ }
+ }
+
+- return NULL;
++ return (struct port_and_port_id){};
+ }
+
+-static struct se_wwn *srpt_lookup_wwn(const char *name)
++/**
++ * srpt_lookup_port() - Look up an RDMA port by name
++ * @name: ASCII port name
++ *
++ * Increments the RDMA port reference count if an RDMA port pointer is returned.
++ * The caller must drop that reference count by calling srpt_port_put_ref().
++ */
++static struct port_and_port_id srpt_lookup_port(const char *name)
+ {
+- struct se_wwn *wwn;
++ struct port_and_port_id papi;
+
+ spin_lock(&srpt_dev_lock);
+- wwn = __srpt_lookup_wwn(name);
++ papi = __srpt_lookup_port(name);
+ spin_unlock(&srpt_dev_lock);
+
+- return wwn;
++ return papi;
+ }
+
+ static void srpt_free_srq(struct srpt_device *sdev)
+@@ -3101,6 +3123,18 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
+ return ret;
+ }
+
++static void srpt_free_sdev(struct kref *refcnt)
++{
++ struct srpt_device *sdev = container_of(refcnt, typeof(*sdev), refcnt);
++
++ kfree(sdev);
++}
++
++static void srpt_sdev_put(struct srpt_device *sdev)
++{
++ kref_put(&sdev->refcnt, srpt_free_sdev);
++}
++
+ /**
+ * srpt_add_one - InfiniBand device addition callback function
+ * @device: Describes a HCA.
+@@ -3119,6 +3153,7 @@ static int srpt_add_one(struct ib_device *device)
+ if (!sdev)
+ return -ENOMEM;
+
++ kref_init(&sdev->refcnt);
+ sdev->device = device;
+ mutex_init(&sdev->sdev_mutex);
+
+@@ -3182,10 +3217,6 @@ static int srpt_add_one(struct ib_device *device)
+ sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
+ sport->port_attrib.use_srq = false;
+ INIT_WORK(&sport->work, srpt_refresh_port_work);
+- mutex_init(&sport->port_guid_id.mutex);
+- INIT_LIST_HEAD(&sport->port_guid_id.tpg_list);
+- mutex_init(&sport->port_gid_id.mutex);
+- INIT_LIST_HEAD(&sport->port_gid_id.tpg_list);
+
+ ret = srpt_refresh_port(sport);
+ if (ret) {
+@@ -3214,7 +3245,7 @@ err_ring:
+ srpt_free_srq(sdev);
+ ib_dealloc_pd(sdev->pd);
+ free_dev:
+- kfree(sdev);
++ srpt_sdev_put(sdev);
+ pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
+ return ret;
+ }
+@@ -3258,7 +3289,7 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
+
+ ib_dealloc_pd(sdev->pd);
+
+- kfree(sdev);
++ srpt_sdev_put(sdev);
+ }
+
+ static struct ib_client srpt_client = {
+@@ -3286,10 +3317,10 @@ static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn)
+ {
+ struct srpt_port *sport = wwn->priv;
+
+- if (wwn == &sport->port_guid_id.wwn)
+- return &sport->port_guid_id;
+- if (wwn == &sport->port_gid_id.wwn)
+- return &sport->port_gid_id;
++ if (sport->guid_id && &sport->guid_id->wwn == wwn)
++ return sport->guid_id;
++ if (sport->gid_id && &sport->gid_id->wwn == wwn)
++ return sport->gid_id;
+ WARN_ON_ONCE(true);
+ return NULL;
+ }
+@@ -3804,7 +3835,31 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
+ struct config_group *group,
+ const char *name)
+ {
+- return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
++ struct port_and_port_id papi = srpt_lookup_port(name);
++ struct srpt_port *sport = papi.sport;
++ struct srpt_port_id *port_id;
++
++ if (!papi.port_id)
++ return ERR_PTR(-EINVAL);
++ if (*papi.port_id) {
++ /* Attempt to create a directory that already exists. */
++ WARN_ON_ONCE(true);
++ return &(*papi.port_id)->wwn;
++ }
++ port_id = kzalloc(sizeof(*port_id), GFP_KERNEL);
++ if (!port_id) {
++ srpt_sdev_put(sport->sdev);
++ return ERR_PTR(-ENOMEM);
++ }
++ mutex_init(&port_id->mutex);
++ INIT_LIST_HEAD(&port_id->tpg_list);
++ port_id->wwn.priv = sport;
++ memcpy(port_id->name, port_id == sport->guid_id ? sport->guid_name :
++ sport->gid_name, ARRAY_SIZE(port_id->name));
++
++ *papi.port_id = port_id;
++
++ return &port_id->wwn;
+ }
+
+ /**
+@@ -3813,6 +3868,18 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
+ */
+ static void srpt_drop_tport(struct se_wwn *wwn)
+ {
++ struct srpt_port_id *port_id = container_of(wwn, typeof(*port_id), wwn);
++ struct srpt_port *sport = wwn->priv;
++
++ if (sport->guid_id == port_id)
++ sport->guid_id = NULL;
++ else if (sport->gid_id == port_id)
++ sport->gid_id = NULL;
++ else
++ WARN_ON_ONCE(true);
++
++ srpt_sdev_put(sport->sdev);
++ kfree(port_id);
+ }
+
+ static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
+diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
+index 76e66f630c17a..4c46b301eea18 100644
+--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
++++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
+@@ -376,7 +376,7 @@ struct srpt_tpg {
+ };
+
+ /**
+- * struct srpt_port_id - information about an RDMA port name
++ * struct srpt_port_id - LIO RDMA port information
+ * @mutex: Protects @tpg_list changes.
+ * @tpg_list: TPGs associated with the RDMA port name.
+ * @wwn: WWN associated with the RDMA port name.
+@@ -393,7 +393,7 @@ struct srpt_port_id {
+ };
+
+ /**
+- * struct srpt_port - information associated by SRPT with a single IB port
++ * struct srpt_port - SRPT RDMA port information
+ * @sdev: backpointer to the HCA information.
+ * @mad_agent: per-port management datagram processing information.
+ * @enabled: Whether or not this target port is enabled.
+@@ -402,8 +402,10 @@ struct srpt_port_id {
+ * @lid: cached value of the port's lid.
+ * @gid: cached value of the port's gid.
+ * @work: work structure for refreshing the aforementioned cached values.
+- * @port_guid_id: target port GUID
+- * @port_gid_id: target port GID
++ * @guid_name: port name in GUID format.
++ * @guid_id: LIO target port information for the port name in GUID format.
++ * @gid_name: port name in GID format.
++ * @gid_id: LIO target port information for the port name in GID format.
+ * @port_attrib: Port attributes that can be accessed through configfs.
+ * @refcount: Number of objects associated with this port.
+ * @freed_channels: Completion that will be signaled once @refcount becomes 0.
+@@ -419,8 +421,10 @@ struct srpt_port {
+ u32 lid;
+ union ib_gid gid;
+ struct work_struct work;
+- struct srpt_port_id port_guid_id;
+- struct srpt_port_id port_gid_id;
++ char guid_name[64];
++ struct srpt_port_id *guid_id;
++ char gid_name[64];
++ struct srpt_port_id *gid_id;
+ struct srpt_port_attrib port_attrib;
+ atomic_t refcount;
+ struct completion *freed_channels;
+@@ -430,6 +434,7 @@ struct srpt_port {
+
+ /**
+ * struct srpt_device - information associated by SRPT with a single HCA
++ * @refcnt: Reference count for this device.
+ * @device: Backpointer to the struct ib_device managed by the IB core.
+ * @pd: IB protection domain.
+ * @lkey: L_Key (local key) with write access to all local memory.
+@@ -445,6 +450,7 @@ struct srpt_port {
+ * @port: Information about the ports owned by this HCA.
+ */
+ struct srpt_device {
++ struct kref refcnt;
+ struct ib_device *device;
+ struct ib_pd *pd;
+ u32 lkey;
+diff --git a/drivers/input/input.c b/drivers/input/input.c
+index ccaeb24263854..5ca3f11d2d759 100644
+--- a/drivers/input/input.c
++++ b/drivers/input/input.c
+@@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex);
+
+ static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 };
+
++static const unsigned int input_max_code[EV_CNT] = {
++ [EV_KEY] = KEY_MAX,
++ [EV_REL] = REL_MAX,
++ [EV_ABS] = ABS_MAX,
++ [EV_MSC] = MSC_MAX,
++ [EV_SW] = SW_MAX,
++ [EV_LED] = LED_MAX,
++ [EV_SND] = SND_MAX,
++ [EV_FF] = FF_MAX,
++};
++
+ static inline int is_event_supported(unsigned int code,
+ unsigned long *bm, unsigned int max)
+ {
+@@ -680,7 +691,7 @@ void input_close_device(struct input_handle *handle)
+
+ __input_release_device(handle);
+
+- if (!dev->inhibited && !--dev->users) {
++ if (!--dev->users && !dev->inhibited) {
+ if (dev->poller)
+ input_dev_poller_stop(dev->poller);
+ if (dev->close)
+@@ -2074,6 +2085,14 @@ EXPORT_SYMBOL(input_get_timestamp);
+ */
+ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code)
+ {
++ if (type < EV_CNT && input_max_code[type] &&
++ code > input_max_code[type]) {
++ pr_err("%s: invalid code %u for type %u\n", __func__, code,
++ type);
++ dump_stack();
++ return;
++ }
++
+ switch (type) {
+ case EV_KEY:
+ __set_bit(code, dev->keybit);
+diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
+index 3b23078bc7b5b..db4135bbd279a 100644
+--- a/drivers/input/joystick/Kconfig
++++ b/drivers/input/joystick/Kconfig
+@@ -46,6 +46,7 @@ config JOYSTICK_A3D
+ config JOYSTICK_ADC
+ tristate "Simple joystick connected over ADC"
+ depends on IIO
++ select IIO_BUFFER
+ select IIO_BUFFER_CB
+ help
+ Say Y here if you have a simple joystick connected over ADC.
+diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
+index b2a68bc9f0b4d..84b87526b7ba3 100644
+--- a/drivers/input/joystick/iforce/iforce-main.c
++++ b/drivers/input/joystick/iforce/iforce-main.c
+@@ -50,6 +50,7 @@ static struct iforce_device iforce_device[] = {
+ { 0x046d, 0xc291, "Logitech WingMan Formula Force", btn_wheel, abs_wheel, ff_iforce },
+ { 0x05ef, 0x020a, "AVB Top Shot Pegasus", btn_joystick_avb, abs_avb_pegasus, ff_iforce },
+ { 0x05ef, 0x8884, "AVB Mag Turbo Force", btn_wheel, abs_wheel, ff_iforce },
++ { 0x05ef, 0x8886, "Boeder Force Feedback Wheel", btn_wheel, abs_wheel, ff_iforce },
+ { 0x05ef, 0x8888, "AVB Top Shot Force Feedback Racing Wheel", btn_wheel, abs_wheel, ff_iforce }, //?
+ { 0x061c, 0xc0a4, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, //?
+ { 0x061c, 0xc084, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce },
+@@ -272,22 +273,22 @@ int iforce_init_device(struct device *parent, u16 bustype,
+ * Get device info.
+ */
+
+- if (!iforce_get_id_packet(iforce, 'M', buf, &len) || len < 3)
++ if (!iforce_get_id_packet(iforce, 'M', buf, &len) && len >= 3)
+ input_dev->id.vendor = get_unaligned_le16(buf + 1);
+ else
+ dev_warn(&iforce->dev->dev, "Device does not respond to id packet M\n");
+
+- if (!iforce_get_id_packet(iforce, 'P', buf, &len) || len < 3)
++ if (!iforce_get_id_packet(iforce, 'P', buf, &len) && len >= 3)
+ input_dev->id.product = get_unaligned_le16(buf + 1);
+ else
+ dev_warn(&iforce->dev->dev, "Device does not respond to id packet P\n");
+
+- if (!iforce_get_id_packet(iforce, 'B', buf, &len) || len < 3)
++ if (!iforce_get_id_packet(iforce, 'B', buf, &len) && len >= 3)
+ iforce->device_memory.end = get_unaligned_le16(buf + 1);
+ else
+ dev_warn(&iforce->dev->dev, "Device does not respond to id packet B\n");
+
+- if (!iforce_get_id_packet(iforce, 'N', buf, &len) || len < 2)
++ if (!iforce_get_id_packet(iforce, 'N', buf, &len) && len >= 2)
+ ff_effects = buf[1];
+ else
+ dev_warn(&iforce->dev->dev, "Device does not respond to id packet N\n");
+diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c
+index f95a81b9fac72..2380546d79782 100644
+--- a/drivers/input/joystick/iforce/iforce-serio.c
++++ b/drivers/input/joystick/iforce/iforce-serio.c
+@@ -39,7 +39,7 @@ static void iforce_serio_xmit(struct iforce *iforce)
+
+ again:
+ if (iforce->xmit.head == iforce->xmit.tail) {
+- clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
++ iforce_clear_xmit_and_wake(iforce);
+ spin_unlock_irqrestore(&iforce->xmit_lock, flags);
+ return;
+ }
+@@ -64,7 +64,7 @@ again:
+ if (test_and_clear_bit(IFORCE_XMIT_AGAIN, iforce->xmit_flags))
+ goto again;
+
+- clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
++ iforce_clear_xmit_and_wake(iforce);
+
+ spin_unlock_irqrestore(&iforce->xmit_lock, flags);
+ }
+@@ -169,7 +169,7 @@ static irqreturn_t iforce_serio_irq(struct serio *serio,
+ iforce_serio->cmd_response_len = iforce_serio->len;
+
+ /* Signal that command is done */
+- wake_up(&iforce->wait);
++ wake_up_all(&iforce->wait);
+ } else if (likely(iforce->type)) {
+ iforce_process_packet(iforce, iforce_serio->id,
+ iforce_serio->data_in,
+diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
+index 6c554c11a7ac3..cba92bd590a8d 100644
+--- a/drivers/input/joystick/iforce/iforce-usb.c
++++ b/drivers/input/joystick/iforce/iforce-usb.c
+@@ -30,7 +30,7 @@ static void __iforce_usb_xmit(struct iforce *iforce)
+ spin_lock_irqsave(&iforce->xmit_lock, flags);
+
+ if (iforce->xmit.head == iforce->xmit.tail) {
+- clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
++ iforce_clear_xmit_and_wake(iforce);
+ spin_unlock_irqrestore(&iforce->xmit_lock, flags);
+ return;
+ }
+@@ -58,9 +58,9 @@ static void __iforce_usb_xmit(struct iforce *iforce)
+ XMIT_INC(iforce->xmit.tail, n);
+
+ if ( (n=usb_submit_urb(iforce_usb->out, GFP_ATOMIC)) ) {
+- clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
+ dev_warn(&iforce_usb->intf->dev,
+ "usb_submit_urb failed %d\n", n);
++ iforce_clear_xmit_and_wake(iforce);
+ }
+
+ /* The IFORCE_XMIT_RUNNING bit is not cleared here. That's intended.
+@@ -92,7 +92,7 @@ static int iforce_usb_get_id(struct iforce *iforce, u8 id,
+ id,
+ USB_TYPE_VENDOR | USB_DIR_IN |
+ USB_RECIP_INTERFACE,
+- 0, 0, buf, IFORCE_MAX_LENGTH, HZ);
++ 0, 0, buf, IFORCE_MAX_LENGTH, 1000);
+ if (status < 0) {
+ dev_err(&iforce_usb->intf->dev,
+ "usb_submit_urb failed: %d\n", status);
+@@ -175,15 +175,15 @@ static void iforce_usb_out(struct urb *urb)
+ struct iforce *iforce = &iforce_usb->iforce;
+
+ if (urb->status) {
+- clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
+ dev_dbg(&iforce_usb->intf->dev, "urb->status %d, exiting\n",
+ urb->status);
++ iforce_clear_xmit_and_wake(iforce);
+ return;
+ }
+
+ __iforce_usb_xmit(iforce);
+
+- wake_up(&iforce->wait);
++ wake_up_all(&iforce->wait);
+ }
+
+ static int iforce_usb_probe(struct usb_interface *intf,
+diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
+index 6aa761ebbdf77..9ccb9107ccbef 100644
+--- a/drivers/input/joystick/iforce/iforce.h
++++ b/drivers/input/joystick/iforce/iforce.h
+@@ -119,6 +119,12 @@ static inline int iforce_get_id_packet(struct iforce *iforce, u8 id,
+ response_data, response_len);
+ }
+
++static inline void iforce_clear_xmit_and_wake(struct iforce *iforce)
++{
++ clear_bit(IFORCE_XMIT_RUNNING, iforce->xmit_flags);
++ wake_up_all(&iforce->wait);
++}
++
+ /* Public functions */
+ /* iforce-main.c */
+ int iforce_init_device(struct device *parent, u16 bustype,
+diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
+index 429411c6c0a8e..a85a4f33aea8c 100644
+--- a/drivers/input/joystick/spaceball.c
++++ b/drivers/input/joystick/spaceball.c
+@@ -19,6 +19,7 @@
+ #include <linux/module.h>
+ #include <linux/input.h>
+ #include <linux/serio.h>
++#include <asm/unaligned.h>
+
+ #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver"
+
+@@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball)
+
+ case 'D': /* Ball data */
+ if (spaceball->idx != 15) return;
+- for (i = 0; i < 6; i++)
++ /*
++ * Skip first three bytes; read six axes worth of data.
++ * Axis values are signed 16-bit big-endian.
++ */
++ data += 3;
++ for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) {
+ input_report_abs(dev, spaceball_axes[i],
+- (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2]));
++ (__s16)get_unaligned_be16(&data[i * 2]));
++ }
+ break;
+
+ case 'K': /* Button data */
+diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
+index 4c914f75a9027..57947874f26f3 100644
+--- a/drivers/input/joystick/xpad.c
++++ b/drivers/input/joystick/xpad.c
+@@ -113,6 +113,8 @@ static const struct xpad_device {
+ u8 xtype;
+ } xpad_device[] = {
+ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 },
++ { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 },
++ { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 },
+ { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX },
+ { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX },
+ { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
+@@ -244,6 +246,7 @@ static const struct xpad_device {
+ { 0x0f0d, 0x0063, "Hori Real Arcade Pro Hayabusa (USA) Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+ { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE },
+ { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
++ { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+ { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX },
+ { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
+ { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
+@@ -260,7 +263,7 @@ static const struct xpad_device {
+ { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+ { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 },
+ { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
+- { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 },
++ { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+ { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE },
+ { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 },
+@@ -325,6 +328,7 @@ static const struct xpad_device {
+ { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
++ { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 },
+ { 0x24c6, 0x550e, "Hori Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE },
+@@ -334,6 +338,14 @@ static const struct xpad_device {
+ { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
+ { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+ { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
++ { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 },
++ { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE },
++ { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 },
++ { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 },
++ { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 },
++ { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 },
++ { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 },
++ { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 },
+ { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 },
+ { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
+ { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
+@@ -419,6 +431,7 @@ static const signed short xpad_abs_triggers[] = {
+ static const struct usb_device_id xpad_table[] = {
+ { USB_INTERFACE_INFO('X', 'B', 0) }, /* X-Box USB-IF not approved class */
+ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 Controller */
++ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */
+ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster X-Box 360 controllers */
+ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft X-Box 360 controllers */
+ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft X-Box One controllers */
+@@ -429,6 +442,7 @@ static const struct usb_device_id xpad_table[] = {
+ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */
+ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */
+ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz GamePad */
++ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */
+ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 controllers */
+ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f X-Box One controllers */
+ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */
+@@ -450,8 +464,12 @@ static const struct usb_device_id xpad_table[] = {
+ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA Controllers */
+ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA Controllers */
+ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA Controllers */
++ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */
++ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */
++ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */
+ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke X-Box One pad */
+ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */
++ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */
+ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */
+ { }
+ };
+@@ -474,6 +492,9 @@ struct xboxone_init_packet {
+ }
+
+
++#define GIP_WIRED_INTF_DATA 0
++#define GIP_WIRED_INTF_AUDIO 1
++
+ /*
+ * This packet is required for all Xbox One pads with 2015
+ * or later firmware installed (or present from the factory).
+@@ -1802,7 +1823,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
+ }
+
+ if (xpad->xtype == XTYPE_XBOXONE &&
+- intf->cur_altsetting->desc.bInterfaceNumber != 0) {
++ intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) {
+ /*
+ * The Xbox One controller lists three interfaces all with the
+ * same interface class, subclass and protocol. Differentiate by
+@@ -1972,7 +1993,6 @@ static struct usb_driver xpad_driver = {
+ .disconnect = xpad_disconnect,
+ .suspend = xpad_suspend,
+ .resume = xpad_resume,
+- .reset_resume = xpad_resume,
+ .id_table = xpad_table,
+ };
+
+diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
+index e75650e98c9ef..e402915cc0c00 100644
+--- a/drivers/input/keyboard/Kconfig
++++ b/drivers/input/keyboard/Kconfig
+@@ -556,7 +556,7 @@ config KEYBOARD_PMIC8XXX
+
+ config KEYBOARD_SAMSUNG
+ tristate "Samsung keypad support"
+- depends on HAVE_CLK
++ depends on HAS_IOMEM && HAVE_CLK
+ select INPUT_MATRIXKMAP
+ help
+ Say Y here if you want to use the keypad on your Samsung mobile
+diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
+index 8dbf1e69c90ac..22a91db645b8f 100644
+--- a/drivers/input/keyboard/gpio_keys.c
++++ b/drivers/input/keyboard/gpio_keys.c
+@@ -131,7 +131,7 @@ static void gpio_keys_quiesce_key(void *data)
+
+ if (!bdata->gpiod)
+ hrtimer_cancel(&bdata->release_timer);
+- if (bdata->debounce_use_hrtimer)
++ else if (bdata->debounce_use_hrtimer)
+ hrtimer_cancel(&bdata->debounce_timer);
+ else
+ cancel_delayed_work_sync(&bdata->work);
+diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
+index 43375b38ee592..8a7ce41b8c56e 100644
+--- a/drivers/input/keyboard/omap4-keypad.c
++++ b/drivers/input/keyboard/omap4-keypad.c
+@@ -393,7 +393,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
+ * revision register.
+ */
+ error = pm_runtime_get_sync(dev);
+- if (error) {
++ if (error < 0) {
+ dev_err(dev, "pm_runtime_get_sync() failed\n");
+ pm_runtime_put_noidle(dev);
+ return error;
+diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c
+index 65286762b02ab..ad8660be0127c 100644
+--- a/drivers/input/keyboard/snvs_pwrkey.c
++++ b/drivers/input/keyboard/snvs_pwrkey.c
+@@ -20,7 +20,7 @@
+ #include <linux/mfd/syscon.h>
+ #include <linux/regmap.h>
+
+-#define SNVS_HPVIDR1_REG 0xF8
++#define SNVS_HPVIDR1_REG 0xBF8
+ #define SNVS_LPSR_REG 0x4C /* LP Status Register */
+ #define SNVS_LPCR_REG 0x38 /* LP Control Register */
+ #define SNVS_HPSR_REG 0x14
+diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
+index 2a97559100652..508d84f6d00cb 100644
+--- a/drivers/input/keyboard/tca6416-keypad.c
++++ b/drivers/input/keyboard/tca6416-keypad.c
+@@ -148,7 +148,7 @@ static int tca6416_keys_open(struct input_dev *dev)
+ if (chip->use_polling)
+ schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
+ else
+- enable_irq(chip->irqnum);
++ enable_irq(chip->client->irq);
+
+ return 0;
+ }
+@@ -160,7 +160,7 @@ static void tca6416_keys_close(struct input_dev *dev)
+ if (chip->use_polling)
+ cancel_delayed_work_sync(&chip->dwork);
+ else
+- disable_irq(chip->irqnum);
++ disable_irq(chip->client->irq);
+ }
+
+ static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
+@@ -266,12 +266,7 @@ static int tca6416_keypad_probe(struct i2c_client *client,
+ goto fail1;
+
+ if (!chip->use_polling) {
+- if (pdata->irq_is_gpio)
+- chip->irqnum = gpio_to_irq(client->irq);
+- else
+- chip->irqnum = client->irq;
+-
+- error = request_threaded_irq(chip->irqnum, NULL,
++ error = request_threaded_irq(client->irq, NULL,
+ tca6416_keys_isr,
+ IRQF_TRIGGER_FALLING |
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
+@@ -279,7 +274,7 @@ static int tca6416_keypad_probe(struct i2c_client *client,
+ if (error) {
+ dev_dbg(&client->dev,
+ "Unable to claim irq %d; error %d\n",
+- chip->irqnum, error);
++ client->irq, error);
+ goto fail1;
+ }
+ }
+@@ -297,10 +292,8 @@ static int tca6416_keypad_probe(struct i2c_client *client,
+ return 0;
+
+ fail2:
+- if (!chip->use_polling) {
+- free_irq(chip->irqnum, chip);
+- enable_irq(chip->irqnum);
+- }
++ if (!chip->use_polling)
++ free_irq(client->irq, chip);
+ fail1:
+ input_free_device(input);
+ kfree(chip);
+@@ -311,10 +304,8 @@ static int tca6416_keypad_remove(struct i2c_client *client)
+ {
+ struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+
+- if (!chip->use_polling) {
+- free_irq(chip->irqnum, chip);
+- enable_irq(chip->irqnum);
+- }
++ if (!chip->use_polling)
++ free_irq(client->irq, chip);
+
+ input_unregister_device(chip->input);
+ kfree(chip);
+@@ -326,10 +317,9 @@ static int tca6416_keypad_remove(struct i2c_client *client)
+ static int tca6416_keypad_suspend(struct device *dev)
+ {
+ struct i2c_client *client = to_i2c_client(dev);
+- struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+
+ if (device_may_wakeup(dev))
+- enable_irq_wake(chip->irqnum);
++ enable_irq_wake(client->irq);
+
+ return 0;
+ }
+@@ -337,10 +327,9 @@ static int tca6416_keypad_suspend(struct device *dev)
+ static int tca6416_keypad_resume(struct device *dev)
+ {
+ struct i2c_client *client = to_i2c_client(dev);
+- struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+
+ if (device_may_wakeup(dev))
+- disable_irq_wake(chip->irqnum);
++ disable_irq_wake(client->irq);
+
+ return 0;
+ }
+diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
+index dd5227cf86964..b5b8ddb536be4 100644
+--- a/drivers/input/misc/Kconfig
++++ b/drivers/input/misc/Kconfig
+@@ -330,7 +330,7 @@ config INPUT_CPCAP_PWRBUTTON
+
+ config INPUT_WISTRON_BTNS
+ tristate "x86 Wistron laptop button interface"
+- depends on X86_32
++ depends on X86_32 && !UML
+ select INPUT_SPARSEKMAP
+ select NEW_LEDS
+ select LEDS_CLASS
+diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c
+index 4cc4e8ff42b33..ad035c342cd3b 100644
+--- a/drivers/input/misc/adxl34x.c
++++ b/drivers/input/misc/adxl34x.c
+@@ -811,8 +811,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq,
+ AC_WRITE(ac, POWER_CTL, 0);
+
+ err = request_threaded_irq(ac->irq, NULL, adxl34x_irq,
+- IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+- dev_name(dev), ac);
++ IRQF_ONESHOT, dev_name(dev), ac);
+ if (err) {
+ dev_err(dev, "irq %d busy?\n", ac->irq);
+ goto err_free_mem;
+diff --git a/drivers/input/misc/ariel-pwrbutton.c b/drivers/input/misc/ariel-pwrbutton.c
+index 17bbaac8b80c8..cdc80715b5fd6 100644
+--- a/drivers/input/misc/ariel-pwrbutton.c
++++ b/drivers/input/misc/ariel-pwrbutton.c
+@@ -149,12 +149,19 @@ static const struct of_device_id ariel_pwrbutton_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, ariel_pwrbutton_of_match);
+
++static const struct spi_device_id ariel_pwrbutton_spi_ids[] = {
++ { .name = "wyse-ariel-ec-input" },
++ { }
++};
++MODULE_DEVICE_TABLE(spi, ariel_pwrbutton_spi_ids);
++
+ static struct spi_driver ariel_pwrbutton_driver = {
+ .driver = {
+ .name = "dell-wyse-ariel-ec-input",
+ .of_match_table = ariel_pwrbutton_of_match,
+ },
+ .probe = ariel_pwrbutton_probe,
++ .id_table = ariel_pwrbutton_spi_ids,
+ };
+ module_spi_driver(ariel_pwrbutton_driver);
+
+diff --git a/drivers/input/misc/cpcap-pwrbutton.c b/drivers/input/misc/cpcap-pwrbutton.c
+index 0abef63217e21..372cb44d06357 100644
+--- a/drivers/input/misc/cpcap-pwrbutton.c
++++ b/drivers/input/misc/cpcap-pwrbutton.c
+@@ -54,9 +54,13 @@ static irqreturn_t powerbutton_irq(int irq, void *_button)
+ static int cpcap_power_button_probe(struct platform_device *pdev)
+ {
+ struct cpcap_power_button *button;
+- int irq = platform_get_irq(pdev, 0);
++ int irq;
+ int err;
+
++ irq = platform_get_irq(pdev, 0);
++ if (irq < 0)
++ return irq;
++
+ button = devm_kmalloc(&pdev->dev, sizeof(*button), GFP_KERNEL);
+ if (!button)
+ return -ENOMEM;
+diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c
+index 0efe56f49aa94..1923924fdd444 100644
+--- a/drivers/input/misc/drv260x.c
++++ b/drivers/input/misc/drv260x.c
+@@ -435,6 +435,7 @@ static int drv260x_init(struct drv260x_data *haptics)
+ }
+
+ do {
++ usleep_range(15000, 15500);
+ error = regmap_read(haptics->regmap, DRV260X_GO, &cal_buf);
+ if (error) {
+ dev_err(&haptics->client->dev,
+diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c
+index d57e996732cf4..23b5dd9552dcc 100644
+--- a/drivers/input/misc/iqs626a.c
++++ b/drivers/input/misc/iqs626a.c
+@@ -456,9 +456,10 @@ struct iqs626_private {
+ unsigned int suspend_mode;
+ };
+
+-static int iqs626_parse_events(struct iqs626_private *iqs626,
+- const struct fwnode_handle *ch_node,
+- enum iqs626_ch_id ch_id)
++static noinline_for_stack int
++iqs626_parse_events(struct iqs626_private *iqs626,
++ const struct fwnode_handle *ch_node,
++ enum iqs626_ch_id ch_id)
+ {
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+@@ -604,9 +605,10 @@ static int iqs626_parse_events(struct iqs626_private *iqs626,
+ return 0;
+ }
+
+-static int iqs626_parse_ati_target(struct iqs626_private *iqs626,
+- const struct fwnode_handle *ch_node,
+- enum iqs626_ch_id ch_id)
++static noinline_for_stack int
++iqs626_parse_ati_target(struct iqs626_private *iqs626,
++ const struct fwnode_handle *ch_node,
++ enum iqs626_ch_id ch_id)
+ {
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+@@ -885,9 +887,10 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626,
+ return 0;
+ }
+
+-static int iqs626_parse_channel(struct iqs626_private *iqs626,
+- const struct fwnode_handle *ch_node,
+- enum iqs626_ch_id ch_id)
++static noinline_for_stack int
++iqs626_parse_channel(struct iqs626_private *iqs626,
++ const struct fwnode_handle *ch_node,
++ enum iqs626_ch_id ch_id)
+ {
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+diff --git a/drivers/input/misc/rk805-pwrkey.c b/drivers/input/misc/rk805-pwrkey.c
+index 3fb64dbda1a21..76873aa005b41 100644
+--- a/drivers/input/misc/rk805-pwrkey.c
++++ b/drivers/input/misc/rk805-pwrkey.c
+@@ -98,6 +98,7 @@ static struct platform_driver rk805_pwrkey_driver = {
+ };
+ module_platform_driver(rk805_pwrkey_driver);
+
++MODULE_ALIAS("platform:rk805-pwrkey");
+ MODULE_AUTHOR("Joseph Chen <chenjh@rock-chips.com>");
+ MODULE_DESCRIPTION("RK805 PMIC Power Key driver");
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
+index cb6ec59a045d4..67a134c8448d2 100644
+--- a/drivers/input/misc/soc_button_array.c
++++ b/drivers/input/misc/soc_button_array.c
+@@ -18,6 +18,10 @@
+ #include <linux/gpio.h>
+ #include <linux/platform_device.h>
+
++static bool use_low_level_irq;
++module_param(use_low_level_irq, bool, 0444);
++MODULE_PARM_DESC(use_low_level_irq, "Use low-level triggered IRQ instead of edge triggered");
++
+ struct soc_button_info {
+ const char *name;
+ int acpi_index;
+@@ -73,6 +77,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"),
+ },
+ },
++ {
++ /* Acer Switch V 10 SW5-017, same issue as Acer Switch 10 SW5-012. */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "SW5-017"),
++ },
++ },
+ {
+ /*
+ * Acer One S1003. _LID method messes with power-button GPIO
+@@ -85,14 +96,35 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = {
+ },
+ {
+ /*
+- * Lenovo Yoga Tab2 1051L, something messes with the home-button
++ * Lenovo Yoga Tab2 1051F/1051L, something messes with the home-button
+ * IRQ settings, leading to a non working home-button.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "60073"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "1051L"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "1051"),
++ },
++ },
++ {} /* Terminating entry */
++};
++
++/*
++ * Some devices have a wrong entry which points to a GPIO which is
++ * required in another driver, so this driver must not claim it.
++ */
++static const struct dmi_system_id dmi_invalid_acpi_index[] = {
++ {
++ /*
++ * Lenovo Yoga Book X90F / X90L, the PNP0C40 home button entry
++ * points to a GPIO which is not a home button and which is
++ * required by the lenovo-yogabook driver.
++ */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+ },
++ .driver_data = (void *)1l,
+ },
+ {} /* Terminating entry */
+ };
+@@ -126,6 +158,8 @@ soc_button_device_create(struct platform_device *pdev,
+ struct platform_device *pd;
+ struct gpio_keys_button *gpio_keys;
+ struct gpio_keys_platform_data *gpio_keys_pdata;
++ const struct dmi_system_id *dmi_id;
++ int invalid_acpi_index = -1;
+ int error, gpio, irq;
+ int n_buttons = 0;
+
+@@ -143,10 +177,17 @@ soc_button_device_create(struct platform_device *pdev,
+ gpio_keys = (void *)(gpio_keys_pdata + 1);
+ n_buttons = 0;
+
++ dmi_id = dmi_first_match(dmi_invalid_acpi_index);
++ if (dmi_id)
++ invalid_acpi_index = (long)dmi_id->driver_data;
++
+ for (info = button_info; info->name; info++) {
+ if (info->autorepeat != autorepeat)
+ continue;
+
++ if (info->acpi_index == invalid_acpi_index)
++ continue;
++
+ error = soc_button_lookup_gpio(&pdev->dev, info->acpi_index, &gpio, &irq);
+ if (error || irq < 0) {
+ /*
+@@ -164,7 +205,8 @@ soc_button_device_create(struct platform_device *pdev,
+ }
+
+ /* See dmi_use_low_level_irq[] comment */
+- if (!autorepeat && dmi_check_system(dmi_use_low_level_irq)) {
++ if (!autorepeat && (use_low_level_irq ||
++ dmi_check_system(dmi_use_low_level_irq))) {
+ irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+ gpio_keys[n_buttons].irq = irq;
+ gpio_keys[n_buttons].gpio = -ENOENT;
+diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
+index fe43e5557ed72..cdcb7737c46aa 100644
+--- a/drivers/input/misc/sparcspkr.c
++++ b/drivers/input/misc/sparcspkr.c
+@@ -205,6 +205,7 @@ static int bbc_beep_probe(struct platform_device *op)
+
+ info = &state->u.bbc;
+ info->clock_freq = of_getintprop_default(dp, "clock-frequency", 0);
++ of_node_put(dp);
+ if (!info->clock_freq)
+ goto out_free;
+
+diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
+index 4a6b33bbe7eaf..dd08ce97e7c90 100644
+--- a/drivers/input/mouse/alps.c
++++ b/drivers/input/mouse/alps.c
+@@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse)
+ x = y = z = 0;
+
+ /* Divide 4 since trackpoint's speed is too fast */
+- input_report_rel(dev2, REL_X, (char)x / 4);
+- input_report_rel(dev2, REL_Y, -((char)y / 4));
++ input_report_rel(dev2, REL_X, (s8)x / 4);
++ input_report_rel(dev2, REL_Y, -((s8)y / 4));
+
+ psmouse_report_standard_buttons(dev2, packet[3]);
+
+@@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
+ ((packet[3] & 0x20) << 1);
+ z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1);
+
+- input_report_rel(dev2, REL_X, (char)x);
+- input_report_rel(dev2, REL_Y, -((char)y));
++ input_report_rel(dev2, REL_X, (s8)x);
++ input_report_rel(dev2, REL_Y, -((s8)y));
+ input_report_abs(dev2, ABS_PRESSURE, z);
+
+ psmouse_report_standard_buttons(dev2, packet[1]);
+@@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch)
+ if (reg < 0)
+ return reg;
+
+- x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
++ x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */
+
+- y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */
++ y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */
+ y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */
+
+ reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1);
+ if (reg < 0)
+ return reg;
+
+- x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
++ x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ x_electrode = 17 + x_electrode;
+
+- y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */
++ y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */
+ y_electrode = 13 + y_electrode;
+
+ x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */
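
The (char) -> (s8) changes above matter because the signedness of plain char is implementation-defined: on architectures where char is unsigned (arm, s390, powerpc, ...) the old casts could never produce negative deltas. The shift-based trick used for the 4-bit pitch fields works like this (illustrative sketch; sign_extend32() from <linux/bitops.h> would be an equivalent helper):

#include <linux/types.h>

/* Sign-extend the low 4 bits of a register byte: 0x0..0x7 -> 0..7, 0x8..0xf -> -8..-1. */
static inline int example_sign_extend_low_nibble(u8 reg)
{
        return (s8)(reg << 4) >> 4;
}

/* e.g. reg = 0x0e: (s8)0xe0 == -32, and -32 >> 4 == -2 */
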
+diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
+index bfa26651c0be7..627048bc6a12e 100644
+--- a/drivers/input/mouse/appletouch.c
++++ b/drivers/input/mouse/appletouch.c
+@@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface,
+ set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
+ set_bit(BTN_LEFT, input_dev->keybit);
+
++ INIT_WORK(&dev->work, atp_reinit);
++
+ error = input_register_device(dev->input);
+ if (error)
+ goto err_free_buffer;
+@@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface,
+ /* save our data pointer in this interface device */
+ usb_set_intfdata(iface, dev);
+
+- INIT_WORK(&dev->work, atp_reinit);
+-
+ return 0;
+
+ err_free_buffer:
+diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
+index 59a14505b9cd1..ca150618d32f1 100644
+--- a/drivers/input/mouse/bcm5974.c
++++ b/drivers/input/mouse/bcm5974.c
+@@ -942,17 +942,22 @@ static int bcm5974_probe(struct usb_interface *iface,
+ if (!dev->tp_data)
+ goto err_free_bt_buffer;
+
+- if (dev->bt_urb)
++ if (dev->bt_urb) {
+ usb_fill_int_urb(dev->bt_urb, udev,
+ usb_rcvintpipe(udev, cfg->bt_ep),
+ dev->bt_data, dev->cfg.bt_datalen,
+ bcm5974_irq_button, dev, 1);
+
++ dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
++ }
++
+ usb_fill_int_urb(dev->tp_urb, udev,
+ usb_rcvintpipe(udev, cfg->tp_ep),
+ dev->tp_data, dev->cfg.tp_datalen,
+ bcm5974_irq_trackpad, dev, 1);
+
++ dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
++
+ /* create bcm5974 device */
+ usb_make_path(udev, dev->phys, sizeof(dev->phys));
+ strlcat(dev->phys, "/input0", sizeof(dev->phys));
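
URB_NO_TRANSFER_DMA_MAP, set above for both URBs, tells the USB core that the driver already provides a DMA address in urb->transfer_dma (typically from usb_alloc_coherent()), so the core must not DMA-map transfer_buffer a second time. A hedged sketch of the usual pairing; the helper name and error handling are illustrative, not taken from bcm5974:

#include <linux/slab.h>
#include <linux/usb.h>

static int example_setup_int_urb(struct usb_device *udev, struct urb *urb,
                                 unsigned int pipe, size_t len,
                                 usb_complete_t complete_fn, void *context)
{
        dma_addr_t dma;
        void *buf = usb_alloc_coherent(udev, len, GFP_KERNEL, &dma);

        if (!buf)
                return -ENOMEM;

        usb_fill_int_urb(urb, udev, pipe, buf, len, complete_fn, context, 1);
        urb->transfer_dma = dma;
        urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;  /* buffer is already DMA-capable */

        return 0;
}
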
+diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
+index 47af62c122672..e1758d5ffe421 100644
+--- a/drivers/input/mouse/elan_i2c_core.c
++++ b/drivers/input/mouse/elan_i2c_core.c
+@@ -186,55 +186,21 @@ static int elan_get_fwinfo(u16 ic_type, u8 iap_version, u16 *validpage_count,
+ return 0;
+ }
+
+-static int elan_enable_power(struct elan_tp_data *data)
++static int elan_set_power(struct elan_tp_data *data, bool on)
+ {
+ int repeat = ETP_RETRY_COUNT;
+ int error;
+
+- error = regulator_enable(data->vcc);
+- if (error) {
+- dev_err(&data->client->dev,
+- "failed to enable regulator: %d\n", error);
+- return error;
+- }
+-
+ do {
+- error = data->ops->power_control(data->client, true);
++ error = data->ops->power_control(data->client, on);
+ if (error >= 0)
+ return 0;
+
+ msleep(30);
+ } while (--repeat > 0);
+
+- dev_err(&data->client->dev, "failed to enable power: %d\n", error);
+- return error;
+-}
+-
+-static int elan_disable_power(struct elan_tp_data *data)
+-{
+- int repeat = ETP_RETRY_COUNT;
+- int error;
+-
+- do {
+- error = data->ops->power_control(data->client, false);
+- if (!error) {
+- error = regulator_disable(data->vcc);
+- if (error) {
+- dev_err(&data->client->dev,
+- "failed to disable regulator: %d\n",
+- error);
+- /* Attempt to power the chip back up */
+- data->ops->power_control(data->client, true);
+- break;
+- }
+-
+- return 0;
+- }
+-
+- msleep(30);
+- } while (--repeat > 0);
+-
+- dev_err(&data->client->dev, "failed to disable power: %d\n", error);
++ dev_err(&data->client->dev, "failed to set power %s: %d\n",
++ on ? "on" : "off", error);
+ return error;
+ }
+
+@@ -1399,9 +1365,19 @@ static int __maybe_unused elan_suspend(struct device *dev)
+ /* Enable wake from IRQ */
+ data->irq_wake = (enable_irq_wake(client->irq) == 0);
+ } else {
+- ret = elan_disable_power(data);
++ ret = elan_set_power(data, false);
++ if (ret)
++ goto err;
++
++ ret = regulator_disable(data->vcc);
++ if (ret) {
++ dev_err(dev, "error %d disabling regulator\n", ret);
++ /* Attempt to power the chip back up */
++ elan_set_power(data, true);
++ }
+ }
+
++err:
+ mutex_unlock(&data->sysfs_mutex);
+ return ret;
+ }
+@@ -1412,12 +1388,18 @@ static int __maybe_unused elan_resume(struct device *dev)
+ struct elan_tp_data *data = i2c_get_clientdata(client);
+ int error;
+
+- if (device_may_wakeup(dev) && data->irq_wake) {
++ if (!device_may_wakeup(dev)) {
++ error = regulator_enable(data->vcc);
++ if (error) {
++ dev_err(dev, "error %d enabling regulator\n", error);
++ goto err;
++ }
++ } else if (data->irq_wake) {
+ disable_irq_wake(client->irq);
+ data->irq_wake = false;
+ }
+
+- error = elan_enable_power(data);
++ error = elan_set_power(data, true);
+ if (error) {
+ dev_err(dev, "power up when resuming failed: %d\n", error);
+ goto err;
+diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
+index 2d0bc029619ff..2118b2075f437 100644
+--- a/drivers/input/mouse/elantech.c
++++ b/drivers/input/mouse/elantech.c
+@@ -517,6 +517,19 @@ static void elantech_report_trackpoint(struct psmouse *psmouse,
+ case 0x16008020U:
+ case 0x26800010U:
+ case 0x36808000U:
++
++ /*
++ * This firmware occasionally misreports trackpoint coordinates.
++ * Discard packets outside of the [-127, 127] range to prevent
++ * cursor jumps.
++ */
++ if (packet[4] == 0x80 || packet[5] == 0x80 ||
++ packet[1] >> 7 == packet[4] >> 7 ||
++ packet[2] >> 7 == packet[5] >> 7) {
++ elantech_debug("discarding packet [%6ph]\n", packet);
++ break;
++
++ }
+ x = packet[4] - (int)((packet[1]^0x80) << 1);
+ y = (int)((packet[2]^0x80) << 1) - packet[5];
+
+@@ -661,10 +674,11 @@ static void process_packet_head_v4(struct psmouse *psmouse)
+ struct input_dev *dev = psmouse->dev;
+ struct elantech_data *etd = psmouse->private;
+ unsigned char *packet = psmouse->packet;
+- int id = ((packet[3] & 0xe0) >> 5) - 1;
++ int id;
+ int pres, traces;
+
+- if (id < 0)
++ id = ((packet[3] & 0xe0) >> 5) - 1;
++ if (id < 0 || id >= ETP_MAX_FINGERS)
+ return;
+
+ etd->mt[id].x = ((packet[1] & 0x0f) << 8) | packet[2];
+@@ -694,7 +708,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse)
+ int id, sid;
+
+ id = ((packet[0] & 0xe0) >> 5) - 1;
+- if (id < 0)
++ if (id < 0 || id >= ETP_MAX_FINGERS)
+ return;
+
+ sid = ((packet[3] & 0xe0) >> 5) - 1;
+@@ -715,7 +729,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse)
+ input_report_abs(dev, ABS_MT_POSITION_X, etd->mt[id].x);
+ input_report_abs(dev, ABS_MT_POSITION_Y, etd->mt[id].y);
+
+- if (sid >= 0) {
++ if (sid >= 0 && sid < ETP_MAX_FINGERS) {
+ etd->mt[sid].x += delta_x2 * weight;
+ etd->mt[sid].y -= delta_y2 * weight;
+ input_mt_slot(dev, sid);
+@@ -1575,7 +1589,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = {
+ */
+ static int elantech_change_report_id(struct psmouse *psmouse)
+ {
+- unsigned char param[2] = { 0x10, 0x03 };
++ /*
++ * NOTE: the code expects to receive param[] as an array of 3
++ * items (see __ps2_command()), even though in this case only 2 are
++ * actually needed. Make sure the array size is 3 to avoid potential
++ * stack out-of-bounds accesses.
++ */
++ unsigned char param[3] = { 0x10, 0x03 };
+
+ if (elantech_write_reg_params(psmouse, 0x7, param) ||
+ elantech_read_reg_params(psmouse, 0x7, param) ||
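
The added range checks above guard a slot number decoded from the top three bits of a packet byte: ((byte & 0xe0) >> 5) - 1 can yield anything from -1 to 6, while the mt[] array only has ETP_MAX_FINGERS entries, so both ends need checking before indexing. In isolation (illustrative helper, with max_fingers standing in for ETP_MAX_FINGERS):

/* Decode a finger slot from bits 7:5 of a packet byte; -1 means "ignore". */
static int example_decode_slot(unsigned char byte, int max_fingers)
{
        int id = ((byte & 0xe0) >> 5) - 1;

        return (id < 0 || id >= max_fingers) ? -1 : id;
}
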
+diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c
+index 6fd5fff0cbfff..c74b99077d16a 100644
+--- a/drivers/input/mouse/focaltech.c
++++ b/drivers/input/mouse/focaltech.c
+@@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
+ state->pressed = packet[0] >> 7;
+ finger1 = ((packet[0] >> 4) & 0x7) - 1;
+ if (finger1 < FOC_MAX_FINGERS) {
+- state->fingers[finger1].x += (char)packet[1];
+- state->fingers[finger1].y += (char)packet[2];
++ state->fingers[finger1].x += (s8)packet[1];
++ state->fingers[finger1].y += (s8)packet[2];
+ } else {
+ psmouse_err(psmouse, "First finger in rel packet invalid: %d\n",
+ finger1);
+@@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
+ */
+ finger2 = ((packet[3] >> 4) & 0x7) - 1;
+ if (finger2 < FOC_MAX_FINGERS) {
+- state->fingers[finger2].x += (char)packet[4];
+- state->fingers[finger2].y += (char)packet[5];
++ state->fingers[finger2].x += (s8)packet[4];
++ state->fingers[finger2].y += (s8)packet[5];
+ }
+ }
+
+diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c
+index a9065c6ab5508..da2c67cb86422 100644
+--- a/drivers/input/serio/gscps2.c
++++ b/drivers/input/serio/gscps2.c
+@@ -350,6 +350,10 @@ static int __init gscps2_probe(struct parisc_device *dev)
+ ps2port->port = serio;
+ ps2port->padev = dev;
+ ps2port->addr = ioremap(hpa, GSC_STATUS + 4);
++ if (!ps2port->addr) {
++ ret = -ENOMEM;
++ goto fail_nomem;
++ }
+ spin_lock_init(&ps2port->lock);
+
+ gscps2_reset(ps2port);
+diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
+index a5a0035536462..339e765bcf5ae 100644
+--- a/drivers/input/serio/i8042-x86ia64io.h
++++ b/drivers/input/serio/i8042-x86ia64io.h
+@@ -67,605 +67,775 @@ static inline void i8042_write_command(int val)
+
+ #include <linux/dmi.h>
+
+-static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
++#define SERIO_QUIRK_NOKBD BIT(0)
++#define SERIO_QUIRK_NOAUX BIT(1)
++#define SERIO_QUIRK_NOMUX BIT(2)
++#define SERIO_QUIRK_FORCEMUX BIT(3)
++#define SERIO_QUIRK_UNLOCK BIT(4)
++#define SERIO_QUIRK_PROBE_DEFER BIT(5)
++#define SERIO_QUIRK_RESET_ALWAYS BIT(6)
++#define SERIO_QUIRK_RESET_NEVER BIT(7)
++#define SERIO_QUIRK_DIECT BIT(8)
++#define SERIO_QUIRK_DUMBKBD BIT(9)
++#define SERIO_QUIRK_NOLOOP BIT(10)
++#define SERIO_QUIRK_NOTIMEOUT BIT(11)
++#define SERIO_QUIRK_KBDRESET BIT(12)
++#define SERIO_QUIRK_DRITEK BIT(13)
++#define SERIO_QUIRK_NOPNP BIT(14)
++
++/* Quirk table for different mainboards. Options similar or identical to i8042
++ * module parameters.
++ * ORDERING IS IMPORTANT! The first match will be applied and the rest ignored.
++ * This allows entries to overwrite vendor wide quirks on a per device basis.
++ * Where this is irrelevant, entries are sorted case sensitive by DMI_SYS_VENDOR
++ * and/or DMI_BOARD_VENDOR to make it easier to avoid duplicate entries.
++ */
++static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
+ {
+- /*
+- * Arima-Rioworks HDAMB -
+- * AUX LOOP command does not raise AUX IRQ
+- */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "RIOWORKS"),
+- DMI_MATCH(DMI_BOARD_NAME, "HDAMB"),
+- DMI_MATCH(DMI_BOARD_VERSION, "Rev E"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ALIENWARE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Sentia"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* ASUS G1S */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."),
+- DMI_MATCH(DMI_BOARD_NAME, "G1S"),
+- DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "X750LN"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+- /* ASUS P65UP5 - AUX LOOP command does not raise AUX IRQ */
++ /* Asus X450LCP */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+- DMI_MATCH(DMI_BOARD_NAME, "P/I-P65UP5"),
+- DMI_MATCH(DMI_BOARD_VERSION, "REV 2.X"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "X450LCP"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_NEVER)
+ },
+ {
++ /* ASUS ZenBook UX425UA/QA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X750LN"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER)
+ },
+ {
++ /* ASUS ZenBook UM325UA/QA */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+- DMI_MATCH(DMI_PRODUCT_NAME , "ProLiant"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "8500"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER)
+ },
++ /*
++ * On some Asus laptops, just running self tests cause problems.
++ */
+ {
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+- DMI_MATCH(DMI_PRODUCT_NAME , "ProLiant"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_NEVER)
+ },
+ {
+- /* Dell Embedded Box PC 3000 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_NEVER)
+ },
+ {
+- /* OQO Model 01 */
++ /* ASUS P65UP5 - AUX LOOP command does not raise AUX IRQ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "OQO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "ZEPTO"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "00"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
++ DMI_MATCH(DMI_BOARD_NAME, "P/I-P65UP5"),
++ DMI_MATCH(DMI_BOARD_VERSION, "REV 2.X"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+- /* ULI EV4873 - AUX LOOP does not work properly */
++ /* ASUS G1S */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ULI"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "EV4873"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "5a"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."),
++ DMI_MATCH(DMI_BOARD_NAME, "G1S"),
++ DMI_MATCH(DMI_BOARD_VERSION, "1.0"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+- /* Microsoft Virtual Machine */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Virtual Machine"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "VS2005R2"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Medion MAM 2070 */
++ /* Acer Aspire 5710 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "MAM 2070"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "5a"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Medion Akoya E7225 */
++ /* Acer Aspire 7738 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Medion"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Akoya E7225"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7738"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Blue FB5601 */
++ /* Acer Aspire 5536 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "blue"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "FB5601"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "M606"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5536"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Gigabyte M912 */
++ /*
++ * Acer Aspire 5738z
++ * Touchpad stops working in mux mode when dis- + re-enabled
++ * with the touchpad enable/disable toggle hotkey
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "M912"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "01"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Gigabyte M1022M netbook */
++ /* Acer Aspire One 150 */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co.,Ltd."),
+- DMI_MATCH(DMI_BOARD_NAME, "M1022E"),
+- DMI_MATCH(DMI_BOARD_VERSION, "1.02"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "AOA150"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Gigabyte Spring Peak - defines wrong chassis type */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Spring Peak"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A114-31"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Gigabyte T1005 - defines wrong chassis type ("Other") */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "T1005"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A314-31"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Gigabyte T1005M/P - defines wrong chassis type ("Other") */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "T1005M/P"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-31"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv9700"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-132"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "C15B"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-332"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-432"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+- { }
+-};
+-
+-/*
+- * Some Fujitsu notebooks are having trouble with touchpads if
+- * active multiplexing mode is activated. Luckily they don't have
+- * external PS/2 ports so we can safely disable it.
+- * ... apparently some Toshibas don't like MUX mode either and
+- * die horrible death on reboot.
+- */
+-static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
+ {
+- /* Fujitsu Lifebook P7010/P7010D */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "P7010"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate Spin B118-RN"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
++ /*
++ * Some Wistron based laptops need us to explicitly enable the 'Dritek
++ * keyboard extension' to make their extra keys start generating scancodes.
++ * Originally, this was just confined to older laptops, but a few Acer laptops
++ * have turned up in 2007 that also need this again.
++ */
+ {
+- /* Fujitsu Lifebook P7010 */
++ /* Acer Aspire 5100 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "0000000000"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5100"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu Lifebook P5020D */
++ /* Acer Aspire 5610 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook P Series"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5610"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu Lifebook S2000 */
++ /* Acer Aspire 5630 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S Series"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5630"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu Lifebook S6230 */
++ /* Acer Aspire 5650 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5650"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu Lifebook U745 */
++ /* Acer Aspire 5680 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U745"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5680"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu T70H */
++ /* Acer Aspire 5720 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "FMVLT70H"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5720"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu-Siemens Lifebook T3010 */
++ /* Acer Aspire 9110 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T3010"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 9110"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu-Siemens Lifebook E4010 */
++ /* Acer TravelMate 660 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E4010"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 660"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu-Siemens Amilo Pro 2010 */
++ /* Acer TravelMate 2490 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pro V2010"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 2490"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /* Fujitsu-Siemens Amilo Pro 2030 */
++ /* Acer TravelMate 4280 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 4280"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_DRITEK)
+ },
+ {
+- /*
+- * No data is coming from the touchscreen unless KBC
+- * is in legacy mode.
+- */
+- /* Panasonic CF-29 */
++ /* Amoi M636/A737 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Matsushita"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "CF-29"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Amoi Electronics CO.,LTD."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "M636/A737 platform"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /*
+- * HP Pavilion DV4017EA -
+- * errors on MUX ports are reported without raising AUXDATA
+- * causing "spurious NAK" messages.
+- */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion dv4000 (EA032EA#ABF)"),
++ DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+- /*
+- * HP Pavilion ZT1000 -
+- * like DV4017EA does not raise AUXERR for errors on MUX ports.
+- */
++ /* Compal HEL80I */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Notebook PC"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook ZT1000"),
++ DMI_MATCH(DMI_SYS_VENDOR, "COMPAL"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HEL80I"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /*
+- * HP Pavilion DV4270ca -
+- * like DV4017EA does not raise AUXERR for errors on MUX ports.
+- */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion dv4000 (EH476UA#ABL)"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "8500"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P10"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
++ /* Advent 4211 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "EQUIUM A110"),
++ DMI_MATCH(DMI_SYS_VENDOR, "DIXONSXP"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Advent 4211"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
++ /* Dell Embedded Box PC 3000 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
++ /* Dell XPS M1530 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ALIENWARE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Sentia"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "XPS M1530"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Sharp Actius MM20 */
++ /* Dell Vostro 1510 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "PC-MM20 Series"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro1510"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Sony Vaio FS-115b */
++ /* Dell Vostro V13 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FS115B"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V13"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /*
+- * Sony Vaio FZ-240E -
+- * reset and GET ID commands issued via KBD port are
+- * sometimes being delivered to AUX3.
+- */
++ /* Dell Vostro 1320 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FZ240E"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /*
+- * Most (all?) VAIOs do not have external PS/2 ports nor
+- * they implement active multiplexing properly, and
+- * MUX discovery usually messes up keyboard/touchpad.
+- */
++ /* Dell Vostro 1520 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+- DMI_MATCH(DMI_BOARD_NAME, "VAIO"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Amoi M636/A737 */
++ /* Dell Vostro 1720 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Amoi Electronics CO.,LTD."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "M636/A737 platform"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Lenovo 3000 n100 */
++ /* Entroware Proteus */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "076804U"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Entroware"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS)
+ },
++ /*
++ * Some Fujitsu notebooks are having trouble with touchpads if
++ * active multiplexing mode is activated. Luckily they don't have
++ * external PS/2 ports so we can safely disable it.
++ * ... apparently some Toshibas don't like MUX mode either and
++ * die horrible death on reboot.
++ */
+ {
+- /* Lenovo XiaoXin Air 12 */
++ /* Fujitsu Lifebook P7010/P7010D */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "80UN"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P7010"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
++ /* Fujitsu Lifebook P5020D */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook P Series"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Acer Aspire 5710 */
++ /* Fujitsu Lifebook S2000 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S Series"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Acer Aspire 7738 */
++ /* Fujitsu Lifebook S6230 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7738"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Gericom Bellagio */
++ /* Fujitsu Lifebook T725 laptop */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Gericom"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "N34AS6"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* IBM 2656 */
++ /* Fujitsu Lifebook U745 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "2656"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U745"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Dell XPS M1530 */
++ /* Fujitsu T70H */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "XPS M1530"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "FMVLT70H"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Compal HEL80I */
++ /* Fujitsu A544 laptop */
++ /* https://bugzilla.redhat.com/show_bug.cgi?id=1111138 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "COMPAL"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HEL80I"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* Dell Vostro 1510 */
++ /* Fujitsu AH544 laptop */
++ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro1510"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* Acer Aspire 5536 */
++ /* Fujitsu U574 laptop */
++ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5536"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* Dell Vostro V13 */
++ /* Fujitsu UH554 laptop */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V13"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* Newer HP Pavilion dv4 models */
++ /* Fujitsu Lifebook P7010 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "0000000000"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Asus X450LCP */
++ /* Fujitsu-Siemens Lifebook T3010 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X450LCP"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T3010"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Avatar AVIU-145A6 */
++ /* Fujitsu-Siemens Lifebook E4010 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Intel"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E4010"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* TUXEDO BU1406 */
++ /* Fujitsu-Siemens Amilo Pro 2010 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pro V2010"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Lenovo LaVie Z */
++ /* Fujitsu-Siemens Amilo Pro 2030 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /*
+- * Acer Aspire 5738z
+- * Touchpad stops working in mux mode when dis- + re-enabled
+- * with the touchpad enable/disable toggle hotkey
+- */
++ /* Fujitsu Lifebook A574/H */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"),
++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Entroware Proteus */
++ /* Gigabyte M912 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Entroware"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"),
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "M912"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "01"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Gigabyte Spring Peak - defines wrong chassis type */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Spring Peak"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Gigabyte T1005 - defines wrong chassis type ("Other") */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "T1005"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Gigabyte T1005M/P - defines wrong chassis type ("Other") */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "T1005M/P"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ /*
++ * Some laptops need keyboard reset before probing for the trackpad to get
++ * it detected, initialised & finally work.
++ */
++ {
++ /* Gigabyte P35 v2 - Elantech touchpad */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P35V2"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_KBDRESET)
++ },
++ {
++ /* Aorus branded Gigabyte X3 Plus - Elantech touchpad */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "X3"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_KBDRESET)
++ },
++ {
++ /* Gigabyte P34 - Elantech touchpad */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P34"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_KBDRESET)
++ },
++ {
++ /* Gigabyte P57 - Elantech touchpad */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P57"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_KBDRESET)
++ },
++ {
++ /* Gericom Bellagio */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Gericom"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "N34AS6"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
++ },
++ {
++ /* Gigabyte M1022M netbook */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co.,Ltd."),
++ DMI_MATCH(DMI_BOARD_NAME, "M1022E"),
++ DMI_MATCH(DMI_BOARD_VERSION, "1.02"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv9700"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+- { }
+-};
+-
+-static const struct dmi_system_id i8042_dmi_forcemux_table[] __initconst = {
+ {
+ /*
+- * Sony Vaio VGN-CS series require MUX or the touch sensor
+- * buttons will disturb touchpad operation
++ * HP Pavilion DV4017EA -
++ * errors on MUX ports are reported without raising AUXDATA
++ * causing "spurious NAK" messages.
+ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-CS"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion dv4000 (EA032EA#ABF)"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+- { }
+-};
+-
+-/*
+- * On some Asus laptops, just running self tests cause problems.
+- */
+-static const struct dmi_system_id i8042_dmi_noselftest_table[] = {
+ {
++ /*
++ * HP Pavilion ZT1000 -
++ * like DV4017EA does not raise AUXERR for errors on MUX ports.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Notebook PC"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook ZT1000"),
+ },
+- }, {
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
++ },
++ {
++ /*
++ * HP Pavilion DV4270ca -
++ * like DV4017EA does not raise AUXERR for errors on MUX ports.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+- DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion dv4000 (EH476UA#ABL)"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+- { }
+-};
+-static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
+ {
+- /* MSI Wind U-100 */
++ /* Newer HP Pavilion dv4 models */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "U-100"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_NOTIMEOUT)
+ },
+ {
+- /* LG Electronics X110 */
++ /* IBM 2656 */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "X110"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "LG Electronics Inc."),
++ DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "2656"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Acer Aspire One 150 */
++ /* Avatar AVIU-145A6 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "AOA150"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Intel"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
++ /* Intel MBO Desktop D845PESV */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A114-31"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "D845PESV"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOPNP)
+ },
+ {
++ /*
++ * Intel NUC D54250WYK - does not have i8042 controller but
++ * declares PS/2 devices in DSDT.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A314-31"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOPNP)
+ },
+ {
++ /* Lenovo 3000 n100 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-31"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "076804U"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
++ /* Lenovo XiaoXin Air 12 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-132"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "80UN"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
++ /* Lenovo LaVie Z */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-332"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo LaVie Z"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
++ /* Lenovo Ideapad U455 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-432"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "20046"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
++ /* Lenovo ThinkPad L460 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate Spin B118-RN"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L460"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Advent 4211 */
++ /* Lenovo ThinkPad Twist S230u */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "DIXONSXP"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Advent 4211"),
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "33474HU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
++ },
++ {
++ /* LG Electronics X110 */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "LG Electronics Inc."),
++ DMI_MATCH(DMI_BOARD_NAME, "X110"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+ /* Medion Akoya Mini E1210 */
+@@ -673,6 +843,7 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "E1210"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+ /* Medion Akoya E1222 */
+@@ -680,306 +851,441 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MEDION"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "E122X"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
+ {
+- /* Mivvy M310 */
++ /* MSI Wind U-100 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "VIOOO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "N10"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
++ DMI_MATCH(DMI_BOARD_NAME, "U-100"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOPNP)
++ },
++ {
++ /*
++ * No data is coming from the touchscreen unless KBC
++ * is in legacy mode.
++ */
++ /* Panasonic CF-29 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Matsushita"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "CF-29"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
++ },
++ {
++ /* Medion Akoya E7225 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Medion"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Akoya E7225"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Microsoft Virtual Machine */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Virtual Machine"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "VS2005R2"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Medion MAM 2070 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "MAM 2070"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "5a"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* TUXEDO BU1406 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
++ },
++ {
++ /* OQO Model 01 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "OQO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "ZEPTO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "00"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "C15B"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /* Acer Aspire 5 A515 */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "PK"),
++ DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOPNP)
++ },
++ {
++ /* ULI EV4873 - AUX LOOP does not work properly */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "ULI"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "EV4873"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "5a"),
++ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
++ },
++ {
++ /*
++ * Arima-Rioworks HDAMB -
++ * AUX LOOP command does not raise AUX IRQ
++ */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "RIOWORKS"),
++ DMI_MATCH(DMI_BOARD_NAME, "HDAMB"),
++ DMI_MATCH(DMI_BOARD_VERSION, "Rev E"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
+ {
+- /* Dell Vostro 1320 */
++ /* Sharp Actius MM20 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"),
++ DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "PC-MM20 Series"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Dell Vostro 1520 */
++ /*
++ * Sony Vaio FZ-240E -
++ * reset and GET ID commands issued via KBD port are
++ * sometimes being delivered to AUX3.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FZ240E"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Dell Vostro 1720 */
++ /*
++ * Most (all?) VAIOs do not have external PS/2 ports nor
++ * they implement active multiplexing properly, and
++ * MUX discovery usually messes up keyboard/touchpad.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "VAIO"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Lenovo Ideapad U455 */
++ /* Sony Vaio FS-115b */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "20046"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FS115B"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Lenovo ThinkPad L460 */
++ /*
++ * Sony Vaio VGN-CS series require MUX or the touch sensor
++ * buttons will disturb touchpad operation
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L460"),
++ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-CS"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_FORCEMUX)
+ },
+ {
+- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P10"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Lenovo ThinkPad Twist S230u */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "33474HU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "EQUIUM A110"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+ {
+- /* Entroware Proteus */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Entroware"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"),
+- DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ },
+- { }
+-};
+-
+-#ifdef CONFIG_PNP
+-static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = {
++ /*
++ * A lot of modern Clevo barebones have touchpad and/or keyboard issues
++ * after suspend fixable with nomux + reset + noloop + nopnp. Luckily,
++ * none of them have an external PS/2 port so this can safely be set for
++ * all of them. These two are based on a Clevo design, but have the
++ * board_name changed.
++ */
+ {
+- /* Intel MBO Desktop D845PESV */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "D845PESV"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /*
+- * Intel NUC D54250WYK - does not have i8042 controller but
+- * declares PS/2 devices in DSDT.
+- */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* MSI Wind U-100 */
++ /* Mivvy M310 */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "U-100"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
++ DMI_MATCH(DMI_SYS_VENDOR, "VIOOO"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "N10"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS)
+ },
++ /*
++ * Some laptops need keyboard reset before probing for the trackpad to get
++ * it detected, initialised & finally work.
++ */
+ {
+- /* Acer Aspire 5 A515 */
++ /* Schenker XMG C504 - Elantech touchpad */
+ .matches = {
+- DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"),
+- DMI_MATCH(DMI_BOARD_VENDOR, "PK"),
++ DMI_MATCH(DMI_SYS_VENDOR, "XMG"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "C504"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_KBDRESET)
+ },
+- { }
+-};
+-
+-static const struct dmi_system_id __initconst i8042_dmi_laptop_table[] = {
+ {
++ /* Blue FB5601 */
+ .matches = {
+- DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */
++ DMI_MATCH(DMI_SYS_VENDOR, "blue"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "FB5601"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "M606"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOLOOP)
+ },
++ /*
++ * A lot of modern Clevo barebones have touchpad and/or keyboard issues
++ * after suspend fixable with nomux + reset + noloop + nopnp. Luckily,
++ * none of them have an external PS/2 port so this can safely be set for
++ * all of them.
++ * Clevo barebones come with board_vendor and/or system_vendor set to
++ * either the very generic string "Notebook" and/or a different value
++ * for each individual reseller. The only somewhat universal way to
++ * identify them is by board_name.
++ */
+ {
+ .matches = {
+- DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */
++ DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
++ DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+ .matches = {
+- DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */
++ DMI_MATCH(DMI_BOARD_NAME, "N140CU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+- { }
+-};
+-#endif
+-
+-static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
+ {
+- /* Dell Vostro V13 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V13"),
++ DMI_MATCH(DMI_BOARD_NAME, "N141CU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Newer HP Pavilion dv4 models */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"),
++ DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Fujitsu A544 laptop */
+- /* https://bugzilla.redhat.com/show_bug.cgi?id=1111138 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"),
++ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
++ /*
++ * At least one modern Clevo barebone has the touchpad connected both
++ * via PS/2 and i2c interface. This causes a race condition between the
++ * psmouse and i2c-hid driver. Since the full capability of the touchpad
++ * is available via the i2c interface and the device has no external
++ * PS/2 port, it is safe to just ignore all ps2 mouses here to avoid
++ * this issue. The known affected device is the
++ * TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU which comes with one of
++ * the two different dmi strings below. NS50MU is not a typo!
++ */
+ {
+- /* Fujitsu AH544 laptop */
+- /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"),
++ DMI_MATCH(DMI_BOARD_NAME, "NS50MU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX |
++ SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP |
++ SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Fujitsu U574 laptop */
+- /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
++ DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX |
++ SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP |
++ SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Fujitsu UH554 laptop */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
++ DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+- { }
+-};
+-
+-/*
+- * Some Wistron based laptops need us to explicitly enable the 'Dritek
+- * keyboard extension' to make their extra keys start generating scancodes.
+- * Originally, this was just confined to older laptops, but a few Acer laptops
+- * have turned up in 2007 that also need this again.
+- */
+-static const struct dmi_system_id __initconst i8042_dmi_dritek_table[] = {
+ {
+- /* Acer Aspire 5100 */
++ /*
++ * This is only a partial board_name and might be followed by
++ * another letter or number. DMI_MATCH however does do partial
++ * matching.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5100"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 5610 */
++ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5610"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 5630 */
++ /*
++ * This is only a partial board_name and might be followed by
++ * another letter or number. DMI_MATCH however does do partial
++ * matching.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5630"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 5650 */
++ /*
++ * This is only a partial board_name and might be followed by
++ * another letter or number. DMI_MATCH however does do partial
++ * matching.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5650"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 5680 */
++ /*
++ * This is only a partial board_name and might be followed by
++ * another letter or number. DMI_MATCH however does do partial
++ * matching.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5680"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 5720 */
++ /*
++ * This is only a partial board_name and might be followed by
++ * another letter or number. DMI_MATCH however does do partial
++ * matching.
++ */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5720"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer Aspire 9110 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 9110"),
++ DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer TravelMate 660 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 660"),
++ DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer TravelMate 2490 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 2490"),
++ DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ {
+- /* Acer TravelMate 4280 */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 4280"),
++ DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"),
+ },
++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ },
+ { }
+ };
+
+-/*
+- * Some laptops need keyboard reset before probing for the trackpad to get
+- * it detected, initialised & finally work.
+- */
+-static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
++#ifdef CONFIG_PNP
++static const struct dmi_system_id i8042_dmi_laptop_table[] __initconst = {
+ {
+- /* Gigabyte P35 v2 - Elantech touchpad */
+- .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "P35V2"),
+- },
+- },
+- {
+- /* Aorus branded Gigabyte X3 Plus - Elantech touchpad */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "X3"),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */
+ },
+ },
+ {
+- /* Gigabyte P34 - Elantech touchpad */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "P34"),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */
+ },
+ },
+ {
+- /* Gigabyte P57 - Elantech touchpad */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "P57"),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
+ },
+ },
+ {
+- /* Schenker XMG C504 - Elantech touchpad */
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "XMG"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "C504"),
++ DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */
+ },
+ },
+ { }
+ };
++#endif
+
+ #endif /* CONFIG_X86 */
+
+@@ -1135,11 +1441,6 @@ static int __init i8042_pnp_init(void)
+ bool pnp_data_busted = false;
+ int err;
+
+-#ifdef CONFIG_X86
+- if (dmi_check_system(i8042_dmi_nopnp_table))
+- i8042_nopnp = true;
+-#endif
+-
+ if (i8042_nopnp) {
+ pr_info("PNP detection disabled\n");
+ return 0;
+@@ -1243,6 +1544,59 @@ static inline int i8042_pnp_init(void) { return 0; }
+ static inline void i8042_pnp_exit(void) { }
+ #endif /* CONFIG_PNP */
+
++
++#ifdef CONFIG_X86
++static void __init i8042_check_quirks(void)
++{
++ const struct dmi_system_id *device_quirk_info;
++ uintptr_t quirks;
++
++ device_quirk_info = dmi_first_match(i8042_dmi_quirk_table);
++ if (!device_quirk_info)
++ return;
++
++ quirks = (uintptr_t)device_quirk_info->driver_data;
++
++ if (quirks & SERIO_QUIRK_NOKBD)
++ i8042_nokbd = true;
++ if (quirks & SERIO_QUIRK_NOAUX)
++ i8042_noaux = true;
++ if (quirks & SERIO_QUIRK_NOMUX)
++ i8042_nomux = true;
++ if (quirks & SERIO_QUIRK_FORCEMUX)
++ i8042_nomux = false;
++ if (quirks & SERIO_QUIRK_UNLOCK)
++ i8042_unlock = true;
++ if (quirks & SERIO_QUIRK_PROBE_DEFER)
++ i8042_probe_defer = true;
++ /* Honor module parameter when value is not default */
++ if (i8042_reset == I8042_RESET_DEFAULT) {
++ if (quirks & SERIO_QUIRK_RESET_ALWAYS)
++ i8042_reset = I8042_RESET_ALWAYS;
++ if (quirks & SERIO_QUIRK_RESET_NEVER)
++ i8042_reset = I8042_RESET_NEVER;
++ }
++	if (quirks & SERIO_QUIRK_DIRECT)
++ i8042_direct = true;
++ if (quirks & SERIO_QUIRK_DUMBKBD)
++ i8042_dumbkbd = true;
++ if (quirks & SERIO_QUIRK_NOLOOP)
++ i8042_noloop = true;
++ if (quirks & SERIO_QUIRK_NOTIMEOUT)
++ i8042_notimeout = true;
++ if (quirks & SERIO_QUIRK_KBDRESET)
++ i8042_kbdreset = true;
++ if (quirks & SERIO_QUIRK_DRITEK)
++ i8042_dritek = true;
++#ifdef CONFIG_PNP
++ if (quirks & SERIO_QUIRK_NOPNP)
++ i8042_nopnp = true;
++#endif
++}
++#else
++static inline void i8042_check_quirks(void) {}
++#endif
++
+ static int __init i8042_platform_init(void)
+ {
+ int retval;
+@@ -1265,42 +1619,17 @@ static int __init i8042_platform_init(void)
+ i8042_kbd_irq = I8042_MAP_IRQ(1);
+ i8042_aux_irq = I8042_MAP_IRQ(12);
+
+- retval = i8042_pnp_init();
+- if (retval)
+- return retval;
+-
+ #if defined(__ia64__)
+- i8042_reset = I8042_RESET_ALWAYS;
++ i8042_reset = I8042_RESET_ALWAYS;
+ #endif
+
+-#ifdef CONFIG_X86
+- /* Honor module parameter when value is not default */
+- if (i8042_reset == I8042_RESET_DEFAULT) {
+- if (dmi_check_system(i8042_dmi_reset_table))
+- i8042_reset = I8042_RESET_ALWAYS;
+-
+- if (dmi_check_system(i8042_dmi_noselftest_table))
+- i8042_reset = I8042_RESET_NEVER;
+- }
+-
+- if (dmi_check_system(i8042_dmi_noloop_table))
+- i8042_noloop = true;
+-
+- if (dmi_check_system(i8042_dmi_nomux_table))
+- i8042_nomux = true;
+-
+- if (dmi_check_system(i8042_dmi_forcemux_table))
+- i8042_nomux = false;
+-
+- if (dmi_check_system(i8042_dmi_notimeout_table))
+- i8042_notimeout = true;
+-
+- if (dmi_check_system(i8042_dmi_dritek_table))
+- i8042_dritek = true;
++ i8042_check_quirks();
+
+- if (dmi_check_system(i8042_dmi_kbdreset_table))
+- i8042_kbdreset = true;
++ retval = i8042_pnp_init();
++ if (retval)
++ return retval;
+
++#ifdef CONFIG_X86
+ /*
+ * A20 was already enabled during early kernel init. But some buggy
+ * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to
+diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
+index 0b9f1d0a8f8b0..f132d6dfc25eb 100644
+--- a/drivers/input/serio/i8042.c
++++ b/drivers/input/serio/i8042.c
+@@ -45,6 +45,10 @@ static bool i8042_unlock;
+ module_param_named(unlock, i8042_unlock, bool, 0);
+ MODULE_PARM_DESC(unlock, "Ignore keyboard lock.");
+
++static bool i8042_probe_defer;
++module_param_named(probe_defer, i8042_probe_defer, bool, 0);
++MODULE_PARM_DESC(probe_defer, "Allow deferred probing.");
++
+ enum i8042_controller_reset_mode {
+ I8042_RESET_NEVER,
+ I8042_RESET_ALWAYS,
+@@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version)
+ * LCS/Telegraphics.
+ */
+
+-static int __init i8042_check_mux(void)
++static int i8042_check_mux(void)
+ {
+ unsigned char mux_version;
+
+@@ -740,10 +744,10 @@ static int __init i8042_check_mux(void)
+ /*
+ * The following is used to test AUX IRQ delivery.
+ */
+-static struct completion i8042_aux_irq_delivered __initdata;
+-static bool i8042_irq_being_tested __initdata;
++static struct completion i8042_aux_irq_delivered;
++static bool i8042_irq_being_tested;
+
+-static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
++static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id)
+ {
+ unsigned long flags;
+ unsigned char str, data;
+@@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
+ * verifies success by readinng CTR. Used when testing for presence of AUX
+ * port.
+ */
+-static int __init i8042_toggle_aux(bool on)
++static int i8042_toggle_aux(bool on)
+ {
+ unsigned char param;
+ int i;
+@@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on)
+ * the presence of an AUX interface.
+ */
+
+-static int __init i8042_check_aux(void)
++static int i8042_check_aux(void)
+ {
+ int retval = -1;
+ bool irq_registered = false;
+@@ -1005,7 +1009,7 @@ static int i8042_controller_init(void)
+
+ if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) {
+ pr_err("Can't read CTR while initializing i8042\n");
+- return -EIO;
++ return i8042_probe_defer ? -EPROBE_DEFER : -EIO;
+ }
+
+ } while (n < 2 || ctr[0] != ctr[1]);
+@@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev)
+ i8042_controller_reset(false);
+ }
+
+-static int __init i8042_create_kbd_port(void)
++static int i8042_create_kbd_port(void)
+ {
+ struct serio *serio;
+ struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO];
+@@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void)
+ return 0;
+ }
+
+-static int __init i8042_create_aux_port(int idx)
++static int i8042_create_aux_port(int idx)
+ {
+ struct serio *serio;
+ int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx;
+@@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx)
+ return 0;
+ }
+
+-static void __init i8042_free_kbd_port(void)
++static void i8042_free_kbd_port(void)
+ {
+ kfree(i8042_ports[I8042_KBD_PORT_NO].serio);
+ i8042_ports[I8042_KBD_PORT_NO].serio = NULL;
+ }
+
+-static void __init i8042_free_aux_ports(void)
++static void i8042_free_aux_ports(void)
+ {
+ int i;
+
+@@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void)
+ }
+ }
+
+-static void __init i8042_register_ports(void)
++static void i8042_register_ports(void)
+ {
+ int i;
+
+@@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void)
+ i8042_aux_irq_registered = i8042_kbd_irq_registered = false;
+ }
+
+-static int __init i8042_setup_aux(void)
++static int i8042_setup_aux(void)
+ {
+ int (*aux_enable)(void);
+ int error;
+@@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void)
+ return error;
+ }
+
+-static int __init i8042_setup_kbd(void)
++static int i8042_setup_kbd(void)
+ {
+ int error;
+
+@@ -1535,12 +1539,10 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb,
+ return 0;
+ }
+
+-static int __init i8042_probe(struct platform_device *dev)
++static int i8042_probe(struct platform_device *dev)
+ {
+ int error;
+
+- i8042_platform_device = dev;
+-
+ if (i8042_reset == I8042_RESET_ALWAYS) {
+ error = i8042_controller_selftest();
+ if (error)
+@@ -1578,7 +1580,6 @@ static int __init i8042_probe(struct platform_device *dev)
+ i8042_free_aux_ports(); /* in case KBD failed but AUX not */
+ i8042_free_irqs();
+ i8042_controller_reset(false);
+- i8042_platform_device = NULL;
+
+ return error;
+ }
+@@ -1588,7 +1589,6 @@ static int i8042_remove(struct platform_device *dev)
+ i8042_unregister_ports();
+ i8042_free_irqs();
+ i8042_controller_reset(false);
+- i8042_platform_device = NULL;
+
+ return 0;
+ }
+@@ -1600,6 +1600,7 @@ static struct platform_driver i8042_driver = {
+ .pm = &i8042_pm_ops,
+ #endif
+ },
++ .probe = i8042_probe,
+ .remove = i8042_remove,
+ .shutdown = i8042_shutdown,
+ };
+@@ -1610,7 +1611,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = {
+
+ static int __init i8042_init(void)
+ {
+- struct platform_device *pdev;
+ int err;
+
+ dbg_init();
+@@ -1626,17 +1626,29 @@ static int __init i8042_init(void)
+ /* Set this before creating the dev to allow i8042_command to work right away */
+ i8042_present = true;
+
+- pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0);
+- if (IS_ERR(pdev)) {
+- err = PTR_ERR(pdev);
++ err = platform_driver_register(&i8042_driver);
++ if (err)
+ goto err_platform_exit;
++
++ i8042_platform_device = platform_device_alloc("i8042", -1);
++ if (!i8042_platform_device) {
++ err = -ENOMEM;
++ goto err_unregister_driver;
+ }
+
++ err = platform_device_add(i8042_platform_device);
++ if (err)
++ goto err_free_device;
++
+ bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block);
+ panic_blink = i8042_panic_blink;
+
+ return 0;
+
++err_free_device:
++ platform_device_put(i8042_platform_device);
++err_unregister_driver:
++ platform_driver_unregister(&i8042_driver);
+ err_platform_exit:
+ i8042_platform_exit();
+ return err;
+diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
+index fcb1b646436a5..1581f6ef09279 100644
+--- a/drivers/input/tablet/aiptek.c
++++ b/drivers/input/tablet/aiptek.c
+@@ -1787,15 +1787,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
+ input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0);
+ input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
+
+- /* Verify that a device really has an endpoint */
+- if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
++ err = usb_find_common_endpoints(intf->cur_altsetting,
++ NULL, NULL, &endpoint, NULL);
++ if (err) {
+ dev_err(&intf->dev,
+- "interface has %d endpoints, but must have minimum 1\n",
+- intf->cur_altsetting->desc.bNumEndpoints);
+- err = -EINVAL;
++ "interface has no int in endpoints, but must have minimum 1\n");
+ goto fail3;
+ }
+- endpoint = &intf->cur_altsetting->endpoint[0].desc;
+
+ /* Go set up our URB, which is called when the tablet receives
+ * input.
+diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
+index 05de92c0293bc..eb66cd2689b7c 100644
+--- a/drivers/input/touchscreen/atmel_mxt_ts.c
++++ b/drivers/input/touchscreen/atmel_mxt_ts.c
+@@ -1882,7 +1882,7 @@ static int mxt_read_info_block(struct mxt_data *data)
+ if (error) {
+ dev_err(&client->dev, "Error %d parsing object table\n", error);
+ mxt_free_object_table(data);
+- goto err_free_mem;
++ return error;
+ }
+
+ data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START);
+diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
+index 68f542bb809f4..6858a3e20a0cc 100644
+--- a/drivers/input/touchscreen/elants_i2c.c
++++ b/drivers/input/touchscreen/elants_i2c.c
+@@ -117,6 +117,19 @@
+ #define ELAN_POWERON_DELAY_USEC 500
+ #define ELAN_RESET_DELAY_MSEC 20
+
++/* FW boot code version */
++#define BC_VER_H_BYTE_FOR_EKTH3900x1_I2C 0x72
++#define BC_VER_H_BYTE_FOR_EKTH3900x2_I2C 0x82
++#define BC_VER_H_BYTE_FOR_EKTH3900x3_I2C 0x92
++#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C 0x6D
++#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C 0x6E
++#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C 0x77
++#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C 0x78
++#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB 0x67
++#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB 0x68
++#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB 0x74
++#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB 0x75
++
+ enum elants_chip_id {
+ EKTH3500,
+ EKTF3624,
+@@ -736,6 +749,37 @@ static int elants_i2c_validate_remark_id(struct elants_data *ts,
+ return 0;
+ }
+
++static bool elants_i2c_should_check_remark_id(struct elants_data *ts)
++{
++ struct i2c_client *client = ts->client;
++ const u8 bootcode_version = ts->iap_version;
++ bool check;
++
++ /* I2C eKTH3900 and eKTH5312 are NOT support Remark ID */
++ if ((bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x1_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x2_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x3_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB) ||
++ (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB)) {
++ dev_dbg(&client->dev,
++ "eKTH3900/eKTH5312(0x%02x) are not support remark id\n",
++ bootcode_version);
++ check = false;
++ } else if (bootcode_version >= 0x60) {
++ check = true;
++ } else {
++ check = false;
++ }
++
++ return check;
++}
++
+ static int elants_i2c_do_update_firmware(struct i2c_client *client,
+ const struct firmware *fw,
+ bool force)
+@@ -749,7 +793,7 @@ static int elants_i2c_do_update_firmware(struct i2c_client *client,
+ u16 send_id;
+ int page, n_fw_pages;
+ int error;
+- bool check_remark_id = ts->iap_version >= 0x60;
++ bool check_remark_id = elants_i2c_should_check_remark_id(ts);
+
+ /* Recovery mode detection! */
+ if (force) {
+@@ -1285,14 +1329,12 @@ static int elants_i2c_power_on(struct elants_data *ts)
+ if (IS_ERR_OR_NULL(ts->reset_gpio))
+ return 0;
+
+- gpiod_set_value_cansleep(ts->reset_gpio, 1);
+-
+ error = regulator_enable(ts->vcc33);
+ if (error) {
+ dev_err(&ts->client->dev,
+ "failed to enable vcc33 regulator: %d\n",
+ error);
+- goto release_reset_gpio;
++ return error;
+ }
+
+ error = regulator_enable(ts->vccio);
+@@ -1301,7 +1343,7 @@ static int elants_i2c_power_on(struct elants_data *ts)
+ "failed to enable vccio regulator: %d\n",
+ error);
+ regulator_disable(ts->vcc33);
+- goto release_reset_gpio;
++ return error;
+ }
+
+ /*
+@@ -1310,7 +1352,6 @@ static int elants_i2c_power_on(struct elants_data *ts)
+ */
+ udelay(ELAN_POWERON_DELAY_USEC);
+
+-release_reset_gpio:
+ gpiod_set_value_cansleep(ts->reset_gpio, 0);
+ if (error)
+ return error;
+@@ -1418,7 +1459,7 @@ static int elants_i2c_probe(struct i2c_client *client)
+ return error;
+ }
+
+- ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_LOW);
++ ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(ts->reset_gpio)) {
+ error = PTR_ERR(ts->reset_gpio);
+
+diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c
+index cbe0dd4129121..4b7eee01c6aad 100644
+--- a/drivers/input/touchscreen/exc3000.c
++++ b/drivers/input/touchscreen/exc3000.c
+@@ -220,6 +220,7 @@ static int exc3000_vendor_data_request(struct exc3000_data *data, u8 *request,
+ {
+ u8 buf[EXC3000_LEN_VENDOR_REQUEST] = { 0x67, 0x00, 0x42, 0x00, 0x03 };
+ int ret;
++ unsigned long time_left;
+
+ mutex_lock(&data->query_lock);
+
+@@ -233,9 +234,9 @@ static int exc3000_vendor_data_request(struct exc3000_data *data, u8 *request,
+ goto out_unlock;
+
+ if (response) {
+- ret = wait_for_completion_timeout(&data->wait_event,
+- timeout * HZ);
+- if (ret <= 0) {
++ time_left = wait_for_completion_timeout(&data->wait_event,
++ timeout * HZ);
++ if (time_left == 0) {
+ ret = -ETIMEDOUT;
+ goto out_unlock;
+ }
+diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
+index 4f53d3c57e698..166d36b2626e9 100644
+--- a/drivers/input/touchscreen/goodix.c
++++ b/drivers/input/touchscreen/goodix.c
+@@ -14,20 +14,15 @@
+ #include <linux/kernel.h>
+ #include <linux/dmi.h>
+ #include <linux/firmware.h>
+-#include <linux/gpio/consumer.h>
+-#include <linux/i2c.h>
+-#include <linux/input.h>
+-#include <linux/input/mt.h>
+-#include <linux/input/touchscreen.h>
+ #include <linux/module.h>
+ #include <linux/delay.h>
+ #include <linux/irq.h>
+ #include <linux/interrupt.h>
+-#include <linux/regulator/consumer.h>
+ #include <linux/slab.h>
+ #include <linux/acpi.h>
+ #include <linux/of.h>
+ #include <asm/unaligned.h>
++#include "goodix.h"
+
+ #define GOODIX_GPIO_INT_NAME "irq"
+ #define GOODIX_GPIO_RST_NAME "reset"
+@@ -38,22 +33,11 @@
+ #define GOODIX_CONTACT_SIZE 8
+ #define GOODIX_MAX_CONTACT_SIZE 9
+ #define GOODIX_MAX_CONTACTS 10
+-#define GOODIX_MAX_KEYS 7
+
+ #define GOODIX_CONFIG_MIN_LENGTH 186
+ #define GOODIX_CONFIG_911_LENGTH 186
+ #define GOODIX_CONFIG_967_LENGTH 228
+ #define GOODIX_CONFIG_GT9X_LENGTH 240
+-#define GOODIX_CONFIG_MAX_LENGTH 240
+-
+-/* Register defines */
+-#define GOODIX_REG_COMMAND 0x8040
+-#define GOODIX_CMD_SCREEN_OFF 0x05
+-
+-#define GOODIX_READ_COOR_ADDR 0x814E
+-#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050
+-#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047
+-#define GOODIX_REG_ID 0x8140
+
+ #define GOODIX_BUFFER_STATUS_READY BIT(7)
+ #define GOODIX_HAVE_KEY BIT(4)
+@@ -68,55 +52,11 @@
+ #define ACPI_GPIO_SUPPORT
+ #endif
+
+-struct goodix_ts_data;
+-
+-enum goodix_irq_pin_access_method {
+- IRQ_PIN_ACCESS_NONE,
+- IRQ_PIN_ACCESS_GPIO,
+- IRQ_PIN_ACCESS_ACPI_GPIO,
+- IRQ_PIN_ACCESS_ACPI_METHOD,
+-};
+-
+-struct goodix_chip_data {
+- u16 config_addr;
+- int config_len;
+- int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len);
+- void (*calc_config_checksum)(struct goodix_ts_data *ts);
+-};
+-
+ struct goodix_chip_id {
+ const char *id;
+ const struct goodix_chip_data *data;
+ };
+
+-#define GOODIX_ID_MAX_LEN 4
+-
+-struct goodix_ts_data {
+- struct i2c_client *client;
+- struct input_dev *input_dev;
+- const struct goodix_chip_data *chip;
+- struct touchscreen_properties prop;
+- unsigned int max_touch_num;
+- unsigned int int_trigger_type;
+- struct regulator *avdd28;
+- struct regulator *vddio;
+- struct gpio_desc *gpiod_int;
+- struct gpio_desc *gpiod_rst;
+- int gpio_count;
+- int gpio_int_idx;
+- char id[GOODIX_ID_MAX_LEN + 1];
+- u16 version;
+- const char *cfg_name;
+- bool reset_controller_at_probe;
+- bool load_cfg_from_disk;
+- struct completion firmware_loading_complete;
+- unsigned long irq_flags;
+- enum goodix_irq_pin_access_method irq_pin_access_method;
+- unsigned int contact_size;
+- u8 config[GOODIX_CONFIG_MAX_LENGTH];
+- unsigned short keymap[GOODIX_MAX_KEYS];
+-};
+-
+ static int goodix_check_cfg_8(struct goodix_ts_data *ts,
+ const u8 *cfg, int len);
+ static int goodix_check_cfg_16(struct goodix_ts_data *ts,
+@@ -154,6 +94,7 @@ static const struct goodix_chip_data gt9x_chip_data = {
+
+ static const struct goodix_chip_id goodix_chip_ids[] = {
+ { .id = "1151", .data = &gt1x_chip_data },
++ { .id = "1158", .data = &gt1x_chip_data },
+ { .id = "5663", .data = &gt1x_chip_data },
+ { .id = "5688", .data = &gt1x_chip_data },
+ { .id = "917S", .data = &gt1x_chip_data },
+@@ -162,6 +103,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = {
+ { .id = "911", .data = &gt911_chip_data },
+ { .id = "9271", .data = &gt911_chip_data },
+ { .id = "9110", .data = &gt911_chip_data },
++ { .id = "9111", .data = &gt911_chip_data },
+ { .id = "927", .data = &gt911_chip_data },
+ { .id = "928", .data = &gt911_chip_data },
+
+@@ -181,10 +123,18 @@ static const unsigned long goodix_irq_flags[] = {
+ static const struct dmi_system_id nine_bytes_report[] = {
+ #if defined(CONFIG_DMI) && defined(CONFIG_X86)
+ {
+- .ident = "Lenovo YogaBook",
+- /* YB1-X91L/F and YB1-X90L/F */
++ /* Lenovo Yoga Book X90F / X90L */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
++ }
++ },
++ {
++ /* Lenovo Yoga Book X91F / X91L */
+ .matches = {
+- DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
++ /* Non exact match to match F + L versions */
++ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
+ }
+ },
+ #endif
+@@ -215,8 +165,7 @@ static const struct dmi_system_id inverted_x_screen[] = {
+ * @buf: raw write data buffer.
+ * @len: length of the buffer to write
+ */
+-static int goodix_i2c_read(struct i2c_client *client,
+- u16 reg, u8 *buf, int len)
++int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len)
+ {
+ struct i2c_msg msgs[2];
+ __be16 wbuf = cpu_to_be16(reg);
+@@ -244,8 +193,7 @@ static int goodix_i2c_read(struct i2c_client *client,
+ * @buf: raw data buffer to write.
+ * @len: length of the buffer to write
+ */
+-static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf,
+- unsigned len)
++int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len)
+ {
+ u8 *addr_buf;
+ struct i2c_msg msg;
+@@ -269,7 +217,7 @@ static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf,
+ return ret < 0 ? ret : (ret != 1 ? -EIO : 0);
+ }
+
+-static int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value)
++int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value)
+ {
+ return goodix_i2c_write(client, reg, &value, sizeof(value));
+ }
+@@ -553,7 +501,7 @@ static int goodix_check_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len)
+ * @cfg: config firmware to write to device
+ * @len: config data length
+ */
+-static int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len)
++int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len)
+ {
+ int error;
+
+@@ -651,62 +599,88 @@ static int goodix_irq_direction_input(struct goodix_ts_data *ts)
+ return -EINVAL; /* Never reached */
+ }
+
+-static int goodix_int_sync(struct goodix_ts_data *ts)
++int goodix_int_sync(struct goodix_ts_data *ts)
+ {
+ int error;
+
+ error = goodix_irq_direction_output(ts, 0);
+ if (error)
+- return error;
++ goto error;
+
+ msleep(50); /* T5: 50ms */
+
+ error = goodix_irq_direction_input(ts);
+ if (error)
+- return error;
++ goto error;
+
+ return 0;
++
++error:
++ dev_err(&ts->client->dev, "Controller irq sync failed.\n");
++ return error;
+ }
+
+ /**
+- * goodix_reset - Reset device during power on
++ * goodix_reset_no_int_sync - Reset device, leaving interrupt line in output mode
+ *
+ * @ts: goodix_ts_data pointer
+ */
+-static int goodix_reset(struct goodix_ts_data *ts)
++int goodix_reset_no_int_sync(struct goodix_ts_data *ts)
+ {
+ int error;
+
+ /* begin select I2C slave addr */
+ error = gpiod_direction_output(ts->gpiod_rst, 0);
+ if (error)
+- return error;
++ goto error;
+
+ msleep(20); /* T2: > 10ms */
+
+ /* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */
+ error = goodix_irq_direction_output(ts, ts->client->addr == 0x14);
+ if (error)
+- return error;
++ goto error;
+
+ usleep_range(100, 2000); /* T3: > 100us */
+
+ error = gpiod_direction_output(ts->gpiod_rst, 1);
+ if (error)
+- return error;
++ goto error;
+
+ usleep_range(6000, 10000); /* T4: > 5ms */
+
+- /* end select I2C slave addr */
+- error = gpiod_direction_input(ts->gpiod_rst);
+- if (error)
+- return error;
++ /*
++ * Put the reset pin back in to input / high-impedance mode to save
++ * power. Only do this in the non ACPI case since some ACPI boards
++ * don't have a pull-up, so there the reset pin must stay active-high.
++ */
++ if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_GPIO) {
++ error = gpiod_direction_input(ts->gpiod_rst);
++ if (error)
++ goto error;
++ }
+
+- error = goodix_int_sync(ts);
++ return 0;
++
++error:
++ dev_err(&ts->client->dev, "Controller reset failed.\n");
++ return error;
++}
++
++/**
++ * goodix_reset - Reset device during power on
++ *
++ * @ts: goodix_ts_data pointer
++ */
++static int goodix_reset(struct goodix_ts_data *ts)
++{
++ int error;
++
++ error = goodix_reset_no_int_sync(ts);
+ if (error)
+ return error;
+
+- return 0;
++ return goodix_int_sync(ts);
+ }
+
+ #ifdef ACPI_GPIO_SUPPORT
+@@ -818,6 +792,14 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
+ return -EINVAL;
+ }
+
++ /*
++ * Normally we put the reset pin in input / high-impedance mode to save
++ * power. But some x86/ACPI boards don't have a pull-up, so for the ACPI
++ * case, leave the pin as is. This results in the pin not being touched
++ * at all on x86/ACPI boards, except when needed for error-recover.
++ */
++ ts->gpiod_rst_flags = GPIOD_ASIS;
++
+ return devm_acpi_dev_add_driver_gpios(dev, gpio_mapping);
+ }
+ #else
+@@ -843,6 +825,12 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts)
+ return -EINVAL;
+ dev = &ts->client->dev;
+
++ /*
++ * By default we request the reset pin as input, leaving it in
++ * high-impedance when not resetting the controller to save power.
++ */
++ ts->gpiod_rst_flags = GPIOD_IN;
++
+ ts->avdd28 = devm_regulator_get(dev, "AVDD28");
+ if (IS_ERR(ts->avdd28)) {
+ error = PTR_ERR(ts->avdd28);
+@@ -880,7 +868,7 @@ retry_get_irq_gpio:
+ ts->gpiod_int = gpiod;
+
+ /* Get the reset line GPIO pin number */
+- gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, GPIOD_IN);
++ gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags);
+ if (IS_ERR(gpiod)) {
+ error = PTR_ERR(gpiod);
+ if (error != -EPROBE_DEFER)
+@@ -1059,6 +1047,7 @@ static int goodix_configure_dev(struct goodix_ts_data *ts)
+ input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0);
+ input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+
++retry_read_config:
+ /* Read configuration and apply touchscreen parameters */
+ goodix_read_config(ts);
+
+@@ -1066,6 +1055,16 @@ static int goodix_configure_dev(struct goodix_ts_data *ts)
+ touchscreen_parse_properties(ts->input_dev, true, &ts->prop);
+
+ if (!ts->prop.max_x || !ts->prop.max_y || !ts->max_touch_num) {
++ if (!ts->reset_controller_at_probe &&
++ ts->irq_pin_access_method != IRQ_PIN_ACCESS_NONE) {
++ dev_info(&ts->client->dev, "Config not set, resetting controller\n");
++ /* Retry after a controller reset */
++ ts->reset_controller_at_probe = true;
++ error = goodix_reset(ts);
++ if (error)
++ return error;
++ goto retry_read_config;
++ }
+ dev_err(&ts->client->dev,
+ "Invalid config (%d, %d, %d), using defaults\n",
+ ts->prop.max_x, ts->prop.max_y, ts->max_touch_num);
+@@ -1205,10 +1204,8 @@ reset:
+ if (ts->reset_controller_at_probe) {
+ /* reset the controller */
+ error = goodix_reset(ts);
+- if (error) {
+- dev_err(&client->dev, "Controller reset failed.\n");
++ if (error)
+ return error;
+- }
+ }
+
+ error = goodix_i2c_test(client);
+@@ -1350,10 +1347,8 @@ static int __maybe_unused goodix_resume(struct device *dev)
+
+ if (error != 0 || config_ver != ts->config[0]) {
+ error = goodix_reset(ts);
+- if (error) {
+- dev_err(dev, "Controller reset failed.\n");
++ if (error)
+ return error;
+- }
+
+ error = goodix_send_cfg(ts, ts->config, ts->chip->config_len);
+ if (error)
+@@ -1387,6 +1382,7 @@ MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
+ #ifdef CONFIG_OF
+ static const struct of_device_id goodix_of_match[] = {
+ { .compatible = "goodix,gt1151" },
++ { .compatible = "goodix,gt1158" },
+ { .compatible = "goodix,gt5663" },
+ { .compatible = "goodix,gt5688" },
+ { .compatible = "goodix,gt911" },
+diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h
+new file mode 100644
+index 0000000000000..1a1571ad2cd23
+--- /dev/null
++++ b/drivers/input/touchscreen/goodix.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++#ifndef __GOODIX_H__
++#define __GOODIX_H__
++
++#include <linux/gpio/consumer.h>
++#include <linux/i2c.h>
++#include <linux/input.h>
++#include <linux/input/mt.h>
++#include <linux/input/touchscreen.h>
++#include <linux/regulator/consumer.h>
++
++/* Register defines */
++#define GOODIX_REG_COMMAND 0x8040
++#define GOODIX_CMD_SCREEN_OFF 0x05
++
++#define GOODIX_GT1X_REG_CONFIG_DATA 0x8050
++#define GOODIX_GT9X_REG_CONFIG_DATA 0x8047
++#define GOODIX_REG_ID 0x8140
++#define GOODIX_READ_COOR_ADDR 0x814E
++
++#define GOODIX_ID_MAX_LEN 4
++#define GOODIX_CONFIG_MAX_LENGTH 240
++#define GOODIX_MAX_KEYS 7
++
++enum goodix_irq_pin_access_method {
++ IRQ_PIN_ACCESS_NONE,
++ IRQ_PIN_ACCESS_GPIO,
++ IRQ_PIN_ACCESS_ACPI_GPIO,
++ IRQ_PIN_ACCESS_ACPI_METHOD,
++};
++
++struct goodix_ts_data;
++
++struct goodix_chip_data {
++ u16 config_addr;
++ int config_len;
++ int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len);
++ void (*calc_config_checksum)(struct goodix_ts_data *ts);
++};
++
++struct goodix_ts_data {
++ struct i2c_client *client;
++ struct input_dev *input_dev;
++ const struct goodix_chip_data *chip;
++ struct touchscreen_properties prop;
++ unsigned int max_touch_num;
++ unsigned int int_trigger_type;
++ struct regulator *avdd28;
++ struct regulator *vddio;
++ struct gpio_desc *gpiod_int;
++ struct gpio_desc *gpiod_rst;
++ int gpio_count;
++ int gpio_int_idx;
++ enum gpiod_flags gpiod_rst_flags;
++ char id[GOODIX_ID_MAX_LEN + 1];
++ u16 version;
++ const char *cfg_name;
++ bool reset_controller_at_probe;
++ bool load_cfg_from_disk;
++ struct completion firmware_loading_complete;
++ unsigned long irq_flags;
++ enum goodix_irq_pin_access_method irq_pin_access_method;
++ unsigned int contact_size;
++ u8 config[GOODIX_CONFIG_MAX_LENGTH];
++ unsigned short keymap[GOODIX_MAX_KEYS];
++};
++
++int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len);
++int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len);
++int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value);
++int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len);
++int goodix_int_sync(struct goodix_ts_data *ts);
++int goodix_reset_no_int_sync(struct goodix_ts_data *ts);
++
++#endif
+diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
+index 30576a5f2f045..f437eefec94ad 100644
+--- a/drivers/input/touchscreen/ili210x.c
++++ b/drivers/input/touchscreen/ili210x.c
+@@ -420,9 +420,9 @@ static int ili210x_i2c_probe(struct i2c_client *client,
+ if (error)
+ return error;
+
+- usleep_range(50, 100);
++ usleep_range(12000, 15000);
+ gpiod_set_value_cansleep(reset_gpio, 0);
+- msleep(100);
++ msleep(160);
+ }
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
+index 2745bf1aee381..83f4be05e27b6 100644
+--- a/drivers/input/touchscreen/melfas_mip4.c
++++ b/drivers/input/touchscreen/melfas_mip4.c
+@@ -1453,7 +1453,7 @@ static int mip4_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ "ce", GPIOD_OUT_LOW);
+ if (IS_ERR(ts->gpio_ce)) {
+ error = PTR_ERR(ts->gpio_ce);
+- if (error != EPROBE_DEFER)
++ if (error != -EPROBE_DEFER)
+ dev_err(&client->dev,
+ "Failed to get gpio: %d\n", error);
+ return error;
+diff --git a/drivers/input/touchscreen/raspberrypi-ts.c b/drivers/input/touchscreen/raspberrypi-ts.c
+index 5000f5fd9ec38..45c575df994e0 100644
+--- a/drivers/input/touchscreen/raspberrypi-ts.c
++++ b/drivers/input/touchscreen/raspberrypi-ts.c
+@@ -134,7 +134,7 @@ static int rpi_ts_probe(struct platform_device *pdev)
+ return -ENOENT;
+ }
+
+- fw = rpi_firmware_get(fw_node);
++ fw = devm_rpi_firmware_get(&pdev->dev, fw_node);
+ of_node_put(fw_node);
+ if (!fw)
+ return -EPROBE_DEFER;
+@@ -160,7 +160,6 @@ static int rpi_ts_probe(struct platform_device *pdev)
+ touchbuf = (u32)ts->fw_regs_phys;
+ error = rpi_firmware_property(fw, RPI_FIRMWARE_FRAMEBUFFER_SET_TOUCHBUF,
+ &touchbuf, sizeof(touchbuf));
+- rpi_firmware_put(fw);
+ if (error || touchbuf != 0) {
+ dev_warn(dev, "Failed to set touchbuf, %d\n", error);
+ return error;
+diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
+index 4d2d22a869773..bdb3e2c3ab797 100644
+--- a/drivers/input/touchscreen/raydium_i2c_ts.c
++++ b/drivers/input/touchscreen/raydium_i2c_ts.c
+@@ -210,12 +210,14 @@ static int raydium_i2c_send(struct i2c_client *client,
+
+ error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer));
+ if (likely(!error))
+- return 0;
++ goto out;
+
+ msleep(RM_RETRY_DELAY_MS);
+ } while (++tries < RM_MAX_RETRIES);
+
+ dev_err(&client->dev, "%s failed: %d\n", __func__, error);
++out:
++ kfree(tx_buf);
+ return error;
+ }
+
+diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
+index 6abae665ca71d..9d1dea6996a22 100644
+--- a/drivers/input/touchscreen/st1232.c
++++ b/drivers/input/touchscreen/st1232.c
+@@ -92,7 +92,7 @@ static int st1232_ts_wait_ready(struct st1232_ts_data *ts)
+ unsigned int retries;
+ int error;
+
+- for (retries = 10; retries; retries--) {
++ for (retries = 100; retries; retries--) {
+ error = st1232_ts_read_data(ts, REG_STATUS, 1);
+ if (!error) {
+ switch (ts->read_buf[0]) {
+diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c
+index bc11203c9cf78..c175d44c52f37 100644
+--- a/drivers/input/touchscreen/stmfts.c
++++ b/drivers/input/touchscreen/stmfts.c
+@@ -337,13 +337,15 @@ static int stmfts_input_open(struct input_dev *dev)
+ struct stmfts_data *sdata = input_get_drvdata(dev);
+ int err;
+
+- err = pm_runtime_get_sync(&sdata->client->dev);
+- if (err < 0)
++ err = pm_runtime_resume_and_get(&sdata->client->dev);
++ if (err)
+ return err;
+
+ err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON);
+- if (err)
++ if (err) {
++ pm_runtime_put_sync(&sdata->client->dev);
+ return err;
++ }
+
+ mutex_lock(&sdata->mutex);
+ sdata->running = true;
+diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
+index 83e685557a197..cfc943423241f 100644
+--- a/drivers/input/touchscreen/ti_am335x_tsc.c
++++ b/drivers/input/touchscreen/ti_am335x_tsc.c
+@@ -131,7 +131,8 @@ static void titsc_step_config(struct titsc *ts_dev)
+ u32 stepenable;
+
+ config = STEPCONFIG_MODE_HWSYNC |
+- STEPCONFIG_AVG_16 | ts_dev->bit_xp;
++ STEPCONFIG_AVG_16 | ts_dev->bit_xp |
++ STEPCONFIG_INM_ADCREFM;
+ switch (ts_dev->wires) {
+ case 4:
+ config |= STEPCONFIG_INP(ts_dev->inp_yp) | ts_dev->bit_xn;
+@@ -195,7 +196,10 @@ static void titsc_step_config(struct titsc *ts_dev)
+ STEPCONFIG_OPENDLY);
+
+ end_step++;
+- config |= STEPCONFIG_INP(ts_dev->inp_yn);
++ config = STEPCONFIG_MODE_HWSYNC |
++ STEPCONFIG_AVG_16 | ts_dev->bit_yp |
++ ts_dev->bit_xn | STEPCONFIG_INM_ADCREFM |
++ STEPCONFIG_INP(ts_dev->inp_yn);
+ titsc_writel(ts_dev, REG_STEPCONFIG(end_step), config);
+ titsc_writel(ts_dev, REG_STEPDELAY(end_step),
+ STEPCONFIG_OPENDLY);
+diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
+index b8d901099378d..400957f4c8c9c 100644
+--- a/drivers/input/touchscreen/zinitix.c
++++ b/drivers/input/touchscreen/zinitix.c
+@@ -135,7 +135,7 @@ struct point_coord {
+
+ struct touch_event {
+ __le16 status;
+- u8 finger_cnt;
++ u8 finger_mask;
+ u8 time_stamp;
+ struct point_coord point_coord[MAX_SUPPORTED_FINGER_NUM];
+ };
+@@ -311,11 +311,32 @@ static int zinitix_send_power_on_sequence(struct bt541_ts_data *bt541)
+ static void zinitix_report_finger(struct bt541_ts_data *bt541, int slot,
+ const struct point_coord *p)
+ {
++ u16 x, y;
++
++ if (unlikely(!(p->sub_status &
++ (SUB_BIT_UP | SUB_BIT_DOWN | SUB_BIT_MOVE)))) {
++ dev_dbg(&bt541->client->dev, "unknown finger event %#02x\n",
++ p->sub_status);
++ return;
++ }
++
++ x = le16_to_cpu(p->x);
++ y = le16_to_cpu(p->y);
++
+ input_mt_slot(bt541->input_dev, slot);
+- input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, true);
+- touchscreen_report_pos(bt541->input_dev, &bt541->prop,
+- le16_to_cpu(p->x), le16_to_cpu(p->y), true);
+- input_report_abs(bt541->input_dev, ABS_MT_TOUCH_MAJOR, p->width);
++ if (input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER,
++ !(p->sub_status & SUB_BIT_UP))) {
++ touchscreen_report_pos(bt541->input_dev,
++ &bt541->prop, x, y, true);
++ input_report_abs(bt541->input_dev,
++ ABS_MT_TOUCH_MAJOR, p->width);
++ dev_dbg(&bt541->client->dev, "finger %d %s (%u, %u)\n",
++ slot, p->sub_status & SUB_BIT_DOWN ? "down" : "move",
++ x, y);
++ } else {
++ dev_dbg(&bt541->client->dev, "finger %d up (%u, %u)\n",
++ slot, x, y);
++ }
+ }
+
+ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
+@@ -323,6 +344,7 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
+ struct bt541_ts_data *bt541 = bt541_handler;
+ struct i2c_client *client = bt541->client;
+ struct touch_event touch_event;
++ unsigned long finger_mask;
+ int error;
+ int i;
+
+@@ -335,10 +357,14 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
+ goto out;
+ }
+
+- for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++)
+- if (touch_event.point_coord[i].sub_status & SUB_BIT_EXIST)
+- zinitix_report_finger(bt541, i,
+- &touch_event.point_coord[i]);
++ finger_mask = touch_event.finger_mask;
++ for_each_set_bit(i, &finger_mask, MAX_SUPPORTED_FINGER_NUM) {
++ const struct point_coord *p = &touch_event.point_coord[i];
++
++ /* Only process contacts that are actually reported */
++ if (p->sub_status & SUB_BIT_EXIST)
++ zinitix_report_finger(bt541, i, p);
++ }
+
+ input_mt_sync_frame(bt541->input_dev);
+ input_sync(bt541->input_dev);
+@@ -488,6 +514,15 @@ static int zinitix_ts_probe(struct i2c_client *client)
+ return error;
+ }
+
++ error = devm_request_threaded_irq(&client->dev, client->irq,
++ NULL, zinitix_ts_irq_handler,
++ IRQF_ONESHOT | IRQF_NO_AUTOEN,
++ client->name, bt541);
++ if (error) {
++ dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
++ return error;
++ }
++
+ error = zinitix_init_input_dev(bt541);
+ if (error) {
+ dev_err(&client->dev,
+@@ -513,15 +548,6 @@ static int zinitix_ts_probe(struct i2c_client *client)
+ return -EINVAL;
+ }
+
+- error = devm_request_threaded_irq(&client->dev, client->irq,
+- NULL, zinitix_ts_irq_handler,
+- IRQF_ONESHOT | IRQF_NO_AUTOEN,
+- client->name, bt541);
+- if (error) {
+- dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+- return error;
+- }
+-
+ return 0;
+ }
+
+diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
+index 9050ca1f4285c..14d785e5629e6 100644
+--- a/drivers/interconnect/core.c
++++ b/drivers/interconnect/core.c
+@@ -850,6 +850,10 @@ void icc_node_destroy(int id)
+
+ mutex_unlock(&icc_lock);
+
++ if (!node)
++ return;
++
++ kfree(node->links);
+ kfree(node);
+ }
+ EXPORT_SYMBOL_GPL(icc_node_destroy);
+@@ -1087,9 +1091,15 @@ static int of_count_icc_providers(struct device_node *np)
+ {
+ struct device_node *child;
+ int count = 0;
++ const struct of_device_id __maybe_unused ignore_list[] = {
++ { .compatible = "qcom,sc7180-ipa-virt" },
++ { .compatible = "qcom,sdx55-ipa-virt" },
++ {}
++ };
+
+ for_each_available_child_of_node(np, child) {
+- if (of_property_read_bool(child, "#interconnect-cells"))
++ if (of_property_read_bool(child, "#interconnect-cells") &&
++ likely(!of_match_node(ignore_list, child)))
+ count++;
+ count += of_count_icc_providers(child);
+ }
+diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c
+index c770951a909c9..aabd9edf2ef76 100644
+--- a/drivers/interconnect/imx/imx.c
++++ b/drivers/interconnect/imx/imx.c
+@@ -226,16 +226,16 @@ int imx_icc_register(struct platform_device *pdev,
+ struct device *dev = &pdev->dev;
+ struct icc_onecell_data *data;
+ struct icc_provider *provider;
+- int max_node_id;
++ int num_nodes;
+ int ret;
+
+ /* icc_onecell_data is indexed by node_id, unlike nodes param */
+- max_node_id = get_max_node_id(nodes, nodes_count);
+- data = devm_kzalloc(dev, struct_size(data, nodes, max_node_id),
++ num_nodes = get_max_node_id(nodes, nodes_count) + 1;
++ data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+- data->num_nodes = max_node_id;
++ data->num_nodes = num_nodes;
+
+ provider = devm_kzalloc(dev, sizeof(*provider), GFP_KERNEL);
+ if (!provider)
+diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
+index 54de49ca7808a..ddf1805ded0c0 100644
+--- a/drivers/interconnect/qcom/icc-rpm.c
++++ b/drivers/interconnect/qcom/icc-rpm.c
+@@ -68,6 +68,7 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
+ rate = max(sum_bw, max_peak_bw);
+
+ do_div(rate, qn->buswidth);
++ rate = min_t(u64, rate, LONG_MAX);
+
+ if (qn->rate == rate)
+ return 0;
+diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
+index 3eb7936d2cf60..2c8e12549804b 100644
+--- a/drivers/interconnect/qcom/icc-rpmh.c
++++ b/drivers/interconnect/qcom/icc-rpmh.c
+@@ -21,13 +21,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node)
+ {
+ size_t i;
+ struct qcom_icc_node *qn;
++ struct qcom_icc_provider *qp;
+
+ qn = node->data;
++ qp = to_qcom_provider(node->provider);
+
+ for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
+ qn->sum_avg[i] = 0;
+ qn->max_peak[i] = 0;
+ }
++
++ for (i = 0; i < qn->num_bcms; i++)
++ qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
+ }
+ EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate);
+
+@@ -45,10 +50,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
+ {
+ size_t i;
+ struct qcom_icc_node *qn;
+- struct qcom_icc_provider *qp;
+
+ qn = node->data;
+- qp = to_qcom_provider(node->provider);
+
+ if (!tag)
+ tag = QCOM_ICC_TAG_ALWAYS;
+@@ -68,9 +71,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
+ *agg_avg += avg_bw;
+ *agg_peak = max_t(u32, *agg_peak, peak_bw);
+
+- for (i = 0; i < qn->num_bcms; i++)
+- qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
+-
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(qcom_icc_aggregate);
+diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
+index c7af143980de4..87edab1bf987b 100644
+--- a/drivers/interconnect/qcom/osm-l3.c
++++ b/drivers/interconnect/qcom/osm-l3.c
+@@ -275,7 +275,7 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
+ qnodes = desc->nodes;
+ num_nodes = desc->num_nodes;
+
+- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
++ data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c
+index 12d59c36df530..5f7c0f85fa8e3 100644
+--- a/drivers/interconnect/qcom/sc7180.c
++++ b/drivers/interconnect/qcom/sc7180.c
+@@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM
+ DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC);
+ DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC);
+ DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC);
+-DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE);
+ DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1);
+ DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC);
+ DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC);
+@@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4);
+ DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC);
+ DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC);
+ DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4);
+-DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8);
+ DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4);
+ DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC);
+ DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC);
+@@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
+ DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
+ DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf);
+ DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
+-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
+ DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc);
+ DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9);
+ DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu);
+@@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = {
+ .num_bcms = ARRAY_SIZE(gem_noc_bcms),
+ };
+
+-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
+- &bcm_ip0,
+-};
+-
+-static struct qcom_icc_node *ipa_virt_nodes[] = {
+- [MASTER_IPA_CORE] = &ipa_core_master,
+- [SLAVE_IPA_CORE] = &ipa_core_slave,
+-};
+-
+-static struct qcom_icc_desc sc7180_ipa_virt = {
+- .nodes = ipa_virt_nodes,
+- .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
+- .bcms = ipa_virt_bcms,
+- .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
+-};
+-
+ static struct qcom_icc_bcm *mc_virt_bcms[] = {
+ &bcm_acv,
+ &bcm_mc0,
+@@ -519,8 +500,6 @@ static const struct of_device_id qnoc_of_match[] = {
+ .data = &sc7180_dc_noc},
+ { .compatible = "qcom,sc7180-gem-noc",
+ .data = &sc7180_gem_noc},
+- { .compatible = "qcom,sc7180-ipa-virt",
+- .data = &sc7180_ipa_virt},
+ { .compatible = "qcom,sc7180-mc-virt",
+ .data = &sc7180_mc_virt},
+ { .compatible = "qcom,sc7180-mmss-noc",
+diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c
+index 03d604f84cc57..e3ac25a997b71 100644
+--- a/drivers/interconnect/qcom/sdx55.c
++++ b/drivers/interconnect/qcom/sdx55.c
+@@ -18,7 +18,6 @@
+ #include "icc-rpmh.h"
+ #include "sdx55.h"
+
+-DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE);
+ DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0);
+ DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
+ DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC);
+@@ -40,7 +39,6 @@ DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+ DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+ DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
+ DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+-DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8);
+ DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4);
+ DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0);
+ DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC);
+@@ -82,7 +80,6 @@ DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8);
+ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
+ DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
+ DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
+-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
+ DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg);
+ DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr);
+ DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie);
+@@ -219,22 +216,6 @@ static const struct qcom_icc_desc sdx55_system_noc = {
+ .num_bcms = ARRAY_SIZE(system_noc_bcms),
+ };
+
+-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
+- &bcm_ip0,
+-};
+-
+-static struct qcom_icc_node *ipa_virt_nodes[] = {
+- [MASTER_IPA_CORE] = &ipa_core_master,
+- [SLAVE_IPA_CORE] = &ipa_core_slave,
+-};
+-
+-static const struct qcom_icc_desc sdx55_ipa_virt = {
+- .nodes = ipa_virt_nodes,
+- .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
+- .bcms = ipa_virt_bcms,
+- .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
+-};
+-
+ static const struct of_device_id qnoc_of_match[] = {
+ { .compatible = "qcom,sdx55-mc-virt",
+ .data = &sdx55_mc_virt},
+@@ -242,8 +223,6 @@ static const struct of_device_id qnoc_of_match[] = {
+ .data = &sdx55_mem_noc},
+ { .compatible = "qcom,sdx55-system-noc",
+ .data = &sdx55_system_noc},
+- { .compatible = "qcom,sdx55-ipa-virt",
+- .data = &sdx55_ipa_virt},
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, qnoc_of_match);
+diff --git a/drivers/interconnect/qcom/sm8150.c b/drivers/interconnect/qcom/sm8150.c
+index 2a85f53802b5f..745e3c36a61af 100644
+--- a/drivers/interconnect/qcom/sm8150.c
++++ b/drivers/interconnect/qcom/sm8150.c
+@@ -535,7 +535,6 @@ static struct platform_driver qnoc_driver = {
+ .driver = {
+ .name = "qnoc-sm8150",
+ .of_match_table = qnoc_of_match,
+- .sync_state = icc_sync_state,
+ },
+ };
+ module_platform_driver(qnoc_driver);
+diff --git a/drivers/interconnect/qcom/sm8250.c b/drivers/interconnect/qcom/sm8250.c
+index 8dfb5dea562a3..aa707582ea016 100644
+--- a/drivers/interconnect/qcom/sm8250.c
++++ b/drivers/interconnect/qcom/sm8250.c
+@@ -551,7 +551,6 @@ static struct platform_driver qnoc_driver = {
+ .driver = {
+ .name = "qnoc-sm8250",
+ .of_match_table = qnoc_of_match,
+- .sync_state = icc_sync_state,
+ },
+ };
+ module_platform_driver(qnoc_driver);
+diff --git a/drivers/interconnect/qcom/sm8350.c b/drivers/interconnect/qcom/sm8350.c
+index 3e26a2175b28f..c79f93a1ac73c 100644
+--- a/drivers/interconnect/qcom/sm8350.c
++++ b/drivers/interconnect/qcom/sm8350.c
+@@ -531,7 +531,6 @@ static struct platform_driver qnoc_driver = {
+ .driver = {
+ .name = "qnoc-sm8350",
+ .of_match_table = qnoc_of_match,
+- .sync_state = icc_sync_state,
+ },
+ };
+ module_platform_driver(qnoc_driver);
+diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c
+index 6559d8cf80687..e706658994821 100644
+--- a/drivers/interconnect/samsung/exynos.c
++++ b/drivers/interconnect/samsung/exynos.c
+@@ -149,6 +149,9 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
+ &priv->bus_clk_ratio))
+ priv->bus_clk_ratio = EXYNOS_ICC_DEFAULT_BUS_CLK_RATIO;
+
++ icc_node->data = priv;
++ icc_node_add(icc_node, provider);
++
+ /*
+ * Register a PM QoS request for the parent (devfreq) device.
+ */
+@@ -157,9 +160,6 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
+ if (ret < 0)
+ goto err_node_del;
+
+- icc_node->data = priv;
+- icc_node_add(icc_node, provider);
+-
+ icc_parent_node = exynos_icc_get_parent(bus_dev->of_node);
+ if (IS_ERR(icc_parent_node)) {
+ ret = PTR_ERR(icc_parent_node);
+diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
+index 416815a525d67..bb95edf74415b 100644
+--- a/drivers/iommu/amd/amd_iommu.h
++++ b/drivers/iommu/amd/amd_iommu.h
+@@ -14,6 +14,7 @@
+ extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
+ extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+ extern void amd_iommu_apply_erratum_63(u16 devid);
++extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
+ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
+ extern int amd_iommu_init_devices(void);
+ extern void amd_iommu_uninit_devices(void);
+diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
+index 8dbe61e2b3c15..390f10060c82b 100644
+--- a/drivers/iommu/amd/amd_iommu_types.h
++++ b/drivers/iommu/amd/amd_iommu_types.h
+@@ -110,6 +110,7 @@
+ #define PASID_MASK 0x0000ffff
+
+ /* MMIO status bits */
++#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
+ #define MMIO_STATUS_EVT_INT_MASK (1 << 1)
+ #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
+ #define MMIO_STATUS_PPR_INT_MASK (1 << 6)
+@@ -643,8 +644,6 @@ struct amd_iommu {
+ /* DebugFS Info */
+ struct dentry *debugfs;
+ #endif
+- /* IRQ notifier for IntCapXT interrupt */
+- struct irq_affinity_notify intcapxt_notify;
+ };
+
+ static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
+@@ -919,8 +918,8 @@ struct amd_ir_data {
+ */
+ struct irq_cfg *cfg;
+ int ga_vector;
+- int ga_root_ptr;
+- int ga_tag;
++ u64 ga_root_ptr;
++ u32 ga_tag;
+ };
+
+ struct amd_irte_ops {
+diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
+index 2a822b229bd05..50ea582be5910 100644
+--- a/drivers/iommu/amd/init.c
++++ b/drivers/iommu/amd/init.c
+@@ -21,6 +21,7 @@
+ #include <linux/export.h>
+ #include <linux/kmemleak.h>
+ #include <linux/mem_encrypt.h>
++#include <linux/iopoll.h>
+ #include <asm/pci-direct.h>
+ #include <asm/iommu.h>
+ #include <asm/apic.h>
+@@ -83,7 +84,11 @@
+ #define ACPI_DEVFLAG_LINT1 0x80
+ #define ACPI_DEVFLAG_ATSDIS 0x10000000
+
+-#define LOOP_TIMEOUT 100000
++#define LOOP_TIMEOUT 2000000
++
++#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
++ | ((dev & 0x1f) << 3) | (fn & 0x7))
++
+ /*
+ * ACPI table definitions
+ *
+@@ -654,6 +659,16 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu)
+ return iommu->cmd_buf ? 0 : -ENOMEM;
+ }
+
++/*
++ * This function restarts event logging in case the IOMMU experienced
++ * an event log buffer overflow.
++ */
++void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
++{
++ iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
++ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
++}
++
+ /*
+ * This function resets the command buffer if the IOMMU stopped fetching
+ * commands from it.
+@@ -804,16 +819,27 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
+ {
+ #ifdef CONFIG_IRQ_REMAP
+ u32 status, i;
++ u64 entry;
+
+ if (!iommu->ga_log)
+ return -EINVAL;
+
+- status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+-
+ /* Check if already running */
+- if (status & (MMIO_STATUS_GALOG_RUN_MASK))
++ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
++ if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK)))
+ return 0;
+
++ entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
++ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
++ &entry, sizeof(entry));
++ entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
++ (BIT_ULL(52)-1)) & ~7ULL;
++ memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
++ &entry, sizeof(entry));
++ writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
++ writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
++
++
+ iommu_feature_enable(iommu, CONTROL_GAINT_EN);
+ iommu_feature_enable(iommu, CONTROL_GALOG_EN);
+
+@@ -821,9 +847,10 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (status & (MMIO_STATUS_GALOG_RUN_MASK))
+ break;
++ udelay(10);
+ }
+
+- if (i >= LOOP_TIMEOUT)
++ if (WARN_ON(i >= LOOP_TIMEOUT))
+ return -EINVAL;
+ #endif /* CONFIG_IRQ_REMAP */
+ return 0;
+@@ -832,8 +859,6 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
+ static int iommu_init_ga_log(struct amd_iommu *iommu)
+ {
+ #ifdef CONFIG_IRQ_REMAP
+- u64 entry;
+-
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ return 0;
+
+@@ -847,16 +872,6 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
+ if (!iommu->ga_log_tail)
+ goto err_out;
+
+- entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
+- memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
+- &entry, sizeof(entry));
+- entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
+- (BIT_ULL(52)-1)) & ~7ULL;
+- memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
+- &entry, sizeof(entry));
+- writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+- writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
+-
+ return 0;
+ err_out:
+ free_ga_log(iommu);
+@@ -2013,48 +2028,18 @@ union intcapxt {
+ };
+ } __attribute__ ((packed));
+
+-/*
+- * There isn't really any need to mask/unmask at the irqchip level because
+- * the 64-bit INTCAPXT registers can be updated atomically without tearing
+- * when the affinity is being updated.
+- */
+-static void intcapxt_unmask_irq(struct irq_data *data)
+-{
+-}
+-
+-static void intcapxt_mask_irq(struct irq_data *data)
+-{
+-}
+
+ static struct irq_chip intcapxt_controller;
+
+ static int intcapxt_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+ {
+- struct amd_iommu *iommu = irqd->chip_data;
+- struct irq_cfg *cfg = irqd_cfg(irqd);
+- union intcapxt xt;
+-
+- xt.capxt = 0ULL;
+- xt.dest_mode_logical = apic->dest_mode_logical;
+- xt.vector = cfg->vector;
+- xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
+- xt.destid_24_31 = cfg->dest_apicid >> 24;
+-
+- /**
+- * Current IOMMU implemtation uses the same IRQ for all
+- * 3 IOMMU interrupts.
+- */
+- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+- writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ return 0;
+ }
+
+ static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irqd)
+ {
+- intcapxt_mask_irq(irqd);
+ }
+
+
+@@ -2088,6 +2073,38 @@ static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
+ }
+
++
++static void intcapxt_unmask_irq(struct irq_data *irqd)
++{
++ struct amd_iommu *iommu = irqd->chip_data;
++ struct irq_cfg *cfg = irqd_cfg(irqd);
++ union intcapxt xt;
++
++ xt.capxt = 0ULL;
++ xt.dest_mode_logical = apic->dest_mode_logical;
++ xt.vector = cfg->vector;
++ xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
++ xt.destid_24_31 = cfg->dest_apicid >> 24;
++
++ /**
++ * Current IOMMU implementation uses the same IRQ for all
++ * 3 IOMMU interrupts.
++ */
++ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
++ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
++ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
++}
++
++static void intcapxt_mask_irq(struct irq_data *irqd)
++{
++ struct amd_iommu *iommu = irqd->chip_data;
++
++ writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
++ writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
++ writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
++}
++
++
+ static int intcapxt_set_affinity(struct irq_data *irqd,
+ const struct cpumask *mask, bool force)
+ {
+@@ -2097,8 +2114,12 @@ static int intcapxt_set_affinity(struct irq_data *irqd,
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
++ return 0;
++}
+
+- return intcapxt_irqdomain_activate(irqd->domain, irqd, false);
++static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
++{
++ return on ? -EOPNOTSUPP : 0;
+ }
+
+ static struct irq_chip intcapxt_controller = {
+@@ -2108,7 +2129,8 @@ static struct irq_chip intcapxt_controller = {
+ .irq_ack = irq_chip_ack_parent,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_affinity = intcapxt_set_affinity,
+- .flags = IRQCHIP_SKIP_SET_WAKE,
++ .irq_set_wake = intcapxt_set_wake,
++ .flags = IRQCHIP_MASK_ON_SUSPEND,
+ };
+
+ static const struct irq_domain_ops intcapxt_domain_ops = {
+@@ -2170,7 +2192,6 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu)
+ return ret;
+ }
+
+- iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+ return 0;
+ }
+
+@@ -2193,6 +2214,10 @@ static int iommu_init_irq(struct amd_iommu *iommu)
+
+ iommu->int_enabled = true;
+ enable_faults:
++
++ if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
++ iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
++
+ iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
+ if (iommu->ppr_log != NULL)
+@@ -3125,24 +3150,32 @@ static int __init parse_amd_iommu_options(char *str)
+
+ static int __init parse_ivrs_ioapic(char *str)
+ {
+- unsigned int bus, dev, fn;
+- int ret, id, i;
+- u16 devid;
++ u32 seg = 0, bus, dev, fn;
++ int id, i;
++ u32 devid;
+
+- ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
++ if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
++ sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
++ goto found;
+
+- if (ret != 4) {
+- pr_err("Invalid command line: ivrs_ioapic%s\n", str);
+- return 1;
++ if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
++ sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
++ pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
++ str, id, seg, bus, dev, fn);
++ goto found;
+ }
+
++ pr_err("Invalid command line: ivrs_ioapic%s\n", str);
++ return 1;
++
++found:
+ if (early_ioapic_map_size == EARLY_MAP_SIZE) {
+ pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
+ str);
+ return 1;
+ }
+
+- devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
++ devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
+
+ cmdline_maps = true;
+ i = early_ioapic_map_size++;
+@@ -3155,24 +3188,32 @@ static int __init parse_ivrs_ioapic(char *str)
+
+ static int __init parse_ivrs_hpet(char *str)
+ {
+- unsigned int bus, dev, fn;
+- int ret, id, i;
+- u16 devid;
++ u32 seg = 0, bus, dev, fn;
++ int id, i;
++ u32 devid;
+
+- ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
++ if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
++ sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
++ goto found;
+
+- if (ret != 4) {
+- pr_err("Invalid command line: ivrs_hpet%s\n", str);
+- return 1;
++ if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
++ sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
++ pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
++ str, id, seg, bus, dev, fn);
++ goto found;
+ }
+
++ pr_err("Invalid command line: ivrs_hpet%s\n", str);
++ return 1;
++
++found:
+ if (early_hpet_map_size == EARLY_MAP_SIZE) {
+ pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
+ str);
+ return 1;
+ }
+
+- devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
++ devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
+
+ cmdline_maps = true;
+ i = early_hpet_map_size++;
+@@ -3183,19 +3224,53 @@ static int __init parse_ivrs_hpet(char *str)
+ return 1;
+ }
+
++#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
++
+ static int __init parse_ivrs_acpihid(char *str)
+ {
+- u32 bus, dev, fn;
+- char *hid, *uid, *p;
+- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+- int ret, i;
++ u32 seg = 0, bus, dev, fn;
++ char *hid, *uid, *p, *addr;
++ char acpiid[ACPIID_LEN] = {0};
++ int i;
+
+- ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
+- if (ret != 4) {
+- pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
+- return 1;
++ addr = strchr(str, '@');
++ if (!addr) {
++ addr = strchr(str, '=');
++ if (!addr)
++ goto not_found;
++
++ ++addr;
++
++ if (strlen(addr) > ACPIID_LEN)
++ goto not_found;
++
++ if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
++ sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
++ pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
++ str, acpiid, seg, bus, dev, fn);
++ goto found;
++ }
++ goto not_found;
+ }
+
++ /* We have the '@', make it the terminator to get just the acpiid */
++ *addr++ = 0;
++
++ if (strlen(str) > ACPIID_LEN + 1)
++ goto not_found;
++
++ if (sscanf(str, "=%s", acpiid) != 1)
++ goto not_found;
++
++ if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
++ sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
++ goto found;
++
++not_found:
++ pr_err("Invalid command line: ivrs_acpihid%s\n", str);
++ return 1;
++
++found:
+ p = acpiid;
+ hid = strsep(&p, ":");
+ uid = p;
+@@ -3205,11 +3280,17 @@ static int __init parse_ivrs_acpihid(char *str)
+ return 1;
+ }
+
++ /*
++ * Ignore leading zeroes after ':', so e.g., AMDI0095:00
++ * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
++ */
++ while (*uid == '0' && *(uid + 1))
++ uid++;
++
+ i = early_acpihid_map_size++;
+ memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
+ memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
+- early_acpihid_map[i].devid =
+- ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
++ early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
+ early_acpihid_map[i].cmd_line = true;
+
+ return 1;
+diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
+index 182c93a43efd8..1eddf557636d7 100644
+--- a/drivers/iommu/amd/io_pgtable.c
++++ b/drivers/iommu/amd/io_pgtable.c
+@@ -519,12 +519,6 @@ static void v1_free_pgtable(struct io_pgtable *iop)
+
+ dom = container_of(pgtable, struct protection_domain, iop);
+
+- /* Update data structure */
+- amd_iommu_domain_clr_pt_root(dom);
+-
+- /* Make changes visible to IOMMUs */
+- amd_iommu_domain_update(dom);
+-
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
+@@ -532,6 +526,12 @@ static void v1_free_pgtable(struct io_pgtable *iop)
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
+
++ /* Update data structure */
++ amd_iommu_domain_clr_pt_root(dom);
++
++ /* Make changes visible to IOMMUs */
++ amd_iommu_domain_update(dom);
++
+ free_page_list(freelist);
+ }
+
+diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
+index 1722bb161841f..d9251af7f3cf6 100644
+--- a/drivers/iommu/amd/iommu.c
++++ b/drivers/iommu/amd/iommu.c
+@@ -742,7 +742,8 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
+ #endif /* !CONFIG_IRQ_REMAP */
+
+ #define AMD_IOMMU_INT_MASK \
+- (MMIO_STATUS_EVT_INT_MASK | \
++ (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \
++ MMIO_STATUS_EVT_INT_MASK | \
+ MMIO_STATUS_PPR_INT_MASK | \
+ MMIO_STATUS_GALOG_INT_MASK)
+
+@@ -752,7 +753,7 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
+ u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ while (status & AMD_IOMMU_INT_MASK) {
+- /* Enable EVT and PPR and GA interrupts again */
++ /* Enable interrupt sources again */
+ writel(AMD_IOMMU_INT_MASK,
+ iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+@@ -773,6 +774,11 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
+ }
+ #endif
+
++ if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) {
++ pr_info_ratelimited("IOMMU event log overflow\n");
++ amd_iommu_restart_event_logging(iommu);
++ }
++
+ /*
+ * Hardware bug: ERBT1312
+ * When re-enabling interrupt (by writing 1
+@@ -846,7 +852,8 @@ static void build_completion_wait(struct iommu_cmd *cmd,
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
+ cmd->data[1] = upper_32_bits(paddr);
+- cmd->data[2] = data;
++ cmd->data[2] = lower_32_bits(data);
++ cmd->data[3] = upper_32_bits(data);
+ CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
+ }
+
+@@ -1579,27 +1586,29 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ /* First reset the PRI state of the device */
+ ret = pci_reset_pri(pdev);
+ if (ret)
+- goto out_err;
++ goto out_err_pasid;
+
+ /* Enable PRI */
+ /* FIXME: Hardcode number of outstanding requests for now */
+ ret = pci_enable_pri(pdev, 32);
+ if (ret)
+- goto out_err;
++ goto out_err_pasid;
+
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (ret)
+- goto out_err;
++ goto out_err_pri;
+
+ return 0;
+
+-out_err:
++out_err_pri:
+ pci_disable_pri(pdev);
++
++out_err_pasid:
+ pci_disable_pasid(pdev);
+
+ return ret;
+@@ -1810,17 +1819,10 @@ void amd_iommu_domain_update(struct protection_domain *domain)
+ amd_iommu_domain_flush_complete(domain);
+ }
+
+-static void __init amd_iommu_init_dma_ops(void)
+-{
+- swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
+-}
+-
+ int __init amd_iommu_init_api(void)
+ {
+ int err;
+
+- amd_iommu_init_dma_ops();
+-
+ err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
+ if (err)
+ return err;
+@@ -2220,7 +2222,7 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+- domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
++ domain_flush_pages(dom, gather->start, gather->end - gather->start + 1, 1);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
+@@ -3314,8 +3316,7 @@ int amd_iommu_activate_guest_mode(void *data)
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ u64 valid;
+
+- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+- !entry || entry->lo.fields_vapic.guest_mode)
++ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
+ return 0;
+
+ valid = entry->lo.fields_vapic.valid;
+diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
+index a9e568276c99f..29a3a62b7a3ac 100644
+--- a/drivers/iommu/amd/iommu_v2.c
++++ b/drivers/iommu/amd/iommu_v2.c
+@@ -264,8 +264,8 @@ static void put_pasid_state(struct pasid_state *pasid_state)
+
+ static void put_pasid_state_wait(struct pasid_state *pasid_state)
+ {
+- refcount_dec(&pasid_state->count);
+- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
++ if (!refcount_dec_and_test(&pasid_state->count))
++ wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
+ free_pasid_state(pasid_state);
+ }
+
+@@ -588,6 +588,7 @@ out_drop_state:
+ put_device_state(dev_state);
+
+ out:
++ pci_dev_put(pdev);
+ return ret;
+ }
+
+@@ -928,10 +929,8 @@ static int __init amd_iommu_v2_init(void)
+ {
+ int ret;
+
+- pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n");
+-
+ if (!amd_iommu_v2_supported()) {
+- pr_info("AMD IOMMUv2 functionality not available on this system\n");
++ pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
+ /*
+ * Load anyway to provide the symbols to other modules
+ * which may use AMD IOMMUv2 optionally.
+@@ -946,6 +945,8 @@ static int __init amd_iommu_v2_init(void)
+
+ amd_iommu_register_ppr_notifier(&ppr_nb);
+
++ pr_info("AMD IOMMUv2 loaded and initialized\n");
++
+ return 0;
+
+ out:
+diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
+index fdfa39ec2a4d4..baba4571c8152 100644
+--- a/drivers/iommu/apple-dart.c
++++ b/drivers/iommu/apple-dart.c
+@@ -70,6 +70,8 @@
+ #define DART_ERROR_ADDR_HI 0x54
+ #define DART_ERROR_ADDR_LO 0x50
+
++#define DART_STREAMS_ENABLE 0xfc
++
+ #define DART_TCR(sid) (0x100 + 4 * (sid))
+ #define DART_TCR_TRANSLATE_ENABLE BIT(7)
+ #define DART_TCR_BYPASS0_ENABLE BIT(8)
+@@ -301,6 +303,9 @@ static int apple_dart_hw_reset(struct apple_dart *dart)
+ apple_dart_hw_disable_dma(&stream_map);
+ apple_dart_hw_clear_all_ttbrs(&stream_map);
+
++ /* enable all streams globally since TCR is used to control isolation */
++ writel(DART_STREAM_ALL, dart->regs + DART_STREAMS_ENABLE);
++
+ /* clear any pending errors before the interrupt is unmasked */
+ writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR);
+
+@@ -752,6 +757,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
+ .of_xlate = apple_dart_of_xlate,
+ .def_domain_type = apple_dart_def_domain_type,
+ .pgsize_bitmap = -1UL, /* Restricted during dart probe */
++ .owner = THIS_MODULE,
+ };
+
+ static irqreturn_t apple_dart_irq(int irq, void *dev)
+@@ -827,16 +833,15 @@ static int apple_dart_probe(struct platform_device *pdev)
+ dart->dev = dev;
+ spin_lock_init(&dart->lock);
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
++ if (IS_ERR(dart->regs))
++ return PTR_ERR(dart->regs);
++
+ if (resource_size(res) < 0x4000) {
+ dev_err(dev, "MMIO region too small (%pr)\n", res);
+ return -EINVAL;
+ }
+
+- dart->regs = devm_ioremap_resource(dev, res);
+- if (IS_ERR(dart->regs))
+- return PTR_ERR(dart->regs);
+-
+ dart->irq = platform_get_irq(pdev, 0);
+ if (dart->irq < 0)
+ return -ENODEV;
+diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+index ee66d1f4cb81e..e2e80eb2840ca 100644
+--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+@@ -6,6 +6,7 @@
+ #include <linux/mm.h>
+ #include <linux/mmu_context.h>
+ #include <linux/mmu_notifier.h>
++#include <linux/sched/mm.h>
+ #include <linux/slab.h>
+
+ #include "arm-smmu-v3.h"
+@@ -96,9 +97,14 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
+ struct arm_smmu_ctx_desc *cd;
+ struct arm_smmu_ctx_desc *ret = NULL;
+
++ /* Don't free the mm until we release the ASID */
++ mmgrab(mm);
++
+ asid = arm64_mm_context_get(mm);
+- if (!asid)
+- return ERR_PTR(-ESRCH);
++ if (!asid) {
++ err = -ESRCH;
++ goto out_drop_mm;
++ }
+
+ cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+ if (!cd) {
+@@ -165,6 +171,8 @@ out_free_cd:
+ kfree(cd);
+ out_put_context:
+ arm64_mm_context_put(mm);
++out_drop_mm:
++ mmdrop(mm);
+ return err < 0 ? ERR_PTR(err) : ret;
+ }
+
+@@ -173,6 +181,7 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
+ if (arm_smmu_free_asid(cd)) {
+ /* Unpin ASID */
+ arm64_mm_context_put(cd->mm);
++ mmdrop(cd->mm);
+ kfree(cd);
+ }
+ }
+@@ -183,7 +192,14 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
+ {
+ struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
+ struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
+- size_t size = end - start + 1;
++ size_t size;
++
++ /*
++ * The mm_types defines vm_end as the first byte after the end address,
++ * different from IOMMU subsystem using the last address of an address
++ * range. So do a simple translation here by calculating size correctly.
++ */
++ size = end - start;
+
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
+ arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
+diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+index a388e318f86e0..340ef116d574a 100644
+--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+@@ -154,6 +154,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q)
+ q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
+ }
+
++static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
++{
++ struct arm_smmu_ll_queue *llq = &q->llq;
++
++ if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
++ return;
++
++ llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
++ Q_IDX(llq, llq->cons);
++ queue_sync_cons_out(q);
++}
++
+ static int queue_sync_prod_in(struct arm_smmu_queue *q)
+ {
+ u32 prod;
+@@ -885,6 +897,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *cmd)
+ {
++ if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
++ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
++ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
++ cmds->num = 0;
++ }
++
+ if (cmds->num == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
+ cmds->num = 0;
+@@ -1552,6 +1570,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
+ dev_info(smmu->dev, "\t0x%016llx\n",
+ (unsigned long long)evt[i]);
+
++ cond_resched();
+ }
+
+ /*
+@@ -1563,8 +1582,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+- Q_IDX(llq, llq->cons);
++ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+ }
+
+@@ -1622,9 +1640,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
+ } while (!queue_empty(llq));
+
+ /* Sync our overflow flag, as we believe we're up to speed */
+- llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+- Q_IDX(llq, llq->cons);
+- queue_sync_cons_out(q);
++ queue_sync_cons_ovf(q);
+ return IRQ_HANDLED;
+ }
+
+@@ -2831,6 +2847,26 @@ static int arm_smmu_dev_disable_feature(struct device *dev,
+ }
+ }
+
++/*
++ * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
++ * PCIe link and save the data to memory by DMA. The hardware is restricted to
++ * use identity mapping only.
++ */
++#define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
++ (pdev)->device == 0xa12e)
++
++static int arm_smmu_def_domain_type(struct device *dev)
++{
++ if (dev_is_pci(dev)) {
++ struct pci_dev *pdev = to_pci_dev(dev);
++
++ if (IS_HISI_PTT_DEVICE(pdev))
++ return IOMMU_DOMAIN_IDENTITY;
++ }
++
++ return 0;
++}
++
+ static struct iommu_ops arm_smmu_ops = {
+ .capable = arm_smmu_capable,
+ .domain_alloc = arm_smmu_domain_alloc,
+@@ -2856,6 +2892,7 @@ static struct iommu_ops arm_smmu_ops = {
+ .sva_unbind = arm_smmu_sva_unbind,
+ .sva_get_pasid = arm_smmu_sva_get_pasid,
+ .page_response = arm_smmu_page_response,
++ .def_domain_type = arm_smmu_def_domain_type,
+ .pgsize_bitmap = -1UL, /* Restricted during device attach */
+ .owner = THIS_MODULE,
+ };
+@@ -3428,6 +3465,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
+ return 0;
+ }
+
++#define IIDR_IMPLEMENTER_ARM 0x43b
++#define IIDR_PRODUCTID_ARM_MMU_600 0x483
++#define IIDR_PRODUCTID_ARM_MMU_700 0x487
++
++static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
++{
++ u32 reg;
++ unsigned int implementer, productid, variant, revision;
++
++ reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
++ implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
++ productid = FIELD_GET(IIDR_PRODUCTID, reg);
++ variant = FIELD_GET(IIDR_VARIANT, reg);
++ revision = FIELD_GET(IIDR_REVISION, reg);
++
++ switch (implementer) {
++ case IIDR_IMPLEMENTER_ARM:
++ switch (productid) {
++ case IIDR_PRODUCTID_ARM_MMU_600:
++ /* Arm erratum 1076982 */
++ if (variant == 0 && revision <= 2)
++ smmu->features &= ~ARM_SMMU_FEAT_SEV;
++ /* Arm erratum 1209401 */
++ if (variant < 2)
++ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
++ break;
++ case IIDR_PRODUCTID_ARM_MMU_700:
++ /* Arm erratum 2812531 */
++ smmu->features &= ~ARM_SMMU_FEAT_BTM;
++ smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
++ /* Arm errata 2268618, 2812531 */
++ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
++ break;
++ }
++ break;
++ }
++}
++
+ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
+ {
+ u32 reg;
+@@ -3633,6 +3708,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
+
+ smmu->ias = max(smmu->ias, smmu->oas);
+
++ if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
++ (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
++ smmu->features |= ARM_SMMU_FEAT_NESTING;
++
++ arm_smmu_device_iidr_probe(smmu);
++
+ if (arm_smmu_sva_supported(smmu))
+ smmu->features |= ARM_SMMU_FEAT_SVA;
+
+@@ -3785,6 +3866,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
+
+ /* Base address */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -EINVAL;
+ if (resource_size(res) < arm_smmu_resource_size(smmu)) {
+ dev_err(dev, "MMIO region too small (%pr)\n", res);
+ return -EINVAL;
+diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+index 4cb136f07914e..c594a9b469995 100644
+--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+@@ -69,6 +69,12 @@
+ #define IDR5_VAX GENMASK(11, 10)
+ #define IDR5_VAX_52_BIT 1
+
++#define ARM_SMMU_IIDR 0x18
++#define IIDR_PRODUCTID GENMASK(31, 20)
++#define IIDR_VARIANT GENMASK(19, 16)
++#define IIDR_REVISION GENMASK(15, 12)
++#define IIDR_IMPLEMENTER GENMASK(11, 0)
++
+ #define ARM_SMMU_CR0 0x20
+ #define CR0_ATSCHK (1 << 4)
+ #define CR0_CMDQEN (1 << 3)
+@@ -640,11 +646,13 @@ struct arm_smmu_device {
+ #define ARM_SMMU_FEAT_BTM (1 << 16)
+ #define ARM_SMMU_FEAT_SVA (1 << 17)
+ #define ARM_SMMU_FEAT_E2H (1 << 18)
++#define ARM_SMMU_FEAT_NESTING (1 << 19)
+ u32 features;
+
+ #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
+ #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
+ #define ARM_SMMU_OPT_MSIPOLL (1 << 2)
++#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
+ u32 options;
+
+ struct arm_smmu_cmdq cmdq;
+diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+index 01e9b50b10a18..87bf522b9d2ee 100644
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+@@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
+ dev_name(dev), err);
+ }
+
++static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain,
++ struct io_pgtable_cfg *pgtbl_cfg,
++ struct device *dev)
++{
++ struct arm_smmu_device *smmu = smmu_domain->smmu;
++ const struct device_node *np = smmu->dev->of_node;
++
++ /*
++ * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
++ * entries to not be invalidated correctly. The problem is that the walk
++ * cache index generated for IOVA is not same across translation and
++ * invalidation requests. This is leading to page faults when PMD entry
++ * is released during unmap and populated with new PTE table during
++ * subsequent map request. Disabling large page mappings avoids the
++ * release of PMD entry and avoid translations seeing stale PMD entry in
++ * walk cache.
++ * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
++ * Tegra234.
++ */
++ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
++ of_device_is_compatible(np, "nvidia,tegra194-smmu")) {
++ smmu->pgsize_bitmap = PAGE_SIZE;
++ pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap;
++ }
++
++ return 0;
++}
++
+ static const struct arm_smmu_impl nvidia_smmu_impl = {
+ .read_reg = nvidia_smmu_read_reg,
+ .write_reg = nvidia_smmu_write_reg,
+@@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
+ .global_fault = nvidia_smmu_global_fault,
+ .context_fault = nvidia_smmu_context_fault,
+ .probe_finalize = nvidia_smmu_probe_finalize,
++ .init_context = nvidia_smmu_init_context,
+ };
+
+ static const struct arm_smmu_impl nvidia_smmu_single_impl = {
+ .probe_finalize = nvidia_smmu_probe_finalize,
++ .init_context = nvidia_smmu_init_context,
+ };
+
+ struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
+diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+index 55690af1b25d0..50453d38400c5 100644
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+@@ -51,7 +51,7 @@ static void qcom_adreno_smmu_get_fault_info(const void *cookie,
+ info->fsynr1 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSYNR1);
+ info->far = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_FAR);
+ info->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
+- info->ttbr0 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0);
++ info->ttbr0 = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0);
+ info->contextidr = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_CONTEXTIDR);
+ }
+
+@@ -247,12 +247,26 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+
+ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
+ {
+- unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
++ unsigned int last_s2cr;
+ u32 reg;
+ u32 smr;
+ int i;
+
++ /*
++ * Some platforms support more than the Arm SMMU architected maximum of
++ * 128 stream matching groups. For unknown reasons, the additional
++ * groups don't exhibit the same behavior as the architected registers,
++ * so limit the groups to 128 until the behavior is fixed for the other
++ * groups.
++ */
++ if (smmu->num_mapping_groups > 128) {
++ dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n");
++ smmu->num_mapping_groups = 128;
++ }
++
++ last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
++
+ /*
+ * With some firmware versions writes to S2CR of type FAULT are
+ * ignored, and writing BYPASS will end up written as FAULT in the
+diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
+index 4bc75c4ce402d..324e8f32962ac 100644
+--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
++++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
+@@ -2090,11 +2090,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
+ if (err)
+ return err;
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- ioaddr = res->start;
+- smmu->base = devm_ioremap_resource(dev, res);
++ smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(smmu->base))
+ return PTR_ERR(smmu->base);
++ ioaddr = res->start;
+ /*
+ * The resource size should effectively match the value of SMMU_TOP;
+ * stash that temporarily until we know PAGESIZE to validate it with.
+diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+index b91874cb6cf33..9438203f08de7 100644
+--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
++++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+@@ -273,6 +273,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
+ ctx->secure_init = true;
+ }
+
++ /* Disable context bank before programming */
++ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
++
++ /* Clear context bank fault address fault status registers */
++ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0);
++ iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
++
+ /* TTBRs */
+ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
+ pgtbl_cfg.arm_lpae_s1_cfg.ttbr |
+@@ -748,9 +755,12 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
+ {
+ struct device_node *child;
+
+- for_each_child_of_node(qcom_iommu->dev->of_node, child)
+- if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec"))
++ for_each_child_of_node(qcom_iommu->dev->of_node, child) {
++ if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) {
++ of_node_put(child);
+ return true;
++ }
++ }
+
+ return false;
+ }
+diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
+index 896bea04c347e..48c6f7ff4aef1 100644
+--- a/drivers/iommu/dma-iommu.c
++++ b/drivers/iommu/dma-iommu.c
+@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
+ return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+ }
+
++static bool dev_use_swiotlb(struct device *dev)
++{
++ return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
++}
++
+ /* sysfs updates are serialised by the mutex of the group owning @domain */
+ int iommu_dma_init_fq(struct iommu_domain *domain)
+ {
+@@ -510,23 +515,6 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
+ iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
+ }
+
+-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
+- size_t size, enum dma_data_direction dir,
+- unsigned long attrs)
+-{
+- struct iommu_domain *domain = iommu_get_dma_domain(dev);
+- phys_addr_t phys;
+-
+- phys = iommu_iova_to_phys(domain, dma_addr);
+- if (WARN_ON(!phys))
+- return;
+-
+- __iommu_dma_unmap(dev, dma_addr, size);
+-
+- if (unlikely(is_swiotlb_buffer(dev, phys)))
+- swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+-}
+-
+ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
+ size_t size, int prot, u64 dma_mask)
+ {
+@@ -553,52 +541,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
+ return iova + iova_off;
+ }
+
+-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+- size_t org_size, dma_addr_t dma_mask, bool coherent,
+- enum dma_data_direction dir, unsigned long attrs)
+-{
+- int prot = dma_info_to_prot(dir, coherent, attrs);
+- struct iommu_domain *domain = iommu_get_dma_domain(dev);
+- struct iommu_dma_cookie *cookie = domain->iova_cookie;
+- struct iova_domain *iovad = &cookie->iovad;
+- size_t aligned_size = org_size;
+- void *padding_start;
+- size_t padding_size;
+- dma_addr_t iova;
+-
+- /*
+- * If both the physical buffer start address and size are
+- * page aligned, we don't need to use a bounce page.
+- */
+- if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+- iova_offset(iovad, phys | org_size)) {
+- aligned_size = iova_align(iovad, org_size);
+- phys = swiotlb_tbl_map_single(dev, phys, org_size,
+- aligned_size, dir, attrs);
+-
+- if (phys == DMA_MAPPING_ERROR)
+- return DMA_MAPPING_ERROR;
+-
+- /* Cleanup the padding area. */
+- padding_start = phys_to_virt(phys);
+- padding_size = aligned_size;
+-
+- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+- (dir == DMA_TO_DEVICE ||
+- dir == DMA_BIDIRECTIONAL)) {
+- padding_start += org_size;
+- padding_size -= org_size;
+- }
+-
+- memset(padding_start, 0, padding_size);
+- }
+-
+- iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+- if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
+- swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
+- return iova;
+-}
+-
+ static void __iommu_dma_free_pages(struct page **pages, int count)
+ {
+ while (count--)
+@@ -677,6 +619,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
+ unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
+ struct page **pages;
+ dma_addr_t iova;
++ ssize_t ret;
+
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, domain))
+@@ -714,8 +657,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
+ arch_dma_prep_coherent(sg_page(sg), sg->length);
+ }
+
+- if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
+- < size)
++ ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
++ if (ret < 0 || ret < size)
+ goto out_free_sg;
+
+ sgt->sgl->dma_address = iova;
+@@ -794,7 +737,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
+ {
+ phys_addr_t phys;
+
+- if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
++ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ return;
+
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+@@ -810,7 +753,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
+ {
+ phys_addr_t phys;
+
+- if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
++ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ return;
+
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+@@ -828,17 +771,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg;
+ int i;
+
+- if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+- return;
+-
+- for_each_sg(sgl, sg, nelems, i) {
+- if (!dev_is_dma_coherent(dev))
++ if (dev_use_swiotlb(dev))
++ for_each_sg(sgl, sg, nelems, i)
++ iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
++ sg->length, dir);
++ else if (!dev_is_dma_coherent(dev))
++ for_each_sg(sgl, sg, nelems, i)
+ arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
+-
+- if (is_swiotlb_buffer(dev, sg_phys(sg)))
+- swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
+- sg->length, dir);
+- }
+ }
+
+ static void iommu_dma_sync_sg_for_device(struct device *dev,
+@@ -848,17 +787,14 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg;
+ int i;
+
+- if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+- return;
+-
+- for_each_sg(sgl, sg, nelems, i) {
+- if (is_swiotlb_buffer(dev, sg_phys(sg)))
+- swiotlb_sync_single_for_device(dev, sg_phys(sg),
+- sg->length, dir);
+-
+- if (!dev_is_dma_coherent(dev))
++ if (dev_use_swiotlb(dev))
++ for_each_sg(sgl, sg, nelems, i)
++ iommu_dma_sync_single_for_device(dev,
++ sg_dma_address(sg),
++ sg->length, dir);
++ else if (!dev_is_dma_coherent(dev))
++ for_each_sg(sgl, sg, nelems, i)
+ arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+- }
+ }
+
+ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+@@ -867,22 +803,66 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+ {
+ phys_addr_t phys = page_to_phys(page) + offset;
+ bool coherent = dev_is_dma_coherent(dev);
+- dma_addr_t dma_handle;
++ int prot = dma_info_to_prot(dir, coherent, attrs);
++ struct iommu_domain *domain = iommu_get_dma_domain(dev);
++ struct iommu_dma_cookie *cookie = domain->iova_cookie;
++ struct iova_domain *iovad = &cookie->iovad;
++ dma_addr_t iova, dma_mask = dma_get_mask(dev);
++
++ /*
++ * If both the physical buffer start address and size are
++ * page aligned, we don't need to use a bounce page.
++ */
++ if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
++ void *padding_start;
++ size_t padding_size, aligned_size;
++
++ aligned_size = iova_align(iovad, size);
++ phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
++ iova_mask(iovad), dir, attrs);
++
++ if (phys == DMA_MAPPING_ERROR)
++ return DMA_MAPPING_ERROR;
+
+- dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+- coherent, dir, attrs);
+- if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+- dma_handle != DMA_MAPPING_ERROR)
++ /* Cleanup the padding area. */
++ padding_start = phys_to_virt(phys);
++ padding_size = aligned_size;
++
++ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
++ (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
++ padding_start += size;
++ padding_size -= size;
++ }
++
++ memset(padding_start, 0, padding_size);
++ }
++
++ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+- return dma_handle;
++
++ iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
++ if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
++ swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
++ return iova;
+ }
+
+ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+ {
+- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+- iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
+- __iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
++ struct iommu_domain *domain = iommu_get_dma_domain(dev);
++ phys_addr_t phys;
++
++ phys = iommu_iova_to_phys(domain, dma_handle);
++ if (WARN_ON(!phys))
++ return;
++
++ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
++ arch_sync_dma_for_cpu(phys, size, dir);
++
++ __iommu_dma_unmap(dev, dma_handle, size);
++
++ if (unlikely(is_swiotlb_buffer(dev, phys)))
++ swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+ }
+
+ /*
+@@ -967,7 +947,7 @@ static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *s
+ int i;
+
+ for_each_sg(sg, s, nents, i)
+- __iommu_dma_unmap_swiotlb(dev, sg_dma_address(s),
++ iommu_dma_unmap_page(dev, sg_dma_address(s),
+ sg_dma_len(s), dir, attrs);
+ }
+
+@@ -978,9 +958,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+- sg_dma_address(s) = __iommu_dma_map_swiotlb(dev, sg_phys(s),
+- s->length, dma_get_mask(dev),
+- dev_is_dma_coherent(dev), dir, attrs);
++ sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
++ s->offset, s->length, dir, attrs);
+ if (sg_dma_address(s) == DMA_MAPPING_ERROR)
+ goto out_unmap;
+ sg_dma_len(s) = s->length;
+@@ -1016,15 +995,16 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
+ ret = iommu_deferred_attach(dev, domain);
+- goto out;
++ if (ret)
++ goto out;
+ }
+
++ if (dev_use_swiotlb(dev))
++ return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
++
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
+- if (dev_is_untrusted(dev))
+- return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
+-
+ /*
+ * Work out how much IOVA space we need, and align the segments to
+ * IOVA granules for the IOMMU driver to handle. With some clever
+@@ -1075,7 +1055,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ * implementation - it knows better than we do.
+ */
+ ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+- if (ret < iova_len)
++ if (ret < 0 || ret < iova_len)
+ goto out_free_iova;
+
+ return __finalise_sg(dev, sg, nents, iova);
+@@ -1097,14 +1077,14 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ struct scatterlist *tmp;
+ int i;
+
+- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+- iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+-
+- if (dev_is_untrusted(dev)) {
++ if (dev_use_swiotlb(dev)) {
+ iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
+ return;
+ }
+
++ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
++ iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
++
+ /*
+ * The scatterlist segments are mapped into a single
+ * contiguous IOVA allocation, so this is incredibly easy.
+diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
+index 939ffa7689867..f96acc3525e8f 100644
+--- a/drivers/iommu/exynos-iommu.c
++++ b/drivers/iommu/exynos-iommu.c
+@@ -630,7 +630,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
+
+ ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev);
+ if (ret)
+- return ret;
++ goto err_iommu_register;
+
+ platform_set_drvdata(pdev, data);
+
+@@ -657,6 +657,10 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
+ pm_runtime_enable(dev);
+
+ return 0;
++
++err_iommu_register:
++ iommu_device_sysfs_remove(&data->iommu);
++ return ret;
+ }
+
+ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
+diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
+index fc38b1fba7cff..bb5d253188a18 100644
+--- a/drivers/iommu/fsl_pamu.c
++++ b/drivers/iommu/fsl_pamu.c
+@@ -865,7 +865,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)
+ ret = create_csd(ppaact_phys, mem_size, csd_port_id);
+ if (ret) {
+ dev_err(dev, "could not create coherence subdomain\n");
+- return ret;
++ goto error;
+ }
+ }
+
+diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
+index b12e421a2f1ab..71596fc62822c 100644
+--- a/drivers/iommu/intel/cap_audit.c
++++ b/drivers/iommu/intel/cap_audit.c
+@@ -144,6 +144,7 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
+ {
+ struct dmar_drhd_unit *d;
+ struct intel_iommu *i;
++ int rc = 0;
+
+ rcu_read_lock();
+ if (list_empty(&dmar_drhd_units))
+@@ -163,9 +164,17 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
+ check_irq_capabilities(iommu, i);
+ }
+
++ /*
++ * If the system is sane to support scalable mode, either SL or FL
++ * should be sane.
++ */
++ if (intel_cap_smts_sanity() &&
++ !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
++ rc = -EOPNOTSUPP;
++
+ out:
+ rcu_read_unlock();
+- return 0;
++ return rc;
+ }
+
+ int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
+@@ -203,3 +212,8 @@ bool intel_cap_flts_sanity(void)
+ {
+ return ecap_flts(intel_iommu_ecap_sanity);
+ }
++
++bool intel_cap_slts_sanity(void)
++{
++ return ecap_slts(intel_iommu_ecap_sanity);
++}
+diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
+index 74cfccae0e817..d07b75938961f 100644
+--- a/drivers/iommu/intel/cap_audit.h
++++ b/drivers/iommu/intel/cap_audit.h
+@@ -111,6 +111,7 @@ bool intel_cap_smts_sanity(void);
+ bool intel_cap_pasid_sanity(void);
+ bool intel_cap_nest_sanity(void);
+ bool intel_cap_flts_sanity(void);
++bool intel_cap_slts_sanity(void);
+
+ static inline bool scalable_mode_support(void)
+ {
+diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
+index b7708b93f3fa1..7c20083d4a798 100644
+--- a/drivers/iommu/intel/dmar.c
++++ b/drivers/iommu/intel/dmar.c
+@@ -385,7 +385,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
+
+ static struct notifier_block dmar_pci_bus_nb = {
+ .notifier_call = dmar_pci_bus_notifier,
+- .priority = INT_MIN,
++ .priority = 1,
+ };
+
+ static struct dmar_drhd_unit *
+@@ -497,7 +497,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
+ if (drhd->reg_base_addr == rhsa->base_address) {
+ int node = pxm_to_node(rhsa->proximity_domain);
+
+- if (!node_online(node))
++ if (node != NUMA_NO_NODE && !node_online(node))
+ node = NUMA_NO_NODE;
+ drhd->iommu->node = node;
+ return 0;
+@@ -822,6 +822,7 @@ int __init dmar_dev_scope_init(void)
+ info = dmar_alloc_pci_notify_info(dev,
+ BUS_NOTIFY_ADD_DEVICE);
+ if (!info) {
++ pci_dev_put(dev);
+ return dmar_dev_scope_status;
+ } else {
+ dmar_pci_bus_add_dev(info);
+@@ -1079,7 +1080,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
+ }
+
+ err = -EINVAL;
+- if (cap_sagaw(iommu->cap) == 0) {
++ if (!cap_sagaw(iommu->cap) &&
++ (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
+ pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
+ iommu->name);
+ drhd->ignored = 1;
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index d75f59ae28e6e..29538471c528e 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -191,38 +191,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
+ return re->hi & VTD_PAGE_MASK;
+ }
+
+-static inline void context_clear_pasid_enable(struct context_entry *context)
+-{
+- context->lo &= ~(1ULL << 11);
+-}
+-
+-static inline bool context_pasid_enabled(struct context_entry *context)
+-{
+- return !!(context->lo & (1ULL << 11));
+-}
+-
+-static inline void context_set_copied(struct context_entry *context)
+-{
+- context->hi |= (1ull << 3);
+-}
+-
+-static inline bool context_copied(struct context_entry *context)
+-{
+- return !!(context->hi & (1ULL << 3));
+-}
+-
+-static inline bool __context_present(struct context_entry *context)
+-{
+- return (context->lo & 1);
+-}
+-
+-bool context_present(struct context_entry *context)
+-{
+- return context_pasid_enabled(context) ?
+- __context_present(context) :
+- __context_present(context) && !context_copied(context);
+-}
+-
+ static inline void context_set_present(struct context_entry *context)
+ {
+ context->lo |= 1;
+@@ -270,6 +238,26 @@ static inline void context_clear_entry(struct context_entry *context)
+ context->hi = 0;
+ }
+
++static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ if (!iommu->copied_tables)
++ return false;
++
++ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
++static inline void
++set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
++static inline void
++clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
+ /*
+ * This domain is a statically identity mapping domain.
+ * 1. This domain creats a static 1:1 mapping to all usable memory.
+@@ -412,6 +400,7 @@ static int __init intel_iommu_setup(char *str)
+ {
+ if (!str)
+ return -EINVAL;
++
+ while (*str) {
+ if (!strncmp(str, "on", 2)) {
+ dmar_disabled = 0;
+@@ -441,13 +430,16 @@ static int __init intel_iommu_setup(char *str)
+ } else if (!strncmp(str, "tboot_noforce", 13)) {
+ pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+ intel_iommu_tboot_noforce = 1;
++ } else {
++ pr_notice("Unknown option - '%s'\n", str);
+ }
+
+ str += strcspn(str, ",");
+ while (*str == ',')
+ str++;
+ }
+- return 0;
++
++ return 1;
+ }
+ __setup("intel_iommu=", intel_iommu_setup);
+
+@@ -522,7 +514,7 @@ static inline void free_devinfo_mem(void *vaddr)
+
+ static inline int domain_type_is_si(struct dmar_domain *domain)
+ {
+- return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
++ return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
+ }
+
+ static inline bool domain_use_first_level(struct dmar_domain *domain)
+@@ -538,14 +530,36 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
+ return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
+ }
+
++/*
++ * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
++ * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
++ * the returned SAGAW.
++ */
++static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
++{
++ unsigned long fl_sagaw, sl_sagaw;
++
++ fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0);
++ sl_sagaw = cap_sagaw(iommu->cap);
++
++ /* Second level only. */
++ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
++ return sl_sagaw;
++
++ /* First level only. */
++ if (!ecap_slts(iommu->ecap))
++ return fl_sagaw;
++
++ return fl_sagaw & sl_sagaw;
++}
++
+ static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
+ {
+ unsigned long sagaw;
+ int agaw;
+
+- sagaw = cap_sagaw(iommu->cap);
+- for (agaw = width_to_agaw(max_gaw);
+- agaw >= 0; agaw--) {
++ sagaw = __iommu_calculate_sagaw(iommu);
++ for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
+ if (test_bit(agaw, &sagaw))
+ break;
+ }
+@@ -766,6 +780,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ struct context_entry *context;
+ u64 *entry;
+
++ /*
++ * Except that the caller requested to allocate a new entry,
++ * returning a copied context entry makes no sense.
++ */
++ if (!alloc && context_copied(iommu, bus, devfn))
++ return NULL;
++
+ entry = &root->lo;
+ if (sm_supported(iommu)) {
+ if (devfn >= 0x80) {
+@@ -1027,11 +1048,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+
+ domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
+ pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+- if (domain_use_first_level(domain)) {
+- pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
+- if (iommu_is_dma_domain(&domain->domain))
+- pteval |= DMA_FL_PTE_ACCESS;
+- }
++ if (domain_use_first_level(domain))
++ pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
++
+ if (cmpxchg64(&pte->val, 0ULL, pteval))
+ /* Someone else set it while we were thinking; use theirs. */
+ free_pgtable_page(tmp_page);
+@@ -1222,13 +1241,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+ pte = &pte[pfn_level_offset(pfn, level)];
+
+ do {
+- unsigned long level_pfn;
++ unsigned long level_pfn = pfn & level_mask(level);
+
+ if (!dma_pte_present(pte))
+ goto next;
+
+- level_pfn = pfn & level_mask(level);
+-
+ /* If range covers entire pagetable, free it */
+ if (start_pfn <= level_pfn &&
+ last_pfn >= level_pfn + level_size(level) - 1) {
+@@ -1249,7 +1266,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+ freelist);
+ }
+ next:
+- pfn += level_size(level);
++ pfn = level_pfn + level_size(level);
+ } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+
+ if (first_pte)
+@@ -1635,7 +1652,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+ unsigned long pfn, unsigned int pages,
+ int ih, int map)
+ {
+- unsigned int mask = ilog2(__roundup_pow_of_two(pages));
++ unsigned int aligned_pages = __roundup_pow_of_two(pages);
++ unsigned int mask = ilog2(aligned_pages);
+ uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
+ u16 did = domain->iommu_did[iommu->seq_id];
+
+@@ -1647,10 +1665,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
+ if (domain_use_first_level(domain)) {
+ domain_flush_piotlb(iommu, domain, addr, pages, ih);
+ } else {
++ unsigned long bitmask = aligned_pages - 1;
++
++ /*
++ * PSI masks the low order bits of the base address. If the
++ * address isn't aligned to the mask, then compute a mask value
++ * needed to ensure the target range is flushed.
++ */
++ if (unlikely(bitmask & pfn)) {
++ unsigned long end_pfn = pfn + pages - 1, shared_bits;
++
++ /*
++ * Since end_pfn <= pfn + bitmask, the only way bits
++ * higher than bitmask can differ in pfn and end_pfn is
++ * by carrying. This means after masking out bitmask,
++ * high bits starting with the first set bit in
++ * shared_bits are all equal in both pfn and end_pfn.
++ */
++ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
++ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
++ }
++
+ /*
+ * Fallback to domain selective flush if no PSI support or
+- * the size is too big. PSI requires page size to be 2 ^ x,
+- * and the base address is naturally aligned to the size.
++ * the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+@@ -1854,6 +1892,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
+ iommu->domain_ids = NULL;
+ }
+
++ if (iommu->copied_tables) {
++ bitmap_free(iommu->copied_tables);
++ iommu->copied_tables = NULL;
++ }
++
+ g_iommus[iommu->seq_id] = NULL;
+
+ /* free context mapping */
+@@ -1874,12 +1917,21 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
+ * Check and return whether first level is used by default for
+ * DMA translation.
+ */
+-static bool first_level_by_default(void)
++static bool first_level_by_default(unsigned int type)
+ {
+- return scalable_mode_support() && intel_cap_flts_sanity();
++ /* Only SL is available in legacy mode */
++ if (!scalable_mode_support())
++ return false;
++
++ /* Only level (either FL or SL) is available, just use it */
++ if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
++ return intel_cap_flts_sanity();
++
++ /* Both levels are available, decide it based on domain type */
++ return type != IOMMU_DOMAIN_UNMANAGED;
+ }
+
+-static struct dmar_domain *alloc_domain(int flags)
++static struct dmar_domain *alloc_domain(unsigned int type)
+ {
+ struct dmar_domain *domain;
+
+@@ -1889,8 +1941,7 @@ static struct dmar_domain *alloc_domain(int flags)
+
+ memset(domain, 0, sizeof(*domain));
+ domain->nid = NUMA_NO_NODE;
+- domain->flags = flags;
+- if (first_level_by_default())
++ if (first_level_by_default(type))
+ domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
+ domain->has_iotlb_device = false;
+ INIT_LIST_HEAD(&domain->devices);
+@@ -2062,7 +2113,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ goto out_unlock;
+
+ ret = 0;
+- if (context_present(context))
++ if (context_present(context) && !context_copied(iommu, bus, devfn))
+ goto out_unlock;
+
+ /*
+@@ -2074,7 +2125,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+- if (context_copied(context)) {
++ if (context_copied(iommu, bus, devfn)) {
+ u16 did_old = context_domain_id(context);
+
+ if (did_old < cap_ndoms(iommu->cap)) {
+@@ -2085,6 +2136,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ }
++
++ clear_context_copied(iommu, bus, devfn);
+ }
+
+ context_clear_entry(context);
+@@ -2649,7 +2702,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ /* PASID table is mandatory for a PCI device in scalable mode. */
+- if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
++ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
+ ret = intel_pasid_alloc_table(dev);
+ if (ret) {
+ dev_err(dev, "PASID table allocation failed\n");
+@@ -2708,12 +2761,13 @@ static int __init si_domain_init(int hw)
+ struct device *dev;
+ int i, nid, ret;
+
+- si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
++ si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
+ if (!si_domain)
+ return -EFAULT;
+
+ if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ domain_exit(si_domain);
++ si_domain = NULL;
+ return -EFAULT;
+ }
+
+@@ -2979,32 +3033,14 @@ static int copy_context_table(struct intel_iommu *iommu,
+ /* Now copy the context entry */
+ memcpy(&ce, old_ce + idx, sizeof(ce));
+
+- if (!__context_present(&ce))
++ if (!context_present(&ce))
+ continue;
+
+ did = context_domain_id(&ce);
+ if (did >= 0 && did < cap_ndoms(iommu->cap))
+ set_bit(did, iommu->domain_ids);
+
+- /*
+- * We need a marker for copied context entries. This
+- * marker needs to work for the old format as well as
+- * for extended context entries.
+- *
+- * Bit 67 of the context entry is used. In the old
+- * format this bit is available to software, in the
+- * extended format it is the PGE bit, but PGE is ignored
+- * by HW if PASIDs are disabled (and thus still
+- * available).
+- *
+- * So disable PASIDs first and then mark the entry
+- * copied. This means that we don't copy PASID
+- * translations from the old kernel, but this is fine as
+- * faults there are not fatal.
+- */
+- context_clear_pasid_enable(&ce);
+- context_set_copied(&ce);
+-
++ set_context_copied(iommu, bus, devfn);
+ new_ce[idx] = ce;
+ }
+
+@@ -3031,8 +3067,8 @@ static int copy_translation_tables(struct intel_iommu *iommu)
+ bool new_ext, ext;
+
+ rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
+- ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
+- new_ext = !!ecap_ecs(iommu->ecap);
++ ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
++ new_ext = !!sm_supported(iommu);
+
+ /*
+ * The RTT bit can only be changed when translation is disabled,
+@@ -3043,6 +3079,10 @@ static int copy_translation_tables(struct intel_iommu *iommu)
+ if (new_ext != ext)
+ return -EINVAL;
+
++ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
++ if (!iommu->copied_tables)
++ return -ENOMEM;
++
+ old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
+ if (!old_rt_phys)
+ return -EINVAL;
+@@ -3364,6 +3404,10 @@ free_iommu:
+ disable_dmar_iommu(iommu);
+ free_dmar_iommu(iommu);
+ }
++ if (si_domain) {
++ domain_exit(si_domain);
++ si_domain = NULL;
++ }
+
+ kfree(g_iommus);
+
+@@ -4205,8 +4249,10 @@ static inline bool has_external_pci(void)
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev)
+- if (pdev->external_facing)
++ if (pdev->external_facing) {
++ pci_dev_put(pdev);
+ return true;
++ }
+
+ return false;
+ }
+@@ -4377,7 +4423,8 @@ int __init intel_iommu_init(void)
+ * is likely to be much lower than the overhead of synchronizing
+ * the virtual and physical IOMMU page-tables.
+ */
+- if (cap_caching_mode(iommu->cap)) {
++ if (cap_caching_mode(iommu->cap) &&
++ !first_level_by_default(IOMMU_DOMAIN_DMA)) {
+ pr_info_once("IOMMU batching disallowed due to virtualization\n");
+ iommu_set_dma_strict();
+ }
+@@ -4517,7 +4564,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
+ case IOMMU_DOMAIN_DMA:
+ case IOMMU_DOMAIN_DMA_FQ:
+ case IOMMU_DOMAIN_UNMANAGED:
+- dmar_domain = alloc_domain(0);
++ dmar_domain = alloc_domain(type);
+ if (!dmar_domain) {
+ pr_err("Can't allocate dmar_domain\n");
+ return NULL;
+@@ -5093,7 +5140,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
+ if (dmar_domain->max_addr == iova + size)
+ dmar_domain->max_addr = iova;
+
+- iommu_iotlb_gather_add_page(domain, gather, iova, size);
++ /*
++ * We do not use page-selective IOTLB invalidation in flush queue,
++ * so there is no need to track page and sync iotlb.
++ */
++ if (!iommu_iotlb_gather_queued(gather))
++ iommu_iotlb_gather_add_page(domain, gather, iova, size);
+
+ return size;
+ }
+@@ -5367,8 +5419,12 @@ static int intel_iommu_enable_sva(struct device *dev)
+ return -EINVAL;
+
+ ret = iopf_queue_add_device(iommu->iopf_queue, dev);
+- if (!ret)
+- ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
++ if (ret)
++ return ret;
++
++ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
++ if (ret)
++ iopf_queue_remove_device(iommu->iopf_queue, dev);
+
+ return ret;
+ }
+@@ -5380,8 +5436,12 @@ static int intel_iommu_disable_sva(struct device *dev)
+ int ret;
+
+ ret = iommu_unregister_device_fault_handler(dev);
+- if (!ret)
+- ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
++ if (ret)
++ return ret;
++
++ ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
++ if (ret)
++ iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+
+ return ret;
+ }
+@@ -5705,7 +5765,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
+ ver = (dev->device >> 8) & 0xff;
+ if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
+ ver != 0x4e && ver != 0x8a && ver != 0x98 &&
+- ver != 0x9a)
++ ver != 0x9a && ver != 0xa7)
+ return;
+
+ if (risky_device(dev))
+diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
+index f912fe45bea2c..a673195978843 100644
+--- a/drivers/iommu/intel/irq_remapping.c
++++ b/drivers/iommu/intel/irq_remapping.c
+@@ -569,9 +569,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
+ fn, &intel_ir_domain_ops,
+ iommu);
+ if (!iommu->ir_domain) {
+- irq_domain_free_fwnode(fn);
+ pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
+- goto out_free_bitmap;
++ goto out_free_fwnode;
+ }
+ iommu->ir_msi_domain =
+ arch_create_remap_msi_irq_domain(iommu->ir_domain,
+@@ -595,7 +594,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
+
+ if (dmar_enable_qi(iommu)) {
+ pr_err("Failed to enable queued invalidation\n");
+- goto out_free_bitmap;
++ goto out_free_ir_domain;
+ }
+ }
+
+@@ -619,6 +618,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
+
+ return 0;
+
++out_free_ir_domain:
++ if (iommu->ir_msi_domain)
++ irq_domain_remove(iommu->ir_msi_domain);
++ iommu->ir_msi_domain = NULL;
++ irq_domain_remove(iommu->ir_domain);
++ iommu->ir_domain = NULL;
++out_free_fwnode:
++ irq_domain_free_fwnode(fn);
+ out_free_bitmap:
+ bitmap_free(bitmap);
+ out_free_pages:
+diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
+index 07c390aed1fe9..dc9d665d7365c 100644
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -186,6 +186,9 @@ int intel_pasid_alloc_table(struct device *dev)
+ attach_out:
+ device_attach_pasid_table(info, pasid_table);
+
++ if (!ecap_coherent(info->iommu->ecap))
++ clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
++
+ return 0;
+ }
+
+@@ -276,6 +279,10 @@ retry:
+ free_pgtable_page(entries);
+ goto retry;
+ }
++ if (!ecap_coherent(info->iommu->ecap)) {
++ clflush_cache_range(entries, VTD_PAGE_SIZE);
++ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
++ }
+ }
+
+ return &entries[index];
+@@ -425,6 +432,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+ pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
+ }
+
++/*
++ * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID
++ * entry. It is required when XD bit of the first level page table
++ * entry is about to be set.
++ */
++static inline void pasid_set_nxe(struct pasid_entry *pe)
++{
++ pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5);
++}
++
+ /*
+ * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
+ * PASID entry.
+@@ -631,6 +648,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
+ pasid_set_domain_id(pte, did);
+ pasid_set_address_width(pte, iommu->agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
++ pasid_set_nxe(pte);
+
+ /* Setup Present and PASID Granular Transfer Type: */
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
+@@ -717,7 +735,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+ * Since it is a second level only translation setup, we should
+ * set SRE bit as well (addresses are expected to be GPAs).
+ */
+- if (pasid != PASID_RID2PASID)
++ if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap))
+ pasid_set_sre(pte);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+@@ -756,7 +774,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
+ * We should set SRE bit as well since the addresses are expected
+ * to be GPAs.
+ */
+- pasid_set_sre(pte);
++ if (ecap_srs(iommu->ecap))
++ pasid_set_sre(pte);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
+index 0c228787704f3..3a9468b1d2c3c 100644
+--- a/drivers/iommu/intel/svm.c
++++ b/drivers/iommu/intel/svm.c
+@@ -978,6 +978,10 @@ bad_req:
+ goto bad_req;
+ }
+
++ /* Drop Stop Marker message. No need for a response. */
++ if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
++ goto prq_advance;
++
+ if (!svm || svm->pasid != req->pasid) {
+ /*
+ * It can't go away, because the driver is not permitted
+diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
+index bfb6acb651e5f..ba3115fd0f86a 100644
+--- a/drivers/iommu/io-pgtable-arm-v7s.c
++++ b/drivers/iommu/io-pgtable-arm-v7s.c
+@@ -182,14 +182,8 @@ static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
+ (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
+ }
+
+-static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
+- struct io_pgtable_cfg *cfg)
++static arm_v7s_iopte to_mtk_iopte(phys_addr_t paddr, arm_v7s_iopte pte)
+ {
+- arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+-
+- if (!arm_v7s_is_mtk_enabled(cfg))
+- return pte;
+-
+ if (paddr & BIT_ULL(32))
+ pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+ if (paddr & BIT_ULL(33))
+@@ -199,6 +193,17 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
+ return pte;
+ }
+
++static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
++ struct io_pgtable_cfg *cfg)
++{
++ arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
++
++ if (arm_v7s_is_mtk_enabled(cfg))
++ return to_mtk_iopte(paddr, pte);
++
++ return pte;
++}
++
+ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
+ struct io_pgtable_cfg *cfg)
+ {
+@@ -240,19 +245,31 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
+ dma_addr_t dma;
+ size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
+ void *table = NULL;
++ gfp_t gfp_l1;
++
++ /*
++	 * ARM_MTK_TTBR_EXT extends the translation table base to support
++	 * larger memory addresses.
++ */
++ gfp_l1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
++ GFP_KERNEL : ARM_V7S_TABLE_GFP_DMA;
+
+ if (lvl == 1)
+- table = (void *)__get_free_pages(
+- __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
++ table = (void *)__get_free_pages(gfp_l1 | __GFP_ZERO, get_order(size));
+ else if (lvl == 2)
+ table = kmem_cache_zalloc(data->l2_tables, gfp);
++
++ if (!table)
++ return NULL;
++
+ phys = virt_to_phys(table);
+- if (phys != (arm_v7s_iopte)phys) {
++ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
++ phys >= (1ULL << cfg->oas) : phys != (arm_v7s_iopte)phys) {
+ /* Doesn't fit in PTE */
+ dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
+ goto out_free;
+ }
+- if (table && !cfg->coherent_walk) {
++ if (!cfg->coherent_walk) {
+ dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma))
+ goto out_free;
+@@ -453,9 +470,14 @@ static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
+ arm_v7s_iopte curr,
+ struct io_pgtable_cfg *cfg)
+ {
++ phys_addr_t phys = virt_to_phys(table);
+ arm_v7s_iopte old, new;
+
+- new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
++ new = phys | ARM_V7S_PTE_TYPE_TABLE;
++
++ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT)
++ new = to_mtk_iopte(phys, new);
++
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+ new |= ARM_V7S_ATTR_NS_TABLE;
+
+@@ -775,6 +797,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+ void *cookie)
+ {
+ struct arm_v7s_io_pgtable *data;
++ slab_flags_t slab_flag;
++ phys_addr_t paddr;
+
+ if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
+ return NULL;
+@@ -784,7 +808,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
+ IO_PGTABLE_QUIRK_NO_PERMS |
+- IO_PGTABLE_QUIRK_ARM_MTK_EXT))
++ IO_PGTABLE_QUIRK_ARM_MTK_EXT |
++ IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT))
+ return NULL;
+
+ /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
+@@ -792,15 +817,27 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+ !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
+ return NULL;
+
++ if ((cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT) &&
++ !arm_v7s_is_mtk_enabled(cfg))
++ return NULL;
++
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+
+ spin_lock_init(&data->split_lock);
++
++ /*
++	 * ARM_MTK_TTBR_EXT extends the translation table base to support
++	 * larger memory addresses.
++ */
++ slab_flag = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ?
++ 0 : ARM_V7S_TABLE_SLAB_FLAGS;
++
+ data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
+ ARM_V7S_TABLE_SIZE(2, cfg),
+ ARM_V7S_TABLE_SIZE(2, cfg),
+- ARM_V7S_TABLE_SLAB_FLAGS, NULL);
++ slab_flag, NULL);
+ if (!data->l2_tables)
+ goto out_free_data;
+
+@@ -846,12 +883,16 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+ wmb();
+
+ /* TTBR */
+- cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S |
+- (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
+- ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
+- (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
+- ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
++ paddr = virt_to_phys(data->pgd);
++ if (arm_v7s_is_mtk_enabled(cfg))
++ cfg->arm_v7s_cfg.ttbr = paddr | upper_32_bits(paddr);
++ else
++ cfg->arm_v7s_cfg.ttbr = paddr | ARM_V7S_TTBR_S |
++ (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS |
++ ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
++ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
++ (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
++ ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
+ return &data->iop;
+
+ out_free_data:
+diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
+index dd9e47189d0d9..94ff319ae8acc 100644
+--- a/drivers/iommu/io-pgtable-arm.c
++++ b/drivers/iommu/io-pgtable-arm.c
+@@ -315,11 +315,12 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
+ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
+ arm_lpae_iopte *ptep,
+ arm_lpae_iopte curr,
+- struct io_pgtable_cfg *cfg)
++ struct arm_lpae_io_pgtable *data)
+ {
+ arm_lpae_iopte old, new;
++ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+
+- new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
++ new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+ new |= ARM_LPAE_PTE_NSTABLE;
+
+@@ -380,7 +381,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
+ if (!cptep)
+ return -ENOMEM;
+
+- pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
++ pte = arm_lpae_install_table(cptep, ptep, 0, data);
+ if (pte)
+ __arm_lpae_free_pages(cptep, tblsz, cfg);
+ } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+@@ -592,7 +593,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+ __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
+ }
+
+- pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
++ pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
+ if (pte != blk_pte) {
+ __arm_lpae_free_pages(tablep, tablesz, cfg);
+ /*
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index 3303d707bab4b..d06dbf035c7c7 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -206,9 +206,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
+
+ static void dev_iommu_free(struct device *dev)
+ {
+- iommu_fwspec_free(dev);
+- kfree(dev->iommu);
++ struct dev_iommu *param = dev->iommu;
++
+ dev->iommu = NULL;
++ if (param->fwspec) {
++ fwnode_handle_put(param->fwspec->iommu_fwnode);
++ kfree(param->fwspec);
++ }
++ kfree(param);
+ }
+
+ static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
+@@ -287,11 +292,11 @@ int iommu_probe_device(struct device *dev)
+ */
+ mutex_lock(&group->mutex);
+ iommu_alloc_default_domain(group, dev);
+- mutex_unlock(&group->mutex);
+
+ if (group->default_domain) {
+ ret = __iommu_attach_device(group->default_domain, dev);
+ if (ret) {
++ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+ goto err_release;
+ }
+@@ -299,6 +304,7 @@ int iommu_probe_device(struct device *dev)
+
+ iommu_create_device_direct_mappings(group, dev);
+
++ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+
+ if (ops->probe_finalize)
+@@ -650,12 +656,16 @@ struct iommu_group *iommu_group_alloc(void)
+
+ ret = iommu_group_create_file(group,
+ &iommu_group_attr_reserved_regions);
+- if (ret)
++ if (ret) {
++ kobject_put(group->devices_kobj);
+ return ERR_PTR(ret);
++ }
+
+ ret = iommu_group_create_file(group, &iommu_group_attr_type);
+- if (ret)
++ if (ret) {
++ kobject_put(group->devices_kobj);
+ return ERR_PTR(ret);
++ }
+
+ pr_debug("Allocated group %d\n", group->id);
+
+diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
+index 9e8bc802ac053..0835f32e040ad 100644
+--- a/drivers/iommu/iova.c
++++ b/drivers/iommu/iova.c
+@@ -83,8 +83,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
+ if (!has_iova_flush_queue(iovad))
+ return;
+
+- if (timer_pending(&iovad->fq_timer))
+- del_timer(&iovad->fq_timer);
++ del_timer_sync(&iovad->fq_timer);
+
+ fq_destroy_all_entries(iovad);
+
+@@ -155,10 +154,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
+ cached_iova = to_iova(iovad->cached32_node);
+ if (free == cached_iova ||
+ (free->pfn_hi < iovad->dma_32bit_pfn &&
+- free->pfn_lo >= cached_iova->pfn_lo)) {
++ free->pfn_lo >= cached_iova->pfn_lo))
+ iovad->cached32_node = rb_next(&free->node);
++
++ if (free->pfn_lo < iovad->dma_32bit_pfn)
+ iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+- }
+
+ cached_iova = to_iova(iovad->cached_node);
+ if (free->pfn_lo >= cached_iova->pfn_lo)
+@@ -252,7 +252,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
+
+ curr = __get_cached_rbnode(iovad, limit_pfn);
+ curr_iova = to_iova(curr);
+- retry_pfn = curr_iova->pfn_hi + 1;
++ retry_pfn = curr_iova->pfn_hi;
+
+ retry:
+ do {
+@@ -266,7 +266,7 @@ retry:
+ if (high_pfn < size || new_pfn < low_pfn) {
+ if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
+ high_pfn = limit_pfn;
+- low_pfn = retry_pfn;
++ low_pfn = retry_pfn + 1;
+ curr = iova_find_limit(iovad, limit_pfn);
+ curr_iova = to_iova(curr);
+ goto retry;
+diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
+index d38ff29a76e8f..96708cd2757f7 100644
+--- a/drivers/iommu/ipmmu-vmsa.c
++++ b/drivers/iommu/ipmmu-vmsa.c
+@@ -982,7 +982,9 @@ static int ipmmu_probe(struct platform_device *pdev)
+ bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
+ mmu->features = of_device_get_match_data(&pdev->dev);
+ memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs);
+- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
++ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
++ if (ret)
++ return ret;
+
+ /* Map I/O memory and request IRQ. */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
+index 3a38352b603f3..c9eaf27cbb743 100644
+--- a/drivers/iommu/msm_iommu.c
++++ b/drivers/iommu/msm_iommu.c
+@@ -615,16 +615,19 @@ static void insert_iommu_master(struct device *dev,
+ static int qcom_iommu_of_xlate(struct device *dev,
+ struct of_phandle_args *spec)
+ {
+- struct msm_iommu_dev *iommu;
++ struct msm_iommu_dev *iommu = NULL, *iter;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&msm_iommu_lock, flags);
+- list_for_each_entry(iommu, &qcom_iommu_devices, dev_node)
+- if (iommu->dev->of_node == spec->np)
++ list_for_each_entry(iter, &qcom_iommu_devices, dev_node) {
++ if (iter->dev->of_node == spec->np) {
++ iommu = iter;
+ break;
++ }
++ }
+
+- if (!iommu || iommu->dev->of_node != spec->np) {
++ if (!iommu) {
+ ret = -ENODEV;
+ goto fail;
+ }
+diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
+index d837adfd1da55..2ae46fa6b3dee 100644
+--- a/drivers/iommu/mtk_iommu.c
++++ b/drivers/iommu/mtk_iommu.c
+@@ -451,7 +451,7 @@ static void mtk_iommu_domain_free(struct iommu_domain *domain)
+ static int mtk_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+ {
+- struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
++ struct mtk_iommu_data *data = dev_iommu_priv_get(dev), *frstdata;
+ struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ struct device *m4udev = data->dev;
+ int ret, domid;
+@@ -461,20 +461,24 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
+ return domid;
+
+ if (!dom->data) {
+- if (mtk_iommu_domain_finalise(dom, data, domid))
++ /* Data is in the frstdata in sharing pgtable case. */
++ frstdata = mtk_iommu_get_m4u_data();
++
++ if (mtk_iommu_domain_finalise(dom, frstdata, domid))
+ return -ENODEV;
+ dom->data = data;
+ }
+
++ mutex_lock(&data->mutex);
+ if (!data->m4u_dom) { /* Initialize the M4U HW */
+ ret = pm_runtime_resume_and_get(m4udev);
+ if (ret < 0)
+- return ret;
++ goto err_unlock;
+
+ ret = mtk_iommu_hw_init(data);
+ if (ret) {
+ pm_runtime_put(m4udev);
+- return ret;
++ goto err_unlock;
+ }
+ data->m4u_dom = dom;
+ writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
+@@ -482,9 +486,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
+
+ pm_runtime_put(m4udev);
+ }
++ mutex_unlock(&data->mutex);
+
+ mtk_iommu_config(data, dev, true, domid);
+ return 0;
++
++err_unlock:
++ mutex_unlock(&data->mutex);
++ return ret;
+ }
+
+ static void mtk_iommu_detach_device(struct iommu_domain *domain,
+@@ -550,7 +559,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
+ phys_addr_t pa;
+
+ pa = dom->iop->iova_to_phys(dom->iop, iova);
+- if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
++ if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
++ dom->data->enable_4GB &&
++ pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+ pa &= ~BIT_ULL(32);
+
+ return pa;
+@@ -560,22 +571,58 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
+ {
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct mtk_iommu_data *data;
++ struct device_link *link;
++ struct device *larbdev;
++ unsigned int larbid, larbidx, i;
+
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
+ return ERR_PTR(-ENODEV); /* Not a iommu client device */
+
+ data = dev_iommu_priv_get(dev);
+
++ /*
++	 * Link the consumer device with the smi-larb device (supplier).
++	 * The device that connects with each larb is an independent HW.
++	 * All the ports in each device should be in the same larb.
++ */
++ larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
++ if (larbid >= MTK_LARB_NR_MAX)
++ return ERR_PTR(-EINVAL);
++
++ for (i = 1; i < fwspec->num_ids; i++) {
++ larbidx = MTK_M4U_TO_LARB(fwspec->ids[i]);
++ if (larbid != larbidx) {
++ dev_err(dev, "Can only use one larb. Fail@larb%d-%d.\n",
++ larbid, larbidx);
++ return ERR_PTR(-EINVAL);
++ }
++ }
++ larbdev = data->larb_imu[larbid].dev;
++ if (!larbdev)
++ return ERR_PTR(-EINVAL);
++
++ link = device_link_add(dev, larbdev,
++ DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
++ if (!link)
++ dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
+ return &data->iommu;
+ }
+
+ static void mtk_iommu_release_device(struct device *dev)
+ {
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
++ struct mtk_iommu_data *data;
++ struct device *larbdev;
++ unsigned int larbid;
+
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
+ return;
+
++ data = dev_iommu_priv_get(dev);
++ larbid = MTK_M4U_TO_LARB(fwspec->ids[0]);
++ larbdev = data->larb_imu[larbid].dev;
++ device_link_remove(dev, larbdev);
++
+ iommu_fwspec_free(dev);
+ }
+
+@@ -592,6 +639,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev)
+ if (domid < 0)
+ return ERR_PTR(domid);
+
++ mutex_lock(&data->mutex);
+ group = data->m4u_group[domid];
+ if (!group) {
+ group = iommu_group_alloc();
+@@ -600,6 +648,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev)
+ } else {
+ iommu_group_ref_get(group);
+ }
++ mutex_unlock(&data->mutex);
+ return group;
+ }
+
+@@ -846,7 +895,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+ plarbdev = of_find_device_by_node(larbnode);
+ if (!plarbdev) {
+ of_node_put(larbnode);
+- return -EPROBE_DEFER;
++ return -ENODEV;
+ }
+ data->larb_imu[id].dev = &plarbdev->dev;
+
+@@ -874,6 +923,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+ }
+
+ platform_set_drvdata(pdev, data);
++ mutex_init(&data->mutex);
+
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+@@ -919,10 +969,8 @@ static int mtk_iommu_remove(struct platform_device *pdev)
+ iommu_device_sysfs_remove(&data->iommu);
+ iommu_device_unregister(&data->iommu);
+
+- if (iommu_present(&platform_bus_type))
+- bus_set_iommu(&platform_bus_type, NULL);
++ list_del(&data->list);
+
+- clk_disable_unprepare(data->bclk);
+ device_link_remove(data->smicomm_dev, &pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ devm_free_irq(&pdev->dev, data->irq, data);
+diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
+index f81fa8862ed04..f413546ac6e57 100644
+--- a/drivers/iommu/mtk_iommu.h
++++ b/drivers/iommu/mtk_iommu.h
+@@ -80,6 +80,8 @@ struct mtk_iommu_data {
+
+ struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */
+
++ struct mutex mutex; /* Protect m4u_group/m4u_dom above */
++
+ struct list_head list;
+ struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX];
+ };
+diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
+index be22fcf988cee..fe1c3123a7e77 100644
+--- a/drivers/iommu/mtk_iommu_v1.c
++++ b/drivers/iommu/mtk_iommu_v1.c
+@@ -80,6 +80,7 @@
+ /* MTK generation one iommu HW only support 4K size mapping */
+ #define MT2701_IOMMU_PAGE_SHIFT 12
+ #define MT2701_IOMMU_PAGE_SIZE (1UL << MT2701_IOMMU_PAGE_SHIFT)
++#define MT2701_LARB_NR_MAX 3
+
+ /*
+ * MTK m4u support 4GB iova address space, and only support 4K page
+@@ -423,7 +424,18 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct of_phandle_args iommu_spec;
+ struct mtk_iommu_data *data;
+- int err, idx = 0;
++ int err, idx = 0, larbid, larbidx;
++ struct device_link *link;
++ struct device *larbdev;
++
++ /*
++	 * In the deferred case, free the existing fwspec.
++	 * Always initialize the fwspec internally.
++ */
++ if (fwspec) {
++ iommu_fwspec_free(dev);
++ fwspec = dev_iommu_fwspec_get(dev);
++ }
+
+ while (!of_parse_phandle_with_args(dev->of_node, "iommus",
+ "#iommu-cells",
+@@ -444,6 +456,29 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
+
+ data = dev_iommu_priv_get(dev);
+
++	/* Link the consumer device with the smi-larb device (supplier) */
++ larbid = mt2701_m4u_to_larb(fwspec->ids[0]);
++ if (larbid >= MT2701_LARB_NR_MAX)
++ return ERR_PTR(-EINVAL);
++
++ for (idx = 1; idx < fwspec->num_ids; idx++) {
++ larbidx = mt2701_m4u_to_larb(fwspec->ids[idx]);
++ if (larbid != larbidx) {
++ dev_err(dev, "Can only use one larb. Fail@larb%d-%d.\n",
++ larbid, larbidx);
++ return ERR_PTR(-EINVAL);
++ }
++ }
++
++ larbdev = data->larb_imu[larbid].dev;
++ if (!larbdev)
++ return ERR_PTR(-EINVAL);
++
++ link = device_link_add(dev, larbdev,
++ DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
++ if (!link)
++ dev_err(dev, "Unable to link %s\n", dev_name(larbdev));
++
+ return &data->iommu;
+ }
+
+@@ -464,10 +499,18 @@ static void mtk_iommu_probe_finalize(struct device *dev)
+ static void mtk_iommu_release_device(struct device *dev)
+ {
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
++ struct mtk_iommu_data *data;
++ struct device *larbdev;
++ unsigned int larbid;
+
+ if (!fwspec || fwspec->ops != &mtk_iommu_ops)
+ return;
+
++ data = dev_iommu_priv_get(dev);
++ larbid = mt2701_m4u_to_larb(fwspec->ids[0]);
++ larbdev = data->larb_imu[larbid].dev;
++ device_link_remove(dev, larbdev);
++
+ iommu_fwspec_free(dev);
+ }
+
+@@ -595,7 +638,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+ plarbdev = of_find_device_by_node(larbnode);
+ if (!plarbdev) {
+ of_node_put(larbnode);
+- return -EPROBE_DEFER;
++ return -ENODEV;
+ }
+ data->larb_imu[i].dev = &plarbdev->dev;
+
+@@ -612,7 +655,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+ ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL,
+ dev_name(&pdev->dev));
+ if (ret)
+- return ret;
++ goto out_clk_unprepare;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+@@ -635,6 +678,8 @@ out_dev_unreg:
+ iommu_device_unregister(&data->iommu);
+ out_sysfs_remove:
+ iommu_device_sysfs_remove(&data->iommu);
++out_clk_unprepare:
++ clk_disable_unprepare(data->bclk);
+ return ret;
+ }
+
+diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
+index a99afb5d9011c..259f65291d909 100644
+--- a/drivers/iommu/omap-iommu-debug.c
++++ b/drivers/iommu/omap-iommu-debug.c
+@@ -32,12 +32,12 @@ static inline bool is_omap_iommu_detached(struct omap_iommu *obj)
+ ssize_t bytes; \
+ const char *str = "%20s: %08x\n"; \
+ const int maxcol = 32; \
+- bytes = snprintf(p, maxcol, str, __stringify(name), \
++ if (len < maxcol) \
++ goto out; \
++ bytes = scnprintf(p, maxcol, str, __stringify(name), \
+ iommu_read_reg(obj, MMU_##name)); \
+ p += bytes; \
+ len -= bytes; \
+- if (len < maxcol) \
+- goto out; \
+ } while (0)
+
+ static ssize_t
+diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
+index 91749654fd490..be60f6f3a265d 100644
+--- a/drivers/iommu/omap-iommu.c
++++ b/drivers/iommu/omap-iommu.c
+@@ -1661,7 +1661,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev)
+ num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
+ sizeof(phandle));
+ if (num_iommus < 0)
+- return 0;
++ return ERR_PTR(-ENODEV);
+
+ arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL);
+ if (!arch_data)
+diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
+index 5cb260820eda6..f9f6492c430df 100644
+--- a/drivers/iommu/rockchip-iommu.c
++++ b/drivers/iommu/rockchip-iommu.c
+@@ -98,8 +98,6 @@ struct rk_iommu_ops {
+ phys_addr_t (*pt_address)(u32 dte);
+ u32 (*mk_dtentries)(dma_addr_t pt_dma);
+ u32 (*mk_ptentries)(phys_addr_t page, int prot);
+- phys_addr_t (*dte_addr_phys)(u32 addr);
+- u32 (*dma_addr_dte)(dma_addr_t dt_dma);
+ u64 dma_bit_mask;
+ };
+
+@@ -200,8 +198,8 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
+ #define DTE_HI_MASK2 GENMASK(7, 4)
+ #define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */
+ #define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */
+-#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36)
+-#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32)
++#define PAGE_DESC_HI_MASK1 GENMASK_ULL(35, 32)
++#define PAGE_DESC_HI_MASK2 GENMASK_ULL(39, 36)
+
+ static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+ {
+@@ -277,22 +275,20 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
+ /*
+ * In v2:
+ * 31:12 - Page address bit 31:0
+- * 11:9 - Page address bit 34:32
+- * 8:4 - Page address bit 39:35
++ * 11: 8 - Page address bit 35:32
++ * 7: 4 - Page address bit 39:36
+ * 3 - Security
+- * 2 - Readable
+- * 1 - Writable
++ * 2 - Writable
++ * 1 - Readable
+ * 0 - 1 if Page @ Page address is valid
+ */
+-#define RK_PTE_PAGE_READABLE_V2 BIT(2)
+-#define RK_PTE_PAGE_WRITABLE_V2 BIT(1)
+
+ static u32 rk_mk_pte_v2(phys_addr_t page, int prot)
+ {
+ u32 flags = 0;
+
+- flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0;
+- flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0;
++ flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
++ flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
+
+ return rk_mk_dte_v2(page) | flags;
+ }
+@@ -507,7 +503,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
+
+ /*
+ * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
+- * and verifying that upper 5 nybbles are read back.
++ * and verifying that upper 5 (v1) or 7 (v2) nybbles are read back.
+ */
+ for (i = 0; i < iommu->num_mmu; i++) {
+ dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
+@@ -532,33 +528,6 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
+ return 0;
+ }
+
+-static inline phys_addr_t rk_dte_addr_phys(u32 addr)
+-{
+- return (phys_addr_t)addr;
+-}
+-
+-static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
+-{
+- return dt_dma;
+-}
+-
+-#define DT_HI_MASK GENMASK_ULL(39, 32)
+-#define DTE_BASE_HI_MASK GENMASK(11, 4)
+-#define DT_SHIFT 28
+-
+-static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
+-{
+- u64 addr64 = addr;
+- return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) |
+- ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT);
+-}
+-
+-static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
+-{
+- return (dt_dma & RK_DTE_PT_ADDRESS_MASK) |
+- ((dt_dma & DT_HI_MASK) >> DT_SHIFT);
+-}
+-
+ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
+ {
+ void __iomem *base = iommu->bases[index];
+@@ -578,7 +547,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
+ page_offset = rk_iova_page_offset(iova);
+
+ mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
+- mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
++ mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr);
+
+ dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
+ dte_addr = phys_to_virt(dte_addr_phys);
+@@ -968,7 +937,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
+
+ for (i = 0; i < iommu->num_mmu; i++) {
+ rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
+- rk_ops->dma_addr_dte(rk_domain->dt_dma));
++ rk_ops->mk_dtentries(rk_domain->dt_dma));
+ rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+ rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+ }
+@@ -1305,20 +1274,22 @@ static int rk_iommu_probe(struct platform_device *pdev)
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
+
+- if (irq < 0)
+- return irq;
++ if (irq < 0) {
++ err = irq;
++ goto err_pm_disable;
++ }
+
+ err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+ IRQF_SHARED, dev_name(dev), iommu);
+- if (err) {
+- pm_runtime_disable(dev);
+- goto err_remove_sysfs;
+- }
++ if (err)
++ goto err_pm_disable;
+ }
+
+ dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
+
+ return 0;
++err_pm_disable:
++ pm_runtime_disable(dev);
+ err_remove_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
+ err_put_group:
+@@ -1373,8 +1344,6 @@ static struct rk_iommu_ops iommu_data_ops_v1 = {
+ .pt_address = &rk_dte_pt_address,
+ .mk_dtentries = &rk_mk_dte,
+ .mk_ptentries = &rk_mk_pte,
+- .dte_addr_phys = &rk_dte_addr_phys,
+- .dma_addr_dte = &rk_dma_addr_dte,
+ .dma_bit_mask = DMA_BIT_MASK(32),
+ };
+
+@@ -1382,8 +1351,6 @@ static struct rk_iommu_ops iommu_data_ops_v2 = {
+ .pt_address = &rk_dte_pt_address_v2,
+ .mk_dtentries = &rk_mk_dte_v2,
+ .mk_ptentries = &rk_mk_pte_v2,
+- .dte_addr_phys = &rk_dte_addr_phys_v2,
+- .dma_addr_dte = &rk_dma_addr_dte_v2,
+ .dma_bit_mask = DMA_BIT_MASK(40),
+ };
+
+diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
+index 27ac818b03544..6b11770e3d75a 100644
+--- a/drivers/iommu/sprd-iommu.c
++++ b/drivers/iommu/sprd-iommu.c
+@@ -147,17 +147,11 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
+
+ dom->domain.geometry.aperture_start = 0;
+ dom->domain.geometry.aperture_end = SZ_256M - 1;
++ dom->domain.geometry.force_aperture = true;
+
+ return &dom->domain;
+ }
+
+-static void sprd_iommu_domain_free(struct iommu_domain *domain)
+-{
+- struct sprd_iommu_domain *dom = to_sprd_domain(domain);
+-
+- kfree(dom);
+-}
+-
+ static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom)
+ {
+ struct sprd_iommu_device *sdev = dom->sdev;
+@@ -230,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en)
+ sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val);
+ }
+
++static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom)
++{
++ size_t pgt_size;
++
++ /* Nothing need to do if the domain hasn't been attached */
++ if (!dom->sdev)
++ return;
++
++ pgt_size = sprd_iommu_pgt_size(&dom->domain);
++ dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa);
++ dom->sdev = NULL;
++ sprd_iommu_hw_en(dom->sdev, false);
++}
++
++static void sprd_iommu_domain_free(struct iommu_domain *domain)
++{
++ struct sprd_iommu_domain *dom = to_sprd_domain(domain);
++
++ sprd_iommu_cleanup(dom);
++ kfree(dom);
++}
++
+ static int sprd_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+ {
+diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
+index 92997021e188a..ed35741955997 100644
+--- a/drivers/iommu/sun50i-iommu.c
++++ b/drivers/iommu/sun50i-iommu.c
+@@ -27,6 +27,7 @@
+ #include <linux/types.h>
+
+ #define IOMMU_RESET_REG 0x010
++#define IOMMU_RESET_RELEASE_ALL 0xffffffff
+ #define IOMMU_ENABLE_REG 0x020
+ #define IOMMU_ENABLE_ENABLE BIT(0)
+
+@@ -270,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot)
+ enum sun50i_iommu_aci aci;
+ u32 flags = 0;
+
+- if (prot & (IOMMU_READ | IOMMU_WRITE))
++ if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE))
+ aci = SUN50I_IOMMU_ACI_RD_WR;
+ else if (prot & IOMMU_READ)
+ aci = SUN50I_IOMMU_ACI_RD;
+@@ -511,7 +512,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
+ sun50i_iommu_free_page_table(iommu, drop_pt);
+ }
+
+- sun50i_table_flush(sun50i_domain, page_table, PT_SIZE);
++ sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES);
+ sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+ return page_table;
+@@ -601,7 +602,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+ struct sun50i_iommu_domain *sun50i_domain;
+
+ if (type != IOMMU_DOMAIN_DMA &&
+- type != IOMMU_DOMAIN_IDENTITY &&
+ type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
+
+@@ -869,8 +869,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu)
+
+ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+ {
++ u32 status, l1_status, l2_status, resets;
+ struct sun50i_iommu *iommu = dev_id;
+- u32 status;
+
+ spin_lock(&iommu->iommu_lock);
+
+@@ -880,6 +880,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+ return IRQ_NONE;
+ }
+
++ l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG);
++ l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG);
++
+ if (status & IOMMU_INT_INVALID_L2PG)
+ sun50i_iommu_handle_pt_irq(iommu,
+ IOMMU_INT_ERR_ADDR_L2_REG,
+@@ -893,8 +896,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+
+ iommu_write(iommu, IOMMU_INT_CLR_REG, status);
+
+- iommu_write(iommu, IOMMU_RESET_REG, ~status);
+- iommu_write(iommu, IOMMU_RESET_REG, status);
++ resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK;
++ iommu_write(iommu, IOMMU_RESET_REG, ~resets);
++ iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL);
+
+ spin_unlock(&iommu->iommu_lock);
+
+diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
+index 0a281833f6117..abbdaeb4bf8f1 100644
+--- a/drivers/iommu/tegra-smmu.c
++++ b/drivers/iommu/tegra-smmu.c
+@@ -808,8 +808,10 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
+ return NULL;
+
+ mc = platform_get_drvdata(pdev);
+- if (!mc)
++ if (!mc) {
++ put_device(&pdev->dev);
+ return NULL;
++ }
+
+ return mc->smmu;
+ }
+diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
+index aca7b595c4c78..ae1b9f59abc57 100644
+--- a/drivers/irqchip/Kconfig
++++ b/drivers/irqchip/Kconfig
+@@ -171,7 +171,7 @@ config MADERA_IRQ
+ config IRQ_MIPS_CPU
+ bool
+ select GENERIC_IRQ_CHIP
+- select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING
++ select GENERIC_IRQ_IPI if SMP && SYS_SUPPORTS_MULTITHREADING
+ select IRQ_DOMAIN
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+
+@@ -304,7 +304,8 @@ config KEYSTONE_IRQ
+
+ config MIPS_GIC
+ bool
+- select GENERIC_IRQ_IPI
++ select GENERIC_IRQ_IPI if SMP
++ select IRQ_DOMAIN_HIERARCHY
+ select MIPS_CM
+
+ config INGENIC_IRQ
+diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c
+index 5ddb8e578ac6a..fc1ef7de37973 100644
+--- a/drivers/irqchip/irq-alpine-msi.c
++++ b/drivers/irqchip/irq-alpine-msi.c
+@@ -199,6 +199,7 @@ static int alpine_msix_init_domains(struct alpine_msix_data *priv,
+ }
+
+ gic_domain = irq_find_host(gic_node);
++ of_node_put(gic_node);
+ if (!gic_domain) {
+ pr_err("Failed to find the GIC domain\n");
+ return -ENXIO;
+diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
+index 53e0fb0562c11..01709c61e3641 100644
+--- a/drivers/irqchip/irq-armada-370-xp.c
++++ b/drivers/irqchip/irq-armada-370-xp.c
+@@ -232,16 +232,12 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq,
+ int hwirq, i;
+
+ mutex_lock(&msi_used_lock);
++ hwirq = bitmap_find_free_region(msi_used, PCI_MSI_DOORBELL_NR,
++ order_base_2(nr_irqs));
++ mutex_unlock(&msi_used_lock);
+
+- hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR,
+- 0, nr_irqs, 0);
+- if (hwirq >= PCI_MSI_DOORBELL_NR) {
+- mutex_unlock(&msi_used_lock);
++ if (hwirq < 0)
+ return -ENOSPC;
+- }
+-
+- bitmap_set(msi_used, hwirq, nr_irqs);
+- mutex_unlock(&msi_used_lock);
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+@@ -250,7 +246,7 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq,
+ NULL, NULL);
+ }
+
+- return hwirq;
++ return 0;
+ }
+
+ static void armada_370_xp_msi_free(struct irq_domain *domain,
+@@ -259,7 +255,7 @@ static void armada_370_xp_msi_free(struct irq_domain *domain,
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+ mutex_lock(&msi_used_lock);
+- bitmap_clear(msi_used, d->hwirq, nr_irqs);
++ bitmap_release_region(msi_used, d->hwirq, order_base_2(nr_irqs));
+ mutex_unlock(&msi_used_lock);
+ }
+
+@@ -312,7 +308,16 @@ static inline int armada_370_xp_msi_init(struct device_node *node,
+
+ static void armada_xp_mpic_perf_init(void)
+ {
+- unsigned long cpuid = cpu_logical_map(smp_processor_id());
++ unsigned long cpuid;
++
++ /*
++ * This Performance Counter Overflow interrupt is specific for
++ * Armada 370 and XP. It is not available on Armada 375, 38x and 39x.
++ */
++ if (!of_machine_is_compatible("marvell,armada-370-xp"))
++ return;
++
++ cpuid = cpu_logical_map(smp_processor_id());
+
+ /* Enable Performance Counter Overflow interrupts */
+ writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid),
+diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c
+index a47db16ff9603..9c9fc3e2967ed 100644
+--- a/drivers/irqchip/irq-aspeed-i2c-ic.c
++++ b/drivers/irqchip/irq-aspeed-i2c-ic.c
+@@ -77,8 +77,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node,
+ }
+
+ i2c_ic->parent_irq = irq_of_parse_and_map(node, 0);
+- if (i2c_ic->parent_irq < 0) {
+- ret = i2c_ic->parent_irq;
++ if (!i2c_ic->parent_irq) {
++ ret = -EINVAL;
+ goto err_iounmap;
+ }
+
+diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c
+index f3c6855a4cefb..279e92cf0b16b 100644
+--- a/drivers/irqchip/irq-aspeed-scu-ic.c
++++ b/drivers/irqchip/irq-aspeed-scu-ic.c
+@@ -76,8 +76,8 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
+ generic_handle_domain_irq(scu_ic->irq_domain,
+ bit - scu_ic->irq_shift);
+
+- regmap_update_bits(scu_ic->scu, scu_ic->reg, mask,
+- BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT));
++ regmap_write_bits(scu_ic->scu, scu_ic->reg, mask,
++ BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT));
+ }
+
+ chained_irq_exit(chip, desc);
+@@ -157,8 +157,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
+ }
+
+ irq = irq_of_parse_and_map(node, 0);
+- if (irq < 0) {
+- rc = irq;
++ if (!irq) {
++ rc = -EINVAL;
+ goto err;
+ }
+
+diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
+index e3483789f4df3..7c5d8b791592e 100644
+--- a/drivers/irqchip/irq-bcm6345-l1.c
++++ b/drivers/irqchip/irq-bcm6345-l1.c
+@@ -82,6 +82,7 @@ struct bcm6345_l1_chip {
+ };
+
+ struct bcm6345_l1_cpu {
++ struct bcm6345_l1_chip *intc;
+ void __iomem *map_base;
+ unsigned int parent_irq;
+ u32 enable_cache[];
+@@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
+
+ static void bcm6345_l1_irq_handle(struct irq_desc *desc)
+ {
+- struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
+- struct bcm6345_l1_cpu *cpu;
++ struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc);
++ struct bcm6345_l1_chip *intc = cpu->intc;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned int idx;
+
+-#ifdef CONFIG_SMP
+- cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
+-#else
+- cpu = intc->cpus[0];
+-#endif
+-
+ chained_irq_enter(chip, desc);
+
+ for (idx = 0; idx < intc->n_words; idx++) {
+@@ -140,7 +135,7 @@ static void bcm6345_l1_irq_handle(struct irq_desc *desc)
+ for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
+ irq = irq_linear_revmap(intc->domain, base + hwirq);
+ if (irq)
+- do_IRQ(irq);
++ generic_handle_irq(irq);
+ else
+ spurious_interrupt();
+ }
+@@ -220,11 +215,11 @@ static int bcm6345_l1_set_affinity(struct irq_data *d,
+ enabled = intc->cpus[old_cpu]->enable_cache[word] & mask;
+ if (enabled)
+ __bcm6345_l1_mask(d);
+- cpumask_copy(irq_data_get_affinity_mask(d), dest);
++ irq_data_update_affinity(d, dest);
+ if (enabled)
+ __bcm6345_l1_unmask(d);
+ } else {
+- cpumask_copy(irq_data_get_affinity_mask(d), dest);
++ irq_data_update_affinity(d, dest);
+ }
+ raw_spin_unlock_irqrestore(&intc->lock, flags);
+
+@@ -257,6 +252,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+ if (!cpu)
+ return -ENOMEM;
+
++ cpu->intc = intc;
+ cpu->map_base = ioremap(res.start, sz);
+ if (!cpu->map_base)
+ return -ENOMEM;
+@@ -272,7 +268,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+ return -EINVAL;
+ }
+ irq_set_chained_handler_and_data(cpu->parent_irq,
+- bcm6345_l1_irq_handle, intc);
++ bcm6345_l1_irq_handle, cpu);
+
+ return 0;
+ }
+diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
+index f23d7651ea847..e91b38a6fc3df 100644
+--- a/drivers/irqchip/irq-bcm7120-l2.c
++++ b/drivers/irqchip/irq-bcm7120-l2.c
+@@ -271,7 +271,8 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
+ flags |= IRQ_GC_BE_IO;
+
+ ret = irq_alloc_domain_generic_chips(data->domain, IRQS_PER_WORD, 1,
+- dn->full_name, handle_level_irq, clr, 0, flags);
++ dn->full_name, handle_level_irq, clr,
++ IRQ_LEVEL, flags);
+ if (ret) {
+ pr_err("failed to allocate generic irq chip\n");
+ goto out_free_domain;
+diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
+index 8e0911561f2d1..fddea72272464 100644
+--- a/drivers/irqchip/irq-brcmstb-l2.c
++++ b/drivers/irqchip/irq-brcmstb-l2.c
+@@ -161,6 +161,7 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
+ *init_params)
+ {
+ unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
++ unsigned int set = 0;
+ struct brcmstb_l2_intc_data *data;
+ struct irq_chip_type *ct;
+ int ret;
+@@ -208,9 +209,12 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ flags |= IRQ_GC_BE_IO;
+
++ if (init_params->handler == handle_level_irq)
++ set |= IRQ_LEVEL;
++
+ /* Allocate a single Generic IRQ chip for this node */
+ ret = irq_alloc_domain_generic_chips(data->domain, 32, 1,
+- np->full_name, init_params->handler, clr, 0, flags);
++ np->full_name, init_params->handler, clr, set, flags);
+ if (ret) {
+ pr_err("failed to allocate generic irq chip\n");
+ goto out_free_domain;
+diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
+index a610821c8ff2a..afd6a1841715a 100644
+--- a/drivers/irqchip/irq-gic-common.c
++++ b/drivers/irqchip/irq-gic-common.c
+@@ -16,7 +16,13 @@ void gic_enable_of_quirks(const struct device_node *np,
+ const struct gic_quirk *quirks, void *data)
+ {
+ for (; quirks->desc; quirks++) {
+- if (!of_device_is_compatible(np, quirks->compatible))
++ if (!quirks->compatible && !quirks->property)
++ continue;
++ if (quirks->compatible &&
++ !of_device_is_compatible(np, quirks->compatible))
++ continue;
++ if (quirks->property &&
++ !of_property_read_bool(np, quirks->property))
+ continue;
+ if (quirks->init(data))
+ pr_info("GIC: enabling workaround for %s\n",
+@@ -28,7 +34,7 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
+ void *data)
+ {
+ for (; quirks->desc; quirks++) {
+- if (quirks->compatible)
++ if (quirks->compatible || quirks->property)
+ continue;
+ if (quirks->iidr != (quirks->mask & iidr))
+ continue;
+diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
+index 27e3d4ed4f328..3db4592cda1c0 100644
+--- a/drivers/irqchip/irq-gic-common.h
++++ b/drivers/irqchip/irq-gic-common.h
+@@ -13,6 +13,7 @@
+ struct gic_quirk {
+ const char *desc;
+ const char *compatible;
++ const char *property;
+ bool (*init)(void *data);
+ u32 iidr;
+ u32 mask;
+diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
+index b60e1853593f4..3989d16f997b3 100644
+--- a/drivers/irqchip/irq-gic-pm.c
++++ b/drivers/irqchip/irq-gic-pm.c
+@@ -102,7 +102,7 @@ static int gic_probe(struct platform_device *pdev)
+
+ pm_runtime_enable(dev);
+
+- ret = pm_runtime_get_sync(dev);
++ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ goto rpm_disable;
+
+diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
+index b4c1924f02554..38fab02ffe9d0 100644
+--- a/drivers/irqchip/irq-gic-realview.c
++++ b/drivers/irqchip/irq-gic-realview.c
+@@ -57,6 +57,7 @@ realview_gic_of_init(struct device_node *node, struct device_node *parent)
+
+ /* The PB11MPCore GIC needs to be configured in the syscon */
+ map = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (!IS_ERR(map)) {
+ /* new irq mode with no DCC */
+ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
+diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
+index eb0882d153666..490e6cfe510e6 100644
+--- a/drivers/irqchip/irq-gic-v3-its.c
++++ b/drivers/irqchip/irq-gic-v3-its.c
+@@ -267,13 +267,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
+ raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
+ }
+
++static struct irq_chip its_vpe_irq_chip;
++
+ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+ {
+- struct its_vlpi_map *map = get_vlpi_map(d);
++ struct its_vpe *vpe = NULL;
+ int cpu;
+
+- if (map) {
+- cpu = vpe_to_cpuid_lock(map->vpe, flags);
++ if (d->chip == &its_vpe_irq_chip) {
++ vpe = irq_data_get_irq_chip_data(d);
++ } else {
++ struct its_vlpi_map *map = get_vlpi_map(d);
++ if (map)
++ vpe = map->vpe;
++ }
++
++ if (vpe) {
++ cpu = vpe_to_cpuid_lock(vpe, flags);
+ } else {
+ /* Physical LPIs are already locked via the irq_desc lock */
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+@@ -287,10 +297,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+
+ static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+ {
+- struct its_vlpi_map *map = get_vlpi_map(d);
++ struct its_vpe *vpe = NULL;
+
+- if (map)
+- vpe_to_cpuid_unlock(map->vpe, flags);
++ if (d->chip == &its_vpe_irq_chip) {
++ vpe = irq_data_get_irq_chip_data(d);
++ } else {
++ struct its_vlpi_map *map = get_vlpi_map(d);
++ if (map)
++ vpe = map->vpe;
++ }
++
++ if (vpe)
++ vpe_to_cpuid_unlock(vpe, flags);
+ }
+
+ static struct its_collection *valid_col(struct its_collection *col)
+@@ -742,7 +760,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its,
+
+ its_fixup_cmd(cmd);
+
+- return NULL;
++ return desc->its_invall_cmd.col;
+ }
+
+ static struct its_vpe *its_build_vinvall_cmd(struct its_node *its,
+@@ -1427,14 +1445,29 @@ static void wait_for_syncr(void __iomem *rdbase)
+ cpu_relax();
+ }
+
+-static void direct_lpi_inv(struct irq_data *d)
++static void __direct_lpi_inv(struct irq_data *d, u64 val)
+ {
+- struct its_vlpi_map *map = get_vlpi_map(d);
+ void __iomem *rdbase;
+ unsigned long flags;
+- u64 val;
+ int cpu;
+
++ /* Target the redistributor this LPI is currently routed to */
++ cpu = irq_to_cpuid_lock(d, &flags);
++ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
++
++ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
++ gic_write_lpir(val, rdbase + GICR_INVLPIR);
++ wait_for_syncr(rdbase);
++
++ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
++ irq_to_cpuid_unlock(d, flags);
++}
++
++static void direct_lpi_inv(struct irq_data *d)
++{
++ struct its_vlpi_map *map = get_vlpi_map(d);
++ u64 val;
++
+ if (map) {
+ struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+
+@@ -1447,15 +1480,7 @@ static void direct_lpi_inv(struct irq_data *d)
+ val = d->hwirq;
+ }
+
+- /* Target the redistributor this LPI is currently routed to */
+- cpu = irq_to_cpuid_lock(d, &flags);
+- raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+- rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+- gic_write_lpir(val, rdbase + GICR_INVLPIR);
+-
+- wait_for_syncr(rdbase);
+- raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+- irq_to_cpuid_unlock(d, flags);
++ __direct_lpi_inv(d, val);
+ }
+
+ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
+@@ -1620,7 +1645,7 @@ static int its_select_cpu(struct irq_data *d,
+
+ cpu = cpumask_pick_least_loaded(d, tmpmask);
+ } else {
+- cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask);
++ cpumask_copy(tmpmask, aff_mask);
+
+ /* If we cannot cross sockets, limit the search to that node */
+ if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
+@@ -3007,18 +3032,12 @@ static int __init allocate_lpi_tables(void)
+ return 0;
+ }
+
+-static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
++static u64 read_vpend_dirty_clear(void __iomem *vlpi_base)
+ {
+ u32 count = 1000000; /* 1s! */
+ bool clean;
+ u64 val;
+
+- val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+- val &= ~GICR_VPENDBASER_Valid;
+- val &= ~clr;
+- val |= set;
+- gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
+-
+ do {
+ val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
+ clean = !(val & GICR_VPENDBASER_Dirty);
+@@ -3029,10 +3048,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
+ }
+ } while (!clean && count);
+
+- if (unlikely(val & GICR_VPENDBASER_Dirty)) {
++ if (unlikely(!clean))
+ pr_err_ratelimited("ITS virtual pending table not cleaning\n");
++
++ return val;
++}
++
++static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set)
++{
++ u64 val;
++
++ /* Make sure we wait until the RD is done with the initial scan */
++ val = read_vpend_dirty_clear(vlpi_base);
++ val &= ~GICR_VPENDBASER_Valid;
++ val &= ~clr;
++ val |= set;
++ gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
++
++ val = read_vpend_dirty_clear(vlpi_base);
++ if (unlikely(val & GICR_VPENDBASER_Dirty))
+ val |= GICR_VPENDBASER_PendingLast;
+- }
+
+ return val;
+ }
+@@ -3926,18 +3961,10 @@ static void its_vpe_send_inv(struct irq_data *d)
+ {
+ struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+
+- if (gic_rdists->has_direct_lpi) {
+- void __iomem *rdbase;
+-
+- /* Target the redistributor this VPE is currently known on */
+- raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+- rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+- gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
+- wait_for_syncr(rdbase);
+- raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+- } else {
++ if (gic_rdists->has_direct_lpi)
++ __direct_lpi_inv(d, d->parent_data->hwirq);
++ else
+ its_vpe_send_cmd(vpe, its_send_inv);
+- }
+ }
+
+ static void its_vpe_mask_irq(struct irq_data *d)
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index fd4e9a37fea67..500e0c6d17f61 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -35,6 +35,7 @@
+
+ #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
+ #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
++#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2)
+
+ #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
+
+@@ -206,11 +207,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
+ }
+ }
+
+-static void gic_do_wait_for_rwp(void __iomem *base)
++static void gic_do_wait_for_rwp(void __iomem *base, u32 bit)
+ {
+ u32 count = 1000000; /* 1s! */
+
+- while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
++ while (readl_relaxed(base + GICD_CTLR) & bit) {
+ count--;
+ if (!count) {
+ pr_err_ratelimited("RWP timeout, gone fishing\n");
+@@ -224,13 +225,13 @@ static void gic_do_wait_for_rwp(void __iomem *base)
+ /* Wait for completion of a distributor change */
+ static void gic_dist_wait_for_rwp(void)
+ {
+- gic_do_wait_for_rwp(gic_data.dist_base);
++ gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP);
+ }
+
+ /* Wait for completion of a redistributor change */
+ static void gic_redist_wait_for_rwp(void)
+ {
+- gic_do_wait_for_rwp(gic_data_rdist_rd_base());
++ gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP);
+ }
+
+ #ifdef CONFIG_ARM64
+@@ -556,7 +557,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
+
+ static void gic_eoi_irq(struct irq_data *d)
+ {
+- gic_write_eoir(gic_irq(d));
++ write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
++ isb();
+ }
+
+ static void gic_eoimode1_eoi_irq(struct irq_data *d)
+@@ -640,10 +642,38 @@ static void gic_deactivate_unhandled(u32 irqnr)
+ if (irqnr < 8192)
+ gic_write_dir(irqnr);
+ } else {
+- gic_write_eoir(irqnr);
++ write_gicreg(irqnr, ICC_EOIR1_EL1);
++ isb();
+ }
+ }
+
++/*
++ * Follow a read of the IAR with any HW maintenance that needs to happen prior
++ * to invoking the relevant IRQ handler. We must do two things:
++ *
++ * (1) Ensure instruction ordering between a read of IAR and subsequent
++ * instructions in the IRQ handler using an ISB.
++ *
++ * It is possible for the IAR to report an IRQ which was signalled *after*
++ * the CPU took an IRQ exception as multiple interrupts can race to be
++ * recognized by the GIC, earlier interrupts could be withdrawn, and/or
++ * later interrupts could be prioritized by the GIC.
++ *
++ * For devices which are tightly coupled to the CPU, such as PMUs, a
++ * context synchronization event is necessary to ensure that system
++ * register state is not stale, as these may have been indirectly written
++ * *after* exception entry.
++ *
++ * (2) Deactivate the interrupt when EOI mode 1 is in use.
++ */
++static inline void gic_complete_ack(u32 irqnr)
++{
++ if (static_branch_likely(&supports_deactivate_key))
++ write_gicreg(irqnr, ICC_EOIR1_EL1);
++
++ isb();
++}
++
+ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+ {
+ bool irqs_enabled = interrupts_enabled(regs);
+@@ -652,8 +682,8 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+ if (irqs_enabled)
+ nmi_enter();
+
+- if (static_branch_likely(&supports_deactivate_key))
+- gic_write_eoir(irqnr);
++ gic_complete_ack(irqnr);
++
+ /*
+ * Leave the PSR.I bit set to prevent other NMIs to be
+ * received while handling this one.
+@@ -723,10 +753,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
+ gic_arch_enable_irqs();
+ }
+
+- if (static_branch_likely(&supports_deactivate_key))
+- gic_write_eoir(irqnr);
+- else
+- isb();
++ gic_complete_ack(irqnr);
+
+ if (handle_domain_irq(gic_data.domain, irqnr, regs)) {
+ WARN_ONCE(true, "Unexpected interrupt received!\n");
+@@ -920,6 +947,22 @@ static int __gic_update_rdist_properties(struct redist_region *region,
+ {
+ u64 typer = gic_read_typer(ptr + GICR_TYPER);
+
++ /* Boot-time cleanup */
++ if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) {
++ u64 val;
++
++ /* Deactivate any present vPE */
++ val = gicr_read_vpendbaser(ptr + SZ_128K + GICR_VPENDBASER);
++ if (val & GICR_VPENDBASER_Valid)
++ gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast,
++ ptr + SZ_128K + GICR_VPENDBASER);
++
++ /* Mark the VPE table as invalid */
++ val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER);
++ val &= ~GICR_VPROPBASER_4_1_VALID;
++ gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER);
++ }
++
+ gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
+
+ /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */
+@@ -1450,6 +1493,12 @@ static int gic_irq_domain_translate(struct irq_domain *d,
+ if(fwspec->param_count != 2)
+ return -EINVAL;
+
++ if (fwspec->param[0] < 16) {
++ pr_err(FW_BUG "Illegal GSI%d translation request\n",
++ fwspec->param[0]);
++ return -EINVAL;
++ }
++
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[1];
+
+@@ -1598,6 +1647,15 @@ static bool gic_enable_quirk_msm8996(void *data)
+ return true;
+ }
+
++static bool gic_enable_quirk_mtk_gicr(void *data)
++{
++ struct gic_chip_data *d = data;
++
++ d->flags |= FLAGS_WORKAROUND_MTK_GICR_SAVE;
++
++ return true;
++}
++
+ static bool gic_enable_quirk_cavium_38539(void *data)
+ {
+ struct gic_chip_data *d = data;
+@@ -1633,6 +1691,11 @@ static const struct gic_quirk gic_quirks[] = {
+ .compatible = "qcom,msm8996-gic-v3",
+ .init = gic_enable_quirk_msm8996,
+ },
++ {
++ .desc = "GICv3: Mediatek Chromebook GICR save problem",
++ .property = "mediatek,broken-save-restore-fw",
++ .init = gic_enable_quirk_mtk_gicr,
++ },
+ {
+ .desc = "GICv3: HIP06 erratum 161010803",
+ .iidr = 0x0204043b,
+@@ -1669,6 +1732,11 @@ static void gic_enable_nmi_support(void)
+ if (!gic_prio_masking_enabled())
+ return;
+
++ if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) {
++ pr_warn("Skipping NMI enable due to firmware issues\n");
++ return;
++ }
++
+ ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL);
+ if (!ppi_nmi_refs)
+ return;
+@@ -1842,7 +1910,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
+
+ gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL);
+ if (!gic_data.ppi_descs)
+- return;
++ goto out_put_node;
+
+ nr_parts = of_get_child_count(parts_node);
+
+@@ -1883,12 +1951,15 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
+ continue;
+
+ cpu = of_cpu_node_to_id(cpu_node);
+- if (WARN_ON(cpu < 0))
++ if (WARN_ON(cpu < 0)) {
++ of_node_put(cpu_node);
+ continue;
++ }
+
+ pr_cont("%pOF[%d] ", cpu_node, cpu);
+
+ cpumask_set_cpu(cpu, &part->mask);
++ of_node_put(cpu_node);
+ }
+
+ pr_cont("}\n");
+diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
+index 5f22c9d65e578..99077f30f6999 100644
+--- a/drivers/irqchip/irq-gic.c
++++ b/drivers/irqchip/irq-gic.c
+@@ -1085,6 +1085,12 @@ static int gic_irq_domain_translate(struct irq_domain *d,
+ if(fwspec->param_count != 2)
+ return -EINVAL;
+
++ if (fwspec->param[0] < 16) {
++ pr_err(FW_BUG "Illegal GSI%d translation request\n",
++ fwspec->param[0]);
++ return -EINVAL;
++ }
++
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[1];
+
+diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
+index 5f47d8ee4ae39..b9dcc8e78c750 100644
+--- a/drivers/irqchip/irq-jcore-aic.c
++++ b/drivers/irqchip/irq-jcore-aic.c
+@@ -68,6 +68,7 @@ static int __init aic_irq_of_init(struct device_node *node,
+ unsigned min_irq = JCORE_AIC2_MIN_HWIRQ;
+ unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1;
+ struct irq_domain *domain;
++ int ret;
+
+ pr_info("Initializing J-Core AIC\n");
+
+@@ -100,6 +101,12 @@ static int __init aic_irq_of_init(struct device_node *node,
+ jcore_aic.irq_unmask = noop;
+ jcore_aic.name = "AIC";
+
++ ret = irq_alloc_descs(-1, min_irq, dom_sz - min_irq,
++ of_node_to_nid(node));
++
++ if (ret < 0)
++ return ret;
++
+ domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq,
+ &jcore_aic_irqdomain_ops,
+ &jcore_aic);
+diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
+index 54c7092cc61d9..c654fe22fcf33 100644
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -49,15 +49,17 @@ void __iomem *mips_gic_base;
+
+ static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[GIC_MAX_LONGS], pcpu_masks);
+
+-static DEFINE_SPINLOCK(gic_lock);
++static DEFINE_RAW_SPINLOCK(gic_lock);
+ static struct irq_domain *gic_irq_domain;
+-static struct irq_domain *gic_ipi_domain;
+ static int gic_shared_intrs;
+ static unsigned int gic_cpu_pin;
+ static unsigned int timer_cpu_pin;
+ static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller;
++
++#ifdef CONFIG_GENERIC_IRQ_IPI
+ static DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS);
+ static DECLARE_BITMAP(ipi_available, GIC_MAX_INTRS);
++#endif /* CONFIG_GENERIC_IRQ_IPI */
+
+ static struct gic_all_vpes_chip_data {
+ u32 map;
+@@ -208,7 +210,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
+
+ irq = GIC_HWIRQ_TO_SHARED(d->hwirq);
+
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_EDGE_FALLING:
+ pol = GIC_POL_FALLING_EDGE;
+@@ -248,7 +250,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
+ else
+ irq_set_chip_handler_name_locked(d, &gic_level_irq_controller,
+ handle_level_irq, NULL);
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+
+ return 0;
+ }
+@@ -266,7 +268,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
+ return -EINVAL;
+
+ /* Assumption : cpumask refers to a single CPU */
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+
+ /* Re-route this IRQ */
+ write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu)));
+@@ -277,7 +279,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
+ set_bit(irq, per_cpu_ptr(pcpu_masks, cpu));
+
+ irq_data_update_effective_affinity(d, cpumask_of(cpu));
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+
+ return IRQ_SET_MASK_OK;
+ }
+@@ -355,12 +357,12 @@ static void gic_mask_local_irq_all_vpes(struct irq_data *d)
+ cd = irq_data_get_irq_chip_data(d);
+ cd->mask = false;
+
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+ for_each_online_cpu(cpu) {
+ write_gic_vl_other(mips_cm_vp_id(cpu));
+ write_gic_vo_rmask(BIT(intr));
+ }
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+ }
+
+ static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
+@@ -373,32 +375,45 @@ static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
+ cd = irq_data_get_irq_chip_data(d);
+ cd->mask = true;
+
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+ for_each_online_cpu(cpu) {
+ write_gic_vl_other(mips_cm_vp_id(cpu));
+ write_gic_vo_smask(BIT(intr));
+ }
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+ }
+
+-static void gic_all_vpes_irq_cpu_online(struct irq_data *d)
++static void gic_all_vpes_irq_cpu_online(void)
+ {
+- struct gic_all_vpes_chip_data *cd;
+- unsigned int intr;
++ static const unsigned int local_intrs[] = {
++ GIC_LOCAL_INT_TIMER,
++ GIC_LOCAL_INT_PERFCTR,
++ GIC_LOCAL_INT_FDC,
++ };
++ unsigned long flags;
++ int i;
+
+- intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
+- cd = irq_data_get_irq_chip_data(d);
++ raw_spin_lock_irqsave(&gic_lock, flags);
++
++ for (i = 0; i < ARRAY_SIZE(local_intrs); i++) {
++ unsigned int intr = local_intrs[i];
++ struct gic_all_vpes_chip_data *cd;
++
++ if (!gic_local_irq_is_routable(intr))
++ continue;
++ cd = &gic_all_vpes_chip_data[intr];
++ write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
++ if (cd->mask)
++ write_gic_vl_smask(BIT(intr));
++ }
+
+- write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
+- if (cd->mask)
+- write_gic_vl_smask(BIT(intr));
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+ }
+
+ static struct irq_chip gic_all_vpes_local_irq_controller = {
+ .name = "MIPS GIC Local",
+ .irq_mask = gic_mask_local_irq_all_vpes,
+ .irq_unmask = gic_unmask_local_irq_all_vpes,
+- .irq_cpu_online = gic_all_vpes_irq_cpu_online,
+ };
+
+ static void __gic_irq_dispatch(void)
+@@ -422,11 +437,11 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
+
+ data = irq_get_irq_data(virq);
+
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+ write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
+ write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
+ irq_data_update_effective_affinity(data, cpumask_of(cpu));
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+
+ return 0;
+ }
+@@ -460,9 +475,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ u32 map;
+
+ if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
++#ifdef CONFIG_GENERIC_IRQ_IPI
+ /* verify that shared irqs don't conflict with an IPI irq */
+ if (test_bit(GIC_HWIRQ_TO_SHARED(hwirq), ipi_resrv))
+ return -EBUSY;
++#endif /* CONFIG_GENERIC_IRQ_IPI */
+
+ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+ &gic_level_irq_controller,
+@@ -477,6 +494,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ intr = GIC_HWIRQ_TO_LOCAL(hwirq);
+ map = GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin;
+
++ /*
++ * If adding support for more per-cpu interrupts, keep the
++ * array in gic_all_vpes_irq_cpu_online() in sync.
++ */
+ switch (intr) {
+ case GIC_LOCAL_INT_TIMER:
+ /* CONFIG_MIPS_CMP workaround (see __gic_init) */
+@@ -515,12 +536,12 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ if (!gic_local_irq_is_routable(intr))
+ return -EPERM;
+
+- spin_lock_irqsave(&gic_lock, flags);
++ raw_spin_lock_irqsave(&gic_lock, flags);
+ for_each_online_cpu(cpu) {
+ write_gic_vl_other(mips_cm_vp_id(cpu));
+ write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
+ }
+- spin_unlock_irqrestore(&gic_lock, flags);
++ raw_spin_unlock_irqrestore(&gic_lock, flags);
+
+ return 0;
+ }
+@@ -551,6 +572,8 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
+ .map = gic_irq_domain_map,
+ };
+
++#ifdef CONFIG_GENERIC_IRQ_IPI
++
+ static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq,
+@@ -654,6 +677,48 @@ static const struct irq_domain_ops gic_ipi_domain_ops = {
+ .match = gic_ipi_domain_match,
+ };
+
++static int gic_register_ipi_domain(struct device_node *node)
++{
++ struct irq_domain *gic_ipi_domain;
++ unsigned int v[2], num_ipis;
++
++ gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
++ IRQ_DOMAIN_FLAG_IPI_PER_CPU,
++ GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
++ node, &gic_ipi_domain_ops, NULL);
++ if (!gic_ipi_domain) {
++ pr_err("Failed to add IPI domain");
++ return -ENXIO;
++ }
++
++ irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
++
++ if (node &&
++ !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {
++ bitmap_set(ipi_resrv, v[0], v[1]);
++ } else {
++ /*
++ * Reserve 2 interrupts per possible CPU/VP for use as IPIs,
++ * meeting the requirements of arch/mips SMP.
++ */
++ num_ipis = 2 * num_possible_cpus();
++ bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis);
++ }
++
++ bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS);
++
++ return 0;
++}
++
++#else /* !CONFIG_GENERIC_IRQ_IPI */
++
++static inline int gic_register_ipi_domain(struct device_node *node)
++{
++ return 0;
++}
++
++#endif /* !CONFIG_GENERIC_IRQ_IPI */
++
+ static int gic_cpu_startup(unsigned int cpu)
+ {
+ /* Enable or disable EIC */
+@@ -663,8 +728,8 @@ static int gic_cpu_startup(unsigned int cpu)
+ /* Clear all local IRQ masks (ie. disable all local interrupts) */
+ write_gic_vl_rmask(~0);
+
+- /* Invoke irq_cpu_online callbacks to enable desired interrupts */
+- irq_cpu_online();
++ /* Enable desired interrupts */
++ gic_all_vpes_irq_cpu_online();
+
+ return 0;
+ }
+@@ -672,11 +737,12 @@ static int gic_cpu_startup(unsigned int cpu)
+ static int __init gic_of_init(struct device_node *node,
+ struct device_node *parent)
+ {
+- unsigned int cpu_vec, i, gicconfig, v[2], num_ipis;
++ unsigned int cpu_vec, i, gicconfig;
+ unsigned long reserved;
+ phys_addr_t gic_base;
+ struct resource res;
+ size_t gic_len;
++ int ret;
+
+ /* Find the first available CPU vector. */
+ i = 0;
+@@ -718,6 +784,10 @@ static int __init gic_of_init(struct device_node *node,
+ }
+
+ mips_gic_base = ioremap(gic_base, gic_len);
++ if (!mips_gic_base) {
++ pr_err("Failed to ioremap gic_base\n");
++ return -ENOMEM;
++ }
+
+ gicconfig = read_gic_config();
+ gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS;
+@@ -765,30 +835,9 @@ static int __init gic_of_init(struct device_node *node,
+ return -ENXIO;
+ }
+
+- gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
+- IRQ_DOMAIN_FLAG_IPI_PER_CPU,
+- GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
+- node, &gic_ipi_domain_ops, NULL);
+- if (!gic_ipi_domain) {
+- pr_err("Failed to add IPI domain");
+- return -ENXIO;
+- }
+-
+- irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
+-
+- if (node &&
+- !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {
+- bitmap_set(ipi_resrv, v[0], v[1]);
+- } else {
+- /*
+- * Reserve 2 interrupts per possible CPU/VP for use as IPIs,
+- * meeting the requirements of arch/mips SMP.
+- */
+- num_ipis = 2 * num_possible_cpus();
+- bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis);
+- }
+-
+- bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS);
++ ret = gic_register_ipi_domain(node);
++ if (ret)
++ return ret;
+
+ board_bind_eic_interrupt = &gic_bind_eic_interrupt;
+
+diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
+index fe88a782173dd..c43a345061d53 100644
+--- a/drivers/irqchip/irq-mvebu-gicp.c
++++ b/drivers/irqchip/irq-mvebu-gicp.c
+@@ -221,6 +221,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev)
+ }
+
+ parent_domain = irq_find_host(irq_parent_dn);
++ of_node_put(irq_parent_dn);
+ if (!parent_domain) {
+ dev_err(&pdev->dev, "failed to find parent IRQ domain\n");
+ return -ENODEV;
+diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
+index b31c4cff4d3a5..47b3b165479e8 100644
+--- a/drivers/irqchip/irq-nvic.c
++++ b/drivers/irqchip/irq-nvic.c
+@@ -26,7 +26,7 @@
+
+ #define NVIC_ISER 0x000
+ #define NVIC_ICER 0x080
+-#define NVIC_IPR 0x300
++#define NVIC_IPR 0x400
+
+ #define NVIC_MAX_BANKS 16
+ /*
+@@ -92,6 +92,7 @@ static int __init nvic_of_init(struct device_node *node,
+
+ if (!nvic_irq_domain) {
+ pr_warn("Failed to allocate irq domain\n");
++ iounmap(nvic_base);
+ return -ENOMEM;
+ }
+
+@@ -101,6 +102,7 @@ static int __init nvic_of_init(struct device_node *node,
+ if (ret) {
+ pr_warn("Failed to allocate irq chips\n");
+ irq_domain_remove(nvic_irq_domain);
++ iounmap(nvic_base);
+ return ret;
+ }
+
+diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c
+index 03d2366118dd4..d5f1fabc45d79 100644
+--- a/drivers/irqchip/irq-or1k-pic.c
++++ b/drivers/irqchip/irq-or1k-pic.c
+@@ -66,7 +66,6 @@ static struct or1k_pic_dev or1k_pic_level = {
+ .name = "or1k-PIC-level",
+ .irq_unmask = or1k_pic_unmask,
+ .irq_mask = or1k_pic_mask,
+- .irq_mask_ack = or1k_pic_mask_ack,
+ },
+ .handle = handle_level_irq,
+ .flags = IRQ_LEVEL | IRQ_NOPROBE,
+diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
+index fd9f275592d29..56bf502d9c673 100644
+--- a/drivers/irqchip/irq-realtek-rtl.c
++++ b/drivers/irqchip/irq-realtek-rtl.c
+@@ -62,7 +62,7 @@ static struct irq_chip realtek_ictl_irq = {
+
+ static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+ {
+- irq_set_chip_and_handler(hw, &realtek_ictl_irq, handle_level_irq);
++ irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq);
+
+ return 0;
+ }
+@@ -76,16 +76,20 @@ static void realtek_irq_dispatch(struct irq_desc *desc)
+ {
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irq_domain *domain;
+- unsigned int pending;
++ unsigned long pending;
++ unsigned int soc_int;
+
+ chained_irq_enter(chip, desc);
+ pending = readl(REG(RTL_ICTL_GIMR)) & readl(REG(RTL_ICTL_GISR));
++
+ if (unlikely(!pending)) {
+ spurious_interrupt();
+ goto out;
+ }
++
+ domain = irq_desc_get_handler_data(desc);
+- generic_handle_domain_irq(domain, __ffs(pending));
++ for_each_set_bit(soc_int, &pending, 32)
++ generic_handle_domain_irq(domain, soc_int);
+
+ out:
+ chained_irq_exit(chip, desc);
+@@ -95,7 +99,8 @@ out:
+ * SoC interrupts are cascaded to MIPS CPU interrupts according to the
+ * interrupt-map in the device tree. Each SoC interrupt gets 4 bits for
+ * the CPU interrupt in an Interrupt Routing Register. Max 32 SoC interrupts
+- * thus go into 4 IRRs.
++ * thus go into 4 IRRs. A routing value of '0' means the interrupt is left
++ * disconnected. Routing values {1..15} connect to output lines {0..14}.
+ */
+ static int __init map_interrupts(struct device_node *node, struct irq_domain *domain)
+ {
+@@ -129,12 +134,12 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do
+ if (!cpu_ictl)
+ return -EINVAL;
+ ret = of_property_read_u32(cpu_ictl, "#interrupt-cells", &tmp);
++ of_node_put(cpu_ictl);
+ if (ret || tmp != 1)
+ return -EINVAL;
+- of_node_put(cpu_ictl);
+
+ cpu_int = be32_to_cpup(imap + 2);
+- if (cpu_int > 7)
++ if (cpu_int > 7 || cpu_int < 2)
+ return -EINVAL;
+
+ if (!(mips_irqs_set & BIT(cpu_int))) {
+@@ -143,7 +148,8 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do
+ mips_irqs_set |= BIT(cpu_int);
+ }
+
+- regs[(soc_int * 4) / 32] |= cpu_int << (soc_int * 4) % 32;
++ /* Use routing values (1..6) for CPU interrupts (2..7) */
++ regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32;
+ imap += 3;
+ }
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index cf74cfa820453..09cc98266d30f 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -163,7 +163,13 @@ static void plic_irq_eoi(struct irq_data *d)
+ {
+ struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ if (irqd_irq_masked(d)) {
++ plic_irq_unmask(d);
++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ plic_irq_mask(d);
++ } else {
++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ }
+ }
+
+ static struct irq_chip plic_chip = {
+@@ -392,3 +398,4 @@ out_free_priv:
+
+ IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
+ IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
++IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */
+diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c
+index abd011fcecf4a..c7db617e1a2f6 100644
+--- a/drivers/irqchip/irq-sni-exiu.c
++++ b/drivers/irqchip/irq-sni-exiu.c
+@@ -37,11 +37,26 @@ struct exiu_irq_data {
+ u32 spi_base;
+ };
+
+-static void exiu_irq_eoi(struct irq_data *d)
++static void exiu_irq_ack(struct irq_data *d)
+ {
+ struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
+
+ writel(BIT(d->hwirq), data->base + EIREQCLR);
++}
++
++static void exiu_irq_eoi(struct irq_data *d)
++{
++ struct exiu_irq_data *data = irq_data_get_irq_chip_data(d);
++
++ /*
++ * Level triggered interrupts are latched and must be cleared during
++ * EOI or the interrupt will be jammed on. Of course if a level
++ * triggered interrupt is still asserted then the write will not clear
++ * the interrupt.
++ */
++ if (irqd_is_level_type(d))
++ writel(BIT(d->hwirq), data->base + EIREQCLR);
++
+ irq_chip_eoi_parent(d);
+ }
+
+@@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
+ writel_relaxed(val, data->base + EILVL);
+
+ val = readl_relaxed(data->base + EIEDG);
+- if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH)
++ if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) {
+ val &= ~BIT(d->hwirq);
+- else
++ irq_set_handler_locked(d, handle_fasteoi_irq);
++ } else {
+ val |= BIT(d->hwirq);
++ irq_set_handler_locked(d, handle_fasteoi_ack_irq);
++ }
+ writel_relaxed(val, data->base + EIEDG);
+
+ writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR);
+@@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type)
+
+ static struct irq_chip exiu_irq_chip = {
+ .name = "EXIU",
++ .irq_ack = exiu_irq_ack,
+ .irq_eoi = exiu_irq_eoi,
+ .irq_enable = exiu_irq_enable,
+ .irq_mask = exiu_irq_mask,
+diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c
+index e1f771c72fc4c..ad3e2c1b3c87b 100644
+--- a/drivers/irqchip/irq-tegra.c
++++ b/drivers/irqchip/irq-tegra.c
+@@ -148,10 +148,10 @@ static int tegra_ictlr_suspend(void)
+ lic->cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS);
+
+ /* Disable COP interrupts */
+- writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
++ writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_COP_IER_CLR);
+
+ /* Disable CPU interrupts */
+- writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
++ writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_CPU_IER_CLR);
+
+ /* Enable the wakeup sources of ictlr */
+ writel_relaxed(lic->ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET);
+@@ -172,12 +172,12 @@ static void tegra_ictlr_resume(void)
+
+ writel_relaxed(lic->cpu_iep[i],
+ ictlr + ICTLR_CPU_IEP_CLASS);
+- writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR);
++ writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_CPU_IER_CLR);
+ writel_relaxed(lic->cpu_ier[i],
+ ictlr + ICTLR_CPU_IER_SET);
+ writel_relaxed(lic->cop_iep[i],
+ ictlr + ICTLR_COP_IEP_CLASS);
+- writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR);
++ writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_COP_IER_CLR);
+ writel_relaxed(lic->cop_ier[i],
+ ictlr + ICTLR_COP_IER_SET);
+ }
+@@ -312,7 +312,7 @@ static int __init tegra_ictlr_init(struct device_node *node,
+ lic->base[i] = base;
+
+ /* Disable all interrupts */
+- writel_relaxed(~0UL, base + ICTLR_CPU_IER_CLR);
++ writel_relaxed(GENMASK(31, 0), base + ICTLR_CPU_IER_CLR);
+ /* All interrupts target IRQ */
+ writel_relaxed(0, base + ICTLR_CPU_IEP_CLASS);
+
+diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c
+index fe8fad22bcf96..020ddf29efb80 100644
+--- a/drivers/irqchip/irq-ti-sci-intr.c
++++ b/drivers/irqchip/irq-ti-sci-intr.c
+@@ -236,6 +236,7 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev)
+ }
+
+ parent_domain = irq_find_host(parent_node);
++ of_node_put(parent_node);
+ if (!parent_domain) {
+ dev_err(dev, "Failed to find IRQ parent domain\n");
+ return -ENODEV;
+diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c
+index f3ac392d5bc87..36d0d0cf3fa25 100644
+--- a/drivers/irqchip/irq-wpcm450-aic.c
++++ b/drivers/irqchip/irq-wpcm450-aic.c
+@@ -146,6 +146,7 @@ static int __init wpcm450_aic_of_init(struct device_node *node,
+ aic->regs = of_iomap(node, 0);
+ if (!aic->regs) {
+ pr_err("Failed to map WPCM450 AIC registers\n");
++ kfree(aic);
+ return -ENOMEM;
+ }
+
+diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c
+index 27933338f7b36..8c581c985aa7d 100644
+--- a/drivers/irqchip/irq-xtensa-mx.c
++++ b/drivers/irqchip/irq-xtensa-mx.c
+@@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = {
+ .irq_set_affinity = xtensa_mx_irq_set_affinity,
+ };
+
++static void __init xtensa_mx_init_common(struct irq_domain *root_domain)
++{
++ unsigned int i;
++
++ irq_set_default_host(root_domain);
++ secondary_init_irq();
++
++ /* Initialize default IRQ routing to CPU 0 */
++ for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i)
++ set_er(1, MIROUT(i));
++}
++
+ int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
+ {
+ struct irq_domain *root_domain =
+ irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
+ &xtensa_mx_irq_domain_ops,
+ &xtensa_mx_irq_chip);
+- irq_set_default_host(root_domain);
+- secondary_init_irq();
++ xtensa_mx_init_common(root_domain);
+ return 0;
+ }
+
+@@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np,
+ struct irq_domain *root_domain =
+ irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops,
+ &xtensa_mx_irq_chip);
+- irq_set_default_host(root_domain);
+- secondary_init_irq();
++ xtensa_mx_init_common(root_domain);
+ return 0;
+ }
+ IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init);
+diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
+index 3570f0a588c4b..7899607fbee8d 100644
+--- a/drivers/irqchip/irqchip.c
++++ b/drivers/irqchip/irqchip.c
+@@ -38,8 +38,10 @@ int platform_irqchip_probe(struct platform_device *pdev)
+ struct device_node *par_np = of_irq_find_parent(np);
+ of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev);
+
+- if (!irq_init_cb)
++ if (!irq_init_cb) {
++ of_node_put(par_np);
+ return -EINVAL;
++ }
+
+ if (par_np == np)
+ par_np = NULL;
+@@ -52,8 +54,10 @@ int platform_irqchip_probe(struct platform_device *pdev)
+ * interrupt controller. The actual initialization callback of this
+ * interrupt controller can check for specific domains as necessary.
+ */
+- if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY))
++ if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) {
++ of_node_put(par_np);
+ return -EPROBE_DEFER;
++ }
+
+ return irq_init_cb(np, par_np);
+ }
+diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
+index 173e6520e06ec..c0b457f26ec41 100644
+--- a/drivers/irqchip/qcom-pdc.c
++++ b/drivers/irqchip/qcom-pdc.c
+@@ -56,17 +56,18 @@ static u32 pdc_reg_read(int reg, u32 i)
+ static void pdc_enable_intr(struct irq_data *d, bool on)
+ {
+ int pin_out = d->hwirq;
++ unsigned long flags;
+ u32 index, mask;
+ u32 enable;
+
+ index = pin_out / 32;
+ mask = pin_out % 32;
+
+- raw_spin_lock(&pdc_lock);
++ raw_spin_lock_irqsave(&pdc_lock, flags);
+ enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
+ enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
+ pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
+- raw_spin_unlock(&pdc_lock);
++ raw_spin_unlock_irqrestore(&pdc_lock, flags);
+ }
+
+ static void qcom_pdc_gic_disable(struct irq_data *d)
+diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
+index 4f7eaa17fb274..e840609c50eb7 100644
+--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
++++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
+@@ -3217,6 +3217,7 @@ static int
+ hfcm_l1callback(struct dchannel *dch, u_int cmd)
+ {
+ struct hfc_multi *hc = dch->hw;
++ struct sk_buff_head free_queue;
+ u_long flags;
+
+ switch (cmd) {
+@@ -3245,6 +3246,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
+ l1_event(dch->l1, HW_POWERUP_IND);
+ break;
+ case HW_DEACT_REQ:
++ __skb_queue_head_init(&free_queue);
+ /* start deactivation */
+ spin_lock_irqsave(&hc->lock, flags);
+ if (hc->ctype == HFC_TYPE_E1) {
+@@ -3264,20 +3266,21 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
+ plxsd_checksync(hc, 0);
+ }
+ }
+- skb_queue_purge(&dch->squeue);
++ skb_queue_splice_init(&dch->squeue, &free_queue);
+ if (dch->tx_skb) {
+- dev_kfree_skb(dch->tx_skb);
++ __skb_queue_tail(&free_queue, dch->tx_skb);
+ dch->tx_skb = NULL;
+ }
+ dch->tx_idx = 0;
+ if (dch->rx_skb) {
+- dev_kfree_skb(dch->rx_skb);
++ __skb_queue_tail(&free_queue, dch->rx_skb);
+ dch->rx_skb = NULL;
+ }
+ test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
+ if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
+ del_timer(&dch->timer);
+ spin_unlock_irqrestore(&hc->lock, flags);
++ __skb_queue_purge(&free_queue);
+ break;
+ case HW_POWERUP_REQ:
+ spin_lock_irqsave(&hc->lock, flags);
+@@ -3384,6 +3387,9 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
+ case PH_DEACTIVATE_REQ:
+ test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
+ if (dch->dev.D.protocol != ISDN_P_TE_S0) {
++ struct sk_buff_head free_queue;
++
++ __skb_queue_head_init(&free_queue);
+ spin_lock_irqsave(&hc->lock, flags);
+ if (debug & DEBUG_HFCMULTI_MSG)
+ printk(KERN_DEBUG
+@@ -3405,14 +3411,14 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
+ /* deactivate */
+ dch->state = 1;
+ }
+- skb_queue_purge(&dch->squeue);
++ skb_queue_splice_init(&dch->squeue, &free_queue);
+ if (dch->tx_skb) {
+- dev_kfree_skb(dch->tx_skb);
++ __skb_queue_tail(&free_queue, dch->tx_skb);
+ dch->tx_skb = NULL;
+ }
+ dch->tx_idx = 0;
+ if (dch->rx_skb) {
+- dev_kfree_skb(dch->rx_skb);
++ __skb_queue_tail(&free_queue, dch->rx_skb);
+ dch->rx_skb = NULL;
+ }
+ test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
+@@ -3424,6 +3430,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
+ #endif
+ ret = 0;
+ spin_unlock_irqrestore(&hc->lock, flags);
++ __skb_queue_purge(&free_queue);
+ } else
+ ret = l1_event(dch->l1, hh->prim);
+ break;
+diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
+index bd087cca1c1d2..d6cf01c32a33d 100644
+--- a/drivers/isdn/hardware/mISDN/hfcpci.c
++++ b/drivers/isdn/hardware/mISDN/hfcpci.c
+@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+ *z1t = cpu_to_le16(new_z1); /* now send data */
+ if (bch->tx_idx < bch->tx_skb->len)
+ return;
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ if (get_next_bframe(bch))
+ goto next_t_frame;
+ return;
+@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+ }
+ bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */
+ bz->f1 = new_f1; /* next frame */
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ get_next_bframe(bch);
+ }
+
+@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
+ if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
+ hfcpci_fill_fifo(bch);
+ else {
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ if (get_next_bframe(bch))
+ hfcpci_fill_fifo(bch);
+ }
+@@ -1617,16 +1617,19 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
+ test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
+ spin_lock_irqsave(&hc->lock, flags);
+ if (hc->hw.protocol == ISDN_P_NT_S0) {
++ struct sk_buff_head free_queue;
++
++ __skb_queue_head_init(&free_queue);
+ /* prepare deactivation */
+ Write_hfc(hc, HFCPCI_STATES, 0x40);
+- skb_queue_purge(&dch->squeue);
++ skb_queue_splice_init(&dch->squeue, &free_queue);
+ if (dch->tx_skb) {
+- dev_kfree_skb(dch->tx_skb);
++ __skb_queue_tail(&free_queue, dch->tx_skb);
+ dch->tx_skb = NULL;
+ }
+ dch->tx_idx = 0;
+ if (dch->rx_skb) {
+- dev_kfree_skb(dch->rx_skb);
++ __skb_queue_tail(&free_queue, dch->rx_skb);
+ dch->rx_skb = NULL;
+ }
+ test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
+@@ -1639,10 +1642,12 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
+ hc->hw.mst_m &= ~HFCPCI_MASTER;
+ Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
+ ret = 0;
++ spin_unlock_irqrestore(&hc->lock, flags);
++ __skb_queue_purge(&free_queue);
+ } else {
+ ret = l1_event(dch->l1, hh->prim);
++ spin_unlock_irqrestore(&hc->lock, flags);
+ }
+- spin_unlock_irqrestore(&hc->lock, flags);
+ break;
+ }
+ if (!ret)
+@@ -2005,7 +2010,11 @@ setup_hw(struct hfc_pci *hc)
+ }
+ /* Allocate memory for FIFOS */
+ /* the memory needs to be on a 32k boundary within the first 4G */
+- dma_set_mask(&hc->pdev->dev, 0xFFFF8000);
++ if (dma_set_mask(&hc->pdev->dev, 0xFFFF8000)) {
++ printk(KERN_WARNING
++ "HFC-PCI: No usable DMA configuration!\n");
++ return -EIO;
++ }
+ buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle,
+ GFP_KERNEL);
+ /* We silently assume the address is okay if nonzero */
+@@ -2268,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+ return 0;
+
+ if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
+- spin_lock(&hc->lock);
++ spin_lock_irq(&hc->lock);
+ bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
+ if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
+ main_rec_hfcpci(bch);
+@@ -2279,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+ main_rec_hfcpci(bch);
+ tx_birq(bch);
+ }
+- spin_unlock(&hc->lock);
++ spin_unlock_irq(&hc->lock);
+ }
+ return 0;
+ }
+diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
+index cd5642cef01fd..e8b37bd5e34a3 100644
+--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
++++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
+@@ -326,20 +326,24 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
+ test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
+
+ if (hw->protocol == ISDN_P_NT_S0) {
++ struct sk_buff_head free_queue;
++
++ __skb_queue_head_init(&free_queue);
+ hfcsusb_ph_command(hw, HFC_L1_DEACTIVATE_NT);
+ spin_lock_irqsave(&hw->lock, flags);
+- skb_queue_purge(&dch->squeue);
++ skb_queue_splice_init(&dch->squeue, &free_queue);
+ if (dch->tx_skb) {
+- dev_kfree_skb(dch->tx_skb);
++ __skb_queue_tail(&free_queue, dch->tx_skb);
+ dch->tx_skb = NULL;
+ }
+ dch->tx_idx = 0;
+ if (dch->rx_skb) {
+- dev_kfree_skb(dch->rx_skb);
++ __skb_queue_tail(&free_queue, dch->rx_skb);
+ dch->rx_skb = NULL;
+ }
+ test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
+ spin_unlock_irqrestore(&hw->lock, flags);
++ __skb_queue_purge(&free_queue);
+ #ifdef FIXME
+ if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
+ dchannel_sched_event(&hc->dch, D_CLEARBUSY);
+@@ -1330,7 +1334,7 @@ tx_iso_complete(struct urb *urb)
+ printk("\n");
+ }
+
+- dev_kfree_skb(tx_skb);
++ dev_consume_skb_irq(tx_skb);
+ tx_skb = NULL;
+ if (fifo->dch && get_next_dframe(fifo->dch))
+ tx_skb = fifo->dch->tx_skb;
+diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
+index a52f275f82634..f8447135a9022 100644
+--- a/drivers/isdn/hardware/mISDN/netjet.c
++++ b/drivers/isdn/hardware/mISDN/netjet.c
+@@ -956,7 +956,7 @@ nj_release(struct tiger_hw *card)
+ }
+ if (card->irq > 0)
+ free_irq(card->irq, card);
+- if (card->isac.dch.dev.dev.class)
++ if (device_is_registered(&card->isac.dch.dev.dev))
+ mISDN_unregister_device(&card->isac.dch.dev);
+
+ for (i = 0; i < 2; i++) {
+diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
+index 55891e4204460..90ee56d07a6e9 100644
+--- a/drivers/isdn/mISDN/core.c
++++ b/drivers/isdn/mISDN/core.c
+@@ -222,7 +222,7 @@ mISDN_register_device(struct mISDNdevice *dev,
+
+ err = get_free_devid();
+ if (err < 0)
+- goto error1;
++ return err;
+ dev->id = err;
+
+ device_initialize(&dev->dev);
+@@ -233,11 +233,12 @@ mISDN_register_device(struct mISDNdevice *dev,
+ if (debug & DEBUG_CORE)
+ printk(KERN_DEBUG "mISDN_register %s %d\n",
+ dev_name(&dev->dev), dev->id);
++ dev->dev.class = &mISDN_class;
++
+ err = create_stack(dev);
+ if (err)
+ goto error1;
+
+- dev->dev.class = &mISDN_class;
+ dev->dev.platform_data = dev;
+ dev->dev.parent = parent;
+ dev_set_drvdata(&dev->dev, dev);
+@@ -249,8 +250,8 @@ mISDN_register_device(struct mISDNdevice *dev,
+
+ error3:
+ delete_stack(dev);
+- return err;
+ error1:
++ put_device(&dev->dev);
+ return err;
+
+ }
+@@ -381,7 +382,7 @@ mISDNInit(void)
+ err = mISDN_inittimer(&debug);
+ if (err)
+ goto error2;
+- err = l1_init(&debug);
++ err = Isdnl1_Init(&debug);
+ if (err)
+ goto error3;
+ err = Isdnl2_Init(&debug);
+@@ -395,7 +396,7 @@ mISDNInit(void)
+ error5:
+ Isdnl2_cleanup();
+ error4:
+- l1_cleanup();
++ Isdnl1_cleanup();
+ error3:
+ mISDN_timer_cleanup();
+ error2:
+@@ -408,7 +409,7 @@ static void mISDN_cleanup(void)
+ {
+ misdn_sock_cleanup();
+ Isdnl2_cleanup();
+- l1_cleanup();
++ Isdnl1_cleanup();
+ mISDN_timer_cleanup();
+ class_unregister(&mISDN_class);
+
+diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h
+index 23b44d3033279..42599f49c189d 100644
+--- a/drivers/isdn/mISDN/core.h
++++ b/drivers/isdn/mISDN/core.h
+@@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int);
+ extern int mISDN_inittimer(u_int *);
+ extern void mISDN_timer_cleanup(void);
+
+-extern int l1_init(u_int *);
+-extern void l1_cleanup(void);
++extern int Isdnl1_Init(u_int *);
++extern void Isdnl1_cleanup(void);
+ extern int Isdnl2_Init(u_int *);
+ extern void Isdnl2_cleanup(void);
+
+diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
+index fa09d511a8eda..baf31258f5c90 100644
+--- a/drivers/isdn/mISDN/dsp.h
++++ b/drivers/isdn/mISDN/dsp.h
+@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
+ extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
+ extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
+ extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
+-extern void dsp_cmx_send(void *arg);
++extern void dsp_cmx_send(struct timer_list *arg);
+ extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
+ extern int dsp_cmx_del_conf_member(struct dsp *dsp);
+ extern int dsp_cmx_del_conf(struct dsp_conf *conf);
+diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
+index 6d2088fbaf69c..1b73af5013976 100644
+--- a/drivers/isdn/mISDN/dsp_cmx.c
++++ b/drivers/isdn/mISDN/dsp_cmx.c
+@@ -1625,7 +1625,7 @@ static u16 dsp_count; /* last sample count */
+ static int dsp_count_valid; /* if we have last sample count */
+
+ void
+-dsp_cmx_send(void *arg)
++dsp_cmx_send(struct timer_list *arg)
+ {
+ struct dsp_conf *conf;
+ struct dsp_conf_member *member;
+diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
+index 386084530c2f8..fae95f1666883 100644
+--- a/drivers/isdn/mISDN/dsp_core.c
++++ b/drivers/isdn/mISDN/dsp_core.c
+@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
+ }
+
+ /* set sample timer */
+- timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
++ timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
+ dsp_spl_tl.expires = jiffies + dsp_tics;
+ dsp_spl_jiffies = dsp_spl_tl.expires;
+ add_timer(&dsp_spl_tl);
+diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
+index e11ca6bbc7f41..cfbcd9e973c2e 100644
+--- a/drivers/isdn/mISDN/dsp_pipeline.c
++++ b/drivers/isdn/mISDN/dsp_pipeline.c
+@@ -77,6 +77,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem)
+ if (!entry)
+ return -ENOMEM;
+
++ INIT_LIST_HEAD(&entry->list);
+ entry->elem = elem;
+
+ entry->dev.class = elements_class;
+@@ -107,7 +108,7 @@ err2:
+ device_unregister(&entry->dev);
+ return ret;
+ err1:
+- kfree(entry);
++ put_device(&entry->dev);
+ return ret;
+ }
+ EXPORT_SYMBOL(mISDN_dsp_element_register);
+@@ -192,7 +193,7 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
+ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
+ {
+ int found = 0;
+- char *dup, *tok, *name, *args;
++ char *dup, *next, *tok, *name, *args;
+ struct dsp_element_entry *entry, *n;
+ struct dsp_pipeline_entry *pipeline_entry;
+ struct mISDN_dsp_element *elem;
+@@ -203,10 +204,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
+ if (!list_empty(&pipeline->list))
+ _dsp_pipeline_destroy(pipeline);
+
+- dup = kstrdup(cfg, GFP_ATOMIC);
++ dup = next = kstrdup(cfg, GFP_ATOMIC);
+ if (!dup)
+ return 0;
+- while ((tok = strsep(&dup, "|"))) {
++ while ((tok = strsep(&next, "|"))) {
+ if (!strlen(tok))
+ continue;
+ name = strsep(&tok, "(");
+diff --git a/drivers/isdn/mISDN/l1oip.h b/drivers/isdn/mISDN/l1oip.h
+index 7ea10db20e3a6..48133d0228120 100644
+--- a/drivers/isdn/mISDN/l1oip.h
++++ b/drivers/isdn/mISDN/l1oip.h
+@@ -59,6 +59,7 @@ struct l1oip {
+ int bundle; /* bundle channels in one frm */
+ int codec; /* codec to use for transmis. */
+ int limit; /* limit number of bchannels */
++ bool shutdown; /* if card is released */
+
+ /* timer */
+ struct timer_list keep_tl;
+diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
+index 2c40412466e6f..a77195e378b7b 100644
+--- a/drivers/isdn/mISDN/l1oip_core.c
++++ b/drivers/isdn/mISDN/l1oip_core.c
+@@ -275,7 +275,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask,
+ p = frame;
+
+ /* restart timer */
+- if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ))
++ if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ) && !hc->shutdown)
+ mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ);
+ else
+ hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ;
+@@ -601,7 +601,9 @@ multiframe:
+ goto multiframe;
+
+ /* restart timer */
+- if (time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || !hc->timeout_on) {
++ if ((time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) ||
++ !hc->timeout_on) &&
++ !hc->shutdown) {
+ hc->timeout_on = 1;
+ mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ);
+ } else /* only adjust timer */
+@@ -1232,11 +1234,10 @@ release_card(struct l1oip *hc)
+ {
+ int ch;
+
+- if (timer_pending(&hc->keep_tl))
+- del_timer(&hc->keep_tl);
++ hc->shutdown = true;
+
+- if (timer_pending(&hc->timeout_tl))
+- del_timer(&hc->timeout_tl);
++ del_timer_sync(&hc->keep_tl);
++ del_timer_sync(&hc->timeout_tl);
+
+ cancel_work_sync(&hc->workq);
+
+diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c
+index 98a3bc6c17009..7b31c25a550e3 100644
+--- a/drivers/isdn/mISDN/layer1.c
++++ b/drivers/isdn/mISDN/layer1.c
+@@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) {
+ EXPORT_SYMBOL(create_l1);
+
+ int
+-l1_init(u_int *deb)
++Isdnl1_Init(u_int *deb)
+ {
+ debug = deb;
+ l1fsm_s.state_count = L1S_STATE_COUNT;
+@@ -409,7 +409,7 @@ l1_init(u_int *deb)
+ }
+
+ void
+-l1_cleanup(void)
++Isdnl1_cleanup(void)
+ {
+ mISDN_FsmFree(&l1fsm_s);
+ }
+diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
+index ed800f5da7d88..8bf545100fb04 100644
+--- a/drivers/leds/Kconfig
++++ b/drivers/leds/Kconfig
+@@ -821,7 +821,7 @@ config LEDS_SPI_BYTE
+ config LEDS_TI_LMU_COMMON
+ tristate "LED driver for TI LMU"
+ depends on LEDS_CLASS
+- depends on REGMAP
++ select REGMAP
+ help
+ Say Y to enable the LED driver for TI LMU devices.
+ This supports common features between the TI LM3532, LM3631, LM3632,
+diff --git a/drivers/leds/flash/leds-lm3601x.c b/drivers/leds/flash/leds-lm3601x.c
+index d0e1d4814042e..3d12727482017 100644
+--- a/drivers/leds/flash/leds-lm3601x.c
++++ b/drivers/leds/flash/leds-lm3601x.c
+@@ -444,8 +444,6 @@ static int lm3601x_remove(struct i2c_client *client)
+ {
+ struct lm3601x_led *led = i2c_get_clientdata(client);
+
+- mutex_destroy(&led->lock);
+-
+ return regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+ LM3601X_ENABLE_MASK,
+ LM3601X_MODE_STANDBY);
+diff --git a/drivers/leds/led-class-multicolor.c b/drivers/leds/led-class-multicolor.c
+index e317408583df9..ec62a48116135 100644
+--- a/drivers/leds/led-class-multicolor.c
++++ b/drivers/leds/led-class-multicolor.c
+@@ -6,6 +6,7 @@
+ #include <linux/device.h>
+ #include <linux/init.h>
+ #include <linux/led-class-multicolor.h>
++#include <linux/math.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
+@@ -19,9 +20,10 @@ int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
+ int i;
+
+ for (i = 0; i < mcled_cdev->num_colors; i++)
+- mcled_cdev->subled_info[i].brightness = brightness *
+- mcled_cdev->subled_info[i].intensity /
+- led_cdev->max_brightness;
++ mcled_cdev->subled_info[i].brightness =
++ DIV_ROUND_CLOSEST(brightness *
++ mcled_cdev->subled_info[i].intensity,
++ led_cdev->max_brightness);
+
+ return 0;
+ }
+diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
+index f4bb02f6e0428..1024b1562aafc 100644
+--- a/drivers/leds/led-class.c
++++ b/drivers/leds/led-class.c
+@@ -235,14 +235,17 @@ struct led_classdev *of_led_get(struct device_node *np, int index)
+
+ led_dev = class_find_device_by_of_node(leds_class, led_node);
+ of_node_put(led_node);
++ put_device(led_dev);
+
+ if (!led_dev)
+ return ERR_PTR(-EPROBE_DEFER);
+
+ led_cdev = dev_get_drvdata(led_dev);
+
+- if (!try_module_get(led_cdev->dev->parent->driver->owner))
++ if (!try_module_get(led_cdev->dev->parent->driver->owner)) {
++ put_device(led_cdev->dev);
+ return ERR_PTR(-ENODEV);
++ }
+
+ return led_cdev;
+ }
+@@ -255,6 +258,7 @@ EXPORT_SYMBOL_GPL(of_led_get);
+ void led_put(struct led_classdev *led_cdev)
+ {
+ module_put(led_cdev->dev->parent->driver->owner);
++ put_device(led_cdev->dev);
+ }
+ EXPORT_SYMBOL_GPL(led_put);
+
+diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
+index d1657c46ee2f8..9fdfc1b9a1a0c 100644
+--- a/drivers/leds/leds-lp55xx-common.c
++++ b/drivers/leds/leds-lp55xx-common.c
+@@ -439,6 +439,8 @@ int lp55xx_init_device(struct lp55xx_chip *chip)
+ return -EINVAL;
+
+ if (pdata->enable_gpiod) {
++ gpiod_direction_output(pdata->enable_gpiod, 0);
++
+ gpiod_set_consumer_name(pdata->enable_gpiod, "LP55xx enable");
+ gpiod_set_value(pdata->enable_gpiod, 0);
+ usleep_range(1000, 2000); /* Keep enable down at least 1ms */
+@@ -694,7 +696,7 @@ struct lp55xx_platform_data *lp55xx_of_populate_pdata(struct device *dev,
+ of_property_read_u8(np, "clock-mode", &pdata->clock_mode);
+
+ pdata->enable_gpiod = devm_gpiod_get_optional(dev, "enable",
+- GPIOD_OUT_LOW);
++ GPIOD_ASIS);
+ if (IS_ERR(pdata->enable_gpiod))
+ return ERR_CAST(pdata->enable_gpiod);
+
+diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
+index 6832180c1c54f..cc892ecd52408 100644
+--- a/drivers/leds/leds-pwm.c
++++ b/drivers/leds/leds-pwm.c
+@@ -146,7 +146,7 @@ static int led_pwm_create_fwnode(struct device *dev, struct led_pwm_priv *priv)
+ led.name = to_of_node(fwnode)->name;
+
+ if (!led.name) {
+- ret = EINVAL;
++ ret = -EINVAL;
+ goto err_child_out;
+ }
+
+diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
+index 225b765830bdc..caad9d3e0eac8 100644
+--- a/drivers/leds/leds-tca6507.c
++++ b/drivers/leds/leds-tca6507.c
+@@ -696,8 +696,9 @@ tca6507_led_dt_init(struct device *dev)
+ if (fwnode_property_read_string(child, "label", &led.name))
+ led.name = fwnode_get_name(child);
+
+- fwnode_property_read_string(child, "linux,default-trigger",
+- &led.default_trigger);
++ if (fwnode_property_read_string(child, "linux,default-trigger",
++ &led.default_trigger))
++ led.default_trigger = NULL;
+
+ led.flags = 0;
+ if (fwnode_property_match_string(child, "compatible",
+diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
+index d5e774d830215..f4d670ec30bcb 100644
+--- a/drivers/leds/trigger/ledtrig-netdev.c
++++ b/drivers/leds/trigger/ledtrig-netdev.c
+@@ -318,6 +318,9 @@ static int netdev_trig_notify(struct notifier_block *nb,
+ clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+ switch (evt) {
+ case NETDEV_CHANGENAME:
++ if (netif_carrier_ok(dev))
++ set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
++ fallthrough;
+ case NETDEV_REGISTER:
+ if (trigger_data->net_dev)
+ dev_put(trigger_data->net_dev);
+diff --git a/drivers/leds/trigger/ledtrig-tty.c b/drivers/leds/trigger/ledtrig-tty.c
+index f62db7e520b52..8ae0d2d284aff 100644
+--- a/drivers/leds/trigger/ledtrig-tty.c
++++ b/drivers/leds/trigger/ledtrig-tty.c
+@@ -7,6 +7,8 @@
+ #include <linux/tty.h>
+ #include <uapi/linux/serial.h>
+
++#define LEDTRIG_TTY_INTERVAL 50
++
+ struct ledtrig_tty_data {
+ struct led_classdev *led_cdev;
+ struct delayed_work dwork;
+@@ -122,17 +124,19 @@ static void ledtrig_tty_work(struct work_struct *work)
+
+ if (icount.rx != trigger_data->rx ||
+ icount.tx != trigger_data->tx) {
+- led_set_brightness_sync(trigger_data->led_cdev, LED_ON);
++ unsigned long interval = LEDTRIG_TTY_INTERVAL;
++
++ led_blink_set_oneshot(trigger_data->led_cdev, &interval,
++ &interval, 0);
+
+ trigger_data->rx = icount.rx;
+ trigger_data->tx = icount.tx;
+- } else {
+- led_set_brightness_sync(trigger_data->led_cdev, LED_OFF);
+ }
+
+ out:
+ mutex_unlock(&trigger_data->mutex);
+- schedule_delayed_work(&trigger_data->dwork, msecs_to_jiffies(100));
++ schedule_delayed_work(&trigger_data->dwork,
++ msecs_to_jiffies(LEDTRIG_TTY_INTERVAL * 2));
+ }
+
+ static struct attribute *ledtrig_tty_attrs[] = {
+diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
+index 5cdc361da37cb..a0e717a986dcb 100644
+--- a/drivers/macintosh/Kconfig
++++ b/drivers/macintosh/Kconfig
+@@ -44,6 +44,7 @@ config ADB_IOP
+ config ADB_CUDA
+ bool "Support for Cuda/Egret based Macs and PowerMacs"
+ depends on (ADB || PPC_PMAC) && !PPC_PMAC64
++ select RTC_LIB
+ help
+ This provides support for Cuda/Egret based Macintosh and
+ Power Macintosh systems. This includes most m68k based Macs,
+@@ -57,6 +58,7 @@ config ADB_CUDA
+ config ADB_PMU
+ bool "Support for PMU based PowerMacs and PowerBooks"
+ depends on PPC_PMAC || MAC
++ select RTC_LIB
+ help
+ On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the
+ PMU is an embedded microprocessor whose primary function is to
+@@ -67,6 +69,10 @@ config ADB_PMU
+ this device; you should do so if your machine is one of those
+ mentioned above.
+
++config ADB_PMU_EVENT
++ def_bool y
++ depends on ADB_PMU && INPUT=y
++
+ config ADB_PMU_LED
+ bool "Support for the Power/iBook front LED"
+ depends on PPC_PMAC && ADB_PMU
+@@ -80,6 +86,7 @@ config ADB_PMU_LED
+
+ config ADB_PMU_LED_DISK
+ bool "Use front LED as DISK LED by default"
++ depends on ATA
+ depends on ADB_PMU_LED
+ depends on LEDS_CLASS
+ select LEDS_TRIGGERS
+diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
+index 49819b1b6f201..712edcb3e0b08 100644
+--- a/drivers/macintosh/Makefile
++++ b/drivers/macintosh/Makefile
+@@ -12,7 +12,8 @@ obj-$(CONFIG_MAC_EMUMOUSEBTN) += mac_hid.o
+ obj-$(CONFIG_INPUT_ADBHID) += adbhid.o
+ obj-$(CONFIG_ANSLCD) += ans-lcd.o
+
+-obj-$(CONFIG_ADB_PMU) += via-pmu.o via-pmu-event.o
++obj-$(CONFIG_ADB_PMU) += via-pmu.o
++obj-$(CONFIG_ADB_PMU_EVENT) += via-pmu-event.o
+ obj-$(CONFIG_ADB_PMU_LED) += via-pmu-led.o
+ obj-$(CONFIG_PMAC_BACKLIGHT) += via-pmu-backlight.o
+ obj-$(CONFIG_ADB_CUDA) += via-cuda.o
+diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
+index 73b3961890397..afb0942ccc293 100644
+--- a/drivers/macintosh/adb.c
++++ b/drivers/macintosh/adb.c
+@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
+
+ switch(req->data[1]) {
+ case ADB_QUERY_GETDEVINFO:
+- if (req->nbytes < 3)
++ if (req->nbytes < 3 || req->data[2] >= 16)
+ break;
+ mutex_lock(&adb_handler_mutex);
+ req->reply[0] = adb_handler[req->data[2]].original_address;
+diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
+index dc634c2932fd3..dd24655861401 100644
+--- a/drivers/macintosh/macio-adb.c
++++ b/drivers/macintosh/macio-adb.c
+@@ -105,6 +105,10 @@ int macio_init(void)
+ return -ENXIO;
+ }
+ adb = ioremap(r.start, sizeof(struct adb_regs));
++ if (!adb) {
++ of_node_put(adbs);
++ return -ENOMEM;
++ }
+
+ out_8(&adb->ctrl.r, 0);
+ out_8(&adb->intr.r, 0);
+diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
+index c1fdf28960216..df69d648f6d0a 100644
+--- a/drivers/macintosh/macio_asic.c
++++ b/drivers/macintosh/macio_asic.c
+@@ -423,7 +423,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
+ if (of_device_register(&dev->ofdev) != 0) {
+ printk(KERN_DEBUG"macio: device registration error for %s!\n",
+ dev_name(&dev->ofdev.dev));
+- kfree(dev);
++ put_device(&dev->ofdev.dev);
+ return NULL;
+ }
+
+diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
+index 4b98bc26a94b5..2109129ea1bbf 100644
+--- a/drivers/macintosh/via-pmu.c
++++ b/drivers/macintosh/via-pmu.c
+@@ -1459,7 +1459,7 @@ next:
+ pmu_pass_intr(data, len);
+ /* len == 6 is probably a bad check. But how do I
+ * know what PMU versions send what events here? */
+- if (len == 6) {
++ if (IS_ENABLED(CONFIG_ADB_PMU_EVENT) && len == 6) {
+ via_pmu_event(PMU_EVT_POWER, !!(data[1]&8));
+ via_pmu_event(PMU_EVT_LID, data[1]&1);
+ }
+diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
+index 29f48c2028b6d..e90ad1b78e936 100644
+--- a/drivers/macintosh/windfarm_lm75_sensor.c
++++ b/drivers/macintosh/windfarm_lm75_sensor.c
+@@ -34,8 +34,8 @@
+ #endif
+
+ struct wf_lm75_sensor {
+- int ds1775 : 1;
+- int inited : 1;
++ unsigned int ds1775 : 1;
++ unsigned int inited : 1;
+ struct i2c_client *i2c;
+ struct wf_sensor sens;
+ };
+diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
+index e46e1153a0b43..7d7d6213e32aa 100644
+--- a/drivers/macintosh/windfarm_smu_sat.c
++++ b/drivers/macintosh/windfarm_smu_sat.c
+@@ -171,6 +171,7 @@ static void wf_sat_release(struct kref *ref)
+
+ if (sat->nr >= 0)
+ sats[sat->nr] = NULL;
++ of_node_put(sat->node);
+ kfree(sat);
+ }
+
+diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
+index c8706cfb83fd8..714c1e14074ed 100644
+--- a/drivers/macintosh/windfarm_smu_sensors.c
++++ b/drivers/macintosh/windfarm_smu_sensors.c
+@@ -273,8 +273,8 @@ struct smu_cpu_power_sensor {
+ struct list_head link;
+ struct wf_sensor *volts;
+ struct wf_sensor *amps;
+- int fake_volts : 1;
+- int quadratic : 1;
++ unsigned int fake_volts : 1;
++ unsigned int quadratic : 1;
+ struct wf_sensor sens;
+ };
+ #define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens)
+diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c
+index d997f8ebfa98c..3af15083a25af 100644
+--- a/drivers/mailbox/arm_mhuv2.c
++++ b/drivers/mailbox/arm_mhuv2.c
+@@ -1061,8 +1061,8 @@ static int mhuv2_probe(struct amba_device *adev, const struct amba_id *id)
+ int ret = -EINVAL;
+
+ reg = devm_of_iomap(dev, dev->of_node, 0, NULL);
+- if (!reg)
+- return -ENOMEM;
++ if (IS_ERR(reg))
++ return PTR_ERR(reg);
+
+ mhu = devm_kzalloc(dev, sizeof(*mhu), GFP_KERNEL);
+ if (!mhu)
+diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c
+index 78073ad1f2f1f..b7e9fd53d47db 100644
+--- a/drivers/mailbox/bcm-flexrm-mailbox.c
++++ b/drivers/mailbox/bcm-flexrm-mailbox.c
+@@ -632,15 +632,15 @@ static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg)
+
+ rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+ DMA_TO_DEVICE);
+- if (rc < 0)
+- return rc;
++ if (!rc)
++ return -EIO;
+
+ rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst),
+ DMA_FROM_DEVICE);
+- if (rc < 0) {
++ if (!rc) {
+ dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+ DMA_TO_DEVICE);
+- return rc;
++ return -EIO;
+ }
+
+ return 0;
+diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
+index 0ce75c6b36b65..850d4004c50e0 100644
+--- a/drivers/mailbox/imx-mailbox.c
++++ b/drivers/mailbox/imx-mailbox.c
+@@ -13,6 +13,7 @@
+ #include <linux/module.h>
+ #include <linux/of_device.h>
+ #include <linux/pm_runtime.h>
++#include <linux/suspend.h>
+ #include <linux/slab.h>
+
+ #define IMX_MU_CHANS 16
+@@ -67,6 +68,7 @@ struct imx_mu_priv {
+ const struct imx_mu_dcfg *dcfg;
+ struct clk *clk;
+ int irq;
++ bool suspend;
+
+ u32 xcr[4];
+
+@@ -307,6 +309,9 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
+ return IRQ_NONE;
+ }
+
++ if (priv->suspend)
++ pm_system_wakeup();
++
+ return IRQ_HANDLED;
+ }
+
+@@ -652,6 +657,8 @@ static int __maybe_unused imx_mu_suspend_noirq(struct device *dev)
+ priv->xcr[i] = imx_mu_read(priv, priv->dcfg->xCR[i]);
+ }
+
++ priv->suspend = true;
++
+ return 0;
+ }
+
+@@ -668,11 +675,13 @@ static int __maybe_unused imx_mu_resume_noirq(struct device *dev)
+ * send failed, may lead to system freeze. This issue
+ * is observed by testing freeze mode suspend.
+ */
+- if (!imx_mu_read(priv, priv->dcfg->xCR[0]) && !priv->clk) {
++ if (!priv->clk && !imx_mu_read(priv, priv->dcfg->xCR[0])) {
+ for (i = 0; i < IMX_MU_xCR_MAX; i++)
+ imx_mu_write(priv, priv->xcr[i], priv->dcfg->xCR[i]);
+ }
+
++ priv->suspend = false;
++
+ return 0;
+ }
+
+diff --git a/drivers/mailbox/mailbox-mpfs.c b/drivers/mailbox/mailbox-mpfs.c
+index 0d6e2231a2c75..08aa840cccaca 100644
+--- a/drivers/mailbox/mailbox-mpfs.c
++++ b/drivers/mailbox/mailbox-mpfs.c
+@@ -2,7 +2,7 @@
+ /*
+ * Microchip PolarFire SoC (MPFS) system controller/mailbox controller driver
+ *
+- * Copyright (c) 2020 Microchip Corporation. All rights reserved.
++ * Copyright (c) 2020-2022 Microchip Corporation. All rights reserved.
+ *
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ *
+@@ -56,12 +56,13 @@
+ #define SCB_STATUS_NOTIFY_MASK BIT(SCB_STATUS_NOTIFY)
+
+ #define SCB_STATUS_POS (16)
+-#define SCB_STATUS_MASK GENMASK_ULL(SCB_STATUS_POS + SCB_MASK_WIDTH, SCB_STATUS_POS)
++#define SCB_STATUS_MASK GENMASK(SCB_STATUS_POS + SCB_MASK_WIDTH - 1, SCB_STATUS_POS)
+
+ struct mpfs_mbox {
+ struct mbox_controller controller;
+ struct device *dev;
+ int irq;
++ void __iomem *ctrl_base;
+ void __iomem *mbox_base;
+ void __iomem *int_reg;
+ struct mbox_chan chans[1];
+@@ -73,11 +74,18 @@ static bool mpfs_mbox_busy(struct mpfs_mbox *mbox)
+ {
+ u32 status;
+
+- status = readl_relaxed(mbox->mbox_base + SERVICES_SR_OFFSET);
++ status = readl_relaxed(mbox->ctrl_base + SERVICES_SR_OFFSET);
+
+ return status & SCB_STATUS_BUSY_MASK;
+ }
+
++static bool mpfs_mbox_last_tx_done(struct mbox_chan *chan)
++{
++ struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
++
++ return !mpfs_mbox_busy(mbox);
++}
++
+ static int mpfs_mbox_send_data(struct mbox_chan *chan, void *data)
+ {
+ struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+@@ -99,29 +107,27 @@ static int mpfs_mbox_send_data(struct mbox_chan *chan, void *data)
+
+ for (index = 0; index < (msg->cmd_data_size / 4); index++)
+ writel_relaxed(word_buf[index],
+- mbox->mbox_base + MAILBOX_REG_OFFSET + index * 0x4);
++ mbox->mbox_base + msg->mbox_offset + index * 0x4);
+ if (extra_bits) {
+ u8 i;
+ u8 byte_off = ALIGN_DOWN(msg->cmd_data_size, 4);
+ u8 *byte_buf = msg->cmd_data + byte_off;
+
+- val = readl_relaxed(mbox->mbox_base +
+- MAILBOX_REG_OFFSET + index * 0x4);
++ val = readl_relaxed(mbox->mbox_base + msg->mbox_offset + index * 0x4);
+
+ for (i = 0u; i < extra_bits; i++) {
+ val &= ~(0xffu << (i * 8u));
+ val |= (byte_buf[i] << (i * 8u));
+ }
+
+- writel_relaxed(val,
+- mbox->mbox_base + MAILBOX_REG_OFFSET + index * 0x4);
++ writel_relaxed(val, mbox->mbox_base + msg->mbox_offset + index * 0x4);
+ }
+ }
+
+ opt_sel = ((msg->mbox_offset << 7u) | (msg->cmd_opcode & 0x7fu));
+ tx_trigger = (opt_sel << SCB_CTRL_POS) & SCB_CTRL_MASK;
+ tx_trigger |= SCB_CTRL_REQ_MASK | SCB_STATUS_NOTIFY_MASK;
+- writel_relaxed(tx_trigger, mbox->mbox_base + SERVICES_CR_OFFSET);
++ writel_relaxed(tx_trigger, mbox->ctrl_base + SERVICES_CR_OFFSET);
+
+ return 0;
+ }
+@@ -131,17 +137,42 @@ static void mpfs_mbox_rx_data(struct mbox_chan *chan)
+ struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+ struct mpfs_mss_response *response = mbox->response;
+ u16 num_words = ALIGN((response->resp_size), (4)) / 4U;
+- u32 i;
++ u32 i, status;
+
+ if (!response->resp_msg) {
+ dev_err(mbox->dev, "failed to assign memory for response %d\n", -ENOMEM);
+ return;
+ }
+
++ /*
++ * The status is stored in bits 31:16 of the SERVICES_SR register.
++ * It is only valid when BUSY == 0.
++ * We should *never* get an interrupt while the controller is
++ * still in the busy state. If we do, something has gone badly
++ * wrong & the content of the mailbox would not be valid.
++ */
++ if (mpfs_mbox_busy(mbox)) {
++ dev_err(mbox->dev, "got an interrupt but system controller is busy\n");
++ response->resp_status = 0xDEAD;
++ return;
++ }
++
++ status = readl_relaxed(mbox->ctrl_base + SERVICES_SR_OFFSET);
++
++ /*
++ * If the status of the individual servers is non-zero, the service has
++ * failed. The contents of the mailbox at this point are not be valid,
++ * so don't bother reading them. Set the status so that the driver
++ * implementing the service can handle the result.
++ */
++ response->resp_status = (status & SCB_STATUS_MASK) >> SCB_STATUS_POS;
++ if (response->resp_status)
++ return;
++
+ if (!mpfs_mbox_busy(mbox)) {
+ for (i = 0; i < num_words; i++) {
+ response->resp_msg[i] =
+- readl_relaxed(mbox->mbox_base + MAILBOX_REG_OFFSET
++ readl_relaxed(mbox->mbox_base
+ + mbox->resp_offset + i * 0x4);
+ }
+ }
+@@ -158,7 +189,6 @@ static irqreturn_t mpfs_mbox_inbox_isr(int irq, void *data)
+
+ mpfs_mbox_rx_data(chan);
+
+- mbox_chan_txdone(chan, 0);
+ return IRQ_HANDLED;
+ }
+
+@@ -188,6 +218,7 @@ static const struct mbox_chan_ops mpfs_mbox_ops = {
+ .send_data = mpfs_mbox_send_data,
+ .startup = mpfs_mbox_startup,
+ .shutdown = mpfs_mbox_shutdown,
++ .last_tx_done = mpfs_mbox_last_tx_done,
+ };
+
+ static int mpfs_mbox_probe(struct platform_device *pdev)
+@@ -200,14 +231,18 @@ static int mpfs_mbox_probe(struct platform_device *pdev)
+ if (!mbox)
+ return -ENOMEM;
+
+- mbox->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
+- if (IS_ERR(mbox->mbox_base))
+- return PTR_ERR(mbox->mbox_base);
++ mbox->ctrl_base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
++ if (IS_ERR(mbox->ctrl_base))
++ return PTR_ERR(mbox->ctrl_base);
+
+ mbox->int_reg = devm_platform_get_and_ioremap_resource(pdev, 1, &regs);
+ if (IS_ERR(mbox->int_reg))
+ return PTR_ERR(mbox->int_reg);
+
++ mbox->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 2, &regs);
++ if (IS_ERR(mbox->mbox_base)) // account for the old dt-binding w/ 2 regs
++ mbox->mbox_base = mbox->ctrl_base + MAILBOX_REG_OFFSET;
++
+ mbox->irq = platform_get_irq(pdev, 0);
+ if (mbox->irq < 0)
+ return mbox->irq;
+@@ -219,7 +254,8 @@ static int mpfs_mbox_probe(struct platform_device *pdev)
+ mbox->controller.num_chans = 1;
+ mbox->controller.chans = mbox->chans;
+ mbox->controller.ops = &mpfs_mbox_ops;
+- mbox->controller.txdone_irq = true;
++ mbox->controller.txdone_poll = true;
++ mbox->controller.txpoll_period = 10u;
+
+ ret = devm_mbox_controller_register(&pdev->dev, &mbox->controller);
+ if (ret) {
+@@ -232,7 +268,7 @@ static int mpfs_mbox_probe(struct platform_device *pdev)
+ }
+
+ static const struct of_device_id mpfs_mbox_of_match[] = {
+- {.compatible = "microchip,polarfire-soc-mailbox", },
++ {.compatible = "microchip,mpfs-mailbox", },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, mpfs_mbox_of_match);
+diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c
+index 4555d678fadda..abcee58e851c2 100644
+--- a/drivers/mailbox/mailbox-test.c
++++ b/drivers/mailbox/mailbox-test.c
+@@ -12,6 +12,7 @@
+ #include <linux/kernel.h>
+ #include <linux/mailbox_client.h>
+ #include <linux/module.h>
++#include <linux/mutex.h>
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <linux/poll.h>
+@@ -38,6 +39,7 @@ struct mbox_test_device {
+ char *signal;
+ char *message;
+ spinlock_t lock;
++ struct mutex mutex;
+ wait_queue_head_t waitq;
+ struct fasync_struct *async_queue;
+ struct dentry *root_debugfs_dir;
+@@ -95,6 +97,7 @@ static ssize_t mbox_test_message_write(struct file *filp,
+ size_t count, loff_t *ppos)
+ {
+ struct mbox_test_device *tdev = filp->private_data;
++ char *message;
+ void *data;
+ int ret;
+
+@@ -110,10 +113,13 @@ static ssize_t mbox_test_message_write(struct file *filp,
+ return -EINVAL;
+ }
+
+- tdev->message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL);
+- if (!tdev->message)
++ message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL);
++ if (!message)
+ return -ENOMEM;
+
++ mutex_lock(&tdev->mutex);
++
++ tdev->message = message;
+ ret = copy_from_user(tdev->message, userbuf, count);
+ if (ret) {
+ ret = -EFAULT;
+@@ -144,6 +150,8 @@ out:
+ kfree(tdev->message);
+ tdev->signal = NULL;
+
++ mutex_unlock(&tdev->mutex);
++
+ return ret < 0 ? ret : count;
+ }
+
+@@ -392,6 +400,7 @@ static int mbox_test_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, tdev);
+
+ spin_lock_init(&tdev->lock);
++ mutex_init(&tdev->mutex);
+
+ if (tdev->rx_channel) {
+ tdev->rx_buffer = devm_kzalloc(&pdev->dev,
+diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
+index 3e7d4b20ab34f..4229b9b5da98f 100644
+--- a/drivers/mailbox/mailbox.c
++++ b/drivers/mailbox/mailbox.c
+@@ -82,11 +82,11 @@ static void msg_submit(struct mbox_chan *chan)
+ exit:
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+- /* kick start the timer immediately to avoid delays */
+ if (!err && (chan->txdone_method & TXDONE_BY_POLL)) {
+- /* but only if not already active */
+- if (!hrtimer_active(&chan->mbox->poll_hrt))
+- hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
++ /* kick start the timer immediately to avoid delays */
++ spin_lock_irqsave(&chan->mbox->poll_hrt_lock, flags);
++ hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL);
++ spin_unlock_irqrestore(&chan->mbox->poll_hrt_lock, flags);
+ }
+ }
+
+@@ -120,20 +120,26 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
+ container_of(hrtimer, struct mbox_controller, poll_hrt);
+ bool txdone, resched = false;
+ int i;
++ unsigned long flags;
+
+ for (i = 0; i < mbox->num_chans; i++) {
+ struct mbox_chan *chan = &mbox->chans[i];
+
+ if (chan->active_req && chan->cl) {
+- resched = true;
+ txdone = chan->mbox->ops->last_tx_done(chan);
+ if (txdone)
+ tx_tick(chan, 0);
++ else
++ resched = true;
+ }
+ }
+
+ if (resched) {
+- hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period));
++ spin_lock_irqsave(&mbox->poll_hrt_lock, flags);
++ if (!hrtimer_is_queued(hrtimer))
++ hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period));
++ spin_unlock_irqrestore(&mbox->poll_hrt_lock, flags);
++
+ return HRTIMER_RESTART;
+ }
+ return HRTIMER_NORESTART;
+@@ -500,6 +506,7 @@ int mbox_controller_register(struct mbox_controller *mbox)
+ hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ mbox->poll_hrt.function = txdone_hrtimer;
++ spin_lock_init(&mbox->poll_hrt_lock);
+ }
+
+ for (i = 0; i < mbox->num_chans; i++) {
+diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
+index 64175a893312e..3583c2aad0edc 100644
+--- a/drivers/mailbox/mtk-cmdq-mailbox.c
++++ b/drivers/mailbox/mtk-cmdq-mailbox.c
+@@ -195,7 +195,6 @@ static void cmdq_task_exec_done(struct cmdq_task *task, int sta)
+ struct cmdq_task_cb *cb = &task->pkt->async_cb;
+ struct cmdq_cb_data data;
+
+- WARN_ON(cb->cb == (cmdq_async_flush_cb)NULL);
+ data.sta = sta;
+ data.data = cb->data;
+ data.pkt = task->pkt;
+@@ -532,7 +531,8 @@ static int cmdq_probe(struct platform_device *pdev)
+ struct device_node *phandle = dev->of_node;
+ struct device_node *node;
+ int alias_id = 0;
+- char clk_name[4] = "gce";
++ static const char * const clk_name = "gce";
++ static const char * const clk_names[] = { "gce0", "gce1" };
+
+ cmdq = devm_kzalloc(dev, sizeof(*cmdq), GFP_KERNEL);
+ if (!cmdq)
+@@ -570,12 +570,9 @@ static int cmdq_probe(struct platform_device *pdev)
+
+ if (cmdq->gce_num > 1) {
+ for_each_child_of_node(phandle->parent, node) {
+- char clk_id[8];
+-
+ alias_id = of_alias_get_id(node, clk_name);
+- if (alias_id < cmdq->gce_num) {
+- snprintf(clk_id, sizeof(clk_id), "%s%d", clk_name, alias_id);
+- cmdq->clocks[alias_id].id = clk_id;
++ if (alias_id >= 0 && alias_id < cmdq->gce_num) {
++ cmdq->clocks[alias_id].id = clk_names[alias_id];
+ cmdq->clocks[alias_id].clk = of_clk_get(node, 0);
+ if (IS_ERR(cmdq->clocks[alias_id].clk)) {
+ dev_err(dev, "failed to get gce clk: %d\n", alias_id);
+@@ -663,7 +660,7 @@ static const struct gce_plat gce_plat_v5 = {
+ .thread_nr = 24,
+ .shift = 3,
+ .control_by_sw = true,
+- .gce_num = 2
++ .gce_num = 1
+ };
+
+ static const struct gce_plat gce_plat_v6 = {
+diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c
+index acd0675da681e..78f7265039c66 100644
+--- a/drivers/mailbox/tegra-hsp.c
++++ b/drivers/mailbox/tegra-hsp.c
+@@ -412,6 +412,11 @@ static int tegra_hsp_mailbox_flush(struct mbox_chan *chan,
+ value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX);
+ if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) {
+ mbox_chan_txdone(chan, 0);
++
++ /* Wait until channel is empty */
++ if (chan->active_req != NULL)
++ continue;
++
+ return 0;
+ }
+
+diff --git a/drivers/mailbox/ti-msgmgr.c b/drivers/mailbox/ti-msgmgr.c
+index efb43b0385960..fa71ae837d235 100644
+--- a/drivers/mailbox/ti-msgmgr.c
++++ b/drivers/mailbox/ti-msgmgr.c
+@@ -385,14 +385,20 @@ static int ti_msgmgr_send_data(struct mbox_chan *chan, void *data)
+ /* Ensure all unused data is 0 */
+ data_trail &= 0xFFFFFFFF >> (8 * (sizeof(u32) - trail_bytes));
+ writel(data_trail, data_reg);
+- data_reg++;
++ data_reg += sizeof(u32);
+ }
++
+ /*
+ * 'data_reg' indicates next register to write. If we did not already
+ * write on tx complete reg(last reg), we must do so for transmit
++ * In addition, we also need to make sure all intermediate data
++ * registers(if any required), are reset to 0 for TISCI backward
++ * compatibility to be maintained.
+ */
+- if (data_reg <= qinst->queue_buff_end)
+- writel(0, qinst->queue_buff_end);
++ while (data_reg <= qinst->queue_buff_end) {
++ writel(0, data_reg);
++ data_reg += sizeof(u32);
++ }
+
+ return 0;
+ }
+diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c
+index f44079d62b1a7..be06de791c544 100644
+--- a/drivers/mailbox/zynqmp-ipi-mailbox.c
++++ b/drivers/mailbox/zynqmp-ipi-mailbox.c
+@@ -110,7 +110,7 @@ struct zynqmp_ipi_pdata {
+ unsigned int method;
+ u32 local_id;
+ int num_mboxes;
+- struct zynqmp_ipi_mbox *ipi_mboxes;
++ struct zynqmp_ipi_mbox ipi_mboxes[];
+ };
+
+ static struct device_driver zynqmp_ipi_mbox_driver = {
+@@ -152,7 +152,7 @@ static irqreturn_t zynqmp_ipi_interrupt(int irq, void *data)
+ struct zynqmp_ipi_message *msg;
+ u64 arg0, arg3;
+ struct arm_smccc_res res;
+- int ret, i;
++ int ret, i, status = IRQ_NONE;
+
+ (void)irq;
+ arg0 = SMC_IPI_MAILBOX_STATUS_ENQUIRY;
+@@ -170,11 +170,11 @@ static irqreturn_t zynqmp_ipi_interrupt(int irq, void *data)
+ memcpy_fromio(msg->data, mchan->req_buf,
+ msg->len);
+ mbox_chan_received_data(chan, (void *)msg);
+- return IRQ_HANDLED;
++ status = IRQ_HANDLED;
+ }
+ }
+ }
+- return IRQ_NONE;
++ return status;
+ }
+
+ /**
+@@ -493,6 +493,7 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox,
+ ret = device_register(&ipi_mbox->dev);
+ if (ret) {
+ dev_err(dev, "Failed to register ipi mbox dev.\n");
++ put_device(&ipi_mbox->dev);
+ return ret;
+ }
+ mdev = &ipi_mbox->dev;
+@@ -619,7 +620,8 @@ static void zynqmp_ipi_free_mboxes(struct zynqmp_ipi_pdata *pdata)
+ ipi_mbox = &pdata->ipi_mboxes[i];
+ if (ipi_mbox->dev.parent) {
+ mbox_controller_unregister(&ipi_mbox->mbox);
+- device_unregister(&ipi_mbox->dev);
++ if (device_is_registered(&ipi_mbox->dev))
++ device_unregister(&ipi_mbox->dev);
+ }
+ }
+ }
+@@ -632,8 +634,13 @@ static int zynqmp_ipi_probe(struct platform_device *pdev)
+ struct zynqmp_ipi_mbox *mbox;
+ int num_mboxes, ret = -EINVAL;
+
+- num_mboxes = of_get_child_count(np);
+- pdata = devm_kzalloc(dev, sizeof(*pdata) + (num_mboxes * sizeof(*mbox)),
++ num_mboxes = of_get_available_child_count(np);
++ if (num_mboxes == 0) {
++ dev_err(dev, "mailbox nodes not available\n");
++ return -EINVAL;
++ }
++
++ pdata = devm_kzalloc(dev, struct_size(pdata, ipi_mboxes, num_mboxes),
+ GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+@@ -647,8 +654,6 @@ static int zynqmp_ipi_probe(struct platform_device *pdev)
+ }
+
+ pdata->num_mboxes = num_mboxes;
+- pdata->ipi_mboxes = (struct zynqmp_ipi_mbox *)
+- ((char *)pdata + sizeof(*pdata));
+
+ mbox = pdata->ipi_mboxes;
+ for_each_available_child_of_node(np, nc) {
+diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
+index cf128b3471d78..0530db5482311 100644
+--- a/drivers/mcb/mcb-core.c
++++ b/drivers/mcb/mcb-core.c
+@@ -71,8 +71,10 @@ static int mcb_probe(struct device *dev)
+
+ get_device(dev);
+ ret = mdrv->probe(mdev, found_id);
+- if (ret)
++ if (ret) {
+ module_put(carrier_mod);
++ put_device(dev);
++ }
+
+ return ret;
+ }
+diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c
+index 0266bfddfbe27..aa6938da0db85 100644
+--- a/drivers/mcb/mcb-parse.c
++++ b/drivers/mcb/mcb-parse.c
+@@ -108,7 +108,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus,
+ return 0;
+
+ err:
+- mcb_free_dev(mdev);
++ put_device(&mdev->dev);
+
+ return ret;
+ }
+diff --git a/drivers/mcb/mcb-pci.c b/drivers/mcb/mcb-pci.c
+index dc88232d9af83..53d9202ff9a7c 100644
+--- a/drivers/mcb/mcb-pci.c
++++ b/drivers/mcb/mcb-pci.c
+@@ -31,7 +31,7 @@ static int mcb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ {
+ struct resource *res;
+ struct priv *priv;
+- int ret;
++ int ret, table_size;
+ unsigned long flags;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(struct priv), GFP_KERNEL);
+@@ -90,7 +90,30 @@ static int mcb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ if (ret < 0)
+ goto out_mcb_bus;
+
+- dev_dbg(&pdev->dev, "Found %d cells\n", ret);
++ table_size = ret;
++
++ if (table_size < CHAM_HEADER_SIZE) {
++ /* Release the previous resources */
++ devm_iounmap(&pdev->dev, priv->base);
++ devm_release_mem_region(&pdev->dev, priv->mapbase, CHAM_HEADER_SIZE);
++
++ /* Then, allocate it again with the actual chameleon table size */
++ res = devm_request_mem_region(&pdev->dev, priv->mapbase,
++ table_size,
++ KBUILD_MODNAME);
++ if (!res) {
++ dev_err(&pdev->dev, "Failed to request PCI memory\n");
++ ret = -EBUSY;
++ goto out_mcb_bus;
++ }
++
++ priv->base = devm_ioremap(&pdev->dev, priv->mapbase, table_size);
++ if (!priv->base) {
++ dev_err(&pdev->dev, "Cannot ioremap\n");
++ ret = -ENOMEM;
++ goto out_mcb_bus;
++ }
++ }
+
+ mcb_bus_add_devices(priv->bus);
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 0595559de174a..88097d1892ace 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b)
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, b->bio, iter_all) {
+- memcpy(bvec_virt(bv), addr, PAGE_SIZE);
++ memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
+ addr += PAGE_SIZE;
+ }
+
+@@ -885,7 +885,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
+ * cannibalize_bucket() will take. This means every time we unlock the root of
+ * the btree, we need to release this lock if we have it held.
+ */
+-static void bch_cannibalize_unlock(struct cache_set *c)
++void bch_cannibalize_unlock(struct cache_set *c)
+ {
+ spin_lock(&c->btree_cannibalize_lock);
+ if (c->btree_cache_alloc_lock == current) {
+@@ -1090,10 +1090,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ struct btree *parent)
+ {
+ BKEY_PADDED(key) k;
+- struct btree *b = ERR_PTR(-EAGAIN);
++ struct btree *b;
+
+ mutex_lock(&c->bucket_lock);
+ retry:
++ /* return ERR_PTR(-EAGAIN) when it fails */
++ b = ERR_PTR(-EAGAIN);
+ if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait))
+ goto err;
+
+@@ -1138,7 +1140,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
+ {
+ struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
+- if (!IS_ERR_OR_NULL(n)) {
++ if (!IS_ERR(n)) {
+ mutex_lock(&n->write_lock);
+ bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
+ bkey_copy_key(&n->key, &b->key);
+@@ -1340,7 +1342,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
+ memset(new_nodes, 0, sizeof(new_nodes));
+ closure_init_stack(&cl);
+
+- while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
++ while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b))
+ keys += r[nodes++].keys;
+
+ blocks = btree_default_blocks(b->c) * 2 / 3;
+@@ -1352,7 +1354,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
+
+ for (i = 0; i < nodes; i++) {
+ new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
+- if (IS_ERR_OR_NULL(new_nodes[i]))
++ if (IS_ERR(new_nodes[i]))
+ goto out_nocoalesce;
+ }
+
+@@ -1487,7 +1489,7 @@ out_nocoalesce:
+ bch_keylist_free(&keylist);
+
+ for (i = 0; i < nodes; i++)
+- if (!IS_ERR_OR_NULL(new_nodes[i])) {
++ if (!IS_ERR(new_nodes[i])) {
+ btree_node_free(new_nodes[i]);
+ rw_unlock(true, new_nodes[i]);
+ }
+@@ -1669,7 +1671,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
+ if (should_rewrite) {
+ n = btree_node_alloc_replacement(b, NULL);
+
+- if (!IS_ERR_OR_NULL(n)) {
++ if (!IS_ERR(n)) {
+ bch_btree_node_write_sync(n);
+
+ bch_btree_set_root(n);
+@@ -1968,6 +1970,15 @@ static int bch_btree_check_thread(void *arg)
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
++ /*
++ * The op may be added to cache_set's btree_cache_wait
++ * in mca_cannibalize(), must ensure it is removed from
++ * the list and release btree_cache_alloc_lock before
++ * free op memory.
++ * Otherwise, the btree_cache_wait will be damaged.
++ */
++ bch_cannibalize_unlock(c);
++ finish_wait(&c->btree_cache_wait, &(&op)->wait);
+ if (ret)
+ goto out;
+ }
+@@ -2006,8 +2017,7 @@ int bch_btree_check(struct cache_set *c)
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+- struct btree_check_state *check_state;
+- char name[32];
++ struct btree_check_state check_state;
+
+ /* check and mark root node keys */
+ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+@@ -2018,61 +2028,59 @@ int bch_btree_check(struct cache_set *c)
+ if (c->root->level == 0)
+ return 0;
+
+- check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+- if (!check_state)
+- return -ENOMEM;
+-
+- check_state->c = c;
+- check_state->total_threads = bch_btree_chkthread_nr();
+- check_state->key_idx = 0;
+- spin_lock_init(&check_state->idx_lock);
+- atomic_set(&check_state->started, 0);
+- atomic_set(&check_state->enough, 0);
+- init_waitqueue_head(&check_state->wait);
++ memset(&check_state, 0, sizeof(struct btree_check_state));
++ check_state.c = c;
++ check_state.total_threads = bch_btree_chkthread_nr();
++ check_state.key_idx = 0;
++ spin_lock_init(&check_state.idx_lock);
++ atomic_set(&check_state.started, 0);
++ atomic_set(&check_state.enough, 0);
++ init_waitqueue_head(&check_state.wait);
+
++ rw_lock(0, c->root, c->root->level);
+ /*
+ * Run multiple threads to check btree nodes in parallel,
+- * if check_state->enough is non-zero, it means current
++ * if check_state.enough is non-zero, it means current
+ * running check threads are enough, unncessary to create
+ * more.
+ */
+- for (i = 0; i < check_state->total_threads; i++) {
+- /* fetch latest check_state->enough earlier */
++ for (i = 0; i < check_state.total_threads; i++) {
++ /* fetch latest check_state.enough earlier */
+ smp_mb__before_atomic();
+- if (atomic_read(&check_state->enough))
++ if (atomic_read(&check_state.enough))
+ break;
+
+- check_state->infos[i].result = 0;
+- check_state->infos[i].state = check_state;
+- snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+- atomic_inc(&check_state->started);
++ check_state.infos[i].result = 0;
++ check_state.infos[i].state = &check_state;
+
+- check_state->infos[i].thread =
++ check_state.infos[i].thread =
+ kthread_run(bch_btree_check_thread,
+- &check_state->infos[i],
+- name);
+- if (IS_ERR(check_state->infos[i].thread)) {
++ &check_state.infos[i],
++ "bch_btrchk[%d]", i);
++ if (IS_ERR(check_state.infos[i].thread)) {
+ pr_err("fails to run thread bch_btrchk[%d]\n", i);
+ for (--i; i >= 0; i--)
+- kthread_stop(check_state->infos[i].thread);
++ kthread_stop(check_state.infos[i].thread);
+ ret = -ENOMEM;
+ goto out;
+ }
++ atomic_inc(&check_state.started);
+ }
+
+- wait_event_interruptible(check_state->wait,
+- atomic_read(&check_state->started) == 0 ||
+- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
++ /*
++ * Must wait for all threads to stop.
++ */
++ wait_event(check_state.wait, atomic_read(&check_state.started) == 0);
+
+- for (i = 0; i < check_state->total_threads; i++) {
+- if (check_state->infos[i].result) {
+- ret = check_state->infos[i].result;
++ for (i = 0; i < check_state.total_threads; i++) {
++ if (check_state.infos[i].result) {
++ ret = check_state.infos[i].result;
+ goto out;
+ }
+ }
+
+ out:
+- kfree(check_state);
++ rw_unlock(0, c->root);
+ return ret;
+ }
+
+diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
+index 50482107134f1..a2920bbfcad56 100644
+--- a/drivers/md/bcache/btree.h
++++ b/drivers/md/bcache/btree.h
+@@ -226,7 +226,7 @@ struct btree_check_info {
+ int result;
+ };
+
+-#define BCH_BTR_CHKTHREAD_MAX 64
++#define BCH_BTR_CHKTHREAD_MAX 12
+ struct btree_check_state {
+ struct cache_set *c;
+ int total_threads;
+@@ -282,6 +282,7 @@ void bch_initial_gc_finish(struct cache_set *c);
+ void bch_moving_gc(struct cache_set *c);
+ int bch_btree_check(struct cache_set *c);
+ void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
++void bch_cannibalize_unlock(struct cache_set *c);
+
+ static inline void wake_up_gc(struct cache_set *c)
+ {
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 61bd79babf7ae..346a92c438582 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -407,6 +407,11 @@ err:
+ return ret;
+ }
+
++void bch_journal_space_reserve(struct journal *j)
++{
++ j->do_reserve = true;
++}
++
+ /* Journalling */
+
+ static void btree_flush_write(struct cache_set *c)
+@@ -625,12 +630,30 @@ static void do_journal_discard(struct cache *ca)
+ }
+ }
+
++static unsigned int free_journal_buckets(struct cache_set *c)
++{
++ struct journal *j = &c->journal;
++ struct cache *ca = c->cache;
++ struct journal_device *ja = &c->cache->journal;
++ unsigned int n;
++
++ /* In case njournal_buckets is not power of 2 */
++ if (ja->cur_idx >= ja->discard_idx)
++ n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
++ else
++ n = ja->discard_idx - ja->cur_idx;
++
++ if (n > (1 + j->do_reserve))
++ return n - (1 + j->do_reserve);
++
++ return 0;
++}
++
+ static void journal_reclaim(struct cache_set *c)
+ {
+ struct bkey *k = &c->journal.key;
+ struct cache *ca = c->cache;
+ uint64_t last_seq;
+- unsigned int next;
+ struct journal_device *ja = &ca->journal;
+ atomic_t p __maybe_unused;
+
+@@ -653,12 +676,10 @@ static void journal_reclaim(struct cache_set *c)
+ if (c->journal.blocks_free)
+ goto out;
+
+- next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+- /* No space available on this device */
+- if (next == ja->discard_idx)
++ if (!free_journal_buckets(c))
+ goto out;
+
+- ja->cur_idx = next;
++ ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+ k->ptr[0] = MAKE_PTR(0,
+ bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
+ ca->sb.nr_this_dev);
+diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
+index f2ea34d5f431b..cd316b4a1e95f 100644
+--- a/drivers/md/bcache/journal.h
++++ b/drivers/md/bcache/journal.h
+@@ -105,6 +105,7 @@ struct journal {
+ spinlock_t lock;
+ spinlock_t flush_write_lock;
+ bool btree_flushing;
++ bool do_reserve;
+ /* used when waiting because the journal was full */
+ struct closure_waitlist wait;
+ struct closure io;
+@@ -182,5 +183,6 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list);
+
+ void bch_journal_free(struct cache_set *c);
+ int bch_journal_alloc(struct cache_set *c);
++void bch_journal_space_reserve(struct journal *j);
+
+ #endif /* _BCACHE_JOURNAL_H */
+diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
+index 6d1de889baeb1..9f4a2850aa47e 100644
+--- a/drivers/md/bcache/request.c
++++ b/drivers/md/bcache/request.c
+@@ -1107,6 +1107,12 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
+ * which would call closure_get(&dc->disk.cl)
+ */
+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
++ if (!ddip) {
++ bio->bi_status = BLK_STS_RESOURCE;
++ bio->bi_end_io(bio);
++ return;
++ }
++
+ ddip->d = d;
+ /* Count on the bcache device */
+ ddip->orig_bdev = orig_bdev;
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index f2874c77ff797..9e7a6c3faa420 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -885,9 +885,9 @@ static void bcache_device_free(struct bcache_device *d)
+ bcache_device_detach(d);
+
+ if (disk) {
+- blk_cleanup_disk(disk);
+ ida_simple_remove(&bcache_device_idx,
+ first_minor_to_idx(disk->first_minor));
++ blk_cleanup_disk(disk);
+ }
+
+ bioset_exit(&d->bio_split);
+@@ -1729,7 +1729,7 @@ static void cache_set_flush(struct closure *cl)
+ if (!IS_ERR_OR_NULL(c->gc_thread))
+ kthread_stop(c->gc_thread);
+
+- if (!IS_ERR_OR_NULL(c->root))
++ if (!IS_ERR(c->root))
+ list_add(&c->root->list, &c->btree_cache);
+
+ /*
+@@ -2093,7 +2093,7 @@ static int run_cache_set(struct cache_set *c)
+
+ err = "cannot allocate new btree root";
+ c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
+- if (IS_ERR_OR_NULL(c->root))
++ if (IS_ERR(c->root))
+ goto err;
+
+ mutex_lock(&c->root->write_lock);
+@@ -2131,6 +2131,7 @@ static int run_cache_set(struct cache_set *c)
+
+ flash_devs_run(c);
+
++ bch_journal_space_reserve(&c->journal);
+ set_bit(CACHE_SET_RUNNING, &c->flags);
+ return 0;
+ err:
+diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
+index 8120da278161e..4dcbaf9a2149d 100644
+--- a/drivers/md/bcache/writeback.c
++++ b/drivers/md/bcache/writeback.c
+@@ -157,6 +157,53 @@ static void __update_writeback_rate(struct cached_dev *dc)
+ dc->writeback_rate_target = target;
+ }
+
++static bool idle_counter_exceeded(struct cache_set *c)
++{
++ int counter, dev_nr;
++
++ /*
++ * If c->idle_counter is overflow (idel for really long time),
++ * reset as 0 and not set maximum rate this time for code
++ * simplicity.
++ */
++ counter = atomic_inc_return(&c->idle_counter);
++ if (counter <= 0) {
++ atomic_set(&c->idle_counter, 0);
++ return false;
++ }
++
++ dev_nr = atomic_read(&c->attached_dev_nr);
++ if (dev_nr == 0)
++ return false;
++
++ /*
++ * c->idle_counter is increased by writeback thread of all
++ * attached backing devices, in order to represent a rough
++ * time period, counter should be divided by dev_nr.
++ * Otherwise the idle time cannot be larger with more backing
++ * device attached.
++ * The following calculation equals to checking
++ * (counter / dev_nr) < (dev_nr * 6)
++ */
++ if (counter < (dev_nr * dev_nr * 6))
++ return false;
++
++ return true;
++}
++
++/*
++ * Idle_counter is increased every time when update_writeback_rate() is
++ * called. If all backing devices attached to the same cache set have
++ * identical dc->writeback_rate_update_seconds values, it is about 6
++ * rounds of update_writeback_rate() on each backing device before
++ * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
++ * to each dc->writeback_rate.rate.
++ * In order to avoid extra locking cost for counting exact dirty cached
++ * devices number, c->attached_dev_nr is used to calculate the idle
++ * throushold. It might be bigger if not all cached device are in write-
++ * back mode, but it still works well with limited extra rounds of
++ * update_writeback_rate().
++ */
+ static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+ {
+@@ -167,21 +214,8 @@ static bool set_at_max_writeback_rate(struct cache_set *c,
+ /* Don't set max writeback rate if gc is running */
+ if (!c->gc_mark_valid)
+ return false;
+- /*
+- * Idle_counter is increased everytime when update_writeback_rate() is
+- * called. If all backing devices attached to the same cache set have
+- * identical dc->writeback_rate_update_seconds values, it is about 6
+- * rounds of update_writeback_rate() on each backing device before
+- * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
+- * to each dc->writeback_rate.rate.
+- * In order to avoid extra locking cost for counting exact dirty cached
+- * devices number, c->attached_dev_nr is used to calculate the idle
+- * throushold. It might be bigger if not all cached device are in write-
+- * back mode, but it still works well with limited extra rounds of
+- * update_writeback_rate().
+- */
+- if (atomic_inc_return(&c->idle_counter) <
+- atomic_read(&c->attached_dev_nr) * 6)
++
++ if (!idle_counter_exceeded(c))
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+@@ -195,13 +229,10 @@ static bool set_at_max_writeback_rate(struct cache_set *c,
+ dc->writeback_rate_change = 0;
+
+ /*
+- * Check c->idle_counter and c->at_max_writeback_rate agagain in case
+- * new I/O arrives during before set_at_max_writeback_rate() returns.
+- * Then the writeback rate is set to 1, and its new value should be
+- * decided via __update_writeback_rate().
++ * In case new I/O arrives during before
++ * set_at_max_writeback_rate() returns.
+ */
+- if ((atomic_read(&c->idle_counter) <
+- atomic_read(&c->attached_dev_nr) * 6) ||
++ if (!idle_counter_exceeded(c) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+@@ -802,13 +833,11 @@ static int bch_writeback_thread(void *arg)
+
+ /* Init */
+ #define INIT_KEYS_EACH_TIME 500000
+-#define INIT_KEYS_SLEEP_MS 100
+
+ struct sectors_dirty_init {
+ struct btree_op op;
+ unsigned int inode;
+ size_t count;
+- struct bkey start;
+ };
+
+ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
+@@ -824,11 +853,8 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
+ KEY_START(k), KEY_SIZE(k));
+
+ op->count++;
+- if (atomic_read(&b->c->search_inflight) &&
+- !(op->count % INIT_KEYS_EACH_TIME)) {
+- bkey_copy_key(&op->start, k);
+- return -EAGAIN;
+- }
++ if (!(op->count % INIT_KEYS_EACH_TIME))
++ cond_resched();
+
+ return MAP_CONTINUE;
+ }
+@@ -843,24 +869,26 @@ static int bch_root_node_dirty_init(struct cache_set *c,
+ bch_btree_op_init(&op.op, -1);
+ op.inode = d->id;
+ op.count = 0;
+- op.start = KEY(op.inode, 0, 0);
+-
+- do {
+- ret = bcache_btree(map_keys_recurse,
+- k,
+- c->root,
+- &op.op,
+- &op.start,
+- sectors_dirty_init_fn,
+- 0);
+- if (ret == -EAGAIN)
+- schedule_timeout_interruptible(
+- msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
+- else if (ret < 0) {
+- pr_warn("sectors dirty init failed, ret=%d!\n", ret);
+- break;
+- }
+- } while (ret == -EAGAIN);
++
++ ret = bcache_btree(map_keys_recurse,
++ k,
++ c->root,
++ &op.op,
++ &KEY(op.inode, 0, 0),
++ sectors_dirty_init_fn,
++ 0);
++ if (ret < 0)
++ pr_warn("sectors dirty init failed, ret=%d!\n", ret);
++
++ /*
++ * The op may be added to cache_set's btree_cache_wait
++ * in mca_cannibalize(), must ensure it is removed from
++ * the list and release btree_cache_alloc_lock before
++ * free op memory.
++ * Otherwise, the btree_cache_wait will be damaged.
++ */
++ bch_cannibalize_unlock(c);
++ finish_wait(&c->btree_cache_wait, &(&op.op)->wait);
+
+ return ret;
+ }
+@@ -904,7 +932,6 @@ static int bch_dirty_init_thread(void *arg)
+ goto out;
+ }
+ skip_nr--;
+- cond_resched();
+ }
+
+ if (p) {
+@@ -914,7 +941,6 @@ static int bch_dirty_init_thread(void *arg)
+
+ p = NULL;
+ prev_idx = cur_idx;
+- cond_resched();
+ }
+
+ out:
+@@ -945,65 +971,56 @@ void bch_sectors_dirty_init(struct bcache_device *d)
+ struct btree_iter iter;
+ struct sectors_dirty_init op;
+ struct cache_set *c = d->c;
+- struct bch_dirty_init_state *state;
+- char name[32];
++ struct bch_dirty_init_state state;
+
+ /* Just count root keys if no leaf node */
++ rw_lock(0, c->root, c->root->level);
+ if (c->root->level == 0) {
+ bch_btree_op_init(&op.op, -1);
+ op.inode = d->id;
+ op.count = 0;
+- op.start = KEY(op.inode, 0, 0);
+
+ for_each_key_filter(&c->root->keys,
+ k, &iter, bch_ptr_invalid)
+ sectors_dirty_init_fn(&op.op, c->root, k);
+- return;
+- }
+
+- state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
+- if (!state) {
+- pr_warn("sectors dirty init failed: cannot allocate memory\n");
++ rw_unlock(0, c->root);
+ return;
+ }
+
+- state->c = c;
+- state->d = d;
+- state->total_threads = bch_btre_dirty_init_thread_nr();
+- state->key_idx = 0;
+- spin_lock_init(&state->idx_lock);
+- atomic_set(&state->started, 0);
+- atomic_set(&state->enough, 0);
+- init_waitqueue_head(&state->wait);
+-
+- for (i = 0; i < state->total_threads; i++) {
+- /* Fetch latest state->enough earlier */
++ memset(&state, 0, sizeof(struct bch_dirty_init_state));
++ state.c = c;
++ state.d = d;
++ state.total_threads = bch_btre_dirty_init_thread_nr();
++ state.key_idx = 0;
++ spin_lock_init(&state.idx_lock);
++ atomic_set(&state.started, 0);
++ atomic_set(&state.enough, 0);
++ init_waitqueue_head(&state.wait);
++
++ for (i = 0; i < state.total_threads; i++) {
++ /* Fetch latest state.enough earlier */
+ smp_mb__before_atomic();
+- if (atomic_read(&state->enough))
++ if (atomic_read(&state.enough))
+ break;
+
+- state->infos[i].state = state;
+- atomic_inc(&state->started);
+- snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
+-
+- state->infos[i].thread =
+- kthread_run(bch_dirty_init_thread,
+- &state->infos[i],
+- name);
+- if (IS_ERR(state->infos[i].thread)) {
++ state.infos[i].state = &state;
++ state.infos[i].thread =
++ kthread_run(bch_dirty_init_thread, &state.infos[i],
++ "bch_dirtcnt[%d]", i);
++ if (IS_ERR(state.infos[i].thread)) {
+ pr_err("fails to run thread bch_dirty_init[%d]\n", i);
+ for (--i; i >= 0; i--)
+- kthread_stop(state->infos[i].thread);
++ kthread_stop(state.infos[i].thread);
+ goto out;
+ }
++ atomic_inc(&state.started);
+ }
+
+- wait_event_interruptible(state->wait,
+- atomic_read(&state->started) == 0 ||
+- test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+-
+ out:
+- kfree(state);
++ /* Must wait for all threads to stop. */
++ wait_event(state.wait, atomic_read(&state.started) == 0);
++ rw_unlock(0, c->root);
+ }
+
+ void bch_cached_dev_writeback_init(struct cached_dev *dc)
+diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
+index 02b2f9df73f69..31df716951f66 100644
+--- a/drivers/md/bcache/writeback.h
++++ b/drivers/md/bcache/writeback.h
+@@ -20,7 +20,7 @@
+ #define BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID 57
+ #define BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH 64
+
+-#define BCH_DIRTY_INIT_THRD_MAX 64
++#define BCH_DIRTY_INIT_THRD_MAX 12
+ /*
+ * 14 (16384ths) is chosen here as something that each backing device
+ * should be a reasonable fraction of the share, and not to blow up
+diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
+index 89a73204dbf47..0f6f74e3030f7 100644
+--- a/drivers/md/dm-cache-metadata.c
++++ b/drivers/md/dm-cache-metadata.c
+@@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
+ return r;
+ }
+
+-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
++static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd,
++ bool destroy_bm)
+ {
+ dm_sm_destroy(cmd->metadata_sm);
+ dm_tm_destroy(cmd->tm);
+- dm_block_manager_destroy(cmd->bm);
++ if (destroy_bm)
++ dm_block_manager_destroy(cmd->bm);
+ }
+
+ typedef unsigned long (*flags_mutator)(unsigned long);
+@@ -826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+ cmd2 = lookup(bdev);
+ if (cmd2) {
+ mutex_unlock(&table_lock);
+- __destroy_persistent_data_objects(cmd);
++ __destroy_persistent_data_objects(cmd, true);
+ kfree(cmd);
+ return cmd2;
+ }
+@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
+ mutex_unlock(&table_lock);
+
+ if (!cmd->fail_io)
+- __destroy_persistent_data_objects(cmd);
++ __destroy_persistent_data_objects(cmd, true);
+ kfree(cmd);
+ }
+ }
+@@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
+
+ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+ {
+- int r;
++ int r = -EINVAL;
++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++ /* fail_io is double-checked with cmd->root_lock held below */
++ if (unlikely(cmd->fail_io))
++ return r;
++
++ /*
++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++ * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++ * shrinker associated with the block manager's bufio client vs cmd root_lock).
++ * - must take shrinker_rwsem without holding cmd->root_lock
++ */
++ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++ CACHE_MAX_CONCURRENT_LOCKS);
+
+ WRITE_LOCK(cmd);
+- __destroy_persistent_data_objects(cmd);
+- r = __create_persistent_data_objects(cmd, false);
++ if (cmd->fail_io) {
++ WRITE_UNLOCK(cmd);
++ goto out;
++ }
++
++ __destroy_persistent_data_objects(cmd, false);
++ old_bm = cmd->bm;
++ if (IS_ERR(new_bm)) {
++ DMERR("could not create block manager during abort");
++ cmd->bm = NULL;
++ r = PTR_ERR(new_bm);
++ goto out_unlock;
++ }
++
++ cmd->bm = new_bm;
++ r = __open_or_format_metadata(cmd, false);
++ if (r) {
++ cmd->bm = NULL;
++ goto out_unlock;
++ }
++ new_bm = NULL;
++out_unlock:
+ if (r)
+ cmd->fail_io = true;
+ WRITE_UNLOCK(cmd);
++ dm_block_manager_destroy(old_bm);
++out:
++ if (new_bm && !IS_ERR(new_bm))
++ dm_block_manager_destroy(new_bm);
+
+ return r;
+ }
+diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
+index b61aac00ff409..859073193f5b4 100644
+--- a/drivers/md/dm-cache-policy-smq.c
++++ b/drivers/md/dm-cache-policy-smq.c
+@@ -854,7 +854,13 @@ struct smq_policy {
+
+ struct background_tracker *bg_work;
+
+- bool migrations_allowed;
++ bool migrations_allowed:1;
++
++ /*
++ * If this is set the policy will try and clean the whole cache
++ * even if the device is not idle.
++ */
++ bool cleaner:1;
+ };
+
+ /*----------------------------------------------------------------*/
+@@ -1133,7 +1139,7 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
+ * Cache entries may not be populated. So we cannot rely on the
+ * size of the clean queue.
+ */
+- if (idle) {
++ if (idle || mq->cleaner) {
+ /*
+ * We'd like to clean everything.
+ */
+@@ -1716,11 +1722,9 @@ static void calc_hotspot_params(sector_t origin_size,
+ *hotspot_block_size /= 2u;
+ }
+
+-static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
+- sector_t origin_size,
+- sector_t cache_block_size,
+- bool mimic_mq,
+- bool migrations_allowed)
++static struct dm_cache_policy *
++__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size,
++ bool mimic_mq, bool migrations_allowed, bool cleaner)
+ {
+ unsigned i;
+ unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
+@@ -1807,6 +1811,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
+ goto bad_btracker;
+
+ mq->migrations_allowed = migrations_allowed;
++ mq->cleaner = cleaner;
+
+ return &mq->policy;
+
+@@ -1830,21 +1835,24 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
+ sector_t origin_size,
+ sector_t cache_block_size)
+ {
+- return __smq_create(cache_size, origin_size, cache_block_size, false, true);
++ return __smq_create(cache_size, origin_size, cache_block_size,
++ false, true, false);
+ }
+
+ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
+ sector_t origin_size,
+ sector_t cache_block_size)
+ {
+- return __smq_create(cache_size, origin_size, cache_block_size, true, true);
++ return __smq_create(cache_size, origin_size, cache_block_size,
++ true, true, false);
+ }
+
+ static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
+ sector_t origin_size,
+ sector_t cache_block_size)
+ {
+- return __smq_create(cache_size, origin_size, cache_block_size, false, false);
++ return __smq_create(cache_size, origin_size, cache_block_size,
++ false, false, true);
+ }
+
+ /*----------------------------------------------------------------*/
+diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
+index bdd500447dea2..24cd28ea2c595 100644
+--- a/drivers/md/dm-cache-target.c
++++ b/drivers/md/dm-cache-target.c
+@@ -915,16 +915,16 @@ static void abort_transaction(struct cache *cache)
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return;
+
+- if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+- DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+- set_cache_mode(cache, CM_FAIL);
+- }
+-
+ DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
+ if (dm_cache_metadata_abort(cache->cmd)) {
+ DMERR("%s: failed to abort metadata transaction", dev_name);
+ set_cache_mode(cache, CM_FAIL);
+ }
++
++ if (dm_cache_metadata_set_needs_check(cache->cmd)) {
++ DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
++ set_cache_mode(cache, CM_FAIL);
++ }
+ }
+
+ static void metadata_operation_failed(struct cache *cache, const char *op, int r)
+@@ -1813,6 +1813,7 @@ static void process_deferred_bios(struct work_struct *ws)
+
+ else
+ commit_needed = process_bio(cache, bio) || commit_needed;
++ cond_resched();
+ }
+
+ if (commit_needed)
+@@ -1835,6 +1836,7 @@ static void requeue_deferred_bios(struct cache *cache)
+ while ((bio = bio_list_pop(&bios))) {
+ bio->bi_status = BLK_STS_DM_REQUEUE;
+ bio_endio(bio);
++ cond_resched();
+ }
+ }
+
+@@ -1875,6 +1877,8 @@ static void check_migrations(struct work_struct *ws)
+ r = mg_start(cache, op, NULL);
+ if (r)
+ break;
++
++ cond_resched();
+ }
+ }
+
+@@ -1895,6 +1899,7 @@ static void destroy(struct cache *cache)
+ if (cache->prison)
+ dm_bio_prison_destroy_v2(cache->prison);
+
++ cancel_delayed_work_sync(&cache->waker);
+ if (cache->wq)
+ destroy_workqueue(cache->wq);
+
+diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
+index edd22e4d65dff..f49fdc9fb5c39 100644
+--- a/drivers/md/dm-clone-target.c
++++ b/drivers/md/dm-clone-target.c
+@@ -1959,6 +1959,7 @@ static void clone_dtr(struct dm_target *ti)
+
+ mempool_exit(&clone->hydration_pool);
+ dm_kcopyd_client_destroy(clone->kcopyd_client);
++ cancel_delayed_work_sync(&clone->waker);
+ destroy_workqueue(clone->wq);
+ hash_table_exit(clone);
+ dm_clone_metadata_close(clone->cmd);
+@@ -2213,6 +2214,7 @@ static int __init dm_clone_init(void)
+ r = dm_register_target(&clone_target);
+ if (r < 0) {
+ DMERR("Failed to register clone target");
++ kmem_cache_destroy(_hydration_cache);
+ return r;
+ }
+
+diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
+index 55dccdfbcb22e..5a7d270b32c01 100644
+--- a/drivers/md/dm-core.h
++++ b/drivers/md/dm-core.h
+@@ -65,6 +65,8 @@ struct mapped_device {
+ struct gendisk *disk;
+ struct dax_device *dax_dev;
+
++ unsigned long __percpu *pending_io;
++
+ /*
+ * A list of ios that arrived while we were suspended.
+ */
+diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
+index 916b7da16de25..a428770102a37 100644
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -68,7 +68,9 @@ struct dm_crypt_io {
+ struct crypt_config *cc;
+ struct bio *base_bio;
+ u8 *integrity_metadata;
+- bool integrity_metadata_from_pool;
++ bool integrity_metadata_from_pool:1;
++ bool in_tasklet:1;
++
+ struct work_struct work;
+ struct tasklet_struct tasklet;
+
+@@ -1723,6 +1725,7 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
+ io->ctx.r.req = NULL;
+ io->integrity_metadata = NULL;
+ io->integrity_metadata_from_pool = false;
++ io->in_tasklet = false;
+ atomic_set(&io->io_pending, 0);
+ }
+
+@@ -1768,14 +1771,13 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
+ * our tasklet. In this case we need to delay bio_endio()
+ * execution to after the tasklet is done and dequeued.
+ */
+- if (tasklet_trylock(&io->tasklet)) {
+- tasklet_unlock(&io->tasklet);
+- bio_endio(base_bio);
++ if (io->in_tasklet) {
++ INIT_WORK(&io->work, kcryptd_io_bio_endio);
++ queue_work(cc->io_queue, &io->work);
+ return;
+ }
+
+- INIT_WORK(&io->work, kcryptd_io_bio_endio);
+- queue_work(cc->io_queue, &io->work);
++ bio_endio(base_bio);
+ }
+
+ /*
+@@ -1935,6 +1937,7 @@ pop_from_list:
+ io = crypt_io_from_node(rb_first(&write_tree));
+ rb_erase(&io->rb_node, &write_tree);
+ kcryptd_io_write(io);
++ cond_resched();
+ } while (!RB_EMPTY_ROOT(&write_tree));
+ blk_finish_plug(&plug);
+ }
+@@ -2228,6 +2231,7 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
+ * it is being executed with irqs disabled.
+ */
+ if (in_hardirq() || irqs_disabled()) {
++ io->in_tasklet = true;
+ tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
+ tasklet_schedule(&io->tasklet);
+ return;
+@@ -2579,7 +2583,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
+
+ static int get_key_size(char **key_string)
+ {
+- return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1;
++ return (*key_string[0] == ':') ? -EINVAL : (int)(strlen(*key_string) >> 1);
+ }
+
+ #endif /* CONFIG_KEYS */
+@@ -3435,6 +3439,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
+ return DM_MAPIO_SUBMITTED;
+ }
+
++static char hex2asc(unsigned char c)
++{
++ return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27);
++}
++
+ static void crypt_status(struct dm_target *ti, status_type_t type,
+ unsigned status_flags, char *result, unsigned maxlen)
+ {
+@@ -3453,9 +3462,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
+ if (cc->key_size > 0) {
+ if (cc->key_string)
+ DMEMIT(":%u:%s", cc->key_size, cc->key_string);
+- else
+- for (i = 0; i < cc->key_size; i++)
+- DMEMIT("%02x", cc->key[i]);
++ else {
++ for (i = 0; i < cc->key_size; i++) {
++ DMEMIT("%c%c", hex2asc(cc->key[i] >> 4),
++ hex2asc(cc->key[i] & 0xf));
++ }
++ }
+ } else
+ DMEMIT("-");
+
+diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
+index 2a78f68741431..a56df45366059 100644
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1400,7 +1400,7 @@ static void start_worker(struct era *era)
+ static void stop_worker(struct era *era)
+ {
+ atomic_set(&era->suspended, 1);
+- flush_workqueue(era->wq);
++ drain_workqueue(era->wq);
+ }
+
+ /*----------------------------------------------------------------
+@@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_target *ti)
+ }
+
+ stop_worker(era);
++
++ r = metadata_commit(era->md);
++ if (r) {
++ DMERR("%s: metadata_commit failed", __func__);
++ /* FIXME: fail mode */
++ }
+ }
+
+ static int era_preresume(struct dm_target *ti)
+diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
+index 4b94ffe6f2d4f..6f3eb161ad2a7 100644
+--- a/drivers/md/dm-flakey.c
++++ b/drivers/md/dm-flakey.c
+@@ -124,9 +124,9 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
+ * Direction r or w?
+ */
+ arg_name = dm_shift_arg(as);
+- if (!strcasecmp(arg_name, "w"))
++ if (arg_name && !strcasecmp(arg_name, "w"))
+ fc->corrupt_bio_rw = WRITE;
+- else if (!strcasecmp(arg_name, "r"))
++ else if (arg_name && !strcasecmp(arg_name, "r"))
+ fc->corrupt_bio_rw = READ;
+ else {
+ ti->error = "Invalid corrupt bio direction (r or w)";
+@@ -301,9 +301,13 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+ */
+ bio_for_each_segment(bvec, bio, iter) {
+ if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
+- char *segment = (page_address(bio_iter_page(bio, iter))
+- + bio_iter_offset(bio, iter));
++ char *segment;
++ struct page *page = bio_iter_page(bio, iter);
++ if (unlikely(page == ZERO_PAGE(0)))
++ break;
++ segment = bvec_kmap_local(&bvec);
+ segment[corrupt_bio_byte] = fc->corrupt_bio_value;
++ kunmap_local(segment);
+ DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+ "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
+ bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+@@ -359,9 +363,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
+ /*
+ * Corrupt matching writes.
+ */
+- if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) {
+- if (all_corrupt_bio_flags_match(bio, fc))
+- corrupt_bio_data(bio, fc);
++ if (fc->corrupt_bio_byte) {
++ if (fc->corrupt_bio_rw == WRITE) {
++ if (all_corrupt_bio_flags_match(bio, fc))
++ corrupt_bio_data(bio, fc);
++ }
+ goto map_bio;
+ }
+
+@@ -387,13 +393,14 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ return DM_ENDIO_DONE;
+
+ if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+- if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
+- all_corrupt_bio_flags_match(bio, fc)) {
+- /*
+- * Corrupt successful matching READs while in down state.
+- */
+- corrupt_bio_data(bio, fc);
+-
++ if (fc->corrupt_bio_byte) {
++ if ((fc->corrupt_bio_rw == READ) &&
++ all_corrupt_bio_flags_match(bio, fc)) {
++ /*
++ * Corrupt successful matching READs while in down state.
++ */
++ corrupt_bio_data(bio, fc);
++ }
+ } else if (!test_bit(DROP_WRITES, &fc->flags) &&
+ !test_bit(ERROR_WRITES, &fc->flags)) {
+ /*
+diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
+index b0c45c6ebe0bf..dc4381d683131 100644
+--- a/drivers/md/dm-init.c
++++ b/drivers/md/dm-init.c
+@@ -8,6 +8,7 @@
+ */
+
+ #include <linux/ctype.h>
++#include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/device-mapper.h>
+ #include <linux/init.h>
+@@ -18,12 +19,17 @@
+ #define DM_MAX_DEVICES 256
+ #define DM_MAX_TARGETS 256
+ #define DM_MAX_STR_SIZE 4096
++#define DM_MAX_WAITFOR 256
+
+ static char *create;
+
++static char *waitfor[DM_MAX_WAITFOR];
++
+ /*
+ * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+ * Table format: <start_sector> <num_sectors> <target_type> <target_args>
++ * Block devices to wait for to become available before setting up tables:
++ * dm-mod.waitfor=<device1>[,..,<deviceN>]
+ *
+ * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format
+ * details.
+@@ -266,7 +272,7 @@ static int __init dm_init_init(void)
+ struct dm_device *dev;
+ LIST_HEAD(devices);
+ char *str;
+- int r;
++ int i, r;
+
+ if (!create)
+ return 0;
+@@ -286,6 +292,17 @@ static int __init dm_init_init(void)
+ DMINFO("waiting for all devices to be available before creating mapped devices");
+ wait_for_device_probe();
+
++ for (i = 0; i < ARRAY_SIZE(waitfor); i++) {
++ if (waitfor[i]) {
++ DMINFO("waiting for device %s ...", waitfor[i]);
++ while (!dm_get_dev_t(waitfor[i]))
++ msleep(5);
++ }
++ }
++
++ if (waitfor[0])
++ DMINFO("all devices available");
++
+ list_for_each_entry(dev, &devices, list) {
+ if (dm_early_create(&dev->dmi, dev->table,
+ dev->target_args_array))
+@@ -301,3 +318,6 @@ late_initcall(dm_init_init);
+
+ module_param(create, charp, 0);
+ MODULE_PARM_DESC(create, "Create a mapped device in early boot");
++
++module_param_array(waitfor, charp, NULL, 0);
++MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables");
+diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
+index dc03b70f6e65c..455788b6e5a1c 100644
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -31,11 +31,11 @@
+ #define DEFAULT_BUFFER_SECTORS 128
+ #define DEFAULT_JOURNAL_WATERMARK 50
+ #define DEFAULT_SYNC_MSEC 10000
+-#define DEFAULT_MAX_JOURNAL_SECTORS 131072
++#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192)
+ #define MIN_LOG2_INTERLEAVE_SECTORS 3
+ #define MAX_LOG2_INTERLEAVE_SECTORS 31
+ #define METADATA_WORKQUEUE_MAX_ACTIVE 16
+-#define RECALC_SECTORS 32768
++#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048)
+ #define RECALC_WRITE_SUPER 16
+ #define BITMAP_BLOCK_SIZE 4096 /* don't change it */
+ #define BITMAP_FLUSH_INTERVAL (10 * HZ)
+@@ -259,6 +259,7 @@ struct dm_integrity_c {
+
+ struct completion crypto_backoff;
+
++ bool wrote_to_journal;
+ bool journal_uptodate;
+ bool just_formatted;
+ bool recalculate_flag;
+@@ -2361,6 +2362,8 @@ static void integrity_commit(struct work_struct *w)
+ if (!commit_sections)
+ goto release_flush_bios;
+
++ ic->wrote_to_journal = true;
++
+ i = commit_start;
+ for (n = 0; n < commit_sections; n++) {
+ for (j = 0; j < ic->journal_section_entries; j++) {
+@@ -2459,9 +2462,11 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
+ dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
+ sec &= ~(sector_t)(ic->sectors_per_block - 1);
+ }
++ if (unlikely(sec >= ic->provided_data_sectors)) {
++ journal_entry_set_unused(je);
++ continue;
++ }
+ }
+- if (unlikely(sec >= ic->provided_data_sectors))
+- continue;
+ get_area_and_offset(ic, sec, &area, &offset);
+ restore_last_bytes(ic, access_journal_data(ic, i, j), je);
+ for (k = j + 1; k < ic->journal_section_entries; k++) {
+@@ -2573,10 +2578,6 @@ static void integrity_writer(struct work_struct *w)
+
+ unsigned prev_free_sectors;
+
+- /* the following test is not needed, but it tests the replay code */
+- if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
+- return;
+-
+ spin_lock_irq(&ic->endio_wait.lock);
+ write_start = ic->committed_section;
+ write_sections = ic->n_committed_sections;
+@@ -3083,10 +3084,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
+ drain_workqueue(ic->commit_wq);
+
+ if (ic->mode == 'J') {
+- if (ic->meta_dev)
+- queue_work(ic->writer_wq, &ic->writer_work);
++ queue_work(ic->writer_wq, &ic->writer_work);
+ drain_workqueue(ic->writer_wq);
+ dm_integrity_flush_buffers(ic, true);
++ if (ic->wrote_to_journal) {
++ init_journal(ic, ic->free_section,
++ ic->journal_sections - ic->free_section, ic->commit_seq);
++ if (ic->free_section) {
++ init_journal(ic, 0, ic->free_section,
++ next_commit_seq(ic->commit_seq));
++ }
++ }
+ }
+
+ if (ic->mode == 'B') {
+@@ -3114,6 +3122,8 @@ static void dm_integrity_resume(struct dm_target *ti)
+
+ DEBUG_print("resume\n");
+
++ ic->wrote_to_journal = false;
++
+ if (ic->provided_data_sectors != old_provided_data_sectors) {
+ if (ic->provided_data_sectors > old_provided_data_sectors &&
+ ic->mode == 'B' &&
+@@ -4381,6 +4391,7 @@ try_smaller_buffer:
+ }
+
+ if (ic->internal_hash) {
++ size_t recalc_tags_size;
+ ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
+ if (!ic->recalc_wq ) {
+ ti->error = "Cannot allocate workqueue";
+@@ -4394,8 +4405,10 @@ try_smaller_buffer:
+ r = -ENOMEM;
+ goto bad;
+ }
+- ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
+- ic->tag_size, GFP_KERNEL);
++ recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
++ if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
++ recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
++ ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
+ if (!ic->recalc_tags) {
+ ti->error = "Cannot allocate tags for recalculating";
+ r = -ENOMEM;
+@@ -4473,8 +4486,6 @@ try_smaller_buffer:
+ }
+
+ if (should_write_sb) {
+- int r;
+-
+ init_journal(ic, 0, ic->journal_sections, 0);
+ r = dm_integrity_failed(ic);
+ if (unlikely(r)) {
+@@ -4528,6 +4539,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
+ BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
+ BUG_ON(!list_empty(&ic->wait_list));
+
++ if (ic->mode == 'B')
++ cancel_delayed_work_sync(&ic->bitmap_flush_work);
+ if (ic->metadata_wq)
+ destroy_workqueue(ic->metadata_wq);
+ if (ic->wait_wq)
+@@ -4619,11 +4632,13 @@ static int __init dm_integrity_init(void)
+ }
+
+ r = dm_register_target(&integrity_target);
+-
+- if (r < 0)
++ if (r < 0) {
+ DMERR("register failed %d", r);
++ kmem_cache_destroy(journal_io_cache);
++ return r;
++ }
+
+- return r;
++ return 0;
+ }
+
+ static void __exit dm_integrity_exit(void)
+diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
+index 21fe8652b095b..815c41e1ebdb8 100644
+--- a/drivers/md/dm-ioctl.c
++++ b/drivers/md/dm-ioctl.c
+@@ -18,6 +18,7 @@
+ #include <linux/dm-ioctl.h>
+ #include <linux/hdreg.h>
+ #include <linux/compat.h>
++#include <linux/nospec.h>
+
+ #include <linux/uaccess.h>
+ #include <linux/ima.h>
+@@ -481,7 +482,7 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
+ dm_table_event(table);
+ dm_put_live_table(hc->md, srcu_idx);
+
+- if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr))
++ if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr, false))
+ param->flags |= DM_UEVENT_GENERATED_FLAG;
+
+ md = hc->md;
+@@ -654,7 +655,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param)
+ size_t *needed = needed_param;
+
+ *needed += sizeof(struct dm_target_versions);
+- *needed += strlen(tt->name);
++ *needed += strlen(tt->name) + 1;
+ *needed += ALIGN_MASK;
+ }
+
+@@ -719,7 +720,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char
+ iter_info.old_vers = NULL;
+ iter_info.vers = vers;
+ iter_info.flags = 0;
+- iter_info.end = (char *)vers+len;
++ iter_info.end = (char *)vers + needed;
+
+ /*
+ * Now loop through filling out the names & versions.
+@@ -988,7 +989,7 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si
+
+ dm_ima_measure_on_device_remove(md, false);
+
+- if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
++ if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr, false))
+ param->flags |= DM_UEVENT_GENERATED_FLAG;
+
+ dm_put(md);
+@@ -1122,6 +1123,7 @@ static int do_resume(struct dm_ioctl *param)
+ struct hash_cell *hc;
+ struct mapped_device *md;
+ struct dm_table *new_map, *old_map = NULL;
++ bool need_resize_uevent = false;
+
+ down_write(&_hash_lock);
+
+@@ -1142,6 +1144,8 @@ static int do_resume(struct dm_ioctl *param)
+
+ /* Do we need to load a new map ? */
+ if (new_map) {
++ sector_t old_size, new_size;
++
+ /* Suspend if it isn't already suspended */
+ if (param->flags & DM_SKIP_LOCKFS_FLAG)
+ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
+@@ -1150,6 +1154,7 @@ static int do_resume(struct dm_ioctl *param)
+ if (!dm_suspended_md(md))
+ dm_suspend(md, suspend_flags);
+
++ old_size = dm_get_size(md);
+ old_map = dm_swap_table(md, new_map);
+ if (IS_ERR(old_map)) {
+ dm_sync_table(md);
+@@ -1157,6 +1162,9 @@ static int do_resume(struct dm_ioctl *param)
+ dm_put(md);
+ return PTR_ERR(old_map);
+ }
++ new_size = dm_get_size(md);
++ if (old_size && new_size && old_size != new_size)
++ need_resize_uevent = true;
+
+ if (dm_table_get_mode(new_map) & FMODE_WRITE)
+ set_disk_ro(dm_disk(md), 0);
+@@ -1169,7 +1177,7 @@ static int do_resume(struct dm_ioctl *param)
+ if (!r) {
+ dm_ima_measure_on_device_resume(md, new_map ? true : false);
+
+- if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
++ if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr, need_resize_uevent))
+ param->flags |= DM_UEVENT_GENERATED_FLAG;
+ }
+ }
+@@ -1525,11 +1533,12 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s
+ has_new_map = true;
+ }
+
+- param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+-
+- __dev_status(hc->md, param);
+ md = hc->md;
+ up_write(&_hash_lock);
++
++ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
++ __dev_status(md, param);
++
+ if (old_map) {
+ dm_sync_table(md);
+ dm_table_destroy(old_map);
+@@ -1788,6 +1797,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
+ if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
+ return NULL;
+
++ cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls));
+ *ioctl_flags = _ioctls[cmd].flags;
+ return _ioctls[cmd].fn;
+ }
+diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
+index 1ecf75ef276a4..b40741bedfd43 100644
+--- a/drivers/md/dm-log.c
++++ b/drivers/md/dm-log.c
+@@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
+ /*
+ * Work out how many "unsigned long"s we need to hold the bitset.
+ */
+- bitset_size = dm_round_up(region_count,
+- sizeof(*lc->clean_bits) << BYTE_SHIFT);
++ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
+ bitset_size >>= BYTE_SHIFT;
+
+ lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
+@@ -616,7 +615,7 @@ static int disk_resume(struct dm_dirty_log *log)
+ log_clear_bit(lc, lc->clean_bits, i);
+
+ /* clear any old bits -- device has shrunk */
+- for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
++ for (i = lc->region_count; i % BITS_PER_LONG; i++)
+ log_clear_bit(lc, lc->clean_bits, i);
+
+ /* copy clean across to sync */
+diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
+index 1856a1b125cc1..82f2a06153dc0 100644
+--- a/drivers/md/dm-ps-historical-service-time.c
++++ b/drivers/md/dm-ps-historical-service-time.c
+@@ -432,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps,
+ {
+ struct selector *s = ps->context;
+ struct path_info *pi = NULL, *best = NULL;
+- u64 time_now = sched_clock();
++ u64 time_now = ktime_get_ns();
+ struct dm_path *ret = NULL;
+ unsigned long flags;
+
+@@ -473,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path,
+
+ static u64 path_service_time(struct path_info *pi, u64 start_time)
+ {
+- u64 sched_now = ktime_get_ns();
++ u64 now = ktime_get_ns();
+
+ /* if a previous disk request has finished after this IO was
+ * sent to the hardware, pretend the submission happened
+@@ -482,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time)
+ if (time_after64(pi->last_finish, start_time))
+ start_time = pi->last_finish;
+
+- pi->last_finish = sched_now;
+- if (time_before64(sched_now, start_time))
++ pi->last_finish = now;
++ if (time_before64(now, start_time))
+ return 0;
+
+- return sched_now - start_time;
++ return now - start_time;
+ }
+
+ static int hst_end_io(struct path_selector *ps, struct dm_path *path,
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index d9ef52159a22b..8d489933d5792 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -1001,12 +1001,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
+ static int validate_raid_redundancy(struct raid_set *rs)
+ {
+ unsigned int i, rebuild_cnt = 0;
+- unsigned int rebuilds_per_group = 0, copies;
++ unsigned int rebuilds_per_group = 0, copies, raid_disks;
+ unsigned int group_size, last_group_start;
+
+- for (i = 0; i < rs->md.raid_disks; i++)
+- if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+- !rs->dev[i].rdev.sb_page)
++ for (i = 0; i < rs->raid_disks; i++)
++ if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
++ ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
++ !rs->dev[i].rdev.sb_page)))
+ rebuild_cnt++;
+
+ switch (rs->md.level) {
+@@ -1046,8 +1047,9 @@ static int validate_raid_redundancy(struct raid_set *rs)
+ * A A B B C
+ * C D D E E
+ */
++ raid_disks = min(rs->raid_disks, rs->md.raid_disks);
+ if (__is_raid10_near(rs->md.new_layout)) {
+- for (i = 0; i < rs->md.raid_disks; i++) {
++ for (i = 0; i < raid_disks; i++) {
+ if (!(i % copies))
+ rebuilds_per_group = 0;
+ if ((!rs->dev[i].rdev.sb_page ||
+@@ -1070,10 +1072,10 @@ static int validate_raid_redundancy(struct raid_set *rs)
+ * results in the need to treat the last (potentially larger)
+ * set differently.
+ */
+- group_size = (rs->md.raid_disks / copies);
+- last_group_start = (rs->md.raid_disks / group_size) - 1;
++ group_size = (raid_disks / copies);
++ last_group_start = (raid_disks / group_size) - 1;
+ last_group_start *= group_size;
+- for (i = 0; i < rs->md.raid_disks; i++) {
++ for (i = 0; i < raid_disks; i++) {
+ if (!(i % copies) && !(i > last_group_start))
+ rebuilds_per_group = 0;
+ if ((!rs->dev[i].rdev.sb_page ||
+@@ -1588,7 +1590,7 @@ static sector_t __rdev_sectors(struct raid_set *rs)
+ {
+ int i;
+
+- for (i = 0; i < rs->md.raid_disks; i++) {
++ for (i = 0; i < rs->raid_disks; i++) {
+ struct md_rdev *rdev = &rs->dev[i].rdev;
+
+ if (!test_bit(Journal, &rdev->flags) &&
+@@ -3256,8 +3258,7 @@ size_check:
+ r = md_start(&rs->md);
+ if (r) {
+ ti->error = "Failed to start raid array";
+- mddev_unlock(&rs->md);
+- goto bad_md_start;
++ goto bad_unlock;
+ }
+
+ /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
+@@ -3265,8 +3266,7 @@ size_check:
+ r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
+ if (r) {
+ ti->error = "Failed to set raid4/5/6 journal mode";
+- mddev_unlock(&rs->md);
+- goto bad_journal_mode_set;
++ goto bad_unlock;
+ }
+ }
+
+@@ -3277,14 +3277,14 @@ size_check:
+ if (rs_is_raid456(rs)) {
+ r = rs_set_raid456_stripe_cache(rs);
+ if (r)
+- goto bad_stripe_cache;
++ goto bad_unlock;
+ }
+
+ /* Now do an early reshape check */
+ if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
+ r = rs_check_reshape(rs);
+ if (r)
+- goto bad_check_reshape;
++ goto bad_unlock;
+
+ /* Restore new, ctr requested layout to perform check */
+ rs_config_restore(rs, &rs_layout);
+@@ -3293,7 +3293,7 @@ size_check:
+ r = rs->md.pers->check_reshape(&rs->md);
+ if (r) {
+ ti->error = "Reshape check failed";
+- goto bad_check_reshape;
++ goto bad_unlock;
+ }
+ }
+ }
+@@ -3304,11 +3304,9 @@ size_check:
+ mddev_unlock(&rs->md);
+ return 0;
+
+-bad_md_start:
+-bad_journal_mode_set:
+-bad_stripe_cache:
+-bad_check_reshape:
++bad_unlock:
+ md_stop(&rs->md);
++ mddev_unlock(&rs->md);
+ bad:
+ raid_set_free(rs);
+
+@@ -3319,7 +3317,9 @@ static void raid_dtr(struct dm_target *ti)
+ {
+ struct raid_set *rs = ti->private;
+
++ mddev_lock_nointr(&rs->md);
+ md_stop(&rs->md);
++ mddev_unlock(&rs->md);
+ raid_set_free(rs);
+ }
+
+@@ -3512,7 +3512,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
+ {
+ struct raid_set *rs = ti->private;
+ struct mddev *mddev = &rs->md;
+- struct r5conf *conf = mddev->private;
++ struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
+ int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
+ unsigned long recovery;
+ unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
+@@ -3771,13 +3771,13 @@ static int raid_iterate_devices(struct dm_target *ti,
+ unsigned int i;
+ int r = 0;
+
+- for (i = 0; !r && i < rs->md.raid_disks; i++)
+- if (rs->dev[i].data_dev)
+- r = fn(ti,
+- rs->dev[i].data_dev,
+- 0, /* No offset on data devs */
+- rs->md.dev_sectors,
+- data);
++ for (i = 0; !r && i < rs->raid_disks; i++) {
++ if (rs->dev[i].data_dev) {
++ r = fn(ti, rs->dev[i].data_dev,
++ 0, /* No offset on data devs */
++ rs->md.dev_sectors, data);
++ }
++ }
+
+ return r;
+ }
+@@ -3822,7 +3822,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+
+ memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
+- for (i = 0; i < mddev->raid_disks; i++) {
++ for (i = 0; i < rs->raid_disks; i++) {
+ r = &rs->dev[i].rdev;
+ /* HM FIXME: enhance journal device recovery processing */
+ if (test_bit(Journal, &r->flags))
+diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
+index a896dea9750e4..53a9b16c7b2e6 100644
+--- a/drivers/md/dm-rq.c
++++ b/drivers/md/dm-rq.c
+@@ -500,8 +500,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+
+ if (unlikely(!ti)) {
+ int srcu_idx;
+- struct dm_table *map = dm_get_live_table(md, &srcu_idx);
++ struct dm_table *map;
+
++ map = dm_get_live_table(md, &srcu_idx);
++ if (unlikely(!map)) {
++ dm_put_live_table(md, srcu_idx);
++ return BLK_STS_RESOURCE;
++ }
+ ti = dm_table_find_target(map, 0);
+ dm_put_live_table(md, srcu_idx);
+ }
+diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
+index 35d368c418d03..9f71d169587f5 100644
+--- a/drivers/md/dm-stats.c
++++ b/drivers/md/dm-stats.c
+@@ -188,19 +188,25 @@ static int dm_stat_in_flight(struct dm_stat_shared *shared)
+ atomic_read(&shared->in_flight[WRITE]);
+ }
+
+-void dm_stats_init(struct dm_stats *stats)
++int dm_stats_init(struct dm_stats *stats)
+ {
+ int cpu;
+ struct dm_stats_last_position *last;
+
+ mutex_init(&stats->mutex);
+ INIT_LIST_HEAD(&stats->list);
++ stats->precise_timestamps = false;
+ stats->last = alloc_percpu(struct dm_stats_last_position);
++ if (!stats->last)
++ return -ENOMEM;
++
+ for_each_possible_cpu(cpu) {
+ last = per_cpu_ptr(stats->last, cpu);
+ last->last_sector = (sector_t)ULLONG_MAX;
+ last->last_rw = UINT_MAX;
+ }
++
++ return 0;
+ }
+
+ void dm_stats_cleanup(struct dm_stats *stats)
+@@ -224,6 +230,7 @@ void dm_stats_cleanup(struct dm_stats *stats)
+ atomic_read(&shared->in_flight[READ]),
+ atomic_read(&shared->in_flight[WRITE]));
+ }
++ cond_resched();
+ }
+ dm_stat_free(&s->rcu_head);
+ }
+@@ -231,6 +238,22 @@ void dm_stats_cleanup(struct dm_stats *stats)
+ mutex_destroy(&stats->mutex);
+ }
+
++static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats)
++{
++ struct list_head *l;
++ struct dm_stat *tmp_s;
++ bool precise_timestamps = false;
++
++ list_for_each(l, &stats->list) {
++ tmp_s = container_of(l, struct dm_stat, list_entry);
++ if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) {
++ precise_timestamps = true;
++ break;
++ }
++ }
++ stats->precise_timestamps = precise_timestamps;
++}
++
+ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+ sector_t step, unsigned stat_flags,
+ unsigned n_histogram_entries,
+@@ -313,6 +336,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+ for (ni = 0; ni < n_entries; ni++) {
+ atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
+ atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
++ cond_resched();
+ }
+
+ if (s->n_histogram_entries) {
+@@ -325,6 +349,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+ for (ni = 0; ni < n_entries; ni++) {
+ s->stat_shared[ni].tmp.histogram = hi;
+ hi += s->n_histogram_entries + 1;
++ cond_resched();
+ }
+ }
+
+@@ -345,6 +370,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+ for (ni = 0; ni < n_entries; ni++) {
+ p[ni].histogram = hi;
+ hi += s->n_histogram_entries + 1;
++ cond_resched();
+ }
+ }
+ }
+@@ -376,6 +402,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+ }
+ ret_id = s->id;
+ list_add_tail_rcu(&s->list_entry, l);
++
++ dm_stats_recalc_precise_timestamps(stats);
++
+ mutex_unlock(&stats->mutex);
+
+ resume_callback(md);
+@@ -418,6 +447,9 @@ static int dm_stats_delete(struct dm_stats *stats, int id)
+ }
+
+ list_del_rcu(&s->list_entry);
++
++ dm_stats_recalc_precise_timestamps(stats);
++
+ mutex_unlock(&stats->mutex);
+
+ /*
+@@ -474,6 +506,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
+ }
+ DMEMIT("\n");
+ }
++ cond_resched();
+ }
+ mutex_unlock(&stats->mutex);
+
+@@ -621,13 +654,14 @@ static void __dm_stat_bio(struct dm_stat *s, int bi_rw,
+
+ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
+ sector_t bi_sector, unsigned bi_sectors, bool end,
+- unsigned long duration_jiffies,
++ unsigned long start_time,
+ struct dm_stats_aux *stats_aux)
+ {
+ struct dm_stat *s;
+ sector_t end_sector;
+ struct dm_stats_last_position *last;
+ bool got_precise_time;
++ unsigned long duration_jiffies = 0;
+
+ if (unlikely(!bi_sectors))
+ return;
+@@ -647,16 +681,16 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
+ ));
+ WRITE_ONCE(last->last_sector, end_sector);
+ WRITE_ONCE(last->last_rw, bi_rw);
+- }
++ } else
++ duration_jiffies = jiffies - start_time;
+
+ rcu_read_lock();
+
+ got_precise_time = false;
+ list_for_each_entry_rcu(s, &stats->list, list_entry) {
+ if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
+- if (!end)
+- stats_aux->duration_ns = ktime_to_ns(ktime_get());
+- else
++ /* start (!end) duration_ns is set by DM core's alloc_io() */
++ if (end)
+ stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
+ got_precise_time = true;
+ }
+@@ -750,6 +784,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
+ local_irq_enable();
+ }
+ }
++ cond_resched();
+ }
+ }
+
+@@ -865,6 +900,8 @@ static int dm_stats_print(struct dm_stats *stats, int id,
+
+ if (unlikely(sz + 1 >= maxlen))
+ goto buffer_overflow;
++
++ cond_resched();
+ }
+
+ if (clear)
+diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h
+index 2ddfae678f320..ee32b099f1cf7 100644
+--- a/drivers/md/dm-stats.h
++++ b/drivers/md/dm-stats.h
+@@ -13,8 +13,7 @@ struct dm_stats {
+ struct mutex mutex;
+ struct list_head list; /* list of struct dm_stat */
+ struct dm_stats_last_position __percpu *last;
+- sector_t last_sector;
+- unsigned last_rw;
++ bool precise_timestamps;
+ };
+
+ struct dm_stats_aux {
+@@ -22,7 +21,7 @@ struct dm_stats_aux {
+ unsigned long long duration_ns;
+ };
+
+-void dm_stats_init(struct dm_stats *st);
++int dm_stats_init(struct dm_stats *st);
+ void dm_stats_cleanup(struct dm_stats *st);
+
+ struct mapped_device;
+@@ -32,7 +31,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
+
+ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
+ sector_t bi_sector, unsigned bi_sectors, bool end,
+- unsigned long duration_jiffies,
++ unsigned long start_time,
+ struct dm_stats_aux *aux);
+
+ static inline bool dm_stats_used(struct dm_stats *st)
+@@ -40,4 +39,10 @@ static inline bool dm_stats_used(struct dm_stats *st)
+ return !list_empty(&st->list);
+ }
+
++static inline void dm_stats_record_start(struct dm_stats *stats, struct dm_stats_aux *aux)
++{
++ if (unlikely(stats->precise_timestamps))
++ aux->duration_ns = ktime_to_ns(ktime_get());
++}
++
+ #endif
+diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
+index 2111daaacabaf..46ec4590f62f6 100644
+--- a/drivers/md/dm-table.c
++++ b/drivers/md/dm-table.c
+@@ -1191,21 +1191,12 @@ struct dm_keyslot_manager {
+ struct mapped_device *md;
+ };
+
+-struct dm_keyslot_evict_args {
+- const struct blk_crypto_key *key;
+- int err;
+-};
+-
+ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+ {
+- struct dm_keyslot_evict_args *args = data;
+- int err;
++ const struct blk_crypto_key *key = data;
+
+- err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
+- if (!args->err)
+- args->err = err;
+- /* Always try to evict the key from all devices. */
++ blk_crypto_evict_key(bdev_get_queue(dev->bdev), key);
+ return 0;
+ }
+
+@@ -1220,7 +1211,6 @@ static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
+ struct dm_keyslot_manager,
+ ksm);
+ struct mapped_device *md = dksm->md;
+- struct dm_keyslot_evict_args args = { key };
+ struct dm_table *t;
+ int srcu_idx;
+ int i;
+@@ -1233,10 +1223,11 @@ static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
+ ti = dm_table_get_target(t, i);
+ if (!ti->type->iterate_devices)
+ continue;
+- ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
++ ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
++ (void *)key);
+ }
+ dm_put_live_table(md, srcu_idx);
+- return args.err;
++ return 0;
+ }
+
+ static const struct blk_ksm_ll_ops dm_ksm_ll_ops = {
+diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
+index c88ed14d49e65..44a5978bcb8c4 100644
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
+ goto bad_cleanup_data_sm;
+ }
+
++ /*
++ * For pool metadata opening process, root setting is redundant
++ * because it will be set again in __begin_transaction(). But dm
++ * pool aborting process really needs to get last transaction's
++ * root to avoid accessing broken btree.
++ */
++ pmd->root = le64_to_cpu(disk_super->data_mapping_root);
++ pmd->details_root = le64_to_cpu(disk_super->device_details_root);
++
+ __setup_btree_details(pmd);
+ dm_bm_unlock(sblock);
+
+@@ -776,13 +785,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
+ return r;
+ }
+
+-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
++static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
++ bool destroy_bm)
+ {
+ dm_sm_destroy(pmd->data_sm);
+ dm_sm_destroy(pmd->metadata_sm);
+ dm_tm_destroy(pmd->nb_tm);
+ dm_tm_destroy(pmd->tm);
+- dm_block_manager_destroy(pmd->bm);
++ if (destroy_bm)
++ dm_block_manager_destroy(pmd->bm);
+ }
+
+ static int __begin_transaction(struct dm_pool_metadata *pmd)
+@@ -989,7 +1000,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
+ }
+ pmd_write_unlock(pmd);
+ if (!pmd->fail_io)
+- __destroy_persistent_data_objects(pmd);
++ __destroy_persistent_data_objects(pmd, true);
+
+ kfree(pmd);
+ return 0;
+@@ -1767,13 +1778,15 @@ int dm_thin_remove_range(struct dm_thin_device *td,
+
+ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
+ {
+- int r;
++ int r = -EINVAL;
+ uint32_t ref_count;
+
+ down_read(&pmd->root_lock);
+- r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
+- if (!r)
+- *result = (ref_count > 1);
++ if (!pmd->fail_io) {
++ r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
++ if (!r)
++ *result = (ref_count > 1);
++ }
+ up_read(&pmd->root_lock);
+
+ return r;
+@@ -1781,10 +1794,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re
+
+ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
+ {
+- int r = 0;
++ int r = -EINVAL;
+
+ pmd_write_lock(pmd);
+- r = dm_sm_inc_blocks(pmd->data_sm, b, e);
++ if (!pmd->fail_io)
++ r = dm_sm_inc_blocks(pmd->data_sm, b, e);
+ pmd_write_unlock(pmd);
+
+ return r;
+@@ -1792,10 +1806,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
+
+ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
+ {
+- int r = 0;
++ int r = -EINVAL;
+
+ pmd_write_lock(pmd);
+- r = dm_sm_dec_blocks(pmd->data_sm, b, e);
++ if (!pmd->fail_io)
++ r = dm_sm_dec_blocks(pmd->data_sm, b, e);
+ pmd_write_unlock(pmd);
+
+ return r;
+@@ -1888,19 +1903,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
+ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
+ {
+ int r = -EINVAL;
++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
++
++ /* fail_io is double-checked with pmd->root_lock held below */
++ if (unlikely(pmd->fail_io))
++ return r;
++
++ /*
++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of
++ * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
++ * shrinker associated with the block manager's bufio client vs pmd root_lock).
++ * - must take shrinker_rwsem without holding pmd->root_lock
++ */
++ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
++ THIN_MAX_CONCURRENT_LOCKS);
+
+ pmd_write_lock(pmd);
+- if (pmd->fail_io)
++ if (pmd->fail_io) {
++ pmd_write_unlock(pmd);
+ goto out;
++ }
+
+ __set_abort_with_changes_flags(pmd);
+- __destroy_persistent_data_objects(pmd);
+- r = __create_persistent_data_objects(pmd, false);
++ __destroy_persistent_data_objects(pmd, false);
++ old_bm = pmd->bm;
++ if (IS_ERR(new_bm)) {
++ DMERR("could not create block manager during abort");
++ pmd->bm = NULL;
++ r = PTR_ERR(new_bm);
++ goto out_unlock;
++ }
++
++ pmd->bm = new_bm;
++ r = __open_or_format_metadata(pmd, false);
++ if (r) {
++ pmd->bm = NULL;
++ goto out_unlock;
++ }
++ new_bm = NULL;
++out_unlock:
+ if (r)
+ pmd->fail_io = true;
+-
+-out:
+ pmd_write_unlock(pmd);
++ dm_block_manager_destroy(old_bm);
++out:
++ if (new_bm && !IS_ERR(new_bm))
++ dm_block_manager_destroy(new_bm);
+
+ return r;
+ }
+@@ -2073,10 +2121,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
+ dm_sm_threshold_fn fn,
+ void *context)
+ {
+- int r;
++ int r = -EINVAL;
+
+ pmd_write_lock_in_core(pmd);
+- r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
++ if (!pmd->fail_io) {
++ r = dm_sm_register_threshold_callback(pmd->metadata_sm,
++ threshold, fn, context);
++ }
+ pmd_write_unlock(pmd);
+
+ return r;
+diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
+index 4c67b77c23c1b..1cf652670a7fe 100644
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -2217,6 +2217,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
+ throttle_work_update(&pool->throttle);
+ dm_pool_issue_prefetches(pool->pmd);
+ }
++ cond_resched();
+ }
+ blk_finish_plug(&plug);
+ }
+@@ -2299,6 +2300,7 @@ static void process_thin_deferred_cells(struct thin_c *tc)
+ else
+ pool->process_cell(tc, cell);
+ }
++ cond_resched();
+ } while (!list_empty(&cells));
+ }
+
+@@ -2907,6 +2909,8 @@ static void __pool_destroy(struct pool *pool)
+ dm_bio_prison_destroy(pool->prison);
+ dm_kcopyd_client_destroy(pool->copier);
+
++ cancel_delayed_work_sync(&pool->waker);
++ cancel_delayed_work_sync(&pool->no_space_timeout);
+ if (pool->wq)
+ destroy_workqueue(pool->wq);
+
+@@ -3379,6 +3383,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ pt->low_water_blocks = low_water_blocks;
+ pt->adjusted_pf = pt->requested_pf = pf;
+ ti->num_flush_bios = 1;
++ ti->limit_swap_bios = true;
+
+ /*
+ * Only need to enable discards if the pool should pass
+@@ -3401,8 +3406,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ calc_metadata_threshold(pt),
+ metadata_low_callback,
+ pool);
+- if (r)
++ if (r) {
++ ti->error = "Error registering metadata threshold";
+ goto out_flags_changed;
++ }
+
+ dm_pool_register_pre_commit_callback(pool->pmd,
+ metadata_pre_commit_callback, pool);
+@@ -3564,20 +3571,28 @@ static int pool_preresume(struct dm_target *ti)
+ */
+ r = bind_control_target(pool, ti);
+ if (r)
+- return r;
++ goto out;
+
+ r = maybe_resize_data_dev(ti, &need_commit1);
+ if (r)
+- return r;
++ goto out;
+
+ r = maybe_resize_metadata_dev(ti, &need_commit2);
+ if (r)
+- return r;
++ goto out;
+
+ if (need_commit1 || need_commit2)
+ (void) commit(pool);
++out:
++ /*
++ * When a thin-pool is PM_FAIL, it cannot be rebuilt if
++ * bio is in deferred list. Therefore need to return 0
++ * to allow pool_resume() to flush IO.
++ */
++ if (r && get_pool_mode(pool) == PM_FAIL)
++ r = 0;
+
+- return 0;
++ return r;
+ }
+
+ static void pool_suspend_active_thins(struct pool *pool)
+@@ -4249,6 +4264,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
+ goto bad;
+
+ ti->num_flush_bios = 1;
++ ti->limit_swap_bios = true;
+ ti->flush_supported = true;
+ ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
+
+diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
+index 88288c8d6bc8c..1a2509623874f 100644
+--- a/drivers/md/dm-verity-target.c
++++ b/drivers/md/dm-verity-target.c
+@@ -482,7 +482,7 @@ static int verity_verify_io(struct dm_verity_io *io)
+ sector_t cur_block = io->block + b;
+ struct ahash_request *req = verity_io_hash_req(v, io);
+
+- if (v->validated_blocks &&
++ if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
+ likely(test_bit(cur_block, v->validated_blocks))) {
+ verity_bv_skip_block(v, io, &io->iter);
+ continue;
+@@ -1312,6 +1312,7 @@ bad:
+
+ static struct target_type verity_target = {
+ .name = "verity",
++ .features = DM_TARGET_IMMUTABLE,
+ .version = {1, 8, 0},
+ .module = THIS_MODULE,
+ .ctr = verity_ctr,
+diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
+index 18320444fb0a9..dfb55fe09ce13 100644
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -22,7 +22,7 @@
+
+ #define HIGH_WATERMARK 50
+ #define LOW_WATERMARK 45
+-#define MAX_WRITEBACK_JOBS 0
++#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
+ #define ENDIO_LATENCY 16
+ #define WRITEBACK_LATENCY 64
+ #define AUTOCOMMIT_BLOCKS_SSD 65536
+@@ -1328,8 +1328,8 @@ enum wc_map_op {
+ WC_MAP_ERROR,
+ };
+
+-static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+- struct wc_entry *e)
++static void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
++ struct wc_entry *e)
+ {
+ if (e) {
+ sector_t next_boundary =
+@@ -1337,8 +1337,6 @@ static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, stru
+ if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
+ dm_accept_partial_bio(bio, next_boundary);
+ }
+-
+- return WC_MAP_REMAP_ORIGIN;
+ }
+
+ static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
+@@ -1365,14 +1363,16 @@ read_next_block:
+ map_op = WC_MAP_REMAP;
+ }
+ } else {
+- map_op = writecache_map_remap_origin(wc, bio, e);
++ writecache_map_remap_origin(wc, bio, e);
++ wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
++ map_op = WC_MAP_REMAP_ORIGIN;
+ }
+
+ return map_op;
+ }
+
+-static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+- struct wc_entry *e, bool search_used)
++static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
++ struct wc_entry *e, bool search_used)
+ {
+ unsigned bio_size = wc->block_size;
+ sector_t start_cache_sec = cache_sector(wc, e);
+@@ -1412,14 +1412,15 @@ static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct b
+ bio->bi_iter.bi_sector = start_cache_sec;
+ dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+
++ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
++ wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
++
+ if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+ wc->uncommitted_blocks = 0;
+ queue_work(wc->writeback_wq, &wc->flush_work);
+ } else {
+ writecache_schedule_autocommit(wc);
+ }
+-
+- return WC_MAP_REMAP;
+ }
+
+ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
+@@ -1429,9 +1430,10 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
+ do {
+ bool found_entry = false;
+ bool search_used = false;
+- wc->stats.writes++;
+- if (writecache_has_error(wc))
++ if (writecache_has_error(wc)) {
++ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
+ return WC_MAP_ERROR;
++ }
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+ if (e) {
+ if (!writecache_entry_is_committed(wc, e)) {
+@@ -1455,9 +1457,11 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
+ if (unlikely(!e)) {
+ if (!WC_MODE_PMEM(wc) && !found_entry) {
+ direct_write:
+- wc->stats.writes_around++;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+- return writecache_map_remap_origin(wc, bio, e);
++ writecache_map_remap_origin(wc, bio, e);
++ wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits;
++ wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
++ return WC_MAP_REMAP_ORIGIN;
+ }
+ wc->stats.writes_blocked_on_freelist++;
+ writecache_wait_on_freelist(wc);
+@@ -1468,10 +1472,13 @@ direct_write:
+ wc->uncommitted_blocks++;
+ wc->stats.writes_allocate++;
+ bio_copy:
+- if (WC_MODE_PMEM(wc))
++ if (WC_MODE_PMEM(wc)) {
+ bio_copy_block(wc, bio, memory_data(wc, e));
+- else
+- return writecache_bio_copy_ssd(wc, bio, e, search_used);
++ wc->stats.writes++;
++ } else {
++ writecache_bio_copy_ssd(wc, bio, e, search_used);
++ return WC_MAP_REMAP;
++ }
+ } while (bio->bi_iter.bi_size);
+
+ if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
+@@ -1506,7 +1513,7 @@ static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio
+
+ static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
+ {
+- wc->stats.discards++;
++ wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits;
+
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index 76d9da49fda75..d6285a23dc3ed 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -226,7 +226,6 @@ out_uevent_exit:
+
+ static void local_exit(void)
+ {
+- flush_scheduled_work();
+ destroy_workqueue(deferred_remove_workqueue);
+
+ unregister_blkdev(_major, _name);
+@@ -484,33 +483,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio)
+ }
+ EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
+
+-static void start_io_acct(struct dm_io *io)
++static bool bio_is_flush_with_data(struct bio *bio)
+ {
+- struct mapped_device *md = io->md;
+- struct bio *bio = io->orig_bio;
+-
+- io->start_time = bio_start_io_acct(bio);
+- if (unlikely(dm_stats_used(&md->stats)))
+- dm_stats_account_io(&md->stats, bio_data_dir(bio),
+- bio->bi_iter.bi_sector, bio_sectors(bio),
+- false, 0, &io->stats_aux);
++ return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
+ }
+
+-static void end_io_acct(struct mapped_device *md, struct bio *bio,
+- unsigned long start_time, struct dm_stats_aux *stats_aux)
++static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio,
++ unsigned long start_time, struct dm_stats_aux *stats_aux)
+ {
+- unsigned long duration = jiffies - start_time;
++ bool is_flush_with_data;
++ unsigned int bi_size;
++
++ /* If REQ_PREFLUSH set save any payload but do not account it */
++ is_flush_with_data = bio_is_flush_with_data(bio);
++ if (is_flush_with_data) {
++ bi_size = bio->bi_iter.bi_size;
++ bio->bi_iter.bi_size = 0;
++ }
+
+- bio_end_io_acct(bio, start_time);
++ if (!end)
++ bio_start_io_acct_time(bio, start_time);
++ else
++ bio_end_io_acct(bio, start_time);
+
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+- true, duration, stats_aux);
++ end, start_time, stats_aux);
+
+- /* nudge anyone waiting on suspend queue */
+- if (unlikely(wq_has_sleeper(&md->wait)))
+- wake_up(&md->wait);
++ /* Restore bio's payload so it does get accounted upon requeue */
++ if (is_flush_with_data)
++ bio->bi_iter.bi_size = bi_size;
++}
++
++static void start_io_acct(struct dm_io *io)
++{
++ dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux);
++}
++
++static void end_io_acct(struct mapped_device *md, struct bio *bio,
++ unsigned long start_time, struct dm_stats_aux *stats_aux)
++{
++ dm_io_acct(true, md, bio, start_time, stats_aux);
+ }
+
+ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
+@@ -531,11 +545,14 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
+ io->magic = DM_IO_MAGIC;
+ io->status = 0;
+ atomic_set(&io->io_count, 1);
++ this_cpu_inc(*md->pending_io);
+ io->orig_bio = bio;
+ io->md = md;
+ spin_lock_init(&io->endio_lock);
+
+- start_io_acct(io);
++ io->start_time = jiffies;
++
++ dm_stats_record_start(&md->stats, &io->stats_aux);
+
+ return io;
+ }
+@@ -826,11 +843,17 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
+ stats_aux = io->stats_aux;
+ free_io(md, io);
+ end_io_acct(md, bio, start_time, &stats_aux);
++ smp_wmb();
++ this_cpu_dec(*md->pending_io);
++
++ /* nudge anyone waiting on suspend queue */
++ if (unlikely(wq_has_sleeper(&md->wait)))
++ wake_up(&md->wait);
+
+ if (io_error == BLK_STS_DM_REQUEUE)
+ return;
+
+- if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
++ if (bio_is_flush_with_data(bio)) {
+ /*
+ * Preflush done for flush with data, reissue
+ * without REQ_PREFLUSH.
+@@ -1514,9 +1537,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
+ ci->sector = bio->bi_iter.bi_sector;
+ }
+
+-#define __dm_part_stat_sub(part, field, subnd) \
+- (part_stat_get(part, field) -= (subnd))
+-
+ /*
+ * Entry point to split a bio into clones and submit them to the targets.
+ */
+@@ -1553,23 +1573,12 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
+ GFP_NOIO, &md->queue->bio_split);
+ ci.io->orig_bio = b;
+
+- /*
+- * Adjust IO stats for each split, otherwise upon queue
+- * reentry there will be redundant IO accounting.
+- * NOTE: this is a stop-gap fix, a proper fix involves
+- * significant refactoring of DM core's bio splitting
+- * (by eliminating DM's splitting and just using bio_split)
+- */
+- part_stat_lock();
+- __dm_part_stat_sub(dm_disk(md)->part0,
+- sectors[op_stat_group(bio_op(bio))], ci.sector_count);
+- part_stat_unlock();
+-
+ bio_chain(b, bio);
+ trace_block_split(b, bio->bi_iter.bi_sector);
+ ret = submit_bio_noacct(bio);
+ }
+ }
++ start_io_acct(ci.io);
+
+ /* drop the extra reference count */
+ dm_io_dec_pending(ci.io, errno_to_blk_status(error));
+@@ -1584,15 +1593,10 @@ static blk_qc_t dm_submit_bio(struct bio *bio)
+ struct dm_table *map;
+
+ map = dm_get_live_table(md, &srcu_idx);
+- if (unlikely(!map)) {
+- DMERR_LIMIT("%s: mapping table unavailable, erroring io",
+- dm_device_name(md));
+- bio_io_error(bio);
+- goto out;
+- }
+
+- /* If suspended, queue this IO for later */
+- if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
++ /* If suspended, or map not yet available, queue this IO for later */
++ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
++ unlikely(!map)) {
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
+ else if (bio->bi_opf & REQ_RAHEAD)
+@@ -1696,6 +1700,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
+ md->dax_dev = NULL;
+ }
+
++ dm_cleanup_zoned_dev(md);
+ if (md->disk) {
+ spin_lock(&_minor_lock);
+ md->disk->private_data = NULL;
+@@ -1708,6 +1713,11 @@ static void cleanup_mapped_device(struct mapped_device *md)
+ blk_cleanup_disk(md->disk);
+ }
+
++ if (md->pending_io) {
++ free_percpu(md->pending_io);
++ md->pending_io = NULL;
++ }
++
+ cleanup_srcu_struct(&md->io_barrier);
+
+ mutex_destroy(&md->suspend_lock);
+@@ -1716,7 +1726,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
+ mutex_destroy(&md->swap_bios_lock);
+
+ dm_mq_cleanup_mapped_device(md);
+- dm_cleanup_zoned_dev(md);
+ }
+
+ /*
+@@ -1787,15 +1796,16 @@ static struct mapped_device *alloc_dev(int minor)
+ md->disk->first_minor = minor;
+ md->disk->minors = 1;
+ md->disk->fops = &dm_blk_dops;
+- md->disk->queue = md->queue;
+ md->disk->private_data = md;
+ sprintf(md->disk->disk_name, "dm-%d", minor);
+
+ if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+ md->dax_dev = alloc_dax(md, md->disk->disk_name,
+ &dm_dax_ops, 0);
+- if (IS_ERR(md->dax_dev))
++ if (IS_ERR(md->dax_dev)) {
++ md->dax_dev = NULL;
+ goto bad;
++ }
+ }
+
+ format_dev_t(md->name, MKDEV(_major, minor));
+@@ -1804,7 +1814,13 @@ static struct mapped_device *alloc_dev(int minor)
+ if (!md->wq)
+ goto bad;
+
+- dm_stats_init(&md->stats);
++ md->pending_io = alloc_percpu(unsigned long);
++ if (!md->pending_io)
++ goto bad;
++
++ r = dm_stats_init(&md->stats);
++ if (r < 0)
++ goto bad;
+
+ /* Populate the mapping, nobody knows we exist yet */
+ spin_lock(&_minor_lock);
+@@ -1928,10 +1944,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
+ if (size != dm_get_size(md))
+ memset(&md->geometry, 0, sizeof(md->geometry));
+
+- if (!get_capacity(md->disk))
+- set_capacity(md->disk, size);
+- else
+- set_capacity_and_notify(md->disk, size);
++ set_capacity(md->disk, size);
+
+ dm_table_event_callback(t, event_callback, md);
+
+@@ -2168,7 +2181,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
+ set_bit(DMF_FREEING, &md->flags);
+ spin_unlock(&_minor_lock);
+
+- blk_set_queue_dying(md->queue);
++ blk_mark_disk_dead(md->disk);
+
+ /*
+ * Take suspend_lock so that presuspend and postsuspend methods
+@@ -2219,16 +2232,13 @@ void dm_put(struct mapped_device *md)
+ }
+ EXPORT_SYMBOL_GPL(dm_put);
+
+-static bool md_in_flight_bios(struct mapped_device *md)
++static bool dm_in_flight_bios(struct mapped_device *md)
+ {
+ int cpu;
+- struct block_device *part = dm_disk(md)->part0;
+- long sum = 0;
++ unsigned long sum = 0;
+
+- for_each_possible_cpu(cpu) {
+- sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
+- sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
+- }
++ for_each_possible_cpu(cpu)
++ sum += *per_cpu_ptr(md->pending_io, cpu);
+
+ return sum != 0;
+ }
+@@ -2241,7 +2251,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
+ while (true) {
+ prepare_to_wait(&md->wait, &wait, task_state);
+
+- if (!md_in_flight_bios(md))
++ if (!dm_in_flight_bios(md))
+ break;
+
+ if (signal_pending_state(task_state, current)) {
+@@ -2253,6 +2263,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
+ }
+ finish_wait(&md->wait, &wait);
+
++ smp_rmb();
++
+ return r;
+ }
+
+@@ -2295,6 +2307,7 @@ static void dm_wq_work(struct work_struct *work)
+ break;
+
+ submit_bio_noacct(bio);
++ cond_resched();
+ }
+ }
+
+@@ -2513,6 +2526,10 @@ retry:
+ }
+
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
++ if (!map) {
++ /* avoid deadlock with fs/namespace.c:do_mount() */
++ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
++ }
+
+ r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
+ if (r)
+@@ -2694,24 +2711,26 @@ EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
+ * Event notification.
+ *---------------------------------------------------------------*/
+ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+- unsigned cookie)
++ unsigned cookie, bool need_resize_uevent)
+ {
+ int r;
+ unsigned noio_flag;
+ char udev_cookie[DM_COOKIE_LENGTH];
+- char *envp[] = { udev_cookie, NULL };
+-
+- noio_flag = memalloc_noio_save();
+-
+- if (!cookie)
+- r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+- else {
++ char *envp[3] = { NULL, NULL, NULL };
++ char **envpp = envp;
++ if (cookie) {
+ snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
+ DM_COOKIE_ENV_VAR_NAME, cookie);
+- r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+- action, envp);
++ *envpp++ = udev_cookie;
++ }
++ if (need_resize_uevent) {
++ *envpp++ = "RESIZE=1";
+ }
+
++ noio_flag = memalloc_noio_save();
++
++ r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
++
+ memalloc_noio_restore(noio_flag);
+
+ return r;
+@@ -2896,6 +2915,11 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
+ goto out;
+ ti = dm_table_get_target(table, 0);
+
++ if (dm_suspended_md(md)) {
++ ret = -EAGAIN;
++ goto out;
++ }
++
+ ret = -EINVAL;
+ if (!ti->type->iterate_devices)
+ goto out;
+diff --git a/drivers/md/dm.h b/drivers/md/dm.h
+index 742d9c80efe19..10e4a3482db8f 100644
+--- a/drivers/md/dm.h
++++ b/drivers/md/dm.h
+@@ -210,7 +210,7 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
+
+ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+- unsigned cookie);
++ unsigned cookie, bool need_resize_uevent);
+
+ void dm_internal_suspend(struct mapped_device *md);
+ void dm_internal_resume(struct mapped_device *md);
+diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
+index e29c6298ef5c9..49c46f3aea573 100644
+--- a/drivers/md/md-bitmap.c
++++ b/drivers/md/md-bitmap.c
+@@ -54,14 +54,7 @@ __acquires(bitmap->lock)
+ {
+ unsigned char *mappage;
+
+- if (page >= bitmap->pages) {
+- /* This can happen if bitmap_start_sync goes beyond
+- * End-of-device while looking for a whole page.
+- * It is harmless.
+- */
+- return -EINVAL;
+- }
+-
++ WARN_ON_ONCE(page >= bitmap->pages);
+ if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
+ return 0;
+
+@@ -486,7 +479,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
+ sb = kmap_atomic(bitmap->storage.sb_page);
+ pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
+ pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
+- pr_debug(" version: %d\n", le32_to_cpu(sb->version));
++ pr_debug(" version: %u\n", le32_to_cpu(sb->version));
+ pr_debug(" uuid: %08x.%08x.%08x.%08x\n",
+ le32_to_cpu(*(__le32 *)(sb->uuid+0)),
+ le32_to_cpu(*(__le32 *)(sb->uuid+4)),
+@@ -497,11 +490,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap)
+ pr_debug("events cleared: %llu\n",
+ (unsigned long long) le64_to_cpu(sb->events_cleared));
+ pr_debug(" state: %08x\n", le32_to_cpu(sb->state));
+- pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize));
+- pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
++ pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize));
++ pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep));
+ pr_debug(" sync size: %llu KB\n",
+ (unsigned long long)le64_to_cpu(sb->sync_size)/2);
+- pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
++ pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
+ kunmap_atomic(sb);
+ }
+
+@@ -639,14 +632,6 @@ re_read:
+ daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
+ write_behind = le32_to_cpu(sb->write_behind);
+ sectors_reserved = le32_to_cpu(sb->sectors_reserved);
+- /* Setup nodes/clustername only if bitmap version is
+- * cluster-compatible
+- */
+- if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+- nodes = le32_to_cpu(sb->nodes);
+- strlcpy(bitmap->mddev->bitmap_info.cluster_name,
+- sb->cluster_name, 64);
+- }
+
+ /* verify that the bitmap-specific fields are valid */
+ if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
+@@ -668,6 +653,16 @@ re_read:
+ goto out;
+ }
+
++ /*
++ * Setup nodes/clustername only if bitmap version is
++ * cluster-compatible
++ */
++ if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
++ nodes = le32_to_cpu(sb->nodes);
++ strlcpy(bitmap->mddev->bitmap_info.cluster_name,
++ sb->cluster_name, 64);
++ }
++
+ /* keep the array size field of the bitmap superblock up to date */
+ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
+
+@@ -700,9 +695,9 @@ re_read:
+
+ out:
+ kunmap_atomic(sb);
+- /* Assigning chunksize is required for "re_read" */
+- bitmap->mddev->bitmap_info.chunksize = chunksize;
+ if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
++ /* Assigning chunksize is required for "re_read" */
++ bitmap->mddev->bitmap_info.chunksize = chunksize;
+ err = md_setup_cluster(bitmap->mddev, nodes);
+ if (err) {
+ pr_warn("%s: Could not setup cluster service (%d)\n",
+@@ -713,18 +708,18 @@ out:
+ goto re_read;
+ }
+
+-
+ out_no_sb:
+- if (test_bit(BITMAP_STALE, &bitmap->flags))
+- bitmap->events_cleared = bitmap->mddev->events;
+- bitmap->mddev->bitmap_info.chunksize = chunksize;
+- bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+- bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+- bitmap->mddev->bitmap_info.nodes = nodes;
+- if (bitmap->mddev->bitmap_info.space == 0 ||
+- bitmap->mddev->bitmap_info.space > sectors_reserved)
+- bitmap->mddev->bitmap_info.space = sectors_reserved;
+- if (err) {
++ if (err == 0) {
++ if (test_bit(BITMAP_STALE, &bitmap->flags))
++ bitmap->events_cleared = bitmap->mddev->events;
++ bitmap->mddev->bitmap_info.chunksize = chunksize;
++ bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
++ bitmap->mddev->bitmap_info.max_write_behind = write_behind;
++ bitmap->mddev->bitmap_info.nodes = nodes;
++ if (bitmap->mddev->bitmap_info.space == 0 ||
++ bitmap->mddev->bitmap_info.space > sectors_reserved)
++ bitmap->mddev->bitmap_info.space = sectors_reserved;
++ } else {
+ md_bitmap_print_sb(bitmap);
+ if (bitmap->cluster_slot < 0)
+ md_cluster_stop(bitmap->mddev);
+@@ -1363,6 +1358,14 @@ __acquires(bitmap->lock)
+ sector_t csize;
+ int err;
+
++ if (page >= bitmap->pages) {
++ /*
++ * This can happen if bitmap_start_sync goes beyond
++ * End-of-device while looking for a whole page or
++ * user set a huge number to sysfs bitmap_set_bits.
++ */
++ return NULL;
++ }
+ err = md_bitmap_checkpage(bitmap, page, create, 0);
+
+ if (bitmap->bp[page].hijacked ||
+@@ -2104,7 +2107,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+ bytes = DIV_ROUND_UP(chunks, 8);
+ if (!bitmap->mddev->bitmap_info.external)
+ bytes += sizeof(bitmap_super_t);
+- } while (bytes > (space << 9));
++ } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
++ (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
+ } else
+ chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
+
+@@ -2149,7 +2153,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+ bitmap->counts.missing_pages = pages;
+ bitmap->counts.chunkshift = chunkshift;
+ bitmap->counts.chunks = chunks;
+- bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
++ bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
+ BITMAP_BLOCK_SHIFT);
+
+ blocks = min(old_counts.chunks << old_counts.chunkshift,
+@@ -2175,8 +2179,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+ bitmap->counts.missing_pages = old_counts.pages;
+ bitmap->counts.chunkshift = old_counts.chunkshift;
+ bitmap->counts.chunks = old_counts.chunks;
+- bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
+- BITMAP_BLOCK_SHIFT);
++ bitmap->mddev->bitmap_info.chunksize =
++ 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
+ blocks = old_counts.chunks << old_counts.chunkshift;
+ pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
+ break;
+@@ -2194,20 +2198,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+
+ if (set) {
+ bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
+- if (*bmc_new == 0) {
+- /* need to set on-disk bits too. */
+- sector_t end = block + new_blocks;
+- sector_t start = block >> chunkshift;
+- start <<= chunkshift;
+- while (start < end) {
+- md_bitmap_file_set_bit(bitmap, block);
+- start += 1 << chunkshift;
++ if (bmc_new) {
++ if (*bmc_new == 0) {
++ /* need to set on-disk bits too. */
++ sector_t end = block + new_blocks;
++ sector_t start = block >> chunkshift;
++
++ start <<= chunkshift;
++ while (start < end) {
++ md_bitmap_file_set_bit(bitmap, block);
++ start += 1 << chunkshift;
++ }
++ *bmc_new = 2;
++ md_bitmap_count_page(&bitmap->counts, block, 1);
++ md_bitmap_set_pending(&bitmap->counts, block);
+ }
+- *bmc_new = 2;
+- md_bitmap_count_page(&bitmap->counts, block, 1);
+- md_bitmap_set_pending(&bitmap->counts, block);
++ *bmc_new |= NEEDED_MASK;
+ }
+- *bmc_new |= NEEDED_MASK;
+ if (new_blocks < old_blocks)
+ old_blocks = new_blocks;
+ }
+@@ -2469,11 +2476,35 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
+ {
+ unsigned long backlog;
+ unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
++ struct md_rdev *rdev;
++ bool has_write_mostly = false;
+ int rv = kstrtoul(buf, 10, &backlog);
+ if (rv)
+ return rv;
+ if (backlog > COUNTER_MAX)
+ return -EINVAL;
++
++ rv = mddev_lock(mddev);
++ if (rv)
++ return rv;
++
++ /*
++ * Without write mostly device, it doesn't make sense to set
++ * backlog for max_write_behind.
++ */
++ rdev_for_each(rdev, mddev) {
++ if (test_bit(WriteMostly, &rdev->flags)) {
++ has_write_mostly = true;
++ break;
++ }
++ }
++ if (!has_write_mostly) {
++ pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
++ mdname(mddev));
++ mddev_unlock(mddev);
++ return -EINVAL;
++ }
++
+ mddev->bitmap_info.max_write_behind = backlog;
+ if (!backlog && mddev->serial_info_pool) {
+ /* serial_info_pool is not needed if backlog is zero */
+@@ -2481,13 +2512,13 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
+ mddev_destroy_serial_pool(mddev, NULL, false);
+ } else if (backlog && !mddev->serial_info_pool) {
+ /* serial_info_pool is needed since backlog is not zero */
+- struct md_rdev *rdev;
+-
+ rdev_for_each(rdev, mddev)
+ mddev_create_serial_pool(mddev, rdev, false);
+ }
+ if (old_mwb != backlog)
+ md_bitmap_update_sb(mddev->bitmap);
++
++ mddev_unlock(mddev);
+ return len;
+ }
+
+@@ -2514,6 +2545,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
+ if (csize < 512 ||
+ !is_power_of_2(csize))
+ return -EINVAL;
++ if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE *
++ sizeof(((bitmap_super_t *)0)->chunksize))))
++ return -EOVERFLOW;
+ mddev->bitmap_info.chunksize = csize;
+ return len;
+ }
+diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
+index 1ff51647a6822..c33cd28f1dba0 100644
+--- a/drivers/md/md-linear.c
++++ b/drivers/md/md-linear.c
+@@ -233,7 +233,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
+ bio_sector < start_sector))
+ goto out_of_bounds;
+
+- if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
++ if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
++ md_error(mddev, tmp_dev->rdev);
+ bio_io_error(bio);
+ return true;
+ }
+@@ -281,6 +282,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
+ seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
+ }
+
++static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
++{
++ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
++ char *md_name = mdname(mddev);
++
++ pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
++ md_name, rdev->bdev);
++ }
++}
++
+ static void linear_quiesce(struct mddev *mddev, int state)
+ {
+ }
+@@ -297,6 +308,7 @@ static struct md_personality linear_personality =
+ .hot_add_disk = linear_add,
+ .size = linear_size,
+ .quiesce = linear_quiesce,
++ .error_handler = linear_error,
+ };
+
+ static int __init linear_init (void)
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 6c0c3d0d905aa..b585b642a0763 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -51,6 +51,7 @@
+ #include <linux/hdreg.h>
+ #include <linux/proc_fs.h>
+ #include <linux/random.h>
++#include <linux/major.h>
+ #include <linux/module.h>
+ #include <linux/reboot.h>
+ #include <linux/file.h>
+@@ -457,6 +458,8 @@ static blk_qc_t md_submit_bio(struct bio *bio)
+ }
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ if (mddev->ro == 1 && unlikely(rw == WRITE)) {
+ if (bio_sectors(bio) != 0)
+@@ -525,13 +528,14 @@ static void md_end_flush(struct bio *bio)
+ struct md_rdev *rdev = bio->bi_private;
+ struct mddev *mddev = rdev->mddev;
+
++ bio_put(bio);
++
+ rdev_dec_pending(rdev, mddev);
+
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ /* The pre-request flush has finished */
+ queue_work(md_wq, &mddev->flush_work);
+ }
+- bio_put(bio);
+ }
+
+ static void md_submit_flush_data(struct work_struct *ws);
+@@ -934,10 +938,12 @@ static void super_written(struct bio *bio)
+ } else
+ clear_bit(LastDev, &rdev->flags);
+
++ bio_put(bio);
++
++ rdev_dec_pending(rdev, mddev);
++
+ if (atomic_dec_and_test(&mddev->pending_writes))
+ wake_up(&mddev->sb_wait);
+- rdev_dec_pending(rdev, mddev);
+- bio_put(bio);
+ }
+
+ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
+@@ -2193,6 +2199,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
+
+ if (!num_sectors || num_sectors > max_sectors)
+ num_sectors = max_sectors;
++ rdev->sb_start = sb_start;
+ }
+ sb = page_address(rdev->sb_page);
+ sb->data_size = cpu_to_le64(num_sectors);
+@@ -2626,14 +2633,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
+
+ static bool does_sb_need_changing(struct mddev *mddev)
+ {
+- struct md_rdev *rdev;
++ struct md_rdev *rdev = NULL, *iter;
+ struct mdp_superblock_1 *sb;
+ int role;
+
+ /* Find a good rdev */
+- rdev_for_each(rdev, mddev)
+- if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
++ rdev_for_each(iter, mddev)
++ if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
++ rdev = iter;
+ break;
++ }
+
+ /* No good device found. */
+ if (!rdev)
+@@ -2976,13 +2985,18 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
+ * -write_error - clears WriteErrorSeen
+ * {,-}failfast - set/clear FailFast
+ */
++
++ struct mddev *mddev = rdev->mddev;
+ int err = -EINVAL;
++ bool need_update_sb = false;
++
+ if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
+ md_error(rdev->mddev, rdev);
+- if (test_bit(Faulty, &rdev->flags))
+- err = 0;
+- else
++
++ if (test_bit(MD_BROKEN, &rdev->mddev->flags))
+ err = -EBUSY;
++ else
++ err = 0;
+ } else if (cmd_match(buf, "remove")) {
+ if (rdev->mddev->pers) {
+ clear_bit(Blocked, &rdev->flags);
+@@ -2991,7 +3005,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
+ if (rdev->raid_disk >= 0)
+ err = -EBUSY;
+ else {
+- struct mddev *mddev = rdev->mddev;
+ err = 0;
+ if (mddev_is_clustered(mddev))
+ err = md_cluster_ops->remove_disk(mddev, rdev);
+@@ -3008,10 +3021,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
+ } else if (cmd_match(buf, "writemostly")) {
+ set_bit(WriteMostly, &rdev->flags);
+ mddev_create_serial_pool(rdev->mddev, rdev, false);
++ need_update_sb = true;
+ err = 0;
+ } else if (cmd_match(buf, "-writemostly")) {
+ mddev_destroy_serial_pool(rdev->mddev, rdev, false);
+ clear_bit(WriteMostly, &rdev->flags);
++ need_update_sb = true;
+ err = 0;
+ } else if (cmd_match(buf, "blocked")) {
+ set_bit(Blocked, &rdev->flags);
+@@ -3037,9 +3052,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
+ err = 0;
+ } else if (cmd_match(buf, "failfast")) {
+ set_bit(FailFast, &rdev->flags);
++ need_update_sb = true;
+ err = 0;
+ } else if (cmd_match(buf, "-failfast")) {
+ clear_bit(FailFast, &rdev->flags);
++ need_update_sb = true;
+ err = 0;
+ } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
+ !test_bit(Journal, &rdev->flags)) {
+@@ -3118,6 +3135,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
+ clear_bit(ExternalBbl, &rdev->flags);
+ err = 0;
+ }
++ if (need_update_sb)
++ md_update_sb(mddev, 1);
+ if (!err)
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ return err ? err : len;
+@@ -3171,6 +3190,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
+ err = kstrtouint(buf, 10, (unsigned int *)&slot);
+ if (err < 0)
+ return err;
++ if (slot < 0)
++ /* overflow */
++ return -ENOSPC;
+ }
+ if (rdev->mddev->pers && slot == -1) {
+ /* Setting 'slot' on an active array requires also
+@@ -3851,8 +3873,9 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
+ static ssize_t
+ safe_delay_show(struct mddev *mddev, char *page)
+ {
+- int msec = (mddev->safemode_delay*1000)/HZ;
+- return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
++ unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
++
++ return sprintf(page, "%u.%03u\n", msec/1000, msec%1000);
+ }
+ static ssize_t
+ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
+@@ -3864,7 +3887,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
+ return -EINVAL;
+ }
+
+- if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
++ if (strict_strtoul_scaled(cbuf, &msec, 3) < 0 || msec > UINT_MAX / HZ)
+ return -EINVAL;
+ if (msec == 0)
+ mddev->safemode_delay = 0;
+@@ -4342,10 +4365,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
+ * like active, but no writes have been seen for a while (100msec).
+ *
+ * broken
+- * RAID0/LINEAR-only: same as clean, but array is missing a member.
+- * It's useful because RAID0/LINEAR mounted-arrays aren't stopped
+- * when a member is gone, so this state will at least alert the
+- * user that something is wrong.
++ * Array is failed. It's useful because mounted-arrays aren't stopped
++ * when array is failed, so this state will at least alert the user that
++ * something is wrong.
+ */
+ enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
+ write_pending, active_idle, broken, bad_word};
+@@ -4534,6 +4556,8 @@ max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len
+ rv = kstrtouint(buf, 10, &n);
+ if (rv < 0)
+ return rv;
++ if (n > INT_MAX)
++ return -EINVAL;
+ atomic_set(&mddev->max_corr_read_errors, n);
+ return len;
+ }
+@@ -5575,8 +5599,6 @@ static void md_free(struct kobject *ko)
+
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
+- if (mddev->level != 1 && mddev->level != 10)
+- bioset_exit(&mddev->io_acct_set);
+ kfree(mddev);
+ }
+
+@@ -5640,6 +5662,7 @@ static int md_alloc(dev_t dev, char *name)
+ * removed (mddev_delayed_delete).
+ */
+ flush_workqueue(md_misc_wq);
++ flush_workqueue(md_rdev_misc_wq);
+
+ mutex_lock(&disks_mutex);
+ mddev = mddev_alloc(dev);
+@@ -5862,13 +5885,6 @@ int md_run(struct mddev *mddev)
+ if (err)
+ goto exit_bio_set;
+ }
+- if (mddev->level != 1 && mddev->level != 10 &&
+- !bioset_initialized(&mddev->io_acct_set)) {
+- err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
+- offsetof(struct md_io_acct, bio_clone), 0);
+- if (err)
+- goto exit_sync_set;
+- }
+
+ spin_lock(&pers_lock);
+ pers = find_pers(mddev->level, mddev->clevel);
+@@ -6045,9 +6061,6 @@ bitmap_abort:
+ module_put(pers->owner);
+ md_bitmap_destroy(mddev);
+ abort:
+- if (mddev->level != 1 && mddev->level != 10)
+- bioset_exit(&mddev->io_acct_set);
+-exit_sync_set:
+ bioset_exit(&mddev->sync_set);
+ exit_bio_set:
+ bioset_exit(&mddev->bio_set);
+@@ -6268,14 +6281,15 @@ static void __md_stop(struct mddev *mddev)
+
+ void md_stop(struct mddev *mddev)
+ {
++ lockdep_assert_held(&mddev->reconfig_mutex);
++
+ /* stop the array and free an attached data structures.
+ * This is called from dm-raid
+ */
++ __md_stop_writes(mddev);
+ __md_stop(mddev);
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
+- if (mddev->level != 1 && mddev->level != 10)
+- bioset_exit(&mddev->io_acct_set);
+ }
+
+ EXPORT_SYMBOL_GPL(md_stop);
+@@ -7425,7 +7439,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
+ err = -ENODEV;
+ else {
+ md_error(mddev, rdev);
+- if (!test_bit(Faulty, &rdev->flags))
++ if (test_bit(MD_BROKEN, &mddev->flags))
+ err = -EBUSY;
+ }
+ rcu_read_unlock();
+@@ -7943,17 +7957,22 @@ EXPORT_SYMBOL(md_register_thread);
+
+ void md_unregister_thread(struct md_thread **threadp)
+ {
+- struct md_thread *thread = *threadp;
+- if (!thread)
+- return;
+- pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+- /* Locking ensures that mddev_unlock does not wake_up a
++ struct md_thread *thread;
++
++ /*
++ * Locking ensures that mddev_unlock does not wake_up a
+ * non-existent thread
+ */
+ spin_lock(&pers_lock);
++ thread = *threadp;
++ if (!thread) {
++ spin_unlock(&pers_lock);
++ return;
++ }
+ *threadp = NULL;
+ spin_unlock(&pers_lock);
+
++ pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+ kthread_stop(thread->tsk);
+ kfree(thread);
+ }
+@@ -7966,13 +7985,19 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
+
+ if (!mddev->pers || !mddev->pers->error_handler)
+ return;
+- mddev->pers->error_handler(mddev,rdev);
+- if (mddev->degraded)
++ mddev->pers->error_handler(mddev, rdev);
++
++ if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
++ return;
++
++ if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+- md_wakeup_thread(mddev->thread);
++ if (!test_bit(MD_BROKEN, &mddev->flags)) {
++ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
++ md_wakeup_thread(mddev->thread);
++ }
+ if (mddev->event_work.func)
+ queue_work(md_misc_wq, &mddev->event_work);
+ md_new_event(mddev);
+@@ -8580,6 +8605,23 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
+ }
+ EXPORT_SYMBOL_GPL(md_submit_discard_bio);
+
++int acct_bioset_init(struct mddev *mddev)
++{
++ int err = 0;
++
++ if (!bioset_initialized(&mddev->io_acct_set))
++ err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
++ offsetof(struct md_io_acct, bio_clone), 0);
++ return err;
++}
++EXPORT_SYMBOL_GPL(acct_bioset_init);
++
++void acct_bioset_exit(struct mddev *mddev)
++{
++ bioset_exit(&mddev->io_acct_set);
++}
++EXPORT_SYMBOL_GPL(acct_bioset_exit);
++
+ static void md_end_io_acct(struct bio *bio)
+ {
+ struct md_io_acct *md_io_acct = bio->bi_private;
+@@ -9446,6 +9488,7 @@ void md_reap_sync_thread(struct mddev *mddev)
+ wake_up(&resync_wait);
+ /* flag recovery needed just to double check */
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
++ sysfs_notify_dirent_safe(mddev->sysfs_completed);
+ sysfs_notify_dirent_safe(mddev->sysfs_action);
+ md_new_event(mddev);
+ if (mddev->event_work.func)
+@@ -9754,16 +9797,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
+
+ void md_reload_sb(struct mddev *mddev, int nr)
+ {
+- struct md_rdev *rdev;
++ struct md_rdev *rdev = NULL, *iter;
+ int err;
+
+ /* Find the rdev */
+- rdev_for_each_rcu(rdev, mddev) {
+- if (rdev->desc_nr == nr)
++ rdev_for_each_rcu(iter, mddev) {
++ if (iter->desc_nr == nr) {
++ rdev = iter;
+ break;
++ }
+ }
+
+- if (!rdev || rdev->desc_nr != nr) {
++ if (!rdev) {
+ pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
+ return;
+ }
+diff --git a/drivers/md/md.h b/drivers/md/md.h
+index 4c96c36bd01a1..99780e89531e5 100644
+--- a/drivers/md/md.h
++++ b/drivers/md/md.h
+@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int is_new);
+ struct md_cluster_info;
+
+-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
++/**
++ * enum mddev_flags - md device flags.
++ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
++ * @MD_CLOSING: If set, we are closing the array, do not open it then.
++ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
++ * @MD_HAS_JOURNAL: The raid array has journal feature set.
++ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means the node already
++ *				took the resync lock and needs to release it.
++ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
++ * calls to md_error() will never cause the array to
++ * become failed.
++ * @MD_HAS_PPL: The raid array has PPL feature set.
++ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
++ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
++ * without taking reconfig_mutex.
++ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
++ * explicitly holding reconfig_mutex.
++ * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
++ * array is ready yet.
++ * @MD_BROKEN: This is used to stop writes and mark array as failed.
++ *
++ * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
++ */
+ enum mddev_flags {
+- MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
+- MD_CLOSING, /* If set, we are closing the array, do not open
+- * it then */
+- MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
+- MD_HAS_JOURNAL, /* The raid array has journal feature set */
+- MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
+- * already took resync lock, need to
+- * release the lock */
+- MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
+- * supported as calls to md_error() will
+- * never cause the array to become failed.
+- */
+- MD_HAS_PPL, /* The raid array has PPL feature set */
+- MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
+- MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
+- * the metadata without taking reconfig_mutex.
+- */
+- MD_UPDATING_SB, /* md_check_recovery is updating the metadata
+- * without explicitly holding reconfig_mutex.
+- */
+- MD_NOT_READY, /* do_md_run() is active, so 'array_state'
+- * must not report that array is ready yet
+- */
+- MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
+- * I/O in case an array member is gone/failed.
+- */
++ MD_ARRAY_FIRST_USE,
++ MD_CLOSING,
++ MD_JOURNAL_CLEAN,
++ MD_HAS_JOURNAL,
++ MD_CLUSTER_RESYNC_LOCKED,
++ MD_FAILFAST_SUPPORTED,
++ MD_HAS_PPL,
++ MD_HAS_MULTIPLE_PPLS,
++ MD_ALLOW_SB_UPDATE,
++ MD_UPDATING_SB,
++ MD_NOT_READY,
++ MD_BROKEN,
+ };
+
+ enum mddev_sb_flags {
+@@ -721,6 +729,8 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
+ extern void md_finish_reshape(struct mddev *mddev);
+ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
+ struct bio *bio, sector_t start, sector_t size);
++int acct_bioset_init(struct mddev *mddev);
++void acct_bioset_exit(struct mddev *mddev);
+ void md_account_bio(struct mddev *mddev, struct bio **bio);
+
+ extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
+@@ -762,15 +772,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
+ struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
+ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
+
+-static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
++static inline bool is_rdev_broken(struct md_rdev *rdev)
+ {
+- if (!disk_live(rdev->bdev->bd_disk)) {
+- if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
+- pr_warn("md: %s: %s array has a missing/failed member\n",
+- mdname(rdev->mddev), md_type);
+- return true;
+- }
+- return false;
++ return !disk_live(rdev->bdev->bd_disk);
+ }
+
+ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
+diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
+index 70532335c7c7e..cb670f16e98e9 100644
+--- a/drivers/md/persistent-data/dm-btree-remove.c
++++ b/drivers/md/persistent-data/dm-btree-remove.c
+@@ -423,9 +423,9 @@ static int rebalance_children(struct shadow_spine *s,
+
+ memcpy(n, dm_block_data(child),
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+- dm_tm_unlock(info->tm, child);
+
+ dm_tm_dec(info->tm, dm_block_location(child));
++ dm_tm_unlock(info->tm, child);
+ return 0;
+ }
+
+diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
+index 0703ca7a7d9a4..5ce64e93aae74 100644
+--- a/drivers/md/persistent-data/dm-btree.c
++++ b/drivers/md/persistent-data/dm-btree.c
+@@ -81,14 +81,16 @@ void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
+ }
+
+ static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
+- uint64_t key, void *value)
+- __dm_written_to_disk(value)
++ uint64_t key, void *value)
++ __dm_written_to_disk(value)
+ {
+ uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
++ uint32_t max_entries = le32_to_cpu(node->header.max_entries);
+ __le64 key_le = cpu_to_le64(key);
+
+ if (index > nr_entries ||
+- index >= le32_to_cpu(node->header.max_entries)) {
++ index >= max_entries ||
++ nr_entries >= max_entries) {
+ DMERR("too many entries in btree node for insert");
+ __dm_unbless_for_disk(value);
+ return -ENOMEM;
+diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
+index 4a6a2a9b4eb49..bfbfa750e0160 100644
+--- a/drivers/md/persistent-data/dm-space-map-common.c
++++ b/drivers/md/persistent-data/dm-space-map-common.c
+@@ -283,6 +283,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+ struct disk_index_entry ie_disk;
+ struct dm_block *blk;
+
++ if (b >= ll->nr_blocks) {
++ DMERR_LIMIT("metadata block out of bounds");
++ return -EINVAL;
++ }
++
+ b = do_div(index, ll->entries_per_block);
+ r = ll->load_ie(ll, index, &ie_disk);
+ if (r < 0)
+diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
+index 62c8b6adac70e..2290307e0910f 100644
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -48,7 +48,7 @@ static void dump_zones(struct mddev *mddev)
+ int len = 0;
+
+ for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
+- len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
++ len += scnprintf(line+len, 200-len, "%s%s", k?"/":"",
+ bdevname(conf->devlist[j*raid_disks
+ + k]->bdev, b));
+ pr_debug("md: zone%d=[%s]\n", j, line);
+@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
+ pr_debug("md/raid0:%s: FINAL %d zones\n",
+ mdname(mddev), conf->nr_strip_zones);
+
+- if (conf->nr_strip_zones == 1) {
+- conf->layout = RAID0_ORIG_LAYOUT;
+- } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+- mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+- conf->layout = mddev->layout;
+- } else if (default_layout == RAID0_ORIG_LAYOUT ||
+- default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+- conf->layout = default_layout;
+- } else {
+- pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+- mdname(mddev));
+- pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+- err = -ENOTSUPP;
+- goto abort;
+- }
+ /*
+ * now since we have the hard sector sizes, we can make sure
+ * chunk size is a multiple of that sector size
+@@ -273,6 +258,34 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
+ (unsigned long long)smallest->sectors);
+ }
+
++ if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
++ conf->layout = RAID0_ORIG_LAYOUT;
++ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
++ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
++ conf->layout = mddev->layout;
++ } else if (default_layout == RAID0_ORIG_LAYOUT ||
++ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
++ conf->layout = default_layout;
++ } else {
++ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
++ mdname(mddev));
++ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
++ err = -EOPNOTSUPP;
++ goto abort;
++ }
++
++ if (conf->layout == RAID0_ORIG_LAYOUT) {
++ for (i = 1; i < conf->nr_strip_zones; i++) {
++ sector_t first_sector = conf->strip_zone[i-1].zone_end;
++
++ sector_div(first_sector, mddev->chunk_sectors);
++ zone = conf->strip_zone + i;
++ /* disk_shift is first disk index used in the zone */
++ zone->disk_shift = sector_div(first_sector,
++ zone->nb_dev);
++ }
++ }
++
+ pr_debug("md/raid0:%s: done.\n", mdname(mddev));
+ *private_conf = conf;
+
+@@ -356,7 +369,20 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
+ return array_sectors;
+ }
+
+-static void raid0_free(struct mddev *mddev, void *priv);
++static void free_conf(struct mddev *mddev, struct r0conf *conf)
++{
++ kfree(conf->strip_zone);
++ kfree(conf->devlist);
++ kfree(conf);
++}
++
++static void raid0_free(struct mddev *mddev, void *priv)
++{
++ struct r0conf *conf = priv;
++
++ free_conf(mddev, conf);
++ acct_bioset_exit(mddev);
++}
+
+ static int raid0_run(struct mddev *mddev)
+ {
+@@ -370,11 +396,16 @@ static int raid0_run(struct mddev *mddev)
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
+
++ if (acct_bioset_init(mddev)) {
++ pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev));
++ return -ENOMEM;
++ }
++
+ /* if private is not null, we are here after takeover */
+ if (mddev->private == NULL) {
+ ret = create_strip_zones(mddev, &conf);
+ if (ret < 0)
+- return ret;
++ goto exit_acct_set;
+ mddev->private = conf;
+ }
+ conf = mddev->private;
+@@ -413,17 +444,30 @@ static int raid0_run(struct mddev *mddev)
+ dump_zones(mddev);
+
+ ret = md_integrity_register(mddev);
++ if (ret)
++ goto free;
++
++ return ret;
+
++free:
++ free_conf(mddev, conf);
++exit_acct_set:
++ acct_bioset_exit(mddev);
+ return ret;
+ }
+
+-static void raid0_free(struct mddev *mddev, void *priv)
++/*
++ * Convert disk_index to the disk order in which it is read/written.
++ * For example, if we have 4 disks, they are numbered 0,1,2,3. If we
++ * write the disks starting at disk 3, then the read/write order would
++ * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift()
++ * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map
++ * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in
++ * that 'output' space to understand the read/write disk ordering.
++ */
++static int map_disk_shift(int disk_index, int num_disks, int disk_shift)
+ {
+- struct r0conf *conf = priv;
+-
+- kfree(conf->strip_zone);
+- kfree(conf->devlist);
+- kfree(conf);
++ return ((disk_index + num_disks - disk_shift) % num_disks);
+ }
+
+ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+@@ -439,7 +483,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+ sector_t end_disk_offset;
+ unsigned int end_disk_index;
+ unsigned int disk;
++ sector_t orig_start, orig_end;
+
++ orig_start = start;
+ zone = find_zone(conf, &start);
+
+ if (bio_end_sector(bio) > zone->zone_end) {
+@@ -453,6 +499,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+ } else
+ end = bio_end_sector(bio);
+
++ orig_end = end;
+ if (zone != conf->strip_zone)
+ end = end - zone[-1].zone_end;
+
+@@ -464,13 +511,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+ last_stripe_index = end;
+ sector_div(last_stripe_index, stripe_size);
+
+- start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+- mddev->chunk_sectors;
++ /* In the first zone the original and alternate layouts are the same */
++ if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) {
++ sector_div(orig_start, mddev->chunk_sectors);
++ start_disk_index = sector_div(orig_start, zone->nb_dev);
++ start_disk_index = map_disk_shift(start_disk_index,
++ zone->nb_dev,
++ zone->disk_shift);
++ sector_div(orig_end, mddev->chunk_sectors);
++ end_disk_index = sector_div(orig_end, zone->nb_dev);
++ end_disk_index = map_disk_shift(end_disk_index,
++ zone->nb_dev, zone->disk_shift);
++ } else {
++ start_disk_index = (int)(start - first_stripe_index * stripe_size) /
++ mddev->chunk_sectors;
++ end_disk_index = (int)(end - last_stripe_index * stripe_size) /
++ mddev->chunk_sectors;
++ }
+ start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+ mddev->chunk_sectors) +
+ first_stripe_index * mddev->chunk_sectors;
+- end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+- mddev->chunk_sectors;
+ end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+ mddev->chunk_sectors) +
+ last_stripe_index * mddev->chunk_sectors;
+@@ -478,18 +538,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+ for (disk = 0; disk < zone->nb_dev; disk++) {
+ sector_t dev_start, dev_end;
+ struct md_rdev *rdev;
++ int compare_disk;
++
++ compare_disk = map_disk_shift(disk, zone->nb_dev,
++ zone->disk_shift);
+
+- if (disk < start_disk_index)
++ if (compare_disk < start_disk_index)
+ dev_start = (first_stripe_index + 1) *
+ mddev->chunk_sectors;
+- else if (disk > start_disk_index)
++ else if (compare_disk > start_disk_index)
+ dev_start = first_stripe_index * mddev->chunk_sectors;
+ else
+ dev_start = start_disk_offset;
+
+- if (disk < end_disk_index)
++ if (compare_disk < end_disk_index)
+ dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+- else if (disk > end_disk_index)
++ else if (compare_disk > end_disk_index)
+ dev_end = last_stripe_index * mddev->chunk_sectors;
+ else
+ dev_end = end_disk_offset;
+@@ -506,54 +570,20 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+ bio_endio(bio);
+ }
+
+-static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
++static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
+ {
+ struct r0conf *conf = mddev->private;
+ struct strip_zone *zone;
+ struct md_rdev *tmp_dev;
+- sector_t bio_sector;
+- sector_t sector;
+- sector_t orig_sector;
+- unsigned chunk_sects;
+- unsigned sectors;
++ sector_t bio_sector = bio->bi_iter.bi_sector;
++ sector_t sector = bio_sector;
+
+- if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+- && md_flush_request(mddev, bio))
+- return true;
+-
+- if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+- raid0_handle_discard(mddev, bio);
+- return true;
+- }
+-
+- bio_sector = bio->bi_iter.bi_sector;
+- sector = bio_sector;
+- chunk_sects = mddev->chunk_sectors;
+-
+- sectors = chunk_sects -
+- (likely(is_power_of_2(chunk_sects))
+- ? (sector & (chunk_sects-1))
+- : sector_div(sector, chunk_sects));
++ md_account_bio(mddev, &bio);
+
+- /* Restore due to sector_div */
+- sector = bio_sector;
+-
+- if (sectors < bio_sectors(bio)) {
+- struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+- &mddev->bio_set);
+- bio_chain(split, bio);
+- submit_bio_noacct(bio);
+- bio = split;
+- }
+-
+- if (bio->bi_pool != &mddev->bio_set)
+- md_account_bio(mddev, &bio);
+-
+- orig_sector = sector;
+ zone = find_zone(mddev->private, &sector);
+ switch (conf->layout) {
+ case RAID0_ORIG_LAYOUT:
+- tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
++ tmp_dev = map_sector(mddev, zone, bio_sector, &sector);
+ break;
+ case RAID0_ALT_MULTIZONE_LAYOUT:
+ tmp_dev = map_sector(mddev, zone, sector, &sector);
+@@ -561,12 +591,13 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+ default:
+ WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
+ bio_io_error(bio);
+- return true;
++ return;
+ }
+
+- if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
++ if (unlikely(is_rdev_broken(tmp_dev))) {
+ bio_io_error(bio);
+- return true;
++ md_error(mddev, tmp_dev);
++ return;
+ }
+
+ bio_set_dev(bio, tmp_dev->bdev);
+@@ -579,6 +610,40 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+ mddev_check_writesame(mddev, bio);
+ mddev_check_write_zeroes(mddev, bio);
+ submit_bio_noacct(bio);
++}
++
++static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
++{
++ sector_t sector;
++ unsigned chunk_sects;
++ unsigned sectors;
++
++ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
++ && md_flush_request(mddev, bio))
++ return true;
++
++ if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
++ raid0_handle_discard(mddev, bio);
++ return true;
++ }
++
++ sector = bio->bi_iter.bi_sector;
++ chunk_sects = mddev->chunk_sectors;
++
++ sectors = chunk_sects -
++ (likely(is_power_of_2(chunk_sects))
++ ? (sector & (chunk_sects-1))
++ : sector_div(sector, chunk_sects));
++
++ if (sectors < bio_sectors(bio)) {
++ struct bio *split = bio_split(bio, sectors, GFP_NOIO,
++ &mddev->bio_set);
++ bio_chain(split, bio);
++ raid0_map_submit_bio(mddev, bio);
++ bio = split;
++ }
++
++ raid0_map_submit_bio(mddev, bio);
+ return true;
+ }
+
+@@ -588,6 +653,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
+ return;
+ }
+
++static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
++{
++ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
++ char *md_name = mdname(mddev);
++
++ pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n",
++ md_name, rdev->bdev);
++ }
++}
++
+ static void *raid0_takeover_raid45(struct mddev *mddev)
+ {
+ struct md_rdev *rdev;
+@@ -763,6 +838,7 @@ static struct md_personality raid0_personality=
+ .size = raid0_size,
+ .takeover = raid0_takeover,
+ .quiesce = raid0_quiesce,
++ .error_handler = raid0_error,
+ };
+
+ static int __init raid0_init (void)
+diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
+index 3816e5477db1e..8cc761ca74230 100644
+--- a/drivers/md/raid0.h
++++ b/drivers/md/raid0.h
+@@ -6,6 +6,7 @@ struct strip_zone {
+ sector_t zone_end; /* Start of the next zone (in sectors) */
+ sector_t dev_start; /* Zone offset in real dev (in sectors) */
+ int nb_dev; /* # of devices attached to the zone */
++ int disk_shift; /* start disk for the original layout */
+ };
+
+ /* Linux 3.14 (20d0189b101) made an unintended change to
+diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
+index 54db341639687..83f9a4f3d82e0 100644
+--- a/drivers/md/raid1-10.c
++++ b/drivers/md/raid1-10.c
+@@ -22,12 +22,6 @@
+
+ #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
+-/* When there are this many requests queue to be written by
+- * the raid thread, we become 'congested' to provide back-pressure
+- * for writeback.
+- */
+-static int max_queued_requests = 1024;
+-
+ /* for managing resync I/O pages */
+ struct resync_pages {
+ void *raid_bio;
+diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
+index 19598bd38939d..084bfea6ad316 100644
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1358,12 +1358,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+ r1_bio = alloc_r1bio(mddev, bio);
+ r1_bio->sectors = max_write_sectors;
+
+- if (conf->pending_count >= max_queued_requests) {
+- md_wakeup_thread(mddev->thread);
+- raid1_log(mddev, "wait queued");
+- wait_event(conf->wait_barrier,
+- conf->pending_count < max_queued_requests);
+- }
+ /* first select target devices under rcu_lock and
+ * inc refcount on their rdev. Record them by setting
+ * bios[x] to bio
+@@ -1502,6 +1496,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+ * allocate memory, or a reader on WriteMostly
+ * is waiting for behind writes to flush */
+ if (bitmap &&
++ test_bit(WriteMostly, &rdev->flags) &&
+ (atomic_read(&bitmap->behind_writes)
+ < mddev->bitmap_info.max_write_behind) &&
+ !waitqueue_active(&bitmap->behind_wait)) {
+@@ -1616,30 +1611,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
+ seq_printf(seq, "]");
+ }
+
++/**
++ * raid1_error() - RAID1 error handler.
++ * @mddev: affected md device.
++ * @rdev: member device to fail.
++ *
++ * The routine acknowledges &rdev failure and determines new @mddev state.
++ * If it failed, then:
++ * - &MD_BROKEN flag is set in &mddev->flags.
++ * - recovery is disabled.
++ * Otherwise, it must be degraded:
++ * - recovery is interrupted.
++ * - &mddev->degraded is bumped.
++ *
++ * @rdev is marked as &Faulty excluding case when array is failed and
++ * &mddev->fail_last_dev is off.
++ */
+ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
+ {
+ char b[BDEVNAME_SIZE];
+ struct r1conf *conf = mddev->private;
+ unsigned long flags;
+
+- /*
+- * If it is not operational, then we have already marked it as dead
+- * else if it is the last working disks with "fail_last_dev == false",
+- * ignore the error, let the next level up know.
+- * else mark the drive as failed
+- */
+ spin_lock_irqsave(&conf->device_lock, flags);
+- if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
+- && (conf->raid_disks - mddev->degraded) == 1) {
+- /*
+- * Don't fail the drive, act as though we were just a
+- * normal single drive.
+- * However don't try a recovery from this drive as
+- * it is very likely to fail.
+- */
+- conf->recovery_disabled = mddev->recovery_disabled;
+- spin_unlock_irqrestore(&conf->device_lock, flags);
+- return;
++
++ if (test_bit(In_sync, &rdev->flags) &&
++ (conf->raid_disks - mddev->degraded) == 1) {
++ set_bit(MD_BROKEN, &mddev->flags);
++
++ if (!mddev->fail_last_dev) {
++ conf->recovery_disabled = mddev->recovery_disabled;
++ spin_unlock_irqrestore(&conf->device_lock, flags);
++ return;
++ }
+ }
+ set_bit(Blocked, &rdev->flags);
+ if (test_and_clear_bit(In_sync, &rdev->flags))
+@@ -3140,6 +3144,7 @@ static int raid1_run(struct mddev *mddev)
+ * RAID1 needs at least one disk in active
+ */
+ if (conf->raid_disks - mddev->degraded < 1) {
++ md_unregister_thread(&conf->thread);
+ ret = -EINVAL;
+ goto abort;
+ }
+@@ -3411,4 +3416,3 @@ MODULE_ALIAS("md-personality-3"); /* RAID1 */
+ MODULE_ALIAS("md-raid1");
+ MODULE_ALIAS("md-level-1");
+
+-module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index aa2636582841e..910e7db7d5736 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -297,7 +297,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
+ bio->bi_status = BLK_STS_IOERR;
+
+- if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
++ if (r10_bio->start_time)
+ bio_end_io_acct(bio, r10_bio->start_time);
+ bio_endio(bio);
+ /*
+@@ -752,8 +752,16 @@ static struct md_rdev *read_balance(struct r10conf *conf,
+ disk = r10_bio->devs[slot].devnum;
+ rdev = rcu_dereference(conf->mirrors[disk].replacement);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
+- r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
++ r10_bio->devs[slot].addr + sectors >
++ rdev->recovery_offset) {
++ /*
++ * Read replacement first to prevent reading both rdev
++ * and replacement as NULL during replacement replace
++ * rdev.
++ */
++ smp_mb();
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
++ }
+ if (rdev == NULL ||
+ test_bit(Faulty, &rdev->flags))
+ continue;
+@@ -952,42 +960,62 @@ static void lower_barrier(struct r10conf *conf)
+ wake_up(&conf->wait_barrier);
+ }
+
+-static void wait_barrier(struct r10conf *conf)
++static bool stop_waiting_barrier(struct r10conf *conf)
+ {
++ struct bio_list *bio_list = current->bio_list;
++
++ /* barrier is dropped */
++ if (!conf->barrier)
++ return true;
++
++ /*
++ * If there are already pending requests (preventing the barrier from
++ * rising completely), and the pre-process bio queue isn't empty, then
++ * don't wait, as we need to empty that queue to get the nr_pending
++ * count down.
++ */
++ if (atomic_read(&conf->nr_pending) && bio_list &&
++ (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
++ return true;
++
++ /*
++ * move on if io is issued from raid10d(), since nr_pending is not released
++ * from the original io (see handle_read_error()). All raise_barrier() calls
++ * are blocked until this io is done.
++ */
++ if (conf->mddev->thread->tsk == current) {
++ WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0);
++ return true;
++ }
++
++ return false;
++}
++
++static bool wait_barrier(struct r10conf *conf, bool nowait)
++{
++ bool ret = true;
++
+ spin_lock_irq(&conf->resync_lock);
+ if (conf->barrier) {
+- struct bio_list *bio_list = current->bio_list;
+ conf->nr_waiting++;
+- /* Wait for the barrier to drop.
+- * However if there are already pending
+- * requests (preventing the barrier from
+- * rising completely), and the
+- * pre-process bio queue isn't empty,
+- * then don't wait, as we need to empty
+- * that queue to get the nr_pending
+- * count down.
+- */
+- raid10_log(conf->mddev, "wait barrier");
+- wait_event_lock_irq(conf->wait_barrier,
+- !conf->barrier ||
+- (atomic_read(&conf->nr_pending) &&
+- bio_list &&
+- (!bio_list_empty(&bio_list[0]) ||
+- !bio_list_empty(&bio_list[1]))) ||
+- /* move on if recovery thread is
+- * blocked by us
+- */
+- (conf->mddev->thread->tsk == current &&
+- test_bit(MD_RECOVERY_RUNNING,
+- &conf->mddev->recovery) &&
+- conf->nr_queued > 0),
+- conf->resync_lock);
++ /* Return false when nowait flag is set */
++ if (nowait) {
++ ret = false;
++ } else {
++ raid10_log(conf->mddev, "wait barrier");
++ wait_event_lock_irq(conf->wait_barrier,
++ stop_waiting_barrier(conf),
++ conf->resync_lock);
++ }
+ conf->nr_waiting--;
+ if (!conf->nr_waiting)
+ wake_up(&conf->wait_barrier);
+ }
+- atomic_inc(&conf->nr_pending);
++ /* Only increment nr_pending when we wait */
++ if (ret)
++ atomic_inc(&conf->nr_pending);
+ spin_unlock_irq(&conf->resync_lock);
++ return ret;
+ }
+
+ static void allow_barrier(struct r10conf *conf)
+@@ -1098,21 +1126,30 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+ * currently.
+ * 2. If IO spans the reshape position. Need to wait for reshape to pass.
+ */
+-static void regular_request_wait(struct mddev *mddev, struct r10conf *conf,
++static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
+ struct bio *bio, sector_t sectors)
+ {
+- wait_barrier(conf);
++ /* Bail out if REQ_NOWAIT is set for the bio */
++ if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
++ bio_wouldblock_error(bio);
++ return false;
++ }
+ while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ bio->bi_iter.bi_sector < conf->reshape_progress &&
+ bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+- raid10_log(conf->mddev, "wait reshape");
+ allow_barrier(conf);
++ if (bio->bi_opf & REQ_NOWAIT) {
++ bio_wouldblock_error(bio);
++ return false;
++ }
++ raid10_log(conf->mddev, "wait reshape");
+ wait_event(conf->wait_barrier,
+ conf->reshape_progress <= bio->bi_iter.bi_sector ||
+ conf->reshape_progress >= bio->bi_iter.bi_sector +
+ sectors);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ }
++ return true;
+ }
+
+ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+@@ -1157,7 +1194,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+ rcu_read_unlock();
+ }
+
+- regular_request_wait(mddev, conf, bio, r10_bio->sectors);
++ if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
++ return;
+ rdev = read_balance(conf, r10_bio, &max_sectors);
+ if (!rdev) {
+ if (err_rdev) {
+@@ -1179,14 +1217,15 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+ bio_chain(split, bio);
+ allow_barrier(conf);
+ submit_bio_noacct(bio);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ bio = split;
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = max_sectors;
+ }
+ slot = r10_bio->read_slot;
+
+- if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
++ if (!r10_bio->start_time &&
++ blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ r10_bio->start_time = bio_start_io_acct(bio);
+ read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+
+@@ -1277,6 +1316,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
+ }
+ }
+
++static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
++ struct md_rdev **prrdev)
++{
++ struct md_rdev *rdev, *rrdev;
++
++ rrdev = rcu_dereference(mirror->replacement);
++ /*
++ * Read replacement first to prevent reading both rdev and
++ * replacement as NULL during replacement replace rdev.
++ */
++ smp_mb();
++ rdev = rcu_dereference(mirror->rdev);
++ if (rdev == rrdev)
++ rrdev = NULL;
++
++ *prrdev = rrdev;
++ return rdev;
++}
++
+ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
+ {
+ int i;
+@@ -1287,11 +1345,9 @@ retry_wait:
+ blocked_rdev = NULL;
+ rcu_read_lock();
+ for (i = 0; i < conf->copies; i++) {
+- struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+- struct md_rdev *rrdev = rcu_dereference(
+- conf->mirrors[i].replacement);
+- if (rdev == rrdev)
+- rrdev = NULL;
++ struct md_rdev *rdev, *rrdev;
++
++ rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+@@ -1338,7 +1394,7 @@ retry_wait:
+ raid10_log(conf->mddev, "%s wait rdev %d blocked",
+ __func__, blocked_rdev->raid_disk);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ goto retry_wait;
+ }
+ }
+@@ -1356,6 +1412,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ bio->bi_iter.bi_sector,
+ bio_end_sector(bio)))) {
+ DEFINE_WAIT(w);
++ /* Bail out if REQ_NOWAIT is set for the bio */
++ if (bio->bi_opf & REQ_NOWAIT) {
++ bio_wouldblock_error(bio);
++ return;
++ }
+ for (;;) {
+ prepare_to_wait(&conf->wait_barrier,
+ &w, TASK_IDLE);
+@@ -1368,7 +1429,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ }
+
+ sectors = r10_bio->sectors;
+- regular_request_wait(mddev, conf, bio, sectors);
++ if (!regular_request_wait(mddev, conf, bio, sectors))
++ return;
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ (mddev->reshape_backwards
+ ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+@@ -1380,6 +1442,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ set_mask_bits(&mddev->sb_flags, 0,
+ BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
+ md_wakeup_thread(mddev->thread);
++ if (bio->bi_opf & REQ_NOWAIT) {
++ allow_barrier(conf);
++ bio_wouldblock_error(bio);
++ return;
++ }
+ raid10_log(conf->mddev, "wait reshape metadata");
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+@@ -1387,12 +1454,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ conf->reshape_safe = mddev->reshape_position;
+ }
+
+- if (conf->pending_count >= max_queued_requests) {
+- md_wakeup_thread(mddev->thread);
+- raid10_log(mddev, "wait queued");
+- wait_event(conf->wait_barrier,
+- conf->pending_count < max_queued_requests);
+- }
+ /* first select target devices under rcu_lock and
+ * inc refcount on their rdev. Record them by setting
+ * bios[x] to bio
+@@ -1413,11 +1474,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+
+ for (i = 0; i < conf->copies; i++) {
+ int d = r10_bio->devs[i].devnum;
+- struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
+- struct md_rdev *rrdev = rcu_dereference(
+- conf->mirrors[d].replacement);
+- if (rdev == rrdev)
+- rrdev = NULL;
++ struct md_rdev *rdev, *rrdev;
++
++ rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
+ if (rdev && (test_bit(Faulty, &rdev->flags)))
+ rdev = NULL;
+ if (rrdev && (test_bit(Faulty, &rrdev->flags)))
+@@ -1482,7 +1541,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ bio_chain(split, bio);
+ allow_barrier(conf);
+ submit_bio_noacct(bio);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ bio = split;
+ r10_bio->master_bio = bio;
+ }
+@@ -1515,6 +1574,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
+ r10_bio->sector = bio->bi_iter.bi_sector;
+ r10_bio->state = 0;
+ r10_bio->read_slot = -1;
++ r10_bio->start_time = 0;
+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
+ conf->geo.raid_disks);
+
+@@ -1607,7 +1667,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ return -EAGAIN;
+
+- wait_barrier(conf);
++ if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
++ bio_wouldblock_error(bio);
++ return 0;
++ }
++ wait_barrier(conf, false);
+
+ /*
+ * Check reshape again to avoid reshape happens after checking
+@@ -1649,7 +1713,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
+ allow_barrier(conf);
+ /* Resend the fist split part */
+ submit_bio_noacct(split);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ }
+ div_u64_rem(bio_end, stripe_size, &remainder);
+ if (remainder) {
+@@ -1660,7 +1724,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
+ /* Resend the second split part */
+ submit_bio_noacct(bio);
+ bio = split;
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ }
+
+ bio_start = bio->bi_iter.bi_sector;
+@@ -1719,10 +1783,9 @@ retry_discard:
+ */
+ rcu_read_lock();
+ for (disk = 0; disk < geo->raid_disks; disk++) {
+- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+- struct md_rdev *rrdev = rcu_dereference(
+- conf->mirrors[disk].replacement);
++ struct md_rdev *rdev, *rrdev;
+
++ rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
+ r10_bio->devs[disk].bio = NULL;
+ r10_bio->devs[disk].repl_bio = NULL;
+
+@@ -1816,7 +1879,7 @@ retry_discard:
+ end_disk_offset += geo->stride;
+ atomic_inc(&first_r10bio->remaining);
+ raid_end_discard_bio(r10_bio);
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ goto retry_discard;
+ }
+
+@@ -1945,32 +2008,40 @@ static int enough(struct r10conf *conf, int ignore)
+ _enough(conf, 1, ignore);
+ }
+
++/**
++ * raid10_error() - RAID10 error handler.
++ * @mddev: affected md device.
++ * @rdev: member device to fail.
++ *
++ * The routine acknowledges &rdev failure and determines new @mddev state.
++ * If it failed, then:
++ * - &MD_BROKEN flag is set in &mddev->flags.
++ * Otherwise, it must be degraded:
++ * - recovery is interrupted.
++ * - &mddev->degraded is bumped.
++ *
++ * @rdev is marked as &Faulty excluding case when array is failed and
++ * &mddev->fail_last_dev is off.
++ */
+ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
+ {
+ char b[BDEVNAME_SIZE];
+ struct r10conf *conf = mddev->private;
+ unsigned long flags;
+
+- /*
+- * If it is not operational, then we have already marked it as dead
+- * else if it is the last working disks with "fail_last_dev == false",
+- * ignore the error, let the next level up know.
+- * else mark the drive as failed
+- */
+ spin_lock_irqsave(&conf->device_lock, flags);
+- if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
+- && !enough(conf, rdev->raid_disk)) {
+- /*
+- * Don't fail the drive, just return an IO error.
+- */
+- spin_unlock_irqrestore(&conf->device_lock, flags);
+- return;
++
++ if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
++ set_bit(MD_BROKEN, &mddev->flags);
++
++ if (!mddev->fail_last_dev) {
++ spin_unlock_irqrestore(&conf->device_lock, flags);
++ return;
++ }
+ }
+ if (test_and_clear_bit(In_sync, &rdev->flags))
+ mddev->degraded++;
+- /*
+- * If recovery is running, make sure it aborts.
+- */
++
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ set_bit(Blocked, &rdev->flags);
+ set_bit(Faulty, &rdev->flags);
+@@ -2011,7 +2082,7 @@ static void print_conf(struct r10conf *conf)
+
+ static void close_sync(struct r10conf *conf)
+ {
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ allow_barrier(conf);
+
+ mempool_exit(&conf->r10buf_pool);
+@@ -2139,9 +2210,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+ int err = 0;
+ int number = rdev->raid_disk;
+ struct md_rdev **rdevp;
+- struct raid10_info *p = conf->mirrors + number;
++ struct raid10_info *p;
+
+ print_conf(conf);
++ if (unlikely(number >= mddev->raid_disks))
++ return 0;
++ p = conf->mirrors + number;
+ if (rdev == p->rdev)
+ rdevp = &p->rdev;
+ else if (rdev == p->replacement)
+@@ -2539,11 +2613,22 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
+ {
+ struct r10conf *conf = mddev->private;
+ int d;
+- struct bio *wbio, *wbio2;
++ struct bio *wbio = r10_bio->devs[1].bio;
++ struct bio *wbio2 = r10_bio->devs[1].repl_bio;
++
++ /* Need to test wbio2->bi_end_io before we call
++ * submit_bio_noacct as if the former is NULL,
++ * the latter is free to free wbio2.
++ */
++ if (wbio2 && !wbio2->bi_end_io)
++ wbio2 = NULL;
+
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+ fix_recovery_read_error(r10_bio);
+- end_sync_request(r10_bio);
++ if (wbio->bi_end_io)
++ end_sync_request(r10_bio);
++ if (wbio2)
++ end_sync_request(r10_bio);
+ return;
+ }
+
+@@ -2552,14 +2637,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
+ * and submit the write request
+ */
+ d = r10_bio->devs[1].devnum;
+- wbio = r10_bio->devs[1].bio;
+- wbio2 = r10_bio->devs[1].repl_bio;
+- /* Need to test wbio2->bi_end_io before we call
+- * submit_bio_noacct as if the former is NULL,
+- * the latter is free to free wbio2.
+- */
+- if (wbio2 && !wbio2->bi_end_io)
+- wbio2 = NULL;
+ if (wbio->bi_end_io) {
+ atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
+@@ -2916,9 +2993,13 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
+ md_error(mddev, rdev);
+
+ rdev_dec_pending(rdev, mddev);
+- allow_barrier(conf);
+ r10_bio->state = 0;
+ raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
++ /*
++ * allow_barrier after re-submit to ensure no sync io
++ * can be issued while regular io pending.
++ */
++ allow_barrier(conf);
+ }
+
+ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
+@@ -3227,10 +3308,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ sector_t chunk_mask = conf->geo.chunk_mask;
+ int page_idx = 0;
+
+- if (!mempool_initialized(&conf->r10buf_pool))
+- if (init_resync(conf))
+- return 0;
+-
+ /*
+ * Allow skipping a full rebuild for incremental assembly
+ * of a clean array, like RAID1 does.
+@@ -3246,6 +3323,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ return mddev->dev_sectors - sector_nr;
+ }
+
++ if (!mempool_initialized(&conf->r10buf_pool))
++ if (init_resync(conf))
++ return 0;
++
+ skipped:
+ max_sector = mddev->dev_sectors;
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+@@ -3361,7 +3442,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ int must_sync;
+ int any_working;
+ int need_recover = 0;
+- int need_replace = 0;
+ struct raid10_info *mirror = &conf->mirrors[i];
+ struct md_rdev *mrdev, *mreplace;
+
+@@ -3373,11 +3453,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ !test_bit(Faulty, &mrdev->flags) &&
+ !test_bit(In_sync, &mrdev->flags))
+ need_recover = 1;
+- if (mreplace != NULL &&
+- !test_bit(Faulty, &mreplace->flags))
+- need_replace = 1;
++ if (mreplace && test_bit(Faulty, &mreplace->flags))
++ mreplace = NULL;
+
+- if (!need_recover && !need_replace) {
++ if (!need_recover && !mreplace) {
+ rcu_read_unlock();
+ continue;
+ }
+@@ -3393,8 +3472,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ rcu_read_unlock();
+ continue;
+ }
+- if (mreplace && test_bit(Faulty, &mreplace->flags))
+- mreplace = NULL;
+ /* Unless we are doing a full sync, or a replacement
+ * we only need to recover the block if it is set in
+ * the bitmap
+@@ -3517,11 +3594,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
+ bio = r10_bio->devs[1].repl_bio;
+ if (bio)
+ bio->bi_end_io = NULL;
+- /* Note: if need_replace, then bio
++ /* Note: if replace is not NULL, then bio
+ * cannot be NULL as r10buf_pool_alloc will
+ * have allocated it.
+ */
+- if (!need_replace)
++ if (!mreplace)
+ break;
+ bio->bi_next = biolist;
+ biolist = bio;
+@@ -3942,6 +4019,20 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
+ return nc*fc;
+ }
+
++static void raid10_free_conf(struct r10conf *conf)
++{
++ if (!conf)
++ return;
++
++ mempool_exit(&conf->r10bio_pool);
++ kfree(conf->mirrors);
++ kfree(conf->mirrors_old);
++ kfree(conf->mirrors_new);
++ safe_put_page(conf->tmppage);
++ bioset_exit(&conf->bio_split);
++ kfree(conf);
++}
++
+ static struct r10conf *setup_conf(struct mddev *mddev)
+ {
+ struct r10conf *conf = NULL;
+@@ -4024,13 +4115,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
+ return conf;
+
+ out:
+- if (conf) {
+- mempool_exit(&conf->r10bio_pool);
+- kfree(conf->mirrors);
+- safe_put_page(conf->tmppage);
+- bioset_exit(&conf->bio_split);
+- kfree(conf);
+- }
++ raid10_free_conf(conf);
+ return ERR_PTR(err);
+ }
+
+@@ -4068,6 +4153,9 @@ static int raid10_run(struct mddev *mddev)
+ if (!conf)
+ goto out;
+
++ mddev->thread = conf->thread;
++ conf->thread = NULL;
++
+ if (mddev_is_clustered(conf->mddev)) {
+ int fc, fo;
+
+@@ -4080,9 +4168,6 @@ static int raid10_run(struct mddev *mddev)
+ }
+ }
+
+- mddev->thread = conf->thread;
+- conf->thread = NULL;
+-
+ if (mddev->queue) {
+ blk_queue_max_discard_sectors(mddev->queue,
+ UINT_MAX);
+@@ -4236,10 +4321,7 @@ static int raid10_run(struct mddev *mddev)
+
+ out_free_conf:
+ md_unregister_thread(&mddev->thread);
+- mempool_exit(&conf->r10bio_pool);
+- safe_put_page(conf->tmppage);
+- kfree(conf->mirrors);
+- kfree(conf);
++ raid10_free_conf(conf);
+ mddev->private = NULL;
+ out:
+ return -EIO;
+@@ -4247,15 +4329,7 @@ out:
+
+ static void raid10_free(struct mddev *mddev, void *priv)
+ {
+- struct r10conf *conf = priv;
+-
+- mempool_exit(&conf->r10bio_pool);
+- safe_put_page(conf->tmppage);
+- kfree(conf->mirrors);
+- kfree(conf->mirrors_old);
+- kfree(conf->mirrors_new);
+- bioset_exit(&conf->bio_split);
+- kfree(conf);
++ raid10_free_conf(priv);
+ }
+
+ static void raid10_quiesce(struct mddev *mddev, int quiesce)
+@@ -4819,7 +4893,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
+ if (need_flush ||
+ time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
+ /* Need to update reshape_position in metadata */
+- wait_barrier(conf);
++ wait_barrier(conf, false);
+ mddev->reshape_position = conf->reshape_progress;
+ if (mddev->reshape_backwards)
+ mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
+@@ -5242,5 +5316,3 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
+ MODULE_ALIAS("md-personality-9"); /* RAID10 */
+ MODULE_ALIAS("md-raid10");
+ MODULE_ALIAS("md-level-10");
+-
+-module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
+diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
+index 02ed53b20654c..c2a42486f9855 100644
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -36,6 +36,7 @@
+ */
+
+ #include <linux/blkdev.h>
++#include <linux/delay.h>
+ #include <linux/kthread.h>
+ #include <linux/raid/pq.h>
+ #include <linux/async_tx.h>
+@@ -686,17 +687,17 @@ int raid5_calc_degraded(struct r5conf *conf)
+ return degraded;
+ }
+
+-static int has_failed(struct r5conf *conf)
++static bool has_failed(struct r5conf *conf)
+ {
+- int degraded;
++ int degraded = conf->mddev->degraded;
+
+- if (conf->mddev->reshape_position == MaxSector)
+- return conf->mddev->degraded > conf->max_degraded;
++ if (test_bit(MD_BROKEN, &conf->mddev->flags))
++ return true;
+
+- degraded = raid5_calc_degraded(conf);
+- if (degraded > conf->max_degraded)
+- return 1;
+- return 0;
++ if (conf->mddev->reshape_position != MaxSector)
++ degraded = raid5_calc_degraded(conf);
++
++ return degraded > conf->max_degraded;
+ }
+
+ struct stripe_head *
+@@ -2864,10 +2865,10 @@ static void raid5_end_write_request(struct bio *bi)
+ if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
+- raid5_release_stripe(sh);
+
+ if (sh->batch_head && sh != sh->batch_head)
+ raid5_release_stripe(sh->batch_head);
++ raid5_release_stripe(sh);
+ }
+
+ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
+@@ -2877,34 +2878,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
+ unsigned long flags;
+ pr_debug("raid456: error called\n");
+
++ pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
++ mdname(mddev), bdevname(rdev->bdev, b));
++
+ spin_lock_irqsave(&conf->device_lock, flags);
++ set_bit(Faulty, &rdev->flags);
++ clear_bit(In_sync, &rdev->flags);
++ mddev->degraded = raid5_calc_degraded(conf);
+
+- if (test_bit(In_sync, &rdev->flags) &&
+- mddev->degraded == conf->max_degraded) {
+- /*
+- * Don't allow to achieve failed state
+- * Don't try to recover this device
+- */
++ if (has_failed(conf)) {
++ set_bit(MD_BROKEN, &conf->mddev->flags);
+ conf->recovery_disabled = mddev->recovery_disabled;
+- spin_unlock_irqrestore(&conf->device_lock, flags);
+- return;
++
++ pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
++ mdname(mddev), mddev->degraded, conf->raid_disks);
++ } else {
++ pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
++ mdname(mddev), conf->raid_disks - mddev->degraded);
+ }
+
+- set_bit(Faulty, &rdev->flags);
+- clear_bit(In_sync, &rdev->flags);
+- mddev->degraded = raid5_calc_degraded(conf);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+
+ set_bit(Blocked, &rdev->flags);
+ set_mask_bits(&mddev->sb_flags, 0,
+ BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
+- pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
+- "md/raid:%s: Operation continuing on %d devices.\n",
+- mdname(mddev),
+- bdevname(rdev->bdev, b),
+- mdname(mddev),
+- conf->raid_disks - mddev->degraded);
+ r5c_update_on_rdev_error(mddev, rdev);
+ }
+
+@@ -3939,7 +3937,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
+ * back cache (prexor with orig_page, and then xor with
+ * page) in the read path
+ */
+- if (s->injournal && s->failed) {
++ if (s->to_read && s->injournal && s->failed) {
+ if (test_bit(STRIPE_R5C_CACHING, &sh->state))
+ r5c_make_stripe_write_out(sh);
+ goto out;
+@@ -5410,7 +5408,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
+
+ sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0,
+ &dd_idx, NULL);
+- end_sector = bio_end_sector(raid_bio);
++ end_sector = sector + bio_sectors(raid_bio);
+
+ rcu_read_lock();
+ if (r5c_big_stripe_cached(conf, sector))
+@@ -5433,7 +5431,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
+
+ if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
+ &bad_sectors)) {
+- bio_put(raid_bio);
+ rdev_dec_pending(rdev, mddev);
+ return 0;
+ }
+@@ -6525,7 +6522,18 @@ static void raid5d(struct md_thread *thread)
+ spin_unlock_irq(&conf->device_lock);
+ md_check_recovery(mddev);
+ spin_lock_irq(&conf->device_lock);
++
++ /*
++ * Waiting on MD_SB_CHANGE_PENDING below may deadlock
++	 * since md_check_recovery() is needed to clear
++ * the flag when using mdmon.
++ */
++ continue;
+ }
++
++ wait_event_lock_irq(mddev->sb_wait,
++ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
++ conf->device_lock);
+ }
+ pr_debug("%d stripes handled\n", handled);
+
+@@ -7446,12 +7454,19 @@ static int raid5_run(struct mddev *mddev)
+ struct md_rdev *rdev;
+ struct md_rdev *journal_dev = NULL;
+ sector_t reshape_offset = 0;
+- int i;
++ int i, ret = 0;
+ long long min_offset_diff = 0;
+ int first = 1;
+
+- if (mddev_init_writes_pending(mddev) < 0)
++ if (acct_bioset_init(mddev)) {
++ pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
+ return -ENOMEM;
++ }
++
++ if (mddev_init_writes_pending(mddev) < 0) {
++ ret = -ENOMEM;
++ goto exit_acct_set;
++ }
+
+ if (mddev->recovery_cp != MaxSector)
+ pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
+@@ -7482,7 +7497,8 @@ static int raid5_run(struct mddev *mddev)
+ (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
+ pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+
+ if (mddev->reshape_position != MaxSector) {
+@@ -7507,13 +7523,15 @@ static int raid5_run(struct mddev *mddev)
+ if (journal_dev) {
+ pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+
+ if (mddev->new_level != mddev->level) {
+ pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+ old_disks = mddev->raid_disks - mddev->delta_disks;
+ /* reshape_position must be on a new-stripe boundary, and one
+@@ -7529,7 +7547,8 @@ static int raid5_run(struct mddev *mddev)
+ if (sector_div(here_new, chunk_sectors * new_data_disks)) {
+ pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+ reshape_offset = here_new * chunk_sectors;
+ /* here_new is the stripe we will write to */
+@@ -7551,7 +7570,8 @@ static int raid5_run(struct mddev *mddev)
+ else if (mddev->ro == 0) {
+ pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+ } else if (mddev->reshape_backwards
+ ? (here_new * chunk_sectors + min_offset_diff <=
+@@ -7561,7 +7581,8 @@ static int raid5_run(struct mddev *mddev)
+ /* Reading from the same stripe as writing to - bad */
+ pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
+ mdname(mddev));
+- return -EINVAL;
++ ret = -EINVAL;
++ goto exit_acct_set;
+ }
+ pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
+ /* OK, we should be able to continue; */
+@@ -7585,8 +7606,10 @@ static int raid5_run(struct mddev *mddev)
+ else
+ conf = mddev->private;
+
+- if (IS_ERR(conf))
+- return PTR_ERR(conf);
++ if (IS_ERR(conf)) {
++ ret = PTR_ERR(conf);
++ goto exit_acct_set;
++ }
+
+ if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+ if (!journal_dev) {
+@@ -7786,7 +7809,10 @@ abort:
+ free_conf(conf);
+ mddev->private = NULL;
+ pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
+- return -EIO;
++ ret = -EIO;
++exit_acct_set:
++ acct_bioset_exit(mddev);
++ return ret;
+ }
+
+ static void raid5_free(struct mddev *mddev, void *priv)
+@@ -7794,6 +7820,7 @@ static void raid5_free(struct mddev *mddev, void *priv)
+ struct r5conf *conf = priv;
+
+ free_conf(conf);
++ acct_bioset_exit(mddev);
+ mddev->to_remove = &raid5_attrs_group;
+ }
+
+@@ -8010,6 +8037,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
+ */
+ if (rdev->saved_raid_disk >= 0 &&
+ rdev->saved_raid_disk >= first &&
++ rdev->saved_raid_disk <= last &&
+ conf->disks[rdev->saved_raid_disk].rdev == NULL)
+ first = rdev->saved_raid_disk;
+
+diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
+index b07812657cee6..f3f24c63536b6 100644
+--- a/drivers/media/Kconfig
++++ b/drivers/media/Kconfig
+@@ -141,10 +141,10 @@ config MEDIA_TEST_SUPPORT
+ prompt "Test drivers" if MEDIA_SUPPORT_FILTER
+ default y if !MEDIA_SUPPORT_FILTER
+ help
+- Those drivers should not be used on production Kernels, but
+- can be useful on debug ones. It enables several dummy drivers
+- that simulate a real hardware. Very useful to test userspace
+- applications and to validate if the subsystem core is doesn't
++ These drivers should not be used on production kernels, but
++ can be useful on debug ones. This option enables several dummy drivers
++ that simulate real hardware. Very useful to test userspace
++	  applications and to validate that the subsystem core doesn't
+ have regressions.
+
+ Say Y if you want to use some virtual test driver.
+diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
+index 79fa36de8a04a..99ede1417d727 100644
+--- a/drivers/media/cec/core/cec-adap.c
++++ b/drivers/media/cec/core/cec-adap.c
+@@ -161,10 +161,10 @@ static void cec_queue_event(struct cec_adapter *adap,
+ u64 ts = ktime_get_ns();
+ struct cec_fh *fh;
+
+- mutex_lock(&adap->devnode.lock);
++ mutex_lock(&adap->devnode.lock_fhs);
+ list_for_each_entry(fh, &adap->devnode.fhs, list)
+ cec_queue_event_fh(fh, ev, ts);
+- mutex_unlock(&adap->devnode.lock);
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+
+ /* Notify userspace that the CEC pin changed state at the given time. */
+@@ -178,11 +178,12 @@ void cec_queue_pin_cec_event(struct cec_adapter *adap, bool is_high,
+ };
+ struct cec_fh *fh;
+
+- mutex_lock(&adap->devnode.lock);
+- list_for_each_entry(fh, &adap->devnode.fhs, list)
++ mutex_lock(&adap->devnode.lock_fhs);
++ list_for_each_entry(fh, &adap->devnode.fhs, list) {
+ if (fh->mode_follower == CEC_MODE_MONITOR_PIN)
+ cec_queue_event_fh(fh, &ev, ktime_to_ns(ts));
+- mutex_unlock(&adap->devnode.lock);
++ }
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+ EXPORT_SYMBOL_GPL(cec_queue_pin_cec_event);
+
+@@ -195,10 +196,10 @@ void cec_queue_pin_hpd_event(struct cec_adapter *adap, bool is_high, ktime_t ts)
+ };
+ struct cec_fh *fh;
+
+- mutex_lock(&adap->devnode.lock);
++ mutex_lock(&adap->devnode.lock_fhs);
+ list_for_each_entry(fh, &adap->devnode.fhs, list)
+ cec_queue_event_fh(fh, &ev, ktime_to_ns(ts));
+- mutex_unlock(&adap->devnode.lock);
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+ EXPORT_SYMBOL_GPL(cec_queue_pin_hpd_event);
+
+@@ -211,10 +212,10 @@ void cec_queue_pin_5v_event(struct cec_adapter *adap, bool is_high, ktime_t ts)
+ };
+ struct cec_fh *fh;
+
+- mutex_lock(&adap->devnode.lock);
++ mutex_lock(&adap->devnode.lock_fhs);
+ list_for_each_entry(fh, &adap->devnode.fhs, list)
+ cec_queue_event_fh(fh, &ev, ktime_to_ns(ts));
+- mutex_unlock(&adap->devnode.lock);
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+ EXPORT_SYMBOL_GPL(cec_queue_pin_5v_event);
+
+@@ -286,12 +287,12 @@ static void cec_queue_msg_monitor(struct cec_adapter *adap,
+ u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
+ CEC_MODE_MONITOR_ALL;
+
+- mutex_lock(&adap->devnode.lock);
++ mutex_lock(&adap->devnode.lock_fhs);
+ list_for_each_entry(fh, &adap->devnode.fhs, list) {
+ if (fh->mode_follower >= monitor_mode)
+ cec_queue_msg_fh(fh, msg);
+ }
+- mutex_unlock(&adap->devnode.lock);
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+
+ /*
+@@ -302,12 +303,12 @@ static void cec_queue_msg_followers(struct cec_adapter *adap,
+ {
+ struct cec_fh *fh;
+
+- mutex_lock(&adap->devnode.lock);
++ mutex_lock(&adap->devnode.lock_fhs);
+ list_for_each_entry(fh, &adap->devnode.fhs, list) {
+ if (fh->mode_follower == CEC_MODE_FOLLOWER)
+ cec_queue_msg_fh(fh, msg);
+ }
+- mutex_unlock(&adap->devnode.lock);
++ mutex_unlock(&adap->devnode.lock_fhs);
+ }
+
+ /* Notify userspace of an adapter state change. */
+@@ -1085,7 +1086,8 @@ void cec_received_msg_ts(struct cec_adapter *adap,
+ mutex_lock(&adap->lock);
+ dprintk(2, "%s: %*ph\n", __func__, msg->len, msg->msg);
+
+- adap->last_initiator = 0xff;
++ if (!adap->transmit_in_progress)
++ adap->last_initiator = 0xff;
+
+ /* Check if this message was for us (directed or broadcast). */
+ if (!cec_msg_is_broadcast(msg))
+@@ -1199,6 +1201,7 @@ void cec_received_msg_ts(struct cec_adapter *adap,
+ if (abort)
+ dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
+ msg->flags = dst->flags;
++ msg->sequence = dst->sequence;
+ /* Remove it from the wait_queue */
+ list_del_init(&data->list);
+
+@@ -1270,7 +1273,7 @@ static int cec_config_log_addr(struct cec_adapter *adap,
+ * While trying to poll the physical address was reset
+ * and the adapter was unconfigured, so bail out.
+ */
+- if (!adap->is_configuring)
++ if (adap->phys_addr == CEC_PHYS_ADDR_INVALID)
+ return -EINTR;
+
+ if (err)
+@@ -1327,7 +1330,6 @@ static void cec_adap_unconfigure(struct cec_adapter *adap)
+ adap->phys_addr != CEC_PHYS_ADDR_INVALID)
+ WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID));
+ adap->log_addrs.log_addr_mask = 0;
+- adap->is_configuring = false;
+ adap->is_configured = false;
+ cec_flush(adap);
+ wake_up_interruptible(&adap->kthread_waitq);
+@@ -1519,9 +1521,10 @@ unconfigure:
+ for (i = 0; i < las->num_log_addrs; i++)
+ las->log_addr[i] = CEC_LOG_ADDR_INVALID;
+ cec_adap_unconfigure(adap);
++ adap->is_configuring = false;
+ adap->kthread_config = NULL;
+- mutex_unlock(&adap->lock);
+ complete(&adap->config_completion);
++ mutex_unlock(&adap->lock);
+ return 0;
+ }
+
+@@ -1572,6 +1575,7 @@ void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
+ /* Disabling monitor all mode should always succeed */
+ if (adap->monitor_all_cnt)
+ WARN_ON(call_op(adap, adap_monitor_all_enable, false));
++ /* serialize adap_enable */
+ mutex_lock(&adap->devnode.lock);
+ if (adap->needs_hpd || list_empty(&adap->devnode.fhs)) {
+ WARN_ON(adap->ops->adap_enable(adap, false));
+@@ -1583,14 +1587,16 @@ void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block)
+ return;
+ }
+
++ /* serialize adap_enable */
+ mutex_lock(&adap->devnode.lock);
+ adap->last_initiator = 0xff;
+ adap->transmit_in_progress = false;
+
+- if ((adap->needs_hpd || list_empty(&adap->devnode.fhs)) &&
+- adap->ops->adap_enable(adap, true)) {
+- mutex_unlock(&adap->devnode.lock);
+- return;
++ if (adap->needs_hpd || list_empty(&adap->devnode.fhs)) {
++ if (adap->ops->adap_enable(adap, true)) {
++ mutex_unlock(&adap->devnode.lock);
++ return;
++ }
+ }
+
+ if (adap->monitor_all_cnt &&
+diff --git a/drivers/media/cec/core/cec-api.c b/drivers/media/cec/core/cec-api.c
+index 769e6b4cddce3..52c30e4e20055 100644
+--- a/drivers/media/cec/core/cec-api.c
++++ b/drivers/media/cec/core/cec-api.c
+@@ -586,6 +586,7 @@ static int cec_open(struct inode *inode, struct file *filp)
+ return err;
+ }
+
++ /* serialize adap_enable */
+ mutex_lock(&devnode->lock);
+ if (list_empty(&devnode->fhs) &&
+ !adap->needs_hpd &&
+@@ -624,7 +625,9 @@ static int cec_open(struct inode *inode, struct file *filp)
+ }
+ #endif
+
++ mutex_lock(&devnode->lock_fhs);
+ list_add(&fh->list, &devnode->fhs);
++ mutex_unlock(&devnode->lock_fhs);
+ mutex_unlock(&devnode->lock);
+
+ return 0;
+@@ -653,8 +656,11 @@ static int cec_release(struct inode *inode, struct file *filp)
+ cec_monitor_all_cnt_dec(adap);
+ mutex_unlock(&adap->lock);
+
++ /* serialize adap_enable */
+ mutex_lock(&devnode->lock);
++ mutex_lock(&devnode->lock_fhs);
+ list_del(&fh->list);
++ mutex_unlock(&devnode->lock_fhs);
+ if (cec_is_registered(adap) && list_empty(&devnode->fhs) &&
+ !adap->needs_hpd && adap->phys_addr == CEC_PHYS_ADDR_INVALID) {
+ WARN_ON(adap->ops->adap_enable(adap, false));
+diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c
+index 551689d371a71..ec67065d52021 100644
+--- a/drivers/media/cec/core/cec-core.c
++++ b/drivers/media/cec/core/cec-core.c
+@@ -169,8 +169,10 @@ static void cec_devnode_unregister(struct cec_adapter *adap)
+ devnode->registered = false;
+ devnode->unregistered = true;
+
++ mutex_lock(&devnode->lock_fhs);
+ list_for_each_entry(fh, &devnode->fhs, list)
+ wake_up_interruptible(&fh->wait);
++ mutex_unlock(&devnode->lock_fhs);
+
+ mutex_unlock(&devnode->lock);
+
+@@ -272,6 +274,7 @@ struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
+
+ /* adap->devnode initialization */
+ INIT_LIST_HEAD(&adap->devnode.fhs);
++ mutex_init(&adap->devnode.lock_fhs);
+ mutex_init(&adap->devnode.lock);
+
+ adap->kthread = kthread_run(cec_thread_func, adap, "cec-%s", name);
+diff --git a/drivers/media/cec/core/cec-pin.c b/drivers/media/cec/core/cec-pin.c
+index 8c613aa649c6f..0eb90cc0ffb0f 100644
+--- a/drivers/media/cec/core/cec-pin.c
++++ b/drivers/media/cec/core/cec-pin.c
+@@ -1033,6 +1033,7 @@ static int cec_pin_thread_func(void *_adap)
+ {
+ struct cec_adapter *adap = _adap;
+ struct cec_pin *pin = adap->pin;
++ bool irq_enabled = false;
+
+ for (;;) {
+ wait_event_interruptible(pin->kthread_waitq,
+@@ -1060,6 +1061,7 @@ static int cec_pin_thread_func(void *_adap)
+ ns_to_ktime(pin->work_rx_msg.rx_ts));
+ msg->len = 0;
+ }
++
+ if (pin->work_tx_status) {
+ unsigned int tx_status = pin->work_tx_status;
+
+@@ -1083,27 +1085,39 @@ static int cec_pin_thread_func(void *_adap)
+ switch (atomic_xchg(&pin->work_irq_change,
+ CEC_PIN_IRQ_UNCHANGED)) {
+ case CEC_PIN_IRQ_DISABLE:
+- pin->ops->disable_irq(adap);
++ if (irq_enabled) {
++ pin->ops->disable_irq(adap);
++ irq_enabled = false;
++ }
+ cec_pin_high(pin);
+ cec_pin_to_idle(pin);
+ hrtimer_start(&pin->timer, ns_to_ktime(0),
+ HRTIMER_MODE_REL);
+ break;
+ case CEC_PIN_IRQ_ENABLE:
++ if (irq_enabled)
++ break;
+ pin->enable_irq_failed = !pin->ops->enable_irq(adap);
+ if (pin->enable_irq_failed) {
+ cec_pin_to_idle(pin);
+ hrtimer_start(&pin->timer, ns_to_ktime(0),
+ HRTIMER_MODE_REL);
++ } else {
++ irq_enabled = true;
+ }
+ break;
+ default:
+ break;
+ }
+-
+ if (kthread_should_stop())
+ break;
+ }
++ if (pin->ops->disable_irq && irq_enabled)
++ pin->ops->disable_irq(adap);
++ hrtimer_cancel(&pin->timer);
++ cec_pin_read(pin);
++ cec_pin_to_idle(pin);
++ pin->state = CEC_ST_OFF;
+ return 0;
+ }
+
+@@ -1130,13 +1144,7 @@ static int cec_pin_adap_enable(struct cec_adapter *adap, bool enable)
+ hrtimer_start(&pin->timer, ns_to_ktime(0),
+ HRTIMER_MODE_REL);
+ } else {
+- if (pin->ops->disable_irq)
+- pin->ops->disable_irq(adap);
+- hrtimer_cancel(&pin->timer);
+ kthread_stop(pin->kthread);
+- cec_pin_read(pin);
+- cec_pin_to_idle(pin);
+- pin->state = CEC_ST_OFF;
+ }
+ return 0;
+ }
+@@ -1157,11 +1165,8 @@ void cec_pin_start_timer(struct cec_pin *pin)
+ if (pin->state != CEC_ST_RX_IRQ)
+ return;
+
+- atomic_set(&pin->work_irq_change, CEC_PIN_IRQ_UNCHANGED);
+- pin->ops->disable_irq(pin->adap);
+- cec_pin_high(pin);
+- cec_pin_to_idle(pin);
+- hrtimer_start(&pin->timer, ns_to_ktime(0), HRTIMER_MODE_REL);
++ atomic_set(&pin->work_irq_change, CEC_PIN_IRQ_DISABLE);
++ wake_up_interruptible(&pin->kthread_waitq);
+ }
+
+ static int cec_pin_adap_transmit(struct cec_adapter *adap, u8 attempts,
+diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig
+index 70432a1d69186..d912d143fb312 100644
+--- a/drivers/media/cec/i2c/Kconfig
++++ b/drivers/media/cec/i2c/Kconfig
+@@ -5,6 +5,7 @@
+ config CEC_CH7322
+ tristate "Chrontel CH7322 CEC controller"
+ depends on I2C
++ select REGMAP
+ select REGMAP_I2C
+ select CEC_CORE
+ help
+diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
+index 2d95e16cd2489..f66699d5dc66e 100644
+--- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
++++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c
+@@ -44,6 +44,8 @@ static void handle_cec_message(struct cros_ec_cec *cros_ec_cec)
+ uint8_t *cec_message = cros_ec->event_data.data.cec_message;
+ unsigned int len = cros_ec->event_size;
+
++ if (len > CEC_MAX_MSG_SIZE)
++ len = CEC_MAX_MSG_SIZE;
+ cros_ec_cec->rx_msg.len = len;
+ memcpy(cros_ec_cec->rx_msg.msg, cec_message, len);
+
+diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c
+index 028a09a7531ef..102f1af01000a 100644
+--- a/drivers/media/cec/platform/s5p/s5p_cec.c
++++ b/drivers/media/cec/platform/s5p/s5p_cec.c
+@@ -115,6 +115,8 @@ static irqreturn_t s5p_cec_irq_handler(int irq, void *priv)
+ dev_dbg(cec->dev, "Buffer overrun (worker did not process previous message)\n");
+ cec->rx = STATE_BUSY;
+ cec->msg.len = status >> 24;
++ if (cec->msg.len > CEC_MAX_MSG_SIZE)
++ cec->msg.len = CEC_MAX_MSG_SIZE;
+ cec->msg.rx_status = CEC_RX_STATUS_OK;
+ s5p_cec_get_rx_buf(cec, cec->msg.len,
+ cec->msg.msg);
+diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c
+index 04b13cdc38d2c..ba67587bd43ec 100644
+--- a/drivers/media/cec/usb/pulse8/pulse8-cec.c
++++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c
+@@ -809,8 +809,11 @@ static void pulse8_ping_eeprom_work_handler(struct work_struct *work)
+
+ mutex_lock(&pulse8->lock);
+ cmd = MSGCODE_PING;
+- pulse8_send_and_wait(pulse8, &cmd, 1,
+- MSGCODE_COMMAND_ACCEPTED, 0);
++ if (pulse8_send_and_wait(pulse8, &cmd, 1,
++ MSGCODE_COMMAND_ACCEPTED, 0)) {
++ dev_warn(pulse8->dev, "failed to ping EEPROM\n");
++ goto unlock;
++ }
+
+ if (pulse8->vers < 2)
+ goto unlock;
+diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c
+index baf5772c52a96..be32159777142 100644
+--- a/drivers/media/common/saa7146/saa7146_fops.c
++++ b/drivers/media/common/saa7146/saa7146_fops.c
+@@ -521,7 +521,7 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
+ ERR("out of memory. aborting.\n");
+ kfree(vv);
+ v4l2_ctrl_handler_free(hdl);
+- return -1;
++ return -ENOMEM;
+ }
+
+ saa7146_video_uops.init(dev,vv);
+diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c
+index ce879f6f8f829..144027035892a 100644
+--- a/drivers/media/common/videobuf2/frame_vector.c
++++ b/drivers/media/common/videobuf2/frame_vector.c
+@@ -35,10 +35,7 @@
+ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
+ struct frame_vector *vec)
+ {
+- struct mm_struct *mm = current->mm;
+- struct vm_area_struct *vma;
+- int ret = 0;
+- int err;
++ int ret;
+
+ if (nr_frames == 0)
+ return 0;
+@@ -51,45 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
+ ret = pin_user_pages_fast(start, nr_frames,
+ FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+ (struct page **)(vec->ptrs));
+- if (ret > 0) {
+- vec->got_ref = true;
+- vec->is_pfns = false;
+- goto out_unlocked;
+- }
++ vec->got_ref = true;
++ vec->is_pfns = false;
++ vec->nr_frames = ret;
+
+- mmap_read_lock(mm);
+- vec->got_ref = false;
+- vec->is_pfns = true;
+- ret = 0;
+- do {
+- unsigned long *nums = frame_vector_pfns(vec);
+-
+- vma = vma_lookup(mm, start);
+- if (!vma)
+- break;
+-
+- while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
+- err = follow_pfn(vma, start, &nums[ret]);
+- if (err) {
+- if (ret == 0)
+- ret = err;
+- goto out;
+- }
+- start += PAGE_SIZE;
+- ret++;
+- }
+- /* Bail out if VMA doesn't completely cover the tail page. */
+- if (start < vma->vm_end)
+- break;
+- } while (ret < nr_frames);
+-out:
+- mmap_read_unlock(mm);
+-out_unlocked:
+- if (!ret)
+- ret = -EFAULT;
+- if (ret > 0)
+- vec->nr_frames = ret;
+- return ret;
++ if (likely(ret > 0))
++ return ret;
++
++ /* This used to (racily) return non-refcounted pfns. Let people know */
++ WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping");
++ vec->nr_frames = 0;
++ return ret ? ret : -EFAULT;
+ }
+ EXPORT_SYMBOL(get_vaddr_frames);
+
+diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
+index 508ac295eb06e..30c8497f7c118 100644
+--- a/drivers/media/common/videobuf2/videobuf2-core.c
++++ b/drivers/media/common/videobuf2/videobuf2-core.c
+@@ -68,13 +68,13 @@ module_param(debug, int, 0644);
+ err; \
+ })
+
+-#define call_ptr_memop(vb, op, args...) \
++#define call_ptr_memop(op, vb, args...) \
+ ({ \
+ struct vb2_queue *_q = (vb)->vb2_queue; \
+ void *ptr; \
+ \
+ log_memop(vb, op); \
+- ptr = _q->mem_ops->op ? _q->mem_ops->op(args) : NULL; \
++ ptr = _q->mem_ops->op ? _q->mem_ops->op(vb, args) : NULL; \
+ if (!IS_ERR_OR_NULL(ptr)) \
+ (vb)->cnt_mem_ ## op++; \
+ ptr; \
+@@ -144,9 +144,9 @@ module_param(debug, int, 0644);
+ ((vb)->vb2_queue->mem_ops->op ? \
+ (vb)->vb2_queue->mem_ops->op(args) : 0)
+
+-#define call_ptr_memop(vb, op, args...) \
++#define call_ptr_memop(op, vb, args...) \
+ ((vb)->vb2_queue->mem_ops->op ? \
+- (vb)->vb2_queue->mem_ops->op(args) : NULL)
++ (vb)->vb2_queue->mem_ops->op(vb, args) : NULL)
+
+ #define call_void_memop(vb, op, args...) \
+ do { \
+@@ -230,9 +230,10 @@ static int __vb2_buf_mem_alloc(struct vb2_buffer *vb)
+ if (size < vb->planes[plane].length)
+ goto free;
+
+- mem_priv = call_ptr_memop(vb, alloc,
+- q->alloc_devs[plane] ? : q->dev,
+- q->dma_attrs, size, q->dma_dir, q->gfp_flags);
++ mem_priv = call_ptr_memop(alloc,
++ vb,
++ q->alloc_devs[plane] ? : q->dev,
++ size);
+ if (IS_ERR_OR_NULL(mem_priv)) {
+ if (mem_priv)
+ ret = PTR_ERR(mem_priv);
+@@ -787,7 +788,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
+ num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
+ memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++ /*
++ * Set this now to ensure that drivers see the correct q->memory value
++ * in the queue_setup op.
++ */
++ mutex_lock(&q->mmap_lock);
+ q->memory = memory;
++ mutex_unlock(&q->mmap_lock);
+
+ /*
+ * Ask the driver how many buffers and planes per buffer it requires.
+@@ -796,22 +803,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes,
+ plane_sizes, q->alloc_devs);
+ if (ret)
+- return ret;
++ goto error;
+
+ /* Check that driver has set sane values */
+- if (WARN_ON(!num_planes))
+- return -EINVAL;
++ if (WARN_ON(!num_planes)) {
++ ret = -EINVAL;
++ goto error;
++ }
+
+ for (i = 0; i < num_planes; i++)
+- if (WARN_ON(!plane_sizes[i]))
+- return -EINVAL;
++ if (WARN_ON(!plane_sizes[i])) {
++ ret = -EINVAL;
++ goto error;
++ }
+
+ /* Finally, allocate buffers and video memory */
+ allocated_buffers =
+ __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes);
+ if (allocated_buffers == 0) {
+ dprintk(q, 1, "memory allocation failed\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error;
+ }
+
+ /*
+@@ -852,7 +864,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ if (ret < 0) {
+ /*
+ * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+- * from q->num_buffers.
++ * from q->num_buffers and it will reset q->memory to
++ * VB2_MEMORY_UNKNOWN.
+ */
+ __vb2_queue_free(q, allocated_buffers);
+ mutex_unlock(&q->mmap_lock);
+@@ -868,6 +881,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
+ q->waiting_for_buffers = !q->is_output;
+
+ return 0;
++
++error:
++ mutex_lock(&q->mmap_lock);
++ q->memory = VB2_MEMORY_UNKNOWN;
++ mutex_unlock(&q->mmap_lock);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
+
+@@ -878,6 +897,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ {
+ unsigned int num_planes = 0, num_buffers, allocated_buffers;
+ unsigned plane_sizes[VB2_MAX_PLANES] = { };
++ bool no_previous_buffers = !q->num_buffers;
+ int ret;
+
+ if (q->num_buffers == VB2_MAX_FRAME) {
+@@ -885,13 +905,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ return -ENOBUFS;
+ }
+
+- if (!q->num_buffers) {
++ if (no_previous_buffers) {
+ if (q->waiting_in_dqbuf && *count) {
+ dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
+ return -EBUSY;
+ }
+ memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
++ /*
++ * Set this now to ensure that drivers see the correct q->memory
++ * value in the queue_setup op.
++ */
++ mutex_lock(&q->mmap_lock);
+ q->memory = memory;
++ mutex_unlock(&q->mmap_lock);
+ q->waiting_for_buffers = !q->is_output;
+ } else {
+ if (q->memory != memory) {
+@@ -914,14 +940,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ ret = call_qop(q, queue_setup, q, &num_buffers,
+ &num_planes, plane_sizes, q->alloc_devs);
+ if (ret)
+- return ret;
++ goto error;
+
+ /* Finally, allocate buffers and video memory */
+ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers,
+ num_planes, plane_sizes);
+ if (allocated_buffers == 0) {
+ dprintk(q, 1, "memory allocation failed\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error;
+ }
+
+ /*
+@@ -952,7 +979,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ if (ret < 0) {
+ /*
+ * Note: __vb2_queue_free() will subtract 'allocated_buffers'
+- * from q->num_buffers.
++ * from q->num_buffers and it will reset q->memory to
++ * VB2_MEMORY_UNKNOWN.
+ */
+ __vb2_queue_free(q, allocated_buffers);
+ mutex_unlock(&q->mmap_lock);
+@@ -967,6 +995,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
+ *count = allocated_buffers;
+
+ return 0;
++
++error:
++ if (no_previous_buffers) {
++ mutex_lock(&q->mmap_lock);
++ q->memory = VB2_MEMORY_UNKNOWN;
++ mutex_unlock(&q->mmap_lock);
++ }
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
+
+@@ -975,7 +1011,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
+ if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
+ return NULL;
+
+- return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
++ return call_ptr_memop(vaddr, vb, vb->planes[plane_no].mem_priv);
+
+ }
+ EXPORT_SYMBOL_GPL(vb2_plane_vaddr);
+@@ -985,7 +1021,7 @@ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no)
+ if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
+ return NULL;
+
+- return call_ptr_memop(vb, cookie, vb->planes[plane_no].mem_priv);
++ return call_ptr_memop(cookie, vb, vb->planes[plane_no].mem_priv);
+ }
+ EXPORT_SYMBOL_GPL(vb2_plane_cookie);
+
+@@ -1125,10 +1161,11 @@ static int __prepare_userptr(struct vb2_buffer *vb)
+ vb->planes[plane].data_offset = 0;
+
+ /* Acquire each plane's memory */
+- mem_priv = call_ptr_memop(vb, get_userptr,
+- q->alloc_devs[plane] ? : q->dev,
+- planes[plane].m.userptr,
+- planes[plane].length, q->dma_dir);
++ mem_priv = call_ptr_memop(get_userptr,
++ vb,
++ q->alloc_devs[plane] ? : q->dev,
++ planes[plane].m.userptr,
++ planes[plane].length);
+ if (IS_ERR(mem_priv)) {
+ dprintk(q, 1, "failed acquiring userspace memory for plane %d\n",
+ plane);
+@@ -1249,9 +1286,11 @@ static int __prepare_dmabuf(struct vb2_buffer *vb)
+ vb->planes[plane].data_offset = 0;
+
+ /* Acquire each plane's memory */
+- mem_priv = call_ptr_memop(vb, attach_dmabuf,
+- q->alloc_devs[plane] ? : q->dev,
+- dbuf, planes[plane].length, q->dma_dir);
++ mem_priv = call_ptr_memop(attach_dmabuf,
++ vb,
++ q->alloc_devs[plane] ? : q->dev,
++ dbuf,
++ planes[plane].length);
+ if (IS_ERR(mem_priv)) {
+ dprintk(q, 1, "failed to attach dmabuf\n");
+ ret = PTR_ERR(mem_priv);
+@@ -2120,6 +2159,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
+ struct vb2_buffer *vb;
+ unsigned int buffer, plane;
+
++ /*
++ * Sanity checks to ensure the lock is held, MEMORY_MMAP is
++ * used and fileio isn't active.
++ */
++ lockdep_assert_held(&q->mmap_lock);
++
++ if (q->memory != VB2_MEMORY_MMAP) {
++ dprintk(q, 1, "queue is not currently set up for mmap\n");
++ return -EINVAL;
++ }
++
++ if (vb2_fileio_is_active(q)) {
++ dprintk(q, 1, "file io in progress\n");
++ return -EBUSY;
++ }
++
+ /*
+ * Go over all buffers and their planes, comparing the given offset
+ * with an offset assigned to each plane. If a match is found,
+@@ -2187,8 +2242,10 @@ int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type,
+
+ vb_plane = &vb->planes[plane];
+
+- dbuf = call_ptr_memop(vb, get_dmabuf, vb_plane->mem_priv,
+- flags & O_ACCMODE);
++ dbuf = call_ptr_memop(get_dmabuf,
++ vb,
++ vb_plane->mem_priv,
++ flags & O_ACCMODE);
+ if (IS_ERR_OR_NULL(dbuf)) {
+ dprintk(q, 1, "failed to export buffer %d, plane %d\n",
+ index, plane);
+@@ -2219,11 +2276,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+ int ret;
+ unsigned long length;
+
+- if (q->memory != VB2_MEMORY_MMAP) {
+- dprintk(q, 1, "queue is not currently set up for mmap\n");
+- return -EINVAL;
+- }
+-
+ /*
+ * Check memory area access mode.
+ */
+@@ -2245,14 +2297,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
+
+ mutex_lock(&q->mmap_lock);
+
+- if (vb2_fileio_is_active(q)) {
+- dprintk(q, 1, "mmap: file io in progress\n");
+- ret = -EBUSY;
+- goto unlock;
+- }
+-
+ /*
+- * Find the plane corresponding to the offset passed by userspace.
++	 * Find the plane corresponding to the offset passed by userspace. This
++	 * fails if the queue is not MEMORY_MMAP or if file I/O is in progress.
+ */
+ ret = __find_plane_by_offset(q, off, &buffer, &plane);
+ if (ret)
+@@ -2305,22 +2352,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q,
+ void *vaddr;
+ int ret;
+
+- if (q->memory != VB2_MEMORY_MMAP) {
+- dprintk(q, 1, "queue is not currently set up for mmap\n");
+- return -EINVAL;
+- }
++ mutex_lock(&q->mmap_lock);
+
+ /*
+- * Find the plane corresponding to the offset passed by userspace.
++	 * Find the plane corresponding to the offset passed by userspace. This
++	 * fails if the queue is not MEMORY_MMAP or if file I/O is in progress.
+ */
+ ret = __find_plane_by_offset(q, off, &buffer, &plane);
+ if (ret)
+- return ret;
++ goto unlock;
+
+ vb = q->bufs[buffer];
+
+ vaddr = vb2_plane_vaddr(vb, plane);
++ mutex_unlock(&q->mmap_lock);
+ return vaddr ? (unsigned long)vaddr : -EINVAL;
++
++unlock:
++ mutex_unlock(&q->mmap_lock);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vb2_get_unmapped_area);
+ #endif
+diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+index a7f61ba854405..f8c65b0401054 100644
+--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
++++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+@@ -40,6 +40,8 @@ struct vb2_dc_buf {
+
+ /* DMABUF related */
+ struct dma_buf_attachment *db_attach;
++
++ struct vb2_buffer *vb;
+ };
+
+ /*********************************************/
+@@ -66,14 +68,14 @@ static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
+ /* callbacks for all buffers */
+ /*********************************************/
+
+-static void *vb2_dc_cookie(void *buf_priv)
++static void *vb2_dc_cookie(struct vb2_buffer *vb, void *buf_priv)
+ {
+ struct vb2_dc_buf *buf = buf_priv;
+
+ return &buf->dma_addr;
+ }
+
+-static void *vb2_dc_vaddr(void *buf_priv)
++static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv)
+ {
+ struct vb2_dc_buf *buf = buf_priv;
+ struct dma_buf_map map;
+@@ -137,9 +139,9 @@ static void vb2_dc_put(void *buf_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_dc_alloc(struct device *dev, unsigned long attrs,
+- unsigned long size, enum dma_data_direction dma_dir,
+- gfp_t gfp_flags)
++static void *vb2_dc_alloc(struct vb2_buffer *vb,
++ struct device *dev,
++ unsigned long size)
+ {
+ struct vb2_dc_buf *buf;
+
+@@ -150,11 +152,12 @@ static void *vb2_dc_alloc(struct device *dev, unsigned long attrs,
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+- buf->attrs = attrs;
++ buf->attrs = vb->vb2_queue->dma_attrs;
+ buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr,
+- GFP_KERNEL | gfp_flags, buf->attrs);
++ GFP_KERNEL | vb->vb2_queue->gfp_flags,
++ buf->attrs);
+ if (!buf->cookie) {
+- dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size);
++ dev_err(dev, "dma_alloc_coherent of size %lu failed\n", size);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+@@ -165,11 +168,12 @@ static void *vb2_dc_alloc(struct device *dev, unsigned long attrs,
+ /* Prevent the device from being released while the buffer is used */
+ buf->dev = get_device(dev);
+ buf->size = size;
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+
+ buf->handler.refcount = &buf->refcount;
+ buf->handler.put = vb2_dc_put;
+ buf->handler.arg = buf;
++ buf->vb = vb;
+
+ refcount_set(&buf->refcount, 1);
+
+@@ -200,9 +204,9 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma)
+
+ vma->vm_ops->open(vma);
+
+- pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n",
+- __func__, (unsigned long)buf->dma_addr, vma->vm_start,
+- buf->size);
++ pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %lu\n",
++ __func__, (unsigned long)buf->dma_addr, vma->vm_start,
++ buf->size);
+
+ return 0;
+ }
+@@ -397,7 +401,9 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf)
+ return sgt;
+ }
+
+-static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
++static struct dma_buf *vb2_dc_get_dmabuf(struct vb2_buffer *vb,
++ void *buf_priv,
++ unsigned long flags)
+ {
+ struct vb2_dc_buf *buf = buf_priv;
+ struct dma_buf *dbuf;
+@@ -459,8 +465,8 @@ static void vb2_dc_put_userptr(void *buf_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
+- unsigned long size, enum dma_data_direction dma_dir)
++static void *vb2_dc_get_userptr(struct vb2_buffer *vb, struct device *dev,
++ unsigned long vaddr, unsigned long size)
+ {
+ struct vb2_dc_buf *buf;
+ struct frame_vector *vec;
+@@ -490,7 +496,8 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
+ return ERR_PTR(-ENOMEM);
+
+ buf->dev = dev;
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
++ buf->vb = vb;
+
+ offset = lower_32_bits(offset_in_page(vaddr));
+ vec = vb2_create_framevec(vaddr, size);
+@@ -660,8 +667,8 @@ static void vb2_dc_detach_dmabuf(void *mem_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_dc_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+- unsigned long size, enum dma_data_direction dma_dir)
++static void *vb2_dc_attach_dmabuf(struct vb2_buffer *vb, struct device *dev,
++ struct dma_buf *dbuf, unsigned long size)
+ {
+ struct vb2_dc_buf *buf;
+ struct dma_buf_attachment *dba;
+@@ -677,6 +684,8 @@ static void *vb2_dc_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+ return ERR_PTR(-ENOMEM);
+
+ buf->dev = dev;
++ buf->vb = vb;
++
+ /* create attachment for the dmabuf with the user device */
+ dba = dma_buf_attach(dbuf, buf->dev);
+ if (IS_ERR(dba)) {
+@@ -685,7 +694,7 @@ static void *vb2_dc_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+ return dba;
+ }
+
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->size = size;
+ buf->db_attach = dba;
+
+diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+index c5b06a5095661..0d6389dd9b0c6 100644
+--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
++++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+@@ -51,6 +51,8 @@ struct vb2_dma_sg_buf {
+ struct vb2_vmarea_handler handler;
+
+ struct dma_buf_attachment *db_attach;
++
++ struct vb2_buffer *vb;
+ };
+
+ static void vb2_dma_sg_put(void *buf_priv);
+@@ -96,9 +98,8 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
+ return 0;
+ }
+
+-static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs,
+- unsigned long size, enum dma_data_direction dma_dir,
+- gfp_t gfp_flags)
++static void *vb2_dma_sg_alloc(struct vb2_buffer *vb, struct device *dev,
++ unsigned long size)
+ {
+ struct vb2_dma_sg_buf *buf;
+ struct sg_table *sgt;
+@@ -113,7 +114,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs,
+ return ERR_PTR(-ENOMEM);
+
+ buf->vaddr = NULL;
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->offset = 0;
+ buf->size = size;
+ /* size is already page aligned */
+@@ -130,7 +131,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs,
+ if (!buf->pages)
+ goto fail_pages_array_alloc;
+
+- ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
++ ret = vb2_dma_sg_alloc_compacted(buf, vb->vb2_queue->gfp_flags);
+ if (ret)
+ goto fail_pages_alloc;
+
+@@ -154,6 +155,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs,
+ buf->handler.refcount = &buf->refcount;
+ buf->handler.put = vb2_dma_sg_put;
+ buf->handler.arg = buf;
++ buf->vb = vb;
+
+ refcount_set(&buf->refcount, 1);
+
+@@ -213,9 +215,8 @@ static void vb2_dma_sg_finish(void *buf_priv)
+ dma_sync_sgtable_for_cpu(buf->dev, sgt, buf->dma_dir);
+ }
+
+-static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr,
+- unsigned long size,
+- enum dma_data_direction dma_dir)
++static void *vb2_dma_sg_get_userptr(struct vb2_buffer *vb, struct device *dev,
++ unsigned long vaddr, unsigned long size)
+ {
+ struct vb2_dma_sg_buf *buf;
+ struct sg_table *sgt;
+@@ -230,10 +231,11 @@ static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr,
+
+ buf->vaddr = NULL;
+ buf->dev = dev;
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->offset = vaddr & ~PAGE_MASK;
+ buf->size = size;
+ buf->dma_sgt = &buf->sg_table;
++ buf->vb = vb;
+ vec = vb2_create_framevec(vaddr, size);
+ if (IS_ERR(vec))
+ goto userptr_fail_pfnvec;
+@@ -292,7 +294,7 @@ static void vb2_dma_sg_put_userptr(void *buf_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_dma_sg_vaddr(void *buf_priv)
++static void *vb2_dma_sg_vaddr(struct vb2_buffer *vb, void *buf_priv)
+ {
+ struct vb2_dma_sg_buf *buf = buf_priv;
+ struct dma_buf_map map;
+@@ -511,7 +513,9 @@ static const struct dma_buf_ops vb2_dma_sg_dmabuf_ops = {
+ .release = vb2_dma_sg_dmabuf_ops_release,
+ };
+
+-static struct dma_buf *vb2_dma_sg_get_dmabuf(void *buf_priv, unsigned long flags)
++static struct dma_buf *vb2_dma_sg_get_dmabuf(struct vb2_buffer *vb,
++ void *buf_priv,
++ unsigned long flags)
+ {
+ struct vb2_dma_sg_buf *buf = buf_priv;
+ struct dma_buf *dbuf;
+@@ -605,8 +609,8 @@ static void vb2_dma_sg_detach_dmabuf(void *mem_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_dma_sg_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+- unsigned long size, enum dma_data_direction dma_dir)
++static void *vb2_dma_sg_attach_dmabuf(struct vb2_buffer *vb, struct device *dev,
++ struct dma_buf *dbuf, unsigned long size)
+ {
+ struct vb2_dma_sg_buf *buf;
+ struct dma_buf_attachment *dba;
+@@ -630,14 +634,15 @@ static void *vb2_dma_sg_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+ return dba;
+ }
+
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->size = size;
+ buf->db_attach = dba;
++ buf->vb = vb;
+
+ return buf;
+ }
+
+-static void *vb2_dma_sg_cookie(void *buf_priv)
++static void *vb2_dma_sg_cookie(struct vb2_buffer *vb, void *buf_priv)
+ {
+ struct vb2_dma_sg_buf *buf = buf_priv;
+
+diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+index 83f95258ec8c6..ef36abd912dcc 100644
+--- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c
++++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c
+@@ -34,13 +34,12 @@ struct vb2_vmalloc_buf {
+
+ static void vb2_vmalloc_put(void *buf_priv);
+
+-static void *vb2_vmalloc_alloc(struct device *dev, unsigned long attrs,
+- unsigned long size, enum dma_data_direction dma_dir,
+- gfp_t gfp_flags)
++static void *vb2_vmalloc_alloc(struct vb2_buffer *vb, struct device *dev,
++ unsigned long size)
+ {
+ struct vb2_vmalloc_buf *buf;
+
+- buf = kzalloc(sizeof(*buf), GFP_KERNEL | gfp_flags);
++ buf = kzalloc(sizeof(*buf), GFP_KERNEL | vb->vb2_queue->gfp_flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+@@ -52,7 +51,7 @@ static void *vb2_vmalloc_alloc(struct device *dev, unsigned long attrs,
+ return ERR_PTR(-ENOMEM);
+ }
+
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->handler.refcount = &buf->refcount;
+ buf->handler.put = vb2_vmalloc_put;
+ buf->handler.arg = buf;
+@@ -71,9 +70,8 @@ static void vb2_vmalloc_put(void *buf_priv)
+ }
+ }
+
+-static void *vb2_vmalloc_get_userptr(struct device *dev, unsigned long vaddr,
+- unsigned long size,
+- enum dma_data_direction dma_dir)
++static void *vb2_vmalloc_get_userptr(struct vb2_buffer *vb, struct device *dev,
++ unsigned long vaddr, unsigned long size)
+ {
+ struct vb2_vmalloc_buf *buf;
+ struct frame_vector *vec;
+@@ -84,7 +82,7 @@ static void *vb2_vmalloc_get_userptr(struct device *dev, unsigned long vaddr,
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ offset = vaddr & ~PAGE_MASK;
+ buf->size = size;
+ vec = vb2_create_framevec(vaddr, size);
+@@ -147,7 +145,7 @@ static void vb2_vmalloc_put_userptr(void *buf_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_vmalloc_vaddr(void *buf_priv)
++static void *vb2_vmalloc_vaddr(struct vb2_buffer *vb, void *buf_priv)
+ {
+ struct vb2_vmalloc_buf *buf = buf_priv;
+
+@@ -339,7 +337,9 @@ static const struct dma_buf_ops vb2_vmalloc_dmabuf_ops = {
+ .release = vb2_vmalloc_dmabuf_ops_release,
+ };
+
+-static struct dma_buf *vb2_vmalloc_get_dmabuf(void *buf_priv, unsigned long flags)
++static struct dma_buf *vb2_vmalloc_get_dmabuf(struct vb2_buffer *vb,
++ void *buf_priv,
++ unsigned long flags)
+ {
+ struct vb2_vmalloc_buf *buf = buf_priv;
+ struct dma_buf *dbuf;
+@@ -403,8 +403,10 @@ static void vb2_vmalloc_detach_dmabuf(void *mem_priv)
+ kfree(buf);
+ }
+
+-static void *vb2_vmalloc_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+- unsigned long size, enum dma_data_direction dma_dir)
++static void *vb2_vmalloc_attach_dmabuf(struct vb2_buffer *vb,
++ struct device *dev,
++ struct dma_buf *dbuf,
++ unsigned long size)
+ {
+ struct vb2_vmalloc_buf *buf;
+
+@@ -416,7 +418,7 @@ static void *vb2_vmalloc_attach_dmabuf(struct device *dev, struct dma_buf *dbuf,
+ return ERR_PTR(-ENOMEM);
+
+ buf->dbuf = dbuf;
+- buf->dma_dir = dma_dir;
++ buf->dma_dir = vb->vb2_queue->dma_dir;
+ buf->size = size;
+
+ return buf;
+diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
+index 5d5a48475a54f..8abf7f44d96bc 100644
+--- a/drivers/media/dvb-core/dmxdev.c
++++ b/drivers/media/dvb-core/dmxdev.c
+@@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file)
+ if (mutex_lock_interruptible(&dmxdev->mutex))
+ return -ERESTARTSYS;
+
++ if (dmxdev->exit) {
++ mutex_unlock(&dmxdev->mutex);
++ return -ENODEV;
++ }
++
+ for (i = 0; i < dmxdev->filternum; i++)
+ if (dmxdev->filter[i].state == DMXDEV_STATE_FREE)
+ break;
+@@ -1413,7 +1418,7 @@ static const struct dvb_device dvbdev_dvr = {
+ };
+ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter)
+ {
+- int i;
++ int i, ret;
+
+ if (dmxdev->demux->open(dmxdev->demux) < 0)
+ return -EUSERS;
+@@ -1432,21 +1437,36 @@ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter)
+ DMXDEV_STATE_FREE);
+ }
+
+- dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev,
++ ret = dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev,
+ DVB_DEVICE_DEMUX, dmxdev->filternum);
+- dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr,
++ if (ret < 0)
++ goto err_register_dvbdev;
++
++ ret = dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr,
+ dmxdev, DVB_DEVICE_DVR, dmxdev->filternum);
++ if (ret < 0)
++ goto err_register_dvr_dvbdev;
+
+ dvb_ringbuffer_init(&dmxdev->dvr_buffer, NULL, 8192);
+
+ return 0;
++
++err_register_dvr_dvbdev:
++ dvb_unregister_device(dmxdev->dvbdev);
++err_register_dvbdev:
++ vfree(dmxdev->filter);
++ dmxdev->filter = NULL;
++ return ret;
+ }
+
+ EXPORT_SYMBOL(dvb_dmxdev_init);
+
+ void dvb_dmxdev_release(struct dmxdev *dmxdev)
+ {
++ mutex_lock(&dmxdev->mutex);
+ dmxdev->exit = 1;
++ mutex_unlock(&dmxdev->mutex);
++
+ if (dmxdev->dvbdev->users > 1) {
+ wait_event(dmxdev->dvbdev->wait_queue,
+ dmxdev->dvbdev->users == 1);
+diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c
+index 15a08d8c69ef8..baf64540dc00a 100644
+--- a/drivers/media/dvb-core/dvb_ca_en50221.c
++++ b/drivers/media/dvb-core/dvb_ca_en50221.c
+@@ -151,13 +151,19 @@ struct dvb_ca_private {
+
+ /* mutex serializing ioctls */
+ struct mutex ioctl_mutex;
++
++ /* A mutex used when a device is disconnected */
++ struct mutex remove_mutex;
++
++ /* Whether the device is disconnected */
++ int exit;
+ };
+
+ static void dvb_ca_private_free(struct dvb_ca_private *ca)
+ {
+ unsigned int i;
+
+- dvb_free_device(ca->dvbdev);
++ dvb_device_put(ca->dvbdev);
+ for (i = 0; i < ca->slot_count; i++)
+ vfree(ca->slot_info[i].rx_buffer.data);
+
+@@ -187,7 +193,7 @@ static void dvb_ca_en50221_thread_wakeup(struct dvb_ca_private *ca);
+ static int dvb_ca_en50221_read_data(struct dvb_ca_private *ca, int slot,
+ u8 *ebuf, int ecount);
+ static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
+- u8 *ebuf, int ecount);
++ u8 *ebuf, int ecount, int size_write_flag);
+
+ /**
+ * findstr - Safely find needle in haystack.
+@@ -370,7 +376,7 @@ static int dvb_ca_en50221_link_init(struct dvb_ca_private *ca, int slot)
+ ret = dvb_ca_en50221_wait_if_status(ca, slot, STATUSREG_FR, HZ / 10);
+ if (ret)
+ return ret;
+- ret = dvb_ca_en50221_write_data(ca, slot, buf, 2);
++ ret = dvb_ca_en50221_write_data(ca, slot, buf, 2, CMDREG_SW);
+ if (ret != 2)
+ return -EIO;
+ ret = ca->pub->write_cam_control(ca->pub, slot, CTRLIF_COMMAND, IRQEN);
+@@ -778,11 +784,13 @@ exit:
+ * @buf: The data in this buffer is treated as a complete link-level packet to
+ * be written.
+ * @bytes_write: Size of ebuf.
++ * @size_write_flag: A flag on the Command Register which says whether the link
++ * size information will be written or not.
+ *
+ * return: Number of bytes written, or < 0 on error.
+ */
+ static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
+- u8 *buf, int bytes_write)
++ u8 *buf, int bytes_write, int size_write_flag)
+ {
+ struct dvb_ca_slot *sl = &ca->slot_info[slot];
+ int status;
+@@ -817,7 +825,7 @@ static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
+
+ /* OK, set HC bit */
+ status = ca->pub->write_cam_control(ca->pub, slot, CTRLIF_COMMAND,
+- IRQEN | CMDREG_HC);
++ IRQEN | CMDREG_HC | size_write_flag);
+ if (status)
+ goto exit;
+
+@@ -1508,7 +1516,7 @@ static ssize_t dvb_ca_en50221_io_write(struct file *file,
+
+ mutex_lock(&sl->slot_lock);
+ status = dvb_ca_en50221_write_data(ca, slot, fragbuf,
+- fraglen + 2);
++ fraglen + 2, 0);
+ mutex_unlock(&sl->slot_lock);
+ if (status == (fraglen + 2)) {
+ written = 1;
+@@ -1709,12 +1717,22 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
+
+ dprintk("%s\n", __func__);
+
+- if (!try_module_get(ca->pub->owner))
++ mutex_lock(&ca->remove_mutex);
++
++ if (ca->exit) {
++ mutex_unlock(&ca->remove_mutex);
++ return -ENODEV;
++ }
++
++ if (!try_module_get(ca->pub->owner)) {
++ mutex_unlock(&ca->remove_mutex);
+ return -EIO;
++ }
+
+ err = dvb_generic_open(inode, file);
+ if (err < 0) {
+ module_put(ca->pub->owner);
++ mutex_unlock(&ca->remove_mutex);
+ return err;
+ }
+
+@@ -1739,6 +1757,7 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
+
+ dvb_ca_private_get(ca);
+
++ mutex_unlock(&ca->remove_mutex);
+ return 0;
+ }
+
+@@ -1758,6 +1777,8 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
+
+ dprintk("%s\n", __func__);
+
++ mutex_lock(&ca->remove_mutex);
++
+ /* mark the CA device as closed */
+ ca->open = 0;
+ dvb_ca_en50221_thread_update_delay(ca);
+@@ -1768,6 +1789,13 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
+
+ dvb_ca_private_put(ca);
+
++ if (dvbdev->users == 1 && ca->exit == 1) {
++ mutex_unlock(&ca->remove_mutex);
++ wake_up(&dvbdev->wait_queue);
++ } else {
++ mutex_unlock(&ca->remove_mutex);
++ }
++
+ return err;
+ }
+
+@@ -1891,6 +1919,7 @@ int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter,
+ }
+
+ mutex_init(&ca->ioctl_mutex);
++ mutex_init(&ca->remove_mutex);
+
+ if (signal_pending(current)) {
+ ret = -EINTR;
+@@ -1933,6 +1962,14 @@ void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca)
+
+ dprintk("%s\n", __func__);
+
++ mutex_lock(&ca->remove_mutex);
++ ca->exit = 1;
++ mutex_unlock(&ca->remove_mutex);
++
++ if (ca->dvbdev->users < 1)
++ wait_event(ca->dvbdev->wait_queue,
++ ca->dvbdev->users == 1);
++
+ /* shutdown the thread if there was one */
+ kthread_stop(ca->thread);
+
+diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c
+index 5fde1d38b3e34..80b495982f63c 100644
+--- a/drivers/media/dvb-core/dvb_demux.c
++++ b/drivers/media/dvb-core/dvb_demux.c
+@@ -125,12 +125,12 @@ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed,
+
+ cc = buf[3] & 0x0f;
+ ccok = ((feed->cc + 1) & 0x0f) == cc;
+- feed->cc = cc;
+ if (!ccok) {
+ set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+ dprintk_sect_loss("missed packet: %d instead of %d!\n",
+ cc, (feed->cc + 1) & 0x0f);
+ }
++ feed->cc = cc;
+
+ if (buf[1] & 0x40) // PUSI ?
+ feed->peslen = 0xfffa;
+@@ -310,7 +310,6 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
+
+ cc = buf[3] & 0x0f;
+ ccok = ((feed->cc + 1) & 0x0f) == cc;
+- feed->cc = cc;
+
+ if (buf[3] & 0x20) {
+ /* adaption field present, check for discontinuity_indicator */
+@@ -346,6 +345,7 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
+ feed->pusi_seen = false;
+ dvb_dmx_swfilter_section_new(feed);
+ }
++ feed->cc = cc;
+
+ if (buf[1] & 0x40) {
+ /* PUSI=1 (is set), section boundary is here */
+diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
+index 258637d762d64..fea62bce97468 100644
+--- a/drivers/media/dvb-core/dvb_frontend.c
++++ b/drivers/media/dvb-core/dvb_frontend.c
+@@ -136,7 +136,7 @@ static void __dvb_frontend_free(struct dvb_frontend *fe)
+ struct dvb_frontend_private *fepriv = fe->frontend_priv;
+
+ if (fepriv)
+- dvb_free_device(fepriv->dvbdev);
++ dvb_device_put(fepriv->dvbdev);
+
+ dvb_frontend_invoke_release(fe, fe->ops.release);
+
+@@ -293,14 +293,22 @@ static int dvb_frontend_get_event(struct dvb_frontend *fe,
+ }
+
+ if (events->eventw == events->eventr) {
+- int ret;
++ struct wait_queue_entry wait;
++ int ret = 0;
+
+ if (flags & O_NONBLOCK)
+ return -EWOULDBLOCK;
+
+- ret = wait_event_interruptible(events->wait_queue,
+- dvb_frontend_test_event(fepriv, events));
+-
++ init_waitqueue_entry(&wait, current);
++ add_wait_queue(&events->wait_queue, &wait);
++ while (!dvb_frontend_test_event(fepriv, events)) {
++ wait_woken(&wait, TASK_INTERRUPTIBLE, 0);
++ if (signal_pending(current)) {
++ ret = -ERESTARTSYS;
++ break;
++ }
++ }
++ remove_wait_queue(&events->wait_queue, &wait);
+ if (ret < 0)
+ return ret;
+ }
+@@ -2985,6 +2993,7 @@ int dvb_register_frontend(struct dvb_adapter *dvb,
+ .name = fe->ops.info.name,
+ #endif
+ };
++ int ret;
+
+ dev_dbg(dvb->device, "%s:\n", __func__);
+
+@@ -3018,8 +3027,13 @@ int dvb_register_frontend(struct dvb_adapter *dvb,
+ "DVB: registering adapter %i frontend %i (%s)...\n",
+ fe->dvb->num, fe->id, fe->ops.info.name);
+
+- dvb_register_device(fe->dvb, &fepriv->dvbdev, &dvbdev_template,
++ ret = dvb_register_device(fe->dvb, &fepriv->dvbdev, &dvbdev_template,
+ fe, DVB_DEVICE_FRONTEND, 0);
++ if (ret) {
++ dvb_frontend_put(fe);
++ mutex_unlock(&frontend_mutex);
++ return ret;
++ }
+
+ /*
+ * Initialize the cache to the proper values according with the
+diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
+index dddebea644bb8..c594b1bdfcaa5 100644
+--- a/drivers/media/dvb-core/dvb_net.c
++++ b/drivers/media/dvb-core/dvb_net.c
+@@ -1564,15 +1564,43 @@ static long dvb_net_ioctl(struct file *file,
+ return dvb_usercopy(file, cmd, arg, dvb_net_do_ioctl);
+ }
+
++static int locked_dvb_net_open(struct inode *inode, struct file *file)
++{
++ struct dvb_device *dvbdev = file->private_data;
++ struct dvb_net *dvbnet = dvbdev->priv;
++ int ret;
++
++ if (mutex_lock_interruptible(&dvbnet->remove_mutex))
++ return -ERESTARTSYS;
++
++ if (dvbnet->exit) {
++ mutex_unlock(&dvbnet->remove_mutex);
++ return -ENODEV;
++ }
++
++ ret = dvb_generic_open(inode, file);
++
++ mutex_unlock(&dvbnet->remove_mutex);
++
++ return ret;
++}
++
+ static int dvb_net_close(struct inode *inode, struct file *file)
+ {
+ struct dvb_device *dvbdev = file->private_data;
+ struct dvb_net *dvbnet = dvbdev->priv;
+
++ mutex_lock(&dvbnet->remove_mutex);
++
+ dvb_generic_release(inode, file);
+
+- if(dvbdev->users == 1 && dvbnet->exit == 1)
++ if (dvbdev->users == 1 && dvbnet->exit == 1) {
++ mutex_unlock(&dvbnet->remove_mutex);
+ wake_up(&dvbdev->wait_queue);
++ } else {
++ mutex_unlock(&dvbnet->remove_mutex);
++ }
++
+ return 0;
+ }
+
+@@ -1580,7 +1608,7 @@ static int dvb_net_close(struct inode *inode, struct file *file)
+ static const struct file_operations dvb_net_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = dvb_net_ioctl,
+- .open = dvb_generic_open,
++ .open = locked_dvb_net_open,
+ .release = dvb_net_close,
+ .llseek = noop_llseek,
+ };
+@@ -1599,10 +1627,13 @@ void dvb_net_release (struct dvb_net *dvbnet)
+ {
+ int i;
+
++ mutex_lock(&dvbnet->remove_mutex);
+ dvbnet->exit = 1;
++ mutex_unlock(&dvbnet->remove_mutex);
++
+ if (dvbnet->dvbdev->users < 1)
+ wait_event(dvbnet->dvbdev->wait_queue,
+- dvbnet->dvbdev->users==1);
++ dvbnet->dvbdev->users == 1);
+
+ dvb_unregister_device(dvbnet->dvbdev);
+
+@@ -1621,6 +1652,7 @@ int dvb_net_init (struct dvb_adapter *adap, struct dvb_net *dvbnet,
+ int i;
+
+ mutex_init(&dvbnet->ioctl_mutex);
++ mutex_init(&dvbnet->remove_mutex);
+ dvbnet->demux = dmx;
+
+ for (i=0; i<DVB_NET_DEVICES_MAX; i++)
+diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c
+index 6974f17315294..1331f2c2237e6 100644
+--- a/drivers/media/dvb-core/dvb_vb2.c
++++ b/drivers/media/dvb-core/dvb_vb2.c
+@@ -358,6 +358,12 @@ int dvb_vb2_reqbufs(struct dvb_vb2_ctx *ctx, struct dmx_requestbuffers *req)
+
+ int dvb_vb2_querybuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
+ {
++ struct vb2_queue *q = &ctx->vb_q;
++
++ if (b->index >= q->num_buffers) {
++ dprintk(1, "[%s] buffer index out of range\n", ctx->name);
++ return -EINVAL;
++ }
+ vb2_core_querybuf(&ctx->vb_q, b->index, b);
+ dprintk(3, "[%s] index=%d\n", ctx->name, b->index);
+ return 0;
+@@ -382,8 +388,13 @@ int dvb_vb2_expbuf(struct dvb_vb2_ctx *ctx, struct dmx_exportbuffer *exp)
+
+ int dvb_vb2_qbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
+ {
++ struct vb2_queue *q = &ctx->vb_q;
+ int ret;
+
++ if (b->index >= q->num_buffers) {
++ dprintk(1, "[%s] buffer index out of range\n", ctx->name);
++ return -EINVAL;
++ }
+ ret = vb2_core_qbuf(&ctx->vb_q, b->index, b, NULL);
+ if (ret) {
+ dprintk(1, "[%s] index=%d errno=%d\n", ctx->name,
+diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
+index 795d9bfaba5cf..2ff8a1b776fb4 100644
+--- a/drivers/media/dvb-core/dvbdev.c
++++ b/drivers/media/dvb-core/dvbdev.c
+@@ -37,6 +37,7 @@
+ #include <media/tuner.h>
+
+ static DEFINE_MUTEX(dvbdev_mutex);
++static LIST_HEAD(dvbdevfops_list);
+ static int dvbdev_debug;
+
+ module_param(dvbdev_debug, int, 0644);
+@@ -107,7 +108,7 @@ static int dvb_device_open(struct inode *inode, struct file *file)
+ new_fops = fops_get(dvbdev->fops);
+ if (!new_fops)
+ goto fail;
+- file->private_data = dvbdev;
++ file->private_data = dvb_device_get(dvbdev);
+ replace_fops(file, new_fops);
+ if (file->f_op->open)
+ err = file->f_op->open(inode, file);
+@@ -171,6 +172,9 @@ int dvb_generic_release(struct inode *inode, struct file *file)
+ }
+
+ dvbdev->users++;
++
++ dvb_device_put(dvbdev);
++
+ return 0;
+ }
+ EXPORT_SYMBOL(dvb_generic_release);
+@@ -342,6 +346,7 @@ static int dvb_create_media_entity(struct dvb_device *dvbdev,
+ GFP_KERNEL);
+ if (!dvbdev->pads) {
+ kfree(dvbdev->entity);
++ dvbdev->entity = NULL;
+ return -ENOMEM;
+ }
+ }
+@@ -458,14 +463,15 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
+ enum dvb_device_type type, int demux_sink_pads)
+ {
+ struct dvb_device *dvbdev;
+- struct file_operations *dvbdevfops;
++ struct file_operations *dvbdevfops = NULL;
++ struct dvbdevfops_node *node = NULL, *new_node = NULL;
+ struct device *clsdev;
+ int minor;
+ int id, ret;
+
+ mutex_lock(&dvbdev_register_lock);
+
+- if ((id = dvbdev_get_free_id (adap, type)) < 0){
++ if ((id = dvbdev_get_free_id (adap, type)) < 0) {
+ mutex_unlock(&dvbdev_register_lock);
+ *pdvbdev = NULL;
+ pr_err("%s: couldn't find free device id\n", __func__);
+@@ -473,41 +479,69 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
+ }
+
+ *pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL);
+-
+ if (!dvbdev){
+ mutex_unlock(&dvbdev_register_lock);
+ return -ENOMEM;
+ }
+
+- dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL);
++ /*
++ * When a device of the same type is probe()d more than once,
++ * the first allocated fops are used. This prevents memory leaks
++ * that can occur when the same device is probe()d repeatedly.
++ */
++ list_for_each_entry(node, &dvbdevfops_list, list_head) {
++ if (node->fops->owner == adap->module &&
++ node->type == type &&
++ node->template == template) {
++ dvbdevfops = node->fops;
++ break;
++ }
++ }
+
+- if (!dvbdevfops){
+- kfree (dvbdev);
+- mutex_unlock(&dvbdev_register_lock);
+- return -ENOMEM;
++ if (dvbdevfops == NULL) {
++ dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL);
++ if (!dvbdevfops) {
++ kfree(dvbdev);
++ mutex_unlock(&dvbdev_register_lock);
++ return -ENOMEM;
++ }
++
++ new_node = kzalloc(sizeof(struct dvbdevfops_node), GFP_KERNEL);
++ if (!new_node) {
++ kfree(dvbdevfops);
++ kfree(dvbdev);
++ mutex_unlock(&dvbdev_register_lock);
++ return -ENOMEM;
++ }
++
++ new_node->fops = dvbdevfops;
++ new_node->type = type;
++ new_node->template = template;
++ list_add_tail (&new_node->list_head, &dvbdevfops_list);
+ }
+
+ memcpy(dvbdev, template, sizeof(struct dvb_device));
++ kref_init(&dvbdev->ref);
+ dvbdev->type = type;
+ dvbdev->id = id;
+ dvbdev->adapter = adap;
+ dvbdev->priv = priv;
+ dvbdev->fops = dvbdevfops;
+ init_waitqueue_head (&dvbdev->wait_queue);
+-
+ dvbdevfops->owner = adap->module;
+-
+ list_add_tail (&dvbdev->list_head, &adap->device_list);
+-
+ down_write(&minor_rwsem);
+ #ifdef CONFIG_DVB_DYNAMIC_MINORS
+ for (minor = 0; minor < MAX_DVB_MINORS; minor++)
+ if (dvb_minors[minor] == NULL)
+ break;
+-
+ if (minor == MAX_DVB_MINORS) {
++ if (new_node) {
++ list_del (&new_node->list_head);
++ kfree(dvbdevfops);
++ kfree(new_node);
++ }
+ list_del (&dvbdev->list_head);
+- kfree(dvbdevfops);
+ kfree(dvbdev);
+ up_write(&minor_rwsem);
+ mutex_unlock(&dvbdev_register_lock);
+@@ -516,41 +550,47 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
+ #else
+ minor = nums2minor(adap->num, type, id);
+ #endif
+-
+ dvbdev->minor = minor;
+- dvb_minors[minor] = dvbdev;
++ dvb_minors[minor] = dvb_device_get(dvbdev);
+ up_write(&minor_rwsem);
+-
+ ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads);
+ if (ret) {
+ pr_err("%s: dvb_register_media_device failed to create the mediagraph\n",
+ __func__);
+-
++ if (new_node) {
++ list_del (&new_node->list_head);
++ kfree(dvbdevfops);
++ kfree(new_node);
++ }
+ dvb_media_device_free(dvbdev);
+ list_del (&dvbdev->list_head);
+- kfree(dvbdevfops);
+ kfree(dvbdev);
+ mutex_unlock(&dvbdev_register_lock);
+ return ret;
+ }
+
+- mutex_unlock(&dvbdev_register_lock);
+-
+ clsdev = device_create(dvb_class, adap->device,
+ MKDEV(DVB_MAJOR, minor),
+ dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id);
+ if (IS_ERR(clsdev)) {
+ pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n",
+ __func__, adap->num, dnames[type], id, PTR_ERR(clsdev));
++ if (new_node) {
++ list_del (&new_node->list_head);
++ kfree(dvbdevfops);
++ kfree(new_node);
++ }
+ dvb_media_device_free(dvbdev);
+ list_del (&dvbdev->list_head);
+- kfree(dvbdevfops);
+ kfree(dvbdev);
++ mutex_unlock(&dvbdev_register_lock);
+ return PTR_ERR(clsdev);
+ }
++
+ dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n",
+ adap->num, dnames[type], id, minor, minor);
+
++ mutex_unlock(&dvbdev_register_lock);
+ return 0;
+ }
+ EXPORT_SYMBOL(dvb_register_device);
+@@ -563,6 +603,7 @@ void dvb_remove_device(struct dvb_device *dvbdev)
+
+ down_write(&minor_rwsem);
+ dvb_minors[dvbdev->minor] = NULL;
++ dvb_device_put(dvbdev);
+ up_write(&minor_rwsem);
+
+ dvb_media_device_free(dvbdev);
+@@ -574,21 +615,33 @@ void dvb_remove_device(struct dvb_device *dvbdev)
+ EXPORT_SYMBOL(dvb_remove_device);
+
+
+-void dvb_free_device(struct dvb_device *dvbdev)
++static void dvb_free_device(struct kref *ref)
+ {
+- if (!dvbdev)
+- return;
++ struct dvb_device *dvbdev = container_of(ref, struct dvb_device, ref);
+
+- kfree (dvbdev->fops);
+ kfree (dvbdev);
+ }
+-EXPORT_SYMBOL(dvb_free_device);
++
++
++struct dvb_device *dvb_device_get(struct dvb_device *dvbdev)
++{
++ kref_get(&dvbdev->ref);
++ return dvbdev;
++}
++EXPORT_SYMBOL(dvb_device_get);
++
++
++void dvb_device_put(struct dvb_device *dvbdev)
++{
++ if (dvbdev)
++ kref_put(&dvbdev->ref, dvb_free_device);
++}
+
+
+ void dvb_unregister_device(struct dvb_device *dvbdev)
+ {
+ dvb_remove_device(dvbdev);
+- dvb_free_device(dvbdev);
++ dvb_device_put(dvbdev);
+ }
+ EXPORT_SYMBOL(dvb_unregister_device);
+
+@@ -1071,9 +1124,17 @@ error:
+
+ static void __exit exit_dvbdev(void)
+ {
++ struct dvbdevfops_node *node, *next;
++
+ class_destroy(dvb_class);
+ cdev_del(&dvb_device_cdev);
+ unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS);
++
++ list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) {
++ list_del (&node->list_head);
++ kfree(node->fops);
++ kfree(node);
++ }
+ }
+
+ subsys_initcall(init_dvbdev);
+diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c
+index 9b00b56230b61..cf8e5f1bd1018 100644
+--- a/drivers/media/dvb-frontends/ascot2e.c
++++ b/drivers/media/dvb-frontends/ascot2e.c
+@@ -533,7 +533,7 @@ struct dvb_frontend *ascot2e_attach(struct dvb_frontend *fe,
+ priv->i2c_address, priv->i2c);
+ return fe;
+ }
+-EXPORT_SYMBOL(ascot2e_attach);
++EXPORT_SYMBOL_GPL(ascot2e_attach);
+
+ MODULE_DESCRIPTION("Sony ASCOT2E terr/cab tuner driver");
+ MODULE_AUTHOR("info@netup.ru");
+diff --git a/drivers/media/dvb-frontends/atbm8830.c b/drivers/media/dvb-frontends/atbm8830.c
+index bdd16b9c58244..778c865085bf9 100644
+--- a/drivers/media/dvb-frontends/atbm8830.c
++++ b/drivers/media/dvb-frontends/atbm8830.c
+@@ -489,7 +489,7 @@ error_out:
+ return NULL;
+
+ }
+-EXPORT_SYMBOL(atbm8830_attach);
++EXPORT_SYMBOL_GPL(atbm8830_attach);
+
+ MODULE_DESCRIPTION("AltoBeam ATBM8830/8831 GB20600 demodulator driver");
+ MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
+diff --git a/drivers/media/dvb-frontends/au8522_dig.c b/drivers/media/dvb-frontends/au8522_dig.c
+index 78cafdf279618..230436bf6cbd9 100644
+--- a/drivers/media/dvb-frontends/au8522_dig.c
++++ b/drivers/media/dvb-frontends/au8522_dig.c
+@@ -879,7 +879,7 @@ error:
+ au8522_release_state(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(au8522_attach);
++EXPORT_SYMBOL_GPL(au8522_attach);
+
+ static const struct dvb_frontend_ops au8522_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/bcm3510.c b/drivers/media/dvb-frontends/bcm3510.c
+index da0ff7b44da41..b3f5c49accafd 100644
+--- a/drivers/media/dvb-frontends/bcm3510.c
++++ b/drivers/media/dvb-frontends/bcm3510.c
+@@ -649,6 +649,7 @@ static int bcm3510_download_firmware(struct dvb_frontend* fe)
+ deb_info("firmware chunk, addr: 0x%04x, len: 0x%04x, total length: 0x%04zx\n",addr,len,fw->size);
+ if ((ret = bcm3510_write_ram(st,addr,&b[i+4],len)) < 0) {
+ err("firmware download failed: %d\n",ret);
++ release_firmware(fw);
+ return ret;
+ }
+ i += 4 + len;
+@@ -834,7 +835,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(bcm3510_attach);
++EXPORT_SYMBOL_GPL(bcm3510_attach);
+
+ static const struct dvb_frontend_ops bcm3510_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/cx22700.c b/drivers/media/dvb-frontends/cx22700.c
+index b39ff516271b2..1d04c0a652b26 100644
+--- a/drivers/media/dvb-frontends/cx22700.c
++++ b/drivers/media/dvb-frontends/cx22700.c
+@@ -432,4 +432,4 @@ MODULE_DESCRIPTION("Conexant CX22700 DVB-T Demodulator driver");
+ MODULE_AUTHOR("Holger Waechtler");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(cx22700_attach);
++EXPORT_SYMBOL_GPL(cx22700_attach);
+diff --git a/drivers/media/dvb-frontends/cx22702.c b/drivers/media/dvb-frontends/cx22702.c
+index cc6acbf6393d4..61ad34b7004b5 100644
+--- a/drivers/media/dvb-frontends/cx22702.c
++++ b/drivers/media/dvb-frontends/cx22702.c
+@@ -604,7 +604,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(cx22702_attach);
++EXPORT_SYMBOL_GPL(cx22702_attach);
+
+ static const struct dvb_frontend_ops cx22702_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/cx24110.c b/drivers/media/dvb-frontends/cx24110.c
+index 6f99d6a27be2d..9aeea089756fe 100644
+--- a/drivers/media/dvb-frontends/cx24110.c
++++ b/drivers/media/dvb-frontends/cx24110.c
+@@ -653,4 +653,4 @@ MODULE_DESCRIPTION("Conexant CX24110 DVB-S Demodulator driver");
+ MODULE_AUTHOR("Peter Hettkamp");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(cx24110_attach);
++EXPORT_SYMBOL_GPL(cx24110_attach);
+diff --git a/drivers/media/dvb-frontends/cx24113.c b/drivers/media/dvb-frontends/cx24113.c
+index 60a9f70275f75..619df8329fbbc 100644
+--- a/drivers/media/dvb-frontends/cx24113.c
++++ b/drivers/media/dvb-frontends/cx24113.c
+@@ -590,7 +590,7 @@ error:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(cx24113_attach);
++EXPORT_SYMBOL_GPL(cx24113_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Activates frontend debugging (default:0)");
+diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c
+index ea8264ccbb4e8..8b978a9f74a4e 100644
+--- a/drivers/media/dvb-frontends/cx24116.c
++++ b/drivers/media/dvb-frontends/cx24116.c
+@@ -1133,7 +1133,7 @@ struct dvb_frontend *cx24116_attach(const struct cx24116_config *config,
+ state->frontend.demodulator_priv = state;
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(cx24116_attach);
++EXPORT_SYMBOL_GPL(cx24116_attach);
+
+ /*
+ * Initialise or wake up device
+diff --git a/drivers/media/dvb-frontends/cx24120.c b/drivers/media/dvb-frontends/cx24120.c
+index d8acd582c7111..44515fdbe91d4 100644
+--- a/drivers/media/dvb-frontends/cx24120.c
++++ b/drivers/media/dvb-frontends/cx24120.c
+@@ -305,7 +305,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(cx24120_attach);
++EXPORT_SYMBOL_GPL(cx24120_attach);
+
+ static int cx24120_test_rom(struct cx24120_state *state)
+ {
+@@ -973,7 +973,9 @@ static void cx24120_set_clock_ratios(struct dvb_frontend *fe)
+ cmd.arg[8] = (clock_ratios_table[idx].rate >> 8) & 0xff;
+ cmd.arg[9] = (clock_ratios_table[idx].rate >> 0) & 0xff;
+
+- cx24120_message_send(state, &cmd);
++ ret = cx24120_message_send(state, &cmd);
++ if (ret != 0)
++ return;
+
+ /* Calculate ber window rates for stat work */
+ cx24120_calculate_ber_window(state, clock_ratios_table[idx].rate);
+diff --git a/drivers/media/dvb-frontends/cx24123.c b/drivers/media/dvb-frontends/cx24123.c
+index 3d84ee17e54c6..539889e638ccc 100644
+--- a/drivers/media/dvb-frontends/cx24123.c
++++ b/drivers/media/dvb-frontends/cx24123.c
+@@ -1096,7 +1096,7 @@ error:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(cx24123_attach);
++EXPORT_SYMBOL_GPL(cx24123_attach);
+
+ static const struct dvb_frontend_ops cx24123_ops = {
+ .delsys = { SYS_DVBS },
+diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c
+index b1618339eec0e..b0e6343ea5911 100644
+--- a/drivers/media/dvb-frontends/cxd2820r_core.c
++++ b/drivers/media/dvb-frontends/cxd2820r_core.c
+@@ -536,7 +536,7 @@ struct dvb_frontend *cxd2820r_attach(const struct cxd2820r_config *config,
+
+ return pdata.get_dvb_frontend(client);
+ }
+-EXPORT_SYMBOL(cxd2820r_attach);
++EXPORT_SYMBOL_GPL(cxd2820r_attach);
+
+ static struct dvb_frontend *cxd2820r_get_dvb_frontend(struct i2c_client *client)
+ {
+diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c
+index 5431f922f55e4..e9d1eef40c627 100644
+--- a/drivers/media/dvb-frontends/cxd2841er.c
++++ b/drivers/media/dvb-frontends/cxd2841er.c
+@@ -3930,14 +3930,14 @@ struct dvb_frontend *cxd2841er_attach_s(struct cxd2841er_config *cfg,
+ {
+ return cxd2841er_attach(cfg, i2c, SYS_DVBS);
+ }
+-EXPORT_SYMBOL(cxd2841er_attach_s);
++EXPORT_SYMBOL_GPL(cxd2841er_attach_s);
+
+ struct dvb_frontend *cxd2841er_attach_t_c(struct cxd2841er_config *cfg,
+ struct i2c_adapter *i2c)
+ {
+ return cxd2841er_attach(cfg, i2c, 0);
+ }
+-EXPORT_SYMBOL(cxd2841er_attach_t_c);
++EXPORT_SYMBOL_GPL(cxd2841er_attach_t_c);
+
+ static const struct dvb_frontend_ops cxd2841er_dvbs_s2_ops = {
+ .delsys = { SYS_DVBS, SYS_DVBS2 },
+diff --git a/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c b/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
+index d5b1b3788e392..09d31c368741d 100644
+--- a/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
++++ b/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
+@@ -1950,7 +1950,7 @@ struct dvb_frontend *cxd2880_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(cxd2880_attach);
++EXPORT_SYMBOL_GPL(cxd2880_attach);
+
+ MODULE_DESCRIPTION("Sony CXD2880 DVB-T2/T tuner + demod driver");
+ MODULE_AUTHOR("Sony Semiconductor Solutions Corporation");
+diff --git a/drivers/media/dvb-frontends/dib0070.c b/drivers/media/dvb-frontends/dib0070.c
+index cafb41dba861c..9a8e7cdd2a247 100644
+--- a/drivers/media/dvb-frontends/dib0070.c
++++ b/drivers/media/dvb-frontends/dib0070.c
+@@ -762,7 +762,7 @@ free_mem:
+ fe->tuner_priv = NULL;
+ return NULL;
+ }
+-EXPORT_SYMBOL(dib0070_attach);
++EXPORT_SYMBOL_GPL(dib0070_attach);
+
+ MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>");
+ MODULE_DESCRIPTION("Driver for the DiBcom 0070 base-band RF Tuner");
+diff --git a/drivers/media/dvb-frontends/dib0090.c b/drivers/media/dvb-frontends/dib0090.c
+index 903da33642dff..c958bcff026ec 100644
+--- a/drivers/media/dvb-frontends/dib0090.c
++++ b/drivers/media/dvb-frontends/dib0090.c
+@@ -2634,7 +2634,7 @@ struct dvb_frontend *dib0090_register(struct dvb_frontend *fe, struct i2c_adapte
+ return NULL;
+ }
+
+-EXPORT_SYMBOL(dib0090_register);
++EXPORT_SYMBOL_GPL(dib0090_register);
+
+ struct dvb_frontend *dib0090_fw_register(struct dvb_frontend *fe, struct i2c_adapter *i2c, const struct dib0090_config *config)
+ {
+@@ -2660,7 +2660,7 @@ free_mem:
+ fe->tuner_priv = NULL;
+ return NULL;
+ }
+-EXPORT_SYMBOL(dib0090_fw_register);
++EXPORT_SYMBOL_GPL(dib0090_fw_register);
+
+ MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>");
+ MODULE_AUTHOR("Olivier Grenie <olivier.grenie@parrot.com>");
+diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
+index a6c2fc4586eb3..c598b2a633256 100644
+--- a/drivers/media/dvb-frontends/dib3000mb.c
++++ b/drivers/media/dvb-frontends/dib3000mb.c
+@@ -815,4 +815,4 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
+ MODULE_DESCRIPTION(DRIVER_DESC);
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(dib3000mb_attach);
++EXPORT_SYMBOL_GPL(dib3000mb_attach);
+diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c
+index 692600ce5f230..c69665024330c 100644
+--- a/drivers/media/dvb-frontends/dib3000mc.c
++++ b/drivers/media/dvb-frontends/dib3000mc.c
+@@ -935,7 +935,7 @@ error:
+ kfree(st);
+ return NULL;
+ }
+-EXPORT_SYMBOL(dib3000mc_attach);
++EXPORT_SYMBOL_GPL(dib3000mc_attach);
+
+ static const struct dvb_frontend_ops dib3000mc_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/dib7000m.c b/drivers/media/dvb-frontends/dib7000m.c
+index 97ce97789c9e3..fdb22f32e3a11 100644
+--- a/drivers/media/dvb-frontends/dib7000m.c
++++ b/drivers/media/dvb-frontends/dib7000m.c
+@@ -1434,7 +1434,7 @@ error:
+ kfree(st);
+ return NULL;
+ }
+-EXPORT_SYMBOL(dib7000m_attach);
++EXPORT_SYMBOL_GPL(dib7000m_attach);
+
+ static const struct dvb_frontend_ops dib7000m_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/dib7000p.c b/drivers/media/dvb-frontends/dib7000p.c
+index 55bee50aa8716..8c426baf76ee3 100644
+--- a/drivers/media/dvb-frontends/dib7000p.c
++++ b/drivers/media/dvb-frontends/dib7000p.c
+@@ -497,7 +497,7 @@ static int dib7000p_update_pll(struct dvb_frontend *fe, struct dibx000_bandwidth
+ prediv = reg_1856 & 0x3f;
+ loopdiv = (reg_1856 >> 6) & 0x3f;
+
+- if ((bw != NULL) && (bw->pll_prediv != prediv || bw->pll_ratio != loopdiv)) {
++ if (loopdiv && bw && (bw->pll_prediv != prediv || bw->pll_ratio != loopdiv)) {
+ dprintk("Updating pll (prediv: old = %d new = %d ; loopdiv : old = %d new = %d)\n", prediv, bw->pll_prediv, loopdiv, bw->pll_ratio);
+ reg_1856 &= 0xf000;
+ reg_1857 = dib7000p_read_word(state, 1857);
+@@ -2822,7 +2822,7 @@ void *dib7000p_attach(struct dib7000p_ops *ops)
+
+ return ops;
+ }
+-EXPORT_SYMBOL(dib7000p_attach);
++EXPORT_SYMBOL_GPL(dib7000p_attach);
+
+ static const struct dvb_frontend_ops dib7000p_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c
+index bb02354a48b81..02cb48223dc67 100644
+--- a/drivers/media/dvb-frontends/dib8000.c
++++ b/drivers/media/dvb-frontends/dib8000.c
+@@ -4473,8 +4473,10 @@ static struct dvb_frontend *dib8000_init(struct i2c_adapter *i2c_adap, u8 i2c_ad
+
+ state->timf_default = cfg->pll->timf;
+
+- if (dib8000_identify(&state->i2c) == 0)
++ if (dib8000_identify(&state->i2c) == 0) {
++ kfree(fe);
+ goto error;
++ }
+
+ dibx000_init_i2c_master(&state->i2c_master, DIB8000, state->i2c.adap, state->i2c.addr);
+
+@@ -4525,7 +4527,7 @@ void *dib8000_attach(struct dib8000_ops *ops)
+
+ return ops;
+ }
+-EXPORT_SYMBOL(dib8000_attach);
++EXPORT_SYMBOL_GPL(dib8000_attach);
+
+ MODULE_AUTHOR("Olivier Grenie <Olivier.Grenie@parrot.com, Patrick Boettcher <patrick.boettcher@posteo.de>");
+ MODULE_DESCRIPTION("Driver for the DiBcom 8000 ISDB-T demodulator");
+diff --git a/drivers/media/dvb-frontends/dib9000.c b/drivers/media/dvb-frontends/dib9000.c
+index 04d92d6142797..24f7f7a7598d4 100644
+--- a/drivers/media/dvb-frontends/dib9000.c
++++ b/drivers/media/dvb-frontends/dib9000.c
+@@ -2546,7 +2546,7 @@ error:
+ kfree(st);
+ return NULL;
+ }
+-EXPORT_SYMBOL(dib9000_attach);
++EXPORT_SYMBOL_GPL(dib9000_attach);
+
+ static const struct dvb_frontend_ops dib9000_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c
+index bf9e4ef35684b..88860d08f9c12 100644
+--- a/drivers/media/dvb-frontends/drx39xyj/drxj.c
++++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c
+@@ -12368,7 +12368,7 @@ error:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(drx39xxj_attach);
++EXPORT_SYMBOL_GPL(drx39xxj_attach);
+
+ static const struct dvb_frontend_ops drx39xxj_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/drxd_hard.c b/drivers/media/dvb-frontends/drxd_hard.c
+index a7eb81df88c2c..45487d1080b7d 100644
+--- a/drivers/media/dvb-frontends/drxd_hard.c
++++ b/drivers/media/dvb-frontends/drxd_hard.c
+@@ -2947,7 +2947,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(drxd_attach);
++EXPORT_SYMBOL_GPL(drxd_attach);
+
+ MODULE_DESCRIPTION("DRXD driver");
+ MODULE_AUTHOR("Micronas");
+diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c
+index d7fc2595f15b8..e8afd5305b541 100644
+--- a/drivers/media/dvb-frontends/drxk_hard.c
++++ b/drivers/media/dvb-frontends/drxk_hard.c
+@@ -6673,7 +6673,7 @@ static int drxk_read_snr(struct dvb_frontend *fe, u16 *snr)
+ static int drxk_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
+ {
+ struct drxk_state *state = fe->demodulator_priv;
+- u16 err;
++ u16 err = 0;
+
+ dprintk(1, "\n");
+
+@@ -6846,7 +6846,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(drxk_attach);
++EXPORT_SYMBOL_GPL(drxk_attach);
+
+ MODULE_DESCRIPTION("DRX-K driver");
+ MODULE_AUTHOR("Ralph Metzler");
+diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c
+index 20fcf31af1658..515aa7c7baf2a 100644
+--- a/drivers/media/dvb-frontends/ds3000.c
++++ b/drivers/media/dvb-frontends/ds3000.c
+@@ -859,7 +859,7 @@ struct dvb_frontend *ds3000_attach(const struct ds3000_config *config,
+ ds3000_set_voltage(&state->frontend, SEC_VOLTAGE_OFF);
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(ds3000_attach);
++EXPORT_SYMBOL_GPL(ds3000_attach);
+
+ static int ds3000_set_carrier_offset(struct dvb_frontend *fe,
+ s32 carrier_offset_khz)
+diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c
+index d45b4ddc8f912..846bfe7ef30eb 100644
+--- a/drivers/media/dvb-frontends/dvb-pll.c
++++ b/drivers/media/dvb-frontends/dvb-pll.c
+@@ -866,7 +866,7 @@ out:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(dvb_pll_attach);
++EXPORT_SYMBOL_GPL(dvb_pll_attach);
+
+
+ static int
+diff --git a/drivers/media/dvb-frontends/ec100.c b/drivers/media/dvb-frontends/ec100.c
+index 03bd80666cf83..2ad0a3c2f7567 100644
+--- a/drivers/media/dvb-frontends/ec100.c
++++ b/drivers/media/dvb-frontends/ec100.c
+@@ -299,7 +299,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(ec100_attach);
++EXPORT_SYMBOL_GPL(ec100_attach);
+
+ static const struct dvb_frontend_ops ec100_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c
+index 8c1310c6b0bc2..c299d31dc7d27 100644
+--- a/drivers/media/dvb-frontends/helene.c
++++ b/drivers/media/dvb-frontends/helene.c
+@@ -1025,7 +1025,7 @@ struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
+ priv->i2c_address, priv->i2c);
+ return fe;
+ }
+-EXPORT_SYMBOL(helene_attach_s);
++EXPORT_SYMBOL_GPL(helene_attach_s);
+
+ struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
+ const struct helene_config *config,
+@@ -1061,7 +1061,7 @@ struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
+ priv->i2c_address, priv->i2c);
+ return fe;
+ }
+-EXPORT_SYMBOL(helene_attach);
++EXPORT_SYMBOL_GPL(helene_attach);
+
+ static int helene_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c
+index 24bf5cbcc1846..0330b78a5b3f2 100644
+--- a/drivers/media/dvb-frontends/horus3a.c
++++ b/drivers/media/dvb-frontends/horus3a.c
+@@ -395,7 +395,7 @@ struct dvb_frontend *horus3a_attach(struct dvb_frontend *fe,
+ priv->i2c_address, priv->i2c);
+ return fe;
+ }
+-EXPORT_SYMBOL(horus3a_attach);
++EXPORT_SYMBOL_GPL(horus3a_attach);
+
+ MODULE_DESCRIPTION("Sony HORUS3A satellite tuner driver");
+ MODULE_AUTHOR("Sergey Kozlov <serjk@netup.ru>");
+diff --git a/drivers/media/dvb-frontends/isl6405.c b/drivers/media/dvb-frontends/isl6405.c
+index 2cd69b4ff82cb..7d28a743f97eb 100644
+--- a/drivers/media/dvb-frontends/isl6405.c
++++ b/drivers/media/dvb-frontends/isl6405.c
+@@ -141,7 +141,7 @@ struct dvb_frontend *isl6405_attach(struct dvb_frontend *fe, struct i2c_adapter
+
+ return fe;
+ }
+-EXPORT_SYMBOL(isl6405_attach);
++EXPORT_SYMBOL_GPL(isl6405_attach);
+
+ MODULE_DESCRIPTION("Driver for lnb supply and control ic isl6405");
+ MODULE_AUTHOR("Hartmut Hackmann & Oliver Endriss");
+diff --git a/drivers/media/dvb-frontends/isl6421.c b/drivers/media/dvb-frontends/isl6421.c
+index 43b0dfc6f453e..2e9f6f12f849e 100644
+--- a/drivers/media/dvb-frontends/isl6421.c
++++ b/drivers/media/dvb-frontends/isl6421.c
+@@ -213,7 +213,7 @@ struct dvb_frontend *isl6421_attach(struct dvb_frontend *fe, struct i2c_adapter
+
+ return fe;
+ }
+-EXPORT_SYMBOL(isl6421_attach);
++EXPORT_SYMBOL_GPL(isl6421_attach);
+
+ MODULE_DESCRIPTION("Driver for lnb supply and control ic isl6421");
+ MODULE_AUTHOR("Andrew de Quincey & Oliver Endriss");
+diff --git a/drivers/media/dvb-frontends/isl6423.c b/drivers/media/dvb-frontends/isl6423.c
+index 8cd1bb88ce6e7..a0d0a38340574 100644
+--- a/drivers/media/dvb-frontends/isl6423.c
++++ b/drivers/media/dvb-frontends/isl6423.c
+@@ -289,7 +289,7 @@ exit:
+ fe->sec_priv = NULL;
+ return NULL;
+ }
+-EXPORT_SYMBOL(isl6423_attach);
++EXPORT_SYMBOL_GPL(isl6423_attach);
+
+ MODULE_DESCRIPTION("ISL6423 SEC");
+ MODULE_AUTHOR("Manu Abraham");
+diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c
+index 1b33478653d16..f8f362f50e78d 100644
+--- a/drivers/media/dvb-frontends/itd1000.c
++++ b/drivers/media/dvb-frontends/itd1000.c
+@@ -389,7 +389,7 @@ struct dvb_frontend *itd1000_attach(struct dvb_frontend *fe, struct i2c_adapter
+
+ return fe;
+ }
+-EXPORT_SYMBOL(itd1000_attach);
++EXPORT_SYMBOL_GPL(itd1000_attach);
+
+ MODULE_AUTHOR("Patrick Boettcher <pb@linuxtv.org>");
+ MODULE_DESCRIPTION("Integrant ITD1000 driver");
+diff --git a/drivers/media/dvb-frontends/ix2505v.c b/drivers/media/dvb-frontends/ix2505v.c
+index 73f27105c139d..3212e333d472b 100644
+--- a/drivers/media/dvb-frontends/ix2505v.c
++++ b/drivers/media/dvb-frontends/ix2505v.c
+@@ -302,7 +302,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(ix2505v_attach);
++EXPORT_SYMBOL_GPL(ix2505v_attach);
+
+ module_param_named(debug, ix2505v_debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/l64781.c b/drivers/media/dvb-frontends/l64781.c
+index c5106a1ea1cd0..fe5af2453d559 100644
+--- a/drivers/media/dvb-frontends/l64781.c
++++ b/drivers/media/dvb-frontends/l64781.c
+@@ -593,4 +593,4 @@ MODULE_DESCRIPTION("LSI L64781 DVB-T Demodulator driver");
+ MODULE_AUTHOR("Holger Waechtler, Marko Kohtala");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(l64781_attach);
++EXPORT_SYMBOL_GPL(l64781_attach);
+diff --git a/drivers/media/dvb-frontends/lg2160.c b/drivers/media/dvb-frontends/lg2160.c
+index f343066c297e2..fe700aa56bff3 100644
+--- a/drivers/media/dvb-frontends/lg2160.c
++++ b/drivers/media/dvb-frontends/lg2160.c
+@@ -1426,7 +1426,7 @@ struct dvb_frontend *lg2160_attach(const struct lg2160_config *config,
+
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(lg2160_attach);
++EXPORT_SYMBOL_GPL(lg2160_attach);
+
+ MODULE_DESCRIPTION("LG Electronics LG216x ATSC/MH Demodulator Driver");
+ MODULE_AUTHOR("Michael Krufky <mkrufky@linuxtv.org>");
+diff --git a/drivers/media/dvb-frontends/lgdt3305.c b/drivers/media/dvb-frontends/lgdt3305.c
+index 62d7439889196..60a97f1cc74e5 100644
+--- a/drivers/media/dvb-frontends/lgdt3305.c
++++ b/drivers/media/dvb-frontends/lgdt3305.c
+@@ -1148,7 +1148,7 @@ fail:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(lgdt3305_attach);
++EXPORT_SYMBOL_GPL(lgdt3305_attach);
+
+ static const struct dvb_frontend_ops lgdt3304_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/lgdt3306a.c b/drivers/media/dvb-frontends/lgdt3306a.c
+index 136b76cb48077..f6e83a38738dd 100644
+--- a/drivers/media/dvb-frontends/lgdt3306a.c
++++ b/drivers/media/dvb-frontends/lgdt3306a.c
+@@ -1859,7 +1859,7 @@ fail:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(lgdt3306a_attach);
++EXPORT_SYMBOL_GPL(lgdt3306a_attach);
+
+ #ifdef DBG_DUMP
+
+diff --git a/drivers/media/dvb-frontends/lgdt330x.c b/drivers/media/dvb-frontends/lgdt330x.c
+index da3a8c5e18d8e..53b1443ba0220 100644
+--- a/drivers/media/dvb-frontends/lgdt330x.c
++++ b/drivers/media/dvb-frontends/lgdt330x.c
+@@ -928,7 +928,7 @@ struct dvb_frontend *lgdt330x_attach(const struct lgdt330x_config *_config,
+
+ return lgdt330x_get_dvb_frontend(client);
+ }
+-EXPORT_SYMBOL(lgdt330x_attach);
++EXPORT_SYMBOL_GPL(lgdt330x_attach);
+
+ static const struct dvb_frontend_ops lgdt3302_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/lgs8gxx.c b/drivers/media/dvb-frontends/lgs8gxx.c
+index 30014979b985b..ffaf60e16ecd4 100644
+--- a/drivers/media/dvb-frontends/lgs8gxx.c
++++ b/drivers/media/dvb-frontends/lgs8gxx.c
+@@ -1043,7 +1043,7 @@ error_out:
+ return NULL;
+
+ }
+-EXPORT_SYMBOL(lgs8gxx_attach);
++EXPORT_SYMBOL_GPL(lgs8gxx_attach);
+
+ MODULE_DESCRIPTION("Legend Silicon LGS8913/LGS8GXX DMB-TH demodulator driver");
+ MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
+diff --git a/drivers/media/dvb-frontends/lnbh25.c b/drivers/media/dvb-frontends/lnbh25.c
+index 9ffe06cd787dd..41bec050642b5 100644
+--- a/drivers/media/dvb-frontends/lnbh25.c
++++ b/drivers/media/dvb-frontends/lnbh25.c
+@@ -173,7 +173,7 @@ struct dvb_frontend *lnbh25_attach(struct dvb_frontend *fe,
+ __func__, priv->i2c_address);
+ return fe;
+ }
+-EXPORT_SYMBOL(lnbh25_attach);
++EXPORT_SYMBOL_GPL(lnbh25_attach);
+
+ MODULE_DESCRIPTION("ST LNBH25 driver");
+ MODULE_AUTHOR("info@netup.ru");
+diff --git a/drivers/media/dvb-frontends/lnbp21.c b/drivers/media/dvb-frontends/lnbp21.c
+index e564974162d65..32593b1f75a38 100644
+--- a/drivers/media/dvb-frontends/lnbp21.c
++++ b/drivers/media/dvb-frontends/lnbp21.c
+@@ -155,7 +155,7 @@ struct dvb_frontend *lnbh24_attach(struct dvb_frontend *fe,
+ return lnbx2x_attach(fe, i2c, override_set, override_clear,
+ i2c_addr, LNBH24_TTX);
+ }
+-EXPORT_SYMBOL(lnbh24_attach);
++EXPORT_SYMBOL_GPL(lnbh24_attach);
+
+ struct dvb_frontend *lnbp21_attach(struct dvb_frontend *fe,
+ struct i2c_adapter *i2c, u8 override_set,
+@@ -164,7 +164,7 @@ struct dvb_frontend *lnbp21_attach(struct dvb_frontend *fe,
+ return lnbx2x_attach(fe, i2c, override_set, override_clear,
+ 0x08, LNBP21_ISEL);
+ }
+-EXPORT_SYMBOL(lnbp21_attach);
++EXPORT_SYMBOL_GPL(lnbp21_attach);
+
+ MODULE_DESCRIPTION("Driver for lnb supply and control ic lnbp21, lnbh24");
+ MODULE_AUTHOR("Oliver Endriss, Igor M. Liplianin");
+diff --git a/drivers/media/dvb-frontends/lnbp22.c b/drivers/media/dvb-frontends/lnbp22.c
+index b8c7145d4cefe..cb4ea5d3fad4a 100644
+--- a/drivers/media/dvb-frontends/lnbp22.c
++++ b/drivers/media/dvb-frontends/lnbp22.c
+@@ -125,7 +125,7 @@ struct dvb_frontend *lnbp22_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(lnbp22_attach);
++EXPORT_SYMBOL_GPL(lnbp22_attach);
+
+ MODULE_DESCRIPTION("Driver for lnb supply and control ic lnbp22");
+ MODULE_AUTHOR("Dominik Kuhlen");
+diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
+index 02e8aa11e36e7..e03fac025b512 100644
+--- a/drivers/media/dvb-frontends/m88ds3103.c
++++ b/drivers/media/dvb-frontends/m88ds3103.c
+@@ -1699,7 +1699,7 @@ struct dvb_frontend *m88ds3103_attach(const struct m88ds3103_config *cfg,
+ *tuner_i2c_adapter = pdata.get_i2c_adapter(client);
+ return pdata.get_dvb_frontend(client);
+ }
+-EXPORT_SYMBOL(m88ds3103_attach);
++EXPORT_SYMBOL_GPL(m88ds3103_attach);
+
+ static const struct dvb_frontend_ops m88ds3103_ops = {
+ .delsys = {SYS_DVBS, SYS_DVBS2},
+diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c
+index b294ba87e934f..2aa98203cd659 100644
+--- a/drivers/media/dvb-frontends/m88rs2000.c
++++ b/drivers/media/dvb-frontends/m88rs2000.c
+@@ -808,7 +808,7 @@ error:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(m88rs2000_attach);
++EXPORT_SYMBOL_GPL(m88rs2000_attach);
+
+ MODULE_DESCRIPTION("M88RS2000 DVB-S Demodulator driver");
+ MODULE_AUTHOR("Malcolm Priestley tvboxspy@gmail.com");
+diff --git a/drivers/media/dvb-frontends/mb86a16.c b/drivers/media/dvb-frontends/mb86a16.c
+index 2505f1e5794e7..ed08e0c2cf512 100644
+--- a/drivers/media/dvb-frontends/mb86a16.c
++++ b/drivers/media/dvb-frontends/mb86a16.c
+@@ -1848,6 +1848,6 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(mb86a16_attach);
++EXPORT_SYMBOL_GPL(mb86a16_attach);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Manu Abraham");
+diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c
+index a7faf0cf8788b..8a333af9e176f 100644
+--- a/drivers/media/dvb-frontends/mb86a20s.c
++++ b/drivers/media/dvb-frontends/mb86a20s.c
+@@ -2081,7 +2081,7 @@ struct dvb_frontend *mb86a20s_attach(const struct mb86a20s_config *config,
+ dev_info(&i2c->dev, "Detected a Fujitsu mb86a20s frontend\n");
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(mb86a20s_attach);
++EXPORT_SYMBOL_GPL(mb86a20s_attach);
+
+ static const struct dvb_frontend_ops mb86a20s_ops = {
+ .delsys = { SYS_ISDBT },
+diff --git a/drivers/media/dvb-frontends/mn88443x.c b/drivers/media/dvb-frontends/mn88443x.c
+index e4528784f8477..05894deb8a19a 100644
+--- a/drivers/media/dvb-frontends/mn88443x.c
++++ b/drivers/media/dvb-frontends/mn88443x.c
+@@ -204,11 +204,18 @@ struct mn88443x_priv {
+ struct regmap *regmap_t;
+ };
+
+-static void mn88443x_cmn_power_on(struct mn88443x_priv *chip)
++static int mn88443x_cmn_power_on(struct mn88443x_priv *chip)
+ {
++ struct device *dev = &chip->client_s->dev;
+ struct regmap *r_t = chip->regmap_t;
++ int ret;
+
+- clk_prepare_enable(chip->mclk);
++ ret = clk_prepare_enable(chip->mclk);
++ if (ret) {
++ dev_err(dev, "Failed to prepare and enable mclk: %d\n",
++ ret);
++ return ret;
++ }
+
+ gpiod_set_value_cansleep(chip->reset_gpio, 1);
+ usleep_range(100, 1000);
+@@ -222,6 +229,8 @@ static void mn88443x_cmn_power_on(struct mn88443x_priv *chip)
+ } else {
+ regmap_write(r_t, HIZSET3, 0x8f);
+ }
++
++ return 0;
+ }
+
+ static void mn88443x_cmn_power_off(struct mn88443x_priv *chip)
+@@ -738,7 +747,10 @@ static int mn88443x_probe(struct i2c_client *client,
+ chip->fe.demodulator_priv = chip;
+ i2c_set_clientdata(client, chip);
+
+- mn88443x_cmn_power_on(chip);
++ ret = mn88443x_cmn_power_on(chip);
++ if (ret)
++ goto err_i2c_t;
++
+ mn88443x_s_sleep(chip);
+ mn88443x_t_sleep(chip);
+
+@@ -788,7 +800,7 @@ MODULE_DEVICE_TABLE(i2c, mn88443x_i2c_id);
+ static struct i2c_driver mn88443x_driver = {
+ .driver = {
+ .name = "mn88443x",
+- .of_match_table = of_match_ptr(mn88443x_of_match),
++ .of_match_table = mn88443x_of_match,
+ },
+ .probe = mn88443x_probe,
+ .remove = mn88443x_remove,
+diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c
+index d43a67045dbe7..fb867dd8a26be 100644
+--- a/drivers/media/dvb-frontends/mt312.c
++++ b/drivers/media/dvb-frontends/mt312.c
+@@ -827,7 +827,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(mt312_attach);
++EXPORT_SYMBOL_GPL(mt312_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/mt352.c b/drivers/media/dvb-frontends/mt352.c
+index 399d5c519027e..1b2889f5cf67d 100644
+--- a/drivers/media/dvb-frontends/mt352.c
++++ b/drivers/media/dvb-frontends/mt352.c
+@@ -593,4 +593,4 @@ MODULE_DESCRIPTION("Zarlink MT352 DVB-T Demodulator driver");
+ MODULE_AUTHOR("Holger Waechtler, Daniel Mack, Antonio Mancuso");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(mt352_attach);
++EXPORT_SYMBOL_GPL(mt352_attach);
+diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c
+index 200b6dbc75f81..1c549ada6ebf9 100644
+--- a/drivers/media/dvb-frontends/nxt200x.c
++++ b/drivers/media/dvb-frontends/nxt200x.c
+@@ -1216,5 +1216,5 @@ MODULE_DESCRIPTION("NXT200X (ATSC 8VSB & ITU-T J.83 AnnexB 64/256 QAM) Demodulat
+ MODULE_AUTHOR("Kirk Lapray, Michael Krufky, Jean-Francois Thibert, and Taylor Jacob");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(nxt200x_attach);
++EXPORT_SYMBOL_GPL(nxt200x_attach);
+
+diff --git a/drivers/media/dvb-frontends/nxt6000.c b/drivers/media/dvb-frontends/nxt6000.c
+index 136918f82dda0..e8d4940370ddf 100644
+--- a/drivers/media/dvb-frontends/nxt6000.c
++++ b/drivers/media/dvb-frontends/nxt6000.c
+@@ -621,4 +621,4 @@ MODULE_DESCRIPTION("NxtWave NXT6000 DVB-T demodulator driver");
+ MODULE_AUTHOR("Florian Schirmer");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(nxt6000_attach);
++EXPORT_SYMBOL_GPL(nxt6000_attach);
+diff --git a/drivers/media/dvb-frontends/or51132.c b/drivers/media/dvb-frontends/or51132.c
+index 24de1b1151583..144a1f25dec0a 100644
+--- a/drivers/media/dvb-frontends/or51132.c
++++ b/drivers/media/dvb-frontends/or51132.c
+@@ -605,4 +605,4 @@ MODULE_AUTHOR("Kirk Lapray");
+ MODULE_AUTHOR("Trent Piepho");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(or51132_attach);
++EXPORT_SYMBOL_GPL(or51132_attach);
+diff --git a/drivers/media/dvb-frontends/or51211.c b/drivers/media/dvb-frontends/or51211.c
+index ddcaea5c9941f..dc60482162c54 100644
+--- a/drivers/media/dvb-frontends/or51211.c
++++ b/drivers/media/dvb-frontends/or51211.c
+@@ -551,5 +551,5 @@ MODULE_DESCRIPTION("Oren OR51211 VSB [pcHDTV HD-2000] Demodulator Driver");
+ MODULE_AUTHOR("Kirk Lapray");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(or51211_attach);
++EXPORT_SYMBOL_GPL(or51211_attach);
+
+diff --git a/drivers/media/dvb-frontends/s5h1409.c b/drivers/media/dvb-frontends/s5h1409.c
+index 3089cc174a6f5..28b1dca077ead 100644
+--- a/drivers/media/dvb-frontends/s5h1409.c
++++ b/drivers/media/dvb-frontends/s5h1409.c
+@@ -981,7 +981,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(s5h1409_attach);
++EXPORT_SYMBOL_GPL(s5h1409_attach);
+
+ static const struct dvb_frontend_ops s5h1409_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/s5h1411.c b/drivers/media/dvb-frontends/s5h1411.c
+index c1334d7eb4420..ae2b391af9039 100644
+--- a/drivers/media/dvb-frontends/s5h1411.c
++++ b/drivers/media/dvb-frontends/s5h1411.c
+@@ -900,7 +900,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(s5h1411_attach);
++EXPORT_SYMBOL_GPL(s5h1411_attach);
+
+ static const struct dvb_frontend_ops s5h1411_ops = {
+ .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
+diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c
+index 6bdec2898bc81..d700de1ea6c24 100644
+--- a/drivers/media/dvb-frontends/s5h1420.c
++++ b/drivers/media/dvb-frontends/s5h1420.c
+@@ -918,7 +918,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(s5h1420_attach);
++EXPORT_SYMBOL_GPL(s5h1420_attach);
+
+ static const struct dvb_frontend_ops s5h1420_ops = {
+ .delsys = { SYS_DVBS },
+diff --git a/drivers/media/dvb-frontends/s5h1432.c b/drivers/media/dvb-frontends/s5h1432.c
+index 956e8ee4b388e..ff5d3bdf3bc67 100644
+--- a/drivers/media/dvb-frontends/s5h1432.c
++++ b/drivers/media/dvb-frontends/s5h1432.c
+@@ -355,7 +355,7 @@ struct dvb_frontend *s5h1432_attach(const struct s5h1432_config *config,
+
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(s5h1432_attach);
++EXPORT_SYMBOL_GPL(s5h1432_attach);
+
+ static const struct dvb_frontend_ops s5h1432_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/s921.c b/drivers/media/dvb-frontends/s921.c
+index f118d8e641030..7e461ac159fc1 100644
+--- a/drivers/media/dvb-frontends/s921.c
++++ b/drivers/media/dvb-frontends/s921.c
+@@ -495,7 +495,7 @@ struct dvb_frontend *s921_attach(const struct s921_config *config,
+
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(s921_attach);
++EXPORT_SYMBOL_GPL(s921_attach);
+
+ static const struct dvb_frontend_ops s921_ops = {
+ .delsys = { SYS_ISDBT },
+diff --git a/drivers/media/dvb-frontends/si21xx.c b/drivers/media/dvb-frontends/si21xx.c
+index e31eb2c5cc4c9..a35ab007a0b34 100644
+--- a/drivers/media/dvb-frontends/si21xx.c
++++ b/drivers/media/dvb-frontends/si21xx.c
+@@ -936,7 +936,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(si21xx_attach);
++EXPORT_SYMBOL_GPL(si21xx_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/sp887x.c b/drivers/media/dvb-frontends/sp887x.c
+index c89a91a3daf40..72f58626475c4 100644
+--- a/drivers/media/dvb-frontends/sp887x.c
++++ b/drivers/media/dvb-frontends/sp887x.c
+@@ -626,4 +626,4 @@ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+ MODULE_DESCRIPTION("Spase sp887x DVB-T demodulator driver");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(sp887x_attach);
++EXPORT_SYMBOL_GPL(sp887x_attach);
+diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c
+index 4ee6c1e1e9f7d..2f4d8fb400cd6 100644
+--- a/drivers/media/dvb-frontends/stb0899_drv.c
++++ b/drivers/media/dvb-frontends/stb0899_drv.c
+@@ -1638,7 +1638,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stb0899_attach);
++EXPORT_SYMBOL_GPL(stb0899_attach);
+ MODULE_PARM_DESC(verbose, "Set Verbosity level");
+ MODULE_AUTHOR("Manu Abraham");
+ MODULE_DESCRIPTION("STB0899 Multi-Std frontend");
+diff --git a/drivers/media/dvb-frontends/stb6000.c b/drivers/media/dvb-frontends/stb6000.c
+index 8c9800d577e03..d74e34677b925 100644
+--- a/drivers/media/dvb-frontends/stb6000.c
++++ b/drivers/media/dvb-frontends/stb6000.c
+@@ -232,7 +232,7 @@ struct dvb_frontend *stb6000_attach(struct dvb_frontend *fe, int addr,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(stb6000_attach);
++EXPORT_SYMBOL_GPL(stb6000_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c
+index d541d66136107..9f92760256cf5 100644
+--- a/drivers/media/dvb-frontends/stb6100.c
++++ b/drivers/media/dvb-frontends/stb6100.c
+@@ -557,7 +557,7 @@ static void stb6100_release(struct dvb_frontend *fe)
+ kfree(state);
+ }
+
+-EXPORT_SYMBOL(stb6100_attach);
++EXPORT_SYMBOL_GPL(stb6100_attach);
+ MODULE_PARM_DESC(verbose, "Set Verbosity level");
+
+ MODULE_AUTHOR("Manu Abraham");
+diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c
+index 3d54a0ec86afd..a5581bd60f9e8 100644
+--- a/drivers/media/dvb-frontends/stv0288.c
++++ b/drivers/media/dvb-frontends/stv0288.c
+@@ -440,9 +440,8 @@ static int stv0288_set_frontend(struct dvb_frontend *fe)
+ struct stv0288_state *state = fe->demodulator_priv;
+ struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+
+- char tm;
+- unsigned char tda[3];
+- u8 reg, time_out = 0;
++ u8 tda[3], reg, time_out = 0;
++ s8 tm;
+
+ dprintk("%s : FE_SET_FRONTEND\n", __func__);
+
+@@ -591,7 +590,7 @@ error:
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv0288_attach);
++EXPORT_SYMBOL_GPL(stv0288_attach);
+
+ module_param(debug_legacy_dish_switch, int, 0444);
+ MODULE_PARM_DESC(debug_legacy_dish_switch,
+diff --git a/drivers/media/dvb-frontends/stv0297.c b/drivers/media/dvb-frontends/stv0297.c
+index 6d5962d5697ac..9d4dbd99a5a79 100644
+--- a/drivers/media/dvb-frontends/stv0297.c
++++ b/drivers/media/dvb-frontends/stv0297.c
+@@ -710,4 +710,4 @@ MODULE_DESCRIPTION("ST STV0297 DVB-C Demodulator driver");
+ MODULE_AUTHOR("Dennis Noermann and Andrew de Quincey");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(stv0297_attach);
++EXPORT_SYMBOL_GPL(stv0297_attach);
+diff --git a/drivers/media/dvb-frontends/stv0299.c b/drivers/media/dvb-frontends/stv0299.c
+index 421395ea33343..0a1b57e9e2281 100644
+--- a/drivers/media/dvb-frontends/stv0299.c
++++ b/drivers/media/dvb-frontends/stv0299.c
+@@ -751,4 +751,4 @@ MODULE_DESCRIPTION("ST STV0299 DVB Demodulator driver");
+ MODULE_AUTHOR("Ralph Metzler, Holger Waechtler, Peter Schildmann, Felix Domke, Andreas Oberritter, Andrew de Quincey, Kenneth Aafly");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(stv0299_attach);
++EXPORT_SYMBOL_GPL(stv0299_attach);
+diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c
+index 6c2b05fae1c55..0bfca1174e9e7 100644
+--- a/drivers/media/dvb-frontends/stv0367.c
++++ b/drivers/media/dvb-frontends/stv0367.c
+@@ -1750,7 +1750,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv0367ter_attach);
++EXPORT_SYMBOL_GPL(stv0367ter_attach);
+
+ static int stv0367cab_gate_ctrl(struct dvb_frontend *fe, int enable)
+ {
+@@ -2923,7 +2923,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv0367cab_attach);
++EXPORT_SYMBOL_GPL(stv0367cab_attach);
+
+ /*
+ * Functions for operation on Digital Devices hardware
+@@ -3344,7 +3344,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv0367ddb_attach);
++EXPORT_SYMBOL_GPL(stv0367ddb_attach);
+
+ MODULE_PARM_DESC(debug, "Set debug");
+ MODULE_PARM_DESC(i2c_debug, "Set i2c debug");
+diff --git a/drivers/media/dvb-frontends/stv0900_core.c b/drivers/media/dvb-frontends/stv0900_core.c
+index 212312d20ff62..e7b9b9b11d7df 100644
+--- a/drivers/media/dvb-frontends/stv0900_core.c
++++ b/drivers/media/dvb-frontends/stv0900_core.c
+@@ -1957,7 +1957,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv0900_attach);
++EXPORT_SYMBOL_GPL(stv0900_attach);
+
+ MODULE_PARM_DESC(debug, "Set debug");
+
+diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c
+index 90d24131d335f..799dbefb9eef7 100644
+--- a/drivers/media/dvb-frontends/stv090x.c
++++ b/drivers/media/dvb-frontends/stv090x.c
+@@ -5073,7 +5073,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(stv090x_attach);
++EXPORT_SYMBOL_GPL(stv090x_attach);
+
+ static const struct i2c_device_id stv090x_id_table[] = {
+ {"stv090x", 0},
+diff --git a/drivers/media/dvb-frontends/stv6110.c b/drivers/media/dvb-frontends/stv6110.c
+index 963f6a896102a..1cf9c095dbff0 100644
+--- a/drivers/media/dvb-frontends/stv6110.c
++++ b/drivers/media/dvb-frontends/stv6110.c
+@@ -427,7 +427,7 @@ struct dvb_frontend *stv6110_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(stv6110_attach);
++EXPORT_SYMBOL_GPL(stv6110_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c
+index 5012d02316522..b08c7536a69fb 100644
+--- a/drivers/media/dvb-frontends/stv6110x.c
++++ b/drivers/media/dvb-frontends/stv6110x.c
+@@ -469,7 +469,7 @@ const struct stv6110x_devctl *stv6110x_attach(struct dvb_frontend *fe,
+ dev_info(&stv6110x->i2c->dev, "Attaching STV6110x\n");
+ return stv6110x->devctl;
+ }
+-EXPORT_SYMBOL(stv6110x_attach);
++EXPORT_SYMBOL_GPL(stv6110x_attach);
+
+ static const struct i2c_device_id stv6110x_id_table[] = {
+ {"stv6110x", 0},
+diff --git a/drivers/media/dvb-frontends/tda10021.c b/drivers/media/dvb-frontends/tda10021.c
+index faa6e54b33729..462e12ab6bd14 100644
+--- a/drivers/media/dvb-frontends/tda10021.c
++++ b/drivers/media/dvb-frontends/tda10021.c
+@@ -523,4 +523,4 @@ MODULE_DESCRIPTION("Philips TDA10021 DVB-C demodulator driver");
+ MODULE_AUTHOR("Ralph Metzler, Holger Waechtler, Markus Schulz");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(tda10021_attach);
++EXPORT_SYMBOL_GPL(tda10021_attach);
+diff --git a/drivers/media/dvb-frontends/tda10023.c b/drivers/media/dvb-frontends/tda10023.c
+index 8f32edf6b700e..4c2541ecd7433 100644
+--- a/drivers/media/dvb-frontends/tda10023.c
++++ b/drivers/media/dvb-frontends/tda10023.c
+@@ -594,4 +594,4 @@ MODULE_DESCRIPTION("Philips TDA10023 DVB-C demodulator driver");
+ MODULE_AUTHOR("Georg Acher, Hartmut Birr");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(tda10023_attach);
++EXPORT_SYMBOL_GPL(tda10023_attach);
+diff --git a/drivers/media/dvb-frontends/tda10048.c b/drivers/media/dvb-frontends/tda10048.c
+index d1d206ebdedd7..f1d5e77d5dcce 100644
+--- a/drivers/media/dvb-frontends/tda10048.c
++++ b/drivers/media/dvb-frontends/tda10048.c
+@@ -1138,7 +1138,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(tda10048_attach);
++EXPORT_SYMBOL_GPL(tda10048_attach);
+
+ static const struct dvb_frontend_ops tda10048_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c
+index 83a798ca9b002..6f306db6c615f 100644
+--- a/drivers/media/dvb-frontends/tda1004x.c
++++ b/drivers/media/dvb-frontends/tda1004x.c
+@@ -1378,5 +1378,5 @@ MODULE_DESCRIPTION("Philips TDA10045H & TDA10046H DVB-T Demodulator");
+ MODULE_AUTHOR("Andrew de Quincey & Robert Schlabbach");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(tda10045_attach);
+-EXPORT_SYMBOL(tda10046_attach);
++EXPORT_SYMBOL_GPL(tda10045_attach);
++EXPORT_SYMBOL_GPL(tda10046_attach);
+diff --git a/drivers/media/dvb-frontends/tda10086.c b/drivers/media/dvb-frontends/tda10086.c
+index cdcf97664bba8..b449514ae5854 100644
+--- a/drivers/media/dvb-frontends/tda10086.c
++++ b/drivers/media/dvb-frontends/tda10086.c
+@@ -764,4 +764,4 @@ MODULE_DESCRIPTION("Philips TDA10086 DVB-S Demodulator");
+ MODULE_AUTHOR("Andrew de Quincey");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(tda10086_attach);
++EXPORT_SYMBOL_GPL(tda10086_attach);
+diff --git a/drivers/media/dvb-frontends/tda665x.c b/drivers/media/dvb-frontends/tda665x.c
+index 13e8969da7f89..346be5011fb73 100644
+--- a/drivers/media/dvb-frontends/tda665x.c
++++ b/drivers/media/dvb-frontends/tda665x.c
+@@ -227,7 +227,7 @@ struct dvb_frontend *tda665x_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(tda665x_attach);
++EXPORT_SYMBOL_GPL(tda665x_attach);
+
+ MODULE_DESCRIPTION("TDA665x driver");
+ MODULE_AUTHOR("Manu Abraham");
+diff --git a/drivers/media/dvb-frontends/tda8083.c b/drivers/media/dvb-frontends/tda8083.c
+index 5be11fd65e3b1..9fc16e917f342 100644
+--- a/drivers/media/dvb-frontends/tda8083.c
++++ b/drivers/media/dvb-frontends/tda8083.c
+@@ -481,4 +481,4 @@ MODULE_DESCRIPTION("Philips TDA8083 DVB-S Demodulator");
+ MODULE_AUTHOR("Ralph Metzler, Holger Waechtler");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(tda8083_attach);
++EXPORT_SYMBOL_GPL(tda8083_attach);
+diff --git a/drivers/media/dvb-frontends/tda8261.c b/drivers/media/dvb-frontends/tda8261.c
+index 0d576d41c67d8..8b06f92745dca 100644
+--- a/drivers/media/dvb-frontends/tda8261.c
++++ b/drivers/media/dvb-frontends/tda8261.c
+@@ -188,7 +188,7 @@ exit:
+ return NULL;
+ }
+
+-EXPORT_SYMBOL(tda8261_attach);
++EXPORT_SYMBOL_GPL(tda8261_attach);
+
+ MODULE_AUTHOR("Manu Abraham");
+ MODULE_DESCRIPTION("TDA8261 8PSK/QPSK Tuner");
+diff --git a/drivers/media/dvb-frontends/tda826x.c b/drivers/media/dvb-frontends/tda826x.c
+index f9703a1dd758c..eafcf5f7da3dc 100644
+--- a/drivers/media/dvb-frontends/tda826x.c
++++ b/drivers/media/dvb-frontends/tda826x.c
+@@ -164,7 +164,7 @@ struct dvb_frontend *tda826x_attach(struct dvb_frontend *fe, int addr, struct i2
+
+ return fe;
+ }
+-EXPORT_SYMBOL(tda826x_attach);
++EXPORT_SYMBOL_GPL(tda826x_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c
+index 3e383912bcfd8..4e89193f8e6a9 100644
+--- a/drivers/media/dvb-frontends/ts2020.c
++++ b/drivers/media/dvb-frontends/ts2020.c
+@@ -525,7 +525,7 @@ struct dvb_frontend *ts2020_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(ts2020_attach);
++EXPORT_SYMBOL_GPL(ts2020_attach);
+
+ /*
+ * We implement own regmap locking due to legacy DVB attach which uses frontend
+diff --git a/drivers/media/dvb-frontends/tua6100.c b/drivers/media/dvb-frontends/tua6100.c
+index 2483f614d0e7d..41dd9b6d31908 100644
+--- a/drivers/media/dvb-frontends/tua6100.c
++++ b/drivers/media/dvb-frontends/tua6100.c
+@@ -186,7 +186,7 @@ struct dvb_frontend *tua6100_attach(struct dvb_frontend *fe, int addr, struct i2
+ fe->tuner_priv = priv;
+ return fe;
+ }
+-EXPORT_SYMBOL(tua6100_attach);
++EXPORT_SYMBOL_GPL(tua6100_attach);
+
+ MODULE_DESCRIPTION("DVB tua6100 driver");
+ MODULE_AUTHOR("Andrew de Quincey");
+diff --git a/drivers/media/dvb-frontends/ves1820.c b/drivers/media/dvb-frontends/ves1820.c
+index 9df14d0be1c1a..ee5620e731e9b 100644
+--- a/drivers/media/dvb-frontends/ves1820.c
++++ b/drivers/media/dvb-frontends/ves1820.c
+@@ -434,4 +434,4 @@ MODULE_DESCRIPTION("VLSI VES1820 DVB-C Demodulator driver");
+ MODULE_AUTHOR("Ralph Metzler, Holger Waechtler");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(ves1820_attach);
++EXPORT_SYMBOL_GPL(ves1820_attach);
+diff --git a/drivers/media/dvb-frontends/ves1x93.c b/drivers/media/dvb-frontends/ves1x93.c
+index b747272863025..c60e21d26b881 100644
+--- a/drivers/media/dvb-frontends/ves1x93.c
++++ b/drivers/media/dvb-frontends/ves1x93.c
+@@ -540,4 +540,4 @@ MODULE_DESCRIPTION("VLSI VES1x93 DVB-S Demodulator driver");
+ MODULE_AUTHOR("Ralph Metzler");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(ves1x93_attach);
++EXPORT_SYMBOL_GPL(ves1x93_attach);
+diff --git a/drivers/media/dvb-frontends/zl10036.c b/drivers/media/dvb-frontends/zl10036.c
+index d392c7cce2ce0..7ba575e9c55f4 100644
+--- a/drivers/media/dvb-frontends/zl10036.c
++++ b/drivers/media/dvb-frontends/zl10036.c
+@@ -496,7 +496,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(zl10036_attach);
++EXPORT_SYMBOL_GPL(zl10036_attach);
+
+ module_param_named(debug, zl10036_debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c
+index 1335bf78d5b7f..a3e4d219400ce 100644
+--- a/drivers/media/dvb-frontends/zl10039.c
++++ b/drivers/media/dvb-frontends/zl10039.c
+@@ -295,7 +295,7 @@ error:
+ kfree(state);
+ return NULL;
+ }
+-EXPORT_SYMBOL(zl10039_attach);
++EXPORT_SYMBOL_GPL(zl10039_attach);
+
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
+diff --git a/drivers/media/dvb-frontends/zl10353.c b/drivers/media/dvb-frontends/zl10353.c
+index 2a2cf20a73d61..8849d05475c27 100644
+--- a/drivers/media/dvb-frontends/zl10353.c
++++ b/drivers/media/dvb-frontends/zl10353.c
+@@ -665,4 +665,4 @@ MODULE_DESCRIPTION("Zarlink ZL10353 DVB-T demodulator driver");
+ MODULE_AUTHOR("Chris Pascoe");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(zl10353_attach);
++EXPORT_SYMBOL_GPL(zl10353_attach);
+diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c
+index 2bf9467b917d1..71991f8638e6b 100644
+--- a/drivers/media/firewire/firedtv-avc.c
++++ b/drivers/media/firewire/firedtv-avc.c
+@@ -1165,7 +1165,11 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length)
+ read_pos += program_info_length;
+ write_pos += program_info_length;
+ }
+- while (read_pos < length) {
++ while (read_pos + 4 < length) {
++ if (write_pos + 4 >= sizeof(c->operand) - 4) {
++ ret = -EINVAL;
++ goto out;
++ }
+ c->operand[write_pos++] = msg[read_pos++];
+ c->operand[write_pos++] = msg[read_pos++];
+ c->operand[write_pos++] = msg[read_pos++];
+@@ -1177,13 +1181,17 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length)
+ c->operand[write_pos++] = es_info_length >> 8;
+ c->operand[write_pos++] = es_info_length & 0xff;
+ if (es_info_length > 0) {
++ if (read_pos >= length) {
++ ret = -EINVAL;
++ goto out;
++ }
+ pmt_cmd_id = msg[read_pos++];
+ if (pmt_cmd_id != 1 && pmt_cmd_id != 4)
+ dev_err(fdtv->device, "invalid pmt_cmd_id %d at stream level\n",
+ pmt_cmd_id);
+
+- if (es_info_length > sizeof(c->operand) - 4 -
+- write_pos) {
++ if (es_info_length > sizeof(c->operand) - 4 - write_pos ||
++ es_info_length > length - read_pos) {
+ ret = -EINVAL;
+ goto out;
+ }
+diff --git a/drivers/media/firewire/firedtv-ci.c b/drivers/media/firewire/firedtv-ci.c
+index 9363d005e2b61..e0d57e09dab0c 100644
+--- a/drivers/media/firewire/firedtv-ci.c
++++ b/drivers/media/firewire/firedtv-ci.c
+@@ -134,6 +134,8 @@ static int fdtv_ca_pmt(struct firedtv *fdtv, void *arg)
+ } else {
+ data_length = msg->msg[3];
+ }
++ if (data_length > sizeof(msg->msg) - data_pos)
++ return -EINVAL;
+
+ return avc_ca_pmt(fdtv, &msg->msg[data_pos], data_length);
+ }
+diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
+index 08feb3e8c1bf6..6157e73eef24e 100644
+--- a/drivers/media/i2c/Kconfig
++++ b/drivers/media/i2c/Kconfig
+@@ -597,6 +597,7 @@ config VIDEO_AK881X
+ config VIDEO_THS8200
+ tristate "Texas Instruments THS8200 video encoder"
+ depends on VIDEO_V4L2 && I2C
++ select V4L2_ASYNC
+ help
+ Support for the Texas Instruments THS8200 video encoder.
+
+diff --git a/drivers/media/i2c/ad5820.c b/drivers/media/i2c/ad5820.c
+index 2958a46944614..63b5bf1fae761 100644
+--- a/drivers/media/i2c/ad5820.c
++++ b/drivers/media/i2c/ad5820.c
+@@ -327,18 +327,18 @@ static int ad5820_probe(struct i2c_client *client,
+
+ ret = media_entity_pads_init(&coil->subdev.entity, 0, NULL);
+ if (ret < 0)
+- goto cleanup2;
++ goto clean_mutex;
+
+ ret = v4l2_async_register_subdev(&coil->subdev);
+ if (ret < 0)
+- goto cleanup;
++ goto clean_entity;
+
+ return ret;
+
+-cleanup2:
+- mutex_destroy(&coil->power_lock);
+-cleanup:
++clean_entity:
+ media_entity_cleanup(&coil->subdev.entity);
++clean_mutex:
++ mutex_destroy(&coil->power_lock);
+ return ret;
+ }
+
+@@ -357,7 +357,6 @@ static int ad5820_remove(struct i2c_client *client)
+ static const struct i2c_device_id ad5820_id_table[] = {
+ { "ad5820", 0 },
+ { "ad5821", 0 },
+- { "ad5823", 0 },
+ { }
+ };
+ MODULE_DEVICE_TABLE(i2c, ad5820_id_table);
+@@ -365,7 +364,6 @@ MODULE_DEVICE_TABLE(i2c, ad5820_id_table);
+ static const struct of_device_id ad5820_of_table[] = {
+ { .compatible = "adi,ad5820" },
+ { .compatible = "adi,ad5821" },
+- { .compatible = "adi,ad5823" },
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, ad5820_of_table);
+diff --git a/drivers/media/i2c/adv748x/adv748x-afe.c b/drivers/media/i2c/adv748x/adv748x-afe.c
+index 02eabe10ab970..00095c7762c24 100644
+--- a/drivers/media/i2c/adv748x/adv748x-afe.c
++++ b/drivers/media/i2c/adv748x/adv748x-afe.c
+@@ -521,6 +521,10 @@ int adv748x_afe_init(struct adv748x_afe *afe)
+ }
+ }
+
++ adv748x_afe_s_input(afe, afe->input);
++
++ adv_dbg(state, "AFE Default input set to %d\n", afe->input);
++
+ /* Entity pads and sinks are 0-indexed to match the pads */
+ for (i = ADV748X_AFE_SINK_AIN0; i <= ADV748X_AFE_SINK_AIN7; i++)
+ afe->pads[i].flags = MEDIA_PAD_FL_SINK;
+diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c
+index 41f4e749a859c..2217004264e4b 100644
+--- a/drivers/media/i2c/adv7511-v4l2.c
++++ b/drivers/media/i2c/adv7511-v4l2.c
+@@ -544,7 +544,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7511_cfg_read_
+ buffer[3] = 0;
+ buffer[3] = hdmi_infoframe_checksum(buffer, len + 4);
+
+- if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) {
++ if (hdmi_infoframe_unpack(&frame, buffer, len + 4) < 0) {
+ v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc);
+ return;
+ }
+diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
+index 122e1fdccd962..d688ffff7a074 100644
+--- a/drivers/media/i2c/adv7604.c
++++ b/drivers/media/i2c/adv7604.c
+@@ -2484,7 +2484,7 @@ static int adv76xx_read_infoframe(struct v4l2_subdev *sd, int index,
+ buffer[i + 3] = infoframe_read(sd,
+ adv76xx_cri[index].payload_addr + i);
+
+- if (hdmi_infoframe_unpack(frame, buffer, sizeof(buffer)) < 0) {
++ if (hdmi_infoframe_unpack(frame, buffer, len + 3) < 0) {
+ v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__,
+ adv76xx_cri[index].desc);
+ return -ENOENT;
+diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c
+index 7f8acbdf0db4a..8ab4c63839b49 100644
+--- a/drivers/media/i2c/adv7842.c
++++ b/drivers/media/i2c/adv7842.c
+@@ -2593,7 +2593,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7842_cfg_read_
+ for (i = 0; i < len; i++)
+ buffer[i + 3] = infoframe_read(sd, cri->payload_addr + i);
+
+- if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) {
++ if (hdmi_infoframe_unpack(&frame, buffer, len + 3) < 0) {
+ v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc);
+ return;
+ }
+diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
+index 5363f3bcafe3d..67bb770ed63fa 100644
+--- a/drivers/media/i2c/ccs/ccs-core.c
++++ b/drivers/media/i2c/ccs/ccs-core.c
+@@ -1603,8 +1603,11 @@ static int ccs_power_on(struct device *dev)
+ usleep_range(1000, 2000);
+ } while (--retry);
+
+- if (!reset)
+- return -EIO;
++ if (!reset) {
++ dev_err(dev, "software reset failed\n");
++ rval = -EIO;
++ goto out_cci_addr_fail;
++ }
+ }
+
+ if (sensor->hwcfg.i2c_addr_alt) {
+diff --git a/drivers/media/i2c/ccs/ccs-data.c b/drivers/media/i2c/ccs/ccs-data.c
+index 45f2b2f55ec5c..08400edf77ced 100644
+--- a/drivers/media/i2c/ccs/ccs-data.c
++++ b/drivers/media/i2c/ccs/ccs-data.c
+@@ -464,8 +464,7 @@ static int ccs_data_parse_rules(struct bin_container *bin,
+ rule_payload = __rule_type + 1;
+ rule_plen2 = rule_plen - sizeof(*__rule_type);
+
+- switch (*__rule_type) {
+- case CCS_DATA_BLOCK_RULE_ID_IF: {
++ if (*__rule_type == CCS_DATA_BLOCK_RULE_ID_IF) {
+ const struct __ccs_data_block_rule_if *__if_rules =
+ rule_payload;
+ const size_t __num_if_rules =
+@@ -514,49 +513,61 @@ static int ccs_data_parse_rules(struct bin_container *bin,
+ rules->if_rules = if_rule;
+ rules->num_if_rules = __num_if_rules;
+ }
+- break;
+- }
+- case CCS_DATA_BLOCK_RULE_ID_READ_ONLY_REGS:
+- rval = ccs_data_parse_reg_rules(bin, &rules->read_only_regs,
+- &rules->num_read_only_regs,
+- rule_payload,
+- rule_payload + rule_plen2,
+- dev);
+- if (rval)
+- return rval;
+- break;
+- case CCS_DATA_BLOCK_RULE_ID_FFD:
+- rval = ccs_data_parse_ffd(bin, &rules->frame_format,
+- rule_payload,
+- rule_payload + rule_plen2,
+- dev);
+- if (rval)
+- return rval;
+- break;
+- case CCS_DATA_BLOCK_RULE_ID_MSR:
+- rval = ccs_data_parse_reg_rules(bin,
+- &rules->manufacturer_regs,
+- &rules->num_manufacturer_regs,
+- rule_payload,
+- rule_payload + rule_plen2,
+- dev);
+- if (rval)
+- return rval;
+- break;
+- case CCS_DATA_BLOCK_RULE_ID_PDAF_READOUT:
+- rval = ccs_data_parse_pdaf_readout(bin,
+- &rules->pdaf_readout,
+- rule_payload,
+- rule_payload + rule_plen2,
+- dev);
+- if (rval)
+- return rval;
+- break;
+- default:
+- dev_dbg(dev,
+- "Don't know how to handle rule type %u!\n",
+- *__rule_type);
+- return -EINVAL;
++ } else {
++ /* Check there was an if rule before any other rules */
++ if (bin->base && !rules)
++ return -EINVAL;
++
++ switch (*__rule_type) {
++ case CCS_DATA_BLOCK_RULE_ID_READ_ONLY_REGS:
++ rval = ccs_data_parse_reg_rules(bin,
++ rules ?
++ &rules->read_only_regs : NULL,
++ rules ?
++ &rules->num_read_only_regs : NULL,
++ rule_payload,
++ rule_payload + rule_plen2,
++ dev);
++ if (rval)
++ return rval;
++ break;
++ case CCS_DATA_BLOCK_RULE_ID_FFD:
++ rval = ccs_data_parse_ffd(bin, rules ?
++ &rules->frame_format : NULL,
++ rule_payload,
++ rule_payload + rule_plen2,
++ dev);
++ if (rval)
++ return rval;
++ break;
++ case CCS_DATA_BLOCK_RULE_ID_MSR:
++ rval = ccs_data_parse_reg_rules(bin,
++ rules ?
++ &rules->manufacturer_regs : NULL,
++ rules ?
++ &rules->num_manufacturer_regs : NULL,
++ rule_payload,
++ rule_payload + rule_plen2,
++ dev);
++ if (rval)
++ return rval;
++ break;
++ case CCS_DATA_BLOCK_RULE_ID_PDAF_READOUT:
++ rval = ccs_data_parse_pdaf_readout(bin,
++ rules ?
++ &rules->pdaf_readout : NULL,
++ rule_payload,
++ rule_payload + rule_plen2,
++ dev);
++ if (rval)
++ return rval;
++ break;
++ default:
++ dev_dbg(dev,
++ "Don't know how to handle rule type %u!\n",
++ *__rule_type);
++ return -EINVAL;
++ }
+ }
+ __next_rule = __next_rule + rule_hlen + rule_plen;
+ }
+diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c
+index e10af3f74b38f..de1f0aa6fff4a 100644
+--- a/drivers/media/i2c/imx219.c
++++ b/drivers/media/i2c/imx219.c
+@@ -89,6 +89,12 @@
+
+ #define IMX219_REG_ORIENTATION 0x0172
+
++/* Binning Mode */
++#define IMX219_REG_BINNING_MODE 0x0174
++#define IMX219_BINNING_NONE 0x0000
++#define IMX219_BINNING_2X2 0x0101
++#define IMX219_BINNING_2X2_ANALOG 0x0303
++
+ /* Test Pattern Control */
+ #define IMX219_REG_TEST_PATTERN 0x0600
+ #define IMX219_TEST_PATTERN_DISABLE 0
+@@ -143,25 +149,66 @@ struct imx219_mode {
+
+ /* Default register values */
+ struct imx219_reg_list reg_list;
++
++ /* 2x2 binning is used */
++ bool binning;
+ };
+
+-/*
+- * Register sets lifted off the i2C interface from the Raspberry Pi firmware
+- * driver.
+- * 3280x2464 = mode 2, 1920x1080 = mode 1, 1640x1232 = mode 4, 640x480 = mode 7.
+- */
+-static const struct imx219_reg mode_3280x2464_regs[] = {
+- {0x0100, 0x00},
++static const struct imx219_reg imx219_common_regs[] = {
++ {0x0100, 0x00}, /* Mode Select */
++
++ /* To Access Addresses 3000-5fff, send the following commands */
+ {0x30eb, 0x0c},
+ {0x30eb, 0x05},
+ {0x300a, 0xff},
+ {0x300b, 0xff},
+ {0x30eb, 0x05},
+ {0x30eb, 0x09},
+- {0x0114, 0x01},
+- {0x0128, 0x00},
+- {0x012a, 0x18},
++
++ /* PLL Clock Table */
++ {0x0301, 0x05}, /* VTPXCK_DIV */
++ {0x0303, 0x01}, /* VTSYSCK_DIV */
++ {0x0304, 0x03}, /* PREPLLCK_VT_DIV 0x03 = AUTO set */
++ {0x0305, 0x03}, /* PREPLLCK_OP_DIV 0x03 = AUTO set */
++ {0x0306, 0x00}, /* PLL_VT_MPY */
++ {0x0307, 0x39},
++ {0x030b, 0x01}, /* OP_SYS_CLK_DIV */
++ {0x030c, 0x00}, /* PLL_OP_MPY */
++ {0x030d, 0x72},
++
++ /* Undocumented registers */
++ {0x455e, 0x00},
++ {0x471e, 0x4b},
++ {0x4767, 0x0f},
++ {0x4750, 0x14},
++ {0x4540, 0x00},
++ {0x47b4, 0x14},
++ {0x4713, 0x30},
++ {0x478b, 0x10},
++ {0x478f, 0x10},
++ {0x4793, 0x10},
++ {0x4797, 0x0e},
++ {0x479b, 0x0e},
++
++ /* Frame Bank Register Group "A" */
++ {0x0162, 0x0d}, /* Line_Length_A */
++ {0x0163, 0x78},
++ {0x0170, 0x01}, /* X_ODD_INC_A */
++ {0x0171, 0x01}, /* Y_ODD_INC_A */
++
++ /* Output setup registers */
++ {0x0114, 0x01}, /* CSI 2-Lane Mode */
++ {0x0128, 0x00}, /* DPHY Auto Mode */
++ {0x012a, 0x18}, /* EXCK_Freq */
+ {0x012b, 0x00},
++};
++
++/*
++ * Register sets lifted off the i2C interface from the Raspberry Pi firmware
++ * driver.
++ * 3280x2464 = mode 2, 1920x1080 = mode 1, 1640x1232 = mode 4, 640x480 = mode 7.
++ */
++static const struct imx219_reg mode_3280x2464_regs[] = {
+ {0x0164, 0x00},
+ {0x0165, 0x00},
+ {0x0166, 0x0c},
+@@ -174,53 +221,13 @@ static const struct imx219_reg mode_3280x2464_regs[] = {
+ {0x016d, 0xd0},
+ {0x016e, 0x09},
+ {0x016f, 0xa0},
+- {0x0170, 0x01},
+- {0x0171, 0x01},
+- {0x0174, 0x00},
+- {0x0175, 0x00},
+- {0x0301, 0x05},
+- {0x0303, 0x01},
+- {0x0304, 0x03},
+- {0x0305, 0x03},
+- {0x0306, 0x00},
+- {0x0307, 0x39},
+- {0x030b, 0x01},
+- {0x030c, 0x00},
+- {0x030d, 0x72},
+ {0x0624, 0x0c},
+ {0x0625, 0xd0},
+ {0x0626, 0x09},
+ {0x0627, 0xa0},
+- {0x455e, 0x00},
+- {0x471e, 0x4b},
+- {0x4767, 0x0f},
+- {0x4750, 0x14},
+- {0x4540, 0x00},
+- {0x47b4, 0x14},
+- {0x4713, 0x30},
+- {0x478b, 0x10},
+- {0x478f, 0x10},
+- {0x4793, 0x10},
+- {0x4797, 0x0e},
+- {0x479b, 0x0e},
+- {0x0162, 0x0d},
+- {0x0163, 0x78},
+ };
+
+ static const struct imx219_reg mode_1920_1080_regs[] = {
+- {0x0100, 0x00},
+- {0x30eb, 0x05},
+- {0x30eb, 0x0c},
+- {0x300a, 0xff},
+- {0x300b, 0xff},
+- {0x30eb, 0x05},
+- {0x30eb, 0x09},
+- {0x0114, 0x01},
+- {0x0128, 0x00},
+- {0x012a, 0x18},
+- {0x012b, 0x00},
+- {0x0162, 0x0d},
+- {0x0163, 0x78},
+ {0x0164, 0x02},
+ {0x0165, 0xa8},
+ {0x0166, 0x0a},
+@@ -233,49 +240,13 @@ static const struct imx219_reg mode_1920_1080_regs[] = {
+ {0x016d, 0x80},
+ {0x016e, 0x04},
+ {0x016f, 0x38},
+- {0x0170, 0x01},
+- {0x0171, 0x01},
+- {0x0174, 0x00},
+- {0x0175, 0x00},
+- {0x0301, 0x05},
+- {0x0303, 0x01},
+- {0x0304, 0x03},
+- {0x0305, 0x03},
+- {0x0306, 0x00},
+- {0x0307, 0x39},
+- {0x030b, 0x01},
+- {0x030c, 0x00},
+- {0x030d, 0x72},
+ {0x0624, 0x07},
+ {0x0625, 0x80},
+ {0x0626, 0x04},
+ {0x0627, 0x38},
+- {0x455e, 0x00},
+- {0x471e, 0x4b},
+- {0x4767, 0x0f},
+- {0x4750, 0x14},
+- {0x4540, 0x00},
+- {0x47b4, 0x14},
+- {0x4713, 0x30},
+- {0x478b, 0x10},
+- {0x478f, 0x10},
+- {0x4793, 0x10},
+- {0x4797, 0x0e},
+- {0x479b, 0x0e},
+ };
+
+ static const struct imx219_reg mode_1640_1232_regs[] = {
+- {0x0100, 0x00},
+- {0x30eb, 0x0c},
+- {0x30eb, 0x05},
+- {0x300a, 0xff},
+- {0x300b, 0xff},
+- {0x30eb, 0x05},
+- {0x30eb, 0x09},
+- {0x0114, 0x01},
+- {0x0128, 0x00},
+- {0x012a, 0x18},
+- {0x012b, 0x00},
+ {0x0164, 0x00},
+ {0x0165, 0x00},
+ {0x0166, 0x0c},
+@@ -288,53 +259,13 @@ static const struct imx219_reg mode_1640_1232_regs[] = {
+ {0x016d, 0x68},
+ {0x016e, 0x04},
+ {0x016f, 0xd0},
+- {0x0170, 0x01},
+- {0x0171, 0x01},
+- {0x0174, 0x01},
+- {0x0175, 0x01},
+- {0x0301, 0x05},
+- {0x0303, 0x01},
+- {0x0304, 0x03},
+- {0x0305, 0x03},
+- {0x0306, 0x00},
+- {0x0307, 0x39},
+- {0x030b, 0x01},
+- {0x030c, 0x00},
+- {0x030d, 0x72},
+ {0x0624, 0x06},
+ {0x0625, 0x68},
+ {0x0626, 0x04},
+ {0x0627, 0xd0},
+- {0x455e, 0x00},
+- {0x471e, 0x4b},
+- {0x4767, 0x0f},
+- {0x4750, 0x14},
+- {0x4540, 0x00},
+- {0x47b4, 0x14},
+- {0x4713, 0x30},
+- {0x478b, 0x10},
+- {0x478f, 0x10},
+- {0x4793, 0x10},
+- {0x4797, 0x0e},
+- {0x479b, 0x0e},
+- {0x0162, 0x0d},
+- {0x0163, 0x78},
+ };
+
+ static const struct imx219_reg mode_640_480_regs[] = {
+- {0x0100, 0x00},
+- {0x30eb, 0x05},
+- {0x30eb, 0x0c},
+- {0x300a, 0xff},
+- {0x300b, 0xff},
+- {0x30eb, 0x05},
+- {0x30eb, 0x09},
+- {0x0114, 0x01},
+- {0x0128, 0x00},
+- {0x012a, 0x18},
+- {0x012b, 0x00},
+- {0x0162, 0x0d},
+- {0x0163, 0x78},
+ {0x0164, 0x03},
+ {0x0165, 0xe8},
+ {0x0166, 0x08},
+@@ -347,35 +278,10 @@ static const struct imx219_reg mode_640_480_regs[] = {
+ {0x016d, 0x80},
+ {0x016e, 0x01},
+ {0x016f, 0xe0},
+- {0x0170, 0x01},
+- {0x0171, 0x01},
+- {0x0174, 0x03},
+- {0x0175, 0x03},
+- {0x0301, 0x05},
+- {0x0303, 0x01},
+- {0x0304, 0x03},
+- {0x0305, 0x03},
+- {0x0306, 0x00},
+- {0x0307, 0x39},
+- {0x030b, 0x01},
+- {0x030c, 0x00},
+- {0x030d, 0x72},
+ {0x0624, 0x06},
+ {0x0625, 0x68},
+ {0x0626, 0x04},
+ {0x0627, 0xd0},
+- {0x455e, 0x00},
+- {0x471e, 0x4b},
+- {0x4767, 0x0f},
+- {0x4750, 0x14},
+- {0x4540, 0x00},
+- {0x47b4, 0x14},
+- {0x4713, 0x30},
+- {0x478b, 0x10},
+- {0x478f, 0x10},
+- {0x4793, 0x10},
+- {0x4797, 0x0e},
+- {0x479b, 0x0e},
+ };
+
+ static const struct imx219_reg raw8_framefmt_regs[] = {
+@@ -485,6 +391,7 @@ static const struct imx219_mode supported_modes[] = {
+ .num_of_regs = ARRAY_SIZE(mode_3280x2464_regs),
+ .regs = mode_3280x2464_regs,
+ },
++ .binning = false,
+ },
+ {
+ /* 1080P 30fps cropped */
+@@ -501,6 +408,7 @@ static const struct imx219_mode supported_modes[] = {
+ .num_of_regs = ARRAY_SIZE(mode_1920_1080_regs),
+ .regs = mode_1920_1080_regs,
+ },
++ .binning = false,
+ },
+ {
+ /* 2x2 binned 30fps mode */
+@@ -517,6 +425,7 @@ static const struct imx219_mode supported_modes[] = {
+ .num_of_regs = ARRAY_SIZE(mode_1640_1232_regs),
+ .regs = mode_1640_1232_regs,
+ },
++ .binning = true,
+ },
+ {
+ /* 640x480 30fps mode */
+@@ -533,6 +442,7 @@ static const struct imx219_mode supported_modes[] = {
+ .num_of_regs = ARRAY_SIZE(mode_640_480_regs),
+ .regs = mode_640_480_regs,
+ },
++ .binning = true,
+ },
+ };
+
+@@ -979,6 +889,35 @@ static int imx219_set_framefmt(struct imx219 *imx219)
+ return -EINVAL;
+ }
+
++static int imx219_set_binning(struct imx219 *imx219)
++{
++ if (!imx219->mode->binning) {
++ return imx219_write_reg(imx219, IMX219_REG_BINNING_MODE,
++ IMX219_REG_VALUE_16BIT,
++ IMX219_BINNING_NONE);
++ }
++
++ switch (imx219->fmt.code) {
++ case MEDIA_BUS_FMT_SRGGB8_1X8:
++ case MEDIA_BUS_FMT_SGRBG8_1X8:
++ case MEDIA_BUS_FMT_SGBRG8_1X8:
++ case MEDIA_BUS_FMT_SBGGR8_1X8:
++ return imx219_write_reg(imx219, IMX219_REG_BINNING_MODE,
++ IMX219_REG_VALUE_16BIT,
++ IMX219_BINNING_2X2_ANALOG);
++
++ case MEDIA_BUS_FMT_SRGGB10_1X10:
++ case MEDIA_BUS_FMT_SGRBG10_1X10:
++ case MEDIA_BUS_FMT_SGBRG10_1X10:
++ case MEDIA_BUS_FMT_SBGGR10_1X10:
++ return imx219_write_reg(imx219, IMX219_REG_BINNING_MODE,
++ IMX219_REG_VALUE_16BIT,
++ IMX219_BINNING_2X2);
++ }
++
++ return -EINVAL;
++}
++
+ static const struct v4l2_rect *
+ __imx219_get_pad_crop(struct imx219 *imx219,
+ struct v4l2_subdev_state *sd_state,
+@@ -1041,6 +980,13 @@ static int imx219_start_streaming(struct imx219 *imx219)
+ if (ret < 0)
+ return ret;
+
++ /* Send all registers that are common to all modes */
++ ret = imx219_write_regs(imx219, imx219_common_regs, ARRAY_SIZE(imx219_common_regs));
++ if (ret) {
++ dev_err(&client->dev, "%s failed to send mfg header\n", __func__);
++ goto err_rpm_put;
++ }
++
+ /* Apply default values of current mode */
+ reg_list = &imx219->mode->reg_list;
+ ret = imx219_write_regs(imx219, reg_list->regs, reg_list->num_of_regs);
+@@ -1056,6 +1002,13 @@ static int imx219_start_streaming(struct imx219 *imx219)
+ goto err_rpm_put;
+ }
+
++ ret = imx219_set_binning(imx219);
++ if (ret) {
++ dev_err(&client->dev, "%s failed to set binning: %d\n",
++ __func__, ret);
++ goto err_rpm_put;
++ }
++
+ /* Apply customized values from user */
+ ret = __v4l2_ctrl_handler_setup(imx219->sd.ctrl_handler);
+ if (ret)
+diff --git a/drivers/media/i2c/imx258.c b/drivers/media/i2c/imx258.c
+index 81cdf37216ca7..c249507aa2dbc 100644
+--- a/drivers/media/i2c/imx258.c
++++ b/drivers/media/i2c/imx258.c
+@@ -1260,18 +1260,18 @@ static int imx258_probe(struct i2c_client *client)
+ return -ENOMEM;
+
+ imx258->clk = devm_clk_get_optional(&client->dev, NULL);
++ if (IS_ERR(imx258->clk))
++ return dev_err_probe(&client->dev, PTR_ERR(imx258->clk),
++ "error getting clock\n");
+ if (!imx258->clk) {
+ dev_dbg(&client->dev,
+ "no clock provided, using clock-frequency property\n");
+
+ device_property_read_u32(&client->dev, "clock-frequency", &val);
+- if (val != IMX258_INPUT_CLOCK_FREQ)
+- return -EINVAL;
+- } else if (IS_ERR(imx258->clk)) {
+- return dev_err_probe(&client->dev, PTR_ERR(imx258->clk),
+- "error getting clock\n");
++ } else {
++ val = clk_get_rate(imx258->clk);
+ }
+- if (clk_get_rate(imx258->clk) != IMX258_INPUT_CLOCK_FREQ) {
++ if (val != IMX258_INPUT_CLOCK_FREQ) {
+ dev_err(&client->dev, "input clock frequency not supported\n");
+ return -EINVAL;
+ }
+diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
+index 0dce92872176d..4d9b64c61f603 100644
+--- a/drivers/media/i2c/imx274.c
++++ b/drivers/media/i2c/imx274.c
+@@ -1367,6 +1367,10 @@ static int imx274_s_frame_interval(struct v4l2_subdev *sd,
+ int min, max, def;
+ int ret;
+
++ ret = pm_runtime_resume_and_get(&imx274->client->dev);
++ if (ret < 0)
++ return ret;
++
+ mutex_lock(&imx274->lock);
+ ret = imx274_set_frame_interval(imx274, fi->interval);
+
+@@ -1398,6 +1402,7 @@ static int imx274_s_frame_interval(struct v4l2_subdev *sd,
+
+ unlock:
+ mutex_unlock(&imx274->lock);
++ pm_runtime_put(&imx274->client->dev);
+
+ return ret;
+ }
+diff --git a/drivers/media/i2c/imx412.c b/drivers/media/i2c/imx412.c
+index be3f6ea555597..84279a6808730 100644
+--- a/drivers/media/i2c/imx412.c
++++ b/drivers/media/i2c/imx412.c
+@@ -1011,7 +1011,7 @@ static int imx412_power_on(struct device *dev)
+ struct imx412 *imx412 = to_imx412(sd);
+ int ret;
+
+- gpiod_set_value_cansleep(imx412->reset_gpio, 1);
++ gpiod_set_value_cansleep(imx412->reset_gpio, 0);
+
+ ret = clk_prepare_enable(imx412->inclk);
+ if (ret) {
+@@ -1024,7 +1024,7 @@ static int imx412_power_on(struct device *dev)
+ return 0;
+
+ error_reset:
+- gpiod_set_value_cansleep(imx412->reset_gpio, 0);
++ gpiod_set_value_cansleep(imx412->reset_gpio, 1);
+
+ return ret;
+ }
+@@ -1040,10 +1040,10 @@ static int imx412_power_off(struct device *dev)
+ struct v4l2_subdev *sd = dev_get_drvdata(dev);
+ struct imx412 *imx412 = to_imx412(sd);
+
+- gpiod_set_value_cansleep(imx412->reset_gpio, 0);
+-
+ clk_disable_unprepare(imx412->inclk);
+
++ gpiod_set_value_cansleep(imx412->reset_gpio, 1);
++
+ return 0;
+ }
+
+diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c
+index 92376592455ee..56674173524fd 100644
+--- a/drivers/media/i2c/ir-kbd-i2c.c
++++ b/drivers/media/i2c/ir-kbd-i2c.c
+@@ -791,6 +791,7 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id)
+ rc_proto = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC6_MCE |
+ RC_PROTO_BIT_RC6_6A_32;
+ ir_codes = RC_MAP_HAUPPAUGE;
++ ir->polling_interval = 125;
+ probe_tx = true;
+ break;
+ }
+diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c
+index e29be0242f078..f4233feb26276 100644
+--- a/drivers/media/i2c/m5mols/m5mols_core.c
++++ b/drivers/media/i2c/m5mols/m5mols_core.c
+@@ -488,7 +488,7 @@ static enum m5mols_restype __find_restype(u32 code)
+ do {
+ if (code == m5mols_default_ffmt[type].code)
+ return type;
+- } while (type++ != SIZE_DEFAULT_FFMT);
++ } while (++type != SIZE_DEFAULT_FFMT);
+
+ return 0;
+ }
+diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
+index 1aa2c58fd38c5..404a03f48b976 100644
+--- a/drivers/media/i2c/max9286.c
++++ b/drivers/media/i2c/max9286.c
+@@ -15,6 +15,7 @@
+ #include <linux/fwnode.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/gpio/driver.h>
++#include <linux/gpio/machine.h>
+ #include <linux/i2c.h>
+ #include <linux/i2c-mux.h>
+ #include <linux/module.h>
+@@ -168,6 +169,8 @@ struct max9286_priv {
+ u32 init_rev_chan_mv;
+ u32 rev_chan_mv;
+
++ u32 gpio_poc[2];
++
+ struct v4l2_ctrl_handler ctrls;
+ struct v4l2_ctrl *pixelrate;
+
+@@ -925,6 +928,7 @@ static int max9286_v4l2_register(struct max9286_priv *priv)
+ err_put_node:
+ fwnode_handle_put(ep);
+ err_async:
++ v4l2_ctrl_handler_free(&priv->ctrls);
+ max9286_v4l2_notifier_unregister(priv);
+
+ return ret;
+@@ -933,6 +937,7 @@ err_async:
+ static void max9286_v4l2_unregister(struct max9286_priv *priv)
+ {
+ fwnode_handle_put(priv->sd.fwnode);
++ v4l2_ctrl_handler_free(&priv->ctrls);
+ v4l2_async_unregister_subdev(&priv->sd);
+ max9286_v4l2_notifier_unregister(priv);
+ }
+@@ -1026,20 +1031,27 @@ static int max9286_setup(struct max9286_priv *priv)
+ return 0;
+ }
+
+-static void max9286_gpio_set(struct gpio_chip *chip,
+- unsigned int offset, int value)
++static int max9286_gpio_set(struct max9286_priv *priv, unsigned int offset,
++ int value)
+ {
+- struct max9286_priv *priv = gpiochip_get_data(chip);
+-
+ if (value)
+ priv->gpio_state |= BIT(offset);
+ else
+ priv->gpio_state &= ~BIT(offset);
+
+- max9286_write(priv, 0x0f, MAX9286_0X0F_RESERVED | priv->gpio_state);
++ return max9286_write(priv, 0x0f,
++ MAX9286_0X0F_RESERVED | priv->gpio_state);
++}
++
++static void max9286_gpiochip_set(struct gpio_chip *chip,
++ unsigned int offset, int value)
++{
++ struct max9286_priv *priv = gpiochip_get_data(chip);
++
++ max9286_gpio_set(priv, offset, value);
+ }
+
+-static int max9286_gpio_get(struct gpio_chip *chip, unsigned int offset)
++static int max9286_gpiochip_get(struct gpio_chip *chip, unsigned int offset)
+ {
+ struct max9286_priv *priv = gpiochip_get_data(chip);
+
+@@ -1059,13 +1071,10 @@ static int max9286_register_gpio(struct max9286_priv *priv)
+ gpio->of_node = dev->of_node;
+ gpio->ngpio = 2;
+ gpio->base = -1;
+- gpio->set = max9286_gpio_set;
+- gpio->get = max9286_gpio_get;
++ gpio->set = max9286_gpiochip_set;
++ gpio->get = max9286_gpiochip_get;
+ gpio->can_sleep = true;
+
+- /* GPIO values default to high */
+- priv->gpio_state = BIT(0) | BIT(1);
+-
+ ret = devm_gpiochip_add_data(dev, gpio, priv);
+ if (ret)
+ dev_err(dev, "Unable to create gpio_chip\n");
+@@ -1073,26 +1082,83 @@ static int max9286_register_gpio(struct max9286_priv *priv)
+ return ret;
+ }
+
+-static int max9286_init(struct device *dev)
++static int max9286_parse_gpios(struct max9286_priv *priv)
+ {
+- struct max9286_priv *priv;
+- struct i2c_client *client;
++ struct device *dev = &priv->client->dev;
+ int ret;
+
+- client = to_i2c_client(dev);
+- priv = i2c_get_clientdata(client);
++ /* GPIO values default to high */
++ priv->gpio_state = BIT(0) | BIT(1);
+
+- /* Enable the bus power. */
+- ret = regulator_enable(priv->regulator);
+- if (ret < 0) {
+- dev_err(&client->dev, "Unable to turn PoC on\n");
+- return ret;
++ /*
++ * Parse the "gpio-poc" vendor property. If the property is not
++ * specified the camera power is controlled by a regulator.
++ */
++ ret = of_property_read_u32_array(dev->of_node, "maxim,gpio-poc",
++ priv->gpio_poc, 2);
++ if (ret == -EINVAL) {
++ /*
++ * If gpio lines are not used for the camera power, register
++ * a gpio controller for consumers.
++ */
++ ret = max9286_register_gpio(priv);
++ if (ret)
++ return ret;
++
++ priv->regulator = devm_regulator_get(dev, "poc");
++ if (IS_ERR(priv->regulator)) {
++ return dev_err_probe(dev, PTR_ERR(priv->regulator),
++ "Unable to get PoC regulator (%ld)\n",
++ PTR_ERR(priv->regulator));
++ }
++
++ return 0;
++ }
++
++ /* If the property is specified make sure it is well formed. */
++ if (ret || priv->gpio_poc[0] > 1 ||
++ (priv->gpio_poc[1] != GPIO_ACTIVE_HIGH &&
++ priv->gpio_poc[1] != GPIO_ACTIVE_LOW)) {
++ dev_err(dev, "Invalid 'gpio-poc' property\n");
++ return -EINVAL;
+ }
+
++ return 0;
++}
++
++static int max9286_poc_enable(struct max9286_priv *priv, bool enable)
++{
++ int ret;
++
++ /* If the regulator is not available, use gpio to control power. */
++ if (!priv->regulator)
++ ret = max9286_gpio_set(priv, priv->gpio_poc[0],
++ enable ^ priv->gpio_poc[1]);
++ else if (enable)
++ ret = regulator_enable(priv->regulator);
++ else
++ ret = regulator_disable(priv->regulator);
++
++ if (ret < 0)
++ dev_err(&priv->client->dev, "Unable to turn power %s\n",
++ enable ? "on" : "off");
++
++ return ret;
++}
++
++static int max9286_init(struct max9286_priv *priv)
++{
++ struct i2c_client *client = priv->client;
++ int ret;
++
++ ret = max9286_poc_enable(priv, true);
++ if (ret)
++ return ret;
++
+ ret = max9286_setup(priv);
+ if (ret) {
+- dev_err(dev, "Unable to setup max9286\n");
+- goto err_regulator;
++ dev_err(&client->dev, "Unable to setup max9286\n");
++ goto err_poc_disable;
+ }
+
+ /*
+@@ -1101,13 +1167,13 @@ static int max9286_init(struct device *dev)
+ */
+ ret = max9286_v4l2_register(priv);
+ if (ret) {
+- dev_err(dev, "Failed to register with V4L2\n");
+- goto err_regulator;
++ dev_err(&client->dev, "Failed to register with V4L2\n");
++ goto err_poc_disable;
+ }
+
+ ret = max9286_i2c_mux_init(priv);
+ if (ret) {
+- dev_err(dev, "Unable to initialize I2C multiplexer\n");
++ dev_err(&client->dev, "Unable to initialize I2C multiplexer\n");
+ goto err_v4l2_register;
+ }
+
+@@ -1118,8 +1184,8 @@ static int max9286_init(struct device *dev)
+
+ err_v4l2_register:
+ max9286_v4l2_unregister(priv);
+-err_regulator:
+- regulator_disable(priv->regulator);
++err_poc_disable:
++ max9286_poc_enable(priv, false);
+
+ return ret;
+ }
+@@ -1262,7 +1328,6 @@ static int max9286_probe(struct i2c_client *client)
+ mutex_init(&priv->mutex);
+
+ priv->client = client;
+- i2c_set_clientdata(client, priv);
+
+ priv->gpiod_pwdn = devm_gpiod_get_optional(&client->dev, "enable",
+ GPIOD_OUT_HIGH);
+@@ -1290,25 +1355,15 @@ static int max9286_probe(struct i2c_client *client)
+ */
+ max9286_configure_i2c(priv, false);
+
+- ret = max9286_register_gpio(priv);
++ ret = max9286_parse_gpios(priv);
+ if (ret)
+ goto err_powerdown;
+
+- priv->regulator = devm_regulator_get(&client->dev, "poc");
+- if (IS_ERR(priv->regulator)) {
+- if (PTR_ERR(priv->regulator) != -EPROBE_DEFER)
+- dev_err(&client->dev,
+- "Unable to get PoC regulator (%ld)\n",
+- PTR_ERR(priv->regulator));
+- ret = PTR_ERR(priv->regulator);
+- goto err_powerdown;
+- }
+-
+ ret = max9286_parse_dt(priv);
+ if (ret)
+ goto err_powerdown;
+
+- ret = max9286_init(&client->dev);
++ ret = max9286_init(priv);
+ if (ret < 0)
+ goto err_cleanup_dt;
+
+@@ -1324,13 +1379,13 @@ err_powerdown:
+
+ static int max9286_remove(struct i2c_client *client)
+ {
+- struct max9286_priv *priv = i2c_get_clientdata(client);
++ struct max9286_priv *priv = sd_to_max9286(i2c_get_clientdata(client));
+
+ i2c_mux_del_adapters(priv->mux);
+
+ max9286_v4l2_unregister(priv);
+
+- regulator_disable(priv->regulator);
++ max9286_poc_enable(priv, false);
+
+ gpiod_set_value_cansleep(priv->gpiod_pwdn, 0);
+
+diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
+index 6eb88ef997836..3ae1b28c8351b 100644
+--- a/drivers/media/i2c/mt9p031.c
++++ b/drivers/media/i2c/mt9p031.c
+@@ -78,7 +78,9 @@
+ #define MT9P031_PIXEL_CLOCK_INVERT (1 << 15)
+ #define MT9P031_PIXEL_CLOCK_SHIFT(n) ((n) << 8)
+ #define MT9P031_PIXEL_CLOCK_DIVIDE(n) ((n) << 0)
+-#define MT9P031_FRAME_RESTART 0x0b
++#define MT9P031_RESTART 0x0b
++#define MT9P031_FRAME_PAUSE_RESTART (1 << 1)
++#define MT9P031_FRAME_RESTART (1 << 0)
+ #define MT9P031_SHUTTER_DELAY 0x0c
+ #define MT9P031_RST 0x0d
+ #define MT9P031_RST_ENABLE 1
+@@ -444,9 +446,23 @@ static int mt9p031_set_params(struct mt9p031 *mt9p031)
+ static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable)
+ {
+ struct mt9p031 *mt9p031 = to_mt9p031(subdev);
++ struct i2c_client *client = v4l2_get_subdevdata(subdev);
++ int val;
+ int ret;
+
+ if (!enable) {
++ /* enable pause restart */
++ val = MT9P031_FRAME_PAUSE_RESTART;
++ ret = mt9p031_write(client, MT9P031_RESTART, val);
++ if (ret < 0)
++ return ret;
++
++ /* enable restart + keep pause restart set */
++ val |= MT9P031_FRAME_RESTART;
++ ret = mt9p031_write(client, MT9P031_RESTART, val);
++ if (ret < 0)
++ return ret;
++
+ /* Stop sensor readout */
+ ret = mt9p031_set_output_control(mt9p031,
+ MT9P031_OUTPUT_CONTROL_CEN, 0);
+@@ -466,6 +482,16 @@ static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable)
+ if (ret < 0)
+ return ret;
+
++ /*
++ * - clear pause restart
++ * - don't clear restart as clearing restart manually can cause
++ * undefined behavior
++ */
++ val = MT9P031_FRAME_RESTART;
++ ret = mt9p031_write(client, MT9P031_RESTART, val);
++ if (ret < 0)
++ return ret;
++
+ return mt9p031_pll_enable(mt9p031);
+ }
+
+diff --git a/drivers/media/i2c/ov2680.c b/drivers/media/i2c/ov2680.c
+index 906c711f6821b..3059d1157bac3 100644
+--- a/drivers/media/i2c/ov2680.c
++++ b/drivers/media/i2c/ov2680.c
+@@ -54,6 +54,9 @@
+ #define OV2680_WIDTH_MAX 1600
+ #define OV2680_HEIGHT_MAX 1200
+
++#define OV2680_DEFAULT_WIDTH 800
++#define OV2680_DEFAULT_HEIGHT 600
++
+ enum ov2680_mode_id {
+ OV2680_MODE_QUXGA_800_600,
+ OV2680_MODE_720P_1280_720,
+@@ -85,15 +88,8 @@ struct ov2680_mode_info {
+
+ struct ov2680_ctrls {
+ struct v4l2_ctrl_handler handler;
+- struct {
+- struct v4l2_ctrl *auto_exp;
+- struct v4l2_ctrl *exposure;
+- };
+- struct {
+- struct v4l2_ctrl *auto_gain;
+- struct v4l2_ctrl *gain;
+- };
+-
++ struct v4l2_ctrl *exposure;
++ struct v4l2_ctrl *gain;
+ struct v4l2_ctrl *hflip;
+ struct v4l2_ctrl *vflip;
+ struct v4l2_ctrl *test_pattern;
+@@ -143,6 +139,7 @@ static const struct reg_value ov2680_setting_30fps_QUXGA_800_600[] = {
+ {0x380e, 0x02}, {0x380f, 0x84}, {0x3811, 0x04}, {0x3813, 0x04},
+ {0x3814, 0x31}, {0x3815, 0x31}, {0x3820, 0xc0}, {0x4008, 0x00},
+ {0x4009, 0x03}, {0x4837, 0x1e}, {0x3501, 0x4e}, {0x3502, 0xe0},
++ {0x3503, 0x03},
+ };
+
+ static const struct reg_value ov2680_setting_30fps_720P_1280_720[] = {
+@@ -321,70 +318,62 @@ static void ov2680_power_down(struct ov2680_dev *sensor)
+ usleep_range(5000, 10000);
+ }
+
+-static int ov2680_bayer_order(struct ov2680_dev *sensor)
++static void ov2680_set_bayer_order(struct ov2680_dev *sensor,
++ struct v4l2_mbus_framefmt *fmt)
+ {
+- u32 format1;
+- u32 format2;
+- u32 hv_flip;
+- int ret;
+-
+- ret = ov2680_read_reg(sensor, OV2680_REG_FORMAT1, &format1);
+- if (ret < 0)
+- return ret;
+-
+- ret = ov2680_read_reg(sensor, OV2680_REG_FORMAT2, &format2);
+- if (ret < 0)
+- return ret;
++ int hv_flip = 0;
+
+- hv_flip = (format2 & BIT(2) << 1) | (format1 & BIT(2));
++ if (sensor->ctrls.vflip && sensor->ctrls.vflip->val)
++ hv_flip += 1;
+
+- sensor->fmt.code = ov2680_hv_flip_bayer_order[hv_flip];
++ if (sensor->ctrls.hflip && sensor->ctrls.hflip->val)
++ hv_flip += 2;
+
+- return 0;
++ fmt->code = ov2680_hv_flip_bayer_order[hv_flip];
+ }
+
+-static int ov2680_vflip_enable(struct ov2680_dev *sensor)
++static void ov2680_fill_format(struct ov2680_dev *sensor,
++ struct v4l2_mbus_framefmt *fmt,
++ unsigned int width, unsigned int height)
+ {
+- int ret;
+-
+- ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT1, BIT(2), BIT(2));
+- if (ret < 0)
+- return ret;
+-
+- return ov2680_bayer_order(sensor);
++ memset(fmt, 0, sizeof(*fmt));
++ fmt->width = width;
++ fmt->height = height;
++ fmt->field = V4L2_FIELD_NONE;
++ fmt->colorspace = V4L2_COLORSPACE_SRGB;
++ ov2680_set_bayer_order(sensor, fmt);
+ }
+
+-static int ov2680_vflip_disable(struct ov2680_dev *sensor)
++static int ov2680_set_vflip(struct ov2680_dev *sensor, s32 val)
+ {
+ int ret;
+
+- ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT1, BIT(2), BIT(0));
+- if (ret < 0)
+- return ret;
+-
+- return ov2680_bayer_order(sensor);
+-}
+-
+-static int ov2680_hflip_enable(struct ov2680_dev *sensor)
+-{
+- int ret;
++ if (sensor->is_streaming)
++ return -EBUSY;
+
+- ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT2, BIT(2), BIT(2));
++ ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT1,
++ BIT(2), val ? BIT(2) : 0);
+ if (ret < 0)
+ return ret;
+
+- return ov2680_bayer_order(sensor);
++ ov2680_set_bayer_order(sensor, &sensor->fmt);
++ return 0;
+ }
+
+-static int ov2680_hflip_disable(struct ov2680_dev *sensor)
++static int ov2680_set_hflip(struct ov2680_dev *sensor, s32 val)
+ {
+ int ret;
+
+- ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT2, BIT(2), BIT(0));
++ if (sensor->is_streaming)
++ return -EBUSY;
++
++ ret = ov2680_mod_reg(sensor, OV2680_REG_FORMAT2,
++ BIT(2), val ? BIT(2) : 0);
+ if (ret < 0)
+ return ret;
+
+- return ov2680_bayer_order(sensor);
++ ov2680_set_bayer_order(sensor, &sensor->fmt);
++ return 0;
+ }
+
+ static int ov2680_test_pattern_set(struct ov2680_dev *sensor, int value)
+@@ -405,69 +394,15 @@ static int ov2680_test_pattern_set(struct ov2680_dev *sensor, int value)
+ return 0;
+ }
+
+-static int ov2680_gain_set(struct ov2680_dev *sensor, bool auto_gain)
++static int ov2680_gain_set(struct ov2680_dev *sensor, u32 gain)
+ {
+- struct ov2680_ctrls *ctrls = &sensor->ctrls;
+- u32 gain;
+- int ret;
+-
+- ret = ov2680_mod_reg(sensor, OV2680_REG_R_MANUAL, BIT(1),
+- auto_gain ? 0 : BIT(1));
+- if (ret < 0)
+- return ret;
+-
+- if (auto_gain || !ctrls->gain->is_new)
+- return 0;
+-
+- gain = ctrls->gain->val;
+-
+- ret = ov2680_write_reg16(sensor, OV2680_REG_GAIN_PK, gain);
+-
+- return 0;
+-}
+-
+-static int ov2680_gain_get(struct ov2680_dev *sensor)
+-{
+- u32 gain;
+- int ret;
+-
+- ret = ov2680_read_reg16(sensor, OV2680_REG_GAIN_PK, &gain);
+- if (ret)
+- return ret;
+-
+- return gain;
+-}
+-
+-static int ov2680_exposure_set(struct ov2680_dev *sensor, bool auto_exp)
+-{
+- struct ov2680_ctrls *ctrls = &sensor->ctrls;
+- u32 exp;
+- int ret;
+-
+- ret = ov2680_mod_reg(sensor, OV2680_REG_R_MANUAL, BIT(0),
+- auto_exp ? 0 : BIT(0));
+- if (ret < 0)
+- return ret;
+-
+- if (auto_exp || !ctrls->exposure->is_new)
+- return 0;
+-
+- exp = (u32)ctrls->exposure->val;
+- exp <<= 4;
+-
+- return ov2680_write_reg24(sensor, OV2680_REG_EXPOSURE_PK_HIGH, exp);
++ return ov2680_write_reg16(sensor, OV2680_REG_GAIN_PK, gain);
+ }
+
+-static int ov2680_exposure_get(struct ov2680_dev *sensor)
++static int ov2680_exposure_set(struct ov2680_dev *sensor, u32 exp)
+ {
+- int ret;
+- u32 exp;
+-
+- ret = ov2680_read_reg24(sensor, OV2680_REG_EXPOSURE_PK_HIGH, &exp);
+- if (ret)
+- return ret;
+-
+- return exp >> 4;
++ return ov2680_write_reg24(sensor, OV2680_REG_EXPOSURE_PK_HIGH,
++ exp << 4);
+ }
+
+ static int ov2680_stream_enable(struct ov2680_dev *sensor)
+@@ -482,33 +417,17 @@ static int ov2680_stream_disable(struct ov2680_dev *sensor)
+
+ static int ov2680_mode_set(struct ov2680_dev *sensor)
+ {
+- struct ov2680_ctrls *ctrls = &sensor->ctrls;
+ int ret;
+
+- ret = ov2680_gain_set(sensor, false);
+- if (ret < 0)
+- return ret;
+-
+- ret = ov2680_exposure_set(sensor, false);
++ ret = ov2680_load_regs(sensor, sensor->current_mode);
+ if (ret < 0)
+ return ret;
+
+- ret = ov2680_load_regs(sensor, sensor->current_mode);
++ /* Restore value of all ctrls */
++ ret = __v4l2_ctrl_handler_setup(&sensor->ctrls.handler);
+ if (ret < 0)
+ return ret;
+
+- if (ctrls->auto_gain->val) {
+- ret = ov2680_gain_set(sensor, true);
+- if (ret < 0)
+- return ret;
+- }
+-
+- if (ctrls->auto_exp->val == V4L2_EXPOSURE_AUTO) {
+- ret = ov2680_exposure_set(sensor, true);
+- if (ret < 0)
+- return ret;
+- }
+-
+ sensor->mode_pending_changes = false;
+
+ return 0;
+@@ -556,7 +475,7 @@ static int ov2680_power_on(struct ov2680_dev *sensor)
+ ret = ov2680_write_reg(sensor, OV2680_REG_SOFT_RESET, 0x01);
+ if (ret != 0) {
+ dev_err(dev, "sensor soft reset failed\n");
+- return ret;
++ goto err_disable_regulators;
+ }
+ usleep_range(1000, 2000);
+ } else {
+@@ -566,7 +485,7 @@ static int ov2680_power_on(struct ov2680_dev *sensor)
+
+ ret = clk_prepare_enable(sensor->xvclk);
+ if (ret < 0)
+- return ret;
++ goto err_disable_regulators;
+
+ sensor->is_enabled = true;
+
+@@ -576,6 +495,10 @@ static int ov2680_power_on(struct ov2680_dev *sensor)
+ ov2680_stream_disable(sensor);
+
+ return 0;
++
++err_disable_regulators:
++ regulator_bulk_disable(OV2680_NUM_SUPPLIES, sensor->supplies);
++ return ret;
+ }
+
+ static int ov2680_s_power(struct v4l2_subdev *sd, int on)
+@@ -590,15 +513,10 @@ static int ov2680_s_power(struct v4l2_subdev *sd, int on)
+ else
+ ret = ov2680_power_off(sensor);
+
+- mutex_unlock(&sensor->lock);
+-
+- if (on && ret == 0) {
+- ret = v4l2_ctrl_handler_setup(&sensor->ctrls.handler);
+- if (ret < 0)
+- return ret;
+-
++ if (on && ret == 0)
+ ret = ov2680_mode_restore(sensor);
+- }
++
++ mutex_unlock(&sensor->lock);
+
+ return ret;
+ }
+@@ -664,7 +582,6 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd,
+ {
+ struct ov2680_dev *sensor = to_ov2680_dev(sd);
+ struct v4l2_mbus_framefmt *fmt = NULL;
+- int ret = 0;
+
+ if (format->pad != 0)
+ return -EINVAL;
+@@ -672,22 +589,17 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd,
+ mutex_lock(&sensor->lock);
+
+ if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
+-#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
+ fmt = v4l2_subdev_get_try_format(&sensor->sd, sd_state,
+ format->pad);
+-#else
+- ret = -EINVAL;
+-#endif
+ } else {
+ fmt = &sensor->fmt;
+ }
+
+- if (fmt)
+- format->format = *fmt;
++ format->format = *fmt;
+
+ mutex_unlock(&sensor->lock);
+
+- return ret;
++ return 0;
+ }
+
+ static int ov2680_set_fmt(struct v4l2_subdev *sd,
+@@ -695,43 +607,35 @@ static int ov2680_set_fmt(struct v4l2_subdev *sd,
+ struct v4l2_subdev_format *format)
+ {
+ struct ov2680_dev *sensor = to_ov2680_dev(sd);
+- struct v4l2_mbus_framefmt *fmt = &format->format;
+-#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
+ struct v4l2_mbus_framefmt *try_fmt;
+-#endif
+ const struct ov2680_mode_info *mode;
+ int ret = 0;
+
+ if (format->pad != 0)
+ return -EINVAL;
+
+- mutex_lock(&sensor->lock);
+-
+- if (sensor->is_streaming) {
+- ret = -EBUSY;
+- goto unlock;
+- }
+-
+ mode = v4l2_find_nearest_size(ov2680_mode_data,
+- ARRAY_SIZE(ov2680_mode_data), width,
+- height, fmt->width, fmt->height);
+- if (!mode) {
+- ret = -EINVAL;
+- goto unlock;
+- }
++ ARRAY_SIZE(ov2680_mode_data),
++ width, height,
++ format->format.width,
++ format->format.height);
++ if (!mode)
++ return -EINVAL;
++
++ ov2680_fill_format(sensor, &format->format, mode->width, mode->height);
+
+ if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
+-#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
+ try_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
+- format->format = *try_fmt;
+-#endif
+- goto unlock;
++ *try_fmt = format->format;
++ return 0;
+ }
+
+- fmt->width = mode->width;
+- fmt->height = mode->height;
+- fmt->code = sensor->fmt.code;
+- fmt->colorspace = sensor->fmt.colorspace;
++ mutex_lock(&sensor->lock);
++
++ if (sensor->is_streaming) {
++ ret = -EBUSY;
++ goto unlock;
++ }
+
+ sensor->current_mode = mode;
+ sensor->fmt = format->format;
+@@ -746,16 +650,11 @@ unlock:
+ static int ov2680_init_cfg(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *sd_state)
+ {
+- struct v4l2_subdev_format fmt = {
+- .which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+- : V4L2_SUBDEV_FORMAT_ACTIVE,
+- .format = {
+- .width = 800,
+- .height = 600,
+- }
+- };
++ struct ov2680_dev *sensor = to_ov2680_dev(sd);
+
+- return ov2680_set_fmt(sd, sd_state, &fmt);
++ ov2680_fill_format(sensor, &sd_state->pads[0].try_fmt,
++ OV2680_DEFAULT_WIDTH, OV2680_DEFAULT_HEIGHT);
++ return 0;
+ }
+
+ static int ov2680_enum_frame_size(struct v4l2_subdev *sd,
+@@ -794,66 +693,23 @@ static int ov2680_enum_frame_interval(struct v4l2_subdev *sd,
+ return 0;
+ }
+
+-static int ov2680_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
+-{
+- struct v4l2_subdev *sd = ctrl_to_sd(ctrl);
+- struct ov2680_dev *sensor = to_ov2680_dev(sd);
+- struct ov2680_ctrls *ctrls = &sensor->ctrls;
+- int val;
+-
+- if (!sensor->is_enabled)
+- return 0;
+-
+- switch (ctrl->id) {
+- case V4L2_CID_GAIN:
+- val = ov2680_gain_get(sensor);
+- if (val < 0)
+- return val;
+- ctrls->gain->val = val;
+- break;
+- case V4L2_CID_EXPOSURE:
+- val = ov2680_exposure_get(sensor);
+- if (val < 0)
+- return val;
+- ctrls->exposure->val = val;
+- break;
+- }
+-
+- return 0;
+-}
+-
+ static int ov2680_s_ctrl(struct v4l2_ctrl *ctrl)
+ {
+ struct v4l2_subdev *sd = ctrl_to_sd(ctrl);
+ struct ov2680_dev *sensor = to_ov2680_dev(sd);
+- struct ov2680_ctrls *ctrls = &sensor->ctrls;
+
+ if (!sensor->is_enabled)
+ return 0;
+
+ switch (ctrl->id) {
+- case V4L2_CID_AUTOGAIN:
+- return ov2680_gain_set(sensor, !!ctrl->val);
+ case V4L2_CID_GAIN:
+- return ov2680_gain_set(sensor, !!ctrls->auto_gain->val);
+- case V4L2_CID_EXPOSURE_AUTO:
+- return ov2680_exposure_set(sensor, !!ctrl->val);
++ return ov2680_gain_set(sensor, ctrl->val);
+ case V4L2_CID_EXPOSURE:
+- return ov2680_exposure_set(sensor, !!ctrls->auto_exp->val);
++ return ov2680_exposure_set(sensor, ctrl->val);
+ case V4L2_CID_VFLIP:
+- if (sensor->is_streaming)
+- return -EBUSY;
+- if (ctrl->val)
+- return ov2680_vflip_enable(sensor);
+- else
+- return ov2680_vflip_disable(sensor);
++ return ov2680_set_vflip(sensor, ctrl->val);
+ case V4L2_CID_HFLIP:
+- if (sensor->is_streaming)
+- return -EBUSY;
+- if (ctrl->val)
+- return ov2680_hflip_enable(sensor);
+- else
+- return ov2680_hflip_disable(sensor);
++ return ov2680_set_hflip(sensor, ctrl->val);
+ case V4L2_CID_TEST_PATTERN:
+ return ov2680_test_pattern_set(sensor, ctrl->val);
+ default:
+@@ -864,7 +720,6 @@ static int ov2680_s_ctrl(struct v4l2_ctrl *ctrl)
+ }
+
+ static const struct v4l2_ctrl_ops ov2680_ctrl_ops = {
+- .g_volatile_ctrl = ov2680_g_volatile_ctrl,
+ .s_ctrl = ov2680_s_ctrl,
+ };
+
+@@ -898,11 +753,8 @@ static int ov2680_mode_init(struct ov2680_dev *sensor)
+ const struct ov2680_mode_info *init_mode;
+
+ /* set initial mode */
+- sensor->fmt.code = MEDIA_BUS_FMT_SBGGR10_1X10;
+- sensor->fmt.width = 800;
+- sensor->fmt.height = 600;
+- sensor->fmt.field = V4L2_FIELD_NONE;
+- sensor->fmt.colorspace = V4L2_COLORSPACE_SRGB;
++ ov2680_fill_format(sensor, &sensor->fmt,
++ OV2680_DEFAULT_WIDTH, OV2680_DEFAULT_HEIGHT);
+
+ sensor->frame_interval.denominator = OV2680_FRAME_RATE;
+ sensor->frame_interval.numerator = 1;
+@@ -926,9 +778,7 @@ static int ov2680_v4l2_register(struct ov2680_dev *sensor)
+ v4l2_i2c_subdev_init(&sensor->sd, sensor->i2c_client,
+ &ov2680_subdev_ops);
+
+-#ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
+ sensor->sd.flags = V4L2_SUBDEV_FL_HAS_DEVNODE;
+-#endif
+ sensor->pad.flags = MEDIA_PAD_FL_SOURCE;
+ sensor->sd.entity.function = MEDIA_ENT_F_CAM_SENSOR;
+
+@@ -936,7 +786,7 @@ static int ov2680_v4l2_register(struct ov2680_dev *sensor)
+ if (ret < 0)
+ return ret;
+
+- v4l2_ctrl_handler_init(hdl, 7);
++ v4l2_ctrl_handler_init(hdl, 5);
+
+ hdl->lock = &sensor->lock;
+
+@@ -948,16 +798,9 @@ static int ov2680_v4l2_register(struct ov2680_dev *sensor)
+ ARRAY_SIZE(test_pattern_menu) - 1,
+ 0, 0, test_pattern_menu);
+
+- ctrls->auto_exp = v4l2_ctrl_new_std_menu(hdl, ops,
+- V4L2_CID_EXPOSURE_AUTO,
+- V4L2_EXPOSURE_MANUAL, 0,
+- V4L2_EXPOSURE_AUTO);
+-
+ ctrls->exposure = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_EXPOSURE,
+ 0, 32767, 1, 0);
+
+- ctrls->auto_gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTOGAIN,
+- 0, 1, 1, 1);
+ ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN, 0, 2047, 1, 0);
+
+ if (hdl->error) {
+@@ -965,11 +808,8 @@ static int ov2680_v4l2_register(struct ov2680_dev *sensor)
+ goto cleanup_entity;
+ }
+
+- ctrls->gain->flags |= V4L2_CTRL_FLAG_VOLATILE;
+- ctrls->exposure->flags |= V4L2_CTRL_FLAG_VOLATILE;
+-
+- v4l2_ctrl_auto_cluster(2, &ctrls->auto_gain, 0, true);
+- v4l2_ctrl_auto_cluster(2, &ctrls->auto_exp, 1, true);
++ ctrls->vflip->flags |= V4L2_CTRL_FLAG_MODIFY_LAYOUT;
++ ctrls->hflip->flags |= V4L2_CTRL_FLAG_MODIFY_LAYOUT;
+
+ sensor->sd.ctrl_handler = hdl;
+
+diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c
+index 934c9d65cb097..4b1ab3e07910e 100644
+--- a/drivers/media/i2c/ov2740.c
++++ b/drivers/media/i2c/ov2740.c
+@@ -603,8 +603,10 @@ static int ov2740_init_controls(struct ov2740 *ov2740)
+ V4L2_CID_TEST_PATTERN,
+ ARRAY_SIZE(ov2740_test_pattern_menu) - 1,
+ 0, 0, ov2740_test_pattern_menu);
+- if (ctrl_hdlr->error)
++ if (ctrl_hdlr->error) {
++ v4l2_ctrl_handler_free(ctrl_hdlr);
+ return ctrl_hdlr->error;
++ }
+
+ ov2740->sd.ctrl_handler = ctrl_hdlr;
+
+diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
+index ddbd71394db33..13144e87f47a1 100644
+--- a/drivers/media/i2c/ov5640.c
++++ b/drivers/media/i2c/ov5640.c
+@@ -1968,9 +1968,9 @@ static int ov5640_set_power_mipi(struct ov5640_dev *sensor, bool on)
+ * "ov5640_set_stream_mipi()")
+ * [4] = 0 : Power up MIPI HS Tx
+ * [3] = 0 : Power up MIPI LS Rx
+- * [2] = 0 : MIPI interface disabled
++ * [2] = 1 : MIPI interface enabled
+ */
+- ret = ov5640_write_reg(sensor, OV5640_REG_IO_MIPI_CTRL00, 0x40);
++ ret = ov5640_write_reg(sensor, OV5640_REG_IO_MIPI_CTRL00, 0x44);
+ if (ret)
+ return ret;
+
+@@ -2293,7 +2293,6 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
+ struct ov5640_dev *sensor = to_ov5640_dev(sd);
+ const struct ov5640_mode_info *new_mode;
+ struct v4l2_mbus_framefmt *mbus_fmt = &format->format;
+- struct v4l2_mbus_framefmt *fmt;
+ int ret;
+
+ if (format->pad != 0)
+@@ -2311,12 +2310,10 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
+ if (ret)
+ goto out;
+
+- if (format->which == V4L2_SUBDEV_FORMAT_TRY)
+- fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
+- else
+- fmt = &sensor->fmt;
+-
+- *fmt = *mbus_fmt;
++ if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
++ *v4l2_subdev_get_try_format(sd, sd_state, 0) = *mbus_fmt;
++ goto out;
++ }
+
+ if (new_mode != sensor->current_mode) {
+ sensor->current_mode = new_mode;
+@@ -2325,6 +2322,9 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
+ if (mbus_fmt->code != sensor->fmt.code)
+ sensor->pending_fmt_change = true;
+
++ /* update format even if code is unchanged, resolution might change */
++ sensor->fmt = *mbus_fmt;
++
+ __v4l2_ctrl_s_ctrl_int64(sensor->ctrls.pixel_rate,
+ ov5640_calc_pixel_rate(sensor));
+ out:
+@@ -2776,7 +2776,7 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
+ /* Auto/manual gain */
+ ctrls->auto_gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTOGAIN,
+ 0, 1, 1, 1);
+- ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN,
++ ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_ANALOGUE_GAIN,
+ 0, 1023, 1, 0);
+
+ ctrls->saturation = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_SATURATION,
+diff --git a/drivers/media/i2c/ov5648.c b/drivers/media/i2c/ov5648.c
+index 947d437ed0efe..bb3666fc56183 100644
+--- a/drivers/media/i2c/ov5648.c
++++ b/drivers/media/i2c/ov5648.c
+@@ -639,7 +639,7 @@ struct ov5648_ctrls {
+ struct v4l2_ctrl *pixel_rate;
+
+ struct v4l2_ctrl_handler handler;
+-} __packed;
++};
+
+ struct ov5648_sensor {
+ struct device *dev;
+@@ -1778,8 +1778,14 @@ static int ov5648_state_configure(struct ov5648_sensor *sensor,
+
+ static int ov5648_state_init(struct ov5648_sensor *sensor)
+ {
+- return ov5648_state_configure(sensor, &ov5648_modes[0],
+- ov5648_mbus_codes[0]);
++ int ret;
++
++ mutex_lock(&sensor->mutex);
++ ret = ov5648_state_configure(sensor, &ov5648_modes[0],
++ ov5648_mbus_codes[0]);
++ mutex_unlock(&sensor->mutex);
++
++ return ret;
+ }
+
+ /* Sensor Base */
+@@ -2492,9 +2498,9 @@ static int ov5648_probe(struct i2c_client *client)
+
+ /* DOVDD: digital I/O */
+ sensor->dovdd = devm_regulator_get(dev, "dovdd");
+- if (IS_ERR(sensor->dvdd)) {
++ if (IS_ERR(sensor->dovdd)) {
+ dev_err(dev, "cannot get DOVDD (digital I/O) regulator\n");
+- ret = PTR_ERR(sensor->dvdd);
++ ret = PTR_ERR(sensor->dovdd);
+ goto error_endpoint;
+ }
+
+diff --git a/drivers/media/i2c/ov5675.c b/drivers/media/i2c/ov5675.c
+index da5850b7ad07f..2104589dd4343 100644
+--- a/drivers/media/i2c/ov5675.c
++++ b/drivers/media/i2c/ov5675.c
+@@ -791,8 +791,10 @@ static int ov5675_init_controls(struct ov5675 *ov5675)
+ v4l2_ctrl_new_std(ctrl_hdlr, &ov5675_ctrl_ops,
+ V4L2_CID_VFLIP, 0, 1, 1, 0);
+
+- if (ctrl_hdlr->error)
++ if (ctrl_hdlr->error) {
++ v4l2_ctrl_handler_free(ctrl_hdlr);
+ return ctrl_hdlr->error;
++ }
+
+ ov5675->sd.ctrl_handler = ctrl_hdlr;
+
+diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c
+index f67412150b16b..eb59dc8bb5929 100644
+--- a/drivers/media/i2c/ov6650.c
++++ b/drivers/media/i2c/ov6650.c
+@@ -472,9 +472,16 @@ static int ov6650_get_selection(struct v4l2_subdev *sd,
+ {
+ struct i2c_client *client = v4l2_get_subdevdata(sd);
+ struct ov6650 *priv = to_ov6650(client);
++ struct v4l2_rect *rect;
+
+- if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE)
+- return -EINVAL;
++ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
++ /* pre-select try crop rectangle */
++ rect = &sd_state->pads->try_crop;
++
++ } else {
++ /* pre-select active crop rectangle */
++ rect = &priv->rect;
++ }
+
+ switch (sel->target) {
+ case V4L2_SEL_TGT_CROP_BOUNDS:
+@@ -483,14 +490,33 @@ static int ov6650_get_selection(struct v4l2_subdev *sd,
+ sel->r.width = W_CIF;
+ sel->r.height = H_CIF;
+ return 0;
++
+ case V4L2_SEL_TGT_CROP:
+- sel->r = priv->rect;
++ /* use selected crop rectangle */
++ sel->r = *rect;
+ return 0;
++
+ default:
+ return -EINVAL;
+ }
+ }
+
++static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect)
++{
++ return width > rect->width >> 1 || height > rect->height >> 1;
++}
++
++static void ov6650_bind_align_crop_rectangle(struct v4l2_rect *rect)
++{
++ v4l_bound_align_image(&rect->width, 2, W_CIF, 1,
++ &rect->height, 2, H_CIF, 1, 0);
++ v4l_bound_align_image(&rect->left, DEF_HSTRT << 1,
++ (DEF_HSTRT << 1) + W_CIF - (__s32)rect->width, 1,
++ &rect->top, DEF_VSTRT << 1,
++ (DEF_VSTRT << 1) + H_CIF - (__s32)rect->height,
++ 1, 0);
++}
++
+ static int ov6650_set_selection(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *sd_state,
+ struct v4l2_subdev_selection *sel)
+@@ -499,18 +525,30 @@ static int ov6650_set_selection(struct v4l2_subdev *sd,
+ struct ov6650 *priv = to_ov6650(client);
+ int ret;
+
+- if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE ||
+- sel->target != V4L2_SEL_TGT_CROP)
++ if (sel->target != V4L2_SEL_TGT_CROP)
+ return -EINVAL;
+
+- v4l_bound_align_image(&sel->r.width, 2, W_CIF, 1,
+- &sel->r.height, 2, H_CIF, 1, 0);
+- v4l_bound_align_image(&sel->r.left, DEF_HSTRT << 1,
+- (DEF_HSTRT << 1) + W_CIF - (__s32)sel->r.width, 1,
+- &sel->r.top, DEF_VSTRT << 1,
+- (DEF_VSTRT << 1) + H_CIF - (__s32)sel->r.height,
+- 1, 0);
++ ov6650_bind_align_crop_rectangle(&sel->r);
++
++ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
++ struct v4l2_rect *crop = &sd_state->pads->try_crop;
++ struct v4l2_mbus_framefmt *mf = &sd_state->pads->try_fmt;
++ /* detect current pad config scaling factor */
++ bool half_scale = !is_unscaled_ok(mf->width, mf->height, crop);
++
++ /* store new crop rectangle */
++ *crop = sel->r;
+
++ /* adjust frame size */
++ mf->width = crop->width >> half_scale;
++ mf->height = crop->height >> half_scale;
++
++ return 0;
++ }
++
++ /* V4L2_SUBDEV_FORMAT_ACTIVE */
++
++ /* apply new crop rectangle */
+ ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1);
+ if (!ret) {
+ priv->rect.width += priv->rect.left - sel->r.left;
+@@ -562,30 +600,13 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd,
+ return 0;
+ }
+
+-static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect)
+-{
+- return width > rect->width >> 1 || height > rect->height >> 1;
+-}
+-
+ #define to_clkrc(div) ((div) - 1)
+
+ /* set the format we will capture in */
+-static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf)
++static int ov6650_s_fmt(struct v4l2_subdev *sd, u32 code, bool half_scale)
+ {
+ struct i2c_client *client = v4l2_get_subdevdata(sd);
+ struct ov6650 *priv = to_ov6650(client);
+- bool half_scale = !is_unscaled_ok(mf->width, mf->height, &priv->rect);
+- struct v4l2_subdev_selection sel = {
+- .which = V4L2_SUBDEV_FORMAT_ACTIVE,
+- .target = V4L2_SEL_TGT_CROP,
+- .r.left = priv->rect.left + (priv->rect.width >> 1) -
+- (mf->width >> (1 - half_scale)),
+- .r.top = priv->rect.top + (priv->rect.height >> 1) -
+- (mf->height >> (1 - half_scale)),
+- .r.width = mf->width << half_scale,
+- .r.height = mf->height << half_scale,
+- };
+- u32 code = mf->code;
+ u8 coma_set = 0, coma_mask = 0, coml_set, coml_mask;
+ int ret;
+
+@@ -653,9 +674,7 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf)
+ coma_mask |= COMA_QCIF;
+ }
+
+- ret = ov6650_set_selection(sd, NULL, &sel);
+- if (!ret)
+- ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask);
++ ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask);
+ if (!ret) {
+ priv->half_scale = half_scale;
+
+@@ -674,14 +693,12 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd,
+ struct v4l2_mbus_framefmt *mf = &format->format;
+ struct i2c_client *client = v4l2_get_subdevdata(sd);
+ struct ov6650 *priv = to_ov6650(client);
++ struct v4l2_rect *crop;
++ bool half_scale;
+
+ if (format->pad)
+ return -EINVAL;
+
+- if (is_unscaled_ok(mf->width, mf->height, &priv->rect))
+- v4l_bound_align_image(&mf->width, 2, W_CIF, 1,
+- &mf->height, 2, H_CIF, 1, 0);
+-
+ switch (mf->code) {
+ case MEDIA_BUS_FMT_Y10_1X10:
+ mf->code = MEDIA_BUS_FMT_Y8_1X8;
+@@ -699,10 +716,17 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd,
+ break;
+ }
+
++ if (format->which == V4L2_SUBDEV_FORMAT_TRY)
++ crop = &sd_state->pads->try_crop;
++ else
++ crop = &priv->rect;
++
++ half_scale = !is_unscaled_ok(mf->width, mf->height, crop);
++
+ if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
+- /* store media bus format code and frame size in pad config */
+- sd_state->pads->try_fmt.width = mf->width;
+- sd_state->pads->try_fmt.height = mf->height;
++ /* store new mbus frame format code and size in pad config */
++ sd_state->pads->try_fmt.width = crop->width >> half_scale;
++ sd_state->pads->try_fmt.height = crop->height >> half_scale;
+ sd_state->pads->try_fmt.code = mf->code;
+
+ /* return default mbus frame format updated with pad config */
+@@ -712,9 +736,11 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd,
+ mf->code = sd_state->pads->try_fmt.code;
+
+ } else {
+- /* apply new media bus format code and frame size */
+- int ret = ov6650_s_fmt(sd, mf);
++ int ret = 0;
+
++ /* apply new media bus frame format and scaling if changed */
++ if (mf->code != priv->code || half_scale != priv->half_scale)
++ ret = ov6650_s_fmt(sd, mf->code, half_scale);
+ if (ret)
+ return ret;
+
+@@ -890,9 +916,8 @@ static int ov6650_video_probe(struct v4l2_subdev *sd)
+ if (!ret)
+ ret = ov6650_prog_dflt(client, xclk->clkrc);
+ if (!ret) {
+- struct v4l2_mbus_framefmt mf = ov6650_def_fmt;
+-
+- ret = ov6650_s_fmt(sd, &mf);
++ /* driver default frame format, no scaling */
++ ret = ov6650_s_fmt(sd, ov6650_def_fmt.code, false);
+ }
+ if (!ret)
+ ret = v4l2_ctrl_handler_setup(&priv->hdl);
+diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c
+index 1967464231160..23001ede138c1 100644
+--- a/drivers/media/i2c/ov7670.c
++++ b/drivers/media/i2c/ov7670.c
+@@ -1841,7 +1841,7 @@ static int ov7670_parse_dt(struct device *dev,
+
+ if (bus_cfg.bus_type != V4L2_MBUS_PARALLEL) {
+ dev_err(dev, "Unsupported media bus type\n");
+- return ret;
++ return -EINVAL;
+ }
+ info->mbus_config = bus_cfg.bus.parallel.flags;
+
+@@ -2017,7 +2017,6 @@ static int ov7670_remove(struct i2c_client *client)
+ v4l2_async_unregister_subdev(sd);
+ v4l2_ctrl_handler_free(&info->hdl);
+ media_entity_cleanup(&info->sd.entity);
+- ov7670_power_off(sd);
+ return 0;
+ }
+
+diff --git a/drivers/media/i2c/ov772x.c b/drivers/media/i2c/ov772x.c
+index 78602a2f70b0f..e05b48c90faed 100644
+--- a/drivers/media/i2c/ov772x.c
++++ b/drivers/media/i2c/ov772x.c
+@@ -1462,7 +1462,7 @@ static int ov772x_probe(struct i2c_client *client)
+ priv->subdev.ctrl_handler = &priv->hdl;
+ if (priv->hdl.error) {
+ ret = priv->hdl.error;
+- goto error_mutex_destroy;
++ goto error_ctrl_free;
+ }
+
+ priv->clk = clk_get(&client->dev, NULL);
+@@ -1515,7 +1515,6 @@ error_clk_put:
+ clk_put(priv->clk);
+ error_ctrl_free:
+ v4l2_ctrl_handler_free(&priv->hdl);
+-error_mutex_destroy:
+ mutex_destroy(&priv->lock);
+
+ return ret;
+diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c
+index ce50f3ea87b8e..92f6c3a940cfb 100644
+--- a/drivers/media/i2c/ov8865.c
++++ b/drivers/media/i2c/ov8865.c
+@@ -2330,27 +2330,27 @@ static int ov8865_sensor_power(struct ov8865_sensor *sensor, bool on)
+ if (ret) {
+ dev_err(sensor->dev,
+ "failed to enable DOVDD regulator\n");
+- goto disable;
++ return ret;
+ }
+
+ ret = regulator_enable(sensor->avdd);
+ if (ret) {
+ dev_err(sensor->dev,
+ "failed to enable AVDD regulator\n");
+- goto disable;
++ goto disable_dovdd;
+ }
+
+ ret = regulator_enable(sensor->dvdd);
+ if (ret) {
+ dev_err(sensor->dev,
+ "failed to enable DVDD regulator\n");
+- goto disable;
++ goto disable_avdd;
+ }
+
+ ret = clk_prepare_enable(sensor->extclk);
+ if (ret) {
+ dev_err(sensor->dev, "failed to enable EXTCLK clock\n");
+- goto disable;
++ goto disable_dvdd;
+ }
+
+ gpiod_set_value_cansleep(sensor->reset, 0);
+@@ -2359,14 +2359,16 @@ static int ov8865_sensor_power(struct ov8865_sensor *sensor, bool on)
+ /* Time to enter streaming mode according to power timings. */
+ usleep_range(10000, 12000);
+ } else {
+-disable:
+ gpiod_set_value_cansleep(sensor->powerdown, 1);
+ gpiod_set_value_cansleep(sensor->reset, 1);
+
+ clk_disable_unprepare(sensor->extclk);
+
++disable_dvdd:
+ regulator_disable(sensor->dvdd);
++disable_avdd:
+ regulator_disable(sensor->avdd);
++disable_dovdd:
+ regulator_disable(sensor->dovdd);
+ }
+
+@@ -2891,14 +2893,16 @@ static int ov8865_probe(struct i2c_client *client)
+ if (ret)
+ goto error_mutex;
+
++ mutex_lock(&sensor->mutex);
+ ret = ov8865_state_init(sensor);
++ mutex_unlock(&sensor->mutex);
+ if (ret)
+ goto error_ctrls;
+
+ /* Runtime PM */
+
+- pm_runtime_enable(sensor->dev);
+ pm_runtime_set_suspended(sensor->dev);
++ pm_runtime_enable(sensor->dev);
+
+ /* V4L2 subdev register */
+
+diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
+index 025a610de8935..9c6f66cab5642 100644
+--- a/drivers/media/i2c/rdacm20.c
++++ b/drivers/media/i2c/rdacm20.c
+@@ -611,7 +611,7 @@ static int rdacm20_probe(struct i2c_client *client)
+ goto error_free_ctrls;
+
+ dev->pad.flags = MEDIA_PAD_FL_SOURCE;
+- dev->sd.entity.flags |= MEDIA_ENT_F_CAM_SENSOR;
++ dev->sd.entity.function = MEDIA_ENT_F_CAM_SENSOR;
+ ret = media_entity_pads_init(&dev->sd.entity, 1, &dev->pad);
+ if (ret < 0)
+ goto error_free_ctrls;
+diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
+index 12ec5467ed1ee..7995cb956aa7f 100644
+--- a/drivers/media/i2c/rdacm21.c
++++ b/drivers/media/i2c/rdacm21.c
+@@ -351,7 +351,7 @@ static void ov10640_power_up(struct rdacm21_device *dev)
+ static int ov10640_check_id(struct rdacm21_device *dev)
+ {
+ unsigned int i;
+- u8 val;
++ u8 val = 0;
+
+ /* Read OV10640 ID to test communications. */
+ for (i = 0; i < OV10640_PID_TIMEOUT; ++i) {
+@@ -583,7 +583,7 @@ static int rdacm21_probe(struct i2c_client *client)
+ goto error_free_ctrls;
+
+ dev->pad.flags = MEDIA_PAD_FL_SOURCE;
+- dev->sd.entity.flags |= MEDIA_ENT_F_CAM_SENSOR;
++ dev->sd.entity.function = MEDIA_ENT_F_CAM_SENSOR;
+ ret = media_entity_pads_init(&dev->sd.entity, 1, &dev->pad);
+ if (ret < 0)
+ goto error_free_ctrls;
+diff --git a/drivers/media/i2c/st-mipid02.c b/drivers/media/i2c/st-mipid02.c
+index f630b88cbfaa9..cf55c57a79707 100644
+--- a/drivers/media/i2c/st-mipid02.c
++++ b/drivers/media/i2c/st-mipid02.c
+@@ -710,8 +710,13 @@ static void mipid02_set_fmt_source(struct v4l2_subdev *sd,
+ {
+ struct mipid02_dev *bridge = to_mipid02_dev(sd);
+
+- /* source pad mirror active sink pad */
+- format->format = bridge->fmt;
++ /* source pad mirrors sink pad */
++ if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
++ format->format = bridge->fmt;
++ else
++ format->format = *v4l2_subdev_get_try_format(sd, sd_state,
++ MIPID02_SINK_0);
++
+ /* but code may need to be converted */
+ format->format.code = serial_to_parallel_code(format->format.code);
+
+diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c
+index 6070aaf0b32ea..4dafa9f1cf522 100644
+--- a/drivers/media/i2c/tda1997x.c
++++ b/drivers/media/i2c/tda1997x.c
+@@ -1248,13 +1248,13 @@ tda1997x_parse_infoframe(struct tda1997x_state *state, u16 addr)
+ {
+ struct v4l2_subdev *sd = &state->sd;
+ union hdmi_infoframe frame;
+- u8 buffer[40];
++ u8 buffer[40] = { 0 };
+ u8 reg;
+ int len, err;
+
+ /* read data */
+ len = io_readn(sd, addr, sizeof(buffer), buffer);
+- err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer));
++ err = hdmi_infoframe_unpack(&frame, buffer, len);
+ if (err) {
+ v4l_err(state->client,
+ "failed parsing %d byte infoframe: 0x%04x/0x%02x\n",
+@@ -1928,13 +1928,13 @@ static int tda1997x_log_infoframe(struct v4l2_subdev *sd, int addr)
+ {
+ struct tda1997x_state *state = to_state(sd);
+ union hdmi_infoframe frame;
+- u8 buffer[40];
++ u8 buffer[40] = { 0 };
+ int len, err;
+
+ /* read data */
+ len = io_readn(sd, addr, sizeof(buffer), buffer);
+ v4l2_dbg(1, debug, sd, "infoframe: addr=%d len=%d\n", addr, len);
+- err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer));
++ err = hdmi_infoframe_unpack(&frame, buffer, len);
+ if (err) {
+ v4l_err(state->client,
+ "failed parsing %d byte infoframe: 0x%04x/0x%02x\n",
+diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c
+index 4b16ffcaef98a..b0dc21ba25c31 100644
+--- a/drivers/media/i2c/tvp5150.c
++++ b/drivers/media/i2c/tvp5150.c
+@@ -2066,6 +2066,10 @@ static int tvp5150_parse_dt(struct tvp5150 *decoder, struct device_node *np)
+ tvpc->ent.name = devm_kasprintf(dev, GFP_KERNEL, "%s %s",
+ v4l2c->name, v4l2c->label ?
+ v4l2c->label : "");
++ if (!tvpc->ent.name) {
++ ret = -ENOMEM;
++ goto err_free;
++ }
+ }
+
+ ep_np = of_graph_get_endpoint_by_regs(np, TVP5150_PAD_VID_OUT, 0);
+diff --git a/drivers/media/pci/b2c2/flexcop-pci.c b/drivers/media/pci/b2c2/flexcop-pci.c
+index 6a4c7cb0ad0f9..486c8ec0fa60d 100644
+--- a/drivers/media/pci/b2c2/flexcop-pci.c
++++ b/drivers/media/pci/b2c2/flexcop-pci.c
+@@ -185,6 +185,8 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id)
+ dma_addr_t cur_addr =
+ fc->read_ibi_reg(fc,dma1_008).dma_0x8.dma_cur_addr << 2;
+ u32 cur_pos = cur_addr - fc_pci->dma[0].dma_addr0;
++ if (cur_pos > fc_pci->dma[0].size * 2)
++ goto error;
+
+ deb_irq("%u irq: %08x cur_addr: %llx: cur_pos: %08x, last_cur_pos: %08x ",
+ jiffies_to_usecs(jiffies - fc_pci->last_irq),
+@@ -225,6 +227,7 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id)
+ ret = IRQ_NONE;
+ }
+
++error:
+ spin_unlock_irqrestore(&fc_pci->irq_lock, flags);
+ return ret;
+ }
+diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
+index 0e9df8b35ac66..661ebfa7bf3f5 100644
+--- a/drivers/media/pci/bt8xx/bttv-driver.c
++++ b/drivers/media/pci/bt8xx/bttv-driver.c
+@@ -3890,7 +3890,7 @@ static int bttv_register_video(struct bttv *btv)
+
+ /* video */
+ vdev_init(btv, &btv->video_dev, &bttv_video_template, "video");
+- btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER |
++ btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE |
+ V4L2_CAP_READWRITE | V4L2_CAP_STREAMING;
+ if (btv->tuner_type != TUNER_ABSENT)
+ btv->video_dev.device_caps |= V4L2_CAP_TUNER;
+@@ -3911,7 +3911,7 @@ static int bttv_register_video(struct bttv *btv)
+ /* vbi */
+ vdev_init(btv, &btv->vbi_dev, &bttv_video_template, "vbi");
+ btv->vbi_dev.device_caps = V4L2_CAP_VBI_CAPTURE | V4L2_CAP_READWRITE |
+- V4L2_CAP_STREAMING | V4L2_CAP_TUNER;
++ V4L2_CAP_STREAMING;
+ if (btv->tuner_type != TUNER_ABSENT)
+ btv->vbi_dev.device_caps |= V4L2_CAP_TUNER;
+
+diff --git a/drivers/media/pci/bt8xx/dst.c b/drivers/media/pci/bt8xx/dst.c
+index 3e52a51982d76..110651e478314 100644
+--- a/drivers/media/pci/bt8xx/dst.c
++++ b/drivers/media/pci/bt8xx/dst.c
+@@ -1722,7 +1722,7 @@ struct dst_state *dst_attach(struct dst_state *state, struct dvb_adapter *dvb_ad
+ return state; /* Manu (DST is a card not a frontend) */
+ }
+
+-EXPORT_SYMBOL(dst_attach);
++EXPORT_SYMBOL_GPL(dst_attach);
+
+ static const struct dvb_frontend_ops dst_dvbt_ops = {
+ .delsys = { SYS_DVBT },
+diff --git a/drivers/media/pci/bt8xx/dst_ca.c b/drivers/media/pci/bt8xx/dst_ca.c
+index 85fcdc59f0d18..571392d80ccc6 100644
+--- a/drivers/media/pci/bt8xx/dst_ca.c
++++ b/drivers/media/pci/bt8xx/dst_ca.c
+@@ -668,7 +668,7 @@ struct dvb_device *dst_ca_attach(struct dst_state *dst, struct dvb_adapter *dvb_
+ return NULL;
+ }
+
+-EXPORT_SYMBOL(dst_ca_attach);
++EXPORT_SYMBOL_GPL(dst_ca_attach);
+
+ MODULE_DESCRIPTION("DST DVB-S/T/C Combo CA driver");
+ MODULE_AUTHOR("Manu Abraham");
+diff --git a/drivers/media/pci/cx23885/cx23885-alsa.c b/drivers/media/pci/cx23885/cx23885-alsa.c
+index ab14d35214aa8..25dc8d4dc5b73 100644
+--- a/drivers/media/pci/cx23885/cx23885-alsa.c
++++ b/drivers/media/pci/cx23885/cx23885-alsa.c
+@@ -550,7 +550,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev)
+ SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ THIS_MODULE, sizeof(struct cx23885_audio_dev), &card);
+ if (err < 0)
+- goto error;
++ goto error_msg;
+
+ chip = (struct cx23885_audio_dev *) card->private_data;
+ chip->dev = dev;
+@@ -576,6 +576,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev)
+
+ error:
+ snd_card_free(card);
++error_msg:
+ pr_err("%s(): Failed to register analog audio adapter\n",
+ __func__);
+
+diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
+index f8f2ff3b00c37..8019cdf5dbae5 100644
+--- a/drivers/media/pci/cx23885/cx23885-core.c
++++ b/drivers/media/pci/cx23885/cx23885-core.c
+@@ -1325,7 +1325,9 @@ void cx23885_free_buffer(struct cx23885_dev *dev, struct cx23885_buffer *buf)
+ {
+ struct cx23885_riscmem *risc = &buf->risc;
+
+- dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu, risc->dma);
++ if (risc->cpu)
++ dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu, risc->dma);
++ memset(risc, 0, sizeof(*risc));
+ }
+
+ static void cx23885_tsport_reg_dump(struct cx23885_tsport *port)
+@@ -2165,7 +2167,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
+ err = dma_set_mask(&pci_dev->dev, 0xffffffff);
+ if (err) {
+ pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
+- goto fail_ctrl;
++ goto fail_dma_set_mask;
+ }
+
+ err = request_irq(pci_dev->irq, cx23885_irq,
+@@ -2173,7 +2175,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
+ if (err < 0) {
+ pr_err("%s: can't get IRQ %d\n",
+ dev->name, pci_dev->irq);
+- goto fail_irq;
++ goto fail_dma_set_mask;
+ }
+
+ switch (dev->board) {
+@@ -2195,7 +2197,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
+
+ return 0;
+
+-fail_irq:
++fail_dma_set_mask:
+ cx23885_dev_unregister(dev);
+ fail_ctrl:
+ v4l2_ctrl_handler_free(hdl);
+diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c
+index 45c2f4afceb82..9b437faf2c3f6 100644
+--- a/drivers/media/pci/cx23885/cx23885-dvb.c
++++ b/drivers/media/pci/cx23885/cx23885-dvb.c
+@@ -2459,16 +2459,10 @@ static int dvb_register(struct cx23885_tsport *port)
+ request_module("%s", info.type);
+ client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
+ if (!i2c_client_has_driver(client_tuner)) {
+- module_put(client_demod->dev.driver->owner);
+- i2c_unregister_device(client_demod);
+- port->i2c_client_demod = NULL;
+ goto frontend_detach;
+ }
+ if (!try_module_get(client_tuner->dev.driver->owner)) {
+ i2c_unregister_device(client_tuner);
+- module_put(client_demod->dev.driver->owner);
+- i2c_unregister_device(client_demod);
+- port->i2c_client_demod = NULL;
+ goto frontend_detach;
+ }
+ port->i2c_client_tuner = client_tuner;
+@@ -2505,16 +2499,10 @@ static int dvb_register(struct cx23885_tsport *port)
+ request_module("%s", info.type);
+ client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
+ if (!i2c_client_has_driver(client_tuner)) {
+- module_put(client_demod->dev.driver->owner);
+- i2c_unregister_device(client_demod);
+- port->i2c_client_demod = NULL;
+ goto frontend_detach;
+ }
+ if (!try_module_get(client_tuner->dev.driver->owner)) {
+ i2c_unregister_device(client_tuner);
+- module_put(client_demod->dev.driver->owner);
+- i2c_unregister_device(client_demod);
+- port->i2c_client_demod = NULL;
+ goto frontend_detach;
+ }
+ port->i2c_client_tuner = client_tuner;
+diff --git a/drivers/media/pci/cx23885/cx23885-video.c b/drivers/media/pci/cx23885/cx23885-video.c
+index a380e0920a21f..b01499f810697 100644
+--- a/drivers/media/pci/cx23885/cx23885-video.c
++++ b/drivers/media/pci/cx23885/cx23885-video.c
+@@ -342,6 +342,7 @@ static int queue_setup(struct vb2_queue *q,
+
+ static int buffer_prepare(struct vb2_buffer *vb)
+ {
++ int ret;
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+ struct cx23885_dev *dev = vb->vb2_queue->drv_priv;
+ struct cx23885_buffer *buf =
+@@ -358,12 +359,12 @@ static int buffer_prepare(struct vb2_buffer *vb)
+
+ switch (dev->field) {
+ case V4L2_FIELD_TOP:
+- cx23885_risc_buffer(dev->pci, &buf->risc,
++ ret = cx23885_risc_buffer(dev->pci, &buf->risc,
+ sgt->sgl, 0, UNSET,
+ buf->bpl, 0, dev->height);
+ break;
+ case V4L2_FIELD_BOTTOM:
+- cx23885_risc_buffer(dev->pci, &buf->risc,
++ ret = cx23885_risc_buffer(dev->pci, &buf->risc,
+ sgt->sgl, UNSET, 0,
+ buf->bpl, 0, dev->height);
+ break;
+@@ -391,21 +392,21 @@ static int buffer_prepare(struct vb2_buffer *vb)
+ line0_offset = 0;
+ line1_offset = buf->bpl;
+ }
+- cx23885_risc_buffer(dev->pci, &buf->risc,
++ ret = cx23885_risc_buffer(dev->pci, &buf->risc,
+ sgt->sgl, line0_offset,
+ line1_offset,
+ buf->bpl, buf->bpl,
+ dev->height >> 1);
+ break;
+ case V4L2_FIELD_SEQ_TB:
+- cx23885_risc_buffer(dev->pci, &buf->risc,
++ ret = cx23885_risc_buffer(dev->pci, &buf->risc,
+ sgt->sgl,
+ 0, buf->bpl * (dev->height >> 1),
+ buf->bpl, 0,
+ dev->height >> 1);
+ break;
+ case V4L2_FIELD_SEQ_BT:
+- cx23885_risc_buffer(dev->pci, &buf->risc,
++ ret = cx23885_risc_buffer(dev->pci, &buf->risc,
+ sgt->sgl,
+ buf->bpl * (dev->height >> 1), 0,
+ buf->bpl, 0,
+@@ -418,7 +419,7 @@ static int buffer_prepare(struct vb2_buffer *vb)
+ buf, buf->vb.vb2_buf.index,
+ dev->width, dev->height, dev->fmt->depth, dev->fmt->fourcc,
+ (unsigned long)buf->risc.dma);
+- return 0;
++ return ret;
+ }
+
+ static void buffer_finish(struct vb2_buffer *vb)
+diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c
+index 40c10ca94defe..a4192e80e9a07 100644
+--- a/drivers/media/pci/cx25821/cx25821-core.c
++++ b/drivers/media/pci/cx25821/cx25821-core.c
+@@ -1339,11 +1339,11 @@ static void cx25821_finidev(struct pci_dev *pci_dev)
+ struct cx25821_dev *dev = get_cx25821(v4l2_dev);
+
+ cx25821_shutdown(dev);
+- pci_disable_device(pci_dev);
+
+ /* unregister stuff */
+ if (pci_dev->irq)
+ free_irq(pci_dev->irq, dev);
++ pci_disable_device(pci_dev);
+
+ cx25821_dev_unregister(dev);
+ v4l2_device_unregister(v4l2_dev);
+diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
+index 680e1e3fe89b7..2c1d5137ac470 100644
+--- a/drivers/media/pci/cx88/cx88-mpeg.c
++++ b/drivers/media/pci/cx88/cx88-mpeg.c
+@@ -162,6 +162,9 @@ int cx8802_start_dma(struct cx8802_dev *dev,
+ cx_write(MO_TS_GPCNTRL, GP_COUNT_CONTROL_RESET);
+ q->count = 0;
+
++ /* clear interrupt status register */
++ cx_write(MO_TS_INTSTAT, 0x1f1111);
++
+ /* enable irqs */
+ dprintk(1, "setting the interrupt mask\n");
+ cx_set(MO_PCI_INTMSK, core->pci_irqmask | PCI_INT_TSINT);
+diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c
+index a075788c64d45..469aeaa725ad9 100644
+--- a/drivers/media/pci/cx88/cx88-vbi.c
++++ b/drivers/media/pci/cx88/cx88-vbi.c
+@@ -144,11 +144,10 @@ static int buffer_prepare(struct vb2_buffer *vb)
+ return -EINVAL;
+ vb2_set_plane_payload(vb, 0, size);
+
+- cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl,
+- 0, VBI_LINE_LENGTH * lines,
+- VBI_LINE_LENGTH, 0,
+- lines);
+- return 0;
++ return cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl,
++ 0, VBI_LINE_LENGTH * lines,
++ VBI_LINE_LENGTH, 0,
++ lines);
+ }
+
+ static void buffer_finish(struct vb2_buffer *vb)
+diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
+index c17ad9f7d822b..99c55109c1e01 100644
+--- a/drivers/media/pci/cx88/cx88-video.c
++++ b/drivers/media/pci/cx88/cx88-video.c
+@@ -431,6 +431,7 @@ static int queue_setup(struct vb2_queue *q,
+
+ static int buffer_prepare(struct vb2_buffer *vb)
+ {
++ int ret;
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+ struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
+ struct cx88_core *core = dev->core;
+@@ -445,35 +446,35 @@ static int buffer_prepare(struct vb2_buffer *vb)
+
+ switch (core->field) {
+ case V4L2_FIELD_TOP:
+- cx88_risc_buffer(dev->pci, &buf->risc,
+- sgt->sgl, 0, UNSET,
+- buf->bpl, 0, core->height);
++ ret = cx88_risc_buffer(dev->pci, &buf->risc,
++ sgt->sgl, 0, UNSET,
++ buf->bpl, 0, core->height);
+ break;
+ case V4L2_FIELD_BOTTOM:
+- cx88_risc_buffer(dev->pci, &buf->risc,
+- sgt->sgl, UNSET, 0,
+- buf->bpl, 0, core->height);
++ ret = cx88_risc_buffer(dev->pci, &buf->risc,
++ sgt->sgl, UNSET, 0,
++ buf->bpl, 0, core->height);
+ break;
+ case V4L2_FIELD_SEQ_TB:
+- cx88_risc_buffer(dev->pci, &buf->risc,
+- sgt->sgl,
+- 0, buf->bpl * (core->height >> 1),
+- buf->bpl, 0,
+- core->height >> 1);
++ ret = cx88_risc_buffer(dev->pci, &buf->risc,
++ sgt->sgl,
++ 0, buf->bpl * (core->height >> 1),
++ buf->bpl, 0,
++ core->height >> 1);
+ break;
+ case V4L2_FIELD_SEQ_BT:
+- cx88_risc_buffer(dev->pci, &buf->risc,
+- sgt->sgl,
+- buf->bpl * (core->height >> 1), 0,
+- buf->bpl, 0,
+- core->height >> 1);
++ ret = cx88_risc_buffer(dev->pci, &buf->risc,
++ sgt->sgl,
++ buf->bpl * (core->height >> 1), 0,
++ buf->bpl, 0,
++ core->height >> 1);
+ break;
+ case V4L2_FIELD_INTERLACED:
+ default:
+- cx88_risc_buffer(dev->pci, &buf->risc,
+- sgt->sgl, 0, buf->bpl,
+- buf->bpl, buf->bpl,
+- core->height >> 1);
++ ret = cx88_risc_buffer(dev->pci, &buf->risc,
++ sgt->sgl, 0, buf->bpl,
++ buf->bpl, buf->bpl,
++ core->height >> 1);
+ break;
+ }
+ dprintk(2,
+@@ -481,7 +482,7 @@ static int buffer_prepare(struct vb2_buffer *vb)
+ buf, buf->vb.vb2_buf.index, __func__,
+ core->width, core->height, dev->fmt->depth, dev->fmt->fourcc,
+ (unsigned long)buf->risc.dma);
+- return 0;
++ return ret;
+ }
+
+ static void buffer_finish(struct vb2_buffer *vb)
+diff --git a/drivers/media/pci/ddbridge/ddbridge-dummy-fe.c b/drivers/media/pci/ddbridge/ddbridge-dummy-fe.c
+index 6868a0c4fc82a..520ebd16b0c44 100644
+--- a/drivers/media/pci/ddbridge/ddbridge-dummy-fe.c
++++ b/drivers/media/pci/ddbridge/ddbridge-dummy-fe.c
+@@ -112,7 +112,7 @@ struct dvb_frontend *ddbridge_dummy_fe_qam_attach(void)
+ state->frontend.demodulator_priv = state;
+ return &state->frontend;
+ }
+-EXPORT_SYMBOL(ddbridge_dummy_fe_qam_attach);
++EXPORT_SYMBOL_GPL(ddbridge_dummy_fe_qam_attach);
+
+ static const struct dvb_frontend_ops ddbridge_dummy_fe_qam_ops = {
+ .delsys = { SYS_DVBC_ANNEX_A },
+diff --git a/drivers/media/pci/dm1105/dm1105.c b/drivers/media/pci/dm1105/dm1105.c
+index 4ac645a56c14e..9e9c7c071accc 100644
+--- a/drivers/media/pci/dm1105/dm1105.c
++++ b/drivers/media/pci/dm1105/dm1105.c
+@@ -1176,6 +1176,7 @@ static void dm1105_remove(struct pci_dev *pdev)
+ struct dvb_demux *dvbdemux = &dev->demux;
+ struct dmx_demux *dmx = &dvbdemux->dmx;
+
++ cancel_work_sync(&dev->ir.work);
+ dm1105_ir_exit(dev);
+ dmx->close(dmx);
+ dvb_net_release(&dev->dvbnet);
+diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
+index 47db0ee0fcbfa..3a8af3936e93a 100644
+--- a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
++++ b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
+@@ -1851,6 +1851,9 @@ static void cio2_pci_remove(struct pci_dev *pci_dev)
+ v4l2_device_unregister(&cio2->v4l2_dev);
+ media_device_cleanup(&cio2->media_dev);
+ mutex_destroy(&cio2->lock);
++
++ pm_runtime_forbid(&pci_dev->dev);
++ pm_runtime_get_noresume(&pci_dev->dev);
+ }
+
+ static int __maybe_unused cio2_runtime_suspend(struct device *dev)
+diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h
+index 4cf92dee65271..ce3a7ca51736e 100644
+--- a/drivers/media/pci/ivtv/ivtv-driver.h
++++ b/drivers/media/pci/ivtv/ivtv-driver.h
+@@ -330,7 +330,6 @@ struct ivtv_stream {
+ struct ivtv *itv; /* for ease of use */
+ const char *name; /* name of the stream */
+ int type; /* stream type */
+- u32 caps; /* V4L2 capabilities */
+
+ struct v4l2_fh *fh; /* pointer to the streaming filehandle */
+ spinlock_t qlock; /* locks access to the queues */
+diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c
+index da19b2e95e6cf..6d2d3348e3215 100644
+--- a/drivers/media/pci/ivtv/ivtv-ioctl.c
++++ b/drivers/media/pci/ivtv/ivtv-ioctl.c
+@@ -438,7 +438,7 @@ static int ivtv_g_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f
+ struct ivtv_stream *s = &itv->streams[fh2id(fh)->type];
+ struct v4l2_window *winfmt = &fmt->fmt.win;
+
+- if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
++ if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
+ return -EINVAL;
+ if (!itv->osd_video_pbase)
+ return -EINVAL;
+@@ -549,7 +549,7 @@ static int ivtv_try_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2
+ u32 chromakey = fmt->fmt.win.chromakey;
+ u8 global_alpha = fmt->fmt.win.global_alpha;
+
+- if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
++ if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
+ return -EINVAL;
+ if (!itv->osd_video_pbase)
+ return -EINVAL;
+@@ -1383,7 +1383,7 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb)
+ 0,
+ };
+
+- if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
++ if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
+ return -ENOTTY;
+ if (!itv->osd_video_pbase)
+ return -ENOTTY;
+@@ -1450,7 +1450,7 @@ static int ivtv_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffe
+ struct ivtv_stream *s = &itv->streams[fh2id(fh)->type];
+ struct yuv_playback_info *yi = &itv->yuv_info;
+
+- if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
++ if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
+ return -ENOTTY;
+ if (!itv->osd_video_pbase)
+ return -ENOTTY;
+@@ -1470,7 +1470,7 @@ static int ivtv_overlay(struct file *file, void *fh, unsigned int on)
+ struct ivtv *itv = id->itv;
+ struct ivtv_stream *s = &itv->streams[fh2id(fh)->type];
+
+- if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
++ if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY))
+ return -ENOTTY;
+ if (!itv->osd_video_pbase)
+ return -ENOTTY;
+diff --git a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c
+index f04ee84bab5fd..f9de5d1605fe3 100644
+--- a/drivers/media/pci/ivtv/ivtv-streams.c
++++ b/drivers/media/pci/ivtv/ivtv-streams.c
+@@ -176,7 +176,7 @@ static void ivtv_stream_init(struct ivtv *itv, int type)
+ s->itv = itv;
+ s->type = type;
+ s->name = ivtv_stream_info[type].name;
+- s->caps = ivtv_stream_info[type].v4l2_caps;
++ s->vdev.device_caps = ivtv_stream_info[type].v4l2_caps;
+
+ if (ivtv_stream_info[type].pio)
+ s->dma = PCI_DMA_NONE;
+@@ -299,12 +299,9 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
+ if (s_mpg->vdev.v4l2_dev)
+ num = s_mpg->vdev.num + ivtv_stream_info[type].num_offset;
+ }
+- s->vdev.device_caps = s->caps;
+- if (itv->osd_video_pbase) {
+- itv->streams[IVTV_DEC_STREAM_TYPE_YUV].vdev.device_caps |=
+- V4L2_CAP_VIDEO_OUTPUT_OVERLAY;
+- itv->streams[IVTV_DEC_STREAM_TYPE_MPG].vdev.device_caps |=
+- V4L2_CAP_VIDEO_OUTPUT_OVERLAY;
++ if (itv->osd_video_pbase && (type == IVTV_DEC_STREAM_TYPE_YUV ||
++ type == IVTV_DEC_STREAM_TYPE_MPG)) {
++ s->vdev.device_caps |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY;
+ itv->v4l2_cap |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY;
+ }
+ video_set_drvdata(&s->vdev, s);
+diff --git a/drivers/media/pci/ivtv/ivtvfb.c b/drivers/media/pci/ivtv/ivtvfb.c
+index e2d56dca5be40..5ad03b2a50bdb 100644
+--- a/drivers/media/pci/ivtv/ivtvfb.c
++++ b/drivers/media/pci/ivtv/ivtvfb.c
+@@ -36,7 +36,7 @@
+ #include <linux/fb.h>
+ #include <linux/ivtvfb.h>
+
+-#ifdef CONFIG_X86_64
++#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ #include <asm/memtype.h>
+ #endif
+
+@@ -1157,7 +1157,7 @@ static int ivtvfb_init_card(struct ivtv *itv)
+ {
+ int rc;
+
+-#ifdef CONFIG_X86_64
++#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ if (pat_enabled()) {
+ if (ivtvfb_force_pat) {
+ pr_info("PAT is enabled. Write-combined framebuffer caching will be disabled.\n");
+diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+index 6f3125c2d0976..7c5061953ee82 100644
+--- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
++++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+@@ -258,19 +258,24 @@ static irqreturn_t netup_unidvb_isr(int irq, void *dev_id)
+ if ((reg40 & AVL_IRQ_ASSERTED) != 0) {
+ /* IRQ is being signaled */
+ reg_isr = readw(ndev->bmmio0 + REG_ISR);
+- if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) {
+- iret = netup_i2c_interrupt(&ndev->i2c[0]);
+- } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) {
+- iret = netup_i2c_interrupt(&ndev->i2c[1]);
+- } else if (reg_isr & NETUP_UNIDVB_IRQ_SPI) {
++ if (reg_isr & NETUP_UNIDVB_IRQ_SPI)
+ iret = netup_spi_interrupt(ndev->spi);
+- } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) {
+- iret = netup_dma_interrupt(&ndev->dma[0]);
+- } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) {
+- iret = netup_dma_interrupt(&ndev->dma[1]);
+- } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) {
+- iret = netup_ci_interrupt(ndev);
++ else if (!ndev->old_fw) {
++ if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) {
++ iret = netup_i2c_interrupt(&ndev->i2c[0]);
++ } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) {
++ iret = netup_i2c_interrupt(&ndev->i2c[1]);
++ } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) {
++ iret = netup_dma_interrupt(&ndev->dma[0]);
++ } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) {
++ iret = netup_dma_interrupt(&ndev->dma[1]);
++ } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) {
++ iret = netup_ci_interrupt(ndev);
++ } else {
++ goto err;
++ }
+ } else {
++err:
+ dev_err(&pci_dev->dev,
+ "%s(): unknown interrupt 0x%x\n",
+ __func__, reg_isr);
+@@ -692,7 +697,7 @@ static void netup_unidvb_dma_fini(struct netup_unidvb_dev *ndev, int num)
+ netup_unidvb_dma_enable(dma, 0);
+ msleep(50);
+ cancel_work_sync(&dma->work);
+- del_timer(&dma->timeout);
++ del_timer_sync(&dma->timeout);
+ }
+
+ static int netup_unidvb_dma_setup(struct netup_unidvb_dev *ndev)
+@@ -882,12 +887,7 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev,
+ ndev->lmmio0, (u32)pci_resource_len(pci_dev, 0),
+ ndev->lmmio1, (u32)pci_resource_len(pci_dev, 1),
+ pci_dev->irq);
+- if (request_irq(pci_dev->irq, netup_unidvb_isr, IRQF_SHARED,
+- "netup_unidvb", pci_dev) < 0) {
+- dev_err(&pci_dev->dev,
+- "%s(): can't get IRQ %d\n", __func__, pci_dev->irq);
+- goto irq_request_err;
+- }
++
+ ndev->dma_size = 2 * 188 *
+ NETUP_DMA_BLOCKS_COUNT * NETUP_DMA_PACKETS_COUNT;
+ ndev->dma_virt = dma_alloc_coherent(&pci_dev->dev,
+@@ -928,6 +928,14 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev,
+ dev_err(&pci_dev->dev, "netup_unidvb: DMA setup failed\n");
+ goto dma_setup_err;
+ }
++
++ if (request_irq(pci_dev->irq, netup_unidvb_isr, IRQF_SHARED,
++ "netup_unidvb", pci_dev) < 0) {
++ dev_err(&pci_dev->dev,
++ "%s(): can't get IRQ %d\n", __func__, pci_dev->irq);
++ goto dma_setup_err;
++ }
++
+ dev_info(&pci_dev->dev,
+ "netup_unidvb: device has been initialized\n");
+ return 0;
+@@ -946,8 +954,6 @@ spi_setup_err:
+ dma_free_coherent(&pci_dev->dev, ndev->dma_size,
+ ndev->dma_virt, ndev->dma_phys);
+ dma_alloc_err:
+- free_irq(pci_dev->irq, pci_dev);
+-irq_request_err:
+ iounmap(ndev->lmmio1);
+ pci_bar1_error:
+ iounmap(ndev->lmmio0);
+diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c
+index fb24d2ed3621b..d3cde05a6ebab 100644
+--- a/drivers/media/pci/saa7134/saa7134-alsa.c
++++ b/drivers/media/pci/saa7134/saa7134-alsa.c
+@@ -1214,7 +1214,7 @@ static int alsa_device_exit(struct saa7134_dev *dev)
+
+ static int saa7134_alsa_init(void)
+ {
+- struct saa7134_dev *dev = NULL;
++ struct saa7134_dev *dev;
+
+ saa7134_dmasound_init = alsa_device_init;
+ saa7134_dmasound_exit = alsa_device_exit;
+@@ -1229,7 +1229,7 @@ static int saa7134_alsa_init(void)
+ alsa_device_init(dev);
+ }
+
+- if (dev == NULL)
++ if (list_empty(&saa7134_devlist))
+ pr_info("saa7134 ALSA: no saa7134 cards found\n");
+
+ return 0;
+diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
+index 96328b0af1641..cf2871306987c 100644
+--- a/drivers/media/pci/saa7134/saa7134-core.c
++++ b/drivers/media/pci/saa7134/saa7134-core.c
+@@ -978,7 +978,7 @@ static void saa7134_unregister_video(struct saa7134_dev *dev)
+ }
+ if (dev->radio_dev) {
+ if (video_is_registered(dev->radio_dev))
+- vb2_video_unregister_device(dev->radio_dev);
++ video_unregister_device(dev->radio_dev);
+ else
+ video_device_release(dev->radio_dev);
+ dev->radio_dev = NULL;
+diff --git a/drivers/media/pci/saa7134/saa7134-ts.c b/drivers/media/pci/saa7134/saa7134-ts.c
+index 6a5053126237f..437dbe5e75e29 100644
+--- a/drivers/media/pci/saa7134/saa7134-ts.c
++++ b/drivers/media/pci/saa7134/saa7134-ts.c
+@@ -300,6 +300,7 @@ int saa7134_ts_start(struct saa7134_dev *dev)
+
+ int saa7134_ts_fini(struct saa7134_dev *dev)
+ {
++ del_timer_sync(&dev->ts_q.timeout);
+ saa7134_pgtable_free(dev->pci, &dev->ts_q.pt);
+ return 0;
+ }
+diff --git a/drivers/media/pci/saa7134/saa7134-vbi.c b/drivers/media/pci/saa7134/saa7134-vbi.c
+index 3f0b0933eed69..3e773690468bd 100644
+--- a/drivers/media/pci/saa7134/saa7134-vbi.c
++++ b/drivers/media/pci/saa7134/saa7134-vbi.c
+@@ -185,6 +185,7 @@ int saa7134_vbi_init1(struct saa7134_dev *dev)
+ int saa7134_vbi_fini(struct saa7134_dev *dev)
+ {
+ /* nothing */
++ del_timer_sync(&dev->vbi_q.timeout);
+ return 0;
+ }
+
+diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
+index 374c8e1087de1..81bb9a3671953 100644
+--- a/drivers/media/pci/saa7134/saa7134-video.c
++++ b/drivers/media/pci/saa7134/saa7134-video.c
+@@ -2153,6 +2153,7 @@ int saa7134_video_init1(struct saa7134_dev *dev)
+
+ void saa7134_video_fini(struct saa7134_dev *dev)
+ {
++ del_timer_sync(&dev->video_q.timeout);
+ /* free stuff */
+ saa7134_pgtable_free(dev->pci, &dev->video_q.pt);
+ saa7134_pgtable_free(dev->pci, &dev->vbi_q.pt);
+diff --git a/drivers/media/pci/saa7146/hexium_gemini.c b/drivers/media/pci/saa7146/hexium_gemini.c
+index 2214c74bbbf15..3947701cd6c7e 100644
+--- a/drivers/media/pci/saa7146/hexium_gemini.c
++++ b/drivers/media/pci/saa7146/hexium_gemini.c
+@@ -284,7 +284,12 @@ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_d
+ hexium_set_input(hexium, 0);
+ hexium->cur_input = 0;
+
+- saa7146_vv_init(dev, &vv_data);
++ ret = saa7146_vv_init(dev, &vv_data);
++ if (ret) {
++ i2c_del_adapter(&hexium->i2c_adapter);
++ kfree(hexium);
++ return ret;
++ }
+
+ vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input;
+ vv_data.vid_ops.vidioc_g_input = vidioc_g_input;
+diff --git a/drivers/media/pci/saa7146/hexium_orion.c b/drivers/media/pci/saa7146/hexium_orion.c
+index 39d14c179d229..2eb4bee16b71f 100644
+--- a/drivers/media/pci/saa7146/hexium_orion.c
++++ b/drivers/media/pci/saa7146/hexium_orion.c
+@@ -355,10 +355,16 @@ static struct saa7146_ext_vv vv_data;
+ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info)
+ {
+ struct hexium *hexium = (struct hexium *) dev->ext_priv;
++ int ret;
+
+ DEB_EE("\n");
+
+- saa7146_vv_init(dev, &vv_data);
++ ret = saa7146_vv_init(dev, &vv_data);
++ if (ret) {
++ pr_err("Error in saa7146_vv_init()\n");
++ return ret;
++ }
++
+ vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input;
+ vv_data.vid_ops.vidioc_g_input = vidioc_g_input;
+ vv_data.vid_ops.vidioc_s_input = vidioc_s_input;
+diff --git a/drivers/media/pci/saa7146/mxb.c b/drivers/media/pci/saa7146/mxb.c
+index 73fc901ecf3db..bf0b9b0914cd5 100644
+--- a/drivers/media/pci/saa7146/mxb.c
++++ b/drivers/media/pci/saa7146/mxb.c
+@@ -683,10 +683,16 @@ static struct saa7146_ext_vv vv_data;
+ static int mxb_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info)
+ {
+ struct mxb *mxb;
++ int ret;
+
+ DEB_EE("dev:%p\n", dev);
+
+- saa7146_vv_init(dev, &vv_data);
++ ret = saa7146_vv_init(dev, &vv_data);
++ if (ret) {
++ ERR("Error in saa7146_vv_init()");
++ return ret;
++ }
++
+ if (mxb_probe(dev)) {
+ saa7146_vv_release(dev);
+ return -1;
+diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c
+index 7973ae42873a6..c10997e2271d2 100644
+--- a/drivers/media/pci/saa7164/saa7164-core.c
++++ b/drivers/media/pci/saa7164/saa7164-core.c
+@@ -1259,7 +1259,7 @@ static int saa7164_initdev(struct pci_dev *pci_dev,
+
+ if (saa7164_dev_setup(dev) < 0) {
+ err = -EINVAL;
+- goto fail_free;
++ goto fail_dev;
+ }
+
+ /* print pci info */
+@@ -1427,6 +1427,8 @@ fail_fw:
+
+ fail_irq:
+ saa7164_dev_unregister(dev);
++fail_dev:
++ pci_disable_device(pci_dev);
+ fail_free:
+ v4l2_device_unregister(&dev->v4l2_dev);
+ kfree(dev);
+diff --git a/drivers/media/pci/solo6x10/solo6x10-core.c b/drivers/media/pci/solo6x10/solo6x10-core.c
+index 4a546eeefe38f..6d87fbb0ee04a 100644
+--- a/drivers/media/pci/solo6x10/solo6x10-core.c
++++ b/drivers/media/pci/solo6x10/solo6x10-core.c
+@@ -420,6 +420,7 @@ static int solo_sysfs_init(struct solo_dev *solo_dev)
+ solo_dev->nr_chans);
+
+ if (device_register(dev)) {
++ put_device(dev);
+ dev->parent = NULL;
+ return -ENOMEM;
+ }
+diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
+index fe94944d05317..0d1120abc6471 100644
+--- a/drivers/media/pci/tw68/tw68-video.c
++++ b/drivers/media/pci/tw68/tw68-video.c
+@@ -437,6 +437,7 @@ static void tw68_buf_queue(struct vb2_buffer *vb)
+ */
+ static int tw68_buf_prepare(struct vb2_buffer *vb)
+ {
++ int ret;
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+ struct vb2_queue *vq = vb->vb2_queue;
+ struct tw68_dev *dev = vb2_get_drv_priv(vq);
+@@ -452,30 +453,30 @@ static int tw68_buf_prepare(struct vb2_buffer *vb)
+ bpl = (dev->width * dev->fmt->depth) >> 3;
+ switch (dev->field) {
+ case V4L2_FIELD_TOP:
+- tw68_risc_buffer(dev->pci, buf, dma->sgl,
++ ret = tw68_risc_buffer(dev->pci, buf, dma->sgl,
+ 0, UNSET, bpl, 0, dev->height);
+ break;
+ case V4L2_FIELD_BOTTOM:
+- tw68_risc_buffer(dev->pci, buf, dma->sgl,
++ ret = tw68_risc_buffer(dev->pci, buf, dma->sgl,
+ UNSET, 0, bpl, 0, dev->height);
+ break;
+ case V4L2_FIELD_SEQ_TB:
+- tw68_risc_buffer(dev->pci, buf, dma->sgl,
++ ret = tw68_risc_buffer(dev->pci, buf, dma->sgl,
+ 0, bpl * (dev->height >> 1),
+ bpl, 0, dev->height >> 1);
+ break;
+ case V4L2_FIELD_SEQ_BT:
+- tw68_risc_buffer(dev->pci, buf, dma->sgl,
++ ret = tw68_risc_buffer(dev->pci, buf, dma->sgl,
+ bpl * (dev->height >> 1), 0,
+ bpl, 0, dev->height >> 1);
+ break;
+ case V4L2_FIELD_INTERLACED:
+ default:
+- tw68_risc_buffer(dev->pci, buf, dma->sgl,
++ ret = tw68_risc_buffer(dev->pci, buf, dma->sgl,
+ 0, bpl, bpl, bpl, dev->height >> 1);
+ break;
+ }
+- return 0;
++ return ret;
+ }
+
+ static void tw68_buf_finish(struct vb2_buffer *vb)
+@@ -485,7 +486,8 @@ static void tw68_buf_finish(struct vb2_buffer *vb)
+ struct tw68_dev *dev = vb2_get_drv_priv(vq);
+ struct tw68_buf *buf = container_of(vbuf, struct tw68_buf, vb);
+
+- dma_free_coherent(&dev->pci->dev, buf->size, buf->cpu, buf->dma);
++ if (buf->cpu)
++ dma_free_coherent(&dev->pci->dev, buf->size, buf->cpu, buf->dma);
+ }
+
+ static int tw68_start_streaming(struct vb2_queue *q, unsigned int count)
+diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c
+index 6676e069b515d..384d38754a4b1 100644
+--- a/drivers/media/pci/tw686x/tw686x-core.c
++++ b/drivers/media/pci/tw686x/tw686x-core.c
+@@ -315,13 +315,6 @@ static int tw686x_probe(struct pci_dev *pci_dev,
+
+ spin_lock_init(&dev->lock);
+
+- err = request_irq(pci_dev->irq, tw686x_irq, IRQF_SHARED,
+- dev->name, dev);
+- if (err < 0) {
+- dev_err(&pci_dev->dev, "unable to request interrupt\n");
+- goto iounmap;
+- }
+-
+ timer_setup(&dev->dma_delay_timer, tw686x_dma_delay, 0);
+
+ /*
+@@ -333,18 +326,23 @@ static int tw686x_probe(struct pci_dev *pci_dev,
+ err = tw686x_video_init(dev);
+ if (err) {
+ dev_err(&pci_dev->dev, "can't register video\n");
+- goto free_irq;
++ goto iounmap;
+ }
+
+ err = tw686x_audio_init(dev);
+ if (err)
+ dev_warn(&pci_dev->dev, "can't register audio\n");
+
++ err = request_irq(pci_dev->irq, tw686x_irq, IRQF_SHARED,
++ dev->name, dev);
++ if (err < 0) {
++ dev_err(&pci_dev->dev, "unable to request interrupt\n");
++ goto iounmap;
++ }
++
+ pci_set_drvdata(pci_dev, dev);
+ return 0;
+
+-free_irq:
+- free_irq(pci_dev->irq, dev);
+ iounmap:
+ pci_iounmap(pci_dev, dev->mmio);
+ free_region:
+diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c
+index b227e9e78ebd0..37a20fe24241f 100644
+--- a/drivers/media/pci/tw686x/tw686x-video.c
++++ b/drivers/media/pci/tw686x/tw686x-video.c
+@@ -1282,8 +1282,10 @@ int tw686x_video_init(struct tw686x_dev *dev)
+ video_set_drvdata(vdev, vc);
+
+ err = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
+- if (err < 0)
++ if (err < 0) {
++ video_device_release(vdev);
+ goto error;
++ }
+ vc->num = vdev->num;
+ }
+
+diff --git a/drivers/media/platform/allegro-dvt/allegro-core.c b/drivers/media/platform/allegro-dvt/allegro-core.c
+index 887b492e4ad1c..14a119b43bca0 100644
+--- a/drivers/media/platform/allegro-dvt/allegro-core.c
++++ b/drivers/media/platform/allegro-dvt/allegro-core.c
+@@ -2185,6 +2185,15 @@ static irqreturn_t allegro_irq_thread(int irq, void *data)
+ {
+ struct allegro_dev *dev = data;
+
++ /*
++ * The firmware is initialized after the mailbox is set up. We also
++ * check the AL5_ITC_CPU_IRQ_STA register to see whether the firmware
++ * actually triggered the interrupt. Although this should not happen,
++ * make sure we ignore interrupts while the mailbox is not initialized.
++ */
++ if (!dev->mbox_status)
++ return IRQ_NONE;
++
+ allegro_mbox_notify(dev->mbox_status);
+
+ return IRQ_HANDLED;
+diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
+index 1c9cb9e05fdf6..c1ce93efc6559 100644
+--- a/drivers/media/platform/am437x/am437x-vpfe.c
++++ b/drivers/media/platform/am437x/am437x-vpfe.c
+@@ -1499,7 +1499,9 @@ static int vpfe_enum_size(struct file *file, void *priv,
+ struct v4l2_frmsizeenum *fsize)
+ {
+ struct vpfe_device *vpfe = video_drvdata(file);
+- struct v4l2_subdev_frame_size_enum fse;
++ struct v4l2_subdev_frame_size_enum fse = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+ struct v4l2_subdev *sd = vpfe->current_subdev->sd;
+ struct vpfe_fmt *fmt;
+ int ret;
+@@ -1514,11 +1516,9 @@ static int vpfe_enum_size(struct file *file, void *priv,
+
+ memset(fsize->reserved, 0x0, sizeof(fsize->reserved));
+
+- memset(&fse, 0x0, sizeof(fse));
+ fse.index = fsize->index;
+ fse.pad = 0;
+ fse.code = fmt->code;
+- fse.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ ret = v4l2_subdev_call(sd, pad, enum_frame_size, NULL, &fse);
+ if (ret)
+ return ret;
+@@ -2146,7 +2146,6 @@ vpfe_async_bound(struct v4l2_async_notifier *notifier,
+ {
+ struct vpfe_device *vpfe = container_of(notifier->v4l2_dev,
+ struct vpfe_device, v4l2_dev);
+- struct v4l2_subdev_mbus_code_enum mbus_code;
+ struct vpfe_subdev_info *sdinfo;
+ struct vpfe_fmt *fmt;
+ int ret = 0;
+@@ -2173,9 +2172,11 @@ vpfe_async_bound(struct v4l2_async_notifier *notifier,
+
+ vpfe->num_active_fmt = 0;
+ for (j = 0, i = 0; (ret != -EINVAL); ++j) {
+- memset(&mbus_code, 0, sizeof(mbus_code));
+- mbus_code.index = j;
+- mbus_code.which = V4L2_SUBDEV_FORMAT_ACTIVE;
++ struct v4l2_subdev_mbus_code_enum mbus_code = {
++ .index = j,
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
++
+ ret = v4l2_subdev_call(subdev, pad, enum_mbus_code,
+ NULL, &mbus_code);
+ if (ret)
+diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c
+index 7bb6babdcade0..9d9124308f6ad 100644
+--- a/drivers/media/platform/aspeed-video.c
++++ b/drivers/media/platform/aspeed-video.c
+@@ -151,7 +151,7 @@
+ #define VE_SRC_TB_EDGE_DET_BOT GENMASK(28, VE_SRC_TB_EDGE_DET_BOT_SHF)
+
+ #define VE_MODE_DETECT_STATUS 0x098
+-#define VE_MODE_DETECT_H_PIXELS GENMASK(11, 0)
++#define VE_MODE_DETECT_H_PERIOD GENMASK(11, 0)
+ #define VE_MODE_DETECT_V_LINES_SHF 16
+ #define VE_MODE_DETECT_V_LINES GENMASK(27, VE_MODE_DETECT_V_LINES_SHF)
+ #define VE_MODE_DETECT_STATUS_VSYNC BIT(28)
+@@ -162,6 +162,8 @@
+ #define VE_SYNC_STATUS_VSYNC_SHF 16
+ #define VE_SYNC_STATUS_VSYNC GENMASK(27, VE_SYNC_STATUS_VSYNC_SHF)
+
++#define VE_H_TOTAL_PIXELS 0x0A0
++
+ #define VE_INTERRUPT_CTRL 0x304
+ #define VE_INTERRUPT_STATUS 0x308
+ #define VE_INTERRUPT_MODE_DETECT_WD BIT(0)
+@@ -500,6 +502,10 @@ static void aspeed_video_enable_mode_detect(struct aspeed_video *video)
+ aspeed_video_update(video, VE_INTERRUPT_CTRL, 0,
+ VE_INTERRUPT_MODE_DETECT);
+
++ /* Disable mode detect in order to re-trigger */
++ aspeed_video_update(video, VE_SEQ_CTRL,
++ VE_SEQ_CTRL_TRIG_MODE_DET, 0);
++
+ /* Trigger mode detect */
+ aspeed_video_update(video, VE_SEQ_CTRL, 0, VE_SEQ_CTRL_TRIG_MODE_DET);
+ }
+@@ -552,6 +558,8 @@ static void aspeed_video_irq_res_change(struct aspeed_video *video, ulong delay)
+ set_bit(VIDEO_RES_CHANGE, &video->flags);
+ clear_bit(VIDEO_FRAME_INPRG, &video->flags);
+
++ video->v4l2_input_status = V4L2_IN_ST_NO_SIGNAL;
++
+ aspeed_video_off(video);
+ aspeed_video_bufs_done(video, VB2_BUF_STATE_ERROR);
+
+@@ -759,6 +767,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video)
+ u32 src_lr_edge;
+ u32 src_tb_edge;
+ u32 sync;
++ u32 htotal;
+ struct v4l2_bt_timings *det = &video->detected_timings;
+
+ det->width = MIN_WIDTH;
+@@ -786,10 +795,6 @@ static void aspeed_video_get_resolution(struct aspeed_video *video)
+ return;
+ }
+
+- /* Disable mode detect in order to re-trigger */
+- aspeed_video_update(video, VE_SEQ_CTRL,
+- VE_SEQ_CTRL_TRIG_MODE_DET, 0);
+-
+ aspeed_video_check_and_set_polarity(video);
+
+ aspeed_video_enable_mode_detect(video);
+@@ -807,6 +812,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video)
+ src_tb_edge = aspeed_video_read(video, VE_SRC_TB_EDGE_DET);
+ mds = aspeed_video_read(video, VE_MODE_DETECT_STATUS);
+ sync = aspeed_video_read(video, VE_SYNC_STATUS);
++ htotal = aspeed_video_read(video, VE_H_TOTAL_PIXELS);
+
+ video->frame_bottom = (src_tb_edge & VE_SRC_TB_EDGE_DET_BOT) >>
+ VE_SRC_TB_EDGE_DET_BOT_SHF;
+@@ -823,8 +829,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video)
+ VE_SRC_LR_EDGE_DET_RT_SHF;
+ video->frame_left = src_lr_edge & VE_SRC_LR_EDGE_DET_LEFT;
+ det->hfrontporch = video->frame_left;
+- det->hbackporch = (mds & VE_MODE_DETECT_H_PIXELS) -
+- video->frame_right;
++ det->hbackporch = htotal - video->frame_right;
+ det->hsync = sync & VE_SYNC_STATUS_HSYNC;
+ if (video->frame_left > video->frame_right)
+ continue;
+@@ -1337,7 +1342,6 @@ static void aspeed_video_resolution_work(struct work_struct *work)
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct aspeed_video *video = container_of(dwork, struct aspeed_video,
+ res_work);
+- u32 input_status = video->v4l2_input_status;
+
+ aspeed_video_on(video);
+
+@@ -1350,8 +1354,7 @@ static void aspeed_video_resolution_work(struct work_struct *work)
+ aspeed_video_get_resolution(video);
+
+ if (video->detected_timings.width != video->active_timings.width ||
+- video->detected_timings.height != video->active_timings.height ||
+- input_status != video->v4l2_input_status) {
++ video->detected_timings.height != video->active_timings.height) {
+ static const struct v4l2_event ev = {
+ .type = V4L2_EVENT_SOURCE_CHANGE,
+ .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
+@@ -1720,6 +1723,7 @@ static int aspeed_video_probe(struct platform_device *pdev)
+
+ rc = aspeed_video_setup_video(video);
+ if (rc) {
++ aspeed_video_free_buf(video, &video->jpeg);
+ clk_unprepare(video->vclk);
+ clk_unprepare(video->eclk);
+ return rc;
+@@ -1745,8 +1749,7 @@ static int aspeed_video_remove(struct platform_device *pdev)
+
+ v4l2_device_unregister(v4l2_dev);
+
+- dma_free_coherent(video->dev, VE_JPEG_HEADER_SIZE, video->jpeg.virt,
+- video->jpeg.dma);
++ aspeed_video_free_buf(video, &video->jpeg);
+
+ of_reserved_mem_device_release(dev);
+
+diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
+index 136ab7cf36edc..f768be3c40595 100644
+--- a/drivers/media/platform/atmel/atmel-isc-base.c
++++ b/drivers/media/platform/atmel/atmel-isc-base.c
+@@ -123,11 +123,9 @@ static int isc_clk_prepare(struct clk_hw *hw)
+ struct isc_clk *isc_clk = to_isc_clk(hw);
+ int ret;
+
+- if (isc_clk->id == ISC_ISPCK) {
+- ret = pm_runtime_resume_and_get(isc_clk->dev);
+- if (ret < 0)
+- return ret;
+- }
++ ret = pm_runtime_resume_and_get(isc_clk->dev);
++ if (ret < 0)
++ return ret;
+
+ return isc_wait_clk_stable(hw);
+ }
+@@ -138,8 +136,7 @@ static void isc_clk_unprepare(struct clk_hw *hw)
+
+ isc_wait_clk_stable(hw);
+
+- if (isc_clk->id == ISC_ISPCK)
+- pm_runtime_put_sync(isc_clk->dev);
++ pm_runtime_put_sync(isc_clk->dev);
+ }
+
+ static int isc_clk_enable(struct clk_hw *hw)
+@@ -186,16 +183,13 @@ static int isc_clk_is_enabled(struct clk_hw *hw)
+ u32 status;
+ int ret;
+
+- if (isc_clk->id == ISC_ISPCK) {
+- ret = pm_runtime_resume_and_get(isc_clk->dev);
+- if (ret < 0)
+- return 0;
+- }
++ ret = pm_runtime_resume_and_get(isc_clk->dev);
++ if (ret < 0)
++ return 0;
+
+ regmap_read(isc_clk->regmap, ISC_CLKSR, &status);
+
+- if (isc_clk->id == ISC_ISPCK)
+- pm_runtime_put_sync(isc_clk->dev);
++ pm_runtime_put_sync(isc_clk->dev);
+
+ return status & ISC_CLK(isc_clk->id) ? 1 : 0;
+ }
+@@ -325,6 +319,9 @@ static int isc_clk_register(struct isc_device *isc, unsigned int id)
+ const char *parent_names[3];
+ int num_parents;
+
++ if (id == ISC_ISPCK && !isc->ispck_required)
++ return 0;
++
+ num_parents = of_clk_get_parent_count(np);
+ if (num_parents < 1 || num_parents > 3)
+ return -EINVAL;
+@@ -1372,14 +1369,12 @@ static int isc_enum_framesizes(struct file *file, void *fh,
+ struct v4l2_frmsizeenum *fsize)
+ {
+ struct isc_device *isc = video_drvdata(file);
+- struct v4l2_subdev_frame_size_enum fse = {
+- .code = isc->config.sd_format->mbus_code,
+- .index = fsize->index,
+- .which = V4L2_SUBDEV_FORMAT_ACTIVE,
+- };
+ int ret = -EINVAL;
+ int i;
+
++ if (fsize->index)
++ return -EINVAL;
++
+ for (i = 0; i < isc->num_user_formats; i++)
+ if (isc->user_formats[i]->fourcc == fsize->pixel_format)
+ ret = 0;
+@@ -1391,14 +1386,14 @@ static int isc_enum_framesizes(struct file *file, void *fh,
+ if (ret)
+ return ret;
+
+- ret = v4l2_subdev_call(isc->current_subdev->sd, pad, enum_frame_size,
+- NULL, &fse);
+- if (ret)
+- return ret;
++ fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
+
+- fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
+- fsize->discrete.width = fse.max_width;
+- fsize->discrete.height = fse.max_height;
++ fsize->stepwise.min_width = 16;
++ fsize->stepwise.max_width = isc->max_width;
++ fsize->stepwise.min_height = 16;
++ fsize->stepwise.max_height = isc->max_height;
++ fsize->stepwise.step_width = 1;
++ fsize->stepwise.step_height = 1;
+
+ return 0;
+ }
+diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
+index 19cc60dfcbe0f..2bfcb135ef13b 100644
+--- a/drivers/media/platform/atmel/atmel-isc.h
++++ b/drivers/media/platform/atmel/atmel-isc.h
+@@ -178,6 +178,7 @@ struct isc_reg_offsets {
+ * @hclock: Hclock clock input (refer datasheet)
+ * @ispck: iscpck clock (refer datasheet)
+ * @isc_clks: ISC clocks
++ * @ispck_required: ISC requires ISP Clock initialization
+ * @dcfg: DMA master configuration, architecture dependent
+ *
+ * @dev: Registered device driver
+@@ -252,6 +253,7 @@ struct isc_device {
+ struct clk *hclock;
+ struct clk *ispck;
+ struct isc_clk isc_clks[2];
++ bool ispck_required;
+ u32 dcfg;
+
+ struct device *dev;
+diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+index b66f1d174e9d7..7421bc51709c4 100644
+--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
++++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+@@ -267,7 +267,7 @@ static void isc_sama5d2_config_rlp(struct isc_device *isc)
+ * Thus, if the YCYC mode is selected, replace it with the
+ * sama5d2-compliant mode which is YYCC .
+ */
+- if ((rlp_mode & ISC_RLP_CFG_MODE_YCYC) == ISC_RLP_CFG_MODE_YCYC) {
++ if ((rlp_mode & ISC_RLP_CFG_MODE_MASK) == ISC_RLP_CFG_MODE_YCYC) {
+ rlp_mode &= ~ISC_RLP_CFG_MODE_MASK;
+ rlp_mode |= ISC_RLP_CFG_MODE_YYCC;
+ }
+@@ -454,6 +454,9 @@ static int atmel_isc_probe(struct platform_device *pdev)
+ /* sama5d2-isc - 8 bits per beat */
+ isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
+
++ /* sama5d2-isc : ISPCK is required and mandatory */
++ isc->ispck_required = true;
++
+ ret = isc_pipeline_init(isc);
+ if (ret)
+ return ret;
+@@ -476,22 +479,6 @@ static int atmel_isc_probe(struct platform_device *pdev)
+ dev_err(dev, "failed to init isc clock: %d\n", ret);
+ goto unprepare_hclk;
+ }
+-
+- isc->ispck = isc->isc_clks[ISC_ISPCK].clk;
+-
+- ret = clk_prepare_enable(isc->ispck);
+- if (ret) {
+- dev_err(dev, "failed to enable ispck: %d\n", ret);
+- goto unprepare_hclk;
+- }
+-
+- /* ispck should be greater or equal to hclock */
+- ret = clk_set_rate(isc->ispck, clk_get_rate(isc->hclock));
+- if (ret) {
+- dev_err(dev, "failed to set ispck rate: %d\n", ret);
+- goto unprepare_clk;
+- }
+-
+ ret = v4l2_device_register(dev, &isc->v4l2_dev);
+ if (ret) {
+ dev_err(dev, "unable to register v4l2 device.\n");
+@@ -545,19 +532,38 @@ static int atmel_isc_probe(struct platform_device *pdev)
+ pm_runtime_enable(dev);
+ pm_request_idle(dev);
+
++ isc->ispck = isc->isc_clks[ISC_ISPCK].clk;
++
++ ret = clk_prepare_enable(isc->ispck);
++ if (ret) {
++ dev_err(dev, "failed to enable ispck: %d\n", ret);
++ goto disable_pm;
++ }
++
++ /* ispck should be greater or equal to hclock */
++ ret = clk_set_rate(isc->ispck, clk_get_rate(isc->hclock));
++ if (ret) {
++ dev_err(dev, "failed to set ispck rate: %d\n", ret);
++ goto unprepare_clk;
++ }
++
+ regmap_read(isc->regmap, ISC_VERSION + isc->offsets.version, &ver);
+ dev_info(dev, "Microchip ISC version %x\n", ver);
+
+ return 0;
+
++unprepare_clk:
++ clk_disable_unprepare(isc->ispck);
++
++disable_pm:
++ pm_runtime_disable(dev);
++
+ cleanup_subdev:
+ isc_subdev_cleanup(isc);
+
+ unregister_v4l2_device:
+ v4l2_device_unregister(&isc->v4l2_dev);
+
+-unprepare_clk:
+- clk_disable_unprepare(isc->ispck);
+ unprepare_hclk:
+ clk_disable_unprepare(isc->hclock);
+
+diff --git a/drivers/media/platform/atmel/atmel-sama7g5-isc.c b/drivers/media/platform/atmel/atmel-sama7g5-isc.c
+index f2785131ff569..a4defc30cf412 100644
+--- a/drivers/media/platform/atmel/atmel-sama7g5-isc.c
++++ b/drivers/media/platform/atmel/atmel-sama7g5-isc.c
+@@ -447,6 +447,9 @@ static int microchip_xisc_probe(struct platform_device *pdev)
+ /* sama7g5-isc RAM access port is full AXI4 - 32 bits per beat */
+ isc->dcfg = ISC_DCFG_YMBSIZE_BEATS32 | ISC_DCFG_CMBSIZE_BEATS32;
+
++ /* sama7g5-isc : ISPCK does not exist, ISC is clocked by MCK */
++ isc->ispck_required = false;
++
+ ret = isc_pipeline_init(isc);
+ if (ret)
+ return ret;
+@@ -470,25 +473,10 @@ static int microchip_xisc_probe(struct platform_device *pdev)
+ goto unprepare_hclk;
+ }
+
+- isc->ispck = isc->isc_clks[ISC_ISPCK].clk;
+-
+- ret = clk_prepare_enable(isc->ispck);
+- if (ret) {
+- dev_err(dev, "failed to enable ispck: %d\n", ret);
+- goto unprepare_hclk;
+- }
+-
+- /* ispck should be greater or equal to hclock */
+- ret = clk_set_rate(isc->ispck, clk_get_rate(isc->hclock));
+- if (ret) {
+- dev_err(dev, "failed to set ispck rate: %d\n", ret);
+- goto unprepare_clk;
+- }
+-
+ ret = v4l2_device_register(dev, &isc->v4l2_dev);
+ if (ret) {
+ dev_err(dev, "unable to register v4l2 device.\n");
+- goto unprepare_clk;
++ goto unprepare_hclk;
+ }
+
+ ret = xisc_parse_dt(dev, isc);
+@@ -549,8 +537,6 @@ cleanup_subdev:
+ unregister_v4l2_device:
+ v4l2_device_unregister(&isc->v4l2_dev);
+
+-unprepare_clk:
+- clk_disable_unprepare(isc->ispck);
+ unprepare_hclk:
+ clk_disable_unprepare(isc->hclock);
+
+@@ -569,7 +555,6 @@ static int microchip_xisc_remove(struct platform_device *pdev)
+
+ v4l2_device_unregister(&isc->v4l2_dev);
+
+- clk_disable_unprepare(isc->ispck);
+ clk_disable_unprepare(isc->hclock);
+
+ isc_clk_cleanup(isc);
+@@ -581,7 +566,6 @@ static int __maybe_unused xisc_runtime_suspend(struct device *dev)
+ {
+ struct isc_device *isc = dev_get_drvdata(dev);
+
+- clk_disable_unprepare(isc->ispck);
+ clk_disable_unprepare(isc->hclock);
+
+ return 0;
+@@ -596,10 +580,6 @@ static int __maybe_unused xisc_runtime_resume(struct device *dev)
+ if (ret)
+ return ret;
+
+- ret = clk_prepare_enable(isc->ispck);
+- if (ret)
+- clk_disable_unprepare(isc->hclock);
+-
+ return ret;
+ }
+
+@@ -607,11 +587,13 @@ static const struct dev_pm_ops microchip_xisc_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(xisc_runtime_suspend, xisc_runtime_resume, NULL)
+ };
+
++#if IS_ENABLED(CONFIG_OF)
+ static const struct of_device_id microchip_xisc_of_match[] = {
+ { .compatible = "microchip,sama7g5-isc" },
+ { }
+ };
+ MODULE_DEVICE_TABLE(of, microchip_xisc_of_match);
++#endif
+
+ static struct platform_driver microchip_xisc_driver = {
+ .probe = microchip_xisc_probe,
+diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c
+index c484c008ab027..582a6c581f3c3 100644
+--- a/drivers/media/platform/coda/coda-bit.c
++++ b/drivers/media/platform/coda/coda-bit.c
+@@ -852,7 +852,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
+ /* Only H.264BP and H.263P3 are considered */
+ iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
+ iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
+- if (!iram_info->buf_dbk_c_use)
++ if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
+ goto out;
+ iram_info->axi_sram_use |= dbk_bits;
+
+@@ -876,7 +876,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
+
+ iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
+ iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
+- if (!iram_info->buf_dbk_c_use)
++ if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
+ goto out;
+ iram_info->axi_sram_use |= dbk_bits;
+
+@@ -1082,10 +1082,16 @@ static int coda_start_encoding(struct coda_ctx *ctx)
+ }
+
+ if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
+- if (!ctx->params.jpeg_qmat_tab[0])
++ if (!ctx->params.jpeg_qmat_tab[0]) {
+ ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
+- if (!ctx->params.jpeg_qmat_tab[1])
++ if (!ctx->params.jpeg_qmat_tab[0])
++ return -ENOMEM;
++ }
++ if (!ctx->params.jpeg_qmat_tab[1]) {
+ ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
++ if (!ctx->params.jpeg_qmat_tab[1])
++ return -ENOMEM;
++ }
+ coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
+ }
+
+diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
+index 0e312b0842d7f..b4b85a19f7d64 100644
+--- a/drivers/media/platform/coda/coda-common.c
++++ b/drivers/media/platform/coda/coda-common.c
+@@ -408,6 +408,7 @@ static struct vdoa_data *coda_get_vdoa_data(void)
+ if (!vdoa_data)
+ vdoa_data = ERR_PTR(-EPROBE_DEFER);
+
++ put_device(&vdoa_pdev->dev);
+ out:
+ of_node_put(vdoa_node);
+
+@@ -1317,7 +1318,8 @@ static int coda_enum_frameintervals(struct file *file, void *fh,
+ struct v4l2_frmivalenum *f)
+ {
+ struct coda_ctx *ctx = fh_to_ctx(fh);
+- int i;
++ struct coda_q_data *q_data;
++ const struct coda_codec *codec;
+
+ if (f->index)
+ return -EINVAL;
+@@ -1326,12 +1328,19 @@ static int coda_enum_frameintervals(struct file *file, void *fh,
+ if (!ctx->vdoa && f->pixel_format == V4L2_PIX_FMT_YUYV)
+ return -EINVAL;
+
+- for (i = 0; i < CODA_MAX_FORMATS; i++) {
+- if (f->pixel_format == ctx->cvd->src_formats[i] ||
+- f->pixel_format == ctx->cvd->dst_formats[i])
+- break;
++ if (coda_format_normalize_yuv(f->pixel_format) == V4L2_PIX_FMT_YUV420) {
++ q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++ codec = coda_find_codec(ctx->dev, f->pixel_format,
++ q_data->fourcc);
++ } else {
++ codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
++ f->pixel_format);
+ }
+- if (i == CODA_MAX_FORMATS)
++ if (!codec)
++ return -EINVAL;
++
++ if (f->width < MIN_W || f->width > codec->max_w ||
++ f->height < MIN_H || f->height > codec->max_h)
+ return -EINVAL;
+
+ f->type = V4L2_FRMIVAL_TYPE_CONTINUOUS;
+@@ -1537,11 +1546,13 @@ static void coda_pic_run_work(struct work_struct *work)
+
+ if (!wait_for_completion_timeout(&ctx->completion,
+ msecs_to_jiffies(1000))) {
+- dev_err(dev->dev, "CODA PIC_RUN timeout\n");
++ if (ctx->use_bit) {
++ dev_err(dev->dev, "CODA PIC_RUN timeout\n");
+
+- ctx->hold = true;
++ ctx->hold = true;
+
+- coda_hw_reset(ctx);
++ coda_hw_reset(ctx);
++ }
+
+ if (ctx->ops->run_timeout)
+ ctx->ops->run_timeout(ctx);
+@@ -2335,8 +2346,8 @@ static void coda_encode_ctrls(struct coda_ctx *ctx)
+ V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET, -12, 12, 1, 0);
+ v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops,
+ V4L2_CID_MPEG_VIDEO_H264_PROFILE,
+- V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, 0x0,
+- V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE);
++ V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE, 0x0,
++ V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE);
+ if (ctx->dev->devtype->product == CODA_HX4 ||
+ ctx->dev->devtype->product == CODA_7541) {
+ v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops,
+@@ -2350,12 +2361,15 @@ static void coda_encode_ctrls(struct coda_ctx *ctx)
+ if (ctx->dev->devtype->product == CODA_960) {
+ v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops,
+ V4L2_CID_MPEG_VIDEO_H264_LEVEL,
+- V4L2_MPEG_VIDEO_H264_LEVEL_4_0,
+- ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) |
++ V4L2_MPEG_VIDEO_H264_LEVEL_4_2,
++ ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_1_0) |
++ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) |
+ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_0) |
+ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_1) |
+ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_2) |
+- (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0)),
++ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0) |
++ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_1) |
++ (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_2)),
+ V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
+ }
+ v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops,
+@@ -2417,7 +2431,7 @@ static void coda_decode_ctrls(struct coda_ctx *ctx)
+ ctx->h264_profile_ctrl = v4l2_ctrl_new_std_menu(&ctx->ctrls,
+ &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE,
+ V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
+- ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE) |
++ ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE) |
+ (1 << V4L2_MPEG_VIDEO_H264_PROFILE_MAIN) |
+ (1 << V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)),
+ V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
+diff --git a/drivers/media/platform/coda/coda-jpeg.c b/drivers/media/platform/coda/coda-jpeg.c
+index b11cfbe166dd3..b7bf529f18f77 100644
+--- a/drivers/media/platform/coda/coda-jpeg.c
++++ b/drivers/media/platform/coda/coda-jpeg.c
+@@ -1052,10 +1052,16 @@ static int coda9_jpeg_start_encoding(struct coda_ctx *ctx)
+ v4l2_err(&dev->v4l2_dev, "error loading Huffman tables\n");
+ return ret;
+ }
+- if (!ctx->params.jpeg_qmat_tab[0])
++ if (!ctx->params.jpeg_qmat_tab[0]) {
+ ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
+- if (!ctx->params.jpeg_qmat_tab[1])
++ if (!ctx->params.jpeg_qmat_tab[0])
++ return -ENOMEM;
++ }
++ if (!ctx->params.jpeg_qmat_tab[1]) {
+ ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
++ if (!ctx->params.jpeg_qmat_tab[1])
++ return -ENOMEM;
++ }
+ coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
+
+ return 0;
+@@ -1127,7 +1133,8 @@ static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx)
+ coda_write(dev, 0, CODA9_REG_JPEG_GBU_BT_PTR);
+ coda_write(dev, 0, CODA9_REG_JPEG_GBU_WD_PTR);
+ coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR);
+- coda_write(dev, 0, CODA9_REG_JPEG_BBC_STRM_CTRL);
++ coda_write(dev, BIT(31) | ((end_addr - start_addr - header_len) / 256),
++ CODA9_REG_JPEG_BBC_STRM_CTRL);
+ coda_write(dev, 0, CODA9_REG_JPEG_GBU_CTRL);
+ coda_write(dev, 0, CODA9_REG_JPEG_GBU_FF_RPTR);
+ coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER);
+@@ -1257,6 +1264,23 @@ static void coda9_jpeg_finish_encode(struct coda_ctx *ctx)
+ coda_hw_reset(ctx);
+ }
+
++static void coda9_jpeg_encode_timeout(struct coda_ctx *ctx)
++{
++ struct coda_dev *dev = ctx->dev;
++ u32 end_addr, wr_ptr;
++
++ /* Handle missing BBC overflow interrupt via timeout */
++ end_addr = coda_read(dev, CODA9_REG_JPEG_BBC_END_ADDR);
++ wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR);
++ if (wr_ptr >= end_addr - 256) {
++ v4l2_err(&dev->v4l2_dev, "JPEG too large for capture buffer\n");
++ coda9_jpeg_finish_encode(ctx);
++ return;
++ }
++
++ coda_hw_reset(ctx);
++}
++
+ static void coda9_jpeg_release(struct coda_ctx *ctx)
+ {
+ int i;
+@@ -1276,6 +1300,7 @@ const struct coda_context_ops coda9_jpeg_encode_ops = {
+ .start_streaming = coda9_jpeg_start_encoding,
+ .prepare_run = coda9_jpeg_prepare_encode,
+ .finish_run = coda9_jpeg_finish_encode,
++ .run_timeout = coda9_jpeg_encode_timeout,
+ .release = coda9_jpeg_release,
+ };
+
+diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c
+index 8bc0d83718193..dd6e2e320264e 100644
+--- a/drivers/media/platform/coda/imx-vdoa.c
++++ b/drivers/media/platform/coda/imx-vdoa.c
+@@ -287,7 +287,11 @@ static int vdoa_probe(struct platform_device *pdev)
+ struct resource *res;
+ int ret;
+
+- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
++ ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
++ if (ret) {
++ dev_err(&pdev->dev, "DMA enable failed\n");
++ return ret;
++ }
+
+ vdoa = devm_kzalloc(&pdev->dev, sizeof(*vdoa), GFP_KERNEL);
+ if (!vdoa)
+diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c
+index f1ce10828b8e5..8ffc01c606d0c 100644
+--- a/drivers/media/platform/davinci/vpif.c
++++ b/drivers/media/platform/davinci/vpif.c
+@@ -41,6 +41,11 @@ MODULE_ALIAS("platform:" VPIF_DRIVER_NAME);
+ #define VPIF_CH2_MAX_MODES 15
+ #define VPIF_CH3_MAX_MODES 2
+
++struct vpif_data {
++ struct platform_device *capture;
++ struct platform_device *display;
++};
++
+ DEFINE_SPINLOCK(vpif_lock);
+ EXPORT_SYMBOL_GPL(vpif_lock);
+
+@@ -423,17 +428,32 @@ int vpif_channel_getfid(u8 channel_id)
+ }
+ EXPORT_SYMBOL(vpif_channel_getfid);
+
++static void vpif_pdev_release(struct device *dev)
++{
++ struct platform_device *pdev = to_platform_device(dev);
++
++ kfree(pdev);
++}
++
+ static int vpif_probe(struct platform_device *pdev)
+ {
+ static struct resource *res, *res_irq;
+ struct platform_device *pdev_capture, *pdev_display;
+ struct device_node *endpoint = NULL;
++ struct vpif_data *data;
++ int ret;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ vpif_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(vpif_base))
+ return PTR_ERR(vpif_base);
+
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (!data)
++ return -ENOMEM;
++
++ platform_set_drvdata(pdev, data);
++
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_get(&pdev->dev);
+
+@@ -457,46 +477,79 @@ static int vpif_probe(struct platform_device *pdev)
+ res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (!res_irq) {
+ dev_warn(&pdev->dev, "Missing IRQ resource.\n");
+- pm_runtime_put(&pdev->dev);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_put_rpm;
+ }
+
+- pdev_capture = devm_kzalloc(&pdev->dev, sizeof(*pdev_capture),
+- GFP_KERNEL);
+- if (pdev_capture) {
+- pdev_capture->name = "vpif_capture";
+- pdev_capture->id = -1;
+- pdev_capture->resource = res_irq;
+- pdev_capture->num_resources = 1;
+- pdev_capture->dev.dma_mask = pdev->dev.dma_mask;
+- pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
+- pdev_capture->dev.parent = &pdev->dev;
+- platform_device_register(pdev_capture);
+- } else {
+- dev_warn(&pdev->dev, "Unable to allocate memory for pdev_capture.\n");
++ pdev_capture = kzalloc(sizeof(*pdev_capture), GFP_KERNEL);
++ if (!pdev_capture) {
++ ret = -ENOMEM;
++ goto err_put_rpm;
+ }
+
+- pdev_display = devm_kzalloc(&pdev->dev, sizeof(*pdev_display),
+- GFP_KERNEL);
+- if (pdev_display) {
+- pdev_display->name = "vpif_display";
+- pdev_display->id = -1;
+- pdev_display->resource = res_irq;
+- pdev_display->num_resources = 1;
+- pdev_display->dev.dma_mask = pdev->dev.dma_mask;
+- pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
+- pdev_display->dev.parent = &pdev->dev;
+- platform_device_register(pdev_display);
+- } else {
+- dev_warn(&pdev->dev, "Unable to allocate memory for pdev_display.\n");
++ pdev_capture->name = "vpif_capture";
++ pdev_capture->id = -1;
++ pdev_capture->resource = res_irq;
++ pdev_capture->num_resources = 1;
++ pdev_capture->dev.dma_mask = pdev->dev.dma_mask;
++ pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
++ pdev_capture->dev.parent = &pdev->dev;
++ pdev_capture->dev.release = vpif_pdev_release;
++
++ ret = platform_device_register(pdev_capture);
++ if (ret)
++ goto err_put_pdev_capture;
++
++ pdev_display = kzalloc(sizeof(*pdev_display), GFP_KERNEL);
++ if (!pdev_display) {
++ ret = -ENOMEM;
++ goto err_put_pdev_capture;
+ }
+
++ pdev_display->name = "vpif_display";
++ pdev_display->id = -1;
++ pdev_display->resource = res_irq;
++ pdev_display->num_resources = 1;
++ pdev_display->dev.dma_mask = pdev->dev.dma_mask;
++ pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask;
++ pdev_display->dev.parent = &pdev->dev;
++ pdev_display->dev.release = vpif_pdev_release;
++
++ ret = platform_device_register(pdev_display);
++ if (ret)
++ goto err_put_pdev_display;
++
++ data->capture = pdev_capture;
++ data->display = pdev_display;
++
+ return 0;
++
++err_put_pdev_display:
++ platform_device_put(pdev_display);
++err_put_pdev_capture:
++ platform_device_put(pdev_capture);
++err_put_rpm:
++ pm_runtime_put(&pdev->dev);
++ pm_runtime_disable(&pdev->dev);
++ kfree(data);
++
++ return ret;
+ }
+
+ static int vpif_remove(struct platform_device *pdev)
+ {
++ struct vpif_data *data = platform_get_drvdata(pdev);
++
++ if (data->capture)
++ platform_device_unregister(data->capture);
++ if (data->display)
++ platform_device_unregister(data->display);
++
++ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
++
++ kfree(data);
++
+ return 0;
+ }
+
+diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c
+index 7ff4024003f4a..0b3cf01e9537e 100644
+--- a/drivers/media/platform/exynos4-is/fimc-capture.c
++++ b/drivers/media/platform/exynos4-is/fimc-capture.c
+@@ -763,7 +763,10 @@ static int fimc_pipeline_try_format(struct fimc_ctx *ctx,
+ struct fimc_dev *fimc = ctx->fimc_dev;
+ struct fimc_pipeline *p = to_fimc_pipeline(fimc->vid_cap.ve.pipe);
+ struct v4l2_subdev *sd = p->subdevs[IDX_SENSOR];
+- struct v4l2_subdev_format sfmt;
++ struct v4l2_subdev_format sfmt = {
++ .which = set ? V4L2_SUBDEV_FORMAT_ACTIVE
++ : V4L2_SUBDEV_FORMAT_TRY,
++ };
+ struct v4l2_mbus_framefmt *mf = &sfmt.format;
+ struct media_entity *me;
+ struct fimc_fmt *ffmt;
+@@ -774,9 +777,7 @@ static int fimc_pipeline_try_format(struct fimc_ctx *ctx,
+ if (WARN_ON(!sd || !tfmt))
+ return -EINVAL;
+
+- memset(&sfmt, 0, sizeof(sfmt));
+ sfmt.format = *tfmt;
+- sfmt.which = set ? V4L2_SUBDEV_FORMAT_ACTIVE : V4L2_SUBDEV_FORMAT_TRY;
+
+ me = fimc_pipeline_get_head(&sd->entity);
+
+diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c
+index bfdee771cef9d..4afe0b9b17730 100644
+--- a/drivers/media/platform/exynos4-is/fimc-core.c
++++ b/drivers/media/platform/exynos4-is/fimc-core.c
+@@ -1174,7 +1174,7 @@ int __init fimc_register_driver(void)
+ return platform_driver_register(&fimc_driver);
+ }
+
+-void __exit fimc_unregister_driver(void)
++void fimc_unregister_driver(void)
+ {
+ platform_driver_unregister(&fimc_driver);
+ }
+diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c
+index e55e411038f48..a7704ff069d6c 100644
+--- a/drivers/media/platform/exynos4-is/fimc-is.c
++++ b/drivers/media/platform/exynos4-is/fimc-is.c
+@@ -140,7 +140,7 @@ static int fimc_is_enable_clocks(struct fimc_is *is)
+ dev_err(&is->pdev->dev, "clock %s enable failed\n",
+ fimc_is_clocks[i]);
+ for (--i; i >= 0; i--)
+- clk_disable(is->clocks[i]);
++ clk_disable_unprepare(is->clocks[i]);
+ return ret;
+ }
+ pr_debug("enabled clock: %s\n", fimc_is_clocks[i]);
+@@ -213,6 +213,7 @@ static int fimc_is_register_subdevs(struct fimc_is *is)
+
+ if (ret < 0 || index >= FIMC_IS_SENSORS_NUM) {
+ of_node_put(child);
++ of_node_put(i2c_bus);
+ return ret;
+ }
+ index++;
+@@ -830,7 +831,7 @@ static int fimc_is_probe(struct platform_device *pdev)
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+- goto err_irq;
++ goto err_pm_disable;
+
+ vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32));
+
+@@ -864,6 +865,8 @@ err_pm:
+ pm_runtime_put_noidle(dev);
+ if (!pm_runtime_enabled(dev))
+ fimc_is_runtime_suspend(dev);
++err_pm_disable:
++ pm_runtime_disable(dev);
+ err_irq:
+ free_irq(is->irq, is);
+ err_clk:
+diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.h b/drivers/media/platform/exynos4-is/fimc-isp-video.h
+index edcb3a5e3cb90..2dd4ddbc748a1 100644
+--- a/drivers/media/platform/exynos4-is/fimc-isp-video.h
++++ b/drivers/media/platform/exynos4-is/fimc-isp-video.h
+@@ -32,7 +32,7 @@ static inline int fimc_isp_video_device_register(struct fimc_isp *isp,
+ return 0;
+ }
+
+-void fimc_isp_video_device_unregister(struct fimc_isp *isp,
++static inline void fimc_isp_video_device_unregister(struct fimc_isp *isp,
+ enum v4l2_buf_type type)
+ {
+ }
+diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
+index fa648721eaab9..b19d7c8ddc06b 100644
+--- a/drivers/media/platform/exynos4-is/media-dev.c
++++ b/drivers/media/platform/exynos4-is/media-dev.c
+@@ -1380,9 +1380,7 @@ static int subdev_notifier_bound(struct v4l2_async_notifier *notifier,
+
+ /* Find platform data for this sensor subdev */
+ for (i = 0; i < ARRAY_SIZE(fmd->sensor); i++)
+- if (fmd->sensor[i].asd &&
+- fmd->sensor[i].asd->match.fwnode ==
+- of_fwnode_handle(subdev->dev->of_node))
++ if (fmd->sensor[i].asd == asd)
+ si = &fmd->sensor[i];
+
+ if (si == NULL)
+@@ -1474,7 +1472,7 @@ static int fimc_md_probe(struct platform_device *pdev)
+ pinctrl = devm_pinctrl_get(dev);
+ if (IS_ERR(pinctrl)) {
+ ret = PTR_ERR(pinctrl);
+- if (ret != EPROBE_DEFER)
++ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get pinctrl: %d\n", ret);
+ goto err_clk;
+ }
+@@ -1586,7 +1584,11 @@ static int __init fimc_md_init(void)
+ if (ret)
+ return ret;
+
+- return platform_driver_register(&fimc_md_driver);
++ ret = platform_driver_register(&fimc_md_driver);
++ if (ret)
++ fimc_unregister_driver();
++
++ return ret;
+ }
+
+ static void __exit fimc_md_exit(void)
+diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c
+index 29c604b1b1790..8936d5ce886c2 100644
+--- a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c
++++ b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c
+@@ -76,7 +76,14 @@ void print_wrapper_info(struct device *dev, void __iomem *reg)
+
+ void mxc_jpeg_enable_irq(void __iomem *reg, int slot)
+ {
+- writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN));
++ writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS));
++ writel(0xF0C, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN));
++}
++
++void mxc_jpeg_disable_irq(void __iomem *reg, int slot)
++{
++ writel(0x0, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN));
++ writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS));
+ }
+
+ void mxc_jpeg_sw_reset(void __iomem *reg)
+diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h
+index ae70d3a0dc243..bf4e1973a0661 100644
+--- a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h
++++ b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h
+@@ -53,10 +53,10 @@
+ #define CAST_REC_REGS_SEL CAST_STATUS4
+ #define CAST_LUMTH CAST_STATUS5
+ #define CAST_CHRTH CAST_STATUS6
+-#define CAST_NOMFRSIZE_LO CAST_STATUS7
+-#define CAST_NOMFRSIZE_HI CAST_STATUS8
+-#define CAST_OFBSIZE_LO CAST_STATUS9
+-#define CAST_OFBSIZE_HI CAST_STATUS10
++#define CAST_NOMFRSIZE_LO CAST_STATUS16
++#define CAST_NOMFRSIZE_HI CAST_STATUS17
++#define CAST_OFBSIZE_LO CAST_STATUS18
++#define CAST_OFBSIZE_HI CAST_STATUS19
+
+ #define MXC_MAX_SLOTS 1 /* TODO use all 4 slots*/
+ /* JPEG-Decoder Wrapper Slot Registers 0..3 */
+@@ -125,6 +125,7 @@ u32 mxc_jpeg_get_offset(void __iomem *reg, int slot);
+ void mxc_jpeg_enable_slot(void __iomem *reg, int slot);
+ void mxc_jpeg_set_l_endian(void __iomem *reg, int le);
+ void mxc_jpeg_enable_irq(void __iomem *reg, int slot);
++void mxc_jpeg_disable_irq(void __iomem *reg, int slot);
+ int mxc_jpeg_set_input(void __iomem *reg, u32 in_buf, u32 bufsize);
+ int mxc_jpeg_set_output(void __iomem *reg, u16 out_pitch, u32 out_buf,
+ u16 w, u16 h);
+diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
+index 755138063ee61..e515325683a47 100644
+--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c
++++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
+@@ -49,6 +49,7 @@
+ #include <linux/slab.h>
+ #include <linux/irqreturn.h>
+ #include <linux/interrupt.h>
++#include <linux/pm_runtime.h>
+ #include <linux/pm_domain.h>
+ #include <linux/string.h>
+
+@@ -81,6 +82,7 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 3,
+ .v_align = 3,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ {
+ .name = "ARGB", /* ARGBARGB packed format */
+@@ -92,10 +94,11 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 3,
+ .v_align = 3,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ {
+ .name = "YUV420", /* 1st plane = Y, 2nd plane = UV */
+- .fourcc = V4L2_PIX_FMT_NV12,
++ .fourcc = V4L2_PIX_FMT_NV12M,
+ .subsampling = V4L2_JPEG_CHROMA_SUBSAMPLING_420,
+ .nc = 3,
+ .depth = 12, /* 6 bytes (4Y + UV) for 4 pixels */
+@@ -103,6 +106,7 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 4,
+ .v_align = 4,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ {
+ .name = "YUV422", /* YUYV */
+@@ -114,6 +118,7 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 4,
+ .v_align = 3,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ {
+ .name = "YUV444", /* YUVYUV */
+@@ -125,6 +130,7 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 3,
+ .v_align = 3,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ {
+ .name = "Gray", /* Gray (Y8/Y12) or Single Comp */
+@@ -136,6 +142,7 @@ static const struct mxc_jpeg_fmt mxc_formats[] = {
+ .h_align = 3,
+ .v_align = 3,
+ .flags = MXC_JPEG_FMT_TYPE_RAW,
++ .precision = 8,
+ },
+ };
+
+@@ -294,6 +301,9 @@ struct mxc_jpeg_src_buf {
+ /* mxc-jpeg specific */
+ bool dht_needed;
+ bool jpeg_parse_error;
++ const struct mxc_jpeg_fmt *fmt;
++ int w;
++ int h;
+ };
+
+ static inline struct mxc_jpeg_src_buf *vb2_to_mxc_buf(struct vb2_buffer *vb)
+@@ -306,6 +316,9 @@ static unsigned int debug;
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "Debug level (0-3)");
+
++static void mxc_jpeg_bytesperline(struct mxc_jpeg_q_data *q, u32 precision);
++static void mxc_jpeg_sizeimage(struct mxc_jpeg_q_data *q);
++
+ static void _bswap16(u16 *a)
+ {
+ *a = ((*a & 0x00FF) << 8) | ((*a & 0xFF00) >> 8);
+@@ -389,7 +402,7 @@ static enum mxc_jpeg_image_format mxc_jpeg_fourcc_to_imgfmt(u32 fourcc)
+ return MXC_JPEG_GRAY;
+ case V4L2_PIX_FMT_YUYV:
+ return MXC_JPEG_YUV422;
+- case V4L2_PIX_FMT_NV12:
++ case V4L2_PIX_FMT_NV12M:
+ return MXC_JPEG_YUV420;
+ case V4L2_PIX_FMT_YUV24:
+ return MXC_JPEG_YUV444;
+@@ -493,6 +506,7 @@ static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg,
+ GFP_ATOMIC);
+ if (!cfg_stm)
+ goto err;
++ memset(cfg_stm, 0, MXC_JPEG_MAX_CFG_STREAM);
+ jpeg->slot_data[slot].cfg_stream_vaddr = cfg_stm;
+
+ skip_alloc:
+@@ -531,6 +545,18 @@ static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg,
+ jpeg->slot_data[slot].used = false;
+ }
+
++static void mxc_jpeg_check_and_set_last_buffer(struct mxc_jpeg_ctx *ctx,
++ struct vb2_v4l2_buffer *src_buf,
++ struct vb2_v4l2_buffer *dst_buf)
++{
++ if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src_buf)) {
++ dst_buf->flags |= V4L2_BUF_FLAG_LAST;
++ v4l2_m2m_mark_stopped(ctx->fh.m2m_ctx);
++ notify_eos(ctx);
++ ctx->header_parsed = false;
++ }
++}
++
+ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
+ {
+ struct mxc_jpeg_dev *jpeg = priv;
+@@ -553,15 +579,8 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
+ dev_dbg(dev, "Irq %d on slot %d.\n", irq, slot);
+
+ ctx = v4l2_m2m_get_curr_priv(jpeg->m2m_dev);
+- if (!ctx) {
+- dev_err(dev,
+- "Instance released before the end of transaction.\n");
+- /* soft reset only resets internal state, not registers */
+- mxc_jpeg_sw_reset(reg);
+- /* clear all interrupts */
+- writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS));
++ if (WARN_ON(!ctx))
+ goto job_unlock;
+- }
+
+ if (slot != ctx->slot) {
+ /* TODO investigate when adding multi-instance support */
+@@ -575,6 +594,10 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
+
+ dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+ src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
++ if (!dst_buf || !src_buf) {
++ dev_err(dev, "No source or destination buffer.\n");
++ goto job_unlock;
++ }
+ jpeg_src_buf = vb2_to_mxc_buf(&src_buf->vb2_buf);
+
+ if (dec_ret & SLOT_STATUS_ENC_CONFIG_ERR) {
+@@ -601,6 +624,7 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
+ dev_dbg(dev, "Decoder DHT cfg finished. Start decoding...\n");
+ goto job_unlock;
+ }
++
+ if (jpeg->mode == MXC_JPEG_ENCODE) {
+ payload = readl(reg + MXC_SLOT_OFFSET(slot, SLOT_BUF_PTR));
+ vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
+@@ -628,7 +652,9 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
+ buf_state = VB2_BUF_STATE_DONE;
+
+ buffers_done:
++ mxc_jpeg_disable_irq(reg, ctx->slot);
+ jpeg->slot_data[slot].used = false; /* unused, but don't free */
++ mxc_jpeg_check_and_set_last_buffer(ctx, src_buf, dst_buf);
+ v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_buf_done(src_buf, buf_state);
+@@ -654,7 +680,7 @@ static int mxc_jpeg_fixup_sof(struct mxc_jpeg_sof *sof,
+ _bswap16(&sof->width);
+
+ switch (fourcc) {
+- case V4L2_PIX_FMT_NV12:
++ case V4L2_PIX_FMT_NV12M:
+ sof->components_no = 3;
+ sof->comp[0].v = 0x2;
+ sof->comp[0].h = 0x2;
+@@ -690,7 +716,7 @@ static int mxc_jpeg_fixup_sos(struct mxc_jpeg_sos *sos,
+ u8 *sof_u8 = (u8 *)sos;
+
+ switch (fourcc) {
+- case V4L2_PIX_FMT_NV12:
++ case V4L2_PIX_FMT_NV12M:
+ sos->components_no = 3;
+ break;
+ case V4L2_PIX_FMT_YUYV:
+@@ -724,7 +750,13 @@ static unsigned int mxc_jpeg_setup_cfg_stream(void *cfg_stream_vaddr,
+ u32 fourcc,
+ u16 w, u16 h)
+ {
+- unsigned int offset = 0;
++ /*
++ * There is a hardware issue that first 128 bytes of configuration data
++ * can't be loaded correctly.
++ * To avoid this issue, we need to write the configuration from
++ * an offset which should be no less than 0x80 (128 bytes).
++ */
++ unsigned int offset = 0x80;
+ u8 *cfg = (u8 *)cfg_stream_vaddr;
+ struct mxc_jpeg_sof *sof;
+ struct mxc_jpeg_sos *sos;
+@@ -851,8 +883,8 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
+ jpeg->slot_data[slot].cfg_stream_size =
+ mxc_jpeg_setup_cfg_stream(cfg_stream_vaddr,
+ q_data->fmt->fourcc,
+- q_data->w_adjusted,
+- q_data->h_adjusted);
++ q_data->w,
++ q_data->h);
+
+ /* chain the config descriptor with the encoding descriptor */
+ cfg_desc->next_descpt_ptr = desc_handle | MXC_NXT_DESCPT_EN;
+@@ -890,6 +922,67 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
+ mxc_jpeg_set_desc(cfg_desc_handle, reg, slot);
+ }
+
++static bool mxc_jpeg_source_change(struct mxc_jpeg_ctx *ctx,
++ struct mxc_jpeg_src_buf *jpeg_src_buf)
++{
++ struct device *dev = ctx->mxc_jpeg->dev;
++ struct mxc_jpeg_q_data *q_data_cap;
++
++ if (!jpeg_src_buf->fmt)
++ return false;
++
++ q_data_cap = mxc_jpeg_get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++ if (q_data_cap->fmt != jpeg_src_buf->fmt ||
++ q_data_cap->w != jpeg_src_buf->w ||
++ q_data_cap->h != jpeg_src_buf->h) {
++ dev_dbg(dev, "Detected jpeg res=(%dx%d)->(%dx%d), pixfmt=%c%c%c%c\n",
++ q_data_cap->w, q_data_cap->h,
++ jpeg_src_buf->w, jpeg_src_buf->h,
++ (jpeg_src_buf->fmt->fourcc & 0xff),
++ (jpeg_src_buf->fmt->fourcc >> 8) & 0xff,
++ (jpeg_src_buf->fmt->fourcc >> 16) & 0xff,
++ (jpeg_src_buf->fmt->fourcc >> 24) & 0xff);
++
++ /*
++ * set-up the capture queue with the pixelformat and resolution
++ * detected from the jpeg output stream
++ */
++ q_data_cap->w = jpeg_src_buf->w;
++ q_data_cap->h = jpeg_src_buf->h;
++ q_data_cap->fmt = jpeg_src_buf->fmt;
++ q_data_cap->w_adjusted = q_data_cap->w;
++ q_data_cap->h_adjusted = q_data_cap->h;
++
++ /*
++ * align up the resolution for CAST IP,
++ * but leave the buffer resolution unchanged
++ */
++ v4l_bound_align_image(&q_data_cap->w_adjusted,
++ q_data_cap->w_adjusted, /* adjust up */
++ MXC_JPEG_MAX_WIDTH,
++ q_data_cap->fmt->h_align,
++ &q_data_cap->h_adjusted,
++ q_data_cap->h_adjusted, /* adjust up */
++ MXC_JPEG_MAX_HEIGHT,
++ 0,
++ 0);
++
++ /* setup bytesperline/sizeimage for capture queue */
++ mxc_jpeg_bytesperline(q_data_cap, jpeg_src_buf->fmt->precision);
++ mxc_jpeg_sizeimage(q_data_cap);
++ notify_src_chg(ctx);
++ ctx->source_change = 1;
++ }
++ return ctx->source_change ? true : false;
++}
++
++static int mxc_jpeg_job_ready(void *priv)
++{
++ struct mxc_jpeg_ctx *ctx = priv;
++
++ return ctx->source_change ? 0 : 1;
++}
++
+ static void mxc_jpeg_device_run(void *priv)
+ {
+ struct mxc_jpeg_ctx *ctx = priv;
+@@ -921,8 +1014,14 @@ static void mxc_jpeg_device_run(void *priv)
+ v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
+
+ jpeg_src_buf = vb2_to_mxc_buf(&src_buf->vb2_buf);
++ if (q_data_cap->fmt->colplanes != dst_buf->vb2_buf.num_planes) {
++ dev_err(dev, "Capture format %s has %d planes, but capture buffer has %d planes\n",
++ q_data_cap->fmt->name, q_data_cap->fmt->colplanes,
++ dst_buf->vb2_buf.num_planes);
++ jpeg_src_buf->jpeg_parse_error = true;
++ }
+ if (jpeg_src_buf->jpeg_parse_error) {
+- jpeg->slot_data[ctx->slot].used = false;
++ mxc_jpeg_check_and_set_last_buffer(ctx, src_buf, dst_buf);
+ v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
+@@ -932,6 +1031,13 @@ static void mxc_jpeg_device_run(void *priv)
+
+ return;
+ }
++ if (ctx->mxc_jpeg->mode == MXC_JPEG_DECODE) {
++ if (ctx->source_change || mxc_jpeg_source_change(ctx, jpeg_src_buf)) {
++ spin_unlock_irqrestore(&ctx->mxc_jpeg->hw_lock, flags);
++ v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
++ return;
++ }
++ }
+
+ /*
+ * TODO: this reset should be removed, once we figure out
+@@ -976,24 +1082,28 @@ static int mxc_jpeg_decoder_cmd(struct file *file, void *priv,
+ {
+ struct v4l2_fh *fh = file->private_data;
+ struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(fh);
+- struct device *dev = ctx->mxc_jpeg->dev;
+ int ret;
+
+ ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, cmd);
+ if (ret < 0)
+ return ret;
+
+- if (cmd->cmd == V4L2_DEC_CMD_STOP) {
+- dev_dbg(dev, "Received V4L2_DEC_CMD_STOP");
+- if (v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx) == 0) {
+- /* No more src bufs, notify app EOS */
+- notify_eos(ctx);
+- } else {
+- /* will send EOS later*/
+- ctx->stopping = 1;
+- }
++ if (!vb2_is_streaming(v4l2_m2m_get_src_vq(fh->m2m_ctx)))
++ return 0;
++
++ ret = v4l2_m2m_ioctl_decoder_cmd(file, priv, cmd);
++ if (ret < 0)
++ return ret;
++
++ if (cmd->cmd == V4L2_DEC_CMD_STOP &&
++ v4l2_m2m_has_stopped(fh->m2m_ctx)) {
++ notify_eos(ctx);
++ ctx->header_parsed = false;
+ }
+
++ if (cmd->cmd == V4L2_DEC_CMD_START &&
++ v4l2_m2m_has_stopped(fh->m2m_ctx))
++ vb2_clear_last_buffer_dequeued(&fh->m2m_ctx->cap_q_ctx.q);
+ return 0;
+ }
+
+@@ -1002,23 +1112,27 @@ static int mxc_jpeg_encoder_cmd(struct file *file, void *priv,
+ {
+ struct v4l2_fh *fh = file->private_data;
+ struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(fh);
+- struct device *dev = ctx->mxc_jpeg->dev;
+ int ret;
+
+ ret = v4l2_m2m_ioctl_try_encoder_cmd(file, fh, cmd);
+ if (ret < 0)
+ return ret;
+
+- if (cmd->cmd == V4L2_ENC_CMD_STOP) {
+- dev_dbg(dev, "Received V4L2_ENC_CMD_STOP");
+- if (v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx) == 0) {
+- /* No more src bufs, notify app EOS */
+- notify_eos(ctx);
+- } else {
+- /* will send EOS later*/
+- ctx->stopping = 1;
+- }
+- }
++ if (!vb2_is_streaming(v4l2_m2m_get_src_vq(fh->m2m_ctx)) ||
++ !vb2_is_streaming(v4l2_m2m_get_dst_vq(fh->m2m_ctx)))
++ return 0;
++
++ ret = v4l2_m2m_ioctl_encoder_cmd(file, fh, cmd);
++ if (ret < 0)
++ return 0;
++
++ if (cmd->cmd == V4L2_ENC_CMD_STOP &&
++ v4l2_m2m_has_stopped(fh->m2m_ctx))
++ notify_eos(ctx);
++
++ if (cmd->cmd == V4L2_ENC_CMD_START &&
++ v4l2_m2m_has_stopped(fh->m2m_ctx))
++ vb2_clear_last_buffer_dequeued(&fh->m2m_ctx->cap_q_ctx.q);
+
+ return 0;
+ }
+@@ -1031,16 +1145,28 @@ static int mxc_jpeg_queue_setup(struct vb2_queue *q,
+ {
+ struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(q);
+ struct mxc_jpeg_q_data *q_data = NULL;
++ struct mxc_jpeg_q_data tmp_q;
+ int i;
+
+ q_data = mxc_jpeg_get_q_data(ctx, q->type);
+ if (!q_data)
+ return -EINVAL;
+
++ tmp_q.fmt = q_data->fmt;
++ tmp_q.w = q_data->w_adjusted;
++ tmp_q.h = q_data->h_adjusted;
++ for (i = 0; i < MXC_JPEG_MAX_PLANES; i++) {
++ tmp_q.bytesperline[i] = q_data->bytesperline[i];
++ tmp_q.sizeimage[i] = q_data->sizeimage[i];
++ }
++ mxc_jpeg_sizeimage(&tmp_q);
++ for (i = 0; i < MXC_JPEG_MAX_PLANES; i++)
++ tmp_q.sizeimage[i] = max(tmp_q.sizeimage[i], q_data->sizeimage[i]);
++
+ /* Handle CREATE_BUFS situation - *nplanes != 0 */
+ if (*nplanes) {
+ for (i = 0; i < *nplanes; i++) {
+- if (sizes[i] < q_data->sizeimage[i])
++ if (sizes[i] < tmp_q.sizeimage[i])
+ return -EINVAL;
+ }
+ return 0;
+@@ -1049,7 +1175,7 @@ static int mxc_jpeg_queue_setup(struct vb2_queue *q,
+ /* Handle REQBUFS situation */
+ *nplanes = q_data->fmt->colplanes;
+ for (i = 0; i < *nplanes; i++)
+- sizes[i] = q_data->sizeimage[i];
++ sizes[i] = tmp_q.sizeimage[i];
+
+ return 0;
+ }
+@@ -1058,10 +1184,21 @@ static int mxc_jpeg_start_streaming(struct vb2_queue *q, unsigned int count)
+ {
+ struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(q);
+ struct mxc_jpeg_q_data *q_data = mxc_jpeg_get_q_data(ctx, q->type);
++ int ret;
+
++ v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, q);
++
++ if (ctx->mxc_jpeg->mode == MXC_JPEG_DECODE && V4L2_TYPE_IS_CAPTURE(q->type))
++ ctx->source_change = 0;
+ dev_dbg(ctx->mxc_jpeg->dev, "Start streaming ctx=%p", ctx);
+ q_data->sequence = 0;
+
++ ret = pm_runtime_resume_and_get(ctx->mxc_jpeg->dev);
++ if (ret < 0) {
++ dev_err(ctx->mxc_jpeg->dev, "Failed to power up jpeg\n");
++ return ret;
++ }
++
+ return 0;
+ }
+
+@@ -1079,9 +1216,18 @@ static void mxc_jpeg_stop_streaming(struct vb2_queue *q)
+ else
+ vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+ if (!vbuf)
+- return;
++ break;
+ v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
+ }
++
++ v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, q);
++ if (V4L2_TYPE_IS_OUTPUT(q->type) &&
++ v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) {
++ notify_eos(ctx);
++ ctx->header_parsed = false;
++ }
++
++ pm_runtime_put_sync(&ctx->mxc_jpeg->pdev->dev);
+ }
+
+ static int mxc_jpeg_valid_comp_id(struct device *dev,
+@@ -1121,14 +1267,17 @@ static u32 mxc_jpeg_get_image_format(struct device *dev,
+
+ for (i = 0; i < MXC_JPEG_NUM_FORMATS; i++)
+ if (mxc_formats[i].subsampling == header->frame.subsampling &&
+- mxc_formats[i].nc == header->frame.num_components) {
++ mxc_formats[i].nc == header->frame.num_components &&
++ mxc_formats[i].precision == header->frame.precision) {
+ fourcc = mxc_formats[i].fourcc;
+ break;
+ }
+ if (fourcc == 0) {
+- dev_err(dev, "Could not identify image format nc=%d, subsampling=%d\n",
++ dev_err(dev,
++ "Could not identify image format nc=%d, subsampling=%d, precision=%d\n",
+ header->frame.num_components,
+- header->frame.subsampling);
++ header->frame.subsampling,
++ header->frame.precision);
+ return fourcc;
+ }
+ /*
+@@ -1146,26 +1295,29 @@ static u32 mxc_jpeg_get_image_format(struct device *dev,
+ return fourcc;
+ }
+
+-static void mxc_jpeg_bytesperline(struct mxc_jpeg_q_data *q,
+- u32 precision)
++static void mxc_jpeg_bytesperline(struct mxc_jpeg_q_data *q, u32 precision)
+ {
+ /* Bytes distance between the leftmost pixels in two adjacent lines */
+ if (q->fmt->fourcc == V4L2_PIX_FMT_JPEG) {
+ /* bytesperline unused for compressed formats */
+ q->bytesperline[0] = 0;
+ q->bytesperline[1] = 0;
+- } else if (q->fmt->fourcc == V4L2_PIX_FMT_NV12) {
++ } else if (q->fmt->subsampling == V4L2_JPEG_CHROMA_SUBSAMPLING_420) {
+ /* When the image format is planar the bytesperline value
+ * applies to the first plane and is divided by the same factor
+ * as the width field for the other planes
+ */
+- q->bytesperline[0] = q->w * (precision / 8) *
+- (q->fmt->depth / 8);
++ q->bytesperline[0] = q->w * DIV_ROUND_UP(precision, 8);
+ q->bytesperline[1] = q->bytesperline[0];
++ } else if (q->fmt->subsampling == V4L2_JPEG_CHROMA_SUBSAMPLING_422) {
++ q->bytesperline[0] = q->w * DIV_ROUND_UP(precision, 8) * 2;
++ q->bytesperline[1] = 0;
++ } else if (q->fmt->subsampling == V4L2_JPEG_CHROMA_SUBSAMPLING_444) {
++ q->bytesperline[0] = q->w * DIV_ROUND_UP(precision, 8) * q->fmt->nc;
++ q->bytesperline[1] = 0;
+ } else {
+- /* single plane formats */
+- q->bytesperline[0] = q->w * (precision / 8) *
+- (q->fmt->depth / 8);
++ /* grayscale */
++ q->bytesperline[0] = q->w * DIV_ROUND_UP(precision, 8);
+ q->bytesperline[1] = 0;
+ }
+ }
+@@ -1186,22 +1338,22 @@ static void mxc_jpeg_sizeimage(struct mxc_jpeg_q_data *q)
+ } else {
+ q->sizeimage[0] = q->bytesperline[0] * q->h;
+ q->sizeimage[1] = 0;
+- if (q->fmt->fourcc == V4L2_PIX_FMT_NV12)
++ if (q->fmt->fourcc == V4L2_PIX_FMT_NV12M)
+ q->sizeimage[1] = q->sizeimage[0] / 2;
+ }
+ }
+
+-static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx,
+- u8 *src_addr, u32 size, bool *dht_needed)
++static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx, struct vb2_buffer *vb)
+ {
+ struct device *dev = ctx->mxc_jpeg->dev;
+- struct mxc_jpeg_q_data *q_data_out, *q_data_cap;
+- enum v4l2_buf_type cap_type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+- bool src_chg = false;
++ struct mxc_jpeg_q_data *q_data_out;
+ u32 fourcc;
+ struct v4l2_jpeg_header header;
+ struct mxc_jpeg_sof *psof = NULL;
+ struct mxc_jpeg_sos *psos = NULL;
++ struct mxc_jpeg_src_buf *jpeg_src_buf = vb2_to_mxc_buf(vb);
++ u8 *src_addr = (u8 *)vb2_plane_vaddr(vb, 0);
++ u32 size = vb2_get_plane_payload(vb, 0);
+ int ret;
+
+ memset(&header, 0, sizeof(header));
+@@ -1212,7 +1364,7 @@ static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx,
+ }
+
+ /* if DHT marker present, no need to inject default one */
+- *dht_needed = (header.num_dht == 0);
++ jpeg_src_buf->dht_needed = (header.num_dht == 0);
+
+ q_data_out = mxc_jpeg_get_q_data(ctx,
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
+@@ -1220,21 +1372,15 @@ static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx,
+ dev_warn(dev, "Invalid user resolution 0x0");
+ dev_warn(dev, "Keeping resolution from JPEG: %dx%d",
+ header.frame.width, header.frame.height);
+- q_data_out->w = header.frame.width;
+- q_data_out->h = header.frame.height;
+ } else if (header.frame.width != q_data_out->w ||
+ header.frame.height != q_data_out->h) {
+ dev_err(dev,
+ "Resolution mismatch: %dx%d (JPEG) versus %dx%d(user)",
+ header.frame.width, header.frame.height,
+ q_data_out->w, q_data_out->h);
+- return -EINVAL;
+- }
+- if (header.frame.width % 8 != 0 || header.frame.height % 8 != 0) {
+- dev_err(dev, "JPEG width or height not multiple of 8: %dx%d\n",
+- header.frame.width, header.frame.height);
+- return -EINVAL;
+ }
++ q_data_out->w = header.frame.width;
++ q_data_out->h = header.frame.height;
+ if (header.frame.width > MXC_JPEG_MAX_WIDTH ||
+ header.frame.height > MXC_JPEG_MAX_HEIGHT) {
+ dev_err(dev, "JPEG width or height should be <= 8192: %dx%d\n",
+@@ -1262,51 +1408,13 @@ static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx,
+ if (fourcc == 0)
+ return -EINVAL;
+
+- /*
+- * set-up the capture queue with the pixelformat and resolution
+- * detected from the jpeg output stream
+- */
+- q_data_cap = mxc_jpeg_get_q_data(ctx, cap_type);
+- if (q_data_cap->w != header.frame.width ||
+- q_data_cap->h != header.frame.height)
+- src_chg = true;
+- q_data_cap->w = header.frame.width;
+- q_data_cap->h = header.frame.height;
+- q_data_cap->fmt = mxc_jpeg_find_format(ctx, fourcc);
+- q_data_cap->w_adjusted = q_data_cap->w;
+- q_data_cap->h_adjusted = q_data_cap->h;
+- /*
+- * align up the resolution for CAST IP,
+- * but leave the buffer resolution unchanged
+- */
+- v4l_bound_align_image(&q_data_cap->w_adjusted,
+- q_data_cap->w_adjusted, /* adjust up */
+- MXC_JPEG_MAX_WIDTH,
+- q_data_cap->fmt->h_align,
+- &q_data_cap->h_adjusted,
+- q_data_cap->h_adjusted, /* adjust up */
+- MXC_JPEG_MAX_HEIGHT,
+- q_data_cap->fmt->v_align,
+- 0);
+- dev_dbg(dev, "Detected jpeg res=(%dx%d)->(%dx%d), pixfmt=%c%c%c%c\n",
+- q_data_cap->w, q_data_cap->h,
+- q_data_cap->w_adjusted, q_data_cap->h_adjusted,
+- (fourcc & 0xff),
+- (fourcc >> 8) & 0xff,
+- (fourcc >> 16) & 0xff,
+- (fourcc >> 24) & 0xff);
+-
+- /* setup bytesperline/sizeimage for capture queue */
+- mxc_jpeg_bytesperline(q_data_cap, header.frame.precision);
+- mxc_jpeg_sizeimage(q_data_cap);
++ jpeg_src_buf->fmt = mxc_jpeg_find_format(ctx, fourcc);
++ jpeg_src_buf->w = header.frame.width;
++ jpeg_src_buf->h = header.frame.height;
++ ctx->header_parsed = true;
+
+- /*
+- * if the CAPTURE format was updated with new values, regardless of
+- * whether they match the values set by the client or not, signal
+- * a source change event
+- */
+- if (src_chg)
+- notify_src_chg(ctx);
++ if (!v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx))
++ mxc_jpeg_source_change(ctx, jpeg_src_buf);
+
+ return 0;
+ }
+@@ -1318,6 +1426,20 @@ static void mxc_jpeg_buf_queue(struct vb2_buffer *vb)
+ struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+ struct mxc_jpeg_src_buf *jpeg_src_buf;
+
++ if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type) &&
++ vb2_is_streaming(vb->vb2_queue) &&
++ v4l2_m2m_dst_buf_is_last(ctx->fh.m2m_ctx)) {
++ struct mxc_jpeg_q_data *q_data;
++
++ q_data = mxc_jpeg_get_q_data(ctx, vb->vb2_queue->type);
++ vbuf->field = V4L2_FIELD_NONE;
++ vbuf->sequence = q_data->sequence++;
++ v4l2_m2m_last_buffer_done(ctx->fh.m2m_ctx, vbuf);
++ notify_eos(ctx);
++ ctx->header_parsed = false;
++ return;
++ }
++
+ if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
+ goto end;
+
+@@ -1327,10 +1449,7 @@ static void mxc_jpeg_buf_queue(struct vb2_buffer *vb)
+
+ jpeg_src_buf = vb2_to_mxc_buf(vb);
+ jpeg_src_buf->jpeg_parse_error = false;
+- ret = mxc_jpeg_parse(ctx,
+- (u8 *)vb2_plane_vaddr(vb, 0),
+- vb2_get_plane_payload(vb, 0),
+- &jpeg_src_buf->dht_needed);
++ ret = mxc_jpeg_parse(ctx, vb);
+ if (ret)
+ jpeg_src_buf->jpeg_parse_error = true;
+
+@@ -1370,6 +1489,10 @@ static int mxc_jpeg_buf_prepare(struct vb2_buffer *vb)
+ }
+ vb2_set_plane_payload(vb, i, sizeimage);
+ }
++ if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type)) {
++ vb2_set_plane_payload(vb, 0, 0);
++ vb2_set_plane_payload(vb, 1, 0);
++ }
+ return 0;
+ }
+
+@@ -1439,7 +1562,7 @@ static void mxc_jpeg_set_default_params(struct mxc_jpeg_ctx *ctx)
+ q[i]->h = MXC_JPEG_DEFAULT_HEIGHT;
+ q[i]->w_adjusted = MXC_JPEG_DEFAULT_WIDTH;
+ q[i]->h_adjusted = MXC_JPEG_DEFAULT_HEIGHT;
+- mxc_jpeg_bytesperline(q[i], 8);
++ mxc_jpeg_bytesperline(q[i], q[i]->fmt->precision);
+ mxc_jpeg_sizeimage(q[i]);
+ }
+ }
+@@ -1514,26 +1637,42 @@ static int mxc_jpeg_enum_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f)
+ {
+ struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv);
++ struct mxc_jpeg_q_data *q_data = mxc_jpeg_get_q_data(ctx, f->type);
+
+- if (ctx->mxc_jpeg->mode == MXC_JPEG_ENCODE)
++ if (ctx->mxc_jpeg->mode == MXC_JPEG_ENCODE) {
+ return enum_fmt(mxc_formats, MXC_JPEG_NUM_FORMATS, f,
+ MXC_JPEG_FMT_TYPE_ENC);
+- else
++ } else if (!ctx->header_parsed) {
+ return enum_fmt(mxc_formats, MXC_JPEG_NUM_FORMATS, f,
+ MXC_JPEG_FMT_TYPE_RAW);
++ } else {
++ /* For the decoder CAPTURE queue, only enumerate the raw formats
++ * supported for the format currently active on OUTPUT
++ * (more precisely what was propagated on capture queue
++ * after jpeg parse on the output buffer)
++ */
++ if (f->index)
++ return -EINVAL;
++ f->pixelformat = q_data->fmt->fourcc;
++ strscpy(f->description, q_data->fmt->name, sizeof(f->description));
++ return 0;
++ }
+ }
+
+ static int mxc_jpeg_enum_fmt_vid_out(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f)
+ {
+ struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv);
++ u32 type = ctx->mxc_jpeg->mode == MXC_JPEG_DECODE ? MXC_JPEG_FMT_TYPE_ENC :
++ MXC_JPEG_FMT_TYPE_RAW;
++ int ret;
+
++ ret = enum_fmt(mxc_formats, MXC_JPEG_NUM_FORMATS, f, type);
++ if (ret)
++ return ret;
+ if (ctx->mxc_jpeg->mode == MXC_JPEG_DECODE)
+- return enum_fmt(mxc_formats, MXC_JPEG_NUM_FORMATS, f,
+- MXC_JPEG_FMT_TYPE_ENC);
+- else
+- return enum_fmt(mxc_formats, MXC_JPEG_NUM_FORMATS, f,
+- MXC_JPEG_FMT_TYPE_RAW);
++ f->flags = V4L2_FMT_FLAG_DYN_RESOLUTION;
++ return 0;
+ }
+
+ static int mxc_jpeg_try_fmt(struct v4l2_format *f, const struct mxc_jpeg_fmt *fmt,
+@@ -1553,22 +1692,17 @@ static int mxc_jpeg_try_fmt(struct v4l2_format *f, const struct mxc_jpeg_fmt *fm
+ pix_mp->num_planes = fmt->colplanes;
+ pix_mp->pixelformat = fmt->fourcc;
+
+- /*
+- * use MXC_JPEG_H_ALIGN instead of fmt->v_align, for vertical
+- * alignment, to loosen up the alignment to multiple of 8,
+- * otherwise NV12-1080p fails as 1080 is not a multiple of 16
+- */
++ pix_mp->width = w;
++ pix_mp->height = h;
+ v4l_bound_align_image(&w,
+- MXC_JPEG_MIN_WIDTH,
+- w, /* adjust downwards*/
++ w, /* adjust upwards*/
++ MXC_JPEG_MAX_WIDTH,
+ fmt->h_align,
+ &h,
+- MXC_JPEG_MIN_HEIGHT,
+- h, /* adjust downwards*/
+- MXC_JPEG_H_ALIGN,
++ h, /* adjust upwards*/
++ MXC_JPEG_MAX_HEIGHT,
++ 0,
+ 0);
+- pix_mp->width = w; /* negotiate the width */
+- pix_mp->height = h; /* negotiate the height */
+
+ /* get user input into the tmp_q */
+ tmp_q.w = w;
+@@ -1581,7 +1715,7 @@ static int mxc_jpeg_try_fmt(struct v4l2_format *f, const struct mxc_jpeg_fmt *fm
+ }
+
+ /* calculate bytesperline & sizeimage into the tmp_q */
+- mxc_jpeg_bytesperline(&tmp_q, 8);
++ mxc_jpeg_bytesperline(&tmp_q, fmt->precision);
+ mxc_jpeg_sizeimage(&tmp_q);
+
+ /* adjust user format according to our calculations */
+@@ -1694,35 +1828,19 @@ static int mxc_jpeg_s_fmt(struct mxc_jpeg_ctx *ctx,
+
+ q_data->w_adjusted = q_data->w;
+ q_data->h_adjusted = q_data->h;
+- if (jpeg->mode == MXC_JPEG_DECODE) {
+- /*
+- * align up the resolution for CAST IP,
+- * but leave the buffer resolution unchanged
+- */
+- v4l_bound_align_image(&q_data->w_adjusted,
+- q_data->w_adjusted, /* adjust upwards */
+- MXC_JPEG_MAX_WIDTH,
+- q_data->fmt->h_align,
+- &q_data->h_adjusted,
+- q_data->h_adjusted, /* adjust upwards */
+- MXC_JPEG_MAX_HEIGHT,
+- q_data->fmt->v_align,
+- 0);
+- } else {
+- /*
+- * align down the resolution for CAST IP,
+- * but leave the buffer resolution unchanged
+- */
+- v4l_bound_align_image(&q_data->w_adjusted,
+- MXC_JPEG_MIN_WIDTH,
+- q_data->w_adjusted, /* adjust downwards*/
+- q_data->fmt->h_align,
+- &q_data->h_adjusted,
+- MXC_JPEG_MIN_HEIGHT,
+- q_data->h_adjusted, /* adjust downwards*/
+- q_data->fmt->v_align,
+- 0);
+- }
++ /*
++ * align up the resolution for CAST IP,
++ * but leave the buffer resolution unchanged
++ */
++ v4l_bound_align_image(&q_data->w_adjusted,
++ q_data->w_adjusted, /* adjust upwards */
++ MXC_JPEG_MAX_WIDTH,
++ q_data->fmt->h_align,
++ &q_data->h_adjusted,
++ q_data->h_adjusted, /* adjust upwards */
++ MXC_JPEG_MAX_HEIGHT,
++ q_data->fmt->v_align,
++ 0);
+
+ for (i = 0; i < pix_mp->num_planes; i++) {
+ q_data->bytesperline[i] = pix_mp->plane_fmt[i].bytesperline;
+@@ -1804,27 +1922,6 @@ static int mxc_jpeg_subscribe_event(struct v4l2_fh *fh,
+ }
+ }
+
+-static int mxc_jpeg_dqbuf(struct file *file, void *priv,
+- struct v4l2_buffer *buf)
+-{
+- struct v4l2_fh *fh = file->private_data;
+- struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv);
+- struct device *dev = ctx->mxc_jpeg->dev;
+- int num_src_ready = v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx);
+- int ret;
+-
+- dev_dbg(dev, "DQBUF type=%d, index=%d", buf->type, buf->index);
+- if (ctx->stopping == 1 && num_src_ready == 0) {
+- /* No more src bufs, notify app EOS */
+- notify_eos(ctx);
+- ctx->stopping = 0;
+- }
+-
+- ret = v4l2_m2m_dqbuf(file, fh->m2m_ctx, buf);
+-
+- return ret;
+-}
+-
+ static const struct v4l2_ioctl_ops mxc_jpeg_ioctl_ops = {
+ .vidioc_querycap = mxc_jpeg_querycap,
+ .vidioc_enum_fmt_vid_cap = mxc_jpeg_enum_fmt_vid_cap,
+@@ -1848,7 +1945,7 @@ static const struct v4l2_ioctl_ops mxc_jpeg_ioctl_ops = {
+ .vidioc_encoder_cmd = mxc_jpeg_encoder_cmd,
+
+ .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
+- .vidioc_dqbuf = mxc_jpeg_dqbuf,
++ .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
+
+ .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
+ .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
+@@ -1891,6 +1988,7 @@ static const struct v4l2_file_operations mxc_jpeg_fops = {
+ };
+
+ static const struct v4l2_m2m_ops mxc_jpeg_m2m_ops = {
++ .job_ready = mxc_jpeg_job_ready,
+ .device_run = mxc_jpeg_device_run,
+ };
+
+@@ -1941,8 +2039,7 @@ static int mxc_jpeg_attach_pm_domains(struct mxc_jpeg_dev *jpeg)
+
+ jpeg->pd_link[i] = device_link_add(dev, jpeg->pd_dev[i],
+ DL_FLAG_STATELESS |
+- DL_FLAG_PM_RUNTIME |
+- DL_FLAG_RPM_ACTIVE);
++ DL_FLAG_PM_RUNTIME);
+ if (!jpeg->pd_link[i]) {
+ ret = -EINVAL;
+ goto fail;
+@@ -2007,6 +2104,14 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
+ jpeg->dev = dev;
+ jpeg->mode = mode;
+
++ /* Get clocks */
++ ret = devm_clk_bulk_get_all(&pdev->dev, &jpeg->clks);
++ if (ret < 0) {
++ dev_err(dev, "failed to get clock\n");
++ goto err_clk;
++ }
++ jpeg->num_clks = ret;
++
+ ret = mxc_jpeg_attach_pm_domains(jpeg);
+ if (ret < 0) {
+ dev_err(dev, "failed to attach power domains %d\n", ret);
+@@ -2075,6 +2180,7 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
+ jpeg->dec_vdev->minor);
+
+ platform_set_drvdata(pdev, jpeg);
++ pm_runtime_enable(dev);
+
+ return 0;
+
+@@ -2088,10 +2194,43 @@ err_m2m:
+ v4l2_device_unregister(&jpeg->v4l2_dev);
+
+ err_register:
++ mxc_jpeg_detach_pm_domains(jpeg);
++
+ err_irq:
++err_clk:
+ return ret;
+ }
+
++#ifdef CONFIG_PM
++static int mxc_jpeg_runtime_resume(struct device *dev)
++{
++ struct mxc_jpeg_dev *jpeg = dev_get_drvdata(dev);
++ int ret;
++
++ ret = clk_bulk_prepare_enable(jpeg->num_clks, jpeg->clks);
++ if (ret < 0) {
++ dev_err(dev, "failed to enable clock\n");
++ return ret;
++ }
++
++ return 0;
++}
++
++static int mxc_jpeg_runtime_suspend(struct device *dev)
++{
++ struct mxc_jpeg_dev *jpeg = dev_get_drvdata(dev);
++
++ clk_bulk_disable_unprepare(jpeg->num_clks, jpeg->clks);
++
++ return 0;
++}
++#endif
++
++static const struct dev_pm_ops mxc_jpeg_pm_ops = {
++ SET_RUNTIME_PM_OPS(mxc_jpeg_runtime_suspend,
++ mxc_jpeg_runtime_resume, NULL)
++};
++
+ static int mxc_jpeg_remove(struct platform_device *pdev)
+ {
+ unsigned int slot;
+@@ -2100,6 +2239,7 @@ static int mxc_jpeg_remove(struct platform_device *pdev)
+ for (slot = 0; slot < MXC_MAX_SLOTS; slot++)
+ mxc_jpeg_free_slot_data(jpeg, slot);
+
++ pm_runtime_disable(&pdev->dev);
+ video_unregister_device(jpeg->dec_vdev);
+ v4l2_m2m_release(jpeg->m2m_dev);
+ v4l2_device_unregister(&jpeg->v4l2_dev);
+@@ -2116,6 +2256,7 @@ static struct platform_driver mxc_jpeg_driver = {
+ .driver = {
+ .name = "mxc-jpeg",
+ .of_match_table = mxc_jpeg_match,
++ .pm = &mxc_jpeg_pm_ops,
+ },
+ };
+ module_platform_driver(mxc_jpeg_driver);
+diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.h b/drivers/media/platform/imx-jpeg/mxc-jpeg.h
+index 4c210852e876c..495000800d552 100644
+--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.h
++++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.h
+@@ -49,6 +49,7 @@ enum mxc_jpeg_mode {
+ * @h_align: horizontal alignment order (align to 2^h_align)
+ * @v_align: vertical alignment order (align to 2^v_align)
+ * @flags: flags describing format applicability
++ * @precision: jpeg sample precision
+ */
+ struct mxc_jpeg_fmt {
+ const char *name;
+@@ -60,6 +61,7 @@ struct mxc_jpeg_fmt {
+ int h_align;
+ int v_align;
+ u32 flags;
++ u8 precision;
+ };
+
+ struct mxc_jpeg_desc {
+@@ -90,8 +92,9 @@ struct mxc_jpeg_ctx {
+ struct mxc_jpeg_q_data cap_q;
+ struct v4l2_fh fh;
+ enum mxc_jpeg_enc_state enc_state;
+- unsigned int stopping;
+ unsigned int slot;
++ unsigned int source_change;
++ bool header_parsed;
+ };
+
+ struct mxc_jpeg_slot_data {
+@@ -109,6 +112,8 @@ struct mxc_jpeg_dev {
+ spinlock_t hw_lock; /* hardware access lock */
+ unsigned int mode;
+ struct mutex lock; /* v4l2 ioctls serialization */
++ struct clk_bulk_data *clks;
++ int num_clks;
+ struct platform_device *pdev;
+ struct device *dev;
+ void __iomem *base_reg;
+diff --git a/drivers/media/platform/imx-pxp.c b/drivers/media/platform/imx-pxp.c
+index 4321edc0c23d9..8e9c6fee75a48 100644
+--- a/drivers/media/platform/imx-pxp.c
++++ b/drivers/media/platform/imx-pxp.c
+@@ -1661,6 +1661,8 @@ static int pxp_probe(struct platform_device *pdev)
+ if (irq < 0)
+ return irq;
+
++ spin_lock_init(&dev->irqlock);
++
+ ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, pxp_irq_handler,
+ IRQF_ONESHOT, dev_name(&pdev->dev), dev);
+ if (ret < 0) {
+@@ -1678,8 +1680,6 @@ static int pxp_probe(struct platform_device *pdev)
+ goto err_clk;
+ }
+
+- spin_lock_init(&dev->irqlock);
+-
+ ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
+ if (ret)
+ goto err_clk;
+diff --git a/drivers/media/platform/meson/ge2d/ge2d.c b/drivers/media/platform/meson/ge2d/ge2d.c
+index a1393fefa8aea..a885cbc99e382 100644
+--- a/drivers/media/platform/meson/ge2d/ge2d.c
++++ b/drivers/media/platform/meson/ge2d/ge2d.c
+@@ -215,35 +215,35 @@ static void ge2d_hw_start(struct meson_ge2d *ge2d)
+
+ regmap_write(ge2d->map, GE2D_SRC1_CLIPY_START_END,
+ FIELD_PREP(GE2D_START, ctx->in.crop.top) |
+- FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height));
++ FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height - 1));
+ regmap_write(ge2d->map, GE2D_SRC1_CLIPX_START_END,
+ FIELD_PREP(GE2D_START, ctx->in.crop.left) |
+- FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width));
++ FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width - 1));
+ regmap_write(ge2d->map, GE2D_SRC2_CLIPY_START_END,
+ FIELD_PREP(GE2D_START, ctx->out.crop.top) |
+- FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height));
++ FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1));
+ regmap_write(ge2d->map, GE2D_SRC2_CLIPX_START_END,
+ FIELD_PREP(GE2D_START, ctx->out.crop.left) |
+- FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width));
++ FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1));
+ regmap_write(ge2d->map, GE2D_DST_CLIPY_START_END,
+ FIELD_PREP(GE2D_START, ctx->out.crop.top) |
+- FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height));
++ FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1));
+ regmap_write(ge2d->map, GE2D_DST_CLIPX_START_END,
+ FIELD_PREP(GE2D_START, ctx->out.crop.left) |
+- FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width));
++ FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1));
+
+ regmap_write(ge2d->map, GE2D_SRC1_Y_START_END,
+- FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height));
++ FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height - 1));
+ regmap_write(ge2d->map, GE2D_SRC1_X_START_END,
+- FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width));
++ FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width - 1));
+ regmap_write(ge2d->map, GE2D_SRC2_Y_START_END,
+- FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height));
++ FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1));
+ regmap_write(ge2d->map, GE2D_SRC2_X_START_END,
+- FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width));
++ FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1));
+ regmap_write(ge2d->map, GE2D_DST_Y_START_END,
+- FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height));
++ FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1));
+ regmap_write(ge2d->map, GE2D_DST_X_START_END,
+- FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width));
++ FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1));
+
+ /* Color, no blend, use source color */
+ reg = GE2D_ALU_DO_COLOR_OPERATION_LOGIC(LOGIC_OPERATION_COPY,
+@@ -779,11 +779,7 @@ static int ge2d_s_ctrl(struct v4l2_ctrl *ctrl)
+ * If the rotation parameter changes the OUTPUT frames
+ * parameters, take them in account
+ */
+- if (fmt.width != ctx->out.pix_fmt.width ||
+- fmt.height != ctx->out.pix_fmt.width ||
+- fmt.bytesperline > ctx->out.pix_fmt.bytesperline ||
+- fmt.sizeimage > ctx->out.pix_fmt.sizeimage)
+- ctx->out.pix_fmt = fmt;
++ ctx->out.pix_fmt = fmt;
+
+ break;
+ }
+@@ -1036,7 +1032,6 @@ static int ge2d_remove(struct platform_device *pdev)
+
+ video_unregister_device(ge2d->vfd);
+ v4l2_m2m_release(ge2d->m2m_dev);
+- video_device_release(ge2d->vfd);
+ v4l2_device_unregister(&ge2d->v4l2_dev);
+ clk_disable_unprepare(ge2d->clk);
+
+diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+index a89c7b206eefd..470f8f1677448 100644
+--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
++++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+@@ -1457,7 +1457,6 @@ static int mtk_jpeg_remove(struct platform_device *pdev)
+
+ pm_runtime_disable(&pdev->dev);
+ video_unregister_device(jpeg->vdev);
+- video_device_release(jpeg->vdev);
+ v4l2_m2m_release(jpeg->m2m_dev);
+ v4l2_device_unregister(&jpeg->v4l2_dev);
+ mtk_jpeg_clk_release(jpeg);
+diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_ipi.h b/drivers/media/platform/mtk-mdp/mtk_mdp_ipi.h
+index 2cb8cecb30771..b810c96695c83 100644
+--- a/drivers/media/platform/mtk-mdp/mtk_mdp_ipi.h
++++ b/drivers/media/platform/mtk-mdp/mtk_mdp_ipi.h
+@@ -40,12 +40,14 @@ struct mdp_ipi_init {
+ * @ipi_id : IPI_MDP
+ * @ap_inst : AP mtk_mdp_vpu address
+ * @vpu_inst_addr : VPU MDP instance address
++ * @padding : Alignment padding
+ */
+ struct mdp_ipi_comm {
+ uint32_t msg_id;
+ uint32_t ipi_id;
+ uint64_t ap_inst;
+ uint32_t vpu_inst_addr;
++ uint32_t padding;
+ };
+
+ /**
+diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+index 416f356af363d..389ac3d1f3446 100644
+--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
++++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+@@ -733,6 +733,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq,
+ return -EINVAL;
+
+ if (*nplanes) {
++ if (*nplanes != q_data->fmt->num_planes)
++ return -EINVAL;
+ for (i = 0; i < *nplanes; i++)
+ if (sizes[i] < q_data->sizeimage[i])
+ return -EINVAL;
+@@ -793,7 +795,7 @@ static int vb2ops_venc_start_streaming(struct vb2_queue *q, unsigned int count)
+ {
+ struct mtk_vcodec_ctx *ctx = vb2_get_drv_priv(q);
+ struct venc_enc_param param;
+- int ret;
++ int ret, pm_ret;
+ int i;
+
+ /* Once state turn into MTK_STATE_ABORT, we need stop_streaming
+@@ -845,9 +847,9 @@ static int vb2ops_venc_start_streaming(struct vb2_queue *q, unsigned int count)
+ return 0;
+
+ err_set_param:
+- ret = pm_runtime_put(&ctx->dev->plat_dev->dev);
+- if (ret < 0)
+- mtk_v4l2_err("pm_runtime_put fail %d", ret);
++ pm_ret = pm_runtime_put(&ctx->dev->plat_dev->dev);
++ if (pm_ret < 0)
++ mtk_v4l2_err("pm_runtime_put fail %d", pm_ret);
+
+ err_start_stream:
+ for (i = 0; i < q->num_buffers; ++i) {
+diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+index 45d1870c83dd7..4ced20ca647b1 100644
+--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
++++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+@@ -218,11 +218,11 @@ static int fops_vcodec_release(struct file *file)
+ mtk_v4l2_debug(1, "[%d] encoder", ctx->id);
+ mutex_lock(&dev->dev_mutex);
+
++ v4l2_m2m_ctx_release(ctx->m2m_ctx);
+ mtk_vcodec_enc_release(ctx);
+ v4l2_fh_del(&ctx->fh);
+ v4l2_fh_exit(&ctx->fh);
+ v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+- v4l2_m2m_ctx_release(ctx->m2m_ctx);
+
+ list_del_init(&ctx->list);
+ kfree(ctx);
+diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c
+index cd27f637dbe7c..cfc7ebed8fb7a 100644
+--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c
++++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c
+@@ -102,6 +102,8 @@ struct mtk_vcodec_fw *mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev,
+ vpu_wdt_reg_handler(fw_pdev, mtk_vcodec_vpu_reset_handler, dev, rst_id);
+
+ fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL);
++ if (!fw)
++ return ERR_PTR(-ENOMEM);
+ fw->type = VPU;
+ fw->ops = &mtk_vcodec_vpu_msg;
+ fw->pdev = fw_pdev;
+diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
+index 71cdc3ddafcbb..0b2cde3b3439a 100644
+--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
++++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
+@@ -226,10 +226,11 @@ static struct vdec_fb *vp9_rm_from_fb_use_list(struct vdec_vp9_inst
+ if (fb->base_y.va == addr) {
+ list_move_tail(&node->list,
+ &inst->available_fb_node_list);
+- break;
++ return fb;
+ }
+ }
+- return fb;
++
++ return NULL;
+ }
+
+ static void vp9_add_to_fb_free_list(struct vdec_vp9_inst *inst,
+diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
+index ec290dde59cfd..af59cc52fdd73 100644
+--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
++++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
+@@ -562,15 +562,17 @@ static int load_requested_vpu(struct mtk_vpu *vpu,
+ int vpu_load_firmware(struct platform_device *pdev)
+ {
+ struct mtk_vpu *vpu;
+- struct device *dev = &pdev->dev;
++ struct device *dev;
+ struct vpu_run *run;
+ int ret;
+
+ if (!pdev) {
+- dev_err(dev, "VPU platform device is invalid\n");
++ pr_err("VPU platform device is invalid\n");
+ return -EINVAL;
+ }
+
++ dev = &pdev->dev;
++
+ vpu = platform_get_drvdata(pdev);
+ run = &vpu->run;
+
+@@ -848,7 +850,8 @@ static int mtk_vpu_probe(struct platform_device *pdev)
+ vpu->wdt.wq = create_singlethread_workqueue("vpu_wdt");
+ if (!vpu->wdt.wq) {
+ dev_err(dev, "initialize wdt workqueue failed\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto clk_unprepare;
+ }
+ INIT_WORK(&vpu->wdt.ws, vpu_wdt_reset_func);
+ mutex_init(&vpu->vpu_mutex);
+@@ -942,6 +945,8 @@ disable_vpu_clk:
+ vpu_clock_disable(vpu);
+ workqueue_destroy:
+ destroy_workqueue(vpu->wdt.wq);
++clk_unprepare:
++ clk_unprepare(vpu->clk);
+
+ return ret;
+ }
+diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
+index 20f59c59ff8a2..3222c98b83630 100644
+--- a/drivers/media/platform/omap3isp/isp.c
++++ b/drivers/media/platform/omap3isp/isp.c
+@@ -2306,7 +2306,16 @@ static int isp_probe(struct platform_device *pdev)
+
+ /* Regulators */
+ isp->isp_csiphy1.vdd = devm_regulator_get(&pdev->dev, "vdd-csiphy1");
++ if (IS_ERR(isp->isp_csiphy1.vdd)) {
++ ret = PTR_ERR(isp->isp_csiphy1.vdd);
++ goto error;
++ }
++
+ isp->isp_csiphy2.vdd = devm_regulator_get(&pdev->dev, "vdd-csiphy2");
++ if (IS_ERR(isp->isp_csiphy2.vdd)) {
++ ret = PTR_ERR(isp->isp_csiphy2.vdd);
++ goto error;
++ }
+
+ /* Clocks
+ *
+diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
+index 5b9b57f4d9bf8..68cf68dbcace2 100644
+--- a/drivers/media/platform/omap3isp/ispstat.c
++++ b/drivers/media/platform/omap3isp/ispstat.c
+@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
+ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
+ struct omap3isp_stat_data_time32 *data)
+ {
+- struct omap3isp_stat_data data64;
++ struct omap3isp_stat_data data64 = { };
+ int ret;
+
+ ret = omap3isp_stat_request_statistics(stat, &data64);
+@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
+
+ data->ts.tv_sec = data64.ts.tv_sec;
+ data->ts.tv_usec = data64.ts.tv_usec;
+- memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
++ data->buf = (uintptr_t)data64.buf;
++ memcpy(&data->frame, &data64.frame, sizeof(data->frame));
+
+ return 0;
+ }
+diff --git a/drivers/media/platform/qcom/camss/camss-csid-170.c b/drivers/media/platform/qcom/camss/camss-csid-170.c
+index ac22ff29d2a9f..82f59933ad7b3 100644
+--- a/drivers/media/platform/qcom/camss/camss-csid-170.c
++++ b/drivers/media/platform/qcom/camss/camss-csid-170.c
+@@ -105,7 +105,8 @@
+ #define CSID_RDI_CTRL(rdi) ((IS_LITE ? 0x208 : 0x308)\
+ + 0x100 * (rdi))
+ #define RDI_CTRL_HALT_CMD 0
+-#define ALT_CMD_RESUME_AT_FRAME_BOUNDARY 1
++#define HALT_CMD_HALT_AT_FRAME_BOUNDARY 0
++#define HALT_CMD_RESUME_AT_FRAME_BOUNDARY 1
+ #define RDI_CTRL_HALT_MODE 2
+
+ #define CSID_RDI_FRM_DROP_PATTERN(rdi) ((IS_LITE ? 0x20C : 0x30C)\
+@@ -366,7 +367,7 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+ val |= input_format->width & 0x1fff << TPG_DT_n_CFG_0_FRAME_WIDTH;
+ writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_0(0));
+
+- val = DATA_TYPE_RAW_10BIT << TPG_DT_n_CFG_1_DATA_TYPE;
++ val = format->data_type << TPG_DT_n_CFG_1_DATA_TYPE;
+ writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_1(0));
+
+ val = tg->mode << TPG_DT_n_CFG_2_PAYLOAD_MODE;
+@@ -382,8 +383,9 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+ val = 1 << RDI_CFG0_BYTE_CNTR_EN;
+ val |= 1 << RDI_CFG0_FORMAT_MEASURE_EN;
+ val |= 1 << RDI_CFG0_TIMESTAMP_EN;
++ /* note: for non-RDI path, this should be format->decode_format */
+ val |= DECODE_FORMAT_PAYLOAD_ONLY << RDI_CFG0_DECODE_FORMAT;
+- val |= DATA_TYPE_RAW_10BIT << RDI_CFG0_DATA_TYPE;
++ val |= format->data_type << RDI_CFG0_DATA_TYPE;
+ val |= vc << RDI_CFG0_VIRTUAL_CHANNEL;
+ val |= dt_id << RDI_CFG0_DT_ID;
+ writel_relaxed(val, csid->base + CSID_RDI_CFG0(0));
+@@ -443,13 +445,10 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable)
+ val |= 1 << CSI2_RX_CFG1_MISR_EN;
+ writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1); // csi2_vc_mode_shift_val ?
+
+- /* error irqs start at BIT(11) */
+- writel_relaxed(~0u, csid->base + CSID_CSI2_RX_IRQ_MASK);
+-
+- /* RDI irq */
+- writel_relaxed(~0u, csid->base + CSID_TOP_IRQ_MASK);
+-
+- val = 1 << RDI_CTRL_HALT_CMD;
++ if (enable)
++ val = HALT_CMD_RESUME_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD;
++ else
++ val = HALT_CMD_HALT_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD;
+ writel_relaxed(val, csid->base + CSID_RDI_CTRL(0));
+ }
+
+diff --git a/drivers/media/platform/qcom/camss/camss-vfe-170.c b/drivers/media/platform/qcom/camss/camss-vfe-170.c
+index 8594d275b41d1..02cb8005504a2 100644
+--- a/drivers/media/platform/qcom/camss/camss-vfe-170.c
++++ b/drivers/media/platform/qcom/camss/camss-vfe-170.c
+@@ -399,17 +399,7 @@ static irqreturn_t vfe_isr(int irq, void *dev)
+ */
+ static int vfe_halt(struct vfe_device *vfe)
+ {
+- unsigned long time;
+-
+- reinit_completion(&vfe->halt_complete);
+-
+- time = wait_for_completion_timeout(&vfe->halt_complete,
+- msecs_to_jiffies(VFE_HALT_TIMEOUT_MS));
+- if (!time) {
+- dev_err(vfe->camss->dev, "VFE halt timeout\n");
+- return -EIO;
+- }
+-
++ /* rely on vfe_disable_output() to stop the VFE */
+ return 0;
+ }
+
+diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c
+index f282275af626f..5173b79995ee7 100644
+--- a/drivers/media/platform/qcom/camss/camss-video.c
++++ b/drivers/media/platform/qcom/camss/camss-video.c
+@@ -493,7 +493,7 @@ static int video_start_streaming(struct vb2_queue *q, unsigned int count)
+
+ ret = media_pipeline_start(&vdev->entity, &video->pipe);
+ if (ret < 0)
+- return ret;
++ goto flush_buffers;
+
+ ret = video_check_format(video);
+ if (ret < 0)
+@@ -522,6 +522,7 @@ static int video_start_streaming(struct vb2_queue *q, unsigned int count)
+ error:
+ media_pipeline_stop(&vdev->entity);
+
++flush_buffers:
+ video->ops->flush_buffers(video, VB2_BUF_STATE_QUEUED);
+
+ return ret;
+diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
+index 91b15842c5558..1f0181b6353c9 100644
+--- a/drivers/media/platform/qcom/venus/core.c
++++ b/drivers/media/platform/qcom/venus/core.c
+@@ -349,11 +349,11 @@ static int venus_probe(struct platform_device *pdev)
+
+ ret = venus_firmware_init(core);
+ if (ret)
+- goto err_runtime_disable;
++ goto err_of_depopulate;
+
+ ret = venus_boot(core);
+ if (ret)
+- goto err_runtime_disable;
++ goto err_firmware_deinit;
+
+ ret = hfi_core_resume(core, true);
+ if (ret)
+@@ -385,6 +385,10 @@ err_dev_unregister:
+ v4l2_device_unregister(&core->v4l2_dev);
+ err_venus_shutdown:
+ venus_shutdown(core);
++err_firmware_deinit:
++ venus_firmware_deinit(core);
++err_of_depopulate:
++ of_platform_depopulate(dev);
+ err_runtime_disable:
+ pm_runtime_put_noidle(dev);
+ pm_runtime_set_suspended(dev);
+@@ -472,7 +476,8 @@ static __maybe_unused int venus_runtime_suspend(struct device *dev)
+ err_video_path:
+ icc_set_bw(core->cpucfg_path, kbps_to_icc(1000), 0);
+ err_cpucfg_path:
+- pm_ops->core_power(core, POWER_ON);
++ if (pm_ops->core_power)
++ pm_ops->core_power(core, POWER_ON);
+
+ return ret;
+ }
+diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
+index 8012f5c7bf344..ff705d513aae4 100644
+--- a/drivers/media/platform/qcom/venus/helpers.c
++++ b/drivers/media/platform/qcom/venus/helpers.c
+@@ -981,8 +981,8 @@ static u32 get_framesize_raw_yuv420_tp10_ubwc(u32 width, u32 height)
+ u32 extradata = SZ_16K;
+ u32 size;
+
+- y_stride = ALIGN(ALIGN(width, 192) * 4 / 3, 256);
+- uv_stride = ALIGN(ALIGN(width, 192) * 4 / 3, 256);
++ y_stride = ALIGN(width * 4 / 3, 256);
++ uv_stride = ALIGN(width * 4 / 3, 256);
+ y_sclines = ALIGN(height, 16);
+ uv_sclines = ALIGN((height + 1) >> 1, 16);
+
+diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c
+index 0f2482367e060..9bc4becdf6386 100644
+--- a/drivers/media/platform/qcom/venus/hfi.c
++++ b/drivers/media/platform/qcom/venus/hfi.c
+@@ -104,6 +104,9 @@ int hfi_core_deinit(struct venus_core *core, bool blocking)
+ mutex_lock(&core->lock);
+ }
+
++ if (!core->ops)
++ goto unlock;
++
+ ret = core->ops->core_deinit(core);
+
+ if (!ret)
+diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c
+index 60f4b8e4b8d02..1bf5db7673ebf 100644
+--- a/drivers/media/platform/qcom/venus/hfi_cmds.c
++++ b/drivers/media/platform/qcom/venus/hfi_cmds.c
+@@ -1054,6 +1054,8 @@ static int pkt_session_set_property_1x(struct hfi_session_set_property_pkt *pkt,
+ pkt->shdr.hdr.size += sizeof(u32) + sizeof(*info);
+ break;
+ }
++ case HFI_PROPERTY_PARAM_VENC_HDR10_PQ_SEI:
++ return -ENOTSUPP;
+
+ /* FOLLOWING PROPERTIES ARE NOT IMPLEMENTED IN CORE YET */
+ case HFI_PROPERTY_CONFIG_BUFFER_REQUIREMENTS:
+diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
+index ce98c523b3c68..c1e44cd2ef6dc 100644
+--- a/drivers/media/platform/qcom/venus/hfi_venus.c
++++ b/drivers/media/platform/qcom/venus/hfi_venus.c
+@@ -131,7 +131,6 @@ struct venus_hfi_device {
+
+ static bool venus_pkt_debug;
+ int venus_fw_debug = HFI_DEBUG_MSG_ERROR | HFI_DEBUG_MSG_FATAL;
+-static bool venus_sys_idle_indicator;
+ static bool venus_fw_low_power_mode = true;
+ static int venus_hw_rsp_timeout = 1000;
+ static bool venus_fw_coverage;
+@@ -454,7 +453,6 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
+ void __iomem *wrapper_base = hdev->core->wrapper_base;
+ int ret = 0;
+
+- writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
+ if (IS_V6(hdev->core)) {
+ mask_val = readl(wrapper_base + WRAPPER_INTR_MASK);
+ mask_val &= ~(WRAPPER_INTR_MASK_A2HWD_BASK_V6 |
+@@ -465,6 +463,7 @@ static int venus_boot_core(struct venus_hfi_device *hdev)
+ writel(mask_val, wrapper_base + WRAPPER_INTR_MASK);
+ writel(1, cpu_cs_base + CPU_CS_SCIACMDARG3);
+
++ writel(BIT(VIDC_CTRL_INIT_CTRL_SHIFT), cpu_cs_base + VIDC_CTRL_INIT);
+ while (!ctrl_status && count < max_tries) {
+ ctrl_status = readl(cpu_cs_base + CPU_CS_SCIACMDARG0);
+ if ((ctrl_status & CPU_CS_SCIACMDARG0_ERROR_STATUS_MASK) == 4) {
+@@ -924,17 +923,12 @@ static int venus_sys_set_default_properties(struct venus_hfi_device *hdev)
+ if (ret)
+ dev_warn(dev, "setting fw debug msg ON failed (%d)\n", ret);
+
+- /*
+- * Idle indicator is disabled by default on some 4xx firmware versions,
+- * enable it explicitly in order to make suspend functional by checking
+- * WFI (wait-for-interrupt) bit.
+- */
+- if (IS_V4(hdev->core) || IS_V6(hdev->core))
+- venus_sys_idle_indicator = true;
+-
+- ret = venus_sys_set_idle_message(hdev, venus_sys_idle_indicator);
+- if (ret)
+- dev_warn(dev, "setting idle response ON failed (%d)\n", ret);
++ /* HFI_PROPERTY_SYS_IDLE_INDICATOR is not supported beyond 8916 (HFI V1) */
++ if (IS_V1(hdev->core)) {
++ ret = venus_sys_set_idle_message(hdev, false);
++ if (ret)
++ dev_warn(dev, "setting idle response ON failed (%d)\n", ret);
++ }
+
+ ret = venus_sys_set_power_control(hdev, venus_fw_low_power_mode);
+ if (ret)
+diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c
+index 3e2345eb47f7c..055513a7301f1 100644
+--- a/drivers/media/platform/qcom/venus/pm_helpers.c
++++ b/drivers/media/platform/qcom/venus/pm_helpers.c
+@@ -163,14 +163,12 @@ static u32 load_per_type(struct venus_core *core, u32 session_type)
+ struct venus_inst *inst = NULL;
+ u32 mbs_per_sec = 0;
+
+- mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ if (inst->session_type != session_type)
+ continue;
+
+ mbs_per_sec += load_per_instance(inst);
+ }
+- mutex_unlock(&core->lock);
+
+ return mbs_per_sec;
+ }
+@@ -219,14 +217,12 @@ static int load_scale_bw(struct venus_core *core)
+ struct venus_inst *inst = NULL;
+ u32 mbs_per_sec, avg, peak, total_avg = 0, total_peak = 0;
+
+- mutex_lock(&core->lock);
+ list_for_each_entry(inst, &core->instances, list) {
+ mbs_per_sec = load_per_instance(inst);
+ mbs_to_bw(inst, mbs_per_sec, &avg, &peak);
+ total_avg += avg;
+ total_peak += peak;
+ }
+- mutex_unlock(&core->lock);
+
+ /*
+ * keep minimum bandwidth vote for "video-mem" path,
+@@ -253,8 +249,9 @@ static int load_scale_v1(struct venus_inst *inst)
+ struct device *dev = core->dev;
+ u32 mbs_per_sec;
+ unsigned int i;
+- int ret;
++ int ret = 0;
+
++ mutex_lock(&core->lock);
+ mbs_per_sec = load_per_type(core, VIDC_SESSION_TYPE_ENC) +
+ load_per_type(core, VIDC_SESSION_TYPE_DEC);
+
+@@ -279,17 +276,19 @@ set_freq:
+ if (ret) {
+ dev_err(dev, "failed to set clock rate %lu (%d)\n",
+ freq, ret);
+- return ret;
++ goto exit;
+ }
+
+ ret = load_scale_bw(core);
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth (%d)\n",
+ ret);
+- return ret;
++ goto exit;
+ }
+
+- return 0;
++exit:
++ mutex_unlock(&core->lock);
++ return ret;
+ }
+
+ static int core_get_v1(struct venus_core *core)
+@@ -587,8 +586,8 @@ min_loaded_core(struct venus_inst *inst, u32 *min_coreid, u32 *min_load, bool lo
+ if (inst->session_type == VIDC_SESSION_TYPE_DEC)
+ vpp_freq = inst_pos->clk_data.vpp_freq;
+ else if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+- vpp_freq = low_power ? inst_pos->clk_data.vpp_freq :
+- inst_pos->clk_data.low_power_freq;
++ vpp_freq = low_power ? inst_pos->clk_data.low_power_freq :
++ inst_pos->clk_data.vpp_freq;
+ else
+ continue;
+
+@@ -870,13 +869,13 @@ static int vcodec_domains_get(struct venus_core *core)
+ for (i = 0; i < res->vcodec_pmdomains_num; i++) {
+ pd = dev_pm_domain_attach_by_name(dev,
+ res->vcodec_pmdomains[i]);
+- if (IS_ERR(pd))
+- return PTR_ERR(pd);
++ if (IS_ERR_OR_NULL(pd))
++ return PTR_ERR(pd) ? : -ENODATA;
+ core->pmdomains[i] = pd;
+ }
+
+ skip_pmdomains:
+- if (!core->has_opp_table)
++ if (!core->res->opp_pmdomain)
+ return 0;
+
+ /* Attach the power domain for setting performance state */
+@@ -1008,6 +1007,10 @@ static int core_get_v4(struct venus_core *core)
+ if (ret)
+ return ret;
+
++ ret = vcodec_domains_get(core);
++ if (ret)
++ return ret;
++
+ if (core->res->opp_pmdomain) {
+ ret = devm_pm_opp_of_add_table(dev);
+ if (!ret) {
+@@ -1018,10 +1021,6 @@ static int core_get_v4(struct venus_core *core)
+ }
+ }
+
+- ret = vcodec_domains_get(core);
+- if (ret)
+- return ret;
+-
+ return 0;
+ }
+
+@@ -1085,12 +1084,16 @@ static unsigned long calculate_inst_freq(struct venus_inst *inst,
+ if (inst->state != INST_START)
+ return 0;
+
+- if (inst->session_type == VIDC_SESSION_TYPE_ENC)
++ if (inst->session_type == VIDC_SESSION_TYPE_ENC) {
+ vpp_freq_per_mb = inst->flags & VENUS_LOW_POWER ?
+ inst->clk_data.low_power_freq :
+ inst->clk_data.vpp_freq;
+
+- vpp_freq = mbs_per_sec * vpp_freq_per_mb;
++ vpp_freq = mbs_per_sec * vpp_freq_per_mb;
++ } else {
++ vpp_freq = mbs_per_sec * inst->clk_data.vpp_freq;
++ }
++
+ /* 21 / 20 is overhead factor */
+ vpp_freq += vpp_freq / 20;
+ vsp_freq = mbs_per_sec * inst->clk_data.vsp_freq;
+@@ -1112,13 +1115,13 @@ static int load_scale_v4(struct venus_inst *inst)
+ struct device *dev = core->dev;
+ unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+ unsigned long filled_len = 0;
+- int i, ret;
++ int i, ret = 0;
+
+ for (i = 0; i < inst->num_input_bufs; i++)
+ filled_len = max(filled_len, inst->payloads[i]);
+
+ if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+- return 0;
++ return ret;
+
+ freq = calculate_inst_freq(inst, filled_len);
+ inst->clk_data.freq = freq;
+@@ -1134,7 +1137,6 @@ static int load_scale_v4(struct venus_inst *inst)
+ freq_core2 += inst->clk_data.freq;
+ }
+ }
+- mutex_unlock(&core->lock);
+
+ freq = max(freq_core1, freq_core2);
+
+@@ -1158,17 +1160,19 @@ set_freq:
+ if (ret) {
+ dev_err(dev, "failed to set clock rate %lu (%d)\n",
+ freq, ret);
+- return ret;
++ goto exit;
+ }
+
+ ret = load_scale_bw(core);
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth (%d)\n",
+ ret);
+- return ret;
++ goto exit;
+ }
+
+- return 0;
++exit:
++ mutex_unlock(&core->lock);
++ return ret;
+ }
+
+ static const struct venus_pm_ops pm_ops_v4 = {
+diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
+index 198e47eb63f41..6e0466772339a 100644
+--- a/drivers/media/platform/qcom/venus/vdec.c
++++ b/drivers/media/platform/qcom/venus/vdec.c
+@@ -158,6 +158,8 @@ vdec_try_fmt_common(struct venus_inst *inst, struct v4l2_format *f)
+ else
+ return NULL;
+ fmt = find_format(inst, pixmp->pixelformat, f->type);
++ if (!fmt)
++ return NULL;
+ }
+
+ pixmp->width = clamp(pixmp->width, frame_width_min(inst),
+@@ -496,6 +498,7 @@ static int
+ vdec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *cmd)
+ {
+ struct venus_inst *inst = to_inst(file);
++ struct vb2_queue *dst_vq;
+ struct hfi_frame_data fdata = {0};
+ int ret;
+
+@@ -526,6 +529,13 @@ vdec_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *cmd)
+ inst->codec_state = VENUS_DEC_STATE_DRAIN;
+ inst->drain_active = true;
+ }
++ } else if (cmd->cmd == V4L2_DEC_CMD_START &&
++ inst->codec_state == VENUS_DEC_STATE_STOPPED) {
++ dst_vq = v4l2_m2m_get_vq(inst->fh.m2m_ctx,
++ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
++ vb2_clear_last_buffer_dequeued(dst_vq);
++
++ inst->codec_state = VENUS_DEC_STATE_DECODING;
+ }
+
+ unlock:
+diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
+index bc1c42dd53c04..c4e0fe14c058d 100644
+--- a/drivers/media/platform/qcom/venus/venc.c
++++ b/drivers/media/platform/qcom/venus/venc.c
+@@ -604,8 +604,8 @@ static int venc_set_properties(struct venus_inst *inst)
+
+ ptype = HFI_PROPERTY_PARAM_VENC_H264_TRANSFORM_8X8;
+ h264_transform.enable_type = 0;
+- if (ctr->profile.h264 == HFI_H264_PROFILE_HIGH ||
+- ctr->profile.h264 == HFI_H264_PROFILE_CONSTRAINED_HIGH)
++ if (ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_HIGH ||
++ ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH)
+ h264_transform.enable_type = ctr->h264_8x8_transform;
+
+ ret = hfi_session_set_property(inst, ptype, &h264_transform);
+diff --git a/drivers/media/platform/qcom/venus/venc_ctrls.c b/drivers/media/platform/qcom/venus/venc_ctrls.c
+index 1ada42df314dc..ea5805e71c143 100644
+--- a/drivers/media/platform/qcom/venus/venc_ctrls.c
++++ b/drivers/media/platform/qcom/venus/venc_ctrls.c
+@@ -320,8 +320,8 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
+ ctr->intra_refresh_period = ctrl->val;
+ break;
+ case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:
+- if (ctr->profile.h264 != HFI_H264_PROFILE_HIGH &&
+- ctr->profile.h264 != HFI_H264_PROFILE_CONSTRAINED_HIGH)
++ if (ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_HIGH &&
++ ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH)
+ return -EINVAL;
+
+ /*
+@@ -457,7 +457,7 @@ int venc_ctrl_init(struct venus_inst *inst)
+ V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP, 1, 51, 1, 1);
+
+ v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
+- V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 0);
++ V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 1);
+
+ v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
+ V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP, 1, 51, 1, 1);
+diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
+index e28eff0396888..0c5e2f7e04beb 100644
+--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
++++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
+@@ -445,16 +445,23 @@ static int rcsi2_wait_phy_start(struct rcar_csi2 *priv,
+ static int rcsi2_set_phypll(struct rcar_csi2 *priv, unsigned int mbps)
+ {
+ const struct rcsi2_mbps_reg *hsfreq;
++ const struct rcsi2_mbps_reg *hsfreq_prev = NULL;
+
+- for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++)
++ for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++) {
+ if (hsfreq->mbps >= mbps)
+ break;
++ hsfreq_prev = hsfreq;
++ }
+
+ if (!hsfreq->mbps) {
+ dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps);
+ return -ERANGE;
+ }
+
++ if (hsfreq_prev &&
++ ((mbps - hsfreq_prev->mbps) <= (hsfreq->mbps - mbps)))
++ hsfreq = hsfreq_prev;
++
+ rcsi2_write(priv, PHYPLL_REG, PHYPLL_HSFREQRANGE(hsfreq->reg));
+
+ return 0;
+@@ -553,6 +560,8 @@ static int rcsi2_start_receiver(struct rcar_csi2 *priv)
+
+ /* Code is validated in set_fmt. */
+ format = rcsi2_code_to_fmt(priv->mf.code);
++ if (!format)
++ return -EINVAL;
+
+ /*
+ * Enable all supported CSI-2 channels with virtual channel and
+@@ -980,10 +989,17 @@ static int rcsi2_phtw_write_mbps(struct rcar_csi2 *priv, unsigned int mbps,
+ const struct rcsi2_mbps_reg *values, u16 code)
+ {
+ const struct rcsi2_mbps_reg *value;
++ const struct rcsi2_mbps_reg *prev_value = NULL;
+
+- for (value = values; value->mbps; value++)
++ for (value = values; value->mbps; value++) {
+ if (value->mbps >= mbps)
+ break;
++ prev_value = value;
++ }
++
++ if (prev_value &&
++ ((mbps - prev_value->mbps) <= (value->mbps - mbps)))
++ value = prev_value;
+
+ if (!value->mbps) {
+ dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps);
+diff --git a/drivers/media/platform/rcar-vin/rcar-dma.c b/drivers/media/platform/rcar-vin/rcar-dma.c
+index f5f722ab1d4e8..efebae935720a 100644
+--- a/drivers/media/platform/rcar-vin/rcar-dma.c
++++ b/drivers/media/platform/rcar-vin/rcar-dma.c
+@@ -644,11 +644,9 @@ static int rvin_setup(struct rvin_dev *vin)
+ case V4L2_FIELD_SEQ_TB:
+ case V4L2_FIELD_SEQ_BT:
+ case V4L2_FIELD_NONE:
+- vnmc = VNMC_IM_ODD_EVEN;
+- progressive = true;
+- break;
+ case V4L2_FIELD_ALTERNATE:
+ vnmc = VNMC_IM_ODD_EVEN;
++ progressive = true;
+ break;
+ default:
+ vnmc = VNMC_IM_ODD;
+@@ -904,7 +902,8 @@ static void rvin_fill_hw_slot(struct rvin_dev *vin, int slot)
+ vin->format.sizeimage / 2;
+ break;
+ }
+- } else if (vin->state != RUNNING || list_empty(&vin->buf_list)) {
++ } else if ((vin->state != STOPPED && vin->state != RUNNING) ||
++ list_empty(&vin->buf_list)) {
+ vin->buf_hw[slot].buffer = NULL;
+ vin->buf_hw[slot].type = FULL;
+ phys_addr = vin->scratch_phys;
+diff --git a/drivers/media/platform/rcar-vin/rcar-v4l2.c b/drivers/media/platform/rcar-vin/rcar-v4l2.c
+index 0d141155f0e3e..eb8c79bac540f 100644
+--- a/drivers/media/platform/rcar-vin/rcar-v4l2.c
++++ b/drivers/media/platform/rcar-vin/rcar-v4l2.c
+@@ -175,20 +175,27 @@ static void rvin_format_align(struct rvin_dev *vin, struct v4l2_pix_format *pix)
+ break;
+ }
+
+- /* HW limit width to a multiple of 32 (2^5) for NV12/16 else 2 (2^1) */
++ /* Hardware limits width alignment based on format. */
+ switch (pix->pixelformat) {
++ /* Multiple of 32 (2^5) for NV12/16. */
+ case V4L2_PIX_FMT_NV12:
+ case V4L2_PIX_FMT_NV16:
+ walign = 5;
+ break;
+- default:
++ /* Multiple of 2 (2^1) for YUV. */
++ case V4L2_PIX_FMT_YUYV:
++ case V4L2_PIX_FMT_UYVY:
+ walign = 1;
+ break;
++ /* No multiple for RGB. */
++ default:
++ walign = 0;
++ break;
+ }
+
+ /* Limit to VIN capabilities */
+- v4l_bound_align_image(&pix->width, 2, vin->info->max_width, walign,
+- &pix->height, 4, vin->info->max_height, 2, 0);
++ v4l_bound_align_image(&pix->width, 5, vin->info->max_width, walign,
++ &pix->height, 2, vin->info->max_height, 0, 0);
+
+ pix->bytesperline = rvin_format_bytesperline(vin, pix);
+ pix->sizeimage = rvin_format_sizeimage(pix);
+diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c
+index 89aac60066d91..c548cb01957b0 100644
+--- a/drivers/media/platform/rcar_fdp1.c
++++ b/drivers/media/platform/rcar_fdp1.c
+@@ -2256,7 +2256,6 @@ static int fdp1_probe(struct platform_device *pdev)
+ struct fdp1_dev *fdp1;
+ struct video_device *vfd;
+ struct device_node *fcp_node;
+- struct resource *res;
+ struct clk *clk;
+ unsigned int i;
+
+@@ -2283,17 +2282,15 @@ static int fdp1_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, fdp1);
+
+ /* Memory-mapped registers */
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- fdp1->regs = devm_ioremap_resource(&pdev->dev, res);
++ fdp1->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(fdp1->regs))
+ return PTR_ERR(fdp1->regs);
+
+ /* Interrupt service routine registration */
+- fdp1->irq = ret = platform_get_irq(pdev, 0);
+- if (ret < 0) {
+- dev_err(&pdev->dev, "cannot find IRQ\n");
++ ret = platform_get_irq(pdev, 0);
++ if (ret < 0)
+ return ret;
+- }
++ fdp1->irq = ret;
+
+ ret = devm_request_irq(&pdev->dev, fdp1->irq, fdp1_irq_handler, 0,
+ dev_name(&pdev->dev), fdp1);
+@@ -2316,8 +2313,10 @@ static int fdp1_probe(struct platform_device *pdev)
+
+ /* Determine our clock rate */
+ clk = clk_get(&pdev->dev, NULL);
+- if (IS_ERR(clk))
+- return PTR_ERR(clk);
++ if (IS_ERR(clk)) {
++ ret = PTR_ERR(clk);
++ goto put_dev;
++ }
+
+ fdp1->clk_rate = clk_get_rate(clk);
+ clk_put(clk);
+@@ -2326,7 +2325,7 @@ static int fdp1_probe(struct platform_device *pdev)
+ ret = v4l2_device_register(&pdev->dev, &fdp1->v4l2_dev);
+ if (ret) {
+ v4l2_err(&fdp1->v4l2_dev, "Failed to register video device\n");
+- return ret;
++ goto put_dev;
+ }
+
+ /* M2M registration */
+@@ -2396,10 +2395,12 @@ release_m2m:
+ unreg_dev:
+ v4l2_device_unregister(&fdp1->v4l2_dev);
+
++put_dev:
++ rcar_fcp_put(fdp1->fcp);
+ return ret;
+ }
+
+-static int fdp1_remove(struct platform_device *pdev)
++static void fdp1_remove(struct platform_device *pdev)
+ {
+ struct fdp1_dev *fdp1 = platform_get_drvdata(pdev);
+
+@@ -2407,8 +2408,7 @@ static int fdp1_remove(struct platform_device *pdev)
+ video_unregister_device(&fdp1->vfd);
+ v4l2_device_unregister(&fdp1->v4l2_dev);
+ pm_runtime_disable(&pdev->dev);
+-
+- return 0;
++ rcar_fcp_put(fdp1->fcp);
+ }
+
+ static int __maybe_unused fdp1_pm_runtime_suspend(struct device *dev)
+@@ -2444,7 +2444,7 @@ MODULE_DEVICE_TABLE(of, fdp1_dt_ids);
+
+ static struct platform_driver fdp1_pdrv = {
+ .probe = fdp1_probe,
+- .remove = fdp1_remove,
++ .remove_new = fdp1_remove,
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = fdp1_dt_ids,
+diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
+index 6759091b15e09..e3246344fb724 100644
+--- a/drivers/media/platform/rockchip/rga/rga.c
++++ b/drivers/media/platform/rockchip/rga/rga.c
+@@ -868,7 +868,7 @@ static int rga_probe(struct platform_device *pdev)
+
+ ret = pm_runtime_resume_and_get(rga->dev);
+ if (ret < 0)
+- goto rel_vdev;
++ goto rel_m2m;
+
+ rga->version.major = (rga_read(rga, RGA_VERSION_INFO) >> 24) & 0xFF;
+ rga->version.minor = (rga_read(rga, RGA_VERSION_INFO) >> 20) & 0x0F;
+@@ -884,7 +884,7 @@ static int rga_probe(struct platform_device *pdev)
+ DMA_ATTR_WRITE_COMBINE);
+ if (!rga->cmdbuf_virt) {
+ ret = -ENOMEM;
+- goto rel_vdev;
++ goto rel_m2m;
+ }
+
+ rga->src_mmu_pages =
+@@ -895,7 +895,7 @@ static int rga_probe(struct platform_device *pdev)
+ }
+ rga->dst_mmu_pages =
+ (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
+- if (rga->dst_mmu_pages) {
++ if (!rga->dst_mmu_pages) {
+ ret = -ENOMEM;
+ goto free_src_pages;
+ }
+@@ -921,6 +921,8 @@ free_src_pages:
+ free_dma:
+ dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
+ rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
++rel_m2m:
++ v4l2_m2m_release(rga->m2m_dev);
+ rel_vdev:
+ video_device_release(vfd);
+ unreg_v4l2_dev:
+diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+index 41988eb0ec0a5..0f980f68058c0 100644
+--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+@@ -1270,11 +1270,12 @@ static int rkisp1_capture_link_validate(struct media_link *link)
+ struct rkisp1_capture *cap = video_get_drvdata(vdev);
+ const struct rkisp1_capture_fmt_cfg *fmt =
+ rkisp1_find_fmt_cfg(cap, cap->pix.fmt.pixelformat);
+- struct v4l2_subdev_format sd_fmt;
++ struct v4l2_subdev_format sd_fmt = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ .pad = link->source->index,
++ };
+ int ret;
+
+- sd_fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+- sd_fmt.pad = link->source->index;
+ ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &sd_fmt);
+ if (ret)
+ return ret;
+diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+index 7474150b94ed3..560f928c37520 100644
+--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+@@ -426,7 +426,7 @@ static void rkisp1_debug_init(struct rkisp1_device *rkisp1)
+ {
+ struct rkisp1_debug *debug = &rkisp1->debug;
+
+- debug->debugfs_dir = debugfs_create_dir(RKISP1_DRIVER_NAME, NULL);
++ debug->debugfs_dir = debugfs_create_dir(dev_name(rkisp1->dev), NULL);
+ debugfs_create_ulong("data_loss", 0444, debug->debugfs_dir,
+ &debug->data_loss);
+ debugfs_create_ulong("outform_size_err", 0444, debug->debugfs_dir,
+diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
+index 8fa5b0abf1f9c..e0e7d0b4ea047 100644
+--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
+@@ -275,7 +275,7 @@ static void rkisp1_lsc_config(struct rkisp1_params *params,
+ RKISP1_CIF_ISP_LSC_XSIZE_01 + i * 4);
+
+ /* program x grad tables */
+- data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_grad_tbl[i * 2],
++ data = RKISP1_CIF_ISP_LSC_SECT_GRAD(arg->x_grad_tbl[i * 2],
+ arg->x_grad_tbl[i * 2 + 1]);
+ rkisp1_write(params->rkisp1, data,
+ RKISP1_CIF_ISP_LSC_XGRAD_01 + i * 4);
+@@ -287,7 +287,7 @@ static void rkisp1_lsc_config(struct rkisp1_params *params,
+ RKISP1_CIF_ISP_LSC_YSIZE_01 + i * 4);
+
+ /* program y grad tables */
+- data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_grad_tbl[i * 2],
++ data = RKISP1_CIF_ISP_LSC_SECT_GRAD(arg->y_grad_tbl[i * 2],
+ arg->y_grad_tbl[i * 2 + 1]);
+ rkisp1_write(params->rkisp1, data,
+ RKISP1_CIF_ISP_LSC_YGRAD_01 + i * 4);
+@@ -751,7 +751,7 @@ static void rkisp1_ie_enable(struct rkisp1_params *params, bool en)
+ }
+ }
+
+-static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range)
++static void rkisp1_csm_config(struct rkisp1_params *params)
+ {
+ static const u16 full_range_coeff[] = {
+ 0x0026, 0x004b, 0x000f,
+@@ -765,7 +765,7 @@ static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range)
+ };
+ unsigned int i;
+
+- if (full_range) {
++ if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE) {
+ for (i = 0; i < ARRAY_SIZE(full_range_coeff); i++)
+ rkisp1_write(params->rkisp1, full_range_coeff[i],
+ RKISP1_CIF_ISP_CC_COEFF_0 + i * 4);
+@@ -1235,11 +1235,7 @@ static void rkisp1_params_config_parameter(struct rkisp1_params *params)
+ rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP,
+ rkisp1_hst_params_default_config.mode);
+
+- /* set the range */
+- if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE)
+- rkisp1_csm_config(params, true);
+- else
+- rkisp1_csm_config(params, false);
++ rkisp1_csm_config(params);
+
+ spin_lock_irq(&params->config_lock);
+
+diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
+index fa33080f51db5..f584ccfe0286f 100644
+--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
+@@ -480,7 +480,7 @@
+ (((v0) & 0xFFF) | (((v1) & 0xFFF) << 12))
+ #define RKISP1_CIF_ISP_LSC_SECT_SIZE(v0, v1) \
+ (((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
+-#define RKISP1_CIF_ISP_LSC_GRAD_SIZE(v0, v1) \
++#define RKISP1_CIF_ISP_LSC_SECT_GRAD(v0, v1) \
+ (((v0) & 0xFFF) | (((v1) & 0xFFF) << 16))
+
+ /* LSC: ISP_LSC_TABLE_SEL */
+diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
+index 2070f4b067059..a166ede409675 100644
+--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
+@@ -510,6 +510,10 @@ static int rkisp1_rsz_init_config(struct v4l2_subdev *sd,
+ sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
+ sink_fmt->field = V4L2_FIELD_NONE;
+ sink_fmt->code = RKISP1_DEF_FMT;
++ sink_fmt->colorspace = V4L2_COLORSPACE_SRGB;
++ sink_fmt->xfer_func = V4L2_XFER_FUNC_SRGB;
++ sink_fmt->ycbcr_enc = V4L2_YCBCR_ENC_601;
++ sink_fmt->quantization = V4L2_QUANTIZATION_LIM_RANGE;
+
+ sink_crop = v4l2_subdev_get_try_crop(sd, sd_state,
+ RKISP1_RSZ_PAD_SINK);
+diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c
+index eba2b9f040df0..4c511b026bd72 100644
+--- a/drivers/media/platform/s5p-mfc/s5p_mfc.c
++++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c
+@@ -1283,11 +1283,15 @@ static int s5p_mfc_probe(struct platform_device *pdev)
+ spin_lock_init(&dev->condlock);
+ dev->plat_dev = pdev;
+ if (!dev->plat_dev) {
+- dev_err(&pdev->dev, "No platform data specified\n");
++ mfc_err("No platform data specified\n");
+ return -ENODEV;
+ }
+
+ dev->variant = of_device_get_match_data(&pdev->dev);
++ if (!dev->variant) {
++ dev_err(&pdev->dev, "Failed to get device MFC hardware variant information\n");
++ return -ENOENT;
++ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dev->regs_base = devm_ioremap_resource(&pdev->dev, res);
+@@ -1402,6 +1406,7 @@ static int s5p_mfc_probe(struct platform_device *pdev)
+ /* Deinit MFC if probe had failed */
+ err_enc_reg:
+ video_unregister_device(dev->vfd_dec);
++ dev->vfd_dec = NULL;
+ err_dec_reg:
+ video_device_release(dev->vfd_enc);
+ err_enc_alloc:
+@@ -1447,8 +1452,6 @@ static int s5p_mfc_remove(struct platform_device *pdev)
+
+ video_unregister_device(dev->vfd_enc);
+ video_unregister_device(dev->vfd_dec);
+- video_device_release(dev->vfd_enc);
+- video_device_release(dev->vfd_dec);
+ v4l2_device_unregister(&dev->v4l2_dev);
+ s5p_mfc_unconfigure_dma_memory(dev);
+
+@@ -1580,8 +1583,18 @@ static struct s5p_mfc_variant mfc_drvdata_v7 = {
+ .port_num = MFC_NUM_PORTS_V7,
+ .buf_size = &buf_size_v7,
+ .fw_name[0] = "s5p-mfc-v7.fw",
+- .clk_names = {"mfc", "sclk_mfc"},
+- .num_clocks = 2,
++ .clk_names = {"mfc"},
++ .num_clocks = 1,
++};
++
++static struct s5p_mfc_variant mfc_drvdata_v7_3250 = {
++ .version = MFC_VERSION_V7,
++ .version_bit = MFC_V7_BIT,
++ .port_num = MFC_NUM_PORTS_V7,
++ .buf_size = &buf_size_v7,
++ .fw_name[0] = "s5p-mfc-v7.fw",
++ .clk_names = {"mfc", "sclk_mfc"},
++ .num_clocks = 2,
+ };
+
+ static struct s5p_mfc_buf_size_v6 mfc_buf_size_v8 = {
+@@ -1651,6 +1664,9 @@ static const struct of_device_id exynos_mfc_match[] = {
+ }, {
+ .compatible = "samsung,mfc-v7",
+ .data = &mfc_drvdata_v7,
++ }, {
++ .compatible = "samsung,exynos3250-mfc",
++ .data = &mfc_drvdata_v7_3250,
+ }, {
+ .compatible = "samsung,mfc-v8",
+ .data = &mfc_drvdata_v8,
+diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c b/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c
+index da138c314963a..58822ec5370e2 100644
+--- a/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c
++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c
+@@ -468,8 +468,10 @@ void s5p_mfc_close_mfc_inst(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx)
+ s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
+ /* Wait until instance is returned or timeout occurred */
+ if (s5p_mfc_wait_for_done_ctx(ctx,
+- S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0))
++ S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0)){
++ clear_work_bit_irqsave(ctx);
+ mfc_err("Err returning instance\n");
++ }
+
+ /* Free resources */
+ s5p_mfc_hw_call(dev->mfc_ops, release_codec_buffers, ctx);
+diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+index 1fad99edb0913..3da1775a65f19 100644
+--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+@@ -1218,6 +1218,7 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx)
+ unsigned long mb_y_addr, mb_c_addr;
+ int slice_type;
+ unsigned int strm_size;
++ bool src_ready;
+
+ slice_type = s5p_mfc_hw_call(dev->mfc_ops, get_enc_slice_type, dev);
+ strm_size = s5p_mfc_hw_call(dev->mfc_ops, get_enc_strm_size, dev);
+@@ -1257,7 +1258,8 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx)
+ }
+ }
+ }
+- if ((ctx->src_queue_cnt > 0) && (ctx->state == MFCINST_RUNNING)) {
++ if (ctx->src_queue_cnt > 0 && (ctx->state == MFCINST_RUNNING ||
++ ctx->state == MFCINST_FINISHING)) {
+ mb_entry = list_entry(ctx->src_queue.next, struct s5p_mfc_buf,
+ list);
+ if (mb_entry->flags & MFC_BUF_FLAG_USED) {
+@@ -1288,7 +1290,13 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx)
+ vb2_set_plane_payload(&mb_entry->b->vb2_buf, 0, strm_size);
+ vb2_buffer_done(&mb_entry->b->vb2_buf, VB2_BUF_STATE_DONE);
+ }
+- if ((ctx->src_queue_cnt == 0) || (ctx->dst_queue_cnt == 0))
++
++ src_ready = true;
++ if (ctx->state == MFCINST_RUNNING && ctx->src_queue_cnt == 0)
++ src_ready = false;
++ if (ctx->state == MFCINST_FINISHING && ctx->ref_queue_cnt == 0)
++ src_ready = false;
++ if (!src_ready || ctx->dst_queue_cnt == 0)
+ clear_work_bit(ctx);
+
+ return 0;
+diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
+index a1453053e31ab..ef8169f6c428c 100644
+--- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
+@@ -1060,7 +1060,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
+ }
+
+ /* aspect ratio VUI */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 5);
+ reg |= ((p_h264->vui_sar & 0x1) << 5);
+ writel(reg, mfc_regs->e_h264_options);
+@@ -1083,7 +1083,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
+
+ /* intra picture period for H.264 open GOP */
+ /* control */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 4);
+ reg |= ((p_h264->open_gop & 0x1) << 4);
+ writel(reg, mfc_regs->e_h264_options);
+@@ -1097,23 +1097,23 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
+ }
+
+ /* 'WEIGHTED_BI_PREDICTION' for B is disable */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x3 << 9);
+ writel(reg, mfc_regs->e_h264_options);
+
+ /* 'CONSTRAINED_INTRA_PRED_ENABLE' is disable */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 14);
+ writel(reg, mfc_regs->e_h264_options);
+
+ /* ASO */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 6);
+ reg |= ((p_h264->aso & 0x1) << 6);
+ writel(reg, mfc_regs->e_h264_options);
+
+ /* hier qp enable */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 8);
+ reg |= ((p_h264->open_gop & 0x1) << 8);
+ writel(reg, mfc_regs->e_h264_options);
+@@ -1134,7 +1134,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
+ writel(reg, mfc_regs->e_h264_num_t_layer);
+
+ /* frame packing SEI generation */
+- readl(mfc_regs->e_h264_options);
++ reg = readl(mfc_regs->e_h264_options);
+ reg &= ~(0x1 << 25);
+ reg |= ((p_h264->sei_frame_packing & 0x1) << 25);
+ writel(reg, mfc_regs->e_h264_options);
+diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+index 6413cd2791251..19a0f12483dba 100644
+--- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
++++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+@@ -1310,6 +1310,8 @@ static int bdisp_probe(struct platform_device *pdev)
+ init_waitqueue_head(&bdisp->irq_queue);
+ INIT_DELAYED_WORK(&bdisp->timeout_work, bdisp_irq_timeout);
+ bdisp->work_queue = create_workqueue(BDISP_NAME);
++ if (!bdisp->work_queue)
++ return -ENOMEM;
+
+ spin_lock_init(&bdisp->slock);
+ mutex_init(&bdisp->lock);
+diff --git a/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c b/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c
+index 338b205ae3a79..88d0188397e7b 100644
+--- a/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c
++++ b/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c
+@@ -940,6 +940,7 @@ static int configure_channels(struct c8sectpfei *fei)
+ if (ret) {
+ dev_err(fei->dev,
+ "configure_memdma_and_inputblock failed\n");
++ of_node_put(child);
+ goto err_unmap;
+ }
+ index++;
+diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c
+index c887a31ebb540..420ad4d8df5d5 100644
+--- a/drivers/media/platform/sti/delta/delta-v4l2.c
++++ b/drivers/media/platform/sti/delta/delta-v4l2.c
+@@ -1859,7 +1859,7 @@ static int delta_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(delta->dev, "%s failed to initialize firmware ipc channel\n",
+ DELTA_PREFIX);
+- goto err;
++ goto err_pm_disable;
+ }
+
+ /* register all available decoders */
+@@ -1873,7 +1873,7 @@ static int delta_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(delta->dev, "%s failed to register V4L2 device\n",
+ DELTA_PREFIX);
+- goto err;
++ goto err_pm_disable;
+ }
+
+ delta->work_queue = create_workqueue(DELTA_NAME);
+@@ -1898,6 +1898,8 @@ err_work_queue:
+ destroy_workqueue(delta->work_queue);
+ err_v4l2:
+ v4l2_device_unregister(&delta->v4l2_dev);
++err_pm_disable:
++ pm_runtime_disable(dev);
+ err:
+ return ret;
+ }
+diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c
+index d914ccef98317..6110718645a4f 100644
+--- a/drivers/media/platform/stm32/stm32-dcmi.c
++++ b/drivers/media/platform/stm32/stm32-dcmi.c
+@@ -128,6 +128,7 @@ struct stm32_dcmi {
+ int sequence;
+ struct list_head buffers;
+ struct dcmi_buf *active;
++ int irq;
+
+ struct v4l2_device v4l2_dev;
+ struct video_device *vdev;
+@@ -1759,6 +1760,14 @@ static int dcmi_graph_notify_complete(struct v4l2_async_notifier *notifier)
+ return ret;
+ }
+
++ ret = devm_request_threaded_irq(dcmi->dev, dcmi->irq, dcmi_irq_callback,
++ dcmi_irq_thread, IRQF_ONESHOT,
++ dev_name(dcmi->dev), dcmi);
++ if (ret) {
++ dev_err(dcmi->dev, "Unable to request irq %d\n", dcmi->irq);
++ return ret;
++ }
++
+ return 0;
+ }
+
+@@ -1914,6 +1923,8 @@ static int dcmi_probe(struct platform_device *pdev)
+ if (irq <= 0)
+ return irq ? irq : -ENXIO;
+
++ dcmi->irq = irq;
++
+ dcmi->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!dcmi->res) {
+ dev_err(&pdev->dev, "Could not get resource\n");
+@@ -1926,14 +1937,6 @@ static int dcmi_probe(struct platform_device *pdev)
+ return PTR_ERR(dcmi->regs);
+ }
+
+- ret = devm_request_threaded_irq(&pdev->dev, irq, dcmi_irq_callback,
+- dcmi_irq_thread, IRQF_ONESHOT,
+- dev_name(&pdev->dev), dcmi);
+- if (ret) {
+- dev_err(&pdev->dev, "Unable to request irq %d\n", irq);
+- return ret;
+- }
+-
+ mclk = devm_clk_get(&pdev->dev, "mclk");
+ if (IS_ERR(mclk)) {
+ if (PTR_ERR(mclk) != -EPROBE_DEFER)
+diff --git a/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c b/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
+index 07b2161392d21..5ba3e29f794fd 100644
+--- a/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
++++ b/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
+@@ -467,7 +467,7 @@ static const struct v4l2_ioctl_ops sun6i_video_ioctl_ops = {
+ static int sun6i_video_open(struct file *file)
+ {
+ struct sun6i_video *video = video_drvdata(file);
+- int ret;
++ int ret = 0;
+
+ if (mutex_lock_interruptible(&video->lock))
+ return -ERESTARTSYS;
+@@ -481,10 +481,8 @@ static int sun6i_video_open(struct file *file)
+ goto fh_release;
+
+ /* check if already powered */
+- if (!v4l2_fh_is_singular_file(file)) {
+- ret = -EBUSY;
++ if (!v4l2_fh_is_singular_file(file))
+ goto unlock;
+- }
+
+ ret = sun6i_csi_set_power(video->csi, true);
+ if (ret < 0)
+diff --git a/drivers/media/platform/ti-vpe/cal-video.c b/drivers/media/platform/ti-vpe/cal-video.c
+index 7799da1cc261b..d87177d04e921 100644
+--- a/drivers/media/platform/ti-vpe/cal-video.c
++++ b/drivers/media/platform/ti-vpe/cal-video.c
+@@ -814,7 +814,6 @@ static const struct v4l2_file_operations cal_fops = {
+
+ static int cal_ctx_v4l2_init_formats(struct cal_ctx *ctx)
+ {
+- struct v4l2_subdev_mbus_code_enum mbus_code;
+ struct v4l2_mbus_framefmt mbus_fmt;
+ const struct cal_format_info *fmtinfo;
+ unsigned int i, j, k;
+@@ -823,13 +822,17 @@ static int cal_ctx_v4l2_init_formats(struct cal_ctx *ctx)
+ /* Enumerate sub device formats and enable all matching local formats */
+ ctx->active_fmt = devm_kcalloc(ctx->cal->dev, cal_num_formats,
+ sizeof(*ctx->active_fmt), GFP_KERNEL);
++ if (!ctx->active_fmt)
++ return -ENOMEM;
++
+ ctx->num_active_fmt = 0;
+
+ for (j = 0, i = 0; ; ++j) {
++ struct v4l2_subdev_mbus_code_enum mbus_code = {
++ .index = j,
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+
+- memset(&mbus_code, 0, sizeof(mbus_code));
+- mbus_code.index = j;
+- mbus_code.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ ret = v4l2_subdev_call(ctx->phy->source, pad, enum_mbus_code,
+ NULL, &mbus_code);
+ if (ret == -EINVAL)
+diff --git a/drivers/media/platform/ti-vpe/cal.c b/drivers/media/platform/ti-vpe/cal.c
+index 8e469d518a742..35d62eb1321fb 100644
+--- a/drivers/media/platform/ti-vpe/cal.c
++++ b/drivers/media/platform/ti-vpe/cal.c
+@@ -940,8 +940,10 @@ static struct cal_ctx *cal_ctx_create(struct cal_dev *cal, int inst)
+ ctx->datatype = CAL_CSI2_CTX_DT_ANY;
+
+ ret = cal_ctx_v4l2_init(ctx);
+- if (ret)
++ if (ret) {
++ kfree(ctx);
+ return NULL;
++ }
+
+ return ctx;
+ }
+diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
+index 06f74d410973e..706d48601bf2c 100644
+--- a/drivers/media/platform/vsp1/vsp1_drm.c
++++ b/drivers/media/platform/vsp1/vsp1_drm.c
+@@ -66,7 +66,9 @@ static int vsp1_du_insert_uif(struct vsp1_device *vsp1,
+ struct vsp1_entity *prev, unsigned int prev_pad,
+ struct vsp1_entity *next, unsigned int next_pad)
+ {
+- struct v4l2_subdev_format format;
++ struct v4l2_subdev_format format = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+ int ret;
+
+ if (!uif) {
+@@ -82,8 +84,6 @@ static int vsp1_du_insert_uif(struct vsp1_device *vsp1,
+ prev->sink = uif;
+ prev->sink_pad = UIF_PAD_SINK;
+
+- memset(&format, 0, sizeof(format));
+- format.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ format.pad = prev_pad;
+
+ ret = v4l2_subdev_call(&prev->subdev, pad, get_fmt, NULL, &format);
+@@ -118,8 +118,12 @@ static int vsp1_du_pipeline_setup_rpf(struct vsp1_device *vsp1,
+ struct vsp1_entity *uif,
+ unsigned int brx_input)
+ {
+- struct v4l2_subdev_selection sel;
+- struct v4l2_subdev_format format;
++ struct v4l2_subdev_selection sel = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
++ struct v4l2_subdev_format format = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+ const struct v4l2_rect *crop;
+ int ret;
+
+@@ -129,8 +133,6 @@ static int vsp1_du_pipeline_setup_rpf(struct vsp1_device *vsp1,
+ */
+ crop = &vsp1->drm->inputs[rpf->entity.index].crop;
+
+- memset(&format, 0, sizeof(format));
+- format.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ format.pad = RWPF_PAD_SINK;
+ format.format.width = crop->width + crop->left;
+ format.format.height = crop->height + crop->top;
+@@ -147,8 +149,6 @@ static int vsp1_du_pipeline_setup_rpf(struct vsp1_device *vsp1,
+ __func__, format.format.width, format.format.height,
+ format.format.code, rpf->entity.index);
+
+- memset(&sel, 0, sizeof(sel));
+- sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ sel.pad = RWPF_PAD_SINK;
+ sel.target = V4L2_SEL_TGT_CROP;
+ sel.r = *crop;
+diff --git a/drivers/media/platform/vsp1/vsp1_entity.c b/drivers/media/platform/vsp1/vsp1_entity.c
+index 823c15facd1b4..b40926270c149 100644
+--- a/drivers/media/platform/vsp1/vsp1_entity.c
++++ b/drivers/media/platform/vsp1/vsp1_entity.c
+@@ -184,15 +184,14 @@ vsp1_entity_get_pad_selection(struct vsp1_entity *entity,
+ int vsp1_entity_init_cfg(struct v4l2_subdev *subdev,
+ struct v4l2_subdev_state *sd_state)
+ {
+- struct v4l2_subdev_format format;
+ unsigned int pad;
+
+ for (pad = 0; pad < subdev->entity.num_pads - 1; ++pad) {
+- memset(&format, 0, sizeof(format));
+-
+- format.pad = pad;
+- format.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+- : V4L2_SUBDEV_FORMAT_ACTIVE;
++ struct v4l2_subdev_format format = {
++ .pad = pad,
++ .which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
++ : V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+
+ v4l2_subdev_call(subdev, pad, set_fmt, sd_state, &format);
+ }
+diff --git a/drivers/media/platform/vsp1/vsp1_rpf.c b/drivers/media/platform/vsp1/vsp1_rpf.c
+index 85587c1b6a373..75083cb234fe3 100644
+--- a/drivers/media/platform/vsp1/vsp1_rpf.c
++++ b/drivers/media/platform/vsp1/vsp1_rpf.c
+@@ -291,11 +291,11 @@ static void rpf_configure_partition(struct vsp1_entity *entity,
+ + crop.left * fmtinfo->bpp[0] / 8;
+
+ if (format->num_planes > 1) {
++ unsigned int bpl = format->plane_fmt[1].bytesperline;
+ unsigned int offset;
+
+- offset = crop.top * format->plane_fmt[1].bytesperline
+- + crop.left / fmtinfo->hsub
+- * fmtinfo->bpp[1] / 8;
++ offset = crop.top / fmtinfo->vsub * bpl
++ + crop.left / fmtinfo->hsub * fmtinfo->bpp[1] / 8;
+ mem.addr[1] += offset;
+ mem.addr[2] += offset;
+ }
+diff --git a/drivers/media/platform/xilinx/xilinx-vipp.c b/drivers/media/platform/xilinx/xilinx-vipp.c
+index 2ce31d7ce1a63..5896a662da3ba 100644
+--- a/drivers/media/platform/xilinx/xilinx-vipp.c
++++ b/drivers/media/platform/xilinx/xilinx-vipp.c
+@@ -472,7 +472,7 @@ static int xvip_graph_dma_init(struct xvip_composite_device *xdev)
+ {
+ struct device_node *ports;
+ struct device_node *port;
+- int ret;
++ int ret = 0;
+
+ ports = of_get_child_by_name(xdev->dev->of_node, "ports");
+ if (ports == NULL) {
+@@ -482,13 +482,14 @@ static int xvip_graph_dma_init(struct xvip_composite_device *xdev)
+
+ for_each_child_of_node(ports, port) {
+ ret = xvip_graph_dma_init_one(xdev, port);
+- if (ret < 0) {
++ if (ret) {
+ of_node_put(port);
+- return ret;
++ break;
+ }
+ }
+
+- return 0;
++ of_node_put(ports);
++ return ret;
+ }
+
+ static void xvip_graph_cleanup(struct xvip_composite_device *xdev)
+diff --git a/drivers/media/radio/radio-shark.c b/drivers/media/radio/radio-shark.c
+index 8230da828d0ee..127a3be0e0f07 100644
+--- a/drivers/media/radio/radio-shark.c
++++ b/drivers/media/radio/radio-shark.c
+@@ -316,6 +316,16 @@ static int usb_shark_probe(struct usb_interface *intf,
+ {
+ struct shark_device *shark;
+ int retval = -ENOMEM;
++ static const u8 ep_addresses[] = {
++ SHARK_IN_EP | USB_DIR_IN,
++ SHARK_OUT_EP | USB_DIR_OUT,
++ 0};
++
++ /* Are the expected endpoints present? */
++ if (!usb_check_int_endpoints(intf, ep_addresses)) {
++ dev_err(&intf->dev, "Invalid radioSHARK device\n");
++ return -EINVAL;
++ }
+
+ shark = kzalloc(sizeof(struct shark_device), GFP_KERNEL);
+ if (!shark)
+diff --git a/drivers/media/radio/radio-shark2.c b/drivers/media/radio/radio-shark2.c
+index d150f12382c60..f1c5c0a6a335c 100644
+--- a/drivers/media/radio/radio-shark2.c
++++ b/drivers/media/radio/radio-shark2.c
+@@ -282,6 +282,16 @@ static int usb_shark_probe(struct usb_interface *intf,
+ {
+ struct shark_device *shark;
+ int retval = -ENOMEM;
++ static const u8 ep_addresses[] = {
++ SHARK_IN_EP | USB_DIR_IN,
++ SHARK_OUT_EP | USB_DIR_OUT,
++ 0};
++
++ /* Are the expected endpoints present? */
++ if (!usb_check_int_endpoints(intf, ep_addresses)) {
++ dev_err(&intf->dev, "Invalid radioSHARK2 device\n");
++ return -EINVAL;
++ }
+
+ shark = kzalloc(sizeof(struct shark_device), GFP_KERNEL);
+ if (!shark)
+diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c
+index 1123768731676..484046471c03f 100644
+--- a/drivers/media/radio/radio-wl1273.c
++++ b/drivers/media/radio/radio-wl1273.c
+@@ -1279,7 +1279,7 @@ static int wl1273_fm_vidioc_querycap(struct file *file, void *priv,
+
+ strscpy(capability->driver, WL1273_FM_DRIVER_NAME,
+ sizeof(capability->driver));
+- strscpy(capability->card, "Texas Instruments Wl1273 FM Radio",
++ strscpy(capability->card, "TI Wl1273 FM Radio",
+ sizeof(capability->card));
+ strscpy(capability->bus_info, radio->bus_type,
+ sizeof(capability->bus_info));
+diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c
+index f491420d7b538..76d39e2e87706 100644
+--- a/drivers/media/radio/si470x/radio-si470x-i2c.c
++++ b/drivers/media/radio/si470x/radio-si470x-i2c.c
+@@ -11,7 +11,7 @@
+
+ /* driver definitions */
+ #define DRIVER_AUTHOR "Joonyoung Shim <jy0922.shim@samsung.com>";
+-#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver"
++#define DRIVER_CARD "Silicon Labs Si470x FM Radio"
+ #define DRIVER_DESC "I2C radio driver for Si470x FM Radio Receivers"
+ #define DRIVER_VERSION "1.0.2"
+
+@@ -368,7 +368,7 @@ static int si470x_i2c_probe(struct i2c_client *client)
+ if (radio->hdl.error) {
+ retval = radio->hdl.error;
+ dev_err(&client->dev, "couldn't register control\n");
+- goto err_dev;
++ goto err_all;
+ }
+
+ /* video device initialization */
+@@ -463,7 +463,6 @@ static int si470x_i2c_probe(struct i2c_client *client)
+ return 0;
+ err_all:
+ v4l2_ctrl_handler_free(&radio->hdl);
+-err_dev:
+ v4l2_device_unregister(&radio->v4l2_dev);
+ err_initial:
+ return retval;
+diff --git a/drivers/media/radio/si470x/radio-si470x-usb.c b/drivers/media/radio/si470x/radio-si470x-usb.c
+index fedff68d8c496..1365ae732b799 100644
+--- a/drivers/media/radio/si470x/radio-si470x-usb.c
++++ b/drivers/media/radio/si470x/radio-si470x-usb.c
+@@ -16,7 +16,7 @@
+
+ /* driver definitions */
+ #define DRIVER_AUTHOR "Tobias Lorenz <tobias.lorenz@gmx.net>"
+-#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver"
++#define DRIVER_CARD "Silicon Labs Si470x FM Radio"
+ #define DRIVER_DESC "USB radio driver for Si470x FM Radio Receivers"
+ #define DRIVER_VERSION "1.0.10"
+
+@@ -733,8 +733,10 @@ static int si470x_usb_driver_probe(struct usb_interface *intf,
+
+ /* start radio */
+ retval = si470x_start_usb(radio);
+- if (retval < 0)
++ if (retval < 0 && !radio->int_in_running)
+ goto err_buf;
++ else if (retval < 0) /* in case of radio->int_in_running == 1 */
++ goto err_all;
+
+ /* set initial frequency */
+ si470x_set_freq(radio, 87.5 * FREQ_MUL); /* available in all regions */
+diff --git a/drivers/media/rc/ene_ir.c b/drivers/media/rc/ene_ir.c
+index e09270916fbca..11ee21a7db8f0 100644
+--- a/drivers/media/rc/ene_ir.c
++++ b/drivers/media/rc/ene_ir.c
+@@ -1106,6 +1106,8 @@ static void ene_remove(struct pnp_dev *pnp_dev)
+ struct ene_device *dev = pnp_get_drvdata(pnp_dev);
+ unsigned long flags;
+
++ rc_unregister_device(dev->rdev);
++ del_timer_sync(&dev->tx_sim_timer);
+ spin_lock_irqsave(&dev->hw_lock, flags);
+ ene_rx_disable(dev);
+ ene_rx_restore_hw_buffer(dev);
+@@ -1113,7 +1115,6 @@ static void ene_remove(struct pnp_dev *pnp_dev)
+
+ free_irq(dev->irq, dev);
+ release_region(dev->hw_io, ENE_IO_SIZE);
+- rc_unregister_device(dev->rdev);
+ kfree(dev);
+ }
+
+diff --git a/drivers/media/rc/gpio-ir-recv.c b/drivers/media/rc/gpio-ir-recv.c
+index 22e524b69806a..16795e07dc103 100644
+--- a/drivers/media/rc/gpio-ir-recv.c
++++ b/drivers/media/rc/gpio-ir-recv.c
+@@ -107,6 +107,8 @@ static int gpio_ir_recv_probe(struct platform_device *pdev)
+ rcdev->map_name = RC_MAP_EMPTY;
+
+ gpio_dev->rcdev = rcdev;
++ if (of_property_read_bool(np, "wakeup-source"))
++ device_init_wakeup(dev, true);
+
+ rc = devm_rc_register_device(dev, rcdev);
+ if (rc < 0) {
+@@ -130,6 +132,23 @@ static int gpio_ir_recv_probe(struct platform_device *pdev)
+ "gpio-ir-recv-irq", gpio_dev);
+ }
+
++static int gpio_ir_recv_remove(struct platform_device *pdev)
++{
++ struct gpio_rc_dev *gpio_dev = platform_get_drvdata(pdev);
++ struct device *pmdev = gpio_dev->pmdev;
++
++ if (pmdev) {
++ pm_runtime_get_sync(pmdev);
++ cpu_latency_qos_remove_request(&gpio_dev->qos);
++
++ pm_runtime_disable(pmdev);
++ pm_runtime_put_noidle(pmdev);
++ pm_runtime_set_suspended(pmdev);
++ }
++
++ return 0;
++}
++
+ #ifdef CONFIG_PM
+ static int gpio_ir_recv_suspend(struct device *dev)
+ {
+@@ -189,6 +208,7 @@ MODULE_DEVICE_TABLE(of, gpio_ir_recv_of_match);
+
+ static struct platform_driver gpio_ir_recv_driver = {
+ .probe = gpio_ir_recv_probe,
++ .remove = gpio_ir_recv_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = of_match_ptr(gpio_ir_recv_of_match),
+diff --git a/drivers/media/rc/gpio-ir-tx.c b/drivers/media/rc/gpio-ir-tx.c
+index c6cd2e6d8e654..a50701cfbbd7b 100644
+--- a/drivers/media/rc/gpio-ir-tx.c
++++ b/drivers/media/rc/gpio-ir-tx.c
+@@ -48,11 +48,29 @@ static int gpio_ir_tx_set_carrier(struct rc_dev *dev, u32 carrier)
+ return 0;
+ }
+
++static void delay_until(ktime_t until)
++{
++ /*
++ * delta should never exceed 0.5 seconds (IR_MAX_DURATION) and on
++ * m68k ndelay(s64) does not compile; so use s32 rather than s64.
++ */
++ s32 delta;
++
++ while (true) {
++ delta = ktime_us_delta(until, ktime_get());
++ if (delta <= 0)
++ return;
++
++ /* udelay more than 1ms may not work */
++ delta = min(delta, 1000);
++ udelay(delta);
++ }
++}
++
+ static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf,
+ uint count)
+ {
+ ktime_t edge;
+- s32 delta;
+ int i;
+
+ local_irq_disable();
+@@ -63,9 +81,7 @@ static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf,
+ gpiod_set_value(gpio_ir->gpio, !(i % 2));
+
+ edge = ktime_add_us(edge, txbuf[i]);
+- delta = ktime_us_delta(edge, ktime_get());
+- if (delta > 0)
+- udelay(delta);
++ delay_until(edge);
+ }
+
+ gpiod_set_value(gpio_ir->gpio, 0);
+@@ -97,9 +113,7 @@ static void gpio_ir_tx_modulated(struct gpio_ir *gpio_ir, uint *txbuf,
+ if (i % 2) {
+ // space
+ edge = ktime_add_us(edge, txbuf[i]);
+- delta = ktime_us_delta(edge, ktime_get());
+- if (delta > 0)
+- udelay(delta);
++ delay_until(edge);
+ } else {
+ // pulse
+ ktime_t last = ktime_add_us(edge, txbuf[i]);
+diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c
+index effaa5751d6c9..3e9988ee785f0 100644
+--- a/drivers/media/rc/igorplugusb.c
++++ b/drivers/media/rc/igorplugusb.c
+@@ -64,9 +64,11 @@ static void igorplugusb_irdata(struct igorplugusb *ir, unsigned len)
+ if (start >= len) {
+ dev_err(ir->dev, "receive overflow invalid: %u", overflow);
+ } else {
+- if (overflow > 0)
++ if (overflow > 0) {
+ dev_warn(ir->dev, "receive overflow, at least %u lost",
+ overflow);
++ ir_raw_event_reset(ir->rc);
++ }
+
+ do {
+ rawir.duration = ir->buf_in[i] * 85;
+diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
+index 2ca4e86c7b9f1..72e4bb0fb71ec 100644
+--- a/drivers/media/rc/imon.c
++++ b/drivers/media/rc/imon.c
+@@ -153,6 +153,24 @@ struct imon_context {
+ const struct imon_usb_dev_descr *dev_descr;
+ /* device description with key */
+ /* table for front panels */
++ /*
++ * Fields for deferring free_imon_context().
++ *
++ * Since reference to "struct imon_context" is stored into
++ * "struct file"->private_data, we need to remember
++ * how many file descriptors might access this "struct imon_context".
++ */
++ refcount_t users;
++ /*
++ * Use a flag for telling display_open()/vfd_write()/lcd_write() that
++ * imon_disconnect() was already called.
++ */
++ bool disconnected;
++ /*
++ * We need to wait for RCU grace period in order to allow
++ * display_open() to safely check ->disconnected and increment ->users.
++ */
++ struct rcu_head rcu;
+ };
+
+ #define TOUCH_TIMEOUT (HZ/30)
+@@ -160,18 +178,18 @@ struct imon_context {
+ /* vfd character device file operations */
+ static const struct file_operations vfd_fops = {
+ .owner = THIS_MODULE,
+- .open = &display_open,
+- .write = &vfd_write,
+- .release = &display_close,
++ .open = display_open,
++ .write = vfd_write,
++ .release = display_close,
+ .llseek = noop_llseek,
+ };
+
+ /* lcd character device file operations */
+ static const struct file_operations lcd_fops = {
+ .owner = THIS_MODULE,
+- .open = &display_open,
+- .write = &lcd_write,
+- .release = &display_close,
++ .open = display_open,
++ .write = lcd_write,
++ .release = display_close,
+ .llseek = noop_llseek,
+ };
+
+@@ -439,9 +457,6 @@ static struct usb_driver imon_driver = {
+ .id_table = imon_usb_id_table,
+ };
+
+-/* to prevent races between open() and disconnect(), probing, etc */
+-static DEFINE_MUTEX(driver_lock);
+-
+ /* Module bookkeeping bits */
+ MODULE_AUTHOR(MOD_AUTHOR);
+ MODULE_DESCRIPTION(MOD_DESC);
+@@ -481,9 +496,11 @@ static void free_imon_context(struct imon_context *ictx)
+ struct device *dev = ictx->dev;
+
+ usb_free_urb(ictx->tx_urb);
++ WARN_ON(ictx->dev_present_intf0);
+ usb_free_urb(ictx->rx_urb_intf0);
++ WARN_ON(ictx->dev_present_intf1);
+ usb_free_urb(ictx->rx_urb_intf1);
+- kfree(ictx);
++ kfree_rcu(ictx, rcu);
+
+ dev_dbg(dev, "%s: iMON context freed\n", __func__);
+ }
+@@ -499,9 +516,6 @@ static int display_open(struct inode *inode, struct file *file)
+ int subminor;
+ int retval = 0;
+
+- /* prevent races with disconnect */
+- mutex_lock(&driver_lock);
+-
+ subminor = iminor(inode);
+ interface = usb_find_interface(&imon_driver, subminor);
+ if (!interface) {
+@@ -509,13 +523,16 @@ static int display_open(struct inode *inode, struct file *file)
+ retval = -ENODEV;
+ goto exit;
+ }
+- ictx = usb_get_intfdata(interface);
+
+- if (!ictx) {
++ rcu_read_lock();
++ ictx = usb_get_intfdata(interface);
++ if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) {
++ rcu_read_unlock();
+ pr_err("no context found for minor %d\n", subminor);
+ retval = -ENODEV;
+ goto exit;
+ }
++ rcu_read_unlock();
+
+ mutex_lock(&ictx->lock);
+
+@@ -533,8 +550,10 @@ static int display_open(struct inode *inode, struct file *file)
+
+ mutex_unlock(&ictx->lock);
+
++ if (retval && refcount_dec_and_test(&ictx->users))
++ free_imon_context(ictx);
++
+ exit:
+- mutex_unlock(&driver_lock);
+ return retval;
+ }
+
+@@ -544,16 +563,9 @@ exit:
+ */
+ static int display_close(struct inode *inode, struct file *file)
+ {
+- struct imon_context *ictx = NULL;
++ struct imon_context *ictx = file->private_data;
+ int retval = 0;
+
+- ictx = file->private_data;
+-
+- if (!ictx) {
+- pr_err("no context for device\n");
+- return -ENODEV;
+- }
+-
+ mutex_lock(&ictx->lock);
+
+ if (!ictx->display_supported) {
+@@ -568,6 +580,8 @@ static int display_close(struct inode *inode, struct file *file)
+ }
+
+ mutex_unlock(&ictx->lock);
++ if (refcount_dec_and_test(&ictx->users))
++ free_imon_context(ictx);
+ return retval;
+ }
+
+@@ -632,15 +646,14 @@ static int send_packet(struct imon_context *ictx)
+ pr_err_ratelimited("error submitting urb(%d)\n", retval);
+ } else {
+ /* Wait for transmission to complete (or abort) */
+- mutex_unlock(&ictx->lock);
+ retval = wait_for_completion_interruptible(
+ &ictx->tx.finished);
+ if (retval) {
+ usb_kill_urb(ictx->tx_urb);
+ pr_err_ratelimited("task interrupted\n");
+ }
+- mutex_lock(&ictx->lock);
+
++ ictx->tx.busy = false;
+ retval = ictx->tx.status;
+ if (retval)
+ pr_err_ratelimited("packet tx failed (%d)\n", retval);
+@@ -934,17 +947,15 @@ static ssize_t vfd_write(struct file *file, const char __user *buf,
+ int offset;
+ int seq;
+ int retval = 0;
+- struct imon_context *ictx;
++ struct imon_context *ictx = file->private_data;
+ static const unsigned char vfd_packet6[] = {
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF };
+
+- ictx = file->private_data;
+- if (!ictx) {
+- pr_err_ratelimited("no context for device\n");
++ if (ictx->disconnected)
+ return -ENODEV;
+- }
+
+- mutex_lock(&ictx->lock);
++ if (mutex_lock_interruptible(&ictx->lock))
++ return -ERESTARTSYS;
+
+ if (!ictx->dev_present_intf0) {
+ pr_err_ratelimited("no iMON device present\n");
+@@ -1018,13 +1029,10 @@ static ssize_t lcd_write(struct file *file, const char __user *buf,
+ size_t n_bytes, loff_t *pos)
+ {
+ int retval = 0;
+- struct imon_context *ictx;
++ struct imon_context *ictx = file->private_data;
+
+- ictx = file->private_data;
+- if (!ictx) {
+- pr_err_ratelimited("no context for device\n");
++ if (ictx->disconnected)
+ return -ENODEV;
+- }
+
+ mutex_lock(&ictx->lock);
+
+@@ -2402,7 +2410,6 @@ static int imon_probe(struct usb_interface *interface,
+ int ifnum, sysfs_err;
+ int ret = 0;
+ struct imon_context *ictx = NULL;
+- struct imon_context *first_if_ctx = NULL;
+ u16 vendor, product;
+
+ usbdev = usb_get_dev(interface_to_usbdev(interface));
+@@ -2414,17 +2421,12 @@ static int imon_probe(struct usb_interface *interface,
+ dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n",
+ __func__, vendor, product, ifnum);
+
+- /* prevent races probing devices w/multiple interfaces */
+- mutex_lock(&driver_lock);
+-
+ first_if = usb_ifnum_to_if(usbdev, 0);
+ if (!first_if) {
+ ret = -ENODEV;
+ goto fail;
+ }
+
+- first_if_ctx = usb_get_intfdata(first_if);
+-
+ if (ifnum == 0) {
+ ictx = imon_init_intf0(interface, id);
+ if (!ictx) {
+@@ -2432,9 +2434,11 @@ static int imon_probe(struct usb_interface *interface,
+ ret = -ENODEV;
+ goto fail;
+ }
++ refcount_set(&ictx->users, 1);
+
+ } else {
+ /* this is the secondary interface on the device */
++ struct imon_context *first_if_ctx = usb_get_intfdata(first_if);
+
+ /* fail early if first intf failed to register */
+ if (!first_if_ctx) {
+@@ -2448,14 +2452,13 @@ static int imon_probe(struct usb_interface *interface,
+ ret = -ENODEV;
+ goto fail;
+ }
++ refcount_inc(&ictx->users);
+
+ }
+
+ usb_set_intfdata(interface, ictx);
+
+ if (ifnum == 0) {
+- mutex_lock(&ictx->lock);
+-
+ if (product == 0xffdc && ictx->rf_device) {
+ sysfs_err = sysfs_create_group(&interface->dev.kobj,
+ &imon_rf_attr_group);
+@@ -2466,21 +2469,17 @@ static int imon_probe(struct usb_interface *interface,
+
+ if (ictx->display_supported)
+ imon_init_display(ictx, interface);
+-
+- mutex_unlock(&ictx->lock);
+ }
+
+ dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n",
+ vendor, product, ifnum,
+ usbdev->bus->busnum, usbdev->devnum);
+
+- mutex_unlock(&driver_lock);
+ usb_put_dev(usbdev);
+
+ return 0;
+
+ fail:
+- mutex_unlock(&driver_lock);
+ usb_put_dev(usbdev);
+ dev_err(dev, "unable to register, err %d\n", ret);
+
+@@ -2496,10 +2495,8 @@ static void imon_disconnect(struct usb_interface *interface)
+ struct device *dev;
+ int ifnum;
+
+- /* prevent races with multi-interface device probing and display_open */
+- mutex_lock(&driver_lock);
+-
+ ictx = usb_get_intfdata(interface);
++ ictx->disconnected = true;
+ dev = ictx->dev;
+ ifnum = interface->cur_altsetting->desc.bInterfaceNumber;
+
+@@ -2540,11 +2537,9 @@ static void imon_disconnect(struct usb_interface *interface)
+ }
+ }
+
+- if (!ictx->dev_present_intf0 && !ictx->dev_present_intf1)
++ if (refcount_dec_and_test(&ictx->users))
+ free_imon_context(ictx);
+
+- mutex_unlock(&driver_lock);
+-
+ dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n",
+ __func__, ifnum);
+ }
+diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c
+index 48d52baec1a1c..53ae19fa103ab 100644
+--- a/drivers/media/rc/ir_toy.c
++++ b/drivers/media/rc/ir_toy.c
+@@ -429,7 +429,7 @@ static int irtoy_probe(struct usb_interface *intf,
+ err = usb_submit_urb(irtoy->urb_in, GFP_KERNEL);
+ if (err != 0) {
+ dev_err(irtoy->dev, "fail to submit in urb: %d\n", err);
+- return err;
++ goto free_rcdev;
+ }
+
+ err = irtoy_setup(irtoy);
+diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c
+index 5bc23e8c6d91d..4f77d4ebacdc5 100644
+--- a/drivers/media/rc/ite-cir.c
++++ b/drivers/media/rc/ite-cir.c
+@@ -242,7 +242,7 @@ static irqreturn_t ite_cir_isr(int irq, void *data)
+ }
+
+ /* check for the receive interrupt */
+- if (iflags & ITE_IRQ_RX_FIFO) {
++ if (iflags & (ITE_IRQ_RX_FIFO | ITE_IRQ_RX_FIFO_OVERRUN)) {
+ /* read the FIFO bytes */
+ rx_bytes = dev->params->get_rx_bytes(dev, rx_buf,
+ ITE_RX_FIFO_LEN);
+diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
+index e03dd1f0144f0..391de68365f67 100644
+--- a/drivers/media/rc/mceusb.c
++++ b/drivers/media/rc/mceusb.c
+@@ -1077,7 +1077,7 @@ static int mceusb_set_timeout(struct rc_dev *dev, unsigned int timeout)
+ struct mceusb_dev *ir = dev->priv;
+ unsigned int units;
+
+- units = DIV_ROUND_CLOSEST(timeout, MCE_TIME_UNIT);
++ units = DIV_ROUND_UP(timeout, MCE_TIME_UNIT);
+
+ cmdbuf[2] = units >> 8;
+ cmdbuf[3] = units;
+@@ -1386,6 +1386,7 @@ static void mceusb_dev_recv(struct urb *urb)
+ case -ECONNRESET:
+ case -ENOENT:
+ case -EILSEQ:
++ case -EPROTO:
+ case -ESHUTDOWN:
+ usb_unlink_urb(urb);
+ return;
+@@ -1415,42 +1416,37 @@ static void mceusb_gen1_init(struct mceusb_dev *ir)
+ {
+ int ret;
+ struct device *dev = ir->dev;
+- char *data;
+-
+- data = kzalloc(USB_CTRL_MSG_SZ, GFP_KERNEL);
+- if (!data) {
+- dev_err(dev, "%s: memory allocation failed!", __func__);
+- return;
+- }
++ char data[USB_CTRL_MSG_SZ];
+
+ /*
+ * This is a strange one. Windows issues a set address to the device
+ * on the receive control pipe and expect a certain value pair back
+ */
+- ret = usb_control_msg(ir->usbdev, usb_rcvctrlpipe(ir->usbdev, 0),
+- USB_REQ_SET_ADDRESS, USB_TYPE_VENDOR, 0, 0,
+- data, USB_CTRL_MSG_SZ, HZ * 3);
++ ret = usb_control_msg_recv(ir->usbdev, 0, USB_REQ_SET_ADDRESS,
++ USB_DIR_IN | USB_TYPE_VENDOR,
++ 0, 0, data, USB_CTRL_MSG_SZ, 3000,
++ GFP_KERNEL);
+ dev_dbg(dev, "set address - ret = %d", ret);
+ dev_dbg(dev, "set address - data[0] = %d, data[1] = %d",
+ data[0], data[1]);
+
+ /* set feature: bit rate 38400 bps */
+- ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0),
+- USB_REQ_SET_FEATURE, USB_TYPE_VENDOR,
+- 0xc04e, 0x0000, NULL, 0, HZ * 3);
++ ret = usb_control_msg_send(ir->usbdev, 0,
++ USB_REQ_SET_FEATURE, USB_TYPE_VENDOR,
++ 0xc04e, 0x0000, NULL, 0, 3000, GFP_KERNEL);
+
+ dev_dbg(dev, "set feature - ret = %d", ret);
+
+ /* bRequest 4: set char length to 8 bits */
+- ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0),
+- 4, USB_TYPE_VENDOR,
+- 0x0808, 0x0000, NULL, 0, HZ * 3);
++ ret = usb_control_msg_send(ir->usbdev, 0,
++ 4, USB_TYPE_VENDOR,
++ 0x0808, 0x0000, NULL, 0, 3000, GFP_KERNEL);
+ dev_dbg(dev, "set char length - retB = %d", ret);
+
+ /* bRequest 2: set handshaking to use DTR/DSR */
+- ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0),
+- 2, USB_TYPE_VENDOR,
+- 0x0000, 0x0100, NULL, 0, HZ * 3);
++ ret = usb_control_msg_send(ir->usbdev, 0,
++ 2, USB_TYPE_VENDOR,
++ 0x0000, 0x0100, NULL, 0, 3000, GFP_KERNEL);
+ dev_dbg(dev, "set handshake - retC = %d", ret);
+
+ /* device resume */
+@@ -1458,8 +1454,6 @@ static void mceusb_gen1_init(struct mceusb_dev *ir)
+
+ /* get hw/sw revision? */
+ mce_command_out(ir, GET_REVISION, sizeof(GET_REVISION));
+-
+- kfree(data);
+ }
+
+ static void mceusb_gen2_init(struct mceusb_dev *ir)
+diff --git a/drivers/media/rc/redrat3.c b/drivers/media/rc/redrat3.c
+index ac85464864b9d..cb22316b3f002 100644
+--- a/drivers/media/rc/redrat3.c
++++ b/drivers/media/rc/redrat3.c
+@@ -404,7 +404,7 @@ static int redrat3_send_cmd(int cmd, struct redrat3_dev *rr3)
+ udev = rr3->udev;
+ res = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), cmd,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- 0x0000, 0x0000, data, sizeof(u8), HZ * 10);
++ 0x0000, 0x0000, data, sizeof(u8), 10000);
+
+ if (res < 0) {
+ dev_err(rr3->dev, "%s: Error sending rr3 cmd res %d, data %d",
+@@ -480,7 +480,7 @@ static u32 redrat3_get_timeout(struct redrat3_dev *rr3)
+ pipe = usb_rcvctrlpipe(rr3->udev, 0);
+ ret = usb_control_msg(rr3->udev, pipe, RR3_GET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, HZ * 5);
++ RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, 5000);
+ if (ret != len)
+ dev_warn(rr3->dev, "Failed to read timeout from hardware\n");
+ else {
+@@ -510,7 +510,7 @@ static int redrat3_set_timeout(struct rc_dev *rc_dev, unsigned int timeoutus)
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RR3_SET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+ RR3_IR_IO_SIG_TIMEOUT, 0, timeout, sizeof(*timeout),
+- HZ * 25);
++ 25000);
+ dev_dbg(dev, "set ir parm timeout %d ret 0x%02x\n",
+ be32_to_cpu(*timeout), ret);
+
+@@ -542,32 +542,32 @@ static void redrat3_reset(struct redrat3_dev *rr3)
+ *val = 0x01;
+ rc = usb_control_msg(udev, rxpipe, RR3_RESET,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- RR3_CPUCS_REG_ADDR, 0, val, len, HZ * 25);
++ RR3_CPUCS_REG_ADDR, 0, val, len, 25000);
+ dev_dbg(dev, "reset returned 0x%02x\n", rc);
+
+ *val = length_fuzz;
+ rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- RR3_IR_IO_LENGTH_FUZZ, 0, val, len, HZ * 25);
++ RR3_IR_IO_LENGTH_FUZZ, 0, val, len, 25000);
+ dev_dbg(dev, "set ir parm len fuzz %d rc 0x%02x\n", *val, rc);
+
+ *val = (65536 - (minimum_pause * 2000)) / 256;
+ rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- RR3_IR_IO_MIN_PAUSE, 0, val, len, HZ * 25);
++ RR3_IR_IO_MIN_PAUSE, 0, val, len, 25000);
+ dev_dbg(dev, "set ir parm min pause %d rc 0x%02x\n", *val, rc);
+
+ *val = periods_measure_carrier;
+ rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- RR3_IR_IO_PERIODS_MF, 0, val, len, HZ * 25);
++ RR3_IR_IO_PERIODS_MF, 0, val, len, 25000);
+ dev_dbg(dev, "set ir parm periods measure carrier %d rc 0x%02x", *val,
+ rc);
+
+ *val = RR3_DRIVER_MAXLENS;
+ rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- RR3_IR_IO_MAX_LENGTHS, 0, val, len, HZ * 25);
++ RR3_IR_IO_MAX_LENGTHS, 0, val, len, 25000);
+ dev_dbg(dev, "set ir parm max lens %d rc 0x%02x\n", *val, rc);
+
+ kfree(val);
+@@ -585,7 +585,7 @@ static void redrat3_get_firmware_rev(struct redrat3_dev *rr3)
+ rc = usb_control_msg(rr3->udev, usb_rcvctrlpipe(rr3->udev, 0),
+ RR3_FW_VERSION,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- 0, 0, buffer, RR3_FW_VERSION_LEN, HZ * 5);
++ 0, 0, buffer, RR3_FW_VERSION_LEN, 5000);
+
+ if (rc >= 0)
+ dev_info(rr3->dev, "Firmware rev: %s", buffer);
+@@ -825,14 +825,14 @@ static int redrat3_transmit_ir(struct rc_dev *rcdev, unsigned *txbuf,
+
+ pipe = usb_sndbulkpipe(rr3->udev, rr3->ep_out->bEndpointAddress);
+ ret = usb_bulk_msg(rr3->udev, pipe, irdata,
+- sendbuf_len, &ret_len, 10 * HZ);
++ sendbuf_len, &ret_len, 10000);
+ dev_dbg(dev, "sent %d bytes, (ret %d)\n", ret_len, ret);
+
+ /* now tell the hardware to transmit what we sent it */
+ pipe = usb_rcvctrlpipe(rr3->udev, 0);
+ ret = usb_control_msg(rr3->udev, pipe, RR3_TX_SEND_SIGNAL,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- 0, 0, irdata, 2, HZ * 10);
++ 0, 0, irdata, 2, 10000);
+
+ if (ret < 0)
+ dev_err(dev, "Error: control msg send failed, rc %d\n", ret);
+diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c
+index b91a1e845b972..506f52c1af101 100644
+--- a/drivers/media/spi/cxd2880-spi.c
++++ b/drivers/media/spi/cxd2880-spi.c
+@@ -618,7 +618,7 @@ fail_frontend:
+ fail_attach:
+ dvb_unregister_adapter(&dvb_spi->adapter);
+ fail_adapter:
+- if (!dvb_spi->vcc_supply)
++ if (dvb_spi->vcc_supply)
+ regulator_disable(dvb_spi->vcc_supply);
+ fail_regulator:
+ kfree(dvb_spi);
+diff --git a/drivers/media/test-drivers/vidtv/vidtv_bridge.c b/drivers/media/test-drivers/vidtv/vidtv_bridge.c
+index 75617709c8ce2..dff7265a42ca2 100644
+--- a/drivers/media/test-drivers/vidtv/vidtv_bridge.c
++++ b/drivers/media/test-drivers/vidtv/vidtv_bridge.c
+@@ -459,26 +459,20 @@ fail_dmx_conn:
+ for (j = j - 1; j >= 0; --j)
+ dvb->demux.dmx.remove_frontend(&dvb->demux.dmx,
+ &dvb->dmx_fe[j]);
+-fail_dmx_dev:
+ dvb_dmxdev_release(&dvb->dmx_dev);
+-fail_dmx:
++fail_dmx_dev:
+ dvb_dmx_release(&dvb->demux);
++fail_dmx:
++fail_demod_probe:
++ for (i = i - 1; i >= 0; --i) {
++ dvb_unregister_frontend(dvb->fe[i]);
+ fail_fe:
+- for (j = i; j >= 0; --j)
+- dvb_unregister_frontend(dvb->fe[j]);
++ dvb_module_release(dvb->i2c_client_tuner[i]);
+ fail_tuner_probe:
+- for (j = i; j >= 0; --j)
+- if (dvb->i2c_client_tuner[j])
+- dvb_module_release(dvb->i2c_client_tuner[j]);
+-
+-fail_demod_probe:
+- for (j = i; j >= 0; --j)
+- if (dvb->i2c_client_demod[j])
+- dvb_module_release(dvb->i2c_client_demod[j]);
+-
++ dvb_module_release(dvb->i2c_client_demod[i]);
++ }
+ fail_adapter:
+ dvb_unregister_adapter(&dvb->adapter);
+-
+ fail_i2c:
+ i2c_del_adapter(&dvb->i2c_adapter);
+
+@@ -564,6 +558,10 @@ static int vidtv_bridge_remove(struct platform_device *pdev)
+
+ static void vidtv_bridge_dev_release(struct device *dev)
+ {
++ struct vidtv_dvb *dvb;
++
++ dvb = dev_get_drvdata(dev);
++ kfree(dvb);
+ }
+
+ static struct platform_device vidtv_bridge_dev = {
+diff --git a/drivers/media/test-drivers/vidtv/vidtv_s302m.c b/drivers/media/test-drivers/vidtv/vidtv_s302m.c
+index d79b65854627c..4676083cee3b8 100644
+--- a/drivers/media/test-drivers/vidtv/vidtv_s302m.c
++++ b/drivers/media/test-drivers/vidtv/vidtv_s302m.c
+@@ -455,6 +455,9 @@ struct vidtv_encoder
+ e->name = kstrdup(args.name, GFP_KERNEL);
+
+ e->encoder_buf = vzalloc(VIDTV_S302M_BUF_SZ);
++ if (!e->encoder_buf)
++ goto out_kfree_e;
++
+ e->encoder_buf_sz = VIDTV_S302M_BUF_SZ;
+ e->encoder_buf_offset = 0;
+
+@@ -467,10 +470,8 @@ struct vidtv_encoder
+ e->is_video_encoder = false;
+
+ ctx = kzalloc(priv_sz, GFP_KERNEL);
+- if (!ctx) {
+- kfree(e);
+- return NULL;
+- }
++ if (!ctx)
++ goto out_kfree_buf;
+
+ e->ctx = ctx;
+ ctx->last_duration = 0;
+@@ -498,6 +499,14 @@ struct vidtv_encoder
+ e->next = NULL;
+
+ return e;
++
++out_kfree_buf:
++ kfree(e->encoder_buf);
++
++out_kfree_e:
++ kfree(e->name);
++ kfree(e);
++ return NULL;
+ }
+
+ void vidtv_s302m_encoder_destroy(struct vidtv_encoder *e)
+diff --git a/drivers/media/test-drivers/vimc/vimc-core.c b/drivers/media/test-drivers/vimc/vimc-core.c
+index 4b0ae6f51d765..857529ce3638a 100644
+--- a/drivers/media/test-drivers/vimc/vimc-core.c
++++ b/drivers/media/test-drivers/vimc/vimc-core.c
+@@ -357,7 +357,7 @@ static int __init vimc_init(void)
+ if (ret) {
+ dev_err(&vimc_pdev.dev,
+ "platform driver registration failed (err=%d)\n", ret);
+- platform_driver_unregister(&vimc_pdrv);
++ platform_device_unregister(&vimc_pdev);
+ return ret;
+ }
+
+diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c
+index d2bd2653cf54d..065bdc33f0491 100644
+--- a/drivers/media/test-drivers/vivid/vivid-core.c
++++ b/drivers/media/test-drivers/vivid/vivid-core.c
+@@ -330,6 +330,28 @@ static int vidioc_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *a
+ return vivid_vid_out_g_fbuf(file, fh, a);
+ }
+
++/*
++ * Only support the framebuffer of one of the vivid instances.
++ * Anything else is rejected.
++ */
++bool vivid_validate_fb(const struct v4l2_framebuffer *a)
++{
++ struct vivid_dev *dev;
++ int i;
++
++ for (i = 0; i < n_devs; i++) {
++ dev = vivid_devs[i];
++ if (!dev || !dev->video_pbase)
++ continue;
++ if ((unsigned long)a->base == dev->video_pbase &&
++ a->fmt.width <= dev->display_width &&
++ a->fmt.height <= dev->display_height &&
++ a->fmt.bytesperline <= dev->display_byte_stride)
++ return true;
++ }
++ return false;
++}
++
+ static int vidioc_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffer *a)
+ {
+ struct video_device *vdev = video_devdata(file);
+@@ -910,8 +932,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst,
+
+ /* how many inputs do we have and of what type? */
+ dev->num_inputs = num_inputs[inst];
+- if (dev->num_inputs < 1)
+- dev->num_inputs = 1;
++ if (node_type & 0x20007) {
++ if (dev->num_inputs < 1)
++ dev->num_inputs = 1;
++ } else {
++ dev->num_inputs = 0;
++ }
+ if (dev->num_inputs >= MAX_INPUTS)
+ dev->num_inputs = MAX_INPUTS;
+ for (i = 0; i < dev->num_inputs; i++) {
+@@ -928,8 +954,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst,
+
+ /* how many outputs do we have and of what type? */
+ dev->num_outputs = num_outputs[inst];
+- if (dev->num_outputs < 1)
+- dev->num_outputs = 1;
++ if (node_type & 0x40300) {
++ if (dev->num_outputs < 1)
++ dev->num_outputs = 1;
++ } else {
++ dev->num_outputs = 0;
++ }
+ if (dev->num_outputs >= MAX_OUTPUTS)
+ dev->num_outputs = MAX_OUTPUTS;
+ for (i = 0; i < dev->num_outputs; i++) {
+diff --git a/drivers/media/test-drivers/vivid/vivid-core.h b/drivers/media/test-drivers/vivid/vivid-core.h
+index 1e3c4f5a9413f..7ceaf9bac2f05 100644
+--- a/drivers/media/test-drivers/vivid/vivid-core.h
++++ b/drivers/media/test-drivers/vivid/vivid-core.h
+@@ -610,4 +610,6 @@ static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev)
+ return dev->output_type[dev->output] == HDMI;
+ }
+
++bool vivid_validate_fb(const struct v4l2_framebuffer *a);
++
+ #endif
+diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c
+index b9caa4b26209e..331a3f4286d2e 100644
+--- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c
++++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c
+@@ -452,6 +452,12 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls)
+ tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap);
+ dev->crop_cap = dev->src_rect;
+ dev->crop_bounds_cap = dev->src_rect;
++ if (dev->bitmap_cap &&
++ (dev->compose_cap.width != dev->crop_cap.width ||
++ dev->compose_cap.height != dev->crop_cap.height)) {
++ vfree(dev->bitmap_cap);
++ dev->bitmap_cap = NULL;
++ }
+ dev->compose_cap = dev->crop_cap;
+ if (V4L2_FIELD_HAS_T_OR_B(dev->field_cap))
+ dev->compose_cap.height /= 2;
+@@ -909,6 +915,8 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection
+ struct vivid_dev *dev = video_drvdata(file);
+ struct v4l2_rect *crop = &dev->crop_cap;
+ struct v4l2_rect *compose = &dev->compose_cap;
++ unsigned orig_compose_w = compose->width;
++ unsigned orig_compose_h = compose->height;
+ unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_cap) ? 2 : 1;
+ int ret;
+
+@@ -953,6 +961,7 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection
+ if (dev->has_compose_cap) {
+ v4l2_rect_set_min_size(compose, &min_rect);
+ v4l2_rect_set_max_size(compose, &max_rect);
++ v4l2_rect_map_inside(compose, &fmt);
+ }
+ dev->fmt_cap_rect = fmt;
+ tpg_s_buf_height(&dev->tpg, fmt.height);
+@@ -1025,17 +1034,17 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection
+ s->r.height /= factor;
+ }
+ v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect);
+- if (dev->bitmap_cap && (compose->width != s->r.width ||
+- compose->height != s->r.height)) {
+- vfree(dev->bitmap_cap);
+- dev->bitmap_cap = NULL;
+- }
+ *compose = s->r;
+ break;
+ default:
+ return -EINVAL;
+ }
+
++ if (dev->bitmap_cap && (compose->width != orig_compose_w ||
++ compose->height != orig_compose_h)) {
++ vfree(dev->bitmap_cap);
++ dev->bitmap_cap = NULL;
++ }
+ tpg_s_crop_compose(&dev->tpg, crop, compose);
+ return 0;
+ }
+@@ -1272,7 +1281,14 @@ int vivid_vid_cap_s_fbuf(struct file *file, void *fh,
+ return -EINVAL;
+ if (a->fmt.bytesperline < (a->fmt.width * fmt->bit_depth[0]) / 8)
+ return -EINVAL;
+- if (a->fmt.height * a->fmt.bytesperline < a->fmt.sizeimage)
++ if (a->fmt.bytesperline > a->fmt.sizeimage / a->fmt.height)
++ return -EINVAL;
++
++ /*
++ * Only support the framebuffer of one of the vivid instances.
++ * Anything else is rejected.
++ */
++ if (!vivid_validate_fb(a))
+ return -EINVAL;
+
+ dev->fb_vbase_cap = phys_to_virt((unsigned long)a->base);
+diff --git a/drivers/media/tuners/fc0011.c b/drivers/media/tuners/fc0011.c
+index eaa3bbc903d7e..3d3b54be29557 100644
+--- a/drivers/media/tuners/fc0011.c
++++ b/drivers/media/tuners/fc0011.c
+@@ -499,7 +499,7 @@ struct dvb_frontend *fc0011_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(fc0011_attach);
++EXPORT_SYMBOL_GPL(fc0011_attach);
+
+ MODULE_DESCRIPTION("Fitipower FC0011 silicon tuner driver");
+ MODULE_AUTHOR("Michael Buesch <m@bues.ch>");
+diff --git a/drivers/media/tuners/fc0012.c b/drivers/media/tuners/fc0012.c
+index 4429d5e8c5796..81e65acbdb170 100644
+--- a/drivers/media/tuners/fc0012.c
++++ b/drivers/media/tuners/fc0012.c
+@@ -495,7 +495,7 @@ err:
+
+ return fe;
+ }
+-EXPORT_SYMBOL(fc0012_attach);
++EXPORT_SYMBOL_GPL(fc0012_attach);
+
+ MODULE_DESCRIPTION("Fitipower FC0012 silicon tuner driver");
+ MODULE_AUTHOR("Hans-Frieder Vogt <hfvogt@gmx.net>");
+diff --git a/drivers/media/tuners/fc0013.c b/drivers/media/tuners/fc0013.c
+index 29dd9b55ff333..1006a2798eefc 100644
+--- a/drivers/media/tuners/fc0013.c
++++ b/drivers/media/tuners/fc0013.c
+@@ -608,7 +608,7 @@ struct dvb_frontend *fc0013_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(fc0013_attach);
++EXPORT_SYMBOL_GPL(fc0013_attach);
+
+ MODULE_DESCRIPTION("Fitipower FC0013 silicon tuner driver");
+ MODULE_AUTHOR("Hans-Frieder Vogt <hfvogt@gmx.net>");
+diff --git a/drivers/media/tuners/max2165.c b/drivers/media/tuners/max2165.c
+index 1c746bed51fee..1575ab94e1c8b 100644
+--- a/drivers/media/tuners/max2165.c
++++ b/drivers/media/tuners/max2165.c
+@@ -410,7 +410,7 @@ struct dvb_frontend *max2165_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(max2165_attach);
++EXPORT_SYMBOL_GPL(max2165_attach);
+
+ MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
+ MODULE_DESCRIPTION("Maxim MAX2165 silicon tuner driver");
+diff --git a/drivers/media/tuners/mc44s803.c b/drivers/media/tuners/mc44s803.c
+index 0c9161516abdf..ed8bdf7ebd99d 100644
+--- a/drivers/media/tuners/mc44s803.c
++++ b/drivers/media/tuners/mc44s803.c
+@@ -356,7 +356,7 @@ error:
+ kfree(priv);
+ return NULL;
+ }
+-EXPORT_SYMBOL(mc44s803_attach);
++EXPORT_SYMBOL_GPL(mc44s803_attach);
+
+ MODULE_AUTHOR("Jochen Friedrich");
+ MODULE_DESCRIPTION("Freescale MC44S803 silicon tuner driver");
+diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c
+index 78e6fd600d8ef..44247049a3190 100644
+--- a/drivers/media/tuners/msi001.c
++++ b/drivers/media/tuners/msi001.c
+@@ -442,6 +442,13 @@ static int msi001_probe(struct spi_device *spi)
+ V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1);
+ dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops,
+ V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000);
++ if (dev->hdl.error) {
++ ret = dev->hdl.error;
++ dev_err(&spi->dev, "Could not initialize controls\n");
++ /* control init failed, free handler */
++ goto err_ctrl_handler_free;
++ }
++
+ v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false);
+ dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops,
+ V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1);
+diff --git a/drivers/media/tuners/mt2060.c b/drivers/media/tuners/mt2060.c
+index 204e6186bf715..907c06224e5ae 100644
+--- a/drivers/media/tuners/mt2060.c
++++ b/drivers/media/tuners/mt2060.c
+@@ -440,7 +440,7 @@ struct dvb_frontend * mt2060_attach(struct dvb_frontend *fe, struct i2c_adapter
+
+ return fe;
+ }
+-EXPORT_SYMBOL(mt2060_attach);
++EXPORT_SYMBOL_GPL(mt2060_attach);
+
+ static int mt2060_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+diff --git a/drivers/media/tuners/mt2131.c b/drivers/media/tuners/mt2131.c
+index 37f50ff6c0bd2..eebc060883414 100644
+--- a/drivers/media/tuners/mt2131.c
++++ b/drivers/media/tuners/mt2131.c
+@@ -274,7 +274,7 @@ struct dvb_frontend * mt2131_attach(struct dvb_frontend *fe,
+ fe->tuner_priv = priv;
+ return fe;
+ }
+-EXPORT_SYMBOL(mt2131_attach);
++EXPORT_SYMBOL_GPL(mt2131_attach);
+
+ MODULE_AUTHOR("Steven Toth");
+ MODULE_DESCRIPTION("Microtune MT2131 silicon tuner driver");
+diff --git a/drivers/media/tuners/mt2266.c b/drivers/media/tuners/mt2266.c
+index 6136f20fa9b7f..2e92885a6bcb9 100644
+--- a/drivers/media/tuners/mt2266.c
++++ b/drivers/media/tuners/mt2266.c
+@@ -336,7 +336,7 @@ struct dvb_frontend * mt2266_attach(struct dvb_frontend *fe, struct i2c_adapter
+ mt2266_calibrate(priv);
+ return fe;
+ }
+-EXPORT_SYMBOL(mt2266_attach);
++EXPORT_SYMBOL_GPL(mt2266_attach);
+
+ MODULE_AUTHOR("Olivier DANET");
+ MODULE_DESCRIPTION("Microtune MT2266 silicon tuner driver");
+diff --git a/drivers/media/tuners/mxl5005s.c b/drivers/media/tuners/mxl5005s.c
+index f6e82a8e7d37d..f50fc161a80df 100644
+--- a/drivers/media/tuners/mxl5005s.c
++++ b/drivers/media/tuners/mxl5005s.c
+@@ -4128,7 +4128,7 @@ struct dvb_frontend *mxl5005s_attach(struct dvb_frontend *fe,
+ fe->tuner_priv = state;
+ return fe;
+ }
+-EXPORT_SYMBOL(mxl5005s_attach);
++EXPORT_SYMBOL_GPL(mxl5005s_attach);
+
+ MODULE_DESCRIPTION("MaxLinear MXL5005S silicon tuner driver");
+ MODULE_AUTHOR("Steven Toth");
+diff --git a/drivers/media/tuners/qt1010.c b/drivers/media/tuners/qt1010.c
+index 3853a3d43d4f2..60931367b82ca 100644
+--- a/drivers/media/tuners/qt1010.c
++++ b/drivers/media/tuners/qt1010.c
+@@ -440,7 +440,7 @@ struct dvb_frontend * qt1010_attach(struct dvb_frontend *fe,
+ fe->tuner_priv = priv;
+ return fe;
+ }
+-EXPORT_SYMBOL(qt1010_attach);
++EXPORT_SYMBOL_GPL(qt1010_attach);
+
+ MODULE_DESCRIPTION("Quantek QT1010 silicon tuner driver");
+ MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>");
+diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c
+index fefb2625f6558..75ddf7ed1faff 100644
+--- a/drivers/media/tuners/si2157.c
++++ b/drivers/media/tuners/si2157.c
+@@ -90,7 +90,7 @@ static int si2157_init(struct dvb_frontend *fe)
+ dev_dbg(&client->dev, "\n");
+
+ /* Try to get Xtal trim property, to verify tuner still running */
+- memcpy(cmd.args, "\x15\x00\x04\x02", 4);
++ memcpy(cmd.args, "\x15\x00\x02\x04", 4);
+ cmd.wlen = 4;
+ cmd.rlen = 4;
+ ret = si2157_cmd_execute(client, &cmd);
+diff --git a/drivers/media/tuners/tda18218.c b/drivers/media/tuners/tda18218.c
+index 4ed94646116fa..7d8d84dcb2459 100644
+--- a/drivers/media/tuners/tda18218.c
++++ b/drivers/media/tuners/tda18218.c
+@@ -336,7 +336,7 @@ struct dvb_frontend *tda18218_attach(struct dvb_frontend *fe,
+
+ return fe;
+ }
+-EXPORT_SYMBOL(tda18218_attach);
++EXPORT_SYMBOL_GPL(tda18218_attach);
+
+ MODULE_DESCRIPTION("NXP TDA18218HN silicon tuner driver");
+ MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>");
+diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c
+index d9606738ce432..ef9af052007cb 100644
+--- a/drivers/media/tuners/xc4000.c
++++ b/drivers/media/tuners/xc4000.c
+@@ -1744,7 +1744,7 @@ fail2:
+ xc4000_release(fe);
+ return NULL;
+ }
+-EXPORT_SYMBOL(xc4000_attach);
++EXPORT_SYMBOL_GPL(xc4000_attach);
+
+ MODULE_AUTHOR("Steven Toth, Davide Ferri");
+ MODULE_DESCRIPTION("Xceive xc4000 silicon tuner driver");
+diff --git a/drivers/media/tuners/xc5000.c b/drivers/media/tuners/xc5000.c
+index 7b7d9fe4f9453..2182e5b7b6064 100644
+--- a/drivers/media/tuners/xc5000.c
++++ b/drivers/media/tuners/xc5000.c
+@@ -1460,7 +1460,7 @@ fail:
+ xc5000_release(fe);
+ return NULL;
+ }
+-EXPORT_SYMBOL(xc5000_attach);
++EXPORT_SYMBOL_GPL(xc5000_attach);
+
+ MODULE_AUTHOR("Steven Toth");
+ MODULE_DESCRIPTION("Xceive xc5000 silicon tuner driver");
+diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c
+index 5d38171b7638c..8ab1be03e7319 100644
+--- a/drivers/media/usb/b2c2/flexcop-usb.c
++++ b/drivers/media/usb/b2c2/flexcop-usb.c
+@@ -87,7 +87,7 @@ static int flexcop_usb_readwrite_dw(struct flexcop_device *fc, u16 wRegOffsPCI,
+ 0,
+ fc_usb->data,
+ sizeof(u32),
+- B2C2_WAIT_FOR_OPERATION_RDW * HZ);
++ B2C2_WAIT_FOR_OPERATION_RDW);
+
+ if (ret != sizeof(u32)) {
+ err("error while %s dword from %d (%d).", read ? "reading" :
+@@ -155,7 +155,7 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb,
+ wIndex,
+ fc_usb->data,
+ buflen,
+- nWaitTime * HZ);
++ nWaitTime);
+ if (ret != buflen)
+ ret = -EIO;
+
+@@ -248,13 +248,13 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c,
+ /* DKT 020208 - add this to support special case of DiSEqC */
+ case USB_FUNC_I2C_CHECKWRITE:
+ pipe = B2C2_USB_CTRL_PIPE_OUT;
+- nWaitTime = 2;
++ nWaitTime = 2000;
+ request_type |= USB_DIR_OUT;
+ break;
+ case USB_FUNC_I2C_READ:
+ case USB_FUNC_I2C_REPEATREAD:
+ pipe = B2C2_USB_CTRL_PIPE_IN;
+- nWaitTime = 2;
++ nWaitTime = 2000;
+ request_type |= USB_DIR_IN;
+ break;
+ default:
+@@ -281,7 +281,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c,
+ wIndex,
+ fc_usb->data,
+ buflen,
+- nWaitTime * HZ);
++ nWaitTime);
+
+ if (ret != buflen)
+ ret = -EIO;
+@@ -511,7 +511,7 @@ static int flexcop_usb_init(struct flexcop_usb *fc_usb)
+
+ if (fc_usb->uintf->cur_altsetting->desc.bNumEndpoints < 1)
+ return -ENODEV;
+- if (!usb_endpoint_is_isoc_in(&fc_usb->uintf->cur_altsetting->endpoint[1].desc))
++ if (!usb_endpoint_is_isoc_in(&fc_usb->uintf->cur_altsetting->endpoint[0].desc))
+ return -ENODEV;
+
+ switch (fc_usb->udev->speed) {
+diff --git a/drivers/media/usb/b2c2/flexcop-usb.h b/drivers/media/usb/b2c2/flexcop-usb.h
+index 2f230bf72252b..c7cca1a5ee59d 100644
+--- a/drivers/media/usb/b2c2/flexcop-usb.h
++++ b/drivers/media/usb/b2c2/flexcop-usb.h
+@@ -91,13 +91,13 @@ typedef enum {
+ UTILITY_SRAM_TESTVERIFY = 0x16,
+ } flexcop_usb_utility_function_t;
+
+-#define B2C2_WAIT_FOR_OPERATION_RW (1*HZ)
+-#define B2C2_WAIT_FOR_OPERATION_RDW (3*HZ)
+-#define B2C2_WAIT_FOR_OPERATION_WDW (1*HZ)
++#define B2C2_WAIT_FOR_OPERATION_RW 1000
++#define B2C2_WAIT_FOR_OPERATION_RDW 3000
++#define B2C2_WAIT_FOR_OPERATION_WDW 1000
+
+-#define B2C2_WAIT_FOR_OPERATION_V8READ (3*HZ)
+-#define B2C2_WAIT_FOR_OPERATION_V8WRITE (3*HZ)
+-#define B2C2_WAIT_FOR_OPERATION_V8FLASH (3*HZ)
++#define B2C2_WAIT_FOR_OPERATION_V8READ 3000
++#define B2C2_WAIT_FOR_OPERATION_V8WRITE 3000
++#define B2C2_WAIT_FOR_OPERATION_V8FLASH 3000
+
+ typedef enum {
+ V8_MEMORY_PAGE_DVB_CI = 0x20,
+diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
+index 76aac06f9fb8e..cba03b2864738 100644
+--- a/drivers/media/usb/cpia2/cpia2_usb.c
++++ b/drivers/media/usb/cpia2/cpia2_usb.c
+@@ -550,7 +550,7 @@ static int write_packet(struct usb_device *udev,
+ 0, /* index */
+ buf, /* buffer */
+ size,
+- HZ);
++ 1000);
+
+ kfree(buf);
+ return ret;
+@@ -582,7 +582,7 @@ static int read_packet(struct usb_device *udev,
+ 0, /* index */
+ buf, /* buffer */
+ size,
+- HZ);
++ 1000);
+
+ if (ret >= 0)
+ memcpy(registers, buf, size);
+diff --git a/drivers/media/usb/dvb-usb-v2/az6007.c b/drivers/media/usb/dvb-usb-v2/az6007.c
+index 62ee09f28a0bc..7524c90f5da61 100644
+--- a/drivers/media/usb/dvb-usb-v2/az6007.c
++++ b/drivers/media/usb/dvb-usb-v2/az6007.c
+@@ -202,7 +202,8 @@ static int az6007_rc_query(struct dvb_usb_device *d)
+ unsigned code;
+ enum rc_proto proto;
+
+- az6007_read(d, AZ6007_READ_IR, 0, 0, st->data, 10);
++ if (az6007_read(d, AZ6007_READ_IR, 0, 0, st->data, 10) < 0)
++ return -EIO;
+
+ if (st->data[1] == 0x44)
+ return 0;
+diff --git a/drivers/media/usb/dvb-usb-v2/ce6230.c b/drivers/media/usb/dvb-usb-v2/ce6230.c
+index 44540de1a2066..d3b5cb4a24daf 100644
+--- a/drivers/media/usb/dvb-usb-v2/ce6230.c
++++ b/drivers/media/usb/dvb-usb-v2/ce6230.c
+@@ -101,6 +101,10 @@ static int ce6230_i2c_master_xfer(struct i2c_adapter *adap,
+ if (num > i + 1 && (msg[i+1].flags & I2C_M_RD)) {
+ if (msg[i].addr ==
+ ce6230_zl10353_config.demod_address) {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ req.cmd = DEMOD_READ;
+ req.value = msg[i].addr >> 1;
+ req.index = msg[i].buf[0];
+@@ -117,6 +121,10 @@ static int ce6230_i2c_master_xfer(struct i2c_adapter *adap,
+ } else {
+ if (msg[i].addr ==
+ ce6230_zl10353_config.demod_address) {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ req.cmd = DEMOD_WRITE;
+ req.value = msg[i].addr >> 1;
+ req.index = msg[i].buf[0];
+diff --git a/drivers/media/usb/dvb-usb-v2/ec168.c b/drivers/media/usb/dvb-usb-v2/ec168.c
+index 7ed0ab9e429b1..0e4773fc025c9 100644
+--- a/drivers/media/usb/dvb-usb-v2/ec168.c
++++ b/drivers/media/usb/dvb-usb-v2/ec168.c
+@@ -115,6 +115,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ while (i < num) {
+ if (num > i + 1 && (msg[i+1].flags & I2C_M_RD)) {
+ if (msg[i].addr == ec168_ec100_config.demod_address) {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ req.cmd = READ_DEMOD;
+ req.value = 0;
+ req.index = 0xff00 + msg[i].buf[0]; /* reg */
+@@ -131,6 +135,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ }
+ } else {
+ if (msg[i].addr == ec168_ec100_config.demod_address) {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ req.cmd = WRITE_DEMOD;
+ req.value = msg[i].buf[1]; /* val */
+ req.index = 0xff00 + msg[i].buf[0]; /* reg */
+@@ -139,6 +147,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ ret = ec168_ctrl_msg(d, &req);
+ i += 1;
+ } else {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ req.cmd = WRITE_I2C;
+ req.value = msg[i].buf[0]; /* val */
+ req.index = 0x0100 + msg[i].addr; /* I2C addr */
+diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c
+index 7865fa0a82957..cd5861a30b6f8 100644
+--- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c
++++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c
+@@ -931,8 +931,6 @@ static int mxl111sf_init(struct dvb_usb_device *d)
+ .len = sizeof(eeprom), .buf = eeprom },
+ };
+
+- mutex_init(&state->msg_lock);
+-
+ ret = get_chip_info(state);
+ if (mxl_fail(ret))
+ pr_err("failed to get chip info during probe");
+@@ -1074,6 +1072,14 @@ static int mxl111sf_get_stream_config_dvbt(struct dvb_frontend *fe,
+ return 0;
+ }
+
++static int mxl111sf_probe(struct dvb_usb_device *dev)
++{
++ struct mxl111sf_state *state = d_to_priv(dev);
++
++ mutex_init(&state->msg_lock);
++ return 0;
++}
++
+ static struct dvb_usb_device_properties mxl111sf_props_dvbt = {
+ .driver_name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+@@ -1083,6 +1089,7 @@ static struct dvb_usb_device_properties mxl111sf_props_dvbt = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_dvbt,
+ .tuner_attach = mxl111sf_attach_tuner,
+@@ -1124,6 +1131,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_atsc,
+ .tuner_attach = mxl111sf_attach_tuner,
+@@ -1165,6 +1173,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mh = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_mh,
+ .tuner_attach = mxl111sf_attach_tuner,
+@@ -1233,6 +1242,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc_mh = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_atsc_mh,
+ .tuner_attach = mxl111sf_attach_tuner,
+@@ -1311,6 +1321,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_mercury,
+ .tuner_attach = mxl111sf_attach_tuner,
+@@ -1381,6 +1392,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury_mh = {
+ .generic_bulk_ctrl_endpoint = 0x02,
+ .generic_bulk_ctrl_endpoint_response = 0x81,
+
++ .probe = mxl111sf_probe,
+ .i2c_algo = &mxl111sf_i2c_algo,
+ .frontend_attach = mxl111sf_frontend_attach_mercury_mh,
+ .tuner_attach = mxl111sf_attach_tuner,
+diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+index 795a012d40200..f7884bb56fccf 100644
+--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
++++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+@@ -176,6 +176,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ } else if (msg[0].addr == 0x10) {
++ if (msg[0].len < 1 || msg[1].len < 1) {
++ ret = -EOPNOTSUPP;
++ goto err_mutex_unlock;
++ }
+ /* method 1 - integrated demod */
+ if (msg[0].buf[0] == 0x00) {
+ /* return demod page from driver cache */
+@@ -189,6 +193,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ ret = rtl28xxu_ctrl_msg(d, &req);
+ }
+ } else if (msg[0].len < 2) {
++ if (msg[0].len < 1) {
++ ret = -EOPNOTSUPP;
++ goto err_mutex_unlock;
++ }
+ /* method 2 - old I2C */
+ req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
+ req.index = CMD_I2C_RD;
+@@ -217,8 +225,16 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ } else if (msg[0].addr == 0x10) {
++ if (msg[0].len < 1) {
++ ret = -EOPNOTSUPP;
++ goto err_mutex_unlock;
++ }
+ /* method 1 - integrated demod */
+ if (msg[0].buf[0] == 0x00) {
++ if (msg[0].len < 2) {
++ ret = -EOPNOTSUPP;
++ goto err_mutex_unlock;
++ }
+ /* save demod page for later demod access */
+ dev->page = msg[0].buf[1];
+ ret = 0;
+@@ -231,6 +247,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
+ ret = rtl28xxu_ctrl_msg(d, &req);
+ }
+ } else if ((msg[0].len < 23) && (!dev->new_i2c_write)) {
++ if (msg[0].len < 1) {
++ ret = -EOPNOTSUPP;
++ goto err_mutex_unlock;
++ }
+ /* method 2 - old I2C */
+ req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
+ req.index = CMD_I2C_WR;
+diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c
+index 1c39b61cde29b..991f4510aaebb 100644
+--- a/drivers/media/usb/dvb-usb/az6027.c
++++ b/drivers/media/usb/dvb-usb/az6027.c
+@@ -391,6 +391,7 @@ static struct rc_map_table rc_map_az6027_table[] = {
+ /* remote control stuff (does not work with my box) */
+ static int az6027_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
+ {
++ *state = REMOTE_NO_KEY_PRESSED;
+ return 0;
+ }
+
+@@ -974,6 +975,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
+ if (msg[i].addr == 0x99) {
+ req = 0xBE;
+ index = 0;
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ value = msg[i].buf[0] & 0x00ff;
+ length = 1;
+ az6027_usb_out_op(d, req, value, index, data, length);
+@@ -983,6 +988,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
+ /* write/read request */
+ if (i + 1 < num && (msg[i + 1].flags & I2C_M_RD)) {
+ req = 0xB9;
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff));
+ value = msg[i].addr + (msg[i].len << 8);
+ length = msg[i + 1].len + 6;
+@@ -996,6 +1005,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
+
+ /* demod 16bit addr */
+ req = 0xBD;
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff));
+ value = msg[i].addr + (2 << 8);
+ length = msg[i].len - 2;
+@@ -1021,6 +1034,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
+ } else {
+
+ req = 0xBD;
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ index = msg[i].buf[0] & 0x00FF;
+ value = msg[i].addr + (1 << 8);
+ length = msg[i].len - 1;
+diff --git a/drivers/media/usb/dvb-usb/cxusb-analog.c b/drivers/media/usb/dvb-usb/cxusb-analog.c
+index e93183ddd7975..deba5224cb8df 100644
+--- a/drivers/media/usb/dvb-usb/cxusb-analog.c
++++ b/drivers/media/usb/dvb-usb/cxusb-analog.c
+@@ -1014,7 +1014,10 @@ static int cxusb_medion_try_s_fmt_vid_cap(struct file *file,
+ {
+ struct dvb_usb_device *dvbdev = video_drvdata(file);
+ struct cxusb_medion_dev *cxdev = dvbdev->priv;
+- struct v4l2_subdev_format subfmt;
++ struct v4l2_subdev_format subfmt = {
++ .which = isset ? V4L2_SUBDEV_FORMAT_ACTIVE :
++ V4L2_SUBDEV_FORMAT_TRY,
++ };
+ u32 field;
+ int ret;
+
+@@ -1024,9 +1027,6 @@ static int cxusb_medion_try_s_fmt_vid_cap(struct file *file,
+ field = vb2_start_streaming_called(&cxdev->videoqueue) ?
+ cxdev->field_order : cxusb_medion_field_order(cxdev);
+
+- memset(&subfmt, 0, sizeof(subfmt));
+- subfmt.which = isset ? V4L2_SUBDEV_FORMAT_ACTIVE :
+- V4L2_SUBDEV_FORMAT_TRY;
+ subfmt.format.width = f->fmt.pix.width & ~1;
+ subfmt.format.height = f->fmt.pix.height & ~1;
+ subfmt.format.code = MEDIA_BUS_FMT_FIXED;
+@@ -1464,7 +1464,9 @@ int cxusb_medion_analog_init(struct dvb_usb_device *dvbdev)
+ .buf = tuner_analog_msg_data,
+ .len =
+ sizeof(tuner_analog_msg_data) };
+- struct v4l2_subdev_format subfmt;
++ struct v4l2_subdev_format subfmt = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+ int ret;
+
+ /* switch tuner to analog mode so IF demod will become accessible */
+@@ -1507,8 +1509,6 @@ int cxusb_medion_analog_init(struct dvb_usb_device *dvbdev)
+ v4l2_subdev_call(cxdev->tuner, video, s_std, cxdev->norm);
+ v4l2_subdev_call(cxdev->cx25840, video, s_std, cxdev->norm);
+
+- memset(&subfmt, 0, sizeof(subfmt));
+- subfmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ subfmt.format.width = cxdev->width;
+ subfmt.format.height = cxdev->height;
+ subfmt.format.code = MEDIA_BUS_FMT_FIXED;
+diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c
+index 70219b3e85666..7ea8f68b0f458 100644
+--- a/drivers/media/usb/dvb-usb/dib0700_core.c
++++ b/drivers/media/usb/dvb-usb/dib0700_core.c
+@@ -618,8 +618,6 @@ int dib0700_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
+ deb_info("the endpoint number (%i) is not correct, use the adapter id instead", adap->fe_adap[0].stream.props.endpoint);
+ if (onoff)
+ st->channel_state |= 1 << (adap->id);
+- else
+- st->channel_state |= 1 << ~(adap->id);
+ } else {
+ if (onoff)
+ st->channel_state |= 1 << (adap->fe_adap[0].stream.props.endpoint-2);
+diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c
+index 02b51d1a1b67c..aff60c10cb0b2 100644
+--- a/drivers/media/usb/dvb-usb/dibusb-common.c
++++ b/drivers/media/usb/dvb-usb/dibusb-common.c
+@@ -223,7 +223,7 @@ int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val)
+ u8 *buf;
+ int rc;
+
+- buf = kmalloc(2, GFP_KERNEL);
++ buf = kzalloc(2, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c
+index 4e3b3c064bcfb..e56efebd4f0a1 100644
+--- a/drivers/media/usb/dvb-usb/digitv.c
++++ b/drivers/media/usb/dvb-usb/digitv.c
+@@ -63,6 +63,10 @@ static int digitv_i2c_xfer(struct i2c_adapter *adap,struct i2c_msg msg[],int num
+ warn("more than 2 i2c messages at a time is not handled yet. TODO.");
+
+ for (i = 0; i < num; i++) {
++ if (msg[i].len < 1) {
++ i = -EOPNOTSUPP;
++ break;
++ }
+ /* write/read request */
+ if (i+1 < num && (msg[i+1].flags & I2C_M_RD)) {
+ if (digitv_ctrl_msg(d, USB_READ_COFDM, msg[i].buf[0], NULL, 0,
+diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+index 61439c8f33cab..58eea8ab54779 100644
+--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c
++++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+@@ -81,7 +81,7 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs)
+
+ ret = dvb_usb_adapter_stream_init(adap);
+ if (ret)
+- return ret;
++ goto stream_init_err;
+
+ ret = dvb_usb_adapter_dvb_init(adap, adapter_nrs);
+ if (ret)
+@@ -114,6 +114,8 @@ frontend_init_err:
+ dvb_usb_adapter_dvb_exit(adap);
+ dvb_init_err:
+ dvb_usb_adapter_stream_exit(adap);
++stream_init_err:
++ kfree(adap->priv);
+ return ret;
+ }
+
+diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c
+index f0e686b05dc63..1ed62a80067c6 100644
+--- a/drivers/media/usb/dvb-usb/dw2102.c
++++ b/drivers/media/usb/dvb-usb/dw2102.c
+@@ -946,7 +946,7 @@ static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6])
+ for (i = 0; i < 6; i++) {
+ obuf[1] = 0xf0 + i;
+ if (i2c_transfer(&d->i2c_adap, msg, 2) != 2)
+- break;
++ return -1;
+ else
+ mac[i] = ibuf[0];
+ }
+@@ -2150,46 +2150,153 @@ static struct dvb_usb_device_properties s6x0_properties = {
+ }
+ };
+
+-static const struct dvb_usb_device_description d1100 = {
+- "Prof 1100 USB ",
+- {&dw2102_table[PROF_1100], NULL},
+- {NULL},
+-};
++static struct dvb_usb_device_properties p1100_properties = {
++ .caps = DVB_USB_IS_AN_I2C_ADAPTER,
++ .usb_ctrl = DEVICE_SPECIFIC,
++ .size_of_priv = sizeof(struct dw2102_state),
++ .firmware = P1100_FIRMWARE,
++ .no_reconnect = 1,
+
+-static const struct dvb_usb_device_description d660 = {
+- "TeVii S660 USB",
+- {&dw2102_table[TEVII_S660], NULL},
+- {NULL},
+-};
++ .i2c_algo = &s6x0_i2c_algo,
++ .rc.core = {
++ .rc_interval = 150,
++ .rc_codes = RC_MAP_TBS_NEC,
++ .module_name = "dw2102",
++ .allowed_protos = RC_PROTO_BIT_NEC,
++ .rc_query = prof_rc_query,
++ },
+
+-static const struct dvb_usb_device_description d480_1 = {
+- "TeVii S480.1 USB",
+- {&dw2102_table[TEVII_S480_1], NULL},
+- {NULL},
++ .generic_bulk_ctrl_endpoint = 0x81,
++ .num_adapters = 1,
++ .download_firmware = dw2102_load_firmware,
++ .read_mac_address = s6x0_read_mac_address,
++ .adapter = {
++ {
++ .num_frontends = 1,
++ .fe = {{
++ .frontend_attach = stv0288_frontend_attach,
++ .stream = {
++ .type = USB_BULK,
++ .count = 8,
++ .endpoint = 0x82,
++ .u = {
++ .bulk = {
++ .buffersize = 4096,
++ }
++ }
++ },
++ } },
++ }
++ },
++ .num_device_descs = 1,
++ .devices = {
++ {"Prof 1100 USB ",
++ {&dw2102_table[PROF_1100], NULL},
++ {NULL},
++ },
++ }
+ };
+
+-static const struct dvb_usb_device_description d480_2 = {
+- "TeVii S480.2 USB",
+- {&dw2102_table[TEVII_S480_2], NULL},
+- {NULL},
+-};
++static struct dvb_usb_device_properties s660_properties = {
++ .caps = DVB_USB_IS_AN_I2C_ADAPTER,
++ .usb_ctrl = DEVICE_SPECIFIC,
++ .size_of_priv = sizeof(struct dw2102_state),
++ .firmware = S660_FIRMWARE,
++ .no_reconnect = 1,
+
+-static const struct dvb_usb_device_description d7500 = {
+- "Prof 7500 USB DVB-S2",
+- {&dw2102_table[PROF_7500], NULL},
+- {NULL},
+-};
++ .i2c_algo = &s6x0_i2c_algo,
++ .rc.core = {
++ .rc_interval = 150,
++ .rc_codes = RC_MAP_TEVII_NEC,
++ .module_name = "dw2102",
++ .allowed_protos = RC_PROTO_BIT_NEC,
++ .rc_query = dw2102_rc_query,
++ },
+
+-static const struct dvb_usb_device_description d421 = {
+- "TeVii S421 PCI",
+- {&dw2102_table[TEVII_S421], NULL},
+- {NULL},
++ .generic_bulk_ctrl_endpoint = 0x81,
++ .num_adapters = 1,
++ .download_firmware = dw2102_load_firmware,
++ .read_mac_address = s6x0_read_mac_address,
++ .adapter = {
++ {
++ .num_frontends = 1,
++ .fe = {{
++ .frontend_attach = ds3000_frontend_attach,
++ .stream = {
++ .type = USB_BULK,
++ .count = 8,
++ .endpoint = 0x82,
++ .u = {
++ .bulk = {
++ .buffersize = 4096,
++ }
++ }
++ },
++ } },
++ }
++ },
++ .num_device_descs = 3,
++ .devices = {
++ {"TeVii S660 USB",
++ {&dw2102_table[TEVII_S660], NULL},
++ {NULL},
++ },
++ {"TeVii S480.1 USB",
++ {&dw2102_table[TEVII_S480_1], NULL},
++ {NULL},
++ },
++ {"TeVii S480.2 USB",
++ {&dw2102_table[TEVII_S480_2], NULL},
++ {NULL},
++ },
++ }
+ };
+
+-static const struct dvb_usb_device_description d632 = {
+- "TeVii S632 USB",
+- {&dw2102_table[TEVII_S632], NULL},
+- {NULL},
++static struct dvb_usb_device_properties p7500_properties = {
++ .caps = DVB_USB_IS_AN_I2C_ADAPTER,
++ .usb_ctrl = DEVICE_SPECIFIC,
++ .size_of_priv = sizeof(struct dw2102_state),
++ .firmware = P7500_FIRMWARE,
++ .no_reconnect = 1,
++
++ .i2c_algo = &s6x0_i2c_algo,
++ .rc.core = {
++ .rc_interval = 150,
++ .rc_codes = RC_MAP_TBS_NEC,
++ .module_name = "dw2102",
++ .allowed_protos = RC_PROTO_BIT_NEC,
++ .rc_query = prof_rc_query,
++ },
++
++ .generic_bulk_ctrl_endpoint = 0x81,
++ .num_adapters = 1,
++ .download_firmware = dw2102_load_firmware,
++ .read_mac_address = s6x0_read_mac_address,
++ .adapter = {
++ {
++ .num_frontends = 1,
++ .fe = {{
++ .frontend_attach = prof_7500_frontend_attach,
++ .stream = {
++ .type = USB_BULK,
++ .count = 8,
++ .endpoint = 0x82,
++ .u = {
++ .bulk = {
++ .buffersize = 4096,
++ }
++ }
++ },
++ } },
++ }
++ },
++ .num_device_descs = 1,
++ .devices = {
++ {"Prof 7500 USB DVB-S2",
++ {&dw2102_table[PROF_7500], NULL},
++ {NULL},
++ },
++ }
+ };
+
+ static struct dvb_usb_device_properties su3000_properties = {
+@@ -2273,6 +2380,59 @@ static struct dvb_usb_device_properties su3000_properties = {
+ }
+ };
+
++static struct dvb_usb_device_properties s421_properties = {
++ .caps = DVB_USB_IS_AN_I2C_ADAPTER,
++ .usb_ctrl = DEVICE_SPECIFIC,
++ .size_of_priv = sizeof(struct dw2102_state),
++ .power_ctrl = su3000_power_ctrl,
++ .num_adapters = 1,
++ .identify_state = su3000_identify_state,
++ .i2c_algo = &su3000_i2c_algo,
++
++ .rc.core = {
++ .rc_interval = 150,
++ .rc_codes = RC_MAP_SU3000,
++ .module_name = "dw2102",
++ .allowed_protos = RC_PROTO_BIT_RC5,
++ .rc_query = su3000_rc_query,
++ },
++
++ .read_mac_address = su3000_read_mac_address,
++
++ .generic_bulk_ctrl_endpoint = 0x01,
++
++ .adapter = {
++ {
++ .num_frontends = 1,
++ .fe = {{
++ .streaming_ctrl = su3000_streaming_ctrl,
++ .frontend_attach = m88rs2000_frontend_attach,
++ .stream = {
++ .type = USB_BULK,
++ .count = 8,
++ .endpoint = 0x82,
++ .u = {
++ .bulk = {
++ .buffersize = 4096,
++ }
++ }
++ }
++ } },
++ }
++ },
++ .num_device_descs = 2,
++ .devices = {
++ { "TeVii S421 PCI",
++ { &dw2102_table[TEVII_S421], NULL },
++ { NULL },
++ },
++ { "TeVii S632 USB",
++ { &dw2102_table[TEVII_S632], NULL },
++ { NULL },
++ },
++ }
++};
++
+ static struct dvb_usb_device_properties t220_properties = {
+ .caps = DVB_USB_IS_AN_I2C_ADAPTER,
+ .usb_ctrl = DEVICE_SPECIFIC,
+@@ -2390,101 +2550,33 @@ static struct dvb_usb_device_properties tt_s2_4600_properties = {
+ static int dw2102_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+ {
+- int retval = -ENOMEM;
+- struct dvb_usb_device_properties *p1100;
+- struct dvb_usb_device_properties *s660;
+- struct dvb_usb_device_properties *p7500;
+- struct dvb_usb_device_properties *s421;
+-
+- p1100 = kmemdup(&s6x0_properties,
+- sizeof(struct dvb_usb_device_properties), GFP_KERNEL);
+- if (!p1100)
+- goto err0;
+-
+- /* copy default structure */
+- /* fill only different fields */
+- p1100->firmware = P1100_FIRMWARE;
+- p1100->devices[0] = d1100;
+- p1100->rc.core.rc_query = prof_rc_query;
+- p1100->rc.core.rc_codes = RC_MAP_TBS_NEC;
+- p1100->adapter->fe[0].frontend_attach = stv0288_frontend_attach;
+-
+- s660 = kmemdup(&s6x0_properties,
+- sizeof(struct dvb_usb_device_properties), GFP_KERNEL);
+- if (!s660)
+- goto err1;
+-
+- s660->firmware = S660_FIRMWARE;
+- s660->num_device_descs = 3;
+- s660->devices[0] = d660;
+- s660->devices[1] = d480_1;
+- s660->devices[2] = d480_2;
+- s660->adapter->fe[0].frontend_attach = ds3000_frontend_attach;
+-
+- p7500 = kmemdup(&s6x0_properties,
+- sizeof(struct dvb_usb_device_properties), GFP_KERNEL);
+- if (!p7500)
+- goto err2;
+-
+- p7500->firmware = P7500_FIRMWARE;
+- p7500->devices[0] = d7500;
+- p7500->rc.core.rc_query = prof_rc_query;
+- p7500->rc.core.rc_codes = RC_MAP_TBS_NEC;
+- p7500->adapter->fe[0].frontend_attach = prof_7500_frontend_attach;
+-
+-
+- s421 = kmemdup(&su3000_properties,
+- sizeof(struct dvb_usb_device_properties), GFP_KERNEL);
+- if (!s421)
+- goto err3;
+-
+- s421->num_device_descs = 2;
+- s421->devices[0] = d421;
+- s421->devices[1] = d632;
+- s421->adapter->fe[0].frontend_attach = m88rs2000_frontend_attach;
+-
+- if (0 == dvb_usb_device_init(intf, &dw2102_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &dw2104_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &dw3101_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &s6x0_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, p1100,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, s660,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, p7500,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, s421,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &su3000_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &t220_properties,
+- THIS_MODULE, NULL, adapter_nr) ||
+- 0 == dvb_usb_device_init(intf, &tt_s2_4600_properties,
+- THIS_MODULE, NULL, adapter_nr)) {
+-
+- /* clean up copied properties */
+- kfree(s421);
+- kfree(p7500);
+- kfree(s660);
+- kfree(p1100);
++ if (!(dvb_usb_device_init(intf, &dw2102_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &dw2104_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &dw3101_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &s6x0_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &p1100_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &s660_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &p7500_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &s421_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &su3000_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &t220_properties,
++ THIS_MODULE, NULL, adapter_nr) &&
++ dvb_usb_device_init(intf, &tt_s2_4600_properties,
++ THIS_MODULE, NULL, adapter_nr))) {
+
+ return 0;
+ }
+
+- retval = -ENODEV;
+- kfree(s421);
+-err3:
+- kfree(p7500);
+-err2:
+- kfree(s660);
+-err1:
+- kfree(p1100);
+-err0:
+- return retval;
++ return -ENODEV;
+ }
+
+ static void dw2102_disconnect(struct usb_interface *intf)
+diff --git a/drivers/media/usb/dvb-usb/m920x.c b/drivers/media/usb/dvb-usb/m920x.c
+index 4bb5b82599a79..da81fa189b5d5 100644
+--- a/drivers/media/usb/dvb-usb/m920x.c
++++ b/drivers/media/usb/dvb-usb/m920x.c
+@@ -274,6 +274,12 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu
+ /* Should check for ack here, if we knew how. */
+ }
+ if (msg[i].flags & I2C_M_RD) {
++ char *read = kmalloc(1, GFP_KERNEL);
++ if (!read) {
++ ret = -ENOMEM;
++ goto unlock;
++ }
++
+ for (j = 0; j < msg[i].len; j++) {
+ /* Last byte of transaction?
+ * Send STOP, otherwise send ACK. */
+@@ -281,9 +287,14 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu
+
+ if ((ret = m920x_read(d->udev, M9206_I2C, 0x0,
+ 0x20 | stop,
+- &msg[i].buf[j], 1)) != 0)
++ read, 1)) != 0) {
++ kfree(read);
+ goto unlock;
++ }
++ msg[i].buf[j] = read[0];
+ }
++
++ kfree(read);
+ } else {
+ for (j = 0; j < msg[i].len; j++) {
+ /* Last byte of transaction? Then send STOP. */
+diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c
+index c1e0dccb74088..92d867fc519c4 100644
+--- a/drivers/media/usb/em28xx/em28xx-cards.c
++++ b/drivers/media/usb/em28xx/em28xx-cards.c
+@@ -3625,8 +3625,10 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev,
+
+ if (dev->is_audio_only) {
+ retval = em28xx_audio_setup(dev);
+- if (retval)
+- return -ENODEV;
++ if (retval) {
++ retval = -ENODEV;
++ goto err_deinit_media;
++ }
+ em28xx_init_extension(dev);
+
+ return 0;
+@@ -3645,7 +3647,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev,
+ dev_err(&dev->intf->dev,
+ "%s: em28xx_i2c_register bus 0 - error [%d]!\n",
+ __func__, retval);
+- return retval;
++ goto err_deinit_media;
+ }
+
+ /* register i2c bus 1 */
+@@ -3661,9 +3663,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev,
+ "%s: em28xx_i2c_register bus 1 - error [%d]!\n",
+ __func__, retval);
+
+- em28xx_i2c_unregister(dev, 0);
+-
+- return retval;
++ goto err_unreg_i2c;
+ }
+ }
+
+@@ -3671,6 +3671,12 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev,
+ em28xx_card_setup(dev);
+
+ return 0;
++
++err_unreg_i2c:
++ em28xx_i2c_unregister(dev, 0);
++err_deinit_media:
++ em28xx_unregister_media_device(dev);
++ return retval;
+ }
+
+ static int em28xx_duplicate_dev(struct em28xx *dev)
+@@ -3925,6 +3931,8 @@ static int em28xx_usb_probe(struct usb_interface *intf,
+ goto err_free;
+ }
+
++ kref_init(&dev->ref);
++
+ dev->devno = nr;
+ dev->model = id->driver_info;
+ dev->alt = -1;
+@@ -4025,6 +4033,8 @@ static int em28xx_usb_probe(struct usb_interface *intf,
+ }
+
+ if (dev->board.has_dual_ts && em28xx_duplicate_dev(dev) == 0) {
++ kref_init(&dev->dev_next->ref);
++
+ dev->dev_next->ts = SECONDARY_TS;
+ dev->dev_next->alt = -1;
+ dev->dev_next->is_audio_only = has_vendor_audio &&
+@@ -4079,12 +4089,8 @@ static int em28xx_usb_probe(struct usb_interface *intf,
+ em28xx_write_reg(dev, 0x0b, 0x82);
+ mdelay(100);
+ }
+-
+- kref_init(&dev->dev_next->ref);
+ }
+
+- kref_init(&dev->ref);
+-
+ request_modules(dev);
+
+ /*
+diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c
+index 584fa400cd7d8..c837cc528a335 100644
+--- a/drivers/media/usb/em28xx/em28xx-core.c
++++ b/drivers/media/usb/em28xx/em28xx-core.c
+@@ -89,7 +89,7 @@ int em28xx_read_reg_req_len(struct em28xx *dev, u8 req, u16 reg,
+ mutex_lock(&dev->ctrl_urb_lock);
+ ret = usb_control_msg(udev, pipe, req,
+ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- 0x0000, reg, dev->urb_buf, len, HZ);
++ 0x0000, reg, dev->urb_buf, len, 1000);
+ if (ret < 0) {
+ em28xx_regdbg("(pipe 0x%08x): IN: %02x %02x %02x %02x %02x %02x %02x %02x failed with error %i\n",
+ pipe,
+@@ -158,7 +158,7 @@ int em28xx_write_regs_req(struct em28xx *dev, u8 req, u16 reg, char *buf,
+ memcpy(dev->urb_buf, buf, len);
+ ret = usb_control_msg(udev, pipe, req,
+ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- 0x0000, reg, dev->urb_buf, len, HZ);
++ 0x0000, reg, dev->urb_buf, len, 1000);
+ mutex_unlock(&dev->ctrl_urb_lock);
+
+ if (ret < 0) {
+@@ -1154,8 +1154,9 @@ int em28xx_suspend_extension(struct em28xx *dev)
+ dev_info(&dev->intf->dev, "Suspending extensions\n");
+ mutex_lock(&em28xx_devlist_mutex);
+ list_for_each_entry(ops, &em28xx_extension_devlist, next) {
+- if (ops->suspend)
+- ops->suspend(dev);
++ if (!ops->suspend)
++ continue;
++ ops->suspend(dev);
+ if (dev->dev_next)
+ ops->suspend(dev->dev_next);
+ }
+diff --git a/drivers/media/usb/go7007/go7007-i2c.c b/drivers/media/usb/go7007/go7007-i2c.c
+index 38339dd2f83f7..2880370e45c8b 100644
+--- a/drivers/media/usb/go7007/go7007-i2c.c
++++ b/drivers/media/usb/go7007/go7007-i2c.c
+@@ -165,8 +165,6 @@ static int go7007_i2c_master_xfer(struct i2c_adapter *adapter,
+ } else if (msgs[i].len == 3) {
+ if (msgs[i].flags & I2C_M_RD)
+ return -EIO;
+- if (msgs[i].len != 3)
+- return -EIO;
+ if (go7007_i2c_xfer(go, msgs[i].addr, 0,
+ (msgs[i].buf[0] << 8) | msgs[i].buf[1],
+ 0x01, &msgs[i].buf[2]) < 0)
+diff --git a/drivers/media/usb/go7007/s2250-board.c b/drivers/media/usb/go7007/s2250-board.c
+index c742cc88fac5c..1fa6f10ee157b 100644
+--- a/drivers/media/usb/go7007/s2250-board.c
++++ b/drivers/media/usb/go7007/s2250-board.c
+@@ -504,6 +504,7 @@ static int s2250_probe(struct i2c_client *client,
+ u8 *data;
+ struct go7007 *go = i2c_get_adapdata(adapter);
+ struct go7007_usb *usb = go->hpi_context;
++ int err = -EIO;
+
+ audio = i2c_new_dummy_device(adapter, TLV320_ADDRESS >> 1);
+ if (IS_ERR(audio))
+@@ -532,11 +533,8 @@ static int s2250_probe(struct i2c_client *client,
+ V4L2_CID_HUE, -512, 511, 1, 0);
+ sd->ctrl_handler = &state->hdl;
+ if (state->hdl.error) {
+- int err = state->hdl.error;
+-
+- v4l2_ctrl_handler_free(&state->hdl);
+- kfree(state);
+- return err;
++ err = state->hdl.error;
++ goto fail;
+ }
+
+ state->std = V4L2_STD_NTSC;
+@@ -600,7 +598,7 @@ fail:
+ i2c_unregister_device(audio);
+ v4l2_ctrl_handler_free(&state->hdl);
+ kfree(state);
+- return -EIO;
++ return err;
+ }
+
+ static int s2250_remove(struct i2c_client *client)
+diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c
+index 563128d117317..fd7d2a9d0449a 100644
+--- a/drivers/media/usb/hdpvr/hdpvr-video.c
++++ b/drivers/media/usb/hdpvr/hdpvr-video.c
+@@ -308,7 +308,6 @@ static int hdpvr_start_streaming(struct hdpvr_device *dev)
+
+ dev->status = STATUS_STREAMING;
+
+- INIT_WORK(&dev->worker, hdpvr_transmit_buffers);
+ schedule_work(&dev->worker);
+
+ v4l2_dbg(MSG_BUFFER, hdpvr_debug, &dev->v4l2_dev,
+@@ -410,7 +409,7 @@ static ssize_t hdpvr_read(struct file *file, char __user *buffer, size_t count,
+ struct hdpvr_device *dev = video_drvdata(file);
+ struct hdpvr_buffer *buf = NULL;
+ struct urb *urb;
+- unsigned int ret = 0;
++ int ret = 0;
+ int rem, cnt;
+
+ if (*pos)
+@@ -1165,6 +1164,9 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent,
+ bool ac3 = dev->flags & HDPVR_FLAG_AC3_CAP;
+ int res;
+
++ // initialize dev->worker
++ INIT_WORK(&dev->worker, hdpvr_transmit_buffers);
++
+ dev->cur_std = V4L2_STD_525_60;
+ dev->width = 720;
+ dev->height = 480;
+diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+index d38dee1792e41..d22ce328a2797 100644
+--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
++++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+@@ -1467,7 +1467,7 @@ static int pvr2_upload_firmware1(struct pvr2_hdw *hdw)
+ for (address = 0; address < fwsize; address += 0x800) {
+ memcpy(fw_ptr, fw_entry->data + address, 0x800);
+ ret += usb_control_msg(hdw->usb_dev, pipe, 0xa0, 0x40, address,
+- 0, fw_ptr, 0x800, HZ);
++ 0, fw_ptr, 0x800, 1000);
+ }
+
+ trace_firmware("Upload done, releasing device's CPU");
+@@ -1605,7 +1605,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw)
+ ((u32 *)fw_ptr)[icnt] = swab32(((u32 *)fw_ptr)[icnt]);
+
+ ret |= usb_bulk_msg(hdw->usb_dev, pipe, fw_ptr,bcnt,
+- &actual_length, HZ);
++ &actual_length, 1000);
+ ret |= (actual_length != bcnt);
+ if (ret) break;
+ fw_done += bcnt;
+@@ -2569,6 +2569,11 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
+ } while (0);
+ mutex_unlock(&pvr2_unit_mtx);
+
++ INIT_WORK(&hdw->workpoll, pvr2_hdw_worker_poll);
++
++ if (hdw->unit_number == -1)
++ goto fail;
++
+ cnt1 = 0;
+ cnt2 = scnprintf(hdw->name+cnt1,sizeof(hdw->name)-cnt1,"pvrusb2");
+ cnt1 += cnt2;
+@@ -2580,8 +2585,6 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
+ if (cnt1 >= sizeof(hdw->name)) cnt1 = sizeof(hdw->name)-1;
+ hdw->name[cnt1] = 0;
+
+- INIT_WORK(&hdw->workpoll,pvr2_hdw_worker_poll);
+-
+ pvr2_trace(PVR2_TRACE_INIT,"Driver unit number is %d, name is %s",
+ hdw->unit_number,hdw->name);
+
+@@ -2607,6 +2610,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
+ del_timer_sync(&hdw->encoder_run_timer);
+ del_timer_sync(&hdw->encoder_wait_timer);
+ flush_work(&hdw->workpoll);
++ v4l2_device_unregister(&hdw->v4l2_dev);
+ usb_free_urb(hdw->ctl_read_urb);
+ usb_free_urb(hdw->ctl_write_urb);
+ kfree(hdw->ctl_read_buffer);
+@@ -3438,7 +3442,7 @@ void pvr2_hdw_cpufw_set_enabled(struct pvr2_hdw *hdw,
+ 0xa0,0xc0,
+ address,0,
+ hdw->fw_buffer+address,
+- 0x800,HZ);
++ 0x800,1000);
+ if (ret < 0) break;
+ }
+
+@@ -3977,7 +3981,7 @@ void pvr2_hdw_cpureset_assert(struct pvr2_hdw *hdw,int val)
+ /* Write the CPUCS register on the 8051. The lsb of the register
+ is the reset bit; a 1 asserts reset while a 0 clears it. */
+ pipe = usb_sndctrlpipe(hdw->usb_dev, 0);
+- ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,HZ);
++ ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,1000);
+ if (ret < 0) {
+ pvr2_trace(PVR2_TRACE_ERROR_LEGS,
+ "cpureset_assert(%d) error=%d",val,ret);
+diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c
+index 3b0e4ed75d99c..acf18e2251a52 100644
+--- a/drivers/media/usb/s2255/s2255drv.c
++++ b/drivers/media/usb/s2255/s2255drv.c
+@@ -1882,7 +1882,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE |
+ USB_DIR_IN,
+ Value, Index, buf,
+- TransferBufferLength, HZ * 5);
++ TransferBufferLength, USB_CTRL_SET_TIMEOUT);
+
+ if (r >= 0)
+ memcpy(TransferBuffer, buf, TransferBufferLength);
+@@ -1891,7 +1891,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request,
+ r = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+ Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+ Value, Index, buf,
+- TransferBufferLength, HZ * 5);
++ TransferBufferLength, USB_CTRL_SET_TIMEOUT);
+ }
+ kfree(buf);
+ return r;
+diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c
+index df4c5dcba39cd..6036ad3b15681 100644
+--- a/drivers/media/usb/siano/smsusb.c
++++ b/drivers/media/usb/siano/smsusb.c
+@@ -179,6 +179,8 @@ static void smsusb_stop_streaming(struct smsusb_device_t *dev)
+
+ for (i = 0; i < MAX_URBS; i++) {
+ usb_kill_urb(&dev->surbs[i].urb);
++ if (dev->surbs[i].wq.func)
++ cancel_work_sync(&dev->surbs[i].wq);
+
+ if (dev->surbs[i].cb) {
+ smscore_putbuffer(dev->coredev, dev->surbs[i].cb);
+@@ -453,12 +455,7 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id)
+ rc = smscore_register_device(&params, &dev->coredev, 0, mdev);
+ if (rc < 0) {
+ pr_err("smscore_register_device(...) failed, rc %d\n", rc);
+- smsusb_term_device(intf);
+-#ifdef CONFIG_MEDIA_CONTROLLER_DVB
+- media_device_unregister(mdev);
+-#endif
+- kfree(mdev);
+- return rc;
++ goto err_unregister_device;
+ }
+
+ smscore_set_board_id(dev->coredev, board_id);
+@@ -475,8 +472,7 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id)
+ rc = smsusb_start_streaming(dev);
+ if (rc < 0) {
+ pr_err("smsusb_start_streaming(...) failed\n");
+- smsusb_term_device(intf);
+- return rc;
++ goto err_unregister_device;
+ }
+
+ dev->state = SMSUSB_ACTIVE;
+@@ -484,13 +480,20 @@ static int smsusb_init_device(struct usb_interface *intf, int board_id)
+ rc = smscore_start_device(dev->coredev);
+ if (rc < 0) {
+ pr_err("smscore_start_device(...) failed\n");
+- smsusb_term_device(intf);
+- return rc;
++ goto err_unregister_device;
+ }
+
+ pr_debug("device 0x%p created\n", dev);
+
+ return rc;
++
++err_unregister_device:
++ smsusb_term_device(intf);
++#ifdef CONFIG_MEDIA_CONTROLLER_DVB
++ media_device_unregister(mdev);
++#endif
++ kfree(mdev);
++ return rc;
+ }
+
+ static int smsusb_probe(struct usb_interface *intf,
+diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c
+index b4f8bc5db1389..ce717502ea4c3 100644
+--- a/drivers/media/usb/stk1160/stk1160-core.c
++++ b/drivers/media/usb/stk1160/stk1160-core.c
+@@ -65,7 +65,7 @@ int stk1160_read_reg(struct stk1160 *dev, u16 reg, u8 *value)
+ return -ENOMEM;
+ ret = usb_control_msg(dev->udev, pipe, 0x00,
+ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- 0x00, reg, buf, sizeof(u8), HZ);
++ 0x00, reg, buf, sizeof(u8), 1000);
+ if (ret < 0) {
+ stk1160_err("read failed on reg 0x%x (%d)\n",
+ reg, ret);
+@@ -85,7 +85,7 @@ int stk1160_write_reg(struct stk1160 *dev, u16 reg, u16 value)
+
+ ret = usb_control_msg(dev->udev, pipe, 0x01,
+ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- value, reg, NULL, 0, HZ);
++ value, reg, NULL, 0, 1000);
+ if (ret < 0) {
+ stk1160_err("write failed on reg 0x%x (%d)\n",
+ reg, ret);
+@@ -403,7 +403,7 @@ static void stk1160_disconnect(struct usb_interface *interface)
+ /* Here is the only place where isoc get released */
+ stk1160_uninit_isoc(dev);
+
+- stk1160_clear_queue(dev);
++ stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR);
+
+ video_unregister_device(&dev->vdev);
+ v4l2_device_disconnect(&dev->v4l2_dev);
+diff --git a/drivers/media/usb/stk1160/stk1160-v4l.c b/drivers/media/usb/stk1160/stk1160-v4l.c
+index 6a4eb616d5160..1aa953469402f 100644
+--- a/drivers/media/usb/stk1160/stk1160-v4l.c
++++ b/drivers/media/usb/stk1160/stk1160-v4l.c
+@@ -258,7 +258,7 @@ out_uninit:
+ stk1160_uninit_isoc(dev);
+ out_stop_hw:
+ usb_set_interface(dev->udev, 0, 0);
+- stk1160_clear_queue(dev);
++ stk1160_clear_queue(dev, VB2_BUF_STATE_QUEUED);
+
+ mutex_unlock(&dev->v4l_lock);
+
+@@ -306,7 +306,7 @@ static int stk1160_stop_streaming(struct stk1160 *dev)
+
+ stk1160_stop_hw(dev);
+
+- stk1160_clear_queue(dev);
++ stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR);
+
+ stk1160_dbg("streaming stopped\n");
+
+@@ -745,7 +745,7 @@ static const struct video_device v4l_template = {
+ /********************************************************************/
+
+ /* Must be called with both v4l_lock and vb_queue_lock hold */
+-void stk1160_clear_queue(struct stk1160 *dev)
++void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state)
+ {
+ struct stk1160_buffer *buf;
+ unsigned long flags;
+@@ -756,7 +756,7 @@ void stk1160_clear_queue(struct stk1160 *dev)
+ buf = list_first_entry(&dev->avail_bufs,
+ struct stk1160_buffer, list);
+ list_del(&buf->list);
+- vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
++ vb2_buffer_done(&buf->vb.vb2_buf, vb2_state);
+ stk1160_dbg("buffer [%p/%d] aborted\n",
+ buf, buf->vb.vb2_buf.index);
+ }
+@@ -766,7 +766,7 @@ void stk1160_clear_queue(struct stk1160 *dev)
+ buf = dev->isoc_ctl.buf;
+ dev->isoc_ctl.buf = NULL;
+
+- vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
++ vb2_buffer_done(&buf->vb.vb2_buf, vb2_state);
+ stk1160_dbg("buffer [%p/%d] aborted\n",
+ buf, buf->vb.vb2_buf.index);
+ }
+diff --git a/drivers/media/usb/stk1160/stk1160.h b/drivers/media/usb/stk1160/stk1160.h
+index a31ea1c80f255..a70963ce87533 100644
+--- a/drivers/media/usb/stk1160/stk1160.h
++++ b/drivers/media/usb/stk1160/stk1160.h
+@@ -166,7 +166,7 @@ struct regval {
+ int stk1160_vb2_setup(struct stk1160 *dev);
+ int stk1160_video_register(struct stk1160 *dev);
+ void stk1160_video_unregister(struct stk1160 *dev);
+-void stk1160_clear_queue(struct stk1160 *dev);
++void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state);
+
+ /* Provided by stk1160-video.c */
+ int stk1160_alloc_isoc(struct stk1160 *dev);
+diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
+index 3f650ede0c3dc..e293f6f3d1bc9 100644
+--- a/drivers/media/usb/tm6000/tm6000-video.c
++++ b/drivers/media/usb/tm6000/tm6000-video.c
+@@ -852,8 +852,7 @@ static int vidioc_querycap(struct file *file, void *priv,
+ struct tm6000_core *dev = ((struct tm6000_fh *)priv)->dev;
+
+ strscpy(cap->driver, "tm6000", sizeof(cap->driver));
+- strscpy(cap->card, "Trident TVMaster TM5600/6000/6010",
+- sizeof(cap->card));
++ strscpy(cap->card, "Trident TM5600/6000/6010", sizeof(cap->card));
+ usb_make_path(dev->udev, cap->bus_info, sizeof(cap->bus_info));
+ cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+ V4L2_CAP_DEVICE_CAPS;
+diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
+index bfda46a36dc50..c4474d4c44e28 100644
+--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
++++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
+@@ -327,7 +327,7 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
+ result = mutex_lock_interruptible(&dec->usb_mutex);
+ if (result) {
+ printk("%s: Failed to lock usb mutex.\n", __func__);
+- goto err;
++ goto err_free;
+ }
+
+ b[0] = 0xaa;
+@@ -349,7 +349,7 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
+ if (result) {
+ printk("%s: command bulk message failed: error %d\n",
+ __func__, result);
+- goto err;
++ goto err_mutex_unlock;
+ }
+
+ result = usb_bulk_msg(dec->udev, dec->result_pipe, b,
+@@ -358,7 +358,7 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
+ if (result) {
+ printk("%s: result bulk message failed: error %d\n",
+ __func__, result);
+- goto err;
++ goto err_mutex_unlock;
+ } else {
+ if (debug) {
+ printk(KERN_DEBUG "%s: result: %*ph\n",
+@@ -371,9 +371,9 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
+ memcpy(cmd_result, &b[4], b[3]);
+ }
+
+-err:
++err_mutex_unlock:
+ mutex_unlock(&dec->usb_mutex);
+-
++err_free:
+ kfree(b);
+ return result;
+ }
+@@ -1544,8 +1544,7 @@ static void ttusb_dec_exit_dvb(struct ttusb_dec *dec)
+ dvb_dmx_release(&dec->demux);
+ if (dec->fe) {
+ dvb_unregister_frontend(dec->fe);
+- if (dec->fe->ops.release)
+- dec->fe->ops.release(dec->fe);
++ dvb_frontend_detach(dec->fe);
+ }
+ dvb_unregister_adapter(&dec->adapter);
+ }
+diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c
+index b3dde98499f41..05335866e6d62 100644
+--- a/drivers/media/usb/uvc/uvc_ctrl.c
++++ b/drivers/media/usb/uvc/uvc_ctrl.c
+@@ -6,6 +6,7 @@
+ * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ */
+
++#include <asm/barrier.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
+ #include <linux/module.h>
+@@ -357,6 +358,11 @@ static const struct uvc_control_info uvc_ctrls[] = {
+ },
+ };
+
++static const u32 uvc_control_classes[] = {
++ V4L2_CID_CAMERA_CLASS,
++ V4L2_CID_USER_CLASS,
++};
++
+ static const struct uvc_menu_info power_line_frequency_controls[] = {
+ { 0, "Disabled" },
+ { 1, "50 Hz" },
+@@ -427,7 +433,6 @@ static void uvc_ctrl_set_rel_speed(struct uvc_control_mapping *mapping,
+ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ {
+ .id = V4L2_CID_BRIGHTNESS,
+- .name = "Brightness",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_BRIGHTNESS_CONTROL,
+ .size = 16,
+@@ -437,7 +442,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_CONTRAST,
+- .name = "Contrast",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_CONTRAST_CONTROL,
+ .size = 16,
+@@ -447,7 +451,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_HUE,
+- .name = "Hue",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_HUE_CONTROL,
+ .size = 16,
+@@ -459,7 +462,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_SATURATION,
+- .name = "Saturation",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_SATURATION_CONTROL,
+ .size = 16,
+@@ -469,7 +471,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_SHARPNESS,
+- .name = "Sharpness",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_SHARPNESS_CONTROL,
+ .size = 16,
+@@ -479,7 +480,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_GAMMA,
+- .name = "Gamma",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_GAMMA_CONTROL,
+ .size = 16,
+@@ -489,7 +489,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_BACKLIGHT_COMPENSATION,
+- .name = "Backlight Compensation",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_BACKLIGHT_COMPENSATION_CONTROL,
+ .size = 16,
+@@ -499,7 +498,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_GAIN,
+- .name = "Gain",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_GAIN_CONTROL,
+ .size = 16,
+@@ -509,7 +507,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_POWER_LINE_FREQUENCY,
+- .name = "Power Line Frequency",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_POWER_LINE_FREQUENCY_CONTROL,
+ .size = 2,
+@@ -521,7 +518,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_HUE_AUTO,
+- .name = "Hue, Auto",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_HUE_AUTO_CONTROL,
+ .size = 1,
+@@ -532,7 +528,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_EXPOSURE_AUTO,
+- .name = "Exposure, Auto",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_AE_MODE_CONTROL,
+ .size = 4,
+@@ -545,7 +540,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_EXPOSURE_AUTO_PRIORITY,
+- .name = "Exposure, Auto Priority",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_AE_PRIORITY_CONTROL,
+ .size = 1,
+@@ -555,7 +549,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_EXPOSURE_ABSOLUTE,
+- .name = "Exposure (Absolute)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_EXPOSURE_TIME_ABSOLUTE_CONTROL,
+ .size = 32,
+@@ -567,7 +560,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_AUTO_WHITE_BALANCE,
+- .name = "White Balance Temperature, Auto",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_WHITE_BALANCE_TEMPERATURE_AUTO_CONTROL,
+ .size = 1,
+@@ -578,7 +570,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_WHITE_BALANCE_TEMPERATURE,
+- .name = "White Balance Temperature",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_WHITE_BALANCE_TEMPERATURE_CONTROL,
+ .size = 16,
+@@ -590,7 +581,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_AUTO_WHITE_BALANCE,
+- .name = "White Balance Component, Auto",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_WHITE_BALANCE_COMPONENT_AUTO_CONTROL,
+ .size = 1,
+@@ -602,7 +592,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_BLUE_BALANCE,
+- .name = "White Balance Blue Component",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_WHITE_BALANCE_COMPONENT_CONTROL,
+ .size = 16,
+@@ -614,7 +603,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_RED_BALANCE,
+- .name = "White Balance Red Component",
+ .entity = UVC_GUID_UVC_PROCESSING,
+ .selector = UVC_PU_WHITE_BALANCE_COMPONENT_CONTROL,
+ .size = 16,
+@@ -626,7 +614,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_FOCUS_ABSOLUTE,
+- .name = "Focus (absolute)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_FOCUS_ABSOLUTE_CONTROL,
+ .size = 16,
+@@ -638,7 +625,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_FOCUS_AUTO,
+- .name = "Focus, Auto",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_FOCUS_AUTO_CONTROL,
+ .size = 1,
+@@ -649,7 +635,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_IRIS_ABSOLUTE,
+- .name = "Iris, Absolute",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_IRIS_ABSOLUTE_CONTROL,
+ .size = 16,
+@@ -659,7 +644,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_IRIS_RELATIVE,
+- .name = "Iris, Relative",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_IRIS_RELATIVE_CONTROL,
+ .size = 8,
+@@ -669,7 +653,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_ZOOM_ABSOLUTE,
+- .name = "Zoom, Absolute",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_ZOOM_ABSOLUTE_CONTROL,
+ .size = 16,
+@@ -679,7 +662,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_ZOOM_CONTINUOUS,
+- .name = "Zoom, Continuous",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_ZOOM_RELATIVE_CONTROL,
+ .size = 0,
+@@ -691,7 +673,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_PAN_ABSOLUTE,
+- .name = "Pan (Absolute)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_PANTILT_ABSOLUTE_CONTROL,
+ .size = 32,
+@@ -701,7 +682,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_TILT_ABSOLUTE,
+- .name = "Tilt (Absolute)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_PANTILT_ABSOLUTE_CONTROL,
+ .size = 32,
+@@ -711,7 +691,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_PAN_SPEED,
+- .name = "Pan (Speed)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_PANTILT_RELATIVE_CONTROL,
+ .size = 16,
+@@ -723,7 +702,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_TILT_SPEED,
+- .name = "Tilt (Speed)",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_PANTILT_RELATIVE_CONTROL,
+ .size = 16,
+@@ -735,7 +713,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_PRIVACY,
+- .name = "Privacy",
+ .entity = UVC_GUID_UVC_CAMERA,
+ .selector = UVC_CT_PRIVACY_CONTROL,
+ .size = 1,
+@@ -745,7 +722,6 @@ static const struct uvc_control_mapping uvc_ctrl_mappings[] = {
+ },
+ {
+ .id = V4L2_CID_PRIVACY,
+- .name = "Privacy",
+ .entity = UVC_GUID_EXT_GPIO_CONTROLLER,
+ .selector = UVC_CT_PRIVACY_CONTROL,
+ .size = 1,
+@@ -988,42 +964,181 @@ static s32 __uvc_ctrl_get_value(struct uvc_control_mapping *mapping,
+ return value;
+ }
+
++static int __uvc_ctrl_load_cur(struct uvc_video_chain *chain,
++ struct uvc_control *ctrl)
++{
++ u8 *data;
++ int ret;
++
++ if (ctrl->loaded)
++ return 0;
++
++ data = uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT);
++
++ if ((ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR) == 0) {
++ memset(data, 0, ctrl->info.size);
++ ctrl->loaded = 1;
++
++ return 0;
++ }
++
++ if (ctrl->entity->get_cur)
++ ret = ctrl->entity->get_cur(chain->dev, ctrl->entity,
++ ctrl->info.selector, data,
++ ctrl->info.size);
++ else
++ ret = uvc_query_ctrl(chain->dev, UVC_GET_CUR,
++ ctrl->entity->id, chain->dev->intfnum,
++ ctrl->info.selector, data,
++ ctrl->info.size);
++
++ if (ret < 0)
++ return ret;
++
++ ctrl->loaded = 1;
++
++ return ret;
++}
++
+ static int __uvc_ctrl_get(struct uvc_video_chain *chain,
+- struct uvc_control *ctrl, struct uvc_control_mapping *mapping,
+- s32 *value)
++ struct uvc_control *ctrl,
++ struct uvc_control_mapping *mapping,
++ s32 *value)
+ {
+ int ret;
+
+ if ((ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR) == 0)
+ return -EACCES;
+
+- if (!ctrl->loaded) {
+- if (ctrl->entity->get_cur) {
+- ret = ctrl->entity->get_cur(chain->dev,
+- ctrl->entity,
+- ctrl->info.selector,
+- uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT),
+- ctrl->info.size);
+- } else {
+- ret = uvc_query_ctrl(chain->dev, UVC_GET_CUR,
+- ctrl->entity->id,
+- chain->dev->intfnum,
+- ctrl->info.selector,
+- uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT),
+- ctrl->info.size);
++ ret = __uvc_ctrl_load_cur(chain, ctrl);
++ if (ret < 0)
++ return ret;
++
++ *value = __uvc_ctrl_get_value(mapping,
++ uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT));
++
++ return 0;
++}
++
++static int __uvc_query_v4l2_class(struct uvc_video_chain *chain, u32 req_id,
++ u32 found_id)
++{
++ bool find_next = req_id & V4L2_CTRL_FLAG_NEXT_CTRL;
++ unsigned int i;
++
++ req_id &= V4L2_CTRL_ID_MASK;
++
++ for (i = 0; i < ARRAY_SIZE(uvc_control_classes); i++) {
++ if (!(chain->ctrl_class_bitmap & BIT(i)))
++ continue;
++ if (!find_next) {
++ if (uvc_control_classes[i] == req_id)
++ return i;
++ continue;
+ }
+- if (ret < 0)
+- return ret;
++ if (uvc_control_classes[i] > req_id &&
++ uvc_control_classes[i] < found_id)
++ return i;
++ }
+
+- ctrl->loaded = 1;
++ return -ENODEV;
++}
++
++static int uvc_query_v4l2_class(struct uvc_video_chain *chain, u32 req_id,
++ u32 found_id, struct v4l2_queryctrl *v4l2_ctrl)
++{
++ int idx;
++
++ idx = __uvc_query_v4l2_class(chain, req_id, found_id);
++ if (idx < 0)
++ return -ENODEV;
++
++ memset(v4l2_ctrl, 0, sizeof(*v4l2_ctrl));
++ v4l2_ctrl->id = uvc_control_classes[idx];
++ strscpy(v4l2_ctrl->name, v4l2_ctrl_get_name(v4l2_ctrl->id),
++ sizeof(v4l2_ctrl->name));
++ v4l2_ctrl->type = V4L2_CTRL_TYPE_CTRL_CLASS;
++ v4l2_ctrl->flags = V4L2_CTRL_FLAG_WRITE_ONLY
++ | V4L2_CTRL_FLAG_READ_ONLY;
++ return 0;
++}
++
++/*
++ * Check if control @v4l2_id can be accessed by the given control @ioctl
++ * (VIDIOC_G_EXT_CTRLS, VIDIOC_TRY_EXT_CTRLS or VIDIOC_S_EXT_CTRLS).
++ *
++ * For set operations on slave controls, check if the master's value is set to
++ * manual, either in the other controls set in the same ioctl call, or from
++ * the master's current value. This catches VIDIOC_S_EXT_CTRLS calls that set
++ * both the master and slave control, such as for instance setting
++ * auto_exposure=1, exposure_time_absolute=251.
++ */
++int uvc_ctrl_is_accessible(struct uvc_video_chain *chain, u32 v4l2_id,
++ const struct v4l2_ext_controls *ctrls,
++ unsigned long ioctl)
++{
++ struct uvc_control_mapping *master_map = NULL;
++ struct uvc_control *master_ctrl = NULL;
++ struct uvc_control_mapping *mapping;
++ struct uvc_control *ctrl;
++ bool read = ioctl == VIDIOC_G_EXT_CTRLS;
++ s32 val;
++ int ret;
++ int i;
++
++ if (__uvc_query_v4l2_class(chain, v4l2_id, 0) >= 0)
++ return -EACCES;
++
++ ctrl = uvc_find_control(chain, v4l2_id, &mapping);
++ if (!ctrl)
++ return -EINVAL;
++
++ if (!(ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR) && read)
++ return -EACCES;
++
++ if (!(ctrl->info.flags & UVC_CTRL_FLAG_SET_CUR) && !read)
++ return -EACCES;
++
++ if (ioctl != VIDIOC_S_EXT_CTRLS || !mapping->master_id)
++ return 0;
++
++ /*
++ * Iterate backwards in cases where the master control is accessed
++ * multiple times in the same ioctl. We want the last value.
++ */
++ for (i = ctrls->count - 1; i >= 0; i--) {
++ if (ctrls->controls[i].id == mapping->master_id)
++ return ctrls->controls[i].value ==
++ mapping->master_manual ? 0 : -EACCES;
+ }
+
+- *value = __uvc_ctrl_get_value(mapping,
+- uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT));
++ __uvc_find_control(ctrl->entity, mapping->master_id, &master_map,
++ &master_ctrl, 0);
++
++ if (!master_ctrl || !(master_ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR))
++ return 0;
++
++ ret = __uvc_ctrl_get(chain, master_ctrl, master_map, &val);
++ if (ret >= 0 && val != mapping->master_manual)
++ return -EACCES;
+
+ return 0;
+ }
+
++static const char *uvc_map_get_name(const struct uvc_control_mapping *map)
++{
++ const char *name;
++
++ if (map->name)
++ return map->name;
++
++ name = v4l2_ctrl_get_name(map->id);
++ if (name)
++ return name;
++
++ return "Unknown Control";
++}
++
+ static int __uvc_query_v4l2_ctrl(struct uvc_video_chain *chain,
+ struct uvc_control *ctrl,
+ struct uvc_control_mapping *mapping,
+@@ -1037,7 +1152,8 @@ static int __uvc_query_v4l2_ctrl(struct uvc_video_chain *chain,
+ memset(v4l2_ctrl, 0, sizeof(*v4l2_ctrl));
+ v4l2_ctrl->id = mapping->id;
+ v4l2_ctrl->type = mapping->v4l2_type;
+- strscpy(v4l2_ctrl->name, mapping->name, sizeof(v4l2_ctrl->name));
++ strscpy(v4l2_ctrl->name, uvc_map_get_name(mapping),
++ sizeof(v4l2_ctrl->name));
+ v4l2_ctrl->flags = 0;
+
+ if (!(ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR))
+@@ -1127,12 +1243,31 @@ int uvc_query_v4l2_ctrl(struct uvc_video_chain *chain,
+ if (ret < 0)
+ return -ERESTARTSYS;
+
++ /* Check if the ctrl is a known class */
++ if (!(v4l2_ctrl->id & V4L2_CTRL_FLAG_NEXT_CTRL)) {
++ ret = uvc_query_v4l2_class(chain, v4l2_ctrl->id, 0, v4l2_ctrl);
++ if (!ret)
++ goto done;
++ }
++
+ ctrl = uvc_find_control(chain, v4l2_ctrl->id, &mapping);
+ if (ctrl == NULL) {
+ ret = -EINVAL;
+ goto done;
+ }
+
++ /*
++ * If we're enumerating control with V4L2_CTRL_FLAG_NEXT_CTRL, check if
++ * a class should be inserted between the previous control and the one
++ * we have just found.
++ */
++ if (v4l2_ctrl->id & V4L2_CTRL_FLAG_NEXT_CTRL) {
++ ret = uvc_query_v4l2_class(chain, v4l2_ctrl->id, mapping->id,
++ v4l2_ctrl);
++ if (!ret)
++ goto done;
++ }
++
+ ret = __uvc_query_v4l2_ctrl(chain, ctrl, mapping, v4l2_ctrl);
+ done:
+ mutex_unlock(&chain->ctrl_mutex);
+@@ -1325,6 +1460,10 @@ static void uvc_ctrl_status_event_work(struct work_struct *work)
+
+ uvc_ctrl_status_event(w->chain, w->ctrl, w->data);
+
++ /* The barrier is needed to synchronize with uvc_status_stop(). */
++ if (smp_load_acquire(&dev->flush_status))
++ return;
++
+ /* Resubmit the URB. */
+ w->urb->interval = dev->int_ep->desc.bInterval;
+ ret = usb_submit_urb(w->urb, GFP_KERNEL);
+@@ -1426,6 +1565,11 @@ static int uvc_ctrl_add_event(struct v4l2_subscribed_event *sev, unsigned elems)
+ if (ret < 0)
+ return -ERESTARTSYS;
+
++ if (__uvc_query_v4l2_class(handle->chain, sev->id, 0) >= 0) {
++ ret = 0;
++ goto done;
++ }
++
+ ctrl = uvc_find_control(handle->chain, sev->id, &mapping);
+ if (ctrl == NULL) {
+ ret = -EINVAL;
+@@ -1459,7 +1603,10 @@ static void uvc_ctrl_del_event(struct v4l2_subscribed_event *sev)
+ struct uvc_fh *handle = container_of(sev->fh, struct uvc_fh, vfh);
+
+ mutex_lock(&handle->chain->ctrl_mutex);
++ if (__uvc_query_v4l2_class(handle->chain, sev->id, 0) >= 0)
++ goto done;
+ list_del(&sev->node);
++done:
+ mutex_unlock(&handle->chain->ctrl_mutex);
+ }
+
+@@ -1577,6 +1724,9 @@ int uvc_ctrl_get(struct uvc_video_chain *chain,
+ struct uvc_control *ctrl;
+ struct uvc_control_mapping *mapping;
+
++ if (__uvc_query_v4l2_class(chain, xctrl->id, 0) >= 0)
++ return -EACCES;
++
+ ctrl = uvc_find_control(chain, xctrl->id, &mapping);
+ if (ctrl == NULL)
+ return -EINVAL;
+@@ -1596,6 +1746,9 @@ int uvc_ctrl_set(struct uvc_fh *handle,
+ s32 max;
+ int ret;
+
++ if (__uvc_query_v4l2_class(chain, xctrl->id, 0) >= 0)
++ return -EACCES;
++
+ ctrl = uvc_find_control(chain, xctrl->id, &mapping);
+ if (ctrl == NULL)
+ return -EINVAL;
+@@ -1667,21 +1820,10 @@ int uvc_ctrl_set(struct uvc_fh *handle,
+ * needs to be loaded from the device to perform the read-modify-write
+ * operation.
+ */
+- if (!ctrl->loaded && (ctrl->info.size * 8) != mapping->size) {
+- if ((ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR) == 0) {
+- memset(uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT),
+- 0, ctrl->info.size);
+- } else {
+- ret = uvc_query_ctrl(chain->dev, UVC_GET_CUR,
+- ctrl->entity->id, chain->dev->intfnum,
+- ctrl->info.selector,
+- uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT),
+- ctrl->info.size);
+- if (ret < 0)
+- return ret;
+- }
+-
+- ctrl->loaded = 1;
++ if ((ctrl->info.size * 8) != mapping->size) {
++ ret = __uvc_ctrl_load_cur(chain, ctrl);
++ if (ret < 0)
++ return ret;
+ }
+
+ /* Backup the current value in case we need to rollback later. */
+@@ -2057,11 +2199,12 @@ static int uvc_ctrl_add_info(struct uvc_device *dev, struct uvc_control *ctrl,
+ /*
+ * Add a control mapping to a given control.
+ */
+-static int __uvc_ctrl_add_mapping(struct uvc_device *dev,
++static int __uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
+ struct uvc_control *ctrl, const struct uvc_control_mapping *mapping)
+ {
+ struct uvc_control_mapping *map;
+ unsigned int size;
++ unsigned int i;
+
+ /* Most mappings come from static kernel data and need to be duplicated.
+ * Mappings that come from userspace will be unnecessarily duplicated,
+@@ -2085,9 +2228,18 @@ static int __uvc_ctrl_add_mapping(struct uvc_device *dev,
+ if (map->set == NULL)
+ map->set = uvc_set_le_value;
+
++ for (i = 0; i < ARRAY_SIZE(uvc_control_classes); i++) {
++ if (V4L2_CTRL_ID2WHICH(uvc_control_classes[i]) ==
++ V4L2_CTRL_ID2WHICH(map->id)) {
++ chain->ctrl_class_bitmap |= BIT(i);
++ break;
++ }
++ }
++
+ list_add_tail(&map->list, &ctrl->info.mappings);
+- uvc_dbg(dev, CONTROL, "Adding mapping '%s' to control %pUl/%u\n",
+- map->name, ctrl->info.entity, ctrl->info.selector);
++ uvc_dbg(chain->dev, CONTROL, "Adding mapping '%s' to control %pUl/%u\n",
++ uvc_map_get_name(map), ctrl->info.entity,
++ ctrl->info.selector);
+
+ return 0;
+ }
+@@ -2105,7 +2257,7 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
+ if (mapping->id & ~V4L2_CTRL_ID_MASK) {
+ uvc_dbg(dev, CONTROL,
+ "Can't add mapping '%s', control id 0x%08x is invalid\n",
+- mapping->name, mapping->id);
++ uvc_map_get_name(mapping), mapping->id);
+ return -EINVAL;
+ }
+
+@@ -2152,7 +2304,7 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
+ if (mapping->id == map->id) {
+ uvc_dbg(dev, CONTROL,
+ "Can't add mapping '%s', control id 0x%08x already exists\n",
+- mapping->name, mapping->id);
++ uvc_map_get_name(mapping), mapping->id);
+ ret = -EEXIST;
+ goto done;
+ }
+@@ -2163,12 +2315,12 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
+ atomic_dec(&dev->nmappings);
+ uvc_dbg(dev, CONTROL,
+ "Can't add mapping '%s', maximum mappings count (%u) exceeded\n",
+- mapping->name, UVC_MAX_CONTROL_MAPPINGS);
++ uvc_map_get_name(mapping), UVC_MAX_CONTROL_MAPPINGS);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+- ret = __uvc_ctrl_add_mapping(dev, ctrl, mapping);
++ ret = __uvc_ctrl_add_mapping(chain, ctrl, mapping);
+ if (ret < 0)
+ atomic_dec(&dev->nmappings);
+
+@@ -2244,7 +2396,8 @@ static void uvc_ctrl_prune_entity(struct uvc_device *dev,
+ * Add control information and hardcoded stock control mappings to the given
+ * device.
+ */
+-static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl)
++static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain,
++ struct uvc_control *ctrl)
+ {
+ const struct uvc_control_info *info = uvc_ctrls;
+ const struct uvc_control_info *iend = info + ARRAY_SIZE(uvc_ctrls);
+@@ -2263,14 +2416,14 @@ static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl)
+ for (; info < iend; ++info) {
+ if (uvc_entity_match_guid(ctrl->entity, info->entity) &&
+ ctrl->index == info->index) {
+- uvc_ctrl_add_info(dev, ctrl, info);
++ uvc_ctrl_add_info(chain->dev, ctrl, info);
+ /*
+ * Retrieve control flags from the device. Ignore errors
+ * and work with default flag values from the uvc_ctrl
+ * array when the device doesn't properly implement
+ * GET_INFO on standard controls.
+ */
+- uvc_ctrl_get_flags(dev, ctrl, &ctrl->info);
++ uvc_ctrl_get_flags(chain->dev, ctrl, &ctrl->info);
+ break;
+ }
+ }
+@@ -2281,22 +2434,20 @@ static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl)
+ for (; mapping < mend; ++mapping) {
+ if (uvc_entity_match_guid(ctrl->entity, mapping->entity) &&
+ ctrl->info.selector == mapping->selector)
+- __uvc_ctrl_add_mapping(dev, ctrl, mapping);
++ __uvc_ctrl_add_mapping(chain, ctrl, mapping);
+ }
+ }
+
+ /*
+ * Initialize device controls.
+ */
+-int uvc_ctrl_init_device(struct uvc_device *dev)
++static int uvc_ctrl_init_chain(struct uvc_video_chain *chain)
+ {
+ struct uvc_entity *entity;
+ unsigned int i;
+
+- INIT_WORK(&dev->async_ctrl.work, uvc_ctrl_status_event_work);
+-
+ /* Walk the entities list and instantiate controls */
+- list_for_each_entry(entity, &dev->entities, list) {
++ list_for_each_entry(entity, &chain->entities, chain) {
+ struct uvc_control *ctrl;
+ unsigned int bControlSize = 0, ncontrols;
+ u8 *bmControls = NULL;
+@@ -2316,7 +2467,7 @@ int uvc_ctrl_init_device(struct uvc_device *dev)
+ }
+
+ /* Remove bogus/blacklisted controls */
+- uvc_ctrl_prune_entity(dev, entity);
++ uvc_ctrl_prune_entity(chain->dev, entity);
+
+ /* Count supported controls and allocate the controls array */
+ ncontrols = memweight(bmControls, bControlSize);
+@@ -2338,7 +2489,7 @@ int uvc_ctrl_init_device(struct uvc_device *dev)
+ ctrl->entity = entity;
+ ctrl->index = i;
+
+- uvc_ctrl_init_ctrl(dev, ctrl);
++ uvc_ctrl_init_ctrl(chain, ctrl);
+ ctrl++;
+ }
+ }
+@@ -2346,6 +2497,22 @@ int uvc_ctrl_init_device(struct uvc_device *dev)
+ return 0;
+ }
+
++int uvc_ctrl_init_device(struct uvc_device *dev)
++{
++ struct uvc_video_chain *chain;
++ int ret;
++
++ INIT_WORK(&dev->async_ctrl.work, uvc_ctrl_status_event_work);
++
++ list_for_each_entry(chain, &dev->chains, list) {
++ ret = uvc_ctrl_init_chain(chain);
++ if (ret)
++ return ret;
++ }
++
++ return 0;
++}
++
+ /*
+ * Cleanup device controls.
+ */
+@@ -2357,6 +2524,7 @@ static void uvc_ctrl_cleanup_mappings(struct uvc_device *dev,
+ list_for_each_entry_safe(mapping, nm, &ctrl->info.mappings, list) {
+ list_del(&mapping->list);
+ kfree(mapping->menu_info);
++ kfree(mapping->name);
+ kfree(mapping);
+ }
+ }
+diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
+index 9a791d8ef200d..b19c75a6f5952 100644
+--- a/drivers/media/usb/uvc/uvc_driver.c
++++ b/drivers/media/usb/uvc/uvc_driver.c
+@@ -530,18 +530,17 @@ static int uvc_parse_format(struct uvc_device *dev,
+ /* Find the format descriptor from its GUID. */
+ fmtdesc = uvc_format_by_guid(&buffer[5]);
+
+- if (fmtdesc != NULL) {
+- strscpy(format->name, fmtdesc->name,
+- sizeof(format->name));
+- format->fcc = fmtdesc->fcc;
+- } else {
++ if (!fmtdesc) {
++ /*
++ * Unknown video formats are not fatal errors, the
++ * caller will skip this descriptor.
++ */
+ dev_info(&streaming->intf->dev,
+ "Unknown video format %pUl\n", &buffer[5]);
+- snprintf(format->name, sizeof(format->name), "%pUl\n",
+- &buffer[5]);
+- format->fcc = 0;
++ return 0;
+ }
+
++ format->fcc = fmtdesc->fcc;
+ format->bpp = buffer[21];
+
+ /* Some devices report a format that doesn't match what they
+@@ -549,8 +548,6 @@ static int uvc_parse_format(struct uvc_device *dev,
+ */
+ if (dev->quirks & UVC_QUIRK_FORCE_Y8) {
+ if (format->fcc == V4L2_PIX_FMT_YUYV) {
+- strscpy(format->name, "Greyscale 8-bit (Y8 )",
+- sizeof(format->name));
+ format->fcc = V4L2_PIX_FMT_GREY;
+ format->bpp = 8;
+ width_multiplier = 2;
+@@ -591,7 +588,6 @@ static int uvc_parse_format(struct uvc_device *dev,
+ return -EINVAL;
+ }
+
+- strscpy(format->name, "MJPEG", sizeof(format->name));
+ format->fcc = V4L2_PIX_FMT_MJPEG;
+ format->flags = UVC_FMT_FLAG_COMPRESSED;
+ format->bpp = 0;
+@@ -607,17 +603,7 @@ static int uvc_parse_format(struct uvc_device *dev,
+ return -EINVAL;
+ }
+
+- switch (buffer[8] & 0x7f) {
+- case 0:
+- strscpy(format->name, "SD-DV", sizeof(format->name));
+- break;
+- case 1:
+- strscpy(format->name, "SDL-DV", sizeof(format->name));
+- break;
+- case 2:
+- strscpy(format->name, "HD-DV", sizeof(format->name));
+- break;
+- default:
++ if ((buffer[8] & 0x7f) > 2) {
+ uvc_dbg(dev, DESCR,
+ "device %d videostreaming interface %d: unknown DV format %u\n",
+ dev->udev->devnum,
+@@ -625,9 +611,6 @@ static int uvc_parse_format(struct uvc_device *dev,
+ return -EINVAL;
+ }
+
+- strlcat(format->name, buffer[8] & (1 << 7) ? " 60Hz" : " 50Hz",
+- sizeof(format->name));
+-
+ format->fcc = V4L2_PIX_FMT_DV;
+ format->flags = UVC_FMT_FLAG_COMPRESSED | UVC_FMT_FLAG_STREAM;
+ format->bpp = 0;
+@@ -654,7 +637,7 @@ static int uvc_parse_format(struct uvc_device *dev,
+ return -EINVAL;
+ }
+
+- uvc_dbg(dev, DESCR, "Found format %s\n", format->name);
++ uvc_dbg(dev, DESCR, "Found format %p4cc", &format->fcc);
+
+ buflen -= buffer[0];
+ buffer += buffer[0];
+@@ -965,7 +948,7 @@ static int uvc_parse_streaming(struct uvc_device *dev,
+ interval = (u32 *)&frame[nframes];
+
+ streaming->format = format;
+- streaming->nformats = nformats;
++ streaming->nformats = 0;
+
+ /* Parse the format descriptors. */
+ while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE) {
+@@ -979,7 +962,10 @@ static int uvc_parse_streaming(struct uvc_device *dev,
+ &interval, buffer, buflen);
+ if (ret < 0)
+ goto error;
++ if (!ret)
++ break;
+
++ streaming->nformats++;
+ frame += format->nframes;
+ format++;
+
+@@ -1151,10 +1137,8 @@ static int uvc_parse_vendor_control(struct uvc_device *dev,
+ + n;
+ memcpy(unit->extension.bmControls, &buffer[23+p], 2*n);
+
+- if (buffer[24+p+2*n] != 0)
+- usb_string(udev, buffer[24+p+2*n], unit->name,
+- sizeof(unit->name));
+- else
++ if (buffer[24+p+2*n] == 0 ||
++ usb_string(udev, buffer[24+p+2*n], unit->name, sizeof(unit->name)) < 0)
+ sprintf(unit->name, "Extension %u", buffer[3]);
+
+ list_add_tail(&unit->list, &dev->entities);
+@@ -1278,15 +1262,15 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
+ memcpy(term->media.bmTransportModes, &buffer[10+n], p);
+ }
+
+- if (buffer[7] != 0)
+- usb_string(udev, buffer[7], term->name,
+- sizeof(term->name));
+- else if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA)
+- sprintf(term->name, "Camera %u", buffer[3]);
+- else if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT)
+- sprintf(term->name, "Media %u", buffer[3]);
+- else
+- sprintf(term->name, "Input %u", buffer[3]);
++ if (buffer[7] == 0 ||
++ usb_string(udev, buffer[7], term->name, sizeof(term->name)) < 0) {
++ if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA)
++ sprintf(term->name, "Camera %u", buffer[3]);
++ if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT)
++ sprintf(term->name, "Media %u", buffer[3]);
++ else
++ sprintf(term->name, "Input %u", buffer[3]);
++ }
+
+ list_add_tail(&term->list, &dev->entities);
+ break;
+@@ -1318,10 +1302,8 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
+
+ memcpy(term->baSourceID, &buffer[7], 1);
+
+- if (buffer[8] != 0)
+- usb_string(udev, buffer[8], term->name,
+- sizeof(term->name));
+- else
++ if (buffer[8] == 0 ||
++ usb_string(udev, buffer[8], term->name, sizeof(term->name)) < 0)
+ sprintf(term->name, "Output %u", buffer[3]);
+
+ list_add_tail(&term->list, &dev->entities);
+@@ -1343,10 +1325,8 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
+
+ memcpy(unit->baSourceID, &buffer[5], p);
+
+- if (buffer[5+p] != 0)
+- usb_string(udev, buffer[5+p], unit->name,
+- sizeof(unit->name));
+- else
++ if (buffer[5+p] == 0 ||
++ usb_string(udev, buffer[5+p], unit->name, sizeof(unit->name)) < 0)
+ sprintf(unit->name, "Selector %u", buffer[3]);
+
+ list_add_tail(&unit->list, &dev->entities);
+@@ -1376,10 +1356,8 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
+ if (dev->uvc_version >= 0x0110)
+ unit->processing.bmVideoStandards = buffer[9+n];
+
+- if (buffer[8+n] != 0)
+- usb_string(udev, buffer[8+n], unit->name,
+- sizeof(unit->name));
+- else
++ if (buffer[8+n] == 0 ||
++ usb_string(udev, buffer[8+n], unit->name, sizeof(unit->name)) < 0)
+ sprintf(unit->name, "Processing %u", buffer[3]);
+
+ list_add_tail(&unit->list, &dev->entities);
+@@ -1407,10 +1385,8 @@ static int uvc_parse_standard_control(struct uvc_device *dev,
+ unit->extension.bmControls = (u8 *)unit + sizeof(*unit);
+ memcpy(unit->extension.bmControls, &buffer[23+p], n);
+
+- if (buffer[23+p+n] != 0)
+- usb_string(udev, buffer[23+p+n], unit->name,
+- sizeof(unit->name));
+- else
++ if (buffer[23+p+n] == 0 ||
++ usb_string(udev, buffer[23+p+n], unit->name, sizeof(unit->name)) < 0)
+ sprintf(unit->name, "Extension %u", buffer[3]);
+
+ list_add_tail(&unit->list, &dev->entities);
+@@ -1534,10 +1510,6 @@ static int uvc_gpio_parse(struct uvc_device *dev)
+ if (IS_ERR_OR_NULL(gpio_privacy))
+ return PTR_ERR_OR_ZERO(gpio_privacy);
+
+- unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1);
+- if (!unit)
+- return -ENOMEM;
+-
+ irq = gpiod_to_irq(gpio_privacy);
+ if (irq < 0) {
+ if (irq != EPROBE_DEFER)
+@@ -1546,6 +1518,10 @@ static int uvc_gpio_parse(struct uvc_device *dev)
+ return irq;
+ }
+
++ unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1);
++ if (!unit)
++ return -ENOMEM;
++
+ unit->gpio.gpio_privacy = gpio_privacy;
+ unit->gpio.irq = irq;
+ unit->gpio.bControlSize = 1;
+@@ -2455,14 +2431,14 @@ static int uvc_probe(struct usb_interface *intf,
+ if (v4l2_device_register(&intf->dev, &dev->vdev) < 0)
+ goto error;
+
+- /* Initialize controls. */
+- if (uvc_ctrl_init_device(dev) < 0)
+- goto error;
+-
+ /* Scan the device for video chains. */
+ if (uvc_scan_device(dev) < 0)
+ goto error;
+
++ /* Initialize controls. */
++ if (uvc_ctrl_init_device(dev) < 0)
++ goto error;
++
+ /* Register video device nodes. */
+ if (uvc_register_chains(dev) < 0)
+ goto error;
+@@ -2714,6 +2690,24 @@ static const struct usb_device_id uvc_ids[] = {
+ .bInterfaceSubClass = 1,
+ .bInterfaceProtocol = 0,
+ .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax },
++ /* Logitech, Webcam C910 */
++ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE
++ | USB_DEVICE_ID_MATCH_INT_INFO,
++ .idVendor = 0x046d,
++ .idProduct = 0x0821,
++ .bInterfaceClass = USB_CLASS_VIDEO,
++ .bInterfaceSubClass = 1,
++ .bInterfaceProtocol = 0,
++ .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)},
++ /* Logitech, Webcam B910 */
++ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE
++ | USB_DEVICE_ID_MATCH_INT_INFO,
++ .idVendor = 0x046d,
++ .idProduct = 0x0823,
++ .bInterfaceClass = USB_CLASS_VIDEO,
++ .bInterfaceSubClass = 1,
++ .bInterfaceProtocol = 0,
++ .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)},
+ /* Logitech Quickcam Fusion */
+ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE
+ | USB_DEVICE_ID_MATCH_INT_INFO,
+diff --git a/drivers/media/usb/uvc/uvc_entity.c b/drivers/media/usb/uvc/uvc_entity.c
+index 7c4d2f93d3513..cc68dd24eb42d 100644
+--- a/drivers/media/usb/uvc/uvc_entity.c
++++ b/drivers/media/usb/uvc/uvc_entity.c
+@@ -37,7 +37,7 @@ static int uvc_mc_create_links(struct uvc_video_chain *chain,
+ continue;
+
+ remote = uvc_entity_by_id(chain->dev, entity->baSourceID[i]);
+- if (remote == NULL)
++ if (remote == NULL || remote->num_pads == 0)
+ return -EINVAL;
+
+ source = (UVC_ENTITY_TYPE(remote) == UVC_TT_STREAMING)
+diff --git a/drivers/media/usb/uvc/uvc_status.c b/drivers/media/usb/uvc/uvc_status.c
+index 753c8226db707..3fa658b86c82b 100644
+--- a/drivers/media/usb/uvc/uvc_status.c
++++ b/drivers/media/usb/uvc/uvc_status.c
+@@ -6,6 +6,7 @@
+ * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ */
+
++#include <asm/barrier.h>
+ #include <linux/kernel.h>
+ #include <linux/input.h>
+ #include <linux/slab.h>
+@@ -309,5 +310,41 @@ int uvc_status_start(struct uvc_device *dev, gfp_t flags)
+
+ void uvc_status_stop(struct uvc_device *dev)
+ {
++ struct uvc_ctrl_work *w = &dev->async_ctrl;
++
++ /*
++ * Prevent the asynchronous control handler from requeuing the URB. The
++ * barrier is needed so the flush_status change is visible to other
++ * CPUs running the asynchronous handler before usb_kill_urb() is
++ * called below.
++ */
++ smp_store_release(&dev->flush_status, true);
++
++ /*
++ * Cancel any pending asynchronous work. If any status event was queued,
++ * process it synchronously.
++ */
++ if (cancel_work_sync(&w->work))
++ uvc_ctrl_status_event(w->chain, w->ctrl, w->data);
++
++ /* Kill the urb. */
+ usb_kill_urb(dev->int_urb);
++
++ /*
++ * The URB completion handler may have queued asynchronous work. This
++ * won't resubmit the URB as flush_status is set, but it needs to be
++ * cancelled before returning or it could then race with a future
++ * uvc_status_start() call.
++ */
++ if (cancel_work_sync(&w->work))
++ uvc_ctrl_status_event(w->chain, w->ctrl, w->data);
++
++ /*
++ * From this point, there are no events on the queue and the status URB
++ * is dead. No events will be queued until uvc_status_start() is called.
++ * The barrier is needed to make sure that flush_status is visible to
++ * uvc_ctrl_status_event_work() when uvc_status_start() will be called
++ * again.
++ */
++ smp_store_release(&dev->flush_status, false);
+ }
+diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
+index 6acb8013de08b..ab535e5501583 100644
+--- a/drivers/media/usb/uvc/uvc_v4l2.c
++++ b/drivers/media/usb/uvc/uvc_v4l2.c
+@@ -40,7 +40,15 @@ static int uvc_ioctl_ctrl_map(struct uvc_video_chain *chain,
+ return -ENOMEM;
+
+ map->id = xmap->id;
+- memcpy(map->name, xmap->name, sizeof(map->name));
++ /* Non standard control id. */
++ if (v4l2_ctrl_get_name(map->id) == NULL) {
++ map->name = kmemdup(xmap->name, sizeof(xmap->name),
++ GFP_KERNEL);
++ if (!map->name) {
++ ret = -ENOMEM;
++ goto free_map;
++ }
++ }
+ memcpy(map->entity, xmap->entity, sizeof(map->entity));
+ map->selector = xmap->selector;
+ map->size = xmap->size;
+@@ -472,10 +480,13 @@ static int uvc_v4l2_set_streamparm(struct uvc_streaming *stream,
+ uvc_simplify_fraction(&timeperframe.numerator,
+ &timeperframe.denominator, 8, 333);
+
+- if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
++ if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+ parm->parm.capture.timeperframe = timeperframe;
+- else
++ parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME;
++ } else {
+ parm->parm.output.timeperframe = timeperframe;
++ parm->parm.output.capability = V4L2_CAP_TIMEPERFRAME;
++ }
+
+ return 0;
+ }
+@@ -646,8 +657,6 @@ static int uvc_ioctl_enum_fmt(struct uvc_streaming *stream,
+ fmt->flags = 0;
+ if (format->flags & UVC_FMT_FLAG_COMPRESSED)
+ fmt->flags |= V4L2_FMT_FLAG_COMPRESSED;
+- strscpy(fmt->description, format->name, sizeof(fmt->description));
+- fmt->description[sizeof(fmt->description) - 1] = 0;
+ fmt->pixelformat = format->fcc;
+ return 0;
+ }
+@@ -861,29 +870,31 @@ static int uvc_ioctl_enum_input(struct file *file, void *fh,
+ struct uvc_video_chain *chain = handle->chain;
+ const struct uvc_entity *selector = chain->selector;
+ struct uvc_entity *iterm = NULL;
++ struct uvc_entity *it;
+ u32 index = input->index;
+- int pin = 0;
+
+ if (selector == NULL ||
+ (chain->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) {
+ if (index != 0)
+ return -EINVAL;
+- list_for_each_entry(iterm, &chain->entities, chain) {
+- if (UVC_ENTITY_IS_ITERM(iterm))
++ list_for_each_entry(it, &chain->entities, chain) {
++ if (UVC_ENTITY_IS_ITERM(it)) {
++ iterm = it;
+ break;
++ }
+ }
+- pin = iterm->id;
+ } else if (index < selector->bNrInPins) {
+- pin = selector->baSourceID[index];
+- list_for_each_entry(iterm, &chain->entities, chain) {
+- if (!UVC_ENTITY_IS_ITERM(iterm))
++ list_for_each_entry(it, &chain->entities, chain) {
++ if (!UVC_ENTITY_IS_ITERM(it))
+ continue;
+- if (iterm->id == pin)
++ if (it->id == selector->baSourceID[index]) {
++ iterm = it;
+ break;
++ }
+ }
+ }
+
+- if (iterm == NULL || iterm->id != pin)
++ if (iterm == NULL)
+ return -EINVAL;
+
+ memset(input, 0, sizeof(*input));
+@@ -995,58 +1006,23 @@ static int uvc_ioctl_query_ext_ctrl(struct file *file, void *fh,
+ return 0;
+ }
+
+-static int uvc_ioctl_g_ctrl(struct file *file, void *fh,
+- struct v4l2_control *ctrl)
+-{
+- struct uvc_fh *handle = fh;
+- struct uvc_video_chain *chain = handle->chain;
+- struct v4l2_ext_control xctrl;
+- int ret;
+-
+- memset(&xctrl, 0, sizeof(xctrl));
+- xctrl.id = ctrl->id;
+-
+- ret = uvc_ctrl_begin(chain);
+- if (ret < 0)
+- return ret;
+-
+- ret = uvc_ctrl_get(chain, &xctrl);
+- uvc_ctrl_rollback(handle);
+- if (ret < 0)
+- return ret;
+-
+- ctrl->value = xctrl.value;
+- return 0;
+-}
+-
+-static int uvc_ioctl_s_ctrl(struct file *file, void *fh,
+- struct v4l2_control *ctrl)
++static int uvc_ctrl_check_access(struct uvc_video_chain *chain,
++ struct v4l2_ext_controls *ctrls,
++ unsigned long ioctl)
+ {
+- struct uvc_fh *handle = fh;
+- struct uvc_video_chain *chain = handle->chain;
+- struct v4l2_ext_control xctrl;
+- int ret;
+-
+- memset(&xctrl, 0, sizeof(xctrl));
+- xctrl.id = ctrl->id;
+- xctrl.value = ctrl->value;
+-
+- ret = uvc_ctrl_begin(chain);
+- if (ret < 0)
+- return ret;
++ struct v4l2_ext_control *ctrl = ctrls->controls;
++ unsigned int i;
++ int ret = 0;
+
+- ret = uvc_ctrl_set(handle, &xctrl);
+- if (ret < 0) {
+- uvc_ctrl_rollback(handle);
+- return ret;
++ for (i = 0; i < ctrls->count; ++ctrl, ++i) {
++ ret = uvc_ctrl_is_accessible(chain, ctrl->id, ctrls, ioctl);
++ if (ret)
++ break;
+ }
+
+- ret = uvc_ctrl_commit(handle, &xctrl, 1);
+- if (ret < 0)
+- return ret;
++ ctrls->error_idx = ioctl == VIDIOC_TRY_EXT_CTRLS ? i : ctrls->count;
+
+- ctrl->value = xctrl.value;
+- return 0;
++ return ret;
+ }
+
+ static int uvc_ioctl_g_ext_ctrls(struct file *file, void *fh,
+@@ -1058,6 +1034,10 @@ static int uvc_ioctl_g_ext_ctrls(struct file *file, void *fh,
+ unsigned int i;
+ int ret;
+
++ ret = uvc_ctrl_check_access(chain, ctrls, VIDIOC_G_EXT_CTRLS);
++ if (ret < 0)
++ return ret;
++
+ if (ctrls->which == V4L2_CTRL_WHICH_DEF_VAL) {
+ for (i = 0; i < ctrls->count; ++ctrl, ++i) {
+ struct v4l2_queryctrl qc = { .id = ctrl->id };
+@@ -1094,16 +1074,16 @@ static int uvc_ioctl_g_ext_ctrls(struct file *file, void *fh,
+
+ static int uvc_ioctl_s_try_ext_ctrls(struct uvc_fh *handle,
+ struct v4l2_ext_controls *ctrls,
+- bool commit)
++ unsigned long ioctl)
+ {
+ struct v4l2_ext_control *ctrl = ctrls->controls;
+ struct uvc_video_chain *chain = handle->chain;
+ unsigned int i;
+ int ret;
+
+- /* Default value cannot be changed */
+- if (ctrls->which == V4L2_CTRL_WHICH_DEF_VAL)
+- return -EINVAL;
++ ret = uvc_ctrl_check_access(chain, ctrls, ioctl);
++ if (ret < 0)
++ return ret;
+
+ ret = uvc_ctrl_begin(chain);
+ if (ret < 0)
+@@ -1113,14 +1093,15 @@ static int uvc_ioctl_s_try_ext_ctrls(struct uvc_fh *handle,
+ ret = uvc_ctrl_set(handle, ctrl);
+ if (ret < 0) {
+ uvc_ctrl_rollback(handle);
+- ctrls->error_idx = commit ? ctrls->count : i;
++ ctrls->error_idx = ioctl == VIDIOC_S_EXT_CTRLS ?
++ ctrls->count : i;
+ return ret;
+ }
+ }
+
+ ctrls->error_idx = 0;
+
+- if (commit)
++ if (ioctl == VIDIOC_S_EXT_CTRLS)
+ return uvc_ctrl_commit(handle, ctrls->controls, ctrls->count);
+ else
+ return uvc_ctrl_rollback(handle);
+@@ -1131,7 +1112,7 @@ static int uvc_ioctl_s_ext_ctrls(struct file *file, void *fh,
+ {
+ struct uvc_fh *handle = fh;
+
+- return uvc_ioctl_s_try_ext_ctrls(handle, ctrls, true);
++ return uvc_ioctl_s_try_ext_ctrls(handle, ctrls, VIDIOC_S_EXT_CTRLS);
+ }
+
+ static int uvc_ioctl_try_ext_ctrls(struct file *file, void *fh,
+@@ -1139,7 +1120,7 @@ static int uvc_ioctl_try_ext_ctrls(struct file *file, void *fh,
+ {
+ struct uvc_fh *handle = fh;
+
+- return uvc_ioctl_s_try_ext_ctrls(handle, ctrls, false);
++ return uvc_ioctl_s_try_ext_ctrls(handle, ctrls, VIDIOC_TRY_EXT_CTRLS);
+ }
+
+ static int uvc_ioctl_querymenu(struct file *file, void *fh,
+@@ -1538,8 +1519,6 @@ const struct v4l2_ioctl_ops uvc_ioctl_ops = {
+ .vidioc_s_input = uvc_ioctl_s_input,
+ .vidioc_queryctrl = uvc_ioctl_queryctrl,
+ .vidioc_query_ext_ctrl = uvc_ioctl_query_ext_ctrl,
+- .vidioc_g_ctrl = uvc_ioctl_g_ctrl,
+- .vidioc_s_ctrl = uvc_ioctl_s_ctrl,
+ .vidioc_g_ext_ctrls = uvc_ioctl_g_ext_ctrls,
+ .vidioc_s_ext_ctrls = uvc_ioctl_s_ext_ctrls,
+ .vidioc_try_ext_ctrls = uvc_ioctl_try_ext_ctrls,
+diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
+index e16464606b140..f477cfbbb905a 100644
+--- a/drivers/media/usb/uvc/uvc_video.c
++++ b/drivers/media/usb/uvc/uvc_video.c
+@@ -115,6 +115,11 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit,
+ case 5: /* Invalid unit */
+ case 6: /* Invalid control */
+ case 7: /* Invalid Request */
++ /*
++ * The firmware has not properly implemented
++ * the control or there has been a HW error.
++ */
++ return -EIO;
+ case 8: /* Invalid value within range */
+ return -EINVAL;
+ default: /* reserved or unknown */
+@@ -1329,7 +1334,9 @@ static void uvc_video_decode_meta(struct uvc_streaming *stream,
+ if (has_scr)
+ memcpy(stream->clock.last_scr, scr, 6);
+
+- memcpy(&meta->length, mem, length);
++ meta->length = mem[0];
++ meta->flags = mem[1];
++ memcpy(meta->buf, &mem[2], length - 2);
+ meta_buf->bytesused += length + sizeof(meta->ns) + sizeof(meta->sof);
+
+ uvc_dbg(stream->dev, FRAME,
+@@ -1946,6 +1953,17 @@ static int uvc_video_start_transfer(struct uvc_streaming *stream,
+ "Selecting alternate setting %u (%u B/frame bandwidth)\n",
+ altsetting, best_psize);
+
++ /*
++ * Some devices, namely the Logitech C910 and B910, are unable
++ * to recover from a USB autosuspend, unless the alternate
++ * setting of the streaming interface is toggled.
++ */
++ if (stream->dev->quirks & UVC_QUIRK_WAKE_AUTOSUSPEND) {
++ usb_set_interface(stream->dev->udev, intfnum,
++ altsetting);
++ usb_set_interface(stream->dev->udev, intfnum, 0);
++ }
++
+ ret = usb_set_interface(stream->dev->udev, intfnum, altsetting);
+ if (ret < 0)
+ return ret;
+@@ -1958,6 +1976,10 @@ static int uvc_video_start_transfer(struct uvc_streaming *stream,
+ if (ep == NULL)
+ return -EIO;
+
++ /* Reject broken descriptors. */
++ if (usb_endpoint_maxp(&ep->desc) == 0)
++ return -EIO;
++
+ ret = uvc_init_video_bulk(stream, ep, gfp_flags);
+ }
+
+diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h
+index cce5e38133cd3..1aa2cc98502d1 100644
+--- a/drivers/media/usb/uvc/uvcvideo.h
++++ b/drivers/media/usb/uvc/uvcvideo.h
+@@ -189,7 +189,7 @@
+ /* Maximum status buffer size in bytes of interrupt URB. */
+ #define UVC_MAX_STATUS_SIZE 16
+
+-#define UVC_CTRL_CONTROL_TIMEOUT 500
++#define UVC_CTRL_CONTROL_TIMEOUT 5000
+ #define UVC_CTRL_STREAMING_TIMEOUT 5000
+
+ /* Maximum allowed number of control mappings per device */
+@@ -209,6 +209,7 @@
+ #define UVC_QUIRK_RESTORE_CTRLS_ON_INIT 0x00000400
+ #define UVC_QUIRK_FORCE_Y8 0x00000800
+ #define UVC_QUIRK_FORCE_BPP 0x00001000
++#define UVC_QUIRK_WAKE_AUTOSUSPEND 0x00002000
+
+ /* Format flags */
+ #define UVC_FMT_FLAG_COMPRESSED 0x00000001
+@@ -241,7 +242,7 @@ struct uvc_control_mapping {
+ struct list_head ev_subs;
+
+ u32 id;
+- u8 name[32];
++ char *name;
+ u8 entity[16];
+ u8 selector;
+
+@@ -405,8 +406,6 @@ struct uvc_format {
+ u32 fcc;
+ u32 flags;
+
+- char name[32];
+-
+ unsigned int nframes;
+ struct uvc_frame *frame;
+ };
+@@ -476,6 +475,7 @@ struct uvc_video_chain {
+
+ struct v4l2_prio_state prio; /* V4L2 priority state */
+ u32 caps; /* V4L2 chain-wide caps */
++ u8 ctrl_class_bitmap; /* Bitmap of valid classes */
+ };
+
+ struct uvc_stats_frame {
+@@ -697,6 +697,7 @@ struct uvc_device {
+ /* Status Interrupt Endpoint */
+ struct usb_host_endpoint *int_ep;
+ struct urb *int_urb;
++ bool flush_status;
+ u8 *status;
+ struct input_dev *input;
+ char input_phys[64];
+@@ -900,6 +901,9 @@ static inline int uvc_ctrl_rollback(struct uvc_fh *handle)
+
+ int uvc_ctrl_get(struct uvc_video_chain *chain, struct v4l2_ext_control *xctrl);
+ int uvc_ctrl_set(struct uvc_fh *handle, struct v4l2_ext_control *xctrl);
++int uvc_ctrl_is_accessible(struct uvc_video_chain *chain, u32 v4l2_id,
++ const struct v4l2_ext_controls *ctrls,
++ unsigned long ioctl);
+
+ int uvc_xu_ctrl_query(struct uvc_video_chain *chain,
+ struct uvc_xu_control_query *xqry);
+diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+index 47aff3b197426..94037af1af2dc 100644
+--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
++++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+@@ -744,10 +744,6 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *p64,
+ /*
+ * x86 is the only compat architecture with different struct alignment
+ * between 32-bit and 64-bit tasks.
+- *
+- * On all other architectures, v4l2_event32 and v4l2_event32_time32 are
+- * the same as v4l2_event and v4l2_event_time32, so we can use the native
+- * handlers, converting v4l2_event to v4l2_event_time32 if necessary.
+ */
+ struct v4l2_event32 {
+ __u32 type;
+@@ -765,21 +761,6 @@ struct v4l2_event32 {
+ __u32 reserved[8];
+ };
+
+-#ifdef CONFIG_COMPAT_32BIT_TIME
+-struct v4l2_event32_time32 {
+- __u32 type;
+- union {
+- compat_s64 value64;
+- __u8 data[64];
+- } u;
+- __u32 pending;
+- __u32 sequence;
+- struct old_timespec32 timestamp;
+- __u32 id;
+- __u32 reserved[8];
+-};
+-#endif
+-
+ static int put_v4l2_event32(struct v4l2_event *p64,
+ struct v4l2_event32 __user *p32)
+ {
+@@ -795,7 +776,22 @@ static int put_v4l2_event32(struct v4l2_event *p64,
+ return 0;
+ }
+
++#endif
++
+ #ifdef CONFIG_COMPAT_32BIT_TIME
++struct v4l2_event32_time32 {
++ __u32 type;
++ union {
++ compat_s64 value64;
++ __u8 data[64];
++ } u;
++ __u32 pending;
++ __u32 sequence;
++ struct old_timespec32 timestamp;
++ __u32 id;
++ __u32 reserved[8];
++};
++
+ static int put_v4l2_event32_time32(struct v4l2_event *p64,
+ struct v4l2_event32_time32 __user *p32)
+ {
+@@ -811,7 +807,6 @@ static int put_v4l2_event32_time32(struct v4l2_event *p64,
+ return 0;
+ }
+ #endif
+-#endif
+
+ struct v4l2_edid32 {
+ __u32 pad;
+@@ -873,9 +868,7 @@ static int put_v4l2_edid32(struct v4l2_edid *p64,
+ #define VIDIOC_QUERYBUF32_TIME32 _IOWR('V', 9, struct v4l2_buffer32_time32)
+ #define VIDIOC_QBUF32_TIME32 _IOWR('V', 15, struct v4l2_buffer32_time32)
+ #define VIDIOC_DQBUF32_TIME32 _IOWR('V', 17, struct v4l2_buffer32_time32)
+-#ifdef CONFIG_X86_64
+ #define VIDIOC_DQEVENT32_TIME32 _IOR ('V', 89, struct v4l2_event32_time32)
+-#endif
+ #define VIDIOC_PREPARE_BUF32_TIME32 _IOWR('V', 93, struct v4l2_buffer32_time32)
+ #endif
+
+@@ -929,10 +922,10 @@ unsigned int v4l2_compat_translate_cmd(unsigned int cmd)
+ #ifdef CONFIG_X86_64
+ case VIDIOC_DQEVENT32:
+ return VIDIOC_DQEVENT;
++#endif
+ #ifdef CONFIG_COMPAT_32BIT_TIME
+ case VIDIOC_DQEVENT32_TIME32:
+ return VIDIOC_DQEVENT;
+-#endif
+ #endif
+ }
+ return cmd;
+@@ -1025,10 +1018,10 @@ int v4l2_compat_put_user(void __user *arg, void *parg, unsigned int cmd)
+ #ifdef CONFIG_X86_64
+ case VIDIOC_DQEVENT32:
+ return put_v4l2_event32(parg, arg);
++#endif
+ #ifdef CONFIG_COMPAT_32BIT_TIME
+ case VIDIOC_DQEVENT32_TIME32:
+ return put_v4l2_event32_time32(parg, arg);
+-#endif
+ #endif
+ }
+ return 0;
+@@ -1040,6 +1033,8 @@ int v4l2_compat_get_array_args(struct file *file, void *mbuf,
+ {
+ int err = 0;
+
++ memset(mbuf, 0, array_size);
++
+ switch (cmd) {
+ case VIDIOC_G_FMT32:
+ case VIDIOC_S_FMT32:
+diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c
+index c4b5082849b66..3798a57bbbd43 100644
+--- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
++++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
+@@ -113,6 +113,7 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
+ struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant;
+ struct v4l2_ctrl_vp8_frame *p_vp8_frame;
+ struct v4l2_ctrl_fwht_params *p_fwht_params;
++ struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix;
+ void *p = ptr.p + idx * ctrl->elem_size;
+
+ if (ctrl->p_def.p_const)
+@@ -160,6 +161,15 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
+ p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV |
+ (2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET);
+ break;
++ case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
++ p_h264_scaling_matrix = p;
++ /*
++ * The default (flat) H.264 scaling matrix when none are
++ * specified in the bitstream, this is according to formulas
++ * (7-8) and (7-9) of the specification.
++ */
++ memset(p_h264_scaling_matrix, 16, sizeof(*p_h264_scaling_matrix));
++ break;
+ }
+ }
+
+@@ -1446,7 +1456,7 @@ struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
+ else if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
+ qmenu_int = v4l2_ctrl_get_int_menu(id, &qmenu_int_len);
+
+- if ((!qmenu && !qmenu_int) || (qmenu_int && max > qmenu_int_len)) {
++ if ((!qmenu && !qmenu_int) || (qmenu_int && max >= qmenu_int_len)) {
+ handler_set_err(hdl, -EINVAL);
+ return NULL;
+ }
+diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c
+index af48705c704f8..942d0005c55e8 100644
+--- a/drivers/media/v4l2-core/v4l2-dv-timings.c
++++ b/drivers/media/v4l2-core/v4l2-dv-timings.c
+@@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t,
+ const struct v4l2_bt_timings *bt = &t->bt;
+ const struct v4l2_bt_timings_cap *cap = &dvcap->bt;
+ u32 caps = cap->capabilities;
++ const u32 max_vert = 10240;
++ u32 max_hor = 3 * bt->width;
+
+ if (t->type != V4L2_DV_BT_656_1120)
+ return false;
+@@ -161,6 +163,26 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t,
+ (bt->interlaced && !(caps & V4L2_DV_BT_CAP_INTERLACED)) ||
+ (!bt->interlaced && !(caps & V4L2_DV_BT_CAP_PROGRESSIVE)))
+ return false;
++
++ /* sanity checks for the blanking timings */
++ if (!bt->interlaced &&
++ (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch))
++ return false;
++ /*
++ * Some video receivers cannot properly separate the frontporch,
++ * backporch and sync values, and instead they only have the total
++ * blanking. That can be assigned to any of these three fields.
++ * So just check that none of these are way out of range.
++ */
++ if (bt->hfrontporch > max_hor ||
++ bt->hsync > max_hor || bt->hbackporch > max_hor)
++ return false;
++ if (bt->vfrontporch > max_vert ||
++ bt->vsync > max_vert || bt->vbackporch > max_vert)
++ return false;
++ if (bt->interlaced && (bt->il_vfrontporch > max_vert ||
++ bt->il_vsync > max_vert || bt->il_vbackporch > max_vert))
++ return false;
+ return fnc == NULL || fnc(t, fnc_handle);
+ }
+ EXPORT_SYMBOL_GPL(v4l2_valid_dv_timings);
+diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
+index 843259c304bb5..5d2eaad1fa684 100644
+--- a/drivers/media/v4l2-core/v4l2-fwnode.c
++++ b/drivers/media/v4l2-core/v4l2-fwnode.c
+@@ -549,19 +549,29 @@ int v4l2_fwnode_parse_link(struct fwnode_handle *fwnode,
+ link->local_id = fwep.id;
+ link->local_port = fwep.port;
+ link->local_node = fwnode_graph_get_port_parent(fwnode);
++ if (!link->local_node)
++ return -ENOLINK;
+
+ fwnode = fwnode_graph_get_remote_endpoint(fwnode);
+- if (!fwnode) {
+- fwnode_handle_put(fwnode);
+- return -ENOLINK;
+- }
++ if (!fwnode)
++ goto err_put_local_node;
+
+ fwnode_graph_parse_endpoint(fwnode, &fwep);
+ link->remote_id = fwep.id;
+ link->remote_port = fwep.port;
+ link->remote_node = fwnode_graph_get_port_parent(fwnode);
++ if (!link->remote_node)
++ goto err_put_remote_endpoint;
+
+ return 0;
++
++err_put_remote_endpoint:
++ fwnode_handle_put(fwnode);
++
++err_put_local_node:
++ fwnode_handle_put(link->local_node);
++
++ return -ENOLINK;
+ }
+ EXPORT_SYMBOL_GPL(v4l2_fwnode_parse_link);
+
+diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
+index 05d5db3d85e58..7c596a85f34f5 100644
+--- a/drivers/media/v4l2-core/v4l2-ioctl.c
++++ b/drivers/media/v4l2-core/v4l2-ioctl.c
+@@ -279,8 +279,8 @@ static void v4l_print_format(const void *arg, bool write_only)
+ const struct v4l2_vbi_format *vbi;
+ const struct v4l2_sliced_vbi_format *sliced;
+ const struct v4l2_window *win;
+- const struct v4l2_sdr_format *sdr;
+ const struct v4l2_meta_format *meta;
++ u32 pixelformat;
+ u32 planes;
+ unsigned i;
+
+@@ -299,8 +299,9 @@ static void v4l_print_format(const void *arg, bool write_only)
+ case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
+ case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+ mp = &p->fmt.pix_mp;
++ pixelformat = mp->pixelformat;
+ pr_cont(", width=%u, height=%u, format=%p4cc, field=%s, colorspace=%d, num_planes=%u, flags=0x%x, ycbcr_enc=%u, quantization=%u, xfer_func=%u\n",
+- mp->width, mp->height, &mp->pixelformat,
++ mp->width, mp->height, &pixelformat,
+ prt_names(mp->field, v4l2_field_names),
+ mp->colorspace, mp->num_planes, mp->flags,
+ mp->ycbcr_enc, mp->quantization, mp->xfer_func);
+@@ -343,14 +344,15 @@ static void v4l_print_format(const void *arg, bool write_only)
+ break;
+ case V4L2_BUF_TYPE_SDR_CAPTURE:
+ case V4L2_BUF_TYPE_SDR_OUTPUT:
+- sdr = &p->fmt.sdr;
+- pr_cont(", pixelformat=%p4cc\n", &sdr->pixelformat);
++ pixelformat = p->fmt.sdr.pixelformat;
++ pr_cont(", pixelformat=%p4cc\n", &pixelformat);
+ break;
+ case V4L2_BUF_TYPE_META_CAPTURE:
+ case V4L2_BUF_TYPE_META_OUTPUT:
+ meta = &p->fmt.meta;
++ pixelformat = meta->dataformat;
+ pr_cont(", dataformat=%p4cc, buffersize=%u\n",
+- &meta->dataformat, meta->buffersize);
++ &pixelformat, meta->buffersize);
+ break;
+ }
+ }
+@@ -869,7 +871,7 @@ static void v4l_print_default(const void *arg, bool write_only)
+ pr_cont("driver-specific ioctl\n");
+ }
+
+-static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv)
++static bool check_ext_ctrls(struct v4l2_ext_controls *c, unsigned long ioctl)
+ {
+ __u32 i;
+
+@@ -878,23 +880,41 @@ static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv)
+ for (i = 0; i < c->count; i++)
+ c->controls[i].reserved2[0] = 0;
+
+- /* V4L2_CID_PRIVATE_BASE cannot be used as control class
+- when using extended controls.
+- Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL
+- is it allowed for backwards compatibility.
+- */
+- if (!allow_priv && c->which == V4L2_CID_PRIVATE_BASE)
+- return 0;
+- if (!c->which)
+- return 1;
++ switch (c->which) {
++ case V4L2_CID_PRIVATE_BASE:
++ /*
++ * V4L2_CID_PRIVATE_BASE cannot be used as control class
++ * when using extended controls.
++ * Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL
++ * is it allowed for backwards compatibility.
++ */
++ if (ioctl == VIDIOC_G_CTRL || ioctl == VIDIOC_S_CTRL)
++ return false;
++ break;
++ case V4L2_CTRL_WHICH_DEF_VAL:
++ /* Default value cannot be changed */
++ if (ioctl == VIDIOC_S_EXT_CTRLS ||
++ ioctl == VIDIOC_TRY_EXT_CTRLS) {
++ c->error_idx = c->count;
++ return false;
++ }
++ return true;
++ case V4L2_CTRL_WHICH_CUR_VAL:
++ return true;
++ case V4L2_CTRL_WHICH_REQUEST_VAL:
++ c->error_idx = c->count;
++ return false;
++ }
++
+ /* Check that all controls are from the same control class. */
+ for (i = 0; i < c->count; i++) {
+ if (V4L2_CTRL_ID2WHICH(c->controls[i].id) != c->which) {
+- c->error_idx = i;
+- return 0;
++ c->error_idx = ioctl == VIDIOC_TRY_EXT_CTRLS ? i :
++ c->count;
++ return false;
+ }
+ }
+- return 1;
++ return true;
+ }
+
+ static int check_fmt(struct file *file, enum v4l2_buf_type type)
+@@ -2070,6 +2090,7 @@ static int v4l_prepare_buf(const struct v4l2_ioctl_ops *ops,
+ static int v4l_g_parm(const struct v4l2_ioctl_ops *ops,
+ struct file *file, void *fh, void *arg)
+ {
++ struct video_device *vfd = video_devdata(file);
+ struct v4l2_streamparm *p = arg;
+ v4l2_std_id std;
+ int ret = check_fmt(file, p->type);
+@@ -2081,7 +2102,8 @@ static int v4l_g_parm(const struct v4l2_ioctl_ops *ops,
+ if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE &&
+ p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
+ return -EINVAL;
+- p->parm.capture.readbuffers = 2;
++ if (vfd->device_caps & V4L2_CAP_READWRITE)
++ p->parm.capture.readbuffers = 2;
+ ret = ops->vidioc_g_std(file, fh, &std);
+ if (ret == 0)
+ v4l2_video_std_frame_period(std, &p->parm.capture.timeperframe);
+@@ -2187,7 +2209,7 @@ static int v4l_g_ctrl(const struct v4l2_ioctl_ops *ops,
+ ctrls.controls = &ctrl;
+ ctrl.id = p->id;
+ ctrl.value = p->value;
+- if (check_ext_ctrls(&ctrls, 1)) {
++ if (check_ext_ctrls(&ctrls, VIDIOC_G_CTRL)) {
+ int ret = ops->vidioc_g_ext_ctrls(file, fh, &ctrls);
+
+ if (ret == 0)
+@@ -2206,6 +2228,7 @@ static int v4l_s_ctrl(const struct v4l2_ioctl_ops *ops,
+ test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags) ? fh : NULL;
+ struct v4l2_ext_controls ctrls;
+ struct v4l2_ext_control ctrl;
++ int ret;
+
+ if (vfh && vfh->ctrl_handler)
+ return v4l2_s_ctrl(vfh, vfh->ctrl_handler, p);
+@@ -2221,9 +2244,11 @@ static int v4l_s_ctrl(const struct v4l2_ioctl_ops *ops,
+ ctrls.controls = &ctrl;
+ ctrl.id = p->id;
+ ctrl.value = p->value;
+- if (check_ext_ctrls(&ctrls, 1))
+- return ops->vidioc_s_ext_ctrls(file, fh, &ctrls);
+- return -EINVAL;
++ if (!check_ext_ctrls(&ctrls, VIDIOC_S_CTRL))
++ return -EINVAL;
++ ret = ops->vidioc_s_ext_ctrls(file, fh, &ctrls);
++ p->value = ctrl.value;
++ return ret;
+ }
+
+ static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+@@ -2243,8 +2268,8 @@ static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+ vfd, vfd->v4l2_dev->mdev, p);
+ if (ops->vidioc_g_ext_ctrls == NULL)
+ return -ENOTTY;
+- return check_ext_ctrls(p, 0) ? ops->vidioc_g_ext_ctrls(file, fh, p) :
+- -EINVAL;
++ return check_ext_ctrls(p, VIDIOC_G_EXT_CTRLS) ?
++ ops->vidioc_g_ext_ctrls(file, fh, p) : -EINVAL;
+ }
+
+ static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+@@ -2264,8 +2289,8 @@ static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+ vfd, vfd->v4l2_dev->mdev, p);
+ if (ops->vidioc_s_ext_ctrls == NULL)
+ return -ENOTTY;
+- return check_ext_ctrls(p, 0) ? ops->vidioc_s_ext_ctrls(file, fh, p) :
+- -EINVAL;
++ return check_ext_ctrls(p, VIDIOC_S_EXT_CTRLS) ?
++ ops->vidioc_s_ext_ctrls(file, fh, p) : -EINVAL;
+ }
+
+ static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+@@ -2285,8 +2310,8 @@ static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops,
+ vfd, vfd->v4l2_dev->mdev, p);
+ if (ops->vidioc_try_ext_ctrls == NULL)
+ return -ENOTTY;
+- return check_ext_ctrls(p, 0) ? ops->vidioc_try_ext_ctrls(file, fh, p) :
+- -EINVAL;
++ return check_ext_ctrls(p, VIDIOC_TRY_EXT_CTRLS) ?
++ ops->vidioc_try_ext_ctrls(file, fh, p) : -EINVAL;
+ }
+
+ /*
+diff --git a/drivers/media/v4l2-core/v4l2-jpeg.c b/drivers/media/v4l2-core/v4l2-jpeg.c
+index c2513b775f6a7..94435a7b68169 100644
+--- a/drivers/media/v4l2-core/v4l2-jpeg.c
++++ b/drivers/media/v4l2-core/v4l2-jpeg.c
+@@ -460,7 +460,7 @@ static int jpeg_parse_app14_data(struct jpeg_stream *stream,
+ /* Check for "Adobe\0" in Ap1..6 */
+ if (stream->curr + 6 > stream->end ||
+ strncmp(stream->curr, "Adobe\0", 6))
+- return -EINVAL;
++ return jpeg_skip(stream, lp - 2);
+
+ /* get to Ap12 */
+ ret = jpeg_skip(stream, 11);
+@@ -474,7 +474,7 @@ static int jpeg_parse_app14_data(struct jpeg_stream *stream,
+ *tf = ret;
+
+ /* skip the rest of the segment, this ensures at least it is complete */
+- skip = lp - 2 - 11;
++ skip = lp - 2 - 11 - 1;
+ return jpeg_skip(stream, skip);
+ }
+
+diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
+index e7f4bf5bc8dd7..8aeed39c415f2 100644
+--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
++++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
+@@ -585,19 +585,14 @@ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+ }
+ EXPORT_SYMBOL_GPL(v4l2_m2m_reqbufs);
+
+-int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+- struct v4l2_buffer *buf)
++static void v4l2_m2m_adjust_mem_offset(struct vb2_queue *vq,
++ struct v4l2_buffer *buf)
+ {
+- struct vb2_queue *vq;
+- int ret = 0;
+- unsigned int i;
+-
+- vq = v4l2_m2m_get_vq(m2m_ctx, buf->type);
+- ret = vb2_querybuf(vq, buf);
+-
+ /* Adjust MMAP memory offsets for the CAPTURE queue */
+ if (buf->memory == V4L2_MEMORY_MMAP && V4L2_TYPE_IS_CAPTURE(vq->type)) {
+ if (V4L2_TYPE_IS_MULTIPLANAR(vq->type)) {
++ unsigned int i;
++
+ for (i = 0; i < buf->length; ++i)
+ buf->m.planes[i].m.mem_offset
+ += DST_QUEUE_OFF_BASE;
+@@ -605,8 +600,23 @@ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+ buf->m.offset += DST_QUEUE_OFF_BASE;
+ }
+ }
++}
+
+- return ret;
++int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
++ struct v4l2_buffer *buf)
++{
++ struct vb2_queue *vq;
++ int ret;
++
++ vq = v4l2_m2m_get_vq(m2m_ctx, buf->type);
++ ret = vb2_querybuf(vq, buf);
++ if (ret)
++ return ret;
++
++ /* Adjust MMAP memory offsets for the CAPTURE queue */
++ v4l2_m2m_adjust_mem_offset(vq, buf);
++
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(v4l2_m2m_querybuf);
+
+@@ -763,6 +773,9 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+ if (ret)
+ return ret;
+
++ /* Adjust MMAP memory offsets for the CAPTURE queue */
++ v4l2_m2m_adjust_mem_offset(vq, buf);
++
+ /*
+ * If the capture queue is streaming, but streaming hasn't started
+ * on the device, but was asked to stop, mark the previously queued
+@@ -784,9 +797,17 @@ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+ struct v4l2_buffer *buf)
+ {
+ struct vb2_queue *vq;
++ int ret;
+
+ vq = v4l2_m2m_get_vq(m2m_ctx, buf->type);
+- return vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK);
++ ret = vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK);
++ if (ret)
++ return ret;
++
++ /* Adjust MMAP memory offsets for the CAPTURE queue */
++ v4l2_m2m_adjust_mem_offset(vq, buf);
++
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(v4l2_m2m_dqbuf);
+
+@@ -795,9 +816,17 @@ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
+ {
+ struct video_device *vdev = video_devdata(file);
+ struct vb2_queue *vq;
++ int ret;
+
+ vq = v4l2_m2m_get_vq(m2m_ctx, buf->type);
+- return vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf);
++ ret = vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf);
++ if (ret)
++ return ret;
++
++ /* Adjust MMAP memory offsets for the CAPTURE queue */
++ v4l2_m2m_adjust_mem_offset(vq, buf);
++
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(v4l2_m2m_prepare_buf);
+
+@@ -895,7 +924,7 @@ static __poll_t v4l2_m2m_poll_for_data(struct file *file,
+ if ((!src_q->streaming || src_q->error ||
+ list_empty(&src_q->queued_list)) &&
+ (!dst_q->streaming || dst_q->error ||
+- list_empty(&dst_q->queued_list)))
++ (list_empty(&dst_q->queued_list) && !dst_q->last_buffer_dequeued)))
+ return EPOLLERR;
+
+ spin_lock_irqsave(&src_q->done_lock, flags);
+diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c
+index 52312ce2ba056..f2c4393595574 100644
+--- a/drivers/media/v4l2-core/videobuf-dma-contig.c
++++ b/drivers/media/v4l2-core/videobuf-dma-contig.c
+@@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory {
+
+ static int __videobuf_dc_alloc(struct device *dev,
+ struct videobuf_dma_contig_memory *mem,
+- unsigned long size, gfp_t flags)
++ unsigned long size)
+ {
+ mem->size = size;
+- mem->vaddr = dma_alloc_coherent(dev, mem->size,
+- &mem->dma_handle, flags);
+-
++ mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle,
++ GFP_KERNEL);
+ if (!mem->vaddr) {
+ dev_err(dev, "memory alloc size %ld failed\n", mem->size);
+ return -ENOMEM;
+@@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
+ return videobuf_dma_contig_user_get(mem, vb);
+
+ /* allocate memory for the read() method */
+- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size),
+- GFP_KERNEL))
++ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size)))
+ return -ENOMEM;
+ break;
+ case V4L2_MEMORY_OVERLAY:
+@@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
+ BUG_ON(!mem);
+ MAGIC_CHECK(mem->magic, MAGIC_DC_MEM);
+
+- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize),
+- GFP_KERNEL | __GFP_COMP))
++ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize)))
+ goto error;
+
+- /* Try to remap memory */
+- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+-
+ /* the "vm_pgoff" is just used in v4l2 to find the
+ * corresponding buffer data structure which is allocated
+ * earlier and it does not mean the offset from the physical
+ * buffer start address as usual. So set it to 0 to pass
+- * the sanity check in vm_iomap_memory().
++ * the sanity check in dma_mmap_coherent().
+ */
+ vma->vm_pgoff = 0;
+-
+- retval = vm_iomap_memory(vma, mem->dma_handle, mem->size);
++ retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle,
++ mem->size);
+ if (retval) {
+ dev_err(q->dev, "mmap: remap failed with error %d. ",
+ retval);
+diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c
+index c267283b01fda..e749dcb3ddea9 100644
+--- a/drivers/memory/atmel-ebi.c
++++ b/drivers/memory/atmel-ebi.c
+@@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev)
+ smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0);
+
+ ebi->smc.regmap = syscon_node_to_regmap(smc_np);
+- if (IS_ERR(ebi->smc.regmap))
+- return PTR_ERR(ebi->smc.regmap);
++ if (IS_ERR(ebi->smc.regmap)) {
++ ret = PTR_ERR(ebi->smc.regmap);
++ goto put_node;
++ }
+
+ ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np);
+- if (IS_ERR(ebi->smc.layout))
+- return PTR_ERR(ebi->smc.layout);
++ if (IS_ERR(ebi->smc.layout)) {
++ ret = PTR_ERR(ebi->smc.layout);
++ goto put_node;
++ }
+
+ ebi->smc.clk = of_clk_get(smc_np, 0);
+ if (IS_ERR(ebi->smc.clk)) {
+- if (PTR_ERR(ebi->smc.clk) != -ENOENT)
+- return PTR_ERR(ebi->smc.clk);
++ if (PTR_ERR(ebi->smc.clk) != -ENOENT) {
++ ret = PTR_ERR(ebi->smc.clk);
++ goto put_node;
++ }
+
+ ebi->smc.clk = NULL;
+ }
++ of_node_put(smc_np);
+ ret = clk_prepare_enable(ebi->smc.clk);
+ if (ret)
+ return ret;
+@@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev)
+ }
+
+ return of_platform_populate(np, NULL, NULL, dev);
++
++put_node:
++ of_node_put(smc_np);
++ return ret;
+ }
+
+ static __maybe_unused int atmel_ebi_resume(struct device *dev)
+diff --git a/drivers/memory/atmel-sdramc.c b/drivers/memory/atmel-sdramc.c
+index 9c49d00c2a966..ea6e9e1eaf046 100644
+--- a/drivers/memory/atmel-sdramc.c
++++ b/drivers/memory/atmel-sdramc.c
+@@ -47,19 +47,17 @@ static int atmel_ramc_probe(struct platform_device *pdev)
+ caps = of_device_get_match_data(&pdev->dev);
+
+ if (caps->has_ddrck) {
+- clk = devm_clk_get(&pdev->dev, "ddrck");
++ clk = devm_clk_get_enabled(&pdev->dev, "ddrck");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+- clk_prepare_enable(clk);
+ }
+
+ if (caps->has_mpddr_clk) {
+- clk = devm_clk_get(&pdev->dev, "mpddr");
++ clk = devm_clk_get_enabled(&pdev->dev, "mpddr");
+ if (IS_ERR(clk)) {
+ pr_err("AT91 RAMC: couldn't get mpddr clock\n");
+ return PTR_ERR(clk);
+ }
+- clk_prepare_enable(clk);
+ }
+
+ return 0;
+diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c
+index f43ba69fbb3e3..2daae2e0cb19e 100644
+--- a/drivers/memory/brcmstb_dpfe.c
++++ b/drivers/memory/brcmstb_dpfe.c
+@@ -434,15 +434,17 @@ static void __finalize_command(struct brcmstb_dpfe_priv *priv)
+ static int __send_command(struct brcmstb_dpfe_priv *priv, unsigned int cmd,
+ u32 result[])
+ {
+- const u32 *msg = priv->dpfe_api->command[cmd];
+ void __iomem *regs = priv->regs;
+ unsigned int i, chksum, chksum_idx;
++ const u32 *msg;
+ int ret = 0;
+ u32 resp;
+
+ if (cmd >= DPFE_CMD_MAX)
+ return -1;
+
++ msg = priv->dpfe_api->command[cmd];
++
+ mutex_lock(&priv->lock);
+
+ /* Wait for DCPU to become ready */
+diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c
+index 762d0c0f0716f..ecc78d6f89ed2 100644
+--- a/drivers/memory/emif.c
++++ b/drivers/memory/emif.c
+@@ -1025,7 +1025,7 @@ static struct emif_data *__init_or_module get_device_details(
+ temp = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+ dev_info = devm_kzalloc(dev, sizeof(*dev_info), GFP_KERNEL);
+
+- if (!emif || !pd || !dev_info) {
++ if (!emif || !temp || !dev_info) {
+ dev_err(dev, "%s:%d: allocation error\n", __func__, __LINE__);
+ goto error;
+ }
+@@ -1117,7 +1117,7 @@ static int __init_or_module emif_probe(struct platform_device *pdev)
+ {
+ struct emif_data *emif;
+ struct resource *res;
+- int irq;
++ int irq, ret;
+
+ if (pdev->dev.of_node)
+ emif = of_get_memory_device_details(pdev->dev.of_node, &pdev->dev);
+@@ -1147,7 +1147,9 @@ static int __init_or_module emif_probe(struct platform_device *pdev)
+ emif_onetime_settings(emif);
+ emif_debugfs_init(emif);
+ disable_and_clear_all_interrupts(emif);
+- setup_interrupts(emif, irq);
++ ret = setup_interrupts(emif, irq);
++ if (ret)
++ goto error;
+
+ /* One-time actions taken on probing the first device */
+ if (!emif1) {
+diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c
+index d062c2f8250f4..75a8c38df9394 100644
+--- a/drivers/memory/fsl_ifc.c
++++ b/drivers/memory/fsl_ifc.c
+@@ -263,7 +263,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
+
+ ret = fsl_ifc_ctrl_init(fsl_ifc_ctrl_dev);
+ if (ret < 0)
+- goto err;
++ goto err_unmap_nandirq;
+
+ init_waitqueue_head(&fsl_ifc_ctrl_dev->nand_wait);
+
+@@ -272,7 +272,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
+ if (ret != 0) {
+ dev_err(&dev->dev, "failed to install irq (%d)\n",
+ fsl_ifc_ctrl_dev->irq);
+- goto err_irq;
++ goto err_unmap_nandirq;
+ }
+
+ if (fsl_ifc_ctrl_dev->nand_irq) {
+@@ -281,17 +281,16 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
+ if (ret != 0) {
+ dev_err(&dev->dev, "failed to install irq (%d)\n",
+ fsl_ifc_ctrl_dev->nand_irq);
+- goto err_nandirq;
++ goto err_free_irq;
+ }
+ }
+
+ return 0;
+
+-err_nandirq:
+- free_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_ctrl_dev);
+- irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq);
+-err_irq:
++err_free_irq:
+ free_irq(fsl_ifc_ctrl_dev->irq, fsl_ifc_ctrl_dev);
++err_unmap_nandirq:
++ irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq);
+ irq_dispose_mapping(fsl_ifc_ctrl_dev->irq);
+ err:
+ iounmap(fsl_ifc_ctrl_dev->gregs);
+diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c
+index 8450638e86700..efc6c08db2b70 100644
+--- a/drivers/memory/mvebu-devbus.c
++++ b/drivers/memory/mvebu-devbus.c
+@@ -280,10 +280,9 @@ static int mvebu_devbus_probe(struct platform_device *pdev)
+ if (IS_ERR(devbus->base))
+ return PTR_ERR(devbus->base);
+
+- clk = devm_clk_get(&pdev->dev, NULL);
++ clk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+- clk_prepare_enable(clk);
+
+ /*
+ * Obtain clock period in picoseconds,
+diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c
+index d9f5437d3bce0..1791614f324b7 100644
+--- a/drivers/memory/of_memory.c
++++ b/drivers/memory/of_memory.c
+@@ -134,6 +134,7 @@ const struct lpddr2_timings *of_get_ddr_timings(struct device_node *np_ddr,
+ for_each_child_of_node(np_ddr, np_tim) {
+ if (of_device_is_compatible(np_tim, tim_compat)) {
+ if (of_do_get_timings(np_tim, &timings[i])) {
++ of_node_put(np_tim);
+ devm_kfree(dev, timings);
+ goto default_timings;
+ }
+@@ -282,6 +283,7 @@ const struct lpddr3_timings
+ if (of_device_is_compatible(np_tim, tim_compat)) {
+ if (of_lpddr3_do_get_timings(np_tim, &timings[i])) {
+ devm_kfree(dev, timings);
++ of_node_put(np_tim);
+ goto default_timings;
+ }
+ i++;
+diff --git a/drivers/memory/pl353-smc.c b/drivers/memory/pl353-smc.c
+index f84b98278745c..d39ee7d06665b 100644
+--- a/drivers/memory/pl353-smc.c
++++ b/drivers/memory/pl353-smc.c
+@@ -122,6 +122,7 @@ static int pl353_smc_probe(struct amba_device *adev, const struct amba_id *id)
+ }
+
+ of_platform_device_create(child, NULL, &adev->dev);
++ of_node_put(child);
+
+ return 0;
+
+diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c
+index 45eed659b0c6d..c77b23b68a931 100644
+--- a/drivers/memory/renesas-rpc-if.c
++++ b/drivers/memory/renesas-rpc-if.c
+@@ -160,10 +160,91 @@ static const struct regmap_access_table rpcif_volatile_table = {
+ .n_yes_ranges = ARRAY_SIZE(rpcif_volatile_ranges),
+ };
+
++
++/*
++ * Custom accessor functions to ensure SM[RW]DR[01] are always accessed with
++ * proper width. Requires rpcif.xfer_size to be correctly set before!
++ */
++static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val)
++{
++ struct rpcif *rpc = context;
++
++ switch (reg) {
++ case RPCIF_SMRDR0:
++ case RPCIF_SMWDR0:
++ switch (rpc->xfer_size) {
++ case 1:
++ *val = readb(rpc->base + reg);
++ return 0;
++
++ case 2:
++ *val = readw(rpc->base + reg);
++ return 0;
++
++ case 4:
++ case 8:
++ *val = readl(rpc->base + reg);
++ return 0;
++
++ default:
++ return -EILSEQ;
++ }
++
++ case RPCIF_SMRDR1:
++ case RPCIF_SMWDR1:
++ if (rpc->xfer_size != 8)
++ return -EILSEQ;
++ break;
++ }
++
++ *val = readl(rpc->base + reg);
++ return 0;
++}
++
++static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val)
++{
++ struct rpcif *rpc = context;
++
++ switch (reg) {
++ case RPCIF_SMWDR0:
++ switch (rpc->xfer_size) {
++ case 1:
++ writeb(val, rpc->base + reg);
++ return 0;
++
++ case 2:
++ writew(val, rpc->base + reg);
++ return 0;
++
++ case 4:
++ case 8:
++ writel(val, rpc->base + reg);
++ return 0;
++
++ default:
++ return -EILSEQ;
++ }
++
++ case RPCIF_SMWDR1:
++ if (rpc->xfer_size != 8)
++ return -EILSEQ;
++ break;
++
++ case RPCIF_SMRDR0:
++ case RPCIF_SMRDR1:
++ return -EPERM;
++ }
++
++ writel(val, rpc->base + reg);
++ return 0;
++}
++
+ static const struct regmap_config rpcif_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
++ .reg_read = rpcif_reg_read,
++ .reg_write = rpcif_reg_write,
+ .fast_io = true,
+ .max_register = RPCIF_PHYINT,
+ .volatile_table = &rpcif_volatile_table,
+@@ -173,17 +254,15 @@ int rpcif_sw_init(struct rpcif *rpc, struct device *dev)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+ struct resource *res;
+- void __iomem *base;
+
+ rpc->dev = dev;
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+- base = devm_ioremap_resource(&pdev->dev, res);
+- if (IS_ERR(base))
+- return PTR_ERR(base);
++ rpc->base = devm_ioremap_resource(&pdev->dev, res);
++ if (IS_ERR(rpc->base))
++ return PTR_ERR(rpc->base);
+
+- rpc->regmap = devm_regmap_init_mmio(&pdev->dev, base,
+- &rpcif_regmap_config);
++ rpc->regmap = devm_regmap_init(&pdev->dev, NULL, rpc, &rpcif_regmap_config);
+ if (IS_ERR(rpc->regmap)) {
+ dev_err(&pdev->dev,
+ "failed to init regmap for rpcif, error %ld\n",
+@@ -194,7 +273,7 @@ int rpcif_sw_init(struct rpcif *rpc, struct device *dev)
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dirmap");
+ rpc->dirmap = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(rpc->dirmap))
+- rpc->dirmap = NULL;
++ return PTR_ERR(rpc->dirmap);
+ rpc->size = resource_size(res);
+
+ rpc->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+@@ -354,20 +433,16 @@ void rpcif_prepare(struct rpcif *rpc, const struct rpcif_op *op, u64 *offs,
+ nbytes = op->data.nbytes;
+ rpc->xferlen = nbytes;
+
+- rpc->enable |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)) |
+- RPCIF_SMENR_SPIDB(rpcif_bit_size(op->data.buswidth));
++ rpc->enable |= RPCIF_SMENR_SPIDB(rpcif_bit_size(op->data.buswidth));
+ }
+ }
+ EXPORT_SYMBOL(rpcif_prepare);
+
+ int rpcif_manual_xfer(struct rpcif *rpc)
+ {
+- u32 smenr, smcr, pos = 0, max = 4;
++ u32 smenr, smcr, pos = 0, max = rpc->bus_size == 2 ? 8 : 4;
+ int ret = 0;
+
+- if (rpc->bus_size == 2)
+- max = 8;
+-
+ pm_runtime_get_sync(rpc->dev);
+
+ regmap_update_bits(rpc->regmap, RPCIF_PHYCNT,
+@@ -378,37 +453,37 @@ int rpcif_manual_xfer(struct rpcif *rpc)
+ regmap_write(rpc->regmap, RPCIF_SMOPR, rpc->option);
+ regmap_write(rpc->regmap, RPCIF_SMDMCR, rpc->dummy);
+ regmap_write(rpc->regmap, RPCIF_SMDRENR, rpc->ddr);
++ regmap_write(rpc->regmap, RPCIF_SMADR, rpc->smadr);
+ smenr = rpc->enable;
+
+ switch (rpc->dir) {
+ case RPCIF_DATA_OUT:
+ while (pos < rpc->xferlen) {
+- u32 nbytes = rpc->xferlen - pos;
+- u32 data[2];
++ u32 bytes_left = rpc->xferlen - pos;
++ u32 nbytes, data[2];
+
+ smcr = rpc->smcr | RPCIF_SMCR_SPIE;
+- if (nbytes > max) {
+- nbytes = max;
++
++ /* nbytes may only be 1, 2, 4, or 8 */
++ nbytes = bytes_left >= max ? max : (1 << ilog2(bytes_left));
++ if (bytes_left > nbytes)
+ smcr |= RPCIF_SMCR_SSLKP;
+- }
++
++ smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes));
++ regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
++ rpc->xfer_size = nbytes;
+
+ memcpy(data, rpc->buffer + pos, nbytes);
+- if (nbytes > 4) {
++ if (nbytes == 8) {
+ regmap_write(rpc->regmap, RPCIF_SMWDR1,
+ data[0]);
+ regmap_write(rpc->regmap, RPCIF_SMWDR0,
+ data[1]);
+- } else if (nbytes > 2) {
++ } else {
+ regmap_write(rpc->regmap, RPCIF_SMWDR0,
+ data[0]);
+- } else {
+- regmap_write(rpc->regmap, RPCIF_SMWDR0,
+- data[0] << 16);
+ }
+
+- regmap_write(rpc->regmap, RPCIF_SMADR,
+- rpc->smadr + pos);
+- regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
+ regmap_write(rpc->regmap, RPCIF_SMCR, smcr);
+ ret = wait_msg_xfer_end(rpc);
+ if (ret)
+@@ -448,33 +523,32 @@ int rpcif_manual_xfer(struct rpcif *rpc)
+ break;
+ }
+ while (pos < rpc->xferlen) {
+- u32 nbytes = rpc->xferlen - pos;
+- u32 data[2];
++ u32 bytes_left = rpc->xferlen - pos;
++ u32 nbytes, data[2];
+
+- if (nbytes > max)
+- nbytes = max;
++ /* nbytes may only be 1, 2, 4, or 8 */
++ nbytes = bytes_left >= max ? max : (1 << ilog2(bytes_left));
+
+ regmap_write(rpc->regmap, RPCIF_SMADR,
+ rpc->smadr + pos);
++ smenr &= ~RPCIF_SMENR_SPIDE(0xF);
++ smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes));
+ regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
+ regmap_write(rpc->regmap, RPCIF_SMCR,
+ rpc->smcr | RPCIF_SMCR_SPIE);
++ rpc->xfer_size = nbytes;
+ ret = wait_msg_xfer_end(rpc);
+ if (ret)
+ goto err_out;
+
+- if (nbytes > 4) {
++ if (nbytes == 8) {
+ regmap_read(rpc->regmap, RPCIF_SMRDR1,
+ &data[0]);
+ regmap_read(rpc->regmap, RPCIF_SMRDR0,
+ &data[1]);
+- } else if (nbytes > 2) {
+- regmap_read(rpc->regmap, RPCIF_SMRDR0,
+- &data[0]);
+- } else {
++ } else {
+ regmap_read(rpc->regmap, RPCIF_SMRDR0,
+ &data[0]);
+- data[0] >>= 16;
+ }
+ memcpy(rpc->buffer + pos, data, nbytes);
+
+@@ -502,6 +576,48 @@ err_out:
+ }
+ EXPORT_SYMBOL(rpcif_manual_xfer);
+
++static void memcpy_fromio_readw(void *to,
++ const void __iomem *from,
++ size_t count)
++{
++ const int maxw = (IS_ENABLED(CONFIG_64BIT)) ? 8 : 4;
++ u8 buf[2];
++
++ if (count && ((unsigned long)from & 1)) {
++ *(u16 *)buf = __raw_readw((void __iomem *)((unsigned long)from & ~1));
++ *(u8 *)to = buf[1];
++ from++;
++ to++;
++ count--;
++ }
++ while (count >= 2 && !IS_ALIGNED((unsigned long)from, maxw)) {
++ *(u16 *)to = __raw_readw(from);
++ from += 2;
++ to += 2;
++ count -= 2;
++ }
++ while (count >= maxw) {
++#ifdef CONFIG_64BIT
++ *(u64 *)to = __raw_readq(from);
++#else
++ *(u32 *)to = __raw_readl(from);
++#endif
++ from += maxw;
++ to += maxw;
++ count -= maxw;
++ }
++ while (count >= 2) {
++ *(u16 *)to = __raw_readw(from);
++ from += 2;
++ to += 2;
++ count -= 2;
++ }
++ if (count) {
++ *(u16 *)buf = __raw_readw(from);
++ *(u8 *)to = buf[0];
++ }
++}
++
+ ssize_t rpcif_dirmap_read(struct rpcif *rpc, u64 offs, size_t len, void *buf)
+ {
+ loff_t from = offs & (RPCIF_DIRMAP_SIZE - 1);
+@@ -523,7 +639,10 @@ ssize_t rpcif_dirmap_read(struct rpcif *rpc, u64 offs, size_t len, void *buf)
+ regmap_write(rpc->regmap, RPCIF_DRDMCR, rpc->dummy);
+ regmap_write(rpc->regmap, RPCIF_DRDRENR, rpc->ddr);
+
+- memcpy_fromio(buf, rpc->dirmap + from, len);
++ if (rpc->bus_size == 2)
++ memcpy_fromio_readw(buf, rpc->dirmap + from, len);
++ else
++ memcpy_fromio(buf, rpc->dirmap + from, len);
+
+ pm_runtime_put(rpc->dev);
+
+@@ -536,6 +655,7 @@ static int rpcif_probe(struct platform_device *pdev)
+ struct platform_device *vdev;
+ struct device_node *flash;
+ const char *name;
++ int ret;
+
+ flash = of_get_next_child(pdev->dev.of_node, NULL);
+ if (!flash) {
+@@ -559,7 +679,14 @@ static int rpcif_probe(struct platform_device *pdev)
+ return -ENOMEM;
+ vdev->dev.parent = &pdev->dev;
+ platform_set_drvdata(pdev, vdev);
+- return platform_device_add(vdev);
++
++ ret = platform_device_add(vdev);
++ if (ret) {
++ platform_device_put(vdev);
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static int rpcif_remove(struct platform_device *pdev)
+diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
+index 9c8318923ed0b..c491cd549644f 100644
+--- a/drivers/memory/samsung/exynos5422-dmc.c
++++ b/drivers/memory/samsung/exynos5422-dmc.c
+@@ -1187,33 +1187,39 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc)
+
+ dmc->timing_row = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
+ sizeof(u32), GFP_KERNEL);
+- if (!dmc->timing_row)
+- return -ENOMEM;
++ if (!dmc->timing_row) {
++ ret = -ENOMEM;
++ goto put_node;
++ }
+
+ dmc->timing_data = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
+ sizeof(u32), GFP_KERNEL);
+- if (!dmc->timing_data)
+- return -ENOMEM;
++ if (!dmc->timing_data) {
++ ret = -ENOMEM;
++ goto put_node;
++ }
+
+ dmc->timing_power = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
+ sizeof(u32), GFP_KERNEL);
+- if (!dmc->timing_power)
+- return -ENOMEM;
++ if (!dmc->timing_power) {
++ ret = -ENOMEM;
++ goto put_node;
++ }
+
+ dmc->timings = of_lpddr3_get_ddr_timings(np_ddr, dmc->dev,
+ DDR_TYPE_LPDDR3,
+ &dmc->timings_arr_size);
+ if (!dmc->timings) {
+- of_node_put(np_ddr);
+ dev_warn(dmc->dev, "could not get timings from DT\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_node;
+ }
+
+ dmc->min_tck = of_lpddr3_get_min_tck(np_ddr, dmc->dev);
+ if (!dmc->min_tck) {
+- of_node_put(np_ddr);
+ dev_warn(dmc->dev, "could not get tck from DT\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_node;
+ }
+
+ /* Sorted array of OPPs with frequency ascending */
+@@ -1227,13 +1233,14 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc)
+ clk_period_ps);
+ }
+
+- of_node_put(np_ddr);
+
+ /* Take the highest frequency's timings as 'bypass' */
+ dmc->bypass_timing_row = dmc->timing_row[idx - 1];
+ dmc->bypass_timing_data = dmc->timing_data[idx - 1];
+ dmc->bypass_timing_power = dmc->timing_power[idx - 1];
+
++put_node:
++ of_node_put(np_ddr);
+ return ret;
+ }
+
+@@ -1322,7 +1329,6 @@ static int exynos5_dmc_init_clks(struct exynos5_dmc *dmc)
+ */
+ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc)
+ {
+- int counters_size;
+ int ret, i;
+
+ dmc->num_counters = devfreq_event_get_edev_count(dmc->dev,
+@@ -1332,8 +1338,8 @@ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc)
+ return dmc->num_counters;
+ }
+
+- counters_size = sizeof(struct devfreq_event_dev) * dmc->num_counters;
+- dmc->counter = devm_kzalloc(dmc->dev, counters_size, GFP_KERNEL);
++ dmc->counter = devm_kcalloc(dmc->dev, dmc->num_counters,
++ sizeof(*dmc->counter), GFP_KERNEL);
+ if (!dmc->counter)
+ return -ENOMEM;
+
+diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
+index 3d153881abc16..4bed0e54fd456 100644
+--- a/drivers/memory/tegra/tegra186.c
++++ b/drivers/memory/tegra/tegra186.c
+@@ -20,32 +20,6 @@
+ #define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16)
+ #define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8)
+
+-static void tegra186_mc_program_sid(struct tegra_mc *mc)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < mc->soc->num_clients; i++) {
+- const struct tegra_mc_client *client = &mc->soc->clients[i];
+- u32 override, security;
+-
+- override = readl(mc->regs + client->regs.sid.override);
+- security = readl(mc->regs + client->regs.sid.security);
+-
+- dev_dbg(mc->dev, "client %s: override: %x security: %x\n",
+- client->name, override, security);
+-
+- dev_dbg(mc->dev, "setting SID %u for %s\n", client->sid,
+- client->name);
+- writel(client->sid, mc->regs + client->regs.sid.override);
+-
+- override = readl(mc->regs + client->regs.sid.override);
+- security = readl(mc->regs + client->regs.sid.security);
+-
+- dev_dbg(mc->dev, "client %s: override: %x security: %x\n",
+- client->name, override, security);
+- }
+-}
+-
+ static int tegra186_mc_probe(struct tegra_mc *mc)
+ {
+ int err;
+@@ -54,8 +28,6 @@ static int tegra186_mc_probe(struct tegra_mc *mc)
+ if (err < 0)
+ return err;
+
+- tegra186_mc_program_sid(mc);
+-
+ return 0;
+ }
+
+@@ -64,13 +36,6 @@ static void tegra186_mc_remove(struct tegra_mc *mc)
+ of_platform_depopulate(mc->dev);
+ }
+
+-static int tegra186_mc_resume(struct tegra_mc *mc)
+-{
+- tegra186_mc_program_sid(mc);
+-
+- return 0;
+-}
+-
+ #if IS_ENABLED(CONFIG_IOMMU_API)
+ static void tegra186_mc_client_sid_override(struct tegra_mc *mc,
+ const struct tegra_mc_client *client,
+@@ -142,7 +107,6 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev)
+ const struct tegra_mc_ops tegra186_mc_ops = {
+ .probe = tegra186_mc_probe,
+ .remove = tegra186_mc_remove,
+- .resume = tegra186_mc_resume,
+ .probe_device = tegra186_mc_probe_device,
+ };
+
+diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
+index c3462dbc8c22b..6fc90f2160e93 100644
+--- a/drivers/memory/tegra/tegra20-emc.c
++++ b/drivers/memory/tegra/tegra20-emc.c
+@@ -1117,4 +1117,5 @@ module_platform_driver(tegra_emc_driver);
+
+ MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>");
+ MODULE_DESCRIPTION("NVIDIA Tegra20 EMC driver");
++MODULE_SOFTDEP("pre: governor_simpleondemand");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
+index 660df7d269fac..d410e2e78a3d3 100644
+--- a/drivers/memstick/core/memstick.c
++++ b/drivers/memstick/core/memstick.c
+@@ -410,6 +410,7 @@ static struct memstick_dev *memstick_alloc_card(struct memstick_host *host)
+ return card;
+ err_out:
+ host->card = old_card;
++ kfree_const(card->dev.kobj.name);
+ kfree(card);
+ return NULL;
+ }
+@@ -468,8 +469,10 @@ static void memstick_check(struct work_struct *work)
+ put_device(&card->dev);
+ host->card = NULL;
+ }
+- } else
++ } else {
++ kfree_const(card->dev.kobj.name);
+ kfree(card);
++ }
+ }
+
+ out_power_off:
+diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
+index acf36676e388d..7619c30b4ee10 100644
+--- a/drivers/memstick/core/ms_block.c
++++ b/drivers/memstick/core/ms_block.c
+@@ -1341,17 +1341,17 @@ static int msb_ftl_initialize(struct msb_data *msb)
+ msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE;
+ msb->logical_block_count = msb->zone_count * 496 - 2;
+
+- msb->used_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL);
+- msb->erased_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL);
++ msb->used_blocks_bitmap = bitmap_zalloc(msb->block_count, GFP_KERNEL);
++ msb->erased_blocks_bitmap = bitmap_zalloc(msb->block_count, GFP_KERNEL);
+ msb->lba_to_pba_table =
+ kmalloc_array(msb->logical_block_count, sizeof(u16),
+ GFP_KERNEL);
+
+ if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table ||
+ !msb->erased_blocks_bitmap) {
+- kfree(msb->used_blocks_bitmap);
++ bitmap_free(msb->used_blocks_bitmap);
++ bitmap_free(msb->erased_blocks_bitmap);
+ kfree(msb->lba_to_pba_table);
+- kfree(msb->erased_blocks_bitmap);
+ return -ENOMEM;
+ }
+
+@@ -1736,7 +1736,7 @@ static int msb_init_card(struct memstick_dev *card)
+ msb->pages_in_block = boot_block->attr.block_size * 2;
+ msb->block_size = msb->page_size * msb->pages_in_block;
+
+- if (msb->page_size > PAGE_SIZE) {
++ if ((size_t)msb->page_size > PAGE_SIZE) {
+ /* this isn't supported by linux at all, anyway*/
+ dbg("device page %d size isn't supported", msb->page_size);
+ return -EINVAL;
+@@ -1962,7 +1962,8 @@ static int msb_bd_open(struct block_device *bdev, fmode_t mode)
+ static void msb_data_clear(struct msb_data *msb)
+ {
+ kfree(msb->boot_page);
+- kfree(msb->used_blocks_bitmap);
++ bitmap_free(msb->used_blocks_bitmap);
++ bitmap_free(msb->erased_blocks_bitmap);
+ kfree(msb->lba_to_pba_table);
+ kfree(msb->cache);
+ msb->card = NULL;
+@@ -2149,6 +2150,11 @@ static int msb_init_disk(struct memstick_dev *card)
+
+ msb->usage_count = 1;
+ msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
++ if (!msb->io_queue) {
++ rc = -ENOMEM;
++ goto out_cleanup_disk;
++ }
++
+ INIT_WORK(&msb->io_work, msb_io_work);
+ sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
+
+@@ -2156,10 +2162,16 @@ static int msb_init_disk(struct memstick_dev *card)
+ set_disk_ro(msb->disk, 1);
+
+ msb_start(card);
+- device_add_disk(&card->dev, msb->disk, NULL);
++ rc = device_add_disk(&card->dev, msb->disk, NULL);
++ if (rc)
++ goto out_destroy_workqueue;
+ dbg("Disk added");
+ return 0;
+
++out_destroy_workqueue:
++ destroy_workqueue(msb->io_queue);
++out_cleanup_disk:
++ blk_cleanup_disk(msb->disk);
+ out_free_tag_set:
+ blk_mq_free_tag_set(&msb->tag_set);
+ out_release_id:
+diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c
+index f9a93b0565e15..435d4c058b20e 100644
+--- a/drivers/memstick/host/jmb38x_ms.c
++++ b/drivers/memstick/host/jmb38x_ms.c
+@@ -882,7 +882,7 @@ static struct memstick_host *jmb38x_ms_alloc_host(struct jmb38x_ms *jm, int cnt)
+
+ iounmap(host->addr);
+ err_out_free:
+- kfree(msh);
++ memstick_free_host(msh);
+ return NULL;
+ }
+
+diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
+index e79a0218c492e..461f5ffd02bc1 100644
+--- a/drivers/memstick/host/r592.c
++++ b/drivers/memstick/host/r592.c
+@@ -44,12 +44,10 @@ static const char *tpc_names[] = {
+ * memstick_debug_get_tpc_name - debug helper that returns string for
+ * a TPC number
+ */
+-const char *memstick_debug_get_tpc_name(int tpc)
++static __maybe_unused const char *memstick_debug_get_tpc_name(int tpc)
+ {
+ return tpc_names[tpc-1];
+ }
+-EXPORT_SYMBOL(memstick_debug_get_tpc_name);
+-
+
+ /* Read a register*/
+ static inline u32 r592_read_reg(struct r592_device *dev, int address)
+@@ -829,7 +827,7 @@ static void r592_remove(struct pci_dev *pdev)
+ /* Stop the processing thread.
+ That ensures that we won't take any more requests */
+ kthread_stop(dev->io_thread);
+-
++ del_timer_sync(&dev->detect_timer);
+ r592_enable_device(dev, false);
+
+ while (!error && dev->req) {
+@@ -838,15 +836,15 @@ static void r592_remove(struct pci_dev *pdev)
+ }
+ memstick_remove_host(dev->host);
+
++ if (dev->dummy_dma_page)
++ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page,
++ dev->dummy_dma_page_physical_address);
++
+ free_irq(dev->irq, dev);
+ iounmap(dev->mmio);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ memstick_free_host(dev->host);
+-
+- if (dev->dummy_dma_page)
+- dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page,
+- dev->dummy_dma_page_physical_address);
+ }
+
+ #ifdef CONFIG_PM_SLEEP
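The r592 remove() reordering above follows the usual teardown rule: quiesce asynchronous contexts (the I/O thread, the detect timer) before the resources their handlers touch disappear, and free DMA memory while the PCI device is still usable. A rough sketch of that ordering; the struct layout and field names here are hypothetical:

```c
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>

struct demo_dev {
	struct task_struct *io_thread;
	struct timer_list detect_timer;
	void *dummy_dma_page;
	dma_addr_t dummy_dma_page_phys;
	void __iomem *mmio;
	int irq;
};

static void demo_pci_remove(struct pci_dev *pdev)
{
	struct demo_dev *dev = pci_get_drvdata(pdev);

	kthread_stop(dev->io_thread);		/* no new requests */
	del_timer_sync(&dev->detect_timer);	/* timer cannot rearm or run again */

	if (dev->dummy_dma_page)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page,
				  dev->dummy_dma_page_phys);

	free_irq(dev->irq, dev);
	iounmap(dev->mmio);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
```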
+diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
+index 3261cac762def..ec3ee356078db 100644
+--- a/drivers/message/fusion/mptlan.c
++++ b/drivers/message/fusion/mptlan.c
+@@ -1427,7 +1427,9 @@ mptlan_remove(struct pci_dev *pdev)
+ {
+ MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
+ struct net_device *dev = ioc->netdev;
++ struct mpt_lan_priv *priv = netdev_priv(dev);
+
++ cancel_delayed_work_sync(&priv->post_buckets_task);
+ if(dev != NULL) {
+ unregister_netdev(dev);
+ free_netdev(dev);
+diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
+index ca0edab91aeb6..ef550d33af920 100644
+--- a/drivers/mfd/Kconfig
++++ b/drivers/mfd/Kconfig
+@@ -15,6 +15,7 @@ config MFD_CS5535
+ tristate "AMD CS5535 and CS5536 southbridge core functions"
+ select MFD_CORE
+ depends on PCI && (X86_32 || (X86 && COMPILE_TEST))
++ depends on !UML
+ help
+ This is the core driver for CS5535/CS5536 MFD functions. This is
+ necessary for using the board's GPIO and MFGPT functionality.
+@@ -1194,6 +1195,7 @@ config MFD_SI476X_CORE
+ config MFD_SIMPLE_MFD_I2C
+ tristate
+ depends on I2C
++ select MFD_CORE
+ select REGMAP_I2C
+ help
+ This driver creates a single register map with the intention for it
+@@ -1994,6 +1996,7 @@ config MFD_ROHM_BD957XMUF
+ depends on I2C=y
+ depends on OF
+ select REGMAP_I2C
++ select REGMAP_IRQ
+ select MFD_CORE
+ help
+ Select this option to get support for the ROHM BD9576MUF and
+diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c
+index 20cb294c75122..5d3715a28b28e 100644
+--- a/drivers/mfd/altera-sysmgr.c
++++ b/drivers/mfd/altera-sysmgr.c
+@@ -153,7 +153,7 @@ static int sysmgr_probe(struct platform_device *pdev)
+ if (!base)
+ return -ENOMEM;
+
+- sysmgr_config.max_register = resource_size(res) - 3;
++ sysmgr_config.max_register = resource_size(res) - 4;
+ regmap = devm_regmap_init_mmio(dev, base, &sysmgr_config);
+ }
+
+diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
+index 9323b1e3a69ef..5c8317bd4d98b 100644
+--- a/drivers/mfd/arizona-core.c
++++ b/drivers/mfd/arizona-core.c
+@@ -45,7 +45,7 @@ int arizona_clk32k_enable(struct arizona *arizona)
+ if (arizona->clk32k_ref == 1) {
+ switch (arizona->pdata.clk32k_src) {
+ case ARIZONA_32KZ_MCLK1:
+- ret = pm_runtime_get_sync(arizona->dev);
++ ret = pm_runtime_resume_and_get(arizona->dev);
+ if (ret != 0)
+ goto err_ref;
+ ret = clk_prepare_enable(arizona->mclk[ARIZONA_MCLK1]);
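pm_runtime_get_sync() bumps the device usage count even when the resume fails, so every error path needs a matching pm_runtime_put_noidle(); pm_runtime_resume_and_get(), adopted above, drops the count itself on failure. The idiom in isolation, as a hypothetical helper:

```c
#include <linux/pm_runtime.h>

static int demo_do_io(struct device *dev)
{
	int ret;

	/* On failure this releases the usage count it took, so there is
	 * nothing to undo in the error path. */
	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;

	/* ... talk to the hardware ... */

	pm_runtime_put(dev);
	return 0;
}
```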
+diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
+index 8d58c8df46cfb..56338f9dbd0ba 100644
+--- a/drivers/mfd/asic3.c
++++ b/drivers/mfd/asic3.c
+@@ -906,14 +906,14 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
+ ret = mfd_add_devices(&pdev->dev, pdev->id,
+ &asic3_cell_ds1wm, 1, mem, asic->irq_base, NULL);
+ if (ret < 0)
+- goto out;
++ goto out_unmap;
+ }
+
+ if (mem_sdio && (irq >= 0)) {
+ ret = mfd_add_devices(&pdev->dev, pdev->id,
+ &asic3_cell_mmc, 1, mem_sdio, irq, NULL);
+ if (ret < 0)
+- goto out;
++ goto out_unmap;
+ }
+
+ ret = 0;
+@@ -927,8 +927,12 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
+ ret = mfd_add_devices(&pdev->dev, 0,
+ asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0, NULL);
+ }
++ return ret;
+
+- out:
++out_unmap:
++ if (asic->tmio_cnf)
++ iounmap(asic->tmio_cnf);
++out:
+ return ret;
+ }
+
+diff --git a/drivers/mfd/atmel-flexcom.c b/drivers/mfd/atmel-flexcom.c
+index d2f5c073fdf31..559eb4d352b68 100644
+--- a/drivers/mfd/atmel-flexcom.c
++++ b/drivers/mfd/atmel-flexcom.c
+@@ -87,8 +87,7 @@ static const struct of_device_id atmel_flexcom_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, atmel_flexcom_of_match);
+
+-#ifdef CONFIG_PM_SLEEP
+-static int atmel_flexcom_resume(struct device *dev)
++static int __maybe_unused atmel_flexcom_resume_noirq(struct device *dev)
+ {
+ struct atmel_flexcom *ddata = dev_get_drvdata(dev);
+ int err;
+@@ -105,16 +104,16 @@ static int atmel_flexcom_resume(struct device *dev)
+
+ return 0;
+ }
+-#endif
+
+-static SIMPLE_DEV_PM_OPS(atmel_flexcom_pm_ops, NULL,
+- atmel_flexcom_resume);
++static const struct dev_pm_ops atmel_flexcom_pm_ops = {
++ .resume_noirq = atmel_flexcom_resume_noirq,
++};
+
+ static struct platform_driver atmel_flexcom_driver = {
+ .probe = atmel_flexcom_probe,
+ .driver = {
+ .name = "atmel_flexcom",
+- .pm = &atmel_flexcom_pm_ops,
++ .pm = pm_ptr(&atmel_flexcom_pm_ops),
+ .of_match_table = atmel_flexcom_of_match,
+ },
+ };
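The atmel-flexcom change trades the CONFIG_PM_SLEEP ifdef for `__maybe_unused` plus `pm_ptr()`, which evaluates to NULL when CONFIG_PM is disabled, and moves the callback to resume_noirq so the FLEXCOM mode register is restored before the child serial/SPI/I2C devices resume. Roughly, with placeholder names:

```c
#include <linux/platform_device.h>
#include <linux/pm.h>

static int __maybe_unused demo_resume_noirq(struct device *dev)
{
	/* restore controller registers before dependent children resume */
	return 0;
}

static const struct dev_pm_ops demo_pm_ops = {
	.resume_noirq = demo_resume_noirq,
};

static struct platform_driver demo_driver = {
	.driver = {
		.name = "demo",
		.pm = pm_ptr(&demo_pm_ops),	/* NULL when CONFIG_PM=n */
	},
};
```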
+diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
+index e5c8bc998eb4e..965820481f1e1 100644
+--- a/drivers/mfd/davinci_voicecodec.c
++++ b/drivers/mfd/davinci_voicecodec.c
+@@ -46,14 +46,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
+ }
+ clk_enable(davinci_vc->clk);
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+-
+- fifo_base = (dma_addr_t)res->start;
+- davinci_vc->base = devm_ioremap_resource(&pdev->dev, res);
++ davinci_vc->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(davinci_vc->base)) {
+ ret = PTR_ERR(davinci_vc->base);
+ goto fail;
+ }
++ fifo_base = (dma_addr_t)res->start;
+
+ davinci_vc->regmap = devm_regmap_init_mmio(&pdev->dev,
+ davinci_vc->base,
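The davinci_voicecodec fix reorders things so the resource pointer is only dereferenced after the mapping is known to be good; devm_platform_get_and_ioremap_resource() performs the lookup and ioremap in one step and hands the `struct resource` back. A sketch with an assumed probe function:

```c
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/err.h>

static int demo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	/* res->start is now safe to use, e.g. as a FIFO DMA address */
	return 0;
}
```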
+diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c
+index 83e676a096dc1..fc65f9e25fda8 100644
+--- a/drivers/mfd/dln2.c
++++ b/drivers/mfd/dln2.c
+@@ -50,6 +50,7 @@ enum dln2_handle {
+ DLN2_HANDLE_GPIO,
+ DLN2_HANDLE_I2C,
+ DLN2_HANDLE_SPI,
++ DLN2_HANDLE_ADC,
+ DLN2_HANDLES
+ };
+
+@@ -653,6 +654,7 @@ enum {
+ DLN2_ACPI_MATCH_GPIO = 0,
+ DLN2_ACPI_MATCH_I2C = 1,
+ DLN2_ACPI_MATCH_SPI = 2,
++ DLN2_ACPI_MATCH_ADC = 3,
+ };
+
+ static struct dln2_platform_data dln2_pdata_gpio = {
+@@ -683,6 +685,16 @@ static struct mfd_cell_acpi_match dln2_acpi_match_spi = {
+ .adr = DLN2_ACPI_MATCH_SPI,
+ };
+
++/* Only one ADC port supported */
++static struct dln2_platform_data dln2_pdata_adc = {
++ .handle = DLN2_HANDLE_ADC,
++ .port = 0,
++};
++
++static struct mfd_cell_acpi_match dln2_acpi_match_adc = {
++ .adr = DLN2_ACPI_MATCH_ADC,
++};
++
+ static const struct mfd_cell dln2_devs[] = {
+ {
+ .name = "dln2-gpio",
+@@ -702,6 +714,12 @@ static const struct mfd_cell dln2_devs[] = {
+ .platform_data = &dln2_pdata_spi,
+ .pdata_size = sizeof(struct dln2_platform_data),
+ },
++ {
++ .name = "dln2-adc",
++ .acpi_match = &dln2_acpi_match_adc,
++ .platform_data = &dln2_pdata_adc,
++ .pdata_size = sizeof(struct dln2_platform_data),
++ },
+ };
+
+ static void dln2_stop(struct dln2_dev *dln2)
+@@ -818,6 +836,7 @@ out_stop_rx:
+ dln2_stop_rx_urbs(dln2);
+
+ out_free:
++ usb_put_dev(dln2->usb_dev);
+ dln2_free(dln2);
+
+ return ret;
+diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c
+index 37e5e02a1d059..823595bcc9b7c 100644
+--- a/drivers/mfd/fsl-imx25-tsadc.c
++++ b/drivers/mfd/fsl-imx25-tsadc.c
+@@ -69,7 +69,7 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev,
+ int irq;
+
+ irq = platform_get_irq(pdev, 0);
+- if (irq <= 0)
++ if (irq < 0)
+ return irq;
+
+ tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops,
+@@ -84,6 +84,19 @@ static int mx25_tsadc_setup_irq(struct platform_device *pdev,
+ return 0;
+ }
+
++static int mx25_tsadc_unset_irq(struct platform_device *pdev)
++{
++ struct mx25_tsadc *tsadc = platform_get_drvdata(pdev);
++ int irq = platform_get_irq(pdev, 0);
++
++ if (irq >= 0) {
++ irq_set_chained_handler_and_data(irq, NULL, NULL);
++ irq_domain_remove(tsadc->domain);
++ }
++
++ return 0;
++}
++
+ static void mx25_tsadc_setup_clk(struct platform_device *pdev,
+ struct mx25_tsadc *tsadc)
+ {
+@@ -171,18 +184,21 @@ static int mx25_tsadc_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, tsadc);
+
+- return devm_of_platform_populate(dev);
++ ret = devm_of_platform_populate(dev);
++ if (ret)
++ goto err_irq;
++
++ return 0;
++
++err_irq:
++ mx25_tsadc_unset_irq(pdev);
++
++ return ret;
+ }
+
+ static int mx25_tsadc_remove(struct platform_device *pdev)
+ {
+- struct mx25_tsadc *tsadc = platform_get_drvdata(pdev);
+- int irq = platform_get_irq(pdev, 0);
+-
+- if (irq) {
+- irq_set_chained_handler_and_data(irq, NULL, NULL);
+- irq_domain_remove(tsadc->domain);
+- }
++ mx25_tsadc_unset_irq(pdev);
+
+ return 0;
+ }
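Several hunks in this series (fsl-imx25-tsadc here, ipaq-micro and atmel-ssc below) converge on the same rule: platform_get_irq() returns a negative errno on failure and, on current kernels, never 0, so `irq <= 0` and `!irq` checks become `irq < 0`. In isolation, with made-up names:

```c
#include <linux/platform_device.h>
#include <linux/interrupt.h>

static irqreturn_t demo_isr(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int demo_probe(struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);

	/* Negative means error; 0 is not a valid return on modern kernels,
	 * so "<= 0" is redundant and "!irq" is simply wrong. */
	if (irq < 0)
		return irq;

	return devm_request_irq(&pdev->dev, irq, demo_isr, 0, "demo", pdev);
}
```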
+diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c
+index 3f1d976eb67cb..4c43d71cddbdc 100644
+--- a/drivers/mfd/intel-lpss-acpi.c
++++ b/drivers/mfd/intel-lpss-acpi.c
+@@ -136,6 +136,7 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev)
+ {
+ struct intel_lpss_platform_info *info;
+ const struct acpi_device_id *id;
++ int ret;
+
+ id = acpi_match_device(intel_lpss_acpi_ids, &pdev->dev);
+ if (!id)
+@@ -147,12 +148,19 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ info->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!info->mem)
++ return -ENODEV;
++
+ info->irq = platform_get_irq(pdev, 0);
+
++ ret = intel_lpss_probe(&pdev->dev, info);
++ if (ret)
++ return ret;
++
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+- return intel_lpss_probe(&pdev->dev, info);
++ return 0;
+ }
+
+ static int intel_lpss_acpi_remove(struct platform_device *pdev)
+diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c
+index ddd64f9e3341e..926653e1f6033 100644
+--- a/drivers/mfd/intel_soc_pmic_core.c
++++ b/drivers/mfd/intel_soc_pmic_core.c
+@@ -95,6 +95,7 @@ static int intel_soc_pmic_i2c_probe(struct i2c_client *i2c,
+ return 0;
+
+ err_del_irq_chip:
++ pwm_remove_table(crc_pwm_lookup, ARRAY_SIZE(crc_pwm_lookup));
+ regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data);
+ return ret;
+ }
+diff --git a/drivers/mfd/ipaq-micro.c b/drivers/mfd/ipaq-micro.c
+index e92eeeb67a98a..4cd5ecc722112 100644
+--- a/drivers/mfd/ipaq-micro.c
++++ b/drivers/mfd/ipaq-micro.c
+@@ -403,7 +403,7 @@ static int __init micro_probe(struct platform_device *pdev)
+ micro_reset_comm(micro);
+
+ irq = platform_get_irq(pdev, 0);
+- if (!irq)
++ if (irq < 0)
+ return -EINVAL;
+ ret = devm_request_irq(&pdev->dev, irq, micro_serial_isr,
+ IRQF_SHARED, "ipaq-micro",
+diff --git a/drivers/mfd/lp8788-irq.c b/drivers/mfd/lp8788-irq.c
+index 348439a3fbbd4..39006297f3d27 100644
+--- a/drivers/mfd/lp8788-irq.c
++++ b/drivers/mfd/lp8788-irq.c
+@@ -175,6 +175,7 @@ int lp8788_irq_init(struct lp8788 *lp, int irq)
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ "lp8788-irq", irqd);
+ if (ret) {
++ irq_domain_remove(lp->irqdm);
+ dev_err(lp->dev, "failed to create a thread for IRQ_N\n");
+ return ret;
+ }
+@@ -188,4 +189,6 @@ void lp8788_irq_exit(struct lp8788 *lp)
+ {
+ if (lp->irq)
+ free_irq(lp->irq, lp->irqdm);
++ if (lp->irqdm)
++ irq_domain_remove(lp->irqdm);
+ }
+diff --git a/drivers/mfd/lp8788.c b/drivers/mfd/lp8788.c
+index c223d2c6a3635..998e8cc408a0e 100644
+--- a/drivers/mfd/lp8788.c
++++ b/drivers/mfd/lp8788.c
+@@ -195,8 +195,16 @@ static int lp8788_probe(struct i2c_client *cl, const struct i2c_device_id *id)
+ if (ret)
+ return ret;
+
+- return mfd_add_devices(lp->dev, -1, lp8788_devs,
+- ARRAY_SIZE(lp8788_devs), NULL, 0, NULL);
++ ret = mfd_add_devices(lp->dev, -1, lp8788_devs,
++ ARRAY_SIZE(lp8788_devs), NULL, 0, NULL);
++ if (ret)
++ goto err_exit_irq;
++
++ return 0;
++
++err_exit_irq:
++ lp8788_irq_exit(lp);
++ return ret;
+ }
+
+ static int lp8788_remove(struct i2c_client *cl)
+diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
+index f10e53187f67a..9ffab9aafd81b 100644
+--- a/drivers/mfd/lpc_ich.c
++++ b/drivers/mfd/lpc_ich.c
+@@ -63,6 +63,8 @@
+ #define SPIBASE_BYT 0x54
+ #define SPIBASE_BYT_SZ 512
+ #define SPIBASE_BYT_EN BIT(1)
++#define BYT_BCR 0xfc
++#define BYT_BCR_WPD BIT(0)
+
+ #define SPIBASE_LPT 0x3800
+ #define SPIBASE_LPT_SZ 512
+@@ -1084,12 +1086,57 @@ wdt_done:
+ return ret;
+ }
+
++static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data)
++{
++ u32 val;
++
++ val = readl(base + BYT_BCR);
++ if (!(val & BYT_BCR_WPD)) {
++ val |= BYT_BCR_WPD;
++ writel(val, base + BYT_BCR);
++ val = readl(base + BYT_BCR);
++ }
++
++ return val & BYT_BCR_WPD;
++}
++
++static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data)
++{
++ struct pci_dev *pdev = data;
++ u32 bcr;
++
++ pci_read_config_dword(pdev, BCR, &bcr);
++ if (!(bcr & BCR_WPD)) {
++ bcr |= BCR_WPD;
++ pci_write_config_dword(pdev, BCR, bcr);
++ pci_read_config_dword(pdev, BCR, &bcr);
++ }
++
++ return bcr & BCR_WPD;
++}
++
++static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data)
++{
++ unsigned int spi = PCI_DEVFN(13, 2);
++ struct pci_bus *bus = data;
++ u32 bcr;
++
++ pci_bus_read_config_dword(bus, spi, BCR, &bcr);
++ if (!(bcr & BCR_WPD)) {
++ bcr |= BCR_WPD;
++ pci_bus_write_config_dword(bus, spi, BCR, bcr);
++ pci_bus_read_config_dword(bus, spi, BCR, &bcr);
++ }
++
++ return bcr & BCR_WPD;
++}
++
+ static int lpc_ich_init_spi(struct pci_dev *dev)
+ {
+ struct lpc_ich_priv *priv = pci_get_drvdata(dev);
+ struct resource *res = &intel_spi_res[0];
+ struct intel_spi_boardinfo *info;
+- u32 spi_base, rcba, bcr;
++ u32 spi_base, rcba;
+
+ info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL);
+ if (!info)
+@@ -1103,6 +1150,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
+ if (spi_base & SPIBASE_BYT_EN) {
+ res->start = spi_base & ~(SPIBASE_BYT_SZ - 1);
+ res->end = res->start + SPIBASE_BYT_SZ - 1;
++
++ info->set_writeable = lpc_ich_byt_set_writeable;
+ }
+ break;
+
+@@ -1113,8 +1162,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
+ res->start = spi_base + SPIBASE_LPT;
+ res->end = res->start + SPIBASE_LPT_SZ - 1;
+
+- pci_read_config_dword(dev, BCR, &bcr);
+- info->writeable = !!(bcr & BCR_WPD);
++ info->set_writeable = lpc_ich_lpt_set_writeable;
++ info->data = dev;
+ }
+ break;
+
+@@ -1135,8 +1184,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
+ res->start = spi_base & 0xfffffff0;
+ res->end = res->start + SPIBASE_APL_SZ - 1;
+
+- pci_bus_read_config_dword(bus, spi, BCR, &bcr);
+- info->writeable = !!(bcr & BCR_WPD);
++ info->set_writeable = lpc_ich_bxt_set_writeable;
++ info->data = bus;
+ }
+
+ pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1);
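The lpc_ich rework replaces a writeable flag sampled once at probe with per-chipset set_writeable() callbacks handed to the SPI-NOR host, so write protection is only lifted when a write is actually attempted and the read-back confirms whether firmware allowed it. The common read-modify-verify shape, sketched with assumed wrapper names (the register and bit correspond to the driver's BCR/BCR_WPD):

```c
#include <linux/pci.h>

static bool demo_set_writeable(struct pci_dev *pdev, unsigned int reg, u32 wpd)
{
	u32 val;

	pci_read_config_dword(pdev, reg, &val);
	if (!(val & wpd)) {
		val |= wpd;
		pci_write_config_dword(pdev, reg, val);
		pci_read_config_dword(pdev, reg, &val);	/* verify it stuck */
	}

	return val & wpd;	/* firmware may keep the register locked */
}
```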
+diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
+index fec2096474ad1..a6661e07035ba 100644
+--- a/drivers/mfd/max77620.c
++++ b/drivers/mfd/max77620.c
+@@ -419,9 +419,11 @@ static int max77620_initialise_fps(struct max77620_chip *chip)
+ ret = max77620_config_fps(chip, fps_child);
+ if (ret < 0) {
+ of_node_put(fps_child);
++ of_node_put(fps_np);
+ return ret;
+ }
+ }
++ of_node_put(fps_np);
+
+ config = chip->enable_global_lpm ? MAX77620_ONOFFCNFG2_SLP_LPM_MSK : 0;
+ ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2,
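The max77620 hunk is a device-tree refcount fix: of_get_child_by_name() returns fps_np with an elevated refcount and for_each_child_of_node() holds one on the current child, so both must be dropped on the error path and after the loop. A sketch of the bookkeeping; the demo_* names and the demo_config_one() helper are hypothetical:

```c
#include <linux/of.h>

static int demo_config_one(struct device_node *np)
{
	return 0;	/* stand-in for per-child configuration */
}

static int demo_parse_fps(struct device_node *parent)
{
	struct device_node *fps_np, *child;
	int ret;

	fps_np = of_get_child_by_name(parent, "fps");
	if (!fps_np)
		return 0;

	for_each_child_of_node(fps_np, child) {
		ret = demo_config_one(child);
		if (ret) {
			of_node_put(child);	/* loop's reference */
			of_node_put(fps_np);	/* parent reference */
			return ret;
		}
	}
	of_node_put(fps_np);
	return 0;
}
```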
+diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
+index 1abe7432aad82..e281a9202f110 100644
+--- a/drivers/mfd/mc13xxx-core.c
++++ b/drivers/mfd/mc13xxx-core.c
+@@ -323,8 +323,10 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
+ adc1 |= MC13783_ADC1_ATOX;
+
+ dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__);
+- mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE,
++ ret = mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE,
+ mc13xxx_handler_adcdone, __func__, &adcdone_data);
++ if (ret)
++ goto out;
+
+ mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0);
+ mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1);
+diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
+index 79f5c6a18815a..684a011a63968 100644
+--- a/drivers/mfd/mfd-core.c
++++ b/drivers/mfd/mfd-core.c
+@@ -198,6 +198,7 @@ static int mfd_add_device(struct device *parent, int id,
+ if (of_device_is_compatible(np, cell->of_compatible)) {
+ /* Ignore 'disabled' devices error free */
+ if (!of_device_is_available(np)) {
++ of_node_put(np);
+ ret = 0;
+ goto fail_alias;
+ }
+@@ -205,6 +206,7 @@ static int mfd_add_device(struct device *parent, int id,
+ ret = mfd_match_of_node_to_dev(pdev, np, cell);
+ if (ret == -EAGAIN)
+ continue;
++ of_node_put(np);
+ if (ret)
+ goto fail_alias;
+
+diff --git a/drivers/mfd/motorola-cpcap.c b/drivers/mfd/motorola-cpcap.c
+index 6fb206da27298..265464b5d7cc5 100644
+--- a/drivers/mfd/motorola-cpcap.c
++++ b/drivers/mfd/motorola-cpcap.c
+@@ -202,6 +202,13 @@ static const struct of_device_id cpcap_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, cpcap_of_match);
+
++static const struct spi_device_id cpcap_spi_ids[] = {
++ { .name = "cpcap", },
++ { .name = "6556002", },
++ {},
++};
++MODULE_DEVICE_TABLE(spi, cpcap_spi_ids);
++
+ static const struct regmap_config cpcap_regmap_config = {
+ .reg_bits = 16,
+ .reg_stride = 4,
+@@ -342,6 +349,7 @@ static struct spi_driver cpcap_driver = {
+ .pm = &cpcap_pm,
+ },
+ .probe = cpcap_probe,
++ .id_table = cpcap_spi_ids,
+ };
+ module_spi_driver(cpcap_driver);
+
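Both SPI MFD hunks here (motorola-cpcap, and sprd-sc27xx below) add an spi_device_id table to an otherwise DT-only driver: the SPI core builds the "spi:<name>" modalias from the part number of the compatible string, so module autoloading depends on a matching id_table entry, and newer SPI cores warn when it is missing. A hypothetical skeleton:

```c
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/spi/spi.h>

static int demo_probe(struct spi_device *spi)
{
	return 0;
}

static const struct of_device_id demo_of_match[] = {
	{ .compatible = "vendor,demo-pmic" },
	{ }
};
MODULE_DEVICE_TABLE(of, demo_of_match);

static const struct spi_device_id demo_spi_ids[] = {
	{ .name = "demo-pmic" },	/* matches the part after "vendor," */
	{ }
};
MODULE_DEVICE_TABLE(spi, demo_spi_ids);

static struct spi_driver demo_driver = {
	.driver = {
		.name = "demo-pmic",
		.of_match_table = demo_of_match,
	},
	.probe = demo_probe,
	.id_table = demo_spi_ids,
};
```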
+diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c
+index 6eaa6775b8885..d3b32eb798377 100644
+--- a/drivers/mfd/mt6360-core.c
++++ b/drivers/mfd/mt6360-core.c
+@@ -402,7 +402,7 @@ static int mt6360_regmap_read(void *context, const void *reg, size_t reg_size,
+ struct mt6360_ddata *ddata = context;
+ u8 bank = *(u8 *)reg;
+ u8 reg_addr = *(u8 *)(reg + 1);
+- struct i2c_client *i2c = ddata->i2c[bank];
++ struct i2c_client *i2c;
+ bool crc_needed = false;
+ u8 *buf;
+ int buf_len = MT6360_ALLOC_READ_SIZE(val_size);
+@@ -410,6 +410,11 @@ static int mt6360_regmap_read(void *context, const void *reg, size_t reg_size,
+ u8 crc;
+ int ret;
+
++ if (bank >= MT6360_SLAVE_MAX)
++ return -EINVAL;
++
++ i2c = ddata->i2c[bank];
++
+ if (bank == MT6360_SLAVE_PMIC || bank == MT6360_SLAVE_LDO) {
+ crc_needed = true;
+ ret = mt6360_xlate_pmicldo_addr(&reg_addr, val_size);
+@@ -453,13 +458,18 @@ static int mt6360_regmap_write(void *context, const void *val, size_t val_size)
+ struct mt6360_ddata *ddata = context;
+ u8 bank = *(u8 *)val;
+ u8 reg_addr = *(u8 *)(val + 1);
+- struct i2c_client *i2c = ddata->i2c[bank];
++ struct i2c_client *i2c;
+ bool crc_needed = false;
+ u8 *buf;
+ int buf_len = MT6360_ALLOC_WRITE_SIZE(val_size);
+ int write_size = val_size - MT6360_REGMAP_REG_BYTE_SIZE;
+ int ret;
+
++ if (bank >= MT6360_SLAVE_MAX)
++ return -EINVAL;
++
++ i2c = ddata->i2c[bank];
++
+ if (bank == MT6360_SLAVE_PMIC || bank == MT6360_SLAVE_LDO) {
+ crc_needed = true;
+ ret = mt6360_xlate_pmicldo_addr(&reg_addr, val_size - MT6360_REGMAP_REG_BYTE_SIZE);
+diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c
+index 5cd653e615125..191b1bc6141c2 100644
+--- a/drivers/mfd/pcf50633-adc.c
++++ b/drivers/mfd/pcf50633-adc.c
+@@ -136,6 +136,7 @@ int pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg,
+ void *callback_param)
+ {
+ struct pcf50633_adc_request *req;
++ int ret;
+
+ /* req is freed when the result is ready, in interrupt handler */
+ req = kmalloc(sizeof(*req), GFP_KERNEL);
+@@ -147,7 +148,11 @@ int pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg,
+ req->callback = callback;
+ req->callback_param = callback_param;
+
+- return adc_enqueue_request(pcf, req);
++ ret = adc_enqueue_request(pcf, req);
++ if (ret)
++ kfree(req);
++
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(pcf50633_adc_async_read);
+
+diff --git a/drivers/mfd/qcom-pm8008.c b/drivers/mfd/qcom-pm8008.c
+index c472d7f8103c4..4af1d368c321b 100644
+--- a/drivers/mfd/qcom-pm8008.c
++++ b/drivers/mfd/qcom-pm8008.c
+@@ -54,13 +54,6 @@ enum {
+
+ #define PM8008_PERIPH_OFFSET(paddr) (paddr - PM8008_PERIPH_0_BASE)
+
+-struct pm8008_data {
+- struct device *dev;
+- struct regmap *regmap;
+- int irq;
+- struct regmap_irq_chip_data *irq_data;
+-};
+-
+ static unsigned int p0_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_0_BASE)};
+ static unsigned int p1_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_1_BASE)};
+ static unsigned int p2_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_2_BASE)};
+@@ -150,7 +143,7 @@ static struct regmap_config qcom_mfd_regmap_cfg = {
+ .max_register = 0xFFFF,
+ };
+
+-static int pm8008_init(struct pm8008_data *chip)
++static int pm8008_init(struct regmap *regmap)
+ {
+ int rc;
+
+@@ -160,34 +153,31 @@ static int pm8008_init(struct pm8008_data *chip)
+ * This is required to enable the writing of TYPE registers in
+ * regmap_irq_sync_unlock().
+ */
+- rc = regmap_write(chip->regmap,
+- (PM8008_TEMP_ALARM_ADDR | INT_SET_TYPE_OFFSET),
+- BIT(0));
++ rc = regmap_write(regmap, (PM8008_TEMP_ALARM_ADDR | INT_SET_TYPE_OFFSET), BIT(0));
+ if (rc)
+ return rc;
+
+ /* Do the same for GPIO1 and GPIO2 peripherals */
+- rc = regmap_write(chip->regmap,
+- (PM8008_GPIO1_ADDR | INT_SET_TYPE_OFFSET), BIT(0));
++ rc = regmap_write(regmap, (PM8008_GPIO1_ADDR | INT_SET_TYPE_OFFSET), BIT(0));
+ if (rc)
+ return rc;
+
+- rc = regmap_write(chip->regmap,
+- (PM8008_GPIO2_ADDR | INT_SET_TYPE_OFFSET), BIT(0));
++ rc = regmap_write(regmap, (PM8008_GPIO2_ADDR | INT_SET_TYPE_OFFSET), BIT(0));
+
+ return rc;
+ }
+
+-static int pm8008_probe_irq_peripherals(struct pm8008_data *chip,
++static int pm8008_probe_irq_peripherals(struct device *dev,
++ struct regmap *regmap,
+ int client_irq)
+ {
+ int rc, i;
+ struct regmap_irq_type *type;
+ struct regmap_irq_chip_data *irq_data;
+
+- rc = pm8008_init(chip);
++ rc = pm8008_init(regmap);
+ if (rc) {
+- dev_err(chip->dev, "Init failed: %d\n", rc);
++ dev_err(dev, "Init failed: %d\n", rc);
+ return rc;
+ }
+
+@@ -207,10 +197,10 @@ static int pm8008_probe_irq_peripherals(struct pm8008_data *chip,
+ IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW);
+ }
+
+- rc = devm_regmap_add_irq_chip(chip->dev, chip->regmap, client_irq,
++ rc = devm_regmap_add_irq_chip(dev, regmap, client_irq,
+ IRQF_SHARED, 0, &pm8008_irq_chip, &irq_data);
+ if (rc) {
+- dev_err(chip->dev, "Failed to add IRQ chip: %d\n", rc);
++ dev_err(dev, "Failed to add IRQ chip: %d\n", rc);
+ return rc;
+ }
+
+@@ -220,32 +210,30 @@ static int pm8008_probe_irq_peripherals(struct pm8008_data *chip,
+ static int pm8008_probe(struct i2c_client *client)
+ {
+ int rc;
+- struct pm8008_data *chip;
+-
+- chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
+- if (!chip)
+- return -ENOMEM;
++ struct device *dev;
++ struct regmap *regmap;
+
+- chip->dev = &client->dev;
+- chip->regmap = devm_regmap_init_i2c(client, &qcom_mfd_regmap_cfg);
+- if (!chip->regmap)
+- return -ENODEV;
++ dev = &client->dev;
++ regmap = devm_regmap_init_i2c(client, &qcom_mfd_regmap_cfg);
++ if (IS_ERR(regmap))
++ return PTR_ERR(regmap);
+
+- i2c_set_clientdata(client, chip);
++ i2c_set_clientdata(client, regmap);
+
+- if (of_property_read_bool(chip->dev->of_node, "interrupt-controller")) {
+- rc = pm8008_probe_irq_peripherals(chip, client->irq);
++ if (of_property_read_bool(dev->of_node, "interrupt-controller")) {
++ rc = pm8008_probe_irq_peripherals(dev, regmap, client->irq);
+ if (rc)
+- dev_err(chip->dev, "Failed to probe irq periphs: %d\n", rc);
++ dev_err(dev, "Failed to probe irq periphs: %d\n", rc);
+ }
+
+- return devm_of_platform_populate(chip->dev);
++ return devm_of_platform_populate(dev);
+ }
+
+ static const struct of_device_id pm8008_match[] = {
+ { .compatible = "qcom,pm8008", },
+ { },
+ };
++MODULE_DEVICE_TABLE(of, pm8008_match);
+
+ static struct i2c_driver pm8008_mfd_driver = {
+ .driver = {
+diff --git a/drivers/mfd/qcom_rpm.c b/drivers/mfd/qcom_rpm.c
+index 71bc34b74bc9c..8fea0e511550a 100644
+--- a/drivers/mfd/qcom_rpm.c
++++ b/drivers/mfd/qcom_rpm.c
+@@ -547,7 +547,7 @@ static int qcom_rpm_probe(struct platform_device *pdev)
+ init_completion(&rpm->ack);
+
+ /* Enable message RAM clock */
+- rpm->ramclk = devm_clk_get(&pdev->dev, "ram");
++ rpm->ramclk = devm_clk_get_enabled(&pdev->dev, "ram");
+ if (IS_ERR(rpm->ramclk)) {
+ ret = PTR_ERR(rpm->ramclk);
+ if (ret == -EPROBE_DEFER)
+@@ -558,7 +558,6 @@ static int qcom_rpm_probe(struct platform_device *pdev)
+ */
+ rpm->ramclk = NULL;
+ }
+- clk_prepare_enable(rpm->ramclk); /* Accepts NULL */
+
+ irq_ack = platform_get_irq_byname(pdev, "ack");
+ if (irq_ack < 0)
+@@ -673,22 +672,11 @@ static int qcom_rpm_probe(struct platform_device *pdev)
+ if (ret)
+ dev_warn(&pdev->dev, "failed to mark wakeup irq as wakeup\n");
+
+- return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+-}
+-
+-static int qcom_rpm_remove(struct platform_device *pdev)
+-{
+- struct qcom_rpm *rpm = dev_get_drvdata(&pdev->dev);
+-
+- of_platform_depopulate(&pdev->dev);
+- clk_disable_unprepare(rpm->ramclk);
+-
+- return 0;
++ return devm_of_platform_populate(&pdev->dev);
+ }
+
+ static struct platform_driver qcom_rpm_driver = {
+ .probe = qcom_rpm_probe,
+- .remove = qcom_rpm_remove,
+ .driver = {
+ .name = "qcom_rpm",
+ .of_match_table = qcom_rpm_of_match,
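The qcom_rpm conversion leans on two devres helpers so the .remove callback can go away entirely: devm_clk_get_enabled() returns a prepared and enabled clock and undoes that on driver detach, and devm_of_platform_populate() depopulates the child devices the same way. A reduced sketch (the real driver additionally treats a missing "ram" clock as optional):

```c
#include <linux/clk.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/err.h>

static int demo_probe(struct platform_device *pdev)
{
	struct clk *ramclk;

	ramclk = devm_clk_get_enabled(&pdev->dev, "ram");
	if (IS_ERR(ramclk))
		return PTR_ERR(ramclk);

	return devm_of_platform_populate(&pdev->dev);
}

static struct platform_driver demo_driver = {
	.probe = demo_probe,
	.driver = { .name = "demo" },
	/* no .remove: devres undoes both the clock and the child devices */
};
```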
+diff --git a/drivers/mfd/rt5033.c b/drivers/mfd/rt5033.c
+index f1236a9acf304..df095e91e2666 100644
+--- a/drivers/mfd/rt5033.c
++++ b/drivers/mfd/rt5033.c
+@@ -41,9 +41,6 @@ static const struct mfd_cell rt5033_devs[] = {
+ {
+ .name = "rt5033-charger",
+ .of_compatible = "richtek,rt5033-charger",
+- }, {
+- .name = "rt5033-battery",
+- .of_compatible = "richtek,rt5033-battery",
+ }, {
+ .name = "rt5033-led",
+ .of_compatible = "richtek,rt5033-led",
+diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
+index bc0a2c38653e5..3ac4508a6742a 100644
+--- a/drivers/mfd/sm501.c
++++ b/drivers/mfd/sm501.c
+@@ -1720,7 +1720,12 @@ static struct platform_driver sm501_plat_driver = {
+
+ static int __init sm501_base_init(void)
+ {
+- platform_driver_register(&sm501_plat_driver);
++ int ret;
++
++ ret = platform_driver_register(&sm501_plat_driver);
++ if (ret < 0)
++ return ret;
++
+ return pci_register_driver(&sm501_pci_driver);
+ }
+
+diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c
+index 6b7956604a0f0..9890882db1ed3 100644
+--- a/drivers/mfd/sprd-sc27xx-spi.c
++++ b/drivers/mfd/sprd-sc27xx-spi.c
+@@ -236,6 +236,12 @@ static const struct of_device_id sprd_pmic_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, sprd_pmic_match);
+
++static const struct spi_device_id sprd_pmic_spi_ids[] = {
++ { .name = "sc2731", .driver_data = (unsigned long)&sc2731_data },
++ {},
++};
++MODULE_DEVICE_TABLE(spi, sprd_pmic_spi_ids);
++
+ static struct spi_driver sprd_pmic_driver = {
+ .driver = {
+ .name = "sc27xx-pmic",
+@@ -243,6 +249,7 @@ static struct spi_driver sprd_pmic_driver = {
+ .pm = &sprd_pmic_pm_ops,
+ },
+ .probe = sprd_pmic_probe,
++ .id_table = sprd_pmic_spi_ids,
+ };
+
+ static int __init sprd_pmic_init(void)
+diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c
+index e095a39301423..9852f49c3f881 100644
+--- a/drivers/mfd/stmfx.c
++++ b/drivers/mfd/stmfx.c
+@@ -330,9 +330,8 @@ static int stmfx_chip_init(struct i2c_client *client)
+ stmfx->vdd = devm_regulator_get_optional(&client->dev, "vdd");
+ ret = PTR_ERR_OR_ZERO(stmfx->vdd);
+ if (ret) {
+- if (ret == -ENODEV)
+- stmfx->vdd = NULL;
+- else
++ stmfx->vdd = NULL;
++ if (ret != -ENODEV)
+ return dev_err_probe(&client->dev, ret, "Failed to get VDD regulator\n");
+ }
+
+@@ -387,7 +386,7 @@ static int stmfx_chip_init(struct i2c_client *client)
+
+ err:
+ if (stmfx->vdd)
+- return regulator_disable(stmfx->vdd);
++ regulator_disable(stmfx->vdd);
+
+ return ret;
+ }
+diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
+index 58d09c615e673..743afbe4e99b7 100644
+--- a/drivers/mfd/stmpe.c
++++ b/drivers/mfd/stmpe.c
+@@ -1498,9 +1498,9 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum)
+
+ int stmpe_remove(struct stmpe *stmpe)
+ {
+- if (!IS_ERR(stmpe->vio))
++ if (!IS_ERR(stmpe->vio) && regulator_is_enabled(stmpe->vio))
+ regulator_disable(stmpe->vio);
+- if (!IS_ERR(stmpe->vcc))
++ if (!IS_ERR(stmpe->vcc) && regulator_is_enabled(stmpe->vcc))
+ regulator_disable(stmpe->vcc);
+
+ __stmpe_disable(stmpe, STMPE_BLOCK_ADC);
+diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
+index 5369c67e3280d..663ffd4b85706 100644
+--- a/drivers/mfd/t7l66xb.c
++++ b/drivers/mfd/t7l66xb.c
+@@ -397,11 +397,8 @@ err_noirq:
+
+ static int t7l66xb_remove(struct platform_device *dev)
+ {
+- struct t7l66xb_platform_data *pdata = dev_get_platdata(&dev->dev);
+ struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+- int ret;
+
+- ret = pdata->disable(dev);
+ clk_disable_unprepare(t7l66xb->clk48m);
+ clk_put(t7l66xb->clk48m);
+ clk_disable_unprepare(t7l66xb->clk32k);
+@@ -412,8 +409,7 @@ static int t7l66xb_remove(struct platform_device *dev)
+ mfd_remove_devices(&dev->dev);
+ kfree(t7l66xb);
+
+- return ret;
+-
++ return 0;
+ }
+
+ static struct platform_driver t7l66xb_platform_driver = {
+diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
+index 6e105cca27d47..67e2707af4bce 100644
+--- a/drivers/mfd/tps65910.c
++++ b/drivers/mfd/tps65910.c
+@@ -436,15 +436,6 @@ static void tps65910_power_off(void)
+
+ tps65910 = dev_get_drvdata(&tps65910_i2c_client->dev);
+
+- /*
+- * The PWR_OFF bit needs to be set separately, before transitioning
+- * to the OFF state. It enables the "sequential" power-off mode on
+- * TPS65911, it's a NO-OP on TPS65910.
+- */
+- if (regmap_set_bits(tps65910->regmap, TPS65910_DEVCTRL,
+- DEVCTRL_PWR_OFF_MASK) < 0)
+- return;
+-
+ regmap_update_bits(tps65910->regmap, TPS65910_DEVCTRL,
+ DEVCTRL_DEV_OFF_MASK | DEVCTRL_DEV_ON_MASK,
+ DEVCTRL_DEV_OFF_MASK);
+@@ -504,6 +495,19 @@ static int tps65910_i2c_probe(struct i2c_client *i2c,
+ tps65910_sleepinit(tps65910, pmic_plat_data);
+
+ if (pmic_plat_data->pm_off && !pm_power_off) {
++ /*
++ * The PWR_OFF bit needs to be set separately, before
++ * transitioning to the OFF state. It enables the "sequential"
++ * power-off mode on TPS65911, it's a NO-OP on TPS65910.
++ */
++ ret = regmap_set_bits(tps65910->regmap, TPS65910_DEVCTRL,
++ DEVCTRL_PWR_OFF_MASK);
++ if (ret) {
++ dev_err(&i2c->dev, "failed to set power-off mode: %d\n",
++ ret);
++ return ret;
++ }
++
+ tps65910_i2c_client = i2c;
+ pm_power_off = tps65910_power_off;
+ }
+diff --git a/drivers/mfd/tqmx86.c b/drivers/mfd/tqmx86.c
+index 7ae906ff8e353..fac02875fe7d9 100644
+--- a/drivers/mfd/tqmx86.c
++++ b/drivers/mfd/tqmx86.c
+@@ -16,8 +16,8 @@
+ #include <linux/platform_data/i2c-ocores.h>
+ #include <linux/platform_device.h>
+
+-#define TQMX86_IOBASE 0x160
+-#define TQMX86_IOSIZE 0x3f
++#define TQMX86_IOBASE 0x180
++#define TQMX86_IOSIZE 0x20
+ #define TQMX86_IOBASE_I2C 0x1a0
+ #define TQMX86_IOSIZE_I2C 0xa
+ #define TQMX86_IOBASE_WATCHDOG 0x18b
+@@ -25,14 +25,14 @@
+ #define TQMX86_IOBASE_GPIO 0x18d
+ #define TQMX86_IOSIZE_GPIO 0x4
+
+-#define TQMX86_REG_BOARD_ID 0x20
++#define TQMX86_REG_BOARD_ID 0x00
+ #define TQMX86_REG_BOARD_ID_E38M 1
+ #define TQMX86_REG_BOARD_ID_50UC 2
+ #define TQMX86_REG_BOARD_ID_E38C 3
+ #define TQMX86_REG_BOARD_ID_60EB 4
+-#define TQMX86_REG_BOARD_ID_E39M 5
+-#define TQMX86_REG_BOARD_ID_E39C 6
+-#define TQMX86_REG_BOARD_ID_E39x 7
++#define TQMX86_REG_BOARD_ID_E39MS 5
++#define TQMX86_REG_BOARD_ID_E39C1 6
++#define TQMX86_REG_BOARD_ID_E39C2 7
+ #define TQMX86_REG_BOARD_ID_70EB 8
+ #define TQMX86_REG_BOARD_ID_80UC 9
+ #define TQMX86_REG_BOARD_ID_110EB 11
+@@ -40,18 +40,18 @@
+ #define TQMX86_REG_BOARD_ID_E40S 13
+ #define TQMX86_REG_BOARD_ID_E40C1 14
+ #define TQMX86_REG_BOARD_ID_E40C2 15
+-#define TQMX86_REG_BOARD_REV 0x21
+-#define TQMX86_REG_IO_EXT_INT 0x26
++#define TQMX86_REG_BOARD_REV 0x01
++#define TQMX86_REG_IO_EXT_INT 0x06
+ #define TQMX86_REG_IO_EXT_INT_NONE 0
+ #define TQMX86_REG_IO_EXT_INT_7 1
+ #define TQMX86_REG_IO_EXT_INT_9 2
+ #define TQMX86_REG_IO_EXT_INT_12 3
+ #define TQMX86_REG_IO_EXT_INT_MASK 0x3
+ #define TQMX86_REG_IO_EXT_INT_GPIO_SHIFT 4
++#define TQMX86_REG_SAUC 0x17
+
+-#define TQMX86_REG_I2C_DETECT 0x47
++#define TQMX86_REG_I2C_DETECT 0x1a7
+ #define TQMX86_REG_I2C_DETECT_SOFT 0xa5
+-#define TQMX86_REG_I2C_INT_EN 0x49
+
+ static uint gpio_irq;
+ module_param(gpio_irq, uint, 0);
+@@ -111,7 +111,7 @@ static const struct mfd_cell tqmx86_devs[] = {
+ },
+ };
+
+-static const char *tqmx86_board_id_to_name(u8 board_id)
++static const char *tqmx86_board_id_to_name(u8 board_id, u8 sauc)
+ {
+ switch (board_id) {
+ case TQMX86_REG_BOARD_ID_E38M:
+@@ -122,12 +122,12 @@ static const char *tqmx86_board_id_to_name(u8 board_id)
+ return "TQMxE38C";
+ case TQMX86_REG_BOARD_ID_60EB:
+ return "TQMx60EB";
+- case TQMX86_REG_BOARD_ID_E39M:
+- return "TQMxE39M";
+- case TQMX86_REG_BOARD_ID_E39C:
+- return "TQMxE39C";
+- case TQMX86_REG_BOARD_ID_E39x:
+- return "TQMxE39x";
++ case TQMX86_REG_BOARD_ID_E39MS:
++ return (sauc == 0xff) ? "TQMxE39M" : "TQMxE39S";
++ case TQMX86_REG_BOARD_ID_E39C1:
++ return "TQMxE39C1";
++ case TQMX86_REG_BOARD_ID_E39C2:
++ return "TQMxE39C2";
+ case TQMX86_REG_BOARD_ID_70EB:
+ return "TQMx70EB";
+ case TQMX86_REG_BOARD_ID_80UC:
+@@ -160,9 +160,9 @@ static int tqmx86_board_id_to_clk_rate(struct device *dev, u8 board_id)
+ case TQMX86_REG_BOARD_ID_E40C1:
+ case TQMX86_REG_BOARD_ID_E40C2:
+ return 24000;
+- case TQMX86_REG_BOARD_ID_E39M:
+- case TQMX86_REG_BOARD_ID_E39C:
+- case TQMX86_REG_BOARD_ID_E39x:
++ case TQMX86_REG_BOARD_ID_E39MS:
++ case TQMX86_REG_BOARD_ID_E39C1:
++ case TQMX86_REG_BOARD_ID_E39C2:
+ return 25000;
+ case TQMX86_REG_BOARD_ID_E38M:
+ case TQMX86_REG_BOARD_ID_E38C:
+@@ -176,7 +176,7 @@ static int tqmx86_board_id_to_clk_rate(struct device *dev, u8 board_id)
+
+ static int tqmx86_probe(struct platform_device *pdev)
+ {
+- u8 board_id, rev, i2c_det, io_ext_int_val;
++ u8 board_id, sauc, rev, i2c_det, io_ext_int_val;
+ struct device *dev = &pdev->dev;
+ u8 gpio_irq_cfg, readback;
+ const char *board_name;
+@@ -206,14 +206,20 @@ static int tqmx86_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ board_id = ioread8(io_base + TQMX86_REG_BOARD_ID);
+- board_name = tqmx86_board_id_to_name(board_id);
++ sauc = ioread8(io_base + TQMX86_REG_SAUC);
++ board_name = tqmx86_board_id_to_name(board_id, sauc);
+ rev = ioread8(io_base + TQMX86_REG_BOARD_REV);
+
+ dev_info(dev,
+ "Found %s - Board ID %d, PCB Revision %d, PLD Revision %d\n",
+ board_name, board_id, rev >> 4, rev & 0xf);
+
+- i2c_det = ioread8(io_base + TQMX86_REG_I2C_DETECT);
++ /*
++ * The I2C_DETECT register is in the range assigned to the I2C driver
++ * later, so we don't extend TQMX86_IOSIZE. Use inb() for this one-off
++ * access instead of ioport_map + unmap.
++ */
++ i2c_det = inb(TQMX86_REG_I2C_DETECT);
+
+ if (gpio_irq_cfg) {
+ io_ext_int_val =
+diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
+index d6cd5537126c6..69f9b0336410d 100644
+--- a/drivers/misc/atmel-ssc.c
++++ b/drivers/misc/atmel-ssc.c
+@@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev)
+ clk_disable_unprepare(ssc->clk);
+
+ ssc->irq = platform_get_irq(pdev, 0);
+- if (!ssc->irq) {
++ if (ssc->irq < 0) {
+ dev_dbg(&pdev->dev, "could not get irq\n");
+- return -ENXIO;
++ return ssc->irq;
+ }
+
+ mutex_lock(&user_lock);
+diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c
+index de6d44a158bba..3f514d77a843f 100644
+--- a/drivers/misc/cardreader/alcor_pci.c
++++ b/drivers/misc/cardreader/alcor_pci.c
+@@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev,
+ if (!priv)
+ return -ENOMEM;
+
+- ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL);
++ ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ priv->id = ret;
+@@ -280,7 +280,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
+ ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot request region\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error_free_ida;
+ }
+
+ if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+@@ -324,6 +325,8 @@ static int alcor_pci_probe(struct pci_dev *pdev,
+
+ error_release_regions:
+ pci_release_regions(pdev);
++error_free_ida:
++ ida_free(&alcor_pci_idr, priv->id);
+ return ret;
+ }
+
+@@ -337,7 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev)
+
+ mfd_remove_devices(&pdev->dev);
+
+- ida_simple_remove(&alcor_pci_idr, priv->id);
++ ida_free(&alcor_pci_idr, priv->id);
+
+ pci_release_regions(pdev);
+ pci_set_drvdata(pdev, NULL);
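The alcor_pci changes are twofold: the deprecated ida_simple_get()/ida_simple_remove() pair becomes ida_alloc()/ida_free(), and the probe error path now releases the id it allocated. The allocator pairing on its own, with illustrative names:

```c
#include <linux/idr.h>

static DEFINE_IDA(demo_ida);

static int demo_get_id(void)
{
	int id = ida_alloc(&demo_ida, GFP_KERNEL);	/* any free id >= 0 */

	if (id < 0)
		return id;

	/* ... use id; any later failure path must release it ... */
	return id;
}

static void demo_put_id(int id)
{
	ida_free(&demo_ida, id);
}
```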
+diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
+index 4bcfbc9afbac1..0f106d7006251 100644
+--- a/drivers/misc/cardreader/rts5227.c
++++ b/drivers/misc/cardreader/rts5227.c
+@@ -171,7 +171,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
+ else
+ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00);
+
+- if (option->force_clkreq_0)
++ if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
+ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+ FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+ else
+diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c
+index ffc128278613b..282a03520cf52 100644
+--- a/drivers/misc/cardreader/rts5228.c
++++ b/drivers/misc/cardreader/rts5228.c
+@@ -427,17 +427,10 @@ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr)
+ option->ltr_enabled = false;
+ }
+ }
+-
+- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+- | PM_L1_1_EN | PM_L1_2_EN))
+- option->force_clkreq_0 = false;
+- else
+- option->force_clkreq_0 = true;
+ }
+
+ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
+ {
+- struct rtsx_cr_option *option = &pcr->option;
+
+ rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
+ CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
+@@ -468,17 +461,6 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
+ else
+ rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
+
+- /*
+- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+- * to drive low, and we forcibly request clock.
+- */
+- if (option->force_clkreq_0)
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+- else
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+-
+ rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
+ rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
+ rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL,
+diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
+index 53f3a1f45c4a7..6b5e4bdf209df 100644
+--- a/drivers/misc/cardreader/rts5249.c
++++ b/drivers/misc/cardreader/rts5249.c
+@@ -302,12 +302,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
+ }
+ }
+
+-
+ /*
+ * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+ * to drive low, and we forcibly request clock.
+ */
+- if (option->force_clkreq_0)
++ if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
+ rtsx_pci_write_register(pcr, PETXCFG,
+ FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+ else
+diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
+index 9b42b20a3e5ae..79b18f6f73a8a 100644
+--- a/drivers/misc/cardreader/rts5260.c
++++ b/drivers/misc/cardreader/rts5260.c
+@@ -517,17 +517,10 @@ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
+ option->ltr_enabled = false;
+ }
+ }
+-
+- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+- | PM_L1_1_EN | PM_L1_2_EN))
+- option->force_clkreq_0 = false;
+- else
+- option->force_clkreq_0 = true;
+ }
+
+ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
+ {
+- struct rtsx_cr_option *option = &pcr->option;
+
+ /* Set mcu_cnt to 7 to ensure data can be sampled properly */
+ rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
+@@ -546,17 +539,6 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
+
+ rts5260_init_hw(pcr);
+
+- /*
+- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+- * to drive low, and we forcibly request clock.
+- */
+- if (option->force_clkreq_0)
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+- else
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+-
+ rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
+
+ return 0;
+diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
+index 1fd4e0e507302..2a97eeb0e5097 100644
+--- a/drivers/misc/cardreader/rts5261.c
++++ b/drivers/misc/cardreader/rts5261.c
+@@ -468,17 +468,10 @@ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
+ option->ltr_enabled = false;
+ }
+ }
+-
+- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+- | PM_L1_1_EN | PM_L1_2_EN))
+- option->force_clkreq_0 = false;
+- else
+- option->force_clkreq_0 = true;
+ }
+
+ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
+ {
+- struct rtsx_cr_option *option = &pcr->option;
+ u32 val;
+
+ rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
+@@ -524,17 +517,6 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
+ else
+ rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
+
+- /*
+- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+- * to drive low, and we forcibly request clock.
+- */
+- if (option->force_clkreq_0)
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+- else
+- rtsx_pci_write_register(pcr, PETXCFG,
+- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+-
+ rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
+ rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
+ rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
+index baf83594a01d3..c0bf747305e22 100644
+--- a/drivers/misc/cardreader/rtsx_pcr.c
++++ b/drivers/misc/cardreader/rtsx_pcr.c
+@@ -1400,8 +1400,11 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
+ return err;
+ }
+
+- if (pcr->aspm_mode == ASPM_MODE_REG)
++ if (pcr->aspm_mode == ASPM_MODE_REG) {
+ rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
++ rtsx_pci_write_register(pcr, PETXCFG,
++ FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
++ }
+
+ /* No CD interrupt if probing driver with card inserted.
+ * So we need to initialize pcr->card_exist here.
+@@ -1581,7 +1584,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
+ pcr->remap_addr = ioremap(base, len);
+ if (!pcr->remap_addr) {
+ ret = -ENOMEM;
+- goto free_handle;
++ goto free_idr;
+ }
+
+ pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev),
+@@ -1651,6 +1654,10 @@ disable_msi:
+ pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+ unmap:
+ iounmap(pcr->remap_addr);
++free_idr:
++ spin_lock(&rtsx_pci_lock);
++ idr_remove(&rtsx_pci_idr, pcr->id);
++ spin_unlock(&rtsx_pci_lock);
+ free_handle:
+ kfree(handle);
+ free_pcr:
+@@ -1803,8 +1810,6 @@ static int rtsx_pci_runtime_suspend(struct device *device)
+ mutex_lock(&pcr->pcr_mutex);
+ rtsx_pci_power_off(pcr, HOST_ENTER_S3);
+
+- free_irq(pcr->irq, (void *)pcr);
+-
+ mutex_unlock(&pcr->pcr_mutex);
+
+ pcr->is_runtime_suspended = true;
+@@ -1825,8 +1830,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
+ mutex_lock(&pcr->pcr_mutex);
+
+ rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
+- rtsx_pci_acquire_irq(pcr);
+- synchronize_irq(pcr->irq);
+
+ if (pcr->ops->fetch_vendor_settings)
+ pcr->ops->fetch_vendor_settings(pcr);
+diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
+index 59eda55d92a38..f150d8769f198 100644
+--- a/drivers/misc/cardreader/rtsx_usb.c
++++ b/drivers/misc/cardreader/rtsx_usb.c
+@@ -631,16 +631,20 @@ static int rtsx_usb_probe(struct usb_interface *intf,
+
+ ucr->pusb_dev = usb_dev;
+
+- ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE,
+- GFP_KERNEL, &ucr->iobuf_dma);
+- if (!ucr->iobuf)
++ ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL);
++ if (!ucr->cmd_buf)
+ return -ENOMEM;
+
++ ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL);
++ if (!ucr->rsp_buf) {
++ ret = -ENOMEM;
++ goto out_free_cmd_buf;
++ }
++
+ usb_set_intfdata(intf, ucr);
+
+ ucr->vendor_id = id->idVendor;
+ ucr->product_id = id->idProduct;
+- ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf;
+
+ mutex_init(&ucr->dev_mutex);
+
+@@ -667,8 +671,12 @@ static int rtsx_usb_probe(struct usb_interface *intf,
+ return 0;
+
+ out_init_fail:
+- usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
+- ucr->iobuf_dma);
++ usb_set_intfdata(ucr->pusb_intf, NULL);
++ kfree(ucr->rsp_buf);
++ ucr->rsp_buf = NULL;
++out_free_cmd_buf:
++ kfree(ucr->cmd_buf);
++ ucr->cmd_buf = NULL;
+ return ret;
+ }
+
+@@ -681,8 +689,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf)
+ mfd_remove_devices(&intf->dev);
+
+ usb_set_intfdata(ucr->pusb_intf, NULL);
+- usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
+- ucr->iobuf_dma);
++
++ kfree(ucr->cmd_buf);
++ ucr->cmd_buf = NULL;
++
++ kfree(ucr->rsp_buf);
++ ucr->rsp_buf = NULL;
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
+index 186308f1f8eba..6334376826a92 100644
+--- a/drivers/misc/cxl/guest.c
++++ b/drivers/misc/cxl/guest.c
+@@ -959,10 +959,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
+ * if it returns an error!
+ */
+ if ((rc = cxl_register_afu(afu)))
+- goto err_put1;
++ goto err_put_dev;
+
+ if ((rc = cxl_sysfs_afu_add(afu)))
+- goto err_put1;
++ goto err_del_dev;
+
+ /*
+ * pHyp doesn't expose the programming models supported by the
+@@ -978,7 +978,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
+ afu->modes_supported = CXL_MODE_DIRECTED;
+
+ if ((rc = cxl_afu_select_best_mode(afu)))
+- goto err_put2;
++ goto err_remove_sysfs;
+
+ adapter->afu[afu->slice] = afu;
+
+@@ -998,10 +998,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
+
+ return 0;
+
+-err_put2:
++err_remove_sysfs:
+ cxl_sysfs_afu_remove(afu);
+-err_put1:
+- device_unregister(&afu->dev);
++err_del_dev:
++ device_del(&afu->dev);
++err_put_dev:
++ put_device(&afu->dev);
+ free = false;
+ guest_release_serr_irq(afu);
+ err2:
+@@ -1135,18 +1137,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
+ * even if it returns an error!
+ */
+ if ((rc = cxl_register_adapter(adapter)))
+- goto err_put1;
++ goto err_put_dev;
+
+ if ((rc = cxl_sysfs_adapter_add(adapter)))
+- goto err_put1;
++ goto err_del_dev;
+
+ /* release the context lock as the adapter is configured */
+ cxl_adapter_context_unlock(adapter);
+
+ return adapter;
+
+-err_put1:
+- device_unregister(&adapter->dev);
++err_del_dev:
++ device_del(&adapter->dev);
++err_put_dev:
++ put_device(&adapter->dev);
+ free = false;
+ cxl_guest_remove_chardev(adapter);
+ err1:
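The cxl error-path rework (here and in pci.c below) follows the driver-core rule for devices created with device_register(): once the embedded kobject is initialized, the structure may only be released through put_device(), and a failure after a successful add needs device_del() followed by put_device(), the two halves of device_unregister(). A generic sketch with a hypothetical wrapper object:

```c
#include <linux/device.h>
#include <linux/slab.h>

struct demo_obj {
	struct device dev;
};

static void demo_release(struct device *dev)
{
	kfree(container_of(dev, struct demo_obj, dev));
}

static int demo_add(struct demo_obj *obj, int (*later_step)(struct demo_obj *))
{
	int rc;

	obj->dev.release = demo_release;
	device_initialize(&obj->dev);

	rc = device_add(&obj->dev);
	if (rc)
		goto err_put;	/* not yet visible: just drop the reference */

	rc = later_step(obj);
	if (rc)
		goto err_del;	/* visible: unhook first, then drop */

	return 0;

err_del:
	device_del(&obj->dev);	/* device_del + put_device == device_unregister */
err_put:
	put_device(&obj->dev);	/* release callback frees obj */
	return rc;
}
```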
+diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
+index 4cb829d5d873c..2e4dcfebf19af 100644
+--- a/drivers/misc/cxl/irq.c
++++ b/drivers/misc/cxl/irq.c
+@@ -349,6 +349,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
+
+ out:
+ cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
++ bitmap_free(ctx->irq_bitmap);
+ afu_irq_name_free(ctx);
+ return -ENOMEM;
+ }
+diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
+index 2ba899f5659ff..d183836d80e3f 100644
+--- a/drivers/misc/cxl/pci.c
++++ b/drivers/misc/cxl/pci.c
+@@ -387,6 +387,7 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
+ rc = get_phb_index(np, phb_index);
+ if (rc) {
+ pr_err("cxl: invalid phb index\n");
++ of_node_put(np);
+ return rc;
+ }
+
+@@ -1164,10 +1165,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+ * if it returns an error!
+ */
+ if ((rc = cxl_register_afu(afu)))
+- goto err_put1;
++ goto err_put_dev;
+
+ if ((rc = cxl_sysfs_afu_add(afu)))
+- goto err_put1;
++ goto err_del_dev;
+
+ adapter->afu[afu->slice] = afu;
+
+@@ -1176,10 +1177,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+
+ return 0;
+
+-err_put1:
++err_del_dev:
++ device_del(&afu->dev);
++err_put_dev:
+ pci_deconfigure_afu(afu);
+ cxl_debugfs_afu_remove(afu);
+- device_unregister(&afu->dev);
++ put_device(&afu->dev);
+ return rc;
+
+ err_free_native:
+@@ -1667,23 +1670,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
+ * even if it returns an error!
+ */
+ if ((rc = cxl_register_adapter(adapter)))
+- goto err_put1;
++ goto err_put_dev;
+
+ if ((rc = cxl_sysfs_adapter_add(adapter)))
+- goto err_put1;
++ goto err_del_dev;
+
+ /* Release the context lock as adapter is configured */
+ cxl_adapter_context_unlock(adapter);
+
+ return adapter;
+
+-err_put1:
++err_del_dev:
++ device_del(&adapter->dev);
++err_put_dev:
+ /* This should mirror cxl_remove_adapter, except without the
+ * sysfs parts
+ */
+ cxl_debugfs_adapter_remove(adapter);
+ cxl_deconfigure_adapter(adapter);
+- device_unregister(&adapter->dev);
++ put_device(&adapter->dev);
+ return ERR_PTR(rc);
+
+ err_release:
+diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
+index f0a7531f354c1..2d240bfa819f8 100644
+--- a/drivers/misc/eeprom/Kconfig
++++ b/drivers/misc/eeprom/Kconfig
+@@ -6,6 +6,7 @@ config EEPROM_AT24
+ depends on I2C && SYSFS
+ select NVMEM
+ select NVMEM_SYSFS
++ select REGMAP
+ select REGMAP_I2C
+ help
+ Enable this driver to get read/write support to most I2C EEPROMs
+diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
+index 632325474233a..403243859dce9 100644
+--- a/drivers/misc/eeprom/at25.c
++++ b/drivers/misc/eeprom/at25.c
+@@ -17,8 +17,6 @@
+ #include <linux/spi/spi.h>
+ #include <linux/spi/eeprom.h>
+ #include <linux/property.h>
+-#include <linux/of.h>
+-#include <linux/of_device.h>
+ #include <linux/math.h>
+
+ /*
+@@ -32,6 +30,8 @@
+ */
+
+ #define FM25_SN_LEN 8 /* serial number length */
++#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
++
+ struct at25_data {
+ struct spi_device *spi;
+ struct mutex lock;
+@@ -40,6 +40,7 @@ struct at25_data {
+ struct nvmem_config nvmem_config;
+ struct nvmem_device *nvmem;
+ u8 sernum[FM25_SN_LEN];
++ u8 command[EE_MAXADDRLEN + 1];
+ };
+
+ #define AT25_WREN 0x06 /* latch the write enable */
+@@ -62,8 +63,6 @@ struct at25_data {
+
+ #define FM25_ID_LEN 9 /* ID length */
+
+-#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
+-
+ /* Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+@@ -78,7 +77,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
+ {
+ struct at25_data *at25 = priv;
+ char *buf = val;
+- u8 command[EE_MAXADDRLEN + 1];
+ u8 *cp;
+ ssize_t status;
+ struct spi_transfer t[2];
+@@ -92,12 +90,15 @@ static int at25_ee_read(void *priv, unsigned int offset,
+ if (unlikely(!count))
+ return -EINVAL;
+
+- cp = command;
++ cp = at25->command;
+
+ instr = AT25_READ;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (offset >= (1U << (at25->addrlen * 8)))
+ instr |= AT25_INSTR_BIT3;
++
++ mutex_lock(&at25->lock);
++
+ *cp++ = instr;
+
+ /* 8/16/24-bit address is written MSB first */
+@@ -116,7 +117,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
+
+- t[0].tx_buf = command;
++ t[0].tx_buf = at25->command;
+ t[0].len = at25->addrlen + 1;
+ spi_message_add_tail(&t[0], &m);
+
+@@ -124,8 +125,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
+ t[1].len = count;
+ spi_message_add_tail(&t[1], &m);
+
+- mutex_lock(&at25->lock);
+-
+ /* Read it all at once.
+ *
+ * REVISIT that's potentially a problem with large chips, if
+@@ -153,7 +152,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
+
+- t[0].tx_buf = &command;
++ t[0].tx_buf = at25->command;
+ t[0].len = 1;
+ spi_message_add_tail(&t[0], &m);
+
+@@ -163,6 +162,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
+
+ mutex_lock(&at25->lock);
+
++ at25->command[0] = command;
++
+ status = spi_sync(at25->spi, &m);
+ dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);
+
+@@ -376,28 +377,31 @@ MODULE_DEVICE_TABLE(spi, at25_spi_ids);
+ static int at25_probe(struct spi_device *spi)
+ {
+ struct at25_data *at25 = NULL;
+- struct spi_eeprom chip;
+ int err;
+ int sr;
+ u8 id[FM25_ID_LEN];
+ u8 sernum[FM25_SN_LEN];
++ bool is_fram;
+ int i;
+- const struct of_device_id *match;
+- bool is_fram = 0;
+
+- match = of_match_device(of_match_ptr(at25_of_match), &spi->dev);
+- if (match && !strcmp(match->compatible, "cypress,fm25"))
+- is_fram = 1;
++ err = device_property_match_string(&spi->dev, "compatible", "cypress,fm25");
++ if (err >= 0)
++ is_fram = true;
++ else
++ is_fram = false;
++
++ at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL);
++ if (!at25)
++ return -ENOMEM;
+
+ /* Chip description */
+- if (!spi->dev.platform_data) {
+- if (!is_fram) {
+- err = at25_fw_to_chip(&spi->dev, &chip);
+- if (err)
+- return err;
+- }
+- } else
+- chip = *(struct spi_eeprom *)spi->dev.platform_data;
++ if (spi->dev.platform_data) {
++ memcpy(&at25->chip, spi->dev.platform_data, sizeof(at25->chip));
++ } else if (!is_fram) {
++ err = at25_fw_to_chip(&spi->dev, &at25->chip);
++ if (err)
++ return err;
++ }
+
+ /* Ping the chip ... the status register is pretty portable,
+ * unlike probing manufacturer IDs. We do expect that system
+@@ -409,12 +413,7 @@ static int at25_probe(struct spi_device *spi)
+ return -ENXIO;
+ }
+
+- at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL);
+- if (!at25)
+- return -ENOMEM;
+-
+ mutex_init(&at25->lock);
+- at25->chip = chip;
+ at25->spi = spi;
+ spi_set_drvdata(spi, at25);
+
+@@ -431,7 +430,7 @@ static int at25_probe(struct spi_device *spi)
+ dev_err(&spi->dev, "Error: unsupported size (id %02x)\n", id[7]);
+ return -ENODEV;
+ }
+- chip.byte_len = int_pow(2, id[7] - 0x21 + 4) * 1024;
++ at25->chip.byte_len = int_pow(2, id[7] - 0x21 + 4) * 1024;
+
+ if (at25->chip.byte_len > 64 * 1024)
+ at25->chip.flags |= EE_ADDR3;
+@@ -464,7 +463,7 @@ static int at25_probe(struct spi_device *spi)
+ at25->nvmem_config.type = is_fram ? NVMEM_TYPE_FRAM : NVMEM_TYPE_EEPROM;
+ at25->nvmem_config.name = dev_name(&spi->dev);
+ at25->nvmem_config.dev = &spi->dev;
+- at25->nvmem_config.read_only = chip.flags & EE_READONLY;
++ at25->nvmem_config.read_only = at25->chip.flags & EE_READONLY;
+ at25->nvmem_config.root_only = true;
+ at25->nvmem_config.owner = THIS_MODULE;
+ at25->nvmem_config.compat = true;
+@@ -474,17 +473,18 @@ static int at25_probe(struct spi_device *spi)
+ at25->nvmem_config.priv = at25;
+ at25->nvmem_config.stride = 1;
+ at25->nvmem_config.word_size = 1;
+- at25->nvmem_config.size = chip.byte_len;
++ at25->nvmem_config.size = at25->chip.byte_len;
+
+ at25->nvmem = devm_nvmem_register(&spi->dev, &at25->nvmem_config);
+ if (IS_ERR(at25->nvmem))
+ return PTR_ERR(at25->nvmem);
+
+ dev_info(&spi->dev, "%d %s %s %s%s, pagesize %u\n",
+- (chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024),
+- (chip.byte_len < 1024) ? "Byte" : "KByte",
++ (at25->chip.byte_len < 1024) ?
++ at25->chip.byte_len : (at25->chip.byte_len / 1024),
++ (at25->chip.byte_len < 1024) ? "Byte" : "KByte",
+ at25->chip.name, is_fram ? "fram" : "eeprom",
+- (chip.flags & EE_READONLY) ? " (readonly)" : "",
++ (at25->chip.flags & EE_READONLY) ? " (readonly)" : "",
+ at25->chip.page_size);
+ return 0;
+ }
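Much of the at25 diff exists because SPI transfer buffers must be DMA-safe: an on-stack command[] array cannot be handed to spi_sync() when the controller maps buffers for DMA, so the scratch buffer moves into the kmalloc'd driver state and the driver mutex is taken before it is filled. The resulting shape, with assumed names and a fixed 16-bit address purely for brevity:

```c
#include <linux/spi/spi.h>
#include <linux/mutex.h>

struct demo_eeprom {
	struct spi_device *spi;
	struct mutex lock;
	u8 command[4];			/* opcode + up to 3 address bytes */
};

static int demo_read(struct demo_eeprom *ee, unsigned int offset,
		     void *buf, size_t len)
{
	struct spi_transfer t[2] = { };
	struct spi_message m;
	int ret;

	mutex_lock(&ee->lock);		/* protects ee->command */
	ee->command[0] = 0x03;		/* READ */
	ee->command[1] = offset >> 8;
	ee->command[2] = offset;

	t[0].tx_buf = ee->command;	/* DMA-safe: lives in kmalloc'd state */
	t[0].len = 3;
	t[1].rx_buf = buf;
	t[1].len = len;

	spi_message_init(&m);
	spi_message_add_tail(&t[0], &m);
	spi_message_add_tail(&t[1], &m);

	ret = spi_sync(ee->spi, &m);
	mutex_unlock(&ee->lock);
	return ret;
}
```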
+diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c
+index bb9c4512c968c..9fbfe784d7101 100644
+--- a/drivers/misc/eeprom/ee1004.c
++++ b/drivers/misc/eeprom/ee1004.c
+@@ -114,6 +114,9 @@ static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf,
+ if (offset + count > EE1004_PAGE_SIZE)
+ count = EE1004_PAGE_SIZE - offset;
+
++ if (count > I2C_SMBUS_BLOCK_MAX)
++ count = I2C_SMBUS_BLOCK_MAX;
++
+ return i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf);
+ }
+
+diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
+index b0cff4b152da8..5298be4cc14c1 100644
+--- a/drivers/misc/eeprom/idt_89hpesx.c
++++ b/drivers/misc/eeprom/idt_89hpesx.c
+@@ -909,14 +909,18 @@ static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf,
+ u32 csraddr, csrval;
+ char *buf;
+
++ if (*offp)
++ return 0;
++
+ /* Copy data from User-space */
+ buf = kmalloc(count + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+- ret = simple_write_to_buffer(buf, count, offp, ubuf, count);
+- if (ret < 0)
++ if (copy_from_user(buf, ubuf, count)) {
++ ret = -EFAULT;
+ goto free_buf;
++ }
+ buf[count] = 0;
+
+ /* Find position of colon in the buffer */
+@@ -1564,12 +1568,20 @@ static struct i2c_driver idt_driver = {
+ */
+ static int __init idt_init(void)
+ {
++ int ret;
++
+ /* Create Debugfs directory first */
+ if (debugfs_initialized())
+ csr_dbgdir = debugfs_create_dir("idt_csr", NULL);
+
+ /* Add new i2c-device driver */
+- return i2c_add_driver(&idt_driver);
++ ret = i2c_add_driver(&idt_driver);
++ if (ret) {
++ debugfs_remove_recursive(csr_dbgdir);
++ return ret;
++ }
++
++ return 0;
+ }
+ module_init(idt_init);
+
+diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
+index ad6ced4546556..24dfc069dbc65 100644
+--- a/drivers/misc/fastrpc.c
++++ b/drivers/misc/fastrpc.c
+@@ -247,6 +247,13 @@ static void fastrpc_free_map(struct kref *ref)
+ dma_buf_put(map->buf);
+ }
+
++ if (map->fl) {
++ spin_lock(&map->fl->lock);
++ list_del(&map->node);
++ spin_unlock(&map->fl->lock);
++ map->fl = NULL;
++ }
++
+ kfree(map);
+ }
+
+@@ -256,10 +263,12 @@ static void fastrpc_map_put(struct fastrpc_map *map)
+ kref_put(&map->refcount, fastrpc_free_map);
+ }
+
+-static void fastrpc_map_get(struct fastrpc_map *map)
++static int fastrpc_map_get(struct fastrpc_map *map)
+ {
+- if (map)
+- kref_get(&map->refcount);
++ if (!map)
++ return -ENOENT;
++
++ return kref_get_unless_zero(&map->refcount) ? 0 : -ENOENT;
+ }
+
+ static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
+@@ -719,16 +728,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
+ static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen)
+ {
+ u64 size = 0;
+- int i;
++ int oix;
+
+ size = ALIGN(metalen, FASTRPC_ALIGN);
+- for (i = 0; i < ctx->nscalars; i++) {
++ for (oix = 0; oix < ctx->nbufs; oix++) {
++ int i = ctx->olaps[oix].raix;
++
+ if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) {
+
+- if (ctx->olaps[i].offset == 0)
++ if (ctx->olaps[oix].offset == 0)
+ size = ALIGN(size, FASTRPC_ALIGN);
+
+- size += (ctx->olaps[i].mend - ctx->olaps[i].mstart);
++ size += (ctx->olaps[oix].mend - ctx->olaps[oix].mstart);
+ }
+ }
+
+@@ -1097,7 +1108,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl,
+
+ sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0);
+ if (init.attrs)
+- sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 6, 0);
++ sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 4, 0);
+
+ err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
+ sc, args);
+@@ -1112,12 +1123,7 @@ err_invoke:
+ fl->init_mem = NULL;
+ fastrpc_buf_free(imem);
+ err_alloc:
+- if (map) {
+- spin_lock(&fl->lock);
+- list_del(&map->node);
+- spin_unlock(&fl->lock);
+- fastrpc_map_put(map);
+- }
++ fastrpc_map_put(map);
+ err:
+ kfree(args);
+
+@@ -1194,10 +1200,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
+ fastrpc_context_put(ctx);
+ }
+
+- list_for_each_entry_safe(map, m, &fl->maps, node) {
+- list_del(&map->node);
++ list_for_each_entry_safe(map, m, &fl->maps, node)
+ fastrpc_map_put(map);
+- }
+
+ list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
+ list_del(&buf->node);
+@@ -1284,7 +1288,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
+ }
+
+ if (copy_to_user(argp, &bp, sizeof(bp))) {
+- dma_buf_put(buf->dmabuf);
++ /*
++ * The usercopy failed, but we can't do much about it, as
++ * dma_buf_fd() already called fd_install() and made the
++ * file descriptor accessible for the current process. It
++ * might already be closed and dmabuf no longer valid when
++ * we reach this point. Therefore "leak" the fd and rely on
++ * the process exit path to do any required cleanup.
++ */
+ return -EFAULT;
+ }
+
+@@ -1342,17 +1353,18 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
+ struct fastrpc_req_munmap *req)
+ {
+ struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
+- struct fastrpc_buf *buf, *b;
++ struct fastrpc_buf *buf = NULL, *iter, *b;
+ struct fastrpc_munmap_req_msg req_msg;
+ struct device *dev = fl->sctx->dev;
+ int err;
+ u32 sc;
+
+ spin_lock(&fl->lock);
+- list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
+- if ((buf->raddr == req->vaddrout) && (buf->size == req->size))
++ list_for_each_entry_safe(iter, b, &fl->mmaps, node) {
++ if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) {
++ buf = iter;
+ break;
+- buf = NULL;
++ }
+ }
+ spin_unlock(&fl->lock);
+
+@@ -1540,7 +1552,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev)
+ of_property_read_u32(dev->of_node, "qcom,nsessions", &sessions);
+
+ spin_lock_irqsave(&cctx->lock, flags);
+- sess = &cctx->session[cctx->sesscount];
++ if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) {
++ dev_err(&pdev->dev, "too many sessions\n");
++ spin_unlock_irqrestore(&cctx->lock, flags);
++ return -ENOSPC;
++ }
++ sess = &cctx->session[cctx->sesscount++];
+ sess->used = false;
+ sess->valid = true;
+ sess->dev = dev;
+@@ -1553,13 +1570,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev)
+ struct fastrpc_session_ctx *dup_sess;
+
+ for (i = 1; i < sessions; i++) {
+- if (cctx->sesscount++ >= FASTRPC_MAX_SESSIONS)
++ if (cctx->sesscount >= FASTRPC_MAX_SESSIONS)
+ break;
+- dup_sess = &cctx->session[cctx->sesscount];
++ dup_sess = &cctx->session[cctx->sesscount++];
+ memcpy(dup_sess, sess, sizeof(*dup_sess));
+ }
+ }
+- cctx->sesscount++;
+ spin_unlock_irqrestore(&cctx->lock, flags);
+ rc = dma_set_mask(dev, DMA_BIT_MASK(32));
+ if (rc) {
+@@ -1661,8 +1677,10 @@ static void fastrpc_notify_users(struct fastrpc_user *user)
+ struct fastrpc_invoke_ctx *ctx;
+
+ spin_lock(&user->lock);
+- list_for_each_entry(ctx, &user->pending, node)
++ list_for_each_entry(ctx, &user->pending, node) {
++ ctx->retval = -EPIPE;
+ complete(&ctx->work);
++ }
+ spin_unlock(&user->lock);
+ }
+
+@@ -1672,7 +1690,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
+ struct fastrpc_user *user;
+ unsigned long flags;
+
++ /* No invocations past this point */
+ spin_lock_irqsave(&cctx->lock, flags);
++ cctx->rpdev = NULL;
+ list_for_each_entry(user, &cctx->users, user)
+ fastrpc_notify_users(user);
+ spin_unlock_irqrestore(&cctx->lock, flags);
+@@ -1680,7 +1700,6 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
+ misc_deregister(&cctx->miscdev);
+ of_platform_depopulate(&rpdev->dev);
+
+- cctx->rpdev = NULL;
+ fastrpc_channel_ctx_put(cctx);
+ }
+
+diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
+index 985f1f3dbd20f..0b46fd22c411b 100644
+--- a/drivers/misc/habanalabs/common/debugfs.c
++++ b/drivers/misc/habanalabs/common/debugfs.c
+@@ -856,6 +856,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
+ pci_set_power_state(hdev->pdev, PCI_D0);
+ pci_restore_state(hdev->pdev);
+ rc = pci_enable_device(hdev->pdev);
++ if (rc < 0)
++ return rc;
+ } else if (value == 2) {
+ pci_save_state(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
+index 8d2568c63f19e..a8e683964ab03 100644
+--- a/drivers/misc/habanalabs/common/firmware_if.c
++++ b/drivers/misc/habanalabs/common/firmware_if.c
+@@ -1703,6 +1703,9 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
+ return rc;
+ }
+
++ /* here we can mark the descriptor as valid as the content has been validated */
++ fw_loader->dynamic_loader.fw_desc_valid = true;
++
+ return 0;
+ }
+
+@@ -1759,7 +1762,13 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
+ return rc;
+ }
+
+- /* extract address copy the descriptor from */
++ /*
++	 * Extract the address to copy the descriptor from. In addition, as the
++	 * descriptor value is going to be overridden by new data, we mark it
++	 * as invalid.
++	 * It will be marked as valid again once it has been validated.
++ */
++ fw_loader->dynamic_loader.fw_desc_valid = false;
+ src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
+ response->ram_offset;
+ memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
+@@ -2239,6 +2248,9 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
+ dev_info(hdev->dev,
+ "Loading firmware to device, may take some time...\n");
+
++ /* initialize FW descriptor as invalid */
++ fw_loader->dynamic_loader.fw_desc_valid = false;
++
+ /*
+ * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
+ * It will be updated from FW after hl_fw_dynamic_request_descriptor().
+@@ -2325,7 +2337,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
+ return 0;
+
+ protocol_err:
+- fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
++ if (fw_loader->dynamic_loader.fw_desc_valid)
++ fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
+ le32_to_cpu(dyn_regs->cpu_boot_err1),
+ le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
+ le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
+diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
+index bebebcb163ee8..dfcd87b98ca08 100644
+--- a/drivers/misc/habanalabs/common/habanalabs.h
++++ b/drivers/misc/habanalabs/common/habanalabs.h
+@@ -992,6 +992,7 @@ struct fw_response {
+ * @image_region: region to copy the FW image to
+ * @fw_image_size: size of FW image to load
+ * @wait_for_bl_timeout: timeout for waiting for boot loader to respond
++ * @fw_desc_valid: true if FW descriptor has been validated and hence the data can be used
+ */
+ struct dynamic_fw_load_mgr {
+ struct fw_response response;
+@@ -999,6 +1000,7 @@ struct dynamic_fw_load_mgr {
+ struct pci_mem_region *image_region;
+ size_t fw_image_size;
+ u32 wait_for_bl_timeout;
++ bool fw_desc_valid;
+ };
+
+ /**
+diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+index 0f536f79dd9c9..e68e9f71c546a 100644
+--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
++++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+@@ -467,7 +467,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
+ {
+ /* MMU H/W fini was already done in device hw_fini() */
+
+- if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
++ if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
+index 14da87b38e835..8132200dca67f 100644
+--- a/drivers/misc/habanalabs/gaudi/gaudi.c
++++ b/drivers/misc/habanalabs/gaudi/gaudi.c
+@@ -3318,19 +3318,19 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
+ u32 nic_qm_err_cfg, irq_handler_offset;
+ u32 q_off;
+
+- mtr_base_en_lo = lower_32_bits(CFG_BASE +
++ mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+ mtr_base_en_hi = upper_32_bits(CFG_BASE +
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+- so_base_en_lo = lower_32_bits(CFG_BASE +
++ so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
+ so_base_en_hi = upper_32_bits(CFG_BASE +
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
+- mtr_base_ws_lo = lower_32_bits(CFG_BASE +
++ mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
+ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+ mtr_base_ws_hi = upper_32_bits(CFG_BASE +
+ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
+- so_base_ws_lo = lower_32_bits(CFG_BASE +
++ so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
+ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
+ so_base_ws_hi = upper_32_bits(CFG_BASE +
+ mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
+@@ -5744,15 +5744,17 @@ static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
+ {
+ struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
+ struct gaudi_device *gaudi = hdev->asic_specific;
+- u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
+- ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
++ u32 nic_queue_offset, nic_mask_q_id;
+
+ if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
+- (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
+- (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
+- dev_err(hdev->dev, "h/w queue %d is disabled\n",
+- parser->hw_queue_id);
+- return -EINVAL;
++ (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
++ nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
++ nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
++
++ if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
++ dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
++ return -EINVAL;
++ }
+ }
+
+ /* For internal queue jobs just check if CB address is valid */
+diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
+index 67c5b452dd356..88b91ad8e5413 100644
+--- a/drivers/misc/kgdbts.c
++++ b/drivers/misc/kgdbts.c
+@@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt)
+ {
+ if (strlen(opt) >= MAX_CONFIG_LEN) {
+ printk(KERN_ERR "kgdbts: config string too long\n");
+- return -ENOSPC;
++ return 1;
+ }
+ strcpy(config, opt);
+- return 0;
++ return 1;
+ }
+
+ __setup("kgdbts=", kgdbts_option_setup);
+diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
+index 0f54730c7ed56..98828030b5a4d 100644
+--- a/drivers/misc/lattice-ecp3-config.c
++++ b/drivers/misc/lattice-ecp3-config.c
+@@ -76,12 +76,12 @@ static void firmware_load(const struct firmware *fw, void *context)
+
+ if (fw == NULL) {
+ dev_err(&spi->dev, "Cannot load firmware, aborting\n");
+- return;
++ goto out;
+ }
+
+ if (fw->size == 0) {
+ dev_err(&spi->dev, "Error: Firmware size is 0!\n");
+- return;
++ goto out;
+ }
+
+ /* Fill dummy data (24 stuffing bits for commands) */
+@@ -103,7 +103,7 @@ static void firmware_load(const struct firmware *fw, void *context)
+ dev_err(&spi->dev,
+ "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n",
+ jedec_id);
+- return;
++ goto out;
+ }
+
+ dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name);
+@@ -116,7 +116,7 @@ static void firmware_load(const struct firmware *fw, void *context)
+ buffer = kzalloc(fw->size + 8, GFP_KERNEL);
+ if (!buffer) {
+ dev_err(&spi->dev, "Error: Can't allocate memory!\n");
+- return;
++ goto out;
+ }
+
+ /*
+@@ -155,7 +155,7 @@ static void firmware_load(const struct firmware *fw, void *context)
+ "Error: Timeout waiting for FPGA to clear (status=%08x)!\n",
+ status);
+ kfree(buffer);
+- return;
++ goto out;
+ }
+
+ dev_info(&spi->dev, "Configuring the FPGA...\n");
+@@ -181,7 +181,7 @@ static void firmware_load(const struct firmware *fw, void *context)
+ release_firmware(fw);
+
+ kfree(buffer);
+-
++out:
+ complete(&data->fw_loaded);
+ }
+
+diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
+index aa12097668d33..e2984ce51fe4d 100644
+--- a/drivers/misc/lkdtm/Makefile
++++ b/drivers/misc/lkdtm/Makefile
+@@ -20,7 +20,7 @@ CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
+
+ OBJCOPYFLAGS :=
+ OBJCOPYFLAGS_rodata_objcopy.o := \
+- --rename-section .noinstr.text=.rodata,alloc,readonly,load
++ --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents
+ targets += rodata.o rodata_objcopy.o
+ $(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE
+ $(call if_changed,objcopy)
+diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
+index 4282b625200f5..fac4a811b97b6 100644
+--- a/drivers/misc/lkdtm/bugs.c
++++ b/drivers/misc/lkdtm/bugs.c
+@@ -248,6 +248,11 @@ void lkdtm_ARRAY_BOUNDS(void)
+
+ not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL);
+ checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL);
++ if (!not_checked || !checked) {
++ kfree(not_checked);
++ kfree(checked);
++ return;
++ }
+
+ pr_info("Array access within bounds ...\n");
+ /* For both, touch all bytes in the actual member size. */
+@@ -267,7 +272,10 @@ void lkdtm_ARRAY_BOUNDS(void)
+ kfree(not_checked);
+ kfree(checked);
+ pr_err("FAIL: survived array bounds overflow!\n");
+- pr_expected_config(CONFIG_UBSAN_BOUNDS);
++ if (IS_ENABLED(CONFIG_UBSAN_BOUNDS))
++ pr_expected_config(CONFIG_UBSAN_TRAP);
++ else
++ pr_expected_config(CONFIG_UBSAN_BOUNDS);
+ }
+
+ void lkdtm_CORRUPT_LIST_ADD(void)
+diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
+index c212a253edde0..ef9a24aabfc3f 100644
+--- a/drivers/misc/lkdtm/lkdtm.h
++++ b/drivers/misc/lkdtm/lkdtm.h
+@@ -9,19 +9,19 @@
+ extern char *lkdtm_kernel_info;
+
+ #define pr_expected_config(kconfig) \
+-{ \
++do { \
+ if (IS_ENABLED(kconfig)) \
+ pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \
+ lkdtm_kernel_info); \
+ else \
+ pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \
+ lkdtm_kernel_info); \
+-}
++} while (0)
+
+ #ifndef MODULE
+ int lkdtm_check_bool_cmdline(const char *param);
+ #define pr_expected_config_param(kconfig, param) \
+-{ \
++do { \
+ if (IS_ENABLED(kconfig)) { \
+ switch (lkdtm_check_bool_cmdline(param)) { \
+ case 0: \
+@@ -52,7 +52,7 @@ int lkdtm_check_bool_cmdline(const char *param);
+ break; \
+ } \
+ } \
+-}
++} while (0)
+ #else
+ #define pr_expected_config_param(kconfig, param) pr_expected_config(kconfig)
+ #endif
+diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
+index 9161ce7ed47a6..3fead5efe523a 100644
+--- a/drivers/misc/lkdtm/usercopy.c
++++ b/drivers/misc/lkdtm/usercopy.c
+@@ -30,12 +30,12 @@ static const unsigned char test_text[] = "This is a test.\n";
+ */
+ static noinline unsigned char *trick_compiler(unsigned char *stack)
+ {
+- return stack + 0;
++ return stack + unconst;
+ }
+
+ static noinline unsigned char *do_usercopy_stack_callee(int value)
+ {
+- unsigned char buf[32];
++ unsigned char buf[128];
+ int i;
+
+ /* Exercise stack to avoid everything living in registers. */
+@@ -43,7 +43,12 @@ static noinline unsigned char *do_usercopy_stack_callee(int value)
+ buf[i] = value & 0xff;
+ }
+
+- return trick_compiler(buf);
++ /*
++ * Put the target buffer in the middle of stack allocation
++ * so that we don't step on future stack users regardless
++ * of stack growth direction.
++ */
++ return trick_compiler(&buf[(128/2)-32]);
+ }
+
+ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
+@@ -66,6 +71,12 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
+ bad_stack -= sizeof(unsigned long);
+ }
+
++#ifdef ARCH_HAS_CURRENT_STACK_POINTER
++ pr_info("stack : %px\n", (void *)current_stack_pointer);
++#endif
++ pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack));
++ pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack));
++
+ user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
+index 67844089db216..9d082287dbe02 100644
+--- a/drivers/misc/mei/bus-fixup.c
++++ b/drivers/misc/mei/bus-fixup.c
+@@ -175,7 +175,7 @@ static int mei_fwver(struct mei_cl_device *cldev)
+ ret = __mei_cl_send(cldev->cl, (u8 *)&req, sizeof(req), 0,
+ MEI_CL_IO_TX_BLOCKING);
+ if (ret < 0) {
+- dev_err(&cldev->dev, "Could not send ReqFWVersion cmd\n");
++ dev_err(&cldev->dev, "Could not send ReqFWVersion cmd ret = %d\n", ret);
+ return ret;
+ }
+
+@@ -187,7 +187,7 @@ static int mei_fwver(struct mei_cl_device *cldev)
+ * Should be at least one version block,
+ * error out if nothing found
+ */
+- dev_err(&cldev->dev, "Could not read FW version\n");
++ dev_err(&cldev->dev, "Could not read FW version ret = %d\n", bytes_recv);
+ return -EIO;
+ }
+
+@@ -337,7 +337,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
+ ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(cmd), 0,
+ MEI_CL_IO_TX_BLOCKING);
+ if (ret < 0) {
+- dev_err(bus->dev, "Could not send IF version cmd\n");
++ dev_err(bus->dev, "Could not send IF version cmd ret = %d\n", ret);
+ return ret;
+ }
+
+@@ -352,7 +352,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
+ bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, &vtag,
+ 0, 0);
+ if (bytes_recv < 0 || (size_t)bytes_recv < if_version_length) {
+- dev_err(bus->dev, "Could not read IF version\n");
++ dev_err(bus->dev, "Could not read IF version ret = %d\n", bytes_recv);
+ ret = -EIO;
+ goto err;
+ }
+diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
+index be41843df75bc..cf2b8261da144 100644
+--- a/drivers/misc/mei/hbm.c
++++ b/drivers/misc/mei/hbm.c
+@@ -672,10 +672,14 @@ static void mei_hbm_cl_dma_map_res(struct mei_device *dev,
+ if (!cl)
+ return;
+
+- dev_dbg(dev->dev, "cl dma map result = %d\n", res->status);
+- cl->status = res->status;
+- if (!cl->status)
++ if (res->status) {
++ dev_err(dev->dev, "cl dma map failed %d\n", res->status);
++ cl->status = -EFAULT;
++ } else {
++ dev_dbg(dev->dev, "cl dma map succeeded\n");
+ cl->dma_mapped = 1;
++ cl->status = 0;
++ }
+ wake_up(&cl->wait);
+ }
+
+@@ -698,10 +702,14 @@ static void mei_hbm_cl_dma_unmap_res(struct mei_device *dev,
+ if (!cl)
+ return;
+
+- dev_dbg(dev->dev, "cl dma unmap result = %d\n", res->status);
+- cl->status = res->status;
+- if (!cl->status)
++ if (res->status) {
++ dev_err(dev->dev, "cl dma unmap failed %d\n", res->status);
++ cl->status = -EFAULT;
++ } else {
++ dev_dbg(dev->dev, "cl dma unmap succeeded\n");
+ cl->dma_mapped = 0;
++ cl->status = 0;
++ }
+ wake_up(&cl->wait);
+ }
+
+@@ -1343,7 +1351,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
+
+ if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+ dev->hbm_state != MEI_HBM_CAP_SETUP) {
+- if (dev->dev_state == MEI_DEV_POWER_DOWN) {
++ if (dev->dev_state == MEI_DEV_POWER_DOWN ||
++ dev->dev_state == MEI_DEV_POWERING_DOWN) {
+ dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n");
+ return 0;
+ }
+diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
+index ec2a4fce85818..5c4295d366eae 100644
+--- a/drivers/misc/mei/hdcp/mei_hdcp.c
++++ b/drivers/misc/mei/hdcp/mei_hdcp.c
+@@ -859,8 +859,8 @@ static void mei_hdcp_remove(struct mei_cl_device *cldev)
+ dev_warn(&cldev->dev, "mei_cldev_disable() failed\n");
+ }
+
+-#define MEI_UUID_HDCP GUID_INIT(0xB638AB7E, 0x94E2, 0x4EA2, 0xA5, \
+- 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04)
++#define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, 0xA5, \
++ 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04)
+
+ static const struct mei_cl_device_id mei_hdcp_tbl[] = {
+ { .uuid = MEI_UUID_HDCP, .version = MEI_CL_VERSION_ANY },
+diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
+index 67bb6a25fd0a0..609519571545f 100644
+--- a/drivers/misc/mei/hw-me-regs.h
++++ b/drivers/misc/mei/hw-me-regs.h
+@@ -107,6 +107,11 @@
+ #define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */
+ #define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */
+ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */
++#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */
++
++#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */
++
++#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */
+
+ /*
+ * MEI HW Section
+@@ -120,6 +125,7 @@
+ #define PCI_CFG_HFS_2 0x48
+ #define PCI_CFG_HFS_3 0x60
+ # define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070
++# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000
+ # define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060
+ #define PCI_CFG_HFS_4 0x64
+ #define PCI_CFG_HFS_5 0x68
+diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
+index d3a6c07286451..fbc4c95818645 100644
+--- a/drivers/misc/mei/hw-me.c
++++ b/drivers/misc/mei/hw-me.c
+@@ -1405,16 +1405,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev)
+ .quirk_probe = mei_me_fw_type_sps_4
+
+ /**
+- * mei_me_fw_type_sps() - check for sps sku
++ * mei_me_fw_type_sps_ign() - check for sps or ign sku
+ *
+- * Read ME FW Status register to check for SPS Firmware.
+- * The SPS FW is only signaled in pci function 0
++ * Read ME FW Status register to check for SPS or IGN Firmware.
++ * The SPS/IGN FW is only signaled in pci function 0
+ *
+ * @pdev: pci device
+ *
+- * Return: true in case of SPS firmware
++ * Return: true in case of SPS/IGN firmware
+ */
+-static bool mei_me_fw_type_sps(const struct pci_dev *pdev)
++static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev)
+ {
+ u32 reg;
+ u32 fw_type;
+@@ -1427,14 +1427,15 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev)
+
+ dev_dbg(&pdev->dev, "fw type is %d\n", fw_type);
+
+- return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
++ return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN ||
++ fw_type == PCI_CFG_HFS_3_FW_SKU_SPS;
+ }
+
+ #define MEI_CFG_KIND_ITOUCH \
+ .kind = "itouch"
+
+-#define MEI_CFG_FW_SPS \
+- .quirk_probe = mei_me_fw_type_sps
++#define MEI_CFG_FW_SPS_IGN \
++ .quirk_probe = mei_me_fw_type_sps_ign
+
+ #define MEI_CFG_FW_VER_SUPP \
+ .fw_ver_supported = 1
+@@ -1535,7 +1536,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = {
+ MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
+ MEI_CFG_DMA_128,
+- MEI_CFG_FW_SPS,
++ MEI_CFG_FW_SPS_IGN,
+ };
+
+ /* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion
+@@ -1545,7 +1546,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = {
+ MEI_CFG_KIND_ITOUCH,
+ MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
+- MEI_CFG_FW_SPS,
++ MEI_CFG_FW_SPS_IGN,
+ };
+
+ /* Tiger Lake and newer devices */
+@@ -1562,7 +1563,7 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = {
+ MEI_CFG_FW_VER_SUPP,
+ MEI_CFG_DMA_128,
+ MEI_CFG_TRC,
+- MEI_CFG_FW_SPS,
++ MEI_CFG_FW_SPS_IGN,
+ };
+
+ /*
+diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
+index a67f4f2d33a93..0706322154cbe 100644
+--- a/drivers/misc/mei/interrupt.c
++++ b/drivers/misc/mei/interrupt.c
+@@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev,
+ list_for_each_entry(cl, &dev->file_list, link) {
+ if (mei_cl_hbm_equal(cl, mei_hdr)) {
+ cl_dbg(dev, cl, "got a message\n");
+- break;
++ ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);
++ goto reset_slots;
+ }
+ }
+
+ /* if no recipient cl was found we assume corrupted header */
+- if (&cl->link == &dev->file_list) {
+- /* A message for not connected fixed address clients
+- * should be silently discarded
+- * On power down client may be force cleaned,
+- * silently discard such messages
+- */
+- if (hdr_is_fixed(mei_hdr) ||
+- dev->dev_state == MEI_DEV_POWER_DOWN) {
+- mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
+- ret = 0;
+- goto reset_slots;
+- }
+- dev_err(dev->dev, "no destination client found 0x%08X\n",
+- dev->rd_msg_hdr[0]);
+- ret = -EBADMSG;
+- goto end;
++ /* A message for not connected fixed address clients
++ * should be silently discarded
++ * On power down client may be force cleaned,
++ * silently discard such messages
++ */
++ if (hdr_is_fixed(mei_hdr) ||
++ dev->dev_state == MEI_DEV_POWER_DOWN) {
++ mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length);
++ ret = 0;
++ goto reset_slots;
+ }
+-
+- ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list);
+-
++ dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]);
++ ret = -EBADMSG;
++ goto end;
+
+ reset_slots:
+ /* reset the number of slots and header */
+diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
+index 3a45aaf002ac8..f2765d6b8c043 100644
+--- a/drivers/misc/mei/pci-me.c
++++ b/drivers/misc/mei/pci-me.c
+@@ -113,6 +113,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
++ {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
++
++ {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
++
++ {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)},
+
+ /* required last entry */
+ {0, }
+diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
+index a68738f382521..f1f669efe050d 100644
+--- a/drivers/misc/ocxl/config.c
++++ b/drivers/misc/ocxl/config.c
+@@ -204,6 +204,18 @@ static int read_dvsec_vendor(struct pci_dev *dev)
+ return 0;
+ }
+
++/**
++ * get_dvsec_vendor0() - Find a related PCI device (function 0)
++ * @dev: PCI device to match
++ * @dev0: The PCI device (function 0) found
++ * @out_pos: The position of PCI device (function 0)
++ *
++ * Returns 0 on success, negative on failure.
++ *
++ * NOTE: On success, the reference count of dev0 is incremented,
++ * so after using it, the callers must call pci_dev_put() to give
++ * up the reference.
++ */
+ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
+ int *out_pos)
+ {
+@@ -213,10 +225,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
+ dev = get_function_0(dev);
+ if (!dev)
+ return -1;
++ } else {
++ dev = pci_dev_get(dev);
+ }
+ pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+- if (!pos)
++ if (!pos) {
++ pci_dev_put(dev);
+ return -1;
++ }
+ *dev0 = dev;
+ *out_pos = pos;
+ return 0;
+@@ -233,6 +249,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
+
+ pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ &reset_reload);
++ pci_dev_put(dev0);
+ *val = !!(reset_reload & BIT(0));
+ return 0;
+ }
+@@ -254,6 +271,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
+ reset_reload &= ~BIT(0);
+ pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+ reset_reload);
++ pci_dev_put(dev0);
+ return 0;
+ }
+
+diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
+index e70525eedaaeb..a199c7ce3f81d 100644
+--- a/drivers/misc/ocxl/file.c
++++ b/drivers/misc/ocxl/file.c
+@@ -259,6 +259,8 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
+ if (IS_ERR(ev_ctx))
+ return PTR_ERR(ev_ctx);
+ rc = ocxl_irq_set_handler(ctx, irq_id, irq_handler, irq_free, ev_ctx);
++ if (rc)
++ eventfd_ctx_put(ev_ctx);
+ break;
+
+ case OCXL_IOCTL_GET_METADATA:
+@@ -541,8 +543,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu)
+ goto err_put;
+
+ rc = device_register(&info->dev);
+- if (rc)
+- goto err_put;
++ if (rc) {
++ free_minor(info);
++ put_device(&info->dev);
++ return rc;
++ }
+
+ rc = ocxl_sysfs_register_afu(info);
+ if (rc)
+@@ -558,7 +563,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu)
+
+ err_unregister:
+ ocxl_sysfs_unregister_afu(info); // safe to call even if register failed
++ free_minor(info);
+ device_unregister(&info->dev);
++ return rc;
+ err_put:
+ ocxl_afu_put(afu);
+ free_minor(info);
+diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
+index 2ed7e3aaff3a8..2ad28f1b1461c 100644
+--- a/drivers/misc/pci_endpoint_test.c
++++ b/drivers/misc/pci_endpoint_test.c
+@@ -332,6 +332,22 @@ static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
+ return false;
+ }
+
++static int pci_endpoint_test_validate_xfer_params(struct device *dev,
++ struct pci_endpoint_test_xfer_param *param, size_t alignment)
++{
++ if (!param->size) {
++ dev_dbg(dev, "Data size is zero\n");
++ return -EINVAL;
++ }
++
++ if (param->size > SIZE_MAX - alignment) {
++ dev_dbg(dev, "Maximum transfer data size exceeded\n");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test,
+ unsigned long arg)
+ {
+@@ -363,9 +379,11 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test,
+ return false;
+ }
+
++ err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
++ if (err)
++ return false;
++
+ size = param.size;
+- if (size > SIZE_MAX - alignment)
+- goto err;
+
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+@@ -497,9 +515,11 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test,
+ return false;
+ }
+
++ err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
++ if (err)
++ return false;
++
+ size = param.size;
+- if (size > SIZE_MAX - alignment)
+- goto err;
+
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+@@ -595,9 +615,11 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test,
+ return false;
+ }
+
++ err = pci_endpoint_test_validate_xfer_params(dev, &param, alignment);
++ if (err)
++ return false;
++
+ size = param.size;
+- if (size > SIZE_MAX - alignment)
+- goto err;
+
+ use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA);
+ if (use_dma)
+@@ -706,6 +728,10 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
+ struct pci_dev *pdev = test->pdev;
+
+ mutex_lock(&test->mutex);
++
++ reinit_completion(&test->irq_raised);
++ test->last_irq = -ENODATA;
++
+ switch (cmd) {
+ case PCITEST_BAR:
+ bar = arg;
+@@ -915,6 +941,9 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
+ if (id < 0)
+ return;
+
++ pci_endpoint_test_release_irq(test);
++ pci_endpoint_test_free_irq_vectors(test);
++
+ misc_deregister(&test->miscdev);
+ kfree(misc_device->name);
+ kfree(test->name);
+@@ -924,9 +953,6 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
+ pci_iounmap(pdev, test->bar[bar]);
+ }
+
+- pci_endpoint_test_release_irq(test);
+- pci_endpoint_test_free_irq_vectors(test);
+-
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ }
+diff --git a/drivers/misc/pvpanic/pvpanic-mmio.c b/drivers/misc/pvpanic/pvpanic-mmio.c
+index be40160849792..61dbff5f0065c 100644
+--- a/drivers/misc/pvpanic/pvpanic-mmio.c
++++ b/drivers/misc/pvpanic/pvpanic-mmio.c
+@@ -100,7 +100,7 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
+ pi->base = base;
+ pi->capability = PVPANIC_PANICKED | PVPANIC_CRASH_LOADED;
+
+- /* initlize capability by RDPT */
++ /* initialize capability by RDPT */
+ pi->capability &= ioread8(base);
+ pi->events = pi->capability;
+
+diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c
+index bb7aa63685388..b9e6400a574b0 100644
+--- a/drivers/misc/pvpanic/pvpanic.c
++++ b/drivers/misc/pvpanic/pvpanic.c
+@@ -34,7 +34,9 @@ pvpanic_send_event(unsigned int event)
+ {
+ struct pvpanic_instance *pi_cur;
+
+- spin_lock(&pvpanic_lock);
++ if (!spin_trylock(&pvpanic_lock))
++ return;
++
+ list_for_each_entry(pi_cur, &pvpanic_list, list) {
+ if (event & pi_cur->capability & pi_cur->events)
+ iowrite8(event, pi_cur->base);
+@@ -56,9 +58,13 @@ pvpanic_panic_notify(struct notifier_block *nb, unsigned long code,
+ return NOTIFY_DONE;
+ }
+
++/*
++ * Call our notifier very early on panic, deferring the
++ * action taken to the hypervisor.
++ */
+ static struct notifier_block pvpanic_panic_nb = {
+ .notifier_call = pvpanic_panic_notify,
+- .priority = 1, /* let this called before broken drm_fb_helper */
++ .priority = INT_MAX,
+ };
+
+ static void pvpanic_remove(void *param)
+diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
+index d7ef61e602ede..b836936e97471 100644
+--- a/drivers/misc/sgi-gru/grufault.c
++++ b/drivers/misc/sgi-gru/grufault.c
+@@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb)
+ if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+ return -EINVAL;
+
++again:
+ gts = gru_find_lock_gts(cb);
+ if (!gts)
+ return -EINVAL;
+@@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb)
+ if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
+ goto exit;
+
+- gru_check_context_placement(gts);
++ if (gru_check_context_placement(gts)) {
++ gru_unlock_gts(gts);
++ gru_unload_context(gts, 1);
++ goto again;
++ }
+
+ /*
+ * CCH may contain stale data if ts_force_cch_reload is set.
+@@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg)
+ } else {
+ gts->ts_user_blade_id = req.val1;
+ gts->ts_user_chiplet_id = req.val0;
+- gru_check_context_placement(gts);
++ if (gru_check_context_placement(gts)) {
++ gru_unlock_gts(gts);
++ gru_unload_context(gts, 1);
++ return ret;
++ }
+ }
+ break;
+ case sco_gseg_owner:
+diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
+index 9afda47efbf2e..3a16eb8e03f73 100644
+--- a/drivers/misc/sgi-gru/grumain.c
++++ b/drivers/misc/sgi-gru/grumain.c
+@@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru,
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+-void gru_check_context_placement(struct gru_thread_state *gts)
++int gru_check_context_placement(struct gru_thread_state *gts)
+ {
+ struct gru_state *gru;
++ int ret = 0;
+
+ /*
+ * If the current task is the context owner, verify that the
+@@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts)
+ * references. Pthread apps use non-owner references to the CBRs.
+ */
+ gru = gts->ts_gru;
++ /*
++ * If gru or gts->ts_tgid_owner isn't initialized properly, return
++ * success to indicate that the caller does not need to unload the
++ * gru context.The caller is responsible for their inspection and
++	 * gru context. The caller is responsible for their inspection and
++ */
+ if (!gru || gts->ts_tgid_owner != current->tgid)
+- return;
++ return ret;
+
+ if (!gru_check_chiplet_assignment(gru, gts)) {
+ STAT(check_context_unload);
+- gru_unload_context(gts, 1);
++ ret = -EINVAL;
+ } else if (gru_retarget_intr(gts)) {
+ STAT(check_context_retarget_intr);
+ }
++
++ return ret;
+ }
+
+
+@@ -934,7 +943,12 @@ again:
+ mutex_lock(&gts->ts_ctxlock);
+ preempt_disable();
+
+- gru_check_context_placement(gts);
++ if (gru_check_context_placement(gts)) {
++ preempt_enable();
++ mutex_unlock(&gts->ts_ctxlock);
++ gru_unload_context(gts, 1);
++ return VM_FAULT_NOPAGE;
++ }
+
+ if (!gts->ts_gru) {
+ STAT(load_user_context);
+diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
+index e4c067c61251b..5c9783150cdfa 100644
+--- a/drivers/misc/sgi-gru/grutables.h
++++ b/drivers/misc/sgi-gru/grutables.h
+@@ -638,7 +638,7 @@ extern int gru_user_flush_tlb(unsigned long arg);
+ extern int gru_user_unload_context(unsigned long arg);
+ extern int gru_get_exception_detail(unsigned long arg);
+ extern int gru_set_context_option(unsigned long address);
+-extern void gru_check_context_placement(struct gru_thread_state *gts);
++extern int gru_check_context_placement(struct gru_thread_state *gts);
+ extern int gru_cpu_fault_map_id(void);
+ extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+ extern void gru_flush_all_tlb(struct gru_state *gru);
+diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
+index 228f2eb1d4762..2aebbfda104d8 100644
+--- a/drivers/misc/tifm_7xx1.c
++++ b/drivers/misc/tifm_7xx1.c
+@@ -190,7 +190,7 @@ static void tifm_7xx1_switch_media(struct work_struct *work)
+ spin_unlock_irqrestore(&fm->lock, flags);
+ }
+ if (sock)
+- tifm_free_device(&sock->dev);
++ put_device(&sock->dev);
+ }
+ spin_lock_irqsave(&fm->lock, flags);
+ }
+diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
+index 488eeb2811aeb..976d051071dc3 100644
+--- a/drivers/misc/uacce/uacce.c
++++ b/drivers/misc/uacce/uacce.c
+@@ -9,43 +9,38 @@
+
+ static struct class *uacce_class;
+ static dev_t uacce_devt;
+-static DEFINE_MUTEX(uacce_mutex);
+ static DEFINE_XARRAY_ALLOC(uacce_xa);
+
+-static int uacce_start_queue(struct uacce_queue *q)
++/*
++ * If the parent driver or the device disappears, the queue state is invalid and
++ * ops are not usable anymore.
++ */
++static bool uacce_queue_is_valid(struct uacce_queue *q)
+ {
+- int ret = 0;
++ return q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED;
++}
+
+- mutex_lock(&uacce_mutex);
++static int uacce_start_queue(struct uacce_queue *q)
++{
++ int ret;
+
+- if (q->state != UACCE_Q_INIT) {
+- ret = -EINVAL;
+- goto out_with_lock;
+- }
++ if (q->state != UACCE_Q_INIT)
++ return -EINVAL;
+
+ if (q->uacce->ops->start_queue) {
+ ret = q->uacce->ops->start_queue(q);
+ if (ret < 0)
+- goto out_with_lock;
++ return ret;
+ }
+
+ q->state = UACCE_Q_STARTED;
+-
+-out_with_lock:
+- mutex_unlock(&uacce_mutex);
+-
+- return ret;
++ return 0;
+ }
+
+ static int uacce_put_queue(struct uacce_queue *q)
+ {
+ struct uacce_device *uacce = q->uacce;
+
+- mutex_lock(&uacce_mutex);
+-
+- if (q->state == UACCE_Q_ZOMBIE)
+- goto out;
+-
+ if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue)
+ uacce->ops->stop_queue(q);
+
+@@ -54,8 +49,6 @@ static int uacce_put_queue(struct uacce_queue *q)
+ uacce->ops->put_queue(q);
+
+ q->state = UACCE_Q_ZOMBIE;
+-out:
+- mutex_unlock(&uacce_mutex);
+
+ return 0;
+ }
+@@ -65,20 +58,36 @@ static long uacce_fops_unl_ioctl(struct file *filep,
+ {
+ struct uacce_queue *q = filep->private_data;
+ struct uacce_device *uacce = q->uacce;
++ long ret = -ENXIO;
++
++ /*
++ * uacce->ops->ioctl() may take the mmap_lock when copying arg to/from
++ * user. Avoid a circular lock dependency with uacce_fops_mmap(), which
++ * gets called with mmap_lock held, by taking uacce->mutex instead of
++ * q->mutex. Doing this in uacce_fops_mmap() is not possible because
++ * uacce_fops_open() calls iommu_sva_bind_device(), which takes
++ * mmap_lock, while holding uacce->mutex.
++ */
++ mutex_lock(&uacce->mutex);
++ if (!uacce_queue_is_valid(q))
++ goto out_unlock;
+
+ switch (cmd) {
+ case UACCE_CMD_START_Q:
+- return uacce_start_queue(q);
+-
++ ret = uacce_start_queue(q);
++ break;
+ case UACCE_CMD_PUT_Q:
+- return uacce_put_queue(q);
+-
++ ret = uacce_put_queue(q);
++ break;
+ default:
+- if (!uacce->ops->ioctl)
+- return -EINVAL;
+-
+- return uacce->ops->ioctl(q, cmd, arg);
++ if (uacce->ops->ioctl)
++ ret = uacce->ops->ioctl(q, cmd, arg);
++ else
++ ret = -EINVAL;
+ }
++out_unlock:
++ mutex_unlock(&uacce->mutex);
++ return ret;
+ }
+
+ #ifdef CONFIG_COMPAT
+@@ -136,6 +145,13 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
+ if (!q)
+ return -ENOMEM;
+
++ mutex_lock(&uacce->mutex);
++
++ if (!uacce->parent) {
++ ret = -EINVAL;
++ goto out_with_mem;
++ }
++
+ ret = uacce_bind_queue(uacce, q);
+ if (ret)
+ goto out_with_mem;
+@@ -152,10 +168,9 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
+ filep->private_data = q;
+ uacce->inode = inode;
+ q->state = UACCE_Q_INIT;
+-
+- mutex_lock(&uacce->queues_lock);
++ mutex_init(&q->mutex);
+ list_add(&q->list, &uacce->queues);
+- mutex_unlock(&uacce->queues_lock);
++ mutex_unlock(&uacce->mutex);
+
+ return 0;
+
+@@ -163,18 +178,20 @@ out_with_bond:
+ uacce_unbind_queue(q);
+ out_with_mem:
+ kfree(q);
++ mutex_unlock(&uacce->mutex);
+ return ret;
+ }
+
+ static int uacce_fops_release(struct inode *inode, struct file *filep)
+ {
+ struct uacce_queue *q = filep->private_data;
++ struct uacce_device *uacce = q->uacce;
+
+- mutex_lock(&q->uacce->queues_lock);
+- list_del(&q->list);
+- mutex_unlock(&q->uacce->queues_lock);
++ mutex_lock(&uacce->mutex);
+ uacce_put_queue(q);
+ uacce_unbind_queue(q);
++ list_del(&q->list);
++ mutex_unlock(&uacce->mutex);
+ kfree(q);
+
+ return 0;
+@@ -217,10 +234,9 @@ static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+ vma->vm_private_data = q;
+ qfr->type = type;
+
+- mutex_lock(&uacce_mutex);
+-
+- if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) {
+- ret = -EINVAL;
++ mutex_lock(&q->mutex);
++ if (!uacce_queue_is_valid(q)) {
++ ret = -ENXIO;
+ goto out_with_lock;
+ }
+
+@@ -248,12 +264,12 @@ static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+ }
+
+ q->qfrs[type] = qfr;
+- mutex_unlock(&uacce_mutex);
++ mutex_unlock(&q->mutex);
+
+ return ret;
+
+ out_with_lock:
+- mutex_unlock(&uacce_mutex);
++ mutex_unlock(&q->mutex);
+ kfree(qfr);
+ return ret;
+ }
+@@ -262,12 +278,20 @@ static __poll_t uacce_fops_poll(struct file *file, poll_table *wait)
+ {
+ struct uacce_queue *q = file->private_data;
+ struct uacce_device *uacce = q->uacce;
++ __poll_t ret = 0;
++
++ mutex_lock(&q->mutex);
++ if (!uacce_queue_is_valid(q))
++ goto out_unlock;
+
+ poll_wait(file, &q->wait, wait);
++
+ if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q))
+- return EPOLLIN | EPOLLRDNORM;
++ ret = EPOLLIN | EPOLLRDNORM;
+
+- return 0;
++out_unlock:
++ mutex_unlock(&q->mutex);
++ return ret;
+ }
+
+ static const struct file_operations uacce_fops = {
+@@ -450,7 +474,7 @@ struct uacce_device *uacce_alloc(struct device *parent,
+ goto err_with_uacce;
+
+ INIT_LIST_HEAD(&uacce->queues);
+- mutex_init(&uacce->queues_lock);
++ mutex_init(&uacce->mutex);
+ device_initialize(&uacce->dev);
+ uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id);
+ uacce->dev.class = uacce_class;
+@@ -507,13 +531,23 @@ void uacce_remove(struct uacce_device *uacce)
+ if (uacce->inode)
+ unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
+
++ /*
++ * uacce_fops_open() may be running concurrently, even after we remove
++ * the cdev. Holding uacce->mutex ensures that open() does not obtain a
++ * removed uacce device.
++ */
++ mutex_lock(&uacce->mutex);
+ /* ensure no open queue remains */
+- mutex_lock(&uacce->queues_lock);
+ list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
++ /*
++ * Taking q->mutex ensures that fops do not use the defunct
++ * uacce->ops after the queue is disabled.
++ */
++ mutex_lock(&q->mutex);
+ uacce_put_queue(q);
++ mutex_unlock(&q->mutex);
+ uacce_unbind_queue(q);
+ }
+- mutex_unlock(&uacce->queues_lock);
+
+ /* disable sva now since no opened queues */
+ uacce_disable_sva(uacce);
+@@ -521,6 +555,13 @@ void uacce_remove(struct uacce_device *uacce)
+ if (uacce->cdev)
+ cdev_device_del(uacce->cdev, &uacce->dev);
+ xa_erase(&uacce_xa, uacce->dev_id);
++ /*
++ * uacce exists as long as there are open fds, but ops will be freed
++ * now. Ensure that bugs cause NULL deref rather than use-after-free.
++ */
++ uacce->ops = NULL;
++ uacce->parent = NULL;
++ mutex_unlock(&uacce->mutex);
+ put_device(&uacce->dev);
+ }
+ EXPORT_SYMBOL_GPL(uacce_remove);
+diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
+index f1d8ba6d48574..dab8ad9fed6b3 100644
+--- a/drivers/misc/vmw_balloon.c
++++ b/drivers/misc/vmw_balloon.c
+@@ -1711,7 +1711,7 @@ static void __init vmballoon_debugfs_init(struct vmballoon *b)
+ static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
+ {
+ static_key_disable(&balloon_stat_enabled.key);
+- debugfs_remove(debugfs_lookup("vmmemctl", NULL));
++ debugfs_lookup_and_remove("vmmemctl", NULL);
+ kfree(b->stats);
+ b->stats = NULL;
+ }
+diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
+index da1e2a773823e..abe79f6fd2a79 100644
+--- a/drivers/misc/vmw_vmci/vmci_host.c
++++ b/drivers/misc/vmw_vmci/vmci_host.c
+@@ -165,10 +165,16 @@ static int vmci_host_close(struct inode *inode, struct file *filp)
+ static __poll_t vmci_host_poll(struct file *filp, poll_table *wait)
+ {
+ struct vmci_host_dev *vmci_host_dev = filp->private_data;
+- struct vmci_ctx *context = vmci_host_dev->context;
++ struct vmci_ctx *context;
+ __poll_t mask = 0;
+
+ if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
++ /*
++ * Read context only if ct_type == VMCIOBJ_CONTEXT to make
++ * sure that context is initialized
++ */
++ context = vmci_host_dev->context;
++
+ /* Check for VMCI calls to this VM context. */
+ if (wait)
+ poll_wait(filp, &context->host_context.wait_queue,
+@@ -242,6 +248,8 @@ static int vmci_host_setup_notify(struct vmci_ctx *context,
+ context->notify_page = NULL;
+ return VMCI_ERROR_GENERIC;
+ }
++ if (context->notify_page == NULL)
++ return VMCI_ERROR_UNAVAILABLE;
+
+ /*
+ * Map the locked page and set up notify pointer.
+diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
+index 94ebf7f3fd58a..fe67e39d68543 100644
+--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
++++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
+@@ -854,6 +854,7 @@ static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
+ u32 context_id = vmci_get_context_id();
+ struct vmci_event_qp ev;
+
++ memset(&ev, 0, sizeof(ev));
+ ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+@@ -1467,6 +1468,7 @@ static int qp_notify_peer(bool attach,
+ * kernel.
+ */
+
++ memset(&ev, 0, sizeof(ev));
+ ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
+index 431af5e8be2f8..25077a1a3d821 100644
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -133,6 +133,7 @@ struct mmc_blk_data {
+ * track of the current selected device partition.
+ */
+ unsigned int part_curr;
++#define MMC_BLK_PART_INVALID UINT_MAX /* Unknown partition active */
+ int area_type;
+
+ /* debugfs files (only in main mmc_blk_data) */
+@@ -175,7 +176,7 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
+ unsigned int part_type);
+ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+ struct mmc_card *card,
+- int disable_multi,
++ int recovery_mode,
+ struct mmc_queue *mq);
+ static void mmc_blk_hsq_req_done(struct mmc_request *mrq);
+
+@@ -264,6 +265,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
+ goto out_put;
+ }
+ req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
++ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
+ blk_execute_rq(NULL, req, 0);
+ ret = req_to_mmc_queue_req(req)->drv_op_result;
+ blk_put_request(req);
+@@ -609,11 +611,11 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
+
+ if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
+ /*
+- * Ensure RPMB/R1B command has completed by polling CMD13
+- * "Send Status".
++ * Ensure RPMB/R1B command has completed by polling CMD13 "Send Status". Here we
++ * allow to override the default timeout value if a custom timeout is specified.
+ */
+- err = mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, false,
+- MMC_BUSY_IO);
++ err = mmc_poll_for_busy(card, idata->ic.cmd_timeout_ms ? : MMC_BLK_TIMEOUT_MS,
++ false, MMC_BUSY_IO);
+ }
+
+ return err;
+@@ -655,6 +657,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
+ idatas[0] = idata;
+ req_to_mmc_queue_req(req)->drv_op =
+ rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
++ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
+ req_to_mmc_queue_req(req)->drv_op_data = idatas;
+ req_to_mmc_queue_req(req)->ioc_count = 1;
+ blk_execute_rq(NULL, req, 0);
+@@ -724,6 +727,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
+ }
+ req_to_mmc_queue_req(req)->drv_op =
+ rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
++ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
+ req_to_mmc_queue_req(req)->drv_op_data = idata;
+ req_to_mmc_queue_req(req)->ioc_count = num_of_cmds;
+ blk_execute_rq(NULL, req, 0);
+@@ -984,9 +988,16 @@ static unsigned int mmc_blk_data_timeout_ms(struct mmc_host *host,
+ return ms;
+ }
+
++/*
++ * Attempts to reset the card and get back to the requested partition.
++ * Therefore any error here must result in cancelling the block layer
++ * request; it must not be reattempted without going through the mmc_blk
++ * partition sanity checks.
++ */
+ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host,
+ int type)
+ {
++ struct mmc_blk_data *main_md = dev_get_drvdata(&host->card->dev);
+ int err;
+
+ if (md->reset_done & type)
+@@ -994,23 +1005,22 @@ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host,
+
+ md->reset_done |= type;
+ err = mmc_hw_reset(host);
++ /*
++ * A successful reset will leave the card in the main partition, but
++ * upon failure it might not be, so set it to MMC_BLK_PART_INVALID
++ * in that case.
++ */
++ main_md->part_curr = err ? MMC_BLK_PART_INVALID : main_md->part_type;
++ if (err)
++ return err;
+ /* Ensure we switch back to the correct partition */
+- if (err) {
+- struct mmc_blk_data *main_md =
+- dev_get_drvdata(&host->card->dev);
+- int part_err;
+-
+- main_md->part_curr = main_md->part_type;
+- part_err = mmc_blk_part_switch(host->card, md->part_type);
+- if (part_err) {
+- /*
+- * We have failed to get back into the correct
+- * partition, so we need to abort the whole request.
+- */
+- return -ENODEV;
+- }
+- }
+- return err;
++ if (mmc_blk_part_switch(host->card, md->part_type))
++ /*
++ * We have failed to get back into the correct
++ * partition, so we need to abort the whole request.
++ */
++ return -ENODEV;
++ return 0;
+ }
+
+ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type)
+@@ -1107,6 +1117,11 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+ nr = blk_rq_sectors(req);
+
+ do {
++ unsigned int erase_arg = card->erase_arg;
++
++ if (mmc_card_broken_sd_discard(card))
++ erase_arg = SD_ERASE_ARG;
++
+ err = 0;
+ if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+@@ -1117,7 +1132,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
+ card->ext_csd.generic_cmd6_time);
+ }
+ if (!err)
+- err = mmc_erase(card, from, nr, card->erase_arg);
++ err = mmc_erase(card, from, nr, erase_arg);
+ } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
+ if (err)
+ status = BLK_STS_IOERR;
+@@ -1285,7 +1300,7 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq)
+ }
+
+ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
+- int disable_multi, bool *do_rel_wr_p,
++ int recovery_mode, bool *do_rel_wr_p,
+ bool *do_data_tag_p)
+ {
+ struct mmc_blk_data *md = mq->blkdata;
+@@ -1351,12 +1366,12 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
+ brq->data.blocks--;
+
+ /*
+- * After a read error, we redo the request one sector
++ * After a read error, we redo the request one (native) sector
+ * at a time in order to accurately determine which
+ * sectors can be read successfully.
+ */
+- if (disable_multi)
+- brq->data.blocks = 1;
++ if (recovery_mode)
++ brq->data.blocks = queue_physical_block_size(mq->queue) >> 9;
+
+ /*
+ * Some controllers have HW issues while operating
+@@ -1482,8 +1497,7 @@ void mmc_blk_cqe_recovery(struct mmc_queue *mq)
+ err = mmc_cqe_recovery(host);
+ if (err)
+ mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY);
+- else
+- mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);
++ mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);
+
+ pr_debug("%s: CQE recovery done\n", mmc_hostname(host));
+ }
+@@ -1574,7 +1588,7 @@ static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+
+ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+ struct mmc_card *card,
+- int disable_multi,
++ int recovery_mode,
+ struct mmc_queue *mq)
+ {
+ u32 readcmd, writecmd;
+@@ -1583,7 +1597,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+ struct mmc_blk_data *md = mq->blkdata;
+ bool do_rel_wr, do_data_tag;
+
+- mmc_blk_data_prep(mq, mqrq, disable_multi, &do_rel_wr, &do_data_tag);
++ mmc_blk_data_prep(mq, mqrq, recovery_mode, &do_rel_wr, &do_data_tag);
+
+ brq->mrq.cmd = &brq->cmd;
+
+@@ -1674,7 +1688,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req)
+
+ #define MMC_READ_SINGLE_RETRIES 2
+
+-/* Single sector read during recovery */
++/* Single (native) sector read during recovery */
+ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req)
+ {
+ struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+@@ -1682,31 +1696,32 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req)
+ struct mmc_card *card = mq->card;
+ struct mmc_host *host = card->host;
+ blk_status_t error = BLK_STS_OK;
+- int retries = 0;
++ size_t bytes_per_read = queue_physical_block_size(mq->queue);
+
+ do {
+ u32 status;
+ int err;
++ int retries = 0;
+
+- mmc_blk_rw_rq_prep(mqrq, card, 1, mq);
++ while (retries++ <= MMC_READ_SINGLE_RETRIES) {
++ mmc_blk_rw_rq_prep(mqrq, card, 1, mq);
+
+- mmc_wait_for_req(host, mrq);
++ mmc_wait_for_req(host, mrq);
+
+- err = mmc_send_status(card, &status);
+- if (err)
+- goto error_exit;
+-
+- if (!mmc_host_is_spi(host) &&
+- !mmc_ready_for_data(status)) {
+- err = mmc_blk_fix_state(card, req);
++ err = mmc_send_status(card, &status);
+ if (err)
+ goto error_exit;
+- }
+
+- if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES)
+- continue;
++ if (!mmc_host_is_spi(host) &&
++ !mmc_ready_for_data(status)) {
++ err = mmc_blk_fix_state(card, req);
++ if (err)
++ goto error_exit;
++ }
+
+- retries = 0;
++ if (!mrq->cmd->error)
++ break;
++ }
+
+ if (mrq->cmd->error ||
+ mrq->data->error ||
+@@ -1716,13 +1731,13 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req)
+ else
+ error = BLK_STS_OK;
+
+- } while (blk_update_request(req, error, 512));
++ } while (blk_update_request(req, error, bytes_per_read));
+
+ return;
+
+ error_exit:
+ mrq->data->bytes_xfered = 0;
+- blk_update_request(req, BLK_STS_IOERR, 512);
++ blk_update_request(req, BLK_STS_IOERR, bytes_per_read);
+ /* Let it try the remaining request again */
+ if (mqrq->retries > MMC_MAX_RETRIES - 1)
+ mqrq->retries = MMC_MAX_RETRIES - 1;
+@@ -1850,8 +1865,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req)
+ return;
+
+ /* Reset before last retry */
+- if (mqrq->retries + 1 == MMC_MAX_RETRIES)
+- mmc_blk_reset(md, card->host, type);
++ if (mqrq->retries + 1 == MMC_MAX_RETRIES &&
++ mmc_blk_reset(md, card->host, type))
++ return;
+
+ /* Command errors fail fast, so use all MMC_MAX_RETRIES */
+ if (brq->sbc.error || brq->cmd.error)
+@@ -1863,10 +1879,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req)
+ return;
+ }
+
+- /* FIXME: Missing single sector read for large sector size */
+- if (!mmc_large_sector(card) && rq_data_dir(req) == READ &&
+- brq->data.blocks > 1) {
+- /* Read one sector at a time */
++ if (rq_data_dir(req) == READ && brq->data.blocks >
++ queue_physical_block_size(mq->queue) >> 9) {
++ /* Read one (native) sector at a time */
+ mmc_blk_read_single(mq, req);
+ return;
+ }
+@@ -1880,6 +1895,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq)
+ brq->data.error || brq->cmd.resp[0] & CMD_ERRORS;
+ }
+
++static int mmc_spi_err_check(struct mmc_card *card)
++{
++ u32 status = 0;
++ int err;
++
++ /*
++	 * SPI does not have a TRAN state we have to wait on; instead, the
++ * card is ready again when it no longer holds the line LOW.
++ * We still have to ensure two things here before we know the write
++ * was successful:
++ * 1. The card has not disconnected during busy and we actually read our
++ * own pull-up, thinking it was still connected, so ensure it
++ * still responds.
++ * 2. Check for any error bits, in particular R1_SPI_IDLE to catch a
++ * just reconnected card after being disconnected during busy.
++ */
++ err = __mmc_send_status(card, &status, 0);
++ if (err)
++ return err;
++ /* All R1 and R2 bits of SPI are errors in our case */
++ if (status)
++ return -EIO;
++ return 0;
++}
++
+ static int mmc_blk_busy_cb(void *cb_data, bool *busy)
+ {
+ struct mmc_blk_busy_data *data = cb_data;
+@@ -1903,9 +1943,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req)
+ struct mmc_blk_busy_data cb_data;
+ int err;
+
+- if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ)
++ if (rq_data_dir(req) == READ)
+ return 0;
+
++ if (mmc_host_is_spi(card->host)) {
++ err = mmc_spi_err_check(card);
++ if (err)
++ mqrq->brq.data.bytes_xfered = 0;
++ return err;
++ }
++
+ cb_data.card = card;
+ cb_data.status = 0;
+ err = __mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, &mmc_blk_busy_cb,
+@@ -2034,14 +2081,14 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
+ mmc_blk_urgent_bkops(mq, mqrq);
+ }
+
+-static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
++static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, enum mmc_issue_type issue_type)
+ {
+ unsigned long flags;
+ bool put_card;
+
+ spin_lock_irqsave(&mq->lock, flags);
+
+- mq->in_flight[mmc_issue_type(mq, req)] -= 1;
++ mq->in_flight[issue_type] -= 1;
+
+ put_card = (mmc_tot_in_flight(mq) == 0);
+
+@@ -2053,6 +2100,7 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
+
+ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
+ {
++ enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
+ struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+ struct mmc_request *mrq = &mqrq->brq.mrq;
+ struct mmc_host *host = mq->card->host;
+@@ -2068,7 +2116,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
+ else if (likely(!blk_should_fake_timeout(req->q)))
+ blk_mq_complete_request(req);
+
+- mmc_blk_mq_dec_in_flight(mq, req);
++ mmc_blk_mq_dec_in_flight(mq, issue_type);
+ }
+
+ void mmc_blk_mq_recovery(struct mmc_queue *mq)
+@@ -2344,6 +2392,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
+ struct mmc_blk_data *md;
+ int devidx, ret;
+ char cap_str[10];
++ bool cache_enabled = false;
++ bool fua_enabled = false;
+
+ devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL);
+ if (devidx < 0) {
+@@ -2397,8 +2447,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
+ set_disk_ro(md->disk, md->read_only || default_ro);
+ md->disk->flags = GENHD_FL_EXT_DEVT;
+ if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
+- md->disk->flags |= GENHD_FL_NO_PART_SCAN
+- | GENHD_FL_SUPPRESS_PARTITION_INFO;
++ md->disk->flags |= GENHD_FL_NO_PART |
++ GENHD_FL_SUPPRESS_PARTITION_INFO;
+
+ /*
+ * As discussed on lkml, GENHD_FL_REMOVABLE should:
+@@ -2425,13 +2475,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
+ md->flags |= MMC_BLK_CMD23;
+ }
+
+- if (mmc_card_mmc(card) &&
+- md->flags & MMC_BLK_CMD23 &&
++ if (md->flags & MMC_BLK_CMD23 &&
+ ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
+ card->ext_csd.rel_sectors)) {
+ md->flags |= MMC_BLK_REL_WR;
+- blk_queue_write_cache(md->queue.queue, true, true);
++ fua_enabled = true;
++ cache_enabled = true;
+ }
++ if (mmc_cache_enabled(card->host))
++ cache_enabled = true;
++
++ blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
+
+ string_get_size((u64)size, 512, STRING_UNITS_2,
+ cap_str, sizeof(cap_str));
+@@ -2734,6 +2788,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
++ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
+ blk_execute_rq(NULL, req, 0);
+ ret = req_to_mmc_queue_req(req)->drv_op_result;
+ if (ret >= 0) {
+@@ -2772,6 +2827,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
+ goto out_free;
+ }
+ req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD;
++ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
+ req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
+ blk_execute_rq(NULL, req, 0);
+ err = req_to_mmc_queue_req(req)->drv_op_result;
+diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
+index 7bd392d55cfa5..5c6986131faff 100644
+--- a/drivers/mmc/core/card.h
++++ b/drivers/mmc/core/card.h
+@@ -70,6 +70,7 @@ struct mmc_fixup {
+ #define EXT_CSD_REV_ANY (-1u)
+
+ #define CID_MANFID_SANDISK 0x2
++#define CID_MANFID_SANDISK_SD 0x3
+ #define CID_MANFID_ATP 0x9
+ #define CID_MANFID_TOSHIBA 0x11
+ #define CID_MANFID_MICRON 0x13
+@@ -222,4 +223,9 @@ static inline int mmc_card_broken_hpi(const struct mmc_card *c)
+ return c->quirks & MMC_QUIRK_BROKEN_HPI;
+ }
+
++static inline int mmc_card_broken_sd_discard(const struct mmc_card *c)
++{
++ return c->quirks & MMC_QUIRK_BROKEN_SD_DISCARD;
++}
++
+ #endif
+diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
+index 240c5af793dce..07eda6cc6767b 100644
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -1132,7 +1132,13 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
+ mmc_power_cycle(host, ocr);
+ } else {
+ bit = fls(ocr) - 1;
+- ocr &= 3 << bit;
++ /*
++ * The bit variable represents the highest voltage bit set in
++ * the OCR register.
++ * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V),
++		 * we must shift the mask '3' by (bit - 1).
++ */
++ ocr &= 3 << (bit - 1);
+ if (bit != host->ios.vdd)
+ dev_warn(mmc_dev(host), "exceeding card's volts\n");
+ }
+@@ -1476,6 +1482,11 @@ void mmc_init_erase(struct mmc_card *card)
+ card->pref_erase = 0;
+ }
+
++static bool is_trim_arg(unsigned int arg)
++{
++ return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG;
++}
++
+ static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
+ unsigned int arg, unsigned int qty)
+ {
+@@ -1758,7 +1769,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
+ !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN))
+ return -EOPNOTSUPP;
+
+- if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) &&
++ if (mmc_card_mmc(card) && is_trim_arg(arg) &&
+ !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN))
+ return -EOPNOTSUPP;
+
+@@ -1788,7 +1799,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
+ * identified by the card->eg_boundary flag.
+ */
+ rem = card->erase_size - (from % card->erase_size);
+- if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) {
++ if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) {
+ err = mmc_do_erase(card, from, from + rem - 1, arg);
+ from += rem;
+ if ((err) || (to <= from))
+@@ -2264,7 +2275,7 @@ void mmc_start_host(struct mmc_host *host)
+ _mmc_detect_change(host, 0, false);
+ }
+
+-void mmc_stop_host(struct mmc_host *host)
++void __mmc_stop_host(struct mmc_host *host)
+ {
+ if (host->slot.cd_irq >= 0) {
+ mmc_gpio_set_cd_wake(host, false);
+@@ -2273,6 +2284,11 @@ void mmc_stop_host(struct mmc_host *host)
+
+ host->rescan_disable = 1;
+ cancel_delayed_work_sync(&host->detect);
++}
++
++void mmc_stop_host(struct mmc_host *host)
++{
++ __mmc_stop_host(host);
+
+ /* clear pm flags now and let card drivers set them as needed */
+ host->pm_flags = 0;
+diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
+index 7931a4f0137d2..f5f3f623ea492 100644
+--- a/drivers/mmc/core/core.h
++++ b/drivers/mmc/core/core.h
+@@ -70,6 +70,7 @@ static inline void mmc_delay(unsigned int ms)
+
+ void mmc_rescan(struct work_struct *work);
+ void mmc_start_host(struct mmc_host *host);
++void __mmc_stop_host(struct mmc_host *host);
+ void mmc_stop_host(struct mmc_host *host);
+
+ void _mmc_detect_change(struct mmc_host *host, unsigned long delay,
+diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
+index d4683b1d263fd..d739e2b631fe8 100644
+--- a/drivers/mmc/core/host.c
++++ b/drivers/mmc/core/host.c
+@@ -80,9 +80,18 @@ static void mmc_host_classdev_release(struct device *dev)
+ kfree(host);
+ }
+
++static int mmc_host_classdev_shutdown(struct device *dev)
++{
++ struct mmc_host *host = cls_dev_to_mmc_host(dev);
++
++ __mmc_stop_host(host);
++ return 0;
++}
++
+ static struct class mmc_host_class = {
+ .name = "mmc_host",
+ .dev_release = mmc_host_classdev_release,
++ .shutdown_pre = mmc_host_classdev_shutdown,
+ .pm = MMC_HOST_CLASS_DEV_PM_OPS,
+ };
+
+@@ -579,6 +588,16 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
+
+ EXPORT_SYMBOL(mmc_alloc_host);
+
++static int mmc_validate_host_caps(struct mmc_host *host)
++{
++ if (host->caps & MMC_CAP_SDIO_IRQ && !host->ops->enable_sdio_irq) {
++ dev_warn(host->parent, "missing ->enable_sdio_irq() ops\n");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ /**
+ * mmc_add_host - initialise host hardware
+ * @host: mmc host
+@@ -591,8 +610,9 @@ int mmc_add_host(struct mmc_host *host)
+ {
+ int err;
+
+- WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) &&
+- !host->ops->enable_sdio_irq);
++ err = mmc_validate_host_caps(host);
++ if (err)
++ return err;
+
+ err = device_add(&host->class_dev);
+ if (err)
+diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
+index 29e58ffae3797..d805f84507198 100644
+--- a/drivers/mmc/core/mmc.c
++++ b/drivers/mmc/core/mmc.c
+@@ -1381,13 +1381,17 @@ static int mmc_select_hs400es(struct mmc_card *card)
+ goto out_err;
+ }
+
++ /*
++ * Bump to HS timing and frequency. Some cards don't handle
++ * SEND_STATUS reliably at the initial frequency.
++ */
+ mmc_set_timing(host, MMC_TIMING_MMC_HS);
++ mmc_set_bus_speed(card);
++
+ err = mmc_switch_status(card, true);
+ if (err)
+ goto out_err;
+
+- mmc_set_clock(host, card->ext_csd.hs_max_dtr);
+-
+ /* Switch card to DDR with strobe bit */
+ val = EXT_CSD_DDR_BUS_WIDTH_8 | EXT_CSD_BUS_WIDTH_STROBE;
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+@@ -1445,7 +1449,7 @@ out_err:
+ static int mmc_select_hs200(struct mmc_card *card)
+ {
+ struct mmc_host *host = card->host;
+- unsigned int old_timing, old_signal_voltage;
++ unsigned int old_timing, old_signal_voltage, old_clock;
+ int err = -EINVAL;
+ u8 val;
+
+@@ -1476,8 +1480,17 @@ static int mmc_select_hs200(struct mmc_card *card)
+ false, true, MMC_CMD_RETRIES);
+ if (err)
+ goto err;
++
++ /*
++ * Bump to HS timing and frequency. Some cards don't handle
++ * SEND_STATUS reliably at the initial frequency.
++ * NB: We can't move to full (HS200) speeds until after we've
++ * successfully switched over.
++ */
+ old_timing = host->ios.timing;
++ old_clock = host->ios.clock;
+ mmc_set_timing(host, MMC_TIMING_MMC_HS200);
++ mmc_set_clock(card->host, card->ext_csd.hs_max_dtr);
+
+ /*
+ * For HS200, CRC errors are not a reliable way to know the
+@@ -1490,8 +1503,10 @@ static int mmc_select_hs200(struct mmc_card *card)
+ * mmc_select_timing() assumes timing has not changed if
+ * it is a switch error.
+ */
+- if (err == -EBADMSG)
++ if (err == -EBADMSG) {
++ mmc_set_clock(host, old_clock);
+ mmc_set_timing(host, old_timing);
++ }
+ }
+ err:
+ if (err) {
+diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
+index 63524551a13a1..4052f828f75e7 100644
+--- a/drivers/mmc/core/mmc_test.c
++++ b/drivers/mmc/core/mmc_test.c
+@@ -3181,7 +3181,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card,
+ struct mmc_test_dbgfs_file *df;
+
+ if (card->debugfs_root)
+- debugfs_create_file(name, mode, card->debugfs_root, card, fops);
++ file = debugfs_create_file(name, mode, card->debugfs_root,
++ card, fops);
+
+ df = kmalloc(sizeof(*df), GFP_KERNEL);
+ if (!df) {
+diff --git a/drivers/mmc/core/pwrseq_sd8787.c b/drivers/mmc/core/pwrseq_sd8787.c
+index 2e120ad83020f..0c5f5e371e1f8 100644
+--- a/drivers/mmc/core/pwrseq_sd8787.c
++++ b/drivers/mmc/core/pwrseq_sd8787.c
+@@ -28,7 +28,6 @@ struct mmc_pwrseq_sd8787 {
+ struct mmc_pwrseq pwrseq;
+ struct gpio_desc *reset_gpio;
+ struct gpio_desc *pwrdn_gpio;
+- u32 reset_pwrdwn_delay_ms;
+ };
+
+ #define to_pwrseq_sd8787(p) container_of(p, struct mmc_pwrseq_sd8787, pwrseq)
+@@ -39,7 +38,7 @@ static void mmc_pwrseq_sd8787_pre_power_on(struct mmc_host *host)
+
+ gpiod_set_value_cansleep(pwrseq->reset_gpio, 1);
+
+- msleep(pwrseq->reset_pwrdwn_delay_ms);
++ msleep(300);
+ gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1);
+ }
+
+@@ -51,17 +50,37 @@ static void mmc_pwrseq_sd8787_power_off(struct mmc_host *host)
+ gpiod_set_value_cansleep(pwrseq->reset_gpio, 0);
+ }
+
++static void mmc_pwrseq_wilc1000_pre_power_on(struct mmc_host *host)
++{
++ struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq);
++
++ /* The pwrdn_gpio is really CHIP_EN, reset_gpio is RESETN */
++ gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1);
++ msleep(5);
++ gpiod_set_value_cansleep(pwrseq->reset_gpio, 1);
++}
++
++static void mmc_pwrseq_wilc1000_power_off(struct mmc_host *host)
++{
++ struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq);
++
++ gpiod_set_value_cansleep(pwrseq->reset_gpio, 0);
++ gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 0);
++}
++
+ static const struct mmc_pwrseq_ops mmc_pwrseq_sd8787_ops = {
+ .pre_power_on = mmc_pwrseq_sd8787_pre_power_on,
+ .power_off = mmc_pwrseq_sd8787_power_off,
+ };
+
+-static const u32 sd8787_delay_ms = 300;
+-static const u32 wilc1000_delay_ms = 5;
++static const struct mmc_pwrseq_ops mmc_pwrseq_wilc1000_ops = {
++ .pre_power_on = mmc_pwrseq_wilc1000_pre_power_on,
++ .power_off = mmc_pwrseq_wilc1000_power_off,
++};
+
+ static const struct of_device_id mmc_pwrseq_sd8787_of_match[] = {
+- { .compatible = "mmc-pwrseq-sd8787", .data = &sd8787_delay_ms },
+- { .compatible = "mmc-pwrseq-wilc1000", .data = &wilc1000_delay_ms },
++ { .compatible = "mmc-pwrseq-sd8787", .data = &mmc_pwrseq_sd8787_ops },
++ { .compatible = "mmc-pwrseq-wilc1000", .data = &mmc_pwrseq_wilc1000_ops },
+ {/* sentinel */},
+ };
+ MODULE_DEVICE_TABLE(of, mmc_pwrseq_sd8787_of_match);
+@@ -77,7 +96,6 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ match = of_match_node(mmc_pwrseq_sd8787_of_match, pdev->dev.of_node);
+- pwrseq->reset_pwrdwn_delay_ms = *(u32 *)match->data;
+
+ pwrseq->pwrdn_gpio = devm_gpiod_get(dev, "powerdown", GPIOD_OUT_LOW);
+ if (IS_ERR(pwrseq->pwrdn_gpio))
+@@ -88,7 +106,7 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
+ return PTR_ERR(pwrseq->reset_gpio);
+
+ pwrseq->pwrseq.dev = dev;
+- pwrseq->pwrseq.ops = &mmc_pwrseq_sd8787_ops;
++ pwrseq->pwrseq.ops = match->data;
+ pwrseq->pwrseq.owner = THIS_MODULE;
+ platform_set_drvdata(pdev, pwrseq);
+
+diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
+index d68e6e513a4f4..afe8d8c5fa8a2 100644
+--- a/drivers/mmc/core/quirks.h
++++ b/drivers/mmc/core/quirks.h
+@@ -99,6 +99,26 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
+ MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_TRIM_BROKEN),
+
++ /*
++	 * Kingston EMMC04G-M627 advertises TRIM but it does not seem to
++ * support being used to offload WRITE_ZEROES.
++ */
++ MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc,
++ MMC_QUIRK_TRIM_BROKEN),
++
++ /*
++	 * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seem to
++ * support being used to offload WRITE_ZEROES.
++ */
++ MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc,
++ MMC_QUIRK_TRIM_BROKEN),
++
++ /*
++	 * Some SD cards report discard support even though they don't support it
++ */
++ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd,
++ MMC_QUIRK_BROKEN_SD_DISCARD),
++
+ END_FIXUP
+ };
+
+diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
+index 4646b7a03db6b..592166e53dce8 100644
+--- a/drivers/mmc/core/sd.c
++++ b/drivers/mmc/core/sd.c
+@@ -66,7 +66,7 @@ static const unsigned int sd_au_size[] = {
+ __res & __mask; \
+ })
+
+-#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000
++#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 1000
+ #define SD_WRITE_EXTR_SINGLE_TIMEOUT_MS 1000
+
+ struct sd_busy_data {
+@@ -863,7 +863,8 @@ try_again:
+ * the CCS bit is set as well. We deliberately deviate from the spec in
+ * regards to this, which allows UHS-I to be supported for SDSC cards.
+ */
+- if (!mmc_host_is_spi(host) && rocr && (*rocr & 0x01000000)) {
++ if (!mmc_host_is_spi(host) && (ocr & SD_OCR_S18R) &&
++ rocr && (*rocr & SD_ROCR_S18A)) {
+ err = mmc_set_uhs_voltage(host, pocr);
+ if (err == -EAGAIN) {
+ retries--;
+@@ -942,15 +943,16 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
+
+ /* Erase init depends on CSD and SSR */
+ mmc_init_erase(card);
+-
+- /*
+- * Fetch switch information from card.
+- */
+- err = mmc_read_switch(card);
+- if (err)
+- return err;
+ }
+
++ /*
++	 * Fetch switch information from the card. Note that sd3_bus_mode can
++	 * change if the voltage switch outcome changes, so always do this.
++ */
++ err = mmc_read_switch(card);
++ if (err)
++ return err;
++
+ /*
+ * For SPI, enable CRC as appropriate.
+ * This CRC enable is located AFTER the reading of the
+@@ -1250,7 +1252,7 @@ static int sd_read_ext_regs(struct mmc_card *card)
+ */
+ err = sd_read_ext_reg(card, 0, 0, 0, 512, gen_info_buf);
+ if (err) {
+- pr_warn("%s: error %d reading general info of SD ext reg\n",
++ pr_err("%s: error %d reading general info of SD ext reg\n",
+ mmc_hostname(card->host), err);
+ goto out;
+ }
+@@ -1264,7 +1266,12 @@ static int sd_read_ext_regs(struct mmc_card *card)
+ /* Number of extensions to be find. */
+ num_ext = gen_info_buf[4];
+
+- /* We support revision 0, but limit it to 512 bytes for simplicity. */
++ /*
++ * We only support revision 0 and limit it to 512 bytes for simplicity.
++ * No matter what, let's return zero to allow us to continue using the
++ * card, even if we can't support the features from the SD function
++ * extensions registers.
++ */
+ if (rev != 0 || len > 512) {
+ pr_warn("%s: non-supported SD ext reg layout\n",
+ mmc_hostname(card->host));
+@@ -1279,7 +1286,7 @@ static int sd_read_ext_regs(struct mmc_card *card)
+ for (i = 0; i < num_ext; i++) {
+ err = sd_parse_ext_reg(card, gen_info_buf, &next_ext_addr);
+ if (err) {
+- pr_warn("%s: error %d parsing SD ext reg\n",
++ pr_err("%s: error %d parsing SD ext reg\n",
+ mmc_hostname(card->host), err);
+ goto out;
+ }
+@@ -1473,26 +1480,15 @@ retry:
+ if (!v18_fixup_failed && !mmc_host_is_spi(host) && mmc_host_uhs(host) &&
+ mmc_sd_card_using_v18(card) &&
+ host->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_180) {
+- /*
+- * Re-read switch information in case it has changed since
+- * oldcard was initialized.
+- */
+- if (oldcard) {
+- err = mmc_read_switch(card);
+- if (err)
+- goto free_card;
+- }
+- if (mmc_sd_card_using_v18(card)) {
+- if (mmc_host_set_uhs_voltage(host) ||
+- mmc_sd_init_uhs_card(card)) {
+- v18_fixup_failed = true;
+- mmc_power_cycle(host, ocr);
+- if (!oldcard)
+- mmc_remove_card(card);
+- goto retry;
+- }
+- goto done;
++ if (mmc_host_set_uhs_voltage(host) ||
++ mmc_sd_init_uhs_card(card)) {
++ v18_fixup_failed = true;
++ mmc_power_cycle(host, ocr);
++ if (!oldcard)
++ mmc_remove_card(card);
++ goto retry;
+ }
++ goto cont;
+ }
+
+ /* Initialization sequence for UHS-I cards */
+@@ -1527,7 +1523,7 @@ retry:
+ mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+ }
+ }
+-
++cont:
+ if (!oldcard) {
+ /* Read/parse the extension registers. */
+ err = sd_read_ext_regs(card);
+@@ -1559,7 +1555,7 @@ retry:
+ err = -EINVAL;
+ goto free_card;
+ }
+-done:
++
+ host->card = card;
+ return 0;
+
+@@ -1663,6 +1659,12 @@ static int sd_poweroff_notify(struct mmc_card *card)
+ goto out;
+ }
+
++ /* Find out when the command is completed. */
++ err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false,
++ MMC_BUSY_EXTR_SINGLE);
++ if (err)
++ goto out;
++
+ cb_data.card = card;
+ cb_data.reg_buf = reg_buf;
+ err = __mmc_poll_for_busy(card, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
+diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
+index 68edf7a615be5..5447c47157aa5 100644
+--- a/drivers/mmc/core/sdio.c
++++ b/drivers/mmc/core/sdio.c
+@@ -708,6 +708,8 @@ try_again:
+ if (host->ops->init_card)
+ host->ops->init_card(host, card);
+
++ card->ocr = ocr_card;
++
+ /*
+ * If the host and card support UHS-I mode request the card
+ * to switch to 1.8V signaling level. No 1.8v signalling if
+@@ -820,7 +822,7 @@ try_again:
+ goto mismatch;
+ }
+ }
+- card->ocr = ocr_card;
++
+ mmc_fixup_device(card, sdio_fixup_methods);
+
+ if (card->type == MMC_TYPE_SD_COMBO) {
+diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
+index fda03b35c14a5..f6cdec00e97e7 100644
+--- a/drivers/mmc/core/sdio_bus.c
++++ b/drivers/mmc/core/sdio_bus.c
+@@ -290,7 +290,14 @@ static void sdio_release_func(struct device *dev)
+ {
+ struct sdio_func *func = dev_to_sdio_func(dev);
+
+- sdio_free_func_cis(func);
++ if (!(func->card->quirks & MMC_QUIRK_NONSTD_SDIO))
++ sdio_free_func_cis(func);
++
++ /*
++ * We have now removed the link to the tuples in the
++ * card structure, so remove the reference.
++ */
++ put_device(&func->card->dev);
+
+ kfree(func->info);
+ kfree(func->tmpbuf);
+@@ -322,6 +329,12 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card)
+
+ device_initialize(&func->dev);
+
++ /*
++	 * We may link to tuples in the card structure, so we
++	 * need to make sure we have a reference to it.
++ */
++ get_device(&func->card->dev);
++
+ func->dev.parent = &card->dev;
+ func->dev.bus = &sdio_bus_type;
+ func->dev.release = sdio_release_func;
+@@ -375,10 +388,9 @@ int sdio_add_func(struct sdio_func *func)
+ */
+ void sdio_remove_func(struct sdio_func *func)
+ {
+- if (!sdio_func_present(func))
+- return;
++ if (sdio_func_present(func))
++ device_del(&func->dev);
+
+- device_del(&func->dev);
+ of_node_put(func->dev.of_node);
+ put_device(&func->dev);
+ }
+diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
+index a705ba6eff5bf..afaa6cab1adc3 100644
+--- a/drivers/mmc/core/sdio_cis.c
++++ b/drivers/mmc/core/sdio_cis.c
+@@ -403,12 +403,6 @@ int sdio_read_func_cis(struct sdio_func *func)
+ if (ret)
+ return ret;
+
+- /*
+- * Since we've linked to tuples in the card structure,
+- * we must make sure we have a reference to it.
+- */
+- get_device(&func->card->dev);
+-
+ /*
+ * Vendor/device id is optional for function CIS, so
+ * copy it from the card structure as needed.
+@@ -434,11 +428,5 @@ void sdio_free_func_cis(struct sdio_func *func)
+ }
+
+ func->tuples = NULL;
+-
+- /*
+- * We have now removed the link to the tuples in the
+- * card structure, so remove the reference.
+- */
+- put_device(&func->card->dev);
+ }
+
+diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
+index ccc148cdb5eee..a281df78d1685 100644
+--- a/drivers/mmc/host/Kconfig
++++ b/drivers/mmc/host/Kconfig
+@@ -523,11 +523,12 @@ config MMC_ALCOR
+ of Alcor Micro PCI-E card reader
+
+ config MMC_AU1X
+- tristate "Alchemy AU1XX0 MMC Card Interface support"
++ bool "Alchemy AU1XX0 MMC Card Interface support"
+ depends on MIPS_ALCHEMY
++ depends on MMC=y
+ help
+ This selects the AMD Alchemy(R) Multimedia card interface.
+- If you have a Alchemy platform with a MMC slot, say Y or M here.
++	  If you have an Alchemy platform with an MMC slot, say Y here.
+
+ If unsure, say N.
+
+@@ -1069,9 +1070,10 @@ config MMC_SDHCI_OMAP
+
+ config MMC_SDHCI_AM654
+ tristate "Support for the SDHCI Controller in TI's AM654 SOCs"
+- depends on MMC_SDHCI_PLTFM && OF && REGMAP_MMIO
++ depends on MMC_SDHCI_PLTFM && OF
+ select MMC_SDHCI_IO_ACCESSORS
+ select MMC_CQHCI
++ select REGMAP_MMIO
+ help
+ This selects the Secure Digital Host Controller Interface (SDHCI)
+ support present in TI's AM654 SOCs. The controller supports
+diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c
+index bfb8efeb7eb80..d01df01d4b4d1 100644
+--- a/drivers/mmc/host/alcor.c
++++ b/drivers/mmc/host/alcor.c
+@@ -1114,7 +1114,10 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev)
+ alcor_hw_init(host);
+
+ dev_set_drvdata(&pdev->dev, host);
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto free_host;
++
+ return 0;
+
+ free_host:
+diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
+index 807177c953f3d..493ed8c824195 100644
+--- a/drivers/mmc/host/atmel-mci.c
++++ b/drivers/mmc/host/atmel-mci.c
+@@ -1818,7 +1818,6 @@ static void atmci_tasklet_func(struct tasklet_struct *t)
+ atmci_writel(host, ATMCI_IER, ATMCI_NOTBUSY);
+ state = STATE_WAITING_NOTBUSY;
+ } else if (host->mrq->stop) {
+- atmci_writel(host, ATMCI_IER, ATMCI_CMDRDY);
+ atmci_send_stop_cmd(host, data);
+ state = STATE_SENDING_STOP;
+ } else {
+@@ -1851,8 +1850,6 @@ static void atmci_tasklet_func(struct tasklet_struct *t)
+ * command to send.
+ */
+ if (host->mrq->stop) {
+- atmci_writel(host, ATMCI_IER,
+- ATMCI_CMDRDY);
+ atmci_send_stop_cmd(host, data);
+ state = STATE_SENDING_STOP;
+ } else {
+@@ -2223,6 +2220,7 @@ static int atmci_init_slot(struct atmel_mci *host,
+ {
+ struct mmc_host *mmc;
+ struct atmel_mci_slot *slot;
++ int ret;
+
+ mmc = mmc_alloc_host(sizeof(struct atmel_mci_slot), &host->pdev->dev);
+ if (!mmc)
+@@ -2306,11 +2304,13 @@ static int atmci_init_slot(struct atmel_mci *host,
+
+ host->slot[id] = slot;
+ mmc_regulator_get_supply(mmc);
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret) {
++ mmc_free_host(mmc);
++ return ret;
++ }
+
+ if (gpio_is_valid(slot->detect_pin)) {
+- int ret;
+-
+ timer_setup(&slot->detect_timer, atmci_detect_change, 0);
+
+ ret = request_irq(gpio_to_irq(slot->detect_pin),
+diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
+index 0acc237843f7c..f5f9cb7a2da5e 100644
+--- a/drivers/mmc/host/au1xmmc.c
++++ b/drivers/mmc/host/au1xmmc.c
+@@ -1095,8 +1095,9 @@ out5:
+ if (host->platdata && host->platdata->cd_setup &&
+ !(mmc->caps & MMC_CAP_NEEDS_POLL))
+ host->platdata->cd_setup(mmc, 0);
+-out_clk:
++
+ clk_disable_unprepare(host->clk);
++out_clk:
+ clk_put(host->clk);
+ out_irq:
+ free_irq(host->irq, host);
+diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
+index 8c2361e662774..985079943be76 100644
+--- a/drivers/mmc/host/bcm2835.c
++++ b/drivers/mmc/host/bcm2835.c
+@@ -1413,8 +1413,8 @@ static int bcm2835_probe(struct platform_device *pdev)
+ host->max_clk = clk_get_rate(clk);
+
+ host->irq = platform_get_irq(pdev, 0);
+- if (host->irq <= 0) {
+- ret = -EINVAL;
++ if (host->irq < 0) {
++ ret = host->irq;
+ goto err;
+ }
+
+diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
+index 2c4b2df52adb1..12dca91a8ef61 100644
+--- a/drivers/mmc/host/cavium-octeon.c
++++ b/drivers/mmc/host/cavium-octeon.c
+@@ -277,6 +277,7 @@ static int octeon_mmc_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(&pdev->dev, "Error populating slots\n");
+ octeon_mmc_set_shared_power(host, 0);
++ of_node_put(cn);
+ goto error;
+ }
+ i++;
+diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
+index 76013bbbcff30..202b1d6da678c 100644
+--- a/drivers/mmc/host/cavium-thunderx.c
++++ b/drivers/mmc/host/cavium-thunderx.c
+@@ -142,8 +142,10 @@ static int thunder_mmc_probe(struct pci_dev *pdev,
+ continue;
+
+ ret = cvm_mmc_of_slot_probe(&host->slot_pdev[i]->dev, host);
+- if (ret)
++ if (ret) {
++ of_node_put(child_node);
+ goto error;
++ }
+ }
+ i++;
+ }
+diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
+index 2a757c88f9d21..80de660027d89 100644
+--- a/drivers/mmc/host/davinci_mmc.c
++++ b/drivers/mmc/host/davinci_mmc.c
+@@ -1375,8 +1375,12 @@ static int davinci_mmcsd_suspend(struct device *dev)
+ static int davinci_mmcsd_resume(struct device *dev)
+ {
+ struct mmc_davinci_host *host = dev_get_drvdata(dev);
++ int ret;
++
++ ret = clk_enable(host->clk);
++ if (ret)
++ return ret;
+
+- clk_enable(host->clk);
+ mmc_davinci_reset_ctrl(host, 0);
+
+ return 0;
+diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
+index 380f9aa56eb26..1e8f1bb3cad7c 100644
+--- a/drivers/mmc/host/dw_mmc.c
++++ b/drivers/mmc/host/dw_mmc.c
+@@ -2086,7 +2086,8 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
+ * delayed. Allowing the transfer to take place
+ * avoids races and keeps things simple.
+ */
+- if (err != -ETIMEDOUT) {
++ if (err != -ETIMEDOUT &&
++ host->dir_status == DW_MCI_RECV_STATUS) {
+ state = STATE_SENDING_DATA;
+ continue;
+ }
+diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
+index 80a2c270d502e..8586447d4b4f2 100644
+--- a/drivers/mmc/host/jz4740_mmc.c
++++ b/drivers/mmc/host/jz4740_mmc.c
+@@ -235,6 +235,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host)
+ return PTR_ERR(host->dma_rx);
+ }
+
++ /*
++ * Limit the maximum segment size in any SG entry according to
++ * the parameters of the DMA engine device.
++ */
++ if (host->dma_tx) {
++ struct device *dev = host->dma_tx->device->dev;
++ unsigned int max_seg_size = dma_get_max_seg_size(dev);
++
++ if (max_seg_size < host->mmc->max_seg_size)
++ host->mmc->max_seg_size = max_seg_size;
++ }
++
++ if (host->dma_rx) {
++ struct device *dev = host->dma_rx->device->dev;
++ unsigned int max_seg_size = dma_get_max_seg_size(dev);
++
++ if (max_seg_size < host->mmc->max_seg_size)
++ host->mmc->max_seg_size = max_seg_size;
++ }
++
+ return 0;
+ }
+
+@@ -1018,6 +1038,16 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
+ mmc->ops = &jz4740_mmc_ops;
+ if (!mmc->f_max)
+ mmc->f_max = JZ_MMC_CLK_RATE;
++
++ /*
++ * There seems to be a problem with this driver on the JZ4760 and
++ * JZ4760B SoCs. There, when using the maximum rate supported (50 MHz),
++ * the communication fails with many SD cards.
++ * Until this bug is sorted out, limit the maximum rate to 24 MHz.
++ */
++ if (host->version == JZ_MMC_JZ4760 && mmc->f_max > JZ_MMC_CLK_RATE)
++ mmc->f_max = JZ_MMC_CLK_RATE;
++
+ mmc->f_min = mmc->f_max / 128;
+ mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+
+diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
+index 8f36536cb1b6d..287705729064c 100644
+--- a/drivers/mmc/host/meson-gx-mmc.c
++++ b/drivers/mmc/host/meson-gx-mmc.c
+@@ -173,6 +173,8 @@ struct meson_host {
+ int irq;
+
+ bool vqmmc_enabled;
++ bool needs_pre_post_req;
++
+ };
+
+ #define CMD_CFG_LENGTH_MASK GENMASK(8, 0)
+@@ -663,6 +665,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc,
+ struct meson_host *host = mmc_priv(mmc);
+
+ host->cmd = NULL;
++ if (host->needs_pre_post_req)
++ meson_mmc_post_req(mmc, mrq, 0);
+ mmc_request_done(host->mmc, mrq);
+ }
+
+@@ -880,7 +884,7 @@ static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data
+ static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+ {
+ struct meson_host *host = mmc_priv(mmc);
+- bool needs_pre_post_req = mrq->data &&
++ host->needs_pre_post_req = mrq->data &&
+ !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE);
+
+ /*
+@@ -896,22 +900,19 @@ static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+ }
+ }
+
+- if (needs_pre_post_req) {
++ if (host->needs_pre_post_req) {
+ meson_mmc_get_transfer_mode(mmc, mrq);
+ if (!meson_mmc_desc_chain_mode(mrq->data))
+- needs_pre_post_req = false;
++ host->needs_pre_post_req = false;
+ }
+
+- if (needs_pre_post_req)
++ if (host->needs_pre_post_req)
+ meson_mmc_pre_req(mmc, mrq);
+
+ /* Stop execution */
+ writel(0, host->regs + SD_EMMC_START);
+
+ meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd);
+-
+- if (needs_pre_post_req)
+- meson_mmc_post_req(mmc, mrq, 0);
+ }
+
+ static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
+@@ -980,11 +981,8 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
+ if (status & (IRQ_END_OF_CHAIN | IRQ_RESP_STATUS)) {
+ if (data && !cmd->error)
+ data->bytes_xfered = data->blksz * data->blocks;
+- if (meson_mmc_bounce_buf_read(data) ||
+- meson_mmc_get_next_command(cmd))
+- ret = IRQ_WAKE_THREAD;
+- else
+- ret = IRQ_HANDLED;
++
++ return IRQ_WAKE_THREAD;
+ }
+
+ out:
+@@ -996,9 +994,6 @@ out:
+ writel(start, host->regs + SD_EMMC_START);
+ }
+
+- if (ret == IRQ_HANDLED)
+- meson_mmc_request_done(host->mmc, cmd->mrq);
+-
+ return ret;
+ }
+
+@@ -1171,8 +1166,10 @@ static int meson_mmc_probe(struct platform_device *pdev)
+ }
+
+ ret = device_reset_optional(&pdev->dev);
+- if (ret)
+- return dev_err_probe(&pdev->dev, ret, "device reset failed\n");
++ if (ret) {
++ dev_err_probe(&pdev->dev, ret, "device reset failed\n");
++ goto free_host;
++ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ host->regs = devm_ioremap_resource(&pdev->dev, res);
+@@ -1182,8 +1179,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
+ }
+
+ host->irq = platform_get_irq(pdev, 0);
+- if (host->irq <= 0) {
+- ret = -EINVAL;
++ if (host->irq < 0) {
++ ret = host->irq;
+ goto free_host;
+ }
+
+@@ -1288,7 +1285,9 @@ static int meson_mmc_probe(struct platform_device *pdev)
+ }
+
+ mmc->ops = &meson_mmc_ops;
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto err_free_irq;
+
+ return 0;
+
+diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c
+index 7cd9c0ec2fcfe..28aa78aa08f3f 100644
+--- a/drivers/mmc/host/meson-mx-sdhc-mmc.c
++++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c
+@@ -135,6 +135,7 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc,
+ struct mmc_command *cmd)
+ {
+ struct meson_mx_sdhc_host *host = mmc_priv(mmc);
++ bool manual_stop = false;
+ u32 ictl, send;
+ int pack_len;
+
+@@ -172,12 +173,27 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc,
+ else
+ /* software flush: */
+ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK;
++
++ /*
++ * Mimic the logic from the vendor driver where (only)
++ * SD_IO_RW_EXTENDED commands with more than one block set the
++ * MESON_SDHC_MISC_MANUAL_STOP bit. This fixes the firmware
++ * download in the brcmfmac driver for a BCM43362/1 card.
++		 * Without this, sdio_memcpy_toio() (with a size of 219557
++ * bytes) times out if MESON_SDHC_MISC_MANUAL_STOP is not set.
++ */
++ manual_stop = cmd->data->blocks > 1 &&
++ cmd->opcode == SD_IO_RW_EXTENDED;
+ } else {
+ pack_len = 0;
+
+ ictl |= MESON_SDHC_ICTL_RESP_OK;
+ }
+
++ regmap_update_bits(host->regmap, MESON_SDHC_MISC,
++ MESON_SDHC_MISC_MANUAL_STOP,
++ manual_stop ? MESON_SDHC_MISC_MANUAL_STOP : 0);
++
+ if (cmd->opcode == MMC_STOP_TRANSMISSION)
+ send |= MESON_SDHC_SEND_DATA_STOP;
+
+@@ -838,6 +854,11 @@ static int meson_mx_sdhc_probe(struct platform_device *pdev)
+ goto err_disable_pclk;
+
+ irq = platform_get_irq(pdev, 0);
++ if (irq < 0) {
++ ret = irq;
++ goto err_disable_pclk;
++ }
++
+ ret = devm_request_threaded_irq(dev, irq, meson_mx_sdhc_irq,
+ meson_mx_sdhc_irq_thread, IRQF_ONESHOT,
+ NULL, host);
+diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
+index d4a48916bfb67..3a19a05ef55a7 100644
+--- a/drivers/mmc/host/meson-mx-sdio.c
++++ b/drivers/mmc/host/meson-mx-sdio.c
+@@ -662,6 +662,11 @@ static int meson_mx_mmc_probe(struct platform_device *pdev)
+ }
+
+ irq = platform_get_irq(pdev, 0);
++ if (irq < 0) {
++ ret = irq;
++ goto error_free_mmc;
++ }
++
+ ret = devm_request_threaded_irq(host->controller_dev, irq,
+ meson_mx_mmc_irq,
+ meson_mx_mmc_irq_thread, IRQF_ONESHOT,
+diff --git a/drivers/mmc/host/mmc_hsq.c b/drivers/mmc/host/mmc_hsq.c
+index a5e05ed0fda3e..9d35453e7371b 100644
+--- a/drivers/mmc/host/mmc_hsq.c
++++ b/drivers/mmc/host/mmc_hsq.c
+@@ -34,7 +34,7 @@ static void mmc_hsq_pump_requests(struct mmc_hsq *hsq)
+ spin_lock_irqsave(&hsq->lock, flags);
+
+ /* Make sure we are not already running a request now */
+- if (hsq->mrq) {
++ if (hsq->mrq || hsq->recovery_halt) {
+ spin_unlock_irqrestore(&hsq->lock, flags);
+ return;
+ }
+diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
+index f4c8e1a61f537..91fde4943defa 100644
+--- a/drivers/mmc/host/mmc_spi.c
++++ b/drivers/mmc/host/mmc_spi.c
+@@ -1441,7 +1441,7 @@ static int mmc_spi_probe(struct spi_device *spi)
+
+ status = mmc_add_host(mmc);
+ if (status != 0)
+- goto fail_add_host;
++ goto fail_glue_init;
+
+ /*
+ * Index 0 is card detect
+@@ -1449,7 +1449,7 @@ static int mmc_spi_probe(struct spi_device *spi)
+ */
+ status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1000);
+ if (status == -EPROBE_DEFER)
+- goto fail_add_host;
++ goto fail_gpiod_request;
+ if (!status) {
+ /*
+ * The platform has a CD GPIO signal that may support
+@@ -1464,7 +1464,7 @@ static int mmc_spi_probe(struct spi_device *spi)
+ /* Index 1 is write protect/read only */
+ status = mmc_gpiod_request_ro(mmc, NULL, 1, 0);
+ if (status == -EPROBE_DEFER)
+- goto fail_add_host;
++ goto fail_gpiod_request;
+ if (!status)
+ has_ro = true;
+
+@@ -1478,7 +1478,7 @@ static int mmc_spi_probe(struct spi_device *spi)
+ ? ", cd polling" : "");
+ return 0;
+
+-fail_add_host:
++fail_gpiod_request:
+ mmc_remove_host(mmc);
+ fail_glue_init:
+ mmc_spi_dma_free(host);
+@@ -1514,6 +1514,12 @@ static int mmc_spi_remove(struct spi_device *spi)
+ return 0;
+ }
+
++static const struct spi_device_id mmc_spi_dev_ids[] = {
++ { "mmc-spi-slot"},
++ { },
++};
++MODULE_DEVICE_TABLE(spi, mmc_spi_dev_ids);
++
+ static const struct of_device_id mmc_spi_of_match_table[] = {
+ { .compatible = "mmc-spi-slot", },
+ {},
+@@ -1525,6 +1531,7 @@ static struct spi_driver mmc_spi_driver = {
+ .name = "mmc_spi",
+ .of_match_table = mmc_spi_of_match_table,
+ },
++ .id_table = mmc_spi_dev_ids,
+ .probe = mmc_spi_probe,
+ .remove = mmc_spi_remove,
+ };
+diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
+index 3765e2f4ad98a..1ffb9dbdf78ed 100644
+--- a/drivers/mmc/host/mmci.c
++++ b/drivers/mmc/host/mmci.c
+@@ -1729,7 +1729,8 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc)
+ return;
+
+ if (host->variant->busy_timeout && mmc->actual_clock)
+- max_busy_timeout = ~0UL / (mmc->actual_clock / MSEC_PER_SEC);
++ max_busy_timeout = U32_MAX / DIV_ROUND_UP(mmc->actual_clock,
++ MSEC_PER_SEC);
+
+ mmc->max_busy_timeout = max_busy_timeout;
+ }
+@@ -2254,7 +2255,9 @@ static int mmci_probe(struct amba_device *dev,
+ pm_runtime_set_autosuspend_delay(&dev->dev, 50);
+ pm_runtime_use_autosuspend(&dev->dev);
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto clk_disable;
+
+ pm_runtime_put(&dev->dev);
+ return 0;
+@@ -2446,6 +2449,7 @@ static struct amba_driver mmci_driver = {
+ .drv = {
+ .name = DRIVER_NAME,
+ .pm = &mmci_dev_pm_ops,
++ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ },
+ .probe = mmci_probe,
+ .remove = mmci_remove,
+diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
+index fdaa11f92fe6f..4cceb9bab0361 100644
+--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
++++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
+@@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
+ * excepted the last element which has no constraint on idmasize
+ */
+ for_each_sg(data->sg, sg, data->sg_len - 1, i) {
+- if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) ||
+- !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) {
++ if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
++ !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
+ dev_err(mmc_dev(host->mmc),
+ "unaligned scatterlist: ofst:%x length:%d\n",
+ data->sg->offset, data->sg->length);
+@@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
+ }
+ }
+
+- if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) {
++ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ dev_err(mmc_dev(host->mmc),
+ "unaligned last scatterlist: ofst:%x length:%d\n",
+ data->sg->offset, data->sg->length);
+@@ -441,6 +441,8 @@ static int sdmmc_dlyb_phase_tuning(struct mmci_host *host, u32 opcode)
+ return -EINVAL;
+ }
+
++ writel_relaxed(0, dlyb->base + DLYB_CR);
++
+ phase = end_of_len - max_len / 2;
+ sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false);
+
+diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
+index 6c9d38132f74c..94e9a08bc90e1 100644
+--- a/drivers/mmc/host/moxart-mmc.c
++++ b/drivers/mmc/host/moxart-mmc.c
+@@ -111,8 +111,8 @@
+ #define CLK_DIV_MASK 0x7f
+
+ /* REG_BUS_WIDTH */
+-#define BUS_WIDTH_8 BIT(2)
+-#define BUS_WIDTH_4 BIT(1)
++#define BUS_WIDTH_4_SUPPORT BIT(3)
++#define BUS_WIDTH_4 BIT(2)
+ #define BUS_WIDTH_1 BIT(0)
+
+ #define MMC_VDD_360 23
+@@ -338,13 +338,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
+ return;
+ }
+ for (len = 0; len < remain && len < host->fifo_width;) {
+- /* SCR data must be read in big endian. */
+- if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
+- *sgp = ioread32be(host->base +
+- REG_DATA_WINDOW);
+- else
+- *sgp = ioread32(host->base +
+- REG_DATA_WINDOW);
++ *sgp = ioread32(host->base + REG_DATA_WINDOW);
+ sgp++;
+ len += 4;
+ }
+@@ -524,9 +518,6 @@ static void moxart_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ case MMC_BUS_WIDTH_4:
+ writel(BUS_WIDTH_4, host->base + REG_BUS_WIDTH);
+ break;
+- case MMC_BUS_WIDTH_8:
+- writel(BUS_WIDTH_8, host->base + REG_BUS_WIDTH);
+- break;
+ default:
+ writel(BUS_WIDTH_1, host->base + REG_BUS_WIDTH);
+ break;
+@@ -566,37 +557,37 @@ static int moxart_probe(struct platform_device *pdev)
+ if (!mmc) {
+ dev_err(dev, "mmc_alloc_host failed\n");
+ ret = -ENOMEM;
+- goto out;
++ goto out_mmc;
+ }
+
+ ret = of_address_to_resource(node, 0, &res_mmc);
+ if (ret) {
+ dev_err(dev, "of_address_to_resource failed\n");
+- goto out;
++ goto out_mmc;
+ }
+
+ irq = irq_of_parse_and_map(node, 0);
+ if (irq <= 0) {
+ dev_err(dev, "irq_of_parse_and_map failed\n");
+ ret = -EINVAL;
+- goto out;
++ goto out_mmc;
+ }
+
+ clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+- goto out;
++ goto out_mmc;
+ }
+
+ reg_mmc = devm_ioremap_resource(dev, &res_mmc);
+ if (IS_ERR(reg_mmc)) {
+ ret = PTR_ERR(reg_mmc);
+- goto out;
++ goto out_mmc;
+ }
+
+ ret = mmc_of_parse(mmc);
+ if (ret)
+- goto out;
++ goto out_mmc;
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+@@ -621,6 +612,14 @@ static int moxart_probe(struct platform_device *pdev)
+ ret = -EPROBE_DEFER;
+ goto out;
+ }
++ if (!IS_ERR(host->dma_chan_tx)) {
++ dma_release_channel(host->dma_chan_tx);
++ host->dma_chan_tx = NULL;
++ }
++ if (!IS_ERR(host->dma_chan_rx)) {
++ dma_release_channel(host->dma_chan_rx);
++ host->dma_chan_rx = NULL;
++ }
+ dev_dbg(dev, "PIO mode transfer enabled\n");
+ host->have_dma = false;
+ } else {
+@@ -643,16 +642,8 @@ static int moxart_probe(struct platform_device *pdev)
+ dmaengine_slave_config(host->dma_chan_rx, &cfg);
+ }
+
+- switch ((readl(host->base + REG_BUS_WIDTH) >> 3) & 3) {
+- case 1:
++ if (readl(host->base + REG_BUS_WIDTH) & BUS_WIDTH_4_SUPPORT)
+ mmc->caps |= MMC_CAP_4_BIT_DATA;
+- break;
+- case 2:
+- mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA;
+- break;
+- default:
+- break;
+- }
+
+ writel(0, host->base + REG_INTERRUPT_MASK);
+
+@@ -668,13 +659,20 @@ static int moxart_probe(struct platform_device *pdev)
+ goto out;
+
+ dev_set_drvdata(dev, mmc);
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto out;
+
+ dev_dbg(dev, "IRQ=%d, FIFO is %d bytes\n", irq, host->fifo_width);
+
+ return 0;
+
+ out:
++ if (!IS_ERR_OR_NULL(host->dma_chan_tx))
++ dma_release_channel(host->dma_chan_tx);
++ if (!IS_ERR_OR_NULL(host->dma_chan_rx))
++ dma_release_channel(host->dma_chan_rx);
++out_mmc:
+ if (mmc)
+ mmc_free_host(mmc);
+ return ret;
+@@ -687,17 +685,17 @@ static int moxart_remove(struct platform_device *pdev)
+
+ dev_set_drvdata(&pdev->dev, NULL);
+
+- if (!IS_ERR(host->dma_chan_tx))
++ if (!IS_ERR_OR_NULL(host->dma_chan_tx))
+ dma_release_channel(host->dma_chan_tx);
+- if (!IS_ERR(host->dma_chan_rx))
++ if (!IS_ERR_OR_NULL(host->dma_chan_rx))
+ dma_release_channel(host->dma_chan_rx);
+ mmc_remove_host(mmc);
+- mmc_free_host(mmc);
+
+ writel(0, host->base + REG_INTERRUPT_MASK);
+ writel(0, host->base + REG_POWER_CONTROL);
+ writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
+ host->base + REG_CLOCK_CONTROL);
++ mmc_free_host(mmc);
+
+ return 0;
+ }
+diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
+index b06b4dcb7c782..6d0fc247bddb3 100644
+--- a/drivers/mmc/host/mtk-sd.c
++++ b/drivers/mmc/host/mtk-sd.c
+@@ -8,6 +8,7 @@
+ #include <linux/clk.h>
+ #include <linux/delay.h>
+ #include <linux/dma-mapping.h>
++#include <linux/iopoll.h>
+ #include <linux/ioport.h>
+ #include <linux/irq.h>
+ #include <linux/of_address.h>
+@@ -627,12 +628,11 @@ static void msdc_reset_hw(struct msdc_host *host)
+ u32 val;
+
+ sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_RST);
+- while (readl(host->base + MSDC_CFG) & MSDC_CFG_RST)
+- cpu_relax();
++ readl_poll_timeout(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0);
+
+ sdr_set_bits(host->base + MSDC_FIFOCS, MSDC_FIFOCS_CLR);
+- while (readl(host->base + MSDC_FIFOCS) & MSDC_FIFOCS_CLR)
+- cpu_relax();
++ readl_poll_timeout(host->base + MSDC_FIFOCS, val,
++ !(val & MSDC_FIFOCS_CLR), 0, 0);
+
+ val = readl(host->base + MSDC_INT);
+ writel(val, host->base + MSDC_INT);
+@@ -805,8 +805,9 @@ static void msdc_gate_clock(struct msdc_host *host)
+ clk_disable_unprepare(host->h_clk);
+ }
+
+-static void msdc_ungate_clock(struct msdc_host *host)
++static int msdc_ungate_clock(struct msdc_host *host)
+ {
++ u32 val;
+ int ret;
+
+ clk_prepare_enable(host->h_clk);
+@@ -816,11 +817,11 @@ static void msdc_ungate_clock(struct msdc_host *host)
+ ret = clk_bulk_prepare_enable(MSDC_NR_CLOCKS, host->bulk_clks);
+ if (ret) {
+ dev_err(host->dev, "Cannot enable pclk/axi/ahb clock gates\n");
+- return;
++ return ret;
+ }
+
+- while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
+- cpu_relax();
++ return readl_poll_timeout(host->base + MSDC_CFG, val,
++ (val & MSDC_CFG_CKSTB), 1, 20000);
+ }
+
+ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
+@@ -831,6 +832,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
+ u32 div;
+ u32 sclk;
+ u32 tune_reg = host->dev_comp->pad_tune_reg;
++ u32 val;
+
+ if (!hz) {
+ dev_dbg(host->dev, "set mclk to 0\n");
+@@ -911,8 +913,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
+ else
+ clk_prepare_enable(clk_get_parent(host->src_clk));
+
+- while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
+- cpu_relax();
++ readl_poll_timeout(host->base + MSDC_CFG, val, (val & MSDC_CFG_CKSTB), 0, 0);
+ sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN);
+ mmc->actual_clock = sclk;
+ host->mclk = hz;
+@@ -1222,13 +1223,13 @@ static bool msdc_cmd_done(struct msdc_host *host, int events,
+ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
+ struct mmc_request *mrq, struct mmc_command *cmd)
+ {
+- /* The max busy time we can endure is 20ms */
+- unsigned long tmo = jiffies + msecs_to_jiffies(20);
++ u32 val;
++ int ret;
+
+- while ((readl(host->base + SDC_STS) & SDC_STS_CMDBUSY) &&
+- time_before(jiffies, tmo))
+- cpu_relax();
+- if (readl(host->base + SDC_STS) & SDC_STS_CMDBUSY) {
++ /* The max busy time we can endure is 20ms */
++ ret = readl_poll_timeout_atomic(host->base + SDC_STS, val,
++ !(val & SDC_STS_CMDBUSY), 1, 20000);
++ if (ret) {
+ dev_err(host->dev, "CMD bus busy detected\n");
+ host->error |= REQ_CMD_BUSY;
+ msdc_cmd_done(host, MSDC_INT_CMDTMO, mrq, cmd);
+@@ -1236,12 +1237,10 @@ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
+ }
+
+ if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
+- tmo = jiffies + msecs_to_jiffies(20);
+ /* R1B or with data, should check SDCBUSY */
+- while ((readl(host->base + SDC_STS) & SDC_STS_SDCBUSY) &&
+- time_before(jiffies, tmo))
+- cpu_relax();
+- if (readl(host->base + SDC_STS) & SDC_STS_SDCBUSY) {
++ ret = readl_poll_timeout_atomic(host->base + SDC_STS, val,
++ !(val & SDC_STS_SDCBUSY), 1, 20000);
++ if (ret) {
+ dev_err(host->dev, "Controller busy detected\n");
+ host->error |= REQ_CMD_BUSY;
+ msdc_cmd_done(host, MSDC_INT_CMDTMO, mrq, cmd);
+@@ -1356,7 +1355,7 @@ static void msdc_data_xfer_next(struct msdc_host *host, struct mmc_request *mrq)
+ msdc_request_done(host, mrq);
+ }
+
+-static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
++static void msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ struct mmc_request *mrq, struct mmc_data *data)
+ {
+ struct mmc_command *stop;
+@@ -1366,6 +1365,8 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ (MSDC_INT_XFER_COMPL | MSDC_INT_DATCRCERR | MSDC_INT_DATTMO
+ | MSDC_INT_DMA_BDCSERR | MSDC_INT_DMA_GPDCSERR
+ | MSDC_INT_DMA_PROTECT);
++ u32 val;
++ int ret;
+
+ spin_lock_irqsave(&host->lock, flags);
+ done = !host->data;
+@@ -1374,7 +1375,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (done)
+- return true;
++ return;
+ stop = data->stop;
+
+ if (check_data || (stop && stop->error)) {
+@@ -1382,8 +1383,17 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ readl(host->base + MSDC_DMA_CFG));
+ sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP,
+ 1);
+- while (readl(host->base + MSDC_DMA_CFG) & MSDC_DMA_CFG_STS)
+- cpu_relax();
++
++ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val,
++ !(val & MSDC_DMA_CTRL_STOP), 1, 20000);
++ if (ret)
++ dev_dbg(host->dev, "DMA stop timed out\n");
++
++ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CFG, val,
++ !(val & MSDC_DMA_CFG_STS), 1, 20000);
++ if (ret)
++ dev_dbg(host->dev, "DMA inactive timed out\n");
++
+ sdr_clr_bits(host->base + MSDC_INTEN, data_ints_mask);
+ dev_dbg(host->dev, "DMA stop\n");
+
+@@ -1407,9 +1417,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ }
+
+ msdc_data_xfer_next(host, mrq);
+- done = true;
+ }
+- return done;
+ }
+
+ static void msdc_set_buswidth(struct msdc_host *host, u32 width)
+@@ -2330,15 +2338,25 @@ static void msdc_cqe_enable(struct mmc_host *mmc)
+ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery)
+ {
+ struct msdc_host *host = mmc_priv(mmc);
++ unsigned int val = 0;
+
+ /* disable cmdq irq */
+ sdr_clr_bits(host->base + MSDC_INTEN, MSDC_INT_CMDQ);
+ /* disable busy check */
+ sdr_clr_bits(host->base + MSDC_PATCH_BIT1, MSDC_PB1_BUSY_CHECK_SEL);
+
++ val = readl(host->base + MSDC_INT);
++ writel(val, host->base + MSDC_INT);
++
+ if (recovery) {
+ sdr_set_field(host->base + MSDC_DMA_CTRL,
+ MSDC_DMA_CTRL_STOP, 1);
++ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CTRL, val,
++ !(val & MSDC_DMA_CTRL_STOP), 1, 3000)))
++ return;
++ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val,
++ !(val & MSDC_DMA_CFG_STS), 1, 3000)))
++ return;
+ msdc_reset_hw(host);
+ }
+ }
+@@ -2437,13 +2455,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev,
+ if (IS_ERR(host->src_clk_cg))
+ host->src_clk_cg = NULL;
+
+- host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg");
++ /* If present, always enable for this clock gate */
++ host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg");
+ if (IS_ERR(host->sys_clk_cg))
+ host->sys_clk_cg = NULL;
+
+- /* If present, always enable for this clock gate */
+- clk_prepare_enable(host->sys_clk_cg);
+-
+ host->bulk_clks[0].id = "pclk_cg";
+ host->bulk_clks[1].id = "axi_cg";
+ host->bulk_clks[2].id = "ahb_cg";
+@@ -2509,7 +2525,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
+
+ host->irq = platform_get_irq(pdev, 0);
+ if (host->irq < 0) {
+- ret = -EINVAL;
++ ret = host->irq;
+ goto host_free;
+ }
+
+@@ -2593,7 +2609,11 @@ static int msdc_drv_probe(struct platform_device *pdev)
+ spin_lock_init(&host->lock);
+
+ platform_set_drvdata(pdev, mmc);
+- msdc_ungate_clock(host);
++ ret = msdc_ungate_clock(host);
++ if (ret) {
++ dev_err(&pdev->dev, "Cannot ungate clocks!\n");
++ goto release_mem;
++ }
+ msdc_init_hw(host);
+
+ if (mmc->caps2 & MMC_CAP2_CQE) {
+@@ -2752,8 +2772,12 @@ static int __maybe_unused msdc_runtime_resume(struct device *dev)
+ {
+ struct mmc_host *mmc = dev_get_drvdata(dev);
+ struct msdc_host *host = mmc_priv(mmc);
++ int ret;
++
++ ret = msdc_ungate_clock(host);
++ if (ret)
++ return ret;
+
+- msdc_ungate_clock(host);
+ msdc_restore_reg(host);
+ return 0;
+ }
+@@ -2762,11 +2786,14 @@ static int __maybe_unused msdc_suspend(struct device *dev)
+ {
+ struct mmc_host *mmc = dev_get_drvdata(dev);
+ int ret;
++ u32 val;
+
+ if (mmc->caps2 & MMC_CAP2_CQE) {
+ ret = cqhci_suspend(mmc);
+ if (ret)
+ return ret;
++ val = readl(((struct msdc_host *)mmc_priv(mmc))->base + MSDC_INT);
++ writel(val, ((struct msdc_host *)mmc_priv(mmc))->base + MSDC_INT);
+ }
+
+ return pm_runtime_force_suspend(dev);
+diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
+index 629efbe639c4f..b4f6a0a2fcb51 100644
+--- a/drivers/mmc/host/mvsdio.c
++++ b/drivers/mmc/host/mvsdio.c
+@@ -704,7 +704,7 @@ static int mvsd_probe(struct platform_device *pdev)
+ }
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+- return -ENXIO;
++ return irq;
+
+ mmc = mmc_alloc_host(sizeof(struct mvsd_host), &pdev->dev);
+ if (!mmc) {
+diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
+index 2fe6fcdbb1b30..97227ad717150 100644
+--- a/drivers/mmc/host/mxcmmc.c
++++ b/drivers/mmc/host/mxcmmc.c
+@@ -1025,7 +1025,7 @@ static int mxcmci_probe(struct platform_device *pdev)
+ mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+ mmc->max_seg_size = mmc->max_req_size;
+
+- host->devtype = (enum mxcmci_type)of_device_get_match_data(&pdev->dev);
++ host->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+ /* adjust max_segs after devtype detection */
+ if (!is_mpc512x_mmc(host))
+@@ -1143,7 +1143,9 @@ static int mxcmci_probe(struct platform_device *pdev)
+
+ timer_setup(&host->watchdog, mxcmci_watchdog, 0);
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto out_free_dma;
+
+ return 0;
+
+diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
+index 947581de78601..8c3655d3be961 100644
+--- a/drivers/mmc/host/mxs-mmc.c
++++ b/drivers/mmc/host/mxs-mmc.c
+@@ -552,6 +552,11 @@ static const struct of_device_id mxs_mmc_dt_ids[] = {
+ };
+ MODULE_DEVICE_TABLE(of, mxs_mmc_dt_ids);
+
++static void mxs_mmc_regulator_disable(void *regulator)
++{
++ regulator_disable(regulator);
++}
++
+ static int mxs_mmc_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+@@ -591,6 +596,11 @@ static int mxs_mmc_probe(struct platform_device *pdev)
+ "Failed to enable vmmc regulator: %d\n", ret);
+ goto out_mmc_free;
+ }
++
++ ret = devm_add_action_or_reset(&pdev->dev, mxs_mmc_regulator_disable,
++ reg_vmmc);
++ if (ret)
++ goto out_mmc_free;
+ }
+
+ ssp->clk = devm_clk_get(&pdev->dev, NULL);
+diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
+index 5e5af34090f1b..ecf2a68d0e846 100644
+--- a/drivers/mmc/host/omap.c
++++ b/drivers/mmc/host/omap.c
+@@ -1343,7 +1343,7 @@ static int mmc_omap_probe(struct platform_device *pdev)
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+- return -ENXIO;
++ return irq;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ host->virt_base = devm_ioremap_resource(&pdev->dev, res);
+diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
+index 2f8038d69f677..500c906413a7f 100644
+--- a/drivers/mmc/host/omap_hsmmc.c
++++ b/drivers/mmc/host/omap_hsmmc.c
+@@ -1832,9 +1832,11 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- irq = platform_get_irq(pdev, 0);
+- if (res == NULL || irq < 0)
++ if (!res)
+ return -ENXIO;
++ irq = platform_get_irq(pdev, 0);
++ if (irq < 0)
++ return irq;
+
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+@@ -1987,7 +1989,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
+ if (!ret)
+ mmc->caps |= MMC_CAP_SDIO_IRQ;
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto err_irq;
+
+ if (mmc_pdata(host)->name != NULL) {
+ ret = device_create_file(&mmc->class_dev, &dev_attr_slot_name);
+diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c
+index 3dc143b039397..679b8b0b310e5 100644
+--- a/drivers/mmc/host/owl-mmc.c
++++ b/drivers/mmc/host/owl-mmc.c
+@@ -638,7 +638,7 @@ static int owl_mmc_probe(struct platform_device *pdev)
+
+ owl_host->irq = platform_get_irq(pdev, 0);
+ if (owl_host->irq < 0) {
+- ret = -EINVAL;
++ ret = owl_host->irq;
+ goto err_release_channel;
+ }
+
+diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
+index 316393c694d7a..e25e9bb34eb39 100644
+--- a/drivers/mmc/host/pxamci.c
++++ b/drivers/mmc/host/pxamci.c
+@@ -648,7 +648,7 @@ static int pxamci_probe(struct platform_device *pdev)
+
+ ret = pxamci_of_init(pdev, mmc);
+ if (ret)
+- return ret;
++ goto out;
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+@@ -672,7 +672,7 @@ static int pxamci_probe(struct platform_device *pdev)
+
+ ret = pxamci_init_ocr(host);
+ if (ret < 0)
+- return ret;
++ goto out;
+
+ mmc->caps = 0;
+ host->cmdat = 0;
+@@ -763,7 +763,12 @@ static int pxamci_probe(struct platform_device *pdev)
+ dev_warn(dev, "gpio_ro and get_ro() both defined\n");
+ }
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret) {
++ if (host->pdata && host->pdata->exit)
++ host->pdata->exit(dev, mmc);
++ goto out;
++ }
+
+ return 0;
+
+diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
+index a4407f391f66a..3ff95ed8513a3 100644
+--- a/drivers/mmc/host/renesas_sdhi_core.c
++++ b/drivers/mmc/host/renesas_sdhi_core.c
+@@ -51,9 +51,6 @@
+ #define HOST_MODE_GEN3_32BIT (HOST_MODE_GEN3_WMODE | HOST_MODE_GEN3_BUSWIDTH)
+ #define HOST_MODE_GEN3_64BIT 0
+
+-#define CTL_SDIF_MODE 0xe6
+-#define SDIF_MODE_HS400 BIT(0)
+-
+ #define SDHI_VER_GEN2_SDR50 0x490c
+ #define SDHI_VER_RZ_A1 0x820b
+ /* very old datasheets said 0x490c for SDR104, too. They are wrong! */
+@@ -382,10 +379,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc)
+ SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) |
+ sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2));
+
+- /* Set the sampling clock selection range of HS400 mode */
+ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL,
+ SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN |
+- 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT);
++ sd_scc_read32(host, priv,
++ SH_MOBILE_SDHI_SCC_DTCNTL));
+
+ /* Avoid bad TAP */
+ if (bad_taps & BIT(priv->tap_set)) {
+@@ -523,7 +520,7 @@ static void renesas_sdhi_reset_hs400_mode(struct tmio_mmc_host *host,
+ SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) &
+ sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2));
+
+- if (priv->adjust_hs400_calib_table)
++ if (priv->quirks && (priv->quirks->hs400_calib_table || priv->quirks->hs400_bad_taps))
+ renesas_sdhi_adjust_hs400_mode_disable(host);
+
+ sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
+@@ -550,23 +547,25 @@ static void renesas_sdhi_scc_reset(struct tmio_mmc_host *host, struct renesas_sd
+ }
+
+ /* only populated for TMIO_MMC_MIN_RCAR2 */
+-static void renesas_sdhi_reset(struct tmio_mmc_host *host)
++static void renesas_sdhi_reset(struct tmio_mmc_host *host, bool preserve)
+ {
+ struct renesas_sdhi *priv = host_to_priv(host);
+ int ret;
+ u16 val;
+
+- if (priv->rstc) {
+- reset_control_reset(priv->rstc);
+- /* Unknown why but without polling reset status, it will hang */
+- read_poll_timeout(reset_control_status, ret, ret == 0, 1, 100,
+- false, priv->rstc);
+- /* At least SDHI_VER_GEN2_SDR50 needs manual release of reset */
+- sd_ctrl_write16(host, CTL_RESET_SD, 0x0001);
+- priv->needs_adjust_hs400 = false;
+- renesas_sdhi_set_clock(host, host->clk_cache);
+- } else if (priv->scc_ctl) {
+- renesas_sdhi_scc_reset(host, priv);
++ if (!preserve) {
++ if (priv->rstc) {
++ reset_control_reset(priv->rstc);
++ /* Unknown why but without polling reset status, it will hang */
++ read_poll_timeout(reset_control_status, ret, ret == 0, 1, 100,
++ false, priv->rstc);
++ /* At least SDHI_VER_GEN2_SDR50 needs manual release of reset */
++ sd_ctrl_write16(host, CTL_RESET_SD, 0x0001);
++ priv->needs_adjust_hs400 = false;
++ renesas_sdhi_set_clock(host, host->clk_cache);
++ } else if (priv->scc_ctl) {
++ renesas_sdhi_scc_reset(host, priv);
++ }
+ }
+
+ if (sd_ctrl_read16(host, CTL_VERSION) >= SDHI_VER_GEN3_SD) {
+@@ -673,7 +672,7 @@ static int renesas_sdhi_execute_tuning(struct mmc_host *mmc, u32 opcode)
+
+ /* Issue CMD19 twice for each tap */
+ for (i = 0; i < 2 * priv->tap_num; i++) {
+- int cmd_error;
++ int cmd_error = 0;
+
+ /* Set sampling clock position */
+ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET, i % priv->tap_num);
+@@ -925,6 +924,10 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ if (IS_ERR(priv->clk_cd))
+ priv->clk_cd = NULL;
+
++ priv->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
++ if (IS_ERR(priv->rstc))
++ return PTR_ERR(priv->rstc);
++
+ priv->pinctrl = devm_pinctrl_get(&pdev->dev);
+ if (!IS_ERR(priv->pinctrl)) {
+ priv->pins_default = pinctrl_lookup_state(priv->pinctrl,
+@@ -972,6 +975,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ host->sdcard_irq_setbit_mask = TMIO_STAT_ALWAYS_SET_27;
+ host->sdcard_irq_mask_all = TMIO_MASK_ALL_RCAR2;
+ host->reset = renesas_sdhi_reset;
++ } else {
++ host->sdcard_irq_mask_all = TMIO_MASK_ALL;
+ }
+
+ /* Orginally registers were 16 bit apart, could be 32 or 64 nowadays */
+@@ -1013,10 +1018,6 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ if (ret)
+ goto efree;
+
+- priv->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+- if (IS_ERR(priv->rstc))
+- return PTR_ERR(priv->rstc);
+-
+ ver = sd_ctrl_read16(host, CTL_VERSION);
+ /* GEN2_SDR104 is first known SDHI to use 32bit block count */
+ if (ver < SDHI_VER_GEN2_SDR104 && mmc_data->max_blk_count > U16_MAX)
+@@ -1038,11 +1039,14 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ if (ver >= SDHI_VER_GEN3_SD)
+ host->get_timeout_cycles = renesas_sdhi_gen3_get_cycles;
+
++ /* Check for SCC so we can reset it if needed */
++ if (of_data && of_data->scc_offset && ver >= SDHI_VER_GEN2_SDR104)
++ priv->scc_ctl = host->ctl + of_data->scc_offset;
++
+ /* Enable tuning iff we have an SCC and a supported mode */
+- if (of_data && of_data->scc_offset &&
+- (host->mmc->caps & MMC_CAP_UHS_SDR104 ||
+- host->mmc->caps2 & (MMC_CAP2_HS200_1_8V_SDR |
+- MMC_CAP2_HS400_1_8V))) {
++ if (priv->scc_ctl && (host->mmc->caps & MMC_CAP_UHS_SDR104 ||
++ host->mmc->caps2 & (MMC_CAP2_HS200_1_8V_SDR |
++ MMC_CAP2_HS400_1_8V))) {
+ const struct renesas_sdhi_scc *taps = of_data->taps;
+ bool use_4tap = priv->quirks && priv->quirks->hs400_4taps;
+ bool hit = false;
+@@ -1062,7 +1066,6 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ if (!hit)
+ dev_warn(&host->pdev->dev, "Unknown clock rate for tuning\n");
+
+- priv->scc_ctl = host->ctl + of_data->scc_offset;
+ host->check_retune = renesas_sdhi_check_scc_error;
+ host->ops.execute_tuning = renesas_sdhi_execute_tuning;
+ host->ops.prepare_hs400_tuning = renesas_sdhi_prepare_hs400_tuning;
+@@ -1070,9 +1073,7 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ host->ops.hs400_complete = renesas_sdhi_hs400_complete;
+ }
+
+- ret = tmio_mmc_host_probe(host);
+- if (ret < 0)
+- goto edisclk;
++ sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all);
+
+ num_irqs = platform_irq_count(pdev);
+ if (num_irqs < 0) {
+@@ -1099,6 +1100,10 @@ int renesas_sdhi_probe(struct platform_device *pdev,
+ goto eirq;
+ }
+
++ ret = tmio_mmc_host_probe(host);
++ if (ret < 0)
++ goto edisclk;
++
+ dev_info(&pdev->dev, "%s base at %pa, max clock rate %u MHz\n",
+ mmc_hostname(host->mmc), &res->start, host->mmc->f_max / 1000000);
+
+diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
+index 58cfaffa3c2d8..8098726dcc0bf 100644
+--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
++++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
+@@ -38,10 +38,7 @@ struct realtek_pci_sdmmc {
+ bool double_clk;
+ bool eject;
+ bool initial_mode;
+- int power_state;
+-#define SDMMC_POWER_ON 1
+-#define SDMMC_POWER_OFF 0
+-
++ int prev_power_state;
+ int sg_count;
+ s32 cookie;
+ int cookie_sg_count;
+@@ -905,7 +902,7 @@ static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
+ return err;
+ }
+
+-static int sd_power_on(struct realtek_pci_sdmmc *host)
++static int sd_power_on(struct realtek_pci_sdmmc *host, unsigned char power_mode)
+ {
+ struct rtsx_pcr *pcr = host->pcr;
+ struct mmc_host *mmc = host->mmc;
+@@ -913,9 +910,14 @@ static int sd_power_on(struct realtek_pci_sdmmc *host)
+ u32 val;
+ u8 test_mode;
+
+- if (host->power_state == SDMMC_POWER_ON)
++ if (host->prev_power_state == MMC_POWER_ON)
+ return 0;
+
++ if (host->prev_power_state == MMC_POWER_UP) {
++ rtsx_pci_write_register(pcr, SD_BUS_STAT, SD_CLK_TOGGLE_EN, 0);
++ goto finish;
++ }
++
+ msleep(100);
+
+ rtsx_pci_init_cmd(pcr);
+@@ -936,10 +938,15 @@ static int sd_power_on(struct realtek_pci_sdmmc *host)
+ if (err < 0)
+ return err;
+
++ mdelay(1);
++
+ err = rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN);
+ if (err < 0)
+ return err;
+
++ /* send at least 74 clocks */
++ rtsx_pci_write_register(pcr, SD_BUS_STAT, SD_CLK_TOGGLE_EN, SD_CLK_TOGGLE_EN);
++
+ if (PCI_PID(pcr) == PID_5261) {
+ /*
+ * If test mode is set switch to SD Express mandatorily,
+@@ -964,7 +971,8 @@ static int sd_power_on(struct realtek_pci_sdmmc *host)
+ }
+ }
+
+- host->power_state = SDMMC_POWER_ON;
++finish:
++ host->prev_power_state = power_mode;
+ return 0;
+ }
+
+@@ -973,7 +981,7 @@ static int sd_power_off(struct realtek_pci_sdmmc *host)
+ struct rtsx_pcr *pcr = host->pcr;
+ int err;
+
+- host->power_state = SDMMC_POWER_OFF;
++ host->prev_power_state = MMC_POWER_OFF;
+
+ rtsx_pci_init_cmd(pcr);
+
+@@ -999,7 +1007,7 @@ static int sd_set_power_mode(struct realtek_pci_sdmmc *host,
+ if (power_mode == MMC_POWER_OFF)
+ err = sd_power_off(host);
+ else
+- err = sd_power_on(host);
++ err = sd_power_on(host, power_mode);
+
+ return err;
+ }
+@@ -1466,6 +1474,7 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
+ struct realtek_pci_sdmmc *host;
+ struct rtsx_pcr *pcr;
+ struct pcr_handle *handle = pdev->dev.platform_data;
++ int ret;
+
+ if (!handle)
+ return -ENXIO;
+@@ -1482,10 +1491,11 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
+
+ host = mmc_priv(mmc);
+ host->pcr = pcr;
++ mmc->ios.power_delay_ms = 5;
+ host->mmc = mmc;
+ host->pdev = pdev;
+ host->cookie = -1;
+- host->power_state = SDMMC_POWER_OFF;
++ host->prev_power_state = MMC_POWER_OFF;
+ INIT_WORK(&host->work, sd_request);
+ platform_set_drvdata(pdev, host);
+ pcr->slots[RTSX_SD_CARD].p_dev = pdev;
+@@ -1495,14 +1505,20 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
+
+ realtek_init_host(host);
+
+- if (pcr->rtd3_en) {
+- pm_runtime_set_autosuspend_delay(&pdev->dev, 5000);
+- pm_runtime_use_autosuspend(&pdev->dev);
+- pm_runtime_enable(&pdev->dev);
+- }
+-
++ pm_runtime_no_callbacks(&pdev->dev);
++ pm_runtime_set_active(&pdev->dev);
++ pm_runtime_enable(&pdev->dev);
++ pm_runtime_set_autosuspend_delay(&pdev->dev, 200);
++ pm_runtime_mark_last_busy(&pdev->dev);
++ pm_runtime_use_autosuspend(&pdev->dev);
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret) {
++ pm_runtime_dont_use_autosuspend(&pdev->dev);
++ pm_runtime_disable(&pdev->dev);
++ mmc_free_host(mmc);
++ return ret;
++ }
+
+ return 0;
+ }
+@@ -1521,11 +1537,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
+ pcr->slots[RTSX_SD_CARD].card_event = NULL;
+ mmc = host->mmc;
+
+- if (pcr->rtd3_en) {
+- pm_runtime_dont_use_autosuspend(&pdev->dev);
+- pm_runtime_disable(&pdev->dev);
+- }
+-
+ cancel_work_sync(&host->work);
+
+ mutex_lock(&host->host_mutex);
+@@ -1548,6 +1559,9 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
+
+ flush_work(&host->work);
+
++ pm_runtime_dont_use_autosuspend(&pdev->dev);
++ pm_runtime_disable(&pdev->dev);
++
+ mmc_free_host(mmc);
+
+ dev_dbg(&(pdev->dev),
+diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
+index 5fe4528e296e6..1be3a355f10d5 100644
+--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
++++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
+@@ -1332,6 +1332,7 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev)
+ #ifdef RTSX_USB_USE_LEDS_CLASS
+ int err;
+ #endif
++ int ret;
+
+ ucr = usb_get_intfdata(to_usb_interface(pdev->dev.parent));
+ if (!ucr)
+@@ -1368,7 +1369,15 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev)
+ INIT_WORK(&host->led_work, rtsx_usb_update_led);
+
+ #endif
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret) {
++#ifdef RTSX_USB_USE_LEDS_CLASS
++ led_classdev_unregister(&host->led);
++#endif
++ mmc_free_host(mmc);
++ pm_runtime_disable(&pdev->dev);
++ return ret;
++ }
+
+ return 0;
+ }
+diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
+index 8fe65f172a611..f4e15eef70454 100644
+--- a/drivers/mmc/host/sdhci-acpi.c
++++ b/drivers/mmc/host/sdhci-acpi.c
+@@ -910,7 +910,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
+ host->ops = &sdhci_acpi_ops_dflt;
+ host->irq = platform_get_irq(pdev, 0);
+ if (host->irq < 0) {
+- err = -EINVAL;
++ err = host->irq;
+ goto err_free;
+ }
+
+diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
+index f24623aac2dbe..4d42b1810acea 100644
+--- a/drivers/mmc/host/sdhci-brcmstb.c
++++ b/drivers/mmc/host/sdhci-brcmstb.c
+@@ -12,28 +12,55 @@
+ #include <linux/bitops.h>
+ #include <linux/delay.h>
+
++#include "sdhci-cqhci.h"
+ #include "sdhci-pltfm.h"
+ #include "cqhci.h"
+
+ #define SDHCI_VENDOR 0x78
+ #define SDHCI_VENDOR_ENHANCED_STRB 0x1
++#define SDHCI_VENDOR_GATE_SDCLK_EN 0x2
+
+-#define BRCMSTB_PRIV_FLAGS_NO_64BIT BIT(0)
+-#define BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT BIT(1)
++#define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0)
++#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1)
++#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2)
++
++#define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0)
++#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1)
+
+ #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200
+
+ struct sdhci_brcmstb_priv {
+ void __iomem *cfg_regs;
+- bool has_cqe;
++ unsigned int flags;
+ };
+
+ struct brcmstb_match_priv {
+ void (*hs400es)(struct mmc_host *mmc, struct mmc_ios *ios);
+ struct sdhci_ops *ops;
+- unsigned int flags;
++ const unsigned int flags;
+ };
+
++static inline void enable_clock_gating(struct sdhci_host *host)
++{
++ u32 reg;
++
++ reg = sdhci_readl(host, SDHCI_VENDOR);
++ reg |= SDHCI_VENDOR_GATE_SDCLK_EN;
++ sdhci_writel(host, reg, SDHCI_VENDOR);
++}
++
++void brcmstb_reset(struct sdhci_host *host, u8 mask)
++{
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host);
++
++ sdhci_and_cqhci_reset(host, mask);
++
++ /* Reset will clear this, so re-enable it */
++ if (priv->flags & BRCMSTB_PRIV_FLAGS_GATE_CLOCK)
++ enable_clock_gating(host);
++}
++
+ static void sdhci_brcmstb_hs400es(struct mmc_host *mmc, struct mmc_ios *ios)
+ {
+ struct sdhci_host *host = mmc_priv(mmc);
+@@ -129,22 +156,23 @@ static struct sdhci_ops sdhci_brcmstb_ops = {
+ static struct sdhci_ops sdhci_brcmstb_ops_7216 = {
+ .set_clock = sdhci_brcmstb_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+- .reset = sdhci_reset,
++ .reset = brcmstb_reset,
+ .set_uhs_signaling = sdhci_brcmstb_set_uhs_signaling,
+ };
+
+ static struct brcmstb_match_priv match_priv_7425 = {
+- .flags = BRCMSTB_PRIV_FLAGS_NO_64BIT |
+- BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT,
++ .flags = BRCMSTB_MATCH_FLAGS_NO_64BIT |
++ BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT,
+ .ops = &sdhci_brcmstb_ops,
+ };
+
+ static struct brcmstb_match_priv match_priv_7445 = {
+- .flags = BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT,
++ .flags = BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT,
+ .ops = &sdhci_brcmstb_ops,
+ };
+
+ static const struct brcmstb_match_priv match_priv_7216 = {
++ .flags = BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE,
+ .hs400es = sdhci_brcmstb_hs400es,
+ .ops = &sdhci_brcmstb_ops_7216,
+ };
+@@ -176,7 +204,7 @@ static int sdhci_brcmstb_add_host(struct sdhci_host *host,
+ bool dma64;
+ int ret;
+
+- if (!priv->has_cqe)
++ if ((priv->flags & BRCMSTB_PRIV_FLAGS_HAS_CQE) == 0)
+ return sdhci_add_host(host);
+
+ dev_dbg(mmc_dev(host->mmc), "CQE is enabled\n");
+@@ -225,7 +253,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
+ struct sdhci_brcmstb_priv *priv;
+ struct sdhci_host *host;
+ struct resource *iomem;
+- bool has_cqe = false;
+ struct clk *clk;
+ int res;
+
+@@ -244,10 +271,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
+ return res;
+
+ memset(&brcmstb_pdata, 0, sizeof(brcmstb_pdata));
+- if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
+- has_cqe = true;
+- match_priv->ops->irq = sdhci_brcmstb_cqhci_irq;
+- }
+ brcmstb_pdata.ops = match_priv->ops;
+ host = sdhci_pltfm_init(pdev, &brcmstb_pdata,
+ sizeof(struct sdhci_brcmstb_priv));
+@@ -258,7 +281,10 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
+
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
+- priv->has_cqe = has_cqe;
++ if (device_property_read_bool(&pdev->dev, "supports-cqe")) {
++ priv->flags |= BRCMSTB_PRIV_FLAGS_HAS_CQE;
++ match_priv->ops->irq = sdhci_brcmstb_cqhci_irq;
++ }
+
+ /* Map in the non-standard CFG registers */
+ iomem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+@@ -273,6 +299,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
+ if (res)
+ goto err;
+
++ /*
++ * Automatic clock gating does not work for SD cards that may
++ * voltage switch so only enable it for non-removable devices.
++ */
++ if ((match_priv->flags & BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE) &&
++ (host->mmc->caps & MMC_CAP_NONREMOVABLE))
++ priv->flags |= BRCMSTB_PRIV_FLAGS_GATE_CLOCK;
++
+ /*
+ * If the chip has enhanced strobe and it's enabled, add
+ * callback
+@@ -287,14 +321,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
+ * properties through mmc_of_parse().
+ */
+ host->caps = sdhci_readl(host, SDHCI_CAPABILITIES);
+- if (match_priv->flags & BRCMSTB_PRIV_FLAGS_NO_64BIT)
++ if (match_priv->flags & BRCMSTB_MATCH_FLAGS_NO_64BIT)
+ host->caps &= ~SDHCI_CAN_64BIT;
+ host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1);
+ host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 |
+ SDHCI_SUPPORT_DDR50);
+ host->quirks |= SDHCI_QUIRK_MISSING_CAPS;
+
+- if (match_priv->flags & BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT)
++ if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT)
+ host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+
+ res = sdhci_brcmstb_add_host(host, priv);
+diff --git a/drivers/mmc/host/sdhci-cqhci.h b/drivers/mmc/host/sdhci-cqhci.h
+new file mode 100644
+index 0000000000000..cf8e7ba71bbd7
+--- /dev/null
++++ b/drivers/mmc/host/sdhci-cqhci.h
+@@ -0,0 +1,24 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright 2022 The Chromium OS Authors
++ *
++ * Support that applies to the combination of SDHCI and CQHCI, while not
++ * expressing a dependency between the two modules.
++ */
++
++#ifndef __MMC_HOST_SDHCI_CQHCI_H__
++#define __MMC_HOST_SDHCI_CQHCI_H__
++
++#include "cqhci.h"
++#include "sdhci.h"
++
++static inline void sdhci_and_cqhci_reset(struct sdhci_host *host, u8 mask)
++{
++ if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) &&
++ host->mmc->cqe_private)
++ cqhci_deactivate(host->mmc);
++
++ sdhci_reset(host, mask);
++}
++
++#endif /* __MMC_HOST_SDHCI_CQHCI_H__ */
+diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
+index e658f01742420..a6aa33dcd2a2e 100644
+--- a/drivers/mmc/host/sdhci-esdhc-imx.c
++++ b/drivers/mmc/host/sdhci-esdhc-imx.c
+@@ -25,6 +25,7 @@
+ #include <linux/of_device.h>
+ #include <linux/pinctrl/consumer.h>
+ #include <linux/pm_runtime.h>
++#include "sdhci-cqhci.h"
+ #include "sdhci-pltfm.h"
+ #include "sdhci-esdhc.h"
+ #include "cqhci.h"
+@@ -106,6 +107,7 @@
+ #define ESDHC_TUNING_START_TAP_DEFAULT 0x1
+ #define ESDHC_TUNING_START_TAP_MASK 0x7f
+ #define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7)
++#define ESDHC_TUNING_STEP_DEFAULT 0x1
+ #define ESDHC_TUNING_STEP_MASK 0x00070000
+ #define ESDHC_TUNING_STEP_SHIFT 16
+
+@@ -300,7 +302,6 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = {
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
+ | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
+- | ESDHC_FLAG_CQHCI
+ | ESDHC_FLAG_STATE_LOST_IN_LPMODE
+ | ESDHC_FLAG_CLK_RATE_LOST_IN_PM_RUNTIME,
+ };
+@@ -309,7 +310,6 @@ static struct esdhc_soc_data usdhc_imx8mm_data = {
+ .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
+ | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
+ | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
+- | ESDHC_FLAG_CQHCI
+ | ESDHC_FLAG_STATE_LOST_IN_LPMODE,
+ };
+
+@@ -1275,7 +1275,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
+
+ static void esdhc_reset(struct sdhci_host *host, u8 mask)
+ {
+- sdhci_reset(host, mask);
++ sdhci_and_cqhci_reset(host, mask);
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+@@ -1347,7 +1347,7 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
+ struct cqhci_host *cq_host = host->mmc->cqe_private;
+- int tmp;
++ u32 tmp;
+
+ if (esdhc_is_usdhc(imx_data)) {
+ /*
+@@ -1400,17 +1400,24 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host)
+
+ if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
+ tmp = readl(host->ioaddr + ESDHC_TUNING_CTRL);
+- tmp |= ESDHC_STD_TUNING_EN |
+- ESDHC_TUNING_START_TAP_DEFAULT;
+- if (imx_data->boarddata.tuning_start_tap) {
+- tmp &= ~ESDHC_TUNING_START_TAP_MASK;
++ tmp |= ESDHC_STD_TUNING_EN;
++
++ /*
++ * ROM code or bootloader may config the start tap
++ * and step, unmask them first.
++ */
++ tmp &= ~(ESDHC_TUNING_START_TAP_MASK | ESDHC_TUNING_STEP_MASK);
++ if (imx_data->boarddata.tuning_start_tap)
+ tmp |= imx_data->boarddata.tuning_start_tap;
+- }
++ else
++ tmp |= ESDHC_TUNING_START_TAP_DEFAULT;
+
+ if (imx_data->boarddata.tuning_step) {
+- tmp &= ~ESDHC_TUNING_STEP_MASK;
+ tmp |= imx_data->boarddata.tuning_step
+ << ESDHC_TUNING_STEP_SHIFT;
++ } else {
++ tmp |= ESDHC_TUNING_STEP_DEFAULT
++ << ESDHC_TUNING_STEP_SHIFT;
+ }
+
+ /* Disable the CMD CRC check for tuning, if not, need to
+@@ -1496,7 +1503,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc)
+ * system resume back.
+ */
+ cqhci_writel(cq_host, 0, CQHCI_CTL);
+- if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT)
++ if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT)
+ dev_err(mmc_dev(host->mmc),
+ "failed to exit halt state when enable CQE\n");
+
+@@ -1561,6 +1568,10 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
+ if (ret)
+ return ret;
+
++ /* HS400/HS400ES require 8 bit bus */
++ if (!(host->mmc->caps & MMC_CAP_8_BIT_DATA))
++ host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);
++
+ if (mmc_gpio_get_cd(host->mmc) >= 0)
+ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+
+diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
+index 50c71e0ba5e4e..6537a845266dc 100644
+--- a/drivers/mmc/host/sdhci-msm.c
++++ b/drivers/mmc/host/sdhci-msm.c
+@@ -17,6 +17,7 @@
+ #include <linux/regulator/consumer.h>
+ #include <linux/interconnect.h>
+ #include <linux/pinctrl/consumer.h>
++#include <linux/reset.h>
+
+ #include "sdhci-pltfm.h"
+ #include "cqhci.h"
+@@ -2436,6 +2437,7 @@ static const struct sdhci_msm_variant_info sdm845_sdhci_var = {
+ static const struct of_device_id sdhci_msm_dt_match[] = {
+ {.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var},
+ {.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var},
++ {.compatible = "qcom,sdm670-sdhci", .data = &sdm845_sdhci_var},
+ {.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var},
+ {.compatible = "qcom,sc7180-sdhci", .data = &sdm845_sdhci_var},
+ {},
+@@ -2480,8 +2482,48 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev,
+ msm_host->ddr_config = DDR_CONFIG_POR_VAL;
+
+ of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config);
++
++ if (of_device_is_compatible(node, "qcom,msm8916-sdhci"))
++ host->quirks2 |= SDHCI_QUIRK2_BROKEN_64_BIT_DMA;
+ }
+
++static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host)
++{
++ struct reset_control *reset;
++ int ret = 0;
++
++ reset = reset_control_get_optional_exclusive(dev, NULL);
++ if (IS_ERR(reset))
++ return dev_err_probe(dev, PTR_ERR(reset),
++ "unable to acquire core_reset\n");
++
++ if (!reset)
++ return ret;
++
++ ret = reset_control_assert(reset);
++ if (ret) {
++ reset_control_put(reset);
++ return dev_err_probe(dev, ret, "core_reset assert failed\n");
++ }
++
++ /*
++ * The hardware requirement for delay between assert/deassert
++ * is at least 3-4 sleep clock (32.7KHz) cycles, which comes to
++ * ~125us (4/32768). To be on the safe side add 200us delay.
++ */
++ usleep_range(200, 210);
++
++ ret = reset_control_deassert(reset);
++ if (ret) {
++ reset_control_put(reset);
++ return dev_err_probe(dev, ret, "core_reset deassert failed\n");
++ }
++
++ usleep_range(200, 210);
++ reset_control_put(reset);
++
++ return ret;
++}
+
+ static int sdhci_msm_probe(struct platform_device *pdev)
+ {
+@@ -2529,6 +2571,10 @@ static int sdhci_msm_probe(struct platform_device *pdev)
+
+ msm_host->saved_tuning_phase = INVALID_TUNING_PHASE;
+
++ ret = sdhci_msm_gcc_reset(&pdev->dev, host);
++ if (ret)
++ goto pltfm_free;
++
+ /* Setup SDCC bus voter clock. */
+ msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus");
+ if (!IS_ERR(msm_host->bus_clk)) {
+diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
+index 737e2bfdedc28..bede148db7326 100644
+--- a/drivers/mmc/host/sdhci-of-arasan.c
++++ b/drivers/mmc/host/sdhci-of-arasan.c
+@@ -25,6 +25,7 @@
+ #include <linux/firmware/xlnx-zynqmp.h>
+
+ #include "cqhci.h"
++#include "sdhci-cqhci.h"
+ #include "sdhci-pltfm.h"
+
+ #define SDHCI_ARASAN_VENDOR_REGISTER 0x78
+@@ -359,7 +360,7 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
+
+- sdhci_reset(host, mask);
++ sdhci_and_cqhci_reset(host, mask);
+
+ if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) {
+ ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
+index d1a1c548c515f..0452c312b65eb 100644
+--- a/drivers/mmc/host/sdhci-of-at91.c
++++ b/drivers/mmc/host/sdhci-of-at91.c
+@@ -100,8 +100,13 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock)
+ static void sdhci_at91_set_uhs_signaling(struct sdhci_host *host,
+ unsigned int timing)
+ {
+- if (timing == MMC_TIMING_MMC_DDR52)
+- sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
++ u8 mc1r;
++
++ if (timing == MMC_TIMING_MMC_DDR52) {
++ mc1r = sdhci_readb(host, SDMMC_MC1R);
++ mc1r |= SDMMC_MC1R_DDR;
++ sdhci_writeb(host, mc1r, SDMMC_MC1R);
++ }
+ sdhci_set_uhs_signaling(host, timing);
+ }
+
+diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
+index bac874ab0b33a..335c88fd849c4 100644
+--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
+@@ -15,6 +15,7 @@
+ #include <linux/module.h>
+ #include <linux/of.h>
+ #include <linux/of_device.h>
++#include <linux/reset.h>
+ #include <linux/sizes.h>
+
+ #include "sdhci-pltfm.h"
+@@ -55,14 +56,15 @@
+ #define DLL_LOCK_WO_TMOUT(x) \
+ ((((x) & DWCMSHC_EMMC_DLL_LOCKED) == DWCMSHC_EMMC_DLL_LOCKED) && \
+ (((x) & DWCMSHC_EMMC_DLL_TIMEOUT) == 0))
+-#define RK3568_MAX_CLKS 3
++#define RK35xx_MAX_CLKS 3
+
+ #define BOUNDARY_OK(addr, len) \
+ ((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
+
+-struct rk3568_priv {
++struct rk35xx_priv {
+ /* Rockchip specified optional clocks */
+- struct clk_bulk_data rockchip_clks[RK3568_MAX_CLKS];
++ struct clk_bulk_data rockchip_clks[RK35xx_MAX_CLKS];
++ struct reset_control *reset;
+ u8 txclk_tapnum;
+ };
+
+@@ -176,7 +178,7 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock
+ {
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *dwc_priv = sdhci_pltfm_priv(pltfm_host);
+- struct rk3568_priv *priv = dwc_priv->priv;
++ struct rk35xx_priv *priv = dwc_priv->priv;
+ u8 txclk_tapnum = DLL_TXCLK_TAPNUM_DEFAULT;
+ u32 extra, reg;
+ int err;
+@@ -255,6 +257,21 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock
+ sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
+ }
+
++static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask)
++{
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++ struct dwcmshc_priv *dwc_priv = sdhci_pltfm_priv(pltfm_host);
++ struct rk35xx_priv *priv = dwc_priv->priv;
++
++ if (mask & SDHCI_RESET_ALL && priv->reset) {
++ reset_control_assert(priv->reset);
++ udelay(1);
++ reset_control_deassert(priv->reset);
++ }
++
++ sdhci_reset(host, mask);
++}
++
+ static const struct sdhci_ops sdhci_dwcmshc_ops = {
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+@@ -264,12 +281,12 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = {
+ .adma_write_desc = dwcmshc_adma_write_desc,
+ };
+
+-static const struct sdhci_ops sdhci_dwcmshc_rk3568_ops = {
++static const struct sdhci_ops sdhci_dwcmshc_rk35xx_ops = {
+ .set_clock = dwcmshc_rk3568_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .set_uhs_signaling = dwcmshc_set_uhs_signaling,
+ .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+- .reset = sdhci_reset,
++ .reset = rk35xx_sdhci_reset,
+ .adma_write_desc = dwcmshc_adma_write_desc,
+ };
+
+@@ -279,30 +296,46 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = {
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+ };
+
+-static const struct sdhci_pltfm_data sdhci_dwcmshc_rk3568_pdata = {
+- .ops = &sdhci_dwcmshc_rk3568_ops,
++#ifdef CONFIG_ACPI
++static const struct sdhci_pltfm_data sdhci_dwcmshc_bf3_pdata = {
++ .ops = &sdhci_dwcmshc_ops,
++ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
++ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
++ SDHCI_QUIRK2_ACMD23_BROKEN,
++};
++#endif
++
++static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = {
++ .ops = &sdhci_dwcmshc_rk35xx_ops,
+ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+ SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+ };
+
+-static int dwcmshc_rk3568_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
++static int dwcmshc_rk35xx_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv)
+ {
+ int err;
+- struct rk3568_priv *priv = dwc_priv->priv;
++ struct rk35xx_priv *priv = dwc_priv->priv;
++
++ priv->reset = devm_reset_control_array_get_optional_exclusive(mmc_dev(host->mmc));
++ if (IS_ERR(priv->reset)) {
++ err = PTR_ERR(priv->reset);
++ dev_err(mmc_dev(host->mmc), "failed to get reset control %d\n", err);
++ return err;
++ }
+
+ priv->rockchip_clks[0].id = "axi";
+ priv->rockchip_clks[1].id = "block";
+ priv->rockchip_clks[2].id = "timer";
+- err = devm_clk_bulk_get_optional(mmc_dev(host->mmc), RK3568_MAX_CLKS,
++ err = devm_clk_bulk_get_optional(mmc_dev(host->mmc), RK35xx_MAX_CLKS,
+ priv->rockchip_clks);
+ if (err) {
+ dev_err(mmc_dev(host->mmc), "failed to get clocks %d\n", err);
+ return err;
+ }
+
+- err = clk_bulk_prepare_enable(RK3568_MAX_CLKS, priv->rockchip_clks);
++ err = clk_bulk_prepare_enable(RK35xx_MAX_CLKS, priv->rockchip_clks);
+ if (err) {
+ dev_err(mmc_dev(host->mmc), "failed to enable clocks %d\n", err);
+ return err;
+@@ -324,7 +357,7 @@ static int dwcmshc_rk3568_init(struct sdhci_host *host, struct dwcmshc_priv *dwc
+ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = {
+ {
+ .compatible = "rockchip,rk3568-dwcmshc",
+- .data = &sdhci_dwcmshc_rk3568_pdata,
++ .data = &sdhci_dwcmshc_rk35xx_pdata,
+ },
+ {
+ .compatible = "snps,dwcmshc-sdhci",
+@@ -336,7 +369,10 @@ MODULE_DEVICE_TABLE(of, sdhci_dwcmshc_dt_ids);
+
+ #ifdef CONFIG_ACPI
+ static const struct acpi_device_id sdhci_dwcmshc_acpi_ids[] = {
+- { .id = "MLNXBF30" },
++ {
++ .id = "MLNXBF30",
++ .driver_data = (kernel_ulong_t)&sdhci_dwcmshc_bf3_pdata,
++ },
+ {}
+ };
+ #endif
+@@ -347,12 +383,12 @@ static int dwcmshc_probe(struct platform_device *pdev)
+ struct sdhci_pltfm_host *pltfm_host;
+ struct sdhci_host *host;
+ struct dwcmshc_priv *priv;
+- struct rk3568_priv *rk_priv = NULL;
++ struct rk35xx_priv *rk_priv = NULL;
+ const struct sdhci_pltfm_data *pltfm_data;
+ int err;
+ u32 extra;
+
+- pltfm_data = of_device_get_match_data(&pdev->dev);
++ pltfm_data = device_get_match_data(&pdev->dev);
+ if (!pltfm_data) {
+ dev_err(&pdev->dev, "Error: No device match data found\n");
+ return -ENODEV;
+@@ -402,8 +438,8 @@ static int dwcmshc_probe(struct platform_device *pdev)
+ host->mmc_host_ops.request = dwcmshc_request;
+ host->mmc_host_ops.hs400_enhanced_strobe = dwcmshc_hs400_enhanced_strobe;
+
+- if (pltfm_data == &sdhci_dwcmshc_rk3568_pdata) {
+- rk_priv = devm_kzalloc(&pdev->dev, sizeof(struct rk3568_priv), GFP_KERNEL);
++ if (pltfm_data == &sdhci_dwcmshc_rk35xx_pdata) {
++ rk_priv = devm_kzalloc(&pdev->dev, sizeof(struct rk35xx_priv), GFP_KERNEL);
+ if (!rk_priv) {
+ err = -ENOMEM;
+ goto err_clk;
+@@ -411,7 +447,7 @@ static int dwcmshc_probe(struct platform_device *pdev)
+
+ priv->priv = rk_priv;
+
+- err = dwcmshc_rk3568_init(host, priv);
++ err = dwcmshc_rk35xx_init(host, priv);
+ if (err)
+ goto err_clk;
+ }
+@@ -428,7 +464,7 @@ err_clk:
+ clk_disable_unprepare(pltfm_host->clk);
+ clk_disable_unprepare(priv->bus_clk);
+ if (rk_priv)
+- clk_bulk_disable_unprepare(RK3568_MAX_CLKS,
++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS,
+ rk_priv->rockchip_clks);
+ free_pltfm:
+ sdhci_pltfm_free(pdev);
+@@ -440,14 +476,14 @@ static int dwcmshc_remove(struct platform_device *pdev)
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+- struct rk3568_priv *rk_priv = priv->priv;
++ struct rk35xx_priv *rk_priv = priv->priv;
+
+ sdhci_remove_host(host, 0);
+
+ clk_disable_unprepare(pltfm_host->clk);
+ clk_disable_unprepare(priv->bus_clk);
+ if (rk_priv)
+- clk_bulk_disable_unprepare(RK3568_MAX_CLKS,
++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS,
+ rk_priv->rockchip_clks);
+ sdhci_pltfm_free(pdev);
+
+@@ -460,7 +496,7 @@ static int dwcmshc_suspend(struct device *dev)
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+- struct rk3568_priv *rk_priv = priv->priv;
++ struct rk35xx_priv *rk_priv = priv->priv;
+ int ret;
+
+ ret = sdhci_suspend_host(host);
+@@ -472,7 +508,7 @@ static int dwcmshc_suspend(struct device *dev)
+ clk_disable_unprepare(priv->bus_clk);
+
+ if (rk_priv)
+- clk_bulk_disable_unprepare(RK3568_MAX_CLKS,
++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS,
+ rk_priv->rockchip_clks);
+
+ return ret;
+@@ -483,7 +519,7 @@ static int dwcmshc_resume(struct device *dev)
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
+- struct rk3568_priv *rk_priv = priv->priv;
++ struct rk35xx_priv *rk_priv = priv->priv;
+ int ret;
+
+ ret = clk_prepare_enable(pltfm_host->clk);
+@@ -497,7 +533,7 @@ static int dwcmshc_resume(struct device *dev)
+ }
+
+ if (rk_priv) {
+- ret = clk_bulk_prepare_enable(RK3568_MAX_CLKS,
++ ret = clk_bulk_prepare_enable(RK35xx_MAX_CLKS,
+ rk_priv->rockchip_clks);
+ if (ret)
+ return ret;
+diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
+index a593b1fbd69e5..ea9bb545b1a21 100644
+--- a/drivers/mmc/host/sdhci-of-esdhc.c
++++ b/drivers/mmc/host/sdhci-of-esdhc.c
+@@ -126,6 +126,7 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host,
+ return ret;
+ }
+ }
++
+ /*
+ * The DAT[3:0] line signal levels and the CMD line signal level are
+ * not compatible with standard SDHC register. The line signal levels
+@@ -137,6 +138,16 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host,
+ ret = value & 0x000fffff;
+ ret |= (value >> 4) & SDHCI_DATA_LVL_MASK;
+ ret |= (value << 1) & SDHCI_CMD_LVL;
++
++ /*
++ * Some controllers have unreliable Data Line Active
++ * bit for commands with busy signal. This affects
++ * Command Inhibit (data) bit. Just ignore it since
++ * MMC core driver has already polled card status
++ * with CMD13 after any command with busy siganl.
++ */
++ if (esdhc->quirk_ignore_data_inhibit)
++ ret &= ~SDHCI_DATA_INHIBIT;
+ return ret;
+ }
+
+@@ -151,19 +162,6 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host,
+ return ret;
+ }
+
+- /*
+- * Some controllers have unreliable Data Line Active
+- * bit for commands with busy signal. This affects
+- * Command Inhibit (data) bit. Just ignore it since
+- * MMC core driver has already polled card status
+- * with CMD13 after any command with busy siganl.
+- */
+- if ((spec_reg == SDHCI_PRESENT_STATE) &&
+- (esdhc->quirk_ignore_data_inhibit == true)) {
+- ret = value & ~SDHCI_DATA_INHIBIT;
+- return ret;
+- }
+-
+ ret = value;
+ return ret;
+ }
+@@ -524,12 +522,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask)
+
+ static int esdhc_of_enable_dma(struct sdhci_host *host)
+ {
++ int ret;
+ u32 value;
+ struct device *dev = mmc_dev(host->mmc);
+
+ if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") ||
+- of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc"))
+- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
++ of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) {
++ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
++ if (ret)
++ return ret;
++ }
+
+ value = sdhci_readl(host, ESDHC_DMA_SYSCTL);
+
+@@ -900,6 +902,7 @@ static int esdhc_signal_voltage_switch(struct mmc_host *mmc,
+ scfg_node = of_find_matching_node(NULL, scfg_device_ids);
+ if (scfg_node)
+ scfg_base = of_iomap(scfg_node, 0);
++ of_node_put(scfg_node);
+ if (scfg_base) {
+ sdhciovselcr = SDHCIOVSELCR_TGLEN |
+ SDHCIOVSELCR_VSELVAL;
+diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
+index 8f4d1f003f656..fd188b6d88f49 100644
+--- a/drivers/mmc/host/sdhci-omap.c
++++ b/drivers/mmc/host/sdhci-omap.c
+@@ -62,6 +62,8 @@
+ #define SDHCI_OMAP_IE 0x234
+ #define INT_CC_EN BIT(0)
+
++#define SDHCI_OMAP_ISE 0x238
++
+ #define SDHCI_OMAP_AC12 0x23c
+ #define AC12_V1V8_SIGEN BIT(19)
+ #define AC12_SCLK_SEL BIT(23)
+@@ -113,6 +115,8 @@ struct sdhci_omap_host {
+ u32 hctl;
+ u32 sysctl;
+ u32 capa;
++ u32 ie;
++ u32 ise;
+ };
+
+ static void sdhci_omap_start_clock(struct sdhci_omap_host *omap_host);
+@@ -682,7 +686,8 @@ static void sdhci_omap_set_power(struct sdhci_host *host, unsigned char mode,
+ {
+ struct mmc_host *mmc = host->mmc;
+
+- mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
++ if (!IS_ERR(mmc->supply.vmmc))
++ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+ }
+
+ static int sdhci_omap_enable_dma(struct sdhci_host *host)
+@@ -1244,14 +1249,23 @@ static void sdhci_omap_context_save(struct sdhci_omap_host *omap_host)
+ {
+ omap_host->con = sdhci_omap_readl(omap_host, SDHCI_OMAP_CON);
+ omap_host->hctl = sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL);
++ omap_host->sysctl = sdhci_omap_readl(omap_host, SDHCI_OMAP_SYSCTL);
+ omap_host->capa = sdhci_omap_readl(omap_host, SDHCI_OMAP_CAPA);
++ omap_host->ie = sdhci_omap_readl(omap_host, SDHCI_OMAP_IE);
++ omap_host->ise = sdhci_omap_readl(omap_host, SDHCI_OMAP_ISE);
+ }
+
++/* Order matters here, HCTL must be restored in two phases */
+ static void sdhci_omap_context_restore(struct sdhci_omap_host *omap_host)
+ {
+- sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, omap_host->con);
+ sdhci_omap_writel(omap_host, SDHCI_OMAP_HCTL, omap_host->hctl);
+ sdhci_omap_writel(omap_host, SDHCI_OMAP_CAPA, omap_host->capa);
++ sdhci_omap_writel(omap_host, SDHCI_OMAP_HCTL, omap_host->hctl);
++
++ sdhci_omap_writel(omap_host, SDHCI_OMAP_SYSCTL, omap_host->sysctl);
++ sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, omap_host->con);
++ sdhci_omap_writel(omap_host, SDHCI_OMAP_IE, omap_host->ie);
++ sdhci_omap_writel(omap_host, SDHCI_OMAP_ISE, omap_host->ise);
+ }
+
+ static int __maybe_unused sdhci_omap_suspend(struct device *dev)
+diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
+index d0f2edfe296c8..8736e04fa73cc 100644
+--- a/drivers/mmc/host/sdhci-pci-core.c
++++ b/drivers/mmc/host/sdhci-pci-core.c
+@@ -978,6 +978,12 @@ static bool glk_broken_cqhci(struct sdhci_pci_slot *slot)
+ dmi_match(DMI_SYS_VENDOR, "IRBIS"));
+ }
+
++static bool jsl_broken_hs400es(struct sdhci_pci_slot *slot)
++{
++ return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_JSL_EMMC &&
++ dmi_match(DMI_BIOS_VENDOR, "ASUSTeK COMPUTER INC.");
++}
++
+ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot)
+ {
+ int ret = byt_emmc_probe_slot(slot);
+@@ -986,9 +992,11 @@ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot)
+ slot->host->mmc->caps2 |= MMC_CAP2_CQE;
+
+ if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) {
+- slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES;
+- slot->host->mmc_host_ops.hs400_enhanced_strobe =
+- intel_hs400_enhanced_strobe;
++ if (!jsl_broken_hs400es(slot)) {
++ slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES;
++ slot->host->mmc_host_ops.hs400_enhanced_strobe =
++ intel_hs400_enhanced_strobe;
++ }
+ slot->host->mmc->caps2 |= MMC_CAP2_CQE_DCMD;
+ }
+
+@@ -1810,6 +1818,8 @@ static int amd_probe(struct sdhci_pci_chip *chip)
+ }
+ }
+
++ pci_dev_put(smbus_dev);
++
+ if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ)
+ chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD;
+
+@@ -1951,6 +1961,7 @@ static const struct pci_device_id pci_ids[] = {
+ SDHCI_PCI_DEVICE(INTEL, JSL_SD, intel_byt_sd),
+ SDHCI_PCI_DEVICE(INTEL, LKF_EMMC, intel_glk_emmc),
+ SDHCI_PCI_DEVICE(INTEL, LKF_SD, intel_byt_sd),
++ SDHCI_PCI_DEVICE(INTEL, ADL_EMMC, intel_glk_emmc),
+ SDHCI_PCI_DEVICE(O2, 8120, o2),
+ SDHCI_PCI_DEVICE(O2, 8220, o2),
+ SDHCI_PCI_DEVICE(O2, 8221, o2),
+diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
+index 4fd99c1e82ba3..ad50f16658fe2 100644
+--- a/drivers/mmc/host/sdhci-pci-gli.c
++++ b/drivers/mmc/host/sdhci-pci-gli.c
+@@ -12,6 +12,7 @@
+ #include <linux/pci.h>
+ #include <linux/mmc/mmc.h>
+ #include <linux/delay.h>
++#include <linux/of.h>
+ #include "sdhci.h"
+ #include "sdhci-pci.h"
+ #include "cqhci.h"
+@@ -116,6 +117,8 @@
+ #define PCI_GLI_9755_PECONF 0x44
+ #define PCI_GLI_9755_LFCLK GENMASK(14, 12)
+ #define PCI_GLI_9755_DMACLK BIT(29)
++#define PCI_GLI_9755_INVERT_CD BIT(30)
++#define PCI_GLI_9755_INVERT_WP BIT(31)
+
+ #define PCI_GLI_9755_CFG2 0x48
+ #define PCI_GLI_9755_CFG2_L1DLY GENMASK(28, 24)
+@@ -570,6 +573,14 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot)
+ gl9755_wt_on(pdev);
+
+ pci_read_config_dword(pdev, PCI_GLI_9755_PECONF, &value);
++ /*
++ * Apple ARM64 platforms using these chips may have
++ * inverted CD/WP detection.
++ */
++ if (of_property_read_bool(pdev->dev.of_node, "cd-inverted"))
++ value |= PCI_GLI_9755_INVERT_CD;
++ if (of_property_read_bool(pdev->dev.of_node, "wp-inverted"))
++ value |= PCI_GLI_9755_INVERT_WP;
+ value &= ~PCI_GLI_9755_LFCLK;
+ value &= ~PCI_GLI_9755_DMACLK;
+ pci_write_config_dword(pdev, PCI_GLI_9755_PECONF, value);
+diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
+index 51d55a87aebef..78d0b9fcc42cb 100644
+--- a/drivers/mmc/host/sdhci-pci-o2micro.c
++++ b/drivers/mmc/host/sdhci-pci-o2micro.c
+@@ -31,6 +31,7 @@
+ #define O2_SD_CAPS 0xE0
+ #define O2_SD_ADMA1 0xE2
+ #define O2_SD_ADMA2 0xE7
++#define O2_SD_MISC_CTRL2 0xF0
+ #define O2_SD_INF_MOD 0xF1
+ #define O2_SD_MISC_CTRL4 0xFC
+ #define O2_SD_MISC_CTRL 0x1C0
+@@ -147,6 +148,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc)
+
+ if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS))
+ sdhci_o2_enable_internal_clock(host);
++ else
++ sdhci_o2_wait_card_detect_stable(host);
+
+ return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+ }
+@@ -828,6 +831,12 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
+ /* Set Tuning Windows to 5 */
+ pci_write_config_byte(chip->pdev,
+ O2_SD_TUNING_CTRL, 0x55);
++ //Adjust 1st and 2nd CD debounce time
++ pci_read_config_dword(chip->pdev, O2_SD_MISC_CTRL2, &scratch_32);
++ scratch_32 &= 0xFFE7FFFF;
++ scratch_32 |= 0x00180000;
++ pci_write_config_dword(chip->pdev, O2_SD_MISC_CTRL2, scratch_32);
++ pci_write_config_dword(chip->pdev, O2_SD_DETECT_SETTING, 1);
+ /* Lock WP */
+ ret = pci_read_config_byte(chip->pdev,
+ O2_SD_LOCK_WP, &scratch);
+diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
+index 8f90c4163bb5c..dcd99d5057ee1 100644
+--- a/drivers/mmc/host/sdhci-pci.h
++++ b/drivers/mmc/host/sdhci-pci.h
+@@ -59,6 +59,7 @@
+ #define PCI_DEVICE_ID_INTEL_JSL_SD 0x4df8
+ #define PCI_DEVICE_ID_INTEL_LKF_EMMC 0x98c4
+ #define PCI_DEVICE_ID_INTEL_LKF_SD 0x98f8
++#define PCI_DEVICE_ID_INTEL_ADL_EMMC 0x54c4
+
+ #define PCI_DEVICE_ID_SYSKONNECT_8000 0x8000
+ #define PCI_DEVICE_ID_VIA_95D0 0x95d0
+diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
+index 11e375579cfb9..256260339f692 100644
+--- a/drivers/mmc/host/sdhci-sprd.c
++++ b/drivers/mmc/host/sdhci-sprd.c
+@@ -224,13 +224,15 @@ static inline void _sdhci_sprd_set_clock(struct sdhci_host *host,
+ div = ((div & 0x300) >> 2) | ((div & 0xFF) << 8);
+ sdhci_enable_clk(host, div);
+
+- /* enable auto gate sdhc_enable_auto_gate */
+- val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI);
+- mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN |
+- SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN;
+- if (mask != (val & mask)) {
+- val |= mask;
+- sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI);
++ /* Enable CLK_AUTO when the clock is greater than 400K. */
++ if (clk > 400000) {
++ val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI);
++ mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN |
++ SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN;
++ if (mask != (val & mask)) {
++ val |= mask;
++ sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI);
++ }
+ }
+ }
+
+@@ -296,7 +298,7 @@ static unsigned int sdhci_sprd_get_max_clock(struct sdhci_host *host)
+
+ static unsigned int sdhci_sprd_get_min_clock(struct sdhci_host *host)
+ {
+- return 400000;
++ return 100000;
+ }
+
+ static void sdhci_sprd_set_uhs_signaling(struct sdhci_host *host,
+@@ -457,7 +459,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+ }
+
+ if (IS_ERR(sprd_host->pinctrl))
+- return 0;
++ goto reset;
+
+ switch (ios->signal_voltage) {
+ case MMC_SIGNAL_VOLTAGE_180:
+@@ -485,6 +487,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+
+ /* Wait for 300 ~ 500 us for pin state stable */
+ usleep_range(300, 500);
++
++reset:
+ sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+
+ return 0;
+diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
+index a5001875876b9..fff9fb8d6bacb 100644
+--- a/drivers/mmc/host/sdhci-tegra.c
++++ b/drivers/mmc/host/sdhci-tegra.c
+@@ -24,6 +24,7 @@
+ #include <linux/gpio/consumer.h>
+ #include <linux/ktime.h>
+
++#include "sdhci-cqhci.h"
+ #include "sdhci-pltfm.h"
+ #include "cqhci.h"
+
+@@ -356,23 +357,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap)
+ }
+ }
+
+-static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc,
+- struct mmc_ios *ios)
+-{
+- struct sdhci_host *host = mmc_priv(mmc);
+- u32 val;
+-
+- val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
+-
+- if (ios->enhanced_strobe)
+- val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
+- else
+- val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
+-
+- sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
+-
+-}
+-
+ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
+ {
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+@@ -380,7 +364,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
+ const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+ u32 misc_ctrl, clk_ctrl, pad_ctrl;
+
+- sdhci_reset(host, mask);
++ sdhci_and_cqhci_reset(host, mask);
+
+ if (!(mask & SDHCI_RESET_ALL))
+ return;
+@@ -779,7 +763,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+ */
+ host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
+ clk_set_rate(pltfm_host->clk, host_clk);
+- tegra_host->curr_clk_rate = host_clk;
++ tegra_host->curr_clk_rate = clk_get_rate(pltfm_host->clk);
+ if (tegra_host->ddr_signaling)
+ host->max_clk = host_clk;
+ else
+@@ -793,6 +777,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+ }
+ }
+
++static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc,
++ struct mmc_ios *ios)
++{
++ struct sdhci_host *host = mmc_priv(mmc);
++ u32 val;
++
++ val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
++
++ if (ios->enhanced_strobe) {
++ val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
++ /*
++ * When CMD13 is sent from mmc_select_hs400es() after
++ * switching to HS400ES mode, the bus is operating at
++ * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR.
++ * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI
++ * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host
++ * controller CAR clock and the interface clock are rate matched.
++ */
++ tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR);
++ } else {
++ val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
++ }
++
++ sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
++}
++
+ static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host)
+ {
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
+index 666cee4c7f7c6..08e838400b526 100644
+--- a/drivers/mmc/host/sdhci-xenon.c
++++ b/drivers/mmc/host/sdhci-xenon.c
+@@ -241,16 +241,6 @@ static void xenon_voltage_switch(struct sdhci_host *host)
+ {
+ /* Wait for 5ms after set 1.8V signal enable bit */
+ usleep_range(5000, 5500);
+-
+- /*
+- * For some reason the controller's Host Control2 register reports
+- * the bit representing 1.8V signaling as 0 when read after it was
+- * written as 1. Subsequent read reports 1.
+- *
+- * Since this may cause some issues, do an empty read of the Host
+- * Control2 register here to circumvent this.
+- */
+- sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ }
+
+ static unsigned int xenon_get_max_clock(struct sdhci_host *host)
+diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
+index 2d80a04e11d87..d897c981b079f 100644
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -338,6 +338,7 @@ static void sdhci_init(struct sdhci_host *host, int soft)
+ if (soft) {
+ /* force clock reconfiguration */
+ host->clock = 0;
++ host->reinit_uhs = true;
+ mmc->ops->set_ios(mmc, &mmc->ios);
+ }
+ }
+@@ -771,7 +772,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
+ len -= offset;
+ }
+
+- BUG_ON(len > 65536);
++ /*
++ * The block layer forces a minimum segment size of PAGE_SIZE,
++ * so 'len' can be too big here if PAGE_SIZE >= 64KiB. Write
++ * multiple descriptors, noting that the ADMA table is sized
++ * for 4KiB chunks anyway, so it will be big enough.
++ */
++ while (len > host->max_adma) {
++ int n = 32 * 1024; /* 32KiB*/
++
++ __sdhci_adma_write_desc(host, &desc, addr, n, ADMA2_TRAN_VALID);
++ addr += n;
++ len -= n;
++ }
+
+ /* tran, valid */
+ if (len)
+@@ -1131,6 +1144,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
+ }
+ }
+
++ sdhci_config_dma(host);
++
+ if (host->flags & SDHCI_REQ_USE_DMA) {
+ int sg_cnt = sdhci_pre_dma_transfer(host, data, COOKIE_MAPPED);
+
+@@ -1150,8 +1165,6 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
+ }
+ }
+
+- sdhci_config_dma(host);
+-
+ if (!(host->flags & SDHCI_REQ_USE_DMA)) {
+ int flags;
+
+@@ -2245,11 +2258,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
+ }
+ EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
+
++static bool sdhci_timing_has_preset(unsigned char timing)
++{
++ switch (timing) {
++ case MMC_TIMING_UHS_SDR12:
++ case MMC_TIMING_UHS_SDR25:
++ case MMC_TIMING_UHS_SDR50:
++ case MMC_TIMING_UHS_SDR104:
++ case MMC_TIMING_UHS_DDR50:
++ case MMC_TIMING_MMC_DDR52:
++ return true;
++ };
++ return false;
++}
++
++static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing)
++{
++ return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
++ sdhci_timing_has_preset(timing);
++}
++
++static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios)
++{
++ /*
++ * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK
++ * Frequency. Check if preset values need to be enabled, or the Driver
++ * Strength needs updating. Note, clock changes are handled separately.
++ */
++ return !host->preset_enabled &&
++ (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type);
++}
++
+ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ {
+ struct sdhci_host *host = mmc_priv(mmc);
++ bool reinit_uhs = host->reinit_uhs;
++ bool turning_on_clk = false;
+ u8 ctrl;
+
++ host->reinit_uhs = false;
++
+ if (ios->power_mode == MMC_POWER_UNDEFINED)
+ return;
+
+@@ -2275,6 +2323,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ sdhci_enable_preset_value(host, false);
+
+ if (!ios->clock || ios->clock != host->clock) {
++ turning_on_clk = ios->clock && !host->clock;
++
+ host->ops->set_clock(host, ios->clock);
+ host->clock = ios->clock;
+
+@@ -2301,6 +2351,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+
+ host->ops->set_bus_width(host, ios->bus_width);
+
++ /*
++ * Special case to avoid multiple clock changes during voltage
++ * switching.
++ */
++ if (!reinit_uhs &&
++ turning_on_clk &&
++ host->timing == ios->timing &&
++ host->version >= SDHCI_SPEC_300 &&
++ !sdhci_presetable_values_change(host, ios))
++ return;
++
+ ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+
+ if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) {
+@@ -2344,6 +2405,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ }
+
+ sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
++ host->drv_type = ios->drv_type;
+ } else {
+ /*
+ * According to SDHC Spec v3.00, if the Preset Value
+@@ -2371,19 +2433,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ host->ops->set_uhs_signaling(host, ios->timing);
+ host->timing = ios->timing;
+
+- if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
+- ((ios->timing == MMC_TIMING_UHS_SDR12) ||
+- (ios->timing == MMC_TIMING_UHS_SDR25) ||
+- (ios->timing == MMC_TIMING_UHS_SDR50) ||
+- (ios->timing == MMC_TIMING_UHS_SDR104) ||
+- (ios->timing == MMC_TIMING_UHS_DDR50) ||
+- (ios->timing == MMC_TIMING_MMC_DDR52))) {
++ if (sdhci_preset_needed(host, ios->timing)) {
+ u16 preset;
+
+ sdhci_enable_preset_value(host, true);
+ preset = sdhci_get_preset_value(host);
+ ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK,
+ preset);
++ host->drv_type = ios->drv_type;
+ }
+
+ /* Re-enable SD Clock */
+@@ -3699,6 +3756,7 @@ int sdhci_resume_host(struct sdhci_host *host)
+ sdhci_init(host, 0);
+ host->pwr = 0;
+ host->clock = 0;
++ host->reinit_uhs = true;
+ mmc->ops->set_ios(mmc, &mmc->ios);
+ } else {
+ sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER));
+@@ -3761,6 +3819,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset)
+ /* Force clock and power re-program */
+ host->pwr = 0;
+ host->clock = 0;
++ host->reinit_uhs = true;
+ mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios);
+ mmc->ops->set_ios(mmc, &mmc->ios);
+
+@@ -3952,6 +4011,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
+ * descriptor for each segment, plus 1 for a nop end descriptor.
+ */
+ host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1;
++ host->max_adma = 65536;
+
+ host->max_timeout_count = 0xE;
+
+@@ -4617,10 +4677,12 @@ int sdhci_setup_host(struct sdhci_host *host)
+ * be larger than 64 KiB though.
+ */
+ if (host->flags & SDHCI_USE_ADMA) {
+- if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
++ if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) {
++ host->max_adma = 65532; /* 32-bit alignment */
+ mmc->max_seg_size = 65535;
+- else
++ } else {
+ mmc->max_seg_size = 65536;
++ }
+ } else {
+ mmc->max_seg_size = mmc->max_req_size;
+ }
+diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
+index e8d04e42a5afd..6a5cc05576cd5 100644
+--- a/drivers/mmc/host/sdhci.h
++++ b/drivers/mmc/host/sdhci.h
+@@ -340,7 +340,8 @@ struct sdhci_adma2_64_desc {
+
+ /*
+ * Maximum segments assuming a 512KiB maximum requisition size and a minimum
+- * 4KiB page size.
++ * 4KiB page size. Note this also allows enough for multiple descriptors in
++ * case of PAGE_SIZE >= 64KiB.
+ */
+ #define SDHCI_MAX_SEGS 128
+
+@@ -522,6 +523,8 @@ struct sdhci_host {
+
+ unsigned int clock; /* Current clock (MHz) */
+ u8 pwr; /* Current voltage */
++ u8 drv_type; /* Current UHS-I driver type */
++ bool reinit_uhs; /* Force UHS-related re-initialization */
+
+ bool runtime_suspended; /* Host is runtime suspended */
+ bool bus_on; /* Bus power prevents runtime suspend */
+@@ -543,6 +546,7 @@ struct sdhci_host {
+ unsigned int blocks; /* remaining PIO blocks */
+
+ int sg_count; /* Mapped sg entries */
++ int max_adma; /* Max. length in ADMA descriptor */
+
+ void *adma_table; /* ADMA descriptor table */
+ void *align_buffer; /* Bounce buffer */
+diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
+index f654afbe8e83c..0158b2b1507d4 100644
+--- a/drivers/mmc/host/sdhci_am654.c
++++ b/drivers/mmc/host/sdhci_am654.c
+@@ -15,6 +15,7 @@
+ #include <linux/sys_soc.h>
+
+ #include "cqhci.h"
++#include "sdhci-cqhci.h"
+ #include "sdhci-pltfm.h"
+
+ /* CTL_CFG Registers */
+@@ -147,6 +148,9 @@ struct sdhci_am654_data {
+ int drv_strength;
+ int strb_sel;
+ u32 flags;
++ u32 quirks;
++
++#define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0)
+ };
+
+ struct sdhci_am654_driver_data {
+@@ -347,8 +351,6 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
+ */
+ case MMC_TIMING_SD_HS:
+ case MMC_TIMING_MMC_HS:
+- case MMC_TIMING_UHS_SDR12:
+- case MMC_TIMING_UHS_SDR25:
+ val &= ~SDHCI_CTRL_HISPD;
+ }
+ }
+@@ -365,7 +367,22 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
+ MAX_POWER_ON_TIMEOUT, false, host, val,
+ reg);
+ if (ret)
+- dev_warn(mmc_dev(host->mmc), "Power on failed\n");
++ dev_info(mmc_dev(host->mmc), "Power on failed\n");
++ }
++}
++
++static void sdhci_am654_reset(struct sdhci_host *host, u8 mask)
++{
++ u8 ctrl;
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++ struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
++
++ sdhci_and_cqhci_reset(host, mask);
++
++ if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) {
++ ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
++ ctrl |= SDHCI_CTRL_CDTEST_INS | SDHCI_CTRL_CDTEST_EN;
++ sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+ }
+ }
+
+@@ -446,7 +463,7 @@ static struct sdhci_ops sdhci_am654_ops = {
+ .set_clock = sdhci_am654_set_clock,
+ .write_b = sdhci_am654_write_b,
+ .irq = sdhci_am654_cqhci_irq,
+- .reset = sdhci_reset,
++ .reset = sdhci_and_cqhci_reset,
+ };
+
+ static const struct sdhci_pltfm_data sdhci_am654_pdata = {
+@@ -476,7 +493,7 @@ static struct sdhci_ops sdhci_j721e_8bit_ops = {
+ .set_clock = sdhci_am654_set_clock,
+ .write_b = sdhci_am654_write_b,
+ .irq = sdhci_am654_cqhci_irq,
+- .reset = sdhci_reset,
++ .reset = sdhci_and_cqhci_reset,
+ };
+
+ static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = {
+@@ -500,7 +517,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = {
+ .set_clock = sdhci_j721e_4bit_set_clock,
+ .write_b = sdhci_am654_write_b,
+ .irq = sdhci_am654_cqhci_irq,
+- .reset = sdhci_reset,
++ .reset = sdhci_am654_reset,
+ };
+
+ static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = {
+@@ -514,26 +531,6 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = {
+ .flags = IOMUX_PRESENT,
+ };
+
+-static const struct sdhci_pltfm_data sdhci_am64_8bit_pdata = {
+- .ops = &sdhci_j721e_8bit_ops,
+- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+-};
+-
+-static const struct sdhci_am654_driver_data sdhci_am64_8bit_drvdata = {
+- .pdata = &sdhci_am64_8bit_pdata,
+- .flags = DLL_PRESENT | DLL_CALIB,
+-};
+-
+-static const struct sdhci_pltfm_data sdhci_am64_4bit_pdata = {
+- .ops = &sdhci_j721e_4bit_ops,
+- .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+-};
+-
+-static const struct sdhci_am654_driver_data sdhci_am64_4bit_drvdata = {
+- .pdata = &sdhci_am64_4bit_pdata,
+- .flags = IOMUX_PRESENT,
+-};
+-
+ static const struct soc_device_attribute sdhci_am654_devices[] = {
+ { .family = "AM65X",
+ .revision = "SR1.0",
+@@ -739,6 +736,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev,
+ device_property_read_u32(dev, "ti,clkbuf-sel",
+ &sdhci_am654->clkbuf_sel);
+
++ if (device_property_read_bool(dev, "ti,fails-without-test-cd"))
++ sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST;
++
+ sdhci_get_of_property(pdev);
+
+ return 0;
+@@ -759,11 +759,11 @@ static const struct of_device_id sdhci_am654_of_match[] = {
+ },
+ {
+ .compatible = "ti,am64-sdhci-8bit",
+- .data = &sdhci_am64_8bit_drvdata,
++ .data = &sdhci_j721e_8bit_drvdata,
+ },
+ {
+ .compatible = "ti,am64-sdhci-4bit",
+- .data = &sdhci_am64_4bit_drvdata,
++ .data = &sdhci_j721e_4bit_drvdata,
+ },
+ { /* sentinel */ }
+ };
+diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c
+index 3f5977979cf25..7ede74bf37230 100644
+--- a/drivers/mmc/host/sdhci_f_sdh30.c
++++ b/drivers/mmc/host/sdhci_f_sdh30.c
+@@ -26,9 +26,16 @@ struct f_sdhost_priv {
+ bool enable_cmd_dat_delay;
+ };
+
++static void *sdhci_f_sdhost_priv(struct sdhci_host *host)
++{
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++
++ return sdhci_pltfm_priv(pltfm_host);
++}
++
+ static void sdhci_f_sdh30_soft_voltage_switch(struct sdhci_host *host)
+ {
+- struct f_sdhost_priv *priv = sdhci_priv(host);
++ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
+ u32 ctrl = 0;
+
+ usleep_range(2500, 3000);
+@@ -61,7 +68,7 @@ static unsigned int sdhci_f_sdh30_get_min_clock(struct sdhci_host *host)
+
+ static void sdhci_f_sdh30_reset(struct sdhci_host *host, u8 mask)
+ {
+- struct f_sdhost_priv *priv = sdhci_priv(host);
++ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
+ u32 ctl;
+
+ if (sdhci_readw(host, SDHCI_CLOCK_CONTROL) == 0)
+@@ -85,30 +92,32 @@ static const struct sdhci_ops sdhci_f_sdh30_ops = {
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
+ };
+
++static const struct sdhci_pltfm_data sdhci_f_sdh30_pltfm_data = {
++ .ops = &sdhci_f_sdh30_ops,
++ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
++ | SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
++ .quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE
++ | SDHCI_QUIRK2_TUNING_WORK_AROUND,
++};
++
+ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
+ {
+ struct sdhci_host *host;
+ struct device *dev = &pdev->dev;
+- int irq, ctrl = 0, ret = 0;
++ int ctrl = 0, ret = 0;
+ struct f_sdhost_priv *priv;
++ struct sdhci_pltfm_host *pltfm_host;
+ u32 reg = 0;
+
+- irq = platform_get_irq(pdev, 0);
+- if (irq < 0)
+- return irq;
+-
+- host = sdhci_alloc_host(dev, sizeof(struct f_sdhost_priv));
++ host = sdhci_pltfm_init(pdev, &sdhci_f_sdh30_pltfm_data,
++ sizeof(struct f_sdhost_priv));
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+- priv = sdhci_priv(host);
++ pltfm_host = sdhci_priv(host);
++ priv = sdhci_pltfm_priv(pltfm_host);
+ priv->dev = dev;
+
+- host->quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+- SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+- host->quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE |
+- SDHCI_QUIRK2_TUNING_WORK_AROUND;
+-
+ priv->enable_cmd_dat_delay = device_property_read_bool(dev,
+ "fujitsu,cmd-dat-delay-select");
+
+@@ -116,18 +125,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
+ if (ret)
+ goto err;
+
+- platform_set_drvdata(pdev, host);
+-
+- host->hw_name = "f_sdh30";
+- host->ops = &sdhci_f_sdh30_ops;
+- host->irq = irq;
+-
+- host->ioaddr = devm_platform_ioremap_resource(pdev, 0);
+- if (IS_ERR(host->ioaddr)) {
+- ret = PTR_ERR(host->ioaddr);
+- goto err;
+- }
+-
+ if (dev_of_node(dev)) {
+ sdhci_get_of_property(pdev);
+
+@@ -168,6 +165,9 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
+ if (reg & SDHCI_CAN_DO_8BIT)
+ priv->vendor_hs200 = F_SDH30_EMMC_HS200;
+
++ if (!(reg & SDHCI_TIMEOUT_CLK_MASK))
++ host->quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK;
++
+ ret = sdhci_add_host(host);
+ if (ret)
+ goto err_add_host;
+@@ -179,23 +179,22 @@ err_add_host:
+ err_clk:
+ clk_disable_unprepare(priv->clk_iface);
+ err:
+- sdhci_free_host(host);
++ sdhci_pltfm_free(pdev);
++
+ return ret;
+ }
+
+ static int sdhci_f_sdh30_remove(struct platform_device *pdev)
+ {
+ struct sdhci_host *host = platform_get_drvdata(pdev);
+- struct f_sdhost_priv *priv = sdhci_priv(host);
+-
+- sdhci_remove_host(host, readl(host->ioaddr + SDHCI_INT_STATUS) ==
+- 0xffffffff);
++ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
++ struct clk *clk_iface = priv->clk_iface;
++ struct clk *clk = priv->clk;
+
+- clk_disable_unprepare(priv->clk_iface);
+- clk_disable_unprepare(priv->clk);
++ sdhci_pltfm_unregister(pdev);
+
+- sdhci_free_host(host);
+- platform_set_drvdata(pdev, NULL);
++ clk_disable_unprepare(clk_iface);
++ clk_disable_unprepare(clk);
+
+ return 0;
+ }
+diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
+index bcc595c70a9fb..e12fe29b275c2 100644
+--- a/drivers/mmc/host/sh_mmcif.c
++++ b/drivers/mmc/host/sh_mmcif.c
+@@ -1398,7 +1398,7 @@ static int sh_mmcif_probe(struct platform_device *pdev)
+ irq[0] = platform_get_irq(pdev, 0);
+ irq[1] = platform_get_irq_optional(pdev, 1);
+ if (irq[0] < 0)
+- return -ENXIO;
++ return irq[0];
+
+ reg = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(reg))
+diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
+index 2702736a1c57d..3c213816db786 100644
+--- a/drivers/mmc/host/sunxi-mmc.c
++++ b/drivers/mmc/host/sunxi-mmc.c
+@@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
+ pdes[i].buf_addr_ptr1 =
+ cpu_to_le32(sg_dma_address(&data->sg[i]) >>
+ host->cfg->idma_des_shift);
+- pdes[i].buf_addr_ptr2 = cpu_to_le32((u32)next_desc >>
+- host->cfg->idma_des_shift);
++ pdes[i].buf_addr_ptr2 =
++ cpu_to_le32(next_desc >>
++ host->cfg->idma_des_shift);
+ }
+
+ pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD);
+@@ -1340,8 +1341,8 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host,
+ return ret;
+
+ host->irq = platform_get_irq(pdev, 0);
+- if (host->irq <= 0) {
+- ret = -EINVAL;
++ if (host->irq < 0) {
++ ret = host->irq;
+ goto error_disable_mmc;
+ }
+
+@@ -1482,9 +1483,11 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
+ struct sunxi_mmc_host *host = mmc_priv(mmc);
+
+ mmc_remove_host(mmc);
+- pm_runtime_force_suspend(&pdev->dev);
+- disable_irq(host->irq);
+- sunxi_mmc_disable(host);
++ pm_runtime_disable(&pdev->dev);
++ if (!pm_runtime_status_suspended(&pdev->dev)) {
++ disable_irq(host->irq);
++ sunxi_mmc_disable(host);
++ }
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
+ mmc_free_host(mmc);
+
+diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
+index b55a29c53d9c3..53a2ad9a24b87 100644
+--- a/drivers/mmc/host/tmio_mmc.c
++++ b/drivers/mmc/host/tmio_mmc.c
+@@ -75,7 +75,7 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
+ tmio_mmc_clk_start(host);
+ }
+
+-static void tmio_mmc_reset(struct tmio_mmc_host *host)
++static void tmio_mmc_reset(struct tmio_mmc_host *host, bool preserve)
+ {
+ sd_ctrl_write16(host, CTL_RESET_SDIO, 0x0000);
+ usleep_range(10000, 11000);
+diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
+index f936aad945ce3..da63193dd45b0 100644
+--- a/drivers/mmc/host/tmio_mmc.h
++++ b/drivers/mmc/host/tmio_mmc.h
+@@ -42,6 +42,7 @@
+ #define CTL_DMA_ENABLE 0xd8
+ #define CTL_RESET_SD 0xe0
+ #define CTL_VERSION 0xe2
++#define CTL_SDIF_MODE 0xe6 /* only known on R-Car 2+ */
+
+ /* Definitions for values the CTL_STOP_INTERNAL_ACTION register can take */
+ #define TMIO_STOP_STP BIT(0)
+@@ -98,6 +99,9 @@
+ /* Definitions for values the CTL_DMA_ENABLE register can take */
+ #define DMA_ENABLE_DMASDRW BIT(1)
+
++/* Definitions for values the CTL_SDIF_MODE register can take */
++#define SDIF_MODE_HS400 BIT(0) /* only known on R-Car 2+ */
++
+ /* Define some IRQ masks */
+ /* This is the mask used at reset by the chip */
+ #define TMIO_MASK_ALL 0x837f031d
+@@ -181,7 +185,7 @@ struct tmio_mmc_host {
+ int (*multi_io_quirk)(struct mmc_card *card,
+ unsigned int direction, int blk_size);
+ int (*write16_hook)(struct tmio_mmc_host *host, int addr);
+- void (*reset)(struct tmio_mmc_host *host);
++ void (*reset)(struct tmio_mmc_host *host, bool preserve);
+ bool (*check_retune)(struct tmio_mmc_host *host, struct mmc_request *mrq);
+ void (*fixup_request)(struct tmio_mmc_host *host, struct mmc_request *mrq);
+ unsigned int (*get_timeout_cycles)(struct tmio_mmc_host *host);
+diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
+index e2affa52ef469..437048bb80273 100644
+--- a/drivers/mmc/host/tmio_mmc_core.c
++++ b/drivers/mmc/host/tmio_mmc_core.c
+@@ -179,8 +179,17 @@ static void tmio_mmc_set_bus_width(struct tmio_mmc_host *host,
+ sd_ctrl_write16(host, CTL_SD_MEM_CARD_OPT, reg);
+ }
+
+-static void tmio_mmc_reset(struct tmio_mmc_host *host)
++static void tmio_mmc_reset(struct tmio_mmc_host *host, bool preserve)
+ {
++ u16 card_opt, clk_ctrl, sdif_mode;
++
++ if (preserve) {
++ card_opt = sd_ctrl_read16(host, CTL_SD_MEM_CARD_OPT);
++ clk_ctrl = sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL);
++ if (host->pdata->flags & TMIO_MMC_MIN_RCAR2)
++ sdif_mode = sd_ctrl_read16(host, CTL_SDIF_MODE);
++ }
++
+ /* FIXME - should we set stop clock reg here */
+ sd_ctrl_write16(host, CTL_RESET_SD, 0x0000);
+ usleep_range(10000, 11000);
+@@ -190,7 +199,7 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host)
+ tmio_mmc_abort_dma(host);
+
+ if (host->reset)
+- host->reset(host);
++ host->reset(host, preserve);
+
+ sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all);
+ host->sdcard_irq_mask = host->sdcard_irq_mask_all;
+@@ -206,6 +215,13 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host)
+ sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001);
+ }
+
++ if (preserve) {
++ sd_ctrl_write16(host, CTL_SD_MEM_CARD_OPT, card_opt);
++ sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk_ctrl);
++ if (host->pdata->flags & TMIO_MMC_MIN_RCAR2)
++ sd_ctrl_write16(host, CTL_SDIF_MODE, sdif_mode);
++ }
++
+ if (host->mmc->card)
+ mmc_retune_needed(host->mmc);
+ }
+@@ -248,7 +264,7 @@ static void tmio_mmc_reset_work(struct work_struct *work)
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+- tmio_mmc_reset(host);
++ tmio_mmc_reset(host, true);
+
+ /* Ready for new calls */
+ host->mrq = NULL;
+@@ -960,14 +976,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+ case MMC_POWER_OFF:
+ tmio_mmc_power_off(host);
+ /* For R-Car Gen2+, we need to reset SDHI specific SCC */
+- if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) {
+- host->reset(host);
+-
+- if (host->native_hotplug)
+- tmio_mmc_enable_mmc_irqs(host,
+- TMIO_STAT_CARD_REMOVE |
+- TMIO_STAT_CARD_INSERT);
+- }
++ if (host->pdata->flags & TMIO_MMC_MIN_RCAR2)
++ tmio_mmc_reset(host, false);
+
+ host->set_clock(host, 0);
+ break;
+@@ -1175,6 +1185,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
+ if (mmc_can_gpio_cd(mmc))
+ _host->ops.get_cd = mmc_gpio_get_cd;
+
++ /* must be set before tmio_mmc_reset() */
+ _host->native_hotplug = !(mmc_can_gpio_cd(mmc) ||
+ mmc->caps & MMC_CAP_NEEDS_POLL ||
+ !mmc_card_is_removable(mmc));
+@@ -1194,7 +1205,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host)
+ _host->sdcard_irq_mask_all = TMIO_MASK_ALL;
+
+ _host->set_clock(_host, 0);
+- tmio_mmc_reset(_host);
++ tmio_mmc_reset(_host, false);
+
+ spin_lock_init(&_host->lock);
+ mutex_init(&_host->ios_lock);
+@@ -1290,15 +1301,11 @@ int tmio_mmc_host_runtime_resume(struct device *dev)
+ struct tmio_mmc_host *host = dev_get_drvdata(dev);
+
+ tmio_mmc_clk_enable(host);
+- tmio_mmc_reset(host);
++ tmio_mmc_reset(host, false);
+
+ if (host->clk_cache)
+ host->set_clock(host, host->clk_cache);
+
+- if (host->native_hotplug)
+- tmio_mmc_enable_mmc_irqs(host,
+- TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT);
+-
+ tmio_mmc_enable_dma(host, true);
+
+ return 0;
+diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c
+index 8d037c2071abc..497791ffada6d 100644
+--- a/drivers/mmc/host/toshsd.c
++++ b/drivers/mmc/host/toshsd.c
+@@ -651,7 +651,9 @@ static int toshsd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ if (ret)
+ goto unmap;
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto free_irq;
+
+ base = pci_resource_start(pdev, 0);
+ dev_dbg(&pdev->dev, "MMIO %pa, IRQ %d\n", &base, pdev->irq);
+@@ -660,6 +662,8 @@ static int toshsd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ return 0;
+
++free_irq:
++ free_irq(pdev->irq, host);
+ unmap:
+ pci_iounmap(pdev, host->ioaddr);
+ release:
+diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
+index 99515be6e5e57..2032e4e1ee68b 100644
+--- a/drivers/mmc/host/usdhi6rol0.c
++++ b/drivers/mmc/host/usdhi6rol0.c
+@@ -1757,8 +1757,10 @@ static int usdhi6_probe(struct platform_device *pdev)
+ irq_cd = platform_get_irq_byname(pdev, "card detect");
+ irq_sd = platform_get_irq_byname(pdev, "data");
+ irq_sdio = platform_get_irq_byname(pdev, "SDIO");
+- if (irq_sd < 0 || irq_sdio < 0)
+- return -ENODEV;
++ if (irq_sd < 0)
++ return irq_sd;
++ if (irq_sdio < 0)
++ return irq_sdio;
+
+ mmc = mmc_alloc_host(sizeof(struct usdhi6_host), dev);
+ if (!mmc)
+diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c
+index 88662a90ed960..a2b0d9461665b 100644
+--- a/drivers/mmc/host/via-sdmmc.c
++++ b/drivers/mmc/host/via-sdmmc.c
+@@ -1151,7 +1151,9 @@ static int via_sd_probe(struct pci_dev *pcidev,
+ pcidev->subsystem_device == 0x3891)
+ sdhost->quirks = VIA_CRDR_QUIRK_300MS_PWRDELAY;
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto unmap;
+
+ return 0;
+
+diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
+index 97beece62fec4..7dc0e91dabfc7 100644
+--- a/drivers/mmc/host/vub300.c
++++ b/drivers/mmc/host/vub300.c
+@@ -1715,6 +1715,9 @@ static void construct_request_response(struct vub300_mmc_host *vub300,
+ int bytes = 3 & less_cmd;
+ int words = less_cmd >> 2;
+ u8 *r = vub300->resp.response.command_response;
++
++ if (!resp_len)
++ return;
+ if (bytes == 3) {
+ cmd->resp[words] = (r[1 + (words << 2)] << 24)
+ | (r[2 + (words << 2)] << 16)
+@@ -2049,6 +2052,7 @@ static void vub300_enable_sdio_irq(struct mmc_host *mmc, int enable)
+ return;
+ kref_get(&vub300->kref);
+ if (enable) {
++ set_current_state(TASK_RUNNING);
+ mutex_lock(&vub300->irq_mutex);
+ if (vub300->irqs_queued) {
+ vub300->irqs_queued -= 1;
+@@ -2064,6 +2068,7 @@ static void vub300_enable_sdio_irq(struct mmc_host *mmc, int enable)
+ vub300_queue_poll_work(vub300, 0);
+ }
+ mutex_unlock(&vub300->irq_mutex);
++ set_current_state(TASK_INTERRUPTIBLE);
+ } else {
+ vub300->irq_enabled = 0;
+ }
+@@ -2299,14 +2304,14 @@ static int vub300_probe(struct usb_interface *interface,
+ 0x0000, 0x0000, &vub300->system_port_status,
+ sizeof(vub300->system_port_status), 1000);
+ if (retval < 0) {
+- goto error4;
++ goto error5;
+ } else if (sizeof(vub300->system_port_status) == retval) {
+ vub300->card_present =
+ (0x0001 & vub300->system_port_status.port_flags) ? 1 : 0;
+ vub300->read_only =
+ (0x0010 & vub300->system_port_status.port_flags) ? 1 : 0;
+ } else {
+- goto error4;
++ goto error5;
+ }
+ usb_set_intfdata(interface, vub300);
+ INIT_DELAYED_WORK(&vub300->pollwork, vub300_pollwork_thread);
+@@ -2329,8 +2334,13 @@ static int vub300_probe(struct usb_interface *interface,
+ "USB vub300 remote SDIO host controller[%d]"
+ "connected with no SD/SDIO card inserted\n",
+ interface_to_InterfaceNumber(interface));
+- mmc_add_host(mmc);
++ retval = mmc_add_host(mmc);
++ if (retval)
++ goto error6;
++
+ return 0;
++error6:
++ del_timer_sync(&vub300->inactivity_timer);
+ error5:
+ mmc_free_host(mmc);
+ /*
+diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
+index 67ecd342fe5f1..b5b1a42ca25e1 100644
+--- a/drivers/mmc/host/wbsd.c
++++ b/drivers/mmc/host/wbsd.c
+@@ -1698,7 +1698,15 @@ static int wbsd_init(struct device *dev, int base, int irq, int dma,
+ */
+ wbsd_init_device(host);
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret) {
++ if (!pnp)
++ wbsd_chip_poweroff(host);
++
++ wbsd_release_resources(host);
++ wbsd_free_mmc(dev);
++ return ret;
++ }
+
+ pr_info("%s: W83L51xD", mmc_hostname(mmc));
+ if (host->chip_id != 0)
+diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
+index cf10949fb0acc..3933195488575 100644
+--- a/drivers/mmc/host/wmt-sdmmc.c
++++ b/drivers/mmc/host/wmt-sdmmc.c
+@@ -849,7 +849,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->clk_sdmmc)) {
+ dev_err(&pdev->dev, "Error getting clock\n");
+ ret = PTR_ERR(priv->clk_sdmmc);
+- goto fail5;
++ goto fail5_and_a_half;
+ }
+
+ ret = clk_prepare_enable(priv->clk_sdmmc);
+@@ -859,13 +859,20 @@ static int wmt_mci_probe(struct platform_device *pdev)
+ /* configure the controller to a known 'ready' state */
+ wmt_reset_hardware(mmc);
+
+- mmc_add_host(mmc);
++ ret = mmc_add_host(mmc);
++ if (ret)
++ goto fail7;
+
+ dev_info(&pdev->dev, "WMT SDHC Controller initialized\n");
+
+ return 0;
++fail7:
++ clk_disable_unprepare(priv->clk_sdmmc);
+ fail6:
+ clk_put(priv->clk_sdmmc);
++fail5_and_a_half:
++ dma_free_coherent(&pdev->dev, mmc->max_blk_count * 16,
++ priv->dma_desc_buffer, priv->dma_desc_device_addr);
+ fail5:
+ free_irq(dma_irq, priv);
+ fail4:
+diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c
+index 2640c5b326a49..acabb7715b423 100644
+--- a/drivers/most/most_usb.c
++++ b/drivers/most/most_usb.c
+@@ -149,7 +149,8 @@ static inline int drci_rd_reg(struct usb_device *dev, u16 reg, u16 *buf)
+ retval = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
+ DRCI_READ_REQ, req_type,
+ 0x0000,
+- reg, dma_buf, sizeof(*dma_buf), 5 * HZ);
++ reg, dma_buf, sizeof(*dma_buf),
++ USB_CTRL_GET_TIMEOUT);
+ *buf = le16_to_cpu(*dma_buf);
+ kfree(dma_buf);
+
+@@ -176,7 +177,7 @@ static inline int drci_wr_reg(struct usb_device *dev, u16 reg, u16 data)
+ reg,
+ NULL,
+ 0,
+- 5 * HZ);
++ USB_CTRL_SET_TIMEOUT);
+ }
+
+ static inline int start_sync_ep(struct usb_device *usb_dev, u16 ep)
+diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
+index aef14990e5f7c..19726ebd973d0 100644
+--- a/drivers/mtd/chips/Kconfig
++++ b/drivers/mtd/chips/Kconfig
+@@ -55,12 +55,14 @@ choice
+ LITTLE_ENDIAN_BYTE, if the bytes are reversed.
+
+ config MTD_CFI_NOSWAP
++ depends on !ARCH_IXP4XX || CPU_BIG_ENDIAN
+ bool "NO"
+
+ config MTD_CFI_BE_BYTE_SWAP
+ bool "BIG_ENDIAN_BYTE"
+
+ config MTD_CFI_LE_BYTE_SWAP
++ depends on !ARCH_IXP4XX
+ bool "LITTLE_ENDIAN_BYTE"
+
+ endchoice
+diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
+index a761134fd3bea..59334530dd46f 100644
+--- a/drivers/mtd/chips/cfi_cmdset_0002.c
++++ b/drivers/mtd/chips/cfi_cmdset_0002.c
+@@ -59,6 +59,10 @@
+ #define CFI_SR_WBASB BIT(3)
+ #define CFI_SR_SLSB BIT(1)
+
++enum cfi_quirks {
++ CFI_QUIRK_DQ_TRUE_DATA = BIT(0),
++};
++
+ static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
+ static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
+ #if !FORCE_WORD_WRITE
+@@ -436,6 +440,15 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd)
+ mtd->name);
+ }
+
++static void fixup_quirks(struct mtd_info *mtd)
++{
++ struct map_info *map = mtd->priv;
++ struct cfi_private *cfi = map->fldrv_priv;
++
++ if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x0c01)
++ cfi->quirks |= CFI_QUIRK_DQ_TRUE_DATA;
++}
++
+ /* Used to fix CFI-Tables of chips without Extended Query Tables */
+ static struct cfi_fixup cfi_nopri_fixup_table[] = {
+ { CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */
+@@ -474,6 +487,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
+ #if !FORCE_WORD_WRITE
+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers },
+ #endif
++ { CFI_MFR_ANY, CFI_ID_ANY, fixup_quirks },
+ { 0, 0, NULL }
+ };
+ static struct cfi_fixup jedec_fixup_table[] = {
+@@ -802,21 +816,25 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd)
+ }
+
+ /*
+- * Return true if the chip is ready.
++ * Return true if the chip is ready and has the correct value.
+ *
+ * Ready is one of: read mode, query mode, erase-suspend-read mode (in any
+ * non-suspended sector) and is indicated by no toggle bits toggling.
+ *
++ * Errors are indicated by bits toggling, or by bits held at the wrong
++ * value.
++ *
+ * Note that anything more complicated than checking if no bits are toggling
+ * (including checking DQ5 for an error status) is tricky to get working
+ * correctly and is therefore not done (particularly with interleaved chips
+ * as each chip must be checked independently of the others).
+ */
+ static int __xipram chip_ready(struct map_info *map, struct flchip *chip,
+- unsigned long addr)
++ unsigned long addr, map_word *expected)
+ {
+ struct cfi_private *cfi = map->fldrv_priv;
+ map_word d, t;
++ int ret;
+
+ if (cfi_use_status_reg(cfi)) {
+ map_word ready = CMD(CFI_SR_DRB);
+@@ -826,57 +844,32 @@ static int __xipram chip_ready(struct map_info *map, struct flchip *chip,
+ */
+ cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi,
+ cfi->device_type, NULL);
+- d = map_read(map, addr);
++ t = map_read(map, addr);
+
+- return map_word_andequal(map, d, ready, ready);
++ return map_word_andequal(map, t, ready, ready);
+ }
+
+ d = map_read(map, addr);
+ t = map_read(map, addr);
+
+- return map_word_equal(map, d, t);
++ ret = map_word_equal(map, d, t);
++
++ if (!ret || !expected)
++ return ret;
++
++ return map_word_equal(map, t, *expected);
+ }
+
+-/*
+- * Return true if the chip is ready and has the correct value.
+- *
+- * Ready is one of: read mode, query mode, erase-suspend-read mode (in any
+- * non-suspended sector) and it is indicated by no bits toggling.
+- *
+- * Error are indicated by toggling bits or bits held with the wrong value,
+- * or with bits toggling.
+- *
+- * Note that anything more complicated than checking if no bits are toggling
+- * (including checking DQ5 for an error status) is tricky to get working
+- * correctly and is therefore not done (particularly with interleaved chips
+- * as each chip must be checked independently of the others).
+- *
+- */
+ static int __xipram chip_good(struct map_info *map, struct flchip *chip,
+- unsigned long addr, map_word expected)
++ unsigned long addr, map_word *expected)
+ {
+ struct cfi_private *cfi = map->fldrv_priv;
+- map_word oldd, curd;
+-
+- if (cfi_use_status_reg(cfi)) {
+- map_word ready = CMD(CFI_SR_DRB);
+-
+- /*
+- * For chips that support status register, check device
+- * ready bit
+- */
+- cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi,
+- cfi->device_type, NULL);
+- curd = map_read(map, addr);
+-
+- return map_word_andequal(map, curd, ready, ready);
+- }
++ map_word *datum = expected;
+
+- oldd = map_read(map, addr);
+- curd = map_read(map, addr);
++ if (cfi->quirks & CFI_QUIRK_DQ_TRUE_DATA)
++ datum = NULL;
+
+- return map_word_equal(map, oldd, curd) &&
+- map_word_equal(map, curd, expected);
++ return chip_ready(map, chip, addr, datum);
+ }
+
+ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
+@@ -893,7 +886,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
+
+ case FL_STATUS:
+ for (;;) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ break;
+
+ if (time_after(jiffies, timeo)) {
+@@ -932,7 +925,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
+ chip->state = FL_ERASE_SUSPENDING;
+ chip->erase_suspended = 1;
+ for (;;) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ break;
+
+ if (time_after(jiffies, timeo)) {
+@@ -1463,7 +1456,7 @@ static int do_otp_lock(struct map_info *map, struct flchip *chip, loff_t adr,
+ /* wait for chip to become ready */
+ timeo = jiffies + msecs_to_jiffies(2);
+ for (;;) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ break;
+
+ if (time_after(jiffies, timeo)) {
+@@ -1699,7 +1692,7 @@ static int __xipram do_write_oneword_once(struct map_info *map,
+ * "chip_good" to avoid the failure due to scheduling.
+ */
+ if (time_after(jiffies, timeo) &&
+- !chip_good(map, chip, adr, datum)) {
++ !chip_good(map, chip, adr, &datum)) {
+ xip_enable(map, chip, adr);
+ printk(KERN_WARNING "MTD %s(): software timeout\n", __func__);
+ xip_disable(map, chip, adr);
+@@ -1707,7 +1700,7 @@ static int __xipram do_write_oneword_once(struct map_info *map,
+ break;
+ }
+
+- if (chip_good(map, chip, adr, datum)) {
++ if (chip_good(map, chip, adr, &datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
+ break;
+@@ -1979,14 +1972,14 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
+ * "chip_good" to avoid the failure due to scheduling.
+ */
+ if (time_after(jiffies, timeo) &&
+- !chip_good(map, chip, adr, datum)) {
++ !chip_good(map, chip, adr, &datum)) {
+ pr_err("MTD %s(): software timeout, address:0x%.8lx.\n",
+ __func__, adr);
+ ret = -EIO;
+ break;
+ }
+
+- if (chip_good(map, chip, adr, datum)) {
++ if (chip_good(map, chip, adr, &datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
+ break;
+@@ -2195,7 +2188,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
+ * If the driver thinks the chip is idle, and no toggle bits
+ * are changing, then the chip is actually idle for sure.
+ */
+- if (chip->state == FL_READY && chip_ready(map, chip, adr))
++ if (chip->state == FL_READY && chip_ready(map, chip, adr, NULL))
+ return 0;
+
+ /*
+@@ -2212,7 +2205,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip,
+
+ /* wait for the chip to become ready */
+ for (i = 0; i < jiffies_to_usecs(timeo); i++) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ return 0;
+
+ udelay(1);
+@@ -2276,13 +2269,13 @@ retry:
+ map_write(map, datum, adr);
+
+ for (i = 0; i < jiffies_to_usecs(uWriteTimeout); i++) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ break;
+
+ udelay(1);
+ }
+
+- if (!chip_good(map, chip, adr, datum) ||
++ if (!chip_ready(map, chip, adr, &datum) ||
+ cfi_check_err_status(map, chip, adr)) {
+ /* reset on all failures. */
+ map_write(map, CMD(0xF0), chip->start);
+@@ -2424,6 +2417,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
+ DECLARE_WAITQUEUE(wait, current);
+ int ret;
+ int retry_cnt = 0;
++ map_word datum = map_word_ff(map);
+
+ adr = cfi->addr_unlock1;
+
+@@ -2478,7 +2472,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
+ chip->erase_suspended = 0;
+ }
+
+- if (chip_good(map, chip, adr, map_word_ff(map))) {
++ if (chip_ready(map, chip, adr, &datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
+ break;
+@@ -2523,6 +2517,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
+ DECLARE_WAITQUEUE(wait, current);
+ int ret;
+ int retry_cnt = 0;
++ map_word datum = map_word_ff(map);
+
+ adr += chip->start;
+
+@@ -2577,7 +2572,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
+ chip->erase_suspended = 0;
+ }
+
+- if (chip_good(map, chip, adr, map_word_ff(map))) {
++ if (chip_ready(map, chip, adr, &datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
+ break;
+@@ -2771,7 +2766,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map,
+ */
+ timeo = jiffies + msecs_to_jiffies(2000); /* 2s max (un)locking */
+ for (;;) {
+- if (chip_ready(map, chip, adr))
++ if (chip_ready(map, chip, adr, NULL))
+ break;
+
+ if (time_after(jiffies, timeo)) {
+diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
+index 5b0ae5ddad745..27c08f22dec8c 100644
+--- a/drivers/mtd/devices/docg3.c
++++ b/drivers/mtd/devices/docg3.c
+@@ -1974,9 +1974,14 @@ static int __init docg3_probe(struct platform_device *pdev)
+ dev_err(dev, "No I/O memory resource defined\n");
+ return ret;
+ }
+- base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
+
+ ret = -ENOMEM;
++ base = devm_ioremap(dev, ress->start, DOC_IOSPACE_SIZE);
++ if (!base) {
++ dev_err(dev, "devm_ioremap dev failed\n");
++ return ret;
++ }
++
+ cascade = devm_kcalloc(dev, DOC_MAX_NBFLOORS, sizeof(*cascade),
+ GFP_KERNEL);
+ if (!cascade)
+diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
+index 77c872fd3d839..7d188cdff6a26 100644
+--- a/drivers/mtd/devices/mchp23k256.c
++++ b/drivers/mtd/devices/mchp23k256.c
+@@ -229,6 +229,19 @@ static const struct of_device_id mchp23k256_of_table[] = {
+ };
+ MODULE_DEVICE_TABLE(of, mchp23k256_of_table);
+
++static const struct spi_device_id mchp23k256_spi_ids[] = {
++ {
++ .name = "mchp23k256",
++ .driver_data = (kernel_ulong_t)&mchp23k256_caps,
++ },
++ {
++ .name = "mchp23lcv1024",
++ .driver_data = (kernel_ulong_t)&mchp23lcv1024_caps,
++ },
++ {}
++};
++MODULE_DEVICE_TABLE(spi, mchp23k256_spi_ids);
++
+ static struct spi_driver mchp23k256_driver = {
+ .driver = {
+ .name = "mchp23k256",
+@@ -236,6 +249,7 @@ static struct spi_driver mchp23k256_driver = {
+ },
+ .probe = mchp23k256_probe,
+ .remove = mchp23k256_remove,
++ .id_table = mchp23k256_spi_ids,
+ };
+
+ module_spi_driver(mchp23k256_driver);
+diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c
+index 99400d0fb8c1e..fbd6b6bf908e5 100644
+--- a/drivers/mtd/devices/mchp48l640.c
++++ b/drivers/mtd/devices/mchp48l640.c
+@@ -357,6 +357,15 @@ static const struct of_device_id mchp48l640_of_table[] = {
+ };
+ MODULE_DEVICE_TABLE(of, mchp48l640_of_table);
+
++static const struct spi_device_id mchp48l640_spi_ids[] = {
++ {
++ .name = "48l640",
++ .driver_data = (kernel_ulong_t)&mchp48l640_caps,
++ },
++ {}
++};
++MODULE_DEVICE_TABLE(spi, mchp48l640_spi_ids);
++
+ static struct spi_driver mchp48l640_driver = {
+ .driver = {
+ .name = "mchp48l640",
+@@ -364,6 +373,7 @@ static struct spi_driver mchp48l640_driver = {
+ },
+ .probe = mchp48l640_probe,
+ .remove = mchp48l640_remove,
++ .id_table = mchp48l640_spi_ids,
+ };
+
+ module_spi_driver(mchp48l640_driver);
+diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
+index 9802e265fca80..9c714c982c6ec 100644
+--- a/drivers/mtd/devices/mtd_dataflash.c
++++ b/drivers/mtd/devices/mtd_dataflash.c
+@@ -96,6 +96,13 @@ struct dataflash {
+ struct mtd_info mtd;
+ };
+
++static const struct spi_device_id dataflash_dev_ids[] = {
++ { "at45" },
++ { "dataflash" },
++ { },
++};
++MODULE_DEVICE_TABLE(spi, dataflash_dev_ids);
++
+ #ifdef CONFIG_OF
+ static const struct of_device_id dataflash_dt_ids[] = {
+ { .compatible = "atmel,at45", },
+@@ -105,6 +112,13 @@ static const struct of_device_id dataflash_dt_ids[] = {
+ MODULE_DEVICE_TABLE(of, dataflash_dt_ids);
+ #endif
+
++static const struct spi_device_id dataflash_spi_ids[] = {
++ { .name = "at45", },
++ { .name = "dataflash", },
++ { /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(spi, dataflash_spi_ids);
++
+ /* ......................................................................... */
+
+ /*
+@@ -927,9 +941,11 @@ static struct spi_driver dataflash_driver = {
+ .name = "mtd_dataflash",
+ .of_match_table = of_match_ptr(dataflash_dt_ids),
+ },
++ .id_table = dataflash_dev_ids,
+
+ .probe = dataflash_probe,
+ .remove = dataflash_remove,
++ .id_table = dataflash_spi_ids,
+
+ /* FIXME: investigate suspend and resume... */
+ };
+diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
+index 6ed6c51fac69e..d503821a3e606 100644
+--- a/drivers/mtd/devices/phram.c
++++ b/drivers/mtd/devices/phram.c
+@@ -264,16 +264,20 @@ static int phram_setup(const char *val)
+ }
+ }
+
+- if (erasesize)
+- div_u64_rem(len, (uint32_t)erasesize, &rem);
+-
+ if (len == 0 || erasesize == 0 || erasesize > len
+- || erasesize > UINT_MAX || rem) {
++ || erasesize > UINT_MAX) {
+ parse_err("illegal erasesize or len\n");
+ ret = -EINVAL;
+ goto error;
+ }
+
++ div_u64_rem(len, (uint32_t)erasesize, &rem);
++ if (rem) {
++ parse_err("len is not multiple of erasesize\n");
++ ret = -EINVAL;
++ goto error;
++ }
++
+ ret = register_device(name, start, len, (uint32_t)erasesize);
+ if (ret)
+ goto error;
+diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c
+index 983999c020d66..48bda2dd1bb55 100644
+--- a/drivers/mtd/devices/st_spi_fsm.c
++++ b/drivers/mtd/devices/st_spi_fsm.c
+@@ -2115,10 +2115,12 @@ static int stfsm_probe(struct platform_device *pdev)
+ (long long)fsm->mtd.size, (long long)(fsm->mtd.size >> 20),
+ fsm->mtd.erasesize, (fsm->mtd.erasesize >> 10));
+
+- return mtd_device_register(&fsm->mtd, NULL, 0);
+-
++ ret = mtd_device_register(&fsm->mtd, NULL, 0);
++ if (ret) {
+ err_clk_unprepare:
+- clk_disable_unprepare(fsm->clk);
++ clk_disable_unprepare(fsm->clk);
++ }
++
+ return ret;
+ }
+
+diff --git a/drivers/mtd/hyperbus/rpc-if.c b/drivers/mtd/hyperbus/rpc-if.c
+index ecb050ba95cdf..dc164c18f8429 100644
+--- a/drivers/mtd/hyperbus/rpc-if.c
++++ b/drivers/mtd/hyperbus/rpc-if.c
+@@ -124,7 +124,9 @@ static int rpcif_hb_probe(struct platform_device *pdev)
+ if (!hyperbus)
+ return -ENOMEM;
+
+- rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent);
++ error = rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent);
++ if (error)
++ return error;
+
+ platform_set_drvdata(pdev, hyperbus);
+
+@@ -150,9 +152,9 @@ static int rpcif_hb_remove(struct platform_device *pdev)
+ {
+ struct rpcif_hyperbus *hyperbus = platform_get_drvdata(pdev);
+ int error = hyperbus_unregister_device(&hyperbus->hbdev);
+- struct rpcif *rpc = dev_get_drvdata(pdev->dev.parent);
+
+- rpcif_disable_rpm(rpc);
++ rpcif_disable_rpm(&hyperbus->rpc);
++
+ return error;
+ }
+
+diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c
+index 72f5c7b300790..add4386f99f00 100644
+--- a/drivers/mtd/lpddr/lpddr2_nvm.c
++++ b/drivers/mtd/lpddr/lpddr2_nvm.c
+@@ -433,6 +433,8 @@ static int lpddr2_nvm_probe(struct platform_device *pdev)
+
+ /* lpddr2_nvm address range */
+ add_range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!add_range)
++ return -ENODEV;
+
+ /* Populate map_info data structure */
+ *map = (struct map_info) {
+diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
+index aaa164b977fe8..4945caa88345b 100644
+--- a/drivers/mtd/maps/Kconfig
++++ b/drivers/mtd/maps/Kconfig
+@@ -302,7 +302,7 @@ config MTD_DC21285
+
+ config MTD_IXP4XX
+ tristate "CFI Flash device mapped on Intel IXP4xx based systems"
+- depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX
++ depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX && MTD_CFI_ADV_OPTIONS
+ help
+ This enables MTD access to flash devices on platforms based
+ on Intel's IXP4xx family of network processors such as the
+diff --git a/drivers/mtd/maps/physmap-versatile.c b/drivers/mtd/maps/physmap-versatile.c
+index ad7cd9cfaee04..a1b8b7b25f88b 100644
+--- a/drivers/mtd/maps/physmap-versatile.c
++++ b/drivers/mtd/maps/physmap-versatile.c
+@@ -93,6 +93,7 @@ static int ap_flash_init(struct platform_device *pdev)
+ return -ENODEV;
+ }
+ ebi_base = of_iomap(ebi, 0);
++ of_node_put(ebi);
+ if (!ebi_base)
+ return -ENODEV;
+
+@@ -207,6 +208,7 @@ int of_flash_probe_versatile(struct platform_device *pdev,
+
+ versatile_flashprot = (enum versatile_flashprot)devid->data;
+ rmap = syscon_node_to_regmap(sysnp);
++ of_node_put(sysnp);
+ if (IS_ERR(rmap))
+ return PTR_ERR(rmap);
+
+diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
+index 7d96758a8f04e..6e5e557559704 100644
+--- a/drivers/mtd/maps/pxa2xx-flash.c
++++ b/drivers/mtd/maps/pxa2xx-flash.c
+@@ -66,6 +66,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev)
+ if (!info->map.virt) {
+ printk(KERN_WARNING "Failed to ioremap %s\n",
+ info->map.name);
++ kfree(info);
+ return -ENOMEM;
+ }
+ info->map.cached = ioremap_cache(info->map.phys, info->map.size);
+@@ -87,6 +88,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev)
+ iounmap((void *)info->map.virt);
+ if (info->map.cached)
+ iounmap(info->map.cached);
++ kfree(info);
+ return -EIO;
+ }
+ info->mtd->dev.parent = &pdev->dev;
+diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
+index 03e3de3a5d79e..a0a1194dc1d90 100644
+--- a/drivers/mtd/mtdblock.c
++++ b/drivers/mtd/mtdblock.c
+@@ -153,7 +153,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
+ mtdblk->cache_state = STATE_EMPTY;
+ ret = mtd_read(mtd, sect_start, sect_size,
+ &retlen, mtdblk->cache_data);
+- if (ret)
++ if (ret && !mtd_is_bitflip(ret))
+ return ret;
+ if (retlen != sect_size)
+ return -EIO;
+@@ -188,8 +188,12 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
+ pr_debug("mtdblock: read on \"%s\" at 0x%lx, size 0x%x\n",
+ mtd->name, pos, len);
+
+- if (!sect_size)
+- return mtd_read(mtd, pos, len, &retlen, buf);
++ if (!sect_size) {
++ ret = mtd_read(mtd, pos, len, &retlen, buf);
++ if (ret && !mtd_is_bitflip(ret))
++ return ret;
++ return 0;
++ }
+
+ while (len > 0) {
+ unsigned long sect_start = (pos/sect_size)*sect_size;
+@@ -209,7 +213,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
+ memcpy (buf, mtdblk->cache_data + offset, size);
+ } else {
+ ret = mtd_read(mtd, pos, size, &retlen, buf);
+- if (ret)
++ if (ret && !mtd_is_bitflip(ret))
+ return ret;
+ if (retlen != size)
+ return -EIO;
+@@ -257,6 +261,10 @@ static int mtdblock_open(struct mtd_blktrans_dev *mbd)
+ return 0;
+ }
+
++ if (mtd_type_is_nand(mbd->mtd))
++ pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
++ mbd->tr->name, mbd->mtd->name);
++
+ /* OK, it's not open. Create cache info for it */
+ mtdblk->count = 1;
+ mutex_init(&mtdblk->cache_mutex);
+@@ -322,10 +330,6 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
+ if (!(mtd->flags & MTD_WRITEABLE))
+ dev->mbd.readonly = 1;
+
+- if (mtd_type_is_nand(mtd))
+- pr_warn("%s: MTD device '%s' is NAND, please consider using UBI block devices instead.\n",
+- tr->name, mtd->name);
+-
+ if (add_mtd_blktrans_dev(&dev->mbd))
+ kfree(dev);
+ }
+diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
+index c8fd7f758938b..2a228ee32641c 100644
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -546,6 +546,7 @@ static int mtd_nvmem_add(struct mtd_info *mtd)
+ config.stride = 1;
+ config.read_only = true;
+ config.root_only = true;
++ config.ignore_wp = true;
+ config.no_of_node = !of_device_is_compatible(node, "nvmem-cells");
+ config.priv = mtd;
+
+@@ -670,8 +671,10 @@ int add_mtd_device(struct mtd_info *mtd)
+ dev_set_drvdata(&mtd->dev, mtd);
+ of_node_get(mtd_get_of_node(mtd));
+ error = device_register(&mtd->dev);
+- if (error)
++ if (error) {
++ put_device(&mtd->dev);
+ goto fail_added;
++ }
+
+ /* Add the nvmem provider */
+ error = mtd_nvmem_add(mtd);
+@@ -724,8 +727,6 @@ int del_mtd_device(struct mtd_info *mtd)
+
+ mutex_lock(&mtd_table_mutex);
+
+- debugfs_remove_recursive(mtd->dbg.dfs_dir);
+-
+ if (idr_find(&mtd_idr, mtd->index) != mtd) {
+ ret = -ENODEV;
+ goto out_error;
+@@ -741,6 +742,8 @@ int del_mtd_device(struct mtd_info *mtd)
+ mtd->index, mtd->name, mtd->usecount);
+ ret = -EBUSY;
+ } else {
++ debugfs_remove_recursive(mtd->dbg.dfs_dir);
++
+ /* Try to remove the NVMEM provider */
+ if (mtd->nvmem)
+ nvmem_unregister(mtd->nvmem);
+@@ -825,12 +828,12 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
+
+ /* OTP nvmem will be registered on the physical device */
+ config.dev = mtd->dev.parent;
+- /* just reuse the compatible as name */
+ config.name = compatible;
+- config.id = NVMEM_DEVID_NONE;
++ config.id = NVMEM_DEVID_AUTO;
+ config.owner = THIS_MODULE;
+ config.type = NVMEM_TYPE_OTP;
+ config.root_only = true;
++ config.ignore_wp = true;
+ config.reg_read = reg_read;
+ config.size = size;
+ config.of_node = np;
+@@ -876,6 +879,7 @@ static int mtd_nvmem_fact_otp_reg_read(void *priv, unsigned int offset,
+
+ static int mtd_otp_nvmem_add(struct mtd_info *mtd)
+ {
++ struct device *dev = mtd->dev.parent;
+ struct nvmem_device *nvmem;
+ ssize_t size;
+ int err;
+@@ -889,7 +893,7 @@ static int mtd_otp_nvmem_add(struct mtd_info *mtd)
+ nvmem = mtd_otp_nvmem_register(mtd, "user-otp", size,
+ mtd_nvmem_user_otp_reg_read);
+ if (IS_ERR(nvmem)) {
+- dev_err(&mtd->dev, "Failed to register OTP NVMEM device\n");
++ dev_err(dev, "Failed to register OTP NVMEM device\n");
+ return PTR_ERR(nvmem);
+ }
+ mtd->otp_user_nvmem = nvmem;
+@@ -907,7 +911,7 @@ static int mtd_otp_nvmem_add(struct mtd_info *mtd)
+ nvmem = mtd_otp_nvmem_register(mtd, "factory-otp", size,
+ mtd_nvmem_fact_otp_reg_read);
+ if (IS_ERR(nvmem)) {
+- dev_err(&mtd->dev, "Failed to register OTP NVMEM device\n");
++ dev_err(dev, "Failed to register OTP NVMEM device\n");
+ err = PTR_ERR(nvmem);
+ goto err;
+ }
+@@ -960,10 +964,14 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
+
+ mtd_set_dev_defaults(mtd);
+
++ ret = mtd_otp_nvmem_add(mtd);
++ if (ret)
++ goto out;
++
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+ ret = add_mtd_device(mtd);
+ if (ret)
+- return ret;
++ goto out;
+ }
+
+ /* Prefer parsed partitions over driver-provided fallback */
+@@ -998,9 +1006,12 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
+ register_reboot_notifier(&mtd->reboot_notifier);
+ }
+
+- ret = mtd_otp_nvmem_add(mtd);
+-
+ out:
++ if (ret) {
++ nvmem_unregister(mtd->otp_user_nvmem);
++ nvmem_unregister(mtd->otp_factory_nvmem);
++ }
++
+ if (ret && device_is_registered(&mtd->dev))
+ del_mtd_device(mtd);
+
+diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
+index 04af12b66110c..357661b62c94d 100644
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -312,7 +312,7 @@ static int __mtd_del_partition(struct mtd_info *mtd)
+ if (err)
+ return err;
+
+- list_del(&child->part.node);
++ list_del(&mtd->part.node);
+ free_partition(mtd);
+
+ return 0;
+diff --git a/drivers/mtd/nand/onenand/generic.c b/drivers/mtd/nand/onenand/generic.c
+index 8b6f4da5d7201..a4b8b65fe15f5 100644
+--- a/drivers/mtd/nand/onenand/generic.c
++++ b/drivers/mtd/nand/onenand/generic.c
+@@ -53,7 +53,12 @@ static int generic_onenand_probe(struct platform_device *pdev)
+ }
+
+ info->onenand.mmcontrol = pdata ? pdata->mmcontrol : NULL;
+- info->onenand.irq = platform_get_irq(pdev, 0);
++
++ err = platform_get_irq(pdev, 0);
++ if (err < 0)
++ goto out_iounmap;
++
++ info->onenand.irq = err;
+
+ info->mtd.dev.parent = &pdev->dev;
+ info->mtd.priv = &info->onenand;
+diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
+index ff1697f899ba6..13de39aa3288f 100644
+--- a/drivers/mtd/nand/raw/ams-delta.c
++++ b/drivers/mtd/nand/raw/ams-delta.c
+@@ -217,9 +217,8 @@ static int gpio_nand_setup_interface(struct nand_chip *this, int csline,
+
+ static int gpio_nand_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -370,6 +369,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
+ /* Release write protection */
+ gpiod_set_value(priv->gpiod_nwp, 0);
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ /* Scan to find existence of the device */
+ err = nand_scan(this, 1);
+ if (err)
+diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
+index 9cbcc698c64d8..296fb16c8dc3c 100644
+--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
++++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
+@@ -347,17 +347,17 @@ static int anfc_select_target(struct nand_chip *chip, int target)
+
+ /* Update clock frequency */
+ if (nfc->cur_clk != anand->clk) {
+- clk_disable_unprepare(nfc->controller_clk);
+- ret = clk_set_rate(nfc->controller_clk, anand->clk);
++ clk_disable_unprepare(nfc->bus_clk);
++ ret = clk_set_rate(nfc->bus_clk, anand->clk);
+ if (ret) {
+ dev_err(nfc->dev, "Failed to change clock rate\n");
+ return ret;
+ }
+
+- ret = clk_prepare_enable(nfc->controller_clk);
++ ret = clk_prepare_enable(nfc->bus_clk);
+ if (ret) {
+ dev_err(nfc->dev,
+- "Failed to re-enable the controller clock\n");
++ "Failed to re-enable the bus clock\n");
+ return ret;
+ }
+
+@@ -973,6 +973,21 @@ static int anfc_setup_interface(struct nand_chip *chip, int target,
+ nvddr = nand_get_nvddr_timings(conf);
+ if (IS_ERR(nvddr))
+ return PTR_ERR(nvddr);
++
++ /*
++ * The controller only supports data payload requests which are
++ * a multiple of 4. In practice, most data accesses are 4-byte
++ * aligned and this is not an issue. However, rounding up will
++ * simply be refused by the controller if we reached the end of
++ * the device *and* we are using the NV-DDR interface(!). In
++ * this situation, unaligned data requests ending at the device
++ * boundary will confuse the controller and cannot be performed.
++ *
++ * This is something that happens in nand_read_subpage() when
++ * selecting software ECC support and must be avoided.
++ */
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT)
++ return -ENOTSUPP;
+ } else {
+ sdr = nand_get_sdr_timings(conf);
+ if (IS_ERR(sdr))
+@@ -1028,7 +1043,13 @@ static int anfc_setup_interface(struct nand_chip *chip, int target,
+ DQS_BUFF_SEL_OUT(dqs_mode);
+ }
+
+- anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
++ if (nand_interface_is_sdr(conf)) {
++ anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
++ } else {
++ /* ONFI timings are defined in picoseconds */
++ anand->clk = div_u64((u64)NSEC_PER_SEC * 1000,
++ conf->timings.nvddr.tCK_min);
++ }
+
+ /*
+ * Due to a hardware bug in the ZynqMP SoC, SDR timing modes 0-1 work
+diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
+index f3276ee9e4fe7..73956a9f5449f 100644
+--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
++++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
+@@ -405,6 +405,7 @@ static int atmel_nand_dma_transfer(struct atmel_nand_controller *nc,
+
+ dma_async_issue_pending(nc->dmac);
+ wait_for_completion(&finished);
++ dma_unmap_single(nc->dev, buf_dma, len, dir);
+
+ return 0;
+
+@@ -2060,13 +2061,15 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
+ nc->mck = of_clk_get(dev->parent->of_node, 0);
+ if (IS_ERR(nc->mck)) {
+ dev_err(dev, "Failed to retrieve MCK clk\n");
+- return PTR_ERR(nc->mck);
++ ret = PTR_ERR(nc->mck);
++ goto out_release_dma;
+ }
+
+ np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
+ if (!np) {
+ dev_err(dev, "Missing or invalid atmel,smc property\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto out_release_dma;
+ }
+
+ nc->smc = syscon_node_to_regmap(np);
+@@ -2074,10 +2077,16 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
+ if (IS_ERR(nc->smc)) {
+ ret = PTR_ERR(nc->smc);
+ dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret);
+- return ret;
++ goto out_release_dma;
+ }
+
+ return 0;
++
++out_release_dma:
++ if (nc->dmac)
++ dma_release_channel(nc->dmac);
++
++ return ret;
+ }
+
+ static int
+diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
+index 99116896cfd6c..5aa3a06d740c7 100644
+--- a/drivers/mtd/nand/raw/au1550nd.c
++++ b/drivers/mtd/nand/raw/au1550nd.c
+@@ -239,9 +239,8 @@ static int au1550nd_exec_op(struct nand_chip *this,
+
+ static int au1550nd_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -310,6 +309,13 @@ static int au1550nd_probe(struct platform_device *pdev)
+ if (pd->devwidth)
+ this->options |= NAND_BUSWIDTH_16;
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ ret = nand_scan(this, 1);
+ if (ret) {
+ dev_err(&pdev->dev, "NAND scan failed with %d\n", ret);
+diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+index f75929783b941..c1afadb50eecc 100644
+--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
++++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+@@ -1043,6 +1043,14 @@ static int bcmnand_ctrl_poll_status(struct brcmnand_controller *ctrl,
+ cpu_relax();
+ } while (time_after(limit, jiffies));
+
++ /*
++ * do a final check after time out in case the CPU was busy and the driver
++ * did not get enough time to perform the polling to avoid false alarms
++ */
++ val = brcmnand_read_reg(ctrl, BRCMNAND_INTFC_STATUS);
++ if ((val & mask) == expected_val)
++ return 0;
++
+ dev_warn(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
+ expected_val, val & mask);
+
+@@ -1432,19 +1440,33 @@ static int write_oob_to_regs(struct brcmnand_controller *ctrl, int i,
+ const u8 *oob, int sas, int sector_1k)
+ {
+ int tbytes = sas << sector_1k;
+- int j;
++ int j, k = 0;
++ u32 last = 0xffffffff;
++ u8 *plast = (u8 *)&last;
+
+ /* Adjust OOB values for 1K sector size */
+ if (sector_1k && (i & 0x01))
+ tbytes = max(0, tbytes - (int)ctrl->max_oob);
+ tbytes = min_t(int, tbytes, ctrl->max_oob);
+
+- for (j = 0; j < tbytes; j += 4)
++ /*
++ * tbytes may not be a multiple of words. Make sure we don't read out of
++ * bounds and stop at the last word.
++ */
++ for (j = 0; (j + 3) < tbytes; j += 4)
+ oob_reg_write(ctrl, j,
+ (oob[j + 0] << 24) |
+ (oob[j + 1] << 16) |
+ (oob[j + 2] << 8) |
+ (oob[j + 3] << 0));
++
++ /* handle the remaining bytes */
++ while (j < tbytes)
++ plast[k++] = oob[j++];
++
++ if (tbytes & 0x3)
++ oob_reg_write(ctrl, (tbytes & ~0x3), (__force u32)cpu_to_be32(last));
++
+ return tbytes;
+ }
+
+@@ -1563,7 +1585,17 @@ static void brcmnand_send_cmd(struct brcmnand_host *host, int cmd)
+
+ dev_dbg(ctrl->dev, "send native cmd %d addr 0x%llx\n", cmd, cmd_addr);
+
+- BUG_ON(ctrl->cmd_pending != 0);
++ /*
++ * If we came here through _panic_write and there is a pending
++ * command, try to wait for it. If it times out, rather than
++ * hitting BUG_ON, just return so we don't crash while crashing.
++ */
++ if (oops_in_progress) {
++ if (ctrl->cmd_pending &&
++ bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0))
++ return;
++ } else
++ BUG_ON(ctrl->cmd_pending != 0);
+ ctrl->cmd_pending = cmd;
+
+ ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY, NAND_CTRL_RDY, 0);
+@@ -2106,7 +2138,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
+ mtd->oobsize / trans,
+ host->hwcfg.sector_size_1k);
+
+- if (!ret) {
++ if (ret != -EBADMSG) {
+ *err_addr = brcmnand_get_uncorrecc_addr(ctrl);
+
+ if (*err_addr)
+@@ -2583,6 +2615,8 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
+ struct nand_chip *chip = &host->chip;
+ const struct nand_ecc_props *requirements =
+ nanddev_get_ecc_requirements(&chip->base);
++ struct nand_memory_organization *memorg =
++ nanddev_get_memorg(&chip->base);
+ struct brcmnand_controller *ctrl = host->ctrl;
+ struct brcmnand_cfg *cfg = &host->hwcfg;
+ char msg[128];
+@@ -2604,10 +2638,11 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
+ if (cfg->spare_area_size > ctrl->max_oob)
+ cfg->spare_area_size = ctrl->max_oob;
+ /*
+- * Set oobsize to be consistent with controller's spare_area_size, as
+- * the rest is inaccessible.
++ * Set mtd and memorg oobsize to be consistent with controller's
++ * spare_area_size, as the rest is inaccessible.
+ */
+ mtd->oobsize = cfg->spare_area_size * (mtd->writesize >> FC_SHIFT);
++ memorg->oobsize = mtd->oobsize;
+
+ cfg->device_size = mtd->size;
+ cfg->block_size = mtd->erasesize;
+diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c
+index 7eec60ea90564..0d72672f8b64d 100644
+--- a/drivers/mtd/nand/raw/cadence-nand-controller.c
++++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
+@@ -2983,11 +2983,10 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev)
+ if (IS_ERR(cdns_ctrl->reg))
+ return PTR_ERR(cdns_ctrl->reg);
+
+- res = platform_get_resource(ofdev, IORESOURCE_MEM, 1);
+- cdns_ctrl->io.dma = res->start;
+- cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res);
++ cdns_ctrl->io.virt = devm_platform_get_and_ioremap_resource(ofdev, 1, &res);
+ if (IS_ERR(cdns_ctrl->io.virt))
+ return PTR_ERR(cdns_ctrl->io.virt);
++ cdns_ctrl->io.dma = res->start;
+
+ dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk");
+ if (IS_ERR(dt->clk))
+diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
+index 118da9944e3bc..45fec8c192aba 100644
+--- a/drivers/mtd/nand/raw/davinci_nand.c
++++ b/drivers/mtd/nand/raw/davinci_nand.c
+@@ -371,77 +371,6 @@ correct:
+ return corrected;
+ }
+
+-/**
+- * nand_read_page_hwecc_oob_first - hw ecc, read oob first
+- * @chip: nand chip info structure
+- * @buf: buffer to store read data
+- * @oob_required: caller requires OOB data read to chip->oob_poi
+- * @page: page number to read
+- *
+- * Hardware ECC for large page chips, require OOB to be read first. For this
+- * ECC mode, the write_page method is re-used from ECC_HW. These methods
+- * read/write ECC from the OOB area, unlike the ECC_HW_SYNDROME support with
+- * multiple ECC steps, follows the "infix ECC" scheme and reads/writes ECC from
+- * the data area, by overwriting the NAND manufacturer bad block markings.
+- */
+-static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip,
+- uint8_t *buf,
+- int oob_required, int page)
+-{
+- struct mtd_info *mtd = nand_to_mtd(chip);
+- int i, eccsize = chip->ecc.size, ret;
+- int eccbytes = chip->ecc.bytes;
+- int eccsteps = chip->ecc.steps;
+- uint8_t *p = buf;
+- uint8_t *ecc_code = chip->ecc.code_buf;
+- uint8_t *ecc_calc = chip->ecc.calc_buf;
+- unsigned int max_bitflips = 0;
+-
+- /* Read the OOB area first */
+- ret = nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
+- if (ret)
+- return ret;
+-
+- ret = nand_read_page_op(chip, page, 0, NULL, 0);
+- if (ret)
+- return ret;
+-
+- ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0,
+- chip->ecc.total);
+- if (ret)
+- return ret;
+-
+- for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+- int stat;
+-
+- chip->ecc.hwctl(chip, NAND_ECC_READ);
+-
+- ret = nand_read_data_op(chip, p, eccsize, false, false);
+- if (ret)
+- return ret;
+-
+- chip->ecc.calculate(chip, p, &ecc_calc[i]);
+-
+- stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL);
+- if (stat == -EBADMSG &&
+- (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) {
+- /* check for empty pages with bitflips */
+- stat = nand_check_erased_ecc_chunk(p, eccsize,
+- &ecc_code[i],
+- eccbytes, NULL, 0,
+- chip->ecc.strength);
+- }
+-
+- if (stat < 0) {
+- mtd->ecc_stats.failed++;
+- } else {
+- mtd->ecc_stats.corrected += stat;
+- max_bitflips = max_t(unsigned int, max_bitflips, stat);
+- }
+- }
+- return max_bitflips;
+-}
+-
+ /*----------------------------------------------------------------------*/
+
+ /* An ECC layout for using 4-bit ECC with small-page flash, storing
+@@ -651,7 +580,7 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
+ } else if (chunks == 4 || chunks == 8) {
+ mtd_set_ooblayout(mtd,
+ nand_get_large_page_ooblayout());
+- chip->ecc.read_page = nand_davinci_read_page_hwecc_oob_first;
++ chip->ecc.read_page = nand_read_page_hwecc_oob_first;
+ } else {
+ return -EIO;
+ }
+diff --git a/drivers/mtd/nand/raw/denali_pci.c b/drivers/mtd/nand/raw/denali_pci.c
+index 20c085a30adcb..de7e722d38262 100644
+--- a/drivers/mtd/nand/raw/denali_pci.c
++++ b/drivers/mtd/nand/raw/denali_pci.c
+@@ -74,22 +74,21 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ return ret;
+ }
+
+- denali->reg = ioremap(csr_base, csr_len);
++ denali->reg = devm_ioremap(denali->dev, csr_base, csr_len);
+ if (!denali->reg) {
+ dev_err(&dev->dev, "Spectra: Unable to remap memory region\n");
+ return -ENOMEM;
+ }
+
+- denali->host = ioremap(mem_base, mem_len);
++ denali->host = devm_ioremap(denali->dev, mem_base, mem_len);
+ if (!denali->host) {
+ dev_err(&dev->dev, "Spectra: ioremap failed!");
+- ret = -ENOMEM;
+- goto out_unmap_reg;
++ return -ENOMEM;
+ }
+
+ ret = denali_init(denali);
+ if (ret)
+- goto out_unmap_host;
++ return ret;
+
+ nsels = denali->nbanks;
+
+@@ -117,10 +116,6 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+
+ out_remove_denali:
+ denali_remove(denali);
+-out_unmap_host:
+- iounmap(denali->host);
+-out_unmap_reg:
+- iounmap(denali->reg);
+ return ret;
+ }
+
+@@ -129,8 +124,6 @@ static void denali_pci_remove(struct pci_dev *dev)
+ struct denali_controller *denali = pci_get_drvdata(dev);
+
+ denali_remove(denali);
+- iounmap(denali->reg);
+- iounmap(denali->host);
+ }
+
+ static struct pci_driver denali_pci_driver = {
+diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
+index aab93b9e6052d..a18d121396aa5 100644
+--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
++++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
+@@ -726,36 +726,40 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip)
+ struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
+ unsigned int al;
+
+- switch (chip->ecc.engine_type) {
+ /*
+ * if ECC was not chosen in DT, decide whether to use HW or SW ECC from
+ * CS Base Register
+ */
+- case NAND_ECC_ENGINE_TYPE_NONE:
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_INVALID) {
+ /* If CS Base Register selects full hardware ECC then use it */
+ if ((in_be32(&lbc->bank[priv->bank].br) & BR_DECC) ==
+ BR_DECC_CHK_GEN) {
+- chip->ecc.read_page = fsl_elbc_read_page;
+- chip->ecc.write_page = fsl_elbc_write_page;
+- chip->ecc.write_subpage = fsl_elbc_write_subpage;
+-
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+- mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops);
+- chip->ecc.size = 512;
+- chip->ecc.bytes = 3;
+- chip->ecc.strength = 1;
+ } else {
+ /* otherwise fall back to default software ECC */
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+ }
++ }
++
++ switch (chip->ecc.engine_type) {
++ /* if HW ECC was chosen, set up ECC and OOB layout */
++ case NAND_ECC_ENGINE_TYPE_ON_HOST:
++ chip->ecc.read_page = fsl_elbc_read_page;
++ chip->ecc.write_page = fsl_elbc_write_page;
++ chip->ecc.write_subpage = fsl_elbc_write_subpage;
++ mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops);
++ chip->ecc.size = 512;
++ chip->ecc.bytes = 3;
++ chip->ecc.strength = 1;
+ break;
+
+- /* if SW ECC was chosen in DT, we do not need to set anything here */
++ /* if none or SW ECC was chosen, we do not need to set anything here */
++ case NAND_ECC_ENGINE_TYPE_NONE:
+ case NAND_ECC_ENGINE_TYPE_SOFT:
++ case NAND_ECC_ENGINE_TYPE_ON_DIE:
+ break;
+
+- /* should we also implement *_ECC_ENGINE_CONTROLLER to do as above? */
+ default:
+ return -EINVAL;
+ }
+diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
+index b3cc427100a22..636e65328bb32 100644
+--- a/drivers/mtd/nand/raw/fsl_upm.c
++++ b/drivers/mtd/nand/raw/fsl_upm.c
+@@ -135,7 +135,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op,
+ unsigned int i;
+ int ret;
+
+- if (op->cs > NAND_MAX_CHIPS)
++ if (op->cs >= NAND_MAX_CHIPS)
+ return -EINVAL;
+
+ if (check_only)
+diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
+index a3e66155ae405..17786e1331e6d 100644
+--- a/drivers/mtd/nand/raw/fsmc_nand.c
++++ b/drivers/mtd/nand/raw/fsmc_nand.c
+@@ -15,6 +15,7 @@
+
+ #include <linux/clk.h>
+ #include <linux/completion.h>
++#include <linux/delay.h>
+ #include <linux/dmaengine.h>
+ #include <linux/dma-direction.h>
+ #include <linux/dma-mapping.h>
+@@ -93,6 +94,14 @@
+
+ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ)
+
++/*
++ * According to the SPEAr300 Reference Manual (RM0082):
++ * TOUTDEL = 7ns (Output delay from the flip-flops to the board)
++ * TINDEL = 5ns (Input delay from the board to the flip-flop)
++ */
++#define TOUTDEL 7000
++#define TINDEL 5000
++
+ struct fsmc_nand_timings {
+ u8 tclr;
+ u8 tar;
+@@ -277,7 +286,7 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
+ {
+ unsigned long hclk = clk_get_rate(host->clk);
+ unsigned long hclkn = NSEC_PER_SEC / hclk;
+- u32 thiz, thold, twait, tset;
++ u32 thiz, thold, twait, tset, twait_min;
+
+ if (sdrt->tRC_min < 30000)
+ return -EOPNOTSUPP;
+@@ -309,13 +318,6 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
+ else if (tims->thold > FSMC_THOLD_MASK)
+ tims->thold = FSMC_THOLD_MASK;
+
+- twait = max(sdrt->tRP_min, sdrt->tWP_min);
+- tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1;
+- if (tims->twait == 0)
+- tims->twait = 1;
+- else if (tims->twait > FSMC_TWAIT_MASK)
+- tims->twait = FSMC_TWAIT_MASK;
+-
+ tset = max(sdrt->tCS_min - sdrt->tWP_min,
+ sdrt->tCEA_max - sdrt->tREA_max);
+ tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1;
+@@ -324,6 +326,21 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
+ else if (tims->tset > FSMC_TSET_MASK)
+ tims->tset = FSMC_TSET_MASK;
+
++ /*
++ * According to the SPEAr300 Reference Manual (RM0082), which gives more
++ * information on FSMC timings than the SPEAr600 one (RM0305),
++ * twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL
++ */
++ twait_min = sdrt->tCEA_max - ((tims->tset + 1) * hclkn * 1000)
++ + TOUTDEL + TINDEL;
++ twait = max3(sdrt->tRP_min, sdrt->tWP_min, twait_min);
++
++ tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1;
++ if (tims->twait == 0)
++ tims->twait = 1;
++ else if (tims->twait > FSMC_TWAIT_MASK)
++ tims->twait = FSMC_TWAIT_MASK;
++
+ return 0;
+ }
+
+@@ -438,8 +455,10 @@ static int fsmc_correct_ecc1(struct nand_chip *chip,
+ unsigned char *read_ecc,
+ unsigned char *calc_ecc)
+ {
++ bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
++
+ return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+- chip->ecc.size, false);
++ chip->ecc.size, sm_order);
+ }
+
+ /* Count the number of 0's in buff up to a max of max_bits */
+@@ -662,6 +681,9 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op,
+ instr->ctx.waitrdy.timeout_ms);
+ break;
+ }
++
++ if (instr->delay_ns)
++ ndelay(instr->delay_ns);
+ }
+
+ return ret;
+@@ -1180,9 +1202,14 @@ static int fsmc_nand_suspend(struct device *dev)
+ static int fsmc_nand_resume(struct device *dev)
+ {
+ struct fsmc_nand_data *host = dev_get_drvdata(dev);
++ int ret;
+
+ if (host) {
+- clk_prepare_enable(host->clk);
++ ret = clk_prepare_enable(host->clk);
++ if (ret) {
++ dev_err(dev, "failed to enable clk\n");
++ return ret;
++ }
+ if (host->dev_timings)
+ fsmc_nand_setup(host, host->dev_timings);
+ nand_reset(&host->nand, 0);
+diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
+index fb7a086de35e5..fdf073d2e1b6c 100644
+--- a/drivers/mtd/nand/raw/gpio.c
++++ b/drivers/mtd/nand/raw/gpio.c
+@@ -163,9 +163,8 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
+
+ static int gpio_nand_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -365,6 +364,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
+ if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp))
+ gpiod_direction_output(gpiomtd->nwp, 1);
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ ret = nand_scan(chip, 1);
+ if (ret)
+ goto err_wp;
+diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+index 4d08e4ab5c1b6..aef722dfdef5f 100644
+--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
++++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+@@ -644,40 +644,55 @@ err_out:
+ * RDN_DELAY = ----------------------- {3}
+ * RP
+ */
+-static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
+- const struct nand_sdr_timings *sdr)
++static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
++ const struct nand_sdr_timings *sdr)
+ {
+ struct gpmi_nfc_hardware_timing *hw = &this->hw;
++ struct resources *r = &this->resources;
+ unsigned int dll_threshold_ps = this->devdata->max_chain_delay;
+ unsigned int period_ps, reference_period_ps;
+ unsigned int data_setup_cycles, data_hold_cycles, addr_setup_cycles;
+ unsigned int tRP_ps;
+ bool use_half_period;
+ int sample_delay_ps, sample_delay_factor;
+- u16 busy_timeout_cycles;
++ unsigned int busy_timeout_cycles;
+ u8 wrn_dly_sel;
++ unsigned long clk_rate, min_rate;
++ u64 busy_timeout_ps;
+
+ if (sdr->tRC_min >= 30000) {
+ /* ONFI non-EDO modes [0-3] */
+ hw->clk_rate = 22000000;
++ min_rate = 0;
+ wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS;
+ } else if (sdr->tRC_min >= 25000) {
+ /* ONFI EDO mode 4 */
+ hw->clk_rate = 80000000;
++ min_rate = 22000000;
+ wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
+ } else {
+ /* ONFI EDO mode 5 */
+ hw->clk_rate = 100000000;
++ min_rate = 80000000;
+ wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
+ }
+
++ clk_rate = clk_round_rate(r->clock[0], hw->clk_rate);
++ if (clk_rate <= min_rate) {
++ dev_err(this->dev, "clock setting: expected %ld, got %ld\n",
++ hw->clk_rate, clk_rate);
++ return -ENOTSUPP;
++ }
++
++ hw->clk_rate = clk_rate;
+ /* SDR core timings are given in picoseconds */
+ period_ps = div_u64((u64)NSEC_PER_SEC * 1000, hw->clk_rate);
+
+ addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps);
+ data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps);
+ data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps);
+- busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps);
++ busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max);
++ busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps);
+
+ hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
+ BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
+@@ -711,16 +726,35 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
+ hw->ctrl1n |= BF_GPMI_CTRL1_RDN_DELAY(sample_delay_factor) |
+ BM_GPMI_CTRL1_DLL_ENABLE |
+ (use_half_period ? BM_GPMI_CTRL1_HALF_PERIOD : 0);
++ return 0;
+ }
+
+-static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this)
++static int gpmi_nfc_apply_timings(struct gpmi_nand_data *this)
+ {
+ struct gpmi_nfc_hardware_timing *hw = &this->hw;
+ struct resources *r = &this->resources;
+ void __iomem *gpmi_regs = r->gpmi_regs;
+ unsigned int dll_wait_time_us;
++ int ret;
+
+- clk_set_rate(r->clock[0], hw->clk_rate);
++ /* Clock dividers do NOT guarantee a clean clock signal on their output
++ * during a change of the divide factor on i.MX6Q/UL/SX. On i.MX7/8,
++ * all clock dividers provide this guarantee.
++ */
++ if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this))
++ clk_disable_unprepare(r->clock[0]);
++
++ ret = clk_set_rate(r->clock[0], hw->clk_rate);
++ if (ret) {
++ dev_err(this->dev, "cannot set clock rate to %lu Hz: %d\n", hw->clk_rate, ret);
++ return ret;
++ }
++
++ if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this)) {
++ ret = clk_prepare_enable(r->clock[0]);
++ if (ret)
++ return ret;
++ }
+
+ writel(hw->timing0, gpmi_regs + HW_GPMI_TIMING0);
+ writel(hw->timing1, gpmi_regs + HW_GPMI_TIMING1);
+@@ -739,6 +773,8 @@ static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this)
+
+ /* Wait for the DLL to settle. */
+ udelay(dll_wait_time_us);
++
++ return 0;
+ }
+
+ static int gpmi_setup_interface(struct nand_chip *chip, int chipnr,
+@@ -746,6 +782,7 @@ static int gpmi_setup_interface(struct nand_chip *chip, int chipnr,
+ {
+ struct gpmi_nand_data *this = nand_get_controller_data(chip);
+ const struct nand_sdr_timings *sdr;
++ int ret;
+
+ /* Retrieve required NAND timings */
+ sdr = nand_get_sdr_timings(conf);
+@@ -761,7 +798,9 @@ static int gpmi_setup_interface(struct nand_chip *chip, int chipnr,
+ return 0;
+
+ /* Do the actual derivation of the controller timings */
+- gpmi_nfc_compute_timings(this, sdr);
++ ret = gpmi_nfc_compute_timings(this, sdr);
++ if (ret)
++ return ret;
+
+ this->hw.must_apply_timings = true;
+
+@@ -1034,15 +1073,6 @@ static int gpmi_get_clks(struct gpmi_nand_data *this)
+ r->clock[i] = clk;
+ }
+
+- if (GPMI_IS_MX6(this))
+- /*
+- * Set the default value for the gpmi clock.
+- *
+- * If you want to use the ONFI nand which is in the
+- * Synchronous Mode, you should change the clock as you need.
+- */
+- clk_set_rate(r->clock[0], 22000000);
+-
+ return 0;
+
+ err_clock:
+@@ -2280,7 +2310,9 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip,
+ */
+ if (this->hw.must_apply_timings) {
+ this->hw.must_apply_timings = false;
+- gpmi_nfc_apply_timings(this);
++ ret = gpmi_nfc_apply_timings(this);
++ if (ret)
++ goto out_pm;
+ }
+
+ dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs);
+@@ -2409,6 +2441,7 @@ unmap:
+
+ this->bch = false;
+
++out_pm:
+ pm_runtime_mark_last_busy(this->dev);
+ pm_runtime_put_autosuspend(this->dev);
+
+diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
+index efe0ffe4f1abc..9054559e52dda 100644
+--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
++++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
+@@ -68,9 +68,14 @@ static struct ingenic_ecc *ingenic_ecc_get(struct device_node *np)
+ struct ingenic_ecc *ecc;
+
+ pdev = of_find_device_by_node(np);
+- if (!pdev || !platform_get_drvdata(pdev))
++ if (!pdev)
+ return ERR_PTR(-EPROBE_DEFER);
+
++ if (!platform_get_drvdata(pdev)) {
++ put_device(&pdev->dev);
++ return ERR_PTR(-EPROBE_DEFER);
++ }
++
+ ecc = platform_get_drvdata(pdev);
+ clk_prepare_enable(ecc->clk);
+
+diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
+index 2cda439b5e11b..017868f59f222 100644
+--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
++++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
+@@ -36,25 +36,25 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc,
+ void ingenic_ecc_release(struct ingenic_ecc *ecc);
+ struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np);
+ #else /* CONFIG_MTD_NAND_INGENIC_ECC */
+-int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
++static inline int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
+ struct ingenic_ecc_params *params,
+ const u8 *buf, u8 *ecc_code)
+ {
+ return -ENODEV;
+ }
+
+-int ingenic_ecc_correct(struct ingenic_ecc *ecc,
++static inline int ingenic_ecc_correct(struct ingenic_ecc *ecc,
+ struct ingenic_ecc_params *params, u8 *buf,
+ u8 *ecc_code)
+ {
+ return -ENODEV;
+ }
+
+-void ingenic_ecc_release(struct ingenic_ecc *ecc)
++static inline void ingenic_ecc_release(struct ingenic_ecc *ecc)
+ {
+ }
+
+-struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np)
++static inline struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np)
+ {
+ return ERR_PTR(-ENODEV);
+ }
+diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
+index 0e9d426fe4f2b..b18861bdcdc88 100644
+--- a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
++++ b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
+@@ -32,6 +32,7 @@ struct jz_soc_info {
+ unsigned long addr_offset;
+ unsigned long cmd_offset;
+ const struct mtd_ooblayout_ops *oob_layout;
++ bool oob_first;
+ };
+
+ struct ingenic_nand_cs {
+@@ -240,6 +241,9 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip)
+ if (chip->bbt_options & NAND_BBT_USE_FLASH)
+ chip->bbt_options |= NAND_BBT_NO_OOB;
+
++ if (nfc->soc_info->oob_first)
++ chip->ecc.read_page = nand_read_page_hwecc_oob_first;
++
+ /* For legacy reasons we use a different layout on the qi,lb60 board. */
+ if (of_machine_is_compatible("qi,lb60"))
+ mtd_set_ooblayout(mtd, &qi_lb60_ooblayout_ops);
+@@ -534,6 +538,7 @@ static const struct jz_soc_info jz4740_soc_info = {
+ .data_offset = 0x00000000,
+ .cmd_offset = 0x00008000,
+ .addr_offset = 0x00010000,
++ .oob_first = true,
+ };
+
+ static const struct jz_soc_info jz4725b_soc_info = {
+diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c
+index b9784f3da7a11..53071e791e179 100644
+--- a/drivers/mtd/nand/raw/intel-nand-controller.c
++++ b/drivers/mtd/nand/raw/intel-nand-controller.c
+@@ -16,6 +16,7 @@
+ #include <linux/mtd/rawnand.h>
+ #include <linux/mtd/nand.h>
+
++#include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <linux/sched.h>
+ #include <linux/slab.h>
+@@ -580,6 +581,7 @@ static int ebu_nand_probe(struct platform_device *pdev)
+ {
+ struct device *dev = &pdev->dev;
+ struct ebu_nand_controller *ebu_host;
++ struct device_node *chip_np;
+ struct nand_chip *nand;
+ struct mtd_info *mtd;
+ struct resource *res;
+@@ -604,29 +606,42 @@ static int ebu_nand_probe(struct platform_device *pdev)
+ if (IS_ERR(ebu_host->hsnand))
+ return PTR_ERR(ebu_host->hsnand);
+
+- ret = device_property_read_u32(dev, "reg", &cs);
++ chip_np = of_get_next_child(dev->of_node, NULL);
++ if (!chip_np)
++ return dev_err_probe(dev, -EINVAL,
++ "Could not find child node for the NAND chip\n");
++
++ ret = of_property_read_u32(chip_np, "reg", &cs);
+ if (ret) {
+ dev_err(dev, "failed to get chip select: %d\n", ret);
+- return ret;
++ goto err_of_node_put;
+ }
++ if (cs >= MAX_CS) {
++ dev_err(dev, "got invalid chip select: %d\n", cs);
++ ret = -EINVAL;
++ goto err_of_node_put;
++ }
++
+ ebu_host->cs_num = cs;
+
+ resname = devm_kasprintf(dev, GFP_KERNEL, "nand_cs%d", cs);
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, resname);
+ ebu_host->cs[cs].chipaddr = devm_ioremap_resource(dev, res);
+- ebu_host->cs[cs].nand_pa = res->start;
+ if (IS_ERR(ebu_host->cs[cs].chipaddr))
+- return PTR_ERR(ebu_host->cs[cs].chipaddr);
++ goto err_of_node_put;
++ ebu_host->cs[cs].nand_pa = res->start;
+
+ ebu_host->clk = devm_clk_get(dev, NULL);
+- if (IS_ERR(ebu_host->clk))
+- return dev_err_probe(dev, PTR_ERR(ebu_host->clk),
+- "failed to get clock\n");
++ if (IS_ERR(ebu_host->clk)) {
++ ret = dev_err_probe(dev, PTR_ERR(ebu_host->clk),
++ "failed to get clock\n");
++ goto err_of_node_put;
++ }
+
+ ret = clk_prepare_enable(ebu_host->clk);
+ if (ret) {
+ dev_err(dev, "failed to enable clock: %d\n", ret);
+- return ret;
++ goto err_of_node_put;
+ }
+ ebu_host->clk_rate = clk_get_rate(ebu_host->clk);
+
+@@ -655,7 +670,7 @@ static int ebu_nand_probe(struct platform_device *pdev)
+ writel(ebu_host->cs[cs].addr_sel | EBU_ADDR_MASK(5) | EBU_ADDR_SEL_REGEN,
+ ebu_host->ebu + EBU_ADDR_SEL(cs));
+
+- nand_set_flash_node(&ebu_host->chip, dev->of_node);
++ nand_set_flash_node(&ebu_host->chip, chip_np);
+
+ mtd = nand_to_mtd(&ebu_host->chip);
+ if (!mtd->name) {
+@@ -691,6 +706,8 @@ err_cleanup_dma:
+ ebu_dma_cleanup(ebu_host);
+ err_disable_unprepare_clk:
+ clk_disable_unprepare(ebu_host->clk);
++err_of_node_put:
++ of_node_put(chip_np);
+
+ return ret;
+ }
+@@ -711,7 +728,6 @@ static int ebu_nand_remove(struct platform_device *pdev)
+ }
+
+ static const struct of_device_id ebu_nand_match[] = {
+- { .compatible = "intel,nand-controller" },
+ { .compatible = "intel,lgm-ebunand" },
+ {}
+ };
+diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
+index 2455a581fd70c..9f662d5cf7fac 100644
+--- a/drivers/mtd/nand/raw/marvell_nand.c
++++ b/drivers/mtd/nand/raw/marvell_nand.c
+@@ -2443,6 +2443,12 @@ static int marvell_nfc_setup_interface(struct nand_chip *chip, int chipnr,
+ NDTR1_WAIT_MODE;
+ }
+
++ /*
++ * Reset nfc->selected_chip so the next command will cause the timing
++ * registers to be updated in marvell_nfc_select_target().
++ */
++ nfc->selected_chip = NULL;
++
+ return 0;
+ }
+
+@@ -2672,7 +2678,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
+ chip->controller = &nfc->controller;
+ nand_set_flash_node(chip, np);
+
+- if (!of_property_read_bool(np, "marvell,nand-keep-config"))
++ if (of_property_read_bool(np, "marvell,nand-keep-config"))
+ chip->options |= NAND_KEEP_TIMINGS;
+
+ mtd = nand_to_mtd(chip);
+@@ -2879,10 +2885,6 @@ static int marvell_nfc_init(struct marvell_nfc *nfc)
+ regmap_update_bits(sysctrl_base, GENCONF_CLK_GATING_CTRL,
+ GENCONF_CLK_GATING_CTRL_ND_GATE,
+ GENCONF_CLK_GATING_CTRL_ND_GATE);
+-
+- regmap_update_bits(sysctrl_base, GENCONF_ND_CLK_CTRL,
+- GENCONF_ND_CLK_CTRL_EN,
+- GENCONF_ND_CLK_CTRL_EN);
+ }
+
+ /* Configure the DMA if appropriate */
+diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
+index ac3be92872d06..9d441965321ad 100644
+--- a/drivers/mtd/nand/raw/meson_nand.c
++++ b/drivers/mtd/nand/raw/meson_nand.c
+@@ -72,6 +72,7 @@
+ #define GENCMDIADDRH(aih, addr) ((aih) | (((addr) >> 16) & 0xffff))
+
+ #define DMA_DIR(dir) ((dir) ? NFC_CMD_N2M : NFC_CMD_M2N)
++#define DMA_ADDR_ALIGN 8
+
+ #define ECC_CHECK_RETURN_FF (-1)
+
+@@ -172,6 +173,7 @@ struct meson_nfc {
+
+ dma_addr_t daddr;
+ dma_addr_t iaddr;
++ u32 info_bytes;
+
+ unsigned long assigned_cs;
+ };
+@@ -275,7 +277,7 @@ static void meson_nfc_cmd_access(struct nand_chip *nand, int raw, bool dir,
+
+ if (raw) {
+ len = mtd->writesize + mtd->oobsize;
+- cmd = (len & GENMASK(5, 0)) | scrambler | DMA_DIR(dir);
++ cmd = (len & GENMASK(13, 0)) | scrambler | DMA_DIR(dir);
+ writel(cmd, nfc->reg_base + NFC_REG_CMD);
+ return;
+ }
+@@ -454,7 +456,7 @@ static int meson_nfc_ecc_correct(struct nand_chip *nand, u32 *bitflips,
+ if (ECC_ERR_CNT(*info) != ECC_UNCORRECTABLE) {
+ mtd->ecc_stats.corrected += ECC_ERR_CNT(*info);
+ *bitflips = max_t(u32, *bitflips, ECC_ERR_CNT(*info));
+- *correct_bitmap |= 1 >> i;
++ *correct_bitmap |= BIT_ULL(i);
+ continue;
+ }
+ if ((nand->options & NAND_NEED_SCRAMBLING) &&
+@@ -499,6 +501,7 @@ static int meson_nfc_dma_buffer_setup(struct nand_chip *nand, void *databuf,
+ nfc->daddr, datalen, dir);
+ return ret;
+ }
++ nfc->info_bytes = infolen;
+ cmd = GENCMDIADDRL(NFC_CMD_AIL, nfc->iaddr);
+ writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+@@ -516,8 +519,10 @@ static void meson_nfc_dma_buffer_release(struct nand_chip *nand,
+ struct meson_nfc *nfc = nand_get_controller_data(nand);
+
+ dma_unmap_single(nfc->dev, nfc->daddr, datalen, dir);
+- if (infolen)
++ if (infolen) {
+ dma_unmap_single(nfc->dev, nfc->iaddr, infolen, dir);
++ nfc->info_bytes = 0;
++ }
+ }
+
+ static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len)
+@@ -536,7 +541,7 @@ static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len)
+ if (ret)
+ goto out;
+
+- cmd = NFC_CMD_N2M | (len & GENMASK(5, 0));
++ cmd = NFC_CMD_N2M | (len & GENMASK(13, 0));
+ writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+ meson_nfc_drain_cmd(nfc);
+@@ -560,7 +565,7 @@ static int meson_nfc_write_buf(struct nand_chip *nand, u8 *buf, int len)
+ if (ret)
+ return ret;
+
+- cmd = NFC_CMD_M2N | (len & GENMASK(5, 0));
++ cmd = NFC_CMD_M2N | (len & GENMASK(13, 0));
+ writel(cmd, nfc->reg_base + NFC_REG_CMD);
+
+ meson_nfc_drain_cmd(nfc);
+@@ -706,6 +711,8 @@ static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc,
+ usleep_range(10, 15);
+ /* info is updated by the nfc dma engine */
+ smp_rmb();
++ dma_sync_single_for_cpu(nfc->dev, nfc->iaddr, nfc->info_bytes,
++ DMA_FROM_DEVICE);
+ ret = *info & ECC_COMPLETE;
+ } while (!ret);
+ }
+@@ -800,7 +807,7 @@ static int meson_nfc_read_page_hwecc(struct nand_chip *nand, u8 *buf,
+ u8 *data = buf + i * ecc->size;
+ u8 *oob = nand->oob_poi + i * (ecc->bytes + 2);
+
+- if (correct_bitmap & (1 << i))
++ if (correct_bitmap & BIT_ULL(i))
+ continue;
+ ret = nand_check_erased_ecc_chunk(data, ecc->size,
+ oob, ecc->bytes + 2,
+@@ -832,6 +839,9 @@ static int meson_nfc_read_oob(struct nand_chip *nand, int page)
+
+ static bool meson_nfc_is_buffer_dma_safe(const void *buffer)
+ {
++ if ((uintptr_t)buffer % DMA_ADDR_ALIGN)
++ return false;
++
+ if (virt_addr_valid(buffer) && (!object_is_on_stack(buffer)))
+ return true;
+ return false;
+@@ -1170,7 +1180,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
+ struct meson_nfc *nfc = nand_get_controller_data(nand);
+ struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+ struct mtd_info *mtd = nand_to_mtd(nand);
+- int nsectors = mtd->writesize / 1024;
+ int ret;
+
+ if (!mtd->name) {
+@@ -1188,7 +1197,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
+ nand->options |= NAND_NO_SUBPAGE_WRITE;
+
+ ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
+- mtd->oobsize - 2 * nsectors);
++ mtd->oobsize - 2);
+ if (ret) {
+ dev_err(nfc->dev, "failed to ECC init\n");
+ return -EINVAL;
+@@ -1307,7 +1316,6 @@ static int meson_nfc_nand_chip_cleanup(struct meson_nfc *nfc)
+ if (ret)
+ return ret;
+
+- meson_nfc_free_buffer(&meson_chip->nand);
+ nand_cleanup(&meson_chip->nand);
+ list_del(&meson_chip->node);
+ }
+diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
+index bcd4a556c959c..5b9271b9c3265 100644
+--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
++++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
+@@ -291,7 +291,6 @@ static int ads5121_chipselect_init(struct mtd_info *mtd)
+ /* Control chips select signal on ADS5121 board */
+ static void ads5121_select_chip(struct nand_chip *nand, int chip)
+ {
+- struct mtd_info *mtd = nand_to_mtd(nand);
+ struct mpc5121_nfc_prv *prv = nand_get_controller_data(nand);
+ u8 v;
+
+@@ -605,9 +604,8 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
+
+ static int mpc5121_nfc_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -772,6 +770,13 @@ static int mpc5121_nfc_probe(struct platform_device *op)
+ goto error;
+ }
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ /* Detect NAND chips */
+ retval = nand_scan(chip, be32_to_cpup(chips_no));
+ if (retval) {
+diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c
+index c437d97debb8a..ec9d1fb07006f 100644
+--- a/drivers/mtd/nand/raw/mtk_ecc.c
++++ b/drivers/mtd/nand/raw/mtk_ecc.c
+@@ -43,6 +43,7 @@
+
+ struct mtk_ecc_caps {
+ u32 err_mask;
++ u32 err_shift;
+ const u8 *ecc_strength;
+ const u32 *ecc_regs;
+ u8 num_ecc_strength;
+@@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = {
+ };
+
+ static const u8 ecc_strength_mt7622[] = {
+- 4, 6, 8, 10, 12, 14, 16
++ 4, 6, 8, 10, 12
+ };
+
+ enum mtk_ecc_regs {
+@@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
+ for (i = 0; i < sectors; i++) {
+ offset = (i >> 2) << 2;
+ err = readl(ecc->regs + ECC_DECENUM0 + offset);
+- err = err >> ((i % 4) * 8);
++ err = err >> ((i % 4) * ecc->caps->err_shift);
+ err &= ecc->caps->err_mask;
+ if (err == ecc->caps->err_mask) {
+ /* uncorrectable errors */
+@@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits);
+
+ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
+ .err_mask = 0x3f,
++ .err_shift = 8,
+ .ecc_strength = ecc_strength_mt2701,
+ .ecc_regs = mt2701_ecc_regs,
+ .num_ecc_strength = 20,
+@@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
+
+ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
+ .err_mask = 0x7f,
++ .err_shift = 8,
+ .ecc_strength = ecc_strength_mt2712,
+ .ecc_regs = mt2712_ecc_regs,
+ .num_ecc_strength = 23,
+@@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
+ };
+
+ static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = {
+- .err_mask = 0x3f,
++ .err_mask = 0x1f,
++ .err_shift = 5,
+ .ecc_strength = ecc_strength_mt7622,
+ .ecc_regs = mt7622_ecc_regs,
+- .num_ecc_strength = 7,
++ .num_ecc_strength = 5,
+ .ecc_mode_shift = 4,
+ .parity_bits = 13,
+ .pg_irq_sel = 0,
+diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
+index 3d6c6e8805207..881e768f636f8 100644
+--- a/drivers/mtd/nand/raw/nand_base.c
++++ b/drivers/mtd/nand/raw/nand_base.c
+@@ -335,16 +335,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
+ *
+ * Return: -EBUSY if the chip has been suspended, 0 otherwise
+ */
+-static int nand_get_device(struct nand_chip *chip)
++static void nand_get_device(struct nand_chip *chip)
+ {
+- mutex_lock(&chip->lock);
+- if (chip->suspended) {
++ /* Wait until the device is resumed. */
++ while (1) {
++ mutex_lock(&chip->lock);
++ if (!chip->suspended) {
++ mutex_lock(&chip->controller->lock);
++ return;
++ }
+ mutex_unlock(&chip->lock);
+- return -EBUSY;
+- }
+- mutex_lock(&chip->controller->lock);
+
+- return 0;
++ wait_event(chip->resume_wq, !chip->suspended);
++ }
+ }
+
+ /**
+@@ -573,9 +576,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
+ nand_erase_nand(chip, &einfo, 0);
+
+ /* Write bad block marker to OOB */
+- ret = nand_get_device(chip);
+- if (ret)
+- return ret;
++ nand_get_device(chip);
+
+ ret = nand_markbad_bbm(chip, ofs);
+ nand_release_device(chip);
+@@ -926,7 +927,7 @@ int nand_choose_best_sdr_timings(struct nand_chip *chip,
+ struct nand_sdr_timings *spec_timings)
+ {
+ const struct nand_controller_ops *ops = chip->controller->ops;
+- int best_mode = 0, mode, ret;
++ int best_mode = 0, mode, ret = -EOPNOTSUPP;
+
+ iface->type = NAND_SDR_IFACE;
+
+@@ -977,7 +978,7 @@ int nand_choose_best_nvddr_timings(struct nand_chip *chip,
+ struct nand_nvddr_timings *spec_timings)
+ {
+ const struct nand_controller_ops *ops = chip->controller->ops;
+- int best_mode = 0, mode, ret;
++ int best_mode = 0, mode, ret = -EOPNOTSUPP;
+
+ iface->type = NAND_NVDDR_IFACE;
+
+@@ -1837,7 +1838,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
+ NAND_OP_CMD(NAND_CMD_ERASE1, 0),
+ NAND_OP_ADDR(2, addrs, 0),
+ NAND_OP_CMD(NAND_CMD_ERASE2,
+- NAND_COMMON_TIMING_MS(conf, tWB_max)),
++ NAND_COMMON_TIMING_NS(conf, tWB_max)),
+ NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tBERS_max),
+ 0),
+ };
+@@ -3160,6 +3161,73 @@ static int nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
+ return max_bitflips;
+ }
+
++/**
++ * nand_read_page_hwecc_oob_first - Hardware ECC page read with ECC
++ * data read from OOB area
++ * @chip: nand chip info structure
++ * @buf: buffer to store read data
++ * @oob_required: caller requires OOB data read to chip->oob_poi
++ * @page: page number to read
++ *
++ * Hardware ECC for large page chips, which requires the ECC data to be
++ * extracted from the OOB before the actual data is read.
++ */
++int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf,
++ int oob_required, int page)
++{
++ struct mtd_info *mtd = nand_to_mtd(chip);
++ int i, eccsize = chip->ecc.size, ret;
++ int eccbytes = chip->ecc.bytes;
++ int eccsteps = chip->ecc.steps;
++ uint8_t *p = buf;
++ uint8_t *ecc_code = chip->ecc.code_buf;
++ unsigned int max_bitflips = 0;
++
++ /* Read the OOB area first */
++ ret = nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
++ if (ret)
++ return ret;
++
++ /* Move read cursor to start of page */
++ ret = nand_change_read_column_op(chip, 0, NULL, 0, false);
++ if (ret)
++ return ret;
++
++ ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0,
++ chip->ecc.total);
++ if (ret)
++ return ret;
++
++ for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
++ int stat;
++
++ chip->ecc.hwctl(chip, NAND_ECC_READ);
++
++ ret = nand_read_data_op(chip, p, eccsize, false, false);
++ if (ret)
++ return ret;
++
++ stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL);
++ if (stat == -EBADMSG &&
++ (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) {
++ /* check for empty pages with bitflips */
++ stat = nand_check_erased_ecc_chunk(p, eccsize,
++ &ecc_code[i],
++ eccbytes, NULL, 0,
++ chip->ecc.strength);
++ }
++
++ if (stat < 0) {
++ mtd->ecc_stats.failed++;
++ } else {
++ mtd->ecc_stats.corrected += stat;
++ max_bitflips = max_t(unsigned int, max_bitflips, stat);
++ }
++ }
++ return max_bitflips;
++}
++EXPORT_SYMBOL_GPL(nand_read_page_hwecc_oob_first);
++
+ /**
+ * nand_read_page_syndrome - [REPLACEABLE] hardware ECC syndrome based page read
+ * @chip: nand chip info structure
+@@ -3756,9 +3824,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
+ ops->mode != MTD_OPS_RAW)
+ return -ENOTSUPP;
+
+- ret = nand_get_device(chip);
+- if (ret)
+- return ret;
++ nand_get_device(chip);
+
+ if (!ops->datbuf)
+ ret = nand_do_read_oob(chip, from, ops);
+@@ -4345,13 +4411,11 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
+ struct mtd_oob_ops *ops)
+ {
+ struct nand_chip *chip = mtd_to_nand(mtd);
+- int ret;
++ int ret = 0;
+
+ ops->retlen = 0;
+
+- ret = nand_get_device(chip);
+- if (ret)
+- return ret;
++ nand_get_device(chip);
+
+ switch (ops->mode) {
+ case MTD_OPS_PLACE_OOB:
+@@ -4411,9 +4475,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
+ return -EIO;
+
+ /* Grab the lock and see if the device is available */
+- ret = nand_get_device(chip);
+- if (ret)
+- return ret;
++ nand_get_device(chip);
+
+ /* Shift to get first page */
+ page = (int)(instr->addr >> chip->page_shift);
+@@ -4500,7 +4562,7 @@ static void nand_sync(struct mtd_info *mtd)
+ pr_debug("%s: called\n", __func__);
+
+ /* Grab the lock and see if the device is available */
+- WARN_ON(nand_get_device(chip));
++ nand_get_device(chip);
+ /* Release it and go back */
+ nand_release_device(chip);
+ }
+@@ -4517,9 +4579,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
+ int ret;
+
+ /* Select the NAND device */
+- ret = nand_get_device(chip);
+- if (ret)
+- return ret;
++ nand_get_device(chip);
+
+ nand_select_target(chip, chipnr);
+
+@@ -4590,6 +4650,8 @@ static void nand_resume(struct mtd_info *mtd)
+ __func__);
+ }
+ mutex_unlock(&chip->lock);
++
++ wake_up_all(&chip->resume_wq);
+ }
+
+ /**
+@@ -5367,6 +5429,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
+ chip->cur_cs = -1;
+
+ mutex_init(&chip->lock);
++ init_waitqueue_head(&chip->resume_wq);
+
+ /* Enforce the right timings for reset/detection */
+ chip->current_interface_config = nand_get_reset_interface_config();
+diff --git a/drivers/mtd/nand/raw/omap_elm.c b/drivers/mtd/nand/raw/omap_elm.c
+index 2b21ce04b3ec6..1a48347be3fe4 100644
+--- a/drivers/mtd/nand/raw/omap_elm.c
++++ b/drivers/mtd/nand/raw/omap_elm.c
+@@ -177,17 +177,17 @@ static void elm_load_syndrome(struct elm_info *info,
+ switch (info->bch_type) {
+ case BCH8_ECC:
+ /* syndrome fragment 0 = ecc[9-12B] */
+- val = cpu_to_be32(*(u32 *) &ecc[9]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]);
+ elm_write_reg(info, offset, val);
+
+ /* syndrome fragment 1 = ecc[5-8B] */
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[5]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]);
+ elm_write_reg(info, offset, val);
+
+ /* syndrome fragment 2 = ecc[1-4B] */
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[1]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]);
+ elm_write_reg(info, offset, val);
+
+ /* syndrome fragment 3 = ecc[0B] */
+@@ -197,35 +197,35 @@ static void elm_load_syndrome(struct elm_info *info,
+ break;
+ case BCH4_ECC:
+ /* syndrome fragment 0 = ecc[20-52b] bits */
+- val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
++ val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) |
+ ((ecc[2] & 0xf) << 28);
+ elm_write_reg(info, offset, val);
+
+ /* syndrome fragment 1 = ecc[0-20b] bits */
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12;
+ elm_write_reg(info, offset, val);
+ break;
+ case BCH16_ECC:
+- val = cpu_to_be32(*(u32 *) &ecc[22]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[18]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[14]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[10]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[6]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[2]);
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]);
+ elm_write_reg(info, offset, val);
+ offset += 4;
+- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
++ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16;
+ elm_write_reg(info, offset, val);
+ break;
+ default:
+diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
+index 66211c9311d2f..2c87c7d892058 100644
+--- a/drivers/mtd/nand/raw/orion_nand.c
++++ b/drivers/mtd/nand/raw/orion_nand.c
+@@ -85,9 +85,8 @@ static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
+
+ static int orion_nand_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -190,6 +189,13 @@ static int __init orion_nand_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ ret = nand_scan(nc, 1);
+ if (ret)
+ goto no_dev;
+diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
+index 789f33312c15f..c176036453ed9 100644
+--- a/drivers/mtd/nand/raw/pasemi_nand.c
++++ b/drivers/mtd/nand/raw/pasemi_nand.c
+@@ -75,9 +75,8 @@ static int pasemi_device_ready(struct nand_chip *chip)
+
+ static int pasemi_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -154,6 +153,13 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
+ /* Enable the following for a flash based bad block table */
+ chip->bbt_options = NAND_BBT_USE_FLASH;
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ /* Scan to find existence of the device */
+ err = nand_scan(chip, 1);
+ if (err)
+diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c
+index 8a91e069ee2e9..3c6f6aff649f8 100644
+--- a/drivers/mtd/nand/raw/pl35x-nand-controller.c
++++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c
+@@ -1062,7 +1062,7 @@ static int pl35x_nand_chip_init(struct pl35x_nandc *nfc,
+ chip->controller = &nfc->controller;
+ mtd = nand_to_mtd(chip);
+ mtd->dev.parent = nfc->dev;
+- nand_set_flash_node(chip, nfc->dev->of_node);
++ nand_set_flash_node(chip, np);
+ if (!mtd->name) {
+ mtd->name = devm_kasprintf(nfc->dev, GFP_KERNEL,
+ "%s", PL35X_NANDC_DRIVER_NAME);
+diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
+index 7711e1020c21c..0ee08c42cc35b 100644
+--- a/drivers/mtd/nand/raw/plat_nand.c
++++ b/drivers/mtd/nand/raw/plat_nand.c
+@@ -21,9 +21,8 @@ struct plat_nand_data {
+
+ static int plat_nand_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -94,6 +93,13 @@ static int plat_nand_probe(struct platform_device *pdev)
+ goto out;
+ }
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ /* Scan to find existence of the device */
+ err = nand_scan(&data->chip, pdata->chip.nr_chips);
+ if (err)
+diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
+index 04e6f7b267064..e972bee60e7c8 100644
+--- a/drivers/mtd/nand/raw/qcom_nandc.c
++++ b/drivers/mtd/nand/raw/qcom_nandc.c
+@@ -2,7 +2,6 @@
+ /*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ */
+-
+ #include <linux/clk.h>
+ #include <linux/slab.h>
+ #include <linux/bitops.h>
+@@ -2642,10 +2641,23 @@ static int qcom_nand_attach_chip(struct nand_chip *chip)
+ ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+
+ mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops);
++ /* Free the initially allocated BAM transaction for reading the ONFI params */
++ if (nandc->props->is_bam)
++ free_bam_transaction(nandc);
+
+ nandc->max_cwperpage = max_t(unsigned int, nandc->max_cwperpage,
+ cwperpage);
+
++ /* Now allocate the BAM transaction based on updated max_cwperpage */
++ if (nandc->props->is_bam) {
++ nandc->bam_txn = alloc_bam_transaction(nandc);
++ if (!nandc->bam_txn) {
++ dev_err(nandc->dev,
++ "failed to allocate bam transaction\n");
++ return -ENOMEM;
++ }
++ }
++
+ /*
+ * DATA_UD_BYTES varies based on whether the read/write command protects
+ * spare data with ECC too. We protect spare data by default, so we set
+@@ -2946,17 +2958,6 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
+ if (ret)
+ return ret;
+
+- if (nandc->props->is_bam) {
+- free_bam_transaction(nandc);
+- nandc->bam_txn = alloc_bam_transaction(nandc);
+- if (!nandc->bam_txn) {
+- dev_err(nandc->dev,
+- "failed to allocate bam transaction\n");
+- nand_cleanup(chip);
+- return -ENOMEM;
+- }
+- }
+-
+ ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0);
+ if (ret)
+ nand_cleanup(chip);
+@@ -3063,10 +3064,6 @@ static int qcom_nandc_probe(struct platform_device *pdev)
+ if (dma_mapping_error(dev, nandc->base_dma))
+ return -ENXIO;
+
+- ret = qcom_nandc_alloc(nandc);
+- if (ret)
+- goto err_nandc_alloc;
+-
+ ret = clk_prepare_enable(nandc->core_clk);
+ if (ret)
+ goto err_core_clk;
+@@ -3075,6 +3072,10 @@ static int qcom_nandc_probe(struct platform_device *pdev)
+ if (ret)
+ goto err_aon_clk;
+
++ ret = qcom_nandc_alloc(nandc);
++ if (ret)
++ goto err_nandc_alloc;
++
+ ret = qcom_nandc_setup(nandc);
+ if (ret)
+ goto err_setup;
+@@ -3086,15 +3087,14 @@ static int qcom_nandc_probe(struct platform_device *pdev)
+ return 0;
+
+ err_setup:
++ qcom_nandc_unalloc(nandc);
++err_nandc_alloc:
+ clk_disable_unprepare(nandc->aon_clk);
+ err_aon_clk:
+ clk_disable_unprepare(nandc->core_clk);
+ err_core_clk:
+- qcom_nandc_unalloc(nandc);
+-err_nandc_alloc:
+ dma_unmap_resource(dev, res->start, resource_size(res),
+ DMA_BIDIRECTIONAL, 0);
+-
+ return ret;
+ }
+
+diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c
+index b5405bc7ca3a3..99242bd684375 100644
+--- a/drivers/mtd/nand/raw/rockchip-nand-controller.c
++++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c
+@@ -562,9 +562,10 @@ static int rk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
+ * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
+ *
+ * The rk_nfc_ooblayout_free() function already has reserved
+- * these 4 bytes with:
++ * these 4 bytes together with 2 bytes for BBM
++ * by reducing its length:
+ *
+- * oob_region->offset = NFC_SYS_DATA_SIZE + 2;
++ * oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
+ */
+ if (!i)
+ memcpy(rk_nfc_oob_ptr(chip, i),
+@@ -597,7 +598,7 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
+ int pages_per_blk = mtd->erasesize / mtd->writesize;
+ int ret = 0, i, boot_rom_mode = 0;
+ dma_addr_t dma_data, dma_oob;
+- u32 reg;
++ u32 tmp;
+ u8 *oob;
+
+ nand_prog_page_begin_op(chip, page, 0, NULL, 0);
+@@ -624,6 +625,13 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
+ *
+ * 0xFF 0xFF 0xFF 0xFF | BBM OOB1 OOB2 OOB3 | ...
+ *
++ * The code here just swaps the first 4 bytes with the last
++ * 4 bytes without losing any data.
++ *
++ * The chip->oob_poi data layout:
++ *
++ * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
++ *
+ * Configure the ECC algorithm supported by the boot ROM.
+ */
+ if ((page < (pages_per_blk * rknand->boot_blks)) &&
+@@ -634,21 +642,17 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
+ }
+
+ for (i = 0; i < ecc->steps; i++) {
+- if (!i) {
+- reg = 0xFFFFFFFF;
+- } else {
++ if (!i)
++ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
++ else
+ oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+- reg = oob[0] | oob[1] << 8 | oob[2] << 16 |
+- oob[3] << 24;
+- }
+
+- if (!i && boot_rom_mode)
+- reg = (page & (pages_per_blk - 1)) * 4;
++ tmp = oob[0] | oob[1] << 8 | oob[2] << 16 | oob[3] << 24;
+
+ if (nfc->cfg->type == NFC_V9)
+- nfc->oob_buf[i] = reg;
++ nfc->oob_buf[i] = tmp;
+ else
+- nfc->oob_buf[i * (oob_step / 4)] = reg;
++ nfc->oob_buf[i * (oob_step / 4)] = tmp;
+ }
+
+ dma_data = dma_map_single(nfc->dev, (void *)nfc->page_buf,
+@@ -811,12 +815,17 @@ static int rk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *buf, int oob_on,
+ goto timeout_err;
+ }
+
+- for (i = 1; i < ecc->steps; i++) {
+- oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
++ for (i = 0; i < ecc->steps; i++) {
++ if (!i)
++ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
++ else
++ oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
++
+ if (nfc->cfg->type == NFC_V9)
+ tmp = nfc->oob_buf[i];
+ else
+ tmp = nfc->oob_buf[i * (oob_step / 4)];
++
+ *oob++ = (u8)tmp;
+ *oob++ = (u8)(tmp >> 8);
+ *oob++ = (u8)(tmp >> 16);
+@@ -935,12 +944,8 @@ static int rk_nfc_ooblayout_free(struct mtd_info *mtd, int section,
+ if (section)
+ return -ERANGE;
+
+- /*
+- * The beginning of the OOB area stores the reserved data for the NFC,
+- * the size of the reserved data is NFC_SYS_DATA_SIZE bytes.
+- */
+ oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
+- oob_region->offset = NFC_SYS_DATA_SIZE + 2;
++ oob_region->offset = 2;
+
+ return 0;
+ }
+diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
+index 13df4bdf792af..8f89e2d3d817f 100644
+--- a/drivers/mtd/nand/raw/sh_flctl.c
++++ b/drivers/mtd/nand/raw/sh_flctl.c
+@@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
+ dma_addr_t dma_addr;
+ dma_cookie_t cookie;
+ uint32_t reg;
+- int ret;
++ int ret = 0;
++ unsigned long time_left;
+
+ if (dir == DMA_FROM_DEVICE) {
+ chan = flctl->chan_fifo0_rx;
+@@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
+ goto out;
+ }
+
+- ret =
++ time_left =
+ wait_for_completion_timeout(&flctl->dma_complete,
+ msecs_to_jiffies(3000));
+
+- if (ret <= 0) {
++ if (time_left == 0) {
+ dmaengine_terminate_all(chan);
+ dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
++ ret = -ETIMEDOUT;
+ }
+
+ out:
+@@ -441,7 +443,7 @@ out:
+
+ dma_unmap_single(chan->device->dev, dma_addr, len, dir);
+
+- /* ret > 0 is success */
++ /* ret == 0 is success */
+ return ret;
+ }
+
+@@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+
+ /* initiate DMA transfer */
+ if (flctl->chan_fifo0_rx && rlen >= 32 &&
+- flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0)
++ !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE))
+ goto convert; /* DMA success */
+
+ /* do polling transfer */
+@@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
+
+ /* initiate DMA transfer */
+ if (flctl->chan_fifo0_tx && rlen >= 32 &&
+- flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0)
++ !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE))
+ return; /* DMA success */
+
+ /* do polling transfer */
+diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
+index 70f8305c9b6e1..fb39cc7ebce03 100644
+--- a/drivers/mtd/nand/raw/socrates_nand.c
++++ b/drivers/mtd/nand/raw/socrates_nand.c
+@@ -119,9 +119,8 @@ static int socrates_nand_device_ready(struct nand_chip *nand_chip)
+
+ static int socrates_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -175,6 +174,13 @@ static int socrates_nand_probe(struct platform_device *ofdev)
+ /* TODO: I have no idea what real delay is. */
+ nand_chip->legacy.chip_delay = 20; /* 20us command delay time */
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ dev_set_drvdata(&ofdev->dev, host);
+
+ res = nand_scan(nand_chip, 1);
+diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+index 1c277fbb91f2b..1ac8c4887ce03 100644
+--- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c
++++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+@@ -1527,6 +1527,9 @@ static int stm32_fmc2_nfc_setup_interface(struct nand_chip *chip, int chipnr,
+ if (IS_ERR(sdrt))
+ return PTR_ERR(sdrt);
+
++ if (conf->timings.mode > 3)
++ return -EOPNOTSUPP;
++
+ if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
+ return 0;
+
+diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
+index 26751976e5026..236fd8c5a958f 100644
+--- a/drivers/mtd/nand/raw/xway_nand.c
++++ b/drivers/mtd/nand/raw/xway_nand.c
+@@ -148,9 +148,8 @@ static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len)
+
+ static int xway_attach_chip(struct nand_chip *chip)
+ {
+- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+-
+- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
++ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
++ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+ return 0;
+@@ -219,6 +218,13 @@ static int xway_nand_probe(struct platform_device *pdev)
+ | NAND_CON_SE_P | NAND_CON_WP_P | NAND_CON_PRE_P
+ | cs_flag, EBU_NAND_CON);
+
++ /*
++ * This driver assumes that the default ECC engine should be TYPE_SOFT.
++ * Set ->engine_type before registering the NAND devices in order to
++ * provide a driver specific default value.
++ */
++ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
++
+ /* Scan to find existence of the device */
+ err = nand_scan(&data->chip, 1);
+ if (err)
+diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
+index 1dd1c58980934..da77ab20296ea 100644
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -39,6 +39,14 @@ static SPINAND_OP_VARIANTS(read_cache_variants_f,
+ SPINAND_PAGE_READ_FROM_CACHE_OP_3A(true, 0, 1, NULL, 0),
+ SPINAND_PAGE_READ_FROM_CACHE_OP_3A(false, 0, 0, NULL, 0));
+
++static SPINAND_OP_VARIANTS(read_cache_variants_1gq5,
++ SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
++ SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
++ SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
++ SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
++ SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
++ SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
++
+ static SPINAND_OP_VARIANTS(write_cache_variants,
+ SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+ SPINAND_PROG_LOAD(true, 0, NULL, 0));
+@@ -339,7 +347,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x51),
+ NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
+ NAND_ECCREQ(4, 512),
+- SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++ SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
+ &write_cache_variants,
+ &update_cache_variants),
+ SPINAND_HAS_QE_BIT,
+diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c
+index 7380b1ebaccd5..a80427c131216 100644
+--- a/drivers/mtd/nand/spi/toshiba.c
++++ b/drivers/mtd/nand/spi/toshiba.c
+@@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
+ {
+ struct nand_device *nand = spinand_to_nand(spinand);
+ u8 mbf = 0;
+- struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
++ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
+
+ switch (status & STATUS_ECC_MASK) {
+ case STATUS_ECC_NO_BITFLIPS:
+@@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
+ if (spi_mem_exec_op(spinand->spimem, &op))
+ return nanddev_get_ecc_conf(nand)->strength;
+
+- mbf >>= 4;
++ mbf = *(spinand->scratchbuf) >> 4;
+
+ if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
+ return nanddev_get_ecc_conf(nand)->strength;
+diff --git a/drivers/mtd/parsers/bcm47xxpart.c b/drivers/mtd/parsers/bcm47xxpart.c
+index 6012a10f10c83..13daf9bffd081 100644
+--- a/drivers/mtd/parsers/bcm47xxpart.c
++++ b/drivers/mtd/parsers/bcm47xxpart.c
+@@ -233,11 +233,11 @@ static int bcm47xxpart_parse(struct mtd_info *master,
+ }
+
+ /* Read middle of the block */
+- err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read,
++ err = mtd_read(master, offset + (blocksize / 2), 0x4, &bytes_read,
+ (uint8_t *)buf);
+ if (err && !mtd_is_bitflip(err)) {
+ pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+- offset, err);
++ offset + (blocksize / 2), err);
+ continue;
+ }
+
+diff --git a/drivers/mtd/parsers/ofpart_bcm4908.c b/drivers/mtd/parsers/ofpart_bcm4908.c
+index 0eddef4c198ec..bb072a0940e48 100644
+--- a/drivers/mtd/parsers/ofpart_bcm4908.c
++++ b/drivers/mtd/parsers/ofpart_bcm4908.c
+@@ -35,12 +35,15 @@ static long long bcm4908_partitions_fw_offset(void)
+ err = kstrtoul(s + len + 1, 0, &offset);
+ if (err) {
+ pr_err("failed to parse %s\n", s + len + 1);
++ of_node_put(root);
+ return err;
+ }
+
++ of_node_put(root);
+ return offset << 10;
+ }
+
++ of_node_put(root);
+ return -ENOENT;
+ }
+
+diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c
+index 06a818cd2433f..32ddfea701423 100644
+--- a/drivers/mtd/parsers/qcomsmempart.c
++++ b/drivers/mtd/parsers/qcomsmempart.c
+@@ -58,11 +58,11 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
+ const struct mtd_partition **pparts,
+ struct mtd_part_parser_data *data)
+ {
++ size_t len = SMEM_FLASH_PTABLE_HDR_LEN;
++ int ret, i, j, tmpparts, numparts = 0;
+ struct smem_flash_pentry *pentry;
+ struct smem_flash_ptable *ptable;
+- size_t len = SMEM_FLASH_PTABLE_HDR_LEN;
+ struct mtd_partition *parts;
+- int ret, i, numparts;
+ char *name, *c;
+
+ if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_4K_SECTORS)
+@@ -87,8 +87,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
+ }
+
+ /* Ensure that # of partitions is less than the max we have allocated */
+- numparts = le32_to_cpu(ptable->numparts);
+- if (numparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) {
++ tmpparts = le32_to_cpu(ptable->numparts);
++ if (tmpparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) {
+ pr_err("Partition numbers exceed the max limit\n");
+ return -EINVAL;
+ }
+@@ -116,11 +116,17 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
+ return PTR_ERR(ptable);
+ }
+
++ for (i = 0; i < tmpparts; i++) {
++ pentry = &ptable->pentry[i];
++ if (pentry->name[0] != '\0')
++ numparts++;
++ }
++
+ parts = kcalloc(numparts, sizeof(*parts), GFP_KERNEL);
+ if (!parts)
+ return -ENOMEM;
+
+- for (i = 0; i < numparts; i++) {
++ for (i = 0, j = 0; i < tmpparts; i++) {
+ pentry = &ptable->pentry[i];
+ if (pentry->name[0] == '\0')
+ continue;
+@@ -135,24 +141,25 @@ static int parse_qcomsmem_part(struct mtd_info *mtd,
+ for (c = name; *c != '\0'; c++)
+ *c = tolower(*c);
+
+- parts[i].name = name;
+- parts[i].offset = le32_to_cpu(pentry->offset) * mtd->erasesize;
+- parts[i].mask_flags = pentry->attr;
+- parts[i].size = le32_to_cpu(pentry->length) * mtd->erasesize;
++ parts[j].name = name;
++ parts[j].offset = le32_to_cpu(pentry->offset) * mtd->erasesize;
++ parts[j].mask_flags = pentry->attr;
++ parts[j].size = le32_to_cpu(pentry->length) * mtd->erasesize;
+ pr_debug("%d: %s offs=0x%08x size=0x%08x attr:0x%08x\n",
+ i, pentry->name, le32_to_cpu(pentry->offset),
+ le32_to_cpu(pentry->length), pentry->attr);
++ j++;
+ }
+
+ pr_debug("SMEM partition table found: ver: %d len: %d\n",
+- le32_to_cpu(ptable->version), numparts);
++ le32_to_cpu(ptable->version), tmpparts);
+ *pparts = parts;
+
+ return numparts;
+
+ out_free_parts:
+- while (--i >= 0)
+- kfree(parts[i].name);
++ while (--j >= 0)
++ kfree(parts[j].name);
+ kfree(parts);
+ *pparts = NULL;
+
+@@ -166,6 +173,8 @@ static void parse_qcomsmem_cleanup(const struct mtd_partition *pparts,
+
+ for (i = 0; i < nr_parts; i++)
+ kfree(pparts[i].name);
++
++ kfree(pparts);
+ }
+
+ static const struct of_device_id qcomsmem_of_match_table[] = {
+diff --git a/drivers/mtd/parsers/redboot.c b/drivers/mtd/parsers/redboot.c
+index feb44a573d447..a16b42a885816 100644
+--- a/drivers/mtd/parsers/redboot.c
++++ b/drivers/mtd/parsers/redboot.c
+@@ -58,6 +58,7 @@ static void parse_redboot_of(struct mtd_info *master)
+ return;
+
+ ret = of_property_read_u32(npart, "fis-index-block", &dirblock);
++ of_node_put(npart);
+ if (ret)
+ return;
+
+diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
+index 0cff2cda1b5a0..7f955fade8383 100644
+--- a/drivers/mtd/sm_ftl.c
++++ b/drivers/mtd/sm_ftl.c
+@@ -1111,9 +1111,9 @@ static void sm_release(struct mtd_blktrans_dev *dev)
+ {
+ struct sm_ftl *ftl = dev->priv;
+
+- mutex_lock(&ftl->mutex);
+ del_timer_sync(&ftl->timer);
+ cancel_work_sync(&ftl->flush_work);
++ mutex_lock(&ftl->mutex);
+ sm_cache_flush(ftl);
+ mutex_unlock(&ftl->mutex);
+ }
+diff --git a/drivers/mtd/spi-nor/controllers/hisi-sfc.c b/drivers/mtd/spi-nor/controllers/hisi-sfc.c
+index 47fbf1d1e5573..516e502694780 100644
+--- a/drivers/mtd/spi-nor/controllers/hisi-sfc.c
++++ b/drivers/mtd/spi-nor/controllers/hisi-sfc.c
+@@ -477,7 +477,6 @@ static int hisi_spi_nor_remove(struct platform_device *pdev)
+
+ hisi_spi_nor_unregister_all(host);
+ mutex_destroy(&host->lock);
+- clk_disable_unprepare(host->clk);
+ return 0;
+ }
+
+diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+index 1bc53b8bb88a9..508f7ca098eff 100644
+--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
++++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+@@ -16,12 +16,30 @@
+ #define BCR 0xdc
+ #define BCR_WPD BIT(0)
+
++static bool intel_spi_pci_set_writeable(void __iomem *base, void *data)
++{
++ struct pci_dev *pdev = data;
++ u32 bcr;
++
++ /* Try to make the chip read/write */
++ pci_read_config_dword(pdev, BCR, &bcr);
++ if (!(bcr & BCR_WPD)) {
++ bcr |= BCR_WPD;
++ pci_write_config_dword(pdev, BCR, bcr);
++ pci_read_config_dword(pdev, BCR, &bcr);
++ }
++
++ return bcr & BCR_WPD;
++}
++
+ static const struct intel_spi_boardinfo bxt_info = {
+ .type = INTEL_SPI_BXT,
++ .set_writeable = intel_spi_pci_set_writeable,
+ };
+
+ static const struct intel_spi_boardinfo cnl_info = {
+ .type = INTEL_SPI_CNL,
++ .set_writeable = intel_spi_pci_set_writeable,
+ };
+
+ static int intel_spi_pci_probe(struct pci_dev *pdev,
+@@ -29,7 +47,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
+ {
+ struct intel_spi_boardinfo *info;
+ struct intel_spi *ispi;
+- u32 bcr;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+@@ -41,15 +58,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
+ if (!info)
+ return -ENOMEM;
+
+- /* Try to make the chip read/write */
+- pci_read_config_dword(pdev, BCR, &bcr);
+- if (!(bcr & BCR_WPD)) {
+- bcr |= BCR_WPD;
+- pci_write_config_dword(pdev, BCR, bcr);
+- pci_read_config_dword(pdev, BCR, &bcr);
+- }
+- info->writeable = !!(bcr & BCR_WPD);
+-
++ info->data = pdev;
+ ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
+ if (IS_ERR(ispi))
+ return PTR_ERR(ispi);
+diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c
+index a413892ff449f..6cb818feaf7f0 100644
+--- a/drivers/mtd/spi-nor/controllers/intel-spi.c
++++ b/drivers/mtd/spi-nor/controllers/intel-spi.c
+@@ -52,17 +52,17 @@
+ #define FRACC 0x50
+
+ #define FREG(n) (0x54 + ((n) * 4))
+-#define FREG_BASE_MASK 0x3fff
++#define FREG_BASE_MASK GENMASK(14, 0)
+ #define FREG_LIMIT_SHIFT 16
+-#define FREG_LIMIT_MASK (0x03fff << FREG_LIMIT_SHIFT)
++#define FREG_LIMIT_MASK GENMASK(30, 16)
+
+ /* Offset is from @ispi->pregs */
+ #define PR(n) ((n) * 4)
+ #define PR_WPE BIT(31)
+ #define PR_LIMIT_SHIFT 16
+-#define PR_LIMIT_MASK (0x3fff << PR_LIMIT_SHIFT)
++#define PR_LIMIT_MASK GENMASK(30, 16)
+ #define PR_RPE BIT(15)
+-#define PR_BASE_MASK 0x3fff
++#define PR_BASE_MASK GENMASK(14, 0)
+
+ /* Offsets are from @ispi->sregs */
+ #define SSFSTS_CTL 0x00
+@@ -116,7 +116,7 @@
+ #define ERASE_OPCODE_SHIFT 8
+ #define ERASE_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT)
+ #define ERASE_64K_OPCODE_SHIFT 16
+-#define ERASE_64K_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT)
++#define ERASE_64K_OPCODE_MASK (0xff << ERASE_64K_OPCODE_SHIFT)
+
+ #define INTEL_SPI_TIMEOUT 5000 /* ms */
+ #define INTEL_SPI_FIFO_SZ 64
+@@ -131,7 +131,6 @@
+ * @sregs: Start of software sequencer registers
+ * @nregions: Maximum number of regions
+ * @pr_num: Maximum number of protected range registers
+- * @writeable: Is the chip writeable
+ * @locked: Is SPI setting locked
+ * @swseq_reg: Use SW sequencer in register reads/writes
+ * @swseq_erase: Use SW sequencer in erase operation
+@@ -149,7 +148,6 @@ struct intel_spi {
+ void __iomem *sregs;
+ size_t nregions;
+ size_t pr_num;
+- bool writeable;
+ bool locked;
+ bool swseq_reg;
+ bool swseq_erase;
+@@ -304,6 +302,14 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi)
+ INTEL_SPI_TIMEOUT * 1000);
+ }
+
++static bool intel_spi_set_writeable(struct intel_spi *ispi)
++{
++ if (!ispi->info->set_writeable)
++ return false;
++
++ return ispi->info->set_writeable(ispi->base, ispi->info->data);
++}
++
+ static int intel_spi_init(struct intel_spi *ispi)
+ {
+ u32 opmenu0, opmenu1, lvscc, uvscc, val;
+@@ -316,19 +322,6 @@ static int intel_spi_init(struct intel_spi *ispi)
+ ispi->nregions = BYT_FREG_NUM;
+ ispi->pr_num = BYT_PR_NUM;
+ ispi->swseq_reg = true;
+-
+- if (writeable) {
+- /* Disable write protection */
+- val = readl(ispi->base + BYT_BCR);
+- if (!(val & BYT_BCR_WPD)) {
+- val |= BYT_BCR_WPD;
+- writel(val, ispi->base + BYT_BCR);
+- val = readl(ispi->base + BYT_BCR);
+- }
+-
+- ispi->writeable = !!(val & BYT_BCR_WPD);
+- }
+-
+ break;
+
+ case INTEL_SPI_LPT:
+@@ -358,6 +351,12 @@ static int intel_spi_init(struct intel_spi *ispi)
+ return -EINVAL;
+ }
+
++ /* Try to disable write protection if user asked to do so */
++ if (writeable && !intel_spi_set_writeable(ispi)) {
++ dev_warn(ispi->dev, "can't disable chip write protection\n");
++ writeable = false;
++ }
++
+ /* Disable #SMI generation from HW sequencer */
+ val = readl(ispi->base + HSFSTS_CTL);
+ val &= ~HSFSTS_CTL_FSMIE;
+@@ -884,9 +883,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
+ /*
+ * If any of the regions have protection bits set, make the
+ * whole partition read-only to be on the safe side.
++ *
++ * Also if the user did not ask the chip to be writeable
++ * mask the bit too.
+ */
+- if (intel_spi_is_protected(ispi, base, limit))
+- ispi->writeable = false;
++ if (!writeable || intel_spi_is_protected(ispi, base, limit))
++ part->mask_flags |= MTD_WRITEABLE;
+
+ end = (limit << 12) + 4096;
+ if (end > part->size)
+@@ -927,7 +929,6 @@ struct intel_spi *intel_spi_probe(struct device *dev,
+
+ ispi->dev = dev;
+ ispi->info = info;
+- ispi->writeable = info->writeable;
+
+ ret = intel_spi_init(ispi);
+ if (ret)
+@@ -945,10 +946,6 @@ struct intel_spi *intel_spi_probe(struct device *dev,
+
+ intel_spi_fill_partition(ispi, &part);
+
+- /* Prevent writes if not explicitly enabled */
+- if (!ispi->writeable || !writeable)
+- ispi->nor.mtd.flags &= ~MTD_WRITEABLE;
+-
+ ret = mtd_device_register(&ispi->nor.mtd, &part, 1);
+ if (ret)
+ return ERR_PTR(ret);
+diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
+index cc08bd707378f..e115aab7243e1 100644
+--- a/drivers/mtd/spi-nor/core.c
++++ b/drivers/mtd/spi-nor/core.c
+@@ -177,7 +177,7 @@ static int spi_nor_controller_ops_write_reg(struct spi_nor *nor, u8 opcode,
+
+ static int spi_nor_controller_ops_erase(struct spi_nor *nor, loff_t offs)
+ {
+- if (spi_nor_protocol_is_dtr(nor->write_proto))
++ if (spi_nor_protocol_is_dtr(nor->reg_proto))
+ return -EOPNOTSUPP;
+
+ return nor->controller_ops->erase(nor, offs);
+@@ -980,21 +980,22 @@ static int spi_nor_write_16bit_sr_and_check(struct spi_nor *nor, u8 sr1)
+ ret = spi_nor_read_cr(nor, &sr_cr[1]);
+ if (ret)
+ return ret;
+- } else if (nor->params->quad_enable) {
++ } else if (spi_nor_get_protocol_width(nor->read_proto) == 4 &&
++ spi_nor_get_protocol_width(nor->write_proto) == 4 &&
++ nor->params->quad_enable) {
+ /*
+ * If the Status Register 2 Read command (35h) is not
+ * supported, we should at least be sure we don't
+ * change the value of the SR2 Quad Enable bit.
+ *
+- * We can safely assume that when the Quad Enable method is
+- * set, the value of the QE bit is one, as a consequence of the
+- * nor->params->quad_enable() call.
++ * When the Quad Enable method is set and the buswidth is 4, we
++ * can safely assume that the value of the QE bit is one, as a
++ * consequence of the nor->params->quad_enable() call.
+ *
+- * We can safely assume that the Quad Enable bit is present in
+- * the Status Register 2 at BIT(1). According to the JESD216
+- * revB standard, BFPT DWORDS[15], bits 22:20, the 16-bit
+- * Write Status (01h) command is available just for the cases
+- * in which the QE bit is described in SR2 at BIT(1).
++ * According to the JESD216 revB standard, BFPT DWORDS[15],
++ * bits 22:20, the 16-bit Write Status (01h) command is
++ * available just for the cases in which the QE bit is
++ * described in SR2 at BIT(1).
+ */
+ sr_cr[1] = SR2_QUAD_EN_BIT1;
+ } else {
+@@ -1007,6 +1008,15 @@ static int spi_nor_write_16bit_sr_and_check(struct spi_nor *nor, u8 sr1)
+ if (ret)
+ return ret;
+
++ ret = spi_nor_read_sr(nor, sr_cr);
++ if (ret)
++ return ret;
++
++ if (sr1 != sr_cr[0]) {
++ dev_dbg(nor->dev, "SR: Read back test failed\n");
++ return -EIO;
++ }
++
+ if (nor->flags & SNOR_F_NO_READ_CR)
+ return 0;
+
+@@ -1186,7 +1196,7 @@ static int spi_nor_erase_chip(struct spi_nor *nor)
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_NO_DATA);
+
+- spi_nor_spimem_setup_op(nor, &op, nor->write_proto);
++ spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
+
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+@@ -1331,7 +1341,7 @@ int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_NO_DATA);
+
+- spi_nor_spimem_setup_op(nor, &op, nor->write_proto);
++ spi_nor_spimem_setup_op(nor, &op, nor->reg_proto);
+
+ return spi_mem_exec_op(nor->spimem, &op);
+ } else if (nor->controller_ops->erase) {
+@@ -1400,6 +1410,8 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
+ continue;
+
+ erase = &map->erase_type[i];
++ if (!erase->size)
++ continue;
+
+ /* Alignment is not mandatory for overlaid regions */
+ if (region->offset & SNOR_OVERLAID_REGION &&
+@@ -2146,7 +2158,8 @@ static int spi_nor_spimem_check_readop(struct spi_nor *nor,
+ spi_nor_spimem_setup_op(nor, &op, read->proto);
+
+ /* convert the dummy cycles to the number of bytes */
+- op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
++ op.dummy.nbytes = (read->num_mode_clocks + read->num_wait_states) *
++ op.dummy.buswidth / 8;
+ if (spi_nor_protocol_is_dtr(nor->read_proto))
+ op.dummy.nbytes *= 2;
+
+@@ -2234,6 +2247,15 @@ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
+ erase->size_mask = (1 << erase->size_shift) - 1;
+ }
+
++/**
++ * spi_nor_mask_erase_type() - mask out a SPI NOR erase type
++ * @erase: pointer to a structure that describes a SPI NOR erase type
++ */
++void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase)
++{
++ erase->size = 0;
++}
++
+ /**
+ * spi_nor_init_uniform_erase_map() - Initialize uniform erase map
+ * @map: the erase map of the SPI NOR
+@@ -3139,7 +3161,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
+ mtd->writesize = nor->params->writesize;
+ mtd->flags = MTD_CAP_NORFLASH;
+ mtd->size = nor->params->size;
+- mtd->_erase = spi_nor_erase;
+ mtd->_read = spi_nor_read;
+ mtd->_suspend = spi_nor_suspend;
+ mtd->_resume = spi_nor_resume;
+@@ -3169,6 +3190,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
+
+ if (info->flags & SPI_NOR_NO_ERASE)
+ mtd->flags |= MTD_NO_ERASE;
++ else
++ mtd->_erase = spi_nor_erase;
+
+ mtd->dev.parent = dev;
+ nor->page_size = nor->params->page_size;
+diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
+index 3348e1dd14452..7eb2090b2fdb7 100644
+--- a/drivers/mtd/spi-nor/core.h
++++ b/drivers/mtd/spi-nor/core.h
+@@ -538,6 +538,7 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode,
+
+ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
+ u8 opcode);
++void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase);
+ struct spi_nor_erase_region *
+ spi_nor_region_next(struct spi_nor_erase_region *region);
+ void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
+diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c
+index c500c2118a5db..c787fdacf0a11 100644
+--- a/drivers/mtd/spi-nor/sfdp.c
++++ b/drivers/mtd/spi-nor/sfdp.c
+@@ -874,7 +874,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
+ */
+ for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
+ if (!(regions_erase_type & BIT(erase[i].idx)))
+- spi_nor_set_erase_type(&erase[i], 0, 0xFF);
++ spi_nor_mask_erase_type(&erase[i]);
+
+ return 0;
+ }
+@@ -1088,7 +1088,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
+ erase_type[i].opcode = (dwords[1] >>
+ erase_type[i].idx * 8) & 0xFF;
+ else
+- spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF);
++ spi_nor_mask_erase_type(&erase_type[i]);
+ }
+
+ /*
+@@ -1220,7 +1220,7 @@ static int spi_nor_parse_sccr(struct spi_nor *nor,
+
+ le32_to_cpu_array(dwords, sccr_header->length);
+
+- if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, dwords[22]))
++ if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, dwords[21]))
+ nor->flags |= SNOR_F_IO_MODE_EN_VOLATILE;
+
+ out:
+diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c
+index ee82dcd75310c..f3684b3f40895 100644
+--- a/drivers/mtd/spi-nor/spansion.c
++++ b/drivers/mtd/spi-nor/spansion.c
+@@ -15,8 +15,13 @@
+ #define SPINOR_REG_CYPRESS_CFR3V 0x00800004
+ #define SPINOR_REG_CYPRESS_CFR3V_PGSZ BIT(4) /* Page size. */
+ #define SPINOR_REG_CYPRESS_CFR5V 0x00800006
+-#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN 0x3
+-#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS 0
++#define SPINOR_REG_CYPRESS_CFR5_BIT6 BIT(6)
++#define SPINOR_REG_CYPRESS_CFR5_DDR BIT(1)
++#define SPINOR_REG_CYPRESS_CFR5_OPI BIT(0)
++#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN \
++ (SPINOR_REG_CYPRESS_CFR5_BIT6 | SPINOR_REG_CYPRESS_CFR5_DDR | \
++ SPINOR_REG_CYPRESS_CFR5_OPI)
++#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS SPINOR_REG_CYPRESS_CFR5_BIT6
+ #define SPINOR_OP_CYPRESS_RD_FAST 0xee
+
+ /**
+diff --git a/drivers/mtd/spi-nor/sysfs.c b/drivers/mtd/spi-nor/sysfs.c
+index 9aec9d8a98ada..4c3b351aef245 100644
+--- a/drivers/mtd/spi-nor/sysfs.c
++++ b/drivers/mtd/spi-nor/sysfs.c
+@@ -67,6 +67,19 @@ static struct bin_attribute *spi_nor_sysfs_bin_entries[] = {
+ NULL
+ };
+
++static umode_t spi_nor_sysfs_is_visible(struct kobject *kobj,
++ struct attribute *attr, int n)
++{
++ struct spi_device *spi = to_spi_device(kobj_to_dev(kobj));
++ struct spi_mem *spimem = spi_get_drvdata(spi);
++ struct spi_nor *nor = spi_mem_get_drvdata(spimem);
++
++ if (attr == &dev_attr_jedec_id.attr && !nor->info->id_len)
++ return 0;
++
++ return 0444;
++}
++
+ static umode_t spi_nor_sysfs_is_bin_visible(struct kobject *kobj,
+ struct bin_attribute *attr, int n)
+ {
+@@ -82,6 +95,7 @@ static umode_t spi_nor_sysfs_is_bin_visible(struct kobject *kobj,
+
+ static const struct attribute_group spi_nor_sysfs_group = {
+ .name = "spi-nor",
++ .is_visible = spi_nor_sysfs_is_visible,
+ .is_bin_visible = spi_nor_sysfs_is_bin_visible,
+ .attrs = spi_nor_sysfs_entries,
+ .bin_attrs = spi_nor_sysfs_bin_entries,
+diff --git a/drivers/mtd/spi-nor/xilinx.c b/drivers/mtd/spi-nor/xilinx.c
+index 1138bdbf41998..75dd13a390404 100644
+--- a/drivers/mtd/spi-nor/xilinx.c
++++ b/drivers/mtd/spi-nor/xilinx.c
+@@ -66,7 +66,8 @@ static int xilinx_nor_setup(struct spi_nor *nor,
+ /* Flash in Power of 2 mode */
+ nor->page_size = (nor->page_size == 264) ? 256 : 512;
+ nor->mtd.writebufsize = nor->page_size;
+- nor->mtd.size = 8 * nor->page_size * nor->info->n_sectors;
++ nor->params->size = 8 * nor->page_size * nor->info->n_sectors;
++ nor->mtd.size = nor->params->size;
+ nor->mtd.erasesize = 8 * nor->page_size;
+ } else {
+ /* Flash in Default addressing mode */
+diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
+index a7e3eb9befb62..762dc14aef742 100644
+--- a/drivers/mtd/ubi/build.c
++++ b/drivers/mtd/ubi/build.c
+@@ -351,9 +351,6 @@ static ssize_t dev_attribute_show(struct device *dev,
+ * we still can use 'ubi->ubi_num'.
+ */
+ ubi = container_of(dev, struct ubi_device, dev);
+- ubi = ubi_get_device(ubi->ubi_num);
+- if (!ubi)
+- return -ENODEV;
+
+ if (attr == &dev_eraseblock_size)
+ ret = sprintf(buf, "%d\n", ubi->leb_size);
+@@ -382,7 +379,6 @@ static ssize_t dev_attribute_show(struct device *dev,
+ else
+ ret = -EINVAL;
+
+- ubi_put_device(ubi);
+ return ret;
+ }
+
+@@ -472,6 +468,7 @@ static int uif_init(struct ubi_device *ubi)
+ err = ubi_add_volume(ubi, ubi->volumes[i]);
+ if (err) {
+ ubi_err(ubi, "cannot add volume %d", i);
++ ubi->volumes[i] = NULL;
+ goto out_volumes;
+ }
+ }
+@@ -684,6 +681,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
+ ubi->vid_hdr_aloffset;
+ }
+
++ /*
++ * Memory allocation for VID header is ubi->vid_hdr_alsize
++ * which is described in comments in io.c.
++ * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds
++ * ubi->vid_hdr_alsize, so that all vid header operations
++ * won't access memory out of bounds.
++ */
++ if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) {
++ ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)"
++ " + VID header size(%zu) > VID header aligned size(%d).",
++ ubi->vid_hdr_offset, ubi->vid_hdr_shift,
++ UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize);
++ return -EINVAL;
++ }
++
+ /* Similar for the data offset */
+ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
+ ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
+@@ -979,9 +991,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ goto out_detach;
+ }
+
+- /* Make device "available" before it becomes accessible via sysfs */
+- ubi_devices[ubi_num] = ubi;
+-
+ err = uif_init(ubi);
+ if (err)
+ goto out_detach;
+@@ -1026,6 +1035,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ wake_up_process(ubi->bgt_thread);
+ spin_unlock(&ubi->wl_lock);
+
++ ubi_devices[ubi_num] = ubi;
+ ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
+ return ubi_num;
+
+@@ -1034,7 +1044,6 @@ out_debugfs:
+ out_uif:
+ uif_close(ubi);
+ out_detach:
+- ubi_devices[ubi_num] = NULL;
+ ubi_wl_close(ubi);
+ ubi_free_all_volumes(ubi);
+ vfree(ubi->vtbl);
+diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
+index ccc5979642b78..4d05b8d320830 100644
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -946,7 +946,7 @@ static int try_write_vid_and_data(struct ubi_volume *vol, int lnum,
+ int offset, int len)
+ {
+ struct ubi_device *ubi = vol->ubi;
+- int pnum, opnum, err, vol_id = vol->vol_id;
++ int pnum, opnum, err, err2, vol_id = vol->vol_id;
+
+ pnum = ubi_wl_get_peb(ubi);
+ if (pnum < 0) {
+@@ -981,10 +981,19 @@ static int try_write_vid_and_data(struct ubi_volume *vol, int lnum,
+ out_put:
+ up_read(&ubi->fm_eba_sem);
+
+- if (err && pnum >= 0)
+- err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
+- else if (!err && opnum >= 0)
+- err = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0);
++ if (err && pnum >= 0) {
++ err2 = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
++ if (err2) {
++ ubi_warn(ubi, "failed to return physical eraseblock %d, error %d",
++ pnum, err2);
++ }
++ } else if (!err && opnum >= 0) {
++ err2 = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0);
++ if (err2) {
++ ubi_warn(ubi, "failed to return physical eraseblock %d, error %d",
++ opnum, err2);
++ }
++ }
+
+ return err;
+ }
+diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
+index 28f55f9cf7153..69592be33adfc 100644
+--- a/drivers/mtd/ubi/fastmap-wl.c
++++ b/drivers/mtd/ubi/fastmap-wl.c
+@@ -97,6 +97,33 @@ out:
+ return e;
+ }
+
++/*
++ * has_enough_free_count - whether ubi has enough free pebs to fill fm pools
++ * @ubi: UBI device description object
++ * @is_wl_pool: whether UBI is filling wear leveling pool
++ *
++ * This helper function checks whether there are enough free pebs (deducted
++ * by fastmap pebs) to fill fm_pool and fm_wl_pool, above rule works after
++ * there is at least one of free pebs is filled into fm_wl_pool.
++ * For wear leveling pool, UBI should also reserve free pebs for bad pebs
++ * handling, because there maybe no enough free pebs for user volumes after
++ * producing new bad pebs.
++ */
++static bool has_enough_free_count(struct ubi_device *ubi, bool is_wl_pool)
++{
++ int fm_used = 0; // fastmap non anchor pebs.
++ int beb_rsvd_pebs;
++
++ if (!ubi->free.rb_node)
++ return false;
++
++ beb_rsvd_pebs = is_wl_pool ? ubi->beb_rsvd_pebs : 0;
++ if (ubi->fm_wl_pool.size > 0 && !(ubi->ro_mode || ubi->fm_disabled))
++ fm_used = ubi->fm_size / ubi->leb_size - 1;
++
++ return ubi->free_count - beb_rsvd_pebs > fm_used;
++}
++
+ /**
+ * ubi_refill_pools - refills all fastmap PEB pools.
+ * @ubi: UBI device description object
+@@ -119,22 +146,20 @@ void ubi_refill_pools(struct ubi_device *ubi)
+ if (ubi->fm_anchor) {
+ wl_tree_add(ubi->fm_anchor, &ubi->free);
+ ubi->free_count++;
+- }
+- if (ubi->fm_next_anchor) {
+- wl_tree_add(ubi->fm_next_anchor, &ubi->free);
+- ubi->free_count++;
++ ubi->fm_anchor = NULL;
+ }
+
+- /* All available PEBs are in ubi->free, now is the time to get
+- * the best anchor PEBs.
+- */
+- ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
+- ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1);
++ if (!ubi->fm_disabled)
++ /*
++ * All available PEBs are in ubi->free, now is the time to get
++ * the best anchor PEBs.
++ */
++ ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
+
+ for (;;) {
+ enough = 0;
+ if (pool->size < pool->max_size) {
+- if (!ubi->free.rb_node)
++ if (!has_enough_free_count(ubi, false))
+ break;
+
+ e = wl_get_wle(ubi);
+@@ -147,8 +172,7 @@ void ubi_refill_pools(struct ubi_device *ubi)
+ enough++;
+
+ if (wl_pool->size < wl_pool->max_size) {
+- if (!ubi->free.rb_node ||
+- (ubi->free_count - ubi->beb_rsvd_pebs < 5))
++ if (!has_enough_free_count(ubi, true))
+ break;
+
+ e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
+@@ -286,20 +310,26 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
+ int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
+ {
+ struct ubi_work *wrk;
++ struct ubi_wl_entry *anchor;
+
+ spin_lock(&ubi->wl_lock);
+
+- /* Do we have a next anchor? */
+- if (!ubi->fm_next_anchor) {
+- ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1);
+- if (!ubi->fm_next_anchor)
+- /* Tell wear leveling to produce a new anchor PEB */
+- ubi->fm_do_produce_anchor = 1;
++ /* Do we already have an anchor? */
++ if (ubi->fm_anchor) {
++ spin_unlock(&ubi->wl_lock);
++ return 0;
+ }
+
+- /* Do wear leveling to get a new anchor PEB or check the
+- * existing next anchor candidate.
+- */
++ /* See if we can find an anchor PEB on the list of free PEBs */
++ anchor = ubi_wl_get_fm_peb(ubi, 1);
++ if (anchor) {
++ ubi->fm_anchor = anchor;
++ spin_unlock(&ubi->wl_lock);
++ return 0;
++ }
++
++ ubi->fm_do_produce_anchor = 1;
++ /* No luck, trigger wear leveling to produce a new anchor PEB. */
+ if (ubi->wl_scheduled) {
+ spin_unlock(&ubi->wl_lock);
+ return 0;
+@@ -381,11 +411,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi)
+ ubi->fm_anchor = NULL;
+ }
+
+- if (ubi->fm_next_anchor) {
+- return_unused_peb(ubi, ubi->fm_next_anchor);
+- ubi->fm_next_anchor = NULL;
+- }
+-
+ if (ubi->fm) {
+ for (i = 0; i < ubi->fm->used_blocks; i++)
+ kfree(ubi->fm->e[i]);
+diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
+index 022af59906aa9..6e95c4b1473e6 100644
+--- a/drivers/mtd/ubi/fastmap.c
++++ b/drivers/mtd/ubi/fastmap.c
+@@ -468,7 +468,9 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ if (err == UBI_IO_FF_BITFLIPS)
+ scrub = 1;
+
+- add_aeb(ai, free, pnum, ec, scrub);
++ ret = add_aeb(ai, free, pnum, ec, scrub);
++ if (ret)
++ goto out;
+ continue;
+ } else if (err == 0 || err == UBI_IO_BITFLIPS) {
+ dbg_bld("Found non empty PEB:%i in pool", pnum);
+@@ -638,8 +640,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
+ if (fm_pos >= fm_size)
+ goto fail_bad;
+
+- add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum),
+- be32_to_cpu(fmec->ec), 0);
++ ret = add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum),
++ be32_to_cpu(fmec->ec), 0);
++ if (ret)
++ goto fail;
+ }
+
+ /* read EC values from used list */
+@@ -649,8 +653,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
+ if (fm_pos >= fm_size)
+ goto fail_bad;
+
+- add_aeb(ai, &used, be32_to_cpu(fmec->pnum),
+- be32_to_cpu(fmec->ec), 0);
++ ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum),
++ be32_to_cpu(fmec->ec), 0);
++ if (ret)
++ goto fail;
+ }
+
+ /* read EC values from scrub list */
+@@ -660,8 +666,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
+ if (fm_pos >= fm_size)
+ goto fail_bad;
+
+- add_aeb(ai, &used, be32_to_cpu(fmec->pnum),
+- be32_to_cpu(fmec->ec), 1);
++ ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum),
++ be32_to_cpu(fmec->ec), 1);
++ if (ret)
++ goto fail;
+ }
+
+ /* read EC values from erase list */
+@@ -671,8 +679,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
+ if (fm_pos >= fm_size)
+ goto fail_bad;
+
+- add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum),
+- be32_to_cpu(fmec->ec), 1);
++ ret = add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum),
++ be32_to_cpu(fmec->ec), 1);
++ if (ret)
++ goto fail;
+ }
+
+ ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count);
+@@ -1220,17 +1230,6 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
+ fm_pos += sizeof(*fec);
+ ubi_assert(fm_pos <= ubi->fm_size);
+ }
+- if (ubi->fm_next_anchor) {
+- fec = (struct ubi_fm_ec *)(fm_raw + fm_pos);
+-
+- fec->pnum = cpu_to_be32(ubi->fm_next_anchor->pnum);
+- set_seen(ubi, ubi->fm_next_anchor->pnum, seen_pebs);
+- fec->ec = cpu_to_be32(ubi->fm_next_anchor->ec);
+-
+- free_peb_count++;
+- fm_pos += sizeof(*fec);
+- ubi_assert(fm_pos <= ubi->fm_size);
+- }
+ fmh->free_peb_count = cpu_to_be32(free_peb_count);
+
+ ubi_for_each_used_peb(ubi, wl_e, tmp_rb) {
+diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
+index 7c083ad58274a..078112e23dfd5 100644
+--- a/drivers/mtd/ubi/ubi.h
++++ b/drivers/mtd/ubi/ubi.h
+@@ -489,8 +489,7 @@ struct ubi_debug_info {
+ * @fm_work: fastmap work queue
+ * @fm_work_scheduled: non-zero if fastmap work was scheduled
+ * @fast_attach: non-zero if UBI was attached by fastmap
+- * @fm_anchor: The new anchor PEB used during fastmap update
+- * @fm_next_anchor: An anchor PEB candidate for the next time fastmap is updated
++ * @fm_anchor: The next anchor PEB to use for fastmap
+ * @fm_do_produce_anchor: If true produce an anchor PEB in wl
+ *
+ * @used: RB-tree of used physical eraseblocks
+@@ -601,7 +600,6 @@ struct ubi_device {
+ int fm_work_scheduled;
+ int fast_attach;
+ struct ubi_wl_entry *fm_anchor;
+- struct ubi_wl_entry *fm_next_anchor;
+ int fm_do_produce_anchor;
+
+ /* Wear-leveling sub-system's stuff */
+diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
+index 139ee132bfbcf..d79323e8ea29d 100644
+--- a/drivers/mtd/ubi/vmt.c
++++ b/drivers/mtd/ubi/vmt.c
+@@ -56,16 +56,11 @@ static ssize_t vol_attribute_show(struct device *dev,
+ {
+ int ret;
+ struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
+- struct ubi_device *ubi;
+-
+- ubi = ubi_get_device(vol->ubi->ubi_num);
+- if (!ubi)
+- return -ENODEV;
++ struct ubi_device *ubi = vol->ubi;
+
+ spin_lock(&ubi->volumes_lock);
+ if (!ubi->volumes[vol->vol_id]) {
+ spin_unlock(&ubi->volumes_lock);
+- ubi_put_device(ubi);
+ return -ENODEV;
+ }
+ /* Take a reference to prevent volume removal */
+@@ -103,7 +98,6 @@ static ssize_t vol_attribute_show(struct device *dev,
+ vol->ref_count -= 1;
+ ubi_assert(vol->ref_count >= 0);
+ spin_unlock(&ubi->volumes_lock);
+- ubi_put_device(ubi);
+ return ret;
+ }
+
+@@ -315,7 +309,6 @@ out_mapping:
+ ubi->volumes[vol_id] = NULL;
+ ubi->vol_count -= 1;
+ spin_unlock(&ubi->volumes_lock);
+- ubi_eba_destroy_table(eba_tbl);
+ out_acc:
+ spin_lock(&ubi->volumes_lock);
+ ubi->rsvd_pebs -= vol->reserved_pebs;
+@@ -471,7 +464,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
+ for (i = 0; i < -pebs; i++) {
+ err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i);
+ if (err)
+- goto out_acc;
++ goto out_free;
+ }
+ spin_lock(&ubi->volumes_lock);
+ ubi->rsvd_pebs += pebs;
+@@ -519,8 +512,10 @@ out_acc:
+ ubi->avail_pebs += pebs;
+ spin_unlock(&ubi->volumes_lock);
+ }
++ return err;
++
+ out_free:
+- kfree(new_eba_tbl);
++ ubi_eba_destroy_table(new_eba_tbl);
+ return err;
+ }
+
+@@ -587,6 +582,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
+ if (err) {
+ ubi_err(ubi, "cannot add character device for volume %d, error %d",
+ vol_id, err);
++ vol_release(&vol->dev);
+ return err;
+ }
+
+@@ -597,15 +593,14 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
+ vol->dev.groups = volume_dev_groups;
+ dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);
+ err = device_register(&vol->dev);
+- if (err)
+- goto out_cdev;
++ if (err) {
++ cdev_del(&vol->cdev);
++ put_device(&vol->dev);
++ return err;
++ }
+
+ self_check_volumes(ubi);
+ return err;
+-
+-out_cdev:
+- cdev_del(&vol->cdev);
+- return err;
+ }
+
+ /**
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
+index 8455f1d47f3c9..4427018ad4d9b 100644
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+ * @vol_id: the volume ID that last used this PEB
+ * @lnum: the last used logical eraseblock number for the PEB
+ * @torture: if the physical eraseblock has to be tortured
+- * @nested: denotes whether the work_sem is already held in read mode
++ * @nested: denotes whether the work_sem is already held
+ *
+ * This function returns zero in case of success and a %-ENOMEM in case of
+ * failure.
+@@ -689,16 +689,16 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+
+ #ifdef CONFIG_MTD_UBI_FASTMAP
+ e1 = find_anchor_wl_entry(&ubi->used);
+- if (e1 && ubi->fm_next_anchor &&
+- (ubi->fm_next_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
++ if (e1 && ubi->fm_anchor &&
++ (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
+ ubi->fm_do_produce_anchor = 1;
+- /* fm_next_anchor is no longer considered a good anchor
+- * candidate.
++ /*
++ * fm_anchor is no longer considered a good anchor.
+ * NULL assignment also prevents multiple wear level checks
+ * of this PEB.
+ */
+- wl_tree_add(ubi->fm_next_anchor, &ubi->free);
+- ubi->fm_next_anchor = NULL;
++ wl_tree_add(ubi->fm_anchor, &ubi->free);
++ ubi->fm_anchor = NULL;
+ ubi->free_count++;
+ }
+
+@@ -886,8 +886,11 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+
+ err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
+ if (err) {
+- if (e2)
++ if (e2) {
++ spin_lock(&ubi->wl_lock);
+ wl_entry_destroy(ubi, e2);
++ spin_unlock(&ubi->wl_lock);
++ }
+ goto out_ro;
+ }
+
+@@ -969,11 +972,11 @@ out_error:
+ spin_lock(&ubi->wl_lock);
+ ubi->move_from = ubi->move_to = NULL;
+ ubi->move_to_put = ubi->wl_scheduled = 0;
++ wl_entry_destroy(ubi, e1);
++ wl_entry_destroy(ubi, e2);
+ spin_unlock(&ubi->wl_lock);
+
+ ubi_free_vid_buf(vidb);
+- wl_entry_destroy(ubi, e1);
+- wl_entry_destroy(ubi, e2);
+
+ out_ro:
+ ubi_ro_mode(ubi);
+@@ -1085,12 +1088,13 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
+ if (!err) {
+ spin_lock(&ubi->wl_lock);
+
+- if (!ubi->fm_disabled && !ubi->fm_next_anchor &&
++ if (!ubi->fm_disabled && !ubi->fm_anchor &&
+ e->pnum < UBI_FM_MAX_START) {
+- /* Abort anchor production, if needed it will be
++ /*
++ * Abort anchor production, if needed it will be
+ * enabled again in the wear leveling started below.
+ */
+- ubi->fm_next_anchor = e;
++ ubi->fm_anchor = e;
+ ubi->fm_do_produce_anchor = 0;
+ } else {
+ wl_tree_add(e, &ubi->free);
+@@ -1117,16 +1121,20 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
+ int err1;
+
+ /* Re-schedule the LEB for erasure */
+- err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
++ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
+ if (err1) {
++ spin_lock(&ubi->wl_lock);
+ wl_entry_destroy(ubi, e);
++ spin_unlock(&ubi->wl_lock);
+ err = err1;
+ goto out_ro;
+ }
+ return err;
+ }
+
++ spin_lock(&ubi->wl_lock);
+ wl_entry_destroy(ubi, e);
++ spin_unlock(&ubi->wl_lock);
+ if (err != -EIO)
+ /*
+ * If this is not %-EIO, we have no idea what to do. Scheduling
+@@ -1242,6 +1250,18 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
+ retry:
+ spin_lock(&ubi->wl_lock);
+ e = ubi->lookuptbl[pnum];
++ if (!e) {
++ /*
++ * This wl entry has been removed for some errors by other
++ * process (eg. wear leveling worker), corresponding process
++ * (except __erase_worker, which cannot concurrent with
++ * ubi_wl_put_peb) will set ubi ro_mode at the same time,
++ * just ignore this wl entry.
++ */
++ spin_unlock(&ubi->wl_lock);
++ up_read(&ubi->fm_protect);
++ return 0;
++ }
+ if (e == ubi->move_from) {
+ /*
+ * User is putting the physical eraseblock which was selected to
+diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
+index f37b1c56f7c43..44a7d36446c52 100644
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -81,7 +81,6 @@ config WIREGUARD
+ select CRYPTO
+ select CRYPTO_LIB_CURVE25519
+ select CRYPTO_LIB_CHACHA20POLY1305
+- select CRYPTO_LIB_BLAKE2S
+ select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT
+ select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
+ select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
+@@ -150,7 +149,7 @@ config NET_FC
+
+ config IFB
+ tristate "Intermediate Functional Block support"
+- depends on NET_CLS_ACT
++ depends on NET_ACT_MIRRED || NFT_FWD_NETDEV
+ select NET_REDIRECT
+ help
+ This is an intermediate driver that allows sharing of
+diff --git a/drivers/net/Makefile b/drivers/net/Makefile
+index 739838623cf65..50e60852f1286 100644
+--- a/drivers/net/Makefile
++++ b/drivers/net/Makefile
+@@ -30,7 +30,7 @@ obj-$(CONFIG_TUN) += tun.o
+ obj-$(CONFIG_TAP) += tap.o
+ obj-$(CONFIG_VETH) += veth.o
+ obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
+-obj-$(CONFIG_VXLAN) += vxlan.o
++obj-$(CONFIG_VXLAN) += vxlan/
+ obj-$(CONFIG_GENEVE) += geneve.o
+ obj-$(CONFIG_BAREUDP) += bareudp.o
+ obj-$(CONFIG_GTP) += gtp.o
+diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
+index 1bad1866ae462..a48220f91a2df 100644
+--- a/drivers/net/arcnet/arcnet.c
++++ b/drivers/net/arcnet/arcnet.c
+@@ -468,7 +468,7 @@ static void arcnet_reply_tasklet(struct tasklet_struct *t)
+
+ ret = sock_queue_err_skb(sk, ackskb);
+ if (ret)
+- kfree_skb(ackskb);
++ dev_kfree_skb_irq(ackskb);
+
+ local_irq_enable();
+ };
+diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
+index 3c8f665c15580..28dccbc0e8d8f 100644
+--- a/drivers/net/arcnet/com20020-pci.c
++++ b/drivers/net/arcnet/com20020-pci.c
+@@ -138,6 +138,9 @@ static int com20020pci_probe(struct pci_dev *pdev,
+ return -ENOMEM;
+
+ ci = (struct com20020_pci_card_info *)id->driver_data;
++ if (!ci)
++ return -EINVAL;
++
+ priv->ci = ci;
+ mm = &ci->misc_map;
+
+diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c
+index b88a109b3b150..26ee263d8f3aa 100644
+--- a/drivers/net/arcnet/com20020_cs.c
++++ b/drivers/net/arcnet/com20020_cs.c
+@@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev)
+ struct com20020_dev *info;
+ struct net_device *dev;
+ struct arcnet_local *lp;
++ int ret = -ENOMEM;
+
+ dev_dbg(&p_dev->dev, "com20020_attach()\n");
+
+@@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev)
+ info->dev = dev;
+ p_dev->priv = info;
+
+- return com20020_config(p_dev);
++ ret = com20020_config(p_dev);
++ if (ret)
++ goto fail_config;
++
++ return 0;
+
++fail_config:
++ free_arcdev(dev);
+ fail_alloc_dev:
+ kfree(info);
+ fail_alloc_info:
+- return -ENOMEM;
++ return ret;
+ } /* com20020_attach */
+
+ static void com20020_detach(struct pcmcia_device *link)
+diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
+index 54e321a695ce9..98c915943f323 100644
+--- a/drivers/net/bareudp.c
++++ b/drivers/net/bareudp.c
+@@ -141,14 +141,14 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
+- if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET)
++ if (!ipv6_mod_enabled() || family == AF_INET)
+ err = IP_ECN_decapsulate(oiph, skb);
+ else
+ err = IP6_ECN_decapsulate(oiph, skb);
+
+ if (unlikely(err)) {
+ if (log_ecn_error) {
+- if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET)
++ if (!ipv6_mod_enabled() || family == AF_INET)
+ net_info_ratelimited("non-ECT from %pI4 "
+ "with TOS=%#x\n",
+ &((struct iphdr *)oiph)->saddr,
+@@ -214,11 +214,12 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port)
+ int err;
+
+ memset(&udp_conf, 0, sizeof(udp_conf));
+-#if IS_ENABLED(CONFIG_IPV6)
+- udp_conf.family = AF_INET6;
+-#else
+- udp_conf.family = AF_INET;
+-#endif
++
++ if (ipv6_mod_enabled())
++ udp_conf.family = AF_INET6;
++ else
++ udp_conf.family = AF_INET;
++
+ udp_conf.local_udp_port = port;
+ /* Open UDP socket */
+ err = udp_sock_create(net, &udp_conf, &sock);
+@@ -441,7 +442,7 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+
+ rcu_read_lock();
+- if (IS_ENABLED(CONFIG_IPV6) && info->mode & IP_TUNNEL_INFO_IPV6)
++ if (ipv6_mod_enabled() && info->mode & IP_TUNNEL_INFO_IPV6)
+ err = bareudp6_xmit_skb(skb, dev, bareudp, info);
+ else
+ err = bareudp_xmit_skb(skb, dev, bareudp, info);
+@@ -471,7 +472,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev,
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+
+- if (!IS_ENABLED(CONFIG_IPV6) || ip_tunnel_info_af(info) == AF_INET) {
++ if (!ipv6_mod_enabled() || ip_tunnel_info_af(info) == AF_INET) {
+ struct rtable *rt;
+ __be32 saddr;
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 6006c2e8fa2bc..ff6d4e74a186a 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -87,8 +87,9 @@ static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
+ static u16 ad_ticks_per_sec;
+ static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
+
+-static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
+- MULTICAST_LACPDU_ADDR;
++const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = {
++ 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02
++};
+
+ /* ================= main 802.3ad protocol functions ================== */
+ static int ad_lacpdu_send(struct port *port);
+@@ -225,7 +226,7 @@ static inline int __check_agg_selection_timer(struct port *port)
+ if (bond == NULL)
+ return 0;
+
+- return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0;
++ return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0;
+ }
+
+ /**
+@@ -1021,8 +1022,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
+ if (port->aggregator &&
+ port->aggregator->is_active &&
+ !__port_is_enabled(port)) {
+-
+ __enable_port(port);
++ *update_slave_arr = true;
+ }
+ }
+ break;
+@@ -1538,6 +1539,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
+ slave_err(bond->dev, port->slave->dev,
+ "Port %d did not find a suitable aggregator\n",
+ port->actor_port_number);
++ return;
+ }
+ }
+ /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
+@@ -1779,6 +1781,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
+ port = port->next_port_in_aggregator) {
+ __enable_port(port);
+ }
++ *update_slave_arr = true;
+ }
+ }
+
+@@ -1994,7 +1997,7 @@ static void ad_marker_response_received(struct bond_marker *marker,
+ */
+ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
+ {
+- BOND_AD_INFO(bond).agg_select_timer = timeout;
++ atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout);
+ }
+
+ /**
+@@ -2006,30 +2009,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
+ */
+ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)
+ {
+- /* check that the bond is not initialized yet */
+- if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),
+- bond->dev->dev_addr)) {
+-
+- BOND_AD_INFO(bond).aggregator_identifier = 0;
+-
+- BOND_AD_INFO(bond).system.sys_priority =
+- bond->params.ad_actor_sys_prio;
+- if (is_zero_ether_addr(bond->params.ad_actor_system))
+- BOND_AD_INFO(bond).system.sys_mac_addr =
+- *((struct mac_addr *)bond->dev->dev_addr);
+- else
+- BOND_AD_INFO(bond).system.sys_mac_addr =
+- *((struct mac_addr *)bond->params.ad_actor_system);
++ BOND_AD_INFO(bond).aggregator_identifier = 0;
++ BOND_AD_INFO(bond).system.sys_priority =
++ bond->params.ad_actor_sys_prio;
++ if (is_zero_ether_addr(bond->params.ad_actor_system))
++ BOND_AD_INFO(bond).system.sys_mac_addr =
++ *((struct mac_addr *)bond->dev->dev_addr);
++ else
++ BOND_AD_INFO(bond).system.sys_mac_addr =
++ *((struct mac_addr *)bond->params.ad_actor_system);
+
+- /* initialize how many times this module is called in one
+- * second (should be about every 100ms)
+- */
+- ad_ticks_per_sec = tick_resolution;
++ /* initialize how many times this module is called in one
++ * second (should be about every 100ms)
++ */
++ ad_ticks_per_sec = tick_resolution;
+
+- bond_3ad_initiate_agg_selection(bond,
+- AD_AGGREGATOR_SELECTION_TIMER *
+- ad_ticks_per_sec);
+- }
++ bond_3ad_initiate_agg_selection(bond,
++ AD_AGGREGATOR_SELECTION_TIMER *
++ ad_ticks_per_sec);
+ }
+
+ /**
+@@ -2227,7 +2224,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
+ temp_aggregator->num_of_ports--;
+ if (__agg_active_ports(temp_aggregator) == 0) {
+ select_new_active_agg = temp_aggregator->is_active;
+- ad_clear_agg(temp_aggregator);
++ if (temp_aggregator->num_of_ports == 0)
++ ad_clear_agg(temp_aggregator);
+ if (select_new_active_agg) {
+ slave_info(bond->dev, slave->dev, "Removing an active aggregator\n");
+ /* select new active aggregator */
+@@ -2277,6 +2275,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond)
+ spin_unlock_bh(&bond->mode_lock);
+ }
+
++/**
++ * bond_agg_timer_advance - advance agg_select_timer
++ * @bond: bonding structure
++ *
++ * Return true when agg_select_timer reaches 0.
++ */
++static bool bond_agg_timer_advance(struct bonding *bond)
++{
++ int val, nval;
++
++ while (1) {
++ val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer);
++ if (!val)
++ return false;
++ nval = val - 1;
++ if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer,
++ val, nval) == val)
++ break;
++ }
++ return nval == 0;
++}
++
+ /**
+ * bond_3ad_state_machine_handler - handle state machines timeout
+ * @work: work context to fetch bonding struct to work on from
+@@ -2312,9 +2332,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
+ if (!bond_has_slaves(bond))
+ goto re_arm;
+
+- /* check if agg_select_timer timer after initialize is timed out */
+- if (BOND_AD_INFO(bond).agg_select_timer &&
+- !(--BOND_AD_INFO(bond).agg_select_timer)) {
++ if (bond_agg_timer_advance(bond)) {
+ slave = bond_first_slave_rcu(bond);
+ port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL;
+
+diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
+index 7d3752cbf761d..b29393831a302 100644
+--- a/drivers/net/bonding/bond_alb.c
++++ b/drivers/net/bonding/bond_alb.c
+@@ -657,10 +657,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
+ return NULL;
+ arp = (struct arp_pkt *)skb_network_header(skb);
+
+- /* Don't modify or load balance ARPs that do not originate locally
+- * (e.g.,arrive via a bridge).
++ /* Don't modify or load balance ARPs that do not originate
++ * from the bond itself or a VLAN directly above the bond.
+ */
+- if (!bond_slave_has_mac_rx(bond, arp->mac_src))
++ if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
+ return NULL;
+
+ if (arp->op_code == htons(ARPOP_REPLY)) {
+@@ -1281,12 +1281,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
+ return res;
+
+ if (rlb_enabled) {
+- bond->alb_info.rlb_enabled = 1;
+ res = rlb_initialize(bond);
+ if (res) {
+ tlb_deinitialize(bond);
+ return res;
+ }
++ bond->alb_info.rlb_enabled = 1;
+ } else {
+ bond->alb_info.rlb_enabled = 0;
+ }
+@@ -1502,14 +1502,14 @@ void bond_alb_monitor(struct work_struct *work)
+ struct slave *slave;
+
+ if (!bond_has_slaves(bond)) {
+- bond_info->tx_rebalance_counter = 0;
++ atomic_set(&bond_info->tx_rebalance_counter, 0);
+ bond_info->lp_counter = 0;
+ goto re_arm;
+ }
+
+ rcu_read_lock();
+
+- bond_info->tx_rebalance_counter++;
++ atomic_inc(&bond_info->tx_rebalance_counter);
+ bond_info->lp_counter++;
+
+ /* send learning packets */
+@@ -1531,7 +1531,7 @@ void bond_alb_monitor(struct work_struct *work)
+ }
+
+ /* rebalance tx traffic */
+- if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
++ if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ tlb_clear_slave(bond, slave, 1);
+ if (slave == rcu_access_pointer(bond->curr_active_slave)) {
+@@ -1541,7 +1541,7 @@ void bond_alb_monitor(struct work_struct *work)
+ bond_info->unbalanced_load = 0;
+ }
+ }
+- bond_info->tx_rebalance_counter = 0;
++ atomic_set(&bond_info->tx_rebalance_counter, 0);
+ }
+
+ if (bond_info->rlb_enabled) {
+@@ -1611,7 +1611,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
+ tlb_init_slave(slave);
+
+ /* order a rebalance ASAP */
+- bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
++ atomic_set(&bond->alb_info.tx_rebalance_counter,
++ BOND_TLB_REBALANCE_TICKS);
+
+ if (bond->alb_info.rlb_enabled)
+ bond->alb_info.rlb_rebalance = 1;
+@@ -1648,7 +1649,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
+ rlb_clear_slave(bond, slave);
+ } else if (link == BOND_LINK_UP) {
+ /* order a rebalance ASAP */
+- bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
++ atomic_set(&bond_info->tx_rebalance_counter,
++ BOND_TLB_REBALANCE_TICKS);
+ if (bond->alb_info.rlb_enabled) {
+ bond->alb_info.rlb_rebalance = 1;
+ /* If the updelay module parameter is smaller than the
+diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
+index 4f9b4a18c74cd..5940945266489 100644
+--- a/drivers/net/bonding/bond_debugfs.c
++++ b/drivers/net/bonding/bond_debugfs.c
+@@ -76,7 +76,7 @@ void bond_debug_reregister(struct bonding *bond)
+
+ d = debugfs_rename(bonding_debug_root, bond->debug_dir,
+ bonding_debug_root, bond->dev->name);
+- if (d) {
++ if (!IS_ERR(d)) {
+ bond->debug_dir = d;
+ } else {
+ netdev_warn(bond->dev, "failed to reregister, so just unregister old one\n");
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index 77dc79a7f5748..e64c652b78f03 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -862,12 +862,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,
+ dev_uc_unsync(slave_dev, bond_dev);
+ dev_mc_unsync(slave_dev, bond_dev);
+
+- if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+- /* del lacpdu mc addr from mc list */
+- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+-
+- dev_mc_del(slave_dev, lacpdu_multicast);
+- }
++ if (BOND_MODE(bond) == BOND_MODE_8023AD)
++ dev_mc_del(slave_dev, lacpdu_mcast_addr);
+ }
+
+ /*--------------------------- Active slave change ---------------------------*/
+@@ -887,7 +883,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
+ if (bond->dev->flags & IFF_ALLMULTI)
+ dev_set_allmulti(old_active->dev, -1);
+
+- bond_hw_addr_flush(bond->dev, old_active->dev);
++ if (bond->dev->flags & IFF_UP)
++ bond_hw_addr_flush(bond->dev, old_active->dev);
+ }
+
+ if (new_active) {
+@@ -898,10 +895,12 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
+ if (bond->dev->flags & IFF_ALLMULTI)
+ dev_set_allmulti(new_active->dev, 1);
+
+- netif_addr_lock_bh(bond->dev);
+- dev_uc_sync(new_active->dev, bond->dev);
+- dev_mc_sync(new_active->dev, bond->dev);
+- netif_addr_unlock_bh(bond->dev);
++ if (bond->dev->flags & IFF_UP) {
++ netif_addr_lock_bh(bond->dev);
++ dev_uc_sync(new_active->dev, bond->dev);
++ dev_mc_sync(new_active->dev, bond->dev);
++ netif_addr_unlock_bh(bond->dev);
++ }
+ }
+ }
+
+@@ -1096,9 +1095,6 @@ static bool bond_should_notify_peers(struct bonding *bond)
+ slave = rcu_dereference(bond->curr_active_slave);
+ rcu_read_unlock();
+
+- netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
+- slave ? slave->dev->name : "NULL");
+-
+ if (!slave || !bond->send_peer_notif ||
+ bond->send_peer_notif %
+ max(1, bond->params.peer_notif_delay) != 0 ||
+@@ -1106,6 +1102,9 @@ static bool bond_should_notify_peers(struct bonding *bond)
+ test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
+ return false;
+
++ netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
++ slave ? slave->dev->name : "NULL");
++
+ return true;
+ }
+
+@@ -1483,6 +1482,11 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
+
+ memcpy(bond_dev->broadcast, slave_dev->broadcast,
+ slave_dev->addr_len);
++
++ if (slave_dev->flags & IFF_POINTOPOINT) {
++ bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
++ bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
++ }
+ }
+
+ /* On bonding slaves other than the currently active slave, suppress
+@@ -1745,6 +1749,20 @@ void bond_lower_state_changed(struct slave *slave)
+ slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg); \
+ } while (0)
+
++/* The bonding driver uses ether_setup() to convert a master bond device
++ * to ARPHRD_ETHER, that resets the target netdevice's flags so we always
++ * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP
++ * if they were set
++ */
++static void bond_ether_setup(struct net_device *bond_dev)
++{
++ unsigned int flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);
++
++ ether_setup(bond_dev);
++ bond_dev->flags |= IFF_MASTER | flags;
++ bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
++}
++
+ /* enslave device <slave> to bond device <master> */
+ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
+@@ -1836,10 +1854,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+
+ if (slave_dev->type != ARPHRD_ETHER)
+ bond_setup_by_slave(bond_dev, slave_dev);
+- else {
+- ether_setup(bond_dev);
+- bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+- }
++ else
++ bond_ether_setup(bond_dev);
+
+ call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
+ bond_dev);
+@@ -2134,16 +2150,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ }
+ }
+
+- netif_addr_lock_bh(bond_dev);
+- dev_mc_sync_multiple(slave_dev, bond_dev);
+- dev_uc_sync_multiple(slave_dev, bond_dev);
+- netif_addr_unlock_bh(bond_dev);
++ if (bond_dev->flags & IFF_UP) {
++ netif_addr_lock_bh(bond_dev);
++ dev_mc_sync_multiple(slave_dev, bond_dev);
++ dev_uc_sync_multiple(slave_dev, bond_dev);
++ netif_addr_unlock_bh(bond_dev);
+
+- if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+- /* add lacpdu mc addr to mc list */
+- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+-
+- dev_mc_add(slave_dev, lacpdu_multicast);
++ if (BOND_MODE(bond) == BOND_MODE_8023AD)
++ dev_mc_add(slave_dev, lacpdu_mcast_addr);
+ }
+ }
+
+@@ -2259,9 +2273,7 @@ err_undo_flags:
+ eth_hw_addr_random(bond_dev);
+ if (bond_dev->type != ARPHRD_ETHER) {
+ dev_close(bond_dev);
+- ether_setup(bond_dev);
+- bond_dev->flags |= IFF_MASTER;
+- bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
++ bond_ether_setup(bond_dev);
+ }
+ }
+
+@@ -2377,10 +2389,9 @@ static int __bond_release_one(struct net_device *bond_dev,
+ bond_select_active_slave(bond);
+ }
+
+- if (!bond_has_slaves(bond)) {
+- bond_set_carrier(bond);
++ bond_set_carrier(bond);
++ if (!bond_has_slaves(bond))
+ eth_hw_addr_random(bond_dev);
+- }
+
+ unblock_netpoll_tx();
+ synchronize_rcu();
+@@ -2416,7 +2427,8 @@ static int __bond_release_one(struct net_device *bond_dev,
+ if (old_flags & IFF_ALLMULTI)
+ dev_set_allmulti(slave_dev, -1);
+
+- bond_hw_addr_flush(bond_dev, slave_dev);
++ if (old_flags & IFF_UP)
++ bond_hw_addr_flush(bond_dev, slave_dev);
+ }
+
+ slave_disable_netpoll(slave);
+@@ -2502,12 +2514,21 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
+ /* called with rcu_read_lock() */
+ static int bond_miimon_inspect(struct bonding *bond)
+ {
++ bool ignore_updelay = false;
+ int link_state, commit = 0;
+ struct list_head *iter;
+ struct slave *slave;
+- bool ignore_updelay;
+
+- ignore_updelay = !rcu_dereference(bond->curr_active_slave);
++ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
++ ignore_updelay = !rcu_dereference(bond->curr_active_slave);
++ } else {
++ struct bond_up_slave *usable_slaves;
++
++ usable_slaves = rcu_dereference(bond->usable_slaves);
++
++ if (usable_slaves && usable_slaves->count == 0)
++ ignore_updelay = true;
++ }
+
+ bond_for_each_slave_rcu(bond, slave, iter) {
+ bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
+@@ -3129,8 +3150,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
+ * when the source ip is 0, so don't take the link down
+ * if we don't know our ip yet
+ */
+- if (!bond_time_in_interval(bond, trans_start, 2) ||
+- !bond_time_in_interval(bond, slave->last_rx, 2)) {
++ if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
++ !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
+
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
+ slave_state_changed = 1;
+@@ -3224,7 +3245,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
+
+ /* Backup slave is down if:
+ * - No current_arp_slave AND
+- * - more than 3*delta since last receive AND
++ * - more than (missed_max+1)*delta since last receive AND
+ * - the bond has an IP address
+ *
+ * Note: a non-null current_arp_slave indicates
+@@ -3236,20 +3257,20 @@ static int bond_ab_arp_inspect(struct bonding *bond)
+ */
+ if (!bond_is_active_slave(slave) &&
+ !rcu_access_pointer(bond->current_arp_slave) &&
+- !bond_time_in_interval(bond, last_rx, 3)) {
++ !bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) {
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
+ commit++;
+ }
+
+ /* Active slave is down if:
+- * - more than 2*delta since transmitting OR
+- * - (more than 2*delta since receive AND
++ * - more than missed_max*delta since transmitting OR
++ * - (more than missed_max*delta since receive AND
+ * the bond has an IP address)
+ */
+ trans_start = dev_trans_start(slave->dev);
+ if (bond_is_active_slave(slave) &&
+- (!bond_time_in_interval(bond, trans_start, 2) ||
+- !bond_time_in_interval(bond, last_rx, 2))) {
++ (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
++ !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
+ commit++;
+ }
+@@ -3475,9 +3496,11 @@ re_arm:
+ if (!rtnl_trylock())
+ return;
+
+- if (should_notify_peers)
++ if (should_notify_peers) {
++ bond->send_peer_notif--;
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+ bond->dev);
++ }
+ if (should_notify_rtnl) {
+ bond_slave_state_notify(bond);
+ bond_slave_link_notify(bond);
+@@ -3635,7 +3658,11 @@ static int bond_slave_netdev_event(unsigned long event,
+ unblock_netpoll_tx();
+ break;
+ case NETDEV_FEAT_CHANGE:
+- bond_compute_features(bond);
++ if (!bond->notifier_ctx) {
++ bond->notifier_ctx = true;
++ bond_compute_features(bond);
++ bond->notifier_ctx = false;
++ }
+ break;
+ case NETDEV_RESEND_IGMP:
+ /* Propagate to master device */
+@@ -3818,14 +3845,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v
+ return true;
+ }
+
+-static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
++static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy)
+ {
+ hash ^= (__force u32)flow_get_u32_dst(flow) ^
+ (__force u32)flow_get_u32_src(flow);
+ hash ^= (hash >> 16);
+ hash ^= (hash >> 8);
++
+ /* discard lowest hash bit to deal with the common even ports pattern */
+- return hash >> 1;
++ if (xmit_policy == BOND_XMIT_POLICY_LAYER34 ||
++ xmit_policy == BOND_XMIT_POLICY_ENCAP34)
++ return hash >> 1;
++
++ return hash;
+ }
+
+ /* Generate hash based on xmit policy. If @skb is given it is used to linearize
+@@ -3855,7 +3887,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ }
+
+- return bond_ip_hash(hash, &flow);
++ return bond_ip_hash(hash, &flow, bond->params.xmit_policy);
+ }
+
+ /**
+@@ -3872,8 +3904,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+ skb->l4_hash)
+ return skb->hash;
+
+- return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+- skb->mac_header, skb->network_header,
++ return __bond_xmit_hash(bond, skb, skb->data, skb->protocol,
++ 0, skb_network_offset(skb),
+ skb_headlen(skb));
+ }
+
+@@ -3926,6 +3958,12 @@ static int bond_open(struct net_device *bond_dev)
+ struct list_head *iter;
+ struct slave *slave;
+
++ if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) {
++ bond->rr_tx_counter = alloc_percpu(u32);
++ if (!bond->rr_tx_counter)
++ return -ENOMEM;
++ }
++
+ /* reset slave->backup and slave->inactive */
+ if (bond_has_slaves(bond)) {
+ bond_for_each_slave(bond, slave, iter) {
+@@ -3963,6 +4001,9 @@ static int bond_open(struct net_device *bond_dev)
+ /* register to receive LACPDUs */
+ bond->recv_probe = bond_3ad_lacpdu_recv;
+ bond_3ad_initiate_agg_selection(bond, 1);
++
++ bond_for_each_slave(bond, slave, iter)
++ dev_mc_add(slave->dev, lacpdu_mcast_addr);
+ }
+
+ if (bond_mode_can_use_xmit_hash(bond))
+@@ -3974,6 +4015,7 @@ static int bond_open(struct net_device *bond_dev)
+ static int bond_close(struct net_device *bond_dev)
+ {
+ struct bonding *bond = netdev_priv(bond_dev);
++ struct slave *slave;
+
+ bond_work_cancel_all(bond);
+ bond->send_peer_notif = 0;
+@@ -3981,6 +4023,19 @@ static int bond_close(struct net_device *bond_dev)
+ bond_alb_deinitialize(bond);
+ bond->recv_probe = NULL;
+
++ if (bond_uses_primary(bond)) {
++ rcu_read_lock();
++ slave = rcu_dereference(bond->curr_active_slave);
++ if (slave)
++ bond_hw_addr_flush(bond_dev, slave->dev);
++ rcu_read_unlock();
++ } else {
++ struct list_head *iter;
++
++ bond_for_each_slave(bond, slave, iter)
++ bond_hw_addr_flush(bond_dev, slave->dev);
++ }
++
+ return 0;
+ }
+
+@@ -4843,25 +4898,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave = NULL;
+ struct list_head *iter;
++ bool xmit_suc = false;
++ bool skb_used = false;
+
+ bond_for_each_slave_rcu(bond, slave, iter) {
+- if (bond_is_last_slave(bond, slave))
+- break;
+- if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
+- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
++ struct sk_buff *skb2;
++
++ if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
++ continue;
+
++ if (bond_is_last_slave(bond, slave)) {
++ skb2 = skb;
++ skb_used = true;
++ } else {
++ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
+ bond_dev->name, __func__);
+ continue;
+ }
+- bond_dev_queue_xmit(bond, skb2, slave->dev);
+ }
++
++ if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
++ xmit_suc = true;
+ }
+- if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
+- return bond_dev_queue_xmit(bond, skb, slave->dev);
+
+- return bond_tx_drop(bond_dev, skb);
++ if (!skb_used)
++ dev_kfree_skb_any(skb);
++
++ if (xmit_suc)
++ return NETDEV_TX_OK;
++
++ atomic_long_inc(&bond_dev->tx_dropped);
++ return NET_XMIT_DROP;
+ }
+
+ /*------------------------- Device initialization ---------------------------*/
+@@ -4999,7 +5068,7 @@ static u32 bond_sk_hash_l34(struct sock *sk)
+ /* L4 */
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ /* L3 */
+- return bond_ip_hash(hash, &flow);
++ return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34);
+ }
+
+ static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
+@@ -5422,7 +5491,9 @@ void bond_setup(struct net_device *bond_dev)
+
+ bond_dev->hw_features = BOND_VLAN_FEATURES |
+ NETIF_F_HW_VLAN_CTAG_RX |
+- NETIF_F_HW_VLAN_CTAG_FILTER;
++ NETIF_F_HW_VLAN_CTAG_FILTER |
++ NETIF_F_HW_VLAN_STAG_RX |
++ NETIF_F_HW_VLAN_STAG_FILTER;
+
+ bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+ bond_dev->features |= bond_dev->hw_features;
+@@ -5822,6 +5893,7 @@ static int bond_check_params(struct bond_params *params)
+ params->arp_interval = arp_interval;
+ params->arp_validate = arp_validate_value;
+ params->arp_all_targets = arp_all_targets_value;
++ params->missed_max = 2;
+ params->updelay = updelay;
+ params->downdelay = downdelay;
+ params->peer_notif_delay = 0;
+@@ -5872,14 +5944,7 @@ static int bond_init(struct net_device *bond_dev)
+ if (!bond->wq)
+ return -ENOMEM;
+
+- if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
+- bond->rr_tx_counter = alloc_percpu(u32);
+- if (!bond->rr_tx_counter) {
+- destroy_workqueue(bond->wq);
+- bond->wq = NULL;
+- return -ENOMEM;
+- }
+- }
++ bond->notifier_ctx = false;
+
+ spin_lock_init(&bond->stats_lock);
+ netdev_lockdep_set_classes(bond_dev);
+diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
+index 5d54e11d18fa5..7398accd46805 100644
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -79,6 +79,11 @@ nla_put_failure:
+ return -EMSGSIZE;
+ }
+
++/* Limit the max delay range to 300s */
++static struct netlink_range_validation delay_range = {
++ .max = 300000,
++};
++
+ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
+ [IFLA_BOND_MODE] = { .type = NLA_U8 },
+ [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 },
+@@ -109,7 +114,8 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
+ [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 },
+- [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 },
++ [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range),
++ [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 },
+ };
+
+ static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
+@@ -453,6 +459,15 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
+ return err;
+ }
+
++ if (data[IFLA_BOND_MISSED_MAX]) {
++ int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]);
++
++ bond_opt_initval(&newval, missed_max);
++ err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval);
++ if (err)
++ return err;
++ }
++
+ return 0;
+ }
+
+@@ -515,6 +530,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
+ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */
+ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */
++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */
+ 0;
+ }
+
+@@ -650,6 +666,10 @@ static int bond_fill_info(struct sk_buff *skb,
+ bond->params.tlb_dynamic_lb))
+ goto nla_put_failure;
+
++ if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX,
++ bond->params.missed_max))
++ goto nla_put_failure;
++
+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+ struct ad_info info;
+
+diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
+index a8fde3bc458f6..5f883a18bbabd 100644
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -78,6 +78,8 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
+ static int bond_option_ad_user_port_key_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
++static int bond_option_missed_max_set(struct bonding *bond,
++ const struct bond_opt_value *newval);
+
+
+ static const struct bond_opt_value bond_mode_tbl[] = {
+@@ -163,6 +165,12 @@ static const struct bond_opt_value bond_num_peer_notif_tbl[] = {
+ { NULL, -1, 0}
+ };
+
++static const struct bond_opt_value bond_peer_notif_delay_tbl[] = {
++ { "off", 0, 0},
++ { "maxval", 300000, BOND_VALFLAG_MAX},
++ { NULL, -1, 0}
++};
++
+ static const struct bond_opt_value bond_primary_reselect_tbl[] = {
+ { "always", BOND_PRI_RESELECT_ALWAYS, BOND_VALFLAG_DEFAULT},
+ { "better", BOND_PRI_RESELECT_BETTER, 0},
+@@ -213,6 +221,13 @@ static const struct bond_opt_value bond_ad_user_port_key_tbl[] = {
+ { NULL, -1, 0},
+ };
+
++static const struct bond_opt_value bond_missed_max_tbl[] = {
++ { "minval", 1, BOND_VALFLAG_MIN},
++ { "maxval", 255, BOND_VALFLAG_MAX},
++ { "default", 2, BOND_VALFLAG_DEFAULT},
++ { NULL, -1, 0},
++};
++
+ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
+ [BOND_OPT_MODE] = {
+ .id = BOND_OPT_MODE,
+@@ -270,6 +285,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
+ .values = bond_intmax_tbl,
+ .set = bond_option_arp_interval_set
+ },
++ [BOND_OPT_MISSED_MAX] = {
++ .id = BOND_OPT_MISSED_MAX,
++ .name = "arp_missed_max",
++ .desc = "Maximum number of missed ARP interval",
++ .unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) |
++ BIT(BOND_MODE_ALB),
++ .values = bond_missed_max_tbl,
++ .set = bond_option_missed_max_set
++ },
+ [BOND_OPT_ARP_TARGETS] = {
+ .id = BOND_OPT_ARP_TARGETS,
+ .name = "arp_ip_target",
+@@ -449,7 +473,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
+ .id = BOND_OPT_PEER_NOTIF_DELAY,
+ .name = "peer_notif_delay",
+ .desc = "Delay between each peer notification on failover event, in milliseconds",
+- .values = bond_intmax_tbl,
++ .values = bond_peer_notif_delay_tbl,
+ .set = bond_option_peer_notif_delay_set
+ }
+ };
+@@ -1186,6 +1210,16 @@ static int bond_option_arp_all_targets_set(struct bonding *bond,
+ return 0;
+ }
+
++static int bond_option_missed_max_set(struct bonding *bond,
++ const struct bond_opt_value *newval)
++{
++ netdev_dbg(bond->dev, "Setting missed max to %s (%llu)\n",
++ newval->string, newval->value);
++ bond->params.missed_max = newval->value;
++
++ return 0;
++}
++
+ static int bond_option_primary_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+ {
+@@ -1526,7 +1560,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
+ mac = (u8 *)&newval->value;
+ }
+
+- if (!is_valid_ether_addr(mac))
++ if (is_multicast_ether_addr(mac))
+ goto err;
+
+ netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac);
+diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
+index f3e3bfd72556c..2ec11af5f0cce 100644
+--- a/drivers/net/bonding/bond_procfs.c
++++ b/drivers/net/bonding/bond_procfs.c
+@@ -115,6 +115,8 @@ static void bond_info_show_master(struct seq_file *seq)
+
+ seq_printf(seq, "ARP Polling Interval (ms): %d\n",
+ bond->params.arp_interval);
++ seq_printf(seq, "ARP Missed Max: %u\n",
++ bond->params.missed_max);
+
+ seq_printf(seq, "ARP IP target/s (n.n.n.n form):");
+
+diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
+index b9e9842fed94e..22aa22f4e0882 100644
+--- a/drivers/net/bonding/bond_sysfs.c
++++ b/drivers/net/bonding/bond_sysfs.c
+@@ -303,6 +303,18 @@ static ssize_t bonding_show_arp_targets(struct device *d,
+ static DEVICE_ATTR(arp_ip_target, 0644,
+ bonding_show_arp_targets, bonding_sysfs_store_option);
+
++/* Show the arp missed max. */
++static ssize_t bonding_show_missed_max(struct device *d,
++ struct device_attribute *attr,
++ char *buf)
++{
++ struct bonding *bond = to_bond(d);
++
++ return sprintf(buf, "%u\n", bond->params.missed_max);
++}
++static DEVICE_ATTR(arp_missed_max, 0644,
++ bonding_show_missed_max, bonding_sysfs_store_option);
++
+ /* Show the up and down delays. */
+ static ssize_t bonding_show_downdelay(struct device *d,
+ struct device_attribute *attr,
+@@ -779,6 +791,7 @@ static struct attribute *per_bond_attrs[] = {
+ &dev_attr_ad_actor_sys_prio.attr,
+ &dev_attr_ad_actor_system.attr,
+ &dev_attr_ad_user_port_key.attr,
++ &dev_attr_arp_missed_max.attr,
+ NULL,
+ };
+
+diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
+index fd07561da0348..6a6cdd0bb2585 100644
+--- a/drivers/net/bonding/bond_sysfs_slave.c
++++ b/drivers/net/bonding/bond_sysfs_slave.c
+@@ -108,15 +108,15 @@ static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf)
+ }
+ static SLAVE_ATTR_RO(ad_partner_oper_port_state);
+
+-static const struct slave_attribute *slave_attrs[] = {
+- &slave_attr_state,
+- &slave_attr_mii_status,
+- &slave_attr_link_failure_count,
+- &slave_attr_perm_hwaddr,
+- &slave_attr_queue_id,
+- &slave_attr_ad_aggregator_id,
+- &slave_attr_ad_actor_oper_port_state,
+- &slave_attr_ad_partner_oper_port_state,
++static const struct attribute *slave_attrs[] = {
++ &slave_attr_state.attr,
++ &slave_attr_mii_status.attr,
++ &slave_attr_link_failure_count.attr,
++ &slave_attr_perm_hwaddr.attr,
++ &slave_attr_queue_id.attr,
++ &slave_attr_ad_aggregator_id.attr,
++ &slave_attr_ad_actor_oper_port_state.attr,
++ &slave_attr_ad_partner_oper_port_state.attr,
+ NULL
+ };
+
+@@ -137,24 +137,10 @@ const struct sysfs_ops slave_sysfs_ops = {
+
+ int bond_sysfs_slave_add(struct slave *slave)
+ {
+- const struct slave_attribute **a;
+- int err;
+-
+- for (a = slave_attrs; *a; ++a) {
+- err = sysfs_create_file(&slave->kobj, &((*a)->attr));
+- if (err) {
+- kobject_put(&slave->kobj);
+- return err;
+- }
+- }
+-
+- return 0;
++ return sysfs_create_files(&slave->kobj, slave_attrs);
+ }
+
+ void bond_sysfs_slave_del(struct slave *slave)
+ {
+- const struct slave_attribute **a;
+-
+- for (a = slave_attrs; *a; ++a)
+- sysfs_remove_file(&slave->kobj, &((*a)->attr));
++ sysfs_remove_files(&slave->kobj, slave_attrs);
+ }
+diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
+index 91230894692d2..da87de02b2fcc 100644
+--- a/drivers/net/caif/caif_virtio.c
++++ b/drivers/net/caif/caif_virtio.c
+@@ -721,13 +721,21 @@ static int cfv_probe(struct virtio_device *vdev)
+ /* Carrier is off until netdevice is opened */
+ netif_carrier_off(netdev);
+
++ /* serialize netdev register + virtio_device_ready() with ndo_open() */
++ rtnl_lock();
++
+ /* register Netdev */
+- err = register_netdev(netdev);
++ err = register_netdevice(netdev);
+ if (err) {
++ rtnl_unlock();
+ dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err);
+ goto err;
+ }
+
++ virtio_device_ready(vdev);
++
++ rtnl_unlock();
++
+ debugfs_init(cfv);
+
+ return 0;
+diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
+index 08b6efa7a1a77..ae55eaca7b5e1 100644
+--- a/drivers/net/can/c_can/c_can.h
++++ b/drivers/net/can/c_can/c_can.h
+@@ -236,9 +236,22 @@ static inline u8 c_can_get_tx_tail(const struct c_can_tx_ring *ring)
+ return ring->tail & (ring->obj_num - 1);
+ }
+
+-static inline u8 c_can_get_tx_free(const struct c_can_tx_ring *ring)
++static inline u8 c_can_get_tx_free(const struct c_can_priv *priv,
++ const struct c_can_tx_ring *ring)
+ {
+- return ring->obj_num - (ring->head - ring->tail);
++ u8 head = c_can_get_tx_head(ring);
++ u8 tail = c_can_get_tx_tail(ring);
++
++ if (priv->type == BOSCH_D_CAN)
++ return ring->obj_num - (ring->head - ring->tail);
++
++ /* This is not a FIFO. C/D_CAN sends out the buffers
++ * prioritized. The lowest buffer number wins.
++ */
++ if (head < tail)
++ return 0;
++
++ return ring->obj_num - head;
+ }
+
+ #endif /* C_CAN_H */
+diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c
+index 52671d1ea17d5..e04d4e7cc8683 100644
+--- a/drivers/net/can/c_can/c_can_main.c
++++ b/drivers/net/can/c_can/c_can_main.c
+@@ -430,7 +430,7 @@ static void c_can_setup_receive_object(struct net_device *dev, int iface,
+ static bool c_can_tx_busy(const struct c_can_priv *priv,
+ const struct c_can_tx_ring *tx_ring)
+ {
+- if (c_can_get_tx_free(tx_ring) > 0)
++ if (c_can_get_tx_free(priv, tx_ring) > 0)
+ return false;
+
+ netif_stop_queue(priv->dev);
+@@ -438,7 +438,7 @@ static bool c_can_tx_busy(const struct c_can_priv *priv,
+ /* Memory barrier before checking tx_free (head and tail) */
+ smp_mb();
+
+- if (c_can_get_tx_free(tx_ring) == 0) {
++ if (c_can_get_tx_free(priv, tx_ring) == 0) {
+ netdev_dbg(priv->dev,
+ "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n",
+ tx_ring->head, tx_ring->tail,
+@@ -466,7 +466,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
+
+ idx = c_can_get_tx_head(tx_ring);
+ tx_ring->head++;
+- if (c_can_get_tx_free(tx_ring) == 0)
++ if (c_can_get_tx_free(priv, tx_ring) == 0)
+ netif_stop_queue(dev);
+
+ if (idx < c_can_get_tx_tail(tx_ring))
+@@ -751,7 +751,7 @@ static void c_can_do_tx(struct net_device *dev)
+ return;
+
+ tx_ring->tail += pkts;
+- if (c_can_get_tx_free(tx_ring)) {
++ if (c_can_get_tx_free(priv, tx_ring)) {
+ /* Make sure that anybody stopping the queue after
+ * this sees the new tx_ring->tail.
+ */
+@@ -764,8 +764,7 @@ static void c_can_do_tx(struct net_device *dev)
+ can_led_event(dev, CAN_LED_EVENT_TX);
+
+ tail = c_can_get_tx_tail(tx_ring);
+-
+- if (tail == 0) {
++ if (priv->type == BOSCH_D_CAN && tail == 0) {
+ u8 head = c_can_get_tx_head(tx_ring);
+
+ /* Start transmission for all cached messages */
+diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
+index 194c86e0f340f..8f6dccd5a5879 100644
+--- a/drivers/net/can/cc770/cc770_isa.c
++++ b/drivers/net/can/cc770/cc770_isa.c
+@@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev)
+ if (err) {
+ dev_err(&pdev->dev,
+ "couldn't register device (err=%d)\n", err);
+- goto exit_unmap;
++ goto exit_free;
+ }
+
+ dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n",
+ priv->reg_base, dev->irq);
+ return 0;
+
+- exit_unmap:
++exit_free:
++ free_cc770dev(dev);
++exit_unmap:
+ if (mem[idx])
+ iounmap(base);
+- exit_release:
++exit_release:
+ if (mem[idx])
+ release_mem_region(mem[idx], iosize);
+ else
+ release_region(port[idx], iosize);
+- exit:
++exit:
+ return err;
+ }
+
+diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
+index f49170eadd547..b1b5a82f08299 100644
+--- a/drivers/net/can/dev/bittiming.c
++++ b/drivers/net/can/dev/bittiming.c
+@@ -209,7 +209,7 @@ static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt,
+ const struct can_bittiming_const *btc)
+ {
+ struct can_priv *priv = netdev_priv(dev);
+- int tseg1, alltseg;
++ unsigned int tseg1, alltseg;
+ u64 brp64;
+
+ tseg1 = bt->prop_seg + bt->phase_seg1;
+diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
+index 80425636049d2..29e2beae3357b 100644
+--- a/drivers/net/can/dev/netlink.c
++++ b/drivers/net/can/dev/netlink.c
+@@ -76,7 +76,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
+ * directly via do_set_bitrate(). Bail out if neither
+ * is given.
+ */
+- if (!priv->bittiming_const && !priv->do_set_bittiming)
++ if (!priv->bittiming_const && !priv->do_set_bittiming &&
++ !priv->bitrate_const)
+ return -EOPNOTSUPP;
+
+ memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt));
+@@ -169,7 +170,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
+ * directly via do_set_bitrate(). Bail out if neither
+ * is given.
+ */
+- if (!priv->data_bittiming_const && !priv->do_set_data_bittiming)
++ if (!priv->data_bittiming_const && !priv->do_set_data_bittiming &&
++ !priv->data_bitrate_const)
+ return -EOPNOTSUPP;
+
+ memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]),
+diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
+index 7734229aa0788..837bca7347594 100644
+--- a/drivers/net/can/flexcan.c
++++ b/drivers/net/can/flexcan.c
+@@ -173,9 +173,9 @@
+
+ /* FLEXCAN interrupt flag register (IFLAG) bits */
+ /* Errata ERR005829 step7: Reserve first valid MB */
+-#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
+-#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0
+-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
++#define FLEXCAN_TX_MB_RESERVED_RX_FIFO 8
++#define FLEXCAN_TX_MB_RESERVED_RX_MAILBOX 0
++#define FLEXCAN_RX_MB_RX_MAILBOX_FIRST (FLEXCAN_TX_MB_RESERVED_RX_MAILBOX + 1)
+ #define FLEXCAN_IFLAG_MB(x) BIT_ULL(x)
+ #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7)
+ #define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6)
+@@ -234,8 +234,8 @@
+ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3)
+ /* Disable non-correctable errors interrupt and freeze mode */
+ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4)
+-/* Use timestamp based offloading */
+-#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5)
++/* Use mailboxes (not FIFO) for RX path */
++#define FLEXCAN_QUIRK_USE_RX_MAILBOX BIT(5)
+ /* No interrupt for error passive */
+ #define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6)
+ /* default to BE register access */
+@@ -252,6 +252,12 @@
+ #define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12)
+ /* Setup 16 mailboxes */
+ #define FLEXCAN_QUIRK_NR_MB_16 BIT(13)
++/* Device supports RX via mailboxes */
++#define FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX BIT(14)
++/* Device supports RTR reception via mailboxes */
++#define FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR BIT(15)
++/* Device supports RX via FIFO */
++#define FLEXCAN_QUIRK_SUPPPORT_RX_FIFO BIT(16)
+
+ /* Structure of the message buffer */
+ struct flexcan_mb {
+@@ -365,7 +371,7 @@ struct flexcan_priv {
+
+ struct clk *clk_ipg;
+ struct clk *clk_per;
+- const struct flexcan_devtype_data *devtype_data;
++ struct flexcan_devtype_data devtype_data;
+ struct regulator *reg_xceiver;
+ struct flexcan_stop_mode stm;
+
+@@ -382,59 +388,78 @@ struct flexcan_priv {
+
+ static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+- FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16,
++ FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 |
++ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
+ };
+
+ static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+- FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN,
++ FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
+ };
+
+ static const struct flexcan_devtype_data fsl_imx25_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+- FLEXCAN_QUIRK_BROKEN_PERR_STATE,
++ FLEXCAN_QUIRK_BROKEN_PERR_STATE |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
+ };
+
+ static const struct flexcan_devtype_data fsl_imx28_devtype_data = {
+- .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE,
++ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO,
+ };
+
+ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+- FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR,
++ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
++ FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static const struct flexcan_devtype_data fsl_imx8qm_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+- FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW,
++ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
++ FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static struct flexcan_devtype_data fsl_imx8mp_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+- FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
++ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX |
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR |
+- FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC,
++ FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+- FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
+- FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SUPPORT_ECC,
++ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX |
++ FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SUPPORT_ECC |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+- FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
++ FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+- FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_SUPPORT_FD |
+- FLEXCAN_QUIRK_SUPPORT_ECC,
++ FLEXCAN_QUIRK_USE_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_FD |
++ FLEXCAN_QUIRK_SUPPORT_ECC |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR,
+ };
+
+ static const struct can_bittiming_const flexcan_bittiming_const = {
+@@ -596,7 +621,7 @@ static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv)
+ priv->write(reg_mcr, &regs->mcr);
+
+ /* enable stop request */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
+ ret = flexcan_stop_mode_enable_scfw(priv, true);
+ if (ret < 0)
+ return ret;
+@@ -615,7 +640,7 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv)
+ int ret;
+
+ /* remove stop request */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) {
+ ret = flexcan_stop_mode_enable_scfw(priv, false);
+ if (ret < 0)
+ return ret;
+@@ -1011,14 +1036,9 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
+ u32 reg_ctrl, reg_id, reg_iflag1;
+ int i;
+
+- if (unlikely(drop)) {
+- skb = ERR_PTR(-ENOBUFS);
+- goto mark_as_read;
+- }
+-
+ mb = flexcan_get_mb(priv, n);
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
+ u32 code;
+
+ do {
+@@ -1044,6 +1064,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
+ reg_ctrl = priv->read(&mb->can_ctrl);
+ }
+
++ if (unlikely(drop)) {
++ skb = ERR_PTR(-ENOBUFS);
++ goto mark_as_read;
++ }
++
+ if (reg_ctrl & FLEXCAN_MB_CNT_EDL)
+ skb = alloc_canfd_skb(offload->dev, &cfd);
+ else
+@@ -1083,7 +1108,7 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
+ }
+
+ mark_as_read:
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
+ flexcan_write64(priv, FLEXCAN_IFLAG_MB(n), &regs->iflag1);
+ else
+ priv->write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
+@@ -1109,7 +1134,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
+ enum can_state last_state = priv->can.state;
+
+ /* reception interrupt */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
+ u64 reg_iflag_rx;
+ int ret;
+
+@@ -1169,7 +1194,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
+
+ /* state change interrupt or broken error state quirk fix is enabled */
+ if ((reg_esr & FLEXCAN_ESR_ERR_STATE) ||
+- (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE |
++ (priv->devtype_data.quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE |
+ FLEXCAN_QUIRK_BROKEN_PERR_STATE)))
+ flexcan_irq_state(dev, reg_esr);
+
+@@ -1191,11 +1216,11 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
+ * (1): enabled if FLEXCAN_QUIRK_BROKEN_WERR_STATE is enabled
+ */
+ if ((last_state != priv->can.state) &&
+- (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) &&
++ (priv->devtype_data.quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) &&
+ !(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) {
+ switch (priv->can.state) {
+ case CAN_STATE_ERROR_ACTIVE:
+- if (priv->devtype_data->quirks &
++ if (priv->devtype_data.quirks &
+ FLEXCAN_QUIRK_BROKEN_WERR_STATE)
+ flexcan_error_irq_enable(priv);
+ else
+@@ -1423,26 +1448,26 @@ static int flexcan_rx_offload_setup(struct net_device *dev)
+ else
+ priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_MB_16)
+ priv->mb_count = 16;
+ else
+ priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
+ (sizeof(priv->regs->mb[1]) / priv->mb_size);
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
+ priv->tx_mb_reserved =
+- flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP);
++ flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_RX_MAILBOX);
+ else
+ priv->tx_mb_reserved =
+- flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_OFF_FIFO);
++ flexcan_get_mb(priv, FLEXCAN_TX_MB_RESERVED_RX_FIFO);
+ priv->tx_mb_idx = priv->mb_count - 1;
+ priv->tx_mb = flexcan_get_mb(priv, priv->tx_mb_idx);
+ priv->tx_mask = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
+
+ priv->offload.mailbox_read = flexcan_mailbox_read;
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+- priv->offload.mb_first = FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST;
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
++ priv->offload.mb_first = FLEXCAN_RX_MB_RX_MAILBOX_FIRST;
+ priv->offload.mb_last = priv->mb_count - 2;
+
+ priv->rx_mask = GENMASK_ULL(priv->offload.mb_last,
+@@ -1506,7 +1531,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ if (err)
+ goto out_chip_disable;
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_ECC)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_ECC)
+ flexcan_ram_init(dev);
+
+ flexcan_set_bittiming(dev);
+@@ -1532,10 +1557,10 @@ static int flexcan_chip_start(struct net_device *dev)
+ /* MCR
+ *
+ * FIFO:
+- * - disable for timestamp mode
++ * - disable for mailbox mode
+ * - enable for FIFO mode
+ */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX)
+ reg_mcr &= ~FLEXCAN_MCR_FEN;
+ else
+ reg_mcr |= FLEXCAN_MCR_FEN;
+@@ -1586,7 +1611,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ * on most Flexcan cores, too. Otherwise we don't get
+ * any error warning or passive interrupts.
+ */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE ||
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE ||
+ priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+ reg_ctrl |= FLEXCAN_CTRL_ERR_MSK;
+ else
+@@ -1599,7 +1624,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
+ priv->write(reg_ctrl, &regs->ctrl);
+
+- if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
++ if ((priv->devtype_data.quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) {
+ reg_ctrl2 = priv->read(&regs->ctrl2);
+ reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS;
+ priv->write(reg_ctrl2, &regs->ctrl2);
+@@ -1631,7 +1656,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ priv->write(reg_fdctrl, &regs->fdctrl);
+ }
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) {
+ for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) {
+ mb = flexcan_get_mb(priv, i);
+ priv->write(FLEXCAN_MB_CODE_RX_EMPTY,
+@@ -1639,7 +1664,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ }
+ } else {
+ /* clear and invalidate unused mailboxes first */
+- for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i < priv->mb_count; i++) {
++ for (i = FLEXCAN_TX_MB_RESERVED_RX_FIFO; i < priv->mb_count; i++) {
+ mb = flexcan_get_mb(priv, i);
+ priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
+ &mb->can_ctrl);
+@@ -1659,7 +1684,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ priv->write(0x0, &regs->rx14mask);
+ priv->write(0x0, &regs->rx15mask);
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_DISABLE_RXFG)
+ priv->write(0x0, &regs->rxfgmask);
+
+ /* clear acceptance filters */
+@@ -1673,7 +1698,7 @@ static int flexcan_chip_start(struct net_device *dev)
+ * This also works around errata e5295 which generates false
+ * positive memory errors and put the device in freeze mode.
+ */
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_MECR) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_DISABLE_MECR) {
+ /* Follow the protocol as described in "Detection
+ * and Correction of Memory Errors" to write to
+ * MECR register (step 1 - 5)
+@@ -1799,7 +1824,7 @@ static int flexcan_open(struct net_device *dev)
+ if (err)
+ goto out_can_rx_offload_disable;
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ err = request_irq(priv->irq_boff,
+ flexcan_irq, IRQF_SHARED, dev->name, dev);
+ if (err)
+@@ -1845,7 +1870,7 @@ static int flexcan_close(struct net_device *dev)
+ netif_stop_queue(dev);
+ flexcan_chip_interrupts_disable(dev);
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ free_irq(priv->irq_err, dev);
+ free_irq(priv->irq_boff, dev);
+ }
+@@ -2051,9 +2076,9 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
+
+ priv = netdev_priv(dev);
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW)
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW)
+ ret = flexcan_setup_stop_mode_scfw(pdev);
+- else if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR)
++ else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR)
+ ret = flexcan_setup_stop_mode_gpr(pdev);
+ else
+ /* return 0 directly if doesn't support stop mode feature */
+@@ -2164,8 +2189,25 @@ static int flexcan_probe(struct platform_device *pdev)
+ return -ENODEV;
+
+ if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
+- !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) {
+- dev_err(&pdev->dev, "CAN-FD mode doesn't work with FIFO mode!\n");
++ !((devtype_data->quirks &
++ (FLEXCAN_QUIRK_USE_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR |
++ FLEXCAN_QUIRK_SUPPPORT_RX_FIFO)) ==
++ (FLEXCAN_QUIRK_USE_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR))) {
++ dev_err(&pdev->dev, "CAN-FD mode doesn't work in RX-FIFO mode!\n");
++ return -EINVAL;
++ }
++
++ if ((devtype_data->quirks &
++ (FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX |
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR)) ==
++ FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX_RTR) {
++ dev_err(&pdev->dev,
++ "Quirks (0x%08x) inconsistent: RX_MAILBOX_RX supported but not RX_MAILBOX\n",
++ devtype_data->quirks);
+ return -EINVAL;
+ }
+
+@@ -2181,9 +2223,10 @@ static int flexcan_probe(struct platform_device *pdev)
+ dev->flags |= IFF_ECHO;
+
+ priv = netdev_priv(dev);
++ priv->devtype_data = *devtype_data;
+
+ if (of_property_read_bool(pdev->dev.of_node, "big-endian") ||
+- devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
++ priv->devtype_data.quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) {
+ priv->read = flexcan_read_be;
+ priv->write = flexcan_write_be;
+ } else {
+@@ -2202,10 +2245,9 @@ static int flexcan_probe(struct platform_device *pdev)
+ priv->clk_ipg = clk_ipg;
+ priv->clk_per = clk_per;
+ priv->clk_src = clk_src;
+- priv->devtype_data = devtype_data;
+ priv->reg_xceiver = reg_xceiver;
+
+- if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+ priv->irq_boff = platform_get_irq(pdev, 1);
+ if (priv->irq_boff <= 0) {
+ err = -ENODEV;
+@@ -2218,7 +2260,7 @@ static int flexcan_probe(struct platform_device *pdev)
+ }
+ }
+
+- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
++ if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
+ priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
+ CAN_CTRLMODE_FD_NON_ISO;
+ priv->can.bittiming_const = &flexcan_fd_bittiming_const;
+diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
+index 78e27940b2aff..e098f594a7492 100644
+--- a/drivers/net/can/grcan.c
++++ b/drivers/net/can/grcan.c
+@@ -241,13 +241,14 @@ struct grcan_device_config {
+ .rxsize = GRCAN_DEFAULT_BUFFER_SIZE, \
+ }
+
+-#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100
++#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100
+ #define GRLIB_VERSION_MASK 0xffff
+
+ /* GRCAN private data structure */
+ struct grcan_priv {
+ struct can_priv can; /* must be the first member */
+ struct net_device *dev;
++ struct device *ofdev_dev;
+ struct napi_struct napi;
+
+ struct grcan_registers __iomem *regs; /* ioremap'ed registers */
+@@ -924,7 +925,7 @@ static void grcan_free_dma_buffers(struct net_device *dev)
+ struct grcan_priv *priv = netdev_priv(dev);
+ struct grcan_dma *dma = &priv->dma;
+
+- dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf,
++ dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf,
+ dma->base_handle);
+ memset(dma, 0, sizeof(*dma));
+ }
+@@ -949,7 +950,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev,
+
+ /* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */
+ dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT;
+- dma->base_buf = dma_alloc_coherent(&dev->dev,
++ dma->base_buf = dma_alloc_coherent(priv->ofdev_dev,
+ dma->base_size,
+ &dma->base_handle,
+ GFP_KERNEL);
+@@ -1113,8 +1114,10 @@ static int grcan_close(struct net_device *dev)
+
+ priv->closing = true;
+ if (priv->need_txbug_workaround) {
++ spin_unlock_irqrestore(&priv->lock, flags);
+ del_timer_sync(&priv->hang_timer);
+ del_timer_sync(&priv->rr_timer);
++ spin_lock_irqsave(&priv->lock, flags);
+ }
+ netif_stop_queue(dev);
+ grcan_stop_hardware(dev);
+@@ -1134,7 +1137,7 @@ static int grcan_close(struct net_device *dev)
+ return 0;
+ }
+
+-static int grcan_transmit_catch_up(struct net_device *dev, int budget)
++static void grcan_transmit_catch_up(struct net_device *dev)
+ {
+ struct grcan_priv *priv = netdev_priv(dev);
+ unsigned long flags;
+@@ -1142,7 +1145,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+- work_done = catch_up_echo_skb(dev, budget, true);
++ work_done = catch_up_echo_skb(dev, -1, true);
+ if (work_done) {
+ if (!priv->resetting && !priv->closing &&
+ !(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
+@@ -1156,8 +1159,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+-
+- return work_done;
+ }
+
+ static int grcan_receive(struct net_device *dev, int budget)
+@@ -1239,19 +1240,13 @@ static int grcan_poll(struct napi_struct *napi, int budget)
+ struct net_device *dev = priv->dev;
+ struct grcan_registers __iomem *regs = priv->regs;
+ unsigned long flags;
+- int tx_work_done, rx_work_done;
+- int rx_budget = budget / 2;
+- int tx_budget = budget - rx_budget;
++ int work_done;
+
+- /* Half of the budget for receiving messages */
+- rx_work_done = grcan_receive(dev, rx_budget);
++ work_done = grcan_receive(dev, budget);
+
+- /* Half of the budget for transmitting messages as that can trigger echo
+- * frames being received
+- */
+- tx_work_done = grcan_transmit_catch_up(dev, tx_budget);
++ grcan_transmit_catch_up(dev);
+
+- if (rx_work_done < rx_budget && tx_work_done < tx_budget) {
++ if (work_done < budget) {
+ napi_complete(napi);
+
+ /* Guarantee no interference with a running reset that otherwise
+@@ -1268,7 +1263,7 @@ static int grcan_poll(struct napi_struct *napi, int budget)
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+- return rx_work_done + tx_work_done;
++ return work_done;
+ }
+
+ /* Work tx bug by waiting while for the risky situation to clear. If that fails,
+@@ -1600,6 +1595,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev,
+ memcpy(&priv->config, &grcan_module_config,
+ sizeof(struct grcan_device_config));
+ priv->dev = dev;
++ priv->ofdev_dev = &ofdev->dev;
+ priv->regs = base;
+ priv->can.bittiming_const = &grcan_bittiming_const;
+ priv->can.do_set_bittiming = grcan_set_bittiming;
+@@ -1652,6 +1648,7 @@ exit_free_candev:
+ static int grcan_probe(struct platform_device *ofdev)
+ {
+ struct device_node *np = ofdev->dev.of_node;
++ struct device_node *sysid_parent;
+ u32 sysid, ambafreq;
+ int irq, err;
+ void __iomem *base;
+@@ -1660,10 +1657,14 @@ static int grcan_probe(struct platform_device *ofdev)
+ /* Compare GRLIB version number with the first that does not
+ * have the tx bug (see start_xmit)
+ */
+- err = of_property_read_u32(np, "systemid", &sysid);
+- if (!err && ((sysid & GRLIB_VERSION_MASK)
+- >= GRCAN_TXBUG_SAFE_GRLIB_VERSION))
+- txbug = false;
++ sysid_parent = of_find_node_by_path("/ambapp0");
++ if (sysid_parent) {
++ err = of_property_read_u32(sysid_parent, "systemid", &sysid);
++ if (!err && ((sysid & GRLIB_VERSION_MASK) >=
++ GRCAN_TXBUG_SAFE_GRLIB_VERSION))
++ txbug = false;
++ of_node_put(sysid_parent);
++ }
+
+ err = of_property_read_u32(np, "freq", &ambafreq);
+ if (err) {
+diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
+index 74d9899fc904c..477bc56b12060 100644
+--- a/drivers/net/can/kvaser_pciefd.c
++++ b/drivers/net/can/kvaser_pciefd.c
+@@ -70,10 +70,12 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
+ #define KVASER_PCIEFD_SYSID_BUILD_REG (KVASER_PCIEFD_SYSID_BASE + 0x14)
+ /* Shared receive buffer registers */
+ #define KVASER_PCIEFD_SRB_BASE 0x1f200
++#define KVASER_PCIEFD_SRB_FIFO_LAST_REG (KVASER_PCIEFD_SRB_BASE + 0x1f4)
+ #define KVASER_PCIEFD_SRB_CMD_REG (KVASER_PCIEFD_SRB_BASE + 0x200)
+ #define KVASER_PCIEFD_SRB_IEN_REG (KVASER_PCIEFD_SRB_BASE + 0x204)
+ #define KVASER_PCIEFD_SRB_IRQ_REG (KVASER_PCIEFD_SRB_BASE + 0x20c)
+ #define KVASER_PCIEFD_SRB_STAT_REG (KVASER_PCIEFD_SRB_BASE + 0x210)
++#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG (KVASER_PCIEFD_SRB_BASE + 0x214)
+ #define KVASER_PCIEFD_SRB_CTRL_REG (KVASER_PCIEFD_SRB_BASE + 0x218)
+ /* EPCS flash controller registers */
+ #define KVASER_PCIEFD_SPI_BASE 0x1fc00
+@@ -110,6 +112,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
+ /* DMA support */
+ #define KVASER_PCIEFD_SRB_STAT_DMA BIT(24)
+
++/* SRB current packet level */
++#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_MASK 0xff
++
+ /* DMA Enable */
+ #define KVASER_PCIEFD_SRB_CTRL_DMA_ENABLE BIT(0)
+
+@@ -248,6 +253,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
+ #define KVASER_PCIEFD_SPACK_EWLR BIT(23)
+ #define KVASER_PCIEFD_SPACK_EPLR BIT(24)
+
++/* Kvaser KCAN_EPACK second word */
++#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0)
++
+ struct kvaser_pciefd;
+
+ struct kvaser_pciefd_can {
+@@ -525,7 +533,7 @@ static int kvaser_pciefd_set_tx_irq(struct kvaser_pciefd_can *can)
+ KVASER_PCIEFD_KCAN_IRQ_TOF | KVASER_PCIEFD_KCAN_IRQ_ABD |
+ KVASER_PCIEFD_KCAN_IRQ_TAE | KVASER_PCIEFD_KCAN_IRQ_TAL |
+ KVASER_PCIEFD_KCAN_IRQ_FDIC | KVASER_PCIEFD_KCAN_IRQ_BPP |
+- KVASER_PCIEFD_KCAN_IRQ_TAR | KVASER_PCIEFD_KCAN_IRQ_TFD;
++ KVASER_PCIEFD_KCAN_IRQ_TAR;
+
+ iowrite32(msk, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+
+@@ -553,6 +561,8 @@ static void kvaser_pciefd_setup_controller(struct kvaser_pciefd_can *can)
+
+ if (can->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+ mode |= KVASER_PCIEFD_KCAN_MODE_LOM;
++ else
++ mode &= ~KVASER_PCIEFD_KCAN_MODE_LOM;
+
+ mode |= KVASER_PCIEFD_KCAN_MODE_EEN;
+ mode |= KVASER_PCIEFD_KCAN_MODE_EPEN;
+@@ -571,7 +581,7 @@ static void kvaser_pciefd_start_controller_flush(struct kvaser_pciefd_can *can)
+
+ spin_lock_irqsave(&can->lock, irq);
+ iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
+- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD | KVASER_PCIEFD_KCAN_IRQ_TFD,
++ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
+ can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+
+ status = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_STAT_REG);
+@@ -614,7 +624,7 @@ static int kvaser_pciefd_bus_on(struct kvaser_pciefd_can *can)
+ iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+ iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
+
+- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD | KVASER_PCIEFD_KCAN_IRQ_TFD,
++ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
+ can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+
+ mode = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_MODE_REG);
+@@ -718,6 +728,7 @@ static int kvaser_pciefd_stop(struct net_device *netdev)
+ iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+ del_timer(&can->bec_poll_timer);
+ }
++ can->can.state = CAN_STATE_STOPPED;
+ close_candev(netdev);
+
+ return ret;
+@@ -1000,8 +1011,7 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
+ SET_NETDEV_DEV(netdev, &pcie->pci->dev);
+
+ iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
+- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD |
+- KVASER_PCIEFD_KCAN_IRQ_TFD,
++ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
+ can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+
+ pcie->can[i] = can;
+@@ -1051,6 +1061,7 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
+ {
+ int i;
+ u32 srb_status;
++ u32 srb_packet_count;
+ dma_addr_t dma_addr[KVASER_PCIEFD_DMA_COUNT];
+
+ /* Disable the DMA */
+@@ -1078,6 +1089,15 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
+ KVASER_PCIEFD_SRB_CMD_RDB1,
+ pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+
++ /* Empty Rx FIFO */
++ srb_packet_count = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG) &
++ KVASER_PCIEFD_SRB_RX_NR_PACKETS_MASK;
++ while (srb_packet_count) {
++ /* Drop current packet in FIFO */
++ ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_FIFO_LAST_REG);
++ srb_packet_count--;
++ }
++
+ srb_status = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_STAT_REG);
+ if (!(srb_status & KVASER_PCIEFD_SRB_STAT_DI)) {
+ dev_err(&pcie->pci->dev, "DMA not idle before enabling\n");
+@@ -1285,7 +1305,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can,
+
+ can->err_rep_cnt++;
+ can->can.can_stats.bus_error++;
+- stats->rx_errors++;
++ if (p->header[1] & KVASER_PCIEFD_EPACK_DIR_TX)
++ stats->tx_errors++;
++ else
++ stats->rx_errors++;
+
+ can->bec.txerr = bec.txerr;
+ can->bec.rxerr = bec.rxerr;
+@@ -1417,9 +1440,6 @@ static int kvaser_pciefd_handle_status_packet(struct kvaser_pciefd *pcie,
+ cmd = KVASER_PCIEFD_KCAN_CMD_AT;
+ cmd |= ++can->cmd_seq << KVASER_PCIEFD_KCAN_CMD_SEQ_SHIFT;
+ iowrite32(cmd, can->reg_base + KVASER_PCIEFD_KCAN_CMD_REG);
+-
+- iowrite32(KVASER_PCIEFD_KCAN_IRQ_TFD,
+- can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+ } else if (p->header[0] & KVASER_PCIEFD_SPACK_IDET &&
+ p->header[0] & KVASER_PCIEFD_SPACK_IRM &&
+ cmdseq == (p->header[1] & KVASER_PCIEFD_PACKET_SEQ_MSK) &&
+@@ -1708,15 +1728,6 @@ static int kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
+ if (irq & KVASER_PCIEFD_KCAN_IRQ_TOF)
+ netdev_err(can->can.dev, "Tx FIFO overflow\n");
+
+- if (irq & KVASER_PCIEFD_KCAN_IRQ_TFD) {
+- u8 count = ioread32(can->reg_base +
+- KVASER_PCIEFD_KCAN_TX_NPACKETS_REG) & 0xff;
+-
+- if (count == 0)
+- iowrite32(KVASER_PCIEFD_KCAN_CTRL_EFLUSH,
+- can->reg_base + KVASER_PCIEFD_KCAN_CTRL_REG);
+- }
+-
+ if (irq & KVASER_PCIEFD_KCAN_IRQ_BPP)
+ netdev_err(can->can.dev,
+ "Fail to change bittiming, when not in reset mode\n");
+@@ -1818,6 +1829,11 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
+ if (err)
+ goto err_teardown_can_ctrls;
+
++ err = request_irq(pcie->pci->irq, kvaser_pciefd_irq_handler,
++ IRQF_SHARED, KVASER_PCIEFD_DRV_NAME, pcie);
++ if (err)
++ goto err_teardown_can_ctrls;
++
+ iowrite32(KVASER_PCIEFD_SRB_IRQ_DPD0 | KVASER_PCIEFD_SRB_IRQ_DPD1,
+ pcie->reg_base + KVASER_PCIEFD_SRB_IRQ_REG);
+
+@@ -1838,11 +1854,6 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
+ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1,
+ pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+
+- err = request_irq(pcie->pci->irq, kvaser_pciefd_irq_handler,
+- IRQF_SHARED, KVASER_PCIEFD_DRV_NAME, pcie);
+- if (err)
+- goto err_teardown_can_ctrls;
+-
+ err = kvaser_pciefd_reg_candev(pcie);
+ if (err)
+ goto err_free_irq;
+@@ -1850,6 +1861,8 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
+ return 0;
+
+ err_free_irq:
++ /* Disable PCI interrupts */
++ iowrite32(0, pcie->reg_base + KVASER_PCIEFD_IEN_REG);
+ free_irq(pcie->pci->irq, pcie);
+
+ err_teardown_can_ctrls:
+diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
+index 2470c47b2e315..e027229c1955b 100644
+--- a/drivers/net/can/m_can/m_can.c
++++ b/drivers/net/can/m_can/m_can.c
+@@ -204,16 +204,16 @@ enum m_can_reg {
+
+ /* Interrupts for version 3.0.x */
+ #define IR_ERR_LEC_30X (IR_STE | IR_FOE | IR_ACKE | IR_BE | IR_CRCE)
+-#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_ELO | IR_BEU | \
+- IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \
+- IR_RF1L | IR_RF0L)
++#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_BEU | IR_BEC | \
++ IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \
++ IR_RF0L)
+ #define IR_ERR_ALL_30X (IR_ERR_STATE | IR_ERR_BUS_30X)
+
+ /* Interrupts for version >= 3.1.x */
+ #define IR_ERR_LEC_31X (IR_PED | IR_PEA)
+-#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \
+- IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \
+- IR_RF1L | IR_RF0L)
++#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_BEU | IR_BEC | \
++ IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \
++ IR_RF0L)
+ #define IR_ERR_ALL_31X (IR_ERR_STATE | IR_ERR_BUS_31X)
+
+ /* Interrupt Line Select (ILS) */
+@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev,
+ u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
+ offset;
+
++ if (val_count == 0)
++ return 0;
++
+ return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
+ }
+
+@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev,
+ u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
+ offset;
+
++ if (val_count == 0)
++ return 0;
++
+ return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
+ }
+
+@@ -517,7 +523,7 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs)
+ err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA,
+ cf->data, DIV_ROUND_UP(cf->len, 4));
+ if (err)
+- goto out_fail;
++ goto out_free_skb;
+ }
+
+ /* acknowledge rx fifo 0 */
+@@ -526,12 +532,14 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs)
+ stats->rx_packets++;
+ stats->rx_bytes += cf->len;
+
+- timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc);
++ timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc) << 16;
+
+ m_can_receive_skb(cdev, skb, timestamp);
+
+ return 0;
+
++out_free_skb:
++ kfree_skb(skb);
+ out_fail:
+ netdev_err(dev, "FIFO read returned %d\n", err);
+ return err;
+@@ -810,8 +818,6 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus)
+ {
+ if (irqstatus & IR_WDI)
+ netdev_err(dev, "Message RAM Watchdog event due to missing READY\n");
+- if (irqstatus & IR_ELO)
+- netdev_err(dev, "Error Logging Overflow\n");
+ if (irqstatus & IR_BEU)
+ netdev_err(dev, "Bit Error Uncorrected\n");
+ if (irqstatus & IR_BEC)
+@@ -1037,7 +1043,7 @@ static int m_can_echo_tx_event(struct net_device *dev)
+ }
+
+ msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe);
+- timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
++ timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe) << 16;
+
+ /* ack txe element */
+ m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK,
+@@ -1242,10 +1248,17 @@ static int m_can_set_bittiming(struct net_device *dev)
+ * - setup bittiming
+ * - configure timestamp generation
+ */
+-static void m_can_chip_config(struct net_device *dev)
++static int m_can_chip_config(struct net_device *dev)
+ {
+ struct m_can_classdev *cdev = netdev_priv(dev);
+ u32 cccr, test;
++ int err;
++
++ err = m_can_init_ram(cdev);
++ if (err) {
++ dev_err(cdev->dev, "Message RAM configuration failed\n");
++ return err;
++ }
+
+ m_can_config_endisable(cdev, true);
+
+@@ -1361,24 +1374,33 @@ static void m_can_chip_config(struct net_device *dev)
+ /* enable internal timestamp generation, with a prescalar of 16. The
+ * prescalar is applied to the nominal bit timing
+ */
+- m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf));
++ m_can_write(cdev, M_CAN_TSCC,
++ FIELD_PREP(TSCC_TCP_MASK, 0xf) |
++ FIELD_PREP(TSCC_TSS_MASK, TSCC_TSS_INTERNAL));
+
+ m_can_config_endisable(cdev, false);
+
+ if (cdev->ops->init)
+ cdev->ops->init(cdev);
++
++ return 0;
+ }
+
+-static void m_can_start(struct net_device *dev)
++static int m_can_start(struct net_device *dev)
+ {
+ struct m_can_classdev *cdev = netdev_priv(dev);
++ int ret;
+
+ /* basic m_can configuration */
+- m_can_chip_config(dev);
++ ret = m_can_chip_config(dev);
++ if (ret)
++ return ret;
+
+ cdev->can.state = CAN_STATE_ERROR_ACTIVE;
+
+ m_can_enable_all_interrupts(cdev);
++
++ return 0;
+ }
+
+ static int m_can_set_mode(struct net_device *dev, enum can_mode mode)
+@@ -1494,20 +1516,32 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
+ case 30:
+ /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */
+ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
+- cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+- cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
++ cdev->can.bittiming_const = cdev->bit_timing ?
++ cdev->bit_timing : &m_can_bittiming_const_30X;
++
++ cdev->can.data_bittiming_const = cdev->data_timing ?
++ cdev->data_timing :
++ &m_can_data_bittiming_const_30X;
+ break;
+ case 31:
+ /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
+ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
+- cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+- cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
++ cdev->can.bittiming_const = cdev->bit_timing ?
++ cdev->bit_timing : &m_can_bittiming_const_31X;
++
++ cdev->can.data_bittiming_const = cdev->data_timing ?
++ cdev->data_timing :
++ &m_can_data_bittiming_const_31X;
+ break;
+ case 32:
+ case 33:
+ /* Support both MCAN version v3.2.x and v3.3.0 */
+- cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+- cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
++ cdev->can.bittiming_const = cdev->bit_timing ?
++ cdev->bit_timing : &m_can_bittiming_const_31X;
++
++ cdev->can.data_bittiming_const = cdev->data_timing ?
++ cdev->data_timing :
++ &m_can_data_bittiming_const_31X;
+
+ cdev->can.ctrlmode_supported |=
+ (m_can_niso_supported(cdev) ?
+@@ -1622,8 +1656,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
+ if (err)
+ goto out_fail;
+
+- can_put_echo_skb(skb, dev, 0, 0);
+-
+ if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) {
+ cccr = m_can_read(cdev, M_CAN_CCCR);
+ cccr &= ~CCCR_CMR_MASK;
+@@ -1640,6 +1672,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
+ m_can_write(cdev, M_CAN_CCCR, cccr);
+ }
+ m_can_write(cdev, M_CAN_TXBTIE, 0x1);
++
++ can_put_echo_skb(skb, dev, 0, 0);
++
+ m_can_write(cdev, M_CAN_TXBAR, 0x1);
+ /* End of xmit function for version 3.0.x */
+ } else {
+@@ -1803,7 +1838,9 @@ static int m_can_open(struct net_device *dev)
+ }
+
+ /* start the m_can controller */
+- m_can_start(dev);
++ err = m_can_start(dev);
++ if (err)
++ goto exit_irq_fail;
+
+ can_led_event(dev, CAN_LED_EVENT_OPEN);
+
+@@ -1910,7 +1947,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev)
+ cdev->hclk = devm_clk_get(cdev->dev, "hclk");
+ cdev->cclk = devm_clk_get(cdev->dev, "cclk");
+
+- if (IS_ERR(cdev->cclk)) {
++ if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) {
+ dev_err(cdev->dev, "no clock found\n");
+ ret = -ENODEV;
+ }
+@@ -2061,9 +2098,13 @@ int m_can_class_resume(struct device *dev)
+ ret = m_can_clk_start(cdev);
+ if (ret)
+ return ret;
++ ret = m_can_start(ndev);
++ if (ret) {
++ m_can_clk_stop(cdev);
++
++ return ret;
++ }
+
+- m_can_init_ram(cdev);
+- m_can_start(ndev);
+ netif_device_attach(ndev);
+ netif_start_queue(ndev);
+ }
+diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
+index d18b515e6ccc7..2c5d409971686 100644
+--- a/drivers/net/can/m_can/m_can.h
++++ b/drivers/net/can/m_can/m_can.h
+@@ -85,6 +85,9 @@ struct m_can_classdev {
+ struct sk_buff *tx_skb;
+ struct phy *transceiver;
+
++ const struct can_bittiming_const *bit_timing;
++ const struct can_bittiming_const *data_timing;
++
+ struct m_can_ops *ops;
+
+ int version;
+diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
+index 89cc3d41e952b..f2219aa2824b3 100644
+--- a/drivers/net/can/m_can/m_can_pci.c
++++ b/drivers/net/can/m_can/m_can_pci.c
+@@ -18,7 +18,7 @@
+
+ #define M_CAN_PCI_MMIO_BAR 0
+
+-#define M_CAN_CLOCK_FREQ_EHL 100000000
++#define M_CAN_CLOCK_FREQ_EHL 200000000
+ #define CTL_CSR_INT_CTL_OFFSET 0x508
+
+ struct m_can_pci_priv {
+@@ -42,8 +42,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg)
+ static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count)
+ {
+ struct m_can_pci_priv *priv = cdev_to_priv(cdev);
++ void __iomem *src = priv->base + offset;
+
+- ioread32_rep(priv->base + offset, val, val_count);
++ while (val_count--) {
++ *(unsigned int *)val = ioread32(src);
++ val += 4;
++ src += 4;
++ }
+
+ return 0;
+ }
+@@ -61,8 +66,13 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
+ const void *val, size_t val_count)
+ {
+ struct m_can_pci_priv *priv = cdev_to_priv(cdev);
++ void __iomem *dst = priv->base + offset;
+
+- iowrite32_rep(priv->base + offset, val, val_count);
++ while (val_count--) {
++ iowrite32(*(unsigned int *)val, dst);
++ val += 4;
++ dst += 4;
++ }
+
+ return 0;
+ }
+@@ -110,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+
+ ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0)
+- return ret;
++ goto err_free_dev;
+
+ mcan_class->dev = &pci->dev;
+ mcan_class->net->irq = pci_irq_vector(pci, 0);
+@@ -122,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+
+ ret = m_can_class_register(mcan_class);
+ if (ret)
+- goto err;
++ goto err_free_irq;
+
+ /* Enable interrupt control at CAN wrapper IP */
+ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET);
+@@ -134,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+
+ return 0;
+
+-err:
++err_free_irq:
+ pci_free_irq_vectors(pci);
++err_free_dev:
++ m_can_class_free_dev(mcan_class->net);
+ return ret;
+ }
+
+@@ -151,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci)
+ writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET);
+
+ m_can_class_unregister(mcan_class);
++ m_can_class_free_dev(mcan_class->net);
+ pci_free_irq_vectors(pci);
+ }
+
+diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
+index eee47bad05920..de6d8e01bf2e8 100644
+--- a/drivers/net/can/m_can/m_can_platform.c
++++ b/drivers/net/can/m_can/m_can_platform.c
+@@ -140,10 +140,6 @@ static int m_can_plat_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, mcan_class);
+
+- ret = m_can_init_ram(mcan_class);
+- if (ret)
+- goto probe_fail;
+-
+ pm_runtime_enable(mcan_class->dev);
+ ret = m_can_class_register(mcan_class);
+ if (ret)
+diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
+index 04687b15b250e..c83b347be1cfd 100644
+--- a/drivers/net/can/m_can/tcan4x5x-core.c
++++ b/drivers/net/can/m_can/tcan4x5x-core.c
+@@ -10,7 +10,7 @@
+ #define TCAN4X5X_DEV_ID1 0x04
+ #define TCAN4X5X_REV 0x08
+ #define TCAN4X5X_STATUS 0x0C
+-#define TCAN4X5X_ERROR_STATUS 0x10
++#define TCAN4X5X_ERROR_STATUS_MASK 0x10
+ #define TCAN4X5X_CONTROL 0x14
+
+ #define TCAN4X5X_CONFIG 0x800
+@@ -204,17 +204,7 @@ static int tcan4x5x_clear_interrupts(struct m_can_classdev *cdev)
+ if (ret)
+ return ret;
+
+- ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_MCAN_INT_REG,
+- TCAN4X5X_ENABLE_MCAN_INT);
+- if (ret)
+- return ret;
+-
+- ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_INT_FLAGS,
+- TCAN4X5X_CLEAR_ALL_INT);
+- if (ret)
+- return ret;
+-
+- return tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_ERROR_STATUS,
++ return tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_INT_FLAGS,
+ TCAN4X5X_CLEAR_ALL_INT);
+ }
+
+@@ -234,8 +224,8 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
+ if (ret)
+ return ret;
+
+- /* Zero out the MCAN buffers */
+- ret = m_can_init_ram(cdev);
++ ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_ERROR_STATUS_MASK,
++ TCAN4X5X_CLEAR_ALL_INT);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c
+index ca80dbaf7a3f5..26e212b8ca7a6 100644
+--- a/drivers/net/can/m_can/tcan4x5x-regmap.c
++++ b/drivers/net/can/m_can/tcan4x5x-regmap.c
+@@ -12,7 +12,7 @@
+ #define TCAN4X5X_SPI_INSTRUCTION_WRITE (0x61 << 24)
+ #define TCAN4X5X_SPI_INSTRUCTION_READ (0x41 << 24)
+
+-#define TCAN4X5X_MAX_REGISTER 0x8ffc
++#define TCAN4X5X_MAX_REGISTER 0x87fc
+
+ static int tcan4x5x_regmap_gather_write(void *context,
+ const void *reg, size_t reg_len,
+diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
+index 35892c1efef02..7d868b6eb579b 100644
+--- a/drivers/net/can/mscan/mpc5xxx_can.c
++++ b/drivers/net/can/mscan/mpc5xxx_can.c
+@@ -322,14 +322,14 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
+ &mscan_clksrc);
+ if (!priv->can.clock.freq) {
+ dev_err(&ofdev->dev, "couldn't get MSCAN clock properties\n");
+- goto exit_free_mscan;
++ goto exit_put_clock;
+ }
+
+ err = register_mscandev(dev, mscan_clksrc);
+ if (err) {
+ dev_err(&ofdev->dev, "registering %s failed (err=%d)\n",
+ DRV_NAME, err);
+- goto exit_free_mscan;
++ goto exit_put_clock;
+ }
+
+ dev_info(&ofdev->dev, "MSCAN at 0x%p, irq %d, clock %d Hz\n",
+@@ -337,7 +337,9 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
+
+ return 0;
+
+-exit_free_mscan:
++exit_put_clock:
++ if (data->put_clock)
++ data->put_clock(ofdev);
+ free_candev(dev);
+ exit_dispose_irq:
+ irq_dispose_mapping(irq);
+diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c
+index 92a54a5fd4c50..cd8d536c6fb20 100644
+--- a/drivers/net/can/pch_can.c
++++ b/drivers/net/can/pch_can.c
+@@ -489,6 +489,7 @@ static void pch_can_error(struct net_device *ndev, u32 status)
+ if (!skb)
+ return;
+
++ errc = ioread32(&priv->regs->errc);
+ if (status & PCH_BUS_OFF) {
+ pch_can_set_tx_all(priv, 0);
+ pch_can_set_rx_all(priv, 0);
+@@ -496,9 +497,11 @@ static void pch_can_error(struct net_device *ndev, u32 status)
+ cf->can_id |= CAN_ERR_BUSOFF;
+ priv->can.can_stats.bus_off++;
+ can_bus_off(ndev);
++ } else {
++ cf->data[6] = errc & PCH_TEC;
++ cf->data[7] = (errc & PCH_REC) >> 8;
+ }
+
+- errc = ioread32(&priv->regs->errc);
+ /* Warning interrupt. */
+ if (status & PCH_EWARN) {
+ state = CAN_STATE_ERROR_WARNING;
+@@ -556,9 +559,6 @@ static void pch_can_error(struct net_device *ndev, u32 status)
+ break;
+ }
+
+- cf->data[6] = errc & PCH_TEC;
+- cf->data[7] = (errc & PCH_REC) >> 8;
+-
+ priv->can.state = state;
+ netif_receive_skb(skb);
+
+@@ -692,11 +692,11 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota)
+ cf->data[i + 1] = data_reg >> 8;
+ }
+
+- netif_receive_skb(skb);
+ rcv_pkts++;
+ stats->rx_packets++;
+ quota--;
+ stats->rx_bytes += cf->len;
++ netif_receive_skb(skb);
+
+ pch_fifo_thresh(priv, obj_num);
+ obj_num++;
+diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
+index 8999ec9455ec2..945b319de841c 100644
+--- a/drivers/net/can/rcar/rcar_can.c
++++ b/drivers/net/can/rcar/rcar_can.c
+@@ -235,11 +235,8 @@ static void rcar_can_error(struct net_device *ndev)
+ if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) {
+ txerr = readb(&priv->regs->tecr);
+ rxerr = readb(&priv->regs->recr);
+- if (skb) {
++ if (skb)
+ cf->can_id |= CAN_ERR_CRTL;
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+- }
+ }
+ if (eifr & RCAR_CAN_EIFR_BEIF) {
+ int rx_errors = 0, tx_errors = 0;
+@@ -339,6 +336,9 @@ static void rcar_can_error(struct net_device *ndev)
+ can_bus_off(ndev);
+ if (skb)
+ cf->can_id |= CAN_ERR_BUSOFF;
++ } else if (skb) {
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
+ }
+ if (eifr & RCAR_CAN_EIFR_ORIF) {
+ netdev_dbg(priv->ndev, "Receive overrun error interrupt\n");
+diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
+index ff9d0f5ae0dd2..4e230e1456647 100644
+--- a/drivers/net/can/rcar/rcar_canfd.c
++++ b/drivers/net/can/rcar/rcar_canfd.c
+@@ -1106,11 +1106,13 @@ static void rcar_canfd_handle_global_receive(struct rcar_canfd_global *gpriv, u3
+ {
+ struct rcar_canfd_channel *priv = gpriv->ch[ch];
+ u32 ridx = ch + RCANFD_RFFIFO_IDX;
+- u32 sts;
++ u32 sts, cc;
+
+ /* Handle Rx interrupts */
+ sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
+- if (likely(sts & RCANFD_RFSTS_RFIF)) {
++ cc = rcar_canfd_read(priv->base, RCANFD_RFCC(ridx));
++ if (likely(sts & RCANFD_RFSTS_RFIF &&
++ cc & RCANFD_RFCC_RFIE)) {
+ if (napi_schedule_prep(&priv->napi)) {
+ /* Disable Rx FIFO interrupts */
+ rcar_canfd_clear_bit(priv->base,
+@@ -1195,11 +1197,9 @@ static void rcar_canfd_handle_channel_tx(struct rcar_canfd_global *gpriv, u32 ch
+
+ static irqreturn_t rcar_canfd_channel_tx_interrupt(int irq, void *dev_id)
+ {
+- struct rcar_canfd_global *gpriv = dev_id;
+- u32 ch;
++ struct rcar_canfd_channel *priv = dev_id;
+
+- for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+- rcar_canfd_handle_channel_tx(gpriv, ch);
++ rcar_canfd_handle_channel_tx(priv->gpriv, priv->channel);
+
+ return IRQ_HANDLED;
+ }
+@@ -1227,11 +1227,9 @@ static void rcar_canfd_handle_channel_err(struct rcar_canfd_global *gpriv, u32 c
+
+ static irqreturn_t rcar_canfd_channel_err_interrupt(int irq, void *dev_id)
+ {
+- struct rcar_canfd_global *gpriv = dev_id;
+- u32 ch;
++ struct rcar_canfd_channel *priv = dev_id;
+
+- for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+- rcar_canfd_handle_channel_err(gpriv, ch);
++ rcar_canfd_handle_channel_err(priv->gpriv, priv->channel);
+
+ return IRQ_HANDLED;
+ }
+@@ -1640,8 +1638,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
+ ndev = alloc_candev(sizeof(*priv), RCANFD_FIFO_DEPTH);
+ if (!ndev) {
+ dev_err(&pdev->dev, "alloc_candev() failed\n");
+- err = -ENOMEM;
+- goto fail;
++ return -ENOMEM;
+ }
+ priv = netdev_priv(ndev);
+
+@@ -1650,6 +1647,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
+ priv->ndev = ndev;
+ priv->base = gpriv->base;
+ priv->channel = ch;
++ priv->gpriv = gpriv;
+ priv->can.clock.freq = fcan_freq;
+ dev_info(&pdev->dev, "can_clk rate is %u\n", priv->can.clock.freq);
+
+@@ -1678,7 +1676,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
+ }
+ err = devm_request_irq(&pdev->dev, err_irq,
+ rcar_canfd_channel_err_interrupt, 0,
+- irq_name, gpriv);
++ irq_name, priv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq CH Err(%d) failed, error %d\n",
+ err_irq, err);
+@@ -1692,7 +1690,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
+ }
+ err = devm_request_irq(&pdev->dev, tx_irq,
+ rcar_canfd_channel_tx_interrupt, 0,
+- irq_name, gpriv);
++ irq_name, priv);
+ if (err) {
+ dev_err(&pdev->dev, "devm_request_irq Tx (%d) failed, error %d\n",
+ tx_irq, err);
+@@ -1716,27 +1714,26 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
+
+ priv->can.do_set_mode = rcar_canfd_do_set_mode;
+ priv->can.do_get_berr_counter = rcar_canfd_get_berr_counter;
+- priv->gpriv = gpriv;
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll,
+ RCANFD_NAPI_WEIGHT);
++ spin_lock_init(&priv->tx_lock);
++ devm_can_led_init(ndev);
++ gpriv->ch[priv->channel] = priv;
+ err = register_candev(ndev);
+ if (err) {
+ dev_err(&pdev->dev,
+ "register_candev() failed, error %d\n", err);
+ goto fail_candev;
+ }
+- spin_lock_init(&priv->tx_lock);
+- devm_can_led_init(ndev);
+- gpriv->ch[priv->channel] = priv;
+ dev_info(&pdev->dev, "device registered (channel %u)\n", priv->channel);
+ return 0;
+
+ fail_candev:
+ netif_napi_del(&priv->napi);
+- free_candev(ndev);
+ fail:
++ free_candev(ndev);
+ return err;
+ }
+
+diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c
+index e21b169c14c01..4642b6d4aaf7b 100644
+--- a/drivers/net/can/sja1000/ems_pcmcia.c
++++ b/drivers/net/can/sja1000/ems_pcmcia.c
+@@ -234,7 +234,12 @@ static int ems_pcmcia_add_card(struct pcmcia_device *pdev, unsigned long base)
+ free_sja1000dev(dev);
+ }
+
+- err = request_irq(dev->irq, &ems_pcmcia_interrupt, IRQF_SHARED,
++ if (!card->channels) {
++ err = -ENODEV;
++ goto failure_cleanup;
++ }
++
++ err = request_irq(pdev->irq, &ems_pcmcia_interrupt, IRQF_SHARED,
+ DRV_NAME, card);
+ if (!err)
+ return 0;
+diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
+index 3fad546467461..aae2677e24f99 100644
+--- a/drivers/net/can/sja1000/sja1000.c
++++ b/drivers/net/can/sja1000/sja1000.c
+@@ -404,9 +404,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
+ txerr = priv->read_reg(priv, SJA1000_TXERR);
+ rxerr = priv->read_reg(priv, SJA1000_RXERR);
+
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+-
+ if (isrc & IRQ_DOI) {
+ /* data overrun interrupt */
+ netdev_dbg(dev, "data overrun interrupt\n");
+@@ -428,6 +425,10 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
+ else
+ state = CAN_STATE_ERROR_ACTIVE;
+ }
++ if (state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
++ }
+ if (isrc & IRQ_BEI) {
+ /* bus error interrupt */
+ priv->can.can_stats.bus_error++;
+diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
+index d513fac507185..db3e767d5320f 100644
+--- a/drivers/net/can/sja1000/sja1000_isa.c
++++ b/drivers/net/can/sja1000/sja1000_isa.c
+@@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev)
+ if (err) {
+ dev_err(&pdev->dev, "registering %s failed (err=%d)\n",
+ DRV_NAME, err);
+- goto exit_unmap;
++ goto exit_free;
+ }
+
+ dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n",
+ DRV_NAME, priv->reg_base, dev->irq);
+ return 0;
+
+- exit_unmap:
++exit_free:
++ free_sja1000dev(dev);
++exit_unmap:
+ if (mem[idx])
+ iounmap(base);
+- exit_release:
++exit_release:
+ if (mem[idx])
+ release_mem_region(mem[idx], iosize);
+ else
+ release_region(port[idx], iosize);
+- exit:
++exit:
+ return err;
+ }
+
+diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c
+index 2e93ee7923739..e5c939b63fa65 100644
+--- a/drivers/net/can/softing/softing_cs.c
++++ b/drivers/net/can/softing/softing_cs.c
+@@ -293,7 +293,7 @@ static int softingcs_probe(struct pcmcia_device *pcmcia)
+ return 0;
+
+ platform_failed:
+- kfree(dev);
++ platform_device_put(pdev);
+ mem_failed:
+ pcmcia_bad:
+ pcmcia_failed:
+diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c
+index 7e15368779931..32286f861a195 100644
+--- a/drivers/net/can/softing/softing_fw.c
++++ b/drivers/net/can/softing/softing_fw.c
+@@ -565,18 +565,19 @@ int softing_startstop(struct net_device *dev, int up)
+ if (ret < 0)
+ goto failed;
+ }
+- /* enable_error_frame */
+- /*
++
++ /* enable_error_frame
++ *
+ * Error reporting is switched off at the moment since
+ * the receiving of them is not yet 100% verified
+ * This should be enabled sooner or later
+- *
+- if (error_reporting) {
++ */
++ if (0 && error_reporting) {
+ ret = softing_fct_cmd(card, 51, "enable_error_frame");
+ if (ret < 0)
+ goto failed;
+ }
+- */
++
+ /* initialize interface */
+ iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 2]);
+ iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 4]);
+diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
+index 89d9c986a2297..b08b98e6ad1c9 100644
+--- a/drivers/net/can/spi/hi311x.c
++++ b/drivers/net/can/spi/hi311x.c
+@@ -670,8 +670,6 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
+
+ txerr = hi3110_read(spi, HI3110_READ_TEC);
+ rxerr = hi3110_read(spi, HI3110_READ_REC);
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+ tx_state = txerr >= rxerr ? new_state : 0;
+ rx_state = txerr <= rxerr ? new_state : 0;
+ can_change_state(net, cf, tx_state, rx_state);
+@@ -684,6 +682,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id)
+ hi3110_hw_sleep(spi);
+ break;
+ }
++ } else {
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
+ }
+ }
+
+diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
+index 0579ab74f728a..f02275f71e4d9 100644
+--- a/drivers/net/can/spi/mcp251x.c
++++ b/drivers/net/can/spi/mcp251x.c
+@@ -1074,9 +1074,6 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id)
+
+ mcp251x_read_2regs(spi, CANINTF, &intf, &eflag);
+
+- /* mask out flags we don't care about */
+- intf &= CANINTF_RX | CANINTF_TX | CANINTF_ERR;
+-
+ /* receive buffer 0 */
+ if (intf & CANINTF_RX0IF) {
+ mcp251x_hw_rx(spi, 0);
+@@ -1086,6 +1083,18 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id)
+ if (mcp251x_is_2510(spi))
+ mcp251x_write_bits(spi, CANINTF,
+ CANINTF_RX0IF, 0x00);
++
++ /* check if buffer 1 is already known to be full, no need to re-read */
++ if (!(intf & CANINTF_RX1IF)) {
++ u8 intf1, eflag1;
++
++ /* intf needs to be read again to avoid a race condition */
++ mcp251x_read_2regs(spi, CANINTF, &intf1, &eflag1);
++
++ /* combine flags from both operations for error handling */
++ intf |= intf1;
++ eflag |= eflag1;
++ }
+ }
+
+ /* receive buffer 1 */
+@@ -1096,6 +1105,9 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id)
+ clear_intf |= CANINTF_RX1IF;
+ }
+
++ /* mask out flags we don't care about */
++ intf &= CANINTF_RX | CANINTF_TX | CANINTF_ERR;
++
+ /* any error or tx interrupt we need to clear? */
+ if (intf & (CANINTF_ERR | CANINTF_TX))
+ clear_intf |= intf & (CANINTF_ERR | CANINTF_TX);
+@@ -1407,11 +1419,14 @@ static int mcp251x_can_probe(struct spi_device *spi)
+
+ ret = mcp251x_gpio_setup(priv);
+ if (ret)
+- goto error_probe;
++ goto out_unregister_candev;
+
+ netdev_info(net, "MCP%x successfully initialized.\n", priv->model);
+ return 0;
+
++out_unregister_candev:
++ unregister_candev(net);
++
+ error_probe:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+index 673861ab665a4..3a0f022b15625 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+@@ -1092,7 +1092,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv)
+
+ err = mcp251xfd_chip_rx_int_enable(priv);
+ if (err)
+- return err;
++ goto out_chip_stop;
+
+ err = mcp251xfd_chip_ecc_init(priv);
+ if (err)
+@@ -1336,7 +1336,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
+ len > tx_ring->obj_num ||
+ offset + len > tx_ring->obj_num)) {
+ netdev_err(priv->ndev,
+- "Trying to read to many TEF objects (max=%d, offset=%d, len=%d).\n",
++ "Trying to read too many TEF objects (max=%d, offset=%d, len=%d).\n",
+ tx_ring->obj_num, offset, len);
+ return -ERANGE;
+ }
+@@ -2290,8 +2290,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
+ * check will fail, too. So leave IRQ handler
+ * directly.
+ */
+- if (priv->can.state == CAN_STATE_BUS_OFF)
++ if (priv->can.state == CAN_STATE_BUS_OFF) {
++ can_rx_offload_threaded_irq_finish(&priv->offload);
+ return IRQ_HANDLED;
++ }
+ }
+
+ handled = IRQ_HANDLED;
+@@ -2623,7 +2625,7 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
+ if (!mcp251xfd_is_251X(priv) &&
+ priv->devtype_data.model != devtype_data->model) {
+ netdev_info(ndev,
+- "Detected %s, but firmware specifies a %s. Fixing up.",
++ "Detected %s, but firmware specifies a %s. Fixing up.\n",
+ __mcp251xfd_get_model_str(devtype_data->model),
+ mcp251xfd_get_model_str(priv));
+ }
+@@ -2660,7 +2662,7 @@ static int mcp251xfd_register_check_rx_int(struct mcp251xfd_priv *priv)
+ return 0;
+
+ netdev_info(priv->ndev,
+- "RX_INT active after softreset, disabling RX_INT support.");
++ "RX_INT active after softreset, disabling RX_INT support.\n");
+ devm_gpiod_put(&priv->spi->dev, priv->rx_int);
+ priv->rx_int = NULL;
+
+@@ -2704,7 +2706,7 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv,
+ out_kfree_buf_rx:
+ kfree(buf_rx);
+
+- return 0;
++ return err;
+ }
+
+ #define MCP251XFD_QUIRK_ACTIVE(quirk) \
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+index 297491516a265..bb559663a3fa5 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+@@ -325,19 +325,21 @@ mcp251xfd_regmap_crc_read(void *context,
+ * register. It increments once per SYS clock tick,
+ * which is 20 or 40 MHz.
+ *
+- * Observation shows that if the lowest byte (which is
+- * transferred first on the SPI bus) of that register
+- * is 0x00 or 0x80 the calculated CRC doesn't always
+- * match the transferred one.
++ * Observation on the mcp2518fd shows that if the
++ * lowest byte (which is transferred first on the SPI
++ * bus) of that register is 0x00 or 0x80 the
++ * calculated CRC doesn't always match the transferred
++ * one. On the mcp2517fd this problem is not limited
++ * to the first byte being 0x00 or 0x80.
+ *
+ * If the highest bit in the lowest byte is flipped
+ * the transferred CRC matches the calculated one. We
+- * assume for now the CRC calculation in the chip
+- * works on wrong data and the transferred data is
+- * correct.
++ * assume for now the CRC operates on the correct
++ * data.
+ */
+ if (reg == MCP251XFD_REG_TBC &&
+- (buf_rx->data[0] == 0x0 || buf_rx->data[0] == 0x80)) {
++ ((buf_rx->data[0] & 0xf8) == 0x0 ||
++ (buf_rx->data[0] & 0xf8) == 0x80)) {
+ /* Flip highest bit in lowest byte of le32 */
+ buf_rx->data[0] ^= 0x80;
+
+@@ -347,10 +349,8 @@ mcp251xfd_regmap_crc_read(void *context,
+ val_len);
+ if (!err) {
+ /* If CRC is now correct, assume
+- * transferred data was OK, flip bit
+- * back to original value.
++ * flipped data is OK.
+ */
+- buf_rx->data[0] ^= 0x80;
+ goto out;
+ }
+ }
+diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+index 0f322dabaf651..281856eea2ef8 100644
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+@@ -433,7 +433,7 @@ struct mcp251xfd_hw_tef_obj {
+ /* The tx_obj_raw version is used in spi async, i.e. without
+ * regmap. We have to take care of endianness ourselves.
+ */
+-struct mcp251xfd_hw_tx_obj_raw {
++struct __packed mcp251xfd_hw_tx_obj_raw {
+ __le32 id;
+ __le32 flags;
+ u8 data[sizeof_field(struct canfd_frame, data)];
+diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
+index 54aa7c25c4de1..ad8f50807aca5 100644
+--- a/drivers/net/can/sun4i_can.c
++++ b/drivers/net/can/sun4i_can.c
+@@ -525,11 +525,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
+ rxerr = (errc >> 16) & 0xFF;
+ txerr = errc & 0xFF;
+
+- if (skb) {
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
+- }
+-
+ if (isrc & SUN4I_INT_DATA_OR) {
+ /* data overrun interrupt */
+ netdev_dbg(dev, "data overrun interrupt\n");
+@@ -560,6 +555,10 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
+ else
+ state = CAN_STATE_ERROR_ACTIVE;
+ }
++ if (skb && state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
++ }
+ if (isrc & SUN4I_INT_BUS_ERR) {
+ /* bus error interrupt */
+ netdev_dbg(dev, "bus error interrupt\n");
+diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
+index 2b5302e724353..a1b7c1a451c0c 100644
+--- a/drivers/net/can/usb/ems_usb.c
++++ b/drivers/net/can/usb/ems_usb.c
+@@ -194,7 +194,7 @@ struct __packed ems_cpc_msg {
+ __le32 ts_sec; /* timestamp in seconds */
+ __le32 ts_nsec; /* timestamp in nano seconds */
+
+- union {
++ union __packed {
+ u8 generic[64];
+ struct cpc_can_msg can_msg;
+ struct cpc_can_params can_params;
+@@ -823,7 +823,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
+
+ usb_unanchor_urb(urb);
+ usb_free_coherent(dev->udev, size, buf, urb->transfer_dma);
+- dev_kfree_skb(skb);
+
+ atomic_dec(&dev->active_tx_urbs);
+
+diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
+index c6068a251fbed..1abdf88597de0 100644
+--- a/drivers/net/can/usb/esd_usb2.c
++++ b/drivers/net/can/usb/esd_usb2.c
+@@ -227,6 +227,10 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
+ u8 rxerr = msg->msg.rx.data[2];
+ u8 txerr = msg->msg.rx.data[3];
+
++ netdev_dbg(priv->netdev,
++ "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n",
++ msg->msg.rx.dlc, state, ecc, rxerr, txerr);
++
+ skb = alloc_can_err_skb(priv->netdev, &cf);
+ if (skb == NULL) {
+ stats->rx_dropped++;
+@@ -253,6 +257,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
+ break;
+ default:
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
++ txerr = 0;
++ rxerr = 0;
+ break;
+ }
+ } else {
+@@ -272,7 +278,6 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
+ cf->data[2] |= CAN_ERR_PROT_STUFF;
+ break;
+ default:
+- cf->data[3] = ecc & SJA1000_ECC_SEG;
+ break;
+ }
+
+@@ -280,6 +285,9 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
+ if (!(ecc & SJA1000_ECC_DIR))
+ cf->data[2] |= CAN_ERR_PROT_TX;
+
++ /* Bit stream position in CAN frame as the error was detected */
++ cf->data[3] = ecc & SJA1000_ECC_SEG;
++
+ if (priv->can.state == CAN_STATE_ERROR_WARNING ||
+ priv->can.state == CAN_STATE_ERROR_PASSIVE) {
+ cf->data[1] = (txerr > rxerr) ?
+diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
+index 96a13c770e4a1..0e6faf962ebbc 100644
+--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
++++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
+@@ -664,7 +664,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
+ struct can_device_stats *can_stats = &can->can_stats;
+ struct can_frame *cf = NULL;
+ struct sk_buff *skb;
+- int ret;
++ int ret = 0;
+
+ if (!netif_running(netdev)) {
+ if (net_ratelimit())
+@@ -823,8 +823,6 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
+ can->state = CAN_STATE_BUS_OFF;
+ can_bus_off(netdev);
+ ret = can->do_set_mode(netdev, CAN_MODE_STOP);
+- if (ret)
+- return ret;
+ }
+ break;
+
+@@ -881,7 +879,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
+ ES58X_EVENT_BUSOFF, timestamp);
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -1796,7 +1794,7 @@ static int es58x_open(struct net_device *netdev)
+ struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+ int ret;
+
+- if (atomic_inc_return(&es58x_dev->opened_channel_cnt) == 1) {
++ if (!es58x_dev->opened_channel_cnt) {
+ ret = es58x_alloc_rx_urbs(es58x_dev);
+ if (ret)
+ return ret;
+@@ -1814,12 +1812,13 @@ static int es58x_open(struct net_device *netdev)
+ if (ret)
+ goto free_urbs;
+
++ es58x_dev->opened_channel_cnt++;
+ netif_start_queue(netdev);
+
+ return ret;
+
+ free_urbs:
+- if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
++ if (!es58x_dev->opened_channel_cnt)
+ es58x_free_urbs(es58x_dev);
+ netdev_err(netdev, "%s: Could not open the network device: %pe\n",
+ __func__, ERR_PTR(ret));
+@@ -1854,7 +1853,8 @@ static int es58x_stop(struct net_device *netdev)
+
+ es58x_flush_pending_tx_msg(netdev);
+
+- if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
++ es58x_dev->opened_channel_cnt--;
++ if (!es58x_dev->opened_channel_cnt)
+ es58x_free_urbs(es58x_dev);
+
+ return 0;
+@@ -2098,8 +2098,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx)
+ netdev->flags |= IFF_ECHO; /* We support local echo */
+
+ ret = register_candev(netdev);
+- if (ret)
++ if (ret) {
++ es58x_dev->netdev[channel_idx] = NULL;
++ free_candev(netdev);
+ return ret;
++ }
+
+ netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0),
+ es58x_dev->param->dql_min_limit);
+@@ -2223,7 +2226,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf,
+ init_usb_anchor(&es58x_dev->tx_urbs_idle);
+ init_usb_anchor(&es58x_dev->tx_urbs_busy);
+ atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0);
+- atomic_set(&es58x_dev->opened_channel_cnt, 0);
+ usb_set_intfdata(intf, es58x_dev);
+
+ es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev,
+diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
+index 826a15871573a..e5033cb5e6959 100644
+--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
++++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
+@@ -373,8 +373,6 @@ struct es58x_operators {
+ * queue wake/stop logic should prevent this URB from getting
+ * empty. Please refer to es58x_get_tx_urb() for more details.
+ * @tx_urbs_idle_cnt: number of urbs in @tx_urbs_idle.
+- * @opened_channel_cnt: number of channels opened (c.f. es58x_open()
+- * and es58x_stop()).
+ * @ktime_req_ns: kernel timestamp when es58x_set_realtime_diff_ns()
+ * was called.
+ * @realtime_diff_ns: difference in nanoseconds between the clocks of
+@@ -384,6 +382,10 @@ struct es58x_operators {
+ * in RX branches.
+ * @rx_max_packet_size: Maximum length of bulk-in URB.
+ * @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev).
++ * @opened_channel_cnt: number of channels opened. Free of race
++ * conditions because its two users (net_device_ops:ndo_open()
++ * and net_device_ops:ndo_close()) guarantee that the network
++ * stack big kernel lock (a.k.a. rtnl_mutex) is being hold.
+ * @rx_cmd_buf_len: Length of @rx_cmd_buf.
+ * @rx_cmd_buf: The device might split the URB commands in an
+ * arbitrary amount of pieces. This buffer is used to concatenate
+@@ -406,7 +408,6 @@ struct es58x_device {
+ struct usb_anchor tx_urbs_busy;
+ struct usb_anchor tx_urbs_idle;
+ atomic_t tx_urbs_idle_cnt;
+- atomic_t opened_channel_cnt;
+
+ u64 ktime_req_ns;
+ s64 realtime_diff_ns;
+@@ -415,6 +416,7 @@ struct es58x_device {
+
+ u16 rx_max_packet_size;
+ u8 num_can_ch;
++ u8 opened_channel_cnt;
+
+ u16 rx_cmd_buf_len;
+ union es58x_urb_cmd rx_cmd_buf;
+diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c
+index af042aa55f59e..26bf4775e884c 100644
+--- a/drivers/net/can/usb/etas_es58x/es58x_fd.c
++++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c
+@@ -171,12 +171,11 @@ static int es58x_fd_rx_event_msg(struct net_device *netdev,
+ const struct es58x_fd_rx_event_msg *rx_event_msg;
+ int ret;
+
++ rx_event_msg = &es58x_fd_urb_cmd->rx_event_msg;
+ ret = es58x_check_msg_len(es58x_dev->dev, *rx_event_msg, msg_len);
+ if (ret)
+ return ret;
+
+- rx_event_msg = &es58x_fd_urb_cmd->rx_event_msg;
+-
+ return es58x_rx_err_msg(netdev, rx_event_msg->error_code,
+ rx_event_msg->event_code,
+ get_unaligned_le64(&rx_event_msg->timestamp));
+diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
+index 5e892bef46b00..7dc4fb574e459 100644
+--- a/drivers/net/can/usb/gs_usb.c
++++ b/drivers/net/can/usb/gs_usb.c
+@@ -185,14 +185,16 @@ struct gs_can {
+
+ struct usb_anchor tx_submitted;
+ atomic_t active_tx_urbs;
++ void *rxbuf[GS_MAX_RX_URBS];
++ dma_addr_t rxbuf_dma[GS_MAX_RX_URBS];
+ };
+
+ /* usb interface struct */
+ struct gs_usb {
+ struct gs_can *canch[GS_MAX_INTF];
+ struct usb_anchor rx_submitted;
+- atomic_t active_channels;
+ struct usb_device *udev;
++ u8 active_channels;
+ };
+
+ /* 'allocate' a tx context.
+@@ -321,7 +323,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
+
+ /* device reports out of range channel id */
+ if (hf->channel >= GS_MAX_INTF)
+- goto resubmit_urb;
++ goto device_detach;
+
+ dev = usbcan->canch[hf->channel];
+
+@@ -380,6 +382,9 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
+ }
+
+ if (hf->flags & GS_CAN_FLAG_OVERFLOW) {
++ stats->rx_over_errors++;
++ stats->rx_errors++;
++
+ skb = alloc_can_err_skb(netdev, &cf);
+ if (!skb)
+ goto resubmit_urb;
+@@ -387,8 +392,6 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->len = CAN_ERR_DLC;
+ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+- stats->rx_over_errors++;
+- stats->rx_errors++;
+ netif_rx(skb);
+ }
+
+@@ -406,6 +409,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
+
+ /* USB failure take down all interfaces */
+ if (rc == -ENODEV) {
++ device_detach:
+ for (rc = 0; rc < GS_MAX_INTF; rc++) {
+ if (usbcan->canch[rc])
+ netif_device_detach(usbcan->canch[rc]->netdev);
+@@ -507,6 +511,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
+
+ hf->echo_id = idx;
+ hf->channel = dev->channel;
++ hf->flags = 0;
++ hf->reserved = 0;
+
+ cf = (struct can_frame *)skb->data;
+
+@@ -587,10 +593,11 @@ static int gs_can_open(struct net_device *netdev)
+ if (rc)
+ return rc;
+
+- if (atomic_add_return(1, &parent->active_channels) == 1) {
++ if (!parent->active_channels) {
+ for (i = 0; i < GS_MAX_RX_URBS; i++) {
+ struct urb *urb;
+ u8 *buf;
++ dma_addr_t buf_dma;
+
+ /* alloc rx urb */
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+@@ -601,7 +608,7 @@ static int gs_can_open(struct net_device *netdev)
+ buf = usb_alloc_coherent(dev->udev,
+ sizeof(struct gs_host_frame),
+ GFP_KERNEL,
+- &urb->transfer_dma);
++ &buf_dma);
+ if (!buf) {
+ netdev_err(netdev,
+ "No memory left for USB buffer\n");
+@@ -609,6 +616,8 @@ static int gs_can_open(struct net_device *netdev)
+ return -ENOMEM;
+ }
+
++ urb->transfer_dma = buf_dma;
++
+ /* fill, anchor, and submit rx urb */
+ usb_fill_bulk_urb(urb,
+ dev->udev,
+@@ -632,10 +641,17 @@ static int gs_can_open(struct net_device *netdev)
+ rc);
+
+ usb_unanchor_urb(urb);
++ usb_free_coherent(dev->udev,
++ sizeof(struct gs_host_frame),
++ buf,
++ buf_dma);
+ usb_free_urb(urb);
+ break;
+ }
+
++ dev->rxbuf[i] = buf;
++ dev->rxbuf_dma[i] = buf_dma;
++
+ /* Drop reference,
+ * USB core will take care of freeing it
+ */
+@@ -665,6 +681,7 @@ static int gs_can_open(struct net_device *netdev)
+ flags |= GS_CAN_MODE_TRIPLE_SAMPLE;
+
+ /* finally start device */
++ dev->can.state = CAN_STATE_ERROR_ACTIVE;
+ dm->mode = cpu_to_le32(GS_CAN_MODE_START);
+ dm->flags = cpu_to_le32(flags);
+ rc = usb_control_msg(interface_to_usbdev(dev->iface),
+@@ -681,13 +698,13 @@ static int gs_can_open(struct net_device *netdev)
+ if (rc < 0) {
+ netdev_err(netdev, "Couldn't start device (err=%d)\n", rc);
+ kfree(dm);
++ dev->can.state = CAN_STATE_STOPPED;
+ return rc;
+ }
+
+ kfree(dm);
+
+- dev->can.state = CAN_STATE_ERROR_ACTIVE;
+-
++ parent->active_channels++;
+ if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
+ netif_start_queue(netdev);
+
+@@ -699,17 +716,27 @@ static int gs_can_close(struct net_device *netdev)
+ int rc;
+ struct gs_can *dev = netdev_priv(netdev);
+ struct gs_usb *parent = dev->parent;
++ unsigned int i;
+
+ netif_stop_queue(netdev);
+
+ /* Stop polling */
+- if (atomic_dec_and_test(&parent->active_channels))
++ parent->active_channels--;
++ if (!parent->active_channels) {
+ usb_kill_anchored_urbs(&parent->rx_submitted);
++ for (i = 0; i < GS_MAX_RX_URBS; i++)
++ usb_free_coherent(dev->udev,
++ sizeof(struct gs_host_frame),
++ dev->rxbuf[i],
++ dev->rxbuf_dma[i]);
++ }
+
+ /* Stop sending URBs */
+ usb_kill_anchored_urbs(&dev->tx_submitted);
+ atomic_set(&dev->active_tx_urbs, 0);
+
++ dev->can.state = CAN_STATE_STOPPED;
++
+ /* reset the device */
+ rc = gs_cmd_reset(dev);
+ if (rc < 0)
+@@ -982,8 +1009,6 @@ static int gs_usb_probe(struct usb_interface *intf,
+
+ init_usb_anchor(&dev->rx_submitted);
+
+- atomic_set(&dev->active_channels, 0);
+-
+ usb_set_intfdata(intf, dev);
+ dev->udev = interface_to_usbdev(intf);
+
+diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h
+index 390b6bde883c8..5699531f87873 100644
+--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h
++++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h
+@@ -35,9 +35,10 @@
+ #define KVASER_USB_RX_BUFFER_SIZE 3072
+ #define KVASER_USB_MAX_NET_DEVICES 5
+
+-/* USB devices features */
+-#define KVASER_USB_HAS_SILENT_MODE BIT(0)
+-#define KVASER_USB_HAS_TXRX_ERRORS BIT(1)
++/* Kvaser USB device quirks */
++#define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0)
++#define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1)
++#define KVASER_USB_QUIRK_IGNORE_CLK_FREQ BIT(2)
+
+ /* Device capabilities */
+ #define KVASER_USB_CAP_BERR_CAP 0x01
+@@ -65,12 +66,7 @@ struct kvaser_usb_dev_card_data_hydra {
+ struct kvaser_usb_dev_card_data {
+ u32 ctrlmode_supported;
+ u32 capabilities;
+- union {
+- struct {
+- enum kvaser_usb_leaf_family family;
+- } leaf;
+- struct kvaser_usb_dev_card_data_hydra hydra;
+- };
++ struct kvaser_usb_dev_card_data_hydra hydra;
+ };
+
+ /* Context for an outstanding, not yet ACKed, transmission */
+@@ -80,11 +76,19 @@ struct kvaser_usb_tx_urb_context {
+ int dlc;
+ };
+
++struct kvaser_usb_busparams {
++ __le32 bitrate;
++ u8 tseg1;
++ u8 tseg2;
++ u8 sjw;
++ u8 nsamples;
++} __packed;
++
+ struct kvaser_usb {
+ struct usb_device *udev;
+ struct usb_interface *intf;
+ struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES];
+- const struct kvaser_usb_dev_ops *ops;
++ const struct kvaser_usb_driver_info *driver_info;
+ const struct kvaser_usb_dev_cfg *cfg;
+
+ struct usb_endpoint_descriptor *bulk_in, *bulk_out;
+@@ -108,13 +112,19 @@ struct kvaser_usb_net_priv {
+ struct can_priv can;
+ struct can_berr_counter bec;
+
++ /* subdriver-specific data */
++ void *sub_priv;
++
+ struct kvaser_usb *dev;
+ struct net_device *netdev;
+ int channel;
+
+- struct completion start_comp, stop_comp, flush_comp;
++ struct completion start_comp, stop_comp, flush_comp,
++ get_busparams_comp;
+ struct usb_anchor tx_submitted;
+
++ struct kvaser_usb_busparams busparams_nominal, busparams_data;
++
+ spinlock_t tx_contexts_lock; /* lock for active_tx_contexts */
+ int active_tx_contexts;
+ struct kvaser_usb_tx_urb_context tx_contexts[];
+@@ -124,11 +134,15 @@ struct kvaser_usb_net_priv {
+ * struct kvaser_usb_dev_ops - Device specific functions
+ * @dev_set_mode: used for can.do_set_mode
+ * @dev_set_bittiming: used for can.do_set_bittiming
++ * @dev_get_busparams: readback arbitration busparams
+ * @dev_set_data_bittiming: used for can.do_set_data_bittiming
++ * @dev_get_data_busparams: readback data busparams
+ * @dev_get_berr_counter: used for can.do_get_berr_counter
+ *
+ * @dev_setup_endpoints: setup USB in and out endpoints
+ * @dev_init_card: initialize card
++ * @dev_init_channel: initialize channel
++ * @dev_remove_channel: uninitialize channel
+ * @dev_get_software_info: get software info
+ * @dev_get_software_details: get software details
+ * @dev_get_card_info: get card info
+@@ -144,12 +158,18 @@ struct kvaser_usb_net_priv {
+ */
+ struct kvaser_usb_dev_ops {
+ int (*dev_set_mode)(struct net_device *netdev, enum can_mode mode);
+- int (*dev_set_bittiming)(struct net_device *netdev);
+- int (*dev_set_data_bittiming)(struct net_device *netdev);
++ int (*dev_set_bittiming)(const struct net_device *netdev,
++ const struct kvaser_usb_busparams *busparams);
++ int (*dev_get_busparams)(struct kvaser_usb_net_priv *priv);
++ int (*dev_set_data_bittiming)(const struct net_device *netdev,
++ const struct kvaser_usb_busparams *busparams);
++ int (*dev_get_data_busparams)(struct kvaser_usb_net_priv *priv);
+ int (*dev_get_berr_counter)(const struct net_device *netdev,
+ struct can_berr_counter *bec);
+ int (*dev_setup_endpoints)(struct kvaser_usb *dev);
+ int (*dev_init_card)(struct kvaser_usb *dev);
++ int (*dev_init_channel)(struct kvaser_usb_net_priv *priv);
++ void (*dev_remove_channel)(struct kvaser_usb_net_priv *priv);
+ int (*dev_get_software_info)(struct kvaser_usb *dev);
+ int (*dev_get_software_details)(struct kvaser_usb *dev);
+ int (*dev_get_card_info)(struct kvaser_usb *dev);
+@@ -166,6 +186,12 @@ struct kvaser_usb_dev_ops {
+ int *cmd_len, u16 transid);
+ };
+
++struct kvaser_usb_driver_info {
++ u32 quirks;
++ enum kvaser_usb_leaf_family family;
++ const struct kvaser_usb_dev_ops *ops;
++};
++
+ struct kvaser_usb_dev_cfg {
+ const struct can_clock clock;
+ const unsigned int timestamp_freq;
+@@ -176,6 +202,8 @@ struct kvaser_usb_dev_cfg {
+ extern const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops;
+ extern const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops;
+
++void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv);
++
+ int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len,
+ int *actual_len);
+
+@@ -185,4 +213,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd,
+ int len);
+
+ int kvaser_usb_can_rx_over_error(struct net_device *netdev);
++
++extern const struct can_bittiming_const kvaser_usb_flexc_bittiming_const;
++
+ #endif /* KVASER_USB_H */
+diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+index 0cc0fc866a2a9..09dbc51347d70 100644
+--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
++++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+@@ -61,8 +61,6 @@
+ #define USB_USBCAN_R_V2_PRODUCT_ID 294
+ #define USB_LEAF_LIGHT_R_V2_PRODUCT_ID 295
+ #define USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID 296
+-#define USB_LEAF_PRODUCT_ID_END \
+- USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID
+
+ /* Kvaser USBCan-II devices product ids */
+ #define USB_USBCAN_REVB_PRODUCT_ID 2
+@@ -89,116 +87,153 @@
+ #define USB_USBCAN_PRO_4HS_PRODUCT_ID 276
+ #define USB_HYBRID_CANLIN_PRODUCT_ID 277
+ #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 278
+-#define USB_HYDRA_PRODUCT_ID_END \
+- USB_HYBRID_PRO_CANLIN_PRODUCT_ID
+
+-static inline bool kvaser_is_leaf(const struct usb_device_id *id)
+-{
+- return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
+- id->idProduct <= USB_CAN_R_PRODUCT_ID) ||
+- (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID &&
+- id->idProduct <= USB_LEAF_PRODUCT_ID_END);
+-}
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = {
++ .quirks = 0,
++ .ops = &kvaser_usb_hydra_dev_ops,
++};
+
+-static inline bool kvaser_is_usbcan(const struct usb_device_id *id)
+-{
+- return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID &&
+- id->idProduct <= USB_MEMORATOR_PRODUCT_ID;
+-}
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = {
++ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
++ KVASER_USB_QUIRK_HAS_SILENT_MODE,
++ .family = KVASER_USBCAN,
++ .ops = &kvaser_usb_leaf_dev_ops,
++};
+
+-static inline bool kvaser_is_hydra(const struct usb_device_id *id)
+-{
+- return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID &&
+- id->idProduct <= USB_HYDRA_PRODUCT_ID_END;
+-}
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = {
++ .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
++ .family = KVASER_LEAF,
++ .ops = &kvaser_usb_leaf_dev_ops,
++};
++
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = {
++ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
++ KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
++ .family = KVASER_LEAF,
++ .ops = &kvaser_usb_leaf_dev_ops,
++};
++
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = {
++ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
++ KVASER_USB_QUIRK_HAS_SILENT_MODE |
++ KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
++ .family = KVASER_LEAF,
++ .ops = &kvaser_usb_leaf_dev_ops,
++};
++
++static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = {
++ .quirks = 0,
++ .ops = &kvaser_usb_leaf_dev_ops,
++};
+
+ static const struct usb_device_id kvaser_usb_table[] = {
+- /* Leaf USB product IDs */
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
++ /* Leaf M32C USB product IDs */
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+- KVASER_USB_HAS_SILENT_MODE },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID) },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
++
++ /* Leaf i.MX28 USB product IDs */
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+
+ /* USBCANII USB product IDs */
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
+- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
+
+ /* Minihydra USB product IDs */
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) },
+- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
++ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID),
++ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { }
+ };
+ MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
+@@ -289,6 +324,7 @@ int kvaser_usb_can_rx_over_error(struct net_device *netdev)
+ static void kvaser_usb_read_bulk_callback(struct urb *urb)
+ {
+ struct kvaser_usb *dev = urb->context;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
+ int err;
+ unsigned int i;
+
+@@ -305,8 +341,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
+ goto resubmit_urb;
+ }
+
+- dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer,
+- urb->actual_length);
++ ops->dev_read_bulk_callback(dev, urb->transfer_buffer,
++ urb->actual_length);
+
+ resubmit_urb:
+ usb_fill_bulk_urb(urb, dev->udev,
+@@ -400,21 +436,18 @@ static int kvaser_usb_open(struct net_device *netdev)
+ {
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+ struct kvaser_usb *dev = priv->dev;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
+ int err;
+
+ err = open_candev(netdev);
+ if (err)
+ return err;
+
+- err = kvaser_usb_setup_rx_urbs(dev);
+- if (err)
+- goto error;
+-
+- err = dev->ops->dev_set_opt_mode(priv);
++ err = ops->dev_set_opt_mode(priv);
+ if (err)
+ goto error;
+
+- err = dev->ops->dev_start_chip(priv);
++ err = ops->dev_start_chip(priv);
+ if (err) {
+ netdev_warn(netdev, "Cannot start device, error %d\n", err);
+ goto error;
+@@ -443,7 +476,7 @@ static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv)
+ /* This method might sleep. Do not call it in the atomic context
+ * of URB completions.
+ */
+-static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
++void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
+ {
+ usb_kill_anchored_urbs(&priv->tx_submitted);
+ kvaser_usb_reset_tx_urb_contexts(priv);
+@@ -471,22 +504,23 @@ static int kvaser_usb_close(struct net_device *netdev)
+ {
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+ struct kvaser_usb *dev = priv->dev;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
+ int err;
+
+ netif_stop_queue(netdev);
+
+- err = dev->ops->dev_flush_queue(priv);
++ err = ops->dev_flush_queue(priv);
+ if (err)
+ netdev_warn(netdev, "Cannot flush queue, error %d\n", err);
+
+- if (dev->ops->dev_reset_chip) {
+- err = dev->ops->dev_reset_chip(dev, priv->channel);
++ if (ops->dev_reset_chip) {
++ err = ops->dev_reset_chip(dev, priv->channel);
+ if (err)
+ netdev_warn(netdev, "Cannot reset card, error %d\n",
+ err);
+ }
+
+- err = dev->ops->dev_stop_chip(priv);
++ err = ops->dev_stop_chip(priv);
+ if (err)
+ netdev_warn(netdev, "Cannot stop device, error %d\n", err);
+
+@@ -499,6 +533,93 @@ static int kvaser_usb_close(struct net_device *netdev)
+ return 0;
+ }
+
++static int kvaser_usb_set_bittiming(struct net_device *netdev)
++{
++ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
++ struct kvaser_usb *dev = priv->dev;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
++ struct can_bittiming *bt = &priv->can.bittiming;
++
++ struct kvaser_usb_busparams busparams;
++ int tseg1 = bt->prop_seg + bt->phase_seg1;
++ int tseg2 = bt->phase_seg2;
++ int sjw = bt->sjw;
++ int err = -EOPNOTSUPP;
++
++ busparams.bitrate = cpu_to_le32(bt->bitrate);
++ busparams.sjw = (u8)sjw;
++ busparams.tseg1 = (u8)tseg1;
++ busparams.tseg2 = (u8)tseg2;
++ if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
++ busparams.nsamples = 3;
++ else
++ busparams.nsamples = 1;
++
++ err = ops->dev_set_bittiming(netdev, &busparams);
++ if (err)
++ return err;
++
++ err = kvaser_usb_setup_rx_urbs(priv->dev);
++ if (err)
++ return err;
++
++ err = ops->dev_get_busparams(priv);
++ if (err) {
++ /* Treat EOPNOTSUPP as success */
++ if (err == -EOPNOTSUPP)
++ err = 0;
++ return err;
++ }
++
++ if (memcmp(&busparams, &priv->busparams_nominal,
++ sizeof(priv->busparams_nominal)) != 0)
++ err = -EINVAL;
++
++ return err;
++}
++
++static int kvaser_usb_set_data_bittiming(struct net_device *netdev)
++{
++ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
++ struct kvaser_usb *dev = priv->dev;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
++ struct can_bittiming *dbt = &priv->can.data_bittiming;
++
++ struct kvaser_usb_busparams busparams;
++ int tseg1 = dbt->prop_seg + dbt->phase_seg1;
++ int tseg2 = dbt->phase_seg2;
++ int sjw = dbt->sjw;
++ int err;
++
++ if (!ops->dev_set_data_bittiming ||
++ !ops->dev_get_data_busparams)
++ return -EOPNOTSUPP;
++
++ busparams.bitrate = cpu_to_le32(dbt->bitrate);
++ busparams.sjw = (u8)sjw;
++ busparams.tseg1 = (u8)tseg1;
++ busparams.tseg2 = (u8)tseg2;
++ busparams.nsamples = 1;
++
++ err = ops->dev_set_data_bittiming(netdev, &busparams);
++ if (err)
++ return err;
++
++ err = kvaser_usb_setup_rx_urbs(priv->dev);
++ if (err)
++ return err;
++
++ err = ops->dev_get_data_busparams(priv);
++ if (err)
++ return err;
++
++ if (memcmp(&busparams, &priv->busparams_data,
++ sizeof(priv->busparams_data)) != 0)
++ err = -EINVAL;
++
++ return err;
++}
++
+ static void kvaser_usb_write_bulk_callback(struct urb *urb)
+ {
+ struct kvaser_usb_tx_urb_context *context = urb->context;
+@@ -525,6 +646,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
+ {
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+ struct kvaser_usb *dev = priv->dev;
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
+ struct net_device_stats *stats = &netdev->stats;
+ struct kvaser_usb_tx_urb_context *context = NULL;
+ struct urb *urb;
+@@ -567,8 +689,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
+ goto freeurb;
+ }
+
+- buf = dev->ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len,
+- context->echo_index);
++ buf = ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len,
++ context->echo_index);
+ if (!buf) {
+ stats->tx_dropped++;
+ dev_kfree_skb(skb);
+@@ -633,6 +755,7 @@ static const struct net_device_ops kvaser_usb_netdev_ops = {
+
+ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev)
+ {
++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
+ int i;
+
+ for (i = 0; i < dev->nchannels; i++) {
+@@ -648,19 +771,23 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev)
+ if (!dev->nets[i])
+ continue;
+
++ if (ops->dev_remove_channel)
++ ops->dev_remove_channel(dev->nets[i]);
++
+ free_candev(dev->nets[i]->netdev);
+ }
+ }
+
+-static int kvaser_usb_init_one(struct kvaser_usb *dev,
+- const struct usb_device_id *id, int channel)
++static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel)
+ {
+ struct net_device *netdev;
+ struct kvaser_usb_net_priv *priv;
++ const struct kvaser_usb_driver_info *driver_info = dev->driver_info;
++ const struct kvaser_usb_dev_ops *ops = driver_info->ops;
+ int err;
+
+- if (dev->ops->dev_reset_chip) {
+- err = dev->ops->dev_reset_chip(dev, channel);
++ if (ops->dev_reset_chip) {
++ err = ops->dev_reset_chip(dev, channel);
+ if (err)
+ return err;
+ }
+@@ -677,6 +804,8 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev,
+ init_usb_anchor(&priv->tx_submitted);
+ init_completion(&priv->start_comp);
+ init_completion(&priv->stop_comp);
++ init_completion(&priv->flush_comp);
++ init_completion(&priv->get_busparams_comp);
+ priv->can.ctrlmode_supported = 0;
+
+ priv->dev = dev;
+@@ -689,20 +818,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev,
+ priv->can.state = CAN_STATE_STOPPED;
+ priv->can.clock.freq = dev->cfg->clock.freq;
+ priv->can.bittiming_const = dev->cfg->bittiming_const;
+- priv->can.do_set_bittiming = dev->ops->dev_set_bittiming;
+- priv->can.do_set_mode = dev->ops->dev_set_mode;
+- if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) ||
++ priv->can.do_set_bittiming = kvaser_usb_set_bittiming;
++ priv->can.do_set_mode = ops->dev_set_mode;
++ if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) ||
+ (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP))
+- priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter;
+- if (id->driver_info & KVASER_USB_HAS_SILENT_MODE)
++ priv->can.do_get_berr_counter = ops->dev_get_berr_counter;
++ if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE)
+ priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
+
+ priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported;
+
+ if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
+ priv->can.data_bittiming_const = dev->cfg->data_bittiming_const;
+- priv->can.do_set_data_bittiming =
+- dev->ops->dev_set_data_bittiming;
++ priv->can.do_set_data_bittiming = kvaser_usb_set_data_bittiming;
+ }
+
+ netdev->flags |= IFF_ECHO;
+@@ -714,17 +842,26 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev,
+
+ dev->nets[channel] = priv;
+
++ if (ops->dev_init_channel) {
++ err = ops->dev_init_channel(priv);
++ if (err)
++ goto err;
++ }
++
+ err = register_candev(netdev);
+ if (err) {
+ dev_err(&dev->intf->dev, "Failed to register CAN device\n");
+- free_candev(netdev);
+- dev->nets[channel] = NULL;
+- return err;
++ goto err;
+ }
+
+ netdev_dbg(netdev, "device registered\n");
+
+ return 0;
++
++err:
++ free_candev(netdev);
++ dev->nets[channel] = NULL;
++ return err;
+ }
+
+ static int kvaser_usb_probe(struct usb_interface *intf,
+@@ -733,29 +870,22 @@ static int kvaser_usb_probe(struct usb_interface *intf,
+ struct kvaser_usb *dev;
+ int err;
+ int i;
++ const struct kvaser_usb_driver_info *driver_info;
++ const struct kvaser_usb_dev_ops *ops;
++
++ driver_info = (const struct kvaser_usb_driver_info *)id->driver_info;
++ if (!driver_info)
++ return -ENODEV;
+
+ dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+- if (kvaser_is_leaf(id)) {
+- dev->card_data.leaf.family = KVASER_LEAF;
+- dev->ops = &kvaser_usb_leaf_dev_ops;
+- } else if (kvaser_is_usbcan(id)) {
+- dev->card_data.leaf.family = KVASER_USBCAN;
+- dev->ops = &kvaser_usb_leaf_dev_ops;
+- } else if (kvaser_is_hydra(id)) {
+- dev->ops = &kvaser_usb_hydra_dev_ops;
+- } else {
+- dev_err(&intf->dev,
+- "Product ID (%d) is not a supported Kvaser USB device\n",
+- id->idProduct);
+- return -ENODEV;
+- }
+-
+ dev->intf = intf;
++ dev->driver_info = driver_info;
++ ops = driver_info->ops;
+
+- err = dev->ops->dev_setup_endpoints(dev);
++ err = ops->dev_setup_endpoints(dev);
+ if (err) {
+ dev_err(&intf->dev, "Cannot get usb endpoint(s)");
+ return err;
+@@ -769,22 +899,22 @@ static int kvaser_usb_probe(struct usb_interface *intf,
+
+ dev->card_data.ctrlmode_supported = 0;
+ dev->card_data.capabilities = 0;
+- err = dev->ops->dev_init_card(dev);
++ err = ops->dev_init_card(dev);
+ if (err) {
+ dev_err(&intf->dev,
+ "Failed to initialize card, error %d\n", err);
+ return err;
+ }
+
+- err = dev->ops->dev_get_software_info(dev);
++ err = ops->dev_get_software_info(dev);
+ if (err) {
+ dev_err(&intf->dev,
+ "Cannot get software info, error %d\n", err);
+ return err;
+ }
+
+- if (dev->ops->dev_get_software_details) {
+- err = dev->ops->dev_get_software_details(dev);
++ if (ops->dev_get_software_details) {
++ err = ops->dev_get_software_details(dev);
+ if (err) {
+ dev_err(&intf->dev,
+ "Cannot get software details, error %d\n", err);
+@@ -802,14 +932,14 @@ static int kvaser_usb_probe(struct usb_interface *intf,
+
+ dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs);
+
+- err = dev->ops->dev_get_card_info(dev);
++ err = ops->dev_get_card_info(dev);
+ if (err) {
+ dev_err(&intf->dev, "Cannot get card info, error %d\n", err);
+ return err;
+ }
+
+- if (dev->ops->dev_get_capabilities) {
+- err = dev->ops->dev_get_capabilities(dev);
++ if (ops->dev_get_capabilities) {
++ err = ops->dev_get_capabilities(dev);
+ if (err) {
+ dev_err(&intf->dev,
+ "Cannot get capabilities, error %d\n", err);
+@@ -819,7 +949,7 @@ static int kvaser_usb_probe(struct usb_interface *intf,
+ }
+
+ for (i = 0; i < dev->nchannels; i++) {
+- err = kvaser_usb_init_one(dev, id, i);
++ err = kvaser_usb_init_one(dev, i);
+ if (err) {
+ kvaser_usb_remove_interfaces(dev);
+ return err;
+diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+index dcee8dc828ecc..562105b8a6327 100644
+--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
++++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+@@ -22,6 +22,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
++#include <linux/units.h>
+ #include <linux/usb.h>
+
+ #include <linux/can.h>
+@@ -44,6 +45,8 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt;
+
+ /* Minihydra command IDs */
+ #define CMD_SET_BUSPARAMS_REQ 16
++#define CMD_GET_BUSPARAMS_REQ 17
++#define CMD_GET_BUSPARAMS_RESP 18
+ #define CMD_GET_CHIP_STATE_REQ 19
+ #define CMD_CHIP_STATE_EVENT 20
+ #define CMD_SET_DRIVERMODE_REQ 21
+@@ -195,21 +198,26 @@ struct kvaser_cmd_chip_state_event {
+ #define KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO 0x01
+ #define KVASER_USB_HYDRA_BUS_MODE_NONISO 0x02
+ struct kvaser_cmd_set_busparams {
+- __le32 bitrate;
+- u8 tseg1;
+- u8 tseg2;
+- u8 sjw;
+- u8 nsamples;
++ struct kvaser_usb_busparams busparams_nominal;
+ u8 reserved0[4];
+- __le32 bitrate_d;
+- u8 tseg1_d;
+- u8 tseg2_d;
+- u8 sjw_d;
+- u8 nsamples_d;
++ struct kvaser_usb_busparams busparams_data;
+ u8 canfd_mode;
+ u8 reserved1[7];
+ } __packed;
+
++/* Busparam type */
++#define KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN 0x00
++#define KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD 0x01
++struct kvaser_cmd_get_busparams_req {
++ u8 type;
++ u8 reserved[27];
++} __packed;
++
++struct kvaser_cmd_get_busparams_res {
++ struct kvaser_usb_busparams busparams;
++ u8 reserved[20];
++} __packed;
++
+ /* Ctrl modes */
+ #define KVASER_USB_HYDRA_CTRLMODE_NORMAL 0x01
+ #define KVASER_USB_HYDRA_CTRLMODE_LISTEN 0x02
+@@ -280,6 +288,8 @@ struct kvaser_cmd {
+ struct kvaser_cmd_error_event error_event;
+
+ struct kvaser_cmd_set_busparams set_busparams_req;
++ struct kvaser_cmd_get_busparams_req get_busparams_req;
++ struct kvaser_cmd_get_busparams_res get_busparams_res;
+
+ struct kvaser_cmd_chip_state_event chip_state_event;
+
+@@ -295,6 +305,7 @@ struct kvaser_cmd {
+ #define KVASER_USB_HYDRA_CF_FLAG_OVERRUN BIT(1)
+ #define KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME BIT(4)
+ #define KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID BIT(5)
++#define KVASER_USB_HYDRA_CF_FLAG_TX_ACK BIT(6)
+ /* CAN frame flags. Used in ext_rx_can and ext_tx_can */
+ #define KVASER_USB_HYDRA_CF_FLAG_OSM_NACK BIT(12)
+ #define KVASER_USB_HYDRA_CF_FLAG_ABL BIT(13)
+@@ -361,6 +372,10 @@ struct kvaser_cmd_ext {
+ } __packed;
+ } __packed;
+
++struct kvaser_usb_net_hydra_priv {
++ int pending_get_busparams_type;
++};
++
+ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = {
+ .name = "kvaser_usb_kcan",
+ .tseg1_min = 1,
+@@ -373,7 +388,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = {
+ .brp_inc = 1,
+ };
+
+-static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = {
++const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = {
+ .name = "kvaser_usb_flex",
+ .tseg1_min = 4,
+ .tseg1_max = 16,
+@@ -530,6 +545,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
+ u8 cmd_no, int channel)
+ {
+ struct kvaser_cmd *cmd;
++ size_t cmd_len;
+ int err;
+
+ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+@@ -537,6 +553,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
+ return -ENOMEM;
+
+ cmd->header.cmd_no = cmd_no;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+ if (channel < 0) {
+ kvaser_usb_hydra_set_cmd_dest_he
+ (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
+@@ -553,7 +570,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+ if (err)
+ goto end;
+
+@@ -569,6 +586,7 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv,
+ {
+ struct kvaser_cmd *cmd;
+ struct kvaser_usb *dev = priv->dev;
++ size_t cmd_len;
+ int err;
+
+ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_ATOMIC);
+@@ -576,14 +594,14 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv,
+ return -ENOMEM;
+
+ cmd->header.cmd_no = cmd_no;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+
+ kvaser_usb_hydra_set_cmd_dest_he
+ (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd_async(priv, cmd,
+- kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd_async(priv, cmd, cmd_len);
+ if (err)
+ kfree(cmd);
+
+@@ -727,6 +745,7 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev,
+ {
+ struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+ struct kvaser_cmd *cmd;
++ size_t cmd_len;
+ u32 value = 0;
+ u32 mask = 0;
+ u16 cap_cmd_res;
+@@ -738,13 +757,14 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev,
+ return -ENOMEM;
+
+ cmd->header.cmd_no = CMD_GET_CAPABILITIES_REQ;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+ cmd->cap_req.cap_cmd = cpu_to_le16(cap_cmd_req);
+
+ kvaser_usb_hydra_set_cmd_dest_he(cmd, card_data->hydra.sysdbg_he);
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+ if (err)
+ goto end;
+
+@@ -838,6 +858,39 @@ static void kvaser_usb_hydra_flush_queue_reply(const struct kvaser_usb *dev,
+ complete(&priv->flush_comp);
+ }
+
++static void kvaser_usb_hydra_get_busparams_reply(const struct kvaser_usb *dev,
++ const struct kvaser_cmd *cmd)
++{
++ struct kvaser_usb_net_priv *priv;
++ struct kvaser_usb_net_hydra_priv *hydra;
++
++ priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
++ if (!priv)
++ return;
++
++ hydra = priv->sub_priv;
++ if (!hydra)
++ return;
++
++ switch (hydra->pending_get_busparams_type) {
++ case KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN:
++ memcpy(&priv->busparams_nominal, &cmd->get_busparams_res.busparams,
++ sizeof(priv->busparams_nominal));
++ break;
++ case KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD:
++ memcpy(&priv->busparams_data, &cmd->get_busparams_res.busparams,
++ sizeof(priv->busparams_nominal));
++ break;
++ default:
++ dev_warn(&dev->intf->dev, "Unknown get_busparams_type %d\n",
++ hydra->pending_get_busparams_type);
++ break;
++ }
++ hydra->pending_get_busparams_type = -1;
++
++ complete(&priv->get_busparams_comp);
++}
++
+ static void
+ kvaser_usb_hydra_bus_status_to_can_state(const struct kvaser_usb_net_priv *priv,
+ u8 bus_status,
+@@ -916,8 +969,10 @@ static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv,
+ new_state < CAN_STATE_BUS_OFF)
+ priv->can.can_stats.restarts++;
+
+- cf->data[6] = bec->txerr;
+- cf->data[7] = bec->rxerr;
++ if (new_state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = bec->txerr;
++ cf->data[7] = bec->rxerr;
++ }
+
+ stats = &netdev->stats;
+ stats->rx_packets++;
+@@ -1071,8 +1126,10 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
+ shhwtstamps->hwtstamp = hwtstamp;
+
+ cf->can_id |= CAN_ERR_BUSERROR;
+- cf->data[6] = bec.txerr;
+- cf->data[7] = bec.rxerr;
++ if (new_state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = bec.txerr;
++ cf->data[7] = bec.rxerr;
++ }
+
+ stats->rx_packets++;
+ stats->rx_bytes += cf->len;
+@@ -1121,6 +1178,7 @@ static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev,
+ struct kvaser_usb_net_priv *priv;
+ unsigned long irq_flags;
+ bool one_shot_fail = false;
++ bool is_err_frame = false;
+ u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd);
+
+ priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+@@ -1139,10 +1197,13 @@ static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev,
+ kvaser_usb_hydra_one_shot_fail(priv, cmd_ext);
+ one_shot_fail = true;
+ }
++
++ is_err_frame = flags & KVASER_USB_HYDRA_CF_FLAG_TX_ACK &&
++ flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME;
+ }
+
+ context = &priv->tx_contexts[transid % dev->max_tx_urbs];
+- if (!one_shot_fail) {
++ if (!one_shot_fail && !is_err_frame) {
+ struct net_device_stats *stats = &priv->netdev->stats;
+
+ stats->tx_packets++;
+@@ -1316,6 +1377,10 @@ static void kvaser_usb_hydra_handle_cmd_std(const struct kvaser_usb *dev,
+ kvaser_usb_hydra_state_event(dev, cmd);
+ break;
+
++ case CMD_GET_BUSPARAMS_RESP:
++ kvaser_usb_hydra_get_busparams_reply(dev, cmd);
++ break;
++
+ case CMD_ERROR_EVENT:
+ kvaser_usb_hydra_error_event(dev, cmd);
+ break;
+@@ -1516,15 +1581,61 @@ static int kvaser_usb_hydra_set_mode(struct net_device *netdev,
+ return err;
+ }
+
+-static int kvaser_usb_hydra_set_bittiming(struct net_device *netdev)
++static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv,
++ int busparams_type)
++{
++ struct kvaser_usb *dev = priv->dev;
++ struct kvaser_usb_net_hydra_priv *hydra = priv->sub_priv;
++ struct kvaser_cmd *cmd;
++ size_t cmd_len;
++ int err;
++
++ if (!hydra)
++ return -EINVAL;
++
++ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
++ if (!cmd)
++ return -ENOMEM;
++
++ cmd->header.cmd_no = CMD_GET_BUSPARAMS_REQ;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
++ kvaser_usb_hydra_set_cmd_dest_he
++ (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
++ kvaser_usb_hydra_set_cmd_transid
++ (cmd, kvaser_usb_hydra_get_next_transid(dev));
++ cmd->get_busparams_req.type = busparams_type;
++ hydra->pending_get_busparams_type = busparams_type;
++
++ reinit_completion(&priv->get_busparams_comp);
++
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
++ if (err)
++ return err;
++
++ if (!wait_for_completion_timeout(&priv->get_busparams_comp,
++ msecs_to_jiffies(KVASER_USB_TIMEOUT)))
++ return -ETIMEDOUT;
++
++ return err;
++}
++
++static int kvaser_usb_hydra_get_nominal_busparams(struct kvaser_usb_net_priv *priv)
++{
++ return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN);
++}
++
++static int kvaser_usb_hydra_get_data_busparams(struct kvaser_usb_net_priv *priv)
++{
++ return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD);
++}
++
++static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev,
++ const struct kvaser_usb_busparams *busparams)
+ {
+ struct kvaser_cmd *cmd;
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+- struct can_bittiming *bt = &priv->can.bittiming;
+ struct kvaser_usb *dev = priv->dev;
+- int tseg1 = bt->prop_seg + bt->phase_seg1;
+- int tseg2 = bt->phase_seg2;
+- int sjw = bt->sjw;
++ size_t cmd_len;
+ int err;
+
+ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+@@ -1532,33 +1643,29 @@ static int kvaser_usb_hydra_set_bittiming(struct net_device *netdev)
+ return -ENOMEM;
+
+ cmd->header.cmd_no = CMD_SET_BUSPARAMS_REQ;
+- cmd->set_busparams_req.bitrate = cpu_to_le32(bt->bitrate);
+- cmd->set_busparams_req.sjw = (u8)sjw;
+- cmd->set_busparams_req.tseg1 = (u8)tseg1;
+- cmd->set_busparams_req.tseg2 = (u8)tseg2;
+- cmd->set_busparams_req.nsamples = 1;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
++ memcpy(&cmd->set_busparams_req.busparams_nominal, busparams,
++ sizeof(cmd->set_busparams_req.busparams_nominal));
+
+ kvaser_usb_hydra_set_cmd_dest_he
+ (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+
+ kfree(cmd);
+
+ return err;
+ }
+
+-static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev)
++static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev,
++ const struct kvaser_usb_busparams *busparams)
+ {
+ struct kvaser_cmd *cmd;
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+- struct can_bittiming *dbt = &priv->can.data_bittiming;
+ struct kvaser_usb *dev = priv->dev;
+- int tseg1 = dbt->prop_seg + dbt->phase_seg1;
+- int tseg2 = dbt->phase_seg2;
+- int sjw = dbt->sjw;
++ size_t cmd_len;
+ int err;
+
+ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+@@ -1566,11 +1673,9 @@ static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev)
+ return -ENOMEM;
+
+ cmd->header.cmd_no = CMD_SET_BUSPARAMS_FD_REQ;
+- cmd->set_busparams_req.bitrate_d = cpu_to_le32(dbt->bitrate);
+- cmd->set_busparams_req.sjw_d = (u8)sjw;
+- cmd->set_busparams_req.tseg1_d = (u8)tseg1;
+- cmd->set_busparams_req.tseg2_d = (u8)tseg2;
+- cmd->set_busparams_req.nsamples_d = 1;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
++ memcpy(&cmd->set_busparams_req.busparams_data, busparams,
++ sizeof(cmd->set_busparams_req.busparams_data));
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO)
+@@ -1586,7 +1691,7 @@ static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev)
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+
+ kfree(cmd);
+
+@@ -1677,6 +1782,19 @@ static int kvaser_usb_hydra_init_card(struct kvaser_usb *dev)
+ return 0;
+ }
+
++static int kvaser_usb_hydra_init_channel(struct kvaser_usb_net_priv *priv)
++{
++ struct kvaser_usb_net_hydra_priv *hydra;
++
++ hydra = devm_kzalloc(&priv->dev->intf->dev, sizeof(*hydra), GFP_KERNEL);
++ if (!hydra)
++ return -ENOMEM;
++
++ priv->sub_priv = hydra;
++
++ return 0;
++}
++
+ static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev)
+ {
+ struct kvaser_cmd cmd;
+@@ -1701,6 +1819,7 @@ static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev)
+ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
+ {
+ struct kvaser_cmd *cmd;
++ size_t cmd_len;
+ int err;
+ u32 flags;
+ struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+@@ -1710,6 +1829,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
+ return -ENOMEM;
+
+ cmd->header.cmd_no = CMD_GET_SOFTWARE_DETAILS_REQ;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+ cmd->sw_detail_req.use_ext_cmd = 1;
+ kvaser_usb_hydra_set_cmd_dest_he
+ (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
+@@ -1717,7 +1837,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
+ kvaser_usb_hydra_set_cmd_transid
+ (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+ if (err)
+ goto end;
+
+@@ -1835,6 +1955,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
+ {
+ struct kvaser_usb *dev = priv->dev;
+ struct kvaser_cmd *cmd;
++ size_t cmd_len;
+ int err;
+
+ if ((priv->can.ctrlmode &
+@@ -1850,6 +1971,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
+ return -ENOMEM;
+
+ cmd->header.cmd_no = CMD_SET_DRIVERMODE_REQ;
++ cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+ kvaser_usb_hydra_set_cmd_dest_he
+ (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+ kvaser_usb_hydra_set_cmd_transid
+@@ -1859,7 +1981,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
+ else
+ cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_NORMAL;
+
+- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
+ kfree(cmd);
+
+ return err;
+@@ -1869,7 +1991,7 @@ static int kvaser_usb_hydra_start_chip(struct kvaser_usb_net_priv *priv)
+ {
+ int err;
+
+- init_completion(&priv->start_comp);
++ reinit_completion(&priv->start_comp);
+
+ err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_START_CHIP_REQ,
+ priv->channel);
+@@ -1887,7 +2009,7 @@ static int kvaser_usb_hydra_stop_chip(struct kvaser_usb_net_priv *priv)
+ {
+ int err;
+
+- init_completion(&priv->stop_comp);
++ reinit_completion(&priv->stop_comp);
+
+ /* Make sure we do not report invalid BUS_OFF from CMD_CHIP_STATE_EVENT
+ * see comment in kvaser_usb_hydra_update_state()
+@@ -1910,7 +2032,7 @@ static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv)
+ {
+ int err;
+
+- init_completion(&priv->flush_comp);
++ reinit_completion(&priv->flush_comp);
+
+ err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE,
+ priv->channel);
+@@ -2021,10 +2143,13 @@ kvaser_usb_hydra_frame_to_cmd(const struct kvaser_usb_net_priv *priv,
+ const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = {
+ .dev_set_mode = kvaser_usb_hydra_set_mode,
+ .dev_set_bittiming = kvaser_usb_hydra_set_bittiming,
++ .dev_get_busparams = kvaser_usb_hydra_get_nominal_busparams,
+ .dev_set_data_bittiming = kvaser_usb_hydra_set_data_bittiming,
++ .dev_get_data_busparams = kvaser_usb_hydra_get_data_busparams,
+ .dev_get_berr_counter = kvaser_usb_hydra_get_berr_counter,
+ .dev_setup_endpoints = kvaser_usb_hydra_setup_endpoints,
+ .dev_init_card = kvaser_usb_hydra_init_card,
++ .dev_init_channel = kvaser_usb_hydra_init_channel,
+ .dev_get_software_info = kvaser_usb_hydra_get_software_info,
+ .dev_get_software_details = kvaser_usb_hydra_get_software_details,
+ .dev_get_card_info = kvaser_usb_hydra_get_card_info,
+@@ -2040,7 +2165,7 @@ const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = {
+
+ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = {
+ .clock = {
+- .freq = 80000000,
++ .freq = 80 * MEGA /* Hz */,
+ },
+ .timestamp_freq = 80,
+ .bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c,
+@@ -2049,15 +2174,15 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = {
+
+ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = {
+ .clock = {
+- .freq = 24000000,
++ .freq = 24 * MEGA /* Hz */,
+ },
+ .timestamp_freq = 1,
+- .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c,
++ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
+ };
+
+ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt = {
+ .clock = {
+- .freq = 80000000,
++ .freq = 80 * MEGA /* Hz */,
+ },
+ .timestamp_freq = 24,
+ .bittiming_const = &kvaser_usb_hydra_rt_bittiming_c,
+diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
+index 59ba7c7beec00..ad3103391c793 100644
+--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
++++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
+@@ -19,7 +19,9 @@
+ #include <linux/spinlock.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
++#include <linux/units.h>
+ #include <linux/usb.h>
++#include <linux/workqueue.h>
+
+ #include <linux/can.h>
+ #include <linux/can/dev.h>
+@@ -28,10 +30,6 @@
+
+ #include "kvaser_usb.h"
+
+-/* Forward declaration */
+-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg;
+-
+-#define CAN_USB_CLOCK 8000000
+ #define MAX_USBCAN_NET_DEVICES 2
+
+ /* Command header size */
+@@ -59,6 +57,9 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg;
+ #define CMD_RX_EXT_MESSAGE 14
+ #define CMD_TX_EXT_MESSAGE 15
+ #define CMD_SET_BUS_PARAMS 16
++#define CMD_GET_BUS_PARAMS 17
++#define CMD_GET_BUS_PARAMS_REPLY 18
++#define CMD_GET_CHIP_STATE 19
+ #define CMD_CHIP_STATE_EVENT 20
+ #define CMD_SET_CTRL_MODE 21
+ #define CMD_RESET_CHIP 24
+@@ -73,13 +74,24 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg;
+ #define CMD_GET_CARD_INFO_REPLY 35
+ #define CMD_GET_SOFTWARE_INFO 38
+ #define CMD_GET_SOFTWARE_INFO_REPLY 39
++#define CMD_ERROR_EVENT 45
+ #define CMD_FLUSH_QUEUE 48
+ #define CMD_TX_ACKNOWLEDGE 50
+ #define CMD_CAN_ERROR_EVENT 51
+ #define CMD_FLUSH_QUEUE_REPLY 68
++#define CMD_GET_CAPABILITIES_REQ 95
++#define CMD_GET_CAPABILITIES_RESP 96
+
+ #define CMD_LEAF_LOG_MESSAGE 106
+
++/* Leaf frequency options */
++#define KVASER_USB_LEAF_SWOPTION_FREQ_MASK 0x60
++#define KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0
++#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5)
++#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6)
++
++#define KVASER_USB_LEAF_SWOPTION_EXT_CAP BIT(12)
++
+ /* error factors */
+ #define M16C_EF_ACKE BIT(0)
+ #define M16C_EF_CRCE BIT(1)
+@@ -98,16 +110,6 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg;
+ #define USBCAN_ERROR_STATE_RX_ERROR BIT(1)
+ #define USBCAN_ERROR_STATE_BUSERROR BIT(2)
+
+-/* bittiming parameters */
+-#define KVASER_USB_TSEG1_MIN 1
+-#define KVASER_USB_TSEG1_MAX 16
+-#define KVASER_USB_TSEG2_MIN 1
+-#define KVASER_USB_TSEG2_MAX 8
+-#define KVASER_USB_SJW_MAX 4
+-#define KVASER_USB_BRP_MIN 1
+-#define KVASER_USB_BRP_MAX 64
+-#define KVASER_USB_BRP_INC 1
+-
+ /* ctrl modes */
+ #define KVASER_CTRL_MODE_NORMAL 1
+ #define KVASER_CTRL_MODE_SILENT 2
+@@ -164,11 +166,7 @@ struct usbcan_cmd_softinfo {
+ struct kvaser_cmd_busparams {
+ u8 tid;
+ u8 channel;
+- __le32 bitrate;
+- u8 tseg1;
+- u8 tseg2;
+- u8 sjw;
+- u8 no_samp;
++ struct kvaser_usb_busparams busparams;
+ } __packed;
+
+ struct kvaser_cmd_tx_can {
+@@ -237,7 +235,7 @@ struct kvaser_cmd_tx_acknowledge_header {
+ u8 tid;
+ } __packed;
+
+-struct leaf_cmd_error_event {
++struct leaf_cmd_can_error_event {
+ u8 tid;
+ u8 flags;
+ __le16 time[3];
+@@ -249,7 +247,7 @@ struct leaf_cmd_error_event {
+ u8 error_factor;
+ } __packed;
+
+-struct usbcan_cmd_error_event {
++struct usbcan_cmd_can_error_event {
+ u8 tid;
+ u8 padding;
+ u8 tx_errors_count_ch0;
+@@ -261,6 +259,28 @@ struct usbcan_cmd_error_event {
+ __le16 time;
+ } __packed;
+
++/* CMD_ERROR_EVENT error codes */
++#define KVASER_USB_LEAF_ERROR_EVENT_TX_QUEUE_FULL 0x8
++#define KVASER_USB_LEAF_ERROR_EVENT_PARAM 0x9
++
++struct leaf_cmd_error_event {
++ u8 tid;
++ u8 error_code;
++ __le16 timestamp[3];
++ __le16 padding;
++ __le16 info1;
++ __le16 info2;
++} __packed;
++
++struct usbcan_cmd_error_event {
++ u8 tid;
++ u8 error_code;
++ __le16 info1;
++ __le16 info2;
++ __le16 timestamp;
++ __le16 padding;
++} __packed;
++
+ struct kvaser_cmd_ctrl_mode {
+ u8 tid;
+ u8 channel;
+@@ -285,6 +305,28 @@ struct leaf_cmd_log_message {
+ u8 data[8];
+ } __packed;
+
++/* Sub commands for cap_req and cap_res */
++#define KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE 0x02
++#define KVASER_USB_LEAF_CAP_CMD_ERR_REPORT 0x05
++struct kvaser_cmd_cap_req {
++ __le16 padding0;
++ __le16 cap_cmd;
++ __le16 padding1;
++ __le16 channel;
++} __packed;
++
++/* Status codes for cap_res */
++#define KVASER_USB_LEAF_CAP_STAT_OK 0x00
++#define KVASER_USB_LEAF_CAP_STAT_NOT_IMPL 0x01
++#define KVASER_USB_LEAF_CAP_STAT_UNAVAIL 0x02
++struct kvaser_cmd_cap_res {
++ __le16 padding;
++ __le16 cap_cmd;
++ __le16 status;
++ __le32 mask;
++ __le32 value;
++} __packed;
++
+ struct kvaser_cmd {
+ u8 len;
+ u8 id;
+@@ -300,14 +342,18 @@ struct kvaser_cmd {
+ struct leaf_cmd_softinfo softinfo;
+ struct leaf_cmd_rx_can rx_can;
+ struct leaf_cmd_chip_state_event chip_state_event;
+- struct leaf_cmd_error_event error_event;
++ struct leaf_cmd_can_error_event can_error_event;
+ struct leaf_cmd_log_message log_message;
++ struct leaf_cmd_error_event error_event;
++ struct kvaser_cmd_cap_req cap_req;
++ struct kvaser_cmd_cap_res cap_res;
+ } __packed leaf;
+
+ union {
+ struct usbcan_cmd_softinfo softinfo;
+ struct usbcan_cmd_rx_can rx_can;
+ struct usbcan_cmd_chip_state_event chip_state_event;
++ struct usbcan_cmd_can_error_event can_error_event;
+ struct usbcan_cmd_error_event error_event;
+ } __packed usbcan;
+
+@@ -317,6 +363,42 @@ struct kvaser_cmd {
+ } u;
+ } __packed;
+
++#define CMD_SIZE_ANY 0xff
++#define kvaser_fsize(field) sizeof_field(struct kvaser_cmd, field)
++
++static const u8 kvaser_usb_leaf_cmd_sizes_leaf[] = {
++ [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple),
++ [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple),
++ [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo),
++ [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header),
++ [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.leaf.softinfo),
++ [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.leaf.rx_can),
++ [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.leaf.rx_can),
++ [CMD_LEAF_LOG_MESSAGE] = kvaser_fsize(u.leaf.log_message),
++ [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.leaf.chip_state_event),
++ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.leaf.can_error_event),
++ [CMD_GET_CAPABILITIES_RESP] = kvaser_fsize(u.leaf.cap_res),
++ [CMD_GET_BUS_PARAMS_REPLY] = kvaser_fsize(u.busparams),
++ [CMD_ERROR_EVENT] = kvaser_fsize(u.leaf.error_event),
++ /* ignored events: */
++ [CMD_FLUSH_QUEUE_REPLY] = CMD_SIZE_ANY,
++};
++
++static const u8 kvaser_usb_leaf_cmd_sizes_usbcan[] = {
++ [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple),
++ [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple),
++ [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo),
++ [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header),
++ [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.usbcan.softinfo),
++ [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.usbcan.rx_can),
++ [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.usbcan.rx_can),
++ [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.usbcan.chip_state_event),
++ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.usbcan.can_error_event),
++ [CMD_ERROR_EVENT] = kvaser_fsize(u.usbcan.error_event),
++ /* ignored events: */
++ [CMD_USBCAN_CLOCK_OVERFLOW_EVENT] = CMD_SIZE_ANY,
++};
++
+ /* Summary of a kvaser error event, for a unified Leaf/Usbcan error
+ * handling. Some discrepancies between the two families exist:
+ *
+@@ -340,6 +422,113 @@ struct kvaser_usb_err_summary {
+ };
+ };
+
++struct kvaser_usb_net_leaf_priv {
++ struct kvaser_usb_net_priv *net;
++
++ struct delayed_work chip_state_req_work;
++};
++
++static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = {
++ .name = "kvaser_usb_ucii",
++ .tseg1_min = 4,
++ .tseg1_max = 16,
++ .tseg2_min = 2,
++ .tseg2_max = 8,
++ .sjw_max = 4,
++ .brp_min = 1,
++ .brp_max = 16,
++ .brp_inc = 1,
++};
++
++static const struct can_bittiming_const kvaser_usb_leaf_m32c_bittiming_const = {
++ .name = "kvaser_usb_leaf",
++ .tseg1_min = 3,
++ .tseg1_max = 16,
++ .tseg2_min = 2,
++ .tseg2_max = 8,
++ .sjw_max = 4,
++ .brp_min = 2,
++ .brp_max = 128,
++ .brp_inc = 2,
++};
++
++static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = {
++ .clock = {
++ .freq = 8 * MEGA /* Hz */,
++ },
++ .timestamp_freq = 1,
++ .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const,
++};
++
++static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = {
++ .clock = {
++ .freq = 16 * MEGA /* Hz */,
++ },
++ .timestamp_freq = 1,
++ .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const,
++};
++
++static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_16mhz = {
++ .clock = {
++ .freq = 16000000,
++ },
++ .timestamp_freq = 1,
++ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
++};
++
++static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = {
++ .clock = {
++ .freq = 24 * MEGA /* Hz */,
++ },
++ .timestamp_freq = 1,
++ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
++};
++
++static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = {
++ .clock = {
++ .freq = 32 * MEGA /* Hz */,
++ },
++ .timestamp_freq = 1,
++ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
++};
++
++static int kvaser_usb_leaf_verify_size(const struct kvaser_usb *dev,
++ const struct kvaser_cmd *cmd)
++{
++ /* buffer size >= cmd->len ensured by caller */
++ u8 min_size = 0;
++
++ switch (dev->driver_info->family) {
++ case KVASER_LEAF:
++ if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_leaf))
++ min_size = kvaser_usb_leaf_cmd_sizes_leaf[cmd->id];
++ break;
++ case KVASER_USBCAN:
++ if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_usbcan))
++ min_size = kvaser_usb_leaf_cmd_sizes_usbcan[cmd->id];
++ break;
++ }
++
++ if (min_size == CMD_SIZE_ANY)
++ return 0;
++
++ if (min_size) {
++ min_size += CMD_HEADER_LEN;
++ if (cmd->len >= min_size)
++ return 0;
++
++ dev_err_ratelimited(&dev->intf->dev,
++ "Received command %u too short (size %u, needed %u)",
++ cmd->id, cmd->len, min_size);
++ return -EIO;
++ }
++
++ dev_warn_ratelimited(&dev->intf->dev,
++ "Unhandled command (%d, size %d)\n",
++ cmd->id, cmd->len);
++ return -EINVAL;
++}
++
+ static void *
+ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv,
+ const struct sk_buff *skb, int *frame_len,
+@@ -359,7 +548,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv,
+ sizeof(struct kvaser_cmd_tx_can);
+ cmd->u.tx_can.channel = priv->channel;
+
+- switch (dev->card_data.leaf.family) {
++ switch (dev->driver_info->family) {
+ case KVASER_LEAF:
+ cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags;
+ break;
+@@ -447,6 +636,9 @@ static int kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id,
+ end:
+ kfree(buf);
+
++ if (err == 0)
++ err = kvaser_usb_leaf_verify_size(dev, cmd);
++
+ return err;
+ }
+
+@@ -471,6 +663,37 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev,
+ return rc;
+ }
+
++static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev,
++ const struct leaf_cmd_softinfo *softinfo)
++{
++ u32 sw_options = le32_to_cpu(softinfo->sw_options);
++
++ dev->fw_version = le32_to_cpu(softinfo->fw_version);
++ dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx);
++
++ if (sw_options & KVASER_USB_LEAF_SWOPTION_EXT_CAP)
++ dev->card_data.capabilities |= KVASER_USB_CAP_EXT_CAP;
++
++ if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) {
++ /* Firmware expects bittiming parameters calculated for 16MHz
++ * clock, regardless of the actual clock
++ */
++ dev->cfg = &kvaser_usb_leaf_m32c_dev_cfg;
++ } else {
++ switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) {
++ case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK:
++ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_16mhz;
++ break;
++ case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK:
++ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_24mhz;
++ break;
++ case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK:
++ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_32mhz;
++ break;
++ }
++ }
++}
++
+ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev)
+ {
+ struct kvaser_cmd cmd;
+@@ -484,16 +707,15 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev)
+ if (err)
+ return err;
+
+- switch (dev->card_data.leaf.family) {
++ switch (dev->driver_info->family) {
+ case KVASER_LEAF:
+- dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version);
+- dev->max_tx_urbs =
+- le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx);
++ kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo);
+ break;
+ case KVASER_USBCAN:
+ dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version);
+ dev->max_tx_urbs =
+ le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx);
++ dev->cfg = &kvaser_usb_leaf_usbcan_dev_cfg;
+ break;
+ }
+
+@@ -532,13 +754,123 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev)
+
+ dev->nchannels = cmd.u.cardinfo.nchannels;
+ if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES ||
+- (dev->card_data.leaf.family == KVASER_USBCAN &&
++ (dev->driver_info->family == KVASER_USBCAN &&
+ dev->nchannels > MAX_USBCAN_NET_DEVICES))
+ return -EINVAL;
+
+ return 0;
+ }
+
++static int kvaser_usb_leaf_get_single_capability(struct kvaser_usb *dev,
++ u16 cap_cmd_req, u16 *status)
++{
++ struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
++ struct kvaser_cmd *cmd;
++ u32 value = 0;
++ u32 mask = 0;
++ u16 cap_cmd_res;
++ int err;
++ int i;
++
++ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
++ if (!cmd)
++ return -ENOMEM;
++
++ cmd->id = CMD_GET_CAPABILITIES_REQ;
++ cmd->u.leaf.cap_req.cap_cmd = cpu_to_le16(cap_cmd_req);
++ cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_cap_req);
++
++ err = kvaser_usb_send_cmd(dev, cmd, cmd->len);
++ if (err)
++ goto end;
++
++ err = kvaser_usb_leaf_wait_cmd(dev, CMD_GET_CAPABILITIES_RESP, cmd);
++ if (err)
++ goto end;
++
++ *status = le16_to_cpu(cmd->u.leaf.cap_res.status);
++
++ if (*status != KVASER_USB_LEAF_CAP_STAT_OK)
++ goto end;
++
++ cap_cmd_res = le16_to_cpu(cmd->u.leaf.cap_res.cap_cmd);
++ switch (cap_cmd_res) {
++ case KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE:
++ case KVASER_USB_LEAF_CAP_CMD_ERR_REPORT:
++ value = le32_to_cpu(cmd->u.leaf.cap_res.value);
++ mask = le32_to_cpu(cmd->u.leaf.cap_res.mask);
++ break;
++ default:
++ dev_warn(&dev->intf->dev, "Unknown capability command %u\n",
++ cap_cmd_res);
++ break;
++ }
++
++ for (i = 0; i < dev->nchannels; i++) {
++ if (BIT(i) & (value & mask)) {
++ switch (cap_cmd_res) {
++ case KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE:
++ card_data->ctrlmode_supported |=
++ CAN_CTRLMODE_LISTENONLY;
++ break;
++ case KVASER_USB_LEAF_CAP_CMD_ERR_REPORT:
++ card_data->capabilities |=
++ KVASER_USB_CAP_BERR_CAP;
++ break;
++ }
++ }
++ }
++
++end:
++ kfree(cmd);
++
++ return err;
++}
++
++static int kvaser_usb_leaf_get_capabilities_leaf(struct kvaser_usb *dev)
++{
++ int err;
++ u16 status;
++
++ if (!(dev->card_data.capabilities & KVASER_USB_CAP_EXT_CAP)) {
++ dev_info(&dev->intf->dev,
++ "No extended capability support. Upgrade device firmware.\n");
++ return 0;
++ }
++
++ err = kvaser_usb_leaf_get_single_capability(dev,
++ KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE,
++ &status);
++ if (err)
++ return err;
++ if (status)
++ dev_info(&dev->intf->dev,
++ "KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE failed %u\n",
++ status);
++
++ err = kvaser_usb_leaf_get_single_capability(dev,
++ KVASER_USB_LEAF_CAP_CMD_ERR_REPORT,
++ &status);
++ if (err)
++ return err;
++ if (status)
++ dev_info(&dev->intf->dev,
++ "KVASER_USB_LEAF_CAP_CMD_ERR_REPORT failed %u\n",
++ status);
++
++ return 0;
++}
++
++static int kvaser_usb_leaf_get_capabilities(struct kvaser_usb *dev)
++{
++ int err = 0;
++
++ if (dev->driver_info->family == KVASER_LEAF)
++ err = kvaser_usb_leaf_get_capabilities_leaf(dev);
++
++ return err;
++}
++
+ static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev,
+ const struct kvaser_cmd *cmd)
+ {
+@@ -567,7 +899,7 @@ static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev,
+ context = &priv->tx_contexts[tid % dev->max_tx_urbs];
+
+ /* Sometimes the state change doesn't come after a bus-off event */
+- if (priv->can.restart_ms && priv->can.state >= CAN_STATE_BUS_OFF) {
++ if (priv->can.restart_ms && priv->can.state == CAN_STATE_BUS_OFF) {
+ struct sk_buff *skb;
+ struct can_frame *cf;
+
+@@ -623,6 +955,16 @@ static int kvaser_usb_leaf_simple_cmd_async(struct kvaser_usb_net_priv *priv,
+ return err;
+ }
+
++static void kvaser_usb_leaf_chip_state_req_work(struct work_struct *work)
++{
++ struct kvaser_usb_net_leaf_priv *leaf =
++ container_of(work, struct kvaser_usb_net_leaf_priv,
++ chip_state_req_work.work);
++ struct kvaser_usb_net_priv *priv = leaf->net;
++
++ kvaser_usb_leaf_simple_cmd_async(priv, CMD_GET_CHIP_STATE);
++}
++
+ static void
+ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
+ const struct kvaser_usb_err_summary *es,
+@@ -641,20 +983,16 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
+ new_state = CAN_STATE_BUS_OFF;
+ } else if (es->status & M16C_STATE_BUS_PASSIVE) {
+ new_state = CAN_STATE_ERROR_PASSIVE;
+- } else if (es->status & M16C_STATE_BUS_ERROR) {
++ } else if ((es->status & M16C_STATE_BUS_ERROR) &&
++ cur_state >= CAN_STATE_BUS_OFF) {
+ /* Guard against spurious error events after a busoff */
+- if (cur_state < CAN_STATE_BUS_OFF) {
+- if (es->txerr >= 128 || es->rxerr >= 128)
+- new_state = CAN_STATE_ERROR_PASSIVE;
+- else if (es->txerr >= 96 || es->rxerr >= 96)
+- new_state = CAN_STATE_ERROR_WARNING;
+- else if (cur_state > CAN_STATE_ERROR_ACTIVE)
+- new_state = CAN_STATE_ERROR_ACTIVE;
+- }
+- }
+-
+- if (!es->status)
++ } else if (es->txerr >= 128 || es->rxerr >= 128) {
++ new_state = CAN_STATE_ERROR_PASSIVE;
++ } else if (es->txerr >= 96 || es->rxerr >= 96) {
++ new_state = CAN_STATE_ERROR_WARNING;
++ } else {
+ new_state = CAN_STATE_ERROR_ACTIVE;
++ }
+
+ if (new_state != cur_state) {
+ tx_state = (es->txerr >= es->rxerr) ? new_state : 0;
+@@ -664,11 +1002,11 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
+ }
+
+ if (priv->can.restart_ms &&
+- cur_state >= CAN_STATE_BUS_OFF &&
++ cur_state == CAN_STATE_BUS_OFF &&
+ new_state < CAN_STATE_BUS_OFF)
+ priv->can.can_stats.restarts++;
+
+- switch (dev->card_data.leaf.family) {
++ switch (dev->driver_info->family) {
+ case KVASER_LEAF:
+ if (es->leaf.error_factor) {
+ priv->can.can_stats.bus_error++;
+@@ -698,6 +1036,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+ struct sk_buff *skb;
+ struct net_device_stats *stats;
+ struct kvaser_usb_net_priv *priv;
++ struct kvaser_usb_net_leaf_priv *leaf;
+ enum can_state old_state, new_state;
+
+ if (es->channel >= dev->nchannels) {
+@@ -707,8 +1046,13 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+ }
+
+ priv = dev->nets[es->channel];
++ leaf = priv->sub_priv;
+ stats = &priv->netdev->stats;
+
++ /* Ignore e.g. state change to bus-off reported just after stopping */
++ if (!netif_running(priv->netdev))
++ return;
++
+ /* Update all of the CAN interface's state and error counters before
+ * trying any memory allocation that can actually fail with -ENOMEM.
+ *
+@@ -723,6 +1067,14 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+ kvaser_usb_leaf_rx_error_update_can_state(priv, es, &tmp_cf);
+ new_state = priv->can.state;
+
++ /* If there are errors, request status updates periodically as we do
++ * not get automatic notifications of improved state.
++ */
++ if (new_state < CAN_STATE_BUS_OFF &&
++ (es->rxerr || es->txerr || new_state == CAN_STATE_ERROR_PASSIVE))
++ schedule_delayed_work(&leaf->chip_state_req_work,
++ msecs_to_jiffies(500));
++
+ skb = alloc_can_err_skb(priv->netdev, &cf);
+ if (!skb) {
+ stats->rx_dropped++;
+@@ -740,14 +1092,14 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+ }
+
+ if (priv->can.restart_ms &&
+- old_state >= CAN_STATE_BUS_OFF &&
++ old_state == CAN_STATE_BUS_OFF &&
+ new_state < CAN_STATE_BUS_OFF) {
+ cf->can_id |= CAN_ERR_RESTARTED;
+ netif_carrier_on(priv->netdev);
+ }
+ }
+
+- switch (dev->card_data.leaf.family) {
++ switch (dev->driver_info->family) {
+ case KVASER_LEAF:
+ if (es->leaf.error_factor) {
+ cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+@@ -774,8 +1126,10 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+ break;
+ }
+
+- cf->data[6] = es->txerr;
+- cf->data[7] = es->rxerr;
++ if (new_state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = es->txerr;
++ cf->data[7] = es->rxerr;
++ }
+
+ stats->rx_packets++;
+ stats->rx_bytes += cf->len;
+@@ -838,11 +1192,11 @@ static void kvaser_usb_leaf_usbcan_rx_error(const struct kvaser_usb *dev,
+
+ case CMD_CAN_ERROR_EVENT:
+ es.channel = 0;
+- es.status = cmd->u.usbcan.error_event.status_ch0;
+- es.txerr = cmd->u.usbcan.error_event.tx_errors_count_ch0;
+- es.rxerr = cmd->u.usbcan.error_event.rx_errors_count_ch0;
++ es.status = cmd->u.usbcan.can_error_event.status_ch0;
++ es.txerr = cmd->u.usbcan.can_error_event.tx_errors_count_ch0;
++ es.rxerr = cmd->u.usbcan.can_error_event.rx_errors_count_ch0;
+ es.usbcan.other_ch_status =
+- cmd->u.usbcan.error_event.status_ch1;
++ cmd->u.usbcan.can_error_event.status_ch1;
+ kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es);
+
+ /* The USBCAN firmware supports up to 2 channels.
+@@ -850,13 +1204,13 @@ static void kvaser_usb_leaf_usbcan_rx_error(const struct kvaser_usb *dev,
+ */
+ if (dev->nchannels == MAX_USBCAN_NET_DEVICES) {
+ es.channel = 1;
+- es.status = cmd->u.usbcan.error_event.status_ch1;
++ es.status = cmd->u.usbcan.can_error_event.status_ch1;
+ es.txerr =
+- cmd->u.usbcan.error_event.tx_errors_count_ch1;
++ cmd->u.usbcan.can_error_event.tx_errors_count_ch1;
+ es.rxerr =
+- cmd->u.usbcan.error_event.rx_errors_count_ch1;
++ cmd->u.usbcan.can_error_event.rx_errors_count_ch1;
+ es.usbcan.other_ch_status =
+- cmd->u.usbcan.error_event.status_ch0;
++ cmd->u.usbcan.can_error_event.status_ch0;
+ kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es);
+ }
+ break;
+@@ -873,11 +1227,11 @@ static void kvaser_usb_leaf_leaf_rx_error(const struct kvaser_usb *dev,
+
+ switch (cmd->id) {
+ case CMD_CAN_ERROR_EVENT:
+- es.channel = cmd->u.leaf.error_event.channel;
+- es.status = cmd->u.leaf.error_event.status;
+- es.txerr = cmd->u.leaf.error_event.tx_errors_count;
+- es.rxerr = cmd->u.leaf.error_event.rx_errors_count;
+- es.leaf.error_factor = cmd->u.leaf.error_event.error_factor;
++ es.channel = cmd->u.leaf.can_error_event.channel;
++ es.status = cmd->u.leaf.can_error_event.status;
++ es.txerr = cmd->u.leaf.can_error_event.tx_errors_count;
++ es.rxerr = cmd->u.leaf.can_error_event.rx_errors_count;
++ es.leaf.error_factor = cmd->u.leaf.can_error_event.error_factor;
+ break;
+ case CMD_LEAF_LOG_MESSAGE:
+ es.channel = cmd->u.leaf.log_message.channel;
+@@ -939,7 +1293,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev,
+ stats = &priv->netdev->stats;
+
+ if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
+- (dev->card_data.leaf.family == KVASER_LEAF &&
++ (dev->driver_info->family == KVASER_LEAF &&
+ cmd->id == CMD_LEAF_LOG_MESSAGE)) {
+ kvaser_usb_leaf_leaf_rx_error(dev, cmd);
+ return;
+@@ -955,7 +1309,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev,
+ return;
+ }
+
+- switch (dev->card_data.leaf.family) {
++ switch (dev->driver_info->family) {
+ case KVASER_LEAF:
+ rx_data = cmd->u.leaf.rx_can.data;
+ break;
+@@ -970,7 +1324,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev,
+ return;
+ }
+
+- if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id ==
++ if (dev->driver_info->family == KVASER_LEAF && cmd->id ==
+ CMD_LEAF_LOG_MESSAGE) {
+ cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id);
+ if (cf->can_id & KVASER_EXTENDED_FRAME)
+@@ -1009,6 +1363,74 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev,
+ netif_rx(skb);
+ }
+
++static void kvaser_usb_leaf_error_event_parameter(const struct kvaser_usb *dev,
++ const struct kvaser_cmd *cmd)
++{
++ u16 info1 = 0;
++
++ switch (dev->driver_info->family) {
++ case KVASER_LEAF:
++ info1 = le16_to_cpu(cmd->u.leaf.error_event.info1);
++ break;
++ case KVASER_USBCAN:
++ info1 = le16_to_cpu(cmd->u.usbcan.error_event.info1);
++ break;
++ }
++
++ /* info1 will contain the offending cmd_no */
++ switch (info1) {
++ case CMD_SET_CTRL_MODE:
++ dev_warn(&dev->intf->dev,
++ "CMD_SET_CTRL_MODE error in parameter\n");
++ break;
++
++ case CMD_SET_BUS_PARAMS:
++ dev_warn(&dev->intf->dev,
++ "CMD_SET_BUS_PARAMS error in parameter\n");
++ break;
++
++ default:
++ dev_warn(&dev->intf->dev,
++ "Unhandled parameter error event cmd_no (%u)\n",
++ info1);
++ break;
++ }
++}
++
++static void kvaser_usb_leaf_error_event(const struct kvaser_usb *dev,
++ const struct kvaser_cmd *cmd)
++{
++ u8 error_code = 0;
++
++ switch (dev->driver_info->family) {
++ case KVASER_LEAF:
++ error_code = cmd->u.leaf.error_event.error_code;
++ break;
++ case KVASER_USBCAN:
++ error_code = cmd->u.usbcan.error_event.error_code;
++ break;
++ }
++
++ switch (error_code) {
++ case KVASER_USB_LEAF_ERROR_EVENT_TX_QUEUE_FULL:
++ /* Received additional CAN message, when firmware TX queue is
++ * already full. Something is wrong with the driver.
++ * This should never happen!
++ */
++ dev_err(&dev->intf->dev,
++ "Received error event TX_QUEUE_FULL\n");
++ break;
++ case KVASER_USB_LEAF_ERROR_EVENT_PARAM:
++ kvaser_usb_leaf_error_event_parameter(dev, cmd);
++ break;
++
++ default:
++ dev_warn(&dev->intf->dev,
++ "Unhandled error event (%d)\n", error_code);
++ break;
++ }
++}
++
+ static void kvaser_usb_leaf_start_chip_reply(const struct kvaser_usb *dev,
+ const struct kvaser_cmd *cmd)
+ {
+@@ -1049,9 +1471,31 @@ static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev,
+ complete(&priv->stop_comp);
+ }
+
++static void kvaser_usb_leaf_get_busparams_reply(const struct kvaser_usb *dev,
++ const struct kvaser_cmd *cmd)
++{
++ struct kvaser_usb_net_priv *priv;
++ u8 channel = cmd->u.busparams.channel;
++
++ if (channel >= dev->nchannels) {
++ dev_err(&dev->intf->dev,
++ "Invalid channel number (%d)\n", channel);
++ return;
++ }
++
++ priv = dev->nets[channel];
++ memcpy(&priv->busparams_nominal, &cmd->u.busparams.busparams,
++ sizeof(priv->busparams_nominal));
++
++ complete(&priv->get_busparams_comp);
++}
++
+ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev,
+ const struct kvaser_cmd *cmd)
+ {
++ if (kvaser_usb_leaf_verify_size(dev, cmd) < 0)
++ return;
++
+ switch (cmd->id) {
+ case CMD_START_CHIP_REPLY:
+ kvaser_usb_leaf_start_chip_reply(dev, cmd);
+@@ -1067,14 +1511,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev,
+ break;
+
+ case CMD_LEAF_LOG_MESSAGE:
+- if (dev->card_data.leaf.family != KVASER_LEAF)
++ if (dev->driver_info->family != KVASER_LEAF)
+ goto warn;
+ kvaser_usb_leaf_rx_can_msg(dev, cmd);
+ break;
+
+ case CMD_CHIP_STATE_EVENT:
+ case CMD_CAN_ERROR_EVENT:
+- if (dev->card_data.leaf.family == KVASER_LEAF)
++ if (dev->driver_info->family == KVASER_LEAF)
+ kvaser_usb_leaf_leaf_rx_error(dev, cmd);
+ else
+ kvaser_usb_leaf_usbcan_rx_error(dev, cmd);
+@@ -1084,14 +1528,22 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev,
+ kvaser_usb_leaf_tx_acknowledge(dev, cmd);
+ break;
+
++ case CMD_ERROR_EVENT:
++ kvaser_usb_leaf_error_event(dev, cmd);
++ break;
++
++ case CMD_GET_BUS_PARAMS_REPLY:
++ kvaser_usb_leaf_get_busparams_reply(dev, cmd);
++ break;
++
+ /* Ignored commands */
+ case CMD_USBCAN_CLOCK_OVERFLOW_EVENT:
+- if (dev->card_data.leaf.family != KVASER_USBCAN)
++ if (dev->driver_info->family != KVASER_USBCAN)
+ goto warn;
+ break;
+
+ case CMD_FLUSH_QUEUE_REPLY:
+- if (dev->card_data.leaf.family != KVASER_LEAF)
++ if (dev->driver_info->family != KVASER_LEAF)
+ goto warn;
+ break;
+
+@@ -1164,7 +1616,7 @@ static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv)
+ {
+ int err;
+
+- init_completion(&priv->start_comp);
++ reinit_completion(&priv->start_comp);
+
+ err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_START_CHIP,
+ priv->channel);
+@@ -1180,9 +1632,12 @@ static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv)
+
+ static int kvaser_usb_leaf_stop_chip(struct kvaser_usb_net_priv *priv)
+ {
++ struct kvaser_usb_net_leaf_priv *leaf = priv->sub_priv;
+ int err;
+
+- init_completion(&priv->stop_comp);
++ reinit_completion(&priv->stop_comp);
++
++ cancel_delayed_work(&leaf->chip_state_req_work);
+
+ err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_STOP_CHIP,
+ priv->channel);
+@@ -1225,28 +1680,40 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev)
+ {
+ struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+
+- dev->cfg = &kvaser_usb_leaf_dev_cfg;
+ card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
+
+ return 0;
+ }
+
+-static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = {
+- .name = "kvaser_usb",
+- .tseg1_min = KVASER_USB_TSEG1_MIN,
+- .tseg1_max = KVASER_USB_TSEG1_MAX,
+- .tseg2_min = KVASER_USB_TSEG2_MIN,
+- .tseg2_max = KVASER_USB_TSEG2_MAX,
+- .sjw_max = KVASER_USB_SJW_MAX,
+- .brp_min = KVASER_USB_BRP_MIN,
+- .brp_max = KVASER_USB_BRP_MAX,
+- .brp_inc = KVASER_USB_BRP_INC,
+-};
++static int kvaser_usb_leaf_init_channel(struct kvaser_usb_net_priv *priv)
++{
++ struct kvaser_usb_net_leaf_priv *leaf;
++
++ leaf = devm_kzalloc(&priv->dev->intf->dev, sizeof(*leaf), GFP_KERNEL);
++ if (!leaf)
++ return -ENOMEM;
++
++ leaf->net = priv;
++ INIT_DELAYED_WORK(&leaf->chip_state_req_work,
++ kvaser_usb_leaf_chip_state_req_work);
++
++ priv->sub_priv = leaf;
++
++ return 0;
++}
++
++static void kvaser_usb_leaf_remove_channel(struct kvaser_usb_net_priv *priv)
++{
++ struct kvaser_usb_net_leaf_priv *leaf = priv->sub_priv;
++
++ if (leaf)
++ cancel_delayed_work_sync(&leaf->chip_state_req_work);
++}
+
+-static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev)
++static int kvaser_usb_leaf_set_bittiming(const struct net_device *netdev,
++ const struct kvaser_usb_busparams *busparams)
+ {
+ struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+- struct can_bittiming *bt = &priv->can.bittiming;
+ struct kvaser_usb *dev = priv->dev;
+ struct kvaser_cmd *cmd;
+ int rc;
+@@ -1259,15 +1726,8 @@ static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev)
+ cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_busparams);
+ cmd->u.busparams.channel = priv->channel;
+ cmd->u.busparams.tid = 0xff;
+- cmd->u.busparams.bitrate = cpu_to_le32(bt->bitrate);
+- cmd->u.busparams.sjw = bt->sjw;
+- cmd->u.busparams.tseg1 = bt->prop_seg + bt->phase_seg1;
+- cmd->u.busparams.tseg2 = bt->phase_seg2;
+-
+- if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+- cmd->u.busparams.no_samp = 3;
+- else
+- cmd->u.busparams.no_samp = 1;
++ memcpy(&cmd->u.busparams.busparams, busparams,
++ sizeof(cmd->u.busparams.busparams));
+
+ rc = kvaser_usb_send_cmd(dev, cmd, cmd->len);
+
+@@ -1275,6 +1735,27 @@ static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev)
+ return rc;
+ }
+
++static int kvaser_usb_leaf_get_busparams(struct kvaser_usb_net_priv *priv)
++{
++ int err;
++
++ if (priv->dev->driver_info->family == KVASER_USBCAN)
++ return -EOPNOTSUPP;
++
++ reinit_completion(&priv->get_busparams_comp);
++
++ err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_GET_BUS_PARAMS,
++ priv->channel);
++ if (err)
++ return err;
++
++ if (!wait_for_completion_timeout(&priv->get_busparams_comp,
++ msecs_to_jiffies(KVASER_USB_TIMEOUT)))
++ return -ETIMEDOUT;
++
++ return 0;
++}
++
+ static int kvaser_usb_leaf_set_mode(struct net_device *netdev,
+ enum can_mode mode)
+ {
+@@ -1283,9 +1764,13 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev,
+
+ switch (mode) {
+ case CAN_MODE_START:
++ kvaser_usb_unlink_tx_urbs(priv);
++
+ err = kvaser_usb_leaf_simple_cmd_async(priv, CMD_START_CHIP);
+ if (err)
+ return err;
++
++ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ break;
+ default:
+ return -EOPNOTSUPP;
+@@ -1332,14 +1817,18 @@ static int kvaser_usb_leaf_setup_endpoints(struct kvaser_usb *dev)
+ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = {
+ .dev_set_mode = kvaser_usb_leaf_set_mode,
+ .dev_set_bittiming = kvaser_usb_leaf_set_bittiming,
++ .dev_get_busparams = kvaser_usb_leaf_get_busparams,
+ .dev_set_data_bittiming = NULL,
++ .dev_get_data_busparams = NULL,
+ .dev_get_berr_counter = kvaser_usb_leaf_get_berr_counter,
+ .dev_setup_endpoints = kvaser_usb_leaf_setup_endpoints,
+ .dev_init_card = kvaser_usb_leaf_init_card,
++ .dev_init_channel = kvaser_usb_leaf_init_channel,
++ .dev_remove_channel = kvaser_usb_leaf_remove_channel,
+ .dev_get_software_info = kvaser_usb_leaf_get_software_info,
+ .dev_get_software_details = NULL,
+ .dev_get_card_info = kvaser_usb_leaf_get_card_info,
+- .dev_get_capabilities = NULL,
++ .dev_get_capabilities = kvaser_usb_leaf_get_capabilities,
+ .dev_set_opt_mode = kvaser_usb_leaf_set_opt_mode,
+ .dev_start_chip = kvaser_usb_leaf_start_chip,
+ .dev_stop_chip = kvaser_usb_leaf_stop_chip,
+@@ -1348,11 +1837,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = {
+ .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback,
+ .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd,
+ };
+-
+-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = {
+- .clock = {
+- .freq = CAN_USB_CLOCK,
+- },
+- .timestamp_freq = 1,
+- .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+-};
+diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
+index a1a154c08b7f7..e9ccdcce01cc3 100644
+--- a/drivers/net/can/usb/mcba_usb.c
++++ b/drivers/net/can/usb/mcba_usb.c
+@@ -33,10 +33,6 @@
+ #define MCBA_USB_RX_BUFF_SIZE 64
+ #define MCBA_USB_TX_BUFF_SIZE (sizeof(struct mcba_usb_msg))
+
+-/* MCBA endpoint numbers */
+-#define MCBA_USB_EP_IN 1
+-#define MCBA_USB_EP_OUT 1
+-
+ /* Microchip command id */
+ #define MBCA_CMD_RECEIVE_MESSAGE 0xE3
+ #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5
+@@ -51,6 +47,10 @@
+ #define MCBA_VER_REQ_USB 1
+ #define MCBA_VER_REQ_CAN 2
+
++/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */
++#define MCBA_VER_TERMINATION_ON 0
++#define MCBA_VER_TERMINATION_OFF 1
++
+ #define MCBA_SIDL_EXID_MASK 0x8
+ #define MCBA_DLC_MASK 0xf
+ #define MCBA_DLC_RTR_MASK 0x40
+@@ -84,6 +84,8 @@ struct mcba_priv {
+ atomic_t free_ctx_cnt;
+ void *rxbuf[MCBA_MAX_RX_URBS];
+ dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS];
++ int rx_pipe;
++ int tx_pipe;
+ };
+
+ /* CAN frame */
+@@ -272,10 +274,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv,
+
+ memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE);
+
+- usb_fill_bulk_urb(urb, priv->udev,
+- usb_sndbulkpipe(priv->udev, MCBA_USB_EP_OUT), buf,
+- MCBA_USB_TX_BUFF_SIZE, mcba_usb_write_bulk_callback,
+- ctx);
++ usb_fill_bulk_urb(urb, priv->udev, priv->tx_pipe, buf, MCBA_USB_TX_BUFF_SIZE,
++ mcba_usb_write_bulk_callback, ctx);
+
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+ usb_anchor_urb(urb, &priv->tx_submitted);
+@@ -368,7 +368,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
+ xmit_failed:
+ can_free_echo_skb(priv->netdev, ctx->ndx, NULL);
+ mcba_usb_free_ctx(ctx);
+- dev_kfree_skb(skb);
+ stats->tx_dropped++;
+
+ return NETDEV_TX_OK;
+@@ -474,7 +473,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv,
+ priv->usb_ka_first_pass = false;
+ }
+
+- if (msg->termination_state)
++ if (msg->termination_state == MCBA_VER_TERMINATION_ON)
+ priv->can.termination = MCBA_TERMINATION_ENABLED;
+ else
+ priv->can.termination = MCBA_TERMINATION_DISABLED;
+@@ -611,7 +610,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb)
+ resubmit_urb:
+
+ usb_fill_bulk_urb(urb, priv->udev,
+- usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_OUT),
++ priv->rx_pipe,
+ urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE,
+ mcba_usb_read_bulk_callback, priv);
+
+@@ -656,7 +655,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
+ urb->transfer_dma = buf_dma;
+
+ usb_fill_bulk_urb(urb, priv->udev,
+- usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
++ priv->rx_pipe,
+ buf, MCBA_USB_RX_BUFF_SIZE,
+ mcba_usb_read_bulk_callback, priv);
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+@@ -794,9 +793,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term)
+ };
+
+ if (term == MCBA_TERMINATION_ENABLED)
+- usb_msg.termination = 1;
++ usb_msg.termination = MCBA_VER_TERMINATION_ON;
+ else
+- usb_msg.termination = 0;
++ usb_msg.termination = MCBA_VER_TERMINATION_OFF;
+
+ mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg);
+
+@@ -810,6 +809,13 @@ static int mcba_usb_probe(struct usb_interface *intf,
+ struct mcba_priv *priv;
+ int err;
+ struct usb_device *usbdev = interface_to_usbdev(intf);
++ struct usb_endpoint_descriptor *in, *out;
++
++ err = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL);
++ if (err) {
++ dev_err(&intf->dev, "Can't find endpoints\n");
++ return err;
++ }
+
+ netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS);
+ if (!netdev) {
+@@ -855,6 +861,9 @@ static int mcba_usb_probe(struct usb_interface *intf,
+ goto cleanup_free_candev;
+ }
+
++ priv->rx_pipe = usb_rcvbulkpipe(priv->udev, in->bEndpointAddress);
++ priv->tx_pipe = usb_sndbulkpipe(priv->udev, out->bEndpointAddress);
++
+ devm_can_led_init(netdev);
+
+ /* Start USB dev only if we have successfully registered CAN device */
+diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
+index 837b3fecd71e9..af8d3dadbbb8b 100644
+--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
++++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
+@@ -841,14 +841,14 @@ static int pcan_usb_start(struct peak_usb_device *dev)
+ pdev->bec.rxerr = 0;
+ pdev->bec.txerr = 0;
+
+- /* be notified on error counter changes (if requested by user) */
+- if (dev->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) {
+- err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK);
+- if (err)
+- netdev_warn(dev->netdev,
+- "Asking for BERR reporting error %u\n",
+- err);
+- }
++ /* always ask the device for BERR reporting, to be able to switch from
++ * WARNING to PASSIVE state
++ */
++ err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK);
++ if (err)
++ netdev_warn(dev->netdev,
++ "Asking for BERR reporting error %u\n",
++ err);
+
+ /* if revision greater than 3, can put silent mode on/off */
+ if (dev->device_rev > 3) {
+@@ -986,7 +986,6 @@ const struct peak_usb_adapter pcan_usb = {
+ .device_id = PCAN_USB_PRODUCT_ID,
+ .ctrl_count = 1,
+ .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
+- CAN_CTRLMODE_BERR_REPORTING |
+ CAN_CTRLMODE_CC_LEN8_DLC,
+ .clock = {
+ .freq = PCAN_USB_CRYSTAL_HZ / 2,
+diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
+index d1b83bd1b3cb9..a5dee2ee24656 100644
+--- a/drivers/net/can/usb/usb_8dev.c
++++ b/drivers/net/can/usb/usb_8dev.c
+@@ -442,9 +442,10 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv,
+
+ if (rx_errors)
+ stats->rx_errors++;
+-
+- cf->data[6] = txerr;
+- cf->data[7] = rxerr;
++ if (priv->can.state != CAN_STATE_BUS_OFF) {
++ cf->data[6] = txerr;
++ cf->data[7] = rxerr;
++ }
+
+ priv->bec.txerr = txerr;
+ priv->bec.rxerr = rxerr;
+@@ -670,9 +671,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb,
+ atomic_inc(&priv->active_tx_urbs);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+- if (unlikely(err))
+- goto failed;
+- else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS)
++ if (unlikely(err)) {
++ can_free_echo_skb(netdev, context->echo_index, NULL);
++
++ usb_unanchor_urb(urb);
++ usb_free_coherent(priv->udev, size, buf, urb->transfer_dma);
++
++ atomic_dec(&priv->active_tx_urbs);
++
++ if (err == -ENODEV)
++ netif_device_detach(netdev);
++ else
++ netdev_warn(netdev, "failed tx_urb %d\n", err);
++ stats->tx_dropped++;
++ } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS)
+ /* Slow down tx path */
+ netif_stop_queue(netdev);
+
+@@ -691,19 +703,6 @@ nofreecontext:
+
+ return NETDEV_TX_BUSY;
+
+-failed:
+- can_free_echo_skb(netdev, context->echo_index, NULL);
+-
+- usb_unanchor_urb(urb);
+- usb_free_coherent(priv->udev, size, buf, urb->transfer_dma);
+-
+- atomic_dec(&priv->active_tx_urbs);
+-
+- if (err == -ENODEV)
+- netif_device_detach(netdev);
+- else
+- netdev_warn(netdev, "failed tx_urb %d\n", err);
+-
+ nomembuf:
+ usb_free_urb(urb);
+
+diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
+index 8861a7d875e7e..afd9060c5421c 100644
+--- a/drivers/net/can/vxcan.c
++++ b/drivers/net/can/vxcan.c
+@@ -148,7 +148,7 @@ static void vxcan_setup(struct net_device *dev)
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->tx_queue_len = 0;
+- dev->flags = (IFF_NOARP|IFF_ECHO);
++ dev->flags = IFF_NOARP;
+ dev->netdev_ops = &vxcan_netdev_ops;
+ dev->needs_free_netdev = true;
+
+@@ -179,12 +179,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
+
+ nla_peer = data[VXCAN_INFO_PEER];
+ ifmp = nla_data(nla_peer);
+- err = rtnl_nla_parse_ifla(peer_tb,
+- nla_data(nla_peer) +
+- sizeof(struct ifinfomsg),
+- nla_len(nla_peer) -
+- sizeof(struct ifinfomsg),
+- NULL);
++ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
+ if (err < 0)
+ return err;
+
+diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
+index 3b883e607d8ba..a2e751f0ae0b1 100644
+--- a/drivers/net/can/xilinx_can.c
++++ b/drivers/net/can/xilinx_can.c
+@@ -239,7 +239,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd = {
+ };
+
+ /* AXI CANFD Data Bittiming constants as per AXI CANFD 1.0 specs */
+-static struct can_bittiming_const xcan_data_bittiming_const_canfd = {
++static const struct can_bittiming_const xcan_data_bittiming_const_canfd = {
+ .name = DRIVER_NAME,
+ .tseg1_min = 1,
+ .tseg1_max = 16,
+@@ -259,20 +259,20 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd2 = {
+ .tseg2_min = 1,
+ .tseg2_max = 128,
+ .sjw_max = 128,
+- .brp_min = 2,
++ .brp_min = 1,
+ .brp_max = 256,
+ .brp_inc = 1,
+ };
+
+ /* AXI CANFD 2.0 Data Bittiming constants as per AXI CANFD 2.0 spec */
+-static struct can_bittiming_const xcan_data_bittiming_const_canfd2 = {
++static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = {
+ .name = DRIVER_NAME,
+ .tseg1_min = 1,
+ .tseg1_max = 32,
+ .tseg2_min = 1,
+ .tseg2_max = 16,
+ .sjw_max = 16,
+- .brp_min = 2,
++ .brp_min = 1,
+ .brp_max = 256,
+ .brp_inc = 1,
+ };
+@@ -1762,7 +1762,12 @@ static int xcan_probe(struct platform_device *pdev)
+ spin_lock_init(&priv->tx_lock);
+
+ /* Get IRQ for the device */
+- ndev->irq = platform_get_irq(pdev, 0);
++ ret = platform_get_irq(pdev, 0);
++ if (ret < 0)
++ goto err_free;
++
++ ndev->irq = ret;
++
+ ndev->flags |= IFF_ECHO; /* We support local echo */
+
+ platform_set_drvdata(pdev, ndev);
+diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
+index a5f1aa911fe2a..6117d4537f88c 100644
+--- a/drivers/net/dsa/Kconfig
++++ b/drivers/net/dsa/Kconfig
+@@ -36,6 +36,7 @@ config NET_DSA_LANTIQ_GSWIP
+ config NET_DSA_MT7530
+ tristate "MediaTek MT753x and MT7621 Ethernet switch support"
+ select NET_DSA_TAG_MTK
++ select MEDIATEK_GE_PHY
+ help
+ This enables support for the MediaTek MT7530, MT7531, and MT7621
+ Ethernet switch chips.
+@@ -89,6 +90,7 @@ config NET_DSA_SMSC_LAN9303
+ config NET_DSA_SMSC_LAN9303_I2C
+ tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in I2C managed mode"
+ depends on I2C
++ depends on VLAN_8021Q || VLAN_8021Q=n
+ select NET_DSA_SMSC_LAN9303
+ select REGMAP_I2C
+ help
+@@ -98,6 +100,7 @@ config NET_DSA_SMSC_LAN9303_I2C
+ config NET_DSA_SMSC_LAN9303_MDIO
+ tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in MDIO managed mode"
+ select NET_DSA_SMSC_LAN9303
++ depends on VLAN_8021Q || VLAN_8021Q=n
+ help
+ Enable access functions if the SMSC/Microchip LAN9303 is configured
+ for MDIO managed mode.
+diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
+index ae4c79d39bc04..ca6f53c630676 100644
+--- a/drivers/net/dsa/b53/b53_mmap.c
++++ b/drivers/net/dsa/b53/b53_mmap.c
+@@ -216,6 +216,18 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg,
+ return 0;
+ }
+
++static int b53_mmap_phy_read16(struct b53_device *dev, int addr, int reg,
++ u16 *value)
++{
++ return -EIO;
++}
++
++static int b53_mmap_phy_write16(struct b53_device *dev, int addr, int reg,
++ u16 value)
++{
++ return -EIO;
++}
++
+ static const struct b53_io_ops b53_mmap_ops = {
+ .read8 = b53_mmap_read8,
+ .read16 = b53_mmap_read16,
+@@ -227,6 +239,8 @@ static const struct b53_io_ops b53_mmap_ops = {
+ .write32 = b53_mmap_write32,
+ .write48 = b53_mmap_write48,
+ .write64 = b53_mmap_write64,
++ .phy_read16 = b53_mmap_phy_read16,
++ .phy_write16 = b53_mmap_phy_write16,
+ };
+
+ static int b53_mmap_probe_of(struct platform_device *pdev,
+@@ -263,7 +277,7 @@ static int b53_mmap_probe_of(struct platform_device *pdev,
+ if (of_property_read_u32(of_port, "reg", &reg))
+ continue;
+
+- if (reg < B53_CPU_PORT)
++ if (reg < B53_N_PORTS)
+ pdata->enabled_ports |= BIT(reg);
+ }
+
+diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
+index 01e37b75471e1..2b88f03e52521 100644
+--- a/drivers/net/dsa/b53/b53_spi.c
++++ b/drivers/net/dsa/b53/b53_spi.c
+@@ -349,6 +349,19 @@ static const struct of_device_id b53_spi_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, b53_spi_of_match);
+
++static const struct spi_device_id b53_spi_ids[] = {
++ { .name = "bcm5325" },
++ { .name = "bcm5365" },
++ { .name = "bcm5395" },
++ { .name = "bcm5397" },
++ { .name = "bcm5398" },
++ { .name = "bcm53115" },
++ { .name = "bcm53125" },
++ { .name = "bcm53128" },
++ { /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(spi, b53_spi_ids);
++
+ static struct spi_driver b53_spi_driver = {
+ .driver = {
+ .name = "b53-switch",
+@@ -357,6 +370,7 @@ static struct spi_driver b53_spi_driver = {
+ .probe = b53_spi_probe,
+ .remove = b53_spi_remove,
+ .shutdown = b53_spi_shutdown,
++ .id_table = b53_spi_ids,
+ };
+
+ module_spi_driver(b53_spi_driver);
+diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
+index 7578a5c38df59..773d751ef169f 100644
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -584,7 +584,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
+ get_device(&priv->master_mii_bus->dev);
+ priv->master_mii_dn = dn;
+
+- priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
++ priv->slave_mii_bus = mdiobus_alloc();
+ if (!priv->slave_mii_bus) {
+ of_node_put(dn);
+ return -ENOMEM;
+@@ -644,8 +644,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
+ }
+
+ err = mdiobus_register(priv->slave_mii_bus);
+- if (err && dn)
++ if (err && dn) {
++ mdiobus_free(priv->slave_mii_bus);
+ of_node_put(dn);
++ }
+
+ return err;
+ }
+@@ -653,6 +655,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
+ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
+ {
+ mdiobus_unregister(priv->slave_mii_bus);
++ mdiobus_free(priv->slave_mii_bus);
+ of_node_put(priv->master_mii_dn);
+ }
+
+@@ -793,6 +796,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
+ struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ u32 reg, offset;
+
++ if (priv->wol_ports_mask & BIT(port))
++ return;
++
+ if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
+ if (priv->type == BCM4908_DEVICE_ID ||
+ priv->type == BCM7445_DEVICE_ID)
+@@ -859,6 +865,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
+ if (duplex == DUPLEX_FULL)
+ reg |= DUPLX_MODE;
+
++ if (tx_pause)
++ reg |= TXFLOW_CNTL;
++ if (rx_pause)
++ reg |= RXFLOW_CNTL;
++
+ core_writel(priv, reg, offset);
+ }
+
+@@ -1411,7 +1422,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->clk))
+ return PTR_ERR(priv->clk);
+
+- clk_prepare_enable(priv->clk);
++ ret = clk_prepare_enable(priv->clk);
++ if (ret)
++ return ret;
+
+ priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
+ if (IS_ERR(priv->clk_mdiv)) {
+@@ -1419,7 +1432,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
+ goto out_clk;
+ }
+
+- clk_prepare_enable(priv->clk_mdiv);
++ ret = clk_prepare_enable(priv->clk_mdiv);
++ if (ret)
++ goto out_clk;
+
+ ret = bcm_sf2_sw_rst(priv);
+ if (ret) {
+diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
+index a7e2fcf2df2c9..edbe5e7f1cb6b 100644
+--- a/drivers/net/dsa/bcm_sf2_cfp.c
++++ b/drivers/net/dsa/bcm_sf2_cfp.c
+@@ -567,14 +567,14 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv,
+ static struct cfp_rule *bcm_sf2_cfp_rule_find(struct bcm_sf2_priv *priv,
+ int port, u32 location)
+ {
+- struct cfp_rule *rule = NULL;
++ struct cfp_rule *rule;
+
+ list_for_each_entry(rule, &priv->cfp.rules_list, next) {
+ if (rule->port == port && rule->fs.location == location)
+- break;
++ return rule;
+ }
+
+- return rule;
++ return NULL;
+ }
+
+ static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port,
+diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
+index e638e3eea9112..e6e1054339b59 100644
+--- a/drivers/net/dsa/dsa_loop.c
++++ b/drivers/net/dsa/dsa_loop.c
+@@ -376,6 +376,17 @@ static struct mdio_driver dsa_loop_drv = {
+
+ #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2)
+
++static void dsa_loop_phydevs_unregister(void)
++{
++ unsigned int i;
++
++ for (i = 0; i < NUM_FIXED_PHYS; i++)
++ if (!IS_ERR(phydevs[i])) {
++ fixed_phy_unregister(phydevs[i]);
++ phy_device_free(phydevs[i]);
++ }
++}
++
+ static int __init dsa_loop_init(void)
+ {
+ struct fixed_phy_status status = {
+@@ -383,23 +394,23 @@ static int __init dsa_loop_init(void)
+ .speed = SPEED_100,
+ .duplex = DUPLEX_FULL,
+ };
+- unsigned int i;
++ unsigned int i, ret;
+
+ for (i = 0; i < NUM_FIXED_PHYS; i++)
+ phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL);
+
+- return mdio_driver_register(&dsa_loop_drv);
++ ret = mdio_driver_register(&dsa_loop_drv);
++ if (ret)
++ dsa_loop_phydevs_unregister();
++
++ return ret;
+ }
+ module_init(dsa_loop_init);
+
+ static void __exit dsa_loop_exit(void)
+ {
+- unsigned int i;
+-
+ mdio_driver_unregister(&dsa_loop_drv);
+- for (i = 0; i < NUM_FIXED_PHYS; i++)
+- if (!IS_ERR(phydevs[i]))
+- fixed_phy_unregister(phydevs[i]);
++ dsa_loop_phydevs_unregister();
+ }
+ module_exit(dsa_loop_exit);
+
+diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
+index 354655f9ed003..950a54ec4b59b 100644
+--- a/drivers/net/dsa/hirschmann/hellcreek.c
++++ b/drivers/net/dsa/hirschmann/hellcreek.c
+@@ -710,8 +710,9 @@ static int __hellcreek_fdb_add(struct hellcreek *hellcreek,
+ u16 meta = 0;
+
+ dev_dbg(hellcreek->dev, "Add static FDB entry: MAC=%pM, MASK=0x%02x, "
+- "OBT=%d, REPRIO_EN=%d, PRIO=%d\n", entry->mac, entry->portmask,
+- entry->is_obt, entry->reprio_en, entry->reprio_tc);
++ "OBT=%d, PASS_BLOCKED=%d, REPRIO_EN=%d, PRIO=%d\n", entry->mac,
++ entry->portmask, entry->is_obt, entry->pass_blocked,
++ entry->reprio_en, entry->reprio_tc);
+
+ /* Add mac address */
+ hellcreek_write(hellcreek, entry->mac[1] | (entry->mac[0] << 8), HR_FDBWDH);
+@@ -722,6 +723,8 @@ static int __hellcreek_fdb_add(struct hellcreek *hellcreek,
+ meta |= entry->portmask << HR_FDBWRM0_PORTMASK_SHIFT;
+ if (entry->is_obt)
+ meta |= HR_FDBWRM0_OBT;
++ if (entry->pass_blocked)
++ meta |= HR_FDBWRM0_PASS_BLOCKED;
+ if (entry->reprio_en) {
+ meta |= HR_FDBWRM0_REPRIO_EN;
+ meta |= entry->reprio_tc << HR_FDBWRM0_REPRIO_TC_SHIFT;
+@@ -1049,7 +1052,7 @@ static void hellcreek_setup_tc_identity_mapping(struct hellcreek *hellcreek)
+
+ static int hellcreek_setup_fdb(struct hellcreek *hellcreek)
+ {
+- static struct hellcreek_fdb_entry ptp = {
++ static struct hellcreek_fdb_entry l2_ptp = {
+ /* MAC: 01-1B-19-00-00-00 */
+ .mac = { 0x01, 0x1b, 0x19, 0x00, 0x00, 0x00 },
+ .portmask = 0x03, /* Management ports */
+@@ -1060,24 +1063,94 @@ static int hellcreek_setup_fdb(struct hellcreek *hellcreek)
+ .reprio_tc = 6, /* TC: 6 as per IEEE 802.1AS */
+ .reprio_en = 1,
+ };
+- static struct hellcreek_fdb_entry p2p = {
++ static struct hellcreek_fdb_entry udp4_ptp = {
++ /* MAC: 01-00-5E-00-01-81 */
++ .mac = { 0x01, 0x00, 0x5e, 0x00, 0x01, 0x81 },
++ .portmask = 0x03, /* Management ports */
++ .age = 0,
++ .is_obt = 0,
++ .pass_blocked = 0,
++ .is_static = 1,
++ .reprio_tc = 6,
++ .reprio_en = 1,
++ };
++ static struct hellcreek_fdb_entry udp6_ptp = {
++ /* MAC: 33-33-00-00-01-81 */
++ .mac = { 0x33, 0x33, 0x00, 0x00, 0x01, 0x81 },
++ .portmask = 0x03, /* Management ports */
++ .age = 0,
++ .is_obt = 0,
++ .pass_blocked = 0,
++ .is_static = 1,
++ .reprio_tc = 6,
++ .reprio_en = 1,
++ };
++ static struct hellcreek_fdb_entry l2_p2p = {
+ /* MAC: 01-80-C2-00-00-0E */
+ .mac = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e },
+ .portmask = 0x03, /* Management ports */
+ .age = 0,
+ .is_obt = 0,
+- .pass_blocked = 0,
++ .pass_blocked = 1,
+ .is_static = 1,
+ .reprio_tc = 6, /* TC: 6 as per IEEE 802.1AS */
+ .reprio_en = 1,
+ };
++ static struct hellcreek_fdb_entry udp4_p2p = {
++ /* MAC: 01-00-5E-00-00-6B */
++ .mac = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x6b },
++ .portmask = 0x03, /* Management ports */
++ .age = 0,
++ .is_obt = 0,
++ .pass_blocked = 1,
++ .is_static = 1,
++ .reprio_tc = 6,
++ .reprio_en = 1,
++ };
++ static struct hellcreek_fdb_entry udp6_p2p = {
++ /* MAC: 33-33-00-00-00-6B */
++ .mac = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x6b },
++ .portmask = 0x03, /* Management ports */
++ .age = 0,
++ .is_obt = 0,
++ .pass_blocked = 1,
++ .is_static = 1,
++ .reprio_tc = 6,
++ .reprio_en = 1,
++ };
++ static struct hellcreek_fdb_entry stp = {
++ /* MAC: 01-80-C2-00-00-00 */
++ .mac = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 },
++ .portmask = 0x03, /* Management ports */
++ .age = 0,
++ .is_obt = 0,
++ .pass_blocked = 1,
++ .is_static = 1,
++ .reprio_tc = 6,
++ .reprio_en = 1,
++ };
+ int ret;
+
+ mutex_lock(&hellcreek->reg_lock);
+- ret = __hellcreek_fdb_add(hellcreek, &ptp);
++ ret = __hellcreek_fdb_add(hellcreek, &l2_ptp);
++ if (ret)
++ goto out;
++ ret = __hellcreek_fdb_add(hellcreek, &udp4_ptp);
++ if (ret)
++ goto out;
++ ret = __hellcreek_fdb_add(hellcreek, &udp6_ptp);
++ if (ret)
++ goto out;
++ ret = __hellcreek_fdb_add(hellcreek, &l2_p2p);
++ if (ret)
++ goto out;
++ ret = __hellcreek_fdb_add(hellcreek, &udp4_p2p);
++ if (ret)
++ goto out;
++ ret = __hellcreek_fdb_add(hellcreek, &udp6_p2p);
+ if (ret)
+ goto out;
+- ret = __hellcreek_fdb_add(hellcreek, &p2p);
++ ret = __hellcreek_fdb_add(hellcreek, &stp);
+ out:
+ mutex_unlock(&hellcreek->reg_lock);
+
+diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
+index 2572c6087bb5a..b28baab6d56a1 100644
+--- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c
++++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
+@@ -300,6 +300,7 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek)
+ const char *label, *state;
+ int ret = -EINVAL;
+
++ of_node_get(hellcreek->dev->of_node);
+ leds = of_find_node_by_name(hellcreek->dev->of_node, "leds");
+ if (!leds) {
+ dev_err(hellcreek->dev, "No LEDs specified in device tree!\n");
+diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
+index 89f920289ae21..63826553719bf 100644
+--- a/drivers/net/dsa/lan9303-core.c
++++ b/drivers/net/dsa/lan9303-core.c
+@@ -10,6 +10,7 @@
+ #include <linux/mii.h>
+ #include <linux/phy.h>
+ #include <linux/if_bridge.h>
++#include <linux/if_vlan.h>
+ #include <linux/etherdevice.h>
+
+ #include "lan9303.h"
+@@ -958,7 +959,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = {
+ { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", },
+ { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", },
+ { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", },
+- { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", },
++ { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", },
+ { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", },
+ { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", },
+ { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", },
+@@ -1002,9 +1003,11 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
+ ret = lan9303_read_switch_port(
+ chip, port, lan9303_mib[u].offset, &reg);
+
+- if (ret)
++ if (ret) {
+ dev_warn(chip->dev, "Reading status port %d reg %u failed\n",
+ port, lan9303_mib[u].offset);
++ reg = 0;
++ }
+ data[u] = reg;
+ }
+ }
+@@ -1083,21 +1086,27 @@ static void lan9303_adjust_link(struct dsa_switch *ds, int port,
+ static int lan9303_port_enable(struct dsa_switch *ds, int port,
+ struct phy_device *phy)
+ {
++ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct lan9303 *chip = ds->priv;
+
+- if (!dsa_is_user_port(ds, port))
++ if (!dsa_port_is_user(dp))
+ return 0;
+
++ vlan_vid_add(dp->cpu_dp->master, htons(ETH_P_8021Q), port);
++
+ return lan9303_enable_processing_port(chip, port);
+ }
+
+ static void lan9303_port_disable(struct dsa_switch *ds, int port)
+ {
++ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct lan9303 *chip = ds->priv;
+
+- if (!dsa_is_user_port(ds, port))
++ if (!dsa_port_is_user(dp))
+ return;
+
++ vlan_vid_del(dp->cpu_dp->master, htons(ETH_P_8021Q), port);
++
+ lan9303_disable_processing_port(chip, port);
+ lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
+ }
+@@ -1185,8 +1194,6 @@ static int lan9303_port_fdb_add(struct dsa_switch *ds, int port,
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
+- if (vid)
+- return -EOPNOTSUPP;
+
+ return lan9303_alr_add_port(chip, addr, port, false);
+ }
+@@ -1198,8 +1205,6 @@ static int lan9303_port_fdb_del(struct dsa_switch *ds, int port,
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
+- if (vid)
+- return -EOPNOTSUPP;
+ lan9303_alr_del_port(chip, addr, port);
+
+ return 0;
+@@ -1309,7 +1314,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip,
+ struct device_node *np)
+ {
+ chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset",
+- GPIOD_OUT_LOW);
++ GPIOD_OUT_HIGH);
+ if (IS_ERR(chip->reset_gpio))
+ return PTR_ERR(chip->reset_gpio);
+
+diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
+index dbd4486a173ff..2240a3d351225 100644
+--- a/drivers/net/dsa/lantiq_gswip.c
++++ b/drivers/net/dsa/lantiq_gswip.c
+@@ -497,8 +497,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg)
+ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
+ {
+ struct dsa_switch *ds = priv->ds;
++ int err;
+
+- ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev);
++ ds->slave_mii_bus = mdiobus_alloc();
+ if (!ds->slave_mii_bus)
+ return -ENOMEM;
+
+@@ -511,7 +512,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
+ ds->slave_mii_bus->parent = priv->dev;
+ ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
+
+- return of_mdiobus_register(ds->slave_mii_bus, mdio_np);
++ err = of_mdiobus_register(ds->slave_mii_bus, mdio_np);
++ if (err)
++ mdiobus_free(ds->slave_mii_bus);
++
++ return err;
+ }
+
+ static int gswip_pce_table_entry_read(struct gswip_priv *priv,
+@@ -1658,9 +1663,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ miicfg |= GSWIP_MII_CFG_MODE_RMIIM;
+-
+- /* Configure the RMII clock as output: */
+- miicfg |= GSWIP_MII_CFG_RMII_CLK;
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+@@ -2045,8 +2047,10 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv,
+ for_each_available_child_of_node(gphy_fw_list_np, gphy_fw_np) {
+ err = gswip_gphy_fw_probe(priv, &priv->gphy_fw[i],
+ gphy_fw_np, i);
+- if (err)
++ if (err) {
++ of_node_put(gphy_fw_np);
+ goto remove_gphy;
++ }
+ i++;
+ }
+
+@@ -2170,8 +2174,10 @@ disable_switch:
+ gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
+ dsa_unregister_switch(priv->ds);
+ mdio_bus:
+- if (mdio_np)
++ if (mdio_np) {
+ mdiobus_unregister(priv->ds->slave_mii_bus);
++ mdiobus_free(priv->ds->slave_mii_bus);
++ }
+ put_mdio_node:
+ of_node_put(mdio_np);
+ for (i = 0; i < priv->num_gphy_fw; i++)
+@@ -2195,6 +2201,7 @@ static int gswip_remove(struct platform_device *pdev)
+ if (priv->ds->slave_mii_bus) {
+ mdiobus_unregister(priv->ds->slave_mii_bus);
+ of_node_put(priv->ds->slave_mii_bus->dev.of_node);
++ mdiobus_free(priv->ds->slave_mii_bus);
+ }
+
+ for (i = 0; i < priv->num_gphy_fw; i++)
+diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
+index 866767b70d65b..b0a7dee27ffc9 100644
+--- a/drivers/net/dsa/microchip/ksz8795_spi.c
++++ b/drivers/net/dsa/microchip/ksz8795_spi.c
+@@ -124,12 +124,23 @@ static const struct of_device_id ksz8795_dt_ids[] = {
+ };
+ MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
+
++static const struct spi_device_id ksz8795_spi_ids[] = {
++ { "ksz8765" },
++ { "ksz8794" },
++ { "ksz8795" },
++ { "ksz8863" },
++ { "ksz8873" },
++ { },
++};
++MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids);
++
+ static struct spi_driver ksz8795_spi_driver = {
+ .driver = {
+ .name = "ksz8795-switch",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(ksz8795_dt_ids),
+ },
++ .id_table = ksz8795_spi_ids,
+ .probe = ksz8795_spi_probe,
+ .remove = ksz8795_spi_remove,
+ .shutdown = ksz8795_spi_shutdown,
+diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
+index 5883fa7edda22..54f3e033abbf4 100644
+--- a/drivers/net/dsa/microchip/ksz8863_smi.c
++++ b/drivers/net/dsa/microchip/ksz8863_smi.c
+@@ -86,22 +86,16 @@ static const struct regmap_bus regmap_smi[] = {
+ {
+ .read = ksz8863_mdio_read,
+ .write = ksz8863_mdio_write,
+- .max_raw_read = 1,
+- .max_raw_write = 1,
+ },
+ {
+ .read = ksz8863_mdio_read,
+ .write = ksz8863_mdio_write,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+- .max_raw_read = 2,
+- .max_raw_write = 2,
+ },
+ {
+ .read = ksz8863_mdio_read,
+ .write = ksz8863_mdio_write,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+- .max_raw_read = 4,
+- .max_raw_write = 4,
+ }
+ };
+
+@@ -112,7 +106,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ .pad_bits = 24,
+ .val_bits = 8,
+ .cache_type = REGCACHE_NONE,
+- .use_single_read = 1,
+ .lock = ksz_regmap_lock,
+ .unlock = ksz_regmap_unlock,
+ },
+@@ -122,7 +115,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ .pad_bits = 24,
+ .val_bits = 16,
+ .cache_type = REGCACHE_NONE,
+- .use_single_read = 1,
+ .lock = ksz_regmap_lock,
+ .unlock = ksz_regmap_unlock,
+ },
+@@ -132,7 +124,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ .pad_bits = 24,
+ .val_bits = 32,
+ .cache_type = REGCACHE_NONE,
+- .use_single_read = 1,
+ .lock = ksz_regmap_lock,
+ .unlock = ksz_regmap_unlock,
+ }
+diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
+index 854e25f43fa70..bf788e17f408f 100644
+--- a/drivers/net/dsa/microchip/ksz9477.c
++++ b/drivers/net/dsa/microchip/ksz9477.c
+@@ -675,10 +675,10 @@ static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
+ ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]);
+
+ /* clear forwarding port */
+- alu_table[2] &= ~BIT(port);
++ alu_table[1] &= ~BIT(port);
+
+ /* if there is no port to forward, clear table */
+- if ((alu_table[2] & ALU_V_PORT_MAP) == 0) {
++ if ((alu_table[1] & ALU_V_PORT_MAP) == 0) {
+ alu_table[0] = 0;
+ alu_table[1] = 0;
+ alu_table[2] = 0;
+@@ -759,6 +759,9 @@ static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port,
+ goto exit;
+ }
+
++ if (!(ksz_data & ALU_VALID))
++ continue;
++
+ /* read ALU table */
+ ksz9477_read_table(dev, alu_table);
+
+diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
+index e3cb0e6c9f6f2..43addeabfc259 100644
+--- a/drivers/net/dsa/microchip/ksz9477_spi.c
++++ b/drivers/net/dsa/microchip/ksz9477_spi.c
+@@ -98,12 +98,24 @@ static const struct of_device_id ksz9477_dt_ids[] = {
+ };
+ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
+
++static const struct spi_device_id ksz9477_spi_ids[] = {
++ { "ksz9477" },
++ { "ksz9897" },
++ { "ksz9893" },
++ { "ksz9563" },
++ { "ksz8563" },
++ { "ksz9567" },
++ { },
++};
++MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids);
++
+ static struct spi_driver ksz9477_spi_driver = {
+ .driver = {
+ .name = "ksz9477-switch",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(ksz9477_dt_ids),
+ },
++ .id_table = ksz9477_spi_ids,
+ .probe = ksz9477_spi_probe,
+ .remove = ksz9477_spi_remove,
+ .shutdown = ksz9477_spi_shutdown,
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 7c2968a639eba..4c4e6990c0ae9 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -414,18 +414,21 @@ int ksz_switch_register(struct ksz_device *dev,
+ ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
+ if (!ports)
+ ports = of_get_child_by_name(dev->dev->of_node, "ports");
+- if (ports)
++ if (ports) {
+ for_each_available_child_of_node(ports, port) {
+ if (of_property_read_u32(port, "reg",
+ &port_num))
+ continue;
+ if (!(dev->port_mask & BIT(port_num))) {
+ of_node_put(port);
++ of_node_put(ports);
+ return -EINVAL;
+ }
+ of_get_phy_mode(port,
+ &dev->ports[port_num].interface);
+ }
++ of_node_put(ports);
++ }
+ dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
+ "microchip,synclko-125");
+ }
+diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
+index 9890672a206d0..f74d9fbd08178 100644
+--- a/drivers/net/dsa/mt7530.c
++++ b/drivers/net/dsa/mt7530.c
+@@ -388,12 +388,38 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
+ mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]);
+ }
+
+-/* Setup TX circuit including relevant PAD and driving */
++/* Set up switch core clock for MT7530 */
++static void mt7530_pll_setup(struct mt7530_priv *priv)
++{
++ /* Disable core clock */
++ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
++
++ /* Disable PLL */
++ core_write(priv, CORE_GSWPLL_GRP1, 0);
++
++ /* Set core clock into 500Mhz */
++ core_write(priv, CORE_GSWPLL_GRP2,
++ RG_GSWPLL_POSDIV_500M(1) |
++ RG_GSWPLL_FBKDIV_500M(25));
++
++ /* Enable PLL */
++ core_write(priv, CORE_GSWPLL_GRP1,
++ RG_GSWPLL_EN_PRE |
++ RG_GSWPLL_POSDIV_200M(2) |
++ RG_GSWPLL_FBKDIV_200M(32));
++
++ udelay(20);
++
++ /* Enable core clock */
++ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
++}
++
++/* Setup port 6 interface mode and TRGMII TX circuit */
+ static int
+ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
+ {
+ struct mt7530_priv *priv = ds->priv;
+- u32 ncpo1, ssc_delta, trgint, i, xtal;
++ u32 ncpo1, ssc_delta, trgint, xtal;
+
+ xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK;
+
+@@ -407,15 +433,17 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
+ switch (interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ trgint = 0;
+- /* PLL frequency: 125MHz */
+- ncpo1 = 0x0c80;
+ break;
+ case PHY_INTERFACE_MODE_TRGMII:
+ trgint = 1;
++ if (xtal == HWTRAP_XTAL_25MHZ)
++ ssc_delta = 0x57;
++ else
++ ssc_delta = 0x87;
+ if (priv->id == ID_MT7621) {
+- /* PLL frequency: 150MHz: 1.2GBit */
++ /* PLL frequency: 125MHz: 1.0GBit */
+ if (xtal == HWTRAP_XTAL_40MHZ)
+- ncpo1 = 0x0780;
++ ncpo1 = 0x0640;
+ if (xtal == HWTRAP_XTAL_25MHZ)
+ ncpo1 = 0x0a00;
+ } else { /* PLL frequency: 250MHz: 2.0Gbit */
+@@ -431,61 +459,32 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
+ return -EINVAL;
+ }
+
+- if (xtal == HWTRAP_XTAL_25MHZ)
+- ssc_delta = 0x57;
+- else
+- ssc_delta = 0x87;
+-
+ mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK,
+ P6_INTF_MODE(trgint));
+
+- /* Lower Tx Driving for TRGMII path */
+- for (i = 0 ; i < NUM_TRGMII_CTRL ; i++)
+- mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
+- TD_DM_DRVP(8) | TD_DM_DRVN(8));
+-
+- /* Disable MT7530 core and TRGMII Tx clocks */
+- core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
+- REG_GSWCK_EN | REG_TRGMIICK_EN);
+-
+- /* Setup core clock for MT7530 */
+- /* Disable PLL */
+- core_write(priv, CORE_GSWPLL_GRP1, 0);
+-
+- /* Set core clock into 500Mhz */
+- core_write(priv, CORE_GSWPLL_GRP2,
+- RG_GSWPLL_POSDIV_500M(1) |
+- RG_GSWPLL_FBKDIV_500M(25));
+-
+- /* Enable PLL */
+- core_write(priv, CORE_GSWPLL_GRP1,
+- RG_GSWPLL_EN_PRE |
+- RG_GSWPLL_POSDIV_200M(2) |
+- RG_GSWPLL_FBKDIV_200M(32));
++ if (trgint) {
++ /* Disable the MT7530 TRGMII clocks */
++ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
++
++ /* Setup the MT7530 TRGMII Tx Clock */
++ core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
++ core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
++ core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
++ core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
++ core_write(priv, CORE_PLL_GROUP4,
++ RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
++ RG_SYSPLL_BIAS_LPF_EN);
++ core_write(priv, CORE_PLL_GROUP2,
++ RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
++ RG_SYSPLL_POSDIV(1));
++ core_write(priv, CORE_PLL_GROUP7,
++ RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
++ RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
++
++ /* Enable the MT7530 TRGMII clocks */
++ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN);
++ }
+
+- /* Setup the MT7530 TRGMII Tx Clock */
+- core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
+- core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
+- core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
+- core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
+- core_write(priv, CORE_PLL_GROUP4,
+- RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
+- RG_SYSPLL_BIAS_LPF_EN);
+- core_write(priv, CORE_PLL_GROUP2,
+- RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
+- RG_SYSPLL_POSDIV(1));
+- core_write(priv, CORE_PLL_GROUP7,
+- RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
+- RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
+-
+- /* Enable MT7530 core and TRGMII Tx clocks */
+- core_set(priv, CORE_TRGMII_GSW_CLK_CG,
+- REG_GSWCK_EN | REG_TRGMIICK_EN);
+-
+- if (!trgint)
+- for (i = 0 ; i < NUM_TRGMII_CTRL; i++)
+- mt7530_rmw(priv, MT7530_TRGMII_RD(i),
+- RD_TAP_MASK, RD_TAP(16));
+ return 0;
+ }
+
+@@ -501,14 +500,19 @@ static bool mt7531_dual_sgmii_supported(struct mt7530_priv *priv)
+ static int
+ mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface)
+ {
+- struct mt7530_priv *priv = ds->priv;
++ return 0;
++}
++
++static void
++mt7531_pll_setup(struct mt7530_priv *priv)
++{
+ u32 top_sig;
+ u32 hwstrap;
+ u32 xtal;
+ u32 val;
+
+ if (mt7531_dual_sgmii_supported(priv))
+- return 0;
++ return;
+
+ val = mt7530_read(priv, MT7531_CREV);
+ top_sig = mt7530_read(priv, MT7531_TOP_SIG_SR);
+@@ -587,8 +591,6 @@ mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface)
+ val |= EN_COREPLL;
+ mt7530_write(priv, MT7531_PLLGP_EN, val);
+ usleep_range(25, 35);
+-
+- return 0;
+ }
+
+ static void
+@@ -991,6 +993,14 @@ unlock_exit:
+ mutex_unlock(&priv->reg_mutex);
+ }
+
++static void
++mt753x_trap_frames(struct mt7530_priv *priv)
++{
++ /* Trap BPDUs to the CPU port(s) */
++ mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
++ MT753X_BPDU_CPU_ONLY);
++}
++
+ static int
+ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
+ {
+@@ -1008,12 +1018,12 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
+ mt7530_write(priv, MT7530_PVC_P(port),
+ PORT_SPEC_TAG);
+
+- /* Disable flooding by default */
+- mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK,
+- BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
++ /* Enable flooding on the CPU port */
++ mt7530_set(priv, MT7530_MFC, BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) |
++ UNU_FFP(BIT(port)));
+
+ /* Set CPU port number */
+- if (priv->id == ID_MT7621)
++ if (priv->id == ID_MT7530 || priv->id == ID_MT7621)
+ mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port));
+
+ /* CPU port gets connected to all user ports of
+@@ -1287,14 +1297,26 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
+ if (!priv->ports[port].pvid)
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+- }
+
+- /* Set the port as a user port which is to be able to recognize VID
+- * from incoming packets before fetching entry within the VLAN table.
+- */
+- mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+- VLAN_ATTR(MT7530_VLAN_USER) |
+- PVC_EG_TAG(MT7530_VLAN_EG_DISABLED));
++ /* Set the port as a user port which is to be able to recognize
++ * VID from incoming packets before fetching entry within the
++ * VLAN table.
++ */
++ mt7530_rmw(priv, MT7530_PVC_P(port),
++ VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
++ VLAN_ATTR(MT7530_VLAN_USER) |
++ PVC_EG_TAG(MT7530_VLAN_EG_DISABLED));
++ } else {
++ /* Also set CPU ports to the "user" VLAN port attribute, to
++ * allow VLAN classification, but keep the EG_TAG attribute as
++ * "consistent" (i.o.w. don't change its value) for packets
++ * received by the switch from the CPU, so that tagged packets
++ * are forwarded to user ports as tagged, and untagged as
++ * untagged.
++ */
++ mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK,
++ VLAN_ATTR(MT7530_VLAN_USER));
++ }
+ }
+
+ static void
+@@ -2066,7 +2088,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv)
+ if (priv->irq)
+ mt7530_setup_mdio_irq(priv);
+
+- ret = mdiobus_register(bus);
++ ret = devm_mdiobus_register(dev, bus);
+ if (ret) {
+ dev_err(dev, "failed to register MDIO bus: %d\n", ret);
+ if (priv->irq)
+@@ -2080,11 +2102,12 @@ static int
+ mt7530_setup(struct dsa_switch *ds)
+ {
+ struct mt7530_priv *priv = ds->priv;
++ struct device_node *dn = NULL;
+ struct device_node *phy_node;
+ struct device_node *mac_np;
+ struct mt7530_dummy_poll p;
+ phy_interface_t interface;
+- struct device_node *dn;
++ struct dsa_port *cpu_dp;
+ u32 id, val;
+ int ret, i;
+
+@@ -2092,7 +2115,19 @@ mt7530_setup(struct dsa_switch *ds)
+ * controller also is the container for two GMACs nodes representing
+ * as two netdev instances.
+ */
+- dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
++ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
++ dn = cpu_dp->master->dev.of_node->parent;
++ /* It doesn't matter which CPU port is found first,
++ * their masters should share the same parent OF node
++ */
++ break;
++ }
++
++ if (!dn) {
++ dev_err(ds->dev, "parent OF node of DSA master not found");
++ return -EINVAL;
++ }
++
+ ds->assisted_learning_on_cpu_port = true;
+ ds->mtu_enforcement_ingress = true;
+
+@@ -2148,7 +2183,18 @@ mt7530_setup(struct dsa_switch *ds)
+ SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
+ SYS_CTRL_REG_RST);
+
+- /* Enable Port 6 only; P5 as GMAC5 which currently is not supported */
++ mt7530_pll_setup(priv);
++
++ /* Lower Tx driving for TRGMII path */
++ for (i = 0; i < NUM_TRGMII_CTRL; i++)
++ mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
++ TD_DM_DRVP(8) | TD_DM_DRVN(8));
++
++ for (i = 0; i < NUM_TRGMII_CTRL; i++)
++ mt7530_rmw(priv, MT7530_TRGMII_RD(i),
++ RD_TAP_MASK, RD_TAP(16));
++
++ /* Enable port 6 */
+ val = mt7530_read(priv, MT7530_MHWTRAP);
+ val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS;
+ val |= MHWTRAP_MANUAL;
+@@ -2156,6 +2202,8 @@ mt7530_setup(struct dsa_switch *ds)
+
+ priv->p6_interface = PHY_INTERFACE_MODE_NA;
+
++ mt753x_trap_frames(priv);
++
+ /* Enable and reset MIB counters */
+ mt7530_mib_reset(ds);
+
+@@ -2216,6 +2264,7 @@ mt7530_setup(struct dsa_switch *ds)
+ ret = of_get_phy_mode(mac_np, &interface);
+ if (ret && ret != -ENODEV) {
+ of_node_put(mac_np);
++ of_node_put(phy_node);
+ return ret;
+ }
+ id = of_mdio_parse_addr(ds->dev, phy_node);
+@@ -2248,13 +2297,71 @@ mt7530_setup(struct dsa_switch *ds)
+ return 0;
+ }
+
++static int
++mt7531_setup_common(struct dsa_switch *ds)
++{
++ struct mt7530_priv *priv = ds->priv;
++ struct dsa_port *cpu_dp;
++ int ret, i;
++
++ /* BPDU to CPU port */
++ dsa_switch_for_each_cpu_port(cpu_dp, ds) {
++ mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
++ BIT(cpu_dp->index));
++ break;
++ }
++
++ mt753x_trap_frames(priv);
++
++ /* Enable and reset MIB counters */
++ mt7530_mib_reset(ds);
++
++ /* Disable flooding on all ports */
++ mt7530_clear(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK |
++ UNU_FFP_MASK);
++
++ for (i = 0; i < MT7530_NUM_PORTS; i++) {
++ /* Disable forwarding by default on all ports */
++ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
++ PCR_MATRIX_CLR);
++
++ /* Disable learning by default on all ports */
++ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
++
++ mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
++
++ if (dsa_is_cpu_port(ds, i)) {
++ ret = mt753x_cpu_port_enable(ds, i);
++ if (ret)
++ return ret;
++ } else {
++ mt7530_port_disable(ds, i);
++
++ /* Set default PVID to 0 on all user ports */
++ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
++ G0_PORT_VID_DEF);
++ }
++
++ /* Enable consistent egress tag */
++ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
++ PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
++ }
++
++ /* Flush the FDB table */
++ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
++ if (ret < 0)
++ return ret;
++
++ return 0;
++}
++
+ static int
+ mt7531_setup(struct dsa_switch *ds)
+ {
+ struct mt7530_priv *priv = ds->priv;
+ struct mt7530_dummy_poll p;
+ u32 val, id;
+- int ret, i;
++ int ret;
+
+ /* Reset whole chip through gpio pin or memory-mapped registers for
+ * different type of hardware
+@@ -2291,6 +2398,8 @@ mt7531_setup(struct dsa_switch *ds)
+ SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
+ SYS_CTRL_REG_RST);
+
++ mt7531_pll_setup(priv);
++
+ if (mt7531_dual_sgmii_supported(priv)) {
+ priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII;
+
+@@ -2324,41 +2433,7 @@ mt7531_setup(struct dsa_switch *ds)
+ mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
+ CORE_PLL_GROUP4, val);
+
+- /* BPDU to CPU port */
+- mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
+- BIT(MT7530_CPU_PORT));
+- mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
+- MT753X_BPDU_CPU_ONLY);
+-
+- /* Enable and reset MIB counters */
+- mt7530_mib_reset(ds);
+-
+- for (i = 0; i < MT7530_NUM_PORTS; i++) {
+- /* Disable forwarding by default on all ports */
+- mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
+- PCR_MATRIX_CLR);
+-
+- /* Disable learning by default on all ports */
+- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+-
+- mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
+-
+- if (dsa_is_cpu_port(ds, i)) {
+- ret = mt753x_cpu_port_enable(ds, i);
+- if (ret)
+- return ret;
+- } else {
+- mt7530_port_disable(ds, i);
+-
+- /* Set default PVID to 0 on all user ports */
+- mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+- G0_PORT_VID_DEF);
+- }
+-
+- /* Enable consistent egress tag */
+- mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
+- PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+- }
++ mt7531_setup_common(ds);
+
+ /* Setup VLAN ID 0 for VLAN-unaware bridges */
+ ret = mt7530_setup_vlan0(priv);
+@@ -2368,11 +2443,6 @@ mt7531_setup(struct dsa_switch *ds)
+ ds->assisted_learning_on_cpu_port = true;
+ ds->mtu_enforcement_ingress = true;
+
+- /* Flush the FDB table */
+- ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
+- if (ret < 0)
+- return ret;
+-
+ return 0;
+ }
+
+@@ -2526,13 +2596,7 @@ static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port,
+ /* Port5 supports ethier RGMII or SGMII.
+ * Port6 supports SGMII only.
+ */
+- switch (port) {
+- case 5:
+- if (mt7531_is_rgmii_port(priv, port))
+- break;
+- fallthrough;
+- case 6:
+- phylink_set(supported, 1000baseX_Full);
++ if (port == 6) {
+ phylink_set(supported, 2500baseX_Full);
+ phylink_set(supported, 2500baseT_Full);
+ }
+@@ -2872,8 +2936,6 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port)
+ case 6:
+ interface = PHY_INTERFACE_MODE_2500BASEX;
+
+- mt7531_pad_setup(ds, interface);
+-
+ priv->p6_interface = interface;
+ break;
+ default:
+@@ -2900,8 +2962,6 @@ static void
+ mt7530_mac_port_validate(struct dsa_switch *ds, int port,
+ unsigned long *supported)
+ {
+- if (port == 5)
+- phylink_set(supported, 1000baseX_Full);
+ }
+
+ static void mt7531_mac_port_validate(struct dsa_switch *ds, int port,
+@@ -2928,7 +2988,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port,
+
+ phylink_set_port_modes(mask);
+
+- if (state->interface != PHY_INTERFACE_MODE_TRGMII ||
++ if (state->interface != PHY_INTERFACE_MODE_TRGMII &&
+ !phy_interface_mode_is_8023z(state->interface)) {
+ phylink_set(mask, 10baseT_Half);
+ phylink_set(mask, 10baseT_Full);
+@@ -2938,8 +2998,10 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port,
+ }
+
+ /* This switch only supports 1G full-duplex. */
+- if (state->interface != PHY_INTERFACE_MODE_MII)
++ if (state->interface != PHY_INTERFACE_MODE_MII) {
+ phylink_set(mask, 1000baseT_Full);
++ phylink_set(mask, 1000baseX_Full);
++ }
+
+ priv->info->mac_port_validate(ds, port, mask);
+
+diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
+index a4c6eb9a52d0d..83dca9179aa07 100644
+--- a/drivers/net/dsa/mv88e6060.c
++++ b/drivers/net/dsa/mv88e6060.c
+@@ -118,6 +118,9 @@ static int mv88e6060_setup_port(struct mv88e6060_priv *priv, int p)
+ int addr = REG_PORT(p);
+ int ret;
+
++ if (dsa_is_unused_port(priv->ds, p))
++ return 0;
++
+ /* Do not force flow control, disable Ingress and Egress
+ * Header tagging, disable VLAN tunneling, and set the port
+ * state to Forwarding. Additionally, if this is the CPU
+diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
+index 7a2445a34eb77..e3181d5471dfe 100644
+--- a/drivers/net/dsa/mv88e6xxx/Kconfig
++++ b/drivers/net/dsa/mv88e6xxx/Kconfig
+@@ -2,7 +2,6 @@
+ config NET_DSA_MV88E6XXX
+ tristate "Marvell 88E6xxx Ethernet switch fabric support"
+ depends on NET_DSA
+- depends on PTP_1588_CLOCK_OPTIONAL
+ select IRQ_DOMAIN
+ select NET_DSA_TAG_EDSA
+ select NET_DSA_TAG_DSA
+@@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX
+ config NET_DSA_MV88E6XXX_PTP
+ bool "PTP support for Marvell 88E6xxx"
+ default n
+- depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK
++ depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \
++ (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK)
+ help
+ Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+ chips that support it.
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 8dadcae93c9b5..7e93b72f9b541 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -471,6 +471,12 @@ static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port)
+ u16 reg;
+ int err;
+
++ /* The 88e6250 family does not have the PHY detect bit. Instead,
++ * report whether the port is internal.
++ */
++ if (chip->info->family == MV88E6XXX_FAMILY_6250)
++ return port < chip->info->num_internal_phys;
++
+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
+ if (err) {
+ dev_err(chip->dev,
+@@ -640,7 +646,10 @@ static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
+ unsigned long *mask,
+ struct phylink_link_state *state)
+ {
+- if (port == 0 || port == 9 || port == 10) {
++ bool is_6191x =
++ chip->info->prod_num == MV88E6XXX_PORT_SWITCH_ID_PROD_6191X;
++
++ if (((port == 0 || port == 9) && !is_6191x) || port == 10) {
+ phylink_set(mask, 10000baseT_Full);
+ phylink_set(mask, 10000baseKR_Full);
+ phylink_set(mask, 10000baseCR_Full);
+@@ -690,44 +699,48 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
+ {
+ struct mv88e6xxx_chip *chip = ds->priv;
+ struct mv88e6xxx_port *p;
+- int err;
++ int err = 0;
+
+ p = &chip->ports[port];
+
+- /* FIXME: is this the correct test? If we're in fixed mode on an
+- * internal port, why should we process this any different from
+- * PHY mode? On the other hand, the port may be automedia between
+- * an internal PHY and the serdes...
+- */
+- if ((mode == MLO_AN_PHY) && mv88e6xxx_phy_is_internal(ds, port))
+- return;
+-
+ mv88e6xxx_reg_lock(chip);
+- /* In inband mode, the link may come up at any time while the link
+- * is not forced down. Force the link down while we reconfigure the
+- * interface mode.
+- */
+- if (mode == MLO_AN_INBAND && p->interface != state->interface &&
+- chip->info->ops->port_set_link)
+- chip->info->ops->port_set_link(chip, port, LINK_FORCED_DOWN);
+-
+- err = mv88e6xxx_port_config_interface(chip, port, state->interface);
+- if (err && err != -EOPNOTSUPP)
+- goto err_unlock;
+
+- err = mv88e6xxx_serdes_pcs_config(chip, port, mode, state->interface,
+- state->advertising);
+- /* FIXME: we should restart negotiation if something changed - which
+- * is something we get if we convert to using phylinks PCS operations.
+- */
+- if (err > 0)
+- err = 0;
++ if (mode != MLO_AN_PHY || !mv88e6xxx_phy_is_internal(ds, port)) {
++ /* In inband mode, the link may come up at any time while the
++ * link is not forced down. Force the link down while we
++ * reconfigure the interface mode.
++ */
++ if (mode == MLO_AN_INBAND &&
++ p->interface != state->interface &&
++ chip->info->ops->port_set_link)
++ chip->info->ops->port_set_link(chip, port,
++ LINK_FORCED_DOWN);
++
++ err = mv88e6xxx_port_config_interface(chip, port,
++ state->interface);
++ if (err && err != -EOPNOTSUPP)
++ goto err_unlock;
++
++ err = mv88e6xxx_serdes_pcs_config(chip, port, mode,
++ state->interface,
++ state->advertising);
++ /* FIXME: we should restart negotiation if something changed -
++		 * which is something we get if we convert to using phylink's
++ * PCS operations.
++ */
++ if (err > 0)
++ err = 0;
++ }
+
+ /* Undo the forced down state above after completing configuration
+- * irrespective of its state on entry, which allows the link to come up.
++ * irrespective of its state on entry, which allows the link to come
++ * up in the in-band case where there is no separate SERDES. Also
++ * ensure that the link can come up if the PPU is in use and we are
++ * in PHY mode (we treat the PPU as an effective in-band mechanism.)
+ */
+- if (mode == MLO_AN_INBAND && p->interface != state->interface &&
+- chip->info->ops->port_set_link)
++ if (chip->info->ops->port_set_link &&
++ ((mode == MLO_AN_INBAND && p->interface != state->interface) ||
++ (mode == MLO_AN_PHY && mv88e6xxx_port_ppu_updates(chip, port))))
+ chip->info->ops->port_set_link(chip, port, LINK_UNFORCED);
+
+ p->interface = state->interface;
+@@ -750,13 +763,16 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port,
+ ops = chip->info->ops;
+
+ mv88e6xxx_reg_lock(chip);
+- /* Internal PHYs propagate their configuration directly to the MAC.
+- * External PHYs depend on whether the PPU is enabled for this port.
++ /* Force the link down if we know the port may not be automatically
++ * updated by the switch or if we are using fixed-link mode.
+ */
+- if (((!mv88e6xxx_phy_is_internal(ds, port) &&
+- !mv88e6xxx_port_ppu_updates(chip, port)) ||
++ if ((!mv88e6xxx_port_ppu_updates(chip, port) ||
+ mode == MLO_AN_FIXED) && ops->port_sync_link)
+ err = ops->port_sync_link(chip, port, mode, false);
++
++ if (!err && ops->port_set_speed_duplex)
++ err = ops->port_set_speed_duplex(chip, port, SPEED_UNFORCED,
++ DUPLEX_UNFORCED);
+ mv88e6xxx_reg_unlock(chip);
+
+ if (err)
+@@ -777,11 +793,11 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port,
+ ops = chip->info->ops;
+
+ mv88e6xxx_reg_lock(chip);
+- /* Internal PHYs propagate their configuration directly to the MAC.
+- * External PHYs depend on whether the PPU is enabled for this port.
++ /* Configure and force the link up if we know that the port may not
++	 * be automatically updated by the switch or if we are using fixed-link
++ * mode.
+ */
+- if ((!mv88e6xxx_phy_is_internal(ds, port) &&
+- !mv88e6xxx_port_ppu_updates(chip, port)) ||
++ if (!mv88e6xxx_port_ppu_updates(chip, port) ||
+ mode == MLO_AN_FIXED) {
+ /* FIXME: for an automedia port, should we force the link
+ * down here - what if the link comes up due to "other" media
+@@ -2572,6 +2588,14 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
+
+ /* If there is a GPIO connected to the reset pin, toggle it */
+ if (gpiod) {
++ /* If the switch has just been reset and not yet completed
++ * loading EEPROM, the reset may interrupt the I2C transaction
++ * mid-byte, causing the first EEPROM read after the reset
++		 * to come from the wrong location, resulting in the switch
++		 * booting into the wrong mode and becoming inoperable.
++ */
++ mv88e6xxx_g1_wait_eeprom_done(chip);
++
+ gpiod_set_value_cansleep(gpiod, 1);
+ usleep_range(10000, 20000);
+ gpiod_set_value_cansleep(gpiod, 0);
+@@ -2896,9 +2920,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
+ * If this is the upstream port for this switch, enable
+ * forwarding of unknown unicasts and multicasts.
+ */
+- reg = MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP |
+- MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP |
++ reg = MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP |
+ MV88E6XXX_PORT_CTL0_STATE_FORWARDING;
++ /* Forward any IPv4 IGMP or IPv6 MLD frames received
++ * by a USER port to the CPU port to allow snooping.
++ */
++ if (dsa_is_user_port(ds, port))
++ reg |= MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP;
++
+ err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL0, reg);
+ if (err)
+ return err;
+@@ -3038,7 +3067,7 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
+ return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+ else if (chip->info->ops->set_max_frame_size)
+ return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+- return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
++ return ETH_DATA_LEN;
+ }
+
+ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+@@ -3046,6 +3075,17 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ struct mv88e6xxx_chip *chip = ds->priv;
+ int ret = 0;
+
++ /* For families where we don't know how to alter the MTU,
++ * just accept any value up to ETH_DATA_LEN
++ */
++ if (!chip->info->ops->port_set_jumbo_size &&
++ !chip->info->ops->set_max_frame_size) {
++ if (new_mtu > ETH_DATA_LEN)
++ return -EINVAL;
++
++ return 0;
++ }
++
+ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ new_mtu += EDSA_HLEN;
+
+@@ -3054,9 +3094,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
+ else if (chip->info->ops->set_max_frame_size)
+ ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
+- else
+- if (new_mtu > 1522)
+- ret = -EINVAL;
+ mv88e6xxx_reg_unlock(chip);
+
+ return ret;
+@@ -3400,7 +3437,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
+ return err;
+ }
+
+- bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
++ bus = mdiobus_alloc_size(sizeof(*mdio_bus));
+ if (!bus)
+ return -ENOMEM;
+
+@@ -3425,14 +3462,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
+ if (!external) {
+ err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
+ if (err)
+- return err;
++ goto out;
+ }
+
+ err = of_mdiobus_register(bus, np);
+ if (err) {
+ dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
+- return err;
++ goto out;
+ }
+
+ if (external)
+@@ -3441,21 +3478,26 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
+ list_add(&mdio_bus->list, &chip->mdios);
+
+ return 0;
++
++out:
++ mdiobus_free(bus);
++ return err;
+ }
+
+ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
+
+ {
+- struct mv88e6xxx_mdio_bus *mdio_bus;
++ struct mv88e6xxx_mdio_bus *mdio_bus, *p;
+ struct mii_bus *bus;
+
+- list_for_each_entry(mdio_bus, &chip->mdios, list) {
++ list_for_each_entry_safe(mdio_bus, p, &chip->mdios, list) {
+ bus = mdio_bus->bus;
+
+ if (!mdio_bus->external)
+ mv88e6xxx_g2_irq_mdio_free(chip, bus);
+
+ mdiobus_unregister(bus);
++ mdiobus_free(bus);
+ }
+ }
+
+@@ -3471,6 +3513,7 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip,
+ */
+ child = of_get_child_by_name(np, "mdio");
+ err = mv88e6xxx_mdio_register(chip, child, false);
++ of_node_put(child);
+ if (err)
+ return err;
+
+@@ -3628,6 +3671,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
+ .port_sync_link = mv88e6185_port_sync_link,
+ .port_set_speed_duplex = mv88e6185_port_set_speed_duplex,
+ .port_tag_remap = mv88e6095_port_tag_remap,
++ .port_set_policy = mv88e6352_port_set_policy,
+ .port_set_frame_mode = mv88e6351_port_set_frame_mode,
+ .port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
+ .port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
+@@ -4539,6 +4583,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
+ .set_cpu_port = mv88e6095_g1_set_cpu_port,
+ .set_egress_port = mv88e6095_g1_set_egress_port,
+ .watchdog_ops = &mv88e6390_watchdog_ops,
++ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .reset = mv88e6352_g1_reset,
+ .vtu_getnext = mv88e6185_g1_vtu_getnext,
+ .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+@@ -4931,7 +4976,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
+ * .port_set_upstream_port method.
+ */
+ .set_egress_port = mv88e6393x_set_egress_port,
+- .watchdog_ops = &mv88e6390_watchdog_ops,
++ .watchdog_ops = &mv88e6393x_watchdog_ops,
+ .mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
+ .reset = mv88e6352_g1_reset,
+@@ -6395,7 +6440,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
+ goto out;
+ }
+ if (chip->reset)
+- usleep_range(1000, 2000);
++ usleep_range(10000, 20000);
+
+ err = mv88e6xxx_detect(chip);
+ if (err)
+diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
+index fa65ecd9cb853..ec49939968fac 100644
+--- a/drivers/net/dsa/mv88e6xxx/global2.c
++++ b/drivers/net/dsa/mv88e6xxx/global2.c
+@@ -931,6 +931,26 @@ const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {
+ .irq_free = mv88e6390_watchdog_free,
+ };
+
++static int mv88e6393x_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
++{
++ mv88e6390_watchdog_action(chip, irq);
++
++ /* Fix for clearing the force WD event bit.
++ * Unreleased erratum on mv88e6393x.
++ */
++ mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL,
++ MV88E6390_G2_WDOG_CTL_UPDATE |
++ MV88E6390_G2_WDOG_CTL_PTR_EVENT);
++
++ return IRQ_HANDLED;
++}
++
++const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops = {
++ .irq_action = mv88e6393x_watchdog_action,
++ .irq_setup = mv88e6390_watchdog_setup,
++ .irq_free = mv88e6390_watchdog_free,
++};
++
+ static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id)
+ {
+ struct mv88e6xxx_chip *chip = dev_id;
+diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
+index f3e27573a3864..89ba09b663a26 100644
+--- a/drivers/net/dsa/mv88e6xxx/global2.h
++++ b/drivers/net/dsa/mv88e6xxx/global2.h
+@@ -361,6 +361,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
+ extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
+ extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops;
+ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
++extern const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops;
+
+ extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops;
+ extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
+index d9817b20ea641..ab41619a809b3 100644
+--- a/drivers/net/dsa/mv88e6xxx/port.c
++++ b/drivers/net/dsa/mv88e6xxx/port.c
+@@ -283,7 +283,7 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip,
+ if (err)
+ return err;
+
+- if (speed)
++ if (speed != SPEED_UNFORCED)
+ dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed);
+ else
+ dev_dbg(chip->dev, "p%d: Speed unforced\n", port);
+@@ -516,7 +516,7 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
+ if (err)
+ return err;
+
+- if (speed)
++ if (speed != SPEED_UNFORCED)
+ dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed);
+ else
+ dev_dbg(chip->dev, "p%d: Speed unforced\n", port);
+diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
+index 03382b66f8003..3e68d534eaca5 100644
+--- a/drivers/net/dsa/mv88e6xxx/port.h
++++ b/drivers/net/dsa/mv88e6xxx/port.h
+@@ -267,7 +267,7 @@
+ /* Offset 0x10: Extended Port Control Command */
+ #define MV88E6393X_PORT_EPC_CMD 0x10
+ #define MV88E6393X_PORT_EPC_CMD_BUSY 0x8000
+-#define MV88E6393X_PORT_EPC_CMD_WRITE 0x0300
++#define MV88E6393X_PORT_EPC_CMD_WRITE 0x3000
+ #define MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE 0x02
+
+ /* Offset 0x11: Extended Port Control Data */
+diff --git a/drivers/net/dsa/mv88e6xxx/port_hidden.c b/drivers/net/dsa/mv88e6xxx/port_hidden.c
+index b49d05f0e1179..7a9f9ff6dedf3 100644
+--- a/drivers/net/dsa/mv88e6xxx/port_hidden.c
++++ b/drivers/net/dsa/mv88e6xxx/port_hidden.c
+@@ -40,8 +40,9 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip)
+ {
+ int bit = __bf_shf(MV88E6XXX_PORT_RESERVED_1A_BUSY);
+
+- return mv88e6xxx_wait_bit(chip, MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
+- MV88E6XXX_PORT_RESERVED_1A, bit, 0);
++ return mv88e6xxx_port_wait_bit(chip,
++ MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
++ MV88E6XXX_PORT_RESERVED_1A, bit, 0);
+ }
+
+ int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port,
+diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
+index 6ea0036787986..6ae7a0ed9e0ba 100644
+--- a/drivers/net/dsa/mv88e6xxx/serdes.c
++++ b/drivers/net/dsa/mv88e6xxx/serdes.c
+@@ -50,11 +50,17 @@ static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip,
+ }
+
+ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
+- u16 status, u16 lpa,
++ u16 bmsr, u16 lpa, u16 status,
+ struct phylink_link_state *state)
+ {
++ state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK);
++ state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE);
++
+ if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) {
+- state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK);
++		/* The Speed and Duplex Resolved register is 1 if AN is enabled
++ * and complete, or if AN is disabled. So with disabled AN we
++ * still get here on link up.
++ */
+ state->duplex = status &
+ MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ?
+ DUPLEX_FULL : DUPLEX_HALF;
+@@ -81,6 +87,18 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
+ dev_err(chip->dev, "invalid PHY speed\n");
+ return -EINVAL;
+ }
++ } else if (state->link &&
++ state->interface != PHY_INTERFACE_MODE_SGMII) {
++ /* If Speed and Duplex Resolved register is 0 and link is up, it
++ * means that AN was enabled, but link partner had it disabled
++ * and the PHY invoked the Auto-Negotiation Bypass feature and
++ * linked anyway.
++ */
++ state->duplex = DUPLEX_FULL;
++ if (state->interface == PHY_INTERFACE_MODE_2500BASEX)
++ state->speed = SPEED_2500;
++ else
++ state->speed = SPEED_1000;
+ } else {
+ state->link = false;
+ }
+@@ -168,9 +186,15 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
+ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+ int lane, struct phylink_link_state *state)
+ {
+- u16 lpa, status;
++ u16 bmsr, lpa, status;
+ int err;
+
++ err = mv88e6352_serdes_read(chip, MII_BMSR, &bmsr);
++ if (err) {
++ dev_err(chip->dev, "can't read Serdes BMSR: %d\n", err);
++ return err;
++ }
++
+ err = mv88e6352_serdes_read(chip, 0x11, &status);
+ if (err) {
+ dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err);
+@@ -183,7 +207,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+ return err;
+ }
+
+- return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state);
++ return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
+ }
+
+ int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
+@@ -801,7 +825,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+ bool up)
+ {
+ u8 cmode = chip->ports[port].cmode;
+- int err = 0;
++ int err;
+
+ switch (cmode) {
+ case MV88E6XXX_PORT_STS_CMODE_SGMII:
+@@ -813,6 +837,9 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+ case MV88E6XXX_PORT_STS_CMODE_RXAUI:
+ err = mv88e6390_serdes_power_10g(chip, lane, up);
+ break;
++ default:
++ err = -EINVAL;
++ break;
+ }
+
+ if (!err && up)
+@@ -883,9 +910,16 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
+ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
+ int port, int lane, struct phylink_link_state *state)
+ {
+- u16 lpa, status;
++ u16 bmsr, lpa, status;
+ int err;
+
++ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6390_SGMII_BMSR, &bmsr);
++ if (err) {
++ dev_err(chip->dev, "can't read Serdes PHY BMSR: %d\n", err);
++ return err;
++ }
++
+ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+ MV88E6390_SGMII_PHY_STATUS, &status);
+ if (err) {
+@@ -900,7 +934,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
+ return err;
+ }
+
+- return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state);
++ return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
+ }
+
+ static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
+@@ -1271,9 +1305,31 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
+ }
+ }
+
+-static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
++static int mv88e6393x_serdes_power_lane(struct mv88e6xxx_chip *chip, int lane,
++ bool on)
+ {
+- u16 reg, pcs;
++ u16 reg;
++ int err;
++
++ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_CTRL1, &reg);
++ if (err)
++ return err;
++
++ if (on)
++ reg &= ~(MV88E6393X_SERDES_CTRL1_TX_PDOWN |
++ MV88E6393X_SERDES_CTRL1_RX_PDOWN);
++ else
++ reg |= MV88E6393X_SERDES_CTRL1_TX_PDOWN |
++ MV88E6393X_SERDES_CTRL1_RX_PDOWN;
++
++ return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_CTRL1, reg);
++}
++
++static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane)
++{
++ u16 reg;
+ int err;
+
+ /* mv88e6393x family errata 4.6:
+@@ -1284,26 +1340,45 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
+ * It seems that after this workaround the SERDES is automatically
+ * powered up (the bit is cleared), so power it down.
+ */
+- if (lane == MV88E6393X_PORT0_LANE || lane == MV88E6393X_PORT9_LANE ||
+- lane == MV88E6393X_PORT10_LANE) {
+- err = mv88e6390_serdes_read(chip, lane,
+- MDIO_MMD_PHYXS,
+- MV88E6393X_SERDES_POC, &reg);
+- if (err)
+- return err;
++ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_POC, &reg);
++ if (err)
++ return err;
+
+- reg &= ~MV88E6393X_SERDES_POC_PDOWN;
+- reg |= MV88E6393X_SERDES_POC_RESET;
++ reg &= ~MV88E6393X_SERDES_POC_PDOWN;
++ reg |= MV88E6393X_SERDES_POC_RESET;
+
+- err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+- MV88E6393X_SERDES_POC, reg);
+- if (err)
+- return err;
++ err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_POC, reg);
++ if (err)
++ return err;
+
+- err = mv88e6390_serdes_power_sgmii(chip, lane, false);
+- if (err)
+- return err;
+- }
++ err = mv88e6390_serdes_power_sgmii(chip, lane, false);
++ if (err)
++ return err;
++
++ return mv88e6393x_serdes_power_lane(chip, lane, false);
++}
++
++int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
++{
++ int err;
++
++ err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT0_LANE);
++ if (err)
++ return err;
++
++ err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT9_LANE);
++ if (err)
++ return err;
++
++ return mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT10_LANE);
++}
++
++static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane)
++{
++ u16 reg, pcs;
++ int err;
+
+ /* mv88e6393x family errata 4.8:
+ * When a SERDES port is operating in 1000BASE-X or SGMII mode link may
+@@ -1334,38 +1409,152 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
+ MV88E6393X_ERRATA_4_8_REG, reg);
+ }
+
+-int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
++static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane,
++ u8 cmode)
++{
++ static const struct {
++ u16 dev, reg, val, mask;
++ } fixes[] = {
++ { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff },
++ { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff },
++ { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff },
++ { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f },
++ { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 },
++ { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff },
++ { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC,
++ MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET },
++ };
++ int err, i;
++ u16 reg;
++
++ /* mv88e6393x family errata 5.2:
++ * For optimal signal integrity the following sequence should be applied
++ * to SERDES operating in 10G mode. These registers only apply to 10G
++ * operation and have no effect on other speeds.
++ */
++ if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER)
++ return 0;
++
++ for (i = 0; i < ARRAY_SIZE(fixes); ++i) {
++ err = mv88e6390_serdes_read(chip, lane, fixes[i].dev,
++ fixes[i].reg, &reg);
++ if (err)
++ return err;
++
++ reg &= ~fixes[i].mask;
++ reg |= fixes[i].val;
++
++ err = mv88e6390_serdes_write(chip, lane, fixes[i].dev,
++ fixes[i].reg, reg);
++ if (err)
++ return err;
++ }
++
++ return 0;
++}
++
++static int mv88e6393x_serdes_fix_2500basex_an(struct mv88e6xxx_chip *chip,
++ int lane, u8 cmode, bool on)
+ {
++ u16 reg;
+ int err;
+
+- err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE);
++ if (cmode != MV88E6XXX_PORT_STS_CMODE_2500BASEX)
++ return 0;
++
++ /* Inband AN is broken on Amethyst in 2500base-x mode when set by
++ * standard mechanism (via cmode).
++ * We can get around this by configuring the PCS mode to 1000base-x
++ * and then writing value 0x58 to register 1e.8000. (This must be done
++	 * while SerDes receiver and transmitter are disabled, which is when
++ * this function is called.)
++	 * It seems that when we do this configuration to 2500base-x mode (by
++ * changing PCS mode to 1000base-x and frequency to 3.125 GHz from
++ * 1.25 GHz) and then configure to sgmii or 1000base-x, the device
++ * thinks that it already has SerDes at 1.25 GHz and does not change
++ * the 1e.8000 register, leaving SerDes at 3.125 GHz.
++ * To avoid this, change PCS mode back to 2500base-x when disabling
++ * SerDes from 2500base-x mode.
++ */
++ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_POC, &reg);
++ if (err)
++ return err;
++
++ reg &= ~(MV88E6393X_SERDES_POC_PCS_MASK | MV88E6393X_SERDES_POC_AN);
++ if (on)
++ reg |= MV88E6393X_SERDES_POC_PCS_1000BASEX |
++ MV88E6393X_SERDES_POC_AN;
++ else
++ reg |= MV88E6393X_SERDES_POC_PCS_2500BASEX;
++ reg |= MV88E6393X_SERDES_POC_RESET;
++
++ err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
++ MV88E6393X_SERDES_POC, reg);
+ if (err)
+ return err;
+
+- err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE);
++ err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_VEND1, 0x8000, 0x58);
+ if (err)
+ return err;
+
+- return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE);
++ return 0;
+ }
+
+ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+ bool on)
+ {
+ u8 cmode = chip->ports[port].cmode;
++ int err;
+
+ if (port != 0 && port != 9 && port != 10)
+ return -EOPNOTSUPP;
+
++ if (on) {
++ err = mv88e6393x_serdes_erratum_4_8(chip, lane);
++ if (err)
++ return err;
++
++ err = mv88e6393x_serdes_erratum_5_2(chip, lane, cmode);
++ if (err)
++ return err;
++
++ err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
++ true);
++ if (err)
++ return err;
++
++ err = mv88e6393x_serdes_power_lane(chip, lane, true);
++ if (err)
++ return err;
++ }
++
+ switch (cmode) {
+ case MV88E6XXX_PORT_STS_CMODE_SGMII:
+ case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+ case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+- return mv88e6390_serdes_power_sgmii(chip, lane, on);
++ err = mv88e6390_serdes_power_sgmii(chip, lane, on);
++ break;
+ case MV88E6393X_PORT_STS_CMODE_5GBASER:
+ case MV88E6393X_PORT_STS_CMODE_10GBASER:
+- return mv88e6390_serdes_power_10g(chip, lane, on);
++ err = mv88e6390_serdes_power_10g(chip, lane, on);
++ break;
++ default:
++ err = -EINVAL;
++ break;
+ }
+
+- return 0;
++ if (err)
++ return err;
++
++ if (!on) {
++ err = mv88e6393x_serdes_power_lane(chip, lane, false);
++ if (err)
++ return err;
++
++ err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
++ false);
++ }
++
++ return err;
+ }
+diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
+index cbb3ba30caea9..8dd8ed225b459 100644
+--- a/drivers/net/dsa/mv88e6xxx/serdes.h
++++ b/drivers/net/dsa/mv88e6xxx/serdes.h
+@@ -93,6 +93,10 @@
+ #define MV88E6393X_SERDES_POC_PCS_MASK 0x0007
+ #define MV88E6393X_SERDES_POC_RESET BIT(15)
+ #define MV88E6393X_SERDES_POC_PDOWN BIT(5)
++#define MV88E6393X_SERDES_POC_AN BIT(3)
++#define MV88E6393X_SERDES_CTRL1 0xf003
++#define MV88E6393X_SERDES_CTRL1_TX_PDOWN BIT(9)
++#define MV88E6393X_SERDES_CTRL1_RX_PDOWN BIT(8)
+
+ #define MV88E6393X_ERRATA_4_8_REG 0xF074
+ #define MV88E6393X_ERRATA_4_8_BIT BIT(14)
+diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
+index 341236dcbdb47..1513dfb523de7 100644
+--- a/drivers/net/dsa/ocelot/felix.c
++++ b/drivers/net/dsa/ocelot/felix.c
+@@ -290,8 +290,11 @@ static int felix_setup_mmio_filtering(struct felix *felix)
+ }
+ }
+
+- if (cpu < 0)
++ if (cpu < 0) {
++ kfree(tagging_rule);
++ kfree(redirect_rule);
+ return -EINVAL;
++ }
+
+ tagging_rule->key_type = OCELOT_VCAP_KEY_ETYPE;
+ *(__be16 *)tagging_rule->key.etype.etype.value = htons(ETH_P_1588);
+@@ -1368,12 +1371,12 @@ out:
+ static bool felix_rxtstamp(struct dsa_switch *ds, int port,
+ struct sk_buff *skb, unsigned int type)
+ {
+- u8 *extraction = skb->data - ETH_HLEN - OCELOT_TAG_LEN;
++ u32 tstamp_lo = OCELOT_SKB_CB(skb)->tstamp_lo;
+ struct skb_shared_hwtstamps *shhwtstamps;
+ struct ocelot *ocelot = ds->priv;
+- u32 tstamp_lo, tstamp_hi;
+ struct timespec64 ts;
+- u64 tstamp, val;
++ u32 tstamp_hi;
++ u64 tstamp;
+
+ /* If the "no XTR IRQ" workaround is in use, tell DSA to defer this skb
+ * for RX timestamping. Then free it, and poll for its copy through
+@@ -1388,9 +1391,6 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port,
+ ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+ tstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+
+- ocelot_xfh_get_rew_val(extraction, &val);
+- tstamp_lo = (u32)val;
+-
+ tstamp_hi = tstamp >> 32;
+ if ((tstamp & 0xffffffff) < tstamp_lo)
+ tstamp_hi--;
+diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
+index 11b42fd812e4a..5ba7e5c820dde 100644
+--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
+@@ -578,7 +578,8 @@ static const struct ocelot_stat_layout vsc9959_stats_layout[] = {
+ { .offset = 0x87, .name = "tx_frames_below_65_octets", },
+ { .offset = 0x88, .name = "tx_frames_65_to_127_octets", },
+ { .offset = 0x89, .name = "tx_frames_128_255_octets", },
+- { .offset = 0x8B, .name = "tx_frames_256_511_octets", },
++ { .offset = 0x8A, .name = "tx_frames_256_511_octets", },
++ { .offset = 0x8B, .name = "tx_frames_512_1023_octets", },
+ { .offset = 0x8C, .name = "tx_frames_1024_1526_octets", },
+ { .offset = 0x8D, .name = "tx_frames_over_1526_octets", },
+ { .offset = 0x8E, .name = "tx_yellow_prio_0", },
+@@ -1066,7 +1067,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
+ return PTR_ERR(hw);
+ }
+
+- bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
++ bus = mdiobus_alloc_size(sizeof(*mdio_priv));
+ if (!bus)
+ return -ENOMEM;
+
+@@ -1086,6 +1087,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
+ rc = mdiobus_register(bus);
+ if (rc < 0) {
+ dev_err(dev, "failed to register MDIO bus\n");
++ mdiobus_free(bus);
+ return rc;
+ }
+
+@@ -1135,6 +1137,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
+ lynx_pcs_destroy(pcs);
+ }
+ mdiobus_unregister(felix->imdio);
++ mdiobus_free(felix->imdio);
+ }
+
+ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
+@@ -1453,7 +1456,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
+
+ err = dsa_register_switch(ds);
+ if (err) {
+- dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err);
++ dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n");
+ goto err_register_ds;
+ }
+
+diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
+index de1d34a1f1e47..05e4e75c01076 100644
+--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
++++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
+@@ -10,6 +10,7 @@
+ #include <linux/pcs-lynx.h>
+ #include <linux/dsa/ocelot.h>
+ #include <linux/iopoll.h>
++#include <linux/of_mdio.h>
+ #include "felix.h"
+
+ #define MSCC_MIIM_CMD_OPR_WRITE BIT(1)
+@@ -1110,7 +1111,7 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s-imdio", dev_name(dev));
+
+ /* Needed in order to initialize the bus mutex lock */
+- rc = mdiobus_register(bus);
++ rc = devm_of_mdiobus_register(dev, bus, NULL);
+ if (rc < 0) {
+ dev_err(dev, "failed to register MDIO bus\n");
+ return rc;
+@@ -1162,7 +1163,8 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot)
+ mdio_device_free(pcs->mdio);
+ lynx_pcs_destroy(pcs);
+ }
+- mdiobus_unregister(felix->imdio);
++
++ /* mdiobus_unregister and mdiobus_free handled by devres */
+ }
+
+ static const struct felix_info seville_info_vsc9953 = {
+diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
+index a6bfb6abc51a7..5d476f452396c 100644
+--- a/drivers/net/dsa/qca/ar9331.c
++++ b/drivers/net/dsa/qca/ar9331.c
+@@ -378,7 +378,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
+ if (!mnp)
+ return -ENODEV;
+
+- ret = of_mdiobus_register(mbus, mnp);
++ ret = devm_of_mdiobus_register(dev, mbus, mnp);
+ of_node_put(mnp);
+ if (ret)
+ return ret;
+@@ -1093,7 +1093,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev)
+ }
+
+ irq_domain_remove(priv->irqdomain);
+- mdiobus_unregister(priv->mbus);
+ dsa_unregister_switch(&priv->ds);
+
+ reset_control_assert(priv->sw_reset);
+diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
+index a984f06f6f04f..67869c8cbeaa8 100644
+--- a/drivers/net/dsa/qca8k.c
++++ b/drivers/net/dsa/qca8k.c
+@@ -1599,7 +1599,7 @@ static int
+ qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ {
+ struct qca8k_priv *priv = ds->priv;
+- int i, mtu = 0;
++ int ret, i, mtu = 0;
+
+ priv->port_mtu[port] = new_mtu;
+
+@@ -1607,8 +1607,27 @@ qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ if (priv->port_mtu[i] > mtu)
+ mtu = priv->port_mtu[i];
+
++ /* To change the MAX_FRAME_SIZE the cpu ports must be off or
++ * the switch panics.
++ * Turn off both cpu ports before applying the new value to prevent
++ * this.
++ */
++ if (priv->port_sts[0].enabled)
++ qca8k_port_set_status(priv, 0, 0);
++
++ if (priv->port_sts[6].enabled)
++ qca8k_port_set_status(priv, 6, 0);
++
+ /* Include L2 header / FCS length */
+- return qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN);
++ ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN);
++
++ if (priv->port_sts[0].enabled)
++ qca8k_port_set_status(priv, 0, 1);
++
++ if (priv->port_sts[6].enabled)
++ qca8k_port_set_status(priv, 6, 1);
++
++ return ret;
+ }
+
+ static int
+diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c
+index 75897a3690969..ffbe5b6b2655b 100644
+--- a/drivers/net/dsa/rtl8366.c
++++ b/drivers/net/dsa/rtl8366.c
+@@ -457,7 +457,7 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port,
+ * anymore then clear the whole member
+ * config so it can be reused.
+ */
+- if (!vlanmc.member && vlanmc.untag) {
++ if (!vlanmc.member) {
+ vlanmc.vid = 0;
+ vlanmc.priority = 0;
+ vlanmc.fid = 0;
+diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c
+index a89093bc6c6ad..9e3b572ed999e 100644
+--- a/drivers/net/dsa/rtl8366rb.c
++++ b/drivers/net/dsa/rtl8366rb.c
+@@ -1350,7 +1350,7 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
+
+ static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
+ {
+- unsigned int max = RTL8366RB_NUM_VLANS;
++ unsigned int max = RTL8366RB_NUM_VLANS - 1;
+
+ if (smi->vlan4k_enabled)
+ max = RTL8366RB_NUM_VIDS - 1;
+diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
+index 5e5d24e7c02b2..548d585256fbb 100644
+--- a/drivers/net/dsa/sja1105/sja1105.h
++++ b/drivers/net/dsa/sja1105/sja1105.h
+@@ -111,6 +111,8 @@ struct sja1105_info {
+ int max_frame_mem;
+ int num_ports;
+ bool multiple_cascade_ports;
++ /* Every {port, TXQ} has its own CBS shaper */
++ bool fixed_cbs_mapping;
+ enum dsa_tag_protocol tag_proto;
+ const struct sja1105_dynamic_table_ops *dyn_ops;
+ const struct sja1105_table_ops *static_ops;
+diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
+index 0569ff066634d..bdbbff2a79095 100644
+--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
++++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
+@@ -93,8 +93,10 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds)
+
+ region = dsa_devlink_region_create(ds, ops, 1, size);
+ if (IS_ERR(region)) {
+- while (i-- >= 0)
++ while (--i >= 0)
+ dsa_devlink_region_destroy(priv->regions[i]);
++
++ kfree(priv->regions);
+ return PTR_ERR(region);
+ }
+
+diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
+index 924c3f129992f..493192a8000c8 100644
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -1025,7 +1025,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
+
+ policing[bcast].sharindx = port;
+ /* Only SJA1110 has multicast policers */
+- if (mcast <= table->ops->max_entry_count)
++ if (mcast < table->ops->max_entry_count)
+ policing[mcast].sharindx = port;
+ }
+
+@@ -1794,13 +1794,14 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
+ if (!(l2_lookup.destports & BIT(port)))
+ continue;
+
+- /* We need to hide the FDB entry for unknown multicast */
+- if (l2_lookup.macaddr == SJA1105_UNKNOWN_MULTICAST &&
+- l2_lookup.mask_macaddr == SJA1105_UNKNOWN_MULTICAST)
+- continue;
+-
+ u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
++ /* Hardware FDB is shared for fdb and mdb, "bridge fdb show"
++ * only wants to see unicast
++ */
++ if (is_multicast_ether_addr(macaddr))
++ continue;
++
+ /* We need to hide the dsa_8021q VLANs from the user. */
+ if (!priv->vlan_aware)
+ l2_lookup.vlanid = 0;
+@@ -2014,11 +2015,36 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
+ }
+
+ #define BYTES_PER_KBIT (1000LL / 8)
++/* Port 0 (the uC port) does not have CBS shapers */
++#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
++
++static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
++ int port, int prio)
++{
++ int i;
++
++ if (priv->info->fixed_cbs_mapping) {
++ i = SJA1110_FIXED_CBS(port, prio);
++ if (i >= 0 && i < priv->info->num_cbs_shapers)
++ return i;
++
++ return -1;
++ }
++
++ for (i = 0; i < priv->info->num_cbs_shapers; i++)
++ if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
++ return i;
++
++ return -1;
++}
+
+ static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
+ {
+ int i;
+
++ if (priv->info->fixed_cbs_mapping)
++ return -1;
++
+ for (i = 0; i < priv->info->num_cbs_shapers; i++)
+ if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
+ return i;
+@@ -2049,14 +2075,20 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+ {
+ struct sja1105_private *priv = ds->priv;
+ struct sja1105_cbs_entry *cbs;
++ s64 port_transmit_rate_kbps;
+ int index;
+
+ if (!offload->enable)
+ return sja1105_delete_cbs_shaper(priv, port, offload->queue);
+
+- index = sja1105_find_unused_cbs_shaper(priv);
+- if (index < 0)
+- return -ENOSPC;
++ /* The user may be replacing an existing shaper */
++ index = sja1105_find_cbs_shaper(priv, port, offload->queue);
++ if (index < 0) {
++ /* That isn't the case - see if we can allocate a new one */
++ index = sja1105_find_unused_cbs_shaper(priv);
++ if (index < 0)
++ return -ENOSPC;
++ }
+
+ cbs = &priv->cbs[index];
+ cbs->port = port;
+@@ -2066,9 +2098,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+ */
+ cbs->credit_hi = offload->hicredit;
+ cbs->credit_lo = abs(offload->locredit);
+- /* User space is in kbits/sec, hardware in bytes/sec */
+- cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
+- cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
++	/* User space is in kbits/sec, while the hardware is in bytes/sec times
++ * link speed. Since the given offload->sendslope is good only for the
++ * current link speed anyway, and user space is likely to reprogram it
++ * when that changes, don't even bother to track the port's link speed,
++ * but deduce the port transmit rate from idleslope - sendslope.
++ */
++ port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
++ cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
++ port_transmit_rate_kbps);
++ cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
++ port_transmit_rate_kbps);
+ /* Convert the negative values from 64-bit 2's complement
+ * to 32-bit 2's complement (for the case of 0x80000000 whose
+ * negative is still negative).
+@@ -3372,12 +3412,28 @@ static const struct of_device_id sja1105_dt_ids[] = {
+ };
+ MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
+
++static const struct spi_device_id sja1105_spi_ids[] = {
++ { "sja1105e" },
++ { "sja1105t" },
++ { "sja1105p" },
++ { "sja1105q" },
++ { "sja1105r" },
++ { "sja1105s" },
++ { "sja1110a" },
++ { "sja1110b" },
++ { "sja1110c" },
++ { "sja1110d" },
++ { },
++};
++MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
++
+ static struct spi_driver sja1105_driver = {
+ .driver = {
+ .name = "sja1105",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(sja1105_dt_ids),
+ },
++ .id_table = sja1105_spi_ids,
+ .probe = sja1105_probe,
+ .remove = sja1105_remove,
+ .shutdown = sja1105_shutdown,
+diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
+index 215dd17ca7906..4059fcc8c8326 100644
+--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
++++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
+@@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg)
+ u32 tmp;
+ int rc;
+
++ if (reg & MII_ADDR_C45)
++ return -EOPNOTSUPP;
++
+ rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg,
+ &tmp, NULL);
+ if (rc < 0)
+@@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg,
+ const struct sja1105_regs *regs = priv->info->regs;
+ u32 tmp = val;
+
++ if (reg & MII_ADDR_C45)
++ return -EOPNOTSUPP;
++
+ return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg,
+ &tmp, NULL);
+ }
+diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
+index d3c9ad6d39d46..e6b61aef4127c 100644
+--- a/drivers/net/dsa/sja1105/sja1105_spi.c
++++ b/drivers/net/dsa/sja1105/sja1105_spi.c
+@@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = {
+ .tag_proto = DSA_TAG_PROTO_SJA1110,
+ .can_limit_mcast_flood = true,
+ .multiple_cascade_ports = true,
++ .fixed_cbs_mapping = true,
+ .ptp_ts_bits = 32,
+ .ptpegr_ts_bytes = 8,
+ .max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
+@@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = {
+ .tag_proto = DSA_TAG_PROTO_SJA1110,
+ .can_limit_mcast_flood = true,
+ .multiple_cascade_ports = true,
++ .fixed_cbs_mapping = true,
+ .ptp_ts_bits = 32,
+ .ptpegr_ts_bytes = 8,
+ .max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
+@@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
+ .tag_proto = DSA_TAG_PROTO_SJA1110,
+ .can_limit_mcast_flood = true,
+ .multiple_cascade_ports = true,
++ .fixed_cbs_mapping = true,
+ .ptp_ts_bits = 32,
+ .ptpegr_ts_bytes = 8,
+ .max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
+@@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = {
+ .tag_proto = DSA_TAG_PROTO_SJA1110,
+ .can_limit_mcast_flood = true,
+ .multiple_cascade_ports = true,
++ .fixed_cbs_mapping = true,
+ .ptp_ts_bits = 32,
+ .ptpegr_ts_bytes = 8,
+ .max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
+index a4b1447ff0557..0c0bc78b1788e 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
+@@ -1025,17 +1025,17 @@ static int vsc73xx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ struct vsc73xx *vsc = ds->priv;
+
+ return vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port,
+- VSC73XX_MAXLEN, new_mtu);
++ VSC73XX_MAXLEN, new_mtu + ETH_HLEN + ETH_FCS_LEN);
+ }
+
+ /* According to application note "VSC7398 Jumbo Frames" setting
+- * up the MTU to 9.6 KB does not affect the performance on standard
++ * up the frame size to 9.6 KB does not affect the performance on standard
+ * frames. It is clear from the application note that
+ * "9.6 kilobytes" == 9600 bytes.
+ */
+ static int vsc73xx_get_max_mtu(struct dsa_switch *ds, int port)
+ {
+- return 9600;
++ return 9600 - ETH_HLEN - ETH_FCS_LEN;
+ }
+
+ static const struct dsa_switch_ops vsc73xx_ds_ops = {
+diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
+index 645398901e05e..922ae22fad66b 100644
+--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
++++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
+@@ -207,10 +207,20 @@ static const struct of_device_id vsc73xx_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+
++static const struct spi_device_id vsc73xx_spi_ids[] = {
++ { "vsc7385" },
++ { "vsc7388" },
++ { "vsc7395" },
++ { "vsc7398" },
++ { },
++};
++MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
++
+ static struct spi_driver vsc73xx_spi_driver = {
+ .probe = vsc73xx_spi_probe,
+ .remove = vsc73xx_spi_remove,
+ .shutdown = vsc73xx_spi_shutdown,
++ .id_table = vsc73xx_spi_ids,
+ .driver = {
+ .name = "vsc73xx-spi",
+ .of_match_table = vsc73xx_of_match,
+diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
+index 469420941054e..cf363d5a30020 100644
+--- a/drivers/net/dsa/xrs700x/xrs700x.c
++++ b/drivers/net/dsa/xrs700x/xrs700x.c
+@@ -108,6 +108,7 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port)
+ {
+ struct xrs700x_port *p = &priv->ports[port];
+ struct rtnl_link_stats64 stats;
++ unsigned long flags;
+ int i;
+
+ memset(&stats, 0, sizeof(stats));
+@@ -137,9 +138,9 @@ static void xrs700x_read_port_counters(struct xrs700x *priv, int port)
+ */
+ stats.rx_packets += stats.multicast;
+
+- u64_stats_update_begin(&p->syncp);
++ flags = u64_stats_update_begin_irqsave(&p->syncp);
+ p->stats64 = stats;
+- u64_stats_update_end(&p->syncp);
++ u64_stats_update_end_irqrestore(&p->syncp, flags);
+
+ mutex_unlock(&p->mib_mutex);
+ }
+diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
+index 09816e84314d0..0197ef6f15826 100644
+--- a/drivers/net/ethernet/3com/3c589_cs.c
++++ b/drivers/net/ethernet/3com/3c589_cs.c
+@@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link)
+ {
+ struct el3_private *lp;
+ struct net_device *dev;
++ int ret;
+
+ dev_dbg(&link->dev, "3c589_attach()\n");
+
+@@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link)
+
+ dev->ethtool_ops = &netdev_ethtool_ops;
+
+- return tc589_config(link);
++ ret = tc589_config(link);
++ if (ret)
++ goto err_free_netdev;
++
++ return 0;
++
++err_free_netdev:
++ free_netdev(dev);
++ return ret;
+ }
+
+ static void tc589_detach(struct pcmcia_device *link)
+diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
+index 4ad8031ab6695..065fdbe66c425 100644
+--- a/drivers/net/ethernet/8390/mcf8390.c
++++ b/drivers/net/ethernet/8390/mcf8390.c
+@@ -406,12 +406,12 @@ static int mcf8390_init(struct net_device *dev)
+ static int mcf8390_probe(struct platform_device *pdev)
+ {
+ struct net_device *dev;
+- struct resource *mem, *irq;
++ struct resource *mem;
+ resource_size_t msize;
+- int ret;
++ int ret, irq;
+
+- irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+- if (irq == NULL) {
++ irq = platform_get_irq(pdev, 0);
++ if (irq < 0) {
+ dev_err(&pdev->dev, "no IRQ specified?\n");
+ return -ENXIO;
+ }
+@@ -434,7 +434,7 @@ static int mcf8390_probe(struct platform_device *pdev)
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ platform_set_drvdata(pdev, dev);
+
+- dev->irq = irq->start;
++ dev->irq = irq;
+ dev->base_addr = mem->start;
+
+ ret = mcf8390_init(dev);
+diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
+index 412ae3e43ffb7..35ac6fe7529c5 100644
+--- a/drivers/net/ethernet/Kconfig
++++ b/drivers/net/ethernet/Kconfig
+@@ -34,15 +34,6 @@ source "drivers/net/ethernet/apple/Kconfig"
+ source "drivers/net/ethernet/aquantia/Kconfig"
+ source "drivers/net/ethernet/arc/Kconfig"
+ source "drivers/net/ethernet/atheros/Kconfig"
+-source "drivers/net/ethernet/broadcom/Kconfig"
+-source "drivers/net/ethernet/brocade/Kconfig"
+-source "drivers/net/ethernet/cadence/Kconfig"
+-source "drivers/net/ethernet/calxeda/Kconfig"
+-source "drivers/net/ethernet/cavium/Kconfig"
+-source "drivers/net/ethernet/chelsio/Kconfig"
+-source "drivers/net/ethernet/cirrus/Kconfig"
+-source "drivers/net/ethernet/cisco/Kconfig"
+-source "drivers/net/ethernet/cortina/Kconfig"
+
+ config CX_ECAT
+ tristate "Beckhoff CX5020 EtherCAT master support"
+@@ -56,6 +47,14 @@ config CX_ECAT
+ To compile this driver as a module, choose M here. The module
+ will be called ec_bhf.
+
++source "drivers/net/ethernet/broadcom/Kconfig"
++source "drivers/net/ethernet/cadence/Kconfig"
++source "drivers/net/ethernet/calxeda/Kconfig"
++source "drivers/net/ethernet/cavium/Kconfig"
++source "drivers/net/ethernet/chelsio/Kconfig"
++source "drivers/net/ethernet/cirrus/Kconfig"
++source "drivers/net/ethernet/cisco/Kconfig"
++source "drivers/net/ethernet/cortina/Kconfig"
+ source "drivers/net/ethernet/davicom/Kconfig"
+
+ config DNET
+@@ -82,7 +81,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
+ source "drivers/net/ethernet/i825xx/Kconfig"
+ source "drivers/net/ethernet/ibm/Kconfig"
+ source "drivers/net/ethernet/intel/Kconfig"
+-source "drivers/net/ethernet/microsoft/Kconfig"
+ source "drivers/net/ethernet/xscale/Kconfig"
+
+ config JME
+@@ -125,8 +123,9 @@ source "drivers/net/ethernet/mediatek/Kconfig"
+ source "drivers/net/ethernet/mellanox/Kconfig"
+ source "drivers/net/ethernet/micrel/Kconfig"
+ source "drivers/net/ethernet/microchip/Kconfig"
+-source "drivers/net/ethernet/moxa/Kconfig"
+ source "drivers/net/ethernet/mscc/Kconfig"
++source "drivers/net/ethernet/microsoft/Kconfig"
++source "drivers/net/ethernet/moxa/Kconfig"
+ source "drivers/net/ethernet/myricom/Kconfig"
+
+ config FEALNX
+@@ -138,10 +137,10 @@ config FEALNX
+ Say Y here to support the Myson MTD-800 family of PCI-based Ethernet
+ cards. <http://www.myson.com.tw/>
+
++source "drivers/net/ethernet/ni/Kconfig"
+ source "drivers/net/ethernet/natsemi/Kconfig"
+ source "drivers/net/ethernet/neterion/Kconfig"
+ source "drivers/net/ethernet/netronome/Kconfig"
+-source "drivers/net/ethernet/ni/Kconfig"
+ source "drivers/net/ethernet/8390/Kconfig"
+ source "drivers/net/ethernet/nvidia/Kconfig"
+ source "drivers/net/ethernet/nxp/Kconfig"
+@@ -161,6 +160,7 @@ source "drivers/net/ethernet/packetengines/Kconfig"
+ source "drivers/net/ethernet/pasemi/Kconfig"
+ source "drivers/net/ethernet/pensando/Kconfig"
+ source "drivers/net/ethernet/qlogic/Kconfig"
++source "drivers/net/ethernet/brocade/Kconfig"
+ source "drivers/net/ethernet/qualcomm/Kconfig"
+ source "drivers/net/ethernet/rdc/Kconfig"
+ source "drivers/net/ethernet/realtek/Kconfig"
+@@ -168,10 +168,10 @@ source "drivers/net/ethernet/renesas/Kconfig"
+ source "drivers/net/ethernet/rocker/Kconfig"
+ source "drivers/net/ethernet/samsung/Kconfig"
+ source "drivers/net/ethernet/seeq/Kconfig"
+-source "drivers/net/ethernet/sfc/Kconfig"
+ source "drivers/net/ethernet/sgi/Kconfig"
+ source "drivers/net/ethernet/silan/Kconfig"
+ source "drivers/net/ethernet/sis/Kconfig"
++source "drivers/net/ethernet/sfc/Kconfig"
+ source "drivers/net/ethernet/smsc/Kconfig"
+ source "drivers/net/ethernet/socionext/Kconfig"
+ source "drivers/net/ethernet/stmicro/Kconfig"
+diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
+index c560ad06f0be3..a95bac4e14f6a 100644
+--- a/drivers/net/ethernet/aeroflex/greth.c
++++ b/drivers/net/ethernet/aeroflex/greth.c
+@@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth)
+ if (dma_mapping_error(greth->dev, dma_addr)) {
+ if (netif_msg_ifup(greth))
+ dev_err(greth->dev, "Could not create initial DMA mapping\n");
++ dev_kfree_skb(skb);
+ goto cleanup;
+ }
+ greth->rx_skbuff[i] = skb;
+diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
+index 920633161174d..f4edc616388c0 100644
+--- a/drivers/net/ethernet/agere/et131x.c
++++ b/drivers/net/ethernet/agere/et131x.c
+@@ -3863,7 +3863,7 @@ static int et131x_change_mtu(struct net_device *netdev, int new_mtu)
+
+ et131x_init_send(adapter);
+ et131x_hwaddr_init(adapter);
+- ether_addr_copy(netdev->dev_addr, adapter->addr);
++ eth_hw_addr_set(netdev, adapter->addr);
+
+ /* Init the device with the new settings */
+ et131x_adapter_setup(adapter);
+@@ -3966,7 +3966,7 @@ static int et131x_pci_setup(struct pci_dev *pdev,
+
+ netif_napi_add(netdev, &adapter->napi, et131x_poll, 64);
+
+- ether_addr_copy(netdev->dev_addr, adapter->addr);
++ eth_hw_addr_set(netdev, adapter->addr);
+
+ rc = -ENOMEM;
+
+diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
+index 696517eae77f0..82f4f26081021 100644
+--- a/drivers/net/ethernet/alacritech/slicoss.c
++++ b/drivers/net/ethernet/alacritech/slicoss.c
+@@ -1660,7 +1660,7 @@ static int slic_read_eeprom(struct slic_device *sdev)
+ goto free_eeprom;
+ }
+ /* set mac address */
+- ether_addr_copy(sdev->netdev->dev_addr, mac[devfn]);
++ eth_hw_addr_set(sdev->netdev, mac[devfn]);
+ free_eeprom:
+ dma_free_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE, eeprom, paddr);
+
+diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
+index 037baea1c7388..ef218a6d477cc 100644
+--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
++++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
+@@ -852,7 +852,7 @@ static int emac_probe(struct platform_device *pdev)
+ }
+
+ /* Read MAC-address from DT */
+- ret = of_get_mac_address(np, ndev->dev_addr);
++ ret = of_get_ethdev_address(np, ndev);
+ if (ret) {
+ /* if the MAC address is invalid get a random one */
+ eth_hw_addr_random(ndev);
+diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
+index 1c00d719e5d76..9e721436f06fa 100644
+--- a/drivers/net/ethernet/altera/altera_tse_main.c
++++ b/drivers/net/ethernet/altera/altera_tse_main.c
+@@ -163,7 +163,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id)
+ mdio = mdiobus_alloc();
+ if (mdio == NULL) {
+ netdev_err(dev, "Error allocating MDIO bus\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto put_node;
+ }
+
+ mdio->name = ALTERA_TSE_RESOURCE_NAME;
+@@ -180,6 +181,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id)
+ mdio->id);
+ goto out_free_mdio;
+ }
++ of_node_put(mdio_node);
+
+ if (netif_msg_drv(priv))
+ netdev_info(dev, "MDIO bus %s: created\n", mdio->id);
+@@ -189,6 +191,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id)
+ out_free_mdio:
+ mdiobus_free(mdio);
+ mdio = NULL;
++put_node:
++ of_node_put(mdio_node);
+ return ret;
+ }
+
+@@ -1430,16 +1434,19 @@ static int altera_tse_probe(struct platform_device *pdev)
+ priv->rxdescmem_busaddr = dma_res->start;
+
+ } else {
++ ret = -ENODEV;
+ goto err_free_netdev;
+ }
+
+- if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask)))
++ if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) {
+ dma_set_coherent_mask(priv->device,
+ DMA_BIT_MASK(priv->dmaops->dmamask));
+- else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32)))
++ } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) {
+ dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32));
+- else
++ } else {
++ ret = -EIO;
+ goto err_free_netdev;
++ }
+
+ /* MAC address space */
+ ret = request_and_map(pdev, "control_port", &control_port,
+@@ -1524,7 +1531,7 @@ static int altera_tse_probe(struct platform_device *pdev)
+ priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE;
+
+ /* get default MAC address from device tree */
+- ret = of_get_mac_address(pdev->dev.of_node, ndev->dev_addr);
++ ret = of_get_ethdev_address(pdev->dev.of_node, ndev);
+ if (ret)
+ eth_hw_addr_random(ndev);
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
+index ab413fc1f68e3..7979b10192425 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_com.c
+@@ -35,6 +35,8 @@
+
+ #define ENA_REGS_ADMIN_INTR_MASK 1
+
++#define ENA_MAX_BACKOFF_DELAY_EXP 16U
++
+ #define ENA_MIN_ADMIN_POLL_US 100
+
+ #define ENA_MAX_ADMIN_POLL_US 5000
+@@ -536,6 +538,7 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue,
+
+ static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us)
+ {
++ exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP);
+ delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us);
+ delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US);
+ usleep_range(delay_us, 2 * delay_us);
+@@ -2392,29 +2395,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+ return -EOPNOTSUPP;
+ }
+
+- switch (func) {
+- case ENA_ADMIN_TOEPLITZ:
+- if (key) {
+- if (key_len != sizeof(hash_key->key)) {
+- netdev_err(ena_dev->net_device,
+- "key len (%u) doesn't equal the supported size (%zu)\n",
+- key_len, sizeof(hash_key->key));
+- return -EINVAL;
+- }
+- memcpy(hash_key->key, key, key_len);
+- rss->hash_init_val = init_val;
+- hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
++ if ((func == ENA_ADMIN_TOEPLITZ) && key) {
++ if (key_len != sizeof(hash_key->key)) {
++ netdev_err(ena_dev->net_device,
++ "key len (%u) doesn't equal the supported size (%zu)\n",
++ key_len, sizeof(hash_key->key));
++ return -EINVAL;
+ }
+- break;
+- case ENA_ADMIN_CRC32:
+- rss->hash_init_val = init_val;
+- break;
+- default:
+- netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n",
+- func);
+- return -EINVAL;
++ memcpy(hash_key->key, key, key_len);
++ hash_key->key_parts = key_len / sizeof(hash_key->key[0]);
+ }
+
++ rss->hash_init_val = init_val;
+ old_func = rss->hash_func;
+ rss->hash_func = func;
+ rc = ena_com_set_hash_function(ena_dev);
+diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+index 13e745cf3781b..413082f10dc1c 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+@@ -880,11 +880,7 @@ static int ena_set_tunable(struct net_device *netdev,
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ len = *(u32 *)data;
+- if (len > adapter->netdev->mtu) {
+- ret = -EINVAL;
+- break;
+- }
+- adapter->rx_copybreak = len;
++ ret = ena_set_rx_copybreak(adapter, len);
+ break;
+ default:
+ ret = -EINVAL;
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 0e43000614abd..f3673be4fc087 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -378,9 +378,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
+
+ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+ {
++ u32 verdict = ENA_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ena_ring *xdp_ring;
+- u32 verdict = XDP_PASS;
+ struct xdp_frame *xdpf;
+ u64 *xdp_stat;
+
+@@ -397,7 +397,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+ if (unlikely(!xdpf)) {
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+- verdict = XDP_ABORTED;
++ verdict = ENA_XDP_DROP;
+ break;
+ }
+
+@@ -413,29 +413,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+
+ spin_unlock(&xdp_ring->xdp_tx_lock);
+ xdp_stat = &rx_ring->rx_stats.xdp_tx;
++ verdict = ENA_XDP_TX;
+ break;
+ case XDP_REDIRECT:
+ if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
+ xdp_stat = &rx_ring->rx_stats.xdp_redirect;
++ verdict = ENA_XDP_REDIRECT;
+ break;
+ }
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+- verdict = XDP_ABORTED;
++ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
++ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_DROP:
+ xdp_stat = &rx_ring->rx_stats.xdp_drop;
++ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_PASS:
+ xdp_stat = &rx_ring->rx_stats.xdp_pass;
++ verdict = ENA_XDP_PASS;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_invalid;
++ verdict = ENA_XDP_DROP;
+ }
+
+ ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
+@@ -516,16 +522,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+ struct bpf_prog *prog,
+ int first, int count)
+ {
++ struct bpf_prog *old_bpf_prog;
+ struct ena_ring *rx_ring;
+ int i = 0;
+
+ for (i = first; i < count; i++) {
+ rx_ring = &adapter->rx_ring[i];
+- xchg(&rx_ring->xdp_bpf_prog, prog);
+- if (prog) {
++ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
++
++ if (!old_bpf_prog && prog) {
+ ena_xdp_register_rxq_info(rx_ring);
+ rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+- } else {
++ } else if (old_bpf_prog && !prog) {
+ ena_xdp_unregister_rxq_info(rx_ring);
+ rx_ring->rx_headroom = NET_SKB_PAD;
+ }
+@@ -676,6 +684,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
+ ring->ena_dev = adapter->ena_dev;
+ ring->per_napi_packets = 0;
+ ring->cpu = 0;
++ ring->numa_node = 0;
+ ring->no_interrupt_event_cnt = 0;
+ u64_stats_init(&ring->syncp);
+ }
+@@ -779,6 +788,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+ tx_ring->cpu = ena_irq->cpu;
++ tx_ring->numa_node = node;
+ return 0;
+
+ err_push_buf_intermediate_buf:
+@@ -911,6 +921,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter,
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+ rx_ring->cpu = ena_irq->cpu;
++ rx_ring->numa_node = node;
+
+ return 0;
+ }
+@@ -1288,26 +1299,22 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+
+ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+ {
+- struct ena_tx_buffer *tx_info = NULL;
++ struct ena_tx_buffer *tx_info;
+
+- if (likely(req_id < tx_ring->ring_size)) {
+- tx_info = &tx_ring->tx_buffer_info[req_id];
+- if (likely(tx_info->skb))
+- return 0;
+- }
++ tx_info = &tx_ring->tx_buffer_info[req_id];
++ if (likely(tx_info->skb))
++ return 0;
+
+ return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
+ }
+
+ static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
+ {
+- struct ena_tx_buffer *tx_info = NULL;
++ struct ena_tx_buffer *tx_info;
+
+- if (likely(req_id < xdp_ring->ring_size)) {
+- tx_info = &xdp_ring->tx_buffer_info[req_id];
+- if (likely(tx_info->xdpf))
+- return 0;
+- }
++ tx_info = &xdp_ring->tx_buffer_info[req_id];
++ if (likely(tx_info->xdpf))
++ return 0;
+
+ return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
+ }
+@@ -1332,9 +1339,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+
+ rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+ &req_id);
+- if (rc)
++ if (rc) {
++ if (unlikely(rc == -EINVAL))
++ handle_invalid_req_id(tx_ring, req_id, NULL,
++ false);
+ break;
++ }
+
++ /* validate that the request id points to a valid skb */
+ rc = validate_tx_req_id(tx_ring, req_id);
+ if (rc)
+ break;
+@@ -1427,6 +1439,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+ u16 *next_to_clean)
+ {
+ struct ena_rx_buffer *rx_info;
++ struct ena_adapter *adapter;
+ u16 len, req_id, buf = 0;
+ struct sk_buff *skb;
+ void *page_addr;
+@@ -1439,8 +1452,14 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+
+ if (unlikely(!rx_info->page)) {
+- netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+- "Page is NULL\n");
++ adapter = rx_ring->adapter;
++ netif_err(adapter, rx_err, rx_ring->netdev,
++ "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
++ ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
++ adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
++ /* Make sure reset reason is set before triggering the reset */
++ smp_mb__before_atomic();
++ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ return NULL;
+ }
+
+@@ -1621,12 +1640,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+ * we expect, then we simply drop it
+ */
+ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
+- return XDP_DROP;
++ return ENA_XDP_DROP;
+
+ ret = ena_xdp_execute(rx_ring, xdp);
+
+ /* The xdp program might expand the headers */
+- if (ret == XDP_PASS) {
++ if (ret == ENA_XDP_PASS) {
+ rx_info->page_offset = xdp->data - xdp->data_hard_start;
+ rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
+ }
+@@ -1665,7 +1684,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+ xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
+
+ do {
+- xdp_verdict = XDP_PASS;
++ xdp_verdict = ENA_XDP_PASS;
+ skb = NULL;
+ ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+ ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+@@ -1693,7 +1712,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+ xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
+
+ /* allocate skb and fill it */
+- if (xdp_verdict == XDP_PASS)
++ if (xdp_verdict == ENA_XDP_PASS)
+ skb = ena_rx_skb(rx_ring,
+ rx_ring->ena_bufs,
+ ena_rx_ctx.descs,
+@@ -1711,14 +1730,15 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+ /* Packets was passed for transmission, unmap it
+ * from RX side.
+ */
+- if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) {
++ if (xdp_verdict & ENA_XDP_FORWARDED) {
+ ena_unmap_rx_buff(rx_ring,
+ &rx_ring->rx_buffer_info[req_id]);
+ rx_ring->rx_buffer_info[req_id].page = NULL;
+ }
+ }
+- if (xdp_verdict != XDP_PASS) {
++ if (xdp_verdict != ENA_XDP_PASS) {
+ xdp_flags |= xdp_verdict;
++ total_len += ena_rx_ctx.ena_bufs[0].len;
+ res_budget--;
+ continue;
+ }
+@@ -1762,7 +1782,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+ ena_refill_rx_bufs(rx_ring, refill_required);
+ }
+
+- if (xdp_flags & XDP_REDIRECT)
++ if (xdp_flags & ENA_XDP_REDIRECT)
+ xdp_do_flush_map();
+
+ return work_done;
+@@ -1819,8 +1839,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
+ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
+ {
++ u32 rx_interval = tx_ring->smoothed_interval;
+ struct ena_eth_io_intr_reg intr_reg;
+- u32 rx_interval = 0;
++
+ /* Rx ring can be NULL when for XDP tx queues which don't have an
+ * accompanying rx_ring pair.
+ */
+@@ -1858,20 +1879,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ if (likely(tx_ring->cpu == cpu))
+ goto out;
+
++ tx_ring->cpu = cpu;
++ if (rx_ring)
++ rx_ring->cpu = cpu;
++
+ numa_node = cpu_to_node(cpu);
++
++ if (likely(tx_ring->numa_node == numa_node))
++ goto out;
++
+ put_cpu();
+
+ if (numa_node != NUMA_NO_NODE) {
+ ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+- if (rx_ring)
++ tx_ring->numa_node = numa_node;
++ if (rx_ring) {
++ rx_ring->numa_node = numa_node;
+ ena_com_update_numa_node(rx_ring->ena_com_io_cq,
+ numa_node);
++ }
+ }
+
+- tx_ring->cpu = cpu;
+- if (rx_ring)
+- rx_ring->cpu = cpu;
+-
+ return;
+ out:
+ put_cpu();
+@@ -1896,9 +1924,14 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+
+ rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
+ &req_id);
+- if (rc)
++ if (rc) {
++ if (unlikely(rc == -EINVAL))
++ handle_invalid_req_id(xdp_ring, req_id, NULL,
++ true);
+ break;
++ }
+
++ /* validate that the request id points to a valid xdp_frame */
+ rc = validate_xdp_req_id(xdp_ring, req_id);
+ if (rc)
+ break;
+@@ -1987,11 +2020,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
+ if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+ ena_adjust_adaptive_rx_intr_moderation(ena_napi);
+
++ ena_update_ring_numa_node(tx_ring, rx_ring);
+ ena_unmask_interrupt(tx_ring, rx_ring);
+ }
+
+- ena_update_ring_numa_node(tx_ring, rx_ring);
+-
+ ret = rx_work_done;
+ } else {
+ ret = budget;
+@@ -2378,7 +2410,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+ ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+ ctx.msix_vector = msix_vector;
+ ctx.queue_size = tx_ring->ring_size;
+- ctx.numa_node = cpu_to_node(tx_ring->cpu);
++ ctx.numa_node = tx_ring->numa_node;
+
+ rc = ena_com_create_io_queue(ena_dev, &ctx);
+ if (rc) {
+@@ -2446,7 +2478,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+ ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ ctx.msix_vector = msix_vector;
+ ctx.queue_size = rx_ring->ring_size;
+- ctx.numa_node = cpu_to_node(rx_ring->cpu);
++ ctx.numa_node = rx_ring->numa_node;
+
+ rc = ena_com_create_io_queue(ena_dev, &ctx);
+ if (rc) {
+@@ -2807,6 +2839,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
+ return dev_was_up ? ena_up(adapter) : 0;
+ }
+
++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
++{
++ struct ena_ring *rx_ring;
++ int i;
++
++ if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
++ return -EINVAL;
++
++ adapter->rx_copybreak = rx_copybreak;
++
++ for (i = 0; i < adapter->num_io_queues; i++) {
++ rx_ring = &adapter->rx_ring[i];
++ rx_ring->rx_copybreak = rx_copybreak;
++ }
++
++ return 0;
++}
++
+ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
+ {
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+@@ -4013,10 +4063,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
+ /* 1 IRQ for mgmnt and 1 IRQs for each IO direction */
+ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
+- if (unlikely(!max_num_io_queues)) {
+- dev_err(&pdev->dev, "The device doesn't have io queues\n");
+- return -EFAULT;
+- }
+
+ return max_num_io_queues;
+ }
+@@ -4073,7 +4119,7 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter,
+ ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+ } else {
+ ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
+- ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
++ eth_hw_addr_set(netdev, adapter->mac_addr);
+ }
+
+ /* Set offload features */
+@@ -4574,13 +4620,19 @@ static struct pci_driver ena_pci_driver = {
+
+ static int __init ena_init(void)
+ {
++ int ret;
++
+ ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
+ if (!ena_wq) {
+ pr_err("Failed to create workqueue\n");
+ return -ENOMEM;
+ }
+
+- return pci_register_driver(&ena_pci_driver);
++ ret = pci_register_driver(&ena_pci_driver);
++ if (ret)
++ destroy_workqueue(ena_wq);
++
++ return ret;
+ }
+
+ static void __exit ena_cleanup(void)
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 0c39fc2fa345c..bf2a39c91c00d 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -273,9 +273,11 @@ struct ena_ring {
+ bool disable_meta_caching;
+ u16 no_interrupt_event_cnt;
+
+- /* cpu for TPH */
++ /* cpu and NUMA for TPH */
+ int cpu;
+- /* number of tx/rx_buffer_info's entries */
++ int numa_node;
++
++ /* number of tx/rx_buffer_info's entries */
+ int ring_size;
+
+ enum ena_admin_placement_policy_type tx_mem_queue_type;
+@@ -404,6 +406,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
+
+ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
+
++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak);
++
+ int ena_get_sset_count(struct net_device *netdev, int sset);
+
+ enum ena_xdp_errors_t {
+@@ -412,6 +416,15 @@ enum ena_xdp_errors_t {
+ ENA_XDP_NO_ENOUGH_QUEUES,
+ };
+
++enum ENA_XDP_ACTIONS {
++ ENA_XDP_PASS = 0,
++ ENA_XDP_TX = BIT(0),
++ ENA_XDP_REDIRECT = BIT(1),
++ ENA_XDP_DROP = BIT(2)
++};
++
++#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
++
+ static inline bool ena_xdp_present(struct ena_adapter *adapter)
+ {
+ return !!adapter->xdp_bpf_prog;
+diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
+index 4786f0504691d..899c8a2a34b6b 100644
+--- a/drivers/net/ethernet/amd/Kconfig
++++ b/drivers/net/ethernet/amd/Kconfig
+@@ -168,7 +168,7 @@ config SUNLANCE
+
+ config AMD_XGBE
+ tristate "AMD 10GbE Ethernet driver"
+- depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM
++ depends on (OF_ADDRESS || ACPI || PCI) && HAS_IOMEM
+ depends on X86 || ARM64 || COMPILE_TEST
+ depends on PTP_1588_CLOCK_OPTIONAL
+ select BITREVERSE
+diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
+index 9d2f49fd945ed..a0971ed00453c 100644
+--- a/drivers/net/ethernet/amd/atarilance.c
++++ b/drivers/net/ethernet/amd/atarilance.c
+@@ -821,7 +821,7 @@ lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ lp->memcpy_f( PKTBUF_ADDR(head), (void *)skb->data, skb->len );
+ head->flag = TMD1_OWN_CHIP | TMD1_ENP | TMD1_STP;
+ dev->stats.tx_bytes += skb->len;
+- dev_kfree_skb( skb );
++ dev_consume_skb_irq(skb);
+ lp->cur_tx++;
+ while( lp->cur_tx >= TX_RING_SIZE && lp->dirty_tx >= TX_RING_SIZE ) {
+ lp->cur_tx -= TX_RING_SIZE;
+diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
+index 945bf1d875072..6c2d72024e218 100644
+--- a/drivers/net/ethernet/amd/lance.c
++++ b/drivers/net/ethernet/amd/lance.c
+@@ -999,7 +999,7 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb,
+ skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len);
+ lp->tx_ring[entry].base =
+ ((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000;
+- dev_kfree_skb(skb);
++ dev_consume_skb_irq(skb);
+ } else {
+ lp->tx_skbuff[entry] = skb;
+ lp->tx_ring[entry].base = ((u32)isa_virt_to_bus(skb->data) & 0xffffff) | 0x83000000;
+diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
+index 4019cab875051..8bd063e54ac38 100644
+--- a/drivers/net/ethernet/amd/nmclan_cs.c
++++ b/drivers/net/ethernet/amd/nmclan_cs.c
+@@ -650,7 +650,7 @@ static int nmclan_config(struct pcmcia_device *link)
+ } else {
+ pr_notice("mace id not found: %x %x should be 0x40 0x?9\n",
+ sig[0], sig[1]);
+- return -ENODEV;
++ goto failed;
+ }
+ }
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+index b2cd3bdba9f89..533b8519ec352 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+@@ -1331,6 +1331,10 @@
+ #define MDIO_VEND2_PMA_CDR_CONTROL 0x8056
+ #endif
+
++#ifndef MDIO_VEND2_PMA_MISC_CTRL0
++#define MDIO_VEND2_PMA_MISC_CTRL0 0x8090
++#endif
++
+ #ifndef MDIO_CTRL1_SPEED1G
+ #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
+ #endif
+@@ -1389,6 +1393,10 @@
+ #define XGBE_PMA_RX_RST_0_RESET_ON 0x10
+ #define XGBE_PMA_RX_RST_0_RESET_OFF 0x00
+
++#define XGBE_PMA_PLL_CTRL_MASK BIT(15)
++#define XGBE_PMA_PLL_CTRL_ENABLE BIT(15)
++#define XGBE_PMA_PLL_CTRL_DISABLE 0x0000
++
+ /* Bit setting and getting macros
+ * The get macro will extract the current bit field value from within
+ * the variable
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+index d5fd49dd25f33..decc1c09a031b 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+@@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata)
+ netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n");
+ }
+
++static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata)
++{
++ unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
++
++ /* From MAC ver 30H the TFCR is per priority, instead of per queue */
++ if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30)
++ return max_q_count;
++ else
++ return min_t(unsigned int, pdata->tx_q_count, max_q_count);
++}
++
+ static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
+ {
+- unsigned int max_q_count, q_count;
+ unsigned int reg, reg_val;
+- unsigned int i;
++ unsigned int i, q_count;
+
+ /* Clear MTL flow control */
+ for (i = 0; i < pdata->rx_q_count; i++)
+ XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0);
+
+ /* Clear MAC flow control */
+- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
+- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
++ q_count = xgbe_get_fc_queue_count(pdata);
+ reg = MAC_Q0TFCR;
+ for (i = 0; i < q_count; i++) {
+ reg_val = XGMAC_IOREAD(pdata, reg);
+@@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
+ {
+ struct ieee_pfc *pfc = pdata->pfc;
+ struct ieee_ets *ets = pdata->ets;
+- unsigned int max_q_count, q_count;
+ unsigned int reg, reg_val;
+- unsigned int i;
++ unsigned int i, q_count;
+
+ /* Set MTL flow control */
+ for (i = 0; i < pdata->rx_q_count; i++) {
+@@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
+ }
+
+ /* Set MAC flow control */
+- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES;
+- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count);
++ q_count = xgbe_get_fc_queue_count(pdata);
+ reg = MAC_Q0TFCR;
+ for (i = 0; i < q_count; i++) {
+ reg_val = XGMAC_IOREAD(pdata, reg);
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+index 17a585adfb49c..555db1871ec9f 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
+ if (!channel->tx_ring)
+ break;
+
++ /* Deactivate the Tx timer */
+ del_timer_sync(&channel->tx_timer);
++ channel->tx_timer_active = 0;
+ }
+ }
+
+@@ -1062,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
+
+ devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+
++ tasklet_kill(&pdata->tasklet_dev);
++ tasklet_kill(&pdata->tasklet_ecc);
++
+ if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+ devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
+@@ -2555,6 +2560,14 @@ read_again:
+ buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+ len += buf2_len;
+
++ if (buf2_len > rdata->rx.buf.dma_len) {
++ /* Hardware inconsistency within the descriptors
++ * that has resulted in a length underflow.
++ */
++ error = 1;
++ goto skip_data;
++ }
++
+ if (!skb) {
+ skb = xgbe_create_skb(pdata, napi, rdata,
+ buf1_len);
+@@ -2584,8 +2597,10 @@ skip_data:
+ if (!last || context_next)
+ goto read_again;
+
+- if (!skb)
++ if (!skb || error) {
++ dev_kfree_skb(skb);
+ goto next_packet;
++ }
+
+ /* Be sure we don't exceed the configured MTU */
+ max_len = netdev->mtu + ETH_HLEN;
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+index 22d4fc547a0a3..a9ccc4258ee50 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+ xgbe_i2c_disable(pdata);
+ xgbe_i2c_clear_all_interrupts(pdata);
+
+- if (pdata->dev_irq != pdata->i2c_irq)
++ if (pdata->dev_irq != pdata->i2c_irq) {
+ devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
++ tasklet_kill(&pdata->tasklet_i2c);
++ }
+ }
+
+ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+index 4e97b48695220..ca7372369b3e6 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
+ reg |= XGBE_KR_TRAINING_ENABLE;
+ reg |= XGBE_KR_TRAINING_START;
+ XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
++ pdata->kr_start_time = jiffies;
+
+ netif_dbg(pdata, link, pdata->netdev,
+ "KR training initiated\n");
+@@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
+
+ xgbe_switch_mode(pdata);
+
++ pdata->an_result = XGBE_AN_READY;
++
+ xgbe_an_restart(pdata);
+
+ return XGBE_AN_INCOMPAT_LINK;
+@@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata)
+ static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata)
+ {
+ unsigned long link_timeout;
++ unsigned long kr_time;
++ int wait;
+
+ link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ);
+ if (time_after(jiffies, link_timeout)) {
++ if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) &&
++ pdata->phy.autoneg == AUTONEG_ENABLE) {
++ /* AN restart should not happen while KR training is in progress.
++ * The while loop ensures no AN restart during KR training,
++ * waits up to 500ms and AN restart is triggered only if KR
++ * training is failed.
++ */
++ wait = XGBE_KR_TRAINING_WAIT_ITER;
++ while (wait--) {
++ kr_time = pdata->kr_start_time +
++ msecs_to_jiffies(XGBE_AN_MS_TIMEOUT);
++ if (time_after(jiffies, kr_time))
++ break;
++ /* AN restart is not required, if AN result is COMPLETE */
++ if (pdata->an_result == XGBE_AN_COMPLETE)
++ return;
++ usleep_range(10000, 11000);
++ }
++ }
+ netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n");
+ xgbe_phy_config_aneg(pdata);
+ }
+@@ -1288,7 +1312,7 @@ static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
+ return pdata->phy_if.phy_impl.an_outcome(pdata);
+ }
+
+-static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
++static bool xgbe_phy_status_result(struct xgbe_prv_data *pdata)
+ {
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+ enum xgbe_mode mode;
+@@ -1323,8 +1347,13 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
+
+ pdata->phy.duplex = DUPLEX_FULL;
+
+- if (xgbe_set_mode(pdata, mode) && pdata->an_again)
++ if (!xgbe_set_mode(pdata, mode))
++ return false;
++
++ if (pdata->an_again)
+ xgbe_phy_reconfig_aneg(pdata);
++
++ return true;
+ }
+
+ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
+@@ -1354,7 +1383,8 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
+ return;
+ }
+
+- xgbe_phy_status_result(pdata);
++ if (xgbe_phy_status_result(pdata))
++ return;
+
+ if (test_bit(XGBE_LINK_INIT, &pdata->dev_state))
+ clear_bit(XGBE_LINK_INIT, &pdata->dev_state);
+@@ -1390,8 +1420,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+ /* Disable auto-negotiation */
+ xgbe_an_disable_all(pdata);
+
+- if (pdata->dev_irq != pdata->an_irq)
++ if (pdata->dev_irq != pdata->an_irq) {
+ devm_free_irq(pdata->dev, pdata->an_irq, pdata);
++ tasklet_kill(&pdata->tasklet_an);
++ }
+
+ pdata->phy_if.phy_impl.stop(pdata);
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+index 90cb55eb54665..014513ce00a14 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+@@ -418,6 +418,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev)
+
+ pci_free_irq_vectors(pdata->pcidev);
+
++ /* Disable all interrupts in the hardware */
++ XP_IOWRITE(pdata, XP_INT_EN, 0x0);
++
+ xgbe_free_pdata(pdata);
+ }
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+index 18e48b3bc402b..97e32c0490f8a 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+@@ -189,6 +189,7 @@ enum xgbe_sfp_cable {
+ XGBE_SFP_CABLE_UNKNOWN = 0,
+ XGBE_SFP_CABLE_ACTIVE,
+ XGBE_SFP_CABLE_PASSIVE,
++ XGBE_SFP_CABLE_FIBER,
+ };
+
+ enum xgbe_sfp_base {
+@@ -236,9 +237,7 @@ enum xgbe_sfp_speed {
+
+ #define XGBE_SFP_BASE_BR 12
+ #define XGBE_SFP_BASE_BR_1GBE_MIN 0x0a
+-#define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d
+ #define XGBE_SFP_BASE_BR_10GBE_MIN 0x64
+-#define XGBE_SFP_BASE_BR_10GBE_MAX 0x68
+
+ #define XGBE_SFP_BASE_CU_CABLE_LEN 18
+
+@@ -284,6 +283,8 @@ struct xgbe_sfp_eeprom {
+ #define XGBE_BEL_FUSE_VENDOR "BEL-FUSE "
+ #define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 "
+
++#define XGBE_MOLEX_VENDOR "Molex Inc. "
++
+ struct xgbe_sfp_ascii {
+ union {
+ char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1];
+@@ -823,25 +824,22 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
+ static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
+ enum xgbe_sfp_speed sfp_speed)
+ {
+- u8 *sfp_base, min, max;
++ u8 *sfp_base, min;
+
+ sfp_base = sfp_eeprom->base;
+
+ switch (sfp_speed) {
+ case XGBE_SFP_SPEED_1000:
+ min = XGBE_SFP_BASE_BR_1GBE_MIN;
+- max = XGBE_SFP_BASE_BR_1GBE_MAX;
+ break;
+ case XGBE_SFP_SPEED_10000:
+ min = XGBE_SFP_BASE_BR_10GBE_MIN;
+- max = XGBE_SFP_BASE_BR_10GBE_MAX;
+ break;
+ default:
+ return false;
+ }
+
+- return ((sfp_base[XGBE_SFP_BASE_BR] >= min) &&
+- (sfp_base[XGBE_SFP_BASE_BR] <= max));
++ return sfp_base[XGBE_SFP_BASE_BR] >= min;
+ }
+
+ static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata)
+@@ -1142,16 +1140,21 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
+ phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data);
+ phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data);
+
+- /* Assume ACTIVE cable unless told it is PASSIVE */
++ /* Assume FIBER cable unless told otherwise */
+ if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) {
+ phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE;
+ phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN];
+- } else {
++ } else if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_ACTIVE) {
+ phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
++ } else {
++ phy_data->sfp_cable = XGBE_SFP_CABLE_FIBER;
+ }
+
+ /* Determine the type of SFP */
+- if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR)
++ if (phy_data->sfp_cable != XGBE_SFP_CABLE_FIBER &&
++ xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000))
++ phy_data->sfp_base = XGBE_SFP_BASE_10000_CR;
++ else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR)
+ phy_data->sfp_base = XGBE_SFP_BASE_10000_SR;
+ else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR)
+ phy_data->sfp_base = XGBE_SFP_BASE_10000_LR;
+@@ -1167,9 +1170,6 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
+ phy_data->sfp_base = XGBE_SFP_BASE_1000_CX;
+ else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T)
+ phy_data->sfp_base = XGBE_SFP_BASE_1000_T;
+- else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) &&
+- xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000))
+- phy_data->sfp_base = XGBE_SFP_BASE_10000_CR;
+
+ switch (phy_data->sfp_base) {
+ case XGBE_SFP_BASE_1000_T:
+@@ -1977,12 +1977,26 @@ static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata)
+ }
+ }
+
++static void xgbe_phy_pll_ctrl(struct xgbe_prv_data *pdata, bool enable)
++{
++ XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_MISC_CTRL0,
++ XGBE_PMA_PLL_CTRL_MASK,
++ enable ? XGBE_PMA_PLL_CTRL_ENABLE
++ : XGBE_PMA_PLL_CTRL_DISABLE);
++
++ /* Wait for command to complete */
++ usleep_range(100, 200);
++}
++
+ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+ unsigned int cmd, unsigned int sub_cmd)
+ {
+ unsigned int s0 = 0;
+ unsigned int wait;
+
++ /* Disable PLL re-initialization during FW command processing */
++ xgbe_phy_pll_ctrl(pdata, false);
++
+ /* Log if a previous command did not complete */
+ if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) {
+ netif_dbg(pdata, link, pdata->netdev,
+@@ -2003,7 +2017,7 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+ wait = XGBE_RATECHANGE_COUNT;
+ while (wait--) {
+ if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+- return;
++ goto reenable_pll;
+
+ usleep_range(1000, 2000);
+ }
+@@ -2013,6 +2027,10 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+
+ /* Reset on error */
+ xgbe_phy_rx_reset(pdata);
++
++reenable_pll:
++ /* Enable PLL re-initialization */
++ xgbe_phy_pll_ctrl(pdata, true);
+ }
+
+ static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
+index 3305979a9f7c1..e0b8f3c4cc0b2 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
++++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
+@@ -289,6 +289,7 @@
+ /* Auto-negotiation */
+ #define XGBE_AN_MS_TIMEOUT 500
+ #define XGBE_LINK_TIMEOUT 5
++#define XGBE_KR_TRAINING_WAIT_ITER 50
+
+ #define XGBE_SGMII_AN_LINK_STATUS BIT(1)
+ #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3))
+@@ -1253,6 +1254,7 @@ struct xgbe_prv_data {
+ unsigned int parallel_detect;
+ unsigned int fec_ability;
+ unsigned long an_start;
++ unsigned long kr_start_time;
+ enum xgbe_an_mode an_mode;
+
+ /* I2C support */
+diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+index 5f1fc6582d74a..71151f675a498 100644
+--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
++++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+@@ -696,6 +696,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
+ buf_pool->rx_skb[skb_index] = NULL;
+
+ datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1));
++
++ /* strip off CRC as HW isn't doing this */
++ nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0));
++ if (!nv)
++ datalen -= 4;
++
+ skb_put(skb, datalen);
+ prefetch(skb->data - NET_IP_ALIGN);
+ skb->protocol = eth_type_trans(skb, ndev);
+@@ -717,12 +723,8 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
+ }
+ }
+
+- nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0));
+- if (!nv) {
+- /* strip off CRC as HW isn't doing this */
+- datalen -= 4;
++ if (!nv)
+ goto skip_jumbo;
+- }
+
+ slots = page_pool->slots - 1;
+ head = page_pool->head;
+@@ -1002,8 +1004,10 @@ static int xgene_enet_open(struct net_device *ndev)
+
+ xgene_enet_napi_enable(pdata);
+ ret = xgene_enet_register_irq(ndev);
+- if (ret)
++ if (ret) {
++ xgene_enet_napi_disable(pdata);
+ return ret;
++ }
+
+ if (ndev->phydev) {
+ phy_start(ndev->phydev);
+diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
+index a989d2df59ad0..7a966361d83f7 100644
+--- a/drivers/net/ethernet/apple/bmac.c
++++ b/drivers/net/ethernet/apple/bmac.c
+@@ -1511,7 +1511,7 @@ static void bmac_tx_timeout(struct timer_list *t)
+ i = bp->tx_empty;
+ ++dev->stats.tx_errors;
+ if (i != bp->tx_fill) {
+- dev_kfree_skb(bp->tx_bufs[i]);
++ dev_kfree_skb_irq(bp->tx_bufs[i]);
+ bp->tx_bufs[i] = NULL;
+ if (++i >= N_TX_RING) i = 0;
+ bp->tx_empty = i;
+diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
+index 4b80e3a52a199..44037e9e197fa 100644
+--- a/drivers/net/ethernet/apple/mace.c
++++ b/drivers/net/ethernet/apple/mace.c
+@@ -841,7 +841,7 @@ static void mace_tx_timeout(struct timer_list *t)
+ if (mp->tx_bad_runt) {
+ mp->tx_bad_runt = 0;
+ } else if (i != mp->tx_fill) {
+- dev_kfree_skb(mp->tx_bufs[i]);
++ dev_kfree_skb_irq(mp->tx_bufs[i]);
+ if (++i >= N_TX_RING)
+ i = 0;
+ mp->tx_empty = i;
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+index 23b2d390fcdda..ace691d7cd759 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+@@ -40,10 +40,12 @@
+
+ #define AQ_DEVICE_ID_AQC113DEV 0x00C0
+ #define AQ_DEVICE_ID_AQC113CS 0x94C0
++#define AQ_DEVICE_ID_AQC113CA 0x34C0
+ #define AQ_DEVICE_ID_AQC114CS 0x93C0
+ #define AQ_DEVICE_ID_AQC113 0x04C0
+ #define AQ_DEVICE_ID_AQC113C 0x14C0
+ #define AQ_DEVICE_ID_AQC115C 0x12C0
++#define AQ_DEVICE_ID_AQC116C 0x11C0
+
+ #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter"
+
+@@ -53,20 +55,19 @@
+
+ #define AQ_NIC_RATE_10G BIT(0)
+ #define AQ_NIC_RATE_5G BIT(1)
+-#define AQ_NIC_RATE_5GSR BIT(2)
+-#define AQ_NIC_RATE_2G5 BIT(3)
+-#define AQ_NIC_RATE_1G BIT(4)
+-#define AQ_NIC_RATE_100M BIT(5)
+-#define AQ_NIC_RATE_10M BIT(6)
+-#define AQ_NIC_RATE_1G_HALF BIT(7)
+-#define AQ_NIC_RATE_100M_HALF BIT(8)
+-#define AQ_NIC_RATE_10M_HALF BIT(9)
++#define AQ_NIC_RATE_2G5 BIT(2)
++#define AQ_NIC_RATE_1G BIT(3)
++#define AQ_NIC_RATE_100M BIT(4)
++#define AQ_NIC_RATE_10M BIT(5)
++#define AQ_NIC_RATE_1G_HALF BIT(6)
++#define AQ_NIC_RATE_100M_HALF BIT(7)
++#define AQ_NIC_RATE_10M_HALF BIT(8)
+
+-#define AQ_NIC_RATE_EEE_10G BIT(10)
+-#define AQ_NIC_RATE_EEE_5G BIT(11)
+-#define AQ_NIC_RATE_EEE_2G5 BIT(12)
+-#define AQ_NIC_RATE_EEE_1G BIT(13)
+-#define AQ_NIC_RATE_EEE_100M BIT(14)
++#define AQ_NIC_RATE_EEE_10G BIT(9)
++#define AQ_NIC_RATE_EEE_5G BIT(10)
++#define AQ_NIC_RATE_EEE_2G5 BIT(11)
++#define AQ_NIC_RATE_EEE_1G BIT(12)
++#define AQ_NIC_RATE_EEE_100M BIT(13)
+ #define AQ_NIC_RATE_EEE_MSK (AQ_NIC_RATE_EEE_10G |\
+ AQ_NIC_RATE_EEE_5G |\
+ AQ_NIC_RATE_EEE_2G5 |\
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+index a9ef0544e30f0..715859cb6560a 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+@@ -13,6 +13,7 @@
+ #include "aq_ptp.h"
+ #include "aq_filters.h"
+ #include "aq_macsec.h"
++#include "aq_main.h"
+
+ #include <linux/ptp_clock_kernel.h>
+
+@@ -845,7 +846,7 @@ static int aq_set_ringparam(struct net_device *ndev,
+
+ if (netif_running(ndev)) {
+ ndev_running = true;
+- dev_close(ndev);
++ aq_ndev_close(ndev);
+ }
+
+ cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
+@@ -861,7 +862,7 @@ static int aq_set_ringparam(struct net_device *ndev,
+ goto err_exit;
+
+ if (ndev_running)
+- err = dev_open(ndev, NULL);
++ err = aq_ndev_open(ndev);
+
+ err_exit:
+ return err;
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+index bed481816ea31..7442850ca95f0 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+@@ -80,6 +80,8 @@ struct aq_hw_link_status_s {
+ };
+
+ struct aq_stats_s {
++ u64 brc;
++ u64 btc;
+ u64 uprc;
+ u64 mprc;
+ u64 bprc;
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+index 4a6dfac857ca9..ee823a18294cd 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+@@ -585,6 +585,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx,
+
+ ret = aq_mss_set_egress_sakey_record(hw, &key_rec, sa_idx);
+
++ memzero_explicit(&key_rec, sizeof(key_rec));
+ return ret;
+ }
+
+@@ -932,6 +933,7 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx,
+
+ ret = aq_mss_set_ingress_sakey_record(hw, &sa_key_record, sa_idx);
+
++ memzero_explicit(&sa_key_record, sizeof(sa_key_record));
+ return ret;
+ }
+
+@@ -1451,26 +1453,57 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic)
+ egress_sa_threshold_expired);
+ }
+
++#define AQ_LOCKED_MDO_DEF(mdo) \
++static int aq_locked_mdo_##mdo(struct macsec_context *ctx) \
++{ \
++ struct aq_nic_s *nic = netdev_priv(ctx->netdev); \
++ int ret; \
++ mutex_lock(&nic->macsec_mutex); \
++ ret = aq_mdo_##mdo(ctx); \
++ mutex_unlock(&nic->macsec_mutex); \
++ return ret; \
++}
++
++AQ_LOCKED_MDO_DEF(dev_open)
++AQ_LOCKED_MDO_DEF(dev_stop)
++AQ_LOCKED_MDO_DEF(add_secy)
++AQ_LOCKED_MDO_DEF(upd_secy)
++AQ_LOCKED_MDO_DEF(del_secy)
++AQ_LOCKED_MDO_DEF(add_rxsc)
++AQ_LOCKED_MDO_DEF(upd_rxsc)
++AQ_LOCKED_MDO_DEF(del_rxsc)
++AQ_LOCKED_MDO_DEF(add_rxsa)
++AQ_LOCKED_MDO_DEF(upd_rxsa)
++AQ_LOCKED_MDO_DEF(del_rxsa)
++AQ_LOCKED_MDO_DEF(add_txsa)
++AQ_LOCKED_MDO_DEF(upd_txsa)
++AQ_LOCKED_MDO_DEF(del_txsa)
++AQ_LOCKED_MDO_DEF(get_dev_stats)
++AQ_LOCKED_MDO_DEF(get_tx_sc_stats)
++AQ_LOCKED_MDO_DEF(get_tx_sa_stats)
++AQ_LOCKED_MDO_DEF(get_rx_sc_stats)
++AQ_LOCKED_MDO_DEF(get_rx_sa_stats)
++
+ const struct macsec_ops aq_macsec_ops = {
+- .mdo_dev_open = aq_mdo_dev_open,
+- .mdo_dev_stop = aq_mdo_dev_stop,
+- .mdo_add_secy = aq_mdo_add_secy,
+- .mdo_upd_secy = aq_mdo_upd_secy,
+- .mdo_del_secy = aq_mdo_del_secy,
+- .mdo_add_rxsc = aq_mdo_add_rxsc,
+- .mdo_upd_rxsc = aq_mdo_upd_rxsc,
+- .mdo_del_rxsc = aq_mdo_del_rxsc,
+- .mdo_add_rxsa = aq_mdo_add_rxsa,
+- .mdo_upd_rxsa = aq_mdo_upd_rxsa,
+- .mdo_del_rxsa = aq_mdo_del_rxsa,
+- .mdo_add_txsa = aq_mdo_add_txsa,
+- .mdo_upd_txsa = aq_mdo_upd_txsa,
+- .mdo_del_txsa = aq_mdo_del_txsa,
+- .mdo_get_dev_stats = aq_mdo_get_dev_stats,
+- .mdo_get_tx_sc_stats = aq_mdo_get_tx_sc_stats,
+- .mdo_get_tx_sa_stats = aq_mdo_get_tx_sa_stats,
+- .mdo_get_rx_sc_stats = aq_mdo_get_rx_sc_stats,
+- .mdo_get_rx_sa_stats = aq_mdo_get_rx_sa_stats,
++ .mdo_dev_open = aq_locked_mdo_dev_open,
++ .mdo_dev_stop = aq_locked_mdo_dev_stop,
++ .mdo_add_secy = aq_locked_mdo_add_secy,
++ .mdo_upd_secy = aq_locked_mdo_upd_secy,
++ .mdo_del_secy = aq_locked_mdo_del_secy,
++ .mdo_add_rxsc = aq_locked_mdo_add_rxsc,
++ .mdo_upd_rxsc = aq_locked_mdo_upd_rxsc,
++ .mdo_del_rxsc = aq_locked_mdo_del_rxsc,
++ .mdo_add_rxsa = aq_locked_mdo_add_rxsa,
++ .mdo_upd_rxsa = aq_locked_mdo_upd_rxsa,
++ .mdo_del_rxsa = aq_locked_mdo_del_rxsa,
++ .mdo_add_txsa = aq_locked_mdo_add_txsa,
++ .mdo_upd_txsa = aq_locked_mdo_upd_txsa,
++ .mdo_del_txsa = aq_locked_mdo_del_txsa,
++ .mdo_get_dev_stats = aq_locked_mdo_get_dev_stats,
++ .mdo_get_tx_sc_stats = aq_locked_mdo_get_tx_sc_stats,
++ .mdo_get_tx_sa_stats = aq_locked_mdo_get_tx_sa_stats,
++ .mdo_get_rx_sc_stats = aq_locked_mdo_get_rx_sc_stats,
++ .mdo_get_rx_sa_stats = aq_locked_mdo_get_rx_sa_stats,
+ };
+
+ int aq_macsec_init(struct aq_nic_s *nic)
+@@ -1492,6 +1525,7 @@ int aq_macsec_init(struct aq_nic_s *nic)
+
+ nic->ndev->features |= NETIF_F_HW_MACSEC;
+ nic->ndev->macsec_ops = &aq_macsec_ops;
++ mutex_init(&nic->macsec_mutex);
+
+ return 0;
+ }
+@@ -1515,7 +1549,7 @@ int aq_macsec_enable(struct aq_nic_s *nic)
+ if (!nic->macsec_cfg)
+ return 0;
+
+- rtnl_lock();
++ mutex_lock(&nic->macsec_mutex);
+
+ if (nic->aq_fw_ops->send_macsec_req) {
+ struct macsec_cfg_request cfg = { 0 };
+@@ -1564,7 +1598,7 @@ int aq_macsec_enable(struct aq_nic_s *nic)
+ ret = aq_apply_macsec_cfg(nic);
+
+ unlock:
+- rtnl_unlock();
++ mutex_unlock(&nic->macsec_mutex);
+ return ret;
+ }
+
+@@ -1576,9 +1610,9 @@ void aq_macsec_work(struct aq_nic_s *nic)
+ if (!netif_carrier_ok(nic->ndev))
+ return;
+
+- rtnl_lock();
++ mutex_lock(&nic->macsec_mutex);
+ aq_check_txsa_expiration(nic);
+- rtnl_unlock();
++ mutex_unlock(&nic->macsec_mutex);
+ }
+
+ int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic)
+@@ -1589,21 +1623,30 @@ int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic)
+ if (!cfg)
+ return 0;
+
++ mutex_lock(&nic->macsec_mutex);
++
+ for (i = 0; i < AQ_MACSEC_MAX_SC; i++) {
+ if (!test_bit(i, &cfg->rxsc_idx_busy))
+ continue;
+ cnt += hweight_long(cfg->aq_rxsc[i].rx_sa_idx_busy);
+ }
+
++ mutex_unlock(&nic->macsec_mutex);
+ return cnt;
+ }
+
+ int aq_macsec_tx_sc_cnt(struct aq_nic_s *nic)
+ {
++ int cnt;
++
+ if (!nic->macsec_cfg)
+ return 0;
+
+- return hweight_long(nic->macsec_cfg->txsc_idx_busy);
++ mutex_lock(&nic->macsec_mutex);
++ cnt = hweight_long(nic->macsec_cfg->txsc_idx_busy);
++ mutex_unlock(&nic->macsec_mutex);
++
++ return cnt;
+ }
+
+ int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic)
+@@ -1614,12 +1657,15 @@ int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic)
+ if (!cfg)
+ return 0;
+
++ mutex_lock(&nic->macsec_mutex);
++
+ for (i = 0; i < AQ_MACSEC_MAX_SC; i++) {
+ if (!test_bit(i, &cfg->txsc_idx_busy))
+ continue;
+ cnt += hweight_long(cfg->aq_txsc[i].tx_sa_idx_busy);
+ }
+
++ mutex_unlock(&nic->macsec_mutex);
+ return cnt;
+ }
+
+@@ -1691,6 +1737,8 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data)
+ if (!cfg)
+ return data;
+
++ mutex_lock(&nic->macsec_mutex);
++
+ aq_macsec_update_stats(nic);
+
+ common_stats = &cfg->stats;
+@@ -1773,5 +1821,7 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data)
+
+ data += i;
+
++ mutex_unlock(&nic->macsec_mutex);
++
+ return data;
+ }
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+index e22935ce95730..45ed097bfe49a 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+@@ -53,7 +53,7 @@ struct net_device *aq_ndev_alloc(void)
+ return ndev;
+ }
+
+-static int aq_ndev_open(struct net_device *ndev)
++int aq_ndev_open(struct net_device *ndev)
+ {
+ struct aq_nic_s *aq_nic = netdev_priv(ndev);
+ int err = 0;
+@@ -83,17 +83,14 @@ err_exit:
+ return err;
+ }
+
+-static int aq_ndev_close(struct net_device *ndev)
++int aq_ndev_close(struct net_device *ndev)
+ {
+ struct aq_nic_s *aq_nic = netdev_priv(ndev);
+ int err = 0;
+
+ err = aq_nic_stop(aq_nic);
+- if (err < 0)
+- goto err_exit;
+ aq_nic_deinit(aq_nic, true);
+
+-err_exit:
+ return err;
+ }
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
+index a5a624b9ce733..2a562ab7a5afd 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
+@@ -14,5 +14,7 @@
+
+ void aq_ndev_schedule_work(struct work_struct *work);
+ struct net_device *aq_ndev_alloc(void);
++int aq_ndev_open(struct net_device *ndev);
++int aq_ndev_close(struct net_device *ndev);
+
+ #endif /* AQ_MAIN_H */
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+index 6c049864dac08..c52093589d7cf 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+@@ -265,12 +265,10 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
+ static void aq_nic_polling_timer_cb(struct timer_list *t)
+ {
+ struct aq_nic_s *self = from_timer(self, t, polling_timer);
+- struct aq_vec_s *aq_vec = NULL;
+ unsigned int i = 0U;
+
+- for (i = 0U, aq_vec = self->aq_vec[0];
+- self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
+- aq_vec_isr(i, (void *)aq_vec);
++ for (i = 0U; self->aq_vecs > i; ++i)
++ aq_vec_isr(i, (void *)self->aq_vec[i]);
+
+ mod_timer(&self->polling_timer, jiffies +
+ AQ_CFG_POLLING_TIMER_INTERVAL);
+@@ -332,7 +330,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
+ {
+ static u8 mac_addr_permanent[] = AQ_CFG_MAC_ADDR_PERMANENT;
+
+- ether_addr_copy(self->ndev->dev_addr, mac_addr_permanent);
++ eth_hw_addr_set(self->ndev, mac_addr_permanent);
+ }
+ #endif
+
+@@ -480,8 +478,8 @@ int aq_nic_start(struct aq_nic_s *self)
+ if (err < 0)
+ goto err_exit;
+
+- for (i = 0U, aq_vec = self->aq_vec[0];
+- self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
++ for (i = 0U; self->aq_vecs > i; ++i) {
++ aq_vec = self->aq_vec[i];
+ err = aq_vec_start(aq_vec);
+ if (err < 0)
+ goto err_exit;
+@@ -511,8 +509,8 @@ int aq_nic_start(struct aq_nic_s *self)
+ mod_timer(&self->polling_timer, jiffies +
+ AQ_CFG_POLLING_TIMER_INTERVAL);
+ } else {
+- for (i = 0U, aq_vec = self->aq_vec[0];
+- self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
++ for (i = 0U; self->aq_vecs > i; ++i) {
++ aq_vec = self->aq_vec[i];
+ err = aq_pci_func_alloc_irq(self, i, self->ndev->name,
+ aq_vec_isr, aq_vec,
+ aq_vec_get_affinity_mask(aq_vec));
+@@ -872,7 +870,6 @@ int aq_nic_get_regs_count(struct aq_nic_s *self)
+
+ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
+ {
+- struct aq_vec_s *aq_vec = NULL;
+ struct aq_stats_s *stats;
+ unsigned int count = 0U;
+ unsigned int i = 0U;
+@@ -903,8 +900,14 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
+ data[++i] = stats->mbtc;
+ data[++i] = stats->bbrc;
+ data[++i] = stats->bbtc;
+- data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
+- data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
++ if (stats->brc)
++ data[++i] = stats->brc;
++ else
++ data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
++ if (stats->btc)
++ data[++i] = stats->btc;
++ else
++ data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
+ data[++i] = stats->dma_pkt_rc;
+ data[++i] = stats->dma_pkt_tc;
+ data[++i] = stats->dma_oct_rc;
+@@ -916,11 +919,11 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
+ data += i;
+
+ for (tc = 0U; tc < self->aq_nic_cfg.tcs; tc++) {
+- for (i = 0U, aq_vec = self->aq_vec[0];
+- aq_vec && self->aq_vecs > i;
+- ++i, aq_vec = self->aq_vec[i]) {
++ for (i = 0U; self->aq_vecs > i; ++i) {
++ if (!self->aq_vec[i])
++ break;
+ data += count;
+- count = aq_vec_get_sw_stats(aq_vec, tc, data);
++ count = aq_vec_get_sw_stats(self->aq_vec[i], tc, data);
+ }
+ }
+
+@@ -1234,7 +1237,6 @@ int aq_nic_set_loopback(struct aq_nic_s *self)
+
+ int aq_nic_stop(struct aq_nic_s *self)
+ {
+- struct aq_vec_s *aq_vec = NULL;
+ unsigned int i = 0U;
+
+ netif_tx_disable(self->ndev);
+@@ -1252,9 +1254,8 @@ int aq_nic_stop(struct aq_nic_s *self)
+
+ aq_ptp_irq_free(self);
+
+- for (i = 0U, aq_vec = self->aq_vec[0];
+- self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
+- aq_vec_stop(aq_vec);
++ for (i = 0U; self->aq_vecs > i; ++i)
++ aq_vec_stop(self->aq_vec[i]);
+
+ aq_ptp_ring_stop(self);
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+index 1a7148041e3dc..b7f7d6f66633f 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+@@ -154,6 +154,8 @@ struct aq_nic_s {
+ struct mutex fwreq_mutex;
+ #if IS_ENABLED(CONFIG_MACSEC)
+ struct aq_macsec_cfg *macsec_cfg;
++ /* mutex to protect data in macsec_cfg */
++ struct mutex macsec_mutex;
+ #endif
+ /* PTP support */
+ struct aq_ptp_s *aq_ptp;
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+index d4b1976ee69b9..8647125d60aef 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+@@ -49,6 +49,8 @@ static const struct pci_device_id aq_pci_tbl[] = {
+ { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), },
+ { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), },
+ { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), },
++ { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CA), },
++ { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC116C), },
+
+ {}
+ };
+@@ -85,7 +87,10 @@ static const struct aq_board_revision_s hw_atl_boards[] = {
+ { AQ_DEVICE_ID_AQC113CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, },
+ { AQ_DEVICE_ID_AQC114CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, },
+ { AQ_DEVICE_ID_AQC113C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, },
+- { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, },
++ { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc115c, },
++ { AQ_DEVICE_ID_AQC113CA, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, },
++ { AQ_DEVICE_ID_AQC116C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc116c, },
++
+ };
+
+ MODULE_DEVICE_TABLE(pci, aq_pci_tbl);
+@@ -374,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev)
+ }
+ }
+
+-static int aq_suspend_common(struct device *dev, bool deep)
++static int aq_suspend_common(struct device *dev)
+ {
+ struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev));
+
+@@ -387,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep)
+ if (netif_running(nic->ndev))
+ aq_nic_stop(nic);
+
+- if (deep) {
+- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+- aq_nic_set_power(nic);
+- }
++ aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
++ aq_nic_set_power(nic);
+
+ rtnl_unlock();
+
+ return 0;
+ }
+
+-static int atl_resume_common(struct device *dev, bool deep)
++static int atl_resume_common(struct device *dev)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct aq_nic_s *nic;
+@@ -410,11 +413,6 @@ static int atl_resume_common(struct device *dev, bool deep)
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+
+- if (deep) {
+- /* Reinitialize Nic/Vecs objects */
+- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+- }
+-
+ if (netif_running(nic->ndev)) {
+ ret = aq_nic_init(nic);
+ if (ret)
+@@ -439,22 +437,22 @@ err_exit:
+
+ static int aq_pm_freeze(struct device *dev)
+ {
+- return aq_suspend_common(dev, false);
++ return aq_suspend_common(dev);
+ }
+
+ static int aq_pm_suspend_poweroff(struct device *dev)
+ {
+- return aq_suspend_common(dev, true);
++ return aq_suspend_common(dev);
+ }
+
+ static int aq_pm_thaw(struct device *dev)
+ {
+- return atl_resume_common(dev, false);
++ return atl_resume_common(dev);
+ }
+
+ static int aq_pm_resume_restore(struct device *dev)
+ {
+- return atl_resume_common(dev, true);
++ return atl_resume_common(dev);
+ }
+
+ static const struct dev_pm_ops aq_pm_ops = {
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+index 24122ccda614c..e9c6f1fa0b1a7 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+@@ -345,7 +345,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
+ int budget)
+ {
+ struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
+- bool is_rsc_completed = true;
+ int err = 0;
+
+ for (; (self->sw_head != self->hw_head) && budget;
+@@ -363,8 +362,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
+ continue;
+
+ if (!buff->is_eop) {
++ unsigned int frag_cnt = 0U;
+ buff_ = buff;
+ do {
++ bool is_rsc_completed = true;
++
++ if (buff_->next >= self->size) {
++ err = -EIO;
++ goto err_exit;
++ }
++
++ frag_cnt++;
+ next_ = buff_->next,
+ buff_ = &self->buff_ring[next_];
+ is_rsc_completed =
+@@ -372,22 +380,25 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
+ next_,
+ self->hw_head);
+
+- if (unlikely(!is_rsc_completed))
+- break;
++ if (unlikely(!is_rsc_completed) ||
++ frag_cnt > MAX_SKB_FRAGS) {
++ err = 0;
++ goto err_exit;
++ }
+
+ buff->is_error |= buff_->is_error;
+ buff->is_cso_err |= buff_->is_cso_err;
+
+ } while (!buff_->is_eop);
+
+- if (!is_rsc_completed) {
+- err = 0;
+- goto err_exit;
+- }
+ if (buff->is_error ||
+ (buff->is_lro && buff->is_cso_err)) {
+ buff_ = buff;
+ do {
++ if (buff_->next >= self->size) {
++ err = -EIO;
++ goto err_exit;
++ }
+ next_ = buff_->next,
+ buff_ = &self->buff_ring[next_];
+
+@@ -437,7 +448,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
+ ALIGN(hdr_len, sizeof(long)));
+
+ if (buff->len - hdr_len > 0) {
+- skb_add_rx_frag(skb, 0, buff->rxdata.page,
++ skb_add_rx_frag(skb, i++, buff->rxdata.page,
+ buff->rxdata.pg_off + hdr_len,
+ buff->len - hdr_len,
+ AQ_CFG_RX_FRAME_MAX);
+@@ -446,7 +457,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
+
+ if (!buff->is_eop) {
+ buff_ = buff;
+- i = 1U;
+ do {
+ next_ = buff_->next;
+ buff_ = &self->buff_ring[next_];
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+index d281322d7dd29..6ab1f3212d246 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+@@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
+ if (!self) {
+ err = -EINVAL;
+ } else {
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
+ ring[AQ_VEC_RX_ID].stats.rx.polls++;
+ u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp);
+@@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops,
+ self->aq_hw_ops = aq_hw_ops;
+ self->aq_hw = aq_hw;
+
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX);
+ if (err < 0)
+ goto err_exit;
+@@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self)
+ unsigned int i = 0U;
+ int err = 0;
+
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw,
+ &ring[AQ_VEC_TX_ID]);
+ if (err < 0)
+@@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self)
+ struct aq_ring_s *ring = NULL;
+ unsigned int i = 0U;
+
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw,
+ &ring[AQ_VEC_TX_ID]);
+
+@@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self)
+ if (!self)
+ goto err_exit;
+
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
+ aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]);
+ }
+@@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self)
+ if (!self)
+ goto err_exit;
+
+- for (i = 0U, ring = self->ring[0];
+- self->tx_rings > i; ++i, ring = self->ring[i]) {
++ for (i = 0U; self->tx_rings > i; ++i) {
++ ring = self->ring[i];
+ aq_ring_free(&ring[AQ_VEC_TX_ID]);
+ if (i < self->rx_rings)
+ aq_ring_free(&ring[AQ_VEC_RX_ID]);
+@@ -362,9 +362,6 @@ unsigned int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u
+ {
+ unsigned int count;
+
+- WARN_ONCE(!aq_vec_is_valid_tc(self, tc),
+- "Invalid tc %u (#rx=%u, #tx=%u)\n",
+- tc, self->rx_rings, self->tx_rings);
+ if (!aq_vec_is_valid_tc(self, tc))
+ return 0;
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+index 9f1b15077e7d6..45c17c585d743 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+@@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
+ err = -ENXIO;
+ goto err_exit;
+ }
++
++ /* Validate that the new hw_head_ is reasonable. */
++ if (hw_head_ >= ring->size) {
++ err = -ENXIO;
++ goto err_exit;
++ }
++
+ ring->hw_head = hw_head_;
+ err = aq_hw_err_from_flags(self);
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+index 404cbf60d3f2f..65b9e5846be45 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+@@ -559,6 +559,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+ goto err_exit;
+
+ if (fw.len == 0xFFFFU) {
++ if (sw.len > sizeof(self->rpc)) {
++ printk(KERN_INFO "Invalid sw len: %x\n", sw.len);
++ err = -EINVAL;
++ goto err_exit;
++ }
+ err = hw_atl_utils_fw_rpc_call(self, sw.len);
+ if (err < 0)
+ goto err_exit;
+@@ -567,6 +572,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+
+ if (rpc) {
+ if (fw.len) {
++ if (fw.len > sizeof(self->rpc)) {
++ printk(KERN_INFO "Invalid fw len: %x\n", fw.len);
++ err = -EINVAL;
++ goto err_exit;
++ }
+ err =
+ hw_atl_utils_fw_downld_dwords(self,
+ self->rpc_addr,
+@@ -857,12 +867,20 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
+ int hw_atl_utils_update_stats(struct aq_hw_s *self)
+ {
+ struct aq_stats_s *cs = &self->curr_stats;
++ struct aq_stats_s curr_stats = *cs;
+ struct hw_atl_utils_mbox mbox;
++ bool corrupted_stats = false;
+
+ hw_atl_utils_mpi_read_stats(self, &mbox);
+
+-#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \
+- mbox.stats._N_ - self->last_stats._N_)
++#define AQ_SDELTA(_N_) \
++do { \
++ if (!corrupted_stats && \
++ ((s64)(mbox.stats._N_ - self->last_stats._N_)) >= 0) \
++ curr_stats._N_ += mbox.stats._N_ - self->last_stats._N_; \
++ else \
++ corrupted_stats = true; \
++} while (0)
+
+ if (self->aq_link_status.mbps) {
+ AQ_SDELTA(uprc);
+@@ -882,6 +900,9 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self)
+ AQ_SDELTA(bbrc);
+ AQ_SDELTA(bbtc);
+ AQ_SDELTA(dpc);
++
++ if (!corrupted_stats)
++ *cs = curr_stats;
+ }
+ #undef AQ_SDELTA
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+index ee0c22d049354..05086f0040fd9 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+@@ -132,9 +132,6 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
+ if (speed & AQ_NIC_RATE_5G)
+ rate |= FW2X_RATE_5G;
+
+- if (speed & AQ_NIC_RATE_5GSR)
+- rate |= FW2X_RATE_5G;
+-
+ if (speed & AQ_NIC_RATE_2G5)
+ rate |= FW2X_RATE_2G5;
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+index 92f64048bf691..c76ccdc77ba60 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+@@ -65,11 +65,25 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = {
+ AQ_NIC_RATE_5G |
+ AQ_NIC_RATE_2G5 |
+ AQ_NIC_RATE_1G |
+- AQ_NIC_RATE_1G_HALF |
+ AQ_NIC_RATE_100M |
+- AQ_NIC_RATE_100M_HALF |
+- AQ_NIC_RATE_10M |
+- AQ_NIC_RATE_10M_HALF,
++ AQ_NIC_RATE_10M,
++};
++
++const struct aq_hw_caps_s hw_atl2_caps_aqc115c = {
++ DEFAULT_BOARD_BASIC_CAPABILITIES,
++ .media_type = AQ_HW_MEDIA_TYPE_TP,
++ .link_speed_msk = AQ_NIC_RATE_2G5 |
++ AQ_NIC_RATE_1G |
++ AQ_NIC_RATE_100M |
++ AQ_NIC_RATE_10M,
++};
++
++const struct aq_hw_caps_s hw_atl2_caps_aqc116c = {
++ DEFAULT_BOARD_BASIC_CAPABILITIES,
++ .media_type = AQ_HW_MEDIA_TYPE_TP,
++ .link_speed_msk = AQ_NIC_RATE_1G |
++ AQ_NIC_RATE_100M |
++ AQ_NIC_RATE_10M,
+ };
+
+ static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self)
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
+index de8723f1c28a1..346f0dc9912e5 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
+@@ -9,6 +9,8 @@
+ #include "aq_common.h"
+
+ extern const struct aq_hw_caps_s hw_atl2_caps_aqc113;
++extern const struct aq_hw_caps_s hw_atl2_caps_aqc115c;
++extern const struct aq_hw_caps_s hw_atl2_caps_aqc116c;
+ extern const struct aq_hw_ops hw_atl2_ops;
+
+ #endif /* HW_ATL2_H */
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
+index b66fa346581ce..6bad64c77b87c 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
+@@ -239,7 +239,8 @@ struct version_s {
+ u8 minor;
+ u16 build;
+ } phy;
+- u32 rsvd;
++ u32 drv_iface_ver:4;
++ u32 rsvd:28;
+ };
+
+ struct link_status_s {
+@@ -424,7 +425,7 @@ struct cable_diag_status_s {
+ u16 rsvd2;
+ };
+
+-struct statistics_s {
++struct statistics_a0_s {
+ struct {
+ u32 link_up;
+ u32 link_down;
+@@ -457,6 +458,33 @@ struct statistics_s {
+ u32 reserve_fw_gap;
+ };
+
++struct __packed statistics_b0_s {
++ u64 rx_good_octets;
++ u64 rx_pause_frames;
++ u64 rx_good_frames;
++ u64 rx_errors;
++ u64 rx_unicast_frames;
++ u64 rx_multicast_frames;
++ u64 rx_broadcast_frames;
++
++ u64 tx_good_octets;
++ u64 tx_pause_frames;
++ u64 tx_good_frames;
++ u64 tx_errors;
++ u64 tx_unicast_frames;
++ u64 tx_multicast_frames;
++ u64 tx_broadcast_frames;
++
++ u32 main_loop_cycles;
++};
++
++struct __packed statistics_s {
++ union __packed {
++ struct statistics_a0_s a0;
++ struct statistics_b0_s b0;
++ };
++};
++
+ struct filter_caps_s {
+ u8 l2_filters_base_index:6;
+ u8 flexible_filter_mask:2;
+@@ -545,7 +573,7 @@ struct management_status_s {
+ u32 rsvd5;
+ };
+
+-struct fw_interface_out {
++struct __packed fw_interface_out {
+ struct transaction_counter_s transaction_id;
+ struct version_s version;
+ struct link_status_s link_status;
+@@ -569,7 +597,6 @@ struct fw_interface_out {
+ struct core_dump_s core_dump;
+ u32 rsvd11;
+ struct statistics_s stats;
+- u32 rsvd12;
+ struct filter_caps_s filter_caps;
+ struct device_caps_s device_caps;
+ u32 rsvd13;
+@@ -592,6 +619,9 @@ struct fw_interface_out {
+ #define AQ_HOST_MODE_LOW_POWER 3U
+ #define AQ_HOST_MODE_SHUTDOWN 4U
+
++#define AQ_A2_FW_INTERFACE_A0 0
++#define AQ_A2_FW_INTERFACE_B0 1
++
+ int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops);
+
+ int hw_atl2_utils_soft_reset(struct aq_hw_s *self);
+diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+index dd259c8f2f4f3..58d426dda3edb 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
++++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+@@ -84,7 +84,7 @@ static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self,
+ if (cnt > AQ_A2_FW_READ_TRY_MAX)
+ return -ETIME;
+ if (tid1.transaction_cnt_a != tid1.transaction_cnt_b)
+- udelay(1);
++ mdelay(1);
+ } while (tid1.transaction_cnt_a != tid1.transaction_cnt_b);
+
+ hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords);
+@@ -154,7 +154,7 @@ static void a2_link_speed_mask2fw(u32 speed,
+ {
+ link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G);
+ link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G);
+- link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR);
++ link_options->rate_N5G = link_options->rate_5G;
+ link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5);
+ link_options->rate_N2P5G = link_options->rate_2P5G;
+ link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G);
+@@ -192,8 +192,6 @@ static u32 a2_fw_lkp_to_mask(struct lkp_link_caps_s *lkp_link_caps)
+ rate |= AQ_NIC_RATE_10G;
+ if (lkp_link_caps->rate_5G)
+ rate |= AQ_NIC_RATE_5G;
+- if (lkp_link_caps->rate_N5G)
+- rate |= AQ_NIC_RATE_5GSR;
+ if (lkp_link_caps->rate_2P5G)
+ rate |= AQ_NIC_RATE_2G5;
+ if (lkp_link_caps->rate_1G)
+@@ -335,15 +333,22 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
+ return 0;
+ }
+
+-static int aq_a2_fw_update_stats(struct aq_hw_s *self)
++static void aq_a2_fill_a0_stats(struct aq_hw_s *self,
++ struct statistics_s *stats)
+ {
+ struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+- struct statistics_s stats;
+-
+- hw_atl2_shared_buffer_read_safe(self, stats, &stats);
+-
+-#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \
+- stats.msm._F_ - priv->last_stats.msm._F_)
++ struct aq_stats_s *cs = &self->curr_stats;
++ struct aq_stats_s curr_stats = *cs;
++ bool corrupted_stats = false;
++
++#define AQ_SDELTA(_N, _F) \
++do { \
++ if (!corrupted_stats && \
++ ((s64)(stats->a0.msm._F - priv->last_stats.a0.msm._F)) >= 0) \
++ curr_stats._N += stats->a0.msm._F - priv->last_stats.a0.msm._F;\
++ else \
++ corrupted_stats = true; \
++} while (0)
+
+ if (self->aq_link_status.mbps) {
+ AQ_SDELTA(uprc, rx_unicast_frames);
+@@ -362,17 +367,76 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self)
+ AQ_SDELTA(mbtc, tx_multicast_octets);
+ AQ_SDELTA(bbrc, rx_broadcast_octets);
+ AQ_SDELTA(bbtc, tx_broadcast_octets);
++
++ if (!corrupted_stats)
++ *cs = curr_stats;
+ }
+ #undef AQ_SDELTA
+- self->curr_stats.dma_pkt_rc =
+- hw_atl_stats_rx_dma_good_pkt_counter_get(self);
+- self->curr_stats.dma_pkt_tc =
+- hw_atl_stats_tx_dma_good_pkt_counter_get(self);
+- self->curr_stats.dma_oct_rc =
+- hw_atl_stats_rx_dma_good_octet_counter_get(self);
+- self->curr_stats.dma_oct_tc =
+- hw_atl_stats_tx_dma_good_octet_counter_get(self);
+- self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
++
++}
++
++static void aq_a2_fill_b0_stats(struct aq_hw_s *self,
++ struct statistics_s *stats)
++{
++ struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
++ struct aq_stats_s *cs = &self->curr_stats;
++ struct aq_stats_s curr_stats = *cs;
++ bool corrupted_stats = false;
++
++#define AQ_SDELTA(_N, _F) \
++do { \
++ if (!corrupted_stats && \
++ ((s64)(stats->b0._F - priv->last_stats.b0._F)) >= 0) \
++ curr_stats._N += stats->b0._F - priv->last_stats.b0._F; \
++ else \
++ corrupted_stats = true; \
++} while (0)
++
++ if (self->aq_link_status.mbps) {
++ AQ_SDELTA(uprc, rx_unicast_frames);
++ AQ_SDELTA(mprc, rx_multicast_frames);
++ AQ_SDELTA(bprc, rx_broadcast_frames);
++ AQ_SDELTA(erpr, rx_errors);
++ AQ_SDELTA(brc, rx_good_octets);
++
++ AQ_SDELTA(uptc, tx_unicast_frames);
++ AQ_SDELTA(mptc, tx_multicast_frames);
++ AQ_SDELTA(bptc, tx_broadcast_frames);
++ AQ_SDELTA(erpt, tx_errors);
++ AQ_SDELTA(btc, tx_good_octets);
++
++ if (!corrupted_stats)
++ *cs = curr_stats;
++ }
++#undef AQ_SDELTA
++}
++
++static int aq_a2_fw_update_stats(struct aq_hw_s *self)
++{
++ struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
++ struct aq_stats_s *cs = &self->curr_stats;
++ struct statistics_s stats;
++ struct version_s version;
++ int err;
++
++ err = hw_atl2_shared_buffer_read_safe(self, version, &version);
++ if (err)
++ return err;
++
++ err = hw_atl2_shared_buffer_read_safe(self, stats, &stats);
++ if (err)
++ return err;
++
++ if (version.drv_iface_ver == AQ_A2_FW_INTERFACE_A0)
++ aq_a2_fill_a0_stats(self, &stats);
++ else
++ aq_a2_fill_b0_stats(self, &stats);
++
++ cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self);
++ cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self);
++ cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self);
++ cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self);
++ cs->dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
+
+ memcpy(&priv->last_stats, &stats, sizeof(stats));
+
+@@ -499,9 +563,9 @@ u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self)
+ hw_atl2_shared_buffer_read_safe(self, version, &version);
+
+ /* A2 FW version is stored in reverse order */
+- return version.mac.major << 24 |
+- version.mac.minor << 16 |
+- version.mac.build;
++ return version.bundle.major << 24 |
++ version.bundle.minor << 16 |
++ version.bundle.build;
+ }
+
+ int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
+diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
+index 36c7cf05630a1..4319249595207 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
++++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
+@@ -757,6 +757,7 @@ set_ingress_sakey_record(struct aq_hw_s *hw,
+ u16 table_index)
+ {
+ u16 packed_record[18];
++ int ret;
+
+ if (table_index >= NUMROWS_INGRESSSAKEYRECORD)
+ return -EINVAL;
+@@ -789,9 +790,12 @@ set_ingress_sakey_record(struct aq_hw_s *hw,
+
+ packed_record[16] = rec->key_len & 0x3;
+
+- return set_raw_ingress_record(hw, packed_record, 18, 2,
+- ROWOFFSET_INGRESSSAKEYRECORD +
+- table_index);
++ ret = set_raw_ingress_record(hw, packed_record, 18, 2,
++ ROWOFFSET_INGRESSSAKEYRECORD +
++ table_index);
++
++ memzero_explicit(packed_record, sizeof(packed_record));
++ return ret;
+ }
+
+ int aq_mss_set_ingress_sakey_record(struct aq_hw_s *hw,
+@@ -1739,14 +1743,14 @@ static int set_egress_sakey_record(struct aq_hw_s *hw,
+ ret = set_raw_egress_record(hw, packed_record, 8, 2,
+ ROWOFFSET_EGRESSSAKEYRECORD + table_index);
+ if (unlikely(ret))
+- return ret;
++ goto clear_key;
+ ret = set_raw_egress_record(hw, packed_record + 8, 8, 2,
+ ROWOFFSET_EGRESSSAKEYRECORD + table_index -
+ 32);
+- if (unlikely(ret))
+- return ret;
+
+- return 0;
++clear_key:
++ memzero_explicit(packed_record, sizeof(packed_record));
++ return ret;
+ }
+
+ int aq_mss_set_egress_sakey_record(struct aq_hw_s *hw,
+diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
+index 92a79c4ffa2c7..0a67612af2281 100644
+--- a/drivers/net/ethernet/arc/Kconfig
++++ b/drivers/net/ethernet/arc/Kconfig
+@@ -26,7 +26,7 @@ config ARC_EMAC_CORE
+ config ARC_EMAC
+ tristate "ARC EMAC support"
+ select ARC_EMAC_CORE
+- depends on OF_IRQ && OF_NET
++ depends on OF_IRQ
+ depends on ARC || COMPILE_TEST
+ help
+ On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
+@@ -36,7 +36,7 @@ config ARC_EMAC
+ config EMAC_ROCKCHIP
+ tristate "Rockchip EMAC support"
+ select ARC_EMAC_CORE
+- depends on OF_IRQ && OF_NET && REGULATOR
++ depends on OF_IRQ && REGULATOR
+ depends on ARCH_ROCKCHIP || COMPILE_TEST
+ help
+ Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers.
+diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
+index 38c288ec90590..333333692caa5 100644
+--- a/drivers/net/ethernet/arc/emac_main.c
++++ b/drivers/net/ethernet/arc/emac_main.c
+@@ -941,7 +941,7 @@ int arc_emac_probe(struct net_device *ndev, int interface)
+ }
+
+ /* Get MAC address from device tree */
+- err = of_get_mac_address(dev->of_node, ndev->dev_addr);
++ err = of_get_ethdev_address(dev->of_node, ndev);
+ if (err)
+ eth_hw_addr_random(ndev);
+
+diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
+index 02ae98aabf91c..9d8b214c129d2 100644
+--- a/drivers/net/ethernet/atheros/ag71xx.c
++++ b/drivers/net/ethernet/atheros/ag71xx.c
+@@ -1480,7 +1480,7 @@ static int ag71xx_open(struct net_device *ndev)
+ if (ret) {
+ netif_err(ag, link, ndev, "phylink_of_phy_connect filed with err: %i\n",
+ ret);
+- goto err;
++ return ret;
+ }
+
+ max_frame_len = ag71xx_max_frame_len(ndev->mtu);
+@@ -1501,6 +1501,7 @@ static int ag71xx_open(struct net_device *ndev)
+
+ err:
+ ag71xx_rings_cleanup(ag);
++ phylink_disconnect_phy(ag->phylink);
+ return ret;
+ }
+
+@@ -1915,15 +1916,12 @@ static int ag71xx_probe(struct platform_device *pdev)
+ ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
+ if (IS_ERR(ag->mac_reset)) {
+ netif_err(ag, probe, ndev, "missing mac reset\n");
+- err = PTR_ERR(ag->mac_reset);
+- goto err_free;
++ return PTR_ERR(ag->mac_reset);
+ }
+
+ ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+- if (!ag->mac_base) {
+- err = -ENOMEM;
+- goto err_free;
+- }
++ if (!ag->mac_base)
++ return -ENOMEM;
+
+ ndev->irq = platform_get_irq(pdev, 0);
+ err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
+@@ -1931,7 +1929,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+ if (err) {
+ netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
+ ndev->irq);
+- goto err_free;
++ return err;
+ }
+
+ ndev->netdev_ops = &ag71xx_netdev_ops;
+@@ -1959,16 +1957,14 @@ static int ag71xx_probe(struct platform_device *pdev)
+ ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
+ sizeof(struct ag71xx_desc),
+ &ag->stop_desc_dma, GFP_KERNEL);
+- if (!ag->stop_desc) {
+- err = -ENOMEM;
+- goto err_free;
+- }
++ if (!ag->stop_desc)
++ return -ENOMEM;
+
+ ag->stop_desc->data = 0;
+ ag->stop_desc->ctrl = 0;
+ ag->stop_desc->next = (u32)ag->stop_desc_dma;
+
+- err = of_get_mac_address(np, ndev->dev_addr);
++ err = of_get_ethdev_address(np, ndev);
+ if (err) {
+ netif_err(ag, probe, ndev, "invalid MAC address, using random address\n");
+ eth_random_addr(ndev->dev_addr);
+@@ -1977,7 +1973,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+ err = of_get_phy_mode(np, &ag->phy_if_mode);
+ if (err) {
+ netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
+- goto err_free;
++ return err;
+ }
+
+ netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
+@@ -1985,7 +1981,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+ err = clk_prepare_enable(ag->clk_eth);
+ if (err) {
+ netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
+- goto err_free;
++ return err;
+ }
+
+ ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
+@@ -2021,8 +2017,6 @@ err_mdio_remove:
+ ag71xx_mdio_remove(ag);
+ err_put_clk:
+ clk_disable_unprepare(ag->clk_eth);
+-err_free:
+- free_netdev(ndev);
+ return err;
+ }
+
+diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
+index 4ea157efca868..2ac5253ff89aa 100644
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
+ alx->hw.mtu = mtu;
+ alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
+ netdev_update_features(netdev);
+- if (netif_running(netdev))
++ if (netif_running(netdev)) {
++ mutex_lock(&alx->mtx);
+ alx_reinit(alx);
++ mutex_unlock(&alx->mtx);
++ }
+ return 0;
+ }
+
+@@ -1909,11 +1912,14 @@ static int alx_suspend(struct device *dev)
+
+ if (!netif_running(alx->dev))
+ return 0;
++
++ rtnl_lock();
+ netif_device_detach(alx->dev);
+
+ mutex_lock(&alx->mtx);
+ __alx_stop(alx);
+ mutex_unlock(&alx->mtx);
++ rtnl_unlock();
+
+ return 0;
+ }
+@@ -1924,6 +1930,7 @@ static int alx_resume(struct device *dev)
+ struct alx_hw *hw = &alx->hw;
+ int err;
+
++ rtnl_lock();
+ mutex_lock(&alx->mtx);
+ alx_reset_phy(hw);
+
+@@ -1940,6 +1947,7 @@ static int alx_resume(struct device *dev)
+
+ unlock:
+ mutex_unlock(&alx->mtx);
++ rtnl_unlock();
+ return err;
+ }
+
+diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+index 3b51b172b3172..dad21b4fbc0bc 100644
+--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
++++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+@@ -900,7 +900,7 @@ static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter,
+ atl1c_clean_buffer(pdev, buffer_info);
+ }
+
+- netdev_reset_queue(adapter->netdev);
++ netdev_tx_reset_queue(netdev_get_tx_queue(adapter->netdev, queue));
+
+ /* Zero out Tx-buffers */
+ memset(tpd_ring->desc, 0, sizeof(struct atl1c_tpd_desc) *
+@@ -2104,8 +2104,11 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter,
+ real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
+ + ntohs(ip_hdr(skb)->tot_len));
+
+- if (real_len < skb->len)
+- pskb_trim(skb, real_len);
++ if (real_len < skb->len) {
++ err = pskb_trim(skb, real_len);
++ if (err)
++ return err;
++ }
+
+ hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (unlikely(skb->len == hdr_len)) {
+diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+index 753973ac922e9..db13311e77e73 100644
+--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
++++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+@@ -1642,8 +1642,11 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter,
+ real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
+ + ntohs(ip_hdr(skb)->tot_len));
+
+- if (real_len < skb->len)
+- pskb_trim(skb, real_len);
++ if (real_len < skb->len) {
++ err = pskb_trim(skb, real_len);
++ if (err)
++ return err;
++ }
+
+ hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (unlikely(skb->len == hdr_len)) {
+diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
+index 56e0fb07aec7f..cd1706909044d 100644
+--- a/drivers/net/ethernet/broadcom/Kconfig
++++ b/drivers/net/ethernet/broadcom/Kconfig
+@@ -71,6 +71,7 @@ config BCM63XX_ENET
+ config BCMGENET
+ tristate "Broadcom GENET internal MAC support"
+ depends on HAS_IOMEM
++ depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835
+ select MII
+ select PHYLIB
+ select FIXED_PHY
+diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
+index 0ddfb5b5d53ca..2e6c5f258a1ff 100644
+--- a/drivers/net/ethernet/broadcom/Makefile
++++ b/drivers/net/ethernet/broadcom/Makefile
+@@ -17,3 +17,8 @@ obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o
+ obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o
+ obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o
+ obj-$(CONFIG_BNXT) += bnxt/
++
++# FIXME: temporarily silence -Warray-bounds on non W=1+ builds
++ifndef KBUILD_EXTRA_WARN
++CFLAGS_tg3.o += -Wno-array-bounds
++endif
+diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
+index 02a569500234c..ba48ddff5e7cd 100644
+--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
++++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
+@@ -561,8 +561,6 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde
+
+ if (++ring->write_idx == ring->length - 1)
+ ring->write_idx = 0;
+- enet->netdev->stats.tx_bytes += skb->len;
+- enet->netdev->stats.tx_packets++;
+
+ return NETDEV_TX_OK;
+ }
+@@ -646,13 +644,17 @@ static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight)
+
+ dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
+ dev_kfree_skb(slot->skb);
++
++ handled++;
+ bytes += slot->len;
++
+ if (++tx_ring->read_idx == tx_ring->length)
+ tx_ring->read_idx = 0;
+-
+- handled++;
+ }
+
++ enet->netdev->stats.tx_packets += handled;
++ enet->netdev->stats.tx_bytes += bytes;
++
+ if (handled < weight) {
+ napi_complete_done(napi, handled);
+ bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
+@@ -708,14 +710,16 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
+
+ enet->irq_tx = platform_get_irq_byname(pdev, "tx");
+
+- dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
++ err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
++ if (err)
++ return err;
+
+ err = bcm4908_enet_dma_alloc(enet);
+ if (err)
+ return err;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+- err = of_get_mac_address(dev->of_node, netdev->dev_addr);
++ err = of_get_ethdev_address(dev->of_node, netdev);
+ if (err)
+ eth_hw_addr_random(netdev);
+ netdev->netdev_ops = &bcm4908_enet_netdev_ops;
+diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
+index 7fa1b695400d7..25466d73b432a 100644
+--- a/drivers/net/ethernet/broadcom/bcmsysport.c
++++ b/drivers/net/ethernet/broadcom/bcmsysport.c
+@@ -1309,11 +1309,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
+ struct device *kdev = &priv->pdev->dev;
+ struct bcm_sysport_tx_ring *ring;
++ unsigned long flags, desc_flags;
+ struct bcm_sysport_cb *cb;
+ struct netdev_queue *txq;
+ u32 len_status, addr_lo;
+ unsigned int skb_len;
+- unsigned long flags;
+ dma_addr_t mapping;
+ u16 queue;
+ int ret;
+@@ -1373,8 +1373,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb,
+ ring->desc_count--;
+
+ /* Ports are latched, so write upper address first */
++ spin_lock_irqsave(&priv->desc_lock, desc_flags);
+ tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index));
+ tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index));
++ spin_unlock_irqrestore(&priv->desc_lock, desc_flags);
+
+ /* Check ring space and update SW control flow */
+ if (ring->desc_count == 0)
+@@ -1989,6 +1991,9 @@ static int bcm_sysport_open(struct net_device *dev)
+ goto out_clk_disable;
+ }
+
++ /* Indicate that the MAC is responsible for PHY PM */
++ phydev->mac_managed_pm = true;
++
+ /* Reset house keeping link status */
+ priv->old_duplex = -1;
+ priv->old_link = -1;
+@@ -2013,6 +2018,7 @@ static int bcm_sysport_open(struct net_device *dev)
+ }
+
+ /* Initialize both hardware and software ring */
++ spin_lock_init(&priv->desc_lock);
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ ret = bcm_sysport_init_tx_ring(priv, i);
+ if (ret) {
+@@ -2555,7 +2561,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
+ }
+
+ /* Initialize netdevice members */
+- ret = of_get_mac_address(dn, dev->dev_addr);
++ ret = of_get_ethdev_address(dn, dev);
+ if (ret) {
+ dev_warn(&pdev->dev, "using random Ethernet MAC\n");
+ eth_hw_addr_random(dev);
+@@ -2582,8 +2588,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
+ device_set_wakeup_capable(&pdev->dev, 1);
+
+ priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol");
+- if (IS_ERR(priv->wol_clk))
+- return PTR_ERR(priv->wol_clk);
++ if (IS_ERR(priv->wol_clk)) {
++ ret = PTR_ERR(priv->wol_clk);
++ goto err_deregister_fixed_link;
++ }
+
+ /* Set the needed headroom once and for all */
+ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
+diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
+index 984f76e74b43e..16b73bb9acc78 100644
+--- a/drivers/net/ethernet/broadcom/bcmsysport.h
++++ b/drivers/net/ethernet/broadcom/bcmsysport.h
+@@ -711,6 +711,7 @@ struct bcm_sysport_priv {
+ int wol_irq;
+
+ /* Transmit rings */
++ spinlock_t desc_lock;
+ struct bcm_sysport_tx_ring *tx_rings;
+
+ /* Receive queue */
+diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
+index 9513cfb5ba58c..678cc6a3617c3 100644
+--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
++++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
+@@ -128,7 +128,7 @@ static int bgmac_probe(struct bcma_device *core)
+
+ bcma_set_drvdata(core, bgmac);
+
+- err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr);
++ err = of_get_ethdev_address(bgmac->dev->of_node, bgmac->net_dev);
+ if (err == -EPROBE_DEFER)
+ return err;
+
+@@ -150,7 +150,7 @@ static int bgmac_probe(struct bcma_device *core)
+ err = -ENOTSUPP;
+ goto err;
+ }
+- ether_addr_copy(bgmac->net_dev->dev_addr, mac);
++ eth_hw_addr_set(bgmac->net_dev, mac);
+ }
+
+ /* On BCM4706 we need common core to access PHY */
+@@ -228,12 +228,12 @@ static int bgmac_probe(struct bcma_device *core)
+ bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+ bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
+ bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
+- if (ci->pkg == BCMA_PKG_ID_BCM47188 ||
+- ci->pkg == BCMA_PKG_ID_BCM47186) {
++ if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) ||
++ (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) {
+ bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+ bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+ }
+- if (ci->pkg == BCMA_PKG_ID_BCM5358)
++ if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358)
+ bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
+ break;
+ case BCMA_CHIP_ID_BCM53573:
+@@ -323,7 +323,6 @@ static void bgmac_remove(struct bcma_device *core)
+ bcma_mdio_mii_unregister(bgmac->mii_bus);
+ bgmac_enet_remove(bgmac);
+ bcma_set_drvdata(core, NULL);
+- kfree(bgmac);
+ }
+
+ static struct bcma_driver bgmac_bcma_driver = {
+diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
+index df8ff839cc621..b4381cd419792 100644
+--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
++++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
+@@ -172,6 +172,7 @@ static int bgmac_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+ struct bgmac *bgmac;
++ struct resource *regs;
+ int ret;
+
+ bgmac = bgmac_alloc(&pdev->dev);
+@@ -191,7 +192,7 @@ static int bgmac_probe(struct platform_device *pdev)
+ bgmac->dev = &pdev->dev;
+ bgmac->dma_dev = &pdev->dev;
+
+- ret = of_get_mac_address(np, bgmac->net_dev->dev_addr);
++ ret = of_get_ethdev_address(np, bgmac->net_dev);
+ if (ret == -EPROBE_DEFER)
+ return ret;
+
+@@ -208,15 +209,23 @@ static int bgmac_probe(struct platform_device *pdev)
+ if (IS_ERR(bgmac->plat.base))
+ return PTR_ERR(bgmac->plat.base);
+
+- bgmac->plat.idm_base = devm_platform_ioremap_resource_byname(pdev, "idm_base");
+- if (IS_ERR(bgmac->plat.idm_base))
+- return PTR_ERR(bgmac->plat.idm_base);
+- else
++ /* The idm_base resource is optional for some platforms */
++ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
++ if (regs) {
++ bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
++ if (IS_ERR(bgmac->plat.idm_base))
++ return PTR_ERR(bgmac->plat.idm_base);
+ bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
++ }
+
+- bgmac->plat.nicpm_base = devm_platform_ioremap_resource_byname(pdev, "nicpm_base");
+- if (IS_ERR(bgmac->plat.nicpm_base))
+- return PTR_ERR(bgmac->plat.nicpm_base);
++ /* The nicpm_base resource is optional for some platforms */
++ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
++ if (regs) {
++ bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
++ regs);
++ if (IS_ERR(bgmac->plat.nicpm_base))
++ return PTR_ERR(bgmac->plat.nicpm_base);
++ }
+
+ bgmac->read = platform_bgmac_read;
+ bgmac->write = platform_bgmac_write;
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index fe4d99abd5487..c691635cf4ebe 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -189,8 +189,8 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+ }
+
+ slot->skb = skb;
+- ring->end += nr_frags + 1;
+ netdev_sent_queue(net_dev, skb->len);
++ ring->end += nr_frags + 1;
+
+ wmb();
+
+@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac)
+
+ if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+ flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+- if (!bgmac->has_robosw)
++ if (bgmac->in_init || !bgmac->has_robosw)
+ flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+ }
+ bgmac_clk_enable(bgmac, flags);
+ }
+
+- if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
++ if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw))
+ bgmac_idm_write(bgmac, BCMA_IOCTL,
+ bgmac_idm_read(bgmac, BCMA_IOCTL) &
+ ~BGMAC_BCMA_IOCTL_SW_RESET);
+@@ -1241,7 +1241,7 @@ static int bgmac_set_mac_address(struct net_device *net_dev, void *addr)
+ if (ret < 0)
+ return ret;
+
+- ether_addr_copy(net_dev->dev_addr, sa->sa_data);
++ eth_hw_addr_set(net_dev, sa->sa_data);
+ bgmac_write_mac_address(bgmac, net_dev->dev_addr);
+
+ eth_commit_mac_addr_change(net_dev, addr);
+@@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
+ int err;
+
+ phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
+- if (!phy_dev || IS_ERR(phy_dev)) {
++ if (IS_ERR(phy_dev)) {
+ dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
+ return -ENODEV;
+ }
+@@ -1490,7 +1490,7 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ struct net_device *net_dev = bgmac->net_dev;
+ int err;
+
+- bgmac_chip_intrs_off(bgmac);
++ bgmac->in_init = true;
+
+ net_dev->irq = bgmac->irq;
+ SET_NETDEV_DEV(net_dev, bgmac->dev);
+@@ -1509,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ */
+ bgmac_clk_enable(bgmac, 0);
+
++ bgmac_chip_intrs_off(bgmac);
++
+ /* This seems to be fixing IRQ by assigning OOB #6 to the core */
+ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
+ if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
+@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ /* Omit FCS from max MTU size */
+ net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
+
++ bgmac->in_init = false;
++
+ err = register_netdev(bgmac->net_dev);
+ if (err) {
+ dev_err(bgmac->dev, "Cannot register net device\n");
+@@ -1568,7 +1572,6 @@ void bgmac_enet_remove(struct bgmac *bgmac)
+ phy_disconnect(bgmac->net_dev->phydev);
+ netif_napi_del(&bgmac->napi);
+ bgmac_dma_free(bgmac);
+- free_netdev(bgmac->net_dev);
+ }
+ EXPORT_SYMBOL_GPL(bgmac_enet_remove);
+
+diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
+index 110088e662eab..99a344175a751 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -474,6 +474,8 @@ struct bgmac {
+ int irq;
+ u32 int_mask;
+
++ bool in_init;
++
+ /* Current MAC state */
+ int mac_speed;
+ int mac_duplex;
+diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
+index 8c83973adca57..9d70d908c0646 100644
+--- a/drivers/net/ethernet/broadcom/bnx2.c
++++ b/drivers/net/ethernet/broadcom/bnx2.c
+@@ -8212,7 +8212,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
+ rc = dma_set_coherent_mask(&pdev->dev, persist_dma_mask);
+ if (rc) {
+ dev_err(&pdev->dev,
+- "pci_set_consistent_dma_mask failed, aborting\n");
++ "dma_set_coherent_mask failed, aborting\n");
+ goto err_out_unmap;
+ }
+ } else if ((rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+index e789430f407c3..9e79bcfb365fa 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+@@ -1850,6 +1850,14 @@ struct bnx2x {
+
+ /* Vxlan/Geneve related information */
+ u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
++
++#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0)
++ u32 fw_cap;
++
++ u32 fw_major;
++ u32 fw_minor;
++ u32 fw_rev;
++ u32 fw_eng;
+ };
+
+ /* Tx queues may be less or equal to Rx queues */
+@@ -2525,5 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp);
+ * Meant for implicit re-load flows.
+ */
+ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
+-
+ #endif /* bnx2x.h */
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+index b5d954cb409ae..4f669e7c75587 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+@@ -788,6 +788,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+ BNX2X_ERR("skb_put is about to fail... pad %d len %d rx_buf_size %d\n",
+ pad, len, fp->rx_buf_size);
+ bnx2x_panic();
++ bnx2x_frag_free(fp, new_data);
+ return;
+ }
+ #endif
+@@ -2363,26 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
+ /* is another pf loaded on this engine? */
+ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
+ load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
+- /* build my FW version dword */
+- u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
+- (BCM_5710_FW_MINOR_VERSION << 8) +
+- (BCM_5710_FW_REVISION_VERSION << 16) +
+- (BCM_5710_FW_ENGINEERING_VERSION << 24);
++ u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng;
++ u32 loaded_fw;
+
+ /* read loaded FW from chip */
+- u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
++ loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+
+- DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n",
+- loaded_fw, my_fw);
++ loaded_fw_major = loaded_fw & 0xff;
++ loaded_fw_minor = (loaded_fw >> 8) & 0xff;
++ loaded_fw_rev = (loaded_fw >> 16) & 0xff;
++ loaded_fw_eng = (loaded_fw >> 24) & 0xff;
++
++ DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n",
++ loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng);
+
+ /* abort nic load if version mismatch */
+- if (my_fw != loaded_fw) {
++ if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION ||
++ loaded_fw_minor != BCM_5710_FW_MINOR_VERSION ||
++ loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION ||
++ loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) {
+ if (print_err)
+- BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n",
+- loaded_fw, my_fw);
++ BNX2X_ERR("loaded FW incompatible. Aborting\n");
+ else
+- BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n",
+- loaded_fw, my_fw);
++ BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n");
++
+ return -EBUSY;
+ }
+ }
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+index 3f8435208bf49..a84d015da5dfa 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+@@ -241,6 +241,8 @@
+ IRO[221].m2))
+ #define XSTORM_VF_TO_PF_OFFSET(funcId) \
+ (IRO[48].base + ((funcId) * IRO[48].m1))
++#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \
++ (IRO[386].base + ((fid) * IRO[386].m1))
+ #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
+
+ /* eth hsi version */
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+index 622fadc50316e..611efee758340 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+@@ -3024,7 +3024,8 @@ struct afex_stats {
+
+ #define BCM_5710_FW_MAJOR_VERSION 7
+ #define BCM_5710_FW_MINOR_VERSION 13
+-#define BCM_5710_FW_REVISION_VERSION 15
++#define BCM_5710_FW_REVISION_VERSION 21
++#define BCM_5710_FW_REVISION_VERSION_V15 15
+ #define BCM_5710_FW_ENGINEERING_VERSION 0
+ #define BCM_5710_FW_COMPILE_FLAGS 1
+
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
+index 1835d2e451c01..fc7fce642666c 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
+@@ -635,11 +635,13 @@ static int bnx2x_ilt_client_mem_op(struct bnx2x *bp, int cli_num,
+ {
+ int i, rc;
+ struct bnx2x_ilt *ilt = BP_ILT(bp);
+- struct ilt_client_info *ilt_cli = &ilt->clients[cli_num];
++ struct ilt_client_info *ilt_cli;
+
+ if (!ilt || !ilt->lines)
+ return -1;
+
++ ilt_cli = &ilt->clients[cli_num];
++
+ if (ilt_cli->flags & (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM))
+ return 0;
+
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+index ae87296ae1ffa..9c26c46771f5e 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+@@ -74,9 +74,19 @@
+ __stringify(BCM_5710_FW_MINOR_VERSION) "." \
+ __stringify(BCM_5710_FW_REVISION_VERSION) "." \
+ __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
++#define FW_FILE_VERSION_V15 \
++ __stringify(BCM_5710_FW_MAJOR_VERSION) "." \
++ __stringify(BCM_5710_FW_MINOR_VERSION) "." \
++ __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \
++ __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
+ #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
++#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
+
+ /* Time in jiffies before concluding the transmitter is hung */
+ #define TX_TIMEOUT (5*HZ)
+@@ -90,6 +100,9 @@ MODULE_LICENSE("GPL");
+ MODULE_FIRMWARE(FW_FILE_NAME_E1);
+ MODULE_FIRMWARE(FW_FILE_NAME_E1H);
+ MODULE_FIRMWARE(FW_FILE_NAME_E2);
++MODULE_FIRMWARE(FW_FILE_NAME_E1_V15);
++MODULE_FIRMWARE(FW_FILE_NAME_E1H_V15);
++MODULE_FIRMWARE(FW_FILE_NAME_E2_V15);
+
+ int bnx2x_num_queues;
+ module_param_named(num_queues, bnx2x_num_queues, int, 0444);
+@@ -747,9 +760,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
+ CHIP_IS_E1(bp) ? "everest1" :
+ CHIP_IS_E1H(bp) ? "everest1h" :
+ CHIP_IS_E2(bp) ? "everest2" : "everest3",
+- BCM_5710_FW_MAJOR_VERSION,
+- BCM_5710_FW_MINOR_VERSION,
+- BCM_5710_FW_REVISION_VERSION);
++ bp->fw_major, bp->fw_minor, bp->fw_rev);
+
+ return rc;
+ }
+@@ -13311,16 +13322,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp)
+ /* Check FW version */
+ offset = be32_to_cpu(fw_hdr->fw_version.offset);
+ fw_ver = firmware->data + offset;
+- if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
+- (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
+- (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
+- (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
++ if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
++ fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
+ BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
+- fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
+- BCM_5710_FW_MAJOR_VERSION,
+- BCM_5710_FW_MINOR_VERSION,
+- BCM_5710_FW_REVISION_VERSION,
+- BCM_5710_FW_ENGINEERING_VERSION);
++ fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
++ bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
+ return -EINVAL;
+ }
+
+@@ -13400,32 +13406,49 @@ do { \
+
+ static int bnx2x_init_firmware(struct bnx2x *bp)
+ {
+- const char *fw_file_name;
++ const char *fw_file_name, *fw_file_name_v15;
+ struct bnx2x_fw_file_hdr *fw_hdr;
+ int rc;
+
+ if (bp->firmware)
+ return 0;
+
+- if (CHIP_IS_E1(bp))
++ if (CHIP_IS_E1(bp)) {
+ fw_file_name = FW_FILE_NAME_E1;
+- else if (CHIP_IS_E1H(bp))
++ fw_file_name_v15 = FW_FILE_NAME_E1_V15;
++ } else if (CHIP_IS_E1H(bp)) {
+ fw_file_name = FW_FILE_NAME_E1H;
+- else if (!CHIP_IS_E1x(bp))
++ fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
++ } else if (!CHIP_IS_E1x(bp)) {
+ fw_file_name = FW_FILE_NAME_E2;
+- else {
++ fw_file_name_v15 = FW_FILE_NAME_E2_V15;
++ } else {
+ BNX2X_ERR("Unsupported chip revision\n");
+ return -EINVAL;
+ }
++
+ BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
+
+ rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
+ if (rc) {
+- BNX2X_ERR("Can't load firmware file %s\n",
+- fw_file_name);
+- goto request_firmware_exit;
++ BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
++
++ /* try to load prev version */
++ rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
++
++ if (rc)
++ goto request_firmware_exit;
++
++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
++ } else {
++ bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
+ }
+
++ bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
++ bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
++ bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
++
+ rc = bnx2x_check_firmware(bp);
+ if (rc) {
+ BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
+@@ -14135,10 +14158,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
+
+ /* Stop Tx */
+ bnx2x_tx_disable(bp);
+- /* Delete all NAPI objects */
+- bnx2x_del_all_napi(bp);
+- if (CNIC_LOADED(bp))
+- bnx2x_del_all_napi_cnic(bp);
+ netdev_reset_tc(bp->dev);
+
+ del_timer_sync(&bp->timer);
+@@ -14243,6 +14262,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
+ bnx2x_drain_tx_queues(bp);
+ bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
+ bnx2x_netif_stop(bp, 1);
++ bnx2x_del_all_napi(bp);
++
++ if (CNIC_LOADED(bp))
++ bnx2x_del_all_napi_cnic(bp);
++
+ bnx2x_free_irq(bp);
+
+ /* Report UNLOAD_DONE to MCP */
+@@ -14293,11 +14317,16 @@ static void bnx2x_io_resume(struct pci_dev *pdev)
+ bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
+ DRV_MSG_SEQ_NUMBER_MASK;
+
+- if (netif_running(dev))
+- bnx2x_nic_load(bp, LOAD_NORMAL);
++ if (netif_running(dev)) {
++ if (bnx2x_nic_load(bp, LOAD_NORMAL)) {
++ netdev_err(bp->dev, "Error during driver initialization, try unloading/reloading the driver\n");
++ goto done;
++ }
++ }
+
+ netif_device_attach(dev);
+
++done:
+ rtnl_unlock();
+ }
+
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+index 6fbf735fca31c..a9f202bbada1b 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
+
+ void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
+ {
++ u16 abs_fid;
++
++ abs_fid = FW_VF_HANDLE(abs_vfid);
++
+ /* set the VF-PF association in the FW */
+- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
+- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
++ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
++ storm_memset_func_en(bp, abs_fid, 1);
++
++ /* Invalidate fp_hsi version for vfs */
++ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
++ REG_WR8(bp, BAR_XSTRORM_INTMEM +
++ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
+
+ /* clear vf errors*/
+ bnx2x_vf_semi_clear_err(bp, abs_vfid);
+@@ -786,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf)
+
+ static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid)
+ {
+- struct pci_dev *dev;
+ struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid);
++ struct pci_dev *dev;
++ bool pending;
+
+ if (!vf)
+ return false;
+
+ dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn);
+- if (dev)
+- return bnx2x_is_pcie_pending(dev);
+- return false;
++ if (!dev)
++ return false;
++ pending = bnx2x_is_pcie_pending(dev);
++ pci_dev_put(dev);
++
++ return pending;
+ }
+
+ int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid)
+diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
+index c6ef7ec2c1151..2bc2b707d6eee 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
++++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
+@@ -1,6 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ obj-$(CONFIG_BNXT) += bnxt_en.o
+
+-bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
++bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_coredump.o
+ bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
+ bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 62f84cc91e4d1..931bb40ac05b5 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -223,12 +223,12 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
+ { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
+ { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
+ { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
+- { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
++ { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57502_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
+- { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
+- { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
++ { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57508_NPAR },
++ { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57502_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
+- { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
++ { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57508_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
+ { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
+ #ifdef CONFIG_BNXT_SRIOV
+@@ -709,7 +709,6 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+
+ for (i = 0; i < nr_pkts; i++) {
+ struct bnxt_sw_tx_bd *tx_buf;
+- bool compl_deferred = false;
+ struct sk_buff *skb;
+ int j, last;
+
+@@ -718,6 +717,8 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+ skb = tx_buf->skb;
+ tx_buf->skb = NULL;
+
++ tx_bytes += skb->len;
++
+ if (tx_buf->is_push) {
+ tx_buf->is_push = 0;
+ goto next_tx_int;
+@@ -738,8 +739,9 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+ }
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
++ /* PTP worker takes ownership of the skb */
+ if (!bnxt_get_tx_ts_p5(bp, skb))
+- compl_deferred = true;
++ skb = NULL;
+ else
+ atomic_inc(&bp->ptp_cfg->tx_avail);
+ }
+@@ -748,9 +750,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+ next_tx_int:
+ cons = NEXT_TX(cons);
+
+- tx_bytes += skb->len;
+- if (!compl_deferred)
+- dev_kfree_skb_any(skb);
++ dev_kfree_skb_any(skb);
+ }
+
+ netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
+@@ -2699,6 +2699,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
+ u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
+ struct bnxt_cp_ring_info *cpr2;
+
++ /* No more budget for RX work */
++ if (budget && work_done >= budget && idx == BNXT_RX_HDL)
++ break;
++
+ cpr2 = cpr->cp_ring_arr[idx];
+ work_done += __bnxt_poll_work(bp, cpr2,
+ budget - work_done);
+@@ -2995,7 +2999,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
+
+ static void bnxt_free_tpa_info(struct bnxt *bp)
+ {
+- int i;
++ int i, j;
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+@@ -3003,8 +3007,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+ kfree(rxr->rx_tpa_idx_map);
+ rxr->rx_tpa_idx_map = NULL;
+ if (rxr->rx_tpa) {
+- kfree(rxr->rx_tpa[0].agg_arr);
+- rxr->rx_tpa[0].agg_arr = NULL;
++ for (j = 0; j < bp->max_tpa; j++) {
++ kfree(rxr->rx_tpa[j].agg_arr);
++ rxr->rx_tpa[j].agg_arr = NULL;
++ }
+ }
+ kfree(rxr->rx_tpa);
+ rxr->rx_tpa = NULL;
+@@ -3013,14 +3019,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+
+ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+ {
+- int i, j, total_aggs = 0;
++ int i, j;
+
+ bp->max_tpa = MAX_TPA;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (!bp->max_tpa_v2)
+ return 0;
+ bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+- total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
+ }
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+@@ -3034,12 +3039,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ continue;
+- agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
+- rxr->rx_tpa[0].agg_arr = agg;
+- if (!agg)
+- return -ENOMEM;
+- for (j = 1; j < bp->max_tpa; j++)
+- rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
++ for (j = 0; j < bp->max_tpa; j++) {
++ agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
++ if (!agg)
++ return -ENOMEM;
++ rxr->rx_tpa[j].agg_arr = agg;
++ }
+ rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+ GFP_KERNEL);
+ if (!rxr->rx_tpa_idx_map)
+@@ -3234,6 +3239,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
+ }
+ qidx = bp->tc_to_qidx[j];
+ ring->queue_id = bp->q_info[qidx].queue_id;
++ spin_lock_init(&txr->xdp_tx_lock);
+ if (i < bp->tx_nr_rings_xdp)
+ continue;
+ if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
+@@ -4757,8 +4763,10 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id)
+ return rc;
+
+ req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+- req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
+- req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
++ if (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST) {
++ req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
++ req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
++ }
+ req->mask = cpu_to_le32(vnic->rx_mask);
+ return hwrm_req_send_silent(bp, req);
+ }
+@@ -7429,7 +7437,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
+ u8 flags;
+ int rc;
+
+- if (bp->hwrm_spec_code < 0x10801) {
++ if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) {
+ rc = -ENODEV;
+ goto no_ptp;
+ }
+@@ -8004,6 +8012,12 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
+ bp->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout);
+ if (!bp->hwrm_cmd_timeout)
+ bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT;
++ bp->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000;
++ if (!bp->hwrm_cmd_max_timeout)
++ bp->hwrm_cmd_max_timeout = HWRM_CMD_MAX_TIMEOUT;
++ else if (bp->hwrm_cmd_max_timeout > HWRM_CMD_MAX_TIMEOUT)
++ netdev_warn(bp->dev, "Device requests max timeout of %d seconds, may trigger hung task watchdog\n",
++ bp->hwrm_cmd_max_timeout / 1000);
+
+ if (resp->hwrm_intf_maj_8b >= 1) {
+ bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len);
+@@ -8581,6 +8595,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
+ goto err_out;
+ }
+
++ if (BNXT_VF(bp))
++ bnxt_hwrm_func_qcfg(bp);
++
+ rc = bnxt_setup_vnic(bp, 0);
+ if (rc)
+ goto err_out;
+@@ -8609,6 +8626,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
+ vnic->uc_filter_count = 1;
+
+ vnic->rx_mask = 0;
++ if (test_bit(BNXT_STATE_HALF_OPEN, &bp->state))
++ goto skip_rx_mask;
++
+ if (bp->dev->flags & IFF_BROADCAST)
+ vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
+
+@@ -8618,7 +8638,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
+ if (bp->dev->flags & IFF_ALLMULTI) {
+ vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+ vnic->mc_list_count = 0;
+- } else {
++ } else if (bp->dev->flags & IFF_MULTICAST) {
+ u32 mask = 0;
+
+ bnxt_mc_list_updated(bp, &mask);
+@@ -8629,6 +8649,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
+ if (rc)
+ goto err_out;
+
++skip_rx_mask:
+ rc = bnxt_hwrm_set_coal(bp);
+ if (rc)
+ netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n",
+@@ -9006,10 +9027,14 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
+ netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
+ return rc;
+ }
+- if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) {
++ if (tcs && (bp->tx_nr_rings_per_tc * tcs !=
++ bp->tx_nr_rings - bp->tx_nr_rings_xdp)) {
+ netdev_err(bp->dev, "tx ring reservation failure\n");
+ netdev_reset_tc(bp->dev);
+- bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
++ if (bp->tx_nr_rings_xdp)
++ bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp;
++ else
++ bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+ return -ENOMEM;
+ }
+ return 0;
+@@ -9789,7 +9814,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
+
+ if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
+ resc_reinit = true;
+- if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
++ if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE ||
++ test_bit(BNXT_STATE_FW_RESET_DET, &bp->state))
+ fw_reset = true;
+ else if (bp->fw_health && !bp->fw_health->status_reliable)
+ bnxt_try_map_fw_health_reg(bp);
+@@ -10234,6 +10260,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
+ if (irq_re_init)
+ udp_tunnel_nic_reset_ntf(bp->dev);
+
++ if (bp->tx_nr_rings_xdp < num_possible_cpus()) {
++ if (!static_key_enabled(&bnxt_xdp_locking_key))
++ static_branch_enable(&bnxt_xdp_locking_key);
++ } else if (static_key_enabled(&bnxt_xdp_locking_key)) {
++ static_branch_disable(&bnxt_xdp_locking_key);
++ }
+ set_bit(BNXT_STATE_OPEN, &bp->state);
+ bnxt_enable_int(bp);
+ /* Enable TX queues */
+@@ -10289,13 +10321,15 @@ int bnxt_half_open_nic(struct bnxt *bp)
+ goto half_open_err;
+ }
+
+- rc = bnxt_alloc_mem(bp, false);
++ rc = bnxt_alloc_mem(bp, true);
+ if (rc) {
+ netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
+ goto half_open_err;
+ }
+- rc = bnxt_init_nic(bp, false);
++ set_bit(BNXT_STATE_HALF_OPEN, &bp->state);
++ rc = bnxt_init_nic(bp, true);
+ if (rc) {
++ clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
+ netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
+ goto half_open_err;
+ }
+@@ -10303,7 +10337,7 @@ int bnxt_half_open_nic(struct bnxt *bp)
+
+ half_open_err:
+ bnxt_free_skbs(bp);
+- bnxt_free_mem(bp, false);
++ bnxt_free_mem(bp, true);
+ dev_close(bp->dev);
+ return rc;
+ }
+@@ -10313,9 +10347,10 @@ half_open_err:
+ */
+ void bnxt_half_close_nic(struct bnxt *bp)
+ {
+- bnxt_hwrm_resource_free(bp, false, false);
++ bnxt_hwrm_resource_free(bp, false, true);
+ bnxt_free_skbs(bp);
+- bnxt_free_mem(bp, false);
++ bnxt_free_mem(bp, true);
++ clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
+ }
+
+ static void bnxt_reenable_sriov(struct bnxt *bp)
+@@ -10731,7 +10766,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
+ if (dev->flags & IFF_ALLMULTI) {
+ mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+ vnic->mc_list_count = 0;
+- } else {
++ } else if (dev->flags & IFF_MULTICAST) {
+ mc_update = bnxt_mc_list_updated(bp, &mask);
+ }
+
+@@ -10799,9 +10834,10 @@ skip_uc:
+ !bnxt_promisc_ok(bp))
+ vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
+ rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+- if (rc && vnic->mc_list_count) {
++ if (rc && (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST)) {
+ netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
+ rc);
++ vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_MCAST;
+ vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+ vnic->mc_list_count = 0;
+ rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+@@ -10858,7 +10894,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
+
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+ return bnxt_rfs_supported(bp);
+- if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
++ if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings)
+ return false;
+
+ vnics = 1 + bp->rx_nr_rings;
+@@ -12577,8 +12613,8 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(fltr, head, hash) {
+ if (bnxt_fltr_match(fltr, new_fltr)) {
++ rc = fltr->sw_id;
+ rcu_read_unlock();
+- rc = 0;
+ goto err_free;
+ }
+ }
+@@ -12665,26 +12701,37 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp)
+
+ #endif /* CONFIG_RFS_ACCEL */
+
+-static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
++static int bnxt_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
++ unsigned int entry, struct udp_tunnel_info *ti)
+ {
+ struct bnxt *bp = netdev_priv(netdev);
+- struct udp_tunnel_info ti;
+ unsigned int cmd;
+
+- udp_tunnel_nic_get_port(netdev, table, 0, &ti);
+- if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
++ if (ti->type == UDP_TUNNEL_TYPE_VXLAN)
+ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
+ else
+ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
+
+- if (ti.port)
+- return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
++ return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti->port, cmd);
++}
++
++static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
++ unsigned int entry, struct udp_tunnel_info *ti)
++{
++ struct bnxt *bp = netdev_priv(netdev);
++ unsigned int cmd;
++
++ if (ti->type == UDP_TUNNEL_TYPE_VXLAN)
++ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
++ else
++ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
+
+ return bnxt_hwrm_tunnel_dst_port_free(bp, cmd);
+ }
+
+ static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
+- .sync_table = bnxt_udp_tunnel_sync,
++ .set_port = bnxt_udp_tunnel_set_port,
++ .unset_port = bnxt_udp_tunnel_unset_port,
+ .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
+ UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
+ .tables = {
+@@ -13064,10 +13111,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
+ goto init_dflt_ring_err;
+
+ bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+- if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
+- bp->flags |= BNXT_FLAG_RFS;
+- bp->dev->features |= NETIF_F_NTUPLE;
+- }
++
++ bnxt_set_dflt_rfs(bp);
++
+ init_dflt_ring_err:
+ bnxt_ulp_irq_restart(bp, rc);
+ return rc;
+@@ -13370,7 +13416,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ }
+
+ bnxt_inv_fw_health_reg(bp);
+- bnxt_dl_register(bp);
++ rc = bnxt_dl_register(bp);
++ if (rc)
++ goto init_err_dl;
+
+ rc = register_netdev(dev);
+ if (rc)
+@@ -13390,6 +13438,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ init_err_cleanup:
+ bnxt_dl_unregister(bp);
++init_err_dl:
+ bnxt_shutdown_tc(bp);
+ bnxt_clear_int_mode(bp);
+
+@@ -13667,8 +13716,16 @@ static struct pci_driver bnxt_pci_driver = {
+
+ static int __init bnxt_init(void)
+ {
++ int err;
++
+ bnxt_debug_init();
+- return pci_register_driver(&bnxt_pci_driver);
++ err = pci_register_driver(&bnxt_pci_driver);
++ if (err) {
++ bnxt_debug_exit();
++ return err;
++ }
++
++ return 0;
+ }
+
+ static void __exit bnxt_exit(void)
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index 19fe6478e9b4b..ae4695fc067d5 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -584,7 +584,8 @@ struct nqe_cn {
+ #define BNXT_MAX_MTU 9500
+ #define BNXT_MAX_PAGE_MODE_MTU \
+ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \
+- XDP_PACKET_HEADROOM)
++ XDP_PACKET_HEADROOM - \
++ SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
+
+ #define BNXT_MIN_PKT_SIZE 52
+
+@@ -791,6 +792,8 @@ struct bnxt_tx_ring_info {
+ u32 dev_state;
+
+ struct bnxt_ring_struct tx_ring_struct;
++ /* Synchronize simultaneous xdp_xmit on same ring */
++ spinlock_t xdp_tx_lock;
+ };
+
+ #define BNXT_LEGACY_COAL_CMPL_PARAMS \
+@@ -1199,6 +1202,7 @@ struct bnxt_link_info {
+ #define BNXT_LINK_SPEED_40GB PORT_PHY_QCFG_RESP_LINK_SPEED_40GB
+ #define BNXT_LINK_SPEED_50GB PORT_PHY_QCFG_RESP_LINK_SPEED_50GB
+ #define BNXT_LINK_SPEED_100GB PORT_PHY_QCFG_RESP_LINK_SPEED_100GB
++#define BNXT_LINK_SPEED_200GB PORT_PHY_QCFG_RESP_LINK_SPEED_200GB
+ u16 support_speeds;
+ u16 support_pam4_speeds;
+ u16 auto_link_speeds; /* fw adv setting */
+@@ -1840,6 +1844,7 @@ struct bnxt {
+ #define BNXT_STATE_DRV_REGISTERED 7
+ #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8
+ #define BNXT_STATE_NAPI_DISABLED 9
++#define BNXT_STATE_HALF_OPEN 15 /* For offline ethtool tests */
+
+ #define BNXT_NO_FW_ACCESS(bp) \
+ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
+@@ -1901,7 +1906,8 @@ struct bnxt {
+
+ u16 hwrm_max_req_len;
+ u16 hwrm_max_ext_req_len;
+- int hwrm_cmd_timeout;
++ unsigned int hwrm_cmd_timeout;
++ unsigned int hwrm_cmd_max_timeout;
+ struct mutex hwrm_cmd_lock; /* serialize hwrm messages */
+ struct hwrm_ver_get_output ver_resp;
+ #define FW_VER_STR_LEN 32
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
+new file mode 100644
+index 0000000000000..156f76bcea7eb
+--- /dev/null
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
+@@ -0,0 +1,372 @@
++/* Broadcom NetXtreme-C/E network driver.
++ *
++ * Copyright (c) 2021 Broadcom Limited
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.
++ */
++
++#include <linux/types.h>
++#include <linux/errno.h>
++#include <linux/pci.h>
++#include "bnxt_hsi.h"
++#include "bnxt.h"
++#include "bnxt_hwrm.h"
++#include "bnxt_coredump.h"
++
++static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
++ struct bnxt_hwrm_dbg_dma_info *info)
++{
++ struct hwrm_dbg_cmn_input *cmn_req = msg;
++ __le16 *seq_ptr = msg + info->seq_off;
++ struct hwrm_dbg_cmn_output *cmn_resp;
++ u16 seq = 0, len, segs_off;
++ dma_addr_t dma_handle;
++ void *dma_buf, *resp;
++ int rc, off = 0;
++
++ dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
++ if (!dma_buf) {
++ hwrm_req_drop(bp, msg);
++ return -ENOMEM;
++ }
++
++ hwrm_req_timeout(bp, msg, bp->hwrm_cmd_max_timeout);
++ cmn_resp = hwrm_req_hold(bp, msg);
++ resp = cmn_resp;
++
++ segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
++ total_segments);
++ cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
++ cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
++ while (1) {
++ *seq_ptr = cpu_to_le16(seq);
++ rc = hwrm_req_send(bp, msg);
++ if (rc)
++ break;
++
++ len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off)));
++ if (!seq &&
++ cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) {
++ info->segs = le16_to_cpu(*((__le16 *)(resp +
++ segs_off)));
++ if (!info->segs) {
++ rc = -EIO;
++ break;
++ }
++
++ info->dest_buf_size = info->segs *
++ sizeof(struct coredump_segment_record);
++ info->dest_buf = kmalloc(info->dest_buf_size,
++ GFP_KERNEL);
++ if (!info->dest_buf) {
++ rc = -ENOMEM;
++ break;
++ }
++ }
++
++ if (info->dest_buf) {
++ if ((info->seg_start + off + len) <=
++ BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
++ memcpy(info->dest_buf + off, dma_buf, len);
++ } else {
++ rc = -ENOBUFS;
++ break;
++ }
++ }
++
++ if (cmn_req->req_type ==
++ cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
++ info->dest_buf_size += len;
++
++ if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE))
++ break;
++
++ seq++;
++ off += len;
++ }
++ hwrm_req_drop(bp, msg);
++ return rc;
++}
++
++static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
++ struct bnxt_coredump *coredump)
++{
++ struct bnxt_hwrm_dbg_dma_info info = {NULL};
++ struct hwrm_dbg_coredump_list_input *req;
++ int rc;
++
++ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
++ if (rc)
++ return rc;
++
++ info.dma_len = COREDUMP_LIST_BUF_LEN;
++ info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
++ info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
++ data_len);
++
++ rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
++ if (!rc) {
++ coredump->data = info.dest_buf;
++ coredump->data_size = info.dest_buf_size;
++ coredump->total_segs = info.segs;
++ }
++ return rc;
++}
++
++static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
++ u16 segment_id)
++{
++ struct hwrm_dbg_coredump_initiate_input *req;
++ int rc;
++
++ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
++ if (rc)
++ return rc;
++
++ hwrm_req_timeout(bp, req, bp->hwrm_cmd_max_timeout);
++ req->component_id = cpu_to_le16(component_id);
++ req->segment_id = cpu_to_le16(segment_id);
++
++ return hwrm_req_send(bp, req);
++}
++
++static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
++ u16 segment_id, u32 *seg_len,
++ void *buf, u32 buf_len, u32 offset)
++{
++ struct hwrm_dbg_coredump_retrieve_input *req;
++ struct bnxt_hwrm_dbg_dma_info info = {NULL};
++ int rc;
++
++ rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
++ if (rc)
++ return rc;
++
++ req->component_id = cpu_to_le16(component_id);
++ req->segment_id = cpu_to_le16(segment_id);
++
++ info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
++ info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
++ seq_no);
++ info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
++ data_len);
++ if (buf) {
++ info.dest_buf = buf + offset;
++ info.buf_len = buf_len;
++ info.seg_start = offset;
++ }
++
++ rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
++ if (!rc)
++ *seg_len = info.dest_buf_size;
++
++ return rc;
++}
++
++static void
++bnxt_fill_coredump_seg_hdr(struct bnxt *bp,
++ struct bnxt_coredump_segment_hdr *seg_hdr,
++ struct coredump_segment_record *seg_rec, u32 seg_len,
++ int status, u32 duration, u32 instance)
++{
++ memset(seg_hdr, 0, sizeof(*seg_hdr));
++ memcpy(seg_hdr->signature, "sEgM", 4);
++ if (seg_rec) {
++ seg_hdr->component_id = (__force __le32)seg_rec->component_id;
++ seg_hdr->segment_id = (__force __le32)seg_rec->segment_id;
++ seg_hdr->low_version = seg_rec->version_low;
++ seg_hdr->high_version = seg_rec->version_hi;
++ } else {
++ /* For hwrm_ver_get response Component id = 2
++ * and Segment id = 0
++ */
++ seg_hdr->component_id = cpu_to_le32(2);
++ seg_hdr->segment_id = 0;
++ }
++ seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn);
++ seg_hdr->length = cpu_to_le32(seg_len);
++ seg_hdr->status = cpu_to_le32(status);
++ seg_hdr->duration = cpu_to_le32(duration);
++ seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr));
++ seg_hdr->instance = cpu_to_le32(instance);
++}
++
++static void
++bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
++ time64_t start, s16 start_utc, u16 total_segs,
++ int status)
++{
++ time64_t end = ktime_get_real_seconds();
++ u32 os_ver_major = 0, os_ver_minor = 0;
++ struct tm tm;
++
++ time64_to_tm(start, 0, &tm);
++ memset(record, 0, sizeof(*record));
++ memcpy(record->signature, "cOrE", 4);
++ record->flags = 0;
++ record->low_version = 0;
++ record->high_version = 1;
++ record->asic_state = 0;
++ strscpy(record->system_name, utsname()->nodename,
++ sizeof(record->system_name));
++ record->year = cpu_to_le16(tm.tm_year + 1900);
++ record->month = cpu_to_le16(tm.tm_mon + 1);
++ record->day = cpu_to_le16(tm.tm_mday);
++ record->hour = cpu_to_le16(tm.tm_hour);
++ record->minute = cpu_to_le16(tm.tm_min);
++ record->second = cpu_to_le16(tm.tm_sec);
++ record->utc_bias = cpu_to_le16(start_utc);
++ strcpy(record->commandline, "ethtool -w");
++ record->total_segments = cpu_to_le32(total_segs);
++
++ if (sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor) != 2)
++ netdev_warn(bp->dev, "Unknown OS release in coredump\n");
++ record->os_ver_major = cpu_to_le32(os_ver_major);
++ record->os_ver_minor = cpu_to_le32(os_ver_minor);
++
++ strscpy(record->os_name, utsname()->sysname, sizeof(record->os_name));
++ time64_to_tm(end, 0, &tm);
++ record->end_year = cpu_to_le16(tm.tm_year + 1900);
++ record->end_month = cpu_to_le16(tm.tm_mon + 1);
++ record->end_day = cpu_to_le16(tm.tm_mday);
++ record->end_hour = cpu_to_le16(tm.tm_hour);
++ record->end_minute = cpu_to_le16(tm.tm_min);
++ record->end_second = cpu_to_le16(tm.tm_sec);
++ record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60);
++ record->asic_id1 = cpu_to_le32(bp->chip_num << 16 |
++ bp->ver_resp.chip_rev << 8 |
++ bp->ver_resp.chip_metal);
++ record->asic_id2 = 0;
++ record->coredump_status = cpu_to_le32(status);
++ record->ioctl_low_version = 0;
++ record->ioctl_high_version = 0;
++}
++
++static int __bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
++{
++ u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
++ u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
++ struct coredump_segment_record *seg_record = NULL;
++ struct bnxt_coredump_segment_hdr seg_hdr;
++ struct bnxt_coredump coredump = {NULL};
++ time64_t start_time;
++ u16 start_utc;
++ int rc = 0, i;
++
++ if (buf)
++ buf_len = *dump_len;
++
++ start_time = ktime_get_real_seconds();
++ start_utc = sys_tz.tz_minuteswest * 60;
++ seg_hdr_len = sizeof(seg_hdr);
++
++ /* First segment should be hwrm_ver_get response */
++ *dump_len = seg_hdr_len + ver_get_resp_len;
++ if (buf) {
++ bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len,
++ 0, 0, 0);
++ memcpy(buf + offset, &seg_hdr, seg_hdr_len);
++ offset += seg_hdr_len;
++ memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len);
++ offset += ver_get_resp_len;
++ }
++
++ rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump);
++ if (rc) {
++ netdev_err(bp->dev, "Failed to get coredump segment list\n");
++ goto err;
++ }
++
++ *dump_len += seg_hdr_len * coredump.total_segs;
++
++ seg_record = (struct coredump_segment_record *)coredump.data;
++ seg_record_len = sizeof(*seg_record);
++
++ for (i = 0; i < coredump.total_segs; i++) {
++ u16 comp_id = le16_to_cpu(seg_record->component_id);
++ u16 seg_id = le16_to_cpu(seg_record->segment_id);
++ u32 duration = 0, seg_len = 0;
++ unsigned long start, end;
++
++ if (buf && ((offset + seg_hdr_len) >
++ BNXT_COREDUMP_BUF_LEN(buf_len))) {
++ rc = -ENOBUFS;
++ goto err;
++ }
++
++ start = jiffies;
++
++ rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
++ if (rc) {
++ netdev_err(bp->dev,
++ "Failed to initiate coredump for seg = %d\n",
++ seg_record->segment_id);
++ goto next_seg;
++ }
++
++ /* Write segment data into the buffer */
++ rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
++ &seg_len, buf, buf_len,
++ offset + seg_hdr_len);
++ if (rc && rc == -ENOBUFS)
++ goto err;
++ else if (rc)
++ netdev_err(bp->dev,
++ "Failed to retrieve coredump for seg = %d\n",
++ seg_record->segment_id);
++
++next_seg:
++ end = jiffies;
++ duration = jiffies_to_msecs(end - start);
++ bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len,
++ rc, duration, 0);
++
++ if (buf) {
++ /* Write segment header into the buffer */
++ memcpy(buf + offset, &seg_hdr, seg_hdr_len);
++ offset += seg_hdr_len + seg_len;
++ }
++
++ *dump_len += seg_len;
++ seg_record =
++ (struct coredump_segment_record *)((u8 *)seg_record +
++ seg_record_len);
++ }
++
++err:
++ if (buf)
++ bnxt_fill_coredump_record(bp, buf + offset, start_time,
++ start_utc, coredump.total_segs + 1,
++ rc);
++ kfree(coredump.data);
++ *dump_len += sizeof(struct bnxt_coredump_record);
++ if (rc == -ENOBUFS)
++ netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
++ return rc;
++}
++
++int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len)
++{
++ if (dump_type == BNXT_DUMP_CRASH) {
++#ifdef CONFIG_TEE_BNXT_FW
++ return tee_bnxt_copy_coredump(buf, 0, *dump_len);
++#else
++ return -EOPNOTSUPP;
++#endif
++ } else {
++ return __bnxt_get_coredump(bp, buf, dump_len);
++ }
++}
++
++u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type)
++{
++ u32 len = 0;
++
++ if (dump_type == BNXT_DUMP_CRASH)
++ len = BNXT_CRASH_DUMP_LEN;
++ else
++ __bnxt_get_coredump(bp, NULL, &len);
++ return len;
++}
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
+index 09c22f8fe3991..b1a1b2fffb194 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
+@@ -10,6 +10,10 @@
+ #ifndef BNXT_COREDUMP_H
+ #define BNXT_COREDUMP_H
+
++#include <linux/utsname.h>
++#include <linux/time.h>
++#include <linux/rtc.h>
++
+ struct bnxt_coredump_segment_hdr {
+ __u8 signature[4];
+ __le32 component_id;
+@@ -63,4 +67,51 @@ struct bnxt_coredump_record {
+ __u8 ioctl_high_version;
+ __le16 rsvd3[313];
+ };
++
++#define BNXT_CRASH_DUMP_LEN (8 << 20)
++
++#define COREDUMP_LIST_BUF_LEN 2048
++#define COREDUMP_RETRIEVE_BUF_LEN 4096
++
++struct bnxt_coredump {
++ void *data;
++ int data_size;
++ u16 total_segs;
++};
++
++#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
++
++struct bnxt_hwrm_dbg_dma_info {
++ void *dest_buf;
++ int dest_buf_size;
++ u16 dma_len;
++ u16 seq_off;
++ u16 data_len_off;
++ u16 segs;
++ u32 seg_start;
++ u32 buf_len;
++};
++
++struct hwrm_dbg_cmn_input {
++ __le16 req_type;
++ __le16 cmpl_ring;
++ __le16 seq_id;
++ __le16 target_id;
++ __le64 resp_addr;
++ __le64 host_dest_addr;
++ __le32 host_buf_len;
++};
++
++struct hwrm_dbg_cmn_output {
++ __le16 error_code;
++ __le16 req_type;
++ __le16 seq_id;
++ __le16 resp_len;
++ u8 flags;
++ #define HWRM_DBG_CMN_FLAGS_MORE 1
++};
++
++int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len);
++u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type);
++
+ #endif
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+index 9576547df4aba..2a80882971e3d 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -134,7 +134,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!bp->dl || !health)
++ if (!health)
+ return;
+
+ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
+@@ -188,7 +188,7 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
+ {
+ struct bnxt_fw_health *health = bp->fw_health;
+
+- if (!bp->dl || !health)
++ if (!health)
+ return;
+
+ if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
+@@ -781,6 +781,7 @@ int bnxt_dl_register(struct bnxt *bp)
+ {
+ const struct devlink_ops *devlink_ops;
+ struct devlink_port_attrs attrs = {};
++ struct bnxt_dl *bp_dl;
+ struct devlink *dl;
+ int rc;
+
+@@ -795,7 +796,9 @@ int bnxt_dl_register(struct bnxt *bp)
+ return -ENOMEM;
+ }
+
+- bnxt_link_bp_to_dl(bp, dl);
++ bp->dl = dl;
++ bp_dl = devlink_priv(dl);
++ bp_dl->bp = bp;
+
+ /* Add switchdev eswitch mode setting, if SRIOV supported */
+ if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV) &&
+@@ -833,7 +836,6 @@ err_dl_port_unreg:
+ err_dl_unreg:
+ devlink_unregister(dl);
+ err_dl_free:
+- bnxt_link_bp_to_dl(bp, NULL);
+ devlink_free(dl);
+ return rc;
+ }
+@@ -842,9 +844,6 @@ void bnxt_dl_unregister(struct bnxt *bp)
+ {
+ struct devlink *dl = bp->dl;
+
+- if (!dl)
+- return;
+-
+ if (BNXT_PF(bp)) {
+ bnxt_dl_params_unregister(bp);
+ devlink_port_unregister(&bp->dl_port);
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+index d889f240da2b2..406dc655a5fc9 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+@@ -20,19 +20,6 @@ static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
+ return ((struct bnxt_dl *)devlink_priv(dl))->bp;
+ }
+
+-/* To clear devlink pointer from bp, pass NULL dl */
+-static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
+-{
+- bp->dl = dl;
+-
+- /* add a back pointer in dl to bp */
+- if (dl) {
+- struct bnxt_dl *bp_dl = devlink_priv(dl);
+-
+- bp_dl->bp = bp;
+- }
+-}
+-
+ #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108
+ #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114
+ #define NVM_OFF_IGNORE_ARI 164
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+index 7260910e75fb2..8ebc1c522a05b 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -25,15 +25,13 @@
+ #include "bnxt_hsi.h"
+ #include "bnxt.h"
+ #include "bnxt_hwrm.h"
++#include "bnxt_ulp.h"
+ #include "bnxt_xdp.h"
+ #include "bnxt_ptp.h"
+ #include "bnxt_ethtool.h"
+ #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */
+ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */
+ #include "bnxt_coredump.h"
+-#define FLASH_NVRAM_TIMEOUT ((HWRM_CMD_TIMEOUT) * 100)
+-#define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200)
+-#define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200)
+
+ static u32 bnxt_get_msglevel(struct net_device *dev)
+ {
+@@ -134,7 +132,7 @@ static int bnxt_set_coalesce(struct net_device *dev,
+ }
+
+ reset_coalesce:
+- if (netif_running(dev)) {
++ if (test_bit(BNXT_STATE_OPEN, &bp->state)) {
+ if (update_stats) {
+ rc = bnxt_close_nic(bp, true, false);
+ if (!rc)
+@@ -1672,6 +1670,8 @@ u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
+ return SPEED_50000;
+ case BNXT_LINK_SPEED_100GB:
+ return SPEED_100000;
++ case BNXT_LINK_SPEED_200GB:
++ return SPEED_200000;
+ default:
+ return SPEED_UNKNOWN;
+ }
+@@ -1945,6 +1945,9 @@ static int bnxt_get_fecparam(struct net_device *dev,
+ case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE:
+ fec->active_fec |= ETHTOOL_FEC_LLRS;
+ break;
++ case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE:
++ fec->active_fec |= ETHTOOL_FEC_OFF;
++ break;
+ }
+ return 0;
+ }
+@@ -2073,9 +2076,7 @@ static int bnxt_set_pauseparam(struct net_device *dev,
+ }
+
+ link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+- if (bp->hwrm_spec_code >= 0x10201)
+- link_info->req_flow_ctrl =
+- PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE;
++ link_info->req_flow_ctrl = 0;
+ } else {
+ /* when transition from auto pause to force pause,
+ * force a link change
+@@ -2167,7 +2168,7 @@ static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
+ req->host_src_addr = cpu_to_le64(dma_handle);
+ }
+
+- hwrm_req_timeout(bp, req, FLASH_NVRAM_TIMEOUT);
++ hwrm_req_timeout(bp, req, bp->hwrm_cmd_max_timeout);
+ req->dir_type = cpu_to_le16(dir_type);
+ req->dir_ordinal = cpu_to_le16(dir_ordinal);
+ req->dir_ext = cpu_to_le16(dir_ext);
+@@ -2508,8 +2509,8 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
+ return rc;
+ }
+
+- hwrm_req_timeout(bp, modify, FLASH_PACKAGE_TIMEOUT);
+- hwrm_req_timeout(bp, install, INSTALL_PACKAGE_TIMEOUT);
++ hwrm_req_timeout(bp, modify, bp->hwrm_cmd_max_timeout);
++ hwrm_req_timeout(bp, install, bp->hwrm_cmd_max_timeout);
+
+ hwrm_req_hold(bp, modify);
+ modify->host_src_addr = cpu_to_le64(dma_handle);
+@@ -2708,7 +2709,7 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
+ if (rc)
+ return rc;
+
+- buflen = dir_entries * entry_length;
++ buflen = mul_u32_u32(dir_entries, entry_length);
+ buf = hwrm_req_dma_slice(bp, req, buflen, &dma_handle);
+ if (!buf) {
+ hwrm_req_drop(bp, req);
+@@ -3408,7 +3409,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
+ if (!skb)
+ return -ENOMEM;
+ data = skb_put(skb, pkt_size);
+- eth_broadcast_addr(data);
++ ether_addr_copy(&data[i], bp->dev->dev_addr);
+ i += ETH_ALEN;
+ ether_addr_copy(&data[i], bp->dev->dev_addr);
+ i += ETH_ALEN;
+@@ -3502,9 +3503,13 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
+ if (!offline) {
+ bnxt_run_fw_tests(bp, test_mask, &test_results);
+ } else {
+- rc = bnxt_close_nic(bp, false, false);
+- if (rc)
++ bnxt_ulp_stop(bp);
++ rc = bnxt_close_nic(bp, true, false);
++ if (rc) {
++ etest->flags |= ETH_TEST_FL_FAILED;
++ bnxt_ulp_start(bp, rc);
+ return;
++ }
+ bnxt_run_fw_tests(bp, test_mask, &test_results);
+
+ buf[BNXT_MACLPBK_TEST_IDX] = 1;
+@@ -3514,6 +3519,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
+ if (rc) {
+ bnxt_hwrm_mac_loopback(bp, false);
+ etest->flags |= ETH_TEST_FL_FAILED;
++ bnxt_ulp_start(bp, rc);
+ return;
+ }
+ if (bnxt_run_loopback(bp))
+@@ -3539,7 +3545,8 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
+ }
+ bnxt_hwrm_phy_loopback(bp, false, false);
+ bnxt_half_close_nic(bp);
+- rc = bnxt_open_nic(bp, false, true);
++ rc = bnxt_open_nic(bp, true, true);
++ bnxt_ulp_start(bp, rc);
+ }
+ if (rc || bnxt_test_irq(bp)) {
+ buf[BNXT_IRQ_TEST_IDX] = 1;
+@@ -3590,7 +3597,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
+ }
+ }
+
+- if (req & BNXT_FW_RESET_AP) {
++ if (!BNXT_CHIP_P4_PLUS(bp) && (req & BNXT_FW_RESET_AP)) {
+ /* This feature is not supported in older firmware versions */
+ if (bp->hwrm_spec_code >= 0x10803) {
+ if (!bnxt_firmware_reset_ap(dev)) {
+@@ -3609,337 +3616,6 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
+ return 0;
+ }
+
+-static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
+- struct bnxt_hwrm_dbg_dma_info *info)
+-{
+- struct hwrm_dbg_cmn_input *cmn_req = msg;
+- __le16 *seq_ptr = msg + info->seq_off;
+- struct hwrm_dbg_cmn_output *cmn_resp;
+- u16 seq = 0, len, segs_off;
+- dma_addr_t dma_handle;
+- void *dma_buf, *resp;
+- int rc, off = 0;
+-
+- dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
+- if (!dma_buf) {
+- hwrm_req_drop(bp, msg);
+- return -ENOMEM;
+- }
+-
+- hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT);
+- cmn_resp = hwrm_req_hold(bp, msg);
+- resp = cmn_resp;
+-
+- segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
+- total_segments);
+- cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
+- cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
+- while (1) {
+- *seq_ptr = cpu_to_le16(seq);
+- rc = hwrm_req_send(bp, msg);
+- if (rc)
+- break;
+-
+- len = le16_to_cpu(*((__le16 *)(resp + info->data_len_off)));
+- if (!seq &&
+- cmn_req->req_type == cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) {
+- info->segs = le16_to_cpu(*((__le16 *)(resp +
+- segs_off)));
+- if (!info->segs) {
+- rc = -EIO;
+- break;
+- }
+-
+- info->dest_buf_size = info->segs *
+- sizeof(struct coredump_segment_record);
+- info->dest_buf = kmalloc(info->dest_buf_size,
+- GFP_KERNEL);
+- if (!info->dest_buf) {
+- rc = -ENOMEM;
+- break;
+- }
+- }
+-
+- if (info->dest_buf) {
+- if ((info->seg_start + off + len) <=
+- BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
+- memcpy(info->dest_buf + off, dma_buf, len);
+- } else {
+- rc = -ENOBUFS;
+- break;
+- }
+- }
+-
+- if (cmn_req->req_type ==
+- cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
+- info->dest_buf_size += len;
+-
+- if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE))
+- break;
+-
+- seq++;
+- off += len;
+- }
+- hwrm_req_drop(bp, msg);
+- return rc;
+-}
+-
+-static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
+- struct bnxt_coredump *coredump)
+-{
+- struct bnxt_hwrm_dbg_dma_info info = {NULL};
+- struct hwrm_dbg_coredump_list_input *req;
+- int rc;
+-
+- rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
+- if (rc)
+- return rc;
+-
+- info.dma_len = COREDUMP_LIST_BUF_LEN;
+- info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
+- info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
+- data_len);
+-
+- rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
+- if (!rc) {
+- coredump->data = info.dest_buf;
+- coredump->data_size = info.dest_buf_size;
+- coredump->total_segs = info.segs;
+- }
+- return rc;
+-}
+-
+-static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
+- u16 segment_id)
+-{
+- struct hwrm_dbg_coredump_initiate_input *req;
+- int rc;
+-
+- rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
+- if (rc)
+- return rc;
+-
+- hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);
+- req->component_id = cpu_to_le16(component_id);
+- req->segment_id = cpu_to_le16(segment_id);
+-
+- return hwrm_req_send(bp, req);
+-}
+-
+-static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
+- u16 segment_id, u32 *seg_len,
+- void *buf, u32 buf_len, u32 offset)
+-{
+- struct hwrm_dbg_coredump_retrieve_input *req;
+- struct bnxt_hwrm_dbg_dma_info info = {NULL};
+- int rc;
+-
+- rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
+- if (rc)
+- return rc;
+-
+- req->component_id = cpu_to_le16(component_id);
+- req->segment_id = cpu_to_le16(segment_id);
+-
+- info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
+- info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
+- seq_no);
+- info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
+- data_len);
+- if (buf) {
+- info.dest_buf = buf + offset;
+- info.buf_len = buf_len;
+- info.seg_start = offset;
+- }
+-
+- rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
+- if (!rc)
+- *seg_len = info.dest_buf_size;
+-
+- return rc;
+-}
+-
+-static void
+-bnxt_fill_coredump_seg_hdr(struct bnxt *bp,
+- struct bnxt_coredump_segment_hdr *seg_hdr,
+- struct coredump_segment_record *seg_rec, u32 seg_len,
+- int status, u32 duration, u32 instance)
+-{
+- memset(seg_hdr, 0, sizeof(*seg_hdr));
+- memcpy(seg_hdr->signature, "sEgM", 4);
+- if (seg_rec) {
+- seg_hdr->component_id = (__force __le32)seg_rec->component_id;
+- seg_hdr->segment_id = (__force __le32)seg_rec->segment_id;
+- seg_hdr->low_version = seg_rec->version_low;
+- seg_hdr->high_version = seg_rec->version_hi;
+- } else {
+- /* For hwrm_ver_get response Component id = 2
+- * and Segment id = 0
+- */
+- seg_hdr->component_id = cpu_to_le32(2);
+- seg_hdr->segment_id = 0;
+- }
+- seg_hdr->function_id = cpu_to_le16(bp->pdev->devfn);
+- seg_hdr->length = cpu_to_le32(seg_len);
+- seg_hdr->status = cpu_to_le32(status);
+- seg_hdr->duration = cpu_to_le32(duration);
+- seg_hdr->data_offset = cpu_to_le32(sizeof(*seg_hdr));
+- seg_hdr->instance = cpu_to_le32(instance);
+-}
+-
+-static void
+-bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
+- time64_t start, s16 start_utc, u16 total_segs,
+- int status)
+-{
+- time64_t end = ktime_get_real_seconds();
+- u32 os_ver_major = 0, os_ver_minor = 0;
+- struct tm tm;
+-
+- time64_to_tm(start, 0, &tm);
+- memset(record, 0, sizeof(*record));
+- memcpy(record->signature, "cOrE", 4);
+- record->flags = 0;
+- record->low_version = 0;
+- record->high_version = 1;
+- record->asic_state = 0;
+- strlcpy(record->system_name, utsname()->nodename,
+- sizeof(record->system_name));
+- record->year = cpu_to_le16(tm.tm_year + 1900);
+- record->month = cpu_to_le16(tm.tm_mon + 1);
+- record->day = cpu_to_le16(tm.tm_mday);
+- record->hour = cpu_to_le16(tm.tm_hour);
+- record->minute = cpu_to_le16(tm.tm_min);
+- record->second = cpu_to_le16(tm.tm_sec);
+- record->utc_bias = cpu_to_le16(start_utc);
+- strcpy(record->commandline, "ethtool -w");
+- record->total_segments = cpu_to_le32(total_segs);
+-
+- sscanf(utsname()->release, "%u.%u", &os_ver_major, &os_ver_minor);
+- record->os_ver_major = cpu_to_le32(os_ver_major);
+- record->os_ver_minor = cpu_to_le32(os_ver_minor);
+-
+- strlcpy(record->os_name, utsname()->sysname, 32);
+- time64_to_tm(end, 0, &tm);
+- record->end_year = cpu_to_le16(tm.tm_year + 1900);
+- record->end_month = cpu_to_le16(tm.tm_mon + 1);
+- record->end_day = cpu_to_le16(tm.tm_mday);
+- record->end_hour = cpu_to_le16(tm.tm_hour);
+- record->end_minute = cpu_to_le16(tm.tm_min);
+- record->end_second = cpu_to_le16(tm.tm_sec);
+- record->end_utc_bias = cpu_to_le16(sys_tz.tz_minuteswest * 60);
+- record->asic_id1 = cpu_to_le32(bp->chip_num << 16 |
+- bp->ver_resp.chip_rev << 8 |
+- bp->ver_resp.chip_metal);
+- record->asic_id2 = 0;
+- record->coredump_status = cpu_to_le32(status);
+- record->ioctl_low_version = 0;
+- record->ioctl_high_version = 0;
+-}
+-
+-static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
+-{
+- u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
+- u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
+- struct coredump_segment_record *seg_record = NULL;
+- struct bnxt_coredump_segment_hdr seg_hdr;
+- struct bnxt_coredump coredump = {NULL};
+- time64_t start_time;
+- u16 start_utc;
+- int rc = 0, i;
+-
+- if (buf)
+- buf_len = *dump_len;
+-
+- start_time = ktime_get_real_seconds();
+- start_utc = sys_tz.tz_minuteswest * 60;
+- seg_hdr_len = sizeof(seg_hdr);
+-
+- /* First segment should be hwrm_ver_get response */
+- *dump_len = seg_hdr_len + ver_get_resp_len;
+- if (buf) {
+- bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, ver_get_resp_len,
+- 0, 0, 0);
+- memcpy(buf + offset, &seg_hdr, seg_hdr_len);
+- offset += seg_hdr_len;
+- memcpy(buf + offset, &bp->ver_resp, ver_get_resp_len);
+- offset += ver_get_resp_len;
+- }
+-
+- rc = bnxt_hwrm_dbg_coredump_list(bp, &coredump);
+- if (rc) {
+- netdev_err(bp->dev, "Failed to get coredump segment list\n");
+- goto err;
+- }
+-
+- *dump_len += seg_hdr_len * coredump.total_segs;
+-
+- seg_record = (struct coredump_segment_record *)coredump.data;
+- seg_record_len = sizeof(*seg_record);
+-
+- for (i = 0; i < coredump.total_segs; i++) {
+- u16 comp_id = le16_to_cpu(seg_record->component_id);
+- u16 seg_id = le16_to_cpu(seg_record->segment_id);
+- u32 duration = 0, seg_len = 0;
+- unsigned long start, end;
+-
+- if (buf && ((offset + seg_hdr_len) >
+- BNXT_COREDUMP_BUF_LEN(buf_len))) {
+- rc = -ENOBUFS;
+- goto err;
+- }
+-
+- start = jiffies;
+-
+- rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
+- if (rc) {
+- netdev_err(bp->dev,
+- "Failed to initiate coredump for seg = %d\n",
+- seg_record->segment_id);
+- goto next_seg;
+- }
+-
+- /* Write segment data into the buffer */
+- rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
+- &seg_len, buf, buf_len,
+- offset + seg_hdr_len);
+- if (rc && rc == -ENOBUFS)
+- goto err;
+- else if (rc)
+- netdev_err(bp->dev,
+- "Failed to retrieve coredump for seg = %d\n",
+- seg_record->segment_id);
+-
+-next_seg:
+- end = jiffies;
+- duration = jiffies_to_msecs(end - start);
+- bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, seg_record, seg_len,
+- rc, duration, 0);
+-
+- if (buf) {
+- /* Write segment header into the buffer */
+- memcpy(buf + offset, &seg_hdr, seg_hdr_len);
+- offset += seg_hdr_len + seg_len;
+- }
+-
+- *dump_len += seg_len;
+- seg_record =
+- (struct coredump_segment_record *)((u8 *)seg_record +
+- seg_record_len);
+- }
+-
+-err:
+- if (buf)
+- bnxt_fill_coredump_record(bp, buf + offset, start_time,
+- start_utc, coredump.total_segs + 1,
+- rc);
+- kfree(coredump.data);
+- *dump_len += sizeof(struct bnxt_coredump_record);
+- if (rc == -ENOBUFS)
+- netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
+- return rc;
+-}
+-
+ static int bnxt_set_dump(struct net_device *dev, struct ethtool_dump *dump)
+ {
+ struct bnxt *bp = netdev_priv(dev);
+@@ -3971,10 +3647,7 @@ static int bnxt_get_dump_flag(struct net_device *dev, struct ethtool_dump *dump)
+ bp->ver_resp.hwrm_fw_rsvd_8b;
+
+ dump->flag = bp->dump_flag;
+- if (bp->dump_flag == BNXT_DUMP_CRASH)
+- dump->len = BNXT_CRASH_DUMP_LEN;
+- else
+- bnxt_get_coredump(bp, NULL, &dump->len);
++ dump->len = bnxt_get_coredump_length(bp, bp->dump_flag);
+ return 0;
+ }
+
+@@ -3989,15 +3662,7 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
+ memset(buf, 0, dump->len);
+
+ dump->flag = bp->dump_flag;
+- if (dump->flag == BNXT_DUMP_CRASH) {
+-#ifdef CONFIG_TEE_BNXT_FW
+- return tee_bnxt_copy_coredump(buf, 0, dump->len);
+-#endif
+- } else {
+- return bnxt_get_coredump(bp, buf, &dump->len);
+- }
+-
+- return 0;
++ return bnxt_get_coredump(bp, dump->flag, buf, &dump->len);
+ }
+
+ static int bnxt_get_ts_info(struct net_device *dev,
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+index 0a57cb6a4a4bf..11a719f98defd 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+@@ -22,49 +22,6 @@ struct bnxt_led_cfg {
+ u8 rsvd;
+ };
+
+-#define COREDUMP_LIST_BUF_LEN 2048
+-#define COREDUMP_RETRIEVE_BUF_LEN 4096
+-
+-struct bnxt_coredump {
+- void *data;
+- int data_size;
+- u16 total_segs;
+-};
+-
+-#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
+-
+-struct bnxt_hwrm_dbg_dma_info {
+- void *dest_buf;
+- int dest_buf_size;
+- u16 dma_len;
+- u16 seq_off;
+- u16 data_len_off;
+- u16 segs;
+- u32 seg_start;
+- u32 buf_len;
+-};
+-
+-struct hwrm_dbg_cmn_input {
+- __le16 req_type;
+- __le16 cmpl_ring;
+- __le16 seq_id;
+- __le16 target_id;
+- __le64 resp_addr;
+- __le64 host_dest_addr;
+- __le32 host_buf_len;
+-};
+-
+-struct hwrm_dbg_cmn_output {
+- __le16 error_code;
+- __le16 req_type;
+- __le16 seq_id;
+- __le16 resp_len;
+- u8 flags;
+- #define HWRM_DBG_CMN_FLAGS_MORE 1
+-};
+-
+-#define BNXT_CRASH_DUMP_LEN (8 << 20)
+-
+ #define BNXT_LED_DFLT_ENA \
+ (PORT_LED_CFG_REQ_ENABLES_LED0_ID | \
+ PORT_LED_CFG_REQ_ENABLES_LED0_STATE | \
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
+index bb7327b82d0b2..3a0eeb3737767 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
+@@ -496,7 +496,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+ }
+
+ /* Limit timeout to an upper limit */
+- timeout = min_t(uint, ctx->timeout, HWRM_CMD_MAX_TIMEOUT);
++ timeout = min(ctx->timeout, bp->hwrm_cmd_max_timeout ?: HWRM_CMD_MAX_TIMEOUT);
+ /* convert timeout to usec */
+ timeout *= 1000;
+
+@@ -595,18 +595,24 @@ timeout_abort:
+
+ /* Last byte of resp contains valid bit */
+ valid = ((u8 *)ctx->resp) + len - 1;
+- for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
++ for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; ) {
+ /* make sure we read from updated DMA memory */
+ dma_rmb();
+ if (*valid)
+ break;
+- usleep_range(1, 5);
++ if (j < 10) {
++ udelay(1);
++ j++;
++ } else {
++ usleep_range(20, 30);
++ j += 20;
++ }
+ }
+
+ if (j >= HWRM_VALID_BIT_DELAY_USEC) {
+ if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+ netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n",
+- hwrm_total_timeout(i),
++ hwrm_total_timeout(i) + j,
+ le16_to_cpu(ctx->req->req_type),
+ le16_to_cpu(ctx->req->seq_id), len,
+ *valid);
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
+index 4d17f0d5363bb..380ef69afb51b 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
+@@ -58,11 +58,10 @@ void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s);
+
+ #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len)
+ #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
+-#define HWRM_CMD_MAX_TIMEOUT 40000
++#define HWRM_CMD_MAX_TIMEOUT 40000U
+ #define SHORT_HWRM_CMD_TIMEOUT 20
+ #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout)
+ #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4)
+-#define HWRM_COREDUMP_TIMEOUT ((HWRM_CMD_TIMEOUT) * 12)
+ #define BNXT_HWRM_TARGET 0xffff
+ #define BNXT_HWRM_NO_CMPL_RING -1
+ #define BNXT_HWRM_REQ_MAX_SIZE 128
+@@ -95,7 +94,7 @@ static inline unsigned int hwrm_total_timeout(unsigned int n)
+ }
+
+
+-#define HWRM_VALID_BIT_DELAY_USEC 150
++#define HWRM_VALID_BIT_DELAY_USEC 50000
+
+ static inline bool bnxt_cfa_hwrm_message(u16 req_type)
+ {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+index f0aa480799ca4..a78cc65a38f2f 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+@@ -61,14 +61,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts,
+ u64 *ns)
+ {
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
++ u32 high_before, high_now, low;
+
+ if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ return -EIO;
+
++ high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
+ ptp_read_system_prets(sts);
+- *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
++ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ ptp_read_system_postts(sts);
+- *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
++ high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
++ if (high_now != high_before) {
++ ptp_read_system_prets(sts);
++ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
++ ptp_read_system_postts(sts);
++ }
++ *ns = ((u64)high_now << 32) | low;
++
+ return 0;
+ }
+
+@@ -331,7 +340,7 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
+ struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+ ptp_info);
+ struct bnxt *bp = ptp->bp;
+- u8 pin_id;
++ int pin_id;
+ int rc;
+
+ switch (rq->type) {
+@@ -339,6 +348,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
+ /* Configure an External PPS IN */
+ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS,
+ rq->extts.index);
++ if (!TSIO_PIN_VALID(pin_id))
++ return -EOPNOTSUPP;
+ if (!on)
+ break;
+ rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN);
+@@ -352,6 +363,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
+ /* Configure a Periodic PPS OUT */
+ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT,
+ rq->perout.index);
++ if (!TSIO_PIN_VALID(pin_id))
++ return -EOPNOTSUPP;
+ if (!on)
+ break;
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+index fa5f05708e6df..c3cd51e672e7b 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+@@ -28,7 +28,7 @@ struct pps_pin {
+ u8 state;
+ };
+
+-#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS))
++#define TSIO_PIN_VALID(pin) ((pin) >= 0 && (pin) < (BNXT_MAX_TSIO_PINS))
+
+ #define EVENT_DATA2_PPS_EVENT_TYPE(data2) \
+ ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE)
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+index 70d8ca3039dcb..78763f5027d10 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
+ hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
+ hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
+ if (bp->flags & BNXT_FLAG_CHIP_P5)
+- hw_resc->max_irqs -= vf_msix * n;
++ hw_resc->max_nqs -= vf_msix;
+
+ rc = pf->active_vfs;
+ }
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+index e6a4a768b10b2..1471b6130a2b9 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+@@ -1868,7 +1868,7 @@ static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
+ struct flow_cls_offload *flower = type_data;
+ struct bnxt *bp = priv->bp;
+
+- if (flower->common.chain_index)
++ if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+index 9401936b74fa2..8eb28e0885820 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+@@ -475,7 +475,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+ dev->features |= pf_dev->features;
+ bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
+ dev->perm_addr);
+- ether_addr_copy(dev->dev_addr, dev->perm_addr);
++ eth_hw_addr_set(dev, dev->perm_addr);
+ /* Set VF-Rep's max-mtu to the corresponding VF's max-mtu */
+ if (!bnxt_hwrm_vfr_qcfg(bp, vf_rep, &max_mtu))
+ dev->max_mtu = max_mtu;
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index c8083df5e0ab8..148b58f3468b3 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -20,6 +20,8 @@
+ #include "bnxt.h"
+ #include "bnxt_xdp.h"
+
++DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
++
+ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr,
+ dma_addr_t mapping, u32 len)
+@@ -227,11 +229,16 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+ ring = smp_processor_id() % bp->tx_nr_rings_xdp;
+ txr = &bp->tx_ring[ring];
+
++ if (READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING)
++ return -EINVAL;
++
++ if (static_branch_unlikely(&bnxt_xdp_locking_key))
++ spin_lock(&txr->xdp_tx_lock);
++
+ for (i = 0; i < num_frames; i++) {
+ struct xdp_frame *xdp = frames[i];
+
+- if (!txr || !bnxt_tx_avail(bp, txr) ||
+- !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP))
++ if (!bnxt_tx_avail(bp, txr))
+ break;
+
+ mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len,
+@@ -250,6 +257,9 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
+ bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
+ }
+
++ if (static_branch_unlikely(&bnxt_xdp_locking_key))
++ spin_unlock(&txr->xdp_tx_lock);
++
+ return nxmit;
+ }
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+index 0df40c3beb050..067bb5e821f54 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+@@ -10,6 +10,8 @@
+ #ifndef BNXT_XDP_H
+ #define BNXT_XDP_H
+
++DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
++
+ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+ struct bnxt_tx_ring_info *txr,
+ dma_addr_t mapping, u32 len);
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+index 23c7595d2a1d3..e036a244b78bf 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -1991,6 +1991,11 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev,
+ return skb;
+ }
+
++static void bcmgenet_hide_tsb(struct sk_buff *skb)
++{
++ __skb_pull(skb, sizeof(struct status_64));
++}
++
+ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+@@ -2097,6 +2102,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+
+ GENET_CB(skb)->last_cb = tx_cb_ptr;
++
++ bcmgenet_hide_tsb(skb);
+ skb_tx_timestamp(skb);
+
+ /* Decrement total BD count and advance our write pointer */
+@@ -2243,8 +2250,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
+ dma_length_status = status->length_status;
+ if (dev->features & NETIF_F_RXCSUM) {
+ rx_csum = (__force __be16)(status->rx_csum & 0xffff);
+- skb->csum = (__force __wsum)ntohs(rx_csum);
+- skb->ip_summed = CHECKSUM_COMPLETE;
++ if (rx_csum) {
++ skb->csum = (__force __wsum)ntohs(rx_csum);
++ skb->ip_summed = CHECKSUM_COMPLETE;
++ }
+ }
+
+ /* DMA flags and length are still valid no matter how
+@@ -2258,6 +2267,14 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
+ __func__, p_index, ring->c_index,
+ ring->read_ptr, dma_length_status);
+
++ if (unlikely(len > RX_BUF_LENGTH)) {
++ netif_err(priv, rx_status, dev, "oversized packet\n");
++ dev->stats.rx_length_errors++;
++ dev->stats.rx_errors++;
++ dev_kfree_skb_any(skb);
++ goto next;
++ }
++
+ if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) {
+ netif_err(priv, rx_status, dev,
+ "dropping fragmented packet!\n");
+@@ -3384,7 +3401,7 @@ err_clk_disable:
+ return ret;
+ }
+
+-static void bcmgenet_netif_stop(struct net_device *dev)
++static void bcmgenet_netif_stop(struct net_device *dev, bool stop_phy)
+ {
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+@@ -3399,7 +3416,8 @@ static void bcmgenet_netif_stop(struct net_device *dev)
+ /* Disable MAC transmit. TX DMA disabled must be done before this */
+ umac_enable_set(priv, CMD_TX_EN, false);
+
+- phy_stop(dev->phydev);
++ if (stop_phy)
++ phy_stop(dev->phydev);
+ bcmgenet_disable_rx_napi(priv);
+ bcmgenet_intr_disable(priv);
+
+@@ -3425,7 +3443,7 @@ static int bcmgenet_close(struct net_device *dev)
+
+ netif_dbg(priv, ifdown, dev, "bcmgenet_close\n");
+
+- bcmgenet_netif_stop(dev);
++ bcmgenet_netif_stop(dev, false);
+
+ /* Really kill the PHY state machine and disconnect from it */
+ phy_disconnect(dev->phydev);
+@@ -3592,7 +3610,7 @@ static int bcmgenet_set_mac_addr(struct net_device *dev, void *p)
+ if (netif_running(dev))
+ return -EBUSY;
+
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ return 0;
+ }
+@@ -3941,6 +3959,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
+ goto err;
+ }
+ priv->wol_irq = platform_get_irq_optional(pdev, 2);
++ if (priv->wol_irq == -EPROBE_DEFER) {
++ err = priv->wol_irq;
++ goto err;
++ }
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base)) {
+@@ -3966,10 +3988,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
+
+ /* Request the WOL interrupt and advertise suspend if available */
+ priv->wol_irq_disabled = true;
+- err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0,
+- dev->name, priv);
+- if (!err)
+- device_set_wakeup_capable(&pdev->dev, 1);
++ if (priv->wol_irq > 0) {
++ err = devm_request_irq(&pdev->dev, priv->wol_irq,
++ bcmgenet_wol_isr, 0, dev->name, priv);
++ if (!err)
++ device_set_wakeup_capable(&pdev->dev, 1);
++ }
+
+ /* Set the needed headroom to account for any possible
+ * features enabling/disabling at runtime
+@@ -4036,7 +4060,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
+ if (pd && !IS_ERR_OR_NULL(pd->mac_address))
+- ether_addr_copy(dev->dev_addr, pd->mac_address);
++ eth_hw_addr_set(dev, pd->mac_address);
+ else
+ if (!device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
+ if (has_acpi_companion(&pdev->dev))
+@@ -4219,7 +4243,7 @@ static int bcmgenet_suspend(struct device *d)
+
+ netif_device_detach(dev);
+
+- bcmgenet_netif_stop(dev);
++ bcmgenet_netif_stop(dev, true);
+
+ if (!device_may_wakeup(d))
+ phy_suspend(dev->phydev);
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+index e31a5a397f114..f55d9d9c01a85 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+@@ -40,6 +40,13 @@
+ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+ {
+ struct bcmgenet_priv *priv = netdev_priv(dev);
++ struct device *kdev = &priv->pdev->dev;
++
++ if (!device_can_wakeup(kdev)) {
++ wol->supported = 0;
++ wol->wolopts = 0;
++ return;
++ }
+
+ wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
+ wol->wolopts = priv->wolopts;
+diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+index 89d16c587bb7d..bfe90cacbd073 100644
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -165,15 +165,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
+
+ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
+ {
+- u32 reg;
+-
+- if (!GENET_IS_V5(priv)) {
+- /* Speed settings are set in bcmgenet_mii_setup() */
+- reg = bcmgenet_sys_readl(priv, SYS_PORT_CTRL);
+- reg |= LED_ACT_SOURCE_MAC;
+- bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
+- }
+-
+ if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
+ fixed_phy_set_link_update(priv->dev->phydev,
+ bcmgenet_fixed_phy_link_update);
+@@ -206,6 +197,8 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
+
+ if (!phy_name) {
+ phy_name = "MoCA";
++ if (!GENET_IS_V5(priv))
++ port_ctrl |= LED_ACT_SOURCE_MAC;
+ bcmgenet_moca_phy_setup(priv);
+ }
+ break;
+@@ -361,6 +354,9 @@ int bcmgenet_mii_probe(struct net_device *dev)
+ if (priv->internal_phy && !GENET_IS_V5(priv))
+ dev->phydev->irq = PHY_MAC_INTERRUPT;
+
++ /* Indicate that the MAC is responsible for PHY PM */
++ dev->phydev->mac_managed_pm = true;
++
+ return 0;
+ }
+
+@@ -575,7 +571,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
+ };
+
+ phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
+- if (!phydev || IS_ERR(phydev)) {
++ if (IS_ERR(phydev)) {
+ dev_err(kdev, "failed to register fixed PHY device\n");
+ return -ENODEV;
+ }
+@@ -631,5 +627,7 @@ void bcmgenet_mii_exit(struct net_device *dev)
+ if (of_phy_is_fixed_link(dn))
+ of_phy_deregister_fixed_link(dn);
+ of_node_put(priv->phy_dn);
++ clk_prepare_enable(priv->clk);
+ platform_device_unregister(priv->mii_pdev);
++ clk_disable_unprepare(priv->clk);
+ }
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index 5e0e0e70d8014..70b1a855273e4 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -224,6 +224,7 @@ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox
+ MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
+ MODULE_LICENSE("GPL");
+ MODULE_FIRMWARE(FIRMWARE_TG3);
++MODULE_FIRMWARE(FIRMWARE_TG357766);
+ MODULE_FIRMWARE(FIRMWARE_TG3TSO);
+ MODULE_FIRMWARE(FIRMWARE_TG3TSO5);
+
+@@ -11176,7 +11177,7 @@ static void tg3_reset_task(struct work_struct *work)
+ rtnl_lock();
+ tg3_full_lock(tp, 0);
+
+- if (!netif_running(tp->dev)) {
++ if (tp->pcierr_recovery || !netif_running(tp->dev)) {
+ tg3_flag_clear(tp, RESET_TASK_PENDING);
+ tg3_full_unlock(tp);
+ rtnl_unlock();
+@@ -18078,16 +18079,20 @@ static void tg3_shutdown(struct pci_dev *pdev)
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct tg3 *tp = netdev_priv(dev);
+
++ tg3_reset_task_cancel(tp);
++
+ rtnl_lock();
++
+ netif_device_detach(dev);
+
+ if (netif_running(dev))
+ dev_close(dev);
+
+- if (system_state == SYSTEM_POWER_OFF)
+- tg3_power_down(tp);
++ tg3_power_down(tp);
+
+ rtnl_unlock();
++
++ pci_disable_device(pdev);
+ }
+
+ /**
+@@ -18107,6 +18112,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+
+ netdev_info(netdev, "PCI I/O error detected\n");
+
++ /* Want to make sure that the reset task doesn't run */
++ tg3_reset_task_cancel(tp);
++
+ rtnl_lock();
+
+ /* Could be second call or maybe we don't have netdev yet */
+@@ -18123,9 +18131,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+
+ tg3_timer_stop(tp);
+
+- /* Want to make sure that the reset task doesn't run */
+- tg3_reset_task_cancel(tp);
+-
+ netif_device_detach(netdev);
+
+ /* Clean up software state, even if MMIO is blocked */
+diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
+index ba47777d9cff7..b1947fd9a07cc 100644
+--- a/drivers/net/ethernet/brocade/bna/bnad.c
++++ b/drivers/net/ethernet/brocade/bna/bnad.c
+@@ -875,7 +875,7 @@ bnad_set_netdev_perm_addr(struct bnad *bnad)
+
+ ether_addr_copy(netdev->perm_addr, bnad->perm_addr);
+ if (is_zero_ether_addr(netdev->dev_addr))
+- ether_addr_copy(netdev->dev_addr, bnad->perm_addr);
++ eth_hw_addr_set(netdev, bnad->perm_addr);
+ }
+
+ /* Control Path Handlers */
+@@ -3249,7 +3249,7 @@ bnad_set_mac_address(struct net_device *netdev, void *addr)
+
+ err = bnad_mac_addr_set_locked(bnad, sa->sa_data);
+ if (!err)
+- ether_addr_copy(netdev->dev_addr, sa->sa_data);
++ eth_hw_addr_set(netdev, sa->sa_data);
+
+ spin_unlock_irqrestore(&bnad->bna_lock, flags);
+
+diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
+index d13fb1d318215..dac56169851a2 100644
+--- a/drivers/net/ethernet/cadence/macb_main.c
++++ b/drivers/net/ethernet/cadence/macb_main.c
+@@ -35,6 +35,7 @@
+ #include <linux/tcp.h>
+ #include <linux/iopoll.h>
+ #include <linux/pm_runtime.h>
++#include <linux/ptp_classify.h>
+ #include "macb.h"
+
+ /* This structure is only used for MACB on SiFive FU540 devices */
+@@ -879,6 +880,7 @@ static int macb_mii_probe(struct net_device *dev)
+
+ bp->phylink_config.dev = &dev->dev;
+ bp->phylink_config.type = PHYLINK_NETDEV;
++ bp->phylink_config.mac_managed_pm = true;
+
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ bp->phylink_config.poll_fixed_state = true;
+@@ -1042,6 +1044,10 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
+ }
+ #endif
+ addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
++#ifdef CONFIG_MACB_USE_HWSTAMP
++ if (bp->hw_dma_cap & HW_DMA_CAP_PTP)
++ addr &= ~GEM_BIT(DMA_RXVALID);
++#endif
+ return addr;
+ }
+
+@@ -1155,6 +1161,36 @@ static void macb_tx_error_task(struct work_struct *work)
+ spin_unlock_irqrestore(&bp->lock, flags);
+ }
+
++static bool ptp_one_step_sync(struct sk_buff *skb)
++{
++ struct ptp_header *hdr;
++ unsigned int ptp_class;
++ u8 msgtype;
++
++ /* No need to parse packet if PTP TS is not involved */
++ if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
++ goto not_oss;
++
++ /* Identify and return whether PTP one step sync is being processed */
++ ptp_class = ptp_classify_raw(skb);
++ if (ptp_class == PTP_CLASS_NONE)
++ goto not_oss;
++
++ hdr = ptp_parse_header(skb, ptp_class);
++ if (!hdr)
++ goto not_oss;
++
++ if (hdr->flag_field[0] & PTP_FLAG_TWOSTEP)
++ goto not_oss;
++
++ msgtype = ptp_get_msgtype(hdr, ptp_class);
++ if (msgtype == PTP_MSGTYPE_SYNC)
++ return true;
++
++not_oss:
++ return false;
++}
++
+ static void macb_tx_interrupt(struct macb_queue *queue)
+ {
+ unsigned int tail;
+@@ -1199,8 +1235,8 @@ static void macb_tx_interrupt(struct macb_queue *queue)
+
+ /* First, update TX stats if needed */
+ if (skb) {
+- if (unlikely(skb_shinfo(skb)->tx_flags &
+- SKBTX_HW_TSTAMP) &&
++ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
++ !ptp_one_step_sync(skb) &&
+ gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+ /* skb now belongs to timestamp buffer
+ * and will be removed later
+@@ -1250,7 +1286,6 @@ static void gem_rx_refill(struct macb_queue *queue)
+ /* Make hw descriptor updates visible to CPU */
+ rmb();
+
+- queue->rx_prepared_head++;
+ desc = macb_rx_desc(queue, entry);
+
+ if (!queue->rx_skbuff[entry]) {
+@@ -1289,6 +1324,7 @@ static void gem_rx_refill(struct macb_queue *queue)
+ dma_wmb();
+ desc->addr &= ~MACB_BIT(RX_USED);
+ }
++ queue->rx_prepared_head++;
+ }
+
+ /* Make descriptor updates visible to hardware */
+@@ -1606,7 +1642,14 @@ static int macb_poll(struct napi_struct *napi, int budget)
+ if (work_done < budget) {
+ napi_complete_done(napi, work_done);
+
+- /* Packets received while interrupts were disabled */
++ /* RSR bits only seem to propagate to raise interrupts when
++ * interrupts are enabled at the time, so if bits are already
++ * set due to packets received while interrupts were disabled,
++ * they will not cause another interrupt to be generated when
++ * interrupts are re-enabled.
++ * Check for this case here. This has been seen to happen
++ * around 30% of the time under heavy network load.
++ */
+ status = macb_readl(bp, RSR);
+ if (status) {
+ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+@@ -1614,6 +1657,22 @@ static int macb_poll(struct napi_struct *napi, int budget)
+ napi_reschedule(napi);
+ } else {
+ queue_writel(queue, IER, bp->rx_intr_mask);
++
++ /* In rare cases, packets could have been received in
++ * the window between the check above and re-enabling
++ * interrupts. Therefore, a double-check is required
++ * to avoid losing a wakeup. This can potentially race
++ * with the interrupt handler doing the same actions
++ * if an interrupt is raised just after enabling them,
++ * but this should be harmless.
++ */
++ status = macb_readl(bp, RSR);
++ if (unlikely(status)) {
++ queue_writel(queue, IDR, bp->rx_intr_mask);
++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
++ queue_writel(queue, ISR, MACB_BIT(RCOMP));
++ napi_schedule(napi);
++ }
+ }
+ }
+
+@@ -1666,6 +1725,7 @@ static void macb_tx_restart(struct macb_queue *queue)
+ unsigned int head = queue->tx_head;
+ unsigned int tail = queue->tx_tail;
+ struct macb *bp = queue->bp;
++ unsigned int head_idx, tbqp;
+
+ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
+ queue_writel(queue, ISR, MACB_BIT(TXUBR));
+@@ -1673,6 +1733,13 @@ static void macb_tx_restart(struct macb_queue *queue)
+ if (head == tail)
+ return;
+
++ tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
++ tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
++ head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
++
++ if (tbqp == head_idx)
++ return;
++
+ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
+ }
+
+@@ -1999,7 +2066,8 @@ static unsigned int macb_tx_map(struct macb *bp,
+ ctrl |= MACB_BF(TX_LSO, lso_ctrl);
+ ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
+ if ((bp->dev->features & NETIF_F_HW_CSUM) &&
+- skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl)
++ skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl &&
++ !ptp_one_step_sync(skb))
+ ctrl |= MACB_BIT(TX_NOCRC);
+ } else
+ /* Only set MSS/MFS on payload descriptors
+@@ -2090,23 +2158,19 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+ bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) ||
+ skb_is_nonlinear(*skb);
+ int padlen = ETH_ZLEN - (*skb)->len;
+- int headroom = skb_headroom(*skb);
+ int tailroom = skb_tailroom(*skb);
+ struct sk_buff *nskb;
+ u32 fcs;
+
+ if (!(ndev->features & NETIF_F_HW_CSUM) ||
+ !((*skb)->ip_summed != CHECKSUM_PARTIAL) ||
+- skb_shinfo(*skb)->gso_size) /* Not available for GSO */
++ skb_shinfo(*skb)->gso_size || ptp_one_step_sync(*skb))
+ return 0;
+
+ if (padlen <= 0) {
+ /* FCS could be appeded to tailroom. */
+ if (tailroom >= ETH_FCS_LEN)
+ goto add_fcs;
+- /* FCS could be appeded by moving data to headroom. */
+- else if (!cloned && headroom + tailroom >= ETH_FCS_LEN)
+- padlen = 0;
+ /* No room for FCS, need to reallocate skb. */
+ else
+ padlen = ETH_FCS_LEN;
+@@ -2115,10 +2179,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+ padlen += ETH_FCS_LEN;
+ }
+
+- if (!cloned && headroom + tailroom >= padlen) {
+- (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len);
+- skb_set_tail_pointer(*skb, (*skb)->len);
+- } else {
++ if (cloned || tailroom < padlen) {
+ nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+@@ -4739,7 +4800,7 @@ static int macb_probe(struct platform_device *pdev)
+
+ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
+- dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
++ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
+ bp->hw_dma_cap |= HW_DMA_CAP_64B;
+ }
+ #endif
+@@ -4774,7 +4835,7 @@ static int macb_probe(struct platform_device *pdev)
+ if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
+ bp->rx_intr_mask |= MACB_BIT(RXUBR);
+
+- err = of_get_mac_address(np, bp->dev->dev_addr);
++ err = of_get_ethdev_address(np, bp->dev);
+ if (err == -EPROBE_DEFER)
+ goto err_out_free_netdev;
+ else if (err)
+diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
+index c2e1f163bb14f..c52ec1cc8a08c 100644
+--- a/drivers/net/ethernet/cadence/macb_ptp.c
++++ b/drivers/net/ethernet/cadence/macb_ptp.c
+@@ -469,8 +469,10 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ if (gem_ptp_set_one_step_sync(bp, 1) != 0)
+ return -ERANGE;
+- fallthrough;
++ tx_bd_control = TSTAMP_ALL_FRAMES;
++ break;
+ case HWTSTAMP_TX_ON:
++ gem_ptp_set_one_step_sync(bp, 0);
+ tx_bd_control = TSTAMP_ALL_FRAMES;
+ break;
+ default:
+diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
+index 2a0d64e5797c8..ec7928b54e4a7 100644
+--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
++++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
+@@ -411,7 +411,7 @@ void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
+
+ if (!ether_addr_equal(netdev->dev_addr, mac)) {
+ macaddr_changed = true;
+- ether_addr_copy(netdev->dev_addr, mac);
++ eth_hw_addr_set(netdev, mac);
+ ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, mac);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, netdev);
+ }
+diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
+index 2907e13b9df69..443755729d793 100644
+--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
++++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
+@@ -1798,13 +1798,10 @@ static int liquidio_open(struct net_device *netdev)
+
+ ifstate_set(lio, LIO_IFSTATE_RUNNING);
+
+- if (OCTEON_CN23XX_PF(oct)) {
+- if (!oct->msix_on)
+- if (setup_tx_poll_fn(netdev))
+- return -1;
+- } else {
+- if (setup_tx_poll_fn(netdev))
+- return -1;
++ if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) {
++ ret = setup_tx_poll_fn(netdev);
++ if (ret)
++ goto err_poll;
+ }
+
+ netif_tx_start_all_queues(netdev);
+@@ -1817,7 +1814,7 @@ static int liquidio_open(struct net_device *netdev)
+ /* tell Octeon to start forwarding packets to host */
+ ret = send_rx_ctrl_cmd(lio, 1);
+ if (ret)
+- return ret;
++ goto err_rx_ctrl;
+
+ /* start periodical statistics fetch */
+ INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
+@@ -1828,6 +1825,27 @@ static int liquidio_open(struct net_device *netdev)
+ dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
+ netdev->name);
+
++ return 0;
++
++err_rx_ctrl:
++ if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on)
++ cleanup_tx_poll_fn(netdev);
++err_poll:
++ if (lio->ptp_clock) {
++ ptp_clock_unregister(lio->ptp_clock);
++ lio->ptp_clock = NULL;
++ }
++
++ if (oct->props[lio->ifidx].napi_enabled == 1) {
++ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
++ napi_disable(napi);
++
++ oct->props[lio->ifidx].napi_enabled = 0;
++
++ if (OCTEON_CN23XX_PF(oct))
++ oct->droq[0]->ops.poll_mode = 0;
++ }
++
+ return ret;
+ }
+
+@@ -3632,7 +3650,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
+
+ /* Copy MAC Address to OS network device structure */
+
+- ether_addr_copy(netdev->dev_addr, mac);
++ eth_hw_addr_set(netdev, mac);
+
+ /* By default all interfaces on a single Octeon uses the same
+ * tx and rx queues
+diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+index f6396ac64006c..8a969a9d4b637 100644
+--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
++++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+@@ -2148,7 +2148,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
+ mac[j] = *((u8 *)(((u8 *)&lio->linfo.hw_addr) + 2 + j));
+
+ /* Copy MAC Address to OS network device structure */
+- ether_addr_copy(netdev->dev_addr, mac);
++ eth_hw_addr_set(netdev, mac);
+
+ if (liquidio_setup_io_queues(octeon_dev, i,
+ lio->linfo.num_txpciq,
+diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+index 30463a6d1f8c9..4e39d712e121e 100644
+--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
++++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+@@ -1501,7 +1501,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
+ netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM;
+ netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN;
+
+- result = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
++ result = of_get_ethdev_address(pdev->dev.of_node, netdev);
+ if (result)
+ eth_hw_addr_random(netdev);
+
+diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+index a27227aeae880..8418797be205e 100644
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+@@ -221,8 +221,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
+ nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
+ nic->node = mbx.nic_cfg.node_id;
+ if (!nic->set_mac_pending)
+- ether_addr_copy(nic->netdev->dev_addr,
+- mbx.nic_cfg.mac_addr);
++ eth_hw_addr_set(nic->netdev, mbx.nic_cfg.mac_addr);
+ nic->sqs_mode = mbx.nic_cfg.sqs_mode;
+ nic->loopback_supported = mbx.nic_cfg.loopback_supported;
+ nic->link_up = false;
+@@ -2250,7 +2249,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ err = register_netdev(netdev);
+ if (err) {
+ dev_err(dev, "Failed to register netdevice\n");
+- goto err_unregister_interrupts;
++ goto err_destroy_workqueue;
+ }
+
+ nic->msg_enable = debug;
+@@ -2259,6 +2258,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ return 0;
+
++err_destroy_workqueue:
++ destroy_workqueue(nic->nicvf_rx_mode_wq);
+ err_unregister_interrupts:
+ nicvf_unregister_interrupts(nic);
+ err_free_netdev:
+diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+index c36fed9c3d737..daaffae1a89f5 100644
+--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
++++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+@@ -1435,8 +1435,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl,
+ return AE_OK;
+ }
+
+- if (strncmp(string.pointer, bgx_sel, 4))
++ if (strncmp(string.pointer, bgx_sel, 4)) {
++ kfree(string.pointer);
+ return AE_OK;
++ }
+
+ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+ bgx_acpi_register_phy, NULL, bgx, NULL);
+diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+index 38e47703f9abd..07568aa15873d 100644
+--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+@@ -1302,6 +1302,7 @@ static int cxgb_up(struct adapter *adap)
+ if (ret < 0) {
+ CH_ERR(adap, "failed to bind qsets, err %d\n", ret);
+ t3_intr_disable(adap);
++ quiesce_rx(adap);
+ free_irq_resources(adap);
+ err = ret;
+ goto out;
+diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+index 7ff31d1026fb2..e0d34e64fc6cb 100644
+--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+@@ -3678,6 +3678,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
+ MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10);
+ adapter->params.pci.vpd_cap_addr =
+ pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD);
++ if (!adapter->params.pci.vpd_cap_addr)
++ return -ENODEV;
+ ret = get_vpd_params(adapter, &adapter->params.vpd);
+ if (ret < 0)
+ return ret;
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+index a7f291c897021..557c591a6ce3a 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+@@ -14,6 +14,7 @@
+ #include "cudbg_entity.h"
+ #include "cudbg_lib.h"
+ #include "cudbg_zlib.h"
++#include "cxgb4_tc_mqprio.h"
+
+ static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = {
+ {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */
+@@ -3458,7 +3459,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+ for (i = 0; i < utxq->ntxq; i++)
+ QDESC_GET_TXQ(&utxq->uldtxq[i].q,
+ cudbg_uld_txq_to_qtype(j),
+- out_unlock);
++ out_unlock_uld);
+ }
+ }
+
+@@ -3475,7 +3476,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+ for (i = 0; i < urxq->nrxq; i++)
+ QDESC_GET_RXQ(&urxq->uldrxq[i].rspq,
+ cudbg_uld_rxq_to_qtype(j),
+- out_unlock);
++ out_unlock_uld);
+ }
+
+ /* ULD FLQ */
+@@ -3487,7 +3488,7 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+ for (i = 0; i < urxq->nrxq; i++)
+ QDESC_GET_FLQ(&urxq->uldrxq[i].fl,
+ cudbg_uld_flq_to_qtype(j),
+- out_unlock);
++ out_unlock_uld);
+ }
+
+ /* ULD CIQ */
+@@ -3500,29 +3501,34 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
+ for (i = 0; i < urxq->nciq; i++)
+ QDESC_GET_RXQ(&urxq->uldrxq[base + i].rspq,
+ cudbg_uld_ciq_to_qtype(j),
+- out_unlock);
++ out_unlock_uld);
+ }
+ }
++ mutex_unlock(&uld_mutex);
++
++ if (!padap->tc_mqprio)
++ goto out;
+
++ mutex_lock(&padap->tc_mqprio->mqprio_mutex);
+ /* ETHOFLD TXQ */
+ if (s->eohw_txq)
+ for (i = 0; i < s->eoqsets; i++)
+ QDESC_GET_TXQ(&s->eohw_txq[i].q,
+- CUDBG_QTYPE_ETHOFLD_TXQ, out);
++ CUDBG_QTYPE_ETHOFLD_TXQ, out_unlock_mqprio);
+
+ /* ETHOFLD RXQ and FLQ */
+ if (s->eohw_rxq) {
+ for (i = 0; i < s->eoqsets; i++)
+ QDESC_GET_RXQ(&s->eohw_rxq[i].rspq,
+- CUDBG_QTYPE_ETHOFLD_RXQ, out);
++ CUDBG_QTYPE_ETHOFLD_RXQ, out_unlock_mqprio);
+
+ for (i = 0; i < s->eoqsets; i++)
+ QDESC_GET_FLQ(&s->eohw_rxq[i].fl,
+- CUDBG_QTYPE_ETHOFLD_FLQ, out);
++ CUDBG_QTYPE_ETHOFLD_FLQ, out_unlock_mqprio);
+ }
+
+-out_unlock:
+- mutex_unlock(&uld_mutex);
++out_unlock_mqprio:
++ mutex_unlock(&padap->tc_mqprio->mqprio_mutex);
+
+ out:
+ qdesc_info->qdesc_entry_size = sizeof(*qdesc_entry);
+@@ -3559,6 +3565,10 @@ out_free:
+ #undef QDESC_GET
+
+ return rc;
++
++out_unlock_uld:
++ mutex_unlock(&uld_mutex);
++ goto out;
+ }
+
+ int cudbg_collect_flash(struct cudbg_init *pdbg_init,
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+index 5903bdb78916f..129352bbe1143 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+@@ -2015,12 +2015,15 @@ static int cxgb4_get_module_info(struct net_device *dev,
+ if (ret)
+ return ret;
+
+- if (!sff8472_comp || (sff_diag_type & 4)) {
++ if (!sff8472_comp || (sff_diag_type & SFP_DIAG_ADDRMODE)) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+- modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
++ if (sff_diag_type & SFP_DIAG_IMPLEMENTED)
++ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
++ else
++ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2;
+ }
+ break;
+
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+index 64144b6171d72..b1c9f65ab10f6 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+@@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
+ goto out;
+ na = ret;
+
+- memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN));
++ memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN));
+ strim(p->id);
+- memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN));
++ memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN));
+ strim(p->sn);
+- memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN));
++ memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN));
+ strim(p->pn);
+- memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN));
+- strim((char *)p->na);
++ memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN));
++ strim(p->na);
+
+ out:
+ vfree(vpd);
+diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+index 002fc62ea7262..63bc956d20376 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+@@ -293,6 +293,8 @@ enum {
+ #define I2C_PAGE_SIZE 0x100
+ #define SFP_DIAG_TYPE_ADDR 0x5c
+ #define SFP_DIAG_TYPE_LEN 0x1
++#define SFP_DIAG_ADDRMODE BIT(2)
++#define SFP_DIAG_IMPLEMENTED BIT(6)
+ #define SFF_8472_COMP_ADDR 0x5e
+ #define SFF_8472_COMP_LEN 0x1
+ #define SFF_REV_ADDR 0x1
+diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+index 49b76fd47daa0..464c2b365721f 100644
+--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+@@ -858,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev)
+ */
+ err = t4vf_update_port_info(pi);
+ if (err < 0)
+- return err;
++ goto err_unwind;
+
+ /*
+ * Note that this interface is up and start everything up ...
+diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+index bcad69c480740..ddfe9208529a5 100644
+--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
++++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+@@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
+ * created only after 3 way handshake is done.
+ */
+ sock_orphan(child);
+- percpu_counter_inc((child)->sk_prot->orphan_count);
++ INC_ORPHAN_COUNT(child);
+ chtls_release_resources(child);
+ chtls_conn_done(child);
+ } else {
+@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
+ csk->sndbuf = newsk->sk_sndbuf;
+ csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
+ RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
+- sock_net(newsk)->
+- ipv4.sysctl_tcp_window_scaling,
++ READ_ONCE(sock_net(newsk)->
++ ipv4.sysctl_tcp_window_scaling),
+ tp->window_clamp);
+ neigh_release(n);
+ inet_inherit_port(&tcp_hashinfo, lsk, newsk);
+@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
+ #endif
+ }
+ if (req->tcpopt.wsf <= 14 &&
+- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ inet_rsk(oreq)->wscale_ok = 1;
+ inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
+ }
+@@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk,
+ th_ecn = tcph->ece && tcph->cwr;
+ if (th_ecn) {
+ ect = !INET_ECN_is_not_ect(ip_dsfield);
+- ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
++ ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
+ if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
+ inet_rsk(oreq)->ecn_ok = 1;
+ }
+diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
+index b1161bdeda4dc..f61ca657601ca 100644
+--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
++++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h
+@@ -95,7 +95,7 @@ struct deferred_skb_cb {
+ #define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok)
+ #define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok)
+ #define SACK_OK(tp) ((tp)->rx_opt.sack_ok)
+-#define INC_ORPHAN_COUNT(sk) percpu_counter_inc((sk)->sk_prot->orphan_count)
++#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count)
+
+ /* TLS SKB */
+ #define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld)
+diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+index d04a6c1634452..da8d10475a08e 100644
+--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
++++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+@@ -32,6 +32,7 @@
+
+ #include <linux/tcp.h>
+ #include <linux/ipv6.h>
++#include <net/inet_ecn.h>
+ #include <net/route.h>
+ #include <net/ip6_route.h>
+
+@@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi,
+
+ rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
+ peer_port, local_port, IPPROTO_TCP,
+- tos, 0);
++ tos & ~INET_ECN_MASK, 0);
+ if (IS_ERR(rt))
+ return NULL;
+ n = dst_neigh_lookup(&rt->dst, &peer_ip);
+diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
+index 6e745ca4c4333..8361faf03e429 100644
+--- a/drivers/net/ethernet/cortina/gemini.c
++++ b/drivers/net/ethernet/cortina/gemini.c
+@@ -305,21 +305,21 @@ static void gmac_speed_set(struct net_device *netdev)
+ switch (phydev->speed) {
+ case 1000:
+ status.bits.speed = GMAC_SPEED_1000;
+- if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
++ if (phy_interface_mode_is_rgmii(phydev->interface))
+ status.bits.mii_rmii = GMAC_PHY_RGMII_1000;
+ netdev_dbg(netdev, "connect %s to RGMII @ 1Gbit\n",
+ phydev_name(phydev));
+ break;
+ case 100:
+ status.bits.speed = GMAC_SPEED_100;
+- if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
++ if (phy_interface_mode_is_rgmii(phydev->interface))
+ status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
+ netdev_dbg(netdev, "connect %s to RGMII @ 100 Mbit\n",
+ phydev_name(phydev));
+ break;
+ case 10:
+ status.bits.speed = GMAC_SPEED_10;
+- if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
++ if (phy_interface_mode_is_rgmii(phydev->interface))
+ status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
+ netdev_dbg(netdev, "connect %s to RGMII @ 10 Mbit\n",
+ phydev_name(phydev));
+@@ -389,6 +389,9 @@ static int gmac_setup_phy(struct net_device *netdev)
+ status.bits.mii_rmii = GMAC_PHY_GMII;
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
++ case PHY_INTERFACE_MODE_RGMII_ID:
++ case PHY_INTERFACE_MODE_RGMII_TXID:
++ case PHY_INTERFACE_MODE_RGMII_RXID:
+ netdev_dbg(netdev,
+ "RGMII: set GMAC0 and GMAC1 to MII/RGMII mode\n");
+ status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
+@@ -1917,7 +1920,7 @@ static void gmac_get_stats64(struct net_device *netdev,
+
+ /* Racing with RX NAPI */
+ do {
+- start = u64_stats_fetch_begin(&port->rx_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp);
+
+ stats->rx_packets = port->stats.rx_packets;
+ stats->rx_bytes = port->stats.rx_bytes;
+@@ -1929,11 +1932,11 @@ static void gmac_get_stats64(struct net_device *netdev,
+ stats->rx_crc_errors = port->stats.rx_crc_errors;
+ stats->rx_frame_errors = port->stats.rx_frame_errors;
+
+- } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start));
+
+ /* Racing with MIB and TX completion interrupts */
+ do {
+- start = u64_stats_fetch_begin(&port->ir_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp);
+
+ stats->tx_errors = port->stats.tx_errors;
+ stats->tx_packets = port->stats.tx_packets;
+@@ -1943,15 +1946,15 @@ static void gmac_get_stats64(struct net_device *netdev,
+ stats->rx_missed_errors = port->stats.rx_missed_errors;
+ stats->rx_fifo_errors = port->stats.rx_fifo_errors;
+
+- } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start));
+
+ /* Racing with hard_start_xmit */
+ do {
+- start = u64_stats_fetch_begin(&port->tx_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp);
+
+ stats->tx_dropped = port->stats.tx_dropped;
+
+- } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start));
+
+ stats->rx_dropped += stats->rx_missed_errors;
+ }
+@@ -2029,18 +2032,18 @@ static void gmac_get_ethtool_stats(struct net_device *netdev,
+ /* Racing with MIB interrupt */
+ do {
+ p = values;
+- start = u64_stats_fetch_begin(&port->ir_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp);
+
+ for (i = 0; i < RX_STATS_NUM; i++)
+ *p++ = port->hw_stats[i];
+
+- } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start));
+ values = p;
+
+ /* Racing with RX NAPI */
+ do {
+ p = values;
+- start = u64_stats_fetch_begin(&port->rx_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp);
+
+ for (i = 0; i < RX_STATUS_NUM; i++)
+ *p++ = port->rx_stats[i];
+@@ -2048,13 +2051,13 @@ static void gmac_get_ethtool_stats(struct net_device *netdev,
+ *p++ = port->rx_csum_stats[i];
+ *p++ = port->rx_napi_exits;
+
+- } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start));
+ values = p;
+
+ /* Racing with TX start_xmit */
+ do {
+ p = values;
+- start = u64_stats_fetch_begin(&port->tx_stats_syncp);
++ start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp);
+
+ for (i = 0; i < TX_MAX_FRAGS; i++) {
+ *values++ = port->tx_frag_stats[i];
+@@ -2063,7 +2066,7 @@ static void gmac_get_ethtool_stats(struct net_device *netdev,
+ *values++ = port->tx_frags_linearized;
+ *values++ = port->tx_hw_csummed;
+
+- } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start));
++ } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start));
+ }
+
+ static int gmac_get_ksettings(struct net_device *netdev,
+diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
+index 36ab4cbf2ad08..b9d967e419387 100644
+--- a/drivers/net/ethernet/dec/tulip/de4x5.c
++++ b/drivers/net/ethernet/dec/tulip/de4x5.c
+@@ -4708,6 +4708,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p)
+ lp->ibn = 3;
+ lp->active = *p++;
+ if (MOTO_SROM_BUG) lp->active = 0;
++ /* if (MOTO_SROM_BUG) statement indicates lp->active could
++ * be 8 (i.e. the size of array lp->phy) */
++ if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy)))
++ return -EINVAL;
+ lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1);
+ lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1);
+ lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2;
+@@ -4999,19 +5003,23 @@ mii_get_phy(struct net_device *dev)
+ }
+ if ((j == limit) && (i < DE4X5_MAX_MII)) {
+ for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++);
+- lp->phy[k].addr = i;
+- lp->phy[k].id = id;
+- lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */
+- lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */
+- lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */
+- lp->mii_cnt++;
+- lp->active++;
+- printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
+- j = de4x5_debug;
+- de4x5_debug |= DEBUG_MII;
+- de4x5_dbg_mii(dev, k);
+- de4x5_debug = j;
+- printk("\n");
++ if (k < DE4X5_MAX_PHY) {
++ lp->phy[k].addr = i;
++ lp->phy[k].id = id;
++ lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */
++ lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */
++ lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */
++ lp->mii_cnt++;
++ lp->active++;
++ printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
++ j = de4x5_debug;
++ de4x5_debug |= DEBUG_MII;
++ de4x5_dbg_mii(dev, k);
++ de4x5_debug = j;
++ printk("\n");
++ } else {
++ goto purgatory;
++ }
+ }
+ }
+ purgatory:
+diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
+index fcedd733bacbf..834a3f8c80da0 100644
+--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
++++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
+@@ -1398,8 +1398,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ /* alloc_etherdev ensures aligned and zeroed private structures */
+ dev = alloc_etherdev (sizeof (*tp));
+- if (!dev)
++ if (!dev) {
++ pci_disable_device(pdev);
+ return -ENOMEM;
++ }
+
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
+@@ -1778,6 +1780,7 @@ err_out_free_res:
+
+ err_out_free_netdev:
+ free_netdev (dev);
++ pci_disable_device(pdev);
+ return -ENODEV;
+ }
+
+diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
+index 85b99099c6b94..5babcf05bc2f1 100644
+--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
++++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
+@@ -877,7 +877,7 @@ static void init_registers(struct net_device *dev)
+ 8000 16 longwords 0200 2 longwords 2000 32 longwords
+ C000 32 longwords 0400 4 longwords */
+
+-#if defined (__i386__) && !defined(MODULE)
++#if defined (__i386__) && !defined(MODULE) && !defined(CONFIG_UML)
+ /* When not a module we can work around broken '486 PCI boards. */
+ if (boot_cpu_data.x86 <= 4) {
+ i |= 0x4800;
+diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
+index 6c51cf991dad5..14dc2e13bf038 100644
+--- a/drivers/net/ethernet/dnet.c
++++ b/drivers/net/ethernet/dnet.c
+@@ -550,11 +550,11 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ skb_tx_timestamp(skb);
+
++ spin_unlock_irqrestore(&bp->lock, flags);
++
+ /* free the buffer */
+ dev_kfree_skb(skb);
+
+- spin_unlock_irqrestore(&bp->lock, flags);
+-
+ return NETDEV_TX_OK;
+ }
+
+diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
+index 649c5c429bd7c..1288b5e3d2201 100644
+--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
++++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
+@@ -2287,7 +2287,7 @@ err:
+
+ /* Uses sync mcc */
+ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+- u8 page_num, u8 *data)
++ u8 page_num, u32 off, u32 len, u8 *data)
+ {
+ struct be_dma_mem cmd;
+ struct be_mcc_wrb *wrb;
+@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+ req->port = cpu_to_le32(adapter->hba_port_num);
+ req->page_num = cpu_to_le32(page_num);
+ status = be_mcc_notify_wait(adapter);
+- if (!status) {
++ if (!status && len > 0) {
+ struct be_cmd_resp_port_type *resp = cmd.va;
+
+- memcpy(data, resp->page_data, PAGE_DATA_LEN);
++ memcpy(data, resp->page_data + off, len);
+ }
+ err:
+ mutex_unlock(&adapter->mcc_lock);
+@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
+ int status;
+
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+- page_data);
++ 0, PAGE_DATA_LEN, page_data);
+ if (!status) {
+ switch (adapter->phy.interface_type) {
+ case PHY_TYPE_QSFP:
+@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
+ int status;
+
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+- page_data);
++ 0, PAGE_DATA_LEN, page_data);
+ if (!status) {
+ strlcpy(adapter->phy.vendor_name, page_data +
+ SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
+diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
+index c30d6d6f0f3a0..9e17d6a7ab8cd 100644
+--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
++++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
+@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
+ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
+ u32 *state);
+ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
+- u8 page_num, u8 *data);
++ u8 page_num, u32 off, u32 len, u8 *data);
+ int be_cmd_query_cable_type(struct be_adapter *adapter);
+ int be_cmd_query_sfp_info(struct be_adapter *adapter);
+ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
+diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
+index f9955308b93d6..010a0024f3ced 100644
+--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
++++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
+@@ -1342,7 +1342,7 @@ static int be_get_module_info(struct net_device *netdev,
+ return -EOPNOTSUPP;
+
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+- page_data);
++ 0, PAGE_DATA_LEN, page_data);
+ if (!status) {
+ if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+@@ -1360,25 +1360,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
+ {
+ struct be_adapter *adapter = netdev_priv(netdev);
+ int status;
++ u32 begin, end;
+
+ if (!check_privilege(adapter, MAX_PRIVILEGES))
+ return -EOPNOTSUPP;
+
+- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
+- data);
+- if (status)
+- goto err;
++ begin = eeprom->offset;
++ end = eeprom->offset + eeprom->len;
++
++ if (begin < PAGE_DATA_LEN) {
++ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
++ min_t(u32, end, PAGE_DATA_LEN) - begin,
++ data);
++ if (status)
++ goto err;
++
++ data += PAGE_DATA_LEN - begin;
++ begin = PAGE_DATA_LEN;
++ }
+
+- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
+- status = be_cmd_read_port_transceiver_data(adapter,
+- TR_PAGE_A2,
+- data +
+- PAGE_DATA_LEN);
++ if (end > PAGE_DATA_LEN) {
++ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
++ begin - PAGE_DATA_LEN,
++ end - begin, data);
+ if (status)
+ goto err;
+ }
+- if (eeprom->offset)
+- memcpy(data, data + eeprom->offset, eeprom->len);
+ err:
+ return be_cmd_status(status);
+ }
+diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
+index 361c1c87c1830..c14a3dbd075cc 100644
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -369,7 +369,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
+ /* Remember currently programmed MAC */
+ ether_addr_copy(adapter->dev_mac, addr->sa_data);
+ done:
+- ether_addr_copy(netdev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(netdev, addr->sa_data);
+ dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
+ return 0;
+ err:
+@@ -1136,10 +1136,11 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
+ eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
+ VLAN_ETH_HLEN : ETH_HLEN;
+ if (skb->len <= 60 &&
+- (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
+- is_ipv4_pkt(skb)) {
++ (lancer_chip(adapter) || BE3_chip(adapter) ||
++ skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
+ ip = (struct iphdr *)ip_hdr(skb);
+- pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
++ if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
++ goto tx_drop;
+ }
+
+ /* If vlan tag is already inlined in the packet, skip HW VLAN
+diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
+index ed1ed48e74838..7f456297fc458 100644
+--- a/drivers/net/ethernet/ethoc.c
++++ b/drivers/net/ethernet/ethoc.c
+@@ -1148,10 +1148,10 @@ static int ethoc_probe(struct platform_device *pdev)
+
+ /* Allow the platform setup code to pass in a MAC address. */
+ if (pdata) {
+- ether_addr_copy(netdev->dev_addr, pdata->hwaddr);
++ eth_hw_addr_set(netdev, pdata->hwaddr);
+ priv->phy_id = pdata->phy_id;
+ } else {
+- of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
++ of_get_ethdev_address(pdev->dev.of_node, netdev);
+ priv->phy_id = -1;
+ }
+
+diff --git a/drivers/net/ethernet/ezchip/Kconfig b/drivers/net/ethernet/ezchip/Kconfig
+index 38aa824efb25d..9241b9b1c7a36 100644
+--- a/drivers/net/ethernet/ezchip/Kconfig
++++ b/drivers/net/ethernet/ezchip/Kconfig
+@@ -18,7 +18,7 @@ if NET_VENDOR_EZCHIP
+
+ config EZCHIP_NPS_MANAGEMENT_ENET
+ tristate "EZchip NPS management enet support"
+- depends on OF_IRQ && OF_NET
++ depends on OF_IRQ
+ depends on HAS_IOMEM
+ help
+ Simple LAN device for debug or management purposes.
+diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
+index f9a288a6ec8cc..323340826dabd 100644
+--- a/drivers/net/ethernet/ezchip/nps_enet.c
++++ b/drivers/net/ethernet/ezchip/nps_enet.c
+@@ -421,7 +421,7 @@ static s32 nps_enet_set_mac_address(struct net_device *ndev, void *p)
+
+ res = eth_mac_addr(ndev, p);
+ if (!res) {
+- ether_addr_copy(ndev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(ndev, addr->sa_data);
+ nps_enet_set_hw_mac_address(ndev);
+ }
+
+@@ -601,7 +601,7 @@ static s32 nps_enet_probe(struct platform_device *pdev)
+ dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base);
+
+ /* set kernel MAC address to dev */
+- err = of_get_mac_address(dev->of_node, ndev->dev_addr);
++ err = of_get_ethdev_address(dev->of_node, ndev);
+ if (err)
+ eth_hw_addr_random(ndev);
+
+diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
+index ff76e401a014b..11f76e56d0316 100644
+--- a/drivers/net/ethernet/faraday/ftgmac100.c
++++ b/drivers/net/ethernet/faraday/ftgmac100.c
+@@ -186,7 +186,7 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
+
+ addr = device_get_mac_address(priv->dev, mac, ETH_ALEN);
+ if (addr) {
+- ether_addr_copy(priv->netdev->dev_addr, mac);
++ eth_hw_addr_set(priv->netdev, mac);
+ dev_info(priv->dev, "Read MAC address %pM from device tree\n",
+ mac);
+ return;
+@@ -203,7 +203,7 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
+ mac[5] = l & 0xff;
+
+ if (is_valid_ether_addr(mac)) {
+- ether_addr_copy(priv->netdev->dev_addr, mac);
++ eth_hw_addr_set(priv->netdev, mac);
+ dev_info(priv->dev, "Read MAC address %pM from chip\n", mac);
+ } else {
+ eth_hw_addr_random(priv->netdev);
+@@ -1746,6 +1746,19 @@ cleanup_clk:
+ return rc;
+ }
+
++static bool ftgmac100_has_child_node(struct device_node *np, const char *name)
++{
++ struct device_node *child_np = of_get_child_by_name(np, name);
++ bool ret = false;
++
++ if (child_np) {
++ ret = true;
++ of_node_put(child_np);
++ }
++
++ return ret;
++}
++
+ static int ftgmac100_probe(struct platform_device *pdev)
+ {
+ struct resource *res;
+@@ -1817,11 +1830,6 @@ static int ftgmac100_probe(struct platform_device *pdev)
+ priv->rxdes0_edorr_mask = BIT(30);
+ priv->txdes0_edotr_mask = BIT(30);
+ priv->is_aspeed = true;
+- /* Disable ast2600 problematic HW arbitration */
+- if (of_device_is_compatible(np, "aspeed,ast2600-mac")) {
+- iowrite32(FTGMAC100_TM_DEFAULT,
+- priv->base + FTGMAC100_OFFSET_TM);
+- }
+ } else {
+ priv->rxdes0_edorr_mask = BIT(15);
+ priv->txdes0_edotr_mask = BIT(15);
+@@ -1870,7 +1878,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
+
+ /* Display what we found */
+ phy_attached_info(phy);
+- } else if (np && !of_get_child_by_name(np, "mdio")) {
++ } else if (np && !ftgmac100_has_child_node(np, "mdio")) {
+ /* Support legacy ASPEED devicetree descriptions that decribe a
+ * MAC with an embedded MDIO controller but have no "mdio"
+ * child node. Automatically scan the MDIO bus for available
+@@ -1893,6 +1901,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
+ err = ftgmac100_setup_clk(priv);
+ if (err)
+ goto err_phy_connect;
++
++ /* Disable ast2600 problematic HW arbitration */
++ if (of_device_is_compatible(np, "aspeed,ast2600-mac"))
++ iowrite32(FTGMAC100_TM_DEFAULT,
++ priv->base + FTGMAC100_OFFSET_TM);
+ }
+
+ /* Default ring sizes */
+@@ -1910,6 +1923,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
+ /* AST2400 doesn't have working HW checksum generation */
+ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
+ netdev->hw_features &= ~NETIF_F_HW_CSUM;
++
++ /* AST2600 tx checksum with NCSI is broken */
++ if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
++ netdev->hw_features &= ~NETIF_F_HW_CSUM;
++
+ if (np && of_get_property(np, "no-hw-checksum", NULL))
+ netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
+ netdev->features |= netdev->hw_features;
+diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
+index 25c91b3c5fd30..819266d463b07 100644
+--- a/drivers/net/ethernet/fealnx.c
++++ b/drivers/net/ethernet/fealnx.c
+@@ -857,7 +857,7 @@ static int netdev_open(struct net_device *dev)
+ np->bcrvalue |= 0x04; /* big-endian */
+ #endif
+
+-#if defined(__i386__) && !defined(MODULE)
++#if defined(__i386__) && !defined(MODULE) && !defined(CONFIG_UML)
+ if (boot_cpu_data.x86 <= 4)
+ np->crvalue = 0xa00;
+ else
+diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+index 685d2d8a3b366..fe5fc2b3406f9 100644
+--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+@@ -2395,6 +2395,9 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
+
+ cleaned = qman_p_poll_dqrr(np->p, budget);
+
++ if (np->xdp_act & XDP_REDIRECT)
++ xdp_do_flush();
++
+ if (cleaned < budget) {
+ napi_complete_done(napi, cleaned);
+ qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
+@@ -2402,9 +2405,6 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget)
+ qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
+ }
+
+- if (np->xdp_act & XDP_REDIRECT)
+- xdp_do_flush();
+-
+ return cleaned;
+ }
+
+diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+index 763d2c7b5fb1a..5750f9a56393a 100644
+--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+@@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
+ info->phc_index = -1;
+
+ fman_node = of_get_parent(mac_node);
+- if (fman_node)
++ if (fman_node) {
+ ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0);
++ of_node_put(fman_node);
++ }
+
+- if (ptp_node)
++ if (ptp_node) {
+ ptp_dev = of_find_device_by_node(ptp_node);
++ of_node_put(ptp_node);
++ }
+
+ if (ptp_dev)
+ ptp = platform_get_drvdata(ptp_dev);
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+index 7065c71ed7b86..c48d410936517 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+@@ -1389,8 +1389,8 @@ static int dpaa2_eth_add_bufs(struct dpaa2_eth_priv *priv,
+ buf_array[i] = addr;
+
+ /* tracing point */
+- trace_dpaa2_eth_buf_seed(priv->net_dev,
+- page, DPAA2_ETH_RX_BUF_RAW_SIZE,
++ trace_dpaa2_eth_buf_seed(priv->net_dev, page_address(page),
++ DPAA2_ETH_RX_BUF_RAW_SIZE,
+ addr, priv->rx_buf_size,
+ bpid);
+ }
+@@ -1597,10 +1597,15 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
+ if (rx_cleaned >= budget ||
+ txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
+ work_done = budget;
++ if (ch->xdp.res & XDP_REDIRECT)
++ xdp_do_flush();
+ goto out;
+ }
+ } while (store_cleaned);
+
++ if (ch->xdp.res & XDP_REDIRECT)
++ xdp_do_flush();
++
+ /* We didn't consume the entire budget, so finish napi and
+ * re-enable data availability notifications
+ */
+@@ -1625,9 +1630,7 @@ out:
+ txc_fq->dq_bytes = 0;
+ }
+
+- if (ch->xdp.res & XDP_REDIRECT)
+- xdp_do_flush_map();
+- else if (rx_cleaned && ch->xdp.res & XDP_TX)
++ if (rx_cleaned && ch->xdp.res & XDP_TX)
+ dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]);
+
+ return work_done;
+@@ -4329,7 +4332,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
+ }
+
+ INIT_WORK(&priv->tx_onestep_tstamp, dpaa2_eth_tx_onestep_tstamp);
+-
++ mutex_init(&priv->onestep_tstamp_lock);
+ skb_queue_head_init(&priv->tx_skbs);
+
+ priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK;
+@@ -4511,12 +4514,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
+ #ifdef CONFIG_DEBUG_FS
+ dpaa2_dbg_remove(priv);
+ #endif
++
++ unregister_netdev(net_dev);
+ rtnl_lock();
+ dpaa2_eth_disconnect_mac(priv);
+ rtnl_unlock();
+
+- unregister_netdev(net_dev);
+-
+ dpaa2_eth_dl_port_del(priv);
+ dpaa2_eth_dl_traps_unregister(priv);
+ dpaa2_eth_dl_unregister(priv);
+@@ -4538,10 +4541,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
+
+ fsl_mc_portal_free(priv->mc_io);
+
+- free_netdev(net_dev);
++ destroy_workqueue(priv->dpaa2_ptp_wq);
+
+ dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name);
+
++ free_netdev(net_dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
+index 32b5faa87bb8d..208a3459f2e29 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
+@@ -168,7 +168,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
+ base = of_iomap(node, 0);
+ if (!base) {
+ err = -ENOMEM;
+- goto err_close;
++ goto err_put;
+ }
+
+ err = fsl_mc_allocate_irqs(mc_dev);
+@@ -212,6 +212,8 @@ err_free_mc_irq:
+ fsl_mc_free_irqs(mc_dev);
+ err_unmap:
+ iounmap(base);
++err_put:
++ of_node_put(node);
+ err_close:
+ dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
+ err_free_mcp:
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+index d6eefbbf163fa..c39b866e2582d 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+@@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+ dev_err(dev, "DMA mapping failed\n");
++ kfree(cmd_buff);
+ return -EFAULT;
+ }
+
+@@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
+ DMA_TO_DEVICE);
+ if (err) {
+ dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
++ kfree(cmd_buff);
+ return err;
+ }
+
+@@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+ dev_err(dev, "DMA mapping failed\n");
++ kfree(cmd_buff);
+ return -EFAULT;
+ }
+
+@@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
+ DMA_TO_DEVICE);
+ if (err) {
+ dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
++ kfree(cmd_buff);
+ return err;
+ }
+
+@@ -532,6 +536,7 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ struct netlink_ext_ack *extack = cls->common.extack;
++ int ret = -EOPNOTSUPP;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+@@ -561,9 +566,10 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
+ }
+
+ *vlan = (u16)match.key->vlan_id;
++ ret = 0;
+ }
+
+- return 0;
++ return ret;
+ }
+
+ static int
+diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
+index a139f2e9d59f0..e0e8dfd137930 100644
+--- a/drivers/net/ethernet/freescale/enetc/Makefile
++++ b/drivers/net/ethernet/freescale/enetc/Makefile
+@@ -9,7 +9,6 @@ fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o
+
+ obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
+ fsl-enetc-vf-y := enetc_vf.o $(common-objs)
+-fsl-enetc-vf-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o
+
+ obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o
+ fsl-enetc-ierb-y := enetc_ierb.o
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
+index 042327b9981fa..e16bd2b7692f3 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc.c
+@@ -940,7 +940,13 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
+ if (!skb)
+ break;
+
+- rx_byte_cnt += skb->len;
++ /* When set, the outer VLAN header is extracted and reported
++ * in the receive buffer descriptor. So rx_byte_cnt should
++ * add the length of the extracted VLAN header.
++ */
++ if (bd_status & ENETC_RXBD_FLAG_VLAN)
++ rx_byte_cnt += VLAN_HLEN;
++ rx_byte_cnt += skb->len + ETH_HLEN;
+ rx_frm_cnt++;
+
+ napi_gro_receive(napi, skb);
+@@ -1220,23 +1226,6 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
+ rx_ring->stats.xdp_drops++;
+ }
+
+-static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first,
+- int rx_ring_last)
+-{
+- while (rx_ring_first != rx_ring_last) {
+- struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
+-
+- if (rx_swbd->page) {
+- dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+- rx_swbd->dir);
+- __free_page(rx_swbd->page);
+- rx_swbd->page = NULL;
+- }
+- enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
+- }
+- rx_ring->stats.xdp_redirect_failures++;
+-}
+-
+ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ struct napi_struct *napi, int work_limit,
+ struct bpf_prog *prog)
+@@ -1258,8 +1247,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ int orig_i, orig_cleaned_cnt;
+ struct xdp_buff xdp_buff;
+ struct sk_buff *skb;
+- int tmp_orig_i, err;
+ u32 bd_status;
++ int err;
+
+ rxbd = enetc_rxbd(rx_ring, i);
+ bd_status = le32_to_cpu(rxbd->r.lstatus);
+@@ -1346,18 +1335,16 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ break;
+ }
+
+- tmp_orig_i = orig_i;
+-
+- while (orig_i != i) {
+- enetc_flip_rx_buff(rx_ring,
+- &rx_ring->rx_swbd[orig_i]);
+- enetc_bdr_idx_inc(rx_ring, &orig_i);
+- }
+-
+ err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog);
+ if (unlikely(err)) {
+- enetc_xdp_free(rx_ring, tmp_orig_i, i);
++ enetc_xdp_drop(rx_ring, orig_i, i);
++ rx_ring->stats.xdp_redirect_failures++;
+ } else {
++ while (orig_i != i) {
++ enetc_flip_rx_buff(rx_ring,
++ &rx_ring->rx_swbd[orig_i]);
++ enetc_bdr_idx_inc(rx_ring, &orig_i);
++ }
+ xdp_redirect_frm_cnt++;
+ rx_ring->stats.xdp_redirect++;
+ }
+@@ -1768,7 +1755,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+ /* enable Tx ints by setting pkt thr to 1 */
+ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1);
+
+- tbmr = ENETC_TBMR_EN;
++ tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio);
+ if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX)
+ tbmr |= ENETC_TBMR_VIH;
+
+@@ -1800,7 +1787,12 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+ else
+ enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE);
+
++ /* Also prepare the consumer index in case page allocation never
++ * succeeds. In that case, hardware will never advance producer index
++ * to match consumer index, and will drop all frames.
++ */
+ enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
++ enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, 1);
+
+ /* enable Rx ints by setting pkt thr to 1 */
+ enetc_rxbdr_wr(hw, idx, ENETC_RBICR0, ENETC_RBICR0_ICEN | 0x1);
+@@ -1826,13 +1818,14 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+
+ static void enetc_setup_bdrs(struct enetc_ndev_priv *priv)
+ {
++ struct enetc_hw *hw = &priv->si->hw;
+ int i;
+
+ for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_setup_txbdr(&priv->si->hw, priv->tx_ring[i]);
++ enetc_setup_txbdr(hw, priv->tx_ring[i]);
+
+ for (i = 0; i < priv->num_rx_rings; i++)
+- enetc_setup_rxbdr(&priv->si->hw, priv->rx_ring[i]);
++ enetc_setup_rxbdr(hw, priv->rx_ring[i]);
+ }
+
+ static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+@@ -1865,13 +1858,14 @@ static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+
+ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv)
+ {
++ struct enetc_hw *hw = &priv->si->hw;
+ int i;
+
+ for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_clear_txbdr(&priv->si->hw, priv->tx_ring[i]);
++ enetc_clear_txbdr(hw, priv->tx_ring[i]);
+
+ for (i = 0; i < priv->num_rx_rings; i++)
+- enetc_clear_rxbdr(&priv->si->hw, priv->rx_ring[i]);
++ enetc_clear_rxbdr(hw, priv->rx_ring[i]);
+
+ udelay(1);
+ }
+@@ -1879,13 +1873,13 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv)
+ static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
+ {
+ struct pci_dev *pdev = priv->si->pdev;
++ struct enetc_hw *hw = &priv->si->hw;
+ int i, j, err;
+
+ for (i = 0; i < priv->bdr_int_num; i++) {
+ int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i);
+ struct enetc_int_vector *v = priv->int_vector[i];
+ int entry = ENETC_BDR_INT_BASE_IDX + i;
+- struct enetc_hw *hw = &priv->si->hw;
+
+ snprintf(v->name, sizeof(v->name), "%s-rxtx%d",
+ priv->ndev->name, i);
+@@ -1973,13 +1967,14 @@ static void enetc_setup_interrupts(struct enetc_ndev_priv *priv)
+
+ static void enetc_clear_interrupts(struct enetc_ndev_priv *priv)
+ {
++ struct enetc_hw *hw = &priv->si->hw;
+ int i;
+
+ for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_txbdr_wr(&priv->si->hw, i, ENETC_TBIER, 0);
++ enetc_txbdr_wr(hw, i, ENETC_TBIER, 0);
+
+ for (i = 0; i < priv->num_rx_rings; i++)
+- enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0);
++ enetc_rxbdr_wr(hw, i, ENETC_RBIER, 0);
+ }
+
+ static int enetc_phylink_connect(struct net_device *ndev)
+@@ -2011,14 +2006,14 @@ static void enetc_tx_onestep_tstamp(struct work_struct *work)
+
+ priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp);
+
+- netif_tx_lock(priv->ndev);
++ netif_tx_lock_bh(priv->ndev);
+
+ clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags);
+ skb = skb_dequeue(&priv->tx_skbs);
+ if (skb)
+ enetc_start_xmit(skb, priv->ndev);
+
+- netif_tx_unlock(priv->ndev);
++ netif_tx_unlock_bh(priv->ndev);
+ }
+
+ static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv)
+@@ -2142,10 +2137,11 @@ int enetc_close(struct net_device *ndev)
+ return 0;
+ }
+
+-static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
++int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
+ {
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct tc_mqprio_qopt *mqprio = type_data;
++ struct enetc_hw *hw = &priv->si->hw;
+ struct enetc_bdr *tx_ring;
+ int num_stack_tx_queues;
+ u8 num_tc;
+@@ -2162,7 +2158,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
+ /* Reset all ring priorities to 0 */
+ for (i = 0; i < priv->num_tx_rings; i++) {
+ tx_ring = priv->tx_ring[i];
+- enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0);
++ tx_ring->prio = 0;
++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio);
+ }
+
+ return 0;
+@@ -2181,7 +2178,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
+ */
+ for (i = 0; i < num_tc; i++) {
+ tx_ring = priv->tx_ring[i];
+- enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i);
++ tx_ring->prio = i;
++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio);
+ }
+
+ /* Reset the number of netdev queues based on the TC count */
+@@ -2196,25 +2194,6 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
+ return 0;
+ }
+
+-int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+- void *type_data)
+-{
+- switch (type) {
+- case TC_SETUP_QDISC_MQPRIO:
+- return enetc_setup_tc_mqprio(ndev, type_data);
+- case TC_SETUP_QDISC_TAPRIO:
+- return enetc_setup_tc_taprio(ndev, type_data);
+- case TC_SETUP_QDISC_CBS:
+- return enetc_setup_tc_cbs(ndev, type_data);
+- case TC_SETUP_QDISC_ETF:
+- return enetc_setup_tc_txtime(ndev, type_data);
+- case TC_SETUP_BLOCK:
+- return enetc_setup_tc_psfp(ndev, type_data);
+- default:
+- return -EOPNOTSUPP;
+- }
+-}
+-
+ static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+ {
+@@ -2307,52 +2286,29 @@ static int enetc_set_rss(struct net_device *ndev, int en)
+ return 0;
+ }
+
+-static int enetc_set_psfp(struct net_device *ndev, int en)
+-{
+- struct enetc_ndev_priv *priv = netdev_priv(ndev);
+- int err;
+-
+- if (en) {
+- err = enetc_psfp_enable(priv);
+- if (err)
+- return err;
+-
+- priv->active_offloads |= ENETC_F_QCI;
+- return 0;
+- }
+-
+- err = enetc_psfp_disable(priv);
+- if (err)
+- return err;
+-
+- priv->active_offloads &= ~ENETC_F_QCI;
+-
+- return 0;
+-}
+-
+ static void enetc_enable_rxvlan(struct net_device *ndev, bool en)
+ {
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ struct enetc_hw *hw = &priv->si->hw;
+ int i;
+
+ for (i = 0; i < priv->num_rx_rings; i++)
+- enetc_bdr_enable_rxvlan(&priv->si->hw, i, en);
++ enetc_bdr_enable_rxvlan(hw, i, en);
+ }
+
+ static void enetc_enable_txvlan(struct net_device *ndev, bool en)
+ {
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ struct enetc_hw *hw = &priv->si->hw;
+ int i;
+
+ for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_bdr_enable_txvlan(&priv->si->hw, i, en);
++ enetc_bdr_enable_txvlan(hw, i, en);
+ }
+
+-int enetc_set_features(struct net_device *ndev,
+- netdev_features_t features)
++void enetc_set_features(struct net_device *ndev, netdev_features_t features)
+ {
+ netdev_features_t changed = ndev->features ^ features;
+- int err = 0;
+
+ if (changed & NETIF_F_RXHASH)
+ enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH));
+@@ -2364,11 +2320,6 @@ int enetc_set_features(struct net_device *ndev,
+ if (changed & NETIF_F_HW_VLAN_CTAG_TX)
+ enetc_enable_txvlan(ndev,
+ !!(features & NETIF_F_HW_VLAN_CTAG_TX));
+-
+- if (changed & NETIF_F_HW_TC)
+- err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC));
+-
+- return err;
+ }
+
+ #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
+index 08b283347d9ce..a3b936375c561 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc.h
++++ b/drivers/net/ethernet/freescale/enetc/enetc.h
+@@ -91,6 +91,7 @@ struct enetc_bdr {
+ void __iomem *rcir;
+ };
+ u16 index;
++ u16 prio;
+ int bd_count; /* # of BDs */
+ int next_to_use;
+ int next_to_clean;
+@@ -385,11 +386,9 @@ void enetc_start(struct net_device *ndev);
+ void enetc_stop(struct net_device *ndev);
+ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev);
+ struct net_device_stats *enetc_get_stats(struct net_device *ndev);
+-int enetc_set_features(struct net_device *ndev,
+- netdev_features_t features);
++void enetc_set_features(struct net_device *ndev, netdev_features_t features);
+ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
+-int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+- void *type_data);
++int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data);
+ int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
+ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
+ struct xdp_frame **frames, u32 flags);
+@@ -421,22 +420,24 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data);
+ int enetc_psfp_init(struct enetc_ndev_priv *priv);
+ int enetc_psfp_clean(struct enetc_ndev_priv *priv);
++int enetc_set_psfp(struct net_device *ndev, bool en);
+
+ static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv)
+ {
++ struct enetc_hw *hw = &priv->si->hw;
+ u32 reg;
+
+- reg = enetc_port_rd(&priv->si->hw, ENETC_PSIDCAPR);
++ reg = enetc_port_rd(hw, ENETC_PSIDCAPR);
+ priv->psfp_cap.max_streamid = reg & ENETC_PSIDCAPR_MSK;
+ /* Port stream filter capability */
+- reg = enetc_port_rd(&priv->si->hw, ENETC_PSFCAPR);
++ reg = enetc_port_rd(hw, ENETC_PSFCAPR);
+ priv->psfp_cap.max_psfp_filter = reg & ENETC_PSFCAPR_MSK;
+ /* Port stream gate capability */
+- reg = enetc_port_rd(&priv->si->hw, ENETC_PSGCAPR);
++ reg = enetc_port_rd(hw, ENETC_PSGCAPR);
+ priv->psfp_cap.max_psfp_gate = (reg & ENETC_PSGCAPR_SGIT_MSK);
+ priv->psfp_cap.max_psfp_gatelist = (reg & ENETC_PSGCAPR_GCL_MSK) >> 16;
+ /* Port flow meter capability */
+- reg = enetc_port_rd(&priv->si->hw, ENETC_PFMCAPR);
++ reg = enetc_port_rd(hw, ENETC_PFMCAPR);
+ priv->psfp_cap.max_psfp_meter = reg & ENETC_PFMCAPR_MSK;
+ }
+
+@@ -496,4 +497,9 @@ static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv)
+ {
+ return 0;
+ }
++
++static inline int enetc_set_psfp(struct net_device *ndev, bool en)
++{
++ return 0;
++}
+ #endif
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+index 910b9f722504a..d62c188c87480 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+@@ -672,7 +672,10 @@ static int enetc_get_ts_info(struct net_device *ndev,
+ #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+- SOF_TIMESTAMPING_RAW_HARDWARE;
++ SOF_TIMESTAMPING_RAW_HARDWARE |
++ SOF_TIMESTAMPING_TX_SOFTWARE |
++ SOF_TIMESTAMPING_RX_SOFTWARE |
++ SOF_TIMESTAMPING_SOFTWARE;
+
+ info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON) |
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+index d522bd5c90b49..5efb079ef25fa 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+@@ -708,6 +708,13 @@ static int enetc_pf_set_features(struct net_device *ndev,
+ {
+ netdev_features_t changed = ndev->features ^ features;
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ int err;
++
++ if (changed & NETIF_F_HW_TC) {
++ err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC));
++ if (err)
++ return err;
++ }
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ struct enetc_pf *pf = enetc_si_priv(priv->si);
+@@ -721,7 +728,28 @@ static int enetc_pf_set_features(struct net_device *ndev,
+ if (changed & NETIF_F_LOOPBACK)
+ enetc_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK));
+
+- return enetc_set_features(ndev, features);
++ enetc_set_features(ndev, features);
++
++ return 0;
++}
++
++static int enetc_pf_setup_tc(struct net_device *ndev, enum tc_setup_type type,
++ void *type_data)
++{
++ switch (type) {
++ case TC_SETUP_QDISC_MQPRIO:
++ return enetc_setup_tc_mqprio(ndev, type_data);
++ case TC_SETUP_QDISC_TAPRIO:
++ return enetc_setup_tc_taprio(ndev, type_data);
++ case TC_SETUP_QDISC_CBS:
++ return enetc_setup_tc_cbs(ndev, type_data);
++ case TC_SETUP_QDISC_ETF:
++ return enetc_setup_tc_txtime(ndev, type_data);
++ case TC_SETUP_BLOCK:
++ return enetc_setup_tc_psfp(ndev, type_data);
++ default:
++ return -EOPNOTSUPP;
++ }
+ }
+
+ static const struct net_device_ops enetc_ndev_ops = {
+@@ -738,7 +766,7 @@ static const struct net_device_ops enetc_ndev_ops = {
+ .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk,
+ .ndo_set_features = enetc_pf_set_features,
+ .ndo_eth_ioctl = enetc_ioctl,
+- .ndo_setup_tc = enetc_setup_tc,
++ .ndo_setup_tc = enetc_pf_setup_tc,
+ .ndo_bpf = enetc_setup_bpf,
+ .ndo_xdp_xmit = enetc_xdp_xmit,
+ };
+@@ -772,9 +800,6 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
+
+ ndev->priv_flags |= IFF_UNICAST_FLT;
+
+- if (si->hw_features & ENETC_SI_F_QBV)
+- priv->active_offloads |= ENETC_F_QBV;
+-
+ if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) {
+ priv->active_offloads |= ENETC_F_QCI;
+ ndev->features |= NETIF_F_HW_TC;
+@@ -1025,7 +1050,8 @@ static void enetc_pl_mac_link_up(struct phylink_config *config,
+ int idx;
+
+ priv = netdev_priv(pf->si->ndev);
+- if (priv->active_offloads & ENETC_F_QBV)
++
++ if (pf->si->hw_features & ENETC_SI_F_QBV)
+ enetc_sched_speed_set(priv, speed);
+
+ if (!phylink_autoneg_inband(mode) &&
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
+index bc594892507ac..8c36615256944 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
+@@ -8,7 +8,7 @@
+ #include "enetc.h"
+
+ int enetc_phc_index = -1;
+-EXPORT_SYMBOL(enetc_phc_index);
++EXPORT_SYMBOL_GPL(enetc_phc_index);
+
+ static struct ptp_clock_info enetc_ptp_caps = {
+ .owner = THIS_MODULE,
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+index 4577226d3c6ad..c348b6fb0e6f9 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+@@ -17,8 +17,9 @@ static u16 enetc_get_max_gcl_len(struct enetc_hw *hw)
+
+ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed)
+ {
++ struct enetc_hw *hw = &priv->si->hw;
+ u32 old_speed = priv->speed;
+- u32 pspeed;
++ u32 pspeed, tmp;
+
+ if (speed == old_speed)
+ return;
+@@ -39,36 +40,38 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed)
+ }
+
+ priv->speed = speed;
+- enetc_port_wr(&priv->si->hw, ENETC_PMR,
+- (enetc_port_rd(&priv->si->hw, ENETC_PMR)
+- & (~ENETC_PMR_PSPEED_MASK))
+- | pspeed);
++ tmp = enetc_port_rd(hw, ENETC_PMR);
++ enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed);
+ }
+
++#define ENETC_QOS_ALIGN 64
+ static int enetc_setup_taprio(struct net_device *ndev,
+ struct tc_taprio_qopt_offload *admin_conf)
+ {
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ struct enetc_hw *hw = &priv->si->hw;
+ struct enetc_cbd cbd = {.cmd = 0};
+ struct tgs_gcl_conf *gcl_config;
+ struct tgs_gcl_data *gcl_data;
++ dma_addr_t dma, dma_align;
+ struct gce *gce;
+- dma_addr_t dma;
+ u16 data_size;
+ u16 gcl_len;
++ void *tmp;
+ u32 tge;
+ int err;
+ int i;
+
+- if (admin_conf->num_entries > enetc_get_max_gcl_len(&priv->si->hw))
++ if (admin_conf->num_entries > enetc_get_max_gcl_len(hw))
+ return -EINVAL;
+ gcl_len = admin_conf->num_entries;
+
+- tge = enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET);
++ tge = enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET);
+ if (!admin_conf->enable) {
+- enetc_wr(&priv->si->hw,
+- ENETC_QBV_PTGCR_OFFSET,
+- tge & (~ENETC_QBV_TGE));
++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE);
++
++ priv->active_offloads &= ~ENETC_F_QBV;
++
+ return 0;
+ }
+
+@@ -82,9 +85,16 @@ static int enetc_setup_taprio(struct net_device *ndev,
+ gcl_config = &cbd.gcl_conf;
+
+ data_size = struct_size(gcl_data, entry, gcl_len);
+- gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
+- if (!gcl_data)
++ tmp = dma_alloc_coherent(&priv->si->pdev->dev,
++ data_size + ENETC_QOS_ALIGN,
++ &dma, GFP_KERNEL);
++ if (!tmp) {
++ dev_err(&priv->si->pdev->dev,
++ "DMA mapping of taprio gate list failed!\n");
+ return -ENOMEM;
++ }
++ dma_align = ALIGN(dma, ENETC_QOS_ALIGN);
++ gcl_data = (struct tgs_gcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN);
+
+ gce = (struct gce *)(gcl_data + 1);
+
+@@ -110,30 +120,22 @@ static int enetc_setup_taprio(struct net_device *ndev,
+ cbd.length = cpu_to_le16(data_size);
+ cbd.status_flags = 0;
+
+- dma = dma_map_single(&priv->si->pdev->dev, gcl_data,
+- data_size, DMA_TO_DEVICE);
+- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
+- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
+- kfree(gcl_data);
+- return -ENOMEM;
+- }
+-
+- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
++ cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
++ cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+ cbd.cls = BDCR_CMD_PORT_GCL;
+ cbd.status_flags = 0;
+
+- enetc_wr(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET,
+- tge | ENETC_QBV_TGE);
++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge | ENETC_QBV_TGE);
+
+ err = enetc_send_cmd(priv->si, &cbd);
+ if (err)
+- enetc_wr(&priv->si->hw,
+- ENETC_QBV_PTGCR_OFFSET,
+- tge & (~ENETC_QBV_TGE));
++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE);
+
+- dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE);
+- kfree(gcl_data);
++ dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN,
++ tmp, dma);
++
++ if (!err)
++ priv->active_offloads |= ENETC_F_QBV;
+
+ return err;
+ }
+@@ -142,6 +144,8 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
+ {
+ struct tc_taprio_qopt_offload *taprio = type_data;
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ struct enetc_hw *hw = &priv->si->hw;
++ struct enetc_bdr *tx_ring;
+ int err;
+ int i;
+
+@@ -150,18 +154,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
+ if (priv->tx_ring[i]->tsd_enable)
+ return -EBUSY;
+
+- for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_set_bdr_prio(&priv->si->hw,
+- priv->tx_ring[i]->index,
+- taprio->enable ? i : 0);
++ for (i = 0; i < priv->num_tx_rings; i++) {
++ tx_ring = priv->tx_ring[i];
++ tx_ring->prio = taprio->enable ? i : 0;
++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio);
++ }
+
+ err = enetc_setup_taprio(ndev, taprio);
+-
+- if (err)
+- for (i = 0; i < priv->num_tx_rings; i++)
+- enetc_set_bdr_prio(&priv->si->hw,
+- priv->tx_ring[i]->index,
+- taprio->enable ? 0 : i);
++ if (err) {
++ for (i = 0; i < priv->num_tx_rings; i++) {
++ tx_ring = priv->tx_ring[i];
++ tx_ring->prio = taprio->enable ? 0 : i;
++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio);
++ }
++ }
+
+ return err;
+ }
+@@ -182,7 +188,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ struct tc_cbs_qopt_offload *cbs = type_data;
+ u32 port_transmit_rate = priv->speed;
+ u8 tc_nums = netdev_get_num_tc(ndev);
+- struct enetc_si *si = priv->si;
++ struct enetc_hw *hw = &priv->si->hw;
+ u32 hi_credit_bit, hi_credit_reg;
+ u32 max_interference_size;
+ u32 port_frame_max_size;
+@@ -191,8 +197,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ int bw_sum = 0;
+ u8 bw;
+
+- prio_top = netdev_get_prio_tc_map(ndev, tc_nums - 1);
+- prio_next = netdev_get_prio_tc_map(ndev, tc_nums - 2);
++ prio_top = tc_nums - 1;
++ prio_next = tc_nums - 2;
+
+ /* Support highest prio and second prio tc in cbs mode */
+ if (tc != prio_top && tc != prio_next)
+@@ -203,15 +209,15 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ * lower than this TC have been disabled.
+ */
+ if (tc == prio_top &&
+- enetc_get_cbs_enable(&si->hw, prio_next)) {
++ enetc_get_cbs_enable(hw, prio_next)) {
+ dev_err(&ndev->dev,
+ "Disable TC%d before disable TC%d\n",
+ prio_next, tc);
+ return -EINVAL;
+ }
+
+- enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), 0);
+- enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), 0);
++ enetc_port_wr(hw, ENETC_PTCCBSR1(tc), 0);
++ enetc_port_wr(hw, ENETC_PTCCBSR0(tc), 0);
+
+ return 0;
+ }
+@@ -228,13 +234,13 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ * higher than this TC have been enabled.
+ */
+ if (tc == prio_next) {
+- if (!enetc_get_cbs_enable(&si->hw, prio_top)) {
++ if (!enetc_get_cbs_enable(hw, prio_top)) {
+ dev_err(&ndev->dev,
+ "Enable TC%d first before enable TC%d\n",
+ prio_top, prio_next);
+ return -EINVAL;
+ }
+- bw_sum += enetc_get_cbs_bw(&si->hw, prio_top);
++ bw_sum += enetc_get_cbs_bw(hw, prio_top);
+ }
+
+ if (bw_sum + bw >= 100) {
+@@ -243,7 +249,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ return -EINVAL;
+ }
+
+- enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc));
++ enetc_port_rd(hw, ENETC_PTCMSDUR(tc));
+
+ /* For top prio TC, the max_interfrence_size is maxSizedFrame.
+ *
+@@ -263,8 +269,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ u32 m0, ma, r0, ra;
+
+ m0 = port_frame_max_size * 8;
+- ma = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(prio_top)) * 8;
+- ra = enetc_get_cbs_bw(&si->hw, prio_top) *
++ ma = enetc_port_rd(hw, ENETC_PTCMSDUR(prio_top)) * 8;
++ ra = enetc_get_cbs_bw(hw, prio_top) *
+ port_transmit_rate * 10000ULL;
+ r0 = port_transmit_rate * 1000000ULL;
+ max_interference_size = m0 + ma +
+@@ -284,10 +290,10 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
+ hi_credit_reg = (u32)div_u64((ENETC_CLK * 100ULL) * hi_credit_bit,
+ port_transmit_rate * 1000000ULL);
+
+- enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), hi_credit_reg);
++ enetc_port_wr(hw, ENETC_PTCCBSR1(tc), hi_credit_reg);
+
+ /* Set bw register and enable this traffic class */
+- enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE);
++ enetc_port_wr(hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE);
+
+ return 0;
+ }
+@@ -297,6 +303,7 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct tc_etf_qopt_offload *qopt = type_data;
+ u8 tc_nums = netdev_get_num_tc(ndev);
++ struct enetc_hw *hw = &priv->si->hw;
+ int tc;
+
+ if (!tc_nums)
+@@ -312,12 +319,11 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
+ return -EBUSY;
+
+ /* TSD and Qbv are mutually exclusive in hardware */
+- if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE)
++ if (enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE)
+ return -EBUSY;
+
+ priv->tx_ring[tc]->tsd_enable = qopt->enable;
+- enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc),
+- qopt->enable ? ENETC_TSDE : 0);
++ enetc_port_wr(hw, ENETC_PTCTSDR(tc), qopt->enable ? ENETC_TSDE : 0);
+
+ return 0;
+ }
+@@ -463,8 +469,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+ struct enetc_cbd cbd = {.cmd = 0};
+ struct streamid_data *si_data;
+ struct streamid_conf *si_conf;
++ dma_addr_t dma, dma_align;
+ u16 data_size;
+- dma_addr_t dma;
++ void *tmp;
+ int port;
+ int err;
+
+@@ -485,19 +492,20 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+ cbd.status_flags = 0;
+
+ data_size = sizeof(struct streamid_data);
+- si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
+- cbd.length = cpu_to_le16(data_size);
+-
+- dma = dma_map_single(&priv->si->pdev->dev, si_data,
+- data_size, DMA_FROM_DEVICE);
+- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
+- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
+- kfree(si_data);
++ tmp = dma_alloc_coherent(&priv->si->pdev->dev,
++ data_size + ENETC_QOS_ALIGN,
++ &dma, GFP_KERNEL);
++ if (!tmp) {
++ dev_err(&priv->si->pdev->dev,
++ "DMA mapping of stream identify failed!\n");
+ return -ENOMEM;
+ }
++ dma_align = ALIGN(dma, ENETC_QOS_ALIGN);
++ si_data = (struct streamid_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN);
+
+- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
++ cbd.length = cpu_to_le16(data_size);
++ cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
++ cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+ eth_broadcast_addr(si_data->dmac);
+ si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK
+ + ((0x3 << 14) | ENETC_CBDR_SID_VIDM));
+@@ -512,12 +520,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+
+ err = enetc_send_cmd(priv->si, &cbd);
+ if (err)
+- return -EINVAL;
++ goto out;
+
+- if (!enable) {
+- kfree(si_data);
+- return 0;
+- }
++ if (!enable)
++ goto out;
+
+ /* Enable the entry overwrite again incase space flushed by hardware */
+ memset(&cbd, 0, sizeof(cbd));
+@@ -539,8 +545,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+
+ cbd.length = cpu_to_le16(data_size);
+
+- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
++ cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
++ cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+
+ /* VIDM default to be 1.
+ * VID Match. If set (b1) then the VID must match, otherwise
+@@ -560,7 +566,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+ }
+
+ err = enetc_send_cmd(priv->si, &cbd);
+- kfree(si_data);
++out:
++ dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN,
++ tmp, dma);
+
+ return err;
+ }
+@@ -629,8 +637,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
+ {
+ struct enetc_cbd cbd = { .cmd = 2 };
+ struct sfi_counter_data *data_buf;
+- dma_addr_t dma;
++ dma_addr_t dma, dma_align;
+ u16 data_size;
++ void *tmp;
+ int err;
+
+ cbd.index = cpu_to_le16((u16)index);
+@@ -639,19 +648,19 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
+ cbd.status_flags = 0;
+
+ data_size = sizeof(struct sfi_counter_data);
+- data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
+- if (!data_buf)
++ tmp = dma_alloc_coherent(&priv->si->pdev->dev,
++ data_size + ENETC_QOS_ALIGN,
++ &dma, GFP_KERNEL);
++ if (!tmp) {
++ dev_err(&priv->si->pdev->dev,
++ "DMA mapping of stream counter failed!\n");
+ return -ENOMEM;
+-
+- dma = dma_map_single(&priv->si->pdev->dev, data_buf,
+- data_size, DMA_FROM_DEVICE);
+- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
+- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
+- err = -ENOMEM;
+- goto exit;
+ }
+- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
++ dma_align = ALIGN(dma, ENETC_QOS_ALIGN);
++ data_buf = (struct sfi_counter_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN);
++
++ cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
++ cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+
+ cbd.length = cpu_to_le16(data_size);
+
+@@ -680,7 +689,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
+ data_buf->flow_meter_dropl;
+
+ exit:
+- kfree(data_buf);
++ dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN,
++ tmp, dma);
++
+ return err;
+ }
+
+@@ -719,9 +730,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
+ struct sgcl_conf *sgcl_config;
+ struct sgcl_data *sgcl_data;
+ struct sgce *sgce;
+- dma_addr_t dma;
++ dma_addr_t dma, dma_align;
+ u16 data_size;
+ int err, i;
++ void *tmp;
+ u64 now;
+
+ cbd.index = cpu_to_le16(sgi->index);
+@@ -768,24 +780,20 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
+ sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3;
+
+ data_size = struct_size(sgcl_data, sgcl, sgi->num_entries);
+-
+- sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL);
+- if (!sgcl_data)
+- return -ENOMEM;
+-
+- cbd.length = cpu_to_le16(data_size);
+-
+- dma = dma_map_single(&priv->si->pdev->dev,
+- sgcl_data, data_size,
+- DMA_FROM_DEVICE);
+- if (dma_mapping_error(&priv->si->pdev->dev, dma)) {
+- netdev_err(priv->si->ndev, "DMA mapping failed!\n");
+- kfree(sgcl_data);
++ tmp = dma_alloc_coherent(&priv->si->pdev->dev,
++ data_size + ENETC_QOS_ALIGN,
++ &dma, GFP_KERNEL);
++ if (!tmp) {
++ dev_err(&priv->si->pdev->dev,
++ "DMA mapping of stream counter failed!\n");
+ return -ENOMEM;
+ }
++ dma_align = ALIGN(dma, ENETC_QOS_ALIGN);
++ sgcl_data = (struct sgcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN);
+
+- cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+- cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
++ cbd.length = cpu_to_le16(data_size);
++ cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align));
++ cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align));
+
+ sgce = &sgcl_data->sgcl[0];
+
+@@ -840,7 +848,8 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
+ err = enetc_send_cmd(priv->si, &cbd);
+
+ exit:
+- kfree(sgcl_data);
++ dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN,
++ tmp, dma);
+
+ return err;
+ }
+@@ -1261,7 +1270,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
+ int index;
+
+ index = enetc_get_free_index(priv);
+- if (sfi->handle < 0) {
++ if (index < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "No Stream Filter resource!");
+ err = -ENOSPC;
+ goto free_fmi;
+@@ -1525,6 +1534,29 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ }
+ }
+
++int enetc_set_psfp(struct net_device *ndev, bool en)
++{
++ struct enetc_ndev_priv *priv = netdev_priv(ndev);
++ int err;
++
++ if (en) {
++ err = enetc_psfp_enable(priv);
++ if (err)
++ return err;
++
++ priv->active_offloads |= ENETC_F_QCI;
++ return 0;
++ }
++
++ err = enetc_psfp_disable(priv);
++ if (err)
++ return err;
++
++ priv->active_offloads &= ~ENETC_F_QCI;
++
++ return 0;
++}
++
+ int enetc_psfp_init(struct enetc_ndev_priv *priv)
+ {
+ if (epsfp.psfp_sfi_bitmap)
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+index 1a9d1e8b772ce..acd4a3167ed6a 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+@@ -88,7 +88,20 @@ static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr)
+ static int enetc_vf_set_features(struct net_device *ndev,
+ netdev_features_t features)
+ {
+- return enetc_set_features(ndev, features);
++ enetc_set_features(ndev, features);
++
++ return 0;
++}
++
++static int enetc_vf_setup_tc(struct net_device *ndev, enum tc_setup_type type,
++ void *type_data)
++{
++ switch (type) {
++ case TC_SETUP_QDISC_MQPRIO:
++ return enetc_setup_tc_mqprio(ndev, type_data);
++ default:
++ return -EOPNOTSUPP;
++ }
+ }
+
+ /* Probing/ Init */
+@@ -100,7 +113,7 @@ static const struct net_device_ops enetc_ndev_ops = {
+ .ndo_set_mac_address = enetc_vf_set_mac_addr,
+ .ndo_set_features = enetc_vf_set_features,
+ .ndo_eth_ioctl = enetc_ioctl,
+- .ndo_setup_tc = enetc_setup_tc,
++ .ndo_setup_tc = enetc_vf_setup_tc,
+ };
+
+ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
+diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
+index 7b4961daa2540..ed7301b691694 100644
+--- a/drivers/net/ethernet/freescale/fec.h
++++ b/drivers/net/ethernet/freescale/fec.h
+@@ -377,6 +377,9 @@ struct bufdesc_ex {
+ #define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */
+ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2)
+ #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2)
++#define FEC_ENET_RXF_GET(X) (((X) == 0) ? FEC_ENET_RXF_0 : \
++ (((X) == 1) ? FEC_ENET_RXF_1 : \
++ FEC_ENET_RXF_2))
+ #define FEC_ENET_TS_AVAIL ((uint)0x00010000)
+ #define FEC_ENET_TS_TIMER ((uint)0x00008000)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index ec87b370bba1f..c0c96de7a9de4 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -72,7 +72,7 @@
+ #include "fec.h"
+
+ static void set_multicast_list(struct net_device *ndev);
+-static void fec_enet_itr_coal_init(struct net_device *ndev);
++static void fec_enet_itr_coal_set(struct net_device *ndev);
+
+ #define DRIVER_NAME "fec"
+
+@@ -656,7 +656,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb,
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "Tx DMA memory map failed\n");
+- return NETDEV_TX_BUSY;
++ return NETDEV_TX_OK;
+ }
+
+ bdp->cbd_datlen = cpu_to_fec16(size);
+@@ -718,7 +718,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq,
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "Tx DMA memory map failed\n");
+- return NETDEV_TX_BUSY;
++ return NETDEV_TX_OK;
+ }
+ }
+
+@@ -1163,8 +1163,8 @@ fec_restart(struct net_device *ndev)
+ writel(0, fep->hwp + FEC_IMASK);
+
+ /* Init the interrupt coalescing */
+- fec_enet_itr_coal_init(ndev);
+-
++ if (fep->quirks & FEC_QUIRK_HAS_COALESCE)
++ fec_enet_itr_coal_set(ndev);
+ }
+
+ static void fec_enet_stop_mode(struct fec_enet_private *fep, bool enabled)
+@@ -1480,7 +1480,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
+ break;
+ pkt_received++;
+
+- writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT);
++ writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT);
+
+ /* Check for errors. */
+ status ^= BD_ENET_RX_LAST;
+@@ -2336,6 +2336,31 @@ static u32 fec_enet_register_offset[] = {
+ IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR,
+ IEEE_R_FDXFC, IEEE_R_OCTETS_OK
+ };
++/* for i.MX6ul */
++static u32 fec_enet_register_offset_6ul[] = {
++ FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
++ FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
++ FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, FEC_OPD, FEC_TXIC0, FEC_RXIC0,
++ FEC_HASH_TABLE_HIGH, FEC_HASH_TABLE_LOW, FEC_GRP_HASH_TABLE_HIGH,
++ FEC_GRP_HASH_TABLE_LOW, FEC_X_WMRK, FEC_R_DES_START_0,
++ FEC_X_DES_START_0, FEC_R_BUFF_SIZE_0, FEC_R_FIFO_RSFL, FEC_R_FIFO_RSEM,
++ FEC_R_FIFO_RAEM, FEC_R_FIFO_RAFL, FEC_RACC,
++ RMON_T_DROP, RMON_T_PACKETS, RMON_T_BC_PKT, RMON_T_MC_PKT,
++ RMON_T_CRC_ALIGN, RMON_T_UNDERSIZE, RMON_T_OVERSIZE, RMON_T_FRAG,
++ RMON_T_JAB, RMON_T_COL, RMON_T_P64, RMON_T_P65TO127, RMON_T_P128TO255,
++ RMON_T_P256TO511, RMON_T_P512TO1023, RMON_T_P1024TO2047,
++ RMON_T_P_GTE2048, RMON_T_OCTETS,
++ IEEE_T_DROP, IEEE_T_FRAME_OK, IEEE_T_1COL, IEEE_T_MCOL, IEEE_T_DEF,
++ IEEE_T_LCOL, IEEE_T_EXCOL, IEEE_T_MACERR, IEEE_T_CSERR, IEEE_T_SQE,
++ IEEE_T_FDXFC, IEEE_T_OCTETS_OK,
++ RMON_R_PACKETS, RMON_R_BC_PKT, RMON_R_MC_PKT, RMON_R_CRC_ALIGN,
++ RMON_R_UNDERSIZE, RMON_R_OVERSIZE, RMON_R_FRAG, RMON_R_JAB,
++ RMON_R_RESVD_O, RMON_R_P64, RMON_R_P65TO127, RMON_R_P128TO255,
++ RMON_R_P256TO511, RMON_R_P512TO1023, RMON_R_P1024TO2047,
++ RMON_R_P_GTE2048, RMON_R_OCTETS,
++ IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR,
++ IEEE_R_FDXFC, IEEE_R_OCTETS_OK
++};
+ #else
+ static __u32 fec_enet_register_version = 1;
+ static u32 fec_enet_register_offset[] = {
+@@ -2360,7 +2385,24 @@ static void fec_enet_get_regs(struct net_device *ndev,
+ u32 *buf = (u32 *)regbuf;
+ u32 i, off;
+ int ret;
++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
++ defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST)
++ u32 *reg_list;
++ u32 reg_cnt;
+
++ if (!of_machine_is_compatible("fsl,imx6ul")) {
++ reg_list = fec_enet_register_offset;
++ reg_cnt = ARRAY_SIZE(fec_enet_register_offset);
++ } else {
++ reg_list = fec_enet_register_offset_6ul;
++ reg_cnt = ARRAY_SIZE(fec_enet_register_offset_6ul);
++ }
++#else
++ /* coldfire */
++ static u32 *reg_list = fec_enet_register_offset;
++ static const u32 reg_cnt = ARRAY_SIZE(fec_enet_register_offset);
++#endif
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return;
+@@ -2369,8 +2411,8 @@ static void fec_enet_get_regs(struct net_device *ndev,
+
+ memset(buf, 0, regs->len);
+
+- for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) {
+- off = fec_enet_register_offset[i];
++ for (i = 0; i < reg_cnt; i++) {
++ off = reg_list[i];
+
+ if ((off == FEC_R_BOUND || off == FEC_R_FSTART) &&
+ !(fep->quirks & FEC_QUIRK_HAS_FRREG))
+@@ -2718,19 +2760,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev,
+ return 0;
+ }
+
+-static void fec_enet_itr_coal_init(struct net_device *ndev)
+-{
+- struct ethtool_coalesce ec;
+-
+- ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
+- ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
+-
+- ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
+- ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
+-
+- fec_enet_set_coalesce(ndev, &ec, NULL, NULL);
+-}
+-
+ static int fec_enet_get_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+@@ -3484,6 +3513,10 @@ static int fec_enet_init(struct net_device *ndev)
+ fep->rx_align = 0x3;
+ fep->tx_align = 0x3;
+ #endif
++ fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT;
++ fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT;
++ fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT;
++ fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT;
+
+ /* Check mask of the streaming and coherent API */
+ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32));
+@@ -3726,7 +3759,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
+ ARRAY_SIZE(out_val));
+ if (ret) {
+ dev_dbg(&fep->pdev->dev, "no stop mode property\n");
+- return ret;
++ goto out;
+ }
+
+ fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np);
+@@ -4021,9 +4054,11 @@ fec_drv_remove(struct platform_device *pdev)
+ struct device_node *np = pdev->dev.of_node;
+ int ret;
+
+- ret = pm_runtime_resume_and_get(&pdev->dev);
++ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0)
+- return ret;
++ dev_err(&pdev->dev,
++ "Failed to resume device in remove callback (%pe)\n",
++ ERR_PTR(ret));
+
+ cancel_work_sync(&fep->tx_timeout_work);
+ fec_ptp_stop(pdev);
+@@ -4036,8 +4071,13 @@ fec_drv_remove(struct platform_device *pdev)
+ of_phy_deregister_fixed_link(np);
+ of_node_put(fep->phy_node);
+
+- clk_disable_unprepare(fep->clk_ahb);
+- clk_disable_unprepare(fep->clk_ipg);
++ /* After pm_runtime_get_sync() failed, the clks are still off, so skip
++ * disabling them again.
++ */
++ if (ret >= 0) {
++ clk_disable_unprepare(fep->clk_ahb);
++ clk_disable_unprepare(fep->clk_ipg);
++ }
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
+diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
+index 73ff359a15f1a..2777dd2b4dd78 100644
+--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
++++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
+@@ -890,7 +890,7 @@ static int mpc52xx_fec_probe(struct platform_device *op)
+ *
+ * First try to read MAC address from DT
+ */
+- rv = of_get_mac_address(np, ndev->dev_addr);
++ rv = of_get_ethdev_address(np, ndev);
+ if (rv) {
+ struct mpc52xx_fec __iomem *fec = priv->fec;
+
+diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
+index d71eac7e19249..c5ae673005908 100644
+--- a/drivers/net/ethernet/freescale/fec_ptp.c
++++ b/drivers/net/ethernet/freescale/fec_ptp.c
+@@ -136,11 +136,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
+ * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds
+ * to current timer would be next second.
+ */
+- tempval = readl(fep->hwp + FEC_ATIME_CTRL);
+- tempval |= FEC_T_CTRL_CAPTURE;
+- writel(tempval, fep->hwp + FEC_ATIME_CTRL);
+-
+- tempval = readl(fep->hwp + FEC_ATIME);
++ tempval = fep->cc.read(&fep->cc);
+ /* Convert the ptp local counter to 1588 timestamp */
+ ns = timecounter_cyc2time(&fep->tc, tempval);
+ ts = ns_to_timespec64(ns);
+diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
+index d9baac0dbc7d0..4c9d05c45c033 100644
+--- a/drivers/net/ethernet/freescale/fman/fman_port.c
++++ b/drivers/net/ethernet/freescale/fman/fman_port.c
+@@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+ fman = dev_get_drvdata(&fm_pdev->dev);
+ if (!fman) {
+ err = -EINVAL;
+- goto return_err;
++ goto put_device;
+ }
+
+ err = of_property_read_u32(port_node, "cell-index", &val);
+@@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+ dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
+ __func__, port_node);
+ err = -EINVAL;
+- goto return_err;
++ goto put_device;
+ }
+ port_id = (u8)val;
+ port->dts_params.id = port_id;
+@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+ } else {
+ dev_err(port->dev, "%s: Illegal port type\n", __func__);
+ err = -EINVAL;
+- goto return_err;
++ goto put_device;
+ }
+
+ port->dts_params.type = port_type;
+@@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+ dev_err(port->dev, "%s: incorrect qman-channel-id\n",
+ __func__);
+ err = -EINVAL;
+- goto return_err;
++ goto put_device;
+ }
+ port->dts_params.qman_channel_id = qman_channel_id;
+ }
+@@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+ dev_err(port->dev, "%s: of_address_to_resource() failed\n",
+ __func__);
+ err = -ENOMEM;
+- goto return_err;
++ goto put_device;
+ }
+
+ port->dts_params.fman = fman;
+@@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev)
+
+ return 0;
+
++put_device:
++ put_device(&fm_pdev->dev);
+ return_err:
+ of_node_put(port_node);
+ free_port:
+diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
+index d9fc5c456bf3e..b0c756b65cc2e 100644
+--- a/drivers/net/ethernet/freescale/fman/mac.c
++++ b/drivers/net/ethernet/freescale/fman/mac.c
+@@ -94,14 +94,17 @@ static void mac_exception(void *handle, enum fman_mac_exceptions ex)
+ __func__, ex);
+ }
+
+-static void set_fman_mac_params(struct mac_device *mac_dev,
+- struct fman_mac_params *params)
++static int set_fman_mac_params(struct mac_device *mac_dev,
++ struct fman_mac_params *params)
+ {
+ struct mac_priv_s *priv = mac_dev->priv;
+
+ params->base_addr = (typeof(params->base_addr))
+ devm_ioremap(priv->dev, mac_dev->res->start,
+ resource_size(mac_dev->res));
++ if (!params->base_addr)
++ return -ENOMEM;
++
+ memcpy(&params->addr, mac_dev->addr, sizeof(mac_dev->addr));
+ params->max_speed = priv->max_speed;
+ params->phy_if = mac_dev->phy_if;
+@@ -112,6 +115,8 @@ static void set_fman_mac_params(struct mac_device *mac_dev,
+ params->event_cb = mac_exception;
+ params->dev_id = mac_dev;
+ params->internal_phy_node = priv->internal_phy_node;
++
++ return 0;
+ }
+
+ static int tgec_initialization(struct mac_device *mac_dev)
+@@ -123,7 +128,9 @@ static int tgec_initialization(struct mac_device *mac_dev)
+
+ priv = mac_dev->priv;
+
+- set_fman_mac_params(mac_dev, &params);
++ err = set_fman_mac_params(mac_dev, &params);
++ if (err)
++ goto _return;
+
+ mac_dev->fman_mac = tgec_config(&params);
+ if (!mac_dev->fman_mac) {
+@@ -169,7 +176,9 @@ static int dtsec_initialization(struct mac_device *mac_dev)
+
+ priv = mac_dev->priv;
+
+- set_fman_mac_params(mac_dev, &params);
++ err = set_fman_mac_params(mac_dev, &params);
++ if (err)
++ goto _return;
+
+ mac_dev->fman_mac = dtsec_config(&params);
+ if (!mac_dev->fman_mac) {
+@@ -218,7 +227,9 @@ static int memac_initialization(struct mac_device *mac_dev)
+
+ priv = mac_dev->priv;
+
+- set_fman_mac_params(mac_dev, &params);
++ err = set_fman_mac_params(mac_dev, &params);
++ if (err)
++ goto _return;
+
+ if (priv->max_speed == SPEED_10000)
+ params.phy_if = PHY_INTERFACE_MODE_XGMII;
+@@ -871,12 +882,21 @@ _return:
+ return err;
+ }
+
++static int mac_remove(struct platform_device *pdev)
++{
++ struct mac_device *mac_dev = platform_get_drvdata(pdev);
++
++ platform_device_unregister(mac_dev->priv->eth_dev);
++ return 0;
++}
++
+ static struct platform_driver mac_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = mac_match,
+ },
+ .probe = mac_probe,
++ .remove = mac_remove,
+ };
+
+ builtin_platform_driver(mac_driver);
+diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+index 2db6e38a772e7..bacf25318f87a 100644
+--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
++++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+@@ -1005,7 +1005,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
+ spin_lock_init(&fep->lock);
+ spin_lock_init(&fep->tx_lock);
+
+- of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr);
++ of_get_ethdev_address(ofdev->dev.of_node, ndev);
+
+ ret = fep->ops->allocate_bd(ndev);
+ if (ret)
+diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+index 99fe2c210d0f6..61f4b6e50d29b 100644
+--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
++++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+@@ -98,7 +98,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
+ return -EINVAL;
+
+ fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0);
+- if (!fep->fcc.fccp)
++ if (!fep->fec.fecp)
+ return -EINVAL;
+
+ return 0;
+diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
+index af6ad94bf24a4..acab58fd3db38 100644
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -753,7 +753,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
+ if (stash_len || stash_idx)
+ priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING;
+
+- err = of_get_mac_address(np, dev->dev_addr);
++ err = of_get_ethdev_address(np, dev);
+ if (err) {
+ eth_hw_addr_random(dev);
+ dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr);
+diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
+index 7b32ed29bf4cb..8c17fe5d66ed4 100644
+--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
++++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
+@@ -1460,6 +1460,7 @@ static int gfar_get_ts_info(struct net_device *dev,
+ ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp");
+ if (ptp_node) {
+ ptp_dev = of_find_device_by_node(ptp_node);
++ of_node_put(ptp_node);
+ if (ptp_dev)
+ ptp = platform_get_drvdata(ptp_dev);
+ }
+diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
+index 3eb288d10b0c3..e0036fd2b56c7 100644
+--- a/drivers/net/ethernet/freescale/ucc_geth.c
++++ b/drivers/net/ethernet/freescale/ucc_geth.c
+@@ -3731,7 +3731,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
+ goto err_free_netdev;
+ }
+
+- of_get_mac_address(np, dev->dev_addr);
++ of_get_ethdev_address(np, dev);
+
+ ugeth->ug_info = ug_info;
+ ugeth->dev = device;
+diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
+index 0b68852379da5..27d07468765f0 100644
+--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
++++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
+@@ -52,6 +52,7 @@ struct tgec_mdio_controller {
+ struct mdio_fsl_priv {
+ struct tgec_mdio_controller __iomem *mdio_base;
+ bool is_little_endian;
++ bool has_a009885;
+ bool has_a011043;
+ };
+
+@@ -187,10 +188,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+ {
+ struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+ struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
++ unsigned long flags;
+ uint16_t dev_addr;
+ uint32_t mdio_stat;
+ uint32_t mdio_ctl;
+- uint16_t value;
+ int ret;
+ bool endian = priv->is_little_endian;
+
+@@ -222,12 +223,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+ return ret;
+ }
+
++ if (priv->has_a009885)
++ /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we
++ * must read back the data register within 16 MDC cycles.
++ */
++ local_irq_save(flags);
++
+ /* Initiate the read */
+ xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
+
+ ret = xgmac_wait_until_done(&bus->dev, regs, endian);
+ if (ret)
+- return ret;
++ goto irq_restore;
+
+ /* Return all Fs if nothing was there */
+ if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
+@@ -235,13 +242,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+ dev_dbg(&bus->dev,
+ "Error while reading PHY%d reg at %d.%hhu\n",
+ phy_id, dev_addr, regnum);
+- return 0xffff;
++ ret = 0xffff;
++ } else {
++ ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
++ dev_dbg(&bus->dev, "read %04x\n", ret);
+ }
+
+- value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
+- dev_dbg(&bus->dev, "read %04x\n", value);
++irq_restore:
++ if (priv->has_a009885)
++ local_irq_restore(flags);
+
+- return value;
++ return ret;
+ }
+
+ static int xgmac_mdio_probe(struct platform_device *pdev)
+@@ -288,6 +299,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
+ priv->is_little_endian = device_property_read_bool(&pdev->dev,
+ "little-endian");
+
++ priv->has_a009885 = device_property_read_bool(&pdev->dev,
++ "fsl,erratum-a009885");
+ priv->has_a011043 = device_property_read_bool(&pdev->dev,
+ "fsl,erratum-a011043");
+
+@@ -319,9 +332,10 @@ err_ioremap:
+ static int xgmac_mdio_remove(struct platform_device *pdev)
+ {
+ struct mii_bus *bus = platform_get_drvdata(pdev);
++ struct mdio_fsl_priv *priv = bus->priv;
+
+ mdiobus_unregister(bus);
+- iounmap(bus->priv);
++ iounmap(priv->mdio_base);
+ mdiobus_free(bus);
+
+ return 0;
+diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
+index 92dc18a4bcc41..08f4c0595efae 100644
+--- a/drivers/net/ethernet/google/gve/gve.h
++++ b/drivers/net/ethernet/google/gve/gve.h
+@@ -30,7 +30,7 @@
+ #define GVE_MIN_MSIX 3
+
+ /* Numbers of gve tx/rx stats in stats report. */
+-#define GVE_TX_STATS_REPORT_NUM 5
++#define GVE_TX_STATS_REPORT_NUM 6
+ #define GVE_RX_STATS_REPORT_NUM 2
+
+ /* Interval to schedule a stats report update, 20000ms. */
+@@ -47,6 +47,8 @@
+
+ #define GVE_RX_BUFFER_SIZE_DQO 2048
+
++#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
++
+ /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
+ struct gve_rx_desc_queue {
+ struct gve_rx_desc *desc_ring; /* the descriptor ring */
+@@ -224,11 +226,6 @@ struct gve_tx_iovec {
+ u32 iov_padding; /* padding associated with this segment */
+ };
+
+-struct gve_tx_dma_buf {
+- DEFINE_DMA_UNMAP_ADDR(dma);
+- DEFINE_DMA_UNMAP_LEN(len);
+-};
+-
+ /* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
+ * ring entry but only used for a pkt_desc not a seg_desc
+ */
+@@ -236,7 +233,10 @@ struct gve_tx_buffer_state {
+ struct sk_buff *skb; /* skb for this pkt */
+ union {
+ struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+- struct gve_tx_dma_buf buf;
++ struct {
++ DEFINE_DMA_UNMAP_ADDR(dma);
++ DEFINE_DMA_UNMAP_LEN(len);
++ };
+ };
+ };
+
+@@ -280,7 +280,8 @@ struct gve_tx_pending_packet_dqo {
+ * All others correspond to `skb`'s frags and should be unmapped with
+ * `dma_unmap_page`.
+ */
+- struct gve_tx_dma_buf bufs[MAX_SKB_FRAGS + 1];
++ DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
++ DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+ u16 num_bufs;
+
+ /* Linked list index to next element in the list, or -1 if none */
+@@ -414,7 +415,9 @@ struct gve_tx_ring {
+ u32 q_num ____cacheline_aligned; /* queue idx */
+ u32 stop_queue; /* count of queue stops */
+ u32 wake_queue; /* count of queue wakes */
++ u32 queue_timeout; /* count of queue timeouts */
+ u32 ntfy_id; /* notification block index */
++ u32 last_kick_msec; /* Last time the queue was kicked */
+ dma_addr_t bus; /* dma address of the descr ring */
+ dma_addr_t q_resources_bus; /* dma address of the queue resources */
+ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
+@@ -814,7 +817,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
+ /* buffers */
+ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
+ struct page **page, dma_addr_t *dma,
+- enum dma_data_direction);
++ enum dma_data_direction, gfp_t gfp_flags);
+ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+ enum dma_data_direction);
+ /* tx handling */
+diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
+index f089d33dd48e0..54d649e5ee65b 100644
+--- a/drivers/net/ethernet/google/gve/gve_adminq.c
++++ b/drivers/net/ethernet/google/gve/gve_adminq.c
+@@ -281,7 +281,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status)
+ */
+ static int gve_adminq_kick_and_wait(struct gve_priv *priv)
+ {
+- u32 tail, head;
++ int tail, head;
+ int i;
+
+ tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
+@@ -733,7 +733,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
+ }
+ priv->dev->max_mtu = mtu;
+ priv->num_event_counters = be16_to_cpu(descriptor->counters);
+- ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
++ eth_hw_addr_set(priv->dev, descriptor->mac);
+ mac = descriptor->mac;
+ dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
+ priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
+diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
+index 47c3d8f313fcf..3953f6f7a4273 100644
+--- a/drivers/net/ethernet/google/gve/gve_adminq.h
++++ b/drivers/net/ethernet/google/gve/gve_adminq.h
+@@ -270,6 +270,7 @@ enum gve_stat_names {
+ TX_LAST_COMPLETION_PROCESSED = 5,
+ RX_NEXT_EXPECTED_SEQUENCE = 6,
+ RX_BUFFERS_POSTED = 7,
++ TX_TIMEOUT_CNT = 8,
+ // stats from NIC
+ RX_QUEUE_DROP_CNT = 65,
+ RX_NO_BUFFERS_POSTED = 66,
+diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
+index 716e6240305d9..1f8cc722aae30 100644
+--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
+@@ -174,14 +174,14 @@ gve_get_ethtool_stats(struct net_device *netdev,
+ struct gve_rx_ring *rx = &priv->rx[ring];
+
+ start =
+- u64_stats_fetch_begin(&priv->rx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss);
+ tmp_rx_pkts = rx->rpackets;
+ tmp_rx_bytes = rx->rbytes;
+ tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+ tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+ tmp_rx_desc_err_dropped_pkt =
+ rx->rx_desc_err_dropped_pkt;
+- } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss,
+ start));
+ rx_pkts += tmp_rx_pkts;
+ rx_bytes += tmp_rx_bytes;
+@@ -195,10 +195,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
+ if (priv->tx) {
+ do {
+ start =
+- u64_stats_fetch_begin(&priv->tx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss);
+ tmp_tx_pkts = priv->tx[ring].pkt_done;
+ tmp_tx_bytes = priv->tx[ring].bytes_done;
+- } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss,
+ start));
+ tx_pkts += tmp_tx_pkts;
+ tx_bytes += tmp_tx_bytes;
+@@ -256,13 +256,13 @@ gve_get_ethtool_stats(struct net_device *netdev,
+ data[i++] = rx->cnt;
+ do {
+ start =
+- u64_stats_fetch_begin(&priv->rx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss);
+ tmp_rx_bytes = rx->rbytes;
+ tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
+ tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
+ tmp_rx_desc_err_dropped_pkt =
+ rx->rx_desc_err_dropped_pkt;
+- } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss,
+ start));
+ data[i++] = tmp_rx_bytes;
+ /* rx dropped packets */
+@@ -323,9 +323,9 @@ gve_get_ethtool_stats(struct net_device *netdev,
+ }
+ do {
+ start =
+- u64_stats_fetch_begin(&priv->tx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss);
+ tmp_tx_bytes = tx->bytes_done;
+- } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss,
+ start));
+ data[i++] = tmp_tx_bytes;
+ data[i++] = tx->wake_queue;
+@@ -526,9 +526,15 @@ static int gve_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+ {
+ struct gve_priv *priv = netdev_priv(netdev);
+- int err = gve_adminq_report_link_speed(priv);
++ int err = 0;
++
++ if (priv->link_speed == 0)
++ err = gve_adminq_report_link_speed(priv);
+
+ cmd->base.speed = priv->link_speed;
++
++ cmd->base.duplex = DUPLEX_FULL;
++
+ return err;
+ }
+
+diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
+index bf8a4a7c43f78..c0ea1b185e1bd 100644
+--- a/drivers/net/ethernet/google/gve/gve_main.c
++++ b/drivers/net/ethernet/google/gve/gve_main.c
+@@ -24,6 +24,9 @@
+ #define GVE_VERSION "1.0.0"
+ #define GVE_VERSION_PREFIX "GVE-"
+
++// Minimum amount of time between queue kicks in msec (10 seconds)
++#define MIN_TX_TIMEOUT_GAP (1000 * 10)
++
+ const char gve_version_str[] = GVE_VERSION;
+ static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+@@ -48,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ do {
+ start =
+- u64_stats_fetch_begin(&priv->rx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss);
+ packets = priv->rx[ring].rpackets;
+ bytes = priv->rx[ring].rbytes;
+- } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss,
+ start));
+ s->rx_packets += packets;
+ s->rx_bytes += bytes;
+@@ -61,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ do {
+ start =
+- u64_stats_fetch_begin(&priv->tx[ring].statss);
++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss);
+ packets = priv->tx[ring].pkt_done;
+ bytes = priv->tx[ring].bytes_done;
+- } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss,
+ start));
+ s->tx_packets += packets;
+ s->tx_bytes += bytes;
+@@ -230,19 +233,6 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+ bool reschedule = false;
+ int work_done = 0;
+
+- /* Clear PCI MSI-X Pending Bit Array (PBA)
+- *
+- * This bit is set if an interrupt event occurs while the vector is
+- * masked. If this bit is set and we reenable the interrupt, it will
+- * fire again. Since we're just about to poll the queue state, we don't
+- * need it to fire again.
+- *
+- * Under high softirq load, it's possible that the interrupt condition
+- * is triggered twice before we got the chance to process it.
+- */
+- gve_write_irq_doorbell_dqo(priv, block,
+- GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
+-
+ if (block->tx)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
+@@ -743,9 +733,9 @@ static void gve_free_rings(struct gve_priv *priv)
+
+ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
+ struct page **page, dma_addr_t *dma,
+- enum dma_data_direction dir)
++ enum dma_data_direction dir, gfp_t gfp_flags)
+ {
+- *page = alloc_page(GFP_KERNEL);
++ *page = alloc_page(gfp_flags);
+ if (!*page) {
+ priv->page_alloc_fail++;
+ return -ENOMEM;
+@@ -789,7 +779,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
+ for (i = 0; i < pages; i++) {
+ err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
+ &qpl->page_buses[i],
+- gve_qpl_dma_dir(priv, id));
++ gve_qpl_dma_dir(priv, id), GFP_KERNEL);
+ /* caller handles clean up */
+ if (err)
+ return -ENOMEM;
+@@ -1116,9 +1106,47 @@ static void gve_turnup(struct gve_priv *priv)
+
+ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+ {
+- struct gve_priv *priv = netdev_priv(dev);
++ struct gve_notify_block *block;
++ struct gve_tx_ring *tx = NULL;
++ struct gve_priv *priv;
++ u32 last_nic_done;
++ u32 current_time;
++ u32 ntfy_idx;
++
++ netdev_info(dev, "Timeout on tx queue, %d", txqueue);
++ priv = netdev_priv(dev);
++ if (txqueue > priv->tx_cfg.num_queues)
++ goto reset;
+
++ ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
++ if (ntfy_idx >= priv->num_ntfy_blks)
++ goto reset;
++
++ block = &priv->ntfy_blocks[ntfy_idx];
++ tx = block->tx;
++
++ current_time = jiffies_to_msecs(jiffies);
++ if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
++ goto reset;
++
++ /* Check to see if there are missed completions, which will allow us to
++ * kick the queue.
++ */
++ last_nic_done = gve_tx_load_event_counter(priv, tx);
++ if (last_nic_done - tx->done) {
++ netdev_info(dev, "Kicking queue %d", txqueue);
++ iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
++ napi_schedule(&block->napi);
++ tx->last_kick_msec = current_time;
++ goto out;
++ } // Else reset.
++
++reset:
+ gve_schedule_reset(priv);
++
++out:
++ if (tx)
++ tx->queue_timeout++;
+ priv->tx_timeo_cnt++;
+ }
+
+@@ -1219,9 +1247,9 @@ void gve_handle_report_stats(struct gve_priv *priv)
+ }
+
+ do {
+- start = u64_stats_fetch_begin(&priv->tx[idx].statss);
++ start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss);
+ tx_bytes = priv->tx[idx].bytes_done;
+- } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
++ } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start));
+ stats[stats_idx++] = (struct stats) {
+ .stat_name = cpu_to_be32(TX_WAKE_CNT),
+ .value = cpu_to_be64(priv->tx[idx].wake_queue),
+@@ -1247,6 +1275,11 @@ void gve_handle_report_stats(struct gve_priv *priv)
+ .value = cpu_to_be64(last_completion),
+ .queue_id = cpu_to_be32(idx),
+ };
++ stats[stats_idx++] = (struct stats) {
++ .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
++ .value = cpu_to_be64(priv->tx[idx].queue_timeout),
++ .queue_id = cpu_to_be32(idx),
++ };
+ }
+ }
+ /* rx stats */
+diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
+index 94941d4e47449..97431969a488f 100644
+--- a/drivers/net/ethernet/google/gve/gve_rx.c
++++ b/drivers/net/ethernet/google/gve/gve_rx.c
+@@ -79,7 +79,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
+ dma_addr_t dma;
+ int err;
+
+- err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
++ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
++ GFP_ATOMIC);
+ if (err)
+ return err;
+
+@@ -449,6 +450,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
+ skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
+ gve_rss_type(rx_desc->flags_seq));
+
++ skb_record_rx_queue(skb, rx->q_num);
+ if (skb_is_nonlinear(skb))
+ napi_gro_frags(napi);
+ else
+@@ -514,8 +516,13 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
+
+ gve_rx_free_buffer(dev, page_info, data_slot);
+ page_info->page = NULL;
+- if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
++ if (gve_rx_alloc_buffer(priv, dev, page_info,
++ data_slot)) {
++ u64_stats_update_begin(&rx->statss);
++ rx->rx_buf_alloc_fail++;
++ u64_stats_update_end(&rx->statss);
+ break;
++ }
+ }
+ }
+ fill_cnt++;
+diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+index 8500621b2cd41..7b18b4fd9e548 100644
+--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
++++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+@@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv,
+ int err;
+
+ err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
+- &buf_state->addr, DMA_FROM_DEVICE);
++ &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL);
+ if (err)
+ return err;
+
+diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
+index 665ac795a1adf..43e7b74bdb767 100644
+--- a/drivers/net/ethernet/google/gve/gve_tx.c
++++ b/drivers/net/ethernet/google/gve/gve_tx.c
+@@ -283,8 +283,8 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
+ int bytes;
+ int hlen;
+
+- hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
+- tcp_hdrlen(skb) : skb_headlen(skb);
++ hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) :
++ min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
+
+ pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
+ hlen);
+@@ -303,15 +303,15 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
+ static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
+ {
+ if (info->skb) {
+- dma_unmap_single(dev, dma_unmap_addr(&info->buf, dma),
+- dma_unmap_len(&info->buf, len),
++ dma_unmap_single(dev, dma_unmap_addr(info, dma),
++ dma_unmap_len(info, len),
+ DMA_TO_DEVICE);
+- dma_unmap_len_set(&info->buf, len, 0);
++ dma_unmap_len_set(info, len, 0);
+ } else {
+- dma_unmap_page(dev, dma_unmap_addr(&info->buf, dma),
+- dma_unmap_len(&info->buf, len),
++ dma_unmap_page(dev, dma_unmap_addr(info, dma),
++ dma_unmap_len(info, len),
+ DMA_TO_DEVICE);
+- dma_unmap_len_set(&info->buf, len, 0);
++ dma_unmap_len_set(info, len, 0);
+ }
+ }
+
+@@ -431,13 +431,11 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
+ pkt_desc = &tx->desc[idx];
+
+ l4_hdr_offset = skb_checksum_start_offset(skb);
+- /* If the skb is gso, then we want the tcp header in the first segment
+- * otherwise we want the linear portion of the skb (which will contain
+- * the checksum because skb->csum_start and skb->csum_offset are given
+- * relative to skb->head) in the first segment.
++ /* If the skb is gso, then we want the tcp header alone in the first segment
++ * otherwise we want the minimum required by the gVNIC spec.
+ */
+ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
+- skb_headlen(skb);
++ min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
+
+ info->skb = skb;
+ /* We don't want to split the header, so if necessary, pad to the end
+@@ -491,7 +489,6 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct gve_tx_buffer_state *info;
+ bool is_gso = skb_is_gso(skb);
+ u32 idx = tx->req & tx->mask;
+- struct gve_tx_dma_buf *buf;
+ u64 addr;
+ u32 len;
+ int i;
+@@ -515,9 +512,8 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ tx->dma_mapping_error++;
+ goto drop;
+ }
+- buf = &info->buf;
+- dma_unmap_len_set(buf, len, len);
+- dma_unmap_addr_set(buf, dma, addr);
++ dma_unmap_len_set(info, len, len);
++ dma_unmap_addr_set(info, dma, addr);
+
+ payload_nfrags = shinfo->nr_frags;
+ if (hlen < len) {
+@@ -549,10 +545,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ tx->dma_mapping_error++;
+ goto unmap_drop;
+ }
+- buf = &tx->info[idx].buf;
+ tx->info[idx].skb = NULL;
+- dma_unmap_len_set(buf, len, len);
+- dma_unmap_addr_set(buf, dma, addr);
++ dma_unmap_len_set(&tx->info[idx], len, len);
++ dma_unmap_addr_set(&tx->info[idx], dma, addr);
+
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ }
+diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+index 05ddb6a75c38f..ec394d9916681 100644
+--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
++++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+@@ -85,18 +85,16 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
+ int j;
+
+ for (j = 0; j < cur_state->num_bufs; j++) {
+- struct gve_tx_dma_buf *buf = &cur_state->bufs[j];
+-
+ if (j == 0) {
+ dma_unmap_single(tx->dev,
+- dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len),
+- DMA_TO_DEVICE);
++ dma_unmap_addr(cur_state, dma[j]),
++ dma_unmap_len(cur_state, len[j]),
++ DMA_TO_DEVICE);
+ } else {
+ dma_unmap_page(tx->dev,
+- dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len),
+- DMA_TO_DEVICE);
++ dma_unmap_addr(cur_state, dma[j]),
++ dma_unmap_len(cur_state, len[j]),
++ DMA_TO_DEVICE);
+ }
+ }
+ if (cur_state->skb) {
+@@ -457,15 +455,15 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+ const bool is_gso = skb_is_gso(skb);
+ u32 desc_idx = tx->dqo_tx.tail;
+
+- struct gve_tx_pending_packet_dqo *pending_packet;
++ struct gve_tx_pending_packet_dqo *pkt;
+ struct gve_tx_metadata_dqo metadata;
+ s16 completion_tag;
+ int i;
+
+- pending_packet = gve_alloc_pending_packet(tx);
+- pending_packet->skb = skb;
+- pending_packet->num_bufs = 0;
+- completion_tag = pending_packet - tx->dqo.pending_packets;
++ pkt = gve_alloc_pending_packet(tx);
++ pkt->skb = skb;
++ pkt->num_bufs = 0;
++ completion_tag = pkt - tx->dqo.pending_packets;
+
+ gve_extract_tx_metadata_dqo(skb, &metadata);
+ if (is_gso) {
+@@ -493,8 +491,6 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+
+ /* Map the linear portion of skb */
+ {
+- struct gve_tx_dma_buf *buf =
+- &pending_packet->bufs[pending_packet->num_bufs];
+ u32 len = skb_headlen(skb);
+ dma_addr_t addr;
+
+@@ -502,9 +498,9 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+ if (unlikely(dma_mapping_error(tx->dev, addr)))
+ goto err;
+
+- dma_unmap_len_set(buf, len, len);
+- dma_unmap_addr_set(buf, dma, addr);
+- ++pending_packet->num_bufs;
++ dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
++ dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
++ ++pkt->num_bufs;
+
+ gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+ completion_tag,
+@@ -512,8 +508,6 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+ }
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+- struct gve_tx_dma_buf *buf =
+- &pending_packet->bufs[pending_packet->num_bufs];
+ const skb_frag_t *frag = &shinfo->frags[i];
+ bool is_eop = i == (shinfo->nr_frags - 1);
+ u32 len = skb_frag_size(frag);
+@@ -523,9 +517,9 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+ if (unlikely(dma_mapping_error(tx->dev, addr)))
+ goto err;
+
+- dma_unmap_len_set(buf, len, len);
+- dma_unmap_addr_set(buf, dma, addr);
+- ++pending_packet->num_bufs;
++ dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
++ dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
++ ++pkt->num_bufs;
+
+ gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+ completion_tag, is_eop, is_gso);
+@@ -552,22 +546,23 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+ return 0;
+
+ err:
+- for (i = 0; i < pending_packet->num_bufs; i++) {
+- struct gve_tx_dma_buf *buf = &pending_packet->bufs[i];
+-
++ for (i = 0; i < pkt->num_bufs; i++) {
+ if (i == 0) {
+- dma_unmap_single(tx->dev, dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len),
++ dma_unmap_single(tx->dev,
++ dma_unmap_addr(pkt, dma[i]),
++ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE);
+ } else {
+- dma_unmap_page(tx->dev, dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len), DMA_TO_DEVICE);
++ dma_unmap_page(tx->dev,
++ dma_unmap_addr(pkt, dma[i]),
++ dma_unmap_len(pkt, len[i]),
++ DMA_TO_DEVICE);
+ }
+ }
+
+- pending_packet->skb = NULL;
+- pending_packet->num_bufs = 0;
+- gve_free_pending_packet(tx, pending_packet);
++ pkt->skb = NULL;
++ pkt->num_bufs = 0;
++ gve_free_pending_packet(tx, pkt);
+
+ return -1;
+ }
+@@ -725,12 +720,12 @@ static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
+
+ static void remove_from_list(struct gve_tx_ring *tx,
+ struct gve_index_list *list,
+- struct gve_tx_pending_packet_dqo *pending_packet)
++ struct gve_tx_pending_packet_dqo *pkt)
+ {
+ s16 prev_index, next_index;
+
+- prev_index = pending_packet->prev;
+- next_index = pending_packet->next;
++ prev_index = pkt->prev;
++ next_index = pkt->next;
+
+ if (prev_index == -1) {
+ /* Node is head */
+@@ -747,21 +742,18 @@ static void remove_from_list(struct gve_tx_ring *tx,
+ }
+
+ static void gve_unmap_packet(struct device *dev,
+- struct gve_tx_pending_packet_dqo *pending_packet)
++ struct gve_tx_pending_packet_dqo *pkt)
+ {
+- struct gve_tx_dma_buf *buf;
+ int i;
+
+ /* SKB linear portion is guaranteed to be mapped */
+- buf = &pending_packet->bufs[0];
+- dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len), DMA_TO_DEVICE);
+- for (i = 1; i < pending_packet->num_bufs; i++) {
+- buf = &pending_packet->bufs[i];
+- dma_unmap_page(dev, dma_unmap_addr(buf, dma),
+- dma_unmap_len(buf, len), DMA_TO_DEVICE);
++ dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
++ dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
++ for (i = 1; i < pkt->num_bufs; i++) {
++ dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]),
++ dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE);
+ }
+- pending_packet->num_bufs = 0;
++ pkt->num_bufs = 0;
+ }
+
+ /* Completion types and expected behavior:
+diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
+index 22bf914f2dbd0..05cb4582a58d5 100644
+--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
++++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
+@@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit)
+ skb->protocol = eth_type_trans(skb, dev);
+ napi_gro_receive(&priv->napi, skb);
+ dev->stats.rx_packets++;
+- dev->stats.rx_bytes += skb->len;
++ dev->stats.rx_bytes += len;
+ next:
+ pos = (pos + 1) % rxq->num;
+ if (rx_pkts_num >= limit)
+@@ -841,7 +841,7 @@ static int hisi_femac_drv_probe(struct platform_device *pdev)
+ (unsigned long)phy->phy_id,
+ phy_modes(phy->interface));
+
+- ret = of_get_mac_address(node, ndev->dev_addr);
++ ret = of_get_ethdev_address(node, ndev);
+ if (ret) {
+ eth_hw_addr_random(ndev);
+ dev_warn(dev, "using random MAC address %pM\n",
+diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+index c1aae0fca5e98..923191b9a87d4 100644
+--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
++++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+@@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit)
+ skb->protocol = eth_type_trans(skb, dev);
+ napi_gro_receive(&priv->napi, skb);
+ dev->stats.rx_packets++;
+- dev->stats.rx_bytes += skb->len;
++ dev->stats.rx_bytes += len;
+ next:
+ pos = dma_ring_incr(pos, RX_DESC_NUM);
+ }
+@@ -1219,7 +1219,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
+ goto out_phy_node;
+ }
+
+- ret = of_get_mac_address(node, ndev->dev_addr);
++ ret = of_get_ethdev_address(node, ndev);
+ if (ret) {
+ eth_hw_addr_random(ndev);
+ netdev_warn(ndev, "using random MAC address %pM\n",
+diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
+index 00fafc0f85121..430eccea8e5e9 100644
+--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
++++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
+@@ -419,8 +419,10 @@ int hnae_ae_register(struct hnae_ae_dev *hdev, struct module *owner)
+ hdev->cls_dev.release = hnae_release;
+ (void)dev_set_name(&hdev->cls_dev, "hnae%d", hdev->id);
+ ret = device_register(&hdev->cls_dev);
+- if (ret)
++ if (ret) {
++ put_device(&hdev->cls_dev);
+ return ret;
++ }
+
+ __module_get(THIS_MODULE);
+
+diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+index 23d9cbf262c32..740850b64aff5 100644
+--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
++++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+@@ -400,6 +400,10 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
+ return;
+
+ if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
++ /* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8.
++ We need check to prevent array overflow */
++ if (port >= DSAF_MAX_PORT_NUM)
++ return;
+ reg_val_1 = 0x1 << port;
+ port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
+ /* there is difference between V1 and V2 in register.*/
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+index d701451596c82..695e299f534d5 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+@@ -527,6 +527,8 @@ struct hnae3_ae_dev {
+ * Get 1588 rx hwstamp
+ * get_ts_info
+ * Get phc info
++ * clean_vf_config
++ * Clean residual vf info after disable sriov
+ */
+ struct hnae3_ae_ops {
+ int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
+@@ -720,6 +722,7 @@ struct hnae3_ae_ops {
+ struct ethtool_ts_info *info);
+ int (*get_link_diagnosis_info)(struct hnae3_handle *handle,
+ u32 *status_code);
++ void (*clean_vf_config)(struct hnae3_ae_dev *ae_dev, int num_vfs);
+ };
+
+ struct hnae3_dcb_ops {
+@@ -754,6 +757,7 @@ struct hnae3_tc_info {
+ u16 tqp_offset[HNAE3_MAX_TC];
+ u8 num_tc; /* Total number of enabled TCs */
+ bool mqprio_active;
++ bool dcb_ets_active;
+ };
+
+ struct hnae3_knic_private_info {
+@@ -830,6 +834,9 @@ struct hnae3_handle {
+
+ u8 netdev_flags;
+ struct dentry *hnae3_dbgfs;
++ /* protects concurrent contention between debugfs commands */
++ struct mutex dbgfs_lock;
++ char **dbgfs_buf;
+
+ /* Network interface message level enabled bits */
+ u32 msg_enable;
+@@ -850,6 +857,20 @@ struct hnae3_handle {
+ #define hnae3_get_bit(origin, shift) \
+ hnae3_get_field(origin, 0x1 << (shift), shift)
+
++#define HNAE3_FORMAT_MAC_ADDR_LEN 18
++#define HNAE3_FORMAT_MAC_ADDR_OFFSET_0 0
++#define HNAE3_FORMAT_MAC_ADDR_OFFSET_4 4
++#define HNAE3_FORMAT_MAC_ADDR_OFFSET_5 5
++
++static inline void hnae3_format_mac_addr(char *format_mac_addr,
++ const u8 *mac_addr)
++{
++ snprintf(format_mac_addr, HNAE3_FORMAT_MAC_ADDR_LEN, "%02x:**:**:**:%02x:%02x",
++ mac_addr[HNAE3_FORMAT_MAC_ADDR_OFFSET_0],
++ mac_addr[HNAE3_FORMAT_MAC_ADDR_OFFSET_4],
++ mac_addr[HNAE3_FORMAT_MAC_ADDR_OFFSET_5]);
++}
++
+ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
+ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+index e54f96251fea9..45f245b1d331c 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -123,7 +123,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
+ .name = "tx_bd_queue",
+ .cmd = HNAE3_DBG_CMD_TX_BD,
+ .dentry = HNS3_DBG_DENTRY_TX_BD,
+- .buf_len = HNS3_DBG_READ_LEN_4MB,
++ .buf_len = HNS3_DBG_READ_LEN_5MB,
+ .init = hns3_dbg_bd_file_init,
+ },
+ {
+@@ -1021,7 +1021,8 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
+ if (ret)
+ return ret;
+
+- save_buf = &hns3_dbg_cmd[index].buf;
++ mutex_lock(&handle->dbgfs_lock);
++ save_buf = &handle->dbgfs_buf[index];
+
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+ test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) {
+@@ -1033,15 +1034,15 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
+ read_buf = *save_buf;
+ } else {
+ read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL);
+- if (!read_buf)
+- return -ENOMEM;
++ if (!read_buf) {
++ ret = -ENOMEM;
++ goto out;
++ }
+
+ /* save the buffer addr until the last read operation */
+ *save_buf = read_buf;
+- }
+
+- /* get data ready for the first time to read */
+- if (!*ppos) {
++ /* get data ready for the first time to read */
+ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd,
+ read_buf, hns3_dbg_cmd[index].buf_len);
+ if (ret)
+@@ -1050,8 +1051,10 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
+
+ size = simple_read_from_buffer(buffer, count, ppos, read_buf,
+ strlen(read_buf));
+- if (size > 0)
++ if (size > 0) {
++ mutex_unlock(&handle->dbgfs_lock);
+ return size;
++ }
+
+ out:
+ /* free the buffer for the last read operation */
+@@ -1060,6 +1063,7 @@ out:
+ *save_buf = NULL;
+ }
+
++ mutex_unlock(&handle->dbgfs_lock);
+ return ret;
+ }
+
+@@ -1123,6 +1127,13 @@ int hns3_dbg_init(struct hnae3_handle *handle)
+ int ret;
+ u32 i;
+
++ handle->dbgfs_buf = devm_kcalloc(&handle->pdev->dev,
++ ARRAY_SIZE(hns3_dbg_cmd),
++ sizeof(*handle->dbgfs_buf),
++ GFP_KERNEL);
++ if (!handle->dbgfs_buf)
++ return -ENOMEM;
++
+ hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry =
+ debugfs_create_dir(name, hns3_dbgfs_root);
+ handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry;
+@@ -1132,6 +1143,8 @@ int hns3_dbg_init(struct hnae3_handle *handle)
+ debugfs_create_dir(hns3_dbg_dentry[i].name,
+ handle->hnae3_dbgfs);
+
++ mutex_init(&handle->dbgfs_lock);
++
+ for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
+ if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES &&
+ ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) ||
+@@ -1160,6 +1173,7 @@ int hns3_dbg_init(struct hnae3_handle *handle)
+ out:
+ debugfs_remove_recursive(handle->hnae3_dbgfs);
+ handle->hnae3_dbgfs = NULL;
++ mutex_destroy(&handle->dbgfs_lock);
+ return ret;
+ }
+
+@@ -1167,14 +1181,16 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
+ {
+ u32 i;
+
++ debugfs_remove_recursive(handle->hnae3_dbgfs);
++ handle->hnae3_dbgfs = NULL;
++
+ for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
+- if (hns3_dbg_cmd[i].buf) {
+- kvfree(hns3_dbg_cmd[i].buf);
+- hns3_dbg_cmd[i].buf = NULL;
++ if (handle->dbgfs_buf[i]) {
++ kvfree(handle->dbgfs_buf[i]);
++ handle->dbgfs_buf[i] = NULL;
+ }
+
+- debugfs_remove_recursive(handle->hnae3_dbgfs);
+- handle->hnae3_dbgfs = NULL;
++ mutex_destroy(&handle->dbgfs_lock);
+ }
+
+ void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+index bd8801065e024..fb0c907cec852 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+@@ -8,6 +8,7 @@
+ #define HNS3_DBG_READ_LEN_128KB 0x20000
+ #define HNS3_DBG_READ_LEN_1MB 0x100000
+ #define HNS3_DBG_READ_LEN_4MB 0x400000
++#define HNS3_DBG_READ_LEN_5MB 0x500000
+ #define HNS3_DBG_WRITE_LEN 1024
+
+ #define HNS3_DBG_DATA_STR_LEN 32
+@@ -47,7 +48,6 @@ struct hns3_dbg_cmd_info {
+ enum hnae3_dbg_cmd cmd;
+ enum hns3_dbg_dentry_type dentry;
+ u32 buf_len;
+- char *buf;
+ int (*init)(struct hnae3_handle *handle, unsigned int cmd);
+ };
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index 4b886a13e0797..3693ff55197dd 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -1005,9 +1005,7 @@ static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
+ return false;
+
+ if (ALIGN(len, dma_get_cache_alignment()) > space) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_spare_full++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_spare_full);
+ return false;
+ }
+
+@@ -1024,9 +1022,7 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring,
+ return false;
+
+ if (space < HNS3_MAX_SGL_SIZE) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_spare_full++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_spare_full);
+ return false;
+ }
+
+@@ -1554,9 +1550,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
+
+ ret = hns3_handle_vtags(ring, skb);
+ if (unlikely(ret < 0)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_vlan_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_vlan_err);
+ return ret;
+ } else if (ret == HNS3_INNER_VLAN_TAG) {
+ inner_vtag = skb_vlan_tag_get(skb);
+@@ -1591,9 +1585,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
+
+ ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
+ if (unlikely(ret < 0)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_l4_proto_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_l4_proto_err);
+ return ret;
+ }
+
+@@ -1601,18 +1593,14 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
+ &type_cs_vlan_tso,
+ &ol_type_vlan_len_msec);
+ if (unlikely(ret < 0)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_l2l3l4_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_l2l3l4_err);
+ return ret;
+ }
+
+ ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum,
+ &type_cs_vlan_tso, &desc_cb->send_bytes);
+ if (unlikely(ret < 0)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_tso_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_tso_err);
+ return ret;
+ }
+ }
+@@ -1705,9 +1693,7 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
+ }
+
+ if (unlikely(dma_mapping_error(dev, dma))) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+ return -ENOMEM;
+ }
+
+@@ -1853,9 +1839,7 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring,
+ * recursion level of over HNS3_MAX_RECURSION_LEVEL.
+ */
+ if (bd_num == UINT_MAX) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.over_max_recursion++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, over_max_recursion);
+ return -ENOMEM;
+ }
+
+@@ -1864,16 +1848,12 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring,
+ */
+ if (skb->len > HNS3_MAX_TSO_SIZE ||
+ (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.hw_limitation++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, hw_limitation);
+ return -ENOMEM;
+ }
+
+ if (__skb_linearize(skb)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+ return -ENOMEM;
+ }
+
+@@ -1903,9 +1883,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
+
+ bd_num = hns3_tx_bd_count(skb->len);
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_copy++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_copy);
+ }
+
+ out:
+@@ -1925,9 +1903,7 @@ out:
+ return bd_num;
+ }
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_busy++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_busy);
+
+ return -EBUSY;
+ }
+@@ -2012,9 +1988,7 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
+ ring->pending_buf += num;
+
+ if (!doorbell) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_more++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_more);
+ return;
+ }
+
+@@ -2064,9 +2038,7 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
+ ret = skb_copy_bits(skb, 0, buf, size);
+ if (unlikely(ret < 0)) {
+ hns3_tx_spare_rollback(ring, cb_len);
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.copy_bits_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, copy_bits_err);
+ return ret;
+ }
+
+@@ -2089,9 +2061,8 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
+ dma_sync_single_for_device(ring_to_dev(ring), dma, size,
+ DMA_TO_DEVICE);
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_bounce++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_bounce);
++
+ return bd_num;
+ }
+
+@@ -2121,9 +2092,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring,
+ nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len);
+ if (unlikely(nents < 0)) {
+ hns3_tx_spare_rollback(ring, cb_len);
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.skb2sgl_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, skb2sgl_err);
+ return -ENOMEM;
+ }
+
+@@ -2132,9 +2101,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring,
+ DMA_TO_DEVICE);
+ if (unlikely(!sgt->nents)) {
+ hns3_tx_spare_rollback(ring, cb_len);
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.map_sg_err++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, map_sg_err);
+ return -ENOMEM;
+ }
+
+@@ -2146,10 +2113,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring,
+ for (i = 0; i < sgt->nents; i++)
+ bd_num += hns3_fill_desc(ring, sg_dma_address(sgt->sgl + i),
+ sg_dma_len(sgt->sgl + i));
+-
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.tx_sgl++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, tx_sgl);
+
+ return bd_num;
+ }
+@@ -2188,9 +2152,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
+ if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) {
+ hns3_tx_doorbell(ring, 0, !netdev_xmit_more());
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+
+ return NETDEV_TX_OK;
+ }
+@@ -2255,6 +2217,8 @@ out_err_tx_ok:
+
+ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+ {
++ char format_mac_addr_perm[HNAE3_FORMAT_MAC_ADDR_LEN];
++ char format_mac_addr_sa[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct sockaddr *mac_addr = p;
+ int ret;
+@@ -2263,8 +2227,9 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+ return -EADDRNOTAVAIL;
+
+ if (ether_addr_equal(netdev->dev_addr, mac_addr->sa_data)) {
+- netdev_info(netdev, "already using mac address %pM\n",
+- mac_addr->sa_data);
++ hnae3_format_mac_addr(format_mac_addr_sa, mac_addr->sa_data);
++ netdev_info(netdev, "already using mac address %s\n",
++ format_mac_addr_sa);
+ return 0;
+ }
+
+@@ -2273,8 +2238,10 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+ */
+ if (!hns3_is_phys_func(h->pdev) &&
+ !is_zero_ether_addr(netdev->perm_addr)) {
+- netdev_err(netdev, "has permanent MAC %pM, user MAC %pM not allow\n",
+- netdev->perm_addr, mac_addr->sa_data);
++ hnae3_format_mac_addr(format_mac_addr_perm, netdev->perm_addr);
++ hnae3_format_mac_addr(format_mac_addr_sa, mac_addr->sa_data);
++ netdev_err(netdev, "has permanent MAC %s, user MAC %s not allow\n",
++ format_mac_addr_perm, format_mac_addr_sa);
+ return -EPERM;
+ }
+
+@@ -2284,7 +2251,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+ return ret;
+ }
+
+- ether_addr_copy(netdev->dev_addr, mac_addr->sa_data);
++ eth_hw_addr_set(netdev, mac_addr->sa_data);
+
+ return 0;
+ }
+@@ -2836,14 +2803,16 @@ static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf,
+ static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+ {
+ struct hnae3_handle *h = hns3_get_handle(netdev);
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+
+ if (!h->ae_algo->ops->set_vf_mac)
+ return -EOPNOTSUPP;
+
+ if (is_multicast_ether_addr(mac)) {
++ hnae3_format_mac_addr(format_mac_addr, mac);
+ netdev_err(netdev,
+- "Invalid MAC:%pM specified. Could not set MAC\n",
+- mac);
++ "Invalid MAC:%s specified. Could not set MAC\n",
++ format_mac_addr);
+ return -EINVAL;
+ }
+
+@@ -2947,6 +2916,21 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ return ret;
+ }
+
++/**
++ * hns3_clean_vf_config
++ * @pdev: pointer to a pci_dev structure
++ * @num_vfs: number of VFs allocated
++ *
++ * Clean residual vf config after disable sriov
++ **/
++static void hns3_clean_vf_config(struct pci_dev *pdev, int num_vfs)
++{
++ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
++
++ if (ae_dev->ops->clean_vf_config)
++ ae_dev->ops->clean_vf_config(ae_dev, num_vfs);
++}
++
+ /* hns3_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+@@ -2985,7 +2969,10 @@ static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+ else
+ return num_vfs;
+ } else if (!pci_vfs_assigned(pdev)) {
++ int num_vfs_pre = pci_num_vf(pdev);
++
+ pci_disable_sriov(pdev);
++ hns3_clean_vf_config(pdev, num_vfs_pre);
+ } else {
+ dev_warn(&pdev->dev,
+ "Unable to free VFs because some are assigned to VMs.\n");
+@@ -3131,8 +3118,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+- netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+-
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+@@ -3497,17 +3482,13 @@ static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
+ for (i = 0; i < cleand_count; i++) {
+ desc_cb = &ring->desc_cb[ring->next_to_use];
+ if (desc_cb->reuse_flag) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.reuse_pg_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, reuse_pg_cnt);
+
+ hns3_reuse_buffer(ring, ring->next_to_use);
+ } else {
+ ret = hns3_alloc_and_map_buffer(ring, &res_cbs);
+ if (ret) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+
+ hns3_rl_err(ring_to_netdev(ring),
+ "alloc rx buffer failed: %d\n",
+@@ -3519,9 +3500,7 @@ static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
+ }
+ hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.non_reuse_pg++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, non_reuse_pg);
+ }
+
+ ring_ptr_move_fw(ring, next_to_use);
+@@ -3536,6 +3515,34 @@ static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
+ return page_count(cb->priv) == cb->pagecnt_bias;
+ }
+
++static int hns3_handle_rx_copybreak(struct sk_buff *skb, int i,
++ struct hns3_enet_ring *ring,
++ int pull_len,
++ struct hns3_desc_cb *desc_cb)
++{
++ struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
++ u32 frag_offset = desc_cb->page_offset + pull_len;
++ int size = le16_to_cpu(desc->rx.size);
++ u32 frag_size = size - pull_len;
++ void *frag = napi_alloc_frag(frag_size);
++
++ if (unlikely(!frag)) {
++ hns3_ring_stats_update(ring, frag_alloc_err);
++
++ hns3_rl_err(ring_to_netdev(ring),
++ "failed to allocate rx frag\n");
++ return -ENOMEM;
++ }
++
++ desc_cb->reuse_flag = 1;
++ memcpy(frag, desc_cb->buf + frag_offset, frag_size);
++ skb_add_rx_frag(skb, i, virt_to_page(frag),
++ offset_in_page(frag), frag_size, frag_size);
++
++ hns3_ring_stats_update(ring, frag_alloc);
++ return 0;
++}
++
+ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
+ struct hns3_enet_ring *ring, int pull_len,
+ struct hns3_desc_cb *desc_cb)
+@@ -3545,6 +3552,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
+ int size = le16_to_cpu(desc->rx.size);
+ u32 truesize = hns3_buf_size(ring);
+ u32 frag_size = size - pull_len;
++ int ret = 0;
+ bool reused;
+
+ if (ring->page_pool) {
+@@ -3579,27 +3587,9 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
+ desc_cb->page_offset = 0;
+ desc_cb->reuse_flag = 1;
+ } else if (frag_size <= ring->rx_copybreak) {
+- void *frag = napi_alloc_frag(frag_size);
+-
+- if (unlikely(!frag)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.frag_alloc_err++;
+- u64_stats_update_end(&ring->syncp);
+-
+- hns3_rl_err(ring_to_netdev(ring),
+- "failed to allocate rx frag\n");
+- goto out;
+- }
+-
+- desc_cb->reuse_flag = 1;
+- memcpy(frag, desc_cb->buf + frag_offset, frag_size);
+- skb_add_rx_frag(skb, i, virt_to_page(frag),
+- offset_in_page(frag), frag_size, frag_size);
+-
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.frag_alloc++;
+- u64_stats_update_end(&ring->syncp);
+- return;
++ ret = hns3_handle_rx_copybreak(skb, i, ring, pull_len, desc_cb);
++ if (!ret)
++ return;
+ }
+
+ out:
+@@ -3675,20 +3665,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
+ return 0;
+ }
+
+-static bool hns3_checksum_complete(struct hns3_enet_ring *ring,
++static void hns3_checksum_complete(struct hns3_enet_ring *ring,
+ struct sk_buff *skb, u32 ptype, u16 csum)
+ {
+ if (ptype == HNS3_INVALID_PTYPE ||
+ hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE)
+- return false;
++ return;
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.csum_complete++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, csum_complete);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)csum);
+-
+- return true;
+ }
+
+ static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
+@@ -3748,8 +3734,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+ ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
+ HNS3_RXD_PTYPE_S);
+
+- if (hns3_checksum_complete(ring, skb, ptype, csum))
+- return;
++ hns3_checksum_complete(ring, skb, ptype, csum);
+
+ /* check if hardware has done checksum */
+ if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
+@@ -3758,9 +3743,8 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+ if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+ BIT(HNS3_RXD_OL3E_B) |
+ BIT(HNS3_RXD_OL4E_B)))) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.l3l4_csum_err++;
+- u64_stats_update_end(&ring->syncp);
++ skb->ip_summed = CHECKSUM_NONE;
++ hns3_ring_stats_update(ring, l3l4_csum_err);
+
+ return;
+ }
+@@ -3851,10 +3835,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
+ skb = ring->skb;
+ if (unlikely(!skb)) {
+ hns3_rl_err(netdev, "alloc rx skb fail\n");
+-
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+
+ return -ENOMEM;
+ }
+@@ -3885,9 +3866,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
+ if (ring->page_pool)
+ skb_mark_for_recycle(skb);
+
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.seg_pkt_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, seg_pkt_cnt);
+
+ ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
+ __skb_put(skb, ring->pull_len);
+@@ -4079,9 +4058,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
+ ret = hns3_set_gro_and_checksum(ring, skb, l234info,
+ bd_base_info, ol_info, csum);
+ if (unlikely(ret)) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.rx_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, rx_err_cnt);
+ return ret;
+ }
+
+@@ -4927,6 +4904,7 @@ static void hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+ static int hns3_init_mac_addr(struct net_device *netdev)
+ {
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hnae3_handle *h = priv->ae_handle;
+ u8 mac_addr_temp[ETH_ALEN];
+ int ret = 0;
+@@ -4937,10 +4915,11 @@ static int hns3_init_mac_addr(struct net_device *netdev)
+ /* Check if the MAC address is valid, if not get a random one */
+ if (!is_valid_ether_addr(mac_addr_temp)) {
+ eth_hw_addr_random(netdev);
+- dev_warn(priv->dev, "using random MAC address %pM\n",
+- netdev->dev_addr);
++ hnae3_format_mac_addr(format_mac_addr, netdev->dev_addr);
++ dev_warn(priv->dev, "using random MAC address %s\n",
++ format_mac_addr);
+ } else if (!ether_addr_equal(netdev->dev_addr, mac_addr_temp)) {
+- ether_addr_copy(netdev->dev_addr, mac_addr_temp);
++ eth_hw_addr_set(netdev, mac_addr_temp);
+ ether_addr_copy(netdev->perm_addr, mac_addr_temp);
+ } else {
+ return 0;
+@@ -4990,8 +4969,10 @@ static void hns3_client_stop(struct hnae3_handle *handle)
+ static void hns3_info_show(struct hns3_nic_priv *priv)
+ {
+ struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+
+- dev_info(priv->dev, "MAC address: %pM\n", priv->netdev->dev_addr);
++ hnae3_format_mac_addr(format_mac_addr, priv->netdev->dev_addr);
++ dev_info(priv->dev, "MAC address: %s\n", format_mac_addr);
+ dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps);
+ dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size);
+ dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size);
+@@ -5063,6 +5044,13 @@ static void hns3_state_init(struct hnae3_handle *handle)
+ set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
+ }
+
++static void hns3_state_uninit(struct hnae3_handle *handle)
++{
++ struct hns3_nic_priv *priv = handle->priv;
++
++ clear_bit(HNS3_NIC_STATE_INITED, &priv->state);
++}
++
+ static int hns3_client_init(struct hnae3_handle *handle)
+ {
+ struct pci_dev *pdev = handle->pdev;
+@@ -5180,7 +5168,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
+ return ret;
+
+ out_reg_netdev_fail:
++ hns3_state_uninit(handle);
+ hns3_dbg_uninit(handle);
++ hns3_client_stop(handle);
+ out_client_start:
+ hns3_free_rx_cpu_rmap(netdev);
+ hns3_nic_uninit_irq(priv);
+@@ -5280,9 +5270,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
+ if (!ring->desc_cb[ring->next_to_use].reuse_flag) {
+ ret = hns3_alloc_and_map_buffer(ring, &res_cbs);
+ if (ret) {
+- u64_stats_update_begin(&ring->syncp);
+- ring->stats.sw_err_cnt++;
+- u64_stats_update_end(&ring->syncp);
++ hns3_ring_stats_update(ring, sw_err_cnt);
+ /* if alloc new buffer fail, exit directly
+ * and reclear in up flow.
+ */
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+index f09a61d9c6264..91b656adaacb0 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+@@ -654,6 +654,13 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
+
+ #define hns3_buf_size(_ring) ((_ring)->buf_size)
+
++#define hns3_ring_stats_update(ring, cnt) do { \
++ typeof(ring) (tmp) = (ring); \
++ u64_stats_update_begin(&(tmp)->syncp); \
++ ((tmp)->stats.cnt)++; \
++ u64_stats_update_end(&(tmp)->syncp); \
++} while (0) \
++
+ static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
+ {
+ #if (PAGE_SIZE < 8192)
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+index 5ebd96f6833d6..17fa4e7684cd2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+@@ -739,7 +739,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
+ hns3_get_ksettings(h, cmd);
+ break;
+ case HNAE3_MEDIA_TYPE_FIBER:
+- if (module_type == HNAE3_MODULE_TYPE_CR)
++ if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
++ cmd->base.port = PORT_OTHER;
++ else if (module_type == HNAE3_MODULE_TYPE_CR)
+ cmd->base.port = PORT_DA;
+ else
+ cmd->base.port = PORT_FIBRE;
+@@ -985,6 +987,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+ const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+ const struct hns3_reset_type_map *rst_type_map;
++ enum ethtool_reset_flags rst_flags;
+ u32 i, size;
+
+ if (ops->ae_dev_resetting && ops->ae_dev_resetting(h))
+@@ -1004,6 +1007,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
+ for (i = 0; i < size; i++) {
+ if (rst_type_map[i].rst_flags == *flags) {
+ rst_type = rst_type_map[i].rst_type;
++ rst_flags = rst_type_map[i].rst_flags;
+ break;
+ }
+ }
+@@ -1019,6 +1023,8 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
+
+ ops->reset_event(h->pdev, h);
+
++ *flags &= ~rst_flags;
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+index 91cb578f56b80..a15f2ed268a8d 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+@@ -52,7 +52,10 @@ static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
+- ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
++ if (i < hdev->tm_info.num_tc)
++ ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
++ else
++ ets->tc_tx_bw[i] = 0;
+
+ if (hdev->tm_info.tc_info[i].tc_sch_mode ==
+ HCLGE_SCH_MODE_SP)
+@@ -123,13 +126,14 @@ static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets,
+ }
+
+ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
+- struct ieee_ets *ets, bool *changed)
++ struct ieee_ets *ets, bool *changed,
++ u8 tc_num)
+ {
+ bool has_ets_tc = false;
+ u32 total_ets_bw = 0;
+ u8 i;
+
+- for (i = 0; i < hdev->tc_max; i++) {
++ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+@@ -137,6 +141,13 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
+ *changed = true;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
++ if (i >= tc_num) {
++ dev_err(&hdev->pdev->dev,
++ "tc%u is disabled, cannot set ets bw\n",
++ i);
++ return -EINVAL;
++ }
++
+ /* The hardware will switch to sp mode if bandwidth is
+ * 0, so limit ets bandwidth must be greater than 0.
+ */
+@@ -176,7 +187,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+ if (ret)
+ return ret;
+
+- ret = hclge_ets_sch_mode_validate(hdev, ets, changed);
++ ret = hclge_ets_sch_mode_validate(hdev, ets, changed, tc_num);
+ if (ret)
+ return ret;
+
+@@ -240,7 +251,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+ int ret;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+- hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
++ h->kinfo.tc_info.mqprio_active)
+ return -EINVAL;
+
+ ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+@@ -256,10 +267,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+ }
+
+ hclge_tm_schd_info_update(hdev, num_tc);
+- if (num_tc > 1)
+- hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+- else
+- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
++ h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
+
+ ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+ if (ret)
+@@ -286,28 +294,24 @@ err_out:
+
+ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+ {
+- u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+- u8 i;
+
+ memset(pfc, 0, sizeof(*pfc));
+ pfc->pfc_cap = hdev->pfc_max;
+ pfc->pfc_en = hdev->tm_info.pfc_en;
+
+- ret = hclge_pfc_tx_stats_get(hdev, requests);
+- if (ret)
++ ret = hclge_mac_update_stats(hdev);
++ if (ret) {
++ dev_err(&hdev->pdev->dev,
++ "failed to update MAC stats, ret = %d.\n", ret);
+ return ret;
++ }
+
+- ret = hclge_pfc_rx_stats_get(hdev, indications);
+- if (ret)
+- return ret;
++ hclge_pfc_tx_stats_get(hdev, pfc->requests);
++ hclge_pfc_rx_stats_get(hdev, pfc->indications);
+
+- for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+- pfc->requests[i] = requests[i];
+- pfc->indications[i] = indications[i];
+- }
+ return 0;
+ }
+
+@@ -369,7 +373,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h)
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+- if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
++ if (h->kinfo.tc_info.mqprio_active)
+ return 0;
+
+ return hdev->dcbx_cap;
+@@ -493,7 +497,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+ if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
+ return -EBUSY;
+
+- if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
++ kinfo = &vport->nic.kinfo;
++ if (kinfo->tc_info.dcb_ets_active)
+ return -EINVAL;
+
+ ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
+@@ -507,7 +512,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+ if (ret)
+ return ret;
+
+- kinfo = &vport->nic.kinfo;
+ memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
+ hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
+ kinfo->tc_info.mqprio_active = tc > 0;
+@@ -516,13 +520,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+ if (ret)
+ goto err_out;
+
+- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+-
+- if (tc > 1)
+- hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
+- else
+- hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
+-
+ return hclge_notify_init_up(hdev);
+
+ err_out:
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+index 9cda8b3562b89..63665e8a7c718 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -677,8 +677,7 @@ static int hclge_dbg_dump_tc(struct hclge_dev *hdev, char *buf, int len)
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ sch_mode_str = ets_weight->tc_weight[i] ? "dwrr" : "sp";
+ pos += scnprintf(buf + pos, len - pos, "%u %4s %3u\n",
+- i, sch_mode_str,
+- hdev->tm_info.pg_info[0].tc_dwrr[i]);
++ i, sch_mode_str, ets_weight->tc_weight[i]);
+ }
+
+ return 0;
+@@ -1451,7 +1450,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
+ struct hclge_desc desc[3];
+ int pos = 0;
+ int ret, i;
+- u32 *req;
++ __le32 *req;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
+ desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+@@ -1476,22 +1475,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
+ tcam_msg.loc);
+
+ /* tcam_data0 ~ tcam_data1 */
+- req = (u32 *)req1->tcam_data;
++ req = (__le32 *)req1->tcam_data;
+ for (i = 0; i < 2; i++)
+ pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+- "%08x\n", *req++);
++ "%08x\n", le32_to_cpu(*req++));
+
+ /* tcam_data2 ~ tcam_data7 */
+- req = (u32 *)req2->tcam_data;
++ req = (__le32 *)req2->tcam_data;
+ for (i = 0; i < 6; i++)
+ pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+- "%08x\n", *req++);
++ "%08x\n", le32_to_cpu(*req++));
+
+ /* tcam_data8 ~ tcam_data12 */
+- req = (u32 *)req3->tcam_data;
++ req = (__le32 *)req3->tcam_data;
+ for (i = 0; i < 5; i++)
+ pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+- "%08x\n", *req++);
++ "%08x\n", le32_to_cpu(*req++));
+
+ return ret;
+ }
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index d891390d492f6..a415760505ab4 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -26,8 +26,6 @@
+ #include "hclge_devlink.h"
+
+ #define HCLGE_NAME "hclge"
+-#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
+-#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
+
+ #define HCLGE_BUF_SIZE_UNIT 256U
+ #define HCLGE_BUF_MUL_BY 2
+@@ -72,6 +70,8 @@ static void hclge_sync_mac_table(struct hclge_dev *hdev);
+ static void hclge_restore_hw_table(struct hclge_dev *hdev);
+ static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+ static void hclge_sync_fd_table(struct hclge_dev *hdev);
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++ int wait_cnt);
+
+ static struct hnae3_ae_algo ae_algo;
+
+@@ -548,7 +548,7 @@ static int hclge_mac_query_reg_num(struct hclge_dev *hdev, u32 *desc_num)
+ return 0;
+ }
+
+-static int hclge_mac_update_stats(struct hclge_dev *hdev)
++int hclge_mac_update_stats(struct hclge_dev *hdev)
+ {
+ u32 desc_num;
+ int ret;
+@@ -1865,6 +1865,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
+ vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO;
+ vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+ vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
++ vport->port_base_vlan_cfg.tbl_sta = true;
+ vport->rxvlan_cfg.rx_vlan_offload_en = true;
+ vport->req_vlan_fltr_en = true;
+ INIT_LIST_HEAD(&vport->vlan_list);
+@@ -2498,7 +2499,7 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport)
+ if (hdev->num_msi < hdev->num_nic_msi + hdev->num_roce_msi)
+ return -EINVAL;
+
+- roce->rinfo.base_vector = hdev->roce_base_vector;
++ roce->rinfo.base_vector = hdev->num_nic_msi;
+
+ roce->rinfo.netdev = nic->kinfo.netdev;
+ roce->rinfo.roce_io_base = hdev->hw.io_base;
+@@ -2534,10 +2535,6 @@ static int hclge_init_msi(struct hclge_dev *hdev)
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
+
+- hdev->base_msi_vector = pdev->irq;
+- hdev->roce_base_vector = hdev->base_msi_vector +
+- hdev->num_nic_msi;
+-
+ hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(u16), GFP_KERNEL);
+ if (!hdev->vector_status) {
+@@ -3177,6 +3174,7 @@ static int hclge_update_tp_port_info(struct hclge_dev *hdev)
+ hdev->hw.mac.autoneg = cmd.base.autoneg;
+ hdev->hw.mac.speed = cmd.base.speed;
+ hdev->hw.mac.duplex = cmd.base.duplex;
++ linkmode_copy(hdev->hw.mac.advertising, cmd.link_modes.advertising);
+
+ return 0;
+ }
+@@ -3199,7 +3197,7 @@ static int hclge_tp_port_init(struct hclge_dev *hdev)
+ static int hclge_update_port_info(struct hclge_dev *hdev)
+ {
+ struct hclge_mac *mac = &hdev->hw.mac;
+- int speed = HCLGE_MAC_SPEED_UNKNOWN;
++ int speed;
+ int ret;
+
+ /* get the port info from SFP cmd if not copper port */
+@@ -3210,10 +3208,13 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
+ if (!hdev->support_sfp_query)
+ return 0;
+
+- if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
++ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
++ speed = mac->speed;
+ ret = hclge_get_sfp_info(hdev, mac);
+- else
++ } else {
++ speed = HCLGE_MAC_SPEED_UNKNOWN;
+ ret = hclge_get_sfp_speed(hdev, &speed);
++ }
+
+ if (ret == -EOPNOTSUPP) {
+ hdev->support_sfp_query = false;
+@@ -3225,6 +3226,8 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
+ if (mac->speed_type == QUERY_ACTIVE_SPEED) {
+ hclge_update_port_capability(hdev, mac);
++ if (mac->speed != speed)
++ (void)hclge_tm_port_shaper_cfg(hdev);
+ return 0;
+ }
+ return hclge_cfg_mac_speed_dup(hdev, mac->speed,
+@@ -3307,6 +3310,12 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
+ link_state_old = vport->vf_info.link_state;
+ vport->vf_info.link_state = link_state;
+
++ /* return success directly if the VF is unalive, VF will
++ * query link state itself when it starts work.
++ */
++ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
++ return 0;
++
+ ret = hclge_push_vf_link_status(vport);
+ if (ret) {
+ vport->vf_info.link_state = link_state_old;
+@@ -7649,6 +7658,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
+
+ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+ {
++#define HCLGE_LINK_STATUS_WAIT_CNT 3
++
+ struct hclge_desc desc;
+ struct hclge_config_mac_mode_cmd *req =
+ (struct hclge_config_mac_mode_cmd *)desc.data;
+@@ -7673,9 +7684,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+ req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+- if (ret)
++ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac enable fail, ret =%d.\n", ret);
++ return;
++ }
++
++ if (!enable)
++ hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
++ HCLGE_LINK_STATUS_WAIT_CNT);
+ }
+
+ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
+@@ -7738,10 +7755,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
+ } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
+ }
+
+-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++ int wait_cnt)
+ {
+-#define HCLGE_MAC_LINK_STATUS_NUM 100
+-
+ int link_status;
+ int i = 0;
+ int ret;
+@@ -7754,13 +7770,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+ return 0;
+
+ msleep(HCLGE_LINK_STATUS_MS);
+- } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
++ } while (++i < wait_cnt);
+ return -EBUSY;
+ }
+
+ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+ bool is_phy)
+ {
++#define HCLGE_MAC_LINK_STATUS_NUM 100
++
+ int link_ret;
+
+ link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
+@@ -7768,7 +7786,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+ if (is_phy)
+ hclge_phy_link_status_wait(hdev, link_ret);
+
+- return hclge_mac_link_status_wait(hdev, link_ret);
++ return hclge_mac_link_status_wait(hdev, link_ret,
++ HCLGE_MAC_LINK_STATUS_NUM);
+ }
+
+ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
+@@ -8127,12 +8146,15 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
+ /* If it is not PF reset or FLR, the firmware will disable the MAC,
+ * so it only need to stop phy here.
+ */
+- if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) &&
+- hdev->reset_type != HNAE3_FUNC_RESET &&
+- hdev->reset_type != HNAE3_FLR_RESET) {
+- hclge_mac_stop_phy(hdev);
+- hclge_update_link_status(hdev);
+- return;
++ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) {
++ hclge_pfc_pause_en_cfg(hdev, HCLGE_PFC_TX_RX_DISABLE,
++ HCLGE_PFC_DISABLE);
++ if (hdev->reset_type != HNAE3_FUNC_RESET &&
++ hdev->reset_type != HNAE3_FLR_RESET) {
++ hclge_mac_stop_phy(hdev);
++ hclge_update_link_status(hdev);
++ return;
++ }
+ }
+
+ hclge_reset_tqp(handle);
+@@ -8575,6 +8597,7 @@ int hclge_update_mac_list(struct hclge_vport *vport,
+ enum HCLGE_MAC_ADDR_TYPE mac_type,
+ const unsigned char *addr)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_node *mac_node;
+ struct list_head *list;
+@@ -8599,9 +8622,10 @@ int hclge_update_mac_list(struct hclge_vport *vport,
+ /* if this address is never added, unnecessary to delete */
+ if (state == HCLGE_MAC_TO_DEL) {
+ spin_unlock_bh(&vport->mac_list_lock);
++ hnae3_format_mac_addr(format_mac_addr, addr);
+ dev_err(&hdev->pdev->dev,
+- "failed to delete address %pM from mac list\n",
+- addr);
++ "failed to delete address %s from mac list\n",
++ format_mac_addr);
+ return -ENOENT;
+ }
+
+@@ -8634,6 +8658,7 @@ static int hclge_add_uc_addr(struct hnae3_handle *handle,
+ int hclge_add_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
+ struct hclge_desc desc;
+@@ -8644,9 +8669,10 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
+ if (is_zero_ether_addr(addr) ||
+ is_broadcast_ether_addr(addr) ||
+ is_multicast_ether_addr(addr)) {
++ hnae3_format_mac_addr(format_mac_addr, addr);
+ dev_err(&hdev->pdev->dev,
+- "Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n",
+- addr, is_zero_ether_addr(addr),
++ "Set_uc mac err! invalid mac:%s. is_zero:%d,is_br=%d,is_mul=%d\n",
++ format_mac_addr, is_zero_ether_addr(addr),
+ is_broadcast_ether_addr(addr),
+ is_multicast_ether_addr(addr));
+ return -EINVAL;
+@@ -8703,6 +8729,7 @@ static int hclge_rm_uc_addr(struct hnae3_handle *handle,
+ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
+ int ret;
+@@ -8711,8 +8738,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+ if (is_zero_ether_addr(addr) ||
+ is_broadcast_ether_addr(addr) ||
+ is_multicast_ether_addr(addr)) {
+- dev_dbg(&hdev->pdev->dev, "Remove mac err! invalid mac:%pM.\n",
+- addr);
++ hnae3_format_mac_addr(format_mac_addr, addr);
++ dev_dbg(&hdev->pdev->dev, "Remove mac err! invalid mac:%s.\n",
++ format_mac_addr);
+ return -EINVAL;
+ }
+
+@@ -8720,12 +8748,11 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+ hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hclge_prepare_mac_addr(&req, addr, false);
+ ret = hclge_remove_mac_vlan_tbl(vport, &req);
+- if (!ret) {
++ if (!ret || ret == -ENOENT) {
+ mutex_lock(&hdev->vport_lock);
+ hclge_update_umv_space(vport, true);
+ mutex_unlock(&hdev->vport_lock);
+- } else if (ret == -ENOENT) {
+- ret = 0;
++ return 0;
+ }
+
+ return ret;
+@@ -8743,6 +8770,7 @@ static int hclge_add_mc_addr(struct hnae3_handle *handle,
+ int hclge_add_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
+ struct hclge_desc desc[3];
+@@ -8750,9 +8778,10 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
+
+ /* mac addr check */
+ if (!is_multicast_ether_addr(addr)) {
++ hnae3_format_mac_addr(format_mac_addr, addr);
+ dev_err(&hdev->pdev->dev,
+- "Add mc mac err! invalid mac:%pM.\n",
+- addr);
++ "Add mc mac err! invalid mac:%s.\n",
++ format_mac_addr);
+ return -EINVAL;
+ }
+ memset(&req, 0, sizeof(req));
+@@ -8788,6 +8817,7 @@ static int hclge_rm_mc_addr(struct hnae3_handle *handle,
+ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
+ enum hclge_cmd_status status;
+@@ -8795,9 +8825,10 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+
+ /* mac addr check */
+ if (!is_multicast_ether_addr(addr)) {
++ hnae3_format_mac_addr(format_mac_addr, addr);
+ dev_dbg(&hdev->pdev->dev,
+- "Remove mc mac err! invalid mac:%pM.\n",
+- addr);
++ "Remove mc mac err! invalid mac:%s.\n",
++ format_mac_addr);
+ return -EINVAL;
+ }
+
+@@ -9263,16 +9294,18 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+ u8 *mac_addr)
+ {
+ struct hclge_vport *vport = hclge_get_vport(handle);
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+
+ vport = hclge_get_vf_vport(hdev, vf);
+ if (!vport)
+ return -EINVAL;
+
++ hnae3_format_mac_addr(format_mac_addr, mac_addr);
+ if (ether_addr_equal(mac_addr, vport->vf_info.mac)) {
+ dev_info(&hdev->pdev->dev,
+- "Specified MAC(=%pM) is same as before, no change committed!\n",
+- mac_addr);
++ "Specified MAC(=%s) is same as before, no change committed!\n",
++ format_mac_addr);
+ return 0;
+ }
+
+@@ -9284,15 +9317,20 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf,
+
+ ether_addr_copy(vport->vf_info.mac, mac_addr);
+
++ /* there is a timewindow for PF to know VF unalive, it may
++ * cause send mailbox fail, but it doesn't matter, VF will
++ * query it when reinit.
++ */
+ if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
+ dev_info(&hdev->pdev->dev,
+- "MAC of VF %d has been set to %pM, and it will be reinitialized!\n",
+- vf, mac_addr);
+- return hclge_inform_reset_assert_to_vf(vport);
++ "MAC of VF %d has been set to %s, and it will be reinitialized!\n",
++ vf, format_mac_addr);
++ (void)hclge_inform_reset_assert_to_vf(vport);
++ return 0;
+ }
+
+- dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %pM\n",
+- vf, mac_addr);
++ dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n",
++ vf, format_mac_addr);
+ return 0;
+ }
+
+@@ -9396,6 +9434,7 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+ {
+ const unsigned char *new_addr = (const unsigned char *)p;
+ struct hclge_vport *vport = hclge_get_vport(handle);
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclge_dev *hdev = vport->back;
+ unsigned char *old_addr = NULL;
+ int ret;
+@@ -9404,9 +9443,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+ if (is_zero_ether_addr(new_addr) ||
+ is_broadcast_ether_addr(new_addr) ||
+ is_multicast_ether_addr(new_addr)) {
++ hnae3_format_mac_addr(format_mac_addr, new_addr);
+ dev_err(&hdev->pdev->dev,
+- "change uc mac err! invalid mac: %pM.\n",
+- new_addr);
++ "change uc mac err! invalid mac: %s.\n",
++ format_mac_addr);
+ return -EINVAL;
+ }
+
+@@ -9424,9 +9464,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+ spin_lock_bh(&vport->mac_list_lock);
+ ret = hclge_update_mac_node_for_dev_addr(vport, old_addr, new_addr);
+ if (ret) {
++ hnae3_format_mac_addr(format_mac_addr, new_addr);
+ dev_err(&hdev->pdev->dev,
+- "failed to change the mac addr:%pM, ret = %d\n",
+- new_addr, ret);
++ "failed to change the mac addr:%s, ret = %d\n",
++ format_mac_addr, ret);
+ spin_unlock_bh(&vport->mac_list_lock);
+
+ if (!is_first)
+@@ -10084,19 +10125,28 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ bool writen_to_tbl)
+ {
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
++ struct hclge_dev *hdev = vport->back;
+
+- list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node)
+- if (vlan->vlan_id == vlan_id)
++ mutex_lock(&hdev->vport_lock);
++
++ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
++ if (vlan->vlan_id == vlan_id) {
++ mutex_unlock(&hdev->vport_lock);
+ return;
++ }
++ }
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+- if (!vlan)
++ if (!vlan) {
++ mutex_unlock(&hdev->vport_lock);
+ return;
++ }
+
+ vlan->hd_tbl_status = writen_to_tbl;
+ vlan->vlan_id = vlan_id;
+
+ list_add_tail(&vlan->node, &vport->vlan_list);
++ mutex_unlock(&hdev->vport_lock);
+ }
+
+ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+@@ -10105,6 +10155,8 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
++ mutex_lock(&hdev->vport_lock);
++
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+ if (!vlan->hd_tbl_status) {
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+@@ -10114,12 +10166,16 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+ dev_err(&hdev->pdev->dev,
+ "restore vport vlan list failed, ret=%d\n",
+ ret);
++
++ mutex_unlock(&hdev->vport_lock);
+ return ret;
+ }
+ }
+ vlan->hd_tbl_status = true;
+ }
+
++ mutex_unlock(&hdev->vport_lock);
++
+ return 0;
+ }
+
+@@ -10129,6 +10185,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
+ struct hclge_dev *hdev = vport->back;
+
++ mutex_lock(&hdev->vport_lock);
++
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+ if (vlan->vlan_id == vlan_id) {
+ if (is_write_tbl && vlan->hd_tbl_status)
+@@ -10143,6 +10201,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ break;
+ }
+ }
++
++ mutex_unlock(&hdev->vport_lock);
+ }
+
+ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
+@@ -10150,6 +10210,8 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
+ struct hclge_dev *hdev = vport->back;
+
++ mutex_lock(&hdev->vport_lock);
++
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+ if (vlan->hd_tbl_status)
+ hclge_set_vlan_filter_hw(hdev,
+@@ -10165,6 +10227,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
+ }
+ }
+ clear_bit(vport->vport_id, hdev->vf_vlan_full);
++ mutex_unlock(&hdev->vport_lock);
+ }
+
+ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
+@@ -10173,6 +10236,8 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
+ struct hclge_vport *vport;
+ int i;
+
++ mutex_lock(&hdev->vport_lock);
++
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ vport = &hdev->vport[i];
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+@@ -10180,37 +10245,61 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
+ kfree(vlan);
+ }
+ }
++
++ mutex_unlock(&hdev->vport_lock);
+ }
+
+-void hclge_restore_vport_vlan_table(struct hclge_vport *vport)
++void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev)
+ {
+- struct hclge_vport_vlan_cfg *vlan, *tmp;
+- struct hclge_dev *hdev = vport->back;
++ struct hclge_vlan_info *vlan_info;
++ struct hclge_vport *vport;
+ u16 vlan_proto;
+ u16 vlan_id;
+ u16 state;
++ int vf_id;
+ int ret;
+
+- vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto;
+- vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
+- state = vport->port_base_vlan_cfg.state;
++ /* PF should restore all vfs port base vlan */
++ for (vf_id = 0; vf_id < hdev->num_alloc_vfs; vf_id++) {
++ vport = &hdev->vport[vf_id + HCLGE_VF_VPORT_START_NUM];
++ vlan_info = vport->port_base_vlan_cfg.tbl_sta ?
++ &vport->port_base_vlan_cfg.vlan_info :
++ &vport->port_base_vlan_cfg.old_vlan_info;
+
+- if (state != HNAE3_PORT_BASE_VLAN_DISABLE) {
+- clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]);
+- hclge_set_vlan_filter_hw(hdev, htons(vlan_proto),
+- vport->vport_id, vlan_id,
+- false);
+- return;
++ vlan_id = vlan_info->vlan_tag;
++ vlan_proto = vlan_info->vlan_proto;
++ state = vport->port_base_vlan_cfg.state;
++
++ if (state != HNAE3_PORT_BASE_VLAN_DISABLE) {
++ clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]);
++ ret = hclge_set_vlan_filter_hw(hdev, htons(vlan_proto),
++ vport->vport_id,
++ vlan_id, false);
++ vport->port_base_vlan_cfg.tbl_sta = ret == 0;
++ }
+ }
++}
+
+- list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+- ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+- vport->vport_id,
+- vlan->vlan_id, false);
+- if (ret)
+- break;
+- vlan->hd_tbl_status = true;
++void hclge_restore_vport_vlan_table(struct hclge_vport *vport)
++{
++ struct hclge_vport_vlan_cfg *vlan, *tmp;
++ struct hclge_dev *hdev = vport->back;
++ int ret;
++
++ mutex_lock(&hdev->vport_lock);
++
++ if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) {
++ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
++ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
++ vport->vport_id,
++ vlan->vlan_id, false);
++ if (ret)
++ break;
++ vlan->hd_tbl_status = true;
++ }
+ }
++
++ mutex_unlock(&hdev->vport_lock);
+ }
+
+ /* For global reset and imp reset, hardware will clear the mac table,
+@@ -10250,6 +10339,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev)
+ struct hnae3_handle *handle = &vport->nic;
+
+ hclge_restore_mac_table_common(vport);
++ hclge_restore_vport_port_base_vlan_config(hdev);
+ hclge_restore_vport_vlan_table(vport);
+ set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+ hclge_restore_fd_entries(handle);
+@@ -10306,6 +10396,8 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
+ false);
+ }
+
++ vport->port_base_vlan_cfg.tbl_sta = false;
++
+ /* force add VLAN 0 */
+ ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, false, 0);
+ if (ret)
+@@ -10332,12 +10424,42 @@ static bool hclge_need_update_vlan_filter(const struct hclge_vlan_info *new_cfg,
+ return false;
+ }
+
++static int hclge_modify_port_base_vlan_tag(struct hclge_vport *vport,
++ struct hclge_vlan_info *new_info,
++ struct hclge_vlan_info *old_info)
++{
++ struct hclge_dev *hdev = vport->back;
++ int ret;
++
++ /* add new VLAN tag */
++ ret = hclge_set_vlan_filter_hw(hdev, htons(new_info->vlan_proto),
++ vport->vport_id, new_info->vlan_tag,
++ false);
++ if (ret)
++ return ret;
++
++ vport->port_base_vlan_cfg.tbl_sta = false;
++ /* remove old VLAN tag */
++ if (old_info->vlan_tag == 0)
++ ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
++ true, 0);
++ else
++ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
++ vport->vport_id,
++ old_info->vlan_tag, true);
++ if (ret)
++ dev_err(&hdev->pdev->dev,
++ "failed to clear vport%u port base vlan %u, ret = %d.\n",
++ vport->vport_id, old_info->vlan_tag, ret);
++
++ return ret;
++}
++
+ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info)
+ {
+ struct hnae3_handle *nic = &vport->nic;
+ struct hclge_vlan_info *old_vlan_info;
+- struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ old_vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+@@ -10350,38 +10472,12 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ if (!hclge_need_update_vlan_filter(vlan_info, old_vlan_info))
+ goto out;
+
+- if (state == HNAE3_PORT_BASE_VLAN_MODIFY) {
+- /* add new VLAN tag */
+- ret = hclge_set_vlan_filter_hw(hdev,
+- htons(vlan_info->vlan_proto),
+- vport->vport_id,
+- vlan_info->vlan_tag,
+- false);
+- if (ret)
+- return ret;
+-
+- /* remove old VLAN tag */
+- if (old_vlan_info->vlan_tag == 0)
+- ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
+- true, 0);
+- else
+- ret = hclge_set_vlan_filter_hw(hdev,
+- htons(ETH_P_8021Q),
+- vport->vport_id,
+- old_vlan_info->vlan_tag,
+- true);
+- if (ret) {
+- dev_err(&hdev->pdev->dev,
+- "failed to clear vport%u port base vlan %u, ret = %d.\n",
+- vport->vport_id, old_vlan_info->vlan_tag, ret);
+- return ret;
+- }
+-
+- goto out;
+- }
+-
+- ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
+- old_vlan_info);
++ if (state == HNAE3_PORT_BASE_VLAN_MODIFY)
++ ret = hclge_modify_port_base_vlan_tag(vport, vlan_info,
++ old_vlan_info);
++ else
++ ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
++ old_vlan_info);
+ if (ret)
+ return ret;
+
+@@ -10392,7 +10488,9 @@ out:
+ else
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
++ vport->port_base_vlan_cfg.old_vlan_info = *old_vlan_info;
+ vport->port_base_vlan_cfg.vlan_info = *vlan_info;
++ vport->port_base_vlan_cfg.tbl_sta = true;
+ hclge_set_vport_vlan_fltr_change(vport);
+
+ return 0;
+@@ -10460,14 +10558,17 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+ return ret;
+ }
+
+- /* for DEVICE_VERSION_V3, vf doesn't need to know about the port based
++ /* there is a timewindow for PF to know VF unalive, it may
++ * cause send mailbox fail, but it doesn't matter, VF will
++ * query it when reinit.
++ * for DEVICE_VERSION_V3, vf doesn't need to know about the port based
+ * VLAN state.
+ */
+ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 &&
+ test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+- hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+- vport->vport_id, state,
+- &vlan_info);
++ (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
++ vport->vport_id,
++ state, &vlan_info);
+
+ return 0;
+ }
+@@ -10525,11 +10626,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+ }
+
+ if (!ret) {
+- if (is_kill)
+- hclge_rm_vport_vlan_table(vport, vlan_id, false);
+- else
++ if (!is_kill)
+ hclge_add_vport_vlan_table(vport, vlan_id,
+ writen_to_tbl);
++ else if (is_kill && vlan_id != 0)
++ hclge_rm_vport_vlan_table(vport, vlan_id, false);
+ } else if (is_kill) {
+ /* when remove hw vlan filter failed, record the vlan id,
+ * and try to remove it from hw later, to be consistence
+@@ -10892,9 +10993,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
+ u32 rx_pause, tx_pause;
+ u8 flowctl;
+
+- if (!phydev->link || !phydev->autoneg)
++ if (!phydev->link)
+ return 0;
+
++ if (!phydev->autoneg)
++ return hclge_mac_pause_setup_hw(hdev);
++
+ local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
+
+ if (phydev->pause)
+@@ -11083,6 +11187,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
+
+ static void hclge_info_show(struct hclge_dev *hdev)
+ {
++ struct hnae3_handle *handle = &hdev->vport->nic;
+ struct device *dev = &hdev->pdev->dev;
+
+ dev_info(dev, "PF info begin:\n");
+@@ -11099,9 +11204,9 @@ static void hclge_info_show(struct hclge_dev *hdev)
+ dev_info(dev, "This is %s PF\n",
+ hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
+ dev_info(dev, "DCB %s\n",
+- hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
++ handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
+ dev_info(dev, "MQPRIO %s\n",
+- hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
++ handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
+ dev_info(dev, "Default tx spare buffer size: %u\n",
+ hdev->tx_spare_buf_size);
+
+@@ -11584,9 +11689,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+ if (ret)
+ goto err_msi_irq_uninit;
+
+- if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER &&
+- !hnae3_dev_phy_imp_supported(hdev)) {
+- ret = hclge_mac_mdio_config(hdev);
++ if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
++ if (hnae3_dev_phy_imp_supported(hdev))
++ ret = hclge_update_tp_port_info(hdev);
++ else
++ ret = hclge_mac_mdio_config(hdev);
++
+ if (ret)
+ goto err_msi_irq_uninit;
+ }
+@@ -12103,8 +12211,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+ hclge_misc_irq_uninit(hdev);
+ hclge_devlink_uninit(hdev);
+ hclge_pci_uninit(hdev);
+- mutex_destroy(&hdev->vport_lock);
+ hclge_uninit_vport_vlan_table(hdev);
++ mutex_destroy(&hdev->vport_lock);
+ ae_dev->priv = NULL;
+ }
+
+@@ -12736,60 +12844,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
+ return ret;
+ }
+
+-static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
++static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport)
+ {
+- struct hclge_vport *vport = &hdev->vport[0];
+ struct hnae3_handle *handle = &vport->nic;
++ struct hclge_dev *hdev = vport->back;
++ bool uc_en = false;
++ bool mc_en = false;
+ u8 tmp_flags;
++ bool bc_en;
+ int ret;
+- u16 i;
+
+ if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
+ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+ vport->last_promisc_flags = vport->overflow_promisc_flags;
+ }
+
+- if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) {
++ if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
++ &vport->state))
++ return 0;
++
++ /* for PF */
++ if (!vport->vport_id) {
+ tmp_flags = handle->netdev_flags | vport->last_promisc_flags;
+ ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE,
+ tmp_flags & HNAE3_MPE);
+- if (!ret) {
+- clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+- &vport->state);
++ if (!ret)
+ set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+ &vport->state);
+- }
++ else
++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
++ &vport->state);
++ return ret;
+ }
+
+- for (i = 1; i < hdev->num_alloc_vport; i++) {
+- bool uc_en = false;
+- bool mc_en = false;
+- bool bc_en;
++ /* for VF */
++ if (vport->vf_info.trusted) {
++ uc_en = vport->vf_info.request_uc_en > 0 ||
++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE;
++ mc_en = vport->vf_info.request_mc_en > 0 ||
++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE;
++ }
++ bc_en = vport->vf_info.request_bc_en > 0;
+
+- vport = &hdev->vport[i];
++ ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
++ mc_en, bc_en);
++ if (ret) {
++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
++ return ret;
++ }
++ hclge_set_vport_vlan_fltr_change(vport);
+
+- if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+- &vport->state))
+- continue;
++ return 0;
++}
+
+- if (vport->vf_info.trusted) {
+- uc_en = vport->vf_info.request_uc_en > 0 ||
+- vport->overflow_promisc_flags &
+- HNAE3_OVERFLOW_UPE;
+- mc_en = vport->vf_info.request_mc_en > 0 ||
+- vport->overflow_promisc_flags &
+- HNAE3_OVERFLOW_MPE;
+- }
+- bc_en = vport->vf_info.request_bc_en > 0;
++static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
++{
++ struct hclge_vport *vport;
++ int ret;
++ u16 i;
+
+- ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
+- mc_en, bc_en);
+- if (ret) {
+- set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+- &vport->state);
++ for (i = 0; i < hdev->num_alloc_vport; i++) {
++ vport = &hdev->vport[i];
++
++ ret = hclge_sync_vport_promisc_mode(vport);
++ if (ret)
+ return;
+- }
+- hclge_set_vport_vlan_fltr_change(vport);
+ }
+ }
+
+@@ -12917,6 +13036,55 @@ static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle,
+ return 0;
+ }
+
++/* After disable sriov, VF still has some config and info need clean,
++ * which configed by PF.
++ */
++static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid)
++{
++ struct hclge_dev *hdev = vport->back;
++ struct hclge_vlan_info vlan_info;
++ int ret;
++
++ /* after disable sriov, clean VF rate configured by PF */
++ ret = hclge_tm_qs_shaper_cfg(vport, 0);
++ if (ret)
++ dev_err(&hdev->pdev->dev,
++ "failed to clean vf%d rate config, ret = %d\n",
++ vfid, ret);
++
++ vlan_info.vlan_tag = 0;
++ vlan_info.qos = 0;
++ vlan_info.vlan_proto = ETH_P_8021Q;
++ ret = hclge_update_port_base_vlan_cfg(vport,
++ HNAE3_PORT_BASE_VLAN_DISABLE,
++ &vlan_info);
++ if (ret)
++ dev_err(&hdev->pdev->dev,
++ "failed to clean vf%d port base vlan, ret = %d\n",
++ vfid, ret);
++
++ ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, false);
++ if (ret)
++ dev_err(&hdev->pdev->dev,
++ "failed to clean vf%d spoof config, ret = %d\n",
++ vfid, ret);
++
++ memset(&vport->vf_info, 0, sizeof(vport->vf_info));
++}
++
++static void hclge_clean_vport_config(struct hnae3_ae_dev *ae_dev, int num_vfs)
++{
++ struct hclge_dev *hdev = ae_dev->priv;
++ struct hclge_vport *vport;
++ int i;
++
++ for (i = 0; i < num_vfs; i++) {
++ vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM];
++
++ hclge_clear_vport_vf_info(vport, i);
++ }
++}
++
+ static const struct hnae3_ae_ops hclge_ops = {
+ .init_ae_dev = hclge_init_ae_dev,
+ .uninit_ae_dev = hclge_uninit_ae_dev,
+@@ -13018,6 +13186,7 @@ static const struct hnae3_ae_ops hclge_ops = {
+ .get_rx_hwts = hclge_ptp_get_rx_hwts,
+ .get_ts_info = hclge_ptp_get_ts_info,
+ .get_link_diagnosis_info = hclge_get_link_diagnosis_info,
++ .clean_vf_config = hclge_clean_vport_config,
+ };
+
+ static struct hnae3_ae_algo ae_algo = {
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+index 69cd8f87b4c86..a716027df0ed1 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+@@ -824,6 +824,9 @@ struct hclge_vf_vlan_cfg {
+ (y) = (_k_ ^ ~_v_) & (_k_); \
+ } while (0)
+
++#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
++#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
++
+ #define HCLGE_MAC_TNL_LOG_SIZE 8
+ #define HCLGE_VPORT_NUM 256
+ struct hclge_dev {
+@@ -876,12 +879,10 @@ struct hclge_dev {
+ u16 num_msi;
+ u16 num_msi_left;
+ u16 num_msi_used;
+- u32 base_msi_vector;
+ u16 *vector_status;
+ int *vector_irq;
+ u16 num_nic_msi; /* Num of nic vectors for this PF */
+ u16 num_roce_msi; /* Num of roce vectors for this PF */
+- int roce_base_vector;
+
+ unsigned long service_timer_period;
+ unsigned long service_timer_previous;
+@@ -901,8 +902,6 @@ struct hclge_dev {
+
+ #define HCLGE_FLAG_MAIN BIT(0)
+ #define HCLGE_FLAG_DCB_CAPABLE BIT(1)
+-#define HCLGE_FLAG_DCB_ENABLE BIT(2)
+-#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3)
+ u32 flag;
+
+ u32 pkt_buf_size; /* Total pf buf size for tx/rx */
+@@ -999,7 +998,9 @@ struct hclge_vlan_info {
+
+ struct hclge_port_base_vlan_config {
+ u16 state;
++ bool tbl_sta;
+ struct hclge_vlan_info vlan_info;
++ struct hclge_vlan_info old_vlan_info;
+ };
+
+ struct hclge_vf_info {
+@@ -1054,6 +1055,7 @@ struct hclge_vport {
+ spinlock_t mac_list_lock; /* protect mac address need to add/detele */
+ struct list_head uc_mac_list; /* Store VF unicast table */
+ struct list_head mc_mac_list; /* Store VF multicast table */
++
+ struct list_head vlan_list; /* Store VF vlan table */
+ };
+
+@@ -1123,6 +1125,7 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
+ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
+ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
+ void hclge_restore_mac_table_common(struct hclge_vport *vport);
++void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev);
+ void hclge_restore_vport_vlan_table(struct hclge_vport *vport);
+ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info);
+@@ -1138,4 +1141,5 @@ void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
+ int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len);
+ int hclge_push_vf_link_status(struct hclge_vport *vport);
+ int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en);
++int hclge_mac_update_stats(struct hclge_dev *hdev);
+ #endif
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+index 65d78ee4d65a0..4a5b11b6fed3f 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+@@ -93,6 +93,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
+ enum hclge_cmd_status status;
+ struct hclge_desc desc;
+
++ if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) {
++ dev_err(&hdev->pdev->dev,
++ "msg data length(=%u) exceeds maximum(=%u)\n",
++ msg_len, HCLGE_MBX_MAX_MSG_SIZE);
++ return -EMSGSIZE;
++ }
++
+ resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
+@@ -175,7 +182,7 @@ static int hclge_get_ring_chain_from_mbx(
+ ring_num = req->msg.ring_num;
+
+ if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
+- return -ENOMEM;
++ return -EINVAL;
+
+ for (i = 0; i < ring_num; i++) {
+ if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
+@@ -586,9 +593,9 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport,
+ return hclge_set_vport_mtu(vport, mtu);
+ }
+
+-static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+- struct hclge_respond_to_vf_msg *resp_msg)
++static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
++ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
++ struct hclge_respond_to_vf_msg *resp_msg)
+ {
+ struct hnae3_handle *handle = &vport->nic;
+ struct hclge_dev *hdev = vport->back;
+@@ -598,17 +605,18 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+ if (queue_id >= handle->kinfo.num_tqps) {
+ dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n",
+ queue_id, mbx_req->mbx_src_vfid);
+- return;
++ return -EINVAL;
+ }
+
+ qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+ memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf));
+ resp_msg->len = sizeof(qid_in_pf);
++ return 0;
+ }
+
+-static void hclge_get_rss_key(struct hclge_vport *vport,
+- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+- struct hclge_respond_to_vf_msg *resp_msg)
++static int hclge_get_rss_key(struct hclge_vport *vport,
++ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
++ struct hclge_respond_to_vf_msg *resp_msg)
+ {
+ #define HCLGE_RSS_MBX_RESP_LEN 8
+ struct hclge_dev *hdev = vport->back;
+@@ -624,13 +632,14 @@ static void hclge_get_rss_key(struct hclge_vport *vport,
+ dev_warn(&hdev->pdev->dev,
+ "failed to get the rss hash key, the index(%u) invalid !\n",
+ index);
+- return;
++ return -EINVAL;
+ }
+
+ memcpy(resp_msg->data,
+ &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
+ HCLGE_RSS_MBX_RESP_LEN);
+ resp_msg->len = HCLGE_RSS_MBX_RESP_LEN;
++ return 0;
+ }
+
+ static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code)
+@@ -805,10 +814,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
+ "VF fail(%d) to set mtu\n", ret);
+ break;
+ case HCLGE_MBX_GET_QID_IN_PF:
+- hclge_get_queue_id_in_pf(vport, req, &resp_msg);
++ ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg);
+ break;
+ case HCLGE_MBX_GET_RSS_KEY:
+- hclge_get_rss_key(vport, req, &resp_msg);
++ ret = hclge_get_rss_key(vport, req, &resp_msg);
+ break;
+ case HCLGE_MBX_GET_LINK_MODE:
+ hclge_get_link_mode(vport, req);
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+index 95074e91a8466..f5fe5e437bcd1 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+@@ -113,50 +113,50 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
+ return 0;
+ }
+
+-static int hclge_pfc_stats_get(struct hclge_dev *hdev,
+- enum hclge_opcode_type opcode, u64 *stats)
+-{
+- struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM];
+- int ret, i, j;
+-
+- if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT ||
+- opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
+- return -EINVAL;
+-
+- for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1; i++) {
+- hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+- desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+- }
+-
+- hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
++static const u16 hclge_pfc_tx_stats_offset[] = {
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri1_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri2_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri3_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri4_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri5_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num)
++};
+
+- ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
+- if (ret)
+- return ret;
++static const u16 hclge_pfc_rx_stats_offset[] = {
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri1_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri2_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri3_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri4_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri5_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri6_pkt_num),
++ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri7_pkt_num)
++};
+
+- for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+- struct hclge_pfc_stats_cmd *pfc_stats =
+- (struct hclge_pfc_stats_cmd *)desc[i].data;
++static void hclge_pfc_stats_get(struct hclge_dev *hdev, bool tx, u64 *stats)
++{
++ const u16 *offset;
++ int i;
+
+- for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) {
+- u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j;
++ if (tx)
++ offset = hclge_pfc_tx_stats_offset;
++ else
++ offset = hclge_pfc_rx_stats_offset;
+
+- if (index < HCLGE_MAX_TC_NUM)
+- stats[index] =
+- le64_to_cpu(pfc_stats->pkt_num[j]);
+- }
+- }
+- return 0;
++ for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
++ stats[i] = HCLGE_STATS_READ(&hdev->mac_stats, offset[i]);
+ }
+
+-int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
++void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
+ {
+- return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats);
++ hclge_pfc_stats_get(hdev, false, stats);
+ }
+
+-int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
++void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
+ {
+- return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats);
++ hclge_pfc_stats_get(hdev, true, stats);
+ }
+
+ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
+@@ -171,8 +171,8 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+ }
+
+-static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+- u8 pfc_bitmap)
++int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
++ u8 pfc_bitmap)
+ {
+ struct hclge_desc desc;
+ struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)desc.data;
+@@ -420,7 +420,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+ }
+
+-static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
++int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+ {
+ struct hclge_port_shapping_cmd *shap_cfg_cmd;
+ struct hclge_shaper_ir_para ir_para;
+@@ -732,6 +732,7 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
+ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
+ {
+ #define BW_PERCENT 100
++#define DEFAULT_BW_WEIGHT 1
+
+ u8 i;
+
+@@ -753,7 +754,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
+ for (k = 0; k < hdev->tm_info.num_tc; k++)
+ hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT;
+ for (; k < HNAE3_MAX_TC; k++)
+- hdev->tm_info.pg_info[i].tc_dwrr[k] = 0;
++ hdev->tm_info.pg_info[i].tc_dwrr[k] = DEFAULT_BW_WEIGHT;
+ }
+ }
+
+@@ -1123,7 +1124,6 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
+
+ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev)
+ {
+-#define DEFAULT_TC_WEIGHT 1
+ #define DEFAULT_TC_OFFSET 14
+
+ struct hclge_ets_tc_weight_cmd *ets_weight;
+@@ -1136,13 +1136,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev)
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ struct hclge_pg_info *pg_info;
+
+- ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT;
+-
+- if (!(hdev->hw_tc_map & BIT(i)))
+- continue;
+-
+- pg_info =
+- &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
++ pg_info = &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
+ ets_weight->tc_weight[i] = pg_info->tc_dwrr[i];
+ }
+
+@@ -1435,7 +1429,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
+ return 0;
+ }
+
+-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
++int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+ {
+ bool tx_en, rx_en;
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+index 2ee9b795f71dc..e1f2feaba5454 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+@@ -155,6 +155,9 @@ struct hclge_bp_to_qs_map_cmd {
+ u32 rsvd1;
+ };
+
++#define HCLGE_PFC_DISABLE 0
++#define HCLGE_PFC_TX_RX_DISABLE 0
++
+ struct hclge_pfc_en_cmd {
+ u8 tx_rx_en_bitmap;
+ u8 pri_en_bitmap;
+@@ -226,11 +229,15 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+ void hclge_tm_pfc_info_update(struct hclge_dev *hdev);
+ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
+ int hclge_tm_init_hw(struct hclge_dev *hdev, bool init);
++int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
++ u8 pfc_bitmap);
+ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
+ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+-int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
+-int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
++int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
++void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
++void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
+ int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
++int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev);
+ int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num);
+ int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num);
+ int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index cf00ad7bb881f..bc140e3620d6c 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -703,9 +703,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size)
+ roundup_size = ilog2(roundup_size);
+
+ for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
+- tc_valid[i] = !!(hdev->hw_tc_map & BIT(i));
++ tc_valid[i] = 1;
+ tc_size[i] = roundup_size;
+- tc_offset[i] = rss_size * i;
++ tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0;
+ }
+
+ hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
+@@ -1514,15 +1514,18 @@ static void hclgevf_config_mac_list(struct hclgevf_dev *hdev,
+ struct list_head *list,
+ enum HCLGEVF_MAC_ADDR_TYPE mac_type)
+ {
++ char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
+ struct hclgevf_mac_addr_node *mac_node, *tmp;
+ int ret;
+
+ list_for_each_entry_safe(mac_node, tmp, list, node) {
+ ret = hclgevf_add_del_mac_addr(hdev, mac_node, mac_type);
+ if (ret) {
++ hnae3_format_mac_addr(format_mac_addr,
++ mac_node->mac_addr);
+ dev_err(&hdev->pdev->dev,
+- "failed to configure mac %pM, state = %d, ret = %d\n",
+- mac_node->mac_addr, mac_node->state, ret);
++ "failed to configure mac %s, state = %d, ret = %d\n",
++ format_mac_addr, mac_node->state, ret);
+ return;
+ }
+ if (mac_node->state == HCLGEVF_MAC_TO_ADD) {
+@@ -1882,7 +1885,10 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+ * might happen in case reset assertion was made by PF. Yes, this also
+ * means we might end up waiting bit more even for VF reset.
+ */
+- msleep(5000);
++ if (hdev->reset_type == HNAE3_VF_FULL_RESET)
++ msleep(5000);
++ else
++ msleep(500);
+
+ return 0;
+ }
+@@ -2496,8 +2502,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
+ break;
+ }
+
+- if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER)
+- hclgevf_enable_vector(&hdev->misc_vector, true);
++ hclgevf_enable_vector(&hdev->misc_vector, true);
+
+ return IRQ_HANDLED;
+ }
+@@ -2557,7 +2562,7 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
+ hdev->num_msi_left == 0)
+ return -EINVAL;
+
+- roce->rinfo.base_vector = hdev->roce_base_vector;
++ roce->rinfo.base_vector = hdev->roce_base_msix_offset;
+
+ roce->rinfo.netdev = nic->kinfo.netdev;
+ roce->rinfo.roce_io_base = hdev->hw.io_base;
+@@ -2823,9 +2828,6 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
+
+- hdev->base_msi_vector = pdev->irq;
+- hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
+-
+ hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(u16), GFP_KERNEL);
+ if (!hdev->vector_status) {
+@@ -3013,7 +3015,10 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
+
+ /* un-init roce, if it exists */
+ if (hdev->roce_client) {
++ while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
++ msleep(HCLGEVF_WAIT_RESET_DONE);
+ clear_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state);
++
+ hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
+ hdev->roce_client = NULL;
+ hdev->roce.client = NULL;
+@@ -3022,6 +3027,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
+ /* un-init nic/unic, if this was not called by roce client */
+ if (client->ops->uninit_instance && hdev->nic_client &&
+ client->type != HNAE3_CLIENT_ROCE) {
++ while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
++ msleep(HCLGEVF_WAIT_RESET_DONE);
+ clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+
+ client->ops->uninit_instance(&hdev->nic, 0);
+@@ -3254,7 +3261,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
+ struct pci_dev *pdev = hdev->pdev;
+ int ret = 0;
+
+- if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
++ if ((hdev->reset_type == HNAE3_VF_FULL_RESET ||
++ hdev->reset_type == HNAE3_FLR_RESET) &&
+ test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+ hclgevf_misc_irq_uninit(hdev);
+ hclgevf_uninit_msi(hdev);
+@@ -3340,6 +3348,11 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
+ return ret;
+ }
+
++ /* get current port based vlan state from PF */
++ ret = hclgevf_get_port_base_vlan_filter_state(hdev);
++ if (ret)
++ return ret;
++
+ set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state);
+
+ hclgevf_init_rxd_adv_layout(hdev);
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+index 28288d7e33032..f6f736c0091c0 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+@@ -109,6 +109,8 @@
+ #define HCLGEVF_VF_RST_ING 0x07008
+ #define HCLGEVF_VF_RST_ING_BIT BIT(16)
+
++#define HCLGEVF_WAIT_RESET_DONE 100
++
+ #define HCLGEVF_RSS_IND_TBL_SIZE 512
+ #define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff
+ #define HCLGEVF_RSS_KEY_SIZE 40
+@@ -308,8 +310,6 @@ struct hclgevf_dev {
+ u16 num_nic_msix; /* Num of nic vectors for this VF */
+ u16 num_roce_msix; /* Num of roce vectors for this VF */
+ u16 roce_base_msix_offset;
+- int roce_base_vector;
+- u32 base_msi_vector;
+ u16 *vector_status;
+ int *vector_irq;
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+index fdc66fae09601..c5ac6ecf36e10 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+@@ -114,7 +114,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev,
+
+ memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg));
+
+- trace_hclge_vf_mbx_send(hdev, req);
++ if (test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state))
++ trace_hclge_vf_mbx_send(hdev, req);
+
+ /* synchronous send */
+ if (need_resp) {
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c
+index 19eb839177ec2..061952c6c21a4 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c
+@@ -85,6 +85,7 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx)
+ struct tag_sml_funcfg_tbl *funcfg_table_elem;
+ struct hinic_cmd_lt_rd *read_data;
+ u16 out_size = sizeof(*read_data);
++ int ret = ~0;
+ int err;
+
+ read_data = kzalloc(sizeof(*read_data), GFP_KERNEL);
+@@ -111,20 +112,25 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx)
+
+ switch (idx) {
+ case VALID:
+- return funcfg_table_elem->dw0.bs.valid;
++ ret = funcfg_table_elem->dw0.bs.valid;
++ break;
+ case RX_MODE:
+- return funcfg_table_elem->dw0.bs.nic_rx_mode;
++ ret = funcfg_table_elem->dw0.bs.nic_rx_mode;
++ break;
+ case MTU:
+- return funcfg_table_elem->dw1.bs.mtu;
++ ret = funcfg_table_elem->dw1.bs.mtu;
++ break;
+ case RQ_DEPTH:
+- return funcfg_table_elem->dw13.bs.cfg_rq_depth;
++ ret = funcfg_table_elem->dw13.bs.cfg_rq_depth;
++ break;
+ case QUEUE_NUM:
+- return funcfg_table_elem->dw13.bs.cfg_q_num;
++ ret = funcfg_table_elem->dw13.bs.cfg_q_num;
++ break;
+ }
+
+ kfree(read_data);
+
+- return ~0;
++ return ret;
+ }
+
+ static ssize_t hinic_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count,
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+index fb3e89141a0d9..a4fbf44f944cd 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
++++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+@@ -95,9 +95,6 @@ struct hinic_dev {
+ u16 sq_depth;
+ u16 rq_depth;
+
+- struct hinic_txq_stats tx_stats;
+- struct hinic_rxq_stats rx_stats;
+-
+ u8 rss_tmpl_idx;
+ u8 rss_hash_engine;
+ u16 num_rss;
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+index 06586173add77..998717f02136f 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+@@ -814,7 +814,6 @@ static int api_chain_init(struct hinic_api_cmd_chain *chain,
+ {
+ struct hinic_hwif *hwif = attr->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+- size_t cell_ctxt_size;
+
+ chain->hwif = hwif;
+ chain->chain_type = attr->chain_type;
+@@ -826,8 +825,8 @@ static int api_chain_init(struct hinic_api_cmd_chain *chain,
+
+ sema_init(&chain->sem, 1);
+
+- cell_ctxt_size = chain->num_cells * sizeof(*chain->cell_ctxt);
+- chain->cell_ctxt = devm_kzalloc(&pdev->dev, cell_ctxt_size, GFP_KERNEL);
++ chain->cell_ctxt = devm_kcalloc(&pdev->dev, chain->num_cells,
++ sizeof(*chain->cell_ctxt), GFP_KERNEL);
+ if (!chain->cell_ctxt)
+ return -ENOMEM;
+
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+index 307a6d4af993d..afa816cfcdf4a 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+@@ -796,11 +796,10 @@ static int init_cmdqs_ctxt(struct hinic_hwdev *hwdev,
+ struct hinic_cmdq_ctxt *cmdq_ctxts;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+- size_t cmdq_ctxts_size;
+ int err;
+
+- cmdq_ctxts_size = HINIC_MAX_CMDQ_TYPES * sizeof(*cmdq_ctxts);
+- cmdq_ctxts = devm_kzalloc(&pdev->dev, cmdq_ctxts_size, GFP_KERNEL);
++ cmdq_ctxts = devm_kcalloc(&pdev->dev, HINIC_MAX_CMDQ_TYPES,
++ sizeof(*cmdq_ctxts), GFP_KERNEL);
+ if (!cmdq_ctxts)
+ return -ENOMEM;
+
+@@ -884,7 +883,6 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
+ struct hinic_func_to_io *func_to_io = cmdqs_to_func_to_io(cmdqs);
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_hwdev *hwdev;
+- size_t saved_wqs_size;
+ u16 max_wqe_size;
+ int err;
+
+@@ -895,8 +893,8 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
+ if (!cmdqs->cmdq_buf_pool)
+ return -ENOMEM;
+
+- saved_wqs_size = HINIC_MAX_CMDQ_TYPES * sizeof(struct hinic_wq);
+- cmdqs->saved_wqs = devm_kzalloc(&pdev->dev, saved_wqs_size, GFP_KERNEL);
++ cmdqs->saved_wqs = devm_kcalloc(&pdev->dev, HINIC_MAX_CMDQ_TYPES,
++ sizeof(*cmdqs->saved_wqs), GFP_KERNEL);
+ if (!cmdqs->saved_wqs) {
+ err = -ENOMEM;
+ goto err_saved_wqs;
+@@ -931,7 +929,7 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
+
+ err_set_cmdq_depth:
+ hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ);
+-
++ free_cmdq(&cmdqs->cmdq[HINIC_CMDQ_SYNC]);
+ err_cmdq_ctxt:
+ hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs,
+ HINIC_MAX_CMDQ_TYPES);
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+index 56b6b04e209b3..8b04d133b3c47 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+@@ -162,7 +162,6 @@ static int init_msix(struct hinic_hwdev *hwdev)
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int nr_irqs, num_aeqs, num_ceqs;
+- size_t msix_entries_size;
+ int i, err;
+
+ num_aeqs = HINIC_HWIF_NUM_AEQS(hwif);
+@@ -171,8 +170,8 @@ static int init_msix(struct hinic_hwdev *hwdev)
+ if (nr_irqs > HINIC_HWIF_NUM_IRQS(hwif))
+ nr_irqs = HINIC_HWIF_NUM_IRQS(hwif);
+
+- msix_entries_size = nr_irqs * sizeof(*hwdev->msix_entries);
+- hwdev->msix_entries = devm_kzalloc(&pdev->dev, msix_entries_size,
++ hwdev->msix_entries = devm_kcalloc(&pdev->dev, nr_irqs,
++ sizeof(*hwdev->msix_entries),
+ GFP_KERNEL);
+ if (!hwdev->msix_entries)
+ return -ENOMEM;
+@@ -893,7 +892,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev,
+ if (err)
+ return -EINVAL;
+
+- interrupt_info->lli_credit_cnt = temp_info.lli_timer_cnt;
++ interrupt_info->lli_credit_cnt = temp_info.lli_credit_cnt;
+ interrupt_info->lli_timer_cnt = temp_info.lli_timer_cnt;
+
+ err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+index d3fc05a07fdb6..045c47786a041 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+@@ -631,16 +631,15 @@ static int alloc_eq_pages(struct hinic_eq *eq)
+ struct hinic_hwif *hwif = eq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u32 init_val, addr, val;
+- size_t addr_size;
+ int err, pg;
+
+- addr_size = eq->num_pages * sizeof(*eq->dma_addr);
+- eq->dma_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL);
++ eq->dma_addr = devm_kcalloc(&pdev->dev, eq->num_pages,
++ sizeof(*eq->dma_addr), GFP_KERNEL);
+ if (!eq->dma_addr)
+ return -ENOMEM;
+
+- addr_size = eq->num_pages * sizeof(*eq->virt_addr);
+- eq->virt_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL);
++ eq->virt_addr = devm_kcalloc(&pdev->dev, eq->num_pages,
++ sizeof(*eq->virt_addr), GFP_KERNEL);
+ if (!eq->virt_addr) {
+ err = -ENOMEM;
+ goto err_virt_addr_alloc;
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+index ebc77771f5dac..4aa1f433ed24d 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+@@ -643,6 +643,7 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ err = alloc_msg_buf(pf_to_mgmt);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate msg buffers\n");
++ destroy_workqueue(pf_to_mgmt->workq);
+ hinic_health_reporters_destroy(hwdev->devlink_dev);
+ return err;
+ }
+@@ -650,6 +651,7 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ err = hinic_api_cmd_init(pf_to_mgmt->cmd_chain, hwif);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize cmd chains\n");
++ destroy_workqueue(pf_to_mgmt->workq);
+ hinic_health_reporters_destroy(hwdev->devlink_dev);
+ return err;
+ }
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+index 7f0f1aa3cedd9..4daf6bf291ecb 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+@@ -193,20 +193,20 @@ static int alloc_page_arrays(struct hinic_wqs *wqs)
+ {
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+- size_t size;
+
+- size = wqs->num_pages * sizeof(*wqs->page_paddr);
+- wqs->page_paddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
++ wqs->page_paddr = devm_kcalloc(&pdev->dev, wqs->num_pages,
++ sizeof(*wqs->page_paddr), GFP_KERNEL);
+ if (!wqs->page_paddr)
+ return -ENOMEM;
+
+- size = wqs->num_pages * sizeof(*wqs->page_vaddr);
+- wqs->page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
++ wqs->page_vaddr = devm_kcalloc(&pdev->dev, wqs->num_pages,
++ sizeof(*wqs->page_vaddr), GFP_KERNEL);
+ if (!wqs->page_vaddr)
+ goto err_page_vaddr;
+
+- size = wqs->num_pages * sizeof(*wqs->shadow_page_vaddr);
+- wqs->shadow_page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
++ wqs->shadow_page_vaddr = devm_kcalloc(&pdev->dev, wqs->num_pages,
++ sizeof(*wqs->shadow_page_vaddr),
++ GFP_KERNEL);
+ if (!wqs->shadow_page_vaddr)
+ goto err_page_shadow_vaddr;
+
+@@ -379,15 +379,14 @@ static int alloc_wqes_shadow(struct hinic_wq *wq)
+ {
+ struct hinic_hwif *hwif = wq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+- size_t size;
+
+- size = wq->num_q_pages * wq->max_wqe_size;
+- wq->shadow_wqe = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
++ wq->shadow_wqe = devm_kcalloc(&pdev->dev, wq->num_q_pages,
++ wq->max_wqe_size, GFP_KERNEL);
+ if (!wq->shadow_wqe)
+ return -ENOMEM;
+
+- size = wq->num_q_pages * sizeof(wq->prod_idx);
+- wq->shadow_idx = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
++ wq->shadow_idx = devm_kcalloc(&pdev->dev, wq->num_q_pages,
++ sizeof(*wq->shadow_idx), GFP_KERNEL);
+ if (!wq->shadow_idx)
+ goto err_shadow_idx;
+
+@@ -772,7 +771,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+ /* If we only have one page, still need to get shadown wqe when
+ * wqe rolling-over page
+ */
+- if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) {
++ if (curr_pg != end_pg || end_prod_idx < *prod_idx) {
+ void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
+
+ copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx);
+@@ -842,7 +841,10 @@ struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+
+ *cons_idx = curr_cons_idx;
+
+- if (curr_pg != end_pg) {
++ /* If we only have one page, still need to get shadown wqe when
++ * wqe rolling-over page
++ */
++ if (curr_pg != end_pg || end_cons_idx < curr_cons_idx) {
+ void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
+
+ copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx);
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
+index ae707e305684b..92fba9a0c3718 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
+@@ -62,8 +62,6 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)");
+
+ #define HINIC_LRO_RX_TIMER_DEFAULT 16
+
+-#define VLAN_BITMAP_SIZE(nic_dev) (ALIGN(VLAN_N_VID, 8) / 8)
+-
+ #define work_to_rx_mode_work(work) \
+ container_of(work, struct hinic_rx_mode_work, work)
+
+@@ -82,56 +80,44 @@ static int set_features(struct hinic_dev *nic_dev,
+ netdev_features_t pre_features,
+ netdev_features_t features, bool force_change);
+
+-static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq)
++static void gather_rx_stats(struct hinic_rxq_stats *nic_rx_stats, struct hinic_rxq *rxq)
+ {
+- struct hinic_rxq_stats *nic_rx_stats = &nic_dev->rx_stats;
+ struct hinic_rxq_stats rx_stats;
+
+- u64_stats_init(&rx_stats.syncp);
+-
+ hinic_rxq_get_stats(rxq, &rx_stats);
+
+- u64_stats_update_begin(&nic_rx_stats->syncp);
+ nic_rx_stats->bytes += rx_stats.bytes;
+ nic_rx_stats->pkts += rx_stats.pkts;
+ nic_rx_stats->errors += rx_stats.errors;
+ nic_rx_stats->csum_errors += rx_stats.csum_errors;
+ nic_rx_stats->other_errors += rx_stats.other_errors;
+- u64_stats_update_end(&nic_rx_stats->syncp);
+-
+- hinic_rxq_clean_stats(rxq);
+ }
+
+-static void update_tx_stats(struct hinic_dev *nic_dev, struct hinic_txq *txq)
++static void gather_tx_stats(struct hinic_txq_stats *nic_tx_stats, struct hinic_txq *txq)
+ {
+- struct hinic_txq_stats *nic_tx_stats = &nic_dev->tx_stats;
+ struct hinic_txq_stats tx_stats;
+
+- u64_stats_init(&tx_stats.syncp);
+-
+ hinic_txq_get_stats(txq, &tx_stats);
+
+- u64_stats_update_begin(&nic_tx_stats->syncp);
+ nic_tx_stats->bytes += tx_stats.bytes;
+ nic_tx_stats->pkts += tx_stats.pkts;
+ nic_tx_stats->tx_busy += tx_stats.tx_busy;
+ nic_tx_stats->tx_wake += tx_stats.tx_wake;
+ nic_tx_stats->tx_dropped += tx_stats.tx_dropped;
+ nic_tx_stats->big_frags_pkts += tx_stats.big_frags_pkts;
+- u64_stats_update_end(&nic_tx_stats->syncp);
+-
+- hinic_txq_clean_stats(txq);
+ }
+
+-static void update_nic_stats(struct hinic_dev *nic_dev)
++static void gather_nic_stats(struct hinic_dev *nic_dev,
++ struct hinic_rxq_stats *nic_rx_stats,
++ struct hinic_txq_stats *nic_tx_stats)
+ {
+ int i, num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
+
+ for (i = 0; i < num_qps; i++)
+- update_rx_stats(nic_dev, &nic_dev->rxqs[i]);
++ gather_rx_stats(nic_rx_stats, &nic_dev->rxqs[i]);
+
+ for (i = 0; i < num_qps; i++)
+- update_tx_stats(nic_dev, &nic_dev->txqs[i]);
++ gather_tx_stats(nic_tx_stats, &nic_dev->txqs[i]);
+ }
+
+ /**
+@@ -144,13 +130,12 @@ static int create_txqs(struct hinic_dev *nic_dev)
+ {
+ int err, i, j, num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+- size_t txq_size;
+
+ if (nic_dev->txqs)
+ return -EINVAL;
+
+- txq_size = num_txqs * sizeof(*nic_dev->txqs);
+- nic_dev->txqs = devm_kzalloc(&netdev->dev, txq_size, GFP_KERNEL);
++ nic_dev->txqs = devm_kcalloc(&netdev->dev, num_txqs,
++ sizeof(*nic_dev->txqs), GFP_KERNEL);
+ if (!nic_dev->txqs)
+ return -ENOMEM;
+
+@@ -241,13 +226,12 @@ static int create_rxqs(struct hinic_dev *nic_dev)
+ {
+ int err, i, j, num_rxqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+- size_t rxq_size;
+
+ if (nic_dev->rxqs)
+ return -EINVAL;
+
+- rxq_size = num_rxqs * sizeof(*nic_dev->rxqs);
+- nic_dev->rxqs = devm_kzalloc(&netdev->dev, rxq_size, GFP_KERNEL);
++ nic_dev->rxqs = devm_kcalloc(&netdev->dev, num_rxqs,
++ sizeof(*nic_dev->rxqs), GFP_KERNEL);
+ if (!nic_dev->rxqs)
+ return -ENOMEM;
+
+@@ -562,8 +546,6 @@ int hinic_close(struct net_device *netdev)
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+- update_nic_stats(nic_dev);
+-
+ up(&nic_dev->mgmt_lock);
+
+ if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+@@ -857,26 +839,19 @@ static void hinic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+ {
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+- struct hinic_rxq_stats *nic_rx_stats;
+- struct hinic_txq_stats *nic_tx_stats;
+-
+- nic_rx_stats = &nic_dev->rx_stats;
+- nic_tx_stats = &nic_dev->tx_stats;
+-
+- down(&nic_dev->mgmt_lock);
++ struct hinic_rxq_stats nic_rx_stats = {};
++ struct hinic_txq_stats nic_tx_stats = {};
+
+ if (nic_dev->flags & HINIC_INTF_UP)
+- update_nic_stats(nic_dev);
++ gather_nic_stats(nic_dev, &nic_rx_stats, &nic_tx_stats);
+
+- up(&nic_dev->mgmt_lock);
+-
+- stats->rx_bytes = nic_rx_stats->bytes;
+- stats->rx_packets = nic_rx_stats->pkts;
+- stats->rx_errors = nic_rx_stats->errors;
++ stats->rx_bytes = nic_rx_stats.bytes;
++ stats->rx_packets = nic_rx_stats.pkts;
++ stats->rx_errors = nic_rx_stats.errors;
+
+- stats->tx_bytes = nic_tx_stats->bytes;
+- stats->tx_packets = nic_tx_stats->pkts;
+- stats->tx_errors = nic_tx_stats->tx_dropped;
++ stats->tx_bytes = nic_tx_stats.bytes;
++ stats->tx_packets = nic_tx_stats.pkts;
++ stats->tx_errors = nic_tx_stats.tx_dropped;
+ }
+
+ static int hinic_set_features(struct net_device *netdev,
+@@ -1175,8 +1150,6 @@ static void hinic_free_intr_coalesce(struct hinic_dev *nic_dev)
+ static int nic_dev_init(struct pci_dev *pdev)
+ {
+ struct hinic_rx_mode_work *rx_mode_work;
+- struct hinic_txq_stats *tx_stats;
+- struct hinic_rxq_stats *rx_stats;
+ struct hinic_dev *nic_dev;
+ struct net_device *netdev;
+ struct hinic_hwdev *hwdev;
+@@ -1237,15 +1210,8 @@ static int nic_dev_init(struct pci_dev *pdev)
+
+ sema_init(&nic_dev->mgmt_lock, 1);
+
+- tx_stats = &nic_dev->tx_stats;
+- rx_stats = &nic_dev->rx_stats;
+-
+- u64_stats_init(&tx_stats->syncp);
+- u64_stats_init(&rx_stats->syncp);
+-
+- nic_dev->vlan_bitmap = devm_kzalloc(&pdev->dev,
+- VLAN_BITMAP_SIZE(nic_dev),
+- GFP_KERNEL);
++ nic_dev->vlan_bitmap = devm_bitmap_zalloc(&pdev->dev, VLAN_N_VID,
++ GFP_KERNEL);
+ if (!nic_dev->vlan_bitmap) {
+ err = -ENOMEM;
+ goto err_vlan_bitmap;
+@@ -1516,8 +1482,15 @@ static struct pci_driver hinic_driver = {
+
+ static int __init hinic_module_init(void)
+ {
++ int ret;
++
+ hinic_dbg_register_debugfs(HINIC_DRV_NAME);
+- return pci_register_driver(&hinic_driver);
++
++ ret = pci_register_driver(&hinic_driver);
++ if (ret)
++ hinic_dbg_unregister_debugfs();
++
++ return ret;
+ }
+
+ static void __exit hinic_module_exit(void)
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+index fed3b6bc0d763..d11ec69a2e17d 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+@@ -73,17 +73,15 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
+ struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
+ unsigned int start;
+
+- u64_stats_update_begin(&stats->syncp);
+ do {
+- start = u64_stats_fetch_begin(&rxq_stats->syncp);
++ start = u64_stats_fetch_begin_irq(&rxq_stats->syncp);
+ stats->pkts = rxq_stats->pkts;
+ stats->bytes = rxq_stats->bytes;
+ stats->errors = rxq_stats->csum_errors +
+ rxq_stats->other_errors;
+ stats->csum_errors = rxq_stats->csum_errors;
+ stats->other_errors = rxq_stats->other_errors;
+- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+- u64_stats_update_end(&stats->syncp);
++ } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start));
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+index a78c398bf5b25..e81a7b28209b9 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+@@ -1180,7 +1180,6 @@ int hinic_vf_func_init(struct hinic_hwdev *hwdev)
+ dev_err(&hwdev->hwif->pdev->dev,
+ "Failed to register VF, err: %d, status: 0x%x, out size: 0x%x\n",
+ err, register_info.status, out_size);
+- hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC);
+ return -EIO;
+ }
+ } else {
+diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+index c5bdb0d374efa..8d3ec6c729cc7 100644
+--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+@@ -97,17 +97,15 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
+ struct hinic_txq_stats *txq_stats = &txq->txq_stats;
+ unsigned int start;
+
+- u64_stats_update_begin(&stats->syncp);
+ do {
+- start = u64_stats_fetch_begin(&txq_stats->syncp);
++ start = u64_stats_fetch_begin_irq(&txq_stats->syncp);
+ stats->pkts = txq_stats->pkts;
+ stats->bytes = txq_stats->bytes;
+ stats->tx_busy = txq_stats->tx_busy;
+ stats->tx_wake = txq_stats->tx_wake;
+ stats->tx_dropped = txq_stats->tx_dropped;
+ stats->big_frags_pkts = txq_stats->big_frags_pkts;
+- } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+- u64_stats_update_end(&stats->syncp);
++ } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start));
+ }
+
+ /**
+@@ -862,7 +860,6 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ int err, irqname_len;
+- size_t sges_size;
+
+ txq->netdev = netdev;
+ txq->sq = sq;
+@@ -871,13 +868,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
+
+ txq->max_sges = HINIC_MAX_SQ_BUFDESCS;
+
+- sges_size = txq->max_sges * sizeof(*txq->sges);
+- txq->sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL);
++ txq->sges = devm_kcalloc(&netdev->dev, txq->max_sges,
++ sizeof(*txq->sges), GFP_KERNEL);
+ if (!txq->sges)
+ return -ENOMEM;
+
+- sges_size = txq->max_sges * sizeof(*txq->free_sges);
+- txq->free_sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL);
++ txq->free_sges = devm_kcalloc(&netdev->dev, txq->max_sges,
++ sizeof(*txq->free_sges), GFP_KERNEL);
+ if (!txq->free_sges) {
+ err = -ENOMEM;
+ goto err_alloc_free_sges;
+diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
+index 27937c5d79567..daec9ce04531b 100644
+--- a/drivers/net/ethernet/i825xx/sni_82596.c
++++ b/drivers/net/ethernet/i825xx/sni_82596.c
+@@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev)
+ netdevice->dev_addr[5] = readb(eth_addr + 0x06);
+ iounmap(eth_addr);
+
+- if (!netdevice->irq) {
++ if (netdevice->irq < 0) {
+ printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n",
+ __FILE__, netdevice->base_addr);
++ retval = netdevice->irq;
+ goto probe_failed;
+ }
+
+diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
+index d5df131b183c7..6c534b92aeed0 100644
+--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
++++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
+@@ -2898,6 +2898,7 @@ static struct device *ehea_register_port(struct ehea_port *port,
+ ret = of_device_register(&port->ofdev);
+ if (ret) {
+ pr_err("failed to register device. ret=%d\n", ret);
++ put_device(&port->ofdev.dev);
+ goto out;
+ }
+
+diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
+index 3d9b4f99d357f..05759f690e1fd 100644
+--- a/drivers/net/ethernet/ibm/ibmveth.c
++++ b/drivers/net/ethernet/ibm/ibmveth.c
+@@ -196,7 +196,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
+ unsigned long offset;
+
+ for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
+- asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
++ asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
+ }
+
+ /* replenish the buffers for a pool. note that we don't need to
+@@ -1620,7 +1620,7 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
+ return rc;
+ }
+
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
+index 6aa6ff89a7651..890e27b986e2a 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -108,6 +108,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter);
+ static int send_query_phys_parms(struct ibmvnic_adapter *adapter);
+ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_sub_crq_queue *tx_scrq);
++static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+
+ struct ibmvnic_stat {
+ char name[ETH_GSTRING_LEN];
+@@ -1006,12 +1007,22 @@ static int ibmvnic_login(struct net_device *netdev)
+
+ static void release_login_buffer(struct ibmvnic_adapter *adapter)
+ {
++ if (!adapter->login_buf)
++ return;
++
++ dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
++ adapter->login_buf_sz, DMA_TO_DEVICE);
+ kfree(adapter->login_buf);
+ adapter->login_buf = NULL;
+ }
+
+ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+ {
++ if (!adapter->login_rsp_buf)
++ return;
++
++ dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
++ adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+ kfree(adapter->login_rsp_buf);
+ adapter->login_rsp_buf = NULL;
+ }
+@@ -1239,16 +1250,32 @@ static int __ibmvnic_open(struct net_device *netdev)
+ if (prev_state == VNIC_CLOSED)
+ enable_irq(adapter->tx_scrq[i]->irq);
+ enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+- netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
++ /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL
++ * resets, don't reset the stats because there could be batched
++ * skb's waiting to be sent. If we reset dql stats, we risk
++ * num_completed being greater than num_queued. This will cause
++ * a BUG_ON in dql_completed().
++ */
++ if (adapter->reset_reason != VNIC_RESET_NON_FATAL)
++ netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
+ }
+
+ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
+ if (rc) {
+ ibmvnic_napi_disable(adapter);
+- release_resources(adapter);
++ ibmvnic_disable_irqs(adapter);
+ return rc;
+ }
+
++ adapter->tx_queues_active = true;
++
++ /* Since queues were stopped until now, there shouldn't be any
++ * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
++ * don't need the synchronize_rcu()? Leaving it for consistency
++ * with setting ->tx_queues_active = false.
++ */
++ synchronize_rcu();
++
+ netif_tx_start_all_queues(netdev);
+
+ if (prev_state == VNIC_CLOSED) {
+@@ -1295,7 +1322,6 @@ static int ibmvnic_open(struct net_device *netdev)
+ rc = init_resources(adapter);
+ if (rc) {
+ netdev_err(netdev, "failed to initialize resources\n");
+- release_resources(adapter);
+ goto out;
+ }
+ }
+@@ -1312,6 +1338,11 @@ out:
+ adapter->state = VNIC_OPEN;
+ rc = 0;
+ }
++
++ if (rc) {
++ release_resources(adapter);
++ }
++
+ return rc;
+ }
+
+@@ -1417,6 +1448,14 @@ static void ibmvnic_cleanup(struct net_device *netdev)
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ /* ensure that transmissions are stopped if called by do_reset */
++
++ adapter->tx_queues_active = false;
++
++ /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
++ * update so they don't restart a queue after we stop it below.
++ */
++ synchronize_rcu();
++
+ if (test_bit(0, &adapter->resetting))
+ netif_tx_disable(netdev);
+ else
+@@ -1657,14 +1696,21 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
+ tx_buff->skb = NULL;
+ adapter->netdev->stats.tx_dropped++;
+ }
++
+ ind_bufp->index = 0;
++
+ if (atomic_sub_return(entries, &tx_scrq->used) <=
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+- __netif_subqueue_stopped(adapter->netdev, queue_num) &&
+- !test_bit(0, &adapter->resetting)) {
+- netif_wake_subqueue(adapter->netdev, queue_num);
+- netdev_dbg(adapter->netdev, "Started queue %d\n",
+- queue_num);
++ __netif_subqueue_stopped(adapter->netdev, queue_num)) {
++ rcu_read_lock();
++
++ if (adapter->tx_queues_active) {
++ netif_wake_subqueue(adapter->netdev, queue_num);
++ netdev_dbg(adapter->netdev, "Started queue %d\n",
++ queue_num);
++ }
++
++ rcu_read_unlock();
+ }
+ }
+
+@@ -1719,13 +1765,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ int index = 0;
+ u8 proto = 0;
+
+- tx_scrq = adapter->tx_scrq[queue_num];
+- txq = netdev_get_tx_queue(netdev, queue_num);
+- ind_bufp = &tx_scrq->ind_buf;
+-
+- if (test_bit(0, &adapter->resetting)) {
+- if (!netif_subqueue_stopped(netdev, skb))
+- netif_stop_subqueue(netdev, queue_num);
++ /* If a reset is in progress, drop the packet since
++ * the scrqs may get torn down. Otherwise use the
++ * rcu to ensure reset waits for us to complete.
++ */
++ rcu_read_lock();
++ if (!adapter->tx_queues_active) {
+ dev_kfree_skb_any(skb);
+
+ tx_send_failed++;
+@@ -1734,6 +1779,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ goto out;
+ }
+
++ tx_scrq = adapter->tx_scrq[queue_num];
++ txq = netdev_get_tx_queue(netdev, queue_num);
++ ind_bufp = &tx_scrq->ind_buf;
++
+ if (ibmvnic_xmit_workarounds(skb, netdev)) {
+ tx_dropped++;
+ tx_send_failed++;
+@@ -1741,6 +1790,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+ goto out;
+ }
++
+ if (skb_is_gso(skb))
+ tx_pool = &adapter->tso_pool[queue_num];
+ else
+@@ -1895,6 +1945,7 @@ tx_err:
+ netif_carrier_off(netdev);
+ }
+ out:
++ rcu_read_unlock();
+ netdev->stats.tx_dropped += tx_dropped;
+ netdev->stats.tx_bytes += tx_bytes;
+ netdev->stats.tx_packets += tx_packets;
+@@ -2029,6 +2080,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
+ return "UNKNOWN";
+ }
+
++/*
++ * Initialize the init_done completion and return code values. We
++ * can get a transport event just after registering the CRQ and the
++ * tasklet will use this to communicate the transport event. To ensure
++ * we don't miss the notification/error, initialize these _before_
++ * regisering the CRQ.
++ */
++static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
++{
++ reinit_completion(&adapter->init_done);
++ adapter->init_done_rc = 0;
++}
++
+ /*
+ * do_reset returns zero if we are able to keep processing reset events, or
+ * non-zero if we hit a fatal error and must halt.
+@@ -2135,6 +2199,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ */
+ adapter->state = VNIC_PROBED;
+
++ reinit_init_done(adapter);
++
+ if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
+ rc = init_crq_queue(adapter);
+ } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+@@ -2280,7 +2346,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
+ */
+ adapter->state = VNIC_PROBED;
+
+- reinit_completion(&adapter->init_done);
++ reinit_init_done(adapter);
++
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ netdev_err(adapter->netdev,
+@@ -2421,22 +2488,82 @@ out:
+ static void __ibmvnic_reset(struct work_struct *work)
+ {
+ struct ibmvnic_adapter *adapter;
+- bool saved_state = false;
++ unsigned int timeout = 5000;
+ struct ibmvnic_rwi *tmprwi;
++ bool saved_state = false;
+ struct ibmvnic_rwi *rwi;
+ unsigned long flags;
++ struct device *dev;
++ bool need_reset;
++ int num_fails = 0;
+ u32 reset_state;
+ int rc = 0;
+
+ adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
++ dev = &adapter->vdev->dev;
+
+- if (test_and_set_bit_lock(0, &adapter->resetting)) {
++ /* Wait for ibmvnic_probe() to complete. If probe is taking too long
++ * or if another reset is in progress, defer work for now. If probe
++ * eventually fails it will flush and terminate our work.
++ *
++ * Three possibilities here:
++ * 1. Adapter being removed - just return
++ * 2. Timed out on probe or another reset in progress - delay the work
++ * 3. Completed probe - perform any resets in queue
++ */
++ if (adapter->state == VNIC_PROBING &&
++ !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
++ dev_err(dev, "Reset thread timed out on probe");
+ queue_delayed_work(system_long_wq,
+ &adapter->ibmvnic_delayed_reset,
+ IBMVNIC_RESET_DELAY);
+ return;
+ }
+
++ /* adapter is done with probe (i.e state is never VNIC_PROBING now) */
++ if (adapter->state == VNIC_REMOVING)
++ return;
++
++ /* ->rwi_list is stable now (no one else is removing entries) */
++
++ /* ibmvnic_probe() may have purged the reset queue after we were
++ * scheduled to process a reset so there may be no resets to process.
++ * Before setting the ->resetting bit though, we have to make sure
++ * that there is in fact a reset to process. Otherwise we may race
++ * with ibmvnic_open() and end up leaving the vnic down:
++ *
++ * __ibmvnic_reset() ibmvnic_open()
++ * ----------------- --------------
++ *
++ * set ->resetting bit
++ * find ->resetting bit is set
++ * set ->state to IBMVNIC_OPEN (i.e
++ * assume reset will open device)
++ * return
++ * find reset queue empty
++ * return
++ *
++ * Neither performed vnic login/open and vnic stays down
++ *
++ * If we hold the lock and conditionally set the bit, either we
++ * or ibmvnic_open() will complete the open.
++ */
++ need_reset = false;
++ spin_lock(&adapter->rwi_lock);
++ if (!list_empty(&adapter->rwi_list)) {
++ if (test_and_set_bit_lock(0, &adapter->resetting)) {
++ queue_delayed_work(system_long_wq,
++ &adapter->ibmvnic_delayed_reset,
++ IBMVNIC_RESET_DELAY);
++ } else {
++ need_reset = true;
++ }
++ }
++ spin_unlock(&adapter->rwi_lock);
++
++ if (!need_reset)
++ return;
++
+ rwi = get_next_rwi(adapter);
+ while (rwi) {
+ spin_lock_irqsave(&adapter->state_lock, flags);
+@@ -2479,11 +2606,23 @@ static void __ibmvnic_reset(struct work_struct *work)
+ rc = do_hard_reset(adapter, rwi, reset_state);
+ rtnl_unlock();
+ }
+- if (rc) {
+- /* give backing device time to settle down */
++ if (rc)
++ num_fails++;
++ else
++ num_fails = 0;
++
++ /* If auto-priority-failover is enabled we can get
++ * back to back failovers during resets, resulting
++ * in at least two failed resets (from high-priority
++ * backing device to low-priority one and then back)
++ * If resets continue to fail beyond that, give the
++ * adapter some time to settle down before retrying.
++ */
++ if (num_fails >= 3) {
+ netdev_dbg(adapter->netdev,
+- "[S:%s] Hard reset failed, waiting 60 secs\n",
+- adapter_state_to_string(adapter->state));
++ "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
++ adapter_state_to_string(adapter->state),
++ num_fails);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(60 * HZ);
+ }
+@@ -2499,19 +2638,19 @@ static void __ibmvnic_reset(struct work_struct *work)
+ rwi = get_next_rwi(adapter);
+
+ /*
+- * If there is another reset queued, free the previous rwi
+- * and process the new reset even if previous reset failed
+- * (the previous reset could have failed because of a fail
+- * over for instance, so process the fail over).
+- *
+ * If there are no resets queued and the previous reset failed,
+ * the adapter would be in an undefined state. So retry the
+ * previous reset as a hard reset.
++ *
++ * Else, free the previous rwi and, if there is another reset
++ * queued, process the new reset even if previous reset failed
++ * (the previous reset could have failed because of a fail
++ * over for instance, so process the fail over).
+ */
+- if (rwi)
+- kfree(tmprwi);
+- else if (rc)
++ if (!rwi && rc)
+ rwi = tmprwi;
++ else
++ kfree(tmprwi);
+
+ if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
+ rwi->reset_reason == VNIC_RESET_MOBILITY || rc))
+@@ -2541,12 +2680,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work)
+ __ibmvnic_reset(&adapter->ibmvnic_reset);
+ }
+
++static void flush_reset_queue(struct ibmvnic_adapter *adapter)
++{
++ struct list_head *entry, *tmp_entry;
++
++ if (!list_empty(&adapter->rwi_list)) {
++ list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
++ list_del(entry);
++ kfree(list_entry(entry, struct ibmvnic_rwi, list));
++ }
++ }
++}
++
+ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+ enum ibmvnic_reset_reason reason)
+ {
+- struct list_head *entry, *tmp_entry;
+- struct ibmvnic_rwi *rwi, *tmp;
+ struct net_device *netdev = adapter->netdev;
++ struct ibmvnic_rwi *rwi, *tmp;
+ unsigned long flags;
+ int ret;
+
+@@ -2565,13 +2715,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+ goto err;
+ }
+
+- if (adapter->state == VNIC_PROBING) {
+- netdev_warn(netdev, "Adapter reset during probe\n");
+- adapter->init_done_rc = EAGAIN;
+- ret = EAGAIN;
+- goto err;
+- }
+-
+ list_for_each_entry(tmp, &adapter->rwi_list, list) {
+ if (tmp->reset_reason == reason) {
+ netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
+@@ -2589,10 +2732,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+ /* if we just received a transport event,
+ * flush reset queue and process this reset
+ */
+- if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
+- list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
+- list_del(entry);
+- }
++ if (adapter->force_reset_recovery)
++ flush_reset_queue(adapter);
++
+ rwi->reset_reason = reason;
+ list_add_tail(&rwi->list, &adapter->rwi_list);
+ netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
+@@ -2904,13 +3046,8 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
+ {
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+- if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
+- ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+- ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
+- } else {
+- ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
+- ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
+- }
++ ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
++ ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
+@@ -2923,23 +3060,21 @@ static int ibmvnic_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+ {
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+- int ret;
+
+- ret = 0;
++ if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq ||
++ ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
++ netdev_err(netdev, "Invalid request.\n");
++ netdev_err(netdev, "Max tx buffers = %llu\n",
++ adapter->max_rx_add_entries_per_subcrq);
++ netdev_err(netdev, "Max rx buffers = %llu\n",
++ adapter->max_tx_entries_per_subcrq);
++ return -EINVAL;
++ }
++
+ adapter->desired.rx_entries = ring->rx_pending;
+ adapter->desired.tx_entries = ring->tx_pending;
+
+- ret = wait_for_reset(adapter);
+-
+- if (!ret &&
+- (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
+- adapter->req_tx_entries_per_subcrq != ring->tx_pending))
+- netdev_info(netdev,
+- "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
+- ring->rx_pending, ring->tx_pending,
+- adapter->req_rx_add_entries_per_subcrq,
+- adapter->req_tx_entries_per_subcrq);
+- return ret;
++ return wait_for_reset(adapter);
+ }
+
+ static void ibmvnic_get_channels(struct net_device *netdev,
+@@ -2947,14 +3082,8 @@ static void ibmvnic_get_channels(struct net_device *netdev,
+ {
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+- if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
+- channels->max_rx = adapter->max_rx_queues;
+- channels->max_tx = adapter->max_tx_queues;
+- } else {
+- channels->max_rx = IBMVNIC_MAX_QUEUES;
+- channels->max_tx = IBMVNIC_MAX_QUEUES;
+- }
+-
++ channels->max_rx = adapter->max_rx_queues;
++ channels->max_tx = adapter->max_tx_queues;
+ channels->max_other = 0;
+ channels->max_combined = 0;
+ channels->rx_count = adapter->req_rx_queues;
+@@ -2967,22 +3096,11 @@ static int ibmvnic_set_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+ {
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+- int ret;
+
+- ret = 0;
+ adapter->desired.rx_queues = channels->rx_count;
+ adapter->desired.tx_queues = channels->tx_count;
+
+- ret = wait_for_reset(adapter);
+-
+- if (!ret &&
+- (adapter->req_rx_queues != channels->rx_count ||
+- adapter->req_tx_queues != channels->tx_count))
+- netdev_info(netdev,
+- "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
+- channels->rx_count, channels->tx_count,
+- adapter->req_rx_queues, adapter->req_tx_queues);
+- return ret;
++ return wait_for_reset(adapter);
+ }
+
+ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+@@ -2990,43 +3108,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+ struct ibmvnic_adapter *adapter = netdev_priv(dev);
+ int i;
+
+- switch (stringset) {
+- case ETH_SS_STATS:
+- for (i = 0; i < ARRAY_SIZE(ibmvnic_stats);
+- i++, data += ETH_GSTRING_LEN)
+- memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
++ if (stringset != ETH_SS_STATS)
++ return;
+
+- for (i = 0; i < adapter->req_tx_queues; i++) {
+- snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+- data += ETH_GSTRING_LEN;
++ for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
++ memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+
+- snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+- data += ETH_GSTRING_LEN;
++ for (i = 0; i < adapter->req_tx_queues; i++) {
++ snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
++ data += ETH_GSTRING_LEN;
+
+- snprintf(data, ETH_GSTRING_LEN,
+- "tx%d_dropped_packets", i);
+- data += ETH_GSTRING_LEN;
+- }
++ snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
++ data += ETH_GSTRING_LEN;
+
+- for (i = 0; i < adapter->req_rx_queues; i++) {
+- snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+- data += ETH_GSTRING_LEN;
++ snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
++ data += ETH_GSTRING_LEN;
++ }
+
+- snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+- data += ETH_GSTRING_LEN;
++ for (i = 0; i < adapter->req_rx_queues; i++) {
++ snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
++ data += ETH_GSTRING_LEN;
+
+- snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+- data += ETH_GSTRING_LEN;
+- }
+- break;
++ snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
++ data += ETH_GSTRING_LEN;
+
+- case ETH_SS_PRIV_FLAGS:
+- for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++)
+- strcpy(data + i * ETH_GSTRING_LEN,
+- ibmvnic_priv_flags[i]);
+- break;
+- default:
+- return;
++ snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
++ data += ETH_GSTRING_LEN;
+ }
+ }
+
+@@ -3039,8 +3146,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
+ return ARRAY_SIZE(ibmvnic_stats) +
+ adapter->req_tx_queues * NUM_TX_STATS +
+ adapter->req_rx_queues * NUM_RX_STATS;
+- case ETH_SS_PRIV_FLAGS:
+- return ARRAY_SIZE(ibmvnic_priv_flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+@@ -3093,26 +3198,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
+ }
+ }
+
+-static u32 ibmvnic_get_priv_flags(struct net_device *netdev)
+-{
+- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+-
+- return adapter->priv_flags;
+-}
+-
+-static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags)
+-{
+- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+- bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES);
+-
+- if (which_maxes)
+- adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES;
+- else
+- adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES;
+-
+- return 0;
+-}
+-
+ static const struct ethtool_ops ibmvnic_ethtool_ops = {
+ .get_drvinfo = ibmvnic_get_drvinfo,
+ .get_msglevel = ibmvnic_get_msglevel,
+@@ -3126,8 +3211,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
+ .get_sset_count = ibmvnic_get_sset_count,
+ .get_ethtool_stats = ibmvnic_get_ethtool_stats,
+ .get_link_ksettings = ibmvnic_get_link_ksettings,
+- .get_priv_flags = ibmvnic_get_priv_flags,
+- .set_priv_flags = ibmvnic_set_priv_flags,
+ };
+
+ /* Routines for managing CRQs/sCRQs */
+@@ -3456,9 +3539,15 @@ restart_loop:
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+ __netif_subqueue_stopped(adapter->netdev,
+ scrq->pool_index)) {
+- netif_wake_subqueue(adapter->netdev, scrq->pool_index);
+- netdev_dbg(adapter->netdev, "Started queue %d\n",
+- scrq->pool_index);
++ rcu_read_lock();
++ if (adapter->tx_queues_active) {
++ netif_wake_subqueue(adapter->netdev,
++ scrq->pool_index);
++ netdev_dbg(adapter->netdev,
++ "Started queue %d\n",
++ scrq->pool_index);
++ }
++ rcu_read_unlock();
+ }
+ }
+
+@@ -3664,11 +3753,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
+ struct device *dev = &adapter->vdev->dev;
+ union ibmvnic_crq crq;
+ int max_entries;
++ int cap_reqs;
++
++ /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
++ * the PROMISC flag). Initialize this count upfront. When the tasklet
++ * receives a response to all of these, it will send the next protocol
++ * message (QUERY_IP_OFFLOAD).
++ */
++ if (!(adapter->netdev->flags & IFF_PROMISC) ||
++ adapter->promisc_supported)
++ cap_reqs = 7;
++ else
++ cap_reqs = 6;
+
+ if (!retry) {
+ /* Sub-CRQ entries are 32 byte long */
+ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);
+
++ atomic_set(&adapter->running_cap_crqs, cap_reqs);
++
+ if (adapter->min_tx_entries_per_subcrq > entries_page ||
+ adapter->min_rx_add_entries_per_subcrq > entries_page) {
+ dev_err(dev, "Fatal, invalid entries per sub-crq\n");
+@@ -3729,44 +3832,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
+ adapter->opt_rx_comp_queues;
+
+ adapter->req_rx_add_queues = adapter->max_rx_add_queues;
++ } else {
++ atomic_add(cap_reqs, &adapter->running_cap_crqs);
+ }
+-
+ memset(&crq, 0, sizeof(crq));
+ crq.request_capability.first = IBMVNIC_CRQ_CMD;
+ crq.request_capability.cmd = REQUEST_CAPABILITY;
+
+ crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
+ crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
+ crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
+ crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ crq.request_capability.capability =
+ cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
+ crq.request_capability.number =
+ cpu_to_be64(adapter->req_tx_entries_per_subcrq);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ crq.request_capability.capability =
+ cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
+ crq.request_capability.number =
+ cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ crq.request_capability.capability = cpu_to_be16(REQ_MTU);
+ crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+
+ if (adapter->netdev->flags & IFF_PROMISC) {
+@@ -3774,16 +3878,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
+ crq.request_capability.capability =
+ cpu_to_be16(PROMISC_REQUESTED);
+ crq.request_capability.number = cpu_to_be64(1);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+ }
+ } else {
+ crq.request_capability.capability =
+ cpu_to_be16(PROMISC_REQUESTED);
+ crq.request_capability.number = cpu_to_be64(0);
+- atomic_inc(&adapter->running_cap_crqs);
++ cap_reqs--;
+ ibmvnic_send_crq(adapter, &crq);
+ }
++
++ /* Keep at end to catch any discrepancy between expected and actual
++ * CRQs sent.
++ */
++ WARN_ON(cap_reqs != 0);
+ }
+
+ static int pending_scrq(struct ibmvnic_adapter *adapter,
+@@ -4121,11 +4230,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
+ if (rc) {
+ adapter->login_pending = false;
+ netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+- goto buf_rsp_map_failed;
++ goto buf_send_failed;
+ }
+
+ return 0;
+
++buf_send_failed:
++ dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
++ DMA_FROM_DEVICE);
+ buf_rsp_map_failed:
+ kfree(login_rsp_buffer);
+ adapter->login_rsp_buf = NULL;
+@@ -4177,118 +4289,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter)
+ static void send_query_cap(struct ibmvnic_adapter *adapter)
+ {
+ union ibmvnic_crq crq;
++ int cap_reqs;
++
++ /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
++ * upfront. When the tasklet receives a response to all of these, it
++ * can send out the next protocol message (REQUEST_CAPABILITY).
++ */
++ cap_reqs = 25;
++
++ atomic_set(&adapter->running_cap_crqs, cap_reqs);
+
+- atomic_set(&adapter->running_cap_crqs, 0);
+ memset(&crq, 0, sizeof(crq));
+ crq.query_capability.first = IBMVNIC_CRQ_CMD;
+ crq.query_capability.cmd = QUERY_CAPABILITY;
+
+ crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MIN_MTU);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_MTU);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability =
+ cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ);
+- atomic_inc(&adapter->running_cap_crqs);
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
+
+ crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ);
+- atomic_inc(&adapter->running_cap_crqs);
++
+ ibmvnic_send_crq(adapter, &crq);
++ cap_reqs--;
++
++ /* Keep at end to catch any discrepancy between expected and actual
++ * CRQs sent.
++ */
++ WARN_ON(cap_reqs != 0);
+ }
+
+ static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
+@@ -4576,8 +4702,7 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq,
+ /* crq->change_mac_addr.mac_addr is the requested one
+ * crq->change_mac_addr_rsp.mac_addr is the returned valid one.
+ */
+- ether_addr_copy(netdev->dev_addr,
+- &crq->change_mac_addr_rsp.mac_addr[0]);
++ eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]);
+ ether_addr_copy(adapter->mac_addr,
+ &crq->change_mac_addr_rsp.mac_addr[0]);
+ out:
+@@ -4593,6 +4718,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
+ char *name;
+
+ atomic_dec(&adapter->running_cap_crqs);
++ netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n",
++ atomic_read(&adapter->running_cap_crqs));
+ switch (be16_to_cpu(crq->request_capability_rsp.capability)) {
+ case REQ_TX_QUEUES:
+ req_value = &adapter->req_tx_queues;
+@@ -4674,6 +4801,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
+ int num_tx_pools;
+ int num_rx_pools;
+ u64 *size_array;
++ u32 rsp_len;
+ int i;
+
+ /* CHECK: Test/set of login_pending does not need to be atomic
+@@ -4685,11 +4813,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
+ }
+ adapter->login_pending = false;
+
+- dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
+- DMA_TO_DEVICE);
+- dma_unmap_single(dev, adapter->login_rsp_buf_token,
+- adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+-
+ /* If the number of queues requested can't be allocated by the
+ * server, the login response will return with code 1. We will need
+ * to resend the login buffer with fewer queues requested.
+@@ -4725,6 +4848,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ return -EIO;
+ }
++
++ rsp_len = be32_to_cpu(login_rsp->len);
++ if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
++ rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
++ /* This can happen if a login request times out and there are
++ * 2 outstanding login requests sent, the LOGIN_RSP crq
++ * could have been for the older login request. So we are
++ * parsing the newer response buffer which may be incomplete
++ */
++ dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
++ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
++ return -EIO;
++ }
++
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+ /* variable buffer sizes are not supported, so just read the
+@@ -5069,11 +5209,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
+ */
+ adapter->login_pending = false;
+
+- if (!completion_done(&adapter->init_done)) {
+- complete(&adapter->init_done);
+- adapter->init_done_rc = -EIO;
+- }
+-
+ if (adapter->state == VNIC_DOWN)
+ rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
+ else
+@@ -5094,6 +5229,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
+ rc);
+ adapter->failover_pending = false;
+ }
++
++ if (!completion_done(&adapter->init_done)) {
++ if (!adapter->init_done_rc)
++ adapter->init_done_rc = -EAGAIN;
++ complete(&adapter->init_done);
++ }
++
+ break;
+ case IBMVNIC_CRQ_INIT_COMPLETE:
+ dev_info(dev, "Partner initialization complete\n");
+@@ -5114,6 +5256,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
+ adapter->fw_done_rc = -EIO;
+ complete(&adapter->fw_done);
+ }
++
++ /* if we got here during crq-init, retry crq-init */
++ if (!completion_done(&adapter->init_done)) {
++ adapter->init_done_rc = -EAGAIN;
++ complete(&adapter->init_done);
++ }
++
+ if (!completion_done(&adapter->stats_done))
+ complete(&adapter->stats_done);
+ if (test_bit(0, &adapter->resetting))
+@@ -5268,12 +5417,6 @@ static void ibmvnic_tasklet(struct tasklet_struct *t)
+ ibmvnic_handle_crq(crq, adapter);
+ crq->generic.first = 0;
+ }
+-
+- /* remain in tasklet until all
+- * capabilities responses are received
+- */
+- if (!adapter->wait_capability)
+- done = true;
+ }
+ /* if capabilities CRQ's were sent in this tasklet, the following
+ * tasklet must wait until all responses are received
+@@ -5414,6 +5557,9 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter)
+ crq->cur = 0;
+ spin_lock_init(&crq->lock);
+
++ /* process any CRQs that were queued before we enabled interrupts */
++ tasklet_schedule(&adapter->tasklet);
++
+ return retrc;
+
+ req_irq_failed:
+@@ -5439,10 +5585,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
+
+ adapter->from_passive_init = false;
+
+- if (reset)
+- reinit_completion(&adapter->init_done);
+-
+- adapter->init_done_rc = 0;
+ rc = ibmvnic_send_crq_init(adapter);
+ if (rc) {
+ dev_err(dev, "Send crq init failed with error %d\n", rc);
+@@ -5456,12 +5598,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
+
+ if (adapter->init_done_rc) {
+ release_crq_queue(adapter);
++ dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
+ return adapter->init_done_rc;
+ }
+
+ if (adapter->from_passive_init) {
+ adapter->state = VNIC_OPEN;
+ adapter->from_passive_init = false;
++ dev_err(dev, "CRQ-init failed, passive-init\n");
+ return -1;
+ }
+
+@@ -5473,6 +5617,15 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
+ release_sub_crqs(adapter, 0);
+ rc = init_sub_crqs(adapter);
+ } else {
++ /* no need to reinitialize completely, but we do
++ * need to clean up transmits that were in flight
++ * when we processed the reset. Failure to do so
++ * will confound the upper layer, usually TCP, by
++ * creating the illusion of transmits that are
++ * awaiting completion.
++ */
++ clean_tx_pools(adapter);
++
+ rc = reset_sub_crq_queues(adapter);
+ }
+ } else {
+@@ -5501,6 +5654,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ struct ibmvnic_adapter *adapter;
+ struct net_device *netdev;
+ unsigned char *mac_addr_p;
++ unsigned long flags;
+ bool init_success;
+ int rc;
+
+@@ -5529,7 +5683,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ adapter->login_pending = false;
+
+ ether_addr_copy(adapter->mac_addr, mac_addr_p);
+- ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
++ eth_hw_addr_set(netdev, adapter->mac_addr);
+ netdev->irq = dev->irq;
+ netdev->netdev_ops = &ibmvnic_netdev_ops;
+ netdev->ethtool_ops = &ibmvnic_ethtool_ops;
+@@ -5542,6 +5696,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ spin_lock_init(&adapter->rwi_lock);
+ spin_lock_init(&adapter->state_lock);
+ mutex_init(&adapter->fw_lock);
++ init_completion(&adapter->probe_done);
+ init_completion(&adapter->init_done);
+ init_completion(&adapter->fw_done);
+ init_completion(&adapter->reset_done);
+@@ -5550,6 +5705,33 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+
+ init_success = false;
+ do {
++ reinit_init_done(adapter);
++
++ /* clear any failovers we got in the previous pass
++ * since we are reinitializing the CRQ
++ */
++ adapter->failover_pending = false;
++
++ /* If we had already initialized CRQ, we may have one or
++ * more resets queued already. Discard those and release
++ * the CRQ before initializing the CRQ again.
++ */
++ release_crq_queue(adapter);
++
++ /* Since we are still in PROBING state, __ibmvnic_reset()
++ * will not access the ->rwi_list and since we released CRQ,
++ * we won't get _new_ transport events. But there may be an
++ * ongoing ibmvnic_reset() call. So serialize access to
++ * rwi_list. If we win the race, ibmvnic_reset() could add
++ * a reset after we purged but that's ok - we just may end
++ * up with an extra reset (i.e similar to having two or more
++ * resets in the queue at once).
++ * CHECK.
++ */
++ spin_lock_irqsave(&adapter->rwi_lock, flags);
++ flush_reset_queue(adapter);
++ spin_unlock_irqrestore(&adapter->rwi_lock, flags);
++
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
+@@ -5558,7 +5740,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ }
+
+ rc = ibmvnic_reset_init(adapter, false);
+- } while (rc == EAGAIN);
++ } while (rc == -EAGAIN);
+
+ /* We are ignoring the error from ibmvnic_reset_init() assuming that the
+ * partner is not ready. CRQ is not active. When the partner becomes
+@@ -5581,12 +5763,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ goto ibmvnic_dev_file_err;
+
+ netif_carrier_off(netdev);
+- rc = register_netdev(netdev);
+- if (rc) {
+- dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
+- goto ibmvnic_register_fail;
+- }
+- dev_info(&dev->dev, "ibmvnic registered\n");
+
+ if (init_success) {
+ adapter->state = VNIC_PROBED;
+@@ -5599,6 +5775,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+
+ adapter->wait_for_reset = false;
+ adapter->last_reset_time = jiffies;
++
++ rc = register_netdev(netdev);
++ if (rc) {
++ dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
++ goto ibmvnic_register_fail;
++ }
++ dev_info(&dev->dev, "ibmvnic registered\n");
++
++ complete(&adapter->probe_done);
++
+ return 0;
+
+ ibmvnic_register_fail:
+@@ -5613,6 +5799,17 @@ ibmvnic_stats_fail:
+ ibmvnic_init_fail:
+ release_sub_crqs(adapter, 1);
+ release_crq_queue(adapter);
++
++ /* cleanup worker thread after releasing CRQ so we don't get
++ * transport events (i.e new work items for the worker thread).
++ */
++ adapter->state = VNIC_REMOVING;
++ complete(&adapter->probe_done);
++ flush_work(&adapter->ibmvnic_reset);
++ flush_delayed_work(&adapter->ibmvnic_delayed_reset);
++
++ flush_reset_queue(adapter);
++
+ mutex_destroy(&adapter->fw_lock);
+ free_netdev(netdev);
+
+@@ -5687,10 +5884,14 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
+ be64_to_cpu(session_token));
+ rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
+ H_SESSION_ERR_DETECTED, session_token, 0, 0);
+- if (rc)
++ if (rc) {
+ netdev_err(netdev,
+ "H_VIOCTL initiated failover failed, rc %ld\n",
+ rc);
++ goto last_resort;
++ }
++
++ return count;
+
+ last_resort:
+ netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
+index 22df602323bc0..b01c439965ff9 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.h
++++ b/drivers/net/ethernet/ibm/ibmvnic.h
+@@ -43,11 +43,6 @@
+
+ #define IBMVNIC_RESET_DELAY 100
+
+-static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
+-#define IBMVNIC_USE_SERVER_MAXES 0x1
+- "use-server-maxes"
+-};
+-
+ struct ibmvnic_login_buffer {
+ __be32 len;
+ __be32 version;
+@@ -885,7 +880,6 @@ struct ibmvnic_adapter {
+ struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
+ dma_addr_t ip_offload_ctrl_tok;
+ u32 msg_enable;
+- u32 priv_flags;
+
+ /* Vital Product Data (VPD) */
+ struct ibmvnic_vpd *vpd;
+@@ -933,6 +927,7 @@ struct ibmvnic_adapter {
+
+ struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_pool *tso_pool;
++ struct completion probe_done;
+ struct completion init_done;
+ int init_done_rc;
+
+@@ -1002,11 +997,14 @@ struct ibmvnic_adapter {
+ struct work_struct ibmvnic_reset;
+ struct delayed_work ibmvnic_delayed_reset;
+ unsigned long resetting;
+- bool napi_enabled, from_passive_init;
+- bool login_pending;
+ /* last device reset time */
+ unsigned long last_reset_time;
+
++ bool napi_enabled;
++ bool from_passive_init;
++ bool login_pending;
++ /* protected by rcu */
++ bool tx_queues_active;
+ bool failover_pending;
+ bool force_reset_recovery;
+
+diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
+index 09ae1939e6db4..8cd371437c99f 100644
+--- a/drivers/net/ethernet/intel/e100.c
++++ b/drivers/net/ethernet/intel/e100.c
+@@ -1742,11 +1742,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb,
+ dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ /* If we can't map the skb, have the upper layer try later */
+- if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+- dev_kfree_skb_any(skb);
+- skb = NULL;
++ if (dma_mapping_error(&nic->pdev->dev, dma_addr))
+ return -ENOMEM;
+- }
+
+ /*
+ * Use the last 4 bytes of the SKB payload packet as the CRC, used for
+@@ -3003,9 +3000,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct nic *nic = netdev_priv(netdev);
+
++ netif_device_detach(netdev);
++
+ if (netif_running(netdev))
+ e100_down(nic);
+- netif_device_detach(netdev);
+
+ if ((nic->flags & wol_magic) | e100_asf(nic)) {
+ /* enable reverse auto-negotiation */
+@@ -3022,7 +3020,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
+ *enable_wake = false;
+ }
+
+- pci_clear_master(pdev);
++ pci_disable_device(pdev);
+ }
+
+ static int __e100_power_off(struct pci_dev *pdev, bool wake)
+@@ -3042,8 +3040,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d)
+
+ __e100_shutdown(to_pci_dev(dev_d), &wake);
+
+- device_wakeup_disable(dev_d);
+-
+ return 0;
+ }
+
+@@ -3051,6 +3047,14 @@ static int __maybe_unused e100_resume(struct device *dev_d)
+ {
+ struct net_device *netdev = dev_get_drvdata(dev_d);
+ struct nic *nic = netdev_priv(netdev);
++ int err;
++
++ err = pci_enable_device(to_pci_dev(dev_d));
++ if (err) {
++ netdev_err(netdev, "Resume cannot enable PCI device, aborting\n");
++ return err;
++ }
++ pci_set_master(to_pci_dev(dev_d));
+
+ /* disable reverse auto-negotiation */
+ if (nic->phy == phy_82552_v) {
+@@ -3062,10 +3066,11 @@ static int __maybe_unused e100_resume(struct device *dev_d)
+ smartspeed & ~(E100_82552_REV_ANEG));
+ }
+
+- netif_device_attach(netdev);
+ if (netif_running(netdev))
+ e100_up(nic);
+
++ netif_device_attach(netdev);
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
+index 3178efd980066..eb6961d901c16 100644
+--- a/drivers/net/ethernet/intel/e1000e/e1000.h
++++ b/drivers/net/ethernet/intel/e1000e/e1000.h
+@@ -114,7 +114,8 @@ enum e1000_boards {
+ board_pch_lpt,
+ board_pch_spt,
+ board_pch_cnp,
+- board_pch_tgp
++ board_pch_tgp,
++ board_pch_adp
+ };
+
+ struct e1000_ps_page {
+@@ -501,6 +502,7 @@ extern const struct e1000_info e1000_pch_lpt_info;
+ extern const struct e1000_info e1000_pch_spt_info;
+ extern const struct e1000_info e1000_pch_cnp_info;
+ extern const struct e1000_info e1000_pch_tgp_info;
++extern const struct e1000_info e1000_pch_adp_info;
+ extern const struct e1000_info e1000_es2_info;
+
+ void e1000e_ptp_init(struct e1000_adapter *adapter);
+diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+index 5e4fc9b4e2adb..9466f65a6da77 100644
+--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+@@ -1009,8 +1009,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
+ {
+ u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
+ link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
+- u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */
+- u16 lat_enc_d = 0; /* latency decoded */
++ u32 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */
++ u32 lat_enc_d = 0; /* latency decoded */
+ u16 lat_enc = 0; /* latency encoded */
+
+ if (link) {
+@@ -4136,9 +4136,9 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
+ return ret_val;
+
+ if (!(data & valid_csum_mask)) {
+- e_dbg("NVM Checksum Invalid\n");
++ e_dbg("NVM Checksum valid bit not set\n");
+
+- if (hw->mac.type < e1000_pch_cnp) {
++ if (hw->mac.type < e1000_pch_tgp) {
+ data |= valid_csum_mask;
+ ret_val = e1000_write_nvm(hw, word, 1, &data);
+ if (ret_val)
+@@ -6021,3 +6021,23 @@ const struct e1000_info e1000_pch_tgp_info = {
+ .phy_ops = &ich8_phy_ops,
+ .nvm_ops = &spt_nvm_ops,
+ };
++
++const struct e1000_info e1000_pch_adp_info = {
++ .mac = e1000_pch_adp,
++ .flags = FLAG_IS_ICH
++ | FLAG_HAS_WOL
++ | FLAG_HAS_HW_TIMESTAMP
++ | FLAG_HAS_CTRLEXT_ON_LOAD
++ | FLAG_HAS_AMT
++ | FLAG_HAS_FLASH
++ | FLAG_HAS_JUMBO_FRAMES
++ | FLAG_APME_IN_WUC,
++ .flags2 = FLAG2_HAS_PHY_STATS
++ | FLAG2_HAS_EEE,
++ .pba = 26,
++ .max_hw_frame_size = 9022,
++ .get_variants = e1000_get_variants_ich8lan,
++ .mac_ops = &ich8_mac_ops,
++ .phy_ops = &ich8_phy_ops,
++ .nvm_ops = &spt_nvm_ops,
++};
+diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
+index ebcb2a30add09..6b7d162af3e5e 100644
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -52,6 +52,7 @@ static const struct e1000_info *e1000_info_tbl[] = {
+ [board_pch_spt] = &e1000_pch_spt_info,
+ [board_pch_cnp] = &e1000_pch_cnp_info,
+ [board_pch_tgp] = &e1000_pch_tgp_info,
++ [board_pch_adp] = &e1000_pch_adp_info,
+ };
+
+ struct e1000_reg_info {
+@@ -5297,31 +5298,6 @@ static void e1000_watchdog_task(struct work_struct *work)
+ ew32(TARC(0), tarc0);
+ }
+
+- /* disable TSO for pcie and 10/100 speeds, to avoid
+- * some hardware issues
+- */
+- if (!(adapter->flags & FLAG_TSO_FORCE)) {
+- switch (adapter->link_speed) {
+- case SPEED_10:
+- case SPEED_100:
+- e_info("10/100 speed: disabling TSO\n");
+- netdev->features &= ~NETIF_F_TSO;
+- netdev->features &= ~NETIF_F_TSO6;
+- break;
+- case SPEED_1000:
+- netdev->features |= NETIF_F_TSO;
+- netdev->features |= NETIF_F_TSO6;
+- break;
+- default:
+- /* oops */
+- break;
+- }
+- if (hw->mac.type == e1000_pch_spt) {
+- netdev->features &= ~NETIF_F_TSO;
+- netdev->features &= ~NETIF_F_TSO6;
+- }
+- }
+-
+ /* enable transmits in the hardware, need to do this
+ * after setting TARC(0)
+ */
+@@ -5940,9 +5916,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
+ e1000_tx_queue(tx_ring, tx_flags, count);
+ /* Make sure there is space in the ring for the next send. */
+ e1000_maybe_stop_tx(tx_ring,
+- (MAX_SKB_FRAGS *
++ ((MAX_SKB_FRAGS + 1) *
+ DIV_ROUND_UP(PAGE_SIZE,
+- adapter->tx_fifo_limit) + 2));
++ adapter->tx_fifo_limit) + 4));
+
+ if (!netdev_xmit_more() ||
+ netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
+@@ -6346,7 +6322,8 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
+ u32 mac_data;
+ u16 phy_data;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME configure the device for S0ix */
+ mac_data = er32(H2ME);
+ mac_data |= E1000_H2ME_START_DPG;
+@@ -6495,7 +6472,12 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
+ u16 phy_data;
+ u32 i = 0;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
++ /* Keep the GPT clock enabled for CSME */
++ mac_data = er32(FEXTNVM);
++ mac_data |= BIT(3);
++ ew32(FEXTNVM, mac_data);
+ /* Request ME unconfigure the device from S0ix */
+ mac_data = er32(H2ME);
+ mac_data &= ~E1000_H2ME_START_DPG;
+@@ -7536,6 +7518,32 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ NETIF_F_RXCSUM |
+ NETIF_F_HW_CSUM);
+
++ /* disable TSO for pcie and 10/100 speeds to avoid
++ * some hardware issues and for i219 to fix transfer
++ * speed being capped at 60%
++ */
++ if (!(adapter->flags & FLAG_TSO_FORCE)) {
++ switch (adapter->link_speed) {
++ case SPEED_10:
++ case SPEED_100:
++ e_info("10/100 speed: disabling TSO\n");
++ netdev->features &= ~NETIF_F_TSO;
++ netdev->features &= ~NETIF_F_TSO6;
++ break;
++ case SPEED_1000:
++ netdev->features |= NETIF_F_TSO;
++ netdev->features |= NETIF_F_TSO6;
++ break;
++ default:
++ /* oops */
++ break;
++ }
++ if (hw->mac.type == e1000_pch_spt) {
++ netdev->features &= ~NETIF_F_TSO;
++ netdev->features &= ~NETIF_F_TSO6;
++ }
++ }
++
+ /* Set user-changeable features (subset of all device features) */
+ netdev->hw_features = netdev->features;
+ netdev->hw_features |= NETIF_F_RXFCS;
+@@ -7903,22 +7911,22 @@ static const struct pci_device_id e1000_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_tgp },
+- { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_tgp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_adp },
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_adp },
+
+ { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
+ };
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+index 3362f26d7f999..1b273446621c5 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+@@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue;
+ **/
+ static int __init fm10k_init_module(void)
+ {
++ int ret;
++
+ pr_info("%s\n", fm10k_driver_string);
+ pr_info("%s\n", fm10k_copyright);
+
+@@ -43,7 +45,13 @@ static int __init fm10k_init_module(void)
+
+ fm10k_dbg_init();
+
+- return fm10k_register_pci_driver();
++ ret = fm10k_register_pci_driver();
++ if (ret) {
++ fm10k_dbg_exit();
++ destroy_workqueue(fm10k_workqueue);
++ }
++
++ return ret;
+ }
+ module_init(fm10k_init_module);
+
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+index 2fb52bd6fc0e1..2cca9e84e31e1 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+@@ -990,7 +990,7 @@ static int fm10k_set_mac(struct net_device *dev, void *p)
+ }
+
+ if (!err) {
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+ ether_addr_copy(hw->mac.addr, addr->sa_data);
+ dev->addr_assign_type &= ~NET_ADDR_RANDOM;
+ }
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+index adfa2768f024d..b473cb7d7c575 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+@@ -300,7 +300,7 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
+ if (is_valid_ether_addr(hw->mac.perm_addr)) {
+ ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
+ ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr);
+- ether_addr_copy(netdev->dev_addr, hw->mac.perm_addr);
++ eth_hw_addr_set(netdev, hw->mac.perm_addr);
+ netdev->addr_assign_type &= ~NET_ADDR_RANDOM;
+ }
+
+@@ -2045,7 +2045,7 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
+ netdev->addr_assign_type |= NET_ADDR_RANDOM;
+ }
+
+- ether_addr_copy(netdev->dev_addr, hw->mac.addr);
++ eth_hw_addr_set(netdev, hw->mac.addr);
+ ether_addr_copy(netdev->perm_addr, hw->mac.addr);
+
+ if (!is_valid_ether_addr(netdev->perm_addr)) {
+diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
+index 39fb3d57c0574..a42ca847c8f86 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e.h
++++ b/drivers/net/ethernet/intel/i40e/i40e.h
+@@ -37,6 +37,7 @@
+ #include <net/tc_act/tc_mirred.h>
+ #include <net/udp_tunnel.h>
+ #include <net/xdp_sock.h>
++#include <linux/bitfield.h>
+ #include "i40e_type.h"
+ #include "i40e_prototype.h"
+ #include <linux/net/intel/i40e_client.h>
+@@ -144,6 +145,7 @@ enum i40e_state_t {
+ __I40E_VIRTCHNL_OP_PENDING,
+ __I40E_RECOVERY_MODE,
+ __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */
++ __I40E_IN_REMOVE,
+ __I40E_VFS_RELEASING,
+ /* This must be last as it determines the size of the BITMAP */
+ __I40E_STATE_SIZE__,
+@@ -161,6 +163,7 @@ enum i40e_vsi_state_t {
+ __I40E_VSI_OVERFLOW_PROMISC,
+ __I40E_VSI_REINIT_REQUESTED,
+ __I40E_VSI_DOWN_REQUESTED,
++ __I40E_VSI_RELEASING,
+ /* This must be last as it determines the size of the BITMAP */
+ __I40E_VSI_STATE_SIZE__,
+ };
+@@ -173,7 +176,6 @@ enum i40e_interrupt_policy {
+
+ struct i40e_lump_tracking {
+ u16 num_entries;
+- u16 search_hint;
+ u16 list[0];
+ #define I40E_PILE_VALID_BIT 0x8000
+ #define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2)
+@@ -847,12 +849,12 @@ struct i40e_vsi {
+ struct rtnl_link_stats64 net_stats_offsets;
+ struct i40e_eth_stats eth_stats;
+ struct i40e_eth_stats eth_stats_offsets;
+- u32 tx_restart;
+- u32 tx_busy;
++ u64 tx_restart;
++ u64 tx_busy;
+ u64 tx_linearize;
+ u64 tx_force_wb;
+- u32 rx_buf_failed;
+- u32 rx_page_failed;
++ u64 rx_buf_failed;
++ u64 rx_page_failed;
+
+ /* These are containers of ring pointers, allocated at run-time */
+ struct i40e_ring **rx_rings;
+@@ -1086,6 +1088,21 @@ static inline void i40e_write_fd_input_set(struct i40e_pf *pf,
+ (u32)(val & 0xFFFFFFFFULL));
+ }
+
++/**
++ * i40e_get_pf_count - get PCI PF count.
++ * @hw: pointer to a hw.
++ *
++ * Reports the function number of the highest PCI physical
++ * function plus 1 as it is loaded from the NVM.
++ *
++ * Return: PCI PF count.
++ **/
++static inline u32 i40e_get_pf_count(struct i40e_hw *hw)
++{
++ return FIELD_GET(I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK,
++ rd32(hw, I40E_GLGEN_PCIFCNCNT));
++}
++
+ /* needed by i40e_ethtool.c */
+ int i40e_up(struct i40e_vsi *vsi);
+ void i40e_down(struct i40e_vsi *vsi);
+@@ -1247,10 +1264,11 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
+ void i40e_ptp_init(struct i40e_pf *pf);
+ void i40e_ptp_stop(struct i40e_pf *pf);
+ int i40e_ptp_alloc_pins(struct i40e_pf *pf);
++int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
+ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
+-i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf);
+-i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf);
+-i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf);
++int i40e_get_partition_bw_setting(struct i40e_pf *pf);
++int i40e_set_partition_bw_setting(struct i40e_pf *pf);
++int i40e_commit_partition_bw_setting(struct i40e_pf *pf);
+ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
+
+ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
+@@ -1268,4 +1286,18 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+ struct i40e_cloud_filter *filter,
+ bool add);
++
++/**
++ * i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF
++ * @pf: pointer to a pf.
++ *
++ * Check and return value of flag I40E_FLAG_TC_MQPRIO.
++ *
++ * Return: I40E_FLAG_TC_MQPRIO set state.
++ **/
++static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
++{
++ return pf->flags & I40E_FLAG_TC_MQPRIO;
++}
++
+ #endif /* _I40E_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+index 593912b176099..20de187dc5f15 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+@@ -47,9 +47,9 @@ static void i40e_adminq_init_regs(struct i40e_hw *hw)
+ * i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ * @hw: pointer to the hardware structure
+ **/
+-static i40e_status i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
++static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
++ int ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+ i40e_mem_atq_ring,
+@@ -74,9 +74,9 @@ static i40e_status i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+ * i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ * @hw: pointer to the hardware structure
+ **/
+-static i40e_status i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
++static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
++ int ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+ i40e_mem_arq_ring,
+@@ -115,11 +115,11 @@ static void i40e_free_adminq_arq(struct i40e_hw *hw)
+ * i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ * @hw: pointer to the hardware structure
+ **/
+-static i40e_status i40e_alloc_arq_bufs(struct i40e_hw *hw)
++static int i40e_alloc_arq_bufs(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
+ struct i40e_aq_desc *desc;
+ struct i40e_dma_mem *bi;
++ int ret_code;
+ int i;
+
+ /* We'll be allocating the buffer info memory first, then we can
+@@ -182,10 +182,10 @@ unwind_alloc_arq_bufs:
+ * i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ * @hw: pointer to the hardware structure
+ **/
+-static i40e_status i40e_alloc_asq_bufs(struct i40e_hw *hw)
++static int i40e_alloc_asq_bufs(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
+ struct i40e_dma_mem *bi;
++ int ret_code;
+ int i;
+
+ /* No mapped memory needed yet, just the buffer info structures */
+@@ -266,9 +266,9 @@ static void i40e_free_asq_bufs(struct i40e_hw *hw)
+ *
+ * Configure base address and length registers for the transmit queue
+ **/
+-static i40e_status i40e_config_asq_regs(struct i40e_hw *hw)
++static int i40e_config_asq_regs(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+@@ -295,9 +295,9 @@ static i40e_status i40e_config_asq_regs(struct i40e_hw *hw)
+ *
+ * Configure base address and length registers for the receive (event queue)
+ **/
+-static i40e_status i40e_config_arq_regs(struct i40e_hw *hw)
++static int i40e_config_arq_regs(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+@@ -334,9 +334,9 @@ static i40e_status i40e_config_arq_regs(struct i40e_hw *hw)
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ **/
+-static i40e_status i40e_init_asq(struct i40e_hw *hw)
++static int i40e_init_asq(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (hw->aq.asq.count > 0) {
+ /* queue already initialized */
+@@ -393,9 +393,9 @@ init_adminq_exit:
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ **/
+-static i40e_status i40e_init_arq(struct i40e_hw *hw)
++static int i40e_init_arq(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (hw->aq.arq.count > 0) {
+ /* queue already initialized */
+@@ -445,9 +445,9 @@ init_adminq_exit:
+ *
+ * The main shutdown routine for the Admin Send Queue
+ **/
+-static i40e_status i40e_shutdown_asq(struct i40e_hw *hw)
++static int i40e_shutdown_asq(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ mutex_lock(&hw->aq.asq_mutex);
+
+@@ -479,9 +479,9 @@ shutdown_asq_out:
+ *
+ * The main shutdown routine for the Admin Receive Queue
+ **/
+-static i40e_status i40e_shutdown_arq(struct i40e_hw *hw)
++static int i40e_shutdown_arq(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ mutex_lock(&hw->aq.arq_mutex);
+
+@@ -582,12 +582,12 @@ static void i40e_set_hw_flags(struct i40e_hw *hw)
+ * - hw->aq.arq_buf_size
+ * - hw->aq.asq_buf_size
+ **/
+-i40e_status i40e_init_adminq(struct i40e_hw *hw)
++int i40e_init_adminq(struct i40e_hw *hw)
+ {
+ u16 cfg_ptr, oem_hi, oem_lo;
+ u16 eetrack_lo, eetrack_hi;
+- i40e_status ret_code;
+ int retry = 0;
++ int ret_code;
+
+ /* verify input for valid configuration */
+ if ((hw->aq.num_arq_entries == 0) ||
+@@ -779,18 +779,18 @@ static bool i40e_asq_done(struct i40e_hw *hw)
+ * This is the main send command driver routine for the Admin Queue send
+ * queue. It runs the queue, cleans the queue, etc
+ **/
+-i40e_status i40e_asq_send_command(struct i40e_hw *hw,
+- struct i40e_aq_desc *desc,
+- void *buff, /* can be NULL */
+- u16 buff_size,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_asq_send_command(struct i40e_hw *hw,
++ struct i40e_aq_desc *desc,
++ void *buff, /* can be NULL */
++ u16 buff_size,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+- i40e_status status = 0;
+ struct i40e_dma_mem *dma_buff = NULL;
+ struct i40e_asq_cmd_details *details;
+ struct i40e_aq_desc *desc_on_ring;
+ bool cmd_completed = false;
+ u16 retval = 0;
++ int status = 0;
+ u32 val = 0;
+
+ mutex_lock(&hw->aq.asq_mutex);
+@@ -993,14 +993,14 @@ void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'
+ **/
+-i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
+- struct i40e_arq_event_info *e,
+- u16 *pending)
++int i40e_clean_arq_element(struct i40e_hw *hw,
++ struct i40e_arq_event_info *e,
++ u16 *pending)
+ {
+- i40e_status ret_code = 0;
+ u16 ntc = hw->aq.arq.next_to_clean;
+ struct i40e_aq_desc *desc;
+ struct i40e_dma_mem *bi;
++ int ret_code = 0;
+ u16 desc_idx;
+ u16 datalen;
+ u16 flags;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+index cb8689222c8b7..a6c9a9e343d11 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+@@ -20,16 +20,16 @@ enum i40e_memory_type {
+ };
+
+ /* prototype for functions used for dynamic memory allocation */
+-i40e_status i40e_allocate_dma_mem(struct i40e_hw *hw,
+- struct i40e_dma_mem *mem,
+- enum i40e_memory_type type,
+- u64 size, u32 alignment);
+-i40e_status i40e_free_dma_mem(struct i40e_hw *hw,
+- struct i40e_dma_mem *mem);
+-i40e_status i40e_allocate_virt_mem(struct i40e_hw *hw,
+- struct i40e_virt_mem *mem,
+- u32 size);
+-i40e_status i40e_free_virt_mem(struct i40e_hw *hw,
+- struct i40e_virt_mem *mem);
++int i40e_allocate_dma_mem(struct i40e_hw *hw,
++ struct i40e_dma_mem *mem,
++ enum i40e_memory_type type,
++ u64 size, u32 alignment);
++int i40e_free_dma_mem(struct i40e_hw *hw,
++ struct i40e_dma_mem *mem);
++int i40e_allocate_virt_mem(struct i40e_hw *hw,
++ struct i40e_virt_mem *mem,
++ u32 size);
++int i40e_free_virt_mem(struct i40e_hw *hw,
++ struct i40e_virt_mem *mem);
+
+ #endif /* _I40E_ALLOC_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
+index ea2bb0140a6eb..8bcb98b85e3d9 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
+@@ -177,6 +177,10 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
+ "Cannot locate client instance close routine\n");
+ return;
+ }
++ if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
++ dev_dbg(&pf->pdev->dev, "Client is not open, abort close\n");
++ return;
++ }
+ cdev->client->ops->close(&cdev->lan_info, cdev->client, reset);
+ clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+ i40e_client_release_qvlist(&cdev->lan_info);
+@@ -429,7 +433,6 @@ void i40e_client_subtask(struct i40e_pf *pf)
+ /* Remove failed client instance */
+ clear_bit(__I40E_CLIENT_INSTANCE_OPENED,
+ &cdev->state);
+- i40e_client_del_instance(pf);
+ return;
+ }
+ }
+@@ -538,7 +541,7 @@ static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+ {
+ struct i40e_pf *pf = ldev->pf;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status err;
++ int err;
+
+ err = i40e_aq_send_msg_to_vf(hw, vf_id, VIRTCHNL_OP_IWARP,
+ 0, msg, len, NULL);
+@@ -671,7 +674,7 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ struct i40e_vsi_context ctxt;
+ bool update = true;
+- i40e_status err;
++ int err;
+
+ /* TODO: for now do not allow setting VF's VSI setting */
+ if (is_vf)
+@@ -683,8 +686,8 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+ ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+ if (err) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "couldn't get PF vsi config, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+@@ -711,8 +714,8 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+ err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (err) {
+ dev_info(&pf->pdev->dev,
+- "update VSI ctxt for PE failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "update VSI ctxt for PE failed, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
+index b4d3fed0d2f20..7f91e04d75b8c 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
+@@ -14,9 +14,9 @@
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+-i40e_status i40e_set_mac_type(struct i40e_hw *hw)
++int i40e_set_mac_type(struct i40e_hw *hw)
+ {
+- i40e_status status = 0;
++ int status = 0;
+
+ if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
+ switch (hw->device_id) {
+@@ -122,154 +122,6 @@ const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err)
+ return hw->err_str;
+ }
+
+-/**
+- * i40e_stat_str - convert status err code to a string
+- * @hw: pointer to the HW structure
+- * @stat_err: the status error code to convert
+- **/
+-const char *i40e_stat_str(struct i40e_hw *hw, i40e_status stat_err)
+-{
+- switch (stat_err) {
+- case 0:
+- return "OK";
+- case I40E_ERR_NVM:
+- return "I40E_ERR_NVM";
+- case I40E_ERR_NVM_CHECKSUM:
+- return "I40E_ERR_NVM_CHECKSUM";
+- case I40E_ERR_PHY:
+- return "I40E_ERR_PHY";
+- case I40E_ERR_CONFIG:
+- return "I40E_ERR_CONFIG";
+- case I40E_ERR_PARAM:
+- return "I40E_ERR_PARAM";
+- case I40E_ERR_MAC_TYPE:
+- return "I40E_ERR_MAC_TYPE";
+- case I40E_ERR_UNKNOWN_PHY:
+- return "I40E_ERR_UNKNOWN_PHY";
+- case I40E_ERR_LINK_SETUP:
+- return "I40E_ERR_LINK_SETUP";
+- case I40E_ERR_ADAPTER_STOPPED:
+- return "I40E_ERR_ADAPTER_STOPPED";
+- case I40E_ERR_INVALID_MAC_ADDR:
+- return "I40E_ERR_INVALID_MAC_ADDR";
+- case I40E_ERR_DEVICE_NOT_SUPPORTED:
+- return "I40E_ERR_DEVICE_NOT_SUPPORTED";
+- case I40E_ERR_MASTER_REQUESTS_PENDING:
+- return "I40E_ERR_MASTER_REQUESTS_PENDING";
+- case I40E_ERR_INVALID_LINK_SETTINGS:
+- return "I40E_ERR_INVALID_LINK_SETTINGS";
+- case I40E_ERR_AUTONEG_NOT_COMPLETE:
+- return "I40E_ERR_AUTONEG_NOT_COMPLETE";
+- case I40E_ERR_RESET_FAILED:
+- return "I40E_ERR_RESET_FAILED";
+- case I40E_ERR_SWFW_SYNC:
+- return "I40E_ERR_SWFW_SYNC";
+- case I40E_ERR_NO_AVAILABLE_VSI:
+- return "I40E_ERR_NO_AVAILABLE_VSI";
+- case I40E_ERR_NO_MEMORY:
+- return "I40E_ERR_NO_MEMORY";
+- case I40E_ERR_BAD_PTR:
+- return "I40E_ERR_BAD_PTR";
+- case I40E_ERR_RING_FULL:
+- return "I40E_ERR_RING_FULL";
+- case I40E_ERR_INVALID_PD_ID:
+- return "I40E_ERR_INVALID_PD_ID";
+- case I40E_ERR_INVALID_QP_ID:
+- return "I40E_ERR_INVALID_QP_ID";
+- case I40E_ERR_INVALID_CQ_ID:
+- return "I40E_ERR_INVALID_CQ_ID";
+- case I40E_ERR_INVALID_CEQ_ID:
+- return "I40E_ERR_INVALID_CEQ_ID";
+- case I40E_ERR_INVALID_AEQ_ID:
+- return "I40E_ERR_INVALID_AEQ_ID";
+- case I40E_ERR_INVALID_SIZE:
+- return "I40E_ERR_INVALID_SIZE";
+- case I40E_ERR_INVALID_ARP_INDEX:
+- return "I40E_ERR_INVALID_ARP_INDEX";
+- case I40E_ERR_INVALID_FPM_FUNC_ID:
+- return "I40E_ERR_INVALID_FPM_FUNC_ID";
+- case I40E_ERR_QP_INVALID_MSG_SIZE:
+- return "I40E_ERR_QP_INVALID_MSG_SIZE";
+- case I40E_ERR_QP_TOOMANY_WRS_POSTED:
+- return "I40E_ERR_QP_TOOMANY_WRS_POSTED";
+- case I40E_ERR_INVALID_FRAG_COUNT:
+- return "I40E_ERR_INVALID_FRAG_COUNT";
+- case I40E_ERR_QUEUE_EMPTY:
+- return "I40E_ERR_QUEUE_EMPTY";
+- case I40E_ERR_INVALID_ALIGNMENT:
+- return "I40E_ERR_INVALID_ALIGNMENT";
+- case I40E_ERR_FLUSHED_QUEUE:
+- return "I40E_ERR_FLUSHED_QUEUE";
+- case I40E_ERR_INVALID_PUSH_PAGE_INDEX:
+- return "I40E_ERR_INVALID_PUSH_PAGE_INDEX";
+- case I40E_ERR_INVALID_IMM_DATA_SIZE:
+- return "I40E_ERR_INVALID_IMM_DATA_SIZE";
+- case I40E_ERR_TIMEOUT:
+- return "I40E_ERR_TIMEOUT";
+- case I40E_ERR_OPCODE_MISMATCH:
+- return "I40E_ERR_OPCODE_MISMATCH";
+- case I40E_ERR_CQP_COMPL_ERROR:
+- return "I40E_ERR_CQP_COMPL_ERROR";
+- case I40E_ERR_INVALID_VF_ID:
+- return "I40E_ERR_INVALID_VF_ID";
+- case I40E_ERR_INVALID_HMCFN_ID:
+- return "I40E_ERR_INVALID_HMCFN_ID";
+- case I40E_ERR_BACKING_PAGE_ERROR:
+- return "I40E_ERR_BACKING_PAGE_ERROR";
+- case I40E_ERR_NO_PBLCHUNKS_AVAILABLE:
+- return "I40E_ERR_NO_PBLCHUNKS_AVAILABLE";
+- case I40E_ERR_INVALID_PBLE_INDEX:
+- return "I40E_ERR_INVALID_PBLE_INDEX";
+- case I40E_ERR_INVALID_SD_INDEX:
+- return "I40E_ERR_INVALID_SD_INDEX";
+- case I40E_ERR_INVALID_PAGE_DESC_INDEX:
+- return "I40E_ERR_INVALID_PAGE_DESC_INDEX";
+- case I40E_ERR_INVALID_SD_TYPE:
+- return "I40E_ERR_INVALID_SD_TYPE";
+- case I40E_ERR_MEMCPY_FAILED:
+- return "I40E_ERR_MEMCPY_FAILED";
+- case I40E_ERR_INVALID_HMC_OBJ_INDEX:
+- return "I40E_ERR_INVALID_HMC_OBJ_INDEX";
+- case I40E_ERR_INVALID_HMC_OBJ_COUNT:
+- return "I40E_ERR_INVALID_HMC_OBJ_COUNT";
+- case I40E_ERR_INVALID_SRQ_ARM_LIMIT:
+- return "I40E_ERR_INVALID_SRQ_ARM_LIMIT";
+- case I40E_ERR_SRQ_ENABLED:
+- return "I40E_ERR_SRQ_ENABLED";
+- case I40E_ERR_ADMIN_QUEUE_ERROR:
+- return "I40E_ERR_ADMIN_QUEUE_ERROR";
+- case I40E_ERR_ADMIN_QUEUE_TIMEOUT:
+- return "I40E_ERR_ADMIN_QUEUE_TIMEOUT";
+- case I40E_ERR_BUF_TOO_SHORT:
+- return "I40E_ERR_BUF_TOO_SHORT";
+- case I40E_ERR_ADMIN_QUEUE_FULL:
+- return "I40E_ERR_ADMIN_QUEUE_FULL";
+- case I40E_ERR_ADMIN_QUEUE_NO_WORK:
+- return "I40E_ERR_ADMIN_QUEUE_NO_WORK";
+- case I40E_ERR_BAD_IWARP_CQE:
+- return "I40E_ERR_BAD_IWARP_CQE";
+- case I40E_ERR_NVM_BLANK_MODE:
+- return "I40E_ERR_NVM_BLANK_MODE";
+- case I40E_ERR_NOT_IMPLEMENTED:
+- return "I40E_ERR_NOT_IMPLEMENTED";
+- case I40E_ERR_PE_DOORBELL_NOT_ENABLED:
+- return "I40E_ERR_PE_DOORBELL_NOT_ENABLED";
+- case I40E_ERR_DIAG_TEST_FAILED:
+- return "I40E_ERR_DIAG_TEST_FAILED";
+- case I40E_ERR_NOT_READY:
+- return "I40E_ERR_NOT_READY";
+- case I40E_NOT_SUPPORTED:
+- return "I40E_NOT_SUPPORTED";
+- case I40E_ERR_FIRMWARE_API_VERSION:
+- return "I40E_ERR_FIRMWARE_API_VERSION";
+- case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+- return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
+- }
+-
+- snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
+- return hw->err_str;
+-}
+-
+ /**
+ * i40e_debug_aq
+ * @hw: debug mask related to admin queue
+@@ -353,13 +205,13 @@ bool i40e_check_asq_alive(struct i40e_hw *hw)
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+-i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
+- bool unloading)
++int i40e_aq_queue_shutdown(struct i40e_hw *hw,
++ bool unloading)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_queue_shutdown *cmd =
+ (struct i40e_aqc_queue_shutdown *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_queue_shutdown);
+@@ -382,15 +234,15 @@ i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
+ *
+ * Internal function to get or set RSS look up table
+ **/
+-static i40e_status i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
+- u16 vsi_id, bool pf_lut,
+- u8 *lut, u16 lut_size,
+- bool set)
++static int i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
++ u16 vsi_id, bool pf_lut,
++ u8 *lut, u16 lut_size,
++ bool set)
+ {
+- i40e_status status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_set_rss_lut *cmd_resp =
+ (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw;
++ int status;
+
+ if (set)
+ i40e_fill_default_direct_cmd_desc(&desc,
+@@ -435,8 +287,8 @@ static i40e_status i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
+ *
+ * get the RSS lookup table, PF or VSI type
+ **/
+-i40e_status i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+- bool pf_lut, u8 *lut, u16 lut_size)
++int i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
++ bool pf_lut, u8 *lut, u16 lut_size)
+ {
+ return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size,
+ false);
+@@ -452,8 +304,8 @@ i40e_status i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+ *
+ * set the RSS lookup table, PF or VSI type
+ **/
+-i40e_status i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+- bool pf_lut, u8 *lut, u16 lut_size)
++int i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
++ bool pf_lut, u8 *lut, u16 lut_size)
+ {
+ return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true);
+ }
+@@ -467,16 +319,16 @@ i40e_status i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+ *
+ * get the RSS key per VSI
+ **/
+-static i40e_status i40e_aq_get_set_rss_key(struct i40e_hw *hw,
+- u16 vsi_id,
+- struct i40e_aqc_get_set_rss_key_data *key,
+- bool set)
++static int i40e_aq_get_set_rss_key(struct i40e_hw *hw,
++ u16 vsi_id,
++ struct i40e_aqc_get_set_rss_key_data *key,
++ bool set)
+ {
+- i40e_status status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_set_rss_key *cmd_resp =
+ (struct i40e_aqc_get_set_rss_key *)&desc.params.raw;
+ u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data);
++ int status;
+
+ if (set)
+ i40e_fill_default_direct_cmd_desc(&desc,
+@@ -507,9 +359,9 @@ static i40e_status i40e_aq_get_set_rss_key(struct i40e_hw *hw,
+ * @key: pointer to key info struct
+ *
+ **/
+-i40e_status i40e_aq_get_rss_key(struct i40e_hw *hw,
+- u16 vsi_id,
+- struct i40e_aqc_get_set_rss_key_data *key)
++int i40e_aq_get_rss_key(struct i40e_hw *hw,
++ u16 vsi_id,
++ struct i40e_aqc_get_set_rss_key_data *key)
+ {
+ return i40e_aq_get_set_rss_key(hw, vsi_id, key, false);
+ }
+@@ -522,9 +374,9 @@ i40e_status i40e_aq_get_rss_key(struct i40e_hw *hw,
+ *
+ * set the RSS key per VSI
+ **/
+-i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw,
+- u16 vsi_id,
+- struct i40e_aqc_get_set_rss_key_data *key)
++int i40e_aq_set_rss_key(struct i40e_hw *hw,
++ u16 vsi_id,
++ struct i40e_aqc_get_set_rss_key_data *key)
+ {
+ return i40e_aq_get_set_rss_key(hw, vsi_id, key, true);
+ }
+@@ -794,10 +646,10 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = {
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+-i40e_status i40e_init_shared_code(struct i40e_hw *hw)
++int i40e_init_shared_code(struct i40e_hw *hw)
+ {
+- i40e_status status = 0;
+ u32 port, ari, func_rid;
++ int status = 0;
+
+ i40e_set_mac_type(hw);
+
+@@ -834,15 +686,16 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw)
+ * @addrs: the requestor's mac addr store
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-static i40e_status i40e_aq_mac_address_read(struct i40e_hw *hw,
+- u16 *flags,
+- struct i40e_aqc_mac_address_read_data *addrs,
+- struct i40e_asq_cmd_details *cmd_details)
++static int
++i40e_aq_mac_address_read(struct i40e_hw *hw,
++ u16 *flags,
++ struct i40e_aqc_mac_address_read_data *addrs,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_mac_address_read *cmd_data =
+ (struct i40e_aqc_mac_address_read *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_mac_address_read);
+ desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF);
+@@ -861,14 +714,14 @@ static i40e_status i40e_aq_mac_address_read(struct i40e_hw *hw,
+ * @mac_addr: address to write
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
+- u16 flags, u8 *mac_addr,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_mac_address_write(struct i40e_hw *hw,
++ u16 flags, u8 *mac_addr,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_mac_address_write *cmd_data =
+ (struct i40e_aqc_mac_address_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_mac_address_write);
+@@ -891,11 +744,11 @@ i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
+ *
+ * Reads the adapter's MAC address from register
+ **/
+-i40e_status i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
++int i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+ {
+ struct i40e_aqc_mac_address_read_data addrs;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+
+@@ -912,11 +765,11 @@ i40e_status i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+ *
+ * Reads the adapter's Port MAC address
+ **/
+-i40e_status i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
++int i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+ {
+ struct i40e_aqc_mac_address_read_data addrs;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+ if (status)
+@@ -970,13 +823,13 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
+ *
+ * Reads the part number string from the EEPROM.
+ **/
+-i40e_status i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+- u32 pba_num_size)
++int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
++ u32 pba_num_size)
+ {
+- i40e_status status = 0;
+ u16 pba_word = 0;
+ u16 pba_size = 0;
+ u16 pba_ptr = 0;
++ int status = 0;
+ u16 i = 0;
+
+ status = i40e_read_nvm_word(hw, I40E_SR_PBA_FLAGS, &pba_word);
+@@ -1085,8 +938,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+-static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+- u32 retry_limit)
++static int i40e_poll_globr(struct i40e_hw *hw,
++ u32 retry_limit)
+ {
+ u32 cnt, reg = 0;
+
+@@ -1112,7 +965,7 @@ static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+ * Assuming someone else has triggered a global reset,
+ * assure the global reset is complete and then reset the PF
+ **/
+-i40e_status i40e_pf_reset(struct i40e_hw *hw)
++int i40e_pf_reset(struct i40e_hw *hw)
+ {
+ u32 cnt = 0;
+ u32 cnt1 = 0;
+@@ -1451,15 +1304,16 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
+ *
+ * Returns the various PHY abilities supported on the Port.
+ **/
+-i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+- bool qualified_modules, bool report_init,
+- struct i40e_aq_get_phy_abilities_resp *abilities,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
++ bool qualified_modules, bool report_init,
++ struct i40e_aq_get_phy_abilities_resp *abilities,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+- struct i40e_aq_desc desc;
+- i40e_status status;
+ u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp);
+ u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0;
++ struct i40e_aq_desc desc;
++ int status;
+
+ if (!abilities)
+ return I40E_ERR_PARAM;
+@@ -1530,14 +1384,14 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+ * of the PHY Config parameters. This status will be indicated by the
+ * command response.
+ **/
+-enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
+- struct i40e_aq_set_phy_config *config,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_phy_config(struct i40e_hw *hw,
++ struct i40e_aq_set_phy_config *config,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aq_set_phy_config *cmd =
+ (struct i40e_aq_set_phy_config *)&desc.params.raw;
+- enum i40e_status_code status;
++ int status;
+
+ if (!config)
+ return I40E_ERR_PARAM;
+@@ -1552,7 +1406,7 @@ enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
+ return status;
+ }
+
+-static noinline_for_stack enum i40e_status_code
++static noinline_for_stack int
+ i40e_set_fc_status(struct i40e_hw *hw,
+ struct i40e_aq_get_phy_abilities_resp *abilities,
+ bool atomic_restart)
+@@ -1610,11 +1464,11 @@ i40e_set_fc_status(struct i40e_hw *hw,
+ *
+ * Set the requested flow control mode using set_phy_config.
+ **/
+-enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+- bool atomic_restart)
++int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
++ bool atomic_restart)
+ {
+ struct i40e_aq_get_phy_abilities_resp abilities;
+- enum i40e_status_code status;
++ int status;
+
+ *aq_failures = 0x0;
+
+@@ -1653,13 +1507,13 @@ enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+ *
+ * Tell the firmware that the driver is taking over from PXE
+ **/
+-i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+- i40e_status status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_clear_pxe *cmd =
+ (struct i40e_aqc_clear_pxe *)&desc.params.raw;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_clear_pxe_mode);
+@@ -1681,14 +1535,14 @@ i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ **/
+-i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+- bool enable_link,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_link_restart_an(struct i40e_hw *hw,
++ bool enable_link,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_link_restart_an *cmd =
+ (struct i40e_aqc_set_link_restart_an *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_link_restart_an);
+@@ -1713,17 +1567,17 @@ i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+ *
+ * Returns the link status of the adapter.
+ **/
+-i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
+- bool enable_lse, struct i40e_link_status *link,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_link_info(struct i40e_hw *hw,
++ bool enable_lse, struct i40e_link_status *link,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_link_status *resp =
+ (struct i40e_aqc_get_link_status *)&desc.params.raw;
+ struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+- i40e_status status;
+ bool tx_pause, rx_pause;
+ u16 command_flags;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status);
+
+@@ -1809,14 +1663,14 @@ aq_get_link_info_exit:
+ *
+ * Set link interrupt mask.
+ **/
+-i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw,
+- u16 mask,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_phy_int_mask(struct i40e_hw *hw,
++ u16 mask,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_phy_int_mask *cmd =
+ (struct i40e_aqc_set_phy_int_mask *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_int_mask);
+@@ -1836,13 +1690,13 @@ i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw,
+ *
+ * Reset the external PHY.
+ **/
+-i40e_status i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_phy_debug *cmd =
+ (struct i40e_aqc_set_phy_debug *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_debug);
+@@ -1877,9 +1731,9 @@ static bool i40e_is_aq_api_ver_ge(struct i40e_adminq_info *aq, u16 maj,
+ *
+ * Add a VSI context to the hardware.
+ **/
+-i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_add_vsi(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+@@ -1887,7 +1741,7 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
+ struct i40e_aqc_add_get_update_vsi_completion *resp =
+ (struct i40e_aqc_add_get_update_vsi_completion *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_vsi);
+@@ -1920,15 +1774,15 @@ aq_add_vsi_exit:
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_default_vsi(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -1948,15 +1802,15 @@ i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw,
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_clear_default_vsi(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_clear_default_vsi(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -1978,16 +1832,16 @@ i40e_status i40e_aq_clear_default_vsi(struct i40e_hw *hw,
+ * @cmd_details: pointer to command details structure or NULL
+ * @rx_only_promisc: flag to decide if egress traffic gets mirrored in promisc
+ **/
+-i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+- u16 seid, bool set,
+- struct i40e_asq_cmd_details *cmd_details,
+- bool rx_only_promisc)
++int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
++ u16 seid, bool set,
++ struct i40e_asq_cmd_details *cmd_details,
++ bool rx_only_promisc)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2018,14 +1872,15 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+ * @set: set multicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+- u16 seid, bool set, struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
++ u16 seid, bool set,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2051,16 +1906,16 @@ i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+ * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- u16 vid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
++ u16 seid, bool enable,
++ u16 vid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- enum i40e_status_code status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2086,16 +1941,16 @@ enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+ * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- u16 vid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
++ u16 seid, bool enable,
++ u16 vid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- enum i40e_status_code status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2127,15 +1982,15 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+ * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable, u16 vid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
++ u16 seid, bool enable, u16 vid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2162,14 +2017,14 @@ i40e_status i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+ *
+ * Set or clear the broadcast promiscuous flag (filter) for a given VSI.
+ **/
+-i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+- u16 seid, bool set_filter,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
++ u16 seid, bool set_filter,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2195,15 +2050,15 @@ i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
++ u16 seid, bool enable,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+- i40e_status status;
+ u16 flags = 0;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+@@ -2225,9 +2080,9 @@ i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_vsi_params(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+@@ -2235,7 +2090,7 @@ i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw,
+ struct i40e_aqc_add_get_update_vsi_completion *resp =
+ (struct i40e_aqc_add_get_update_vsi_completion *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_vsi_parameters);
+@@ -2267,9 +2122,9 @@ aq_get_vsi_params_exit:
+ *
+ * Update a VSI context.
+ **/
+-i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_update_vsi_params(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+@@ -2277,7 +2132,7 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+ struct i40e_aqc_add_get_update_vsi_completion *resp =
+ (struct i40e_aqc_add_get_update_vsi_completion *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_update_vsi_parameters);
+@@ -2304,15 +2159,15 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+ *
+ * Fill the buf with switch configuration returned from AdminQ command
+ **/
+-i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
+- struct i40e_aqc_get_switch_config_resp *buf,
+- u16 buf_size, u16 *start_seid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_switch_config(struct i40e_hw *hw,
++ struct i40e_aqc_get_switch_config_resp *buf,
++ u16 buf_size, u16 *start_seid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_switch_seid *scfg =
+ (struct i40e_aqc_switch_seid *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_switch_config);
+@@ -2338,15 +2193,15 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
+ *
+ * Set switch configuration bits
+ **/
+-enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
+- u16 flags,
+- u16 valid_flags, u8 mode,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_switch_config(struct i40e_hw *hw,
++ u16 flags,
++ u16 valid_flags, u8 mode,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_switch_config *scfg =
+ (struct i40e_aqc_set_switch_config *)&desc.params.raw;
+- enum i40e_status_code status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_switch_config);
+@@ -2375,16 +2230,16 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
+ *
+ * Get the firmware version from the admin queue commands
+ **/
+-i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw,
+- u16 *fw_major_version, u16 *fw_minor_version,
+- u32 *fw_build,
+- u16 *api_major_version, u16 *api_minor_version,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_firmware_version(struct i40e_hw *hw,
++ u16 *fw_major_version, u16 *fw_minor_version,
++ u32 *fw_build,
++ u16 *api_major_version, u16 *api_minor_version,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_version *resp =
+ (struct i40e_aqc_get_version *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_version);
+
+@@ -2414,14 +2269,14 @@ i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw,
+ *
+ * Send the driver version to the firmware
+ **/
+-i40e_status i40e_aq_send_driver_version(struct i40e_hw *hw,
++int i40e_aq_send_driver_version(struct i40e_hw *hw,
+ struct i40e_driver_version *dv,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_driver_version *cmd =
+ (struct i40e_aqc_driver_version *)&desc.params.raw;
+- i40e_status status;
++ int status;
+ u16 len;
+
+ if (dv == NULL)
+@@ -2456,9 +2311,9 @@ i40e_status i40e_aq_send_driver_version(struct i40e_hw *hw,
+ *
+ * Side effect: LinkStatusEvent reporting becomes enabled
+ **/
+-i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
++int i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
+ {
+- i40e_status status = 0;
++ int status = 0;
+
+ if (hw->phy.get_link_info) {
+ status = i40e_update_link_info(hw);
+@@ -2477,10 +2332,10 @@ i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
+ * i40e_update_link_info - update status of the HW network link
+ * @hw: pointer to the hw struct
+ **/
+-noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
++noinline_for_stack int i40e_update_link_info(struct i40e_hw *hw)
+ {
+ struct i40e_aq_get_phy_abilities_resp abilities;
+- i40e_status status = 0;
++ int status = 0;
+
+ status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+ if (status)
+@@ -2527,19 +2382,19 @@ noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
+ * This asks the FW to add a VEB between the uplink and downlink
+ * elements. If the uplink SEID is 0, this will be a floating VEB.
+ **/
+-i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+- u16 downlink_seid, u8 enabled_tc,
+- bool default_port, u16 *veb_seid,
+- bool enable_stats,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
++ u16 downlink_seid, u8 enabled_tc,
++ bool default_port, u16 *veb_seid,
++ bool enable_stats,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_veb *cmd =
+ (struct i40e_aqc_add_veb *)&desc.params.raw;
+ struct i40e_aqc_add_veb_completion *resp =
+ (struct i40e_aqc_add_veb_completion *)&desc.params.raw;
+- i40e_status status;
+ u16 veb_flags = 0;
++ int status;
+
+ /* SEIDs need to either both be set or both be 0 for floating VEB */
+ if (!!uplink_seid != !!downlink_seid)
+@@ -2585,17 +2440,17 @@ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+ * This retrieves the parameters for a particular VEB, specified by
+ * uplink_seid, and returns them to the caller.
+ **/
+-i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+- u16 veb_seid, u16 *switch_id,
+- bool *floating, u16 *statistic_index,
+- u16 *vebs_used, u16 *vebs_free,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_veb_parameters(struct i40e_hw *hw,
++ u16 veb_seid, u16 *switch_id,
++ bool *floating, u16 *statistic_index,
++ u16 *vebs_used, u16 *vebs_free,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_veb_parameters_completion *cmd_resp =
+ (struct i40e_aqc_get_veb_parameters_completion *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (veb_seid == 0)
+ return I40E_ERR_PARAM;
+@@ -2639,14 +2494,14 @@ get_veb_exit:
+ *
+ * Add MAC/VLAN addresses to the HW filtering
+ **/
+-i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
++int i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+- i40e_status status;
++ int status;
+ u16 buf_size;
+ int i;
+
+@@ -2687,15 +2542,16 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+ *
+ * Remove MAC/VLAN addresses from the HW filtering
+ **/
+-i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
+- struct i40e_aqc_remove_macvlan_element_data *mv_list,
+- u16 count, struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_remove_macvlan_element_data *mv_list,
++ u16 count, struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+- i40e_status status;
+ u16 buf_size;
++ int status;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+@@ -2736,19 +2592,19 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
+ * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for
+ * VEBs/VEPA elements only
+ **/
+-static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw,
+- u16 opcode, u16 sw_seid, u16 rule_type, u16 id,
+- u16 count, __le16 *mr_list,
+- struct i40e_asq_cmd_details *cmd_details,
+- u16 *rule_id, u16 *rules_used, u16 *rules_free)
++static int i40e_mirrorrule_op(struct i40e_hw *hw,
++ u16 opcode, u16 sw_seid, u16 rule_type, u16 id,
++ u16 count, __le16 *mr_list,
++ struct i40e_asq_cmd_details *cmd_details,
++ u16 *rule_id, u16 *rules_used, u16 *rules_free)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_delete_mirror_rule *cmd =
+ (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw;
+ struct i40e_aqc_add_delete_mirror_rule_completion *resp =
+ (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw;
+- i40e_status status;
+ u16 buf_size;
++ int status;
+
+ buf_size = count * sizeof(*mr_list);
+
+@@ -2796,10 +2652,11 @@ static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw,
+ *
+ * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only
+ **/
+-i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+- u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list,
+- struct i40e_asq_cmd_details *cmd_details,
+- u16 *rule_id, u16 *rules_used, u16 *rules_free)
++int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
++ u16 rule_type, u16 dest_vsi, u16 count,
++ __le16 *mr_list,
++ struct i40e_asq_cmd_details *cmd_details,
++ u16 *rule_id, u16 *rules_used, u16 *rules_free)
+ {
+ if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS ||
+ rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) {
+@@ -2827,10 +2684,11 @@ i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+ *
+ * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only
+ **/
+-i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+- u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list,
+- struct i40e_asq_cmd_details *cmd_details,
+- u16 *rules_used, u16 *rules_free)
++int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
++ u16 rule_type, u16 rule_id, u16 count,
++ __le16 *mr_list,
++ struct i40e_asq_cmd_details *cmd_details,
++ u16 *rules_used, u16 *rules_free)
+ {
+ /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */
+ if (rule_type == I40E_AQC_MIRROR_RULE_TYPE_VLAN) {
+@@ -2859,14 +2717,14 @@ i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+ *
+ * send msg to vf
+ **/
+-i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+- u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
++ u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_pf_vf_message *cmd =
+ (struct i40e_aqc_pf_vf_message *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_vf);
+ cmd->id = cpu_to_le32(vfid);
+@@ -2894,14 +2752,14 @@ i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+ *
+ * Read the register using the admin queue commands
+ **/
+-i40e_status i40e_aq_debug_read_register(struct i40e_hw *hw,
++int i40e_aq_debug_read_register(struct i40e_hw *hw,
+ u32 reg_addr, u64 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_reg_read_write *cmd_resp =
+ (struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (reg_val == NULL)
+ return I40E_ERR_PARAM;
+@@ -2929,14 +2787,14 @@ i40e_status i40e_aq_debug_read_register(struct i40e_hw *hw,
+ *
+ * Write to a register using the admin queue commands
+ **/
+-i40e_status i40e_aq_debug_write_register(struct i40e_hw *hw,
+- u32 reg_addr, u64 reg_val,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_debug_write_register(struct i40e_hw *hw,
++ u32 reg_addr, u64 reg_val,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_reg_read_write *cmd =
+ (struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_debug_write_reg);
+
+@@ -2960,16 +2818,16 @@ i40e_status i40e_aq_debug_write_register(struct i40e_hw *hw,
+ *
+ * requests common resource using the admin queue commands
+ **/
+-i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
+- enum i40e_aq_resources_ids resource,
+- enum i40e_aq_resource_access_type access,
+- u8 sdp_number, u64 *timeout,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_request_resource(struct i40e_hw *hw,
++ enum i40e_aq_resources_ids resource,
++ enum i40e_aq_resource_access_type access,
++ u8 sdp_number, u64 *timeout,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_request_resource *cmd_resp =
+ (struct i40e_aqc_request_resource *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_request_resource);
+
+@@ -2999,15 +2857,15 @@ i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
+ *
+ * release common resource using the admin queue commands
+ **/
+-i40e_status i40e_aq_release_resource(struct i40e_hw *hw,
+- enum i40e_aq_resources_ids resource,
+- u8 sdp_number,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_release_resource(struct i40e_hw *hw,
++ enum i40e_aq_resources_ids resource,
++ u8 sdp_number,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_request_resource *cmd =
+ (struct i40e_aqc_request_resource *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_release_resource);
+
+@@ -3031,15 +2889,15 @@ i40e_status i40e_aq_release_resource(struct i40e_hw *hw,
+ *
+ * Read the NVM using the admin queue commands
+ **/
+-i40e_status i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, void *data,
+- bool last_command,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, void *data,
++ bool last_command,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+@@ -3077,14 +2935,14 @@ i40e_aq_read_nvm_exit:
+ *
+ * Erase the NVM sector using the admin queue commands
+ **/
+-i40e_status i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, bool last_command,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, bool last_command,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+@@ -3125,8 +2983,8 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
+ u32 number, logical_id, phys_id;
+ struct i40e_hw_capabilities *p;
+ u16 id, ocp_cfg_word0;
+- i40e_status status;
+ u8 major_rev;
++ int status;
+ u32 i = 0;
+
+ cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
+@@ -3367,14 +3225,14 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
+ *
+ * Get the device capabilities descriptions from the firmware
+ **/
+-i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
+- void *buff, u16 buff_size, u16 *data_size,
+- enum i40e_admin_queue_opc list_type_opc,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_discover_capabilities(struct i40e_hw *hw,
++ void *buff, u16 buff_size, u16 *data_size,
++ enum i40e_admin_queue_opc list_type_opc,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aqc_list_capabilites *cmd;
+ struct i40e_aq_desc desc;
+- i40e_status status = 0;
++ int status = 0;
+
+ cmd = (struct i40e_aqc_list_capabilites *)&desc.params.raw;
+
+@@ -3416,15 +3274,15 @@ exit:
+ *
+ * Update the NVM using the admin queue commands
+ **/
+-i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, void *data,
+- bool last_command, u8 preservation_flags,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, void *data,
++ bool last_command, u8 preservation_flags,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+@@ -3469,13 +3327,13 @@ i40e_aq_update_nvm_exit:
+ *
+ * Rearrange NVM structure, available only for transition FW
+ **/
+-i40e_status i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+- u8 rearrange_nvm,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_rearrange_nvm(struct i40e_hw *hw,
++ u8 rearrange_nvm,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aqc_nvm_update *cmd;
+- i40e_status status;
+ struct i40e_aq_desc desc;
++ int status;
+
+ cmd = (struct i40e_aqc_nvm_update *)&desc.params.raw;
+
+@@ -3509,17 +3367,17 @@ i40e_aq_rearrange_nvm_exit:
+ *
+ * Requests the complete LLDP MIB (entire packet).
+ **/
+-i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+- u8 mib_type, void *buff, u16 buff_size,
+- u16 *local_len, u16 *remote_len,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
++ u8 mib_type, void *buff, u16 buff_size,
++ u16 *local_len, u16 *remote_len,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_get_mib *cmd =
+ (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+ struct i40e_aqc_lldp_get_mib *resp =
+ (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+@@ -3559,14 +3417,14 @@ i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+ *
+ * Set the LLDP MIB.
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+ u8 mib_type, void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aqc_lldp_set_local_mib *cmd;
+- enum i40e_status_code status;
+ struct i40e_aq_desc desc;
++ int status;
+
+ cmd = (struct i40e_aqc_lldp_set_local_mib *)&desc.params.raw;
+ if (buff_size == 0 || !buff)
+@@ -3598,14 +3456,14 @@ i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes
+ **/
+-i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+- bool enable_update,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
++ bool enable_update,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_update_mib *cmd =
+ (struct i40e_aqc_lldp_update_mib *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_update_mib);
+
+@@ -3627,14 +3485,14 @@ i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+ * Restore LLDP Agent factory settings if @restore set to True. In other case
+ * only returns factory setting in AQ response.
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_restore *cmd =
+ (struct i40e_aqc_lldp_restore *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) {
+ i40e_debug(hw, I40E_DEBUG_ALL,
+@@ -3664,14 +3522,14 @@ i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+ *
+ * Stop or Shutdown the embedded LLDP Agent
+ **/
+-i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+- bool persist,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
++ bool persist,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_stop *cmd =
+ (struct i40e_aqc_lldp_stop *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_stop);
+
+@@ -3699,13 +3557,13 @@ i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+ *
+ * Start the embedded LLDP Agent on all ports.
+ **/
+-i40e_status i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_start *cmd =
+ (struct i40e_aqc_lldp_start *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
+
+@@ -3731,14 +3589,14 @@ i40e_status i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+ * @dcb_enable: True if DCB configuration needs to be applied
+ *
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_dcb_parameters *cmd =
+ (struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+ return I40E_ERR_DEVICE_NOT_SUPPORTED;
+@@ -3764,12 +3622,12 @@ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
+ *
+ * Get CEE DCBX mode operational configuration from firmware
+ **/
+-i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+- void *buff, u16 buff_size,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
++ void *buff, u16 buff_size,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+- i40e_status status;
++ int status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+@@ -3795,17 +3653,17 @@ i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+ * and this function will call cpu_to_le16 to convert from Host byte order to
+ * Little Endian order.
+ **/
+-i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+- u16 udp_port, u8 protocol_index,
+- u8 *filter_index,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
++ u16 udp_port, u8 protocol_index,
++ u8 *filter_index,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_udp_tunnel *cmd =
+ (struct i40e_aqc_add_udp_tunnel *)&desc.params.raw;
+ struct i40e_aqc_del_udp_tunnel_completion *resp =
+ (struct i40e_aqc_del_udp_tunnel_completion *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_udp_tunnel);
+
+@@ -3826,13 +3684,13 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+ * @index: filter index
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_remove_udp_tunnel *cmd =
+ (struct i40e_aqc_remove_udp_tunnel *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_del_udp_tunnel);
+
+@@ -3851,13 +3709,13 @@ i40e_status i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+ *
+ * This deletes a switch element from the switch.
+ **/
+-i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_switch_seid *cmd =
+ (struct i40e_aqc_switch_seid *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (seid == 0)
+ return I40E_ERR_PARAM;
+@@ -3880,11 +3738,11 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+ * recomputed and modified. The retval field in the descriptor
+ * will be set to 0 when RPB is modified.
+ **/
+-i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_dcb_updated(struct i40e_hw *hw,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_dcb_updated);
+
+@@ -3904,15 +3762,15 @@ i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw,
+ *
+ * Generic command handler for Tx scheduler AQ commands
+ **/
+-static i40e_status i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
++static int i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
+ void *buff, u16 buff_size,
+- enum i40e_admin_queue_opc opcode,
++ enum i40e_admin_queue_opc opcode,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_tx_sched_ind *cmd =
+ (struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+- i40e_status status;
++ int status;
+ bool cmd_param_flag = false;
+
+ switch (opcode) {
+@@ -3962,14 +3820,14 @@ static i40e_status i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
+ * @max_credit: Max BW limit credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
++int i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_credit,
+ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_configure_vsi_bw_limit *cmd =
+ (struct i40e_aqc_configure_vsi_bw_limit *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_configure_vsi_bw_limit);
+@@ -3990,10 +3848,10 @@ i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+ * @bw_data: Buffer holding enabled TCs, relative TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_configure_vsi_tc_bw,
+@@ -4008,11 +3866,12 @@ i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
+ * @opcode: Tx scheduler AQ command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
+- enum i40e_admin_queue_opc opcode,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
++ enum i40e_admin_queue_opc opcode,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)ets_data,
+ sizeof(*ets_data), opcode, cmd_details);
+@@ -4025,7 +3884,8 @@ i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
+ * @bw_data: Buffer holding enabled TCs, relative/absolute TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
++int
++i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_configure_switching_comp_bw_config_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+@@ -4042,10 +3902,11 @@ i40e_status i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
+ * @bw_data: Buffer to hold VSI BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_vsi_bw_config,
+@@ -4059,10 +3920,11 @@ i40e_status i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+ * @bw_data: Buffer to hold VSI BW configuration per TC
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_vsi_ets_sla_config,
+@@ -4076,10 +3938,11 @@ i40e_status i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+ * @bw_data: Buffer to hold switching component's per TC BW config
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_switching_comp_ets_config,
+@@ -4093,10 +3956,11 @@ i40e_status i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+ * @bw_data: Buffer to hold current ETS configuration for the Physical Port
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_port_ets_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_query_port_ets_config(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_query_port_ets_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_port_ets_config,
+@@ -4110,10 +3974,11 @@ i40e_status i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+ * @bw_data: Buffer to hold switching component's BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
++ u16 seid,
++ struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_switching_comp_bw_config,
+@@ -4132,8 +3997,9 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+ * Returns 0 if the values passed are valid and within
+ * range else returns an error.
+ **/
+-static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
+- struct i40e_filter_control_settings *settings)
++static int
++i40e_validate_filter_settings(struct i40e_hw *hw,
++ struct i40e_filter_control_settings *settings)
+ {
+ u32 fcoe_cntx_size, fcoe_filt_size;
+ u32 pe_cntx_size, pe_filt_size;
+@@ -4224,11 +4090,11 @@ static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
+ * for a single PF. It is expected that these settings are programmed
+ * at the driver initialization time.
+ **/
+-i40e_status i40e_set_filter_control(struct i40e_hw *hw,
+- struct i40e_filter_control_settings *settings)
++int i40e_set_filter_control(struct i40e_hw *hw,
++ struct i40e_filter_control_settings *settings)
+ {
+- i40e_status ret = 0;
+ u32 hash_lut_size = 0;
++ int ret = 0;
+ u32 val;
+
+ if (!settings)
+@@ -4298,11 +4164,11 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw,
+ * In return it will update the total number of perfect filter count in
+ * the stats member.
+ **/
+-i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+- u8 *mac_addr, u16 ethtype, u16 flags,
+- u16 vsi_seid, u16 queue, bool is_add,
+- struct i40e_control_filter_stats *stats,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
++ u8 *mac_addr, u16 ethtype, u16 flags,
++ u16 vsi_seid, u16 queue, bool is_add,
++ struct i40e_control_filter_stats *stats,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_control_packet_filter *cmd =
+@@ -4311,7 +4177,7 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+ struct i40e_aqc_add_remove_control_packet_filter_completion *resp =
+ (struct i40e_aqc_add_remove_control_packet_filter_completion *)
+ &desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+@@ -4357,7 +4223,7 @@ void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
+ I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP |
+ I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX;
+ u16 ethtype = I40E_FLOW_CONTROL_ETHTYPE;
+- i40e_status status;
++ int status;
+
+ status = i40e_aq_add_rem_control_packet_filter(hw, NULL, ethtype, flag,
+ seid, 0, true, NULL,
+@@ -4379,14 +4245,14 @@ void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
+ * is not passed then only register at 'reg_addr0' is read.
+ *
+ **/
+-static i40e_status i40e_aq_alternate_read(struct i40e_hw *hw,
+- u32 reg_addr0, u32 *reg_val0,
+- u32 reg_addr1, u32 *reg_val1)
++static int i40e_aq_alternate_read(struct i40e_hw *hw,
++ u32 reg_addr0, u32 *reg_val0,
++ u32 reg_addr1, u32 *reg_val1)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_write *cmd_resp =
+ (struct i40e_aqc_alternate_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (!reg_val0)
+ return I40E_ERR_PARAM;
+@@ -4415,12 +4281,12 @@ static i40e_status i40e_aq_alternate_read(struct i40e_hw *hw,
+ *
+ * Suspend port's Tx traffic
+ **/
+-i40e_status i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aqc_tx_sched_ind *cmd;
+ struct i40e_aq_desc desc;
+- i40e_status status;
++ int status;
+
+ cmd = (struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_suspend_port_tx);
+@@ -4437,11 +4303,11 @@ i40e_status i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
+ *
+ * Resume port's Tx traffic
+ **/
+-i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_resume_port_tx(struct i40e_hw *hw,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_resume_port_tx);
+
+@@ -4511,18 +4377,18 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status)
+ * Dump internal FW/HW data for debug purposes.
+ *
+ **/
+-i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+- u8 table_id, u32 start_index, u16 buff_size,
+- void *buff, u16 *ret_buff_size,
+- u8 *ret_next_table, u32 *ret_next_index,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
++ u8 table_id, u32 start_index, u16 buff_size,
++ void *buff, u16 *ret_buff_size,
++ u8 *ret_next_table, u32 *ret_next_index,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_dump_internals *cmd =
+ (struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+ struct i40e_aqc_debug_dump_internals *resp =
+ (struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+@@ -4563,12 +4429,12 @@ i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+ *
+ * Read bw from the alternate ram for the given pf
+ **/
+-i40e_status i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+- u32 *max_bw, u32 *min_bw,
+- bool *min_valid, bool *max_valid)
++int i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
++ u32 *max_bw, u32 *min_bw,
++ bool *min_valid, bool *max_valid)
+ {
+- i40e_status status;
+ u32 max_bw_addr, min_bw_addr;
++ int status;
+
+ /* Calculate the address of the min/max bw registers */
+ max_bw_addr = I40E_ALT_STRUCT_FIRST_PF_OFFSET +
+@@ -4603,13 +4469,14 @@ i40e_status i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+ *
+ * Configure partitions guaranteed/max bw
+ **/
+-i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+- struct i40e_aqc_configure_partition_bw_data *bw_data,
+- struct i40e_asq_cmd_details *cmd_details)
++int
++i40e_aq_configure_partition_bw(struct i40e_hw *hw,
++ struct i40e_aqc_configure_partition_bw_data *bw_data,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+- i40e_status status;
+- struct i40e_aq_desc desc;
+ u16 bwd_size = sizeof(*bw_data);
++ struct i40e_aq_desc desc;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_configure_partition_bw);
+@@ -4638,11 +4505,11 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+ *
+ * Reads specified PHY register value
+ **/
+-i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
+- u16 reg, u8 phy_addr, u16 *value)
++int i40e_read_phy_register_clause22(struct i40e_hw *hw,
++ u16 reg, u8 phy_addr, u16 *value)
+ {
+- i40e_status status = I40E_ERR_TIMEOUT;
+ u8 port_num = (u8)hw->func_caps.mdio_port_num;
++ int status = I40E_ERR_TIMEOUT;
+ u32 command = 0;
+ u16 retry = 1000;
+
+@@ -4683,11 +4550,11 @@ i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
+ *
+ * Writes specified PHY register value
+ **/
+-i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
+- u16 reg, u8 phy_addr, u16 value)
++int i40e_write_phy_register_clause22(struct i40e_hw *hw,
++ u16 reg, u8 phy_addr, u16 value)
+ {
+- i40e_status status = I40E_ERR_TIMEOUT;
+ u8 port_num = (u8)hw->func_caps.mdio_port_num;
++ int status = I40E_ERR_TIMEOUT;
+ u32 command = 0;
+ u16 retry = 1000;
+
+@@ -4724,13 +4591,13 @@ i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
+ *
+ * Reads specified PHY register value
+ **/
+-i40e_status i40e_read_phy_register_clause45(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 *value)
++int i40e_read_phy_register_clause45(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 *value)
+ {
+- i40e_status status = I40E_ERR_TIMEOUT;
++ u8 port_num = hw->func_caps.mdio_port_num;
++ int status = I40E_ERR_TIMEOUT;
+ u32 command = 0;
+ u16 retry = 1000;
+- u8 port_num = hw->func_caps.mdio_port_num;
+
+ command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
+ (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+@@ -4798,13 +4665,13 @@ phy_read_end:
+ *
+ * Writes value to specified PHY register
+ **/
+-i40e_status i40e_write_phy_register_clause45(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 value)
++int i40e_write_phy_register_clause45(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 value)
+ {
+- i40e_status status = I40E_ERR_TIMEOUT;
+- u32 command = 0;
+- u16 retry = 1000;
+ u8 port_num = hw->func_caps.mdio_port_num;
++ int status = I40E_ERR_TIMEOUT;
++ u16 retry = 1000;
++ u32 command = 0;
+
+ command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
+ (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+@@ -4865,10 +4732,10 @@ phy_write_end:
+ *
+ * Writes value to specified PHY register
+ **/
+-i40e_status i40e_write_phy_register(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 value)
++int i40e_write_phy_register(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 value)
+ {
+- i40e_status status;
++ int status;
+
+ switch (hw->device_id) {
+ case I40E_DEV_ID_1G_BASE_T_X722:
+@@ -4903,10 +4770,10 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
+ *
+ * Reads specified PHY register value
+ **/
+-i40e_status i40e_read_phy_register(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 *value)
++int i40e_read_phy_register(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 *value)
+ {
+- i40e_status status;
++ int status;
+
+ switch (hw->device_id) {
+ case I40E_DEV_ID_1G_BASE_T_X722:
+@@ -4954,17 +4821,17 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num)
+ *
+ * Blinks PHY link LED
+ **/
+-i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
+- u32 time, u32 interval)
++int i40e_blink_phy_link_led(struct i40e_hw *hw,
++ u32 time, u32 interval)
+ {
+- i40e_status status = 0;
+- u32 i;
+- u16 led_ctl;
+- u16 gpio_led_port;
+- u16 led_reg;
+ u16 led_addr = I40E_PHY_LED_PROV_REG_1;
++ u16 gpio_led_port;
+ u8 phy_addr = 0;
++ int status = 0;
++ u16 led_ctl;
+ u8 port_num;
++ u16 led_reg;
++ u32 i;
+
+ i = rd32(hw, I40E_PFGEN_PORTNUM);
+ port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+@@ -5026,12 +4893,12 @@ phy_blinking_end:
+ * @led_addr: LED register address
+ * @reg_val: read register value
+ **/
+-static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
+- u32 *reg_val)
++static int i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
++ u32 *reg_val)
+ {
+- enum i40e_status_code status;
+ u8 phy_addr = 0;
+ u8 port_num;
++ int status;
+ u32 i;
+
+ *reg_val = 0;
+@@ -5060,12 +4927,12 @@ static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
+ * @led_addr: LED register address
+ * @reg_val: register value to write
+ **/
+-static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
+- u32 reg_val)
++static int i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
++ u32 reg_val)
+ {
+- enum i40e_status_code status;
+ u8 phy_addr = 0;
+ u8 port_num;
++ int status;
+ u32 i;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+@@ -5095,17 +4962,17 @@ static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
+ * @val: original value of register to use
+ *
+ **/
+-i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
+- u16 *val)
++int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
++ u16 *val)
+ {
+- i40e_status status = 0;
+ u16 gpio_led_port;
+ u8 phy_addr = 0;
+- u16 reg_val;
++ u32 reg_val_aq;
++ int status = 0;
+ u16 temp_addr;
++ u16 reg_val;
+ u8 port_num;
+ u32 i;
+- u32 reg_val_aq;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ status =
+@@ -5150,12 +5017,12 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
+ * Set led's on or off when controlled by the PHY
+ *
+ **/
+-i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on,
+- u16 led_addr, u32 mode)
++int i40e_led_set_phy(struct i40e_hw *hw, bool on,
++ u16 led_addr, u32 mode)
+ {
+- i40e_status status = 0;
+ u32 led_ctl = 0;
+ u32 led_reg = 0;
++ int status = 0;
+
+ status = i40e_led_get_reg(hw, led_addr, &led_reg);
+ if (status)
+@@ -5199,14 +5066,14 @@ restore_config:
+ * Use the firmware to read the Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+-i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+- u32 reg_addr, u32 *reg_val,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
++ u32 reg_addr, u32 *reg_val,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp =
+ (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ if (!reg_val)
+ return I40E_ERR_PARAM;
+@@ -5230,8 +5097,8 @@ i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+ **/
+ u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
+ {
+- i40e_status status = 0;
+ bool use_register;
++ int status = 0;
+ int retry = 5;
+ u32 val = 0;
+
+@@ -5265,14 +5132,14 @@ do_retry:
+ * Use the firmware to write to an Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+-i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+- u32 reg_addr, u32 reg_val,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
++ u32 reg_addr, u32 reg_val,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_rx_ctl_reg_read_write *cmd =
+ (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write);
+
+@@ -5292,8 +5159,8 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+ **/
+ void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
+ {
+- i40e_status status = 0;
+ bool use_register;
++ int status = 0;
+ int retry = 5;
+
+ use_register = (((hw->aq.api_maj_ver == 1) &&
+@@ -5355,16 +5222,16 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio,
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_set_phy_register.
+ **/
+-enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+- u8 phy_select, u8 dev_addr, bool page_change,
+- bool set_mdio, u8 mdio_num,
+- u32 reg_addr, u32 reg_val,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
++ u8 phy_select, u8 dev_addr, bool page_change,
++ bool set_mdio, u8 mdio_num,
++ u32 reg_addr, u32 reg_val,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_register);
+@@ -5400,16 +5267,16 @@ enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_get_phy_register.
+ **/
+-enum i40e_status_code i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+- u8 phy_select, u8 dev_addr, bool page_change,
+- bool set_mdio, u8 mdio_num,
+- u32 reg_addr, u32 *reg_val,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
++ u8 phy_select, u8 dev_addr, bool page_change,
++ bool set_mdio, u8 mdio_num,
++ u32 reg_addr, u32 *reg_val,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_phy_register);
+@@ -5440,18 +5307,17 @@ enum i40e_status_code i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+ * @error_info: returns error information
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-enum
+-i40e_status_code i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
+- u16 buff_size, u32 track_id,
+- u32 *error_offset, u32 *error_info,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
++ u16 buff_size, u32 track_id,
++ u32 *error_offset, u32 *error_info,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_write_personalization_profile *cmd =
+ (struct i40e_aqc_write_personalization_profile *)
+ &desc.params.raw;
+ struct i40e_aqc_write_ddp_resp *resp;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_write_personalization_profile);
+@@ -5484,15 +5350,14 @@ i40e_status_code i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
+ * @flags: AdminQ command flags
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+-enum
+-i40e_status_code i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
+- u16 buff_size, u8 flags,
+- struct i40e_asq_cmd_details *cmd_details)
++int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
++ u16 buff_size, u8 flags,
++ struct i40e_asq_cmd_details *cmd_details)
+ {
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_applied_profiles *cmd =
+ (struct i40e_aqc_get_applied_profiles *)&desc.params.raw;
+- i40e_status status;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_personalization_profile_list);
+@@ -5591,14 +5456,13 @@ i40e_find_section_in_profile(u32 section_type,
+ * @hw: pointer to the hw struct
+ * @aq: command buffer containing all data to execute AQ
+ **/
+-static enum
+-i40e_status_code i40e_ddp_exec_aq_section(struct i40e_hw *hw,
+- struct i40e_profile_aq_section *aq)
++static int i40e_ddp_exec_aq_section(struct i40e_hw *hw,
++ struct i40e_profile_aq_section *aq)
+ {
+- i40e_status status;
+ struct i40e_aq_desc desc;
+ u8 *msg = NULL;
+ u16 msglen;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, aq->opcode);
+ desc.flags |= cpu_to_le16(aq->flags);
+@@ -5638,14 +5502,14 @@ i40e_status_code i40e_ddp_exec_aq_section(struct i40e_hw *hw,
+ *
+ * Validates supported devices and profile's sections.
+ */
+-static enum i40e_status_code
++static int
+ i40e_validate_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u32 track_id, bool rollback)
+ {
+ struct i40e_profile_section_header *sec = NULL;
+- i40e_status status = 0;
+ struct i40e_section_table *sec_tbl;
+ u32 vendor_dev_id;
++ int status = 0;
+ u32 dev_cnt;
+ u32 sec_off;
+ u32 i;
+@@ -5703,16 +5567,16 @@ i40e_validate_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ *
+ * Handles the download of a complete package.
+ */
+-enum i40e_status_code
++int
+ i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u32 track_id)
+ {
+- i40e_status status = 0;
+- struct i40e_section_table *sec_tbl;
+ struct i40e_profile_section_header *sec = NULL;
+ struct i40e_profile_aq_section *ddp_aq;
+- u32 section_size = 0;
++ struct i40e_section_table *sec_tbl;
+ u32 offset = 0, info = 0;
++ u32 section_size = 0;
++ int status = 0;
+ u32 sec_off;
+ u32 i;
+
+@@ -5766,15 +5630,15 @@ i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ *
+ * Rolls back previously loaded package.
+ */
+-enum i40e_status_code
++int
+ i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u32 track_id)
+ {
+ struct i40e_profile_section_header *sec = NULL;
+- i40e_status status = 0;
+ struct i40e_section_table *sec_tbl;
+ u32 offset = 0, info = 0;
+ u32 section_size = 0;
++ int status = 0;
+ u32 sec_off;
+ int i;
+
+@@ -5818,15 +5682,15 @@ i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ *
+ * Register a profile to the list of loaded profiles.
+ */
+-enum i40e_status_code
++int
+ i40e_add_pinfo_to_list(struct i40e_hw *hw,
+ struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id)
+ {
+- i40e_status status = 0;
+ struct i40e_profile_section_header *sec = NULL;
+ struct i40e_profile_info *pinfo;
+ u32 offset = 0, info = 0;
++ int status = 0;
+
+ sec = (struct i40e_profile_section_header *)profile_info_sec;
+ sec->tbl_size = 1;
+@@ -5860,7 +5724,7 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw,
+ * of the function.
+ *
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_cloud_filters_element_data *filters,
+ u8 filter_count)
+@@ -5868,8 +5732,8 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+- enum i40e_status_code status;
+ u16 buff_len;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_cloud_filters);
+@@ -5897,7 +5761,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
+ * function.
+ *
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_cloud_filters_element_bb *filters,
+ u8 filter_count)
+@@ -5905,8 +5769,8 @@ i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+- i40e_status status;
+ u16 buff_len;
++ int status;
+ int i;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+@@ -5954,7 +5818,7 @@ i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+ * of the function.
+ *
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_cloud_filters_element_data *filters,
+ u8 filter_count)
+@@ -5962,8 +5826,8 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+- enum i40e_status_code status;
+ u16 buff_len;
++ int status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_remove_cloud_filters);
+@@ -5991,7 +5855,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+ * function.
+ *
+ **/
+-enum i40e_status_code
++int
+ i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_cloud_filters_element_bb *filters,
+ u8 filter_count)
+@@ -5999,8 +5863,8 @@ i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+- i40e_status status;
+ u16 buff_len;
++ int status;
+ int i;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+index 673f341f4c0c1..90638b67f8dc8 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+@@ -12,7 +12,7 @@
+ *
+ * Get the DCBX status from the Firmware
+ **/
+-i40e_status i40e_get_dcbx_status(struct i40e_hw *hw, u16 *status)
++int i40e_get_dcbx_status(struct i40e_hw *hw, u16 *status)
+ {
+ u32 reg;
+
+@@ -497,15 +497,15 @@ static void i40e_parse_org_tlv(struct i40e_lldp_org_tlv *tlv,
+ *
+ * Parse DCB configuration from the LLDPDU
+ **/
+-i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
+- struct i40e_dcbx_config *dcbcfg)
++int i40e_lldp_to_dcb_config(u8 *lldpmib,
++ struct i40e_dcbx_config *dcbcfg)
+ {
+- i40e_status ret = 0;
+ struct i40e_lldp_org_tlv *tlv;
+- u16 type;
+- u16 length;
+ u16 typelength;
+ u16 offset = 0;
++ int ret = 0;
++ u16 length;
++ u16 type;
+
+ if (!lldpmib || !dcbcfg)
+ return I40E_ERR_PARAM;
+@@ -551,12 +551,12 @@ i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
+ *
+ * Query DCB configuration from the Firmware
+ **/
+-i40e_status i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
+- u8 bridgetype,
+- struct i40e_dcbx_config *dcbcfg)
++int i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
++ u8 bridgetype,
++ struct i40e_dcbx_config *dcbcfg)
+ {
+- i40e_status ret = 0;
+ struct i40e_virt_mem mem;
++ int ret = 0;
+ u8 *lldpmib;
+
+ /* Allocate the LLDPDU */
+@@ -767,9 +767,9 @@ static void i40e_cee_to_dcb_config(
+ *
+ * Get IEEE mode DCB configuration from the Firmware
+ **/
+-static i40e_status i40e_get_ieee_dcb_config(struct i40e_hw *hw)
++static int i40e_get_ieee_dcb_config(struct i40e_hw *hw)
+ {
+- i40e_status ret = 0;
++ int ret = 0;
+
+ /* IEEE mode */
+ hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_IEEE;
+@@ -797,11 +797,11 @@ out:
+ *
+ * Get DCB configuration from the Firmware
+ **/
+-i40e_status i40e_get_dcb_config(struct i40e_hw *hw)
++int i40e_get_dcb_config(struct i40e_hw *hw)
+ {
+- i40e_status ret = 0;
+- struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg;
+ struct i40e_aqc_get_cee_dcb_cfg_v1_resp cee_v1_cfg;
++ struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg;
++ int ret = 0;
+
+ /* If Firmware version < v4.33 on X710/XL710, IEEE only */
+ if ((hw->mac.type == I40E_MAC_XL710) &&
+@@ -867,11 +867,11 @@ out:
+ *
+ * Update DCB configuration from the Firmware
+ **/
+-i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
++int i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
+ {
+- i40e_status ret = 0;
+ struct i40e_lldp_variables lldp_cfg;
+ u8 adminstatus = 0;
++ int ret = 0;
+
+ if (!hw->func_caps.dcb)
+ return I40E_NOT_SUPPORTED;
+@@ -940,13 +940,13 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
+ * Get status of FW Link Layer Discovery Protocol (LLDP) Agent.
+ * Status of agent is reported via @lldp_status parameter.
+ **/
+-enum i40e_status_code
++int
+ i40e_get_fw_lldp_status(struct i40e_hw *hw,
+ enum i40e_get_fw_lldp_status_resp *lldp_status)
+ {
+ struct i40e_virt_mem mem;
+- i40e_status ret;
+ u8 *lldpmib;
++ int ret;
+
+ if (!lldp_status)
+ return I40E_ERR_PARAM;
+@@ -1238,13 +1238,13 @@ static void i40e_add_dcb_tlv(struct i40e_lldp_org_tlv *tlv,
+ *
+ * Set DCB configuration to the Firmware
+ **/
+-i40e_status i40e_set_dcb_config(struct i40e_hw *hw)
++int i40e_set_dcb_config(struct i40e_hw *hw)
+ {
+ struct i40e_dcbx_config *dcbcfg;
+ struct i40e_virt_mem mem;
+ u8 mib_type, *lldpmib;
+- i40e_status ret;
+ u16 miblen;
++ int ret;
+
+ /* update the hw local config */
+ dcbcfg = &hw->local_dcbx_config;
+@@ -1274,8 +1274,8 @@ i40e_status i40e_set_dcb_config(struct i40e_hw *hw)
+ *
+ * send DCB configuration to FW
+ **/
+-i40e_status i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
+- struct i40e_dcbx_config *dcbcfg)
++int i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
++ struct i40e_dcbx_config *dcbcfg)
+ {
+ u16 length, offset = 0, tlvid, typelength;
+ struct i40e_lldp_org_tlv *tlv;
+@@ -1888,13 +1888,13 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw,
+ *
+ * Reads the LLDP configuration data from NVM using passed addresses
+ **/
+-static i40e_status _i40e_read_lldp_cfg(struct i40e_hw *hw,
+- struct i40e_lldp_variables *lldp_cfg,
+- u8 module, u32 word_offset)
++static int _i40e_read_lldp_cfg(struct i40e_hw *hw,
++ struct i40e_lldp_variables *lldp_cfg,
++ u8 module, u32 word_offset)
+ {
+ u32 address, offset = (2 * word_offset);
+- i40e_status ret;
+ __le16 raw_mem;
++ int ret;
+ u16 mem;
+
+ ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+@@ -1950,10 +1950,10 @@ err_lldp_cfg:
+ *
+ * Reads the LLDP configuration data from NVM
+ **/
+-i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
+- struct i40e_lldp_variables *lldp_cfg)
++int i40e_read_lldp_cfg(struct i40e_hw *hw,
++ struct i40e_lldp_variables *lldp_cfg)
+ {
+- i40e_status ret = 0;
++ int ret = 0;
+ u32 mem;
+
+ if (!lldp_cfg)
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+index 2370ceecb0612..6b60dc9b77361 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+@@ -264,20 +264,20 @@ void i40e_dcb_hw_calculate_pool_sizes(struct i40e_hw *hw,
+ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw,
+ struct i40e_rx_pb_config *old_pb_cfg,
+ struct i40e_rx_pb_config *new_pb_cfg);
+-i40e_status i40e_get_dcbx_status(struct i40e_hw *hw,
+- u16 *status);
+-i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
+- struct i40e_dcbx_config *dcbcfg);
+-i40e_status i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
+- u8 bridgetype,
+- struct i40e_dcbx_config *dcbcfg);
+-i40e_status i40e_get_dcb_config(struct i40e_hw *hw);
+-i40e_status i40e_init_dcb(struct i40e_hw *hw,
+- bool enable_mib_change);
+-enum i40e_status_code
++int i40e_get_dcbx_status(struct i40e_hw *hw,
++ u16 *status);
++int i40e_lldp_to_dcb_config(u8 *lldpmib,
++ struct i40e_dcbx_config *dcbcfg);
++int i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
++ u8 bridgetype,
++ struct i40e_dcbx_config *dcbcfg);
++int i40e_get_dcb_config(struct i40e_hw *hw);
++int i40e_init_dcb(struct i40e_hw *hw,
++ bool enable_mib_change);
++int
+ i40e_get_fw_lldp_status(struct i40e_hw *hw,
+ enum i40e_get_fw_lldp_status_resp *lldp_status);
+-i40e_status i40e_set_dcb_config(struct i40e_hw *hw);
+-i40e_status i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
+- struct i40e_dcbx_config *dcbcfg);
++int i40e_set_dcb_config(struct i40e_hw *hw);
++int i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
++ struct i40e_dcbx_config *dcbcfg);
+ #endif /* _I40E_DCB_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+index e32c61909b310..bba70bd5703bf 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+@@ -135,8 +135,8 @@ static int i40e_dcbnl_ieee_setets(struct net_device *netdev,
+ ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed setting DCB ETS configuration err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed setting DCB ETS configuration err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+@@ -174,8 +174,8 @@ static int i40e_dcbnl_ieee_setpfc(struct net_device *netdev,
+ ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed setting DCB PFC configuration err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed setting DCB PFC configuration err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+@@ -225,8 +225,8 @@ static int i40e_dcbnl_ieee_setapp(struct net_device *netdev,
+ ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed setting DCB configuration err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed setting DCB configuration err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+@@ -290,8 +290,8 @@ static int i40e_dcbnl_ieee_delapp(struct net_device *netdev,
+ ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed setting DCB configuration err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed setting DCB configuration err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+index e1069ae658ad3..7e8183762fd95 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+@@ -36,7 +36,7 @@ static int i40e_ddp_does_profile_exist(struct i40e_hw *hw,
+ {
+ struct i40e_ddp_profile_list *profile_list;
+ u8 buff[I40E_PROFILE_LIST_SIZE];
+- i40e_status status;
++ int status;
+ int i;
+
+ status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+@@ -91,7 +91,7 @@ static int i40e_ddp_does_profile_overlap(struct i40e_hw *hw,
+ {
+ struct i40e_ddp_profile_list *profile_list;
+ u8 buff[I40E_PROFILE_LIST_SIZE];
+- i40e_status status;
++ int status;
+ int i;
+
+ status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+@@ -117,14 +117,14 @@ static int i40e_ddp_does_profile_overlap(struct i40e_hw *hw,
+ *
+ * Register a profile to the list of loaded profiles.
+ */
+-static enum i40e_status_code
++static int
+ i40e_add_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id)
+ {
+ struct i40e_profile_section_header *sec;
+ struct i40e_profile_info *pinfo;
+- i40e_status status;
+ u32 offset = 0, info = 0;
++ int status;
+
+ sec = (struct i40e_profile_section_header *)profile_info_sec;
+ sec->tbl_size = 1;
+@@ -157,14 +157,14 @@ i40e_add_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ *
+ * Removes DDP profile from the NIC.
+ **/
+-static enum i40e_status_code
++static int
+ i40e_del_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id)
+ {
+ struct i40e_profile_section_header *sec;
+ struct i40e_profile_info *pinfo;
+- i40e_status status;
+ u32 offset = 0, info = 0;
++ int status;
+
+ sec = (struct i40e_profile_section_header *)profile_info_sec;
+ sec->tbl_size = 1;
+@@ -270,12 +270,12 @@ int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+ struct i40e_profile_segment *profile_hdr;
+ struct i40e_profile_info pinfo;
+ struct i40e_package_header *pkg_hdr;
+- i40e_status status;
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ u32 track_id;
+ int istatus;
++ int status;
+
+ pkg_hdr = (struct i40e_package_header *)data;
+ if (!i40e_ddp_is_pkg_hdr_valid(netdev, pkg_hdr, size))
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+index 291e61ac3e448..7c5f874ef335a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+@@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
+ (unsigned long int)vsi->net_stats_offsets.rx_compressed,
+ (unsigned long int)vsi->net_stats_offsets.tx_compressed);
+ dev_info(&pf->pdev->dev,
+- " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n",
++ " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n",
+ vsi->tx_restart, vsi->tx_busy,
+ vsi->rx_buf_failed, vsi->rx_page_failed);
+ rcu_read_lock();
+@@ -553,6 +553,14 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
+ dev_info(&pf->pdev->dev, "vsi %d not found\n", vsi_seid);
+ return;
+ }
++ if (vsi->type != I40E_VSI_MAIN &&
++ vsi->type != I40E_VSI_FDIR &&
++ vsi->type != I40E_VSI_VMDQ2) {
++ dev_info(&pf->pdev->dev,
++ "vsi %d type %d descriptor rings not available\n",
++ vsi_seid, vsi->type);
++ return;
++ }
+ if (type == RING_TYPE_XDP && !i40e_enabled_xdp_vsi(vsi)) {
+ dev_info(&pf->pdev->dev, "XDP not enabled on VSI %d\n", vsi_seid);
+ return;
+@@ -734,10 +742,8 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
+ vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
+- dev_info(&pf->pdev->dev, " num MDD=%lld, invalid msg=%lld, valid msg=%lld\n",
+- vf->num_mdd_events,
+- vf->num_invalid_msgs,
+- vf->num_valid_msgs);
++ dev_info(&pf->pdev->dev, " num MDD=%lld\n",
++ vf->num_mdd_events);
+ } else {
+ dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
+ }
+@@ -912,9 +918,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
+ dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
+ i40e_veb_release(pf->veb[i]);
+ } else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
+- i40e_status ret;
+- u16 vid;
+ unsigned int v;
++ int ret;
++ u16 vid;
+
+ cnt = sscanf(&cmd_buf[8], "%i %u", &vsi_seid, &v);
+ if (cnt != 2) {
+@@ -1278,7 +1284,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
+ }
+ } else if (strncmp(cmd_buf, "send aq_cmd", 11) == 0) {
+ struct i40e_aq_desc *desc;
+- i40e_status ret;
++ int ret;
+
+ desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL);
+ if (!desc)
+@@ -1324,9 +1330,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
+ desc = NULL;
+ } else if (strncmp(cmd_buf, "send indirect aq_cmd", 20) == 0) {
+ struct i40e_aq_desc *desc;
+- i40e_status ret;
+ u16 buffer_len;
+ u8 *buff;
++ int ret;
+
+ desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL);
+ if (!desc)
+@@ -1833,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf)
+ void i40e_dbg_init(void)
+ {
+ i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL);
+- if (!i40e_dbg_root)
++ if (IS_ERR(i40e_dbg_root))
+ pr_info("init of debugfs failed\n");
+ }
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
+index ef4d3762bf371..97fe1787a8f4a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
+@@ -10,8 +10,8 @@
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ **/
+-static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
+- u32 reg, u32 mask)
++static int i40e_diag_reg_pattern_test(struct i40e_hw *hw,
++ u32 reg, u32 mask)
+ {
+ static const u32 patterns[] = {
+ 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+@@ -44,7 +44,7 @@ static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
+ return 0;
+ }
+
+-struct i40e_diag_reg_test_info i40e_reg_list[] = {
++const struct i40e_diag_reg_test_info i40e_reg_list[] = {
+ /* offset mask elements stride */
+ {I40E_QTX_CTL(0), 0x0000FFBF, 1,
+ I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
+@@ -74,31 +74,32 @@ struct i40e_diag_reg_test_info i40e_reg_list[] = {
+ *
+ * Perform registers diagnostic test
+ **/
+-i40e_status i40e_diag_reg_test(struct i40e_hw *hw)
++int i40e_diag_reg_test(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u32 reg, mask;
++ u32 elements;
+ u32 i, j;
+
+ for (i = 0; i40e_reg_list[i].offset != 0 &&
+ !ret_code; i++) {
+
++ elements = i40e_reg_list[i].elements;
+ /* set actual reg range for dynamically allocated resources */
+ if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) &&
+ hw->func_caps.num_tx_qp != 0)
+- i40e_reg_list[i].elements = hw->func_caps.num_tx_qp;
++ elements = hw->func_caps.num_tx_qp;
+ if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) ||
+ i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) ||
+ i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) ||
+ i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) ||
+ i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) &&
+ hw->func_caps.num_msix_vectors != 0)
+- i40e_reg_list[i].elements =
+- hw->func_caps.num_msix_vectors - 1;
++ elements = hw->func_caps.num_msix_vectors - 1;
+
+ /* test register access */
+ mask = i40e_reg_list[i].mask;
+- for (j = 0; j < i40e_reg_list[i].elements && !ret_code; j++) {
++ for (j = 0; j < elements && !ret_code; j++) {
+ reg = i40e_reg_list[i].offset +
+ (j * i40e_reg_list[i].stride);
+ ret_code = i40e_diag_reg_pattern_test(hw, reg, mask);
+@@ -114,9 +115,9 @@ i40e_status i40e_diag_reg_test(struct i40e_hw *hw)
+ *
+ * Perform EEPROM diagnostic test
+ **/
+-i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw)
++int i40e_diag_eeprom_test(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
++ int ret_code;
+ u16 reg_val;
+
+ /* read NVM control word and if NVM valid, validate EEPROM checksum*/
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+index c3340f320a18c..c3ce5f35211f0 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+@@ -20,9 +20,9 @@ struct i40e_diag_reg_test_info {
+ u32 stride; /* bytes between each element */
+ };
+
+-extern struct i40e_diag_reg_test_info i40e_reg_list[];
++extern const struct i40e_diag_reg_test_info i40e_reg_list[];
+
+-i40e_status i40e_diag_reg_test(struct i40e_hw *hw);
+-i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw);
++int i40e_diag_reg_test(struct i40e_hw *hw);
++int i40e_diag_eeprom_test(struct i40e_hw *hw);
+
+ #endif /* _I40E_DIAG_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+index 513ba69743559..d124cb947ffa5 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+@@ -1154,8 +1154,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_hw *hw = &pf->hw;
+ bool autoneg_changed = false;
+- i40e_status status = 0;
+ int timeout = 50;
++ int status = 0;
+ int err = 0;
+ u8 autoneg;
+
+@@ -1357,8 +1357,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
+ status = i40e_aq_set_phy_config(hw, &config, NULL);
+ if (status) {
+ netdev_info(netdev,
+- "Set phy config failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ "Set phy config failed, err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ err = -EAGAIN;
+ goto done;
+@@ -1367,8 +1367,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
+ status = i40e_update_link_info(hw);
+ if (status)
+ netdev_dbg(netdev,
+- "Updating link info failed with err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ "Updating link info failed with err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+
+ } else {
+@@ -1387,7 +1387,7 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_pf *pf = np->vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status status = 0;
++ int status = 0;
+ u32 flags = 0;
+ int err = 0;
+
+@@ -1419,8 +1419,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
+ status = i40e_aq_set_phy_config(hw, &config, NULL);
+ if (status) {
+ netdev_info(netdev,
+- "Set phy config failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ "Set phy config failed, err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ err = -EAGAIN;
+ goto done;
+@@ -1433,8 +1433,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
+ * (e.g. no physical connection etc.)
+ */
+ netdev_dbg(netdev,
+- "Updating link info failed with err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ "Updating link info failed with err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+
+@@ -1449,7 +1449,7 @@ static int i40e_get_fec_param(struct net_device *netdev,
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_pf *pf = np->vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status status = 0;
++ int status = 0;
+ int err = 0;
+ u8 fec_cfg;
+
+@@ -1536,12 +1536,12 @@ static int i40e_nway_reset(struct net_device *netdev)
+ struct i40e_pf *pf = np->vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ bool link_up = hw->phy.link_info.link_info & I40E_AQ_LINK_UP;
+- i40e_status ret = 0;
++ int ret = 0;
+
+ ret = i40e_aq_set_link_restart_an(hw, link_up, NULL);
+ if (ret) {
+- netdev_info(netdev, "link restart failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ netdev_info(netdev, "link restart failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return -EIO;
+ }
+@@ -1601,9 +1601,9 @@ static int i40e_set_pauseparam(struct net_device *netdev,
+ struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+ struct i40e_dcbx_config *dcbx_cfg = &hw->local_dcbx_config;
+ bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
+- i40e_status status;
+ u8 aq_failures;
+ int err = 0;
++ int status;
+ u32 is_an;
+
+ /* Changing the port's flow control is not supported if this isn't the
+@@ -1657,20 +1657,20 @@ static int i40e_set_pauseparam(struct net_device *netdev,
+ status = i40e_set_fc(hw, &aq_failures, link_up);
+
+ if (aq_failures & I40E_SET_FC_AQ_FAIL_GET) {
+- netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ err = -EAGAIN;
+ }
+ if (aq_failures & I40E_SET_FC_AQ_FAIL_SET) {
+- netdev_info(netdev, "Set fc failed on the set_phy_config call with err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ err = -EAGAIN;
+ }
+ if (aq_failures & I40E_SET_FC_AQ_FAIL_UPDATE) {
+- netdev_info(netdev, "Set fc failed on the get_link_info call with err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %s\n",
++ status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ err = -EAGAIN;
+ }
+@@ -2081,9 +2081,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
+ */
+ rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
+ err = i40e_setup_rx_descriptors(&rx_rings[i]);
+- if (err)
+- goto rx_unwind;
+- err = i40e_alloc_rx_bi(&rx_rings[i]);
+ if (err)
+ goto rx_unwind;
+
+@@ -2484,8 +2481,8 @@ static u64 i40e_link_test(struct net_device *netdev, u64 *data)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_pf *pf = np->vsi->back;
+- i40e_status status;
+ bool link_up = false;
++ int status;
+
+ netif_info(pf, hw, netdev, "link test\n");
+ status = i40e_get_link_status(&pf->hw, &link_up);
+@@ -2576,15 +2573,16 @@ static void i40e_diag_test(struct net_device *netdev,
+
+ set_bit(__I40E_TESTING, pf->state);
+
++ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
++ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
++ dev_warn(&pf->pdev->dev,
++ "Cannot start offline testing when PF is in reset state.\n");
++ goto skip_ol_tests;
++ }
++
+ if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) {
+ dev_warn(&pf->pdev->dev,
+ "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+- data[I40E_ETH_TEST_REG] = 1;
+- data[I40E_ETH_TEST_EEPROM] = 1;
+- data[I40E_ETH_TEST_INTR] = 1;
+- data[I40E_ETH_TEST_LINK] = 1;
+- eth_test->flags |= ETH_TEST_FL_FAILED;
+- clear_bit(__I40E_TESTING, pf->state);
+ goto skip_ol_tests;
+ }
+
+@@ -2631,9 +2629,17 @@ static void i40e_diag_test(struct net_device *netdev,
+ data[I40E_ETH_TEST_INTR] = 0;
+ }
+
+-skip_ol_tests:
+-
+ netif_info(pf, drv, netdev, "testing finished\n");
++ return;
++
++skip_ol_tests:
++ data[I40E_ETH_TEST_REG] = 1;
++ data[I40E_ETH_TEST_EEPROM] = 1;
++ data[I40E_ETH_TEST_INTR] = 1;
++ data[I40E_ETH_TEST_LINK] = 1;
++ eth_test->flags |= ETH_TEST_FL_FAILED;
++ clear_bit(__I40E_TESTING, pf->state);
++ netif_info(pf, drv, netdev, "testing failed\n");
+ }
+
+ static void i40e_get_wol(struct net_device *netdev,
+@@ -2699,11 +2705,11 @@ static int i40e_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+- i40e_status ret = 0;
+ struct i40e_pf *pf = np->vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int blink_freq = 2;
+ u16 temp_status;
++ int ret = 0;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+@@ -3079,10 +3085,17 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
+
+ if (cmd->flow_type == TCP_V4_FLOW ||
+ cmd->flow_type == UDP_V4_FLOW) {
+- if (i_set & I40E_L3_SRC_MASK)
+- cmd->data |= RXH_IP_SRC;
+- if (i_set & I40E_L3_DST_MASK)
+- cmd->data |= RXH_IP_DST;
++ if (hw->mac.type == I40E_MAC_X722) {
++ if (i_set & I40E_X722_L3_SRC_MASK)
++ cmd->data |= RXH_IP_SRC;
++ if (i_set & I40E_X722_L3_DST_MASK)
++ cmd->data |= RXH_IP_DST;
++ } else {
++ if (i_set & I40E_L3_SRC_MASK)
++ cmd->data |= RXH_IP_SRC;
++ if (i_set & I40E_L3_DST_MASK)
++ cmd->data |= RXH_IP_DST;
++ }
+ } else if (cmd->flow_type == TCP_V6_FLOW ||
+ cmd->flow_type == UDP_V6_FLOW) {
+ if (i_set & I40E_L3_V6_SRC_MASK)
+@@ -3440,12 +3453,15 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+
+ /**
+ * i40e_get_rss_hash_bits - Read RSS Hash bits from register
++ * @hw: hw structure
+ * @nfc: pointer to user request
+ * @i_setc: bits currently set
+ *
+ * Returns value of bits to be set per user request
+ **/
+-static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
++static u64 i40e_get_rss_hash_bits(struct i40e_hw *hw,
++ struct ethtool_rxnfc *nfc,
++ u64 i_setc)
+ {
+ u64 i_set = i_setc;
+ u64 src_l3 = 0, dst_l3 = 0;
+@@ -3464,8 +3480,13 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
+ dst_l3 = I40E_L3_V6_DST_MASK;
+ } else if (nfc->flow_type == TCP_V4_FLOW ||
+ nfc->flow_type == UDP_V4_FLOW) {
+- src_l3 = I40E_L3_SRC_MASK;
+- dst_l3 = I40E_L3_DST_MASK;
++ if (hw->mac.type == I40E_MAC_X722) {
++ src_l3 = I40E_X722_L3_SRC_MASK;
++ dst_l3 = I40E_X722_L3_DST_MASK;
++ } else {
++ src_l3 = I40E_L3_SRC_MASK;
++ dst_l3 = I40E_L3_DST_MASK;
++ }
+ } else {
+ /* Any other flow type are not supported here */
+ return i_set;
+@@ -3483,6 +3504,7 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
+ return i_set;
+ }
+
++#define FLOW_PCTYPES_SIZE 64
+ /**
+ * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @pf: pointer to the physical function struct
+@@ -3495,9 +3517,11 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
+ struct i40e_hw *hw = &pf->hw;
+ u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
+ ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
+- u8 flow_pctype = 0;
++ DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE);
+ u64 i_set, i_setc;
+
++ bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE);
++
+ if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ dev_err(&pf->pdev->dev,
+ "Change of RSS hash input set is not supported when MFP mode is enabled\n");
+@@ -3513,36 +3537,35 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes);
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+- hena |=
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK,
++ flow_pctypes);
+ break;
+ case TCP_V6_FLOW:
+- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes);
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+- hena |=
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+- hena |=
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK,
++ flow_pctypes);
+ break;
+ case UDP_V4_FLOW:
+- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+- hena |=
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+-
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes);
++ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
++ set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP,
++ flow_pctypes);
++ set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP,
++ flow_pctypes);
++ }
+ hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4);
+ break;
+ case UDP_V6_FLOW:
+- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+- hena |=
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+- BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+-
++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes);
++ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
++ set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP,
++ flow_pctypes);
++ set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP,
++ flow_pctypes);
++ }
+ hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6);
+ break;
+ case AH_ESP_V4_FLOW:
+@@ -3575,17 +3598,20 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
+ return -EINVAL;
+ }
+
+- if (flow_pctype) {
+- i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+- flow_pctype)) |
+- ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+- flow_pctype)) << 32);
+- i_set = i40e_get_rss_hash_bits(nfc, i_setc);
+- i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype),
+- (u32)i_set);
+- i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype),
+- (u32)(i_set >> 32));
+- hena |= BIT_ULL(flow_pctype);
++ if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) {
++ u8 flow_id;
++
++ for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) {
++ i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) |
++ ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32);
++ i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc);
++
++ i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id),
++ (u32)i_set);
++ i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id),
++ (u32)(i_set >> 32));
++ hena |= BIT_ULL(flow_id);
++ }
+ }
+
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
+@@ -4338,11 +4364,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
+ return -EOPNOTSUPP;
+
+ /* First 4 bytes of L4 header */
+- if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
+- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
+- else if (!usr_ip4_spec->l4_4_bytes)
+- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+- else
++ if (usr_ip4_spec->l4_4_bytes)
+ return -EOPNOTSUPP;
+
+ /* Filtering on Type of Service is not supported. */
+@@ -4376,16 +4398,12 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
+ (struct in6_addr *)&ipv6_full_mask))
+ new_mask |= I40E_L3_V6_DST_MASK;
+ else if (ipv6_addr_any((struct in6_addr *)
+- &usr_ip6_spec->ip6src))
++ &usr_ip6_spec->ip6dst))
+ new_mask &= ~I40E_L3_V6_DST_MASK;
+ else
+ return -EOPNOTSUPP;
+
+- if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF))
+- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
+- else if (!usr_ip6_spec->l4_4_bytes)
+- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+- else
++ if (usr_ip6_spec->l4_4_bytes)
+ return -EOPNOTSUPP;
+
+ /* Filtering on Traffic class is not supported. */
+@@ -4912,7 +4930,7 @@ static int i40e_set_channels(struct net_device *dev,
+ /* We do not support setting channels via ethtool when TCs are
+ * configured through mqprio
+ */
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
++ if (i40e_is_tc_mqprio_enabled(pf))
+ return -EINVAL;
+
+ /* verify they are not requesting separate vectors */
+@@ -5127,7 +5145,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ u32 reset_needed = 0;
+- i40e_status status;
++ int status;
+ u32 i, j;
+
+ orig_flags = READ_ONCE(pf->flags);
+@@ -5242,8 +5260,8 @@ flags_complete:
+ 0, NULL);
+ if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
+ dev_info(&pf->pdev->dev,
+- "couldn't set switch config bits, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't set switch config bits, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ /* not a fatal problem, just keep going */
+@@ -5308,9 +5326,8 @@ flags_complete:
+ return -EBUSY;
+ default:
+ dev_warn(&pf->pdev->dev,
+- "Starting FW LLDP agent failed: error: %s, %s\n",
+- i40e_stat_str(&pf->hw,
+- status),
++ "Starting FW LLDP agent failed: error: %d, %s\n",
++ status,
+ i40e_aq_str(&pf->hw,
+ adq_err));
+ return -EINVAL;
+@@ -5350,8 +5367,8 @@ static int i40e_get_module_info(struct net_device *netdev,
+ u32 sff8472_comp = 0;
+ u32 sff8472_swap = 0;
+ u32 sff8636_rev = 0;
+- i40e_status status;
+ u32 type = 0;
++ int status;
+
+ /* Check if firmware supports reading module EEPROM. */
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+@@ -5455,8 +5472,8 @@ static int i40e_get_module_eeprom(struct net_device *netdev,
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ bool is_sfp = false;
+- i40e_status status;
+ u32 value = 0;
++ int status;
+ int i;
+
+ if (!ee || !ee->len || !data)
+@@ -5497,10 +5514,10 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_aq_get_phy_abilities_resp phy_cfg;
+- enum i40e_status_code status = 0;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
++ int status = 0;
+
+ /* Get initial PHY capabilities */
+ status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_cfg, NULL);
+@@ -5562,11 +5579,11 @@ static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_aq_get_phy_abilities_resp abilities;
+- enum i40e_status_code status = I40E_SUCCESS;
+ struct i40e_aq_set_phy_config config;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
++ int status = I40E_SUCCESS;
+ __le16 eee_capability;
+
+ /* Deny parameters we don't support */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+index 163ee8c6311cc..46f7950a0049a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+@@ -17,17 +17,17 @@
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ **/
+-i40e_status i40e_add_sd_table_entry(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 sd_index,
+- enum i40e_sd_entry_type type,
+- u64 direct_mode_sz)
++int i40e_add_sd_table_entry(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 sd_index,
++ enum i40e_sd_entry_type type,
++ u64 direct_mode_sz)
+ {
+ enum i40e_memory_type mem_type __attribute__((unused));
+ struct i40e_hmc_sd_entry *sd_entry;
+ bool dma_mem_alloc_done = false;
++ int ret_code = I40E_SUCCESS;
+ struct i40e_dma_mem mem;
+- i40e_status ret_code = I40E_SUCCESS;
+ u64 alloc_len;
+
+ if (NULL == hmc_info->sd_table.sd_entry) {
+@@ -106,19 +106,19 @@ exit:
+ * aligned on 4K boundary and zeroed memory.
+ * 2. It should be 4K in size.
+ **/
+-i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 pd_index,
+- struct i40e_dma_mem *rsrc_pg)
++int i40e_add_pd_table_entry(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 pd_index,
++ struct i40e_dma_mem *rsrc_pg)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_pd_table *pd_table;
+ struct i40e_hmc_pd_entry *pd_entry;
+ struct i40e_dma_mem mem;
+ struct i40e_dma_mem *page = &mem;
+ u32 sd_idx, rel_pd_idx;
+- u64 *pd_addr;
++ int ret_code = 0;
+ u64 page_desc;
++ u64 *pd_addr;
+
+ if (pd_index / I40E_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_PAGE_DESC_INDEX;
+@@ -185,15 +185,15 @@ exit:
+ * 1. Caller can deallocate the memory used by backing storage after this
+ * function returns.
+ **/
+-i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx)
++int i40e_remove_pd_bp(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_pd_entry *pd_entry;
+ struct i40e_hmc_pd_table *pd_table;
+ struct i40e_hmc_sd_entry *sd_entry;
+ u32 sd_idx, rel_pd_idx;
++ int ret_code = 0;
+ u64 *pd_addr;
+
+ /* calculate index */
+@@ -241,11 +241,11 @@ exit:
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ **/
+-i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+- u32 idx)
++int i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
++ u32 idx)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_sd_entry *sd_entry;
++ int ret_code = 0;
+
+ /* get the entry and decrease its ref counter */
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+@@ -269,9 +269,9 @@ exit:
+ * @idx: the page index
+ * @is_pf: used to distinguish between VF and PF
+ **/
+-i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx, bool is_pf)
++int i40e_remove_sd_bp_new(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx, bool is_pf)
+ {
+ struct i40e_hmc_sd_entry *sd_entry;
+
+@@ -290,11 +290,11 @@ i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ **/
+-i40e_status i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+- u32 idx)
++int i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
++ u32 idx)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_sd_entry *sd_entry;
++ int ret_code = 0;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+@@ -318,9 +318,9 @@ exit:
+ * @idx: segment descriptor index to find the relevant page descriptor
+ * @is_pf: used to distinguish between VF and PF
+ **/
+-i40e_status i40e_remove_pd_page_new(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx, bool is_pf)
++int i40e_remove_pd_page_new(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx, bool is_pf)
+ {
+ struct i40e_hmc_sd_entry *sd_entry;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+index 3113792afaffa..9960da07a5732 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+@@ -187,28 +187,28 @@ struct i40e_hmc_info {
+ /* add one more to the limit to correct our range */ \
+ *(pd_limit) += 1; \
+ }
+-i40e_status i40e_add_sd_table_entry(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 sd_index,
+- enum i40e_sd_entry_type type,
+- u64 direct_mode_sz);
+-
+-i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 pd_index,
+- struct i40e_dma_mem *rsrc_pg);
+-i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx);
+-i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+- u32 idx);
+-i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx, bool is_pf);
+-i40e_status i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+- u32 idx);
+-i40e_status i40e_remove_pd_page_new(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx, bool is_pf);
++
++int i40e_add_sd_table_entry(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 sd_index,
++ enum i40e_sd_entry_type type,
++ u64 direct_mode_sz);
++int i40e_add_pd_table_entry(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 pd_index,
++ struct i40e_dma_mem *rsrc_pg);
++int i40e_remove_pd_bp(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx);
++int i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
++ u32 idx);
++int i40e_remove_sd_bp_new(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx, bool is_pf);
++int i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
++ u32 idx);
++int i40e_remove_pd_page_new(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx, bool is_pf);
+
+ #endif /* _I40E_HMC_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+index d6e92ecddfbd8..40c101f286d19 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+@@ -74,12 +74,12 @@ static u64 i40e_calculate_l2fpm_size(u32 txq_num, u32 rxq_num,
+ * Assumptions:
+ * - HMC Resource Profile has been selected before calling this function.
+ **/
+-i40e_status i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+- u32 rxq_num, u32 fcoe_cntx_num,
+- u32 fcoe_filt_num)
++int i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
++ u32 rxq_num, u32 fcoe_cntx_num,
++ u32 fcoe_filt_num)
+ {
+ struct i40e_hmc_obj_info *obj, *full_obj;
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u64 l2fpm_size;
+ u32 size_exp;
+
+@@ -229,11 +229,11 @@ init_lan_hmc_out:
+ * 1. caller can deallocate the memory used by pd after this function
+ * returns.
+ **/
+-static i40e_status i40e_remove_pd_page(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx)
++static int i40e_remove_pd_page(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (!i40e_prep_remove_pd_page(hmc_info, idx))
+ ret_code = i40e_remove_pd_page_new(hw, hmc_info, idx, true);
+@@ -256,11 +256,11 @@ static i40e_status i40e_remove_pd_page(struct i40e_hw *hw,
+ * 1. caller can deallocate the memory used by backing storage after this
+ * function returns.
+ **/
+-static i40e_status i40e_remove_sd_bp(struct i40e_hw *hw,
+- struct i40e_hmc_info *hmc_info,
+- u32 idx)
++static int i40e_remove_sd_bp(struct i40e_hw *hw,
++ struct i40e_hmc_info *hmc_info,
++ u32 idx)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (!i40e_prep_remove_sd_bp(hmc_info, idx))
+ ret_code = i40e_remove_sd_bp_new(hw, hmc_info, idx, true);
+@@ -276,15 +276,15 @@ static i40e_status i40e_remove_sd_bp(struct i40e_hw *hw,
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ **/
+-static i40e_status i40e_create_lan_hmc_object(struct i40e_hw *hw,
+- struct i40e_hmc_lan_create_obj_info *info)
++static int i40e_create_lan_hmc_object(struct i40e_hw *hw,
++ struct i40e_hmc_lan_create_obj_info *info)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_sd_entry *sd_entry;
+ u32 pd_idx1 = 0, pd_lmt1 = 0;
+ u32 pd_idx = 0, pd_lmt = 0;
+ bool pd_error = false;
+ u32 sd_idx, sd_lmt;
++ int ret_code = 0;
+ u64 sd_size;
+ u32 i, j;
+
+@@ -435,13 +435,13 @@ exit:
+ * - This function will be called after i40e_init_lan_hmc() and before
+ * any LAN/FCoE HMC objects can be created.
+ **/
+-i40e_status i40e_configure_lan_hmc(struct i40e_hw *hw,
+- enum i40e_hmc_model model)
++int i40e_configure_lan_hmc(struct i40e_hw *hw,
++ enum i40e_hmc_model model)
+ {
+ struct i40e_hmc_lan_create_obj_info info;
+- i40e_status ret_code = 0;
+ u8 hmc_fn_id = hw->hmc.hmc_fn_id;
+ struct i40e_hmc_obj_info *obj;
++ int ret_code = 0;
+
+ /* Initialize part of the create object info struct */
+ info.hmc_info = &hw->hmc;
+@@ -520,13 +520,13 @@ configure_lan_hmc_out:
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ **/
+-static i40e_status i40e_delete_lan_hmc_object(struct i40e_hw *hw,
+- struct i40e_hmc_lan_delete_obj_info *info)
++static int i40e_delete_lan_hmc_object(struct i40e_hw *hw,
++ struct i40e_hmc_lan_delete_obj_info *info)
+ {
+- i40e_status ret_code = 0;
+ struct i40e_hmc_pd_table *pd_table;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ u32 sd_idx, sd_lmt;
++ int ret_code = 0;
+ u32 i, j;
+
+ if (NULL == info) {
+@@ -632,10 +632,10 @@ exit:
+ * This must be called by drivers as they are shutting down and being
+ * removed from the OS.
+ **/
+-i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw)
++int i40e_shutdown_lan_hmc(struct i40e_hw *hw)
+ {
+ struct i40e_hmc_lan_delete_obj_info info;
+- i40e_status ret_code;
++ int ret_code;
+
+ info.hmc_info = &hw->hmc;
+ info.rsrc_type = I40E_HMC_LAN_FULL;
+@@ -915,9 +915,9 @@ static void i40e_write_qword(u8 *hmc_bits,
+ * @context_bytes: pointer to the context bit array (DMA memory)
+ * @hmc_type: the type of HMC resource
+ **/
+-static i40e_status i40e_clear_hmc_context(struct i40e_hw *hw,
+- u8 *context_bytes,
+- enum i40e_hmc_lan_rsrc_type hmc_type)
++static int i40e_clear_hmc_context(struct i40e_hw *hw,
++ u8 *context_bytes,
++ enum i40e_hmc_lan_rsrc_type hmc_type)
+ {
+ /* clean the bit array */
+ memset(context_bytes, 0, (u32)hw->hmc.hmc_obj[hmc_type].size);
+@@ -931,9 +931,9 @@ static i40e_status i40e_clear_hmc_context(struct i40e_hw *hw,
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+-static i40e_status i40e_set_hmc_context(u8 *context_bytes,
+- struct i40e_context_ele *ce_info,
+- u8 *dest)
++static int i40e_set_hmc_context(u8 *context_bytes,
++ struct i40e_context_ele *ce_info,
++ u8 *dest)
+ {
+ int f;
+
+@@ -973,18 +973,18 @@ static i40e_status i40e_set_hmc_context(u8 *context_bytes,
+ * base pointer. This function is used for LAN Queue contexts.
+ **/
+ static
+-i40e_status i40e_hmc_get_object_va(struct i40e_hw *hw, u8 **object_base,
+- enum i40e_hmc_lan_rsrc_type rsrc_type,
+- u32 obj_idx)
++int i40e_hmc_get_object_va(struct i40e_hw *hw, u8 **object_base,
++ enum i40e_hmc_lan_rsrc_type rsrc_type,
++ u32 obj_idx)
+ {
+ struct i40e_hmc_info *hmc_info = &hw->hmc;
+ u32 obj_offset_in_sd, obj_offset_in_pd;
+ struct i40e_hmc_sd_entry *sd_entry;
+ struct i40e_hmc_pd_entry *pd_entry;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+- i40e_status ret_code = 0;
+ u64 obj_offset_in_fpm;
+ u32 sd_idx, sd_lmt;
++ int ret_code = 0;
+
+ if (NULL == hmc_info) {
+ ret_code = I40E_ERR_BAD_PTR;
+@@ -1042,11 +1042,11 @@ exit:
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ **/
+-i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+- u16 queue)
++int i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
++ u16 queue)
+ {
+- i40e_status err;
+ u8 *context_bytes;
++ int err;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
+ I40E_HMC_LAN_TX, queue);
+@@ -1062,12 +1062,12 @@ i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+-i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+- u16 queue,
+- struct i40e_hmc_obj_txq *s)
++int i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
++ u16 queue,
++ struct i40e_hmc_obj_txq *s)
+ {
+- i40e_status err;
+ u8 *context_bytes;
++ int err;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
+ I40E_HMC_LAN_TX, queue);
+@@ -1083,11 +1083,11 @@ i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ **/
+-i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+- u16 queue)
++int i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
++ u16 queue)
+ {
+- i40e_status err;
+ u8 *context_bytes;
++ int err;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
+ I40E_HMC_LAN_RX, queue);
+@@ -1103,12 +1103,12 @@ i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+-i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+- u16 queue,
+- struct i40e_hmc_obj_rxq *s)
++int i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
++ u16 queue,
++ struct i40e_hmc_obj_rxq *s)
+ {
+- i40e_status err;
+ u8 *context_bytes;
++ int err;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
+ I40E_HMC_LAN_RX, queue);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+index c46a2c449e60e..9f960404c2b37 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+@@ -137,22 +137,22 @@ struct i40e_hmc_lan_delete_obj_info {
+ u32 count;
+ };
+
+-i40e_status i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+- u32 rxq_num, u32 fcoe_cntx_num,
+- u32 fcoe_filt_num);
+-i40e_status i40e_configure_lan_hmc(struct i40e_hw *hw,
+- enum i40e_hmc_model model);
+-i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw);
+-
+-i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+- u16 queue);
+-i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+- u16 queue,
+- struct i40e_hmc_obj_txq *s);
+-i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+- u16 queue);
+-i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+- u16 queue,
+- struct i40e_hmc_obj_rxq *s);
++int i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
++ u32 rxq_num, u32 fcoe_cntx_num,
++ u32 fcoe_filt_num);
++int i40e_configure_lan_hmc(struct i40e_hw *hw,
++ enum i40e_hmc_model model);
++int i40e_shutdown_lan_hmc(struct i40e_hw *hw);
++
++int i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
++ u16 queue);
++int i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
++ u16 queue,
++ struct i40e_hmc_obj_txq *s);
++int i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
++ u16 queue);
++int i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
++ u16 queue,
++ struct i40e_hmc_obj_rxq *s);
+
+ #endif /* _I40E_LAN_HMC_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index e04b540cedc85..d3f3874220a31 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -99,6 +99,24 @@ MODULE_LICENSE("GPL v2");
+
+ static struct workqueue_struct *i40e_wq;
+
++static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
++ struct net_device *netdev, int delta)
++{
++ struct netdev_hw_addr *ha;
++
++ if (!f || !netdev)
++ return;
++
++ netdev_for_each_mc_addr(ha, netdev) {
++ if (ether_addr_equal(ha->addr, f->macaddr)) {
++ ha->refcount += delta;
++ if (ha->refcount <= 0)
++ ha->refcount = 1;
++ break;
++ }
++ }
++}
++
+ /**
+ * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+@@ -178,10 +196,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+ * @id: an owner id to stick on the items assigned
+ *
+ * Returns the base item index of the lump, or negative for error
+- *
+- * The search_hint trick and lack of advanced fit-finding only work
+- * because we're highly likely to have all the same size lump requests.
+- * Linear search time and any fragmentation should be minimal.
+ **/
+ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
+ u16 needed, u16 id)
+@@ -196,8 +210,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
+ return -EINVAL;
+ }
+
+- /* start the linear search with an imperfect hint */
+- i = pile->search_hint;
++ /* Allocate last queue in the pile for FDIR VSI queue
++ * so it doesn't fragment the qp_pile
++ */
++ if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
++ if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
++ dev_err(&pf->pdev->dev,
++ "Cannot allocate queue %d for I40E_VSI_FDIR\n",
++ pile->num_entries - 1);
++ return -ENOMEM;
++ }
++ pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
++ return pile->num_entries - 1;
++ }
++
++ i = 0;
+ while (i < pile->num_entries) {
+ /* skip already allocated entries */
+ if (pile->list[i] & I40E_PILE_VALID_BIT) {
+@@ -216,7 +243,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
+ for (j = 0; j < needed; j++)
+ pile->list[i+j] = id | I40E_PILE_VALID_BIT;
+ ret = i;
+- pile->search_hint = i + j;
+ break;
+ }
+
+@@ -239,7 +265,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
+ {
+ int valid_id = (id | I40E_PILE_VALID_BIT);
+ int count = 0;
+- int i;
++ u16 i;
+
+ if (!pile || index >= pile->num_entries)
+ return -EINVAL;
+@@ -251,8 +277,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
+ count++;
+ }
+
+- if (count && index < pile->search_hint)
+- pile->search_hint = index;
+
+ return count;
+ }
+@@ -359,7 +383,9 @@ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+ set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
+ break;
+ default:
+- netdev_err(netdev, "tx_timeout recovery unsuccessful\n");
++ netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n");
++ set_bit(__I40E_DOWN_REQUESTED, pf->state);
++ set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state);
+ break;
+ }
+
+@@ -525,6 +551,47 @@ void i40e_pf_reset_stats(struct i40e_pf *pf)
+ pf->hw_csum_rx_error = 0;
+ }
+
++/**
++ * i40e_compute_pci_to_hw_id - compute index form PCI function.
++ * @vsi: ptr to the VSI to read from.
++ * @hw: ptr to the hardware info.
++ **/
++static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
++{
++ int pf_count = i40e_get_pf_count(hw);
++
++ if (vsi->type == I40E_VSI_SRIOV)
++ return (hw->port * BIT(7)) / pf_count + vsi->vf_id;
++
++ return hw->port + BIT(7);
++}
++
++/**
++ * i40e_stat_update64 - read and update a 64 bit stat from the chip.
++ * @hw: ptr to the hardware info.
++ * @hireg: the high 32 bit reg to read.
++ * @loreg: the low 32 bit reg to read.
++ * @offset_loaded: has the initial offset been loaded yet.
++ * @offset: ptr to current offset value.
++ * @stat: ptr to the stat.
++ *
++ * Since the device stats are not reset at PFReset, they will not
++ * be zeroed when the driver starts. We'll save the first values read
++ * and use them as offsets to be subtracted from the raw values in order
++ * to report stats that count from zero.
++ **/
++static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg,
++ bool offset_loaded, u64 *offset, u64 *stat)
++{
++ u64 new_data;
++
++ new_data = rd64(hw, loreg);
++
++ if (!offset_loaded || new_data < *offset)
++ *offset = new_data;
++ *stat = new_data - *offset;
++}
++
+ /**
+ * i40e_stat_update48 - read and update a 48 bit stat from the chip
+ * @hw: ptr to the hardware info
+@@ -596,6 +663,34 @@ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+ *stat += new_data;
+ }
+
++/**
++ * i40e_stats_update_rx_discards - update rx_discards.
++ * @vsi: ptr to the VSI to be updated.
++ * @hw: ptr to the hardware info.
++ * @stat_idx: VSI's stat_counter_idx.
++ * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
++ * @stat_offset: ptr to stat_offset to store first read of specific register.
++ * @stat: ptr to VSI's stat to be updated.
++ **/
++static void
++i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
++ int stat_idx, bool offset_loaded,
++ struct i40e_eth_stats *stat_offset,
++ struct i40e_eth_stats *stat)
++{
++ u64 rx_rdpc, rx_rxerr;
++
++ i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
++ &stat_offset->rx_discards, &rx_rdpc);
++ i40e_stat_update64(hw,
++ I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
++ I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
++ offset_loaded, &stat_offset->rx_discards_other,
++ &rx_rxerr);
++
++ stat->rx_discards = rx_rdpc + rx_rxerr;
++}
++
+ /**
+ * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
+ * @vsi: the VSI to be updated
+@@ -655,6 +750,10 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi)
+ I40E_GLV_BPTCL(stat_idx),
+ vsi->stat_offsets_loaded,
+ &oes->tx_broadcast, &es->tx_broadcast);
++
++ i40e_stats_update_rx_discards(vsi, hw, stat_idx,
++ vsi->stat_offsets_loaded, oes, es);
++
+ vsi->stat_offsets_loaded = true;
+ }
+
+@@ -754,9 +853,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
+ struct rtnl_link_stats64 *ns; /* netdev stats */
+ struct i40e_eth_stats *oes;
+ struct i40e_eth_stats *es; /* device's eth stats */
+- u32 tx_restart, tx_busy;
++ u64 tx_restart, tx_busy;
+ struct i40e_ring *p;
+- u32 rx_page, rx_buf;
++ u64 rx_page, rx_buf;
+ u64 bytes, packets;
+ unsigned int start;
+ u64 tx_linearize;
+@@ -1587,18 +1686,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
+ */
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
+ i40e_del_mac_filter(vsi, netdev->dev_addr);
+- ether_addr_copy(netdev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(netdev, addr->sa_data);
+ i40e_add_mac_filter(vsi, netdev->dev_addr);
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+ if (vsi->type == I40E_VSI_MAIN) {
+- i40e_status ret;
++ int ret;
+
+ ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
+ addr->sa_data, NULL);
+ if (ret)
+- netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n",
+- i40e_stat_str(hw, ret),
++ netdev_info(netdev, "Ignoring error from firmware on LAA update, status %d, AQ ret %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+
+@@ -1629,8 +1728,8 @@ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot set RSS key, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Cannot set RSS key, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+@@ -1641,8 +1740,8 @@ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot set RSS lut, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Cannot set RSS lut, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+@@ -1790,6 +1889,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ bool is_add)
+ {
+ struct i40e_pf *pf = vsi->back;
++ u16 num_tc_qps = 0;
+ u16 sections = 0;
+ u8 netdev_tc = 0;
+ u16 numtc = 1;
+@@ -1797,13 +1897,37 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ u8 offset;
+ u16 qmap;
+ int i;
+- u16 num_tc_qps = 0;
+
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ offset = 0;
++ /* zero out queue mapping, it will get updated on the end of the function */
++ memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
++
++ if (vsi->type == I40E_VSI_MAIN) {
++ /* This code helps add more queue to the VSI if we have
++ * more cores than RSS can support, the higher cores will
++ * be served by ATR or other filters. Furthermore, the
++ * non-zero req_queue_pairs says that user requested a new
++ * queue count via ethtool's set_channels, so use this
++ * value for queues distribution across traffic classes
++ * We need at least one queue pair for the interface
++ * to be usable as we see in else statement.
++ */
++ if (vsi->req_queue_pairs > 0)
++ vsi->num_queue_pairs = vsi->req_queue_pairs;
++ else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
++ vsi->num_queue_pairs = pf->num_lan_msix;
++ else
++ vsi->num_queue_pairs = 1;
++ }
+
+ /* Number of queues per enabled TC */
+- num_tc_qps = vsi->alloc_queue_pairs;
++ if (vsi->type == I40E_VSI_MAIN ||
++ (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
++ num_tc_qps = vsi->num_queue_pairs;
++ else
++ num_tc_qps = vsi->alloc_queue_pairs;
++
+ if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+ /* Find numtc from enabled TC bitmap */
+ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+@@ -1881,15 +2005,11 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ }
+ ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+ }
+-
+- /* Set actual Tx/Rx queue pairs */
+- vsi->num_queue_pairs = offset;
+- if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) {
+- if (vsi->req_queue_pairs > 0)
+- vsi->num_queue_pairs = vsi->req_queue_pairs;
+- else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+- vsi->num_queue_pairs = pf->num_lan_msix;
+- }
++ /* Do not change previously set num_queue_pairs for PFs and VFs*/
++ if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
++ (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
++ (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
++ vsi->num_queue_pairs = offset;
+
+ /* Scheduler section valid can only be set for ADD VSI */
+ if (is_add) {
+@@ -2019,6 +2139,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
+ hlist_for_each_entry_safe(new, h, from, hlist) {
+ /* We can simply free the wrapper structure */
+ hlist_del(&new->hlist);
++ netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
+ kfree(new);
+ }
+ }
+@@ -2101,7 +2222,7 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
+ int num_del, int *retval)
+ {
+ struct i40e_hw *hw = &vsi->back->hw;
+- i40e_status aq_ret;
++ int aq_ret;
+ int aq_err;
+
+ aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL);
+@@ -2111,8 +2232,8 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
+ if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
+ *retval = -EIO;
+ dev_info(&vsi->back->pdev->dev,
+- "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
+- vsi_name, i40e_stat_str(hw, aq_ret),
++ "ignoring delete macvlan error on %s, err %d, aq_err %s\n",
++ vsi_name, aq_ret,
+ i40e_aq_str(hw, aq_err));
+ }
+ }
+@@ -2174,13 +2295,13 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
+ *
+ * Returns status indicating success or failure;
+ **/
+-static i40e_status
++static int
+ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
+ struct i40e_mac_filter *f)
+ {
+ bool enable = f->state == I40E_FILTER_NEW;
+ struct i40e_hw *hw = &vsi->back->hw;
+- i40e_status aq_ret;
++ int aq_ret;
+
+ if (f->vlan == I40E_VLAN_ANY) {
+ aq_ret = i40e_aq_set_vsi_broadcast(hw,
+@@ -2219,7 +2340,7 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
+ {
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status aq_ret;
++ int aq_ret;
+
+ if (vsi->type == I40E_VSI_MAIN &&
+ pf->lan_veb != I40E_NO_VEB &&
+@@ -2239,8 +2360,8 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
+ NULL);
+ if (aq_ret) {
+ dev_info(&pf->pdev->dev,
+- "Set default VSI failed, err %s, aq_err %s\n",
+- i40e_stat_str(hw, aq_ret),
++ "Set default VSI failed, err %d, aq_err %s\n",
++ aq_ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+ } else {
+@@ -2251,8 +2372,8 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
+ true);
+ if (aq_ret) {
+ dev_info(&pf->pdev->dev,
+- "set unicast promisc failed, err %s, aq_err %s\n",
+- i40e_stat_str(hw, aq_ret),
++ "set unicast promisc failed, err %d, aq_err %s\n",
++ aq_ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+ aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
+@@ -2261,8 +2382,8 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
+ promisc, NULL);
+ if (aq_ret) {
+ dev_info(&pf->pdev->dev,
+- "set multicast promisc failed, err %s, aq_err %s\n",
+- i40e_stat_str(hw, aq_ret),
++ "set multicast promisc failed, err %d, aq_err %s\n",
++ aq_ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+ }
+@@ -2292,12 +2413,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+ unsigned int vlan_filters = 0;
+ char vsi_name[16] = "PF";
+ int filter_list_len = 0;
+- i40e_status aq_ret = 0;
+ u32 changed_flags = 0;
+ struct hlist_node *h;
+ struct i40e_pf *pf;
+ int num_add = 0;
+ int num_del = 0;
++ int aq_ret = 0;
+ int retval = 0;
+ u16 cmd_flags;
+ int list_size;
+@@ -2366,6 +2487,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+ &tmp_add_list,
+ &tmp_del_list,
+ vlan_filters);
++
++ hlist_for_each_entry(new, &tmp_add_list, hlist)
++ netdev_hw_addr_refcnt(new->f, vsi->netdev, 1);
++
+ if (retval)
+ goto err_no_memory_locked;
+
+@@ -2498,6 +2623,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+ if (new->f->state == I40E_FILTER_NEW)
+ new->f->state = new->state;
+ hlist_del(&new->hlist);
++ netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
+ kfree(new);
+ }
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+@@ -2556,9 +2682,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+ retval = i40e_aq_rc_to_posix(aq_ret,
+ hw->aq.asq_last_status);
+ dev_info(&pf->pdev->dev,
+- "set multi promisc failed on %s, err %s aq_err %s\n",
++ "set multi promisc failed on %s, err %d aq_err %s\n",
+ vsi_name,
+- i40e_stat_str(hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ } else {
+ dev_info(&pf->pdev->dev, "%s allmulti mode.\n",
+@@ -2576,10 +2702,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+ retval = i40e_aq_rc_to_posix(aq_ret,
+ hw->aq.asq_last_status);
+ dev_info(&pf->pdev->dev,
+- "Setting promiscuous %s failed on %s, err %s aq_err %s\n",
++ "Setting promiscuous %s failed on %s, err %d aq_err %s\n",
+ cur_promisc ? "on" : "off",
+ vsi_name,
+- i40e_stat_str(hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ }
+ }
+@@ -2623,7 +2749,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
+
+ for (v = 0; v < pf->num_alloc_vsi; v++) {
+ if (pf->vsi[v] &&
+- (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) {
++ (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
++ !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
+ int ret = i40e_sync_vsi_filters(pf->vsi[v]);
+
+ if (ret) {
+@@ -2662,7 +2789,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
+ struct i40e_pf *pf = vsi->back;
+
+ if (i40e_enabled_xdp_vsi(vsi)) {
+- int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
++ int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
+
+ if (frame_size > i40e_max_xdp_frame_size(vsi))
+ return -EINVAL;
+@@ -2706,7 +2833,7 @@ int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
+ {
+ struct i40e_vsi_context ctxt;
+- i40e_status ret;
++ int ret;
+
+ /* Don't modify stripping options if a port VLAN is active */
+ if (vsi->info.pvid)
+@@ -2726,8 +2853,8 @@ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
+ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+- "update vlan stripping failed, err %s aq_err %s\n",
+- i40e_stat_str(&vsi->back->hw, ret),
++ "update vlan stripping failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&vsi->back->hw,
+ vsi->back->hw.aq.asq_last_status));
+ }
+@@ -2740,7 +2867,7 @@ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
+ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
+ {
+ struct i40e_vsi_context ctxt;
+- i40e_status ret;
++ int ret;
+
+ /* Don't modify stripping options if a port VLAN is active */
+ if (vsi->info.pvid)
+@@ -2761,8 +2888,8 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
+ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+- "update vlan stripping failed, err %s aq_err %s\n",
+- i40e_stat_str(&vsi->back->hw, ret),
++ "update vlan stripping failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&vsi->back->hw,
+ vsi->back->hw.aq.asq_last_status));
+ }
+@@ -2980,7 +3107,7 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
+ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
+ {
+ struct i40e_vsi_context ctxt;
+- i40e_status ret;
++ int ret;
+
+ vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+ vsi->info.pvid = cpu_to_le16(vid);
+@@ -2993,8 +3120,8 @@ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
+ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+- "add pvid failed, err %s aq_err %s\n",
+- i40e_stat_str(&vsi->back->hw, ret),
++ "add pvid failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&vsi->back->hw,
+ vsi->back->hw.aq.asq_last_status));
+ return -ENOENT;
+@@ -3157,8 +3284,8 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
+ u16 pf_q = vsi->base_queue + ring->queue_index;
+ struct i40e_hw *hw = &vsi->back->hw;
+ struct i40e_hmc_obj_txq tx_ctx;
+- i40e_status err = 0;
+ u32 qtx_ctl = 0;
++ int err = 0;
+
+ if (ring_is_xdp(ring))
+ ring->xsk_pool = i40e_xsk_pool(ring);
+@@ -3282,7 +3409,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
+ u16 pf_q = vsi->base_queue + ring->queue_index;
+ struct i40e_hw *hw = &vsi->back->hw;
+ struct i40e_hmc_obj_rxq rx_ctx;
+- i40e_status err = 0;
++ int err = 0;
+ bool ok;
+ int ret;
+
+@@ -3294,12 +3421,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
+ if (ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+- kfree(ring->rx_bi);
+ ring->xsk_pool = i40e_xsk_pool(ring);
+ if (ring->xsk_pool) {
+- ret = i40e_alloc_rx_bi_zc(ring);
+- if (ret)
+- return ret;
+ ring->rx_buf_len =
+ xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ /* For AF_XDP ZC, we disallow packets to span on
+@@ -3317,9 +3440,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
+ ring->queue_index);
+
+ } else {
+- ret = i40e_alloc_rx_bi(ring);
+- if (ret)
+- return ret;
+ ring->rx_buf_len = vsi->rx_buf_len;
+ if (ring->vsi->type == I40E_VSI_MAIN) {
+ ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+@@ -3428,6 +3548,24 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
+ return err;
+ }
+
++/**
++ * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
++ *
++ * @vsi: VSI to calculate rx_buf_len from
++ */
++static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
++{
++ if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
++ return I40E_RXBUFFER_2048;
++
++#if (PAGE_SIZE < 8192)
++ if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN)
++ return I40E_RXBUFFER_1536 - NET_IP_ALIGN;
++#endif
++
++ return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
++}
++
+ /**
+ * i40e_vsi_configure_rx - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+@@ -3439,20 +3577,14 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
+ int err = 0;
+ u16 i;
+
+- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+- vsi->max_frame = I40E_MAX_RXBUFFER;
+- vsi->rx_buf_len = I40E_RXBUFFER_2048;
++ vsi->max_frame = I40E_MAX_RXBUFFER;
++ vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
++
+ #if (PAGE_SIZE < 8192)
+- } else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
+- (vsi->netdev->mtu <= ETH_DATA_LEN)) {
++ if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
++ vsi->netdev->mtu <= ETH_DATA_LEN)
+ vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+- vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+ #endif
+- } else {
+- vsi->max_frame = I40E_MAX_RXBUFFER;
+- vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 :
+- I40E_RXBUFFER_2048;
+- }
+
+ /* set up individual rings */
+ for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+@@ -5193,7 +5325,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
+ u8 num_tc = 0;
+ struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
+
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
++ if (i40e_is_tc_mqprio_enabled(pf))
+ return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+
+ /* If neither MQPRIO nor DCB is enabled, then always use single TC */
+@@ -5225,7 +5357,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
+ **/
+ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
+ {
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
++ if (i40e_is_tc_mqprio_enabled(pf))
+ return i40e_mqprio_get_enabled_tc(pf);
+
+ /* If neither MQPRIO nor DCB is enabled for this PF then just return
+@@ -5257,16 +5389,16 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
+ struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret;
+ u32 tc_bw_max;
++ int ret;
+ int i;
+
+ /* Get the VSI level BW configuration */
+ ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi bw config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get PF vsi bw config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+@@ -5276,8 +5408,8 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
+ NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi ets bw config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get PF vsi ets bw config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EINVAL;
+ }
+@@ -5318,11 +5450,11 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
+ {
+ struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+ struct i40e_pf *pf = vsi->back;
+- i40e_status ret;
++ int ret;
+ int i;
+
+ /* There is no need to reset BW when mqprio mode is on. */
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
++ if (i40e_is_tc_mqprio_enabled(pf))
+ return 0;
+ if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+@@ -5394,7 +5526,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+ vsi->tc_config.tc_info[i].qoffset);
+ }
+
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
++ if (i40e_is_tc_mqprio_enabled(pf))
+ return;
+
+ /* Assign UP2TC map for the VSI */
+@@ -5426,6 +5558,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
+ sizeof(vsi->info.tc_mapping));
+ }
+
++/**
++ * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
++ * @vsi: the VSI being reconfigured
++ * @vsi_offset: offset from main VF VSI
++ */
++int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
++{
++ struct i40e_vsi_context ctxt = {};
++ struct i40e_pf *pf;
++ struct i40e_hw *hw;
++ int ret;
++
++ if (!vsi)
++ return I40E_ERR_PARAM;
++ pf = vsi->back;
++ hw = &pf->hw;
++
++ ctxt.seid = vsi->seid;
++ ctxt.pf_num = hw->pf_id;
++ ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
++ ctxt.uplink_seid = vsi->uplink_seid;
++ ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
++ ctxt.flags = I40E_AQ_VSI_TYPE_VF;
++ ctxt.info = vsi->info;
++
++ i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
++ false);
++ if (vsi->reconfig_rss) {
++ vsi->rss_size = min_t(int, pf->alloc_rss_size,
++ vsi->num_queue_pairs);
++ ret = i40e_vsi_config_rss(vsi);
++ if (ret) {
++ dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
++ return ret;
++ }
++ vsi->reconfig_rss = false;
++ }
++
++ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
++ if (ret) {
++ dev_info(&pf->pdev->dev, "Update vsi config failed, err %d aq_err %s\n",
++ ret,
++ i40e_aq_str(hw, hw->aq.asq_last_status));
++ return ret;
++ }
++ /* update the local VSI info with updated queue map */
++ i40e_vsi_update_queue_map(vsi, &ctxt);
++ vsi->info.valid_sections = 0;
++
++ return ret;
++}
++
+ /**
+ * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
+ * @vsi: VSI to be configured
+@@ -5470,8 +5654,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+ &bw_config, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed querying vsi bw info, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Failed querying vsi bw info, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ goto out;
+ }
+@@ -5503,7 +5687,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = vsi->uplink_seid;
+ ctxt.info = vsi->info;
+- if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
++ if (i40e_is_tc_mqprio_enabled(pf)) {
+ ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+ if (ret)
+ goto out;
+@@ -5537,8 +5721,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Update vsi tc config failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Update vsi tc config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ goto out;
+ }
+@@ -5550,8 +5734,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+ ret = i40e_vsi_get_bw_info(vsi);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed updating vsi bw info, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Failed updating vsi bw info, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ goto out;
+ }
+@@ -5587,6 +5771,26 @@ static int i40e_get_link_speed(struct i40e_vsi *vsi)
+ }
+ }
+
++/**
++ * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits
++ * @vsi: Pointer to vsi structure
++ * @max_tx_rate: max TX rate in bytes to be converted into Mbits
++ *
++ * Helper function to convert units before send to set BW limit
++ **/
++static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate)
++{
++ if (max_tx_rate < I40E_BW_MBPS_DIVISOR) {
++ dev_warn(&vsi->back->pdev->dev,
++ "Setting max tx rate to minimum usable value of 50Mbps.\n");
++ max_tx_rate = I40E_BW_CREDIT_DIVISOR;
++ } else {
++ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
++ }
++
++ return max_tx_rate;
++}
++
+ /**
+ * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+@@ -5609,10 +5813,10 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
+ max_tx_rate, seid);
+ return -EINVAL;
+ }
+- if (max_tx_rate && max_tx_rate < 50) {
++ if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) {
+ dev_warn(&pf->pdev->dev,
+ "Setting max tx rate to minimum usable value of 50Mbps.\n");
+- max_tx_rate = 50;
++ max_tx_rate = I40E_BW_CREDIT_DIVISOR;
+ }
+
+ /* Tx rate credits are in values of 50Mbps, 0 is disabled */
+@@ -5622,8 +5826,8 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
+ I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+- "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+- max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
++ "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %d aq_err %s\n",
++ max_tx_rate, seid, ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return ret;
+ }
+@@ -5698,8 +5902,8 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+ last_aq_status = pf->hw.aq.asq_last_status;
+ if (ret)
+ dev_info(&pf->pdev->dev,
+- "Failed to delete cloud filter, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed to delete cloud filter, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, last_aq_status));
+ kfree(cfilter);
+ }
+@@ -5716,24 +5920,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+ INIT_LIST_HEAD(&vsi->ch_list);
+ }
+
+-/**
+- * i40e_is_any_channel - channel exist or not
+- * @vsi: ptr to VSI to which channels are associated with
+- *
+- * Returns true or false if channel(s) exist for associated VSI or not
+- **/
+-static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+-{
+- struct i40e_channel *ch, *ch_tmp;
+-
+- list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+- if (ch->initialized)
+- return true;
+- }
+-
+- return false;
+-}
+-
+ /**
+ * i40e_get_max_queues_for_channel
+ * @vsi: ptr to VSI to which channels are associated with
+@@ -5851,8 +6037,8 @@ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+ ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot set RSS lut, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Cannot set RSS lut, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ kfree(lut);
+ return ret;
+@@ -5950,8 +6136,8 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+ ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "add new vsi failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "add new vsi failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+@@ -5982,7 +6168,7 @@ static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+ u8 *bw_share)
+ {
+ struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+- i40e_status ret;
++ int ret;
+ int i;
+
+ memset(&bw_data, 0, sizeof(bw_data));
+@@ -6018,9 +6204,9 @@ static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+ struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+ {
+- i40e_status ret;
+- int i;
+ u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
++ int ret;
++ int i;
+
+ /* Enable ETS TCs with equal BW Share for now across all VSIs */
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+@@ -6196,8 +6382,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+ mode, NULL);
+ if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+ dev_err(&pf->pdev->dev,
+- "couldn't set switch config bits, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "couldn't set switch config bits, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw,
+ hw->aq.asq_last_status));
+
+@@ -6240,26 +6426,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi,
+ /* By default we are in VEPA mode, if this is the first VF/VMDq
+ * VSI to be added switch to VEB mode.
+ */
+- if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+- (!i40e_is_any_channel(vsi))) {
+- if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+- dev_dbg(&pf->pdev->dev,
+- "Failed to create channel. Override queues (%u) not power of 2\n",
+- vsi->tc_config.tc_info[0].qcount);
+- return -EINVAL;
+- }
+
+- if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+- pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
++ if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
++ pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+
+- if (vsi->type == I40E_VSI_MAIN) {
+- if (pf->flags & I40E_FLAG_TC_MQPRIO)
+- i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+- true);
+- else
+- i40e_do_reset_safe(pf,
+- I40E_PF_RESET_FLAG);
+- }
++ if (vsi->type == I40E_VSI_MAIN) {
++ if (i40e_is_tc_mqprio_enabled(pf))
++ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
++ else
++ i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
+ }
+ /* now onwards for main VSI, number of queues will be value
+ * of TC0's queue count
+@@ -6367,6 +6542,9 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+ vsi->tc_seid_map[i] = ch->seid;
+ }
+ }
++
++ /* reset to reconfigure TX queue contexts */
++ i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true);
+ return ret;
+
+ err_free:
+@@ -6405,8 +6583,8 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ &bw_data, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "VEB bw config failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "VEB bw config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto out;
+ }
+@@ -6415,8 +6593,8 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+ ret = i40e_veb_get_bw_info(veb);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Failed getting veb bw config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed getting veb bw config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+
+@@ -6499,8 +6677,8 @@ static int i40e_resume_port_tx(struct i40e_pf *pf)
+ ret = i40e_aq_resume_port_tx(hw, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Resume Port Tx failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Resume Port Tx failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* Schedule PF reset to recover */
+ set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+@@ -6524,8 +6702,8 @@ static int i40e_suspend_port_tx(struct i40e_pf *pf)
+ ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Suspend Port Tx failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Suspend Port Tx failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* Schedule PF reset to recover */
+ set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+@@ -6564,8 +6742,8 @@ static int i40e_hw_set_dcb_config(struct i40e_pf *pf,
+ ret = i40e_set_dcb_config(&pf->hw);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Set DCB Config failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Set DCB Config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto out;
+ }
+@@ -6681,8 +6859,8 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
+ i40e_aqc_opc_modify_switching_comp_ets, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Modify Port ETS failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Modify Port ETS failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto out;
+ }
+@@ -6719,8 +6897,8 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
+ ret = i40e_aq_dcb_updated(&pf->hw, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "DCB Updated failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "DCB Updated failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto out;
+ }
+@@ -6803,8 +6981,8 @@ int i40e_dcb_sw_default_config(struct i40e_pf *pf)
+ i40e_aqc_opc_enable_switching_comp_ets, NULL);
+ if (err) {
+ dev_info(&pf->pdev->dev,
+- "Enable Port ETS failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "Enable Port ETS failed, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ err = -ENOENT;
+ goto out;
+@@ -6883,8 +7061,8 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
+ pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
+ } else {
+ dev_info(&pf->pdev->dev,
+- "Query for DCB configuration failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "Query for DCB configuration failed, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+
+@@ -7102,15 +7280,15 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+-static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
++static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+ {
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ bool non_zero_phy_type = is_up;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status err;
+ u64 mask;
+ u8 speed;
++ int err;
+
+ /* Card might've been put in an unstable state by other drivers
+ * and applications, which causes incorrect speed values being
+@@ -7122,8 +7300,8 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+- "failed to get phy cap., ret = %s last_status = %s\n",
+- i40e_stat_str(hw, err),
++ "failed to get phy cap., ret = %d last_status = %s\n",
++ err,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+@@ -7134,8 +7312,8 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+- "failed to get phy cap., ret = %s last_status = %s\n",
+- i40e_stat_str(hw, err),
++ "failed to get phy cap., ret = %d last_status = %s\n",
++ err,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+@@ -7179,8 +7357,8 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+- "set phy config ret = %s last_status = %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "set phy config ret = %d last_status = %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+@@ -7343,11 +7521,11 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+ * This function deletes a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+-static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+- const u8 *macaddr, int *aq_err)
++static int i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
++ const u8 *macaddr, int *aq_err)
+ {
+ struct i40e_aqc_remove_macvlan_element_data element;
+- i40e_status status;
++ int status;
+
+ memset(&element, 0, sizeof(element));
+ ether_addr_copy(element.mac_addr, macaddr);
+@@ -7369,12 +7547,12 @@ static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+ * This function adds a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+-static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
+- const u8 *macaddr, int *aq_err)
++static int i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
++ const u8 *macaddr, int *aq_err)
+ {
+ struct i40e_aqc_add_macvlan_element_data element;
+- i40e_status status;
+ u16 cmd_flags = 0;
++ int status;
+
+ ether_addr_copy(element.mac_addr, macaddr);
+ element.vlan_tag = 0;
+@@ -7464,42 +7642,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
+ static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+ struct i40e_fwd_adapter *fwd)
+ {
++ struct i40e_channel *ch = NULL, *ch_tmp, *iter;
+ int ret = 0, num_tc = 1, i, aq_err;
+- struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+
+- if (list_empty(&vsi->macvlan_list))
+- return -EINVAL;
+-
+ /* Go through the list and find an available channel */
+- list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+- if (!i40e_is_channel_macvlan(ch)) {
+- ch->fwd = fwd;
++ list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
++ if (!i40e_is_channel_macvlan(iter)) {
++ iter->fwd = fwd;
+ /* record configuration for macvlan interface in vdev */
+ for (i = 0; i < num_tc; i++)
+ netdev_bind_sb_channel_queue(vsi->netdev, vdev,
+ i,
+- ch->num_queue_pairs,
+- ch->base_queue);
+- for (i = 0; i < ch->num_queue_pairs; i++) {
++ iter->num_queue_pairs,
++ iter->base_queue);
++ for (i = 0; i < iter->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+- pf_q = ch->base_queue + i;
++ pf_q = iter->base_queue + i;
+
+ /* Get to TX ring ptr */
+ tx_ring = vsi->tx_rings[pf_q];
+- tx_ring->ch = ch;
++ tx_ring->ch = iter;
+
+ /* Get the RX ring ptr */
+ rx_ring = vsi->rx_rings[pf_q];
+- rx_ring->ch = ch;
++ rx_ring->ch = iter;
+ }
++ ch = iter;
+ break;
+ }
+ }
+
++ if (!ch)
++ return -EINVAL;
++
+ /* Guarantee all rings are updated before we update the
+ * MAC address filter.
+ */
+@@ -7519,8 +7698,8 @@ static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+ rx_ring->netdev = NULL;
+ }
+ dev_info(&pf->pdev->dev,
+- "Error adding mac filter on macvlan err %s, aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Error adding mac filter on macvlan err %d, aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, aq_err));
+ netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
+ }
+@@ -7592,8 +7771,8 @@ static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Update vsi tc config failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Update vsi tc config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+@@ -7649,7 +7828,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
+ netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
+ return ERR_PTR(-EINVAL);
+ }
+- if ((pf->flags & I40E_FLAG_TC_MQPRIO)) {
++ if (i40e_is_tc_mqprio_enabled(pf)) {
+ netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
+ return ERR_PTR(-EINVAL);
+ }
+@@ -7808,8 +7987,8 @@ static void i40e_fwd_del(struct net_device *netdev, void *vdev)
+ ch->fwd = NULL;
+ } else {
+ dev_info(&pf->pdev->dev,
+- "Error deleting mac filter on macvlan err %s, aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ "Error deleting mac filter on macvlan err %d, aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, aq_err));
+ }
+ break;
+@@ -7902,7 +8081,7 @@ config_tc:
+ /* Quiesce VSI queues */
+ i40e_quiesce_vsi(vsi);
+
+- if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
++ if (!hw && !i40e_is_tc_mqprio_enabled(pf))
+ i40e_remove_queue_channels(vsi);
+
+ /* Configure VSI for enabled TCs */
+@@ -7912,17 +8091,25 @@ config_tc:
+ vsi->seid);
+ need_reset = true;
+ goto exit;
+- } else {
+- dev_info(&vsi->back->pdev->dev,
+- "Setup channel (id:%u) utilizing num_queues %d\n",
+- vsi->seid, vsi->tc_config.tc_info[0].qcount);
++ } else if (enabled_tc &&
++ (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
++ netdev_info(netdev,
++ "Failed to create channel. Override queues (%u) not power of 2\n",
++ vsi->tc_config.tc_info[0].qcount);
++ ret = -EINVAL;
++ need_reset = true;
++ goto exit;
+ }
+
+- if (pf->flags & I40E_FLAG_TC_MQPRIO) {
++ dev_info(&vsi->back->pdev->dev,
++ "Setup channel (id:%u) utilizing num_queues %d\n",
++ vsi->seid, vsi->tc_config.tc_info[0].qcount);
++
++ if (i40e_is_tc_mqprio_enabled(pf)) {
+ if (vsi->mqprio_qopt.max_rate[0]) {
+- u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
++ u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
++ vsi->mqprio_qopt.max_rate[0]);
+
+- do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (!ret) {
+ u64 credits = max_tx_rate;
+@@ -8443,6 +8630,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+ return -EOPNOTSUPP;
+ }
+
++ if (!tc) {
++ dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
++ return -EINVAL;
++ }
++
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+ return -EBUSY;
+@@ -8482,9 +8674,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+ err = i40e_add_del_cloud_filter(vsi, filter, true);
+
+ if (err) {
+- dev_err(&pf->pdev->dev,
+- "Failed to add cloud filter, err %s\n",
+- i40e_stat_str(&pf->hw, err));
++ dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
++ err);
+ goto err;
+ }
+
+@@ -8548,8 +8739,7 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+ kfree(filter);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+- "Failed to delete cloud filter, err %s\n",
+- i40e_stat_str(&pf->hw, err));
++ "Failed to delete cloud filter, err %d\n", err);
+ return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+ }
+
+@@ -8668,6 +8858,27 @@ int i40e_open(struct net_device *netdev)
+ return 0;
+ }
+
++/**
++ * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues
++ * @vsi: vsi structure
++ *
++ * This updates netdev's number of tx/rx queues
++ *
++ * Returns status of setting tx/rx queues
++ **/
++static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi)
++{
++ int ret;
++
++ ret = netif_set_real_num_rx_queues(vsi->netdev,
++ vsi->num_queue_pairs);
++ if (ret)
++ return ret;
++
++ return netif_set_real_num_tx_queues(vsi->netdev,
++ vsi->num_queue_pairs);
++}
++
+ /**
+ * i40e_vsi_open -
+ * @vsi: the VSI to open
+@@ -8704,13 +8915,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi)
+ goto err_setup_rx;
+
+ /* Notify the stack of the actual queue counts. */
+- err = netif_set_real_num_tx_queues(vsi->netdev,
+- vsi->num_queue_pairs);
+- if (err)
+- goto err_set_queues;
+-
+- err = netif_set_real_num_rx_queues(vsi->netdev,
+- vsi->num_queue_pairs);
++ err = i40e_netif_set_realnum_tx_rx_queues(vsi);
+ if (err)
+ goto err_set_queues;
+
+@@ -9096,8 +9301,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
+ pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+ } else {
+ dev_info(&pf->pdev->dev,
+- "Failed querying DCB configuration data from firmware, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed querying DCB configuration data from firmware, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+@@ -9545,8 +9750,8 @@ static void i40e_link_event(struct i40e_pf *pf)
+ {
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ u8 new_link_speed, old_link_speed;
+- i40e_status status;
+ bool new_link, old_link;
++ int status;
+ #ifdef CONFIG_I40E_DCB
+ int err;
+ #endif /* CONFIG_I40E_DCB */
+@@ -9757,9 +9962,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
+ struct i40e_arq_event_info event;
+ struct i40e_hw *hw = &pf->hw;
+ u16 pending, i = 0;
+- i40e_status ret;
+ u16 opcode;
+ u32 oldval;
++ int ret;
+ u32 val;
+
+ /* Do not run clean AQ when PF reset fails */
+@@ -9923,8 +10128,8 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf)
+ ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get PF vsi config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return;
+ }
+@@ -9935,8 +10140,8 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf)
+ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "update vsi switch failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "update vsi switch failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+ }
+@@ -9959,8 +10164,8 @@ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf)
+ ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get PF vsi config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return;
+ }
+@@ -9971,8 +10176,8 @@ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf)
+ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "update vsi switch failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "update vsi switch failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ }
+ }
+@@ -10116,8 +10321,8 @@ static int i40e_get_capabilities(struct i40e_pf *pf,
+ buf_len = data_size;
+ } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) {
+ dev_info(&pf->pdev->dev,
+- "capability discovery failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ "capability discovery failed, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENODEV;
+@@ -10238,7 +10443,7 @@ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+ struct i40e_cloud_filter *cfilter;
+ struct i40e_pf *pf = vsi->back;
+ struct hlist_node *node;
+- i40e_status ret;
++ int ret;
+
+ /* Add cloud filters back if they exist */
+ hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+@@ -10254,8 +10459,8 @@ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+
+ if (ret) {
+ dev_dbg(&pf->pdev->dev,
+- "Failed to rebuild cloud filter, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Failed to rebuild cloud filter, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return ret;
+@@ -10273,7 +10478,7 @@ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+ {
+ struct i40e_channel *ch, *ch_tmp;
+- i40e_status ret;
++ int ret;
+
+ if (list_empty(&vsi->ch_list))
+ return 0;
+@@ -10325,6 +10530,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+ return 0;
+ }
+
++/**
++ * i40e_clean_xps_state - clean xps state for every tx_ring
++ * @vsi: ptr to the VSI
++ **/
++static void i40e_clean_xps_state(struct i40e_vsi *vsi)
++{
++ int i;
++
++ if (vsi->tx_rings)
++ for (i = 0; i < vsi->num_queue_pairs; i++)
++ if (vsi->tx_rings[i])
++ clear_bit(__I40E_TX_XPS_INIT_DONE,
++ vsi->tx_rings[i]->state);
++}
++
+ /**
+ * i40e_prep_for_reset - prep for the core to reset
+ * @pf: board private structure
+@@ -10334,7 +10554,7 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+ static void i40e_prep_for_reset(struct i40e_pf *pf)
+ {
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret = 0;
++ int ret = 0;
+ u32 v;
+
+ clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
+@@ -10349,8 +10569,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
+ i40e_pf_quiesce_all_vsi(pf);
+
+ for (v = 0; v < pf->num_alloc_vsi; v++) {
+- if (pf->vsi[v])
++ if (pf->vsi[v]) {
++ i40e_clean_xps_state(pf->vsi[v]);
+ pf->vsi[v]->seid = 0;
++ }
+ }
+
+ i40e_shutdown_adminq(&pf->hw);
+@@ -10437,7 +10659,7 @@ static void i40e_get_oem_version(struct i40e_hw *hw)
+ static int i40e_reset(struct i40e_pf *pf)
+ {
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret;
++ int ret;
+
+ ret = i40e_pf_reset(hw);
+ if (ret) {
+@@ -10459,43 +10681,35 @@ static int i40e_reset(struct i40e_pf *pf)
+ **/
+ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ {
+- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
++ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret;
++ int ret;
+ u32 val;
+ int v;
+
+ if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+- i40e_check_recovery_mode(pf)) {
++ is_recovery_mode_reported)
+ i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
+- }
+
+ if (test_bit(__I40E_DOWN, pf->state) &&
+- !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+- !old_recovery_mode_bit)
++ !test_bit(__I40E_RECOVERY_MODE, pf->state))
+ goto clear_recovery;
+ dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
+
+ /* rebuild the basics for the AdminQ, HMC, and initial HW switch */
+ ret = i40e_init_adminq(&pf->hw);
+ if (ret) {
+- dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto clear_recovery;
+ }
+ i40e_get_oem_version(&pf->hw);
+
+- if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+- ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+- hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+- /* The following delay is necessary for 4.33 firmware and older
+- * to recover after EMP reset. 200 ms should suffice but we
+- * put here 300 ms to be sure that FW is ready to operate
+- * after reset.
+- */
+- mdelay(300);
++ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
++ /* The following delay is necessary for firmware update. */
++ mdelay(1000);
+ }
+
+ /* re-verify the eeprom if we just had an EMP reset */
+@@ -10506,13 +10720,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ * accordingly with regard to resources initialization
+ * and deinitialization
+ */
+- if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
+- old_recovery_mode_bit) {
++ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+ if (i40e_get_capabilities(pf,
+ i40e_aqc_opc_list_func_capabilities))
+ goto end_unlock;
+
+- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
++ if (is_recovery_mode_reported) {
+ /* we're staying in recovery mode so we'll reinitialize
+ * misc vector here
+ */
+@@ -10562,7 +10775,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ * unless I40E_FLAG_TC_MQPRIO was enabled or DCB
+ * is not supported with new link speed
+ */
+- if (pf->flags & I40E_FLAG_TC_MQPRIO) {
++ if (i40e_is_tc_mqprio_enabled(pf)) {
+ i40e_aq_set_dcb_parameters(hw, false, NULL);
+ } else {
+ if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
+@@ -10599,8 +10812,8 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ I40E_AQ_EVENT_MEDIA_NA |
+ I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
+ if (ret)
+- dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ dev_info(&pf->pdev->dev, "set phy mask fail, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ /* Rebuild the VSIs and VEBs that existed before reset.
+@@ -10657,10 +10870,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ }
+
+ if (vsi->mqprio_qopt.max_rate[0]) {
+- u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
++ u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
++ vsi->mqprio_qopt.max_rate[0]);
+ u64 credits = 0;
+
+- do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (ret)
+ goto end_unlock;
+@@ -10703,14 +10916,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ msleep(75);
+ ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
+ if (ret)
+- dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ dev_info(&pf->pdev->dev, "link restart failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+ /* reinit the misc interrupt */
+- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
++ if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+ ret = i40e_setup_misc_vector(pf);
++ if (ret)
++ goto end_unlock;
++ }
+
+ /* Add a filter to drop all Flow control frames from any VSI from being
+ * transmitted. By doing so we stop a malicious VF from sending out
+@@ -10732,9 +10948,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+ ret = i40e_set_promiscuous(pf, pf->cur_promisc);
+ if (ret)
+ dev_warn(&pf->pdev->dev,
+- "Failed to restore promiscuous setting: %s, err %s aq_err %s\n",
++ "Failed to restore promiscuous setting: %s, err %d aq_err %s\n",
+ pf->cur_promisc ? "on" : "off",
+- i40e_stat_str(&pf->hw, ret),
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ i40e_reset_all_vfs(pf, true);
+@@ -10766,6 +10982,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
+ bool lock_acquired)
+ {
+ int ret;
++
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
+ /* Now we wait for GRST to settle out.
+ * We don't have to delete the VEBs or VSIs from the hw switch
+ * because the reset will make them disappear.
+@@ -11705,7 +11924,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
+ return -ENOMEM;
+
+ pf->irq_pile->num_entries = vectors;
+- pf->irq_pile->search_hint = 0;
+
+ /* track first vector for misc interrupts, ignore return */
+ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
+@@ -11867,8 +12085,8 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ (struct i40e_aqc_get_set_rss_key_data *)seed);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot get RSS key, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Cannot get RSS key, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return ret;
+@@ -11881,8 +12099,8 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot get RSS lut, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Cannot get RSS lut, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return ret;
+@@ -12126,6 +12344,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
+
+ vsi->req_queue_pairs = queue_count;
+ i40e_prep_for_reset(pf);
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return pf->alloc_rss_size;
+
+ pf->alloc_rss_size = new_rss_size;
+
+@@ -12155,11 +12375,11 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
+ * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition
+ * @pf: board private structure
+ **/
+-i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf)
++int i40e_get_partition_bw_setting(struct i40e_pf *pf)
+ {
+- i40e_status status;
+ bool min_valid, max_valid;
+ u32 max_bw, min_bw;
++ int status;
+
+ status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw,
+ &min_valid, &max_valid);
+@@ -12178,10 +12398,10 @@ i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf)
+ * i40e_set_partition_bw_setting - Set BW settings for this PF partition
+ * @pf: board private structure
+ **/
+-i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf)
++int i40e_set_partition_bw_setting(struct i40e_pf *pf)
+ {
+ struct i40e_aqc_configure_partition_bw_data bw_data;
+- i40e_status status;
++ int status;
+
+ memset(&bw_data, 0, sizeof(bw_data));
+
+@@ -12200,12 +12420,12 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf)
+ * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
+ * @pf: board private structure
+ **/
+-i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
++int i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+ {
+ /* Commit temporary BW setting to permanent NVM image */
+ enum i40e_admin_queue_err last_aq_status;
+- i40e_status ret;
+ u16 nvm_word;
++ int ret;
+
+ if (pf->hw.partition_id != 1) {
+ dev_info(&pf->pdev->dev,
+@@ -12220,8 +12440,8 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+ last_aq_status = pf->hw.aq.asq_last_status;
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot acquire NVM for read access, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Cannot acquire NVM for read access, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, last_aq_status));
+ goto bw_commit_out;
+ }
+@@ -12237,8 +12457,8 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+ last_aq_status = pf->hw.aq.asq_last_status;
+ i40e_release_nvm(&pf->hw);
+ if (ret) {
+- dev_info(&pf->pdev->dev, "NVM read error, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ dev_info(&pf->pdev->dev, "NVM read error, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, last_aq_status));
+ goto bw_commit_out;
+ }
+@@ -12251,8 +12471,8 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+ last_aq_status = pf->hw.aq.asq_last_status;
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "Cannot acquire NVM for write access, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "Cannot acquire NVM for write access, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, last_aq_status));
+ goto bw_commit_out;
+ }
+@@ -12271,8 +12491,8 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+ i40e_release_nvm(&pf->hw);
+ if (ret)
+ dev_info(&pf->pdev->dev,
+- "BW settings NOT SAVED, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "BW settings NOT SAVED, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, last_aq_status));
+ bw_commit_out:
+
+@@ -12293,7 +12513,7 @@ static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
+ #define I40E_LINK_BEHAVIOR_WORD_LENGTH 0x1
+ #define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED BIT(0)
+ #define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH 4
+- i40e_status read_status = I40E_SUCCESS;
++ int read_status = I40E_SUCCESS;
+ u16 sr_emp_sr_settings_ptr = 0;
+ u16 features_enable = 0;
+ u16 link_behavior = 0;
+@@ -12326,8 +12546,8 @@ static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
+
+ err_nvm:
+ dev_warn(&pf->pdev->dev,
+- "total-port-shutdown feature is off due to read nvm error: %s\n",
+- i40e_stat_str(&pf->hw, read_status));
++ "total-port-shutdown feature is off due to read nvm error: %d\n",
++ read_status);
+ return ret;
+ }
+
+@@ -12508,7 +12728,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
+ goto sw_init_done;
+ }
+ pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
+- pf->qp_pile->search_hint = 0;
+
+ pf->tx_timeout_recovery_level = 1;
+
+@@ -12646,7 +12865,7 @@ static int i40e_udp_tunnel_set_port(struct net_device *netdev,
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_hw *hw = &np->vsi->back->hw;
+ u8 type, filter_index;
+- i40e_status ret;
++ int ret;
+
+ type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
+ I40E_AQC_TUNNEL_TYPE_NGE;
+@@ -12654,8 +12873,8 @@ static int i40e_udp_tunnel_set_port(struct net_device *netdev,
+ ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
+ NULL);
+ if (ret) {
+- netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ netdev_info(netdev, "add UDP port failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return -EIO;
+ }
+@@ -12670,12 +12889,12 @@ static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
+ {
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_hw *hw = &np->vsi->back->hw;
+- i40e_status ret;
++ int ret;
+
+ ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
+ if (ret) {
+- netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n",
+- i40e_stat_str(hw, ret),
++ netdev_info(netdev, "delete UDP port failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return -EIO;
+ }
+@@ -12788,6 +13007,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
+ }
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
++ if (!br_spec)
++ return -EINVAL;
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ __u16 mode;
+@@ -12942,7 +13163,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+ int i;
+
+ /* Don't allow frames that span over multiple buffers */
+- if (frame_size > vsi->rx_buf_len) {
++ if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
+ return -EINVAL;
+ }
+@@ -12953,6 +13174,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+ if (need_reset)
+ i40e_prep_for_reset(pf);
+
++ /* VSI shall be deleted in a moment, just return EINVAL */
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return -EINVAL;
++
+ old_prog = xchg(&vsi->xdp_prog, prog);
+
+ if (need_reset) {
+@@ -12962,6 +13187,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+ i40e_reset_and_rebuild(pf, true, true);
+ }
+
++ if (!i40e_enabled_xdp_vsi(vsi) && prog) {
++ if (i40e_realloc_rx_bi_zc(vsi, true))
++ return -ENOMEM;
++ } else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
++ if (i40e_realloc_rx_bi_zc(vsi, false))
++ return -ENOMEM;
++ }
++
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+@@ -13194,6 +13427,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+
+ i40e_queue_pair_disable_irq(vsi, queue_pair);
+ err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
++ i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+ i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
+ i40e_queue_pair_clean_rings(vsi, queue_pair);
+ i40e_queue_pair_reset_stats(vsi, queue_pair);
+@@ -13425,7 +13659,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
+ i40e_add_mac_filter(vsi, broadcast);
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+- ether_addr_copy(netdev->dev_addr, mac_addr);
++ eth_hw_addr_set(netdev, mac_addr);
+ ether_addr_copy(netdev->perm_addr, mac_addr);
+
+ /* i40iw_net_event() reads 16 bytes from neigh->primary_key */
+@@ -13531,8 +13765,8 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get PF vsi config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get PF vsi config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+@@ -13561,8 +13795,8 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "update vsi failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "update vsi failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ ret = -ENOENT;
+@@ -13581,8 +13815,8 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "update vsi failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "update vsi failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ ret = -ENOENT;
+@@ -13604,9 +13838,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ * message and continue
+ */
+ dev_info(&pf->pdev->dev,
+- "failed to configure TCs for main VSI tc_map 0x%08x, err %s aq_err %s\n",
++ "failed to configure TCs for main VSI tc_map 0x%08x, err %d aq_err %s\n",
+ enabled_tc,
+- i40e_stat_str(&pf->hw, ret),
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+@@ -13700,8 +13934,8 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+- "add vsi failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "add vsi failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ ret = -ENOENT;
+@@ -13713,15 +13947,15 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ vsi->id = ctxt.vsi_number;
+ }
+
+- vsi->active_filters = 0;
+- clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
++ vsi->active_filters = 0;
+ /* If macvlan filters already exist, force them to get loaded */
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ f->state = I40E_FILTER_NEW;
+ f_count++;
+ }
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
++ clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+ if (f_count) {
+ vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+@@ -13732,8 +13966,8 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
+ ret = i40e_vsi_get_bw_info(vsi);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get vsi bw info, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get vsi bw info, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* VSI is already added so not tearing that up */
+ ret = 0;
+@@ -13771,7 +14005,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
+ dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
+ return -ENODEV;
+ }
+-
++ set_bit(__I40E_VSI_RELEASING, vsi->state);
+ uplink_seid = vsi->uplink_seid;
+ if (vsi->type != I40E_VSI_SRIOV) {
+ if (vsi->netdev_registered) {
+@@ -14101,6 +14335,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
+ case I40E_VSI_MAIN:
+ case I40E_VSI_VMDQ2:
+ ret = i40e_config_netdev(vsi);
++ if (ret)
++ goto err_netdev;
++ ret = i40e_netif_set_realnum_tx_rx_queues(vsi);
+ if (ret)
+ goto err_netdev;
+ ret = register_netdev(vsi->netdev);
+@@ -14176,8 +14413,8 @@ static int i40e_veb_get_bw_info(struct i40e_veb *veb)
+ &bw_data, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "query veb bw config failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "query veb bw config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+ goto out;
+ }
+@@ -14186,8 +14423,8 @@ static int i40e_veb_get_bw_info(struct i40e_veb *veb)
+ &ets_data, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "query veb bw ets config failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "query veb bw ets config failed, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+ goto out;
+ }
+@@ -14383,8 +14620,8 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
+ /* get a VEB from the hardware */
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't add VEB, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't add VEB, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EPERM;
+ }
+@@ -14394,16 +14631,16 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
+ &veb->stats_idx, NULL, NULL, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get VEB statistics idx, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get VEB statistics idx, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return -EPERM;
+ }
+ ret = i40e_veb_get_bw_info(veb);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't get VEB bw info, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't get VEB bw info, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
+ return -ENOENT;
+@@ -14613,8 +14850,8 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
+ &next_seid, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "get switch config failed err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "get switch config failed err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ kfree(aq_buf);
+@@ -14659,8 +14896,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
+ ret = i40e_fetch_switch_configuration(pf, false);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+- "couldn't fetch switch config, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't fetch switch config, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return ret;
+ }
+@@ -14686,8 +14923,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
+ NULL);
+ if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
+ dev_info(&pf->pdev->dev,
+- "couldn't set switch config bits, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, ret),
++ "couldn't set switch config bits, err %d aq_err %s\n",
++ ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ /* not a fatal problem, just keep going */
+@@ -15024,13 +15261,12 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
+ *
+ * Return 0 on success, negative on failure.
+ **/
+-static i40e_status i40e_pf_loop_reset(struct i40e_pf *pf)
++static int i40e_pf_loop_reset(struct i40e_pf *pf)
+ {
+ /* wait max 10 seconds for PF reset to succeed */
+ const unsigned long time_end = jiffies + 10 * HZ;
+-
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret;
++ int ret;
+
+ ret = i40e_pf_reset(hw);
+ while (ret != I40E_SUCCESS && time_before(jiffies, time_end)) {
+@@ -15076,9 +15312,9 @@ static bool i40e_check_fw_empr(struct i40e_pf *pf)
+ * Return 0 if NIC is healthy or negative value when there are issues
+ * with resets
+ **/
+-static i40e_status i40e_handle_resets(struct i40e_pf *pf)
++static int i40e_handle_resets(struct i40e_pf *pf)
+ {
+- const i40e_status pfr = i40e_pf_loop_reset(pf);
++ const int pfr = i40e_pf_loop_reset(pf);
+ const bool is_empr = i40e_check_fw_empr(pf);
+
+ if (is_empr || pfr != I40E_SUCCESS)
+@@ -15103,6 +15339,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+ int err;
+ int v_idx;
+
++ pci_set_drvdata(pf->pdev, pf);
+ pci_save_state(pf->pdev);
+
+ /* set up periodic task facility */
+@@ -15216,13 +15453,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ #ifdef CONFIG_I40E_DCB
+ enum i40e_get_fw_lldp_status_resp lldp_status;
+- i40e_status status;
+ #endif /* CONFIG_I40E_DCB */
+ struct i40e_pf *pf;
+ struct i40e_hw *hw;
+ static u16 pfs_found;
+ u16 wol_nvm_bits;
+ u16 link_status;
++#ifdef CONFIG_I40E_DCB
++ int status;
++#endif /* CONFIG_I40E_DCB */
+ int err;
+ u32 val;
+ u32 i;
+@@ -15403,8 +15642,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+ hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
+- dev_info(&pdev->dev,
+- "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n",
++ dev_dbg(&pdev->dev,
++ "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
+ hw->aq.api_maj_ver,
+ hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR,
+@@ -15594,8 +15833,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ I40E_AQ_EVENT_MEDIA_NA |
+ I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
+ if (err)
+- dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ dev_info(&pf->pdev->dev, "set phy mask fail, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ /* Reconfigure hardware for allowing smaller MSS in the case
+@@ -15613,8 +15852,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ msleep(75);
+ err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
+ if (err)
+- dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
+- i40e_stat_str(&pf->hw, err),
++ dev_info(&pf->pdev->dev, "link restart failed, err %d aq_err %s\n",
++ err,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ }
+@@ -15746,8 +15985,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ /* get the requested speeds from the fw */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
+ if (err)
+- dev_dbg(&pf->pdev->dev, "get requested speeds ret = %s last_status = %s\n",
+- i40e_stat_str(&pf->hw, err),
++ dev_dbg(&pf->pdev->dev, "get requested speeds ret = %d last_status = %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
+
+@@ -15757,8 +15996,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ /* get the supported phy types from the fw */
+ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
+ if (err)
+- dev_dbg(&pf->pdev->dev, "get supported phy types ret = %s last_status = %s\n",
+- i40e_stat_str(&pf->hw, err),
++ dev_dbg(&pf->pdev->dev, "get supported phy types ret = %d last_status = %s\n",
++ err,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ /* make sure the MFS hasn't been set lower than the default */
+@@ -15829,7 +16068,7 @@ static void i40e_remove(struct pci_dev *pdev)
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret_code;
++ int ret_code;
+ int i;
+
+ i40e_dbg_pf_exit(pf);
+@@ -15840,8 +16079,13 @@ static void i40e_remove(struct pci_dev *pdev)
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
+
+- while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
++ /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
++ * flags, once they are set, i40e_rebuild should not be called as
++ * i40e_prep_for_reset always returns early.
++ */
++ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+ usleep_range(1000, 2000);
++ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+ set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+@@ -16040,6 +16284,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
++
+ i40e_reset_and_rebuild(pf, false, false);
+ }
+
+@@ -16069,9 +16316,9 @@ static void i40e_pci_error_resume(struct pci_dev *pdev)
+ static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
+ {
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status ret;
+ u8 mac_addr[6];
+ u16 flags = 0;
++ int ret;
+
+ /* Get current MAC address in case it's an LAA */
+ if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) {
+@@ -16282,6 +16529,8 @@ static struct pci_driver i40e_driver = {
+ **/
+ static int __init i40e_init_module(void)
+ {
++ int err;
++
+ pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string);
+ pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
+
+@@ -16299,7 +16548,14 @@ static int __init i40e_init_module(void)
+ }
+
+ i40e_dbg_init();
+- return pci_register_driver(&i40e_driver);
++ err = pci_register_driver(&i40e_driver);
++ if (err) {
++ destroy_workqueue(i40e_wq);
++ i40e_dbg_exit();
++ return err;
++ }
++
++ return 0;
+ }
+ module_init(i40e_init_module);
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+index fe6dca846028f..b7556a6c27589 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+@@ -13,10 +13,10 @@
+ * in this file) as an equivalent of the FLASH part mapped into the SR.
+ * We are accessing FLASH always thru the Shadow RAM.
+ **/
+-i40e_status i40e_init_nvm(struct i40e_hw *hw)
++int i40e_init_nvm(struct i40e_hw *hw)
+ {
+ struct i40e_nvm_info *nvm = &hw->nvm;
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u32 fla, gens;
+ u8 sr_size;
+
+@@ -52,12 +52,12 @@ i40e_status i40e_init_nvm(struct i40e_hw *hw)
+ * This function will request NVM ownership for reading
+ * via the proper Admin Command.
+ **/
+-i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
+- enum i40e_aq_resource_access_type access)
++int i40e_acquire_nvm(struct i40e_hw *hw,
++ enum i40e_aq_resource_access_type access)
+ {
+- i40e_status ret_code = 0;
+ u64 gtime, timeout;
+ u64 time_left = 0;
++ int ret_code = 0;
+
+ if (hw->nvm.blank_nvm_mode)
+ goto i40e_i40e_acquire_nvm_exit;
+@@ -111,7 +111,7 @@ i40e_i40e_acquire_nvm_exit:
+ **/
+ void i40e_release_nvm(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = I40E_SUCCESS;
++ int ret_code = I40E_SUCCESS;
+ u32 total_delay = 0;
+
+ if (hw->nvm.blank_nvm_mode)
+@@ -138,9 +138,9 @@ void i40e_release_nvm(struct i40e_hw *hw)
+ *
+ * Polls the SRCTL Shadow RAM register done bit.
+ **/
+-static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
++static int i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
+ {
+- i40e_status ret_code = I40E_ERR_TIMEOUT;
++ int ret_code = I40E_ERR_TIMEOUT;
+ u32 srctl, wait_cnt;
+
+ /* Poll the I40E_GLNVM_SRCTL until the done bit is set */
+@@ -165,10 +165,10 @@ static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
+ *
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ **/
+-static i40e_status i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
+- u16 *data)
++static int i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
++ u16 *data)
+ {
+- i40e_status ret_code = I40E_ERR_TIMEOUT;
++ int ret_code = I40E_ERR_TIMEOUT;
+ u32 sr_reg;
+
+ if (offset >= hw->nvm.sr_size) {
+@@ -210,19 +210,19 @@ read_nvm_exit:
+ * @hw: pointer to the HW structure.
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in words from module start
+- * @words: number of words to write
+- * @data: buffer with words to write to the Shadow RAM
++ * @words: number of words to read
++ * @data: buffer with words to read to the Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+- * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
++ * Reads a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+-static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw,
+- u8 module_pointer, u32 offset,
+- u16 words, void *data,
+- bool last_command)
++static int i40e_read_nvm_aq(struct i40e_hw *hw,
++ u8 module_pointer, u32 offset,
++ u16 words, void *data,
++ bool last_command)
+ {
+- i40e_status ret_code = I40E_ERR_NVM;
+ struct i40e_asq_cmd_details cmd_details;
++ int ret_code = I40E_ERR_NVM;
+
+ memset(&cmd_details, 0, sizeof(cmd_details));
+ cmd_details.wb_desc = &hw->nvm_wb_desc;
+@@ -234,18 +234,18 @@ static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw,
+ */
+ if ((offset + words) > hw->nvm.sr_size)
+ i40e_debug(hw, I40E_DEBUG_NVM,
+- "NVM write error: offset %d beyond Shadow RAM limit %d\n",
++ "NVM read error: offset %d beyond Shadow RAM limit %d\n",
+ (offset + words), hw->nvm.sr_size);
+ else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
+- /* We can write only up to 4KB (one sector), in one AQ write */
++ /* We can read only up to 4KB (one sector), in one AQ write */
+ i40e_debug(hw, I40E_DEBUG_NVM,
+- "NVM write fail error: tried to write %d words, limit is %d.\n",
++ "NVM read fail error: tried to read %d words, limit is %d.\n",
+ words, I40E_SR_SECTOR_SIZE_IN_WORDS);
+ else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
+ != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
+- /* A single write cannot spread over two sectors */
++ /* A single read cannot spread over two sectors */
+ i40e_debug(hw, I40E_DEBUG_NVM,
+- "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
++ "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n",
+ offset, words);
+ else
+ ret_code = i40e_aq_read_nvm(hw, module_pointer,
+@@ -264,10 +264,10 @@ static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw,
+ *
+ * Reads one 16 bit word from the Shadow RAM using the AdminQ
+ **/
+-static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+- u16 *data)
++static int i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
++ u16 *data)
+ {
+- i40e_status ret_code = I40E_ERR_TIMEOUT;
++ int ret_code = I40E_ERR_TIMEOUT;
+
+ ret_code = i40e_read_nvm_aq(hw, 0x0, offset, 1, data, true);
+ *data = le16_to_cpu(*(__le16 *)data);
+@@ -286,8 +286,8 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+ * Do not use this function except in cases where the nvm lock is already
+ * taken via i40e_acquire_nvm().
+ **/
+-static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
+- u16 offset, u16 *data)
++static int __i40e_read_nvm_word(struct i40e_hw *hw,
++ u16 offset, u16 *data)
+ {
+ if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ return i40e_read_nvm_word_aq(hw, offset, data);
+@@ -303,10 +303,10 @@ static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
+ *
+ * Reads one 16 bit word from the Shadow RAM.
+ **/
+-i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+- u16 *data)
++int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
++ u16 *data)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK)
+ ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+@@ -330,17 +330,17 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+ * @words_data_size: Words to read from NVM
+ * @data_ptr: Pointer to memory location where resulting buffer will be stored
+ **/
+-enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+- u8 module_ptr,
+- u16 module_offset,
+- u16 data_offset,
+- u16 words_data_size,
+- u16 *data_ptr)
++int i40e_read_nvm_module_data(struct i40e_hw *hw,
++ u8 module_ptr,
++ u16 module_offset,
++ u16 data_offset,
++ u16 words_data_size,
++ u16 *data_ptr)
+ {
+- i40e_status status;
+ u16 specific_ptr = 0;
+ u16 ptr_value = 0;
+ u32 offset = 0;
++ int status;
+
+ if (module_ptr != 0) {
+ status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
+@@ -406,10 +406,10 @@ enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+-static i40e_status i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
+- u16 *words, u16 *data)
++static int i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
++ u16 *words, u16 *data)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+ u16 index, word;
+
+ /* Loop thru the selected region */
+@@ -437,13 +437,13 @@ static i40e_status i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+-static i40e_status i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
+- u16 *words, u16 *data)
++static int i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
++ u16 *words, u16 *data)
+ {
+- i40e_status ret_code;
+- u16 read_size;
+ bool last_cmd = false;
+ u16 words_read = 0;
++ u16 read_size;
++ int ret_code;
+ u16 i = 0;
+
+ do {
+@@ -493,9 +493,9 @@ read_nvm_buffer_aq_exit:
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method.
+ **/
+-static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
+- u16 offset, u16 *words,
+- u16 *data)
++static int __i40e_read_nvm_buffer(struct i40e_hw *hw,
++ u16 offset, u16 *words,
++ u16 *data)
+ {
+ if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ return i40e_read_nvm_buffer_aq(hw, offset, words, data);
+@@ -514,10 +514,10 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+-i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+- u16 *words, u16 *data)
++int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
++ u16 *words, u16 *data)
+ {
+- i40e_status ret_code = 0;
++ int ret_code = 0;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
+ ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+@@ -544,12 +544,12 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+ *
+ * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+-static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 words, void *data,
+- bool last_command)
++static int i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 words, void *data,
++ bool last_command)
+ {
+- i40e_status ret_code = I40E_ERR_NVM;
+ struct i40e_asq_cmd_details cmd_details;
++ int ret_code = I40E_ERR_NVM;
+
+ memset(&cmd_details, 0, sizeof(cmd_details));
+ cmd_details.wb_desc = &hw->nvm_wb_desc;
+@@ -594,14 +594,14 @@ static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ * is customer specific and unknown. Therefore, this function skips all maximum
+ * possible size of VPD (1kB).
+ **/
+-static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
+- u16 *checksum)
++static int i40e_calc_nvm_checksum(struct i40e_hw *hw,
++ u16 *checksum)
+ {
+- i40e_status ret_code;
+ struct i40e_virt_mem vmem;
+ u16 pcie_alt_module = 0;
+ u16 checksum_local = 0;
+ u16 vpd_module = 0;
++ int ret_code;
+ u16 *data;
+ u16 i = 0;
+
+@@ -675,11 +675,11 @@ i40e_calc_nvm_checksum_exit:
+ * on ARQ completion event reception by caller.
+ * This function will commit SR to NVM.
+ **/
+-i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw)
++int i40e_update_nvm_checksum(struct i40e_hw *hw)
+ {
+- i40e_status ret_code;
+- u16 checksum;
+ __le16 le_sum;
++ int ret_code;
++ u16 checksum;
+
+ ret_code = i40e_calc_nvm_checksum(hw, &checksum);
+ le_sum = cpu_to_le16(checksum);
+@@ -698,12 +698,12 @@ i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw)
+ * Performs checksum calculation and validates the NVM SW checksum. If the
+ * caller does not need checksum, the value can be NULL.
+ **/
+-i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
+- u16 *checksum)
++int i40e_validate_nvm_checksum(struct i40e_hw *hw,
++ u16 *checksum)
+ {
+- i40e_status ret_code = 0;
+- u16 checksum_sr = 0;
+ u16 checksum_local = 0;
++ u16 checksum_sr = 0;
++ int ret_code = 0;
+
+ /* We must acquire the NVM lock in order to correctly synchronize the
+ * NVM accesses across multiple PFs. Without doing so it is possible
+@@ -732,36 +732,36 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
+ return ret_code;
+ }
+
+-static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_state_reading(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *errno);
++static int i40e_nvmupd_state_init(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_state_reading(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_state_writing(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *errno);
+ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ int *perrno);
+-static i40e_status i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- int *perrno);
+-static i40e_status i40e_nvmupd_nvm_write(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_nvm_read(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
+-static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno);
++static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ int *perrno);
++static int i40e_nvmupd_nvm_write(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_nvm_read(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_exec_aq(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
++static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno);
+ static inline u8 i40e_nvmupd_get_module(u32 val)
+ {
+ return (u8)(val & I40E_NVM_MOD_PNT_MASK);
+@@ -806,12 +806,12 @@ static const char * const i40e_nvm_update_state_str[] = {
+ *
+ * Dispatches command depending on what update state is current
+ **/
+-i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++int i40e_nvmupd_command(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+- i40e_status status;
+ enum i40e_nvmupd_cmd upd_cmd;
++ int status;
+
+ /* assume success */
+ *perrno = 0;
+@@ -922,12 +922,12 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
+ * Process legitimate commands of the Init state and conditionally set next
+ * state. Reject all other commands.
+ **/
+-static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_state_init(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+- i40e_status status = 0;
+ enum i40e_nvmupd_cmd upd_cmd;
++ int status = 0;
+
+ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+@@ -1061,12 +1061,12 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
+ * NVM ownership is already held. Process legitimate commands and set any
+ * change in state; reject all other commands.
+ **/
+-static i40e_status i40e_nvmupd_state_reading(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_state_reading(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+- i40e_status status = 0;
+ enum i40e_nvmupd_cmd upd_cmd;
++ int status = 0;
+
+ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+@@ -1103,13 +1103,13 @@ static i40e_status i40e_nvmupd_state_reading(struct i40e_hw *hw,
+ * NVM ownership is already held. Process legitimate commands and set any
+ * change in state; reject all other commands
+ **/
+-static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_state_writing(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+- i40e_status status = 0;
+ enum i40e_nvmupd_cmd upd_cmd;
+ bool retry_attempt = false;
++ int status = 0;
+
+ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+@@ -1186,8 +1186,8 @@ retry:
+ */
+ if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) &&
+ !retry_attempt) {
+- i40e_status old_status = status;
+ u32 old_asq_status = hw->aq.asq_last_status;
++ int old_status = status;
+ u32 gtime;
+
+ gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+@@ -1369,17 +1369,17 @@ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+-static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_exec_aq(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+ struct i40e_asq_cmd_details cmd_details;
+- i40e_status status;
+ struct i40e_aq_desc *aq_desc;
+ u32 buff_size = 0;
+ u8 *buff = NULL;
+ u32 aq_desc_len;
+ u32 aq_data_len;
++ int status;
+
+ i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+ if (cmd->offset == 0xffff)
+@@ -1428,8 +1428,8 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+ buff_size, &cmd_details);
+ if (status) {
+ i40e_debug(hw, I40E_DEBUG_NVM,
+- "i40e_nvmupd_exec_aq err %s aq_err %s\n",
+- i40e_stat_str(hw, status),
++ "%s err %d aq_err %s\n",
++ __func__, status,
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+ return status;
+@@ -1453,9 +1453,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+-static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+ u32 aq_total_len;
+ u32 aq_desc_len;
+@@ -1522,9 +1522,9 @@ static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+-static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+ u32 aq_total_len;
+ u32 aq_desc_len;
+@@ -1556,13 +1556,13 @@ static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+-static i40e_status i40e_nvmupd_nvm_read(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_nvm_read(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+ struct i40e_asq_cmd_details cmd_details;
+- i40e_status status;
+ u8 module, transaction;
++ int status;
+ bool last;
+
+ transaction = i40e_nvmupd_get_transaction(cmd->config);
+@@ -1595,13 +1595,13 @@ static i40e_status i40e_nvmupd_nvm_read(struct i40e_hw *hw,
+ *
+ * module, offset, data_size and data are in cmd structure
+ **/
+-static i40e_status i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- int *perrno)
++static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ int *perrno)
+ {
+- i40e_status status = 0;
+ struct i40e_asq_cmd_details cmd_details;
+ u8 module, transaction;
++ int status = 0;
+ bool last;
+
+ transaction = i40e_nvmupd_get_transaction(cmd->config);
+@@ -1635,14 +1635,14 @@ static i40e_status i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
+ *
+ * module, offset, data_size and data are in cmd structure
+ **/
+-static i40e_status i40e_nvmupd_nvm_write(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *perrno)
++static int i40e_nvmupd_nvm_write(struct i40e_hw *hw,
++ struct i40e_nvm_access *cmd,
++ u8 *bytes, int *perrno)
+ {
+- i40e_status status = 0;
+ struct i40e_asq_cmd_details cmd_details;
+ u8 module, transaction;
+ u8 preservation_flags;
++ int status = 0;
+ bool last;
+
+ transaction = i40e_nvmupd_get_transaction(cmd->config);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+index 2f6815b2f8df8..2bd4de03dafa2 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+@@ -56,5 +56,4 @@ do { \
+ (h)->bus.func, ##__VA_ARGS__); \
+ } while (0)
+
+-typedef enum i40e_status_code i40e_status;
+ #endif /* _I40E_OSDEP_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+index aaea297640e09..c9e2b4875f063 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+@@ -16,17 +16,17 @@
+ */
+
+ /* adminq functions */
+-i40e_status i40e_init_adminq(struct i40e_hw *hw);
++int i40e_init_adminq(struct i40e_hw *hw);
+ void i40e_shutdown_adminq(struct i40e_hw *hw);
+ void i40e_adminq_init_ring_data(struct i40e_hw *hw);
+-i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
+- struct i40e_arq_event_info *e,
+- u16 *events_pending);
+-i40e_status i40e_asq_send_command(struct i40e_hw *hw,
+- struct i40e_aq_desc *desc,
+- void *buff, /* can be NULL */
+- u16 buff_size,
+- struct i40e_asq_cmd_details *cmd_details);
++int i40e_clean_arq_element(struct i40e_hw *hw,
++ struct i40e_arq_event_info *e,
++ u16 *events_pending);
++int i40e_asq_send_command(struct i40e_hw *hw,
++ struct i40e_aq_desc *desc,
++ void *buff, /* can be NULL */
++ u16 buff_size,
++ struct i40e_asq_cmd_details *cmd_details);
+
+ /* debug function for adminq */
+ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
+@@ -34,314 +34,269 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
+
+ void i40e_idle_aq(struct i40e_hw *hw);
+ bool i40e_check_asq_alive(struct i40e_hw *hw);
+-i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
++int i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
+ const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err);
+-const char *i40e_stat_str(struct i40e_hw *hw, i40e_status stat_err);
+
+-i40e_status i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
+- bool pf_lut, u8 *lut, u16 lut_size);
+-i40e_status i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 seid,
+- bool pf_lut, u8 *lut, u16 lut_size);
+-i40e_status i40e_aq_get_rss_key(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_get_set_rss_key_data *key);
+-i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_get_set_rss_key_data *key);
++int i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
++ bool pf_lut, u8 *lut, u16 lut_size);
++int i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 seid,
++ bool pf_lut, u8 *lut, u16 lut_size);
++int i40e_aq_get_rss_key(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_get_set_rss_key_data *key);
++int i40e_aq_set_rss_key(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_get_set_rss_key_data *key);
+
+ u32 i40e_led_get(struct i40e_hw *hw);
+ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink);
+-i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on,
+- u16 led_addr, u32 mode);
+-i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
+- u16 *val);
+-i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
+- u32 time, u32 interval);
++int i40e_led_set_phy(struct i40e_hw *hw, bool on, u16 led_addr, u32 mode);
++int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u16 *val);
++int i40e_blink_phy_link_led(struct i40e_hw *hw, u32 time, u32 interval);
+
+ /* admin send queue commands */
+
+-i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw,
+- u16 *fw_major_version, u16 *fw_minor_version,
+- u32 *fw_build,
+- u16 *api_major_version, u16 *api_minor_version,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_debug_write_register(struct i40e_hw *hw,
+- u32 reg_addr, u64 reg_val,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_debug_read_register(struct i40e_hw *hw,
+- u32 reg_addr, u64 *reg_val,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+- bool qualified_modules, bool report_init,
+- struct i40e_aq_get_phy_abilities_resp *abilities,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
+- struct i40e_aq_set_phy_config *config,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+- bool atomic_reset);
+-i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+- bool enable_link,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
+- bool enable_lse, struct i40e_link_status *link,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_local_advt_reg(struct i40e_hw *hw,
+- u64 advt_reg,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_send_driver_version(struct i40e_hw *hw,
++int i40e_aq_get_firmware_version(struct i40e_hw *hw,
++ u16 *fw_major_version, u16 *fw_minor_version,
++ u32 *fw_build,
++ u16 *api_major_version, u16 *api_minor_version,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_debug_write_register(struct i40e_hw *hw, u32 reg_addr, u64 reg_val,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_debug_read_register(struct i40e_hw *hw,
++ u32 reg_addr, u64 *reg_val,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
++ bool qualified_modules, bool report_init,
++ struct i40e_aq_get_phy_abilities_resp *abilities,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_phy_config(struct i40e_hw *hw, struct i40e_aq_set_phy_config *config,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, bool atomic_reset);
++int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_link_restart_an(struct i40e_hw *hw, bool enable_link,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_link_info(struct i40e_hw *hw, bool enable_lse,
++ struct i40e_link_status *link,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_local_advt_reg(struct i40e_hw *hw, u64 advt_reg,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_send_driver_version(struct i40e_hw *hw,
+ struct i40e_driver_version *dv,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+- u16 vsi_id, bool set_filter,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+- u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details,
+- bool rx_only_promisc);
+-i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+- u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- u16 vid,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- u16 vid,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+- u16 seid, bool enable, u16 vid,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
+- u16 seid, bool enable,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+- struct i40e_vsi_context *vsi_ctx,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+- u16 downlink_seid, u8 enabled_tc,
+- bool default_port, u16 *pveb_seid,
+- bool enable_stats,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+- u16 veb_seid, u16 *switch_id, bool *floating,
+- u16 *statistic_index, u16 *vebs_used,
+- u16 *vebs_free,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
++int i40e_aq_add_vsi(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw, u16 vsi_id, bool set_filter,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
++ u16 vsi_id, bool set,
++ struct i40e_asq_cmd_details *cmd_details,
++ bool rx_only_promisc);
++int i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
++ u16 vsi_id, bool set,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, u16 seid,
++ bool enable, u16 vid,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
++ u16 seid, bool enable, u16 vid,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
++ u16 seid, bool enable, u16 vid,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
++ u16 seid, bool enable,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_vsi_params(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_update_vsi_params(struct i40e_hw *hw,
++ struct i40e_vsi_context *vsi_ctx,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
++ u16 downlink_seid, u8 enabled_tc,
++ bool default_port, u16 *pveb_seid,
++ bool enable_stats,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_veb_parameters(struct i40e_hw *hw,
++ u16 veb_seid, u16 *switch_id, bool *floating,
++ u16 *statistic_index, u16 *vebs_used,
++ u16 *vebs_free,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
+- struct i40e_aqc_remove_macvlan_element_data *mv_list,
+- u16 count, struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+- u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list,
+- struct i40e_asq_cmd_details *cmd_details,
+- u16 *rule_id, u16 *rules_used, u16 *rules_free);
+-i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+- u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list,
+- struct i40e_asq_cmd_details *cmd_details,
+- u16 *rules_used, u16 *rules_free);
++int i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
++ struct i40e_aqc_remove_macvlan_element_data *mv_list,
++ u16 count, struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
++ u16 rule_type, u16 dest_vsi, u16 count,
++ __le16 *mr_list,
++ struct i40e_asq_cmd_details *cmd_details,
++ u16 *rule_id, u16 *rules_used, u16 *rules_free);
++int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
++ u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list,
++ struct i40e_asq_cmd_details *cmd_details,
++ u16 *rules_used, u16 *rules_free);
+
+-i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+- u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
+- struct i40e_aqc_get_switch_config_resp *buf,
+- u16 buf_size, u16 *start_seid,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
+- u16 flags,
+- u16 valid_flags, u8 mode,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
+- enum i40e_aq_resources_ids resource,
+- enum i40e_aq_resource_access_type access,
+- u8 sdp_number, u64 *timeout,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_release_resource(struct i40e_hw *hw,
+- enum i40e_aq_resources_ids resource,
+- u8 sdp_number,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, void *data,
+- bool last_command,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, bool last_command,
++int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
++ u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_switch_config(struct i40e_hw *hw,
++ struct i40e_aqc_get_switch_config_resp *buf,
++ u16 buf_size, u16 *start_seid,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
+- void *buff, u16 buff_size, u16 *data_size,
+- enum i40e_admin_queue_opc list_type_opc,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+- u32 offset, u16 length, void *data,
+- bool last_command, u8 preservation_flags,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+- u8 rearrange_nvm,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+- u8 mib_type, void *buff, u16 buff_size,
+- u16 *local_len, u16 *remote_len,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code
+-i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+- u8 mib_type, void *buff, u16 buff_size,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+- bool enable_update,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code
+-i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
++int i40e_aq_set_switch_config(struct i40e_hw *hw, u16 flags,
++ u16 valid_flags, u8 mode,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_request_resource(struct i40e_hw *hw,
++ enum i40e_aq_resources_ids resource,
++ enum i40e_aq_resource_access_type access,
++ u8 sdp_number, u64 *timeout,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_release_resource(struct i40e_hw *hw,
++ enum i40e_aq_resources_ids resource, u8 sdp_number,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, void *data, bool last_command,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+- bool persist,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_set_dcb_parameters(struct i40e_hw *hw,
+- bool dcb_enable,
+- struct i40e_asq_cmd_details
+- *cmd_details);
+-i40e_status i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
++int i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, bool last_command,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_discover_capabilities(struct i40e_hw *hw,
++ void *buff, u16 buff_size, u16 *data_size,
++ enum i40e_admin_queue_opc list_type_opc,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
++ u32 offset, u16 length, void *data,
++ bool last_command, u8 preservation_flags,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_rearrange_nvm(struct i40e_hw *hw, u8 rearrange_nvm,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
++ u8 mib_type, void *buff, u16 buff_size,
++ u16 *local_len, u16 *remote_len,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_lldp_mib(struct i40e_hw *hw, u8 mib_type,
++ void *buff, u16 buff_size,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
++ bool enable_update,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, bool persist,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+- void *buff, u16 buff_size,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+- u16 udp_port, u8 protocol_index,
+- u8 *filter_index,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
+- u16 flags, u8 *mac_addr,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
++int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
++ void *buff, u16 buff_size,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_add_udp_tunnel(struct i40e_hw *hw, u16 udp_port,
++ u8 protocol_index, u8 *filter_index,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_mac_address_write(struct i40e_hw *hw, u16 flags, u8 *mac_addr,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_credit,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw,
+- u16 seid, u16 credit, u8 max_bw,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, u16 seid,
+- struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
++int i40e_aq_dcb_updated(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
+- enum i40e_admin_queue_opc opcode,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_configure_switching_comp_bw_config_data *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+- u16 seid,
+- struct i40e_aqc_query_port_ets_config_resp *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+- u16 seid,
++int i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw,
++ u16 seid, u16 credit, u8 max_bw,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_config_switch_comp_ets(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
++ enum i40e_admin_queue_opc opcode,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_configure_switching_comp_bw_config_data *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_query_vsi_bw_config(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_query_port_ets_config(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_query_port_ets_config_resp *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
+- struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code
+-i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+- struct i40e_aqc_cloud_filters_element_bb *filters,
+- u8 filter_count);
+-enum i40e_status_code
+-i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi,
+- struct i40e_aqc_cloud_filters_element_data *filters,
+- u8 filter_count);
+-enum i40e_status_code
+-i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
+- struct i40e_aqc_cloud_filters_element_data *filters,
+- u8 filter_count);
+-enum i40e_status_code
+-i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+- struct i40e_aqc_cloud_filters_element_bb *filters,
+- u8 filter_count);
+-i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
+- struct i40e_lldp_variables *lldp_cfg);
+-enum i40e_status_code
+-i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
+- struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_resume_port_tx(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_cloud_filters_element_bb *filters,
++ u8 filter_count);
++int i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi,
++ struct i40e_aqc_cloud_filters_element_data *filters,
++ u8 filter_count);
++int i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
++ struct i40e_aqc_cloud_filters_element_data *filters,
++ u8 filter_count);
++int i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
++ struct i40e_aqc_cloud_filters_element_bb *filters,
++ u8 filter_count);
++int i40e_read_lldp_cfg(struct i40e_hw *hw, struct i40e_lldp_variables *lldp_cfg);
++int i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
++ struct i40e_asq_cmd_details *cmd_details);
+ /* i40e_common */
+-i40e_status i40e_init_shared_code(struct i40e_hw *hw);
+-i40e_status i40e_pf_reset(struct i40e_hw *hw);
++int i40e_init_shared_code(struct i40e_hw *hw);
++int i40e_pf_reset(struct i40e_hw *hw);
+ void i40e_clear_hw(struct i40e_hw *hw);
+ void i40e_clear_pxe_mode(struct i40e_hw *hw);
+-i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up);
+-i40e_status i40e_update_link_info(struct i40e_hw *hw);
+-i40e_status i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+-i40e_status i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+- u32 *max_bw, u32 *min_bw, bool *min_valid,
+- bool *max_valid);
+-i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+- struct i40e_aqc_configure_partition_bw_data *bw_data,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+-i40e_status i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+- u32 pba_num_size);
+-i40e_status i40e_validate_mac_addr(u8 *mac_addr);
++int i40e_get_link_status(struct i40e_hw *hw, bool *link_up);
++int i40e_update_link_info(struct i40e_hw *hw);
++int i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
++int i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
++ u32 *max_bw, u32 *min_bw, bool *min_valid,
++ bool *max_valid);
++int i40e_aq_configure_partition_bw(struct i40e_hw *hw,
++ struct i40e_aqc_configure_partition_bw_data *bw_data,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
++int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, u32 pba_num_size);
++int i40e_validate_mac_addr(u8 *mac_addr);
+ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
+ /* prototype for functions used for NVM access */
+-i40e_status i40e_init_nvm(struct i40e_hw *hw);
+-i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
+- enum i40e_aq_resource_access_type access);
++int i40e_init_nvm(struct i40e_hw *hw);
++int i40e_acquire_nvm(struct i40e_hw *hw, enum i40e_aq_resource_access_type access);
+ void i40e_release_nvm(struct i40e_hw *hw);
+-i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+- u16 *data);
+-enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw,
+- u8 module_ptr,
+- u16 module_offset,
+- u16 data_offset,
+- u16 words_data_size,
+- u16 *data_ptr);
+-i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+- u16 *words, u16 *data);
+-i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
+-i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
+- u16 *checksum);
+-i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
+- struct i40e_nvm_access *cmd,
+- u8 *bytes, int *);
++int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data);
++int i40e_read_nvm_module_data(struct i40e_hw *hw, u8 module_ptr,
++ u16 module_offset, u16 data_offset,
++ u16 words_data_size, u16 *data_ptr);
++int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data);
++int i40e_update_nvm_checksum(struct i40e_hw *hw);
++int i40e_validate_nvm_checksum(struct i40e_hw *hw, u16 *checksum);
++int i40e_nvmupd_command(struct i40e_hw *hw, struct i40e_nvm_access *cmd,
++ u8 *bytes, int *);
+ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+ struct i40e_aq_desc *desc);
+ void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
+ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
+
+-i40e_status i40e_set_mac_type(struct i40e_hw *hw);
++int i40e_set_mac_type(struct i40e_hw *hw);
+
+ extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+@@ -390,41 +345,41 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed)
+ /* i40e_common for VF drivers*/
+ void i40e_vf_parse_hw_config(struct i40e_hw *hw,
+ struct virtchnl_vf_resource *msg);
+-i40e_status i40e_vf_reset(struct i40e_hw *hw);
+-i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
+- enum virtchnl_ops v_opcode,
+- i40e_status v_retval,
+- u8 *msg, u16 msglen,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_set_filter_control(struct i40e_hw *hw,
+- struct i40e_filter_control_settings *settings);
+-i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+- u8 *mac_addr, u16 ethtype, u16 flags,
+- u16 vsi_seid, u16 queue, bool is_add,
+- struct i40e_control_filter_stats *stats,
+- struct i40e_asq_cmd_details *cmd_details);
+-i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+- u8 table_id, u32 start_index, u16 buff_size,
+- void *buff, u16 *ret_buff_size,
+- u8 *ret_next_table, u32 *ret_next_index,
+- struct i40e_asq_cmd_details *cmd_details);
++int i40e_vf_reset(struct i40e_hw *hw);
++int i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
++ enum virtchnl_ops v_opcode,
++ int v_retval,
++ u8 *msg, u16 msglen,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_set_filter_control(struct i40e_hw *hw,
++ struct i40e_filter_control_settings *settings);
++int i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
++ u8 *mac_addr, u16 ethtype, u16 flags,
++ u16 vsi_seid, u16 queue, bool is_add,
++ struct i40e_control_filter_stats *stats,
++ struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
++ u8 table_id, u32 start_index, u16 buff_size,
++ void *buff, u16 *ret_buff_size,
++ u8 *ret_next_table, u32 *ret_next_index,
++ struct i40e_asq_cmd_details *cmd_details);
+ void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
+ u16 vsi_seid);
+-i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+- u32 reg_addr, u32 *reg_val,
+- struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
++ u32 reg_addr, u32 *reg_val,
++ struct i40e_asq_cmd_details *cmd_details);
+ u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr);
+-i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+- u32 reg_addr, u32 reg_val,
+- struct i40e_asq_cmd_details *cmd_details);
++int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
++ u32 reg_addr, u32 reg_val,
++ struct i40e_asq_cmd_details *cmd_details);
+ void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+-enum i40e_status_code
++int
+ i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr, bool page_change,
+ bool set_mdio, u8 mdio_num,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+-enum i40e_status_code
++int
+ i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr, bool page_change,
+ bool set_mdio, u8 mdio_num,
+@@ -437,43 +392,43 @@ i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+ #define i40e_aq_get_phy_register(hw, ps, da, pc, ra, rv, cd) \
+ i40e_aq_get_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
+
+-i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
+- u16 reg, u8 phy_addr, u16 *value);
+-i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
+- u16 reg, u8 phy_addr, u16 value);
+-i40e_status i40e_read_phy_register_clause45(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 *value);
+-i40e_status i40e_write_phy_register_clause45(struct i40e_hw *hw,
+- u8 page, u16 reg, u8 phy_addr, u16 value);
+-i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+- u8 phy_addr, u16 *value);
+-i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+- u8 phy_addr, u16 value);
++int i40e_read_phy_register_clause22(struct i40e_hw *hw,
++ u16 reg, u8 phy_addr, u16 *value);
++int i40e_write_phy_register_clause22(struct i40e_hw *hw,
++ u16 reg, u8 phy_addr, u16 value);
++int i40e_read_phy_register_clause45(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 *value);
++int i40e_write_phy_register_clause45(struct i40e_hw *hw,
++ u8 page, u16 reg, u8 phy_addr, u16 value);
++int i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
++ u8 phy_addr, u16 *value);
++int i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
++ u8 phy_addr, u16 value);
+ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
+-i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
+- u32 time, u32 interval);
+-i40e_status i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
+- u16 buff_size, u32 track_id,
+- u32 *error_offset, u32 *error_info,
+- struct i40e_asq_cmd_details *
+- cmd_details);
+-i40e_status i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
+- u16 buff_size, u8 flags,
+- struct i40e_asq_cmd_details *
+- cmd_details);
++int i40e_blink_phy_link_led(struct i40e_hw *hw,
++ u32 time, u32 interval);
++int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
++ u16 buff_size, u32 track_id,
++ u32 *error_offset, u32 *error_info,
++ struct i40e_asq_cmd_details *
++ cmd_details);
++int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
++ u16 buff_size, u8 flags,
++ struct i40e_asq_cmd_details *
++ cmd_details);
+ struct i40e_generic_seg_header *
+ i40e_find_segment_in_package(u32 segment_type,
+ struct i40e_package_header *pkg_header);
+ struct i40e_profile_section_header *
+ i40e_find_section_in_profile(u32 section_type,
+ struct i40e_profile_segment *profile);
+-enum i40e_status_code
++int
+ i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+ u32 track_id);
+-enum i40e_status_code
++int
+ i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+ u32 track_id);
+-enum i40e_status_code
++int
+ i40e_add_pinfo_to_list(struct i40e_hw *hw,
+ struct i40e_profile_segment *profile,
+ u8 *profile_info_sec, u32 track_id);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
+index 8d0588a27a053..7339003aa17cd 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
+@@ -211,6 +211,11 @@
+ #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
+ #define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16
+ #define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT)
++#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4 /* Reset: PCIR */
++#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0
++#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT)
++#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16
++#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT)
+ #define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */
+ #define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0
+ #define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT)
+@@ -413,6 +418,9 @@
+ #define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+ #define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+ #define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
++#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
++#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
++#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+ #define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+ #define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
+ #define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
+@@ -640,6 +648,14 @@
+ #define I40E_VFQF_HKEY1_MAX_INDEX 12
+ #define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */
+ #define I40E_VFQF_HLUT1_MAX_INDEX 15
++#define I40E_GL_RXERR1H(_i) (0x00318004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
++#define I40E_GL_RXERR1H_MAX_INDEX 143
++#define I40E_GL_RXERR1H_RXERR1H_SHIFT 0
++#define I40E_GL_RXERR1H_RXERR1H_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1H_RXERR1H_SHIFT)
++#define I40E_GL_RXERR1L(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
++#define I40E_GL_RXERR1L_MAX_INDEX 143
++#define I40E_GL_RXERR1L_RXERR1L_SHIFT 0
++#define I40E_GL_RXERR1L_RXERR1L_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1L_RXERR1L_SHIFT)
+ #define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+ #define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+ #define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 10a83e5385c70..9787e794eeda6 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -170,10 +170,10 @@ static char *i40e_create_dummy_packet(u8 *dummy_packet, bool ipv4, u8 l4proto,
+ struct i40e_fdir_filter *data)
+ {
+ bool is_vlan = !!data->vlan_tag;
+- struct vlan_hdr vlan;
+- struct ipv6hdr ipv6;
+- struct ethhdr eth;
+- struct iphdr ip;
++ struct vlan_hdr vlan = {};
++ struct ipv6hdr ipv6 = {};
++ struct ethhdr eth = {};
++ struct iphdr ip = {};
+ u8 *tmp;
+
+ if (ipv4) {
+@@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
+ i40e_clean_tx_ring(tx_ring);
+ kfree(tx_ring->tx_bi);
+ tx_ring->tx_bi = NULL;
+- kfree(tx_ring->xsk_descs);
+- tx_ring->xsk_descs = NULL;
+
+ if (tx_ring->desc) {
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+@@ -1433,13 +1431,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
+ if (!tx_ring->tx_bi)
+ goto err;
+
+- if (ring_is_xdp(tx_ring)) {
+- tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs),
+- GFP_KERNEL);
+- if (!tx_ring->xsk_descs)
+- goto err;
+- }
+-
+ u64_stats_init(&tx_ring->syncp);
+
+ /* round up to nearest 4K */
+@@ -1463,21 +1454,11 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
+ return 0;
+
+ err:
+- kfree(tx_ring->xsk_descs);
+- tx_ring->xsk_descs = NULL;
+ kfree(tx_ring->tx_bi);
+ tx_ring->tx_bi = NULL;
+ return -ENOMEM;
+ }
+
+-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
+-{
+- unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
+-
+- rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
+- return rx_ring->rx_bi ? 0 : -ENOMEM;
+-}
+-
+ static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+ {
+ memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+@@ -1608,6 +1589,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
+
+ rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+
++ rx_ring->rx_bi =
++ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
++ if (!rx_ring->rx_bi)
++ return -ENOMEM;
++
+ return 0;
+ }
+
+@@ -3662,7 +3648,8 @@ u16 i40e_lan_select_queue(struct net_device *netdev,
+ u8 prio;
+
+ /* is DCB enabled at all? */
+- if (vsi->tc_config.numtc == 1)
++ if (vsi->tc_config.numtc == 1 ||
++ i40e_is_tc_mqprio_enabled(vsi->back))
+ return netdev_pick_tx(netdev, skb, sb_dev);
+
+ prio = skb->priority;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+index bfc2845c99d1c..f3b0b81517096 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+@@ -390,7 +390,6 @@ struct i40e_ring {
+ u16 rx_offset;
+ struct xdp_rxq_info xdp_rxq;
+ struct xsk_buff_pool *xsk_pool;
+- struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */
+ } ____cacheline_internodealigned_in_smp;
+
+ static inline bool ring_uses_build_skb(struct i40e_ring *ring)
+@@ -467,7 +466,6 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
+ bool __i40e_chk_linearize(struct sk_buff *skb);
+ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+-int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
+
+ /**
+ * i40e_get_head - Retrieve head from head writeback
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
+index 36a4ca1ffb1a9..388c3d36d96a5 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
+@@ -1172,6 +1172,7 @@ struct i40e_eth_stats {
+ u64 tx_broadcast; /* bptc */
+ u64 tx_discards; /* tdpc */
+ u64 tx_errors; /* tepc */
++ u64 rx_discards_other; /* rxerr1 */
+ };
+
+ /* Statistics collected per VEB per TC */
+@@ -1403,6 +1404,10 @@ struct i40e_lldp_variables {
+ #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000
+
+ /* INPUT SET MASK for RSS, flow director, and flexible payload */
++#define I40E_X722_L3_SRC_SHIFT 49
++#define I40E_X722_L3_SRC_MASK (0x3ULL << I40E_X722_L3_SRC_SHIFT)
++#define I40E_X722_L3_DST_SHIFT 41
++#define I40E_X722_L3_DST_MASK (0x3ULL << I40E_X722_L3_DST_SHIFT)
+ #define I40E_L3_SRC_SHIFT 47
+ #define I40E_L3_SRC_MASK (0x3ULL << I40E_L3_SRC_SHIFT)
+ #define I40E_L3_V6_SRC_SHIFT 43
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+index 472f56b360b8c..46758bbcb04f4 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -17,7 +17,7 @@
+ **/
+ static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
+ enum virtchnl_ops v_opcode,
+- i40e_status v_retval, u8 *msg,
++ int v_retval, u8 *msg,
+ u16 msglen)
+ {
+ struct i40e_hw *hw = &pf->hw;
+@@ -183,17 +183,18 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
+ /***********************misc routines*****************************/
+
+ /**
+- * i40e_vc_disable_vf
++ * i40e_vc_reset_vf
+ * @vf: pointer to the VF info
+- *
+- * Disable the VF through a SW reset.
++ * @notify_vf: notify vf about reset or not
++ * Reset VF handler.
+ **/
+-static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
++static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
+ {
+ struct i40e_pf *pf = vf->pf;
+ int i;
+
+- i40e_vc_notify_vf_reset(vf);
++ if (notify_vf)
++ i40e_vc_notify_vf_reset(vf);
+
+ /* We want to ensure that an actual reset occurs initiated after this
+ * function was called. However, we do not want to wait forever, so
+@@ -211,9 +212,14 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
+ usleep_range(10000, 20000);
+ }
+
+- dev_warn(&vf->pf->pdev->dev,
+- "Failed to initiate reset for VF %d after 200 milliseconds\n",
+- vf->vf_id);
++ if (notify_vf)
++ dev_warn(&vf->pf->pdev->dev,
++ "Failed to initiate reset for VF %d after 200 milliseconds\n",
++ vf->vf_id);
++ else
++ dev_dbg(&vf->pf->pdev->dev,
++ "Failed to initiate reset for VF %d after 200 milliseconds\n",
++ vf->vf_id);
+ }
+
+ /**
+@@ -674,14 +680,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
+ u16 vsi_queue_id,
+ struct virtchnl_rxq_info *info)
+ {
++ u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+ struct i40e_pf *pf = vf->pf;
++ struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_hmc_obj_rxq rx_ctx;
+- u16 pf_queue_id;
+ int ret = 0;
+
+- pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+-
+ /* clear the context structure first */
+ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
+
+@@ -719,6 +724,10 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
+ }
+ rx_ctx.rxmax = info->max_pkt_size;
+
++ /* if port VLAN is configured increase the max packet size */
++ if (vsi->info.pvid)
++ rx_ctx.rxmax += VLAN_HLEN;
++
+ /* enable 32bytes desc always */
+ rx_ctx.dsize = 1;
+
+@@ -1237,13 +1246,13 @@ err:
+ * @vl: List of VLANs - apply filter for given VLANs
+ * @num_vlans: Number of elements in @vl
+ **/
+-static i40e_status
++static int
+ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ bool unicast_enable, s16 *vl, u16 num_vlans)
+ {
+- i40e_status aq_ret, aq_tmp = 0;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
++ int aq_ret, aq_tmp = 0;
+ int i;
+
+ /* No VLAN to set promisc on, set on VSI */
+@@ -1255,9 +1264,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ int aq_err = pf->hw.aq.asq_last_status;
+
+ dev_err(&pf->pdev->dev,
+- "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n",
++ "VF %d failed to set multicast promiscuous mode err %d aq_err %s\n",
+ vf->vf_id,
+- i40e_stat_str(&pf->hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(&pf->hw, aq_err));
+
+ return aq_ret;
+@@ -1271,9 +1280,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ int aq_err = pf->hw.aq.asq_last_status;
+
+ dev_err(&pf->pdev->dev,
+- "VF %d failed to set unicast promiscuous mode err %s aq_err %s\n",
++ "VF %d failed to set unicast promiscuous mode err %d aq_err %s\n",
+ vf->vf_id,
+- i40e_stat_str(&pf->hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(&pf->hw, aq_err));
+ }
+
+@@ -1288,9 +1297,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ int aq_err = pf->hw.aq.asq_last_status;
+
+ dev_err(&pf->pdev->dev,
+- "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n",
++ "VF %d failed to set multicast promiscuous mode err %d aq_err %s\n",
+ vf->vf_id,
+- i40e_stat_str(&pf->hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(&pf->hw, aq_err));
+
+ if (!aq_tmp)
+@@ -1304,9 +1313,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ int aq_err = pf->hw.aq.asq_last_status;
+
+ dev_err(&pf->pdev->dev,
+- "VF %d failed to set unicast promiscuous mode err %s aq_err %s\n",
++ "VF %d failed to set unicast promiscuous mode err %d aq_err %s\n",
+ vf->vf_id,
+- i40e_stat_str(&pf->hw, aq_ret),
++ aq_ret,
+ i40e_aq_str(&pf->hw, aq_err));
+
+ if (!aq_tmp)
+@@ -1330,13 +1339,13 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+ * Called from the VF to configure the promiscuous mode of
+ * VF vsis and from the VF reset path to reset promiscuous mode.
+ **/
+-static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
+- u16 vsi_id,
+- bool allmulti,
+- bool alluni)
++static int i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
++ u16 vsi_id,
++ bool allmulti,
++ bool alluni)
+ {
+- i40e_status aq_ret = I40E_SUCCESS;
+ struct i40e_pf *pf = vf->pf;
++ int aq_ret = I40E_SUCCESS;
+ struct i40e_vsi *vsi;
+ u16 num_vlans;
+ s16 *vl;
+@@ -1367,6 +1376,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
+ return aq_ret;
+ }
+
++/**
++ * i40e_sync_vfr_reset
++ * @hw: pointer to hw struct
++ * @vf_id: VF identifier
++ *
++ * Before trigger hardware reset, we need to know if no other process has
++ * reserved the hardware for any reset operations. This check is done by
++ * examining the status of the RSTAT1 register used to signal the reset.
++ **/
++static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id)
++{
++ u32 reg;
++ int i;
++
++ for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) {
++ reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) &
++ I40E_VFINT_ICR0_ADMINQ_MASK;
++ if (reg)
++ return 0;
++
++ usleep_range(100, 200);
++ }
++
++ return -EAGAIN;
++}
++
+ /**
+ * i40e_trigger_vf_reset
+ * @vf: pointer to the VF structure
+@@ -1381,9 +1416,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+ u32 reg, reg_idx, bit_idx;
++ bool vf_active;
++ u32 radq;
+
+ /* warn the VF */
+- clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
++ vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+
+ /* Disable VF's configuration API during reset. The flag is re-enabled
+ * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
+@@ -1397,7 +1434,19 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
+ * just need to clean up, so don't hit the VFRTRIG register.
+ */
+ if (!flr) {
+- /* reset VF using VPGEN_VFRTRIG reg */
++ /* Sync VFR reset before trigger next one */
++ radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) &
++ I40E_VFINT_ICR0_ADMINQ_MASK;
++ if (vf_active && !radq)
++ /* waiting for finish reset by virtual driver */
++ if (i40e_sync_vfr_reset(hw, vf->vf_id))
++ dev_info(&pf->pdev->dev,
++ "Reset VF %d never finished\n",
++ vf->vf_id);
++
++ /* Reset VF using VPGEN_VFRTRIG reg. It is also setting
++ * in progress state in rstat1 register.
++ */
+ reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+ reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+ wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+@@ -1487,10 +1536,12 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
+ if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state))
+ return true;
+
+- /* If the VFs have been disabled, this means something else is
+- * resetting the VF, so we shouldn't continue.
+- */
+- if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
++ /* Bail out if VFs are disabled. */
++ if (test_bit(__I40E_VF_DISABLE, pf->state))
++ return true;
++
++ /* If VF is being reset already we don't need to continue. */
++ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+ return true;
+
+ i40e_trigger_vf_reset(vf, flr);
+@@ -1527,7 +1578,8 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
+ i40e_cleanup_reset_vf(vf);
+
+ i40e_flush(hw);
+- clear_bit(__I40E_VF_DISABLE, pf->state);
++ usleep_range(20000, 40000);
++ clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states);
+
+ return true;
+ }
+@@ -1560,8 +1612,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ return false;
+
+ /* Begin reset on all VFs at once */
+- for (v = 0; v < pf->num_alloc_vfs; v++)
+- i40e_trigger_vf_reset(&pf->vf[v], flr);
++ for (v = 0; v < pf->num_alloc_vfs; v++) {
++ vf = &pf->vf[v];
++ /* If VF is being reset no need to trigger reset again */
++ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
++ i40e_trigger_vf_reset(&pf->vf[v], flr);
++ }
+
+ /* HW requires some time to make sure it can flush the FIFO for a VF
+ * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
+@@ -1577,9 +1633,11 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ */
+ while (v < pf->num_alloc_vfs) {
+ vf = &pf->vf[v];
+- reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+- if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
+- break;
++ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
++ reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
++ if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
++ break;
++ }
+
+ /* If the current VF has finished resetting, move on
+ * to the next VF in sequence.
+@@ -1607,6 +1665,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ if (pf->vf[v].lan_vsi_idx == 0)
+ continue;
+
++ /* If VF is reset in another thread just continue */
++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
++ continue;
++
+ i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ }
+
+@@ -1618,6 +1680,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ if (pf->vf[v].lan_vsi_idx == 0)
+ continue;
+
++ /* If VF is reset in another thread just continue */
++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
++ continue;
++
+ i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ }
+
+@@ -1627,10 +1693,16 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ mdelay(50);
+
+ /* Finish the reset on each VF */
+- for (v = 0; v < pf->num_alloc_vfs; v++)
++ for (v = 0; v < pf->num_alloc_vfs; v++) {
++ /* If VF is reset in another thread just continue */
++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
++ continue;
++
+ i40e_cleanup_reset_vf(&pf->vf[v]);
++ }
+
+ i40e_flush(hw);
++ usleep_range(20000, 40000);
+ clear_bit(__I40E_VF_DISABLE, pf->state);
+
+ return true;
+@@ -1883,7 +1955,7 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+ struct i40e_pf *pf;
+ struct i40e_hw *hw;
+ int abs_vf_id;
+- i40e_status aq_ret;
++ int aq_ret;
+
+ /* validate the request */
+ if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+@@ -1893,25 +1965,6 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+ hw = &pf->hw;
+ abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+- /* single place to detect unsuccessful return values */
+- if (v_retval) {
+- vf->num_invalid_msgs++;
+- dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
+- vf->vf_id, v_opcode, v_retval);
+- if (vf->num_invalid_msgs >
+- I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
+- dev_err(&pf->pdev->dev,
+- "Number of invalid messages exceeded for VF %d\n",
+- vf->vf_id);
+- dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
+- set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+- }
+- } else {
+- vf->num_valid_msgs++;
+- /* reset the invalid counter, if a valid message is received. */
+- vf->num_invalid_msgs = 0;
+- }
+-
+ aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval,
+ msg, msglen, NULL);
+ if (aq_ret) {
+@@ -1934,11 +1987,37 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+ **/
+ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
+ enum virtchnl_ops opcode,
+- i40e_status retval)
++ int retval)
+ {
+ return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0);
+ }
+
++/**
++ * i40e_sync_vf_state
++ * @vf: pointer to the VF info
++ * @state: VF state
++ *
++ * Called from a VF message to synchronize the service with a potential
++ * VF reset state
++ **/
++static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state)
++{
++ int i;
++
++ /* When handling some messages, it needs VF state to be set.
++ * It is possible that this flag is cleared during VF reset,
++ * so there is a need to wait until the end of the reset to
++ * handle the request message correctly.
++ */
++ for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) {
++ if (test_bit(state, &vf->vf_states))
++ return true;
++ usleep_range(10000, 20000);
++ }
++
++ return test_bit(state, &vf->vf_states);
++}
++
+ /**
+ * i40e_vc_get_version_msg
+ * @vf: pointer to the VF info
+@@ -1982,6 +2061,25 @@ static void i40e_del_qch(struct i40e_vf *vf)
+ }
+ }
+
++/**
++ * i40e_vc_get_max_frame_size
++ * @vf: pointer to the VF
++ *
++ * Max frame size is determined based on the current port's max frame size and
++ * whether a port VLAN is configured on this VF. The VF is not aware whether
++ * it's in a port VLAN so the PF needs to account for this in max frame size
++ * checks and sending the max frame size to the VF.
++ **/
++static u16 i40e_vc_get_max_frame_size(struct i40e_vf *vf)
++{
++ u16 max_frame_size = vf->pf->hw.phy.link_info.max_frame_size;
++
++ if (vf->port_vlan_id)
++ max_frame_size -= VLAN_HLEN;
++
++ return max_frame_size;
++}
++
+ /**
+ * i40e_vc_get_vf_resources_msg
+ * @vf: pointer to the VF info
+@@ -1993,13 +2091,13 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
+ {
+ struct virtchnl_vf_resource *vfres = NULL;
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
+ struct i40e_vsi *vsi;
+ int num_vsis = 1;
++ int aq_ret = 0;
+ size_t len = 0;
+ int ret;
+
+- if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -2083,6 +2181,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
+ vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
+ vfres->rss_key_size = I40E_HKEY_ARRAY_SIZE;
+ vfres->rss_lut_size = I40E_VF_HLUT_ARRAY_SIZE;
++ vfres->max_mtu = i40e_vc_get_max_frame_size(vf);
+
+ if (vf->lan_vsi_idx) {
+ vfres->vsi_res[0].vsi_id = vf->lan_vsi_id;
+@@ -2091,6 +2190,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
+ /* VFs only use TC 0 */
+ vfres->vsi_res[0].qset_handle
+ = le16_to_cpu(vsi->info.qs_handle[0]);
++ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) && !vf->pf_set_mac) {
++ i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
++ eth_zero_addr(vf->default_lan_addr.addr);
++ }
+ ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+ vf->default_lan_addr.addr);
+ }
+@@ -2105,20 +2208,6 @@ err:
+ return ret;
+ }
+
+-/**
+- * i40e_vc_reset_vf_msg
+- * @vf: pointer to the VF info
+- *
+- * called from the VF to reset itself,
+- * unlike other virtchnl messages, PF driver
+- * doesn't send the response back to the VF
+- **/
+-static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
+-{
+- if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
+- i40e_reset_vf(vf, false);
+-}
+-
+ /**
+ * i40e_vc_config_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+@@ -2132,11 +2221,11 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg)
+ struct virtchnl_promisc_info *info =
+ (struct virtchnl_promisc_info *)msg;
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
+ bool allmulti = false;
+ bool alluni = false;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err_out;
+ }
+@@ -2217,13 +2306,14 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
+ struct virtchnl_vsi_queue_config_info *qci =
+ (struct virtchnl_vsi_queue_config_info *)msg;
+ struct virtchnl_queue_pair_info *qpi;
+- struct i40e_pf *pf = vf->pf;
+ u16 vsi_id, vsi_queue_id = 0;
+- u16 num_qps_all = 0;
+- i40e_status aq_ret = 0;
++ struct i40e_pf *pf = vf->pf;
+ int i, j = 0, idx = 0;
++ struct i40e_vsi *vsi;
++ u16 num_qps_all = 0;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+@@ -2239,7 +2329,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
+ }
+
+ if (vf->adq_enabled) {
+- for (i = 0; i < I40E_MAX_VF_VSI; i++)
++ for (i = 0; i < vf->num_tc; i++)
+ num_qps_all += vf->ch[i].num_qps;
+ if (num_qps_all != qci->num_queue_pairs) {
+ aq_ret = I40E_ERR_PARAM;
+@@ -2310,9 +2400,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
+ pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+ qci->num_queue_pairs;
+ } else {
+- for (i = 0; i < vf->num_tc; i++)
+- pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+- vf->ch[i].num_qps;
++ for (i = 0; i < vf->num_tc; i++) {
++ vsi = pf->vsi[vf->ch[i].vsi_idx];
++ vsi->num_queue_pairs = vf->ch[i].num_qps;
++
++ if (i40e_update_adq_vsi_queues(vsi, i)) {
++ aq_ret = I40E_ERR_CONFIG;
++ goto error_param;
++ }
++ }
+ }
+
+ error_param:
+@@ -2362,11 +2458,11 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg)
+ struct virtchnl_irq_map_info *irqmap_info =
+ (struct virtchnl_irq_map_info *)msg;
+ struct virtchnl_vector_map *map;
++ int aq_ret = 0;
+ u16 vsi_id;
+- i40e_status aq_ret = 0;
+ int i;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+@@ -2478,7 +2574,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int i;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+@@ -2536,9 +2632,9 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg)
+ struct virtchnl_queue_select *vqs =
+ (struct virtchnl_queue_select *)msg;
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+@@ -2570,6 +2666,59 @@ error_param:
+ aq_ret);
+ }
+
++/**
++ * i40e_check_enough_queue - find big enough queue number
++ * @vf: pointer to the VF info
++ * @needed: the number of items needed
++ *
++ * Returns the base item index of the queue, or negative for error
++ **/
++static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed)
++{
++ unsigned int i, cur_queues, more, pool_size;
++ struct i40e_lump_tracking *pile;
++ struct i40e_pf *pf = vf->pf;
++ struct i40e_vsi *vsi;
++
++ vsi = pf->vsi[vf->lan_vsi_idx];
++ cur_queues = vsi->alloc_queue_pairs;
++
++ /* if current allocated queues are enough for need */
++ if (cur_queues >= needed)
++ return vsi->base_queue;
++
++ pile = pf->qp_pile;
++ if (cur_queues > 0) {
++ /* if the allocated queues are not zero
++ * just check if there are enough queues for more
++ * behind the allocated queues.
++ */
++ more = needed - cur_queues;
++ for (i = vsi->base_queue + cur_queues;
++ i < pile->num_entries; i++) {
++ if (pile->list[i] & I40E_PILE_VALID_BIT)
++ break;
++
++ if (more-- == 1)
++ /* there is enough */
++ return vsi->base_queue;
++ }
++ }
++
++ pool_size = 0;
++ for (i = 0; i < pile->num_entries; i++) {
++ if (pile->list[i] & I40E_PILE_VALID_BIT) {
++ pool_size = 0;
++ continue;
++ }
++ if (needed <= ++pool_size)
++ /* there is enough */
++ return i;
++ }
++
++ return -ENOMEM;
++}
++
+ /**
+ * i40e_vc_request_queues_msg
+ * @vf: pointer to the VF info
+@@ -2588,7 +2737,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
+ u8 cur_pairs = vf->num_queue_pairs;
+ struct i40e_pf *pf = vf->pf;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE))
+ return -EINVAL;
+
+ if (req_pairs > I40E_MAX_VF_QUEUES) {
+@@ -2604,11 +2753,16 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
+ req_pairs - cur_pairs,
+ pf->queues_left);
+ vfres->num_queue_pairs = pf->queues_left + cur_pairs;
++ } else if (i40e_check_enough_queue(vf, req_pairs) < 0) {
++ dev_warn(&pf->pdev->dev,
++ "VF %d requested %d more queues, but there is not enough for it.\n",
++ vf->vf_id,
++ req_pairs - cur_pairs);
++ vfres->num_queue_pairs = cur_pairs;
+ } else {
+ /* successful request */
+ vf->num_req_queues = req_pairs;
+- i40e_vc_notify_vf_reset(vf);
+- i40e_reset_vf(vf, false);
++ i40e_vc_reset_vf(vf, true);
+ return 0;
+ }
+
+@@ -2629,12 +2783,12 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_queue_select *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_eth_stats stats;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ struct i40e_vsi *vsi;
+
+ memset(&stats, 0, sizeof(struct i40e_eth_stats));
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+ }
+@@ -2748,10 +2902,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_ether_addr_list *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status ret = 0;
++ int ret = 0;
+ int i;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+ !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+ ret = I40E_ERR_PARAM;
+ goto error_param;
+@@ -2802,8 +2956,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+
+ error_param:
+ /* send the response to the VF */
+- return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+- ret);
++ return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
++ ret, NULL, 0);
+ }
+
+ /**
+@@ -2820,10 +2974,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+ bool was_unimac_deleted = false;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status ret = 0;
++ int ret = 0;
+ int i;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+ !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+ ret = I40E_ERR_PARAM;
+ goto error_param;
+@@ -2893,7 +3047,7 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_vlan_filter_list *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int i;
+
+ if ((vf->num_vlan >= I40E_VC_MAX_VLAN_PER_VF) &&
+@@ -2964,10 +3118,10 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_vlan_filter_list *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int i;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+ !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto error_param;
+@@ -3020,7 +3174,7 @@ static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+ {
+ struct i40e_pf *pf = vf->pf;
+ int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
+ !test_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states)) {
+@@ -3049,7 +3203,7 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, bool config)
+ {
+ struct virtchnl_iwarp_qvlist_info *qvlist_info =
+ (struct virtchnl_iwarp_qvlist_info *)msg;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
+ !test_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states)) {
+@@ -3085,11 +3239,11 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_rss_key *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+ !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) ||
+- (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) {
++ vrk->key_len != I40E_HKEY_ARRAY_SIZE) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3115,12 +3269,12 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_rss_lut *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ u16 i;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+ !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) ||
+- (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) {
++ vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3150,10 +3304,10 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg)
+ {
+ struct virtchnl_rss_hena *vrh = NULL;
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int len = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3187,9 +3341,9 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg)
+ (struct virtchnl_rss_hena *)msg;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_hw *hw = &pf->hw;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3211,10 +3365,10 @@ err:
+ **/
+ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
+ {
+- i40e_status aq_ret = 0;
+ struct i40e_vsi *vsi;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3237,10 +3391,10 @@ err:
+ **/
+ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
+ {
+- i40e_status aq_ret = 0;
+ struct i40e_vsi *vsi;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3437,8 +3591,8 @@ static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+- "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+- vf->vf_id, i40e_stat_str(&pf->hw, ret),
++ "VF %d: Failed to delete cloud filter, err %d aq_err %s\n",
++ vf->vf_id, ret,
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+
+@@ -3464,10 +3618,10 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+ struct hlist_node *node;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int i, ret;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3540,8 +3694,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+ ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+- "VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+- vf->vf_id, i40e_stat_str(&pf->hw, ret),
++ "VF %d: Failed to delete cloud filter, err %d aq_err %s\n",
++ vf->vf_id, ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err;
+ }
+@@ -3595,10 +3749,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+ struct i40e_cloud_filter *cfilter = NULL;
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_vsi *vsi = NULL;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ int i, ret;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err_out;
+ }
+@@ -3674,8 +3828,8 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+ ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+- "VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+- vf->vf_id, i40e_stat_str(&pf->hw, ret),
++ "VF %d: Failed to add cloud filter, err %d aq_err %s\n",
++ vf->vf_id, ret,
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ goto err_free;
+ }
+@@ -3704,10 +3858,10 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+ struct i40e_pf *pf = vf->pf;
+ struct i40e_link_status *ls = &pf->hw.phy.link_info;
+ int i, adq_request_qps = 0;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+ u64 speed = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3796,15 +3950,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+
+ /* set this flag only after making sure all inputs are sane */
+ vf->adq_enabled = true;
+- /* num_req_queues is set when user changes number of queues via ethtool
+- * and this causes issue for default VSI(which depends on this variable)
+- * when ADq is enabled, hence reset it.
+- */
+- vf->num_req_queues = 0;
+
+ /* reset the VF in order to allocate resources */
+- i40e_vc_notify_vf_reset(vf);
+- i40e_reset_vf(vf, false);
++ i40e_vc_reset_vf(vf, true);
+
+ return I40E_SUCCESS;
+
+@@ -3822,9 +3970,9 @@ err:
+ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+ {
+ struct i40e_pf *pf = vf->pf;
+- i40e_status aq_ret = 0;
++ int aq_ret = 0;
+
+- if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
++ if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+@@ -3844,8 +3992,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+ }
+
+ /* reset the VF in order to allocate resources */
+- i40e_vc_notify_vf_reset(vf);
+- i40e_reset_vf(vf, false);
++ i40e_vc_reset_vf(vf, true);
+
+ return I40E_SUCCESS;
+
+@@ -3907,7 +4054,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
+ i40e_vc_notify_vf_link_state(vf);
+ break;
+ case VIRTCHNL_OP_RESET_VF:
+- i40e_vc_reset_vf_msg(vf);
++ i40e_vc_reset_vf(vf, false);
+ ret = 0;
+ break;
+ case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+@@ -4161,7 +4308,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+ /* Force the VF interface down so it has to bring up with new MAC
+ * address
+ */
+- i40e_vc_disable_vf(vf);
++ i40e_vc_reset_vf(vf, true);
+ dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
+
+ error_param:
+@@ -4169,34 +4316,6 @@ error_param:
+ return ret;
+ }
+
+-/**
+- * i40e_vsi_has_vlans - True if VSI has configured VLANs
+- * @vsi: pointer to the vsi
+- *
+- * Check if a VSI has configured any VLANs. False if we have a port VLAN or if
+- * we have no configured VLANs. Do not call while holding the
+- * mac_filter_hash_lock.
+- */
+-static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi)
+-{
+- bool have_vlans;
+-
+- /* If we have a port VLAN, then the VSI cannot have any VLANs
+- * configured, as all MAC/VLAN filters will be assigned to the PVID.
+- */
+- if (vsi->info.pvid)
+- return false;
+-
+- /* Since we don't have a PVID, we know that if the device is in VLAN
+- * mode it must be because of a VLAN filter configured on this VSI.
+- */
+- spin_lock_bh(&vsi->mac_filter_hash_lock);
+- have_vlans = i40e_is_vsi_in_vlan(vsi);
+- spin_unlock_bh(&vsi->mac_filter_hash_lock);
+-
+- return have_vlans;
+-}
+-
+ /**
+ * i40e_ndo_set_vf_port_vlan
+ * @netdev: network interface device structure
+@@ -4253,19 +4372,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+ /* duplicate request, so just return success */
+ goto error_pvid;
+
+- if (i40e_vsi_has_vlans(vsi)) {
+- dev_err(&pf->pdev->dev,
+- "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
+- vf_id);
+- /* Administrator Error - knock the VF offline until he does
+- * the right thing by reconfiguring his network correctly
+- * and then reloading the VF driver.
+- */
+- i40e_vc_disable_vf(vf);
+- /* During reset the VF got a new VSI, so refresh the pointer. */
+- vsi = pf->vsi[vf->lan_vsi_idx];
+- }
+-
++ i40e_vc_reset_vf(vf, true);
++ /* During reset the VF got a new VSI, so refresh a pointer. */
++ vsi = pf->vsi[vf->lan_vsi_idx];
+ /* Locked once because multiple functions below iterate list */
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+@@ -4641,7 +4750,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
+ goto out;
+
+ vf->trusted = setting;
+- i40e_vc_disable_vf(vf);
++ i40e_vc_reset_vf(vf, true);
+ dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+ vf_id, setting ? "" : "un");
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+index 091e32c1bb46f..358bbdb587951 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+@@ -10,14 +10,15 @@
+
+ #define I40E_VIRTCHNL_SUPPORTED_QTYPES 2
+
+-#define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED 10
+-
+ #define I40E_VLAN_PRIORITY_SHIFT 13
+ #define I40E_VLAN_MASK 0xFFF
+ #define I40E_PRIORITY_MASK 0xE000
+
+ #define I40E_MAX_VF_PROMISC_FLAGS 3
+
++#define I40E_VF_STATE_WAIT_COUNT 20
++#define I40E_VFR_WAIT_COUNT 100
++
+ /* Various queue ctrls */
+ enum i40e_queue_ctrl {
+ I40E_QUEUE_CTRL_UNKNOWN = 0,
+@@ -38,6 +39,7 @@ enum i40e_vf_states {
+ I40E_VF_STATE_MC_PROMISC,
+ I40E_VF_STATE_UC_PROMISC,
+ I40E_VF_STATE_PRE_ENABLE,
++ I40E_VF_STATE_RESETTING
+ };
+
+ /* VF capabilities */
+@@ -89,9 +91,6 @@ struct i40e_vf {
+ u8 num_queue_pairs; /* num of qps assigned to VF vsis */
+ u8 num_req_queues; /* num of requested qps */
+ u64 num_mdd_events; /* num of mdd events detected */
+- /* num of continuous malformed or invalid msgs detected */
+- u64 num_invalid_msgs;
+- u64 num_valid_msgs; /* num of valid msgs detected */
+
+ unsigned long vf_caps; /* vf's adv. capabilities */
+ unsigned long vf_states; /* vf's runtime states */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+index e7e778ca074c0..7e50b8fff9b59 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+@@ -10,14 +10,6 @@
+ #include "i40e_txrx_common.h"
+ #include "i40e_xsk.h"
+
+-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
+-{
+- unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
+-
+- rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
+- return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
+-}
+-
+ void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
+ {
+ memset(rx_ring->rx_bi_zc, 0,
+@@ -29,6 +21,58 @@ static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+ return &rx_ring->rx_bi_zc[idx];
+ }
+
++/**
++ * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
++ * @rx_ring: Current rx ring
++ * @pool_present: is pool for XSK present
++ *
++ * Try allocating memory and return ENOMEM, if failed to allocate.
++ * If allocation was successful, substitute buffer with allocated one.
++ * Returns 0 on success, negative on failure
++ */
++static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
++{
++ size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
++ sizeof(*rx_ring->rx_bi);
++ void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
++
++ if (!sw_ring)
++ return -ENOMEM;
++
++ if (pool_present) {
++ kfree(rx_ring->rx_bi);
++ rx_ring->rx_bi = NULL;
++ rx_ring->rx_bi_zc = sw_ring;
++ } else {
++ kfree(rx_ring->rx_bi_zc);
++ rx_ring->rx_bi_zc = NULL;
++ rx_ring->rx_bi = sw_ring;
++ }
++ return 0;
++}
++
++/**
++ * i40e_realloc_rx_bi_zc - reallocate rx SW rings
++ * @vsi: Current VSI
++ * @zc: is zero copy set
++ *
++ * Reallocate buffer for rx_rings that might be used by XSK.
++ * XDP requires more memory, than rx_buf provides.
++ * Returns 0 on success, negative on failure
++ */
++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
++{
++ struct i40e_ring *rx_ring;
++ unsigned long q;
++
++ for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
++ rx_ring = vsi->rx_rings[q];
++ if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
++ return -ENOMEM;
++ }
++ return 0;
++}
++
+ /**
+ * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
+ * certain ring/qid
+@@ -69,6 +113,10 @@ static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
+ if (err)
+ return err;
+
++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
++ if (err)
++ return err;
++
+ err = i40e_queue_pair_enable(vsi, qid);
+ if (err)
+ return err;
+@@ -113,6 +161,9 @@ static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
+ xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
+
+ if (if_running) {
++ err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
++ if (err)
++ return err;
+ err = i40e_queue_pair_enable(vsi, qid);
+ if (err)
+ return err;
+@@ -243,21 +294,25 @@ no_buffers:
+ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
+ struct xdp_buff *xdp)
+ {
++ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
+- unsigned int datasize = xdp->data_end - xdp->data;
+ struct sk_buff *skb;
+
++ net_prefetch(xdp->data_meta);
++
+ /* allocate a skb to store the frags */
+- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+- xdp->data_end - xdp->data_hard_start,
++ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ goto out;
+
+- skb_reserve(skb, xdp->data - xdp->data_hard_start);
+- memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+- if (metasize)
++ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
++ ALIGN(totalsize, sizeof(long)));
++
++ if (metasize) {
+ skb_metadata_set(skb, metasize);
++ __skb_pull(skb, metasize);
++ }
+
+ out:
+ xsk_buff_free(xdp);
+@@ -469,11 +524,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
+ **/
+ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
+ {
+- struct xdp_desc *descs = xdp_ring->xsk_descs;
++ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+ u32 nb_pkts, nb_processed = 0;
+ unsigned int total_bytes = 0;
+
+- nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget);
++ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+index ea88f4597a072..75103c9922696 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+@@ -33,7 +33,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
+
+ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
+ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+-int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
++int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
+ void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
+
+ #endif /* _I40E_XSK_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 68c80f04113c8..478a292ac803b 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -39,6 +39,7 @@
+ #include "iavf_txrx.h"
+ #include "iavf_fdir.h"
+ #include "iavf_adv_rss.h"
++#include <linux/bitmap.h>
+
+ #define DEFAULT_DEBUG_LEVEL_SHIFT 3
+ #define PFX "iavf: "
+@@ -88,6 +89,7 @@ struct iavf_vsi {
+ #define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4)
+ #define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4)
+ #define IAVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */
++#define IAVF_MBPS_QUANTA 50
+
+ #define IAVF_VIRTCHNL_VF_RESOURCE_SIZE (sizeof(struct virtchnl_vf_resource) + \
+ (IAVF_MAX_VF_VSI * \
+@@ -177,6 +179,7 @@ enum iavf_state_t {
+ __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */
+ __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */
+ __IAVF_INIT_SW, /* got resources, setting up structs */
++ __IAVF_INIT_FAILED, /* init failed, restarting procedure */
+ __IAVF_RESETTING, /* in reset */
+ __IAVF_COMM_FAILED, /* communication with PF failed */
+ /* Below here, watchdog is running */
+@@ -186,6 +189,10 @@ enum iavf_state_t {
+ __IAVF_RUNNING, /* opened, working */
+ };
+
++enum iavf_critical_section_t {
++ __IAVF_IN_REMOVE_TASK, /* device being removed */
++};
++
+ #define IAVF_CLOUD_FIELD_OMAC 0x01
+ #define IAVF_CLOUD_FIELD_IMAC 0x02
+ #define IAVF_CLOUD_FIELD_IVLAN 0x04
+@@ -225,14 +232,12 @@ struct iavf_adapter {
+ struct work_struct reset_task;
+ struct work_struct adminq_task;
+ struct delayed_work client_task;
+- struct delayed_work init_task;
+ wait_queue_head_t down_waitqueue;
+ struct iavf_q_vector *q_vectors;
+ struct list_head vlan_filter_list;
+ struct list_head mac_filter_list;
+ struct mutex crit_lock;
+ struct mutex client_lock;
+- struct mutex remove_lock;
+ /* Lock to protect accesses to MAC and VLAN lists */
+ spinlock_t mac_vlan_list_lock;
+ char misc_vector_name[IFNAMSIZ + 9];
+@@ -270,6 +275,7 @@ struct iavf_adapter {
+ #define IAVF_FLAG_LEGACY_RX BIT(15)
+ #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16)
+ #define IAVF_FLAG_QUEUES_DISABLED BIT(17)
++#define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18)
+ /* duplicates for common code */
+ #define IAVF_FLAG_DCB_ENABLED 0
+ /* flags for admin queue service task */
+@@ -304,6 +310,7 @@ struct iavf_adapter {
+ #define IAVF_FLAG_AQ_DEL_FDIR_FILTER BIT(26)
+ #define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG BIT(27)
+ #define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG BIT(28)
++#define IAVF_FLAG_AQ_REQUEST_STATS BIT(29)
+
+ /* OS defined structs */
+ struct net_device *netdev;
+@@ -312,6 +319,7 @@ struct iavf_adapter {
+ struct iavf_hw hw; /* defined in iavf_type.h */
+
+ enum iavf_state_t state;
++ enum iavf_state_t last_state;
+ unsigned long crit_section;
+
+ struct delayed_work watchdog_task;
+@@ -370,6 +378,11 @@ struct iavf_adapter {
+ /* lock to protect access to the cloud filter list */
+ spinlock_t cloud_filter_list_lock;
+ u16 num_cloud_filters;
++ /* snapshot of "num_active_queues" before setup_tc for qdisc add
++ * is invoked. This information is useful during qdisc del flow,
++ * to restore correct number of queues
++ */
++ int orig_num_active_queues;
+
+ #define IAVF_MAX_FDIR_FILTERS 128 /* max allowed Flow Director filters */
+ u16 fdir_active_fltr;
+@@ -393,16 +406,62 @@ struct iavf_device {
+ extern char iavf_driver_name[];
+ extern struct workqueue_struct *iavf_wq;
+
++static inline const char *iavf_state_str(enum iavf_state_t state)
++{
++ switch (state) {
++ case __IAVF_STARTUP:
++ return "__IAVF_STARTUP";
++ case __IAVF_REMOVE:
++ return "__IAVF_REMOVE";
++ case __IAVF_INIT_VERSION_CHECK:
++ return "__IAVF_INIT_VERSION_CHECK";
++ case __IAVF_INIT_GET_RESOURCES:
++ return "__IAVF_INIT_GET_RESOURCES";
++ case __IAVF_INIT_SW:
++ return "__IAVF_INIT_SW";
++ case __IAVF_INIT_FAILED:
++ return "__IAVF_INIT_FAILED";
++ case __IAVF_RESETTING:
++ return "__IAVF_RESETTING";
++ case __IAVF_COMM_FAILED:
++ return "__IAVF_COMM_FAILED";
++ case __IAVF_DOWN:
++ return "__IAVF_DOWN";
++ case __IAVF_DOWN_PENDING:
++ return "__IAVF_DOWN_PENDING";
++ case __IAVF_TESTING:
++ return "__IAVF_TESTING";
++ case __IAVF_RUNNING:
++ return "__IAVF_RUNNING";
++ default:
++ return "__IAVF_UNKNOWN_STATE";
++ }
++}
++
++static inline void iavf_change_state(struct iavf_adapter *adapter,
++ enum iavf_state_t state)
++{
++ if (adapter->state != state) {
++ adapter->last_state = adapter->state;
++ adapter->state = state;
++ }
++ dev_dbg(&adapter->pdev->dev,
++ "state transition from:%s to:%s\n",
++ iavf_state_str(adapter->last_state),
++ iavf_state_str(adapter->state));
++}
++
+ int iavf_up(struct iavf_adapter *adapter);
+ void iavf_down(struct iavf_adapter *adapter);
+ int iavf_process_config(struct iavf_adapter *adapter);
+ void iavf_schedule_reset(struct iavf_adapter *adapter);
++void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+ void iavf_set_ethtool_ops(struct net_device *netdev);
+ void iavf_update_stats(struct iavf_adapter *adapter);
+ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
+ int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask);
++void iavf_irq_enable_queues(struct iavf_adapter *adapter);
+ void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+ void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+@@ -454,4 +513,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ const u8 *macaddr);
++int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
+ #endif /* _IAVF_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
+index 9fa3fa99b4c20..897b349cdaf1c 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_adminq.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
+@@ -324,6 +324,7 @@ static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw)
+ static enum iavf_status iavf_init_asq(struct iavf_hw *hw)
+ {
+ enum iavf_status ret_code = 0;
++ int i;
+
+ if (hw->aq.asq.count > 0) {
+ /* queue already initialized */
+@@ -354,12 +355,17 @@ static enum iavf_status iavf_init_asq(struct iavf_hw *hw)
+ /* initialize base registers */
+ ret_code = iavf_config_asq_regs(hw);
+ if (ret_code)
+- goto init_adminq_free_rings;
++ goto init_free_asq_bufs;
+
+ /* success! */
+ hw->aq.asq.count = hw->aq.num_asq_entries;
+ goto init_adminq_exit;
+
++init_free_asq_bufs:
++ for (i = 0; i < hw->aq.num_asq_entries; i++)
++ iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
++ iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
++
+ init_adminq_free_rings:
+ iavf_free_adminq_asq(hw);
+
+@@ -383,6 +389,7 @@ init_adminq_exit:
+ static enum iavf_status iavf_init_arq(struct iavf_hw *hw)
+ {
+ enum iavf_status ret_code = 0;
++ int i;
+
+ if (hw->aq.arq.count > 0) {
+ /* queue already initialized */
+@@ -413,12 +420,16 @@ static enum iavf_status iavf_init_arq(struct iavf_hw *hw)
+ /* initialize base registers */
+ ret_code = iavf_config_arq_regs(hw);
+ if (ret_code)
+- goto init_adminq_free_rings;
++ goto init_free_arq_bufs;
+
+ /* success! */
+ hw->aq.arq.count = hw->aq.num_arq_entries;
+ goto init_adminq_exit;
+
++init_free_arq_bufs:
++ for (i = 0; i < hw->aq.num_arq_entries; i++)
++ iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
++ iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
+ init_adminq_free_rings:
+ iavf_free_adminq_arq(hw);
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
+index e9cc7f6ddc466..c423e73c2d026 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_common.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
+@@ -661,7 +661,7 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = {
+ /* Non Tunneled IPv6 */
+ IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+ IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+- IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3),
++ IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4),
+ IAVF_PTT_UNUSED_ENTRY(91),
+ IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
+ IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 5a359a0a20ecc..a9a7453d969cb 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -354,6 +354,9 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ unsigned int i;
+
++ /* Explicitly request stats refresh */
++ iavf_schedule_request_stats(adapter);
++
+ iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
+
+ rcu_read_lock();
+@@ -612,23 +615,44 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+- new_tx_count = clamp_t(u32, ring->tx_pending,
+- IAVF_MIN_TXD,
+- IAVF_MAX_TXD);
+- new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++ if (ring->tx_pending > IAVF_MAX_TXD ||
++ ring->tx_pending < IAVF_MIN_TXD ||
++ ring->rx_pending > IAVF_MAX_RXD ||
++ ring->rx_pending < IAVF_MIN_RXD) {
++ netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
++ ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD,
++ IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++ return -EINVAL;
++ }
+
+- new_rx_count = clamp_t(u32, ring->rx_pending,
+- IAVF_MIN_RXD,
+- IAVF_MAX_RXD);
+- new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++ new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++ if (new_tx_count != ring->tx_pending)
++ netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
++ new_tx_count);
++
++ new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++ if (new_rx_count != ring->rx_pending)
++ netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
++ new_rx_count);
+
+ /* if nothing to do return success */
+ if ((new_tx_count == adapter->tx_desc_count) &&
+- (new_rx_count == adapter->rx_desc_count))
++ (new_rx_count == adapter->rx_desc_count)) {
++ netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+ return 0;
++ }
+
+- adapter->tx_desc_count = new_tx_count;
+- adapter->rx_desc_count = new_rx_count;
++ if (new_tx_count != adapter->tx_desc_count) {
++ netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n",
++ adapter->tx_desc_count, new_tx_count);
++ adapter->tx_desc_count = new_tx_count;
++ }
++
++ if (new_rx_count != adapter->rx_desc_count) {
++ netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n",
++ adapter->rx_desc_count, new_rx_count);
++ adapter->rx_desc_count = new_rx_count;
++ }
+
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+@@ -723,12 +747,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+-static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+- struct ethtool_coalesce *ec, int queue)
++static int iavf_set_itr_per_queue(struct iavf_adapter *adapter,
++ struct ethtool_coalesce *ec, int queue)
+ {
+ struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
+ struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
+ struct iavf_q_vector *q_vector;
++ u16 itr_setting;
++
++ itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
++
++ if (ec->rx_coalesce_usecs != itr_setting &&
++ ec->use_adaptive_rx_coalesce) {
++ netif_info(adapter, drv, adapter->netdev,
++ "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
++ return -EINVAL;
++ }
++
++ itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
++
++ if (ec->tx_coalesce_usecs != itr_setting &&
++ ec->use_adaptive_tx_coalesce) {
++ netif_info(adapter, drv, adapter->netdev,
++ "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
++ return -EINVAL;
++ }
+
+ rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+ tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
+@@ -751,6 +794,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+ * the Tx and Rx ITR values based on the values we have entered
+ * into the q_vector, no need to write the values now.
+ */
++ return 0;
+ }
+
+ /**
+@@ -792,9 +836,11 @@ static int __iavf_set_coalesce(struct net_device *netdev,
+ */
+ if (queue < 0) {
+ for (i = 0; i < adapter->num_active_queues; i++)
+- iavf_set_itr_per_queue(adapter, ec, i);
++ if (iavf_set_itr_per_queue(adapter, ec, i))
++ return -EINVAL;
+ } else if (queue < adapter->num_active_queues) {
+- iavf_set_itr_per_queue(adapter, ec, queue);
++ if (iavf_set_itr_per_queue(adapter, ec, queue))
++ return -EINVAL;
+ } else {
+ netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+ adapter->num_active_queues - 1);
+@@ -1229,6 +1275,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+ fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+ fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
++ fltr->ip_ver = 4;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+@@ -1240,6 +1287,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
+ fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
+ fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
++ fltr->ip_ver = 4;
+ break;
+ case IPV4_USER_FLOW:
+ fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+@@ -1252,6 +1300,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+ fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
+ fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
++ fltr->ip_ver = 4;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+@@ -1270,6 +1319,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+ fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+ fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
++ fltr->ip_ver = 6;
+ break;
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+@@ -1285,6 +1335,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ sizeof(struct in6_addr));
+ fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
+ fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
++ fltr->ip_ver = 6;
+ break;
+ case IPV6_USER_FLOW:
+ memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+@@ -1301,6 +1352,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+ fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
+ fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
++ fltr->ip_ver = 6;
+ break;
+ case ETHER_FLOW:
+ fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
+@@ -1311,6 +1363,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
+ return -EINVAL;
+ }
+
++ err = iavf_validate_fdir_fltr_masks(adapter, fltr);
++ if (err)
++ return err;
++
+ if (iavf_fdir_is_dup_fltr(adapter, fltr))
+ return -EEXIST;
+
+@@ -1341,14 +1397,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
+ if (fsp->flow_type & FLOW_MAC_EXT)
+ return -EINVAL;
+
++ spin_lock_bh(&adapter->fdir_fltr_lock);
+ if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
++ spin_unlock_bh(&adapter->fdir_fltr_lock);
+ dev_err(&adapter->pdev->dev,
+ "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
+ IAVF_MAX_FDIR_FILTERS);
+ return -ENOSPC;
+ }
+
+- spin_lock_bh(&adapter->fdir_fltr_lock);
+ if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
+ dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
+@@ -1721,7 +1778,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+ case ETHTOOL_GRXCLSRLCNT:
+ if (!FDIR_FLTR_SUPPORT(adapter))
+ break;
++ spin_lock_bh(&adapter->fdir_fltr_lock);
+ cmd->rule_cnt = adapter->fdir_active_fltr;
++ spin_unlock_bh(&adapter->fdir_fltr_lock);
+ cmd->data = IAVF_MAX_FDIR_FILTERS;
+ ret = 0;
+ break;
+@@ -1776,6 +1835,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 num_req = ch->combined_count;
++ int i;
+
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+@@ -1786,7 +1846,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ /* All of these should have already been checked by ethtool before this
+ * even gets to us, but just to be sure.
+ */
+- if (num_req > adapter->vsi_res->num_queue_pairs)
++ if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs)
+ return -EINVAL;
+
+ if (num_req == adapter->num_active_queues)
+@@ -1798,6 +1858,20 @@ static int iavf_set_channels(struct net_device *netdev,
+ adapter->num_req_queues = num_req;
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ iavf_schedule_reset(adapter);
++
++ /* wait until the reset is done */
++ for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
++ msleep(IAVF_RESET_WAIT_MS);
++ if (adapter->flags & IAVF_FLAG_RESET_PENDING)
++ continue;
++ break;
++ }
++ if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
++ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
++ adapter->num_req_queues = 0;
++ return -EOPNOTSUPP;
++ }
++
+ return 0;
+ }
+
+@@ -1844,14 +1918,13 @@ static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+- if (!indir)
+- return 0;
+-
+- memcpy(key, adapter->rss_key, adapter->rss_key_size);
++ if (key)
++ memcpy(key, adapter->rss_key, adapter->rss_key_size);
+
+- /* Each 32 bits pointed by 'indir' is stored with a lut entry */
+- for (i = 0; i < adapter->rss_lut_size; i++)
+- indir[i] = (u32)adapter->rss_lut[i];
++ if (indir)
++ /* Each 32 bits pointed by 'indir' is stored with a lut entry */
++ for (i = 0; i < adapter->rss_lut_size; i++)
++ indir[i] = (u32)adapter->rss_lut[i];
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+index 6146203efd84a..03e774bd2a5b4 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+@@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = {
+ }
+ };
+
++static const struct in6_addr ipv6_addr_zero_mask = {
++ .in6_u = {
++ .u6_addr8 = {
++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
++ }
++ }
++};
++
++/**
++ * iavf_validate_fdir_fltr_masks - validate Flow Director filter field masks
++ * @adapter: pointer to the VF adapter structure
++ * @fltr: Flow Director filter data structure
++ *
++ * Returns 0 if all masks of packet fields are either full or empty. Returns
++ * an error if at least one mask is partial.
++ */
++int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
++ struct iavf_fdir_fltr *fltr)
++{
++ if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_ver == 4) {
++ if (fltr->ip_mask.v4_addrs.src_ip &&
++ fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_mask.v4_addrs.dst_ip &&
++ fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX)
++ goto partial_mask;
++ } else if (fltr->ip_ver == 6) {
++ if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask,
++ sizeof(struct in6_addr)) &&
++ memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
++ sizeof(struct in6_addr)))
++ goto partial_mask;
++
++ if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask,
++ sizeof(struct in6_addr)) &&
++ memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
++ sizeof(struct in6_addr)))
++ goto partial_mask;
++
++ if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX)
++ goto partial_mask;
++ }
++
++ if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX)
++ goto partial_mask;
++
++ if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX))
++ goto partial_mask;
++
++ if (fltr->ip_mask.l4_header &&
++ fltr->ip_mask.l4_header != htonl(U32_MAX))
++ goto partial_mask;
++
++ return 0;
++
++partial_mask:
++ dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n");
++ return -EOPNOTSUPP;
++}
++
+ /**
+ * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
+ * @fltr: Flow Director filter data structure
+@@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
+ VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+ }
+
+- fltr->ip_ver = 4;
+-
+ return 0;
+ }
+
+@@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
+ VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+ }
+
+- fltr->ip_ver = 6;
+-
+ return 0;
+ }
+
+@@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
+ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+ {
+ struct iavf_fdir_fltr *tmp;
++ bool ret = false;
+
++ spin_lock_bh(&adapter->fdir_fltr_lock);
+ list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
+ if (tmp->flow_type != fltr->flow_type)
+ continue;
+@@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
+ !memcmp(&tmp->ip_data, &fltr->ip_data,
+ sizeof(fltr->ip_data)) &&
+ !memcmp(&tmp->ext_data, &fltr->ext_data,
+- sizeof(fltr->ext_data)))
+- return true;
++ sizeof(fltr->ext_data))) {
++ ret = true;
++ break;
++ }
+ }
++ spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+- return false;
++ return ret;
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+index 33c55c366315b..9eb9f73f6adf3 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+@@ -110,6 +110,8 @@ struct iavf_fdir_fltr {
+ struct virtchnl_fdir_add vc_add_msg;
+ };
+
++int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
++ struct iavf_fdir_fltr *fltr);
+ int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index cada4e0e40b48..a87f4f1ae6845 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -14,7 +14,7 @@
+ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter);
+ static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter);
+ static int iavf_close(struct net_device *netdev);
+-static int iavf_init_get_resources(struct iavf_adapter *adapter);
++static void iavf_init_get_resources(struct iavf_adapter *adapter);
+ static int iavf_check_reset_complete(struct iavf_hw *hw);
+
+ char iavf_driver_name[] = "iavf";
+@@ -51,6 +51,15 @@ MODULE_LICENSE("GPL v2");
+ static const struct net_device_ops iavf_netdev_ops;
+ struct workqueue_struct *iavf_wq;
+
++/**
++ * iavf_pdev_to_adapter - go from pci_dev to adapter
++ * @pdev: pci_dev pointer
++ */
++static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
++{
++ return netdev_priv(pci_get_drvdata(pdev));
++}
++
+ /**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+@@ -138,7 +147,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
+ *
+ * Returns 0 on success, negative on failure
+ **/
+-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
++int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+ {
+ unsigned int wait, delay = 10;
+
+@@ -165,6 +174,19 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
+ }
+ }
+
++/**
++ * iavf_schedule_request_stats - Set the flags and schedule statistics request
++ * @adapter: board private structure
++ *
++ * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly
++ * request and refresh ethtool stats
++ **/
++void iavf_schedule_request_stats(struct iavf_adapter *adapter)
++{
++ adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
++ mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
++}
++
+ /**
+ * iavf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+@@ -231,21 +253,18 @@ static void iavf_irq_disable(struct iavf_adapter *adapter)
+ }
+
+ /**
+- * iavf_irq_enable_queues - Enable interrupt for specified queues
++ * iavf_irq_enable_queues - Enable interrupt for all queues
+ * @adapter: board private structure
+- * @mask: bitmap of queues to enable
+ **/
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask)
++void iavf_irq_enable_queues(struct iavf_adapter *adapter)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+ int i;
+
+ for (i = 1; i < adapter->num_msix_vectors; i++) {
+- if (mask & BIT(i - 1)) {
+- wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
+- IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
+- }
++ wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
++ IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
++ IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
+ }
+ }
+
+@@ -259,7 +278,7 @@ void iavf_irq_enable(struct iavf_adapter *adapter, bool flush)
+ struct iavf_hw *hw = &adapter->hw;
+
+ iavf_misc_irq_enable(adapter);
+- iavf_irq_enable_queues(adapter, ~0);
++ iavf_irq_enable_queues(adapter);
+
+ if (flush)
+ iavf_flush(hw);
+@@ -280,8 +299,9 @@ static irqreturn_t iavf_msix_aq(int irq, void *data)
+ rd32(hw, IAVF_VFINT_ICR01);
+ rd32(hw, IAVF_VFINT_ICR0_ENA1);
+
+- /* schedule work on the private workqueue */
+- queue_work(iavf_wq, &adapter->adminq_task);
++ if (adapter->state != __IAVF_REMOVE)
++ /* schedule work on the private workqueue */
++ queue_work(iavf_wq, &adapter->adminq_task);
+
+ return IRQ_HANDLED;
+ }
+@@ -687,6 +707,21 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan)
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ }
+
++/**
++ * iavf_restore_filters
++ * @adapter: board private structure
++ *
++ * Restore existing non-MAC filters when the VF netdev comes back up
++ **/
++static void iavf_restore_filters(struct iavf_adapter *adapter)
++{
++ u16 vid;
++
++ /* re-add all VLAN filters */
++ for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
++ iavf_add_vlan(adapter, vid);
++}
++
+ /**
+ * iavf_vlan_rx_add_vid - Add a VLAN filter to a device
+ * @netdev: network device struct
+@@ -700,8 +735,11 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev,
+
+ if (!VLAN_ALLOWED(adapter))
+ return -EIO;
++
+ if (iavf_add_vlan(adapter, vid) == NULL)
+ return -ENOMEM;
++
++ set_bit(vid, adapter->vsi.active_vlans);
+ return 0;
+ }
+
+@@ -716,11 +754,10 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+- if (VLAN_ALLOWED(adapter)) {
+- iavf_del_vlan(adapter, vid);
+- return 0;
+- }
+- return -EIO;
++ iavf_del_vlan(adapter, vid);
++ clear_bit(vid, adapter->vsi.active_vlans);
++
++ return 0;
+ }
+
+ /**
+@@ -960,7 +997,7 @@ static void iavf_configure(struct iavf_adapter *adapter)
+ **/
+ static void iavf_up_complete(struct iavf_adapter *adapter)
+ {
+- adapter->state = __IAVF_RUNNING;
++ iavf_change_state(adapter, __IAVF_RUNNING);
+ clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+ iavf_napi_enable_all(adapter);
+@@ -972,80 +1009,156 @@ static void iavf_up_complete(struct iavf_adapter *adapter)
+ }
+
+ /**
+- * iavf_down - Shutdown the connection processing
++ * iavf_clear_mac_vlan_filters - Remove mac and vlan filters not sent to PF
++ * yet and mark others to be removed.
+ * @adapter: board private structure
+- *
+- * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+-void iavf_down(struct iavf_adapter *adapter)
++static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter)
+ {
+- struct net_device *netdev = adapter->netdev;
+- struct iavf_vlan_filter *vlf;
+- struct iavf_cloud_filter *cf;
+- struct iavf_fdir_fltr *fdir;
+- struct iavf_mac_filter *f;
+- struct iavf_adv_rss *rss;
+-
+- if (adapter->state <= __IAVF_DOWN_PENDING)
+- return;
+-
+- netif_carrier_off(netdev);
+- netif_tx_disable(netdev);
+- adapter->link_up = false;
+- iavf_napi_disable_all(adapter);
+- iavf_irq_disable(adapter);
++ struct iavf_vlan_filter *vlf, *vlftmp;
++ struct iavf_mac_filter *f, *ftmp;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+-
+ /* clear the sync flag on all filters */
+ __dev_uc_unsync(adapter->netdev, NULL);
+ __dev_mc_unsync(adapter->netdev, NULL);
+
+ /* remove all MAC filters */
+- list_for_each_entry(f, &adapter->mac_filter_list, list) {
+- f->remove = true;
++ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list,
++ list) {
++ if (f->add) {
++ list_del(&f->list);
++ kfree(f);
++ } else {
++ f->remove = true;
++ }
+ }
+
+ /* remove all VLAN filters */
+- list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+- vlf->remove = true;
++ list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
++ list) {
++ if (vlf->add) {
++ list_del(&vlf->list);
++ kfree(vlf);
++ } else {
++ vlf->remove = true;
++ }
+ }
+-
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
++}
++
++/**
++ * iavf_clear_cloud_filters - Remove cloud filters not sent to PF yet and
++ * mark others to be removed.
++ * @adapter: board private structure
++ **/
++static void iavf_clear_cloud_filters(struct iavf_adapter *adapter)
++{
++ struct iavf_cloud_filter *cf, *cftmp;
+
+ /* remove all cloud filters */
+ spin_lock_bh(&adapter->cloud_filter_list_lock);
+- list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+- cf->del = true;
++ list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
++ list) {
++ if (cf->add) {
++ list_del(&cf->list);
++ kfree(cf);
++ adapter->num_cloud_filters--;
++ } else {
++ cf->del = true;
++ }
+ }
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
++}
++
++/**
++ * iavf_clear_fdir_filters - Remove fdir filters not sent to PF yet and mark
++ * others to be removed.
++ * @adapter: board private structure
++ **/
++static void iavf_clear_fdir_filters(struct iavf_adapter *adapter)
++{
++ struct iavf_fdir_fltr *fdir, *fdirtmp;
+
+ /* remove all Flow Director filters */
+ spin_lock_bh(&adapter->fdir_fltr_lock);
+- list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+- fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
++ list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head,
++ list) {
++ if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
++ list_del(&fdir->list);
++ kfree(fdir);
++ adapter->fdir_active_fltr--;
++ } else {
++ fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
++ }
+ }
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
++}
++
++/**
++ * iavf_clear_adv_rss_conf - Remove adv rss conf not sent to PF yet and mark
++ * others to be removed.
++ * @adapter: board private structure
++ **/
++static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter)
++{
++ struct iavf_adv_rss *rss, *rsstmp;
+
+ /* remove all advance RSS configuration */
+ spin_lock_bh(&adapter->adv_rss_lock);
+- list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
+- rss->state = IAVF_ADV_RSS_DEL_REQUEST;
++ list_for_each_entry_safe(rss, rsstmp, &adapter->adv_rss_list_head,
++ list) {
++ if (rss->state == IAVF_ADV_RSS_ADD_REQUEST) {
++ list_del(&rss->list);
++ kfree(rss);
++ } else {
++ rss->state = IAVF_ADV_RSS_DEL_REQUEST;
++ }
++ }
+ spin_unlock_bh(&adapter->adv_rss_lock);
++}
++
++/**
++ * iavf_down - Shutdown the connection processing
++ * @adapter: board private structure
++ *
++ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
++ **/
++void iavf_down(struct iavf_adapter *adapter)
++{
++ struct net_device *netdev = adapter->netdev;
+
+- if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
+- adapter->state != __IAVF_RESETTING) {
++ if (adapter->state <= __IAVF_DOWN_PENDING)
++ return;
++
++ netif_carrier_off(netdev);
++ netif_tx_disable(netdev);
++ adapter->link_up = false;
++ iavf_napi_disable_all(adapter);
++ iavf_irq_disable(adapter);
++
++ iavf_clear_mac_vlan_filters(adapter);
++ iavf_clear_cloud_filters(adapter);
++ iavf_clear_fdir_filters(adapter);
++ iavf_clear_adv_rss_conf(adapter);
++
++ if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) {
+ /* cancel any current operation */
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ /* Schedule operations to close down the HW. Don't wait
+ * here for this to complete. The watchdog is still running
+ * and it will take care of this.
+ */
+- adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER;
+- adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+- adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+- adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+- adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
++ if (!list_empty(&adapter->mac_filter_list))
++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
++ if (!list_empty(&adapter->vlan_filter_list))
++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
++ if (!list_empty(&adapter->cloud_filter_list))
++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
++ if (!list_empty(&adapter->fdir_list_head))
++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
++ if (!list_empty(&adapter->adv_rss_list_head))
++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
+ adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
+ }
+
+@@ -1332,7 +1445,6 @@ static void iavf_fill_rss_lut(struct iavf_adapter *adapter)
+ static int iavf_init_rss(struct iavf_adapter *adapter)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+- int ret;
+
+ if (!RSS_PF(adapter)) {
+ /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
+@@ -1348,9 +1460,8 @@ static int iavf_init_rss(struct iavf_adapter *adapter)
+
+ iavf_fill_rss_lut(adapter);
+ netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size);
+- ret = iavf_config_rss(adapter);
+
+- return ret;
++ return iavf_config_rss(adapter);
+ }
+
+ /**
+@@ -1396,19 +1507,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ {
+ int q_idx, num_q_vectors;
+- int napi_vectors;
+
+ if (!adapter->q_vectors)
+ return;
+
+ num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+- napi_vectors = adapter->num_active_queues;
+
+ for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+ struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+- if (q_idx < napi_vectors)
+- netif_napi_del(&q_vector->napi);
++ netif_napi_del(&q_vector->napi);
+ }
+ kfree(adapter->q_vectors);
+ adapter->q_vectors = NULL;
+@@ -1630,8 +1738,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
+ iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
+ return 0;
+ }
+-
+- if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) &&
++ if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) ||
+ (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
+ iavf_set_promiscuous(adapter, 0);
+ return 0;
+@@ -1679,6 +1786,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
+ iavf_del_adv_rss_cfg(adapter);
+ return 0;
+ }
++ if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
++ iavf_request_stats(adapter);
++ return 0;
++ }
++
+ return -EAGAIN;
+ }
+
+@@ -1688,9 +1800,9 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
+ *
+ * Function process __IAVF_STARTUP driver state.
+ * When success the state is changed to __IAVF_INIT_VERSION_CHECK
+- * when fails it returns -EAGAIN
++ * when it fails the state is changed to __IAVF_INIT_FAILED
+ **/
+-static int iavf_startup(struct iavf_adapter *adapter)
++static void iavf_startup(struct iavf_adapter *adapter)
+ {
+ struct pci_dev *pdev = adapter->pdev;
+ struct iavf_hw *hw = &adapter->hw;
+@@ -1729,9 +1841,10 @@ static int iavf_startup(struct iavf_adapter *adapter)
+ iavf_shutdown_adminq(hw);
+ goto err;
+ }
+- adapter->state = __IAVF_INIT_VERSION_CHECK;
++ iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK);
++ return;
+ err:
+- return err;
++ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ }
+
+ /**
+@@ -1740,9 +1853,9 @@ err:
+ *
+ * Function process __IAVF_INIT_VERSION_CHECK driver state.
+ * When success the state is changed to __IAVF_INIT_GET_RESOURCES
+- * when fails it returns -EAGAIN
++ * when fails the state is changed to __IAVF_INIT_FAILED
+ **/
+-static int iavf_init_version_check(struct iavf_adapter *adapter)
++static void iavf_init_version_check(struct iavf_adapter *adapter)
+ {
+ struct pci_dev *pdev = adapter->pdev;
+ struct iavf_hw *hw = &adapter->hw;
+@@ -1753,7 +1866,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter)
+ if (!iavf_asq_done(hw)) {
+ dev_err(&pdev->dev, "Admin queue command never completed\n");
+ iavf_shutdown_adminq(hw);
+- adapter->state = __IAVF_STARTUP;
++ iavf_change_state(adapter, __IAVF_STARTUP);
+ goto err;
+ }
+
+@@ -1776,10 +1889,10 @@ static int iavf_init_version_check(struct iavf_adapter *adapter)
+ err);
+ goto err;
+ }
+- adapter->state = __IAVF_INIT_GET_RESOURCES;
+-
++ iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES);
++ return;
+ err:
+- return err;
++ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ }
+
+ /**
+@@ -1789,9 +1902,9 @@ err:
+ * Function process __IAVF_INIT_GET_RESOURCES driver state and
+ * finishes driver initialization procedure.
+ * When success the state is changed to __IAVF_DOWN
+- * when fails it returns -EAGAIN
++ * when it fails the state is changed to __IAVF_INIT_FAILED
+ **/
+-static int iavf_init_get_resources(struct iavf_adapter *adapter)
++static void iavf_init_get_resources(struct iavf_adapter *adapter)
+ {
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+@@ -1819,7 +1932,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
+ */
+ iavf_shutdown_adminq(hw);
+ dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+- return 0;
++ return;
+ }
+ if (err) {
+ dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err);
+@@ -1847,7 +1960,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
+ eth_hw_addr_random(netdev);
+ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+ } else {
+- ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
++ eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+ ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+ }
+
+@@ -1893,7 +2006,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
+ if (netdev->features & NETIF_F_GRO)
+ dev_info(&pdev->dev, "GRO is enabled\n");
+
+- adapter->state = __IAVF_DOWN;
++ iavf_change_state(adapter, __IAVF_DOWN);
+ set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+ rtnl_unlock();
+
+@@ -1911,7 +2024,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
+ else
+ iavf_init_rss(adapter);
+
+- return err;
++ return;
+ err_mem:
+ iavf_free_rss(adapter);
+ err_register:
+@@ -1922,7 +2035,7 @@ err_alloc:
+ kfree(adapter->vf_res);
+ adapter->vf_res = NULL;
+ err:
+- return err;
++ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ }
+
+ /**
+@@ -1937,14 +2050,80 @@ static void iavf_watchdog_task(struct work_struct *work)
+ struct iavf_hw *hw = &adapter->hw;
+ u32 reg_val;
+
+- if (!mutex_trylock(&adapter->crit_lock))
++ if (!mutex_trylock(&adapter->crit_lock)) {
++ if (adapter->state == __IAVF_REMOVE)
++ return;
++
+ goto restart_watchdog;
++ }
+
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+- adapter->state = __IAVF_COMM_FAILED;
++ iavf_change_state(adapter, __IAVF_COMM_FAILED);
++
++ if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
++ adapter->aq_required = 0;
++ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
++ mutex_unlock(&adapter->crit_lock);
++ queue_work(iavf_wq, &adapter->reset_task);
++ return;
++ }
+
+ switch (adapter->state) {
++ case __IAVF_STARTUP:
++ iavf_startup(adapter);
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq, &adapter->watchdog_task,
++ msecs_to_jiffies(30));
++ return;
++ case __IAVF_INIT_VERSION_CHECK:
++ iavf_init_version_check(adapter);
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq, &adapter->watchdog_task,
++ msecs_to_jiffies(30));
++ return;
++ case __IAVF_INIT_GET_RESOURCES:
++ iavf_init_get_resources(adapter);
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq, &adapter->watchdog_task,
++ msecs_to_jiffies(1));
++ return;
++ case __IAVF_INIT_FAILED:
++ if (test_bit(__IAVF_IN_REMOVE_TASK,
++ &adapter->crit_section)) {
++ /* Do not update the state and do not reschedule the
++ * watchdog task; iavf_remove should handle this state,
++ * as it can loop forever
++ */
++ mutex_unlock(&adapter->crit_lock);
++ return;
++ }
++ if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
++ dev_err(&adapter->pdev->dev,
++ "Failed to communicate with PF; waiting before retry\n");
++ adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
++ iavf_shutdown_adminq(hw);
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq,
++ &adapter->watchdog_task, (5 * HZ));
++ return;
++ }
++ /* Try again from the failed step */
++ iavf_change_state(adapter, adapter->last_state);
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ);
++ return;
+ case __IAVF_COMM_FAILED:
++ if (test_bit(__IAVF_IN_REMOVE_TASK,
++ &adapter->crit_section)) {
++ /* Set state to __IAVF_INIT_FAILED and perform remove
++ * steps. Remove IAVF_FLAG_PF_COMMS_FAILED so the task
++ * doesn't bring the state back to __IAVF_COMM_FAILED.
++ */
++ iavf_change_state(adapter, __IAVF_INIT_FAILED);
++ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
++ mutex_unlock(&adapter->crit_lock);
++ return;
++ }
+ reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+ IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+ if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
+@@ -1952,23 +2131,20 @@ static void iavf_watchdog_task(struct work_struct *work)
+ /* A chance for redemption! */
+ dev_err(&adapter->pdev->dev,
+ "Hardware came out of reset. Attempting reinit.\n");
+- adapter->state = __IAVF_STARTUP;
+- adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+- queue_delayed_work(iavf_wq, &adapter->init_task, 10);
+- mutex_unlock(&adapter->crit_lock);
+- /* Don't reschedule the watchdog, since we've restarted
+- * the init task. When init_task contacts the PF and
++ /* When init task contacts the PF and
+ * gets everything set up again, it'll restart the
+ * watchdog for us. Down, boy. Sit. Stay. Woof.
+ */
+- return;
++ iavf_change_state(adapter, __IAVF_STARTUP);
++ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+ }
+ adapter->aq_required = 0;
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
++ mutex_unlock(&adapter->crit_lock);
+ queue_delayed_work(iavf_wq,
+ &adapter->watchdog_task,
+ msecs_to_jiffies(10));
+- goto watchdog_done;
++ return;
+ case __IAVF_RESETTING:
+ mutex_unlock(&adapter->crit_lock);
+ queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+@@ -1991,15 +2167,16 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->state == __IAVF_RUNNING)
+ iavf_request_stats(adapter);
+ }
++ if (adapter->state == __IAVF_RUNNING)
++ iavf_detect_recover_hung(&adapter->vsi);
+ break;
+ case __IAVF_REMOVE:
++ default:
+ mutex_unlock(&adapter->crit_lock);
+ return;
+- default:
+- goto restart_watchdog;
+ }
+
+- /* check for hw reset */
++ /* check for hw reset */
+ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+ if (!reg_val) {
+ adapter->flags |= IAVF_FLAG_RESET_PENDING;
+@@ -2007,24 +2184,31 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+ queue_work(iavf_wq, &adapter->reset_task);
+- goto watchdog_done;
++ mutex_unlock(&adapter->crit_lock);
++ queue_delayed_work(iavf_wq,
++ &adapter->watchdog_task, HZ * 2);
++ return;
+ }
+
+ schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
+-watchdog_done:
+- if (adapter->state == __IAVF_RUNNING ||
+- adapter->state == __IAVF_COMM_FAILED)
+- iavf_detect_recover_hung(&adapter->vsi);
+ mutex_unlock(&adapter->crit_lock);
+ restart_watchdog:
++ if (adapter->state >= __IAVF_DOWN)
++ queue_work(iavf_wq, &adapter->adminq_task);
+ if (adapter->aq_required)
+ queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ msecs_to_jiffies(20));
+ else
+ queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+- queue_work(iavf_wq, &adapter->adminq_task);
+ }
+
++/**
++ * iavf_disable_vf - disable VF
++ * @adapter: board private structure
++ *
++ * Set communication failed flag and free all resources.
++ * NOTE: This function is expected to be called with crit_lock held.
++ **/
+ static void iavf_disable_vf(struct iavf_adapter *adapter)
+ {
+ struct iavf_mac_filter *f, *ftmp;
+@@ -2074,14 +2258,12 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
+
+ iavf_free_misc_irq(adapter);
+ iavf_reset_interrupt_capability(adapter);
+- iavf_free_queues(adapter);
+ iavf_free_q_vectors(adapter);
++ iavf_free_queues(adapter);
+ memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
+ iavf_shutdown_adminq(&adapter->hw);
+- adapter->netdev->flags &= ~IFF_UP;
+- mutex_unlock(&adapter->crit_lock);
+ adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+- adapter->state = __IAVF_DOWN;
++ iavf_change_state(adapter, __IAVF_DOWN);
+ wake_up(&adapter->down_waitqueue);
+ dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
+ }
+@@ -2103,22 +2285,26 @@ static void iavf_reset_task(struct work_struct *work)
+ struct net_device *netdev = adapter->netdev;
+ struct iavf_hw *hw = &adapter->hw;
+ struct iavf_mac_filter *f, *ftmp;
+- struct iavf_vlan_filter *vlf;
+ struct iavf_cloud_filter *cf;
+ u32 reg_val;
+ int i = 0, err;
+ bool running;
+
++ /* Detach interface to avoid subsequent NDO callbacks */
++ rtnl_lock();
++ netif_device_detach(netdev);
++ rtnl_unlock();
++
+ /* When device is being removed it doesn't make sense to run the reset
+ * task, just return in such a case.
+ */
+- if (mutex_is_locked(&adapter->remove_lock))
+- return;
++ if (!mutex_trylock(&adapter->crit_lock)) {
++ if (adapter->state != __IAVF_REMOVE)
++ queue_work(iavf_wq, &adapter->reset_task);
+
+- if (iavf_lock_timeout(&adapter->crit_lock, 200)) {
+- schedule_work(&adapter->reset_task);
+- return;
++ goto reset_finish;
+ }
++
+ while (!mutex_trylock(&adapter->client_lock))
+ usleep_range(500, 1000);
+ if (CLIENT_ENABLED(adapter)) {
+@@ -2166,12 +2352,19 @@ static void iavf_reset_task(struct work_struct *work)
+ }
+
+ pci_set_master(adapter->pdev);
++ pci_restore_msi_state(adapter->pdev);
+
+ if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+ dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
+ reg_val);
+ iavf_disable_vf(adapter);
+ mutex_unlock(&adapter->client_lock);
++ mutex_unlock(&adapter->crit_lock);
++ if (netif_running(netdev)) {
++ rtnl_lock();
++ dev_close(netdev);
++ rtnl_unlock();
++ }
+ return; /* Do not attempt to reinit. It's dead, Jim. */
+ }
+
+@@ -2180,8 +2373,7 @@ continue_reset:
+ * ndo_open() returning, so we can't assume it means all our open
+ * tasks have finished, since we're not holding the rtnl_lock here.
+ */
+- running = ((adapter->state == __IAVF_RUNNING) ||
+- (adapter->state == __IAVF_RESETTING));
++ running = adapter->state == __IAVF_RUNNING;
+
+ if (running) {
+ netif_carrier_off(netdev);
+@@ -2191,7 +2383,7 @@ continue_reset:
+ }
+ iavf_irq_disable(adapter);
+
+- adapter->state = __IAVF_RESETTING;
++ iavf_change_state(adapter, __IAVF_RESETTING);
+ adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+
+ /* free the Tx/Rx rings and descriptors, might be better to just
+@@ -2243,11 +2435,6 @@ continue_reset:
+ list_for_each_entry(f, &adapter->mac_filter_list, list) {
+ f->add = true;
+ }
+- /* re-add all VLAN filters */
+- list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
+- vlf->add = true;
+- }
+-
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+ /* check if TCs are running and re-add all cloud filters */
+@@ -2261,7 +2448,6 @@ continue_reset:
+ spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+ adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+- adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+ adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+ iavf_misc_irq_enable(adapter);
+
+@@ -2291,22 +2477,44 @@ continue_reset:
+
+ iavf_configure(adapter);
+
++ /* iavf_up_complete() will switch device back
++ * to __IAVF_RUNNING
++ */
+ iavf_up_complete(adapter);
+
+ iavf_irq_enable(adapter, true);
+ } else {
+- adapter->state = __IAVF_DOWN;
++ iavf_change_state(adapter, __IAVF_DOWN);
+ wake_up(&adapter->down_waitqueue);
+ }
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
+
+- return;
++ goto reset_finish;
+ reset_err:
++ if (running) {
++ set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
++ iavf_free_traffic_irqs(adapter);
++ }
++ iavf_disable_vf(adapter);
++
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
++
++ if (netif_running(netdev)) {
++ /* Close device to ensure that Tx queues will not be started
++ * during netif_device_attach() at the end of the reset task.
++ */
++ rtnl_lock();
++ dev_close(netdev);
++ rtnl_unlock();
++ }
++
+ dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
+- iavf_close(netdev);
++reset_finish:
++ rtnl_lock();
++ netif_device_attach(netdev);
++ rtnl_unlock();
+ }
+
+ /**
+@@ -2324,16 +2532,22 @@ static void iavf_adminq_task(struct work_struct *work)
+ u32 val, oldval;
+ u16 pending;
+
+- if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
++ if (!mutex_trylock(&adapter->crit_lock)) {
++ if (adapter->state == __IAVF_REMOVE)
++ return;
++
++ queue_work(iavf_wq, &adapter->adminq_task);
+ goto out;
++ }
++
++ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
++ goto unlock;
+
+ event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
+ event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+ if (!event.msg_buf)
+- goto out;
++ goto unlock;
+
+- if (iavf_lock_timeout(&adapter->crit_lock, 200))
+- goto freedom;
+ do {
+ ret = iavf_clean_arq_element(hw, &event, &pending);
+ v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+@@ -2347,8 +2561,19 @@ static void iavf_adminq_task(struct work_struct *work)
+ if (pending != 0)
+ memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
+ } while (pending);
+- mutex_unlock(&adapter->crit_lock);
+
++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
++ if (adapter->netdev_registered ||
++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
++ struct net_device *netdev = adapter->netdev;
++
++ rtnl_lock();
++ netdev_update_features(netdev);
++ rtnl_unlock();
++ }
++
++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ }
+ if ((adapter->flags &
+ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+ adapter->state == __IAVF_RESETTING)
+@@ -2356,7 +2581,7 @@ static void iavf_adminq_task(struct work_struct *work)
+
+ /* check for error indications */
+ val = rd32(hw, hw->aq.arq.len);
+- if (val == 0xdeadbeef) /* indicates device in reset */
++ if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */
+ goto freedom;
+ oldval = val;
+ if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) {
+@@ -2393,6 +2618,8 @@ static void iavf_adminq_task(struct work_struct *work)
+
+ freedom:
+ kfree(event.msg_buf);
++unlock:
++ mutex_unlock(&adapter->crit_lock);
+ out:
+ /* re-enable Admin queue interrupt cause */
+ iavf_misc_irq_enable(adapter);
+@@ -2601,6 +2828,7 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+ {
+ u64 total_max_rate = 0;
++ u32 tx_rate_rem = 0;
+ int i, num_qps = 0;
+ u64 tx_rate = 0;
+ int ret = 0;
+@@ -2615,17 +2843,40 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter,
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&adapter->pdev->dev,
+- "Invalid min tx rate (greater than 0) specified\n");
++ "Invalid min tx rate (greater than 0) specified for TC%d\n",
++ i);
+ return -EINVAL;
+ }
+- /*convert to Mbps */
++
++ /* convert to Mbps */
+ tx_rate = div_u64(mqprio_qopt->max_rate[i],
+ IAVF_MBPS_DIVISOR);
++
++ if (mqprio_qopt->max_rate[i] &&
++ tx_rate < IAVF_MBPS_QUANTA) {
++ dev_err(&adapter->pdev->dev,
++ "Invalid max tx rate for TC%d, minimum %dMbps\n",
++ i, IAVF_MBPS_QUANTA);
++ return -EINVAL;
++ }
++
++ (void)div_u64_rem(tx_rate, IAVF_MBPS_QUANTA, &tx_rate_rem);
++
++ if (tx_rate_rem != 0) {
++ dev_err(&adapter->pdev->dev,
++ "Invalid max tx rate for TC%d, not divisible by %d\n",
++ i, IAVF_MBPS_QUANTA);
++ return -EINVAL;
++ }
++
+ total_max_rate += tx_rate;
+ num_qps += mqprio_qopt->qopt.count[i];
+ }
+- if (num_qps > IAVF_MAX_REQ_QUEUES)
++ if (num_qps > adapter->num_active_queues) {
++ dev_err(&adapter->pdev->dev,
++ "Cannot support requested number of queues\n");
+ return -EINVAL;
++ }
+
+ ret = iavf_validate_tx_bandwidth(adapter, total_max_rate);
+ return ret;
+@@ -2684,6 +2935,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+ netif_tx_disable(netdev);
+ iavf_del_all_cloud_filters(adapter);
+ adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS;
++ total_qps = adapter->orig_num_active_queues;
+ goto exit;
+ } else {
+ return -EINVAL;
+@@ -2727,7 +2979,21 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+ adapter->ch_config.ch_info[i].offset = 0;
+ }
+ }
++
++ /* Take a snapshot of the original config, such as "num_active_queues".
++ * It is used later when the delete ADQ flow is exercised, so that
++ * once the delete ADQ flow completes, the VF goes back to its
++ * original queue configuration
++ */
++
++ adapter->orig_num_active_queues = adapter->num_active_queues;
++
++ /* Store queue info based on TC so that the VF gets configured
++ * with the correct number of queues when it completes the ADQ
++ * config flow
++ */
+ adapter->ch_config.total_qps = total_qps;
++
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+ adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS;
+@@ -2744,6 +3010,12 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+ }
+ }
+ exit:
++ if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
++ return 0;
++
++ netif_set_real_num_rx_queues(netdev, total_qps);
++ netif_set_real_num_tx_queues(netdev, total_qps);
++
+ return ret;
+ }
+
+@@ -2826,7 +3098,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+ match.mask->dst);
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+
+@@ -2836,7 +3108,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+ match.mask->src);
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+
+@@ -2871,7 +3143,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+ match.mask->vlan_id);
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+@@ -2895,7 +3167,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+ be32_to_cpu(match.mask->dst));
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+
+@@ -2904,14 +3176,14 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ field_flags |= IAVF_CLOUD_FIELD_IIP;
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+- be32_to_cpu(match.mask->dst));
+- return IAVF_ERR_CONFIG;
++ be32_to_cpu(match.mask->src));
++ return -EINVAL;
+ }
+ }
+
+ if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) {
+ dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ if (match.key->dst) {
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+@@ -2932,7 +3204,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ if (ipv6_addr_any(&match.mask->dst)) {
+ dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+ IPV6_ADDR_ANY);
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+
+ /* src and dest IPv6 address should not be LOOPBACK
+@@ -2942,7 +3214,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ ipv6_addr_loopback(&match.key->src)) {
+ dev_err(&adapter->pdev->dev,
+ "ipv6 addr should not be loopback\n");
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ if (!ipv6_addr_any(&match.mask->dst) ||
+ !ipv6_addr_any(&match.mask->src))
+@@ -2967,7 +3239,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+ be16_to_cpu(match.mask->src));
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+
+@@ -2977,7 +3249,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+ } else {
+ dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+ be16_to_cpu(match.mask->dst));
+- return IAVF_ERR_CONFIG;
++ return -EINVAL;
+ }
+ }
+ if (match.key->dst) {
+@@ -3041,8 +3313,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
+ return -ENOMEM;
+
+ while (!mutex_trylock(&adapter->crit_lock)) {
+- if (--count == 0)
+- goto err;
++ if (--count == 0) {
++ kfree(filter);
++ return err;
++ }
+ udelay(1);
+ }
+
+@@ -3053,11 +3327,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
+ /* start out with flow type and eth type IPv4 to begin with */
+ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+ err = iavf_parse_cls_flower(adapter, cls_flower, filter);
+- if (err < 0)
++ if (err)
+ goto err;
+
+ err = iavf_handle_tclass(adapter, tc, filter);
+- if (err < 0)
++ if (err)
+ goto err;
+
+ /* add filter to the list */
+@@ -3226,6 +3500,13 @@ static int iavf_open(struct net_device *netdev)
+ goto err_unlock;
+ }
+
++ if (adapter->state == __IAVF_RUNNING &&
++ !test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) {
++ dev_dbg(&adapter->pdev->dev, "VF is already open.\n");
++ err = 0;
++ goto err_unlock;
++ }
++
+ /* allocate transmit descriptors */
+ err = iavf_setup_all_tx_resources(adapter);
+ if (err)
+@@ -3247,6 +3528,9 @@ static int iavf_open(struct net_device *netdev)
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
++ /* Restore VLAN filters that were removed with IFF_DOWN */
++ iavf_restore_filters(adapter);
++
+ iavf_configure(adapter);
+
+ iavf_up_complete(adapter);
+@@ -3284,20 +3568,45 @@ err_unlock:
+ static int iavf_close(struct net_device *netdev)
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
++ u64 aq_to_restore;
+ int status;
+
+- if (adapter->state <= __IAVF_DOWN_PENDING)
+- return 0;
++ mutex_lock(&adapter->crit_lock);
+
+- while (!mutex_trylock(&adapter->crit_lock))
+- usleep_range(500, 1000);
++ if (adapter->state <= __IAVF_DOWN_PENDING) {
++ mutex_unlock(&adapter->crit_lock);
++ return 0;
++ }
+
+ set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+ if (CLIENT_ENABLED(adapter))
+ adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE;
++ /* We cannot send IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS before
++ * IAVF_FLAG_AQ_DISABLE_QUEUES because in that case there is an rtnl
++ * deadlock with adminq_task() until iavf_close() times out. We must
++ * send IAVF_FLAG_AQ_GET_CONFIG before IAVF_FLAG_AQ_DISABLE_QUEUES so
++ * that the VF can still disable its queues. Give only the necessary
++ * flags to iavf_down and save the others to set right before
++ * iavf_close() returns, when IAVF_FLAG_AQ_DISABLE_QUEUES has already
++ * been sent and iavf is in the DOWN state.
++ */
++ aq_to_restore = adapter->aq_required;
++ adapter->aq_required &= IAVF_FLAG_AQ_GET_CONFIG;
++
++ /* Remove flags that we do not want to send after close, or that we
++ * want to send before disabling queues.
++ */
++ aq_to_restore &= ~(IAVF_FLAG_AQ_GET_CONFIG |
++ IAVF_FLAG_AQ_ENABLE_QUEUES |
++ IAVF_FLAG_AQ_CONFIGURE_QUEUES |
++ IAVF_FLAG_AQ_ADD_VLAN_FILTER |
++ IAVF_FLAG_AQ_ADD_MAC_FILTER |
++ IAVF_FLAG_AQ_ADD_CLOUD_FILTER |
++ IAVF_FLAG_AQ_ADD_FDIR_FILTER |
++ IAVF_FLAG_AQ_ADD_ADV_RSS_CFG);
+
+ iavf_down(adapter);
+- adapter->state = __IAVF_DOWN_PENDING;
++ iavf_change_state(adapter, __IAVF_DOWN_PENDING);
+ iavf_free_traffic_irqs(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
+@@ -3318,6 +3627,10 @@ static int iavf_close(struct net_device *netdev)
+ msecs_to_jiffies(500));
+ if (!status)
+ netdev_warn(netdev, "Device resources not yet released\n");
++
++ mutex_lock(&adapter->crit_lock);
++ adapter->aq_required |= aq_to_restore;
++ mutex_unlock(&adapter->crit_lock);
+ return 0;
+ }
+
+@@ -3337,8 +3650,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ iavf_notify_client_l2_params(&adapter->vsi);
+ adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+ }
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(iavf_wq, &adapter->reset_task);
++
++ if (netif_running(netdev)) {
++ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
++ queue_work(iavf_wq, &adapter->reset_task);
++ }
+
+ return 0;
+ }
+@@ -3354,11 +3670,16 @@ static int iavf_set_features(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+- /* Don't allow changing VLAN_RX flag when adapter is not capable
+- * of VLAN offload
++ /* Don't allow enabling VLAN features when adapter is not capable
++ * of VLAN offload/filtering
+ */
+ if (!VLAN_ALLOWED(adapter)) {
+- if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
++ netdev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
++ NETIF_F_HW_VLAN_CTAG_TX |
++ NETIF_F_HW_VLAN_CTAG_FILTER);
++ if (features & (NETIF_F_HW_VLAN_CTAG_RX |
++ NETIF_F_HW_VLAN_CTAG_TX |
++ NETIF_F_HW_VLAN_CTAG_FILTER))
+ return -EINVAL;
+ } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+@@ -3442,7 +3763,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+- if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
++ if (adapter->vf_res &&
++ !(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+ features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_FILTER);
+@@ -3630,72 +3952,14 @@ int iavf_process_config(struct iavf_adapter *adapter)
+ return 0;
+ }
+
+-/**
+- * iavf_init_task - worker thread to perform delayed initialization
+- * @work: pointer to work_struct containing our data
+- *
+- * This task completes the work that was begun in probe. Due to the nature
+- * of VF-PF communications, we may need to wait tens of milliseconds to get
+- * responses back from the PF. Rather than busy-wait in probe and bog down the
+- * whole system, we'll do it in a task so we can sleep.
+- * This task only runs during driver init. Once we've established
+- * communications with the PF driver and set up our netdev, the watchdog
+- * takes over.
+- **/
+-static void iavf_init_task(struct work_struct *work)
+-{
+- struct iavf_adapter *adapter = container_of(work,
+- struct iavf_adapter,
+- init_task.work);
+- struct iavf_hw *hw = &adapter->hw;
+-
+- if (iavf_lock_timeout(&adapter->crit_lock, 5000)) {
+- dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+- return;
+- }
+- switch (adapter->state) {
+- case __IAVF_STARTUP:
+- if (iavf_startup(adapter) < 0)
+- goto init_failed;
+- break;
+- case __IAVF_INIT_VERSION_CHECK:
+- if (iavf_init_version_check(adapter) < 0)
+- goto init_failed;
+- break;
+- case __IAVF_INIT_GET_RESOURCES:
+- if (iavf_init_get_resources(adapter) < 0)
+- goto init_failed;
+- goto out;
+- default:
+- goto init_failed;
+- }
+-
+- queue_delayed_work(iavf_wq, &adapter->init_task,
+- msecs_to_jiffies(30));
+- goto out;
+-init_failed:
+- if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
+- dev_err(&adapter->pdev->dev,
+- "Failed to communicate with PF; waiting before retry\n");
+- adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+- iavf_shutdown_adminq(hw);
+- adapter->state = __IAVF_STARTUP;
+- queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
+- goto out;
+- }
+- queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+-out:
+- mutex_unlock(&adapter->crit_lock);
+-}
+-
+ /**
+ * iavf_shutdown - Shutdown the device in preparation for a reboot
+ * @pdev: pci device structure
+ **/
+ static void iavf_shutdown(struct pci_dev *pdev)
+ {
+- struct net_device *netdev = pci_get_drvdata(pdev);
+- struct iavf_adapter *adapter = netdev_priv(netdev);
++ struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
++ struct net_device *netdev = adapter->netdev;
+
+ netif_device_detach(netdev);
+
+@@ -3705,7 +3969,7 @@ static void iavf_shutdown(struct pci_dev *pdev)
+ if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+ dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+ /* Prevent the watchdog from running. */
+- adapter->state = __IAVF_REMOVE;
++ iavf_change_state(adapter, __IAVF_REMOVE);
+ adapter->aq_required = 0;
+ mutex_unlock(&adapter->crit_lock);
+
+@@ -3778,7 +4042,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ hw->back = adapter;
+
+ adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+- adapter->state = __IAVF_STARTUP;
++ iavf_change_state(adapter, __IAVF_STARTUP);
+
+ /* Call save state here because it relies on the adapter struct. */
+ pci_save_state(pdev);
+@@ -3803,7 +4067,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ */
+ mutex_init(&adapter->crit_lock);
+ mutex_init(&adapter->client_lock);
+- mutex_init(&adapter->remove_lock);
+ mutex_init(&hw->aq.asq_mutex);
+ mutex_init(&hw->aq.arq_mutex);
+
+@@ -3822,8 +4085,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+ INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
+ INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
+- INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task);
+- queue_delayed_work(iavf_wq, &adapter->init_task,
++ queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+ msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
+
+ /* Setup the wait queue for indicating transition to down status */
+@@ -3880,10 +4142,11 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
+ static int __maybe_unused iavf_resume(struct device *dev_d)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev_d);
+- struct net_device *netdev = pci_get_drvdata(pdev);
+- struct iavf_adapter *adapter = netdev_priv(netdev);
++ struct iavf_adapter *adapter;
+ u32 err;
+
++ adapter = iavf_pdev_to_adapter(pdev);
++
+ pci_set_master(pdev);
+
+ rtnl_lock();
+@@ -3902,7 +4165,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
+
+ queue_work(iavf_wq, &adapter->reset_task);
+
+- netif_device_attach(netdev);
++ netif_device_attach(adapter->netdev);
+
+ return err;
+ }
+@@ -3918,23 +4181,49 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
+ **/
+ static void iavf_remove(struct pci_dev *pdev)
+ {
+- struct net_device *netdev = pci_get_drvdata(pdev);
+- struct iavf_adapter *adapter = netdev_priv(netdev);
++ struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
+ struct iavf_fdir_fltr *fdir, *fdirtmp;
+ struct iavf_vlan_filter *vlf, *vlftmp;
++ struct iavf_cloud_filter *cf, *cftmp;
+ struct iavf_adv_rss *rss, *rsstmp;
+ struct iavf_mac_filter *f, *ftmp;
+- struct iavf_cloud_filter *cf, *cftmp;
+- struct iavf_hw *hw = &adapter->hw;
++ struct net_device *netdev;
++ struct iavf_hw *hw;
+ int err;
+- /* Indicate we are in remove and not to run reset_task */
+- mutex_lock(&adapter->remove_lock);
+- cancel_delayed_work_sync(&adapter->init_task);
+- cancel_work_sync(&adapter->reset_task);
+- cancel_delayed_work_sync(&adapter->client_task);
++
++ netdev = adapter->netdev;
++ hw = &adapter->hw;
++
++ if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
++ return;
++
++ /* Wait until port initialization is complete.
++ * There are flows where register/unregister netdev may race.
++ */
++ while (1) {
++ mutex_lock(&adapter->crit_lock);
++ if (adapter->state == __IAVF_RUNNING ||
++ adapter->state == __IAVF_DOWN ||
++ adapter->state == __IAVF_INIT_FAILED) {
++ mutex_unlock(&adapter->crit_lock);
++ break;
++ }
++ /* Simply return if we already went through iavf_shutdown */
++ if (adapter->state == __IAVF_REMOVE) {
++ mutex_unlock(&adapter->crit_lock);
++ return;
++ }
++
++ mutex_unlock(&adapter->crit_lock);
++ usleep_range(500, 1000);
++ }
++ cancel_delayed_work_sync(&adapter->watchdog_task);
++
+ if (adapter->netdev_registered) {
+- unregister_netdev(netdev);
++ rtnl_lock();
++ unregister_netdevice(netdev);
+ adapter->netdev_registered = false;
++ rtnl_unlock();
+ }
+ if (CLIENT_ALLOWED(adapter)) {
+ err = iavf_lan_del_device(adapter);
+@@ -3943,6 +4232,10 @@ static void iavf_remove(struct pci_dev *pdev)
+ err);
+ }
+
++ mutex_lock(&adapter->crit_lock);
++ dev_info(&adapter->pdev->dev, "Remove device\n");
++ iavf_change_state(adapter, __IAVF_REMOVE);
++
+ iavf_request_reset(adapter);
+ msleep(50);
+ /* If the FW isn't responding, kick it once, but only once. */
+@@ -3950,24 +4243,24 @@ static void iavf_remove(struct pci_dev *pdev)
+ iavf_request_reset(adapter);
+ msleep(50);
+ }
+- if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+- dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+
++ iavf_misc_irq_disable(adapter);
+ /* Shut down all the garbage mashers on the detention level */
+- adapter->state = __IAVF_REMOVE;
++ cancel_work_sync(&adapter->reset_task);
++ cancel_delayed_work_sync(&adapter->watchdog_task);
++ cancel_work_sync(&adapter->adminq_task);
++ cancel_delayed_work_sync(&adapter->client_task);
++
+ adapter->aq_required = 0;
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
++
+ iavf_free_all_tx_resources(adapter);
+ iavf_free_all_rx_resources(adapter);
+- iavf_misc_irq_disable(adapter);
+ iavf_free_misc_irq(adapter);
++
+ iavf_reset_interrupt_capability(adapter);
+ iavf_free_q_vectors(adapter);
+
+- cancel_delayed_work_sync(&adapter->watchdog_task);
+-
+- cancel_work_sync(&adapter->adminq_task);
+-
+ iavf_free_rss(adapter);
+
+ if (hw->aq.asq.count)
+@@ -3979,8 +4272,6 @@ static void iavf_remove(struct pci_dev *pdev)
+ mutex_destroy(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
+ mutex_destroy(&adapter->crit_lock);
+- mutex_unlock(&adapter->remove_lock);
+- mutex_destroy(&adapter->remove_lock);
+
+ iounmap(hw->hw_addr);
+ pci_release_regions(pdev);
+@@ -4062,7 +4353,11 @@ static int __init iavf_init_module(void)
+ pr_err("%s: Failed to create workqueue\n", iavf_driver_name);
+ return -ENOMEM;
+ }
++
+ ret = pci_register_driver(&iavf_driver);
++ if (ret)
++ destroy_workqueue(iavf_wq);
++
+ return ret;
+ }
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h
+index bf793332fc9d5..a19e88898a0bb 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_register.h
++++ b/drivers/net/ethernet/intel/iavf/iavf_register.h
+@@ -40,7 +40,7 @@
+ #define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT)
+ #define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
+ #define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
+-#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
++#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */
+ #define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0
+ #define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT)
+ #define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+index 3525eab8e9f9a..643dbe5bf9973 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+@@ -114,8 +114,11 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+ {
+ u32 head, tail;
+
++ /* underlying hardware might not allow access and/or always return
++ * 0 for the head/tail registers so just use the cached values
++ */
+ head = ring->next_to_clean;
+- tail = readl(ring->tail);
++ tail = ring->next_to_use;
+
+ if (head != tail)
+ return (head < tail) ?
+@@ -1058,7 +1061,7 @@ static inline void iavf_rx_hash(struct iavf_ring *ring,
+ cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
+ IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);
+
+- if (ring->netdev->features & NETIF_F_RXHASH)
++ if (!(ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
+@@ -1250,11 +1253,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
+ {
+ struct iavf_rx_buffer *rx_buffer;
+
+- if (!size)
+- return NULL;
+-
+ rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
++ if (!size)
++ return rx_buffer;
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+@@ -1356,7 +1358,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
+ #endif
+ struct sk_buff *skb;
+
+- if (!rx_buffer)
++ if (!rx_buffer || !size)
+ return NULL;
+ /* prefetch first cache line of first page */
+ va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+@@ -1514,7 +1516,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buff_failed++;
+- if (rx_buffer)
++ if (rx_buffer && size)
+ rx_buffer->pagecnt_bias++;
+ break;
+ }
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 3c735968e1b85..262482c694587 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -244,11 +244,14 @@ out:
+ void iavf_configure_queues(struct iavf_adapter *adapter)
+ {
+ struct virtchnl_vsi_queue_config_info *vqci;
+- struct virtchnl_queue_pair_info *vqpi;
++ int i, max_frame = adapter->vf_res->max_mtu;
+ int pairs = adapter->num_active_queues;
+- int i, max_frame = IAVF_MAX_RXBUFFER;
++ struct virtchnl_queue_pair_info *vqpi;
+ size_t len;
+
++ if (max_frame > IAVF_MAX_RXBUFFER || !max_frame)
++ max_frame = IAVF_MAX_RXBUFFER;
++
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n",
+@@ -607,7 +610,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
+ if (f->add)
+ count++;
+ }
+- if (!count) {
++ if (!count || !VLAN_ALLOWED(adapter)) {
+ adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ return;
+@@ -673,9 +676,19 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+- list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+- if (f->remove)
++ list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
++ /* since VLAN capabilities are not allowed, we dont want to send
++ * a VLAN delete request because it will most likely fail and
++ * create unnecessary errors/noise, so just free the VLAN
++ * filters marked for removal to enable bailing out before
++ * sending a virtchnl message
++ */
++ if (f->remove && !VLAN_ALLOWED(adapter)) {
++ list_del(&f->list);
++ kfree(f);
++ } else if (f->remove) {
+ count++;
++ }
+ }
+ if (!count) {
+ adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+@@ -784,6 +797,8 @@ void iavf_request_stats(struct iavf_adapter *adapter)
+ /* no error message, this isn't crucial */
+ return;
+ }
++
++ adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS;
+ adapter->current_op = VIRTCHNL_OP_GET_STATS;
+ vqs.vsi_id = adapter->vsi_res->vsi_id;
+ /* queue maps are ignored for this message - only the vsi is used */
+@@ -1448,6 +1463,22 @@ void iavf_request_reset(struct iavf_adapter *adapter)
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ }
+
++/**
++ * iavf_netdev_features_vlan_strip_set - update vlan strip status
++ * @netdev: ptr to netdev being adjusted
++ * @enable: enable or disable vlan strip
++ *
++ * Helper function to change vlan strip status in netdev->features.
++ */
++static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev,
++ const bool enable)
++{
++ if (enable)
++ netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
++ else
++ netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
++}
++
+ /**
+ * iavf_virtchnl_completion
+ * @adapter: adapter structure
+@@ -1671,8 +1702,18 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ }
+ break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
++ dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
++ /* Vlan stripping could not be enabled by ethtool.
++ * Disable it in netdev->features.
++ */
++ iavf_netdev_features_vlan_strip_set(netdev, false);
++ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+ dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
++ /* Vlan stripping could not be disabled by ethtool.
++ * Enable it in netdev->features.
++ */
++ iavf_netdev_features_vlan_strip_set(netdev, true);
+ break;
+ default:
+ dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
+@@ -1685,7 +1726,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ if (!v_retval)
+ iavf_mac_add_ok(adapter);
+ if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+- ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
++ eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+ break;
+ case VIRTCHNL_OP_GET_STATS: {
+ struct iavf_eth_stats *stats =
+@@ -1716,14 +1757,31 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+ } else {
+ /* refresh current mac address if changed */
+- ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
++ eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+ ether_addr_copy(netdev->perm_addr,
+ adapter->hw.mac.addr);
+ }
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ iavf_add_filter(adapter, adapter->hw.mac.addr);
++
++ if (VLAN_ALLOWED(adapter)) {
++ if (!list_empty(&adapter->vlan_filter_list)) {
++ struct iavf_vlan_filter *vlf;
++
++ /* re-add all VLAN filters over virtchnl */
++ list_for_each_entry(vlf,
++ &adapter->vlan_filter_list,
++ list)
++ vlf->add = true;
++
++ adapter->aq_required |=
++ IAVF_FLAG_AQ_ADD_VLAN_FILTER;
++ }
++ }
++
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ iavf_process_config(adapter);
++ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+ }
+ break;
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+@@ -1735,7 +1793,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ iavf_free_all_tx_resources(adapter);
+ iavf_free_all_rx_resources(adapter);
+ if (adapter->state == __IAVF_DOWN_PENDING) {
+- adapter->state = __IAVF_DOWN;
++ iavf_change_state(adapter, __IAVF_DOWN);
+ wake_up(&adapter->down_waitqueue);
+ }
+ break;
+@@ -1889,6 +1947,20 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ spin_unlock_bh(&adapter->adv_rss_lock);
+ }
+ break;
++ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
++ /* PF enabled vlan strip on this VF.
++ * Update netdev->features if needed to be in sync with ethtool.
++ */
++ if (!v_retval)
++ iavf_netdev_features_vlan_strip_set(netdev, true);
++ break;
++ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
++ /* PF disabled vlan strip on this VF.
++ * Update netdev->features if needed to be in sync with ethtool.
++ */
++ if (!v_retval)
++ iavf_netdev_features_vlan_strip_set(netdev, false);
++ break;
+ default:
+ if (adapter->current_op && (v_opcode != adapter->current_op))
+ dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
+diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
+index 3c4f08d20414e..43fe91213aa58 100644
+--- a/drivers/net/ethernet/intel/ice/ice.h
++++ b/drivers/net/ethernet/intel/ice/ice.h
+@@ -139,13 +139,10 @@
+ #define ice_for_each_q_vector(vsi, i) \
+ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+
+-#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
+- ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)
++#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX)
+
+ #define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
+- ICE_PROMISC_MCAST_TX | \
+ ICE_PROMISC_UCAST_RX | \
+- ICE_PROMISC_MCAST_RX | \
+ ICE_PROMISC_VLAN_TX | \
+ ICE_PROMISC_VLAN_RX)
+
+@@ -234,7 +231,6 @@ enum ice_pf_state {
+ ICE_VFLR_EVENT_PENDING,
+ ICE_FLTR_OVERFLOW_PROMISC,
+ ICE_VF_DIS,
+- ICE_VF_DEINIT_IN_PROGRESS,
+ ICE_CFG_BUSY,
+ ICE_SERVICE_SCHED,
+ ICE_SERVICE_DIS,
+@@ -245,6 +241,7 @@ enum ice_pf_state {
+ ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+ ICE_PHY_INIT_COMPLETE,
+ ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */
++ ICE_AUX_ERR_PENDING,
+ ICE_STATE_NBITS /* must be last */
+ };
+
+@@ -306,10 +303,6 @@ struct ice_vsi {
+ spinlock_t arfs_lock; /* protects aRFS hash table and filter state */
+ atomic_t *arfs_last_fltr_id;
+
+- /* devlink port data */
+- struct devlink_port devlink_port;
+- bool devlink_port_registered;
+-
+ u16 max_frame;
+ u16 rx_buf_len;
+
+@@ -405,6 +398,9 @@ enum ice_pf_flags {
+ ICE_FLAG_VF_TRUE_PROMISC_ENA,
+ ICE_FLAG_MDD_AUTO_RESET_VF,
+ ICE_FLAG_LINK_LENIENT_MODE_ENA,
++ ICE_FLAG_PLUG_AUX_DEV,
++ ICE_FLAG_UNPLUG_AUX_DEV,
++ ICE_FLAG_MTU_CHANGED,
+ ICE_PF_FLAGS_NBITS /* must be last */
+ };
+
+@@ -421,6 +417,9 @@ struct ice_pf {
+ struct devlink_region *nvm_region;
+ struct devlink_region *devcaps_region;
+
++ /* devlink port data */
++ struct devlink_port devlink_port;
++
+ /* OS reserved IRQ details */
+ struct msix_entry *msix_entries;
+ struct ice_res_tracker *irq_tracker;
+@@ -454,6 +453,7 @@ struct ice_pf {
+ struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
+ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
+ struct mutex tc_mutex; /* lock to protect TC changes */
++ struct mutex adev_mutex; /* lock to protect aux device access */
+ u32 msg_enable;
+ struct ice_ptp ptp;
+ u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */
+@@ -467,6 +467,7 @@ struct ice_pf {
+ wait_queue_head_t reset_wait_queue;
+
+ u32 hw_csum_rx_error;
++ u32 oicr_err_reg;
+ u16 oicr_idx; /* Other interrupt cause MSIX vector index */
+ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
+ u16 max_pf_txqs; /* Total Tx queues PF wide */
+@@ -553,7 +554,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
+
+ static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+ {
+- return !!vsi->xdp_prog;
++ return !!READ_ONCE(vsi->xdp_prog);
+ }
+
+ static inline void ice_set_ring_xdp(struct ice_ring *ring)
+@@ -641,7 +642,7 @@ void ice_set_ethtool_ops(struct net_device *netdev);
+ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+ u16 ice_get_avail_txq_count(struct ice_pf *pf);
+ u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
++int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked);
+ void ice_update_vsi_stats(struct ice_vsi *vsi);
+ void ice_update_pf_stats(struct ice_pf *pf);
+ int ice_up(struct ice_vsi *vsi);
+@@ -696,7 +697,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
+ if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
+ set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ set_bit(ICE_FLAG_AUX_ENA, pf->flags);
+- ice_plug_aux_dev(pf);
++ set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+ }
+ }
+
+@@ -706,7 +707,11 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
+ */
+ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
+ {
+- ice_unplug_aux_dev(pf);
++ /* defer unplug to service task to avoid RTNL lock and
++ * clear PLUG bit so that pending plugs don't interfere
++ */
++ clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
++ set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+ clear_bit(ICE_FLAG_AUX_ENA, pf->flags);
+ }
+diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
+index c36057efc7ae3..09525dbeccfec 100644
+--- a/drivers/net/ethernet/intel/ice/ice_base.c
++++ b/drivers/net/ethernet/intel/ice/ice_base.c
+@@ -115,6 +115,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
+ q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
+ q_vector->tx.itr_mode = ITR_DYNAMIC;
+ q_vector->rx.itr_mode = ITR_DYNAMIC;
++ q_vector->tx.type = ICE_TX_CONTAINER;
++ q_vector->rx.type = ICE_RX_CONTAINER;
+
+ if (vsi->type == ICE_VSI_VF)
+ goto out;
+@@ -357,7 +359,8 @@ static int ice_setup_rx_ctx(struct ice_ring *ring)
+ /* Receive Packet Data Buffer Size.
+ * The Packet Data Buffer Size is defined in 128 byte units.
+ */
+- rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
++ rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
++ BIT_ULL(ICE_RLAN_CTX_DBUF_S));
+
+ /* use 32 byte descriptors */
+ rlan_ctx.dsize = 1;
+@@ -909,7 +912,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ } else if (status == ICE_ERR_DOES_NOT_EXIST) {
+ dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
+ } else if (status) {
+- dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n",
++ dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n",
+ ice_stat_str(status));
+ return -ENODEV;
+ }
+diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
+index df5ad4de1f00e..3de6f16f985ab 100644
+--- a/drivers/net/ethernet/intel/ice/ice_common.c
++++ b/drivers/net/ethernet/intel/ice/ice_common.c
+@@ -3270,9 +3270,10 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+
+ if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
+ !ice_fw_supports_report_dflt_cfg(hw)) {
+- struct ice_link_default_override_tlv tlv;
++ struct ice_link_default_override_tlv tlv = { 0 };
+
+- if (ice_get_link_default_override(&tlv, pi))
++ status = ice_get_link_default_override(&tlv, pi);
++ if (status)
+ goto out;
+
+ if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
+diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+index 926cf748c5ecd..dd4195e964faf 100644
+--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+@@ -355,7 +355,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
+ goto out;
+ }
+
+- ice_pf_dcb_recfg(pf);
++ ice_pf_dcb_recfg(pf, false);
+
+ out:
+ ice_ena_vsi(pf_vsi, true);
+@@ -644,12 +644,13 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+ /**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
++ * @locked: is adev device lock held
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+-void ice_pf_dcb_recfg(struct ice_pf *pf)
++void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked)
+ {
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+ struct iidc_event *event;
+@@ -688,14 +689,16 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
+ if (vsi->type == ICE_VSI_PF)
+ ice_dcbnl_set_all(vsi);
+ }
+- /* Notify the AUX drivers that TC change is finished */
+- event = kzalloc(sizeof(*event), GFP_KERNEL);
+- if (!event)
+- return;
++ if (!locked) {
++ /* Notify the AUX drivers that TC change is finished */
++ event = kzalloc(sizeof(*event), GFP_KERNEL);
++ if (!event)
++ return;
+
+- set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
+- ice_send_event_to_aux(pf, event);
+- kfree(event);
++ set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
++ ice_send_event_to_aux(pf, event);
++ kfree(event);
++ }
+ }
+
+ /**
+@@ -943,7 +946,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+ }
+
+ /* changes in configuration update VSI */
+- ice_pf_dcb_recfg(pf);
++ ice_pf_dcb_recfg(pf, false);
+
+ ice_ena_vsi(pf_vsi, true);
+ unlock_rtnl:
+diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+index 261b6e2ed7bc2..33a609e92d253 100644
+--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
++++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+@@ -23,7 +23,7 @@ u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+ int
+ ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+ int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg);
+-void ice_pf_dcb_recfg(struct ice_pf *pf);
++void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked);
+ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+ int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
+ void ice_update_dcb_stats(struct ice_pf *pf);
+@@ -113,7 +113,7 @@ ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
+ return false;
+ }
+
+-static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { }
++static inline void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { }
+ static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
+ static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
+ static inline void
+diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
+index da7288bdc9a3f..2ec5d5cb72803 100644
+--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
++++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
+@@ -526,60 +526,115 @@ void ice_devlink_unregister(struct ice_pf *pf)
+ }
+
+ /**
+- * ice_devlink_create_port - Create a devlink port for this VSI
+- * @vsi: the VSI to create a port for
++ * ice_devlink_create_pf_port - Create a devlink port for this PF
++ * @pf: the PF to create a devlink port for
+ *
+- * Create and register a devlink_port for this VSI.
++ * Create and register a devlink_port for this PF.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+-int ice_devlink_create_port(struct ice_vsi *vsi)
++int ice_devlink_create_pf_port(struct ice_pf *pf)
+ {
+ struct devlink_port_attrs attrs = {};
+- struct ice_port_info *pi;
++ struct devlink_port *devlink_port;
+ struct devlink *devlink;
++ struct ice_vsi *vsi;
+ struct device *dev;
+- struct ice_pf *pf;
+ int err;
+
+- /* Currently we only create devlink_port instances for PF VSIs */
+- if (vsi->type != ICE_VSI_PF)
+- return -EINVAL;
+-
+- pf = vsi->back;
+- devlink = priv_to_devlink(pf);
+ dev = ice_pf_to_dev(pf);
+- pi = pf->hw.port_info;
++
++ devlink_port = &pf->devlink_port;
++
++ vsi = ice_get_main_vsi(pf);
++ if (!vsi)
++ return -EIO;
+
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+- attrs.phys.port_number = pi->lport;
+- devlink_port_attrs_set(&vsi->devlink_port, &attrs);
+- err = devlink_port_register(devlink, &vsi->devlink_port, vsi->idx);
++ attrs.phys.port_number = pf->hw.bus.func;
++ devlink_port_attrs_set(devlink_port, &attrs);
++ devlink = priv_to_devlink(pf);
++
++ err = devlink_port_register(devlink, devlink_port, vsi->idx);
+ if (err) {
+- dev_err(dev, "devlink_port_register failed: %d\n", err);
++ dev_err(dev, "Failed to create devlink port for PF %d, error %d\n",
++ pf->hw.pf_id, err);
+ return err;
+ }
+
+- vsi->devlink_port_registered = true;
++ return 0;
++}
++
++/**
++ * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF
++ * @pf: the PF to cleanup
++ *
++ * Unregisters the devlink_port structure associated with this PF.
++ */
++void ice_devlink_destroy_pf_port(struct ice_pf *pf)
++{
++ struct devlink_port *devlink_port;
++
++ devlink_port = &pf->devlink_port;
++
++ devlink_port_type_clear(devlink_port);
++ devlink_port_unregister(devlink_port);
++}
++
++/**
++ * ice_devlink_create_vf_port - Create a devlink port for this VF
++ * @vf: the VF to create a port for
++ *
++ * Create and register a devlink_port for this VF.
++ *
++ * Return: zero on success or an error code on failure.
++ */
++int ice_devlink_create_vf_port(struct ice_vf *vf)
++{
++ struct devlink_port_attrs attrs = {};
++ struct devlink_port *devlink_port;
++ struct devlink *devlink;
++ struct ice_vsi *vsi;
++ struct device *dev;
++ struct ice_pf *pf;
++ int err;
++
++ pf = vf->pf;
++ dev = ice_pf_to_dev(pf);
++ vsi = ice_get_vf_vsi(vf);
++ devlink_port = &vf->devlink_port;
++
++ attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF;
++ attrs.pci_vf.pf = pf->hw.bus.func;
++ attrs.pci_vf.vf = vf->vf_id;
++
++ devlink_port_attrs_set(devlink_port, &attrs);
++ devlink = priv_to_devlink(pf);
++
++ err = devlink_port_register(devlink, devlink_port, vsi->idx);
++ if (err) {
++ dev_err(dev, "Failed to create devlink port for VF %d, error %d\n",
++ vf->vf_id, err);
++ return err;
++ }
+
+ return 0;
+ }
+
+ /**
+- * ice_devlink_destroy_port - Destroy the devlink_port for this VSI
+- * @vsi: the VSI to cleanup
++ * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF
++ * @vf: the VF to cleanup
+ *
+- * Unregisters the devlink_port structure associated with this VSI.
++ * Unregisters the devlink_port structure associated with this VF.
+ */
+-void ice_devlink_destroy_port(struct ice_vsi *vsi)
++void ice_devlink_destroy_vf_port(struct ice_vf *vf)
+ {
+- if (!vsi->devlink_port_registered)
+- return;
++ struct devlink_port *devlink_port;
+
+- devlink_port_type_clear(&vsi->devlink_port);
+- devlink_port_unregister(&vsi->devlink_port);
++ devlink_port = &vf->devlink_port;
+
+- vsi->devlink_port_registered = false;
++ devlink_port_type_clear(devlink_port);
++ devlink_port_unregister(devlink_port);
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h
+index e07e74426bde8..e30284ccbed4c 100644
+--- a/drivers/net/ethernet/intel/ice/ice_devlink.h
++++ b/drivers/net/ethernet/intel/ice/ice_devlink.h
+@@ -8,8 +8,10 @@ struct ice_pf *ice_allocate_pf(struct device *dev);
+
+ int ice_devlink_register(struct ice_pf *pf);
+ void ice_devlink_unregister(struct ice_pf *pf);
+-int ice_devlink_create_port(struct ice_vsi *vsi);
+-void ice_devlink_destroy_port(struct ice_vsi *vsi);
++int ice_devlink_create_pf_port(struct ice_pf *pf);
++void ice_devlink_destroy_pf_port(struct ice_pf *pf);
++int ice_devlink_create_vf_port(struct ice_vf *vf);
++void ice_devlink_destroy_vf_port(struct ice_vf *vf);
+
+ void ice_devlink_init_regions(struct ice_pf *pf);
+ void ice_devlink_destroy_regions(struct ice_pf *pf);
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+index c451cf401e635..60f73e775beeb 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+@@ -651,7 +651,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+ rx_desc = ICE_RX_DESC(rx_ring, i);
+
+ if (!(rx_desc->wb.status_error0 &
+- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+ continue;
+
+ rx_buf = &rx_ring->rx_buf[i];
+@@ -2150,6 +2151,42 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
+ return err;
+ }
+
++/**
++ * ice_set_phy_type_from_speed - set phy_types based on speeds
++ * and advertised modes
++ * @ks: ethtool link ksettings struct
++ * @phy_type_low: pointer to the lower part of phy_type
++ * @phy_type_high: pointer to the higher part of phy_type
++ * @adv_link_speed: targeted link speeds bitmap
++ */
++static void
++ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
++ u64 *phy_type_low, u64 *phy_type_high,
++ u16 adv_link_speed)
++{
++ /* Handle 1000M speed in a special way because ice_update_phy_type
++ * enables all link modes, but having mixed copper and optical
++ * standards is not supported.
++ */
++ adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
++
++ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
++ 1000baseT_Full))
++ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
++ ICE_PHY_TYPE_LOW_1G_SGMII;
++
++ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
++ 1000baseKX_Full))
++ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
++
++ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
++ 1000baseX_Full))
++ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
++ ICE_PHY_TYPE_LOW_1000BASE_LX;
++
++ ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
++}
++
+ /**
+ * ice_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+@@ -2275,7 +2312,7 @@ ice_set_link_ksettings(struct net_device *netdev,
+ goto done;
+ }
+
+- curr_link_speed = pi->phy.link_info.link_speed;
++ curr_link_speed = pi->phy.curr_user_speed_req;
+ adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
+
+ /* If speed didn't get set, set it to what it currently is.
+@@ -2286,7 +2323,8 @@ ice_set_link_ksettings(struct net_device *netdev,
+ adv_link_speed = curr_link_speed;
+
+ /* Convert the advertise link speeds to their corresponded PHY_TYPE */
+- ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
++ ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
++ adv_link_speed);
+
+ if (!autoneg_changed && adv_link_speed == curr_link_speed) {
+ netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+@@ -2750,6 +2788,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+ tx_rings[i].count = new_tx_cnt;
+ tx_rings[i].desc = NULL;
+ tx_rings[i].tx_buf = NULL;
++ tx_rings[i].tx_tstamps = &pf->ptp.port.tx;
+ err = ice_setup_tx_ring(&tx_rings[i]);
+ if (err) {
+ while (i--)
+@@ -3354,7 +3393,9 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int new_rx = 0, new_tx = 0;
++ bool locked = false;
+ u32 curr_combined;
++ int ret = 0;
+
+ /* do not support changing channels in Safe Mode */
+ if (ice_is_safe_mode(pf)) {
+@@ -3403,15 +3444,33 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+ return -EINVAL;
+ }
+
+- ice_vsi_recfg_qs(vsi, new_rx, new_tx);
++ if (pf->adev) {
++ mutex_lock(&pf->adev_mutex);
++ device_lock(&pf->adev->dev);
++ locked = true;
++ if (pf->adev->dev.driver) {
++ netdev_err(dev, "Cannot change channels when RDMA is active\n");
++ ret = -EBUSY;
++ goto adev_unlock;
++ }
++ }
++
++ ice_vsi_recfg_qs(vsi, new_rx, new_tx, locked);
+
+- if (!netif_is_rxfh_configured(dev))
+- return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
++ if (!netif_is_rxfh_configured(dev)) {
++ ret = ice_vsi_set_dflt_rss_lut(vsi, new_rx);
++ goto adev_unlock;
++ }
+
+ /* Update rss_size due to change in Rx queues */
+ vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+
+- return 0;
++adev_unlock:
++ if (locked) {
++ device_unlock(&pf->adev->dev);
++ mutex_unlock(&pf->adev_mutex);
++ }
++ return ret;
+ }
+
+ /**
+@@ -3466,15 +3525,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+ return 0;
+ }
+
+-enum ice_container_type {
+- ICE_RX_CONTAINER,
+- ICE_TX_CONTAINER,
+-};
+-
+ /**
+ * ice_get_rc_coalesce - get ITR values for specific ring container
+ * @ec: ethtool structure to fill with driver's coalesce settings
+- * @c_type: container type, Rx or Tx
+ * @rc: ring container that the ITR values will come from
+ *
+ * Query the device for ice_ring_container specific ITR values. This is
+@@ -3484,13 +3537,12 @@ enum ice_container_type {
+ * Returns 0 on success, negative otherwise.
+ */
+ static int
+-ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
+- struct ice_ring_container *rc)
++ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc)
+ {
+ if (!rc->ring)
+ return -EINVAL;
+
+- switch (c_type) {
++ switch (rc->type) {
+ case ICE_RX_CONTAINER:
+ ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc);
+ ec->rx_coalesce_usecs = rc->itr_setting;
+@@ -3501,7 +3553,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
+ ec->tx_coalesce_usecs = rc->itr_setting;
+ break;
+ default:
+- dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type);
++ dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", rc->type);
+ return -EINVAL;
+ }
+
+@@ -3522,18 +3574,18 @@ static int
+ ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+ {
+ if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+- if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
++ if (ice_get_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx))
+ return -EINVAL;
+- if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
++ if (ice_get_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx))
+ return -EINVAL;
+ } else if (q_num < vsi->num_rxq) {
+- if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
++ if (ice_get_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx))
+ return -EINVAL;
+ } else if (q_num < vsi->num_txq) {
+- if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
++ if (ice_get_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx))
+ return -EINVAL;
+ } else {
+@@ -3585,7 +3637,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+
+ /**
+ * ice_set_rc_coalesce - set ITR values for specific ring container
+- * @c_type: container type, Rx or Tx
+ * @ec: ethtool structure from user to update ITR settings
+ * @rc: ring container that the ITR values will come from
+ * @vsi: VSI associated to the ring container
+@@ -3597,10 +3648,10 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ * Returns 0 on success, negative otherwise.
+ */
+ static int
+-ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
++ice_set_rc_coalesce(struct ethtool_coalesce *ec,
+ struct ice_ring_container *rc, struct ice_vsi *vsi)
+ {
+- const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx";
++ const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? "rx" : "tx";
+ u32 use_adaptive_coalesce, coalesce_usecs;
+ struct ice_pf *pf = vsi->back;
+ u16 itr_setting;
+@@ -3608,7 +3659,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
+ if (!rc->ring)
+ return -EINVAL;
+
+- switch (c_type) {
++ switch (rc->type) {
+ case ICE_RX_CONTAINER:
+ if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
+ (ec->rx_coalesce_usecs_high &&
+@@ -3641,7 +3692,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
+ break;
+ default:
+ dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+- c_type);
++ rc->type);
+ return -EINVAL;
+ }
+
+@@ -3690,22 +3741,22 @@ static int
+ ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+ {
+ if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+- if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
++ if (ice_set_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx,
+ vsi))
+ return -EINVAL;
+
+- if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
++ if (ice_set_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx,
+ vsi))
+ return -EINVAL;
+ } else if (q_num < vsi->num_rxq) {
+- if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
++ if (ice_set_rc_coalesce(ec,
+ &vsi->rx_rings[q_num]->q_vector->rx,
+ vsi))
+ return -EINVAL;
+ } else if (q_num < vsi->num_txq) {
+- if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
++ if (ice_set_rc_coalesce(ec,
+ &vsi->tx_rings[q_num]->q_vector->tx,
+ vsi))
+ return -EINVAL;
+@@ -3947,6 +3998,8 @@ ice_get_module_eeprom(struct net_device *netdev,
+ * SFP modules only ever use page 0.
+ */
+ if (page == 0 || !(data[0x2] & 0x4)) {
++ u32 copy_len;
++
+ /* If i2c bus is busy due to slow page change or
+ * link management access, call can fail. This is normal.
+ * So we retry this a few times.
+@@ -3970,8 +4023,8 @@ ice_get_module_eeprom(struct net_device *netdev,
+ }
+
+ /* Make sure we have enough room for the new block */
+- if ((i + SFF_READ_BLOCK_SIZE) < ee->len)
+- memcpy(data + i, value, SFF_READ_BLOCK_SIZE);
++ copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i);
++ memcpy(data + i, value, copy_len);
+ }
+ }
+ return 0;
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+index 16de603b280c6..0106ea3519a01 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+@@ -1135,16 +1135,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+ ICE_FLOW_FLD_OFF_INVAL);
+ }
+
+- /* add filter for outer headers */
+ fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
++
++ assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
++
++ /* add filter for outer headers */
+ ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
+ ICE_FD_HW_SEG_NON_TUN);
+- if (ret == -EEXIST)
+- /* Rule already exists, free memory and continue */
+- devm_kfree(dev, seg);
+- else if (ret)
++ if (ret == -EEXIST) {
++ /* Rule already exists, free memory and count as success */
++ ret = 0;
++ goto err_exit;
++ } else if (ret) {
+ /* could not write filter, free memory */
+ goto err_exit;
++ }
+
+ /* make tunneled filter HW entries if possible */
+ memcpy(&tun_seg[1], seg, sizeof(*seg));
+@@ -1159,18 +1164,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+ devm_kfree(dev, tun_seg);
+ }
+
+- if (perfect_filter)
+- set_bit(fltr_idx, hw->fdir_perfect_fltr);
+- else
+- clear_bit(fltr_idx, hw->fdir_perfect_fltr);
+-
+ return ret;
+
+ err_exit:
+ devm_kfree(dev, tun_seg);
+ devm_kfree(dev, seg);
+
+- return -EOPNOTSUPP;
++ return ret;
+ }
+
+ /**
+@@ -1684,7 +1684,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+ input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
+ /* input struct is added to the HW filter list */
+- ice_fdir_update_list_entry(pf, input, fsp->location);
++ ret = ice_fdir_update_list_entry(pf, input, fsp->location);
++ if (ret)
++ goto release_lock;
+
+ ret = ice_fdir_write_all_fltr(pf, input, true);
+ if (ret)
+diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
+index 2418d4fff037f..e27b4de7e7aa3 100644
+--- a/drivers/net/ethernet/intel/ice/ice_fltr.c
++++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
+@@ -128,7 +128,7 @@ void ice_fltr_remove_all(struct ice_vsi *vsi)
+ * @mac: MAC address to add
+ * @action: filter action
+ */
+-int
++enum ice_status
+ ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+ const u8 *mac, enum ice_sw_fwd_act_type action)
+ {
+diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
+index adcc9a251595a..1dd3622991c65 100644
+--- a/drivers/net/ethernet/intel/ice/ice_idc.c
++++ b/drivers/net/ethernet/intel/ice/ice_idc.c
+@@ -34,14 +34,20 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
+ {
+ struct iidc_auxiliary_drv *iadrv;
+
+- if (!pf->adev)
++ if (WARN_ON_ONCE(!in_task()))
+ return;
+
++ mutex_lock(&pf->adev_mutex);
++ if (!pf->adev)
++ goto finish;
++
+ device_lock(&pf->adev->dev);
+ iadrv = ice_get_auxiliary_drv(pf);
+ if (iadrv && iadrv->event_handler)
+ iadrv->event_handler(pf, event);
+ device_unlock(&pf->adev->dev);
++finish:
++ mutex_unlock(&pf->adev_mutex);
+ }
+
+ /**
+@@ -282,7 +288,6 @@ int ice_plug_aux_dev(struct ice_pf *pf)
+ return -ENOMEM;
+
+ adev = &iadev->adev;
+- pf->adev = adev;
+ iadev->pf = pf;
+
+ adev->id = pf->aux_idx;
+@@ -292,18 +297,20 @@ int ice_plug_aux_dev(struct ice_pf *pf)
+
+ ret = auxiliary_device_init(adev);
+ if (ret) {
+- pf->adev = NULL;
+ kfree(iadev);
+ return ret;
+ }
+
+ ret = auxiliary_device_add(adev);
+ if (ret) {
+- pf->adev = NULL;
+ auxiliary_device_uninit(adev);
+ return ret;
+ }
+
++ mutex_lock(&pf->adev_mutex);
++ pf->adev = adev;
++ mutex_unlock(&pf->adev_mutex);
++
+ return 0;
+ }
+
+@@ -312,12 +319,17 @@ int ice_plug_aux_dev(struct ice_pf *pf)
+ */
+ void ice_unplug_aux_dev(struct ice_pf *pf)
+ {
+- if (!pf->adev)
+- return;
++ struct auxiliary_device *adev;
+
+- auxiliary_device_delete(pf->adev);
+- auxiliary_device_uninit(pf->adev);
++ mutex_lock(&pf->adev_mutex);
++ adev = pf->adev;
+ pf->adev = NULL;
++ mutex_unlock(&pf->adev_mutex);
++
++ if (adev) {
++ auxiliary_device_delete(adev);
++ auxiliary_device_uninit(adev);
++ }
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
+index e375ac849aecd..4f954db01b929 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lag.c
++++ b/drivers/net/ethernet/intel/ice/ice_lag.c
+@@ -204,17 +204,39 @@ ice_lag_unlink(struct ice_lag *lag,
+ lag->upper_netdev = NULL;
+ }
+
+- if (lag->peer_netdev) {
+- dev_put(lag->peer_netdev);
+- lag->peer_netdev = NULL;
+- }
+-
++ lag->peer_netdev = NULL;
+ ice_set_sriov_cap(pf);
+ ice_set_rdma_cap(pf);
+ lag->bonded = false;
+ lag->role = ICE_LAG_NONE;
+ }
+
++/**
++ * ice_lag_unregister - handle netdev unregister events
++ * @lag: LAG info struct
++ * @netdev: netdev reporting the event
++ */
++static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev)
++{
++ struct ice_pf *pf = lag->pf;
++
++ /* check to see if this event is for this netdev
++ * check that we are in an aggregate
++ */
++ if (netdev != lag->netdev || !lag->bonded)
++ return;
++
++ if (lag->upper_netdev) {
++ dev_put(lag->upper_netdev);
++ lag->upper_netdev = NULL;
++ ice_set_sriov_cap(pf);
++ ice_set_rdma_cap(pf);
++ }
++ /* perform some cleanup in case we come back */
++ lag->bonded = false;
++ lag->role = ICE_LAG_NONE;
++}
++
+ /**
+ * ice_lag_changeupper_event - handle LAG changeupper event
+ * @lag: LAG info struct
+@@ -307,7 +329,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
+ ice_lag_info_event(lag, ptr);
+ break;
+ case NETDEV_UNREGISTER:
+- ice_lag_unlink(lag, ptr);
++ ice_lag_unregister(lag, netdev);
+ break;
+ default:
+ break;
+diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+index 80736e0ec0dca..3f635fdbfaff9 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
++++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+@@ -528,6 +528,7 @@ struct ice_tx_ctx_desc {
+ (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+ #define ICE_TXD_CTX_QW1_MSS_S 50
++#define ICE_TXD_CTX_MIN_MSS 64
+
+ enum ice_tx_ctx_desc_cmd_bits {
+ ICE_TX_CTX_DESC_TSO = 0x01,
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index b718e196af2a4..4417238b0e64f 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -83,8 +83,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+ if (!vsi->rx_rings)
+ goto err_rings;
+
+- /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+- vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
++ /* txq_map needs to have enough space to track both Tx (stack) rings
++ * and XDP rings; at this point vsi->num_xdp_txq might not be set,
++ * so use num_possible_cpus() as we want to always provide XDP ring
++ * per CPU, regardless of queue count settings from user that might
++ * have come from ethtool's set_channels() callback;
++ */
++ vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
+ sizeof(*vsi->txq_map), GFP_KERNEL);
+
+ if (!vsi->txq_map)
+@@ -1301,6 +1306,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+ ring->tx_tstamps = &pf->ptp.port.tx;
+ ring->dev = dev;
+ ring->count = vsi->num_tx_desc;
++ ring->txq_teid = ICE_INVAL_TEID;
+ WRITE_ONCE(vsi->tx_rings[i], ring);
+ }
+
+@@ -1516,6 +1522,12 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
+ if (status)
+ dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %s\n",
+ vsi_num, ice_stat_str(status));
++
++ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_ESP_SPI,
++ ICE_FLOW_SEG_HDR_ESP);
++ if (status)
++ dev_dbg(dev, "ice_add_rss_cfg failed for esp/spi flow, vsi = %d, error = %d\n",
++ vsi_num, status);
+ }
+
+ /**
+@@ -2860,7 +2872,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
+ clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+ }
+
+- ice_devlink_destroy_port(vsi);
++ if (vsi->type == ICE_VSI_PF)
++ ice_devlink_destroy_pf_port(pf);
+
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_rss_clean(vsi);
+@@ -2911,6 +2924,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
+ }
+ }
+
++ if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
++ ice_clear_dflt_vsi(pf->first_sw);
+ ice_fltr_remove_all(vsi);
+ ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+ err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+@@ -2965,8 +2980,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+- coalesce[i].itr_tx = q_vector->tx.itr_setting;
+- coalesce[i].itr_rx = q_vector->rx.itr_setting;
++ coalesce[i].itr_tx = q_vector->tx.itr_settings;
++ coalesce[i].itr_rx = q_vector->rx.itr_settings;
+ coalesce[i].intrl = q_vector->intrl;
+
+ if (i < vsi->num_txq)
+@@ -3022,21 +3037,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+ */
+ if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
+ rc = &vsi->q_vectors[i]->rx;
+- rc->itr_setting = coalesce[i].itr_rx;
++ rc->itr_settings = coalesce[i].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+ } else if (i < vsi->alloc_rxq) {
+ rc = &vsi->q_vectors[i]->rx;
+- rc->itr_setting = coalesce[0].itr_rx;
++ rc->itr_settings = coalesce[0].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+ }
+
+ if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
+ rc = &vsi->q_vectors[i]->tx;
+- rc->itr_setting = coalesce[i].itr_tx;
++ rc->itr_settings = coalesce[i].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+ } else if (i < vsi->alloc_txq) {
+ rc = &vsi->q_vectors[i]->tx;
+- rc->itr_setting = coalesce[0].itr_tx;
++ rc->itr_settings = coalesce[0].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+ }
+
+@@ -3050,12 +3065,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+ for (; i < vsi->num_q_vectors; i++) {
+ /* transmit */
+ rc = &vsi->q_vectors[i]->tx;
+- rc->itr_setting = coalesce[0].itr_tx;
++ rc->itr_settings = coalesce[0].itr_tx;
+ ice_write_itr(rc, rc->itr_setting);
+
+ /* receive */
+ rc = &vsi->q_vectors[i]->rx;
+- rc->itr_setting = coalesce[0].itr_rx;
++ rc->itr_settings = coalesce[0].itr_rx;
+ ice_write_itr(rc, rc->itr_setting);
+
+ vsi->q_vectors[i]->intrl = coalesce[0].intrl;
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 06fa93e597fbc..deba18cdc5ef7 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -1177,6 +1177,7 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+ static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+ struct ice_rq_event_info *event)
+ {
++ struct ice_rq_event_info *task_ev;
+ struct ice_aq_task *task;
+ bool found = false;
+
+@@ -1185,15 +1186,15 @@ static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+ if (task->state || task->opcode != opcode)
+ continue;
+
+- memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
+- task->event->msg_len = event->msg_len;
++ task_ev = task->event;
++ memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
++ task_ev->msg_len = event->msg_len;
+
+ /* Only copy the data buffer if a destination was set */
+- if (task->event->msg_buf &&
+- task->event->buf_len > event->buf_len) {
+- memcpy(task->event->msg_buf, event->msg_buf,
++ if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
++ memcpy(task_ev->msg_buf, event->msg_buf,
+ event->buf_len);
+- task->event->buf_len = event->buf_len;
++ task_ev->buf_len = event->buf_len;
+ }
+
+ task->state = ICE_AQ_TASK_COMPLETE;
+@@ -1679,7 +1680,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
+ * reset, so print the event prior to reset.
+ */
+ ice_print_vf_rx_mdd_event(vf);
++ mutex_lock(&pf->vf[i].cfg_lock);
+ ice_reset_vf(&pf->vf[i], false);
++ mutex_unlock(&pf->vf[i].cfg_lock);
+ }
+ }
+ }
+@@ -2141,6 +2144,40 @@ static void ice_service_task(struct work_struct *work)
+ return;
+ }
+
++ if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
++ struct iidc_event *event;
++
++ event = kzalloc(sizeof(*event), GFP_KERNEL);
++ if (event) {
++ set_bit(IIDC_EVENT_CRIT_ERR, event->type);
++ /* report the entire OICR value to AUX driver */
++ swap(event->reg, pf->oicr_err_reg);
++ ice_send_event_to_aux(pf, event);
++ kfree(event);
++ }
++ }
++
++ /* unplug aux dev per request, if an unplug request came in
++ * while processing a plug request, this will handle it
++ */
++ if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
++ ice_unplug_aux_dev(pf);
++
++ /* Plug aux device per request */
++ if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
++ ice_plug_aux_dev(pf);
++
++ if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
++ struct iidc_event *event;
++
++ event = kzalloc(sizeof(*event), GFP_KERNEL);
++ if (event) {
++ set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
++ ice_send_event_to_aux(pf, event);
++ kfree(event);
++ }
++ }
++
+ ice_clean_adminq_subtask(pf);
+ ice_check_media_subtask(pf);
+ ice_check_for_hang_subtask(pf);
+@@ -2216,8 +2253,6 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+ return -EBUSY;
+ }
+
+- ice_unplug_aux_dev(pf);
+-
+ switch (reset) {
+ case ICE_RESET_PFR:
+ set_bit(ICE_PFR_REQ, pf->state);
+@@ -2497,7 +2532,18 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+ ice_stat_str(status));
+ goto clear_xdp_rings;
+ }
+- ice_vsi_assign_bpf_prog(vsi, prog);
++
++ /* assign the prog only when it's not already present on VSI;
++ * this flow is a subject of both ethtool -L and ndo_bpf flows;
++ * VSI rebuild that happens under ethtool -L can expose us to
++ * the bpf_prog refcount issues as we would be swapping same
++ * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
++ * on it as it would be treated as an 'old_prog'; for ndo_bpf
++ * this is not harmful as dev_xdp_install bumps the refcount
++ * before calling the op exposed by the driver;
++ */
++ if (!ice_is_xdp_ena_vsi(vsi))
++ ice_vsi_assign_bpf_prog(vsi, prog);
+
+ return 0;
+ clear_xdp_rings:
+@@ -2562,8 +2608,10 @@ free_qmap:
+
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+- if (vsi->xdp_rings[i]->desc)
++ if (vsi->xdp_rings[i]->desc) {
++ synchronize_rcu();
+ ice_free_tx_ring(vsi->xdp_rings[i]);
++ }
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+@@ -2643,6 +2691,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
+ } else {
++ /* safe to call even when prog == vsi->xdp_prog as
++ * dev_xdp_install in net/core/dev.c incremented prog's
++ * refcount so corresponding bpf_prog_put won't cause
++ * underflow
++ */
+ ice_vsi_assign_bpf_prog(vsi, prog);
+ }
+
+@@ -2839,17 +2892,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+
+ #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
+ if (oicr & ICE_AUX_CRIT_ERR) {
+- struct iidc_event *event;
+-
++ pf->oicr_err_reg |= oicr;
++ set_bit(ICE_AUX_ERR_PENDING, pf->state);
+ ena_mask &= ~ICE_AUX_CRIT_ERR;
+- event = kzalloc(sizeof(*event), GFP_KERNEL);
+- if (event) {
+- set_bit(IIDC_EVENT_CRIT_ERR, event->type);
+- /* report the entire OICR value to AUX driver */
+- event->reg = oicr;
+- ice_send_event_to_aux(pf, event);
+- kfree(event);
+- }
+ }
+
+ /* Report any remaining unexpected interrupts */
+@@ -3139,7 +3184,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
+ if (vsi->type == ICE_VSI_PF) {
+ SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
+ ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+- ether_addr_copy(netdev->dev_addr, mac_addr);
++ eth_hw_addr_set(netdev, mac_addr);
+ ether_addr_copy(netdev->perm_addr, mac_addr);
+ }
+
+@@ -3398,6 +3443,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+ static void ice_deinit_pf(struct ice_pf *pf)
+ {
+ ice_service_task_stop(pf);
++ mutex_destroy(&pf->adev_mutex);
+ mutex_destroy(&pf->sw_mutex);
+ mutex_destroy(&pf->tc_mutex);
+ mutex_destroy(&pf->avail_q_mutex);
+@@ -3478,6 +3524,7 @@ static int ice_init_pf(struct ice_pf *pf)
+
+ mutex_init(&pf->sw_mutex);
+ mutex_init(&pf->tc_mutex);
++ mutex_init(&pf->adev_mutex);
+
+ INIT_HLIST_HEAD(&pf->aq_wait_list);
+ spin_lock_init(&pf->aq_wait_lock);
+@@ -3498,7 +3545,7 @@ static int ice_init_pf(struct ice_pf *pf)
+
+ pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+ if (!pf->avail_rxqs) {
+- devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs);
++ bitmap_free(pf->avail_txqs);
+ pf->avail_txqs = NULL;
+ return -ENOMEM;
+ }
+@@ -3727,12 +3774,13 @@ bool ice_is_wol_supported(struct ice_hw *hw)
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
++ * @locked: is adev device_lock held
+ *
+ * Only change the number of queues if new_tx, or new_rx is non-0.
+ *
+ * Returns 0 on success.
+ */
+-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
++int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
+ {
+ struct ice_pf *pf = vsi->back;
+ int err = 0, timeout = 50;
+@@ -3761,7 +3809,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+
+ ice_vsi_close(vsi);
+ ice_vsi_rebuild(vsi, false);
+- ice_pf_dcb_recfg(pf);
++ ice_pf_dcb_recfg(pf, locked);
+ ice_vsi_open(vsi);
+ done:
+ clear_bit(ICE_CFG_BUSY, pf->state);
+@@ -4170,11 +4218,11 @@ static int ice_register_netdev(struct ice_pf *pf)
+ set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+ netif_carrier_off(vsi->netdev);
+ netif_tx_stop_all_queues(vsi->netdev);
+- err = ice_devlink_create_port(vsi);
++ err = ice_devlink_create_pf_port(pf);
+ if (err)
+ goto err_devlink_create;
+
+- devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
++ devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev);
+
+ return 0;
+ err_devlink_create:
+@@ -4600,9 +4648,6 @@ static void ice_remove(struct pci_dev *pdev)
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+ int i;
+
+- if (!pf)
+- return;
+-
+ for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+ if (!ice_is_reset_in_progress(pf->state))
+ break;
+@@ -5083,7 +5128,7 @@ static int __init ice_module_init(void)
+ pr_info("%s\n", ice_driver_string);
+ pr_info("%s\n", ice_copyright);
+
+- ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
++ ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
+ if (!ice_wq) {
+ pr_err("Failed to create workqueue\n");
+ return -ENOMEM;
+@@ -5181,7 +5226,7 @@ err_update_filters:
+ netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
+ mac);
+ netif_addr_lock_bh(netdev);
+- ether_addr_copy(netdev->dev_addr, old_mac);
++ eth_hw_addr_set(netdev, old_mac);
+ netif_addr_unlock_bh(netdev);
+ return err;
+ }
+@@ -5430,11 +5475,10 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
+ {
+ int err;
+
+- if (vsi->netdev) {
++ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ ice_set_rx_mode(vsi->netdev);
+
+ err = ice_vsi_vlan_setup(vsi);
+-
+ if (err)
+ return err;
+ }
+@@ -5602,13 +5646,19 @@ static int ice_up_complete(struct ice_vsi *vsi)
+
+ if (vsi->port_info &&
+ (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+- vsi->netdev) {
++ vsi->netdev && vsi->type == ICE_VSI_PF) {
+ ice_print_link_msg(vsi, true);
+ netif_tx_start_all_queues(vsi->netdev);
+ netif_carrier_on(vsi->netdev);
+ }
+
+- ice_service_task_schedule(pf);
++ /* Perform an initial read of the statistics registers now to
++ * set the baseline so counters are ready when interface is up
++ */
++ ice_update_eth_stats(vsi);
++
++ if (vsi->type == ICE_VSI_PF)
++ ice_service_task_schedule(pf);
+
+ return 0;
+ }
+@@ -6511,7 +6561,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+- struct iidc_event *event;
+ u8 count = 0;
+ int err = 0;
+
+@@ -6546,14 +6595,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+ return -EBUSY;
+ }
+
+- event = kzalloc(sizeof(*event), GFP_KERNEL);
+- if (!event)
+- return -ENOMEM;
+-
+- set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
+- ice_send_event_to_aux(pf, event);
+- clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
+-
+ netdev->mtu = (unsigned int)new_mtu;
+
+ /* if VSI is up, bring it down and then back up */
+@@ -6561,21 +6602,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+ err = ice_down(vsi);
+ if (err) {
+ netdev_err(netdev, "change MTU if_down err %d\n", err);
+- goto event_after;
++ return err;
+ }
+
+ err = ice_up(vsi);
+ if (err) {
+ netdev_err(netdev, "change MTU if_up err %d\n", err);
+- goto event_after;
++ return err;
+ }
+ }
+
+ netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
+-event_after:
+- set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+- ice_send_event_to_aux(pf, event);
+- kfree(event);
++ set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
+
+ return err;
+ }
+@@ -7190,6 +7228,7 @@ ice_features_check(struct sk_buff *skb,
+ struct net_device __always_unused *netdev,
+ netdev_features_t features)
+ {
++ bool gso = skb_is_gso(skb);
+ size_t len;
+
+ /* No point in doing any of this if neither checksum nor GSO are
+@@ -7202,24 +7241,32 @@ ice_features_check(struct sk_buff *skb,
+ /* We cannot support GSO if the MSS is going to be less than
+ * 64 bytes. If it is then we need to drop support for GSO.
+ */
+- if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
++ if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
+ features &= ~NETIF_F_GSO_MASK;
+
+- len = skb_network_header(skb) - skb->data;
++ len = skb_network_offset(skb);
+ if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
+ goto out_rm_features;
+
+- len = skb_transport_header(skb) - skb_network_header(skb);
++ len = skb_network_header_len(skb);
+ if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+ goto out_rm_features;
+
+ if (skb->encapsulation) {
+- len = skb_inner_network_header(skb) - skb_transport_header(skb);
+- if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+- goto out_rm_features;
++ /* this must work for VXLAN frames AND IPIP/SIT frames, and in
++ * the case of IPIP frames, the transport header pointer is
++ * after the inner header! So check to make sure that this
++ * is a GRE or UDP_TUNNEL frame before doing that math.
++ */
++ if (gso && (skb_shinfo(skb)->gso_type &
++ (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
++ len = skb_inner_network_header(skb) -
++ skb_transport_header(skb);
++ if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
++ goto out_rm_features;
++ }
+
+- len = skb_inner_transport_header(skb) -
+- skb_inner_network_header(skb);
++ len = skb_inner_network_header_len(skb);
+ if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+ goto out_rm_features;
+ }
+diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
+index d1ef3d48a4b03..4d7aa49b7c147 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
++++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
+@@ -254,12 +254,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
+ * This function must be called periodically to ensure that the cached value
+ * is never more than 2 seconds old. It must also be called whenever the PHC
+ * time has been changed.
++ *
++ * Return:
++ * * 0 - OK, successfully updated
++ * * -EAGAIN - PF was busy, need to reschedule the update
+ */
+-static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
++static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
+ {
+ u64 systime;
+ int i;
+
++ if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
++ return -EAGAIN;
++
+ /* Read the current PHC time */
+ systime = ice_ptp_read_src_clk_reg(pf, NULL);
+
+@@ -282,6 +289,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
+ WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+ }
+ }
++ clear_bit(ICE_CFG_BUSY, pf->state);
++
++ return 0;
+ }
+
+ /**
+@@ -459,7 +469,7 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+ scaled_ppm = -scaled_ppm;
+ }
+
+- while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) {
++ while ((u64)scaled_ppm > div64_u64(U64_MAX, incval)) {
+ /* handle overflow by scaling down the scaled_ppm and
+ * the divisor, losing some precision
+ */
+@@ -846,9 +856,12 @@ exit:
+ static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta)
+ {
+ struct timespec64 now, then;
++ int ret;
+
+ then = ns_to_timespec64(delta);
+- ice_ptp_gettimex64(info, &now, NULL);
++ ret = ice_ptp_gettimex64(info, &now, NULL);
++ if (ret)
++ return ret;
+ now = timespec64_add(now, then);
+
+ return ice_ptp_settime64(info, (const struct timespec64 *)&now);
+@@ -1077,7 +1090,7 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
+ snprintf(info->name, sizeof(info->name) - 1, "%s-%s-clk",
+ dev_driver_string(dev), dev_name(dev));
+ info->owner = THIS_MODULE;
+- info->max_adj = 999999999;
++ info->max_adj = 100000000;
+ info->adjtime = ice_ptp_adjtime;
+ info->adjfine = ice_ptp_adjfine;
+ info->gettimex64 = ice_ptp_gettimex64;
+@@ -1182,19 +1195,16 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work)
+ if (err)
+ continue;
+
+- /* Check if the timestamp is valid */
+- if (!(raw_tstamp & ICE_PTP_TS_VALID))
++ /* Check if the timestamp is invalid or stale */
++ if (!(raw_tstamp & ICE_PTP_TS_VALID) ||
++ raw_tstamp == tx->tstamps[idx].cached_tstamp)
+ continue;
+
+- /* clear the timestamp register, so that it won't show valid
+- * again when re-used.
+- */
+- ice_clear_phy_tstamp(hw, tx->quad, phy_idx);
+-
+ /* The timestamp is valid, so we'll go ahead and clear this
+ * index and then send the timestamp up to the stack.
+ */
+ spin_lock(&tx->lock);
++ tx->tstamps[idx].cached_tstamp = raw_tstamp;
+ clear_bit(idx, tx->in_use);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+@@ -1375,6 +1385,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+
+ /**
+ * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
++ * @hw: pointer to the hw struct
+ * @tx: PTP Tx tracker to clean up
+ *
+ * Loop through the Tx timestamp requests and see if any of them have been
+@@ -1383,7 +1394,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+ * timestamp will never be captured. This might happen if the packet gets
+ * discarded before it reaches the PHY timestamping block.
+ */
+-static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
++static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx)
+ {
+ u8 idx;
+
+@@ -1392,11 +1403,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ struct sk_buff *skb;
++ u64 raw_tstamp;
+
+ /* Check if this SKB has been waiting for too long */
+ if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
+ continue;
+
++ /* Read tstamp to be able to use this register again */
++ ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
++ &raw_tstamp);
++
+ spin_lock(&tx->lock);
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
+@@ -1412,17 +1428,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
+ {
+ struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
+ struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
++ int err;
+
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return;
+
+- ice_ptp_update_cached_phctime(pf);
++ err = ice_ptp_update_cached_phctime(pf);
+
+- ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx);
++ ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx);
+
+- /* Run twice a second */
++ /* Run twice a second or reschedule if phc update failed */
+ kthread_queue_delayed_work(ptp->kworker, &ptp->work,
+- msecs_to_jiffies(500));
++ msecs_to_jiffies(err ? 10 : 500));
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
+index e1c787bd5b967..8cdd6f7046b73 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
++++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
+@@ -46,15 +46,21 @@ struct ice_perout_channel {
+ * struct ice_tx_tstamp - Tracking for a single Tx timestamp
+ * @skb: pointer to the SKB for this timestamp request
+ * @start: jiffies when the timestamp was first requested
++ * @cached_tstamp: last read timestamp
+ *
+ * This structure tracks a single timestamp request. The SKB pointer is
+ * provided when initiating a request. The start time is used to ensure that
+ * we discard old requests that were not fulfilled within a 2 second time
+ * window.
++ * Timestamp values in the PHY are read only and do not get cleared except at
++ * hardware reset or when a new timestamp value is captured. The cached_tstamp
++ * field is used to detect the case where a new timestamp has not yet been
++ * captured, ensuring that we avoid sending stale timestamp data to the stack.
+ */
+ struct ice_tx_tstamp {
+ struct sk_buff *skb;
+ unsigned long start;
++ u64 cached_tstamp;
+ };
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
+index 2d9b10277186b..0b61fde449152 100644
+--- a/drivers/net/ethernet/intel/ice/ice_sched.c
++++ b/drivers/net/ethernet/intel/ice/ice_sched.c
+@@ -2758,7 +2758,7 @@ static enum ice_status
+ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ u16 vsi_handle, unsigned long *tc_bitmap)
+ {
+- struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL;
++ struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL;
+ struct ice_sched_agg_info *agg_info, *old_agg_info;
+ enum ice_status status = 0;
+ struct ice_hw *hw = pi->hw;
+@@ -2776,11 +2776,13 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ if (old_agg_info && old_agg_info != agg_info) {
+ struct ice_sched_agg_vsi_info *vtmp;
+
+- list_for_each_entry_safe(old_agg_vsi_info, vtmp,
++ list_for_each_entry_safe(iter, vtmp,
+ &old_agg_info->agg_vsi_list,
+ list_entry)
+- if (old_agg_vsi_info->vsi_handle == vsi_handle)
++ if (iter->vsi_handle == vsi_handle) {
++ old_agg_vsi_info = iter;
+ break;
++ }
+ }
+
+ /* check if entry already exist */
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 3b6c1420aa7be..deb828e761fa5 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -2614,7 +2614,7 @@ ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+ else
+ status = ice_set_vsi_promisc(hw, vsi_handle,
+ promisc_mask, vlan_id);
+- if (status)
++ if (status && status != -EEXIST)
+ break;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
+index 1e46e80f3d6f8..4adc3dff04ba7 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
+@@ -164,17 +164,10 @@ struct ice_tx_offload_params {
+ };
+
+ struct ice_rx_buf {
+- union {
+- struct {
+- dma_addr_t dma;
+- struct page *page;
+- unsigned int page_offset;
+- u16 pagecnt_bias;
+- };
+- struct {
+- struct xdp_buff *xdp;
+- };
+- };
++ dma_addr_t dma;
++ struct page *page;
++ unsigned int page_offset;
++ u16 pagecnt_bias;
+ };
+
+ struct ice_q_stats {
+@@ -270,6 +263,7 @@ struct ice_ring {
+ union {
+ struct ice_tx_buf *tx_buf;
+ struct ice_rx_buf *rx_buf;
++ struct xdp_buff **xdp_buf;
+ };
+ /* CL2 - 2nd cacheline starts here */
+ u16 q_index; /* Queue number of ring */
+@@ -338,6 +332,11 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+ return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+ }
+
++enum ice_container_type {
++ ICE_RX_CONTAINER,
++ ICE_TX_CONTAINER,
++};
++
+ struct ice_ring_container {
+ /* head of linked-list of rings */
+ struct ice_ring *ring;
+@@ -346,9 +345,15 @@ struct ice_ring_container {
+ /* this matches the maximum number of ITR bits, but in usec
+ * values, so it is shifted left one bit (bit zero is ignored)
+ */
+- u16 itr_setting:13;
+- u16 itr_reserved:2;
+- u16 itr_mode:1;
++ union {
++ struct {
++ u16 itr_setting:13;
++ u16 itr_reserved:2;
++ u16 itr_mode:1;
++ };
++ u16 itr_settings;
++ };
++ enum ice_container_type type;
+ };
+
+ struct ice_coalesce_stored {
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+index eee180d8c0247..412deb36b645b 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+@@ -731,6 +731,87 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+ }
+ }
+
++/**
++ * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR
++ * @fdir: pointer to the VF FDIR structure
++ */
++static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir)
++{
++ enum ice_fltr_ptype flow;
++
++ for (flow = ICE_FLTR_PTYPE_NONF_NONE;
++ flow < ICE_FLTR_PTYPE_MAX; flow++) {
++ fdir->fdir_fltr_cnt[flow][0] = 0;
++ fdir->fdir_fltr_cnt[flow][1] = 0;
++ }
++}
++
++/**
++ * ice_vc_fdir_has_prof_conflict
++ * @vf: pointer to the VF structure
++ * @conf: FDIR configuration for each filter
++ *
++ * Check if @conf has conflicting profile with existing profiles
++ *
++ * Return: true on success, and false on error.
++ */
++static bool
++ice_vc_fdir_has_prof_conflict(struct ice_vf *vf,
++ struct virtchnl_fdir_fltr_conf *conf)
++{
++ struct ice_fdir_fltr *desc;
++
++ list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
++ struct virtchnl_fdir_fltr_conf *existing_conf;
++ enum ice_fltr_ptype flow_type_a, flow_type_b;
++ struct ice_fdir_fltr *a, *b;
++
++ existing_conf = to_fltr_conf_from_desc(desc);
++ a = &existing_conf->input;
++ b = &conf->input;
++ flow_type_a = a->flow_type;
++ flow_type_b = b->flow_type;
++
++ /* No need to compare two rules with different tunnel types or
++ * with the same protocol type.
++ */
++ if (existing_conf->ttype != conf->ttype ||
++ flow_type_a == flow_type_b)
++ continue;
++
++ switch (flow_type_a) {
++ case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
++ case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
++ case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
++ return true;
++ break;
++ case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP)
++ return true;
++ break;
++ case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
++ case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
++ case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER)
++ return true;
++ break;
++ case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
++ if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
++ flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP)
++ return true;
++ break;
++ default:
++ break;
++ }
++ }
++
++ return false;
++}
++
+ /**
+ * ice_vc_fdir_write_flow_prof
+ * @vf: pointer to the VF structure
+@@ -871,6 +952,13 @@ ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ enum ice_fltr_ptype flow;
+ int ret;
+
++ ret = ice_vc_fdir_has_prof_conflict(vf, conf);
++ if (ret) {
++ dev_dbg(dev, "Found flow profile conflict for VF %d\n",
++ vf->vf_id);
++ return ret;
++ }
++
+ flow = input->flow_type;
+ ret = ice_vc_fdir_alloc_prof(vf, flow);
+ if (ret) {
+@@ -2063,7 +2151,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+ v_ret = VIRTCHNL_STATUS_SUCCESS;
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+- goto err_free_conf;
++ goto err_rem_entry;
+ }
+
+ ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
+@@ -2072,15 +2160,16 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+ stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+ dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+ vf->vf_id, ret);
+- goto err_rem_entry;
++ goto err_clr_irq;
+ }
+
+ exit:
+ kfree(stat);
+ return ret;
+
+-err_rem_entry:
++err_clr_irq:
+ ice_vc_fdir_clear_irq_ctx(vf);
++err_rem_entry:
+ ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+ err_free_conf:
+ devm_kfree(dev, conf);
+@@ -2189,6 +2278,7 @@ void ice_vf_fdir_init(struct ice_vf *vf)
+ spin_lock_init(&fdir->ctx_lock);
+ fdir->ctx_irq.flags = 0;
+ fdir->ctx_done.flags = 0;
++ ice_vc_fdir_reset_cnt_all(fdir);
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+index e93430ab37f1e..9d4d58757e040 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+@@ -251,7 +251,7 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list_comms[] = {
+ * ice_get_vf_vsi - get VF's VSI based on the stored index
+ * @vf: VF used to get VSI
+ */
+-static struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
++struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+ {
+ return vf->pf->vsi[vf->lan_vsi_idx];
+ }
+@@ -615,8 +615,6 @@ void ice_free_vfs(struct ice_pf *pf)
+ struct ice_hw *hw = &pf->hw;
+ unsigned int tmp, i;
+
+- set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
+-
+ if (!pf->vf)
+ return;
+
+@@ -632,21 +630,26 @@ void ice_free_vfs(struct ice_pf *pf)
+ else
+ dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
+
+- /* Avoid wait time by stopping all VFs at the same time */
+- ice_for_each_vf(pf, i)
+- if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states))
+- ice_dis_vf_qs(&pf->vf[i]);
+-
+ tmp = pf->num_alloc_vfs;
+ pf->num_qps_per_vf = 0;
+ pf->num_alloc_vfs = 0;
+ for (i = 0; i < tmp; i++) {
+- if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
++ struct ice_vf *vf = &pf->vf[i];
++
++ mutex_lock(&vf->cfg_lock);
++
++ ice_dis_vf_qs(vf);
++
++ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ /* disable VF qp mappings and set VF disable state */
+- ice_dis_vf_mappings(&pf->vf[i]);
+- set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
+- ice_free_vf_res(&pf->vf[i]);
++ ice_dis_vf_mappings(vf);
++ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
++ ice_free_vf_res(vf);
+ }
++
++ mutex_unlock(&vf->cfg_lock);
++
++ mutex_destroy(&vf->cfg_lock);
+ }
+
+ if (ice_sriov_free_msix_res(pf))
+@@ -682,7 +685,6 @@ void ice_free_vfs(struct ice_pf *pf)
+ i);
+
+ clear_bit(ICE_VF_DIS, pf->state);
+- clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
+ clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+ }
+
+@@ -1566,10 +1568,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
+ ice_for_each_vf(pf, v) {
+ vf = &pf->vf[v];
+
++ mutex_lock(&vf->cfg_lock);
++
+ vf->driver_caps = 0;
+ ice_vc_set_default_allowlist(vf);
+
+ ice_vf_fdir_exit(vf);
++ ice_vf_fdir_init(vf);
+ /* clean VF control VSI when resetting VFs since it should be
+ * setup only when VF creates its first FDIR rule.
+ */
+@@ -1579,6 +1584,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
+ ice_vf_pre_vsi_rebuild(vf);
+ ice_vf_rebuild_vsi(vf);
+ ice_vf_post_vsi_rebuild(vf);
++
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ ice_flush(hw);
+@@ -1625,6 +1632,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
+ u32 reg;
+ int i;
+
++ lockdep_assert_held(&vf->cfg_lock);
++
+ dev = ice_pf_to_dev(pf);
+
+ if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
+@@ -1645,8 +1654,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
+
+ vsi = ice_get_vf_vsi(vf);
+
+- if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
+- ice_dis_vf_qs(vf);
++ ice_dis_vf_qs(vf);
+
+ /* Call Disable LAN Tx queue AQ whether or not queues are
+ * enabled. This is needed for successful completion of VFR.
+@@ -1697,6 +1705,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
+ }
+
+ ice_vf_fdir_exit(vf);
++ ice_vf_fdir_init(vf);
+ /* clean VF control VSI when resetting VF since it should be setup
+ * only when VF creates its first FDIR rule.
+ */
+@@ -1894,6 +1903,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
+ */
+ ice_vf_ctrl_invalidate_vsi(vf);
+ ice_vf_fdir_init(vf);
++
++ mutex_init(&vf->cfg_lock);
+ }
+ }
+
+@@ -2109,9 +2120,12 @@ void ice_process_vflr_event(struct ice_pf *pf)
+ bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+ /* read GLGEN_VFLRSTAT register to find out the flr VFs */
+ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+- if (reg & BIT(bit_idx))
++ if (reg & BIT(bit_idx)) {
+ /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
++ mutex_lock(&vf->cfg_lock);
+ ice_reset_vf(vf, true);
++ mutex_unlock(&vf->cfg_lock);
++ }
+ }
+ }
+
+@@ -2188,7 +2202,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+ if (!vf)
+ return;
+
++ mutex_lock(&vf->cfg_lock);
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ /**
+@@ -2218,24 +2234,6 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+
+ dev = ice_pf_to_dev(pf);
+
+- /* single place to detect unsuccessful return values */
+- if (v_retval) {
+- vf->num_inval_msgs++;
+- dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
+- v_opcode, v_retval);
+- if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
+- dev_err(dev, "Number of invalid messages exceeded for VF %d\n",
+- vf->vf_id);
+- dev_err(dev, "Use PF Control I/F to enable the VF\n");
+- set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+- return -EIO;
+- }
+- } else {
+- vf->num_valid_msgs++;
+- /* reset the invalid counter, if a valid message is received. */
+- vf->num_inval_msgs = 0;
+- }
+-
+ aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
+ msg, msglen, NULL);
+ if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
+@@ -2954,6 +2952,7 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
+ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+ {
+ enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
++ enum ice_status mcast_status = 0, ucast_status = 0;
+ bool rm_promisc, alluni = false, allmulti = false;
+ struct virtchnl_promisc_info *info =
+ (struct virtchnl_promisc_info *)msg;
+@@ -3043,52 +3042,51 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+ goto error_param;
+ }
+ } else {
+- enum ice_status status;
+- u8 promisc_m;
+-
+- if (alluni) {
+- if (vf->port_vlan_info || vsi->num_vlan)
+- promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+- else
+- promisc_m = ICE_UCAST_PROMISC_BITS;
+- } else if (allmulti) {
+- if (vf->port_vlan_info || vsi->num_vlan)
+- promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+- else
+- promisc_m = ICE_MCAST_PROMISC_BITS;
++ u8 mcast_m, ucast_m;
++
++ if (vf->port_vlan_info || vsi->num_vlan > 1) {
++ mcast_m = ICE_MCAST_VLAN_PROMISC_BITS;
++ ucast_m = ICE_UCAST_VLAN_PROMISC_BITS;
+ } else {
+- if (vf->port_vlan_info || vsi->num_vlan)
+- promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+- else
+- promisc_m = ICE_UCAST_PROMISC_BITS;
++ mcast_m = ICE_MCAST_PROMISC_BITS;
++ ucast_m = ICE_UCAST_PROMISC_BITS;
+ }
+
+- /* Configure multicast/unicast with or without VLAN promiscuous
+- * mode
+- */
+- status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
+- if (status) {
+- dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n",
+- rm_promisc ? "dis" : "en", vf->vf_id,
+- ice_stat_str(status));
+- v_ret = ice_err_to_virt_err(status);
+- goto error_param;
+- } else {
+- dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n",
+- rm_promisc ? "dis" : "en", vf->vf_id);
++ ucast_status = ice_vf_set_vsi_promisc(vf, vsi, ucast_m,
++ !alluni);
++ if (ucast_status) {
++ dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed\n",
++ alluni ? "en" : "dis", vf->vf_id);
++ v_ret = ice_err_to_virt_err(ucast_status);
++ }
++
++ mcast_status = ice_vf_set_vsi_promisc(vf, vsi, mcast_m,
++ !allmulti);
++ if (mcast_status) {
++ dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed\n",
++ allmulti ? "en" : "dis", vf->vf_id);
++ v_ret = ice_err_to_virt_err(mcast_status);
+ }
+ }
+
+- if (allmulti &&
+- !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+- dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id);
+- else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+- dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id);
++ if (!mcast_status) {
++ if (allmulti &&
++ !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
++ dev_info(dev, "VF %u successfully set multicast promiscuous mode\n",
++ vf->vf_id);
++ else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
++ dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n",
++ vf->vf_id);
++ }
+
+- if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+- dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id);
+- else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+- dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id);
++ if (!ucast_status) {
++ if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
++ dev_info(dev, "VF %u successfully set unicast promiscuous mode\n",
++ vf->vf_id);
++ else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
++ dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n",
++ vf->vf_id);
++ }
+
+ error_param:
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+@@ -3337,9 +3335,9 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+ goto error_param;
+ }
+
+- /* Skip queue if not enabled */
+ if (!test_bit(vf_q_id, vf->txq_ena))
+- continue;
++ dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
++ vf_q_id, vsi->vsi_num);
+
+ ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+@@ -3762,6 +3760,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ u8 *mac_addr = vc_ether_addr->addr;
+ enum ice_status status;
++ int ret = 0;
+
+ /* device MAC already added */
+ if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr))
+@@ -3774,20 +3773,23 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+
+ status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+ if (status == ICE_ERR_ALREADY_EXISTS) {
+- dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr,
++ dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr,
+ vf->vf_id);
+- return -EEXIST;
++ /* don't return since we might need to update
++ * the primary MAC in ice_vfhw_mac_add() below
++ */
++ ret = -EEXIST;
+ } else if (status) {
+ dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n",
+ mac_addr, vf->vf_id, ice_stat_str(status));
+ return -EIO;
++ } else {
++ vf->num_mac++;
+ }
+
+ ice_vfhw_mac_add(vf, vc_ether_addr);
+
+- vf->num_mac++;
+-
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -4078,6 +4080,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+ return 0;
+ }
+
++ mutex_lock(&vf->cfg_lock);
++
+ vf->port_vlan_info = vlanprio;
+
+ if (vf->port_vlan_info)
+@@ -4087,6 +4091,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+ dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
+
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+
+ return 0;
+ }
+@@ -4418,10 +4423,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
+ struct device *dev;
+ int err = 0;
+
+- /* if de-init is underway, don't process messages from VF */
+- if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state))
+- return;
+-
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id)) {
+ err = -EINVAL;
+@@ -4461,6 +4462,15 @@ error_handler:
+ return;
+ }
+
++ /* VF is being configured in another context that triggers a VFR, so no
++ * need to process this message
++ */
++ if (!mutex_trylock(&vf->cfg_lock)) {
++ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
++ vf->vf_id);
++ return;
++ }
++
+ switch (v_opcode) {
+ case VIRTCHNL_OP_VERSION:
+ err = ice_vc_get_ver_msg(vf, msg);
+@@ -4549,6 +4559,8 @@ error_handler:
+ dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
+ vf_id, v_opcode, err);
+ }
++
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ /**
+@@ -4664,6 +4676,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+ return -EINVAL;
+ }
+
++ mutex_lock(&vf->cfg_lock);
++
+ /* VF is notified of its new MAC via the PF's response to the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
+ */
+@@ -4682,6 +4696,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+ }
+
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+ return 0;
+ }
+
+@@ -4711,11 +4726,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
+ if (trusted == vf->trusted)
+ return 0;
+
++ mutex_lock(&vf->cfg_lock);
++
+ vf->trusted = trusted;
+ ice_vc_reset_vf(vf);
+ dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
+ vf_id, trusted ? "" : "un");
+
++ mutex_unlock(&vf->cfg_lock);
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+index 842cb077df861..532f57f014673 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+@@ -14,7 +14,6 @@
+ #define ICE_MAX_MACADDR_PER_VF 18
+
+ /* Malicious Driver Detection */
+-#define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED 10
+ #define ICE_MDD_EVENTS_THRESHOLD 30
+
+ /* Static VF transaction/status register def */
+@@ -74,6 +73,11 @@ struct ice_mdd_vf_events {
+ struct ice_vf {
+ struct ice_pf *pf;
+
++ /* Used during virtchnl message handling and NDO ops against the VF
++ * that will trigger a VFR
++ */
++ struct mutex cfg_lock;
++
+ u16 vf_id; /* VF ID in the PF space */
+ u16 lan_vsi_idx; /* index into PF struct */
+ u16 ctrl_vsi_idx;
+@@ -102,8 +106,6 @@ struct ice_vf {
+ unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
+ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
+
+- u64 num_inval_msgs; /* number of continuous invalid msgs */
+- u64 num_valid_msgs; /* number of valid msgs detected */
+ unsigned long vf_caps; /* VF's adv. capabilities */
+ u8 num_req_qs; /* num of queue pairs requested by VF */
+ u16 num_mac;
+@@ -111,9 +113,13 @@ struct ice_vf {
+ struct ice_mdd_vf_events mdd_rx_events;
+ struct ice_mdd_vf_events mdd_tx_events;
+ DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
++
++ /* devlink port data */
++ struct devlink_port devlink_port;
+ };
+
+ #ifdef CONFIG_PCI_IOV
++struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
+ void ice_process_vflr_event(struct ice_pf *pf);
+ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+@@ -171,6 +177,11 @@ static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { }
+ static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { }
+ static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { }
+
++static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
++{
++ return NULL;
++}
++
+ static inline bool
+ ice_is_malicious_vf(struct ice_pf __always_unused *pf,
+ struct ice_rq_event_info __always_unused *event,
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 5a9f61deeb38d..070be30cbaa91 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -36,8 +36,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+ static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+ {
+ ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+- if (ice_is_xdp_ena_vsi(vsi))
++ if (ice_is_xdp_ena_vsi(vsi)) {
++ synchronize_rcu();
+ ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
++ }
+ ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+ }
+
+@@ -164,8 +166,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+- ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+-
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+@@ -180,6 +180,8 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+ if (err)
+ return err;
+ }
++ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
++
+ err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
+ if (err)
+ return err;
+@@ -319,6 +321,19 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+ bool if_running, pool_present = !!pool;
+ int ret = 0, pool_failure = 0;
+
++ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
++ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
++ pool_failure = -EINVAL;
++ goto failure;
++ }
++
++ if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
++ !is_power_of_2(vsi->tx_rings[qid]->count)) {
++ netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
++ pool_failure = -EINVAL;
++ goto failure;
++ }
++
+ if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+ if (if_running) {
+@@ -341,6 +356,7 @@ xsk_pool_if_up:
+ netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
+ }
+
++failure:
+ if (pool_failure) {
+ netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+ pool_present ? "en" : "dis", pool_failure);
+@@ -364,7 +380,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
+ {
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+- struct ice_rx_buf *rx_buf;
++ struct xdp_buff **xdp;
+ bool ok = true;
+ dma_addr_t dma;
+
+@@ -372,26 +388,26 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
+ return true;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+- rx_buf = &rx_ring->rx_buf[ntu];
++ xdp = &rx_ring->xdp_buf[ntu];
+
+ do {
+- rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
+- if (!rx_buf->xdp) {
++ *xdp = xsk_buff_alloc(rx_ring->xsk_pool);
++ if (!*xdp) {
+ ok = false;
+ break;
+ }
+
+- dma = xsk_buff_xdp_get_dma(rx_buf->xdp);
++ dma = xsk_buff_xdp_get_dma(*xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
+ rx_desc->wb.status_error0 = 0;
+
+ rx_desc++;
+- rx_buf++;
++ xdp++;
+ ntu++;
+
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+- rx_buf = rx_ring->rx_buf;
++ xdp = rx_ring->xdp_buf;
+ ntu = 0;
+ }
+ } while (--count);
+@@ -421,19 +437,19 @@ static void ice_bump_ntc(struct ice_ring *rx_ring)
+ /**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+- * @rx_buf: zero-copy Rx buffer
++ * @xdp_arr: Pointer to the SW ring of xdp_buff pointers
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+ static struct sk_buff *
+-ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
++ice_construct_skb_zc(struct ice_ring *rx_ring, struct xdp_buff **xdp_arr)
+ {
+- unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta;
+- unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data;
+- unsigned int datasize_hard = rx_buf->xdp->data_end -
+- rx_buf->xdp->data_hard_start;
++ struct xdp_buff *xdp = *xdp_arr;
++ unsigned int metasize = xdp->data - xdp->data_meta;
++ unsigned int datasize = xdp->data_end - xdp->data;
++ unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
+ struct sk_buff *skb;
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+@@ -441,13 +457,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+ if (unlikely(!skb))
+ return NULL;
+
+- skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start);
+- memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize);
++ skb_reserve(skb, xdp->data - xdp->data_hard_start);
++ memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+- xsk_buff_free(rx_buf->xdp);
+- rx_buf->xdp = NULL;
++ xsk_buff_free(xdp);
++ *xdp_arr = NULL;
+ return skb;
+ }
+
+@@ -521,7 +537,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+ while (likely(total_rx_packets < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ unsigned int size, xdp_res = 0;
+- struct ice_rx_buf *rx_buf;
++ struct xdp_buff **xdp;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+@@ -544,18 +560,18 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+ if (!size)
+ break;
+
+- rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+- rx_buf->xdp->data_end = rx_buf->xdp->data + size;
+- xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);
++ xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean];
++ (*xdp)->data_end = (*xdp)->data + size;
++ xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool);
+
+- xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
++ xdp_res = ice_run_xdp_zc(rx_ring, *xdp);
+ if (xdp_res) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
+ xdp_xmit |= xdp_res;
+ else
+- xsk_buff_free(rx_buf->xdp);
++ xsk_buff_free(*xdp);
+
+- rx_buf->xdp = NULL;
++ *xdp = NULL;
+ total_rx_bytes += size;
+ total_rx_packets++;
+ cleaned_count++;
+@@ -565,7 +581,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+ }
+
+ /* XDP_PASS path */
+- skb = ice_construct_skb_zc(rx_ring, rx_buf);
++ skb = ice_construct_skb_zc(rx_ring, xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ break;
+@@ -759,7 +775,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *ring;
+
+- if (test_bit(ICE_DOWN, vsi->state))
++ if (test_bit(ICE_VSI_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+@@ -810,15 +826,15 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+ */
+ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+ {
+- u16 i;
+-
+- for (i = 0; i < rx_ring->count; i++) {
+- struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
++ u16 count_mask = rx_ring->count - 1;
++ u16 ntc = rx_ring->next_to_clean;
++ u16 ntu = rx_ring->next_to_use;
+
+- if (!rx_buf->xdp)
+- continue;
++ for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
++ struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc];
+
+- rx_buf->xdp = NULL;
++ xsk_buff_free(*xdp);
++ *xdp = NULL;
+ }
+ }
+
+diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
+index 1277c5c7d0996..7be0c7ce9394b 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
++++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
+@@ -426,7 +426,7 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value)
+ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+ {
+ u32 hash_value, hash_mask;
+- u8 bit_shift = 0;
++ u8 bit_shift = 1;
+
+ /* Register count multiplied by bits per register */
+ hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+@@ -434,7 +434,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+ /* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+ * where 0xFF would still fall within the hash mask.
+ */
+- while (hash_mask >> bit_shift != 0xFF)
++ while (hash_mask >> bit_shift != 0xFF && bit_shift < 4)
+ bit_shift++;
+
+ /* The portion of the address that is used for the hash table
+diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
+index 2d3daf022651c..a2b759531cb7b 100644
+--- a/drivers/net/ethernet/intel/igb/igb.h
++++ b/drivers/net/ethernet/intel/igb/igb.h
+@@ -34,11 +34,11 @@ struct igb_adapter;
+ /* TX/RX descriptor defines */
+ #define IGB_DEFAULT_TXD 256
+ #define IGB_DEFAULT_TX_WORK 128
+-#define IGB_MIN_TXD 80
++#define IGB_MIN_TXD 64
+ #define IGB_MAX_TXD 4096
+
+ #define IGB_DEFAULT_RXD 256
+-#define IGB_MIN_RXD 80
++#define IGB_MIN_RXD 64
+ #define IGB_MAX_RXD 4096
+
+ #define IGB_DEFAULT_ITR 3 /* dynamic */
+@@ -664,6 +664,8 @@ struct igb_adapter {
+ struct igb_mac_addr *mac_table;
+ struct vf_mac_filter vf_macs;
+ struct vf_mac_filter *vf_mac_list;
++ /* lock for VF resources */
++ spinlock_t vfs_lock;
+ };
+
+ /* flags controlling PTP/1588 function */
+diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
+index fb1029352c3e7..b2f46004a3d0f 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
++++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
+@@ -822,6 +822,8 @@ static int igb_set_eeprom(struct net_device *netdev,
+ */
+ ret_val = hw->nvm.ops.read(hw, last_word, 1,
+ &eeprom_buff[last_word - first_word]);
++ if (ret_val)
++ goto out;
+ }
+
+ /* Device's eeprom is always little-endian, word addressable */
+@@ -841,6 +843,7 @@ static int igb_set_eeprom(struct net_device *netdev,
+ hw->nvm.ops.update(hw);
+
+ igb_set_fw_version(adapter);
++out:
+ kfree(eeprom_buff);
+ return ret_val;
+ }
+@@ -961,10 +964,6 @@ static int igb_set_ringparam(struct net_device *netdev,
+ memcpy(&temp_ring[i], adapter->rx_ring[i],
+ sizeof(struct igb_ring));
+
+- /* Clear copied XDP RX-queue info */
+- memset(&temp_ring[i].xdp_rxq, 0,
+- sizeof(temp_ring[i].xdp_rxq));
+-
+ temp_ring[i].count = new_rx_count;
+ err = igb_setup_rx_resources(&temp_ring[i]);
+ if (err) {
+@@ -1413,6 +1412,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
+ *data = 1;
+ return -1;
+ }
++ wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8);
++ wr32(E1000_EIMS, BIT(0));
+ } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+ shared_int = false;
+ if (request_irq(irq,
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 751de06019a0e..c01114cabbb09 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -1204,8 +1204,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
+ if (!q_vector) {
+ q_vector = kzalloc(size, GFP_KERNEL);
+ } else if (size > ksize(q_vector)) {
+- kfree_rcu(q_vector, rcu);
+- q_vector = kzalloc(size, GFP_KERNEL);
++ struct igb_q_vector *new_q_vector;
++
++ new_q_vector = kzalloc(size, GFP_KERNEL);
++ if (new_q_vector)
++ kfree_rcu(q_vector, rcu);
++ q_vector = new_q_vector;
+ } else {
+ memset(q_vector, 0, size);
+ }
+@@ -3637,6 +3641,7 @@ static int igb_disable_sriov(struct pci_dev *pdev)
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
++ unsigned long flags;
+
+ /* reclaim resources allocated to VFs */
+ if (adapter->vf_data) {
+@@ -3649,12 +3654,13 @@ static int igb_disable_sriov(struct pci_dev *pdev)
+ pci_disable_sriov(pdev);
+ msleep(500);
+ }
+-
++ spin_lock_irqsave(&adapter->vfs_lock, flags);
+ kfree(adapter->vf_mac_list);
+ adapter->vf_mac_list = NULL;
+ kfree(adapter->vf_data);
+ adapter->vf_data = NULL;
+ adapter->vfs_allocated_count = 0;
++ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+ wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+ wrfl();
+ msleep(100);
+@@ -3850,8 +3856,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
+ struct pci_dev *pdev = adapter->pdev;
+ struct e1000_hw *hw = &adapter->hw;
+
+- /* Virtualization features not supported on i210 family. */
+- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
++ /* Virtualization features not supported on i210 and 82580 family. */
++ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
++ (hw->mac.type == e1000_82580))
+ return;
+
+ /* Of the below we really only want the effect of getting
+@@ -3974,6 +3981,9 @@ static int igb_sw_init(struct igb_adapter *adapter)
+
+ spin_lock_init(&adapter->nfc_lock);
+ spin_lock_init(&adapter->stats64_lock);
++
++ /* init spinlock to avoid concurrency of VF resources */
++ spin_lock_init(&adapter->vfs_lock);
+ #ifdef CONFIG_PCI_IOV
+ switch (hw->mac.type) {
+ case e1000_82576:
+@@ -4345,7 +4355,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
+ {
+ struct igb_adapter *adapter = netdev_priv(rx_ring->netdev);
+ struct device *dev = rx_ring->dev;
+- int size;
++ int size, res;
++
++ /* XDP RX-queue info */
++ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
++ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
++ res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
++ rx_ring->queue_index, 0);
++ if (res < 0) {
++ dev_err(dev, "Failed to register xdp_rxq index %u\n",
++ rx_ring->queue_index);
++ return res;
++ }
+
+ size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+
+@@ -4368,14 +4389,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
+
+ rx_ring->xdp_prog = adapter->xdp_prog;
+
+- /* XDP RX-queue info */
+- if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+- rx_ring->queue_index, 0) < 0)
+- goto err;
+-
+ return 0;
+
+ err:
++ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+@@ -4721,6 +4738,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
+ static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+ struct igb_ring *rx_ring)
+ {
++#if (PAGE_SIZE < 8192)
++ struct e1000_hw *hw = &adapter->hw;
++#endif
++
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+@@ -4731,10 +4752,9 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+ set_ring_build_skb_enabled(rx_ring);
+
+ #if (PAGE_SIZE < 8192)
+- if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+- return;
+-
+- set_ring_uses_large_buffer(rx_ring);
++ if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB ||
++ rd32(E1000_RCTL) & E1000_RCTL_SBP)
++ set_ring_uses_large_buffer(rx_ring);
+ #endif
+ }
+
+@@ -4812,8 +4832,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+ while (i != tx_ring->next_to_use) {
+ union e1000_adv_tx_desc *eop_desc, *tx_desc;
+
+- /* Free all the Tx ring sk_buffs */
+- dev_kfree_skb_any(tx_buffer->skb);
++ /* Free all the Tx ring sk_buffs or xdp frames */
++ if (tx_buffer->type == IGB_TYPE_SKB)
++ dev_kfree_skb_any(tx_buffer->skb);
++ else
++ xdp_return_frame(tx_buffer->xdpf);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+@@ -5498,7 +5521,8 @@ static void igb_watchdog_task(struct work_struct *work)
+ break;
+ }
+
+- if (adapter->link_speed != SPEED_1000)
++ if (adapter->link_speed != SPEED_1000 ||
++ !hw->phy.ops.read_reg)
+ goto no_wait;
+
+ /* wait for Remote receiver status OK */
+@@ -7392,7 +7416,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
+ {
+ struct e1000_hw *hw = &adapter->hw;
+ unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+- u32 reg, msgbuf[3];
++ u32 reg, msgbuf[3] = {};
+ u8 *addr = (u8 *)(&msgbuf[1]);
+
+ /* process all the same items cleared in a function level reset */
+@@ -7641,6 +7665,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
+ struct vf_mac_filter *entry = NULL;
+ int ret = 0;
+
++ if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
++ !vf_data->trusted) {
++ dev_warn(&pdev->dev,
++ "VF %d requested MAC filter but is administratively denied\n",
++ vf);
++ return -EINVAL;
++ }
++ if (!is_valid_ether_addr(addr)) {
++ dev_warn(&pdev->dev,
++ "VF %d attempted to set invalid MAC filter\n",
++ vf);
++ return -EINVAL;
++ }
++
+ switch (info) {
+ case E1000_VF_MAC_FILTER_CLR:
+ /* remove all unicast MAC filters related to the current VF */
+@@ -7654,20 +7692,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
+ }
+ break;
+ case E1000_VF_MAC_FILTER_ADD:
+- if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+- !vf_data->trusted) {
+- dev_warn(&pdev->dev,
+- "VF %d requested MAC filter but is administratively denied\n",
+- vf);
+- return -EINVAL;
+- }
+- if (!is_valid_ether_addr(addr)) {
+- dev_warn(&pdev->dev,
+- "VF %d attempted to set invalid MAC filter\n",
+- vf);
+- return -EINVAL;
+- }
+-
+ /* try to find empty slot in the list */
+ list_for_each(pos, &adapter->vf_macs.l) {
+ entry = list_entry(pos, struct vf_mac_filter, l);
+@@ -7835,8 +7859,10 @@ unlock:
+ static void igb_msg_task(struct igb_adapter *adapter)
+ {
+ struct e1000_hw *hw = &adapter->hw;
++ unsigned long flags;
+ u32 vf;
+
++ spin_lock_irqsave(&adapter->vfs_lock, flags);
+ for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+ /* process any reset requests */
+ if (!igb_check_for_rst(hw, vf))
+@@ -7850,6 +7876,7 @@ static void igb_msg_task(struct igb_adapter *adapter)
+ if (!igb_check_for_ack(hw, vf))
+ igb_rcv_ack_from_vf(adapter, vf);
+ }
++ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+ }
+
+ /**
+@@ -8019,7 +8046,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
+ if (likely(napi_complete_done(napi, work_done)))
+ igb_ring_irq_enable(q_vector);
+
+- return min(work_done, budget - 1);
++ return work_done;
+ }
+
+ /**
+@@ -9247,7 +9274,7 @@ static int __maybe_unused igb_suspend(struct device *dev)
+ return __igb_shutdown(to_pci_dev(dev), NULL, 0);
+ }
+
+-static int __maybe_unused igb_resume(struct device *dev)
++static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct net_device *netdev = pci_get_drvdata(pdev);
+@@ -9290,17 +9317,24 @@ static int __maybe_unused igb_resume(struct device *dev)
+
+ wr32(E1000_WUS, ~0);
+
+- rtnl_lock();
++ if (!rpm)
++ rtnl_lock();
+ if (!err && netif_running(netdev))
+ err = __igb_open(netdev, true);
+
+ if (!err)
+ netif_device_attach(netdev);
+- rtnl_unlock();
++ if (!rpm)
++ rtnl_unlock();
+
+ return err;
+ }
+
++static int __maybe_unused igb_resume(struct device *dev)
++{
++ return __igb_resume(dev, false);
++}
++
+ static int __maybe_unused igb_runtime_idle(struct device *dev)
+ {
+ struct net_device *netdev = dev_get_drvdata(dev);
+@@ -9319,7 +9353,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev)
+
+ static int __maybe_unused igb_runtime_resume(struct device *dev)
+ {
+- return igb_resume(dev);
++ return __igb_resume(dev, true);
+ }
+
+ static void igb_shutdown(struct pci_dev *pdev)
+@@ -9435,7 +9469,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+- * resembles the first-half of the igb_resume routine.
++ * resembles the first-half of the __igb_resume routine.
+ **/
+ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
+ {
+@@ -9475,7 +9509,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+- * second-half of the igb_resume routine.
++ * second-half of the __igb_resume routine.
+ */
+ static void igb_io_resume(struct pci_dev *pdev)
+ {
+@@ -9805,11 +9839,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+ struct e1000_hw *hw = &adapter->hw;
+ u32 dmac_thr;
+ u16 hwm;
++ u32 reg;
+
+ if (hw->mac.type > e1000_82580) {
+ if (adapter->flags & IGB_FLAG_DMAC) {
+- u32 reg;
+-
+ /* force threshold to 0. */
+ wr32(E1000_DMCTXTH, 0);
+
+@@ -9842,7 +9875,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+ /* Disable BMC-to-OS Watchdog Enable */
+ if (hw->mac.type != e1000_i354)
+ reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
+-
+ wr32(E1000_DMACR, reg);
+
+ /* no lower threshold to disable
+@@ -9859,12 +9891,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+ */
+ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
+ (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
++ }
+
+- /* make low power state decision controlled
+- * by DMA coal
+- */
++ if (hw->mac.type >= e1000_i210 ||
++ (adapter->flags & IGB_FLAG_DMAC)) {
+ reg = rd32(E1000_PCIEMISC);
+- reg &= ~E1000_PCIEMISC_LX_DECISION;
++ reg |= E1000_PCIEMISC_LX_DECISION;
+ wr32(E1000_PCIEMISC, reg);
+ } /* endif adapter->dmac is not disabled */
+ } else if (hw->mac.type == e1000_82580) {
+diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
+index 0011b15e678c3..9cdb7a856ab6c 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
+@@ -1260,18 +1260,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
+ return;
+ }
+
+- spin_lock_init(&adapter->tmreg_lock);
+- INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+-
+- if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+- INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+- igb_ptp_overflow_check);
+-
+- adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+- adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+-
+- igb_ptp_reset(adapter);
+-
+ adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+ &adapter->pdev->dev);
+ if (IS_ERR(adapter->ptp_clock)) {
+@@ -1281,6 +1269,18 @@ void igb_ptp_init(struct igb_adapter *adapter)
+ dev_info(&adapter->pdev->dev, "added PHC on %s\n",
+ adapter->netdev->name);
+ adapter->ptp_flags |= IGB_PTP_ENABLED;
++
++ spin_lock_init(&adapter->tmreg_lock);
++ INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
++
++ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
++ INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
++ igb_ptp_overflow_check);
++
++ adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
++ adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
++
++ igb_ptp_reset(adapter);
+ }
+ }
+
+diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
+index 975eb47ee04df..b39fca9827dc2 100644
+--- a/drivers/net/ethernet/intel/igbvf/igbvf.h
++++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
+@@ -39,11 +39,11 @@ enum latency_range {
+ /* Tx/Rx descriptor defines */
+ #define IGBVF_DEFAULT_TXD 256
+ #define IGBVF_MAX_TXD 4096
+-#define IGBVF_MIN_TXD 80
++#define IGBVF_MIN_TXD 64
+
+ #define IGBVF_DEFAULT_RXD 256
+ #define IGBVF_MAX_RXD 4096
+-#define IGBVF_MIN_RXD 80
++#define IGBVF_MIN_RXD 64
+
+ #define IGBVF_MIN_ITR_USECS 10 /* 100000 irq/sec */
+ #define IGBVF_MAX_ITR_USECS 10000 /* 100 irq/sec */
+diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
+index d32e72d953c8d..ebd6d464fa0cf 100644
+--- a/drivers/net/ethernet/intel/igbvf/netdev.c
++++ b/drivers/net/ethernet/intel/igbvf/netdev.c
+@@ -1074,7 +1074,7 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter)
+ igbvf_intr_msix_rx, 0, adapter->rx_ring->name,
+ netdev);
+ if (err)
+- goto out;
++ goto free_irq_tx;
+
+ adapter->rx_ring->itr_register = E1000_EITR(vector);
+ adapter->rx_ring->itr_val = adapter->current_itr;
+@@ -1083,10 +1083,14 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter)
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igbvf_msix_other, 0, netdev->name, netdev);
+ if (err)
+- goto out;
++ goto free_irq_rx;
+
+ igbvf_configure_msix(adapter);
+ return 0;
++free_irq_rx:
++ free_irq(adapter->msix_entries[--vector].vector, netdev);
++free_irq_tx:
++ free_irq(adapter->msix_entries[--vector].vector, netdev);
+ out:
+ return err;
+ }
+@@ -2861,6 +2865,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ return 0;
+
+ err_hw_init:
++ netif_napi_del(&adapter->rx_ring->napi);
+ kfree(adapter->tx_ring);
+ kfree(adapter->rx_ring);
+ err_sw_init:
+diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
+index b8ba3f94c3632..a47a2e3e548cf 100644
+--- a/drivers/net/ethernet/intel/igbvf/vf.c
++++ b/drivers/net/ethernet/intel/igbvf/vf.c
+@@ -1,6 +1,8 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2009 - 2018 Intel Corporation. */
+
++#include <linux/etherdevice.h>
++
+ #include "vf.h"
+
+ static s32 e1000_check_for_link_vf(struct e1000_hw *hw);
+@@ -131,11 +133,16 @@ static s32 e1000_reset_hw_vf(struct e1000_hw *hw)
+ /* set our "perm_addr" based on info provided by PF */
+ ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
+ if (!ret_val) {
+- if (msgbuf[0] == (E1000_VF_RESET |
+- E1000_VT_MSGTYPE_ACK))
++ switch (msgbuf[0]) {
++ case E1000_VF_RESET | E1000_VT_MSGTYPE_ACK:
+ memcpy(hw->mac.perm_addr, addr, ETH_ALEN);
+- else
++ break;
++ case E1000_VF_RESET | E1000_VT_MSGTYPE_NACK:
++ eth_zero_addr(hw->mac.perm_addr);
++ break;
++ default:
+ ret_val = -E1000_ERR_MAC_INIT;
++ }
+ }
+ }
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 3e386c38d016c..e09ca21b8e3fe 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -13,6 +13,7 @@
+ #include <linux/ptp_clock_kernel.h>
+ #include <linux/timecounter.h>
+ #include <linux/net_tstamp.h>
++#include <linux/bitfield.h>
+
+ #include "igc_hw.h"
+
+@@ -94,6 +95,8 @@ struct igc_ring {
+ u8 queue_index; /* logical index of the ring*/
+ u8 reg_idx; /* physical index of the ring */
+ bool launchtime_enable; /* true if LaunchTime is enabled */
++ ktime_t last_tx_cycle; /* end of the cycle with a launchtime transmission */
++ ktime_t last_ff_cycle; /* Last cycle with an active first flag */
+
+ u32 start_time;
+ u32 end_time;
+@@ -182,6 +185,7 @@ struct igc_adapter {
+
+ ktime_t base_time;
+ ktime_t cycle_time;
++ bool qbv_enable;
+
+ /* OS defined structs */
+ struct pci_dev *pdev;
+@@ -224,6 +228,10 @@ struct igc_adapter {
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_caps;
+ struct work_struct ptp_tx_work;
++ /* Access to ptp_tx_skb and ptp_tx_start are protected by the
++ * ptp_tx_lock.
++ */
++ spinlock_t ptp_tx_lock;
+ struct sk_buff *ptp_tx_skb;
+ struct hwtstamp_config tstamp_config;
+ unsigned long ptp_tx_start;
+@@ -309,6 +317,33 @@ extern char igc_driver_name[];
+ #define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
+ #define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
+
++/* RX-desc Write-Back format RSS Type's */
++enum igc_rss_type_num {
++ IGC_RSS_TYPE_NO_HASH = 0,
++ IGC_RSS_TYPE_HASH_TCP_IPV4 = 1,
++ IGC_RSS_TYPE_HASH_IPV4 = 2,
++ IGC_RSS_TYPE_HASH_TCP_IPV6 = 3,
++ IGC_RSS_TYPE_HASH_IPV6_EX = 4,
++ IGC_RSS_TYPE_HASH_IPV6 = 5,
++ IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6,
++ IGC_RSS_TYPE_HASH_UDP_IPV4 = 7,
++ IGC_RSS_TYPE_HASH_UDP_IPV6 = 8,
++ IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9,
++ IGC_RSS_TYPE_MAX = 10,
++};
++#define IGC_RSS_TYPE_MAX_TABLE 16
++#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
++
++/* igc_rss_type - Rx descriptor RSS type field */
++static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
++{
++ /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
++ * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
++ * is slightly slower than via u32 (wb.lower.lo_dword.data)
++ */
++ return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
++}
++
+ /* Interrupt defines */
+ #define IGC_START_ITR 648 /* ~6000 ints/sec */
+ #define IGC_4K_ITR 980
+@@ -324,11 +359,11 @@ extern char igc_driver_name[];
+ /* TX/RX descriptor defines */
+ #define IGC_DEFAULT_TXD 256
+ #define IGC_DEFAULT_TX_WORK 128
+-#define IGC_MIN_TXD 80
++#define IGC_MIN_TXD 64
+ #define IGC_MAX_TXD 4096
+
+ #define IGC_DEFAULT_RXD 256
+-#define IGC_MIN_RXD 80
++#define IGC_MIN_RXD 64
+ #define IGC_MAX_RXD 4096
+
+ /* Supported Rx Buffer Sizes */
+@@ -399,7 +434,6 @@ enum igc_state_t {
+ __IGC_TESTING,
+ __IGC_RESETTING,
+ __IGC_DOWN,
+- __IGC_PTP_TX_IN_PROGRESS,
+ };
+
+ enum igc_tx_flags {
+diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
+index ce530f5fd7bda..52849f5e8048d 100644
+--- a/drivers/net/ethernet/intel/igc/igc_base.h
++++ b/drivers/net/ethernet/intel/igc/igc_base.h
+@@ -85,8 +85,13 @@ union igc_adv_rx_desc {
+ #define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */
+
+ /* SRRCTL bit definitions */
+-#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */
+-#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
+-#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
++#define IGC_SRRCTL_BSIZEPKT_MASK GENMASK(6, 0)
++#define IGC_SRRCTL_BSIZEPKT(x) FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \
++ (x) / 1024) /* in 1 KB resolution */
++#define IGC_SRRCTL_BSIZEHDR_MASK GENMASK(13, 8)
++#define IGC_SRRCTL_BSIZEHDR(x) FIELD_PREP(IGC_SRRCTL_BSIZEHDR_MASK, \
++ (x) / 64) /* in 64 bytes resolution */
++#define IGC_SRRCTL_DESCTYPE_MASK GENMASK(27, 25)
++#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF FIELD_PREP(IGC_SRRCTL_DESCTYPE_MASK, 1)
+
+ #endif /* _IGC_BASE_H */
+diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
+index a4bbee7487984..703b62c5f79b5 100644
+--- a/drivers/net/ethernet/intel/igc/igc_defines.h
++++ b/drivers/net/ethernet/intel/igc/igc_defines.h
+@@ -324,6 +324,8 @@
+ #define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
+ #define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
+
++#define IGC_ADVTXD_TSN_CNTX_FIRST 0x00000080
++
+ /* Transmit Control */
+ #define IGC_TCTL_EN 0x00000002 /* enable Tx */
+ #define IGC_TCTL_PSP 0x00000008 /* pad short packets */
+@@ -467,7 +469,9 @@
+ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */
+ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */
+ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */
++#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */
+ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */
++#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */
+ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */
+ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */
+ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */
+@@ -535,7 +539,7 @@
+ #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
+ #define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */
+ #define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */
+-#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2)
++#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2)
+ #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
+
+ #define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index e0a76ac1bbbcd..859ddc07fbbfe 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -1701,6 +1701,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
+ /* twisted pair */
+ cmd->base.port = PORT_TP;
+ cmd->base.phy_address = hw->phy.addr;
++ ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
++ ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+
+ /* advertising link modes */
+ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
+diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
+index b2ef9fde97b38..a0e2a404d5355 100644
+--- a/drivers/net/ethernet/intel/igc/igc_i225.c
++++ b/drivers/net/ethernet/intel/igc/igc_i225.c
+@@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+ {
+ u32 swfw_sync;
+
+- while (igc_get_hw_semaphore_i225(hw))
+- ; /* Empty */
++ /* Releasing the resource requires first getting the HW semaphore.
++ * If we fail to get the semaphore, there is nothing we can do,
++ * except log an error and quit. We are not allowed to hang here
++ * indefinitely, as it may cause denial of service or system crash.
++ */
++ if (igc_get_hw_semaphore_i225(hw)) {
++ hw_dbg("Failed to release SW_FW_SYNC.\n");
++ return;
++ }
+
+ swfw_sync = rd32(IGC_SW_FW_SYNC);
+ swfw_sync &= ~mask;
+@@ -636,7 +643,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link)
+ ltrv = rd32(IGC_LTRMAXV);
+ if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) {
+ ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max |
+- (scale_min << IGC_LTRMAXV_SCALE_SHIFT);
++ (scale_max << IGC_LTRMAXV_SCALE_SHIFT);
+ wr32(IGC_LTRMAXV, ltrv);
+ }
+ }
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 0e19b4d02e628..a8c24a1c12b43 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
+ /* reset BQL for queue */
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
++ /* Zero out the buffer ring */
++ memset(tx_ring->tx_buffer_info, 0,
++ sizeof(*tx_ring->tx_buffer_info) * tx_ring->count);
++
++ /* Zero out the descriptor ring */
++ memset(tx_ring->desc, 0, tx_ring->size);
++
+ /* reset next_to_use and next_to_clean */
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+@@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
+ */
+ void igc_free_tx_resources(struct igc_ring *tx_ring)
+ {
+- igc_clean_tx_ring(tx_ring);
++ igc_disable_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+@@ -309,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
+ igc_clean_tx_ring(adapter->tx_ring[i]);
+ }
+
++static void igc_disable_tx_ring_hw(struct igc_ring *ring)
++{
++ struct igc_hw *hw = &ring->q_vector->adapter->hw;
++ u8 idx = ring->reg_idx;
++ u32 txdctl;
++
++ txdctl = rd32(IGC_TXDCTL(idx));
++ txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
++ txdctl |= IGC_TXDCTL_SWFLUSH;
++ wr32(IGC_TXDCTL(idx), txdctl);
++}
++
++/**
++ * igc_disable_all_tx_rings_hw - Disable all transmit queue operation
++ * @adapter: board private structure
++ */
++static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter)
++{
++ int i;
++
++ for (i = 0; i < adapter->num_tx_queues; i++) {
++ struct igc_ring *tx_ring = adapter->tx_ring[i];
++
++ igc_disable_tx_ring_hw(tx_ring);
++ }
++}
++
+ /**
+ * igc_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+@@ -504,6 +538,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
+ u8 index = rx_ring->queue_index;
+ int size, desc_len, res;
+
++ /* XDP RX-queue info */
++ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
++ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
+ rx_ring->q_vector->napi.napi_id);
+ if (res < 0) {
+@@ -637,8 +674,11 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
+ else
+ buf_size = IGC_RXBUFFER_2048;
+
+- srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
+- srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
++ srrctl = rd32(IGC_SRRCTL(reg_idx));
++ srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK |
++ IGC_SRRCTL_DESCTYPE_MASK);
++ srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN);
++ srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size);
+ srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+ wr32(IGC_SRRCTL(reg_idx), srrctl);
+@@ -698,7 +738,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
+ /* disable the queue */
+ wr32(IGC_TXDCTL(reg_idx), 0);
+ wrfl();
+- mdelay(10);
+
+ wr32(IGC_TDLEN(reg_idx),
+ ring->count * sizeof(union igc_adv_tx_desc));
+@@ -996,25 +1035,118 @@ static int igc_write_mc_addr_list(struct net_device *netdev)
+ return netdev_mc_count(netdev);
+ }
+
+-static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
++static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
++ bool *first_flag, bool *insert_empty)
+ {
++ struct igc_adapter *adapter = netdev_priv(ring->netdev);
+ ktime_t cycle_time = adapter->cycle_time;
+ ktime_t base_time = adapter->base_time;
+- u32 launchtime;
++ ktime_t now = ktime_get_clocktai();
++ ktime_t baset_est, end_of_cycle;
++ s32 launchtime;
++ s64 n;
++
++ n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
++
++ baset_est = ktime_add_ns(base_time, cycle_time * (n));
++ end_of_cycle = ktime_add_ns(baset_est, cycle_time);
+
+- /* FIXME: when using ETF together with taprio, we may have a
+- * case where 'delta' is larger than the cycle_time, this may
+- * cause problems if we don't read the current value of
+- * IGC_BASET, as the value writen into the launchtime
+- * descriptor field may be misinterpreted.
++ if (ktime_compare(txtime, end_of_cycle) >= 0) {
++ if (baset_est != ring->last_ff_cycle) {
++ *first_flag = true;
++ ring->last_ff_cycle = baset_est;
++
++ if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
++ *insert_empty = true;
++ }
++ }
++
++ /* Introducing a window at end of cycle on which packets
++ * potentially not honor launchtime. Window of 5us chosen
++ * considering software update the tail pointer and packets
++ * are dma'ed to packet buffer.
+ */
+- div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
++ if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
++ netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
++ txtime);
++
++ ring->last_tx_cycle = end_of_cycle;
++
++ launchtime = ktime_sub_ns(txtime, baset_est);
++ if (launchtime > 0)
++ div_s64_rem(launchtime, cycle_time, &launchtime);
++ else
++ launchtime = 0;
+
+ return cpu_to_le32(launchtime);
+ }
+
++static int igc_init_empty_frame(struct igc_ring *ring,
++ struct igc_tx_buffer *buffer,
++ struct sk_buff *skb)
++{
++ unsigned int size;
++ dma_addr_t dma;
++
++ size = skb_headlen(skb);
++
++ dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
++ if (dma_mapping_error(ring->dev, dma)) {
++ netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
++ return -ENOMEM;
++ }
++
++ buffer->skb = skb;
++ buffer->protocol = 0;
++ buffer->bytecount = skb->len;
++ buffer->gso_segs = 1;
++ buffer->time_stamp = jiffies;
++ dma_unmap_len_set(buffer, len, skb->len);
++ dma_unmap_addr_set(buffer, dma, dma);
++
++ return 0;
++}
++
++static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
++ struct sk_buff *skb,
++ struct igc_tx_buffer *first)
++{
++ union igc_adv_tx_desc *desc;
++ u32 cmd_type, olinfo_status;
++ int err;
++
++ if (!igc_desc_unused(ring))
++ return -EBUSY;
++
++ err = igc_init_empty_frame(ring, first, skb);
++ if (err)
++ return err;
++
++ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
++ IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
++ first->bytecount;
++ olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
++
++ desc = IGC_TX_DESC(ring, ring->next_to_use);
++ desc->read.cmd_type_len = cpu_to_le32(cmd_type);
++ desc->read.olinfo_status = cpu_to_le32(olinfo_status);
++ desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));
++
++ netdev_tx_sent_queue(txring_txq(ring), skb->len);
++
++ first->next_to_watch = desc;
++
++ ring->next_to_use++;
++ if (ring->next_to_use == ring->count)
++ ring->next_to_use = 0;
++
++ return 0;
++}
++
++#define IGC_EMPTY_FRAME_SIZE 60
++
+ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
+- struct igc_tx_buffer *first,
++ __le32 launch_time, bool first_flag,
+ u32 vlan_macip_lens, u32 type_tucmd,
+ u32 mss_l4len_idx)
+ {
+@@ -1033,26 +1165,17 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
+ if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+ mss_l4len_idx |= tx_ring->reg_idx << 4;
+
++ if (first_flag)
++ mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;
++
+ context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
+ context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
+ context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
+-
+- /* We assume there is always a valid Tx time available. Invalid times
+- * should have been handled by the upper layers.
+- */
+- if (tx_ring->launchtime_enable) {
+- struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
+- ktime_t txtime = first->skb->tstamp;
+-
+- skb_txtime_consumed(first->skb);
+- context_desc->launch_time = igc_tx_launchtime(adapter,
+- txtime);
+- } else {
+- context_desc->launch_time = 0;
+- }
++ context_desc->launch_time = launch_time;
+ }
+
+-static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
++static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
++ __le32 launch_time, bool first_flag)
+ {
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens = 0;
+@@ -1092,7 +1215,8 @@ no_csum:
+ vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
++ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
++ vlan_macip_lens, type_tucmd, 0);
+ }
+
+ static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+@@ -1316,6 +1440,7 @@ dma_error:
+
+ static int igc_tso(struct igc_ring *tx_ring,
+ struct igc_tx_buffer *first,
++ __le32 launch_time, bool first_flag,
+ u8 *hdr_len)
+ {
+ u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+@@ -1402,8 +1527,8 @@ static int igc_tso(struct igc_ring *tx_ring,
+ vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
+- type_tucmd, mss_l4len_idx);
++ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
++ vlan_macip_lens, type_tucmd, mss_l4len_idx);
+
+ return 1;
+ }
+@@ -1411,11 +1536,14 @@ static int igc_tso(struct igc_ring *tx_ring,
+ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ struct igc_ring *tx_ring)
+ {
++ bool first_flag = false, insert_empty = false;
+ u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ __be16 protocol = vlan_get_protocol(skb);
+ struct igc_tx_buffer *first;
++ __le32 launch_time = 0;
+ u32 tx_flags = 0;
+ unsigned short f;
++ ktime_t txtime;
+ u8 hdr_len = 0;
+ int tso = 0;
+
+@@ -1429,11 +1557,40 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ count += TXD_USE_COUNT(skb_frag_size(
+ &skb_shinfo(skb)->frags[f]));
+
+- if (igc_maybe_stop_tx(tx_ring, count + 3)) {
++ if (igc_maybe_stop_tx(tx_ring, count + 5)) {
+ /* this is a hard error */
+ return NETDEV_TX_BUSY;
+ }
+
++ if (!tx_ring->launchtime_enable)
++ goto done;
++
++ txtime = skb->tstamp;
++ skb->tstamp = ktime_set(0, 0);
++ launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
++
++ if (insert_empty) {
++ struct igc_tx_buffer *empty_info;
++ struct sk_buff *empty;
++ void *data;
++
++ empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
++ empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
++ if (!empty)
++ goto done;
++
++ data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
++ memset(data, 0, IGC_EMPTY_FRAME_SIZE);
++
++ igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
++
++ if (igc_init_tx_empty_descriptor(tx_ring,
++ empty,
++ empty_info) < 0)
++ dev_kfree_skb_any(empty);
++ }
++
++done:
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->type = IGC_TX_BUFFER_TYPE_SKB;
+@@ -1448,9 +1605,10 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ * the other timer registers before skipping the
+ * timestamping request.
+ */
+- if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+- !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
+- &adapter->state)) {
++ unsigned long flags;
++
++ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
++ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ tx_flags |= IGC_TX_FLAGS_TSTAMP;
+
+@@ -1459,6 +1617,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ } else {
+ adapter->tx_hwtstamp_skipped++;
+ }
++
++ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+ }
+
+ if (skb_vlan_tag_present(skb)) {
+@@ -1470,11 +1630,11 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ first->tx_flags = tx_flags;
+ first->protocol = protocol;
+
+- tso = igc_tso(tx_ring, first, &hdr_len);
++ tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len);
+ if (tso < 0)
+ goto out_drop;
+ else if (!tso)
+- igc_tx_csum(tx_ring, first);
++ igc_tx_csum(tx_ring, first, launch_time, first_flag);
+
+ igc_tx_map(tx_ring, first, hdr_len);
+
+@@ -1555,14 +1715,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
+ le32_to_cpu(rx_desc->wb.upper.status_error));
+ }
+
++/* Mapping HW RSS Type to enum pkt_hash_types */
++static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
++ [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
++ [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
++ [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
++ [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
++ [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
++ [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
++ [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
++ [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
++ [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
++ [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
++ [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
++ [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
++ [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */
++ [13] = PKT_HASH_TYPE_NONE,
++ [14] = PKT_HASH_TYPE_NONE,
++ [15] = PKT_HASH_TYPE_NONE,
++};
++
+ static inline void igc_rx_hash(struct igc_ring *ring,
+ union igc_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+ {
+- if (ring->netdev->features & NETIF_F_RXHASH)
+- skb_set_hash(skb,
+- le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+- PKT_HASH_TYPE_L3);
++ if (ring->netdev->features & NETIF_F_RXHASH) {
++ u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
++ u32 rss_type = igc_rss_type(rx_desc);
++
++ skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
++ }
+ }
+
+ static void igc_rx_vlan(struct igc_ring *rx_ring,
+@@ -2434,21 +2616,24 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
+ struct xdp_buff *xdp)
+ {
++ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
+- unsigned int datasize = xdp->data_end - xdp->data;
+- unsigned int totalsize = metasize + datasize;
+ struct sk_buff *skb;
+
+- skb = __napi_alloc_skb(&ring->q_vector->napi,
+- xdp->data_end - xdp->data_hard_start,
++ net_prefetch(xdp->data_meta);
++
++ skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+- skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+- memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
+- if (metasize)
++ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
++ ALIGN(totalsize, sizeof(long)));
++
++ if (metasize) {
+ skb_metadata_set(skb, metasize);
++ __skb_pull(skb, metasize);
++ }
+
+ return skb;
+ }
+@@ -2593,15 +2778,15 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+ struct netdev_queue *nq = txring_txq(ring);
+ union igc_adv_tx_desc *tx_desc = NULL;
+ int cpu = smp_processor_id();
+- u16 ntu = ring->next_to_use;
+ struct xdp_desc xdp_desc;
+- u16 budget;
++ u16 budget, ntu;
+
+ if (!netif_carrier_ok(ring->netdev))
+ return;
+
+ __netif_tx_lock(nq, cpu);
+
++ ntu = ring->next_to_use;
+ budget = igc_desc_unused(ring);
+
+ while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+@@ -2768,7 +2953,9 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
+ if (tx_buffer->next_to_watch &&
+ time_after(jiffies, tx_buffer->time_stamp +
+ (adapter->tx_timeout_factor * HZ)) &&
+- !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
++ !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
++ (rd32(IGC_TDH(tx_ring->reg_idx)) !=
++ readl(tx_ring->tail))) {
+ /* detected Tx unit hang */
+ netdev_err(tx_ring->netdev,
+ "Detected Tx Unit Hang\n"
+@@ -4818,6 +5005,7 @@ void igc_down(struct igc_adapter *adapter)
+ /* clear VLAN promisc flag so VFTA will be updated if necessary */
+ adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
+
++ igc_disable_all_tx_rings_hw(adapter);
+ igc_clean_all_tx_rings(adapter);
+ igc_clean_all_rx_rings(adapter);
+ }
+@@ -4895,6 +5083,24 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
+ return 0;
+ }
+
++/**
++ * igc_tx_timeout - Respond to a Tx Hang
++ * @netdev: network interface device structure
++ * @txqueue: queue number that timed out
++ **/
++static void igc_tx_timeout(struct net_device *netdev,
++ unsigned int __always_unused txqueue)
++{
++ struct igc_adapter *adapter = netdev_priv(netdev);
++ struct igc_hw *hw = &adapter->hw;
++
++ /* Do the reset outside of interrupt context */
++ adapter->tx_timeout_count++;
++ schedule_work(&adapter->reset_task);
++ wr32(IGC_EICS,
++ (adapter->eims_enable_mask & ~adapter->eims_other));
++}
++
+ /**
+ * igc_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+@@ -5322,7 +5528,7 @@ static void igc_watchdog_task(struct work_struct *work)
+ case SPEED_100:
+ case SPEED_1000:
+ case SPEED_2500:
+- adapter->tx_timeout_factor = 7;
++ adapter->tx_timeout_factor = 1;
+ break;
+ }
+
+@@ -5466,6 +5672,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
++ if (icr & IGC_ICR_TS)
++ igc_tsync_interrupt(adapter);
++
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+@@ -5509,6 +5718,9 @@ static irqreturn_t igc_intr(int irq, void *data)
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
++ if (icr & IGC_ICR_TS)
++ igc_tsync_interrupt(adapter);
++
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+@@ -5789,9 +6001,10 @@ static bool validate_schedule(struct igc_adapter *adapter,
+ return false;
+
+ for (n = 0; n < qopt->num_entries; n++) {
+- const struct tc_taprio_sched_entry *e;
++ const struct tc_taprio_sched_entry *e, *prev;
+ int i;
+
++ prev = n ? &qopt->entries[n - 1] : NULL;
+ e = &qopt->entries[n];
+
+ /* i225 only supports "global" frame preemption
+@@ -5800,13 +6013,18 @@ static bool validate_schedule(struct igc_adapter *adapter,
+ if (e->command != TC_TAPRIO_CMD_SET_GATES)
+ return false;
+
+- for (i = 0; i < adapter->num_tx_queues; i++) {
+- if (e->gate_mask & BIT(i))
++ for (i = 0; i < adapter->num_tx_queues; i++)
++ if (e->gate_mask & BIT(i)) {
+ queue_uses[i]++;
+
+- if (queue_uses[i] > 1)
+- return false;
+- }
++ /* There are limitations: A single queue cannot
++ * be opened and closed multiple times per cycle
++ * unless the gate stays open. Check for it.
++ */
++ if (queue_uses[i] > 1 &&
++ !(prev->gate_mask & BIT(i)))
++ return false;
++ }
+ }
+
+ return true;
+@@ -5848,12 +6066,19 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ struct tc_taprio_qopt_offload *qopt)
+ {
++ bool queue_configured[IGC_MAX_TX_QUEUES] = { };
+ u32 start_time = 0, end_time = 0;
+ size_t n;
++ int i;
++
++ adapter->qbv_enable = qopt->enable;
+
+ if (!qopt->enable)
+ return igc_tsn_clear_schedule(adapter);
+
++ if (qopt->base_time < 0)
++ return -ERANGE;
++
+ if (adapter->base_time)
+ return -EALREADY;
+
+@@ -5863,28 +6088,58 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+ adapter->cycle_time = qopt->cycle_time;
+ adapter->base_time = qopt->base_time;
+
+- /* FIXME: be a little smarter about cases when the gate for a
+- * queue stays open for more than one entry.
+- */
+ for (n = 0; n < qopt->num_entries; n++) {
+ struct tc_taprio_sched_entry *e = &qopt->entries[n];
+- int i;
+
+ end_time += e->interval;
+
++ /* If any of the conditions below are true, we need to manually
++ * control the end time of the cycle.
++ * 1. Qbv users can specify a cycle time that is not equal
++ * to the total GCL intervals. Hence, recalculation is
++ * necessary here to exclude the time interval that
++ * exceeds the cycle time.
++ * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2,
++ * once the end of the list is reached, it will switch
++ * to the END_OF_CYCLE state and leave the gates in the
++ * same state until the next cycle is started.
++ */
++ if (end_time > adapter->cycle_time ||
++ n + 1 == qopt->num_entries)
++ end_time = adapter->cycle_time;
++
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
+ if (!(e->gate_mask & BIT(i)))
+ continue;
+
+- ring->start_time = start_time;
++ /* Check whether a queue stays open for more than one
++ * entry. If so, keep the start and advance the end
++ * time.
++ */
++ if (!queue_configured[i])
++ ring->start_time = start_time;
+ ring->end_time = end_time;
++
++ queue_configured[i] = true;
+ }
+
+ start_time += e->interval;
+ }
+
++ /* Check whether a queue gets configured.
++ * If not, set the start and end time to be end time.
++ */
++ for (i = 0; i < adapter->num_tx_queues; i++) {
++ if (!queue_configured[i]) {
++ struct igc_ring *ring = adapter->tx_ring[i];
++
++ ring->start_time = end_time;
++ ring->end_time = end_time;
++ }
++ }
++
+ return 0;
+ }
+
+@@ -6091,6 +6346,7 @@ static const struct net_device_ops igc_netdev_ops = {
+ .ndo_set_rx_mode = igc_set_rx_mode,
+ .ndo_set_mac_address = igc_set_mac,
+ .ndo_change_mtu = igc_change_mtu,
++ .ndo_tx_timeout = igc_tx_timeout,
+ .ndo_get_stats64 = igc_get_stats64,
+ .ndo_fix_features = igc_fix_features,
+ .ndo_set_features = igc_set_features,
+@@ -6147,6 +6403,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
+ u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+ u32 value = 0;
+
++ if (IGC_REMOVED(hw_addr))
++ return ~value;
++
+ value = readl(&hw_addr[reg]);
+
+ /* reads should not return all F's */
+@@ -6321,6 +6580,7 @@ static int igc_probe(struct pci_dev *pdev,
+ netdev->features |= NETIF_F_TSO;
+ netdev->features |= NETIF_F_TSO6;
+ netdev->features |= NETIF_F_TSO_ECN;
++ netdev->features |= NETIF_F_RXHASH;
+ netdev->features |= NETIF_F_RXCSUM;
+ netdev->features |= NETIF_F_HW_CSUM;
+ netdev->features |= NETIF_F_SCTP_CRC;
+@@ -6895,18 +7155,6 @@ void igc_enable_rx_ring(struct igc_ring *ring)
+ igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+ }
+
+-static void igc_disable_tx_ring_hw(struct igc_ring *ring)
+-{
+- struct igc_hw *hw = &ring->q_vector->adapter->hw;
+- u8 idx = ring->reg_idx;
+- u32 txdctl;
+-
+- txdctl = rd32(IGC_TXDCTL(idx));
+- txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
+- txdctl |= IGC_TXDCTL_SWFLUSH;
+- wr32(IGC_TXDCTL(idx), txdctl);
+-}
+-
+ void igc_disable_tx_ring(struct igc_ring *ring)
+ {
+ igc_disable_tx_ring_hw(ring);
+diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
+index 5cad31c3c7b09..6961f65d36b9a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_phy.c
++++ b/drivers/net/ethernet/intel/igc/igc_phy.c
+@@ -581,7 +581,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
+ * the lower time out
+ */
+ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+- usleep_range(500, 1000);
++ udelay(50);
+ mdic = rd32(IGC_MDIC);
+ if (mdic & IGC_MDIC_READY)
+ break;
+@@ -638,7 +638,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
+ * the lower time out
+ */
+ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+- usleep_range(500, 1000);
++ udelay(50);
+ mdic = rd32(IGC_MDIC);
+ if (mdic & IGC_MDIC_READY)
+ break;
+@@ -746,8 +746,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data)
+ if (ret_val)
+ return ret_val;
+ ret_val = igc_write_phy_reg_mdic(hw, offset, data);
+- if (ret_val)
+- return ret_val;
+ hw->phy.ops.release(hw);
+ } else {
+ ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr,
+@@ -779,8 +777,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data)
+ if (ret_val)
+ return ret_val;
+ ret_val = igc_read_phy_reg_mdic(hw, offset, data);
+- if (ret_val)
+- return ret_val;
+ hw->phy.ops.release(hw);
+ } else {
+ ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr,
+diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
+index 0f021909b430a..556750b61c98f 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
+@@ -323,7 +323,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+ ts = ns_to_timespec64(ns);
+ if (rq->perout.index == 1) {
+ if (use_freq) {
+- tsauxc_mask = IGC_TSAUXC_EN_CLK1;
++ tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1;
+ tsim_mask = 0;
+ } else {
+ tsauxc_mask = IGC_TSAUXC_EN_TT1;
+@@ -334,7 +334,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+ freqout = IGC_FREQOUT1;
+ } else {
+ if (use_freq) {
+- tsauxc_mask = IGC_TSAUXC_EN_CLK0;
++ tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0;
+ tsim_mask = 0;
+ } else {
+ tsauxc_mask = IGC_TSAUXC_EN_TT0;
+@@ -348,23 +348,44 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+ tsauxc = rd32(IGC_TSAUXC);
+ tsim = rd32(IGC_TSIM);
+ if (rq->perout.index == 1) {
+- tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1);
++ tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 |
++ IGC_TSAUXC_ST1);
+ tsim &= ~IGC_TSICR_TT1;
+ } else {
+- tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0);
++ tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 |
++ IGC_TSAUXC_ST0);
+ tsim &= ~IGC_TSICR_TT0;
+ }
+ if (on) {
++ struct timespec64 safe_start;
+ int i = rq->perout.index;
+
+ igc_pin_perout(igc, i, pin, use_freq);
+- igc->perout[i].start.tv_sec = rq->perout.start.sec;
++ igc_ptp_read(igc, &safe_start);
++
++ /* PPS output start time is triggered by Target time(TT)
++ * register. Programming any past time value into TT
++ * register will cause PPS to never start. Need to make
++ * sure we program the TT register a time ahead in
++ * future. There isn't a stringent need to fire PPS out
++ * right away. Adding +2 seconds should take care of
++ * corner cases. Let's say if the SYSTIML is close to
++ * wrap up and the timer keeps ticking as we program the
++ * register, adding +2seconds is safe bet.
++ */
++ safe_start.tv_sec += 2;
++
++ if (rq->perout.start.sec < safe_start.tv_sec)
++ igc->perout[i].start.tv_sec = safe_start.tv_sec;
++ else
++ igc->perout[i].start.tv_sec = rq->perout.start.sec;
+ igc->perout[i].start.tv_nsec = rq->perout.start.nsec;
+ igc->perout[i].period.tv_sec = ts.tv_sec;
+ igc->perout[i].period.tv_nsec = ts.tv_nsec;
+- wr32(trgttimh, rq->perout.start.sec);
++ wr32(trgttimh, (u32)igc->perout[i].start.tv_sec);
+ /* For now, always select timer 0 as source. */
+- wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
++ wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec |
++ IGC_TT_IO_TIMER_SEL_SYSTIM0));
+ if (use_freq)
+ wr32(freqout, ns);
+ tsauxc |= tsauxc_mask;
+@@ -416,10 +437,12 @@ static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
++ *
++ * Returns 0 on success.
+ **/
+-static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
+- struct skb_shared_hwtstamps *hwtstamps,
+- u64 systim)
++static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
++ struct skb_shared_hwtstamps *hwtstamps,
++ u64 systim)
+ {
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+@@ -429,8 +452,9 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
+ systim & 0xFFFFFFFF);
+ break;
+ default:
+- break;
++ return -EINVAL;
+ }
++ return 0;
+ }
+
+ /**
+@@ -603,6 +627,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
+ return 0;
+ }
+
++/* Requires adapter->ptp_tx_lock held by caller. */
+ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
+ {
+ struct igc_hw *hw = &adapter->hw;
+@@ -610,7 +635,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ adapter->tx_hwtstamp_timeouts++;
+- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */
+ rd32(IGC_TXSTMPH);
+ netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
+@@ -618,20 +642,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
+
+ void igc_ptp_tx_hang(struct igc_adapter *adapter)
+ {
+- bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+- IGC_PTP_TX_TIMEOUT);
++ unsigned long flags;
+
+- if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
+- return;
++ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+
+- /* If we haven't received a timestamp within the timeout, it is
+- * reasonable to assume that it will never occur, so we can unlock the
+- * timestamp bit when this occurs.
+- */
+- if (timeout) {
+- cancel_work_sync(&adapter->ptp_tx_work);
+- igc_ptp_tx_timeout(adapter);
+- }
++ if (!adapter->ptp_tx_skb)
++ goto unlock;
++
++ if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT))
++ goto unlock;
++
++ igc_ptp_tx_timeout(adapter);
++
++unlock:
++ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+ }
+
+ /**
+@@ -641,6 +665,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
++ *
++ * Context: Expects adapter->ptp_tx_lock to be held by caller.
+ */
+ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+ {
+@@ -655,7 +681,8 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+
+ regval = rd32(IGC_TXSTMPL);
+ regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+- igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
++ if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval))
++ return;
+
+ switch (adapter->link_speed) {
+ case SPEED_10:
+@@ -675,13 +702,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+ shhwtstamps.hwtstamp =
+ ktime_add_ns(shhwtstamps.hwtstamp, adjust);
+
+- /* Clear the lock early before calling skb_tstamp_tx so that
+- * applications are not woken up before the lock bit is clear. We use
+- * a copy of the skb pointer to ensure other threads can't change it
+- * while we're notifying the stack.
+- */
+ adapter->ptp_tx_skb = NULL;
+- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+
+ /* Notify the stack and free the skb after we've unlocked */
+ skb_tstamp_tx(skb, &shhwtstamps);
+@@ -692,24 +713,33 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+ * igc_ptp_tx_work
+ * @work: pointer to work struct
+ *
+- * This work function polls the TSYNCTXCTL valid bit to determine when a
+- * timestamp has been taken for the current stored skb.
++ * This work function checks the TSYNCTXCTL valid bit to determine when
++ * a timestamp has been taken for the current stored skb.
+ */
+ static void igc_ptp_tx_work(struct work_struct *work)
+ {
+ struct igc_adapter *adapter = container_of(work, struct igc_adapter,
+ ptp_tx_work);
+ struct igc_hw *hw = &adapter->hw;
++ unsigned long flags;
+ u32 tsynctxctl;
+
+- if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
+- return;
++ spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
++
++ if (!adapter->ptp_tx_skb)
++ goto unlock;
+
+ tsynctxctl = rd32(IGC_TSYNCTXCTL);
+- if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0)))
+- return;
++ tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0;
++ if (!tsynctxctl) {
++ WARN_ONCE(1, "Received a TSTAMP interrupt but no TSTAMP is ready.\n");
++ goto unlock;
++ }
+
+ igc_ptp_tx_hwtstamp(adapter);
++
++unlock:
++ spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+ }
+
+ /**
+@@ -768,12 +798,25 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
+ */
+ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
+ {
+- return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false;
++ if (!IS_ENABLED(CONFIG_X86_TSC))
++ return false;
++
++ /* FIXME: it was noticed that enabling support for PCIe PTM in
++ * some i225-V models could cause lockups when bringing the
++ * interface up/down. There should be no downsides to
++ * disabling crosstimestamping support for i225-V, as it
++ * doesn't have any PTP support. That way we gain some time
++ * while root causing the issue.
++ */
++ if (adapter->pdev->device == IGC_DEV_ID_I225_V)
++ return false;
++
++ return pcie_ptm_enabled(adapter->pdev);
+ }
+
+ static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
+ {
+-#if IS_ENABLED(CONFIG_X86_TSC)
++#if IS_ENABLED(CONFIG_X86_TSC) && !defined(CONFIG_UML)
+ return convert_art_ns_to_tsc(tstamp);
+ #else
+ return (struct system_counterval_t) { };
+@@ -945,6 +988,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
+ return;
+ }
+
++ spin_lock_init(&adapter->ptp_tx_lock);
+ spin_lock_init(&adapter->tmreg_lock);
+ INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);
+
+@@ -983,6 +1027,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter)
+ igc_ptp_write_i225(adapter, &ts);
+ }
+
++static void igc_ptm_stop(struct igc_adapter *adapter)
++{
++ struct igc_hw *hw = &adapter->hw;
++ u32 ctrl;
++
++ ctrl = rd32(IGC_PTM_CTRL);
++ ctrl &= ~IGC_PTM_CTRL_EN;
++
++ wr32(IGC_PTM_CTRL, ctrl);
++}
++
+ /**
+ * igc_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
+@@ -998,10 +1053,11 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
+ cancel_work_sync(&adapter->ptp_tx_work);
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+- clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+
+- if (pci_device_is_present(adapter->pdev))
++ if (pci_device_is_present(adapter->pdev)) {
+ igc_ptp_time_save(adapter);
++ igc_ptm_stop(adapter);
++ }
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
+index e197a33d93a03..026c3b65fc37a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_regs.h
++++ b/drivers/net/ethernet/intel/igc/igc_regs.h
+@@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
+ #define wr32(reg, val) \
+ do { \
+ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+- writel((val), &hw_addr[(reg)]); \
++ if (!IGC_REMOVED(hw_addr)) \
++ writel((val), &hw_addr[(reg)]); \
+ } while (0)
+
+ #define rd32(reg) (igc_rd32(hw, reg))
+@@ -318,4 +319,6 @@ do { \
+
+ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+
++#define IGC_REMOVED(h) unlikely(!(h))
++
+ #endif
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
+index 0fce22de2ab85..356c7455c5cee 100644
+--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
+@@ -36,7 +36,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+ {
+ unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
+
+- if (adapter->base_time)
++ if (adapter->qbv_enable)
+ new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+ if (is_any_launchtime(adapter))
+@@ -110,15 +110,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+ wr32(IGC_STQT(i), ring->start_time);
+ wr32(IGC_ENDQT(i), ring->end_time);
+
+- if (adapter->base_time) {
+- /* If we have a base_time we are in "taprio"
+- * mode and we need to be strict about the
+- * cycles: only transmit a packet if it can be
+- * completed during that cycle.
+- */
+- txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+- IGC_TXQCTL_STRICT_END;
+- }
++ txqctl |= IGC_TXQCTL_STRICT_CYCLE |
++ IGC_TXQCTL_STRICT_END;
+
+ if (ring->launchtime_enable)
+ txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+index a604552fa634e..737590a0d849e 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+@@ -67,6 +67,8 @@
+ #define IXGBE_RXBUFFER_4K 4096
+ #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */
+
++#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
++
+ /* Attempt to maximize the headroom available for incoming frames. We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame. This leaves us with 512 bytes of room. From that we need
+@@ -770,6 +772,7 @@ struct ixgbe_adapter {
+ #ifdef CONFIG_IXGBE_IPSEC
+ struct ixgbe_ipsec *ipsec;
+ #endif /* CONFIG_IXGBE_IPSEC */
++ spinlock_t vfs_lock;
+ };
+
+ static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+index beda8e0ef7d42..c829cb65171c7 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+@@ -2633,6 +2633,14 @@ static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
+ return 0;
+ }
+
++static int ixgbe_rss_indir_tbl_max(struct ixgbe_adapter *adapter)
++{
++ if (adapter->hw.mac.type < ixgbe_mac_X550)
++ return 16;
++ else
++ return 64;
++}
++
+ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+ {
+@@ -2641,7 +2649,8 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+- cmd->data = adapter->num_rx_queues;
++ cmd->data = min_t(int, adapter->num_rx_queues,
++ ixgbe_rss_indir_tbl_max(adapter));
+ ret = 0;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+@@ -3043,14 +3052,6 @@ static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+ return ret;
+ }
+
+-static int ixgbe_rss_indir_tbl_max(struct ixgbe_adapter *adapter)
+-{
+- if (adapter->hw.mac.type < ixgbe_mac_X550)
+- return 16;
+- else
+- return 64;
+-}
+-
+ static u32 ixgbe_get_rxfh_key_size(struct net_device *netdev)
+ {
+ return IXGBE_RSS_KEY_SIZE;
+@@ -3099,8 +3100,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
+ int i;
+ u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
+
+- if (hfunc)
+- return -EINVAL;
++ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
++ return -EOPNOTSUPP;
+
+ /* Fill out the redirection table */
+ if (indir) {
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+index e596e1a9fc757..69d11ff7677d6 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+@@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+ /* Tx IPsec offload doesn't seem to work on this
+ * device, so block these requests for now.
+ */
+- if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
++ sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6;
++ if (sam->flags != XFRM_OFFLOAD_INBOUND) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+index 13c4782b920a7..af824370a2f6f 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -5526,6 +5526,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw)
+ if (!speed && hw->mac.ops.get_link_capabilities) {
+ ret = hw->mac.ops.get_link_capabilities(hw, &speed,
+ &autoneg);
++ /* remove NBASE-T speeds from default autonegotiation
++ * to accommodate broken network switches in the field
++ * which cannot cope with advertised NBASE-T speeds
++ */
+ speed &= ~(IXGBE_LINK_SPEED_5GB_FULL |
+ IXGBE_LINK_SPEED_2_5GB_FULL);
+ }
+@@ -6393,6 +6397,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
+ /* n-tuple support exists, always init our spinlock */
+ spin_lock_init(&adapter->fdir_perfect_lock);
+
++ /* init spinlock to avoid concurrency of VF resources */
++ spin_lock_init(&adapter->vfs_lock);
++
+ #ifdef CONFIG_IXGBE_DCB
+ ixgbe_init_dcb(adapter);
+ #endif
+@@ -6722,6 +6729,18 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
+ ixgbe_free_rx_resources(adapter->rx_ring[i]);
+ }
+
++/**
++ * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP
++ * @adapter: device handle, pointer to adapter
++ */
++static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter)
++{
++ if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
++ return IXGBE_RXBUFFER_2K;
++ else
++ return IXGBE_RXBUFFER_3K;
++}
++
+ /**
+ * ixgbe_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+@@ -6733,18 +6752,12 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
+ {
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+- if (adapter->xdp_prog) {
+- int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN +
+- VLAN_HLEN;
+- int i;
+-
+- for (i = 0; i < adapter->num_rx_queues; i++) {
+- struct ixgbe_ring *ring = adapter->rx_ring[i];
++ if (ixgbe_enabled_xdp_adapter(adapter)) {
++ int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD;
+
+- if (new_frame_size > ixgbe_rx_bufsz(ring)) {
+- e_warn(probe, "Requested MTU size is not supported with XDP\n");
+- return -EINVAL;
+- }
++ if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) {
++ e_warn(probe, "Requested MTU size is not supported with XDP\n");
++ return -EINVAL;
+ }
+ }
+
+@@ -8385,7 +8398,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
+ struct ixgbe_adapter *adapter = q_vector->adapter;
+
+ if (unlikely(skb_tail_pointer(skb) < hdr.network +
+- VXLAN_HEADROOM))
++ vxlan_headroom(0)))
+ return;
+
+ /* verify the port is recognized as VXLAN */
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+index 24aa97f993ca1..123dca9ce4683 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+@@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn)
+ rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn);
+ if (rp_pdev && rp_pdev->subordinate) {
+ bus = rp_pdev->subordinate->number;
++ pci_dev_put(rp_pdev);
+ return pci_get_domain_bus_and_slot(0, bus, 0);
+ }
+
++ pci_dev_put(rp_pdev);
+ return NULL;
+ }
+
+@@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
+ struct ixgbe_adapter *adapter = hw->back;
+ struct pci_dev *pdev = adapter->pdev;
+ struct pci_dev *func0_pdev;
++ bool has_mii = false;
+
+ /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices
+ * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0
+@@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
+ func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0));
+ if (func0_pdev) {
+ if (func0_pdev == pdev)
+- return true;
+- else
+- return false;
++ has_mii = true;
++ goto out;
+ }
+ func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0));
+ if (func0_pdev == pdev)
+- return true;
++ has_mii = true;
+
+- return false;
++out:
++ pci_dev_put(func0_pdev);
++ return has_mii;
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+index 23ddfd79fc8b6..affd132534eab 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+@@ -989,6 +989,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
+ u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
+ u32 tsync_rx_mtrl = PTP_EV_PORT << 16;
++ u32 aflags = adapter->flags;
+ bool is_l2 = false;
+ u32 regval;
+
+@@ -1010,20 +1011,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ case HWTSTAMP_FILTER_NONE:
+ tsync_rx_ctl = 0;
+ tsync_rx_mtrl = 0;
+- adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
++ aflags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG;
+- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+@@ -1037,8 +1038,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
+ is_l2 = true;
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_NTP_ALL:
+@@ -1049,7 +1050,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ if (hw->mac.type >= ixgbe_mac_X550) {
+ tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+- adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
++ aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+ break;
+ }
+ fallthrough;
+@@ -1060,8 +1061,6 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ * Delay_Req messages and hardware does not support
+ * timestamping all packets => return error
+ */
+- adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+@@ -1093,8 +1092,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+ IXGBE_TSYNCRXCTL_TYPE_ALL |
+ IXGBE_TSYNCRXCTL_TSIP_UT_EN;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+- adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+- adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
++ aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
++ aflags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
+ is_l2 = true;
+ break;
+ default:
+@@ -1127,6 +1126,9 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+
+ IXGBE_WRITE_FLUSH(hw);
+
++ /* configure adapter flags only when HW is actually configured */
++ adapter->flags = aflags;
++
+ /* clear TX/RX time stamp registers, just to be sure */
+ ixgbe_ptp_clear_tx_timestamp(adapter);
+ IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+@@ -1212,7 +1214,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+ struct cyclecounter cc;
+ unsigned long flags;
+ u32 incval = 0;
+- u32 tsauxc = 0;
+ u32 fuse0 = 0;
+
+ /* For some of the boards below this mask is technically incorrect.
+@@ -1247,18 +1248,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+ case ixgbe_mac_x550em_a:
+ case ixgbe_mac_X550:
+ cc.read = ixgbe_ptp_read_X550;
+-
+- /* enable SYSTIME counter */
+- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+- IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+- tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+- tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+- IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+- IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+-
+- IXGBE_WRITE_FLUSH(hw);
+ break;
+ case ixgbe_mac_X540:
+ cc.read = ixgbe_ptp_read_82599;
+@@ -1290,6 +1279,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+ }
+
++/**
++ * ixgbe_ptp_init_systime - Initialize SYSTIME registers
++ * @adapter: the ixgbe private board structure
++ *
++ * Initialize and start the SYSTIME registers.
++ */
++static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter)
++{
++ struct ixgbe_hw *hw = &adapter->hw;
++ u32 tsauxc;
++
++ switch (hw->mac.type) {
++ case ixgbe_mac_X550EM_x:
++ case ixgbe_mac_x550em_a:
++ case ixgbe_mac_X550:
++ tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
++
++ /* Reset SYSTIME registers to 0 */
++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
++
++ /* Reset interrupt settings */
++ IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
++ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
++
++ /* Activate the SYSTIME counter */
++ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
++ tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
++ break;
++ case ixgbe_mac_X540:
++ case ixgbe_mac_82599EB:
++ /* Reset SYSTIME registers to 0 */
++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
++ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
++ break;
++ default:
++ /* Other devices aren't supported */
++ return;
++ };
++
++ IXGBE_WRITE_FLUSH(hw);
++}
++
+ /**
+ * ixgbe_ptp_reset
+ * @adapter: the ixgbe private board structure
+@@ -1316,6 +1349,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
+
+ ixgbe_ptp_start_cyclecounter(adapter);
+
++ ixgbe_ptp_init_systime(adapter);
++
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
+ ktime_to_ns(ktime_get_real()));
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+index 214a38de3f415..0078ae5926164 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+@@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
+ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
+ {
+ unsigned int num_vfs = adapter->num_vfs, vf;
++ unsigned long flags;
+ int rss;
+
++ spin_lock_irqsave(&adapter->vfs_lock, flags);
+ /* set num VFs to 0 to prevent access to vfinfo */
+ adapter->num_vfs = 0;
++ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+
+ /* put the reference to all of the vf devices */
+ for (vf = 0; vf < num_vfs; ++vf) {
+@@ -1157,9 +1160,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
+
+ switch (xcast_mode) {
+ case IXGBEVF_XCAST_MODE_NONE:
+- disable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE |
++ disable = IXGBE_VMOLR_ROMPE |
+ IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+- enable = 0;
++ enable = IXGBE_VMOLR_BAM;
+ break;
+ case IXGBEVF_XCAST_MODE_MULTI:
+ disable = IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+@@ -1181,9 +1184,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
+ return -EPERM;
+ }
+
+- disable = 0;
++ disable = IXGBE_VMOLR_VPE;
+ enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE |
+- IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
++ IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE;
+ break;
+ default:
+ return -EOPNOTSUPP;
+@@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
+ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
+ {
+ struct ixgbe_hw *hw = &adapter->hw;
++ unsigned long flags;
+ u32 vf;
+
++ spin_lock_irqsave(&adapter->vfs_lock, flags);
+ for (vf = 0; vf < adapter->num_vfs; vf++) {
+ /* process any reset requests */
+ if (!ixgbe_check_for_rst(hw, vf))
+@@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
+ if (!ixgbe_check_for_ack(hw, vf))
+ ixgbe_rcv_ack_from_vf(adapter, vf);
+ }
++ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+ }
+
+ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter)
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+index 9724ffb165189..e4b50c7781ffa 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+@@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
+ /* flush pending Tx transactions */
+ ixgbe_clear_tx_pending(hw);
+
++ /* set MDIO speed before talking to the PHY in case it's the 1st time */
++ ixgbe_set_mdio_speed(hw);
++
+ /* PHY ops must be identified and initialized prior to reset */
+ status = hw->phy.ops.init(hw);
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+index b1d22e4d5ec9c..b399b9c147172 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+@@ -201,26 +201,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
+ }
+
+ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
+- struct ixgbe_rx_buffer *bi)
++ const struct xdp_buff *xdp)
+ {
+- unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
+- unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
++ unsigned int totalsize = xdp->data_end - xdp->data_meta;
++ unsigned int metasize = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
++ net_prefetch(xdp->data_meta);
++
+ /* allocate a skb to store the frags */
+- skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+- bi->xdp->data_end - bi->xdp->data_hard_start,
++ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+- skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
+- memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
+- if (metasize)
++ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
++ ALIGN(totalsize, sizeof(long)));
++
++ if (metasize) {
+ skb_metadata_set(skb, metasize);
++ __skb_pull(skb, metasize);
++ }
+
+- xsk_buff_free(bi->xdp);
+- bi->xdp = NULL;
+ return skb;
+ }
+
+@@ -311,12 +313,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+ }
+
+ /* XDP_PASS path */
+- skb = ixgbe_construct_skb_zc(rx_ring, bi);
++ skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ break;
+ }
+
++ xsk_buff_free(bi->xdp);
++ bi->xdp = NULL;
++
+ cleaned_count++;
+ ixgbe_inc_ntc(rx_ring);
+
+@@ -388,12 +393,14 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+ u32 cmd_type;
+
+ while (budget-- > 0) {
+- if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
+- !netif_carrier_ok(xdp_ring->netdev)) {
++ if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+ work_done = false;
+ break;
+ }
+
++ if (!netif_carrier_ok(xdp_ring->netdev))
++ break;
++
+ if (!xsk_tx_peek_desc(pool, &desc))
+ break;
+
+diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+index c714e1ecd3089..3a05e458ded2f 100644
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+@@ -1984,14 +1984,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ return;
+
+- set_ring_build_skb_enabled(rx_ring);
++ if (PAGE_SIZE < 8192)
++ if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB)
++ set_ring_uses_large_buffer(rx_ring);
+
+- if (PAGE_SIZE < 8192) {
+- if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+- return;
++ /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */
++ if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring))
++ return;
+
+- set_ring_uses_large_buffer(rx_ring);
+- }
++ set_ring_build_skb_enabled(rx_ring);
+ }
+
+ /**
+@@ -2540,7 +2541,7 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter)
+ }
+
+ if (is_valid_ether_addr(adapter->hw.mac.addr)) {
+- ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
++ eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+ ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+ }
+
+@@ -3054,7 +3055,7 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter)
+ else if (is_zero_ether_addr(adapter->hw.mac.addr))
+ dev_info(&pdev->dev,
+ "MAC address not assigned by administrator.\n");
+- ether_addr_copy(netdev->dev_addr, hw->mac.addr);
++ eth_hw_addr_set(netdev, hw->mac.addr);
+ }
+
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+@@ -4231,7 +4232,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
+
+ ether_addr_copy(hw->mac.addr, addr->sa_data);
+ ether_addr_copy(hw->mac.perm_addr, addr->sa_data);
+- ether_addr_copy(netdev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(netdev, addr->sa_data);
+
+ return 0;
+ }
+@@ -4858,6 +4859,8 @@ static struct pci_driver ixgbevf_driver = {
+ **/
+ static int __init ixgbevf_init_module(void)
+ {
++ int err;
++
+ pr_info("%s\n", ixgbevf_driver_string);
+ pr_info("%s\n", ixgbevf_copyright);
+ ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name);
+@@ -4866,7 +4869,13 @@ static int __init ixgbevf_init_module(void)
+ return -ENOMEM;
+ }
+
+- return pci_register_driver(&ixgbevf_driver);
++ err = pci_register_driver(&ixgbevf_driver);
++ if (err) {
++ destroy_workqueue(ixgbevf_wq);
++ return err;
++ }
++
++ return 0;
+ }
+
+ module_init(ixgbevf_init_module);
+diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
+index 3e9f324f1061f..916e85039b610 100644
+--- a/drivers/net/ethernet/korina.c
++++ b/drivers/net/ethernet/korina.c
+@@ -1297,15 +1297,14 @@ static int korina_probe(struct platform_device *pdev)
+ lp = netdev_priv(dev);
+
+ if (mac_addr)
+- ether_addr_copy(dev->dev_addr, mac_addr);
+- else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0)
++ eth_hw_addr_set(dev, mac_addr);
++ else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0)
+ eth_hw_addr_random(dev);
+
+- clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
++ clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+ if (clk) {
+- clk_prepare_enable(clk);
+ lp->mii_clock_freq = clk_get_rate(clk);
+ } else {
+ lp->mii_clock_freq = 200000000; /* max possible input clk */
+diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
+index 62f8c52121822..057d655d17692 100644
+--- a/drivers/net/ethernet/lantiq_etop.c
++++ b/drivers/net/ethernet/lantiq_etop.c
+@@ -466,7 +466,6 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
+ len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len;
+
+ if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) {
+- dev_kfree_skb_any(skb);
+ netdev_err(dev, "tx ring full\n");
+ netif_tx_stop_queue(txq);
+ return NETDEV_TX_BUSY;
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index fb78f17d734fe..fa0ef1852fb76 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
+ skb->protocol = eth_type_trans(skb, net_dev);
+ netif_receive_skb(skb);
+ net_dev->stats.rx_packets++;
+- net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
++ net_dev->stats.rx_bytes += len;
+
+ return 0;
+ }
+@@ -474,7 +474,7 @@ static int xrx200_probe(struct platform_device *pdev)
+ return PTR_ERR(priv->clk);
+ }
+
+- err = of_get_mac_address(np, net_dev->dev_addr);
++ err = of_get_ethdev_address(np, net_dev);
+ if (err)
+ eth_hw_addr_random(net_dev);
+
+diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig
+index 63bf01d28f0cf..04345b929d8e5 100644
+--- a/drivers/net/ethernet/litex/Kconfig
++++ b/drivers/net/ethernet/litex/Kconfig
+@@ -17,7 +17,7 @@ if NET_VENDOR_LITEX
+
+ config LITEX_LITEETH
+ tristate "LiteX Ethernet support"
+- depends on OF_NET
++ depends on OF && HAS_IOMEM
+ help
+ If you wish to compile a kernel for hardware with a LiteX LiteEth
+ device then you should answer Y to this.
+diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
+index a9bdbf0dcfe1e..24ed6e180c759 100644
+--- a/drivers/net/ethernet/litex/litex_liteeth.c
++++ b/drivers/net/ethernet/litex/litex_liteeth.c
+@@ -266,7 +266,7 @@ static int liteeth_probe(struct platform_device *pdev)
+ priv->tx_base = buf_base + priv->num_rx_slots * priv->slot_size;
+ priv->tx_slot = 0;
+
+- err = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
++ err = of_get_ethdev_address(pdev->dev.of_node, netdev);
+ if (err)
+ eth_hw_addr_random(netdev);
+
+@@ -289,7 +289,6 @@ static int liteeth_remove(struct platform_device *pdev)
+ struct net_device *netdev = platform_get_drvdata(pdev);
+
+ unregister_netdev(netdev);
+- free_netdev(netdev);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
+index 28d5ad296646a..fc67e9d31f6da 100644
+--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
++++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
+@@ -2477,6 +2477,7 @@ out_free:
+ for (i = 0; i < mp->rxq_count; i++)
+ rxq_deinit(mp->rxq + i);
+ out:
++ napi_disable(&mp->napi);
+ free_irq(dev->irq, dev);
+
+ return err;
+@@ -2700,6 +2701,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids);
+
+ static struct platform_device *port_platdev[3];
+
++static void mv643xx_eth_shared_of_remove(void)
++{
++ int n;
++
++ for (n = 0; n < 3; n++) {
++ platform_device_del(port_platdev[n]);
++ port_platdev[n] = NULL;
++ }
++}
++
+ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
+ struct device_node *pnp)
+ {
+@@ -2736,7 +2747,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
+ return -EINVAL;
+ }
+
+- of_get_mac_address(pnp, ppd.mac_addr);
++ ret = of_get_mac_address(pnp, ppd.mac_addr);
++ if (ret == -EPROBE_DEFER)
++ return ret;
+
+ mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size);
+ mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr);
+@@ -2800,21 +2813,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev)
+ ret = mv643xx_eth_shared_of_add_port(pdev, pnp);
+ if (ret) {
+ of_node_put(pnp);
++ mv643xx_eth_shared_of_remove();
+ return ret;
+ }
+ }
+ return 0;
+ }
+
+-static void mv643xx_eth_shared_of_remove(void)
+-{
+- int n;
+-
+- for (n = 0; n < 3; n++) {
+- platform_device_del(port_platdev[n]);
+- port_platdev[n] = NULL;
+- }
+-}
+ #else
+ static inline int mv643xx_eth_shared_of_probe(struct platform_device *pdev)
+ {
+diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
+index 62a97c46fba05..ef878973b8597 100644
+--- a/drivers/net/ethernet/marvell/mvmdio.c
++++ b/drivers/net/ethernet/marvell/mvmdio.c
+@@ -429,12 +429,14 @@ static const struct of_device_id orion_mdio_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, orion_mdio_match);
+
++#ifdef CONFIG_ACPI
+ static const struct acpi_device_id orion_mdio_acpi_match[] = {
+ { "MRVL0100", BUS_TYPE_SMI },
+ { "MRVL0101", BUS_TYPE_XSMI },
+ { },
+ };
+ MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
++#endif
+
+ static struct platform_driver orion_mdio_driver = {
+ .probe = orion_mdio_probe,
+diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
+index 9d460a2706012..5fa81322a44be 100644
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1474,7 +1474,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
+ */
+ if (txq_number == 1)
+ txq_map = (cpu == pp->rxq_def) ?
+- MVNETA_CPU_TXQ_ACCESS(1) : 0;
++ MVNETA_CPU_TXQ_ACCESS(0) : 0;
+
+ } else {
+ txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+@@ -4162,7 +4162,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
+ /* Use the cpu associated to the rxq when it is online, in all
+ * the other cases, use the cpu 0 which can't be offline.
+ */
+- if (cpu_online(pp->rxq_def))
++ if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def))
+ elected_cpu = pp->rxq_def;
+
+ max_cpu = num_present_cpus();
+@@ -4185,7 +4185,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
+ */
+ if (txq_number == 1)
+ txq_map = (cpu == elected_cpu) ?
+- MVNETA_CPU_TXQ_ACCESS(1) : 0;
++ MVNETA_CPU_TXQ_ACCESS(0) : 0;
+ else
+ txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
+ MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+@@ -5242,7 +5242,7 @@ static int mvneta_probe(struct platform_device *pdev)
+ goto err_free_ports;
+ }
+
+- err = of_get_mac_address(dn, dev->dev_addr);
++ err = of_get_ethdev_address(dn, dev);
+ if (!err) {
+ mac_from = "device tree";
+ } else {
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+index cf8acabb90ac1..72608a47d4e02 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+@@ -1529,6 +1529,7 @@ u32 mvpp2_read(struct mvpp2 *priv, u32 offset);
+ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name);
+
+ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv);
++void mvpp2_dbgfs_exit(void);
+
+ void mvpp23_rx_fifo_fc_en(struct mvpp2 *priv, int port, bool en);
+
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+index 41d935d1aaf6f..40aeaa7bd739f 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+@@ -62,35 +62,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+- MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+- MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+- MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ /* TCP over IPv4 flows, fragmented, with vlan tag */
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
++ MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_TCP,
+ MVPP2_PRS_IP_MASK),
+
+ /* UDP over IPv4 flows, Not fragmented, no vlan tag */
+@@ -132,35 +135,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+- MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+- MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ MVPP22_CLS_HEK_IP4_2T,
+ MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+- MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+ /* UDP over IPv4 flows, fragmented, with vlan tag */
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK),
+
+ MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+- MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
++ MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE |
++ MVPP2_PRS_RI_L4_UDP,
+ MVPP2_PRS_IP_MASK),
+
+ /* TCP over IPv6 flows, not fragmented, no vlan tag */
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
+index 4a3baa7e01424..75e83ea2a926e 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
+@@ -691,6 +691,13 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent,
+ return 0;
+ }
+
++static struct dentry *mvpp2_root;
++
++void mvpp2_dbgfs_exit(void)
++{
++ debugfs_remove(mvpp2_root);
++}
++
+ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
+ {
+ debugfs_remove_recursive(priv->dbgfs_dir);
+@@ -700,10 +707,9 @@ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
+
+ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
+ {
+- struct dentry *mvpp2_dir, *mvpp2_root;
++ struct dentry *mvpp2_dir;
+ int ret, i;
+
+- mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL);
+ if (!mvpp2_root)
+ mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL);
+
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+index d5c92e43f89e6..31b3ede563c04 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -1605,7 +1605,7 @@ static void mvpp22_gop_fca_set_periodic_timer(struct mvpp2_port *port)
+ mvpp22_gop_fca_enable_periodic(port, true);
+ }
+
+-static int mvpp22_gop_init(struct mvpp2_port *port)
++static int mvpp22_gop_init(struct mvpp2_port *port, phy_interface_t interface)
+ {
+ struct mvpp2 *priv = port->priv;
+ u32 val;
+@@ -1613,7 +1613,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
+ if (!priv->sysctrl_base)
+ return 0;
+
+- switch (port->phy_interface) {
++ switch (interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+@@ -1743,15 +1743,15 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
+ * lanes by the physical layer. This is why configurations like
+ * "PPv2 (2500BaseX) - COMPHY (2500SGMII)" are valid.
+ */
+-static int mvpp22_comphy_init(struct mvpp2_port *port)
++static int mvpp22_comphy_init(struct mvpp2_port *port,
++ phy_interface_t interface)
+ {
+ int ret;
+
+ if (!port->comphy)
+ return 0;
+
+- ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET,
+- port->phy_interface);
++ ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET, interface);
+ if (ret)
+ return ret;
+
+@@ -2172,7 +2172,8 @@ static void mvpp22_pcs_reset_assert(struct mvpp2_port *port)
+ writel(val & ~MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0);
+ }
+
+-static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port)
++static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port,
++ phy_interface_t interface)
+ {
+ struct mvpp2 *priv = port->priv;
+ void __iomem *mpcs, *xpcs;
+@@ -2184,7 +2185,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port)
+ mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id);
+ xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
+
+- switch (port->phy_interface) {
++ switch (interface) {
+ case PHY_INTERFACE_MODE_10GBASER:
+ val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
+ val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX |
+@@ -2959,11 +2960,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
+ mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size);
+
+ if (priv->percpu_pools) {
+- err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id, 0);
++ err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->logic_rxq, 0);
+ if (err < 0)
+ goto err_free_dma;
+
+- err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id, 0);
++ err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->logic_rxq, 0);
+ if (err < 0)
+ goto err_unregister_rxq_short;
+
+@@ -4529,7 +4530,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
+ return rx_done;
+ }
+
+-static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
++static void mvpp22_mode_reconfigure(struct mvpp2_port *port,
++ phy_interface_t interface)
+ {
+ u32 ctrl3;
+
+@@ -4540,18 +4542,18 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
+ mvpp22_pcs_reset_assert(port);
+
+ /* comphy reconfiguration */
+- mvpp22_comphy_init(port);
++ mvpp22_comphy_init(port, interface);
+
+ /* gop reconfiguration */
+- mvpp22_gop_init(port);
++ mvpp22_gop_init(port, interface);
+
+- mvpp22_pcs_reset_deassert(port);
++ mvpp22_pcs_reset_deassert(port, interface);
+
+ if (mvpp2_port_supports_xlg(port)) {
+ ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG);
+ ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK;
+
+- if (mvpp2_is_xlg(port->phy_interface))
++ if (mvpp2_is_xlg(interface))
+ ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G;
+ else
+ ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC;
+@@ -4559,7 +4561,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port)
+ writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG);
+ }
+
+- if (mvpp2_port_supports_xlg(port) && mvpp2_is_xlg(port->phy_interface))
++ if (mvpp2_port_supports_xlg(port) && mvpp2_is_xlg(interface))
+ mvpp2_xlg_max_rx_size_set(port);
+ else
+ mvpp2_gmac_max_rx_size_set(port);
+@@ -4579,7 +4581,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
+ mvpp2_interrupts_enable(port);
+
+ if (port->priv->hw_version >= MVPP22)
+- mvpp22_mode_reconfigure(port);
++ mvpp22_mode_reconfigure(port, port->phy_interface);
+
+ if (port->phylink) {
+ phylink_start(port->phylink);
+@@ -5015,11 +5017,13 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
+ mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
+ }
+
++ if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) {
++ netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n",
++ mtu, (int)MVPP2_MAX_RX_BUF_SIZE);
++ return -EINVAL;
++ }
++
+ if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
+- if (port->xdp_prog) {
+- netdev_err(dev, "Jumbo frames are not supported with XDP\n");
+- return -EINVAL;
+- }
+ if (priv->percpu_pools) {
+ netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
+ mvpp2_bm_switch_buffers(priv, false);
+@@ -5305,8 +5309,8 @@ static int mvpp2_xdp_setup(struct mvpp2_port *port, struct netdev_bpf *bpf)
+ bool running = netif_running(port->dev);
+ bool reset = !prog != !port->xdp_prog;
+
+- if (port->dev->mtu > ETH_DATA_LEN) {
+- NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled");
++ if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) {
++ NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP");
+ return -EOPNOTSUPP;
+ }
+
+@@ -5568,6 +5572,11 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
++ if (loc == info->rule_cnt) {
++ ret = -EMSGSIZE;
++ break;
++ }
++
+ if (port->rfs_rules[i])
+ rules[loc++] = i;
+ }
+@@ -6083,7 +6092,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
+
+ if (fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
+ *mac_from = "firmware node";
+- ether_addr_copy(dev->dev_addr, fw_mac_addr);
++ eth_hw_addr_set(dev, fw_mac_addr);
+ return;
+ }
+
+@@ -6091,7 +6100,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
+ mvpp21_get_mac_address(port, hw_mac_addr);
+ if (is_valid_ether_addr(hw_mac_addr)) {
+ *mac_from = "hardware";
+- ether_addr_copy(dev->dev_addr, hw_mac_addr);
++ eth_hw_addr_set(dev, hw_mac_addr);
+ return;
+ }
+ }
+@@ -6477,6 +6486,9 @@ static int mvpp2__mac_prepare(struct phylink_config *config, unsigned int mode,
+ mvpp22_gop_mask_irq(port);
+
+ phy_power_off(port->comphy);
++
++ /* Reconfigure the serdes lanes */
++ mvpp22_mode_reconfigure(port, interface);
+ }
+ }
+
+@@ -6531,9 +6543,6 @@ static int mvpp2_mac_finish(struct phylink_config *config, unsigned int mode,
+ port->phy_interface != interface) {
+ port->phy_interface = interface;
+
+- /* Reconfigure the serdes lanes */
+- mvpp22_mode_reconfigure(port);
+-
+ /* Unmask interrupts */
+ mvpp22_gop_unmask_irq(port);
+ }
+@@ -6960,7 +6969,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
+ * driver does this, we can remove this code.
+ */
+ if (port->comphy) {
+- err = mvpp22_comphy_init(port);
++ err = mvpp22_comphy_init(port, port->phy_interface);
+ if (err == 0)
+ phy_power_off(port->comphy);
+ }
+@@ -7352,6 +7361,7 @@ static int mvpp2_get_sram(struct platform_device *pdev,
+ struct mvpp2 *priv)
+ {
+ struct resource *res;
++ void __iomem *base;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ if (!res) {
+@@ -7362,9 +7372,12 @@ static int mvpp2_get_sram(struct platform_device *pdev,
+ return 0;
+ }
+
+- priv->cm3_base = devm_ioremap_resource(&pdev->dev, res);
++ base = devm_ioremap_resource(&pdev->dev, res);
++ if (IS_ERR(base))
++ return PTR_ERR(base);
+
+- return PTR_ERR_OR_ZERO(priv->cm3_base);
++ priv->cm3_base = base;
++ return 0;
+ }
+
+ static int mvpp2_probe(struct platform_device *pdev)
+@@ -7453,7 +7466,7 @@ static int mvpp2_probe(struct platform_device *pdev)
+
+ shared = num_present_cpus() - priv->nthreads;
+ if (shared > 0)
+- bitmap_fill(&priv->lock_map,
++ bitmap_set(&priv->lock_map, 0,
+ min_t(int, shared, MVPP2_MAX_THREADS));
+
+ for (i = 0; i < MVPP2_MAX_THREADS; i++) {
+@@ -7706,7 +7719,18 @@ static struct platform_driver mvpp2_driver = {
+ },
+ };
+
+-module_platform_driver(mvpp2_driver);
++static int __init mvpp2_driver_init(void)
++{
++ return platform_driver_register(&mvpp2_driver);
++}
++module_init(mvpp2_driver_init);
++
++static void __exit mvpp2_driver_exit(void)
++{
++ platform_driver_unregister(&mvpp2_driver);
++ mvpp2_dbgfs_exit();
++}
++module_exit(mvpp2_driver_exit);
+
+ MODULE_DESCRIPTION("Marvell PPv2 Ethernet Driver - www.marvell.com");
+ MODULE_AUTHOR("Marcin Wojtas <mw@semihalf.com>");
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+index 93575800ca92a..9af22f497a40f 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+@@ -1539,8 +1539,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
+ if (!priv->prs_double_vlans)
+ return -ENOMEM;
+
+- /* Double VLAN: 0x8100, 0x88A8 */
+- err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021AD,
++ /* Double VLAN: 0x88A8, 0x8100 */
++ err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q,
+ MVPP2_PRS_PORT_MASK);
+ if (err)
+ return err;
+@@ -1607,59 +1607,45 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
+ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv)
+ {
+ struct mvpp2_prs_entry pe;
+- int tid;
+-
+- /* IPv4 over PPPoE with options */
+- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+- MVPP2_PE_LAST_FREE_TID);
+- if (tid < 0)
+- return tid;
+-
+- memset(&pe, 0, sizeof(pe));
+- mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE);
+- pe.index = tid;
+-
+- mvpp2_prs_match_etype(&pe, 0, PPP_IP);
+-
+- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
+- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT,
+- MVPP2_PRS_RI_L3_PROTO_MASK);
+- /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */
+- mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
+- sizeof(struct iphdr) - 4,
+- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+- /* Set L3 offset */
+- mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
+- MVPP2_ETH_TYPE_LEN,
+- MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+-
+- /* Update shadow table and hw entry */
+- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
+- mvpp2_prs_hw_write(priv, &pe);
++ int tid, ihl;
+
+- /* IPv4 over PPPoE without options */
+- tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+- MVPP2_PE_LAST_FREE_TID);
+- if (tid < 0)
+- return tid;
++ /* IPv4 over PPPoE with header length >= 5 */
++ for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) {
++ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
++ MVPP2_PE_LAST_FREE_TID);
++ if (tid < 0)
++ return tid;
+
+- pe.index = tid;
++ memset(&pe, 0, sizeof(pe));
++ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE);
++ pe.index = tid;
+
+- mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
+- MVPP2_PRS_IPV4_HEAD |
+- MVPP2_PRS_IPV4_IHL_MIN,
+- MVPP2_PRS_IPV4_HEAD_MASK |
+- MVPP2_PRS_IPV4_IHL_MASK);
++ mvpp2_prs_match_etype(&pe, 0, PPP_IP);
++ mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
++ MVPP2_PRS_IPV4_HEAD | ihl,
++ MVPP2_PRS_IPV4_HEAD_MASK |
++ MVPP2_PRS_IPV4_IHL_MASK);
+
+- /* Clear ri before updating */
+- pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
+- pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
+- mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
+- MVPP2_PRS_RI_L3_PROTO_MASK);
++ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
++ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
++ MVPP2_PRS_RI_L3_PROTO_MASK);
++ /* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */
++ mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
++ sizeof(struct iphdr) - 4,
++ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
++ /* Set L3 offset */
++ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
++ MVPP2_ETH_TYPE_LEN,
++ MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
++ /* Set L4 offset */
++ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
++ MVPP2_ETH_TYPE_LEN + (ihl * 4),
++ MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+
+- /* Update shadow table and hw entry */
+- mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
+- mvpp2_prs_hw_write(priv, &pe);
++ /* Update shadow table and hw entry */
++ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
++ mvpp2_prs_hw_write(priv, &pe);
++ }
+
+ /* IPv6 over PPPoE */
+ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+@@ -2347,7 +2333,7 @@ int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
+ return err;
+
+ /* Set addr in the device */
+- ether_addr_copy(dev->dev_addr, da);
++ eth_hw_addr_set(dev, da);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+index 3f982ccf2c85f..639893d870550 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/Kconfig
++++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+@@ -31,6 +31,7 @@ config NDC_DIS_DYNAMIC_CACHING
+ config OCTEONTX2_PF
+ tristate "Marvell OcteonTX2 NIC Physical Function driver"
+ select OCTEONTX2_MBOX
++ select NET_DEVLINK
+ depends on (64BIT && COMPILE_TEST) || ARM64
+ depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+index 34a089b71e554..8ac95cb7bbb74 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+@@ -167,6 +167,9 @@ void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val)
+ {
+ struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+
++ /* Software must not access disabled LMAC registers */
++ if (!is_lmac_valid(cgx_dev, lmac_id))
++ return;
+ cgx_write(cgx_dev, lmac_id, offset, val);
+ }
+
+@@ -174,6 +177,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset)
+ {
+ struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+
++ /* Software must not access disabled LMAC registers */
++ if (!is_lmac_valid(cgx_dev, lmac_id))
++ return 0;
++
+ return cgx_read(cgx_dev, lmac_id, offset);
+ }
+
+@@ -695,9 +702,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable)
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+ if (enable)
+- cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN;
++ cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN;
+ else
+- cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN);
++ cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN);
+ cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg);
+ return 0;
+ }
+@@ -838,9 +845,6 @@ void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable)
+ if (!cgx)
+ return;
+
+- if (is_dev_rpm(cgx))
+- return;
+-
+ if (enable) {
+ /* Enable inbound PTP timestamping */
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+@@ -1545,9 +1549,11 @@ static int cgx_lmac_exit(struct cgx *cgx)
+ static void cgx_populate_features(struct cgx *cgx)
+ {
+ if (is_dev_rpm(cgx))
+- cgx->hw_features = (RVU_MAC_RPM | RVU_LMAC_FEAT_FC);
++ cgx->hw_features = (RVU_LMAC_FEAT_DMACF | RVU_MAC_RPM |
++ RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP);
+ else
+- cgx->hw_features = (RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP);
++ cgx->hw_features = (RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_HIGIG2 |
++ RVU_LMAC_FEAT_PTP | RVU_LMAC_FEAT_DMACF);
+ }
+
+ static struct mac_ops cgx_mac_ops = {
+@@ -1571,6 +1577,9 @@ static struct mac_ops cgx_mac_ops = {
+ .mac_get_pause_frm_status = cgx_lmac_get_pause_frm_status,
+ .mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm,
+ .mac_pause_frm_config = cgx_lmac_pause_frm_config,
++ .mac_enadis_ptp_config = cgx_lmac_ptp_config,
++ .mac_rx_tx_enable = cgx_lmac_rx_tx_enable,
++ .mac_tx_enable = cgx_lmac_tx_enable,
+ };
+
+ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+index ab1e4abdea38b..5714280a4252d 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+@@ -30,7 +30,6 @@
+ #define CMR_P2X_SEL_SHIFT 59ULL
+ #define CMR_P2X_SEL_NIX0 1ULL
+ #define CMR_P2X_SEL_NIX1 2ULL
+-#define CMR_EN BIT_ULL(55)
+ #define DATA_PKT_TX_EN BIT_ULL(53)
+ #define DATA_PKT_RX_EN BIT_ULL(54)
+ #define CGX_LMAC_TYPE_SHIFT 40
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+index c38306b3384a7..b33e7d1d0851c 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+@@ -102,6 +102,14 @@ struct mac_ops {
+ void (*mac_pause_frm_config)(void *cgxd,
+ int lmac_id,
+ bool enable);
++
++ /* Enable/Disable Inbound PTP */
++ void (*mac_enadis_ptp_config)(void *cgxd,
++ int lmac_id,
++ bool enable);
++
++ int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable);
++ int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable);
+ };
+
+ struct cgx {
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+index 2898931d5260a..9690ac01f02c8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+@@ -157,7 +157,7 @@ EXPORT_SYMBOL(otx2_mbox_init);
+ */
+ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void **hwbase,
+ struct pci_dev *pdev, void *reg_base,
+- int direction, int ndevs)
++ int direction, int ndevs, unsigned long *pf_bmap)
+ {
+ struct otx2_mbox_dev *mdev;
+ int devid, err;
+@@ -169,6 +169,9 @@ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void **hwbase,
+ mbox->hwbase = hwbase[0];
+
+ for (devid = 0; devid < ndevs; devid++) {
++ if (!test_bit(devid, pf_bmap))
++ continue;
++
+ mdev = &mbox->dev[devid];
+ mdev->mbase = hwbase[devid];
+ mdev->hwbase = hwbase[devid];
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+index 154877706a0e1..2b6cbd5af100d 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+@@ -84,7 +84,7 @@ struct mbox_msghdr {
+ #define OTX2_MBOX_REQ_SIG (0xdead)
+ #define OTX2_MBOX_RSP_SIG (0xbeef)
+ u16 sig; /* Signature, for validating corrupted msgs */
+-#define OTX2_MBOX_VERSION (0x0009)
++#define OTX2_MBOX_VERSION (0x000a)
+ u16 ver; /* Version of msg's structure for this ID */
+ u16 next_msgoff; /* Offset of next msg within mailbox region */
+ int rc; /* Msg process'ed response code */
+@@ -96,9 +96,10 @@ void otx2_mbox_destroy(struct otx2_mbox *mbox);
+ int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase,
+ struct pci_dev *pdev, void __force *reg_base,
+ int direction, int ndevs);
++
+ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase,
+ struct pci_dev *pdev, void __force *reg_base,
+- int direction, int ndevs);
++ int direction, int ndevs, unsigned long *bmap);
+ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid);
+ int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid);
+ int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid);
+@@ -154,23 +155,23 @@ M(CGX_PTP_RX_ENABLE, 0x20C, cgx_ptp_rx_enable, msg_req, msg_rsp) \
+ M(CGX_PTP_RX_DISABLE, 0x20D, cgx_ptp_rx_disable, msg_req, msg_rsp) \
+ M(CGX_CFG_PAUSE_FRM, 0x20E, cgx_cfg_pause_frm, cgx_pause_frm_cfg, \
+ cgx_pause_frm_cfg) \
+-M(CGX_FEC_SET, 0x210, cgx_set_fec_param, fec_mode, fec_mode) \
+-M(CGX_FEC_STATS, 0x211, cgx_fec_stats, msg_req, cgx_fec_stats_rsp) \
+-M(CGX_GET_PHY_FEC_STATS, 0x212, cgx_get_phy_fec_stats, msg_req, msg_rsp) \
+-M(CGX_FW_DATA_GET, 0x213, cgx_get_aux_link_info, msg_req, cgx_fw_data) \
+-M(CGX_SET_LINK_MODE, 0x214, cgx_set_link_mode, cgx_set_link_mode_req,\
+- cgx_set_link_mode_rsp) \
+-M(CGX_FEATURES_GET, 0x215, cgx_features_get, msg_req, \
+- cgx_features_info_msg) \
+-M(RPM_STATS, 0x216, rpm_stats, msg_req, rpm_stats_rsp) \
+-M(CGX_MAC_ADDR_ADD, 0x217, cgx_mac_addr_add, cgx_mac_addr_add_req, \
+- cgx_mac_addr_add_rsp) \
+-M(CGX_MAC_ADDR_DEL, 0x218, cgx_mac_addr_del, cgx_mac_addr_del_req, \
++M(CGX_FW_DATA_GET, 0x20F, cgx_get_aux_link_info, msg_req, cgx_fw_data) \
++M(CGX_FEC_SET, 0x210, cgx_set_fec_param, fec_mode, fec_mode) \
++M(CGX_MAC_ADDR_ADD, 0x211, cgx_mac_addr_add, cgx_mac_addr_add_req, \
++ cgx_mac_addr_add_rsp) \
++M(CGX_MAC_ADDR_DEL, 0x212, cgx_mac_addr_del, cgx_mac_addr_del_req, \
+ msg_rsp) \
+-M(CGX_MAC_MAX_ENTRIES_GET, 0x219, cgx_mac_max_entries_get, msg_req, \
++M(CGX_MAC_MAX_ENTRIES_GET, 0x213, cgx_mac_max_entries_get, msg_req, \
+ cgx_max_dmac_entries_get_rsp) \
+-M(CGX_MAC_ADDR_RESET, 0x21A, cgx_mac_addr_reset, msg_req, msg_rsp) \
+-M(CGX_MAC_ADDR_UPDATE, 0x21B, cgx_mac_addr_update, cgx_mac_addr_update_req, \
++M(CGX_FEC_STATS, 0x217, cgx_fec_stats, msg_req, cgx_fec_stats_rsp) \
++M(CGX_SET_LINK_MODE, 0x218, cgx_set_link_mode, cgx_set_link_mode_req,\
++ cgx_set_link_mode_rsp) \
++M(CGX_GET_PHY_FEC_STATS, 0x219, cgx_get_phy_fec_stats, msg_req, msg_rsp) \
++M(CGX_FEATURES_GET, 0x21B, cgx_features_get, msg_req, \
++ cgx_features_info_msg) \
++M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \
++M(CGX_MAC_ADDR_RESET, 0x21D, cgx_mac_addr_reset, msg_req, msg_rsp) \
++M(CGX_MAC_ADDR_UPDATE, 0x21E, cgx_mac_addr_update, cgx_mac_addr_update_req, \
+ msg_rsp) \
+ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \
+ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \
+@@ -229,6 +230,8 @@ M(NPC_DELETE_FLOW, 0x600e, npc_delete_flow, \
+ M(NPC_MCAM_READ_ENTRY, 0x600f, npc_mcam_read_entry, \
+ npc_mcam_read_entry_req, \
+ npc_mcam_read_entry_rsp) \
++M(NPC_SET_PKIND, 0x6010, npc_set_pkind, \
++ npc_set_pkind, msg_rsp) \
+ M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule, \
+ msg_req, npc_mcam_read_base_rule_rsp) \
+ M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats, \
+@@ -575,10 +578,13 @@ struct cgx_mac_addr_update_req {
+ };
+
+ #define RVU_LMAC_FEAT_FC BIT_ULL(0) /* pause frames */
+-#define RVU_LMAC_FEAT_PTP BIT_ULL(1) /* precision time protocol */
+-#define RVU_MAC_VERSION BIT_ULL(2)
+-#define RVU_MAC_CGX BIT_ULL(3)
+-#define RVU_MAC_RPM BIT_ULL(4)
++#define RVU_LMAC_FEAT_HIGIG2 BIT_ULL(1)
++ /* flow control from physical link higig2 messages */
++#define RVU_LMAC_FEAT_PTP BIT_ULL(2) /* precison time protocol */
++#define RVU_LMAC_FEAT_DMACF BIT_ULL(3) /* DMAC FILTER */
++#define RVU_MAC_VERSION BIT_ULL(4)
++#define RVU_MAC_CGX BIT_ULL(5)
++#define RVU_MAC_RPM BIT_ULL(6)
+
+ struct cgx_features_info_msg {
+ struct mbox_msghdr hdr;
+@@ -593,6 +599,22 @@ struct rpm_stats_rsp {
+ u64 tx_stats[RPM_TX_STATS_COUNT];
+ };
+
++struct npc_set_pkind {
++ struct mbox_msghdr hdr;
++#define OTX2_PRIV_FLAGS_DEFAULT BIT_ULL(0)
++#define OTX2_PRIV_FLAGS_CUSTOM BIT_ULL(63)
++ u64 mode;
++#define PKIND_TX BIT_ULL(0)
++#define PKIND_RX BIT_ULL(1)
++ u8 dir;
++ u8 pkind; /* valid only in case custom flag */
++ u8 var_len_off; /* Offset of custom header length field.
++ * Valid only for pkind NPC_RX_CUSTOM_PRE_L2_PKIND
++ */
++ u8 var_len_off_mask; /* Mask for length with in offset */
++ u8 shift_dir; /* shift direction to get length of the header at var_len_off */
++};
++
+ /* NPA mbox message formats */
+
+ /* NPA mailbox error codes
+@@ -698,6 +720,9 @@ enum nix_af_status {
+ NIX_AF_ERR_INVALID_BANDPROF = -426,
+ NIX_AF_ERR_IPOLICER_NOTSUPP = -427,
+ NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
++ NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
++ NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
++ NIX_AF_ERR_LINK_CREDITS = -431,
+ };
+
+ /* For NIX RX vtag action */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+index 3a819b24accc6..6e1192f526089 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+@@ -31,9 +31,9 @@ enum npc_kpu_la_ltype {
+ NPC_LT_LA_HIGIG2_ETHER,
+ NPC_LT_LA_IH_NIX_HIGIG2_ETHER,
+ NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_LT_LA_CH_LEN_90B_ETHER,
+ NPC_LT_LA_CPT_HDR,
+ NPC_LT_LA_CUSTOM_L2_24B_ETHER,
++ NPC_LT_LA_CUSTOM_PRE_L2_ETHER,
+ NPC_LT_LA_CUSTOM0 = 0xE,
+ NPC_LT_LA_CUSTOM1 = 0xF,
+ };
+@@ -148,10 +148,11 @@ enum npc_kpu_lh_ltype {
+ * Software assigns pkind for each incoming port such as CGX
+ * Ethernet interfaces, LBK interfaces, etc.
+ */
+-#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND
++#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_CUSTOM_PRE_L2_PKIND
+
+ enum npc_pkind_type {
+ NPC_RX_LBK_PKIND = 0ULL,
++ NPC_RX_CUSTOM_PRE_L2_PKIND = 55ULL,
+ NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
+ NPC_RX_CHLEN24B_PKIND = 57ULL,
+ NPC_RX_CPT_HDR_PKIND,
+@@ -162,6 +163,10 @@ enum npc_pkind_type {
+ NPC_TX_DEF_PKIND, /* NIX-TX PKIND */
+ };
+
++enum npc_interface_type {
++ NPC_INTF_MODE_DEF,
++};
++
+ /* list of known and supported fields in packet header and
+ * fields present in key structure.
+ */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+index 588822a0cf21e..695123e32ba85 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+@@ -176,9 +176,8 @@ enum npc_kpu_parser_state {
+ NPC_S_KPU1_EXDSA,
+ NPC_S_KPU1_HIGIG2,
+ NPC_S_KPU1_IH_NIX_HIGIG2,
+- NPC_S_KPU1_CUSTOM_L2_90B,
++ NPC_S_KPU1_CUSTOM_PRE_L2,
+ NPC_S_KPU1_CPT_HDR,
+- NPC_S_KPU1_CUSTOM_L2_24B,
+ NPC_S_KPU1_VLAN_EXDSA,
+ NPC_S_KPU2_CTAG,
+ NPC_S_KPU2_CTAG2,
+@@ -187,7 +186,8 @@ enum npc_kpu_parser_state {
+ NPC_S_KPU2_ETAG,
+ NPC_S_KPU2_PREHEADER,
+ NPC_S_KPU2_EXDSA,
+- NPC_S_KPU2_NGIO,
++ NPC_S_KPU2_CPT_CTAG,
++ NPC_S_KPU2_CPT_QINQ,
+ NPC_S_KPU3_CTAG,
+ NPC_S_KPU3_STAG,
+ NPC_S_KPU3_QINQ,
+@@ -212,6 +212,7 @@ enum npc_kpu_parser_state {
+ NPC_S_KPU5_NSH,
+ NPC_S_KPU5_CPT_IP,
+ NPC_S_KPU5_CPT_IP6,
++ NPC_S_KPU5_NGIO,
+ NPC_S_KPU6_IP6_EXT,
+ NPC_S_KPU6_IP6_HOP_DEST,
+ NPC_S_KPU6_IP6_ROUT,
+@@ -979,8 +980,8 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 16, 20, 0, 0,
+- NPC_S_KPU1_ETHER, 0, 0,
+- NPC_LID_LA, NPC_LT_NA,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0, 1,
++ NPC_LID_LA, NPC_LT_LA_CUSTOM_PRE_L2_ETHER,
+ 0,
+ 0, 0, 0, 0,
+
+@@ -996,27 +997,27 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 36, 40, 44, 0, 0,
+- NPC_S_KPU1_CUSTOM_L2_24B, 0, 0,
+- NPC_LID_LA, NPC_LT_NA,
++ 12, 16, 20, 0, 0,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 24, 1,
++ NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+ 0,
+ 0, 0, 0, 0,
+
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 40, 54, 58, 0, 0,
+- NPC_S_KPU1_CPT_HDR, 0, 0,
++ 12, 16, 20, 0, 0,
++ NPC_S_KPU1_CPT_HDR, 40, 0,
+ NPC_LID_LA, NPC_LT_NA,
+ 0,
+- 0, 0, 0, 0,
++ 7, 7, 0, 0,
+
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 102, 106, 110, 0, 0,
+- NPC_S_KPU1_CUSTOM_L2_90B, 0, 0,
+- NPC_LID_LA, NPC_LT_NA,
++ 12, 16, 20, 0, 0,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 90, 1,
++ NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+ 0,
+ 0, 0, 0, 0,
+
+@@ -1116,15 +1117,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ 0x0000,
+ },
+- {
+- NPC_S_KPU1_ETHER, 0xff,
+- NPC_ETYPE_CTAG,
+- 0xffff,
+- NPC_ETYPE_NGIO,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- },
+ {
+ NPC_S_KPU1_ETHER, 0xff,
+ NPC_ETYPE_CTAG,
+@@ -1711,7 +1703,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_IP,
+ 0xffff,
+ 0x0000,
+@@ -1720,7 +1712,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_IP6,
+ 0xffff,
+ 0x0000,
+@@ -1729,7 +1721,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_ARP,
+ 0xffff,
+ 0x0000,
+@@ -1738,7 +1730,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_RARP,
+ 0xffff,
+ 0x0000,
+@@ -1747,7 +1739,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_PTP,
+ 0xffff,
+ 0x0000,
+@@ -1756,7 +1748,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_FCOE,
+ 0xffff,
+ 0x0000,
+@@ -1765,7 +1757,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_CTAG,
+ 0xffff,
+ NPC_ETYPE_CTAG,
+@@ -1774,7 +1766,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_CTAG,
+ 0xffff,
+ 0x0000,
+@@ -1783,7 +1775,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_SBTAG,
+ 0xffff,
+ 0x0000,
+@@ -1792,7 +1784,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_QINQ,
+ 0xffff,
+ 0x0000,
+@@ -1801,7 +1793,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_ETAG,
+ 0xffff,
+ 0x0000,
+@@ -1810,7 +1802,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_MPLSU,
+ 0xffff,
+ 0x0000,
+@@ -1819,7 +1811,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_MPLSM,
+ 0xffff,
+ 0x0000,
+@@ -1828,7 +1820,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ NPC_ETYPE_NSH,
+ 0xffff,
+ 0x0000,
+@@ -1837,7 +1829,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
++ NPC_S_KPU1_CUSTOM_PRE_L2, 0xff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+@@ -1847,150 +1839,24 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ },
+ {
+ NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+ NPC_ETYPE_IP,
+ 0xffff,
+ 0x0000,
+ 0x0000,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- NPC_ETYPE_IP6,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- NPC_ETYPE_CTAG,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- NPC_ETYPE_QINQ,
+- 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+ {
+ NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- NPC_ETYPE_IP,
+- 0xffff,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+ NPC_ETYPE_IP6,
+ 0xffff,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0xffff,
+- 0x0000,
+ 0x0000,
+- NPC_ETYPE_CTAG,
+- 0xffff,
+- },
+- {
+- NPC_S_KPU1_CPT_HDR, 0xff,
+ 0x0000,
+- 0xffff,
+ 0x0000,
+ 0x0000,
+- NPC_ETYPE_QINQ,
+- 0xffff,
+ },
+ {
+ NPC_S_KPU1_CPT_HDR, 0xff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_IP,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_IP6,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_ARP,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_RARP,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_PTP,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_FCOE,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_CTAG,
+- 0xffff,
+- NPC_ETYPE_CTAG,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+ NPC_ETYPE_CTAG,
+ 0xffff,
+ 0x0000,
+@@ -1999,16 +1865,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_SBTAG,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
++ NPC_S_KPU1_CPT_HDR, 0xff,
+ NPC_ETYPE_QINQ,
+ 0xffff,
+ 0x0000,
+@@ -2016,51 +1873,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+ 0x0000,
+ 0x0000,
+ },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_ETAG,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_MPLSU,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_MPLSM,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- NPC_ETYPE_NSH,
+- 0xffff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+- {
+- NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- 0x0000,
+- },
+ {
+ NPC_S_KPU1_VLAN_EXDSA, 0xff,
+ NPC_ETYPE_CTAG,
+@@ -2165,6 +1977,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
+ 0x0000,
+ 0x0000,
+ },
++ {
++ NPC_S_KPU2_CTAG, 0xff,
++ NPC_ETYPE_NGIO,
++ 0xffff,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ },
+ {
+ NPC_S_KPU2_CTAG, 0xff,
+ NPC_ETYPE_PPPOE,
+@@ -3057,11 +2878,38 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
+ 0x0000,
+ },
+ {
+- NPC_S_KPU2_NGIO, 0xff,
++ NPC_S_KPU2_CPT_CTAG, 0xff,
++ NPC_ETYPE_IP,
++ 0xffff,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
++ },
++ {
++ NPC_S_KPU2_CPT_CTAG, 0xff,
++ NPC_ETYPE_IP6,
++ 0xffff,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ },
++ {
++ NPC_S_KPU2_CPT_QINQ, 0xff,
++ NPC_ETYPE_CTAG,
++ 0xffff,
++ NPC_ETYPE_IP,
++ 0xffff,
++ 0x0000,
++ 0x0000,
++ },
++ {
++ NPC_S_KPU2_CPT_QINQ, 0xff,
++ NPC_ETYPE_CTAG,
++ 0xffff,
++ NPC_ETYPE_IP6,
++ 0xffff,
+ 0x0000,
+ 0x0000,
+ },
+@@ -5348,6 +5196,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
+ 0x0000,
+ 0x0000,
+ },
++ {
++ NPC_S_KPU5_NGIO, 0xff,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ 0x0000,
++ },
+ {
+ NPC_S_NA, 0X00,
+ 0x0000,
+@@ -8642,14 +8499,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
+ 0,
+ 0, 0, 0, 0,
+ },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 8, 12, 0, 0, 0,
+- NPC_S_KPU2_NGIO, 12, 1,
+- NPC_LID_LA, NPC_LT_LA_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 12, 0, 0, 0,
+@@ -9192,159 +9041,127 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 0, 6, 3, 0,
+- NPC_S_KPU5_IP, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_IP, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 6, 0, 0, 3, 0,
+- NPC_S_KPU5_IP6, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_IP6, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 3, 0,
+- NPC_S_KPU5_ARP, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_ARP, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 3, 0,
+- NPC_S_KPU5_RARP, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_RARP, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 3, 0,
+- NPC_S_KPU5_PTP, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_PTP, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 0, 0, 0, 3, 0,
+- NPC_S_KPU5_FCOE, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
++ NPC_S_KPU5_FCOE, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 12, 0, 0, 0,
+- NPC_S_KPU2_CTAG2, 102, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
++ NPC_S_KPU2_CTAG2, 12, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 8, 0, 0, 0,
+- NPC_S_KPU2_CTAG, 102, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
++ NPC_S_KPU2_CTAG, 12, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 8, 22, 0, 0,
+- NPC_S_KPU2_SBTAG, 102, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
++ NPC_S_KPU2_SBTAG, 12, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 8, 0, 0, 0,
+- NPC_S_KPU2_QINQ, 102, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
++ NPC_S_KPU2_QINQ, 12, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 12, 26, 0, 0,
+- NPC_S_KPU2_ETAG, 102, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
++ NPC_S_KPU2_ETAG, 12, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 2, 6, 10, 2, 0,
+- NPC_S_KPU4_MPLS, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_L_WITH_MPLS,
++ NPC_S_KPU4_MPLS, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 2, 6, 10, 2, 0,
+- NPC_S_KPU4_MPLS, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_L_WITH_MPLS,
++ NPC_S_KPU4_MPLS, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
++ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 2, 0, 0, 2, 0,
+- NPC_S_KPU4_NSH, 104, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_L_WITH_NSH,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 0, 1,
+- NPC_S_NA, 0, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+- NPC_F_LA_L_UNK_ETYPE,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 8, 0, 6, 3, 0,
+- NPC_S_KPU5_CPT_IP, 56, 1,
+- NPC_LID_LA, NPC_LT_LA_CPT_HDR,
++ NPC_S_KPU4_NSH, 14, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 6, 0, 0, 3, 0,
+- NPC_S_KPU5_CPT_IP6, 56, 1,
+- NPC_LID_LA, NPC_LT_LA_CPT_HDR,
++ 0, 0, 0, 0, 1,
++ NPC_S_NA, 0, 0,
++ NPC_LID_LA, NPC_LT_NA,
+ 0,
+ 0, 0, 0, 0,
+ },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 4, 8, 0, 0, 0,
+- NPC_S_KPU2_CTAG, 54, 1,
+- NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 4, 8, 0, 0, 0,
+- NPC_S_KPU2_QINQ, 54, 1,
+- NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 0, 6, 3, 0,
+- NPC_S_KPU5_CPT_IP, 60, 1,
++ NPC_S_KPU5_CPT_IP, 14, 1,
+ NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+ 0,
+ 0, 0, 0, 0,
+@@ -9352,7 +9169,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 6, 0, 0, 3, 0,
+- NPC_S_KPU5_CPT_IP6, 60, 1,
++ NPC_S_KPU5_CPT_IP6, 14, 1,
+ NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+ 0,
+ 0, 0, 0, 0,
+@@ -9360,7 +9177,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 8, 0, 0, 0,
+- NPC_S_KPU2_CTAG, 58, 1,
++ NPC_S_KPU2_CPT_CTAG, 12, 1,
+ NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+ NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+ 0, 0, 0, 0,
+@@ -9368,139 +9185,11 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 4, 8, 0, 0, 0,
+- NPC_S_KPU2_QINQ, 58, 1,
++ NPC_S_KPU2_CPT_QINQ, 12, 1,
+ NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+ NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+ 0, 0, 0, 0,
+ },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 0, 1,
+- NPC_S_NA, 0, 1,
+- NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+- NPC_F_LA_L_UNK_ETYPE,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 8, 0, 6, 3, 0,
+- NPC_S_KPU5_IP, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 6, 0, 0, 3, 0,
+- NPC_S_KPU5_IP6, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 3, 0,
+- NPC_S_KPU5_ARP, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 3, 0,
+- NPC_S_KPU5_RARP, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 3, 0,
+- NPC_S_KPU5_PTP, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 3, 0,
+- NPC_S_KPU5_FCOE, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- 0,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 8, 12, 0, 0, 0,
+- NPC_S_KPU2_CTAG2, 36, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 4, 8, 0, 0, 0,
+- NPC_S_KPU2_CTAG, 36, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 4, 8, 22, 0, 0,
+- NPC_S_KPU2_SBTAG, 36, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 4, 8, 0, 0, 0,
+- NPC_S_KPU2_QINQ, 36, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 8, 12, 26, 0, 0,
+- NPC_S_KPU2_ETAG, 36, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 2, 6, 10, 2, 0,
+- NPC_S_KPU4_MPLS, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_L_WITH_MPLS,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 2, 6, 10, 2, 0,
+- NPC_S_KPU4_MPLS, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_L_WITH_MPLS,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 2, 0, 0, 2, 0,
+- NPC_S_KPU4_NSH, 38, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_L_WITH_NSH,
+- 0, 0, 0, 0,
+- },
+- {
+- NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 0, 1,
+- NPC_S_NA, 0, 1,
+- NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+- NPC_F_LA_L_UNK_ETYPE,
+- 0, 0, 0, 0,
+- },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 12, 0, 0, 1, 0,
+@@ -9594,6 +9283,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
+ 0,
+ 0, 0, 0, 0,
+ },
++ {
++ NPC_ERRLEV_RE, NPC_EC_NOERR,
++ 0, 0, 0, 2, 0,
++ NPC_S_KPU5_NGIO, 6, 1,
++ NPC_LID_LB, NPC_LT_LB_CTAG,
++ 0,
++ 0, 0, 0, 0,
++ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+ 8, 0, 6, 2, 0,
+@@ -10388,12 +10085,36 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
+ },
+ {
+ NPC_ERRLEV_RE, NPC_EC_NOERR,
+- 0, 0, 0, 0, 1,
+- NPC_S_NA, 0, 1,
+- NPC_LID_LC, NPC_LT_LC_NGIO,
++ 8, 0, 6, 2, 0,
++ NPC_S_KPU5_CPT_IP, 6, 1,
++ NPC_LID_LB, NPC_LT_LB_CTAG,
++ 0,
++ 0, 0, 0, 0,
++ },
++ {
++ NPC_ERRLEV_RE, NPC_EC_NOERR,
++ 6, 0, 0, 2, 0,
++ NPC_S_KPU5_CPT_IP6, 6, 1,
++ NPC_LID_LB, NPC_LT_LB_CTAG,
+ 0,
+ 0, 0, 0, 0,
+ },
++ {
++ NPC_ERRLEV_RE, NPC_EC_NOERR,
++ 8, 0, 6, 2, 0,
++ NPC_S_KPU5_CPT_IP, 10, 1,
++ NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
++ NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
++ 0, 0, 0, 0,
++ },
++ {
++ NPC_ERRLEV_RE, NPC_EC_NOERR,
++ 6, 0, 0, 2, 0,
++ NPC_S_KPU5_CPT_IP6, 10, 1,
++ NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
++ NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
++ 0, 0, 0, 0,
++ },
+ {
+ NPC_ERRLEV_LB, NPC_EC_L2_K3,
+ 0, 0, 0, 0, 1,
+@@ -12425,6 +12146,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
+ 0,
+ 0, 0, 0, 0,
+ },
++ {
++ NPC_ERRLEV_RE, NPC_EC_NOERR,
++ 0, 0, 0, 0, 1,
++ NPC_S_NA, 0, 1,
++ NPC_LID_LC, NPC_LT_LC_NGIO,
++ 0,
++ 0, 0, 0, 0,
++ },
+ {
+ NPC_ERRLEV_LC, NPC_EC_UNK,
+ 0, 0, 0, 0, 1,
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+index 9b8e59f4c206d..77cb52b80c60f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+@@ -85,6 +85,8 @@ struct ptp *ptp_get(void)
+ /* Check driver is bound to PTP block */
+ if (!ptp)
+ ptp = ERR_PTR(-EPROBE_DEFER);
++ else
++ pci_dev_get(ptp->pdev);
+
+ return ptp;
+ }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+index 07b0eafccad87..9ea2f6ac38ec1 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+@@ -29,6 +29,9 @@ static struct mac_ops rpm_mac_ops = {
+ .mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status,
+ .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm,
+ .mac_pause_frm_config = rpm_lmac_pause_frm_config,
++ .mac_enadis_ptp_config = rpm_lmac_ptp_config,
++ .mac_rx_tx_enable = rpm_lmac_rx_tx_enable,
++ .mac_tx_enable = rpm_lmac_tx_enable,
+ };
+
+ struct mac_ops *rpm_get_mac_ops(void)
+@@ -53,6 +56,43 @@ int rpm_get_nr_lmacs(void *rpmd)
+ return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL);
+ }
+
++int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable)
++{
++ rpm_t *rpm = rpmd;
++ u64 cfg, last;
++
++ if (!is_lmac_valid(rpm, lmac_id))
++ return -ENODEV;
++
++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
++ last = cfg;
++ if (enable)
++ cfg |= RPM_TX_EN;
++ else
++ cfg &= ~(RPM_TX_EN);
++
++ if (cfg != last)
++ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
++ return !!(last & RPM_TX_EN);
++}
++
++int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable)
++{
++ rpm_t *rpm = rpmd;
++ u64 cfg;
++
++ if (!is_lmac_valid(rpm, lmac_id))
++ return -ENODEV;
++
++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
++ if (enable)
++ cfg |= RPM_RX_EN | RPM_TX_EN;
++ else
++ cfg &= ~(RPM_RX_EN | RPM_TX_EN);
++ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
++ return 0;
++}
++
+ void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable)
+ {
+ rpm_t *rpm = rpmd;
+@@ -251,22 +291,35 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
+ if (!rpm || lmac_id >= rpm->lmac_count)
+ return -ENODEV;
+ lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
+- if (lmac_type == LMAC_MODE_100G_R) {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
+-
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
+- } else {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1);
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg);
++
++ if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) {
++ dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n");
++ return 0;
+ }
+
++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
++
++ if (enable)
++ cfg |= RPMX_MTI_PCS_LBK;
++ else
++ cfg &= ~RPMX_MTI_PCS_LBK;
++ rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
++
+ return 0;
+ }
++
++void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable)
++{
++ rpm_t *rpm = rpmd;
++ u64 cfg;
++
++ if (!is_lmac_valid(rpm, lmac_id))
++ return;
++
++ cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_CFG);
++ if (enable)
++ cfg |= RPMX_RX_TS_PREPEND;
++ else
++ cfg &= ~RPMX_RX_TS_PREPEND;
++ rpm_write(rpm, lmac_id, RPMX_CMRX_CFG, cfg);
++}
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+index f0b069442dccb..ff580311edd03 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+@@ -14,6 +14,8 @@
+ #define PCI_DEVID_CN10K_RPM 0xA060
+
+ /* Registers */
++#define RPMX_CMRX_CFG 0x00
++#define RPMX_RX_TS_PREPEND BIT_ULL(22)
+ #define RPMX_CMRX_SW_INT 0x180
+ #define RPMX_CMRX_SW_INT_W1S 0x188
+ #define RPMX_CMRX_SW_INT_ENA_W1S 0x198
+@@ -41,6 +43,8 @@
+ #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038
+
+ #define RPM_LMAC_FWI 0xa
++#define RPM_TX_EN BIT_ULL(0)
++#define RPM_RX_EN BIT_ULL(1)
+
+ /* Function Declarations */
+ int rpm_get_nr_lmacs(void *rpmd);
+@@ -54,4 +58,7 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
+ u8 rx_pause);
+ int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat);
+ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat);
++void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable);
++int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable);
++int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable);
+ #endif /* RPM_H */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index 35836903b7fbc..f64509b1d120c 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
+
+ rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
+ err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+- if (err)
+- dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
++ if (err) {
++ dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr);
++ while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY)
++ ;
++ }
+ }
+
+ static void rvu_reset_all_blocks(struct rvu *rvu)
+@@ -2193,7 +2196,7 @@ static inline void rvu_afvf_mbox_up_handler(struct work_struct *work)
+ }
+
+ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
+- int num, int type)
++ int num, int type, unsigned long *pf_bmap)
+ {
+ struct rvu_hwinfo *hw = rvu->hw;
+ int region;
+@@ -2205,6 +2208,9 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
+ */
+ if (type == TYPE_AFVF) {
+ for (region = 0; region < num; region++) {
++ if (!test_bit(region, pf_bmap))
++ continue;
++
+ if (hw->cap.per_pf_mbox_regs) {
+ bar4 = rvu_read64(rvu, BLKADDR_RVUM,
+ RVU_AF_PFX_BAR4_ADDR(0)) +
+@@ -2226,6 +2232,9 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
+ * RVU_AF_PF_BAR4_ADDR register.
+ */
+ for (region = 0; region < num; region++) {
++ if (!test_bit(region, pf_bmap))
++ continue;
++
+ if (hw->cap.per_pf_mbox_regs) {
+ bar4 = rvu_read64(rvu, BLKADDR_RVUM,
+ RVU_AF_PFX_BAR4_ADDR(region));
+@@ -2254,20 +2263,41 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ int err = -EINVAL, i, dir, dir_up;
+ void __iomem *reg_base;
+ struct rvu_work *mwork;
++ unsigned long *pf_bmap;
+ void **mbox_regions;
+ const char *name;
++ u64 cfg;
+
+- mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL);
+- if (!mbox_regions)
++ pf_bmap = bitmap_zalloc(num, GFP_KERNEL);
++ if (!pf_bmap)
+ return -ENOMEM;
+
++ /* RVU VFs */
++ if (type == TYPE_AFVF)
++ bitmap_set(pf_bmap, 0, num);
++
++ if (type == TYPE_AFPF) {
++ /* Mark enabled PFs in bitmap */
++ for (i = 0; i < num; i++) {
++ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(i));
++ if (cfg & BIT_ULL(20))
++ set_bit(i, pf_bmap);
++ }
++ }
++
++ mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL);
++ if (!mbox_regions) {
++ err = -ENOMEM;
++ goto free_bitmap;
++ }
++
+ switch (type) {
+ case TYPE_AFPF:
+ name = "rvu_afpf_mailbox";
+ dir = MBOX_DIR_AFPF;
+ dir_up = MBOX_DIR_AFPF_UP;
+ reg_base = rvu->afreg_base;
+- err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFPF);
++ err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFPF, pf_bmap);
+ if (err)
+ goto free_regions;
+ break;
+@@ -2276,12 +2306,12 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ dir = MBOX_DIR_PFVF;
+ dir_up = MBOX_DIR_PFVF_UP;
+ reg_base = rvu->pfreg_base;
+- err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFVF);
++ err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFVF, pf_bmap);
+ if (err)
+ goto free_regions;
+ break;
+ default:
+- return err;
++ goto free_regions;
+ }
+
+ mw->mbox_wq = alloc_workqueue(name,
+@@ -2307,16 +2337,19 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ }
+
+ err = otx2_mbox_regions_init(&mw->mbox, mbox_regions, rvu->pdev,
+- reg_base, dir, num);
++ reg_base, dir, num, pf_bmap);
+ if (err)
+ goto exit;
+
+ err = otx2_mbox_regions_init(&mw->mbox_up, mbox_regions, rvu->pdev,
+- reg_base, dir_up, num);
++ reg_base, dir_up, num, pf_bmap);
+ if (err)
+ goto exit;
+
+ for (i = 0; i < num; i++) {
++ if (!test_bit(i, pf_bmap))
++ continue;
++
+ mwork = &mw->mbox_wrk[i];
+ mwork->rvu = rvu;
+ INIT_WORK(&mwork->work, mbox_handler);
+@@ -2325,8 +2358,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
+ mwork->rvu = rvu;
+ INIT_WORK(&mwork->work, mbox_up_handler);
+ }
+- kfree(mbox_regions);
+- return 0;
++ goto free_regions;
+
+ exit:
+ destroy_workqueue(mw->mbox_wq);
+@@ -2335,6 +2367,8 @@ unmap_regions:
+ iounmap((void __iomem *)mbox_regions[num]);
+ free_regions:
+ kfree(mbox_regions);
++free_bitmap:
++ bitmap_free(pf_bmap);
+ return err;
+ }
+
+@@ -2501,6 +2535,12 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NPA);
+ rvu_reset_lmt_map_tbl(rvu, pcifunc);
+ rvu_detach_rsrcs(rvu, NULL, pcifunc);
++ /* In scenarios where PF/VF drivers detach NIXLF without freeing MCAM
++ * entries, check and free the MCAM entries explicitly to avoid leak.
++ * Since LF is detached use LF number as -1.
++ */
++ rvu_npc_free_mcam_entries(rvu, pcifunc, -1);
++
+ mutex_unlock(&rvu->flr_lock);
+ }
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+index 1d9411232f1da..9d517e6dac2f0 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+@@ -23,6 +23,7 @@
+ #define PCI_DEVID_OCTEONTX2_LBK 0xA061
+
+ /* Subsystem Device ID */
++#define PCI_SUBSYS_DEVID_98XX 0xB100
+ #define PCI_SUBSYS_DEVID_96XX 0xB200
+ #define PCI_SUBSYS_DEVID_CN10K_A 0xB900
+
+@@ -220,6 +221,7 @@ struct rvu_pfvf {
+ u16 maxlen;
+ u16 minlen;
+
++ bool hw_rx_tstamp_en; /* Is rx_tstamp enabled */
+ u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */
+ u8 default_mac[ETH_ALEN]; /* MAC address from FWdata */
+
+@@ -237,6 +239,7 @@ struct rvu_pfvf {
+ bool cgx_in_use; /* this PF/VF using CGX? */
+ int cgx_users; /* number of cgx users - used only by PFs */
+
++ int intf_mode;
+ u8 nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */
+ u8 nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */
+ u8 nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */
+@@ -612,6 +615,16 @@ static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan)
+ return rvu->hw->cpt_chan_base + chan;
+ }
+
++static inline bool is_rvu_supports_nix1(struct rvu *rvu)
++{
++ struct pci_dev *pdev = rvu->pdev;
++
++ if (pdev->subsystem_device == PCI_SUBSYS_DEVID_98XX)
++ return true;
++
++ return false;
++}
++
+ /* Function Prototypes
+ * RVU
+ */
+@@ -794,10 +807,12 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, u16 src, struct mcam_entry *entry,
+ u8 *intf, u8 *ena);
++bool is_cgx_config_permitted(struct rvu *rvu, u16 pcifunc);
+ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
+ u32 rvu_cgx_get_fifolen(struct rvu *rvu);
+ void *rvu_first_cgx_pdata(struct rvu *rvu);
+ int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id);
++int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable);
+
+ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
+ int type);
+@@ -807,6 +822,9 @@ bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr,
+ /* CPT APIs */
+ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
+
++#define NDC_AF_BANK_MASK GENMASK_ULL(7, 0)
++#define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16)
++
+ /* CN10K RVU */
+ int rvu_set_channels_base(struct rvu *rvu);
+ void rvu_program_channels(struct rvu *rvu);
+@@ -822,9 +840,14 @@ static inline void rvu_dbg_init(struct rvu *rvu) {}
+ static inline void rvu_dbg_exit(struct rvu *rvu) {}
+ #endif
+
++int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr);
++
+ /* RVU Switch */
+ void rvu_switch_enable(struct rvu *rvu);
+ void rvu_switch_disable(struct rvu *rvu);
+ void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc);
+
++int rvu_npc_set_parse_mode(struct rvu *rvu, u16 pcifunc, u64 mode, u8 dir,
++ u64 pkind, u8 var_len_off, u8 var_len_off_mask,
++ u8 shift_dir);
+ #endif /* RVU_H */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+index 81e8ea9ee30ea..f4c7bb6bf053a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+@@ -111,7 +111,7 @@ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf,
+ p2x = cgx_lmac_get_p2x(cgx_id, lmac_id);
+ /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */
+ pfvf->nix_blkaddr = BLKADDR_NIX0;
+- if (p2x == CMR_P2X_SEL_NIX1)
++ if (is_rvu_supports_nix1(rvu) && p2x == CMR_P2X_SEL_NIX1)
+ pfvf->nix_blkaddr = BLKADDR_NIX1;
+ }
+
+@@ -411,7 +411,7 @@ int rvu_cgx_exit(struct rvu *rvu)
+ * VF's of mapped PF and other PFs are not allowed. This fn() checks
+ * whether a PFFUNC is permitted to do the config or not.
+ */
+-static bool is_cgx_config_permitted(struct rvu *rvu, u16 pcifunc)
++inline bool is_cgx_config_permitted(struct rvu *rvu, u16 pcifunc)
+ {
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) ||
+ !is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc)))
+@@ -442,16 +442,26 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable)
+ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
+ {
+ int pf = rvu_get_pf(pcifunc);
++ struct mac_ops *mac_ops;
+ u8 cgx_id, lmac_id;
++ void *cgxd;
+
+ if (!is_cgx_config_permitted(rvu, pcifunc))
+ return LMAC_AF_ERR_PERM_DENIED;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
++ cgxd = rvu_cgx_pdata(cgx_id, rvu);
++ mac_ops = get_mac_ops(cgxd);
+
+- cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start);
++ return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start);
++}
+
+- return 0;
++int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable)
++{
++ struct mac_ops *mac_ops;
++
++ mac_ops = get_mac_ops(cgxd);
++ return mac_ops->mac_tx_enable(cgxd, lmac_id, enable);
+ }
+
+ void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc)
+@@ -694,7 +704,9 @@ int rvu_mbox_handler_cgx_promisc_disable(struct rvu *rvu, struct msg_req *req,
+
+ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable)
+ {
++ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ int pf = rvu_get_pf(pcifunc);
++ struct mac_ops *mac_ops;
+ u8 cgx_id, lmac_id;
+ void *cgxd;
+
+@@ -711,13 +723,16 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable)
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+ cgxd = rvu_cgx_pdata(cgx_id, rvu);
+
+- cgx_lmac_ptp_config(cgxd, lmac_id, enable);
++ mac_ops = get_mac_ops(cgxd);
++ mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, enable);
+ /* If PTP is enabled then inform NPC that packets to be
+ * parsed by this PF will have their data shifted by 8 bytes
+ * and if PTP is disabled then no shift is required
+ */
+ if (npc_config_ts_kpuaction(rvu, pf, pcifunc, enable))
+ return -EINVAL;
++ /* This flag is required to clean up CGX conf if app gets killed */
++ pfvf->hw_rx_tstamp_en = enable;
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+index 46a41cfff5751..25713287a288f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+@@ -60,13 +60,14 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
+ u64 iova, u64 *lmt_addr)
+ {
+ u64 pa, val, pf;
+- int err;
++ int err = 0;
+
+ if (!iova) {
+ dev_err(rvu->dev, "%s Requested Null address for transulation\n", __func__);
+ return -EINVAL;
+ }
+
++ mutex_lock(&rvu->rsrc_lock);
+ rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova);
+ pf = rvu_get_pf(pcifunc) & 0x1F;
+ val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 |
+@@ -76,12 +77,13 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
+ err = rvu_poll_reg(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS, BIT_ULL(0), false);
+ if (err) {
+ dev_err(rvu->dev, "%s LMTLINE iova transulation failed\n", __func__);
+- return err;
++ goto exit;
+ }
+ val = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS);
+ if (val & ~0x1ULL) {
+ dev_err(rvu->dev, "%s LMTLINE iova transulation failed err:%llx\n", __func__, val);
+- return -EIO;
++ err = -EIO;
++ goto exit;
+ }
+ /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT0[57:18]
+ * PA[11:0] = IOVA[11:0]
+@@ -89,8 +91,9 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
+ pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT0) >> 18;
+ pa &= GENMASK_ULL(39, 0);
+ *lmt_addr = (pa << 12) | (iova & 0xFFF);
+-
+- return 0;
++exit:
++ mutex_unlock(&rvu->rsrc_lock);
++ return err;
+ }
+
+ static int rvu_update_lmtaddr(struct rvu *rvu, u16 pcifunc, u64 lmt_addr)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+index 1f90a7403392d..4895faa667b50 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+@@ -206,7 +206,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
+
+ blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+ if (blkaddr < 0)
+- return blkaddr;
++ return false;
+
+ /* Registers that can be accessed from PF/VF */
+ if ((offset & 0xFF000) == CPT_AF_LFX_CTL(0) ||
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+index 49d822a98adab..4dddf6ec3be87 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+@@ -196,9 +196,6 @@ enum cpt_eng_type {
+ CPT_IE_TYPE = 3,
+ };
+
+-#define NDC_MAX_BANK(rvu, blk_addr) (rvu_read64(rvu, \
+- blk_addr, NDC_AF_CONST) & 0xFF)
+-
+ #define rvu_dbg_NULL NULL
+ #define rvu_dbg_open_NULL NULL
+
+@@ -441,6 +438,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused)
+ sprintf(lmac, "LMAC%d", lmac_id);
+ seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n",
+ dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac);
++
++ pci_dev_put(pdev);
+ }
+ return 0;
+ }
+@@ -1007,6 +1006,7 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
+ struct nix_hw *nix_hw;
+ struct rvu *rvu;
+ int bank, max_bank;
++ u64 ndc_af_const;
+
+ if (blk_addr == BLKADDR_NDC_NPA0) {
+ rvu = s->private;
+@@ -1015,7 +1015,8 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr)
+ rvu = nix_hw->rvu;
+ }
+
+- max_bank = NDC_MAX_BANK(rvu, blk_addr);
++ ndc_af_const = rvu_read64(rvu, blk_addr, NDC_AF_CONST);
++ max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
+ for (bank = 0; bank < max_bank; bank++) {
+ seq_printf(s, "BANK:%d\n", bank);
+ seq_printf(s, "\tHits:\t%lld\n",
+@@ -1131,6 +1132,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m,
+ seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n",
+ sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld);
+
++ seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n",
++ sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend);
+ seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb);
+ seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb);
+ seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb);
+@@ -2125,6 +2128,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id)
+ }
+ }
+
++ pci_dev_put(pdev);
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 6970540dc4709..1593efc4502b5 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -28,6 +28,7 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
+ static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
+ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
+ u32 leaf_prof);
++static const char *nix_get_ctx_name(int ctype);
+
+ enum mc_tbl_sz {
+ MC_TBL_SZ_256,
+@@ -511,11 +512,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+ lmac_chan_cnt = cfg & 0xFF;
+
+- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+- sdp_chan_cnt = cfg & 0xFFF;
+-
+ cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
+ lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
++
++ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
++ sdp_chan_cnt = cfg & 0xFFF;
+ sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
+
+ pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+@@ -796,6 +797,7 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+ struct nix_aq_res_s *result;
+ int timeout = 1000;
+ u64 reg, head;
++ int ret;
+
+ result = (struct nix_aq_res_s *)aq->res->base;
+
+@@ -819,13 +821,41 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+ return -EBUSY;
+ }
+
+- if (result->compcode != NIX_AQ_COMP_GOOD)
++ if (result->compcode != NIX_AQ_COMP_GOOD) {
+ /* TODO: Replace this with some error code */
++ if (result->compcode == NIX_AQ_COMP_CTX_FAULT ||
++ result->compcode == NIX_AQ_COMP_LOCKERR ||
++ result->compcode == NIX_AQ_COMP_CTX_POISON) {
++ ret = rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_RX);
++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX0_TX);
++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_RX);
++ ret |= rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NIX1_TX);
++ if (ret)
++ dev_err(rvu->dev,
++ "%s: Not able to unlock cachelines\n", __func__);
++ }
++
+ return -EBUSY;
++ }
+
+ return 0;
+ }
+
++static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
++ u16 *smq, u16 *smq_mask)
++{
++ struct nix_cn10k_aq_enq_req *aq_req;
++
++ if (!is_rvu_otx2(rvu)) {
++ aq_req = (struct nix_cn10k_aq_enq_req *)req;
++ *smq = aq_req->sq.smq;
++ *smq_mask = aq_req->sq_mask.smq;
++ } else {
++ *smq = req->sq.smq;
++ *smq_mask = req->sq_mask.smq;
++ }
++}
++
+ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+@@ -837,6 +867,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ struct rvu_block *block;
+ struct admin_queue *aq;
+ struct rvu_pfvf *pfvf;
++ u16 smq, smq_mask;
+ void *ctx, *mask;
+ bool ena;
+ u64 cfg;
+@@ -908,13 +939,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ if (rc)
+ return rc;
+
++ nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
+ /* Check if SQ pointed SMQ belongs to this PF/VF or not */
+ if (req->ctype == NIX_AQ_CTYPE_SQ &&
+ ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
+ (req->op == NIX_AQ_INSTOP_WRITE &&
+- req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
++ req->sq_mask.ena && req->sq.ena && smq_mask))) {
+ if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
+- pcifunc, req->sq.smq))
++ pcifunc, smq))
+ return NIX_AF_ERR_AQ_ENQUEUE;
+ }
+
+@@ -1061,10 +1093,68 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ return 0;
+ }
+
++static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw,
++ struct nix_aq_enq_req *req, u8 ctype)
++{
++ struct nix_cn10k_aq_enq_req aq_req;
++ struct nix_cn10k_aq_enq_rsp aq_rsp;
++ int rc, word;
++
++ if (req->ctype != NIX_AQ_CTYPE_CQ)
++ return 0;
++
++ rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp,
++ req->hdr.pcifunc, ctype, req->qidx);
++ if (rc) {
++ dev_err(rvu->dev,
++ "%s: Failed to fetch %s%d context of PFFUNC 0x%x\n",
++ __func__, nix_get_ctx_name(ctype), req->qidx,
++ req->hdr.pcifunc);
++ return rc;
++ }
++
++ /* Make copy of original context & mask which are required
++ * for resubmission
++ */
++ memcpy(&aq_req.cq_mask, &req->cq_mask, sizeof(struct nix_cq_ctx_s));
++ memcpy(&aq_req.cq, &req->cq, sizeof(struct nix_cq_ctx_s));
++
++ /* exclude fields which HW can update */
++ aq_req.cq_mask.cq_err = 0;
++ aq_req.cq_mask.wrptr = 0;
++ aq_req.cq_mask.tail = 0;
++ aq_req.cq_mask.head = 0;
++ aq_req.cq_mask.avg_level = 0;
++ aq_req.cq_mask.update_time = 0;
++ aq_req.cq_mask.substream = 0;
++
++ /* Context mask (cq_mask) holds mask value of fields which
++ * are changed in AQ WRITE operation.
++ * for example cq.drop = 0xa;
++ * cq_mask.drop = 0xff;
++ * Below logic performs '&' between cq and cq_mask so that non
++ * updated fields are masked out for request and response
++ * comparison
++ */
++ for (word = 0; word < sizeof(struct nix_cq_ctx_s) / sizeof(u64);
++ word++) {
++ *(u64 *)((u8 *)&aq_rsp.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ *(u64 *)((u8 *)&aq_req.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ }
++
++ if (memcmp(&aq_req.cq, &aq_rsp.cq, sizeof(struct nix_cq_ctx_s)))
++ return NIX_AF_ERR_AQ_CTX_RETRY_WRITE;
++
++ return 0;
++}
++
+ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+ {
+ struct nix_hw *nix_hw;
++ int err, retries = 5;
+ int blkaddr;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc);
+@@ -1075,7 +1165,24 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+- return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++retry:
++ err = rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++
++ /* HW errata 'AQ Modification to CQ could be discarded on heavy traffic'
++	 * As a workaround perform CQ context read after each AQ write. If AQ
++ * read shows AQ write is not updated perform AQ write again.
++ */
++ if (!err && req->op == NIX_AQ_INSTOP_WRITE) {
++ err = rvu_nix_verify_aq_ctx(rvu, nix_hw, req, NIX_AQ_CTYPE_CQ);
++ if (err == NIX_AF_ERR_AQ_CTX_RETRY_WRITE) {
++ if (retries--)
++ goto retry;
++ else
++ return NIX_AF_ERR_CQ_CTX_WRITE_ERR;
++ }
++ }
++
++ return err;
+ }
+
+ static const char *nix_get_ctx_name(int ctype)
+@@ -1795,7 +1902,8 @@ static int nix_check_txschq_alloc_req(struct rvu *rvu, int lvl, u16 pcifunc,
+ free_cnt = rvu_rsrc_free_count(&txsch->schq);
+ }
+
+- if (free_cnt < req_schq || req_schq > MAX_TXSCHQ_PER_FUNC)
++ if (free_cnt < req_schq || req->schq[lvl] > MAX_TXSCHQ_PER_FUNC ||
++ req->schq_contig[lvl] > MAX_TXSCHQ_PER_FUNC)
+ return NIX_AF_ERR_TLX_ALLOC_FAIL;
+
+ /* If contiguous queues are needed, check for availability */
+@@ -1992,8 +2100,8 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ /* enable cgx tx if disabled */
+ if (is_pf_cgxmapped(rvu, pf)) {
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
+- lmac_id, true);
++ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
++ lmac_id, true);
+ }
+
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+@@ -2016,7 +2124,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+ rvu_cgx_enadis_rx_bp(rvu, pf, true);
+ /* restore cgx tx state */
+ if (restore_tx_en)
+- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
++ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+ return err;
+ }
+
+@@ -3802,7 +3910,7 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+ /* Enable cgx tx if disabled for credits to be back */
+ if (is_pf_cgxmapped(rvu, pf)) {
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+- restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
++ restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu),
+ lmac_id, true);
+ }
+
+@@ -3815,8 +3923,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+ NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+ }
+
+- rc = -EBUSY;
+- poll_tmo = jiffies + usecs_to_jiffies(10000);
++ rc = NIX_AF_ERR_LINK_CREDITS;
++ poll_tmo = jiffies + usecs_to_jiffies(200000);
+ /* Wait for credits to return */
+ do {
+ if (time_after(jiffies, poll_tmo))
+@@ -3842,7 +3950,7 @@ exit:
+
+ /* Restore state of cgx tx */
+ if (restore_tx_en)
+- cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
++ rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+
+ mutex_unlock(&rvu->rsrc_lock);
+ return rc;
+@@ -3922,9 +4030,10 @@ rx_frscfg:
+ if (link < 0)
+ return NIX_AF_ERR_RX_LINK_INVALID;
+
+- nix_find_link_frs(rvu, req, pcifunc);
+
+ linkcfg:
++ nix_find_link_frs(rvu, req, pcifunc);
++
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link));
+ cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16);
+ if (req->update_minlen)
+@@ -3976,10 +4085,6 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
+
+ static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs)
+ {
+- /* CN10k supports 72KB FIFO size and max packet size of 64k */
+- if (rvu->hw->lbk_bufsize == 0x12000)
+- return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16;
+-
+ return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */
+ }
+
+@@ -4443,6 +4548,10 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
+ {
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ struct hwctx_disable_req ctx_req;
++ int pf = rvu_get_pf(pcifunc);
++ struct mac_ops *mac_ops;
++ u8 cgx_id, lmac_id;
++ void *cgxd;
+ int err;
+
+ ctx_req.hdr.pcifunc = pcifunc;
+@@ -4479,6 +4588,22 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
+ dev_err(rvu->dev, "CQ ctx disable failed\n");
+ }
+
++ /* reset HW config done for Switch headers */
++ rvu_npc_set_parse_mode(rvu, pcifunc, OTX2_PRIV_FLAGS_DEFAULT,
++ (PKIND_TX | PKIND_RX), 0, 0, 0, 0);
++
++ /* Disabling CGX and NPC config done for PTP */
++ if (pfvf->hw_rx_tstamp_en) {
++ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
++ cgxd = rvu_cgx_pdata(cgx_id, rvu);
++ mac_ops = get_mac_ops(cgxd);
++ mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, false);
++ /* Undo NPC config done for PTP */
++ if (npc_config_ts_kpuaction(rvu, pf, pcifunc, false))
++ dev_err(rvu->dev, "NPC config for PTP failed\n");
++ pfvf->hw_rx_tstamp_en = false;
++ }
++
+ nix_ctx_free(rvu, pfvf);
+
+ nix_free_all_bandprof(rvu, pcifunc);
+@@ -4736,6 +4861,8 @@ static int nix_setup_ipolicers(struct rvu *rvu,
+ ipolicer->ref_count = devm_kcalloc(rvu->dev,
+ ipolicer->band_prof.max,
+ sizeof(u16), GFP_KERNEL);
++ if (!ipolicer->ref_count)
++ return -ENOMEM;
+ }
+
+ /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+index 70bd036ed76e4..4f5ca5ab13a40 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
+@@ -4,7 +4,7 @@
+ * Copyright (C) 2018 Marvell.
+ *
+ */
+-
++#include <linux/bitfield.h>
+ #include <linux/module.h>
+ #include <linux/pci.h>
+
+@@ -42,9 +42,18 @@ static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+ return -EBUSY;
+ }
+
+- if (result->compcode != NPA_AQ_COMP_GOOD)
++ if (result->compcode != NPA_AQ_COMP_GOOD) {
+ /* TODO: Replace this with some error code */
++ if (result->compcode == NPA_AQ_COMP_CTX_FAULT ||
++ result->compcode == NPA_AQ_COMP_LOCKERR ||
++ result->compcode == NPA_AQ_COMP_CTX_POISON) {
++ if (rvu_ndc_fix_locked_cacheline(rvu, BLKADDR_NDC_NPA0))
++ dev_err(rvu->dev,
++ "%s: Not able to unlock cachelines\n", __func__);
++ }
++
+ return -EBUSY;
++ }
+
+ return 0;
+ }
+@@ -545,3 +554,48 @@ void rvu_npa_lf_teardown(struct rvu *rvu, u16 pcifunc, int npalf)
+
+ npa_ctx_free(rvu, pfvf);
+ }
++
++/* Due to a hardware erratum, in some corner cases, AQ context lock
++ * operations can result in an NDC way getting into an illegal state
++ * of not valid but locked.
++ *
++ * This API solves the problem by clearing the lock bit of the NDC block.
++ * The operation needs to be done for each line of all the NDC banks.
++ */
++int rvu_ndc_fix_locked_cacheline(struct rvu *rvu, int blkaddr)
++{
++ int bank, max_bank, line, max_line, err;
++ u64 reg, ndc_af_const;
++
++ /* Set the ENABLE bit(63) to '0' */
++ reg = rvu_read64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL);
++ rvu_write64(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, reg & GENMASK_ULL(62, 0));
++
++ /* Poll until the BUSY bits(47:32) are set to '0' */
++ err = rvu_poll_reg(rvu, blkaddr, NDC_AF_CAMS_RD_INTERVAL, GENMASK_ULL(47, 32), true);
++ if (err) {
++ dev_err(rvu->dev, "Timed out while polling for NDC CAM busy bits.\n");
++ return err;
++ }
++
++ ndc_af_const = rvu_read64(rvu, blkaddr, NDC_AF_CONST);
++ max_bank = FIELD_GET(NDC_AF_BANK_MASK, ndc_af_const);
++ max_line = FIELD_GET(NDC_AF_BANK_LINE_MASK, ndc_af_const);
++ for (bank = 0; bank < max_bank; bank++) {
++ for (line = 0; line < max_line; line++) {
++ /* Check if 'cache line valid bit(63)' is not set
++ * but 'cache line lock bit(60)' is set and on
++ * success, reset the lock bit(60).
++ */
++ reg = rvu_read64(rvu, blkaddr,
++ NDC_AF_BANKX_LINEX_METADATA(bank, line));
++ if (!(reg & BIT_ULL(63)) && (reg & BIT_ULL(60))) {
++ rvu_write64(rvu, blkaddr,
++ NDC_AF_BANKX_LINEX_METADATA(bank, line),
++ reg & ~BIT_ULL(60));
++ }
++ }
++ }
++
++ return 0;
++}
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index 5efb4174e82df..d1249da7a18fb 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, int index, struct mcam_entry *entry,
+ bool *enable)
+ {
++ struct rvu_npc_mcam_rule *rule;
+ u16 owner, target_func;
+ struct rvu_pfvf *pfvf;
+ u64 rx_action;
+@@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
+ *enable = false;
+
++ /* fix up not needed for the rules added by user(ntuple filters) */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ return;
++ }
++
+ /* copy VF default entry action to the VF mcam entry */
+ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
+ target_func);
+@@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ }
+
+ /* PF installing VF rule */
+- if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
+- npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
++ if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries)
++ npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable);
+
+ /* Set 'action' */
+ rvu_write64(rvu, blkaddr,
+@@ -598,7 +605,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
+ struct npc_install_flow_req req = { 0 };
+ struct npc_install_flow_rsp rsp = { 0 };
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+- struct nix_rx_action action;
++ struct nix_rx_action action = { 0 };
+ int blkaddr, index;
+
+ /* AF's and SDP VFs work in promiscuous mode */
+@@ -619,7 +626,6 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
+ *(u64 *)&action = npc_get_mcam_action(rvu, mcam,
+ blkaddr, index);
+ } else {
+- *(u64 *)&action = 0x00;
+ action.op = NIX_RX_ACTIONOP_UCAST;
+ action.pf_func = pcifunc;
+ }
+@@ -650,7 +656,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+ struct rvu_hwinfo *hw = rvu->hw;
+ int blkaddr, ucast_idx, index;
+- struct nix_rx_action action;
++ struct nix_rx_action action = { 0 };
+ u64 relaxed_mask;
+
+ if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc))
+@@ -678,14 +684,14 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+ blkaddr, ucast_idx);
+
+ if (action.op != NIX_RX_ACTIONOP_RSS) {
+- *(u64 *)&action = 0x00;
++ *(u64 *)&action = 0;
+ action.op = NIX_RX_ACTIONOP_UCAST;
+ }
+
+ /* RX_ACTION set to MCAST for CGX PF's */
+ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list &&
+ is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
+- *(u64 *)&action = 0x00;
++ *(u64 *)&action = 0;
+ action.op = NIX_RX_ACTIONOP_MCAST;
+ pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+ action.index = pfvf->promisc_mce_idx;
+@@ -825,7 +831,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+ struct rvu_hwinfo *hw = rvu->hw;
+ int blkaddr, ucast_idx, index;
+ u8 mac_addr[ETH_ALEN] = { 0 };
+- struct nix_rx_action action;
++ struct nix_rx_action action = { 0 };
+ struct rvu_pfvf *pfvf;
+ u16 vf_func;
+
+@@ -854,14 +860,14 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+ blkaddr, ucast_idx);
+
+ if (action.op != NIX_RX_ACTIONOP_RSS) {
+- *(u64 *)&action = 0x00;
++ *(u64 *)&action = 0;
+ action.op = NIX_RX_ACTIONOP_UCAST;
+ action.pf_func = pcifunc;
+ }
+
+ /* RX_ACTION set to MCAST for CGX PF's */
+ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list) {
+- *(u64 *)&action = 0x00;
++ *(u64 *)&action = 0;
+ action.op = NIX_RX_ACTIONOP_MCAST;
+ action.index = pfvf->mcast_mce_idx;
+ }
+@@ -916,7 +922,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, u16 pcifunc, u64 rx_action)
+ {
+ int actindex, index, bank, entry;
+- bool enable;
++ struct rvu_npc_mcam_rule *rule;
++ bool enable, update;
+
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+ return;
+@@ -924,6 +931,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ mutex_lock(&mcam->lock);
+ for (index = 0; index < mcam->bmap_entries; index++) {
+ if (mcam->entry2target_pffunc[index] == pcifunc) {
++ update = true;
++ /* update not needed for the rules added via ntuple filters */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ update = false;
++ }
++ if (!update)
++ continue;
+ bank = npc_get_bank(mcam, index);
+ actindex = index;
+ entry = index & (mcam->banksize - 1);
+@@ -1081,6 +1096,9 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
+
+ void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+ {
++ if (nixlf < 0)
++ return;
++
+ npc_enadis_default_entries(rvu, pcifunc, nixlf, false);
+
+ /* Delete multicast and promisc MCAM entries */
+@@ -1092,6 +1110,9 @@ void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+
+ void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
+ {
++ if (nixlf < 0)
++ return;
++
+ /* Enables only broadcast match entry. Promisc/Allmulti are enabled
+ * in set_rx_mode mbox handler.
+ */
+@@ -1635,7 +1656,7 @@ static void npc_load_kpu_profile(struct rvu *rvu)
+ * Firmware database method.
+ * Default KPU profile.
+ */
+- if (!request_firmware(&fw, kpu_profile, rvu->dev)) {
++ if (!request_firmware_direct(&fw, kpu_profile, rvu->dev)) {
+ dev_info(rvu->dev, "Loading KPU profile from firmware: %s\n",
+ kpu_profile);
+ rvu->kpu_fwdata = kzalloc(fw->size, GFP_KERNEL);
+@@ -1900,6 +1921,7 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
+
+ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr)
+ {
++ struct npc_mcam_kex *mkex = rvu->kpu.mkex;
+ struct npc_mcam *mcam = &rvu->hw->mcam;
+ struct rvu_hwinfo *hw = rvu->hw;
+ u64 nibble_ena, rx_kex, tx_kex;
+@@ -1912,15 +1934,15 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr)
+ mcam->counters.max--;
+ mcam->rx_miss_act_cntr = mcam->counters.max;
+
+- rx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_RX];
+- tx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_TX];
++ rx_kex = mkex->keyx_cfg[NIX_INTF_RX];
++ tx_kex = mkex->keyx_cfg[NIX_INTF_TX];
+ nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex);
+
+ nibble_ena = rvu_npc_get_tx_nibble_cfg(rvu, nibble_ena);
+ if (nibble_ena) {
+ tx_kex &= ~NPC_PARSE_NIBBLE;
+ tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena);
+- npc_mkex_default.keyx_cfg[NIX_INTF_TX] = tx_kex;
++ mkex->keyx_cfg[NIX_INTF_TX] = tx_kex;
+ }
+
+ /* Configure RX interfaces */
+@@ -3167,6 +3189,102 @@ int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req,
+ return 0;
+ }
+
++static int
++npc_set_var_len_offset_pkind(struct rvu *rvu, u16 pcifunc, u64 pkind,
++ u8 var_len_off, u8 var_len_off_mask, u8 shift_dir)
++{
++ struct npc_kpu_action0 *act0;
++ u8 shift_count = 0;
++ int blkaddr;
++ u64 val;
++
++ if (!var_len_off_mask)
++ return -EINVAL;
++
++ if (var_len_off_mask != 0xff) {
++ if (shift_dir)
++ shift_count = __ffs(var_len_off_mask);
++ else
++ shift_count = (8 - __fls(var_len_off_mask));
++ }
++ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, pcifunc);
++ if (blkaddr < 0) {
++ dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__);
++ return -EINVAL;
++ }
++ val = rvu_read64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind));
++ act0 = (struct npc_kpu_action0 *)&val;
++ act0->var_len_shift = shift_count;
++ act0->var_len_right = shift_dir;
++ act0->var_len_mask = var_len_off_mask;
++ act0->var_len_offset = var_len_off;
++ rvu_write64(rvu, blkaddr, NPC_AF_PKINDX_ACTION0(pkind), val);
++ return 0;
++}
++
++int rvu_npc_set_parse_mode(struct rvu *rvu, u16 pcifunc, u64 mode, u8 dir,
++ u64 pkind, u8 var_len_off, u8 var_len_off_mask,
++ u8 shift_dir)
++
++{
++ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
++ int blkaddr, nixlf, rc, intf_mode;
++ int pf = rvu_get_pf(pcifunc);
++ u64 rxpkind, txpkind;
++ u8 cgx_id, lmac_id;
++
++ /* use default pkind to disable edsa/higig */
++ rxpkind = rvu_npc_get_pkind(rvu, pf);
++ txpkind = NPC_TX_DEF_PKIND;
++ intf_mode = NPC_INTF_MODE_DEF;
++
++ if (mode & OTX2_PRIV_FLAGS_CUSTOM) {
++ if (pkind == NPC_RX_CUSTOM_PRE_L2_PKIND) {
++ rc = npc_set_var_len_offset_pkind(rvu, pcifunc, pkind,
++ var_len_off,
++ var_len_off_mask,
++ shift_dir);
++ if (rc)
++ return rc;
++ }
++ rxpkind = pkind;
++ txpkind = pkind;
++ }
++
++ if (dir & PKIND_RX) {
++ /* rx pkind set req valid only for cgx mapped PFs */
++ if (!is_cgx_config_permitted(rvu, pcifunc))
++ return 0;
++ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
++
++ rc = cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id,
++ rxpkind);
++ if (rc)
++ return rc;
++ }
++
++ if (dir & PKIND_TX) {
++ /* Tx pkind set request valid if PCIFUNC has NIXLF attached */
++ rc = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr);
++ if (rc)
++ return rc;
++
++ rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf),
++ txpkind);
++ }
++
++ pfvf->intf_mode = intf_mode;
++ return 0;
++}
++
++int rvu_mbox_handler_npc_set_pkind(struct rvu *rvu, struct npc_set_pkind *req,
++ struct msg_rsp *rsp)
++{
++ return rvu_npc_set_parse_mode(rvu, req->hdr.pcifunc, req->mode,
++ req->dir, req->pkind, req->var_len_off,
++ req->var_len_off_mask, req->shift_dir);
++}
++
+ int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu,
+ struct msg_req *req,
+ struct npc_mcam_read_base_rule_rsp *rsp)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+index 51ddc7b81d0bd..750aaa1676878 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+@@ -445,7 +445,8 @@ do { \
+ NPC_SCAN_HDR(NPC_VLAN_TAG1, NPC_LID_LB, NPC_LT_LB_CTAG, 2, 2);
+ NPC_SCAN_HDR(NPC_VLAN_TAG2, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 2, 2);
+ NPC_SCAN_HDR(NPC_DMAC, NPC_LID_LA, la_ltype, la_start, 6);
+- NPC_SCAN_HDR(NPC_SMAC, NPC_LID_LA, la_ltype, la_start, 6);
++ /* SMAC follows the DMAC(which is 6 bytes) */
++ NPC_SCAN_HDR(NPC_SMAC, NPC_LID_LA, la_ltype, la_start + 6, 6);
+ /* PF_FUNC is 2 bytes at 0th byte of NPC_LT_LA_IH_NIX_ETHER */
+ NPC_SCAN_HDR(NPC_PF_FUNC, NPC_LID_LA, NPC_LT_LA_IH_NIX_ETHER, 0, 2);
+ }
+@@ -1098,14 +1099,6 @@ find_rule:
+ write_req.cntr = rule->cntr;
+ }
+
+- err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
+- &write_rsp);
+- if (err) {
+- rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
+- if (new)
+- kfree(rule);
+- return err;
+- }
+ /* update rule */
+ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
+ memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
+@@ -1129,6 +1122,18 @@ find_rule:
+ if (req->default_rule)
+ pfvf->def_ucast_rule = rule;
+
++ /* write to mcam entry registers */
++ err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
++ &write_rsp);
++ if (err) {
++ rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
++ if (new) {
++ list_del(&rule->list);
++ kfree(rule);
++ }
++ return err;
++ }
++
+ /* VF's MAC address is being changed via PF */
+ if (pf_set_vfs_mac) {
+ ether_addr_copy(pfvf->default_mac, req->packet.dmac);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+index 21f1ed4e222f7..d81b63a0d430f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+@@ -670,6 +670,7 @@
+ #define NDC_AF_INTR_ENA_W1S (0x00068)
+ #define NDC_AF_INTR_ENA_W1C (0x00070)
+ #define NDC_AF_ACTIVE_PC (0x00078)
++#define NDC_AF_CAMS_RD_INTERVAL (0x00080)
+ #define NDC_AF_BP_TEST_ENABLE (0x001F8)
+ #define NDC_AF_BP_TEST(a) (0x00200 | (a) << 3)
+ #define NDC_AF_BLK_RST (0x002F0)
+@@ -685,6 +686,8 @@
+ (0x00F00 | (a) << 5 | (b) << 4)
+ #define NDC_AF_BANKX_HIT_PC(a) (0x01000 | (a) << 3)
+ #define NDC_AF_BANKX_MISS_PC(a) (0x01100 | (a) << 3)
++#define NDC_AF_BANKX_LINEX_METADATA(a, b) \
++ (0x10000 | (a) << 12 | (b) << 3)
+
+ /* LBK */
+ #define LBK_CONST (0x10ull)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
+index b04fb226f708a..ae50d56258ec6 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
+@@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu)
+ pfvf->sdp_info = devm_kzalloc(rvu->dev,
+ sizeof(struct sdp_node_info),
+ GFP_KERNEL);
+- if (!pfvf->sdp_info)
++ if (!pfvf->sdp_info) {
++ pci_dev_put(pdev);
+ return -ENOMEM;
++ }
+
+ dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]);
+
+- put_device(&pdev->dev);
+ i++;
+ }
+
++ pci_dev_put(pdev);
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+index 78df173e6df24..b743646993ca2 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+@@ -188,7 +188,7 @@ static int otx2_hw_get_mac_addr(struct otx2_nic *pfvf,
+ return PTR_ERR(msghdr);
+ }
+ rsp = (struct nix_get_mac_addr_rsp *)msghdr;
+- ether_addr_copy(netdev->dev_addr, rsp->mac_addr);
++ eth_hw_addr_set(netdev, rsp->mac_addr);
+ mutex_unlock(&pfvf->mbox.lock);
+
+ return 0;
+@@ -631,6 +631,12 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
+ req->num_regs++;
+ req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
+ req->regval[1] = dwrr_val;
++ if (lvl == hw->txschq_link_cfg_lvl) {
++ req->num_regs++;
++ req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
++ /* Enable this queue and backpressure */
++ req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
++ }
+ } else if (lvl == NIX_TXSCH_LVL_TL2) {
+ parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
+ req->reg[0] = NIX_AF_TL2X_PARENT(schq);
+@@ -640,11 +646,12 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
+ req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
+ req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
+
+- req->num_regs++;
+- req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
+- /* Enable this queue and backpressure */
+- req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
+-
++ if (lvl == hw->txschq_link_cfg_lvl) {
++ req->num_regs++;
++ req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
++ /* Enable this queue and backpressure */
++ req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
++ }
+ } else if (lvl == NIX_TXSCH_LVL_TL1) {
+ /* Default config for TL1.
+ * For VF this is always ignored.
+@@ -1006,6 +1013,9 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
+ return err;
+ }
+
++ pfvf->cq_op_addr = (__force u64 *)otx2_get_regaddr(pfvf,
++ NIX_LF_CQ_OP_STATUS);
++
+ /* Initialize work queue for receive buffer refill */
+ pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt,
+ sizeof(struct refill_work), GFP_KERNEL);
+@@ -1312,18 +1322,23 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
+ sq = &qset->sq[qidx];
+ sq->sqb_count = 0;
+ sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL);
+- if (!sq->sqb_ptrs)
+- return -ENOMEM;
++ if (!sq->sqb_ptrs) {
++ err = -ENOMEM;
++ goto err_mem;
++ }
+
+ for (ptr = 0; ptr < num_sqbs; ptr++) {
+- if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
+- return -ENOMEM;
++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
++ if (err)
++ goto err_mem;
+ pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
+ sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
+ }
+ }
+
+- return 0;
++err_mem:
++ return err ? -ENOMEM : 0;
++
+ fail:
+ otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+ otx2_aura_pool_free(pfvf);
+@@ -1366,13 +1381,13 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
+ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
+ pool = &pfvf->qset.pool[pool_id];
+ for (ptr = 0; ptr < num_ptrs; ptr++) {
+- if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
++ if (err)
+ return -ENOMEM;
+ pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+ bufptr + OTX2_HEAD_ROOM);
+ }
+ }
+-
+ return 0;
+ fail:
+ otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+@@ -1563,6 +1578,8 @@ void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf,
+ for (schq = 0; schq < rsp->schq[lvl]; schq++)
+ pf->hw.txschq_list[lvl][schq] =
+ rsp->schq_list[lvl][schq];
++
++ pf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl;
+ }
+ EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc);
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+index a51ecd771d075..e685628b92942 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+@@ -182,6 +182,7 @@ struct otx2_hw {
+ u16 sqb_size;
+
+ /* NIX */
++ u8 txschq_link_cfg_lvl;
+ u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+ u16 matchall_ipolicer;
+ u32 dwrr_mtu;
+@@ -336,6 +337,7 @@ struct otx2_nic {
+ #define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED BIT_ULL(13)
+ #define OTX2_FLAG_DMACFLTR_SUPPORT BIT_ULL(14)
+ u64 flags;
++ u64 *cq_op_addr;
+
+ struct otx2_qset qset;
+ struct otx2_hw hw;
+@@ -591,6 +593,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
+ size++;
+ tar_addr |= ((size - 1) & 0x7) << 4;
+ }
++ dma_wmb();
+ memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+ /* Perform LMTST flush */
+ cn10k_lmt_flush(val, tar_addr);
+@@ -602,8 +605,10 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
+ u64 ptrs[2];
+
+ ptrs[1] = buf;
++ get_cpu();
+ /* Free only one buffer at time during init and teardown */
+ __cn10k_aura_freeptr(pfvf, aura, ptrs, 2);
++ put_cpu();
+ }
+
+ /* Alloc pointer from pool/aura */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+index 77a13fb555fb6..483f660cebc40 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+@@ -748,7 +748,7 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp,
+
+ /* NPC profile doesn't extract AH/ESP header fields */
+ if ((ah_esp_mask->spi & ah_esp_hdr->spi) ||
+- (ah_esp_mask->tclass & ah_esp_mask->tclass))
++ (ah_esp_mask->tclass & ah_esp_hdr->tclass))
+ return -EOPNOTSUPP;
+
+ if (flow_type == AH_V6_FLOW)
+@@ -827,6 +827,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
+ return -EINVAL;
+
+ vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype);
++
++ /* Drop rule with vlan_etype == 802.1Q
++ * and vlan_id == 0 is not supported
++ */
++ if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci &&
++ fsp->ring_cookie == RX_CLS_FLOW_DISC)
++ return -EINVAL;
++
+ /* Only ETH_P_8021Q and ETH_P_802AD types supported */
+ if (vlan_etype != ETH_P_8021Q &&
+ vlan_etype != ETH_P_8021AD)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index 53df7fff92c40..8fc4ecc4f7140 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -13,6 +13,7 @@
+ #include <linux/if_vlan.h>
+ #include <linux/iommu.h>
+ #include <net/ip.h>
++#include <linux/bitfield.h>
+
+ #include "otx2_reg.h"
+ #include "otx2_common.h"
+@@ -386,7 +387,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
+ dst_mdev->msg_size = mbox_hdr->msg_size;
+ dst_mdev->num_msgs = num_msgs;
+ err = otx2_sync_mbox_msg(dst_mbox);
+- if (err) {
++ /* An error code of -EIO indicates a communication failure
++ * with the AF. The rest of the error codes indicate that the AF
++ * processed the VF messages and set the error codes in the response
++ * messages (if any), so simply forward the responses to the VF.
++ */
++ if (err == -EIO) {
+ dev_warn(pf->dev,
+ "AF not responding to VF%d messages\n", vf);
+ /* restore PF mbase and exit */
+@@ -1148,6 +1154,59 @@ int otx2_set_real_num_queues(struct net_device *netdev,
+ }
+ EXPORT_SYMBOL(otx2_set_real_num_queues);
+
++static char *nix_sqoperr_e_str[NIX_SQOPERR_MAX] = {
++ "NIX_SQOPERR_OOR",
++ "NIX_SQOPERR_CTX_FAULT",
++ "NIX_SQOPERR_CTX_POISON",
++ "NIX_SQOPERR_DISABLED",
++ "NIX_SQOPERR_SIZE_ERR",
++ "NIX_SQOPERR_OFLOW",
++ "NIX_SQOPERR_SQB_NULL",
++ "NIX_SQOPERR_SQB_FAULT",
++ "NIX_SQOPERR_SQE_SZ_ZERO",
++};
++
++static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = {
++ "NIX_MNQERR_SQ_CTX_FAULT",
++ "NIX_MNQERR_SQ_CTX_POISON",
++ "NIX_MNQERR_SQB_FAULT",
++ "NIX_MNQERR_SQB_POISON",
++ "NIX_MNQERR_TOTAL_ERR",
++ "NIX_MNQERR_LSO_ERR",
++ "NIX_MNQERR_CQ_QUERY_ERR",
++ "NIX_MNQERR_MAX_SQE_SIZE_ERR",
++ "NIX_MNQERR_MAXLEN_ERR",
++ "NIX_MNQERR_SQE_SIZEM1_ZERO",
++};
++
++static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = {
++ "NIX_SND_STATUS_GOOD",
++ "NIX_SND_STATUS_SQ_CTX_FAULT",
++ "NIX_SND_STATUS_SQ_CTX_POISON",
++ "NIX_SND_STATUS_SQB_FAULT",
++ "NIX_SND_STATUS_SQB_POISON",
++ "NIX_SND_STATUS_HDR_ERR",
++ "NIX_SND_STATUS_EXT_ERR",
++ "NIX_SND_STATUS_JUMP_FAULT",
++ "NIX_SND_STATUS_JUMP_POISON",
++ "NIX_SND_STATUS_CRC_ERR",
++ "NIX_SND_STATUS_IMM_ERR",
++ "NIX_SND_STATUS_SG_ERR",
++ "NIX_SND_STATUS_MEM_ERR",
++ "NIX_SND_STATUS_INVALID_SUBDC",
++ "NIX_SND_STATUS_SUBDC_ORDER_ERR",
++ "NIX_SND_STATUS_DATA_FAULT",
++ "NIX_SND_STATUS_DATA_POISON",
++ "NIX_SND_STATUS_NPC_DROP_ACTION",
++ "NIX_SND_STATUS_LOCK_VIOL",
++ "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR",
++ "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR",
++ "NIX_SND_STATUS_NPC_MCAST_ABORT",
++ "NIX_SND_STATUS_NPC_VTAG_PTR_ERR",
++ "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR",
++ "NIX_SND_STATUS_SEND_STATS_ERR",
++};
++
+ static irqreturn_t otx2_q_intr_handler(int irq, void *data)
+ {
+ struct otx2_nic *pf = data;
+@@ -1181,46 +1240,67 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data)
+
+ /* SQ */
+ for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) {
++ u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg;
++ u8 sq_op_err_code, mnq_err_code, snd_err_code;
++
++ /* The debug registers below capture the first error corresponding
++ * to each register. There is no need to check against the SQ qid
++ * as these are fatal errors.
++ */
++
+ ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT);
+ val = otx2_atomic64_add((qidx << 44), ptr);
+ otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) |
+ (val & NIX_SQINT_BITS));
+
+- if (!(val & (NIX_SQINT_BITS | BIT_ULL(42))))
+- continue;
+-
+ if (val & BIT_ULL(42)) {
+ netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
+ qidx, otx2_read64(pf, NIX_LF_ERR_INT));
+- } else {
+- if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) {
+- netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx",
+- qidx,
+- otx2_read64(pf,
+- NIX_LF_SQ_OP_ERR_DBG));
+- otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG,
+- BIT_ULL(44));
+- }
+- if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) {
+- netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n",
+- qidx,
+- otx2_read64(pf, NIX_LF_MNQ_ERR_DBG));
+- otx2_write64(pf, NIX_LF_MNQ_ERR_DBG,
+- BIT_ULL(44));
+- }
+- if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) {
+- netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx",
+- qidx,
+- otx2_read64(pf,
+- NIX_LF_SEND_ERR_DBG));
+- otx2_write64(pf, NIX_LF_SEND_ERR_DBG,
+- BIT_ULL(44));
+- }
+- if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
+- netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
+- qidx);
++ goto done;
++ }
++
++ sq_op_err_dbg = otx2_read64(pf, NIX_LF_SQ_OP_ERR_DBG);
++ if (!(sq_op_err_dbg & BIT(44)))
++ goto chk_mnq_err_dbg;
++
++ sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg);
++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n",
++ qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]);
++
++ otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44));
++
++ if (sq_op_err_code == NIX_SQOPERR_SQB_NULL)
++ goto chk_mnq_err_dbg;
++
++ /* The error is not NIX_SQOPERR_SQB_NULL, so the AQ function should be called to read the SQ structure.
++ * TODO: we are in IRQ context here; how can the mbox functions, which can sleep, be called?
++ */
++
++chk_mnq_err_dbg:
++ mnq_err_dbg = otx2_read64(pf, NIX_LF_MNQ_ERR_DBG);
++ if (!(mnq_err_dbg & BIT(44)))
++ goto chk_snd_err_dbg;
++
++ mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg);
++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n",
++ qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]);
++ otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44));
++
++chk_snd_err_dbg:
++ snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG);
++ if (snd_err_dbg & BIT(44)) {
++ snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg);
++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s\n",
++ qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]);
++ otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44));
+ }
+
++done:
++ /* Print values and reset */
++ if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
++ netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
++ qidx);
++
+ schedule_work(&pf->reset_task);
+ }
+
+@@ -1350,8 +1430,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
+ if (err)
+ goto err_free_npa_lf;
+
+- /* Enable backpressure */
+- otx2_nix_config_bp(pf, true);
++ /* Enable backpressure for CGX mapped PF/VFs */
++ if (!is_otx2_lbkvf(pf->pdev))
++ otx2_nix_config_bp(pf, true);
+
+ /* Init Auras and pools used by NIX RQ, for free buffer ptrs */
+ err = otx2_rq_aura_pool_init(pf);
+@@ -1493,6 +1574,44 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
+ mutex_unlock(&mbox->lock);
+ }
+
++static void otx2_do_set_rx_mode(struct otx2_nic *pf)
++{
++ struct net_device *netdev = pf->netdev;
++ struct nix_rx_mode *req;
++ bool promisc = false;
++
++ if (!(netdev->flags & IFF_UP))
++ return;
++
++ if ((netdev->flags & IFF_PROMISC) ||
++ (netdev_uc_count(netdev) > OTX2_MAX_UNICAST_FLOWS)) {
++ promisc = true;
++ }
++
++ /* Write unicast address to mcam entries or del from mcam */
++ if (!promisc && netdev->priv_flags & IFF_UNICAST_FLT)
++ __dev_uc_sync(netdev, otx2_add_macfilter, otx2_del_macfilter);
++
++ mutex_lock(&pf->mbox.lock);
++ req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox);
++ if (!req) {
++ mutex_unlock(&pf->mbox.lock);
++ return;
++ }
++
++ req->mode = NIX_RX_MODE_UCAST;
++
++ if (promisc)
++ req->mode |= NIX_RX_MODE_PROMISC;
++ if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
++ req->mode |= NIX_RX_MODE_ALLMULTI;
++
++ req->mode |= NIX_RX_MODE_USE_MCE;
++
++ otx2_sync_mbox_msg(&pf->mbox);
++ mutex_unlock(&pf->mbox.lock);
++}
++
+ int otx2_open(struct net_device *netdev)
+ {
+ struct otx2_nic *pf = netdev_priv(netdev);
+@@ -1643,11 +1762,22 @@ int otx2_open(struct net_device *netdev)
+ otx2_dmacflt_reinstall_flows(pf);
+
+ err = otx2_rxtx_enable(pf, true);
+- if (err)
++ /* If an mbox communication error happens at this point, the interface
++ * ends up in a state where it is down but the hardware mcam entries
++ * are still enabled to receive packets. Hence disable the
++ * packet I/O.
++ */
++ if (err == EIO)
++ goto err_disable_rxtx;
++ else if (err)
+ goto err_tx_stop_queues;
+
++ otx2_do_set_rx_mode(pf);
++
+ return 0;
+
++err_disable_rxtx:
++ otx2_rxtx_enable(pf, false);
+ err_tx_stop_queues:
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+@@ -1791,43 +1921,11 @@ static void otx2_set_rx_mode(struct net_device *netdev)
+ queue_work(pf->otx2_wq, &pf->rx_mode_work);
+ }
+
+-static void otx2_do_set_rx_mode(struct work_struct *work)
++static void otx2_rx_mode_wrk_handler(struct work_struct *work)
+ {
+ struct otx2_nic *pf = container_of(work, struct otx2_nic, rx_mode_work);
+- struct net_device *netdev = pf->netdev;
+- struct nix_rx_mode *req;
+- bool promisc = false;
+
+- if (!(netdev->flags & IFF_UP))
+- return;
+-
+- if ((netdev->flags & IFF_PROMISC) ||
+- (netdev_uc_count(netdev) > OTX2_MAX_UNICAST_FLOWS)) {
+- promisc = true;
+- }
+-
+- /* Write unicast address to mcam entries or del from mcam */
+- if (!promisc && netdev->priv_flags & IFF_UNICAST_FLT)
+- __dev_uc_sync(netdev, otx2_add_macfilter, otx2_del_macfilter);
+-
+- mutex_lock(&pf->mbox.lock);
+- req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox);
+- if (!req) {
+- mutex_unlock(&pf->mbox.lock);
+- return;
+- }
+-
+- req->mode = NIX_RX_MODE_UCAST;
+-
+- if (promisc)
+- req->mode |= NIX_RX_MODE_PROMISC;
+- if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
+- req->mode |= NIX_RX_MODE_ALLMULTI;
+-
+- req->mode |= NIX_RX_MODE_USE_MCE;
+-
+- otx2_sync_mbox_msg(&pf->mbox);
+- mutex_unlock(&pf->mbox.lock);
++ otx2_do_set_rx_mode(pf);
+ }
+
+ static int otx2_set_features(struct net_device *netdev,
+@@ -2358,7 +2456,7 @@ static int otx2_wq_init(struct otx2_nic *pf)
+ if (!pf->otx2_wq)
+ return -ENOMEM;
+
+- INIT_WORK(&pf->rx_mode_work, otx2_do_set_rx_mode);
++ INIT_WORK(&pf->rx_mode_work, otx2_rx_mode_wrk_handler);
+ INIT_WORK(&pf->reset_task, otx2_reset_task);
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
+index 4bbd12ff26e64..e5f30fd778fc1 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
+@@ -274,4 +274,61 @@ enum nix_sqint_e {
+ BIT_ULL(NIX_SQINT_SEND_ERR) | \
+ BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
+
++enum nix_sqoperr_e {
++ NIX_SQOPERR_OOR = 0,
++ NIX_SQOPERR_CTX_FAULT = 1,
++ NIX_SQOPERR_CTX_POISON = 2,
++ NIX_SQOPERR_DISABLED = 3,
++ NIX_SQOPERR_SIZE_ERR = 4,
++ NIX_SQOPERR_OFLOW = 5,
++ NIX_SQOPERR_SQB_NULL = 6,
++ NIX_SQOPERR_SQB_FAULT = 7,
++ NIX_SQOPERR_SQE_SZ_ZERO = 8,
++ NIX_SQOPERR_MAX,
++};
++
++enum nix_mnqerr_e {
++ NIX_MNQERR_SQ_CTX_FAULT = 0,
++ NIX_MNQERR_SQ_CTX_POISON = 1,
++ NIX_MNQERR_SQB_FAULT = 2,
++ NIX_MNQERR_SQB_POISON = 3,
++ NIX_MNQERR_TOTAL_ERR = 4,
++ NIX_MNQERR_LSO_ERR = 5,
++ NIX_MNQERR_CQ_QUERY_ERR = 6,
++ NIX_MNQERR_MAX_SQE_SIZE_ERR = 7,
++ NIX_MNQERR_MAXLEN_ERR = 8,
++ NIX_MNQERR_SQE_SIZEM1_ZERO = 9,
++ NIX_MNQERR_MAX,
++};
++
++enum nix_snd_status_e {
++ NIX_SND_STATUS_GOOD = 0x0,
++ NIX_SND_STATUS_SQ_CTX_FAULT = 0x1,
++ NIX_SND_STATUS_SQ_CTX_POISON = 0x2,
++ NIX_SND_STATUS_SQB_FAULT = 0x3,
++ NIX_SND_STATUS_SQB_POISON = 0x4,
++ NIX_SND_STATUS_HDR_ERR = 0x5,
++ NIX_SND_STATUS_EXT_ERR = 0x6,
++ NIX_SND_STATUS_JUMP_FAULT = 0x7,
++ NIX_SND_STATUS_JUMP_POISON = 0x8,
++ NIX_SND_STATUS_CRC_ERR = 0x9,
++ NIX_SND_STATUS_IMM_ERR = 0x10,
++ NIX_SND_STATUS_SG_ERR = 0x11,
++ NIX_SND_STATUS_MEM_ERR = 0x12,
++ NIX_SND_STATUS_INVALID_SUBDC = 0x13,
++ NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14,
++ NIX_SND_STATUS_DATA_FAULT = 0x15,
++ NIX_SND_STATUS_DATA_POISON = 0x16,
++ NIX_SND_STATUS_NPC_DROP_ACTION = 0x17,
++ NIX_SND_STATUS_LOCK_VIOL = 0x18,
++ NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19,
++ NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20,
++ NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21,
++ NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22,
++ NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23,
++ NIX_SND_STATUS_SEND_MEM_FAULT = 0x24,
++ NIX_SND_STATUS_SEND_STATS_ERR = 0x25,
++ NIX_SND_STATUS_MAX,
++};
++
+ #endif /* OTX2_STRUCT_H */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index 626961a41089d..26231c59b0241 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -28,6 +28,9 @@
+ #define MAX_RATE_EXPONENT 0x0FULL
+ #define MAX_RATE_MANTISSA 0xFFULL
+
++#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
++#define CN10K_MAX_BURST_SIZE 8453888ULL
++
+ /* Bitfields in NIX_TLX_PIR register */
+ #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
+ #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
+@@ -35,6 +38,9 @@
+ #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
+ #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+
++#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
++#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
++
+ struct otx2_tc_flow_stats {
+ u64 bytes;
+ u64 pkts;
+@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+ }
+ EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+
+-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+- u32 *burst_mantissa)
++static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
++ u32 *burst_exp, u32 *burst_mantissa)
+ {
++ int max_burst, max_mantissa;
+ unsigned int tmp;
+
++ if (is_dev_otx2(nic->pdev)) {
++ max_burst = MAX_BURST_SIZE;
++ max_mantissa = MAX_BURST_MANTISSA;
++ } else {
++ max_burst = CN10K_MAX_BURST_SIZE;
++ max_mantissa = CN10K_MAX_BURST_MANTISSA;
++ }
++
+ /* Burst is calculated as
+ * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+ * Max supported burst size is 130,816 bytes.
+ */
+- burst = min_t(u32, burst, MAX_BURST_SIZE);
++ burst = min_t(u32, burst, max_burst);
+ if (burst) {
+ *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+ tmp = burst - rounddown_pow_of_two(burst);
+- if (burst < MAX_BURST_MANTISSA)
++ if (burst < max_mantissa)
+ *burst_mantissa = tmp * 2;
+ else
+ *burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+ } else {
+ *burst_exp = MAX_BURST_EXPONENT;
+- *burst_mantissa = MAX_BURST_MANTISSA;
++ *burst_mantissa = max_mantissa;
+ }
+ }
+
+-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
++static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
+ u32 *mantissa, u32 *div_exp)
+ {
+- unsigned int tmp;
++ u64 tmp;
+
+ /* Rate calculation by hardware
+ *
+@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+ }
+ }
+
+-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
++static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
++ u64 maxrate, u32 burst)
+ {
+- struct otx2_hw *hw = &nic->hw;
+- struct nix_txschq_config *req;
+ u32 burst_exp, burst_mantissa;
+ u32 exp, mantissa, div_exp;
++ u64 regval = 0;
++
++ /* Get exponent and mantissa values from the desired rate */
++ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
++ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
++
++ if (is_dev_otx2(nic->pdev)) {
++ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ } else {
++ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ }
++
++ return regval;
++}
++
++static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
++ u32 burst, u64 maxrate)
++{
++ struct otx2_hw *hw = &nic->hw;
++ struct nix_txschq_config *req;
+ int txschq, err;
+
+ /* All SQs share the same TL4, so pick the first scheduler */
+ txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+
+- /* Get exponent and mantissa values from the desired rate */
+- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+-
+ mutex_lock(&nic->mbox.lock);
+ req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+ if (!req) {
+@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
+ req->lvl = NIX_TXSCH_LVL_TL4;
+ req->num_regs = 1;
+ req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
+
+ err = otx2_sync_mbox_msg(&nic->mbox);
+ mutex_unlock(&nic->mbox.lock);
+@@ -196,7 +230,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct flow_action *actions = &cls->rule->action;
+ struct flow_action_entry *entry;
+- u32 rate;
++ u64 rate;
+ int err;
+
+ err = otx2_tc_validate_flow(nic, actions, extack);
+@@ -218,7 +252,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ }
+ /* Convert bytes per second to Mbps */
+ rate = entry->police.rate_bytes_ps * 8;
+- rate = max_t(u32, rate / 1000000, 1);
++ rate = max_t(u64, rate / 1000000, 1);
+ err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+ if (err)
+ return err;
+@@ -502,6 +536,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
+ return -EOPNOTSUPP;
+ }
+
++ if (!match.mask->vlan_id) {
++ struct flow_action_entry *act;
++ int i;
++
++ flow_action_for_each(i, act, &rule->action) {
++ if (act->id == FLOW_ACTION_DROP) {
++ netdev_err(nic->netdev,
++ "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n",
++ ntohs(match.key->vlan_tpid),
++ match.key->vlan_id);
++ return -EOPNOTSUPP;
++ }
++ }
++ }
++
+ if (match.mask->vlan_id ||
+ match.mask->vlan_dei ||
+ match.mask->vlan_priority) {
+@@ -571,21 +620,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
+
+ flow_spec->dport = match.key->dst;
+ flow_mask->dport = match.mask->dst;
+- if (ip_proto == IPPROTO_UDP)
+- req->features |= BIT_ULL(NPC_DPORT_UDP);
+- else if (ip_proto == IPPROTO_TCP)
+- req->features |= BIT_ULL(NPC_DPORT_TCP);
+- else if (ip_proto == IPPROTO_SCTP)
+- req->features |= BIT_ULL(NPC_DPORT_SCTP);
++
++ if (flow_mask->dport) {
++ if (ip_proto == IPPROTO_UDP)
++ req->features |= BIT_ULL(NPC_DPORT_UDP);
++ else if (ip_proto == IPPROTO_TCP)
++ req->features |= BIT_ULL(NPC_DPORT_TCP);
++ else if (ip_proto == IPPROTO_SCTP)
++ req->features |= BIT_ULL(NPC_DPORT_SCTP);
++ }
+
+ flow_spec->sport = match.key->src;
+ flow_mask->sport = match.mask->src;
+- if (ip_proto == IPPROTO_UDP)
+- req->features |= BIT_ULL(NPC_SPORT_UDP);
+- else if (ip_proto == IPPROTO_TCP)
+- req->features |= BIT_ULL(NPC_SPORT_TCP);
+- else if (ip_proto == IPPROTO_SCTP)
+- req->features |= BIT_ULL(NPC_SPORT_SCTP);
++
++ if (flow_mask->sport) {
++ if (ip_proto == IPPROTO_UDP)
++ req->features |= BIT_ULL(NPC_SPORT_UDP);
++ else if (ip_proto == IPPROTO_TCP)
++ req->features |= BIT_ULL(NPC_SPORT_TCP);
++ else if (ip_proto == IPPROTO_SCTP)
++ req->features |= BIT_ULL(NPC_SPORT_SCTP);
++ }
+ }
+
+ return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
+@@ -1050,7 +1105,12 @@ int otx2_init_tc(struct otx2_nic *nic)
+ return err;
+
+ tc->flow_ht_params = tc_flow_ht_params;
+- return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
++ err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
++ if (err) {
++ kfree(tc->tc_entries_bitmap);
++ tc->tc_entries_bitmap = NULL;
++ }
++ return err;
+ }
+
+ void otx2_shutdown_tc(struct otx2_nic *nic)
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+index f42b1d4e0c679..d1e3928a24f5c 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+@@ -18,6 +18,31 @@
+
+ #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx)))
+
++static int otx2_nix_cq_op_status(struct otx2_nic *pfvf,
++ struct otx2_cq_queue *cq)
++{
++ u64 incr = (u64)(cq->cq_idx) << 32;
++ u64 status;
++
++ status = otx2_atomic64_fetch_add(incr, pfvf->cq_op_addr);
++
++ if (unlikely(status & BIT_ULL(CQ_OP_STAT_OP_ERR) ||
++ status & BIT_ULL(CQ_OP_STAT_CQ_ERR))) {
++ dev_err(pfvf->dev, "CQ stopped due to error");
++ return -EINVAL;
++ }
++
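++ /* The tail and head ring indices are 20-bit fields of the CQ status
++ * value; compute the number of pending CQEs, accounting for ring
++ * wrap-around.
++ */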
++ cq->cq_tail = status & 0xFFFFF;
++ cq->cq_head = (status >> 20) & 0xFFFFF;
++ if (cq->cq_tail < cq->cq_head)
++ cq->pend_cqe = (cq->cqe_cnt - cq->cq_head) +
++ cq->cq_tail;
++ else
++ cq->pend_cqe = cq->cq_tail - cq->cq_head;
++
++ return 0;
++}
++
+ static struct nix_cqe_hdr_s *otx2_get_next_cqe(struct otx2_cq_queue *cq)
+ {
+ struct nix_cqe_hdr_s *cqe_hdr;
+@@ -318,7 +343,14 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
+ struct nix_cqe_rx_s *cqe;
+ int processed_cqe = 0;
+
+- while (likely(processed_cqe < budget)) {
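++ /* Use the cached pending-CQE count when it can satisfy the budget;
++ * otherwise re-read the CQ status from hardware before processing.
++ */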
++ if (cq->pend_cqe >= budget)
++ goto process_cqe;
++
++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
++ return 0;
++
++process_cqe:
++ while (likely(processed_cqe < budget) && cq->pend_cqe) {
+ cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head);
+ if (cqe->hdr.cqe_type == NIX_XQE_TYPE_INVALID ||
+ !cqe->sg.seg_addr) {
+@@ -334,6 +366,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
+ cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
+ cqe->sg.seg_addr = 0x00;
+ processed_cqe++;
++ cq->pend_cqe--;
+ }
+
+ /* Free CQEs to HW */
+@@ -368,7 +401,14 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
+ struct nix_cqe_tx_s *cqe;
+ int processed_cqe = 0;
+
+- while (likely(processed_cqe < budget)) {
++ if (cq->pend_cqe >= budget)
++ goto process_cqe;
++
++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
++ return 0;
++
++process_cqe:
++ while (likely(processed_cqe < budget) && cq->pend_cqe) {
+ cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
+ if (unlikely(!cqe)) {
+ if (!processed_cqe)
+@@ -380,6 +420,7 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
+
+ cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
+ processed_cqe++;
++ cq->pend_cqe--;
+ }
+
+ /* Free CQEs to HW */
+@@ -533,9 +574,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
+ htons(ext->lso_sb - skb_network_offset(skb));
+ } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+ ext->lso_format = pfvf->hw.lso_tsov6_idx;
+-
+- ipv6_hdr(skb)->payload_len =
+- htons(ext->lso_sb - skb_network_offset(skb));
++ ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb));
+ } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ __be16 l3_proto = vlan_get_protocol(skb);
+ struct udphdr *udph = udp_hdr(skb);
+@@ -936,10 +975,16 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+ int processed_cqe = 0;
+ u64 iova, pa;
+
+- while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) {
+- if (!cqe->sg.subdc)
+- continue;
++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
++ return;
++
++ while (cq->pend_cqe) {
++ cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
+ processed_cqe++;
++ cq->pend_cqe--;
++
++ if (!cqe)
++ continue;
+ if (cqe->sg.segs > 1) {
+ otx2_free_rcv_seg(pfvf, cqe, cq->cq_idx);
+ continue;
+@@ -965,7 +1010,16 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+
+ sq = &pfvf->qset.sq[cq->cint_idx];
+
+- while ((cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq))) {
++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
++ return;
++
++ while (cq->pend_cqe) {
++ cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
++ processed_cqe++;
++ cq->pend_cqe--;
++
++ if (!cqe)
++ continue;
+ sg = &sq->sg[cqe->comp.sqe_id];
+ skb = (struct sk_buff *)sg->skb;
+ if (skb) {
+@@ -973,7 +1027,6 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+ dev_kfree_skb_any(skb);
+ sg->skb = (u64)NULL;
+ }
+- processed_cqe++;
+ }
+
+ /* Free CQEs to HW */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+index 3ff1ad79c0011..6a97631ff2269 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+@@ -56,6 +56,9 @@
+ */
+ #define CQ_QCOUNT_DEFAULT 1
+
++#define CQ_OP_STAT_OP_ERR 63
++#define CQ_OP_STAT_CQ_ERR 46
++
+ struct queue_stats {
+ u64 bytes;
+ u64 pkts;
+@@ -122,6 +125,8 @@ struct otx2_cq_queue {
+ u16 pool_ptrs;
+ u32 cqe_cnt;
+ u32 cq_head;
++ u32 cq_tail;
++ u32 pend_cqe;
+ void *cqe_base;
+ struct qmem *cqe;
+ struct otx2_pool *rbpool;
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+index 03b4ec630432b..e69b0e2729cb2 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+@@ -630,7 +630,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+
+ err = otx2vf_realloc_msix_vectors(vf);
+ if (err)
+- goto err_mbox_destroy;
++ goto err_detach_rsrc;
+
+ err = otx2_set_real_num_queues(netdev, qcount, qcount);
+ if (err)
+@@ -704,6 +704,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ err_unreg_netdev:
+ unregister_netdev(netdev);
+ err_detach_rsrc:
++ free_percpu(vf->hw.lmt_info);
+ if (test_bit(CN10K_LMTST, &vf->hw.cap_flag))
+ qmem_free(vf->dev, vf->dync_lmt);
+ otx2_detach_resources(&vf->mbox);
+@@ -738,6 +739,7 @@ static void otx2vf_remove(struct pci_dev *pdev)
+ destroy_workqueue(vf->otx2_wq);
+ otx2vf_disable_mbox_intr(vf);
+ otx2_detach_resources(&vf->mbox);
++ free_percpu(vf->hw.lmt_info);
+ if (test_bit(CN10K_LMTST, &vf->hw.cap_flag))
+ qmem_free(vf->dev, vf->dync_lmt);
+ otx2vf_vfaf_mbox_destroy(vf);
+diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
+index 44c670807fb3c..912759ea6ec59 100644
+--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
++++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
+@@ -54,12 +54,14 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid)
+ struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw,
+ u32 dev_id, u32 hw_id)
+ {
+- struct prestera_port *port = NULL;
++ struct prestera_port *port = NULL, *tmp;
+
+ read_lock(&sw->port_list_lock);
+- list_for_each_entry(port, &sw->port_list, list) {
+- if (port->dev_id == dev_id && port->hw_id == hw_id)
++ list_for_each_entry(tmp, &sw->port_list, list) {
++ if (tmp->dev_id == dev_id && tmp->hw_id == hw_id) {
++ port = tmp;
+ break;
++ }
+ }
+ read_unlock(&sw->port_list_lock);
+
+@@ -68,12 +70,14 @@ struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw,
+
+ struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id)
+ {
+- struct prestera_port *port = NULL;
++ struct prestera_port *port = NULL, *tmp;
+
+ read_lock(&sw->port_list_lock);
+- list_for_each_entry(port, &sw->port_list, list) {
+- if (port->id == id)
++ list_for_each_entry(tmp, &sw->port_list, list) {
++ if (tmp->id == id) {
++ port = tmp;
+ break;
++ }
+ }
+ read_unlock(&sw->port_list_lock);
+
+@@ -137,7 +141,7 @@ static int prestera_port_set_mac_address(struct net_device *dev, void *p)
+ if (err)
+ return err;
+
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ return 0;
+ }
+@@ -488,6 +492,7 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
+ dev_info(prestera_dev(sw), "using random base mac address\n");
+ }
+ of_node_put(base_mac_np);
++ of_node_put(np);
+
+ return prestera_hw_switch_mac_set(sw, sw->base_mac);
+ }
+@@ -703,23 +708,27 @@ static int prestera_netdev_port_event(struct net_device *lower,
+ struct net_device *dev,
+ unsigned long event, void *ptr)
+ {
+- struct netdev_notifier_changeupper_info *info = ptr;
++ struct netdev_notifier_info *info = ptr;
++ struct netdev_notifier_changeupper_info *cu_info;
+ struct prestera_port *port = netdev_priv(dev);
+ struct netlink_ext_ack *extack;
+ struct net_device *upper;
+
+- extack = netdev_notifier_info_to_extack(&info->info);
+- upper = info->upper_dev;
++ extack = netdev_notifier_info_to_extack(info);
++ cu_info = container_of(info,
++ struct netdev_notifier_changeupper_info,
++ info);
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
++ upper = cu_info->upper_dev;
+ if (!netif_is_bridge_master(upper) &&
+ !netif_is_lag_master(upper)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+ return -EINVAL;
+ }
+
+- if (!info->linking)
++ if (!cu_info->linking)
+ break;
+
+ if (netdev_has_any_upper_dev(upper)) {
+@@ -728,7 +737,7 @@ static int prestera_netdev_port_event(struct net_device *lower,
+ }
+
+ if (netif_is_lag_master(upper) &&
+- !prestera_lag_master_check(upper, info->upper_info, extack))
++ !prestera_lag_master_check(upper, cu_info->upper_info, extack))
+ return -EOPNOTSUPP;
+ if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+@@ -744,14 +753,15 @@ static int prestera_netdev_port_event(struct net_device *lower,
+ break;
+
+ case NETDEV_CHANGEUPPER:
++ upper = cu_info->upper_dev;
+ if (netif_is_bridge_master(upper)) {
+- if (info->linking)
++ if (cu_info->linking)
+ return prestera_bridge_port_join(upper, port,
+ extack);
+ else
+ prestera_bridge_port_leave(upper, port);
+ } else if (netif_is_lag_master(upper)) {
+- if (info->linking)
++ if (cu_info->linking)
+ return prestera_lag_port_add(port, upper);
+ else
+ prestera_lag_port_del(port);
+diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+index a250d394da380..6bef633aa6330 100644
+--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
++++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+@@ -645,7 +645,8 @@ pick_fw_ver:
+
+ err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
+ if (err) {
+- if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
++ if (ver_maj != PRESTERA_PREV_FW_MAJ_VER ||
++ ver_min != PRESTERA_PREV_FW_MIN_VER) {
+ ver_maj = PRESTERA_PREV_FW_MAJ_VER;
+ ver_min = PRESTERA_PREV_FW_MIN_VER;
+
+@@ -815,6 +816,7 @@ static void prestera_pci_remove(struct pci_dev *pdev)
+ static const struct pci_device_id prestera_pci_devices[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC80C) },
++ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xCC1E) },
+ { }
+ };
+ MODULE_DEVICE_TABLE(pci, prestera_pci_devices);
+diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
+index 73d2eba5262f0..a47aa624f7454 100644
+--- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
++++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
+@@ -776,6 +776,7 @@ tx_done:
+ int prestera_rxtx_switch_init(struct prestera_switch *sw)
+ {
+ struct prestera_rxtx *rxtx;
++ int err;
+
+ rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL);
+ if (!rxtx)
+@@ -783,7 +784,11 @@ int prestera_rxtx_switch_init(struct prestera_switch *sw)
+
+ sw->rxtx = rxtx;
+
+- return prestera_sdma_switch_init(sw);
++ err = prestera_sdma_switch_init(sw);
++ if (err)
++ kfree(rxtx);
++
++ return err;
+ }
+
+ void prestera_rxtx_switch_fini(struct prestera_switch *sw)
+diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+index 3ce6ccd0f5394..b4599fe4ca8da 100644
+--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
++++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+@@ -497,8 +497,8 @@ int prestera_bridge_port_join(struct net_device *br_dev,
+
+ br_port = prestera_bridge_port_add(bridge, port->dev);
+ if (IS_ERR(br_port)) {
+- err = PTR_ERR(br_port);
+- goto err_brport_create;
++ prestera_bridge_put(bridge);
++ return PTR_ERR(br_port);
+ }
+
+ err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
+@@ -519,8 +519,6 @@ err_port_join:
+ switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
+ err_switchdev_offload:
+ prestera_bridge_port_put(br_port);
+-err_brport_create:
+- prestera_bridge_put(bridge);
+ return err;
+ }
+
+@@ -1124,7 +1122,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused,
+ prestera_port_obj_attr_set);
+ break;
+ default:
+- err = -EOPNOTSUPP;
++ return NOTIFY_DONE;
+ }
+
+ return notifier_from_errno(err);
+diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
+index fab53c9b8380c..8ebd13f089db2 100644
+--- a/drivers/net/ethernet/marvell/pxa168_eth.c
++++ b/drivers/net/ethernet/marvell/pxa168_eth.c
+@@ -1434,7 +1434,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
+
+ INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
+
+- err = of_get_mac_address(pdev->dev.of_node, dev->dev_addr);
++ err = of_get_ethdev_address(pdev->dev.of_node, dev);
+ if (err) {
+ /* try reading the mac address, if set by the bootloader */
+ pxa168_eth_get_mac_address(dev, dev->dev_addr);
+diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
+index e9fc74e54b22e..ac0dbf1b97437 100644
+--- a/drivers/net/ethernet/marvell/sky2.c
++++ b/drivers/net/ethernet/marvell/sky2.c
+@@ -4802,7 +4802,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
+ * 1) from device tree data
+ * 2) from internal registers set by bootloader
+ */
+- ret = of_get_mac_address(hw->pdev->dev.of_node, dev->dev_addr);
++ ret = of_get_ethdev_address(hw->pdev->dev.of_node, dev);
+ if (ret)
+ memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8,
+ ETH_ALEN);
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 398c23cec8151..139dfdb1e58bd 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -91,46 +91,53 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth)
+ }
+
+ dev_err(eth->dev, "mdio: MDIO timeout\n");
+- return -1;
++ return -ETIMEDOUT;
+ }
+
+-static u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
+- u32 phy_register, u32 write_data)
++static int _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr, u32 phy_reg,
++ u32 write_data)
+ {
+- if (mtk_mdio_busy_wait(eth))
+- return -1;
++ int ret;
+
+- write_data &= 0xffff;
++ ret = mtk_mdio_busy_wait(eth);
++ if (ret < 0)
++ return ret;
+
+- mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_WRITE |
+- (phy_register << PHY_IAC_REG_SHIFT) |
+- (phy_addr << PHY_IAC_ADDR_SHIFT) | write_data,
++ mtk_w32(eth, PHY_IAC_ACCESS |
++ PHY_IAC_START_C22 |
++ PHY_IAC_CMD_WRITE |
++ PHY_IAC_REG(phy_reg) |
++ PHY_IAC_ADDR(phy_addr) |
++ PHY_IAC_DATA(write_data),
+ MTK_PHY_IAC);
+
+- if (mtk_mdio_busy_wait(eth))
+- return -1;
++ ret = mtk_mdio_busy_wait(eth);
++ if (ret < 0)
++ return ret;
+
+ return 0;
+ }
+
+-static u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
++static int _mtk_mdio_read(struct mtk_eth *eth, u32 phy_addr, u32 phy_reg)
+ {
+- u32 d;
++ int ret;
+
+- if (mtk_mdio_busy_wait(eth))
+- return 0xffff;
++ ret = mtk_mdio_busy_wait(eth);
++ if (ret < 0)
++ return ret;
+
+- mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_READ |
+- (phy_reg << PHY_IAC_REG_SHIFT) |
+- (phy_addr << PHY_IAC_ADDR_SHIFT),
++ mtk_w32(eth, PHY_IAC_ACCESS |
++ PHY_IAC_START_C22 |
++ PHY_IAC_CMD_C22_READ |
++ PHY_IAC_REG(phy_reg) |
++ PHY_IAC_ADDR(phy_addr),
+ MTK_PHY_IAC);
+
+- if (mtk_mdio_busy_wait(eth))
+- return 0xffff;
+-
+- d = mtk_r32(eth, MTK_PHY_IAC) & 0xffff;
++ ret = mtk_mdio_busy_wait(eth);
++ if (ret < 0)
++ return ret;
+
+- return d;
++ return mtk_r32(eth, MTK_PHY_IAC) & PHY_IAC_DATA_MASK;
+ }
+
+ static int mtk_mdio_write(struct mii_bus *bus, int phy_addr,
+@@ -217,7 +224,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
+ phylink_config);
+ struct mtk_eth *eth = mac->hw;
+ u32 mcr_cur, mcr_new, sid, i;
+- int val, ge_mode, err;
++ int val, ge_mode, err = 0;
+
+ /* MT76x8 has no hardware settings between for the MAC */
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
+@@ -356,7 +363,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
+ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+ mcr_new = mcr_cur;
+ mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+- MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
++ MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK |
++ MAC_MCR_RX_FIFO_CLR_DIS;
+
+ /* Only update control register when needed! */
+ if (mcr_new != mcr_cur)
+@@ -813,6 +821,17 @@ static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+ return true;
+ }
+
++static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask)
++{
++ unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH);
++ unsigned long data;
++
++ data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN,
++ get_order(size));
++
++ return (void *)data;
++}
++
+ /* the qdma core needs scratch memory to be setup */
+ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ {
+@@ -1304,7 +1323,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ goto release_desc;
+
+ /* alloc new buffer */
+- new_data = napi_alloc_frag(ring->frag_size);
++ if (ring->frag_size <= PAGE_SIZE)
++ new_data = napi_alloc_frag(ring->frag_size);
++ else
++ new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC);
+ if (unlikely(!new_data)) {
+ netdev->stats.rx_dropped++;
+ goto release_desc;
+@@ -1718,7 +1740,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ return -ENOMEM;
+
+ for (i = 0; i < rx_dma_size; i++) {
+- ring->data[i] = netdev_alloc_frag(ring->frag_size);
++ if (ring->frag_size <= PAGE_SIZE)
++ ring->data[i] = netdev_alloc_frag(ring->frag_size);
++ else
++ ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL);
+ if (!ring->data[i])
+ return -ENOMEM;
+ }
+@@ -1984,6 +2009,9 @@ static int mtk_hwlro_get_fdir_entry(struct net_device *dev,
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+
++ if (fsp->location >= ARRAY_SIZE(mac->hwlro_ip))
++ return -EINVAL;
++
+ /* only tcp dst ipv4 is meaningful, others are meaningless */
+ fsp->flow_type = TCP_V4_FLOW;
+ fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]);
+@@ -2010,6 +2038,9 @@ static int mtk_hwlro_get_fdir_all(struct net_device *dev,
+ int i;
+
+ for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
++ if (cnt == cmd->rule_cnt)
++ return -EMSGSIZE;
++
+ if (mac->hwlro_ip[i]) {
+ rule_locs[cnt] = i;
+ cnt++;
+@@ -2300,8 +2331,10 @@ static int mtk_open(struct net_device *dev)
+ int err;
+
+ err = mtk_start_dma(eth);
+- if (err)
++ if (err) {
++ phylink_disconnect_phy(mac->phylink);
+ return err;
++ }
+
+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+ gdm_config = MTK_GDMA_TO_PPE;
+@@ -2582,23 +2615,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
+ return 0;
+ }
+
+-static int __init mtk_init(struct net_device *dev)
+-{
+- struct mtk_mac *mac = netdev_priv(dev);
+- struct mtk_eth *eth = mac->hw;
+- int ret;
+-
+- ret = of_get_mac_address(mac->of_node, dev->dev_addr);
+- if (ret) {
+- /* If the mac address is invalid, use random mac address */
+- eth_hw_addr_random(dev);
+- dev_err(eth->dev, "generated random MAC address %pM\n",
+- dev->dev_addr);
+- }
+-
+- return 0;
+-}
+-
+ static void mtk_uninit(struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+@@ -2926,7 +2942,6 @@ static const struct ethtool_ops mtk_ethtool_ops = {
+ };
+
+ static const struct net_device_ops mtk_netdev_ops = {
+- .ndo_init = mtk_init,
+ .ndo_uninit = mtk_uninit,
+ .ndo_open = mtk_open,
+ .ndo_stop = mtk_stop,
+@@ -2980,6 +2995,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+ mac->hw = eth;
+ mac->of_node = np;
+
++ err = of_get_ethdev_address(mac->of_node, eth->netdev[id]);
++ if (err == -EPROBE_DEFER)
++ return err;
++
++ if (err) {
++ /* If the mac address is invalid, use random mac address */
++ eth_hw_addr_random(eth->netdev[id]);
++ dev_err(eth->dev, "generated random MAC address %pM\n",
++ eth->netdev[id]->dev_addr);
++ }
++
+ memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+ mac->hwlro_ip_cnt = 0;
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index 5ef70dd8b49c6..d60260e00a3fc 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -341,11 +341,17 @@
+ /* PHY Indirect Access Control registers */
+ #define MTK_PHY_IAC 0x10004
+ #define PHY_IAC_ACCESS BIT(31)
+-#define PHY_IAC_READ BIT(19)
+-#define PHY_IAC_WRITE BIT(18)
+-#define PHY_IAC_START BIT(16)
+-#define PHY_IAC_ADDR_SHIFT 20
+-#define PHY_IAC_REG_SHIFT 25
++#define PHY_IAC_REG_MASK GENMASK(29, 25)
++#define PHY_IAC_REG(x) FIELD_PREP(PHY_IAC_REG_MASK, (x))
++#define PHY_IAC_ADDR_MASK GENMASK(24, 20)
++#define PHY_IAC_ADDR(x) FIELD_PREP(PHY_IAC_ADDR_MASK, (x))
++#define PHY_IAC_CMD_MASK GENMASK(19, 18)
++#define PHY_IAC_CMD_WRITE FIELD_PREP(PHY_IAC_CMD_MASK, 1)
++#define PHY_IAC_CMD_C22_READ FIELD_PREP(PHY_IAC_CMD_MASK, 2)
++#define PHY_IAC_START_MASK GENMASK(17, 16)
++#define PHY_IAC_START_C22 FIELD_PREP(PHY_IAC_START_MASK, 1)
++#define PHY_IAC_DATA_MASK GENMASK(15, 0)
++#define PHY_IAC_DATA(x) FIELD_PREP(PHY_IAC_DATA_MASK, (x))
+ #define PHY_IAC_TIMEOUT HZ
+
+ #define MTK_MAC_MISC 0x1000c
+@@ -363,6 +369,7 @@
+ #define MAC_MCR_FORCE_MODE BIT(15)
+ #define MAC_MCR_TX_EN BIT(14)
+ #define MAC_MCR_RX_EN BIT(13)
++#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12)
+ #define MAC_MCR_BACKOFF_EN BIT(9)
+ #define MAC_MCR_BACKPR_EN BIT(8)
+ #define MAC_MCR_FORCE_RX_FC BIT(5)
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+index 3ad10c793308e..66298e2235c91 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+ int i, k;
+
+- memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
++ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
+
+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
+ return;
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+index 7bb1f20002b58..7c5403c010715 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -462,6 +462,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ if (IS_ERR(block_cb))
+ return PTR_ERR(block_cb);
+
++ flow_block_cb_incref(block_cb);
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &block_cb_list);
+ return 0;
+@@ -470,7 +471,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ if (!block_cb)
+ return -ENOENT;
+
+- if (flow_block_cb_decref(block_cb)) {
++ if (!flow_block_cb_decref(block_cb)) {
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ }
+diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
+index 32d83421226a2..5897940a418b6 100644
+--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
++++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
+@@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3)
+ break;
+
+ ss->regmap[i] = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(ss->regmap[i]))
+ return PTR_ERR(ss->regmap[i]);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+index ef518b1040f72..c3cffb32fb067 100644
+--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+@@ -663,7 +663,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void)
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000,
+- ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
++ ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000,
+@@ -675,9 +675,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void)
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000,
+- ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
++ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000,
+- ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
++ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
+@@ -2103,7 +2103,7 @@ static int mlx4_en_get_module_eeprom(struct net_device *dev,
+ en_err(priv,
+ "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n",
+ i, offset, ee->len - i, ret);
+- return 0;
++ return ret;
+ }
+
+ i += ret;
+diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+index 8af7f28273225..3bd3603873e32 100644
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -2286,9 +2286,14 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+ bool carry_xdp_prog)
+ {
+ struct bpf_prog *xdp_prog;
+- int i, t;
++ int i, t, ret;
+
+- mlx4_en_copy_priv(tmp, priv, prof);
++ ret = mlx4_en_copy_priv(tmp, priv, prof);
++ if (ret) {
++ en_warn(priv, "%s: mlx4_en_copy_priv() failed, return\n",
++ __func__);
++ return ret;
++ }
+
+ if (mlx4_en_alloc_resources(tmp)) {
+ en_warn(priv,
+diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
+index b149e601f6737..48cfaa7eaf50c 100644
+--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
++++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
+@@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev,
+ err = mlx4_bitmap_init(*bitmap + k, 1,
+ MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0,
+ 0);
+- mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0);
++ if (!err)
++ mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0);
+ }
+
+ if (err)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+index db5dfff585c99..41c15a65fb459 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -130,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
+
+ static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&cmd->alloc_lock, flags);
++ lockdep_assert_held(&cmd->alloc_lock);
+ set_bit(idx, &cmd->bitmask);
+- spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+@@ -144,13 +141,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+
+ static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+ {
++ struct mlx5_cmd *cmd = ent->cmd;
++ unsigned long flags;
++
++ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ if (!refcount_dec_and_test(&ent->refcnt))
+- return;
++ goto out;
+
+- if (ent->idx >= 0)
+- cmd_free_index(ent->cmd, ent->idx);
++ if (ent->idx >= 0) {
++ cmd_free_index(cmd, ent->idx);
++ up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
++ }
+
+ cmd_free_ent(ent);
++out:
++ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+@@ -334,6 +339,9 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ case MLX5_CMD_OP_DEALLOC_SF:
++ case MLX5_CMD_OP_DESTROY_UCTX:
++ case MLX5_CMD_OP_DESTROY_UMEM:
++ case MLX5_CMD_OP_MODIFY_RQT:
+ return MLX5_CMD_STAT_OK;
+
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+@@ -439,7 +447,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_CREATE_RQT:
+- case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_QUERY_RQT:
+
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+@@ -459,9 +466,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_CREATE_UCTX:
+- case MLX5_CMD_OP_DESTROY_UCTX:
+ case MLX5_CMD_OP_CREATE_UMEM:
+- case MLX5_CMD_OP_DESTROY_UMEM:
+ case MLX5_CMD_OP_ALLOC_MEMIC:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+@@ -895,25 +900,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
+ return cmd->allowed_opcode == opcode;
+ }
+
+-static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
+-{
+- unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
+- int idx;
+-
+-retry:
+- idx = cmd_alloc_index(cmd);
+- if (idx < 0 && time_before(jiffies, alloc_end)) {
+- /* Index allocation can fail on heavy load of commands. This is a temporary
+- * situation as the current command already holds the semaphore, meaning that
+- * another command completion is being handled and it is expected to release
+- * the entry index soon.
+- */
+- cpu_relax();
+- goto retry;
+- }
+- return idx;
+-}
+-
+ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
+ {
+ return pci_channel_offline(dev->pdev) ||
+@@ -938,7 +924,7 @@ static void cmd_work_handler(struct work_struct *work)
+ sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+ down(sem);
+ if (!ent->page_queue) {
+- alloc_ret = cmd_alloc_index_retry(cmd);
++ alloc_ret = cmd_alloc_index(cmd);
+ if (alloc_ret < 0) {
+ mlx5_core_err_rl(dev, "failed to allocate command entry\n");
+ if (ent->callback) {
+@@ -985,6 +971,7 @@ static void cmd_work_handler(struct work_struct *work)
+ cmd_ent_get(ent);
+ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+
++ cmd_ent_get(ent); /* for the _real_ FW event on completion */
+ /* Skip sending command to fw if internal error */
+ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
+ u8 status = 0;
+@@ -998,7 +985,6 @@ static void cmd_work_handler(struct work_struct *work)
+ return;
+ }
+
+- cmd_ent_get(ent); /* for the _real_ FW event on completion */
+ /* ring doorbell after the descriptor is valid */
+ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
+ wmb();
+@@ -1448,8 +1434,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
+ return -EFAULT;
+
+ err = sscanf(outlen_str, "%d", &outlen);
+- if (err < 0)
+- return err;
++ if (err != 1)
++ return -EINVAL;
+
+ ptr = kzalloc(outlen, GFP_KERNEL);
+ if (!ptr)
+@@ -1594,8 +1580,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
+ vector = vec & 0xffffffff;
+ for (i = 0; i < (1 << cmd->log_sz); i++) {
+ if (test_bit(i, &vector)) {
+- struct semaphore *sem;
+-
+ ent = cmd->ent_arr[i];
+
+ /* if we already completed the command, ignore it */
+@@ -1614,14 +1598,10 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
+ cmd_ent_put(ent); /* timeout work was canceled */
+
+ if (!forced || /* Real FW completion */
+- pci_channel_offline(dev->pdev) || /* FW is inaccessible */
+- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++ mlx5_cmd_is_down(dev) || /* No real FW completion is expected */
++ !opcode_allowed(cmd, ent->op))
+ cmd_ent_put(ent);
+
+- if (ent->page_queue)
+- sem = &cmd->pages_sem;
+- else
+- sem = &cmd->sem;
+ ent->ts2 = ktime_get_ns();
+ memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+ dump_command(dev, ent, 0);
+@@ -1675,7 +1655,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
+ */
+ complete(&ent->done);
+ }
+- up(sem);
+ }
+ }
+ }
+@@ -1720,12 +1699,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev)
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+- for (i = 0; i < cmd->max_reg_cmds; i++)
+- while (down_trylock(&cmd->sem))
++ for (i = 0; i < cmd->max_reg_cmds; i++) {
++ while (down_trylock(&cmd->sem)) {
+ mlx5_cmd_trigger_completions(dev);
++ cond_resched();
++ }
++ }
+
+- while (down_trylock(&cmd->pages_sem))
++ while (down_trylock(&cmd->pages_sem)) {
+ mlx5_cmd_trigger_completions(dev);
++ cond_resched();
++ }
+
+ /* Unlock cmdif */
+ up(&cmd->pages_sem);
+@@ -1886,7 +1870,7 @@ void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+ ctx->dev = dev;
+ /* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+ atomic_set(&ctx->num_inflight, 1);
+- init_waitqueue_head(&ctx->wait);
++ init_completion(&ctx->inflight_done);
+ }
+ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+@@ -1900,8 +1884,8 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+ */
+ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+ {
+- atomic_dec(&ctx->num_inflight);
+- wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
++ if (!atomic_dec_and_test(&ctx->num_inflight))
++ wait_for_completion(&ctx->inflight_done);
+ }
+ EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+@@ -1912,7 +1896,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
+
+ work->user_callback(status, work);
+ if (atomic_dec_and_test(&ctx->num_inflight))
+- wake_up(&ctx->wait);
++ complete(&ctx->inflight_done);
+ }
+
+ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+@@ -1928,7 +1912,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+ ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+ mlx5_cmd_exec_cb_handler, work, false);
+ if (ret && atomic_dec_and_test(&ctx->num_inflight))
+- wake_up(&ctx->wait);
++ complete(&ctx->inflight_done);
+
+ return ret;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+index 02e77ffe5c3e4..5371ad0a12eb5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+@@ -164,13 +164,14 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+ MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, in, uid, cq->uid);
+ err = mlx5_cmd_exec_in(dev, destroy_cq, in);
++ if (err)
++ return err;
+
+ synchronize_irq(cq->irqn);
+-
+ mlx5_cq_put(cq);
+ wait_for_completion(&cq->free);
+
+- return err;
++ return 0;
+ }
+ EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+index 07c8d9811bc81..10d195042ab55 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+@@ -507,6 +507,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+ if (!mlx5_debugfs_root)
+ return;
+
+- if (cq->dbg)
++ if (cq->dbg) {
+ rem_res_tree(cq->dbg);
++ cq->dbg = NULL;
++ }
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+index e8093c4e09d4e..949f12ede3d29 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+@@ -544,12 +544,9 @@ static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
+ PCI_SLOT(dev->pdev->devfn));
+ }
+
+-static int next_phys_dev(struct device *dev, const void *data)
++static int _next_phys_dev(struct mlx5_core_dev *mdev,
++ const struct mlx5_core_dev *curr)
+ {
+- struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
+- struct mlx5_core_dev *mdev = madev->mdev;
+- const struct mlx5_core_dev *curr = data;
+-
+ if (!mlx5_core_is_pf(mdev))
+ return 0;
+
+@@ -562,22 +559,69 @@ static int next_phys_dev(struct device *dev, const void *data)
+ return 1;
+ }
+
+-/* Must be called with intf_mutex held */
+-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
++static void *pci_get_other_drvdata(struct device *this, struct device *other)
+ {
+- struct auxiliary_device *adev;
+- struct mlx5_adev *madev;
++ if (this->driver != other->driver)
++ return NULL;
++
++ return pci_get_drvdata(to_pci_dev(other));
++}
++
++static int next_phys_dev(struct device *dev, const void *data)
++{
++ struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
++
++ mdev = pci_get_other_drvdata(this->device, dev);
++ if (!mdev)
++ return 0;
++
++ return _next_phys_dev(mdev, data);
++}
++
++static int next_phys_dev_lag(struct device *dev, const void *data)
++{
++ struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
++
++ mdev = pci_get_other_drvdata(this->device, dev);
++ if (!mdev)
++ return 0;
++
++ if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
++ !MLX5_CAP_GEN(mdev, lag_master) ||
++ MLX5_CAP_GEN(mdev, num_lag_ports) != MLX5_MAX_PORTS)
++ return 0;
++
++ return _next_phys_dev(mdev, data);
++}
++
++static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
++ int (*match)(struct device *dev, const void *data))
++{
++ struct device *next;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+- adev = auxiliary_find_device(NULL, dev, &next_phys_dev);
+- if (!adev)
++ next = bus_find_device(&pci_bus_type, NULL, dev, match);
++ if (!next)
+ return NULL;
+
+- madev = container_of(adev, struct mlx5_adev, adev);
+- put_device(&adev->dev);
+- return madev->mdev;
++ put_device(next);
++ return pci_get_drvdata(to_pci_dev(next));
++}
++
++/* Must be called with intf_mutex held */
++struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
++{
++ lockdep_assert_held(&mlx5_intf_mutex);
++ return mlx5_get_next_dev(dev, &next_phys_dev);
++}
++
++/* Must be called with intf_mutex held */
++struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
++{
++ lockdep_assert_held(&mlx5_intf_mutex);
++ return mlx5_get_next_dev(dev, &next_phys_dev_lag);
+ }
+
+ void mlx5_dev_list_lock(void)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+index dcf9f27ba2efd..7d56a927081d0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+@@ -625,7 +625,6 @@ static int mlx5_devlink_eth_param_register(struct devlink *devlink)
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ value);
+- devlink_param_publish(devlink, &enable_eth_param);
+ return 0;
+ }
+
+@@ -636,7 +635,6 @@ static void mlx5_devlink_eth_param_unregister(struct devlink *devlink)
+ if (!mlx5_eth_supported(dev))
+ return;
+
+- devlink_param_unpublish(devlink, &enable_eth_param);
+ devlink_param_unregister(devlink, &enable_eth_param);
+ }
+
+@@ -672,7 +670,6 @@ static int mlx5_devlink_rdma_param_register(struct devlink *devlink)
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ value);
+- devlink_param_publish(devlink, &enable_rdma_param);
+ return 0;
+ }
+
+@@ -681,7 +678,6 @@ static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink)
+ if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
+ return;
+
+- devlink_param_unpublish(devlink, &enable_rdma_param);
+ devlink_param_unregister(devlink, &enable_rdma_param);
+ }
+
+@@ -706,7 +702,6 @@ static int mlx5_devlink_vnet_param_register(struct devlink *devlink)
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ value);
+- devlink_param_publish(devlink, &enable_rdma_param);
+ return 0;
+ }
+
+@@ -717,7 +712,6 @@ static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink)
+ if (!mlx5_vnet_supported(dev))
+ return;
+
+- devlink_param_unpublish(devlink, &enable_vnet_param);
+ devlink_param_unregister(devlink, &enable_vnet_param);
+ }
+
+@@ -808,7 +802,6 @@ int mlx5_devlink_register(struct devlink *devlink)
+ if (err)
+ goto params_reg_err;
+ mlx5_devlink_set_params_init_values(devlink);
+- devlink_params_publish(devlink);
+
+ err = mlx5_devlink_auxdev_params_register(devlink);
+ if (err)
+@@ -818,6 +811,7 @@ int mlx5_devlink_register(struct devlink *devlink)
+ if (err)
+ goto traps_reg_err;
+
++ devlink_params_publish(devlink);
+ return 0;
+
+ traps_reg_err:
+@@ -832,9 +826,9 @@ params_reg_err:
+
+ void mlx5_devlink_unregister(struct devlink *devlink)
+ {
++ devlink_params_unpublish(devlink);
+ mlx5_devlink_traps_unregister(devlink);
+ mlx5_devlink_auxdev_params_unregister(devlink);
+- devlink_params_unpublish(devlink);
+ devlink_params_unregister(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
+ devlink_unregister(devlink);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+index f9cf9fb315479..958cdb9755598 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+@@ -64,6 +64,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+ MLX5_GET(mtrc_cap, out, num_string_trace);
+ tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
++ tracer->str_db.loaded = false;
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+@@ -482,7 +483,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer,
+ (u64)timestamp_low;
+ break;
+ default:
+- if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
++ if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
+ tracer_event->event_id <= tracer->str_db.first_string_trace +
+ tracer->str_db.num_string_trace) {
+ tracer_event->type = TRACER_EVENT_TYPE_STRING;
+@@ -602,7 +603,7 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
+ } else {
+ cur_string = mlx5_tracer_message_get(tracer, tracer_event);
+ if (!cur_string) {
+- pr_debug("%s Got string event for unknown string tdsm: %d\n",
++ pr_debug("%s Got string event for unknown string tmsn: %d\n",
+ __func__, tracer_event->string_event.tmsn);
+ return -1;
+ }
+@@ -638,7 +639,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
+ trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+ else
+- trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
++ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+
+ mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+@@ -675,6 +676,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+ if (!tracer->owner)
+ return;
+
++ if (unlikely(!tracer->str_db.loaded))
++ goto arm;
++
+ block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+@@ -732,6 +736,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+ &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+ }
+
++arm:
+ mlx5_fw_tracer_arm(dev);
+ }
+
+@@ -752,6 +757,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
++ tracer->buff.consumer_index = 0;
+ return err;
+ }
+
+@@ -816,7 +822,6 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+ mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+ if (tracer->owner) {
+ tracer->owner = false;
+- tracer->buff.consumer_index = 0;
+ return;
+ }
+
+@@ -1137,8 +1142,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void
+ queue_work(tracer->work_queue, &tracer->ownership_change_work);
+ break;
+ case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+- if (likely(tracer->str_db.loaded))
+- queue_work(tracer->work_queue, &tracer->handle_traces_work);
++ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ break;
+ default:
+ mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+index ed4fb79b4db76..75b6060f7a9ae 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+@@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
+ struct mlx5_rsc_dump {
+ u32 pdn;
+ struct mlx5_core_mkey mkey;
++ u32 number_of_menu_items;
+ u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
+ };
+
+@@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
+ return -EINVAL;
+ }
+
+-static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page)
++#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
++ MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
++ MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
++
++static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
++ int read_size, int start_idx)
+ {
+ void *data = page_address(page);
+ enum mlx5_sgmt_type sgmt_idx;
+ int num_of_items;
+ char *sgmt_name;
+ void *member;
++ int size = 0;
+ void *menu;
+ int i;
+
+- menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+- num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records);
++ if (!start_idx) {
++ menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
++ rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
++ num_of_records);
++ size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
++ data += size;
++ }
++ num_of_items = rsc_dump->number_of_menu_items;
++
++ for (i = 0; start_idx + i < num_of_items; i++) {
++ size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
++ if (size >= read_size)
++ return start_idx + i;
+
+- for (i = 0; i < num_of_items; i++) {
+- member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]);
++ member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
+ sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
+ sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
+ if (sgmt_idx == -EINVAL)
+@@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct
+ rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
+ member, segment_type);
+ }
++ return 0;
+ }
+
+ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+@@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+ struct mlx5_rsc_dump_cmd *cmd = NULL;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
++ int start_idx = 0;
+ int size;
+ int err;
+
+@@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+ if (err < 0)
+ goto destroy_cmd;
+
+- mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page);
++ start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
+
+ } while (err > 0);
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index 03a7a4ce5cd5e..c822c3ac0544b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -103,7 +103,7 @@ struct page_pool;
+ #define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1))
+ #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
+ #define MLX5E_MAX_RQ_NUM_MTTS \
+- ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
++ (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
+ (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
+@@ -244,6 +244,17 @@ enum mlx5e_priv_flag {
+
+ #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
+
++enum packet_merge {
++ MLX5E_PACKET_MERGE_NONE,
++ MLX5E_PACKET_MERGE_LRO,
++ MLX5E_PACKET_MERGE_SHAMPO,
++};
++
++struct mlx5e_packet_merge_param {
++ enum packet_merge type;
++ u32 timeout;
++};
++
+ struct mlx5e_params {
+ u8 log_sq_size;
+ u8 rq_wq_type;
+@@ -258,13 +269,12 @@ struct mlx5e_params {
+ bool tunneled_offload_en;
+ struct dim_cq_moder rx_cq_moderation;
+ struct dim_cq_moder tx_cq_moderation;
+- bool lro_en;
++ struct mlx5e_packet_merge_param packet_merge;
+ u8 tx_min_inline_mode;
+ bool vlan_strip_disable;
+ bool scatter_fcs_en;
+ bool rx_dim_enabled;
+ bool tx_dim_enabled;
+- u32 lro_timeout;
+ u32 pflags;
+ struct bpf_prog *xdp_prog;
+ struct mlx5e_xsk *xsk;
+@@ -717,6 +727,8 @@ struct mlx5e_channel {
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+ int ix;
+ int cpu;
++ /* Sync between icosq recovery and XSK enable/disable. */
++ struct mutex icosq_recovery_lock;
+ };
+
+ struct mlx5e_ptp;
+@@ -944,9 +956,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
+ void mlx5e_destroy_rq(struct mlx5e_rq *rq);
+
+ struct mlx5e_sq_param;
+-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
+ struct mlx5e_xdpsq *sq, bool is_redirect);
+@@ -993,7 +1002,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
+ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
+
+-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
++int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state);
+ void mlx5e_activate_rq(struct mlx5e_rq *rq);
+ void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+index a88a1a48229f6..d634c034a4199 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+@@ -12,6 +12,7 @@ struct mlx5e_post_act;
+ enum {
+ MLX5E_TC_FT_LEVEL = 0,
+ MLX5E_TC_TTC_FT_LEVEL,
++ MLX5E_TC_MISS_LEVEL,
+ };
+
+ struct mlx5e_tc_table {
+@@ -20,6 +21,7 @@ struct mlx5e_tc_table {
+ */
+ struct mutex t_lock;
+ struct mlx5_flow_table *t;
++ struct mlx5_flow_table *miss_t;
+ struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+index 018262d0164b3..3aaf3c2752feb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
++void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
+
+ #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+ #define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+index 3cbb596821e89..15f441a1b80c2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+@@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+ mlx5e_rx_get_linear_frag_sz(params, NULL));
+
+- return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
++ return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
++ linear_frag_sz <= PAGE_SIZE;
+ }
+
+ bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+@@ -164,19 +165,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ mlx5e_rx_is_linear_skb(params, xsk) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+
+- return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+-}
+-
+-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
+-{
+- struct mlx5e_lro_param lro_param;
+-
+- lro_param = (struct mlx5e_lro_param) {
+- .enabled = params->lro_en,
+- .timeout = params->lro_timeout,
+- };
+-
+- return lro_param;
++ return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
++ mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+ }
+
+ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+@@ -485,10 +475,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
+
+ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+ {
++ bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+- return ro && params->lro_en ?
++ return ro && lro_en ?
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+index 879ad46d754e1..e9593f5f06610 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+@@ -11,11 +11,6 @@ struct mlx5e_xsk_param {
+ u16 chunk_size;
+ };
+
+-struct mlx5e_lro_param {
+- bool enabled;
+- u32 timeout;
+-};
+-
+ struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+@@ -125,7 +120,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
+
+ /* Build queue parameters */
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+index 673f1c82d3815..c9d5d8d93994d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+@@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ if (err)
+ return err;
+
+- err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz,
+- xoff, &port_buffer, &update_buffer);
++ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
++ port_buff_cell_sz, &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+index 3a86f66d12955..ce941e6091c57 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+@@ -126,6 +126,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
++ mlx5e_txqsq_wake(&ptpsq->txqsq);
++
+ return work_done == budget;
+ }
+
+@@ -674,8 +676,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
+ cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
+- if (!c || !cparams)
+- return -ENOMEM;
++ if (!c || !cparams) {
++ err = -ENOMEM;
++ goto err_free;
++ }
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+index a71a32e00ebb9..dc7c57e6de77a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+@@ -6,6 +6,7 @@
+
+ #include "en.h"
+ #include "en_stats.h"
++#include "en/txrx.h"
+ #include <linux/ptp_classify.h>
+
+ #define MLX5E_PTP_CHANNEL_IX 0
+@@ -67,6 +68,14 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+ fk.ports.dst == htons(PTP_EV_PORT));
+ }
+
++static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
++{
++ if (!sq->ptpsq)
++ return true;
++
++ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
++}
++
+ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ u8 lag_port, struct mlx5e_ptp **cp);
+ void mlx5e_ptp_close(struct mlx5e_ptp *c);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+index e8a8d78e3e4d5..965838893432d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+@@ -553,7 +553,8 @@ static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
+
+ static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+ {
+- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
++ /* Hardware treats 0 as "unlimited", set at least 1. */
++ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+index 9c076aa20306a..b6f5c1bcdbcd4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+
+ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+ {
+- struct mlx5e_rep_priv *rpriv;
+- struct mlx5e_priv *priv;
+-
+- /* A given netdev is not a representor or not a slave of LAG configuration */
+- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
+- return false;
+-
+- priv = netdev_priv(netdev);
+- rpriv = priv->ppriv;
+-
+- /* Egress acl forward to vport is supported only non-uplink representor */
+- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
++ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+ }
+
+ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
+ u16 fwd_vport_num;
+ int err;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+ {
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ struct mlx5e_rep_priv *rpriv;
++ struct mlx5e_rep_bond *bond;
++ struct mlx5e_priv *priv;
++
++ if (!mlx5e_rep_is_lag_netdev(netdev))
++ return NOTIFY_DONE;
++
++ bond = container_of(nb, struct mlx5e_rep_bond, nb);
++ priv = netdev_priv(netdev);
++ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
++ /* Verify VF representor is on the same device of the bond handling the netevent. */
++ if (rpriv->uplink_priv.bond != bond)
++ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+index c6d2f8c78db71..291bd59639044 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr
+ return err;
+ }
+
++static int
++mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
++{
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct netdev_notifier_changeupper_info *info = ptr;
++ struct net_device *upper = info->upper_dev;
++ struct net_device *lower;
++ struct list_head *iter;
++
++ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
++ return 0;
++
++ netdev_for_each_lower_dev(dev, lower, iter) {
++ struct mlx5_core_dev *mdev;
++ struct mlx5e_priv *priv;
++
++ if (!mlx5e_eswitch_rep(lower))
++ continue;
++
++ priv = netdev_priv(lower);
++ mdev = priv->mdev;
++ if (!mlx5_lag_is_active(mdev))
++ return -EAGAIN;
++ if (!mlx5_lag_is_shared_fdb(mdev))
++ return -EOPNOTSUPP;
++ }
++
++ return 0;
++}
++
+ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+ {
+@@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
++ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
+ break;
+
+ case NETDEV_CHANGEUPPER:
+@@ -401,10 +432,6 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+- /* only handle the event on native eswtich of representor */
+- if (!mlx5_esw_bridge_is_local(dev, rep, esw))
+- break;
+-
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+@@ -491,7 +518,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+- err = register_netdevice_notifier(&br_offloads->netdev_nb);
++ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+@@ -509,7 +536,9 @@ err_register_swdev_blk:
+ err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+ err_alloc_wq:
++ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
++ rtnl_unlock();
+ }
+
+ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+@@ -524,7 +553,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+- unregister_netdevice_notifier(&br_offloads->netdev_nb);
++ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+index de03684528bbf..8451940c16ab9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+@@ -647,9 +647,7 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+-#endif /* CONFIG_MLX5_TC_SAMPLE */
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+index 0eb125316fe20..899a9a73eef68 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+@@ -59,6 +59,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+
+ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+ {
++ struct mlx5e_rq *xskrq = NULL;
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_icosq *icosq;
+ struct net_device *dev;
+@@ -67,7 +68,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+ int err;
+
+ icosq = ctx;
++
++ mutex_lock(&icosq->channel->icosq_recovery_lock);
++
++ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
+ rq = &icosq->channel->rq;
++ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
++ xskrq = &icosq->channel->xskrq;
+ mdev = icosq->channel->mdev;
+ dev = icosq->channel->netdev;
+ err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+@@ -81,6 +88,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+ goto out;
+
+ mlx5e_deactivate_rq(rq);
++ if (xskrq)
++ mlx5e_deactivate_rq(xskrq);
++
+ err = mlx5e_wait_for_icosq_flush(icosq);
+ if (err)
+ goto out;
+@@ -94,35 +104,29 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+ goto out;
+
+ mlx5e_reset_icosq_cc_pc(icosq);
++
+ mlx5e_free_rx_in_progress_descs(rq);
++ if (xskrq)
++ mlx5e_free_rx_in_progress_descs(xskrq);
++
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mlx5e_activate_icosq(icosq);
+- mlx5e_activate_rq(rq);
+
++ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+- return 0;
+-out:
+- clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+- return err;
+-}
+-
+-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+-{
+- struct net_device *dev = rq->netdev;
+- int err;
+
+- err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
+- if (err) {
+- netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+- return err;
+- }
+- err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+- if (err) {
+- netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+- return err;
++ if (xskrq) {
++ mlx5e_activate_rq(xskrq);
++ xskrq->stats->recover++;
+ }
+
++ mutex_unlock(&icosq->channel->icosq_recovery_lock);
++
+ return 0;
++out:
++ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
++ mutex_unlock(&icosq->channel->icosq_recovery_lock);
++ return err;
+ }
+
+ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+@@ -131,19 +135,14 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
+ int err;
+
+ mlx5e_deactivate_rq(rq);
+- mlx5e_free_rx_descs(rq);
+-
+- err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
++ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
++ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+ if (err)
+- goto out;
++ return err;
+
+- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+ mlx5e_activate_rq(rq);
+ rq->stats->recover++;
+ return 0;
+-out:
+- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
+- return err;
+ }
+
+ static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+@@ -703,6 +702,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+ mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+ }
+
++void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
++{
++ mutex_lock(&c->icosq_recovery_lock);
++}
++
++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
++{
++ mutex_unlock(&c->icosq_recovery_lock);
++}
++
+ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+ .name = "rx",
+ .recover = mlx5e_rx_reporter_recover,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+index bb682fd751c98..8024599994642 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+@@ -463,6 +463,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
+ return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ }
+
++static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
++ void *ctx)
++{
++ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
++
++ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
++}
++
+ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+ struct devlink_fmsg *fmsg)
+ {
+@@ -558,7 +566,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+ to_ctx.sq = sq;
+ err_ctx.ctx = &to_ctx;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+- err_ctx.dump = mlx5e_tx_reporter_dump_sq;
++ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
+ snprintf(err_str, sizeof(err_str),
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
+ sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+index 625cd49ef96c5..7b55b14d47ef7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+@@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+
+ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+- const struct mlx5e_lro_param *init_lro_param,
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+ {
+ struct mlx5e_rss_params_traffic_type rss_tt;
+@@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+ rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+ mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+ rqtn, rss->inner_ft_support);
+- mlx5e_tir_builder_build_lro(builder, init_lro_param);
++ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+ mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+
+@@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types
+ }
+
+ static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+- const struct mlx5e_lro_param *init_lro_param,
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner)
+ {
+ enum mlx5_traffic_types tt, max_tt;
+ int err;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+- err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
++ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ goto err_destroy_tirs;
+ }
+@@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+
+ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+- const struct mlx5e_lro_param *init_lro_param)
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+ {
+ int err;
+
+@@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ if (err)
+ goto err_out;
+
+- err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
++ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
+ if (err)
+ goto err_destroy_rqt;
+
+ if (inner_ft_support) {
+- err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
++ err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
+ if (err)
+ goto err_destroy_tirs;
+ }
+@@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ */
+ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+- const struct mlx5e_lro_param *init_lro_param,
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn)
+ {
+ struct mlx5e_tir *tir;
+@@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ if (!tir) { /* TIR doesn't exist, create one */
+ int err;
+
+- err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
++ err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
+ if (err)
+ return err;
+ tir = rss_get_tir(rss, tt, inner);
+@@ -418,7 +418,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
+ mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+ }
+
+-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
++int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
++ struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+ struct mlx5e_tir_builder *builder;
+ enum mlx5_traffic_types tt;
+@@ -428,7 +429,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p
+ if (!builder)
+ return -ENOMEM;
+
+- mlx5e_tir_builder_build_lro(builder, lro_param);
++ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+index d522a10dadf33..c6b2164163440 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+@@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void);
+ void mlx5e_rss_free(struct mlx5e_rss *rss);
+ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn,
+- const struct mlx5e_lro_param *init_lro_param);
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param);
+ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+ bool inner_ft_support, u32 drop_rqn);
+ int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+@@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+ bool inner);
+ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+ enum mlx5_traffic_types tt,
+- const struct mlx5e_lro_param *init_lro_param,
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ bool inner, u32 *tirn);
+
+ void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+ void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
++int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
++ struct mlx5e_packet_merge_param *pkt_merge_param);
+ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+ const u8 *key, const u8 *hfunc,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+index 13056cb9757d4..0015a81eb9a17 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+@@ -13,6 +13,9 @@ struct mlx5e_rx_res {
+ unsigned int max_nch;
+ u32 drop_rqn;
+
++ struct mlx5e_packet_merge_param pkt_merge_param;
++ struct rw_semaphore pkt_merge_param_sem;
++
+ struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+ bool rss_active;
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+@@ -34,7 +37,7 @@ struct mlx5e_rx_res {
+ /* API for rx_res_rss_* */
+
+ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+- const struct mlx5e_lro_param *init_lro_param,
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch)
+ {
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+@@ -49,7 +52,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+ return -ENOMEM;
+
+ err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+- init_lro_param);
++ init_pkt_merge_param);
+ if (err)
+ goto err_rss_free;
+
+@@ -275,7 +278,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+ }
+
+ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+- const struct mlx5e_lro_param *init_lro_param)
++ const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+ {
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+@@ -306,7 +309,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+- mlx5e_tir_builder_build_lro(builder, init_lro_param);
++ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+@@ -336,7 +339,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ inner_ft_support);
+- mlx5e_tir_builder_build_lro(builder, init_lro_param);
++ mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+@@ -392,6 +395,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+ if (err)
+ goto out;
+
++ /* Separated from the channels RQs, does not share pkt_merge state with them */
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ inner_ft_support);
+@@ -437,7 +441,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+
+ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+- u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
++ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch)
+ {
+ int err;
+@@ -447,11 +451,14 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ res->max_nch = max_nch;
+ res->drop_rqn = drop_rqn;
+
+- err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
++ res->pkt_merge_param = *init_pkt_merge_param;
++ init_rwsem(&res->pkt_merge_param_sem);
++
++ err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
+ if (err)
+ goto err_out;
+
+- err = mlx5e_rx_res_channels_init(res, init_lro_param);
++ err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
+ if (err)
+ goto err_rss_destroy;
+
+@@ -513,7 +520,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+ return mlx5e_tir_get_tirn(&res->ptp.tir);
+ }
+
+-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
++static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+ {
+ return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+ }
+@@ -645,7 +652,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+ return err;
+ }
+
+-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
++int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
++ struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+ struct mlx5e_tir_builder *builder;
+ int err, final_err;
+@@ -655,7 +663,10 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+ if (!builder)
+ return -ENOMEM;
+
+- mlx5e_tir_builder_build_lro(builder, lro_param);
++ down_write(&res->pkt_merge_param_sem);
++ res->pkt_merge_param = *pkt_merge_param;
++
++ mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
+
+ final_err = 0;
+
+@@ -665,7 +676,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+ if (!rss)
+ continue;
+
+- err = mlx5e_rss_lro_set_param(rss, lro_param);
++ err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
+ if (err)
+ final_err = final_err ? : err;
+ }
+@@ -673,13 +684,14 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+ if (err) {
+- mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
++ mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
++ up_write(&res->pkt_merge_param_sem);
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+ }
+@@ -688,3 +700,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *
+ {
+ return mlx5e_rss_get_hash(res->rss[0]);
+ }
++
++int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
++ struct mlx5e_tir *tir)
++{
++ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
++ struct mlx5e_tir_builder *builder;
++ u32 rqtn;
++ int err;
++
++ builder = mlx5e_tir_builder_alloc(false);
++ if (!builder)
++ return -ENOMEM;
++
++ rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
++
++ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
++ inner_ft_support);
++ mlx5e_tir_builder_build_direct(builder);
++ mlx5e_tir_builder_build_tls(builder);
++ down_read(&res->pkt_merge_param_sem);
++ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
++ err = mlx5e_tir_init(tir, builder, res->mdev, false);
++ up_read(&res->pkt_merge_param_sem);
++
++ mlx5e_tir_builder_free(builder);
++
++ return err;
++}
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+index 4a15942d79f7d..b39b20a720e0f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+@@ -25,7 +25,7 @@ enum mlx5e_rx_res_features {
+ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+- u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
++ u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
+ unsigned int init_nch);
+ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+ void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+@@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types
+ u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+-/* RQTN getters for modules that create their own TIRs */
+-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+-
+ /* Activate/deactivate API */
+ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+@@ -57,7 +54,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+ u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+ int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
++int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
++ struct mlx5e_packet_merge_param *pkt_merge_param);
+
+ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+ int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+@@ -68,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+ /* Workaround for hairpin */
+ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
++/* Accel TIRs */
++int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
++ struct mlx5e_tir *tir);
+ #endif /* __MLX5_EN_RX_RES_H__ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+index db0146df9b303..9ef8a49d78014 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+@@ -19,6 +19,8 @@ struct mlx5e_sample_attr {
+ struct mlx5e_sample_flow *sample_flow;
+ };
+
++#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
++
+ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+ struct mlx5_flow_handle *
+@@ -38,4 +40,29 @@ mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+ void
+ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
++#else /* CONFIG_MLX5_TC_SAMPLE */
++
++static inline struct mlx5_flow_handle *
++mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
++ struct mlx5_flow_spec *spec,
++ struct mlx5_flow_attr *attr,
++ u32 tunnel_id)
++{ return ERR_PTR(-EOPNOTSUPP); }
++
++static inline void
++mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
++ struct mlx5_flow_handle *rule,
++ struct mlx5_flow_attr *attr) {}
++
++static inline struct mlx5e_tc_psample *
++mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
++{ return ERR_PTR(-EOPNOTSUPP); }
++
++static inline void
++mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
++
++static inline void
++mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
++
++#endif /* CONFIG_MLX5_TC_SAMPLE */
+ #endif /* __MLX5_EN_TC_SAMPLE_H__ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+index 6c949abcd2e14..94200f2dd92b0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+@@ -650,7 +650,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ struct mlx5e_mod_hdr_handle **mh,
+- u8 zone_restore_id, bool nat)
++ u8 zone_restore_id, bool nat_table, bool has_nat)
+ {
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct flow_action_entry *meta;
+@@ -665,11 +665,12 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ &attr->ct_attr.ct_labels_id);
+ if (err)
+ return -EOPNOTSUPP;
+- if (nat) {
+- err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
+- &mod_acts);
+- if (err)
+- goto err_mapping;
++ if (nat_table) {
++ if (has_nat) {
++ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
++ if (err)
++ goto err_mapping;
++ }
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
+ }
+@@ -684,7 +685,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ if (err)
+ goto err_mapping;
+
+- if (nat) {
++ if (nat_table && has_nat) {
+ attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+ mod_acts.num_actions,
+ mod_acts.actions);
+@@ -752,7 +753,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ &zone_rule->mh,
+- zone_restore_id, nat);
++ zone_restore_id,
++ nat,
++ mlx5_tc_ct_entry_has_nat(entry));
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+@@ -1356,9 +1359,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
+ int
+ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
++ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+ {
++ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
++ int err;
++
+ if (!priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+@@ -1369,6 +1376,17 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
++ if (!clear_action)
++ goto out;
++
++ err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
++ if (err) {
++ NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
++ return err;
++ }
++ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
++
++out:
+ return 0;
+ }
+
+@@ -1684,6 +1702,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
+ static void
+ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+ {
++ struct mlx5e_priv *priv;
++
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+@@ -1693,6 +1713,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+ rhashtable_free_and_destroy(&ft->ct_entries_ht,
+ mlx5_tc_ct_flush_ft_entry,
+ ct_priv);
++ priv = netdev_priv(ct_priv->netdev);
++ flush_workqueue(priv->wq);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+ mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
+ kfree(ft);
+@@ -1898,23 +1920,16 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
+
+ memcpy(pre_ct_attr, attr, attr_sz);
+
+- err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
+- if (err) {
+- ct_dbg("Failed to set register for ct clear");
+- goto err_set_registers;
+- }
+-
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
+ mod_acts->num_actions,
+ mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to add create ct clear mod hdr");
+- goto err_set_registers;
++ goto err_mod_hdr;
+ }
+
+ pre_ct_attr->modify_hdr = mod_hdr;
+- pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
+ if (IS_ERR(rule)) {
+@@ -1930,7 +1945,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
+
+ err_insert:
+ mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
+-err_set_registers:
++err_mod_hdr:
+ netdev_warn(priv->netdev,
+ "Failed to offload ct clear flow, err %d\n", err);
+ kfree(pre_ct_attr);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+index 363329f4aac61..99662af1e41a7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+@@ -110,6 +110,7 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
+ int
+ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
++ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
+@@ -172,6 +173,7 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
+ static inline int
+ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
++ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+ {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+index d1599b7b944bf..c340bf90354a0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+@@ -102,6 +102,7 @@ struct mlx5e_tc_flow {
+ refcount_t refcnt;
+ struct rcu_head rcu_head;
+ struct completion init_done;
++ struct completion del_hw_done;
+ int tunnel_id; /* the mapped tunnel id of this flow */
+ struct mlx5_flow_attr *attr;
+ };
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+index 4a13ef561587d..d90c6dc41c9f4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+@@ -1,6 +1,7 @@
+ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+ /* Copyright (c) 2018 Mellanox Technologies. */
+
++#include <net/inet_ecn.h>
+ #include <net/vxlan.h>
+ #include <net/gre.h>
+ #include <net/geneve.h>
+@@ -229,7 +230,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ int err;
+
+ /* add the IP fields */
+- attr.fl.fl4.flowi4_tos = tun_key->tos;
++ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+@@ -344,7 +345,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
+ int err;
+
+ /* add the IP fields */
+- attr.fl.fl4.flowi4_tos = tun_key->tos;
++ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
+ attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
+ attr.fl.fl4.saddr = tun_key->u.ipv4.src;
+ attr.ttl = tun_key->ttl;
+@@ -496,7 +497,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ int err;
+
+ attr.ttl = tun_key->ttl;
+- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
++ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+@@ -610,7 +611,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+
+ attr.ttl = tun_key->ttl;
+
+- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
++ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
+ attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+ attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index 1c44c6c345f5d..a8d7f07ee2ca0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -188,12 +188,19 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+- if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
++ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
++ /* Clear pkt_reformat before checking slow path flag. Because
++ * in next iteration, the same flow is already set slow path
++ * flag, but still need to clear the pkt_reformat.
++ */
++ if (flow_flag_test(flow, SLOW))
++ continue;
++
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+@@ -221,8 +228,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
+ struct list_head *flow_list,
+ int index)
+ {
+- if (IS_ERR(mlx5e_flow_get(flow)))
++ if (IS_ERR(mlx5e_flow_get(flow))) {
++ /* Flow is being deleted concurrently. Wait for it to be
++ * unoffloaded from hardware, otherwise deleting encap will
++ * fail.
++ */
++ wait_for_completion(&flow->del_hw_done);
+ return;
++ }
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_entry_index = index;
+@@ -1336,7 +1349,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
+ continue;
+ }
+
+- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
++ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
+ err);
+@@ -1538,6 +1551,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
+ struct net_device *fib_dev;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info, info);
++ if (fen_info->fi->nh)
++ return NULL;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+ fen_info->dst_len != 32)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+index 60952b33b5688..d2333310b56fe 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+@@ -60,37 +60,31 @@ static int parse_tunnel(struct mlx5e_priv *priv,
+ void *headers_v)
+ {
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+- struct flow_match_enc_keyid enc_keyid;
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+- misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+- misc_parameters_2);
+- misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+- misc_parameters_2);
+-
+- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+- return 0;
+-
+- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+- return 0;
+-
+- flow_rule_match_enc_keyid(rule, &enc_keyid);
+-
+- if (!enc_keyid.mask->keyid)
+- return 0;
+-
+ if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
+ !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
++ return -EOPNOTSUPP;
++
++ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
++ return 0;
++
+ flow_rule_match_mpls(rule, &match);
+
+ /* Only support matching the first LSE */
+ if (match.mask->used_lses != 1)
+ return -EOPNOTSUPP;
+
++ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
++ misc_parameters_2);
++ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
++ misc_parameters_2);
++
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label,
+ match.mask->ls[0].mpls_label);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+index 4267f3a1059e7..78b1a6ddd9675 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+@@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh;
+
++ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT)
++ return -EOPNOTSUPP;
+ vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+ *ip_proto = IPPROTO_UDP;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+index de936dc4bc483..a1afb8585e37f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+@@ -70,24 +70,24 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+ }
+
+-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+- const struct mlx5e_lro_param *lro_param)
++void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
++ const struct mlx5e_packet_merge_param *pkt_merge_param)
+ {
+ void *tirc = mlx5e_tir_builder_get_tirc(builder);
+ const unsigned int rough_max_l2_l3_hdr_sz = 256;
+
+ if (builder->modify)
+- MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
++ MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
+
+- if (!lro_param->enabled)
++ if (pkt_merge_param->type == MLX5E_PACKET_MERGE_NONE)
+ return;
+
+- MLX5_SET(tirc, tirc, lro_enable_mask,
+- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
++ MLX5_SET(tirc, tirc, packet_merge_mask,
++ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
++ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+- MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
++ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
+ }
+
+ static int mlx5e_hfunc_to_hw(u8 hfunc)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+index e45149a78ed9d..857a84bcd53af 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+@@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type {
+ };
+
+ struct mlx5e_tir_builder;
+-struct mlx5e_lro_param;
++struct mlx5e_packet_merge_param;
+
+ struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+ void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+@@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+ void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+ u32 rqtn, bool inner_ft_support);
+-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+- const struct mlx5e_lro_param *lro_param);
++void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
++ const struct mlx5e_packet_merge_param *pkt_merge_param);
+ void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+ const struct mlx5e_rss_params_hash *rss_hash,
+ const struct mlx5e_rss_params_traffic_type *rss_tt,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+index 055c3bc237339..cf62d1f6d7f20 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+@@ -73,6 +73,12 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
++static inline bool
++mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
++{
++ return (*fifo->pc - *fifo->cc) < fifo->mask;
++}
++
+ static inline bool
+ mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+ {
+@@ -166,6 +172,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+ return pi;
+ }
+
++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
++
+ struct mlx5e_icosq_wqe_info {
+ u8 wqe_type;
+ u8 num_wqebbs;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+index 7b562d2c8a196..279cd8f4e79f7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+@@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
+ {
+ struct device *dev = mlx5_core_dma_dev(priv->mdev);
+
+- return xsk_pool_dma_map(pool, dev, 0);
++ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
+ }
+
+ static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
+ struct xsk_buff_pool *pool)
+ {
+- return xsk_pool_dma_unmap(pool, 0);
++ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+ }
+
+ static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+index 538bc2419bd83..8526a5fbbf0bf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+@@ -4,6 +4,7 @@
+ #include "setup.h"
+ #include "en/params.h"
+ #include "en/txrx.h"
++#include "en/health.h"
+
+ /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
+ * change unexpectedly, and mlx5e has a minimum valid stride size for striding
+@@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
+
+ void mlx5e_activate_xsk(struct mlx5e_channel *c)
+ {
++ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
++ * activating XSKRQ in the middle of recovery.
++ */
++ mlx5e_reporter_icosq_suspend_recovery(c);
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
++ mlx5e_reporter_icosq_resume_recovery(c);
++
+ /* TX queue is created active. */
+
+ spin_lock_bh(&c->async_icosq_lock);
+@@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
+
+ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+ {
+- mlx5e_deactivate_rq(&c->xskrq);
++ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
++ * middle of recovery. Suspend the recovery to avoid it.
++ */
++ mlx5e_reporter_icosq_suspend_recovery(c);
++ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
++ mlx5e_reporter_icosq_resume_recovery(c);
++ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
++
+ /* TX queue is disabled on close. */
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+index 4c4ee524176c7..153c16e3ff3bf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+@@ -194,6 +194,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kfree(ft->g);
++ ft->g = NULL;
+ kvfree(in);
+ return -ENOMEM;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 7cab08a2f7152..05882d1a4407c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -113,7 +113,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom = 0;
+ u8 overlap;
+- u32 *esn;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+@@ -128,11 +127,9 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+- esn = &sa_entry->esn_state.esn;
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+- ++(*esn);
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+index fb5397324aa4f..4590d19c25cf7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+
+ trailer_len = alen + plen + 2;
+
+- pskb_trim(skb, skb->len - trailer_len);
++ ret = pskb_trim(skb, skb->len - trailer_len);
++ if (unlikely(ret))
++ return ret;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+ ip_send_check(ipv4hdr);
+@@ -157,11 +159,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
++
++ switch (xo->inner_ipproto) {
++ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
++ fallthrough;
++ case IPPROTO_TCP:
++ /* IP | ESP | IP | [TCP | UDP] */
++ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
++ break;
++ default:
++ break;
++ }
+ return;
+ }
+
+@@ -191,7 +202,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ eseg->swp_inner_l4_offset =
+ (skb->csum_start + skb->head - skb->data) / 2;
+- if (skb->protocol == htons(ETH_P_IPV6))
++ if (inner_ip_hdr(skb)->version == 6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ break;
+ default:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+index 5120a59361e6a..428881e0adcbe 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+@@ -127,6 +127,28 @@ out_disable:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+ }
+
++static inline bool
++mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
++ struct mlx5_wqe_eth_seg *eseg)
++{
++ u8 inner_ipproto;
++
++ if (!mlx5e_ipsec_eseg_meta(eseg))
++ return false;
++
++ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
++ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
++ if (inner_ipproto) {
++ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
++ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
++ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
++ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
++ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
++ sq->stats->csum_partial_inner++;
++ }
++
++ return true;
++}
+ #else
+ static inline
+ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
+@@ -143,6 +165,13 @@ static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false;
+ static inline netdev_features_t
+ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+ { return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); }
++
++static inline bool
++mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
++ struct mlx5_wqe_eth_seg *eseg)
++{
++ return false;
++}
+ #endif /* CONFIG_MLX5_EN_IPSEC */
+
+ #endif /* __MLX5E_IPSEC_RXTX_H__ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+index d93aadbf10da8..90ea78239d402 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+@@ -16,7 +16,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+- if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
++ if (!mlx5e_ktls_type_check(mdev, crypto_info))
+ return -EOPNOTSUPP;
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+index 62abce008c7b8..d92b97c56f4cd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+@@ -55,6 +55,7 @@ struct mlx5e_ktls_offload_context_rx {
+ DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
+
+ /* resync */
++ spinlock_t lock; /* protects resync fields */
+ struct mlx5e_ktls_rx_resync_ctx resync;
+ struct list_head list;
+ };
+@@ -99,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
+ return resp_list;
+ }
+
+-static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn)
+-{
+- struct mlx5e_tir_builder *builder;
+- int err;
+-
+- builder = mlx5e_tir_builder_alloc(false);
+- if (!builder)
+- return -ENOMEM;
+-
+- mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false);
+- mlx5e_tir_builder_build_direct(builder);
+- mlx5e_tir_builder_build_tls(builder);
+- err = mlx5e_tir_init(tir, builder, mdev, false);
+-
+- mlx5e_tir_builder_free(builder);
+-
+- return err;
+-}
+-
+ static void accel_rule_handle_work(struct work_struct *work)
+ {
+ struct mlx5e_ktls_offload_context_rx *priv_rx;
+@@ -249,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_rx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
+
+- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) >
+- TLS_OFFLOAD_CONTEXT_SIZE_RX);
++ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
+
+ *ctx = priv_rx;
+ }
+@@ -386,14 +367,18 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r
+ struct mlx5e_icosq *sq;
+ bool trigger_poll;
+
+- memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
+-
+ sq = &c->async_icosq;
+ ktls_resync = sq->ktls_resync;
++ trigger_poll = false;
+
+ spin_lock_bh(&ktls_resync->lock);
+- list_add_tail(&priv_rx->list, &ktls_resync->list);
+- trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
++ spin_lock_bh(&priv_rx->lock);
++ memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
++ if (list_empty(&priv_rx->list)) {
++ list_add_tail(&priv_rx->list, &ktls_resync->list);
++ trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
++ }
++ spin_unlock_bh(&priv_rx->lock);
+ spin_unlock_bh(&ktls_resync->lock);
+
+ if (!trigger_poll)
+@@ -604,7 +589,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+ int rxq, err;
+- u32 rqtn;
+
+ tls_ctx = tls_get_ctx(sk);
+ priv = netdev_priv(netdev);
+@@ -617,6 +601,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ if (err)
+ goto err_create_key;
+
++ INIT_LIST_HEAD(&priv_rx->list);
++ spin_lock_init(&priv_rx->lock);
+ priv_rx->crypto_info =
+ *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+@@ -628,9 +614,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
+ priv_rx->sw_stats = &priv->tls->sw_stats;
+ mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
+
+- rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
+-
+- err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
++ err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir);
+ if (err)
+ goto err_create_tir;
+
+@@ -730,10 +714,14 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+ priv_rx = list_first_entry(&local_list,
+ struct mlx5e_ktls_offload_context_rx,
+ list);
++ spin_lock(&priv_rx->lock);
+ cseg = post_static_params(sq, priv_rx);
+- if (IS_ERR(cseg))
++ if (IS_ERR(cseg)) {
++ spin_unlock(&priv_rx->lock);
+ break;
+- list_del(&priv_rx->list);
++ }
++ list_del_init(&priv_rx->list);
++ spin_unlock(&priv_rx->lock);
+ db_cseg = cseg;
+ }
+ if (db_cseg)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+index 9ad3459fb63a6..dadb71081ed06 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+@@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx **ctx =
+ __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
+
+- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) >
+- TLS_OFFLOAD_CONTEXT_SIZE_TX);
++ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
+
+ *ctx = priv_tx;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+index 84eb7201c142e..9a28ea165236b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+@@ -140,10 +140,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+- if (!in) {
+- err = -ENOMEM;
+- goto out;
+- }
++ if (!in)
++ return -ENOMEM;
+
+ if (enable_uc_lb)
+ lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+@@ -161,14 +159,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+ tirn = tir->tirn;
+ err = mlx5_core_modify_tir(mdev, tirn, in);
+ if (err)
+- goto out;
++ break;
+ }
++ mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+
+-out:
+ kvfree(in);
+ if (err)
+ netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+- mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+
+ return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+index a4c8d8d00d5a4..f2862100d1a2e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+@@ -117,12 +117,14 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+- ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+- for (i = 0; i < ets->ets_cap; i++) {
++ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+ if (err)
+ return err;
++ }
+
++ ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
++ for (i = 0; i < ets->ets_cap; i++) {
+ err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]);
+ if (err)
+ return err;
+@@ -1198,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
+ if (err)
+ return err;
+
++ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
++ /*
++ * Align the driver state with the register state.
++ * Temporary state change is required to enable the app list reset.
++ */
++ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
++ mlx5e_dcbnl_delete_app(priv);
++ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
++ }
++
+ mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
+ priv->dcbx_dp.trust_state);
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+index 9d451b8ee467c..2d3cd237355a6 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1754,7 +1754,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
+ __func__, size_read);
+- return 0;
++ return size_read;
+ }
+
+ i += size_read;
+@@ -1954,8 +1954,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+ return -EOPNOTSUPP;
+ if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
+ return -EINVAL;
+- } else if (priv->channels.params.lro_en) {
+- netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
++ } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
++ netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
+ return -EINVAL;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+index 03693fa74a704..d32b70c62c949 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
+ u32 rss_context, u32 *tirn)
+ {
+ if (fs->flow_type & FLOW_RSS) {
+- struct mlx5e_lro_param lro_param;
++ struct mlx5e_packet_merge_param pkt_merge_param;
+ struct mlx5e_rss *rss;
+ u32 flow_type;
+ int err;
+@@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
+ if (tt < 0)
+ return -EINVAL;
+
+- lro_param = mlx5e_get_lro_param(&priv->channels.params);
+- err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
++ pkt_merge_param = priv->channels.params.packet_merge;
++ err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
+ if (err)
+ return err;
+ eth_rule->rss = rss;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 41ef6eb70a585..fdc4a5a80da41 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -392,7 +392,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
+ rq->icosq = &c->icosq;
+ rq->ix = c->ix;
+ rq->mdev = mdev;
+- rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
++ rq->hw_mtu =
++ MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en;
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->stats = &c->priv->channel_stats[c->ix].rq;
+ rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+@@ -672,7 +673,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+ return err;
+ }
+
+-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
++static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
+ {
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+@@ -701,33 +702,30 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
+ return err;
+ }
+
+-static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
++static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+ {
+- struct mlx5_core_dev *mdev = rq->mdev;
+-
+- void *in;
+- void *rqc;
+- int inlen;
++ struct net_device *dev = rq->netdev;
+ int err;
+
+- inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+- in = kvzalloc(inlen, GFP_KERNEL);
+- if (!in)
+- return -ENOMEM;
+-
+- rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+-
+- MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+- MLX5_SET64(modify_rq_in, in, modify_bitmask,
+- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
+- MLX5_SET(rqc, rqc, scatter_fcs, enable);
+- MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
++ err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
++ if (err) {
++ netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
++ return err;
++ }
++ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
++ if (err) {
++ netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
++ return err;
++ }
+
+- err = mlx5_core_modify_rq(mdev, rq->rqn, in);
++ return 0;
++}
+
+- kvfree(in);
++int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
++{
++ mlx5e_free_rx_descs(rq);
+
+- return err;
++ return mlx5e_rq_to_ready(rq, curr_state);
+ }
+
+ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
+@@ -911,8 +909,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+ void mlx5e_close_rq(struct mlx5e_rq *rq)
+ {
+ cancel_work_sync(&rq->dim.work);
+- if (rq->icosq)
+- cancel_work_sync(&rq->icosq->recover_work);
+ cancel_work_sync(&rq->recover_work);
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+@@ -979,7 +975,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+- sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
++ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
+ sq->xsk_pool = xsk_pool;
+
+ sq->stats = sq->xsk_pool ?
+@@ -1038,9 +1034,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
+ mlx5e_reporter_icosq_cqe_err(sq);
+ }
+
++static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
++{
++ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
++ recover_work);
++
++ /* Not implemented yet. */
++
++ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
++}
++
+ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+ struct mlx5e_sq_param *param,
+- struct mlx5e_icosq *sq)
++ struct mlx5e_icosq *sq,
++ work_func_t recover_work_func)
+ {
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+@@ -1061,7 +1068,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+ if (err)
+ goto err_sq_wq_destroy;
+
+- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
++ INIT_WORK(&sq->recover_work, recover_work_func);
+
+ return 0;
+
+@@ -1399,13 +1406,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+ mlx5e_reporter_tx_err_cqe(sq);
+ }
+
+-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
++static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
++ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
++ work_func_t recover_work_func)
+ {
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+- err = mlx5e_alloc_icosq(c, param, sq);
++ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
+ if (err)
+ return err;
+
+@@ -1444,7 +1452,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+ synchronize_net(); /* Sync with NAPI. */
+ }
+
+-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
++static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+ {
+ struct mlx5e_channel *c = sq->channel;
+
+@@ -1871,11 +1879,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
+
+ spin_lock_init(&c->async_icosq_lock);
+
+- err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
++ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
++ mlx5e_async_icosq_err_cqe_work);
+ if (err)
+ goto err_close_xdpsq_cq;
+
+- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
++ mutex_init(&c->icosq_recovery_lock);
++
++ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
++ mlx5e_icosq_err_cqe_work);
+ if (err)
+ goto err_close_async_icosq;
+
+@@ -1943,9 +1955,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
+ mlx5e_close_xdpsq(&c->xdpsq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
++ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
++ cancel_work_sync(&c->icosq.recover_work);
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
++ mutex_destroy(&c->icosq_recovery_lock);
+ mlx5e_close_icosq(&c->async_icosq);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
+@@ -2185,17 +2200,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
+ chs->num = 0;
+ }
+
+-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
++static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
+ {
+ struct mlx5e_rx_res *res = priv->rx_res;
+- struct mlx5e_lro_param lro_param;
+-
+- lro_param = mlx5e_get_lro_param(&priv->channels.params);
+
+- return mlx5e_rx_res_lro_set_param(res, &lro_param);
++ return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
+ }
+
+-static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
++static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
+
+ static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+@@ -2836,20 +2848,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+ mlx5e_destroy_tises(priv);
+ }
+
+-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
+-{
+- int err = 0;
+- int i;
+-
+- for (i = 0; i < chs->num; i++) {
+- err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+- if (err)
+- return err;
+- }
+-
+- return 0;
+-}
+-
+ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+ {
+ int err;
+@@ -3226,7 +3224,7 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
+ return -EADDRNOTAVAIL;
+
+ netif_addr_lock_bh(netdev);
+- ether_addr_copy(netdev->dev_addr, saddr->sa_data);
++ eth_hw_addr_set(netdev, saddr->sa_data);
+ netif_addr_unlock_bh(netdev);
+
+ mlx5e_nic_set_rx_mode(priv);
+@@ -3270,16 +3268,25 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
+ }
+
+ new_params = *cur_params;
+- new_params.lro_en = enable;
+
+- if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+- mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+- reset = false;
++ if (enable)
++ new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
++ else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
++ new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
++ else
++ goto out;
++
++ if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
++ new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
++ if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
++ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
++ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
++ reset = false;
++ }
+ }
+
+ err = mlx5e_safe_switch_params(priv, &new_params,
+- mlx5e_modify_tirs_lro_ctx, NULL, reset);
++ mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
+ out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+@@ -3302,7 +3309,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable)
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
++ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
++ MLX5_TC_FLAG(NIC_OFFLOAD);
++ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
+ netdev_err(netdev,
+ "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+@@ -3350,41 +3359,27 @@ static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+ }
+
++static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx)
++{
++ struct mlx5_core_dev *mdev = priv->mdev;
++ bool enable = *(bool *)ctx;
++
++ return mlx5e_set_rx_port_ts(mdev, enable);
++}
++
+ static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
+ {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *chs = &priv->channels;
+- struct mlx5_core_dev *mdev = priv->mdev;
++ struct mlx5e_params new_params;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+- if (enable) {
+- err = mlx5e_set_rx_port_ts(mdev, false);
+- if (err)
+- goto out;
+-
+- chs->params.scatter_fcs_en = true;
+- err = mlx5e_modify_channels_scatter_fcs(chs, true);
+- if (err) {
+- chs->params.scatter_fcs_en = false;
+- mlx5e_set_rx_port_ts(mdev, true);
+- }
+- } else {
+- chs->params.scatter_fcs_en = false;
+- err = mlx5e_modify_channels_scatter_fcs(chs, false);
+- if (err) {
+- chs->params.scatter_fcs_en = true;
+- goto out;
+- }
+- err = mlx5e_set_rx_port_ts(mdev, true);
+- if (err) {
+- mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
+- err = 0;
+- }
+- }
+-
+-out:
++ new_params = chs->params;
++ new_params.scatter_fcs_en = enable;
++ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap,
++ &new_params.scatter_fcs_en, true);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ }
+@@ -3427,12 +3422,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
+
+ static int mlx5e_handle_feature(struct net_device *netdev,
+ netdev_features_t *features,
+- netdev_features_t wanted_features,
+ netdev_features_t feature,
+ mlx5e_feature_handler feature_handler)
+ {
+- netdev_features_t changes = wanted_features ^ netdev->features;
+- bool enable = !!(wanted_features & feature);
++ netdev_features_t changes = *features ^ netdev->features;
++ bool enable = !!(*features & feature);
+ int err;
+
+ if (!(changes & feature))
+@@ -3440,22 +3434,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
+
+ err = feature_handler(netdev, enable);
+ if (err) {
++ MLX5E_SET_FEATURE(features, feature, !enable);
+ netdev_err(netdev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+- MLX5E_SET_FEATURE(features, feature, enable);
+ return 0;
+ }
+
+ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
+ {
+- netdev_features_t oper_features = netdev->features;
++ netdev_features_t oper_features = features;
+ int err = 0;
+
+ #define MLX5E_HANDLE_FEATURE(feature, handler) \
+- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
++ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
+
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
+@@ -3520,14 +3514,25 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
+ }
+ }
+
++ if (params->xdp_prog) {
++ if (features & NETIF_F_LRO) {
++ netdev_warn(netdev, "LRO is incompatible with XDP\n");
++ features &= ~NETIF_F_LRO;
++ }
++ }
++
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+ }
+
+- if (mlx5e_is_uplink_rep(priv))
++ if (mlx5e_is_uplink_rep(priv)) {
+ features = mlx5e_fix_uplink_rep_features(netdev, features);
++ features |= NETIF_F_NETNS_LOCAL;
++ } else {
++ features &= ~NETIF_F_NETNS_LOCAL;
++ }
+
+ mutex_unlock(&priv->state_lock);
+
+@@ -3606,7 +3611,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ goto out;
+ }
+
+- if (params->lro_en)
++ if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+ reset = false;
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+@@ -4063,8 +4068,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_params new_params;
+
+- if (priv->channels.params.lro_en) {
+- netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
++ if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
++ netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
+ return -EINVAL;
+ }
+
+@@ -4152,6 +4157,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+
+ unlock:
+ mutex_unlock(&priv->state_lock);
++
++ /* Need to fix some features. */
++ if (!err)
++ netdev_update_features(netdev);
++
+ return err;
+ }
+
+@@ -4316,14 +4326,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
+ /* RQ */
+ mlx5e_build_rq_params(mdev, params);
+
+- /* HW LRO */
+- if (MLX5_CAP_ETH(mdev, lro_cap) &&
+- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+- /* No XSK params: checking the availability of striding RQ in general. */
+- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+- params->lro_en = !slow_pci_heuristic(mdev);
+- }
+- params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
++ params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
+ /* CQ moderation params */
+ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+@@ -4462,15 +4465,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
+ }
+
+ if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
+- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+- netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
++ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
++ NETIF_F_GSO_UDP_TUNNEL_CSUM;
++ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
++ NETIF_F_GSO_UDP_TUNNEL_CSUM;
++ netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
++ netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
++ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
+- netdev->hw_features |= NETIF_F_GSO_GRE;
+- netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+- netdev->gso_partial_features |= NETIF_F_GSO_GRE;
++ netdev->hw_features |= NETIF_F_GSO_GRE |
++ NETIF_F_GSO_GRE_CSUM;
++ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
++ NETIF_F_GSO_GRE_CSUM;
++ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
++ NETIF_F_GSO_GRE_CSUM;
+ }
+
+ if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
+@@ -4608,7 +4618,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+ {
+ struct mlx5_core_dev *mdev = priv->mdev;
+ enum mlx5e_rx_res_features features;
+- struct mlx5e_lro_param lro_param;
+ int err;
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+@@ -4626,9 +4635,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+ features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+ if (priv->channels.params.tunneled_offload_en)
+ features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+- lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+- priv->max_nch, priv->drop_rq.rqn, &lro_param,
++ priv->max_nch, priv->drop_rq.rqn,
++ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+@@ -4885,6 +4894,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof
+ }
+
+ netif_carrier_off(netdev);
++ netif_tx_disable(netdev);
+ dev_net_set(netdev, mlx5_core_net(mdev));
+
+ return netdev;
+@@ -4998,8 +5008,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
+ }
+
+ static int
+-mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+- const struct mlx5e_profile *new_profile, void *new_ppriv)
++mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
++ const struct mlx5e_profile *new_profile, void *new_ppriv)
+ {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+@@ -5015,6 +5025,25 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde
+ err = new_profile->init(priv->mdev, priv->netdev);
+ if (err)
+ goto priv_cleanup;
++
++ return 0;
++
++priv_cleanup:
++ mlx5e_priv_cleanup(priv);
++ return err;
++}
++
++static int
++mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
++ const struct mlx5e_profile *new_profile, void *new_ppriv)
++{
++ struct mlx5e_priv *priv = netdev_priv(netdev);
++ int err;
++
++ err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
++ if (err)
++ return err;
++
+ err = mlx5e_attach_netdev(priv);
+ if (err)
+ goto profile_cleanup;
+@@ -5022,7 +5051,6 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde
+
+ profile_cleanup:
+ new_profile->cleanup(priv);
+-priv_cleanup:
+ mlx5e_priv_cleanup(priv);
+ return err;
+ }
+@@ -5041,6 +5069,12 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
+ priv->profile->cleanup(priv);
+ mlx5e_priv_cleanup(priv);
+
++ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
++ mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
++ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
++ return -EIO;
++ }
++
+ err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
+ if (err) { /* roll back to original profile */
+ netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
+@@ -5100,8 +5134,11 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+- if (!netif_device_present(netdev))
++ if (!netif_device_present(netdev)) {
++ if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
++ mlx5e_destroy_mdev_resources(mdev);
+ return -ENODEV;
++ }
+
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_mdev_resources(mdev);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 0684ac6699b2d..3d614bf5cff9e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -50,6 +50,7 @@
+ #include "fs_core.h"
+ #include "lib/mlx5.h"
+ #include "lib/devcom.h"
++#include "lib/vxlan.h"
+ #define CREATE_TRACE_POINTS
+ #include "diag/en_rep_tracepoint.h"
+ #include "en_accel/ipsec.h"
+@@ -617,6 +618,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
+
+ params->mqprio.num_tc = 1;
+ params->tunneled_offload_en = false;
++ if (rep->vport != MLX5_VPORT_UPLINK)
++ params->vlan_strip_disable = true;
+
+ /* Set an initial non-zero value, so that mlx5e_select_queue won't
+ * divide by zero if called before first activating channels.
+@@ -793,7 +796,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+ {
+ struct mlx5_core_dev *mdev = priv->mdev;
+- struct mlx5e_lro_param lro_param;
+ int err;
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+@@ -808,9 +810,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+ return err;
+ }
+
+- lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+- priv->max_nch, priv->drop_rq.rqn, &lro_param,
++ priv->max_nch, priv->drop_rq.rqn,
++ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+@@ -1017,6 +1019,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
++ udp_tunnel_nic_reset_ntf(priv->netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
+ }
+@@ -1038,6 +1041,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+ mlx5_notifier_unregister(mdev, &priv->events_nb);
+ mlx5e_rep_tc_disable(priv);
+ mlx5_lag_remove_netdev(mdev, priv->netdev);
++ mlx5_vxlan_reset_to_default(mdev->vxlan);
+ }
+
+ static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
+@@ -1070,6 +1074,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
+ &MLX5E_STATS_GRP(pme),
+ &MLX5E_STATS_GRP(channels),
+ &MLX5E_STATS_GRP(per_port_buff_congest),
++#ifdef CONFIG_MLX5_EN_IPSEC
++ &MLX5E_STATS_GRP(ipsec_sw),
++ &MLX5E_STATS_GRP(ipsec_hw),
++#endif
+ };
+
+ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+index 29a6586ef28dc..d2de1e6c514c1 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -271,8 +271,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
+ if (unlikely(!dma_info->page))
+ return -ENOMEM;
+
+- dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
+- PAGE_SIZE, rq->buff.map_dir);
++ dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE,
++ rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC);
+ if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+ page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ dma_info->page = NULL;
+@@ -293,7 +293,8 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
+
+ void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
+ {
+- dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
++ dma_unmap_page_attrs(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir,
++ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+
+ void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+@@ -986,7 +987,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
+ }
+
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+- if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
++ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
++ get_cqe_tls_offload(cqe))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+index e1dd17019030e..5a5c6eda29d28 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+@@ -614,7 +614,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+- if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
++ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
+ return;
+
+ MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index 129ff7e0d65cc..d123d9b4adf5e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -248,7 +248,6 @@ get_ct_priv(struct mlx5e_priv *priv)
+ return priv->fs.tc.ct;
+ }
+
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ static struct mlx5e_tc_psample *
+ get_sample_priv(struct mlx5e_priv *priv)
+ {
+@@ -265,7 +264,6 @@ get_sample_priv(struct mlx5e_priv *priv)
+
+ return NULL;
+ }
+-#endif
+
+ struct mlx5_flow_handle *
+ mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+@@ -1148,11 +1146,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
+ flow, spec, attr,
+ mod_hdr_acts);
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ } else if (flow_flag_test(flow, SAMPLE)) {
+ rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
+ mlx5e_tc_get_flow_tun_id(flow));
+-#endif
+ } else {
+ rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ }
+@@ -1183,23 +1179,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+ goto offload_rule_0;
+
+- if (flow_flag_test(flow, CT)) {
+- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+- return;
+- }
+-
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+- if (flow_flag_test(flow, SAMPLE)) {
+- mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+- return;
+- }
+-#endif
+-
+ if (attr->esw_attr->split_count)
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+
++ if (flow_flag_test(flow, CT))
++ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
++ else if (flow_flag_test(flow, SAMPLE))
++ mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
++ else
+ offload_rule_0:
+- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
++ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+
+ struct mlx5_flow_handle *
+@@ -1293,7 +1282,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+ uplink_priv = &rpriv->uplink_priv;
+
+ mutex_lock(&uplink_priv->unready_flows_lock);
+- unready_flow_del(flow);
++ if (flow_flag_test(flow, NOT_READY))
++ unready_flow_del(flow);
+ mutex_unlock(&uplink_priv->unready_flows_lock);
+ }
+
+@@ -1319,11 +1309,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
+ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+ {
+ struct mlx5e_priv *out_priv, *route_priv;
+- struct mlx5_devcom *devcom = NULL;
+ struct mlx5_core_dev *route_mdev;
+ struct mlx5_eswitch *esw;
+ u16 vhca_id;
+- int err;
+
+ out_priv = netdev_priv(out_dev);
+ esw = out_priv->mdev->priv.eswitch;
+@@ -1332,6 +1320,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
+
+ vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ if (mlx5_lag_is_active(out_priv->mdev)) {
++ struct mlx5_devcom *devcom;
++ int err;
++
+ /* In lag case we may get devices from different eswitch instances.
+ * If we failed to get vport num, it means, mostly, that we on the wrong
+ * eswitch.
+@@ -1340,23 +1331,23 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
+ if (err != -ENOENT)
+ return err;
+
++ rcu_read_lock();
+ devcom = out_priv->mdev->priv.devcom;
+- esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+- if (!esw)
+- return -ENODEV;
++ esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
++ err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
++ rcu_read_unlock();
++
++ return err;
+ }
+
+- err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+- if (devcom)
+- mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+- return err;
++ return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ }
+
+ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+- struct mlx5e_tc_flow_parse_attr *parse_attr,
+- struct mlx5e_tc_flow *flow)
++ struct mlx5e_tc_flow *flow,
++ struct mlx5_flow_attr *attr)
+ {
+- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
++ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+ struct mlx5_modify_hdr *mod_hdr;
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+@@ -1366,8 +1357,8 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ if (IS_ERR(mod_hdr))
+ return PTR_ERR(mod_hdr);
+
+- WARN_ON(flow->attr->modify_hdr);
+- flow->attr->modify_hdr = mod_hdr;
++ WARN_ON(attr->modify_hdr);
++ attr->modify_hdr = mod_hdr;
+
+ return 0;
+ }
+@@ -1468,7 +1459,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
+ if (vf_tun) {
+- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
++ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
+ if (err)
+ goto err_out;
+ } else {
+@@ -1535,8 +1526,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ esw_attr = attr->esw_attr;
+ mlx5e_put_flow_tunnel_id(flow);
+
+- if (flow_flag_test(flow, NOT_READY))
+- remove_unready_flow(flow);
++ remove_unready_flow(flow);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ if (flow_flag_test(flow, SLOW))
+@@ -1544,6 +1534,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ }
++ complete_all(&flow->del_hw_done);
+
+ if (mlx5_flow_has_geneve_opt(flow))
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
+@@ -1884,6 +1875,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+ return ip_version;
+ }
+
++/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
++ * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
++ * +---------+----------------------------------------+
++ * |Arriving | Arriving Outer Header |
++ * | Inner +---------+---------+---------+----------+
++ * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
++ * +---------+---------+---------+---------+----------+
++ * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
++ * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
++ * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
++ * | CE | CE | CE | CE | CE |
++ * +---------+---------+---------+---------+----------+
++ *
++ * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
++ * the inner ip_ecn value before hardware decap action.
++ *
++ * Cells marked with * are changed from the original inner packet ip_ecn value during decap,
++ * so matching those values on inner ip_ecn before decap will fail.
++ *
++ * The following helper allows offload when the inner ip_ecn won't be changed by the outer ip_ecn,
++ * except for outer ip_ecn = CE, where in all cases the inner ip_ecn will be changed to CE,
++ * so we can drop the inner ip_ecn = CE match.
++ */
++
++static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
++ struct flow_cls_offload *f,
++ bool *match_inner_ecn)
++{
++ u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
++ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
++ struct netlink_ext_ack *extack = f->common.extack;
++ struct flow_match_ip match;
++
++ *match_inner_ecn = true;
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
++ flow_rule_match_enc_ip(rule, &match);
++ outer_ecn_key = match.key->tos & INET_ECN_MASK;
++ outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
++ }
++
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
++ flow_rule_match_ip(rule, &match);
++ inner_ecn_key = match.key->tos & INET_ECN_MASK;
++ inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
++ }
++
++ if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
++ NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
++ netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
++ return -EOPNOTSUPP;
++ }
++
++ if (!outer_ecn_mask) {
++ if (!inner_ecn_mask)
++ return 0;
++
++ NL_SET_ERR_MSG_MOD(extack,
++ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
++ netdev_warn(priv->netdev,
++ "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
++ return -EOPNOTSUPP;
++ }
++
++ if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
++ NL_SET_ERR_MSG_MOD(extack,
++ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
++ netdev_warn(priv->netdev,
++ "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
++ return -EOPNOTSUPP;
++ }
++
++ if (!inner_ecn_mask)
++ return 0;
++
++ /* Both inner and outer have full mask on ecn */
++
++ if (outer_ecn_key == INET_ECN_ECT_1) {
++ /* inner ecn might change by DECAP action */
++
++ NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
++ netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
++ return -EOPNOTSUPP;
++ }
++
++ if (outer_ecn_key != INET_ECN_CE)
++ return 0;
++
++ if (inner_ecn_key != INET_ECN_CE) {
++ /* Can't happen in software, as packet ecn will be changed to CE after decap */
++ NL_SET_ERR_MSG_MOD(extack,
++ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
++ netdev_warn(priv->netdev,
++ "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
++ return -EOPNOTSUPP;
++ }
++
++ /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
++ * drop match on inner ecn
++ */
++ *match_inner_ecn = false;
++
++ return 0;
++}
++
+ static int parse_tunnel_attr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+@@ -2077,6 +2173,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_dissector *dissector = rule->match.dissector;
+ enum fs_flow_table_type fs_type;
++ bool match_inner_ecn = true;
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+ u8 *match_level;
+@@ -2130,6 +2227,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
+ headers_c = get_match_inner_headers_criteria(spec);
+ headers_v = get_match_inner_headers_value(spec);
+ }
++
++ err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
++ if (err)
++ return err;
+ }
+
+ err = mlx5e_flower_parse_meta(filter_dev, f);
+@@ -2191,6 +2292,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
+ match.key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
++
++ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
++ match.mask->vlan_eth_type &&
++ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
++ ft_field_support.outer_second_vid,
++ fs_type)) {
++ MLX5_SET(fte_match_set_misc, misc_c,
++ outer_second_cvlan_tag, 1);
++ spec->match_criteria_enable |=
++ MLX5_MATCH_MISC_PARAMETERS;
++ }
+ }
+ } else if (*match_level != MLX5_MATCH_NONE) {
+ /* cvlan_tag enabled in match criteria and
+@@ -2351,10 +2463,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct flow_match_ip match;
+
+ flow_rule_match_ip(rule, &match);
+- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+- match.mask->tos & 0x3);
+- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+- match.key->tos & 0x3);
++ if (match_inner_ecn) {
++ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
++ match.mask->tos & 0x3);
++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
++ match.key->tos & 0x3);
++ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+@@ -3178,37 +3292,68 @@ out_ok:
+ return true;
+ }
+
+-static bool actions_match_supported(struct mlx5e_priv *priv,
+- struct flow_action *flow_action,
+- struct mlx5e_tc_flow_parse_attr *parse_attr,
+- struct mlx5e_tc_flow *flow,
+- struct netlink_ext_ack *extack)
++static bool
++actions_match_supported_fdb(struct mlx5e_priv *priv,
++ struct mlx5e_tc_flow_parse_attr *parse_attr,
++ struct mlx5e_tc_flow *flow,
++ struct netlink_ext_ack *extack)
+ {
+- bool ct_flow = false, ct_clear = false;
+- u32 actions;
++ bool ct_flow, ct_clear;
+
+- ct_clear = flow->attr->ct_attr.ct_action &
+- TCA_CT_ACT_CLEAR;
++ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
+- actions = flow->attr->action;
+
+- if (mlx5e_is_eswitch_flow(flow)) {
+- if (flow->attr->esw_attr->split_count && ct_flow &&
+- !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
+- /* All registers used by ct are cleared when using
+- * split rules.
+- */
+- NL_SET_ERR_MSG_MOD(extack,
+- "Can't offload mirroring with action ct");
+- return false;
+- }
++ if (flow->attr->esw_attr->split_count && ct_flow &&
++ !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
++ /* All registers used by ct are cleared when using
++ * split rules.
++ */
++ NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
++ return false;
+ }
+
+- if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+- return modify_header_match_supported(priv, &parse_attr->spec,
+- flow_action, actions,
+- ct_flow, ct_clear,
+- extack);
++ return true;
++}
++
++static bool
++actions_match_supported(struct mlx5e_priv *priv,
++ struct flow_action *flow_action,
++ struct mlx5e_tc_flow_parse_attr *parse_attr,
++ struct mlx5e_tc_flow *flow,
++ struct netlink_ext_ack *extack)
++{
++ u32 actions = flow->attr->action;
++ bool ct_flow, ct_clear;
++
++ ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
++ ct_flow = flow_flag_test(flow, CT) && !ct_clear;
++
++ if (!(actions &
++ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
++ NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
++ return false;
++ }
++
++ if (!(~actions &
++ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
++ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
++ return false;
++ }
++
++ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
++ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
++ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
++ return false;
++ }
++
++ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
++ !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
++ actions, ct_flow, ct_clear, extack))
++ return false;
++
++ if (mlx5e_is_eswitch_flow(flow) &&
++ !actions_match_supported_fdb(priv, parse_attr, flow, extack))
++ return false;
+
+ return true;
+ }
+@@ -3453,11 +3598,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
+ if (err)
+ return err;
+
+- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
++ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
++ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = act->chain_index;
+ break;
+ case FLOW_ACTION_CT:
+- err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
++ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
++ &parse_attr->mod_hdr_acts,
++ act, extack);
+ if (err)
+ return err;
+
+@@ -3486,12 +3634,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
+
+ attr->action = action;
+
+- if (attr->dest_chain) {
+- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+- NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
+- return -EOPNOTSUPP;
+- }
+- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
++ if (attr->dest_chain && parse_attr->mirred_ifindex[0]) {
++ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
++ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+@@ -4000,7 +4145,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ if (err)
+ return err;
+
+- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
++ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
++ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = act->chain_index;
+ break;
+ case FLOW_ACTION_CT:
+@@ -4008,7 +4154,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
+ return -EOPNOTSUPP;
+ }
+- err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
++ err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
++ &parse_attr->mod_hdr_acts,
++ act, extack);
+ if (err)
+ return err;
+
+@@ -4070,30 +4218,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
+ return -EOPNOTSUPP;
+
+- if (attr->dest_chain) {
+- if (decap) {
+- /* It can be supported if we'll create a mapping for
+- * the tunnel device only (without tunnel), and set
+- * this tunnel id with this decap flow.
+- *
+- * On restore (miss), we'll just set this saved tunnel
+- * device.
+- */
+-
+- NL_SET_ERR_MSG(extack,
+- "Decap with goto isn't supported");
+- netdev_warn(priv->netdev,
+- "Decap with goto isn't supported");
+- return -EOPNOTSUPP;
+- }
+-
+- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+- }
++ if (attr->dest_chain && decap) {
++ /* It can be supported if we'll create a mapping for
++ * the tunnel device only (without tunnel), and set
++ * this tunnel id with this decap flow.
++ *
++ * On restore (miss), we'll just set this saved tunnel
++ * device.
++ */
+
+- if (!(attr->action &
+- (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+- NL_SET_ERR_MSG_MOD(extack,
+- "Rule must have at least one forward/drop action");
++ NL_SET_ERR_MSG(extack, "Decap with goto isn't supported");
++ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+@@ -4222,6 +4357,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
+ refcount_set(&flow->refcnt, 1);
+ init_completion(&flow->init_done);
++ init_completion(&flow->del_hw_done);
+
+ *__flow = flow;
+ *__parse_attr = parse_attr;
+@@ -4875,6 +5011,33 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
+ return tc_tbl_size;
+ }
+
++static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
++{
++ struct mlx5_flow_table **ft = &priv->fs.tc.miss_t;
++ struct mlx5_flow_table_attr ft_attr = {};
++ struct mlx5_flow_namespace *ns;
++ int err = 0;
++
++ ft_attr.max_fte = 1;
++ ft_attr.autogroup.max_num_groups = 1;
++ ft_attr.level = MLX5E_TC_MISS_LEVEL;
++ ft_attr.prio = 0;
++ ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
++
++ *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
++ if (IS_ERR(*ft)) {
++ err = PTR_ERR(*ft);
++ netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
++ }
++
++ return err;
++}
++
++static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
++{
++ mlx5_destroy_flow_table(priv->fs.tc.miss_t);
++}
++
+ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
+ {
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+@@ -4907,19 +5070,23 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
+ }
+ tc->mapping = chains_mapping;
+
++ err = mlx5e_tc_nic_create_miss_table(priv);
++ if (err)
++ goto err_chains;
++
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
+ attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
+ MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
+ attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
+ attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
+ attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
+- attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
++ attr.default_ft = priv->fs.tc.miss_t;
+ attr.mapping = chains_mapping;
+
+ tc->chains = mlx5_chains_create(dev, &attr);
+ if (IS_ERR(tc->chains)) {
+ err = PTR_ERR(tc->chains);
+- goto err_chains;
++ goto err_miss;
+ }
+
+ tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
+@@ -4942,6 +5109,8 @@ err_reg:
+ mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mlx5_chains_destroy(tc->chains);
++err_miss:
++ mlx5e_tc_nic_destroy_miss_table(priv);
+ err_chains:
+ mapping_destroy(chains_mapping);
+ err_mapping:
+@@ -4982,6 +5151,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
+ mlx5e_tc_post_act_destroy(tc->post_act);
+ mapping_destroy(tc->mapping);
+ mlx5_chains_destroy(tc->chains);
++ mlx5e_tc_nic_destroy_miss_table(priv);
+ }
+
+ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+@@ -5008,9 +5178,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
+
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
+-#endif
+
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+@@ -5054,9 +5222,7 @@ err_ht_init:
+ err_enc_opts_mapping:
+ mapping_destroy(uplink_priv->tunnel_mapping);
+ err_tun_mapping:
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+-#endif
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ netdev_warn(priv->netdev,
+ "Failed to initialize tc (eswitch), err: %d", err);
+@@ -5076,9 +5242,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+ mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+ mapping_destroy(uplink_priv->tunnel_mapping);
+
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+-#endif
+ mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+index 1a4cd882f0fba..f48af82781f88 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+@@ -241,8 +241,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ u32 data);
+
+ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+- struct mlx5e_tc_flow_parse_attr *parse_attr,
+- struct mlx5e_tc_flow *flow);
++ struct mlx5e_tc_flow *flow,
++ struct mlx5_flow_attr *attr);
+
+ int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
+ int namespace,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+index 188994d091c54..6813279b57f89 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -38,6 +38,7 @@
+ #include "en/txrx.h"
+ #include "ipoib/ipoib.h"
+ #include "en_accel/en_accel.h"
++#include "en_accel/ipsec_rxtx.h"
+ #include "en/ptp.h"
+
+ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
+@@ -213,30 +214,13 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
+ memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
+ }
+
+-static void
+-ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+- struct mlx5_wqe_eth_seg *eseg)
+-{
+- struct xfrm_offload *xo = xfrm_offload(skb);
+-
+- eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+- if (xo->inner_ipproto) {
+- eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM;
+- } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+- eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+- sq->stats->csum_partial_inner++;
+- }
+-}
+-
+ static inline void
+ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5_wqe_eth_seg *eseg)
+ {
+- if (unlikely(mlx5e_ipsec_eseg_meta(eseg))) {
+- ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
++ if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
+ return;
+- }
+
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+@@ -445,6 +429,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
+ }
+ }
+
++static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
++{
++ struct mlx5e_tx_wqe_info *wi;
++ struct mlx5e_tx_wqe *wqe;
++ u16 pi;
++
++ /* Must not be called when a MPWQE session is active but empty. */
++ mlx5e_tx_mpwqe_ensure_complete(sq);
++
++ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
++ wi = &sq->db.wqe_info[pi];
++
++ *wi = (struct mlx5e_tx_wqe_info) {
++ .num_wqebbs = 1,
++ };
++
++ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
++ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
++}
++
+ static inline void
+ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+@@ -475,6 +479,11 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ if (unlikely(sq->ptpsq)) {
+ mlx5e_skb_cb_hwtstamp_init(skb);
+ mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
++ if (!netif_tx_queue_stopped(sq->txq) &&
++ !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) {
++ netif_tx_stop_queue(sq->txq);
++ sq->stats->stopped++;
++ }
+ skb_get(skb);
+ }
+
+@@ -537,6 +546,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
++ mlx5e_tx_flush(sq);
+ }
+
+ static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
+@@ -638,6 +648,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_xmit_data txd;
+
++ txd.data = skb->data;
++ txd.len = skb->len;
++
++ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
++ goto err_unmap;
++
+ if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
+@@ -647,18 +664,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+
+ sq->stats->xmit_more += xmit_more;
+
+- txd.data = skb->data;
+- txd.len = skb->len;
+-
+- txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+- if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+- goto err_unmap;
+ mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
+-
+ mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
+-
+ mlx5e_tx_mpwqe_add_dseg(sq, &txd);
+-
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+@@ -680,6 +688,7 @@ err_unmap:
+ mlx5e_dma_unmap_wqe_err(sq, 1);
+ sq->stats->dropped++;
+ dev_kfree_skb_any(skb);
++ mlx5e_tx_flush(sq);
+ }
+
+ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
+@@ -801,6 +810,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t
+ }
+ }
+
++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
++{
++ if (netif_tx_queue_stopped(sq->txq) &&
++ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
++ mlx5e_ptpsq_fifo_has_room(sq) &&
++ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
++ netif_tx_wake_queue(sq->txq);
++ sq->stats->wake++;
++ }
++}
++
+ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+ {
+ struct mlx5e_sq_stats *stats;
+@@ -900,12 +920,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+
+- if (netif_tx_queue_stopped(sq->txq) &&
+- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+- !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+- netif_tx_wake_queue(sq->txq);
+- stats->wake++;
+- }
++ mlx5e_txqsq_wake(sq);
+
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+ }
+@@ -1049,5 +1064,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
++ mlx5e_tx_flush(sq);
+ }
+ #endif
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+index 833be29170a13..c47c360a5d5e9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+@@ -150,20 +150,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
+ }
+ }
+
++ /* budget=0 means we may be in IRQ context, do as little as possible */
++ if (unlikely(!budget))
++ goto out;
++
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+
+ if (c->xdp)
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
+
+- if (likely(budget)) { /* budget=0 means: don't poll rx rings */
+- if (xsk_open)
+- work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
++ if (xsk_open)
++ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+
+- if (likely(budget - work_done))
+- work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
++ if (likely(budget - work_done))
++ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+
+- busy |= work_done == budget;
+- }
++ busy |= work_done == budget;
+
+ mlx5e_poll_ico_cq(&c->icosq.cq);
+ if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 605c8ecc3610f..ccccbac044287 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -981,7 +981,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ if (!mlx5_core_is_sf(dev))
+ clear_rmap(dev);
+- mlx5_irq_table_destroy(dev);
++ mlx5_irq_table_free_irqs(dev);
+ mutex_unlock(&table->lock);
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+index 60a73990017c2..6b4c9ffad95b2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+ {
++ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_fc *drop_counter = NULL;
+@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ */
+ int table_size = 2;
+ int dest_num = 0;
++ int actions_flag;
+ int err = 0;
+
+ if (vport->egress.legacy.drop_counter) {
+@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ /* Allowed vlan rule */
++ actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
++ if (vst_mode_steering)
++ actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
++ actions_flag);
+ if (err)
+ goto out;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+index b1a5199260f69..093ed86a0acd8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+ {
++ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_fc *counter = NULL;
++ bool vst_check_cvlan = false;
++ bool vst_push_cvlan = false;
+ /* The ingress acl table contains 4 groups
+ * (2 active rules at the same time -
+ * 1 allow rule from one of the first 3 groups.
+@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ goto out;
+ }
+
+- if (vport->info.vlan || vport->info.qos)
++ if ((vport->info.vlan || vport->info.qos)) {
++ if (vst_mode_steering)
++ vst_push_cvlan = true;
++ else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
++ vst_check_cvlan = true;
++ }
++
++ if (vst_check_cvlan || vport->info.spoofchk)
++ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
++
++ /* Create ingress allow rule */
++ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
++ if (vst_push_cvlan) {
++ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
++ flow_act.vlan[0].prio = vport->info.qos;
++ flow_act.vlan[0].vid = vport->info.vlan;
++ flow_act.vlan[0].ethtype = ETH_P_8021Q;
++ }
++
++ if (vst_check_cvlan)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+
+@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+- /* Create ingress allow rule */
+- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ goto out;
+ }
+
++ if (!vst_check_cvlan && !vport->info.spoofchk)
++ goto out;
++
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ /* Attach drop flow counter */
+@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ return 0;
+
+ out:
+- esw_acl_ingress_lgcy_cleanup(esw, vport);
++ if (err)
++ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ kvfree(spec);
+ return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+index 39e948bc12041..34a6542c03f61 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+@@ -301,8 +301,7 @@ int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_n
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+- err = PTR_ERR(vport);
+- goto out;
++ return PTR_ERR(vport);
+ }
+
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+index 7e221038df8d5..aec0f67cef005 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+@@ -1270,7 +1270,7 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
+ struct mlx5_esw_bridge *bridge;
+
+ port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+- if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER)
++ if (!port)
+ return;
+
+ bridge = port->bridge;
+@@ -1385,6 +1385,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
++ ASSERT_RTNL();
++
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+@@ -1401,6 +1403,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
++ ASSERT_RTNL();
++
+ if (!br_offloads)
+ return;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+index 3401188e0a602..51ac24e6ec3c3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+- strncpy(__entry->dev_name,
++ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+index df277a6cddc0b..3a2575dc5355d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+@@ -11,6 +11,7 @@
+ #include "mlx5_core.h"
+ #include "eswitch.h"
+ #include "fs_core.h"
++#include "fs_ft_pool.h"
+ #include "esw/qos.h"
+
+ enum {
+@@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+ if (!flow_group_in)
+ return -ENOMEM;
+
+- table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+- ft_attr.max_fte = table_size;
++ ft_attr.max_fte = POOL_NEXT_SIZE;
+ ft_attr.prio = LEGACY_FDB_PRIO;
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+@@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+ goto out;
+ }
+ esw->fdb_table.legacy.fdb = fdb;
++ table_size = fdb->max_fte;
+
+ /* Addresses group : Full match unicast/multicast addresses */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+@@ -431,7 +432,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int err = 0;
+
+ if (!mlx5_esw_allowed(esw))
+- return -EPERM;
++ return vlan ? -EPERM : 0;
+
+ if (vlan || qos)
+ set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+index c6cc67cb4f6ad..65c8f1f08472c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+@@ -22,15 +22,13 @@ struct mlx5_esw_rate_group {
+ };
+
+ static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+- u32 parent_ix, u32 tsar_ix,
+- u32 max_rate, u32 bw_share)
++ u32 tsar_ix, u32 max_rate, u32 bw_share)
+ {
+ u32 bitmask = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+- MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+@@ -51,7 +49,7 @@ static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_g
+ int err;
+
+ err = esw_qos_tsar_config(dev, sched_ctx,
+- esw->qos.root_tsar_ix, group->tsar_ix,
++ group->tsar_ix,
+ max_rate, bw_share);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+@@ -67,23 +65,13 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
+ {
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+- struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+- u32 parent_tsar_ix;
+- void *vport_elem;
+ int err;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+- parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+- MLX5_SET(scheduling_context, sched_ctx, element_type,
+- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+- element_attributes);
+- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+-
+- err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
++ err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
+ max_rate, bw_share);
+ if (err) {
+ esw_warn(esw->dev,
+@@ -130,7 +118,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+ /* If vports min rate divider is 0 but their group has bw_share configured, then
+ * need to set bw_share for vports to minimal value.
+ */
+- if (!group_level && !max_guarantee && group->bw_share)
++ if (!group_level && !max_guarantee && group && group->bw_share)
+ return 1;
+ return 0;
+ }
+@@ -423,7 +411,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+- if (vport->qos.bw_share) {
++ if (vport->qos.bw_share || new_group->bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index ec136b4992045..7315bf447e061 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -160,10 +160,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
+ esw_vport_context.vport_cvlan_strip, 1);
+
+ if (set_flags & SET_VLAN_INSERT) {
+- /* insert only if no vlan in packet */
+- MLX5_SET(modify_esw_vport_context_in, in,
+- esw_vport_context.vport_cvlan_insert, 1);
+-
++ if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
++ /* insert either if vlan exist in packet or not */
++ MLX5_SET(modify_esw_vport_context_in, in,
++ esw_vport_context.vport_cvlan_insert,
++ MLX5_VPORT_CVLAN_INSERT_ALWAYS);
++ } else {
++ /* insert only if no vlan in packet */
++ MLX5_SET(modify_esw_vport_context_in, in,
++ esw_vport_context.vport_cvlan_insert,
++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
++ }
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_pcp, qos);
+ MLX5_SET(modify_esw_vport_context_in, in,
+@@ -773,6 +780,7 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
+
+ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+ {
++ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ u16 vport_num = vport->vport;
+ int flags;
+ int err;
+@@ -802,8 +810,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+
+ flags = (vport->info.vlan || vport->info.qos) ?
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
+- modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+- vport->info.qos, flags);
++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
++ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
++ vport->info.qos, flags);
+
+ return 0;
+ }
+@@ -909,6 +918,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+ */
+ esw_vport_change_handle_locked(vport);
+ vport->enabled_events = 0;
++ esw_apply_vport_rx_mode(esw, vport, false, false);
+ esw_vport_cleanup(esw, vport);
+ esw->enabled_vports--;
+
+@@ -1305,12 +1315,17 @@ abort:
+ */
+ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+ {
++ bool toggle_lag;
+ int ret;
+
+ if (!mlx5_esw_allowed(esw))
+ return 0;
+
+- mlx5_lag_disable_change(esw->dev);
++ toggle_lag = esw->mode == MLX5_ESWITCH_NONE;
++
++ if (toggle_lag)
++ mlx5_lag_disable_change(esw->dev);
++
+ down_write(&esw->mode_lock);
+ if (esw->mode == MLX5_ESWITCH_NONE) {
+ ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
+@@ -1324,7 +1339,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+ esw->esw_funcs.num_vfs = num_vfs;
+ }
+ up_write(&esw->mode_lock);
+- mlx5_lag_enable_change(esw->dev);
++
++ if (toggle_lag)
++ mlx5_lag_enable_change(esw->dev);
++
+ return ret;
+ }
+
+@@ -1572,6 +1590,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+ esw->enabled_vports = 0;
+ esw->mode = MLX5_ESWITCH_NONE;
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
++ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
++ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
++ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
++ else
++ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+ dev->priv.eswitch = esw;
+ BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+@@ -1833,6 +1856,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ u16 vport, u16 vlan, u8 qos, u8 set_flags)
+ {
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
++ bool vst_mode_steering = esw_vst_mode_is_steering(esw);
+ int err = 0;
+
+ if (IS_ERR(evport))
+@@ -1840,9 +1864,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ if (vlan > 4095 || qos > 7)
+ return -EINVAL;
+
+- err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+- if (err)
+- return err;
++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
++ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
++ if (err)
++ return err;
++ }
+
+ evport->info.vlan = vlan;
+ evport->info.qos = qos;
+@@ -1934,7 +1960,7 @@ free_out:
+ return err;
+ }
+
+-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
++u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
+ {
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+@@ -1948,7 +1974,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+- return mlx5_esw_allowed(esw) ? esw->offloads.encap :
++ return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap :
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ }
+ EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index 2c7444101bb93..0e2c9e6fccb67 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -505,6 +505,12 @@ static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw)
+ return esw->qos.enabled;
+ }
+
++static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
++{
++ return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
++ MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
++}
++
+ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+ u8 vlan_depth)
+ {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 0d461e38add37..829f703233a9e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -656,7 +656,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+
+ return rule;
+ err_chain_src_rewrite:
+- esw_put_dest_tables_loop(esw, attr, 0, i);
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ err_get_fwd:
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+@@ -698,7 +697,6 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
+ if (fwd_rule) {
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+ mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
+- esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
+ } else {
+ if (split)
+ mlx5_esw_vporttbl_put(esw, &fwd_attr);
+@@ -962,7 +960,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+- if (rep->vport == MLX5_VPORT_UPLINK)
++ if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) &&
++ rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+ flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
+@@ -2471,6 +2470,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
++ .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+@@ -2783,10 +2783,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+
+- if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+- mlx5_ecpf_vport_exists(esw->dev))
+- return false;
+-
+ return true;
+ }
+
+@@ -3141,12 +3137,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
+ u64 mapping_id;
+ int err;
+
+- if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
+- MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
+- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+- else
+- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+-
+ mutex_init(&esw->offloads.termtbl_mutex);
+ mlx5_rdma_enable_roce(esw->dev);
+
+@@ -3244,7 +3234,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
+ esw_offloads_metadata_uninit(esw);
+ mlx5_rdma_disable_roce(esw->dev);
+ mutex_destroy(&esw->offloads.termtbl_mutex);
+- esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ }
+
+ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+@@ -3588,7 +3577,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ *encap = esw->offloads.encap;
+ unlock:
+ up_write(&esw->mode_lock);
+- return 0;
++ return err;
+ }
+
+ static bool
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+index b459549058450..1b417b1d1cf8f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+@@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act,
+ sizeof(dest->vport.num), hash);
+ hash = jhash((const void *)&dest->vport.vhca_id,
+ sizeof(dest->vport.num), hash);
+- if (dest->vport.pkt_reformat)
+- hash = jhash(dest->vport.pkt_reformat,
+- sizeof(*dest->vport.pkt_reformat),
++ if (flow_act->pkt_reformat)
++ hash = jhash(flow_act->pkt_reformat,
++ sizeof(*flow_act->pkt_reformat),
+ hash);
+ return hash;
+ }
+@@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1,
+ if (ret)
+ return ret;
+
+- return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ?
+- memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat,
+- sizeof(*dest1->vport.pkt_reformat)) : 0;
++ if (flow_act1->pkt_reformat && flow_act2->pkt_reformat)
++ return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat,
++ sizeof(*flow_act1->pkt_reformat));
++
++ return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat);
+ }
+
+ static int
+@@ -307,6 +309,8 @@ revert_changes:
+ for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
+ struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
+
++ attr->dests[curr_dest].termtbl = NULL;
++
+ /* search for the destination associated with the
+ * current term table
+ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index fe501ba88bea9..161ad2ae40196 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -113,7 +113,7 @@
+ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
+
+ #define KERNEL_NIC_TC_NUM_PRIOS 1
+-#define KERNEL_NIC_TC_NUM_LEVELS 2
++#define KERNEL_NIC_TC_NUM_LEVELS 3
+
+ #define ANCHOR_NUM_LEVELS 1
+ #define ANCHOR_NUM_PRIOS 1
+@@ -802,7 +802,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ struct fs_node *iter = list_entry(start, struct fs_node, list);
+ struct mlx5_flow_table *ft = NULL;
+
+- if (!root || root->type == FS_TYPE_PRIO_CHAINS)
++ if (!root)
+ return NULL;
+
+ list_for_each_advance_continue(iter, &root->children, reverse) {
+@@ -818,20 +818,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ return ft;
+ }
+
+-/* If reverse is false then return the first flow table in next priority of
+- * prio in the tree, else return the last flow table in the previous priority
+- * of prio in the tree.
++static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
++ struct fs_node **child)
++{
++ struct fs_node *node = NULL;
++
++ while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
++ node = parent;
++ parent = parent->parent;
++ }
++
++ if (child)
++ *child = node;
++
++ return parent;
++}
++
++/* If reverse is false then return the first flow table next to the passed node
++ * in the tree, else return the last flow table before the node in the tree.
++ * If skip is true, skip the flow tables in the same prio_chains prio.
+ */
+-static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
++static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
++ bool skip)
+ {
++ struct fs_node *prio_chains_parent = NULL;
+ struct mlx5_flow_table *ft = NULL;
+ struct fs_node *curr_node;
+ struct fs_node *parent;
+
+- parent = prio->node.parent;
+- curr_node = &prio->node;
++ if (skip)
++ prio_chains_parent = find_prio_chains_parent(node, NULL);
++ parent = node->parent;
++ curr_node = node;
+ while (!ft && parent) {
+- ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
++ if (parent != prio_chains_parent)
++ ft = find_closest_ft_recursive(parent, &curr_node->list,
++ reverse);
+ curr_node = parent;
+ parent = curr_node->parent;
+ }
+@@ -839,15 +861,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers
+ }
+
+ /* Assuming all the tree is locked by mutex chain lock */
+-static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
++static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
+ {
+- return find_closest_ft(prio, false);
++ return find_closest_ft(node, false, true);
+ }
+
+ /* Assuming all the tree is locked by mutex chain lock */
+-static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
++static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
+ {
+- return find_closest_ft(prio, true);
++ return find_closest_ft(node, true, true);
+ }
+
+ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+@@ -859,7 +881,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+ next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+ fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
+
+- return find_next_chained_ft(prio);
++ return find_next_chained_ft(&prio->node);
+ }
+
+ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+@@ -883,21 +905,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+ return 0;
+ }
+
++static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
++ struct fs_node *parent,
++ struct fs_node **child,
++ bool reverse)
++{
++ struct mlx5_flow_table *ft;
++
++ ft = find_closest_ft(node, reverse, false);
++
++ if (ft && parent == find_prio_chains_parent(&ft->node, child))
++ return ft;
++
++ return NULL;
++}
++
+ /* Connect flow tables from previous priority of prio to ft */
+ static int connect_prev_fts(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+ {
++ struct fs_node *prio_parent, *parent = NULL, *child, *node;
+ struct mlx5_flow_table *prev_ft;
++ int err = 0;
++
++ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+- prev_ft = find_prev_chained_ft(prio);
+- if (prev_ft) {
++ /* return directly if not under the first sub ns of prio_chains prio */
++ if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
++ return 0;
++
++ prev_ft = find_prev_chained_ft(&prio->node);
++ while (prev_ft) {
+ struct fs_prio *prev_prio;
+
+ fs_get_obj(prev_prio, prev_ft->node.parent);
+- return connect_fts_in_prio(dev, prev_prio, ft);
++ err = connect_fts_in_prio(dev, prev_prio, ft);
++ if (err)
++ break;
++
++ if (!parent) {
++ parent = find_prio_chains_parent(&prev_prio->node, &child);
++ if (!parent)
++ break;
++ }
++
++ node = child;
++ prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
+ }
+- return 0;
++ return err;
+ }
+
+ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+@@ -1036,7 +1092,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
+ if (err)
+ return err;
+
+- next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
++ next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
+ err = connect_fwd_rules(dev, ft, next_ft);
+ if (err)
+ return err;
+@@ -1111,7 +1167,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
+
+ tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
+ next_ft = unmanaged ? ft_attr->next_ft :
+- find_next_chained_ft(fs_prio);
++ find_next_chained_ft(&fs_prio->node);
+ ft->def_miss_action = ns->def_miss_action;
+ ft->ns = ns;
+ err = root->cmds->create_flow_table(root, ft, ft_attr->max_fte, next_ft);
+@@ -1527,9 +1583,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
+ return NULL;
+ }
+
+-static bool check_conflicting_actions(u32 action1, u32 action2)
++static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
++ const struct mlx5_fs_vlan *vlan1)
+ {
+- u32 xored_actions = action1 ^ action2;
++ return vlan0->ethtype != vlan1->ethtype ||
++ vlan0->vid != vlan1->vid ||
++ vlan0->prio != vlan1->prio;
++}
++
++static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
++ const struct mlx5_flow_act *act2)
++{
++ u32 action1 = act1->action;
++ u32 action2 = act2->action;
++ u32 xored_actions;
++
++ xored_actions = action1 ^ action2;
+
+ /* if one rule only wants to count, it's ok */
+ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
+@@ -1546,6 +1615,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+ return true;
+
++ if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
++ act1->pkt_reformat != act2->pkt_reformat)
++ return true;
++
++ if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
++ act1->modify_hdr != act2->modify_hdr)
++ return true;
++
++ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
++ check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
++ return true;
++
++ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
++ check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
++ return true;
++
+ return false;
+ }
+
+@@ -1553,7 +1638,7 @@ static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
+ {
+- if (check_conflicting_actions(flow_act->action, fte->action.action)) {
++ if (check_conflicting_actions(flow_act, &fte->action)) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "Found two FTEs with conflicting actions\n");
+ return -EEXIST;
+@@ -2031,16 +2116,18 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
+ down_write_ref_node(&fte->node, false);
+ for (i = handle->num_rules - 1; i >= 0; i--)
+ tree_remove_node(&handle->rule[i]->node, true);
+- if (fte->dests_size) {
+- if (fte->modify_mask)
+- modify_fte(fte);
+- up_write_ref_node(&fte->node, false);
+- } else if (list_empty(&fte->node.children)) {
++ if (list_empty(&fte->node.children)) {
+ del_hw_fte(&fte->node);
+ /* Avoid double call to del_hw_fte */
+ fte->node.del_hw_func = NULL;
+ up_write_ref_node(&fte->node, false);
+ tree_put_node(&fte->node, false);
++ } else if (fte->dests_size) {
++ if (fte->modify_mask)
++ modify_fte(fte);
++ up_write_ref_node(&fte->node, false);
++ } else {
++ up_write_ref_node(&fte->node, false);
+ }
+ kfree(handle);
+ }
+@@ -2049,13 +2136,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules);
+ /* Assuming prio->node.children(flow tables) is sorted by level */
+ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+ {
++ struct fs_node *prio_parent, *child;
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+
+ if (!list_is_last(&ft->node.list, &prio->node.children))
+ return list_next_entry(ft, node.list);
+- return find_next_chained_ft(prio);
++
++ prio_parent = find_prio_chains_parent(&prio->node, &child);
++
++ if (prio_parent && list_is_first(&child->list, &prio_parent->children))
++ return find_closest_ft(&prio->node, false, false);
++
++ return find_next_chained_ft(&prio->node);
+ }
+
+ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+index 106b50e42b464..8ed1549a99c42 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -111,6 +111,28 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
+ }
+ }
+
++static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
++{
++ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
++
++ del_timer_sync(&fw_reset->timer);
++}
++
++static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
++{
++ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
++
++ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
++ mlx5_core_warn(dev, "Reset request was already cleared\n");
++ return -EALREADY;
++ }
++
++ mlx5_stop_sync_reset_poll(dev);
++ if (poll_health)
++ mlx5_start_health_poll(dev);
++ return 0;
++}
++
+ static void mlx5_sync_reset_reload_work(struct work_struct *work)
+ {
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+@@ -118,6 +140,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
++ mlx5_sync_reset_clear_reset_requested(dev, false);
+ mlx5_enter_error_state(dev, true);
+ mlx5_unload_one(dev);
+ err = mlx5_health_wait_pci_up(dev);
+@@ -127,23 +150,6 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
+ mlx5_fw_reset_complete_reload(dev);
+ }
+
+-static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+-{
+- struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+-
+- del_timer(&fw_reset->timer);
+-}
+-
+-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+-{
+- struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+-
+- mlx5_stop_sync_reset_poll(dev);
+- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
+- if (poll_health)
+- mlx5_start_health_poll(dev);
+-}
+-
+ #define MLX5_RESET_POLL_INTERVAL (HZ / 10)
+ static void poll_sync_reset(struct timer_list *t)
+ {
+@@ -158,7 +164,6 @@ static void poll_sync_reset(struct timer_list *t)
+
+ if (fatal_error) {
+ mlx5_core_warn(dev, "Got Device Reset\n");
+- mlx5_sync_reset_clear_reset_requested(dev, false);
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ return;
+ }
+@@ -185,13 +190,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
+ return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
+ }
+
+-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
++static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
++ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
++ mlx5_core_warn(dev, "Reset request was already set\n");
++ return -EALREADY;
++ }
+ mlx5_stop_health_poll(dev, true);
+- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
+ mlx5_start_sync_reset_poll(dev);
++ return 0;
+ }
+
+ static void mlx5_fw_live_patch_event(struct work_struct *work)
+@@ -220,7 +229,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
+ err ? "Failed" : "Sent");
+ return;
+ }
+- mlx5_sync_reset_set_reset_requested(dev);
++ if (mlx5_sync_reset_set_reset_requested(dev))
++ return;
++
+ err = mlx5_fw_reset_set_reset_sync_ack(dev);
+ if (err)
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
+@@ -263,16 +274,11 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+ pci_cfg_access_lock(sdev);
+ }
+ /* PCI link toggle */
+- err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16);
+- if (err)
+- return err;
+- reg16 |= PCI_EXP_LNKCTL_LD;
+- err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
++ err = pcie_capability_set_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD);
+ if (err)
+ return err;
+ msleep(500);
+- reg16 &= ~PCI_EXP_LNKCTL_LD;
+- err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
++ err = pcie_capability_clear_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD);
+ if (err)
+ return err;
+
+@@ -320,7 +326,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
+ struct mlx5_core_dev *dev = fw_reset->dev;
+ int err;
+
+- mlx5_sync_reset_clear_reset_requested(dev, false);
++ if (mlx5_sync_reset_clear_reset_requested(dev, false))
++ return;
+
+ mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
+
+@@ -349,10 +356,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
+ reset_abort_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
++ if (mlx5_sync_reset_clear_reset_requested(dev, true))
+ return;
+-
+- mlx5_sync_reset_clear_reset_requested(dev, true);
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+index 037e18dd4be0e..1504856fafde4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -614,6 +614,13 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
++ mutex_lock(&dev->intf_state_mutex);
++ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
++ mlx5_core_err(dev, "health works are not permitted at this stage\n");
++ mutex_unlock(&dev->intf_state_mutex);
++ return;
++ }
++ mutex_unlock(&dev->intf_state_mutex);
+ enter_error_state(dev, false);
+ if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ if (mlx5_health_try_recover(dev))
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+index 0c8594c7df21d..908e5ee1a30fa 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+@@ -172,16 +172,16 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
+ }
+ }
+
+-static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
++static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+ {
+ int rate, width;
+
+ rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
+ if (rate < 0)
+- return -EINVAL;
++ return SPEED_UNKNOWN;
+ width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
+ if (width < 0)
+- return -EINVAL;
++ return SPEED_UNKNOWN;
+
+ return rate * width;
+ }
+@@ -204,16 +204,13 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev,
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
+- if (speed < 0)
+- return -EINVAL;
++ link_ksettings->base.speed = speed;
++ link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL;
+
+- link_ksettings->base.duplex = DUPLEX_FULL;
+ link_ksettings->base.port = PORT_OTHER;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+- link_ksettings->base.speed = speed;
+-
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+index 269ebb53eda67..10940b8dc83e2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+@@ -67,9 +67,13 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+- params->lro_en = false;
++ params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+ params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+ params->tunneled_offload_en = false;
++
++ /* CQE compression is not supported for IPoIB */
++ params->rx_cqe_compress_def = false;
++ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ }
+
+ /* Called directly after IPoIB netdevice was created to initialize SW structs */
+@@ -353,7 +357,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+ {
+ struct mlx5_core_dev *mdev = priv->mdev;
+- struct mlx5e_lro_param lro_param;
+ int err;
+
+ priv->rx_res = mlx5e_rx_res_alloc();
+@@ -368,9 +371,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+ goto err_destroy_q_counters;
+ }
+
+- lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+- priv->max_nch, priv->drop_rq.rqn, &lro_param,
++ priv->max_nch, priv->drop_rq.rqn,
++ &priv->channels.params.packet_merge,
+ priv->channels.params.num_channels);
+ if (err)
+ goto err_close_drop_rq;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+index d2105c1635c34..5f8b7f3735b69 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+@@ -435,7 +435,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
+ {
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+- struct lag_tracker tracker;
++ struct lag_tracker tracker = { };
+ bool do_bond, roce_lag;
+ int err;
+
+@@ -565,6 +565,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ bool is_bonded, is_in_lag, mode_supported;
+ int bond_status = 0;
+ int num_slaves = 0;
++ int changed = 0;
+ int idx;
+
+ if (!netif_is_lag_master(upper))
+@@ -601,27 +602,27 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ */
+ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+
+- if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
+- NL_SET_ERR_MSG_MOD(info->info.extack,
+- "Can't activate LAG offload, PF is configured with more than 64 VFs");
+- return 0;
+- }
+-
+ /* Lag mode must be activebackup or hash. */
+ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
+ tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+
+- if (is_in_lag && !mode_supported)
+- NL_SET_ERR_MSG_MOD(info->info.extack,
+- "Can't activate LAG offload, TX type isn't supported");
+-
+ is_bonded = is_in_lag && mode_supported;
+ if (tracker->is_bonded != is_bonded) {
+ tracker->is_bonded = is_bonded;
+- return 1;
++ changed = 1;
+ }
+
+- return 0;
++ if (!is_in_lag)
++ return changed;
++
++ if (!mlx5_lag_is_ready(ldev))
++ NL_SET_ERR_MSG_MOD(info->info.extack,
++ "Can't activate LAG offload, PF is configured with more than 64 VFs");
++ else if (!mode_supported)
++ NL_SET_ERR_MSG_MOD(info->info.extack,
++ "Can't activate LAG offload, TX type isn't supported");
++
++ return changed;
+ }
+
+ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+@@ -664,9 +665,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
+
+ ldev = container_of(this, struct mlx5_lag, nb);
+
+- if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
+- return NOTIFY_DONE;
+-
+ tracker = ldev->tracker;
+
+ switch (event) {
+@@ -693,30 +691,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+ struct net_device *netdev)
+ {
+ unsigned int fn = PCI_FUNC(dev->pdev->devfn);
++ unsigned long flags;
+
+ if (fn >= MLX5_MAX_PORTS)
+ return;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+ }
+
+ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+ struct net_device *netdev)
+ {
++ unsigned long flags;
+ int i;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ if (ldev->pf[i].netdev == netdev) {
+ ldev->pf[i].netdev = NULL;
+ break;
+ }
+ }
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+ }
+
+ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+@@ -754,12 +754,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+ struct mlx5_lag *ldev = NULL;
+ struct mlx5_core_dev *tmp_dev;
+
+- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+- !MLX5_CAP_GEN(dev, lag_master) ||
+- MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
+- return 0;
+-
+- tmp_dev = mlx5_get_next_phys_dev(dev);
++ tmp_dev = mlx5_get_next_phys_dev_lag(dev);
+ if (tmp_dev)
+ ldev = tmp_dev->priv.lag;
+
+@@ -804,6 +799,11 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+ {
+ int err;
+
++ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
++ !MLX5_CAP_GEN(dev, lag_master) ||
++ MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
++ return;
++
+ recheck:
+ mlx5_dev_list_lock();
+ err = __mlx5_lag_dev_add_mdev(dev);
+@@ -857,12 +857,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ bool res;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+ }
+@@ -871,12 +872,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce);
+ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ bool res;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev);
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+ }
+@@ -885,13 +887,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active);
+ bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ bool res;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev) &&
+ dev == ldev->pf[MLX5_LAG_P1].dev;
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+ }
+@@ -900,12 +903,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master);
+ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ bool res;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+ }
+@@ -914,12 +918,13 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov);
+ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ bool res;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return res;
+ }
+@@ -967,8 +972,9 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+ {
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+@@ -985,7 +991,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+ dev_hold(ndev);
+
+ unlock:
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ return ndev;
+ }
+@@ -995,9 +1001,10 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+ struct net_device *slave)
+ {
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ u8 port = 0;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
+ goto unlock;
+@@ -1010,7 +1017,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+ port = ldev->v2p_map[port];
+
+ unlock:
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+ return port;
+ }
+ EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+@@ -1019,8 +1026,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_core_dev *peer_dev = NULL;
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ goto unlock;
+@@ -1030,7 +1038,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+ ldev->pf[MLX5_LAG_P1].dev;
+
+ unlock:
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+ return peer_dev;
+ }
+ EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+@@ -1043,6 +1051,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_lag *ldev;
++ unsigned long flags;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+@@ -1053,7 +1062,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+- spin_lock(&lag_lock);
++ spin_lock_irqsave(&lag_lock, flags);
+ ldev = mlx5_lag_dev(dev);
+ if (ldev && __mlx5_lag_is_active(ldev)) {
+ num_ports = MLX5_MAX_PORTS;
+@@ -1063,7 +1072,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ num_ports = 1;
+ mdev[MLX5_LAG_P1] = dev;
+ }
+- spin_unlock(&lag_lock);
++ spin_unlock_irqrestore(&lag_lock, flags);
+
+ for (i = 0; i < num_ports; ++i) {
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+index 21fdaf708f1fe..81786a9a424c8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+@@ -100,6 +100,14 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+ flush_workqueue(mp->wq);
+ }
+
++static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
++{
++ mp->fib.mfi = fi;
++ mp->fib.priority = fi->fib_priority;
++ mp->fib.dst = dst;
++ mp->fib.dst_len = dst_len;
++}
++
+ struct mlx5_fib_event_work {
+ struct work_struct work;
+ struct mlx5_lag *ldev;
+@@ -110,10 +118,10 @@ struct mlx5_fib_event_work {
+ };
+ };
+
+-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+- unsigned long event,
+- struct fib_info *fi)
++static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
++ struct fib_entry_notifier_info *fen_info)
+ {
++ struct fib_info *fi = fen_info->fi;
+ struct lag_mp *mp = &ldev->lag_mp;
+ struct fib_nh *fib_nh0, *fib_nh1;
+ unsigned int nhs;
+@@ -121,11 +129,17 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ /* Handle delete event */
+ if (event == FIB_EVENT_ENTRY_DEL) {
+ /* stop track */
+- if (mp->mfi == fi)
+- mp->mfi = NULL;
++ if (mp->fib.mfi == fi)
++ mp->fib.mfi = NULL;
+ return;
+ }
+
++ /* Handle multipath entry with lower priority value */
++ if (mp->fib.mfi && mp->fib.mfi != fi &&
++ (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
++ fi->fib_priority >= mp->fib.priority)
++ return;
++
+ /* Handle add/replace event */
+ nhs = fib_info_num_path(fi);
+ if (nhs == 1) {
+@@ -135,12 +149,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
+ if (i < 0)
+- i = MLX5_LAG_NORMAL_AFFINITY;
+- else
+- ++i;
++ return;
+
++ i++;
+ mlx5_lag_set_port_affinity(ldev, i);
++ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+ }
++
+ return;
+ }
+
+@@ -160,7 +175,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ }
+
+ /* First time we see multipath route */
+- if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
++ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
+ struct lag_tracker tracker;
+
+ tracker = ldev->tracker;
+@@ -168,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ }
+
+ mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+- mp->mfi = fi;
++ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+ }
+
+ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+@@ -179,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ /* Check the nh event is related to the route */
+- if (!mp->mfi || mp->mfi != fi)
++ if (!mp->fib.mfi || mp->fib.mfi != fi)
+ return;
+
+ /* nh added/removed */
+@@ -209,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work)
+ case FIB_EVENT_ENTRY_REPLACE:
+ case FIB_EVENT_ENTRY_DEL:
+ mlx5_lag_fib_route_event(ldev, fib_work->event,
+- fib_work->fen_info.fi);
++ &fib_work->fen_info);
+ fib_info_put(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD:
+@@ -268,10 +283,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fi = fen_info->fi;
+- if (fi->nh) {
+- NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+- return notifier_from_errno(-EINVAL);
+- }
++ if (fi->nh)
++ return NOTIFY_DONE;
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
+ fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
+@@ -310,7 +323,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+ /* Clear mfi, as it might become stale when a route delete event
+ * has been missed, see mlx5_lag_fib_route_event().
+ */
+- ldev->lag_mp.mfi = NULL;
++ ldev->lag_mp.fib.mfi = NULL;
+ }
+
+ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+@@ -321,7 +334,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+ /* always clear mfi, as it might become stale when a route delete event
+ * has been missed
+ */
+- mp->mfi = NULL;
++ mp->fib.mfi = NULL;
+
+ if (mp->fib_nb.notifier_call)
+ return 0;
+@@ -351,5 +364,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+ unregister_fib_notifier(&init_net, &mp->fib_nb);
+ destroy_workqueue(mp->wq);
+ mp->fib_nb.notifier_call = NULL;
+- mp->mfi = NULL;
++ mp->fib.mfi = NULL;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+index dea199e79beda..b3a7f18b9e303 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+@@ -15,7 +15,12 @@ enum mlx5_lag_port_affinity {
+
+ struct lag_mp {
+ struct notifier_block fib_nb;
+- struct fib_info *mfi; /* used in tracking fib events */
++ struct {
++ const void *mfi; /* used in tracking fib events */
++ u32 priority;
++ u32 dst;
++ int dst_len;
++ } fib;
+ struct workqueue_struct *wq;
+ };
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+index 91e806c1aa211..6fece284de0f3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+@@ -189,10 +189,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
+ clock = container_of(timer, struct mlx5_clock, timer);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
++ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++ goto out;
++
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_read(&timer->tc);
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
++
++out:
+ schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+ }
+
+@@ -599,7 +604,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ static const struct ptp_clock_info mlx5_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlx5_ptp",
+- .max_adj = 100000000,
++ .max_adj = 50000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+index bced2efe9bef4..b7d779d08d837 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+@@ -3,6 +3,7 @@
+
+ #include <linux/mlx5/vport.h>
+ #include "lib/devcom.h"
++#include "mlx5_core.h"
+
+ static LIST_HEAD(devcom_list);
+
+@@ -13,8 +14,8 @@ static LIST_HEAD(devcom_list);
+
+ struct mlx5_devcom_component {
+ struct {
+- void *data;
+- } device[MLX5_MAX_PORTS];
++ void __rcu *data;
++ } device[MLX5_DEVCOM_PORTS_SUPPORTED];
+
+ mlx5_devcom_event_handler_t handler;
+ struct rw_semaphore sem;
+@@ -25,7 +26,7 @@ struct mlx5_devcom_list {
+ struct list_head list;
+
+ struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+- struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
++ struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
+ };
+
+ struct mlx5_devcom {
+@@ -74,13 +75,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
++ if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
++ return NULL;
+
++ mlx5_dev_list_lock();
+ sguid0 = mlx5_query_nic_system_image_guid(dev);
+ list_for_each_entry(iter, &devcom_list, list) {
+ struct mlx5_core_dev *tmp_dev = NULL;
+
+ idx = -1;
+- for (i = 0; i < MLX5_MAX_PORTS; i++) {
++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+ if (iter->devs[i])
+ tmp_dev = iter->devs[i];
+ else
+@@ -100,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+
+ if (!priv) {
+ priv = mlx5_devcom_list_alloc();
+- if (!priv)
+- return ERR_PTR(-ENOMEM);
++ if (!priv) {
++ devcom = ERR_PTR(-ENOMEM);
++ goto out;
++ }
+
+ idx = 0;
+ new_priv = true;
+@@ -110,13 +116,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+ priv->devs[idx] = dev;
+ devcom = mlx5_devcom_alloc(priv, idx);
+ if (!devcom) {
+- kfree(priv);
+- return ERR_PTR(-ENOMEM);
++ if (new_priv)
++ kfree(priv);
++ devcom = ERR_PTR(-ENOMEM);
++ goto out;
+ }
+
+ if (new_priv)
+ list_add(&priv->list, &devcom_list);
+-
++out:
++ mlx5_dev_list_unlock();
+ return devcom;
+ }
+
+@@ -129,20 +138,23 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
++ mlx5_dev_list_lock();
+ priv = devcom->priv;
+ priv->devs[devcom->idx] = NULL;
+
+ kfree(devcom);
+
+- for (i = 0; i < MLX5_MAX_PORTS; i++)
++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (priv->devs[i])
+ break;
+
+- if (i != MLX5_MAX_PORTS)
+- return;
++ if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
++ goto out;
+
+ list_del(&priv->list);
+ kfree(priv);
++out:
++ mlx5_dev_list_unlock();
+ }
+
+ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+@@ -160,7 +172,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->handler = handler;
+- comp->device[devcom->idx].data = data;
++ rcu_assign_pointer(comp->device[devcom->idx].data, data);
+ up_write(&comp->sem);
+ }
+
+@@ -174,8 +186,9 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+- comp->device[devcom->idx].data = NULL;
++ RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
+ up_write(&comp->sem);
++ synchronize_rcu();
+ }
+
+ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+@@ -191,12 +204,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+- for (i = 0; i < MLX5_MAX_PORTS; i++)
+- if (i != devcom->idx && comp->device[i].data) {
+- err = comp->handler(event, comp->device[i].data,
+- event_data);
++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
++ void *data = rcu_dereference_protected(comp->device[i].data,
++ lockdep_is_held(&comp->sem));
++
++ if (i != devcom->idx && data) {
++ err = comp->handler(event, data, event_data);
+ break;
+ }
++ }
+
+ up_write(&comp->sem);
+ return err;
+@@ -211,7 +227,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ comp = &devcom->priv->components[id];
+ WARN_ON(!rwsem_is_locked(&comp->sem));
+
+- comp->paired = paired;
++ WRITE_ONCE(comp->paired, paired);
+ }
+
+ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+@@ -220,7 +236,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ if (IS_ERR_OR_NULL(devcom))
+ return false;
+
+- return devcom->priv->components[id].paired;
++ return READ_ONCE(devcom->priv->components[id].paired);
+ }
+
+ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+@@ -234,16 +250,38 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+
+ comp = &devcom->priv->components[id];
+ down_read(&comp->sem);
+- if (!comp->paired) {
++ if (!READ_ONCE(comp->paired)) {
+ up_read(&comp->sem);
+ return NULL;
+ }
+
+- for (i = 0; i < MLX5_MAX_PORTS; i++)
++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (i != devcom->idx)
+ break;
+
+- return comp->device[i].data;
++ return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
++}
++
++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
++{
++ struct mlx5_devcom_component *comp;
++ int i;
++
++ if (IS_ERR_OR_NULL(devcom))
++ return NULL;
++
++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
++ if (i != devcom->idx)
++ break;
++
++ comp = &devcom->priv->components[id];
++ /* This can change concurrently, however 'data' pointer will remain
++ * valid for the duration of RCU read section.
++ */
++ if (!READ_ONCE(comp->paired))
++ return NULL;
++
++ return rcu_dereference(comp->device[i].data);
+ }
+
+ void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+index 939d5bf1581b5..9a496f4722dad 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+@@ -6,6 +6,8 @@
+
+ #include <linux/mlx5/driver.h>
+
++#define MLX5_DEVCOM_PORTS_SUPPORTED 2
++
+ enum mlx5_devcom_components {
+ MLX5_DEVCOM_ESW_OFFLOADS,
+
+@@ -39,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+
+ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
+ void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+index 97e5845b4cfdd..df58cba37930a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -124,6 +124,10 @@ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
++ if (!chains->dev->priv.eswitch ||
++ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
++ return 1;
++
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+ }
+@@ -208,7 +212,7 @@ static int
+ create_chain_restore(struct fs_chain *chain)
+ {
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
++ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
+index 23361a9ae4fa0..6dc83e871cd76 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c
+@@ -105,6 +105,7 @@ int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *op
+ geneve->opt_type = opt->type;
+ geneve->obj_id = res;
+ geneve->refcount++;
++ res = 0;
+ }
+
+ unlock:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+index 839a01da110f3..8ff16318e32dc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+@@ -122,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+- if (!MLX5_ESWITCH_MANAGER(dev))
++ if (!mpfs)
+ return;
+
+ WARN_ON(!hlist_empty(mpfs->hash));
+@@ -137,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+ int err = 0;
+ u32 index;
+
+- if (!MLX5_ESWITCH_MANAGER(dev))
++ if (!mpfs)
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+@@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+ int err = 0;
+ u32 index;
+
+- if (!MLX5_ESWITCH_MANAGER(dev))
++ if (!mpfs)
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 79482824c64ff..13eceb6016344 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -97,6 +97,8 @@ enum {
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+ };
+
++#define LOG_MAX_SUPPORTED_QPS 0xff
++
+ static struct mlx5_profile profile[] = {
+ [0] = {
+ .mask = 0,
+@@ -108,7 +110,7 @@ static struct mlx5_profile profile[] = {
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+- .log_max_qp = 18,
++ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
+@@ -513,7 +515,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+ to_fw_pkey_sz(dev, 128));
+
+ /* Check log_max_qp from HCA caps to set in current profile */
+- if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
++ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
++ prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp));
++ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
+ mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
+ prof->log_max_qp,
+ MLX5_CAP_GEN_MAX(dev, log_max_qp));
+@@ -777,7 +781,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
+ }
+
+ mlx5_pci_vsc_init(dev);
+- dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ return 0;
+
+ err_clr_master:
+@@ -899,7 +902,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
+
+ dev->dm = mlx5_dm_create(dev);
+ if (IS_ERR(dev->dm))
+- mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
++ mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
+
+ dev->tracer = mlx5_fw_tracer_create(dev);
+ dev->hv_vhca = mlx5_hv_vhca_create(dev);
+@@ -924,6 +927,8 @@ err_rl_cleanup:
+ err_tables_cleanup:
+ mlx5_geneve_destroy(dev->geneve);
+ mlx5_vxlan_destroy(dev->vxlan);
++ mlx5_cleanup_clock(dev);
++ mlx5_cleanup_reserved_gids(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+ mlx5_fw_reset_cleanup(dev);
+ err_events_cleanup:
+@@ -997,6 +1002,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
+ goto err_cmd_cleanup;
+ }
+
++ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+
+ err = mlx5_core_enable_hca(dev, 0);
+@@ -1423,7 +1429,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
+ memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
+ INIT_LIST_HEAD(&priv->ctx_list);
+ spin_lock_init(&priv->ctx_lock);
++ lockdep_register_key(&dev->lock_key);
+ mutex_init(&dev->intf_state_mutex);
++ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
+
+ mutex_init(&priv->bfregs.reg_head.lock);
+ mutex_init(&priv->bfregs.wc_head.lock);
+@@ -1470,6 +1478,7 @@ err_health_init:
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
++ lockdep_unregister_key(&dev->lock_key);
+ return err;
+ }
+
+@@ -1487,6 +1496,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
++ lockdep_unregister_key(&dev->lock_key);
+ }
+
+ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+@@ -1568,12 +1578,28 @@ static void remove_one(struct pci_dev *pdev)
+ mlx5_devlink_free(devlink);
+ }
+
++#define mlx5_pci_trace(dev, fmt, ...) ({ \
++ struct mlx5_core_dev *__dev = (dev); \
++ mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
++ __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
++ __dev->pci_status, ##__VA_ARGS__); \
++})
++
++static const char *result2str(enum pci_ers_result result)
++{
++ return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
++ result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
++ result == PCI_ERS_RESULT_RECOVERED ? "recovered" :
++ "unknown";
++}
++
+ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+ {
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
++ enum pci_ers_result res;
+
+- mlx5_core_info(dev, "%s was called\n", __func__);
++ mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);
+
+ mlx5_enter_error_state(dev, false);
+ mlx5_error_sw_reset(dev);
+@@ -1581,8 +1607,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
+
+- return state == pci_channel_io_perm_failure ?
++ res = state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
++
++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n",
++ __func__, dev->state, dev->pci_status, res, result2str(res));
++ return res;
+ }
+
+ /* wait for the device to show vital signs by waiting
+@@ -1616,28 +1646,36 @@ static int wait_vital(struct pci_dev *pdev)
+
+ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+ {
++ enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+- mlx5_core_info(dev, "%s was called\n", __func__);
++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
++ __func__, dev->state, dev->pci_status);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
+ __func__, err);
+- return PCI_ERS_RESULT_DISCONNECT;
++ goto out;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+- if (wait_vital(pdev)) {
+- mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
+- return PCI_ERS_RESULT_DISCONNECT;
++ err = wait_vital(pdev);
++ if (err) {
++ mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
++ __func__, err);
++ goto out;
+ }
+
+- return PCI_ERS_RESULT_RECOVERED;
++ res = PCI_ERS_RESULT_RECOVERED;
++out:
++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
++ __func__, dev->state, dev->pci_status, err, res, result2str(res));
++ return res;
+ }
+
+ static void mlx5_pci_resume(struct pci_dev *pdev)
+@@ -1645,14 +1683,16 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+- mlx5_core_info(dev, "%s was called\n", __func__);
++ mlx5_pci_trace(dev, "Enter, loading driver..\n");
+
+ err = mlx5_load_one(dev);
+- if (err)
+- mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
+- __func__, err);
+- else
+- mlx5_core_info(dev, "%s: device recovered\n", __func__);
++
++ if (!err)
++ devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
++ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
++
++ mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
++ !err ? "recovered" : "Failed");
+ }
+
+ static const struct pci_error_handlers mlx5_err_handler = {
+@@ -1756,10 +1796,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
+ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
+ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
++ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
+ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */
++ { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */
+ { 0, }
+ };
+
+@@ -1773,12 +1815,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
+
+ int mlx5_recover_device(struct mlx5_core_dev *dev)
+ {
+- int ret = -EIO;
++ if (!mlx5_core_is_sf(dev)) {
++ mlx5_pci_disable_device(dev);
++ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
++ return -EIO;
++ }
+
+- mlx5_pci_disable_device(dev);
+- if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
+- ret = mlx5_load_one(dev);
+- return ret;
++ return mlx5_load_one(dev);
+ }
+
+ static struct pci_driver mlx5_core_driver = {
+@@ -1806,7 +1849,7 @@ static void mlx5_core_verify_params(void)
+ }
+ }
+
+-static int __init init(void)
++static int __init mlx5_init(void)
+ {
+ int err;
+
+@@ -1819,7 +1862,7 @@ static int __init init(void)
+ mlx5_fpga_ipsec_build_fs_cmds();
+ mlx5_register_debugfs();
+
+- err = pci_register_driver(&mlx5_core_driver);
++ err = mlx5e_init();
+ if (err)
+ goto err_debug;
+
+@@ -1827,28 +1870,28 @@ static int __init init(void)
+ if (err)
+ goto err_sf;
+
+- err = mlx5e_init();
++ err = pci_register_driver(&mlx5_core_driver);
+ if (err)
+- goto err_en;
++ goto err_pci;
+
+ return 0;
+
+-err_en:
++err_pci:
+ mlx5_sf_driver_unregister();
+ err_sf:
+- pci_unregister_driver(&mlx5_core_driver);
++ mlx5e_cleanup();
+ err_debug:
+ mlx5_unregister_debugfs();
+ return err;
+ }
+
+-static void __exit cleanup(void)
++static void __exit mlx5_cleanup(void)
+ {
+- mlx5e_cleanup();
+- mlx5_sf_driver_unregister();
+ pci_unregister_driver(&mlx5_core_driver);
++ mlx5_sf_driver_unregister();
++ mlx5e_cleanup();
+ mlx5_unregister_debugfs();
+ }
+
+-module_init(init);
+-module_exit(cleanup);
++module_init(mlx5_init);
++module_exit(mlx5_cleanup);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+index 230eab7e3bc91..3f3ea8d268ce4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+@@ -186,6 +186,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev);
+ int mlx5_register_device(struct mlx5_core_dev *dev);
+ void mlx5_unregister_device(struct mlx5_core_dev *dev);
+ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
++struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
+ void mlx5_dev_list_lock(void);
+ void mlx5_dev_list_unlock(void);
+ int mlx5_dev_list_trylock(void);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+index abd024173c42e..8cf40a3658d99 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+@@ -16,6 +16,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+ int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
++void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
+ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+ int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
+ struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+index 110c0837f95b9..ae6ac51b8ab03 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+@@ -216,7 +216,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function)
+
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+ if (n >= MLX5_NUM_4K_IN_PAGE) {
+- mlx5_core_warn(dev, "alloc 4k bug\n");
++ mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n",
++ fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE);
+ return -ENOENT;
+ }
+ clear_bit(n, &fp->bitmask);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index 763c83a023809..2fa84556bc20e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -138,18 +138,23 @@ out:
+ return ret;
+ }
+
+-static void irq_release(struct mlx5_irq *irq)
++static void mlx5_system_free_irq(struct mlx5_irq *irq)
+ {
+- struct mlx5_irq_pool *pool = irq->pool;
+-
+- xa_erase(&pool->irqs, irq->index);
+ /* free_irq requires that affinity and rmap will be cleared
+ * before calling it. This is why there is asymmetry with set_rmap
+ * which should be called after alloc_irq but before request_irq.
+ */
+ irq_set_affinity_hint(irq->irqn, NULL);
+- free_cpumask_var(irq->mask);
+ free_irq(irq->irqn, &irq->nh);
++}
++
++static void irq_release(struct mlx5_irq *irq)
++{
++ struct mlx5_irq_pool *pool = irq->pool;
++
++ xa_erase(&pool->irqs, irq->index);
++ mlx5_system_free_irq(irq);
++ free_cpumask_var(irq->mask);
+ kfree(irq);
+ }
+
+@@ -346,8 +351,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
+ new_irq = irq_pool_create_irq(pool, affinity);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+- mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
+- cpumask_first(affinity));
++ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
++ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+@@ -550,6 +555,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
+ irq_pool_free(table->pf_pool);
+ }
+
++static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
++{
++ struct mlx5_irq *irq;
++ unsigned long index;
++
++ xa_for_each(&pool->irqs, index, irq)
++ mlx5_system_free_irq(irq);
++}
++
++static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
++{
++ if (table->sf_ctrl_pool) {
++ mlx5_irq_pool_free_irqs(table->sf_comp_pool);
++ mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
++ }
++ mlx5_irq_pool_free_irqs(table->pf_pool);
++}
++
+ /* irq_table API */
+
+ int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+@@ -630,6 +653,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+ pci_free_irq_vectors(dev->pdev);
+ }
+
++void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
++{
++ struct mlx5_irq_table *table = dev->priv.irq_table;
++
++ if (mlx5_core_is_sf(dev))
++ return;
++
++ mlx5_irq_pools_free_irqs(table);
++ pci_free_irq_vectors(dev->pdev);
++}
++
+ int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+ {
+ if (table->sf_comp_pool)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+index 1ef2b6a848c10..fd79860de723b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
++ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
++ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
++ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+ }
+@@ -432,35 +433,12 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
+ struct mlx5_module_eeprom_query_params *params,
+ u8 *data)
+ {
+- u8 module_id;
+ int err;
+
+ err = mlx5_query_module_num(dev, &params->module_number);
+ if (err)
+ return err;
+
+- err = mlx5_query_module_id(dev, params->module_number, &module_id);
+- if (err)
+- return err;
+-
+- switch (module_id) {
+- case MLX5_MODULE_ID_SFP:
+- if (params->page > 0)
+- return -EINVAL;
+- break;
+- case MLX5_MODULE_ID_QSFP:
+- case MLX5_MODULE_ID_QSFP28:
+- case MLX5_MODULE_ID_QSFP_PLUS:
+- if (params->page > 3)
+- return -EINVAL;
+- break;
+- case MLX5_MODULE_ID_DSFP:
+- break;
+- default:
+- mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+- return -EINVAL;
+- }
+-
+ if (params->i2c_address != MLX5_I2C_ADDR_HIGH &&
+ params->i2c_address != MLX5_I2C_ADDR_LOW) {
+ mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+index 871c2fbe18d39..64bbc18332d56 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+@@ -28,10 +28,7 @@ bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
+ {
+ struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
+
+- if (!mlx5_sf_dev_supported(dev))
+- return false;
+-
+- return !xa_empty(&table->devices);
++ return table && !xa_empty(&table->devices);
+ }
+
+ static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+index e8185b69ac6c2..373d3d4bf3a65 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+@@ -256,8 +256,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+ kvfree(out);
+- if (host_total_vfs)
+- return host_total_vfs;
++ return host_total_vfs;
+ }
+
+ done:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+index a5b9f65db23c6..897c7f8521238 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+@@ -846,7 +846,8 @@ struct mlx5dr_action *
+ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests,
+- bool ignore_flow_level)
++ bool ignore_flow_level,
++ u32 flow_source)
+ {
+ struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
+ struct mlx5dr_action **ref_actions;
+@@ -914,7 +915,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ reformat_req,
+ &action->dest_tbl->fw_tbl.id,
+ &action->dest_tbl->fw_tbl.group_id,
+- ignore_flow_level);
++ ignore_flow_level,
++ flow_source);
+ if (ret)
+ goto free_action;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+index 56307283bf9b3..aa003a75946bb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
+ caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+ caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
++ caps->roce_caps.fl_rc_qp_when_roce_disabled =
++ MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
+
+ if (MLX5_CAP_GEN(mdev, roce)) {
+ err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
+@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ return err;
+
+ caps->roce_caps.roce_en = roce_en;
+- caps->roce_caps.fl_rc_qp_when_roce_disabled =
++ caps->roce_caps.fl_rc_qp_when_roce_disabled |=
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
+ caps->roce_caps.fl_rc_qp_when_roce_enabled =
+ MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
+@@ -526,11 +528,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+- return err;
++ goto err_free_in;
+
+ *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+- kvfree(in);
+
++err_free_in:
++ kvfree(in);
+ return err;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+index 0fe159809ba15..ea1b8ca5bf3aa 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+@@ -2,6 +2,7 @@
+ /* Copyright (c) 2019 Mellanox Technologies. */
+
+ #include <linux/mlx5/eswitch.h>
++#include <linux/err.h>
+ #include "dr_types.h"
+
+ #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
+@@ -75,9 +76,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
+ }
+
+ dmn->uar = mlx5_get_uars_page(dmn->mdev);
+- if (!dmn->uar) {
++ if (IS_ERR(dmn->uar)) {
+ mlx5dr_err(dmn, "Couldn't allocate UAR\n");
+- ret = -ENOMEM;
++ ret = PTR_ERR(dmn->uar);
+ goto clean_pd;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+index 0d6f86eb248b9..c74083de1801b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+@@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id,
+- bool ignore_flow_level)
++ bool ignore_flow_level,
++ u32 flow_source)
+ {
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+ struct mlx5dr_cmd_fte_info fte_info = {};
+@@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ fte_info.val = val;
+ fte_info.dest_arr = dest;
+ fte_info.ignore_flow_level = ignore_flow_level;
++ fte_info.flow_context.flow_source = flow_source;
+
+ ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
+ if (ret) {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+index 66c24767e3b00..8ad8d73e17f06 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+@@ -4,7 +4,6 @@
+ #include "dr_types.h"
+
+ #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
+-#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024)
+
+ struct mlx5dr_icm_pool {
+ enum mlx5dr_icm_type icm_type;
+@@ -136,37 +135,35 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
+ kvfree(icm_mr);
+ }
+
+-static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
++static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
+ {
+- chunk->ste_arr = kvzalloc(chunk->num_of_entries *
+- sizeof(chunk->ste_arr[0]), GFP_KERNEL);
+- if (!chunk->ste_arr)
+- return -ENOMEM;
+-
+- chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries *
+- DR_STE_SIZE_REDUCED, GFP_KERNEL);
+- if (!chunk->hw_ste_arr)
+- goto out_free_ste_arr;
+-
+- chunk->miss_list = kvmalloc(chunk->num_of_entries *
+- sizeof(chunk->miss_list[0]), GFP_KERNEL);
+- if (!chunk->miss_list)
+- goto out_free_hw_ste_arr;
++ /* We support only one type of STE size, both for ConnectX-5 and later
++ * devices. Once the support for match STE which has a larger tag is
++ * added (32B instead of 16B), the STE size for devices later than
++ * ConnectX-5 needs to account for that.
++ */
++ return DR_STE_SIZE_REDUCED;
++}
+
+- return 0;
++static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
++{
++ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
++ int index = offset / DR_STE_SIZE;
+
+-out_free_hw_ste_arr:
+- kvfree(chunk->hw_ste_arr);
+-out_free_ste_arr:
+- kvfree(chunk->ste_arr);
+- return -ENOMEM;
++ chunk->ste_arr = &buddy->ste_arr[index];
++ chunk->miss_list = &buddy->miss_list[index];
++ chunk->hw_ste_arr = buddy->hw_ste_arr +
++ index * dr_icm_buddy_get_ste_size(buddy);
+ }
+
+ static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+ {
+- kvfree(chunk->miss_list);
+- kvfree(chunk->hw_ste_arr);
+- kvfree(chunk->ste_arr);
++ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
++
++ memset(chunk->hw_ste_arr, 0,
++ chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy));
++ memset(chunk->ste_arr, 0,
++ chunk->num_of_entries * sizeof(chunk->ste_arr[0]));
+ }
+
+ static enum mlx5dr_icm_type
+@@ -189,6 +186,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
+ kvfree(chunk);
+ }
+
++static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
++{
++ int num_of_entries =
++ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
++
++ buddy->ste_arr = kvcalloc(num_of_entries,
++ sizeof(struct mlx5dr_ste), GFP_KERNEL);
++ if (!buddy->ste_arr)
++ return -ENOMEM;
++
++ /* Preallocate full STE size on non-ConnectX-5 devices since
++ * we need to support both full and reduced with the same cache.
++ */
++ buddy->hw_ste_arr = kvcalloc(num_of_entries,
++ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
++ if (!buddy->hw_ste_arr)
++ goto free_ste_arr;
++
++ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
++ if (!buddy->miss_list)
++ goto free_hw_ste_arr;
++
++ return 0;
++
++free_hw_ste_arr:
++ kvfree(buddy->hw_ste_arr);
++free_ste_arr:
++ kvfree(buddy->ste_arr);
++ return -ENOMEM;
++}
++
++static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
++{
++ kvfree(buddy->ste_arr);
++ kvfree(buddy->hw_ste_arr);
++ kvfree(buddy->miss_list);
++}
++
+ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
+ {
+ struct mlx5dr_icm_buddy_mem *buddy;
+@@ -208,11 +243,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
+ buddy->icm_mr = icm_mr;
+ buddy->pool = pool;
+
++ if (pool->icm_type == DR_ICM_TYPE_STE) {
++ /* Reduce allocations by preallocating and reusing the STE structures */
++ if (dr_icm_buddy_init_ste_cache(buddy))
++ goto err_cleanup_buddy;
++ }
++
+ /* add it to the -start- of the list in order to search in it first */
+ list_add(&buddy->list_node, &pool->buddy_mem_list);
+
+ return 0;
+
++err_cleanup_buddy:
++ mlx5dr_buddy_cleanup(buddy);
+ err_free_buddy:
+ kvfree(buddy);
+ free_mr:
+@@ -234,6 +277,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
+
+ mlx5dr_buddy_cleanup(buddy);
+
++ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
++ dr_icm_buddy_cleanup_ste_cache(buddy);
++
+ kvfree(buddy);
+ }
+
+@@ -261,34 +307,30 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
+ chunk->byte_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
+ chunk->seg = seg;
++ chunk->buddy_mem = buddy_mem_pool;
+
+- if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) {
+- mlx5dr_err(pool->dmn,
+- "Failed to init ste arrays (order: %d)\n",
+- chunk_size);
+- goto out_free_chunk;
+- }
++ if (pool->icm_type == DR_ICM_TYPE_STE)
++ dr_icm_chunk_ste_init(chunk, offset);
+
+ buddy_mem_pool->used_memory += chunk->byte_size;
+- chunk->buddy_mem = buddy_mem_pool;
+ INIT_LIST_HEAD(&chunk->chunk_list);
+
+ /* chunk now is part of the used_list */
+ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
+
+ return chunk;
+-
+-out_free_chunk:
+- kvfree(chunk);
+- return NULL;
+ }
+
+ static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
+ {
+- if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL)
+- return true;
++ int allow_hot_size;
++
++ /* sync when hot memory reaches half of the pool size */
++ allow_hot_size =
++ mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
++ pool->icm_type) / 2;
+
+- return false;
++ return pool->hot_memory_size > allow_hot_size;
+ }
+
+ static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+index b5409cc021d33..0f99d3612f89d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+@@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
+ return (spec->dmac_47_16 || spec->dmac_15_0);
+ }
+
+-static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
+-{
+- return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
+- spec->src_ip_63_32 || spec->src_ip_31_0);
+-}
+-
+-static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
+-{
+- return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
+- spec->dst_ip_63_32 || spec->dst_ip_31_0);
+-}
+-
+ static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
+ {
+ return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
+@@ -480,11 +468,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
+ &mask, inner, rx);
+
+ if (outer_ipv == DR_RULE_IPV6) {
+- if (dr_mask_is_dst_addr_set(&mask.outer))
++ if (DR_MASK_IS_DST_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+- if (dr_mask_is_src_addr_set(&mask.outer))
++ if (DR_MASK_IS_SRC_IP_SET(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+@@ -580,11 +568,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
+ &mask, inner, rx);
+
+ if (inner_ipv == DR_RULE_IPV6) {
+- if (dr_mask_is_dst_addr_set(&mask.inner))
++ if (DR_MASK_IS_DST_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+- if (dr_mask_is_src_addr_set(&mask.inner))
++ if (DR_MASK_IS_SRC_IP_SET(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++],
+ &mask, inner, rx);
+
+@@ -721,7 +709,7 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+ int ret;
+
+ next_matcher = NULL;
+- list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) {
++ list_for_each_entry(tmp_matcher, &tbl->matcher_list, list_node) {
+ if (tmp_matcher->prio >= matcher->prio) {
+ next_matcher = tmp_matcher;
+ break;
+@@ -731,11 +719,11 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+
+ prev_matcher = NULL;
+ if (next_matcher && !first)
+- prev_matcher = list_prev_entry(next_matcher, matcher_list);
++ prev_matcher = list_prev_entry(next_matcher, list_node);
+ else if (!first)
+ prev_matcher = list_last_entry(&tbl->matcher_list,
+ struct mlx5dr_matcher,
+- matcher_list);
++ list_node);
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+@@ -756,12 +744,12 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+ }
+
+ if (prev_matcher)
+- list_add(&matcher->matcher_list, &prev_matcher->matcher_list);
++ list_add(&matcher->list_node, &prev_matcher->list_node);
+ else if (next_matcher)
+- list_add_tail(&matcher->matcher_list,
+- &next_matcher->matcher_list);
++ list_add_tail(&matcher->list_node,
++ &next_matcher->list_node);
+ else
+- list_add(&matcher->matcher_list, &tbl->matcher_list);
++ list_add(&matcher->list_node, &tbl->matcher_list);
+
+ return 0;
+ }
+@@ -934,7 +922,7 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
+ matcher->prio = priority;
+ matcher->match_criteria = match_criteria_enable;
+ refcount_set(&matcher->refcount, 1);
+- INIT_LIST_HEAD(&matcher->matcher_list);
++ INIT_LIST_HEAD(&matcher->list_node);
+
+ mlx5dr_domain_lock(tbl->dmn);
+
+@@ -997,15 +985,15 @@ static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret = 0;
+
+- if (list_is_last(&matcher->matcher_list, &tbl->matcher_list))
++ if (list_is_last(&matcher->list_node, &tbl->matcher_list))
+ next_matcher = NULL;
+ else
+- next_matcher = list_next_entry(matcher, matcher_list);
++ next_matcher = list_next_entry(matcher, list_node);
+
+- if (matcher->matcher_list.prev == &tbl->matcher_list)
++ if (matcher->list_node.prev == &tbl->matcher_list)
+ prev_matcher = NULL;
+ else
+- prev_matcher = list_prev_entry(matcher, matcher_list);
++ prev_matcher = list_prev_entry(matcher, list_node);
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+@@ -1025,7 +1013,7 @@ static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+ return ret;
+ }
+
+- list_del(&matcher->matcher_list);
++ list_del(&matcher->list_node);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+index 1cdfe4fccc7a9..5faf9fb1061c7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
+ {
+ u32 crc = crc32(0, input_data, length);
+
+- return (__force u32)htonl(crc);
++ return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
++ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
+ }
+
+ bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
+@@ -602,12 +603,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx,
+ used_hw_action_num);
+ }
+
++static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn,
++ struct mlx5dr_match_spec *spec)
++{
++ if (spec->ip_version) {
++ if (spec->ip_version != 0xf) {
++ mlx5dr_err(dmn,
++ "Partial ip_version mask with src/dst IP is not supported\n");
++ return -EINVAL;
++ }
++ } else if (spec->ethertype != 0xffff &&
++ (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) {
++ mlx5dr_err(dmn,
++ "Partial/no ethertype mask with src/dst IP is not supported\n");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ u8 match_criteria,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value)
+ {
+- if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
++ if (value)
++ return 0;
++
++ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
+ mlx5dr_err(dmn,
+ "Partial mask source_port is not supported\n");
+@@ -621,6 +644,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ }
+ }
+
++ if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) &&
++ dr_ste_build_pre_check_spec(dmn, &mask->outer))
++ return -EINVAL;
++
++ if ((match_criteria & DR_MATCHER_CRITERIA_INNER) &&
++ dr_ste_build_pre_check_spec(dmn, &mask->inner))
++ return -EINVAL;
++
+ return 0;
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+index 30ae3cda6d2e0..0c7b57bf01d0d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+@@ -9,7 +9,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_matcher *last_matcher = NULL;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *last_htbl;
+- int ret;
++ int ret = -EOPNOTSUPP;
+
+ if (action && action->action_type != DR_ACTION_TYP_FT)
+ return -EOPNOTSUPP;
+@@ -19,7 +19,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ if (!list_empty(&tbl->matcher_list))
+ last_matcher = list_last_entry(&tbl->matcher_list,
+ struct mlx5dr_matcher,
+- matcher_list);
++ list_node);
+
+ if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
+ tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+@@ -68,6 +68,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ }
+ }
+
++ if (ret)
++ goto out;
++
+ /* Release old action */
+ if (tbl->miss_action)
+ refcount_dec(&tbl->miss_action->refcount);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+index b20e8aabb861b..9e2102f8bed1c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+@@ -740,6 +740,16 @@ struct mlx5dr_match_param {
+ (_misc3)->icmpv4_code || \
+ (_misc3)->icmpv4_header_data)
+
++#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \
++ (_spec)->src_ip_95_64 || \
++ (_spec)->src_ip_63_32 || \
++ (_spec)->src_ip_31_0)
++
++#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \
++ (_spec)->dst_ip_95_64 || \
++ (_spec)->dst_ip_63_32 || \
++ (_spec)->dst_ip_31_0)
++
+ struct mlx5dr_esw_caps {
+ u64 drop_icm_address_rx;
+ u64 drop_icm_address_tx;
+@@ -881,7 +891,7 @@ struct mlx5dr_matcher {
+ struct mlx5dr_table *tbl;
+ struct mlx5dr_matcher_rx_tx rx;
+ struct mlx5dr_matcher_rx_tx tx;
+- struct list_head matcher_list;
++ struct list_head list_node;
+ u32 prio;
+ struct mlx5dr_match_param mask;
+ u8 match_criteria;
+@@ -1384,7 +1394,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id,
+- bool ignore_flow_level);
++ bool ignore_flow_level,
++ u32 flow_source);
+ void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
+ u32 group_id);
+ #endif /* _DR_TYPES_H_ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+index 7e58f4e594b74..0553ee1fe80aa 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+@@ -43,11 +43,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
+ err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
+ if (err && action) {
+ err = mlx5dr_action_destroy(action);
+- if (err) {
+- action = NULL;
+- mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+- err);
+- }
++ if (err)
++ mlx5_core_err(ns->dev,
++ "Failed to destroy action (%d)\n", err);
++ action = NULL;
+ }
+ ft->fs_dr_table.miss_action = action;
+ if (old_miss_action) {
+@@ -492,11 +491,13 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
+ } else if (num_term_actions > 1) {
+ bool ignore_flow_level =
+ !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
++ u32 flow_source = fte->flow_context.flow_source;
+
+ tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
+ term_actions,
+ num_term_actions,
+- ignore_flow_level);
++ ignore_flow_level,
++ flow_source);
+ if (!tmp_action) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+index c5a8b16019991..7806e5c05b677 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+@@ -96,7 +96,8 @@ struct mlx5dr_action *
+ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests,
+- bool ignore_flow_level);
++ bool ignore_flow_level,
++ u32 flow_source);
+
+ struct mlx5dr_action *mlx5dr_action_create_drop(void);
+
+@@ -160,6 +161,11 @@ struct mlx5dr_icm_buddy_mem {
+ * sync_ste command sets them free.
+ */
+ struct list_head hot_list;
++
++ /* Memory optimisation */
++ struct mlx5dr_ste *ste_arr;
++ struct list_head *miss_list;
++ u8 *hw_ste_arr;
+ };
+
+ int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+index e3509e69ed1c6..3e8725b7f0b70 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
+@@ -80,6 +80,7 @@ struct mlxbf_gige {
+ struct net_device *netdev;
+ struct platform_device *pdev;
+ void __iomem *mdio_io;
++ void __iomem *clk_io;
+ struct mii_bus *mdiobus;
+ void __iomem *gpio_io;
+ struct irq_domain *irqdomain;
+@@ -149,7 +150,8 @@ enum mlxbf_gige_res {
+ MLXBF_GIGE_RES_MDIO9,
+ MLXBF_GIGE_RES_GPIO0,
+ MLXBF_GIGE_RES_LLU,
+- MLXBF_GIGE_RES_PLU
++ MLXBF_GIGE_RES_PLU,
++ MLXBF_GIGE_RES_CLK
+ };
+
+ /* Version of register data returned by mlxbf_gige_get_regs() */
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+index 6704f5c1aa32e..b990782c1eb1f 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+@@ -75,7 +75,7 @@ static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv)
+ u64_to_ether_addr(local_mac, mac);
+
+ if (is_valid_ether_addr(mac)) {
+- ether_addr_copy(priv->netdev->dev_addr, mac);
++ eth_hw_addr_set(priv->netdev, mac);
+ } else {
+ /* Provide a random MAC if for some reason the device has
+ * not been configured with a valid MAC address already.
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
+index 7905179a95753..5819584345ab8 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
+@@ -22,10 +22,23 @@
+ #include <linux/property.h>
+
+ #include "mlxbf_gige.h"
++#include "mlxbf_gige_regs.h"
+
+ #define MLXBF_GIGE_MDIO_GW_OFFSET 0x0
+ #define MLXBF_GIGE_MDIO_CFG_OFFSET 0x4
+
++#define MLXBF_GIGE_MDIO_FREQ_REFERENCE 156250000ULL
++#define MLXBF_GIGE_MDIO_COREPLL_CONST 16384ULL
++#define MLXBF_GIGE_MDC_CLK_NS 400
++#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG1 0x4
++#define MLXBF_GIGE_MDIO_PLL_I1CLK_REG2 0x8
++#define MLXBF_GIGE_MDIO_CORE_F_SHIFT 0
++#define MLXBF_GIGE_MDIO_CORE_F_MASK GENMASK(25, 0)
++#define MLXBF_GIGE_MDIO_CORE_R_SHIFT 26
++#define MLXBF_GIGE_MDIO_CORE_R_MASK GENMASK(31, 26)
++#define MLXBF_GIGE_MDIO_CORE_OD_SHIFT 0
++#define MLXBF_GIGE_MDIO_CORE_OD_MASK GENMASK(3, 0)
++
+ /* Support clause 22 */
+ #define MLXBF_GIGE_MDIO_CL22_ST1 0x1
+ #define MLXBF_GIGE_MDIO_CL22_WRITE 0x1
+@@ -50,27 +63,76 @@
+ #define MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK GENMASK(23, 16)
+ #define MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK GENMASK(31, 24)
+
++#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \
++ FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \
++ FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \
++ FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \
++ FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13))
++
++#define MLXBF_GIGE_BF2_COREPLL_ADDR 0x02800c30
++#define MLXBF_GIGE_BF2_COREPLL_SIZE 0x0000000c
++
++static struct resource corepll_params[] = {
++ [MLXBF_GIGE_VERSION_BF2] = {
++ .start = MLXBF_GIGE_BF2_COREPLL_ADDR,
++ .end = MLXBF_GIGE_BF2_COREPLL_ADDR + MLXBF_GIGE_BF2_COREPLL_SIZE - 1,
++ .name = "COREPLL_RES"
++ },
++};
++
++/* Returns core clock i1clk in Hz */
++static u64 calculate_i1clk(struct mlxbf_gige *priv)
++{
++ u8 core_od, core_r;
++ u64 freq_output;
++ u32 reg1, reg2;
++ u32 core_f;
++
++ reg1 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG1);
++ reg2 = readl(priv->clk_io + MLXBF_GIGE_MDIO_PLL_I1CLK_REG2);
++
++ core_f = (reg1 & MLXBF_GIGE_MDIO_CORE_F_MASK) >>
++ MLXBF_GIGE_MDIO_CORE_F_SHIFT;
++ core_r = (reg1 & MLXBF_GIGE_MDIO_CORE_R_MASK) >>
++ MLXBF_GIGE_MDIO_CORE_R_SHIFT;
++ core_od = (reg2 & MLXBF_GIGE_MDIO_CORE_OD_MASK) >>
++ MLXBF_GIGE_MDIO_CORE_OD_SHIFT;
++
++ /* Compute PLL output frequency as follow:
++ *
++ * CORE_F / 16384
++ * freq_output = freq_reference * ----------------------------
++ * (CORE_R + 1) * (CORE_OD + 1)
++ */
++ freq_output = div_u64((MLXBF_GIGE_MDIO_FREQ_REFERENCE * core_f),
++ MLXBF_GIGE_MDIO_COREPLL_CONST);
++ freq_output = div_u64(freq_output, (core_r + 1) * (core_od + 1));
++
++ return freq_output;
++}
++
+ /* Formula for encoding the MDIO period. The encoded value is
+ * passed to the MDIO config register.
+ *
+- * mdc_clk = 2*(val + 1)*i1clk
++ * mdc_clk = 2*(val + 1)*(core clock in sec)
+ *
+- * 400 ns = 2*(val + 1)*(((1/430)*1000) ns)
++ * i1clk is in Hz:
++ * 400 ns = 2*(val + 1)*(1/i1clk)
+ *
+- * val = (((400 * 430 / 1000) / 2) - 1)
++ * val = (((400/10^9) / (1/i1clk) / 2) - 1)
++ * val = (400/2 * i1clk)/10^9 - 1
+ */
+-#define MLXBF_GIGE_I1CLK_MHZ 430
+-#define MLXBF_GIGE_MDC_CLK_NS 400
++static u8 mdio_period_map(struct mlxbf_gige *priv)
++{
++ u8 mdio_period;
++ u64 i1clk;
+
+-#define MLXBF_GIGE_MDIO_PERIOD (((MLXBF_GIGE_MDC_CLK_NS * MLXBF_GIGE_I1CLK_MHZ / 1000) / 2) - 1)
++ i1clk = calculate_i1clk(priv);
+
+-#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \
+- FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \
+- FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \
+- FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, \
+- MLXBF_GIGE_MDIO_PERIOD) | \
+- FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \
+- FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13))
++ mdio_period = div_u64((MLXBF_GIGE_MDC_CLK_NS >> 1) * i1clk, 1000000000) - 1;
++
++ return mdio_period;
++}
+
+ static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add,
+ int phy_reg, u32 opcode)
+@@ -116,6 +178,9 @@ static int mlxbf_gige_mdio_read(struct mii_bus *bus, int phy_add, int phy_reg)
+ /* Only return ad bits of the gw register */
+ ret &= MLXBF_GIGE_MDIO_GW_AD_MASK;
+
++ /* The MDIO lock is set on read. To release it, clear gw register */
++ writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
++
+ return ret;
+ }
+
+@@ -123,9 +188,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
+ int phy_reg, u16 val)
+ {
+ struct mlxbf_gige *priv = bus->priv;
++ u32 temp;
+ u32 cmd;
+ int ret;
+- u32 temp;
+
+ if (phy_reg & MII_ADDR_C45)
+ return -EOPNOTSUPP;
+@@ -139,21 +204,50 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
+ ret = readl_poll_timeout_atomic(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET,
+ temp, !(temp & MLXBF_GIGE_MDIO_GW_BUSY_MASK), 100, 1000000);
+
++ /* The MDIO lock is set on read. To release it, clear gw register */
++ writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
++
+ return ret;
+ }
+
++static void mlxbf_gige_mdio_cfg(struct mlxbf_gige *priv)
++{
++ u8 mdio_period;
++ u32 val;
++
++ mdio_period = mdio_period_map(priv);
++
++ val = MLXBF_GIGE_MDIO_CFG_VAL;
++ val |= FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, mdio_period);
++ writel(val, priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET);
++}
++
+ int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
+ {
+ struct device *dev = &pdev->dev;
++ struct resource *res;
+ int ret;
+
+ priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9);
+ if (IS_ERR(priv->mdio_io))
+ return PTR_ERR(priv->mdio_io);
+
+- /* Configure mdio parameters */
+- writel(MLXBF_GIGE_MDIO_CFG_VAL,
+- priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET);
++ /* clk resource shared with other drivers so cannot use
++ * devm_platform_ioremap_resource
++ */
++ res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_CLK);
++ if (!res) {
++ /* For backward compatibility with older ACPI tables, also keep
++ * CLK resource internal to the driver.
++ */
++ res = &corepll_params[MLXBF_GIGE_VERSION_BF2];
++ }
++
++ priv->clk_io = devm_ioremap(dev, res->start, resource_size(res));
++ if (!priv->clk_io)
++ return -ENOMEM;
++
++ mlxbf_gige_mdio_cfg(priv);
+
+ priv->mdiobus = devm_mdiobus_alloc(dev);
+ if (!priv->mdiobus) {
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+index 5fb33c9294bf9..7be3a793984d5 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
+@@ -8,6 +8,8 @@
+ #ifndef __MLXBF_GIGE_REGS_H__
+ #define __MLXBF_GIGE_REGS_H__
+
++#define MLXBF_GIGE_VERSION 0x0000
++#define MLXBF_GIGE_VERSION_BF2 0x0
+ #define MLXBF_GIGE_STATUS 0x0010
+ #define MLXBF_GIGE_STATUS_READY BIT(0)
+ #define MLXBF_GIGE_INT_STATUS 0x0028
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+index afa3b92a6905f..0d5a41a2ae010 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+@@ -245,12 +245,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
+
+ skb = priv->rx_skb[rx_pi_rem];
+
+- skb_put(skb, datalen);
+-
+- skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
+-
+- skb->protocol = eth_type_trans(skb, netdev);
+-
+ /* Alloc another RX SKB for this same index */
+ rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ,
+ &rx_buf_dma, DMA_FROM_DEVICE);
+@@ -259,6 +253,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
+ priv->rx_skb[rx_pi_rem] = rx_skb;
+ dma_unmap_single(priv->dev, *rx_wqe_addr,
+ MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
++
++ skb_put(skb, datalen);
++
++ skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
++
++ skb->protocol = eth_type_trans(skb, netdev);
++
+ *rx_wqe_addr = rx_buf_dma;
+ } else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) {
+ priv->stats.rx_mac_errors++;
+diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
+index 017d68f1e1232..972c571b41587 100644
+--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
+@@ -31,6 +31,8 @@ mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file,
+
+ if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) {
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv);
++ if (!multi)
++ return NULL;
+ tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len));
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+index 392ce3cb27f72..51b260d54237e 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+@@ -935,6 +935,18 @@ static inline int mlxsw_cmd_sw2hw_rdq(struct mlxsw_core *mlxsw_core,
+ */
+ MLXSW_ITEM32(cmd_mbox, sw2hw_dq, cq, 0x00, 24, 8);
+
++enum mlxsw_cmd_mbox_sw2hw_dq_sdq_lp {
++ MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE,
++ MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE,
++};
++
++/* cmd_mbox_sw2hw_dq_sdq_lp
++ * SDQ local Processing
++ * 0: local processing by wqe.lp
++ * 1: local processing (ignoring wqe.lp)
++ */
++MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_lp, 0x00, 23, 1);
++
+ /* cmd_mbox_sw2hw_dq_sdq_tclass
+ * SDQ: CPU Egress TClass
+ * RDQ: Reserved
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+index 939b692ffc335..61d2f621d65fc 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+@@ -47,6 +47,7 @@
+ #define MLXSW_I2C_MBOX_SIZE_BITS 12
+ #define MLXSW_I2C_ADDR_BUF_SIZE 4
+ #define MLXSW_I2C_BLK_DEF 32
++#define MLXSW_I2C_BLK_MAX 100
+ #define MLXSW_I2C_RETRY 5
+ #define MLXSW_I2C_TIMEOUT_MSECS 5000
+ #define MLXSW_I2C_MAX_DATA_SIZE 256
+@@ -428,7 +429,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
+ } else {
+ /* No input mailbox is case of initialization query command. */
+ reg_size = MLXSW_I2C_MAX_DATA_SIZE;
+- num = reg_size / mlxsw_i2c->block_size;
++ num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size);
+
+ if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
+ dev_err(&client->dev, "Could not acquire lock");
+@@ -576,7 +577,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
+ return -EOPNOTSUPP;
+ }
+
+- mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
++ mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX,
+ min_t(u16, quirks->max_read_len,
+ quirks->max_write_len));
+ } else {
+@@ -650,6 +651,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
+ return 0;
+
+ errout:
++ mutex_destroy(&mlxsw_i2c->cmd.lock);
+ i2c_set_clientdata(client, NULL);
+
+ return err;
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
+index fcace73eae40f..d9f9cbba62465 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
+@@ -285,6 +285,7 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q)
+ {
+ int tclass;
++ int lp;
+ int i;
+ int err;
+
+@@ -292,9 +293,12 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ q->consumer_counter = 0;
+ tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC :
+ MLXSW_PCI_SDQ_CTL_TC;
++ lp = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE :
++ MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE;
+
+ /* Set CQ of same number of this SDQ. */
+ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
++ mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp);
+ mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass);
+ mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
+ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+@@ -1678,7 +1682,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
+
+ wqe = elem_info->elem;
+ mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
+- mlxsw_pci_wqe_lp_set(wqe, !!tx_info->is_emad);
++ mlxsw_pci_wqe_lp_set(wqe, 0);
+ mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET);
+
+ err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
+@@ -1973,6 +1977,7 @@ int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+ {
+ pci_driver->probe = mlxsw_pci_probe;
+ pci_driver->remove = mlxsw_pci_remove;
++ pci_driver->shutdown = mlxsw_pci_remove;
+ return pci_register_driver(pci_driver);
+ }
+ EXPORT_SYMBOL(mlxsw_pci_driver_register);
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+index 7b531228d6c0f..25e9f47db2a62 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+@@ -26,7 +26,7 @@
+ #define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000
+
+ #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
+-#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
++#define MLXSW_PCI_SW_RESET_WAIT_MSECS 400
+ #define MLXSW_PCI_FW_READY 0xA1844
+ #define MLXSW_PCI_FW_READY_MASK 0xFFFF
+ #define MLXSW_PCI_FW_READY_MAGIC 0x5E
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+index 250c5a24264dc..35908a8c640a1 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -1730,9 +1730,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+
+ cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+ cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw);
+- mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
+ mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
+ unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
++ mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
+ mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, true, true);
+ mlxsw_sp->ports[local_port] = NULL;
+ mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true);
+@@ -2131,7 +2131,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
+ max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+ local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+
+- if (WARN_ON_ONCE(local_port >= max_ports))
++ if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
+ return;
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
+index a68d931090dd5..15c8d4de83508 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
+@@ -8,8 +8,8 @@
+ #include "spectrum.h"
+
+ enum mlxsw_sp_counter_sub_pool_id {
+- MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+ MLXSW_SP_COUNTER_SUB_POOL_RIF,
++ MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+ };
+
+ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+index 5f92b16913605..aff6d4f35cd2f 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+@@ -168,8 +168,6 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
+ static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev,
+ struct dcb_app *app)
+ {
+- int prio;
+-
+ if (app->priority >= IEEE_8021QAZ_MAX_TCS) {
+ netdev_err(dev, "APP entry with priority value %u is invalid\n",
+ app->priority);
+@@ -183,17 +181,6 @@ static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev,
+ app->protocol);
+ return -EINVAL;
+ }
+-
+- /* Warn about any DSCP APP entries with the same PID. */
+- prio = fls(dcb_ieee_getapp_mask(dev, app));
+- if (prio--) {
+- if (prio < app->priority)
+- netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n",
+- app->priority, app->protocol, prio);
+- else if (prio > app->priority)
+- netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n",
+- app->priority, app->protocol, prio);
+- }
+ break;
+
+ case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+index 19bb3ca0515e2..55de90d5ae591 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -4293,6 +4293,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
+ return 0;
+
+ err_nexthop_neigh_init:
++ list_del(&nh->router_list_node);
++ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
+ return err;
+ }
+@@ -5194,7 +5196,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+ {
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+- return nh->fib_nh_scope == RT_SCOPE_LINK ||
++ return nh->fib_nh_gw_family ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
+ }
+
+@@ -6578,6 +6580,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ const struct fib6_info *rt)
+ {
+ struct net_device *dev = rt->fib6_nh->fib_nh_dev;
++ int err;
+
+ nh->nhgi = nh_grp->nhgi;
+ nh->nh_weight = rt->fib6_nh->fib_nh_weight;
+@@ -6593,7 +6596,16 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ return 0;
+ nh->ifindex = dev->ifindex;
+
+- return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
++ err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
++ if (err)
++ goto err_nexthop_type_init;
++
++ return 0;
++
++err_nexthop_type_init:
++ list_del(&nh->router_list_node);
++ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
++ return err;
+ }
+
+ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+@@ -9576,7 +9588,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
+ unsigned long *fields = config->fields;
+ u32 hash_fields;
+
+- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
++ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
+ case 0:
+ mlxsw_sp_mp4_hash_outer_addr(config);
+ break;
+@@ -9594,7 +9606,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
+ mlxsw_sp_mp_hash_inner_l3(config);
+ break;
+ case 3:
+- hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
++ hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
+ /* Outer */
+ MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
+ MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+@@ -9775,13 +9787,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
+ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+ {
+ struct net *net = mlxsw_sp_net(mlxsw_sp);
+- bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
+ char rgcr_pl[MLXSW_REG_RGCR_LEN];
+ u64 max_rifs;
++ bool usp;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
+ return -EIO;
+ max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
++ usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
+
+ mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
+ mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+index 26d01adbedad3..ce6f6590a7777 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+@@ -864,7 +864,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
+ .trap = MLXSW_SP_TRAP_CONTROL(LLDP, LLDP, TRAP),
+ .listeners_arr = {
+ MLXSW_RXL(mlxsw_sp_rx_ptp_listener, LLDP, TRAP_TO_CPU,
+- false, SP_LLDP, DISCARD),
++ true, SP_LLDP, DISCARD),
+ },
+ },
+ {
+diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig
+index 93df3049cdc05..1b632cdd76309 100644
+--- a/drivers/net/ethernet/micrel/Kconfig
++++ b/drivers/net/ethernet/micrel/Kconfig
+@@ -39,6 +39,7 @@ config KS8851
+ config KS8851_MLL
+ tristate "Micrel KS8851 MLL"
+ depends on HAS_IOMEM
++ depends on PTP_1588_CLOCK_OPTIONAL
+ select MII
+ select CRC32
+ select EEPROM_93CX6
+diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
+index a6db1a8156e1a..2c80dba2606cd 100644
+--- a/drivers/net/ethernet/micrel/ks8851_common.c
++++ b/drivers/net/ethernet/micrel/ks8851_common.c
+@@ -195,7 +195,7 @@ static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
+ struct net_device *dev = ks->netdev;
+ int ret;
+
+- ret = of_get_mac_address(np, dev->dev_addr);
++ ret = of_get_ethdev_address(np, dev);
+ if (!ret) {
+ ks8851_write_mac_addr(dev);
+ return;
+diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
+index 2e8fcce50f9d1..c6f517c07bb9a 100644
+--- a/drivers/net/ethernet/micrel/ks8851_par.c
++++ b/drivers/net/ethernet/micrel/ks8851_par.c
+@@ -321,6 +321,8 @@ static int ks8851_probe_par(struct platform_device *pdev)
+ return ret;
+
+ netdev->irq = platform_get_irq(pdev, 0);
++ if (netdev->irq < 0)
++ return netdev->irq;
+
+ return ks8851_probe_common(netdev, dev, msg_enable);
+ }
+diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
+index a0ee155f9f516..f56bcd3e36d21 100644
+--- a/drivers/net/ethernet/micrel/ksz884x.c
++++ b/drivers/net/ethernet/micrel/ksz884x.c
+@@ -6848,7 +6848,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
+ char banner[sizeof(version)];
+ struct ksz_switch *sw = NULL;
+
+- result = pci_enable_device(pdev);
++ result = pcim_enable_device(pdev);
+ if (result)
+ return result;
+
+diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
+index 09cdc2f2e7ffb..bf77e8adffbf3 100644
+--- a/drivers/net/ethernet/microchip/enc28j60.c
++++ b/drivers/net/ethernet/microchip/enc28j60.c
+@@ -517,7 +517,7 @@ static int enc28j60_set_mac_address(struct net_device *dev, void *addr)
+ if (!is_valid_ether_addr(address->sa_data))
+ return -EADDRNOTAVAIL;
+
+- ether_addr_copy(dev->dev_addr, address->sa_data);
++ eth_hw_addr_set(dev, address->sa_data);
+ return enc28j60_set_hw_macaddr(dev);
+ }
+
+@@ -1573,7 +1573,7 @@ static int enc28j60_probe(struct spi_device *spi)
+ }
+
+ if (device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
+- ether_addr_copy(dev->dev_addr, macaddr);
++ eth_hw_addr_set(dev, macaddr);
+ else
+ eth_hw_addr_random(dev);
+ enc28j60_set_hw_macaddr(dev);
+diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
+index 81a8ccca7e5e0..5693784eec5bc 100644
+--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
++++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
+@@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg,
+ goto err_out;
+
+ usleep_range(26, 100);
+- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
++ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
+ (mistat & BUSY))
+ cpu_relax();
+
+@@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg,
+ goto err_out;
+
+ usleep_range(26, 100);
+- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
++ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
+ (mistat & BUSY))
+ cpu_relax();
+
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
+index 4d5a5d6595b3b..a3392c74372a8 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.c
++++ b/drivers/net/ethernet/microchip/lan743x_main.c
+@@ -83,6 +83,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter)
+ !(data & HW_CFG_LRST_), 100000, 10000000);
+ }
+
++static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter,
++ int offset, u32 bit_mask,
++ int target_value, int udelay_min,
++ int udelay_max, int count)
++{
++ u32 data;
++
++ return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data,
++ target_value == !!(data & bit_mask),
++ udelay_max, udelay_min * count);
++}
++
+ static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter,
+ int offset, u32 bit_mask,
+ int target_value, int usleep_min,
+@@ -671,8 +683,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter,
+ u32 dp_sel;
+ int i;
+
+- if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+- 1, 40, 100, 100))
++ if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_,
++ 1, 40, 100, 100))
+ return -EIO;
+ dp_sel = lan743x_csr_read(adapter, DP_SEL);
+ dp_sel &= ~DP_SEL_MASK_;
+@@ -683,8 +695,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter,
+ lan743x_csr_write(adapter, DP_ADDR, addr + i);
+ lan743x_csr_write(adapter, DP_DATA_0, buf[i]);
+ lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
+- if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+- 1, 40, 100, 100))
++ if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL,
++ DP_SEL_DPRDY_,
++ 1, 40, 100, 100))
+ return -EIO;
+ }
+
+@@ -816,7 +829,7 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
+ eth_random_addr(adapter->mac_address);
+ }
+ lan743x_mac_set_address(adapter, adapter->mac_address);
+- ether_addr_copy(netdev->dev_addr, adapter->mac_address);
++ eth_hw_addr_set(netdev, adapter->mac_address);
+
+ return 0;
+ }
+@@ -914,8 +927,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter)
+ }
+
+ static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
+- u8 duplex, u16 local_adv,
+- u16 remote_adv)
++ u16 local_adv, u16 remote_adv)
+ {
+ struct lan743x_phy *phy = &adapter->phy;
+ u8 cap;
+@@ -943,7 +955,6 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
+
+ phy_print_status(phydev);
+ if (phydev->state == PHY_RUNNING) {
+- struct ethtool_link_ksettings ksettings;
+ int remote_advertisement = 0;
+ int local_advertisement = 0;
+
+@@ -980,18 +991,14 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
+ }
+ lan743x_csr_write(adapter, MAC_CR, data);
+
+- memset(&ksettings, 0, sizeof(ksettings));
+- phy_ethtool_get_link_ksettings(netdev, &ksettings);
+ local_advertisement =
+ linkmode_adv_to_mii_adv_t(phydev->advertising);
+ remote_advertisement =
+ linkmode_adv_to_mii_adv_t(phydev->lp_advertising);
+
+- lan743x_phy_update_flowcontrol(adapter,
+- ksettings.base.duplex,
+- local_advertisement,
++ lan743x_phy_update_flowcontrol(adapter, local_advertisement,
+ remote_advertisement);
+- lan743x_ptp_update_latency(adapter, ksettings.base.speed);
++ lan743x_ptp_update_latency(adapter, phydev->speed);
+ }
+ }
+
+@@ -2670,7 +2677,7 @@ static int lan743x_netdev_set_mac_address(struct net_device *netdev,
+ ret = eth_prepare_mac_addr_change(netdev, sock_addr);
+ if (ret)
+ return ret;
+- ether_addr_copy(netdev->dev_addr, sock_addr->sa_data);
++ eth_hw_addr_set(netdev, sock_addr->sa_data);
+ lan743x_mac_set_address(adapter, sock_addr->sa_data);
+ lan743x_rfe_update_mac_address(adapter);
+ return 0;
+diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig
+index 7bdbb2d09a148..cc5e48e1bb4c3 100644
+--- a/drivers/net/ethernet/microchip/sparx5/Kconfig
++++ b/drivers/net/ethernet/microchip/sparx5/Kconfig
+@@ -4,6 +4,8 @@ config SPARX5_SWITCH
+ depends on HAS_IOMEM
+ depends on OF
+ depends on ARCH_SPARX5 || COMPILE_TEST
++ depends on PTP_1588_CLOCK_OPTIONAL
++ depends on BRIDGE || BRIDGE=n
+ select PHYLINK
+ select PHY_SPARX5_SERDES
+ select RESET_CONTROLLER
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
+index 59783fc46a7b9..060274caa4d0e 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
+@@ -1103,7 +1103,7 @@ void sparx5_get_stats64(struct net_device *ndev,
+ stats->tx_carrier_errors = portstats[spx5_stats_tx_csense_cnt];
+ stats->tx_window_errors = portstats[spx5_stats_tx_late_coll_cnt];
+ stats->rx_dropped = portstats[spx5_stats_ana_ac_port_stat_lsb_cnt];
+- for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++stats)
++ for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx)
+ stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop
+ + idx];
+ stats->tx_dropped = portstats[spx5_stats_tx_local_drop];
+@@ -1219,6 +1219,9 @@ int sparx_stats_init(struct sparx5 *sparx5)
+ snprintf(queue_name, sizeof(queue_name), "%s-stats",
+ dev_name(sparx5->dev));
+ sparx5->stats_queue = create_singlethread_workqueue(queue_name);
++ if (!sparx5->stats_queue)
++ return -ENOMEM;
++
+ INIT_DELAYED_WORK(&sparx5->stats_work, sparx5_check_stats_work);
+ queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work,
+ SPX5_STATS_CHECK_DELAY);
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
+index 7436f62fa1525..174ad95e746a3 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
+@@ -420,6 +420,8 @@ static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5)
+ db_hw->dataptr = phys;
+ db_hw->status = 0;
+ db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL);
++ if (!db)
++ return -ENOMEM;
+ db->cpu_addr = cpu_addr;
+ list_add_tail(&db->list, &tx->db_list);
+ }
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+index 5030dfca38798..174d89ee63749 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+@@ -629,6 +629,9 @@ static int sparx5_start(struct sparx5 *sparx5)
+ snprintf(queue_name, sizeof(queue_name), "%s-mact",
+ dev_name(sparx5->dev));
+ sparx5->mact_queue = create_singlethread_workqueue(queue_name);
++ if (!sparx5->mact_queue)
++ return -ENOMEM;
++
+ INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work);
+ queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work,
+ SPX5_MACT_PULL_DELAY);
+@@ -776,7 +779,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
+ if (err)
+ goto cleanup_config;
+
+- if (!of_get_mac_address(np, sparx5->base_mac)) {
++ if (of_get_mac_address(np, sparx5->base_mac)) {
+ dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n");
+ eth_random_addr(sparx5->base_mac);
+ sparx5->base_mac[5] = 0;
+@@ -826,6 +829,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
+
+ cleanup_ports:
+ sparx5_cleanup_ports(sparx5);
++ if (sparx5->mact_queue)
++ destroy_workqueue(sparx5->mact_queue);
+ cleanup_config:
+ kfree(configs);
+ cleanup_pnode:
+@@ -849,6 +854,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
+ sparx5_cleanup_ports(sparx5);
+ /* Unregister netdevs */
+ sparx5_unregister_notifier_blocks(sparx5);
++ destroy_workqueue(sparx5->mact_queue);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+index cb68eaaac8811..a84038db8e1ad 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+@@ -83,7 +83,7 @@ static int sparx5_port_open(struct net_device *ndev)
+ err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
+ if (err) {
+ netdev_err(ndev, "Could not attach to PHY\n");
+- return err;
++ goto err_connect;
+ }
+
+ phylink_start(port->phylink);
+@@ -95,10 +95,20 @@ static int sparx5_port_open(struct net_device *ndev)
+ err = sparx5_serdes_set(port->sparx5, port, &port->conf);
+ else
+ err = phy_power_on(port->serdes);
+- if (err)
++ if (err) {
+ netdev_err(ndev, "%s failed\n", __func__);
++ goto out_power;
++ }
+ }
+
++ return 0;
++
++out_power:
++ phylink_stop(port->phylink);
++ phylink_disconnect_phy(port->phylink);
++err_connect:
++ sparx5_port_enable(port, false);
++
+ return err;
+ }
+
+@@ -162,7 +172,7 @@ static int sparx5_set_mac_address(struct net_device *dev, void *p)
+ sparx5_mact_learn(sparx5, PGID_CPU, addr->sa_data, port->pvid);
+
+ /* Record the address */
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+index dc7e5ea6ec158..c460168131c26 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+@@ -107,6 +107,8 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap)
+ /* This assumes STATUS_WORD_POS == 1, Status
+ * just after last data
+ */
++ if (!byte_swap)
++ val = ntohl((__force __be32)val);
+ byte_cnt -= (4 - XTR_VALID_BYTES(val));
+ eof_flag = true;
+ break;
+@@ -145,9 +147,9 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap)
+ skb_put(skb, byte_cnt - ETH_FCS_LEN);
+ eth_skb_pad(skb);
+ skb->protocol = eth_type_trans(skb, netdev);
+- netif_rx(skb);
+ netdev->stats.rx_bytes += skb->len;
+ netdev->stats.rx_packets++;
++ netif_rx(skb);
+ }
+
+ static int sparx5_inject(struct sparx5 *sparx5,
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
+index 4ce490a25f332..8e56ffa1c4f7a 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
+@@ -58,16 +58,6 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
+ struct sparx5 *sparx5 = port->sparx5;
+ int ret;
+
+- /* Make the port a member of the VLAN */
+- set_bit(port->portno, sparx5->vlan_mask[vid]);
+- ret = sparx5_vlant_set_mask(sparx5, vid);
+- if (ret)
+- return ret;
+-
+- /* Default ingress vlan classification */
+- if (pvid)
+- port->pvid = vid;
+-
+ /* Untagged egress vlan classification */
+ if (untagged && port->vid != vid) {
+ if (port->vid) {
+@@ -79,6 +69,16 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
+ port->vid = vid;
+ }
+
++ /* Make the port a member of the VLAN */
++ set_bit(port->portno, sparx5->vlan_mask[vid]);
++ ret = sparx5_vlant_set_mask(sparx5, vid);
++ if (ret)
++ return ret;
++
++ /* Default ingress vlan classification */
++ if (pvid)
++ port->pvid = vid;
++
+ sparx5_vlan_port_apply(sparx5, port);
+
+ return 0;
+diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
+index 41ecd156e95f5..f74f416a296f6 100644
+--- a/drivers/net/ethernet/microsoft/mana/gdma.h
++++ b/drivers/net/ethernet/microsoft/mana/gdma.h
+@@ -324,9 +324,12 @@ struct gdma_queue_spec {
+ };
+ };
+
++#define MANA_IRQ_NAME_SZ 32
++
+ struct gdma_irq_context {
+ void (*handler)(void *arg);
+ void *arg;
++ char name[MANA_IRQ_NAME_SZ];
+ };
+
+ struct gdma_context {
+@@ -488,7 +491,14 @@ enum {
+
+ #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
+
+-#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
++/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed,
++ * so the driver is able to reliably support features like busy_poll.
++ */
++#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
++
++#define GDMA_DRV_CAP_FLAGS1 \
++ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
++ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
+
+ #define GDMA_DRV_CAP_FLAGS2 0
+
+diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
+index cee75b561f59d..0fb42193643dc 100644
+--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
+@@ -368,6 +368,11 @@ static void mana_gd_process_eq_events(void *arg)
+ break;
+ }
+
++ /* Per GDMA spec, rmb is necessary after checking owner_bits, before
++ * reading eqe.
++ */
++ rmb();
++
+ mana_gd_process_eqe(eq);
+
+ eq->head++;
+@@ -1096,6 +1101,11 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
+ if (WARN_ON_ONCE(owner_bits != new_bits))
+ return -1;
+
++ /* Per GDMA spec, rmb is necessary after checking owner_bits, before
++ * reading completion info
++ */
++ rmb();
++
+ comp->wq_num = cqe->cqe_info.wq_num;
+ comp->is_sq = cqe->cqe_info.is_sq;
+ memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE);
+@@ -1185,13 +1195,20 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
+ gic->handler = NULL;
+ gic->arg = NULL;
+
++ if (!i)
++ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
++ pci_name(pdev));
++ else
++ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s",
++ i - 1, pci_name(pdev));
++
+ irq = pci_irq_vector(pdev, i);
+ if (irq < 0) {
+ err = irq;
+ goto free_irq;
+ }
+
+- err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic);
++ err = request_irq(irq, mana_gd_intr, 0, gic->name, gic);
+ if (err)
+ goto free_irq;
+ }
+diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
+index 030ae89f3a337..6224b7c21e0af 100644
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -980,8 +980,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
+ break;
+
+ case CQE_RX_TRUNCATED:
+- netdev_err(ndev, "Dropped a truncated packet\n");
+- return;
++ ++ndev->stats.rx_dropped;
++ rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
++ netdev_warn_once(ndev, "Dropped a truncated packet\n");
++ goto drop;
+
+ case CQE_RX_COALESCED_4:
+ netdev_err(ndev, "RX coalescing is unsupported\n");
+@@ -1043,6 +1045,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
+
+ mana_rx_skb(old_buf, oob, rxq);
+
++drop:
+ mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
+
+ mana_post_pkt_rxq(rxq);
+@@ -1068,10 +1071,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
+ }
+ }
+
+-static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
++static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+ {
+ struct mana_cq *cq = context;
+ u8 arm_bit;
++ int w;
+
+ WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
+
+@@ -1080,26 +1084,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+ else
+ mana_poll_tx_cq(cq);
+
+- if (cq->work_done < cq->budget &&
+- napi_complete_done(&cq->napi, cq->work_done)) {
++ w = cq->work_done;
++
++ if (w < cq->budget &&
++ napi_complete_done(&cq->napi, w)) {
+ arm_bit = SET_ARM_BIT;
+ } else {
+ arm_bit = 0;
+ }
+
+ mana_gd_ring_cq(gdma_queue, arm_bit);
++
++ return w;
+ }
+
+ static int mana_poll(struct napi_struct *napi, int budget)
+ {
+ struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
++ int w;
+
+ cq->work_done = 0;
+ cq->budget = budget;
+
+- mana_cq_handler(cq, cq->gdma_cq);
++ w = mana_cq_handler(cq, cq->gdma_cq);
+
+- return min(cq->work_done, budget);
++ return min(w, budget);
+ }
+
+ static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
+@@ -1610,7 +1619,7 @@ static int mana_init_port(struct net_device *ndev)
+ if (apc->num_queues > apc->max_queues)
+ apc->num_queues = apc->max_queues;
+
+- ether_addr_copy(ndev->dev_addr, apc->mac_addr);
++ eth_hw_addr_set(ndev, apc->mac_addr);
+
+ return 0;
+
+diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
+index 49def6934cad1..fa4c596e6ec6f 100644
+--- a/drivers/net/ethernet/moxa/moxart_ether.c
++++ b/drivers/net/ethernet/moxa/moxart_ether.c
+@@ -74,11 +74,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr)
+ static void moxart_mac_free_memory(struct net_device *ndev)
+ {
+ struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+- int i;
+-
+- for (i = 0; i < RX_DESC_NUM; i++)
+- dma_unmap_single(&ndev->dev, priv->rx_mapping[i],
+- priv->rx_buf_size, DMA_FROM_DEVICE);
+
+ if (priv->tx_desc_base)
+ dma_free_coherent(&priv->pdev->dev,
+@@ -147,11 +142,11 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
+ desc + RX_REG_OFFSET_DESC1);
+
+ priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i;
+- priv->rx_mapping[i] = dma_map_single(&ndev->dev,
++ priv->rx_mapping[i] = dma_map_single(&priv->pdev->dev,
+ priv->rx_buf[i],
+ priv->rx_buf_size,
+ DMA_FROM_DEVICE);
+- if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i]))
++ if (dma_mapping_error(&priv->pdev->dev, priv->rx_mapping[i]))
+ netdev_err(ndev, "DMA mapping error\n");
+
+ moxart_desc_write(priv->rx_mapping[i],
+@@ -193,6 +188,7 @@ static int moxart_mac_open(struct net_device *ndev)
+ static int moxart_mac_stop(struct net_device *ndev)
+ {
+ struct moxart_mac_priv_t *priv = netdev_priv(ndev);
++ int i;
+
+ napi_disable(&priv->napi);
+
+@@ -204,6 +200,11 @@ static int moxart_mac_stop(struct net_device *ndev)
+ /* disable all functions */
+ writel(0, priv->base + REG_MAC_CTRL);
+
++ /* unmap areas mapped in moxart_mac_setup_desc_ring() */
++ for (i = 0; i < RX_DESC_NUM; i++)
++ dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
++ priv->rx_buf_size, DMA_FROM_DEVICE);
++
+ return 0;
+ }
+
+@@ -240,7 +241,7 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget)
+ if (len > RX_BUF_SIZE)
+ len = RX_BUF_SIZE;
+
+- dma_sync_single_for_cpu(&ndev->dev,
++ dma_sync_single_for_cpu(&priv->pdev->dev,
+ priv->rx_mapping[rx_head],
+ priv->rx_buf_size, DMA_FROM_DEVICE);
+ skb = netdev_alloc_skb_ip_align(ndev, len);
+@@ -294,7 +295,7 @@ static void moxart_tx_finished(struct net_device *ndev)
+ unsigned int tx_tail = priv->tx_tail;
+
+ while (tx_tail != tx_head) {
+- dma_unmap_single(&ndev->dev, priv->tx_mapping[tx_tail],
++ dma_unmap_single(&priv->pdev->dev, priv->tx_mapping[tx_tail],
+ priv->tx_len[tx_tail], DMA_TO_DEVICE);
+
+ ndev->stats.tx_packets++;
+@@ -358,9 +359,9 @@ static netdev_tx_t moxart_mac_start_xmit(struct sk_buff *skb,
+
+ len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len;
+
+- priv->tx_mapping[tx_head] = dma_map_single(&ndev->dev, skb->data,
++ priv->tx_mapping[tx_head] = dma_map_single(&priv->pdev->dev, skb->data,
+ len, DMA_TO_DEVICE);
+- if (dma_mapping_error(&ndev->dev, priv->tx_mapping[tx_head])) {
++ if (dma_mapping_error(&priv->pdev->dev, priv->tx_mapping[tx_head])) {
+ netdev_err(ndev, "DMA mapping error\n");
+ goto out_unlock;
+ }
+@@ -379,7 +380,7 @@ static netdev_tx_t moxart_mac_start_xmit(struct sk_buff *skb,
+ len = ETH_ZLEN;
+ }
+
+- dma_sync_single_for_device(&ndev->dev, priv->tx_mapping[tx_head],
++ dma_sync_single_for_device(&priv->pdev->dev, priv->tx_mapping[tx_head],
+ priv->tx_buf_size, DMA_TO_DEVICE);
+
+ txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK);
+@@ -493,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
+ priv->tx_buf_size = TX_BUF_SIZE;
+ priv->rx_buf_size = RX_BUF_SIZE;
+
+- priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE *
++ priv->tx_desc_base = dma_alloc_coherent(p_dev, TX_REG_DESC_SIZE *
+ TX_DESC_NUM, &priv->tx_base,
+ GFP_DMA | GFP_KERNEL);
+ if (!priv->tx_desc_base) {
+@@ -501,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
+ goto init_fail;
+ }
+
+- priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE *
++ priv->rx_desc_base = dma_alloc_coherent(p_dev, RX_REG_DESC_SIZE *
+ RX_DESC_NUM, &priv->rx_base,
+ GFP_DMA | GFP_KERNEL);
+ if (!priv->rx_desc_base) {
+diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
+index b6a73d151dec1..8dd8c7f425d2c 100644
+--- a/drivers/net/ethernet/mscc/Kconfig
++++ b/drivers/net/ethernet/mscc/Kconfig
+@@ -28,7 +28,7 @@ config MSCC_OCELOT_SWITCH
+ depends on BRIDGE || BRIDGE=n
+ depends on NET_SWITCHDEV
+ depends on HAS_IOMEM
+- depends on OF_NET
++ depends on OF
+ select MSCC_OCELOT_SWITCH_LIB
+ select GENERIC_PHY
+ help
+diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
+index a08e4f530c1c1..96b1e394a397f 100644
+--- a/drivers/net/ethernet/mscc/ocelot.c
++++ b/drivers/net/ethernet/mscc/ocelot.c
+@@ -555,7 +555,10 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
+
+ ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
+
+- ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
++ /* Don't attempt to send PAUSE frames on the NPI port, it's broken */
++ if (port != ocelot->npi)
++ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA,
++ tx_pause);
+
+ /* Undo the effects of ocelot_phylink_mac_link_down:
+ * enable MAC module
+@@ -1175,12 +1178,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
+ switch (cfg.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ break;
+- case HWTSTAMP_FILTER_ALL:
+- case HWTSTAMP_FILTER_SOME:
+- case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+- case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+- case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+- case HWTSTAMP_FILTER_NTP_ALL:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+@@ -1218,12 +1215,11 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
+ }
+ EXPORT_SYMBOL(ocelot_get_strings);
+
++/* Caller must hold &ocelot->stats_lock */
+ static void ocelot_update_stats(struct ocelot *ocelot)
+ {
+ int i, j;
+
+- mutex_lock(&ocelot->stats_lock);
+-
+ for (i = 0; i < ocelot->num_phys_ports; i++) {
+ /* Configure the port to read the stats from */
+ ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
+@@ -1242,8 +1238,6 @@ static void ocelot_update_stats(struct ocelot *ocelot)
+ ~(u64)U32_MAX) + val;
+ }
+ }
+-
+- mutex_unlock(&ocelot->stats_lock);
+ }
+
+ static void ocelot_check_stats_work(struct work_struct *work)
+@@ -1252,7 +1246,9 @@ static void ocelot_check_stats_work(struct work_struct *work)
+ struct ocelot *ocelot = container_of(del_work, struct ocelot,
+ stats_work);
+
++ mutex_lock(&ocelot->stats_lock);
+ ocelot_update_stats(ocelot);
++ mutex_unlock(&ocelot->stats_lock);
+
+ queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
+ OCELOT_STATS_CHECK_DELAY);
+@@ -1262,12 +1258,16 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
+ {
+ int i;
+
++ mutex_lock(&ocelot->stats_lock);
++
+ /* check and update now */
+ ocelot_update_stats(ocelot);
+
+ /* Copy all counters */
+ for (i = 0; i < ocelot->num_stats; i++)
+ *data++ = ocelot->stats[port * ocelot->num_stats + i];
++
++ mutex_unlock(&ocelot->stats_lock);
+ }
+ EXPORT_SYMBOL(ocelot_get_ethtool_stats);
+
+@@ -1299,14 +1299,16 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+ BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+- info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
++ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
++ BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
++ BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
++ BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
+
+ return 0;
+ }
+ EXPORT_SYMBOL(ocelot_get_ts_info);
+
+-static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
+- bool only_active_ports)
++static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)
+ {
+ u32 mask = 0;
+ int port;
+@@ -1317,12 +1319,8 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
+ if (!ocelot_port)
+ continue;
+
+- if (ocelot_port->bond == bond) {
+- if (only_active_ports && !ocelot_port->lag_tx_active)
+- continue;
+-
++ if (ocelot_port->bond == bond)
+ mask |= BIT(port);
+- }
+ }
+
+ return mask;
+@@ -1409,10 +1407,8 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
+ mask = ocelot_get_bridge_fwd_mask(ocelot, port, bridge);
+ mask |= cpu_fwd_mask;
+ mask &= ~BIT(port);
+- if (bond) {
+- mask &= ~ocelot_get_bond_mask(ocelot, bond,
+- false);
+- }
++ if (bond)
++ mask &= ~ocelot_get_bond_mask(ocelot, bond);
+ } else {
+ /* Standalone ports forward only to DSA tag_8021q CPU
+ * ports (if those exist), or to the hardware CPU port
+@@ -1730,13 +1726,17 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
+ if (!bond || (visited & BIT(lag)))
+ continue;
+
+- bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
++ bond_mask = ocelot_get_bond_mask(ocelot, bond);
+
+ for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
++ struct ocelot_port *ocelot_port = ocelot->ports[port];
++
+ // Destination mask
+ ocelot_write_rix(ocelot, bond_mask,
+ ANA_PGID_PGID, port);
+- aggr_idx[num_active_ports++] = port;
++
++ if (ocelot_port->lag_tx_active)
++ aggr_idx[num_active_ports++] = port;
+ }
+
+ for_each_aggr_pgid(ocelot, i) {
+@@ -1785,8 +1785,7 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
+
+ bond = ocelot_port->bond;
+ if (bond) {
+- int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
+- false));
++ int lag = __ffs(ocelot_get_bond_mask(ocelot, bond));
+
+ ocelot_rmw_gix(ocelot,
+ ANA_PORT_PORT_CFG_PORTID_VAL(lag),
+@@ -1933,6 +1932,8 @@ static void ocelot_port_set_mcast_flood(struct ocelot *ocelot, int port,
+ val = BIT(port);
+
+ ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MC);
++ ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV4);
++ ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV6);
+ }
+
+ static void ocelot_port_set_bcast_flood(struct ocelot *ocelot, int port,
+@@ -2205,11 +2206,15 @@ int ocelot_init(struct ocelot *ocelot)
+ ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
+ ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
+ ANA_PGID_PGID, PGID_MC);
++ ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
++ ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
++ ANA_PGID_PGID, PGID_MCIPV4);
++ ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
++ ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
++ ANA_PGID_PGID, PGID_MCIPV6);
+ ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
+ ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
+ ANA_PGID_PGID, PGID_BC);
+- ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4);
+- ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6);
+
+ /* Allow manual injection via DEVCPU_QS registers, and byte swap these
+ * registers endianness.
+diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
+index 8b843d3c9189a..b7e7bd744a1b8 100644
+--- a/drivers/net/ethernet/mscc/ocelot_flower.c
++++ b/drivers/net/ethernet/mscc/ocelot_flower.c
+@@ -54,6 +54,12 @@ static int ocelot_chain_to_block(int chain, bool ingress)
+ */
+ static int ocelot_chain_to_lookup(int chain)
+ {
++ /* Backwards compatibility with older, single-chain tc-flower
++ * offload support in Ocelot
++ */
++ if (chain == 0)
++ return 0;
++
+ return (chain / VCAP_LOOKUP) % 10;
+ }
+
+@@ -62,7 +68,15 @@ static int ocelot_chain_to_lookup(int chain)
+ */
+ static int ocelot_chain_to_pag(int chain)
+ {
+- int lookup = ocelot_chain_to_lookup(chain);
++ int lookup;
++
++ /* Backwards compatibility with older, single-chain tc-flower
++ * offload support in Ocelot
++ */
++ if (chain == 0)
++ return 0;
++
++ lookup = ocelot_chain_to_lookup(chain);
+
+ /* calculate PAG value as chain index relative to the first PAG */
+ return chain - VCAP_IS2_CHAIN(lookup, 0);
+@@ -192,9 +206,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
+ filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
+ break;
+ case FLOW_ACTION_TRAP:
+- if (filter->block_id != VCAP_IS2) {
++ if (filter->block_id != VCAP_IS2 ||
++ filter->lookup != 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+- "Trap action can only be offloaded to VCAP IS2");
++ "Trap action can only be offloaded to VCAP IS2 lookup 0");
+ return -EOPNOTSUPP;
+ }
+ if (filter->goto_target != -1) {
+@@ -458,6 +473,18 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
+ flow_rule_match_control(rule, &match);
+ }
+
++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
++ struct flow_match_vlan match;
++
++ flow_rule_match_vlan(rule, &match);
++ filter->key_type = OCELOT_VCAP_KEY_ANY;
++ filter->vlan.vid.value = match.key->vlan_id;
++ filter->vlan.vid.mask = match.mask->vlan_id;
++ filter->vlan.pcp.value[0] = match.key->vlan_priority;
++ filter->vlan.pcp.mask[0] = match.mask->vlan_priority;
++ match_protocol = false;
++ }
++
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
+
+@@ -467,13 +494,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
+ return -EOPNOTSUPP;
+ }
+
+- if (filter->block_id == VCAP_IS1 &&
+- !is_zero_ether_addr(match.mask->dst)) {
+- NL_SET_ERR_MSG_MOD(extack,
+- "Key type S1_NORMAL cannot match on destination MAC");
+- return -EOPNOTSUPP;
+- }
+-
+ /* The hw support mac matches only for MAC_ETYPE key,
+ * therefore if other matches(port, tcp flags, etc) are added
+ * then just bail out
+@@ -488,6 +508,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
+ return -EOPNOTSUPP;
+
+ flow_rule_match_eth_addrs(rule, &match);
++
++ if (filter->block_id == VCAP_IS1 &&
++ !is_zero_ether_addr(match.mask->dst)) {
++ NL_SET_ERR_MSG_MOD(extack,
++ "Key type S1_NORMAL cannot match on destination MAC");
++ return -EOPNOTSUPP;
++ }
++
+ filter->key_type = OCELOT_VCAP_KEY_ETYPE;
+ ether_addr_copy(filter->key.etype.dmac.value,
+ match.key->dst);
+@@ -589,18 +617,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
+ match_protocol = false;
+ }
+
+- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+- struct flow_match_vlan match;
+-
+- flow_rule_match_vlan(rule, &match);
+- filter->key_type = OCELOT_VCAP_KEY_ANY;
+- filter->vlan.vid.value = match.key->vlan_id;
+- filter->vlan.vid.mask = match.mask->vlan_id;
+- filter->vlan.pcp.value[0] = match.key->vlan_priority;
+- filter->vlan.pcp.mask[0] = match.mask->vlan_priority;
+- match_protocol = false;
+- }
+-
+ finished_key_parsing:
+ if (match_protocol && proto != ETH_P_ALL) {
+ if (filter->block_id == VCAP_ES0) {
+diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
+index 2545727fd5b2f..da8a4e01d4be3 100644
+--- a/drivers/net/ethernet/mscc/ocelot_net.c
++++ b/drivers/net/ethernet/mscc/ocelot_net.c
+@@ -606,7 +606,7 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p)
+ /* Then forget the previous one. */
+ ocelot_mact_forget(ocelot, dev->dev_addr, ocelot_port->pvid_vlan.vid);
+
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+ return 0;
+ }
+
+@@ -1168,7 +1168,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
+ ocelot_port_bridge_join(ocelot, port, bridge);
+
+ err = switchdev_bridge_port_offload(brport_dev, dev, priv,
+- &ocelot_netdevice_nb,
++ &ocelot_switchdev_nb,
+ &ocelot_switchdev_blocking_nb,
+ false, extack);
+ if (err)
+@@ -1182,7 +1182,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
+
+ err_switchdev_sync:
+ switchdev_bridge_port_unoffload(brport_dev, priv,
+- &ocelot_netdevice_nb,
++ &ocelot_switchdev_nb,
+ &ocelot_switchdev_blocking_nb);
+ err_switchdev_offload:
+ ocelot_port_bridge_leave(ocelot, port, bridge);
+@@ -1195,7 +1195,7 @@ static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
+ struct ocelot_port_private *priv = netdev_priv(dev);
+
+ switchdev_bridge_port_unoffload(brport_dev, priv,
+- &ocelot_netdevice_nb,
++ &ocelot_switchdev_nb,
+ &ocelot_switchdev_blocking_nb);
+ }
+
+diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c
+index 99d7376a70a74..732a4ef22518d 100644
+--- a/drivers/net/ethernet/mscc/ocelot_vcap.c
++++ b/drivers/net/ethernet/mscc/ocelot_vcap.c
+@@ -373,7 +373,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix,
+ OCELOT_VCAP_BIT_0);
+ vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0,
+ ~filter->ingress_port_mask);
+- vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY);
+ vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH,
+ OCELOT_VCAP_BIT_ANY);
+ vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc);
+@@ -1153,6 +1152,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
+ struct ocelot_vcap_filter *tmp;
+
+ tmp = ocelot_vcap_block_find_filter_by_index(block, i);
++ /* Read back the filter's counters before moving it */
++ vcap_entry_get(ocelot, i - 1, tmp);
+ vcap_entry_set(ocelot, i, tmp);
+ }
+
+@@ -1192,7 +1193,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
+ struct ocelot_vcap_filter del_filter;
+ int i, index;
+
++ /* Need to inherit the block_id so that vcap_entry_set()
++ * does not get confused and knows where to install it.
++ */
+ memset(&del_filter, 0, sizeof(del_filter));
++ del_filter.block_id = filter->block_id;
+
+ /* Gets index of the filter */
+ index = ocelot_vcap_block_get_filter_index(block, filter);
+@@ -1207,6 +1212,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
+ struct ocelot_vcap_filter *tmp;
+
+ tmp = ocelot_vcap_block_find_filter_by_index(block, i);
++ /* Read back the filter's counters before moving it */
++ vcap_entry_get(ocelot, i + 1, tmp);
+ vcap_entry_set(ocelot, i, tmp);
+ }
+
+diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+index c1a75b08ced7e..97c2604df019a 100644
+--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
++++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+@@ -2900,11 +2900,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
+ status = myri10ge_xmit(curr, dev);
+ if (status != 0) {
+ dev_kfree_skb_any(curr);
+- if (segs != NULL) {
+- curr = segs;
+- segs = next;
++ skb_list_walk_safe(next, curr, next) {
+ curr->next = NULL;
+- dev_kfree_skb_any(segs);
++ dev_kfree_skb_any(curr);
+ }
+ goto drop;
+ }
+@@ -3925,6 +3923,7 @@ abort_with_slices:
+ myri10ge_free_slices(mgp);
+
+ abort_with_firmware:
++ kfree(mgp->msix_vectors);
+ myri10ge_dummy_rdma(mgp, 0);
+
+ abort_with_ioremap:
+diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
+index d17d1b4f2585f..825356ee3492e 100644
+--- a/drivers/net/ethernet/natsemi/sonic.c
++++ b/drivers/net/ethernet/natsemi/sonic.c
+@@ -292,7 +292,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
+ */
+
+ laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE);
+- if (!laddr) {
++ if (dma_mapping_error(lp->device, laddr)) {
+ pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+@@ -509,7 +509,7 @@ static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp,
+
+ *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE),
+ SONIC_RBSIZE, DMA_FROM_DEVICE);
+- if (!*new_addr) {
++ if (dma_mapping_error(lp->device, *new_addr)) {
+ dev_kfree_skb(*new_skb);
+ *new_skb = NULL;
+ return false;
+diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
+index ca4686094701c..0a02d8bd0a3e5 100644
+--- a/drivers/net/ethernet/natsemi/xtsonic.c
++++ b/drivers/net/ethernet/natsemi/xtsonic.c
+@@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = {
+ .ndo_set_mac_address = eth_mac_addr,
+ };
+
+-static int __init sonic_probe1(struct net_device *dev)
++static int sonic_probe1(struct net_device *dev)
+ {
+ unsigned int silicon_revision;
+ struct sonic_local *lp = netdev_priv(dev);
+diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
+index 3b6b2e61139e6..65ccdbe665e5c 100644
+--- a/drivers/net/ethernet/neterion/s2io.c
++++ b/drivers/net/ethernet/neterion/s2io.c
+@@ -2386,7 +2386,7 @@ static void free_tx_buffers(struct s2io_nic *nic)
+ skb = s2io_txdl_getskb(&mac_control->fifos[i], txdp, j);
+ if (skb) {
+ swstats->mem_freed += skb->truesize;
+- dev_kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ cnt++;
+ }
+ }
+@@ -7125,9 +7125,8 @@ static int s2io_card_up(struct s2io_nic *sp)
+ if (ret) {
+ DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n",
+ dev->name);
+- s2io_reset(sp);
+- free_rx_buffers(sp);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto err_fill_buff;
+ }
+ DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i,
+ ring->rx_bufs_left);
+@@ -7165,18 +7164,16 @@ static int s2io_card_up(struct s2io_nic *sp)
+ /* Enable Rx Traffic and interrupts on the NIC */
+ if (start_nic(sp)) {
+ DBG_PRINT(ERR_DBG, "%s: Starting NIC failed\n", dev->name);
+- s2io_reset(sp);
+- free_rx_buffers(sp);
+- return -ENODEV;
++ ret = -ENODEV;
++ goto err_out;
+ }
+
+ /* Add interrupt service routine */
+ if (s2io_add_isr(sp) != 0) {
+ if (sp->config.intr_type == MSI_X)
+ s2io_rem_isr(sp);
+- s2io_reset(sp);
+- free_rx_buffers(sp);
+- return -ENODEV;
++ ret = -ENODEV;
++ goto err_out;
+ }
+
+ timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0);
+@@ -7196,6 +7193,20 @@ static int s2io_card_up(struct s2io_nic *sp)
+ }
+
+ return 0;
++
++err_out:
++ if (config->napi) {
++ if (config->intr_type == MSI_X) {
++ for (i = 0; i < sp->config.rx_ring_num; i++)
++ napi_disable(&sp->mac_control.rings[i].napi);
++ } else {
++ napi_disable(&sp->napi);
++ }
++ }
++err_fill_buff:
++ s2io_reset(sp);
++ free_rx_buffers(sp);
++ return ret;
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
+index 605a1617b195e..5d3df28c648ff 100644
+--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
++++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
+@@ -305,7 +305,7 @@ nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn,
+ return;
+ }
+
+- ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
++ eth_hw_addr_set(nn->dp.netdev, mac_addr);
+ ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
+ }
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
+index 2a432de11858d..df5a6a0bf1d5d 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
+@@ -472,7 +472,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
+ set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
+ ip_rt_put(rt);
+ } else {
+- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
++ set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+ }
+ }
+
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+index bfd7d1c350767..7e9fcc16286e2 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+@@ -442,6 +442,11 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
+ key_size += sizeof(struct nfp_flower_ipv6);
+ }
+
++ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
++ map[FLOW_PAY_QINQ] = key_size;
++ key_size += sizeof(struct nfp_flower_vlan);
++ }
++
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ map[FLOW_PAY_GRE] = key_size;
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
+@@ -450,11 +455,6 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
+ key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
+ }
+
+- if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
+- map[FLOW_PAY_QINQ] = key_size;
+- key_size += sizeof(struct nfp_flower_vlan);
+- }
+-
+ if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
+ (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
+ map[FLOW_PAY_UDP_TUN] = key_size;
+@@ -693,6 +693,17 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
+ }
+ }
+
++ if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
++ offset = key_map[FLOW_PAY_QINQ];
++ key = kdata + offset;
++ msk = mdata + offset;
++ for (i = 0; i < _CT_TYPE_MAX; i++) {
++ nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
++ (struct nfp_flower_vlan *)msk,
++ rules[i]);
++ }
++ }
++
+ if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ offset = key_map[FLOW_PAY_GRE];
+ key = kdata + offset;
+@@ -733,17 +744,6 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
+ }
+ }
+
+- if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
+- offset = key_map[FLOW_PAY_QINQ];
+- key = kdata + offset;
+- msk = mdata + offset;
+- for (i = 0; i < _CT_TYPE_MAX; i++) {
+- nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
+- (struct nfp_flower_vlan *)msk,
+- rules[i]);
+- }
+- }
+-
+ if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
+ key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
+ offset = key_map[FLOW_PAY_UDP_TUN];
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
+index 9d86eea4dc169..fb8bd2135c63a 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
+@@ -602,6 +602,14 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
+ msk += sizeof(struct nfp_flower_ipv6);
+ }
+
++ if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) {
++ nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext,
++ (struct nfp_flower_vlan *)msk,
++ rule);
++ ext += sizeof(struct nfp_flower_vlan);
++ msk += sizeof(struct nfp_flower_vlan);
++ }
++
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+ struct nfp_flower_ipv6_gre_tun *gre_match;
+@@ -637,14 +645,6 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
+ }
+ }
+
+- if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) {
+- nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext,
+- (struct nfp_flower_vlan *)msk,
+- rule);
+- ext += sizeof(struct nfp_flower_vlan);
+- msk += sizeof(struct nfp_flower_vlan);
+- }
+-
+ if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
+ key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+index ab70179728f63..babd374333f34 100644
+--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+@@ -922,8 +922,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
+ int port, bool mod)
+ {
+ struct nfp_flower_priv *priv = app->priv;
+- int ida_idx = NFP_MAX_MAC_INDEX, err;
+ struct nfp_tun_offloaded_mac *entry;
++ int ida_idx = -1, err;
+ u16 nfp_mac_idx = 0;
+
+ entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
+@@ -997,7 +997,7 @@ err_remove_hash:
+ err_free_entry:
+ kfree(entry);
+ err_free_ida:
+- if (ida_idx != NFP_MAX_MAC_INDEX)
++ if (ida_idx != -1)
+ ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
+
+ return err;
+@@ -1011,6 +1011,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
+ struct nfp_flower_repr_priv *repr_priv;
+ struct nfp_tun_offloaded_mac *entry;
+ struct nfp_repr *repr;
++ u16 nfp_mac_idx;
+ int ida_idx;
+
+ entry = nfp_tunnel_lookup_offloaded_macs(app, mac);
+@@ -1029,8 +1030,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
+ entry->bridge_count--;
+
+ if (!entry->bridge_count && entry->ref_count) {
+- u16 nfp_mac_idx;
+-
+ nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
+ if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx,
+ false)) {
+@@ -1046,7 +1045,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
+
+ /* If MAC is now used by 1 repr set the offloaded MAC index to port. */
+ if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) {
+- u16 nfp_mac_idx;
+ int port, err;
+
+ repr_priv = list_first_entry(&entry->repr_list,
+@@ -1074,8 +1072,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs,
+ &entry->ht_node,
+ offloaded_macs_params));
++
++ if (nfp_flower_is_supported_bridge(netdev))
++ nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
++ else
++ nfp_mac_idx = entry->index;
++
+ /* If MAC has global ID then extract and free the ida entry. */
+- if (nfp_tunnel_is_mac_idx_global(entry->index)) {
++ if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) {
+ ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
+ ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
+ }
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+index bea978df77138..1647b6b180cc5 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+@@ -363,7 +363,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
+ return ret;
+
+ attrs.split = eth_port.is_split;
+- attrs.splittable = !attrs.split;
++ attrs.splittable = eth_port.port_lanes > 1 && !attrs.split;
+ attrs.lanes = eth_port.port_lanes;
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = eth_port.label_port;
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
+index df203738511bf..0b1865e9f0b59 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
+@@ -565,7 +565,6 @@ struct nfp_net_dp {
+ * @exn_name: Name for Exception interrupt
+ * @shared_handler: Handler for shared interrupts
+ * @shared_name: Name for shared interrupt
+- * @me_freq_mhz: ME clock_freq (MHz)
+ * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active,
+ * @reconfig_sync_present and HW reconfiguration request
+ * regs/machinery from async requests (sync must take
+@@ -650,8 +649,6 @@ struct nfp_net {
+ irq_handler_t shared_handler;
+ char shared_name[IFNAMSIZ + 8];
+
+- u32 me_freq_mhz;
+-
+ bool link_up;
+ spinlock_t link_status_lock;
+
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+index 5bfa22accf2c9..69ac205bbdbd0 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+@@ -2067,7 +2067,7 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
+ if (napi_complete_done(napi, pkts_polled))
+ nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
+
+- if (r_vec->nfp_net->rx_coalesce_adapt_on) {
++ if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
+ struct dim_sample dim_sample = {};
+ unsigned int start;
+ u64 pkts, bytes;
+@@ -2082,7 +2082,7 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
+ net_dim(&r_vec->rx_dim, dim_sample);
+ }
+
+- if (r_vec->nfp_net->tx_coalesce_adapt_on) {
++ if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
+ struct dim_sample dim_sample = {};
+ unsigned int start;
+ u64 pkts, bytes;
+@@ -3016,10 +3016,8 @@ static void nfp_net_rx_dim_work(struct work_struct *work)
+
+ /* copy RX interrupt coalesce parameters */
+ value = (moder.pkts << 16) | (factor * moder.usec);
+- rtnl_lock();
+ nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value);
+ (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+- rtnl_unlock();
+
+ dim->state = DIM_START_MEASURE;
+ }
+@@ -3047,10 +3045,8 @@ static void nfp_net_tx_dim_work(struct work_struct *work)
+
+ /* copy TX interrupt coalesce parameters */
+ value = (moder.pkts << 16) | (factor * moder.usec);
+- rtnl_lock();
+ nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value);
+ (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+- rtnl_unlock();
+
+ dim->state = DIM_START_MEASURE;
+ }
+@@ -3486,21 +3482,21 @@ static void nfp_net_stat64(struct net_device *netdev,
+ unsigned int start;
+
+ do {
+- start = u64_stats_fetch_begin(&r_vec->rx_sync);
++ start = u64_stats_fetch_begin_irq(&r_vec->rx_sync);
+ data[0] = r_vec->rx_pkts;
+ data[1] = r_vec->rx_bytes;
+ data[2] = r_vec->rx_drops;
+- } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
++ } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start));
+ stats->rx_packets += data[0];
+ stats->rx_bytes += data[1];
+ stats->rx_dropped += data[2];
+
+ do {
+- start = u64_stats_fetch_begin(&r_vec->tx_sync);
++ start = u64_stats_fetch_begin_irq(&r_vec->tx_sync);
+ data[0] = r_vec->tx_pkts;
+ data[1] = r_vec->tx_bytes;
+ data[2] = r_vec->tx_errors;
+- } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
++ } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start));
+ stats->tx_packets += data[0];
+ stats->tx_bytes += data[1];
+ stats->tx_errors += data[2];
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+index 0685ece1f155d..d295942968f33 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+@@ -286,8 +286,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
+
+ /* Init to unknowns */
+ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+- ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+- ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+ cmd->base.port = PORT_OTHER;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+@@ -295,6 +293,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
+ port = nfp_port_from_netdev(netdev);
+ eth_port = nfp_port_get_eth_port(port);
+ if (eth_port) {
++ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
++ ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+ cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE;
+ nfp_net_set_fec_link_mode(eth_port, cmd);
+@@ -483,7 +483,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
+ unsigned int start;
+
+ do {
+- start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync);
++ start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync);
+ data[0] = nn->r_vecs[i].rx_pkts;
+ tmp[0] = nn->r_vecs[i].hw_csum_rx_ok;
+ tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok;
+@@ -491,10 +491,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
+ tmp[3] = nn->r_vecs[i].hw_csum_rx_error;
+ tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail;
+ tmp[5] = nn->r_vecs[i].hw_tls_rx;
+- } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start));
++ } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start));
+
+ do {
+- start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync);
++ start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync);
+ data[1] = nn->r_vecs[i].tx_pkts;
+ data[2] = nn->r_vecs[i].tx_busy;
+ tmp[6] = nn->r_vecs[i].hw_csum_tx;
+@@ -504,7 +504,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
+ tmp[10] = nn->r_vecs[i].hw_tls_tx;
+ tmp[11] = nn->r_vecs[i].tls_tx_fallback;
+ tmp[12] = nn->r_vecs[i].tls_tx_no_fallback;
+- } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start));
++ } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start));
+
+ data += NN_RVEC_PER_Q_STATS;
+
+@@ -1219,6 +1219,11 @@ nfp_port_get_module_info(struct net_device *netdev,
+ u8 data;
+
+ port = nfp_port_from_netdev(netdev);
++ if (!port)
++ return -EOPNOTSUPP;
++
++ /* update port state to get latest interface */
++ set_bit(NFP_PORT_CHANGED, &port->flags);
+ eth_port = nfp_port_get_eth_port(port);
+ if (!eth_port)
+ return -EOPNOTSUPP;
+@@ -1262,15 +1267,15 @@ nfp_port_get_module_info(struct net_device *netdev,
+
+ if (data < 0x3) {
+ modinfo->type = ETH_MODULE_SFF_8436;
+- modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
++ modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8636;
+- modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
++ modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
+ }
+ break;
+ case NFP_INTERFACE_QSFP28:
+ modinfo->type = ETH_MODULE_SFF_8636;
+- modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
++ modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
+ break;
+ default:
+ netdev_err(netdev, "Unsupported module 0x%x detected\n",
+@@ -1343,7 +1348,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
+ * ME timestamp ticks. There are 16 ME clock cycles for each timestamp
+ * count.
+ */
+- factor = nn->me_freq_mhz / 16;
++ factor = nn->tlv_caps.me_freq_mhz / 16;
+
+ /* Each pair of (usecs, max_frames) fields specifies that interrupts
+ * should be coalesced until
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+index d10a938013445..74c4bf4d397d8 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+@@ -55,7 +55,7 @@ nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
+ return;
+ }
+
+- ether_addr_copy(netdev->dev_addr, eth_port->mac_addr);
++ eth_hw_addr_set(netdev, eth_port->mac_addr);
+ ether_addr_copy(netdev->perm_addr, eth_port->mac_addr);
+ }
+
+diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+index c0e2f4394aef8..87f2268b16d6e 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+@@ -58,7 +58,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
+ return;
+ }
+
+- ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
++ eth_hw_addr_set(nn->dp.netdev, mac_addr);
+ ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
+ }
+
+diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+index d7ac0307797fd..a8286d0032d1e 100644
+--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
++++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+@@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size)
+ return -ENOMEM;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+- if (!cache)
++ if (!cache) {
++ nfp_cpp_area_free(area);
+ return -ENOMEM;
++ }
+
+ cache->id = 0;
+ cache->addr = 0;
+@@ -872,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id,
+ }
+
+ /* Adjust the start address to be cache size aligned */
+- cache->id = id;
+ cache->addr = addr & ~(u64)(cache->size - 1);
+
+ /* Re-init to the new ID and address */
+@@ -892,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id,
+ return NULL;
+ }
+
++ cache->id = id;
++
+ exit:
+ /* Adjust offset */
+ *offset = addr - cache->addr;
+diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
+index 346145d3180eb..cb63c037753de 100644
+--- a/drivers/net/ethernet/ni/nixge.c
++++ b/drivers/net/ethernet/ni/nixge.c
+@@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev)
+ struct sk_buff *skb;
+ int i;
+
+- for (i = 0; i < RX_BD_NUM; i++) {
+- phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+- phys);
+-
+- dma_unmap_single(ndev->dev.parent, phys_addr,
+- NIXGE_MAX_JUMBO_FRAME_SIZE,
+- DMA_FROM_DEVICE);
+-
+- skb = (struct sk_buff *)(uintptr_t)
+- nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+- sw_id_offset);
+- dev_kfree_skb(skb);
+- }
++ if (priv->rx_bd_v) {
++ for (i = 0; i < RX_BD_NUM; i++) {
++ phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
++ phys);
++
++ dma_unmap_single(ndev->dev.parent, phys_addr,
++ NIXGE_MAX_JUMBO_FRAME_SIZE,
++ DMA_FROM_DEVICE);
++
++ skb = (struct sk_buff *)(uintptr_t)
++ nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
++ sw_id_offset);
++ dev_kfree_skb(skb);
++ }
+
+- if (priv->rx_bd_v)
+ dma_free_coherent(ndev->dev.parent,
+ sizeof(*priv->rx_bd_v) * RX_BD_NUM,
+ priv->rx_bd_v,
+ priv->rx_bd_p);
++ }
+
+ if (priv->tx_skb)
+ devm_kfree(ndev->dev.parent, priv->tx_skb);
+@@ -899,6 +900,7 @@ static int nixge_open(struct net_device *ndev)
+ err_rx_irq:
+ free_irq(priv->tx_irq, ndev);
+ err_tx_irq:
++ napi_disable(&priv->napi);
+ phy_stop(phy);
+ phy_disconnect(phy);
+ tasklet_kill(&priv->dma_err_tasklet);
+@@ -1209,7 +1211,7 @@ static void *nixge_get_nvmem_address(struct device *dev)
+
+ cell = nvmem_cell_get(dev, "address");
+ if (IS_ERR(cell))
+- return NULL;
++ return cell;
+
+ mac = nvmem_cell_read(cell, &cell_size);
+ nvmem_cell_put(cell);
+@@ -1282,8 +1284,8 @@ static int nixge_probe(struct platform_device *pdev)
+ ndev->max_mtu = NIXGE_JUMBO_MTU;
+
+ mac_addr = nixge_get_nvmem_address(&pdev->dev);
+- if (mac_addr && is_valid_ether_addr(mac_addr)) {
+- ether_addr_copy(ndev->dev_addr, mac_addr);
++ if (!IS_ERR(mac_addr) && is_valid_ether_addr(mac_addr)) {
++ eth_hw_addr_set(ndev, mac_addr);
+ kfree(mac_addr);
+ } else {
+ eth_hw_addr_random(ndev);
+diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
+index ef3fb4cc90af6..d350c1ef9e0be 100644
+--- a/drivers/net/ethernet/nvidia/forcedeth.c
++++ b/drivers/net/ethernet/nvidia/forcedeth.c
+@@ -6129,6 +6129,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
+ return 0;
+
+ out_error:
++ nv_mgmt_release_sema(dev);
+ if (phystate_orig)
+ writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
+ out_freering:
+diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
+index c910fa2f40a4b..b42b65fb034ed 100644
+--- a/drivers/net/ethernet/nxp/lpc_eth.c
++++ b/drivers/net/ethernet/nxp/lpc_eth.c
+@@ -1349,7 +1349,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
+ __lpc_get_mac(pldat, ndev->dev_addr);
+
+ if (!is_valid_ether_addr(ndev->dev_addr)) {
+- of_get_mac_address(np, ndev->dev_addr);
++ of_get_ethdev_address(np, ndev);
+ }
+ if (!is_valid_ether_addr(ndev->dev_addr))
+ eth_hw_addr_random(ndev);
+@@ -1469,6 +1469,7 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
+ {
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct netdata_local *pldat;
++ int ret;
+
+ if (device_may_wakeup(&pdev->dev))
+ disable_irq_wake(ndev->irq);
+@@ -1478,7 +1479,9 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
+ pldat = netdev_priv(ndev);
+
+ /* Enable interface clock */
+- clk_enable(pldat->clk);
++ ret = clk_enable(pldat->clk);
++ if (ret)
++ return ret;
+
+ /* Reset and initialize */
+ __lpc_eth_reset(pldat);
+diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+index ec3e558f890ee..d555b4cc6049d 100644
+--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
++++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+@@ -1148,6 +1148,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter,
+ buffer_info->dma = 0;
+ buffer_info->time_stamp = 0;
+ tx_ring->next_to_use = ring_num;
++ dev_kfree_skb_any(skb);
+ return;
+ }
+ buffer_info->mapped = true;
+@@ -2464,6 +2465,7 @@ static void pch_gbe_remove(struct pci_dev *pdev)
+ unregister_netdev(netdev);
+
+ pch_gbe_phy_hw_reset(&adapter->hw);
++ pci_dev_put(adapter->ptp_pdev);
+
+ free_netdev(netdev);
+ }
+@@ -2539,7 +2541,7 @@ static int pch_gbe_probe(struct pci_dev *pdev,
+ /* setup the private structure */
+ ret = pch_gbe_sw_init(adapter);
+ if (ret)
+- goto err_free_netdev;
++ goto err_put_dev;
+
+ /* Initialize PHY */
+ ret = pch_gbe_init_phy(adapter);
+@@ -2597,6 +2599,8 @@ static int pch_gbe_probe(struct pci_dev *pdev,
+
+ err_free_adapter:
+ pch_gbe_phy_hw_reset(&adapter->hw);
++err_put_dev:
++ pci_dev_put(adapter->ptp_pdev);
+ err_free_netdev:
+ free_netdev(netdev);
+ return ret;
+diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c
+index 7e096b2888b92..b223488318ad7 100644
+--- a/drivers/net/ethernet/pasemi/pasemi_mac.c
++++ b/drivers/net/ethernet/pasemi/pasemi_mac.c
+@@ -1423,7 +1423,7 @@ static void pasemi_mac_queue_csdesc(const struct sk_buff *skb,
+ write_dma_reg(PAS_DMA_TXCHAN_INCR(txring->chan.chno), 2);
+ }
+
+-static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct pasemi_mac * const mac = netdev_priv(dev);
+ struct pasemi_mac_txring * const txring = tx_ring(mac);
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+index 7e296fa71b368..d324c292318b3 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+@@ -255,7 +255,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ err = ionic_map_bars(ionic);
+ if (err)
+- goto err_out_pci_disable_device;
++ goto err_out_pci_release_regions;
+
+ /* Configure the device */
+ err = ionic_setup(ionic);
+@@ -331,6 +331,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ goto err_out_deregister_lifs;
+ }
+
++ mod_timer(&ionic->watchdog_timer,
++ round_jiffies(jiffies + ionic->watchdog_period));
++
+ return 0;
+
+ err_out_deregister_lifs:
+@@ -348,7 +351,6 @@ err_out_port_reset:
+ err_out_reset:
+ ionic_reset(ionic);
+ err_out_teardown:
+- del_timer_sync(&ionic->watchdog_timer);
+ pci_clear_master(pdev);
+ /* Don't fail the probe for these errors, keep
+ * the hw interface around for inspection
+@@ -357,6 +359,7 @@ err_out_teardown:
+
+ err_out_unmap_bars:
+ ionic_unmap_bars(ionic);
++err_out_pci_release_regions:
+ pci_release_regions(pdev);
+ err_out_pci_disable_device:
+ pci_disable_device(pdev);
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+index 0d6858ab511c6..b778d8264bca0 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+@@ -122,9 +122,6 @@ int ionic_dev_setup(struct ionic *ionic)
+ idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+ ioread8(&idev->dev_info_regs->fw_status);
+
+- mod_timer(&ionic->watchdog_timer,
+- round_jiffies(jiffies + ionic->watchdog_period));
+-
+ idev->db_pages = bar->vaddr;
+ idev->phy_db_pages = bar->bus_addr;
+
+@@ -132,6 +129,16 @@ int ionic_dev_setup(struct ionic *ionic)
+ }
+
+ /* Devcmd Interface */
++bool ionic_is_fw_running(struct ionic_dev *idev)
++{
++ u8 fw_status = ioread8(&idev->dev_info_regs->fw_status);
++
++ /* firmware is useful only if the running bit is set and
++ * fw_status != 0xff (bad PCI read)
++ */
++ return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
++}
++
+ int ionic_heartbeat_check(struct ionic *ionic)
+ {
+ struct ionic_dev *idev = &ionic->idev;
+@@ -155,13 +162,10 @@ do_check_time:
+ goto do_check_time;
+ }
+
+- /* firmware is useful only if the running bit is set and
+- * fw_status != 0xff (bad PCI read)
+- * If fw_status is not ready don't bother with the generation.
+- */
+ fw_status = ioread8(&idev->dev_info_regs->fw_status);
+
+- if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
++ /* If fw_status is not ready don't bother with the generation */
++ if (!ionic_is_fw_running(idev)) {
+ fw_status_ready = false;
+ } else {
+ fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+index 8311086fb1f49..922bb6c9e01d5 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
++++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+@@ -357,5 +357,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
+ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
+ unsigned int stop_index);
+ int ionic_heartbeat_check(struct ionic *ionic);
++bool ionic_is_fw_running(struct ionic_dev *idev);
+
+ #endif /* _IONIC_DEV_H_ */
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+index c7d0e195d1760..5c06decc868c4 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+@@ -65,6 +65,8 @@ struct ionic *ionic_devlink_alloc(struct device *dev)
+ struct devlink *dl;
+
+ dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
++ if (!dl)
++ return NULL;
+
+ return devlink_priv(dl);
+ }
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+index 3de1a03839e25..2fa116c3694c4 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+@@ -724,7 +724,7 @@ static int ionic_get_rxnfc(struct net_device *netdev,
+ info->data = lif->nxqs;
+ break;
+ default:
+- netdev_err(netdev, "Command parameter %d is not supported\n",
++ netdev_dbg(netdev, "Command parameter %d is not supported\n",
+ info->cmd);
+ err = -EOPNOTSUPP;
+ }
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+index 7f3322ce044c7..2cc126d378353 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+@@ -268,6 +268,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq)
+ .oper = IONIC_Q_ENABLE,
+ },
+ };
++ int ret;
+
+ idev = &lif->ionic->idev;
+ dev = lif->ionic->dev;
+@@ -275,16 +276,24 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq)
+ dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n",
+ ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+
++ if (qcq->flags & IONIC_QCQ_F_INTR)
++ ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
++
++ ret = ionic_adminq_post_wait(lif, &ctx);
++ if (ret)
++ return ret;
++
++ if (qcq->napi.poll)
++ napi_enable(&qcq->napi);
++
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ irq_set_affinity_hint(qcq->intr.vector,
+ &qcq->intr.affinity_mask);
+- napi_enable(&qcq->napi);
+- ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
+ ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_CLEAR);
+ }
+
+- return ionic_adminq_post_wait(lif, &ctx);
++ return 0;
+ }
+
+ static int ionic_qcq_disable(struct ionic_qcq *qcq, bool send_to_hw)
+@@ -442,11 +451,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif)
+ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
+ struct ionic_qcq *n_qcq)
+ {
+- if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
+- ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index);
+- n_qcq->flags &= ~IONIC_QCQ_F_INTR;
+- }
+-
+ n_qcq->intr.vector = src_qcq->intr.vector;
+ n_qcq->intr.index = src_qcq->intr.index;
+ }
+@@ -1565,7 +1569,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif,
+ if ((old_hw_features ^ lif->hw_features) & IONIC_ETH_HW_RX_HASH)
+ ionic_lif_rss_config(lif, lif->rss_types, NULL, NULL);
+
+- if ((vlan_flags & features) &&
++ if ((vlan_flags & le64_to_cpu(ctx.cmd.lif_setattr.features)) &&
+ !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features)))
+ dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n");
+
+@@ -1692,8 +1696,67 @@ static int ionic_set_features(struct net_device *netdev,
+ return err;
+ }
+
++static int ionic_set_attr_mac(struct ionic_lif *lif, u8 *mac)
++{
++ struct ionic_admin_ctx ctx = {
++ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
++ .cmd.lif_setattr = {
++ .opcode = IONIC_CMD_LIF_SETATTR,
++ .index = cpu_to_le16(lif->index),
++ .attr = IONIC_LIF_ATTR_MAC,
++ },
++ };
++
++ ether_addr_copy(ctx.cmd.lif_setattr.mac, mac);
++ return ionic_adminq_post_wait(lif, &ctx);
++}
++
++static int ionic_get_attr_mac(struct ionic_lif *lif, u8 *mac_addr)
++{
++ struct ionic_admin_ctx ctx = {
++ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
++ .cmd.lif_getattr = {
++ .opcode = IONIC_CMD_LIF_GETATTR,
++ .index = cpu_to_le16(lif->index),
++ .attr = IONIC_LIF_ATTR_MAC,
++ },
++ };
++ int err;
++
++ err = ionic_adminq_post_wait(lif, &ctx);
++ if (err)
++ return err;
++
++ ether_addr_copy(mac_addr, ctx.comp.lif_getattr.mac);
++ return 0;
++}
++
++static int ionic_program_mac(struct ionic_lif *lif, u8 *mac)
++{
++ u8 get_mac[ETH_ALEN];
++ int err;
++
++ err = ionic_set_attr_mac(lif, mac);
++ if (err)
++ return err;
++
++ err = ionic_get_attr_mac(lif, get_mac);
++ if (err)
++ return err;
++
++ /* To deal with older firmware that silently ignores the set attr mac:
++ * doesn't actually change the mac and doesn't return an error, so we
++ * do the get attr to verify whether or not the set actually happened
++ */
++ if (!ether_addr_equal(get_mac, mac))
++ return 1;
++
++ return 0;
++}
++
+ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
+ {
++ struct ionic_lif *lif = netdev_priv(netdev);
+ struct sockaddr *addr = sa;
+ u8 *mac;
+ int err;
+@@ -1702,6 +1765,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
+ if (ether_addr_equal(netdev->dev_addr, mac))
+ return 0;
+
++ err = ionic_program_mac(lif, mac);
++ if (err < 0)
++ return err;
++
++ if (err > 0)
++ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
++ __func__);
++
+ err = eth_prepare_mac_addr_change(netdev, addr);
+ if (err)
+ return err;
+@@ -2813,11 +2884,15 @@ err_out:
+ * than the full array, but leave the qcq shells in place
+ */
+ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) {
+- lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+- ionic_qcq_free(lif, lif->txqcqs[i]);
++ if (lif->txqcqs && lif->txqcqs[i]) {
++ lif->txqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
++ ionic_qcq_free(lif, lif->txqcqs[i]);
++ }
+
+- lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
+- ionic_qcq_free(lif, lif->rxqcqs[i]);
++ if (lif->rxqcqs && lif->rxqcqs[i]) {
++ lif->rxqcqs[i]->flags &= ~IONIC_QCQ_F_INTR;
++ ionic_qcq_free(lif, lif->rxqcqs[i]);
++ }
+ }
+
+ if (err)
+@@ -2974,11 +3049,10 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
+
+ netif_device_detach(lif->netdev);
+
++ mutex_lock(&lif->queue_lock);
+ if (test_bit(IONIC_LIF_F_UP, lif->state)) {
+ dev_info(ionic->dev, "Surprise FW stop, stopping queues\n");
+- mutex_lock(&lif->queue_lock);
+ ionic_stop_queues(lif);
+- mutex_unlock(&lif->queue_lock);
+ }
+
+ if (netif_running(lif->netdev)) {
+@@ -2989,6 +3063,8 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
+ ionic_reset(ionic);
+ ionic_qcqs_free(lif);
+
++ mutex_unlock(&lif->queue_lock);
++
+ dev_info(ionic->dev, "FW Down: LIFs stopped\n");
+ }
+
+@@ -3012,9 +3088,15 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
+ err = ionic_port_init(ionic);
+ if (err)
+ goto err_out;
++
++ mutex_lock(&lif->queue_lock);
++
++ if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
++ dev_info(ionic->dev, "FW Up: clearing broken state\n");
++
+ err = ionic_qcqs_alloc(lif);
+ if (err)
+- goto err_out;
++ goto err_unlock;
+
+ err = ionic_lif_init(lif);
+ if (err)
+@@ -3035,6 +3117,8 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
+ goto err_txrx_free;
+ }
+
++ mutex_unlock(&lif->queue_lock);
++
+ clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
+ ionic_link_status_check_request(lif, CAN_SLEEP);
+ netif_device_attach(lif->netdev);
+@@ -3051,6 +3135,8 @@ err_lifs_deinit:
+ ionic_lif_deinit(lif);
+ err_qcqs_free:
+ ionic_qcqs_free(lif);
++err_unlock:
++ mutex_unlock(&lif->queue_lock);
+ err_out:
+ dev_err(ionic->dev, "FW Up: LIFs restart failed - err %d\n", err);
+ }
+@@ -3215,6 +3301,7 @@ static int ionic_station_set(struct ionic_lif *lif)
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
++ u8 mac_address[ETH_ALEN];
+ struct sockaddr addr;
+ int err;
+
+@@ -3223,8 +3310,23 @@ static int ionic_station_set(struct ionic_lif *lif)
+ return err;
+ netdev_dbg(lif->netdev, "found initial MAC addr %pM\n",
+ ctx.comp.lif_getattr.mac);
+- if (is_zero_ether_addr(ctx.comp.lif_getattr.mac))
+- return 0;
++ ether_addr_copy(mac_address, ctx.comp.lif_getattr.mac);
++
++ if (is_zero_ether_addr(mac_address)) {
++ eth_hw_addr_random(netdev);
++ netdev_dbg(netdev, "Random Mac generated: %pM\n", netdev->dev_addr);
++ ether_addr_copy(mac_address, netdev->dev_addr);
++
++ err = ionic_program_mac(lif, mac_address);
++ if (err < 0)
++ return err;
++
++ if (err > 0) {
++ netdev_dbg(netdev, "%s:SET/GET ATTR Mac are not same-due to old FW running\n",
++ __func__);
++ return 0;
++ }
++ }
+
+ if (!is_zero_ether_addr(netdev->dev_addr)) {
+ /* If the netdev mac is non-zero and doesn't match the default
+@@ -3232,12 +3334,11 @@ static int ionic_station_set(struct ionic_lif *lif)
+ * likely here again after a fw-upgrade reset. We need to be
+ * sure the netdev mac is in our filter list.
+ */
+- if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
+- netdev->dev_addr))
++ if (!ether_addr_equal(mac_address, netdev->dev_addr))
+ ionic_lif_addr_add(lif, netdev->dev_addr);
+ } else {
+ /* Update the netdev mac with the device's mac */
+- memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
++ ether_addr_copy(addr.sa_data, mac_address);
+ addr.sa_family = AF_INET;
+ err = eth_prepare_mac_addr_change(netdev, &addr);
+ if (err) {
+@@ -3283,7 +3384,7 @@ int ionic_lif_init(struct ionic_lif *lif)
+ return -EINVAL;
+ }
+
+- lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
++ lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
+ if (!lif->dbid_inuse) {
+ dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
+ return -ENOMEM;
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
+index 6f07bf509efed..538c024afed52 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
+@@ -328,10 +328,10 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+
+ static void ionic_dev_cmd_clean(struct ionic *ionic)
+ {
+- union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
++ struct ionic_dev *idev = &ionic->idev;
+
+- iowrite32(0, &regs->doorbell);
+- memset_io(&regs->cmd, 0, sizeof(regs->cmd));
++ iowrite32(0, &idev->dev_cmd_regs->doorbell);
++ memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
+ }
+
+ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+@@ -395,8 +395,8 @@ try_again:
+ ionic_opcode_to_str(opcode), opcode,
+ ionic_error_to_str(err), err);
+
+- msleep(1000);
+ iowrite32(0, &idev->dev_cmd_regs->done);
++ msleep(1000);
+ iowrite32(1, &idev->dev_cmd_regs->doorbell);
+ goto try_again;
+ }
+@@ -409,6 +409,8 @@ try_again:
+ return ionic_error_to_errno(err);
+ }
+
++ ionic_dev_cmd_clean(ionic);
++
+ return 0;
+ }
+
+@@ -488,6 +490,9 @@ int ionic_reset(struct ionic *ionic)
+ struct ionic_dev *idev = &ionic->idev;
+ int err;
+
++ if (!ionic_is_fw_running(idev))
++ return 0;
++
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_reset(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+@@ -560,15 +565,17 @@ int ionic_port_init(struct ionic *ionic)
+ int ionic_port_reset(struct ionic *ionic)
+ {
+ struct ionic_dev *idev = &ionic->idev;
+- int err;
++ int err = 0;
+
+ if (!idev->port_info)
+ return 0;
+
+- mutex_lock(&ionic->dev_cmd_lock);
+- ionic_dev_cmd_port_reset(idev);
+- err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+- mutex_unlock(&ionic->dev_cmd_lock);
++ if (ionic_is_fw_running(idev)) {
++ mutex_lock(&ionic->dev_cmd_lock);
++ ionic_dev_cmd_port_reset(idev);
++ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
++ mutex_unlock(&ionic->dev_cmd_lock);
++ }
+
+ dma_free_coherent(ionic->dev, idev->port_info_sz,
+ idev->port_info, idev->port_info_pa);
+@@ -576,16 +583,19 @@ int ionic_port_reset(struct ionic *ionic)
+ idev->port_info = NULL;
+ idev->port_info_pa = 0;
+
+- if (err)
+- dev_err(ionic->dev, "Failed to reset port\n");
+-
+ return err;
+ }
+
+ static int __init ionic_init_module(void)
+ {
++ int ret;
++
+ ionic_debugfs_create();
+- return ionic_bus_register_driver();
++ ret = ionic_bus_register_driver();
++ if (ret)
++ ionic_debugfs_destroy();
++
++ return ret;
+ }
+
+ static void __exit ionic_cleanup_module(void)
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+index 37c39581b6599..376f97b4008bb 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+@@ -353,16 +353,25 @@ void ionic_rx_fill(struct ionic_queue *q)
+ struct ionic_rxq_sg_desc *sg_desc;
+ struct ionic_rxq_sg_elem *sg_elem;
+ struct ionic_buf_info *buf_info;
++ unsigned int fill_threshold;
+ struct ionic_rxq_desc *desc;
+ unsigned int remain_len;
+ unsigned int frag_len;
+ unsigned int nfrags;
++ unsigned int n_fill;
+ unsigned int i, j;
+ unsigned int len;
+
++ n_fill = ionic_q_space_avail(q);
++
++ fill_threshold = min_t(unsigned int, IONIC_RX_FILL_THRESHOLD,
++ q->num_descs / IONIC_RX_FILL_DIV);
++ if (n_fill < fill_threshold)
++ return;
++
+ len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
+
+- for (i = ionic_q_space_avail(q); i; i--) {
++ for (i = n_fill; i; i--) {
+ nfrags = 0;
+ remain_len = len;
+ desc_info = &q->info[q->head_idx];
+@@ -518,7 +527,6 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
+ struct ionic_cq *cq = napi_to_cq(napi);
+ struct ionic_dev *idev;
+ struct ionic_lif *lif;
+- u16 rx_fill_threshold;
+ u32 work_done = 0;
+ u32 flags = 0;
+
+@@ -528,10 +536,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
+ work_done = ionic_cq_service(cq, budget,
+ ionic_rx_service, NULL, NULL);
+
+- rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD,
+- cq->num_descs / IONIC_RX_FILL_DIV);
+- if (work_done && ionic_q_space_avail(cq->bound_q) >= rx_fill_threshold)
+- ionic_rx_fill(cq->bound_q);
++ ionic_rx_fill(cq->bound_q);
+
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
+@@ -559,7 +564,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
+ struct ionic_dev *idev;
+ struct ionic_lif *lif;
+ struct ionic_cq *txcq;
+- u16 rx_fill_threshold;
+ u32 rx_work_done = 0;
+ u32 tx_work_done = 0;
+ u32 flags = 0;
+@@ -574,10 +578,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
+ rx_work_done = ionic_cq_service(rxcq, budget,
+ ionic_rx_service, NULL, NULL);
+
+- rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD,
+- rxcq->num_descs / IONIC_RX_FILL_DIV);
+- if (rx_work_done && ionic_q_space_avail(rxcq->bound_q) >= rx_fill_threshold)
+- ionic_rx_fill(rxcq->bound_q);
++ ionic_rx_fill(rxcq->bound_q);
+
+ if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
+ ionic_dim_update(qcq, 0);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
+index d58e021614cd0..b656408b9d700 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed.h
++++ b/drivers/net/ethernet/qlogic/qed/qed.h
+@@ -877,12 +877,13 @@ u32 qed_get_hsi_def_val(struct qed_dev *cdev, enum qed_hsi_def_type type);
+
+
+ /**
+- * @brief qed_concrete_to_sw_fid - get the sw function id from
+- * the concrete value.
++ * qed_concrete_to_sw_fid(): Get the sw function id from
++ * the concrete value.
+ *
+- * @param concrete_fid
++ * @cdev: Qed dev pointer.
++ * @concrete_fid: Concrete fid.
+ *
+- * @return inline u8
++ * Return: inline u8.
+ */
+ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
+ u32 concrete_fid)
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+index 8adb7ed0c12db..d31196db7bdde 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+@@ -28,24 +28,23 @@ struct qed_tid_mem {
+ };
+
+ /**
+- * @brief qedo_cid_get_cxt_info - Returns the context info for a specific cid
++ * qed_cxt_get_cid_info(): Returns the context info for a specific cid.
+ *
++ * @p_hwfn: HW device data.
++ * @p_info: In/out.
+ *
+- * @param p_hwfn
+- * @param p_info in/out
+- *
+- * @return int
++ * Return: Int.
+ */
+ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
+ struct qed_cxt_info *p_info);
+
+ /**
+- * @brief qed_cxt_get_tid_mem_info
++ * qed_cxt_get_tid_mem_info(): Returns the tid mem info.
+ *
+- * @param p_hwfn
+- * @param p_info
++ * @p_hwfn: HW device data.
++ * @p_info: in/out.
+ *
+- * @return int
++ * Return: int.
+ */
+ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
+ struct qed_tid_mem *p_info);
+@@ -64,142 +63,155 @@ u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
+ enum protocol_type type, u32 *vf_cid);
+
+ /**
+- * @brief qed_cxt_set_pf_params - Set the PF params for cxt init
++ * qed_cxt_set_pf_params(): Set the PF params for cxt init.
++ *
++ * @p_hwfn: HW device data.
++ * @rdma_tasks: Requested maximum.
+ *
+- * @param p_hwfn
+- * @param rdma_tasks - requested maximum
+- * @return int
++ * Return: int.
+ */
+ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks);
+
+ /**
+- * @brief qed_cxt_cfg_ilt_compute - compute ILT init parameters
++ * qed_cxt_cfg_ilt_compute(): Compute ILT init parameters.
+ *
+- * @param p_hwfn
+- * @param last_line
++ * @p_hwfn: HW device data.
++ * @last_line: Last_line.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *last_line);
+
+ /**
+- * @brief qed_cxt_cfg_ilt_compute_excess - how many lines can be decreased
++ * qed_cxt_cfg_ilt_compute_excess(): How many lines can be decreased.
++ *
++ * @p_hwfn: HW device data.
++ * @used_lines: Used lines.
+ *
+- * @param p_hwfn
+- * @param used_lines
++ * Return: Int.
+ */
+ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines);
+
+ /**
+- * @brief qed_cxt_mngr_alloc - Allocate and init the context manager struct
++ * qed_cxt_mngr_alloc(): Allocate and init the context manager struct.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_cxt_mngr_free
++ * qed_cxt_mngr_free() - Context manager free.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_cxt_mngr_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_cxt_tables_alloc - Allocate ILT shadow, Searcher T2, acquired map
++ * qed_cxt_tables_alloc(): Allocate ILT shadow, Searcher T2, acquired map.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_cxt_mngr_setup - Reset the acquired CIDs
++ * qed_cxt_mngr_setup(): Reset the acquired CIDs.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ */
+ void qed_cxt_mngr_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_cxt_hw_init_common - Initailze ILT and DQ, common phase, per path.
+- *
++ * qed_cxt_hw_init_common(): Initialize ILT and DQ, common phase, per path.
+ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_cxt_hw_init_common(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_cxt_hw_init_pf - Initailze ILT and DQ, PF phase, per path.
++ * qed_cxt_hw_init_pf(): Initialize ILT and DQ, PF phase, per path.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Void.
+ */
+ void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_qm_init_pf - Initailze the QM PF phase, per path
++ * qed_qm_init_pf(): Initialize the QM PF phase, per path.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @is_pf_loading: Is pf pending.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param is_pf_loading
++ * Return: Void.
+ */
+ void qed_qm_init_pf(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, bool is_pf_loading);
+
+ /**
+- * @brief Reconfigures QM pf on the fly
++ * qed_qm_reconf(): Reconfigures QM pf on the fly.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ #define QED_CXT_PF_CID (0xff)
+
+ /**
+- * @brief qed_cxt_release - Release a cid
++ * qed_cxt_release_cid(): Release a cid.
+ *
+- * @param p_hwfn
+- * @param cid
++ * @p_hwfn: HW device data.
++ * @cid: Cid.
++ *
++ * Return: Void.
+ */
+ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid);
+
+ /**
+- * @brief qed_cxt_release - Release a cid belonging to a vf-queue
++ * _qed_cxt_release_cid(): Release a cid belonging to a vf-queue.
++ *
++ * @p_hwfn: HW device data.
++ * @cid: Cid.
++ * @vfid: Engine relative index. QED_CXT_PF_CID if belongs to PF.
+ *
+- * @param p_hwfn
+- * @param cid
+- * @param vfid - engine relative index. QED_CXT_PF_CID if belongs to PF
++ * Return: Void.
+ */
+ void _qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid, u8 vfid);
+
+ /**
+- * @brief qed_cxt_acquire - Acquire a new cid of a specific protocol type
++ * qed_cxt_acquire_cid(): Acquire a new cid of a specific protocol type.
+ *
+- * @param p_hwfn
+- * @param type
+- * @param p_cid
++ * @p_hwfn: HW device data.
++ * @type: Type.
++ * @p_cid: Pointer cid.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
+ enum protocol_type type, u32 *p_cid);
+
+ /**
+- * @brief _qed_cxt_acquire - Acquire a new cid of a specific protocol type
+- * for a vf-queue
++ * _qed_cxt_acquire_cid(): Acquire a new cid of a specific protocol type
++ * for a vf-queue.
+ *
+- * @param p_hwfn
+- * @param type
+- * @param p_cid
+- * @param vfid - engine relative index. QED_CXT_PF_CID if belongs to PF
++ * @p_hwfn: HW device data.
++ * @type: Type.
++ * @p_cid: Pointer cid.
++ * @vfid: Engine relative index. QED_CXT_PF_CID if belongs to PF.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int _qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
+ enum protocol_type type, u32 *p_cid, u8 vfid);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+index 6ab3e60d4928c..4b4077cf2d266 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+@@ -1796,9 +1796,10 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
+ u8 split_id)
+ {
+ struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+- u8 port_id = 0, pf_id = 0, vf_id = 0, fid = 0;
++ u8 port_id = 0, pf_id = 0, vf_id = 0;
+ bool read_using_dmae = false;
+ u32 thresh;
++ u16 fid;
+
+ if (!dump)
+ return len;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
+index 0410c3604abdb..ba445724ee65e 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
+@@ -5022,6 +5022,11 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
+
+ num_vports = p_hwfn->qm_info.num_vports;
+
++ if (num_vports < 2) {
++ DP_NOTICE(p_hwfn, "Unexpected num_vports: %d\n", num_vports);
++ return -EINVAL;
++ }
++
+ /* Accounting for the vports which are configured for WFQ explicitly */
+ for (i = 0; i < num_vports; i++) {
+ u32 tmp_speed;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+index d3c1f3879be87..a0a766a1723cc 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+@@ -15,44 +15,52 @@
+ #include "qed_int.h"
+
+ /**
+- * @brief qed_init_dp - initialize the debug level
++ * qed_init_dp(): Initialize the debug level.
+ *
+- * @param cdev
+- * @param dp_module
+- * @param dp_level
++ * @cdev: Qed dev pointer.
++ * @dp_module: Module debug parameter.
++ * @dp_level: Module debug level.
++ *
++ * Return: Void.
+ */
+ void qed_init_dp(struct qed_dev *cdev,
+ u32 dp_module,
+ u8 dp_level);
+
+ /**
+- * @brief qed_init_struct - initialize the device structure to
+- * its defaults
++ * qed_init_struct(): Initialize the device structure to
++ * its defaults.
++ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: Void.
+ */
+ void qed_init_struct(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_resc_free -
++ * qed_resc_free(): Free device resources.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
++ *
++ * Return: Void.
+ */
+ void qed_resc_free(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_resc_alloc -
++ * qed_resc_alloc(): Alloc device resources.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_resc_alloc(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_resc_setup -
++ * qed_resc_setup(): Setup device resources.
++ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: Void.
+ */
+ void qed_resc_setup(struct qed_dev *cdev);
+
+@@ -105,94 +113,113 @@ struct qed_hw_init_params {
+ };
+
+ /**
+- * @brief qed_hw_init -
++ * qed_hw_init(): Init Qed hardware.
+ *
+- * @param cdev
+- * @param p_params
++ * @cdev: Qed dev pointer.
++ * @p_params: Pointers to params.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params);
+
+ /**
+- * @brief qed_hw_timers_stop_all - stop the timers HW block
++ * qed_hw_timers_stop_all(): Stop the timers HW block.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return void
++ * Return: void.
+ */
+ void qed_hw_timers_stop_all(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_hw_stop -
++ * qed_hw_stop(): Stop Qed hardware.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: int.
+ */
+ int qed_hw_stop(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_hw_stop_fastpath -should be called incase
+- * slowpath is still required for the device,
+- * but fastpath is not.
++ * qed_hw_stop_fastpath(): Should be called in case
++ * slowpath is still required for the device,
++ * but fastpath is not.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_hw_stop_fastpath(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_hw_start_fastpath -restart fastpath traffic,
+- * only if hw_stop_fastpath was called
++ * qed_hw_start_fastpath(): Restart fastpath traffic,
++ * only if hw_stop_fastpath was called.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_hw_start_fastpath(struct qed_hwfn *p_hwfn);
+
+
+ /**
+- * @brief qed_hw_prepare -
++ * qed_hw_prepare(): Prepare Qed hardware.
+ *
+- * @param cdev
+- * @param personality - personality to initialize
++ * @cdev: Qed dev pointer.
++ * @personality: Personality to initialize.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_hw_prepare(struct qed_dev *cdev,
+ int personality);
+
+ /**
+- * @brief qed_hw_remove -
++ * qed_hw_remove(): Remove Qed hardware.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
++ *
++ * Return: Void.
+ */
+ void qed_hw_remove(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_ptt_acquire - Allocate a PTT window
++ * qed_ptt_acquire(): Allocate a PTT window.
+ *
+- * Should be called at the entry point to the driver (at the beginning of an
+- * exported function)
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: struct qed_ptt.
+ *
+- * @return struct qed_ptt
++ * Should be called at the entry point to the driver (at the beginning of an
++ * exported function).
+ */
+ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ptt_release - Release PTT Window
++ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
++ * atomicity.
+ *
+- * Should be called at the end of a flow - at the end of the function that
+- * acquired the PTT.
++ * @p_hwfn: HW device data.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: struct qed_ptt.
++ *
++ * Should be called at the entry point to the driver
++ * (at the beginning of an exported function).
++ */
++struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
++ bool is_atomic);
++
++/**
++ * qed_ptt_release(): Release PTT Window.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
++ * Return: Void.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Should be called at the end of a flow - at the end of the function that
++ * acquired the PTT.
+ */
+ void qed_ptt_release(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+@@ -205,15 +232,17 @@ enum qed_dmae_address_type_t {
+ };
+
+ /**
+- * @brief qed_dmae_host2grc - copy data from source addr to
+- * dmae registers using the given ptt
++ * qed_dmae_host2grc(): Copy data from source addr to
++ * dmae registers using the given ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param source_addr
+- * @param grc_addr (dmae_data_offset)
+- * @param size_in_dwords
+- * @param p_params (default parameters will be used in case of NULL)
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @source_addr: Source address.
++ * @grc_addr: GRC address (dmae_data_offset).
++ * @size_in_dwords: Size.
++ * @p_params: (default parameters will be used in case of NULL).
++ *
++ * Return: Int.
+ */
+ int
+ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
+@@ -224,29 +253,34 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
+ struct qed_dmae_params *p_params);
+
+ /**
+- * @brief qed_dmae_grc2host - Read data from dmae data offset
+- * to source address using the given ptt
++ * qed_dmae_grc2host(): Read data from dmae data offset
++ * to source address using the given ptt.
++ *
++ * @p_ptt: P_ptt.
++ * @grc_addr: GRC address (dmae_data_offset).
++ * @dest_addr: Destination Address.
++ * @size_in_dwords: Size.
++ * @p_params: (default parameters will be used in case of NULL).
+ *
+- * @param p_ptt
+- * @param grc_addr (dmae_data_offset)
+- * @param dest_addr
+- * @param size_in_dwords
+- * @param p_params (default parameters will be used in case of NULL)
++ * Return: Int.
+ */
+ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params);
+
+ /**
+- * @brief qed_dmae_host2host - copy data from to source address
+- * to a destination adress (for SRIOV) using the given ptt
++ * qed_dmae_host2host(): Copy data from the source address
++ * to a destination address (for SRIOV) using the given
++ * ptt.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @source_addr: Source address.
++ * @dest_addr: Destination address.
++ * @size_in_dwords: size.
++ * @p_params: (default parameters will be used in case of NULL).
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param source_addr
+- * @param dest_addr
+- * @param size_in_dwords
+- * @param p_params (default parameters will be used in case of NULL)
++ * Return: Int.
+ */
+ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -259,51 +293,51 @@ int qed_chain_alloc(struct qed_dev *cdev, struct qed_chain *chain,
+ void qed_chain_free(struct qed_dev *cdev, struct qed_chain *chain);
+
+ /**
+- * @@brief qed_fw_l2_queue - Get absolute L2 queue ID
++ * qed_fw_l2_queue(): Get absolute L2 queue ID.
+ *
+- * @param p_hwfn
+- * @param src_id - relative to p_hwfn
+- * @param dst_id - absolute per engine
++ * @p_hwfn: HW device data.
++ * @src_id: Relative to p_hwfn.
++ * @dst_id: Absolute per engine.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_fw_l2_queue(struct qed_hwfn *p_hwfn,
+ u16 src_id,
+ u16 *dst_id);
+
+ /**
+- * @@brief qed_fw_vport - Get absolute vport ID
++ * qed_fw_vport(): Get absolute vport ID.
+ *
+- * @param p_hwfn
+- * @param src_id - relative to p_hwfn
+- * @param dst_id - absolute per engine
++ * @p_hwfn: HW device data.
++ * @src_id: Relative to p_hwfn.
++ * @dst_id: Absolute per engine.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_fw_vport(struct qed_hwfn *p_hwfn,
+ u8 src_id,
+ u8 *dst_id);
+
+ /**
+- * @@brief qed_fw_rss_eng - Get absolute RSS engine ID
++ * qed_fw_rss_eng(): Get absolute RSS engine ID.
+ *
+- * @param p_hwfn
+- * @param src_id - relative to p_hwfn
+- * @param dst_id - absolute per engine
++ * @p_hwfn: HW device data.
++ * @src_id: Relative to p_hwfn.
++ * @dst_id: Absolute per engine.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
+ u8 src_id,
+ u8 *dst_id);
+
+ /**
+- * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter
+- * banks that are allocated to the PF.
++ * qed_llh_get_num_ppfid(): Return the allocated number of LLH filter
++ * banks that are allocated to the PF.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return u8 - Number of LLH filter banks
++ * Return: u8 Number of LLH filter banks.
+ */
+ u8 qed_llh_get_num_ppfid(struct qed_dev *cdev);
+
+@@ -314,45 +348,50 @@ enum qed_eng {
+ };
+
+ /**
+- * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given
+- * LLH filter bank.
++ * qed_llh_set_ppfid_affinity(): Set the engine affinity for the given
++ * LLH filter bank.
+ *
+- * @param cdev
+- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+- * @param eng
++ * @cdev: Qed dev pointer.
++ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
++ * @eng: Engine.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_llh_set_ppfid_affinity(struct qed_dev *cdev,
+ u8 ppfid, enum qed_eng eng);
+
+ /**
+- * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity
++ * qed_llh_set_roce_affinity(): Set the RoCE engine affinity.
+ *
+- * @param cdev
+- * @param eng
++ * @cdev: Qed dev pointer.
++ * @eng: Engine.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
+
+ /**
+- * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter
+- * bank.
++ * qed_llh_add_mac_filter(): Add a LLH MAC filter into the given filter
++ * bank.
+ *
+- * @param cdev
+- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+- * @param mac_addr - MAC to add
++ * @cdev: Qed dev pointer.
++ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
++ * @mac_addr: MAC to add.
++ *
++ * Return: Int.
+ */
+ int qed_llh_add_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN]);
+
+ /**
+- * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given
+- * filter bank.
++ * qed_llh_remove_mac_filter(): Remove a LLH MAC filter from the given
++ * filter bank.
++ *
++ * @cdev: Qed dev pointer.
++ * @ppfid: Ppfid.
++ * @mac_addr: MAC to remove
+ *
+- * @param p_ptt
+- * @param p_filter - MAC to remove
++ * Return: Void.
+ */
+ void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN]);
+@@ -368,15 +407,16 @@ enum qed_llh_prot_filter_type_t {
+ };
+
+ /**
+- * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the
+- * given filter bank.
++ * qed_llh_add_protocol_filter(): Add a LLH protocol filter into the
++ * given filter bank.
++ *
++ * @cdev: Qed dev pointer.
++ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
++ * @type: Type of filters and comparing.
++ * @source_port_or_eth_type: Source port or ethertype to add.
++ * @dest_port: Destination port to add.
+ *
+- * @param cdev
+- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+- * @param type - type of filters and comparing
+- * @param source_port_or_eth_type - source port or ethertype to add
+- * @param dest_port - destination port to add
+- * @param type - type of filters and comparing
++ * Return: Int.
+ */
+ int
+ qed_llh_add_protocol_filter(struct qed_dev *cdev,
+@@ -385,14 +425,14 @@ qed_llh_add_protocol_filter(struct qed_dev *cdev,
+ u16 source_port_or_eth_type, u16 dest_port);
+
+ /**
+- * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from
+- * the given filter bank.
++ * qed_llh_remove_protocol_filter(): Remove a LLH protocol filter from
++ * the given filter bank.
+ *
+- * @param cdev
+- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+- * @param type - type of filters and comparing
+- * @param source_port_or_eth_type - source port or ethertype to add
+- * @param dest_port - destination port to add
++ * @cdev: Qed dev pointer.
++ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
++ * @type: Type of filters and comparing.
++ * @source_port_or_eth_type: Source port or ethertype to add.
++ * @dest_port: Destination port to add.
+ */
+ void
+ qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+@@ -401,31 +441,31 @@ qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+ u16 source_port_or_eth_type, u16 dest_port);
+
+ /**
+- * *@brief Cleanup of previous driver remains prior to load
++ * qed_final_cleanup(): Cleanup of previous driver remains prior to load.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param id - For PF, engine-relative. For VF, PF-relative.
+- * @param is_vf - true iff cleanup is made for a VF.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @id: For PF, engine-relative. For VF, PF-relative.
++ * @is_vf: True iff cleanup is made for a VF.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u16 id, bool is_vf);
+
+ /**
+- * @brief qed_get_queue_coalesce - Retrieve coalesce value for a given queue.
++ * qed_get_queue_coalesce(): Retrieve coalesce value for a given queue.
+ *
+- * @param p_hwfn
+- * @param p_coal - store coalesce value read from the hardware.
+- * @param p_handle
++ * @p_hwfn: HW device data.
++ * @coal: Store coalesce value read from the hardware.
++ * @handle: P_handle.
+ *
+- * @return int
++ * Return: Int.
+ **/
+ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
+
+ /**
+- * @brief qed_set_queue_coalesce - Configure coalesce parameters for Rx and
++ * qed_set_queue_coalesce(): Configure coalesce parameters for Rx and
+ * Tx queue. The fact that we can configure coalescing to up to 511, but on
+ * varying accuracy [the bigger the value the less accurate] up to a mistake
+ * of 3usec for the highest values.
+@@ -433,37 +473,38 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
+ * should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff]
+ * otherwise configuration would break.
+ *
++ * @rx_coal: Rx Coalesce value in micro seconds.
++ * @tx_coal: TX Coalesce value in micro seconds.
++ * @p_handle: P_handle.
+ *
+- * @param rx_coal - Rx Coalesce value in micro seconds.
+- * @param tx_coal - TX Coalesce value in micro seconds.
+- * @param p_handle
+- *
+- * @return int
++ * Return: Int.
+ **/
+ int
+ qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
+
+ /**
+- * @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
++ * qed_pglueb_set_pfid_enable(): Enable or disable PCI BUS MASTER.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param b_enable - true/false
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @b_enable: True/False.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, bool b_enable);
+
+ /**
+- * @brief db_recovery_add - add doorbell information to the doorbell
+- * recovery mechanism.
++ * qed_db_recovery_add(): add doorbell information to the doorbell
++ * recovery mechanism.
++ *
++ * @cdev: Qed dev pointer.
++ * @db_addr: Doorbell address.
++ * @db_data: Address of where db_data is stored.
++ * @db_width: Doorbell is 32b or 64b.
++ * @db_space: Doorbell recovery addresses are user or kernel space.
+ *
+- * @param cdev
+- * @param db_addr - doorbell address
+- * @param db_data - address of where db_data is stored
+- * @param db_width - doorbell is 32b pr 64b
+- * @param db_space - doorbell recovery addresses are user or kernel space
++ * Return: Int.
+ */
+ int qed_db_recovery_add(struct qed_dev *cdev,
+ void __iomem *db_addr,
+@@ -472,13 +513,15 @@ int qed_db_recovery_add(struct qed_dev *cdev,
+ enum qed_db_rec_space db_space);
+
+ /**
+- * @brief db_recovery_del - remove doorbell information from the doorbell
++ * qed_db_recovery_del() - remove doorbell information from the doorbell
+ * recovery mechanism. db_data serves as key (db_addr is not unique).
+ *
+- * @param cdev
+- * @param db_addr - doorbell address
+- * @param db_data - address where db_data is stored. Serves as key for the
++ * @cdev: Qed dev pointer.
++ * @db_addr: doorbell address.
++ * @db_data: address where db_data is stored. Serves as key for the
+ * entry to delete.
++ *
++ * Return: Int.
+ */
+ int qed_db_recovery_del(struct qed_dev *cdev,
+ void __iomem *db_addr, void *db_data);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+index b768f0698170e..0c55249b3a358 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+@@ -694,13 +694,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
+ }
+
+ static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
+- struct qed_fcoe_stats *p_stats)
++ struct qed_fcoe_stats *p_stats,
++ bool is_atomic)
+ {
+ struct qed_ptt *p_ptt;
+
+ memset(p_stats, 0, sizeof(*p_stats));
+
+- p_ptt = qed_ptt_acquire(p_hwfn);
++ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
+
+ if (!p_ptt) {
+ DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+@@ -974,19 +975,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
+ QED_SPQ_MODE_EBLOCK, NULL);
+ }
+
++static int qed_fcoe_stats_context(struct qed_dev *cdev,
++ struct qed_fcoe_stats *stats,
++ bool is_atomic)
++{
++ return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
++}
++
+ static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
+ {
+- return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
++ return qed_fcoe_stats_context(cdev, stats, false);
+ }
+
+ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+- struct qed_mcp_fcoe_stats *stats)
++ struct qed_mcp_fcoe_stats *stats,
++ bool is_atomic)
+ {
+ struct qed_fcoe_stats proto_stats;
+
+ /* Retrieve FW statistics */
+ memset(&proto_stats, 0, sizeof(proto_stats));
+- if (qed_fcoe_stats(cdev, &proto_stats)) {
++ if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
+ DP_VERBOSE(cdev, QED_MSG_STORAGE,
+ "Failed to collect FCoE statistics\n");
+ return;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+index 19c85adf4ceb1..214e8299ecb4e 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+@@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
+ void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
+
+ void qed_fcoe_free(struct qed_hwfn *p_hwfn);
++/**
++ * qed_get_protocol_stats_fcoe(): Fills provided statistics
++ * struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: Void.
++ */
+ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+- struct qed_mcp_fcoe_stats *stats);
++ struct qed_mcp_fcoe_stats *stats,
++ bool is_atomic);
+ #else /* CONFIG_QED_FCOE */
+ static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
+ {
+@@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
+ static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
+
+ static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+- struct qed_mcp_fcoe_stats *stats)
++ struct qed_mcp_fcoe_stats *stats,
++ bool is_atomic)
+ {
+ }
+ #endif /* CONFIG_QED_FCOE */
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+index fb1baa2da2d0d..744c82a108754 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+@@ -3012,96 +3012,102 @@ struct iro {
+ /***************************** Public Functions *******************************/
+
+ /**
+- * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug
+- * arrays.
++ * qed_dbg_set_bin_ptr(): Sets a pointer to the binary data with debug
++ * arrays.
+ *
+- * @param p_hwfn - HW device data
+- * @param bin_ptr - a pointer to the binary data with debug arrays.
++ * @p_hwfn: HW device data.
++ * @bin_ptr: A pointer to the binary data with debug arrays.
++ *
++ * Return: enum dbg status.
+ */
+ enum dbg_status qed_dbg_set_bin_ptr(struct qed_hwfn *p_hwfn,
+ const u8 * const bin_ptr);
+
+ /**
+- * @brief qed_read_regs - Reads registers into a buffer (using GRC).
++ * qed_read_regs(): Reads registers into a buffer (using GRC).
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf: Destination buffer.
++ * @addr: Source GRC address in dwords.
++ * @len: Number of registers to read.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf - Destination buffer.
+- * @param addr - Source GRC address in dwords.
+- * @param len - Number of registers to read.
++ * Return: Void.
+ */
+ void qed_read_regs(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *buf, u32 addr, u32 len);
+
+ /**
+- * @brief qed_read_fw_info - Reads FW info from the chip.
++ * qed_read_fw_info(): Reads FW info from the chip.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @fw_info: (Out) a pointer to write the FW info into.
++ *
++ * Return: True if the FW info was read successfully from one of the Storms,
++ * or false if all Storms are in reset.
+ *
+ * The FW info contains FW-related information, such as the FW version,
+ * FW image (main/L2B/kuku), FW timestamp, etc.
+ * The FW info is read from the internal RAM of the first Storm that is not in
+ * reset.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param fw_info - Out: a pointer to write the FW info into.
+- *
+- * @return true if the FW info was read successfully from one of the Storms,
+- * or false if all Storms are in reset.
+ */
+ bool qed_read_fw_info(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, struct fw_info *fw_info);
+ /**
+- * @brief qed_dbg_grc_config - Sets the value of a GRC parameter.
++ * qed_dbg_grc_config(): Sets the value of a GRC parameter.
+ *
+- * @param p_hwfn - HW device data
+- * @param grc_param - GRC parameter
+- * @param val - Value to set.
++ * @p_hwfn: HW device data.
++ * @grc_param: GRC parameter.
++ * @val: Value to set.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - grc_param is invalid
+- * - val is outside the allowed boundaries
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - Grc_param is invalid.
++ * - Val is outside the allowed boundaries.
+ */
+ enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
+ enum dbg_grc_params grc_param, u32 val);
+
+ /**
+- * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
+- * default value.
++ * qed_dbg_grc_set_params_default(): Reverts all GRC parameters to their
++ * default value.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn - HW device data
++ * Return: Void.
+ */
+ void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn);
+ /**
+- * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for
+- * GRC Dump.
++ * qed_dbg_grc_get_dump_buf_size(): Returns the required buffer size for
++ * GRC Dump.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump
+- * data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) required buffer size (in dwords) for the GRC Dump
++ * data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+
+ /**
+- * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the collected GRC data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified dump buffer is too small
+- * Otherwise, returns ok.
++ * qed_dbg_grc_dump(): Dumps GRC data into the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the collected GRC data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified dump buffer is too small.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3110,36 +3116,36 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size
+- * for idle check results.
++ * qed_dbg_idle_chk_get_dump_buf_size(): Returns the required buffer size
++ * for idle check results.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for the idle check
+- * data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) required buffer size (in dwords) for the idle check
++ * data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+
+ /**
+- * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results
+- * into the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the idle check data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * Otherwise, returns ok.
++ * qed_dbg_idle_chk_dump(): Performs idle check and writes the results
++ * into the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the idle check data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified buffer is too small.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3148,42 +3154,42 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size
+- * for mcp trace results.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the trace data in MCP scratchpad contain an invalid signature
+- * - the bundle ID in NVRAM is invalid
+- * - the trace meta data cannot be found (in NVRAM or image file)
+- * Otherwise, returns ok.
++ * qed_dbg_mcp_trace_get_dump_buf_size(): Returns the required buffer size
++ * for mcp trace results.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) Required buffer size (in dwords) for mcp trace data.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The trace data in MCP scratchpad contain an invalid signature.
++ * - The bundle ID in NVRAM is invalid.
++ * - The trace meta data cannot be found (in NVRAM or image file).
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+
+ /**
+- * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results
+- * into the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the mcp trace data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * - the trace data in MCP scratchpad contain an invalid signature
+- * - the bundle ID in NVRAM is invalid
+- * - the trace meta data cannot be found (in NVRAM or image file)
+- * - the trace meta data cannot be read (from NVRAM or image file)
+- * Otherwise, returns ok.
++ * qed_dbg_mcp_trace_dump(): Performs mcp trace and writes the results
++ * into the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the mcp trace data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified buffer is too small.
++ * - The trace data in MCP scratchpad contain an invalid signature.
++ * - The bundle ID in NVRAM is invalid.
++ * - The trace meta data cannot be found (in NVRAM or image file).
++ * - The trace meta data cannot be read (from NVRAM or image file).
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3192,36 +3198,36 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size
+- * for grc trace fifo results.
++ * qed_dbg_reg_fifo_get_dump_buf_size(): Returns the required buffer size
++ * for grc trace fifo results.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) Required buffer size (in dwords) for reg fifo data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+
+ /**
+- * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into
+- * the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the reg fifo data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * - DMAE transaction failed
+- * Otherwise, returns ok.
++ * qed_dbg_reg_fifo_dump(): Reads the reg fifo and writes the results into
++ * the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the reg fifo data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified buffer is too small.
++ * - DMAE transaction failed.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3230,37 +3236,37 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size
+- * for the IGU fifo results.
++ * qed_dbg_igu_fifo_get_dump_buf_size(): Returns the required buffer size
++ * for the IGU fifo results.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo
+- * data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) Required buffer size (in dwords) for the IGU fifo
++ * data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+
+ /**
+- * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into
+- * the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the IGU fifo data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * - DMAE transaction failed
+- * Otherwise, returns ok.
++ * qed_dbg_igu_fifo_dump(): Reads the IGU fifo and writes the results into
++ * the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the IGU fifo data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * - The specified buffer is too small
++ * - DMAE transaction failed
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3269,37 +3275,37 @@ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required
+- * buffer size for protection override window results.
++ * qed_dbg_protection_override_get_dump_buf_size(): Returns the required
++ * buffer size for protection override window results.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for protection
+- * override data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) Required buffer size (in dwords) for protection
++ * override data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * Otherwise, returns ok.
+ */
+ enum dbg_status
+ qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+ /**
+- * @brief qed_dbg_protection_override_dump - Reads protection override window
+- * entries and writes the results into the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the protection override data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * - DMAE transaction failed
+- * Otherwise, returns ok.
++ * qed_dbg_protection_override_dump(): Reads protection override window
++ * entries and writes the results into the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the protection override data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified buffer is too small.
++ * - DMAE transaction failed.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3307,34 +3313,34 @@ enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+ u32 buf_size_in_dwords,
+ u32 *num_dumped_dwords);
+ /**
+- * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer
+- * size for FW Asserts results.
++ * qed_dbg_fw_asserts_get_dump_buf_size(): Returns the required buffer
++ * size for FW Asserts results.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @buf_size: (OUT) Required buffer size (in dwords) for FW Asserts data.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *buf_size);
+ /**
+- * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results
+- * into the specified buffer.
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param dump_buf - Pointer to write the FW Asserts data into.
+- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+- * @param num_dumped_dwords - OUT: number of dumped dwords.
+- *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * - the specified buffer is too small
+- * Otherwise, returns ok.
++ * qed_dbg_fw_asserts_dump(): Reads the FW Asserts and writes the results
++ * into the specified buffer.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dump_buf: Pointer to write the FW Asserts data into.
++ * @buf_size_in_dwords: Size of the specified buffer in dwords.
++ * @num_dumped_dwords: (OUT) number of dumped dwords.
++ *
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * - The specified buffer is too small.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3343,19 +3349,19 @@ enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+ u32 *num_dumped_dwords);
+
+ /**
+- * @brief qed_dbg_read_attn - Reads the attention registers of the specified
++ * qed_dbg_read_attn(): Reads the attention registers of the specified
+ * block and type, and writes the results into the specified buffer.
+ *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - Ptt window used for writing the registers.
+- * @param block - Block ID.
+- * @param attn_type - Attention type.
+- * @param clear_status - Indicates if the attention status should be cleared.
+- * @param results - OUT: Pointer to write the read results into
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @block: Block ID.
++ * @attn_type: Attention type.
++ * @clear_status: Indicates if the attention status should be cleared.
++ * @results: (OUT) Pointer to write the read results into.
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3365,15 +3371,15 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn,
+ struct dbg_attn_block_result *results);
+
+ /**
+- * @brief qed_dbg_print_attn - Prints attention registers values in the
+- * specified results struct.
++ * qed_dbg_print_attn(): Prints attention registers values in the
++ * specified results struct.
+ *
+- * @param p_hwfn
+- * @param results - Pointer to the attention read results
++ * @p_hwfn: HW device data.
++ * @results: Pointer to the attention read results
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
+ struct dbg_attn_block_result *results);
+@@ -3420,60 +3426,64 @@ struct dbg_tools_user_data {
+ /***************************** Public Functions *******************************/
+
+ /**
+- * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with
+- * debug arrays.
++ * qed_dbg_user_set_bin_ptr(): Sets a pointer to the binary data with
++ * debug arrays.
+ *
+- * @param p_hwfn - HW device data
+- * @param bin_ptr - a pointer to the binary data with debug arrays.
++ * @p_hwfn: HW device data.
++ * @bin_ptr: a pointer to the binary data with debug arrays.
++ *
++ * Return: dbg_status.
+ */
+ enum dbg_status qed_dbg_user_set_bin_ptr(struct qed_hwfn *p_hwfn,
+ const u8 * const bin_ptr);
+
+ /**
+- * @brief qed_dbg_alloc_user_data - Allocates user debug data.
++ * qed_dbg_alloc_user_data(): Allocates user debug data.
++ *
++ * @p_hwfn: HW device data.
++ * @user_data_ptr: (OUT) a pointer to the allocated memory.
+ *
+- * @param p_hwfn - HW device data
+- * @param user_data_ptr - OUT: a pointer to the allocated memory.
++ * Return: dbg_status.
+ */
+ enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn,
+ void **user_data_ptr);
+
+ /**
+- * @brief qed_dbg_get_status_str - Returns a string for the specified status.
++ * qed_dbg_get_status_str(): Returns a string for the specified status.
+ *
+- * @param status - a debug status code.
++ * @status: A debug status code.
+ *
+- * @return a string for the specified status
++ * Return: A string for the specified status.
+ */
+ const char *qed_dbg_get_status_str(enum dbg_status status);
+
+ /**
+- * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size
+- * for idle check results (in bytes).
++ * qed_get_idle_chk_results_buf_size(): Returns the required buffer size
++ * for idle check results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - idle check dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: idle check dump buffer.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+ u32 num_dumped_dwords,
+ u32 *results_buf_size);
+ /**
+- * @brief qed_print_idle_chk_results - Prints idle check results
++ * qed_print_idle_chk_results(): Prints idle check results
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - idle check dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the idle check results.
+- * @param num_errors - OUT: number of errors found in idle check.
+- * @param num_warnings - OUT: number of warnings found in idle check.
++ * @p_hwfn: HW device data.
++ * @dump_buf: idle check dump buffer.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf: buffer for printing the idle check results.
++ * @num_errors: (OUT) number of errors found in idle check.
++ * @num_warnings: (OUT) number of warnings found in idle check.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3483,28 +3493,30 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+ u32 *num_warnings);
+
+ /**
+- * @brief qed_dbg_mcp_trace_set_meta_data - Sets the MCP Trace meta data.
++ * qed_dbg_mcp_trace_set_meta_data(): Sets the MCP Trace meta data.
++ *
++ * @p_hwfn: HW device data.
++ * @meta_buf: Meta buffer.
++ *
++ * Return: Void.
+ *
+ * Needed in case the MCP Trace dump doesn't contain the meta data (e.g. due to
+ * no NVRAM access).
+- *
+- * @param data - pointer to MCP Trace meta data
+- * @param size - size of MCP Trace meta data in dwords
+ */
+ void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
+ const u32 *meta_buf);
+
+ /**
+- * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
+- * for MCP Trace results (in bytes).
++ * qed_get_mcp_trace_results_buf_size(): Returns the required buffer size
++ * for MCP Trace results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - MCP Trace dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: MCP Trace dump buffer.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3512,14 +3524,14 @@ enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *results_buf_size);
+
+ /**
+- * @brief qed_print_mcp_trace_results - Prints MCP Trace results
++ * qed_print_mcp_trace_results(): Prints MCP Trace results
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - mcp trace dump buffer, starting from the header.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the mcp trace results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: MCP trace dump buffer, starting from the header.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf: Buffer for printing the mcp trace results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3527,30 +3539,30 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief qed_print_mcp_trace_results_cont - Prints MCP Trace results, and
++ * qed_print_mcp_trace_results_cont(): Prints MCP Trace results, and
+ * keeps the MCP trace meta data allocated, to support continuous MCP Trace
+ * parsing. After the continuous parsing ends, mcp_trace_free_meta_data should
+ * be called to free the meta data.
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - mcp trace dump buffer, starting from the header.
+- * @param results_buf - buffer for printing the mcp trace results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: MCP trace dump buffer, starting from the header.
++ * @results_buf: Buffer for printing the mcp trace results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+ char *results_buf);
+
+ /**
+- * @brief print_mcp_trace_line - Prints MCP Trace results for a single line
++ * qed_print_mcp_trace_line(): Prints MCP Trace results for a single line
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - mcp trace dump buffer, starting from the header.
+- * @param num_dumped_bytes - number of bytes that were dumped.
+- * @param results_buf - buffer for printing the mcp trace results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: MCP trace dump buffer, starting from the header.
++ * @num_dumped_bytes: Number of bytes that were dumped.
++ * @results_buf: Buffer for printing the mcp trace results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+ u8 *dump_buf,
+@@ -3558,24 +3570,26 @@ enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief mcp_trace_free_meta_data - Frees the MCP Trace meta data.
++ * qed_mcp_trace_free_meta_data(): Frees the MCP Trace meta data.
+ * Should be called after continuous MCP Trace parsing.
+ *
+- * @param p_hwfn - HW device data
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
+- * for reg_fifo results (in bytes).
++ * qed_get_reg_fifo_results_buf_size(): Returns the required buffer size
++ * for reg_fifo results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - reg fifo dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: Reg fifo dump buffer.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3583,14 +3597,14 @@ enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *results_buf_size);
+
+ /**
+- * @brief qed_print_reg_fifo_results - Prints reg fifo results
++ * qed_print_reg_fifo_results(): Prints reg fifo results.
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - reg fifo dump buffer, starting from the header.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the reg fifo results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: Reg fifo dump buffer, starting from the header.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf: Buffer for printing the reg fifo results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3598,16 +3612,16 @@ enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size
+- * for igu_fifo results (in bytes).
++ * qed_get_igu_fifo_results_buf_size(): Returns the required buffer size
++ * for igu_fifo results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - IGU fifo dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: IGU fifo dump buffer.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3615,14 +3629,14 @@ enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *results_buf_size);
+
+ /**
+- * @brief qed_print_igu_fifo_results - Prints IGU fifo results
++ * qed_print_igu_fifo_results(): Prints IGU fifo results
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - IGU fifo dump buffer, starting from the header.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the IGU fifo results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: IGU fifo dump buffer, starting from the header.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf: Buffer for printing the IGU fifo results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3630,16 +3644,16 @@ enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief qed_get_protection_override_results_buf_size - Returns the required
+- * buffer size for protection override results (in bytes).
++ * qed_get_protection_override_results_buf_size(): Returns the required
++ * buffer size for protection override results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - protection override dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: Protection override dump buffer.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status
+ qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+@@ -3648,15 +3662,15 @@ qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *results_buf_size);
+
+ /**
+- * @brief qed_print_protection_override_results - Prints protection override
+- * results.
++ * qed_print_protection_override_results(): Prints protection override
++ * results.
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - protection override dump buffer, starting from the header.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the reg fifo results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: Protection override dump buffer, starting from the header.
++ * @num_dumped_dwords: Number of dwords that were dumped.
++ * @results_buf: Buffer for printing the reg fifo results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3664,16 +3678,16 @@ enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size
+- * for FW Asserts results (in bytes).
++ * qed_get_fw_asserts_results_buf_size(): Returns the required buffer size
++ * for FW Asserts results (in bytes).
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - FW Asserts dump buffer.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+- * results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: FW Asserts dump buffer.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
++ * results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3681,14 +3695,14 @@ enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+ u32 *results_buf_size);
+
+ /**
+- * @brief qed_print_fw_asserts_results - Prints FW Asserts results
++ * qed_print_fw_asserts_results(): Prints FW Asserts results.
+ *
+- * @param p_hwfn - HW device data
+- * @param dump_buf - FW Asserts dump buffer, starting from the header.
+- * @param num_dumped_dwords - number of dwords that were dumped.
+- * @param results_buf - buffer for printing the FW Asserts results.
++ * @p_hwfn: HW device data.
++ * @dump_buf: FW Asserts dump buffer, starting from the header.
++ * @num_dumped_dwords: number of dwords that were dumped.
++ * @results_buf: buffer for printing the FW Asserts results.
+ *
+- * @return error if the parsing fails, ok otherwise.
++ * Return: Error if the parsing fails, ok otherwise.
+ */
+ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+ u32 *dump_buf,
+@@ -3696,15 +3710,15 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+ char *results_buf);
+
+ /**
+- * @brief qed_dbg_parse_attn - Parses and prints attention registers values in
+- * the specified results struct.
++ * qed_dbg_parse_attn(): Parses and prints attention registers values in
++ * the specified results struct.
+ *
+- * @param p_hwfn - HW device data
+- * @param results - Pointer to the attention read results
++ * @p_hwfn: HW device data.
++ * @results: Pointer to the attention read results
+ *
+- * @return error if one of the following holds:
+- * - the version wasn't set
+- * Otherwise, returns ok.
++ * Return: Error if one of the following holds:
++ * - The version wasn't set.
++ * Otherwise, returns ok.
+ */
+ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn,
+ struct dbg_attn_block_result *results);
+@@ -3746,18 +3760,18 @@ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn,
+ #define GTT_BAR0_MAP_REG_PSDM_RAM 0x01a000UL
+
+ /**
+- * @brief qed_qm_pf_mem_size - prepare QM ILT sizes
++ * qed_qm_pf_mem_size(): Prepare QM ILT sizes.
+ *
+- * Returns the required host memory size in 4KB units.
+- * Must be called before all QM init HSI functions.
++ * @num_pf_cids: Number of connections used by this PF.
++ * @num_vf_cids: Number of connections used by VFs of this PF.
++ * @num_tids: Number of tasks used by this PF.
++ * @num_pf_pqs: Number of PQs used by this PF.
++ * @num_vf_pqs: Number of PQs used by VFs of this PF.
+ *
+- * @param num_pf_cids - number of connections used by this PF
+- * @param num_vf_cids - number of connections used by VFs of this PF
+- * @param num_tids - number of tasks used by this PF
+- * @param num_pf_pqs - number of PQs used by this PF
+- * @param num_vf_pqs - number of PQs used by VFs of this PF
++ * Return: The required host memory size in 4KB units.
+ *
+- * @return The required host memory size in 4KB units.
++ * Returns the required host memory size in 4KB units.
++ * Must be called before all QM init HSI functions.
+ */
+ u32 qed_qm_pf_mem_size(u32 num_pf_cids,
+ u32 num_vf_cids,
+@@ -3800,74 +3814,74 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
+ struct qed_qm_pf_rt_init_params *p_params);
+
+ /**
+- * @brief qed_init_pf_wfq - Initializes the WFQ weight of the specified PF
++ * qed_init_pf_wfq(): Initializes the WFQ weight of the specified PF.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers
+- * @param pf_id - PF ID
+- * @param pf_wfq - WFQ weight. Must be non-zero.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers
++ * @pf_id: PF ID
++ * @pf_wfq: WFQ weight. Must be non-zero.
+ *
+- * @return 0 on success, -1 on error.
++ * Return: 0 on success, -1 on error.
+ */
+ int qed_init_pf_wfq(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 pf_id, u16 pf_wfq);
+
+ /**
+- * @brief qed_init_pf_rl - Initializes the rate limit of the specified PF
++ * qed_init_pf_rl(): Initializes the rate limit of the specified PF
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers
+- * @param pf_id - PF ID
+- * @param pf_rl - rate limit in Mb/sec units
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @pf_id: PF ID.
++ * @pf_rl: rate limit in Mb/sec units
+ *
+- * @return 0 on success, -1 on error.
++ * Return: 0 on success, -1 on error.
+ */
+ int qed_init_pf_rl(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 pf_id, u32 pf_rl);
+
+ /**
+- * @brief qed_init_vport_wfq Initializes the WFQ weight of the specified VPORT
++ * qed_init_vport_wfq(): Initializes the WFQ weight of the specified VPORT
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers
+- * @param first_tx_pq_id- An array containing the first Tx PQ ID associated
+- * with the VPORT for each TC. This array is filled by
+- * qed_qm_pf_rt_init
+- * @param vport_wfq - WFQ weight. Must be non-zero.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers
++ * @first_tx_pq_id: An array containing the first Tx PQ ID associated
++ * with the VPORT for each TC. This array is filled by
++ * qed_qm_pf_rt_init
++ * @wfq: WFQ weight. Must be non-zero.
+ *
+- * @return 0 on success, -1 on error.
++ * Return: 0 on success, -1 on error.
+ */
+ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 first_tx_pq_id[NUM_OF_TCS], u16 wfq);
+
+ /**
+- * @brief qed_init_global_rl - Initializes the rate limit of the specified
+- * rate limiter
++ * qed_init_global_rl(): Initializes the rate limit of the specified
++ * rate limiter.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers
+- * @param rl_id - RL ID
+- * @param rate_limit - rate limit in Mb/sec units
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @rl_id: RL ID.
++ * @rate_limit: Rate limit in Mb/sec units
+ *
+- * @return 0 on success, -1 on error.
++ * Return: 0 on success, -1 on error.
+ */
+ int qed_init_global_rl(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 rl_id, u32 rate_limit);
+
+ /**
+- * @brief qed_send_qm_stop_cmd Sends a stop command to the QM
++ * qed_send_qm_stop_cmd(): Sends a stop command to the QM.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param is_release_cmd - true for release, false for stop.
+- * @param is_tx_pq - true for Tx PQs, false for Other PQs.
+- * @param start_pq - first PQ ID to stop
+- * @param num_pqs - Number of PQs to stop, starting from start_pq.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @is_release_cmd: true for release, false for stop.
++ * @is_tx_pq: true for Tx PQs, false for Other PQs.
++ * @start_pq: first PQ ID to stop
++ * @num_pqs: Number of PQs to stop, starting from start_pq.
+ *
+- * @return bool, true if successful, false if timeout occurred while waiting for
+- * QM command done.
++ * Return: Bool, true if successful, false if timeout occurred while waiting
++ * for QM command done.
+ */
+ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3875,53 +3889,64 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
+ bool is_tx_pq, u16 start_pq, u16 num_pqs);
+
+ /**
+- * @brief qed_set_vxlan_dest_port - initializes vxlan tunnel destination udp port
++ * qed_set_vxlan_dest_port(): Initializes vxlan tunnel destination udp port.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param dest_port - vxlan destination udp port.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dest_port: vxlan destination udp port.
++ *
++ * Return: Void.
+ */
+ void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u16 dest_port);
+
+ /**
+- * @brief qed_set_vxlan_enable - enable or disable VXLAN tunnel in HW
++ * qed_set_vxlan_enable(): Enable or disable VXLAN tunnel in HW.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @vxlan_enable: vxlan enable flag.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param vxlan_enable - vxlan enable flag.
++ * Return: Void.
+ */
+ void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, bool vxlan_enable);
+
+ /**
+- * @brief qed_set_gre_enable - enable or disable GRE tunnel in HW
++ * qed_set_gre_enable(): Enable or disable GRE tunnel in HW.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @eth_gre_enable: Eth GRE enable flag.
++ * @ip_gre_enable: IP GRE enable flag.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param eth_gre_enable - eth GRE enable enable flag.
+- * @param ip_gre_enable - IP GRE enable enable flag.
++ * Return: Void.
+ */
+ void qed_set_gre_enable(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ bool eth_gre_enable, bool ip_gre_enable);
+
+ /**
+- * @brief qed_set_geneve_dest_port - initializes geneve tunnel destination udp port
++ * qed_set_geneve_dest_port(): Initializes geneve tunnel destination udp port
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param dest_port - geneve destination udp port.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @dest_port: Geneve destination udp port.
++ *
++ * Return: Void.
+ */
+ void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u16 dest_port);
+
+ /**
+- * @brief qed_set_gre_enable - enable or disable GRE tunnel in HW
++ * qed_set_geneve_enable(): Enable or disable GENEVE tunnel in HW.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @eth_geneve_enable: Eth GENEVE enable flag.
++ * @ip_geneve_enable: IP GENEVE enable flag.
+ *
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param eth_geneve_enable - eth GENEVE enable enable flag.
+- * @param ip_geneve_enable - IP GENEVE enable enable flag.
++ * Return: Void.
+ */
+ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3931,25 +3956,29 @@ void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, bool enable);
+
+ /**
+- * @brief qed_gft_disable - Disable GFT
++ * qed_gft_disable(): Disable GFT.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @pf_id: PF on which to disable GFT.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param pf_id - pf on which to disable GFT.
++ * Return: Void.
+ */
+ void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id);
+
+ /**
+- * @brief qed_gft_config - Enable and configure HW for GFT
+- *
+- * @param p_hwfn - HW device data
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param pf_id - pf on which to enable GFT.
+- * @param tcp - set profile tcp packets.
+- * @param udp - set profile udp packet.
+- * @param ipv4 - set profile ipv4 packet.
+- * @param ipv6 - set profile ipv6 packet.
+- * @param profile_type - define packet same fields. Use enum gft_profile_type.
++ * qed_gft_config(): Enable and configure HW for GFT.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @pf_id: PF on which to enable GFT.
++ * @tcp: Set profile tcp packets.
++ * @udp: Set profile udp packet.
++ * @ipv4: Set profile ipv4 packet.
++ * @ipv6: Set profile ipv6 packet.
++ * @profile_type: Define packet same fields. Use enum gft_profile_type.
++ *
++ * Return: Void.
+ */
+ void qed_gft_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -3959,107 +3988,120 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
+ bool ipv4, bool ipv6, enum gft_profile_type profile_type);
+
+ /**
+- * @brief qed_enable_context_validation - Enable and configure context
+- * validation.
++ * qed_enable_context_validation(): Enable and configure context
++ * validation.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
+ *
+- * @param p_hwfn
+- * @param p_ptt - ptt window used for writing the registers.
++ * Return: Void.
+ */
+ void qed_enable_context_validation(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_calc_session_ctx_validation - Calcualte validation byte for
+- * session context.
++ * qed_calc_session_ctx_validation(): Calculate validation byte for
++ * session context.
+ *
+- * @param p_ctx_mem - pointer to context memory.
+- * @param ctx_size - context size.
+- * @param ctx_type - context type.
+- * @param cid - context cid.
++ * @p_ctx_mem: Pointer to context memory.
++ * @ctx_size: Context size.
++ * @ctx_type: Context type.
++ * @cid: Context cid.
++ *
++ * Return: Void.
+ */
+ void qed_calc_session_ctx_validation(void *p_ctx_mem,
+ u16 ctx_size, u8 ctx_type, u32 cid);
+
+ /**
+- * @brief qed_calc_task_ctx_validation - Calcualte validation byte for task
+- * context.
++ * qed_calc_task_ctx_validation(): Calculate validation byte for task
++ * context.
++ *
++ * @p_ctx_mem: Pointer to context memory.
++ * @ctx_size: Context size.
++ * @ctx_type: Context type.
++ * @tid: Context tid.
+ *
+- * @param p_ctx_mem - pointer to context memory.
+- * @param ctx_size - context size.
+- * @param ctx_type - context type.
+- * @param tid - context tid.
++ * Return: Void.
+ */
+ void qed_calc_task_ctx_validation(void *p_ctx_mem,
+ u16 ctx_size, u8 ctx_type, u32 tid);
+
+ /**
+- * @brief qed_memset_session_ctx - Memset session context to 0 while
+- * preserving validation bytes.
++ * qed_memset_session_ctx(): Memset session context to 0 while
++ * preserving validation bytes.
++ *
++ * @p_ctx_mem: Pointer to context memory.
++ * @ctx_size: Size to initialize.
++ * @ctx_type: Context type.
+ *
+- * @param p_hwfn -
+- * @param p_ctx_mem - pointer to context memory.
+- * @param ctx_size - size to initialzie.
+- * @param ctx_type - context type.
++ * Return: Void.
+ */
+ void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
+
+ /**
+- * @brief qed_memset_task_ctx - Memset task context to 0 while preserving
+- * validation bytes.
++ * qed_memset_task_ctx(): Memset task context to 0 while preserving
++ * validation bytes.
+ *
+- * @param p_ctx_mem - pointer to context memory.
+- * @param ctx_size - size to initialzie.
+- * @param ctx_type - context type.
++ * @p_ctx_mem: Pointer to context memory.
++ * @ctx_size: Size to initialize.
++ * @ctx_type: Context type.
++ *
++ * Return: Void.
+ */
+ void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
+
+ #define NUM_STORMS 6
+
+ /**
+- * @brief qed_set_rdma_error_level - Sets the RDMA assert level.
+- * If the severity of the error will be
+- * above the level, the FW will assert.
+- * @param p_hwfn - HW device data
+- * @param p_ptt - ptt window used for writing the registers
+- * @param assert_level - An array of assert levels for each storm.
++ * qed_set_rdma_error_level(): Sets the RDMA assert level.
++ * If the severity of the error will be
++ * above the level, the FW will assert.
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @assert_level: An array of assert levels for each storm.
+ *
++ * Return: Void.
+ */
+ void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 assert_level[NUM_STORMS]);
+ /**
+- * @brief qed_fw_overlay_mem_alloc - Allocates and fills the FW overlay memory.
++ * qed_fw_overlay_mem_alloc(): Allocates and fills the FW overlay memory.
+ *
+- * @param p_hwfn - HW device data
+- * @param fw_overlay_in_buf - the input FW overlay buffer.
+- * @param buf_size - the size of the input FW overlay buffer in bytes.
+- * must be aligned to dwords.
+- * @param fw_overlay_out_mem - OUT: a pointer to the allocated overlays memory.
++ * @p_hwfn: HW device data.
++ * @fw_overlay_in_buf: The input FW overlay buffer.
++ * @buf_size_in_bytes: The size of the input FW overlay buffer in bytes.
++ * Must be aligned to dwords.
+ *
+- * @return a pointer to the allocated overlays memory,
++ * Return: A pointer to the allocated overlays memory,
+ * or NULL in case of failures.
+ */
+ struct phys_mem_desc *
+ qed_fw_overlay_mem_alloc(struct qed_hwfn *p_hwfn,
+- const u32 * const fw_overlay_in_buf,
++ const u32 *const fw_overlay_in_buf,
+ u32 buf_size_in_bytes);
+
+ /**
+- * @brief qed_fw_overlay_init_ram - Initializes the FW overlay RAM.
++ * qed_fw_overlay_init_ram(): Initializes the FW overlay RAM.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: Ptt window used for writing the registers.
++ * @fw_overlay_mem: the allocated FW overlay memory.
+ *
+- * @param p_hwfn - HW device data.
+- * @param p_ptt - ptt window used for writing the registers.
+- * @param fw_overlay_mem - the allocated FW overlay memory.
++ * Return: Void.
+ */
+ void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct phys_mem_desc *fw_overlay_mem);
+
+ /**
+- * @brief qed_fw_overlay_mem_free - Frees the FW overlay memory.
++ * qed_fw_overlay_mem_free(): Frees the FW overlay memory.
++ *
++ * @p_hwfn: HW device data.
++ * @fw_overlay_mem: The allocated FW overlay memory to free.
+ *
+- * @param p_hwfn - HW device data.
+- * @param fw_overlay_mem - the allocated FW overlay memory to free.
++ * Return: Void.
+ */
+ void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
+ struct phys_mem_desc *fw_overlay_mem);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
+index 554f30b0cfd5e..6263f847b6b92 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
+@@ -23,7 +23,10 @@
+ #include "qed_reg_addr.h"
+ #include "qed_sriov.h"
+
+-#define QED_BAR_ACQUIRE_TIMEOUT 1000
++#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000
++#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000
++#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000
++#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10
+
+ /* Invalid values */
+ #define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1))
+@@ -84,12 +87,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
+ }
+
+ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
++{
++ return qed_ptt_acquire_context(p_hwfn, false);
++}
++
++struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
+ {
+ struct qed_ptt *p_ptt;
+- unsigned int i;
++ unsigned int i, count;
++
++ if (is_atomic)
++ count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
++ else
++ count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
+
+ /* Take the free PTT from the list */
+- for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
++ for (i = 0; i < count; i++) {
+ spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
+
+ if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
+@@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
+ }
+
+ spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
+- usleep_range(1000, 2000);
++
++ if (is_atomic)
++ udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
++ else
++ usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
++ QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
+ }
+
+ DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
+index 2734f49956f76..e535983ce21bb 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
+@@ -53,85 +53,94 @@ enum _dmae_cmd_crc_mask {
+ #define DMAE_MAX_CLIENTS 32
+
+ /**
+- * @brief qed_gtt_init - Initialize GTT windows
++ * qed_gtt_init(): Initialize GTT windows.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_gtt_init(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ptt_invalidate - Forces all ptt entries to be re-configured
++ * qed_ptt_invalidate(): Forces all ptt entries to be re-configured
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_ptt_invalidate(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ptt_pool_alloc - Allocate and initialize PTT pool
++ * qed_ptt_pool_alloc(): Allocate and initialize PTT pool.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return struct _qed_status - success (0), negative - error.
++ * Return: struct _qed_status - success (0), negative - error.
+ */
+ int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ptt_pool_free -
++ * qed_ptt_pool_free(): Free PTT pool.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ptt_get_hw_addr - Get PTT's GRC/HW address
++ * qed_ptt_get_hw_addr(): Get PTT's GRC/HW address.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt
+ *
+- * @return u32
++ * Return: u32.
+ */
+ u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_ptt_get_bar_addr - Get PPT's external BAR address
++ * qed_ptt_get_bar_addr(): Get PTT's external BAR address.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_ptt: P_ptt
+ *
+- * @return u32
++ * Return: u32.
+ */
+ u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_ptt_set_win - Set PTT Window's GRC BAR address
++ * qed_ptt_set_win(): Set PTT Window's GRC BAR address
+ *
+- * @param p_hwfn
+- * @param new_hw_addr
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @new_hw_addr: New HW address.
++ * @p_ptt: P_Ptt
++ *
++ * Return: Void.
+ */
+ void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 new_hw_addr);
+
+ /**
+- * @brief qed_get_reserved_ptt - Get a specific reserved PTT
++ * qed_get_reserved_ptt(): Get a specific reserved PTT.
+ *
+- * @param p_hwfn
+- * @param ptt_idx
++ * @p_hwfn: HW device data.
++ * @ptt_idx: Ptt Index.
+ *
+- * @return struct qed_ptt *
++ * Return: struct qed_ptt *.
+ */
+ struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn,
+ enum reserved_ptts ptt_idx);
+
+ /**
+- * @brief qed_wr - Write value to BAR using the given ptt
++ * qed_wr(): Write value to BAR using the given ptt.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @val: Val.
++ * @hw_addr: HW address
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param val
+- * @param hw_addr
++ * Return: Void.
+ */
+ void qed_wr(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -139,26 +148,28 @@ void qed_wr(struct qed_hwfn *p_hwfn,
+ u32 val);
+
+ /**
+- * @brief qed_rd - Read value from BAR using the given ptt
++ * qed_rd(): Read value from BAR using the given ptt.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @hw_addr: HW address
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param val
+- * @param hw_addr
++ * Return: u32.
+ */
+ u32 qed_rd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 hw_addr);
+
+ /**
+- * @brief qed_memcpy_from - copy n bytes from BAR using the given
+- * ptt
+- *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param dest
+- * @param hw_addr
+- * @param n
++ * qed_memcpy_from(): Copy n bytes from BAR using the given ptt.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @dest: Destination.
++ * @hw_addr: HW address.
++ * @n: N
++ *
++ * Return: Void.
+ */
+ void qed_memcpy_from(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -167,14 +178,15 @@ void qed_memcpy_from(struct qed_hwfn *p_hwfn,
+ size_t n);
+
+ /**
+- * @brief qed_memcpy_to - copy n bytes to BAR using the given
+- * ptt
+- *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param hw_addr
+- * @param src
+- * @param n
++ * qed_memcpy_to(): Copy n bytes to BAR using the given ptt
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @hw_addr: HW address.
++ * @src: Source.
++ * @n: N
++ *
++ * Return: Void.
+ */
+ void qed_memcpy_to(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -182,83 +194,97 @@ void qed_memcpy_to(struct qed_hwfn *p_hwfn,
+ void *src,
+ size_t n);
+ /**
+- * @brief qed_fid_pretend - pretend to another function when
+- * accessing the ptt window. There is no way to unpretend
+- * a function. The only way to cancel a pretend is to
+- * pretend back to the original function.
+- *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param fid - fid field of pxp_pretend structure. Can contain
+- * either pf / vf, port/path fields are don't care.
++ * qed_fid_pretend(): pretend to another function when
++ * accessing the ptt window. There is no way to unpretend
++ * a function. The only way to cancel a pretend is to
++ * pretend back to the original function.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @fid: fid field of pxp_pretend structure. Can contain
++ * either pf / vf, port/path fields are don't care.
++ *
++ * Return: Void.
+ */
+ void qed_fid_pretend(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 fid);
+
+ /**
+- * @brief qed_port_pretend - pretend to another port when
+- * accessing the ptt window
++ * qed_port_pretend(): Pretend to another port when accessing the ptt window
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param port_id - the port to pretend to
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @port_id: The port to pretend to
++ *
++ * Return: Void.
+ */
+ void qed_port_pretend(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 port_id);
+
+ /**
+- * @brief qed_port_unpretend - cancel any previously set port
+- * pretend
++ * qed_port_unpretend(): Cancel any previously set port pretend
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Return: Void.
+ */
+ void qed_port_unpretend(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_port_fid_pretend - pretend to another port and another function
+- * when accessing the ptt window
++ * qed_port_fid_pretend(): Pretend to another port and another function
++ * when accessing the ptt window
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @port_id: The port to pretend to
++ * @fid: fid field of pxp_pretend structure. Can contain either pf / vf.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param port_id - the port to pretend to
+- * @param fid - fid field of pxp_pretend structure. Can contain either pf / vf.
++ * Return: Void.
+ */
+ void qed_port_fid_pretend(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 port_id, u16 fid);
+
+ /**
+- * @brief qed_vfid_to_concrete - build a concrete FID for a
+- * given VF ID
++ * qed_vfid_to_concrete(): Build a concrete FID for a given VF ID
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param vfid
++ * @p_hwfn: HW device data.
++ * @vfid: VFID.
++ *
++ * Return: u32.
+ */
+ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid);
+
+ /**
+- * @brief qed_dmae_idx_to_go_cmd - map the idx to dmae cmd
+- * this is declared here since other files will require it.
+- * @param idx
++ * qed_dmae_idx_to_go_cmd(): Map the idx to dmae cmd
++ * this is declared here since other files will require it.
++ *
++ * @idx: Index
++ *
++ * Return: u32.
+ */
+ u32 qed_dmae_idx_to_go_cmd(u8 idx);
+
+ /**
+- * @brief qed_dmae_info_alloc - Init the dmae_info structure
+- * which is part of p_hwfn.
+- * @param p_hwfn
++ * qed_dmae_info_alloc(): Init the dmae_info structure
++ * which is part of p_hwfn.
++ *
++ * @p_hwfn: HW device data.
++ *
++ * Return: Int.
+ */
+ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_dmae_info_free - Free the dmae_info structure
+- * which is part of p_hwfn
++ * qed_dmae_info_free(): Free the dmae_info structure
++ * which is part of p_hwfn.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_dmae_info_free(struct qed_hwfn *p_hwfn);
+
+@@ -292,14 +318,16 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
+ #define QED_HW_ERR_MAX_STR_SIZE 256
+
+ /**
+- * @brief qed_hw_err_notify - Notify upper layer driver and management FW
+- * about a HW error.
+- *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param err_type
+- * @param fmt - debug data buffer to send to the MFW
+- * @param ... - buffer format args
++ * qed_hw_err_notify(): Notify upper layer driver and management FW
++ * about a HW error.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @err_type: Err Type.
++ * @fmt: Debug data buffer to send to the MFW
++ * @...: buffer format args
++ *
++ * Return: Void.
+ */
+ void __printf(4, 5) __cold qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
+index a573c89219820..1dbc460c9eec8 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
+@@ -12,23 +12,24 @@
+ #include "qed.h"
+
+ /**
+- * @brief qed_init_iro_array - init iro_arr.
++ * qed_init_iro_array(): init iro_arr.
+ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: Void.
+ */
+ void qed_init_iro_array(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_init_run - Run the init-sequence.
++ * qed_init_run(): Run the init-sequence.
+ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @phase: Phase.
++ * @phase_id: Phase ID.
++ * @modes: Mode.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param phase
+- * @param phase_id
+- * @param modes
+- * @return _qed_status_t
++ * Return: _qed_status_t
+ */
+ int qed_init_run(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -37,30 +38,31 @@ int qed_init_run(struct qed_hwfn *p_hwfn,
+ int modes);
+
+ /**
+- * @brief qed_init_hwfn_allocate - Allocate RT array, Store 'values' ptrs.
++ * qed_init_alloc(): Allocate RT array, Store 'values' ptrs.
+ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
+- *
+- * @return _qed_status_t
++ * Return: _qed_status_t.
+ */
+ int qed_init_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_init_hwfn_deallocate
++ * qed_init_free(): Init HW function deallocate.
+ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_init_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_init_store_rt_reg - Store a configuration value in the RT array.
++ * qed_init_store_rt_reg(): Store a configuration value in the RT array.
+ *
++ * @p_hwfn: HW device data.
++ * @rt_offset: RT offset.
++ * @val: Val.
+ *
+- * @param p_hwfn
+- * @param rt_offset
+- * @param val
++ * Return: Void.
+ */
+ void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
+ u32 rt_offset,
+@@ -72,15 +74,6 @@ void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
+ #define OVERWRITE_RT_REG(hwfn, offset, val) \
+ qed_init_store_rt_reg(hwfn, offset, val)
+
+-/**
+- * @brief
+- *
+- *
+- * @param p_hwfn
+- * @param rt_offset
+- * @param val
+- * @param size
+- */
+ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
+ u32 rt_offset,
+ u32 *val,
+@@ -90,11 +83,12 @@ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
+ qed_init_store_rt_agg(hwfn, offset, (u32 *)&val, sizeof(val))
+
+ /**
+- * @brief
+- * Initialize GTT global windows and set admin window
+- * related params of GTT/PTT to default values.
++ * qed_gtt_init(): Initialize GTT global windows and set admin window
++ * related params of GTT/PTT to default values.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_gtt_init(struct qed_hwfn *p_hwfn);
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
+index c5550e96bbe1f..eb8e0f4242d79 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
+@@ -53,51 +53,54 @@ enum qed_coalescing_fsm {
+ };
+
+ /**
+- * @brief qed_int_igu_enable_int - enable device interrupts
++ * qed_int_igu_enable_int(): Enable device interrupts.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param int_mode - interrupt mode to use
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @int_mode: Interrupt mode to use.
++ *
++ * Return: Void.
+ */
+ void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_int_mode int_mode);
+
+ /**
+- * @brief qed_int_igu_disable_int - disable device interrupts
++ * qed_int_igu_disable_int(): Disable device interrupts.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Return: Void.
+ */
+ void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_int_igu_read_sisr_reg - Reads the single isr multiple dpc
+- * register from igu.
++ * qed_int_igu_read_sisr_reg(): Reads the single isr multiple dpc
++ * register from igu.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return u64
++ * Return: u64.
+ */
+ u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn);
+
+ #define QED_SP_SB_ID 0xffff
+ /**
+- * @brief qed_int_sb_init - Initializes the sb_info structure.
++ * qed_int_sb_init(): Initializes the sb_info structure.
+ *
+- * once the structure is initialized it can be passed to sb related functions.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @sb_info: points to an uninitialized (but allocated) sb_info structure
++ * @sb_virt_addr: SB Virtual address.
++ * @sb_phy_addr: SB Physical address.
++ * @sb_id: the sb_id to be used (zero based in driver)
++ * should use QED_SP_SB_ID for SP Status block
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param sb_info points to an uninitialized (but
+- * allocated) sb_info structure
+- * @param sb_virt_addr
+- * @param sb_phy_addr
+- * @param sb_id the sb_id to be used (zero based in driver)
+- * should use QED_SP_SB_ID for SP Status block
++ * Return: int.
+ *
+- * @return int
++ * Once the structure is initialized it can be passed to sb related functions.
+ */
+ int qed_int_sb_init(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -106,82 +109,91 @@ int qed_int_sb_init(struct qed_hwfn *p_hwfn,
+ dma_addr_t sb_phy_addr,
+ u16 sb_id);
+ /**
+- * @brief qed_int_sb_setup - Setup the sb.
++ * qed_int_sb_setup(): Setup the sb.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @sb_info: Initialized sb_info structure.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param sb_info initialized sb_info structure
++ * Return: Void.
+ */
+ void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_sb_info *sb_info);
+
+ /**
+- * @brief qed_int_sb_release - releases the sb_info structure.
++ * qed_int_sb_release(): Releases the sb_info structure.
+ *
+- * once the structure is released, it's memory can be freed
++ * @p_hwfn: HW device data.
++ * @sb_info: Points to an allocated sb_info structure.
++ * @sb_id: The sb_id to be used (zero based in driver)
++ * should never be equal to QED_SP_SB_ID
++ * (SP Status block).
+ *
+- * @param p_hwfn
+- * @param sb_info points to an allocated sb_info structure
+- * @param sb_id the sb_id to be used (zero based in driver)
+- * should never be equal to QED_SP_SB_ID
+- * (SP Status block)
++ * Return: int.
+ *
+- * @return int
++ * Once the structure is released, it's memory can be freed.
+ */
+ int qed_int_sb_release(struct qed_hwfn *p_hwfn,
+ struct qed_sb_info *sb_info,
+ u16 sb_id);
+
+ /**
+- * @brief qed_int_sp_dpc - To be called when an interrupt is received on the
+- * default status block.
++ * qed_int_sp_dpc(): To be called when an interrupt is received on the
++ * default status block.
+ *
+- * @param p_hwfn - pointer to hwfn
++ * @t: Tasklet.
++ *
++ * Return: Void.
+ *
+ */
+ void qed_int_sp_dpc(struct tasklet_struct *t);
+
+ /**
+- * @brief qed_int_get_num_sbs - get the number of status
+- * blocks configured for this funciton in the igu.
++ * qed_int_get_num_sbs(): Get the number of status blocks configured
++ * for this function in the igu.
+ *
+- * @param p_hwfn
+- * @param p_sb_cnt_info
++ * @p_hwfn: HW device data.
++ * @p_sb_cnt_info: Pointer to SB count info.
+ *
+- * @return int - number of status blocks configured
++ * Return: Void.
+ */
+ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
+ struct qed_sb_cnt_info *p_sb_cnt_info);
+
+ /**
+- * @brief qed_int_disable_post_isr_release - performs the cleanup post ISR
++ * qed_int_disable_post_isr_release(): Performs the cleanup post ISR
+ * release. The API need to be called after releasing all slowpath IRQs
+ * of the device.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
++ * Return: Void.
+ */
+ void qed_int_disable_post_isr_release(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_int_attn_clr_enable - sets whether the general behavior is
++ * qed_int_attn_clr_enable(): Sets whether the general behavior is
+ * preventing attentions from being reasserted, or following the
+ * attributes of the specific attention.
+ *
+- * @param cdev
+- * @param clr_enable
++ * @cdev: Qed dev pointer.
++ * @clr_enable: Clear enable
++ *
++ * Return: Void.
+ *
+ */
+ void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
+
+ /**
+- * @brief - Doorbell Recovery handler.
++ * qed_db_rec_handler(): Doorbell Recovery handler.
+ * Run doorbell recovery in case of PF overflow (and flush DORQ if
+ * needed).
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Int.
+ */
+ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+@@ -223,30 +235,34 @@ struct qed_igu_info {
+ };
+
+ /**
+- * @brief - Make sure the IGU CAM reflects the resources provided by MFW
++ * qed_int_igu_reset_cam(): Make sure the IGU CAM reflects the resources
++ * provided by MFW.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Return: Int.
+ */
+ int qed_int_igu_reset_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Translate the weakly-defined client sb-id into an IGU sb-id
++ * qed_get_igu_sb_id(): Translate the weakly-defined client sb-id into
++ * an IGU sb-id
+ *
+- * @param p_hwfn
+- * @param sb_id - user provided sb_id
++ * @p_hwfn: HW device data.
++ * @sb_id: user provided sb_id.
+ *
+- * @return an index inside IGU CAM where the SB resides
++ * Return: An index inside IGU CAM where the SB resides.
+ */
+ u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id);
+
+ /**
+- * @brief return a pointer to an unused valid SB
++ * qed_get_igu_free_sb(): Return a pointer to an unused valid SB
+ *
+- * @param p_hwfn
+- * @param b_is_pf - true iff we want a SB belonging to a PF
++ * @p_hwfn: HW device data.
++ * @b_is_pf: True iff we want a SB belonging to a PF.
+ *
+- * @return point to an igu_block, NULL if none is available
++ * Return: Pointer to an igu_block, NULL if none is available.
+ */
+ struct qed_igu_block *qed_get_igu_free_sb(struct qed_hwfn *p_hwfn,
+ bool b_is_pf);
+@@ -259,15 +275,15 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn,
+ void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_int_igu_read_cam - Reads the IGU CAM.
++ * qed_int_igu_read_cam(): Reads the IGU CAM.
+ * This function needs to be called during hardware
+ * prepare. It reads the info from igu cam to know which
+ * status block is the default / base status block etc.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+@@ -275,24 +291,22 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
+ typedef int (*qed_int_comp_cb_t)(struct qed_hwfn *p_hwfn,
+ void *cookie);
+ /**
+- * @brief qed_int_register_cb - Register callback func for
+- * slowhwfn statusblock.
+- *
+- * Every protocol that uses the slowhwfn status block
+- * should register a callback function that will be called
+- * once there is an update of the sp status block.
+- *
+- * @param p_hwfn
+- * @param comp_cb - function to be called when there is an
+- * interrupt on the sp sb
+- *
+- * @param cookie - passed to the callback function
+- * @param sb_idx - OUT parameter which gives the chosen index
+- * for this protocol.
+- * @param p_fw_cons - pointer to the actual address of the
+- * consumer for this protocol.
+- *
+- * @return int
++ * qed_int_register_cb(): Register callback func for slowhwfn statusblock.
++ *
++ * @p_hwfn: HW device data.
++ * @comp_cb: Function to be called when there is an
++ * interrupt on the sp sb
++ * @cookie: Passed to the callback function
++ * @sb_idx: (OUT) parameter which gives the chosen index
++ * for this protocol.
++ * @p_fw_cons: Pointer to the actual address of the
++ * consumer for this protocol.
++ *
++ * Return: Int.
++ *
++ * Every protocol that uses the slowhwfn status block
++ * should register a callback function that will be called
++ * once there is an update of the sp status block.
+ */
+ int qed_int_register_cb(struct qed_hwfn *p_hwfn,
+ qed_int_comp_cb_t comp_cb,
+@@ -301,37 +315,40 @@ int qed_int_register_cb(struct qed_hwfn *p_hwfn,
+ __le16 **p_fw_cons);
+
+ /**
+- * @brief qed_int_unregister_cb - Unregisters callback
+- * function from sp sb.
+- * Partner of qed_int_register_cb -> should be called
+- * when no longer required.
++ * qed_int_unregister_cb(): Unregisters callback function from sp sb.
++ *
++ * @p_hwfn: HW device data.
++ * @pi: Producer Index.
+ *
+- * @param p_hwfn
+- * @param pi
++ * Return: Int.
+ *
+- * @return int
++ * Partner of qed_int_register_cb -> should be called
++ * when no longer required.
+ */
+ int qed_int_unregister_cb(struct qed_hwfn *p_hwfn,
+ u8 pi);
+
+ /**
+- * @brief qed_int_get_sp_sb_id - Get the slowhwfn sb id.
++ * qed_int_get_sp_sb_id(): Get the slowhwfn sb id.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return u16
++ * Return: u16.
+ */
+ u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief Status block cleanup. Should be called for each status
+- * block that will be used -> both PF / VF
+- *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param igu_sb_id - igu status block id
+- * @param opaque - opaque fid of the sb owner.
+- * @param b_set - set(1) / clear(0)
++ * qed_int_igu_init_pure_rt_single(): Status block cleanup.
++ * Should be called for each status
++ * block that will be used -> both PF / VF.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @igu_sb_id: IGU status block id.
++ * @opaque: Opaque fid of the sb owner.
++ * @b_set: Set(1) / Clear(0).
++ *
++ * Return: Void.
+ */
+ void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -340,15 +357,16 @@ void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn,
+ bool b_set);
+
+ /**
+- * @brief qed_int_cau_conf - configure cau for a given status
+- * block
+- *
+- * @param p_hwfn
+- * @param ptt
+- * @param sb_phys
+- * @param igu_sb_id
+- * @param vf_number
+- * @param vf_valid
++ * qed_int_cau_conf_sb(): Configure cau for a given status block.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @sb_phys: SB Physical address.
++ * @igu_sb_id: IGU status block id.
++ * @vf_number: VF number
++ * @vf_valid: VF valid or not.
++ *
++ * Return: Void.
+ */
+ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -358,52 +376,58 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
+ u8 vf_valid);
+
+ /**
+- * @brief qed_int_alloc
++ * qed_int_alloc(): QED interrupt alloc.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_int_alloc(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief qed_int_free
++ * qed_int_free(): QED interrupt free.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_int_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_int_setup
++ * qed_int_setup(): QED interrupt setup.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Void.
+ */
+ void qed_int_setup(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief - Enable Interrupt & Attention for hw function
++ * qed_int_igu_enable(): Enable Interrupt & Attention for hw function.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param int_mode
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @int_mode: Interrupt mode.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ enum qed_int_mode int_mode);
+
+ /**
+- * @brief - Initialize CAU status block entry
++ * qed_init_cau_sb_entry(): Initialize CAU status block entry.
++ *
++ * @p_hwfn: HW device data.
++ * @p_sb_entry: Pointer SB entry.
++ * @pf_id: PF number
++ * @vf_number: VF number
++ * @vf_valid: VF valid or not.
+ *
+- * @param p_hwfn
+- * @param p_sb_entry
+- * @param pf_id
+- * @param vf_number
+- * @param vf_valid
++ * Return: Void.
+ */
+ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
+ struct cau_sb_entry *p_sb_entry,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+index db926d8b30334..f111391772778 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+@@ -1000,13 +1000,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
+ }
+
+ static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
+- struct qed_iscsi_stats *stats)
++ struct qed_iscsi_stats *stats,
++ bool is_atomic)
+ {
+ struct qed_ptt *p_ptt;
+
+ memset(stats, 0, sizeof(*stats));
+
+- p_ptt = qed_ptt_acquire(p_hwfn);
++ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
+ if (!p_ptt) {
+ DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+ return -EAGAIN;
+@@ -1337,9 +1338,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
+ QED_SPQ_MODE_EBLOCK, NULL);
+ }
+
++static int qed_iscsi_stats_context(struct qed_dev *cdev,
++ struct qed_iscsi_stats *stats,
++ bool is_atomic)
++{
++ return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
++}
++
+ static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
+ {
+- return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
++ return qed_iscsi_stats_context(cdev, stats, false);
+ }
+
+ static int qed_iscsi_change_mac(struct qed_dev *cdev,
+@@ -1359,13 +1367,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
+ }
+
+ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+- struct qed_mcp_iscsi_stats *stats)
++ struct qed_mcp_iscsi_stats *stats,
++ bool is_atomic)
+ {
+ struct qed_iscsi_stats proto_stats;
+
+ /* Retrieve FW statistics */
+ memset(&proto_stats, 0, sizeof(proto_stats));
+- if (qed_iscsi_stats(cdev, &proto_stats)) {
++ if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
+ DP_VERBOSE(cdev, QED_MSG_STORAGE,
+ "Failed to collect ISCSI statistics\n");
+ return;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+index dab7a5d09f874..974cb8d26608c 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+@@ -34,13 +34,19 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
+ void qed_iscsi_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief - Fills provided statistics struct with statistics.
++ * qed_get_protocol_stats_iscsi(): Fills provided statistics
++ * struct with statistics.
+ *
+- * @param cdev
+- * @param stats - points to struct that will be filled with statistics.
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: Void.
+ */
+ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+- struct qed_mcp_iscsi_stats *stats);
++ struct qed_mcp_iscsi_stats *stats,
++ bool is_atomic);
+ #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+ static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
+ {
+@@ -53,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
+
+ static inline void
+ qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+- struct qed_mcp_iscsi_stats *stats) {}
++ struct qed_mcp_iscsi_stats *stats,
++ bool is_atomic) {}
+ #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
+index dfaf10edfabfd..6ffa6425a75a5 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
+@@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
+ }
+
+ static void _qed_get_vport_stats(struct qed_dev *cdev,
+- struct qed_eth_stats *stats)
++ struct qed_eth_stats *stats,
++ bool is_atomic)
+ {
+ u8 fw_vport = 0;
+ int i;
+@@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
+
+ for_each_hwfn(cdev, i) {
+ struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+- struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
+- : NULL;
++ struct qed_ptt *p_ptt;
+ bool b_get_port_stats;
+
++ p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
++ : NULL;
+ if (IS_PF(cdev)) {
+ /* The main vport index is relative first */
+ if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
+@@ -1900,15 +1902,22 @@ out:
+ }
+
+ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
++{
++ qed_get_vport_stats_context(cdev, stats, false);
++}
++
++void qed_get_vport_stats_context(struct qed_dev *cdev,
++ struct qed_eth_stats *stats,
++ bool is_atomic)
+ {
+ u32 i;
+
+- if (!cdev) {
++ if (!cdev || cdev->recov_in_prog) {
+ memset(stats, 0, sizeof(*stats));
+ return;
+ }
+
+- _qed_get_vport_stats(cdev, stats);
++ _qed_get_vport_stats(cdev, stats, is_atomic);
+
+ if (!cdev->reset_stats)
+ return;
+@@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
+ if (!cdev->reset_stats) {
+ DP_INFO(cdev, "Reset stats not allocated\n");
+ } else {
+- _qed_get_vport_stats(cdev, cdev->reset_stats);
++ _qed_get_vport_stats(cdev, cdev->reset_stats, false);
+ cdev->reset_stats->common.link_change_count = 0;
+ }
+ }
+@@ -2763,25 +2772,6 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev,
+ return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL);
+ }
+
+-static int qed_configure_filter(struct qed_dev *cdev,
+- struct qed_filter_params *params)
+-{
+- enum qed_filter_rx_mode_type accept_flags;
+-
+- switch (params->type) {
+- case QED_FILTER_TYPE_UCAST:
+- return qed_configure_filter_ucast(cdev, &params->filter.ucast);
+- case QED_FILTER_TYPE_MCAST:
+- return qed_configure_filter_mcast(cdev, &params->filter.mcast);
+- case QED_FILTER_TYPE_RX_MODE:
+- accept_flags = params->filter.accept_flags;
+- return qed_configure_filter_rx_mode(cdev, accept_flags);
+- default:
+- DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type);
+- return -EINVAL;
+- }
+-}
+-
+ static int qed_configure_arfs_searcher(struct qed_dev *cdev,
+ enum qed_filter_config_mode mode)
+ {
+@@ -2904,7 +2894,9 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
+ .q_rx_stop = &qed_stop_rxq,
+ .q_tx_start = &qed_start_txq,
+ .q_tx_stop = &qed_stop_txq,
+- .filter_config = &qed_configure_filter,
++ .filter_config_rx_mode = &qed_configure_filter_rx_mode,
++ .filter_config_ucast = &qed_configure_filter_ucast,
++ .filter_config_mcast = &qed_configure_filter_mcast,
+ .fastpath_stop = &qed_fastpath_stop,
+ .eth_cqe_completion = &qed_fp_cqe_completion,
+ .get_vport_stats = &qed_get_vport_stats,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
+index 8eceeebb1a7be..602a12a348b2e 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
+@@ -92,18 +92,18 @@ struct qed_filter_mcast {
+ };
+
+ /**
+- * @brief qed_eth_rx_queue_stop - This ramrod closes an Rx queue
++ * qed_eth_rx_queue_stop(): This ramrod closes an Rx queue.
+ *
+- * @param p_hwfn
+- * @param p_rxq Handler of queue to close
+- * @param eq_completion_only If True completion will be on
+- * EQe, if False completion will be
+- * on EQe if p_hwfn opaque
+- * different from the RXQ opaque
+- * otherwise on CQe.
+- * @param cqe_completion If True completion will be
+- * receive on CQe.
+- * @return int
++ * @p_hwfn: HW device data.
++ * @p_rxq: Handler of queue to close
++ * @eq_completion_only: If True completion will be on
++ * EQe, if False completion will be
++ * on EQe if p_hwfn opaque
++ * different from the RXQ opaque
++ * otherwise on CQe.
++ * @cqe_completion: If True completion will be received on CQe.
++ *
++ * Return: Int.
+ */
+ int
+ qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+@@ -111,12 +111,12 @@ qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+ bool eq_completion_only, bool cqe_completion);
+
+ /**
+- * @brief qed_eth_tx_queue_stop - closes a Tx queue
++ * qed_eth_tx_queue_stop(): Closes a Tx queue.
+ *
+- * @param p_hwfn
+- * @param p_txq - handle to Tx queue needed to be closed
++ * @p_hwfn: HW device data.
++ * @p_txq: Handle to Tx queue to be closed.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_txq);
+
+@@ -205,16 +205,15 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
+ struct qed_spq_comp_cb *p_comp_data);
+
+ /**
+- * @brief qed_sp_vport_stop -
+- *
+- * This ramrod closes a VPort after all its RX and TX queues are terminated.
+- * An Assert is generated if any queues are left open.
++ * qed_sp_vport_stop(): This ramrod closes a VPort after all its
++ * RX and TX queues are terminated.
++ * An Assert is generated if any queues are left open.
+ *
+- * @param p_hwfn
+- * @param opaque_fid
+- * @param vport_id VPort ID
++ * @p_hwfn: HW device data.
++ * @opaque_fid: Opaque FID
++ * @vport_id: VPort ID.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id);
+
+@@ -225,22 +224,21 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
+ struct qed_spq_comp_cb *p_comp_data);
+
+ /**
+- * @brief qed_sp_rx_eth_queues_update -
+- *
+- * This ramrod updates an RX queue. It is used for setting the active state
+- * of the queue and updating the TPA and SGE parameters.
+- *
+- * @note At the moment - only used by non-linux VFs.
++ * qed_sp_eth_rx_queues_update(): This ramrod updates an RX queue.
++ * It is used for setting the active state
++ * of the queue and updating the TPA and
++ * SGE parameters.
++ * @p_hwfn: HW device data.
++ * @pp_rxq_handlers: An array of queue handlers to be updated.
++ * @num_rxqs: number of queues to update.
++ * @complete_cqe_flg: Post completion to the CQE Ring if set.
++ * @complete_event_flg: Post completion to the Event Ring if set.
++ * @comp_mode: Comp mode.
++ * @p_comp_data: Pointer Comp data.
+ *
+- * @param p_hwfn
+- * @param pp_rxq_handlers An array of queue handlers to be updated.
+- * @param num_rxqs number of queues to update.
+- * @param complete_cqe_flg Post completion to the CQE Ring if set
+- * @param complete_event_flg Post completion to the Event Ring if set
+- * @param comp_mode
+- * @param p_comp_data
++ * Return: Int.
+ *
+- * @return int
++ * Note: At the moment - only used by non-linux VFs.
+ */
+
+ int
+@@ -252,35 +250,61 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
+ enum spq_mode comp_mode,
+ struct qed_spq_comp_cb *p_comp_data);
+
++/**
++ * qed_get_vport_stats(): Fills provided statistics
++ * struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ *
++ * Return: Void.
++ */
+ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+
++/**
++ * qed_get_vport_stats_context(): Fills provided statistics
++ * struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - whether the function can sleep.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: Void.
++ */
++void qed_get_vport_stats_context(struct qed_dev *cdev,
++ struct qed_eth_stats *stats,
++ bool is_atomic);
++
+ void qed_reset_vport_stats(struct qed_dev *cdev);
+
+ /**
+- * *@brief qed_arfs_mode_configure -
+- *
+- **Enable or disable rfs mode. It must accept atleast one of tcp or udp true
+- **and atleast one of ipv4 or ipv6 true to enable rfs mode.
++ * qed_arfs_mode_configure(): Enable or disable rfs mode.
++ * It must accept at least one of tcp or udp true
++ * and at least one of ipv4 or ipv6 true to enable
++ * rfs mode.
+ *
+- **@param p_hwfn
+- **@param p_ptt
+- **@param p_cfg_params - arfs mode configuration parameters.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_cfg_params: arfs mode configuration parameters.
+ *
++ * Return: Void.
+ */
+ void qed_arfs_mode_configure(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_arfs_config_params *p_cfg_params);
+
+ /**
+- * @brief - qed_configure_rfs_ntuple_filter
++ * qed_configure_rfs_ntuple_filter(): This ramrod should be used to add
++ * or remove arfs hw filter
+ *
+- * This ramrod should be used to add or remove arfs hw filter
++ * @p_hwfn: HW device data.
++ * @p_cb: Used for QED_SPQ_MODE_CB, where client would initialize
++ * it with cookie and callback function address, if not
++ * using this mode then client must pass NULL.
++ * @p_params: Pointer to params.
+ *
+- * @params p_hwfn
+- * @params p_cb - Used for QED_SPQ_MODE_CB,where client would initialize
+- * it with cookie and callback function address, if not
+- * using this mode then client must pass NULL.
+- * @params p_params
++ * Return: Int.
+ */
+ int
+ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
+@@ -374,16 +398,17 @@ qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
+ struct qed_sp_vport_start_params *p_params);
+
+ /**
+- * @brief - Starts an Rx queue, when queue_cid is already prepared
++ * qed_eth_rxq_start_ramrod(): Starts an Rx queue, when queue_cid is
++ * already prepared
+ *
+- * @param p_hwfn
+- * @param p_cid
+- * @param bd_max_bytes
+- * @param bd_chain_phys_addr
+- * @param cqe_pbl_addr
+- * @param cqe_pbl_size
++ * @p_hwfn: HW device data.
++ * @p_cid: Pointer CID.
++ * @bd_max_bytes: Max bytes.
++ * @bd_chain_phys_addr: Chain physical address.
++ * @cqe_pbl_addr: PBL address.
++ * @cqe_pbl_size: PBL size.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int
+ qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+@@ -393,15 +418,16 @@ qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+ dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+
+ /**
+- * @brief - Starts a Tx queue, where queue_cid is already prepared
++ * qed_eth_txq_start_ramrod(): Starts a Tx queue, where queue_cid is
++ * already prepared
+ *
+- * @param p_hwfn
+- * @param p_cid
+- * @param pbl_addr
+- * @param pbl_size
+- * @param p_pq_params - parameters for choosing the PQ for this Tx queue
++ * @p_hwfn: HW device data.
++ * @p_cid: Pointer CID.
++ * @pbl_addr: PBL address.
++ * @pbl_size: PBL size.
++ * @pq_id: Parameters for choosing the PQ for this Tx queue.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int
+ qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+index df88d00053a29..f80f7739ff8d6 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+@@ -119,41 +119,41 @@ struct qed_ll2_info {
+ extern const struct qed_ll2_ops qed_ll2_ops_pass;
+
+ /**
+- * @brief qed_ll2_acquire_connection - allocate resources,
+- * starts rx & tx (if relevant) queues pair. Provides
+- * connecion handler as output parameter.
++ * qed_ll2_acquire_connection(): Allocate resources,
++ * starts rx & tx (if relevant) queues pair.
++ * Provides connection handler as output
++ * parameter.
+ *
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @data: Describes connection parameters.
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param data - describes connection parameters
+- * @return int
++ * Return: Int.
+ */
+ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data);
+
+ /**
+- * @brief qed_ll2_establish_connection - start previously
+- * allocated LL2 queues pair
++ * qed_ll2_establish_connection(): Start previously allocated LL2 queues pair.
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param p_ptt
+- * @param connection_handle LL2 connection's handle obtained from
+- * qed_ll2_require_connection
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_establish_connection(void *cxt, u8 connection_handle);
+
+ /**
+- * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue.
++ * qed_ll2_post_rx_buffer(): Submit buffers to LL2 Rx queue.
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle LL2 connection's handle obtained from
+- * qed_ll2_require_connection
+- * @param addr rx (physical address) buffers to submit
+- * @param cookie
+- * @param notify_fw produce corresponding Rx BD immediately
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
++ * @addr: RX (physical address) buffers to submit.
++ * @buf_len: Buffer Len.
++ * @cookie: Cookie.
++ * @notify_fw: Produce corresponding Rx BD immediately.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_post_rx_buffer(void *cxt,
+ u8 connection_handle,
+@@ -161,15 +161,15 @@ int qed_ll2_post_rx_buffer(void *cxt,
+ u16 buf_len, void *cookie, u8 notify_fw);
+
+ /**
+- * @brief qed_ll2_prepare_tx_packet - request for start Tx BD
+- * to prepare Tx packet submission to FW.
++ * qed_ll2_prepare_tx_packet(): Request for start Tx BD
++ * to prepare Tx packet submission to FW.
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle
+- * @param pkt - info regarding the tx packet
+- * @param notify_fw - issue doorbell to fw for this packet
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: Connection handle.
++ * @pkt: Info regarding the tx packet.
++ * @notify_fw: Issue doorbell to fw for this packet.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_prepare_tx_packet(void *cxt,
+ u8 connection_handle,
+@@ -177,81 +177,83 @@ int qed_ll2_prepare_tx_packet(void *cxt,
+ bool notify_fw);
+
+ /**
+- * @brief qed_ll2_release_connection - releases resources
+- * allocated for LL2 connection
++ * qed_ll2_release_connection(): Releases resources allocated for LL2
++ * connection.
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle LL2 connection's handle obtained from
+- * qed_ll2_require_connection
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
++ *
++ * Return: Void.
+ */
+ void qed_ll2_release_connection(void *cxt, u8 connection_handle);
+
+ /**
+- * @brief qed_ll2_set_fragment_of_tx_packet - provides fragments to fill
+- * Tx BD of BDs requested by
+- * qed_ll2_prepare_tx_packet
++ * qed_ll2_set_fragment_of_tx_packet(): Provides fragments to fill
++ * Tx BD of BDs requested by
++ * qed_ll2_prepare_tx_packet
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle LL2 connection's handle
+- * obtained from
+- * qed_ll2_require_connection
+- * @param addr
+- * @param nbytes
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
++ * @addr: Address.
++ * @nbytes: Number of bytes.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_set_fragment_of_tx_packet(void *cxt,
+ u8 connection_handle,
+ dma_addr_t addr, u16 nbytes);
+
+ /**
+- * @brief qed_ll2_terminate_connection - stops Tx/Rx queues
+- *
++ * qed_ll2_terminate_connection(): Stops Tx/Rx queues
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle LL2 connection's handle
+- * obtained from
+- * qed_ll2_require_connection
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle);
+
+ /**
+- * @brief qed_ll2_get_stats - get LL2 queue's statistics
+- *
++ * qed_ll2_get_stats(): Get LL2 queue's statistics
+ *
+- * @param cxt - pointer to the hw-function [opaque to some]
+- * @param connection_handle LL2 connection's handle obtained from
+- * qed_ll2_require_connection
+- * @param p_stats
++ * @cxt: Pointer to the hw-function [opaque to some].
++ * @connection_handle: LL2 connection's handle obtained from
++ * qed_ll2_require_connection.
++ * @p_stats: Pointer to statistics.
+ *
+- * @return 0 on success, failure otherwise
++ * Return: 0 on success, failure otherwise.
+ */
+ int qed_ll2_get_stats(void *cxt,
+ u8 connection_handle, struct qed_ll2_stats *p_stats);
+
+ /**
+- * @brief qed_ll2_alloc - Allocates LL2 connections set
++ * qed_ll2_alloc(): Allocates LL2 connections set.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_ll2_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ll2_setup - Inits LL2 connections set
++ * qed_ll2_setup(): Inits LL2 connections set.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ *
+ */
+ void qed_ll2_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_ll2_free - Releases LL2 connections set
++ * qed_ll2_free(): Releases LL2 connections set
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ *
+ */
+ void qed_ll2_free(struct qed_hwfn *p_hwfn);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
+index d10e1cd6d2ba9..26700b0b4b370 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
+@@ -3054,7 +3054,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
+
+ switch (type) {
+ case QED_MCP_LAN_STATS:
+- qed_get_vport_stats(cdev, &eth_stats);
++ qed_get_vport_stats_context(cdev, &eth_stats, true);
+ stats->lan_stats.ucast_rx_pkts =
+ eth_stats.common.rx_ucast_pkts;
+ stats->lan_stats.ucast_tx_pkts =
+@@ -3062,10 +3062,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
+ stats->lan_stats.fcs_err = -1;
+ break;
+ case QED_MCP_FCOE_STATS:
+- qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
++ qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
+ break;
+ case QED_MCP_ISCSI_STATS:
+- qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
++ qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
+ break;
+ default:
+ DP_VERBOSE(cdev, QED_MSG_SP,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+index 8edb450d0abfc..352b757183e8e 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+@@ -266,97 +266,97 @@ union qed_mfw_tlv_data {
+ #define QED_NVM_CFG_OPTION_ENTITY_SEL BIT(4)
+
+ /**
+- * @brief - returns the link params of the hw function
++ * qed_mcp_get_link_params(): Returns the link params of the hw function.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @returns pointer to link params
++ * Returns: Pointer to link params.
+ */
+-struct qed_mcp_link_params *qed_mcp_get_link_params(struct qed_hwfn *);
++struct qed_mcp_link_params *qed_mcp_get_link_params(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief - return the link state of the hw function
++ * qed_mcp_get_link_state(): Return the link state of the hw function.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @returns pointer to link state
++ * Returns: Pointer to link state.
+ */
+-struct qed_mcp_link_state *qed_mcp_get_link_state(struct qed_hwfn *);
++struct qed_mcp_link_state *qed_mcp_get_link_state(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief - return the link capabilities of the hw function
++ * qed_mcp_get_link_capabilities(): Return the link capabilities of the
++ * hw function.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @returns pointer to link capabilities
++ * Returns: Pointer to link capabilities.
+ */
+ struct qed_mcp_link_capabilities
+ *qed_mcp_get_link_capabilities(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief Request the MFW to set the the link according to 'link_input'.
++ * qed_mcp_set_link(): Request the MFW to set the link according
++ * to 'link_input'.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param b_up - raise link if `true'. Reset link if `false'.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @b_up: Raise link if `true'. Reset link if `false'.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_mcp_set_link(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ bool b_up);
+
+ /**
+- * @brief Get the management firmware version value
++ * qed_mcp_get_mfw_ver(): Get the management firmware version value.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_mfw_ver - mfw version value
+- * @param p_running_bundle_id - image id in nvram; Optional.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_mfw_ver: MFW version value.
++ * @p_running_bundle_id: Image id in nvram; Optional.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - operation was successful.
+ */
+ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *p_mfw_ver, u32 *p_running_bundle_id);
+
+ /**
+- * @brief Get the MBI version value
++ * qed_mcp_get_mbi_ver(): Get the MBI version value.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_mbi_ver - A pointer to a variable to be filled with the MBI version.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_mbi_ver: A pointer to a variable to be filled with the MBI version.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - operation was successful.
+ */
+ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_mbi_ver);
+
+ /**
+- * @brief Get media type value of the port.
++ * qed_mcp_get_media_type(): Get media type value of the port.
+ *
+- * @param cdev - qed dev pointer
+- * @param p_ptt
+- * @param mfw_ver - media type value
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @media_type: Media type value
+ *
+- * @return int -
+- * 0 - Operation was successul.
+- * -EBUSY - Operation failed
++ * Return: Int - 0 - Operation was successful.
++ * -EBUSY - Operation failed
+ */
+ int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *media_type);
+
+ /**
+- * @brief Get transceiver data of the port.
++ * qed_mcp_get_transceiver_data(): Get transceiver data of the port.
+ *
+- * @param cdev - qed dev pointer
+- * @param p_ptt
+- * @param p_transceiver_state - transceiver state.
+- * @param p_transceiver_type - media type value
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_transceiver_state: Transceiver state.
++ * @p_tranceiver_type: Media type value.
+ *
+- * @return int -
+- * 0 - Operation was successful.
+- * -EBUSY - Operation failed
++ * Return: Int - 0 - Operation was successful.
++ * -EBUSY - Operation failed
+ */
+ int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -364,50 +364,48 @@ int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+ u32 *p_tranceiver_type);
+
+ /**
+- * @brief Get transceiver supported speed mask.
++ * qed_mcp_trans_speed_mask(): Get transceiver supported speed mask.
+ *
+- * @param cdev - qed dev pointer
+- * @param p_ptt
+- * @param p_speed_mask - Bit mask of all supported speeds.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_speed_mask: Bit mask of all supported speeds.
+ *
+- * @return int -
+- * 0 - Operation was successful.
+- * -EBUSY - Operation failed
++ * Return: Int - 0 - Operation was successful.
++ * -EBUSY - Operation failed
+ */
+
+ int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_speed_mask);
+
+ /**
+- * @brief Get board configuration.
++ * qed_mcp_get_board_config(): Get board configuration.
+ *
+- * @param cdev - qed dev pointer
+- * @param p_ptt
+- * @param p_board_config - Board config.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_board_config: Board config.
+ *
+- * @return int -
+- * 0 - Operation was successful.
+- * -EBUSY - Operation failed
++ * Return: Int - 0 - Operation was successful.
++ * -EBUSY - Operation failed
+ */
+ int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_board_config);
+
+ /**
+- * @brief General function for sending commands to the MCP
+- * mailbox. It acquire mutex lock for the entire
+- * operation, from sending the request until the MCP
+- * response. Waiting for MCP response will be checked up
+- * to 5 seconds every 5ms.
++ * qed_mcp_cmd(): General function for sending commands to the MCP
++ * mailbox. It acquires a mutex lock for the entire
++ * operation, from sending the request until the MCP
++ * response. Waiting for MCP response will be checked up
++ * to 5 seconds every 5ms.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
+- * @param cmd - command to be sent to the MCP.
+- * @param param - Optional param
+- * @param o_mcp_resp - The MCP response code (exclude sequence).
+- * @param o_mcp_param- Optional parameter provided by the MCP
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @cmd: command to be sent to the MCP.
++ * @param: Optional param
++ * @o_mcp_resp: The MCP response code (exclude sequence).
++ * @o_mcp_param: Optional parameter provided by the MCP
+ * response
+- * @return int - 0 - operation
+- * was successul.
++ *
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -417,37 +415,39 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+ u32 *o_mcp_param);
+
+ /**
+- * @brief - drains the nig, allowing completion to pass in case of pauses.
+- * (Should be called only from sleepable context)
++ * qed_mcp_drain(): drains the nig, allowing completion to pass in
++ * case of pauses.
++ * (Should be called only from sleepable context)
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ *
++ * Return: Int.
+ */
+ int qed_mcp_drain(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Get the flash size value
++ * qed_mcp_get_flash_size(): Get the flash size value.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_flash_size - flash size in bytes to be filled.
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @p_flash_size: Flash size in bytes to be filled.
+ *
+- * @return int - 0 - operation was successul.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *p_flash_size);
+
+ /**
+- * @brief Send driver version to MFW
++ * qed_mcp_send_drv_version(): Send driver version to MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param version - Version value
+- * @param name - Protocol driver name
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @p_ver: Version value.
+ *
+- * @return int - 0 - operation was successul.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
+@@ -455,146 +455,148 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_drv_version *p_ver);
+
+ /**
+- * @brief Read the MFW process kill counter
++ * qed_get_process_kill_counter(): Read the MFW process kill counter.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
+ *
+- * @return u32
++ * Return: u32.
+ */
+ u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Trigger a recovery process
++ * qed_start_recovery_process(): Trigger a recovery process.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief A recovery handler must call this function as its first step.
+- * It is assumed that the handler is not run from an interrupt context.
++ * qed_recovery_prolog(): A recovery handler must call this function
++ * as its first step.
++ * It is assumed that the handler is not run from
++ * an interrupt context.
+ *
+- * @param cdev
+- * @param p_ptt
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: int.
+ */
+ int qed_recovery_prolog(struct qed_dev *cdev);
+
+ /**
+- * @brief Notify MFW about the change in base device properties
++ * qed_mcp_ov_update_current_config(): Notify MFW about the change in base
++ * device properties
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param client - qed client type
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @client: Qed client type.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_ov_client client);
+
+ /**
+- * @brief Notify MFW about the driver state
++ * qed_mcp_ov_update_driver_state(): Notify MFW about the driver state.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param drv_state - Driver state
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @drv_state: Driver state.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_ov_driver_state drv_state);
+
+ /**
+- * @brief Send MTU size to MFW
++ * qed_mcp_ov_update_mtu(): Send MTU size to MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param mtu - MTU size
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @mtu: MTU size.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u16 mtu);
+
+ /**
+- * @brief Send MAC address to MFW
++ * qed_mcp_ov_update_mac(): Send MAC address to MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param mac - MAC address
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @mac: MAC address.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 *mac);
+
+ /**
+- * @brief Send WOL mode to MFW
++ * qed_mcp_ov_update_wol(): Send WOL mode to MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param wol - WOL mode
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @wol: WOL mode.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_ov_wol wol);
+
+ /**
+- * @brief Set LED status
++ * qed_mcp_set_led(): Set LED status.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param mode - LED mode
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @mode: LED mode.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum qed_led_mode mode);
+
+ /**
+- * @brief Read from nvm
++ * qed_mcp_nvm_read(): Read from NVM.
+ *
+- * @param cdev
+- * @param addr - nvm offset
+- * @param p_buf - nvm read buffer
+- * @param len - buffer len
++ * @cdev: Qed dev pointer.
++ * @addr: NVM offset.
++ * @p_buf: NVM read buffer.
++ * @len: Buffer len.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
+
+ /**
+- * @brief Write to nvm
++ * qed_mcp_nvm_write(): Write to NVM.
+ *
+- * @param cdev
+- * @param addr - nvm offset
+- * @param cmd - nvm command
+- * @param p_buf - nvm write buffer
+- * @param len - buffer len
++ * @cdev: Qed dev pointer.
++ * @addr: NVM offset.
++ * @cmd: NVM command.
++ * @p_buf: NVM write buffer.
++ * @len: Buffer len.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_nvm_write(struct qed_dev *cdev,
+ u32 cmd, u32 addr, u8 *p_buf, u32 len);
+
+ /**
+- * @brief Check latest response
++ * qed_mcp_nvm_resp(): Check latest response.
+ *
+- * @param cdev
+- * @param p_buf - nvm write buffer
++ * @cdev: Qed dev pointer.
++ * @p_buf: NVM write buffer.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_nvm_resp(struct qed_dev *cdev, u8 *p_buf);
+
+@@ -604,13 +606,13 @@ struct qed_nvm_image_att {
+ };
+
+ /**
+- * @brief Allows reading a whole nvram image
++ * qed_mcp_get_nvm_image_att(): Allows reading a whole nvram image.
+ *
+- * @param p_hwfn
+- * @param image_id - image to get attributes for
+- * @param p_image_att - image attributes structure into which to fill data
++ * @p_hwfn: HW device data.
++ * @image_id: Image to get attributes for.
++ * @p_image_att: Image attributes structure into which to fill data.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
+@@ -618,64 +620,65 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
+ struct qed_nvm_image_att *p_image_att);
+
+ /**
+- * @brief Allows reading a whole nvram image
++ * qed_mcp_get_nvm_image(): Allows reading a whole nvram image.
+ *
+- * @param p_hwfn
+- * @param image_id - image requested for reading
+- * @param p_buffer - allocated buffer into which to fill data
+- * @param buffer_len - length of the allocated buffer.
++ * @p_hwfn: HW device data.
++ * @image_id: image requested for reading.
++ * @p_buffer: allocated buffer into which to fill data.
++ * @buffer_len: length of the allocated buffer.
+ *
+- * @return 0 iff p_buffer now contains the nvram image.
++ * Return: 0 if p_buffer now contains the nvram image.
+ */
+ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
+ enum qed_nvm_images image_id,
+ u8 *p_buffer, u32 buffer_len);
+
+ /**
+- * @brief Bist register test
++ * qed_mcp_bist_register_test(): Bist register test.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Bist clock test
++ * qed_mcp_bist_clock_test(): Bist clock test.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Bist nvm test - get number of images
++ * qed_mcp_bist_nvm_get_num_images(): Bist nvm test - get number of images.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
+- * @param num_images - number of images if operation was
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @num_images: number of images if operation was
+ * successful. 0 if not.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_bist_nvm_get_num_images(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *num_images);
+
+ /**
+- * @brief Bist nvm test - get image attributes by index
++ * qed_mcp_bist_nvm_get_image_att(): Bist nvm test - get image attributes
++ * by index.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
+- * @param p_image_att - Attributes of image
+- * @param image_index - Index of image to get information for
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @p_image_att: Attributes of image.
++ * @image_index: Index of image to get information for.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -683,23 +686,26 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
+ u32 image_index);
+
+ /**
+- * @brief - Processes the TLV request from MFW i.e., get the required TLV info
+- * from the qed client and send it to the MFW.
++ * qed_mfw_process_tlv_req(): Processes the TLV request from MFW i.e.,
++ * get the required TLV info
++ * from the qed client and send it to the MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Send raw debug data to the MFW
++ * qed_mcp_send_raw_debug_data(): Send raw debug data to the MFW
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_buf: raw debug data buffer.
++ * @size: Buffer size.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_buf - raw debug data buffer
+- * @param size - buffer size
++ * Return: Int.
+ */
+ int
+ qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
+@@ -796,47 +802,49 @@ qed_mcp_is_ext_speed_supported(const struct qed_hwfn *p_hwfn)
+ }
+
+ /**
+- * @brief Initialize the interface with the MCP
++ * qed_mcp_cmd_init(): Initialize the interface with the MCP.
+ *
+- * @param p_hwfn - HW func
+- * @param p_ptt - PTT required for register access
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Initialize the port interface with the MCP
++ * qed_mcp_cmd_port_init(): Initialize the port interface with the MCP
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Void.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+ * Can only be called after `num_ports_in_engines' is set
+ */
+ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+ /**
+- * @brief Releases resources allocated during the init process.
++ * qed_mcp_free(): Releases resources allocated during the init process.
+ *
+- * @param p_hwfn - HW func
+- * @param p_ptt - PTT required for register access
++ * @p_hwfn: HW function.
+ *
+- * @return int
++ * Return: Int.
+ */
+
+ int qed_mcp_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief This function is called from the DPC context. After
+- * pointing PTT to the mfw mb, check for events sent by the MCP
+- * to the driver and ack them. In case a critical event
+- * detected, it will be handled here, otherwise the work will be
+- * queued to a sleepable work-queue.
++ * qed_mcp_handle_events(): This function is called from the DPC context.
++ * After pointing PTT to the mfw mb, check for events sent by
++ * the MCP to the driver and ack them. In case a critical event is
++ * detected, it will be handled here, otherwise the work will be
++ * queued to a sleepable work-queue.
++ *
++ * @p_hwfn: HW function.
++ * @p_ptt: PTT required for register access.
+ *
+- * @param p_hwfn - HW function
+- * @param p_ptt - PTT required for register access
+- * @return int - 0 - operation
+- * was successul.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+@@ -858,106 +866,111 @@ struct qed_load_req_params {
+ };
+
+ /**
+- * @brief Sends a LOAD_REQ to the MFW, and in case the operation succeeds,
+- * returns whether this PF is the first on the engine/port or function.
++ * qed_mcp_load_req(): Sends a LOAD_REQ to the MFW, and in case the
++ * operation succeeds, returns whether this PF is
++ * the first on the engine/port or function.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_params
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_params: Params.
+ *
+- * @return int - 0 - Operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_load_req_params *p_params);
+
+ /**
+- * @brief Sends a LOAD_DONE message to the MFW
++ * qed_mcp_load_done(): Sends a LOAD_DONE message to the MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int - 0 - Operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Sends a UNLOAD_REQ message to the MFW
++ * qed_mcp_unload_req(): Sends a UNLOAD_REQ message to the MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int - 0 - Operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Sends a UNLOAD_DONE message to the MFW
++ * qed_mcp_unload_done(): Sends a UNLOAD_DONE message to the MFW
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int - 0 - Operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Read the MFW mailbox into Current buffer.
++ * qed_mcp_read_mb(): Read the MFW mailbox into Current buffer.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Void.
+ */
+ void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Ack to mfw that driver finished FLR process for VFs
++ * qed_mcp_ack_vf_flr(): Ack to mfw that driver finished FLR process for VFs
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param vfs_to_ack - bit mask of all engine VFs for which the PF acks.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @vfs_to_ack: bit mask of all engine VFs for which the PF acks.
+ *
+- * @param return int - 0 upon success.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ack_vf_flr(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *vfs_to_ack);
+
+ /**
+- * @brief - calls during init to read shmem of all function-related info.
++ * qed_mcp_fill_shmem_func_info(): Calls during init to read shmem of
++ * all function-related info.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief - Reset the MCP using mailbox command.
++ * qed_mcp_reset(): Reset the MCP using mailbox command.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mcp_reset(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt);
+
+ /**
+- * @brief - Sends an NVM read command request to the MFW to get
+- * a buffer.
++ * qed_mcp_nvm_rd_cmd(): Sends an NVM read command request to the MFW to get
++ * a buffer.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
+- * DRV_MSG_CODE_NVM_READ_NVRAM commands
+- * @param param - [0:23] - Offset [24:31] - Size
+- * @param o_mcp_resp - MCP response
+- * @param o_mcp_param - MCP response param
+- * @param o_txn_size - Buffer size output
+- * @param o_buf - Pointer to the buffer returned by the MFW.
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @cmd: (Command) DRV_MSG_CODE_NVM_GET_FILE_DATA or
++ * DRV_MSG_CODE_NVM_READ_NVRAM commands.
++ * @param: [0:23] - Offset [24:31] - Size.
++ * @o_mcp_resp: MCP response.
++ * @o_mcp_param: MCP response param.
++ * @o_txn_size: Buffer size output.
++ * @o_buf: Pointer to the buffer returned by the MFW.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -967,60 +980,61 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+ u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
+
+ /**
+- * @brief Read from sfp
++ * qed_mcp_phy_sfp_read(): Read from sfp.
+ *
+- * @param p_hwfn - hw function
+- * @param p_ptt - PTT required for register access
+- * @param port - transceiver port
+- * @param addr - I2C address
+- * @param offset - offset in sfp
+- * @param len - buffer length
+- * @param p_buf - buffer to read into
++ * @p_hwfn: HW device data.
++ * @p_ptt: PTT required for register access.
++ * @port: transceiver port.
++ * @addr: I2C address.
++ * @offset: offset in sfp.
++ * @len: buffer length.
++ * @p_buf: buffer to read into.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf);
+
+ /**
+- * @brief indicates whether the MFW objects [under mcp_info] are accessible
++ * qed_mcp_is_init(): Indicates whether the MFW objects [under mcp_info]
++ * are accessible
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return true iff MFW is running and mcp_info is initialized
++ * Return: true if MFW is running and mcp_info is initialized.
+ */
+ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief request MFW to configure MSI-X for a VF
++ * qed_mcp_config_vf_msix(): Request MFW to configure MSI-X for a VF.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param vf_id - absolute inside engine
+- * @param num_sbs - number of entries to request
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @vf_id: absolute inside engine.
++ * @num: number of entries to request.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 vf_id, u8 num);
+
+ /**
+- * @brief - Halt the MCP.
++ * qed_mcp_halt(): Halt the MCP.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief - Wake up the MCP.
++ * qed_mcp_resume(): Wake up the MCP.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param return 0 upon success.
++ * Return: 0 upon success.
+ */
+ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+@@ -1038,13 +1052,13 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
+ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 mask_parities);
+
+-/* @brief - Gets the mdump retained data from the MFW.
++/* qed_mcp_mdump_get_retain(): Gets the mdump retained data from the MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_mdump_retain
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_mdump_retain: mdump retain.
+ *
+- * @param return 0 upon success.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+@@ -1052,15 +1066,15 @@ qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+ struct mdump_retain_data_stc *p_mdump_retain);
+
+ /**
+- * @brief - Sets the MFW's max value for the given resource
++ * qed_mcp_set_resc_max_val(): Sets the MFW's max value for the given resource.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param res_id
+- * @param resc_max_val
+- * @param p_mcp_resp
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @res_id: RES ID.
++ * @resc_max_val: Resc max val.
++ * @p_mcp_resp: MCP Resp
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
+@@ -1069,16 +1083,17 @@ qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
+ u32 resc_max_val, u32 *p_mcp_resp);
+
+ /**
+- * @brief - Gets the MFW allocation info for the given resource
++ * qed_mcp_get_resc_info(): Gets the MFW allocation info for the given
++ * resource.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param res_id
+- * @param p_mcp_resp
+- * @param p_resc_num
+- * @param p_resc_start
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @res_id: Res ID.
++ * @p_mcp_resp: MCP resp.
++ * @p_resc_num: Resc num.
++ * @p_resc_start: Resc start.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+@@ -1087,13 +1102,13 @@ qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+ u32 *p_mcp_resp, u32 *p_resc_num, u32 *p_resc_start);
+
+ /**
+- * @brief Send eswitch mode to MFW
++ * qed_mcp_ov_update_eswitch(): Send eswitch mode to MFW.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param eswitch - eswitch mode
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @eswitch: eswitch mode.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+@@ -1113,12 +1128,12 @@ enum qed_resc_lock {
+ };
+
+ /**
+- * @brief - Initiates PF FLR
++ * qed_mcp_initiate_pf_flr(): Initiates PF FLR.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int qed_mcp_initiate_pf_flr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+ struct qed_resc_lock_params {
+@@ -1151,13 +1166,13 @@ struct qed_resc_lock_params {
+ };
+
+ /**
+- * @brief Acquires MFW generic resource lock
++ * qed_mcp_resc_lock(): Acquires MFW generic resource lock.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_params
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_params: Params.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
+@@ -1175,13 +1190,13 @@ struct qed_resc_unlock_params {
+ };
+
+ /**
+- * @brief Releases MFW generic resource lock
++ * qed_mcp_resc_unlock(): Releases MFW generic resource lock.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_params
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_params: Params.
+ *
+- * @return int - 0 - operation was successful.
++ * Return: Int - 0 - Operation was successful.
+ */
+ int
+ qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
+@@ -1189,12 +1204,15 @@ qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
+ struct qed_resc_unlock_params *p_params);
+
+ /**
+- * @brief - default initialization for lock/unlock resource structs
++ * qed_mcp_resc_lock_default_init(): Default initialization for
++ * lock/unlock resource structs.
+ *
+- * @param p_lock - lock params struct to be initialized; Can be NULL
+- * @param p_unlock - unlock params struct to be initialized; Can be NULL
+- * @param resource - the requested resource
+- * @paral b_is_permanent - disable retries & aging when set
++ * @p_lock: lock params struct to be initialized; Can be NULL.
++ * @p_unlock: unlock params struct to be initialized; Can be NULL.
++ * @resource: the requested resource.
++ * @b_is_permanent: disable retries & aging when set.
++ *
++ * Return: Void.
+ */
+ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
+ struct qed_resc_unlock_params *p_unlock,
+@@ -1202,94 +1220,117 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
+ resource, bool b_is_permanent);
+
+ /**
+- * @brief - Return whether management firmware support smart AN
++ * qed_mcp_is_smart_an_supported(): Return whether management firmware
++ * supports smart AN.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return bool - true if feature is supported.
++ * Return: bool true if feature is supported.
+ */
+ bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief Learn of supported MFW features; To be done during early init
++ * qed_mcp_get_capabilities(): Learn of supported MFW features;
++ * To be done during early init.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Int.
+ */
+ int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Inform MFW of set of features supported by driver. Should be done
+- * inside the content of the LOAD_REQ.
++ * qed_mcp_set_capabilities(): Inform MFW of set of features supported
++ * by driver. Should be done inside the content
++ * of the LOAD_REQ.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Return: Int.
+ */
+ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Read ufp config from the shared memory.
++ * qed_mcp_read_ufp_config(): Read ufp config from the shared memory.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * Return: Void.
+ */
+ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Populate the nvm info shadow in the given hardware function
++ * qed_mcp_nvm_info_populate(): Populate the nvm info shadow in the given
++ * hardware function.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Int.
+ */
+ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief Delete nvm info shadow in the given hardware function
++ * qed_mcp_nvm_info_free(): Delete nvm info shadow in the given
++ * hardware function.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_mcp_nvm_info_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief Get the engine affinity configuration.
++ * qed_mcp_get_engine_config(): Get the engine affinity configuration.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Int.
+ */
+ int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Get the PPFID bitmap.
++ * qed_mcp_get_ppfid_bitmap(): Get the PPFID bitmap.
+ *
+- * @param p_hwfn
+- * @param p_ptt
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ *
++ * Return: Int.
+ */
+ int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+ /**
+- * @brief Get NVM config attribute value.
++ * qed_mcp_nvm_get_cfg(): Get NVM config attribute value.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @option_id: Option ID.
++ * @entity_id: Entity ID.
++ * @flags: Flags.
++ * @p_buf: Buf.
++ * @p_len: Len.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param option_id
+- * @param entity_id
+- * @param flags
+- * @param p_buf
+- * @param p_len
++ * Return: Int.
+ */
+ int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+ u32 *p_len);
+
+ /**
+- * @brief Set NVM config attribute value.
++ * qed_mcp_nvm_set_cfg(): Set NVM config attribute value.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param option_id
+- * @param entity_id
+- * @param flags
+- * @param p_buf
+- * @param len
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @option_id: Option ID.
++ * @entity_id: Entity ID.
++ * @flags: Flags.
++ * @p_buf: Buf.
++ * @len: Len.
++ *
++ * Return: Int.
+ */
+ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+index 6190adf965bca..f55eed092f25d 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+@@ -422,7 +422,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time,
+ if (p_time->hour > 23)
+ p_time->hour = 0;
+ if (p_time->min > 59)
+- p_time->hour = 0;
++ p_time->min = 0;
+ if (p_time->msec > 999)
+ p_time->msec = 0;
+ if (p_time->usec > 999)
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
+index e27dd9a4547e8..7a3bd749e1e4c 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
+@@ -6,47 +6,47 @@
+ #include <linux/types.h>
+
+ /**
+- * @brief qed_selftest_memory - Perform memory test
++ * qed_selftest_memory(): Perform memory test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_selftest_memory(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_selftest_interrupt - Perform interrupt test
++ * qed_selftest_interrupt(): Perform interrupt test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_selftest_interrupt(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_selftest_register - Perform register test
++ * qed_selftest_register(): Perform register test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_selftest_register(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_selftest_clock - Perform clock test
++ * qed_selftest_clock(): Perform clock test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_selftest_clock(struct qed_dev *cdev);
+
+ /**
+- * @brief qed_selftest_nvram - Perform nvram test
++ * qed_selftest_nvram(): Perform nvram test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_selftest_nvram(struct qed_dev *cdev);
+
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
+index 60ff3222bf551..c5a38f3c92b04 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
+@@ -31,23 +31,18 @@ struct qed_spq_comp_cb {
+ };
+
+ /**
+- * @brief qed_eth_cqe_completion - handles the completion of a
+- * ramrod on the cqe ring
++ * qed_eth_cqe_completion(): handles the completion of a
++ * ramrod on the cqe ring.
+ *
+- * @param p_hwfn
+- * @param cqe
++ * @p_hwfn: HW device data.
++ * @cqe: CQE.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn,
+ struct eth_slow_path_rx_cqe *cqe);
+
+-/**
+- * @file
+- *
+- * QED Slow-hwfn queue interface
+- */
+-
++ /* QED Slow-hwfn queue interface */
+ union ramrod_data {
+ struct pf_start_ramrod_data pf_start;
+ struct pf_update_ramrod_data pf_update;
+@@ -207,117 +202,128 @@ struct qed_spq {
+ };
+
+ /**
+- * @brief qed_spq_post - Posts a Slow hwfn request to FW, or lacking that
+- * Pends it to the future list.
++ * qed_spq_post(): Posts a Slow hwfn request to FW, or lacking that
++ * Pends it to the future list.
+ *
+- * @param p_hwfn
+- * @param p_req
++ * @p_hwfn: HW device data.
++ * @p_ent: Ent.
++ * @fw_return_code: Return code from firmware.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_spq_post(struct qed_hwfn *p_hwfn,
+ struct qed_spq_entry *p_ent,
+ u8 *fw_return_code);
+
+ /**
+- * @brief qed_spq_allocate - Alloocates & initializes the SPQ and EQ.
++ * qed_spq_alloc(): Allocates & initializes the SPQ and EQ.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_spq_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_spq_setup - Reset the SPQ to its start state.
++ * qed_spq_setup(): Reset the SPQ to its start state.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_spq_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_spq_deallocate - Deallocates the given SPQ struct.
++ * qed_spq_free(): Deallocates the given SPQ struct.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_spq_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_spq_get_entry - Obtain an entrry from the spq
+- * free pool list.
+- *
+- *
++ * qed_spq_get_entry(): Obtain an entry from the spq
++ * free pool list.
+ *
+- * @param p_hwfn
+- * @param pp_ent
++ * @p_hwfn: HW device data.
++ * @pp_ent: PP ENT.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int
+ qed_spq_get_entry(struct qed_hwfn *p_hwfn,
+ struct qed_spq_entry **pp_ent);
+
+ /**
+- * @brief qed_spq_return_entry - Return an entry to spq free
+- * pool list
++ * qed_spq_return_entry(): Return an entry to spq free pool list.
+ *
+- * @param p_hwfn
+- * @param p_ent
++ * @p_hwfn: HW device data.
++ * @p_ent: P ENT.
++ *
++ * Return: Void.
+ */
+ void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
+ struct qed_spq_entry *p_ent);
+ /**
+- * @brief qed_eq_allocate - Allocates & initializes an EQ struct
++ * qed_eq_alloc(): Allocates & initializes an EQ struct.
+ *
+- * @param p_hwfn
+- * @param num_elem number of elements in the eq
++ * @p_hwfn: HW device data.
++ * @num_elem: number of elements in the eq.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem);
+
+ /**
+- * @brief qed_eq_setup - Reset the EQ to its start state.
++ * qed_eq_setup(): Reset the EQ to its start state.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_eq_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_eq_free - deallocates the given EQ struct.
++ * qed_eq_free(): deallocates the given EQ struct.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_eq_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_eq_prod_update - update the FW with default EQ producer
++ * qed_eq_prod_update(): update the FW with default EQ producer.
++ *
++ * @p_hwfn: HW device data.
++ * @prod: Prod.
+ *
+- * @param p_hwfn
+- * @param prod
++ * Return: Void.
+ */
+ void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
+ u16 prod);
+
+ /**
+- * @brief qed_eq_completion - Completes currently pending EQ elements
++ * qed_eq_completion(): Completes currently pending EQ elements.
+ *
+- * @param p_hwfn
+- * @param cookie
++ * @p_hwfn: HW device data.
++ * @cookie: Cookie.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_eq_completion(struct qed_hwfn *p_hwfn,
+ void *cookie);
+
+ /**
+- * @brief qed_spq_completion - Completes a single event
++ * qed_spq_completion(): Completes a single event.
+ *
+- * @param p_hwfn
+- * @param echo - echo value from cookie (used for determining completion)
+- * @param p_data - data from cookie (used in callback function if applicable)
++ * @p_hwfn: HW device data.
++ * @echo: echo value from cookie (used for determining completion).
++ * @fw_return_code: FW return code.
++ * @p_data: data from cookie (used in callback function if applicable).
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_spq_completion(struct qed_hwfn *p_hwfn,
+ __le16 echo,
+@@ -325,44 +331,43 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
+ union event_ring_data *p_data);
+
+ /**
+- * @brief qed_spq_get_cid - Given p_hwfn, return cid for the hwfn's SPQ
++ * qed_spq_get_cid(): Given p_hwfn, return cid for the hwfn's SPQ.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return u32 - SPQ CID
++ * Return: u32 - SPQ CID.
+ */
+ u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_consq_alloc - Allocates & initializes an ConsQ
+- * struct
++ * qed_consq_alloc(): Allocates & initializes a ConsQ struct.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_consq_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_consq_setup - Reset the ConsQ to its start state.
++ * qed_consq_setup(): Reset the ConsQ to its start state.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_consq_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_consq_free - deallocates the given ConsQ struct.
++ * qed_consq_free(): deallocates the given ConsQ struct.
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_consq_free(struct qed_hwfn *p_hwfn);
+ int qed_spq_pend_post(struct qed_hwfn *p_hwfn);
+
+-/**
+- * @file
+- *
+- * @brief Slow-hwfn low-level commands (Ramrods) function definitions.
+- */
++/* Slow-hwfn low-level commands (Ramrods) function definitions. */
+
+ #define QED_SP_EQ_COMPLETION 0x01
+ #define QED_SP_CQE_COMPLETION 0x02
+@@ -377,12 +382,15 @@ struct qed_sp_init_data {
+ };
+
+ /**
+- * @brief Returns a SPQ entry to the pool / frees the entry if allocated.
+- * Should be called on in error flows after initializing the SPQ entry
+- * and before posting it.
++ * qed_sp_destroy_request(): Returns a SPQ entry to the pool / frees the
++ * entry if allocated. Should be called in error
++ * flows after initializing the SPQ entry
++ * and before posting it.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ent: Ent.
+ *
+- * @param p_hwfn
+- * @param p_ent
++ * Return: Void.
+ */
+ void qed_sp_destroy_request(struct qed_hwfn *p_hwfn,
+ struct qed_spq_entry *p_ent);
+@@ -394,7 +402,14 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
+ struct qed_sp_init_data *p_data);
+
+ /**
+- * @brief qed_sp_pf_start - PF Function Start Ramrod
++ * qed_sp_pf_start(): PF Function Start Ramrod.
++ *
++ * @p_hwfn: HW device data.
++ * @p_ptt: P_ptt.
++ * @p_tunn: P_tunn.
++ * @allow_npar_tx_switch: Allow NPAR TX Switch.
++ *
++ * Return: Int.
+ *
+ * This ramrod is sent to initialize a physical function (PF). It will
+ * configure the function related parameters and write its completion to the
+@@ -404,12 +419,6 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
+ * allocated by the driver on host memory and its parameters are written
+ * to the internal RAM of the UStorm by the Function Start Ramrod.
+ *
+- * @param p_hwfn
+- * @param p_ptt
+- * @param p_tunn
+- * @param allow_npar_tx_switch
+- *
+- * @return int
+ */
+
+ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
+@@ -418,47 +427,33 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
+ bool allow_npar_tx_switch);
+
+ /**
+- * @brief qed_sp_pf_update - PF Function Update Ramrod
++ * qed_sp_pf_update(): PF Function Update Ramrod.
+ *
+- * This ramrod updates function-related parameters. Every parameter can be
+- * updated independently, according to configuration flags.
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Int.
+ *
+- * @return int
++ * This ramrod updates function-related parameters. Every parameter can be
++ * updated independently, according to configuration flags.
+ */
+
+ int qed_sp_pf_update(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_sp_pf_update_stag - Update firmware of new outer tag
++ * qed_sp_pf_update_stag(): Update firmware of new outer tag.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_sp_pf_stop - PF Function Stop Ramrod
+- *
+- * This ramrod is sent to close a Physical Function (PF). It is the last ramrod
+- * sent and the last completion written to the PFs Event Ring. This ramrod also
+- * deletes the context for the Slowhwfn connection on this PF.
+- *
+- * @note Not required for first packet.
+- *
+- * @param p_hwfn
+- *
+- * @return int
+- */
+-
+-/**
+- * @brief qed_sp_pf_update_ufp - PF ufp update Ramrod
++ * qed_sp_pf_update_ufp(): PF ufp update Ramrod.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn);
+
+@@ -470,11 +465,11 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
+ enum spq_mode comp_mode,
+ struct qed_spq_comp_cb *p_comp_data);
+ /**
+- * @brief qed_sp_heartbeat_ramrod - Send empty Ramrod
++ * qed_sp_heartbeat_ramrod(): Send empty Ramrod.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+
+ int qed_sp_heartbeat_ramrod(struct qed_hwfn *p_hwfn);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+index ed2b6fe5a78d3..bf0ba3855da1d 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+@@ -2982,12 +2982,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn,
+ u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED;
+ struct qed_filter_accept_flags *flags = &params->accept_flags;
+ struct qed_public_vf_info *vf_info;
++ u16 tlv_mask;
++
++ tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) |
++ BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN);
+
+ /* Untrusted VFs can't even be trusted to know that fact.
+ * Simply indicate everything is configured fine, and trace
+ * configuration 'behind their back'.
+ */
+- if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM)))
++ if (!(*tlvs & tlv_mask))
+ return 0;
+
+ vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true);
+@@ -3004,6 +3008,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn,
+ flags->tx_accept_filter &= ~mask;
+ }
+
++ if (params->update_accept_any_vlan_flg) {
++ vf_info->accept_any_vlan = params->accept_any_vlan;
++
++ if (vf_info->forced_vlan && !vf_info->is_trusted_configured)
++ params->accept_any_vlan = false;
++ }
++
+ return 0;
+ }
+
+@@ -3778,11 +3789,11 @@ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs)
+ return found;
+ }
+
+-static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
+- u16 vfid,
+- struct qed_mcp_link_params *p_params,
+- struct qed_mcp_link_state *p_link,
+- struct qed_mcp_link_capabilities *p_caps)
++static int qed_iov_get_link(struct qed_hwfn *p_hwfn,
++ u16 vfid,
++ struct qed_mcp_link_params *p_params,
++ struct qed_mcp_link_state *p_link,
++ struct qed_mcp_link_capabilities *p_caps)
+ {
+ struct qed_vf_info *p_vf = qed_iov_get_vf_info(p_hwfn,
+ vfid,
+@@ -3790,7 +3801,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
+ struct qed_bulletin_content *p_bulletin;
+
+ if (!p_vf)
+- return;
++ return -EINVAL;
+
+ p_bulletin = p_vf->bulletin.p_virt;
+
+@@ -3800,6 +3811,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
+ __qed_vf_get_link_state(p_hwfn, p_link, p_bulletin);
+ if (p_caps)
+ __qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
++ return 0;
+ }
+
+ static int
+@@ -4366,6 +4378,9 @@ qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate)
+ }
+
+ vf = qed_iov_get_vf_info(QED_LEADING_HWFN(cdev), (u16)vfid, true);
++ if (!vf)
++ return -EINVAL;
++
+ vport_id = vf->vport_id;
+
+ return qed_configure_vport_wfq(cdev, vport_id, rate);
+@@ -4658,6 +4673,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
+ struct qed_public_vf_info *vf_info;
+ struct qed_mcp_link_state link;
+ u32 tx_rate;
++ int ret;
+
+ /* Sanitize request */
+ if (IS_VF(cdev))
+@@ -4671,7 +4687,9 @@ static int qed_get_vf_config(struct qed_dev *cdev,
+
+ vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true);
+
+- qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
++ ret = qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL);
++ if (ret)
++ return ret;
+
+ /* Fill information about VF */
+ ivi->vf = vf_id;
+@@ -4687,6 +4705,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
+ tx_rate = vf_info->tx_rate;
+ ivi->max_tx_rate = tx_rate ? tx_rate : link.speed;
+ ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id);
++ ivi->trusted = vf_info->is_trusted_request;
+
+ return 0;
+ }
+@@ -5108,7 +5127,7 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
+
+ /* Validate that the VF has a configured vport */
+ vf = qed_iov_get_vf_info(hwfn, i, true);
+- if (!vf->vport_instance)
++ if (!vf || !vf->vport_instance)
+ continue;
+
+ memset(&params, 0, sizeof(params));
+@@ -5117,6 +5136,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
+
+ params.update_ctl_frame_check = 1;
+ params.mac_chk_en = !vf_info->is_trusted_configured;
++ params.update_accept_any_vlan_flg = 0;
++
++ if (vf_info->accept_any_vlan && vf_info->forced_vlan) {
++ params.update_accept_any_vlan_flg = 1;
++ params.accept_any_vlan = vf_info->accept_any_vlan;
++ }
+
+ if (vf_info->rx_accept_mode & mask) {
+ flags->update_rx_mode_config = 1;
+@@ -5132,13 +5157,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn)
+ if (!vf_info->is_trusted_configured) {
+ flags->rx_accept_filter &= ~mask;
+ flags->tx_accept_filter &= ~mask;
++ params.accept_any_vlan = false;
+ }
+
+ if (flags->update_rx_mode_config ||
+ flags->update_tx_mode_config ||
+- params.update_ctl_frame_check)
++ params.update_ctl_frame_check ||
++ params.update_accept_any_vlan_flg) {
++ DP_VERBOSE(hwfn, QED_MSG_IOV,
++ "vport update config for %s VF[abs 0x%x rel 0x%x]\n",
++ vf_info->is_trusted_configured ? "trusted" : "untrusted",
++ vf->abs_vf_id, vf->relative_vf_id);
+ qed_sp_vport_update(hwfn, &params,
+ QED_SPQ_MODE_EBLOCK, NULL);
++ }
+ }
+ }
+
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+index eacd6457f195c..0a1e44d45c1a2 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+@@ -62,6 +62,7 @@ struct qed_public_vf_info {
+ bool is_trusted_request;
+ u8 rx_accept_mode;
+ u8 tx_accept_mode;
++ bool accept_any_vlan;
+ };
+
+ struct qed_iov_vf_init_params {
+@@ -250,29 +251,31 @@ extern const struct qed_iov_hv_ops qed_iov_ops_pass;
+
+ #ifdef CONFIG_QED_SRIOV
+ /**
+- * @brief Check if given VF ID @vfid is valid
+- * w.r.t. @b_enabled_only value
+- * if b_enabled_only = true - only enabled VF id is valid
+- * else any VF id less than max_vfs is valid
++ * qed_iov_is_valid_vfid(): Check if given VF ID @vfid is valid
++ * w.r.t. @b_enabled_only value
++ * if b_enabled_only = true - only enabled
++ * VF id is valid.
++ * else any VF id less than max_vfs is valid.
+ *
+- * @param p_hwfn
+- * @param rel_vf_id - Relative VF ID
+- * @param b_enabled_only - consider only enabled VF
+- * @param b_non_malicious - true iff we want to validate vf isn't malicious.
++ * @p_hwfn: HW device data.
++ * @rel_vf_id: Relative VF ID.
++ * @b_enabled_only: consider only enabled VF.
++ * @b_non_malicious: true iff we want to validate vf isn't malicious.
+ *
+- * @return bool - true for valid VF ID
++ * Return: bool - true for valid VF ID
+ */
+ bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
+ int rel_vf_id,
+ bool b_enabled_only, bool b_non_malicious);
+
+ /**
+- * @brief - Given a VF index, return index of next [including that] active VF.
++ * qed_iov_get_next_active_vf(): Given a VF index, return index of
++ * next [including that] active VF.
+ *
+- * @param p_hwfn
+- * @param rel_vf_id
++ * @p_hwfn: HW device data.
++ * @rel_vf_id: VF ID.
+ *
+- * @return MAX_NUM_VFS in case no further active VFs, otherwise index.
++ * Return: MAX_NUM_VFS in case no further active VFs, otherwise index.
+ */
+ u16 qed_iov_get_next_active_vf(struct qed_hwfn *p_hwfn, u16 rel_vf_id);
+
+@@ -280,83 +283,92 @@ void qed_iov_bulletin_set_udp_ports(struct qed_hwfn *p_hwfn,
+ int vfid, u16 vxlan_port, u16 geneve_port);
+
+ /**
+- * @brief Read sriov related information and allocated resources
+- * reads from configuration space, shmem, etc.
++ * qed_iov_hw_info(): Read sriov related information and allocate
++ * resources by reading from configuration space, shmem, etc.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_iov_hw_info(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_add_tlv - place a given tlv on the tlv buffer at next offset
++ * qed_add_tlv(): place a given tlv on the tlv buffer at next offset
+ *
+- * @param p_hwfn
+- * @param p_iov
+- * @param type
+- * @param length
++ * @p_hwfn: HW device data.
++ * @offset: offset.
++ * @type: Type.
++ * @length: Length.
+ *
+- * @return pointer to the newly placed tlv
++ * Return: pointer to the newly placed tlv
+ */
+ void *qed_add_tlv(struct qed_hwfn *p_hwfn, u8 **offset, u16 type, u16 length);
+
+ /**
+- * @brief list the types and lengths of the tlvs on the buffer
++ * qed_dp_tlv_list(): list the types and lengths of the tlvs on the buffer
+ *
+- * @param p_hwfn
+- * @param tlvs_list
++ * @p_hwfn: HW device data.
++ * @tlvs_list: Tlvs_list.
++ *
++ * Return: Void.
+ */
+ void qed_dp_tlv_list(struct qed_hwfn *p_hwfn, void *tlvs_list);
+
+ /**
+- * @brief qed_iov_alloc - allocate sriov related resources
++ * qed_iov_alloc(): allocate sriov related resources
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_iov_alloc(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_iov_setup - setup sriov related resources
++ * qed_iov_setup(): setup sriov related resources
++ *
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
++ * Return: Void.
+ */
+ void qed_iov_setup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_iov_free - free sriov related resources
++ * qed_iov_free(): free sriov related resources
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
++ *
++ * Return: Void.
+ */
+ void qed_iov_free(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief free sriov related memory that was allocated during hw_prepare
++ * qed_iov_free_hw_info(): free sriov related memory that was
++ * allocated during hw_prepare
++ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: Void.
+ */
+ void qed_iov_free_hw_info(struct qed_dev *cdev);
+
+ /**
+- * @brief Mark structs of vfs that have been FLR-ed.
++ * qed_iov_mark_vf_flr(): Mark structs of vfs that have been FLR-ed.
+ *
+- * @param p_hwfn
+- * @param disabled_vfs - bitmask of all VFs on path that were FLRed
++ * @p_hwfn: HW device data.
++ * @disabled_vfs: bitmask of all VFs on path that were FLRed
+ *
+- * @return true iff one of the PF's vfs got FLRed. false otherwise.
++ * Return: true iff one of the PF's vfs got FLRed. false otherwise.
+ */
+ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *disabled_vfs);
+
+ /**
+- * @brief Search extended TLVs in request/reply buffer.
++ * qed_iov_search_list_tlvs(): Search extended TLVs in request/reply buffer.
+ *
+- * @param p_hwfn
+- * @param p_tlvs_list - Pointer to tlvs list
+- * @param req_type - Type of TLV
++ * @p_hwfn: HW device data.
++ * @p_tlvs_list: Pointer to tlvs list
++ * @req_type: Type of TLV
+ *
+- * @return pointer to tlv type if found, otherwise returns NULL.
++ * Return: pointer to tlv type if found, otherwise returns NULL.
+ */
+ void *qed_iov_search_list_tlvs(struct qed_hwfn *p_hwfn,
+ void *p_tlvs_list, u16 req_type);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
+index 72a38d53d33f6..e2a5a6a373cbe 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
+@@ -513,6 +513,9 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
+ p_iov->bulletin.size,
+ &p_iov->bulletin.phys,
+ GFP_KERNEL);
++ if (!p_iov->bulletin.p_virt)
++ goto free_pf2vf_reply;
++
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "VF's bulletin Board [%p virt 0x%llx phys 0x%08x bytes]\n",
+ p_iov->bulletin.p_virt,
+@@ -552,6 +555,10 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
+
+ return rc;
+
++free_pf2vf_reply:
++ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
++ sizeof(union pfvf_tlvs),
++ p_iov->pf2vf_reply, p_iov->pf2vf_reply_phys);
+ free_vf2pf_request:
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(union vfpf_tlvs),
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
+index 60d2bb64e65fb..976201fc7d4ae 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
+@@ -688,13 +688,16 @@ struct qed_vf_iov {
+ };
+
+ /**
+- * @brief VF - Set Rx/Tx coalesce per VF's relative queue.
+- * Coalesce value '0' will omit the configuration.
++ * qed_vf_pf_set_coalesce(): VF - Set Rx/Tx coalesce per VF's relative queue.
++ * Coalesce value '0' will omit the
++ * configuration.
+ *
+- * @param p_hwfn
+- * @param rx_coal - coalesce value in micro second for rx queue
+- * @param tx_coal - coalesce value in micro second for tx queue
+- * @param p_cid - queue cid
++ * @p_hwfn: HW device data.
++ * @rx_coal: coalesce value in micro second for rx queue.
++ * @tx_coal: coalesce value in micro second for tx queue.
++ * @p_cid: queue cid.
++ *
++ * Return: Int.
+ *
+ **/
+ int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+@@ -702,148 +705,172 @@ int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+ u16 tx_coal, struct qed_queue_cid *p_cid);
+
+ /**
+- * @brief VF - Get coalesce per VF's relative queue.
++ * qed_vf_pf_get_coalesce(): VF - Get coalesce per VF's relative queue.
+ *
+- * @param p_hwfn
+- * @param p_coal - coalesce value in micro second for VF queues.
+- * @param p_cid - queue cid
++ * @p_hwfn: HW device data.
++ * @p_coal: coalesce value in micro second for VF queues.
++ * @p_cid: queue cid.
+ *
++ * Return: Int.
+ **/
+ int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+ u16 *p_coal, struct qed_queue_cid *p_cid);
+
+ #ifdef CONFIG_QED_SRIOV
+ /**
+- * @brief Read the VF bulletin and act on it if needed
++ * qed_vf_read_bulletin(): Read the VF bulletin and act on it if needed.
+ *
+- * @param p_hwfn
+- * @param p_change - qed fills 1 iff bulletin board has changed, 0 otherwise.
++ * @p_hwfn: HW device data.
++ * @p_change: qed fills 1 iff bulletin board has changed, 0 otherwise.
+ *
+- * @return enum _qed_status
++ * Return: enum _qed_status.
+ */
+ int qed_vf_read_bulletin(struct qed_hwfn *p_hwfn, u8 *p_change);
+
+ /**
+- * @brief Get link paramters for VF from qed
++ * qed_vf_get_link_params(): Get link parameters for VF from qed
++ *
++ * @p_hwfn: HW device data.
++ * @params: the link params structure to be filled for the VF.
+ *
+- * @param p_hwfn
+- * @param params - the link params structure to be filled for the VF
++ * Return: Void.
+ */
+ void qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_params *params);
+
+ /**
+- * @brief Get link state for VF from qed
++ * qed_vf_get_link_state(): Get link state for VF from qed.
++ *
++ * @p_hwfn: HW device data.
++ * @link: the link state structure to be filled for the VF
+ *
+- * @param p_hwfn
+- * @param link - the link state structure to be filled for the VF
++ * Return: Void.
+ */
+ void qed_vf_get_link_state(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_state *link);
+
+ /**
+- * @brief Get link capabilities for VF from qed
++ * qed_vf_get_link_caps(): Get link capabilities for VF from qed.
+ *
+- * @param p_hwfn
+- * @param p_link_caps - the link capabilities structure to be filled for the VF
++ * @p_hwfn: HW device data.
++ * @p_link_caps: the link capabilities structure to be filled for the VF
++ *
++ * Return: Void.
+ */
+ void qed_vf_get_link_caps(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_capabilities *p_link_caps);
+
+ /**
+- * @brief Get number of Rx queues allocated for VF by qed
++ * qed_vf_get_num_rxqs(): Get number of Rx queues allocated for VF by qed
++ *
++ * @p_hwfn: HW device data.
++ * @num_rxqs: allocated RX queues
+ *
+- * @param p_hwfn
+- * @param num_rxqs - allocated RX queues
++ * Return: Void.
+ */
+ void qed_vf_get_num_rxqs(struct qed_hwfn *p_hwfn, u8 *num_rxqs);
+
+ /**
+- * @brief Get number of Rx queues allocated for VF by qed
++ * qed_vf_get_num_txqs(): Get number of Tx queues allocated for VF by qed
+ *
+- * @param p_hwfn
+- * @param num_txqs - allocated RX queues
++ * @p_hwfn: HW device data.
++ * @num_txqs: allocated TX queues
++ *
++ * Return: Void.
+ */
+ void qed_vf_get_num_txqs(struct qed_hwfn *p_hwfn, u8 *num_txqs);
+
+ /**
+- * @brief Get number of available connections [both Rx and Tx] for VF
++ * qed_vf_get_num_cids(): Get number of available connections
++ * [both Rx and Tx] for VF
++ *
++ * @p_hwfn: HW device data.
++ * @num_cids: allocated number of connections
+ *
+- * @param p_hwfn
+- * @param num_cids - allocated number of connections
++ * Return: Void.
+ */
+ void qed_vf_get_num_cids(struct qed_hwfn *p_hwfn, u8 *num_cids);
+
+ /**
+- * @brief Get port mac address for VF
++ * qed_vf_get_port_mac(): Get port mac address for VF.
+ *
+- * @param p_hwfn
+- * @param port_mac - destination location for port mac
++ * @p_hwfn: HW device data.
++ * @port_mac: destination location for port mac
++ *
++ * Return: Void.
+ */
+ void qed_vf_get_port_mac(struct qed_hwfn *p_hwfn, u8 *port_mac);
+
+ /**
+- * @brief Get number of VLAN filters allocated for VF by qed
++ * qed_vf_get_num_vlan_filters(): Get number of VLAN filters allocated
++ * for VF by qed.
++ *
++ * @p_hwfn: HW device data.
++ * @num_vlan_filters: allocated VLAN filters
+ *
+- * @param p_hwfn
+- * @param num_rxqs - allocated VLAN filters
++ * Return: Void.
+ */
+ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
+ u8 *num_vlan_filters);
+
+ /**
+- * @brief Get number of MAC filters allocated for VF by qed
++ * qed_vf_get_num_mac_filters(): Get number of MAC filters allocated
++ * for VF by qed
+ *
+- * @param p_hwfn
+- * @param num_rxqs - allocated MAC filters
++ * @p_hwfn: HW device data.
++ * @num_mac_filters: allocated MAC filters
++ *
++ * Return: Void.
+ */
+ void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters);
+
+ /**
+- * @brief Check if VF can set a MAC address
++ * qed_vf_check_mac(): Check if VF can set a MAC address
+ *
+- * @param p_hwfn
+- * @param mac
++ * @p_hwfn: HW device data.
++ * @mac: Mac.
+ *
+- * @return bool
++ * Return: bool.
+ */
+ bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac);
+
+ /**
+- * @brief Set firmware version information in dev_info from VFs acquire response tlv
++ * qed_vf_get_fw_version(): Set firmware version information
++ * in dev_info from VFs acquire response tlv
++ *
++ * @p_hwfn: HW device data.
++ * @fw_major: FW major.
++ * @fw_minor: FW minor.
++ * @fw_rev: FW rev.
++ * @fw_eng: FW eng.
+ *
+- * @param p_hwfn
+- * @param fw_major
+- * @param fw_minor
+- * @param fw_rev
+- * @param fw_eng
++ * Return: Void.
+ */
+ void qed_vf_get_fw_version(struct qed_hwfn *p_hwfn,
+ u16 *fw_major, u16 *fw_minor,
+ u16 *fw_rev, u16 *fw_eng);
+
+ /**
+- * @brief hw preparation for VF
+- * sends ACQUIRE message
++ * qed_vf_hw_prepare(): hw preparation for VF; sends ACQUIRE message.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief VF - start the RX Queue by sending a message to the PF
+- * @param p_hwfn
+- * @param p_cid - Only relative fields are relevant
+- * @param bd_max_bytes - maximum number of bytes per bd
+- * @param bd_chain_phys_addr - physical address of bd chain
+- * @param cqe_pbl_addr - physical address of pbl
+- * @param cqe_pbl_size - pbl size
+- * @param pp_prod - pointer to the producer to be
+- * used in fastpath
++ * qed_vf_pf_rxq_start(): start the RX Queue by sending a message to the PF
++ *
++ * @p_hwfn: HW device data.
++ * @p_cid: Only relative fields are relevant
++ * @bd_max_bytes: maximum number of bytes per bd
++ * @bd_chain_phys_addr: physical address of bd chain
++ * @cqe_pbl_addr: physical address of pbl
++ * @cqe_pbl_size: pbl size
++ * @pp_prod: pointer to the producer to be used in fastpath
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
+ struct qed_queue_cid *p_cid,
+@@ -853,18 +880,16 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
+ u16 cqe_pbl_size, void __iomem **pp_prod);
+
+ /**
+- * @brief VF - start the TX queue by sending a message to the
+- * PF.
++ * qed_vf_pf_txq_start(): VF - start the TX queue by sending a message to the
++ * PF.
+ *
+- * @param p_hwfn
+- * @param tx_queue_id - zero based within the VF
+- * @param sb - status block for this queue
+- * @param sb_index - index within the status block
+- * @param bd_chain_phys_addr - physical address of tx chain
+- * @param pp_doorbell - pointer to address to which to
+- * write the doorbell too..
++ * @p_hwfn: HW device data.
++ * @p_cid: CID.
++ * @pbl_addr: PBL address.
++ * @pbl_size: PBL Size.
++ * @pp_doorbell: pointer to address to which to write the doorbell.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int
+ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+@@ -873,90 +898,91 @@ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+ u16 pbl_size, void __iomem **pp_doorbell);
+
+ /**
+- * @brief VF - stop the RX queue by sending a message to the PF
++ * qed_vf_pf_rxq_stop(): VF - stop the RX queue by sending a message to the PF.
+ *
+- * @param p_hwfn
+- * @param p_cid
+- * @param cqe_completion
++ * @p_hwfn: HW device data.
++ * @p_cid: CID.
++ * @cqe_completion: CQE Completion.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
+ struct qed_queue_cid *p_cid, bool cqe_completion);
+
+ /**
+- * @brief VF - stop the TX queue by sending a message to the PF
++ * qed_vf_pf_txq_stop(): VF - stop the TX queue by sending a message to the PF.
+ *
+- * @param p_hwfn
+- * @param tx_qid
++ * @p_hwfn: HW device data.
++ * @p_cid: CID.
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid);
+
+ /**
+- * @brief VF - send a vport update command
++ * qed_vf_pf_vport_update(): VF - send a vport update command.
+ *
+- * @param p_hwfn
+- * @param params
++ * @p_hwfn: HW device data.
++ * @p_params: Params
+ *
+- * @return int
++ * Return: Int.
+ */
+ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn,
+ struct qed_sp_vport_update_params *p_params);
+
+ /**
++ * qed_vf_pf_reset(): VF - send a close message to PF.
+ *
+- * @brief VF - send a close message to PF
++ * @p_hwfn: HW device data.
+ *
+- * @param p_hwfn
+- *
+- * @return enum _qed_status
++ * Return: enum _qed_status
+ */
+ int qed_vf_pf_reset(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief VF - free vf`s memories
++ * qed_vf_pf_release(): VF - free vf`s memories.
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return enum _qed_status
++ * Return: enum _qed_status
+ */
+ int qed_vf_pf_release(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief qed_vf_get_igu_sb_id - Get the IGU SB ID for a given
++ * qed_vf_get_igu_sb_id(): Get the IGU SB ID for a given
+ * sb_id. For VFs igu sbs don't have to be contiguous
+ *
+- * @param p_hwfn
+- * @param sb_id
++ * @p_hwfn: HW device data.
++ * @sb_id: SB ID.
+ *
+- * @return INLINE u16
++ * Return: INLINE u16
+ */
+ u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id);
+
+ /**
+- * @brief Stores [or removes] a configured sb_info.
++ * qed_vf_set_sb_info(): Stores [or removes] a configured sb_info.
++ *
++ * @p_hwfn: HW device data.
++ * @sb_id: zero-based SB index [for fastpath]
++ * @p_sb: may be NULL [during removal].
+ *
+- * @param p_hwfn
+- * @param sb_id - zero-based SB index [for fastpath]
+- * @param sb_info - may be NULL [during removal].
++ * Return: Void.
+ */
+ void qed_vf_set_sb_info(struct qed_hwfn *p_hwfn,
+ u16 sb_id, struct qed_sb_info *p_sb);
+
+ /**
+- * @brief qed_vf_pf_vport_start - perform vport start for VF.
++ * qed_vf_pf_vport_start(): perform vport start for VF.
+ *
+- * @param p_hwfn
+- * @param vport_id
+- * @param mtu
+- * @param inner_vlan_removal
+- * @param tpa_mode
+- * @param max_buffers_per_cqe,
+- * @param only_untagged - default behavior regarding vlan acceptance
++ * @p_hwfn: HW device data.
++ * @vport_id: Vport ID.
++ * @mtu: MTU.
++ * @inner_vlan_removal: Inner VLAN removal.
++ * @tpa_mode: TPA mode.
++ * @max_buffers_per_cqe: Max buffers per CQE.
++ * @only_untagged: default behavior regarding vlan acceptance
+ *
+- * @return enum _qed_status
++ * Return: enum _qed_status
+ */
+ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn,
+ u8 vport_id,
+@@ -966,11 +992,11 @@ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn,
+ u8 max_buffers_per_cqe, u8 only_untagged);
+
+ /**
+- * @brief qed_vf_pf_vport_stop - stop the VF's vport
++ * qed_vf_pf_vport_stop(): stop the VF's vport
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return enum _qed_status
++ * Return: enum _qed_status
+ */
+ int qed_vf_pf_vport_stop(struct qed_hwfn *p_hwfn);
+
+@@ -981,42 +1007,49 @@ void qed_vf_pf_filter_mcast(struct qed_hwfn *p_hwfn,
+ struct qed_filter_mcast *p_filter_cmd);
+
+ /**
+- * @brief qed_vf_pf_int_cleanup - clean the SB of the VF
++ * qed_vf_pf_int_cleanup(): clean the SB of the VF
+ *
+- * @param p_hwfn
++ * @p_hwfn: HW device data.
+ *
+- * @return enum _qed_status
++ * Return: enum _qed_status
+ */
+ int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn);
+
+ /**
+- * @brief - return the link params in a given bulletin board
++ * __qed_vf_get_link_params(): return the link params in a given bulletin board
+ *
+- * @param p_hwfn
+- * @param p_params - pointer to a struct to fill with link params
+- * @param p_bulletin
++ * @p_hwfn: HW device data.
++ * @p_params: pointer to a struct to fill with link params
++ * @p_bulletin: Bulletin.
++ *
++ * Return: Void.
+ */
+ void __qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_params *p_params,
+ struct qed_bulletin_content *p_bulletin);
+
+ /**
+- * @brief - return the link state in a given bulletin board
++ * __qed_vf_get_link_state(): return the link state in a given bulletin board
++ *
++ * @p_hwfn: HW device data.
++ * @p_link: pointer to a struct to fill with link state
++ * @p_bulletin: Bulletin.
+ *
+- * @param p_hwfn
+- * @param p_link - pointer to a struct to fill with link state
+- * @param p_bulletin
++ * Return: Void.
+ */
+ void __qed_vf_get_link_state(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_state *p_link,
+ struct qed_bulletin_content *p_bulletin);
+
+ /**
+- * @brief - return the link capabilities in a given bulletin board
++ * __qed_vf_get_link_caps(): return the link capabilities in a given
++ * bulletin board
+ *
+- * @param p_hwfn
+- * @param p_link - pointer to a struct to fill with link capabilities
+- * @param p_bulletin
++ * @p_hwfn: HW device data.
++ * @p_link_caps: pointer to a struct to fill with link capabilities
++ * @p_bulletin: Bulletin.
++ *
++ * Return: Void.
+ */
+ void __qed_vf_get_link_caps(struct qed_hwfn *p_hwfn,
+ struct qed_mcp_link_capabilities *p_link_caps,
+@@ -1029,9 +1062,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
+
+ u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id);
+ /**
+- * @brief - Ask PF to update the MAC address in it's bulletin board
++ * qed_vf_pf_bulletin_update_mac(): Ask PF to update the MAC address in
++ * its bulletin board.
++ *
++ * @p_hwfn: HW device data.
++ * @p_mac: mac address to be updated in bulletin board
+ *
+- * @param p_mac - mac address to be updated in bulletin board
++ * Return: Int.
+ */
+ int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac);
+
+diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
+index f90dcfe9ee688..8a63f99d499c4 100644
+--- a/drivers/net/ethernet/qlogic/qede/qede.h
++++ b/drivers/net/ethernet/qlogic/qede/qede.h
+@@ -271,6 +271,10 @@ struct qede_dev {
+ #define QEDE_ERR_WARN 3
+
+ struct qede_dump_info dump_info;
++ struct delayed_work periodic_task;
++ unsigned long stats_coal_ticks;
++ u32 stats_coal_usecs;
++ spinlock_t stats_lock; /* lock for vport stats access */
+ };
+
+ enum QEDE_STATE {
+diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+index 8284c4c1528f7..28108f6324fb2 100644
+--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
++++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+@@ -426,6 +426,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
+ }
+ }
+
++ spin_lock(&edev->stats_lock);
++
+ for (i = 0; i < QEDE_NUM_STATS; i++) {
+ if (qede_is_irrelevant_stat(edev, i))
+ continue;
+@@ -435,6 +437,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
+ buf++;
+ }
+
++ spin_unlock(&edev->stats_lock);
++
+ __qede_unlock(edev);
+ }
+
+@@ -817,6 +821,7 @@ out:
+
+ coal->rx_coalesce_usecs = rx_coal;
+ coal->tx_coalesce_usecs = tx_coal;
++ coal->stats_block_coalesce_usecs = edev->stats_coal_usecs;
+
+ return rc;
+ }
+@@ -830,6 +835,19 @@ int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
+ int i, rc = 0;
+ u16 rxc, txc;
+
++ if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) {
++ edev->stats_coal_usecs = coal->stats_block_coalesce_usecs;
++ if (edev->stats_coal_usecs) {
++ edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs);
++ schedule_delayed_work(&edev->periodic_task, 0);
++
++ DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n",
++ edev->stats_coal_ticks);
++ } else {
++ cancel_delayed_work_sync(&edev->periodic_task);
++ }
++ }
++
+ if (!netif_running(dev)) {
+ DP_INFO(edev, "Interface is down\n");
+ return -EINVAL;
+@@ -2236,7 +2254,8 @@ out:
+ }
+
+ static const struct ethtool_ops qede_ethtool_ops = {
+- .supported_coalesce_params = ETHTOOL_COALESCE_USECS,
++ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
++ ETHTOOL_COALESCE_STATS_BLOCK_USECS,
+ .get_link_ksettings = qede_get_link_ksettings,
+ .set_link_ksettings = qede_set_link_ksettings,
+ .get_drvinfo = qede_get_drvinfo,
+@@ -2287,7 +2306,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
+ };
+
+ static const struct ethtool_ops qede_vf_ethtool_ops = {
+- .supported_coalesce_params = ETHTOOL_COALESCE_USECS,
++ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
++ ETHTOOL_COALESCE_STATS_BLOCK_USECS,
+ .get_link_ksettings = qede_get_link_ksettings,
+ .get_drvinfo = qede_get_drvinfo,
+ .get_msglevel = qede_get_msglevel,
+diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
+index a2e4dfb5cb44e..03c51dd37e1f3 100644
+--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
++++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
+@@ -557,7 +557,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced)
+ return;
+ }
+
+- ether_addr_copy(edev->ndev->dev_addr, mac);
++ eth_hw_addr_set(edev->ndev, mac);
+ __qede_unlock(edev);
+ }
+
+@@ -619,30 +619,28 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev,
+ enum qed_filter_xcast_params_type opcode,
+ unsigned char mac[ETH_ALEN])
+ {
+- struct qed_filter_params filter_cmd;
++ struct qed_filter_ucast_params ucast;
+
+- memset(&filter_cmd, 0, sizeof(filter_cmd));
+- filter_cmd.type = QED_FILTER_TYPE_UCAST;
+- filter_cmd.filter.ucast.type = opcode;
+- filter_cmd.filter.ucast.mac_valid = 1;
+- ether_addr_copy(filter_cmd.filter.ucast.mac, mac);
++ memset(&ucast, 0, sizeof(ucast));
++ ucast.type = opcode;
++ ucast.mac_valid = 1;
++ ether_addr_copy(ucast.mac, mac);
+
+- return edev->ops->filter_config(edev->cdev, &filter_cmd);
++ return edev->ops->filter_config_ucast(edev->cdev, &ucast);
+ }
+
+ static int qede_set_ucast_rx_vlan(struct qede_dev *edev,
+ enum qed_filter_xcast_params_type opcode,
+ u16 vid)
+ {
+- struct qed_filter_params filter_cmd;
++ struct qed_filter_ucast_params ucast;
+
+- memset(&filter_cmd, 0, sizeof(filter_cmd));
+- filter_cmd.type = QED_FILTER_TYPE_UCAST;
+- filter_cmd.filter.ucast.type = opcode;
+- filter_cmd.filter.ucast.vlan_valid = 1;
+- filter_cmd.filter.ucast.vlan = vid;
++ memset(&ucast, 0, sizeof(ucast));
++ ucast.type = opcode;
++ ucast.vlan_valid = 1;
++ ucast.vlan = vid;
+
+- return edev->ops->filter_config(edev->cdev, &filter_cmd);
++ return edev->ops->filter_config_ucast(edev->cdev, &ucast);
+ }
+
+ static int qede_config_accept_any_vlan(struct qede_dev *edev, bool action)
+@@ -1057,18 +1055,17 @@ static int qede_set_mcast_rx_mac(struct qede_dev *edev,
+ enum qed_filter_xcast_params_type opcode,
+ unsigned char *mac, int num_macs)
+ {
+- struct qed_filter_params filter_cmd;
++ struct qed_filter_mcast_params mcast;
+ int i;
+
+- memset(&filter_cmd, 0, sizeof(filter_cmd));
+- filter_cmd.type = QED_FILTER_TYPE_MCAST;
+- filter_cmd.filter.mcast.type = opcode;
+- filter_cmd.filter.mcast.num = num_macs;
++ memset(&mcast, 0, sizeof(mcast));
++ mcast.type = opcode;
++ mcast.num = num_macs;
+
+ for (i = 0; i < num_macs; i++, mac += ETH_ALEN)
+- ether_addr_copy(filter_cmd.filter.mcast.mac[i], mac);
++ ether_addr_copy(mcast.mac[i], mac);
+
+- return edev->ops->filter_config(edev->cdev, &filter_cmd);
++ return edev->ops->filter_config_mcast(edev->cdev, &mcast);
+ }
+
+ int qede_set_mac_addr(struct net_device *ndev, void *p)
+@@ -1104,7 +1101,7 @@ int qede_set_mac_addr(struct net_device *ndev, void *p)
+ goto out;
+ }
+
+- ether_addr_copy(ndev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(ndev, addr->sa_data);
+ DP_INFO(edev, "Setting device MAC to %pM\n", addr->sa_data);
+
+ if (edev->state != QEDE_STATE_OPEN) {
+@@ -1194,7 +1191,6 @@ void qede_config_rx_mode(struct net_device *ndev)
+ {
+ enum qed_filter_rx_mode_type accept_flags;
+ struct qede_dev *edev = netdev_priv(ndev);
+- struct qed_filter_params rx_mode;
+ unsigned char *uc_macs, *temp;
+ struct netdev_hw_addr *ha;
+ int rc, uc_count;
+@@ -1220,10 +1216,6 @@ void qede_config_rx_mode(struct net_device *ndev)
+
+ netif_addr_unlock_bh(ndev);
+
+- /* Configure the struct for the Rx mode */
+- memset(&rx_mode, 0, sizeof(struct qed_filter_params));
+- rx_mode.type = QED_FILTER_TYPE_RX_MODE;
+-
+ /* Remove all previous unicast secondary macs and multicast macs
+ * (configure / leave the primary mac)
+ */
+@@ -1271,8 +1263,7 @@ void qede_config_rx_mode(struct net_device *ndev)
+ qede_config_accept_any_vlan(edev, false);
+ }
+
+- rx_mode.filter.accept_flags = accept_flags;
+- edev->ops->filter_config(edev->cdev, &rx_mode);
++ edev->ops->filter_config_rx_mode(edev->cdev, accept_flags);
+ out:
+ kfree(uc_macs);
+ }
+diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
+index 065e9004598ee..d67d4e74b326d 100644
+--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
++++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
+@@ -747,6 +747,9 @@ qede_build_skb(struct qede_rx_queue *rxq,
+ buf = page_address(bd->data) + bd->page_offset;
+ skb = build_skb(buf, rxq->rx_buf_seg_size);
+
++ if (unlikely(!skb))
++ return NULL;
++
+ skb_reserve(skb, pad);
+ skb_put(skb, len);
+
+@@ -1436,6 +1439,10 @@ int qede_poll(struct napi_struct *napi, int budget)
+ rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
+ qede_has_rx_work(fp->rxq)) ?
+ qede_rx_int(fp, budget) : 0;
++
++ if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
++ xdp_do_flush();
++
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (rx_work_done < budget || !budget) {
+ if (!qede_poll_is_more_work(fp)) {
+@@ -1455,9 +1462,6 @@ int qede_poll(struct napi_struct *napi, int budget)
+ qede_update_tx_producer(fp->xdp_tx);
+ }
+
+- if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
+- xdp_do_flush_map();
+-
+ return rx_work_done;
+ }
+
+@@ -1643,6 +1647,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ data_split = true;
+ }
+ } else {
++ if (unlikely(skb->len > ETH_TX_MAX_NON_LSO_PKT_LEN)) {
++ DP_ERR(edev, "Unexpected non LSO skb length = 0x%x\n", skb->len);
++ qede_free_failed_tx_pkt(txq, first_bd, 0, false);
++ qede_update_tx_producer(txq);
++ return NETDEV_TX_OK;
++ }
++
+ val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
+ ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT);
+ }
+diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
+index 9837bdb89cd40..fee47c8eeff49 100644
+--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
++++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
+@@ -308,6 +308,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
+
+ edev->ops->get_vport_stats(edev->cdev, &stats);
+
++ spin_lock(&edev->stats_lock);
++
+ p_common->no_buff_discards = stats.common.no_buff_discards;
+ p_common->packet_too_big_discard = stats.common.packet_too_big_discard;
+ p_common->ttl0_discard = stats.common.ttl0_discard;
+@@ -405,6 +407,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
+ p_ah->tx_1519_to_max_byte_packets =
+ stats.ah.tx_1519_to_max_byte_packets;
+ }
++
++ spin_unlock(&edev->stats_lock);
+ }
+
+ static void qede_get_stats64(struct net_device *dev,
+@@ -413,9 +417,10 @@ static void qede_get_stats64(struct net_device *dev,
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qede_stats_common *p_common;
+
+- qede_fill_by_demand_stats(edev);
+ p_common = &edev->stats.common;
+
++ spin_lock(&edev->stats_lock);
++
+ stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts +
+ p_common->rx_bcast_pkts;
+ stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts +
+@@ -435,6 +440,8 @@ static void qede_get_stats64(struct net_device *dev,
+ stats->collisions = edev->stats.bb.tx_total_collisions;
+ stats->rx_crc_errors = p_common->rx_crc_errors;
+ stats->rx_frame_errors = p_common->rx_align_errors;
++
++ spin_unlock(&edev->stats_lock);
+ }
+
+ #ifdef CONFIG_QED_SRIOV
+@@ -836,7 +843,7 @@ static void qede_init_ndev(struct qede_dev *edev)
+ ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE;
+
+ /* Set network device HW mac */
+- ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
++ eth_hw_addr_set(edev->ndev, edev->dev_info.common.hw_mac);
+
+ ndev->mtu = edev->dev_info.common.mtu;
+ }
+@@ -899,7 +906,6 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
+ {
+ u8 fp_combined, fp_rx = edev->fp_num_rx;
+ struct qede_fastpath *fp;
+- void *mem;
+ int i;
+
+ edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev),
+@@ -909,14 +915,15 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
+ goto err;
+ }
+
+- mem = krealloc(edev->coal_entry, QEDE_QUEUE_CNT(edev) *
+- sizeof(*edev->coal_entry), GFP_KERNEL);
+- if (!mem) {
+- DP_ERR(edev, "coalesce entry allocation failed\n");
+- kfree(edev->coal_entry);
+- goto err;
++ if (!edev->coal_entry) {
++ edev->coal_entry = kcalloc(QEDE_MAX_RSS_CNT(edev),
++ sizeof(*edev->coal_entry),
++ GFP_KERNEL);
++ if (!edev->coal_entry) {
++ DP_ERR(edev, "coalesce entry allocation failed\n");
++ goto err;
++ }
+ }
+- edev->coal_entry = mem;
+
+ fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx;
+
+@@ -1000,6 +1007,23 @@ static void qede_unlock(struct qede_dev *edev)
+ rtnl_unlock();
+ }
+
++static void qede_periodic_task(struct work_struct *work)
++{
++ struct qede_dev *edev = container_of(work, struct qede_dev,
++ periodic_task.work);
++
++ qede_fill_by_demand_stats(edev);
++ schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks);
++}
++
++static void qede_init_periodic_task(struct qede_dev *edev)
++{
++ INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task);
++ spin_lock_init(&edev->stats_lock);
++ edev->stats_coal_usecs = USEC_PER_SEC;
++ edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC);
++}
++
+ static void qede_sp_task(struct work_struct *work)
+ {
+ struct qede_dev *edev = container_of(work, struct qede_dev,
+@@ -1019,6 +1043,7 @@ static void qede_sp_task(struct work_struct *work)
+ */
+
+ if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
++ cancel_delayed_work_sync(&edev->periodic_task);
+ #ifdef CONFIG_QED_SRIOV
+ /* SRIOV must be disabled outside the lock to avoid a deadlock.
+ * The recovery of the active VFs is currently not supported.
+@@ -1176,19 +1201,17 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
+ edev->devlink = qed_ops->common->devlink_register(cdev);
+ if (IS_ERR(edev->devlink)) {
+ DP_NOTICE(edev, "Cannot register devlink\n");
++ rc = PTR_ERR(edev->devlink);
+ edev->devlink = NULL;
+- /* Go on, we can live without devlink */
++ goto err3;
+ }
+ } else {
+ struct net_device *ndev = pci_get_drvdata(pdev);
++ struct qed_devlink *qdl;
+
+ edev = netdev_priv(ndev);
+-
+- if (edev->devlink) {
+- struct qed_devlink *qdl = devlink_priv(edev->devlink);
+-
+- qdl->cdev = cdev;
+- }
++ qdl = devlink_priv(edev->devlink);
++ qdl->cdev = cdev;
+ edev->cdev = cdev;
+ memset(&edev->stats, 0, sizeof(edev->stats));
+ memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
+@@ -1211,6 +1234,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
+ */
+ INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+ mutex_init(&edev->qede_lock);
++ qede_init_periodic_task(edev);
+
+ rc = register_netdev(edev->ndev);
+ if (rc) {
+@@ -1235,6 +1259,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
+ edev->rx_copybreak = QEDE_RX_HDR_SIZE;
+
+ qede_log_probe(edev);
++
++ /* retain user config (for example - after recovery) */
++ if (edev->stats_coal_usecs)
++ schedule_delayed_work(&edev->periodic_task, 0);
++
+ return 0;
+
+ err4:
+@@ -1303,6 +1332,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
+ unregister_netdev(ndev);
+
+ cancel_delayed_work_sync(&edev->sp_task);
++ cancel_delayed_work_sync(&edev->periodic_task);
+
+ edev->ops->common->set_power_state(cdev, PCI_D0);
+
+@@ -2802,10 +2832,13 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
+ }
+
+ /**
+- * qede_io_error_detected - called when PCI error is detected
++ * qede_io_error_detected(): Called when PCI error is detected
++ *
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
++ * Return: pci_ers_result_t.
++ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
+index c00ad57575eab..29837e533cee8 100644
+--- a/drivers/net/ethernet/qlogic/qla3xxx.c
++++ b/drivers/net/ethernet/qlogic/qla3xxx.c
+@@ -2469,6 +2469,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb,
+ skb_shinfo(skb)->nr_frags);
+ if (tx_cb->seg_count == -1) {
+ netdev_err(ndev, "%s: invalid segment count!\n", __func__);
++ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+@@ -3478,20 +3479,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev)
+
+ spin_lock_irqsave(&qdev->hw_lock, hw_flags);
+
+- err = ql_wait_for_drvr_lock(qdev);
+- if (err) {
+- err = ql_adapter_initialize(qdev);
+- if (err) {
+- netdev_err(ndev, "Unable to initialize adapter\n");
+- goto err_init;
+- }
+- netdev_err(ndev, "Releasing driver lock\n");
+- ql_sem_unlock(qdev, QL_DRVR_SEM_MASK);
+- } else {
++ if (!ql_wait_for_drvr_lock(qdev)) {
+ netdev_err(ndev, "Could not acquire driver lock\n");
++ err = -ENODEV;
+ goto err_lock;
+ }
+
++ err = ql_adapter_initialize(qdev);
++ if (err) {
++ netdev_err(ndev, "Unable to initialize adapter\n");
++ goto err_init;
++ }
++ ql_sem_unlock(qdev, QL_DRVR_SEM_MASK);
++
+ spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
+
+ set_bit(QL_ADAPTER_UP, &qdev->flags);
+@@ -3613,7 +3613,8 @@ static void ql_reset_work(struct work_struct *work)
+ qdev->mem_map_registers;
+ unsigned long hw_flags;
+
+- if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) {
++ if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) ||
++ test_bit(QL_RESET_START, &qdev->flags)) {
+ clear_bit(QL_LINK_MASTER, &qdev->flags);
+
+ /*
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+index d51bac7ba5afa..2fd5c6fdb5003 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter)
+ sds_mbx_size = sizeof(struct qlcnic_sds_mbx);
+ context_id = recv_ctx->context_id;
+ num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS;
+- ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
+- QLCNIC_CMD_ADD_RCV_RINGS);
++ err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
++ QLCNIC_CMD_ADD_RCV_RINGS);
++ if (err) {
++ dev_err(&adapter->pdev->dev,
++ "Failed to alloc mbx args %d\n", err);
++ return err;
++ }
++
+ cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16);
+
+ /* set up status rings, mbx 2-81 */
+@@ -2985,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter)
+ QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val);
+ dev_info(&adapter->pdev->dev,
+ "%s: lock recovery initiated\n", __func__);
+- msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY);
++ mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY);
+ val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK);
+ id = ((val >> 2) & 0xF);
+ if (id == adapter->portnum) {
+@@ -3021,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter)
+ if (status)
+ break;
+
+- msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY);
++ mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY);
+ i++;
+
+ if (i == 1)
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+index 27dffa299ca6f..7c3cf9ad4563c 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac)
+ goto disable_mbx_intr;
+
+ qlcnic_83xx_clear_function_resources(adapter);
+- qlcnic_dcb_enable(adapter->dcb);
++
++ err = qlcnic_dcb_enable(adapter->dcb);
++ if (err) {
++ qlcnic_dcb_free(adapter->dcb);
++ goto disable_mbx_intr;
++ }
++
+ qlcnic_83xx_initialize_nic(adapter, 1);
+ qlcnic_dcb_get_info(adapter->dcb);
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+index 87f76bac2e463..eb827b86ecae8 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+@@ -628,7 +628,13 @@ int qlcnic_fw_create_ctx(struct qlcnic_adapter *dev)
+ int i, err, ring;
+
+ if (dev->flags & QLCNIC_NEED_FLR) {
+- pci_reset_function(dev->pdev);
++ err = pci_reset_function(dev->pdev);
++ if (err) {
++ dev_err(&dev->pdev->dev,
++ "Adapter reset failed (%d). Please reboot\n",
++ err);
++ return err;
++ }
+ dev->flags &= ~QLCNIC_NEED_FLR;
+ }
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+index 5d79ee4370bcd..22afa2be85fdb 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+@@ -41,17 +41,12 @@ struct qlcnic_dcb {
+ unsigned long state;
+ };
+
+-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb)
+-{
+- kfree(dcb);
+-}
+-
+ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb)
+ {
+ if (dcb && dcb->ops->get_hw_capability)
+ return dcb->ops->get_hw_capability(dcb);
+
+- return 0;
++ return -EOPNOTSUPP;
+ }
+
+ static inline void qlcnic_dcb_free(struct qlcnic_dcb *dcb)
+@@ -65,7 +60,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb)
+ if (dcb && dcb->ops->attach)
+ return dcb->ops->attach(dcb);
+
+- return 0;
++ return -EOPNOTSUPP;
+ }
+
+ static inline int
+@@ -74,7 +69,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf)
+ if (dcb && dcb->ops->query_hw_capability)
+ return dcb->ops->query_hw_capability(dcb, buf);
+
+- return 0;
++ return -EOPNOTSUPP;
+ }
+
+ static inline void qlcnic_dcb_get_info(struct qlcnic_dcb *dcb)
+@@ -89,7 +84,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type)
+ if (dcb && dcb->ops->query_cee_param)
+ return dcb->ops->query_cee_param(dcb, buf, type);
+
+- return 0;
++ return -EOPNOTSUPP;
+ }
+
+ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb)
+@@ -97,7 +92,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb)
+ if (dcb && dcb->ops->get_cee_cfg)
+ return dcb->ops->get_cee_cfg(dcb);
+
+- return 0;
++ return -EOPNOTSUPP;
+ }
+
+ static inline void qlcnic_dcb_aen_handler(struct qlcnic_dcb *dcb, void *msg)
+@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb)
+ dcb->ops->init_dcbnl_ops(dcb);
+ }
+
+-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
+ {
+- if (dcb && qlcnic_dcb_attach(dcb))
+- qlcnic_clear_dcb_ops(dcb);
++ return dcb ? qlcnic_dcb_attach(dcb) : 0;
+ }
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 75960a29f80ea..cec07d5bbe67a 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2616,7 +2616,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ "Device does not support MSI interrupts\n");
+
+ if (qlcnic_82xx_check(adapter)) {
+- qlcnic_dcb_enable(adapter->dcb);
++ err = qlcnic_dcb_enable(adapter->dcb);
++ if (err) {
++ qlcnic_dcb_free(adapter->dcb);
++ dev_err(&pdev->dev, "Failed to enable DCB\n");
++ goto err_out_free_hw;
++ }
++
+ qlcnic_dcb_get_info(adapter->dcb);
+ err = qlcnic_setup_intr(adapter);
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+index 7160b42f51ddd..d0111cb3b40e1 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+@@ -201,7 +201,7 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *,
+ struct qlcnic_info *, u16);
+ int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8);
+ void qlcnic_sriov_free_vlans(struct qlcnic_adapter *);
+-void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *);
++int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *);
+ bool qlcnic_sriov_check_any_vlan(struct qlcnic_vf_info *);
+ void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *,
+ struct qlcnic_vf_info *, u16);
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+index dd03be3fc82a9..df9b84f6600fe 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+@@ -221,6 +221,8 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
+ return 0;
+
+ qlcnic_destroy_async_wq:
++ while (i--)
++ kfree(sriov->vf_info[i].vp);
+ destroy_workqueue(bc->bc_async_wq);
+
+ qlcnic_destroy_trans_wq:
+@@ -432,7 +434,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter,
+ struct qlcnic_cmd_args *cmd)
+ {
+ struct qlcnic_sriov *sriov = adapter->ahw->sriov;
+- int i, num_vlans;
++ int i, num_vlans, ret;
+ u16 *vlans;
+
+ if (sriov->allowed_vlans)
+@@ -443,7 +445,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter,
+ dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n",
+ sriov->num_allowed_vlans);
+
+- qlcnic_sriov_alloc_vlans(adapter);
++ ret = qlcnic_sriov_alloc_vlans(adapter);
++ if (ret)
++ return ret;
+
+ if (!sriov->any_vlan)
+ return 0;
+@@ -2154,7 +2158,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter)
+ return err;
+ }
+
+-void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter)
++int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter)
+ {
+ struct qlcnic_sriov *sriov = adapter->ahw->sriov;
+ struct qlcnic_vf_info *vf;
+@@ -2164,7 +2168,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter)
+ vf = &sriov->vf_info[i];
+ vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans,
+ sizeof(*vf->sriov_vlans), GFP_KERNEL);
++ if (!vf->sriov_vlans)
++ return -ENOMEM;
+ }
++
++ return 0;
+ }
+
+ void qlcnic_sriov_free_vlans(struct qlcnic_adapter *adapter)
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+index 447720b93e5ab..e90fa97c0ae6c 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+@@ -597,7 +597,9 @@ static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter,
+ if (err)
+ goto del_flr_queue;
+
+- qlcnic_sriov_alloc_vlans(adapter);
++ err = qlcnic_sriov_alloc_vlans(adapter);
++ if (err)
++ goto del_flr_queue;
+
+ return err;
+
+diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
+index 9015a38eaced8..94090856cf3a9 100644
+--- a/drivers/net/ethernet/qualcomm/emac/emac.c
++++ b/drivers/net/ethernet/qualcomm/emac/emac.c
+@@ -550,7 +550,7 @@ static int emac_probe_resources(struct platform_device *pdev,
+
+ /* get mac address */
+ if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
+- ether_addr_copy(netdev->dev_addr, maddr);
++ eth_hw_addr_set(netdev, maddr);
+ else
+ eth_hw_addr_random(netdev);
+
+@@ -728,9 +728,15 @@ static int emac_remove(struct platform_device *pdev)
+ struct net_device *netdev = dev_get_drvdata(&pdev->dev);
+ struct emac_adapter *adpt = netdev_priv(netdev);
+
++ netif_carrier_off(netdev);
++ netif_tx_disable(netdev);
++
+ unregister_netdev(netdev);
+ netif_napi_del(&adpt->rx_q.napi);
+
++ free_irq(adpt->irq.irq, &adpt->irq);
++ cancel_work_sync(&adpt->work_thread);
++
+ emac_clks_teardown(adpt);
+
+ put_device(&adpt->phydev->mdio.dev);
+diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
+index 8427fe1b8fd1c..a047bab1d7c7e 100644
+--- a/drivers/net/ethernet/qualcomm/qca_spi.c
++++ b/drivers/net/ethernet/qualcomm/qca_spi.c
+@@ -582,8 +582,7 @@ qcaspi_spi_thread(void *data)
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ((qca->intr_req == qca->intr_svc) &&
+- (qca->txr.skb[qca->txr.head] == NULL) &&
+- (qca->sync == QCASPI_SYNC_READY))
++ !qca->txr.skb[qca->txr.head])
+ schedule();
+
+ set_current_state(TASK_RUNNING);
+@@ -968,7 +967,7 @@ qca_spi_probe(struct spi_device *spi)
+
+ spi_set_drvdata(spi, qcaspi_devs);
+
+- ret = of_get_mac_address(spi->dev.of_node, qca->net_dev->dev_addr);
++ ret = of_get_ethdev_address(spi->dev.of_node, qca->net_dev);
+ if (ret) {
+ eth_hw_addr_random(qca->net_dev);
+ dev_info(&spi->dev, "Using random MAC address: %pM\n",
+diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
+index ce3f7ce31adc1..27c4f43176aaa 100644
+--- a/drivers/net/ethernet/qualcomm/qca_uart.c
++++ b/drivers/net/ethernet/qualcomm/qca_uart.c
+@@ -347,7 +347,7 @@ static int qca_uart_probe(struct serdev_device *serdev)
+
+ of_property_read_u32(serdev->dev.of_node, "current-speed", &speed);
+
+- ret = of_get_mac_address(serdev->dev.of_node, qca->net_dev->dev_addr);
++ ret = of_get_ethdev_address(serdev->dev.of_node, qca->net_dev);
+ if (ret) {
+ eth_hw_addr_random(qca->net_dev);
+ dev_info(&serdev->dev, "Using random MAC address: %pM\n",
+diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
+index 01ef5efd7bc2a..5a8a6977ec9a7 100644
+--- a/drivers/net/ethernet/rdc/r6040.c
++++ b/drivers/net/ethernet/rdc/r6040.c
+@@ -1159,10 +1159,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ err = register_netdev(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register net device\n");
+- goto err_out_mdio_unregister;
++ goto err_out_phy_disconnect;
+ }
+ return 0;
+
++err_out_phy_disconnect:
++ phy_disconnect(dev->phydev);
+ err_out_mdio_unregister:
+ mdiobus_unregister(lp->mii_bus);
+ err_out_mdio:
+@@ -1186,6 +1188,7 @@ static void r6040_remove_one(struct pci_dev *pdev)
+ struct r6040_private *lp = netdev_priv(dev);
+
+ unregister_netdev(dev);
++ phy_disconnect(dev->phydev);
+ mdiobus_unregister(lp->mii_bus);
+ mdiobus_free(lp->mii_bus);
+ netif_napi_del(&lp->napi);
+diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
+index 2918947dd57c9..264bb3ec44a59 100644
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -2251,28 +2251,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
+ return 0;
+ }
+
+-static void rtl_wol_enable_rx(struct rtl8169_private *tp)
+-{
+- if (tp->mac_version >= RTL_GIGA_MAC_VER_25)
+- RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
+- AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
+-}
+-
+-static void rtl_prepare_power_down(struct rtl8169_private *tp)
+-{
+- if (tp->dash_type != RTL_DASH_NONE)
+- return;
+-
+- if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
+- tp->mac_version == RTL_GIGA_MAC_VER_33)
+- rtl_ephy_write(tp, 0x19, 0xff64);
+-
+- if (device_may_wakeup(tp_to_dev(tp))) {
+- phy_speed_down(tp->phydev, false);
+- rtl_wol_enable_rx(tp);
+- }
+-}
+-
+ static void rtl_init_rxcfg(struct rtl8169_private *tp)
+ {
+ switch (tp->mac_version) {
+@@ -2492,6 +2470,28 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp)
+ rtl_wait_txrx_fifo_empty(tp);
+ }
+
++static void rtl_wol_enable_rx(struct rtl8169_private *tp)
++{
++ if (tp->mac_version >= RTL_GIGA_MAC_VER_25)
++ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
++ AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
++}
++
++static void rtl_prepare_power_down(struct rtl8169_private *tp)
++{
++ if (tp->dash_type != RTL_DASH_NONE)
++ return;
++
++ if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
++ tp->mac_version == RTL_GIGA_MAC_VER_33)
++ rtl_ephy_write(tp, 0x19, 0xff64);
++
++ if (device_may_wakeup(tp_to_dev(tp))) {
++ phy_speed_down(tp->phydev, false);
++ rtl_wol_enable_rx(tp);
++ }
++}
++
+ static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
+ {
+ u32 val = TX_DMA_BURST << TxDMAShift |
+@@ -4177,7 +4177,6 @@ static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts)
+ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
+ struct sk_buff *skb, u32 *opts)
+ {
+- u32 transport_offset = (u32)skb_transport_offset(skb);
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ u32 mss = shinfo->gso_size;
+
+@@ -4194,7 +4193,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
+ WARN_ON_ONCE(1);
+ }
+
+- opts[0] |= transport_offset << GTTCPHO_SHIFT;
++ opts[0] |= skb_transport_offset(skb) << GTTCPHO_SHIFT;
+ opts[1] |= mss << TD1_MSS_SHIFT;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ u8 ip_protocol;
+@@ -4222,7 +4221,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
+ else
+ WARN_ON_ONCE(1);
+
+- opts[1] |= transport_offset << TCPHO_SHIFT;
++ opts[1] |= skb_transport_offset(skb) << TCPHO_SHIFT;
+ } else {
+ unsigned int padto = rtl_quirk_packet_padto(tp, skb);
+
+@@ -4389,14 +4388,13 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+ {
+- int transport_offset = skb_transport_offset(skb);
+ struct rtl8169_private *tp = netdev_priv(dev);
+
+ if (skb_is_gso(skb)) {
+ if (tp->mac_version == RTL_GIGA_MAC_VER_34)
+ features = rtl8168evl_fix_tso(skb, features);
+
+- if (transport_offset > GTTCPHO_MAX &&
++ if (skb_transport_offset(skb) > GTTCPHO_MAX &&
+ rtl_chip_supports_csum_v2(tp))
+ features &= ~NETIF_F_ALL_TSO;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+@@ -4407,7 +4405,7 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
+ if (rtl_quirk_packet_padto(tp, skb))
+ features &= ~NETIF_F_CSUM_MASK;
+
+- if (transport_offset > TCPHO_MAX &&
++ if (skb_transport_offset(skb) > TCPHO_MAX &&
+ rtl_chip_supports_csum_v2(tp))
+ features &= ~NETIF_F_CSUM_MASK;
+ }
+diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
+index 50f0f621b1aa8..a84fd859aec9b 100644
+--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
++++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
+@@ -970,6 +970,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp,
+ /* disable phy pfm mode */
+ phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
+
++ /* disable 10m pll off */
++ phy_modify_paged(phydev, 0x0a43, 0x10, BIT(0), 0);
++
+ rtl8168g_disable_aldps(phydev);
+ rtl8168g_config_eee_phy(phydev);
+ }
+diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
+index 47c5377e4f424..a475f54a6b63c 100644
+--- a/drivers/net/ethernet/renesas/ravb.h
++++ b/drivers/net/ethernet/renesas/ravb.h
+@@ -1000,8 +1000,8 @@ struct ravb_hw_info {
+ unsigned internal_delay:1; /* AVB-DMAC has internal delays */
+ unsigned tx_counters:1; /* E-MAC has TX counters */
+ unsigned multi_irqs:1; /* AVB-DMAC and E-MAC has multiple irqs */
+- unsigned no_ptp_cfg_active:1; /* AVB-DMAC does not support gPTP active in config mode */
+- unsigned ptp_cfg_active:1; /* AVB-DMAC has gPTP support active in config mode */
++ unsigned gptp:1; /* AVB-DMAC has gPTP support */
++ unsigned ccc_gac:1; /* AVB-DMAC has gPTP support active in config mode */
+ };
+
+ struct ravb_private {
+diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
+index 0f85f2d97b18d..4ee72d33e9cb7 100644
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -30,8 +30,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/sys_soc.h>
+ #include <linux/reset.h>
+-
+-#include <asm/div64.h>
++#include <linux/math64.h>
+
+ #include "ravb.h"
+
+@@ -115,7 +114,7 @@ static void ravb_read_mac_address(struct device_node *np,
+ {
+ int ret;
+
+- ret = of_get_mac_address(np, ndev->dev_addr);
++ ret = of_get_ethdev_address(np, ndev);
+ if (ret) {
+ u32 mahr = ravb_read(ndev, MAHR);
+ u32 malr = ravb_read(ndev, MALR);
+@@ -793,14 +792,14 @@ static void ravb_error_interrupt(struct net_device *ndev)
+ ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS);
+ if (eis & EIS_QFS) {
+ ris2 = ravb_read(ndev, RIS2);
+- ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED),
++ ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED),
+ RIS2);
+
+ /* Receive Descriptor Empty int */
+ if (ris2 & RIS2_QFF0)
+ priv->stats[RAVB_BE].rx_over_errors++;
+
+- /* Receive Descriptor Empty int */
++ /* Receive Descriptor Empty int */
+ if (ris2 & RIS2_QFF1)
+ priv->stats[RAVB_NC].rx_over_errors++;
+
+@@ -1274,7 +1273,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
+ if (netif_running(ndev)) {
+ netif_device_detach(ndev);
+ /* Stop PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_stop(ndev);
+ /* Wait for DMA stopping */
+ error = ravb_stop_dma(ndev);
+@@ -1306,7 +1305,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
+ ravb_emac_init(ndev);
+
+ /* Initialise PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_init(ndev, priv->pdev);
+
+ netif_device_attach(ndev);
+@@ -1446,7 +1445,7 @@ static int ravb_open(struct net_device *ndev)
+ ravb_emac_init(ndev);
+
+ /* Initialise PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_init(ndev, priv->pdev);
+
+ netif_tx_start_all_queues(ndev);
+@@ -1460,7 +1459,7 @@ static int ravb_open(struct net_device *ndev)
+
+ out_ptp_stop:
+ /* Stop PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_stop(ndev);
+ out_free_irq_nc_tx:
+ if (!info->multi_irqs)
+@@ -1508,7 +1507,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
+ netif_tx_stop_all_queues(ndev);
+
+ /* Stop PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_stop(ndev);
+
+ /* Wait for DMA stopping */
+@@ -1543,7 +1542,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
+
+ out:
+ /* Initialise PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_init(ndev, priv->pdev);
+
+ netif_tx_start_all_queues(ndev);
+@@ -1752,7 +1751,7 @@ static int ravb_close(struct net_device *ndev)
+ ravb_write(ndev, 0, TIC);
+
+ /* Stop PTP Clock driver */
+- if (info->no_ptp_cfg_active)
++ if (info->gptp)
+ ravb_ptp_stop(ndev);
+
+ /* Set the config mode to stop the AVB-DMAC's processes */
+@@ -1960,6 +1959,8 @@ static int ravb_mdio_init(struct ravb_private *priv)
+ {
+ struct platform_device *pdev = priv->pdev;
+ struct device *dev = &pdev->dev;
++ struct phy_device *phydev;
++ struct device_node *pn;
+ int error;
+
+ /* Bitbang init */
+@@ -1981,6 +1982,14 @@ static int ravb_mdio_init(struct ravb_private *priv)
+ if (error)
+ goto out_free_bus;
+
++ pn = of_parse_phandle(dev->of_node, "phy-handle", 0);
++ phydev = of_phy_find_device(pn);
++ if (phydev) {
++ phydev->mac_managed_pm = true;
++ put_device(&phydev->mdio.dev);
++ }
++ of_node_put(pn);
++
+ return 0;
+
+ out_free_bus:
+@@ -2018,7 +2027,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = {
+ .internal_delay = 1,
+ .tx_counters = 1,
+ .multi_irqs = 1,
+- .ptp_cfg_active = 1,
++ .ccc_gac = 1,
+ };
+
+ static const struct ravb_hw_info ravb_gen2_hw_info = {
+@@ -2037,7 +2046,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = {
+ .stats_len = ARRAY_SIZE(ravb_gstrings_stats),
+ .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
+ .aligned_tx = 1,
+- .no_ptp_cfg_active = 1,
++ .gptp = 1,
+ };
+
+ static const struct of_device_id ravb_match_table[] = {
+@@ -2061,8 +2070,7 @@ static int ravb_set_gti(struct net_device *ndev)
+ if (!rate)
+ return -EINVAL;
+
+- inc = 1000000000ULL << 20;
+- do_div(inc, rate);
++ inc = div64_ul(1000000000ULL << 20, rate);
+
+ if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) {
+ dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n",
+@@ -2080,7 +2088,7 @@ static void ravb_set_config_mode(struct net_device *ndev)
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
+- if (info->no_ptp_cfg_active) {
++ if (info->gptp) {
+ ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
+ /* Set CSEL value */
+ ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+@@ -2301,7 +2309,7 @@ static int ravb_probe(struct platform_device *pdev)
+ INIT_LIST_HEAD(&priv->ts_skb_list);
+
+ /* Initialise PTP Clock driver */
+- if (info->ptp_cfg_active)
++ if (info->ccc_gac)
+ ravb_ptp_init(ndev, pdev);
+
+ /* Debug message level */
+@@ -2349,7 +2357,7 @@ out_dma_free:
+ priv->desc_bat_dma);
+
+ /* Stop PTP Clock driver */
+- if (info->ptp_cfg_active)
++ if (info->ccc_gac)
+ ravb_ptp_stop(ndev);
+ out_disable_refclk:
+ clk_disable_unprepare(priv->refclk);
+@@ -2369,7 +2377,7 @@ static int ravb_remove(struct platform_device *pdev)
+ const struct ravb_hw_info *info = priv->info;
+
+ /* Stop PTP Clock driver */
+- if (info->ptp_cfg_active)
++ if (info->ccc_gac)
+ ravb_ptp_stop(ndev);
+
+ clk_disable_unprepare(priv->refclk);
+@@ -2378,11 +2386,11 @@ static int ravb_remove(struct platform_device *pdev)
+ priv->desc_bat_dma);
+ /* Set reset mode */
+ ravb_write(ndev, CCC_OPC_RESET, CCC);
+- pm_runtime_put_sync(&pdev->dev);
+ unregister_netdev(ndev);
+ netif_napi_del(&priv->napi[RAVB_NC]);
+ netif_napi_del(&priv->napi[RAVB_BE]);
+ ravb_mdio_release(priv);
++ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ reset_control_assert(priv->rstc);
+ free_netdev(ndev);
+@@ -2446,6 +2454,9 @@ static int __maybe_unused ravb_suspend(struct device *dev)
+ else
+ ret = ravb_close(ndev);
+
++ if (priv->info->ccc_gac)
++ ravb_ptp_stop(ndev);
++
+ return ret;
+ }
+
+@@ -2482,6 +2493,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
+ /* Restore descriptor base address table */
+ ravb_write(ndev, priv->desc_bat_dma, DBAT);
+
++ if (priv->info->ccc_gac)
++ ravb_ptp_init(ndev, priv->pdev);
++
+ if (netif_running(ndev)) {
+ if (priv->wol_enabled) {
+ ret = ravb_wol_restore(ndev);
+@@ -2491,6 +2505,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
+ ret = ravb_open(ndev);
+ if (ret < 0)
+ return ret;
++ ravb_set_rx_mode(ndev);
+ netif_device_attach(ndev);
+ }
+
+diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
+index 1374faa229a27..b6e426d8014d1 100644
+--- a/drivers/net/ethernet/renesas/sh_eth.c
++++ b/drivers/net/ethernet/renesas/sh_eth.c
+@@ -3072,6 +3072,8 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
+ struct bb_info *bitbang;
+ struct platform_device *pdev = mdp->pdev;
+ struct device *dev = &mdp->pdev->dev;
++ struct phy_device *phydev;
++ struct device_node *pn;
+
+ /* create bit control struct for PHY */
+ bitbang = devm_kzalloc(dev, sizeof(struct bb_info), GFP_KERNEL);
+@@ -3106,6 +3108,14 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
+ if (ret)
+ goto out_free_bus;
+
++ pn = of_parse_phandle(dev->of_node, "phy-handle", 0);
++ phydev = of_phy_find_device(pn);
++ if (phydev) {
++ phydev->mac_managed_pm = true;
++ put_device(&phydev->mdio.dev);
++ }
++ of_node_put(pn);
++
+ return 0;
+
+ out_free_bus:
+diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
+index 3e1ca7a8d0295..58cf7cc54f408 100644
+--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
++++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
+@@ -1273,7 +1273,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port,
+ bool removing;
+ int err = 0;
+
+- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
++ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+
+@@ -2783,7 +2783,8 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
+ if (!ofdpa_port)
+ continue;
+ nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+- ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
++ ofdpa_flow_tbl_del(ofdpa_port,
++ OFDPA_OP_FLAG_REMOVE | OFDPA_OP_FLAG_NOWAIT,
+ flow_entry);
+ }
+ spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags);
+diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+index 6781aa636d588..1b415fe6f9b9f 100644
+--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
++++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+@@ -2282,18 +2282,18 @@ static int __init sxgbe_cmdline_opt(char *str)
+ char *opt;
+
+ if (!str || !*str)
+- return -EINVAL;
++ return 1;
+ while ((opt = strsep(&str, ",")) != NULL) {
+ if (!strncmp(opt, "eee_timer:", 10)) {
+ if (kstrtoint(opt + 10, 0, &eee_timer))
+ goto err;
+ }
+ }
+- return 0;
++ return 1;
+
+ err:
+ pr_err("%s: ERROR broken module parameter conversion\n", __func__);
+- return -EINVAL;
++ return 1;
+ }
+
+ __setup("sxgbeeth=", sxgbe_cmdline_opt);
+diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+index 4639ed9438a3f..9265324666911 100644
+--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
++++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+@@ -118,7 +118,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev)
+ }
+
+ /* Get MAC address if available (DT) */
+- of_get_mac_address(node, priv->dev->dev_addr);
++ of_get_ethdev_address(node, priv->dev);
+
+ /* Get the TX/RX IRQ numbers */
+ for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) {
+diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
+index e7e2223aebbf5..b20dbda37c7ef 100644
+--- a/drivers/net/ethernet/sfc/ef10.c
++++ b/drivers/net/ethernet/sfc/ef10.c
+@@ -1297,14 +1297,17 @@ static void efx_ef10_fini_nic(struct efx_nic *efx)
+ {
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
++ spin_lock_bh(&efx->stats_lock);
+ kfree(nic_data->mc_stats);
+ nic_data->mc_stats = NULL;
++ spin_unlock_bh(&efx->stats_lock);
+ }
+
+ static int efx_ef10_init_nic(struct efx_nic *efx)
+ {
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+- netdev_features_t hw_enc_features = 0;
++ struct net_device *net_dev = efx->net_dev;
++ netdev_features_t tun_feats, tso_feats;
+ int rc;
+
+ if (nic_data->must_check_datapath_caps) {
+@@ -1349,20 +1352,30 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
+ nic_data->must_restore_piobufs = false;
+ }
+
+- /* add encapsulated checksum offload features */
++ /* encap features might change during reset if fw variant changed */
+ if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx))
+- hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+- /* add encapsulated TSO features */
+- if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
+- netdev_features_t encap_tso_features;
++ net_dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++ else
++ net_dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+
+- encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
+- NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
++ tun_feats = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
++ NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
++ tso_feats = NETIF_F_TSO | NETIF_F_TSO6;
+
+- hw_enc_features |= encap_tso_features | NETIF_F_TSO;
+- efx->net_dev->features |= encap_tso_features;
++ if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
++ /* If this is first nic_init, or if it is a reset and a new fw
++ * variant has added new features, enable them by default.
++ * If the features are not new, maintain their current value.
++ */
++ if (!(net_dev->hw_features & tun_feats))
++ net_dev->features |= tun_feats;
++ net_dev->hw_enc_features |= tun_feats | tso_feats;
++ net_dev->hw_features |= tun_feats;
++ } else {
++ net_dev->hw_enc_features &= ~(tun_feats | tso_feats);
++ net_dev->hw_features &= ~tun_feats;
++ net_dev->features &= ~tun_feats;
+ }
+- efx->net_dev->hw_enc_features = hw_enc_features;
+
+ /* don't fail init if RSS setup doesn't work */
+ rc = efx->type->rx_push_rss_config(efx, false,
+@@ -1841,9 +1854,14 @@ static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
+
+ efx_ef10_get_stat_mask(efx, mask);
+
+- efx_nic_copy_stats(efx, nic_data->mc_stats);
+- efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
+- mask, stats, nic_data->mc_stats, false);
++ /* If NIC was fini'd (probably resetting), then we can't read
++ * updated stats right now.
++ */
++ if (nic_data->mc_stats) {
++ efx_nic_copy_stats(efx, nic_data->mc_stats);
++ efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
++ mask, stats, nic_data->mc_stats, false);
++ }
+
+ /* Update derived statistics */
+ efx_nic_fix_nodesc_drop_stat(efx,
+@@ -1932,7 +1950,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx)
+
+ efx_update_sw_stats(efx, stats);
+ out:
++ /* releasing a DMA coherent buffer with BH disabled can panic */
++ spin_unlock_bh(&efx->stats_lock);
+ efx_nic_free_buffer(efx, &stats_buf);
++ spin_lock_bh(&efx->stats_lock);
+ return rc;
+ }
+
+@@ -2256,7 +2277,7 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ * guaranteed to satisfy the second as we only attempt TSO if
+ * inner_network_header <= 208.
+ */
+- ip_tot_len = -EFX_TSO2_MAX_HDRLEN;
++ ip_tot_len = 0x10000 - EFX_TSO2_MAX_HDRLEN;
+ EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN +
+ (tcp->doff << 2u) > ip_tot_len);
+
+@@ -3268,6 +3289,30 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
+ bool was_enabled = efx->port_enabled;
+ int rc;
+
++#ifdef CONFIG_SFC_SRIOV
++ /* If this function is a VF and we have access to the parent PF,
++ * then use the PF control path to attempt to change the VF MAC address.
++ */
++ if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) {
++ struct efx_nic *efx_pf = pci_get_drvdata(efx->pci_dev->physfn);
++ struct efx_ef10_nic_data *nic_data = efx->nic_data;
++ u8 mac[ETH_ALEN];
++
++ /* net_dev->dev_addr can be zeroed by efx_net_stop in
++ * efx_ef10_sriov_set_vf_mac, so pass in a copy.
++ */
++ ether_addr_copy(mac, efx->net_dev->dev_addr);
++
++ rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, mac);
++ if (!rc)
++ return 0;
++
++ netif_dbg(efx, drv, efx->net_dev,
++ "Updating VF mac via PF failed (%d), setting directly\n",
++ rc);
++ }
++#endif
++
+ efx_device_detach_sync(efx);
+ efx_net_stop(efx->net_dev);
+
+@@ -3290,40 +3335,6 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
+ efx_net_open(efx->net_dev);
+ efx_device_attach_if_not_resetting(efx);
+
+-#ifdef CONFIG_SFC_SRIOV
+- if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) {
+- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+- struct pci_dev *pci_dev_pf = efx->pci_dev->physfn;
+-
+- if (rc == -EPERM) {
+- struct efx_nic *efx_pf;
+-
+- /* Switch to PF and change MAC address on vport */
+- efx_pf = pci_get_drvdata(pci_dev_pf);
+-
+- rc = efx_ef10_sriov_set_vf_mac(efx_pf,
+- nic_data->vf_index,
+- efx->net_dev->dev_addr);
+- } else if (!rc) {
+- struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf);
+- struct efx_ef10_nic_data *nic_data = efx_pf->nic_data;
+- unsigned int i;
+-
+- /* MAC address successfully changed by VF (with MAC
+- * spoofing) so update the parent PF if possible.
+- */
+- for (i = 0; i < efx_pf->vf_count; ++i) {
+- struct ef10_vf *vf = nic_data->vf + i;
+-
+- if (vf->efx == efx) {
+- ether_addr_copy(vf->mac,
+- efx->net_dev->dev_addr);
+- return 0;
+- }
+- }
+- }
+- } else
+-#endif
+ if (rc == -EPERM) {
+ netif_err(efx, drv, efx->net_dev,
+ "Cannot change MAC address; use sfboot to enable"
+@@ -3579,6 +3590,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx)
+ n_parts++;
+ }
+
++ if (!n_parts) {
++ kfree(parts);
++ return 0;
++ }
++
+ rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
+ fail:
+ if (rc)
+@@ -3995,7 +4011,10 @@ static unsigned int ef10_check_caps(const struct efx_nic *efx,
+ NETIF_F_HW_VLAN_CTAG_FILTER | \
+ NETIF_F_IPV6_CSUM | \
+ NETIF_F_RXHASH | \
+- NETIF_F_NTUPLE)
++ NETIF_F_NTUPLE | \
++ NETIF_F_SG | \
++ NETIF_F_RXCSUM | \
++ NETIF_F_RXALL)
+
+ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
+ .is_vf = true,
+diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c
+index 67fe44db6b612..b9429e8faba1e 100644
+--- a/drivers/net/ethernet/sfc/ef100_netdev.c
++++ b/drivers/net/ethernet/sfc/ef100_netdev.c
+@@ -96,6 +96,8 @@ static int ef100_net_stop(struct net_device *net_dev)
+ efx_mcdi_free_vis(efx);
+ efx_remove_interrupts(efx);
+
++ efx->state = STATE_NET_DOWN;
++
+ return 0;
+ }
+
+@@ -172,6 +174,8 @@ static int ef100_net_open(struct net_device *net_dev)
+ efx_link_status_changed(efx);
+ mutex_unlock(&efx->mac_lock);
+
++ efx->state = STATE_NET_UP;
++
+ return 0;
+
+ fail:
+@@ -200,6 +204,7 @@ static netdev_tx_t ef100_hard_start_xmit(struct sk_buff *skb,
+ skb->len, skb->data_len, channel->channel);
+ if (!efx->n_channels || !efx->n_tx_channels || !channel) {
+ netif_stop_queue(net_dev);
++ dev_kfree_skb_any(skb);
+ goto err;
+ }
+
+@@ -271,7 +276,7 @@ int ef100_register_netdev(struct efx_nic *efx)
+ /* Always start with carrier off; PHY events will detect the link */
+ netif_carrier_off(net_dev);
+
+- efx->state = STATE_READY;
++ efx->state = STATE_NET_DOWN;
+ rtnl_unlock();
+ efx_init_mcdi_logging(efx);
+
+diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
+index 518268ce20644..d35cafd422b1c 100644
+--- a/drivers/net/ethernet/sfc/ef100_nic.c
++++ b/drivers/net/ethernet/sfc/ef100_nic.c
+@@ -609,6 +609,9 @@ static size_t ef100_update_stats(struct efx_nic *efx,
+ ef100_common_stat_mask(mask);
+ ef100_ethtool_stat_mask(mask);
+
++ if (!mc_stats)
++ return 0;
++
+ efx_nic_copy_stats(efx, mc_stats);
+ efx_nic_update_stats(ef100_stat_desc, EF100_STAT_COUNT, mask,
+ stats, mc_stats, false);
+diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
+index 26ef51d6b5423..e92379bd8f447 100644
+--- a/drivers/net/ethernet/sfc/ef100_tx.c
++++ b/drivers/net/ethernet/sfc/ef100_tx.c
+@@ -349,7 +349,8 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
+ * Returns 0 on success, error code otherwise. In case of an error this
+ * function will free the SKB.
+ */
+-int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
++netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue,
++ struct sk_buff *skb)
+ {
+ unsigned int old_insert_count = tx_queue->insert_count;
+ struct efx_nic *efx = tx_queue->efx;
+diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
+index 752d6406f07ed..eeaecea77cb83 100644
+--- a/drivers/net/ethernet/sfc/ef10_sriov.c
++++ b/drivers/net/ethernet/sfc/ef10_sriov.c
+@@ -408,8 +408,9 @@ fail1:
+ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
+ {
+ struct pci_dev *dev = efx->pci_dev;
++ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ unsigned int vfs_assigned = pci_vfs_assigned(dev);
+- int rc = 0;
++ int i, rc = 0;
+
+ if (vfs_assigned && !force) {
+ netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; "
+@@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
+ return -EBUSY;
+ }
+
+- if (!vfs_assigned)
++ if (!vfs_assigned) {
++ for (i = 0; i < efx->vf_count; i++)
++ nic_data->vf[i].pci_dev = NULL;
+ pci_disable_sriov(dev);
+- else
++ } else {
+ rc = -EBUSY;
++ }
+
+ efx_ef10_sriov_free_vf_vswitching(efx);
+ efx->vf_count = 0;
+@@ -523,7 +527,7 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
+ goto fail;
+
+ if (vf->efx)
+- ether_addr_copy(vf->efx->net_dev->dev_addr, mac);
++ eth_hw_addr_set(vf->efx->net_dev, mac);
+ }
+
+ ether_addr_copy(vf->mac, mac);
+diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
+index 43ef4f5290281..bc1f4350360bc 100644
+--- a/drivers/net/ethernet/sfc/efx.c
++++ b/drivers/net/ethernet/sfc/efx.c
+@@ -105,14 +105,6 @@ static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
+ u32 flags);
+
+-#define EFX_ASSERT_RESET_SERIALISED(efx) \
+- do { \
+- if ((efx->state == STATE_READY) || \
+- (efx->state == STATE_RECOVERY) || \
+- (efx->state == STATE_DISABLED)) \
+- ASSERT_RTNL(); \
+- } while (0)
+-
+ /**************************************************************************
+ *
+ * Port handling
+@@ -136,7 +128,7 @@ static int efx_probe_port(struct efx_nic *efx)
+ return rc;
+
+ /* Initialise MAC address to permanent address */
+- ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
++ eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
+
+ return 0;
+ }
+@@ -377,6 +369,8 @@ static int efx_probe_all(struct efx_nic *efx)
+ if (rc)
+ goto fail5;
+
++ efx->state = STATE_NET_DOWN;
++
+ return 0;
+
+ fail5:
+@@ -543,7 +537,9 @@ int efx_net_open(struct net_device *net_dev)
+ efx_start_all(efx);
+ if (efx->state == STATE_DISABLED || efx->reset_pending)
+ netif_device_detach(efx->net_dev);
+- efx_selftest_async_start(efx);
++ else
++ efx->state = STATE_NET_UP;
++
+ return 0;
+ }
+
+@@ -719,8 +715,6 @@ static int efx_register_netdev(struct efx_nic *efx)
+ * already requested. If so, the NIC is probably hosed so we
+ * abort.
+ */
+- efx->state = STATE_READY;
+- smp_mb(); /* ensure we change state before checking reset_pending */
+ if (efx->reset_pending) {
+ pci_err(efx->pci_dev, "aborting probe due to scheduled reset\n");
+ rc = -EIO;
+@@ -747,6 +741,8 @@ static int efx_register_netdev(struct efx_nic *efx)
+
+ efx_associate(efx);
+
++ efx->state = STATE_NET_DOWN;
++
+ rtnl_unlock();
+
+ rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
+@@ -848,7 +844,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
+ /* Flush reset_work. It can no longer be scheduled since we
+ * are not READY.
+ */
+- BUG_ON(efx->state == STATE_READY);
++ WARN_ON(efx_net_active(efx->state));
+ efx_flush_reset_workqueue(efx);
+
+ efx_disable_interrupts(efx);
+@@ -1003,18 +999,18 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
+ }
+
+ /* Determine netdevice features */
+- net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+- NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
+- if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
+- net_dev->features |= NETIF_F_TSO6;
+- /* Check whether device supports TSO */
+- if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
+- net_dev->features &= ~NETIF_F_ALL_TSO;
++ net_dev->features |= efx->type->offload_features;
++
++ /* Add TSO features */
++ if (efx->type->tso_versions && efx->type->tso_versions(efx))
++ net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6;
++
+ /* Mask for features that also apply to VLAN devices */
+ net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+ NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
+ NETIF_F_RXCSUM);
+
++ /* Determine user configurable features */
+ net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+
+ /* Disable receiving frames with bad FCS, by default. */
+@@ -1153,13 +1149,13 @@ static int efx_pm_freeze(struct device *dev)
+
+ rtnl_lock();
+
+- if (efx->state != STATE_DISABLED) {
+- efx->state = STATE_UNINIT;
+-
++ if (efx_net_active(efx->state)) {
+ efx_device_detach_sync(efx);
+
+ efx_stop_all(efx);
+ efx_disable_interrupts(efx);
++
++ efx->state = efx_freeze(efx->state);
+ }
+
+ rtnl_unlock();
+@@ -1174,7 +1170,7 @@ static int efx_pm_thaw(struct device *dev)
+
+ rtnl_lock();
+
+- if (efx->state != STATE_DISABLED) {
++ if (efx_frozen(efx->state)) {
+ rc = efx_enable_interrupts(efx);
+ if (rc)
+ goto fail;
+@@ -1187,7 +1183,7 @@ static int efx_pm_thaw(struct device *dev)
+
+ efx_device_attach_if_not_resetting(efx);
+
+- efx->state = STATE_READY;
++ efx->state = efx_thaw(efx->state);
+
+ efx->type->resume_wol(efx);
+ }
+diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
+index 3dbea028b325c..450fcedb7042a 100644
+--- a/drivers/net/ethernet/sfc/efx_channels.c
++++ b/drivers/net/ethernet/sfc/efx_channels.c
+@@ -308,6 +308,7 @@ int efx_probe_interrupts(struct efx_nic *efx)
+ efx->n_channels = 1;
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
++ efx->tx_channel_offset = 0;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ rc = pci_enable_msi(efx->pci_dev);
+@@ -328,6 +329,7 @@ int efx_probe_interrupts(struct efx_nic *efx)
+ efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
++ efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ efx->legacy_irq = efx->pci_dev->irq;
+@@ -763,9 +765,90 @@ void efx_remove_channels(struct efx_nic *efx)
+ kfree(efx->xdp_tx_queues);
+ }
+
++static int efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number,
++ struct efx_tx_queue *tx_queue)
++{
++ if (xdp_queue_number >= efx->xdp_tx_queue_count)
++ return -EINVAL;
++
++ netif_dbg(efx, drv, efx->net_dev,
++ "Channel %u TXQ %u is XDP %u, HW %u\n",
++ tx_queue->channel->channel, tx_queue->label,
++ xdp_queue_number, tx_queue->queue);
++ efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
++ return 0;
++}
++
++static void efx_set_xdp_channels(struct efx_nic *efx)
++{
++ struct efx_tx_queue *tx_queue;
++ struct efx_channel *channel;
++ unsigned int next_queue = 0;
++ int xdp_queue_number = 0;
++ int rc;
++
++ /* We need to mark which channels really have RX and TX
++ * queues, and adjust the TX queue numbers if we have separate
++ * RX-only and TX-only channels.
++ */
++ efx_for_each_channel(channel, efx) {
++ if (channel->channel < efx->tx_channel_offset)
++ continue;
++
++ if (efx_channel_is_xdp_tx(channel)) {
++ efx_for_each_channel_tx_queue(tx_queue, channel) {
++ tx_queue->queue = next_queue++;
++ rc = efx_set_xdp_tx_queue(efx, xdp_queue_number,
++ tx_queue);
++ if (rc == 0)
++ xdp_queue_number++;
++ }
++ } else {
++ efx_for_each_channel_tx_queue(tx_queue, channel) {
++ tx_queue->queue = next_queue++;
++ netif_dbg(efx, drv, efx->net_dev,
++ "Channel %u TXQ %u is HW %u\n",
++ channel->channel, tx_queue->label,
++ tx_queue->queue);
++ }
++
++ /* If XDP is borrowing queues from net stack, it must
++ * use the queue with no csum offload, which is the
++ * first one of the channel
++ * (note: tx_queue_by_type is not initialized yet)
++ */
++ if (efx->xdp_txq_queues_mode ==
++ EFX_XDP_TX_QUEUES_BORROWED) {
++ tx_queue = &channel->tx_queue[0];
++ rc = efx_set_xdp_tx_queue(efx, xdp_queue_number,
++ tx_queue);
++ if (rc == 0)
++ xdp_queue_number++;
++ }
++ }
++ }
++ WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED &&
++ xdp_queue_number != efx->xdp_tx_queue_count);
++ WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED &&
++ xdp_queue_number > efx->xdp_tx_queue_count);
++
++ /* If we have more CPUs than assigned XDP TX queues, assign the already
++ * existing queues to the exceeding CPUs
++ */
++ next_queue = 0;
++ while (xdp_queue_number < efx->xdp_tx_queue_count) {
++ tx_queue = efx->xdp_tx_queues[next_queue++];
++ rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
++ if (rc == 0)
++ xdp_queue_number++;
++ }
++}
++
+ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
+ {
+- struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
++ struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
++ *ptp_channel = efx_ptp_channel(efx);
++ struct efx_ptp_data *ptp_data = efx->ptp_data;
+ unsigned int i, next_buffer_table = 0;
+ u32 old_rxq_entries, old_txq_entries;
+ int rc, rc2;
+@@ -818,11 +901,8 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
+ old_txq_entries = efx->txq_entries;
+ efx->rxq_entries = rxq_entries;
+ efx->txq_entries = txq_entries;
+- for (i = 0; i < efx->n_channels; i++) {
+- channel = efx->channel[i];
+- efx->channel[i] = other_channel[i];
+- other_channel[i] = channel;
+- }
++ for (i = 0; i < efx->n_channels; i++)
++ swap(efx->channel[i], other_channel[i]);
+
+ /* Restart buffer table allocation */
+ efx->next_buffer_table = next_buffer_table;
+@@ -837,7 +917,9 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
+ efx_init_napi_channel(efx->channel[i]);
+ }
+
++ efx_set_xdp_channels(efx);
+ out:
++ efx->ptp_data = NULL;
+ /* Destroy unused channel structures */
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = other_channel[i];
+@@ -848,6 +930,7 @@ out:
+ }
+ }
+
++ efx->ptp_data = ptp_data;
+ rc2 = efx_soft_enable_interrupts(efx);
+ if (rc2) {
+ rc = rc ? rc : rc2;
+@@ -864,40 +947,17 @@ rollback:
+ /* Swap back */
+ efx->rxq_entries = old_rxq_entries;
+ efx->txq_entries = old_txq_entries;
+- for (i = 0; i < efx->n_channels; i++) {
+- channel = efx->channel[i];
+- efx->channel[i] = other_channel[i];
+- other_channel[i] = channel;
+- }
++ for (i = 0; i < efx->n_channels; i++)
++ swap(efx->channel[i], other_channel[i]);
++ efx_ptp_update_channel(efx, ptp_channel);
+ goto out;
+ }
+
+-static inline int
+-efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number,
+- struct efx_tx_queue *tx_queue)
+-{
+- if (xdp_queue_number >= efx->xdp_tx_queue_count)
+- return -EINVAL;
+-
+- netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
+- tx_queue->channel->channel, tx_queue->label,
+- xdp_queue_number, tx_queue->queue);
+- efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+- return 0;
+-}
+-
+ int efx_set_channels(struct efx_nic *efx)
+ {
+- struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
+- unsigned int next_queue = 0;
+- int xdp_queue_number;
+ int rc;
+
+- efx->tx_channel_offset =
+- efx_separate_tx_channels ?
+- efx->n_channels - efx->n_tx_channels : 0;
+-
+ if (efx->xdp_tx_queue_count) {
+ EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+@@ -909,61 +969,14 @@ int efx_set_channels(struct efx_nic *efx)
+ return -ENOMEM;
+ }
+
+- /* We need to mark which channels really have RX and TX
+- * queues, and adjust the TX queue numbers if we have separate
+- * RX-only and TX-only channels.
+- */
+- xdp_queue_number = 0;
+ efx_for_each_channel(channel, efx) {
+ if (channel->channel < efx->n_rx_channels)
+ channel->rx_queue.core_index = channel->channel;
+ else
+ channel->rx_queue.core_index = -1;
+-
+- if (channel->channel >= efx->tx_channel_offset) {
+- if (efx_channel_is_xdp_tx(channel)) {
+- efx_for_each_channel_tx_queue(tx_queue, channel) {
+- tx_queue->queue = next_queue++;
+- rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
+- if (rc == 0)
+- xdp_queue_number++;
+- }
+- } else {
+- efx_for_each_channel_tx_queue(tx_queue, channel) {
+- tx_queue->queue = next_queue++;
+- netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
+- channel->channel, tx_queue->label,
+- tx_queue->queue);
+- }
+-
+- /* If XDP is borrowing queues from net stack, it must use the queue
+- * with no csum offload, which is the first one of the channel
+- * (note: channel->tx_queue_by_type is not initialized yet)
+- */
+- if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) {
+- tx_queue = &channel->tx_queue[0];
+- rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
+- if (rc == 0)
+- xdp_queue_number++;
+- }
+- }
+- }
+ }
+- WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED &&
+- xdp_queue_number != efx->xdp_tx_queue_count);
+- WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED &&
+- xdp_queue_number > efx->xdp_tx_queue_count);
+
+- /* If we have more CPUs than assigned XDP TX queues, assign the already
+- * existing queues to the exceeding CPUs
+- */
+- next_queue = 0;
+- while (xdp_queue_number < efx->xdp_tx_queue_count) {
+- tx_queue = efx->xdp_tx_queues[next_queue++];
+- rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
+- if (rc == 0)
+- xdp_queue_number++;
+- }
++ efx_set_xdp_channels(efx);
+
+ rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
+ if (rc)
+@@ -1107,7 +1120,7 @@ void efx_start_channels(struct efx_nic *efx)
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+
+- efx_for_each_channel(channel, efx) {
++ efx_for_each_channel_rev(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_init_tx_queue(tx_queue);
+ atomic_inc(&efx->active_queues);
+diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
+index 896b592531972..7249ea594b31d 100644
+--- a/drivers/net/ethernet/sfc/efx_common.c
++++ b/drivers/net/ethernet/sfc/efx_common.c
+@@ -181,11 +181,11 @@ int efx_set_mac_address(struct net_device *net_dev, void *data)
+
+ /* save old address */
+ ether_addr_copy(old_addr, net_dev->dev_addr);
+- ether_addr_copy(net_dev->dev_addr, new_addr);
++ eth_hw_addr_set(net_dev, new_addr);
+ if (efx->type->set_mac_address) {
+ rc = efx->type->set_mac_address(efx);
+ if (rc) {
+- ether_addr_copy(net_dev->dev_addr, old_addr);
++ eth_hw_addr_set(net_dev, old_addr);
+ return rc;
+ }
+ }
+@@ -542,6 +542,8 @@ void efx_start_all(struct efx_nic *efx)
+ /* Start the hardware monitor if there is one */
+ efx_start_monitor(efx);
+
++ efx_selftest_async_start(efx);
++
+ /* Link state detection is normally event-driven; we have
+ * to poll now because we could have missed a change
+ */
+@@ -897,7 +899,7 @@ static void efx_reset_work(struct work_struct *data)
+ * have changed by now. Now that we have the RTNL lock,
+ * it cannot change again.
+ */
+- if (efx->state == STATE_READY)
++ if (efx_net_active(efx->state))
+ (void)efx_reset(efx, method);
+
+ rtnl_unlock();
+@@ -907,7 +909,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
+ {
+ enum reset_type method;
+
+- if (efx->state == STATE_RECOVERY) {
++ if (efx_recovering(efx->state)) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "recovering: skip scheduling %s reset\n",
+ RESET_TYPE(type));
+@@ -942,7 +944,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
+ /* If we're not READY then just leave the flags set as the cue
+ * to abort probing or reschedule the reset later.
+ */
+- if (READ_ONCE(efx->state) != STATE_READY)
++ if (!efx_net_active(READ_ONCE(efx->state)))
+ return;
+
+ /* efx_process_channel() will no longer read events once a
+@@ -1216,7 +1218,7 @@ static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
+ rtnl_lock();
+
+ if (efx->state != STATE_DISABLED) {
+- efx->state = STATE_RECOVERY;
++ efx->state = efx_recover(efx->state);
+ efx->reset_pending = 0;
+
+ efx_device_detach_sync(efx);
+@@ -1270,7 +1272,7 @@ static void efx_io_resume(struct pci_dev *pdev)
+ netif_err(efx, hw, efx->net_dev,
+ "efx_reset failed after PCI error (%d)\n", rc);
+ } else {
+- efx->state = STATE_READY;
++ efx->state = efx_recovered(efx->state);
+ netif_dbg(efx, hw, efx->net_dev,
+ "Done resetting and resuming IO after PCI error.\n");
+ }
+diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h
+index 65513fd0cf6c4..c72e819da8fd3 100644
+--- a/drivers/net/ethernet/sfc/efx_common.h
++++ b/drivers/net/ethernet/sfc/efx_common.h
+@@ -45,9 +45,7 @@ int efx_reconfigure_port(struct efx_nic *efx);
+
+ #define EFX_ASSERT_RESET_SERIALISED(efx) \
+ do { \
+- if ((efx->state == STATE_READY) || \
+- (efx->state == STATE_RECOVERY) || \
+- (efx->state == STATE_DISABLED)) \
++ if (efx->state != STATE_UNINIT) \
+ ASSERT_RTNL(); \
+ } while (0)
+
+@@ -64,7 +62,7 @@ void efx_port_dummy_op_void(struct efx_nic *efx);
+
+ static inline int efx_check_disabled(struct efx_nic *efx)
+ {
+- if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
++ if (efx->state == STATE_DISABLED || efx_recovering(efx->state)) {
+ netif_err(efx, drv, efx->net_dev,
+ "device is disabled due to earlier errors\n");
+ return -EIO;
+diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c
+index bf1443539a1a4..3846b76b89720 100644
+--- a/drivers/net/ethernet/sfc/ethtool_common.c
++++ b/drivers/net/ethernet/sfc/ethtool_common.c
+@@ -137,7 +137,7 @@ void efx_ethtool_self_test(struct net_device *net_dev,
+ if (!efx_tests)
+ goto fail;
+
+- if (efx->state != STATE_READY) {
++ if (!efx_net_active(efx->state)) {
+ rc = -EBUSY;
+ goto out;
+ }
+@@ -563,20 +563,14 @@ int efx_ethtool_get_link_ksettings(struct net_device *net_dev,
+ {
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_link_state *link_state = &efx->link_state;
+- u32 supported;
+
+ mutex_lock(&efx->mac_lock);
+ efx_mcdi_phy_get_link_ksettings(efx, cmd);
+ mutex_unlock(&efx->mac_lock);
+
+ /* Both MACs support pause frames (bidirectional and respond-only) */
+- ethtool_convert_link_mode_to_legacy_u32(&supported,
+- cmd->link_modes.supported);
+-
+- supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+-
+- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+- supported);
++ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
++ ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
+
+ if (LOOPBACK_INTERNAL(efx)) {
+ cmd->base.speed = link_state->speed;
+diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
+index 423bdf81200fd..c68837a951f47 100644
+--- a/drivers/net/ethernet/sfc/falcon/efx.c
++++ b/drivers/net/ethernet/sfc/falcon/efx.c
+@@ -1044,7 +1044,7 @@ static int ef4_probe_port(struct ef4_nic *efx)
+ return rc;
+
+ /* Initialise MAC address to permanent address */
+- ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
++ eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
+
+ return 0;
+ }
+@@ -2162,11 +2162,11 @@ static int ef4_set_mac_address(struct net_device *net_dev, void *data)
+
+ /* save old address */
+ ether_addr_copy(old_addr, net_dev->dev_addr);
+- ether_addr_copy(net_dev->dev_addr, new_addr);
++ eth_hw_addr_set(net_dev, new_addr);
+ if (efx->type->set_mac_address) {
+ rc = efx->type->set_mac_address(efx);
+ if (rc) {
+- ether_addr_copy(net_dev->dev_addr, old_addr);
++ eth_hw_addr_set(net_dev, old_addr);
+ return rc;
+ }
+ }
+diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
+index 966f13e7475dd..0c6cc21913693 100644
+--- a/drivers/net/ethernet/sfc/falcon/rx.c
++++ b/drivers/net/ethernet/sfc/falcon/rx.c
+@@ -110,6 +110,8 @@ static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue)
+ struct ef4_rx_page_state *state;
+ unsigned index;
+
++ if (unlikely(!rx_queue->page_ring))
++ return NULL;
+ index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+ page = rx_queue->page_ring[index];
+ if (page == NULL)
+@@ -293,6 +295,9 @@ static void ef4_recycle_rx_pages(struct ef4_channel *channel,
+ {
+ struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+
++ if (unlikely(!rx_queue->page_ring))
++ return;
++
+ do {
+ ef4_recycle_rx_page(channel, rx_buf);
+ rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+@@ -728,7 +733,10 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx,
+ efx->rx_bufs_per_page);
+ rx_queue->page_ring = kcalloc(page_ring_size,
+ sizeof(*rx_queue->page_ring), GFP_KERNEL);
+- rx_queue->page_ptr_mask = page_ring_size - 1;
++ if (!rx_queue->page_ring)
++ rx_queue->page_ptr_mask = 0;
++ else
++ rx_queue->page_ptr_mask = page_ring_size - 1;
+ }
+
+ void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue)
+diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
+index 40b2af8bfb81c..2ac3c8f1b04b5 100644
+--- a/drivers/net/ethernet/sfc/filter.h
++++ b/drivers/net/ethernet/sfc/filter.h
+@@ -157,7 +157,8 @@ struct efx_filter_spec {
+ u32 flags:6;
+ u32 dmaq_id:12;
+ u32 rss_context;
+- __be16 outer_vid __aligned(4); /* allow jhash2() of match values */
++ u32 vport_id;
++ __be16 outer_vid;
+ __be16 inner_vid;
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
+index be6bfd6b7ec75..50baf62b2cbc6 100644
+--- a/drivers/net/ethernet/sfc/mcdi.c
++++ b/drivers/net/ethernet/sfc/mcdi.c
+@@ -163,9 +163,9 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
+ /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */
+ spin_lock_bh(&mcdi->iface_lock);
+ ++mcdi->seqno;
++ seqno = mcdi->seqno & SEQ_MASK;
+ spin_unlock_bh(&mcdi->iface_lock);
+
+- seqno = mcdi->seqno & SEQ_MASK;
+ xflags = 0;
+ if (mcdi->mode == MCDI_MODE_EVENTS)
+ xflags |= MCDI_HEADER_XFLAGS_EVREQ;
+diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c
+index c4fe3c48ac46a..eccb97a5d9387 100644
+--- a/drivers/net/ethernet/sfc/mcdi_port_common.c
++++ b/drivers/net/ethernet/sfc/mcdi_port_common.c
+@@ -974,12 +974,15 @@ static u32 efx_mcdi_phy_module_type(struct efx_nic *efx)
+
+ /* A QSFP+ NIC may actually have an SFP+ module attached.
+ * The ID is page 0, byte 0.
++ * QSFP28 is of type SFF_8636; however, this is treated
++ * the same by ethtool, so we can also treat them the same.
+ */
+ switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) {
+- case 0x3:
++ case 0x3: /* SFP */
+ return MC_CMD_MEDIA_SFP_PLUS;
+- case 0xc:
+- case 0xd:
++ case 0xc: /* QSFP */
++ case 0xd: /* QSFP+ */
++ case 0x11: /* QSFP28 */
+ return MC_CMD_MEDIA_QSFP_PLUS;
+ default:
+ return 0;
+@@ -1077,7 +1080,7 @@ int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *mo
+
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ modinfo->type = ETH_MODULE_SFF_8436;
+- modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
++ modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+ break;
+
+ default:
+diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
+index f6981810039d0..6df500dbb6b7f 100644
+--- a/drivers/net/ethernet/sfc/net_driver.h
++++ b/drivers/net/ethernet/sfc/net_driver.h
+@@ -627,12 +627,54 @@ enum efx_int_mode {
+ #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI)
+
+ enum nic_state {
+- STATE_UNINIT = 0, /* device being probed/removed or is frozen */
+- STATE_READY = 1, /* hardware ready and netdev registered */
+- STATE_DISABLED = 2, /* device disabled due to hardware errors */
+- STATE_RECOVERY = 3, /* device recovering from PCI error */
++ STATE_UNINIT = 0, /* device being probed/removed */
++ STATE_NET_DOWN, /* hardware probed and netdev registered */
++ STATE_NET_UP, /* ready for traffic */
++ STATE_DISABLED, /* device disabled due to hardware errors */
++
++ STATE_RECOVERY = 0x100,/* recovering from PCI error */
++ STATE_FROZEN = 0x200, /* frozen by power management */
+ };
+
++static inline bool efx_net_active(enum nic_state state)
++{
++ return state == STATE_NET_DOWN || state == STATE_NET_UP;
++}
++
++static inline bool efx_frozen(enum nic_state state)
++{
++ return state & STATE_FROZEN;
++}
++
++static inline bool efx_recovering(enum nic_state state)
++{
++ return state & STATE_RECOVERY;
++}
++
++static inline enum nic_state efx_freeze(enum nic_state state)
++{
++ WARN_ON(!efx_net_active(state));
++ return state | STATE_FROZEN;
++}
++
++static inline enum nic_state efx_thaw(enum nic_state state)
++{
++ WARN_ON(!efx_frozen(state));
++ return state & ~STATE_FROZEN;
++}
++
++static inline enum nic_state efx_recover(enum nic_state state)
++{
++ WARN_ON(!efx_net_active(state));
++ return state | STATE_RECOVERY;
++}
++
++static inline enum nic_state efx_recovered(enum nic_state state)
++{
++ WARN_ON(!efx_recovering(state));
++ return state & ~STATE_RECOVERY;
++}
++
+ /* Forward declaration */
+ struct efx_nic;
+
+@@ -1533,7 +1575,7 @@ static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
+
+ static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
+ {
+- return true;
++ return channel && channel->channel >= channel->efx->tx_channel_offset;
+ }
+
+ static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel)
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 797e51802ccbb..a2b4e3befa591 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -45,6 +45,7 @@
+ #include "farch_regs.h"
+ #include "tx.h"
+ #include "nic.h" /* indirectly includes ptp.h */
++#include "efx_channels.h"
+
+ /* Maximum number of events expected to make up a PTP event */
+ #define MAX_EVENT_FRAGS 3
+@@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
+ return efx->ptp_data ? efx->ptp_data->channel : NULL;
+ }
+
++void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel)
++{
++ if (efx->ptp_data)
++ efx->ptp_data->channel = channel;
++}
++
+ static u32 last_sync_timestamp_major(struct efx_nic *efx)
+ {
+ struct efx_channel *channel = efx_ptp_channel(efx);
+@@ -1093,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+
+ tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ if (tx_queue && tx_queue->timestamping) {
++ /* This code invokes normal driver TX code which is always
++ * protected from softirqs when called from generic TX code,
++ * which in turn disables preemption. Look at __dev_queue_xmit
++ * which uses rcu_read_lock_bh disabling preemption for RCU
++ * plus disabling softirqs. We do not need RCU reader
++ * protection here.
++ *
++ * Although it is theoretically safe for current PTP TX/RX code
++ * running without disabling softirqs, there are three good
++ * reasons for doing so:
++ *
++ * 1) The code invoked is mainly implemented for non-PTP
++ * packets and it is always executed with softirqs
++ * disabled.
++ * 2) This being a single PTP packet, better to not
++ * interrupt its processing by softirqs which can lead
++ * to high latencies.
++ * 3) netdev_xmit_more checks preemption is disabled and
++ * triggers a BUG_ON if not.
++ */
++ local_bh_disable();
+ efx_enqueue_skb(tx_queue, skb);
++ local_bh_enable();
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+@@ -1443,6 +1472,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
+ int rc = 0;
+ unsigned int pos;
+
++ if (efx->ptp_data) {
++ efx->ptp_data->channel = channel;
++ return 0;
++ }
++
+ ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
+ efx->ptp_data = ptp;
+ if (!efx->ptp_data)
+@@ -2179,7 +2213,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
+ .pre_probe = efx_ptp_probe_channel,
+ .post_remove = efx_ptp_remove_channel,
+ .get_name = efx_ptp_get_channel_name,
+- /* no copy operation; there is no need to reallocate this channel */
++ .copy = efx_copy_channel,
+ .receive_skb = efx_ptp_rx,
+ .want_txqs = efx_ptp_want_txqs,
+ .keep_eventq = false,
+diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h
+index 9855e8c9e544d..7b1ef7002b3f0 100644
+--- a/drivers/net/ethernet/sfc/ptp.h
++++ b/drivers/net/ethernet/sfc/ptp.h
+@@ -16,6 +16,7 @@ struct ethtool_ts_info;
+ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
+ void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
+ struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
++void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel);
+ void efx_ptp_remove(struct efx_nic *efx);
+ int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
+ int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
+diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
+index 68fc7d317693b..a804c754cd7d0 100644
+--- a/drivers/net/ethernet/sfc/rx_common.c
++++ b/drivers/net/ethernet/sfc/rx_common.c
+@@ -45,6 +45,8 @@ static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
+ unsigned int index;
+ struct page *page;
+
++ if (unlikely(!rx_queue->page_ring))
++ return NULL;
+ index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+ page = rx_queue->page_ring[index];
+ if (page == NULL)
+@@ -114,6 +116,9 @@ void efx_recycle_rx_pages(struct efx_channel *channel,
+ {
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
++ if (unlikely(!rx_queue->page_ring))
++ return;
++
+ do {
+ efx_recycle_rx_page(channel, rx_buf);
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+@@ -150,7 +155,10 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+ efx->rx_bufs_per_page);
+ rx_queue->page_ring = kcalloc(page_ring_size,
+ sizeof(*rx_queue->page_ring), GFP_KERNEL);
+- rx_queue->page_ptr_mask = page_ring_size - 1;
++ if (!rx_queue->page_ring)
++ rx_queue->page_ptr_mask = 0;
++ else
++ rx_queue->page_ptr_mask = page_ring_size - 1;
+ }
+
+ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+@@ -158,6 +166,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+ struct efx_nic *efx = rx_queue->efx;
+ int i;
+
++ if (unlikely(!rx_queue->page_ring))
++ return;
++
+ /* Unmap and release the pages in the recycle ring. Remove the ring. */
+ for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+ struct page *page = rx_queue->page_ring[i];
+@@ -665,17 +676,17 @@ bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+ return false;
+
+- return memcmp(&left->outer_vid, &right->outer_vid,
++ return memcmp(&left->vport_id, &right->vport_id,
+ sizeof(struct efx_filter_spec) -
+- offsetof(struct efx_filter_spec, outer_vid)) == 0;
++ offsetof(struct efx_filter_spec, vport_id)) == 0;
+ }
+
+ u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+ {
+- BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+- return jhash2((const u32 *)&spec->outer_vid,
++ BUILD_BUG_ON(offsetof(struct efx_filter_spec, vport_id) & 3);
++ return jhash2((const u32 *)&spec->vport_id,
+ (sizeof(struct efx_filter_spec) -
+- offsetof(struct efx_filter_spec, outer_vid)) / 4,
++ offsetof(struct efx_filter_spec, vport_id)) / 4,
+ 0);
+ }
+
+diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
+index d16e031e95f44..e0bc2c1dc81a5 100644
+--- a/drivers/net/ethernet/sfc/tx.c
++++ b/drivers/net/ethernet/sfc/tx.c
+@@ -443,6 +443,9 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+ if (unlikely(!tx_queue))
+ return -EINVAL;
+
++ if (!tx_queue->initialised)
++ return -EINVAL;
++
+ if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
+ HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu);
+
+@@ -545,7 +548,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
+ * previous packets out.
+ */
+ if (!netdev_xmit_more())
+- efx_tx_send_pending(tx_queue->channel);
++ efx_tx_send_pending(efx_get_tx_channel(efx, index));
+ return NETDEV_TX_OK;
+ }
+
+diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
+index d530cde2b8648..9bc8281b7f5bd 100644
+--- a/drivers/net/ethernet/sfc/tx_common.c
++++ b/drivers/net/ethernet/sfc/tx_common.c
+@@ -101,6 +101,8 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "shutting down TX queue %d\n", tx_queue->queue);
+
++ tx_queue->initialised = false;
++
+ if (!tx_queue->buffer)
+ return;
+
+diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
+index 44daf79a8f972..f3b1af9a59e8a 100644
+--- a/drivers/net/ethernet/smsc/epic100.c
++++ b/drivers/net/ethernet/smsc/epic100.c
+@@ -1513,14 +1513,14 @@ static void epic_remove_one(struct pci_dev *pdev)
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct epic_private *ep = netdev_priv(dev);
+
++ unregister_netdev(dev);
+ dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring,
+ ep->tx_ring_dma);
+ dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring,
+ ep->rx_ring_dma);
+- unregister_netdev(dev);
+ pci_iounmap(pdev, ep->ioaddr);
+- pci_release_regions(pdev);
+ free_netdev(dev);
++ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ /* pci_power_off(pdev, -1); */
+ }
+diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
+index b008b4e8a2a5a..0641a1d392b86 100644
+--- a/drivers/net/ethernet/smsc/smc911x.c
++++ b/drivers/net/ethernet/smsc/smc911x.c
+@@ -2070,6 +2070,11 @@ static int smc911x_drv_probe(struct platform_device *pdev)
+
+ ndev->dma = (unsigned char)-1;
+ ndev->irq = platform_get_irq(pdev, 0);
++ if (ndev->irq < 0) {
++ ret = ndev->irq;
++ goto release_both;
++ }
++
+ lp = netdev_priv(ndev);
+ lp->netdev = ndev;
+ #ifdef SMC_DYNAMIC_BUS_CONFIG
+diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
+index 199a973392806..b330dcbe949df 100644
+--- a/drivers/net/ethernet/smsc/smsc911x.c
++++ b/drivers/net/ethernet/smsc/smsc911x.c
+@@ -1064,6 +1064,7 @@ static int smsc911x_mii_init(struct platform_device *pdev,
+ struct net_device *dev)
+ {
+ struct smsc911x_data *pdata = netdev_priv(dev);
++ struct phy_device *phydev;
+ int err = -ENXIO;
+
+ pdata->mii_bus = mdiobus_alloc();
+@@ -1106,6 +1107,10 @@ static int smsc911x_mii_init(struct platform_device *pdev,
+ goto err_out_free_bus_2;
+ }
+
++ phydev = phy_find_first(pdata->mii_bus);
++ if (phydev)
++ phydev->mac_managed_pm = true;
++
+ return 0;
+
+ err_out_free_bus_2:
+@@ -2429,7 +2434,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
+ if (irq == -EPROBE_DEFER) {
+ retval = -EPROBE_DEFER;
+ goto out_0;
+- } else if (irq <= 0) {
++ } else if (irq < 0) {
+ pr_warn("Could not allocate irq resource\n");
+ retval = -ENODEV;
+ goto out_0;
+@@ -2584,6 +2589,8 @@ static int smsc911x_suspend(struct device *dev)
+ if (netif_running(ndev)) {
+ netif_stop_queue(ndev);
+ netif_device_detach(ndev);
++ if (!device_may_wakeup(dev))
++ phy_stop(ndev->phydev);
+ }
+
+ /* enable wake on LAN, energy detection and the external PME
+@@ -2625,6 +2632,8 @@ static int smsc911x_resume(struct device *dev)
+ if (netif_running(ndev)) {
+ netif_device_attach(ndev);
+ netif_start_queue(ndev);
++ if (!device_may_wakeup(dev))
++ phy_start(ndev->phydev);
+ }
+
+ return 0;
+diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
+index 1f46af136aa8c..eb59e8abe6915 100644
+--- a/drivers/net/ethernet/socionext/netsec.c
++++ b/drivers/net/ethernet/socionext/netsec.c
+@@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev,
+ return err;
+ }
+
++ /*
++ * SynQuacer is physically configured with TX and RX delays
++ * but the standard firmware claimed otherwise for a long
++ * time, ignore it.
++ */
++ if (of_machine_is_compatible("socionext,developer-box") &&
++ priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) {
++ dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n");
++ priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID;
++ }
++
+ priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+ if (!priv->phy_np) {
+ dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
+@@ -1964,11 +1975,13 @@ static int netsec_register_mdio(struct netsec_priv *priv, u32 phy_addr)
+ ret = PTR_ERR(priv->phydev);
+ dev_err(priv->dev, "get_phy_device err(%d)\n", ret);
+ priv->phydev = NULL;
++ mdiobus_unregister(bus);
+ return -ENODEV;
+ }
+
+ ret = phy_device_register(priv->phydev);
+ if (ret) {
++ phy_device_free(priv->phydev);
+ mdiobus_unregister(bus);
+ dev_err(priv->dev,
+ "phy_device_register err(%d)\n", ret);
+@@ -2039,7 +2052,7 @@ static int netsec_probe(struct platform_device *pdev)
+
+ mac = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf));
+ if (mac)
+- ether_addr_copy(ndev->dev_addr, mac);
++ eth_hw_addr_set(ndev, mac);
+
+ if (priv->eeprom_base &&
+ (!mac || !is_valid_ether_addr(ndev->dev_addr))) {
+diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
+index ae31ed93aaf02..8ff56e2c87746 100644
+--- a/drivers/net/ethernet/socionext/sni_ave.c
++++ b/drivers/net/ethernet/socionext/sni_ave.c
+@@ -1229,6 +1229,8 @@ static int ave_init(struct net_device *ndev)
+
+ phy_support_asym_pause(phydev);
+
++ phydev->mac_managed_pm = true;
++
+ phy_attached_info(phydev);
+
+ return 0;
+@@ -1599,7 +1601,7 @@ static int ave_probe(struct platform_device *pdev)
+
+ ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN);
+
+- ret = of_get_mac_address(np, ndev->dev_addr);
++ ret = of_get_ethdev_address(np, ndev);
+ if (ret) {
+ /* if the mac address is invalid, use random mac address */
+ eth_hw_addr_random(ndev);
+@@ -1758,6 +1760,10 @@ static int ave_resume(struct device *dev)
+
+ ave_global_reset(ndev);
+
++ ret = phy_init_hw(ndev->phydev);
++ if (ret)
++ return ret;
++
+ ave_ethtool_get_wol(ndev, &wol);
+ wol.wolopts = priv->wolopts;
+ __ave_ethtool_set_wol(ndev, &wol);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+index cd478d2cd871a..00f6d347eaf75 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
++++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+@@ -57,10 +57,6 @@
+ #define TSE_PCS_USE_SGMII_ENA BIT(0)
+ #define TSE_PCS_IF_USE_SGMII 0x03
+
+-#define SGMII_ADAPTER_CTRL_REG 0x00
+-#define SGMII_ADAPTER_DISABLE 0x0001
+-#define SGMII_ADAPTER_ENABLE 0x0000
+-
+ #define AUTONEGO_LINK_TIMER 20
+
+ static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs)
+@@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
+ unsigned int speed)
+ {
+ void __iomem *tse_pcs_base = pcs->tse_pcs_base;
+- void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
+ u32 val;
+
+- writew(SGMII_ADAPTER_ENABLE,
+- sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+-
+ pcs->autoneg = phy_dev->autoneg;
+
+ if (phy_dev->autoneg == AUTONEG_ENABLE) {
+diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
+index 442812c0a4bdc..694ac25ef426b 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
++++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h
+@@ -10,6 +10,10 @@
+ #include <linux/phy.h>
+ #include <linux/timer.h>
+
++#define SGMII_ADAPTER_CTRL_REG 0x00
++#define SGMII_ADAPTER_ENABLE 0x0000
++#define SGMII_ADAPTER_DISABLE 0x0001
++
+ struct tse_pcs {
+ struct device *dev;
+ void __iomem *tse_pcs_base;
+diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
+index b6d945ea903d4..c113ec56f5b02 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/common.h
++++ b/drivers/net/ethernet/stmicro/stmmac/common.h
+@@ -530,7 +530,6 @@ struct mac_device_info {
+ unsigned int xlgmac;
+ unsigned int num_vlan;
+ u32 vlan_filter[32];
+- unsigned int promisc;
+ bool vlan_fail_q_en;
+ u8 vlan_fail_q;
+ };
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+index bc91fd867dcd4..358fc26f8d1fc 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+@@ -361,6 +361,7 @@ bypass_clk_reset_gpio:
+ data->fix_mac_speed = tegra_eqos_fix_speed;
+ data->init = tegra_eqos_init;
+ data->bsp_priv = eqos;
++ data->sph_disable = 1;
+
+ err = tegra_eqos_init(pdev, eqos);
+ if (err < 0)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+index 9a6d819b84aea..378b4dd826bb5 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+@@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev)
+ mac->tx_delay = tx_delay_ps * 1000;
+ } else {
+ dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_remove_config_dt;
+ }
+ }
+
+@@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev)
+ mac->rx_delay = rx_delay_ps * 1000;
+ } else {
+ dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_remove_config_dt;
+ }
+ }
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+index 8e8778cfbbadd..c9e88df9e8665 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
+ *art_time = ns;
+ }
+
++static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
++{
++ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
++}
++
+ static int intel_crosststamp(ktime_t *device,
+ struct system_counterval_t *system,
+ void *ctx)
+@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device,
+ u32 num_snapshot;
+ u32 gpio_value;
+ u32 acr_value;
+- int ret;
+- u32 v;
+ int i;
+
+ if (!boot_cpu_has(X86_FEATURE_ART))
+@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device,
+ if (priv->plat->ext_snapshot_en)
+ return -EBUSY;
+
++ priv->plat->int_snapshot_en = 1;
++
+ mutex_lock(&priv->aux_ts_lock);
+ /* Enable Internal snapshot trigger */
+ acr_value = readl(ptpaddr + PTP_ACR);
+@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device,
+ break;
+ default:
+ mutex_unlock(&priv->aux_ts_lock);
++ priv->plat->int_snapshot_en = 0;
+ return -EINVAL;
+ }
+ writel(acr_value, ptpaddr + PTP_ACR);
+@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device,
+ gpio_value |= GMAC_GPO1;
+ writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
+
+- /* Poll for time sync operation done */
+- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
+- (v & GMAC_INT_TSIE), 100, 10000);
+-
+- if (ret == -ETIMEDOUT) {
+- pr_err("%s: Wait for time sync operation timeout\n", __func__);
+- return ret;
++ /* Time sync done Indication - Interrupt method */
++ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
++ stmmac_cross_ts_isr(priv),
++ HZ / 100)) {
++ priv->plat->int_snapshot_en = 0;
++ return -ETIMEDOUT;
+ }
+
+ num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
+@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device,
+ }
+
+ system->cycles *= intel_priv->crossts_adj;
++ priv->plat->int_snapshot_en = 0;
+
+ return 0;
+ }
+@@ -454,6 +460,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
+ plat->has_gmac4 = 1;
+ plat->force_sf_dma_mode = 0;
+ plat->tso_en = 1;
++ plat->sph_disable = 1;
+
+ /* Multiplying factor to the clk_eee_i clock time
+ * period to make it closer to 100 ns. This value
+@@ -575,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
+
+ plat->has_crossts = true;
+ plat->crosststamp = intel_crosststamp;
++ plat->int_snapshot_en = 0;
+
+ /* Setup MSI vector offset specific to Intel mGbE controller */
+ plat->msi_mac_vec = 29;
+@@ -592,7 +600,6 @@ static int ehl_common_data(struct pci_dev *pdev,
+ {
+ plat->rx_queues_to_use = 8;
+ plat->tx_queues_to_use = 8;
+- plat->clk_ptp_rate = 200000000;
+ plat->use_phy_wol = 1;
+
+ plat->safety_feat_cfg->tsoee = 1;
+@@ -617,6 +624,8 @@ static int ehl_sgmii_data(struct pci_dev *pdev,
+ plat->serdes_powerup = intel_serdes_powerup;
+ plat->serdes_powerdown = intel_serdes_powerdown;
+
++ plat->clk_ptp_rate = 204800000;
++
+ return ehl_common_data(pdev, plat);
+ }
+
+@@ -630,6 +639,8 @@ static int ehl_rgmii_data(struct pci_dev *pdev,
+ plat->bus_id = 1;
+ plat->phy_interface = PHY_INTERFACE_MODE_RGMII;
+
++ plat->clk_ptp_rate = 204800000;
++
+ return ehl_common_data(pdev, plat);
+ }
+
+@@ -646,6 +657,8 @@ static int ehl_pse0_common_data(struct pci_dev *pdev,
+ plat->bus_id = 2;
+ plat->addr64 = 32;
+
++ plat->clk_ptp_rate = 200000000;
++
+ intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ);
+
+ return ehl_common_data(pdev, plat);
+@@ -685,6 +698,8 @@ static int ehl_pse1_common_data(struct pci_dev *pdev,
+ plat->bus_id = 3;
+ plat->addr64 = 32;
+
++ plat->clk_ptp_rate = 200000000;
++
+ intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ);
+
+ return ehl_common_data(pdev, plat);
+@@ -720,7 +735,8 @@ static int tgl_common_data(struct pci_dev *pdev,
+ {
+ plat->rx_queues_to_use = 6;
+ plat->tx_queues_to_use = 4;
+- plat->clk_ptp_rate = 200000000;
++ plat->clk_ptp_rate = 204800000;
++ plat->speed_mode_2500 = intel_speed_mode_2500;
+
+ plat->safety_feat_cfg->tsoee = 1;
+ plat->safety_feat_cfg->mrxpee = 0;
+@@ -740,7 +756,6 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev,
+ {
+ plat->bus_id = 1;
+ plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+- plat->speed_mode_2500 = intel_speed_mode_2500;
+ plat->serdes_powerup = intel_serdes_powerup;
+ plat->serdes_powerdown = intel_serdes_powerdown;
+ return tgl_common_data(pdev, plat);
+@@ -755,7 +770,6 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev,
+ {
+ plat->bus_id = 2;
+ plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+- plat->speed_mode_2500 = intel_speed_mode_2500;
+ plat->serdes_powerup = intel_serdes_powerup;
+ plat->serdes_powerdown = intel_serdes_powerdown;
+ return tgl_common_data(pdev, plat);
+@@ -1072,13 +1086,11 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
+
+ ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
+ if (ret) {
+- goto err_dvr_probe;
++ goto err_alloc_irq;
+ }
+
+ return 0;
+
+-err_dvr_probe:
+- pci_free_irq_vectors(pdev);
+ err_alloc_irq:
+ clk_disable_unprepare(plat->stmmac_clk);
+ clk_unregister_fixed_rate(plat->stmmac_clk);
+@@ -1099,6 +1111,7 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
+
+ stmmac_dvr_remove(&pdev->dev);
+
++ clk_disable_unprepare(priv->plat->stmmac_clk);
+ clk_unregister_fixed_rate(priv->plat->stmmac_clk);
+
+ pcim_iounmap_regions(pdev, BIT(0));
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+index ecf759ee1c9f5..2ae59f94afe1d 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+@@ -51,7 +51,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ struct stmmac_resources res;
+ struct device_node *np;
+ int ret, i, phy_mode;
+- bool mdio = false;
+
+ np = dev_of_node(&pdev->dev);
+
+@@ -69,29 +68,31 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ if (!plat)
+ return -ENOMEM;
+
++ plat->mdio_node = of_get_child_by_name(np, "mdio");
+ if (plat->mdio_node) {
+- dev_err(&pdev->dev, "Found MDIO subnode\n");
+- mdio = true;
+- }
++ dev_info(&pdev->dev, "Found MDIO subnode\n");
+
+- if (mdio) {
+ plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
+ sizeof(*plat->mdio_bus_data),
+ GFP_KERNEL);
+- if (!plat->mdio_bus_data)
+- return -ENOMEM;
++ if (!plat->mdio_bus_data) {
++ ret = -ENOMEM;
++ goto err_put_node;
++ }
+ plat->mdio_bus_data->needs_reset = true;
+ }
+
+ plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
+- if (!plat->dma_cfg)
+- return -ENOMEM;
++ if (!plat->dma_cfg) {
++ ret = -ENOMEM;
++ goto err_put_node;
++ }
+
+ /* Enable pci device */
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__);
+- return ret;
++ goto err_put_node;
+ }
+
+ /* Get the base address of device */
+@@ -100,7 +101,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ continue;
+ ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+ if (ret)
+- return ret;
++ goto err_disable_device;
+ break;
+ }
+
+@@ -111,7 +112,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ phy_mode = device_get_phy_mode(&pdev->dev);
+ if (phy_mode < 0) {
+ dev_err(&pdev->dev, "phy_mode not found\n");
+- return phy_mode;
++ ret = phy_mode;
++ goto err_disable_device;
+ }
+
+ plat->phy_interface = phy_mode;
+@@ -128,6 +130,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ if (res.irq < 0) {
+ dev_err(&pdev->dev, "IRQ macirq not found\n");
+ ret = -ENODEV;
++ goto err_disable_msi;
+ }
+
+ res.wol_irq = of_irq_get_byname(np, "eth_wake_irq");
+@@ -140,15 +143,31 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+ if (res.lpi_irq < 0) {
+ dev_err(&pdev->dev, "IRQ eth_lpi not found\n");
+ ret = -ENODEV;
++ goto err_disable_msi;
+ }
+
+- return stmmac_dvr_probe(&pdev->dev, plat, &res);
++ ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
++ if (ret)
++ goto err_disable_msi;
++
++ return ret;
++
++err_disable_msi:
++ pci_disable_msi(pdev);
++err_disable_device:
++ pci_disable_device(pdev);
++err_put_node:
++ of_node_put(plat->mdio_node);
++ return ret;
+ }
+
+ static void loongson_dwmac_remove(struct pci_dev *pdev)
+ {
++ struct net_device *ndev = dev_get_drvdata(&pdev->dev);
++ struct stmmac_priv *priv = netdev_priv(ndev);
+ int i;
+
++ of_node_put(priv->plat->mdio_node);
+ stmmac_dvr_remove(&pdev->dev);
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+@@ -158,6 +177,7 @@ static void loongson_dwmac_remove(struct pci_dev *pdev)
+ break;
+ }
+
++ pci_disable_msi(pdev);
+ pci_disable_device(pdev);
+ }
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+index c7a6588d9398b..e8b507f88fbce 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+@@ -272,11 +272,9 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac,
+ if (ret)
+ return ret;
+
+- devm_add_action_or_reset(dwmac->dev,
+- (void(*)(void *))clk_disable_unprepare,
+- dwmac->rgmii_tx_clk);
+-
+- return 0;
++ return devm_add_action_or_reset(dwmac->dev,
++ (void(*)(void *))clk_disable_unprepare,
++ clk);
+ }
+
+ static int meson8b_init_rgmii_delays(struct meson8b_dwmac *dwmac)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+index 5c74b6279d690..d0c7f22a4e55a 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+@@ -113,8 +113,10 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos,
+ rgmii_writel(ethqos, temp, offset);
+ }
+
+-static void rgmii_dump(struct qcom_ethqos *ethqos)
++static void rgmii_dump(void *priv)
+ {
++ struct qcom_ethqos *ethqos = priv;
++
+ dev_dbg(&ethqos->pdev->dev, "Rgmii register dump\n");
+ dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n",
+ rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG));
+@@ -499,16 +501,17 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
+
+ plat_dat->bsp_priv = ethqos;
+ plat_dat->fix_mac_speed = ethqos_fix_mac_speed;
++ plat_dat->dump_debug_regs = rgmii_dump;
+ plat_dat->has_gmac4 = 1;
+ plat_dat->pmt = 1;
+ plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
++ if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
++ plat_dat->rx_clk_runs_in_lpi = 1;
+
+ ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ if (ret)
+ goto err_clk;
+
+- rgmii_dump(ethqos);
+-
+ return ret;
+
+ err_clk:
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+index 6924a6aacbd53..8394a215725d3 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+@@ -33,6 +33,7 @@ struct rk_gmac_ops {
+ void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed);
+ void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed);
+ void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv);
++ bool regs_valid;
+ u32 regs[];
+ };
+
+@@ -1092,6 +1093,7 @@ static const struct rk_gmac_ops rk3568_ops = {
+ .set_to_rmii = rk3568_set_to_rmii,
+ .set_rgmii_speed = rk3568_set_gmac_speed,
+ .set_rmii_speed = rk3568_set_gmac_speed,
++ .regs_valid = true,
+ .regs = {
+ 0xfe2a0000, /* gmac0 */
+ 0xfe010000, /* gmac1 */
+@@ -1345,9 +1347,6 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
+ int ret;
+ struct device *dev = &bsp_priv->pdev->dev;
+
+- if (!ldo)
+- return 0;
+-
+ if (enable) {
+ ret = regulator_enable(ldo);
+ if (ret)
+@@ -1383,7 +1382,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
+ * to be distinguished.
+ */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- if (res) {
++ if (res && ops->regs_valid) {
+ int i = 0;
+
+ while (ops->regs[i]) {
+@@ -1395,14 +1394,11 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
+ }
+ }
+
+- bsp_priv->regulator = devm_regulator_get_optional(dev, "phy");
++ bsp_priv->regulator = devm_regulator_get(dev, "phy");
+ if (IS_ERR(bsp_priv->regulator)) {
+- if (PTR_ERR(bsp_priv->regulator) == -EPROBE_DEFER) {
+- dev_err(dev, "phy regulator is not available yet, deferred probing\n");
+- return ERR_PTR(-EPROBE_DEFER);
+- }
+- dev_err(dev, "no regulator found\n");
+- bsp_priv->regulator = NULL;
++ ret = PTR_ERR(bsp_priv->regulator);
++ dev_err_probe(dev, ret, "failed to get phy regulator\n");
++ return ERR_PTR(ret);
+ }
+
+ ret = of_property_read_string(dev->of_node, "clock_in_out", &strings);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+index 85208128f135c..6b447d8f0bd8a 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+@@ -18,9 +18,6 @@
+
+ #include "altr_tse_pcs.h"
+
+-#define SGMII_ADAPTER_CTRL_REG 0x00
+-#define SGMII_ADAPTER_DISABLE 0x0001
+-
+ #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0
+ #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1
+ #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2
+@@ -62,14 +59,13 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
+ {
+ struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv;
+ void __iomem *splitter_base = dwmac->splitter_base;
+- void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base;
+ void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base;
+ struct device *dev = dwmac->dev;
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct phy_device *phy_dev = ndev->phydev;
+ u32 val;
+
+- if ((tse_pcs_base) && (sgmii_adapter_base))
++ if (sgmii_adapter_base)
+ writew(SGMII_ADAPTER_DISABLE,
+ sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+
+@@ -93,8 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
+ writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
+ }
+
+- if (tse_pcs_base && sgmii_adapter_base)
++ if (phy_dev && sgmii_adapter_base) {
++ writew(SGMII_ADAPTER_ENABLE,
++ sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+ tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
++ }
+ }
+
+ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
+@@ -485,8 +484,28 @@ static int socfpga_dwmac_resume(struct device *dev)
+ }
+ #endif /* CONFIG_PM_SLEEP */
+
+-static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend,
+- socfpga_dwmac_resume);
++static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev)
++{
++ struct net_device *ndev = dev_get_drvdata(dev);
++ struct stmmac_priv *priv = netdev_priv(ndev);
++
++ stmmac_bus_clks_config(priv, false);
++
++ return 0;
++}
++
++static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev)
++{
++ struct net_device *ndev = dev_get_drvdata(dev);
++ struct stmmac_priv *priv = netdev_priv(ndev);
++
++ return stmmac_bus_clks_config(priv, true);
++}
++
++static const struct dev_pm_ops socfpga_dwmac_pm_ops = {
++ SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume)
++ SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL)
++};
+
+ static const struct socfpga_dwmac_ops socfpga_gen5_ops = {
+ .set_phy_mode = socfpga_gen5_set_phy_mode,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+index 4422baeed3d89..06e2af9387d7c 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+@@ -756,7 +756,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv)
+
+ if (err) {
+ dev_err(priv->device, "EMAC reset timeout\n");
+- return -EFAULT;
++ return err;
+ }
+ return 0;
+ }
+@@ -916,6 +916,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv)
+
+ ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn,
+ &gmac->mux_handle, priv, priv->mii);
++ of_node_put(mdio_mux);
+ return ret;
+ }
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+index d046e33b8a297..c27441c08dd6f 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+@@ -22,21 +22,21 @@
+ #define ETHER_CLK_SEL_RMII_CLK_EN BIT(2)
+ #define ETHER_CLK_SEL_RMII_CLK_RST BIT(3)
+ #define ETHER_CLK_SEL_DIV_SEL_2 BIT(4)
+-#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0)
++#define ETHER_CLK_SEL_DIV_SEL_20 0
+ #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8))
+ #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9)
+ #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8)
+-#define ETHER_CLK_SEL_FREQ_SEL_2P5M BIT(0)
+-#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_FREQ_SEL_2P5M 0
++#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10)
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11)
+-#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12)
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13)
+-#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0)
++#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0
+ #define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14)
+ #define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15)
+-#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0
+ #define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16)
+
+ #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN)
+@@ -49,13 +49,15 @@ struct visconti_eth {
+ void __iomem *reg;
+ u32 phy_intf_sel;
+ struct clk *phy_ref_clk;
++ struct device *dev;
+ spinlock_t lock; /* lock to protect register update */
+ };
+
+ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
+ {
+ struct visconti_eth *dwmac = priv;
+- unsigned int val, clk_sel_val;
++ struct net_device *netdev = dev_get_drvdata(dwmac->dev);
++ unsigned int val, clk_sel_val = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwmac->lock, flags);
+@@ -85,7 +87,9 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
+ break;
+ default:
+ /* No bit control */
+- break;
++ netdev_err(netdev, "Unsupported speed request (%d)", speed);
++ spin_unlock_irqrestore(&dwmac->lock, flags);
++ return;
+ }
+
+ writel(val, dwmac->reg + MAC_CTRL_REG);
+@@ -96,31 +100,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
+ val |= ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
++ /* Set Clock-Mux, Start clock, Set TX_O direction */
+ switch (dwmac->phy_intf_sel) {
+ case ETHER_CONFIG_INTF_RGMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_RMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN |
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+ ETHER_CLK_SEL_RMII_CLK_SEL_RX_C;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_RST;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_MII:
+ default:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+- ETHER_CLK_SEL_RMII_CLK_EN;
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ }
+
+- /* Start clock */
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+- val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+- val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+ spin_unlock_irqrestore(&dwmac->lock, flags);
+ }
+
+@@ -220,6 +234,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
+
+ spin_lock_init(&dwmac->lock);
+ dwmac->reg = stmmac_res.addr;
++ dwmac->dev = &pdev->dev;
+ plat_dat->bsp_priv = dwmac;
+ plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed;
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+index 462ca7ed095a2..12c0e60809f47 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+@@ -150,7 +150,8 @@
+ #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
+ GMAC_INT_PCS_ANE)
+
+-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
++#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
++ GMAC_INT_TSIE)
+
+ enum dwmac4_irq_status {
+ time_stamp_irq = 0x00001000,
+@@ -180,6 +181,7 @@ enum power_event {
+ #define GMAC4_LPI_CTRL_STATUS 0xd0
+ #define GMAC4_LPI_TIMER_CTRL 0xd4
+ #define GMAC4_LPI_ENTRY_TIMER 0xd8
++#define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc
+
+ /* LPI control and status defines */
+ #define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+index b217453689839..29480314a4867 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+@@ -23,8 +23,10 @@
+ static void dwmac4_core_init(struct mac_device_info *hw,
+ struct net_device *dev)
+ {
++ struct stmmac_priv *priv = netdev_priv(dev);
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value = readl(ioaddr + GMAC_CONFIG);
++ u32 clk_rate;
+
+ value |= GMAC_CORE_INIT;
+
+@@ -47,6 +49,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
+
+ writel(value, ioaddr + GMAC_CONFIG);
+
++ /* Configure LPI 1us counter to number of CSR clock ticks in 1us - 1 */
++ clk_rate = clk_get_rate(priv->plat->stmmac_clk);
++ writel((clk_rate / 1000000) - 1, ioaddr + GMAC4_MAC_ONEUS_TIC_COUNTER);
++
+ /* Enable GMAC interrupts */
+ value = GMAC_INT_DEFAULT_ENABLE;
+
+@@ -58,6 +64,9 @@ static void dwmac4_core_init(struct mac_device_info *hw,
+ value |= GMAC_INT_FPE_EN;
+
+ writel(value, ioaddr + GMAC_INT_EN);
++
++ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
++ init_waitqueue_head(&priv->tstamp_busy_wait);
+ }
+
+ static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
+@@ -219,6 +228,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
+ if (queue == 0 || queue == 4) {
+ value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
+ value |= MTL_RXQ_DMA_Q04MDMACH(chan);
++ } else if (queue > 4) {
++ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
++ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
+ } else {
+ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
+ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
+@@ -475,12 +487,6 @@ static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev,
+ if (vid > 4095)
+ return -EINVAL;
+
+- if (hw->promisc) {
+- netdev_err(dev,
+- "Adding VLAN in promisc mode not supported\n");
+- return -EPERM;
+- }
+-
+ /* Single Rx VLAN Filter */
+ if (hw->num_vlan == 1) {
+ /* For single VLAN filter, VID 0 means VLAN promiscuous */
+@@ -530,12 +536,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
+ {
+ int i, ret = 0;
+
+- if (hw->promisc) {
+- netdev_err(dev,
+- "Deleting VLAN in promisc mode not supported\n");
+- return -EPERM;
+- }
+-
+ /* Single Rx VLAN Filter */
+ if (hw->num_vlan == 1) {
+ if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) {
+@@ -560,39 +560,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
+ return ret;
+ }
+
+-static void dwmac4_vlan_promisc_enable(struct net_device *dev,
+- struct mac_device_info *hw)
+-{
+- void __iomem *ioaddr = hw->pcsr;
+- u32 value;
+- u32 hash;
+- u32 val;
+- int i;
+-
+- /* Single Rx VLAN Filter */
+- if (hw->num_vlan == 1) {
+- dwmac4_write_single_vlan(dev, 0);
+- return;
+- }
+-
+- /* Extended Rx VLAN Filter Enable */
+- for (i = 0; i < hw->num_vlan; i++) {
+- if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) {
+- val = hw->vlan_filter[i] & ~GMAC_VLAN_TAG_DATA_VEN;
+- dwmac4_write_vlan_filter(dev, hw, i, val);
+- }
+- }
+-
+- hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE);
+- if (hash & GMAC_VLAN_VLHT) {
+- value = readl(ioaddr + GMAC_VLAN_TAG);
+- if (value & GMAC_VLAN_VTHM) {
+- value &= ~GMAC_VLAN_VTHM;
+- writel(value, ioaddr + GMAC_VLAN_TAG);
+- }
+- }
+-}
+-
+ static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev,
+ struct mac_device_info *hw)
+ {
+@@ -712,22 +679,12 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
+ }
+
+ /* VLAN filtering */
+- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
++ if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en)
++ value &= ~GMAC_PACKET_FILTER_VTFE;
++ else if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ value |= GMAC_PACKET_FILTER_VTFE;
+
+ writel(value, ioaddr + GMAC_PACKET_FILTER);
+-
+- if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) {
+- if (!hw->promisc) {
+- hw->promisc = 1;
+- dwmac4_vlan_promisc_enable(dev, hw);
+- }
+- } else {
+- if (hw->promisc) {
+- hw->promisc = 0;
+- dwmac4_restore_hw_vlan_rx_fltr(dev, hw);
+- }
+- }
+ }
+
+ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
+@@ -742,6 +699,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
+ if (fc & FLOW_RX) {
+ pr_debug("\tReceive Flow-Control ON\n");
+ flow |= GMAC_RX_FLOW_CTRL_RFE;
++ } else {
++ pr_debug("\tReceive Flow-Control OFF\n");
+ }
+ writel(flow, ioaddr + GMAC_RX_FLOW_CTRL);
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+index 9292a1fab7d32..7011c08d2e012 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+@@ -207,13 +207,15 @@ void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
+ void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable)
+ {
+ u32 value = readl(ioaddr + GMAC_CONFIG);
++ u32 old_val = value;
+
+ if (enable)
+ value |= GMAC_CONFIG_RE | GMAC_CONFIG_TE;
+ else
+ value &= ~(GMAC_CONFIG_TE | GMAC_CONFIG_RE);
+
+- writel(value, ioaddr + GMAC_CONFIG);
++ if (value != old_val)
++ writel(value, ioaddr + GMAC_CONFIG);
+ }
+
+ void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+index 9c2d40f853ed0..e95d35f1e5a0c 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+@@ -186,11 +186,25 @@ static void dwmac5_handle_dma_err(struct net_device *ndev,
+ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
+ struct stmmac_safety_feature_cfg *safety_feat_cfg)
+ {
++ struct stmmac_safety_feature_cfg all_safety_feats = {
++ .tsoee = 1,
++ .mrxpee = 1,
++ .mestee = 1,
++ .mrxee = 1,
++ .mtxee = 1,
++ .epsi = 1,
++ .edpp = 1,
++ .prtyen = 1,
++ .tmouten = 1,
++ };
+ u32 value;
+
+ if (!asp)
+ return -EINVAL;
+
++ if (!safety_feat_cfg)
++ safety_feat_cfg = &all_safety_feats;
++
+ /* 1. Enable Safety Features */
+ value = readl(ioaddr + MTL_ECC_CONTROL);
+ value |= MEEAO; /* MTL ECC Error Addr Status Override */
+@@ -527,9 +541,9 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
+ return 0;
+ }
+
+- val |= PPSCMDx(index, 0x2);
+ val |= TRGTMODSELx(index, 0x2);
+ val |= PPSEN0;
++ writel(val, ioaddr + MAC_PPS_CONTROL);
+
+ writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index));
+
+@@ -554,6 +568,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
+ writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index));
+
+ /* Finally, activate it */
++ val |= PPSCMDx(index, 0x2);
+ writel(val, ioaddr + MAC_PPS_CONTROL);
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+index 1914ad698cab2..acd70b9a3173c 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+@@ -150,6 +150,7 @@
+
+ #define NUM_DWMAC100_DMA_REGS 9
+ #define NUM_DWMAC1000_DMA_REGS 23
++#define NUM_DWMAC4_DMA_REGS 27
+
+ void dwmac_enable_dma_transmission(void __iomem *ioaddr);
+ void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+index d1c31200bb911..01d0a14f67520 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+@@ -258,14 +258,18 @@ EXPORT_SYMBOL_GPL(stmmac_set_mac_addr);
+ /* Enable disable MAC RX/TX */
+ void stmmac_set_mac(void __iomem *ioaddr, bool enable)
+ {
+- u32 value = readl(ioaddr + MAC_CTRL_REG);
++ u32 old_val, value;
++
++ old_val = readl(ioaddr + MAC_CTRL_REG);
++ value = old_val;
+
+ if (enable)
+ value |= MAC_ENABLE_RX | MAC_ENABLE_TX;
+ else
+ value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX);
+
+- writel(value, ioaddr + MAC_CTRL_REG);
++ if (value != old_val)
++ writel(value, ioaddr + MAC_CTRL_REG);
+ }
+
+ void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+index 43eead726886a..f03779205ade4 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+@@ -172,6 +172,19 @@ struct stmmac_flow_entry {
+ int is_l4;
+ };
+
++/* Rx Frame Steering */
++enum stmmac_rfs_type {
++ STMMAC_RFS_T_VLAN,
++ STMMAC_RFS_T_MAX,
++};
++
++struct stmmac_rfs_entry {
++ unsigned long cookie;
++ int in_use;
++ int type;
++ int tc;
++};
++
+ struct stmmac_priv {
+ /* Frequently used values are kept adjacent for cache effect */
+ u32 tx_coal_frames[MTL_MAX_TX_QUEUES];
+@@ -252,6 +265,7 @@ struct stmmac_priv {
+ spinlock_t ptp_lock;
+ /* Protects auxiliary snapshot registers from concurrent access. */
+ struct mutex aux_ts_lock;
++ wait_queue_head_t tstamp_busy_wait;
+
+ void __iomem *mmcaddr;
+ void __iomem *ptpaddr;
+@@ -289,6 +303,10 @@ struct stmmac_priv {
+ struct stmmac_tc_entry *tc_entries;
+ unsigned int flow_entries_max;
+ struct stmmac_flow_entry *flow_entries;
++ unsigned int rfs_entries_max[STMMAC_RFS_T_MAX];
++ unsigned int rfs_entries_cnt[STMMAC_RFS_T_MAX];
++ unsigned int rfs_entries_total;
++ struct stmmac_rfs_entry *rfs_entries;
+
+ /* Pulse Per Second output */
+ struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
+@@ -314,10 +332,11 @@ int stmmac_mdio_reset(struct mii_bus *mii);
+ int stmmac_xpcs_setup(struct mii_bus *mii);
+ void stmmac_set_ethtool_ops(struct net_device *netdev);
+
++int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags);
+ void stmmac_ptp_register(struct stmmac_priv *priv);
+ void stmmac_ptp_unregister(struct stmmac_priv *priv);
+-int stmmac_open(struct net_device *dev);
+-int stmmac_release(struct net_device *dev);
++int stmmac_xdp_open(struct net_device *dev);
++void stmmac_xdp_release(struct net_device *dev);
+ int stmmac_resume(struct device *dev);
+ int stmmac_suspend(struct device *dev);
+ int stmmac_dvr_remove(struct device *dev);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+index d89455803beda..9e8ae4384e4fb 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+@@ -21,10 +21,18 @@
+ #include "dwxgmac2.h"
+
+ #define REG_SPACE_SIZE 0x1060
++#define GMAC4_REG_SPACE_SIZE 0x116C
+ #define MAC100_ETHTOOL_NAME "st_mac100"
+ #define GMAC_ETHTOOL_NAME "st_gmac"
+ #define XGMAC_ETHTOOL_NAME "st_xgmac"
+
++/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h
++ *
++ * It is here because dwmac_dma.h and dwmac4_dma.h cannot be included at the
++ * same time due to the conflicting macro names.
++ */
++#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100
++
+ #define ETHTOOL_DMA_OFFSET 55
+
+ struct stmmac_stats {
+@@ -435,6 +443,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev)
+
+ if (priv->plat->has_xgmac)
+ return XGMAC_REGSIZE * 4;
++ else if (priv->plat->has_gmac4)
++ return GMAC4_REG_SPACE_SIZE;
+ return REG_SPACE_SIZE;
+ }
+
+@@ -447,8 +457,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
+ stmmac_dump_mac_regs(priv, priv->hw, reg_space);
+ stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);
+
+- if (!priv->plat->has_xgmac) {
+- /* Copy DMA registers to where ethtool expects them */
++ /* Copy DMA registers to where ethtool expects them */
++ if (priv->plat->has_gmac4) {
++ /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */
++ memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
++ &reg_space[GMAC4_DMA_CHAN_BASE_ADDR / 4],
++ NUM_DWMAC4_DMA_REGS * 4);
++ } else if (!priv->plat->has_xgmac) {
+ memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
+ &reg_space[DMA_BUS_MODE / 4],
+ NUM_DWMAC1000_DMA_REGS * 4);
+@@ -533,16 +548,16 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
+ p = (char *)priv + offsetof(struct stmmac_priv,
+ xstats.txq_stats[q].tx_pkt_n);
+ for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
+- *data++ = (*(u64 *)p);
+- p += sizeof(u64 *);
++ *data++ = (*(unsigned long *)p);
++ p += sizeof(unsigned long);
+ }
+ }
+ for (q = 0; q < rx_cnt; q++) {
+ p = (char *)priv + offsetof(struct stmmac_priv,
+ xstats.rxq_stats[q].rx_pkt_n);
+ for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
+- *data++ = (*(u64 *)p);
+- p += sizeof(u64 *);
++ *data++ = (*(unsigned long *)p);
++ p += sizeof(unsigned long);
+ }
+ }
+ }
+@@ -785,14 +800,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
+ netdev_warn(priv->dev,
+ "Setting EEE tx-lpi is not supported\n");
+
+- if (priv->hw->xpcs) {
+- ret = xpcs_config_eee(priv->hw->xpcs,
+- priv->plat->mult_fact_100ns,
+- edata->eee_enabled);
+- if (ret)
+- return ret;
+- }
+-
+ if (!edata->eee_enabled)
+ stmmac_disable_eee_mode(priv);
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+index 074e2cdfb0fa6..2c6245b2281ca 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+@@ -47,7 +47,8 @@ static void config_sub_second_increment(void __iomem *ioaddr,
+ if (!(value & PTP_TCR_TSCTRLSSR))
+ data = (data * 1000) / 465;
+
+- data &= PTP_SSIR_SSINC_MASK;
++ if (data > PTP_SSIR_SSINC_MAX)
++ data = PTP_SSIR_SSINC_MAX;
+
+ reg_value = data;
+ if (gmac4)
+@@ -71,9 +72,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
+ writel(value, ioaddr + PTP_TCR);
+
+ /* wait for present system time initialize to complete */
+- return readl_poll_timeout(ioaddr + PTP_TCR, value,
++ return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value,
+ !(value & PTP_TCR_TSINIT),
+- 10000, 100000);
++ 10, 100000);
+ }
+
+ static int config_addend(void __iomem *ioaddr, u32 addend)
+@@ -145,15 +146,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
+
+ static void get_systime(void __iomem *ioaddr, u64 *systime)
+ {
+- u64 ns;
+-
+- /* Get the TSSS value */
+- ns = readl(ioaddr + PTP_STNSR);
+- /* Get the TSS and convert sec time value to nanosecond */
+- ns += readl(ioaddr + PTP_STSR) * 1000000000ULL;
++ u64 ns, sec0, sec1;
++
++ /* Get the TSS value */
++ sec1 = readl_relaxed(ioaddr + PTP_STSR);
++ do {
++ sec0 = sec1;
++ /* Get the TSSS value */
++ ns = readl_relaxed(ioaddr + PTP_STNSR);
++ /* Get the TSS value */
++ sec1 = readl_relaxed(ioaddr + PTP_STSR);
++ } while (sec0 != sec1);
+
+ if (systime)
+- *systime = ns;
++ *systime = ns + (sec1 * 1000000000ULL);
+ }
+
+ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
+@@ -174,6 +180,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
+ u64 ptp_time;
+ int i;
+
++ if (priv->plat->int_snapshot_en) {
++ wake_up(&priv->tstamp_busy_wait);
++ return;
++ }
++
+ tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
+
+ if (!tsync_int)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 3d67d1fa36906..a43628dd1f4c2 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -50,6 +50,13 @@
+ #include "dwxgmac2.h"
+ #include "hwif.h"
+
++/* As long as the interface is active, we keep the timestamping counter enabled
++ * with fine resolution and binary rollover. This avoids non-monotonic behavior
++ * (clock jumps) when changing timestamping settings at runtime.
++ */
++#define STMMAC_HWTS_ACTIVE (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \
++ PTP_TCR_TSCTRLSSR)
++
+ #define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
+ #define TSO_MAX_BUFF_SIZE (SZ_16K - 1)
+
+@@ -393,7 +400,7 @@ static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en)
+ * Description: this function is to verify and enter in LPI mode in case of
+ * EEE.
+ */
+-static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
++static int stmmac_enable_eee_mode(struct stmmac_priv *priv)
+ {
+ u32 tx_cnt = priv->plat->tx_queues_to_use;
+ u32 queue;
+@@ -403,13 +410,14 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
+ struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+ if (tx_q->dirty_tx != tx_q->cur_tx)
+- return; /* still unfinished work */
++ return -EBUSY; /* still unfinished work */
+ }
+
+ /* Check and enter in LPI mode */
+ if (!priv->tx_path_in_lpi_mode)
+ stmmac_set_eee_mode(priv, priv->hw,
+ priv->plat->en_tx_lpi_clockgating);
++ return 0;
+ }
+
+ /**
+@@ -441,8 +449,8 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
+ {
+ struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer);
+
+- stmmac_enable_eee_mode(priv);
+- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
++ if (stmmac_enable_eee_mode(priv))
++ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ }
+
+ /**
+@@ -511,6 +519,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
+ return true;
+ }
+
++static inline u32 stmmac_cdc_adjust(struct stmmac_priv *priv)
++{
++ /* Correct the clk domain crossing(CDC) error */
++ if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate)
++ return (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;
++ return 0;
++}
++
+ /* stmmac_get_tx_hwtstamp - get HW TX timestamps
+ * @priv: driver private structure
+ * @p : descriptor pointer
+@@ -524,7 +540,6 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
+ {
+ struct skb_shared_hwtstamps shhwtstamp;
+ bool found = false;
+- s64 adjust = 0;
+ u64 ns = 0;
+
+ if (!priv->hwts_tx_en)
+@@ -543,12 +558,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
+ }
+
+ if (found) {
+- /* Correct the clk domain crossing(CDC) error */
+- if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+- adjust += -(2 * (NSEC_PER_SEC /
+- priv->plat->clk_ptp_rate));
+- ns += adjust;
+- }
++ ns -= stmmac_cdc_adjust(priv);
+
+ memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
+ shhwtstamp.hwtstamp = ns_to_ktime(ns);
+@@ -573,7 +583,6 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
+ {
+ struct skb_shared_hwtstamps *shhwtstamp = NULL;
+ struct dma_desc *desc = p;
+- u64 adjust = 0;
+ u64 ns = 0;
+
+ if (!priv->hwts_rx_en)
+@@ -586,11 +595,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
+ if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
+ stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
+
+- /* Correct the clk domain crossing(CDC) error */
+- if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+- adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
+- ns -= adjust;
+- }
++ ns -= stmmac_cdc_adjust(priv);
+
+ netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+ shhwtstamp = skb_hwtstamps(skb);
+@@ -616,8 +621,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+ struct hwtstamp_config config;
+- struct timespec64 now;
+- u64 temp = 0;
+ u32 ptp_v2 = 0;
+ u32 tstamp_all = 0;
+ u32 ptp_over_ipv4_udp = 0;
+@@ -626,11 +629,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+ u32 snap_type_sel = 0;
+ u32 ts_master_en = 0;
+ u32 ts_event_en = 0;
+- u32 sec_inc = 0;
+- u32 value = 0;
+- bool xmac;
+-
+- xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+
+ if (!(priv->dma_cap.time_stamp || priv->adv_ts)) {
+ netdev_alert(priv->dev, "No support for HW time stamping\n");
+@@ -792,42 +790,17 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+ priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1);
+ priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
+
+- if (!priv->hwts_tx_en && !priv->hwts_rx_en)
+- stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
+- else {
+- value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
+- tstamp_all | ptp_v2 | ptp_over_ethernet |
+- ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
+- ts_master_en | snap_type_sel);
+- stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
+-
+- /* program Sub Second Increment reg */
+- stmmac_config_sub_second_increment(priv,
+- priv->ptpaddr, priv->plat->clk_ptp_rate,
+- xmac, &sec_inc);
+- temp = div_u64(1000000000ULL, sec_inc);
+-
+- /* Store sub second increment and flags for later use */
+- priv->sub_second_inc = sec_inc;
+- priv->systime_flags = value;
+-
+- /* calculate default added value:
+- * formula is :
+- * addend = (2^32)/freq_div_ratio;
+- * where, freq_div_ratio = 1e9ns/sec_inc
+- */
+- temp = (u64)(temp << 32);
+- priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
+- stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
+-
+- /* initialize system time */
+- ktime_get_real_ts64(&now);
++ priv->systime_flags = STMMAC_HWTS_ACTIVE;
+
+- /* lower 32 bits of tv_sec are safe until y2106 */
+- stmmac_init_systime(priv, priv->ptpaddr,
+- (u32)now.tv_sec, now.tv_nsec);
++ if (priv->hwts_tx_en || priv->hwts_rx_en) {
++ priv->systime_flags |= tstamp_all | ptp_v2 |
++ ptp_over_ethernet | ptp_over_ipv6_udp |
++ ptp_over_ipv4_udp | ts_event_en |
++ ts_master_en | snap_type_sel;
+ }
+
++ stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags);
++
+ memcpy(&priv->tstamp_config, &config, sizeof(config));
+
+ return copy_to_user(ifr->ifr_data, &config,
+@@ -855,6 +828,57 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+ sizeof(*config)) ? -EFAULT : 0;
+ }
+
++/**
++ * stmmac_init_tstamp_counter - init hardware timestamping counter
++ * @priv: driver private structure
++ * @systime_flags: timestamping flags
++ * Description:
++ * Initialize hardware counter for packet timestamping.
++ * This is valid as long as the interface is open and not suspended.
++ * Will be rerun after resuming from suspend, case in which the timestamping
++ * flags updated by stmmac_hwtstamp_set() also need to be restored.
++ */
++int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
++{
++ bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
++ struct timespec64 now;
++ u32 sec_inc = 0;
++ u64 temp = 0;
++
++ if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
++ return -EOPNOTSUPP;
++
++ stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
++ priv->systime_flags = systime_flags;
++
++ /* program Sub Second Increment reg */
++ stmmac_config_sub_second_increment(priv, priv->ptpaddr,
++ priv->plat->clk_ptp_rate,
++ xmac, &sec_inc);
++ temp = div_u64(1000000000ULL, sec_inc);
++
++ /* Store sub second increment for later use */
++ priv->sub_second_inc = sec_inc;
++
++ /* calculate default added value:
++ * formula is :
++ * addend = (2^32)/freq_div_ratio;
++ * where, freq_div_ratio = 1e9ns/sec_inc
++ */
++ temp = (u64)(temp << 32);
++ priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
++ stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
++
++ /* initialize system time */
++ ktime_get_real_ts64(&now);
++
++ /* lower 32 bits of tv_sec are safe until y2106 */
++ stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter);
++
+ /**
+ * stmmac_init_ptp - init PTP
+ * @priv: driver private structure
+@@ -865,9 +889,14 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+ static int stmmac_init_ptp(struct stmmac_priv *priv)
+ {
+ bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
++ int ret;
+
+- if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+- return -EOPNOTSUPP;
++ if (priv->plat->ptp_clk_freq_config)
++ priv->plat->ptp_clk_freq_config(priv);
++
++ ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
++ if (ret)
++ return ret;
+
+ priv->adv_ts = 0;
+ /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */
+@@ -887,8 +916,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
+ priv->hwts_tx_en = 0;
+ priv->hwts_rx_en = 0;
+
+- stmmac_ptp_register(priv);
+-
+ return 0;
+ }
+
+@@ -1056,10 +1083,10 @@ static void stmmac_mac_link_up(struct phylink_config *config,
+ bool tx_pause, bool rx_pause)
+ {
+ struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+- u32 ctrl;
++ u32 old_ctrl, ctrl;
+
+- ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
+- ctrl &= ~priv->hw->link.speed_mask;
++ old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
++ ctrl = old_ctrl & ~priv->hw->link.speed_mask;
+
+ if (interface == PHY_INTERFACE_MODE_USXGMII) {
+ switch (speed) {
+@@ -1131,14 +1158,24 @@ static void stmmac_mac_link_up(struct phylink_config *config,
+ ctrl |= priv->hw->link.duplex;
+
+ /* Flow Control operation */
+- if (tx_pause && rx_pause)
+- stmmac_mac_flow_ctrl(priv, duplex);
++ if (rx_pause && tx_pause)
++ priv->flow_ctrl = FLOW_AUTO;
++ else if (rx_pause && !tx_pause)
++ priv->flow_ctrl = FLOW_RX;
++ else if (!rx_pause && tx_pause)
++ priv->flow_ctrl = FLOW_TX;
++ else
++ priv->flow_ctrl = FLOW_OFF;
++
++ stmmac_mac_flow_ctrl(priv, duplex);
+
+- writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
++ if (ctrl != old_ctrl)
++ writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
+
+ stmmac_mac_set(priv, priv->ioaddr, true);
+ if (phy && priv->dma_cap.eee) {
+- priv->eee_active = phy_init_eee(phy, 1) >= 0;
++ priv->eee_active =
++ phy_init_eee(phy, !priv->plat->rx_clk_runs_in_lpi) >= 0;
+ priv->eee_enabled = stmmac_eee_init(priv);
+ priv->tx_lpi_enabled = priv->eee_enabled;
+ stmmac_set_eee_pls(priv, priv->hw, true);
+@@ -1206,6 +1243,11 @@ static int stmmac_init_phy(struct net_device *dev)
+ int addr = priv->plat->phy_addr;
+ struct phy_device *phydev;
+
++ if (addr < 0) {
++ netdev_err(priv->dev, "no phy found\n");
++ return -ENODEV;
++ }
++
+ phydev = mdiobus_get_phy(priv->mii, addr);
+ if (!phydev) {
+ netdev_err(priv->dev, "no phy at addr %d\n", addr);
+@@ -1220,6 +1262,7 @@ static int stmmac_init_phy(struct net_device *dev)
+
+ phylink_ethtool_get_wol(priv->phylink, &wol);
+ device_set_wakeup_capable(priv->device, !!wol.supported);
++ device_set_wakeup_enable(priv->device, !!wol.wolopts);
+ }
+
+ return ret;
+@@ -2232,6 +2275,23 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
+ stmmac_stop_tx(priv, priv->ioaddr, chan);
+ }
+
++static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv)
++{
++ u32 rx_channels_count = priv->plat->rx_queues_to_use;
++ u32 tx_channels_count = priv->plat->tx_queues_to_use;
++ u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
++ u32 chan;
++
++ for (chan = 0; chan < dma_csr_ch; chan++) {
++ struct stmmac_channel *ch = &priv->channel[chan];
++ unsigned long flags;
++
++ spin_lock_irqsave(&ch->lock, flags);
++ stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
++ spin_unlock_irqrestore(&ch->lock, flags);
++ }
++}
++
+ /**
+ * stmmac_start_all_dma - start all RX and TX DMA channels
+ * @priv: driver private structure
+@@ -2602,8 +2662,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
+
+ if (priv->eee_enabled && !priv->tx_path_in_lpi_mode &&
+ priv->eee_sw_timer_en) {
+- stmmac_enable_eee_mode(priv);
+- mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
++ if (stmmac_enable_eee_mode(priv))
++ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+ }
+
+ /* We still have pending packets, let's call for a new scheduling */
+@@ -2867,8 +2927,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
+ stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
+
+ /* DMA CSR Channel configuration */
+- for (chan = 0; chan < dma_csr_ch; chan++)
++ for (chan = 0; chan < dma_csr_ch; chan++) {
+ stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
++ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
++ }
+
+ /* DMA RX Channel Configuration */
+ for (chan = 0; chan < rx_channels_count; chan++) {
+@@ -3203,7 +3265,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
+ /**
+ * stmmac_hw_setup - setup mac in a usable state.
+ * @dev : pointer to the device structure.
+- * @init_ptp: initialize PTP if set
++ * @ptp_register: register PTP if set
+ * Description:
+ * this is the main function to setup the HW in a usable state because the
+ * dma engine is reset, the core registers are configured (e.g. AXI,
+@@ -3213,7 +3275,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
+ * 0 on success and an appropriate (-)ve integer as defined in errno.h
+ * file on failure.
+ */
+-static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
++static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
+@@ -3270,18 +3332,22 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
+
+ stmmac_mmc_setup(priv);
+
+- if (init_ptp) {
++ if (ptp_register) {
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0)
+- netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
+-
+- ret = stmmac_init_ptp(priv);
+- if (ret == -EOPNOTSUPP)
+- netdev_warn(priv->dev, "PTP not supported by HW\n");
+- else if (ret)
+- netdev_warn(priv->dev, "PTP init failed\n");
++ netdev_warn(priv->dev,
++ "failed to enable PTP reference clock: %pe\n",
++ ERR_PTR(ret));
+ }
+
++ ret = stmmac_init_ptp(priv);
++ if (ret == -EOPNOTSUPP)
++ netdev_warn(priv->dev, "PTP not supported by HW\n");
++ else if (ret)
++ netdev_warn(priv->dev, "PTP init failed\n");
++ else if (ptp_register)
++ stmmac_ptp_register(priv);
++
+ priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
+
+ /* Convert the timer from msec to usec */
+@@ -3635,7 +3701,7 @@ static int stmmac_request_irq(struct net_device *dev)
+ * 0 on success and an appropriate (-)ve integer as defined in errno.h
+ * file on failure.
+ */
+-int stmmac_open(struct net_device *dev)
++static int stmmac_open(struct net_device *dev)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+ int mode = priv->plat->phy_interface;
+@@ -3706,6 +3772,15 @@ int stmmac_open(struct net_device *dev)
+ goto init_error;
+ }
+
++ if (priv->plat->serdes_powerup) {
++ ret = priv->plat->serdes_powerup(dev, priv->plat->bsp_priv);
++ if (ret < 0) {
++ netdev_err(priv->dev, "%s: Serdes powerup failed\n",
++ __func__);
++ goto init_error;
++ }
++ }
++
+ ret = stmmac_hw_setup(dev, true);
+ if (ret < 0) {
+ netdev_err(priv->dev, "%s: Hw setup failed\n", __func__);
+@@ -3724,6 +3799,7 @@ int stmmac_open(struct net_device *dev)
+
+ stmmac_enable_all_queues(priv);
+ netif_tx_start_all_queues(priv->dev);
++ stmmac_enable_all_dma_irq(priv);
+
+ return 0;
+
+@@ -3759,11 +3835,13 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+ * Description:
+ * This is the stop entry point of the driver.
+ */
+-int stmmac_release(struct net_device *dev)
++static int stmmac_release(struct net_device *dev)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+ u32 chan;
+
++ netif_tx_disable(dev);
++
+ if (device_may_wakeup(priv->device))
+ phylink_speed_down(priv->phylink, false);
+ /* Stop and disconnect the PHY */
+@@ -3792,6 +3870,10 @@ int stmmac_release(struct net_device *dev)
+ /* Disable the MAC Rx/Tx */
+ stmmac_mac_set(priv, priv->ioaddr, false);
+
++ /* Powerdown Serdes if there is */
++ if (priv->plat->serdes_powerdown)
++ priv->plat->serdes_powerdown(dev, priv->plat->bsp_priv);
++
+ netif_carrier_off(dev);
+
+ stmmac_release_ptp(priv);
+@@ -5499,8 +5581,6 @@ static int stmmac_set_features(struct net_device *netdev,
+ netdev_features_t features)
+ {
+ struct stmmac_priv *priv = netdev_priv(netdev);
+- bool sph_en;
+- u32 chan;
+
+ /* Keep the COE Type in case of csum is supporting */
+ if (features & NETIF_F_RXCSUM)
+@@ -5512,10 +5592,13 @@ static int stmmac_set_features(struct net_device *netdev,
+ */
+ stmmac_rx_ipc(priv, priv->hw);
+
+- sph_en = (priv->hw->rx_csum > 0) && priv->sph;
++ if (priv->sph_cap) {
++ bool sph_en = (priv->hw->rx_csum > 0) && priv->sph;
++ u32 chan;
+
+- for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
+- stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
++ for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
++ stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
++ }
+
+ return 0;
+ }
+@@ -6200,6 +6283,10 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
+ bool is_double = false;
+ int ret;
+
++ ret = pm_runtime_resume_and_get(priv->device);
++ if (ret < 0)
++ return ret;
++
+ if (be16_to_cpu(proto) == ETH_P_8021AD)
+ is_double = true;
+
+@@ -6207,16 +6294,18 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
+ ret = stmmac_vlan_update(priv, is_double);
+ if (ret) {
+ clear_bit(vid, priv->active_vlans);
+- return ret;
++ goto err_pm_put;
+ }
+
+ if (priv->hw->num_vlan) {
+ ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
+ if (ret)
+- return ret;
++ goto err_pm_put;
+ }
++err_pm_put:
++ pm_runtime_put(priv->device);
+
+- return 0;
++ return ret;
+ }
+
+ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
+@@ -6421,6 +6510,146 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue)
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
++void stmmac_xdp_release(struct net_device *dev)
++{
++ struct stmmac_priv *priv = netdev_priv(dev);
++ u32 chan;
++
++ /* Ensure tx function is not running */
++ netif_tx_disable(dev);
++
++ /* Disable NAPI process */
++ stmmac_disable_all_queues(priv);
++
++ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
++ hrtimer_cancel(&priv->tx_queue[chan].txtimer);
++
++ /* Free the IRQ lines */
++ stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0);
++
++ /* Stop TX/RX DMA channels */
++ stmmac_stop_all_dma(priv);
++
++ /* Release and free the Rx/Tx resources */
++ free_dma_desc_resources(priv);
++
++ /* Disable the MAC Rx/Tx */
++ stmmac_mac_set(priv, priv->ioaddr, false);
++
++ /* set trans_start so we don't get spurious
++ * watchdogs during reset
++ */
++ netif_trans_update(dev);
++ netif_carrier_off(dev);
++}
++
++int stmmac_xdp_open(struct net_device *dev)
++{
++ struct stmmac_priv *priv = netdev_priv(dev);
++ u32 rx_cnt = priv->plat->rx_queues_to_use;
++ u32 tx_cnt = priv->plat->tx_queues_to_use;
++ u32 dma_csr_ch = max(rx_cnt, tx_cnt);
++ struct stmmac_rx_queue *rx_q;
++ struct stmmac_tx_queue *tx_q;
++ u32 buf_size;
++ bool sph_en;
++ u32 chan;
++ int ret;
++
++ ret = alloc_dma_desc_resources(priv);
++ if (ret < 0) {
++ netdev_err(dev, "%s: DMA descriptors allocation failed\n",
++ __func__);
++ goto dma_desc_error;
++ }
++
++ ret = init_dma_desc_rings(dev, GFP_KERNEL);
++ if (ret < 0) {
++ netdev_err(dev, "%s: DMA descriptors initialization failed\n",
++ __func__);
++ goto init_error;
++ }
++
++ /* DMA CSR Channel configuration */
++ for (chan = 0; chan < dma_csr_ch; chan++) {
++ stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
++ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
++ }
++
++ /* Adjust Split header */
++ sph_en = (priv->hw->rx_csum > 0) && priv->sph;
++
++ /* DMA RX Channel Configuration */
++ for (chan = 0; chan < rx_cnt; chan++) {
++ rx_q = &priv->rx_queue[chan];
++
++ stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
++ rx_q->dma_rx_phy, chan);
++
++ rx_q->rx_tail_addr = rx_q->dma_rx_phy +
++ (rx_q->buf_alloc_num *
++ sizeof(struct dma_desc));
++ stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
++ rx_q->rx_tail_addr, chan);
++
++ if (rx_q->xsk_pool && rx_q->buf_alloc_num) {
++ buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool);
++ stmmac_set_dma_bfsize(priv, priv->ioaddr,
++ buf_size,
++ rx_q->queue_index);
++ } else {
++ stmmac_set_dma_bfsize(priv, priv->ioaddr,
++ priv->dma_buf_sz,
++ rx_q->queue_index);
++ }
++
++ stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
++ }
++
++ /* DMA TX Channel Configuration */
++ for (chan = 0; chan < tx_cnt; chan++) {
++ tx_q = &priv->tx_queue[chan];
++
++ stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
++ tx_q->dma_tx_phy, chan);
++
++ tx_q->tx_tail_addr = tx_q->dma_tx_phy;
++ stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
++ tx_q->tx_tail_addr, chan);
++
++ hrtimer_init(&tx_q->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ tx_q->txtimer.function = stmmac_tx_timer;
++ }
++
++ /* Enable the MAC Rx/Tx */
++ stmmac_mac_set(priv, priv->ioaddr, true);
++
++ /* Start Rx & Tx DMA Channels */
++ stmmac_start_all_dma(priv);
++
++ ret = stmmac_request_irq(dev);
++ if (ret)
++ goto irq_error;
++
++ /* Enable NAPI process*/
++ stmmac_enable_all_queues(priv);
++ netif_carrier_on(dev);
++ netif_tx_start_all_queues(dev);
++ stmmac_enable_all_dma_irq(priv);
++
++ return 0;
++
++irq_error:
++ for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
++ hrtimer_cancel(&priv->tx_queue[chan].txtimer);
++
++ stmmac_hw_teardown(dev);
++init_error:
++ free_dma_desc_resources(priv);
++dma_desc_error:
++ return ret;
++}
++
+ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+@@ -6670,7 +6899,7 @@ static void stmmac_napi_del(struct net_device *dev)
+ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
+ {
+ struct stmmac_priv *priv = netdev_priv(dev);
+- int ret = 0;
++ int ret = 0, i;
+
+ if (netif_running(dev))
+ stmmac_release(dev);
+@@ -6679,6 +6908,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
+
+ priv->plat->rx_queues_to_use = rx_cnt;
+ priv->plat->tx_queues_to_use = tx_cnt;
++ if (!netif_is_rxfh_configured(dev))
++ for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
++ priv->rss.table[i] = ethtool_rxfh_indir_default(i,
++ rx_cnt);
+
+ stmmac_napi_add(dev);
+
+@@ -6830,7 +7063,8 @@ int stmmac_dvr_probe(struct device *device,
+ priv->wq = create_singlethread_workqueue("stmmac_wq");
+ if (!priv->wq) {
+ dev_err(priv->device, "failed to create workqueue\n");
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto error_wq_init;
+ }
+
+ INIT_WORK(&priv->service_task, stmmac_service_task);
+@@ -6889,7 +7123,7 @@ int stmmac_dvr_probe(struct device *device,
+ dev_info(priv->device, "TSO feature enabled\n");
+ }
+
+- if (priv->dma_cap.sphen) {
++ if (priv->dma_cap.sphen && !priv->plat->sph_disable) {
+ ndev->hw_features |= NETIF_F_GRO;
+ priv->sph_cap = true;
+ priv->sph = priv->sph_cap;
+@@ -7032,18 +7266,13 @@ int stmmac_dvr_probe(struct device *device,
+ goto error_netdev_register;
+ }
+
+- if (priv->plat->serdes_powerup) {
+- ret = priv->plat->serdes_powerup(ndev,
+- priv->plat->bsp_priv);
+-
+- if (ret < 0)
+- goto error_serdes_powerup;
+- }
+-
+ #ifdef CONFIG_DEBUG_FS
+ stmmac_init_fs(ndev);
+ #endif
+
++ if (priv->plat->dump_debug_regs)
++ priv->plat->dump_debug_regs(priv->plat->bsp_priv);
++
+ /* Let pm_runtime_put() disable the clocks.
+ * If CONFIG_PM is not enabled, the clocks will stay powered.
+ */
+@@ -7051,8 +7280,6 @@ int stmmac_dvr_probe(struct device *device,
+
+ return ret;
+
+-error_serdes_powerup:
+- unregister_netdev(ndev);
+ error_netdev_register:
+ phylink_destroy(priv->phylink);
+ error_xpcs_setup:
+@@ -7064,6 +7291,7 @@ error_mdio_register:
+ stmmac_napi_del(ndev);
+ error_hw_init:
+ destroy_workqueue(priv->wq);
++error_wq_init:
+ bitmap_free(priv->af_xdp_zc_qps);
+
+ return ret;
+@@ -7083,17 +7311,13 @@ int stmmac_dvr_remove(struct device *dev)
+
+ netdev_info(priv->dev, "%s: removing driver", __func__);
+
++ pm_runtime_get_sync(dev);
++
+ stmmac_stop_all_dma(priv);
+ stmmac_mac_set(priv, priv->ioaddr, false);
+ netif_carrier_off(ndev);
+ unregister_netdev(ndev);
+
+- /* Serdes power down needs to happen after VLAN filter
+- * is deleted that is triggered by unregister_netdev().
+- */
+- if (priv->plat->serdes_powerdown)
+- priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
+-
+ #ifdef CONFIG_DEBUG_FS
+ stmmac_exit_fs(ndev);
+ #endif
+@@ -7101,8 +7325,6 @@ int stmmac_dvr_remove(struct device *dev)
+ if (priv->plat->stmmac_rst)
+ reset_control_assert(priv->plat->stmmac_rst);
+ reset_control_assert(priv->plat->stmmac_ahb_rst);
+- pm_runtime_put(dev);
+- pm_runtime_disable(dev);
+ if (priv->hw->pcs != STMMAC_PCS_TBI &&
+ priv->hw->pcs != STMMAC_PCS_RTBI)
+ stmmac_mdio_unregister(ndev);
+@@ -7110,6 +7332,9 @@ int stmmac_dvr_remove(struct device *dev)
+ mutex_destroy(&priv->lock);
+ bitmap_free(priv->af_xdp_zc_qps);
+
++ pm_runtime_disable(dev);
++ pm_runtime_put_noidle(dev);
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
+@@ -7280,6 +7505,7 @@ int stmmac_resume(struct device *dev)
+ stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw);
+
+ stmmac_enable_all_queues(priv);
++ stmmac_enable_all_dma_irq(priv);
+
+ mutex_unlock(&priv->lock);
+ rtnl_unlock();
+@@ -7296,7 +7522,7 @@ static int __init stmmac_cmdline_opt(char *str)
+ char *opt;
+
+ if (!str || !*str)
+- return -EINVAL;
++ return 1;
+ while ((opt = strsep(&str, ",")) != NULL) {
+ if (!strncmp(opt, "debug:", 6)) {
+ if (kstrtoint(opt + 6, 0, &debug))
+@@ -7327,11 +7553,11 @@ static int __init stmmac_cmdline_opt(char *str)
+ goto err;
+ }
+ }
+- return 0;
++ return 1;
+
+ err:
+ pr_err("%s: ERROR broken module parameter conversion", __func__);
+- return -EINVAL;
++ return 1;
+ }
+
+ __setup("stmmaceth=", stmmac_cmdline_opt);
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+index fcf17d8a0494b..644bb54f5f020 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+@@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
+ return -ENOMEM;
+
+ /* Enable pci device */
+- ret = pci_enable_device(pdev);
++ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
+ __func__);
+@@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev)
+ pcim_iounmap_regions(pdev, BIT(i));
+ break;
+ }
+-
+- pci_disable_device(pdev);
+ }
+
+ static int __maybe_unused stmmac_pci_suspend(struct device *dev)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+index 232ac98943cd0..e12df9d99089f 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+@@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
+
+ axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
+ axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
+- axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe");
+- axi->axi_fb = of_property_read_bool(np, "snps,axi_fb");
+- axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
+- axi->axi_rb = of_property_read_bool(np, "snps,axi_rb");
++ axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe");
++ axi->axi_fb = of_property_read_bool(np, "snps,fb");
++ axi->axi_mb = of_property_read_bool(np, "snps,mb");
++ axi->axi_rb = of_property_read_bool(np, "snps,rb");
+
+ if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt))
+ axi->axi_wr_osr_lmt = 1;
+@@ -431,8 +431,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
+ plat->phylink_node = np;
+
+ /* Get max speed of operation from device tree */
+- if (of_property_read_u32(np, "max-speed", &plat->max_speed))
+- plat->max_speed = -1;
++ of_property_read_u32(np, "max-speed", &plat->max_speed);
+
+ plat->bus_id = of_alias_get_id(np, "ethernet");
+ if (plat->bus_id < 0)
+@@ -559,7 +558,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
+ dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
+
+ plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
+- if (plat->force_thresh_dma_mode) {
++ if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) {
+ plat->force_sf_dma_mode = 0;
+ dev_warn(&pdev->dev,
+ "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n");
+@@ -816,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
+ if (ret)
+ return ret;
+
+- clk_prepare_enable(priv->plat->clk_ptp_ref);
++ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
++ if (ret < 0) {
++ netdev_warn(priv->dev,
++ "failed to enable PTP reference clock: %pe\n",
++ ERR_PTR(ret));
++ return ret;
++ }
+ }
+
+ return 0;
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+index 580cc035536bd..e6221c33572d4 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+@@ -102,7 +102,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
+ time.tv_nsec = priv->plat->est->btr_reserve[0];
+ time.tv_sec = priv->plat->est->btr_reserve[1];
+ basetime = timespec64_to_ktime(time);
+- cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC +
++ cycle_time = (u64)priv->plat->est->ctr[1] * NSEC_PER_SEC +
+ priv->plat->est->ctr[0];
+ time = stmmac_calc_tas_basetime(basetime,
+ current_time_ns,
+@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
+ struct stmmac_priv *priv =
+ container_of(ptp, struct stmmac_priv, ptp_clock_ops);
+ void __iomem *ptpaddr = priv->ptpaddr;
+- void __iomem *ioaddr = priv->hw->pcsr;
+ struct stmmac_pps_cfg *cfg;
+- u32 intr_value, acr_value;
+ int ret = -EOPNOTSUPP;
+ unsigned long flags;
++ u32 acr_value;
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_PEROUT:
+@@ -213,23 +212,17 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
+ netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
+ priv->plat->ext_snapshot_num >>
+ PTP_ACR_ATSEN_SHIFT);
+- /* Enable Timestamp Interrupt */
+- intr_value = readl(ioaddr + GMAC_INT_EN);
+- intr_value |= GMAC_INT_TSIE;
+- writel(intr_value, ioaddr + GMAC_INT_EN);
+-
+ } else {
+ netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
+ priv->plat->ext_snapshot_num >>
+ PTP_ACR_ATSEN_SHIFT);
+- /* Disable Timestamp Interrupt */
+- intr_value = readl(ioaddr + GMAC_INT_EN);
+- intr_value &= ~GMAC_INT_TSIE;
+- writel(intr_value, ioaddr + GMAC_INT_EN);
+ }
+ writel(acr_value, ptpaddr + PTP_ACR);
+ mutex_unlock(&priv->aux_ts_lock);
+- ret = 0;
++ /* wait for auxts fifo clear to finish */
++ ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value,
++ !(acr_value & PTP_ACR_ATSFC),
++ 10, 10000);
+ break;
+
+ default:
+@@ -297,9 +290,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
+ {
+ int i;
+
+- if (priv->plat->ptp_clk_freq_config)
+- priv->plat->ptp_clk_freq_config(priv);
+-
+ for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
+ if (i >= STMMAC_PPS_MAX)
+ break;
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+index 53172a4398101..bf619295d079f 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+@@ -64,7 +64,7 @@
+ #define PTP_TCR_TSENMACADDR BIT(18)
+
+ /* SSIR defines */
+-#define PTP_SSIR_SSINC_MASK 0xff
++#define PTP_SSIR_SSINC_MAX 0xff
+ #define GMAC4_PTP_SSIR_SSINC_SHIFT 16
+
+ /* Auxiliary Control defines */
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+index 0462dcc93e536..ea7200b7b6477 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+@@ -1084,8 +1084,9 @@ static int stmmac_test_rxp(struct stmmac_priv *priv)
+ unsigned char addr[ETH_ALEN] = {0xde, 0xad, 0xbe, 0xef, 0x00, 0x00};
+ struct tc_cls_u32_offload cls_u32 = { };
+ struct stmmac_packet_attrs attr = { };
+- struct tc_action **actions, *act;
++ struct tc_action **actions;
+ struct tc_u32_sel *sel;
++ struct tcf_gact *gact;
+ struct tcf_exts *exts;
+ int ret, i, nk = 1;
+
+@@ -1104,14 +1105,14 @@ static int stmmac_test_rxp(struct stmmac_priv *priv)
+ goto cleanup_sel;
+ }
+
+- actions = kzalloc(nk * sizeof(*actions), GFP_KERNEL);
++ actions = kcalloc(nk, sizeof(*actions), GFP_KERNEL);
+ if (!actions) {
+ ret = -ENOMEM;
+ goto cleanup_exts;
+ }
+
+- act = kzalloc(nk * sizeof(*act), GFP_KERNEL);
+- if (!act) {
++ gact = kcalloc(nk, sizeof(*gact), GFP_KERNEL);
++ if (!gact) {
+ ret = -ENOMEM;
+ goto cleanup_actions;
+ }
+@@ -1126,9 +1127,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv)
+ exts->nr_actions = nk;
+ exts->actions = actions;
+ for (i = 0; i < nk; i++) {
+- struct tcf_gact *gact = to_gact(&act[i]);
+-
+- actions[i] = &act[i];
++ actions[i] = (struct tc_action *)&gact[i];
+ gact->tcf_action = TC_ACT_SHOT;
+ }
+
+@@ -1152,7 +1151,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv)
+ stmmac_tc_setup_cls_u32(priv, priv, &cls_u32);
+
+ cleanup_act:
+- kfree(act);
++ kfree(gact);
+ cleanup_actions:
+ kfree(actions);
+ cleanup_exts:
+@@ -1655,12 +1654,16 @@ static int stmmac_test_arpoffload(struct stmmac_priv *priv)
+ }
+
+ ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr);
+- if (ret)
++ if (ret) {
++ kfree_skb(skb);
+ goto cleanup;
++ }
+
+ ret = dev_set_promiscuity(priv->dev, 1);
+- if (ret)
++ if (ret) {
++ kfree_skb(skb);
+ goto cleanup;
++ }
+
+ ret = dev_direct_xmit(skb, 0);
+ if (ret)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+index 8160087ee92f2..d0a2b289f4603 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+@@ -232,11 +232,33 @@ static int tc_setup_cls_u32(struct stmmac_priv *priv,
+ }
+ }
+
++static int tc_rfs_init(struct stmmac_priv *priv)
++{
++ int i;
++
++ priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8;
++
++ for (i = 0; i < STMMAC_RFS_T_MAX; i++)
++ priv->rfs_entries_total += priv->rfs_entries_max[i];
++
++ priv->rfs_entries = devm_kcalloc(priv->device,
++ priv->rfs_entries_total,
++ sizeof(*priv->rfs_entries),
++ GFP_KERNEL);
++ if (!priv->rfs_entries)
++ return -ENOMEM;
++
++ dev_info(priv->device, "Enabled RFS Flow TC (entries=%d)\n",
++ priv->rfs_entries_total);
++
++ return 0;
++}
++
+ static int tc_init(struct stmmac_priv *priv)
+ {
+ struct dma_features *dma_cap = &priv->dma_cap;
+ unsigned int count;
+- int i;
++ int ret, i;
+
+ if (dma_cap->l3l4fnum) {
+ priv->flow_entries_max = dma_cap->l3l4fnum;
+@@ -250,10 +272,14 @@ static int tc_init(struct stmmac_priv *priv)
+ for (i = 0; i < priv->flow_entries_max; i++)
+ priv->flow_entries[i].idx = i;
+
+- dev_info(priv->device, "Enabled Flow TC (entries=%d)\n",
++ dev_info(priv->device, "Enabled L3L4 Flow TC (entries=%d)\n",
+ priv->flow_entries_max);
+ }
+
++ ret = tc_rfs_init(priv);
++ if (ret)
++ return -ENOMEM;
++
+ if (!priv->plat->fpe_cfg) {
+ priv->plat->fpe_cfg = devm_kzalloc(priv->device,
+ sizeof(*priv->plat->fpe_cfg),
+@@ -607,16 +633,45 @@ static int tc_del_flow(struct stmmac_priv *priv,
+ return ret;
+ }
+
++static struct stmmac_rfs_entry *tc_find_rfs(struct stmmac_priv *priv,
++ struct flow_cls_offload *cls,
++ bool get_free)
++{
++ int i;
++
++ for (i = 0; i < priv->rfs_entries_total; i++) {
++ struct stmmac_rfs_entry *entry = &priv->rfs_entries[i];
++
++ if (entry->cookie == cls->cookie)
++ return entry;
++ if (get_free && entry->in_use == false)
++ return entry;
++ }
++
++ return NULL;
++}
++
+ #define VLAN_PRIO_FULL_MASK (0x07)
+
+ static int tc_add_vlan_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls)
+ {
++ struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
+ struct flow_match_vlan match;
+
++ if (!entry) {
++ entry = tc_find_rfs(priv, cls, true);
++ if (!entry)
++ return -ENOENT;
++ }
++
++ if (priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN] >=
++ priv->rfs_entries_max[STMMAC_RFS_T_VLAN])
++ return -ENOENT;
++
+ /* Nothing to do here */
+ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+ return -EINVAL;
+@@ -638,6 +693,12 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv,
+
+ prio = BIT(match.key->vlan_priority);
+ stmmac_rx_queue_prio(priv, priv->hw, prio, tc);
++
++ entry->in_use = true;
++ entry->cookie = cls->cookie;
++ entry->tc = tc;
++ entry->type = STMMAC_RFS_T_VLAN;
++ priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]++;
+ }
+
+ return 0;
+@@ -646,20 +707,19 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv,
+ static int tc_del_vlan_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls)
+ {
+- struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+- struct flow_dissector *dissector = rule->match.dissector;
+- int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
++ struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false);
+
+- /* Nothing to do here */
+- if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+- return -EINVAL;
++ if (!entry || !entry->in_use || entry->type != STMMAC_RFS_T_VLAN)
++ return -ENOENT;
+
+- if (tc < 0) {
+- netdev_err(priv->dev, "Invalid traffic class\n");
+- return -EINVAL;
+- }
++ stmmac_rx_queue_prio(priv, priv->hw, 0, entry->tc);
++
++ entry->in_use = false;
++ entry->cookie = 0;
++ entry->tc = 0;
++ entry->type = 0;
+
+- stmmac_rx_queue_prio(priv, priv->hw, 0, tc);
++ priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]--;
+
+ return 0;
+ }
+@@ -786,8 +846,6 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
+ goto disable;
+ if (qopt->num_entries >= dep)
+ return -EINVAL;
+- if (!qopt->base_time)
+- return -ERANGE;
+ if (!qopt->cycle_time)
+ return -ERANGE;
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+index 2a616c6f7cd0e..9d4d8c3dad0a3 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+@@ -119,7 +119,7 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
+
+ need_update = !!priv->xdp_prog != !!prog;
+ if (if_running && need_update)
+- stmmac_release(dev);
++ stmmac_xdp_release(dev);
+
+ old_prog = xchg(&priv->xdp_prog, prog);
+ if (old_prog)
+@@ -129,7 +129,7 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
+ priv->sph = priv->sph_cap && !stmmac_xdp_is_enabled(priv);
+
+ if (if_running && need_update)
+- stmmac_open(dev);
++ stmmac_xdp_open(dev);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
+index 287ae4c538aae..21e4df8466c91 100644
+--- a/drivers/net/ethernet/sun/cassini.c
++++ b/drivers/net/ethernet/sun/cassini.c
+@@ -1325,7 +1325,7 @@ static void cas_init_rx_dma(struct cas *cp)
+ writel(val, cp->regs + REG_RX_PAGE_SIZE);
+
+ /* enable the header parser if desired */
+- if (CAS_HP_FIRMWARE == cas_prog_null)
++ if (&CAS_HP_FIRMWARE[0] == &cas_prog_null[0])
+ return;
+
+ val = CAS_BASE(HP_CFG_NUM_CPU, CAS_NCPUS > 63 ? 0 : CAS_NCPUS);
+@@ -3794,7 +3794,7 @@ static void cas_reset(struct cas *cp, int blkflag)
+
+ /* program header parser */
+ if ((cp->cas_flags & CAS_FLAG_TARGET_ABORT) ||
+- (CAS_HP_ALT_FIRMWARE == cas_prog_null)) {
++ (&CAS_HP_ALT_FIRMWARE[0] == &cas_prog_null[0])) {
+ cas_load_firmware(cp, CAS_HP_FIRMWARE);
+ } else {
+ cas_load_firmware(cp, CAS_HP_ALT_FIRMWARE);
+@@ -5123,6 +5123,8 @@ err_out_iounmap:
+ cas_shutdown(cp);
+ mutex_unlock(&cp->pm_mutex);
+
++ vfree(cp->fw_data);
++
+ pci_iounmap(pdev, cp->regs);
+
+
+diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
+index 50bd4e3b0af9d..cde65f76e5cef 100644
+--- a/drivers/net/ethernet/sun/ldmvsw.c
++++ b/drivers/net/ethernet/sun/ldmvsw.c
+@@ -290,6 +290,9 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+
+ hp = mdesc_grab();
+
++ if (!hp)
++ return -ENODEV;
++
+ rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
+ err = -ENODEV;
+ if (!rmac) {
+diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
+index a68a01d1b2b10..3fdc7c9824a39 100644
+--- a/drivers/net/ethernet/sun/niu.c
++++ b/drivers/net/ethernet/sun/niu.c
+@@ -4503,7 +4503,7 @@ static int niu_alloc_channels(struct niu *np)
+
+ err = niu_rbr_fill(np, rp, GFP_KERNEL);
+ if (err)
+- return err;
++ goto out_err;
+ }
+
+ tx_rings = kcalloc(num_tx_rings, sizeof(struct tx_ring_info),
+diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
+index 62f81b0d14ed8..735f24a70626b 100644
+--- a/drivers/net/ethernet/sun/sunhme.c
++++ b/drivers/net/ethernet/sun/sunhme.c
+@@ -2039,9 +2039,9 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
+
+ skb_reserve(copy_skb, 2);
+ skb_put(copy_skb, len);
+- dma_sync_single_for_cpu(hp->dma_dev, dma_addr, len, DMA_FROM_DEVICE);
++ dma_sync_single_for_cpu(hp->dma_dev, dma_addr, len + 2, DMA_FROM_DEVICE);
+ skb_copy_from_linear_data(skb, copy_skb->data, len);
+- dma_sync_single_for_device(hp->dma_dev, dma_addr, len, DMA_FROM_DEVICE);
++ dma_sync_single_for_device(hp->dma_dev, dma_addr, len + 2, DMA_FROM_DEVICE);
+ /* Reuse original ring buffer. */
+ hme_write_rxd(hp, this,
+ (RXFLAG_OWN|((RX_BUF_ALLOC_SIZE-RX_OFFSET)<<16)),
+@@ -3139,7 +3139,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
+ if (err) {
+ printk(KERN_ERR "happymeal(PCI): Cannot register net device, "
+ "aborting.\n");
+- goto err_out_iounmap;
++ goto err_out_free_coherent;
+ }
+
+ pci_set_drvdata(pdev, hp);
+@@ -3172,6 +3172,10 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
+
+ return 0;
+
++err_out_free_coherent:
++ dma_free_coherent(hp->dma_dev, PAGE_SIZE,
++ hp->happy_block, hp->hblock_dvma);
++
+ err_out_iounmap:
+ iounmap(hp->gregs);
+
+diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
+index 58ee89223951e..dcdfc1fd3d2ca 100644
+--- a/drivers/net/ethernet/sun/sunvnet.c
++++ b/drivers/net/ethernet/sun/sunvnet.c
+@@ -431,6 +431,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+
+ hp = mdesc_grab();
+
++ if (!hp)
++ return -ENODEV;
++
+ vp = vnet_find_parent(hp, vdev->mp, vdev);
+ if (IS_ERR(vp)) {
+ pr_err("Cannot find port parent vnet\n");
+diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
+index affcf92cd3aa5..6cba4d2c44830 100644
+--- a/drivers/net/ethernet/ti/Kconfig
++++ b/drivers/net/ethernet/ti/Kconfig
+@@ -33,6 +33,7 @@ config TI_DAVINCI_MDIO
+ tristate "TI DaVinci MDIO Support"
+ depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
+ select PHYLIB
++ select MDIO_BITBANG
+ help
+ This driver supports TI's DaVinci MDIO module.
+
+diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+index 130346f74ee8a..daf0779261f3e 100644
+--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+@@ -564,7 +564,15 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
+ k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn);
+ }
+
++ reinit_completion(&common->tdown_complete);
+ k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true);
++
++ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) {
++ i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000));
++ if (!i)
++ dev_err(common->dev, "rx teardown timeout\n");
++ }
++
+ napi_disable(&common->napi_rx);
+
+ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+@@ -786,6 +794,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
+
+ if (cppi5_desc_is_tdcm(desc_dma)) {
+ dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx);
++ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ)
++ complete(&common->tdown_complete);
+ return 0;
+ }
+
+@@ -1802,6 +1812,7 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common)
+ if (IS_ERR(cpts)) {
+ int ret = PTR_ERR(cpts);
+
++ of_node_put(node);
+ if (ret == -EOPNOTSUPP) {
+ dev_info(dev, "cpts disabled\n");
+ return 0;
+@@ -1970,7 +1981,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
+ ndev_priv->msg_enable = AM65_CPSW_DEBUG;
+ SET_NETDEV_DEV(port->ndev, dev);
+
+- ether_addr_copy(port->ndev->dev_addr, port->slave.mac_addr);
++ eth_hw_addr_set(port->ndev, port->slave.mac_addr);
+
+ port->ndev->min_mtu = AM65_CPSW_MIN_PACKET_SIZE;
+ port->ndev->max_mtu = AM65_CPSW_MAX_PACKET_SIZE;
+@@ -2053,7 +2064,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common)
+
+ for (i = 0; i < common->port_num; i++) {
+ port = &common->ports[i];
+- if (port->ndev)
++ if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(port->ndev);
+ }
+ }
+@@ -2466,7 +2477,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
+ port->port_id, ret);
+ goto dl_port_unreg;
+ }
+- devlink_port_type_eth_set(dl_port, port->ndev);
+ }
+
+ return ret;
+@@ -2513,6 +2523,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
+ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
+ {
+ struct device *dev = common->dev;
++ struct devlink_port *dl_port;
+ struct am65_cpsw_port *port;
+ int ret = 0, i;
+
+@@ -2529,6 +2540,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
+ return ret;
+ }
+
++ ret = am65_cpsw_nuss_register_devlink(common);
++ if (ret)
++ return ret;
++
+ for (i = 0; i < common->port_num; i++) {
+ port = &common->ports[i];
+
+@@ -2541,25 +2556,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
+ i, ret);
+ goto err_cleanup_ndev;
+ }
++
++ dl_port = &port->devlink_port;
++ devlink_port_type_eth_set(dl_port, port->ndev);
+ }
+
+ ret = am65_cpsw_register_notifiers(common);
+ if (ret)
+ goto err_cleanup_ndev;
+
+- ret = am65_cpsw_nuss_register_devlink(common);
+- if (ret)
+- goto clean_unregister_notifiers;
+-
+ /* can't auto unregister ndev using devm_add_action() due to
+ * devres release sequence in DD core for DMA
+ */
+
+ return 0;
+-clean_unregister_notifiers:
+- am65_cpsw_unregister_notifiers(common);
++
+ err_cleanup_ndev:
+ am65_cpsw_nuss_cleanup_ndev(common);
++ am65_cpsw_unregister_devlink(common);
+
+ return ret;
+ }
+@@ -2605,7 +2619,7 @@ static const struct am65_cpsw_pdata j721e_pdata = {
+ };
+
+ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
+- .quirks = 0,
++ .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ,
+ .ale_dev_id = "am64-cpswxg",
+ .fdqring_mode = K3_RINGACC_RING_MODE_RING,
+ };
+@@ -2668,9 +2682,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
+ if (!node)
+ return -ENOENT;
+ common->port_num = of_get_child_count(node);
++ of_node_put(node);
+ if (common->port_num < 1 || common->port_num > AM65_CPSW_MAX_PORTS)
+ return -ENOENT;
+- of_node_put(node);
+
+ common->rx_flow_id_base = -1;
+ init_completion(&common->tdown_complete);
+@@ -2770,7 +2784,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
+ return 0;
+
+ err_of_clear:
+- of_platform_device_destroy(common->mdio_dev, NULL);
++ if (common->mdio_dev)
++ of_platform_device_destroy(common->mdio_dev, NULL);
+ err_pm_clear:
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+@@ -2799,7 +2814,8 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
+ */
+ am65_cpsw_nuss_cleanup_ndev(common);
+
+- of_platform_device_destroy(common->mdio_dev, NULL);
++ if (common->mdio_dev)
++ of_platform_device_destroy(common->mdio_dev, NULL);
+
+ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+index 048ed10143c17..74569c8ed2eca 100644
+--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+@@ -84,6 +84,7 @@ struct am65_cpsw_rx_chn {
+ };
+
+ #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0)
++#define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1)
+
+ struct am65_cpsw_pdata {
+ u32 quirks;
+diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
+index 66f7ddd9b1f99..ca587fe281507 100644
+--- a/drivers/net/ethernet/ti/cpsw.c
++++ b/drivers/net/ethernet/ti/cpsw.c
+@@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
+ struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev);
+ int pkt_size = cpsw->rx_packet_max;
+ int ret = 0, port, ch = xmeta->ch;
+- int headroom = CPSW_HEADROOM;
++ int headroom = CPSW_HEADROOM_NA;
+ struct net_device *ndev = xmeta->ndev;
+ struct cpsw_priv *priv;
+ struct page_pool *pool;
+@@ -392,7 +392,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
+ }
+
+ if (priv->xdp_prog) {
+- int headroom = CPSW_HEADROOM, size = len;
++ int size = len;
+
+ xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
+ if (status & CPDMA_RX_VLAN_ENCAP) {
+@@ -442,7 +442,7 @@ requeue:
+ xmeta->ndev = ndev;
+ xmeta->ch = ch;
+
+- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
++ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
+ ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
+ pkt_size, 0);
+ if (ret < 0) {
+@@ -856,6 +856,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
+
+ err_cleanup:
+ if (!cpsw->usage_count) {
++ napi_disable(&cpsw->napi_rx);
++ napi_disable(&cpsw->napi_tx);
+ cpdma_ctlr_stop(cpsw->dma);
+ cpsw_destroy_xdp_rxqs(cpsw);
+ }
+diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
+index 0c75e0576ee1f..8c59e34d8bcaf 100644
+--- a/drivers/net/ethernet/ti/cpsw_ale.c
++++ b/drivers/net/ethernet/ti/cpsw_ale.c
+@@ -104,23 +104,37 @@ struct cpsw_ale_dev_id {
+
+ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ {
+- int idx;
++ int idx, idx2;
++ u32 hi_val = 0;
+
+ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be fetched exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
++ }
+ start -= idx * 32;
+ idx = 2 - idx; /* flip */
+- return (ale_entry[idx] >> start) & BITMASK(bits);
++ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
+ }
+
+ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
+ u32 value)
+ {
+- int idx;
++ int idx, idx2;
+
+ value &= BITMASK(bits);
+- idx = start / 32;
++ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be set exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
++ ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
++ }
+ start -= idx * 32;
+- idx = 2 - idx; /* flip */
++ idx = 2 - idx; /* flip */
+ ale_entry[idx] &= ~(BITMASK(bits) << start);
+ ale_entry[idx] |= (value << start);
+ }
+@@ -1299,10 +1313,8 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params)
+ if (!ale)
+ return ERR_PTR(-ENOMEM);
+
+- ale->p0_untag_vid_mask =
+- devm_kmalloc_array(params->dev, BITS_TO_LONGS(VLAN_N_VID),
+- sizeof(unsigned long),
+- GFP_KERNEL);
++ ale->p0_untag_vid_mask = devm_bitmap_zalloc(params->dev, VLAN_N_VID,
++ GFP_KERNEL);
+ if (!ale->p0_untag_vid_mask)
+ return ERR_PTR(-ENOMEM);
+
+diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
+index 158c8d3793f43..b5bae6324970a 100644
+--- a/drivers/net/ethernet/ti/cpsw_ethtool.c
++++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
+@@ -364,11 +364,9 @@ int cpsw_ethtool_op_begin(struct net_device *ndev)
+ struct cpsw_common *cpsw = priv->cpsw;
+ int ret;
+
+- ret = pm_runtime_get_sync(cpsw->dev);
+- if (ret < 0) {
++ ret = pm_runtime_resume_and_get(cpsw->dev);
++ if (ret < 0)
+ cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
+- pm_runtime_put_noidle(cpsw->dev);
+- }
+
+ return ret;
+ }
+diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
+index 7968f24d99c85..0d921f6542d6f 100644
+--- a/drivers/net/ethernet/ti/cpsw_new.c
++++ b/drivers/net/ethernet/ti/cpsw_new.c
+@@ -283,7 +283,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
+ {
+ struct page *new_page, *page = token;
+ void *pa = page_address(page);
+- int headroom = CPSW_HEADROOM;
++ int headroom = CPSW_HEADROOM_NA;
+ struct cpsw_meta_xdp *xmeta;
+ struct cpsw_common *cpsw;
+ struct net_device *ndev;
+@@ -336,7 +336,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
+ }
+
+ if (priv->xdp_prog) {
+- int headroom = CPSW_HEADROOM, size = len;
++ int size = len;
+
+ xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
+ if (status & CPDMA_RX_VLAN_ENCAP) {
+@@ -386,7 +386,7 @@ requeue:
+ xmeta->ndev = ndev;
+ xmeta->ch = ch;
+
+- dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
++ dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
+ ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
+ pkt_size, 0);
+ if (ret < 0) {
+@@ -1000,7 +1000,7 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
+ flags, vid);
+
+ ether_addr_copy(priv->mac_addr, addr->sa_data);
+- ether_addr_copy(ndev->dev_addr, priv->mac_addr);
++ eth_hw_addr_set(ndev, priv->mac_addr);
+ cpsw_set_slave_mac(&cpsw->slaves[slave_no], priv);
+
+ pm_runtime_put(cpsw->dev);
+@@ -1246,8 +1246,10 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
+ data->slave_data = devm_kcalloc(dev, CPSW_SLAVE_PORTS_NUM,
+ sizeof(struct cpsw_slave_data),
+ GFP_KERNEL);
+- if (!data->slave_data)
++ if (!data->slave_data) {
++ of_node_put(tmp_node);
+ return -ENOMEM;
++ }
+
+ /* Populate all the child nodes here...
+ */
+@@ -1341,6 +1343,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
+
+ err_node_put:
+ of_node_put(port_np);
++ of_node_put(tmp_node);
+ return ret;
+ }
+
+@@ -1401,7 +1404,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw)
+ dev_info(cpsw->dev, "Random MACID = %pM\n",
+ priv->mac_addr);
+ }
+- ether_addr_copy(ndev->dev_addr, slave_data->mac_addr);
++ eth_hw_addr_set(ndev, slave_data->mac_addr);
+ ether_addr_copy(priv->mac_addr, slave_data->mac_addr);
+
+ cpsw->slaves[i].ndev = ndev;
+diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
+index ecc2a6b7e28f2..f8e591d69d2cb 100644
+--- a/drivers/net/ethernet/ti/cpsw_priv.c
++++ b/drivers/net/ethernet/ti/cpsw_priv.c
+@@ -1120,7 +1120,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
+ xmeta->ndev = priv->ndev;
+ xmeta->ch = ch;
+
+- dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
++ dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM_NA;
+ ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
+ page, dma,
+ cpsw->rx_packet_max,
+@@ -1144,7 +1144,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
+ static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw,
+ int size)
+ {
+- struct page_pool_params pp_params;
++ struct page_pool_params pp_params = {};
+ struct page_pool *pool;
+
+ pp_params.order = 0;
+diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
+index 43222a34cba06..f9514518700eb 100644
+--- a/drivers/net/ethernet/ti/cpts.c
++++ b/drivers/net/ethernet/ti/cpts.c
+@@ -568,7 +568,9 @@ int cpts_register(struct cpts *cpts)
+ for (i = 0; i < CPTS_MAX_EVENTS; i++)
+ list_add(&cpts->pool_data[i].list, &cpts->pool);
+
+- clk_enable(cpts->refclk);
++ err = clk_enable(cpts->refclk);
++ if (err)
++ return err;
+
+ cpts_write32(cpts, CPTS_EN, control);
+ cpts_write32(cpts, TS_PEND_EN, int_enable);
+diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
+index e8291d8488391..fbd6bd80f51f4 100644
+--- a/drivers/net/ethernet/ti/davinci_emac.c
++++ b/drivers/net/ethernet/ti/davinci_emac.c
+@@ -420,8 +420,20 @@ static int emac_set_coalesce(struct net_device *ndev,
+ u32 int_ctrl, num_interrupts = 0;
+ u32 prescale = 0, addnl_dvdr = 1, coal_intvl = 0;
+
+- if (!coal->rx_coalesce_usecs)
+- return -EINVAL;
++ if (!coal->rx_coalesce_usecs) {
++ priv->coal_intvl = 0;
++
++ switch (priv->version) {
++ case EMAC_VERSION_2:
++ emac_ctrl_write(EMAC_DM646X_CMINTCTRL, 0);
++ break;
++ default:
++ emac_ctrl_write(EMAC_CTRL_EWINTTCNT, 0);
++ break;
++ }
++
++ return 0;
++ }
+
+ coal_intvl = coal->rx_coalesce_usecs;
+
+@@ -1899,7 +1911,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
+
+ rc = davinci_emac_try_get_mac(pdev, res_ctrl ? 0 : 1, priv->mac_addr);
+ if (!rc)
+- ether_addr_copy(ndev->dev_addr, priv->mac_addr);
++ eth_hw_addr_set(ndev, priv->mac_addr);
+
+ if (!is_valid_ether_addr(priv->mac_addr)) {
+ /* Use random MAC if still none obtained. */
+diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
+index a4efd5e351584..995633e1ec5e0 100644
+--- a/drivers/net/ethernet/ti/davinci_mdio.c
++++ b/drivers/net/ethernet/ti/davinci_mdio.c
+@@ -26,6 +26,8 @@
+ #include <linux/of_device.h>
+ #include <linux/of_mdio.h>
+ #include <linux/pinctrl/consumer.h>
++#include <linux/mdio-bitbang.h>
++#include <linux/sys_soc.h>
+
+ /*
+ * This timeout definition is a worst-case ultra defensive measure against
+@@ -41,6 +43,7 @@
+
+ struct davinci_mdio_of_param {
+ int autosuspend_delay_ms;
++ bool manual_mode;
+ };
+
+ struct davinci_mdio_regs {
+@@ -49,6 +52,15 @@ struct davinci_mdio_regs {
+ #define CONTROL_IDLE BIT(31)
+ #define CONTROL_ENABLE BIT(30)
+ #define CONTROL_MAX_DIV (0xffff)
++#define CONTROL_CLKDIV GENMASK(15, 0)
++
++#define MDIO_MAN_MDCLK_O BIT(2)
++#define MDIO_MAN_OE BIT(1)
++#define MDIO_MAN_PIN BIT(0)
++#define MDIO_MANUALMODE BIT(31)
++
++#define MDIO_PIN 0
++
+
+ u32 alive;
+ u32 link;
+@@ -59,7 +71,9 @@ struct davinci_mdio_regs {
+ u32 userintmasked;
+ u32 userintmaskset;
+ u32 userintmaskclr;
+- u32 __reserved_1[20];
++ u32 manualif;
++ u32 poll;
++ u32 __reserved_1[18];
+
+ struct {
+ u32 access;
+@@ -79,6 +93,7 @@ static const struct mdio_platform_data default_pdata = {
+
+ struct davinci_mdio_data {
+ struct mdio_platform_data pdata;
++ struct mdiobb_ctrl bb_ctrl;
+ struct davinci_mdio_regs __iomem *regs;
+ struct clk *clk;
+ struct device *dev;
+@@ -90,6 +105,7 @@ struct davinci_mdio_data {
+ */
+ bool skip_scan;
+ u32 clk_div;
++ bool manual_mode;
+ };
+
+ static void davinci_mdio_init_clk(struct davinci_mdio_data *data)
+@@ -128,9 +144,122 @@ static void davinci_mdio_enable(struct davinci_mdio_data *data)
+ writel(data->clk_div | CONTROL_ENABLE, &data->regs->control);
+ }
+
+-static int davinci_mdio_reset(struct mii_bus *bus)
++static void davinci_mdio_disable(struct davinci_mdio_data *data)
++{
++ u32 reg;
++
++ /* Disable MDIO state machine */
++ reg = readl(&data->regs->control);
++
++ reg &= ~CONTROL_CLKDIV;
++ reg |= data->clk_div;
++
++ reg &= ~CONTROL_ENABLE;
++ writel(reg, &data->regs->control);
++}
++
++static void davinci_mdio_enable_manual_mode(struct davinci_mdio_data *data)
++{
++ u32 reg;
++ /* set manual mode */
++ reg = readl(&data->regs->poll);
++ reg |= MDIO_MANUALMODE;
++ writel(reg, &data->regs->poll);
++}
++
++static void davinci_set_mdc(struct mdiobb_ctrl *ctrl, int level)
++{
++ struct davinci_mdio_data *data;
++ u32 reg;
++
++ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
++ reg = readl(&data->regs->manualif);
++
++ if (level)
++ reg |= MDIO_MAN_MDCLK_O;
++ else
++ reg &= ~MDIO_MAN_MDCLK_O;
++
++ writel(reg, &data->regs->manualif);
++}
++
++static void davinci_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output)
++{
++ struct davinci_mdio_data *data;
++ u32 reg;
++
++ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
++ reg = readl(&data->regs->manualif);
++
++ if (output)
++ reg |= MDIO_MAN_OE;
++ else
++ reg &= ~MDIO_MAN_OE;
++
++ writel(reg, &data->regs->manualif);
++}
++
++static void davinci_set_mdio_data(struct mdiobb_ctrl *ctrl, int value)
++{
++ struct davinci_mdio_data *data;
++ u32 reg;
++
++ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
++ reg = readl(&data->regs->manualif);
++
++ if (value)
++ reg |= MDIO_MAN_PIN;
++ else
++ reg &= ~MDIO_MAN_PIN;
++
++ writel(reg, &data->regs->manualif);
++}
++
++static int davinci_get_mdio_data(struct mdiobb_ctrl *ctrl)
++{
++ struct davinci_mdio_data *data;
++ unsigned long reg;
++
++ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
++ reg = readl(&data->regs->manualif);
++ return test_bit(MDIO_PIN, &reg);
++}
++
++static int davinci_mdiobb_read(struct mii_bus *bus, int phy, int reg)
++{
++ int ret;
++
++ ret = pm_runtime_resume_and_get(bus->parent);
++ if (ret < 0)
++ return ret;
++
++ ret = mdiobb_read(bus, phy, reg);
++
++ pm_runtime_mark_last_busy(bus->parent);
++ pm_runtime_put_autosuspend(bus->parent);
++
++ return ret;
++}
++
++static int davinci_mdiobb_write(struct mii_bus *bus, int phy, int reg,
++ u16 val)
++{
++ int ret;
++
++ ret = pm_runtime_resume_and_get(bus->parent);
++ if (ret < 0)
++ return ret;
++
++ ret = mdiobb_write(bus, phy, reg, val);
++
++ pm_runtime_mark_last_busy(bus->parent);
++ pm_runtime_put_autosuspend(bus->parent);
++
++ return ret;
++}
++
++static int davinci_mdio_common_reset(struct davinci_mdio_data *data)
+ {
+- struct davinci_mdio_data *data = bus->priv;
+ u32 phy_mask, ver;
+ int ret;
+
+@@ -140,6 +269,11 @@ static int davinci_mdio_reset(struct mii_bus *bus)
+ return ret;
+ }
+
++ if (data->manual_mode) {
++ davinci_mdio_disable(data);
++ davinci_mdio_enable_manual_mode(data);
++ }
++
+ /* wait for scan logic to settle */
+ msleep(PHY_MAX_ADDR * data->access_time);
+
+@@ -173,6 +307,23 @@ done:
+ return 0;
+ }
+
++static int davinci_mdio_reset(struct mii_bus *bus)
++{
++ struct davinci_mdio_data *data = bus->priv;
++
++ return davinci_mdio_common_reset(data);
++}
++
++static int davinci_mdiobb_reset(struct mii_bus *bus)
++{
++ struct mdiobb_ctrl *ctrl = bus->priv;
++ struct davinci_mdio_data *data;
++
++ data = container_of(ctrl, struct davinci_mdio_data, bb_ctrl);
++
++ return davinci_mdio_common_reset(data);
++}
++
+ /* wait until hardware is ready for another user access */
+ static inline int wait_for_user_access(struct davinci_mdio_data *data)
+ {
+@@ -324,6 +475,28 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
+ return 0;
+ }
+
++struct k3_mdio_soc_data {
++ bool manual_mode;
++};
++
++static const struct k3_mdio_soc_data am65_mdio_soc_data = {
++ .manual_mode = true,
++};
++
++static const struct soc_device_attribute k3_mdio_socinfo[] = {
++ { .family = "AM62X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
++ { .family = "AM64X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
++ { .family = "AM64X", .revision = "SR2.0", .data = &am65_mdio_soc_data },
++ { .family = "AM65X", .revision = "SR1.0", .data = &am65_mdio_soc_data },
++ { .family = "AM65X", .revision = "SR2.0", .data = &am65_mdio_soc_data },
++ { .family = "J7200", .revision = "SR1.0", .data = &am65_mdio_soc_data },
++ { .family = "J7200", .revision = "SR2.0", .data = &am65_mdio_soc_data },
++ { .family = "J721E", .revision = "SR1.0", .data = &am65_mdio_soc_data },
++ { .family = "J721E", .revision = "SR2.0", .data = &am65_mdio_soc_data },
++ { .family = "J721S2", .revision = "SR1.0", .data = &am65_mdio_soc_data},
++ { /* sentinel */ },
++};
++
+ #if IS_ENABLED(CONFIG_OF)
+ static const struct davinci_mdio_of_param of_cpsw_mdio_data = {
+ .autosuspend_delay_ms = 100,
+@@ -337,6 +510,14 @@ static const struct of_device_id davinci_mdio_of_mtable[] = {
+ MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
+ #endif
+
++static const struct mdiobb_ops davinci_mdiobb_ops = {
++ .owner = THIS_MODULE,
++ .set_mdc = davinci_set_mdc,
++ .set_mdio_dir = davinci_set_mdio_dir,
++ .set_mdio_data = davinci_set_mdio_data,
++ .get_mdio_data = davinci_get_mdio_data,
++};
++
+ static int davinci_mdio_probe(struct platform_device *pdev)
+ {
+ struct mdio_platform_data *pdata = dev_get_platdata(&pdev->dev);
+@@ -351,7 +532,26 @@ static int davinci_mdio_probe(struct platform_device *pdev)
+ if (!data)
+ return -ENOMEM;
+
+- data->bus = devm_mdiobus_alloc(dev);
++ data->manual_mode = false;
++ data->bb_ctrl.ops = &davinci_mdiobb_ops;
++
++ if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
++ const struct soc_device_attribute *soc_match_data;
++
++ soc_match_data = soc_device_match(k3_mdio_socinfo);
++ if (soc_match_data && soc_match_data->data) {
++ const struct k3_mdio_soc_data *socdata =
++ soc_match_data->data;
++
++ data->manual_mode = socdata->manual_mode;
++ }
++ }
++
++ if (data->manual_mode)
++ data->bus = alloc_mdio_bitbang(&data->bb_ctrl);
++ else
++ data->bus = devm_mdiobus_alloc(dev);
++
+ if (!data->bus) {
+ dev_err(dev, "failed to alloc mii bus\n");
+ return -ENOMEM;
+@@ -377,11 +577,20 @@ static int davinci_mdio_probe(struct platform_device *pdev)
+ }
+
+ data->bus->name = dev_name(dev);
+- data->bus->read = davinci_mdio_read;
+- data->bus->write = davinci_mdio_write;
+- data->bus->reset = davinci_mdio_reset;
++
++ if (data->manual_mode) {
++ data->bus->read = davinci_mdiobb_read;
++ data->bus->write = davinci_mdiobb_write;
++ data->bus->reset = davinci_mdiobb_reset;
++
++ dev_info(dev, "Configuring MDIO in manual mode\n");
++ } else {
++ data->bus->read = davinci_mdio_read;
++ data->bus->write = davinci_mdio_write;
++ data->bus->reset = davinci_mdio_reset;
++ data->bus->priv = data;
++ }
+ data->bus->parent = dev;
+- data->bus->priv = data;
+
+ data->clk = devm_clk_get(dev, "fck");
+ if (IS_ERR(data->clk)) {
+@@ -439,9 +648,13 @@ static int davinci_mdio_remove(struct platform_device *pdev)
+ {
+ struct davinci_mdio_data *data = platform_get_drvdata(pdev);
+
+- if (data->bus)
++ if (data->bus) {
+ mdiobus_unregister(data->bus);
+
++ if (data->manual_mode)
++ free_mdio_bitbang(data->bus);
++ }
++
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
+@@ -458,7 +671,9 @@ static int davinci_mdio_runtime_suspend(struct device *dev)
+ ctrl = readl(&data->regs->control);
+ ctrl &= ~CONTROL_ENABLE;
+ writel(ctrl, &data->regs->control);
+- wait_for_idle(data);
++
++ if (!data->manual_mode)
++ wait_for_idle(data);
+
+ return 0;
+ }
+@@ -467,7 +682,12 @@ static int davinci_mdio_runtime_resume(struct device *dev)
+ {
+ struct davinci_mdio_data *data = dev_get_drvdata(dev);
+
+- davinci_mdio_enable(data);
++ if (data->manual_mode) {
++ davinci_mdio_disable(data);
++ davinci_mdio_enable_manual_mode(data);
++ } else {
++ davinci_mdio_enable(data);
++ }
+ return 0;
+ }
+ #endif
+diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
+index eda2961c0fe2a..a6450055908db 100644
+--- a/drivers/net/ethernet/ti/netcp_core.c
++++ b/drivers/net/ethernet/ti/netcp_core.c
+@@ -1262,7 +1262,7 @@ out:
+ }
+
+ /* Submit the packet */
+-static int netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
++static netdev_tx_t netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ {
+ struct netcp_intf *netcp = netdev_priv(ndev);
+ struct netcp_stats *tx_stats = &netcp->stats;
+@@ -2028,14 +2028,14 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
+
+ emac_arch_get_mac_addr(efuse_mac_addr, efuse, efuse_mac);
+ if (is_valid_ether_addr(efuse_mac_addr))
+- ether_addr_copy(ndev->dev_addr, efuse_mac_addr);
++ eth_hw_addr_set(ndev, efuse_mac_addr);
+ else
+ eth_random_addr(ndev->dev_addr);
+
+ devm_iounmap(dev, efuse);
+ devm_release_mem_region(dev, res.start, size);
+ } else {
+- ret = of_get_mac_address(node_interface, ndev->dev_addr);
++ ret = of_get_ethdev_address(node_interface, ndev);
+ if (ret)
+ eth_random_addr(ndev->dev_addr);
+ }
+diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+index 55e652624bd76..78e484ea279bc 100644
+--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
++++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+@@ -317,15 +317,17 @@ static int gelic_card_init_chain(struct gelic_card *card,
+
+ /* set up the hardware pointers in each descriptor */
+ for (i = 0; i < no; i++, descr++) {
++ dma_addr_t cpu_addr;
++
+ gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
+- descr->bus_addr =
+- dma_map_single(ctodev(card), descr,
+- GELIC_DESCR_SIZE,
+- DMA_BIDIRECTIONAL);
+
+- if (!descr->bus_addr)
++ cpu_addr = dma_map_single(ctodev(card), descr,
++ GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL);
++
++ if (dma_mapping_error(ctodev(card), cpu_addr))
+ goto iommu_error;
+
++ descr->bus_addr = cpu_to_be32(cpu_addr);
+ descr->next = descr + 1;
+ descr->prev = descr - 1;
+ }
+@@ -365,26 +367,28 @@ iommu_error:
+ *
+ * allocates a new rx skb, iommu-maps it and attaches it to the descriptor.
+ * Activate the descriptor state-wise
++ *
++ * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length
++ * must be a multiple of GELIC_NET_RXBUF_ALIGN.
+ */
+ static int gelic_descr_prepare_rx(struct gelic_card *card,
+ struct gelic_descr *descr)
+ {
++ static const unsigned int rx_skb_size =
++ ALIGN(GELIC_NET_MAX_FRAME, GELIC_NET_RXBUF_ALIGN) +
++ GELIC_NET_RXBUF_ALIGN - 1;
++ dma_addr_t cpu_addr;
+ int offset;
+- unsigned int bufsize;
+
+ if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE)
+ dev_info(ctodev(card), "%s: ERROR status\n", __func__);
+- /* we need to round up the buffer size to a multiple of 128 */
+- bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
+
+- /* and we need to have it 128 byte aligned, therefore we allocate a
+- * bit more */
+- descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
++ descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size);
+ if (!descr->skb) {
+ descr->buf_addr = 0; /* tell DMAC don't touch memory */
+ return -ENOMEM;
+ }
+- descr->buf_size = cpu_to_be32(bufsize);
++ descr->buf_size = cpu_to_be32(rx_skb_size);
+ descr->dmac_cmd_status = 0;
+ descr->result_size = 0;
+ descr->valid_size = 0;
+@@ -395,11 +399,10 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
+ if (offset)
+ skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
+ /* io-mmu-map the skb */
+- descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
+- descr->skb->data,
+- GELIC_NET_MAX_MTU,
+- DMA_FROM_DEVICE));
+- if (!descr->buf_addr) {
++ cpu_addr = dma_map_single(ctodev(card), descr->skb->data,
++ GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE);
++ descr->buf_addr = cpu_to_be32(cpu_addr);
++ if (dma_mapping_error(ctodev(card), cpu_addr)) {
+ dev_kfree_skb_any(descr->skb);
+ descr->skb = NULL;
+ dev_info(ctodev(card),
+@@ -779,7 +782,7 @@ static int gelic_descr_prepare_tx(struct gelic_card *card,
+
+ buf = dma_map_single(ctodev(card), skb->data, skb->len, DMA_TO_DEVICE);
+
+- if (!buf) {
++ if (dma_mapping_error(ctodev(card), buf)) {
+ dev_err(ctodev(card),
+ "dma map 2 failed (%p, %i). Dropping packet\n",
+ skb->data, skb->len);
+@@ -915,7 +918,7 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr,
+ data_error = be32_to_cpu(descr->data_error);
+ /* unmap skb buffer */
+ dma_unmap_single(ctodev(card), be32_to_cpu(descr->buf_addr),
+- GELIC_NET_MAX_MTU,
++ GELIC_NET_MAX_FRAME,
+ DMA_FROM_DEVICE);
+
+ skb_put(skb, be32_to_cpu(descr->valid_size)?
+diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+index 68f324ed4eaf0..0d98defb011ed 100644
+--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
++++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+@@ -19,8 +19,9 @@
+ #define GELIC_NET_RX_DESCRIPTORS 128 /* num of descriptors */
+ #define GELIC_NET_TX_DESCRIPTORS 128 /* num of descriptors */
+
+-#define GELIC_NET_MAX_MTU VLAN_ETH_FRAME_LEN
+-#define GELIC_NET_MIN_MTU VLAN_ETH_ZLEN
++#define GELIC_NET_MAX_FRAME 2312
++#define GELIC_NET_MAX_MTU 2294
++#define GELIC_NET_MIN_MTU 64
+ #define GELIC_NET_RXBUF_ALIGN 128
+ #define GELIC_CARD_RX_CSUM_DEFAULT 1 /* hw chksum */
+ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ
+diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
+index cf0917b29e300..f175c098698d4 100644
+--- a/drivers/net/ethernet/tundra/tsi108_eth.c
++++ b/drivers/net/ethernet/tundra/tsi108_eth.c
+@@ -1302,12 +1302,15 @@ static int tsi108_open(struct net_device *dev)
+
+ data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size,
+ &data->rxdma, GFP_KERNEL);
+- if (!data->rxring)
++ if (!data->rxring) {
++ free_irq(data->irq_num, dev);
+ return -ENOMEM;
++ }
+
+ data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size,
+ &data->txdma, GFP_KERNEL);
+ if (!data->txring) {
++ free_irq(data->irq_num, dev);
+ dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring,
+ data->rxdma);
+ return -ENOMEM;
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index 463094ced104a..b4db50c9e7038 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1427,6 +1427,8 @@ static int temac_probe(struct platform_device *pdev)
+ lp->indirect_lock = devm_kmalloc(&pdev->dev,
+ sizeof(*lp->indirect_lock),
+ GFP_KERNEL);
++ if (!lp->indirect_lock)
++ return -ENOMEM;
+ spin_lock_init(lp->indirect_lock);
+ }
+
+@@ -1554,15 +1556,15 @@ static int temac_probe(struct platform_device *pdev)
+ }
+
+ /* Error handle returned DMA RX and TX interrupts */
+- if (lp->rx_irq < 0) {
+- if (lp->rx_irq != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "could not get DMA RX irq\n");
+- return lp->rx_irq;
++ if (lp->rx_irq <= 0) {
++ rc = lp->rx_irq ?: -EINVAL;
++ return dev_err_probe(&pdev->dev, rc,
++ "could not get DMA RX irq\n");
+ }
+- if (lp->tx_irq < 0) {
+- if (lp->tx_irq != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "could not get DMA TX irq\n");
+- return lp->tx_irq;
++ if (lp->tx_irq <= 0) {
++ rc = lp->tx_irq ?: -EINVAL;
++ return dev_err_probe(&pdev->dev, rc,
++ "could not get DMA TX irq\n");
+ }
+
+ if (temac_np) {
+diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+index 871b5ec3183d6..e7f6c29b8dd82 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+@@ -41,8 +41,9 @@
+ #include "xilinx_axienet.h"
+
+ /* Descriptors defines for Tx and Rx DMA */
+-#define TX_BD_NUM_DEFAULT 64
++#define TX_BD_NUM_DEFAULT 128
+ #define RX_BD_NUM_DEFAULT 1024
++#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1)
+ #define TX_BD_NUM_MAX 4096
+ #define RX_BD_NUM_MAX 4096
+
+@@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options)
+
+ static int __axienet_device_reset(struct axienet_local *lp)
+ {
+- u32 timeout;
++ u32 value;
++ int ret;
+
+ /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
+ * process of Axi DMA takes a while to complete as all pending
+@@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp)
+ * they both reset the entire DMA core, so only one needs to be used.
+ */
+ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
+- timeout = DELAY_OF_ONE_MILLISEC;
+- while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
+- XAXIDMA_CR_RESET_MASK) {
+- udelay(1);
+- if (--timeout == 0) {
+- netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
+- __func__);
+- return -ETIMEDOUT;
+- }
++ ret = read_poll_timeout(axienet_dma_in32, value,
++ !(value & XAXIDMA_CR_RESET_MASK),
++ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
++ XAXIDMA_TX_CR_OFFSET);
++ if (ret) {
++ dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__);
++ return ret;
++ }
++
++ /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */
++ ret = read_poll_timeout(axienet_ior, value,
++ value & XAE_INT_PHYRSTCMPLT_MASK,
++ DELAY_OF_ONE_MILLISEC, 50000, false, lp,
++ XAE_IS_OFFSET);
++ if (ret) {
++ dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__);
++ return ret;
+ }
+
+ return 0;
+@@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
+ if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK))
+ break;
+
++ /* Ensure we see complete descriptor update */
++ dma_rmb();
+ phys = desc_get_phys_addr(lp, cur_p);
+ dma_unmap_single(ndev->dev.parent, phys,
+ (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
+@@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
+ if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
+ dev_consume_skb_irq(cur_p->skb);
+
+- cur_p->cntrl = 0;
+ cur_p->app0 = 0;
+ cur_p->app1 = 0;
+ cur_p->app2 = 0;
+ cur_p->app4 = 0;
+- cur_p->status = 0;
+ cur_p->skb = NULL;
++ /* ensure our transmit path and device don't prematurely see status cleared */
++ wmb();
++ cur_p->cntrl = 0;
++ cur_p->status = 0;
+
+ if (sizep)
+ *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
+@@ -646,6 +660,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
+ return i;
+ }
+
++/**
++ * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
++ * @lp: Pointer to the axienet_local structure
++ * @num_frag: The number of BDs to check for
++ *
++ * Return: 0, on success
++ * NETDEV_TX_BUSY, if any of the descriptors are not free
++ *
++ * This function is invoked before BDs are allocated and transmission starts.
++ * This function returns 0 if a BD or group of BDs can be allocated for
++ * transmission. If the BD or any of the BDs are not free the function
++ * returns a busy status. This is invoked from axienet_start_xmit.
++ */
++static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
++ int num_frag)
++{
++ struct axidma_bd *cur_p;
++
++ /* Ensure we see all descriptor updates from device or TX IRQ path */
++ rmb();
++ cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
++ if (cur_p->cntrl)
++ return NETDEV_TX_BUSY;
++ return 0;
++}
++
+ /**
+ * axienet_start_xmit_done - Invoked once a transmit is completed by the
+ * Axi DMA Tx channel.
+@@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev)
+ /* Matches barrier in axienet_start_xmit */
+ smp_mb();
+
+- netif_wake_queue(ndev);
+-}
+-
+-/**
+- * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
+- * @lp: Pointer to the axienet_local structure
+- * @num_frag: The number of BDs to check for
+- *
+- * Return: 0, on success
+- * NETDEV_TX_BUSY, if any of the descriptors are not free
+- *
+- * This function is invoked before BDs are allocated and transmission starts.
+- * This function returns 0 if a BD or group of BDs can be allocated for
+- * transmission. If the BD or any of the BDs are not free the function
+- * returns a busy status. This is invoked from axienet_start_xmit.
+- */
+-static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
+- int num_frag)
+-{
+- struct axidma_bd *cur_p;
+- cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
+- if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
+- return NETDEV_TX_BUSY;
+- return 0;
++ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
++ netif_wake_queue(ndev);
+ }
+
+ /**
+@@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ num_frag = skb_shinfo(skb)->nr_frags;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+
+- if (axienet_check_tx_bd_space(lp, num_frag)) {
+- if (netif_queue_stopped(ndev))
+- return NETDEV_TX_BUSY;
+-
++ if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
++ /* Should not happen as last start_xmit call should have
++ * checked for sufficient space and queue should only be
++ * woken when sufficient space is available.
++ */
+ netif_stop_queue(ndev);
+-
+- /* Matches barrier in axienet_start_xmit_done */
+- smp_mb();
+-
+- /* Space might have just been freed - check again */
+- if (axienet_check_tx_bd_space(lp, num_frag))
+- return NETDEV_TX_BUSY;
+-
+- netif_wake_queue(ndev);
++ if (net_ratelimit())
++ netdev_warn(ndev, "TX ring unexpectedly full\n");
++ return NETDEV_TX_BUSY;
+ }
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+@@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+ if (++lp->tx_bd_tail >= lp->tx_bd_num)
+ lp->tx_bd_tail = 0;
+
++ /* Stop queue if next transmit may not have space */
++ if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
++ netif_stop_queue(ndev);
++
++ /* Matches barrier in axienet_start_xmit_done */
++ smp_mb();
++
++ /* Space might have just been freed - check again */
++ if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
++ netif_wake_queue(ndev);
++ }
++
+ return NETDEV_TX_OK;
+ }
+
+@@ -832,44 +857,53 @@ static void axienet_recv(struct net_device *ndev)
+ while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
+ dma_addr_t phys;
+
+- tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
+-
+- phys = desc_get_phys_addr(lp, cur_p);
+- dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
+- DMA_FROM_DEVICE);
++ /* Ensure we see complete descriptor update */
++ dma_rmb();
+
+ skb = cur_p->skb;
+ cur_p->skb = NULL;
+- length = cur_p->app4 & 0x0000FFFF;
+-
+- skb_put(skb, length);
+- skb->protocol = eth_type_trans(skb, ndev);
+- /*skb_checksum_none_assert(skb);*/
+- skb->ip_summed = CHECKSUM_NONE;
+-
+- /* if we're doing Rx csum offload, set it up */
+- if (lp->features & XAE_FEATURE_FULL_RX_CSUM) {
+- csumstatus = (cur_p->app2 &
+- XAE_FULL_CSUM_STATUS_MASK) >> 3;
+- if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) ||
+- (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++ /* skb could be NULL if a previous pass already received the
++ * packet for this slot in the ring, but failed to refill it
++ * with a newly allocated buffer. In this case, don't try to
++ * receive it again.
++ */
++ if (likely(skb)) {
++ length = cur_p->app4 & 0x0000FFFF;
++
++ phys = desc_get_phys_addr(lp, cur_p);
++ dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
++ DMA_FROM_DEVICE);
++
++ skb_put(skb, length);
++ skb->protocol = eth_type_trans(skb, ndev);
++ /*skb_checksum_none_assert(skb);*/
++ skb->ip_summed = CHECKSUM_NONE;
++
++ /* if we're doing Rx csum offload, set it up */
++ if (lp->features & XAE_FEATURE_FULL_RX_CSUM) {
++ csumstatus = (cur_p->app2 &
++ XAE_FULL_CSUM_STATUS_MASK) >> 3;
++ if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED ||
++ csumstatus == XAE_IP_UDP_CSUM_VALIDATED) {
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ }
++ } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 &&
++ skb->protocol == htons(ETH_P_IP) &&
++ skb->len > 64) {
++ skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF);
++ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+- } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 &&
+- skb->protocol == htons(ETH_P_IP) &&
+- skb->len > 64) {
+- skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF);
+- skb->ip_summed = CHECKSUM_COMPLETE;
+- }
+
+- netif_rx(skb);
++ netif_rx(skb);
+
+- size += length;
+- packets++;
++ size += length;
++ packets++;
++ }
+
+ new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size);
+ if (!new_skb)
+- return;
++ break;
+
+ phys = dma_map_single(ndev->dev.parent, new_skb->data,
+ lp->max_frm_size,
+@@ -878,7 +912,7 @@ static void axienet_recv(struct net_device *ndev)
+ if (net_ratelimit())
+ netdev_err(ndev, "RX DMA mapping error\n");
+ dev_kfree_skb(new_skb);
+- return;
++ break;
+ }
+ desc_set_phys_addr(lp, phys, cur_p);
+
+@@ -886,6 +920,11 @@ static void axienet_recv(struct net_device *ndev)
+ cur_p->status = 0;
+ cur_p->skb = new_skb;
+
++ /* Only update tail_p to mark this slot as usable after it has
++ * been successfully refilled.
++ */
++ tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
++
+ if (++lp->rx_bd_ci >= lp->rx_bd_num)
+ lp->rx_bd_ci = 0;
+ cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
+@@ -1346,7 +1385,8 @@ static int axienet_ethtools_set_ringparam(struct net_device *ndev,
+ if (ering->rx_pending > RX_BD_NUM_MAX ||
+ ering->rx_mini_pending ||
+ ering->rx_jumbo_pending ||
+- ering->rx_pending > TX_BD_NUM_MAX)
++ ering->tx_pending < TX_BD_NUM_MIN ||
++ ering->tx_pending > TX_BD_NUM_MAX)
+ return -EINVAL;
+
+ if (netif_running(ndev))
+@@ -2035,6 +2075,11 @@ static int axienet_probe(struct platform_device *pdev)
+ goto cleanup_clk;
+ }
+
++ /* Reset core now that clocks are enabled, prior to accessing MDIO */
++ ret = __axienet_device_reset(lp);
++ if (ret)
++ goto cleanup_clk;
++
+ /* Autodetect the need for 64-bit DMA pointers.
+ * When the IP is configured for a bus width bigger than 32 bits,
+ * writing the MSB registers is mandatory, even if they are all 0.
+@@ -2082,15 +2127,14 @@ static int axienet_probe(struct platform_device *pdev)
+ lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
+ lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+
+- lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+- if (lp->phy_node) {
+- ret = axienet_mdio_setup(lp);
+- if (ret)
+- dev_warn(&pdev->dev,
+- "error registering MDIO bus: %d\n", ret);
+- }
++ ret = axienet_mdio_setup(lp);
++ if (ret)
++ dev_warn(&pdev->dev,
++ "error registering MDIO bus: %d\n", ret);
++
+ if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII ||
+ lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) {
++ lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+ if (!lp->phy_node) {
+ dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n");
+ ret = -EINVAL;
+diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+index b780aad3550aa..093c75da38c47 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
++++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+@@ -543,7 +543,7 @@ static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue)
+ xemaclite_enable_interrupts(lp);
+
+ if (lp->deferred_skb) {
+- dev_kfree_skb(lp->deferred_skb);
++ dev_kfree_skb_irq(lp->deferred_skb);
+ lp->deferred_skb = NULL;
+ dev->stats.tx_errors++;
+ }
+@@ -822,10 +822,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
+ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
+ {
+ struct mii_bus *bus;
+- int rc;
+ struct resource res;
+ struct device_node *np = of_get_parent(lp->phy_node);
+ struct device_node *npp;
++ int rc, ret;
+
+ /* Don't register the MDIO bus if the phy_node or its parent node
+ * can't be found.
+@@ -835,8 +835,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
+ return -ENODEV;
+ }
+ npp = of_get_parent(np);
+-
+- of_address_to_resource(npp, 0, &res);
++ ret = of_address_to_resource(npp, 0, &res);
++ of_node_put(npp);
++ if (ret) {
++ dev_err(dev, "%s resource error!\n",
++ dev->of_node->full_name);
++ of_node_put(np);
++ return ret;
++ }
+ if (lp->ndev->mem_start != res.start) {
+ struct phy_device *phydev;
+ phydev = of_phy_find_device(lp->phy_node);
+@@ -845,6 +851,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
+ "MDIO of the phy is not registered yet\n");
+ else
+ put_device(&phydev->mdio.dev);
++ of_node_put(np);
+ return 0;
+ }
+
+@@ -857,6 +864,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
+ bus = mdiobus_alloc();
+ if (!bus) {
+ dev_err(dev, "Failed to allocate mdiobus\n");
++ of_node_put(np);
+ return -ENOMEM;
+ }
+
+@@ -869,6 +877,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
+ bus->parent = dev;
+
+ rc = of_mdiobus_register(bus, np);
++ of_node_put(np);
+ if (rc) {
+ dev_err(dev, "Failed to register mdio bus.\n");
+ goto err_register;
+@@ -925,8 +934,6 @@ static int xemaclite_open(struct net_device *dev)
+ xemaclite_disable_interrupts(lp);
+
+ if (lp->phy_node) {
+- u32 bmcr;
+-
+ lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node,
+ xemaclite_adjust_link, 0,
+ PHY_INTERFACE_MODE_MII);
+@@ -937,19 +944,6 @@ static int xemaclite_open(struct net_device *dev)
+
+ /* EmacLite doesn't support giga-bit speeds */
+ phy_set_max_speed(lp->phy_dev, SPEED_100);
+-
+- /* Don't advertise 1000BASE-T Full/Half duplex speeds */
+- phy_write(lp->phy_dev, MII_CTRL1000, 0);
+-
+- /* Advertise only 10 and 100mbps full/half duplex speeds */
+- phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL |
+- ADVERTISE_CSMA);
+-
+- /* Restart auto negotiation */
+- bmcr = phy_read(lp->phy_dev, MII_BMCR);
+- bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+- phy_write(lp->phy_dev, MII_BMCR, bmcr);
+-
+ phy_start(lp->phy_dev);
+ }
+
+@@ -1157,7 +1151,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
+ lp->tx_ping_pong = get_bool(ofdev, "xlnx,tx-ping-pong");
+ lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
+
+- rc = of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr);
++ rc = of_get_ethdev_address(ofdev->dev.of_node, ndev);
+ if (rc) {
+ dev_warn(dev, "No MAC address found, using random\n");
+ eth_hw_addr_random(ndev);
+@@ -1185,7 +1179,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
+ if (rc) {
+ dev_err(dev,
+ "Cannot register network device, aborting\n");
+- goto error;
++ goto put_node;
+ }
+
+ dev_info(dev,
+@@ -1193,6 +1187,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
+ (unsigned long __force)ndev->mem_start, lp->base_addr, ndev->irq);
+ return 0;
+
++put_node:
++ of_node_put(lp->phy_node);
+ error:
+ free_netdev(ndev);
+ return rc;
+diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
+index ae611e46da6af..f8bbd1489af15 100644
+--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
++++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
+@@ -503,6 +503,11 @@ static void
+ xirc2ps_detach(struct pcmcia_device *link)
+ {
+ struct net_device *dev = link->priv;
++ struct local_info *local = netdev_priv(dev);
++
++ netif_carrier_off(dev);
++ netif_tx_disable(dev);
++ cancel_work_sync(&local->tx_timeout_task);
+
+ dev_dbg(&link->dev, "detach\n");
+
+diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
+index 39234852e01b0..20f6aa508003b 100644
+--- a/drivers/net/ethernet/xscale/ptp_ixp46x.c
++++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
+@@ -272,7 +272,7 @@ static int ptp_ixp_probe(struct platform_device *pdev)
+ ixp_clock.master_irq = platform_get_irq(pdev, 0);
+ ixp_clock.slave_irq = platform_get_irq(pdev, 1);
+ if (IS_ERR(ixp_clock.regs) ||
+- !ixp_clock.master_irq || !ixp_clock.slave_irq)
++ ixp_clock.master_irq < 0 || ixp_clock.slave_irq < 0)
+ return -ENXIO;
+
+ ixp_clock.caps = ptp_ixp_caps;
+diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
+index 6d1e3f49a3d3d..ebf502290e5f3 100644
+--- a/drivers/net/fddi/defxx.c
++++ b/drivers/net/fddi/defxx.c
+@@ -3831,10 +3831,24 @@ static int dfx_init(void)
+ int status;
+
+ status = pci_register_driver(&dfx_pci_driver);
+- if (!status)
+- status = eisa_driver_register(&dfx_eisa_driver);
+- if (!status)
+- status = tc_register_driver(&dfx_tc_driver);
++ if (status)
++ goto err_pci_register;
++
++ status = eisa_driver_register(&dfx_eisa_driver);
++ if (status)
++ goto err_eisa_register;
++
++ status = tc_register_driver(&dfx_tc_driver);
++ if (status)
++ goto err_tc_register;
++
++ return 0;
++
++err_tc_register:
++ eisa_driver_unregister(&dfx_eisa_driver);
++err_eisa_register:
++ pci_unregister_driver(&dfx_pci_driver);
++err_pci_register:
+ return status;
+ }
+
+diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
+index 185c8a3986816..1d1808afd5295 100644
+--- a/drivers/net/fjes/fjes_main.c
++++ b/drivers/net/fjes/fjes_main.c
+@@ -1261,6 +1261,11 @@ static int fjes_probe(struct platform_device *plat_dev)
+ hw->hw_res.start = res->start;
+ hw->hw_res.size = resource_size(res);
+ hw->hw_res.irq = platform_get_irq(plat_dev, 0);
++ if (hw->hw_res.irq < 0) {
++ err = hw->hw_res.irq;
++ goto err_free_control_wq;
++ }
++
+ err = fjes_hw_init(&adapter->hw);
+ if (err)
+ goto err_free_control_wq;
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 1ab94b5f9bbf4..605332f36d9df 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -774,7 +774,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
+ struct geneve_sock *gs4,
+ struct flowi4 *fl4,
+ const struct ip_tunnel_info *info,
+- __be16 dport, __be16 sport)
++ __be16 dport, __be16 sport,
++ __u8 *full_tos)
+ {
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ struct geneve_dev *geneve = netdev_priv(dev);
+@@ -799,6 +800,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
+ use_cache = false;
+ }
+ fl4->flowi4_tos = RT_TOS(tos);
++ if (full_tos)
++ *full_tos = tos;
+
+ dst_cache = (struct dst_cache *)&info->dst_cache;
+ if (use_cache) {
+@@ -852,8 +855,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
+ use_cache = false;
+ }
+
+- fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
+- info->key.label);
++ fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label);
+ dst_cache = (struct dst_cache *)&info->dst_cache;
+ if (use_cache) {
+ dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
+@@ -887,6 +889,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+ const struct ip_tunnel_key *key = &info->key;
+ struct rtable *rt;
+ struct flowi4 fl4;
++ __u8 full_tos;
+ __u8 tos, ttl;
+ __be16 df = 0;
+ __be16 sport;
+@@ -897,7 +900,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+
+ sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+ rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
+- geneve->cfg.info.key.tp_dst, sport);
++ geneve->cfg.info.key.tp_dst, sport, &full_tos);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+@@ -941,7 +944,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ } else {
+- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
++ tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb);
+ if (geneve->cfg.ttl_inherit)
+ ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
+ else
+@@ -1123,7 +1126,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ 1, USHRT_MAX, true);
+
+ rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
+- geneve->cfg.info.key.tp_dst, sport);
++ geneve->cfg.info.key.tp_dst, sport, NULL);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 30e0a10595a16..a3878aef0ea4a 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -298,7 +298,9 @@ static void __gtp_encap_destroy(struct sock *sk)
+ gtp->sk1u = NULL;
+ udp_sk(sk)->encap_type = 0;
+ rcu_assign_sk_user_data(sk, NULL);
++ release_sock(sk);
+ sock_put(sk);
++ return;
+ }
+ release_sock(sk);
+ }
+diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
+index 6192244b304ab..36a9fbb704029 100644
+--- a/drivers/net/hamradio/6pack.c
++++ b/drivers/net/hamradio/6pack.c
+@@ -306,7 +306,6 @@ static void sp_setup(struct net_device *dev)
+ {
+ /* Finish setting up the DEVICE info. */
+ dev->netdev_ops = &sp_netdev_ops;
+- dev->needs_free_netdev = true;
+ dev->mtu = SIXP_MTU;
+ dev->hard_header_len = AX25_MAX_HEADER_LEN;
+ dev->header_ops = &ax25_header_ops;
+@@ -669,14 +668,16 @@ static void sixpack_close(struct tty_struct *tty)
+ */
+ netif_stop_queue(sp->dev);
+
++ unregister_netdev(sp->dev);
++
+ del_timer_sync(&sp->tx_t);
+ del_timer_sync(&sp->resync_t);
+
+- /* Free all 6pack frame buffers. */
++ /* Free all 6pack frame buffers after unreg. */
+ kfree(sp->rbuff);
+ kfree(sp->xbuff);
+
+- unregister_netdev(sp->dev);
++ free_netdev(sp->dev);
+ }
+
+ /* Perform I/O control on an active 6pack channel. */
+diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
+index 6b6f28d5b8d5d..f9d03f7b9101e 100644
+--- a/drivers/net/hamradio/baycom_epp.c
++++ b/drivers/net/hamradio/baycom_epp.c
+@@ -758,7 +758,7 @@ static void epp_bh(struct work_struct *work)
+ * ===================== network driver interface =========================
+ */
+
+-static int baycom_send_packet(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t baycom_send_packet(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct baycom_state *bc = netdev_priv(dev);
+
+diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
+index d967b0748773d..027b04795421d 100644
+--- a/drivers/net/hamradio/bpqether.c
++++ b/drivers/net/hamradio/bpqether.c
+@@ -534,7 +534,7 @@ static int bpq_device_event(struct notifier_block *this,
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+- if (!dev_is_ethdev(dev))
++ if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
+index 8666110bec555..763d435a9564c 100644
+--- a/drivers/net/hamradio/mkiss.c
++++ b/drivers/net/hamradio/mkiss.c
+@@ -31,6 +31,8 @@
+
+ #define AX_MTU 236
+
++/* some arch define END as assembly function ending, just undef it */
++#undef END
+ /* SLIP/KISS protocol characters. */
+ #define END 0300 /* indicates end of frame */
+ #define ESC 0333 /* indicates byte stuffing */
+@@ -792,13 +794,14 @@ static void mkiss_close(struct tty_struct *tty)
+ */
+ netif_stop_queue(ax->dev);
+
+- /* Free all AX25 frame buffers. */
++ unregister_netdev(ax->dev);
++
++ /* Free all AX25 frame buffers after unreg. */
+ kfree(ax->rbuff);
+ kfree(ax->xbuff);
+
+ ax->tty = NULL;
+
+- unregister_netdev(ax->dev);
+ free_netdev(ax->dev);
+ }
+
+diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
+index e0bb131a33d76..39db3cae4dd1a 100644
+--- a/drivers/net/hamradio/scc.c
++++ b/drivers/net/hamradio/scc.c
+@@ -301,12 +301,12 @@ static inline void scc_discard_buffers(struct scc_channel *scc)
+ spin_lock_irqsave(&scc->lock, flags);
+ if (scc->tx_buff != NULL)
+ {
+- dev_kfree_skb(scc->tx_buff);
++ dev_kfree_skb_irq(scc->tx_buff);
+ scc->tx_buff = NULL;
+ }
+
+ while (!skb_queue_empty(&scc->tx_queue))
+- dev_kfree_skb(skb_dequeue(&scc->tx_queue));
++ dev_kfree_skb_irq(skb_dequeue(&scc->tx_queue));
+
+ spin_unlock_irqrestore(&scc->lock, flags);
+ }
+@@ -1668,7 +1668,7 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev)
+ if (skb_queue_len(&scc->tx_queue) > scc->dev->tx_queue_len) {
+ struct sk_buff *skb_del;
+ skb_del = skb_dequeue(&scc->tx_queue);
+- dev_kfree_skb(skb_del);
++ dev_kfree_skb_irq(skb_del);
+ }
+ skb_queue_tail(&scc->tx_queue, skb);
+ netif_trans_update(dev);
+diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
+index 6ddacbdb224ba..528d57a435394 100644
+--- a/drivers/net/hamradio/yam.c
++++ b/drivers/net/hamradio/yam.c
+@@ -950,9 +950,7 @@ static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __
+ ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
+ if (IS_ERR(ym))
+ return PTR_ERR(ym);
+- if (ym->cmd != SIOCYAMSMCS)
+- return -EINVAL;
+- if (ym->bitrate > YAM_MAXBITRATE) {
++ if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) {
+ kfree(ym);
+ return -EINVAL;
+ }
+diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
+index 7661dbb31162b..50e4bea46d673 100644
+--- a/drivers/net/hippi/rrunner.c
++++ b/drivers/net/hippi/rrunner.c
+@@ -1353,7 +1353,9 @@ static int rr_close(struct net_device *dev)
+
+ rrpriv->fw_running = 0;
+
++ spin_unlock_irqrestore(&rrpriv->lock, flags);
+ del_timer_sync(&rrpriv->timer);
++ spin_lock_irqsave(&rrpriv->lock, flags);
+
+ writel(0, &regs->TxPi);
+ writel(0, &regs->IpRxPi);
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index bc48855dff10b..aab6cb5163fea 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -1037,7 +1037,8 @@ struct net_device_context {
+ u32 vf_alloc;
+ /* Serial number of the VF to team with */
+ u32 vf_serial;
+-
++ /* completion variable to confirm vf association */
++ struct completion vf_add;
+ /* Is the current data path through the VF NIC? */
+ bool data_path_is_vf;
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 7bd9354128534..4156299e039d8 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -814,6 +814,7 @@ static void netvsc_send_completion(struct net_device *ndev,
+ u32 msglen = hv_pkt_datalen(desc);
+ struct nvsp_message *pkt_rqst;
+ u64 cmd_rqst;
++ u32 status;
+
+ /* First check if this is a VMBUS completion without data payload */
+ if (!msglen) {
+@@ -885,6 +886,23 @@ static void netvsc_send_completion(struct net_device *ndev,
+ break;
+
+ case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
++ if (msglen < sizeof(struct nvsp_message_header) +
++ sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
++ if (net_ratelimit())
++ netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
++ msglen);
++ return;
++ }
++
++ /* If status indicates an error, output a message so we know
++ * there's a problem. But process the completion anyway so the
++ * resources are released.
++ */
++ status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
++ if (status != NVSP_STAT_SUCCESS && net_ratelimit())
++ netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
++ status);
++
+ netvsc_send_tx_complete(ndev, net_device, incoming_channel,
+ desc, budget);
+ break;
+@@ -1450,6 +1468,10 @@ static void netvsc_send_vf(struct net_device *ndev,
+
+ net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+ net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
++
++ if (net_device_ctx->vf_alloc)
++ complete(&net_device_ctx->vf_add);
++
+ netdev_info(ndev, "VF slot %u %s\n",
+ net_device_ctx->vf_serial,
+ net_device_ctx->vf_alloc ? "added" : "removed");
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 382bebc2420df..ec622e909a685 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1586,6 +1586,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
+ pcpu_sum = kvmalloc_array(num_possible_cpus(),
+ sizeof(struct netvsc_ethtool_pcpu_stats),
+ GFP_KERNEL);
++ if (!pcpu_sum)
++ return;
++
+ netvsc_get_pcpu_stats(dev, pcpu_sum);
+ for_each_present_cpu(cpu) {
+ struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+@@ -2333,6 +2336,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
+
+ }
+
++ /* Fallback path to check synthetic vf with
++ * help of mac addr
++ */
++ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
++ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
++ if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
++ netdev_notice(vf_netdev,
++ "falling back to mac addr based matching\n");
++ return ndev;
++ }
++ }
++
+ netdev_notice(vf_netdev,
+ "no netdev found for vf serial:%u\n", serial);
+ return NULL;
+@@ -2429,6 +2444,11 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
+ if (net_device_ctx->data_path_is_vf == vf_is_up)
+ return NOTIFY_OK;
+
++ if (vf_is_up && !net_device_ctx->vf_alloc) {
++ netdev_info(ndev, "Waiting for the VF association from host\n");
++ wait_for_completion(&net_device_ctx->vf_add);
++ }
++
+ ret = netvsc_switch_datapath(ndev, vf_is_up);
+
+ if (ret) {
+@@ -2460,6 +2480,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
+
+ netvsc_vf_setxdp(vf_netdev, NULL);
+
++ reinit_completion(&net_device_ctx->vf_add);
+ netdev_rx_handler_unregister(vf_netdev);
+ netdev_upper_dev_unlink(vf_netdev, ndev);
+ RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
+@@ -2499,6 +2520,7 @@ static int netvsc_probe(struct hv_device *dev,
+
+ INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
+
++ init_completion(&net_device_ctx->vf_add);
+ spin_lock_init(&net_device_ctx->lock);
+ INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+ INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+@@ -2662,7 +2684,10 @@ static int netvsc_suspend(struct hv_device *dev)
+
+ /* Save the current config info */
+ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
+-
++ if (!ndev_ctx->saved_netvsc_dev_info) {
++ ret = -ENOMEM;
++ goto out;
++ }
+ ret = netvsc_detach(net, nvdev);
+ out:
+ rtnl_unlock();
+diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
+index 7db9cbd0f5ded..07adbeec19787 100644
+--- a/drivers/net/ieee802154/adf7242.c
++++ b/drivers/net/ieee802154/adf7242.c
+@@ -1310,10 +1310,11 @@ static int adf7242_remove(struct spi_device *spi)
+
+ debugfs_remove_recursive(lp->debugfs_root);
+
++ ieee802154_unregister_hw(lp->hw);
++
+ cancel_delayed_work_sync(&lp->work);
+ destroy_workqueue(lp->wqueue);
+
+- ieee802154_unregister_hw(lp->hw);
+ mutex_destroy(&lp->bmux);
+ ieee802154_free_hw(lp->hw);
+
+diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
+index 7d67f41387f55..4f5ef8a9a9a87 100644
+--- a/drivers/net/ieee802154/at86rf230.c
++++ b/drivers/net/ieee802154/at86rf230.c
+@@ -100,6 +100,7 @@ struct at86rf230_local {
+ unsigned long cal_timeout;
+ bool is_tx;
+ bool is_tx_from_off;
++ bool was_tx;
+ u8 tx_retry;
+ struct sk_buff *tx_skb;
+ struct at86rf230_state_change tx;
+@@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context)
+ if (ctx->free)
+ kfree(ctx);
+
+- ieee802154_wake_queue(lp->hw);
++ if (lp->was_tx) {
++ lp->was_tx = 0;
++ dev_kfree_skb_any(lp->tx_skb);
++ ieee802154_wake_queue(lp->hw);
++ }
+ }
+
+ static void
+@@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context)
+ struct at86rf230_state_change *ctx = context;
+ struct at86rf230_local *lp = ctx->lp;
+
+- lp->is_tx = 0;
++ if (lp->is_tx) {
++ lp->was_tx = 1;
++ lp->is_tx = 0;
++ }
++
+ at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON,
+ at86rf230_async_error_recover_complete);
+ }
+diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
+index 23ee0b14cbfa1..2f5e7b31032aa 100644
+--- a/drivers/net/ieee802154/atusb.c
++++ b/drivers/net/ieee802154/atusb.c
+@@ -93,7 +93,9 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe,
+
+ ret = usb_control_msg(usb_dev, pipe, request, requesttype,
+ value, index, data, size, timeout);
+- if (ret < 0) {
++ if (ret < size) {
++ ret = ret < 0 ? ret : -ENODATA;
++
+ atusb->err = ret;
+ dev_err(&usb_dev->dev,
+ "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n",
+@@ -861,9 +863,9 @@ static int atusb_get_and_show_build(struct atusb *atusb)
+ if (!build)
+ return -ENOMEM;
+
+- ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0),
+- ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0,
+- build, ATUSB_BUILD_SIZE, 1000);
++ /* We cannot call atusb_control_msg() here, since this request may read various length data */
++ ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD,
++ ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000);
+ if (ret >= 0) {
+ build[ret] = 0;
+ dev_info(&usb_dev->dev, "Firmware: build %s\n", build);
+diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
+index 3a2824f24caa8..5834d3ed6dcf5 100644
+--- a/drivers/net/ieee802154/ca8210.c
++++ b/drivers/net/ieee802154/ca8210.c
+@@ -927,7 +927,7 @@ static int ca8210_spi_transfer(
+
+ dev_dbg(&spi->dev, "%s called\n", __func__);
+
+- cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC);
++ cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC);
+ if (!cas_ctl)
+ return -ENOMEM;
+
+@@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete(
+ status
+ );
+ if (status != MAC_TRANSACTION_OVERFLOW) {
++ dev_kfree_skb_any(priv->tx_skb);
+ ieee802154_wake_queue(priv->hw);
+ return 0;
+ }
+@@ -1944,10 +1945,9 @@ static int ca8210_skb_tx(
+ struct ca8210_priv *priv
+ )
+ {
+- int status;
+ struct ieee802154_hdr header = { };
+ struct secspec secspec;
+- unsigned int mac_len;
++ int mac_len, status;
+
+ dev_dbg(&priv->spi->dev, "%s called\n", __func__);
+
+@@ -1955,6 +1955,8 @@ static int ca8210_skb_tx(
+ * packet
+ */
+ mac_len = ieee802154_hdr_peek_addrs(skb, &header);
++ if (mac_len < 0)
++ return mac_len;
+
+ secspec.security_level = header.sec.level;
+ secspec.key_id_mode = header.sec.key_id_mode;
+@@ -2976,8 +2978,8 @@ static void ca8210_hw_setup(struct ieee802154_hw *ca8210_hw)
+ ca8210_hw->phy->cca.opt = NL802154_CCA_OPT_ENERGY_CARRIER_AND;
+ ca8210_hw->phy->cca_ed_level = -9800;
+ ca8210_hw->phy->symbol_duration = 16;
+- ca8210_hw->phy->lifs_period = 40;
+- ca8210_hw->phy->sifs_period = 12;
++ ca8210_hw->phy->lifs_period = 40 * ca8210_hw->phy->symbol_duration;
++ ca8210_hw->phy->sifs_period = 12 * ca8210_hw->phy->symbol_duration;
+ ca8210_hw->flags =
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_OMIT_CKSUM |
+diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
+index 89c046b204e0c..a8369bfa4050b 100644
+--- a/drivers/net/ieee802154/cc2520.c
++++ b/drivers/net/ieee802154/cc2520.c
+@@ -504,6 +504,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb)
+ goto err_tx;
+
+ if (status & CC2520_STATUS_TX_UNDERFLOW) {
++ rc = -EINVAL;
+ dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n");
+ goto err_tx;
+ }
+@@ -969,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv)
+
+ if (timeout-- <= 0) {
+ dev_err(&priv->spi->dev, "oscillator start failed!\n");
+- return ret;
++ return -ETIMEDOUT;
+ }
+ udelay(1);
+ } while (!(status & CC2520_STATUS_XOSC32M_STABLE));
+diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
+index 8caa61ec718f5..1ab1ba41c4305 100644
+--- a/drivers/net/ieee802154/mac802154_hwsim.c
++++ b/drivers/net/ieee802154/mac802154_hwsim.c
+@@ -522,7 +522,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info)
+ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info)
+ {
+ struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1];
+- struct hwsim_edge_info *einfo;
++ struct hwsim_edge_info *einfo, *einfo_old;
+ struct hwsim_phy *phy_v0;
+ struct hwsim_edge *e;
+ u32 v0, v1;
+@@ -560,8 +560,10 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info)
+ list_for_each_entry_rcu(e, &phy_v0->edges, list) {
+ if (e->endpoint->idx == v1) {
+ einfo->lqi = lqi;
+- rcu_assign_pointer(e->info, einfo);
++ einfo_old = rcu_replace_pointer(e->info, einfo,
++ lockdep_is_held(&hwsim_phys_lock));
+ rcu_read_unlock();
++ kfree_rcu(einfo_old, rcu);
+ mutex_unlock(&hwsim_phys_lock);
+ return 0;
+ }
+@@ -786,6 +788,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev,
+ goto err_pib;
+ }
+
++ pib->channel = 13;
+ rcu_assign_pointer(phy->pib, pib);
+ phy->idx = idx;
+ INIT_LIST_HEAD(&phy->edges);
+diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
+index 8dc04e2590b18..383231b854642 100644
+--- a/drivers/net/ieee802154/mcr20a.c
++++ b/drivers/net/ieee802154/mcr20a.c
+@@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp)
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+- phy->lifs_period = 40;
+- phy->sifs_period = 12;
++ phy->lifs_period = 40 * phy->symbol_duration;
++ phy->sifs_period = 12 * phy->symbol_duration;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
+index e9258a9f3702c..31bc02421dd4e 100644
+--- a/drivers/net/ifb.c
++++ b/drivers/net/ifb.c
+@@ -76,7 +76,9 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
+
+ while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
+ skb->redirected = 0;
++#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_skip_classify = 1;
++#endif
+
+ u64_stats_update_begin(&txp->tsync);
+ txp->tx_packets++;
+diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
+index d037682fb7adb..6782c2cbf542f 100644
+--- a/drivers/net/ipa/Kconfig
++++ b/drivers/net/ipa/Kconfig
+@@ -2,7 +2,9 @@ config QCOM_IPA
+ tristate "Qualcomm IPA support"
+ depends on NET && QCOM_SMEM
+ depends on ARCH_QCOM || COMPILE_TEST
++ depends on INTERCONNECT
+ depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST)
++ depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n
+ select QCOM_MDT_LOADER if ARCH_QCOM
+ select QCOM_SCM
+ select QCOM_QMI_HELPERS
+diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
+index a2fcdb1abdb96..a734e5576729e 100644
+--- a/drivers/net/ipa/gsi.c
++++ b/drivers/net/ipa/gsi.c
+@@ -1370,9 +1370,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
+ struct gsi_event *event_done;
+ struct gsi_event *event;
+ struct gsi_trans *trans;
++ u32 trans_count = 0;
+ u32 byte_count = 0;
+- u32 old_index;
+ u32 event_avail;
++ u32 old_index;
+
+ trans_info = &channel->trans_info;
+
+@@ -1393,6 +1394,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
+ do {
+ trans->len = __le16_to_cpu(event->len);
+ byte_count += trans->len;
++ trans_count++;
+
+ /* Move on to the next event and transaction */
+ if (--event_avail)
+@@ -1404,7 +1406,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
+
+ /* We record RX bytes when they are received */
+ channel->byte_count += byte_count;
+- channel->trans_count++;
++ channel->trans_count += trans_count;
+ }
+
+ /* Initialize a ring, including allocating DMA memory for its entries */
+diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
+index 1544564bc2835..d709e69181959 100644
+--- a/drivers/net/ipa/gsi_trans.c
++++ b/drivers/net/ipa/gsi_trans.c
+@@ -155,7 +155,7 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
+ * gsi_trans_pool_exit_dma() can assume the total allocated
+ * size is exactly (count * size).
+ */
+- total_size = get_order(total_size) << PAGE_SHIFT;
++ total_size = PAGE_SIZE << get_order(total_size);
+
+ virt = dma_alloc_coherent(dev, total_size, &addr, GFP_KERNEL);
+ if (!virt)
+diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
+index cff51731195aa..d57472ea077f2 100644
+--- a/drivers/net/ipa/ipa_cmd.c
++++ b/drivers/net/ipa/ipa_cmd.c
+@@ -661,22 +661,6 @@ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa)
+ wait_for_completion(&ipa->completion);
+ }
+
+-void ipa_cmd_pipeline_clear(struct ipa *ipa)
+-{
+- u32 count = ipa_cmd_pipeline_clear_count();
+- struct gsi_trans *trans;
+-
+- trans = ipa_cmd_trans_alloc(ipa, count);
+- if (trans) {
+- ipa_cmd_pipeline_clear_add(trans);
+- gsi_trans_commit_wait(trans);
+- ipa_cmd_pipeline_clear_wait(ipa);
+- } else {
+- dev_err(&ipa->pdev->dev,
+- "error allocating %u entry tag transaction\n", count);
+- }
+-}
+-
+ static struct ipa_cmd_info *
+ ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count)
+ {
+diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h
+index 69cd085d427db..05ed7e42e1842 100644
+--- a/drivers/net/ipa/ipa_cmd.h
++++ b/drivers/net/ipa/ipa_cmd.h
+@@ -163,12 +163,6 @@ u32 ipa_cmd_pipeline_clear_count(void);
+ */
+ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa);
+
+-/**
+- * ipa_cmd_pipeline_clear() - Clear the hardware pipeline
+- * @ipa: - IPA pointer
+- */
+-void ipa_cmd_pipeline_clear(struct ipa *ipa);
+-
+ /**
+ * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint
+ * @ipa: IPA pointer
+diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
+index 5528d97110d56..06a791d45f94b 100644
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -570,19 +570,23 @@ static void ipa_endpoint_init_hdr_ext(struct ipa_endpoint *endpoint)
+ struct ipa *ipa = endpoint->ipa;
+ u32 val = 0;
+
+- val |= HDR_ENDIANNESS_FMASK; /* big endian */
+-
+- /* A QMAP header contains a 6 bit pad field at offset 0. The RMNet
+- * driver assumes this field is meaningful in packets it receives,
+- * and assumes the header's payload length includes that padding.
+- * The RMNet driver does *not* pad packets it sends, however, so
+- * the pad field (although 0) should be ignored.
+- */
+- if (endpoint->data->qmap && !endpoint->toward_ipa) {
+- val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK;
+- /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */
+- val |= HDR_PAYLOAD_LEN_INC_PADDING_FMASK;
+- /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */
++ if (endpoint->data->qmap) {
++ /* We have a header, so we must specify its endianness */
++ val |= HDR_ENDIANNESS_FMASK; /* big endian */
++
++ /* A QMAP header contains a 6 bit pad field at offset 0.
++ * The RMNet driver assumes this field is meaningful in
++ * packets it receives, and assumes the header's payload
++ * length includes that padding. The RMNet driver does
++ * *not* pad packets it sends, however, so the pad field
++ * (although 0) should be ignored.
++ */
++ if (!endpoint->toward_ipa) {
++ val |= HDR_TOTAL_LEN_OR_PAD_VALID_FMASK;
++ /* HDR_TOTAL_LEN_OR_PAD is 0 (pad, not total_len) */
++ val |= HDR_PAYLOAD_LEN_INC_PADDING_FMASK;
++ /* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0 */
++ }
+ }
+
+ /* HDR_PAYLOAD_LEN_INC_PADDING is 0 */
+@@ -722,13 +726,15 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
+
+ if (endpoint->data->aggregation) {
+ if (!endpoint->toward_ipa) {
++ u32 buffer_size;
+ bool close_eof;
+ u32 limit;
+
+ val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK);
+ val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK);
+
+- limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE);
++ buffer_size = IPA_RX_BUFFER_SIZE - NET_SKB_PAD;
++ limit = ipa_aggr_size_kb(buffer_size);
+ val |= aggr_byte_limit_encoded(version, limit);
+
+ limit = IPA_AGGR_TIME_LIMIT;
+@@ -738,8 +744,6 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint)
+
+ close_eof = endpoint->data->rx.aggr_close_eof;
+ val |= aggr_sw_eof_active_encoded(version, close_eof);
+-
+- /* AGGR_HARD_BYTE_LIMIT_ENABLE is 0 */
+ } else {
+ val |= u32_encode_bits(IPA_ENABLE_DEAGGR,
+ AGGR_EN_FMASK);
+@@ -853,6 +857,7 @@ static void ipa_endpoint_init_hol_block_timer(struct ipa_endpoint *endpoint,
+ u32 offset;
+ u32 val;
+
++ /* This should only be changed when HOL_BLOCK_EN is disabled */
+ offset = IPA_REG_ENDP_INIT_HOL_BLOCK_TIMER_N_OFFSET(endpoint_id);
+ val = hol_block_timer_val(ipa, microseconds);
+ iowrite32(val, ipa->reg_virt + offset);
+@@ -868,6 +873,9 @@ ipa_endpoint_init_hol_block_enable(struct ipa_endpoint *endpoint, bool enable)
+ val = enable ? HOL_BLOCK_EN_FMASK : 0;
+ offset = IPA_REG_ENDP_INIT_HOL_BLOCK_EN_N_OFFSET(endpoint_id);
+ iowrite32(val, endpoint->ipa->reg_virt + offset);
++ /* When enabling, the register must be written twice for IPA v4.5+ */
++ if (enable && endpoint->ipa->version >= IPA_VERSION_4_5)
++ iowrite32(val, endpoint->ipa->reg_virt + offset);
+ }
+
+ void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa)
+@@ -880,6 +888,7 @@ void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa)
+ if (endpoint->toward_ipa || endpoint->ee_id != GSI_EE_MODEM)
+ continue;
+
++ ipa_endpoint_init_hol_block_enable(endpoint, false);
+ ipa_endpoint_init_hol_block_timer(endpoint, 0);
+ ipa_endpoint_init_hol_block_enable(endpoint, true);
+ }
+@@ -1040,7 +1049,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint)
+ err_trans_free:
+ gsi_trans_free(trans);
+ err_free_pages:
+- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++ put_page(page);
+
+ return -ENOMEM;
+ }
+@@ -1062,27 +1071,38 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one)
+ {
+ struct gsi *gsi;
+ u32 backlog;
++ int delta;
+
+- if (!endpoint->replenish_enabled) {
++ if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
+ if (add_one)
+ atomic_inc(&endpoint->replenish_saved);
+ return;
+ }
+
++ /* If already active, just update the backlog */
++ if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
++ if (add_one)
++ atomic_inc(&endpoint->replenish_backlog);
++ return;
++ }
++
+ while (atomic_dec_not_zero(&endpoint->replenish_backlog))
+ if (ipa_endpoint_replenish_one(endpoint))
+ goto try_again_later;
++
++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
++
+ if (add_one)
+ atomic_inc(&endpoint->replenish_backlog);
+
+ return;
+
+ try_again_later:
+- /* The last one didn't succeed, so fix the backlog */
+- backlog = atomic_inc_return(&endpoint->replenish_backlog);
++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+
+- if (add_one)
+- atomic_inc(&endpoint->replenish_backlog);
++ /* The last one didn't succeed, so fix the backlog */
++ delta = add_one ? 2 : 1;
++ backlog = atomic_add_return(delta, &endpoint->replenish_backlog);
+
+ /* Whenever a receive buffer transaction completes we'll try to
+ * replenish again. It's unlikely, but if we fail to supply even
+@@ -1102,7 +1122,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
+ u32 max_backlog;
+ u32 saved;
+
+- endpoint->replenish_enabled = true;
++ set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+ while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
+ atomic_add(saved, &endpoint->replenish_backlog);
+
+@@ -1116,7 +1136,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
+ {
+ u32 backlog;
+
+- endpoint->replenish_enabled = false;
++ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+ while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
+ atomic_add(backlog, &endpoint->replenish_saved);
+ }
+@@ -1369,7 +1389,7 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint,
+ struct page *page = trans->data;
+
+ if (page)
+- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++ put_page(page);
+ }
+ }
+
+@@ -1631,8 +1651,6 @@ void ipa_endpoint_suspend(struct ipa *ipa)
+ if (ipa->modem_netdev)
+ ipa_modem_suspend(ipa->modem_netdev);
+
+- ipa_cmd_pipeline_clear(ipa);
+-
+ ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
+ ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
+ }
+@@ -1663,7 +1681,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
+ /* RX transactions require a single TRE, so the maximum
+ * backlog is the same as the maximum outstanding TREs.
+ */
+- endpoint->replenish_enabled = false;
++ clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
++ clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+ atomic_set(&endpoint->replenish_saved,
+ gsi_channel_tre_max(gsi, endpoint->channel_id));
+ atomic_set(&endpoint->replenish_backlog, 0);
+diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
+index 0a859d10312dc..0313cdc607de3 100644
+--- a/drivers/net/ipa/ipa_endpoint.h
++++ b/drivers/net/ipa/ipa_endpoint.h
+@@ -40,6 +40,19 @@ enum ipa_endpoint_name {
+
+ #define IPA_ENDPOINT_MAX 32 /* Max supported by driver */
+
++/**
++ * enum ipa_replenish_flag: RX buffer replenish flags
++ *
++ * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled
++ * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway
++ * @IPA_REPLENISH_COUNT: Number of defined replenish flags
++ */
++enum ipa_replenish_flag {
++ IPA_REPLENISH_ENABLED,
++ IPA_REPLENISH_ACTIVE,
++ IPA_REPLENISH_COUNT, /* Number of flags (must be last) */
++};
++
+ /**
+ * struct ipa_endpoint - IPA endpoint information
+ * @ipa: IPA pointer
+@@ -51,7 +64,7 @@ enum ipa_endpoint_name {
+ * @trans_tre_max: Maximum number of TRE descriptors per transaction
+ * @evt_ring_id: GSI event ring used by the endpoint
+ * @netdev: Network device pointer, if endpoint uses one
+- * @replenish_enabled: Whether receive buffer replenishing is enabled
++ * @replenish_flags: Replenishing state flags
+ * @replenish_ready: Number of replenish transactions without doorbell
+ * @replenish_saved: Replenish requests held while disabled
+ * @replenish_backlog: Number of buffers needed to fill hardware queue
+@@ -72,7 +85,7 @@ struct ipa_endpoint {
+ struct net_device *netdev;
+
+ /* Receive buffer replenishing for RX endpoints */
+- bool replenish_enabled;
++ DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
+ u32 replenish_ready;
+ atomic_t replenish_saved;
+ atomic_t replenish_backlog;
+diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c
+index b35170a93b0fa..0c9ff8c055a05 100644
+--- a/drivers/net/ipa/ipa_interrupt.c
++++ b/drivers/net/ipa/ipa_interrupt.c
+@@ -122,6 +122,16 @@ out_power_put:
+ return IRQ_HANDLED;
+ }
+
++void ipa_interrupt_irq_disable(struct ipa *ipa)
++{
++ disable_irq(ipa->interrupt->irq);
++}
++
++void ipa_interrupt_irq_enable(struct ipa *ipa)
++{
++ enable_irq(ipa->interrupt->irq);
++}
++
+ /* Common function used to enable/disable TX_SUSPEND for an endpoint */
+ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
+ u32 endpoint_id, bool enable)
+diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h
+index 231390cea52a2..16aa84ee0094f 100644
+--- a/drivers/net/ipa/ipa_interrupt.h
++++ b/drivers/net/ipa/ipa_interrupt.h
+@@ -85,6 +85,22 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt);
+ */
+ void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt);
+
++/**
++ * ipa_interrupt_irq_enable() - Enable IPA interrupts
++ * @ipa: IPA pointer
++ *
++ * This enables the IPA interrupt line
++ */
++void ipa_interrupt_irq_enable(struct ipa *ipa);
++
++/**
++ * ipa_interrupt_irq_disable() - Disable IPA interrupts
++ * @ipa: IPA pointer
++ *
++ * This disables the IPA interrupt line
++ */
++void ipa_interrupt_irq_disable(struct ipa *ipa);
++
+ /**
+ * ipa_interrupt_config() - Configure the IPA interrupt framework
+ * @ipa: IPA pointer
+diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
+index cdfa98a76e1f4..a448ec198bee1 100644
+--- a/drivers/net/ipa/ipa_main.c
++++ b/drivers/net/ipa/ipa_main.c
+@@ -28,6 +28,7 @@
+ #include "ipa_reg.h"
+ #include "ipa_mem.h"
+ #include "ipa_table.h"
++#include "ipa_smp2p.h"
+ #include "ipa_modem.h"
+ #include "ipa_uc.h"
+ #include "ipa_interrupt.h"
+@@ -801,6 +802,11 @@ static int ipa_remove(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ int ret;
+
++ /* Prevent the modem from triggering a call to ipa_setup(). This
++ * also ensures a modem-initiated setup that's underway completes.
++ */
++ ipa_smp2p_irq_disable_setup(ipa);
++
+ ret = pm_runtime_get_sync(dev);
+ if (WARN_ON(ret < 0))
+ goto out_power_put;
+diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
+index 4337b0920d3d7..cad0798985a13 100644
+--- a/drivers/net/ipa/ipa_mem.c
++++ b/drivers/net/ipa/ipa_mem.c
+@@ -570,7 +570,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
+ }
+
+ /* Align the address down and the size up to a page boundary */
+- addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
++ addr = qcom_smem_virt_to_phys(virt);
+ phys = addr & PAGE_MASK;
+ size = PAGE_ALIGN(size + addr - phys);
+ iova = phys; /* We just want a direct mapping */
+diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
+index ad116bcc0580e..d0ab4d70c303b 100644
+--- a/drivers/net/ipa/ipa_modem.c
++++ b/drivers/net/ipa/ipa_modem.c
+@@ -339,9 +339,6 @@ int ipa_modem_stop(struct ipa *ipa)
+ if (state != IPA_MODEM_STATE_RUNNING)
+ return -EBUSY;
+
+- /* Prevent the modem from triggering a call to ipa_setup() */
+- ipa_smp2p_disable(ipa);
+-
+ /* Clean up the netdev and endpoints if it was started */
+ if (netdev) {
+ struct ipa_priv *priv = netdev_priv(netdev);
+@@ -369,6 +366,9 @@ static void ipa_modem_crashed(struct ipa *ipa)
+ struct device *dev = &ipa->pdev->dev;
+ int ret;
+
++ /* Prevent the modem from triggering a call to ipa_setup() */
++ ipa_smp2p_irq_disable_setup(ipa);
++
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ dev_err(dev, "error %d getting power to handle crash\n", ret);
+diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
+index b1c6c0fcb654f..07fb367cfc99d 100644
+--- a/drivers/net/ipa/ipa_power.c
++++ b/drivers/net/ipa/ipa_power.c
+@@ -11,6 +11,8 @@
+ #include <linux/pm_runtime.h>
+ #include <linux/bitops.h>
+
++#include "linux/soc/qcom/qcom_aoss.h"
++
+ #include "ipa.h"
+ #include "ipa_power.h"
+ #include "ipa_endpoint.h"
+@@ -64,6 +66,7 @@ enum ipa_power_flag {
+ * struct ipa_power - IPA power management information
+ * @dev: IPA device pointer
+ * @core: IPA core clock
++ * @qmp: QMP handle for AOSS communication
+ * @spinlock: Protects modem TX queue enable/disable
+ * @flags: Boolean state flags
+ * @interconnect_count: Number of elements in interconnect[]
+@@ -72,6 +75,7 @@ enum ipa_power_flag {
+ struct ipa_power {
+ struct device *dev;
+ struct clk *core;
++ struct qmp *qmp;
+ spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
+ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
+ u32 interconnect_count;
+@@ -273,6 +277,17 @@ static int ipa_suspend(struct device *dev)
+
+ __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
++ /* Increment the disable depth to ensure that the IRQ won't
++ * be re-enabled until the matching _enable call in
++ * ipa_resume(). We do this to ensure that the interrupt
++ * handler won't run whilst PM runtime is disabled.
++ *
++ * Note that disabling the IRQ is NOT the same as disabling
++ * irq wake. If wakeup is enabled for the IPA then the IRQ
++ * will still cause the system to wake up, see irq_set_irq_wake().
++ */
++ ipa_interrupt_irq_disable(ipa);
++
+ return pm_runtime_force_suspend(dev);
+ }
+
+@@ -285,6 +300,12 @@ static int ipa_resume(struct device *dev)
+
+ __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
++ /* Now that PM runtime is enabled again it's safe
++ * to turn the IRQ back on and process any data
++ * that was received during suspend.
++ */
++ ipa_interrupt_irq_enable(ipa);
++
+ return ret;
+ }
+
+@@ -382,6 +403,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa)
+ clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
+ }
+
++static int ipa_power_retention_init(struct ipa_power *power)
++{
++ struct qmp *qmp = qmp_get(power->dev);
++
++ if (IS_ERR(qmp)) {
++ if (PTR_ERR(qmp) == -EPROBE_DEFER)
++ return -EPROBE_DEFER;
++
++ /* We assume any other error means it's not defined/needed */
++ qmp = NULL;
++ }
++ power->qmp = qmp;
++
++ return 0;
++}
++
++static void ipa_power_retention_exit(struct ipa_power *power)
++{
++ qmp_put(power->qmp);
++ power->qmp = NULL;
++}
++
++/* Control register retention on power collapse */
++void ipa_power_retention(struct ipa *ipa, bool enable)
++{
++ static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }";
++ struct ipa_power *power = ipa->power;
++ char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */
++ int ret;
++
++ if (!power->qmp)
++ return; /* Not needed on this platform */
++
++ (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0');
++
++ ret = qmp_send(power->qmp, buf, sizeof(buf));
++ if (ret)
++ dev_err(power->dev, "error %d sending QMP %sable request\n",
++ ret, enable ? "en" : "dis");
++}
++
+ int ipa_power_setup(struct ipa *ipa)
+ {
+ int ret;
+@@ -438,12 +500,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data)
+ if (ret)
+ goto err_kfree;
+
++ ret = ipa_power_retention_init(power);
++ if (ret)
++ goto err_interconnect_exit;
++
+ pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_enable(dev);
+
+ return power;
+
++err_interconnect_exit:
++ ipa_interconnect_exit(power);
+ err_kfree:
+ kfree(power);
+ err_clk_put:
+@@ -460,6 +528,7 @@ void ipa_power_exit(struct ipa_power *power)
+
+ pm_runtime_disable(dev);
+ pm_runtime_dont_use_autosuspend(dev);
++ ipa_power_retention_exit(power);
+ ipa_interconnect_exit(power);
+ kfree(power);
+ clk_put(clk);
+diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h
+index 2151805d7fbb0..6f84f057a2095 100644
+--- a/drivers/net/ipa/ipa_power.h
++++ b/drivers/net/ipa/ipa_power.h
+@@ -40,6 +40,13 @@ void ipa_power_modem_queue_wake(struct ipa *ipa);
+ */
+ void ipa_power_modem_queue_active(struct ipa *ipa);
+
++/**
++ * ipa_power_retention() - Control register retention on power collapse
++ * @ipa: IPA pointer
++ * @enable: Whether retention should be enabled or disabled
++ */
++void ipa_power_retention(struct ipa *ipa, bool enable);
++
+ /**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa: IPA pointer
+diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
+index 90f3aec55b365..b84baedda5f69 100644
+--- a/drivers/net/ipa/ipa_qmi.c
++++ b/drivers/net/ipa/ipa_qmi.c
+@@ -308,12 +308,12 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
+ mem = ipa_mem_find(ipa, IPA_MEM_V4_ROUTE);
+ req.v4_route_tbl_info_valid = 1;
+ req.v4_route_tbl_info.start = ipa->mem_offset + mem->offset;
+- req.v4_route_tbl_info.count = mem->size / sizeof(__le64);
++ req.v4_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1;
+
+ mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE);
+ req.v6_route_tbl_info_valid = 1;
+ req.v6_route_tbl_info.start = ipa->mem_offset + mem->offset;
+- req.v6_route_tbl_info.count = mem->size / sizeof(__le64);
++ req.v6_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1;
+
+ mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER);
+ req.v4_filter_tbl_start_valid = 1;
+@@ -352,7 +352,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
+ req.v4_hash_route_tbl_info_valid = 1;
+ req.v4_hash_route_tbl_info.start =
+ ipa->mem_offset + mem->offset;
+- req.v4_hash_route_tbl_info.count = mem->size / sizeof(__le64);
++ req.v4_hash_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1;
+ }
+
+ mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE_HASHED);
+@@ -360,7 +360,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
+ req.v6_hash_route_tbl_info_valid = 1;
+ req.v6_hash_route_tbl_info.start =
+ ipa->mem_offset + mem->offset;
+- req.v6_hash_route_tbl_info.count = mem->size / sizeof(__le64);
++ req.v6_hash_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1;
+ }
+
+ mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER_HASHED);
+diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c
+index 6838e8065072b..75d3fc0092e92 100644
+--- a/drivers/net/ipa/ipa_qmi_msg.c
++++ b/drivers/net/ipa/ipa_qmi_msg.c
+@@ -311,7 +311,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
+ .tlv_type = 0x12,
+ .offset = offsetof(struct ipa_init_modem_driver_req,
+ v4_route_tbl_info),
+- .ei_array = ipa_mem_array_ei,
++ .ei_array = ipa_mem_bounds_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+@@ -332,7 +332,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
+ .tlv_type = 0x13,
+ .offset = offsetof(struct ipa_init_modem_driver_req,
+ v6_route_tbl_info),
+- .ei_array = ipa_mem_array_ei,
++ .ei_array = ipa_mem_bounds_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+@@ -496,7 +496,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
+ .tlv_type = 0x1b,
+ .offset = offsetof(struct ipa_init_modem_driver_req,
+ v4_hash_route_tbl_info),
+- .ei_array = ipa_mem_array_ei,
++ .ei_array = ipa_mem_bounds_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+@@ -517,7 +517,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
+ .tlv_type = 0x1c,
+ .offset = offsetof(struct ipa_init_modem_driver_req,
+ v6_hash_route_tbl_info),
+- .ei_array = ipa_mem_array_ei,
++ .ei_array = ipa_mem_bounds_ei,
+ },
+ {
+ .data_type = QMI_OPT_FLAG,
+diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h
+index 3233d145fd87c..51b39ffe020ed 100644
+--- a/drivers/net/ipa/ipa_qmi_msg.h
++++ b/drivers/net/ipa/ipa_qmi_msg.h
+@@ -86,9 +86,11 @@ enum ipa_platform_type {
+ IPA_QMI_PLATFORM_TYPE_MSM_QNX_V01 = 0x5, /* QNX MSM */
+ };
+
+-/* This defines the start and end offset of a range of memory. Both
+- * fields are offsets relative to the start of IPA shared memory.
+- * The end value is the last addressable byte *within* the range.
++/* This defines the start and end offset of a range of memory. The start
++ * value is a byte offset relative to the start of IPA shared memory. The
++ * end value is the last addressable unit *within* the range. Typically
++ * the end value is in units of bytes, however it can also be a maximum
++ * array index value.
+ */
+ struct ipa_mem_bounds {
+ u32 start;
+@@ -129,18 +131,19 @@ struct ipa_init_modem_driver_req {
+ u8 hdr_tbl_info_valid;
+ struct ipa_mem_bounds hdr_tbl_info;
+
+- /* Routing table information. These define the location and size of
+- * non-hashable IPv4 and IPv6 filter tables. The start values are
+- * offsets relative to the start of IPA shared memory.
++ /* Routing table information. These define the location and maximum
++ * *index* (not byte) for the modem portion of non-hashable IPv4 and
++ * IPv6 routing tables. The start values are byte offsets relative
++ * to the start of IPA shared memory.
+ */
+ u8 v4_route_tbl_info_valid;
+- struct ipa_mem_array v4_route_tbl_info;
++ struct ipa_mem_bounds v4_route_tbl_info;
+ u8 v6_route_tbl_info_valid;
+- struct ipa_mem_array v6_route_tbl_info;
++ struct ipa_mem_bounds v6_route_tbl_info;
+
+ /* Filter table information. These define the location of the
+ * non-hashable IPv4 and IPv6 filter tables. The start values are
+- * offsets relative to the start of IPA shared memory.
++ * byte offsets relative to the start of IPA shared memory.
+ */
+ u8 v4_filter_tbl_start_valid;
+ u32 v4_filter_tbl_start;
+@@ -181,18 +184,20 @@ struct ipa_init_modem_driver_req {
+ u8 zip_tbl_info_valid;
+ struct ipa_mem_bounds zip_tbl_info;
+
+- /* Routing table information. These define the location and size
+- * of hashable IPv4 and IPv6 filter tables. The start values are
+- * offsets relative to the start of IPA shared memory.
++ /* Routing table information. These define the location and maximum
++ * *index* (not byte) for the modem portion of hashable IPv4 and IPv6
++ * routing tables (if supported by hardware). The start values are
++ * byte offsets relative to the start of IPA shared memory.
+ */
+ u8 v4_hash_route_tbl_info_valid;
+- struct ipa_mem_array v4_hash_route_tbl_info;
++ struct ipa_mem_bounds v4_hash_route_tbl_info;
+ u8 v6_hash_route_tbl_info_valid;
+- struct ipa_mem_array v6_hash_route_tbl_info;
++ struct ipa_mem_bounds v6_hash_route_tbl_info;
+
+ /* Filter table information. These define the location and size
+- * of hashable IPv4 and IPv6 filter tables. The start values are
+- * offsets relative to the start of IPA shared memory.
++ * of hashable IPv4 and IPv6 filter tables (if supported by hardware).
++ * The start values are byte offsets relative to the start of IPA
++ * shared memory.
+ */
+ u8 v4_hash_filter_tbl_start_valid;
+ u32 v4_hash_filter_tbl_start;
+diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
+index e3da95d694099..06cec71993823 100644
+--- a/drivers/net/ipa/ipa_resource.c
++++ b/drivers/net/ipa/ipa_resource.c
+@@ -52,7 +52,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
+ return false;
+ }
+
+- group_count = data->rsrc_group_src_count;
++ group_count = data->rsrc_group_dst_count;
+ if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
+ return false;
+
+diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
+index df7639c39d716..2112336120391 100644
+--- a/drivers/net/ipa/ipa_smp2p.c
++++ b/drivers/net/ipa/ipa_smp2p.c
+@@ -53,7 +53,7 @@
+ * @setup_ready_irq: IPA interrupt triggered by modem to signal GSI ready
+ * @power_on: Whether IPA power is on
+ * @notified: Whether modem has been notified of power state
+- * @disabled: Whether setup ready interrupt handling is disabled
++ * @setup_disabled: Whether setup ready interrupt handler is disabled
+ * @mutex: Mutex protecting ready-interrupt/shutdown interlock
+ * @panic_notifier: Panic notifier structure
+ */
+@@ -67,7 +67,7 @@ struct ipa_smp2p {
+ u32 setup_ready_irq;
+ bool power_on;
+ bool notified;
+- bool disabled;
++ bool setup_disabled;
+ struct mutex mutex;
+ struct notifier_block panic_notifier;
+ };
+@@ -155,11 +155,9 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
+ struct device *dev;
+ int ret;
+
+- mutex_lock(&smp2p->mutex);
+-
+- if (smp2p->disabled)
+- goto out_mutex_unlock;
+- smp2p->disabled = true; /* If any others arrive, ignore them */
++ /* Ignore any (spurious) interrupts received after the first */
++ if (smp2p->ipa->setup_complete)
++ return IRQ_HANDLED;
+
+ /* Power needs to be active for setup */
+ dev = &smp2p->ipa->pdev->dev;
+@@ -176,8 +174,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
+ out_power_put:
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+-out_mutex_unlock:
+- mutex_unlock(&smp2p->mutex);
+
+ return IRQ_HANDLED;
+ }
+@@ -313,7 +309,7 @@ void ipa_smp2p_exit(struct ipa *ipa)
+ kfree(smp2p);
+ }
+
+-void ipa_smp2p_disable(struct ipa *ipa)
++void ipa_smp2p_irq_disable_setup(struct ipa *ipa)
+ {
+ struct ipa_smp2p *smp2p = ipa->smp2p;
+
+@@ -322,7 +318,10 @@ void ipa_smp2p_disable(struct ipa *ipa)
+
+ mutex_lock(&smp2p->mutex);
+
+- smp2p->disabled = true;
++ if (!smp2p->setup_disabled) {
++ disable_irq(smp2p->setup_ready_irq);
++ smp2p->setup_disabled = true;
++ }
+
+ mutex_unlock(&smp2p->mutex);
+ }
+diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h
+index 99a9567896388..59cee31a73836 100644
+--- a/drivers/net/ipa/ipa_smp2p.h
++++ b/drivers/net/ipa/ipa_smp2p.h
+@@ -27,13 +27,12 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init);
+ void ipa_smp2p_exit(struct ipa *ipa);
+
+ /**
+- * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling
++ * ipa_smp2p_irq_disable_setup() - Disable the "setup ready" interrupt
+ * @ipa: IPA pointer
+ *
+- * Prevent handling of the "setup ready" interrupt from the modem.
+- * This is used before initiating shutdown of the driver.
++ * Disable the "ipa-setup-ready" interrupt from the modem.
+ */
+-void ipa_smp2p_disable(struct ipa *ipa);
++void ipa_smp2p_irq_disable_setup(struct ipa *ipa);
+
+ /**
+ * ipa_smp2p_notify_reset() - Reset modem notification state
+diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
+index 1da334f54944a..6bf486d2b6799 100644
+--- a/drivers/net/ipa/ipa_table.c
++++ b/drivers/net/ipa/ipa_table.c
+@@ -108,8 +108,6 @@
+
+ /* Assignment of route table entries to the modem and AP */
+ #define IPA_ROUTE_MODEM_MIN 0
+-#define IPA_ROUTE_MODEM_COUNT 8
+-
+ #define IPA_ROUTE_AP_MIN IPA_ROUTE_MODEM_COUNT
+ #define IPA_ROUTE_AP_COUNT \
+ (IPA_ROUTE_COUNT_MAX - IPA_ROUTE_MODEM_COUNT)
+diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
+index b6a9a0d79d68e..1538e2e1732fe 100644
+--- a/drivers/net/ipa/ipa_table.h
++++ b/drivers/net/ipa/ipa_table.h
+@@ -13,6 +13,9 @@ struct ipa;
+ /* The maximum number of filter table entries (IPv4, IPv6; hashed or not) */
+ #define IPA_FILTER_COUNT_MAX 14
+
++/* The number of route table entries allotted to the modem */
++#define IPA_ROUTE_MODEM_COUNT 8
++
+ /* The maximum number of route table entries (IPv4, IPv6; hashed or not) */
+ #define IPA_ROUTE_COUNT_MAX 15
+
+diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
+index 856e55a080a7f..fe11910518d95 100644
+--- a/drivers/net/ipa/ipa_uc.c
++++ b/drivers/net/ipa/ipa_uc.c
+@@ -11,6 +11,7 @@
+
+ #include "ipa.h"
+ #include "ipa_uc.h"
++#include "ipa_power.h"
+
+ /**
+ * DOC: The IPA embedded microcontroller
+@@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id)
+ case IPA_UC_RESPONSE_INIT_COMPLETED:
+ if (ipa->uc_powered) {
+ ipa->uc_loaded = true;
++ ipa_power_retention(ipa, true);
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+ ipa->uc_powered = false;
+@@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa)
+
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
++ if (ipa->uc_loaded)
++ ipa_power_retention(ipa, false);
++
+ if (!ipa->uc_powered)
+ return;
+
+diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
+index 6cd50106e6112..e10cb98b0f4f5 100644
+--- a/drivers/net/ipvlan/ipvlan_core.c
++++ b/drivers/net/ipvlan/ipvlan_core.c
+@@ -437,6 +437,9 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
+ goto err;
+ }
+ skb_dst_set(skb, &rt->dst);
++
++ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
++
+ err = ip_local_out(net, skb->sk, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+@@ -475,6 +478,9 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+ goto err;
+ }
+ skb_dst_set(skb, dst);
++
++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++
+ err = ip6_local_out(net, skb->sk, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+@@ -496,7 +502,6 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+
+ static int ipvlan_process_outbound(struct sk_buff *skb)
+ {
+- struct ethhdr *ethh = eth_hdr(skb);
+ int ret = NET_XMIT_DROP;
+
+ /* The ipvlan is a pseudo-L2 device, so the packets that we receive
+@@ -506,6 +511,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
+ if (skb_mac_header_was_set(skb)) {
+ /* In this mode we dont care about
+ * multicast and broadcast traffic */
++ struct ethhdr *ethh = eth_hdr(skb);
++
+ if (is_multicast_ether_addr(ethh->h_dest)) {
+ pr_debug_ratelimited(
+ "Dropped {multi|broad}cast of type=[%x]\n",
+@@ -579,7 +586,8 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
+ consume_skb(skb);
+ return NET_XMIT_DROP;
+ }
+- return ipvlan_rcv_frame(addr, &skb, true);
++ ipvlan_rcv_frame(addr, &skb, true);
++ return NET_XMIT_SUCCESS;
+ }
+ }
+ out:
+@@ -590,7 +598,7 @@ out:
+ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
+ {
+ const struct ipvl_dev *ipvlan = netdev_priv(dev);
+- struct ethhdr *eth = eth_hdr(skb);
++ struct ethhdr *eth = skb_eth_hdr(skb);
+ struct ipvl_addr *addr;
+ void *lyr3h;
+ int addr_type;
+@@ -605,7 +613,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
+ consume_skb(skb);
+ return NET_XMIT_DROP;
+ }
+- return ipvlan_rcv_frame(addr, &skb, true);
++ ipvlan_rcv_frame(addr, &skb, true);
++ return NET_XMIT_SUCCESS;
+ }
+ }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+@@ -617,9 +626,11 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
+ * the skb for the main-dev. At the RX side we just return
+ * RX_PASS for it to be processed further on the stack.
+ */
+- return dev_forward_skb(ipvlan->phy_dev, skb);
++ dev_forward_skb(ipvlan->phy_dev, skb);
++ return NET_XMIT_SUCCESS;
+
+ } else if (is_multicast_ether_addr(eth->h_dest)) {
++ skb_reset_mac_header(skb);
+ ipvlan_skb_crossing_ns(skb, NULL);
+ ipvlan_multicast_enqueue(ipvlan->port, skb, true);
+ return NET_XMIT_SUCCESS;
+diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
+index 943d26cbf39f5..d5b05e8032199 100644
+--- a/drivers/net/ipvlan/ipvlan_l3s.c
++++ b/drivers/net/ipvlan/ipvlan_l3s.c
+@@ -101,6 +101,11 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+ goto out;
+
+ skb->dev = addr->master->dev;
++ skb->skb_iif = skb->dev->ifindex;
++#if IS_ENABLED(CONFIG_IPV6)
++ if (addr->atype == IPVL_IPV6)
++ IP6CB(skb)->iif = skb->dev->ifindex;
++#endif
+ len = skb->len + ETH_HLEN;
+ ipvlan_count_rx(addr->master, len, true, false);
+ out:
+diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
+index c0b21a5580d52..c199f0b465cd0 100644
+--- a/drivers/net/ipvlan/ipvlan_main.c
++++ b/drivers/net/ipvlan/ipvlan_main.c
+@@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused,
+
+ write_pnet(&port->pnet, newnet);
+
+- ipvlan_migrate_l3s_hook(oldnet, newnet);
++ if (port->mode == IPVLAN_MODE_L3S)
++ ipvlan_migrate_l3s_hook(oldnet, newnet);
+ break;
+ }
+ case NETDEV_UNREGISTER:
+@@ -787,7 +788,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
+
+ case NETDEV_CHANGEADDR:
+ list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+- ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
++ eth_hw_addr_set(ipvlan->dev, dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
+ }
+ break;
+diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
+index 1cedb634f4f7b..f01078b2581ce 100644
+--- a/drivers/net/ipvlan/ipvtap.c
++++ b/drivers/net/ipvlan/ipvtap.c
+@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = {
+ .notifier_call = ipvtap_device_event,
+ };
+
+-static int ipvtap_init(void)
++static int __init ipvtap_init(void)
+ {
+ int err;
+
+@@ -228,7 +228,7 @@ out1:
+ }
+ module_init(ipvtap_init);
+
+-static void ipvtap_exit(void)
++static void __exit ipvtap_exit(void)
+ {
+ rtnl_link_unregister(&ipvtap_link_ops);
+ unregister_netdevice_notifier(&ipvtap_notifier_block);
+diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
+index a1c77cc004165..498e5c8013efb 100644
+--- a/drivers/net/loopback.c
++++ b/drivers/net/loopback.c
+@@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net *net)
+ int err;
+
+ err = -ENOMEM;
+- dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup);
++ dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup);
+ if (!dev)
+ goto out;
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 93dc48b9b4f24..21f41f25a8abe 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -160,6 +160,19 @@ static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr)
+ return sa;
+ }
+
++static struct macsec_rx_sa *macsec_active_rxsa_get(struct macsec_rx_sc *rx_sc)
++{
++ struct macsec_rx_sa *sa = NULL;
++ int an;
++
++ for (an = 0; an < MACSEC_NUM_AN; an++) {
++ sa = macsec_rxsa_get(rx_sc->sa[an]);
++ if (sa)
++ break;
++ }
++ return sa;
++}
++
+ static void free_rx_sc_rcu(struct rcu_head *head)
+ {
+ struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head);
+@@ -241,6 +254,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -446,11 +460,6 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb)
+ return (struct macsec_eth_header *)skb_mac_header(skb);
+ }
+
+-static sci_t dev_to_sci(struct net_device *dev, __be16 port)
+-{
+- return make_sci(dev->dev_addr, port);
+-}
+-
+ static void __macsec_pn_wrapped(struct macsec_secy *secy,
+ struct macsec_tx_sa *tx_sa)
+ {
+@@ -497,18 +506,28 @@ static void macsec_encrypt_finish(struct sk_buff *skb, struct net_device *dev)
+ skb->protocol = eth_hdr(skb)->h_proto;
+ }
+
++static unsigned int macsec_msdu_len(struct sk_buff *skb)
++{
++ struct macsec_dev *macsec = macsec_priv(skb->dev);
++ struct macsec_secy *secy = &macsec->secy;
++ bool sci_present = macsec_skb_cb(skb)->has_sci;
++
++ return skb->len - macsec_hdr_len(sci_present) - secy->icv_len;
++}
++
+ static void macsec_count_tx(struct sk_buff *skb, struct macsec_tx_sc *tx_sc,
+ struct macsec_tx_sa *tx_sa)
+ {
++ unsigned int msdu_len = macsec_msdu_len(skb);
+ struct pcpu_tx_sc_stats *txsc_stats = this_cpu_ptr(tx_sc->stats);
+
+ u64_stats_update_begin(&txsc_stats->syncp);
+ if (tx_sc->encrypt) {
+- txsc_stats->stats.OutOctetsEncrypted += skb->len;
++ txsc_stats->stats.OutOctetsEncrypted += msdu_len;
+ txsc_stats->stats.OutPktsEncrypted++;
+ this_cpu_inc(tx_sa->stats->OutPktsEncrypted);
+ } else {
+- txsc_stats->stats.OutOctetsProtected += skb->len;
++ txsc_stats->stats.OutOctetsProtected += msdu_len;
+ txsc_stats->stats.OutPktsProtected++;
+ this_cpu_inc(tx_sa->stats->OutPktsProtected);
+ }
+@@ -538,9 +557,10 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
+ aead_request_free(macsec_skb_cb(skb)->req);
+
+ rcu_read_lock_bh();
+- macsec_encrypt_finish(skb, dev);
+ macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa);
+- len = skb->len;
++ /* packet is encrypted/protected so tx_bytes must be calculated */
++ len = macsec_msdu_len(skb) + 2 * ETH_ALEN;
++ macsec_encrypt_finish(skb, dev);
+ ret = dev_queue_xmit(skb);
+ count_tx(dev, ret, len);
+ rcu_read_unlock_bh();
+@@ -699,6 +719,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
+
+ macsec_skb_cb(skb)->req = req;
+ macsec_skb_cb(skb)->tx_sa = tx_sa;
++ macsec_skb_cb(skb)->has_sci = sci_present;
+ aead_request_set_callback(req, 0, macsec_encrypt_done, skb);
+
+ dev_hold(skb->dev);
+@@ -740,15 +761,17 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ rxsc_stats->stats.InPktsLate++;
+ u64_stats_update_end(&rxsc_stats->syncp);
++ DEV_STATS_INC(secy->netdev, rx_dropped);
+ return false;
+ }
+
+ if (secy->validate_frames != MACSEC_VALIDATE_DISABLED) {
++ unsigned int msdu_len = macsec_msdu_len(skb);
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ if (hdr->tci_an & MACSEC_TCI_E)
+- rxsc_stats->stats.InOctetsDecrypted += skb->len;
++ rxsc_stats->stats.InOctetsDecrypted += msdu_len;
+ else
+- rxsc_stats->stats.InOctetsValidated += skb->len;
++ rxsc_stats->stats.InOctetsValidated += msdu_len;
+ u64_stats_update_end(&rxsc_stats->syncp);
+ }
+
+@@ -761,6 +784,8 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ rxsc_stats->stats.InPktsNotValid++;
+ u64_stats_update_end(&rxsc_stats->syncp);
++ this_cpu_inc(rx_sa->stats->InPktsNotValid);
++ DEV_STATS_INC(secy->netdev, rx_errors);
+ return false;
+ }
+
+@@ -853,9 +878,9 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err)
+
+ macsec_finalize_skb(skb, macsec->secy.icv_len,
+ macsec_extra_len(macsec_skb_cb(skb)->has_sci));
++ len = skb->len;
+ macsec_reset_skb(skb, macsec->secy.netdev);
+
+- len = skb->len;
+ if (gro_cells_receive(&macsec->gro_cells, skb) == NET_RX_SUCCESS)
+ count_rx(dev, len);
+
+@@ -1046,6 +1071,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
+ u64_stats_update_begin(&secy_stats->syncp);
+ secy_stats->stats.InPktsNoTag++;
+ u64_stats_update_end(&secy_stats->syncp);
++ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
+ continue;
+ }
+
+@@ -1155,6 +1181,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ u64_stats_update_begin(&secy_stats->syncp);
+ secy_stats->stats.InPktsBadTag++;
+ u64_stats_update_end(&secy_stats->syncp);
++ DEV_STATS_INC(secy->netdev, rx_errors);
+ goto drop_nosa;
+ }
+
+@@ -1165,11 +1192,15 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ /* If validateFrames is Strict or the C bit in the
+ * SecTAG is set, discard
+ */
++ struct macsec_rx_sa *active_rx_sa = macsec_active_rxsa_get(rx_sc);
+ if (hdr->tci_an & MACSEC_TCI_C ||
+ secy->validate_frames == MACSEC_VALIDATE_STRICT) {
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ rxsc_stats->stats.InPktsNotUsingSA++;
+ u64_stats_update_end(&rxsc_stats->syncp);
++ DEV_STATS_INC(secy->netdev, rx_errors);
++ if (active_rx_sa)
++ this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
+ goto drop_nosa;
+ }
+
+@@ -1179,6 +1210,8 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ rxsc_stats->stats.InPktsUnusedSA++;
+ u64_stats_update_end(&rxsc_stats->syncp);
++ if (active_rx_sa)
++ this_cpu_inc(active_rx_sa->stats->InPktsUnusedSA);
+ goto deliver;
+ }
+
+@@ -1199,6 +1232,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ u64_stats_update_begin(&rxsc_stats->syncp);
+ rxsc_stats->stats.InPktsLate++;
+ u64_stats_update_end(&rxsc_stats->syncp);
++ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
+ goto drop;
+ }
+ }
+@@ -1227,6 +1261,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ deliver:
+ macsec_finalize_skb(skb, secy->icv_len,
+ macsec_extra_len(macsec_skb_cb(skb)->has_sci));
++ len = skb->len;
+ macsec_reset_skb(skb, secy->netdev);
+
+ if (rx_sa)
+@@ -1234,12 +1269,11 @@ deliver:
+ macsec_rxsc_put(rx_sc);
+
+ skb_orphan(skb);
+- len = skb->len;
+ ret = gro_cells_receive(&macsec->gro_cells, skb);
+ if (ret == NET_RX_SUCCESS)
+ count_rx(dev, len);
+ else
+- macsec->secy.netdev->stats.rx_dropped++;
++ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
+
+ rcu_read_unlock();
+
+@@ -1276,6 +1310,7 @@ nosci:
+ u64_stats_update_begin(&secy_stats->syncp);
+ secy_stats->stats.InPktsNoSCI++;
+ u64_stats_update_end(&secy_stats->syncp);
++ DEV_STATS_INC(macsec->secy.netdev, rx_errors);
+ continue;
+ }
+
+@@ -1294,7 +1329,7 @@ nosci:
+ secy_stats->stats.InPktsUnknownSCI++;
+ u64_stats_update_end(&secy_stats->syncp);
+ } else {
+- macsec->secy.netdev->stats.rx_dropped++;
++ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
+ }
+ }
+
+@@ -1308,8 +1343,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
+ struct crypto_aead *tfm;
+ int ret;
+
+- /* Pick a sync gcm(aes) cipher to ensure order is preserved. */
+- tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
++ tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+
+ if (IS_ERR(tfm))
+ return tfm;
+@@ -1390,7 +1424,8 @@ static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci)
+ return NULL;
+ }
+
+-static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci)
++static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci,
++ bool active)
+ {
+ struct macsec_rx_sc *rx_sc;
+ struct macsec_dev *macsec;
+@@ -1414,7 +1449,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci)
+ }
+
+ rx_sc->sci = sci;
+- rx_sc->active = true;
++ rx_sc->active = active;
+ refcount_set(&rx_sc->refcnt, 1);
+
+ secy = &macsec_priv(dev)->secy;
+@@ -1695,7 +1730,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_PN] &&
+- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1751,7 +1786,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+ rtnl_unlock();
+@@ -1767,7 +1803,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -1822,6 +1858,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ secy->key_len);
+
+ err = macsec_offload(ops->mdo_add_rxsa, &ctx);
++ memzero_explicit(ctx.sa.key, secy->key_len);
+ if (err)
+ goto cleanup;
+ }
+@@ -1840,7 +1877,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+
+ cleanup:
+- kfree(rx_sa);
++ macsec_rxsa_put(rx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -1866,7 +1903,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
+ struct macsec_rx_sc *rx_sc;
+ struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1];
+ struct macsec_secy *secy;
+- bool was_active;
++ bool active = true;
+ int ret;
+
+ if (!attrs[MACSEC_ATTR_IFINDEX])
+@@ -1888,16 +1925,15 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
+ secy = &macsec_priv(dev)->secy;
+ sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]);
+
+- rx_sc = create_rx_sc(dev, sci);
++ if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE])
++ active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]);
++
++ rx_sc = create_rx_sc(dev, sci, active);
+ if (IS_ERR(rx_sc)) {
+ rtnl_unlock();
+ return PTR_ERR(rx_sc);
+ }
+
+- was_active = rx_sc->active;
+- if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE])
+- rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]);
+-
+ if (macsec_is_offloaded(netdev_priv(dev))) {
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+@@ -1921,7 +1957,8 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+
+ cleanup:
+- rx_sc->active = was_active;
++ del_rx_sc(secy, sci);
++ free_rx_sc(rx_sc);
+ rtnl_unlock();
+ return ret;
+ }
+@@ -1937,7 +1974,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2009,7 +2046,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -2064,6 +2101,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ secy->key_len);
+
+ err = macsec_offload(ops->mdo_add_txsa, &ctx);
++ memzero_explicit(ctx.sa.key, secy->key_len);
+ if (err)
+ goto cleanup;
+ }
+@@ -2083,7 +2121,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+
+ cleanup:
+ secy->operational = was_operational;
+- kfree(tx_sa);
++ macsec_txsa_put(tx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -2291,7 +2329,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2560,7 +2598,7 @@ static bool macsec_is_configured(struct macsec_dev *macsec)
+ struct macsec_tx_sc *tx_sc = &secy->tx_sc;
+ int i;
+
+- if (secy->n_rx_sc > 0)
++ if (secy->rx_sc)
+ return true;
+
+ for (i = 0; i < MACSEC_NUM_AN; i++)
+@@ -2580,7 +2618,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+ struct macsec_dev *macsec;
+- int ret;
++ int ret = 0;
+
+ if (!attrs[MACSEC_ATTR_IFINDEX])
+ return -EINVAL;
+@@ -2593,28 +2631,36 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
+ macsec_genl_offload_policy, NULL))
+ return -EINVAL;
+
++ rtnl_lock();
++
+ dev = get_dev_from_nl(genl_info_net(info), attrs);
+- if (IS_ERR(dev))
+- return PTR_ERR(dev);
++ if (IS_ERR(dev)) {
++ ret = PTR_ERR(dev);
++ goto out;
++ }
+ macsec = macsec_priv(dev);
+
+- if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE])
+- return -EINVAL;
++ if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+ offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]);
+ if (macsec->offload == offload)
+- return 0;
++ goto out;
+
+ /* Check if the offloading mode is supported by the underlying layers */
+ if (offload != MACSEC_OFFLOAD_OFF &&
+- !macsec_check_offload(offload, macsec))
+- return -EOPNOTSUPP;
++ !macsec_check_offload(offload, macsec)) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
+
+ /* Check if the net device is busy. */
+- if (netif_running(dev))
+- return -EBUSY;
+-
+- rtnl_lock();
++ if (netif_running(dev)) {
++ ret = -EBUSY;
++ goto out;
++ }
+
+ prev_offload = macsec->offload;
+ macsec->offload = offload;
+@@ -2644,17 +2690,12 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
+ if (ret)
+ goto rollback;
+
+- /* Force features update, since they are different for SW MACSec and
+- * HW offloading cases.
+- */
+- netdev_update_features(dev);
+-
+ rtnl_unlock();
+ return 0;
+
+ rollback:
+ macsec->offload = prev_offload;
+-
++out:
+ rtnl_unlock();
+ return ret;
+ }
+@@ -3396,36 +3437,29 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
+
+ if (!secy->operational) {
+ kfree_skb(skb);
+- dev->stats.tx_dropped++;
++ DEV_STATS_INC(dev, tx_dropped);
+ return NETDEV_TX_OK;
+ }
+
++ len = skb->len;
+ skb = macsec_encrypt(skb, dev);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) != -EINPROGRESS)
+- dev->stats.tx_dropped++;
++ DEV_STATS_INC(dev, tx_dropped);
+ return NETDEV_TX_OK;
+ }
+
+ macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa);
+
+ macsec_encrypt_finish(skb, dev);
+- len = skb->len;
+ ret = dev_queue_xmit(skb);
+ count_tx(dev, ret, len);
+ return ret;
+ }
+
+-#define SW_MACSEC_FEATURES \
++#define MACSEC_FEATURES \
+ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+
+-/* If h/w offloading is enabled, use real device features save for
+- * VLAN_FEATURES - they require additional ops
+- * HW_MACSEC - no reason to report it
+- */
+-#define REAL_DEV_FEATURES(dev) \
+- ((dev)->features & ~(NETIF_F_VLAN_FEATURES | NETIF_F_HW_MACSEC))
+-
+ static int macsec_dev_init(struct net_device *dev)
+ {
+ struct macsec_dev *macsec = macsec_priv(dev);
+@@ -3442,12 +3476,8 @@ static int macsec_dev_init(struct net_device *dev)
+ return err;
+ }
+
+- if (macsec_is_offloaded(macsec)) {
+- dev->features = REAL_DEV_FEATURES(real_dev);
+- } else {
+- dev->features = real_dev->features & SW_MACSEC_FEATURES;
+- dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE;
+- }
++ dev->features = real_dev->features & MACSEC_FEATURES;
++ dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE;
+
+ dev->needed_headroom = real_dev->needed_headroom +
+ MACSEC_NEEDED_HEADROOM;
+@@ -3476,10 +3506,7 @@ static netdev_features_t macsec_fix_features(struct net_device *dev,
+ struct macsec_dev *macsec = macsec_priv(dev);
+ struct net_device *real_dev = macsec->real_dev;
+
+- if (macsec_is_offloaded(macsec))
+- return REAL_DEV_FEATURES(real_dev);
+-
+- features &= (real_dev->features & SW_MACSEC_FEATURES) |
++ features &= (real_dev->features & MACSEC_FEATURES) |
+ NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES;
+ features |= NETIF_F_LLTX;
+
+@@ -3614,8 +3641,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p)
+ dev_uc_del(real_dev, dev->dev_addr);
+
+ out:
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
+- macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+@@ -3653,8 +3679,9 @@ static void macsec_get_stats64(struct net_device *dev,
+
+ dev_fetch_sw_netstats(s, dev->tstats);
+
+- s->rx_dropped = dev->stats.rx_dropped;
+- s->tx_dropped = dev->stats.tx_dropped;
++ s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
++ s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
++ s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
+ }
+
+ static int macsec_get_iflink(const struct net_device *dev)
+@@ -3695,6 +3722,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
+ [IFLA_MACSEC_SCB] = { .type = NLA_U8 },
+ [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 },
+ [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 },
++ [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 },
+ };
+
+ static void macsec_free_netdev(struct net_device *dev)
+@@ -3738,9 +3766,6 @@ static int macsec_changelink_common(struct net_device *dev,
+ secy->operational = tx_sa && tx_sa->active;
+ }
+
+- if (data[IFLA_MACSEC_WINDOW])
+- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+ if (data[IFLA_MACSEC_ENCRYPT])
+ tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+
+@@ -3786,6 +3811,16 @@ static int macsec_changelink_common(struct net_device *dev,
+ }
+ }
+
++ if (data[IFLA_MACSEC_WINDOW]) {
++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++ * for XPN cipher suites */
++ if (secy->xpn &&
++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++ return -EINVAL;
++ }
++
+ return 0;
+ }
+
+@@ -3815,13 +3850,12 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+
+ ret = macsec_changelink_common(dev, data);
+ if (ret)
+- return ret;
++ goto cleanup;
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+ const struct macsec_ops *ops;
+ struct macsec_context ctx;
+- int ret;
+
+ ops = macsec_get_ops(netdev_priv(dev), &ctx);
+ if (!ops) {
+@@ -3870,6 +3904,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head
+ struct macsec_dev *macsec = macsec_priv(dev);
+ struct net_device *real_dev = macsec->real_dev;
+
++ /* If h/w offloading is available, propagate to the device */
++ if (macsec_is_offloaded(macsec)) {
++ const struct macsec_ops *ops;
++ struct macsec_context ctx;
++
++ ops = macsec_get_ops(netdev_priv(dev), &ctx);
++ if (ops) {
++ ctx.secy = &macsec->secy;
++ macsec_offload(ops->mdo_del_secy, &ctx);
++ }
++ }
++
+ unregister_netdevice_queue(dev, head);
+ list_del_rcu(&macsec->secys);
+ macsec_del_dev(macsec);
+@@ -3884,18 +3930,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head)
+ struct net_device *real_dev = macsec->real_dev;
+ struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
+
+- /* If h/w offloading is available, propagate to the device */
+- if (macsec_is_offloaded(macsec)) {
+- const struct macsec_ops *ops;
+- struct macsec_context ctx;
+-
+- ops = macsec_get_ops(netdev_priv(dev), &ctx);
+- if (ops) {
+- ctx.secy = &macsec->secy;
+- macsec_offload(ops->mdo_del_secy, &ctx);
+- }
+- }
+-
+ macsec_common_dellink(dev, head);
+
+ if (list_empty(&rxd->secys)) {
+@@ -3944,6 +3978,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci)
+ return false;
+ }
+
++static sci_t dev_to_sci(struct net_device *dev, __be16 port)
++{
++ return make_sci(dev->dev_addr, port);
++}
++
+ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
+ {
+ struct macsec_dev *macsec = macsec_priv(dev);
+@@ -4018,6 +4057,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
+ !macsec_check_offload(macsec->offload, macsec))
+ return -EOPNOTSUPP;
+
++ /* send_sci must be set to true when transmit sci explicitly is set */
++ if ((data && data[IFLA_MACSEC_SCI]) &&
++ (data && data[IFLA_MACSEC_INC_SCI])) {
++ u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]);
++
++ if (!send_sci)
++ return -EINVAL;
++ }
++
+ if (data && data[IFLA_MACSEC_ICV_LEN])
+ icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]);
+ mtu = real_dev->mtu - icv_len - macsec_extra_len(true);
+diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
+index 35f46ad040b0d..3dd1528dde028 100644
+--- a/drivers/net/macvlan.c
++++ b/drivers/net/macvlan.c
+@@ -141,7 +141,7 @@ static struct macvlan_source_entry *macvlan_hash_lookup_source(
+ u32 idx = macvlan_eth_hash(addr);
+ struct hlist_head *h = &vlan->port->vlan_source_hash[idx];
+
+- hlist_for_each_entry_rcu(entry, h, hlist) {
++ hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) {
+ if (ether_addr_equal_64bits(entry->addr, addr) &&
+ entry->vlan == vlan)
+ return entry;
+@@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
+ return RX_HANDLER_CONSUMED;
+ *pskb = skb;
+ eth = eth_hdr(skb);
+- if (macvlan_forward_source(skb, port, eth->h_source))
++ if (macvlan_forward_source(skb, port, eth->h_source)) {
++ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
++ }
+ src = macvlan_hash_lookup(port, eth->h_source);
+ if (src && src->mode != MACVLAN_MODE_VEPA &&
+ src->mode != MACVLAN_MODE_BRIDGE) {
+@@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
+ return RX_HANDLER_PASS;
+ }
+
+- if (macvlan_forward_source(skb, port, eth->h_source))
++ if (macvlan_forward_source(skb, port, eth->h_source)) {
++ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
++ }
+ if (macvlan_passthru(port))
+ vlan = list_first_or_null_rcu(&port->vlans,
+ struct macvlan_dev, list);
+@@ -707,7 +711,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
+
+ if (!(dev->flags & IFF_UP)) {
+ /* Just copy in the new address */
+- ether_addr_copy(dev->dev_addr, addr);
++ eth_hw_addr_set(dev, addr);
+ } else {
+ /* Rehash and update the device filters */
+ if (macvlan_addr_busy(vlan->port, addr))
+@@ -1177,7 +1181,7 @@ void macvlan_common_setup(struct net_device *dev)
+ {
+ ether_setup(dev);
+
+- dev->min_mtu = 0;
++ /* ether_setup() has set dev->min_mtu to ETH_MIN_MTU. */
+ dev->max_mtu = ETH_MAX_MTU;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ netif_keep_dst(dev);
+@@ -1517,8 +1521,10 @@ destroy_macvlan_port:
+ /* the macvlan port may be freed by macvlan_uninit when fail to register.
+ * so we destroy the macvlan port only when it's valid.
+ */
+- if (create && macvlan_port_get_rtnl(lowerdev))
++ if (create && macvlan_port_get_rtnl(lowerdev)) {
++ macvlan_flush_sources(port, vlan);
+ macvlan_port_destroy(port->dev);
++ }
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(macvlan_common_newlink);
+@@ -1629,7 +1635,7 @@ static int macvlan_fill_info_macaddr(struct sk_buff *skb,
+ struct hlist_head *h = &vlan->port->vlan_source_hash[i];
+ struct macvlan_source_entry *entry;
+
+- hlist_for_each_entry_rcu(entry, h, hlist) {
++ hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) {
+ if (entry->vlan != vlan)
+ continue;
+ if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr))
+diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
+index 694e2f5dbbe59..39801c31e5071 100644
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device *dev)
+ dev->tx_queue_len = TUN_READQ_SIZE;
+ }
+
++static struct net *macvtap_link_net(const struct net_device *dev)
++{
++ return dev_net(macvlan_dev_real_dev(dev));
++}
++
+ static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
+ .kind = "macvtap",
+ .setup = macvtap_setup,
+ .newlink = macvtap_newlink,
+ .dellink = macvtap_dellink,
++ .get_link_net = macvtap_link_net,
+ .priv_size = sizeof(struct macvtap_dev),
+ };
+
+diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c
+index d77c987fda9cd..4630dde019749 100644
+--- a/drivers/net/mdio/acpi_mdio.c
++++ b/drivers/net/mdio/acpi_mdio.c
+@@ -18,16 +18,18 @@ MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>");
+ MODULE_LICENSE("GPL");
+
+ /**
+- * acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL.
++ * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL.
+ * @mdio: pointer to mii_bus structure
+ * @fwnode: pointer to fwnode of MDIO bus. This fwnode is expected to represent
++ * @owner: module owning this @mdio object.
+ * an ACPI device object corresponding to the MDIO bus and its children are
+ * expected to correspond to the PHY devices on that bus.
+ *
+ * This function registers the mii_bus structure and registers a phy_device
+ * for each child node of @fwnode.
+ */
+-int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
++int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode,
++ struct module *owner)
+ {
+ struct fwnode_handle *child;
+ u32 addr;
+@@ -35,7 +37,7 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
+
+ /* Mask out all PHYs from auto probing. */
+ mdio->phy_mask = GENMASK(31, 0);
+- ret = mdiobus_register(mdio);
++ ret = __mdiobus_register(mdio, owner);
+ if (ret)
+ return ret;
+
+@@ -55,4 +57,4 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
+ }
+ return 0;
+ }
+-EXPORT_SYMBOL(acpi_mdiobus_register);
++EXPORT_SYMBOL(__acpi_mdiobus_register);
+diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
+index 1becb1a731f67..2c47efdae73b4 100644
+--- a/drivers/net/mdio/fwnode_mdio.c
++++ b/drivers/net/mdio/fwnode_mdio.c
+@@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
+ int rc;
+
+ rc = fwnode_irq_get(child, 0);
++ /* Don't wait forever if the IRQ provider doesn't become available,
++ * just fall back to poll mode
++ */
++ if (rc == -EPROBE_DEFER)
++ rc = driver_deferred_probe_check_state(&phy->mdio.dev);
+ if (rc == -EPROBE_DEFER)
+ return rc;
+
+@@ -72,6 +77,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
+ */
+ rc = phy_device_register(phy);
+ if (rc) {
++ device_set_node(&phy->mdio.dev, NULL);
+ fwnode_handle_put(child);
+ return rc;
+ }
+@@ -105,8 +111,8 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+ else
+ phy = phy_device_create(bus, addr, phy_id, 0, NULL);
+ if (IS_ERR(phy)) {
+- unregister_mii_timestamper(mii_ts);
+- return PTR_ERR(phy);
++ rc = PTR_ERR(phy);
++ goto clean_mii_ts;
+ }
+
+ if (is_acpi_node(child)) {
+@@ -115,22 +121,19 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+ /* Associate the fwnode with the device structure so it
+ * can be looked up later.
+ */
+- phy->mdio.dev.fwnode = child;
++ phy->mdio.dev.fwnode = fwnode_handle_get(child);
+
+ /* All data is now stored in the phy struct, so register it */
+ rc = phy_device_register(phy);
+ if (rc) {
+- phy_device_free(phy);
+- fwnode_handle_put(phy->mdio.dev.fwnode);
+- return rc;
++ phy->mdio.dev.fwnode = NULL;
++ fwnode_handle_put(child);
++ goto clean_phy;
+ }
+ } else if (is_of_node(child)) {
+ rc = fwnode_mdiobus_phy_device_register(bus, phy, child, addr);
+- if (rc) {
+- unregister_mii_timestamper(mii_ts);
+- phy_device_free(phy);
+- return rc;
+- }
++ if (rc)
++ goto clean_phy;
+ }
+
+ /* phy->mii_ts may already be defined by the PHY driver. A
+@@ -140,5 +143,12 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+ if (mii_ts)
+ phy->mii_ts = mii_ts;
+ return 0;
++
++clean_phy:
++ phy_device_free(phy);
++clean_mii_ts:
++ unregister_mii_timestamper(mii_ts);
++
++ return rc;
+ }
+ EXPORT_SYMBOL(fwnode_mdiobus_register_phy);
+diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
+index cad820568f751..e2273588c75b6 100644
+--- a/drivers/net/mdio/mdio-aspeed.c
++++ b/drivers/net/mdio/mdio-aspeed.c
+@@ -61,6 +61,13 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum)
+
+ iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL);
+
++ rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
++ !(ctrl & ASPEED_MDIO_CTRL_FIRE),
++ ASPEED_MDIO_INTERVAL_US,
++ ASPEED_MDIO_TIMEOUT_US);
++ if (rc < 0)
++ return rc;
++
+ rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data,
+ data & ASPEED_MDIO_DATA_IDLE,
+ ASPEED_MDIO_INTERVAL_US,
+@@ -141,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = {
+ { .compatible = "aspeed,ast2600-mdio", },
+ { },
+ };
++MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match);
+
+ static struct platform_driver aspeed_mdio_driver = {
+ .driver = {
+diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
+index 5f4cd24a0241d..4eba5a91075c0 100644
+--- a/drivers/net/mdio/mdio-ipq4019.c
++++ b/drivers/net/mdio/mdio-ipq4019.c
+@@ -200,7 +200,11 @@ static int ipq_mdio_reset(struct mii_bus *bus)
+ if (ret)
+ return ret;
+
+- return clk_prepare_enable(priv->mdio_clk);
++ ret = clk_prepare_enable(priv->mdio_clk);
++ if (ret == 0)
++ mdelay(10);
++
++ return ret;
+ }
+
+ static int ipq4019_mdio_probe(struct platform_device *pdev)
+diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
+index 17f98f609ec82..5070ca2f2637a 100644
+--- a/drivers/net/mdio/mdio-mscc-miim.c
++++ b/drivers/net/mdio/mdio-mscc-miim.c
+@@ -76,6 +76,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
+ u32 val;
+ int ret;
+
++ if (regnum & MII_ADDR_C45)
++ return -EOPNOTSUPP;
++
+ ret = mscc_miim_wait_pending(bus);
+ if (ret)
+ goto out;
+@@ -105,6 +108,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id,
+ struct mscc_miim_dev *miim = bus->priv;
+ int ret;
+
++ if (regnum & MII_ADDR_C45)
++ return -EOPNOTSUPP;
++
+ ret = mscc_miim_wait_pending(bus);
+ if (ret < 0)
+ goto out;
+diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
+index 6dcbf987d61b5..8b444a8eb6b55 100644
+--- a/drivers/net/mdio/mdio-mux-bcm6368.c
++++ b/drivers/net/mdio/mdio-mux-bcm6368.c
+@@ -115,7 +115,7 @@ static int bcm6368_mdiomux_probe(struct platform_device *pdev)
+ md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
+ if (!md->mii_bus) {
+ dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
+- return ENOMEM;
++ return -ENOMEM;
+ }
+
+ bus = md->mii_bus;
+diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
+index b8866bc3f2e8b..917c8a10eea02 100644
+--- a/drivers/net/mdio/mdio-mux-meson-g12a.c
++++ b/drivers/net/mdio/mdio-mux-meson-g12a.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include <linux/bitfield.h>
++#include <linux/delay.h>
+ #include <linux/clk.h>
+ #include <linux/clk-provider.h>
+ #include <linux/device.h>
+@@ -150,6 +151,7 @@ static const struct clk_ops g12a_ephy_pll_ops = {
+
+ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv)
+ {
++ u32 value;
+ int ret;
+
+ /* Enable the phy clock */
+@@ -163,18 +165,25 @@ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv)
+
+ /* Initialize ephy control */
+ writel(EPHY_G12A_ID, priv->regs + ETH_PHY_CNTL0);
+- writel(FIELD_PREP(PHY_CNTL1_ST_MODE, 3) |
+- FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) |
+- FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) |
+- PHY_CNTL1_CLK_EN |
+- PHY_CNTL1_CLKFREQ |
+- PHY_CNTL1_PHY_ENB,
+- priv->regs + ETH_PHY_CNTL1);
++
++ /* Make sure we get a 0 -> 1 transition on the enable bit */
++ value = FIELD_PREP(PHY_CNTL1_ST_MODE, 3) |
++ FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) |
++ FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) |
++ PHY_CNTL1_CLK_EN |
++ PHY_CNTL1_CLKFREQ;
++ writel(value, priv->regs + ETH_PHY_CNTL1);
+ writel(PHY_CNTL2_USE_INTERNAL |
+ PHY_CNTL2_SMI_SRC_MAC |
+ PHY_CNTL2_RX_CLK_EPHY,
+ priv->regs + ETH_PHY_CNTL2);
+
++ value |= PHY_CNTL1_PHY_ENB;
++ writel(value, priv->regs + ETH_PHY_CNTL1);
++
++ /* The phy needs a bit of time to power up */
++ mdelay(10);
++
+ return 0;
+ }
+
+diff --git a/drivers/net/mdio/mdio-mvusb.c b/drivers/net/mdio/mdio-mvusb.c
+index d5eabddfdf51b..11e048136ac23 100644
+--- a/drivers/net/mdio/mdio-mvusb.c
++++ b/drivers/net/mdio/mdio-mvusb.c
+@@ -73,6 +73,7 @@ static int mvusb_mdio_probe(struct usb_interface *interface,
+ struct device *dev = &interface->dev;
+ struct mvusb_mdio *mvusb;
+ struct mii_bus *mdio;
++ int ret;
+
+ mdio = devm_mdiobus_alloc_size(dev, sizeof(*mvusb));
+ if (!mdio)
+@@ -93,7 +94,15 @@ static int mvusb_mdio_probe(struct usb_interface *interface,
+ mdio->write = mvusb_mdio_write;
+
+ usb_set_intfdata(interface, mvusb);
+- return of_mdiobus_register(mdio, dev->of_node);
++ ret = of_mdiobus_register(mdio, dev->of_node);
++ if (ret)
++ goto put_dev;
++
++ return 0;
++
++put_dev:
++ usb_put_dev(mvusb->udev);
++ return ret;
+ }
+
+ static void mvusb_mdio_disconnect(struct usb_interface *interface)
+diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
+index 822d2cdd2f359..394b864aaa372 100644
+--- a/drivers/net/mdio/mdio-thunder.c
++++ b/drivers/net/mdio/mdio-thunder.c
+@@ -104,6 +104,7 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev,
+ if (i >= ARRAY_SIZE(nexus->buses))
+ break;
+ }
++ fwnode_handle_put(fwn);
+ return 0;
+
+ err_release_regions:
+diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
+index 9e3c815a070f1..1e46e39f5f46a 100644
+--- a/drivers/net/mdio/of_mdio.c
++++ b/drivers/net/mdio/of_mdio.c
+@@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
+ /* All data is now stored in the mdiodev struct; register it. */
+ rc = mdio_device_register(mdiodev);
+ if (rc) {
++ device_set_node(&mdiodev->dev, NULL);
++ fwnode_handle_put(fwnode);
+ mdio_device_free(mdiodev);
+- of_node_put(child);
+ return rc;
+ }
+
+@@ -138,21 +139,23 @@ bool of_mdiobus_child_is_phy(struct device_node *child)
+ EXPORT_SYMBOL(of_mdiobus_child_is_phy);
+
+ /**
+- * of_mdiobus_register - Register mii_bus and create PHYs from the device tree
++ * __of_mdiobus_register - Register mii_bus and create PHYs from the device tree
+ * @mdio: pointer to mii_bus structure
+ * @np: pointer to device_node of MDIO bus.
++ * @owner: module owning the @mdio object.
+ *
+ * This function registers the mii_bus structure and registers a phy_device
+ * for each child node of @np.
+ */
+-int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
++int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np,
++ struct module *owner)
+ {
+ struct device_node *child;
+ bool scanphys = false;
+ int addr, rc;
+
+ if (!np)
+- return mdiobus_register(mdio);
++ return __mdiobus_register(mdio, owner);
+
+ /* Do not continue if the node is disabled */
+ if (!of_device_is_available(np))
+@@ -171,7 +174,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
+ of_property_read_u32(np, "reset-post-delay-us", &mdio->reset_post_delay_us);
+
+ /* Register the MDIO bus */
+- rc = mdiobus_register(mdio);
++ rc = __mdiobus_register(mdio, owner);
+ if (rc)
+ return rc;
+
+@@ -231,10 +234,11 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
+ return 0;
+
+ unregister:
++ of_node_put(child);
+ mdiobus_unregister(mdio);
+ return rc;
+ }
+-EXPORT_SYMBOL(of_mdiobus_register);
++EXPORT_SYMBOL(__of_mdiobus_register);
+
+ /**
+ * of_mdio_find_device - Given a device tree node, find the mdio_device
+diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
+index aaa628f859fd4..f84554aa02af1 100644
+--- a/drivers/net/mhi_net.c
++++ b/drivers/net/mhi_net.c
+@@ -343,6 +343,8 @@ static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
+
+ kfree_skb(mhi_netdev->skbagg_head);
+
++ free_netdev(ndev);
++
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+ }
+
+diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
+index 2a4892402ed8c..16b36e9563607 100644
+--- a/drivers/net/net_failover.c
++++ b/drivers/net/net_failover.c
+@@ -130,14 +130,10 @@ static u16 net_failover_select_queue(struct net_device *dev,
+ txq = ops->ndo_select_queue(primary_dev, skb, sb_dev);
+ else
+ txq = netdev_pick_tx(primary_dev, skb, NULL);
+-
+- qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+-
+- return txq;
++ } else {
++ txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+ }
+
+- txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+-
+ /* Save the original txq to restore before passing to the driver */
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+
+diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
+index 90aafb56f1409..50854265864d1 100644
+--- a/drivers/net/netdevsim/bpf.c
++++ b/drivers/net/netdevsim/bpf.c
+@@ -351,10 +351,12 @@ nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx)
+ {
+ struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+
+- nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER);
++ nmap->entry[idx].key = kmalloc(offmap->map.key_size,
++ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!nmap->entry[idx].key)
+ return -ENOMEM;
+- nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER);
++ nmap->entry[idx].value = kmalloc(offmap->map.value_size,
++ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (!nmap->entry[idx].value) {
+ kfree(nmap->entry[idx].key);
+ nmap->entry[idx].key = NULL;
+@@ -496,7 +498,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
+ if (offmap->map.map_flags)
+ return -EINVAL;
+
+- nmap = kzalloc(sizeof(*nmap), GFP_USER);
++ nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT);
+ if (!nmap)
+ return -ENOMEM;
+
+@@ -514,6 +516,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
+ goto err_free;
+ key = nmap->entry[i].key;
+ *key = i;
++ memset(nmap->entry[i].value, 0, offmap->map.value_size);
+ }
+ }
+
+diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
+index 54313bd577973..a7279356299af 100644
+--- a/drivers/net/netdevsim/dev.c
++++ b/drivers/net/netdevsim/dev.c
+@@ -168,13 +168,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file,
+ cookie_len = (count - 1) / 2;
+ if ((count - 1) % 2)
+ return -EINVAL;
+- buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+- if (!buf)
+- return -ENOMEM;
+
+- ret = simple_write_to_buffer(buf, count, ppos, data, count);
+- if (ret < 0)
+- goto free_buf;
++ buf = memdup_user(data, count);
++ if (IS_ERR(buf))
++ return PTR_ERR(buf);
+
+ fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len,
+ GFP_KERNEL | __GFP_NOWARN);
+@@ -229,8 +226,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
+ if (IS_ERR(nsim_dev->ddir))
+ return PTR_ERR(nsim_dev->ddir);
+ nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir);
+- if (IS_ERR(nsim_dev->ports_ddir))
+- return PTR_ERR(nsim_dev->ports_ddir);
++ if (IS_ERR(nsim_dev->ports_ddir)) {
++ err = PTR_ERR(nsim_dev->ports_ddir);
++ goto err_ddir;
++ }
+ debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir,
+ &nsim_dev->fw_update_status);
+ debugfs_create_u32("fw_update_overwrite_mask", 0600, nsim_dev->ddir,
+@@ -267,7 +266,7 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
+ nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir);
+ if (IS_ERR(nsim_dev->nodes_ddir)) {
+ err = PTR_ERR(nsim_dev->nodes_ddir);
+- goto err_out;
++ goto err_ports_ddir;
+ }
+ debugfs_create_bool("fail_trap_drop_counter_get", 0600,
+ nsim_dev->ddir,
+@@ -275,8 +274,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
+ nsim_udp_tunnels_debugfs_create(nsim_dev);
+ return 0;
+
+-err_out:
++err_ports_ddir:
+ debugfs_remove_recursive(nsim_dev->ports_ddir);
++err_ddir:
+ debugfs_remove_recursive(nsim_dev->ddir);
+ return err;
+ }
+diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
+index b03a0513eb7e7..2e7c1cc16cb93 100644
+--- a/drivers/net/netdevsim/ethtool.c
++++ b/drivers/net/netdevsim/ethtool.c
+@@ -77,7 +77,10 @@ static int nsim_set_ringparam(struct net_device *dev,
+ {
+ struct netdevsim *ns = netdev_priv(dev);
+
+- memcpy(&ns->ethtool.ring, ring, sizeof(ns->ethtool.ring));
++ ns->ethtool.ring.rx_pending = ring->rx_pending;
++ ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending;
++ ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending;
++ ns->ethtool.ring.tx_pending = ring->tx_pending;
+ return 0;
+ }
+
+diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
+index 4300261e2f9e7..14787d17f703f 100644
+--- a/drivers/net/netdevsim/fib.c
++++ b/drivers/net/netdevsim/fib.c
+@@ -53,6 +53,7 @@ struct nsim_fib_data {
+ struct rhashtable nexthop_ht;
+ struct devlink *devlink;
+ struct work_struct fib_event_work;
++ struct work_struct fib_flush_work;
+ struct list_head fib_event_queue;
+ spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
+ struct mutex nh_lock; /* Protects NH HT */
+@@ -623,14 +624,14 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data,
+ if (err)
+ goto err_fib6_rt_nh_del;
+
+- fib6_event->rt_arr[i]->trap = true;
++ WRITE_ONCE(fib6_event->rt_arr[i]->trap, true);
+ }
+
+ return 0;
+
+ err_fib6_rt_nh_del:
+ for (i--; i >= 0; i--) {
+- fib6_event->rt_arr[i]->trap = false;
++ WRITE_ONCE(fib6_event->rt_arr[i]->trap, false);
+ nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]);
+ }
+ return err;
+@@ -977,7 +978,7 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data,
+
+ fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
+ if (!fib_event)
+- return NOTIFY_BAD;
++ goto err_fib_event_alloc;
+
+ fib_event->data = data;
+ fib_event->event = event;
+@@ -1005,6 +1006,9 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data,
+
+ err_fib_prepare_event:
+ kfree(fib_event);
++err_fib_event_alloc:
++ if (event == FIB_EVENT_ENTRY_DEL)
++ schedule_work(&data->fib_flush_work);
+ return NOTIFY_BAD;
+ }
+
+@@ -1482,6 +1486,24 @@ static void nsim_fib_event_work(struct work_struct *work)
+ mutex_unlock(&data->fib_lock);
+ }
+
++static void nsim_fib_flush_work(struct work_struct *work)
++{
++ struct nsim_fib_data *data = container_of(work, struct nsim_fib_data,
++ fib_flush_work);
++ struct nsim_fib_rt *fib_rt, *fib_rt_tmp;
++
++ /* Process pending work. */
++ flush_work(&data->fib_event_work);
++
++ mutex_lock(&data->fib_lock);
++ list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) {
++ rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node,
++ nsim_fib_rt_ht_params);
++ nsim_fib_rt_free(fib_rt, data);
++ }
++ mutex_unlock(&data->fib_lock);
++}
++
+ static int
+ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
+ {
+@@ -1540,6 +1562,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
+ goto err_rhashtable_nexthop_destroy;
+
+ INIT_WORK(&data->fib_event_work, nsim_fib_event_work);
++ INIT_WORK(&data->fib_flush_work, nsim_fib_flush_work);
+ INIT_LIST_HEAD(&data->fib_event_queue);
+ spin_lock_init(&data->fib_event_queue_lock);
+
+@@ -1586,6 +1609,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
+ err_nexthop_nb_unregister:
+ unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
+ err_rhashtable_fib_destroy:
++ cancel_work_sync(&data->fib_flush_work);
+ flush_work(&data->fib_event_work);
+ rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
+ data);
+@@ -1615,6 +1639,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
+ NSIM_RESOURCE_IPV4_FIB);
+ unregister_fib_notifier(devlink_net(devlink), &data->fib_nb);
+ unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
++ cancel_work_sync(&data->fib_flush_work);
+ flush_work(&data->fib_event_work);
+ rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
+ data);
+diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
+index 50572e0f1f529..84741715f6705 100644
+--- a/drivers/net/netdevsim/netdev.c
++++ b/drivers/net/netdevsim/netdev.c
+@@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+ unsigned int start;
+
+ do {
+- start = u64_stats_fetch_begin(&ns->syncp);
++ start = u64_stats_fetch_begin_irq(&ns->syncp);
+ stats->tx_bytes = ns->tx_bytes;
+ stats->tx_packets = ns->tx_packets;
+- } while (u64_stats_fetch_retry(&ns->syncp, start));
++ } while (u64_stats_fetch_retry_irq(&ns->syncp, start));
+ }
+
+ static int
+diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
+index a5bab614ff845..b701ee83e64a8 100644
+--- a/drivers/net/ntb_netdev.c
++++ b/drivers/net/ntb_netdev.c
+@@ -137,7 +137,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
+ enqueue_again:
+ rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN);
+ if (rc) {
+- dev_kfree_skb(skb);
++ dev_kfree_skb_any(skb);
+ ndev->stats.rx_errors++;
+ ndev->stats.rx_fifo_errors++;
+ }
+@@ -192,7 +192,7 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
+ ndev->stats.tx_aborted_errors++;
+ }
+
+- dev_kfree_skb(skb);
++ dev_kfree_skb_any(skb);
+
+ if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
+ /* Make sure anybody stopping the queue after this sees the new
+@@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void)
+ rc = ntb_transport_register_client_dev(KBUILD_MODNAME);
+ if (rc)
+ return rc;
+- return ntb_transport_register_client(&ntb_netdev_client);
++
++ rc = ntb_transport_register_client(&ntb_netdev_client);
++ if (rc) {
++ ntb_transport_unregister_client_dev(KBUILD_MODNAME);
++ return rc;
++ }
++
++ return 0;
+ }
+ module_init(ntb_netdev_init_module);
+
+diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
+index 7de631f5356fc..556ca98843565 100644
+--- a/drivers/net/pcs/pcs-xpcs.c
++++ b/drivers/net/pcs/pcs-xpcs.c
+@@ -309,7 +309,7 @@ static int xpcs_read_fault_c73(struct dw_xpcs *xpcs,
+ return 0;
+ }
+
+-static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an)
++static int xpcs_read_link_c73(struct dw_xpcs *xpcs)
+ {
+ bool link = true;
+ int ret;
+@@ -321,15 +321,6 @@ static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an)
+ if (!(ret & MDIO_STAT1_LSTATUS))
+ link = false;
+
+- if (an) {
+- ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1);
+- if (ret < 0)
+- return ret;
+-
+- if (!(ret & MDIO_STAT1_LSTATUS))
+- link = false;
+- }
+-
+ return link;
+ }
+
+@@ -847,7 +838,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
+ int ret;
+
+ /* Link needs to be read first ... */
+- state->link = xpcs_read_link_c73(xpcs, state->an_enabled) > 0 ? 1 : 0;
++ state->link = xpcs_read_link_c73(xpcs) > 0 ? 1 : 0;
+
+ /* ... and then we check the faults. */
+ ret = xpcs_read_fault_c73(xpcs, state);
+@@ -890,7 +881,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
+ */
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+ if (ret < 0)
+- return false;
++ return ret;
+
+ if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+ int speed_value;
+diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
+index 968dd43a2b1e0..2f2765d7f84c8 100644
+--- a/drivers/net/phy/aquantia_main.c
++++ b/drivers/net/phy/aquantia_main.c
+@@ -34,6 +34,8 @@
+ #define MDIO_AN_VEND_PROV 0xc400
+ #define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15)
+ #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14)
++#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11)
++#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10)
+ #define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4)
+ #define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0)
+ #define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4
+@@ -88,6 +90,9 @@
+ #define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8)
+ #define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0)
+
++#define VEND1_GLOBAL_GEN_STAT2 0xc831
++#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15)
++
+ #define VEND1_GLOBAL_RSVD_STAT1 0xc885
+ #define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4)
+ #define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0)
+@@ -122,6 +127,12 @@
+ #define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1)
+ #define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0)
+
++/* Sleep and timeout for checking if the Processor-Intensive
++ * MDIO operation is finished
++ */
++#define AQR107_OP_IN_PROG_SLEEP 1000
++#define AQR107_OP_IN_PROG_TIMEOUT 100000
++
+ struct aqr107_hw_stat {
+ const char *name;
+ int reg;
+@@ -231,9 +242,20 @@ static int aqr_config_aneg(struct phy_device *phydev)
+ phydev->advertising))
+ reg |= MDIO_AN_VEND_PROV_1000BASET_HALF;
+
++ /* Handle the case when the 2.5G and 5G speeds are not advertised */
++ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
++ phydev->advertising))
++ reg |= MDIO_AN_VEND_PROV_2500BASET_FULL;
++
++ if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
++ phydev->advertising))
++ reg |= MDIO_AN_VEND_PROV_5000BASET_FULL;
++
+ ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV,
+ MDIO_AN_VEND_PROV_1000BASET_HALF |
+- MDIO_AN_VEND_PROV_1000BASET_FULL, reg);
++ MDIO_AN_VEND_PROV_1000BASET_FULL |
++ MDIO_AN_VEND_PROV_2500BASET_FULL |
++ MDIO_AN_VEND_PROV_5000BASET_FULL, reg);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+@@ -585,16 +607,52 @@ static void aqr107_link_change_notify(struct phy_device *phydev)
+ phydev_info(phydev, "Aquantia 1000Base-T2 mode active\n");
+ }
+
++static int aqr107_wait_processor_intensive_op(struct phy_device *phydev)
++{
++ int val, err;
++
++ /* The datasheet notes to wait at least 1ms after issuing a
++ * processor intensive operation before checking.
++ * We cannot use the 'sleep_before_read' parameter of read_poll_timeout
++ * because that just determines the maximum time slept, not the minimum.
++ */
++ usleep_range(1000, 5000);
++
++ err = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
++ VEND1_GLOBAL_GEN_STAT2, val,
++ !(val & VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG),
++ AQR107_OP_IN_PROG_SLEEP,
++ AQR107_OP_IN_PROG_TIMEOUT, false);
++ if (err) {
++ phydev_err(phydev, "timeout: processor-intensive MDIO operation\n");
++ return err;
++ }
++
++ return 0;
++}
++
+ static int aqr107_suspend(struct phy_device *phydev)
+ {
+- return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1,
+- MDIO_CTRL1_LPOWER);
++ int err;
++
++ err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1,
++ MDIO_CTRL1_LPOWER);
++ if (err)
++ return err;
++
++ return aqr107_wait_processor_intensive_op(phydev);
+ }
+
+ static int aqr107_resume(struct phy_device *phydev)
+ {
+- return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1,
+- MDIO_CTRL1_LPOWER);
++ int err;
++
++ err = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1,
++ MDIO_CTRL1_LPOWER);
++ if (err)
++ return err;
++
++ return aqr107_wait_processor_intensive_op(phydev);
+ }
+
+ static int aqr107_probe(struct phy_device *phydev)
+diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
+index bdac087058b26..ba61007bfc499 100644
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -666,25 +666,7 @@ static int at803x_probe(struct phy_device *phydev)
+ return ret;
+ }
+
+- /* Some bootloaders leave the fiber page selected.
+- * Switch to the copper page, as otherwise we read
+- * the PHY capabilities from the fiber side.
+- */
+- if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+- phy_lock_mdio_bus(phydev);
+- ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
+- phy_unlock_mdio_bus(phydev);
+- if (ret)
+- goto err;
+- }
+-
+ return 0;
+-
+-err:
+- if (priv->vddio)
+- regulator_disable(priv->vddio);
+-
+- return ret;
+ }
+
+ static void at803x_remove(struct phy_device *phydev)
+@@ -785,6 +767,22 @@ static int at803x_config_init(struct phy_device *phydev)
+ {
+ int ret;
+
++ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
++ /* Some bootloaders leave the fiber page selected.
++ * Switch to the copper page, as otherwise we read
++ * the PHY capabilities from the fiber side.
++ */
++ phy_lock_mdio_bus(phydev);
++ ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
++ phy_unlock_mdio_bus(phydev);
++ if (ret)
++ return ret;
++
++ ret = at8031_pll_config(phydev);
++ if (ret < 0)
++ return ret;
++ }
++
+ /* The RX and TX delay default is:
+ * after HW reset: RX delay enabled and TX delay disabled
+ * after SW reset: RX delay enabled, while TX delay retains the
+@@ -814,12 +812,6 @@ static int at803x_config_init(struct phy_device *phydev)
+ if (ret < 0)
+ return ret;
+
+- if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+- ret = at8031_pll_config(phydev);
+- if (ret < 0)
+- return ret;
+- }
+-
+ /* Ar803x extended next page bit is enabled by default. Cisco
+ * multigig switches read this bit and attempt to negotiate 10Gbps
+ * rates even if the next page bit is disabled. This is incorrect
+@@ -1383,8 +1375,6 @@ static struct phy_driver at803x_driver[] = {
+ .flags = PHY_POLL_CABLE_TEST,
+ .config_init = at803x_config_init,
+ .link_change_notify = at803x_link_change_notify,
+- .set_wol = at803x_set_wol,
+- .get_wol = at803x_get_wol,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ /* PHY_BASIC_FEATURES */
+diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c
+index 4578963375055..0f1e617a26c91 100644
+--- a/drivers/net/phy/ax88796b.c
++++ b/drivers/net/phy/ax88796b.c
+@@ -88,8 +88,10 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev)
+ /* Reset PHY, otherwise MII_LPA will provide outdated information.
+ * This issue is reproducible only with some link partner PHYs
+ */
+- if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset)
+- phydev->drv->soft_reset(phydev);
++ if (phydev->state == PHY_NOLINK) {
++ phy_init_hw(phydev);
++ phy_start_aneg(phydev);
++ }
+ }
+
+ static struct phy_driver asix_driver[] = {
+diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
+index 83aea5c5cd03c..f3b39af83a272 100644
+--- a/drivers/net/phy/broadcom.c
++++ b/drivers/net/phy/broadcom.c
+@@ -11,6 +11,7 @@
+ */
+
+ #include "bcm-phy-lib.h"
++#include <linux/delay.h>
+ #include <linux/module.h>
+ #include <linux/phy.h>
+ #include <linux/brcmphy.h>
+@@ -411,6 +412,17 @@ static int bcm54xx_resume(struct phy_device *phydev)
+ return bcm54xx_config_init(phydev);
+ }
+
++static int bcm54810_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
++{
++ return -EOPNOTSUPP;
++}
++
++static int bcm54810_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
++ u16 val)
++{
++ return -EOPNOTSUPP;
++}
++
+ static int bcm54811_config_init(struct phy_device *phydev)
+ {
+ int err, reg;
+@@ -553,6 +565,26 @@ static int brcm_fet_config_init(struct phy_device *phydev)
+ if (err < 0)
+ return err;
+
++ /* The datasheet indicates the PHY needs up to 1us to complete a reset,
++ * build some slack here.
++ */
++ usleep_range(1000, 2000);
++
++ /* The PHY requires 65 MDC clock cycles to complete a write operation
++ * and turnaround the line properly.
++ *
++ * We ignore -EIO here as the MDIO controller (e.g.: mdio-bcm-unimac)
++ * may flag the lack of turn-around as a read failure. This is
++ * particularly true with this combination since the MDIO controller
++ * only used 64 MDC cycles. This is not a critical failure in this
++ * specific case and it has no functional impact otherwise, so we let
++ * that one go through. If there is a genuine bus error, the next read
++ * of MII_BRCM_FET_INTREG will error out.
++ */
++ err = phy_read(phydev, MII_BMCR);
++ if (err < 0 && err != -EIO)
++ return err;
++
+ reg = phy_read(phydev, MII_BRCM_FET_INTREG);
+ if (reg < 0)
+ return reg;
+@@ -768,6 +800,7 @@ static struct phy_driver broadcom_drivers[] = {
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM54616S",
+ /* PHY_GBIT_FEATURES */
++ .soft_reset = genphy_soft_reset,
+ .config_init = bcm54xx_config_init,
+ .config_aneg = bcm54616s_config_aneg,
+ .config_intr = bcm_phy_config_intr,
+@@ -810,6 +843,8 @@ static struct phy_driver broadcom_drivers[] = {
+ .get_strings = bcm_phy_get_strings,
+ .get_stats = bcm54xx_get_stats,
+ .probe = bcm54xx_phy_probe,
++ .read_mmd = bcm54810_read_mmd,
++ .write_mmd = bcm54810_write_mmd,
+ .config_init = bcm54xx_config_init,
+ .config_aneg = bcm5481_config_aneg,
+ .config_intr = bcm_phy_config_intr,
+diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
+index 211b5476a6f51..f070aa97c77b4 100644
+--- a/drivers/net/phy/dp83822.c
++++ b/drivers/net/phy/dp83822.c
+@@ -228,13 +228,12 @@ static int dp83822_config_intr(struct phy_device *phydev)
+ if (misr_status < 0)
+ return misr_status;
+
+- misr_status |= (DP83822_RX_ERR_HF_INT_EN |
+- DP83822_FALSE_CARRIER_HF_INT_EN |
+- DP83822_LINK_STAT_INT_EN |
++ misr_status |= (DP83822_LINK_STAT_INT_EN |
+ DP83822_ENERGY_DET_INT_EN |
+ DP83822_LINK_QUAL_INT_EN);
+
+- if (!dp83822->fx_enabled)
++ /* Private data pointer is NULL on DP83825/26 */
++ if (!dp83822 || !dp83822->fx_enabled)
+ misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
+ DP83822_DUP_MODE_CHANGE_INT_EN |
+ DP83822_SPEED_CHANGED_INT_EN;
+@@ -254,9 +253,9 @@ static int dp83822_config_intr(struct phy_device *phydev)
+ DP83822_PAGE_RX_INT_EN |
+ DP83822_EEE_ERROR_CHANGE_INT_EN);
+
+- if (!dp83822->fx_enabled)
+- misr_status |= DP83822_MDI_XOVER_INT_EN |
+- DP83822_ANEG_ERR_INT_EN |
++ /* Private data pointer is NULL on DP83825/26 */
++ if (!dp83822 || !dp83822->fx_enabled)
++ misr_status |= DP83822_ANEG_ERR_INT_EN |
+ DP83822_WOL_PKT_INT_EN;
+
+ err = phy_write(phydev, MII_DP83822_MISR2, misr_status);
+@@ -274,7 +273,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
+ if (err < 0)
+ return err;
+
+- err = phy_write(phydev, MII_DP83822_MISR1, 0);
++ err = phy_write(phydev, MII_DP83822_MISR2, 0);
+ if (err < 0)
+ return err;
+
+diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
+index 6bbc81ad295fb..76ca43108d992 100644
+--- a/drivers/net/phy/dp83867.c
++++ b/drivers/net/phy/dp83867.c
+@@ -41,6 +41,7 @@
+ #define DP83867_STRAP_STS1 0x006E
+ #define DP83867_STRAP_STS2 0x006f
+ #define DP83867_RGMIIDCTL 0x0086
++#define DP83867_DSP_FFE_CFG 0x012c
+ #define DP83867_RXFCFG 0x0134
+ #define DP83867_RXFPMD1 0x0136
+ #define DP83867_RXFPMD2 0x0137
+@@ -137,6 +138,7 @@
+ #define DP83867_DOWNSHIFT_2_COUNT 2
+ #define DP83867_DOWNSHIFT_4_COUNT 4
+ #define DP83867_DOWNSHIFT_8_COUNT 8
++#define DP83867_SGMII_AUTONEG_EN BIT(7)
+
+ /* CFG3 bits */
+ #define DP83867_CFG3_INT_OE BIT(7)
+@@ -790,6 +792,14 @@ static int dp83867_config_init(struct phy_device *phydev)
+ else
+ val &= ~DP83867_SGMII_TYPE;
+ phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_SGMIICTL, val);
++
++ /* This is a SW workaround for link instability if RX_CTRL is
++ * not strapped to mode 3 or 4 in HW. This is required for SGMII
++ * in addition to clearing bit 7, handled above.
++ */
++ if (dp83867->rxctrl_strap_quirk)
++ phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
++ BIT(8));
+ }
+
+ val = phy_read(phydev, DP83867_CFG3);
+@@ -826,14 +836,59 @@ static int dp83867_phy_reset(struct phy_device *phydev)
+ {
+ int err;
+
+- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
++ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
+ if (err < 0)
+ return err;
+
+ usleep_range(10, 20);
+
+- return phy_modify(phydev, MII_DP83867_PHYCTRL,
++ err = phy_modify(phydev, MII_DP83867_PHYCTRL,
+ DP83867_PHYCR_FORCE_LINK_GOOD, 0);
++ if (err < 0)
++ return err;
++
++ /* Configure the DSP Feedforward Equalizer Configuration register to
++ * improve short cable (< 1 meter) performance. This will not affect
++ * long cable performance.
++ */
++ err = phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_DSP_FFE_CFG,
++ 0x0e81);
++ if (err < 0)
++ return err;
++
++ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
++ if (err < 0)
++ return err;
++
++ usleep_range(10, 20);
++
++ return 0;
++}
++
++static void dp83867_link_change_notify(struct phy_device *phydev)
++{
++ /* There is a limitation in DP83867 PHY device where SGMII AN is
++ * only triggered once after the device is booted up. Even after the
++ * PHY TPI is down and up again, SGMII AN is not triggered and
++ * hence no new in-band message from PHY to MAC side SGMII.
++ * This could cause an issue during power up, when PHY is up prior
++ * to MAC. At this condition, once MAC side SGMII is up, MAC side
++ * SGMII wouldn`t receive new in-band message from TI PHY with
++ * correct link status, speed and duplex info.
++ * Thus, implemented a SW solution here to retrigger SGMII Auto-Neg
++ * whenever there is a link change.
++ */
++ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
++ int val = 0;
++
++ val = phy_clear_bits(phydev, DP83867_CFG2,
++ DP83867_SGMII_AUTONEG_EN);
++ if (val < 0)
++ return;
++
++ phy_set_bits(phydev, DP83867_CFG2,
++ DP83867_SGMII_AUTONEG_EN);
++ }
+ }
+
+ static struct phy_driver dp83867_driver[] = {
+@@ -860,6 +915,8 @@ static struct phy_driver dp83867_driver[] = {
+
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
++
++ .link_change_notify = dp83867_link_change_notify,
+ },
+ };
+ module_phy_driver(dp83867_driver);
+diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
+index 755220c6451fb..cdf4e22fe85d0 100644
+--- a/drivers/net/phy/dp83869.c
++++ b/drivers/net/phy/dp83869.c
+@@ -587,15 +587,13 @@ static int dp83869_of_init(struct phy_device *phydev)
+ &dp83869_internal_delay[0],
+ delay_size, true);
+ if (dp83869->rx_int_delay < 0)
+- dp83869->rx_int_delay =
+- dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
++ dp83869->rx_int_delay = DP83869_CLK_DELAY_DEF;
+
+ dp83869->tx_int_delay = phy_get_internal_delay(phydev, dev,
+ &dp83869_internal_delay[0],
+ delay_size, false);
+ if (dp83869->tx_int_delay < 0)
+- dp83869->tx_int_delay =
+- dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
++ dp83869->tx_int_delay = DP83869_CLK_DELAY_DEF;
+
+ return ret;
+ }
+diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
+index 4fcfca4e17021..87975b843d276 100644
+--- a/drivers/net/phy/marvell.c
++++ b/drivers/net/phy/marvell.c
+@@ -189,6 +189,8 @@
+ #define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII 0x4
+ #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
+
++#define MII_88E1510_MSCR_2 0x15
++
+ #define MII_VCT5_TX_RX_MDI0_COUPLING 0x10
+ #define MII_VCT5_TX_RX_MDI1_COUPLING 0x11
+ #define MII_VCT5_TX_RX_MDI2_COUPLING 0x12
+@@ -551,9 +553,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev)
+ else
+ mscr = 0;
+
+- return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
+- MII_88E1121_PHY_MSCR_REG,
+- MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
++ return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE,
++ MII_88E1121_PHY_MSCR_REG,
++ MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
+ }
+
+ static int m88e1121_config_aneg(struct phy_device *phydev)
+@@ -567,11 +569,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
+ return err;
+ }
+
++ changed = err;
++
+ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
+ if (err < 0)
+ return err;
+
+- changed = err;
++ changed |= err;
+
+ err = genphy_config_aneg(phydev);
+ if (err < 0)
+@@ -1211,16 +1215,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev)
+ {
+ int err;
+
+- err = genphy_soft_reset(phydev);
++ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
+ if (err < 0)
+ return err;
+
+- err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
++ err = genphy_config_aneg(phydev);
+ if (err < 0)
+ return err;
+
+- err = genphy_config_aneg(phydev);
+- return 0;
++ return genphy_soft_reset(phydev);
+ }
+
+ static int m88e1118_config_init(struct phy_device *phydev)
+@@ -1242,6 +1245,12 @@ static int m88e1118_config_init(struct phy_device *phydev)
+ if (err < 0)
+ return err;
+
++ if (phy_interface_is_rgmii(phydev)) {
++ err = m88e1121_config_aneg_rgmii_delays(phydev);
++ if (err < 0)
++ return err;
++ }
++
+ /* Adjust LED Control */
+ if (phydev->dev_flags & MARVELL_PHY_M1118_DNS323_LEDS)
+ err = phy_write(phydev, 0x10, 0x1100);
+@@ -1684,8 +1693,8 @@ static int marvell_suspend(struct phy_device *phydev)
+ int err;
+
+ /* Suspend the fiber mode first */
+- if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+- phydev->supported)) {
++ if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
++ phydev->supported)) {
+ err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
+ if (err < 0)
+ goto error;
+@@ -1719,8 +1728,8 @@ static int marvell_resume(struct phy_device *phydev)
+ int err;
+
+ /* Resume the fiber mode first */
+- if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+- phydev->supported)) {
++ if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
++ phydev->supported)) {
+ err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
+ if (err < 0)
+ goto error;
+@@ -1932,6 +1941,60 @@ static void marvell_get_stats(struct phy_device *phydev,
+ data[i] = marvell_get_stat(phydev, i);
+ }
+
++static int m88e1510_loopback(struct phy_device *phydev, bool enable)
++{
++ int err;
++
++ if (enable) {
++ u16 bmcr_ctl = 0, mscr2_ctl = 0;
++
++ if (phydev->speed == SPEED_1000)
++ bmcr_ctl = BMCR_SPEED1000;
++ else if (phydev->speed == SPEED_100)
++ bmcr_ctl = BMCR_SPEED100;
++
++ if (phydev->duplex == DUPLEX_FULL)
++ bmcr_ctl |= BMCR_FULLDPLX;
++
++ err = phy_write(phydev, MII_BMCR, bmcr_ctl);
++ if (err < 0)
++ return err;
++
++ if (phydev->speed == SPEED_1000)
++ mscr2_ctl = BMCR_SPEED1000;
++ else if (phydev->speed == SPEED_100)
++ mscr2_ctl = BMCR_SPEED100;
++
++ err = phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
++ MII_88E1510_MSCR_2, BMCR_SPEED1000 |
++ BMCR_SPEED100, mscr2_ctl);
++ if (err < 0)
++ return err;
++
++ /* Need soft reset to have speed configuration takes effect */
++ err = genphy_soft_reset(phydev);
++ if (err < 0)
++ return err;
++
++ err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
++ BMCR_LOOPBACK);
++
++ if (!err) {
++ /* It takes some time for PHY device to switch
++ * into/out-of loopback mode.
++ */
++ msleep(1000);
++ }
++ return err;
++ } else {
++ err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0);
++ if (err < 0)
++ return err;
++
++ return phy_config_aneg(phydev);
++ }
++}
++
+ static int marvell_vct5_wait_complete(struct phy_device *phydev)
+ {
+ int i;
+@@ -3078,7 +3141,7 @@ static struct phy_driver marvell_drivers[] = {
+ .get_sset_count = marvell_get_sset_count,
+ .get_strings = marvell_get_strings,
+ .get_stats = marvell_get_stats,
+- .set_loopback = genphy_loopback,
++ .set_loopback = m88e1510_loopback,
+ .get_tunable = m88e1011_get_tunable,
+ .set_tunable = m88e1011_set_tunable,
+ .cable_test_start = marvell_vct7_cable_test_start,
+diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
+index bd310e8d5e43d..1caa6d943a7b7 100644
+--- a/drivers/net/phy/marvell10g.c
++++ b/drivers/net/phy/marvell10g.c
+@@ -307,6 +307,13 @@ static int mv3310_power_up(struct phy_device *phydev)
+ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL,
+ MV_V2_PORT_CTRL_PWRDOWN);
+
++ /* Sometimes, the power down bit doesn't clear immediately, and
++ * a read of this register causes the bit not to clear. Delay
++ * 100us to allow the PHY to come out of power down mode before
++ * the next access.
++ */
++ udelay(100);
++
+ if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310 ||
+ priv->firmware_ver < 0x00030000)
+ return ret;
+@@ -789,7 +796,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev)
+
+ cssr1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_CSSR1);
+ if (cssr1 < 0)
+- return val;
++ return cssr1;
+
+ /* If the link settings are not resolved, mark the link down */
+ if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) {
+diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
+index 6865d9319197f..5f89828fd9f17 100644
+--- a/drivers/net/phy/mdio_bus.c
++++ b/drivers/net/phy/mdio_bus.c
+@@ -108,7 +108,12 @@ EXPORT_SYMBOL(mdiobus_unregister_device);
+
+ struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
+ {
+- struct mdio_device *mdiodev = bus->mdio_map[addr];
++ struct mdio_device *mdiodev;
++
++ if (addr < 0 || addr >= ARRAY_SIZE(bus->mdio_map))
++ return NULL;
++
++ mdiodev = bus->mdio_map[addr];
+
+ if (!mdiodev)
+ return NULL;
+@@ -577,7 +582,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
+ }
+
+ for (i = 0; i < PHY_MAX_ADDR; i++) {
+- if ((bus->phy_mask & (1 << i)) == 0) {
++ if ((bus->phy_mask & BIT(i)) == 0) {
+ struct phy_device *phydev;
+
+ phydev = mdiobus_scan(bus, i);
+@@ -591,7 +596,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
+ mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device);
+
+ bus->state = MDIOBUS_REGISTERED;
+- pr_info("%s: probed\n", bus->name);
++ dev_dbg(&bus->dev, "probed\n");
+ return 0;
+
+ error:
+@@ -1011,7 +1016,6 @@ int __init mdio_bus_init(void)
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(mdio_bus_init);
+
+ #if IS_ENABLED(CONFIG_PHYLIB)
+ void mdio_bus_exit(void)
+diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
+index 250742ffdfd91..044828d081d22 100644
+--- a/drivers/net/phy/mdio_device.c
++++ b/drivers/net/phy/mdio_device.c
+@@ -21,6 +21,7 @@
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/unistd.h>
++#include <linux/property.h>
+
+ void mdio_device_free(struct mdio_device *mdiodev)
+ {
+@@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free);
+
+ static void mdio_device_release(struct device *dev)
+ {
++ fwnode_handle_put(dev->fwnode);
+ kfree(to_mdio_device(dev));
+ }
+
+diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c
+index b560e99695dfd..69b829e6ab35b 100644
+--- a/drivers/net/phy/mdio_devres.c
++++ b/drivers/net/phy/mdio_devres.c
+@@ -98,13 +98,14 @@ EXPORT_SYMBOL(__devm_mdiobus_register);
+
+ #if IS_ENABLED(CONFIG_OF_MDIO)
+ /**
+- * devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register()
++ * __devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register()
+ * @dev: Device to register mii_bus for
+ * @mdio: MII bus structure to register
+ * @np: Device node to parse
++ * @owner: Owning module
+ */
+-int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
+- struct device_node *np)
++int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
++ struct device_node *np, struct module *owner)
+ {
+ struct mdiobus_devres *dr;
+ int ret;
+@@ -117,7 +118,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
+ if (!dr)
+ return -ENOMEM;
+
+- ret = of_mdiobus_register(mdio, np);
++ ret = __of_mdiobus_register(mdio, np, owner);
+ if (ret) {
+ devres_free(dr);
+ return ret;
+@@ -127,7 +128,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
+ devres_add(dev, dr);
+ return 0;
+ }
+-EXPORT_SYMBOL(devm_of_mdiobus_register);
++EXPORT_SYMBOL(__devm_of_mdiobus_register);
+ #endif /* CONFIG_OF_MDIO */
+
+ MODULE_LICENSE("GPL");
+diff --git a/drivers/net/phy/mediatek-ge.c b/drivers/net/phy/mediatek-ge.c
+index b7a5ae20edd53..68ee434f9dea3 100644
+--- a/drivers/net/phy/mediatek-ge.c
++++ b/drivers/net/phy/mediatek-ge.c
+@@ -55,9 +55,6 @@ static int mt7530_phy_config_init(struct phy_device *phydev)
+
+ static int mt7531_phy_config_init(struct phy_device *phydev)
+ {
+- if (phydev->interface != PHY_INTERFACE_MODE_INTERNAL)
+- return -EINVAL;
+-
+ mtk_gephy_config_init(phydev);
+
+ /* PHY link down power saving enable */
+diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
+index 7e7904fee1d97..a6015cd03bff8 100644
+--- a/drivers/net/phy/meson-gxl.c
++++ b/drivers/net/phy/meson-gxl.c
+@@ -30,8 +30,12 @@
+ #define INTSRC_LINK_DOWN BIT(4)
+ #define INTSRC_REMOTE_FAULT BIT(5)
+ #define INTSRC_ANEG_COMPLETE BIT(6)
++#define INTSRC_ENERGY_DETECT BIT(7)
+ #define INTSRC_MASK 30
+
++#define INT_SOURCES (INTSRC_LINK_DOWN | INTSRC_ANEG_COMPLETE | \
++ INTSRC_ENERGY_DETECT)
++
+ #define BANK_ANALOG_DSP 0
+ #define BANK_WOL 1
+ #define BANK_BIST 3
+@@ -200,7 +204,6 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev)
+
+ static int meson_gxl_config_intr(struct phy_device *phydev)
+ {
+- u16 val;
+ int ret;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+@@ -209,16 +212,9 @@ static int meson_gxl_config_intr(struct phy_device *phydev)
+ if (ret)
+ return ret;
+
+- val = INTSRC_ANEG_PR
+- | INTSRC_PARALLEL_FAULT
+- | INTSRC_ANEG_LP_ACK
+- | INTSRC_LINK_DOWN
+- | INTSRC_REMOTE_FAULT
+- | INTSRC_ANEG_COMPLETE;
+- ret = phy_write(phydev, INTSRC_MASK, val);
++ ret = phy_write(phydev, INTSRC_MASK, INT_SOURCES);
+ } else {
+- val = 0;
+- ret = phy_write(phydev, INTSRC_MASK, val);
++ ret = phy_write(phydev, INTSRC_MASK, 0);
+
+ /* Ack any pending IRQ */
+ ret = meson_gxl_ack_interrupt(phydev);
+@@ -237,9 +233,16 @@ static irqreturn_t meson_gxl_handle_interrupt(struct phy_device *phydev)
+ return IRQ_NONE;
+ }
+
++ irq_status &= INT_SOURCES;
++
+ if (irq_status == 0)
+ return IRQ_NONE;
+
++ /* Aneg-complete interrupt is used for link-up detection */
++ if (phydev->autoneg == AUTONEG_ENABLE &&
++ irq_status == INTSRC_ENERGY_DETECT)
++ return IRQ_HANDLED;
++
+ phy_trigger_machine(phydev);
+
+ return IRQ_HANDLED;
+@@ -258,6 +261,8 @@ static struct phy_driver meson_gxl_phy[] = {
+ .handle_interrupt = meson_gxl_handle_interrupt,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
++ .read_mmd = genphy_read_mmd_unsupported,
++ .write_mmd = genphy_write_mmd_unsupported,
+ }, {
+ PHY_ID_MATCH_EXACT(0x01803301),
+ .name = "Meson G12A Internal PHY",
+@@ -268,6 +273,8 @@ static struct phy_driver meson_gxl_phy[] = {
+ .handle_interrupt = meson_gxl_handle_interrupt,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
++ .read_mmd = genphy_read_mmd_unsupported,
++ .write_mmd = genphy_write_mmd_unsupported,
+ },
+ };
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 5c928f827173c..05a8985d71073 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -335,7 +335,7 @@ static int kszphy_config_reset(struct phy_device *phydev)
+ }
+ }
+
+- if (priv->led_mode >= 0)
++ if (priv->type && priv->led_mode >= 0)
+ kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode);
+
+ return 0;
+@@ -351,10 +351,10 @@ static int kszphy_config_init(struct phy_device *phydev)
+
+ type = priv->type;
+
+- if (type->has_broadcast_disable)
++ if (type && type->has_broadcast_disable)
+ kszphy_broadcast_disable(phydev);
+
+- if (type->has_nand_tree_disable)
++ if (type && type->has_nand_tree_disable)
+ kszphy_nand_tree_disable(phydev);
+
+ return kszphy_config_reset(phydev);
+@@ -863,9 +863,9 @@ static int ksz9031_config_init(struct phy_device *phydev)
+ MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
+ tx_data_skews, 4, &update);
+
+- if (update && phydev->interface != PHY_INTERFACE_MODE_RGMII)
++ if (update && !phy_interface_is_rgmii(phydev))
+ phydev_warn(phydev,
+- "*-skew-ps values should be used only with phy-mode = \"rgmii\"\n");
++ "*-skew-ps values should be used only with RGMII PHY modes\n");
+
+ /* Silicon Errata Sheet (DS80000691D or DS80000692D):
+ * When the device links in the 1000BASE-T slave mode only,
+@@ -1328,7 +1328,7 @@ static int kszphy_probe(struct phy_device *phydev)
+
+ priv->type = type;
+
+- if (type->led_mode_reg) {
++ if (type && type->led_mode_reg) {
+ ret = of_property_read_u32(np, "micrel,led-mode",
+ &priv->led_mode);
+ if (ret)
+@@ -1349,7 +1349,8 @@ static int kszphy_probe(struct phy_device *phydev)
+ unsigned long rate = clk_get_rate(clk);
+ bool rmii_ref_clk_sel_25_mhz;
+
+- priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel;
++ if (type)
++ priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel;
+ rmii_ref_clk_sel_25_mhz = of_property_read_bool(np,
+ "micrel,rmii-reference-clock-select-25-mhz");
+
+@@ -1544,11 +1545,12 @@ static struct phy_driver ksphy_driver[] = {
+ .name = "Micrel KS8737",
+ /* PHY_BASIC_FEATURES */
+ .driver_data = &ks8737_type,
++ .probe = kszphy_probe,
+ .config_init = kszphy_config_init,
+ .config_intr = kszphy_config_intr,
+ .handle_interrupt = kszphy_handle_interrupt,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8021,
+ .phy_id_mask = 0x00ffffff,
+@@ -1562,8 +1564,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8031,
+ .phy_id_mask = 0x00ffffff,
+@@ -1577,8 +1579,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8041,
+ .phy_id_mask = MICREL_PHY_ID_MASK,
+@@ -1593,8 +1595,9 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ /* No suspend/resume callbacks because of errata DS80000700A,
++ * receiver error following software power down.
++ */
+ }, {
+ .phy_id = PHY_ID_KSZ8041RNLI,
+ .phy_id_mask = MICREL_PHY_ID_MASK,
+@@ -1608,8 +1611,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .name = "Micrel KSZ8051",
+ /* PHY_BASIC_FEATURES */
+@@ -1622,8 +1625,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+ .match_phy_device = ksz8051_match_phy_device,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8001,
+ .name = "Micrel KSZ8001 or KS8721",
+@@ -1637,8 +1640,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8081,
+ .name = "Micrel KSZ8081 or KSZ8091",
+@@ -1684,8 +1687,8 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .suspend = kszphy_suspend,
++ .resume = kszphy_resume,
+ .read_mmd = genphy_read_mmd_unsupported,
+ .write_mmd = genphy_write_mmd_unsupported,
+ }, {
+@@ -1703,7 +1706,7 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
++ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_LAN8814,
+@@ -1731,7 +1734,7 @@ static struct phy_driver ksphy_driver[] = {
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+- .suspend = genphy_suspend,
++ .suspend = kszphy_suspend,
+ .resume = kszphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ8873MLL,
+diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
+index 9f1f2b6c97d4f..230f2fcf9c46a 100644
+--- a/drivers/net/phy/microchip.c
++++ b/drivers/net/phy/microchip.c
+@@ -342,6 +342,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev)
+ return genphy_config_aneg(phydev);
+ }
+
++static void lan88xx_link_change_notify(struct phy_device *phydev)
++{
++ int temp;
++
++ /* At forced 100 F/H mode, chip may fail to set mode correctly
++ * when cable is switched between long(~50+m) and short one.
++ * As workaround, set to 10 before setting to 100
++ * at forced 100 F/H mode.
++ */
++ if (!phydev->autoneg && phydev->speed == 100) {
++ /* disable phy interrupt */
++ temp = phy_read(phydev, LAN88XX_INT_MASK);
++ temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
++
++ temp = phy_read(phydev, MII_BMCR);
++ temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
++ phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
++ temp |= BMCR_SPEED100;
++ phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
++
++ /* clear pending interrupt generated while workaround */
++ temp = phy_read(phydev, LAN88XX_INT_STS);
++
++ /* enable phy interrupt back */
++ temp = phy_read(phydev, LAN88XX_INT_MASK);
++ temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
++ }
++}
++
+ static struct phy_driver microchip_phy_driver[] = {
+ {
+ .phy_id = 0x0007c130,
+@@ -355,6 +386,7 @@ static struct phy_driver microchip_phy_driver[] = {
+
+ .config_init = lan88xx_config_init,
+ .config_aneg = lan88xx_config_aneg,
++ .link_change_notify = lan88xx_link_change_notify,
+
+ .config_intr = lan88xx_phy_config_intr,
+ .handle_interrupt = lan88xx_handle_interrupt,
+diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
+index a50235fdf7d99..055e4ca5b3b5c 100644
+--- a/drivers/net/phy/mscc/mscc.h
++++ b/drivers/net/phy/mscc/mscc.h
+@@ -179,6 +179,7 @@ enum rgmii_clock_delay {
+ #define VSC8502_RGMII_CNTL 20
+ #define VSC8502_RGMII_RX_DELAY_MASK 0x0070
+ #define VSC8502_RGMII_TX_DELAY_MASK 0x0007
++#define VSC8502_RGMII_RX_CLK_DISABLE 0x0800
+
+ #define MSCC_PHY_WOL_LOWER_MAC_ADDR 21
+ #define MSCC_PHY_WOL_MID_MAC_ADDR 22
+diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c
+index b7b2521c73fb6..c00eef457b850 100644
+--- a/drivers/net/phy/mscc/mscc_macsec.c
++++ b/drivers/net/phy/mscc/mscc_macsec.c
+@@ -632,6 +632,7 @@ static void vsc8584_macsec_free_flow(struct vsc8531_private *priv,
+
+ list_del(&flow->list);
+ clear_bit(flow->index, bitmap);
++ memzero_explicit(flow->key, sizeof(flow->key));
+ kfree(flow);
+ }
+
+diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
+index 6e32da28e138f..cef43b1344a94 100644
+--- a/drivers/net/phy/mscc/mscc_main.c
++++ b/drivers/net/phy/mscc/mscc_main.c
+@@ -527,14 +527,27 @@ out_unlock:
+ * * 2.0 ns (which causes the data to be sampled at exactly half way between
+ * clock transitions at 1000 Mbps) if delays should be enabled
+ */
+-static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+- u16 rgmii_rx_delay_mask,
+- u16 rgmii_tx_delay_mask)
++static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
++ u16 rgmii_rx_delay_mask,
++ u16 rgmii_tx_delay_mask)
+ {
+ u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
+ u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
+ u16 reg_val = 0;
+- int rc;
++ u16 mask = 0;
++ int rc = 0;
++
++ /* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
++ * to be unset for all PHY modes, so do that as part of the paged
++ * register modification.
++ * For some family members (like VSC8530/31/40/41) this bit is reserved
++ * and read-only, and the RX clock is enabled by default.
++ */
++ if (rgmii_cntl == VSC8502_RGMII_CNTL)
++ mask |= VSC8502_RGMII_RX_CLK_DISABLE;
++
++ if (phy_interface_is_rgmii(phydev))
++ mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
+
+ mutex_lock(&phydev->lock);
+
+@@ -545,10 +558,9 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+ reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
+
+- rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+- rgmii_cntl,
+- rgmii_rx_delay_mask | rgmii_tx_delay_mask,
+- reg_val);
++ if (mask)
++ rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
++ rgmii_cntl, mask, reg_val);
+
+ mutex_unlock(&phydev->lock);
+
+@@ -557,19 +569,11 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+
+ static int vsc85xx_default_config(struct phy_device *phydev)
+ {
+- int rc;
+-
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+- if (phy_interface_mode_is_rgmii(phydev->interface)) {
+- rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
+- VSC8502_RGMII_RX_DELAY_MASK,
+- VSC8502_RGMII_TX_DELAY_MASK);
+- if (rc)
+- return rc;
+- }
+-
+- return 0;
++ return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
++ VSC8502_RGMII_RX_DELAY_MASK,
++ VSC8502_RGMII_TX_DELAY_MASK);
+ }
+
+ static int vsc85xx_get_tunable(struct phy_device *phydev,
+@@ -1766,13 +1770,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
+ if (ret)
+ return ret;
+
+- if (phy_interface_is_rgmii(phydev)) {
+- ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
+- VSC8572_RGMII_RX_DELAY_MASK,
+- VSC8572_RGMII_TX_DELAY_MASK);
+- if (ret)
+- return ret;
+- }
++ ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
++ VSC8572_RGMII_RX_DELAY_MASK,
++ VSC8572_RGMII_TX_DELAY_MASK);
++ if (ret)
++ return ret;
+
+ ret = genphy_soft_reset(phydev);
+ if (ret)
+@@ -2664,6 +2666,7 @@ static struct phy_driver vsc85xx_driver[] = {
+ module_phy_driver(vsc85xx_driver);
+
+ static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
++ { PHY_ID_VSC8502, 0xfffffff0, },
+ { PHY_ID_VSC8504, 0xfffffff0, },
+ { PHY_ID_VSC8514, 0xfffffff0, },
+ { PHY_ID_VSC8530, 0xfffffff0, },
+@@ -2685,3 +2688,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
+ MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
+ MODULE_AUTHOR("Nagaraju Lakkaraju");
+ MODULE_LICENSE("Dual MIT/GPL");
++
++MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW);
++MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW);
+diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
+index 5ce1bf03bbd71..f9c70476d7e8c 100644
+--- a/drivers/net/phy/mxl-gpy.c
++++ b/drivers/net/phy/mxl-gpy.c
+@@ -96,6 +96,7 @@ static int gpy_config_init(struct phy_device *phydev)
+
+ static int gpy_probe(struct phy_device *phydev)
+ {
++ int fw_version;
+ int ret;
+
+ if (!phydev->is_c45) {
+@@ -105,12 +106,12 @@ static int gpy_probe(struct phy_device *phydev)
+ }
+
+ /* Show GPY PHY FW version in dmesg */
+- ret = phy_read(phydev, PHY_FWV);
+- if (ret < 0)
+- return ret;
++ fw_version = phy_read(phydev, PHY_FWV);
++ if (fw_version < 0)
++ return fw_version;
+
+- phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret,
+- (ret & PHY_FWV_REL_MASK) ? "release" : "test");
++ phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", fw_version,
++ (fw_version & PHY_FWV_REL_MASK) ? "release" : "test");
+
+ return 0;
+ }
+diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
+index 91a327f67a420..a3196c04caf68 100644
+--- a/drivers/net/phy/nxp-c45-tja11xx.c
++++ b/drivers/net/phy/nxp-c45-tja11xx.c
+@@ -79,7 +79,7 @@
+ #define SGMII_ABILITY BIT(0)
+
+ #define VEND1_MII_BASIC_CONFIG 0xAFC6
+-#define MII_BASIC_CONFIG_REV BIT(8)
++#define MII_BASIC_CONFIG_REV BIT(4)
+ #define MII_BASIC_CONFIG_SGMII 0x9
+ #define MII_BASIC_CONFIG_RGMII 0x7
+ #define MII_BASIC_CONFIG_RMII 0x5
+@@ -168,7 +168,7 @@
+ #define MAX_ID_PS 2260U
+ #define DEFAULT_ID_PS 2000U
+
+-#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK(31, 0) * (ppb) * \
++#define PPM_TO_SUBNS_INC(ppb) div_u64(GENMASK_ULL(31, 0) * (ppb) * \
+ PTP_CLK_PERIOD_100BT1, NSEC_PER_SEC)
+
+ #define NXP_C45_SKB_CB(skb) ((struct nxp_c45_skb_cb *)(skb)->cb)
+@@ -1117,6 +1117,17 @@ no_ptp_support:
+ return ret;
+ }
+
++static void nxp_c45_remove(struct phy_device *phydev)
++{
++ struct nxp_c45_phy *priv = phydev->priv;
++
++ if (priv->ptp_clock)
++ ptp_clock_unregister(priv->ptp_clock);
++
++ skb_queue_purge(&priv->tx_queue);
++ skb_queue_purge(&priv->rx_queue);
++}
++
+ static struct phy_driver nxp_c45_driver[] = {
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_TJA_1103),
+@@ -1139,6 +1150,7 @@ static struct phy_driver nxp_c45_driver[] = {
+ .set_loopback = genphy_c45_loopback,
+ .get_sqi = nxp_c45_get_sqi,
+ .get_sqi_max = nxp_c45_get_sqi_max,
++ .remove = nxp_c45_remove,
+ },
+ };
+
+diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
+index 2870c33b8975d..271fc01f7f7fd 100644
+--- a/drivers/net/phy/phy-core.c
++++ b/drivers/net/phy/phy-core.c
+@@ -162,11 +162,11 @@ static const struct phy_setting settings[] = {
+ PHY_SETTING( 2500, FULL, 2500baseT_Full ),
+ PHY_SETTING( 2500, FULL, 2500baseX_Full ),
+ /* 1G */
+- PHY_SETTING( 1000, FULL, 1000baseKX_Full ),
+ PHY_SETTING( 1000, FULL, 1000baseT_Full ),
+ PHY_SETTING( 1000, HALF, 1000baseT_Half ),
+ PHY_SETTING( 1000, FULL, 1000baseT1_Full ),
+ PHY_SETTING( 1000, FULL, 1000baseX_Full ),
++ PHY_SETTING( 1000, FULL, 1000baseKX_Full ),
+ /* 100M */
+ PHY_SETTING( 100, FULL, 100baseT_Full ),
+ PHY_SETTING( 100, FULL, 100baseT1_Full ),
+diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
+index a3bfb156c83d7..1135e63a4a76e 100644
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -31,6 +31,7 @@
+ #include <linux/io.h>
+ #include <linux/uaccess.h>
+ #include <linux/atomic.h>
++#include <linux/suspend.h>
+ #include <net/netlink.h>
+ #include <net/genetlink.h>
+ #include <net/sock.h>
+@@ -56,6 +57,18 @@ static const char *phy_state_to_str(enum phy_state st)
+ return NULL;
+ }
+
++static void phy_process_state_change(struct phy_device *phydev,
++ enum phy_state old_state)
++{
++ if (old_state != phydev->state) {
++ phydev_dbg(phydev, "PHY state change %s -> %s\n",
++ phy_state_to_str(old_state),
++ phy_state_to_str(phydev->state));
++ if (phydev->drv && phydev->drv->link_change_notify)
++ phydev->drv->link_change_notify(phydev);
++ }
++}
++
+ static void phy_link_up(struct phy_device *phydev)
+ {
+ phydev->phy_link_change(phydev, true);
+@@ -815,7 +828,12 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
+ phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
+
+ /* Restart the PHY */
+- _phy_start_aneg(phydev);
++ if (phy_is_started(phydev)) {
++ phydev->state = PHY_UP;
++ phy_trigger_machine(phydev);
++ } else {
++ _phy_start_aneg(phydev);
++ }
+
+ mutex_unlock(&phydev->lock);
+ return 0;
+@@ -965,8 +983,35 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
+ {
+ struct phy_device *phydev = phy_dat;
+ struct phy_driver *drv = phydev->drv;
++ irqreturn_t ret;
++
++ /* Wakeup interrupts may occur during a system sleep transition.
++ * Postpone handling until the PHY has resumed.
++ */
++ if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) {
++ struct net_device *netdev = phydev->attached_dev;
++
++ if (netdev) {
++ struct device *parent = netdev->dev.parent;
++
++ if (netdev->wol_enabled)
++ pm_system_wakeup();
++ else if (device_may_wakeup(&netdev->dev))
++ pm_wakeup_dev_event(&netdev->dev, 0, true);
++ else if (parent && device_may_wakeup(parent))
++ pm_wakeup_dev_event(parent, 0, true);
++ }
++
++ phydev->irq_rerun = 1;
++ disable_irq_nosync(irq);
++ return IRQ_HANDLED;
++ }
++
++ mutex_lock(&phydev->lock);
++ ret = drv->handle_interrupt(phydev);
++ mutex_unlock(&phydev->lock);
+
+- return drv->handle_interrupt(phydev);
++ return ret;
+ }
+
+ /**
+@@ -1028,6 +1073,7 @@ EXPORT_SYMBOL(phy_free_interrupt);
+ void phy_stop(struct phy_device *phydev)
+ {
+ struct net_device *dev = phydev->attached_dev;
++ enum phy_state old_state;
+
+ if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) {
+ WARN(1, "called from state %s\n",
+@@ -1036,6 +1082,7 @@ void phy_stop(struct phy_device *phydev)
+ }
+
+ mutex_lock(&phydev->lock);
++ old_state = phydev->state;
+
+ if (phydev->state == PHY_CABLETEST) {
+ phy_abort_cable_test(phydev);
+@@ -1046,6 +1093,7 @@ void phy_stop(struct phy_device *phydev)
+ sfp_upstream_stop(phydev->sfp_bus);
+
+ phydev->state = PHY_HALTED;
++ phy_process_state_change(phydev, old_state);
+
+ mutex_unlock(&phydev->lock);
+
+@@ -1163,13 +1211,7 @@ void phy_state_machine(struct work_struct *work)
+ if (err < 0)
+ phy_error(phydev);
+
+- if (old_state != phydev->state) {
+- phydev_dbg(phydev, "PHY state change %s -> %s\n",
+- phy_state_to_str(old_state),
+- phy_state_to_str(phydev->state));
+- if (phydev->drv && phydev->drv->link_change_notify)
+- phydev->drv->link_change_notify(phydev);
+- }
++ phy_process_state_change(phydev, old_state);
+
+ /* Only re-schedule a PHY state machine change if we are polling the
+ * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 4f9990b47a377..0429825a7179d 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -215,6 +215,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev)
+
+ static void phy_device_release(struct device *dev)
+ {
++ fwnode_handle_put(dev->fwnode);
+ kfree(to_phy_device(dev));
+ }
+
+@@ -277,6 +278,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
+ if (phydev->mac_managed_pm)
+ return 0;
+
++ /* Wakeup interrupts may occur during the system sleep transition when
++ * the PHY is inaccessible. Set flag to postpone handling until the PHY
++ * has resumed. Wait for concurrent interrupt handler to complete.
++ */
++ if (phy_interrupt_is_valid(phydev)) {
++ phydev->irq_suspended = 1;
++ synchronize_irq(phydev->irq);
++ }
++
+ /* We must stop the state machine manually, otherwise it stops out of
+ * control, possibly with the phydev->lock held. Upon resume, netdev
+ * may call phy routines that try to grab the same lock, and that may
+@@ -306,6 +316,14 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
+
+ phydev->suspended_by_mdio_bus = 0;
+
++ /* If we managed to get here with the PHY state machine in a state
++ * neither PHY_HALTED, PHY_READY nor PHY_UP, this is an indication
++ * that something went wrong and we should most likely be using
++ * MAC managed PM, but we are not.
++ */
++ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY &&
++ phydev->state != PHY_UP);
++
+ ret = phy_init_hw(phydev);
+ if (ret < 0)
+ return ret;
+@@ -314,6 +332,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
+ if (ret < 0)
+ return ret;
+ no_resume:
++ if (phy_interrupt_is_valid(phydev)) {
++ phydev->irq_suspended = 0;
++ synchronize_irq(phydev->irq);
++
++ /* Rerun interrupts which were postponed by phy_interrupt()
++ * because they occurred during the system sleep transition.
++ */
++ if (phydev->irq_rerun) {
++ phydev->irq_rerun = 0;
++ enable_irq(phydev->irq);
++ irq_wake_thread(phydev->irq, phydev);
++ }
++ }
++
+ if (phydev->attached_dev && phydev->adjust_link)
+ phy_start_machine(phydev);
+
+@@ -1487,6 +1519,7 @@ error:
+
+ error_module_put:
+ module_put(d->driver->owner);
++ d->driver = NULL;
+ error_put_device:
+ put_device(d);
+ if (ndev_owner != bus->owner)
+@@ -1746,6 +1779,9 @@ void phy_detach(struct phy_device *phydev)
+ phy_driver_is_genphy_10g(phydev))
+ device_release_driver(&phydev->mdio.dev);
+
++ /* Assert the reset signal */
++ phy_device_reset(phydev, 1);
++
+ /*
+ * The phydev might go away on the put_device() below, so avoid
+ * a use-after-free bug by reading the underlying bus first.
+@@ -1757,9 +1793,6 @@ void phy_detach(struct phy_device *phydev)
+ ndev_owner = dev->dev.parent->driver->owner;
+ if (ndev_owner != bus->owner)
+ module_put(bus->owner);
+-
+- /* Assert the reset signal */
+- phy_device_reset(phydev, 1);
+ }
+ EXPORT_SYMBOL(phy_detach);
+
+@@ -3019,8 +3052,6 @@ static int phy_probe(struct device *dev)
+ if (phydrv->flags & PHY_IS_INTERNAL)
+ phydev->is_internal = true;
+
+- mutex_lock(&phydev->lock);
+-
+ /* Deassert the reset signal */
+ phy_device_reset(phydev, 0);
+
+@@ -3030,6 +3061,8 @@ static int phy_probe(struct device *dev)
+ goto out;
+ }
+
++ phy_disable_interrupts(phydev);
++
+ /* Start out supporting everything. Eventually,
+ * a controller will attach, and may modify one
+ * or both of these values
+@@ -3088,12 +3121,10 @@ static int phy_probe(struct device *dev)
+ phydev->state = PHY_READY;
+
+ out:
+- /* Assert the reset signal */
++ /* Re-assert the reset signal on error */
+ if (err)
+ phy_device_reset(phydev, 1);
+
+- mutex_unlock(&phydev->lock);
+-
+ return err;
+ }
+
+@@ -3103,9 +3134,7 @@ static int phy_remove(struct device *dev)
+
+ cancel_delayed_work_sync(&phydev->state_queue);
+
+- mutex_lock(&phydev->lock);
+ phydev->state = PHY_DOWN;
+- mutex_unlock(&phydev->lock);
+
+ sfp_bus_del_upstream(phydev->sfp_bus);
+ phydev->sfp_bus = NULL;
+@@ -3121,16 +3150,6 @@ static int phy_remove(struct device *dev)
+ return 0;
+ }
+
+-static void phy_shutdown(struct device *dev)
+-{
+- struct phy_device *phydev = to_phy_device(dev);
+-
+- if (phydev->state == PHY_READY || !phydev->attached_dev)
+- return;
+-
+- phy_disable_interrupts(phydev);
+-}
+-
+ /**
+ * phy_driver_register - register a phy_driver with the PHY layer
+ * @new_driver: new phy_driver to register
+@@ -3154,7 +3173,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
+ new_driver->mdiodrv.driver.bus = &mdio_bus_type;
+ new_driver->mdiodrv.driver.probe = phy_probe;
+ new_driver->mdiodrv.driver.remove = phy_remove;
+- new_driver->mdiodrv.driver.shutdown = phy_shutdown;
+ new_driver->mdiodrv.driver.owner = owner;
+ new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
+
+@@ -3226,23 +3244,30 @@ static int __init phy_init(void)
+ {
+ int rc;
+
++ ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
++
+ rc = mdio_bus_init();
+ if (rc)
+- return rc;
++ goto err_ethtool_phy_ops;
+
+- ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
+ features_init();
+
+ rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
+ if (rc)
+- goto err_c45;
++ goto err_mdio_bus;
+
+ rc = phy_driver_register(&genphy_driver, THIS_MODULE);
+- if (rc) {
+- phy_driver_unregister(&genphy_c45_driver);
++ if (rc)
++ goto err_c45;
++
++ return 0;
++
+ err_c45:
+- mdio_bus_exit();
+- }
++ phy_driver_unregister(&genphy_c45_driver);
++err_mdio_bus:
++ mdio_bus_exit();
++err_ethtool_phy_ops:
++ ethtool_set_ethtool_phy_ops(NULL);
+
+ return rc;
+ }
+diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
+index 0a0abe8e4be0b..422dc92ecac94 100644
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -657,6 +657,7 @@ static void phylink_resolve(struct work_struct *w)
+ struct phylink_link_state link_state;
+ struct net_device *ndev = pl->netdev;
+ bool mac_config = false;
++ bool retrigger = false;
+ bool cur_link_state;
+
+ mutex_lock(&pl->state_mutex);
+@@ -670,6 +671,7 @@ static void phylink_resolve(struct work_struct *w)
+ link_state.link = false;
+ } else if (pl->mac_link_dropped) {
+ link_state.link = false;
++ retrigger = true;
+ } else {
+ switch (pl->cur_link_an_mode) {
+ case MLO_AN_PHY:
+@@ -686,6 +688,19 @@ static void phylink_resolve(struct work_struct *w)
+ case MLO_AN_INBAND:
+ phylink_mac_pcs_get_state(pl, &link_state);
+
++ /* The PCS may have a latching link-fail indicator.
++ * If the link was up, bring the link down and
++ * re-trigger the resolve. Otherwise, re-read the
++ * PCS state to get the current status of the link.
++ */
++ if (!link_state.link) {
++ if (cur_link_state)
++ retrigger = true;
++ else
++ phylink_mac_pcs_get_state(pl,
++ &link_state);
++ }
++
+ /* If we have a phy, the "up" state is the union of
+ * both the PHY and the MAC
+ */
+@@ -694,6 +709,15 @@ static void phylink_resolve(struct work_struct *w)
+
+ /* Only update if the PHY link is up */
+ if (pl->phydev && pl->phy_state.link) {
++ /* If the interface has changed, force a
++ * link down event if the link isn't already
++ * down, and re-resolve.
++ */
++ if (link_state.interface !=
++ pl->phy_state.interface) {
++ retrigger = true;
++ link_state.link = false;
++ }
+ link_state.interface = pl->phy_state.interface;
+
+ /* If we have a PHY, we need to update with
+@@ -736,7 +760,7 @@ static void phylink_resolve(struct work_struct *w)
+ else
+ phylink_link_up(pl, link_state);
+ }
+- if (!link_state.link && pl->mac_link_dropped) {
++ if (!link_state.link && retrigger) {
+ pl->mac_link_dropped = false;
+ queue_work(system_power_efficient_wq, &pl->resolve);
+ }
+@@ -1026,6 +1050,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
+ if (phy_interrupt_is_valid(phy))
+ phy_request_interrupt(phy);
+
++ if (pl->config->mac_managed_pm)
++ phy->mac_managed_pm = true;
++
+ return 0;
+ }
+
+@@ -1139,10 +1166,9 @@ int phylink_fwnode_phy_connect(struct phylink *pl,
+
+ ret = phy_attach_direct(pl->netdev, phy_dev, flags,
+ pl->link_interface);
+- if (ret) {
+- phy_device_free(phy_dev);
++ phy_device_free(phy_dev);
++ if (ret)
+ return ret;
+- }
+
+ ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface);
+ if (ret)
+@@ -1333,7 +1359,10 @@ void phylink_suspend(struct phylink *pl, bool mac_wol)
+ * but one would hope all packets have been sent. This
+ * also means phylink_resolve() will do nothing.
+ */
+- netif_carrier_off(pl->netdev);
++ if (pl->netdev)
++ netif_carrier_off(pl->netdev);
++ else
++ pl->old_link_state = false;
+
+ /* We do not call mac_link_down() here as we want the
+ * link to remain up to receive the WoL packets.
+@@ -1724,7 +1753,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
+ return -EOPNOTSUPP;
+
+ if (!phylink_test(pl->supported, Asym_Pause) &&
+- !pause->autoneg && pause->rx_pause != pause->tx_pause)
++ pause->rx_pause != pause->tx_pause)
+ return -EINVAL;
+
+ pause_state = 0;
+diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
+index 7362f8c3271c9..4369d6249e7bb 100644
+--- a/drivers/net/phy/sfp-bus.c
++++ b/drivers/net/phy/sfp-bus.c
+@@ -74,6 +74,12 @@ static const struct sfp_quirk sfp_quirks[] = {
+ .vendor = "HUAWEI",
+ .part = "MA5671A",
+ .modes = sfp_quirk_2500basex,
++ }, {
++ // Lantech 8330-262D-E can operate at 2500base-X, but
++ // incorrectly report 2500MBd NRZ in their EEPROM
++ .vendor = "Lantech",
++ .part = "8330-262D-E",
++ .modes = sfp_quirk_2500basex,
+ }, {
+ .vendor = "UBNT",
+ .part = "UF-INSTANT",
+@@ -651,6 +657,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
+ else if (ret < 0)
+ return ERR_PTR(ret);
+
++ if (!fwnode_device_is_available(ref.fwnode)) {
++ fwnode_handle_put(ref.fwnode);
++ return NULL;
++ }
++
+ bus = sfp_bus_get(ref.fwnode);
+ fwnode_handle_put(ref.fwnode);
+ if (!bus)
+diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
+index ab77a9f439ef9..d5918605eae6f 100644
+--- a/drivers/net/phy/sfp.c
++++ b/drivers/net/phy/sfp.c
+@@ -208,6 +208,12 @@ static const enum gpiod_flags gpio_flags[] = {
+ */
+ #define SFP_PHY_ADDR 22
+
++/* SFP_EEPROM_BLOCK_SIZE is the size of data chunk to read the EEPROM
++ * at a time. Some SFP modules and also some Linux I2C drivers do not like
++ * reads longer than 16 bytes.
++ */
++#define SFP_EEPROM_BLOCK_SIZE 16
++
+ struct sff_data {
+ unsigned int gpios;
+ bool (*module_supported)(const struct sfp_eeprom_id *id);
+@@ -250,6 +256,7 @@ struct sfp {
+ struct sfp_eeprom_id id;
+ unsigned int module_power_mW;
+ unsigned int module_t_start_up;
++ bool tx_fault_ignore;
+
+ #if IS_ENABLED(CONFIG_HWMON)
+ struct sfp_diag diag;
+@@ -1641,17 +1648,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp)
+ static int sfp_module_parse_power(struct sfp *sfp)
+ {
+ u32 power_mW = 1000;
++ bool supports_a2;
+
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power_mW = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power_mW = 2000;
+
++ supports_a2 = sfp->id.ext.sff8472_compliance !=
++ SFP_SFF8472_COMPLIANCE_NONE ||
++ sfp->id.ext.diagmon & SFP_DIAGMON_DDM;
++
+ if (power_mW > sfp->max_power_mW) {
+ /* Module power specification exceeds the allowed maximum. */
+- if (sfp->id.ext.sff8472_compliance ==
+- SFP_SFF8472_COMPLIANCE_NONE &&
+- !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) {
++ if (!supports_a2) {
+ /* The module appears not to implement bus address
+ * 0xa2, so assume that the module powers up in the
+ * indicated mode.
+@@ -1668,11 +1678,25 @@ static int sfp_module_parse_power(struct sfp *sfp)
+ }
+ }
+
++ if (power_mW <= 1000) {
++ /* Modules below 1W do not require a power change sequence */
++ sfp->module_power_mW = power_mW;
++ return 0;
++ }
++
++ if (!supports_a2) {
++ /* The module power level is below the host maximum and the
++ * module appears not to implement bus address 0xa2, so assume
++ * that the module powers up in the indicated mode.
++ */
++ return 0;
++ }
++
+ /* If the module requires a higher power mode, but also requires
+ * an address change sequence, warn the user that the module may
+ * not be functional.
+ */
+- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) {
++ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) {
+ dev_warn(sfp->dev,
+ "Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n",
+ power_mW / 1000, (power_mW / 100) % 10);
+@@ -1788,11 +1812,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
+ u8 check;
+ int ret;
+
+- /* Some SFP modules and also some Linux I2C drivers do not like reads
+- * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at
+- * a time.
+- */
+- sfp->i2c_block_size = 16;
++ sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE;
+
+ ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base));
+ if (ret < 0) {
+@@ -1928,6 +1948,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
+ else
+ sfp->module_t_start_up = T_START_UP;
+
++ if (!memcmp(id.base.vendor_name, "HUAWEI ", 16) &&
++ !memcmp(id.base.vendor_pn, "MA5671A ", 16))
++ sfp->tx_fault_ignore = true;
++ else
++ sfp->tx_fault_ignore = false;
++
+ return 0;
+ }
+
+@@ -2380,7 +2406,10 @@ static void sfp_check_state(struct sfp *sfp)
+ mutex_lock(&sfp->st_mutex);
+ state = sfp_get_state(sfp);
+ changed = state ^ sfp->state;
+- changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
++ if (sfp->tx_fault_ignore)
++ changed &= SFP_F_PRESENT | SFP_F_LOS;
++ else
++ changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
+
+ for (i = 0; i < GPIO_MAX; i++)
+ if (changed & BIT(i))
+@@ -2435,6 +2464,7 @@ static struct sfp *sfp_alloc(struct device *dev)
+ return ERR_PTR(-ENOMEM);
+
+ sfp->dev = dev;
++ sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE;
+
+ mutex_init(&sfp->sm_mutex);
+ mutex_init(&sfp->st_mutex);
+@@ -2477,7 +2507,7 @@ static int sfp_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, sfp);
+
+- err = devm_add_action(sfp->dev, sfp_cleanup, sfp);
++ err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp);
+ if (err < 0)
+ return err;
+
+diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
+index d8cac02a79b95..36dcf6c7f445d 100644
+--- a/drivers/net/phy/smsc.c
++++ b/drivers/net/phy/smsc.c
+@@ -57,8 +57,6 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev)
+
+ static int smsc_phy_config_intr(struct phy_device *phydev)
+ {
+- struct smsc_phy_priv *priv = phydev->priv;
+- u16 intmask = 0;
+ int rc;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+@@ -66,12 +64,10 @@ static int smsc_phy_config_intr(struct phy_device *phydev)
+ if (rc)
+ return rc;
+
+- intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6;
+- if (priv->energy_enable)
+- intmask |= MII_LAN83C185_ISF_INT7;
+- rc = phy_write(phydev, MII_LAN83C185_IM, intmask);
++ rc = phy_write(phydev, MII_LAN83C185_IM,
++ MII_LAN83C185_ISF_INT_PHYLIB_EVENTS);
+ } else {
+- rc = phy_write(phydev, MII_LAN83C185_IM, intmask);
++ rc = phy_write(phydev, MII_LAN83C185_IM, 0);
+ if (rc)
+ return rc;
+
+@@ -83,13 +79,7 @@ static int smsc_phy_config_intr(struct phy_device *phydev)
+
+ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
+ {
+- int irq_status, irq_enabled;
+-
+- irq_enabled = phy_read(phydev, MII_LAN83C185_IM);
+- if (irq_enabled < 0) {
+- phy_error(phydev);
+- return IRQ_NONE;
+- }
++ int irq_status;
+
+ irq_status = phy_read(phydev, MII_LAN83C185_ISF);
+ if (irq_status < 0) {
+@@ -97,7 +87,7 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
+ return IRQ_NONE;
+ }
+
+- if (!(irq_status & irq_enabled))
++ if (!(irq_status & MII_LAN83C185_ISF_INT_PHYLIB_EVENTS))
+ return IRQ_NONE;
+
+ phy_trigger_machine(phydev);
+@@ -110,7 +100,7 @@ static int smsc_phy_config_init(struct phy_device *phydev)
+ struct smsc_phy_priv *priv = phydev->priv;
+ int rc;
+
+- if (!priv->energy_enable)
++ if (!priv->energy_enable || phydev->irq != PHY_POLL)
+ return 0;
+
+ rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
+@@ -210,14 +200,19 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev)
+ * response on link pulses to detect presence of plugged Ethernet cable.
+ * The Energy Detect Power-Down mode is enabled again in the end of procedure to
+ * save approximately 220 mW of power if cable is unplugged.
++ * The workaround is only applicable to poll mode. Energy Detect Power-Down may
++ * not be used in interrupt mode lest link change detection becomes unreliable.
+ */
+ static int lan87xx_read_status(struct phy_device *phydev)
+ {
+ struct smsc_phy_priv *priv = phydev->priv;
++ int err;
+
+- int err = genphy_read_status(phydev);
++ err = genphy_read_status(phydev);
++ if (err)
++ return err;
+
+- if (!phydev->link && priv->energy_enable) {
++ if (!phydev->link && priv->energy_enable && phydev->irq == PHY_POLL) {
+ /* Disable EDPD to wake up PHY */
+ int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
+ if (rc < 0)
+diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
+index 8dcb49ed1f3d9..7fd9fe6a602bc 100644
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+
+ if (!priv->phy_dev->drv) {
+ dev_info(dev, "Attached phy not ready\n");
++ put_device(&priv->phy_dev->mdio.dev);
+ return -EPROBE_DEFER;
+ }
+
+diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
+index 82d6094017113..b1776116f9f7d 100644
+--- a/drivers/net/plip/plip.c
++++ b/drivers/net/plip/plip.c
+@@ -446,12 +446,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl,
+ }
+ rcv->state = PLIP_PK_DONE;
+ if (rcv->skb) {
+- kfree_skb(rcv->skb);
++ dev_kfree_skb_irq(rcv->skb);
+ rcv->skb = NULL;
+ }
+ snd->state = PLIP_PK_DONE;
+ if (snd->skb) {
+- dev_kfree_skb(snd->skb);
++ dev_consume_skb_irq(snd->skb);
+ snd->skb = NULL;
+ }
+ spin_unlock_irq(&nl->lock);
+@@ -1107,7 +1107,7 @@ plip_open(struct net_device *dev)
+ /* Any address will do - we take the first. We already
+ have the first two bytes filled with 0xfc, from
+ plip_init_dev(). */
+- const struct in_ifaddr *ifa = rcu_dereference(in_dev->ifa_list);
++ const struct in_ifaddr *ifa = rtnl_dereference(in_dev->ifa_list);
+ if (ifa != NULL) {
+ memcpy(dev->dev_addr+2, &ifa->ifa_local, 4);
+ }
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index fb52cd175b45d..c1f11d1df4cd6 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -69,6 +69,8 @@
+ #define MPHDRLEN 6 /* multilink protocol header length */
+ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */
+
++#define PPP_PROTO_LEN 2
++
+ /*
+ * An instance of /dev/ppp can be associated with either a ppp
+ * interface unit or a ppp channel. In both cases, file->private_data
+@@ -497,6 +499,9 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
+
+ if (!pf)
+ return -ENXIO;
++ /* All PPP packets should start with the 2-byte protocol */
++ if (count < PPP_PROTO_LEN)
++ return -EINVAL;
+ ret = -ENOMEM;
+ skb = alloc_skb(count + pf->hdrlen, GFP_KERNEL);
+ if (!skb)
+@@ -1737,6 +1742,8 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
+ int len;
+ unsigned char *cp;
+
++ skb->dev = ppp->dev;
++
+ if (proto < 0x8000) {
+ #ifdef CONFIG_PPP_FILTER
+ /* check if we should pass this packet */
+@@ -1764,7 +1771,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
+ }
+
+ ++ppp->stats64.tx_packets;
+- ppp->stats64.tx_bytes += skb->len - 2;
++ ppp->stats64.tx_bytes += skb->len - PPP_PROTO_LEN;
+
+ switch (proto) {
+ case PPP_IP:
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index 3619520340b74..e172743948ed7 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
+ path->encap.proto = htons(ETH_P_PPP_SES);
+ path->encap.id = be16_to_cpu(po->num);
+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
++ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
+ path->dev = ctx->dev;
+ ctx->dev = dev;
+
+diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
+index 0fe78826c8fa4..32183f24e63ff 100644
+--- a/drivers/net/ppp/pptp.c
++++ b/drivers/net/ppp/pptp.c
+@@ -24,6 +24,7 @@
+ #include <linux/in.h>
+ #include <linux/ip.h>
+ #include <linux/rcupdate.h>
++#include <linux/security.h>
+ #include <linux/spinlock.h>
+
+ #include <net/sock.h>
+@@ -128,6 +129,23 @@ static void del_chan(struct pppox_sock *sock)
+ spin_unlock(&chan_lock);
+ }
+
++static struct rtable *pptp_route_output(struct pppox_sock *po,
++ struct flowi4 *fl4)
++{
++ struct sock *sk = &po->sk;
++ struct net *net;
++
++ net = sock_net(sk);
++ flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 0,
++ RT_SCOPE_UNIVERSE, IPPROTO_GRE, 0,
++ po->proto.pptp.dst_addr.sin_addr.s_addr,
++ po->proto.pptp.src_addr.sin_addr.s_addr,
++ 0, 0, sock_net_uid(net, sk));
++ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
++
++ return ip_route_output_flow(net, fl4, sk);
++}
++
+ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+ {
+ struct sock *sk = (struct sock *) chan->private;
+@@ -151,11 +169,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+ if (sk_pppox(po)->sk_state & PPPOX_DEAD)
+ goto tx_error;
+
+- rt = ip_route_output_ports(net, &fl4, NULL,
+- opt->dst_addr.sin_addr.s_addr,
+- opt->src_addr.sin_addr.s_addr,
+- 0, 0, IPPROTO_GRE,
+- RT_TOS(0), sk->sk_bound_dev_if);
++ rt = pptp_route_output(po, &fl4);
+ if (IS_ERR(rt))
+ goto tx_error;
+
+@@ -438,12 +452,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
+ po->chan.private = sk;
+ po->chan.ops = &pptp_chan_ops;
+
+- rt = ip_route_output_ports(sock_net(sk), &fl4, sk,
+- opt->dst_addr.sin_addr.s_addr,
+- opt->src_addr.sin_addr.s_addr,
+- 0, 0,
+- IPPROTO_GRE, RT_CONN_FLAGS(sk),
+- sk->sk_bound_dev_if);
++ rt = pptp_route_output(po, &fl4);
+ if (IS_ERR(rt)) {
+ error = -EHOSTUNREACH;
+ goto end;
+diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
+index 5435b5689ce6b..2a3892528ec36 100644
+--- a/drivers/net/slip/slip.c
++++ b/drivers/net/slip/slip.c
+@@ -469,7 +469,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue)
+ spin_lock(&sl->lock);
+
+ if (netif_queue_stopped(dev)) {
+- if (!netif_running(dev))
++ if (!netif_running(dev) || !sl->tty)
+ goto out;
+
+ /* May be we must check transmitter timeout here ?
+diff --git a/drivers/net/slip/slip.h b/drivers/net/slip/slip.h
+index c420e59485221..3d7f88b330c1e 100644
+--- a/drivers/net/slip/slip.h
++++ b/drivers/net/slip/slip.h
+@@ -40,6 +40,8 @@
+ insmod -oslip_maxdev=nnn */
+ #define SL_MTU 296 /* 296; I am used to 600- FvK */
+
++/* some arch define END as assembly function ending, just undef it */
++#undef END
+ /* SLIP protocol characters. */
+ #define END 0300 /* indicates end of frame */
+ #define ESC 0333 /* indicates byte stuffing */
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993fb..45f295403cb55 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+ int can_low_power = 1;
+ if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+ can_low_power = 0;
++ of_node_put(np);
+ if (can_low_power) {
+ /* Enable automatic low-power */
+ sungem_phy_write(phy, 0x1c, 0x9002);
+diff --git a/drivers/net/tap.c b/drivers/net/tap.c
+index 8e3a28ba6b282..bdb05d246b86e 100644
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -523,7 +523,7 @@ static int tap_open(struct inode *inode, struct file *file)
+ q->sock.state = SS_CONNECTED;
+ q->sock.file = file;
+ q->sock.ops = &tap_socket_ops;
+- sock_init_data(&q->sock, &q->sk);
++ sock_init_data_uid(&q->sock, &q->sk, current_fsuid());
+ q->sk.sk_write_space = tap_sock_write_space;
+ q->sk.sk_destruct = tap_sock_destruct;
+ q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
+@@ -714,7 +714,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
+
+ /* Move network header to the right position for VLAN tagged packets */
+ if (eth_type_vlan(skb->protocol) &&
+- __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
+ skb_set_network_header(skb, depth);
+
+ rcu_read_lock();
+@@ -1163,7 +1163,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
+
+ /* Move network header to the right position for VLAN tagged packets */
+ if (eth_type_vlan(skb->protocol) &&
+- __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
+ skb_set_network_header(skb, depth);
+
+ rcu_read_lock();
+@@ -1198,7 +1198,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m,
+ struct xdp_buff *xdp;
+ int i;
+
+- if (ctl && (ctl->type == TUN_MSG_PTR)) {
++ if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
++ ctl && ctl->type == TUN_MSG_PTR) {
+ for (i = 0; i < ctl->num; i++) {
+ xdp = &((struct xdp_buff *)ctl->ptr)[i];
+ tap_get_user_xdp(q, xdp);
+diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
+index dd7917cab2b12..f99df92d211e2 100644
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -1270,10 +1270,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
+ }
+ }
+
+- netif_addr_lock_bh(dev);
+- dev_uc_sync_multiple(port_dev, dev);
+- dev_mc_sync_multiple(port_dev, dev);
+- netif_addr_unlock_bh(dev);
++ if (dev->flags & IFF_UP) {
++ netif_addr_lock_bh(dev);
++ dev_uc_sync_multiple(port_dev, dev);
++ dev_mc_sync_multiple(port_dev, dev);
++ netif_addr_unlock_bh(dev);
++ }
+
+ port->index = -1;
+ list_add_tail_rcu(&port->list, &team->port_list);
+@@ -1344,8 +1346,10 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
+ netdev_rx_handler_unregister(port_dev);
+ team_port_disable_netpoll(port);
+ vlan_vids_del_by_dev(port_dev, dev);
+- dev_uc_unsync(port_dev, dev);
+- dev_mc_unsync(port_dev, dev);
++ if (dev->flags & IFF_UP) {
++ dev_uc_unsync(port_dev, dev);
++ dev_mc_unsync(port_dev, dev);
++ }
+ dev_close(port_dev);
+ team_port_leave(team, port);
+
+@@ -1620,6 +1624,7 @@ static int team_init(struct net_device *dev)
+
+ team->dev = dev;
+ team_set_no_mode(team);
++ team->notifier_ctx = false;
+
+ team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
+ if (!team->pcpu_stats)
+@@ -1695,6 +1700,14 @@ static int team_open(struct net_device *dev)
+
+ static int team_close(struct net_device *dev)
+ {
++ struct team *team = netdev_priv(dev);
++ struct team_port *port;
++
++ list_for_each_entry(port, &team->port_list, list) {
++ dev_uc_unsync(port->dev, dev);
++ dev_mc_unsync(port->dev, dev);
++ }
++
+ return 0;
+ }
+
+@@ -2117,6 +2130,15 @@ static void team_setup_by_port(struct net_device *dev,
+ dev->mtu = port_dev->mtu;
+ memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);
+ eth_hw_addr_inherit(dev, port_dev);
++
++ if (port_dev->flags & IFF_POINTOPOINT) {
++ dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
++ dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
++ } else if ((port_dev->flags & (IFF_BROADCAST | IFF_MULTICAST)) ==
++ (IFF_BROADCAST | IFF_MULTICAST)) {
++ dev->flags |= (IFF_BROADCAST | IFF_MULTICAST);
++ dev->flags &= ~(IFF_POINTOPOINT | IFF_NOARP);
++ }
+ }
+
+ static int team_dev_type_check_change(struct net_device *dev,
+@@ -2173,7 +2195,9 @@ static void team_setup(struct net_device *dev)
+
+ dev->hw_features = TEAM_VLAN_FEATURES |
+ NETIF_F_HW_VLAN_CTAG_RX |
+- NETIF_F_HW_VLAN_CTAG_FILTER;
++ NETIF_F_HW_VLAN_CTAG_FILTER |
++ NETIF_F_HW_VLAN_STAG_RX |
++ NETIF_F_HW_VLAN_STAG_FILTER;
+
+ dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+ dev->features |= dev->hw_features;
+@@ -3004,7 +3028,11 @@ static int team_device_event(struct notifier_block *unused,
+ team_del_slave(port->team->dev, dev);
+ break;
+ case NETDEV_FEAT_CHANGE:
+- team_compute_features(port->team);
++ if (!port->team->notifier_ctx) {
++ port->team->notifier_ctx = true;
++ team_compute_features(port->team);
++ port->team->notifier_ctx = false;
++ }
+ break;
+ case NETDEV_PRECHANGEMTU:
+ /* Forbid to change mtu of underlaying device */
+diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
+index 9a6a8353e1921..3395dcb0b262b 100644
+--- a/drivers/net/thunderbolt.c
++++ b/drivers/net/thunderbolt.c
+@@ -612,18 +612,13 @@ static void tbnet_connected_work(struct work_struct *work)
+ return;
+ }
+
+- /* Both logins successful so enable the high-speed DMA paths and
+- * start the network device queue.
++ /* Both logins successful so enable the rings, high-speed DMA
++ * paths and start the network device queue.
++ *
++ * Note we enable the DMA paths last to make sure we have primed
++ * the Rx ring before any incoming packets are allowed to
++ * arrive.
+ */
+- ret = tb_xdomain_enable_paths(net->xd, net->local_transmit_path,
+- net->rx_ring.ring->hop,
+- net->remote_transmit_path,
+- net->tx_ring.ring->hop);
+- if (ret) {
+- netdev_err(net->dev, "failed to enable DMA paths\n");
+- return;
+- }
+-
+ tb_ring_start(net->tx_ring.ring);
+ tb_ring_start(net->rx_ring.ring);
+
+@@ -635,10 +630,21 @@ static void tbnet_connected_work(struct work_struct *work)
+ if (ret)
+ goto err_free_rx_buffers;
+
++ ret = tb_xdomain_enable_paths(net->xd, net->local_transmit_path,
++ net->rx_ring.ring->hop,
++ net->remote_transmit_path,
++ net->tx_ring.ring->hop);
++ if (ret) {
++ netdev_err(net->dev, "failed to enable DMA paths\n");
++ goto err_free_tx_buffers;
++ }
++
+ netif_carrier_on(net->dev);
+ netif_start_queue(net->dev);
+ return;
+
++err_free_tx_buffers:
++ tbnet_free_buffers(&net->tx_ring);
+ err_free_rx_buffers:
+ tbnet_free_buffers(&net->rx_ring);
+ err_stop_rings:
+@@ -896,6 +902,7 @@ static int tbnet_open(struct net_device *dev)
+ tbnet_start_poll, net);
+ if (!ring) {
+ netdev_err(dev, "failed to allocate Rx ring\n");
++ tb_xdomain_release_out_hopid(xd, hopid);
+ tb_ring_free(net->tx_ring.ring);
+ net->tx_ring.ring = NULL;
+ return -ENOMEM;
+@@ -1371,12 +1378,21 @@ static int __init tbnet_init(void)
+ TBNET_MATCH_FRAGS_ID | TBNET_64K_FRAMES);
+
+ ret = tb_register_property_dir("network", tbnet_dir);
+- if (ret) {
+- tb_property_free_dir(tbnet_dir);
+- return ret;
+- }
++ if (ret)
++ goto err_free_dir;
+
+- return tb_register_service_driver(&tbnet_driver);
++ ret = tb_register_service_driver(&tbnet_driver);
++ if (ret)
++ goto err_unregister;
++
++ return 0;
++
++err_unregister:
++ tb_unregister_property_dir("network", tbnet_dir);
++err_free_dir:
++ tb_property_free_dir(tbnet_dir);
++
++ return ret;
+ }
+ module_init(tbnet_init);
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index fecc9a1d293ae..e685c84ebe3a3 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -209,6 +209,9 @@ struct tun_struct {
+ struct tun_prog __rcu *steering_prog;
+ struct tun_prog __rcu *filter_prog;
+ struct ethtool_link_ksettings link_ksettings;
++ /* init args */
++ struct file *file;
++ struct ifreq *ifr;
+ };
+
+ struct veth {
+@@ -216,6 +219,9 @@ struct veth {
+ __be16 h_vlan_TCI;
+ };
+
++static void tun_flow_init(struct tun_struct *tun);
++static void tun_flow_uninit(struct tun_struct *tun);
++
+ static int tun_napi_receive(struct napi_struct *napi, int budget)
+ {
+ struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+@@ -268,6 +274,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+ }
+ }
+
++static void tun_napi_enable(struct tun_file *tfile)
++{
++ if (tfile->napi_enabled)
++ napi_enable(&tfile->napi);
++}
++
+ static void tun_napi_disable(struct tun_file *tfile)
+ {
+ if (tfile->napi_enabled)
+@@ -629,7 +641,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
+ tun = rtnl_dereference(tfile->tun);
+
+ if (tun && clean) {
+- tun_napi_disable(tfile);
++ if (!tfile->detached)
++ tun_napi_disable(tfile);
+ tun_napi_del(tfile);
+ }
+
+@@ -648,8 +661,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
+ if (clean) {
+ RCU_INIT_POINTER(tfile->tun, NULL);
+ sock_put(&tfile->sk);
+- } else
++ } else {
+ tun_disable_queue(tun, tfile);
++ tun_napi_disable(tfile);
++ }
+
+ synchronize_net();
+ tun_flow_delete_by_queue(tun, tun->numqueues + 1);
+@@ -672,7 +687,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
+ if (tun)
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+- sock_put(&tfile->sk);
+ }
+ }
+
+@@ -688,6 +702,9 @@ static void tun_detach(struct tun_file *tfile, bool clean)
+ if (dev)
+ netdev_state_change(dev);
+ rtnl_unlock();
++
++ if (clean)
++ sock_put(&tfile->sk);
+ }
+
+ static void tun_detach_all(struct net_device *dev)
+@@ -722,6 +739,7 @@ static void tun_detach_all(struct net_device *dev)
+ sock_put(&tfile->sk);
+ }
+ list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
++ tun_napi_del(tfile);
+ tun_enable_queue(tfile);
+ tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+@@ -802,6 +820,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
+
+ if (tfile->detached) {
+ tun_enable_queue(tfile);
++ tun_napi_enable(tfile);
+ } else {
+ sock_hold(&tfile->sk);
+ tun_napi_init(tun, tfile, napi, napi_frags);
+@@ -953,6 +972,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
+
+ static const struct ethtool_ops tun_ethtool_ops;
+
++static int tun_net_init(struct net_device *dev)
++{
++ struct tun_struct *tun = netdev_priv(dev);
++ struct ifreq *ifr = tun->ifr;
++ int err;
++
++ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ if (!dev->tstats)
++ return -ENOMEM;
++
++ spin_lock_init(&tun->lock);
++
++ err = security_tun_dev_alloc_security(&tun->security);
++ if (err < 0) {
++ free_percpu(dev->tstats);
++ return err;
++ }
++
++ tun_flow_init(tun);
++
++ dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
++ TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
++ NETIF_F_HW_VLAN_STAG_TX;
++ dev->features = dev->hw_features | NETIF_F_LLTX;
++ dev->vlan_features = dev->features &
++ ~(NETIF_F_HW_VLAN_CTAG_TX |
++ NETIF_F_HW_VLAN_STAG_TX);
++
++ tun->flags = (tun->flags & ~TUN_FEATURES) |
++ (ifr->ifr_flags & TUN_FEATURES);
++
++ INIT_LIST_HEAD(&tun->disabled);
++ err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI,
++ ifr->ifr_flags & IFF_NAPI_FRAGS, false);
++ if (err < 0) {
++ tun_flow_uninit(tun);
++ security_tun_dev_free_security(tun->security);
++ free_percpu(dev->tstats);
++ return err;
++ }
++ return 0;
++}
++
+ /* Net device detach from fd. */
+ static void tun_net_uninit(struct net_device *dev)
+ {
+@@ -1010,6 +1072,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct tun_struct *tun = netdev_priv(dev);
+ int txq = skb->queue_mapping;
++ struct netdev_queue *queue;
+ struct tun_file *tfile;
+ int len = skb->len;
+
+@@ -1036,7 +1099,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ goto drop;
+
+ len = run_ebpf_filter(tun, skb, len);
+- if (len == 0 || pskb_trim(skb, len))
++ if (len == 0)
++ goto drop;
++
++ if (pskb_trim(skb, len))
+ goto drop;
+
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+@@ -1054,6 +1120,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (ptr_ring_produce(&tfile->tx_ring, skb))
+ goto drop;
+
++ /* NETIF_F_LLTX requires to do our own update of trans_start */
++ queue = netdev_get_tx_queue(dev, txq);
++ queue->trans_start = jiffies;
++
+ /* Notify and wake up reader process */
+ if (tfile->flags & TUN_FASYNC)
+ kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+@@ -1164,6 +1234,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier)
+ }
+
+ static const struct net_device_ops tun_netdev_ops = {
++ .ndo_init = tun_net_init,
+ .ndo_uninit = tun_net_uninit,
+ .ndo_open = tun_net_open,
+ .ndo_stop = tun_net_close,
+@@ -1247,6 +1318,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+ }
+
+ static const struct net_device_ops tap_netdev_ops = {
++ .ndo_init = tun_net_init,
+ .ndo_uninit = tun_net_uninit,
+ .ndo_open = tun_net_open,
+ .ndo_stop = tun_net_close,
+@@ -1287,7 +1359,7 @@ static void tun_flow_uninit(struct tun_struct *tun)
+ #define MAX_MTU 65535
+
+ /* Initialize net device. */
+-static void tun_net_init(struct net_device *dev)
++static void tun_net_initialize(struct net_device *dev)
+ {
+ struct tun_struct *tun = netdev_priv(dev);
+
+@@ -1375,7 +1447,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+ int err;
+ int i;
+
+- if (it->nr_segs > MAX_SKB_FRAGS + 1)
++ if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
++ len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
+ return ERR_PTR(-EMSGSIZE);
+
+ local_bh_disable();
+@@ -1501,7 +1574,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+ if (zerocopy)
+ return false;
+
+- if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
++ if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return false;
+
+@@ -1878,17 +1951,25 @@ drop:
+ skb_headlen(skb));
+
+ if (unlikely(headlen > skb_headlen(skb))) {
++ WARN_ON_ONCE(1);
++ err = -ENOMEM;
+ atomic_long_inc(&tun->dev->rx_dropped);
++napi_busy:
+ napi_free_frags(&tfile->napi);
+ rcu_read_unlock();
+ mutex_unlock(&tfile->napi_mutex);
+- WARN_ON(1);
+- return -ENOMEM;
++ return err;
+ }
+
+- local_bh_disable();
+- napi_gro_frags(&tfile->napi);
+- local_bh_enable();
++ if (likely(napi_schedule_prep(&tfile->napi))) {
++ local_bh_disable();
++ napi_gro_frags(&tfile->napi);
++ napi_complete(&tfile->napi);
++ local_bh_enable();
++ } else {
++ err = -EBUSY;
++ goto napi_busy;
++ }
+ mutex_unlock(&tfile->napi_mutex);
+ } else if (tfile->napi_enabled) {
+ struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+@@ -2201,11 +2282,6 @@ static void tun_free_netdev(struct net_device *dev)
+ BUG_ON(!(list_empty(&tun->disabled)));
+
+ free_percpu(dev->tstats);
+- /* We clear tstats so that tun_set_iff() can tell if
+- * tun_free_netdev() has been called from register_netdevice().
+- */
+- dev->tstats = NULL;
+-
+ tun_flow_uninit(tun);
+ security_tun_dev_free_security(tun->security);
+ __tun_set_ebpf(tun, &tun->steering_prog, NULL);
+@@ -2438,7 +2514,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+ if (!tun)
+ return -EBADFD;
+
+- if (ctl && (ctl->type == TUN_MSG_PTR)) {
++ if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
++ ctl && ctl->type == TUN_MSG_PTR) {
+ struct tun_page tpage;
+ int n = ctl->num;
+ int flush = 0;
+@@ -2711,41 +2788,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+ tun->rx_batched = 0;
+ RCU_INIT_POINTER(tun->steering_prog, NULL);
+
+- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+- if (!dev->tstats) {
+- err = -ENOMEM;
+- goto err_free_dev;
+- }
+-
+- spin_lock_init(&tun->lock);
+-
+- err = security_tun_dev_alloc_security(&tun->security);
+- if (err < 0)
+- goto err_free_stat;
+-
+- tun_net_init(dev);
+- tun_flow_init(tun);
+-
+- dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
+- TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+- NETIF_F_HW_VLAN_STAG_TX;
+- dev->features = dev->hw_features | NETIF_F_LLTX;
+- dev->vlan_features = dev->features &
+- ~(NETIF_F_HW_VLAN_CTAG_TX |
+- NETIF_F_HW_VLAN_STAG_TX);
++ tun->ifr = ifr;
++ tun->file = file;
+
+- tun->flags = (tun->flags & ~TUN_FEATURES) |
+- (ifr->ifr_flags & TUN_FEATURES);
+-
+- INIT_LIST_HEAD(&tun->disabled);
+- err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
+- ifr->ifr_flags & IFF_NAPI_FRAGS, false);
+- if (err < 0)
+- goto err_free_flow;
++ tun_net_initialize(dev);
+
+ err = register_netdevice(tun->dev);
+- if (err < 0)
+- goto err_detach;
++ if (err < 0) {
++ free_netdev(dev);
++ return err;
++ }
+ /* free_netdev() won't check refcnt, to avoid race
+ * with dev_put() we need publish tun after registration.
+ */
+@@ -2762,24 +2814,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+
+ strcpy(ifr->ifr_name, tun->dev->name);
+ return 0;
+-
+-err_detach:
+- tun_detach_all(dev);
+- /* We are here because register_netdevice() has failed.
+- * If register_netdevice() already called tun_free_netdev()
+- * while dealing with the error, dev->stats has been cleared.
+- */
+- if (!dev->tstats)
+- goto err_free_dev;
+-
+-err_free_flow:
+- tun_flow_uninit(tun);
+- security_tun_dev_free_security(tun->security);
+-err_free_stat:
+- free_percpu(dev->tstats);
+-err_free_dev:
+- free_netdev(dev);
+- return err;
+ }
+
+ static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
+@@ -3377,7 +3411,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
+ tfile->socket.file = file;
+ tfile->socket.ops = &tun_socket_ops;
+
+- sock_init_data(&tfile->socket, &tfile->sk);
++ sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
+
+ tfile->sk.sk_write_space = tun_sock_write_space;
+ tfile->sk.sk_sndbuf = INT_MAX;
+diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
+index b554054a7560a..8939e5fbd50a8 100644
+--- a/drivers/net/usb/Kconfig
++++ b/drivers/net/usb/Kconfig
+@@ -636,8 +636,9 @@ config USB_NET_AQC111
+ * Aquantia AQtion USB to 5GbE
+
+ config USB_RTL8153_ECM
+- tristate "RTL8153 ECM support"
++ tristate
+ depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n)
++ default y
+ help
+ This option supports ECM mode for RTL8153 ethernet adapter, when
+ CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not
+diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
+index 73b97f4cc1ec1..e8d49886d6953 100644
+--- a/drivers/net/usb/aqc111.c
++++ b/drivers/net/usb/aqc111.c
+@@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ if (start_of_descs != desc_offset)
+ goto err;
+
+- /* self check desc_offset from header*/
+- if (desc_offset >= skb_len)
++ /* self check desc_offset from header and make sure that the
++ * bounds of the metadata array are inside the SKB
++ */
++ if (pkt_count * 2 + desc_offset >= skb_len)
+ goto err;
+
++ /* Packets must not overlap the metadata array */
++ skb_trim(skb, desc_offset);
++
+ if (pkt_count == 0)
+ goto err;
+
+diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
+index 2a1e31defe718..c126df1c13ee7 100644
+--- a/drivers/net/usb/asix.h
++++ b/drivers/net/usb/asix.h
+@@ -126,8 +126,7 @@
+ AX_MEDIUM_RE)
+
+ #define AX88772_MEDIUM_DEFAULT \
+- (AX_MEDIUM_FD | AX_MEDIUM_RFC | \
+- AX_MEDIUM_TFC | AX_MEDIUM_PS | \
++ (AX_MEDIUM_FD | AX_MEDIUM_PS | \
+ AX_MEDIUM_AC | AX_MEDIUM_RE)
+
+ /* AX88772 & AX88178 RX_CTL values */
+@@ -192,8 +191,8 @@ extern const struct driver_info ax88172a_info;
+ /* ASIX specific flags */
+ #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */
+
+-int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+- u16 size, void *data, int in_pm);
++int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
++ u16 size, void *data, int in_pm);
+
+ int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+ u16 size, void *data, int in_pm);
+diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
+index 38cda590895cc..00c23f1d1c946 100644
+--- a/drivers/net/usb/asix_common.c
++++ b/drivers/net/usb/asix_common.c
+@@ -9,8 +9,10 @@
+
+ #include "asix.h"
+
+-int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+- u16 size, void *data, int in_pm)
++#define AX_HOST_EN_RETRIES 30
++
++int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
++ u16 size, void *data, int in_pm)
+ {
+ int ret;
+ int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
+@@ -25,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
+ ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+ value, index, data, size);
+
+- if (unlikely(ret < 0))
++ if (unlikely(ret < size)) {
++ ret = ret < 0 ? ret : -ENODATA;
++
+ netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n",
+ index, ret);
++ }
+
+ return ret;
+ }
+@@ -68,7 +73,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm)
+ int i, ret;
+ u8 smsr;
+
+- for (i = 0; i < 30; ++i) {
++ for (i = 0; i < AX_HOST_EN_RETRIES; ++i) {
+ ret = asix_set_sw_mii(dev, in_pm);
+ if (ret == -ENODEV || ret == -ETIMEDOUT)
+ break;
+@@ -83,7 +88,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm)
+ break;
+ }
+
+- return ret;
++ return i >= AX_HOST_EN_RETRIES ? -ETIMEDOUT : ret;
+ }
+
+ static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx)
+@@ -426,6 +431,7 @@ void asix_adjust_link(struct net_device *netdev)
+
+ asix_write_medium_mode(dev, mode, 0);
+ phy_print_status(phydev);
++ usbnet_link_change(dev, phydev->link, 0);
+ }
+
+ int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
+@@ -577,8 +583,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
+ return ret;
+ }
+
+- asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
+- (__u16)loc, 2, &res, 1);
++ ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
++ (__u16)loc, 2, &res, 1);
++ if (ret < 0) {
++ mutex_unlock(&dev->phy_mutex);
++ return ret;
++ }
+ asix_set_hw_mii(dev, 1);
+ mutex_unlock(&dev->phy_mutex);
+
+diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
+index 30821f6a6d7ac..396505396a2e4 100644
+--- a/drivers/net/usb/asix_devices.c
++++ b/drivers/net/usb/asix_devices.c
+@@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
+ priv->phy_addr = ret;
+ priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10);
+
+- asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
++ ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
++ if (ret < 0) {
++ netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret);
++ return ret;
++ }
++
+ chipcode &= AX_CHIPCODE_MASK;
+
+ ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) :
+@@ -794,11 +799,7 @@ static int ax88772_stop(struct usbnet *dev)
+ {
+ struct asix_common_private *priv = dev->driver_priv;
+
+- /* On unplugged USB, we will get MDIO communication errors and the
+- * PHY will be set in to PHY_HALTED state.
+- */
+- if (priv->phydev->state != PHY_HALTED)
+- phy_stop(priv->phydev);
++ phy_stop(priv->phydev);
+
+ return 0;
+ }
+@@ -920,11 +921,21 @@ static int ax88178_reset(struct usbnet *dev)
+ int gpio0 = 0;
+ u32 phyid;
+
+- asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
++ ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
++ if (ret < 0) {
++ netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret);
++ return ret;
++ }
++
+ netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status);
+
+ asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0);
+- asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
++ ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
++ if (ret < 0) {
++ netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret);
++ return ret;
++ }
++
+ asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0);
+
+ netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom);
+diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
+index f25448a088707..0a2c3860179e7 100644
+--- a/drivers/net/usb/ax88179_178a.c
++++ b/drivers/net/usb/ax88179_178a.c
+@@ -1467,58 +1467,119 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ u16 hdr_off;
+ u32 *pkt_hdr;
+
+- /* This check is no longer done by usbnet */
+- if (skb->len < dev->net->hard_header_len)
++ /* At the end of the SKB, there's a header telling us how many packets
++ * are bundled into this buffer and where we can find an array of
++ * per-packet metadata (which contains elements encoded into u16).
++ */
++
++ /* SKB contents for current firmware:
++ * <packet 1> <padding>
++ * ...
++ * <packet N> <padding>
++ * <per-packet metadata entry 1> <dummy header>
++ * ...
++ * <per-packet metadata entry N> <dummy header>
++ * <padding2> <rx_hdr>
++ *
++ * where:
++ * <packet N> contains pkt_len bytes:
++ * 2 bytes of IP alignment pseudo header
++ * packet received
++ * <per-packet metadata entry N> contains 4 bytes:
++ * pkt_len and fields AX_RXHDR_*
++ * <padding> 0-7 bytes to terminate at
++ * 8 bytes boundary (64-bit).
++ * <padding2> 4 bytes to make rx_hdr terminate at
++ * 8 bytes boundary (64-bit)
++ * <dummy-header> contains 4 bytes:
++ * pkt_len=0 and AX_RXHDR_DROP_ERR
++ * <rx-hdr> contains 4 bytes:
++ * pkt_cnt and hdr_off (offset of
++ * <per-packet metadata entry 1>)
++ *
++ * pkt_cnt is number of entrys in the per-packet metadata.
++ * In current firmware there is 2 entrys per packet.
++ * The first points to the packet and the
++ * second is a dummy header.
++ * This was done probably to align fields in 64-bit and
++ * maintain compatibility with old firmware.
++ * This code assumes that <dummy header> and <padding2> are
++ * optional.
++ */
++
++ if (skb->len < 4)
+ return 0;
+-
+ skb_trim(skb, skb->len - 4);
+ rx_hdr = get_unaligned_le32(skb_tail_pointer(skb));
+-
+ pkt_cnt = (u16)rx_hdr;
+ hdr_off = (u16)(rx_hdr >> 16);
++
++ if (pkt_cnt == 0)
++ return 0;
++
++ /* Make sure that the bounds of the metadata array are inside the SKB
++ * (and in front of the counter at the end).
++ */
++ if (pkt_cnt * 4 + hdr_off > skb->len)
++ return 0;
+ pkt_hdr = (u32 *)(skb->data + hdr_off);
+
+- while (pkt_cnt--) {
++ /* Packets must not overlap the metadata array */
++ skb_trim(skb, hdr_off);
++
++ for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) {
++ u16 pkt_len_plus_padd;
+ u16 pkt_len;
+
+ le32_to_cpus(pkt_hdr);
+ pkt_len = (*pkt_hdr >> 16) & 0x1fff;
++ pkt_len_plus_padd = (pkt_len + 7) & 0xfff8;
++
++ /* Skip dummy header used for alignment
++ */
++ if (pkt_len == 0)
++ continue;
++
++ if (pkt_len_plus_padd > skb->len)
++ return 0;
+
+ /* Check CRC or runt packet */
+- if ((*pkt_hdr & AX_RXHDR_CRC_ERR) ||
+- (*pkt_hdr & AX_RXHDR_DROP_ERR)) {
+- skb_pull(skb, (pkt_len + 7) & 0xFFF8);
+- pkt_hdr++;
++ if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) ||
++ pkt_len < 2 + ETH_HLEN) {
++ dev->net->stats.rx_errors++;
++ skb_pull(skb, pkt_len_plus_padd);
+ continue;
+ }
+
+- if (pkt_cnt == 0) {
+- skb->len = pkt_len;
++ /* last packet */
++ if (pkt_len_plus_padd == skb->len) {
++ skb_trim(skb, pkt_len);
++
+ /* Skip IP alignment pseudo header */
+ skb_pull(skb, 2);
+- skb_set_tail_pointer(skb, skb->len);
+- skb->truesize = pkt_len + sizeof(struct sk_buff);
++
++ skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd);
+ ax88179_rx_checksum(skb, pkt_hdr);
+ return 1;
+ }
+
+ ax_skb = skb_clone(skb, GFP_ATOMIC);
+- if (ax_skb) {
+- ax_skb->len = pkt_len;
+- /* Skip IP alignment pseudo header */
+- skb_pull(ax_skb, 2);
+- skb_set_tail_pointer(ax_skb, ax_skb->len);
+- ax_skb->truesize = pkt_len + sizeof(struct sk_buff);
+- ax88179_rx_checksum(ax_skb, pkt_hdr);
+- usbnet_skb_return(dev, ax_skb);
+- } else {
++ if (!ax_skb)
+ return 0;
+- }
++ skb_trim(ax_skb, pkt_len);
++
++ /* Skip IP alignment pseudo header */
++ skb_pull(ax_skb, 2);
++
++ skb->truesize = pkt_len_plus_padd +
++ SKB_DATA_ALIGN(sizeof(struct sk_buff));
++ ax88179_rx_checksum(ax_skb, pkt_hdr);
++ usbnet_skb_return(dev, ax_skb);
+
+- skb_pull(skb, (pkt_len + 7) & 0xFFF8);
+- pkt_hdr++;
++ skb_pull(skb, pkt_len_plus_padd);
+ }
+- return 1;
++
++ return 0;
+ }
+
+ static struct sk_buff *
+diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
+index eb3817d70f2b8..695e4efdc0114 100644
+--- a/drivers/net/usb/cdc_ether.c
++++ b/drivers/net/usb/cdc_ether.c
+@@ -583,6 +583,11 @@ static const struct usb_device_id products[] = {
+ .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
+ .bInterfaceProtocol = USB_CDC_PROTO_NONE
+
++#define ZAURUS_FAKE_INTERFACE \
++ .bInterfaceClass = USB_CLASS_COMM, \
++ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \
++ .bInterfaceProtocol = USB_CDC_PROTO_NONE
++
+ /* SA-1100 based Sharp Zaurus ("collie"), or compatible;
+ * wire-incompatible with true CDC Ethernet implementations.
+ * (And, it seems, needlessly so...)
+@@ -612,9 +617,23 @@ static const struct usb_device_id products[] = {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
++ .idProduct = 0x8005, /* A-300 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = 0,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = 0,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x8006, /* B-500/SL-5600 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = 0,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+@@ -622,6 +641,13 @@ static const struct usb_device_id products[] = {
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = 0,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x8007, /* C-700 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = 0,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+@@ -636,6 +662,13 @@ static const struct usb_device_id products[] = {
+ .idProduct = 0x9032, /* SL-6000 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = 0,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x9032, /* SL-6000 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = 0,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+@@ -764,6 +797,13 @@ static const struct usb_device_id products[] = {
+ },
+ #endif
+
++/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */
++{
++ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM,
++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
++ .driver_info = 0,
++},
++
+ /* ThinkPad USB-C Dock (based on Realtek RTL8153) */
+ {
+ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM,
+@@ -981,6 +1021,12 @@ static const struct usb_device_id products[] = {
+ USB_CDC_SUBCLASS_ETHERNET,
+ USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&wwan_info,
++}, {
++ /* Cinterion PLS62-W modem by GEMALTO/THALES */
++ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM,
++ USB_CDC_SUBCLASS_ETHERNET,
++ USB_CDC_PROTO_NONE),
++ .driver_info = (unsigned long)&wwan_info,
+ }, {
+ /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */
+ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM,
+diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
+index 82bb5ed94c485..a3ccf0cee093c 100644
+--- a/drivers/net/usb/cdc_mbim.c
++++ b/drivers/net/usb/cdc_mbim.c
+@@ -659,6 +659,16 @@ static const struct usb_device_id mbim_devs[] = {
+ .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
+ },
+
++ /* Telit FN990 */
++ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
++ .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
++ },
++
++ /* Telit FE990 */
++ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1081, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
++ .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
++ },
++
+ /* default entry */
+ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&cdc_mbim_info_zlp,
+diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
+index 24753a4da7e60..dd7469d310c34 100644
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -180,7 +180,12 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
+ else
+ min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);
+
+- max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
++ if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0)
++ max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
++ else
++ max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize),
++ USB_CDC_NCM_NTB_MIN_OUT_SIZE,
++ CDC_NCM_NTB_MAX_SIZE_TX);
+
+ /* some devices set dwNtbOutMaxSize too low for the above default */
+ min = min(min, max);
+@@ -1241,6 +1246,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
+ * further.
+ */
+ if (skb_out == NULL) {
++ /* If even the smallest allocation fails, abort. */
++ if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE)
++ goto alloc_failed;
+ ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
+ (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
+ ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
+@@ -1259,13 +1267,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
+ skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+
+ /* No allocation possible so we will abort */
+- if (skb_out == NULL) {
+- if (skb != NULL) {
+- dev_kfree_skb_any(skb);
+- dev->net->stats.tx_dropped++;
+- }
+- goto exit_no_skb;
+- }
++ if (!skb_out)
++ goto alloc_failed;
+ ctx->tx_low_mem_val--;
+ }
+ if (ctx->is_ndp16) {
+@@ -1458,6 +1461,11 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
+
+ return skb_out;
+
++alloc_failed:
++ if (skb) {
++ dev_kfree_skb_any(skb);
++ dev->net->stats.tx_dropped++;
++ }
+ exit_no_skb:
+ /* Start timer, if there is a remaining non-empty skb */
+ if (ctx->tx_curr_skb != NULL && n > 0)
+@@ -1713,10 +1721,10 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
+ {
+ struct sk_buff *skb;
+ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+- int len;
++ unsigned int len;
+ int nframes;
+ int x;
+- int offset;
++ unsigned int offset;
+ union {
+ struct usb_cdc_ncm_ndp16 *ndp16;
+ struct usb_cdc_ncm_ndp32 *ndp32;
+@@ -1788,8 +1796,8 @@ next_ndp:
+ break;
+ }
+
+- /* sanity checking */
+- if (((offset + len) > skb_in->len) ||
++ /* sanity checking - watch out for integer wrap*/
++ if ((offset > skb_in->len) || (len > skb_in->len - offset) ||
+ (len > ctx->rx_max) || (len < ETH_HLEN)) {
+ netif_dbg(dev, rx_err, dev->net,
+ "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n",
+diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
+index 06e2181e58108..d56e276e4d805 100644
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone)
+ if (tx_buf == NULL)
+ goto free_rx_urb;
+
+- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
++ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ GFP_KERNEL, &rx_urb->transfer_dma);
+ if (rx_buf == NULL)
+ goto free_tx_buf;
+@@ -146,7 +146,7 @@ error_nomem:
+
+ static void ipheth_free_urbs(struct ipheth_device *iphone)
+ {
+- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
++ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
+ iphone->rx_urb->transfer_dma);
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
+ iphone->tx_urb->transfer_dma);
+@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags)
+
+ usb_fill_bulk_urb(dev->rx_urb, udev,
+ usb_rcvbulkpipe(udev, dev->bulk_in),
+- dev->rx_buf, IPHETH_BUF_SIZE,
++ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ ipheth_rcvbulk_callback,
+ dev);
+ dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
+index fc5895f85cee2..a552bb1665b8a 100644
+--- a/drivers/net/usb/kalmia.c
++++ b/drivers/net/usb/kalmia.c
+@@ -65,8 +65,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len,
+ init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT);
+ if (status != 0) {
+ netdev_err(dev->net,
+- "Error sending init packet. Status %i, length %i\n",
+- status, act_len);
++ "Error sending init packet. Status %i\n",
++ status);
+ return status;
+ }
+ else if (act_len != init_msg_len) {
+@@ -83,8 +83,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len,
+
+ if (status != 0)
+ netdev_err(dev->net,
+- "Error receiving init result. Status %i, length %i\n",
+- status, act_len);
++ "Error receiving init result. Status %i\n",
++ status);
+ else if (act_len != expected_len)
+ netdev_err(dev->net, "Unexpected init result length: %i\n",
+ act_len);
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 63cd72c5f580c..5700c9d20a3e2 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -76,6 +76,8 @@
+ #define LAN7801_USB_PRODUCT_ID (0x7801)
+ #define LAN78XX_EEPROM_MAGIC (0x78A5)
+ #define LAN78XX_OTP_MAGIC (0x78F3)
++#define AT29M2AF_USB_VENDOR_ID (0x07C9)
++#define AT29M2AF_USB_PRODUCT_ID (0x0012)
+
+ #define MII_READ 1
+ #define MII_WRITE 0
+@@ -1948,33 +1950,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
+ static void lan78xx_link_status_change(struct net_device *net)
+ {
+ struct phy_device *phydev = net->phydev;
+- int temp;
+-
+- /* At forced 100 F/H mode, chip may fail to set mode correctly
+- * when cable is switched between long(~50+m) and short one.
+- * As workaround, set to 10 before setting to 100
+- * at forced 100 F/H mode.
+- */
+- if (!phydev->autoneg && (phydev->speed == 100)) {
+- /* disable phy interrupt */
+- temp = phy_read(phydev, LAN88XX_INT_MASK);
+- temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+- phy_write(phydev, LAN88XX_INT_MASK, temp);
+
+- temp = phy_read(phydev, MII_BMCR);
+- temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+- phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
+- temp |= BMCR_SPEED100;
+- phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
+-
+- /* clear pending interrupt generated while workaround */
+- temp = phy_read(phydev, LAN88XX_INT_STS);
+-
+- /* enable phy interrupt back */
+- temp = phy_read(phydev, LAN88XX_INT_MASK);
+- temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+- phy_write(phydev, LAN88XX_INT_MASK, temp);
+- }
++ phy_print_status(phydev);
+ }
+
+ static int irq_map(struct irq_domain *d, unsigned int irq,
+@@ -2228,7 +2205,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
+ if (dev->domain_data.phyirq > 0)
+ phydev->irq = dev->domain_data.phyirq;
+ else
+- phydev->irq = 0;
++ phydev->irq = PHY_POLL;
+ netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq);
+
+ /* set to AUTOMDIX */
+@@ -4734,6 +4711,10 @@ static const struct usb_device_id products[] = {
+ /* LAN7801 USB Gigabit Ethernet Device */
+ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID),
+ },
++ {
++ /* ATM2-AF USB Gigabit Ethernet Device */
++ USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID),
++ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(usb, products);
+diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
+index 66866bef25df7..a31a3b9cbd58d 100644
+--- a/drivers/net/usb/mcs7830.c
++++ b/drivers/net/usb/mcs7830.c
+@@ -108,8 +108,16 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver";
+
+ static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data)
+ {
+- return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ,
+- 0x0000, index, data, size);
++ int ret;
++
++ ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ,
++ 0x0000, index, data, size);
++ if (ret < 0)
++ return ret;
++ else if (ret < size)
++ return -ENODATA;
++
++ return ret;
+ }
+
+ static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data)
+diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
+index 6a92a3fef75e5..cd063f45785b7 100644
+--- a/drivers/net/usb/pegasus.c
++++ b/drivers/net/usb/pegasus.c
+@@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb)
+ goto goon;
+
+ rx_status = buf[count - 2];
+- if (rx_status & 0x1e) {
++ if (rx_status & 0x1c) {
+ netif_dbg(pegasus, rx_err, net,
+ "RX packet error %x\n", rx_status);
+ net->stats.rx_errors++;
+- if (rx_status & 0x06) /* long or runt */
++ if (rx_status & 0x04) /* runt */
+ net->stats.rx_length_errors++;
+ if (rx_status & 0x08)
+ net->stats.rx_crc_errors++;
+diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c
+index 17c9c63b8eebb..ce7862dac2b75 100644
+--- a/drivers/net/usb/plusb.c
++++ b/drivers/net/usb/plusb.c
+@@ -57,9 +57,7 @@
+ static inline int
+ pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index)
+ {
+- return usbnet_read_cmd(dev, req,
+- USB_DIR_IN | USB_TYPE_VENDOR |
+- USB_RECIP_DEVICE,
++ return usbnet_write_cmd(dev, req, USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+ val, index, NULL, 0);
+ }
+
+diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
+index 33ada2c59952e..5c516bf4d3a5f 100644
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1085,6 +1085,7 @@ static const struct usb_device_id products[] = {
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */
+ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */
++ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0801)}, /* Quectel RM520N */
+
+ /* 3. Combined interface devices matching on interface number */
+ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
+@@ -1216,7 +1217,9 @@ static const struct usb_device_id products[] = {
+ {QMI_FIXED_INTF(0x05c6, 0x9080, 8)},
+ {QMI_FIXED_INTF(0x05c6, 0x9083, 3)},
+ {QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
++ {QMI_QUIRK_SET_DTR(0x05c6, 0x9091, 2)}, /* Compal RXM-G1 */
+ {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */
++ {QMI_QUIRK_SET_DTR(0x05c6, 0x90db, 2)}, /* Compal RXM-G1 */
+ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
+ {QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+ {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
+@@ -1320,7 +1323,7 @@ static const struct usb_device_id products[] = {
+ {QMI_FIXED_INTF(0x2001, 0x7e3d, 4)}, /* D-Link DWM-222 A2 */
+ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */
+ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */
+- {QMI_FIXED_INTF(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */
++ {QMI_QUIRK_SET_DTR(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */
+ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */
+ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */
+ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */
+@@ -1352,9 +1355,12 @@ static const struct usb_device_id products[] = {
+ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
+ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */
++ {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */
++ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */
++ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */
+ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
+ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
+ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
+@@ -1391,10 +1397,13 @@ static const struct usb_device_id products[] = {
+ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */
+ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */
++ {QMI_FIXED_INTF(0x413c, 0x81c2, 8)}, /* Dell Wireless 5811e */
+ {QMI_FIXED_INTF(0x413c, 0x81cc, 8)}, /* Dell Wireless 5816e */
+ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */
+ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */
+ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
++ {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/
++ {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */
+ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
+ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
+@@ -1403,10 +1412,12 @@ static const struct usb_device_id products[] = {
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */
+ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
++ {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */
+ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
+ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
+ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/
+ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
++ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+
+ /* 4. Gobi 1000 devices */
+ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
+diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
+index f329e39100a7d..4cd9bcca84c5b 100644
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -32,7 +32,7 @@
+ #define NETNEXT_VERSION "12"
+
+ /* Information for net */
+-#define NET_VERSION "11"
++#define NET_VERSION "13"
+
+ #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
+ #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
+@@ -199,6 +199,7 @@
+ #define OCP_EEE_AR 0xa41a
+ #define OCP_EEE_DATA 0xa41c
+ #define OCP_PHY_STATUS 0xa420
++#define OCP_INTR_EN 0xa424
+ #define OCP_NCTL_CFG 0xa42c
+ #define OCP_POWER_CFG 0xa430
+ #define OCP_EEE_CFG 0xa432
+@@ -620,6 +621,9 @@ enum spd_duplex {
+ #define PHY_STAT_LAN_ON 3
+ #define PHY_STAT_PWRDN 5
+
++/* OCP_INTR_EN */
++#define INTR_SPEED_FORCE BIT(3)
++
+ /* OCP_NCTL_CFG */
+ #define PGA_RETURN_EN BIT(1)
+
+@@ -770,6 +774,7 @@ enum rtl8152_flags {
+ RX_EPROTO,
+ };
+
++#define DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK 0x3054
+ #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082
+ #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387
+
+@@ -1871,7 +1876,9 @@ static void intr_callback(struct urb *urb)
+ "Stop submitting intr, status %d\n", status);
+ return;
+ case -EOVERFLOW:
+- netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n");
++ if (net_ratelimit())
++ netif_info(tp, intr, tp->netdev,
++ "intr status -EOVERFLOW\n");
+ goto resubmit;
+ /* -EPIPE: should clear the halt */
+ default:
+@@ -2618,6 +2625,9 @@ static int r8152_poll(struct napi_struct *napi, int budget)
+ struct r8152 *tp = container_of(napi, struct r8152, napi);
+ int work_done;
+
++ if (!budget)
++ return 0;
++
+ work_done = rx_bottom(tp, budget);
+
+ if (work_done < budget) {
+@@ -3013,12 +3023,16 @@ static int rtl_enable(struct r8152 *tp)
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data);
+
+ switch (tp->version) {
+- case RTL_VER_08:
+- case RTL_VER_09:
+- case RTL_VER_14:
+- r8153b_rx_agg_chg_indicate(tp);
++ case RTL_VER_01:
++ case RTL_VER_02:
++ case RTL_VER_03:
++ case RTL_VER_04:
++ case RTL_VER_05:
++ case RTL_VER_06:
++ case RTL_VER_07:
+ break;
+ default:
++ r8153b_rx_agg_chg_indicate(tp);
+ break;
+ }
+
+@@ -3072,7 +3086,6 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
+ 640 / 8);
+ ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EXTRA_AGGR_TMR,
+ ocp_data);
+- r8153b_rx_agg_chg_indicate(tp);
+ break;
+
+ default:
+@@ -3106,7 +3119,6 @@ static void r8153_set_rx_early_size(struct r8152 *tp)
+ case RTL_VER_15:
+ ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE,
+ ocp_data / 8);
+- r8153b_rx_agg_chg_indicate(tp);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+@@ -4016,6 +4028,11 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type)
+ ocp_write_word(tp, type, PLA_BP_BA, 0);
+ }
+
++static inline void rtl_reset_ocp_base(struct r8152 *tp)
++{
++ tp->ocp_base = -1;
++}
++
+ static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait)
+ {
+ u16 data, check;
+@@ -4087,8 +4104,6 @@ static int rtl_post_ram_code(struct r8152 *tp, u16 key_addr, bool wait)
+
+ rtl_phy_patch_request(tp, false, wait);
+
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base);
+-
+ return 0;
+ }
+
+@@ -4800,6 +4815,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy,
+ u32 len;
+ u8 *data;
+
++ rtl_reset_ocp_base(tp);
++
+ if (sram_read(tp, SRAM_GPHY_FW_VER) >= __le16_to_cpu(phy->version)) {
+ dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n");
+ return;
+@@ -4845,7 +4862,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy,
+ }
+ }
+
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base);
++ rtl_reset_ocp_base(tp);
++
+ rtl_phy_patch_request(tp, false, wait);
+
+ if (sram_read(tp, SRAM_GPHY_FW_VER) == __le16_to_cpu(phy->version))
+@@ -4861,6 +4879,8 @@ static int rtl8152_fw_phy_ver(struct r8152 *tp, struct fw_phy_ver *phy_ver)
+ ver_addr = __le16_to_cpu(phy_ver->ver.addr);
+ ver = __le16_to_cpu(phy_ver->ver.data);
+
++ rtl_reset_ocp_base(tp);
++
+ if (sram_read(tp, ver_addr) >= ver) {
+ dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n");
+ return 0;
+@@ -4877,6 +4897,8 @@ static void rtl8152_fw_phy_fixup(struct r8152 *tp, struct fw_phy_fixup *fix)
+ {
+ u16 addr, data;
+
++ rtl_reset_ocp_base(tp);
++
+ addr = __le16_to_cpu(fix->setting.addr);
+ data = ocp_reg_read(tp, addr);
+
+@@ -4908,6 +4930,8 @@ static void rtl8152_fw_phy_union_apply(struct r8152 *tp, struct fw_phy_union *ph
+ u32 length;
+ int i, num;
+
++ rtl_reset_ocp_base(tp);
++
+ num = phy->pre_num;
+ for (i = 0; i < num; i++)
+ sram_write(tp, __le16_to_cpu(phy->pre_set[i].addr),
+@@ -4938,6 +4962,8 @@ static void rtl8152_fw_phy_nc_apply(struct r8152 *tp, struct fw_phy_nc *phy)
+ u32 length, i, num;
+ __le16 *data;
+
++ rtl_reset_ocp_base(tp);
++
+ mode_reg = __le16_to_cpu(phy->mode_reg);
+ sram_write(tp, mode_reg, __le16_to_cpu(phy->mode_pre));
+ sram_write(tp, __le16_to_cpu(phy->ba_reg),
+@@ -5107,6 +5133,7 @@ post_fw:
+ if (rtl_fw->post_fw)
+ rtl_fw->post_fw(tp);
+
++ rtl_reset_ocp_base(tp);
+ strscpy(rtl_fw->version, fw_hdr->version, RTL_VER_SIZE);
+ dev_info(&tp->intf->dev, "load %s successfully\n", rtl_fw->version);
+ }
+@@ -5889,6 +5916,11 @@ static void r8153_enter_oob(struct r8152 *tp)
+ ocp_data &= ~NOW_IS_OOB;
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
++ /* RX FIFO settings for OOB */
++ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB);
++
+ rtl_disable(tp);
+ rtl_reset_bmu(tp);
+
+@@ -5900,7 +5932,8 @@ static void r8153_enter_oob(struct r8152 *tp)
+
+ wait_oob_link_list_ready(tp);
+
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
++ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
+ switch (tp->version) {
+ case RTL_VER_03:
+@@ -5936,6 +5969,10 @@ static void r8153_enter_oob(struct r8152 *tp)
+ ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
++ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
++ ocp_data |= MCU_BORW_EN;
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
++
+ rxdy_gated_en(tp, false);
+
+ ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+@@ -5951,6 +5988,25 @@ static void rtl8153_disable(struct r8152 *tp)
+ r8153_aldps_en(tp, true);
+ }
+
++static u32 fc_pause_on_auto(struct r8152 *tp)
++{
++ return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 6 * 1024);
++}
++
++static u32 fc_pause_off_auto(struct r8152 *tp)
++{
++ return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 14 * 1024);
++}
++
++static void r8156_fc_parameter(struct r8152 *tp)
++{
++ u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp);
++ u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp);
++
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
++}
++
+ static int rtl8156_enable(struct r8152 *tp)
+ {
+ u32 ocp_data;
+@@ -5959,6 +6015,7 @@ static int rtl8156_enable(struct r8152 *tp)
+ if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ return -ENODEV;
+
++ r8156_fc_parameter(tp);
+ set_tx_qlen(tp);
+ rtl_set_eee_plus(tp);
+ r8153_set_rx_early_timeout(tp);
+@@ -5990,9 +6047,24 @@ static int rtl8156_enable(struct r8152 *tp)
+ ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data);
+ }
+
++ ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
++ ocp_data &= ~FC_PATCH_TASK;
++ ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
++ usleep_range(1000, 2000);
++ ocp_data |= FC_PATCH_TASK;
++ ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
++
+ return rtl_enable(tp);
+ }
+
++static void rtl8156_disable(struct r8152 *tp)
++{
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 0);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 0);
++
++ rtl8153_disable(tp);
++}
++
+ static int rtl8156b_enable(struct r8152 *tp)
+ {
+ u32 ocp_data;
+@@ -6394,38 +6466,6 @@ static void rtl8153c_up(struct r8152 *tp)
+ r8153b_u1u2en(tp, true);
+ }
+
+-static inline u32 fc_pause_on_auto(struct r8152 *tp)
+-{
+- return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 6 * 1024);
+-}
+-
+-static inline u32 fc_pause_off_auto(struct r8152 *tp)
+-{
+- return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 14 * 1024);
+-}
+-
+-static void r8156_fc_parameter(struct r8152 *tp)
+-{
+- u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp);
+- u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp);
+-
+- switch (tp->version) {
+- case RTL_VER_10:
+- case RTL_VER_11:
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8);
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8);
+- break;
+- case RTL_VER_12:
+- case RTL_VER_13:
+- case RTL_VER_15:
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
+- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
+- break;
+- default:
+- break;
+- }
+-}
+-
+ static void rtl8156_change_mtu(struct r8152 *tp)
+ {
+ u32 rx_max_size = mtu_to_size(tp->netdev->mtu);
+@@ -6535,9 +6575,17 @@ static void rtl8156_down(struct r8152 *tp)
+ ocp_data &= ~NOW_IS_OOB;
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
++ /* RX FIFO settings for OOB */
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 1024 / 16);
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 4096 / 16);
++
+ rtl_disable(tp);
+ rtl_reset_bmu(tp);
+
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
++ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
++
+ /* Clear teredo wake event. bit[15:8] is the teredo wakeup
+ * type. Set it to zero. bits[7:0] are the W1C bits about
+ * the events. Set them to all 1 to clear them.
+@@ -6548,6 +6596,10 @@ static void rtl8156_down(struct r8152 *tp)
+ ocp_data |= NOW_IS_OOB;
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
++ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
++ ocp_data |= MCU_BORW_EN;
++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
++
+ rtl_rx_vlan_en(tp, true);
+ rxdy_gated_en(tp, false);
+
+@@ -6584,6 +6636,21 @@ static bool rtl8153_in_nway(struct r8152 *tp)
+ return true;
+ }
+
++static void r8156_mdio_force_mode(struct r8152 *tp)
++{
++ u16 data;
++
++ /* Select force mode through 0xa5b4 bit 15
++ * 0: MDIO force mode
++ * 1: MMD force mode
++ */
++ data = ocp_reg_read(tp, 0xa5b4);
++ if (data & BIT(15)) {
++ data &= ~BIT(15);
++ ocp_reg_write(tp, 0xa5b4, data);
++ }
++}
++
+ static void set_carrier(struct r8152 *tp)
+ {
+ struct net_device *netdev = tp->netdev;
+@@ -7489,6 +7556,11 @@ static void r8156_hw_phy_cfg(struct r8152 *tp)
+ ((swap_a & 0x1f) << 8) |
+ ((swap_a >> 8) & 0x1f));
+ }
++
++ /* Notify the MAC when the speed is changed to force mode. */
++ data = ocp_reg_read(tp, OCP_INTR_EN);
++ data |= INTR_SPEED_FORCE;
++ ocp_reg_write(tp, OCP_INTR_EN, data);
+ break;
+ default:
+ break;
+@@ -7884,6 +7956,11 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp)
+ break;
+ }
+
++ /* Notify the MAC when the speed is changed to force mode. */
++ data = ocp_reg_read(tp, OCP_INTR_EN);
++ data |= INTR_SPEED_FORCE;
++ ocp_reg_write(tp, OCP_INTR_EN, data);
++
+ if (rtl_phy_patch_request(tp, true, true))
+ return;
+
+@@ -8016,6 +8093,7 @@ static void r8156_init(struct r8152 *tp)
+ ocp_data |= ACT_ODMA;
+ ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data);
+
++ r8156_mdio_force_mode(tp);
+ rtl_tally_reset(tp);
+
+ tp->coalesce = 15000; /* 15 us */
+@@ -8145,6 +8223,7 @@ static void r8156b_init(struct r8152 *tp)
+ ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+ ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
++ r8156_mdio_force_mode(tp);
+ rtl_tally_reset(tp);
+
+ tp->coalesce = 15000; /* 15 us */
+@@ -8467,6 +8546,8 @@ static int rtl8152_resume(struct usb_interface *intf)
+
+ mutex_lock(&tp->control);
+
++ rtl_reset_ocp_base(tp);
++
+ if (test_bit(SELECTIVE_SUSPEND, &tp->flags))
+ ret = rtl8152_runtime_resume(tp);
+ else
+@@ -8482,6 +8563,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf)
+ struct r8152 *tp = usb_get_intfdata(intf);
+
+ clear_bit(SELECTIVE_SUSPEND, &tp->flags);
++ rtl_reset_ocp_base(tp);
+ tp->rtl_ops.init(tp);
+ queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
+ set_ethernet_addr(tp, true);
+@@ -9319,7 +9401,7 @@ static int rtl_ops_init(struct r8152 *tp)
+ case RTL_VER_10:
+ ops->init = r8156_init;
+ ops->enable = rtl8156_enable;
+- ops->disable = rtl8153_disable;
++ ops->disable = rtl8156_disable;
+ ops->up = rtl8156_up;
+ ops->down = rtl8156_down;
+ ops->unload = rtl8153_unload;
+@@ -9605,6 +9687,7 @@ static int rtl8152_probe(struct usb_interface *intf,
+
+ if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) {
+ switch (le16_to_cpu(udev->descriptor.idProduct)) {
++ case DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK:
+ case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
+ case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
+ tp->lenovo_macpassthru = 1;
+@@ -9761,8 +9844,10 @@ static const struct usb_device_id rtl8152_table[] = {
+ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab),
+ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6),
+ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927),
++ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e),
+ REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101),
+ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f),
++ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054),
+ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062),
+ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069),
+ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082),
+diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
+index 85a8b96e39a65..e5f6614da5acc 100644
+--- a/drivers/net/usb/rndis_host.c
++++ b/drivers/net/usb/rndis_host.c
+@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
+
+ off = le32_to_cpu(u.get_c->offset);
+ len = le32_to_cpu(u.get_c->len);
+- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE))
++ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) ||
++ (len > CONTROL_BUFFER_SIZE - 8 - off)))
+ goto response_error;
+
+ if (*reply_len != -1 && len != *reply_len)
+@@ -608,6 +609,11 @@ static const struct usb_device_id products [] = {
+ USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042,
+ USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
+ .driver_info = (unsigned long) &rndis_poll_status_info,
++}, {
++ /* Hytera Communications DMR radios' "Radio to PC Network" */
++ USB_VENDOR_AND_INTERFACE_INFO(0x238b,
++ USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
++ .driver_info = (unsigned long)&rndis_info,
+ }, {
+ /* RNDIS is MSFT's un-official variant of CDC ACM */
+ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
+diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
+index 76f7af1613139..7c3e866514199 100644
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -2199,6 +2199,13 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING;
+ align_count = (4 - ((size + RXW_PADDING) % 4)) % 4;
+
++ if (unlikely(size > skb->len)) {
++ netif_dbg(dev, rx_err, dev->net,
++ "size err rx_cmd_a=0x%08x\n",
++ rx_cmd_a);
++ return 0;
++ }
++
+ if (unlikely(rx_cmd_a & RX_CMD_A_RED)) {
+ netif_dbg(dev, rx_err, dev->net,
+ "Error rx_cmd_a=0x%08x\n", rx_cmd_a);
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index 26b1bd8e845b4..649d9f9af6e67 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -84,9 +84,10 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
+ ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN
+ | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+ 0, index, &buf, 4);
+- if (unlikely(ret < 0)) {
+- netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
+- index, ret);
++ if (ret < 0) {
++ if (ret != -ENODEV)
++ netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
++ index, ret);
+ return ret;
+ }
+
+@@ -116,7 +117,7 @@ static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
+ ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT
+ | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+ 0, index, &buf, 4);
+- if (unlikely(ret < 0))
++ if (ret < 0 && ret != -ENODEV)
+ netdev_warn(dev->net, "Failed to write reg index 0x%08x: %d\n",
+ index, ret);
+
+@@ -159,6 +160,9 @@ static int __must_check __smsc95xx_phy_wait_not_busy(struct usbnet *dev,
+ do {
+ ret = __smsc95xx_read_reg(dev, MII_ADDR, &val, in_pm);
+ if (ret < 0) {
++ /* Ignore -ENODEV error during disconnect() */
++ if (ret == -ENODEV)
++ return 0;
+ netdev_warn(dev->net, "Error reading MII_ACCESS\n");
+ return ret;
+ }
+@@ -194,7 +198,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
+ addr = mii_address_cmd(phy_id, idx, MII_READ_ | MII_BUSY_);
+ ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
+ if (ret < 0) {
+- netdev_warn(dev->net, "Error writing MII_ADDR\n");
++ if (ret != -ENODEV)
++ netdev_warn(dev->net, "Error writing MII_ADDR\n");
+ goto done;
+ }
+
+@@ -206,7 +211,8 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
+
+ ret = __smsc95xx_read_reg(dev, MII_DATA, &val, in_pm);
+ if (ret < 0) {
+- netdev_warn(dev->net, "Error reading MII_DATA\n");
++ if (ret != -ENODEV)
++ netdev_warn(dev->net, "Error reading MII_DATA\n");
+ goto done;
+ }
+
+@@ -214,6 +220,10 @@ static int __smsc95xx_mdio_read(struct usbnet *dev, int phy_id, int idx,
+
+ done:
+ mutex_unlock(&dev->phy_mutex);
++
++ /* Ignore -ENODEV error during disconnect() */
++ if (ret == -ENODEV)
++ return 0;
+ return ret;
+ }
+
+@@ -235,7 +245,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
+ val = regval;
+ ret = __smsc95xx_write_reg(dev, MII_DATA, val, in_pm);
+ if (ret < 0) {
+- netdev_warn(dev->net, "Error writing MII_DATA\n");
++ if (ret != -ENODEV)
++ netdev_warn(dev->net, "Error writing MII_DATA\n");
+ goto done;
+ }
+
+@@ -243,7 +254,8 @@ static void __smsc95xx_mdio_write(struct usbnet *dev, int phy_id,
+ addr = mii_address_cmd(phy_id, idx, MII_WRITE_ | MII_BUSY_);
+ ret = __smsc95xx_write_reg(dev, MII_ADDR, addr, in_pm);
+ if (ret < 0) {
+- netdev_warn(dev->net, "Error writing MII_ADDR\n");
++ if (ret != -ENODEV)
++ netdev_warn(dev->net, "Error writing MII_ADDR\n");
+ goto done;
+ }
+
+@@ -552,16 +564,12 @@ static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev)
+ return smsc95xx_write_reg(dev, AFC_CFG, afc_cfg);
+ }
+
+-static int smsc95xx_link_reset(struct usbnet *dev)
++static void smsc95xx_mac_update_fullduplex(struct usbnet *dev)
+ {
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+ unsigned long flags;
+ int ret;
+
+- ret = smsc95xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_);
+- if (ret < 0)
+- return ret;
+-
+ spin_lock_irqsave(&pdata->mac_cr_lock, flags);
+ if (pdata->phydev->duplex != DUPLEX_FULL) {
+ pdata->mac_cr &= ~MAC_CR_FDPX_;
+@@ -573,14 +581,16 @@ static int smsc95xx_link_reset(struct usbnet *dev)
+ spin_unlock_irqrestore(&pdata->mac_cr_lock, flags);
+
+ ret = smsc95xx_write_reg(dev, MAC_CR, pdata->mac_cr);
+- if (ret < 0)
+- return ret;
++ if (ret < 0) {
++ if (ret != -ENODEV)
++ netdev_warn(dev->net,
++ "Error updating MAC full duplex mode\n");
++ return;
++ }
+
+ ret = smsc95xx_phy_update_flowcontrol(dev);
+ if (ret < 0)
+ netdev_warn(dev->net, "Error updating PHY flow control\n");
+-
+- return ret;
+ }
+
+ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+@@ -597,7 +607,7 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+ netif_dbg(dev, link, dev->net, "intdata: 0x%08X\n", intdata);
+
+ if (intdata & INT_ENP_PHY_INT_)
+- usbnet_defer_kevent(dev, EVENT_LINK_RESET);
++ ;
+ else
+ netdev_warn(dev->net, "unexpected interrupt, intdata=0x%08X\n",
+ intdata);
+@@ -1049,6 +1059,15 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
+ .ndo_set_features = smsc95xx_set_features,
+ };
+
++static void smsc95xx_handle_link_change(struct net_device *net)
++{
++ struct usbnet *dev = netdev_priv(net);
++
++ phy_print_status(net->phydev);
++ smsc95xx_mac_update_fullduplex(dev);
++ usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
++}
++
+ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ {
+ struct smsc95xx_priv *pdata;
+@@ -1153,6 +1172,17 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ dev->net->min_mtu = ETH_MIN_MTU;
+ dev->net->max_mtu = ETH_DATA_LEN;
+ dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
++
++ ret = phy_connect_direct(dev->net, pdata->phydev,
++ &smsc95xx_handle_link_change,
++ PHY_INTERFACE_MODE_MII);
++ if (ret) {
++ netdev_err(dev->net, "can't attach PHY to %s\n", pdata->mdiobus->id);
++ goto unregister_mdio;
++ }
++
++ phy_attached_info(dev->net->phydev);
++
+ return 0;
+
+ unregister_mdio:
+@@ -1170,47 +1200,24 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
+ {
+ struct smsc95xx_priv *pdata = dev->driver_priv;
+
++ phy_disconnect(dev->net->phydev);
+ mdiobus_unregister(pdata->mdiobus);
+ mdiobus_free(pdata->mdiobus);
+ netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+ kfree(pdata);
+ }
+
+-static void smsc95xx_handle_link_change(struct net_device *net)
+-{
+- struct usbnet *dev = netdev_priv(net);
+-
+- phy_print_status(net->phydev);
+- usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
+-}
+-
+ static int smsc95xx_start_phy(struct usbnet *dev)
+ {
+- struct smsc95xx_priv *pdata = dev->driver_priv;
+- struct net_device *net = dev->net;
+- int ret;
++ phy_start(dev->net->phydev);
+
+- ret = smsc95xx_reset(dev);
+- if (ret < 0)
+- return ret;
+-
+- ret = phy_connect_direct(net, pdata->phydev,
+- &smsc95xx_handle_link_change,
+- PHY_INTERFACE_MODE_MII);
+- if (ret) {
+- netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id);
+- return ret;
+- }
+-
+- phy_attached_info(net->phydev);
+- phy_start(net->phydev);
+ return 0;
+ }
+
+-static int smsc95xx_disconnect_phy(struct usbnet *dev)
++static int smsc95xx_stop(struct usbnet *dev)
+ {
+ phy_stop(dev->net->phydev);
+- phy_disconnect(dev->net->phydev);
++
+ return 0;
+ }
+
+@@ -1801,6 +1808,12 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ size = (u16)((header & RX_STS_FL_) >> 16);
+ align_count = (4 - ((size + NET_IP_ALIGN) % 4)) % 4;
+
++ if (unlikely(size > skb->len)) {
++ netif_dbg(dev, rx_err, dev->net,
++ "size err header=0x%08x\n", header);
++ return 0;
++ }
++
+ if (unlikely(header & RX_STS_ES_)) {
+ netif_dbg(dev, rx_err, dev->net,
+ "Error header=0x%08x\n", header);
+@@ -1963,9 +1976,9 @@ static const struct driver_info smsc95xx_info = {
+ .description = "smsc95xx USB 2.0 Ethernet",
+ .bind = smsc95xx_bind,
+ .unbind = smsc95xx_unbind,
+- .link_reset = smsc95xx_link_reset,
+- .reset = smsc95xx_start_phy,
+- .stop = smsc95xx_disconnect_phy,
++ .reset = smsc95xx_reset,
++ .check_connect = smsc95xx_start_phy,
++ .stop = smsc95xx_stop,
+ .rx_fixup = smsc95xx_rx_fixup,
+ .tx_fixup = smsc95xx_tx_fixup,
+ .status = smsc95xx_status,
+diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
+index 6516a37893e27..1fac6ee273c4e 100644
+--- a/drivers/net/usb/sr9700.c
++++ b/drivers/net/usb/sr9700.c
+@@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ /* ignore the CRC length */
+ len = (skb->data[1] | (skb->data[2] << 8)) - 4;
+
+- if (len > ETH_FRAME_LEN)
++ if (len > ETH_FRAME_LEN || len > skb->len || len < 0)
+ return 0;
+
+ /* the last packet of current skb */
+diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
+index a33d7fb82a00b..566aa01ad2810 100644
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -847,13 +847,11 @@ int usbnet_stop (struct net_device *net)
+
+ mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
+
+- /* deferred work (task, timer, softirq) must also stop.
+- * can't flush_scheduled_work() until we drop rtnl (later),
+- * else workers could deadlock; so make workers a NOP.
+- */
++ /* deferred work (timer, softirq, task) must also stop */
+ dev->flags = 0;
+ del_timer_sync (&dev->delay);
+ tasklet_kill (&dev->bh);
++ cancel_work_sync(&dev->kevent);
+ if (!pm)
+ usb_autopm_put_interface(dev->intf);
+
+@@ -1601,6 +1599,7 @@ void usbnet_disconnect (struct usb_interface *intf)
+ struct usbnet *dev;
+ struct usb_device *xdev;
+ struct net_device *net;
++ struct urb *urb;
+
+ dev = usb_get_intfdata(intf);
+ usb_set_intfdata(intf, NULL);
+@@ -1614,15 +1613,17 @@ void usbnet_disconnect (struct usb_interface *intf)
+ xdev->bus->bus_name, xdev->devpath,
+ dev->driver_info->description);
+
+- if (dev->driver_info->unbind)
+- dev->driver_info->unbind(dev, intf);
+-
+ net = dev->net;
+ unregister_netdev (net);
+
+- cancel_work_sync(&dev->kevent);
++ while ((urb = usb_get_from_anchor(&dev->deferred))) {
++ dev_kfree_skb(urb->context);
++ kfree(urb->sg);
++ usb_free_urb(urb);
++ }
+
+- usb_scuttle_anchored_urbs(&dev->deferred);
++ if (dev->driver_info->unbind)
++ dev->driver_info->unbind(dev, intf);
+
+ usb_kill_urb(dev->interrupt);
+ usb_free_urb(dev->interrupt);
+@@ -1770,6 +1771,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
+ } else if (!info->in || !info->out)
+ status = usbnet_get_endpoints (dev, udev);
+ else {
++ u8 ep_addrs[3] = {
++ info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0
++ };
++
+ dev->in = usb_rcvbulkpipe (xdev, info->in);
+ dev->out = usb_sndbulkpipe (xdev, info->out);
+ if (!(info->flags & FLAG_NO_SETINT))
+@@ -1779,6 +1784,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
+ else
+ status = 0;
+
++ if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs))
++ status = -EINVAL;
+ }
+ if (status >= 0 && dev->status)
+ status = init_status (dev, udev);
+@@ -2002,7 +2009,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+ cmd, reqtype, value, index, size);
+
+ if (size) {
+- buf = kmalloc(size, GFP_KERNEL);
++ buf = kmalloc(size, GFP_NOIO);
+ if (!buf)
+ goto out;
+ }
+@@ -2034,7 +2041,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
+ cmd, reqtype, value, index, size);
+
+ if (data) {
+- buf = kmemdup(data, size, GFP_KERNEL);
++ buf = kmemdup(data, size, GFP_NOIO);
+ if (!buf)
+ goto out;
+ } else {
+@@ -2135,7 +2142,7 @@ static void usbnet_async_cmd_cb(struct urb *urb)
+ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
+ u16 value, u16 index, const void *data, u16 size)
+ {
+- struct usb_ctrlrequest *req = NULL;
++ struct usb_ctrlrequest *req;
+ struct urb *urb;
+ int err = -ENOMEM;
+ void *buf = NULL;
+@@ -2153,7 +2160,7 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
+ if (!buf) {
+ netdev_err(dev->net, "Error allocating buffer"
+ " in %s!\n", __func__);
+- goto fail_free;
++ goto fail_free_urb;
+ }
+ }
+
+@@ -2177,14 +2184,21 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
+ if (err < 0) {
+ netdev_err(dev->net, "Error submitting the control"
+ " message: status=%d\n", err);
+- goto fail_free;
++ goto fail_free_all;
+ }
+ return 0;
+
++fail_free_all:
++ kfree(req);
+ fail_free_buf:
+ kfree(buf);
+-fail_free:
+- kfree(req);
++ /*
++ * avoid a double free
++ * needed because the flag can be set only
++ * after filling the URB
++ */
++ urb->transfer_flags = 0;
++fail_free_urb:
+ usb_free_urb(urb);
+ fail:
+ return err;
+diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
+index 8e717a0b559b3..df3617c4c44e8 100644
+--- a/drivers/net/usb/zaurus.c
++++ b/drivers/net/usb/zaurus.c
+@@ -256,6 +256,11 @@ static const struct usb_device_id products [] = {
+ .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
+ .bInterfaceProtocol = USB_CDC_PROTO_NONE
+
++#define ZAURUS_FAKE_INTERFACE \
++ .bInterfaceClass = USB_CLASS_COMM, \
++ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \
++ .bInterfaceProtocol = USB_CDC_PROTO_NONE
++
+ /* SA-1100 based Sharp Zaurus ("collie"), or compatible. */
+ {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+@@ -284,9 +289,23 @@ static const struct usb_device_id products [] = {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
++ .idProduct = 0x8005, /* A-300 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = (unsigned long)&bogus_mdlm_info,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = ZAURUS_PXA_INFO,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x8006, /* B-500/SL-5600 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = (unsigned long)&bogus_mdlm_info,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+@@ -294,6 +313,13 @@ static const struct usb_device_id products [] = {
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = ZAURUS_PXA_INFO,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x8007, /* C-700 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = (unsigned long)&bogus_mdlm_info,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+@@ -313,6 +339,13 @@ static const struct usb_device_id products [] = {
+ .idProduct = 0x9032, /* SL-6000 */
+ ZAURUS_MASTER_INTERFACE,
+ .driver_info = ZAURUS_PXA_INFO,
++}, {
++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
++ | USB_DEVICE_ID_MATCH_DEVICE,
++ .idVendor = 0x04DD,
++ .idProduct = 0x9032, /* SL-6000 */
++ ZAURUS_FAKE_INTERFACE,
++ .driver_info = (unsigned long)&bogus_mdlm_info,
+ }, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 50eb43e5bf459..984a153804096 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -272,9 +272,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
+ {
+ /* Write ptr_ring before reading rx_notify_masked */
+ smp_mb();
+- if (!rq->rx_notify_masked) {
+- rq->rx_notify_masked = true;
+- napi_schedule(&rq->xdp_napi);
++ if (!READ_ONCE(rq->rx_notify_masked) &&
++ napi_schedule_prep(&rq->xdp_napi)) {
++ WRITE_ONCE(rq->rx_notify_masked, true);
++ __napi_schedule(&rq->xdp_napi);
+ }
+ }
+
+@@ -319,6 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+ struct veth_rq *rq = NULL;
++ int ret = NETDEV_TX_OK;
+ struct net_device *rcv;
+ int length = skb->len;
+ bool use_napi = false;
+@@ -326,7 +328,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ rcu_read_lock();
+ rcv = rcu_dereference(priv->peer);
+- if (unlikely(!rcv)) {
++ if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
+ kfree_skb(skb);
+ goto drop;
+ }
+@@ -342,7 +344,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ */
+ use_napi = rcu_access_pointer(rq->napi) &&
+ veth_skb_is_eligible_for_gro(dev, rcv, skb);
+- skb_record_rx_queue(skb, rxq);
+ }
+
+ skb_tx_timestamp(skb);
+@@ -352,6 +353,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ } else {
+ drop:
+ atomic64_inc(&priv->dropped);
++ ret = NET_XMIT_DROP;
+ }
+
+ if (use_napi)
+@@ -359,7 +361,7 @@ drop:
+
+ rcu_read_unlock();
+
+- return NETDEV_TX_OK;
++ return ret;
+ }
+
+ static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+@@ -879,8 +881,12 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
+
+ stats->xdp_bytes += skb->len;
+ skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
+- if (skb)
+- napi_gro_receive(&rq->xdp_napi, skb);
++ if (skb) {
++ if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC))
++ netif_receive_skb(skb);
++ else
++ napi_gro_receive(&rq->xdp_napi, skb);
++ }
+ }
+ done++;
+ }
+@@ -912,19 +918,22 @@ static int veth_poll(struct napi_struct *napi, int budget)
+ xdp_set_return_frame_no_direct();
+ done = veth_xdp_rcv(rq, budget, &bq, &stats);
+
++ if (stats.xdp_redirect > 0)
++ xdp_do_flush();
++
+ if (done < budget && napi_complete_done(napi, done)) {
+ /* Write rx_notify_masked before reading ptr_ring */
+ smp_store_mb(rq->rx_notify_masked, false);
+ if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
+- rq->rx_notify_masked = true;
+- napi_schedule(&rq->xdp_napi);
++ if (napi_schedule_prep(&rq->xdp_napi)) {
++ WRITE_ONCE(rq->rx_notify_masked, true);
++ __napi_schedule(&rq->xdp_napi);
++ }
+ }
+ }
+
+ if (stats.xdp_tx > 0)
+ veth_xdp_flush(rq, &bq);
+- if (stats.xdp_redirect > 0)
+- xdp_do_flush();
+ xdp_clear_return_frame_no_direct();
+
+ return done;
+@@ -1647,10 +1656,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
+
+ nla_peer = data[VETH_INFO_PEER];
+ ifmp = nla_data(nla_peer);
+- err = rtnl_nla_parse_ifla(peer_tb,
+- nla_data(nla_peer) + sizeof(struct ifinfomsg),
+- nla_len(nla_peer) - sizeof(struct ifinfomsg),
+- NULL);
++ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
+ if (err < 0)
+ return err;
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 4ad25a8b0870c..3eefe81719254 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+ /* Packet virtio header size */
+ u8 hdr_len;
+
+- /* Work struct for refilling if we run low on memory. */
++ /* Work struct for delayed refilling if we run low on memory. */
+ struct delayed_work refill;
+
++ /* Is delayed refill enabled? */
++ bool refill_enabled;
++
++ /* The lock to synchronize the access to refill_enabled */
++ spinlock_t refill_lock;
++
+ /* Work struct for config space updates */
+ struct work_struct config_work;
+
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+ return p;
+ }
+
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = true;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = false;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+ struct virtqueue *vq)
+ {
+@@ -659,8 +679,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
+ int page_off,
+ unsigned int *len)
+ {
+- struct page *page = alloc_page(GFP_ATOMIC);
++ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
++ struct page *page;
++
++ if (page_off + *len + tailroom > PAGE_SIZE)
++ return NULL;
+
++ page = alloc_page(GFP_ATOMIC);
+ if (!page)
+ return NULL;
+
+@@ -668,7 +693,6 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
+ page_off += *len;
+
+ while (--*num_buf) {
+- int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ unsigned int buflen;
+ void *buf;
+ int off;
+@@ -965,6 +989,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+ * xdp.data_meta were adjusted
+ */
+ len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
++
++ /* recalculate headroom if xdp.data or xdp_data_meta
++ * were adjusted, note that offset should always point
++ * to the start of the reserved bytes for virtio_net
++ * header which are followed by xdp.data, that means
++ * that offset is equal to the headroom (when buf is
++ * starting at the beginning of the page, otherwise
++ * there is a base offset inside the page) but it's used
++ * with a different starting point (buf start) than
++ * xdp.data (buf start + vnet hdr size). If xdp.data or
++ * data_meta were adjusted by the xdp prog then the
++ * headroom size has changed and so has the offset, we
++ * can use data_hard_start, which points at buf start +
++ * vnet hdr size, to calculate the new headroom and use
++ * it later to compute buf start in page_to_skb()
++ */
++ headroom = xdp.data - xdp.data_hard_start - metasize;
++
+ /* We can only create skb based on xdp_page. */
+ if (unlikely(xdp_page != page)) {
+ rcu_read_unlock();
+@@ -972,15 +1014,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+ head_skb = page_to_skb(vi, rq, xdp_page, offset,
+ len, PAGE_SIZE, false,
+ metasize,
+- VIRTIO_XDP_HEADROOM);
++ headroom);
+ return head_skb;
+ }
+ break;
+ case XDP_TX:
+ stats->xdp_tx++;
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+- if (unlikely(!xdpf))
++ if (unlikely(!xdpf)) {
++ if (unlikely(xdp_page != page))
++ put_page(xdp_page);
+ goto err_xdp;
++ }
+ err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+ if (unlikely(!err)) {
+ xdp_return_frame_rx_napi(xdpf);
+@@ -1436,8 +1481,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+ }
+
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+- schedule_delayed_work(&vi->refill, 0);
++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++ spin_lock(&vi->refill_lock);
++ if (vi->refill_enabled)
++ schedule_delayed_work(&vi->refill, 0);
++ spin_unlock(&vi->refill_lock);
++ }
+ }
+
+ u64_stats_update_begin(&rq->stats.syncp);
+@@ -1535,13 +1584,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
+
+ received = virtnet_receive(rq, budget, &xdp_xmit);
+
++ if (xdp_xmit & VIRTIO_XDP_REDIR)
++ xdp_do_flush();
++
+ /* Out of packets? */
+ if (received < budget)
+ virtqueue_napi_complete(napi, rq->vq, received);
+
+- if (xdp_xmit & VIRTIO_XDP_REDIR)
+- xdp_do_flush();
+-
+ if (xdp_xmit & VIRTIO_XDP_TX) {
+ sq = virtnet_xdp_get_sq(vi);
+ if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+@@ -1555,33 +1604,65 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
+ return received;
+ }
+
++static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
++{
++ virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
++ napi_disable(&vi->rq[qp_index].napi);
++ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
++}
++
++static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
++{
++ struct net_device *dev = vi->dev;
++ int err;
++
++ err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
++ vi->rq[qp_index].napi.napi_id);
++ if (err < 0)
++ return err;
++
++ err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
++ MEM_TYPE_PAGE_SHARED, NULL);
++ if (err < 0)
++ goto err_xdp_reg_mem_model;
++
++ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
++ virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
++
++ return 0;
++
++err_xdp_reg_mem_model:
++ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
++ return err;
++}
++
+ static int virtnet_open(struct net_device *dev)
+ {
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, err;
+
++ enable_delayed_refill(vi);
++
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs)
+ /* Make sure we have some buffers: if oom use wq. */
+ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+ schedule_delayed_work(&vi->refill, 0);
+
+- err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id);
++ err = virtnet_enable_queue_pair(vi, i);
+ if (err < 0)
+- return err;
+-
+- err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
+- MEM_TYPE_PAGE_SHARED, NULL);
+- if (err < 0) {
+- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+- return err;
+- }
+-
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+- virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
++ goto err_enable_qp;
+ }
+
+ return 0;
++
++err_enable_qp:
++ disable_delayed_refill(vi);
++ cancel_delayed_work_sync(&vi->refill);
++
++ for (i--; i >= 0; i--)
++ virtnet_disable_queue_pair(vi, i);
++ return err;
+ }
+
+ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
+@@ -1733,8 +1814,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
+ */
+ if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+ netif_stop_subqueue(dev, qnum);
+- if (!use_napi &&
+- unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
++ if (use_napi) {
++ if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
++ virtqueue_napi_schedule(&sq->napi, sq->vq);
++ } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+ /* More just got used, free them then recheck. */
+ free_old_xmit_skbs(sq, false);
+ if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+@@ -1940,14 +2023,13 @@ static int virtnet_close(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
++ /* Make sure NAPI doesn't schedule refill work */
++ disable_delayed_refill(vi);
+ /* Make sure refill_work doesn't re-enable napi! */
+ cancel_delayed_work_sync(&vi->refill);
+
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
+- napi_disable(&vi->rq[i].napi);
+- virtnet_napi_tx_disable(&vi->sq[i].napi);
+- }
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ virtnet_disable_queue_pair(vi, i);
+
+ return 0;
+ }
+@@ -2413,7 +2495,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
+ static void virtnet_freeze_down(struct virtio_device *vdev)
+ {
+ struct virtnet_info *vi = vdev->priv;
+- int i;
+
+ /* Make sure no work handler is accessing the device */
+ flush_work(&vi->config_work);
+@@ -2421,14 +2502,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
+ netif_tx_lock_bh(vi->dev);
+ netif_device_detach(vi->dev);
+ netif_tx_unlock_bh(vi->dev);
+- cancel_delayed_work_sync(&vi->refill);
+-
+- if (netif_running(vi->dev)) {
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- napi_disable(&vi->rq[i].napi);
+- virtnet_napi_tx_disable(&vi->sq[i].napi);
+- }
+- }
++ if (netif_running(vi->dev))
++ virtnet_close(vi->dev);
+ }
+
+ static int init_vqs(struct virtnet_info *vi);
+@@ -2436,7 +2511,7 @@ static int init_vqs(struct virtnet_info *vi);
+ static int virtnet_restore_up(struct virtio_device *vdev)
+ {
+ struct virtnet_info *vi = vdev->priv;
+- int err, i;
++ int err;
+
+ err = init_vqs(vi);
+ if (err)
+@@ -2444,16 +2519,12 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+
+ virtio_device_ready(vdev);
+
+- if (netif_running(vi->dev)) {
+- for (i = 0; i < vi->curr_queue_pairs; i++)
+- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+- schedule_delayed_work(&vi->refill, 0);
++ enable_delayed_refill(vi);
+
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+- &vi->sq[i].napi);
+- }
++ if (netif_running(vi->dev)) {
++ err = virtnet_open(vi->dev);
++ if (err)
++ return err;
+ }
+
+ netif_tx_lock_bh(vi->dev);
+@@ -2770,6 +2841,27 @@ static void free_receive_page_frags(struct virtnet_info *vi)
+ put_page(vi->rq[i].alloc_frag.page);
+ }
+
++static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
++{
++ if (!is_xdp_frame(buf))
++ dev_kfree_skb(buf);
++ else
++ xdp_return_frame(ptr_to_xdp(buf));
++}
++
++static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
++{
++ struct virtnet_info *vi = vq->vdev->priv;
++ int i = vq2rxq(vq);
++
++ if (vi->mergeable_rx_bufs)
++ put_page(virt_to_head_page(buf));
++ else if (vi->big_packets)
++ give_pages(&vi->rq[i], buf);
++ else
++ put_page(virt_to_head_page(buf));
++}
++
+ static void free_unused_bufs(struct virtnet_info *vi)
+ {
+ void *buf;
+@@ -2777,26 +2869,16 @@ static void free_unused_bufs(struct virtnet_info *vi)
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ struct virtqueue *vq = vi->sq[i].vq;
+- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+- if (!is_xdp_frame(buf))
+- dev_kfree_skb(buf);
+- else
+- xdp_return_frame(ptr_to_xdp(buf));
+- }
++ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
++ virtnet_sq_free_unused_buf(vq, buf);
++ cond_resched();
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ struct virtqueue *vq = vi->rq[i].vq;
+-
+- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+- if (vi->mergeable_rx_bufs) {
+- put_page(virt_to_head_page(buf));
+- } else if (vi->big_packets) {
+- give_pages(&vi->rq[i], buf);
+- } else {
+- put_page(virt_to_head_page(buf));
+- }
+- }
++ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
++ virtnet_rq_free_unused_buf(vq, buf);
++ cond_resched();
+ }
+ }
+
+@@ -3157,6 +3239,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+ vdev->priv = vi;
+
+ INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++ spin_lock_init(&vi->refill_lock);
+
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+@@ -3236,22 +3319,28 @@ static int virtnet_probe(struct virtio_device *vdev)
+ }
+ }
+
+- err = register_netdev(dev);
++ /* serialize netdev register + virtio_device_ready() with ndo_open() */
++ rtnl_lock();
++
++ err = register_netdevice(dev);
+ if (err) {
+ pr_debug("virtio_net: registering device failed\n");
++ rtnl_unlock();
+ goto free_failover;
+ }
+
+ virtio_device_ready(vdev);
+
++ _virtnet_set_queues(vi, vi->curr_queue_pairs);
++
++ rtnl_unlock();
++
+ err = virtnet_cpu_notif_add(vi);
+ if (err) {
+ pr_debug("virtio_net: registering cpu notifier failed\n");
+ goto free_unregister_netdev;
+ }
+
+- virtnet_set_queues(vi, vi->curr_queue_pairs);
+-
+ /* Assume link up if device can't report link status,
+ otherwise get link status from config. */
+ netif_carrier_off(dev);
+diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
+index 8799854bacb29..b88092a6bc851 100644
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
+
+ for (i = 0; i < adapter->intr.num_intrs; i++)
+ vmxnet3_enable_intr(adapter, i);
+- adapter->shared->devRead.intrConf.intrCtrl &=
++ if (!VMXNET3_VERSION_GE_6(adapter) ||
++ !adapter->queuesExtEnabled) {
++ adapter->shared->devRead.intrConf.intrCtrl &=
++ cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
++ } else {
++ adapter->shared->devReadExt.intrConfExt.intrCtrl &=
+ cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
++ }
+ }
+
+
+@@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
+ {
+ int i;
+
+- adapter->shared->devRead.intrConf.intrCtrl |=
++ if (!VMXNET3_VERSION_GE_6(adapter) ||
++ !adapter->queuesExtEnabled) {
++ adapter->shared->devRead.intrConf.intrCtrl |=
+ cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
++ } else {
++ adapter->shared->devReadExt.intrConfExt.intrCtrl |=
++ cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
++ }
+ for (i = 0; i < adapter->intr.num_intrs; i++)
+ vmxnet3_disable_intr(adapter, i);
+ }
+@@ -589,6 +601,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
+ if (dma_mapping_error(&adapter->pdev->dev,
+ rbi->dma_addr)) {
+ dev_kfree_skb_any(rbi->skb);
++ rbi->skb = NULL;
+ rq->stats.rx_buf_alloc_failure++;
+ break;
+ }
+@@ -613,6 +626,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
+ if (dma_mapping_error(&adapter->pdev->dev,
+ rbi->dma_addr)) {
+ put_page(rbi->page);
++ rbi->page = NULL;
+ rq->stats.rx_buf_alloc_failure++;
+ break;
+ }
+@@ -1228,6 +1242,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+ (le32_to_cpu(gdesc->dword[3]) &
+ VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ if ((le32_to_cpu(gdesc->dword[0]) &
++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++ skb->csum_level = 1;
++ }
+ WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+ !(le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+@@ -1237,6 +1255,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+ } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
+ (1 << VMXNET3_RCD_TUC_SHIFT))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ if ((le32_to_cpu(gdesc->dword[0]) &
++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++ skb->csum_level = 1;
++ }
+ WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+ !(le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+@@ -1348,6 +1370,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
+ };
+ u32 num_pkts = 0;
+ bool skip_page_frags = false;
++ bool encap_lro = false;
+ struct Vmxnet3_RxCompDesc *rcd;
+ struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
+ u16 segCnt = 0, mss = 0;
+@@ -1506,13 +1529,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
+ if (VMXNET3_VERSION_GE_2(adapter) &&
+ rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
+ struct Vmxnet3_RxCompDescExt *rcdlro;
++ union Vmxnet3_GenericDesc *gdesc;
++
+ rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
++ gdesc = (union Vmxnet3_GenericDesc *)rcd;
+
+ segCnt = rcdlro->segCnt;
+ WARN_ON_ONCE(segCnt == 0);
+ mss = rcdlro->mss;
+ if (unlikely(segCnt <= 1))
+ segCnt = 0;
++ encap_lro = (le32_to_cpu(gdesc->dword[0]) &
++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT));
+ } else {
+ segCnt = 0;
+ }
+@@ -1580,7 +1608,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
+ vmxnet3_rx_csum(adapter, skb,
+ (union Vmxnet3_GenericDesc *)rcd);
+ skb->protocol = eth_type_trans(skb, adapter->netdev);
+- if (!rcd->tcp ||
++ if ((!rcd->tcp && !encap_lro) ||
+ !(adapter->netdev->features & NETIF_F_LRO))
+ goto not_lro;
+
+@@ -1589,7 +1617,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
+ SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_size = mss;
+ skb_shinfo(skb)->gso_segs = segCnt;
+- } else if (segCnt != 0 || skb->len > mtu) {
++ } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) {
+ u32 hlen;
+
+ hlen = vmxnet3_get_hdr_len(adapter, skb,
+@@ -1618,6 +1646,7 @@ not_lro:
+ napi_gro_receive(&rq->napi, skb);
+
+ ctx->skb = NULL;
++ encap_lro = false;
+ num_pkts++;
+ }
+
+@@ -1666,6 +1695,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
+ u32 i, ring_idx;
+ struct Vmxnet3_RxDesc *rxd;
+
++ /* ring has already been cleaned up */
++ if (!rq->rx_ring[0].base)
++ return;
++
+ for (ring_idx = 0; ring_idx < 2; ring_idx++) {
+ for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
+ #ifdef __BIG_ENDIAN_BITFIELD
+@@ -3261,7 +3294,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
+
+ #ifdef CONFIG_PCI_MSI
+ if (adapter->intr.type == VMXNET3_IT_MSIX) {
+- int i, nvec;
++ int i, nvec, nvec_allocated;
+
+ nvec = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
+ 1 : adapter->num_tx_queues;
+@@ -3274,14 +3307,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
+ for (i = 0; i < nvec; i++)
+ adapter->intr.msix_entries[i].entry = i;
+
+- nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
+- if (nvec < 0)
++ nvec_allocated = vmxnet3_acquire_msix_vectors(adapter, nvec);
++ if (nvec_allocated < 0)
+ goto msix_err;
+
+ /* If we cannot allocate one MSIx vector per queue
+ * then limit the number of rx queues to 1
+ */
+- if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
++ if (nvec_allocated == VMXNET3_LINUX_MIN_MSIX_VECT &&
++ nvec != VMXNET3_LINUX_MIN_MSIX_VECT) {
+ if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
+ || adapter->num_rx_queues != 1) {
+ adapter->share_intr = VMXNET3_INTR_TXSHARE;
+@@ -3291,14 +3325,14 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
+ }
+ }
+
+- adapter->intr.num_intrs = nvec;
++ adapter->intr.num_intrs = nvec_allocated;
+ return;
+
+ msix_err:
+ /* If we cannot allocate MSIx vectors use only one rx queue */
+ dev_info(&adapter->pdev->dev,
+ "Failed to enable MSI-X, error %d. "
+- "Limiting #rx queues to 1, try MSI.\n", nvec);
++ "Limiting #rx queues to 1, try MSI.\n", nvec_allocated);
+
+ adapter->intr.type = VMXNET3_IT_MSI;
+ }
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 662e261173539..091dd7caf10cc 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -35,6 +35,7 @@
+ #include <net/l3mdev.h>
+ #include <net/fib_rules.h>
+ #include <net/netns/generic.h>
++#include <net/netfilter/nf_conntrack.h>
+
+ #define DRV_NAME "vrf"
+ #define DRV_VERSION "1.1"
+@@ -424,12 +425,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
+ return NETDEV_TX_OK;
+ }
+
++static void vrf_nf_set_untracked(struct sk_buff *skb)
++{
++ if (skb_get_nfct(skb) == 0)
++ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
++}
++
++static void vrf_nf_reset_ct(struct sk_buff *skb)
++{
++ if (skb_get_nfct(skb) == IP_CT_UNTRACKED)
++ nf_reset_ct(skb);
++}
++
+ #if IS_ENABLED(CONFIG_IPV6)
+ static int vrf_ip6_local_out(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+ {
+ int err;
+
++ vrf_nf_reset_ct(skb);
++
+ err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
+ sk, skb, NULL, skb_dst(skb)->dev, dst_output);
+
+@@ -482,6 +497,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+ /* strip the ethernet header added for pass through VRF device */
+ __skb_pull(skb, skb_network_offset(skb));
+
++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ ret = vrf_ip6_local_out(net, skb->sk, skb);
+ if (unlikely(net_xmit_eval(ret)))
+ dev->stats.tx_errors++;
+@@ -508,6 +524,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk,
+ {
+ int err;
+
++ vrf_nf_reset_ct(skb);
++
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, skb_dst(skb)->dev, dst_output);
+ if (likely(err == 1))
+@@ -562,6 +580,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
+ RT_SCOPE_LINK);
+ }
+
++ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ if (unlikely(net_xmit_eval(ret)))
+ vrf_dev->stats.tx_errors++;
+@@ -626,8 +645,7 @@ static void vrf_finish_direct(struct sk_buff *skb)
+ skb_pull(skb, ETH_HLEN);
+ }
+
+- /* reset skb device */
+- nf_reset_ct(skb);
++ vrf_nf_reset_ct(skb);
+ }
+
+ #if IS_ENABLED(CONFIG_IPV6)
+@@ -641,7 +659,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
+ struct neighbour *neigh;
+ int ret;
+
+- nf_reset_ct(skb);
++ vrf_nf_reset_ct(skb);
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+@@ -772,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+ return skb;
+
++ vrf_nf_set_untracked(skb);
++
+ if (qdisc_tx_is_default(vrf_dev) ||
+ IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+ return vrf_ip6_out_direct(vrf_dev, sk, skb);
+@@ -858,7 +878,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
+ struct neighbour *neigh;
+ bool is_v6gw = false;
+
+- nf_reset_ct(skb);
++ vrf_nf_reset_ct(skb);
+
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+@@ -1001,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+ ipv4_is_lbcast(ip_hdr(skb)->daddr))
+ return skb;
+
++ vrf_nf_set_untracked(skb);
++
+ if (qdisc_tx_is_default(vrf_dev) ||
+ IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+ return vrf_ip_out_direct(vrf_dev, sk, skb);
+@@ -1243,6 +1265,7 @@ static int vrf_prepare_mac_header(struct sk_buff *skb,
+ eth = (struct ethhdr *)skb->data;
+
+ skb_reset_mac_header(skb);
++ skb_reset_mac_len(skb);
+
+ /* we set the ethernet destination and the source addresses to the
+ * address of the VRF device.
+@@ -1272,9 +1295,9 @@ static int vrf_prepare_mac_header(struct sk_buff *skb,
+ */
+ static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
+ struct net_device *vrf_dev,
+- u16 proto)
++ u16 proto, struct net_device *orig_dev)
+ {
+- if (skb_mac_header_was_set(skb))
++ if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev))
+ return 0;
+
+ return vrf_prepare_mac_header(skb, vrf_dev, proto);
+@@ -1380,6 +1403,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+
+ /* if packet is NDISC then keep the ingress interface */
+ if (!is_ndisc) {
++ struct net_device *orig_dev = skb->dev;
++
+ vrf_rx_stats(vrf_dev, skb->len);
+ skb->dev = vrf_dev;
+ skb->skb_iif = vrf_dev->ifindex;
+@@ -1388,7 +1413,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+ int err;
+
+ err = vrf_add_mac_header_if_unset(skb, vrf_dev,
+- ETH_P_IPV6);
++ ETH_P_IPV6,
++ orig_dev);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+@@ -1418,6 +1444,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
+ struct sk_buff *skb)
+ {
++ struct net_device *orig_dev = skb->dev;
++
+ skb->dev = vrf_dev;
+ skb->skb_iif = vrf_dev->ifindex;
+ IPCB(skb)->flags |= IPSKB_L3SLAVE;
+@@ -1438,7 +1466,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
+ if (!list_empty(&vrf_dev->ptype_all)) {
+ int err;
+
+- err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP);
++ err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP,
++ orig_dev);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
+deleted file mode 100644
+index 141635a35c28a..0000000000000
+--- a/drivers/net/vxlan.c
++++ /dev/null
+@@ -1,4834 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * VXLAN: Virtual eXtensible Local Area Network
+- *
+- * Copyright (c) 2012-2013 Vyatta Inc.
+- */
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/errno.h>
+-#include <linux/slab.h>
+-#include <linux/udp.h>
+-#include <linux/igmp.h>
+-#include <linux/if_ether.h>
+-#include <linux/ethtool.h>
+-#include <net/arp.h>
+-#include <net/ndisc.h>
+-#include <net/ipv6_stubs.h>
+-#include <net/ip.h>
+-#include <net/icmp.h>
+-#include <net/rtnetlink.h>
+-#include <net/inet_ecn.h>
+-#include <net/net_namespace.h>
+-#include <net/netns/generic.h>
+-#include <net/tun_proto.h>
+-#include <net/vxlan.h>
+-#include <net/nexthop.h>
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+-#include <net/ip6_tunnel.h>
+-#include <net/ip6_checksum.h>
+-#endif
+-
+-#define VXLAN_VERSION "0.1"
+-
+-#define PORT_HASH_BITS 8
+-#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
+-#define FDB_AGE_DEFAULT 300 /* 5 min */
+-#define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
+-
+-/* UDP port for VXLAN traffic.
+- * The IANA assigned port is 4789, but the Linux default is 8472
+- * for compatibility with early adopters.
+- */
+-static unsigned short vxlan_port __read_mostly = 8472;
+-module_param_named(udp_port, vxlan_port, ushort, 0444);
+-MODULE_PARM_DESC(udp_port, "Destination UDP port");
+-
+-static bool log_ecn_error = true;
+-module_param(log_ecn_error, bool, 0644);
+-MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+-
+-static unsigned int vxlan_net_id;
+-static struct rtnl_link_ops vxlan_link_ops;
+-
+-static const u8 all_zeros_mac[ETH_ALEN + 2];
+-
+-static int vxlan_sock_add(struct vxlan_dev *vxlan);
+-
+-static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
+-
+-/* per-network namespace private data for this module */
+-struct vxlan_net {
+- struct list_head vxlan_list;
+- struct hlist_head sock_list[PORT_HASH_SIZE];
+- spinlock_t sock_lock;
+- struct notifier_block nexthop_notifier_block;
+-};
+-
+-/* Forwarding table entry */
+-struct vxlan_fdb {
+- struct hlist_node hlist; /* linked list of entries */
+- struct rcu_head rcu;
+- unsigned long updated; /* jiffies */
+- unsigned long used;
+- struct list_head remotes;
+- u8 eth_addr[ETH_ALEN];
+- u16 state; /* see ndm_state */
+- __be32 vni;
+- u16 flags; /* see ndm_flags and below */
+- struct list_head nh_list;
+- struct nexthop __rcu *nh;
+- struct vxlan_dev __rcu *vdev;
+-};
+-
+-#define NTF_VXLAN_ADDED_BY_USER 0x100
+-
+-/* salt for hash table */
+-static u32 vxlan_salt __read_mostly;
+-
+-static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
+-{
+- return vs->flags & VXLAN_F_COLLECT_METADATA ||
+- ip_tunnel_collect_metadata();
+-}
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+-static inline
+-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+-{
+- if (a->sa.sa_family != b->sa.sa_family)
+- return false;
+- if (a->sa.sa_family == AF_INET6)
+- return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
+- else
+- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+-}
+-
+-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
+-{
+- if (nla_len(nla) >= sizeof(struct in6_addr)) {
+- ip->sin6.sin6_addr = nla_get_in6_addr(nla);
+- ip->sa.sa_family = AF_INET6;
+- return 0;
+- } else if (nla_len(nla) >= sizeof(__be32)) {
+- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+- ip->sa.sa_family = AF_INET;
+- return 0;
+- } else {
+- return -EAFNOSUPPORT;
+- }
+-}
+-
+-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+- const union vxlan_addr *ip)
+-{
+- if (ip->sa.sa_family == AF_INET6)
+- return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
+- else
+- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+-}
+-
+-#else /* !CONFIG_IPV6 */
+-
+-static inline
+-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+-{
+- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+-}
+-
+-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
+-{
+- if (nla_len(nla) >= sizeof(struct in6_addr)) {
+- return -EAFNOSUPPORT;
+- } else if (nla_len(nla) >= sizeof(__be32)) {
+- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+- ip->sa.sa_family = AF_INET;
+- return 0;
+- } else {
+- return -EAFNOSUPPORT;
+- }
+-}
+-
+-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+- const union vxlan_addr *ip)
+-{
+- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+-}
+-#endif
+-
+-/* Virtual Network hash table head */
+-static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
+-{
+- return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
+-}
+-
+-/* Socket hash table head */
+-static inline struct hlist_head *vs_head(struct net *net, __be16 port)
+-{
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+-
+- return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+-}
+-
+-/* First remote destination for a forwarding entry.
+- * Guaranteed to be non-NULL because remotes are never deleted.
+- */
+-static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
+-{
+- if (rcu_access_pointer(fdb->nh))
+- return NULL;
+- return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
+-}
+-
+-static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
+-{
+- if (rcu_access_pointer(fdb->nh))
+- return NULL;
+- return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
+-}
+-
+-/* Find VXLAN socket based on network namespace, address family, UDP port,
+- * enabled unshareable flags and socket device binding (see l3mdev with
+- * non-default VRF).
+- */
+-static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+- __be16 port, u32 flags, int ifindex)
+-{
+- struct vxlan_sock *vs;
+-
+- flags &= VXLAN_F_RCV_FLAGS;
+-
+- hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
+- if (inet_sk(vs->sock->sk)->inet_sport == port &&
+- vxlan_get_sk_family(vs) == family &&
+- vs->flags == flags &&
+- vs->sock->sk->sk_bound_dev_if == ifindex)
+- return vs;
+- }
+- return NULL;
+-}
+-
+-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
+- __be32 vni)
+-{
+- struct vxlan_dev_node *node;
+-
+- /* For flow based devices, map all packets to VNI 0 */
+- if (vs->flags & VXLAN_F_COLLECT_METADATA)
+- vni = 0;
+-
+- hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
+- if (node->vxlan->default_dst.remote_vni != vni)
+- continue;
+-
+- if (IS_ENABLED(CONFIG_IPV6)) {
+- const struct vxlan_config *cfg = &node->vxlan->cfg;
+-
+- if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+- cfg->remote_ifindex != ifindex)
+- continue;
+- }
+-
+- return node->vxlan;
+- }
+-
+- return NULL;
+-}
+-
+-/* Look up VNI in a per net namespace table */
+-static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
+- __be32 vni, sa_family_t family,
+- __be16 port, u32 flags)
+-{
+- struct vxlan_sock *vs;
+-
+- vs = vxlan_find_sock(net, family, port, flags, ifindex);
+- if (!vs)
+- return NULL;
+-
+- return vxlan_vs_find_vni(vs, ifindex, vni);
+-}
+-
+-/* Fill in neighbour message in skbuff. */
+-static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
+- const struct vxlan_fdb *fdb,
+- u32 portid, u32 seq, int type, unsigned int flags,
+- const struct vxlan_rdst *rdst)
+-{
+- unsigned long now = jiffies;
+- struct nda_cacheinfo ci;
+- bool send_ip, send_eth;
+- struct nlmsghdr *nlh;
+- struct nexthop *nh;
+- struct ndmsg *ndm;
+- int nh_family;
+- u32 nh_id;
+-
+- nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- ndm = nlmsg_data(nlh);
+- memset(ndm, 0, sizeof(*ndm));
+-
+- send_eth = send_ip = true;
+-
+- rcu_read_lock();
+- nh = rcu_dereference(fdb->nh);
+- if (nh) {
+- nh_family = nexthop_get_family(nh);
+- nh_id = nh->id;
+- }
+- rcu_read_unlock();
+-
+- if (type == RTM_GETNEIGH) {
+- if (rdst) {
+- send_ip = !vxlan_addr_any(&rdst->remote_ip);
+- ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
+- } else if (nh) {
+- ndm->ndm_family = nh_family;
+- }
+- send_eth = !is_zero_ether_addr(fdb->eth_addr);
+- } else
+- ndm->ndm_family = AF_BRIDGE;
+- ndm->ndm_state = fdb->state;
+- ndm->ndm_ifindex = vxlan->dev->ifindex;
+- ndm->ndm_flags = fdb->flags;
+- if (rdst && rdst->offloaded)
+- ndm->ndm_flags |= NTF_OFFLOADED;
+- ndm->ndm_type = RTN_UNICAST;
+-
+- if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
+- nla_put_s32(skb, NDA_LINK_NETNSID,
+- peernet2id(dev_net(vxlan->dev), vxlan->net)))
+- goto nla_put_failure;
+-
+- if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
+- goto nla_put_failure;
+- if (nh) {
+- if (nla_put_u32(skb, NDA_NH_ID, nh_id))
+- goto nla_put_failure;
+- } else if (rdst) {
+- if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
+- &rdst->remote_ip))
+- goto nla_put_failure;
+-
+- if (rdst->remote_port &&
+- rdst->remote_port != vxlan->cfg.dst_port &&
+- nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+- goto nla_put_failure;
+- if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+- nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
+- goto nla_put_failure;
+- if (rdst->remote_ifindex &&
+- nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+- goto nla_put_failure;
+- }
+-
+- if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
+- nla_put_u32(skb, NDA_SRC_VNI,
+- be32_to_cpu(fdb->vni)))
+- goto nla_put_failure;
+-
+- ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+- ci.ndm_confirmed = 0;
+- ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+- ci.ndm_refcnt = 0;
+-
+- if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+- goto nla_put_failure;
+-
+- nlmsg_end(skb, nlh);
+- return 0;
+-
+-nla_put_failure:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-static inline size_t vxlan_nlmsg_size(void)
+-{
+- return NLMSG_ALIGN(sizeof(struct ndmsg))
+- + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+- + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
+- + nla_total_size(sizeof(__be16)) /* NDA_PORT */
+- + nla_total_size(sizeof(__be32)) /* NDA_VNI */
+- + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+- + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
+- + nla_total_size(sizeof(struct nda_cacheinfo));
+-}
+-
+-static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+- struct vxlan_rdst *rd, int type)
+-{
+- struct net *net = dev_net(vxlan->dev);
+- struct sk_buff *skb;
+- int err = -ENOBUFS;
+-
+- skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
+- if (skb == NULL)
+- goto errout;
+-
+- err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+-
+- rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+- return;
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+-}
+-
+-static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
+- const struct vxlan_fdb *fdb,
+- const struct vxlan_rdst *rd,
+- struct netlink_ext_ack *extack,
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+-{
+- fdb_info->info.dev = vxlan->dev;
+- fdb_info->info.extack = extack;
+- fdb_info->remote_ip = rd->remote_ip;
+- fdb_info->remote_port = rd->remote_port;
+- fdb_info->remote_vni = rd->remote_vni;
+- fdb_info->remote_ifindex = rd->remote_ifindex;
+- memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
+- fdb_info->vni = fdb->vni;
+- fdb_info->offloaded = rd->offloaded;
+- fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
+-}
+-
+-static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
+- struct vxlan_fdb *fdb,
+- struct vxlan_rdst *rd,
+- bool adding,
+- struct netlink_ext_ack *extack)
+-{
+- struct switchdev_notifier_vxlan_fdb_info info;
+- enum switchdev_notifier_type notifier_type;
+- int ret;
+-
+- if (WARN_ON(!rd))
+- return 0;
+-
+- notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
+- : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
+- vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
+- ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
+- &info.info, extack);
+- return notifier_to_errno(ret);
+-}
+-
+-static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+- struct vxlan_rdst *rd, int type, bool swdev_notify,
+- struct netlink_ext_ack *extack)
+-{
+- int err;
+-
+- if (swdev_notify && rd) {
+- switch (type) {
+- case RTM_NEWNEIGH:
+- err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+- true, extack);
+- if (err)
+- return err;
+- break;
+- case RTM_DELNEIGH:
+- vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+- false, extack);
+- break;
+- }
+- }
+-
+- __vxlan_fdb_notify(vxlan, fdb, rd, type);
+- return 0;
+-}
+-
+-static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_fdb f = {
+- .state = NUD_STALE,
+- };
+- struct vxlan_rdst remote = {
+- .remote_ip = *ipa, /* goes to NDA_DST */
+- .remote_vni = cpu_to_be32(VXLAN_N_VID),
+- };
+-
+- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
+-}
+-
+-static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
+-{
+- struct vxlan_fdb f = {
+- .state = NUD_STALE,
+- };
+- struct vxlan_rdst remote = { };
+-
+- memcpy(f.eth_addr, eth_addr, ETH_ALEN);
+-
+- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
+-}
+-
+-/* Hash Ethernet address */
+-static u32 eth_hash(const unsigned char *addr)
+-{
+- u64 value = get_unaligned((u64 *)addr);
+-
+- /* only want 6 bytes */
+-#ifdef __BIG_ENDIAN
+- value >>= 16;
+-#else
+- value <<= 16;
+-#endif
+- return hash_64(value, FDB_HASH_BITS);
+-}
+-
+-static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
+-{
+- /* use 1 byte of OUI and 3 bytes of NIC */
+- u32 key = get_unaligned((u32 *)(addr + 2));
+-
+- return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
+-}
+-
+-static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
+-{
+- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
+- return eth_vni_hash(mac, vni);
+- else
+- return eth_hash(mac);
+-}
+-
+-/* Hash chain to use given mac address */
+-static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
+- const u8 *mac, __be32 vni)
+-{
+- return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
+-}
+-
+-/* Look up Ethernet address in forwarding table */
+-static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
+- const u8 *mac, __be32 vni)
+-{
+- struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
+- struct vxlan_fdb *f;
+-
+- hlist_for_each_entry_rcu(f, head, hlist) {
+- if (ether_addr_equal(mac, f->eth_addr)) {
+- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
+- if (vni == f->vni)
+- return f;
+- } else {
+- return f;
+- }
+- }
+- }
+-
+- return NULL;
+-}
+-
+-static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
+- const u8 *mac, __be32 vni)
+-{
+- struct vxlan_fdb *f;
+-
+- f = __vxlan_find_mac(vxlan, mac, vni);
+- if (f && f->used != jiffies)
+- f->used = jiffies;
+-
+- return f;
+-}
+-
+-/* caller should hold vxlan->hash_lock */
+-static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
+- union vxlan_addr *ip, __be16 port,
+- __be32 vni, __u32 ifindex)
+-{
+- struct vxlan_rdst *rd;
+-
+- list_for_each_entry(rd, &f->remotes, list) {
+- if (vxlan_addr_equal(&rd->remote_ip, ip) &&
+- rd->remote_port == port &&
+- rd->remote_vni == vni &&
+- rd->remote_ifindex == ifindex)
+- return rd;
+- }
+-
+- return NULL;
+-}
+-
+-int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- u8 eth_addr[ETH_ALEN + 2] = { 0 };
+- struct vxlan_rdst *rdst;
+- struct vxlan_fdb *f;
+- int rc = 0;
+-
+- if (is_multicast_ether_addr(mac) ||
+- is_zero_ether_addr(mac))
+- return -EINVAL;
+-
+- ether_addr_copy(eth_addr, mac);
+-
+- rcu_read_lock();
+-
+- f = __vxlan_find_mac(vxlan, eth_addr, vni);
+- if (!f) {
+- rc = -ENOENT;
+- goto out;
+- }
+-
+- rdst = first_remote_rcu(f);
+- vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
+-
+-out:
+- rcu_read_unlock();
+- return rc;
+-}
+-EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
+-
+-static int vxlan_fdb_notify_one(struct notifier_block *nb,
+- const struct vxlan_dev *vxlan,
+- const struct vxlan_fdb *f,
+- const struct vxlan_rdst *rdst,
+- struct netlink_ext_ack *extack)
+-{
+- struct switchdev_notifier_vxlan_fdb_info fdb_info;
+- int rc;
+-
+- vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
+- rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+- &fdb_info);
+- return notifier_to_errno(rc);
+-}
+-
+-int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
+- struct notifier_block *nb,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan;
+- struct vxlan_rdst *rdst;
+- struct vxlan_fdb *f;
+- unsigned int h;
+- int rc = 0;
+-
+- if (!netif_is_vxlan(dev))
+- return -EINVAL;
+- vxlan = netdev_priv(dev);
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- spin_lock_bh(&vxlan->hash_lock[h]);
+- hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
+- if (f->vni == vni) {
+- list_for_each_entry(rdst, &f->remotes, list) {
+- rc = vxlan_fdb_notify_one(nb, vxlan,
+- f, rdst,
+- extack);
+- if (rc)
+- goto unlock;
+- }
+- }
+- }
+- spin_unlock_bh(&vxlan->hash_lock[h]);
+- }
+- return 0;
+-
+-unlock:
+- spin_unlock_bh(&vxlan->hash_lock[h]);
+- return rc;
+-}
+-EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
+-
+-void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
+-{
+- struct vxlan_dev *vxlan;
+- struct vxlan_rdst *rdst;
+- struct vxlan_fdb *f;
+- unsigned int h;
+-
+- if (!netif_is_vxlan(dev))
+- return;
+- vxlan = netdev_priv(dev);
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- spin_lock_bh(&vxlan->hash_lock[h]);
+- hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
+- if (f->vni == vni)
+- list_for_each_entry(rdst, &f->remotes, list)
+- rdst->offloaded = false;
+- spin_unlock_bh(&vxlan->hash_lock[h]);
+- }
+-
+-}
+-EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
+-
+-/* Replace destination of unicast mac */
+-static int vxlan_fdb_replace(struct vxlan_fdb *f,
+- union vxlan_addr *ip, __be16 port, __be32 vni,
+- __u32 ifindex, struct vxlan_rdst *oldrd)
+-{
+- struct vxlan_rdst *rd;
+-
+- rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
+- if (rd)
+- return 0;
+-
+- rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
+- if (!rd)
+- return 0;
+-
+- *oldrd = *rd;
+- dst_cache_reset(&rd->dst_cache);
+- rd->remote_ip = *ip;
+- rd->remote_port = port;
+- rd->remote_vni = vni;
+- rd->remote_ifindex = ifindex;
+- rd->offloaded = false;
+- return 1;
+-}
+-
+-/* Add/update destinations for multicast */
+-static int vxlan_fdb_append(struct vxlan_fdb *f,
+- union vxlan_addr *ip, __be16 port, __be32 vni,
+- __u32 ifindex, struct vxlan_rdst **rdp)
+-{
+- struct vxlan_rdst *rd;
+-
+- rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
+- if (rd)
+- return 0;
+-
+- rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
+- if (rd == NULL)
+- return -ENOBUFS;
+-
+- if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
+- kfree(rd);
+- return -ENOBUFS;
+- }
+-
+- rd->remote_ip = *ip;
+- rd->remote_port = port;
+- rd->offloaded = false;
+- rd->remote_vni = vni;
+- rd->remote_ifindex = ifindex;
+-
+- list_add_tail_rcu(&rd->list, &f->remotes);
+-
+- *rdp = rd;
+- return 1;
+-}
+-
+-static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
+- unsigned int off,
+- struct vxlanhdr *vh, size_t hdrlen,
+- __be32 vni_field,
+- struct gro_remcsum *grc,
+- bool nopartial)
+-{
+- size_t start, offset;
+-
+- if (skb->remcsum_offload)
+- return vh;
+-
+- if (!NAPI_GRO_CB(skb)->csum_valid)
+- return NULL;
+-
+- start = vxlan_rco_start(vni_field);
+- offset = start + vxlan_rco_offset(vni_field);
+-
+- vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
+- start, offset, grc, nopartial);
+-
+- skb->remcsum_offload = 1;
+-
+- return vh;
+-}
+-
+-static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+- struct list_head *head,
+- struct sk_buff *skb)
+-{
+- struct sk_buff *pp = NULL;
+- struct sk_buff *p;
+- struct vxlanhdr *vh, *vh2;
+- unsigned int hlen, off_vx;
+- int flush = 1;
+- struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
+- __be32 flags;
+- struct gro_remcsum grc;
+-
+- skb_gro_remcsum_init(&grc);
+-
+- off_vx = skb_gro_offset(skb);
+- hlen = off_vx + sizeof(*vh);
+- vh = skb_gro_header_fast(skb, off_vx);
+- if (skb_gro_header_hard(skb, hlen)) {
+- vh = skb_gro_header_slow(skb, hlen, off_vx);
+- if (unlikely(!vh))
+- goto out;
+- }
+-
+- skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
+-
+- flags = vh->vx_flags;
+-
+- if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+- vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
+- vh->vx_vni, &grc,
+- !!(vs->flags &
+- VXLAN_F_REMCSUM_NOPARTIAL));
+-
+- if (!vh)
+- goto out;
+- }
+-
+- skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+-
+- list_for_each_entry(p, head, list) {
+- if (!NAPI_GRO_CB(p)->same_flow)
+- continue;
+-
+- vh2 = (struct vxlanhdr *)(p->data + off_vx);
+- if (vh->vx_flags != vh2->vx_flags ||
+- vh->vx_vni != vh2->vx_vni) {
+- NAPI_GRO_CB(p)->same_flow = 0;
+- continue;
+- }
+- }
+-
+- pp = call_gro_receive(eth_gro_receive, head, skb);
+- flush = 0;
+-
+-out:
+- skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
+-
+- return pp;
+-}
+-
+-static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
+-{
+- /* Sets 'skb->inner_mac_header' since we are always called with
+- * 'skb->encapsulation' set.
+- */
+- return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
+-}
+-
+-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
+- __u16 state, __be32 src_vni,
+- __u16 ndm_flags)
+-{
+- struct vxlan_fdb *f;
+-
+- f = kmalloc(sizeof(*f), GFP_ATOMIC);
+- if (!f)
+- return NULL;
+- f->state = state;
+- f->flags = ndm_flags;
+- f->updated = f->used = jiffies;
+- f->vni = src_vni;
+- f->nh = NULL;
+- RCU_INIT_POINTER(f->vdev, vxlan);
+- INIT_LIST_HEAD(&f->nh_list);
+- INIT_LIST_HEAD(&f->remotes);
+- memcpy(f->eth_addr, mac, ETH_ALEN);
+-
+- return f;
+-}
+-
+-static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
+- __be32 src_vni, struct vxlan_fdb *f)
+-{
+- ++vxlan->addrcnt;
+- hlist_add_head_rcu(&f->hlist,
+- vxlan_fdb_head(vxlan, mac, src_vni));
+-}
+-
+-static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+- u32 nhid, struct netlink_ext_ack *extack)
+-{
+- struct nexthop *old_nh = rtnl_dereference(fdb->nh);
+- struct nexthop *nh;
+- int err = -EINVAL;
+-
+- if (old_nh && old_nh->id == nhid)
+- return 0;
+-
+- nh = nexthop_find_by_id(vxlan->net, nhid);
+- if (!nh) {
+- NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+- goto err_inval;
+- }
+-
+- if (nh) {
+- if (!nexthop_get(nh)) {
+- NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+- nh = NULL;
+- goto err_inval;
+- }
+- if (!nexthop_is_fdb(nh)) {
+- NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
+- goto err_inval;
+- }
+-
+- if (!nexthop_is_multipath(nh)) {
+- NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
+- goto err_inval;
+- }
+-
+- /* check nexthop group family */
+- switch (vxlan->default_dst.remote_ip.sa.sa_family) {
+- case AF_INET:
+- if (!nexthop_has_v4(nh)) {
+- err = -EAFNOSUPPORT;
+- NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+- goto err_inval;
+- }
+- break;
+- case AF_INET6:
+- if (nexthop_has_v4(nh)) {
+- err = -EAFNOSUPPORT;
+- NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+- goto err_inval;
+- }
+- }
+- }
+-
+- if (old_nh) {
+- list_del_rcu(&fdb->nh_list);
+- nexthop_put(old_nh);
+- }
+- rcu_assign_pointer(fdb->nh, nh);
+- list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
+- return 1;
+-
+-err_inval:
+- if (nh)
+- nexthop_put(nh);
+- return err;
+-}
+-
+-static int vxlan_fdb_create(struct vxlan_dev *vxlan,
+- const u8 *mac, union vxlan_addr *ip,
+- __u16 state, __be16 port, __be32 src_vni,
+- __be32 vni, __u32 ifindex, __u16 ndm_flags,
+- u32 nhid, struct vxlan_fdb **fdb,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_rdst *rd = NULL;
+- struct vxlan_fdb *f;
+- int rc;
+-
+- if (vxlan->cfg.addrmax &&
+- vxlan->addrcnt >= vxlan->cfg.addrmax)
+- return -ENOSPC;
+-
+- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+- f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+- if (!f)
+- return -ENOMEM;
+-
+- if (nhid)
+- rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+- else
+- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+- if (rc < 0)
+- goto errout;
+-
+- *fdb = f;
+-
+- return 0;
+-
+-errout:
+- kfree(f);
+- return rc;
+-}
+-
+-static void __vxlan_fdb_free(struct vxlan_fdb *f)
+-{
+- struct vxlan_rdst *rd, *nd;
+- struct nexthop *nh;
+-
+- nh = rcu_dereference_raw(f->nh);
+- if (nh) {
+- rcu_assign_pointer(f->nh, NULL);
+- rcu_assign_pointer(f->vdev, NULL);
+- nexthop_put(nh);
+- }
+-
+- list_for_each_entry_safe(rd, nd, &f->remotes, list) {
+- dst_cache_destroy(&rd->dst_cache);
+- kfree(rd);
+- }
+- kfree(f);
+-}
+-
+-static void vxlan_fdb_free(struct rcu_head *head)
+-{
+- struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+-
+- __vxlan_fdb_free(f);
+-}
+-
+-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+- bool do_notify, bool swdev_notify)
+-{
+- struct vxlan_rdst *rd;
+-
+- netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
+-
+- --vxlan->addrcnt;
+- if (do_notify) {
+- if (rcu_access_pointer(f->nh))
+- vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
+- swdev_notify, NULL);
+- else
+- list_for_each_entry(rd, &f->remotes, list)
+- vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+- swdev_notify, NULL);
+- }
+-
+- hlist_del_rcu(&f->hlist);
+- list_del_rcu(&f->nh_list);
+- call_rcu(&f->rcu, vxlan_fdb_free);
+-}
+-
+-static void vxlan_dst_free(struct rcu_head *head)
+-{
+- struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+-
+- dst_cache_destroy(&rd->dst_cache);
+- kfree(rd);
+-}
+-
+-static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
+- union vxlan_addr *ip,
+- __u16 state, __u16 flags,
+- __be16 port, __be32 vni,
+- __u32 ifindex, __u16 ndm_flags,
+- struct vxlan_fdb *f, u32 nhid,
+- bool swdev_notify,
+- struct netlink_ext_ack *extack)
+-{
+- __u16 fdb_flags = (ndm_flags & ~NTF_USE);
+- struct vxlan_rdst *rd = NULL;
+- struct vxlan_rdst oldrd;
+- int notify = 0;
+- int rc = 0;
+- int err;
+-
+- if (nhid && !rcu_access_pointer(f->nh)) {
+- NL_SET_ERR_MSG(extack,
+- "Cannot replace an existing non nexthop fdb with a nexthop");
+- return -EOPNOTSUPP;
+- }
+-
+- if (nhid && (flags & NLM_F_APPEND)) {
+- NL_SET_ERR_MSG(extack,
+- "Cannot append to a nexthop fdb");
+- return -EOPNOTSUPP;
+- }
+-
+- /* Do not allow an externally learned entry to take over an entry added
+- * by the user.
+- */
+- if (!(fdb_flags & NTF_EXT_LEARNED) ||
+- !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
+- if (f->state != state) {
+- f->state = state;
+- f->updated = jiffies;
+- notify = 1;
+- }
+- if (f->flags != fdb_flags) {
+- f->flags = fdb_flags;
+- f->updated = jiffies;
+- notify = 1;
+- }
+- }
+-
+- if ((flags & NLM_F_REPLACE)) {
+- /* Only change unicasts */
+- if (!(is_multicast_ether_addr(f->eth_addr) ||
+- is_zero_ether_addr(f->eth_addr))) {
+- if (nhid) {
+- rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+- if (rc < 0)
+- return rc;
+- } else {
+- rc = vxlan_fdb_replace(f, ip, port, vni,
+- ifindex, &oldrd);
+- }
+- notify |= rc;
+- } else {
+- NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
+- return -EOPNOTSUPP;
+- }
+- }
+- if ((flags & NLM_F_APPEND) &&
+- (is_multicast_ether_addr(f->eth_addr) ||
+- is_zero_ether_addr(f->eth_addr))) {
+- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+-
+- if (rc < 0)
+- return rc;
+- notify |= rc;
+- }
+-
+- if (ndm_flags & NTF_USE)
+- f->used = jiffies;
+-
+- if (notify) {
+- if (rd == NULL)
+- rd = first_remote_rtnl(f);
+-
+- err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
+- swdev_notify, extack);
+- if (err)
+- goto err_notify;
+- }
+-
+- return 0;
+-
+-err_notify:
+- if (nhid)
+- return err;
+- if ((flags & NLM_F_REPLACE) && rc)
+- *rd = oldrd;
+- else if ((flags & NLM_F_APPEND) && rc) {
+- list_del_rcu(&rd->list);
+- call_rcu(&rd->rcu, vxlan_dst_free);
+- }
+- return err;
+-}
+-
+-static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
+- const u8 *mac, union vxlan_addr *ip,
+- __u16 state, __u16 flags,
+- __be16 port, __be32 src_vni, __be32 vni,
+- __u32 ifindex, __u16 ndm_flags, u32 nhid,
+- bool swdev_notify,
+- struct netlink_ext_ack *extack)
+-{
+- __u16 fdb_flags = (ndm_flags & ~NTF_USE);
+- struct vxlan_fdb *f;
+- int rc;
+-
+- /* Disallow replace to add a multicast entry */
+- if ((flags & NLM_F_REPLACE) &&
+- (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
+- return -EOPNOTSUPP;
+-
+- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+- rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+- vni, ifindex, fdb_flags, nhid, &f, extack);
+- if (rc < 0)
+- return rc;
+-
+- vxlan_fdb_insert(vxlan, mac, src_vni, f);
+- rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
+- swdev_notify, extack);
+- if (rc)
+- goto err_notify;
+-
+- return 0;
+-
+-err_notify:
+- vxlan_fdb_destroy(vxlan, f, false, false);
+- return rc;
+-}
+-
+-/* Add new entry to forwarding table -- assumes lock held */
+-static int vxlan_fdb_update(struct vxlan_dev *vxlan,
+- const u8 *mac, union vxlan_addr *ip,
+- __u16 state, __u16 flags,
+- __be16 port, __be32 src_vni, __be32 vni,
+- __u32 ifindex, __u16 ndm_flags, u32 nhid,
+- bool swdev_notify,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_fdb *f;
+-
+- f = __vxlan_find_mac(vxlan, mac, src_vni);
+- if (f) {
+- if (flags & NLM_F_EXCL) {
+- netdev_dbg(vxlan->dev,
+- "lost race to create %pM\n", mac);
+- return -EEXIST;
+- }
+-
+- return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
+- vni, ifindex, ndm_flags, f,
+- nhid, swdev_notify, extack);
+- } else {
+- if (!(flags & NLM_F_CREATE))
+- return -ENOENT;
+-
+- return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
+- port, src_vni, vni, ifindex,
+- ndm_flags, nhid, swdev_notify,
+- extack);
+- }
+-}
+-
+-static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+- struct vxlan_rdst *rd, bool swdev_notify)
+-{
+- list_del_rcu(&rd->list);
+- vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
+- call_rcu(&rd->rcu, vxlan_dst_free);
+-}
+-
+-static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
+- union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
+- __be32 *vni, u32 *ifindex, u32 *nhid)
+-{
+- struct net *net = dev_net(vxlan->dev);
+- int err;
+-
+- if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
+- tb[NDA_PORT]))
+- return -EINVAL;
+-
+- if (tb[NDA_DST]) {
+- err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
+- if (err)
+- return err;
+- } else {
+- union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
+-
+- if (remote->sa.sa_family == AF_INET) {
+- ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
+- ip->sa.sa_family = AF_INET;
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- ip->sin6.sin6_addr = in6addr_any;
+- ip->sa.sa_family = AF_INET6;
+-#endif
+- }
+- }
+-
+- if (tb[NDA_PORT]) {
+- if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
+- return -EINVAL;
+- *port = nla_get_be16(tb[NDA_PORT]);
+- } else {
+- *port = vxlan->cfg.dst_port;
+- }
+-
+- if (tb[NDA_VNI]) {
+- if (nla_len(tb[NDA_VNI]) != sizeof(u32))
+- return -EINVAL;
+- *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
+- } else {
+- *vni = vxlan->default_dst.remote_vni;
+- }
+-
+- if (tb[NDA_SRC_VNI]) {
+- if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
+- return -EINVAL;
+- *src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
+- } else {
+- *src_vni = vxlan->default_dst.remote_vni;
+- }
+-
+- if (tb[NDA_IFINDEX]) {
+- struct net_device *tdev;
+-
+- if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
+- return -EINVAL;
+- *ifindex = nla_get_u32(tb[NDA_IFINDEX]);
+- tdev = __dev_get_by_index(net, *ifindex);
+- if (!tdev)
+- return -EADDRNOTAVAIL;
+- } else {
+- *ifindex = 0;
+- }
+-
+- if (tb[NDA_NH_ID])
+- *nhid = nla_get_u32(tb[NDA_NH_ID]);
+- else
+- *nhid = 0;
+-
+- return 0;
+-}
+-
+-/* Add static entry (via netlink) */
+-static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+- struct net_device *dev,
+- const unsigned char *addr, u16 vid, u16 flags,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- /* struct net *net = dev_net(vxlan->dev); */
+- union vxlan_addr ip;
+- __be16 port;
+- __be32 src_vni, vni;
+- u32 ifindex, nhid;
+- u32 hash_index;
+- int err;
+-
+- if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
+- pr_info("RTM_NEWNEIGH with invalid state %#x\n",
+- ndm->ndm_state);
+- return -EINVAL;
+- }
+-
+- if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
+- return -EINVAL;
+-
+- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+- &nhid);
+- if (err)
+- return err;
+-
+- if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
+- return -EAFNOSUPPORT;
+-
+- hash_index = fdb_head_index(vxlan, addr, src_vni);
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
+- port, src_vni, vni, ifindex,
+- ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
+- nhid, true, extack);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-
+- return err;
+-}
+-
+-static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
+- const unsigned char *addr, union vxlan_addr ip,
+- __be16 port, __be32 src_vni, __be32 vni,
+- u32 ifindex, bool swdev_notify)
+-{
+- struct vxlan_rdst *rd = NULL;
+- struct vxlan_fdb *f;
+- int err = -ENOENT;
+-
+- f = vxlan_find_mac(vxlan, addr, src_vni);
+- if (!f)
+- return err;
+-
+- if (!vxlan_addr_any(&ip)) {
+- rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
+- if (!rd)
+- goto out;
+- }
+-
+- /* remove a destination if it's not the only one on the list,
+- * otherwise destroy the fdb entry
+- */
+- if (rd && !list_is_singular(&f->remotes)) {
+- vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
+- goto out;
+- }
+-
+- vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
+-
+-out:
+- return 0;
+-}
+-
+-/* Delete entry (via netlink) */
+-static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+- struct net_device *dev,
+- const unsigned char *addr, u16 vid)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- union vxlan_addr ip;
+- __be32 src_vni, vni;
+- u32 ifindex, nhid;
+- u32 hash_index;
+- __be16 port;
+- int err;
+-
+- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+- &nhid);
+- if (err)
+- return err;
+-
+- hash_index = fdb_head_index(vxlan, addr, src_vni);
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
+- true);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-
+- return err;
+-}
+-
+-/* Dump forwarding table */
+-static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+- struct net_device *dev,
+- struct net_device *filter_dev, int *idx)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- unsigned int h;
+- int err = 0;
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- struct vxlan_fdb *f;
+-
+- rcu_read_lock();
+- hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
+- struct vxlan_rdst *rd;
+-
+- if (rcu_access_pointer(f->nh)) {
+- if (*idx < cb->args[2])
+- goto skip_nh;
+- err = vxlan_fdb_info(skb, vxlan, f,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- RTM_NEWNEIGH,
+- NLM_F_MULTI, NULL);
+- if (err < 0) {
+- rcu_read_unlock();
+- goto out;
+- }
+-skip_nh:
+- *idx += 1;
+- continue;
+- }
+-
+- list_for_each_entry_rcu(rd, &f->remotes, list) {
+- if (*idx < cb->args[2])
+- goto skip;
+-
+- err = vxlan_fdb_info(skb, vxlan, f,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- RTM_NEWNEIGH,
+- NLM_F_MULTI, rd);
+- if (err < 0) {
+- rcu_read_unlock();
+- goto out;
+- }
+-skip:
+- *idx += 1;
+- }
+- }
+- rcu_read_unlock();
+- }
+-out:
+- return err;
+-}
+-
+-static int vxlan_fdb_get(struct sk_buff *skb,
+- struct nlattr *tb[],
+- struct net_device *dev,
+- const unsigned char *addr,
+- u16 vid, u32 portid, u32 seq,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_fdb *f;
+- __be32 vni;
+- int err;
+-
+- if (tb[NDA_VNI])
+- vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
+- else
+- vni = vxlan->default_dst.remote_vni;
+-
+- rcu_read_lock();
+-
+- f = __vxlan_find_mac(vxlan, addr, vni);
+- if (!f) {
+- NL_SET_ERR_MSG(extack, "Fdb entry not found");
+- err = -ENOENT;
+- goto errout;
+- }
+-
+- err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
+- RTM_NEWNEIGH, 0, first_remote_rcu(f));
+-errout:
+- rcu_read_unlock();
+- return err;
+-}
+-
+-/* Watch incoming packets to learn mapping between Ethernet address
+- * and Tunnel endpoint.
+- * Return true if packet is bogus and should be dropped.
+- */
+-static bool vxlan_snoop(struct net_device *dev,
+- union vxlan_addr *src_ip, const u8 *src_mac,
+- u32 src_ifindex, __be32 vni)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_fdb *f;
+- u32 ifindex = 0;
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (src_ip->sa.sa_family == AF_INET6 &&
+- (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
+- ifindex = src_ifindex;
+-#endif
+-
+- f = vxlan_find_mac(vxlan, src_mac, vni);
+- if (likely(f)) {
+- struct vxlan_rdst *rdst = first_remote_rcu(f);
+-
+- if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
+- rdst->remote_ifindex == ifindex))
+- return false;
+-
+- /* Don't migrate static entries, drop packets */
+- if (f->state & (NUD_PERMANENT | NUD_NOARP))
+- return true;
+-
+-		/* Don't override an fdb that has a nexthop with a learnt entry */
+- if (rcu_access_pointer(f->nh))
+- return true;
+-
+- if (net_ratelimit())
+- netdev_info(dev,
+- "%pM migrated from %pIS to %pIS\n",
+- src_mac, &rdst->remote_ip.sa, &src_ip->sa);
+-
+- rdst->remote_ip = *src_ip;
+- f->updated = jiffies;
+- vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
+- } else {
+- u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
+-
+- /* learned new entry */
+- spin_lock(&vxlan->hash_lock[hash_index]);
+-
+- /* close off race between vxlan_flush and incoming packets */
+- if (netif_running(dev))
+- vxlan_fdb_update(vxlan, src_mac, src_ip,
+- NUD_REACHABLE,
+- NLM_F_EXCL|NLM_F_CREATE,
+- vxlan->cfg.dst_port,
+- vni,
+- vxlan->default_dst.remote_vni,
+- ifindex, NTF_SELF, 0, true, NULL);
+- spin_unlock(&vxlan->hash_lock[hash_index]);
+- }
+-
+- return false;
+-}
+-
+-/* See if multicast group is already in use by other ID */
+-static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
+-{
+- struct vxlan_dev *vxlan;
+- struct vxlan_sock *sock4;
+-#if IS_ENABLED(CONFIG_IPV6)
+- struct vxlan_sock *sock6;
+-#endif
+- unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
+-
+- sock4 = rtnl_dereference(dev->vn4_sock);
+-
+-	/* The vxlan_sock is only used by dev; leaving the group has
+-	 * no effect on other vxlan devices.
+- */
+- if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1)
+- return false;
+-#if IS_ENABLED(CONFIG_IPV6)
+- sock6 = rtnl_dereference(dev->vn6_sock);
+- if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1)
+- return false;
+-#endif
+-
+- list_for_each_entry(vxlan, &vn->vxlan_list, next) {
+- if (!netif_running(vxlan->dev) || vxlan == dev)
+- continue;
+-
+- if (family == AF_INET &&
+- rtnl_dereference(vxlan->vn4_sock) != sock4)
+- continue;
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (family == AF_INET6 &&
+- rtnl_dereference(vxlan->vn6_sock) != sock6)
+- continue;
+-#endif
+-
+- if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
+- &dev->default_dst.remote_ip))
+- continue;
+-
+- if (vxlan->default_dst.remote_ifindex !=
+- dev->default_dst.remote_ifindex)
+- continue;
+-
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
+-{
+- struct vxlan_net *vn;
+-
+- if (!vs)
+- return false;
+- if (!refcount_dec_and_test(&vs->refcnt))
+- return false;
+-
+- vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
+- spin_lock(&vn->sock_lock);
+- hlist_del_rcu(&vs->hlist);
+- udp_tunnel_notify_del_rx_port(vs->sock,
+- (vs->flags & VXLAN_F_GPE) ?
+- UDP_TUNNEL_TYPE_VXLAN_GPE :
+- UDP_TUNNEL_TYPE_VXLAN);
+- spin_unlock(&vn->sock_lock);
+-
+- return true;
+-}
+-
+-static void vxlan_sock_release(struct vxlan_dev *vxlan)
+-{
+- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
+-#if IS_ENABLED(CONFIG_IPV6)
+- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+-
+- RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
+-#endif
+-
+- RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
+- synchronize_net();
+-
+- vxlan_vs_del_dev(vxlan);
+-
+- if (__vxlan_sock_release_prep(sock4)) {
+- udp_tunnel_sock_release(sock4->sock);
+- kfree(sock4);
+- }
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (__vxlan_sock_release_prep(sock6)) {
+- udp_tunnel_sock_release(sock6->sock);
+- kfree(sock6);
+- }
+-#endif
+-}
+-
+-/* Update multicast group membership when first VNI on
+- * multicast address is brought up
+- */
+-static int vxlan_igmp_join(struct vxlan_dev *vxlan)
+-{
+- struct sock *sk;
+- union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
+- int ifindex = vxlan->default_dst.remote_ifindex;
+- int ret = -EINVAL;
+-
+- if (ip->sa.sa_family == AF_INET) {
+- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
+- struct ip_mreqn mreq = {
+- .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
+- .imr_ifindex = ifindex,
+- };
+-
+- sk = sock4->sock->sk;
+- lock_sock(sk);
+- ret = ip_mc_join_group(sk, &mreq);
+- release_sock(sk);
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+-
+- sk = sock6->sock->sk;
+- lock_sock(sk);
+- ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
+- &ip->sin6.sin6_addr);
+- release_sock(sk);
+-#endif
+- }
+-
+- return ret;
+-}
+-
+-/* Inverse of vxlan_igmp_join when last VNI is brought down */
+-static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
+-{
+- struct sock *sk;
+- union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
+- int ifindex = vxlan->default_dst.remote_ifindex;
+- int ret = -EINVAL;
+-
+- if (ip->sa.sa_family == AF_INET) {
+- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
+- struct ip_mreqn mreq = {
+- .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
+- .imr_ifindex = ifindex,
+- };
+-
+- sk = sock4->sock->sk;
+- lock_sock(sk);
+- ret = ip_mc_leave_group(sk, &mreq);
+- release_sock(sk);
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+-
+- sk = sock6->sock->sk;
+- lock_sock(sk);
+- ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
+- &ip->sin6.sin6_addr);
+- release_sock(sk);
+-#endif
+- }
+-
+- return ret;
+-}
+-
+-static bool vxlan_remcsum(struct vxlanhdr *unparsed,
+- struct sk_buff *skb, u32 vxflags)
+-{
+- size_t start, offset;
+-
+- if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
+- goto out;
+-
+- start = vxlan_rco_start(unparsed->vx_vni);
+- offset = start + vxlan_rco_offset(unparsed->vx_vni);
+-
+- if (!pskb_may_pull(skb, offset + sizeof(u16)))
+- return false;
+-
+- skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
+- !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
+-out:
+- unparsed->vx_flags &= ~VXLAN_HF_RCO;
+- unparsed->vx_vni &= VXLAN_VNI_MASK;
+- return true;
+-}
+-
+-static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
+- struct sk_buff *skb, u32 vxflags,
+- struct vxlan_metadata *md)
+-{
+- struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
+- struct metadata_dst *tun_dst;
+-
+- if (!(unparsed->vx_flags & VXLAN_HF_GBP))
+- goto out;
+-
+- md->gbp = ntohs(gbp->policy_id);
+-
+- tun_dst = (struct metadata_dst *)skb_dst(skb);
+- if (tun_dst) {
+- tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+- tun_dst->u.tun_info.options_len = sizeof(*md);
+- }
+- if (gbp->dont_learn)
+- md->gbp |= VXLAN_GBP_DONT_LEARN;
+-
+- if (gbp->policy_applied)
+- md->gbp |= VXLAN_GBP_POLICY_APPLIED;
+-
+- /* In flow-based mode, GBP is carried in dst_metadata */
+- if (!(vxflags & VXLAN_F_COLLECT_METADATA))
+- skb->mark = md->gbp;
+-out:
+- unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
+-}
+-
+-static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+- __be16 *protocol,
+- struct sk_buff *skb, u32 vxflags)
+-{
+- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
+-
+- /* Need to have Next Protocol set for interfaces in GPE mode. */
+- if (!gpe->np_applied)
+- return false;
+- /* "The initial version is 0. If a receiver does not support the
+-	 * version indicated it MUST drop the packet."
+- */
+- if (gpe->version != 0)
+- return false;
+- /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+- * processing MUST occur." However, we don't implement OAM
+- * processing, thus drop the packet.
+- */
+- if (gpe->oam_flag)
+- return false;
+-
+- *protocol = tun_p_to_eth_p(gpe->next_protocol);
+- if (!*protocol)
+- return false;
+-
+- unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
+- return true;
+-}
+-
+-static bool vxlan_set_mac(struct vxlan_dev *vxlan,
+- struct vxlan_sock *vs,
+- struct sk_buff *skb, __be32 vni)
+-{
+- union vxlan_addr saddr;
+- u32 ifindex = skb->dev->ifindex;
+-
+- skb_reset_mac_header(skb);
+- skb->protocol = eth_type_trans(skb, vxlan->dev);
+- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+-
+- /* Ignore packet loops (and multicast echo) */
+- if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
+- return false;
+-
+- /* Get address from the outer IP header */
+- if (vxlan_get_sk_family(vs) == AF_INET) {
+- saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+- saddr.sa.sa_family = AF_INET;
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+- saddr.sa.sa_family = AF_INET6;
+-#endif
+- }
+-
+- if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
+- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
+- return false;
+-
+- return true;
+-}
+-
+-static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
+- struct sk_buff *skb)
+-{
+- int err = 0;
+-
+- if (vxlan_get_sk_family(vs) == AF_INET)
+- err = IP_ECN_decapsulate(oiph, skb);
+-#if IS_ENABLED(CONFIG_IPV6)
+- else
+- err = IP6_ECN_decapsulate(oiph, skb);
+-#endif
+-
+- if (unlikely(err) && log_ecn_error) {
+- if (vxlan_get_sk_family(vs) == AF_INET)
+- net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+- &((struct iphdr *)oiph)->saddr,
+- ((struct iphdr *)oiph)->tos);
+- else
+- net_info_ratelimited("non-ECT from %pI6\n",
+- &((struct ipv6hdr *)oiph)->saddr);
+- }
+- return err <= 1;
+-}
+-
+-/* Callback from net/ipv4/udp.c to receive packets */
+-static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
+-{
+- struct vxlan_dev *vxlan;
+- struct vxlan_sock *vs;
+- struct vxlanhdr unparsed;
+- struct vxlan_metadata _md;
+- struct vxlan_metadata *md = &_md;
+- __be16 protocol = htons(ETH_P_TEB);
+- bool raw_proto = false;
+- void *oiph;
+- __be32 vni = 0;
+-
+- /* Need UDP and VXLAN header to be present */
+- if (!pskb_may_pull(skb, VXLAN_HLEN))
+- goto drop;
+-
+- unparsed = *vxlan_hdr(skb);
+- /* VNI flag always required to be set */
+- if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
+- netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+- ntohl(vxlan_hdr(skb)->vx_flags),
+- ntohl(vxlan_hdr(skb)->vx_vni));
+- /* Return non vxlan pkt */
+- goto drop;
+- }
+- unparsed.vx_flags &= ~VXLAN_HF_VNI;
+- unparsed.vx_vni &= ~VXLAN_VNI_MASK;
+-
+- vs = rcu_dereference_sk_user_data(sk);
+- if (!vs)
+- goto drop;
+-
+- vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+-
+- vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+- if (!vxlan)
+- goto drop;
+-
+- /* For backwards compatibility, only allow reserved fields to be
+- * used by VXLAN extensions if explicitly requested.
+- */
+- if (vs->flags & VXLAN_F_GPE) {
+- if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+- goto drop;
+- raw_proto = true;
+- }
+-
+- if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
+- !net_eq(vxlan->net, dev_net(vxlan->dev))))
+- goto drop;
+-
+- if (vs->flags & VXLAN_F_REMCSUM_RX)
+- if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
+- goto drop;
+-
+- if (vxlan_collect_metadata(vs)) {
+- struct metadata_dst *tun_dst;
+-
+- tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
+- key32_to_tunnel_id(vni), sizeof(*md));
+-
+- if (!tun_dst)
+- goto drop;
+-
+- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+-
+- skb_dst_set(skb, (struct dst_entry *)tun_dst);
+- } else {
+- memset(md, 0, sizeof(*md));
+- }
+-
+- if (vs->flags & VXLAN_F_GBP)
+- vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
+- /* Note that GBP and GPE can never be active together. This is
+- * ensured in vxlan_dev_configure.
+- */
+-
+- if (unparsed.vx_flags || unparsed.vx_vni) {
+- /* If there are any unprocessed flags remaining treat
+- * this as a malformed packet. This behavior diverges from
+-		 * VXLAN RFC (RFC7348) which stipulates that bits in reserved
+-		 * fields are to be ignored. The approach here
+- * maintains compatibility with previous stack code, and also
+- * is more robust and provides a little more security in
+- * adding extensions to VXLAN.
+- */
+- goto drop;
+- }
+-
+- if (!raw_proto) {
+- if (!vxlan_set_mac(vxlan, vs, skb, vni))
+- goto drop;
+- } else {
+- skb_reset_mac_header(skb);
+- skb->dev = vxlan->dev;
+- skb->pkt_type = PACKET_HOST;
+- }
+-
+- oiph = skb_network_header(skb);
+- skb_reset_network_header(skb);
+-
+- if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
+- ++vxlan->dev->stats.rx_frame_errors;
+- ++vxlan->dev->stats.rx_errors;
+- goto drop;
+- }
+-
+- rcu_read_lock();
+-
+- if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
+- rcu_read_unlock();
+- atomic_long_inc(&vxlan->dev->rx_dropped);
+- goto drop;
+- }
+-
+- dev_sw_netstats_rx_add(vxlan->dev, skb->len);
+- gro_cells_receive(&vxlan->gro_cells, skb);
+-
+- rcu_read_unlock();
+-
+- return 0;
+-
+-drop:
+- /* Consume bad packet */
+- kfree_skb(skb);
+- return 0;
+-}
+-
+-/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
+-static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
+-{
+- struct vxlan_dev *vxlan;
+- struct vxlan_sock *vs;
+- struct vxlanhdr *hdr;
+- __be32 vni;
+-
+- if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
+- return -EINVAL;
+-
+- hdr = vxlan_hdr(skb);
+-
+- if (!(hdr->vx_flags & VXLAN_HF_VNI))
+- return -EINVAL;
+-
+- vs = rcu_dereference_sk_user_data(sk);
+- if (!vs)
+- return -ENOENT;
+-
+- vni = vxlan_vni(hdr->vx_vni);
+- vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+- if (!vxlan)
+- return -ENOENT;
+-
+- return 0;
+-}
+-
+-static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct arphdr *parp;
+- u8 *arpptr, *sha;
+- __be32 sip, tip;
+- struct neighbour *n;
+-
+- if (dev->flags & IFF_NOARP)
+- goto out;
+-
+- if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
+- dev->stats.tx_dropped++;
+- goto out;
+- }
+- parp = arp_hdr(skb);
+-
+- if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
+- parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+- parp->ar_pro != htons(ETH_P_IP) ||
+- parp->ar_op != htons(ARPOP_REQUEST) ||
+- parp->ar_hln != dev->addr_len ||
+- parp->ar_pln != 4)
+- goto out;
+- arpptr = (u8 *)parp + sizeof(struct arphdr);
+- sha = arpptr;
+- arpptr += dev->addr_len; /* sha */
+- memcpy(&sip, arpptr, sizeof(sip));
+- arpptr += sizeof(sip);
+- arpptr += dev->addr_len; /* tha */
+- memcpy(&tip, arpptr, sizeof(tip));
+-
+- if (ipv4_is_loopback(tip) ||
+- ipv4_is_multicast(tip))
+- goto out;
+-
+- n = neigh_lookup(&arp_tbl, &tip, dev);
+-
+- if (n) {
+- struct vxlan_fdb *f;
+- struct sk_buff *reply;
+-
+- if (!(n->nud_state & NUD_CONNECTED)) {
+- neigh_release(n);
+- goto out;
+- }
+-
+- f = vxlan_find_mac(vxlan, n->ha, vni);
+- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+- /* bridge-local neighbor */
+- neigh_release(n);
+- goto out;
+- }
+-
+- reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
+- n->ha, sha);
+-
+- neigh_release(n);
+-
+- if (reply == NULL)
+- goto out;
+-
+- skb_reset_mac_header(reply);
+- __skb_pull(reply, skb_network_offset(reply));
+- reply->ip_summed = CHECKSUM_UNNECESSARY;
+- reply->pkt_type = PACKET_HOST;
+-
+- if (netif_rx_ni(reply) == NET_RX_DROP)
+- dev->stats.rx_dropped++;
+- } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
+- union vxlan_addr ipa = {
+- .sin.sin_addr.s_addr = tip,
+- .sin.sin_family = AF_INET,
+- };
+-
+- vxlan_ip_miss(dev, &ipa);
+- }
+-out:
+- consume_skb(skb);
+- return NETDEV_TX_OK;
+-}
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+-static struct sk_buff *vxlan_na_create(struct sk_buff *request,
+- struct neighbour *n, bool isrouter)
+-{
+- struct net_device *dev = request->dev;
+- struct sk_buff *reply;
+- struct nd_msg *ns, *na;
+- struct ipv6hdr *pip6;
+- u8 *daddr;
+- int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+- int ns_olen;
+- int i, len;
+-
+- if (dev == NULL || !pskb_may_pull(request, request->len))
+- return NULL;
+-
+- len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+- sizeof(*na) + na_olen + dev->needed_tailroom;
+- reply = alloc_skb(len, GFP_ATOMIC);
+- if (reply == NULL)
+- return NULL;
+-
+- reply->protocol = htons(ETH_P_IPV6);
+- reply->dev = dev;
+- skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
+- skb_push(reply, sizeof(struct ethhdr));
+- skb_reset_mac_header(reply);
+-
+- ns = (struct nd_msg *)(ipv6_hdr(request) + 1);
+-
+- daddr = eth_hdr(request)->h_source;
+- ns_olen = request->len - skb_network_offset(request) -
+- sizeof(struct ipv6hdr) - sizeof(*ns);
+- for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
+- if (!ns->opt[i + 1]) {
+- kfree_skb(reply);
+- return NULL;
+- }
+- if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+- daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+- break;
+- }
+- }
+-
+- /* Ethernet header */
+- ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
+- ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
+- eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+- reply->protocol = htons(ETH_P_IPV6);
+-
+- skb_pull(reply, sizeof(struct ethhdr));
+- skb_reset_network_header(reply);
+- skb_put(reply, sizeof(struct ipv6hdr));
+-
+- /* IPv6 header */
+-
+- pip6 = ipv6_hdr(reply);
+- memset(pip6, 0, sizeof(struct ipv6hdr));
+- pip6->version = 6;
+- pip6->priority = ipv6_hdr(request)->priority;
+- pip6->nexthdr = IPPROTO_ICMPV6;
+- pip6->hop_limit = 255;
+- pip6->daddr = ipv6_hdr(request)->saddr;
+- pip6->saddr = *(struct in6_addr *)n->primary_key;
+-
+- skb_pull(reply, sizeof(struct ipv6hdr));
+- skb_reset_transport_header(reply);
+-
+- /* Neighbor Advertisement */
+- na = skb_put_zero(reply, sizeof(*na) + na_olen);
+- na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+- na->icmph.icmp6_router = isrouter;
+- na->icmph.icmp6_override = 1;
+- na->icmph.icmp6_solicited = 1;
+- na->target = ns->target;
+- ether_addr_copy(&na->opt[2], n->ha);
+- na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+- na->opt[1] = na_olen >> 3;
+-
+- na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+- &pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
+- csum_partial(na, sizeof(*na)+na_olen, 0));
+-
+- pip6->payload_len = htons(sizeof(*na)+na_olen);
+-
+- skb_push(reply, sizeof(struct ipv6hdr));
+-
+- reply->ip_summed = CHECKSUM_UNNECESSARY;
+-
+- return reply;
+-}
+-
+-static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- const struct in6_addr *daddr;
+- const struct ipv6hdr *iphdr;
+- struct inet6_dev *in6_dev;
+- struct neighbour *n;
+- struct nd_msg *msg;
+-
+- rcu_read_lock();
+- in6_dev = __in6_dev_get(dev);
+- if (!in6_dev)
+- goto out;
+-
+- iphdr = ipv6_hdr(skb);
+- daddr = &iphdr->daddr;
+- msg = (struct nd_msg *)(iphdr + 1);
+-
+- if (ipv6_addr_loopback(daddr) ||
+- ipv6_addr_is_multicast(&msg->target))
+- goto out;
+-
+- n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
+-
+- if (n) {
+- struct vxlan_fdb *f;
+- struct sk_buff *reply;
+-
+- if (!(n->nud_state & NUD_CONNECTED)) {
+- neigh_release(n);
+- goto out;
+- }
+-
+- f = vxlan_find_mac(vxlan, n->ha, vni);
+- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+- /* bridge-local neighbor */
+- neigh_release(n);
+- goto out;
+- }
+-
+- reply = vxlan_na_create(skb, n,
+- !!(f ? f->flags & NTF_ROUTER : 0));
+-
+- neigh_release(n);
+-
+- if (reply == NULL)
+- goto out;
+-
+- if (netif_rx_ni(reply) == NET_RX_DROP)
+- dev->stats.rx_dropped++;
+-
+- } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
+- union vxlan_addr ipa = {
+- .sin6.sin6_addr = msg->target,
+- .sin6.sin6_family = AF_INET6,
+- };
+-
+- vxlan_ip_miss(dev, &ipa);
+- }
+-
+-out:
+- rcu_read_unlock();
+- consume_skb(skb);
+- return NETDEV_TX_OK;
+-}
+-#endif
+-
+-static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct neighbour *n;
+-
+- if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
+- return false;
+-
+- n = NULL;
+- switch (ntohs(eth_hdr(skb)->h_proto)) {
+- case ETH_P_IP:
+- {
+- struct iphdr *pip;
+-
+- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+- return false;
+- pip = ip_hdr(skb);
+- n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
+- if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
+- union vxlan_addr ipa = {
+- .sin.sin_addr.s_addr = pip->daddr,
+- .sin.sin_family = AF_INET,
+- };
+-
+- vxlan_ip_miss(dev, &ipa);
+- return false;
+- }
+-
+- break;
+- }
+-#if IS_ENABLED(CONFIG_IPV6)
+- case ETH_P_IPV6:
+- {
+- struct ipv6hdr *pip6;
+-
+- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+- return false;
+- pip6 = ipv6_hdr(skb);
+- n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
+- if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
+- union vxlan_addr ipa = {
+- .sin6.sin6_addr = pip6->daddr,
+- .sin6.sin6_family = AF_INET6,
+- };
+-
+- vxlan_ip_miss(dev, &ipa);
+- return false;
+- }
+-
+- break;
+- }
+-#endif
+- default:
+- return false;
+- }
+-
+- if (n) {
+- bool diff;
+-
+- diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
+- if (diff) {
+- memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+- dev->addr_len);
+- memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
+- }
+- neigh_release(n);
+- return diff;
+- }
+-
+- return false;
+-}
+-
+-static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
+- struct vxlan_metadata *md)
+-{
+- struct vxlanhdr_gbp *gbp;
+-
+- if (!md->gbp)
+- return;
+-
+- gbp = (struct vxlanhdr_gbp *)vxh;
+- vxh->vx_flags |= VXLAN_HF_GBP;
+-
+- if (md->gbp & VXLAN_GBP_DONT_LEARN)
+- gbp->dont_learn = 1;
+-
+- if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+- gbp->policy_applied = 1;
+-
+- gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+-}
+-
+-static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
+- __be16 protocol)
+-{
+- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
+-
+- gpe->np_applied = 1;
+- gpe->next_protocol = tun_p_from_eth_p(protocol);
+- if (!gpe->next_protocol)
+- return -EPFNOSUPPORT;
+- return 0;
+-}
+-
+-static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
+- int iphdr_len, __be32 vni,
+- struct vxlan_metadata *md, u32 vxflags,
+- bool udp_sum)
+-{
+- struct vxlanhdr *vxh;
+- int min_headroom;
+- int err;
+- int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+- __be16 inner_protocol = htons(ETH_P_TEB);
+-
+- if ((vxflags & VXLAN_F_REMCSUM_TX) &&
+- skb->ip_summed == CHECKSUM_PARTIAL) {
+- int csum_start = skb_checksum_start_offset(skb);
+-
+- if (csum_start <= VXLAN_MAX_REMCSUM_START &&
+- !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
+- (skb->csum_offset == offsetof(struct udphdr, check) ||
+- skb->csum_offset == offsetof(struct tcphdr, check)))
+- type |= SKB_GSO_TUNNEL_REMCSUM;
+- }
+-
+- min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+- + VXLAN_HLEN + iphdr_len;
+-
+- /* Need space for new headers (invalidates iph ptr) */
+- err = skb_cow_head(skb, min_headroom);
+- if (unlikely(err))
+- return err;
+-
+- err = iptunnel_handle_offloads(skb, type);
+- if (err)
+- return err;
+-
+- vxh = __skb_push(skb, sizeof(*vxh));
+- vxh->vx_flags = VXLAN_HF_VNI;
+- vxh->vx_vni = vxlan_vni_field(vni);
+-
+- if (type & SKB_GSO_TUNNEL_REMCSUM) {
+- unsigned int start;
+-
+- start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
+- vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
+- vxh->vx_flags |= VXLAN_HF_RCO;
+-
+- if (!skb_is_gso(skb)) {
+- skb->ip_summed = CHECKSUM_NONE;
+- skb->encapsulation = 0;
+- }
+- }
+-
+- if (vxflags & VXLAN_F_GBP)
+- vxlan_build_gbp_hdr(vxh, vxflags, md);
+- if (vxflags & VXLAN_F_GPE) {
+- err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
+- if (err < 0)
+- return err;
+- inner_protocol = skb->protocol;
+- }
+-
+- skb_set_inner_protocol(skb, inner_protocol);
+- return 0;
+-}
+-
+-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
+- struct vxlan_sock *sock4,
+- struct sk_buff *skb, int oif, u8 tos,
+- __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
+- struct dst_cache *dst_cache,
+- const struct ip_tunnel_info *info)
+-{
+- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+- struct rtable *rt = NULL;
+- struct flowi4 fl4;
+-
+- if (!sock4)
+- return ERR_PTR(-EIO);
+-
+- if (tos && !info)
+- use_cache = false;
+- if (use_cache) {
+- rt = dst_cache_get_ip4(dst_cache, saddr);
+- if (rt)
+- return rt;
+- }
+-
+- memset(&fl4, 0, sizeof(fl4));
+- fl4.flowi4_oif = oif;
+- fl4.flowi4_tos = RT_TOS(tos);
+- fl4.flowi4_mark = skb->mark;
+- fl4.flowi4_proto = IPPROTO_UDP;
+- fl4.daddr = daddr;
+- fl4.saddr = *saddr;
+- fl4.fl4_dport = dport;
+- fl4.fl4_sport = sport;
+-
+- rt = ip_route_output_key(vxlan->net, &fl4);
+- if (!IS_ERR(rt)) {
+- if (rt->dst.dev == dev) {
+- netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+- ip_rt_put(rt);
+- return ERR_PTR(-ELOOP);
+- }
+-
+- *saddr = fl4.saddr;
+- if (use_cache)
+- dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+- } else {
+- netdev_dbg(dev, "no route to %pI4\n", &daddr);
+- return ERR_PTR(-ENETUNREACH);
+- }
+- return rt;
+-}
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+-static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+- struct net_device *dev,
+- struct vxlan_sock *sock6,
+- struct sk_buff *skb, int oif, u8 tos,
+- __be32 label,
+- const struct in6_addr *daddr,
+- struct in6_addr *saddr,
+- __be16 dport, __be16 sport,
+- struct dst_cache *dst_cache,
+- const struct ip_tunnel_info *info)
+-{
+- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+- struct dst_entry *ndst;
+- struct flowi6 fl6;
+-
+- if (!sock6)
+- return ERR_PTR(-EIO);
+-
+- if (tos && !info)
+- use_cache = false;
+- if (use_cache) {
+- ndst = dst_cache_get_ip6(dst_cache, saddr);
+- if (ndst)
+- return ndst;
+- }
+-
+- memset(&fl6, 0, sizeof(fl6));
+- fl6.flowi6_oif = oif;
+- fl6.daddr = *daddr;
+- fl6.saddr = *saddr;
+- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
+- fl6.flowi6_mark = skb->mark;
+- fl6.flowi6_proto = IPPROTO_UDP;
+- fl6.fl6_dport = dport;
+- fl6.fl6_sport = sport;
+-
+- ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
+- &fl6, NULL);
+- if (IS_ERR(ndst)) {
+- netdev_dbg(dev, "no route to %pI6\n", daddr);
+- return ERR_PTR(-ENETUNREACH);
+- }
+-
+- if (unlikely(ndst->dev == dev)) {
+- netdev_dbg(dev, "circular route to %pI6\n", daddr);
+- dst_release(ndst);
+- return ERR_PTR(-ELOOP);
+- }
+-
+- *saddr = fl6.saddr;
+- if (use_cache)
+- dst_cache_set_ip6(dst_cache, ndst, saddr);
+- return ndst;
+-}
+-#endif
+-
+-/* Bypass encapsulation if the destination is local */
+-static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
+- struct vxlan_dev *dst_vxlan, __be32 vni,
+- bool snoop)
+-{
+- struct pcpu_sw_netstats *tx_stats, *rx_stats;
+- union vxlan_addr loopback;
+- union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
+- struct net_device *dev;
+- int len = skb->len;
+-
+- tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
+- rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
+- skb->pkt_type = PACKET_HOST;
+- skb->encapsulation = 0;
+- skb->dev = dst_vxlan->dev;
+- __skb_pull(skb, skb_network_offset(skb));
+-
+- if (remote_ip->sa.sa_family == AF_INET) {
+- loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+- loopback.sa.sa_family = AF_INET;
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- loopback.sin6.sin6_addr = in6addr_loopback;
+- loopback.sa.sa_family = AF_INET6;
+-#endif
+- }
+-
+- rcu_read_lock();
+- dev = skb->dev;
+- if (unlikely(!(dev->flags & IFF_UP))) {
+- kfree_skb(skb);
+- goto drop;
+- }
+-
+- if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
+- vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
+-
+- u64_stats_update_begin(&tx_stats->syncp);
+- tx_stats->tx_packets++;
+- tx_stats->tx_bytes += len;
+- u64_stats_update_end(&tx_stats->syncp);
+-
+- if (netif_rx(skb) == NET_RX_SUCCESS) {
+- u64_stats_update_begin(&rx_stats->syncp);
+- rx_stats->rx_packets++;
+- rx_stats->rx_bytes += len;
+- u64_stats_update_end(&rx_stats->syncp);
+- } else {
+-drop:
+- dev->stats.rx_dropped++;
+- }
+- rcu_read_unlock();
+-}
+-
+-static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
+- struct vxlan_dev *vxlan,
+- union vxlan_addr *daddr,
+- __be16 dst_port, int dst_ifindex, __be32 vni,
+- struct dst_entry *dst,
+- u32 rt_flags)
+-{
+-#if IS_ENABLED(CONFIG_IPV6)
+- /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
+- * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
+- * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
+- */
+- BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
+-#endif
+- /* Bypass encapsulation if the destination is local */
+- if (rt_flags & RTCF_LOCAL &&
+- !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+- struct vxlan_dev *dst_vxlan;
+-
+- dst_release(dst);
+- dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
+- daddr->sa.sa_family, dst_port,
+- vxlan->cfg.flags);
+- if (!dst_vxlan) {
+- dev->stats.tx_errors++;
+- kfree_skb(skb);
+-
+- return -ENOENT;
+- }
+- vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+- __be32 default_vni, struct vxlan_rdst *rdst,
+- bool did_rsc)
+-{
+- struct dst_cache *dst_cache;
+- struct ip_tunnel_info *info;
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- const struct iphdr *old_iph = ip_hdr(skb);
+- union vxlan_addr *dst;
+- union vxlan_addr remote_ip, local_ip;
+- struct vxlan_metadata _md;
+- struct vxlan_metadata *md = &_md;
+- __be16 src_port = 0, dst_port;
+- struct dst_entry *ndst = NULL;
+- __be32 vni, label;
+- __u8 tos, ttl;
+- int ifindex;
+- int err;
+- u32 flags = vxlan->cfg.flags;
+- bool udp_sum = false;
+- bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
+-
+- info = skb_tunnel_info(skb);
+-
+- if (rdst) {
+- dst = &rdst->remote_ip;
+- if (vxlan_addr_any(dst)) {
+- if (did_rsc) {
+- /* short-circuited back to local bridge */
+- vxlan_encap_bypass(skb, vxlan, vxlan,
+- default_vni, true);
+- return;
+- }
+- goto drop;
+- }
+-
+- dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
+- vni = (rdst->remote_vni) ? : default_vni;
+- ifindex = rdst->remote_ifindex;
+- local_ip = vxlan->cfg.saddr;
+- dst_cache = &rdst->dst_cache;
+- md->gbp = skb->mark;
+- if (flags & VXLAN_F_TTL_INHERIT) {
+- ttl = ip_tunnel_get_ttl(old_iph, skb);
+- } else {
+- ttl = vxlan->cfg.ttl;
+- if (!ttl && vxlan_addr_multicast(dst))
+- ttl = 1;
+- }
+-
+- tos = vxlan->cfg.tos;
+- if (tos == 1)
+- tos = ip_tunnel_get_dsfield(old_iph, skb);
+-
+- if (dst->sa.sa_family == AF_INET)
+- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+- else
+- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+- label = vxlan->cfg.label;
+- } else {
+- if (!info) {
+- WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
+- dev->name);
+- goto drop;
+- }
+- remote_ip.sa.sa_family = ip_tunnel_info_af(info);
+- if (remote_ip.sa.sa_family == AF_INET) {
+- remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
+- local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+- } else {
+- remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+- local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+- }
+- dst = &remote_ip;
+- dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
+- vni = tunnel_id_to_key32(info->key.tun_id);
+- ifindex = 0;
+- dst_cache = &info->dst_cache;
+- if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+- if (info->options_len < sizeof(*md))
+- goto drop;
+- md = ip_tunnel_info_opts(info);
+- }
+- ttl = info->key.ttl;
+- tos = info->key.tos;
+- label = info->key.label;
+- udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
+- }
+- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+- vxlan->cfg.port_max, true);
+-
+- rcu_read_lock();
+- if (dst->sa.sa_family == AF_INET) {
+- struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+- struct rtable *rt;
+- __be16 df = 0;
+-
+- if (!ifindex)
+- ifindex = sock4->sock->sk->sk_bound_dev_if;
+-
+- rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
+- dst->sin.sin_addr.s_addr,
+- &local_ip.sin.sin_addr.s_addr,
+- dst_port, src_port,
+- dst_cache, info);
+- if (IS_ERR(rt)) {
+- err = PTR_ERR(rt);
+- goto tx_error;
+- }
+-
+- if (!info) {
+- /* Bypass encapsulation if the destination is local */
+- err = encap_bypass_if_local(skb, dev, vxlan, dst,
+- dst_port, ifindex, vni,
+- &rt->dst, rt->rt_flags);
+- if (err)
+- goto out_unlock;
+-
+- if (vxlan->cfg.df == VXLAN_DF_SET) {
+- df = htons(IP_DF);
+- } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
+- struct ethhdr *eth = eth_hdr(skb);
+-
+- if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
+- (ntohs(eth->h_proto) == ETH_P_IP &&
+- old_iph->frag_off & htons(IP_DF)))
+- df = htons(IP_DF);
+- }
+- } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
+- df = htons(IP_DF);
+- }
+-
+- ndst = &rt->dst;
+- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+- netif_is_any_bridge_port(dev));
+- if (err < 0) {
+- goto tx_error;
+- } else if (err) {
+- if (info) {
+- struct ip_tunnel_info *unclone;
+- struct in_addr src, dst;
+-
+- unclone = skb_tunnel_info_unclone(skb);
+- if (unlikely(!unclone))
+- goto tx_error;
+-
+- src = remote_ip.sin.sin_addr;
+- dst = local_ip.sin.sin_addr;
+- unclone->key.u.ipv4.src = src.s_addr;
+- unclone->key.u.ipv4.dst = dst.s_addr;
+- }
+- vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+- dst_release(ndst);
+- goto out_unlock;
+- }
+-
+- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+- ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+- err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
+- vni, md, flags, udp_sum);
+- if (err < 0)
+- goto tx_error;
+-
+- udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
+- dst->sin.sin_addr.s_addr, tos, ttl, df,
+- src_port, dst_port, xnet, !udp_sum);
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+-
+- if (!ifindex)
+- ifindex = sock6->sock->sk->sk_bound_dev_if;
+-
+- ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
+- label, &dst->sin6.sin6_addr,
+- &local_ip.sin6.sin6_addr,
+- dst_port, src_port,
+- dst_cache, info);
+- if (IS_ERR(ndst)) {
+- err = PTR_ERR(ndst);
+- ndst = NULL;
+- goto tx_error;
+- }
+-
+- if (!info) {
+- u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
+-
+- err = encap_bypass_if_local(skb, dev, vxlan, dst,
+- dst_port, ifindex, vni,
+- ndst, rt6i_flags);
+- if (err)
+- goto out_unlock;
+- }
+-
+- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+- netif_is_any_bridge_port(dev));
+- if (err < 0) {
+- goto tx_error;
+- } else if (err) {
+- if (info) {
+- struct ip_tunnel_info *unclone;
+- struct in6_addr src, dst;
+-
+- unclone = skb_tunnel_info_unclone(skb);
+- if (unlikely(!unclone))
+- goto tx_error;
+-
+- src = remote_ip.sin6.sin6_addr;
+- dst = local_ip.sin6.sin6_addr;
+- unclone->key.u.ipv6.src = src;
+- unclone->key.u.ipv6.dst = dst;
+- }
+-
+- vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+- dst_release(ndst);
+- goto out_unlock;
+- }
+-
+- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+- ttl = ttl ? : ip6_dst_hoplimit(ndst);
+- skb_scrub_packet(skb, xnet);
+- err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
+- vni, md, flags, udp_sum);
+- if (err < 0)
+- goto tx_error;
+-
+- udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
+- &local_ip.sin6.sin6_addr,
+- &dst->sin6.sin6_addr, tos, ttl,
+- label, src_port, dst_port, !udp_sum);
+-#endif
+- }
+-out_unlock:
+- rcu_read_unlock();
+- return;
+-
+-drop:
+- dev->stats.tx_dropped++;
+- dev_kfree_skb(skb);
+- return;
+-
+-tx_error:
+- rcu_read_unlock();
+- if (err == -ELOOP)
+- dev->stats.collisions++;
+- else if (err == -ENETUNREACH)
+- dev->stats.tx_carrier_errors++;
+- dst_release(ndst);
+- dev->stats.tx_errors++;
+- kfree_skb(skb);
+-}
+-
+-static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
+- struct vxlan_fdb *f, __be32 vni, bool did_rsc)
+-{
+- struct vxlan_rdst nh_rdst;
+- struct nexthop *nh;
+- bool do_xmit;
+- u32 hash;
+-
+- memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+- hash = skb_get_hash(skb);
+-
+- rcu_read_lock();
+- nh = rcu_dereference(f->nh);
+- if (!nh) {
+- rcu_read_unlock();
+- goto drop;
+- }
+- do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+- rcu_read_unlock();
+-
+- if (likely(do_xmit))
+- vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
+- else
+- goto drop;
+-
+- return;
+-
+-drop:
+- dev->stats.tx_dropped++;
+- dev_kfree_skb(skb);
+-}
+-
+-/* Transmit local packets over VXLAN
+- *
+- * Outer IP header inherits ECN and DF from inner header.
+- * Outer UDP destination is the VXLAN assigned port.
+- * Source port is based on a hash of the flow.
+- */
+-static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_rdst *rdst, *fdst = NULL;
+- const struct ip_tunnel_info *info;
+- bool did_rsc = false;
+- struct vxlan_fdb *f;
+- struct ethhdr *eth;
+- __be32 vni = 0;
+-
+- info = skb_tunnel_info(skb);
+-
+- skb_reset_mac_header(skb);
+-
+- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
+- if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
+- info->mode & IP_TUNNEL_INFO_TX) {
+- vni = tunnel_id_to_key32(info->key.tun_id);
+- } else {
+- if (info && info->mode & IP_TUNNEL_INFO_TX)
+- vxlan_xmit_one(skb, dev, vni, NULL, false);
+- else
+- kfree_skb(skb);
+- return NETDEV_TX_OK;
+- }
+- }
+-
+- if (vxlan->cfg.flags & VXLAN_F_PROXY) {
+- eth = eth_hdr(skb);
+- if (ntohs(eth->h_proto) == ETH_P_ARP)
+- return arp_reduce(dev, skb, vni);
+-#if IS_ENABLED(CONFIG_IPV6)
+- else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
+- pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+- sizeof(struct nd_msg)) &&
+- ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+- struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
+-
+- if (m->icmph.icmp6_code == 0 &&
+- m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
+- return neigh_reduce(dev, skb, vni);
+- }
+-#endif
+- }
+-
+- eth = eth_hdr(skb);
+- f = vxlan_find_mac(vxlan, eth->h_dest, vni);
+- did_rsc = false;
+-
+- if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&
+- (ntohs(eth->h_proto) == ETH_P_IP ||
+- ntohs(eth->h_proto) == ETH_P_IPV6)) {
+- did_rsc = route_shortcircuit(dev, skb);
+- if (did_rsc)
+- f = vxlan_find_mac(vxlan, eth->h_dest, vni);
+- }
+-
+- if (f == NULL) {
+- f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
+- if (f == NULL) {
+- if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&
+- !is_multicast_ether_addr(eth->h_dest))
+- vxlan_fdb_miss(vxlan, eth->h_dest);
+-
+- dev->stats.tx_dropped++;
+- kfree_skb(skb);
+- return NETDEV_TX_OK;
+- }
+- }
+-
+- if (rcu_access_pointer(f->nh)) {
+- vxlan_xmit_nh(skb, dev, f,
+- (vni ? : vxlan->default_dst.remote_vni), did_rsc);
+- } else {
+- list_for_each_entry_rcu(rdst, &f->remotes, list) {
+- struct sk_buff *skb1;
+-
+- if (!fdst) {
+- fdst = rdst;
+- continue;
+- }
+- skb1 = skb_clone(skb, GFP_ATOMIC);
+- if (skb1)
+- vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
+- }
+- if (fdst)
+- vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
+- else
+- kfree_skb(skb);
+- }
+-
+- return NETDEV_TX_OK;
+-}
+-
+-/* Walk the forwarding table and purge stale entries */
+-static void vxlan_cleanup(struct timer_list *t)
+-{
+- struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
+- unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
+- unsigned int h;
+-
+- if (!netif_running(vxlan->dev))
+- return;
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- struct hlist_node *p, *n;
+-
+- spin_lock(&vxlan->hash_lock[h]);
+- hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
+- struct vxlan_fdb *f
+- = container_of(p, struct vxlan_fdb, hlist);
+- unsigned long timeout;
+-
+- if (f->state & (NUD_PERMANENT | NUD_NOARP))
+- continue;
+-
+- if (f->flags & NTF_EXT_LEARNED)
+- continue;
+-
+- timeout = f->used + vxlan->cfg.age_interval * HZ;
+- if (time_before_eq(timeout, jiffies)) {
+- netdev_dbg(vxlan->dev,
+- "garbage collect %pM\n",
+- f->eth_addr);
+- f->state = NUD_STALE;
+- vxlan_fdb_destroy(vxlan, f, true, true);
+- } else if (time_before(timeout, next_timer))
+- next_timer = timeout;
+- }
+- spin_unlock(&vxlan->hash_lock[h]);
+- }
+-
+- mod_timer(&vxlan->age_timer, next_timer);
+-}
+-
+-static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
+-{
+- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+-
+- spin_lock(&vn->sock_lock);
+- hlist_del_init_rcu(&vxlan->hlist4.hlist);
+-#if IS_ENABLED(CONFIG_IPV6)
+- hlist_del_init_rcu(&vxlan->hlist6.hlist);
+-#endif
+- spin_unlock(&vn->sock_lock);
+-}
+-
+-static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
+- struct vxlan_dev_node *node)
+-{
+- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+- __be32 vni = vxlan->default_dst.remote_vni;
+-
+- node->vxlan = vxlan;
+- spin_lock(&vn->sock_lock);
+- hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
+- spin_unlock(&vn->sock_lock);
+-}
+-
+-/* Setup stats when device is created */
+-static int vxlan_init(struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- int err;
+-
+- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+- if (!dev->tstats)
+- return -ENOMEM;
+-
+- err = gro_cells_init(&vxlan->gro_cells, dev);
+- if (err) {
+- free_percpu(dev->tstats);
+- return err;
+- }
+-
+- return 0;
+-}
+-
+-static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
+-{
+- struct vxlan_fdb *f;
+- u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
+-
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
+- if (f)
+- vxlan_fdb_destroy(vxlan, f, true, true);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-}
+-
+-static void vxlan_uninit(struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+-
+- gro_cells_destroy(&vxlan->gro_cells);
+-
+- vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
+-
+- free_percpu(dev->tstats);
+-}
+-
+-/* Start ageing timer and join group when device is brought up */
+-static int vxlan_open(struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- int ret;
+-
+- ret = vxlan_sock_add(vxlan);
+- if (ret < 0)
+- return ret;
+-
+- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
+- ret = vxlan_igmp_join(vxlan);
+- if (ret == -EADDRINUSE)
+- ret = 0;
+- if (ret) {
+- vxlan_sock_release(vxlan);
+- return ret;
+- }
+- }
+-
+- if (vxlan->cfg.age_interval)
+- mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
+-
+- return ret;
+-}
+-
+-/* Purge the forwarding table */
+-static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
+-{
+- unsigned int h;
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- struct hlist_node *p, *n;
+-
+- spin_lock_bh(&vxlan->hash_lock[h]);
+- hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
+- struct vxlan_fdb *f
+- = container_of(p, struct vxlan_fdb, hlist);
+- if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
+- continue;
+- /* the all_zeros_mac entry is deleted at vxlan_uninit */
+- if (is_zero_ether_addr(f->eth_addr) &&
+- f->vni == vxlan->cfg.vni)
+- continue;
+- vxlan_fdb_destroy(vxlan, f, true, true);
+- }
+- spin_unlock_bh(&vxlan->hash_lock[h]);
+- }
+-}
+-
+-/* Cleanup timer and forwarding table on shutdown */
+-static int vxlan_stop(struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+- int ret = 0;
+-
+- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
+- !vxlan_group_used(vn, vxlan))
+- ret = vxlan_igmp_leave(vxlan);
+-
+- del_timer_sync(&vxlan->age_timer);
+-
+- vxlan_flush(vxlan, false);
+- vxlan_sock_release(vxlan);
+-
+- return ret;
+-}
+-
+-/* Stub, nothing needs to be done. */
+-static void vxlan_set_multicast_list(struct net_device *dev)
+-{
+-}
+-
+-static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_rdst *dst = &vxlan->default_dst;
+- struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+- dst->remote_ifindex);
+- bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
+-
+-	/* This check is different from dev->max_mtu, because it looks at
+-	 * the lowerdev->mtu rather than the static dev->max_mtu.
+- */
+- if (lowerdev) {
+- int max_mtu = lowerdev->mtu -
+- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+- if (new_mtu > max_mtu)
+- return -EINVAL;
+- }
+-
+- dev->mtu = new_mtu;
+- return 0;
+-}
+-
+-static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct ip_tunnel_info *info = skb_tunnel_info(skb);
+- __be16 sport, dport;
+-
+- sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+- vxlan->cfg.port_max, true);
+- dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
+-
+- if (ip_tunnel_info_af(info) == AF_INET) {
+- struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+- struct rtable *rt;
+-
+- rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
+- info->key.u.ipv4.dst,
+- &info->key.u.ipv4.src, dport, sport,
+- &info->dst_cache, info);
+- if (IS_ERR(rt))
+- return PTR_ERR(rt);
+- ip_rt_put(rt);
+- } else {
+-#if IS_ENABLED(CONFIG_IPV6)
+- struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+- struct dst_entry *ndst;
+-
+- ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
+- info->key.label, &info->key.u.ipv6.dst,
+- &info->key.u.ipv6.src, dport, sport,
+- &info->dst_cache, info);
+- if (IS_ERR(ndst))
+- return PTR_ERR(ndst);
+- dst_release(ndst);
+-#else /* !CONFIG_IPV6 */
+- return -EPFNOSUPPORT;
+-#endif
+- }
+- info->key.tp_src = sport;
+- info->key.tp_dst = dport;
+- return 0;
+-}
+-
+-static const struct net_device_ops vxlan_netdev_ether_ops = {
+- .ndo_init = vxlan_init,
+- .ndo_uninit = vxlan_uninit,
+- .ndo_open = vxlan_open,
+- .ndo_stop = vxlan_stop,
+- .ndo_start_xmit = vxlan_xmit,
+- .ndo_get_stats64 = dev_get_tstats64,
+- .ndo_set_rx_mode = vxlan_set_multicast_list,
+- .ndo_change_mtu = vxlan_change_mtu,
+- .ndo_validate_addr = eth_validate_addr,
+- .ndo_set_mac_address = eth_mac_addr,
+- .ndo_fdb_add = vxlan_fdb_add,
+- .ndo_fdb_del = vxlan_fdb_delete,
+- .ndo_fdb_dump = vxlan_fdb_dump,
+- .ndo_fdb_get = vxlan_fdb_get,
+- .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
+- .ndo_change_proto_down = dev_change_proto_down_generic,
+-};
+-
+-static const struct net_device_ops vxlan_netdev_raw_ops = {
+- .ndo_init = vxlan_init,
+- .ndo_uninit = vxlan_uninit,
+- .ndo_open = vxlan_open,
+- .ndo_stop = vxlan_stop,
+- .ndo_start_xmit = vxlan_xmit,
+- .ndo_get_stats64 = dev_get_tstats64,
+- .ndo_change_mtu = vxlan_change_mtu,
+- .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
+-};
+-
+-/* Info for udev, that this is a virtual tunnel endpoint */
+-static struct device_type vxlan_type = {
+- .name = "vxlan",
+-};
+-
+-/* Calls the caller's ndo_udp_tunnel_add in order to supply
+- * the listening VXLAN UDP ports. Callers are expected to
+- * implement ndo_udp_tunnel_add.
+- */
+-static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
+-{
+- struct vxlan_sock *vs;
+- struct net *net = dev_net(dev);
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+- unsigned int i;
+-
+- spin_lock(&vn->sock_lock);
+- for (i = 0; i < PORT_HASH_SIZE; ++i) {
+- hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
+- unsigned short type;
+-
+- if (vs->flags & VXLAN_F_GPE)
+- type = UDP_TUNNEL_TYPE_VXLAN_GPE;
+- else
+- type = UDP_TUNNEL_TYPE_VXLAN;
+-
+- if (push)
+- udp_tunnel_push_rx_port(dev, vs->sock, type);
+- else
+- udp_tunnel_drop_rx_port(dev, vs->sock, type);
+- }
+- }
+- spin_unlock(&vn->sock_lock);
+-}
+-
+-/* Initialize the device structure. */
+-static void vxlan_setup(struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- unsigned int h;
+-
+- eth_hw_addr_random(dev);
+- ether_setup(dev);
+-
+- dev->needs_free_netdev = true;
+- SET_NETDEV_DEVTYPE(dev, &vxlan_type);
+-
+- dev->features |= NETIF_F_LLTX;
+- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
+- dev->features |= NETIF_F_RXCSUM;
+- dev->features |= NETIF_F_GSO_SOFTWARE;
+-
+- dev->vlan_features = dev->features;
+- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
+- dev->hw_features |= NETIF_F_RXCSUM;
+- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+- netif_keep_dst(dev);
+- dev->priv_flags |= IFF_NO_QUEUE;
+-
+- /* MTU range: 68 - 65535 */
+- dev->min_mtu = ETH_MIN_MTU;
+- dev->max_mtu = ETH_MAX_MTU;
+-
+- INIT_LIST_HEAD(&vxlan->next);
+-
+- timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
+-
+- vxlan->dev = dev;
+-
+- for (h = 0; h < FDB_HASH_SIZE; ++h) {
+- spin_lock_init(&vxlan->hash_lock[h]);
+- INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
+- }
+-}
+-
+-static void vxlan_ether_setup(struct net_device *dev)
+-{
+- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+- dev->netdev_ops = &vxlan_netdev_ether_ops;
+-}
+-
+-static void vxlan_raw_setup(struct net_device *dev)
+-{
+- dev->header_ops = NULL;
+- dev->type = ARPHRD_NONE;
+- dev->hard_header_len = 0;
+- dev->addr_len = 0;
+- dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+- dev->netdev_ops = &vxlan_netdev_raw_ops;
+-}
+-
+-static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
+- [IFLA_VXLAN_ID] = { .type = NLA_U32 },
+- [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
+- [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
+- [IFLA_VXLAN_LINK] = { .type = NLA_U32 },
+- [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+- [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
+- [IFLA_VXLAN_TOS] = { .type = NLA_U8 },
+- [IFLA_VXLAN_TTL] = { .type = NLA_U8 },
+- [IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
+- [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
+- [IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
+- [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
+- [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) },
+- [IFLA_VXLAN_PROXY] = { .type = NLA_U8 },
+- [IFLA_VXLAN_RSC] = { .type = NLA_U8 },
+- [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 },
+- [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 },
+- [IFLA_VXLAN_COLLECT_METADATA] = { .type = NLA_U8 },
+- [IFLA_VXLAN_PORT] = { .type = NLA_U16 },
+- [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 },
+- [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
+- [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
+- [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
+- [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
+- [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
+- [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
+- [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
+- [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
+- [IFLA_VXLAN_DF] = { .type = NLA_U8 },
+-};
+-
+-static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- if (tb[IFLA_ADDRESS]) {
+- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+- "Provided link layer address is not Ethernet");
+- return -EINVAL;
+- }
+-
+- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+- "Provided Ethernet address is not unicast");
+- return -EADDRNOTAVAIL;
+- }
+- }
+-
+- if (tb[IFLA_MTU]) {
+- u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+-
+- if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+- "MTU must be between 68 and 65535");
+- return -EINVAL;
+- }
+- }
+-
+- if (!data) {
+- NL_SET_ERR_MSG(extack,
+- "Required attributes not provided to perform the operation");
+- return -EINVAL;
+- }
+-
+- if (data[IFLA_VXLAN_ID]) {
+- u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
+-
+- if (id >= VXLAN_N_VID) {
+- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
+- "VXLAN ID must be lower than 16777216");
+- return -ERANGE;
+- }
+- }
+-
+- if (data[IFLA_VXLAN_PORT_RANGE]) {
+- const struct ifla_vxlan_port_range *p
+- = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
+-
+- if (ntohs(p->high) < ntohs(p->low)) {
+- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
+- "Invalid source port range");
+- return -EINVAL;
+- }
+- }
+-
+- if (data[IFLA_VXLAN_DF]) {
+- enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
+-
+- if (df < 0 || df > VXLAN_DF_MAX) {
+- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
+- "Invalid DF attribute");
+- return -EINVAL;
+- }
+- }
+-
+- return 0;
+-}
+-
+-static void vxlan_get_drvinfo(struct net_device *netdev,
+- struct ethtool_drvinfo *drvinfo)
+-{
+- strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
+- strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
+-}
+-
+-static int vxlan_get_link_ksettings(struct net_device *dev,
+- struct ethtool_link_ksettings *cmd)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_rdst *dst = &vxlan->default_dst;
+- struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+- dst->remote_ifindex);
+-
+- if (!lowerdev) {
+- cmd->base.duplex = DUPLEX_UNKNOWN;
+- cmd->base.port = PORT_OTHER;
+- cmd->base.speed = SPEED_UNKNOWN;
+-
+- return 0;
+- }
+-
+- return __ethtool_get_link_ksettings(lowerdev, cmd);
+-}
+-
+-static const struct ethtool_ops vxlan_ethtool_ops = {
+- .get_drvinfo = vxlan_get_drvinfo,
+- .get_link = ethtool_op_get_link,
+- .get_link_ksettings = vxlan_get_link_ksettings,
+-};
+-
+-static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
+- __be16 port, u32 flags, int ifindex)
+-{
+- struct socket *sock;
+- struct udp_port_cfg udp_conf;
+- int err;
+-
+- memset(&udp_conf, 0, sizeof(udp_conf));
+-
+- if (ipv6) {
+- udp_conf.family = AF_INET6;
+- udp_conf.use_udp6_rx_checksums =
+- !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
+- udp_conf.ipv6_v6only = 1;
+- } else {
+- udp_conf.family = AF_INET;
+- }
+-
+- udp_conf.local_udp_port = port;
+- udp_conf.bind_ifindex = ifindex;
+-
+- /* Open UDP socket */
+- err = udp_sock_create(net, &udp_conf, &sock);
+- if (err < 0)
+- return ERR_PTR(err);
+-
+- udp_allow_gso(sock->sk);
+- return sock;
+-}
+-
+-/* Create new listen socket if needed */
+-static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
+- __be16 port, u32 flags,
+- int ifindex)
+-{
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+- struct vxlan_sock *vs;
+- struct socket *sock;
+- unsigned int h;
+- struct udp_tunnel_sock_cfg tunnel_cfg;
+-
+- vs = kzalloc(sizeof(*vs), GFP_KERNEL);
+- if (!vs)
+- return ERR_PTR(-ENOMEM);
+-
+- for (h = 0; h < VNI_HASH_SIZE; ++h)
+- INIT_HLIST_HEAD(&vs->vni_list[h]);
+-
+- sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
+- if (IS_ERR(sock)) {
+- kfree(vs);
+- return ERR_CAST(sock);
+- }
+-
+- vs->sock = sock;
+- refcount_set(&vs->refcnt, 1);
+- vs->flags = (flags & VXLAN_F_RCV_FLAGS);
+-
+- spin_lock(&vn->sock_lock);
+- hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
+- udp_tunnel_notify_add_rx_port(sock,
+- (vs->flags & VXLAN_F_GPE) ?
+- UDP_TUNNEL_TYPE_VXLAN_GPE :
+- UDP_TUNNEL_TYPE_VXLAN);
+- spin_unlock(&vn->sock_lock);
+-
+- /* Mark socket as an encapsulation socket. */
+- memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
+- tunnel_cfg.sk_user_data = vs;
+- tunnel_cfg.encap_type = 1;
+- tunnel_cfg.encap_rcv = vxlan_rcv;
+- tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
+- tunnel_cfg.encap_destroy = NULL;
+- tunnel_cfg.gro_receive = vxlan_gro_receive;
+- tunnel_cfg.gro_complete = vxlan_gro_complete;
+-
+- setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
+-
+- return vs;
+-}
+-
+-static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
+-{
+- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+- struct vxlan_sock *vs = NULL;
+- struct vxlan_dev_node *node;
+- int l3mdev_index = 0;
+-
+- if (vxlan->cfg.remote_ifindex)
+- l3mdev_index = l3mdev_master_upper_ifindex_by_index(
+- vxlan->net, vxlan->cfg.remote_ifindex);
+-
+- if (!vxlan->cfg.no_share) {
+- spin_lock(&vn->sock_lock);
+- vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
+- vxlan->cfg.dst_port, vxlan->cfg.flags,
+- l3mdev_index);
+- if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
+- spin_unlock(&vn->sock_lock);
+- return -EBUSY;
+- }
+- spin_unlock(&vn->sock_lock);
+- }
+- if (!vs)
+- vs = vxlan_socket_create(vxlan->net, ipv6,
+- vxlan->cfg.dst_port, vxlan->cfg.flags,
+- l3mdev_index);
+- if (IS_ERR(vs))
+- return PTR_ERR(vs);
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (ipv6) {
+- rcu_assign_pointer(vxlan->vn6_sock, vs);
+- node = &vxlan->hlist6;
+- } else
+-#endif
+- {
+- rcu_assign_pointer(vxlan->vn4_sock, vs);
+- node = &vxlan->hlist4;
+- }
+- vxlan_vs_add_dev(vs, vxlan, node);
+- return 0;
+-}
+-
+-static int vxlan_sock_add(struct vxlan_dev *vxlan)
+-{
+- bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
+- bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
+- bool ipv4 = !ipv6 || metadata;
+- int ret = 0;
+-
+- RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
+-#if IS_ENABLED(CONFIG_IPV6)
+- RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
+- if (ipv6) {
+- ret = __vxlan_sock_add(vxlan, true);
+- if (ret < 0 && ret != -EAFNOSUPPORT)
+- ipv4 = false;
+- }
+-#endif
+- if (ipv4)
+- ret = __vxlan_sock_add(vxlan, false);
+- if (ret < 0)
+- vxlan_sock_release(vxlan);
+- return ret;
+-}
+-
+-static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
+- struct net_device **lower,
+- struct vxlan_dev *old,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
+- struct vxlan_dev *tmp;
+- bool use_ipv6 = false;
+-
+- if (conf->flags & VXLAN_F_GPE) {
+- /* For now, allow GPE only together with
+- * COLLECT_METADATA. This can be relaxed later; in such
+- * case, the other side of the PtP link will have to be
+- * provided.
+- */
+- if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+- !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+- NL_SET_ERR_MSG(extack,
+- "VXLAN GPE does not support this combination of attributes");
+- return -EINVAL;
+- }
+- }
+-
+- if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) {
+- /* Unless IPv6 is explicitly requested, assume IPv4 */
+- conf->remote_ip.sa.sa_family = AF_INET;
+- conf->saddr.sa.sa_family = AF_INET;
+- } else if (!conf->remote_ip.sa.sa_family) {
+- conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family;
+- } else if (!conf->saddr.sa.sa_family) {
+- conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
+- }
+-
+- if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) {
+- NL_SET_ERR_MSG(extack,
+- "Local and remote address must be from the same family");
+- return -EINVAL;
+- }
+-
+- if (vxlan_addr_multicast(&conf->saddr)) {
+- NL_SET_ERR_MSG(extack, "Local address cannot be multicast");
+- return -EINVAL;
+- }
+-
+- if (conf->saddr.sa.sa_family == AF_INET6) {
+- if (!IS_ENABLED(CONFIG_IPV6)) {
+- NL_SET_ERR_MSG(extack,
+- "IPv6 support not enabled in the kernel");
+- return -EPFNOSUPPORT;
+- }
+- use_ipv6 = true;
+- conf->flags |= VXLAN_F_IPV6;
+-
+- if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+- int local_type =
+- ipv6_addr_type(&conf->saddr.sin6.sin6_addr);
+- int remote_type =
+- ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr);
+-
+- if (local_type & IPV6_ADDR_LINKLOCAL) {
+- if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
+- (remote_type != IPV6_ADDR_ANY)) {
+- NL_SET_ERR_MSG(extack,
+- "Invalid combination of local and remote address scopes");
+- return -EINVAL;
+- }
+-
+- conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
+- } else {
+- if (remote_type ==
+- (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) {
+- NL_SET_ERR_MSG(extack,
+- "Invalid combination of local and remote address scopes");
+- return -EINVAL;
+- }
+-
+- conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
+- }
+- }
+- }
+-
+- if (conf->label && !use_ipv6) {
+- NL_SET_ERR_MSG(extack,
+- "Label attribute only applies to IPv6 VXLAN devices");
+- return -EINVAL;
+- }
+-
+- if (conf->remote_ifindex) {
+- struct net_device *lowerdev;
+-
+- lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
+- if (!lowerdev) {
+- NL_SET_ERR_MSG(extack,
+- "Invalid local interface, device not found");
+- return -ENODEV;
+- }
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (use_ipv6) {
+- struct inet6_dev *idev = __in6_dev_get(lowerdev);
+-
+- if (idev && idev->cnf.disable_ipv6) {
+- NL_SET_ERR_MSG(extack,
+- "IPv6 support disabled by administrator");
+- return -EPERM;
+- }
+- }
+-#endif
+-
+- *lower = lowerdev;
+- } else {
+- if (vxlan_addr_multicast(&conf->remote_ip)) {
+- NL_SET_ERR_MSG(extack,
+- "Local interface required for multicast remote destination");
+-
+- return -EINVAL;
+- }
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) {
+- NL_SET_ERR_MSG(extack,
+- "Local interface required for link-local local/remote addresses");
+- return -EINVAL;
+- }
+-#endif
+-
+- *lower = NULL;
+- }
+-
+- if (!conf->dst_port) {
+- if (conf->flags & VXLAN_F_GPE)
+- conf->dst_port = htons(4790); /* IANA VXLAN-GPE port */
+- else
+- conf->dst_port = htons(vxlan_port);
+- }
+-
+- if (!conf->age_interval)
+- conf->age_interval = FDB_AGE_DEFAULT;
+-
+- list_for_each_entry(tmp, &vn->vxlan_list, next) {
+- if (tmp == old)
+- continue;
+-
+- if (tmp->cfg.vni != conf->vni)
+- continue;
+- if (tmp->cfg.dst_port != conf->dst_port)
+- continue;
+- if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
+- (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
+- continue;
+-
+- if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+- tmp->cfg.remote_ifindex != conf->remote_ifindex)
+- continue;
+-
+- NL_SET_ERR_MSG(extack,
+- "A VXLAN device with the specified VNI already exists");
+- return -EEXIST;
+- }
+-
+- return 0;
+-}
+-
+-static void vxlan_config_apply(struct net_device *dev,
+- struct vxlan_config *conf,
+- struct net_device *lowerdev,
+- struct net *src_net,
+- bool changelink)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_rdst *dst = &vxlan->default_dst;
+- unsigned short needed_headroom = ETH_HLEN;
+- bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
+- int max_mtu = ETH_MAX_MTU;
+-
+- if (!changelink) {
+- if (conf->flags & VXLAN_F_GPE)
+- vxlan_raw_setup(dev);
+- else
+- vxlan_ether_setup(dev);
+-
+- if (conf->mtu)
+- dev->mtu = conf->mtu;
+-
+- vxlan->net = src_net;
+- }
+-
+- dst->remote_vni = conf->vni;
+-
+- memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
+-
+- if (lowerdev) {
+- dst->remote_ifindex = conf->remote_ifindex;
+-
+- dev->gso_max_size = lowerdev->gso_max_size;
+- dev->gso_max_segs = lowerdev->gso_max_segs;
+-
+- needed_headroom = lowerdev->hard_header_len;
+- needed_headroom += lowerdev->needed_headroom;
+-
+- dev->needed_tailroom = lowerdev->needed_tailroom;
+-
+- max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
+- VXLAN_HEADROOM);
+- if (max_mtu < ETH_MIN_MTU)
+- max_mtu = ETH_MIN_MTU;
+-
+- if (!changelink && !conf->mtu)
+- dev->mtu = max_mtu;
+- }
+-
+- if (dev->mtu > max_mtu)
+- dev->mtu = max_mtu;
+-
+- if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
+- needed_headroom += VXLAN6_HEADROOM;
+- else
+- needed_headroom += VXLAN_HEADROOM;
+- dev->needed_headroom = needed_headroom;
+-
+- memcpy(&vxlan->cfg, conf, sizeof(*conf));
+-}
+-
+-static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
+- struct vxlan_config *conf, bool changelink,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct net_device *lowerdev;
+- int ret;
+-
+- ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan, extack);
+- if (ret)
+- return ret;
+-
+- vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
+-
+- return 0;
+-}
+-
+-static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+- struct vxlan_config *conf,
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct net_device *remote_dev = NULL;
+- struct vxlan_fdb *f = NULL;
+- bool unregister = false;
+- struct vxlan_rdst *dst;
+- int err;
+-
+- dst = &vxlan->default_dst;
+- err = vxlan_dev_configure(net, dev, conf, false, extack);
+- if (err)
+- return err;
+-
+- dev->ethtool_ops = &vxlan_ethtool_ops;
+-
+- /* create an fdb entry for a valid default destination */
+- if (!vxlan_addr_any(&dst->remote_ip)) {
+- err = vxlan_fdb_create(vxlan, all_zeros_mac,
+- &dst->remote_ip,
+- NUD_REACHABLE | NUD_PERMANENT,
+- vxlan->cfg.dst_port,
+- dst->remote_vni,
+- dst->remote_vni,
+- dst->remote_ifindex,
+- NTF_SELF, 0, &f, extack);
+- if (err)
+- return err;
+- }
+-
+- err = register_netdevice(dev);
+- if (err)
+- goto errout;
+- unregister = true;
+-
+- if (dst->remote_ifindex) {
+- remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
+- if (!remote_dev) {
+- err = -ENODEV;
+- goto errout;
+- }
+-
+- err = netdev_upper_dev_link(remote_dev, dev, extack);
+- if (err)
+- goto errout;
+- }
+-
+- err = rtnl_configure_link(dev, NULL);
+- if (err < 0)
+- goto unlink;
+-
+- if (f) {
+- vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
+-
+- /* notify default fdb entry */
+- err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
+- RTM_NEWNEIGH, true, extack);
+- if (err) {
+- vxlan_fdb_destroy(vxlan, f, false, false);
+- if (remote_dev)
+- netdev_upper_dev_unlink(remote_dev, dev);
+- goto unregister;
+- }
+- }
+-
+- list_add(&vxlan->next, &vn->vxlan_list);
+- if (remote_dev)
+- dst->remote_dev = remote_dev;
+- return 0;
+-unlink:
+- if (remote_dev)
+- netdev_upper_dev_unlink(remote_dev, dev);
+-errout:
+- /* unregister_netdevice() destroys the default FDB entry with deletion
+- * notification. But the addition notification was not sent yet, so
+- * destroy the entry by hand here.
+- */
+- if (f)
+- __vxlan_fdb_free(f);
+-unregister:
+- if (unregister)
+- unregister_netdevice(dev);
+- return err;
+-}
+-
+-/* Set/clear flags based on attribute */
+-static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
+- int attrtype, unsigned long mask, bool changelink,
+- bool changelink_supported,
+- struct netlink_ext_ack *extack)
+-{
+- unsigned long flags;
+-
+- if (!tb[attrtype])
+- return 0;
+-
+- if (changelink && !changelink_supported) {
+- vxlan_flag_attr_error(attrtype, extack);
+- return -EOPNOTSUPP;
+- }
+-
+- if (vxlan_policy[attrtype].type == NLA_FLAG)
+- flags = conf->flags | mask;
+- else if (nla_get_u8(tb[attrtype]))
+- flags = conf->flags | mask;
+- else
+- flags = conf->flags & ~mask;
+-
+- conf->flags = flags;
+-
+- return 0;
+-}
+-
+-static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
+- struct net_device *dev, struct vxlan_config *conf,
+- bool changelink, struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- int err = 0;
+-
+- memset(conf, 0, sizeof(*conf));
+-
+- /* if changelink operation, start with old existing cfg */
+- if (changelink)
+- memcpy(conf, &vxlan->cfg, sizeof(*conf));
+-
+- if (data[IFLA_VXLAN_ID]) {
+- __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+-
+- if (changelink && (vni != conf->vni)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
+- return -EOPNOTSUPP;
+- }
+- conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
+- }
+-
+- if (data[IFLA_VXLAN_GROUP]) {
+- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
+- return -EOPNOTSUPP;
+- }
+-
+- conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+- conf->remote_ip.sa.sa_family = AF_INET;
+- } else if (data[IFLA_VXLAN_GROUP6]) {
+- if (!IS_ENABLED(CONFIG_IPV6)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
+- return -EPFNOSUPPORT;
+- }
+-
+- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
+- return -EOPNOTSUPP;
+- }
+-
+- conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
+- conf->remote_ip.sa.sa_family = AF_INET6;
+- }
+-
+- if (data[IFLA_VXLAN_LOCAL]) {
+- if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
+- return -EOPNOTSUPP;
+- }
+-
+- conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
+- conf->saddr.sa.sa_family = AF_INET;
+- } else if (data[IFLA_VXLAN_LOCAL6]) {
+- if (!IS_ENABLED(CONFIG_IPV6)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
+- return -EPFNOSUPPORT;
+- }
+-
+- if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
+- return -EOPNOTSUPP;
+- }
+-
+- /* TODO: respect scope id */
+- conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
+- conf->saddr.sa.sa_family = AF_INET6;
+- }
+-
+- if (data[IFLA_VXLAN_LINK])
+- conf->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
+-
+- if (data[IFLA_VXLAN_TOS])
+- conf->tos = nla_get_u8(data[IFLA_VXLAN_TOS]);
+-
+- if (data[IFLA_VXLAN_TTL])
+- conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+-
+- if (data[IFLA_VXLAN_TTL_INHERIT]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
+- VXLAN_F_TTL_INHERIT, changelink, false,
+- extack);
+- if (err)
+- return err;
+-
+- }
+-
+- if (data[IFLA_VXLAN_LABEL])
+- conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+- IPV6_FLOWLABEL_MASK;
+-
+- if (data[IFLA_VXLAN_LEARNING]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
+- VXLAN_F_LEARN, changelink, true,
+- extack);
+- if (err)
+- return err;
+- } else if (!changelink) {
+- /* default to learn on a new device */
+- conf->flags |= VXLAN_F_LEARN;
+- }
+-
+- if (data[IFLA_VXLAN_AGEING])
+- conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+-
+- if (data[IFLA_VXLAN_PROXY]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
+- VXLAN_F_PROXY, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_RSC]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
+- VXLAN_F_RSC, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_L2MISS]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
+- VXLAN_F_L2MISS, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_L3MISS]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
+- VXLAN_F_L3MISS, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_LIMIT]) {
+- if (changelink) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
+- "Cannot change limit");
+- return -EOPNOTSUPP;
+- }
+- conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+- }
+-
+- if (data[IFLA_VXLAN_COLLECT_METADATA]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
+- VXLAN_F_COLLECT_METADATA, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_PORT_RANGE]) {
+- if (!changelink) {
+- const struct ifla_vxlan_port_range *p
+- = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
+- conf->port_min = ntohs(p->low);
+- conf->port_max = ntohs(p->high);
+- } else {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
+- "Cannot change port range");
+- return -EOPNOTSUPP;
+- }
+- }
+-
+- if (data[IFLA_VXLAN_PORT]) {
+- if (changelink) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
+- "Cannot change port");
+- return -EOPNOTSUPP;
+- }
+- conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+- }
+-
+- if (data[IFLA_VXLAN_UDP_CSUM]) {
+- if (changelink) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
+- "Cannot change UDP_CSUM flag");
+- return -EOPNOTSUPP;
+- }
+- if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+- conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
+- }
+-
+- if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+- VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
+- false, extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+- VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
+- false, extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_REMCSUM_TX]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
+- VXLAN_F_REMCSUM_TX, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_REMCSUM_RX]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
+- VXLAN_F_REMCSUM_RX, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_GBP]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
+- VXLAN_F_GBP, changelink, false, extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_GPE]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
+- VXLAN_F_GPE, changelink, false,
+- extack);
+- if (err)
+- return err;
+- }
+-
+- if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
+- err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
+- VXLAN_F_REMCSUM_NOPARTIAL, changelink,
+- false, extack);
+- if (err)
+- return err;
+- }
+-
+- if (tb[IFLA_MTU]) {
+- if (changelink) {
+- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+- "Cannot change mtu");
+- return -EOPNOTSUPP;
+- }
+- conf->mtu = nla_get_u32(tb[IFLA_MTU]);
+- }
+-
+- if (data[IFLA_VXLAN_DF])
+- conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
+-
+- return 0;
+-}
+-
+-static int vxlan_newlink(struct net *src_net, struct net_device *dev,
+- struct nlattr *tb[], struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_config conf;
+- int err;
+-
+- err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
+- if (err)
+- return err;
+-
+- return __vxlan_dev_create(src_net, dev, &conf, extack);
+-}
+-
+-static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
+- struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct net_device *lowerdev;
+- struct vxlan_config conf;
+- struct vxlan_rdst *dst;
+- int err;
+-
+- dst = &vxlan->default_dst;
+- err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
+- if (err)
+- return err;
+-
+- err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
+- vxlan, extack);
+- if (err)
+- return err;
+-
+- if (dst->remote_dev == lowerdev)
+- lowerdev = NULL;
+-
+- err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
+- extack);
+- if (err)
+- return err;
+-
+- /* handle default dst entry */
+- if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
+- u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
+-
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- if (!vxlan_addr_any(&conf.remote_ip)) {
+- err = vxlan_fdb_update(vxlan, all_zeros_mac,
+- &conf.remote_ip,
+- NUD_REACHABLE | NUD_PERMANENT,
+- NLM_F_APPEND | NLM_F_CREATE,
+- vxlan->cfg.dst_port,
+- conf.vni, conf.vni,
+- conf.remote_ifindex,
+- NTF_SELF, 0, true, extack);
+- if (err) {
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+- netdev_adjacent_change_abort(dst->remote_dev,
+- lowerdev, dev);
+- return err;
+- }
+- }
+- if (!vxlan_addr_any(&dst->remote_ip))
+- __vxlan_fdb_delete(vxlan, all_zeros_mac,
+- dst->remote_ip,
+- vxlan->cfg.dst_port,
+- dst->remote_vni,
+- dst->remote_vni,
+- dst->remote_ifindex,
+- true);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+- }
+-
+- if (conf.age_interval != vxlan->cfg.age_interval)
+- mod_timer(&vxlan->age_timer, jiffies);
+-
+- netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
+- if (lowerdev && lowerdev != dst->remote_dev)
+- dst->remote_dev = lowerdev;
+- vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
+- return 0;
+-}
+-
+-static void vxlan_dellink(struct net_device *dev, struct list_head *head)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+-
+- vxlan_flush(vxlan, true);
+-
+- list_del(&vxlan->next);
+- unregister_netdevice_queue(dev, head);
+- if (vxlan->default_dst.remote_dev)
+- netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
+-}
+-
+-static size_t vxlan_get_size(const struct net_device *dev)
+-{
+-
+- return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */
+- nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
+- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
+- nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
+- nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */
+- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
+- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
+- nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
+- nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
+- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
+- 0;
+-}
+-
+-static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+-{
+- const struct vxlan_dev *vxlan = netdev_priv(dev);
+- const struct vxlan_rdst *dst = &vxlan->default_dst;
+- struct ifla_vxlan_port_range ports = {
+- .low = htons(vxlan->cfg.port_min),
+- .high = htons(vxlan->cfg.port_max),
+- };
+-
+- if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
+- goto nla_put_failure;
+-
+- if (!vxlan_addr_any(&dst->remote_ip)) {
+- if (dst->remote_ip.sa.sa_family == AF_INET) {
+- if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
+- dst->remote_ip.sin.sin_addr.s_addr))
+- goto nla_put_failure;
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
+- &dst->remote_ip.sin6.sin6_addr))
+- goto nla_put_failure;
+-#endif
+- }
+- }
+-
+- if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
+- goto nla_put_failure;
+-
+- if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
+- if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
+- if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
+- vxlan->cfg.saddr.sin.sin_addr.s_addr))
+- goto nla_put_failure;
+-#if IS_ENABLED(CONFIG_IPV6)
+- } else {
+- if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
+- &vxlan->cfg.saddr.sin6.sin6_addr))
+- goto nla_put_failure;
+-#endif
+- }
+- }
+-
+- if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
+- nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
+- !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
+- nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+- nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
+- nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
+- nla_put_u8(skb, IFLA_VXLAN_LEARNING,
+- !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
+- nla_put_u8(skb, IFLA_VXLAN_PROXY,
+- !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
+- nla_put_u8(skb, IFLA_VXLAN_RSC,
+- !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
+- nla_put_u8(skb, IFLA_VXLAN_L2MISS,
+- !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
+- nla_put_u8(skb, IFLA_VXLAN_L3MISS,
+- !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
+- nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
+- !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
+- nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
+- nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
+- nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
+- nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
+- !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
+- nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+- nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+- nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
+- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
+- nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
+- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
+- goto nla_put_failure;
+-
+- if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
+- goto nla_put_failure;
+-
+- if (vxlan->cfg.flags & VXLAN_F_GBP &&
+- nla_put_flag(skb, IFLA_VXLAN_GBP))
+- goto nla_put_failure;
+-
+- if (vxlan->cfg.flags & VXLAN_F_GPE &&
+- nla_put_flag(skb, IFLA_VXLAN_GPE))
+- goto nla_put_failure;
+-
+- if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&
+- nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
+- goto nla_put_failure;
+-
+- return 0;
+-
+-nla_put_failure:
+- return -EMSGSIZE;
+-}
+-
+-static struct net *vxlan_get_link_net(const struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+-
+- return vxlan->net;
+-}
+-
+-static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
+- .kind = "vxlan",
+- .maxtype = IFLA_VXLAN_MAX,
+- .policy = vxlan_policy,
+- .priv_size = sizeof(struct vxlan_dev),
+- .setup = vxlan_setup,
+- .validate = vxlan_validate,
+- .newlink = vxlan_newlink,
+- .changelink = vxlan_changelink,
+- .dellink = vxlan_dellink,
+- .get_size = vxlan_get_size,
+- .fill_info = vxlan_fill_info,
+- .get_link_net = vxlan_get_link_net,
+-};
+-
+-struct net_device *vxlan_dev_create(struct net *net, const char *name,
+- u8 name_assign_type,
+- struct vxlan_config *conf)
+-{
+- struct nlattr *tb[IFLA_MAX + 1];
+- struct net_device *dev;
+- int err;
+-
+- memset(&tb, 0, sizeof(tb));
+-
+- dev = rtnl_create_link(net, name, name_assign_type,
+- &vxlan_link_ops, tb, NULL);
+- if (IS_ERR(dev))
+- return dev;
+-
+- err = __vxlan_dev_create(net, dev, conf, NULL);
+- if (err < 0) {
+- free_netdev(dev);
+- return ERR_PTR(err);
+- }
+-
+- err = rtnl_configure_link(dev, NULL);
+- if (err < 0) {
+- LIST_HEAD(list_kill);
+-
+- vxlan_dellink(dev, &list_kill);
+- unregister_netdevice_many(&list_kill);
+- return ERR_PTR(err);
+- }
+-
+- return dev;
+-}
+-EXPORT_SYMBOL_GPL(vxlan_dev_create);
+-
+-static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
+- struct net_device *dev)
+-{
+- struct vxlan_dev *vxlan, *next;
+- LIST_HEAD(list_kill);
+-
+- list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+- struct vxlan_rdst *dst = &vxlan->default_dst;
+-
+- /* In case we created vxlan device with carrier
+- * and we loose the carrier due to module unload
+- * we also need to remove vxlan device. In other
+- * cases, it's not necessary and remote_ifindex
+- * is 0 here, so no matches.
+- */
+- if (dst->remote_ifindex == dev->ifindex)
+- vxlan_dellink(vxlan->dev, &list_kill);
+- }
+-
+- unregister_netdevice_many(&list_kill);
+-}
+-
+-static int vxlan_netdevice_event(struct notifier_block *unused,
+- unsigned long event, void *ptr)
+-{
+- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+-
+- if (event == NETDEV_UNREGISTER)
+- vxlan_handle_lowerdev_unregister(vn, dev);
+- else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
+- vxlan_offload_rx_ports(dev, true);
+- else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
+- vxlan_offload_rx_ports(dev, false);
+-
+- return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block vxlan_notifier_block __read_mostly = {
+- .notifier_call = vxlan_netdevice_event,
+-};
+-
+-static void
+-vxlan_fdb_offloaded_set(struct net_device *dev,
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_rdst *rdst;
+- struct vxlan_fdb *f;
+- u32 hash_index;
+-
+- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+-
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+-
+- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+- if (!f)
+- goto out;
+-
+- rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
+- fdb_info->remote_port,
+- fdb_info->remote_vni,
+- fdb_info->remote_ifindex);
+- if (!rdst)
+- goto out;
+-
+- rdst->offloaded = fdb_info->offloaded;
+-
+-out:
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-}
+-
+-static int
+-vxlan_fdb_external_learn_add(struct net_device *dev,
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct netlink_ext_ack *extack;
+- u32 hash_index;
+- int err;
+-
+- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+- extack = switchdev_notifier_info_to_extack(&fdb_info->info);
+-
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
+- NUD_REACHABLE,
+- NLM_F_CREATE | NLM_F_REPLACE,
+- fdb_info->remote_port,
+- fdb_info->vni,
+- fdb_info->remote_vni,
+- fdb_info->remote_ifindex,
+- NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
+- 0, false, extack);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-
+- return err;
+-}
+-
+-static int
+-vxlan_fdb_external_learn_del(struct net_device *dev,
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+-{
+- struct vxlan_dev *vxlan = netdev_priv(dev);
+- struct vxlan_fdb *f;
+- u32 hash_index;
+- int err = 0;
+-
+- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+-
+- f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+- if (!f)
+- err = -ENOENT;
+- else if (f->flags & NTF_EXT_LEARNED)
+- err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
+- fdb_info->remote_ip,
+- fdb_info->remote_port,
+- fdb_info->vni,
+- fdb_info->remote_vni,
+- fdb_info->remote_ifindex,
+- false);
+-
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+-
+- return err;
+-}
+-
+-static int vxlan_switchdev_event(struct notifier_block *unused,
+- unsigned long event, void *ptr)
+-{
+- struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+- struct switchdev_notifier_vxlan_fdb_info *fdb_info;
+- int err = 0;
+-
+- switch (event) {
+- case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+- vxlan_fdb_offloaded_set(dev, ptr);
+- break;
+- case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
+- fdb_info = ptr;
+- err = vxlan_fdb_external_learn_add(dev, fdb_info);
+- if (err) {
+- err = notifier_from_errno(err);
+- break;
+- }
+- fdb_info->offloaded = true;
+- vxlan_fdb_offloaded_set(dev, fdb_info);
+- break;
+- case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
+- fdb_info = ptr;
+- err = vxlan_fdb_external_learn_del(dev, fdb_info);
+- if (err) {
+- err = notifier_from_errno(err);
+- break;
+- }
+- fdb_info->offloaded = false;
+- vxlan_fdb_offloaded_set(dev, fdb_info);
+- break;
+- }
+-
+- return err;
+-}
+-
+-static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
+- .notifier_call = vxlan_switchdev_event,
+-};
+-
+-static void vxlan_fdb_nh_flush(struct nexthop *nh)
+-{
+- struct vxlan_fdb *fdb;
+- struct vxlan_dev *vxlan;
+- u32 hash_index;
+-
+- rcu_read_lock();
+- list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
+- vxlan = rcu_dereference(fdb->vdev);
+- WARN_ON(!vxlan);
+- hash_index = fdb_head_index(vxlan, fdb->eth_addr,
+- vxlan->default_dst.remote_vni);
+- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+- if (!hlist_unhashed(&fdb->hlist))
+- vxlan_fdb_destroy(vxlan, fdb, false, false);
+- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+- }
+- rcu_read_unlock();
+-}
+-
+-static int vxlan_nexthop_event(struct notifier_block *nb,
+- unsigned long event, void *ptr)
+-{
+- struct nh_notifier_info *info = ptr;
+- struct nexthop *nh;
+-
+- if (event != NEXTHOP_EVENT_DEL)
+- return NOTIFY_DONE;
+-
+- nh = nexthop_find_by_id(info->net, info->id);
+- if (!nh)
+- return NOTIFY_DONE;
+-
+- vxlan_fdb_nh_flush(nh);
+-
+- return NOTIFY_DONE;
+-}
+-
+-static __net_init int vxlan_init_net(struct net *net)
+-{
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+- unsigned int h;
+-
+- INIT_LIST_HEAD(&vn->vxlan_list);
+- spin_lock_init(&vn->sock_lock);
+- vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event;
+-
+- for (h = 0; h < PORT_HASH_SIZE; ++h)
+- INIT_HLIST_HEAD(&vn->sock_list[h]);
+-
+- return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
+- NULL);
+-}
+-
+-static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
+-{
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+- struct vxlan_dev *vxlan, *next;
+- struct net_device *dev, *aux;
+-
+- for_each_netdev_safe(net, dev, aux)
+- if (dev->rtnl_link_ops == &vxlan_link_ops)
+- unregister_netdevice_queue(dev, head);
+-
+- list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+- /* If vxlan->dev is in the same netns, it has already been added
+- * to the list by the previous loop.
+- */
+- if (!net_eq(dev_net(vxlan->dev), net))
+- unregister_netdevice_queue(vxlan->dev, head);
+- }
+-
+-}
+-
+-static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+-{
+- struct net *net;
+- LIST_HEAD(list);
+- unsigned int h;
+-
+- list_for_each_entry(net, net_list, exit_list) {
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+-
+- unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
+- }
+- rtnl_lock();
+- list_for_each_entry(net, net_list, exit_list)
+- vxlan_destroy_tunnels(net, &list);
+-
+- unregister_netdevice_many(&list);
+- rtnl_unlock();
+-
+- list_for_each_entry(net, net_list, exit_list) {
+- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+-
+- for (h = 0; h < PORT_HASH_SIZE; ++h)
+- WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
+- }
+-}
+-
+-static struct pernet_operations vxlan_net_ops = {
+- .init = vxlan_init_net,
+- .exit_batch = vxlan_exit_batch_net,
+- .id = &vxlan_net_id,
+- .size = sizeof(struct vxlan_net),
+-};
+-
+-static int __init vxlan_init_module(void)
+-{
+- int rc;
+-
+- get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
+-
+- rc = register_pernet_subsys(&vxlan_net_ops);
+- if (rc)
+- goto out1;
+-
+- rc = register_netdevice_notifier(&vxlan_notifier_block);
+- if (rc)
+- goto out2;
+-
+- rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
+- if (rc)
+- goto out3;
+-
+- rc = rtnl_link_register(&vxlan_link_ops);
+- if (rc)
+- goto out4;
+-
+- return 0;
+-out4:
+- unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
+-out3:
+- unregister_netdevice_notifier(&vxlan_notifier_block);
+-out2:
+- unregister_pernet_subsys(&vxlan_net_ops);
+-out1:
+- return rc;
+-}
+-late_initcall(vxlan_init_module);
+-
+-static void __exit vxlan_cleanup_module(void)
+-{
+- rtnl_link_unregister(&vxlan_link_ops);
+- unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
+- unregister_netdevice_notifier(&vxlan_notifier_block);
+- unregister_pernet_subsys(&vxlan_net_ops);
+- /* rcu_barrier() is called by netns */
+-}
+-module_exit(vxlan_cleanup_module);
+-
+-MODULE_LICENSE("GPL");
+-MODULE_VERSION(VXLAN_VERSION);
+-MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
+-MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
+-MODULE_ALIAS_RTNL_LINK("vxlan");
+diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile
+new file mode 100644
+index 0000000000000..5672661335933
+--- /dev/null
++++ b/drivers/net/vxlan/Makefile
+@@ -0,0 +1,7 @@
++#
++# Makefile for the vxlan driver
++#
++
++obj-$(CONFIG_VXLAN) += vxlan.o
++
++vxlan-objs := vxlan_core.o
+diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
+new file mode 100644
+index 0000000000000..41b1b23fdd3e9
+--- /dev/null
++++ b/drivers/net/vxlan/vxlan_core.c
+@@ -0,0 +1,4829 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * VXLAN: Virtual eXtensible Local Area Network
++ *
++ * Copyright (c) 2012-2013 Vyatta Inc.
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/slab.h>
++#include <linux/udp.h>
++#include <linux/igmp.h>
++#include <linux/if_ether.h>
++#include <linux/ethtool.h>
++#include <net/arp.h>
++#include <net/ndisc.h>
++#include <net/ipv6_stubs.h>
++#include <net/ip.h>
++#include <net/icmp.h>
++#include <net/rtnetlink.h>
++#include <net/inet_ecn.h>
++#include <net/net_namespace.h>
++#include <net/netns/generic.h>
++#include <net/tun_proto.h>
++#include <net/vxlan.h>
++#include <net/nexthop.h>
++
++#if IS_ENABLED(CONFIG_IPV6)
++#include <net/ip6_tunnel.h>
++#include <net/ip6_checksum.h>
++#endif
++
++#define VXLAN_VERSION "0.1"
++
++#define PORT_HASH_BITS 8
++#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
++#define FDB_AGE_DEFAULT 300 /* 5 min */
++#define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
++
++/* UDP port for VXLAN traffic.
++ * The IANA assigned port is 4789, but the Linux default is 8472
++ * for compatibility with early adopters.
++ */
++static unsigned short vxlan_port __read_mostly = 8472;
++module_param_named(udp_port, vxlan_port, ushort, 0444);
++MODULE_PARM_DESC(udp_port, "Destination UDP port");
++
++static bool log_ecn_error = true;
++module_param(log_ecn_error, bool, 0644);
++MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
++
++static unsigned int vxlan_net_id;
++static struct rtnl_link_ops vxlan_link_ops;
++
++static const u8 all_zeros_mac[ETH_ALEN + 2];
++
++static int vxlan_sock_add(struct vxlan_dev *vxlan);
++
++static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
++
++/* per-network namespace private data for this module */
++struct vxlan_net {
++ struct list_head vxlan_list;
++ struct hlist_head sock_list[PORT_HASH_SIZE];
++ spinlock_t sock_lock;
++ struct notifier_block nexthop_notifier_block;
++};
++
++/* Forwarding table entry */
++struct vxlan_fdb {
++ struct hlist_node hlist; /* linked list of entries */
++ struct rcu_head rcu;
++ unsigned long updated; /* jiffies */
++ unsigned long used;
++ struct list_head remotes;
++ u8 eth_addr[ETH_ALEN];
++ u16 state; /* see ndm_state */
++ __be32 vni;
++ u16 flags; /* see ndm_flags and below */
++ struct list_head nh_list;
++ struct nexthop __rcu *nh;
++ struct vxlan_dev __rcu *vdev;
++};
++
++#define NTF_VXLAN_ADDED_BY_USER 0x100
++
++/* salt for hash table */
++static u32 vxlan_salt __read_mostly;
++
++static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
++{
++ return vs->flags & VXLAN_F_COLLECT_METADATA ||
++ ip_tunnel_collect_metadata();
++}
++
++#if IS_ENABLED(CONFIG_IPV6)
++static inline
++bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
++{
++ if (a->sa.sa_family != b->sa.sa_family)
++ return false;
++ if (a->sa.sa_family == AF_INET6)
++ return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
++ else
++ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
++}
++
++static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
++{
++ if (nla_len(nla) >= sizeof(struct in6_addr)) {
++ ip->sin6.sin6_addr = nla_get_in6_addr(nla);
++ ip->sa.sa_family = AF_INET6;
++ return 0;
++ } else if (nla_len(nla) >= sizeof(__be32)) {
++ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
++ ip->sa.sa_family = AF_INET;
++ return 0;
++ } else {
++ return -EAFNOSUPPORT;
++ }
++}
++
++static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
++ const union vxlan_addr *ip)
++{
++ if (ip->sa.sa_family == AF_INET6)
++ return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
++ else
++ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
++}
++
++#else /* !CONFIG_IPV6 */
++
++static inline
++bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
++{
++ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
++}
++
++static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
++{
++ if (nla_len(nla) >= sizeof(struct in6_addr)) {
++ return -EAFNOSUPPORT;
++ } else if (nla_len(nla) >= sizeof(__be32)) {
++ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
++ ip->sa.sa_family = AF_INET;
++ return 0;
++ } else {
++ return -EAFNOSUPPORT;
++ }
++}
++
++static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
++ const union vxlan_addr *ip)
++{
++ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
++}
++#endif
++
++/* Virtual Network hash table head */
++static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
++{
++ return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
++}
++
++/* Socket hash table head */
++static inline struct hlist_head *vs_head(struct net *net, __be16 port)
++{
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++
++ return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
++}
++
++/* First remote destination for a forwarding entry.
++ * Guaranteed to be non-NULL because remotes are never deleted.
++ */
++static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
++{
++ if (rcu_access_pointer(fdb->nh))
++ return NULL;
++ return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
++}
++
++static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
++{
++ if (rcu_access_pointer(fdb->nh))
++ return NULL;
++ return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
++}
++
++/* Find VXLAN socket based on network namespace, address family, UDP port,
++ * enabled unshareable flags and socket device binding (see l3mdev with
++ * non-default VRF).
++ */
++static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
++ __be16 port, u32 flags, int ifindex)
++{
++ struct vxlan_sock *vs;
++
++ flags &= VXLAN_F_RCV_FLAGS;
++
++ hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
++ if (inet_sk(vs->sock->sk)->inet_sport == port &&
++ vxlan_get_sk_family(vs) == family &&
++ vs->flags == flags &&
++ vs->sock->sk->sk_bound_dev_if == ifindex)
++ return vs;
++ }
++ return NULL;
++}
++
++static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
++ __be32 vni)
++{
++ struct vxlan_dev_node *node;
++
++ /* For flow based devices, map all packets to VNI 0 */
++ if (vs->flags & VXLAN_F_COLLECT_METADATA)
++ vni = 0;
++
++ hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
++ if (node->vxlan->default_dst.remote_vni != vni)
++ continue;
++
++ if (IS_ENABLED(CONFIG_IPV6)) {
++ const struct vxlan_config *cfg = &node->vxlan->cfg;
++
++ if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
++ cfg->remote_ifindex != ifindex)
++ continue;
++ }
++
++ return node->vxlan;
++ }
++
++ return NULL;
++}
++
++/* Look up VNI in a per net namespace table */
++static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
++ __be32 vni, sa_family_t family,
++ __be16 port, u32 flags)
++{
++ struct vxlan_sock *vs;
++
++ vs = vxlan_find_sock(net, family, port, flags, ifindex);
++ if (!vs)
++ return NULL;
++
++ return vxlan_vs_find_vni(vs, ifindex, vni);
++}
++
++/* Fill in neighbour message in skbuff. */
++static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
++ const struct vxlan_fdb *fdb,
++ u32 portid, u32 seq, int type, unsigned int flags,
++ const struct vxlan_rdst *rdst)
++{
++ unsigned long now = jiffies;
++ struct nda_cacheinfo ci;
++ bool send_ip, send_eth;
++ struct nlmsghdr *nlh;
++ struct nexthop *nh;
++ struct ndmsg *ndm;
++ int nh_family;
++ u32 nh_id;
++
++ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
++ if (nlh == NULL)
++ return -EMSGSIZE;
++
++ ndm = nlmsg_data(nlh);
++ memset(ndm, 0, sizeof(*ndm));
++
++ send_eth = send_ip = true;
++
++ rcu_read_lock();
++ nh = rcu_dereference(fdb->nh);
++ if (nh) {
++ nh_family = nexthop_get_family(nh);
++ nh_id = nh->id;
++ }
++ rcu_read_unlock();
++
++ if (type == RTM_GETNEIGH) {
++ if (rdst) {
++ send_ip = !vxlan_addr_any(&rdst->remote_ip);
++ ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
++ } else if (nh) {
++ ndm->ndm_family = nh_family;
++ }
++ send_eth = !is_zero_ether_addr(fdb->eth_addr);
++ } else
++ ndm->ndm_family = AF_BRIDGE;
++ ndm->ndm_state = fdb->state;
++ ndm->ndm_ifindex = vxlan->dev->ifindex;
++ ndm->ndm_flags = fdb->flags;
++ if (rdst && rdst->offloaded)
++ ndm->ndm_flags |= NTF_OFFLOADED;
++ ndm->ndm_type = RTN_UNICAST;
++
++ if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
++ nla_put_s32(skb, NDA_LINK_NETNSID,
++ peernet2id(dev_net(vxlan->dev), vxlan->net)))
++ goto nla_put_failure;
++
++ if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
++ goto nla_put_failure;
++ if (nh) {
++ if (nla_put_u32(skb, NDA_NH_ID, nh_id))
++ goto nla_put_failure;
++ } else if (rdst) {
++ if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
++ &rdst->remote_ip))
++ goto nla_put_failure;
++
++ if (rdst->remote_port &&
++ rdst->remote_port != vxlan->cfg.dst_port &&
++ nla_put_be16(skb, NDA_PORT, rdst->remote_port))
++ goto nla_put_failure;
++ if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
++ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
++ goto nla_put_failure;
++ if (rdst->remote_ifindex &&
++ nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
++ goto nla_put_failure;
++ }
++
++ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
++ nla_put_u32(skb, NDA_SRC_VNI,
++ be32_to_cpu(fdb->vni)))
++ goto nla_put_failure;
++
++ ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
++ ci.ndm_confirmed = 0;
++ ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
++ ci.ndm_refcnt = 0;
++
++ if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
++ goto nla_put_failure;
++
++ nlmsg_end(skb, nlh);
++ return 0;
++
++nla_put_failure:
++ nlmsg_cancel(skb, nlh);
++ return -EMSGSIZE;
++}
++
++static inline size_t vxlan_nlmsg_size(void)
++{
++ return NLMSG_ALIGN(sizeof(struct ndmsg))
++ + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
++ + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
++ + nla_total_size(sizeof(__be16)) /* NDA_PORT */
++ + nla_total_size(sizeof(__be32)) /* NDA_VNI */
++ + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
++ + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
++ + nla_total_size(sizeof(struct nda_cacheinfo));
++}
++
++static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
++ struct vxlan_rdst *rd, int type)
++{
++ struct net *net = dev_net(vxlan->dev);
++ struct sk_buff *skb;
++ int err = -ENOBUFS;
++
++ skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
++ if (skb == NULL)
++ goto errout;
++
++ err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, rd);
++ if (err < 0) {
++ /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
++ WARN_ON(err == -EMSGSIZE);
++ kfree_skb(skb);
++ goto errout;
++ }
++
++ rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
++ return;
++errout:
++ if (err < 0)
++ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
++}
++
++static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
++ const struct vxlan_fdb *fdb,
++ const struct vxlan_rdst *rd,
++ struct netlink_ext_ack *extack,
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
++{
++ fdb_info->info.dev = vxlan->dev;
++ fdb_info->info.extack = extack;
++ fdb_info->remote_ip = rd->remote_ip;
++ fdb_info->remote_port = rd->remote_port;
++ fdb_info->remote_vni = rd->remote_vni;
++ fdb_info->remote_ifindex = rd->remote_ifindex;
++ memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
++ fdb_info->vni = fdb->vni;
++ fdb_info->offloaded = rd->offloaded;
++ fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
++}
++
++static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
++ struct vxlan_fdb *fdb,
++ struct vxlan_rdst *rd,
++ bool adding,
++ struct netlink_ext_ack *extack)
++{
++ struct switchdev_notifier_vxlan_fdb_info info;
++ enum switchdev_notifier_type notifier_type;
++ int ret;
++
++ if (WARN_ON(!rd))
++ return 0;
++
++ notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
++ : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
++ vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
++ ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
++ &info.info, extack);
++ return notifier_to_errno(ret);
++}
++
++static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
++ struct vxlan_rdst *rd, int type, bool swdev_notify,
++ struct netlink_ext_ack *extack)
++{
++ int err;
++
++ if (swdev_notify && rd) {
++ switch (type) {
++ case RTM_NEWNEIGH:
++ err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
++ true, extack);
++ if (err)
++ return err;
++ break;
++ case RTM_DELNEIGH:
++ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
++ false, extack);
++ break;
++ }
++ }
++
++ __vxlan_fdb_notify(vxlan, fdb, rd, type);
++ return 0;
++}
++
++static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_fdb f = {
++ .state = NUD_STALE,
++ };
++ struct vxlan_rdst remote = {
++ .remote_ip = *ipa, /* goes to NDA_DST */
++ .remote_vni = cpu_to_be32(VXLAN_N_VID),
++ };
++
++ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
++}
++
++static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
++{
++ struct vxlan_fdb f = {
++ .state = NUD_STALE,
++ };
++ struct vxlan_rdst remote = { };
++
++ memcpy(f.eth_addr, eth_addr, ETH_ALEN);
++
++ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
++}
++
++/* Hash Ethernet address */
++static u32 eth_hash(const unsigned char *addr)
++{
++ u64 value = get_unaligned((u64 *)addr);
++
++ /* only want 6 bytes */
++#ifdef __BIG_ENDIAN
++ value >>= 16;
++#else
++ value <<= 16;
++#endif
++ return hash_64(value, FDB_HASH_BITS);
++}
++
++static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
++{
++ /* use 1 byte of OUI and 3 bytes of NIC */
++ u32 key = get_unaligned((u32 *)(addr + 2));
++
++ return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
++}
++
++static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
++{
++ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
++ return eth_vni_hash(mac, vni);
++ else
++ return eth_hash(mac);
++}
++
++/* Hash chain to use given mac address */
++static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
++ const u8 *mac, __be32 vni)
++{
++ return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
++}
++
++/* Look up Ethernet address in forwarding table */
++static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
++ const u8 *mac, __be32 vni)
++{
++ struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
++ struct vxlan_fdb *f;
++
++ hlist_for_each_entry_rcu(f, head, hlist) {
++ if (ether_addr_equal(mac, f->eth_addr)) {
++ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
++ if (vni == f->vni)
++ return f;
++ } else {
++ return f;
++ }
++ }
++ }
++
++ return NULL;
++}
++
++static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
++ const u8 *mac, __be32 vni)
++{
++ struct vxlan_fdb *f;
++
++ f = __vxlan_find_mac(vxlan, mac, vni);
++ if (f && f->used != jiffies)
++ f->used = jiffies;
++
++ return f;
++}
++
++/* caller should hold vxlan->hash_lock */
++static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
++ union vxlan_addr *ip, __be16 port,
++ __be32 vni, __u32 ifindex)
++{
++ struct vxlan_rdst *rd;
++
++ list_for_each_entry(rd, &f->remotes, list) {
++ if (vxlan_addr_equal(&rd->remote_ip, ip) &&
++ rd->remote_port == port &&
++ rd->remote_vni == vni &&
++ rd->remote_ifindex == ifindex)
++ return rd;
++ }
++
++ return NULL;
++}
++
++int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ u8 eth_addr[ETH_ALEN + 2] = { 0 };
++ struct vxlan_rdst *rdst;
++ struct vxlan_fdb *f;
++ int rc = 0;
++
++ if (is_multicast_ether_addr(mac) ||
++ is_zero_ether_addr(mac))
++ return -EINVAL;
++
++ ether_addr_copy(eth_addr, mac);
++
++ rcu_read_lock();
++
++ f = __vxlan_find_mac(vxlan, eth_addr, vni);
++ if (!f) {
++ rc = -ENOENT;
++ goto out;
++ }
++
++ rdst = first_remote_rcu(f);
++ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
++
++out:
++ rcu_read_unlock();
++ return rc;
++}
++EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
++
++static int vxlan_fdb_notify_one(struct notifier_block *nb,
++ const struct vxlan_dev *vxlan,
++ const struct vxlan_fdb *f,
++ const struct vxlan_rdst *rdst,
++ struct netlink_ext_ack *extack)
++{
++ struct switchdev_notifier_vxlan_fdb_info fdb_info;
++ int rc;
++
++ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
++ rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
++ &fdb_info);
++ return notifier_to_errno(rc);
++}
++
++int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
++ struct notifier_block *nb,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan;
++ struct vxlan_rdst *rdst;
++ struct vxlan_fdb *f;
++ unsigned int h;
++ int rc = 0;
++
++ if (!netif_is_vxlan(dev))
++ return -EINVAL;
++ vxlan = netdev_priv(dev);
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ spin_lock_bh(&vxlan->hash_lock[h]);
++ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
++ if (f->vni == vni) {
++ list_for_each_entry(rdst, &f->remotes, list) {
++ rc = vxlan_fdb_notify_one(nb, vxlan,
++ f, rdst,
++ extack);
++ if (rc)
++ goto unlock;
++ }
++ }
++ }
++ spin_unlock_bh(&vxlan->hash_lock[h]);
++ }
++ return 0;
++
++unlock:
++ spin_unlock_bh(&vxlan->hash_lock[h]);
++ return rc;
++}
++EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
++
++void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
++{
++ struct vxlan_dev *vxlan;
++ struct vxlan_rdst *rdst;
++ struct vxlan_fdb *f;
++ unsigned int h;
++
++ if (!netif_is_vxlan(dev))
++ return;
++ vxlan = netdev_priv(dev);
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ spin_lock_bh(&vxlan->hash_lock[h]);
++ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
++ if (f->vni == vni)
++ list_for_each_entry(rdst, &f->remotes, list)
++ rdst->offloaded = false;
++ spin_unlock_bh(&vxlan->hash_lock[h]);
++ }
++
++}
++EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
++
++/* Replace destination of unicast mac */
++static int vxlan_fdb_replace(struct vxlan_fdb *f,
++ union vxlan_addr *ip, __be16 port, __be32 vni,
++ __u32 ifindex, struct vxlan_rdst *oldrd)
++{
++ struct vxlan_rdst *rd;
++
++ rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
++ if (rd)
++ return 0;
++
++ rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
++ if (!rd)
++ return 0;
++
++ *oldrd = *rd;
++ dst_cache_reset(&rd->dst_cache);
++ rd->remote_ip = *ip;
++ rd->remote_port = port;
++ rd->remote_vni = vni;
++ rd->remote_ifindex = ifindex;
++ rd->offloaded = false;
++ return 1;
++}
++
++/* Add/update destinations for multicast */
++static int vxlan_fdb_append(struct vxlan_fdb *f,
++ union vxlan_addr *ip, __be16 port, __be32 vni,
++ __u32 ifindex, struct vxlan_rdst **rdp)
++{
++ struct vxlan_rdst *rd;
++
++ rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
++ if (rd)
++ return 0;
++
++ rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
++ if (rd == NULL)
++ return -ENOMEM;
++
++ if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
++ kfree(rd);
++ return -ENOMEM;
++ }
++
++ rd->remote_ip = *ip;
++ rd->remote_port = port;
++ rd->offloaded = false;
++ rd->remote_vni = vni;
++ rd->remote_ifindex = ifindex;
++
++ list_add_tail_rcu(&rd->list, &f->remotes);
++
++ *rdp = rd;
++ return 1;
++}
++
++static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol)
++{
++ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr;
++
++ /* Need to have Next Protocol set for interfaces in GPE mode. */
++ if (!gpe->np_applied)
++ return false;
++ /* "The initial version is 0. If a receiver does not support the
++	 * version indicated it MUST drop the packet."
++ */
++ if (gpe->version != 0)
++ return false;
++ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
++ * processing MUST occur." However, we don't implement OAM
++ * processing, thus drop the packet.
++ */
++ if (gpe->oam_flag)
++ return false;
++
++ *protocol = tun_p_to_eth_p(gpe->next_protocol);
++ if (!*protocol)
++ return false;
++
++ return true;
++}
++
++static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
++ unsigned int off,
++ struct vxlanhdr *vh, size_t hdrlen,
++ __be32 vni_field,
++ struct gro_remcsum *grc,
++ bool nopartial)
++{
++ size_t start, offset;
++
++ if (skb->remcsum_offload)
++ return vh;
++
++ if (!NAPI_GRO_CB(skb)->csum_valid)
++ return NULL;
++
++ start = vxlan_rco_start(vni_field);
++ offset = start + vxlan_rco_offset(vni_field);
++
++ vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
++ start, offset, grc, nopartial);
++
++ skb->remcsum_offload = 1;
++
++ return vh;
++}
++
++static struct sk_buff *vxlan_gro_receive(struct sock *sk,
++ struct list_head *head,
++ struct sk_buff *skb)
++{
++ struct sk_buff *pp = NULL;
++ struct sk_buff *p;
++ struct vxlanhdr *vh, *vh2;
++ unsigned int hlen, off_vx;
++ int flush = 1;
++ struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
++ __be32 flags;
++ struct gro_remcsum grc;
++
++ skb_gro_remcsum_init(&grc);
++
++ off_vx = skb_gro_offset(skb);
++ hlen = off_vx + sizeof(*vh);
++ vh = skb_gro_header_fast(skb, off_vx);
++ if (skb_gro_header_hard(skb, hlen)) {
++ vh = skb_gro_header_slow(skb, hlen, off_vx);
++ if (unlikely(!vh))
++ goto out;
++ }
++
++ skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
++
++ flags = vh->vx_flags;
++
++ if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
++ vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
++ vh->vx_vni, &grc,
++ !!(vs->flags &
++ VXLAN_F_REMCSUM_NOPARTIAL));
++
++ if (!vh)
++ goto out;
++ }
++
++ skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
++
++ list_for_each_entry(p, head, list) {
++ if (!NAPI_GRO_CB(p)->same_flow)
++ continue;
++
++ vh2 = (struct vxlanhdr *)(p->data + off_vx);
++ if (vh->vx_flags != vh2->vx_flags ||
++ vh->vx_vni != vh2->vx_vni) {
++ NAPI_GRO_CB(p)->same_flow = 0;
++ continue;
++ }
++ }
++
++ pp = call_gro_receive(eth_gro_receive, head, skb);
++ flush = 0;
++
++out:
++ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
++
++ return pp;
++}
++
++static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
++{
++ /* Sets 'skb->inner_mac_header' since we are always called with
++ * 'skb->encapsulation' set.
++ */
++ return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
++}
++
++static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
++ __u16 state, __be32 src_vni,
++ __u16 ndm_flags)
++{
++ struct vxlan_fdb *f;
++
++ f = kmalloc(sizeof(*f), GFP_ATOMIC);
++ if (!f)
++ return NULL;
++ f->state = state;
++ f->flags = ndm_flags;
++ f->updated = f->used = jiffies;
++ f->vni = src_vni;
++ f->nh = NULL;
++ RCU_INIT_POINTER(f->vdev, vxlan);
++ INIT_LIST_HEAD(&f->nh_list);
++ INIT_LIST_HEAD(&f->remotes);
++ memcpy(f->eth_addr, mac, ETH_ALEN);
++
++ return f;
++}
++
++static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
++ __be32 src_vni, struct vxlan_fdb *f)
++{
++ ++vxlan->addrcnt;
++ hlist_add_head_rcu(&f->hlist,
++ vxlan_fdb_head(vxlan, mac, src_vni));
++}
++
++static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
++ u32 nhid, struct netlink_ext_ack *extack)
++{
++ struct nexthop *old_nh = rtnl_dereference(fdb->nh);
++ struct nexthop *nh;
++ int err = -EINVAL;
++
++ if (old_nh && old_nh->id == nhid)
++ return 0;
++
++ nh = nexthop_find_by_id(vxlan->net, nhid);
++ if (!nh) {
++ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
++ goto err_inval;
++ }
++
++ if (nh) {
++ if (!nexthop_get(nh)) {
++ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
++ nh = NULL;
++ goto err_inval;
++ }
++ if (!nexthop_is_fdb(nh)) {
++ NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
++ goto err_inval;
++ }
++
++ if (!nexthop_is_multipath(nh)) {
++ NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
++ goto err_inval;
++ }
++
++ /* check nexthop group family */
++ switch (vxlan->default_dst.remote_ip.sa.sa_family) {
++ case AF_INET:
++ if (!nexthop_has_v4(nh)) {
++ err = -EAFNOSUPPORT;
++ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
++ goto err_inval;
++ }
++ break;
++ case AF_INET6:
++ if (nexthop_has_v4(nh)) {
++ err = -EAFNOSUPPORT;
++ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
++ goto err_inval;
++ }
++ }
++ }
++
++ if (old_nh) {
++ list_del_rcu(&fdb->nh_list);
++ nexthop_put(old_nh);
++ }
++ rcu_assign_pointer(fdb->nh, nh);
++ list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
++ return 1;
++
++err_inval:
++ if (nh)
++ nexthop_put(nh);
++ return err;
++}
++
++static int vxlan_fdb_create(struct vxlan_dev *vxlan,
++ const u8 *mac, union vxlan_addr *ip,
++ __u16 state, __be16 port, __be32 src_vni,
++ __be32 vni, __u32 ifindex, __u16 ndm_flags,
++ u32 nhid, struct vxlan_fdb **fdb,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_rdst *rd = NULL;
++ struct vxlan_fdb *f;
++ int rc;
++
++ if (vxlan->cfg.addrmax &&
++ vxlan->addrcnt >= vxlan->cfg.addrmax)
++ return -ENOSPC;
++
++ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
++ f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
++ if (!f)
++ return -ENOMEM;
++
++ if (nhid)
++ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
++ else
++ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
++ if (rc < 0)
++ goto errout;
++
++ *fdb = f;
++
++ return 0;
++
++errout:
++ kfree(f);
++ return rc;
++}
++
++static void __vxlan_fdb_free(struct vxlan_fdb *f)
++{
++ struct vxlan_rdst *rd, *nd;
++ struct nexthop *nh;
++
++ nh = rcu_dereference_raw(f->nh);
++ if (nh) {
++ rcu_assign_pointer(f->nh, NULL);
++ rcu_assign_pointer(f->vdev, NULL);
++ nexthop_put(nh);
++ }
++
++ list_for_each_entry_safe(rd, nd, &f->remotes, list) {
++ dst_cache_destroy(&rd->dst_cache);
++ kfree(rd);
++ }
++ kfree(f);
++}
++
++static void vxlan_fdb_free(struct rcu_head *head)
++{
++ struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
++
++ __vxlan_fdb_free(f);
++}
++
++static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
++ bool do_notify, bool swdev_notify)
++{
++ struct vxlan_rdst *rd;
++
++ netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
++
++ --vxlan->addrcnt;
++ if (do_notify) {
++ if (rcu_access_pointer(f->nh))
++ vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
++ swdev_notify, NULL);
++ else
++ list_for_each_entry(rd, &f->remotes, list)
++ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
++ swdev_notify, NULL);
++ }
++
++ hlist_del_rcu(&f->hlist);
++ list_del_rcu(&f->nh_list);
++ call_rcu(&f->rcu, vxlan_fdb_free);
++}
++
++static void vxlan_dst_free(struct rcu_head *head)
++{
++ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
++
++ dst_cache_destroy(&rd->dst_cache);
++ kfree(rd);
++}
++
++static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
++ union vxlan_addr *ip,
++ __u16 state, __u16 flags,
++ __be16 port, __be32 vni,
++ __u32 ifindex, __u16 ndm_flags,
++ struct vxlan_fdb *f, u32 nhid,
++ bool swdev_notify,
++ struct netlink_ext_ack *extack)
++{
++ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
++ struct vxlan_rdst *rd = NULL;
++ struct vxlan_rdst oldrd;
++ int notify = 0;
++ int rc = 0;
++ int err;
++
++ if (nhid && !rcu_access_pointer(f->nh)) {
++ NL_SET_ERR_MSG(extack,
++ "Cannot replace an existing non nexthop fdb with a nexthop");
++ return -EOPNOTSUPP;
++ }
++
++ if (nhid && (flags & NLM_F_APPEND)) {
++ NL_SET_ERR_MSG(extack,
++ "Cannot append to a nexthop fdb");
++ return -EOPNOTSUPP;
++ }
++
++ /* Do not allow an externally learned entry to take over an entry added
++ * by the user.
++ */
++ if (!(fdb_flags & NTF_EXT_LEARNED) ||
++ !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
++ if (f->state != state) {
++ f->state = state;
++ f->updated = jiffies;
++ notify = 1;
++ }
++ if (f->flags != fdb_flags) {
++ f->flags = fdb_flags;
++ f->updated = jiffies;
++ notify = 1;
++ }
++ }
++
++ if ((flags & NLM_F_REPLACE)) {
++ /* Only change unicasts */
++ if (!(is_multicast_ether_addr(f->eth_addr) ||
++ is_zero_ether_addr(f->eth_addr))) {
++ if (nhid) {
++ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
++ if (rc < 0)
++ return rc;
++ } else {
++ rc = vxlan_fdb_replace(f, ip, port, vni,
++ ifindex, &oldrd);
++ }
++ notify |= rc;
++ } else {
++ NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
++ return -EOPNOTSUPP;
++ }
++ }
++ if ((flags & NLM_F_APPEND) &&
++ (is_multicast_ether_addr(f->eth_addr) ||
++ is_zero_ether_addr(f->eth_addr))) {
++ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
++
++ if (rc < 0)
++ return rc;
++ notify |= rc;
++ }
++
++ if (ndm_flags & NTF_USE)
++ f->used = jiffies;
++
++ if (notify) {
++ if (rd == NULL)
++ rd = first_remote_rtnl(f);
++
++ err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
++ swdev_notify, extack);
++ if (err)
++ goto err_notify;
++ }
++
++ return 0;
++
++err_notify:
++ if (nhid)
++ return err;
++ if ((flags & NLM_F_REPLACE) && rc)
++ *rd = oldrd;
++ else if ((flags & NLM_F_APPEND) && rc) {
++ list_del_rcu(&rd->list);
++ call_rcu(&rd->rcu, vxlan_dst_free);
++ }
++ return err;
++}
++
++static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
++ const u8 *mac, union vxlan_addr *ip,
++ __u16 state, __u16 flags,
++ __be16 port, __be32 src_vni, __be32 vni,
++ __u32 ifindex, __u16 ndm_flags, u32 nhid,
++ bool swdev_notify,
++ struct netlink_ext_ack *extack)
++{
++ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
++ struct vxlan_fdb *f;
++ int rc;
++
++ /* Disallow replace to add a multicast entry */
++ if ((flags & NLM_F_REPLACE) &&
++ (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
++ return -EOPNOTSUPP;
++
++ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
++ rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
++ vni, ifindex, fdb_flags, nhid, &f, extack);
++ if (rc < 0)
++ return rc;
++
++ vxlan_fdb_insert(vxlan, mac, src_vni, f);
++ rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
++ swdev_notify, extack);
++ if (rc)
++ goto err_notify;
++
++ return 0;
++
++err_notify:
++ vxlan_fdb_destroy(vxlan, f, false, false);
++ return rc;
++}
++
++/* Add new entry to forwarding table -- assumes lock held */
++static int vxlan_fdb_update(struct vxlan_dev *vxlan,
++ const u8 *mac, union vxlan_addr *ip,
++ __u16 state, __u16 flags,
++ __be16 port, __be32 src_vni, __be32 vni,
++ __u32 ifindex, __u16 ndm_flags, u32 nhid,
++ bool swdev_notify,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_fdb *f;
++
++ f = __vxlan_find_mac(vxlan, mac, src_vni);
++ if (f) {
++ if (flags & NLM_F_EXCL) {
++ netdev_dbg(vxlan->dev,
++ "lost race to create %pM\n", mac);
++ return -EEXIST;
++ }
++
++ return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
++ vni, ifindex, ndm_flags, f,
++ nhid, swdev_notify, extack);
++ } else {
++ if (!(flags & NLM_F_CREATE))
++ return -ENOENT;
++
++ return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
++ port, src_vni, vni, ifindex,
++ ndm_flags, nhid, swdev_notify,
++ extack);
++ }
++}
++
++static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
++ struct vxlan_rdst *rd, bool swdev_notify)
++{
++ list_del_rcu(&rd->list);
++ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
++ call_rcu(&rd->rcu, vxlan_dst_free);
++}
++
++static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
++ union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
++ __be32 *vni, u32 *ifindex, u32 *nhid)
++{
++ struct net *net = dev_net(vxlan->dev);
++ int err;
++
++ if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
++ tb[NDA_PORT]))
++ return -EINVAL;
++
++ if (tb[NDA_DST]) {
++ err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
++ if (err)
++ return err;
++ } else {
++ union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
++
++ if (remote->sa.sa_family == AF_INET) {
++ ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
++ ip->sa.sa_family = AF_INET;
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ ip->sin6.sin6_addr = in6addr_any;
++ ip->sa.sa_family = AF_INET6;
++#endif
++ }
++ }
++
++ if (tb[NDA_PORT]) {
++ if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
++ return -EINVAL;
++ *port = nla_get_be16(tb[NDA_PORT]);
++ } else {
++ *port = vxlan->cfg.dst_port;
++ }
++
++ if (tb[NDA_VNI]) {
++ if (nla_len(tb[NDA_VNI]) != sizeof(u32))
++ return -EINVAL;
++ *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
++ } else {
++ *vni = vxlan->default_dst.remote_vni;
++ }
++
++ if (tb[NDA_SRC_VNI]) {
++ if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32))
++ return -EINVAL;
++ *src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI]));
++ } else {
++ *src_vni = vxlan->default_dst.remote_vni;
++ }
++
++ if (tb[NDA_IFINDEX]) {
++ struct net_device *tdev;
++
++ if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
++ return -EINVAL;
++ *ifindex = nla_get_u32(tb[NDA_IFINDEX]);
++ tdev = __dev_get_by_index(net, *ifindex);
++ if (!tdev)
++ return -EADDRNOTAVAIL;
++ } else {
++ *ifindex = 0;
++ }
++
++ if (tb[NDA_NH_ID])
++ *nhid = nla_get_u32(tb[NDA_NH_ID]);
++ else
++ *nhid = 0;
++
++ return 0;
++}
++
++/* Add static entry (via netlink) */
++static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
++ struct net_device *dev,
++ const unsigned char *addr, u16 vid, u16 flags,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ /* struct net *net = dev_net(vxlan->dev); */
++ union vxlan_addr ip;
++ __be16 port;
++ __be32 src_vni, vni;
++ u32 ifindex, nhid;
++ u32 hash_index;
++ int err;
++
++ if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
++ pr_info("RTM_NEWNEIGH with invalid state %#x\n",
++ ndm->ndm_state);
++ return -EINVAL;
++ }
++
++ if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
++ return -EINVAL;
++
++ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
++ &nhid);
++ if (err)
++ return err;
++
++ if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
++ return -EAFNOSUPPORT;
++
++ hash_index = fdb_head_index(vxlan, addr, src_vni);
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
++ port, src_vni, vni, ifindex,
++ ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
++ nhid, true, extack);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++
++ return err;
++}
++
++static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
++ const unsigned char *addr, union vxlan_addr ip,
++ __be16 port, __be32 src_vni, __be32 vni,
++ u32 ifindex, bool swdev_notify)
++{
++ struct vxlan_rdst *rd = NULL;
++ struct vxlan_fdb *f;
++ int err = -ENOENT;
++
++ f = vxlan_find_mac(vxlan, addr, src_vni);
++ if (!f)
++ return err;
++
++ if (!vxlan_addr_any(&ip)) {
++ rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex);
++ if (!rd)
++ goto out;
++ }
++
++ /* remove a destination if it's not the only one on the list,
++ * otherwise destroy the fdb entry
++ */
++ if (rd && !list_is_singular(&f->remotes)) {
++ vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
++ goto out;
++ }
++
++ vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
++
++out:
++ return 0;
++}
++
++/* Delete entry (via netlink) */
++static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
++ struct net_device *dev,
++ const unsigned char *addr, u16 vid)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ union vxlan_addr ip;
++ __be32 src_vni, vni;
++ u32 ifindex, nhid;
++ u32 hash_index;
++ __be16 port;
++ int err;
++
++ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
++ &nhid);
++ if (err)
++ return err;
++
++ hash_index = fdb_head_index(vxlan, addr, src_vni);
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
++ true);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++
++ return err;
++}
++
++/* Dump forwarding table */
++static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
++ struct net_device *dev,
++ struct net_device *filter_dev, int *idx)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ unsigned int h;
++ int err = 0;
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ struct vxlan_fdb *f;
++
++ rcu_read_lock();
++ hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
++ struct vxlan_rdst *rd;
++
++ if (rcu_access_pointer(f->nh)) {
++ if (*idx < cb->args[2])
++ goto skip_nh;
++ err = vxlan_fdb_info(skb, vxlan, f,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ RTM_NEWNEIGH,
++ NLM_F_MULTI, NULL);
++ if (err < 0) {
++ rcu_read_unlock();
++ goto out;
++ }
++skip_nh:
++ *idx += 1;
++ continue;
++ }
++
++ list_for_each_entry_rcu(rd, &f->remotes, list) {
++ if (*idx < cb->args[2])
++ goto skip;
++
++ err = vxlan_fdb_info(skb, vxlan, f,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ RTM_NEWNEIGH,
++ NLM_F_MULTI, rd);
++ if (err < 0) {
++ rcu_read_unlock();
++ goto out;
++ }
++skip:
++ *idx += 1;
++ }
++ }
++ rcu_read_unlock();
++ }
++out:
++ return err;
++}
++
++static int vxlan_fdb_get(struct sk_buff *skb,
++ struct nlattr *tb[],
++ struct net_device *dev,
++ const unsigned char *addr,
++ u16 vid, u32 portid, u32 seq,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_fdb *f;
++ __be32 vni;
++ int err;
++
++ if (tb[NDA_VNI])
++ vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
++ else
++ vni = vxlan->default_dst.remote_vni;
++
++ rcu_read_lock();
++
++ f = __vxlan_find_mac(vxlan, addr, vni);
++ if (!f) {
++ NL_SET_ERR_MSG(extack, "Fdb entry not found");
++ err = -ENOENT;
++ goto errout;
++ }
++
++ err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
++ RTM_NEWNEIGH, 0, first_remote_rcu(f));
++errout:
++ rcu_read_unlock();
++ return err;
++}
++
++/* Watch incoming packets to learn mapping between Ethernet address
++ * and Tunnel endpoint.
++ * Return true if packet is bogus and should be dropped.
++ */
++static bool vxlan_snoop(struct net_device *dev,
++ union vxlan_addr *src_ip, const u8 *src_mac,
++ u32 src_ifindex, __be32 vni)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_fdb *f;
++ u32 ifindex = 0;
++
++#if IS_ENABLED(CONFIG_IPV6)
++ if (src_ip->sa.sa_family == AF_INET6 &&
++ (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
++ ifindex = src_ifindex;
++#endif
++
++ f = vxlan_find_mac(vxlan, src_mac, vni);
++ if (likely(f)) {
++ struct vxlan_rdst *rdst = first_remote_rcu(f);
++
++ if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
++ rdst->remote_ifindex == ifindex))
++ return false;
++
++ /* Don't migrate static entries, drop packets */
++ if (f->state & (NUD_PERMANENT | NUD_NOARP))
++ return true;
++
++		/* Don't override an fdb entry that has a nexthop with a learnt entry */
++ if (rcu_access_pointer(f->nh))
++ return true;
++
++ if (net_ratelimit())
++ netdev_info(dev,
++ "%pM migrated from %pIS to %pIS\n",
++ src_mac, &rdst->remote_ip.sa, &src_ip->sa);
++
++ rdst->remote_ip = *src_ip;
++ f->updated = jiffies;
++ vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
++ } else {
++ u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
++
++ /* learned new entry */
++ spin_lock(&vxlan->hash_lock[hash_index]);
++
++ /* close off race between vxlan_flush and incoming packets */
++ if (netif_running(dev))
++ vxlan_fdb_update(vxlan, src_mac, src_ip,
++ NUD_REACHABLE,
++ NLM_F_EXCL|NLM_F_CREATE,
++ vxlan->cfg.dst_port,
++ vni,
++ vxlan->default_dst.remote_vni,
++ ifindex, NTF_SELF, 0, true, NULL);
++ spin_unlock(&vxlan->hash_lock[hash_index]);
++ }
++
++ return false;
++}
++
++/* See if multicast group is already in use by other ID */
++static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
++{
++ struct vxlan_dev *vxlan;
++ struct vxlan_sock *sock4;
++#if IS_ENABLED(CONFIG_IPV6)
++ struct vxlan_sock *sock6;
++#endif
++ unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
++
++ sock4 = rtnl_dereference(dev->vn4_sock);
++
++ /* The vxlan_sock is only used by dev, leaving group has
++ * no effect on other vxlan devices.
++ */
++ if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1)
++ return false;
++#if IS_ENABLED(CONFIG_IPV6)
++ sock6 = rtnl_dereference(dev->vn6_sock);
++ if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1)
++ return false;
++#endif
++
++ list_for_each_entry(vxlan, &vn->vxlan_list, next) {
++ if (!netif_running(vxlan->dev) || vxlan == dev)
++ continue;
++
++ if (family == AF_INET &&
++ rtnl_dereference(vxlan->vn4_sock) != sock4)
++ continue;
++#if IS_ENABLED(CONFIG_IPV6)
++ if (family == AF_INET6 &&
++ rtnl_dereference(vxlan->vn6_sock) != sock6)
++ continue;
++#endif
++
++ if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
++ &dev->default_dst.remote_ip))
++ continue;
++
++ if (vxlan->default_dst.remote_ifindex !=
++ dev->default_dst.remote_ifindex)
++ continue;
++
++ return true;
++ }
++
++ return false;
++}
++
++static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
++{
++ struct vxlan_net *vn;
++
++ if (!vs)
++ return false;
++ if (!refcount_dec_and_test(&vs->refcnt))
++ return false;
++
++ vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
++ spin_lock(&vn->sock_lock);
++ hlist_del_rcu(&vs->hlist);
++ udp_tunnel_notify_del_rx_port(vs->sock,
++ (vs->flags & VXLAN_F_GPE) ?
++ UDP_TUNNEL_TYPE_VXLAN_GPE :
++ UDP_TUNNEL_TYPE_VXLAN);
++ spin_unlock(&vn->sock_lock);
++
++ return true;
++}
++
++static void vxlan_sock_release(struct vxlan_dev *vxlan)
++{
++ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
++#if IS_ENABLED(CONFIG_IPV6)
++ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
++
++ RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
++#endif
++
++ RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
++ synchronize_net();
++
++ vxlan_vs_del_dev(vxlan);
++
++ if (__vxlan_sock_release_prep(sock4)) {
++ udp_tunnel_sock_release(sock4->sock);
++ kfree(sock4);
++ }
++
++#if IS_ENABLED(CONFIG_IPV6)
++ if (__vxlan_sock_release_prep(sock6)) {
++ udp_tunnel_sock_release(sock6->sock);
++ kfree(sock6);
++ }
++#endif
++}
++
++/* Update multicast group membership when first VNI on
++ * multicast address is brought up
++ */
++static int vxlan_igmp_join(struct vxlan_dev *vxlan)
++{
++ struct sock *sk;
++ union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
++ int ifindex = vxlan->default_dst.remote_ifindex;
++ int ret = -EINVAL;
++
++ if (ip->sa.sa_family == AF_INET) {
++ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
++ struct ip_mreqn mreq = {
++ .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
++ .imr_ifindex = ifindex,
++ };
++
++ sk = sock4->sock->sk;
++ lock_sock(sk);
++ ret = ip_mc_join_group(sk, &mreq);
++ release_sock(sk);
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
++
++ sk = sock6->sock->sk;
++ lock_sock(sk);
++ ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
++ &ip->sin6.sin6_addr);
++ release_sock(sk);
++#endif
++ }
++
++ return ret;
++}
++
++/* Inverse of vxlan_igmp_join when last VNI is brought down */
++static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
++{
++ struct sock *sk;
++ union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
++ int ifindex = vxlan->default_dst.remote_ifindex;
++ int ret = -EINVAL;
++
++ if (ip->sa.sa_family == AF_INET) {
++ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
++ struct ip_mreqn mreq = {
++ .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
++ .imr_ifindex = ifindex,
++ };
++
++ sk = sock4->sock->sk;
++ lock_sock(sk);
++ ret = ip_mc_leave_group(sk, &mreq);
++ release_sock(sk);
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
++
++ sk = sock6->sock->sk;
++ lock_sock(sk);
++ ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
++ &ip->sin6.sin6_addr);
++ release_sock(sk);
++#endif
++ }
++
++ return ret;
++}
++
++static bool vxlan_remcsum(struct vxlanhdr *unparsed,
++ struct sk_buff *skb, u32 vxflags)
++{
++ size_t start, offset;
++
++ if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
++ goto out;
++
++ start = vxlan_rco_start(unparsed->vx_vni);
++ offset = start + vxlan_rco_offset(unparsed->vx_vni);
++
++ if (!pskb_may_pull(skb, offset + sizeof(u16)))
++ return false;
++
++ skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
++ !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
++out:
++ unparsed->vx_flags &= ~VXLAN_HF_RCO;
++ unparsed->vx_vni &= VXLAN_VNI_MASK;
++ return true;
++}
++
++static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
++ struct sk_buff *skb, u32 vxflags,
++ struct vxlan_metadata *md)
++{
++ struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
++ struct metadata_dst *tun_dst;
++
++ if (!(unparsed->vx_flags & VXLAN_HF_GBP))
++ goto out;
++
++ md->gbp = ntohs(gbp->policy_id);
++
++ tun_dst = (struct metadata_dst *)skb_dst(skb);
++ if (tun_dst) {
++ tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
++ tun_dst->u.tun_info.options_len = sizeof(*md);
++ }
++ if (gbp->dont_learn)
++ md->gbp |= VXLAN_GBP_DONT_LEARN;
++
++ if (gbp->policy_applied)
++ md->gbp |= VXLAN_GBP_POLICY_APPLIED;
++
++ /* In flow-based mode, GBP is carried in dst_metadata */
++ if (!(vxflags & VXLAN_F_COLLECT_METADATA))
++ skb->mark = md->gbp;
++out:
++ unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
++}
++
++static bool vxlan_set_mac(struct vxlan_dev *vxlan,
++ struct vxlan_sock *vs,
++ struct sk_buff *skb, __be32 vni)
++{
++ union vxlan_addr saddr;
++ u32 ifindex = skb->dev->ifindex;
++
++ skb_reset_mac_header(skb);
++ skb->protocol = eth_type_trans(skb, vxlan->dev);
++ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
++
++ /* Ignore packet loops (and multicast echo) */
++ if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
++ return false;
++
++ /* Get address from the outer IP header */
++ if (vxlan_get_sk_family(vs) == AF_INET) {
++ saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
++ saddr.sa.sa_family = AF_INET;
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
++ saddr.sa.sa_family = AF_INET6;
++#endif
++ }
++
++ if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
++ vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
++ return false;
++
++ return true;
++}
++
++static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
++ struct sk_buff *skb)
++{
++ int err = 0;
++
++ if (vxlan_get_sk_family(vs) == AF_INET)
++ err = IP_ECN_decapsulate(oiph, skb);
++#if IS_ENABLED(CONFIG_IPV6)
++ else
++ err = IP6_ECN_decapsulate(oiph, skb);
++#endif
++
++ if (unlikely(err) && log_ecn_error) {
++ if (vxlan_get_sk_family(vs) == AF_INET)
++ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
++ &((struct iphdr *)oiph)->saddr,
++ ((struct iphdr *)oiph)->tos);
++ else
++ net_info_ratelimited("non-ECT from %pI6\n",
++ &((struct ipv6hdr *)oiph)->saddr);
++ }
++ return err <= 1;
++}
++
++/* Callback from net/ipv4/udp.c to receive packets */
++static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
++{
++ struct vxlan_dev *vxlan;
++ struct vxlan_sock *vs;
++ struct vxlanhdr unparsed;
++ struct vxlan_metadata _md;
++ struct vxlan_metadata *md = &_md;
++ __be16 protocol = htons(ETH_P_TEB);
++ bool raw_proto = false;
++ void *oiph;
++ __be32 vni = 0;
++
++ /* Need UDP and VXLAN header to be present */
++ if (!pskb_may_pull(skb, VXLAN_HLEN))
++ goto drop;
++
++ unparsed = *vxlan_hdr(skb);
++ /* VNI flag always required to be set */
++ if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
++ netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
++ ntohl(vxlan_hdr(skb)->vx_flags),
++ ntohl(vxlan_hdr(skb)->vx_vni));
++ /* Return non vxlan pkt */
++ goto drop;
++ }
++ unparsed.vx_flags &= ~VXLAN_HF_VNI;
++ unparsed.vx_vni &= ~VXLAN_VNI_MASK;
++
++ vs = rcu_dereference_sk_user_data(sk);
++ if (!vs)
++ goto drop;
++
++ vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
++
++ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
++ if (!vxlan)
++ goto drop;
++
++ /* For backwards compatibility, only allow reserved fields to be
++ * used by VXLAN extensions if explicitly requested.
++ */
++ if (vs->flags & VXLAN_F_GPE) {
++ if (!vxlan_parse_gpe_proto(&unparsed, &protocol))
++ goto drop;
++ unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS;
++ raw_proto = true;
++ }
++
++ if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
++ !net_eq(vxlan->net, dev_net(vxlan->dev))))
++ goto drop;
++
++ if (vs->flags & VXLAN_F_REMCSUM_RX)
++ if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
++ goto drop;
++
++ if (vxlan_collect_metadata(vs)) {
++ struct metadata_dst *tun_dst;
++
++ tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
++ key32_to_tunnel_id(vni), sizeof(*md));
++
++ if (!tun_dst)
++ goto drop;
++
++ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
++
++ skb_dst_set(skb, (struct dst_entry *)tun_dst);
++ } else {
++ memset(md, 0, sizeof(*md));
++ }
++
++ if (vs->flags & VXLAN_F_GBP)
++ vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
++ /* Note that GBP and GPE can never be active together. This is
++ * ensured in vxlan_dev_configure.
++ */
++
++ if (unparsed.vx_flags || unparsed.vx_vni) {
++ /* If there are any unprocessed flags remaining treat
++ * this as a malformed packet. This behavior diverges from
++		 * VXLAN RFC (RFC7348) which stipulates that bits in
++		 * reserved fields are to be ignored. The approach here
++ * maintains compatibility with previous stack code, and also
++ * is more robust and provides a little more security in
++ * adding extensions to VXLAN.
++ */
++ goto drop;
++ }
++
++ if (!raw_proto) {
++ if (!vxlan_set_mac(vxlan, vs, skb, vni))
++ goto drop;
++ } else {
++ skb_reset_mac_header(skb);
++ skb->dev = vxlan->dev;
++ skb->pkt_type = PACKET_HOST;
++ }
++
++ oiph = skb_network_header(skb);
++ skb_reset_network_header(skb);
++
++ if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
++ ++vxlan->dev->stats.rx_frame_errors;
++ ++vxlan->dev->stats.rx_errors;
++ goto drop;
++ }
++
++ rcu_read_lock();
++
++ if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
++ rcu_read_unlock();
++ atomic_long_inc(&vxlan->dev->rx_dropped);
++ goto drop;
++ }
++
++ dev_sw_netstats_rx_add(vxlan->dev, skb->len);
++ gro_cells_receive(&vxlan->gro_cells, skb);
++
++ rcu_read_unlock();
++
++ return 0;
++
++drop:
++ /* Consume bad packet */
++ kfree_skb(skb);
++ return 0;
++}
++
++/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
++static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
++{
++ struct vxlan_dev *vxlan;
++ struct vxlan_sock *vs;
++ struct vxlanhdr *hdr;
++ __be32 vni;
++
++ if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
++ return -EINVAL;
++
++ hdr = vxlan_hdr(skb);
++
++ if (!(hdr->vx_flags & VXLAN_HF_VNI))
++ return -EINVAL;
++
++ vs = rcu_dereference_sk_user_data(sk);
++ if (!vs)
++ return -ENOENT;
++
++ vni = vxlan_vni(hdr->vx_vni);
++ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
++ if (!vxlan)
++ return -ENOENT;
++
++ return 0;
++}
++
++static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct arphdr *parp;
++ u8 *arpptr, *sha;
++ __be32 sip, tip;
++ struct neighbour *n;
++
++ if (dev->flags & IFF_NOARP)
++ goto out;
++
++ if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
++ dev->stats.tx_dropped++;
++ goto out;
++ }
++ parp = arp_hdr(skb);
++
++ if ((parp->ar_hrd != htons(ARPHRD_ETHER) &&
++ parp->ar_hrd != htons(ARPHRD_IEEE802)) ||
++ parp->ar_pro != htons(ETH_P_IP) ||
++ parp->ar_op != htons(ARPOP_REQUEST) ||
++ parp->ar_hln != dev->addr_len ||
++ parp->ar_pln != 4)
++ goto out;
++ arpptr = (u8 *)parp + sizeof(struct arphdr);
++ sha = arpptr;
++ arpptr += dev->addr_len; /* sha */
++ memcpy(&sip, arpptr, sizeof(sip));
++ arpptr += sizeof(sip);
++ arpptr += dev->addr_len; /* tha */
++ memcpy(&tip, arpptr, sizeof(tip));
++
++ if (ipv4_is_loopback(tip) ||
++ ipv4_is_multicast(tip))
++ goto out;
++
++ n = neigh_lookup(&arp_tbl, &tip, dev);
++
++ if (n) {
++ struct vxlan_fdb *f;
++ struct sk_buff *reply;
++
++ if (!(n->nud_state & NUD_CONNECTED)) {
++ neigh_release(n);
++ goto out;
++ }
++
++ f = vxlan_find_mac(vxlan, n->ha, vni);
++ if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
++ /* bridge-local neighbor */
++ neigh_release(n);
++ goto out;
++ }
++
++ reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
++ n->ha, sha);
++
++ neigh_release(n);
++
++ if (reply == NULL)
++ goto out;
++
++ skb_reset_mac_header(reply);
++ __skb_pull(reply, skb_network_offset(reply));
++ reply->ip_summed = CHECKSUM_UNNECESSARY;
++ reply->pkt_type = PACKET_HOST;
++
++ if (netif_rx_ni(reply) == NET_RX_DROP)
++ dev->stats.rx_dropped++;
++ } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
++ union vxlan_addr ipa = {
++ .sin.sin_addr.s_addr = tip,
++ .sin.sin_family = AF_INET,
++ };
++
++ vxlan_ip_miss(dev, &ipa);
++ }
++out:
++ consume_skb(skb);
++ return NETDEV_TX_OK;
++}
++
++#if IS_ENABLED(CONFIG_IPV6)
++static struct sk_buff *vxlan_na_create(struct sk_buff *request,
++ struct neighbour *n, bool isrouter)
++{
++ struct net_device *dev = request->dev;
++ struct sk_buff *reply;
++ struct nd_msg *ns, *na;
++ struct ipv6hdr *pip6;
++ u8 *daddr;
++ int na_olen = 8; /* opt hdr + ETH_ALEN for target */
++ int ns_olen;
++ int i, len;
++
++ if (dev == NULL || !pskb_may_pull(request, request->len))
++ return NULL;
++
++ len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
++ sizeof(*na) + na_olen + dev->needed_tailroom;
++ reply = alloc_skb(len, GFP_ATOMIC);
++ if (reply == NULL)
++ return NULL;
++
++ reply->protocol = htons(ETH_P_IPV6);
++ reply->dev = dev;
++ skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
++ skb_push(reply, sizeof(struct ethhdr));
++ skb_reset_mac_header(reply);
++
++ ns = (struct nd_msg *)(ipv6_hdr(request) + 1);
++
++ daddr = eth_hdr(request)->h_source;
++ ns_olen = request->len - skb_network_offset(request) -
++ sizeof(struct ipv6hdr) - sizeof(*ns);
++ for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
++ if (!ns->opt[i + 1]) {
++ kfree_skb(reply);
++ return NULL;
++ }
++ if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
++ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
++ break;
++ }
++ }
++
++ /* Ethernet header */
++ ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
++ ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
++ eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
++ reply->protocol = htons(ETH_P_IPV6);
++
++ skb_pull(reply, sizeof(struct ethhdr));
++ skb_reset_network_header(reply);
++ skb_put(reply, sizeof(struct ipv6hdr));
++
++ /* IPv6 header */
++
++ pip6 = ipv6_hdr(reply);
++ memset(pip6, 0, sizeof(struct ipv6hdr));
++ pip6->version = 6;
++ pip6->priority = ipv6_hdr(request)->priority;
++ pip6->nexthdr = IPPROTO_ICMPV6;
++ pip6->hop_limit = 255;
++ pip6->daddr = ipv6_hdr(request)->saddr;
++ pip6->saddr = *(struct in6_addr *)n->primary_key;
++
++ skb_pull(reply, sizeof(struct ipv6hdr));
++ skb_reset_transport_header(reply);
++
++ /* Neighbor Advertisement */
++ na = skb_put_zero(reply, sizeof(*na) + na_olen);
++ na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
++ na->icmph.icmp6_router = isrouter;
++ na->icmph.icmp6_override = 1;
++ na->icmph.icmp6_solicited = 1;
++ na->target = ns->target;
++ ether_addr_copy(&na->opt[2], n->ha);
++ na->opt[0] = ND_OPT_TARGET_LL_ADDR;
++ na->opt[1] = na_olen >> 3;
++
++ na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
++ &pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
++ csum_partial(na, sizeof(*na)+na_olen, 0));
++
++ pip6->payload_len = htons(sizeof(*na)+na_olen);
++
++ skb_push(reply, sizeof(struct ipv6hdr));
++
++ reply->ip_summed = CHECKSUM_UNNECESSARY;
++
++ return reply;
++}
++
++static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ const struct in6_addr *daddr;
++ const struct ipv6hdr *iphdr;
++ struct inet6_dev *in6_dev;
++ struct neighbour *n;
++ struct nd_msg *msg;
++
++ rcu_read_lock();
++ in6_dev = __in6_dev_get(dev);
++ if (!in6_dev)
++ goto out;
++
++ iphdr = ipv6_hdr(skb);
++ daddr = &iphdr->daddr;
++ msg = (struct nd_msg *)(iphdr + 1);
++
++ if (ipv6_addr_loopback(daddr) ||
++ ipv6_addr_is_multicast(&msg->target))
++ goto out;
++
++ n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
++
++ if (n) {
++ struct vxlan_fdb *f;
++ struct sk_buff *reply;
++
++ if (!(n->nud_state & NUD_CONNECTED)) {
++ neigh_release(n);
++ goto out;
++ }
++
++ f = vxlan_find_mac(vxlan, n->ha, vni);
++ if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
++ /* bridge-local neighbor */
++ neigh_release(n);
++ goto out;
++ }
++
++ reply = vxlan_na_create(skb, n,
++ !!(f ? f->flags & NTF_ROUTER : 0));
++
++ neigh_release(n);
++
++ if (reply == NULL)
++ goto out;
++
++ if (netif_rx_ni(reply) == NET_RX_DROP)
++ dev->stats.rx_dropped++;
++
++ } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
++ union vxlan_addr ipa = {
++ .sin6.sin6_addr = msg->target,
++ .sin6.sin6_family = AF_INET6,
++ };
++
++ vxlan_ip_miss(dev, &ipa);
++ }
++
++out:
++ rcu_read_unlock();
++ consume_skb(skb);
++ return NETDEV_TX_OK;
++}
++#endif
++
++static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct neighbour *n;
++
++ if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
++ return false;
++
++ n = NULL;
++ switch (ntohs(eth_hdr(skb)->h_proto)) {
++ case ETH_P_IP:
++ {
++ struct iphdr *pip;
++
++ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
++ return false;
++ pip = ip_hdr(skb);
++ n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
++ if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
++ union vxlan_addr ipa = {
++ .sin.sin_addr.s_addr = pip->daddr,
++ .sin.sin_family = AF_INET,
++ };
++
++ vxlan_ip_miss(dev, &ipa);
++ return false;
++ }
++
++ break;
++ }
++#if IS_ENABLED(CONFIG_IPV6)
++ case ETH_P_IPV6:
++ {
++ struct ipv6hdr *pip6;
++
++ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
++ return false;
++ pip6 = ipv6_hdr(skb);
++ n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
++ if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
++ union vxlan_addr ipa = {
++ .sin6.sin6_addr = pip6->daddr,
++ .sin6.sin6_family = AF_INET6,
++ };
++
++ vxlan_ip_miss(dev, &ipa);
++ return false;
++ }
++
++ break;
++ }
++#endif
++ default:
++ return false;
++ }
++
++ if (n) {
++ bool diff;
++
++ diff = !ether_addr_equal(eth_hdr(skb)->h_dest, n->ha);
++ if (diff) {
++ memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
++ dev->addr_len);
++ memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len);
++ }
++ neigh_release(n);
++ return diff;
++ }
++
++ return false;
++}
++
++static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
++ struct vxlan_metadata *md)
++{
++ struct vxlanhdr_gbp *gbp;
++
++ if (!md->gbp)
++ return;
++
++ gbp = (struct vxlanhdr_gbp *)vxh;
++ vxh->vx_flags |= VXLAN_HF_GBP;
++
++ if (md->gbp & VXLAN_GBP_DONT_LEARN)
++ gbp->dont_learn = 1;
++
++ if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
++ gbp->policy_applied = 1;
++
++ gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
++}
++
++static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
++ __be16 protocol)
++{
++ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
++
++ gpe->np_applied = 1;
++ gpe->next_protocol = tun_p_from_eth_p(protocol);
++ if (!gpe->next_protocol)
++ return -EPFNOSUPPORT;
++ return 0;
++}
++
++static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
++ int iphdr_len, __be32 vni,
++ struct vxlan_metadata *md, u32 vxflags,
++ bool udp_sum)
++{
++ struct vxlanhdr *vxh;
++ int min_headroom;
++ int err;
++ int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
++ __be16 inner_protocol = htons(ETH_P_TEB);
++
++ if ((vxflags & VXLAN_F_REMCSUM_TX) &&
++ skb->ip_summed == CHECKSUM_PARTIAL) {
++ int csum_start = skb_checksum_start_offset(skb);
++
++ if (csum_start <= VXLAN_MAX_REMCSUM_START &&
++ !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
++ (skb->csum_offset == offsetof(struct udphdr, check) ||
++ skb->csum_offset == offsetof(struct tcphdr, check)))
++ type |= SKB_GSO_TUNNEL_REMCSUM;
++ }
++
++ min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
++ + VXLAN_HLEN + iphdr_len;
++
++ /* Need space for new headers (invalidates iph ptr) */
++ err = skb_cow_head(skb, min_headroom);
++ if (unlikely(err))
++ return err;
++
++ err = iptunnel_handle_offloads(skb, type);
++ if (err)
++ return err;
++
++ vxh = __skb_push(skb, sizeof(*vxh));
++ vxh->vx_flags = VXLAN_HF_VNI;
++ vxh->vx_vni = vxlan_vni_field(vni);
++
++ if (type & SKB_GSO_TUNNEL_REMCSUM) {
++ unsigned int start;
++
++ start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
++ vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
++ vxh->vx_flags |= VXLAN_HF_RCO;
++
++ if (!skb_is_gso(skb)) {
++ skb->ip_summed = CHECKSUM_NONE;
++ skb->encapsulation = 0;
++ }
++ }
++
++ if (vxflags & VXLAN_F_GBP)
++ vxlan_build_gbp_hdr(vxh, vxflags, md);
++ if (vxflags & VXLAN_F_GPE) {
++ err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
++ if (err < 0)
++ return err;
++ inner_protocol = skb->protocol;
++ }
++
++ skb_set_inner_protocol(skb, inner_protocol);
++ return 0;
++}
++
++static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
++ struct vxlan_sock *sock4,
++ struct sk_buff *skb, int oif, u8 tos,
++ __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport,
++ struct dst_cache *dst_cache,
++ const struct ip_tunnel_info *info)
++{
++ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
++ struct rtable *rt = NULL;
++ struct flowi4 fl4;
++
++ if (!sock4)
++ return ERR_PTR(-EIO);
++
++ if (tos && !info)
++ use_cache = false;
++ if (use_cache) {
++ rt = dst_cache_get_ip4(dst_cache, saddr);
++ if (rt)
++ return rt;
++ }
++
++ memset(&fl4, 0, sizeof(fl4));
++ fl4.flowi4_oif = oif;
++ fl4.flowi4_tos = RT_TOS(tos);
++ fl4.flowi4_mark = skb->mark;
++ fl4.flowi4_proto = IPPROTO_UDP;
++ fl4.daddr = daddr;
++ fl4.saddr = *saddr;
++ fl4.fl4_dport = dport;
++ fl4.fl4_sport = sport;
++
++ rt = ip_route_output_key(vxlan->net, &fl4);
++ if (!IS_ERR(rt)) {
++ if (rt->dst.dev == dev) {
++ netdev_dbg(dev, "circular route to %pI4\n", &daddr);
++ ip_rt_put(rt);
++ return ERR_PTR(-ELOOP);
++ }
++
++ *saddr = fl4.saddr;
++ if (use_cache)
++ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
++ } else {
++ netdev_dbg(dev, "no route to %pI4\n", &daddr);
++ return ERR_PTR(-ENETUNREACH);
++ }
++ return rt;
++}
++
++#if IS_ENABLED(CONFIG_IPV6)
++static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
++ struct net_device *dev,
++ struct vxlan_sock *sock6,
++ struct sk_buff *skb, int oif, u8 tos,
++ __be32 label,
++ const struct in6_addr *daddr,
++ struct in6_addr *saddr,
++ __be16 dport, __be16 sport,
++ struct dst_cache *dst_cache,
++ const struct ip_tunnel_info *info)
++{
++ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
++ struct dst_entry *ndst;
++ struct flowi6 fl6;
++
++ if (!sock6)
++ return ERR_PTR(-EIO);
++
++ if (tos && !info)
++ use_cache = false;
++ if (use_cache) {
++ ndst = dst_cache_get_ip6(dst_cache, saddr);
++ if (ndst)
++ return ndst;
++ }
++
++ memset(&fl6, 0, sizeof(fl6));
++ fl6.flowi6_oif = oif;
++ fl6.daddr = *daddr;
++ fl6.saddr = *saddr;
++ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
++ fl6.flowi6_mark = skb->mark;
++ fl6.flowi6_proto = IPPROTO_UDP;
++ fl6.fl6_dport = dport;
++ fl6.fl6_sport = sport;
++
++ ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
++ &fl6, NULL);
++ if (IS_ERR(ndst)) {
++ netdev_dbg(dev, "no route to %pI6\n", daddr);
++ return ERR_PTR(-ENETUNREACH);
++ }
++
++ if (unlikely(ndst->dev == dev)) {
++ netdev_dbg(dev, "circular route to %pI6\n", daddr);
++ dst_release(ndst);
++ return ERR_PTR(-ELOOP);
++ }
++
++ *saddr = fl6.saddr;
++ if (use_cache)
++ dst_cache_set_ip6(dst_cache, ndst, saddr);
++ return ndst;
++}
++#endif
++
++/* Bypass encapsulation if the destination is local */
++static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
++ struct vxlan_dev *dst_vxlan, __be32 vni,
++ bool snoop)
++{
++ struct pcpu_sw_netstats *tx_stats, *rx_stats;
++ union vxlan_addr loopback;
++ union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
++ struct net_device *dev;
++ int len = skb->len;
++
++ tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
++ rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
++ skb->pkt_type = PACKET_HOST;
++ skb->encapsulation = 0;
++ skb->dev = dst_vxlan->dev;
++ __skb_pull(skb, skb_network_offset(skb));
++
++ if (remote_ip->sa.sa_family == AF_INET) {
++ loopback.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
++ loopback.sa.sa_family = AF_INET;
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ loopback.sin6.sin6_addr = in6addr_loopback;
++ loopback.sa.sa_family = AF_INET6;
++#endif
++ }
++
++ rcu_read_lock();
++ dev = skb->dev;
++ if (unlikely(!(dev->flags & IFF_UP))) {
++ kfree_skb(skb);
++ goto drop;
++ }
++
++ if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
++ vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
++
++ u64_stats_update_begin(&tx_stats->syncp);
++ tx_stats->tx_packets++;
++ tx_stats->tx_bytes += len;
++ u64_stats_update_end(&tx_stats->syncp);
++
++ if (netif_rx(skb) == NET_RX_SUCCESS) {
++ u64_stats_update_begin(&rx_stats->syncp);
++ rx_stats->rx_packets++;
++ rx_stats->rx_bytes += len;
++ u64_stats_update_end(&rx_stats->syncp);
++ } else {
++drop:
++ dev->stats.rx_dropped++;
++ }
++ rcu_read_unlock();
++}
++
++static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
++ struct vxlan_dev *vxlan,
++ union vxlan_addr *daddr,
++ __be16 dst_port, int dst_ifindex, __be32 vni,
++ struct dst_entry *dst,
++ u32 rt_flags)
++{
++#if IS_ENABLED(CONFIG_IPV6)
++ /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
++ * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
++ * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
++ */
++ BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
++#endif
++ /* Bypass encapsulation if the destination is local */
++ if (rt_flags & RTCF_LOCAL &&
++ !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
++ struct vxlan_dev *dst_vxlan;
++
++ dst_release(dst);
++ dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
++ daddr->sa.sa_family, dst_port,
++ vxlan->cfg.flags);
++ if (!dst_vxlan) {
++ dev->stats.tx_errors++;
++ kfree_skb(skb);
++
++ return -ENOENT;
++ }
++ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
++ return 1;
++ }
++
++ return 0;
++}
++
++static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
++ __be32 default_vni, struct vxlan_rdst *rdst,
++ bool did_rsc)
++{
++ struct dst_cache *dst_cache;
++ struct ip_tunnel_info *info;
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ const struct iphdr *old_iph = ip_hdr(skb);
++ union vxlan_addr *dst;
++ union vxlan_addr remote_ip, local_ip;
++ struct vxlan_metadata _md;
++ struct vxlan_metadata *md = &_md;
++ __be16 src_port = 0, dst_port;
++ struct dst_entry *ndst = NULL;
++ __be32 vni, label;
++ __u8 tos, ttl;
++ int ifindex;
++ int err;
++ u32 flags = vxlan->cfg.flags;
++ bool udp_sum = false;
++ bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
++
++ info = skb_tunnel_info(skb);
++
++ if (rdst) {
++ dst = &rdst->remote_ip;
++ if (vxlan_addr_any(dst)) {
++ if (did_rsc) {
++ /* short-circuited back to local bridge */
++ vxlan_encap_bypass(skb, vxlan, vxlan,
++ default_vni, true);
++ return;
++ }
++ goto drop;
++ }
++
++ dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
++ vni = (rdst->remote_vni) ? : default_vni;
++ ifindex = rdst->remote_ifindex;
++ local_ip = vxlan->cfg.saddr;
++ dst_cache = &rdst->dst_cache;
++ md->gbp = skb->mark;
++ if (flags & VXLAN_F_TTL_INHERIT) {
++ ttl = ip_tunnel_get_ttl(old_iph, skb);
++ } else {
++ ttl = vxlan->cfg.ttl;
++ if (!ttl && vxlan_addr_multicast(dst))
++ ttl = 1;
++ }
++
++ tos = vxlan->cfg.tos;
++ if (tos == 1)
++ tos = ip_tunnel_get_dsfield(old_iph, skb);
++
++ if (dst->sa.sa_family == AF_INET)
++ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
++ else
++ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
++ label = vxlan->cfg.label;
++ } else {
++ if (!info) {
++ WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
++ dev->name);
++ goto drop;
++ }
++ remote_ip.sa.sa_family = ip_tunnel_info_af(info);
++ if (remote_ip.sa.sa_family == AF_INET) {
++ remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
++ local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
++ } else {
++ remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
++ local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
++ }
++ dst = &remote_ip;
++ dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
++ vni = tunnel_id_to_key32(info->key.tun_id);
++ ifindex = 0;
++ dst_cache = &info->dst_cache;
++ if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
++ if (info->options_len < sizeof(*md))
++ goto drop;
++ md = ip_tunnel_info_opts(info);
++ }
++ ttl = info->key.ttl;
++ tos = info->key.tos;
++ label = info->key.label;
++ udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
++ }
++ src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
++ vxlan->cfg.port_max, true);
++
++ rcu_read_lock();
++ if (dst->sa.sa_family == AF_INET) {
++ struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
++ struct rtable *rt;
++ __be16 df = 0;
++
++ if (!ifindex)
++ ifindex = sock4->sock->sk->sk_bound_dev_if;
++
++ rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
++ dst->sin.sin_addr.s_addr,
++ &local_ip.sin.sin_addr.s_addr,
++ dst_port, src_port,
++ dst_cache, info);
++ if (IS_ERR(rt)) {
++ err = PTR_ERR(rt);
++ goto tx_error;
++ }
++
++ if (!info) {
++ /* Bypass encapsulation if the destination is local */
++ err = encap_bypass_if_local(skb, dev, vxlan, dst,
++ dst_port, ifindex, vni,
++ &rt->dst, rt->rt_flags);
++ if (err)
++ goto out_unlock;
++
++ if (vxlan->cfg.df == VXLAN_DF_SET) {
++ df = htons(IP_DF);
++ } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
++ struct ethhdr *eth = eth_hdr(skb);
++
++ if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
++ (ntohs(eth->h_proto) == ETH_P_IP &&
++ old_iph->frag_off & htons(IP_DF)))
++ df = htons(IP_DF);
++ }
++ } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
++ df = htons(IP_DF);
++ }
++
++ ndst = &rt->dst;
++ err = skb_tunnel_check_pmtu(skb, ndst, vxlan_headroom(flags & VXLAN_F_GPE),
++ netif_is_any_bridge_port(dev));
++ if (err < 0) {
++ goto tx_error;
++ } else if (err) {
++ if (info) {
++ struct ip_tunnel_info *unclone;
++ struct in_addr src, dst;
++
++ unclone = skb_tunnel_info_unclone(skb);
++ if (unlikely(!unclone))
++ goto tx_error;
++
++ src = remote_ip.sin.sin_addr;
++ dst = local_ip.sin.sin_addr;
++ unclone->key.u.ipv4.src = src.s_addr;
++ unclone->key.u.ipv4.dst = dst.s_addr;
++ }
++ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
++ dst_release(ndst);
++ goto out_unlock;
++ }
++
++ tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
++ ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
++ err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
++ vni, md, flags, udp_sum);
++ if (err < 0)
++ goto tx_error;
++
++ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr,
++ dst->sin.sin_addr.s_addr, tos, ttl, df,
++ src_port, dst_port, xnet, !udp_sum);
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
++
++ if (!ifindex)
++ ifindex = sock6->sock->sk->sk_bound_dev_if;
++
++ ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
++ label, &dst->sin6.sin6_addr,
++ &local_ip.sin6.sin6_addr,
++ dst_port, src_port,
++ dst_cache, info);
++ if (IS_ERR(ndst)) {
++ err = PTR_ERR(ndst);
++ ndst = NULL;
++ goto tx_error;
++ }
++
++ if (!info) {
++ u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
++
++ err = encap_bypass_if_local(skb, dev, vxlan, dst,
++ dst_port, ifindex, vni,
++ ndst, rt6i_flags);
++ if (err)
++ goto out_unlock;
++ }
++
++ err = skb_tunnel_check_pmtu(skb, ndst,
++ vxlan_headroom((flags & VXLAN_F_GPE) | VXLAN_F_IPV6),
++ netif_is_any_bridge_port(dev));
++ if (err < 0) {
++ goto tx_error;
++ } else if (err) {
++ if (info) {
++ struct ip_tunnel_info *unclone;
++ struct in6_addr src, dst;
++
++ unclone = skb_tunnel_info_unclone(skb);
++ if (unlikely(!unclone))
++ goto tx_error;
++
++ src = remote_ip.sin6.sin6_addr;
++ dst = local_ip.sin6.sin6_addr;
++ unclone->key.u.ipv6.src = src;
++ unclone->key.u.ipv6.dst = dst;
++ }
++
++ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
++ dst_release(ndst);
++ goto out_unlock;
++ }
++
++ tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
++ ttl = ttl ? : ip6_dst_hoplimit(ndst);
++ skb_scrub_packet(skb, xnet);
++ err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
++ vni, md, flags, udp_sum);
++ if (err < 0)
++ goto tx_error;
++
++ udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
++ &local_ip.sin6.sin6_addr,
++ &dst->sin6.sin6_addr, tos, ttl,
++ label, src_port, dst_port, !udp_sum);
++#endif
++ }
++out_unlock:
++ rcu_read_unlock();
++ return;
++
++drop:
++ dev->stats.tx_dropped++;
++ dev_kfree_skb(skb);
++ return;
++
++tx_error:
++ rcu_read_unlock();
++ if (err == -ELOOP)
++ dev->stats.collisions++;
++ else if (err == -ENETUNREACH)
++ dev->stats.tx_carrier_errors++;
++ dst_release(ndst);
++ dev->stats.tx_errors++;
++ kfree_skb(skb);
++}
++
++static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
++ struct vxlan_fdb *f, __be32 vni, bool did_rsc)
++{
++ struct vxlan_rdst nh_rdst;
++ struct nexthop *nh;
++ bool do_xmit;
++ u32 hash;
++
++ memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
++ hash = skb_get_hash(skb);
++
++ rcu_read_lock();
++ nh = rcu_dereference(f->nh);
++ if (!nh) {
++ rcu_read_unlock();
++ goto drop;
++ }
++ do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
++ rcu_read_unlock();
++
++ if (likely(do_xmit))
++ vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
++ else
++ goto drop;
++
++ return;
++
++drop:
++ dev->stats.tx_dropped++;
++ dev_kfree_skb(skb);
++}
++
++/* Transmit local packets over VXLAN
++ *
++ * Outer IP header inherits ECN and DF from inner header.
++ * Outer UDP destination is the VXLAN assigned port.
++ * Source port is based on a hash of the flow.
++ */
++static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_rdst *rdst, *fdst = NULL;
++ const struct ip_tunnel_info *info;
++ bool did_rsc = false;
++ struct vxlan_fdb *f;
++ struct ethhdr *eth;
++ __be32 vni = 0;
++
++ info = skb_tunnel_info(skb);
++
++ skb_reset_mac_header(skb);
++
++ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
++ if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
++ info->mode & IP_TUNNEL_INFO_TX) {
++ vni = tunnel_id_to_key32(info->key.tun_id);
++ } else {
++ if (info && info->mode & IP_TUNNEL_INFO_TX)
++ vxlan_xmit_one(skb, dev, vni, NULL, false);
++ else
++ kfree_skb(skb);
++ return NETDEV_TX_OK;
++ }
++ }
++
++ if (vxlan->cfg.flags & VXLAN_F_PROXY) {
++ eth = eth_hdr(skb);
++ if (ntohs(eth->h_proto) == ETH_P_ARP)
++ return arp_reduce(dev, skb, vni);
++#if IS_ENABLED(CONFIG_IPV6)
++ else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
++ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
++ sizeof(struct nd_msg)) &&
++ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
++ struct nd_msg *m = (struct nd_msg *)(ipv6_hdr(skb) + 1);
++
++ if (m->icmph.icmp6_code == 0 &&
++ m->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
++ return neigh_reduce(dev, skb, vni);
++ }
++#endif
++ }
++
++ eth = eth_hdr(skb);
++ f = vxlan_find_mac(vxlan, eth->h_dest, vni);
++ did_rsc = false;
++
++ if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&
++ (ntohs(eth->h_proto) == ETH_P_IP ||
++ ntohs(eth->h_proto) == ETH_P_IPV6)) {
++ did_rsc = route_shortcircuit(dev, skb);
++ if (did_rsc)
++ f = vxlan_find_mac(vxlan, eth->h_dest, vni);
++ }
++
++ if (f == NULL) {
++ f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
++ if (f == NULL) {
++ if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&
++ !is_multicast_ether_addr(eth->h_dest))
++ vxlan_fdb_miss(vxlan, eth->h_dest);
++
++ dev->stats.tx_dropped++;
++ kfree_skb(skb);
++ return NETDEV_TX_OK;
++ }
++ }
++
++ if (rcu_access_pointer(f->nh)) {
++ vxlan_xmit_nh(skb, dev, f,
++ (vni ? : vxlan->default_dst.remote_vni), did_rsc);
++ } else {
++ list_for_each_entry_rcu(rdst, &f->remotes, list) {
++ struct sk_buff *skb1;
++
++ if (!fdst) {
++ fdst = rdst;
++ continue;
++ }
++ skb1 = skb_clone(skb, GFP_ATOMIC);
++ if (skb1)
++ vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
++ }
++ if (fdst)
++ vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
++ else
++ kfree_skb(skb);
++ }
++
++ return NETDEV_TX_OK;
++}
++
++/* Walk the forwarding table and purge stale entries */
++static void vxlan_cleanup(struct timer_list *t)
++{
++ struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
++ unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
++ unsigned int h;
++
++ if (!netif_running(vxlan->dev))
++ return;
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ struct hlist_node *p, *n;
++
++ spin_lock(&vxlan->hash_lock[h]);
++ hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
++ struct vxlan_fdb *f
++ = container_of(p, struct vxlan_fdb, hlist);
++ unsigned long timeout;
++
++ if (f->state & (NUD_PERMANENT | NUD_NOARP))
++ continue;
++
++ if (f->flags & NTF_EXT_LEARNED)
++ continue;
++
++ timeout = f->used + vxlan->cfg.age_interval * HZ;
++ if (time_before_eq(timeout, jiffies)) {
++ netdev_dbg(vxlan->dev,
++ "garbage collect %pM\n",
++ f->eth_addr);
++ f->state = NUD_STALE;
++ vxlan_fdb_destroy(vxlan, f, true, true);
++ } else if (time_before(timeout, next_timer))
++ next_timer = timeout;
++ }
++ spin_unlock(&vxlan->hash_lock[h]);
++ }
++
++ mod_timer(&vxlan->age_timer, next_timer);
++}
++
++static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
++{
++ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
++
++ spin_lock(&vn->sock_lock);
++ hlist_del_init_rcu(&vxlan->hlist4.hlist);
++#if IS_ENABLED(CONFIG_IPV6)
++ hlist_del_init_rcu(&vxlan->hlist6.hlist);
++#endif
++ spin_unlock(&vn->sock_lock);
++}
++
++static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
++ struct vxlan_dev_node *node)
++{
++ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
++ __be32 vni = vxlan->default_dst.remote_vni;
++
++ node->vxlan = vxlan;
++ spin_lock(&vn->sock_lock);
++ hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
++ spin_unlock(&vn->sock_lock);
++}
++
++/* Setup stats when device is created */
++static int vxlan_init(struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ int err;
++
++ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ if (!dev->tstats)
++ return -ENOMEM;
++
++ err = gro_cells_init(&vxlan->gro_cells, dev);
++ if (err) {
++ free_percpu(dev->tstats);
++ return err;
++ }
++
++ return 0;
++}
++
++static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
++{
++ struct vxlan_fdb *f;
++ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
++
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
++ if (f)
++ vxlan_fdb_destroy(vxlan, f, true, true);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++}
++
++static void vxlan_uninit(struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++
++ gro_cells_destroy(&vxlan->gro_cells);
++
++ vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
++
++ free_percpu(dev->tstats);
++}
++
++/* Start ageing timer and join group when device is brought up */
++static int vxlan_open(struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ int ret;
++
++ ret = vxlan_sock_add(vxlan);
++ if (ret < 0)
++ return ret;
++
++ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
++ ret = vxlan_igmp_join(vxlan);
++ if (ret == -EADDRINUSE)
++ ret = 0;
++ if (ret) {
++ vxlan_sock_release(vxlan);
++ return ret;
++ }
++ }
++
++ if (vxlan->cfg.age_interval)
++ mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
++
++ return ret;
++}
++
++/* Purge the forwarding table */
++static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
++{
++ unsigned int h;
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ struct hlist_node *p, *n;
++
++ spin_lock_bh(&vxlan->hash_lock[h]);
++ hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
++ struct vxlan_fdb *f
++ = container_of(p, struct vxlan_fdb, hlist);
++ if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
++ continue;
++ /* the all_zeros_mac entry is deleted at vxlan_uninit */
++ if (is_zero_ether_addr(f->eth_addr) &&
++ f->vni == vxlan->cfg.vni)
++ continue;
++ vxlan_fdb_destroy(vxlan, f, true, true);
++ }
++ spin_unlock_bh(&vxlan->hash_lock[h]);
++ }
++}
++
++/* Cleanup timer and forwarding table on shutdown */
++static int vxlan_stop(struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
++ int ret = 0;
++
++ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
++ !vxlan_group_used(vn, vxlan))
++ ret = vxlan_igmp_leave(vxlan);
++
++ del_timer_sync(&vxlan->age_timer);
++
++ vxlan_flush(vxlan, false);
++ vxlan_sock_release(vxlan);
++
++ return ret;
++}
++
++/* Stub, nothing needs to be done. */
++static void vxlan_set_multicast_list(struct net_device *dev)
++{
++}
++
++static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_rdst *dst = &vxlan->default_dst;
++ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
++ dst->remote_ifindex);
++
++ /* This check is different from dev->max_mtu, because it looks at
++ * the lowerdev->mtu rather than the static dev->max_mtu.
++ */
++ if (lowerdev) {
++ int max_mtu = lowerdev->mtu - vxlan_headroom(vxlan->cfg.flags);
++ if (new_mtu > max_mtu)
++ return -EINVAL;
++ }
++
++ dev->mtu = new_mtu;
++ return 0;
++}
++
++static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct ip_tunnel_info *info = skb_tunnel_info(skb);
++ __be16 sport, dport;
++
++ sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
++ vxlan->cfg.port_max, true);
++ dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
++
++ if (ip_tunnel_info_af(info) == AF_INET) {
++ struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
++ struct rtable *rt;
++
++ rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
++ info->key.u.ipv4.dst,
++ &info->key.u.ipv4.src, dport, sport,
++ &info->dst_cache, info);
++ if (IS_ERR(rt))
++ return PTR_ERR(rt);
++ ip_rt_put(rt);
++ } else {
++#if IS_ENABLED(CONFIG_IPV6)
++ struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
++ struct dst_entry *ndst;
++
++ ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
++ info->key.label, &info->key.u.ipv6.dst,
++ &info->key.u.ipv6.src, dport, sport,
++ &info->dst_cache, info);
++ if (IS_ERR(ndst))
++ return PTR_ERR(ndst);
++ dst_release(ndst);
++#else /* !CONFIG_IPV6 */
++ return -EPFNOSUPPORT;
++#endif
++ }
++ info->key.tp_src = sport;
++ info->key.tp_dst = dport;
++ return 0;
++}
++
++static const struct net_device_ops vxlan_netdev_ether_ops = {
++ .ndo_init = vxlan_init,
++ .ndo_uninit = vxlan_uninit,
++ .ndo_open = vxlan_open,
++ .ndo_stop = vxlan_stop,
++ .ndo_start_xmit = vxlan_xmit,
++ .ndo_get_stats64 = dev_get_tstats64,
++ .ndo_set_rx_mode = vxlan_set_multicast_list,
++ .ndo_change_mtu = vxlan_change_mtu,
++ .ndo_validate_addr = eth_validate_addr,
++ .ndo_set_mac_address = eth_mac_addr,
++ .ndo_fdb_add = vxlan_fdb_add,
++ .ndo_fdb_del = vxlan_fdb_delete,
++ .ndo_fdb_dump = vxlan_fdb_dump,
++ .ndo_fdb_get = vxlan_fdb_get,
++ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
++ .ndo_change_proto_down = dev_change_proto_down_generic,
++};
++
++static const struct net_device_ops vxlan_netdev_raw_ops = {
++ .ndo_init = vxlan_init,
++ .ndo_uninit = vxlan_uninit,
++ .ndo_open = vxlan_open,
++ .ndo_stop = vxlan_stop,
++ .ndo_start_xmit = vxlan_xmit,
++ .ndo_get_stats64 = dev_get_tstats64,
++ .ndo_change_mtu = vxlan_change_mtu,
++ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
++};
++
++/* Info for udev that this is a virtual tunnel endpoint */
++static struct device_type vxlan_type = {
++ .name = "vxlan",
++};
++
++/* Calls the ndo_udp_tunnel_add of the caller in order to
++ * supply the listening VXLAN UDP ports. Callers are expected
++ * to implement ndo_udp_tunnel_add.
++ */
++static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
++{
++ struct vxlan_sock *vs;
++ struct net *net = dev_net(dev);
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++ unsigned int i;
++
++ spin_lock(&vn->sock_lock);
++ for (i = 0; i < PORT_HASH_SIZE; ++i) {
++ hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
++ unsigned short type;
++
++ if (vs->flags & VXLAN_F_GPE)
++ type = UDP_TUNNEL_TYPE_VXLAN_GPE;
++ else
++ type = UDP_TUNNEL_TYPE_VXLAN;
++
++ if (push)
++ udp_tunnel_push_rx_port(dev, vs->sock, type);
++ else
++ udp_tunnel_drop_rx_port(dev, vs->sock, type);
++ }
++ }
++ spin_unlock(&vn->sock_lock);
++}
++
++/* Initialize the device structure. */
++static void vxlan_setup(struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ unsigned int h;
++
++ eth_hw_addr_random(dev);
++ ether_setup(dev);
++
++ dev->needs_free_netdev = true;
++ SET_NETDEV_DEVTYPE(dev, &vxlan_type);
++
++ dev->features |= NETIF_F_LLTX;
++ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
++ dev->features |= NETIF_F_RXCSUM;
++ dev->features |= NETIF_F_GSO_SOFTWARE;
++
++ dev->vlan_features = dev->features;
++ dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
++ dev->hw_features |= NETIF_F_RXCSUM;
++ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
++ netif_keep_dst(dev);
++ dev->priv_flags |= IFF_NO_QUEUE;
++
++ /* MTU range: 68 - 65535 */
++ dev->min_mtu = ETH_MIN_MTU;
++ dev->max_mtu = ETH_MAX_MTU;
++
++ INIT_LIST_HEAD(&vxlan->next);
++
++ timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
++
++ vxlan->dev = dev;
++
++ for (h = 0; h < FDB_HASH_SIZE; ++h) {
++ spin_lock_init(&vxlan->hash_lock[h]);
++ INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
++ }
++}
++
++static void vxlan_ether_setup(struct net_device *dev)
++{
++ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
++ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
++ dev->netdev_ops = &vxlan_netdev_ether_ops;
++}
++
++static void vxlan_raw_setup(struct net_device *dev)
++{
++ dev->header_ops = NULL;
++ dev->type = ARPHRD_NONE;
++ dev->hard_header_len = 0;
++ dev->addr_len = 0;
++ dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
++ dev->netdev_ops = &vxlan_netdev_raw_ops;
++}
++
++static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
++ [IFLA_VXLAN_ID] = { .type = NLA_U32 },
++ [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
++ [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
++ [IFLA_VXLAN_LINK] = { .type = NLA_U32 },
++ [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
++ [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
++ [IFLA_VXLAN_TOS] = { .type = NLA_U8 },
++ [IFLA_VXLAN_TTL] = { .type = NLA_U8 },
++ [IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
++ [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
++ [IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
++ [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
++ [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) },
++ [IFLA_VXLAN_PROXY] = { .type = NLA_U8 },
++ [IFLA_VXLAN_RSC] = { .type = NLA_U8 },
++ [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 },
++ [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 },
++ [IFLA_VXLAN_COLLECT_METADATA] = { .type = NLA_U8 },
++ [IFLA_VXLAN_PORT] = { .type = NLA_U16 },
++ [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 },
++ [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
++ [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
++ [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
++ [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
++ [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
++ [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
++ [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
++ [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
++ [IFLA_VXLAN_DF] = { .type = NLA_U8 },
++};
++
++static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ if (tb[IFLA_ADDRESS]) {
++ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
++ "Provided link layer address is not Ethernet");
++ return -EINVAL;
++ }
++
++ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
++ "Provided Ethernet address is not unicast");
++ return -EADDRNOTAVAIL;
++ }
++ }
++
++ if (tb[IFLA_MTU]) {
++ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
++
++ if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
++ "MTU must be between 68 and 65535");
++ return -EINVAL;
++ }
++ }
++
++ if (!data) {
++ NL_SET_ERR_MSG(extack,
++ "Required attributes not provided to perform the operation");
++ return -EINVAL;
++ }
++
++ if (data[IFLA_VXLAN_ID]) {
++ u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
++
++ if (id >= VXLAN_N_VID) {
++ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
++ "VXLAN ID must be lower than 16777216");
++ return -ERANGE;
++ }
++ }
++
++ if (data[IFLA_VXLAN_PORT_RANGE]) {
++ const struct ifla_vxlan_port_range *p
++ = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
++
++ if (ntohs(p->high) < ntohs(p->low)) {
++ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
++ "Invalid source port range");
++ return -EINVAL;
++ }
++ }
++
++ if (data[IFLA_VXLAN_DF]) {
++ enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
++
++ if (df < 0 || df > VXLAN_DF_MAX) {
++ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
++ "Invalid DF attribute");
++ return -EINVAL;
++ }
++ }
++
++ return 0;
++}
++
++static void vxlan_get_drvinfo(struct net_device *netdev,
++ struct ethtool_drvinfo *drvinfo)
++{
++ strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
++ strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
++}
++
++static int vxlan_get_link_ksettings(struct net_device *dev,
++ struct ethtool_link_ksettings *cmd)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_rdst *dst = &vxlan->default_dst;
++ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
++ dst->remote_ifindex);
++
++ if (!lowerdev) {
++ cmd->base.duplex = DUPLEX_UNKNOWN;
++ cmd->base.port = PORT_OTHER;
++ cmd->base.speed = SPEED_UNKNOWN;
++
++ return 0;
++ }
++
++ return __ethtool_get_link_ksettings(lowerdev, cmd);
++}
++
++static const struct ethtool_ops vxlan_ethtool_ops = {
++ .get_drvinfo = vxlan_get_drvinfo,
++ .get_link = ethtool_op_get_link,
++ .get_link_ksettings = vxlan_get_link_ksettings,
++};
++
++static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
++ __be16 port, u32 flags, int ifindex)
++{
++ struct socket *sock;
++ struct udp_port_cfg udp_conf;
++ int err;
++
++ memset(&udp_conf, 0, sizeof(udp_conf));
++
++ if (ipv6) {
++ udp_conf.family = AF_INET6;
++ udp_conf.use_udp6_rx_checksums =
++ !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
++ udp_conf.ipv6_v6only = 1;
++ } else {
++ udp_conf.family = AF_INET;
++ }
++
++ udp_conf.local_udp_port = port;
++ udp_conf.bind_ifindex = ifindex;
++
++ /* Open UDP socket */
++ err = udp_sock_create(net, &udp_conf, &sock);
++ if (err < 0)
++ return ERR_PTR(err);
++
++ udp_allow_gso(sock->sk);
++ return sock;
++}
++
++/* Create new listen socket if needed */
++static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
++ __be16 port, u32 flags,
++ int ifindex)
++{
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++ struct vxlan_sock *vs;
++ struct socket *sock;
++ unsigned int h;
++ struct udp_tunnel_sock_cfg tunnel_cfg;
++
++ vs = kzalloc(sizeof(*vs), GFP_KERNEL);
++ if (!vs)
++ return ERR_PTR(-ENOMEM);
++
++ for (h = 0; h < VNI_HASH_SIZE; ++h)
++ INIT_HLIST_HEAD(&vs->vni_list[h]);
++
++ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
++ if (IS_ERR(sock)) {
++ kfree(vs);
++ return ERR_CAST(sock);
++ }
++
++ vs->sock = sock;
++ refcount_set(&vs->refcnt, 1);
++ vs->flags = (flags & VXLAN_F_RCV_FLAGS);
++
++ spin_lock(&vn->sock_lock);
++ hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
++ udp_tunnel_notify_add_rx_port(sock,
++ (vs->flags & VXLAN_F_GPE) ?
++ UDP_TUNNEL_TYPE_VXLAN_GPE :
++ UDP_TUNNEL_TYPE_VXLAN);
++ spin_unlock(&vn->sock_lock);
++
++ /* Mark socket as an encapsulation socket. */
++ memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
++ tunnel_cfg.sk_user_data = vs;
++ tunnel_cfg.encap_type = 1;
++ tunnel_cfg.encap_rcv = vxlan_rcv;
++ tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
++ tunnel_cfg.encap_destroy = NULL;
++ tunnel_cfg.gro_receive = vxlan_gro_receive;
++ tunnel_cfg.gro_complete = vxlan_gro_complete;
++
++ setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
++
++ return vs;
++}
++
++static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
++{
++ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
++ struct vxlan_sock *vs = NULL;
++ struct vxlan_dev_node *node;
++ int l3mdev_index = 0;
++
++ if (vxlan->cfg.remote_ifindex)
++ l3mdev_index = l3mdev_master_upper_ifindex_by_index(
++ vxlan->net, vxlan->cfg.remote_ifindex);
++
++ if (!vxlan->cfg.no_share) {
++ spin_lock(&vn->sock_lock);
++ vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
++ vxlan->cfg.dst_port, vxlan->cfg.flags,
++ l3mdev_index);
++ if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
++ spin_unlock(&vn->sock_lock);
++ return -EBUSY;
++ }
++ spin_unlock(&vn->sock_lock);
++ }
++ if (!vs)
++ vs = vxlan_socket_create(vxlan->net, ipv6,
++ vxlan->cfg.dst_port, vxlan->cfg.flags,
++ l3mdev_index);
++ if (IS_ERR(vs))
++ return PTR_ERR(vs);
++#if IS_ENABLED(CONFIG_IPV6)
++ if (ipv6) {
++ rcu_assign_pointer(vxlan->vn6_sock, vs);
++ node = &vxlan->hlist6;
++ } else
++#endif
++ {
++ rcu_assign_pointer(vxlan->vn4_sock, vs);
++ node = &vxlan->hlist4;
++ }
++ vxlan_vs_add_dev(vs, vxlan, node);
++ return 0;
++}
++
++static int vxlan_sock_add(struct vxlan_dev *vxlan)
++{
++ bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
++ bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
++ bool ipv4 = !ipv6 || metadata;
++ int ret = 0;
++
++ RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
++#if IS_ENABLED(CONFIG_IPV6)
++ RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
++ if (ipv6) {
++ ret = __vxlan_sock_add(vxlan, true);
++ if (ret < 0 && ret != -EAFNOSUPPORT)
++ ipv4 = false;
++ }
++#endif
++ if (ipv4)
++ ret = __vxlan_sock_add(vxlan, false);
++ if (ret < 0)
++ vxlan_sock_release(vxlan);
++ return ret;
++}
++
++static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
++ struct net_device **lower,
++ struct vxlan_dev *old,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
++ struct vxlan_dev *tmp;
++ bool use_ipv6 = false;
++
++ if (conf->flags & VXLAN_F_GPE) {
++ /* For now, allow GPE only together with
++ * COLLECT_METADATA. This can be relaxed later; in such
++ * case, the other side of the PtP link will have to be
++ * provided.
++ */
++ if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
++ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
++ NL_SET_ERR_MSG(extack,
++ "VXLAN GPE does not support this combination of attributes");
++ return -EINVAL;
++ }
++ }
++
++ if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) {
++ /* Unless IPv6 is explicitly requested, assume IPv4 */
++ conf->remote_ip.sa.sa_family = AF_INET;
++ conf->saddr.sa.sa_family = AF_INET;
++ } else if (!conf->remote_ip.sa.sa_family) {
++ conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family;
++ } else if (!conf->saddr.sa.sa_family) {
++ conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
++ }
++
++ if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) {
++ NL_SET_ERR_MSG(extack,
++ "Local and remote address must be from the same family");
++ return -EINVAL;
++ }
++
++ if (vxlan_addr_multicast(&conf->saddr)) {
++ NL_SET_ERR_MSG(extack, "Local address cannot be multicast");
++ return -EINVAL;
++ }
++
++ if (conf->saddr.sa.sa_family == AF_INET6) {
++ if (!IS_ENABLED(CONFIG_IPV6)) {
++ NL_SET_ERR_MSG(extack,
++ "IPv6 support not enabled in the kernel");
++ return -EPFNOSUPPORT;
++ }
++ use_ipv6 = true;
++ conf->flags |= VXLAN_F_IPV6;
++
++ if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
++ int local_type =
++ ipv6_addr_type(&conf->saddr.sin6.sin6_addr);
++ int remote_type =
++ ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr);
++
++ if (local_type & IPV6_ADDR_LINKLOCAL) {
++ if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
++ (remote_type != IPV6_ADDR_ANY)) {
++ NL_SET_ERR_MSG(extack,
++ "Invalid combination of local and remote address scopes");
++ return -EINVAL;
++ }
++
++ conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
++ } else {
++ if (remote_type ==
++ (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) {
++ NL_SET_ERR_MSG(extack,
++ "Invalid combination of local and remote address scopes");
++ return -EINVAL;
++ }
++
++ conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
++ }
++ }
++ }
++
++ if (conf->label && !use_ipv6) {
++ NL_SET_ERR_MSG(extack,
++ "Label attribute only applies to IPv6 VXLAN devices");
++ return -EINVAL;
++ }
++
++ if (conf->remote_ifindex) {
++ struct net_device *lowerdev;
++
++ lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
++ if (!lowerdev) {
++ NL_SET_ERR_MSG(extack,
++ "Invalid local interface, device not found");
++ return -ENODEV;
++ }
++
++#if IS_ENABLED(CONFIG_IPV6)
++ if (use_ipv6) {
++ struct inet6_dev *idev = __in6_dev_get(lowerdev);
++
++ if (idev && idev->cnf.disable_ipv6) {
++ NL_SET_ERR_MSG(extack,
++ "IPv6 support disabled by administrator");
++ return -EPERM;
++ }
++ }
++#endif
++
++ *lower = lowerdev;
++ } else {
++ if (vxlan_addr_multicast(&conf->remote_ip)) {
++ NL_SET_ERR_MSG(extack,
++ "Local interface required for multicast remote destination");
++
++ return -EINVAL;
++ }
++
++#if IS_ENABLED(CONFIG_IPV6)
++ if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) {
++ NL_SET_ERR_MSG(extack,
++ "Local interface required for link-local local/remote addresses");
++ return -EINVAL;
++ }
++#endif
++
++ *lower = NULL;
++ }
++
++ if (!conf->dst_port) {
++ if (conf->flags & VXLAN_F_GPE)
++ conf->dst_port = htons(4790); /* IANA VXLAN-GPE port */
++ else
++ conf->dst_port = htons(vxlan_port);
++ }
++
++ if (!conf->age_interval)
++ conf->age_interval = FDB_AGE_DEFAULT;
++
++ list_for_each_entry(tmp, &vn->vxlan_list, next) {
++ if (tmp == old)
++ continue;
++
++ if (tmp->cfg.vni != conf->vni)
++ continue;
++ if (tmp->cfg.dst_port != conf->dst_port)
++ continue;
++ if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
++ (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
++ continue;
++
++ if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
++ tmp->cfg.remote_ifindex != conf->remote_ifindex)
++ continue;
++
++ NL_SET_ERR_MSG(extack,
++ "A VXLAN device with the specified VNI already exists");
++ return -EEXIST;
++ }
++
++ return 0;
++}
++
++static void vxlan_config_apply(struct net_device *dev,
++ struct vxlan_config *conf,
++ struct net_device *lowerdev,
++ struct net *src_net,
++ bool changelink)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_rdst *dst = &vxlan->default_dst;
++ unsigned short needed_headroom = ETH_HLEN;
++ int max_mtu = ETH_MAX_MTU;
++ u32 flags = conf->flags;
++
++ if (!changelink) {
++ if (flags & VXLAN_F_GPE)
++ vxlan_raw_setup(dev);
++ else
++ vxlan_ether_setup(dev);
++
++ if (conf->mtu)
++ dev->mtu = conf->mtu;
++
++ vxlan->net = src_net;
++ }
++
++ dst->remote_vni = conf->vni;
++
++ memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
++
++ if (lowerdev) {
++ dst->remote_ifindex = conf->remote_ifindex;
++
++ dev->gso_max_size = lowerdev->gso_max_size;
++ dev->gso_max_segs = lowerdev->gso_max_segs;
++
++ needed_headroom = lowerdev->hard_header_len;
++ needed_headroom += lowerdev->needed_headroom;
++
++ dev->needed_tailroom = lowerdev->needed_tailroom;
++
++ max_mtu = lowerdev->mtu - vxlan_headroom(flags);
++ if (max_mtu < ETH_MIN_MTU)
++ max_mtu = ETH_MIN_MTU;
++
++ if (!changelink && !conf->mtu)
++ dev->mtu = max_mtu;
++ }
++
++ if (dev->mtu > max_mtu)
++ dev->mtu = max_mtu;
++
++ if (flags & VXLAN_F_COLLECT_METADATA)
++ flags |= VXLAN_F_IPV6;
++ needed_headroom += vxlan_headroom(flags);
++ dev->needed_headroom = needed_headroom;
++
++ memcpy(&vxlan->cfg, conf, sizeof(*conf));
++}
++
++static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
++ struct vxlan_config *conf, bool changelink,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct net_device *lowerdev;
++ int ret;
++
++ ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan, extack);
++ if (ret)
++ return ret;
++
++ vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
++
++ return 0;
++}
++
++static int __vxlan_dev_create(struct net *net, struct net_device *dev,
++ struct vxlan_config *conf,
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct net_device *remote_dev = NULL;
++ struct vxlan_fdb *f = NULL;
++ bool unregister = false;
++ struct vxlan_rdst *dst;
++ int err;
++
++ dst = &vxlan->default_dst;
++ err = vxlan_dev_configure(net, dev, conf, false, extack);
++ if (err)
++ return err;
++
++ dev->ethtool_ops = &vxlan_ethtool_ops;
++
++ /* create an fdb entry for a valid default destination */
++ if (!vxlan_addr_any(&dst->remote_ip)) {
++ err = vxlan_fdb_create(vxlan, all_zeros_mac,
++ &dst->remote_ip,
++ NUD_REACHABLE | NUD_PERMANENT,
++ vxlan->cfg.dst_port,
++ dst->remote_vni,
++ dst->remote_vni,
++ dst->remote_ifindex,
++ NTF_SELF, 0, &f, extack);
++ if (err)
++ return err;
++ }
++
++ err = register_netdevice(dev);
++ if (err)
++ goto errout;
++ unregister = true;
++
++ if (dst->remote_ifindex) {
++ remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
++ if (!remote_dev) {
++ err = -ENODEV;
++ goto errout;
++ }
++
++ err = netdev_upper_dev_link(remote_dev, dev, extack);
++ if (err)
++ goto errout;
++ }
++
++ err = rtnl_configure_link(dev, NULL);
++ if (err < 0)
++ goto unlink;
++
++ if (f) {
++ vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
++
++ /* notify default fdb entry */
++ err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
++ RTM_NEWNEIGH, true, extack);
++ if (err) {
++ vxlan_fdb_destroy(vxlan, f, false, false);
++ if (remote_dev)
++ netdev_upper_dev_unlink(remote_dev, dev);
++ goto unregister;
++ }
++ }
++
++ list_add(&vxlan->next, &vn->vxlan_list);
++ if (remote_dev)
++ dst->remote_dev = remote_dev;
++ return 0;
++unlink:
++ if (remote_dev)
++ netdev_upper_dev_unlink(remote_dev, dev);
++errout:
++ /* unregister_netdevice() destroys the default FDB entry with deletion
++ * notification. But the addition notification was not sent yet, so
++ * destroy the entry by hand here.
++ */
++ if (f)
++ __vxlan_fdb_free(f);
++unregister:
++ if (unregister)
++ unregister_netdevice(dev);
++ return err;
++}
++
++/* Set/clear flags based on attribute */
++static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
++ int attrtype, unsigned long mask, bool changelink,
++ bool changelink_supported,
++ struct netlink_ext_ack *extack)
++{
++ unsigned long flags;
++
++ if (!tb[attrtype])
++ return 0;
++
++ if (changelink && !changelink_supported) {
++ vxlan_flag_attr_error(attrtype, extack);
++ return -EOPNOTSUPP;
++ }
++
++ if (vxlan_policy[attrtype].type == NLA_FLAG)
++ flags = conf->flags | mask;
++ else if (nla_get_u8(tb[attrtype]))
++ flags = conf->flags | mask;
++ else
++ flags = conf->flags & ~mask;
++
++ conf->flags = flags;
++
++ return 0;
++}
++
++static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
++ struct net_device *dev, struct vxlan_config *conf,
++ bool changelink, struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ int err = 0;
++
++ memset(conf, 0, sizeof(*conf));
++
++ /* if changelink operation, start with old existing cfg */
++ if (changelink)
++ memcpy(conf, &vxlan->cfg, sizeof(*conf));
++
++ if (data[IFLA_VXLAN_ID]) {
++ __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
++
++ if (changelink && (vni != conf->vni)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
++ return -EOPNOTSUPP;
++ }
++ conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
++ }
++
++ if (data[IFLA_VXLAN_GROUP]) {
++ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
++ return -EOPNOTSUPP;
++ }
++
++ conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
++ conf->remote_ip.sa.sa_family = AF_INET;
++ } else if (data[IFLA_VXLAN_GROUP6]) {
++ if (!IS_ENABLED(CONFIG_IPV6)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
++ return -EPFNOSUPPORT;
++ }
++
++ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
++ return -EOPNOTSUPP;
++ }
++
++ conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
++ conf->remote_ip.sa.sa_family = AF_INET6;
++ }
++
++ if (data[IFLA_VXLAN_LOCAL]) {
++ if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
++ return -EOPNOTSUPP;
++ }
++
++ conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
++ conf->saddr.sa.sa_family = AF_INET;
++ } else if (data[IFLA_VXLAN_LOCAL6]) {
++ if (!IS_ENABLED(CONFIG_IPV6)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
++ return -EPFNOSUPPORT;
++ }
++
++ if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
++ return -EOPNOTSUPP;
++ }
++
++ /* TODO: respect scope id */
++ conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
++ conf->saddr.sa.sa_family = AF_INET6;
++ }
++
++ if (data[IFLA_VXLAN_LINK])
++ conf->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]);
++
++ if (data[IFLA_VXLAN_TOS])
++ conf->tos = nla_get_u8(data[IFLA_VXLAN_TOS]);
++
++ if (data[IFLA_VXLAN_TTL])
++ conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
++
++ if (data[IFLA_VXLAN_TTL_INHERIT]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
++ VXLAN_F_TTL_INHERIT, changelink, false,
++ extack);
++ if (err)
++ return err;
++
++ }
++
++ if (data[IFLA_VXLAN_LABEL])
++ conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
++ IPV6_FLOWLABEL_MASK;
++
++ if (data[IFLA_VXLAN_LEARNING]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
++ VXLAN_F_LEARN, changelink, true,
++ extack);
++ if (err)
++ return err;
++ } else if (!changelink) {
++ /* default to learn on a new device */
++ conf->flags |= VXLAN_F_LEARN;
++ }
++
++ if (data[IFLA_VXLAN_AGEING])
++ conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
++
++ if (data[IFLA_VXLAN_PROXY]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
++ VXLAN_F_PROXY, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_RSC]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
++ VXLAN_F_RSC, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_L2MISS]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
++ VXLAN_F_L2MISS, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_L3MISS]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
++ VXLAN_F_L3MISS, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_LIMIT]) {
++ if (changelink) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
++ "Cannot change limit");
++ return -EOPNOTSUPP;
++ }
++ conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
++ }
++
++ if (data[IFLA_VXLAN_COLLECT_METADATA]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
++ VXLAN_F_COLLECT_METADATA, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_PORT_RANGE]) {
++ if (!changelink) {
++ const struct ifla_vxlan_port_range *p
++ = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
++ conf->port_min = ntohs(p->low);
++ conf->port_max = ntohs(p->high);
++ } else {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
++ "Cannot change port range");
++ return -EOPNOTSUPP;
++ }
++ }
++
++ if (data[IFLA_VXLAN_PORT]) {
++ if (changelink) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
++ "Cannot change port");
++ return -EOPNOTSUPP;
++ }
++ conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
++ }
++
++ if (data[IFLA_VXLAN_UDP_CSUM]) {
++ if (changelink) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
++ "Cannot change UDP_CSUM flag");
++ return -EOPNOTSUPP;
++ }
++ if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
++ conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
++ }
++
++ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
++ VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
++ false, extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
++ VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
++ false, extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_REMCSUM_TX]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
++ VXLAN_F_REMCSUM_TX, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_REMCSUM_RX]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
++ VXLAN_F_REMCSUM_RX, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_GBP]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
++ VXLAN_F_GBP, changelink, false, extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_GPE]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
++ VXLAN_F_GPE, changelink, false,
++ extack);
++ if (err)
++ return err;
++ }
++
++ if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
++ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
++ VXLAN_F_REMCSUM_NOPARTIAL, changelink,
++ false, extack);
++ if (err)
++ return err;
++ }
++
++ if (tb[IFLA_MTU]) {
++ if (changelink) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
++ "Cannot change mtu");
++ return -EOPNOTSUPP;
++ }
++ conf->mtu = nla_get_u32(tb[IFLA_MTU]);
++ }
++
++ if (data[IFLA_VXLAN_DF])
++ conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
++
++ return 0;
++}
++
++static int vxlan_newlink(struct net *src_net, struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_config conf;
++ int err;
++
++ err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
++ if (err)
++ return err;
++
++ return __vxlan_dev_create(src_net, dev, &conf, extack);
++}
++
++static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
++ struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct net_device *lowerdev;
++ struct vxlan_config conf;
++ struct vxlan_rdst *dst;
++ int err;
++
++ dst = &vxlan->default_dst;
++ err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
++ if (err)
++ return err;
++
++ err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
++ vxlan, extack);
++ if (err)
++ return err;
++
++ if (dst->remote_dev == lowerdev)
++ lowerdev = NULL;
++
++ err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
++ extack);
++ if (err)
++ return err;
++
++ /* handle default dst entry */
++ if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
++ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
++
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ if (!vxlan_addr_any(&conf.remote_ip)) {
++ err = vxlan_fdb_update(vxlan, all_zeros_mac,
++ &conf.remote_ip,
++ NUD_REACHABLE | NUD_PERMANENT,
++ NLM_F_APPEND | NLM_F_CREATE,
++ vxlan->cfg.dst_port,
++ conf.vni, conf.vni,
++ conf.remote_ifindex,
++ NTF_SELF, 0, true, extack);
++ if (err) {
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++ netdev_adjacent_change_abort(dst->remote_dev,
++ lowerdev, dev);
++ return err;
++ }
++ }
++ if (!vxlan_addr_any(&dst->remote_ip))
++ __vxlan_fdb_delete(vxlan, all_zeros_mac,
++ dst->remote_ip,
++ vxlan->cfg.dst_port,
++ dst->remote_vni,
++ dst->remote_vni,
++ dst->remote_ifindex,
++ true);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++ }
++
++ if (conf.age_interval != vxlan->cfg.age_interval)
++ mod_timer(&vxlan->age_timer, jiffies);
++
++ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
++ if (lowerdev && lowerdev != dst->remote_dev)
++ dst->remote_dev = lowerdev;
++ vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
++ return 0;
++}
++
++static void vxlan_dellink(struct net_device *dev, struct list_head *head)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++
++ vxlan_flush(vxlan, true);
++
++ list_del(&vxlan->next);
++ unregister_netdevice_queue(dev, head);
++ if (vxlan->default_dst.remote_dev)
++ netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
++}
++
++static size_t vxlan_get_size(const struct net_device *dev)
++{
++ return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */
++ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
++ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
++ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
++ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */
++ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
++ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
++ nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
++ nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
++ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
++ 0;
++}
++
++static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
++{
++ const struct vxlan_dev *vxlan = netdev_priv(dev);
++ const struct vxlan_rdst *dst = &vxlan->default_dst;
++ struct ifla_vxlan_port_range ports = {
++ .low = htons(vxlan->cfg.port_min),
++ .high = htons(vxlan->cfg.port_max),
++ };
++
++ if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
++ goto nla_put_failure;
++
++ if (!vxlan_addr_any(&dst->remote_ip)) {
++ if (dst->remote_ip.sa.sa_family == AF_INET) {
++ if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
++ dst->remote_ip.sin.sin_addr.s_addr))
++ goto nla_put_failure;
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
++ &dst->remote_ip.sin6.sin6_addr))
++ goto nla_put_failure;
++#endif
++ }
++ }
++
++ if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
++ goto nla_put_failure;
++
++ if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
++ if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
++ if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
++ vxlan->cfg.saddr.sin.sin_addr.s_addr))
++ goto nla_put_failure;
++#if IS_ENABLED(CONFIG_IPV6)
++ } else {
++ if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
++ &vxlan->cfg.saddr.sin6.sin6_addr))
++ goto nla_put_failure;
++#endif
++ }
++ }
++
++ if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
++ nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
++ !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
++ nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
++ nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
++ nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
++ nla_put_u8(skb, IFLA_VXLAN_LEARNING,
++ !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
++ nla_put_u8(skb, IFLA_VXLAN_PROXY,
++ !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
++ nla_put_u8(skb, IFLA_VXLAN_RSC,
++ !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
++ nla_put_u8(skb, IFLA_VXLAN_L2MISS,
++ !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
++ nla_put_u8(skb, IFLA_VXLAN_L3MISS,
++ !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
++ nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
++ !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
++ nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
++ nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
++ nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
++ nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
++ !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
++ nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
++ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
++ nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
++ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
++ nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
++ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
++ nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
++ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
++ goto nla_put_failure;
++
++ if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
++ goto nla_put_failure;
++
++ if (vxlan->cfg.flags & VXLAN_F_GBP &&
++ nla_put_flag(skb, IFLA_VXLAN_GBP))
++ goto nla_put_failure;
++
++ if (vxlan->cfg.flags & VXLAN_F_GPE &&
++ nla_put_flag(skb, IFLA_VXLAN_GPE))
++ goto nla_put_failure;
++
++ if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&
++ nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
++ goto nla_put_failure;
++
++ return 0;
++
++nla_put_failure:
++ return -EMSGSIZE;
++}
++
++static struct net *vxlan_get_link_net(const struct net_device *dev)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++
++ return vxlan->net;
++}
++
++static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
++ .kind = "vxlan",
++ .maxtype = IFLA_VXLAN_MAX,
++ .policy = vxlan_policy,
++ .priv_size = sizeof(struct vxlan_dev),
++ .setup = vxlan_setup,
++ .validate = vxlan_validate,
++ .newlink = vxlan_newlink,
++ .changelink = vxlan_changelink,
++ .dellink = vxlan_dellink,
++ .get_size = vxlan_get_size,
++ .fill_info = vxlan_fill_info,
++ .get_link_net = vxlan_get_link_net,
++};
++
++struct net_device *vxlan_dev_create(struct net *net, const char *name,
++ u8 name_assign_type,
++ struct vxlan_config *conf)
++{
++ struct nlattr *tb[IFLA_MAX + 1];
++ struct net_device *dev;
++ int err;
++
++ memset(&tb, 0, sizeof(tb));
++
++ dev = rtnl_create_link(net, name, name_assign_type,
++ &vxlan_link_ops, tb, NULL);
++ if (IS_ERR(dev))
++ return dev;
++
++ err = __vxlan_dev_create(net, dev, conf, NULL);
++ if (err < 0) {
++ free_netdev(dev);
++ return ERR_PTR(err);
++ }
++
++ err = rtnl_configure_link(dev, NULL);
++ if (err < 0) {
++ LIST_HEAD(list_kill);
++
++ vxlan_dellink(dev, &list_kill);
++ unregister_netdevice_many(&list_kill);
++ return ERR_PTR(err);
++ }
++
++ return dev;
++}
++EXPORT_SYMBOL_GPL(vxlan_dev_create);
++
++static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
++ struct net_device *dev)
++{
++ struct vxlan_dev *vxlan, *next;
++ LIST_HEAD(list_kill);
++
++ list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
++ struct vxlan_rdst *dst = &vxlan->default_dst;
++
++ /* In case we created the vxlan device with a carrier
++ * and we lose the carrier due to module unload,
++ * we also need to remove the vxlan device. In other
++ * cases, it's not necessary and remote_ifindex
++ * is 0 here, so no matches.
++ */
++ if (dst->remote_ifindex == dev->ifindex)
++ vxlan_dellink(vxlan->dev, &list_kill);
++ }
++
++ unregister_netdevice_many(&list_kill);
++}
++
++static int vxlan_netdevice_event(struct notifier_block *unused,
++ unsigned long event, void *ptr)
++{
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
++
++ if (event == NETDEV_UNREGISTER)
++ vxlan_handle_lowerdev_unregister(vn, dev);
++ else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
++ vxlan_offload_rx_ports(dev, true);
++ else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
++ vxlan_offload_rx_ports(dev, false);
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block vxlan_notifier_block __read_mostly = {
++ .notifier_call = vxlan_netdevice_event,
++};
++
++static void
++vxlan_fdb_offloaded_set(struct net_device *dev,
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_rdst *rdst;
++ struct vxlan_fdb *f;
++ u32 hash_index;
++
++ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
++
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++
++ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
++ if (!f)
++ goto out;
++
++ rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
++ fdb_info->remote_port,
++ fdb_info->remote_vni,
++ fdb_info->remote_ifindex);
++ if (!rdst)
++ goto out;
++
++ rdst->offloaded = fdb_info->offloaded;
++
++out:
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++}
++
++static int
++vxlan_fdb_external_learn_add(struct net_device *dev,
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct netlink_ext_ack *extack;
++ u32 hash_index;
++ int err;
++
++ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
++ extack = switchdev_notifier_info_to_extack(&fdb_info->info);
++
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
++ NUD_REACHABLE,
++ NLM_F_CREATE | NLM_F_REPLACE,
++ fdb_info->remote_port,
++ fdb_info->vni,
++ fdb_info->remote_vni,
++ fdb_info->remote_ifindex,
++ NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
++ 0, false, extack);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++
++ return err;
++}
++
++static int
++vxlan_fdb_external_learn_del(struct net_device *dev,
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
++{
++ struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_fdb *f;
++ u32 hash_index;
++ int err = 0;
++
++ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++
++ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
++ if (!f)
++ err = -ENOENT;
++ else if (f->flags & NTF_EXT_LEARNED)
++ err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
++ fdb_info->remote_ip,
++ fdb_info->remote_port,
++ fdb_info->vni,
++ fdb_info->remote_vni,
++ fdb_info->remote_ifindex,
++ false);
++
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++
++ return err;
++}
++
++static int vxlan_switchdev_event(struct notifier_block *unused,
++ unsigned long event, void *ptr)
++{
++ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
++ struct switchdev_notifier_vxlan_fdb_info *fdb_info;
++ int err = 0;
++
++ switch (event) {
++ case SWITCHDEV_VXLAN_FDB_OFFLOADED:
++ vxlan_fdb_offloaded_set(dev, ptr);
++ break;
++ case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
++ fdb_info = ptr;
++ err = vxlan_fdb_external_learn_add(dev, fdb_info);
++ if (err) {
++ err = notifier_from_errno(err);
++ break;
++ }
++ fdb_info->offloaded = true;
++ vxlan_fdb_offloaded_set(dev, fdb_info);
++ break;
++ case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
++ fdb_info = ptr;
++ err = vxlan_fdb_external_learn_del(dev, fdb_info);
++ if (err) {
++ err = notifier_from_errno(err);
++ break;
++ }
++ fdb_info->offloaded = false;
++ vxlan_fdb_offloaded_set(dev, fdb_info);
++ break;
++ }
++
++ return err;
++}
++
++static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
++ .notifier_call = vxlan_switchdev_event,
++};
++
++static void vxlan_fdb_nh_flush(struct nexthop *nh)
++{
++ struct vxlan_fdb *fdb;
++ struct vxlan_dev *vxlan;
++ u32 hash_index;
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
++ vxlan = rcu_dereference(fdb->vdev);
++ WARN_ON(!vxlan);
++ hash_index = fdb_head_index(vxlan, fdb->eth_addr,
++ vxlan->default_dst.remote_vni);
++ spin_lock_bh(&vxlan->hash_lock[hash_index]);
++ if (!hlist_unhashed(&fdb->hlist))
++ vxlan_fdb_destroy(vxlan, fdb, false, false);
++ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
++ }
++ rcu_read_unlock();
++}
++
++static int vxlan_nexthop_event(struct notifier_block *nb,
++ unsigned long event, void *ptr)
++{
++ struct nh_notifier_info *info = ptr;
++ struct nexthop *nh;
++
++ if (event != NEXTHOP_EVENT_DEL)
++ return NOTIFY_DONE;
++
++ nh = nexthop_find_by_id(info->net, info->id);
++ if (!nh)
++ return NOTIFY_DONE;
++
++ vxlan_fdb_nh_flush(nh);
++
++ return NOTIFY_DONE;
++}
++
++static __net_init int vxlan_init_net(struct net *net)
++{
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++ unsigned int h;
++
++ INIT_LIST_HEAD(&vn->vxlan_list);
++ spin_lock_init(&vn->sock_lock);
++ vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event;
++
++ for (h = 0; h < PORT_HASH_SIZE; ++h)
++ INIT_HLIST_HEAD(&vn->sock_list[h]);
++
++ return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
++ NULL);
++}
++
++static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
++{
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++ struct vxlan_dev *vxlan, *next;
++ struct net_device *dev, *aux;
++
++ for_each_netdev_safe(net, dev, aux)
++ if (dev->rtnl_link_ops == &vxlan_link_ops)
++ unregister_netdevice_queue(dev, head);
++
++ list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
++ /* If vxlan->dev is in the same netns, it has already been added
++ * to the list by the previous loop.
++ */
++ if (!net_eq(dev_net(vxlan->dev), net))
++ unregister_netdevice_queue(vxlan->dev, head);
++ }
++
++}
++
++static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
++{
++ struct net *net;
++ LIST_HEAD(list);
++ unsigned int h;
++
++ list_for_each_entry(net, net_list, exit_list) {
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++
++ unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
++ }
++ rtnl_lock();
++ list_for_each_entry(net, net_list, exit_list)
++ vxlan_destroy_tunnels(net, &list);
++
++ unregister_netdevice_many(&list);
++ rtnl_unlock();
++
++ list_for_each_entry(net, net_list, exit_list) {
++ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
++
++ for (h = 0; h < PORT_HASH_SIZE; ++h)
++ WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
++ }
++}
++
++static struct pernet_operations vxlan_net_ops = {
++ .init = vxlan_init_net,
++ .exit_batch = vxlan_exit_batch_net,
++ .id = &vxlan_net_id,
++ .size = sizeof(struct vxlan_net),
++};
++
++static int __init vxlan_init_module(void)
++{
++ int rc;
++
++ get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
++
++ rc = register_pernet_subsys(&vxlan_net_ops);
++ if (rc)
++ goto out1;
++
++ rc = register_netdevice_notifier(&vxlan_notifier_block);
++ if (rc)
++ goto out2;
++
++ rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
++ if (rc)
++ goto out3;
++
++ rc = rtnl_link_register(&vxlan_link_ops);
++ if (rc)
++ goto out4;
++
++ return 0;
++out4:
++ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
++out3:
++ unregister_netdevice_notifier(&vxlan_notifier_block);
++out2:
++ unregister_pernet_subsys(&vxlan_net_ops);
++out1:
++ return rc;
++}
++late_initcall(vxlan_init_module);
++
++static void __exit vxlan_cleanup_module(void)
++{
++ rtnl_link_unregister(&vxlan_link_ops);
++ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
++ unregister_netdevice_notifier(&vxlan_notifier_block);
++ unregister_pernet_subsys(&vxlan_net_ops);
++ /* rcu_barrier() is called by netns */
++}
++module_exit(vxlan_cleanup_module);
++
++MODULE_LICENSE("GPL");
++MODULE_VERSION(VXLAN_VERSION);
++MODULE_AUTHOR("Stephen Hemminger <stephen@networkplumber.org>");
++MODULE_DESCRIPTION("Driver for VXLAN encapsulated traffic");
++MODULE_ALIAS_RTNL_LINK("vxlan");
+diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
+index 6a212c085435b..5b01642ca44e0 100644
+--- a/drivers/net/wan/farsync.c
++++ b/drivers/net/wan/farsync.c
+@@ -2545,6 +2545,7 @@ fst_remove_one(struct pci_dev *pdev)
+ struct net_device *dev = port_to_dev(&card->ports[i]);
+
+ unregister_hdlc_device(dev);
++ free_netdev(dev);
+ }
+
+ fst_disable_intr(card);
+@@ -2564,6 +2565,7 @@ fst_remove_one(struct pci_dev *pdev)
+ card->tx_dma_handle_card);
+ }
+ fst_card_array[card->card_no] = NULL;
++ kfree(card);
+ }
+
+ static struct pci_driver fst_driver = {
+diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
+index cda1b4ce6b210..8305df1a3008a 100644
+--- a/drivers/net/wan/fsl_ucc_hdlc.c
++++ b/drivers/net/wan/fsl_ucc_hdlc.c
+@@ -1241,9 +1241,11 @@ static int ucc_hdlc_probe(struct platform_device *pdev)
+ free_dev:
+ free_netdev(dev);
+ undo_uhdlc_init:
+- iounmap(utdm->siram);
++ if (utdm)
++ iounmap(utdm->siram);
+ unmap_si_regs:
+- iounmap(utdm->si_regs);
++ if (utdm)
++ iounmap(utdm->si_regs);
+ free_utdm:
+ if (uhdlc_priv->tsa)
+ kfree(utdm);
+diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
+index 89d31adc3809b..75613ac26641f 100644
+--- a/drivers/net/wan/lapbether.c
++++ b/drivers/net/wan/lapbether.c
+@@ -325,6 +325,7 @@ static int lapbeth_open(struct net_device *dev)
+
+ err = lapb_register(dev, &lapbeth_callbacks);
+ if (err != LAPB_OK) {
++ napi_disable(&lapbeth->napi);
+ pr_err("lapb_register error: %d\n", err);
+ return -ENODEV;
+ }
+@@ -383,6 +384,9 @@ static int lapbeth_new_device(struct net_device *dev)
+
+ ASSERT_RTNL();
+
++ if (dev->type != ARPHRD_ETHER)
++ return -EINVAL;
++
+ ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", NET_NAME_UNKNOWN,
+ lapbeth_setup);
+ if (!ndev)
+@@ -446,7 +450,7 @@ static int lapbeth_device_event(struct notifier_block *this,
+ if (dev_net(dev) != &init_net)
+ return NOTIFY_DONE;
+
+- if (!dev_is_ethdev(dev))
++ if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
+index b7197e80f2264..0ba714ca5185c 100644
+--- a/drivers/net/wireguard/allowedips.c
++++ b/drivers/net/wireguard/allowedips.c
+@@ -6,6 +6,8 @@
+ #include "allowedips.h"
+ #include "peer.h"
+
++enum { MAX_ALLOWEDIPS_DEPTH = 129 };
++
+ static struct kmem_cache *node_cache;
+
+ static void swap_endian(u8 *dst, const u8 *src, u8 bits)
+@@ -40,7 +42,8 @@ static void push_rcu(struct allowedips_node **stack,
+ struct allowedips_node __rcu *p, unsigned int *len)
+ {
+ if (rcu_access_pointer(p)) {
+- WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
++ if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
++ return;
+ stack[(*len)++] = rcu_dereference_raw(p);
+ }
+ }
+@@ -52,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
+
+ static void root_free_rcu(struct rcu_head *rcu)
+ {
+- struct allowedips_node *node, *stack[128] = {
++ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
+ container_of(rcu, struct allowedips_node, rcu) };
+ unsigned int len = 1;
+
+@@ -65,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
+
+ static void root_remove_peer_lists(struct allowedips_node *root)
+ {
+- struct allowedips_node *node, *stack[128] = { root };
++ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
+ unsigned int len = 1;
+
+ while (len > 0 && (node = stack[--len])) {
+@@ -163,7 +166,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
+ return exact;
+ }
+
+-static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
++static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node)
+ {
+ node->parent_bit_packed = (unsigned long)parent | bit;
+ rcu_assign_pointer(*parent, node);
+diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
+index 551ddaaaf5400..5eaef79c06e16 100644
+--- a/drivers/net/wireguard/device.c
++++ b/drivers/net/wireguard/device.c
+@@ -19,6 +19,7 @@
+ #include <linux/if_arp.h>
+ #include <linux/icmp.h>
+ #include <linux/suspend.h>
++#include <net/dst_metadata.h>
+ #include <net/icmp.h>
+ #include <net/rtnetlink.h>
+ #include <net/ip_tunnels.h>
+@@ -98,6 +99,7 @@ static int wg_stop(struct net_device *dev)
+ {
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
++ struct sk_buff *skb;
+
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+@@ -108,7 +110,9 @@ static int wg_stop(struct net_device *dev)
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ }
+ mutex_unlock(&wg->device_update_lock);
+- skb_queue_purge(&wg->incoming_handshakes);
++ while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
++ kfree_skb(skb);
++ atomic_set(&wg->handshake_queue_len, 0);
+ wg_socket_reinit(wg, NULL, NULL);
+ return 0;
+ }
+@@ -149,7 +153,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
+ goto err_peer;
+ }
+
+- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
++ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ __skb_queue_head_init(&packets);
+ if (!skb_is_gso(skb)) {
+@@ -235,14 +239,13 @@ static void wg_destruct(struct net_device *dev)
+ destroy_workqueue(wg->handshake_receive_wq);
+ destroy_workqueue(wg->handshake_send_wq);
+ destroy_workqueue(wg->packet_crypt_wq);
+- wg_packet_queue_free(&wg->decrypt_queue);
+- wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->handshake_queue, true);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+ rcu_barrier(); /* Wait for all the peers to be actually freed. */
+ wg_ratelimiter_uninit();
+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
+- skb_queue_purge(&wg->incoming_handshakes);
+ free_percpu(dev->tstats);
+- free_percpu(wg->incoming_handshakes_worker);
+ kvfree(wg->index_hashtable);
+ kvfree(wg->peer_hashtable);
+ mutex_unlock(&wg->device_update_lock);
+@@ -298,7 +301,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
+ init_rwsem(&wg->static_identity.lock);
+ mutex_init(&wg->socket_update_lock);
+ mutex_init(&wg->device_update_lock);
+- skb_queue_head_init(&wg->incoming_handshakes);
+ wg_allowedips_init(&wg->peer_allowedips);
+ wg_cookie_checker_init(&wg->cookie_checker, wg);
+ INIT_LIST_HEAD(&wg->peer_list);
+@@ -316,16 +318,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
+ if (!dev->tstats)
+ goto err_free_index_hashtable;
+
+- wg->incoming_handshakes_worker =
+- wg_packet_percpu_multicore_worker_alloc(
+- wg_packet_handshake_receive_worker, wg);
+- if (!wg->incoming_handshakes_worker)
+- goto err_free_tstats;
+-
+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_receive_wq)
+- goto err_free_incoming_handshakes;
++ goto err_free_tstats;
+
+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+@@ -347,10 +343,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
+ if (ret < 0)
+ goto err_free_encrypt_queue;
+
+- ret = wg_ratelimiter_init();
++ ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
++ MAX_QUEUED_INCOMING_HANDSHAKES);
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
++ ret = wg_ratelimiter_init();
++ if (ret < 0)
++ goto err_free_handshake_queue;
++
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
+@@ -367,18 +368,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
+
+ err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
++err_free_handshake_queue:
++ wg_packet_queue_free(&wg->handshake_queue, false);
+ err_free_decrypt_queue:
+- wg_packet_queue_free(&wg->decrypt_queue);
++ wg_packet_queue_free(&wg->decrypt_queue, false);
+ err_free_encrypt_queue:
+- wg_packet_queue_free(&wg->encrypt_queue);
++ wg_packet_queue_free(&wg->encrypt_queue, false);
+ err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+ err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+ err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
+-err_free_incoming_handshakes:
+- free_percpu(wg->incoming_handshakes_worker);
+ err_free_tstats:
+ free_percpu(dev->tstats);
+ err_free_index_hashtable:
+@@ -398,6 +399,7 @@ static struct rtnl_link_ops link_ops __read_mostly = {
+ static void wg_netns_pre_exit(struct net *net)
+ {
+ struct wg_device *wg;
++ struct wg_peer *peer;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+@@ -407,6 +409,8 @@ static void wg_netns_pre_exit(struct net *net)
+ mutex_lock(&wg->device_update_lock);
+ rcu_assign_pointer(wg->creating_net, NULL);
+ wg_socket_reinit(wg, NULL, NULL);
++ list_for_each_entry(peer, &wg->peer_list, peer_list)
++ wg_socket_clear_peer_endpoint_src(peer);
+ mutex_unlock(&wg->device_update_lock);
+ }
+ }
+diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
+index 854bc3d97150e..43c7cebbf50b0 100644
+--- a/drivers/net/wireguard/device.h
++++ b/drivers/net/wireguard/device.h
+@@ -39,21 +39,18 @@ struct prev_queue {
+
+ struct wg_device {
+ struct net_device *dev;
+- struct crypt_queue encrypt_queue, decrypt_queue;
++ struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net __rcu *creating_net;
+ struct noise_static_identity static_identity;
+- struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
+- struct workqueue_struct *packet_crypt_wq;
+- struct sk_buff_head incoming_handshakes;
+- int incoming_handshake_cpu;
+- struct multicore_worker __percpu *incoming_handshakes_worker;
++ struct workqueue_struct *packet_crypt_wq, *handshake_receive_wq, *handshake_send_wq;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
++ atomic_t handshake_queue_len;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
+diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
+index d0f3b6d7f4089..f5bc279c9a8c2 100644
+--- a/drivers/net/wireguard/netlink.c
++++ b/drivers/net/wireguard/netlink.c
+@@ -436,14 +436,13 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs)
+ if (attrs[WGPEER_A_ENDPOINT]) {
+ struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
+ size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
++ struct endpoint endpoint = { { { 0 } } };
+
+- if ((len == sizeof(struct sockaddr_in) &&
+- addr->sa_family == AF_INET) ||
+- (len == sizeof(struct sockaddr_in6) &&
+- addr->sa_family == AF_INET6)) {
+- struct endpoint endpoint = { { { 0 } } };
+-
+- memcpy(&endpoint.addr, addr, len);
++ if (len == sizeof(struct sockaddr_in) && addr->sa_family == AF_INET) {
++ endpoint.addr4 = *(struct sockaddr_in *)addr;
++ wg_socket_set_peer_endpoint(peer, &endpoint);
++ } else if (len == sizeof(struct sockaddr_in6) && addr->sa_family == AF_INET6) {
++ endpoint.addr6 = *(struct sockaddr_in6 *)addr;
+ wg_socket_set_peer_endpoint(peer, &endpoint);
+ }
+ }
+@@ -547,6 +546,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
+ u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
+ u8 public_key[NOISE_PUBLIC_KEY_LEN];
+ struct wg_peer *peer, *temp;
++ bool send_staged_packets;
+
+ if (!crypto_memneq(wg->static_identity.static_private,
+ private_key, NOISE_PUBLIC_KEY_LEN))
+@@ -565,14 +565,17 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ down_write(&wg->static_identity.lock);
+- wg_noise_set_static_identity_private_key(&wg->static_identity,
+- private_key);
+- list_for_each_entry_safe(peer, temp, &wg->peer_list,
+- peer_list) {
++ send_staged_packets = !wg->static_identity.has_identity && netif_running(wg->dev);
++ wg_noise_set_static_identity_private_key(&wg->static_identity, private_key);
++ send_staged_packets = send_staged_packets && wg->static_identity.has_identity;
++
++ wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
++ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
+ wg_noise_precompute_static_static(peer);
+ wg_noise_expire_current_peer_keypairs(peer);
++ if (send_staged_packets)
++ wg_packet_send_staged_packets(peer);
+ }
+- wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
+ up_write(&wg->static_identity.lock);
+ }
+ skip_set_private_key:
+diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
+index c0cfd9b36c0b5..720952b92e784 100644
+--- a/drivers/net/wireguard/noise.c
++++ b/drivers/net/wireguard/noise.c
+@@ -302,6 +302,41 @@ void wg_noise_set_static_identity_private_key(
+ static_identity->static_public, private_key);
+ }
+
++static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen)
++{
++ struct blake2s_state state;
++ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
++ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
++ int i;
++
++ if (keylen > BLAKE2S_BLOCK_SIZE) {
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, key, keylen);
++ blake2s_final(&state, x_key);
++ } else
++ memcpy(x_key, key, keylen);
++
++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
++ x_key[i] ^= 0x36;
++
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
++ blake2s_update(&state, in, inlen);
++ blake2s_final(&state, i_hash);
++
++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
++ x_key[i] ^= 0x5c ^ 0x36;
++
++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
++ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
++ blake2s_final(&state, i_hash);
++
++ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
++ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
++ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
++}
++
+ /* This is Hugo Krawczyk's HKDF:
+ * - https://eprint.iacr.org/2010/264.pdf
+ * - https://tools.ietf.org/html/rfc5869
+@@ -322,14 +357,14 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
+ ((third_len || third_dst) && (!second_len || !second_dst))));
+
+ /* Extract entropy from data into secret */
+- blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
++ hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
+
+ if (!first_dst || !first_len)
+ goto out;
+
+ /* Expand first key: key = secret, data = 0x1 */
+ output[0] = 1;
+- blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
+ memcpy(first_dst, output, first_len);
+
+ if (!second_dst || !second_len)
+@@ -337,8 +372,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
+
+ /* Expand second key: key = secret, data = first-key || 0x2 */
+ output[BLAKE2S_HASH_SIZE] = 2;
+- blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+- BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
+ memcpy(second_dst, output, second_len);
+
+ if (!third_dst || !third_len)
+@@ -346,8 +380,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
+
+ /* Expand third key: key = secret, data = second-key || 0x3 */
+ output[BLAKE2S_HASH_SIZE] = 3;
+- blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+- BLAKE2S_HASH_SIZE);
++ hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
+ memcpy(third_dst, output, third_len);
+
+ out:
+diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
+index 48e7b982a3073..26d235d152352 100644
+--- a/drivers/net/wireguard/queueing.c
++++ b/drivers/net/wireguard/queueing.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include "queueing.h"
++#include <linux/skb_array.h>
+
+ struct multicore_worker __percpu *
+ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
+@@ -27,6 +28,7 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ int ret;
+
+ memset(queue, 0, sizeof(*queue));
++ queue->last_cpu = -1;
+ ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
+ if (ret)
+ return ret;
+@@ -38,11 +40,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ return 0;
+ }
+
+-void wg_packet_queue_free(struct crypt_queue *queue)
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
+ {
+ free_percpu(queue->worker);
+- WARN_ON(!__ptr_ring_empty(&queue->ring));
+- ptr_ring_cleanup(&queue->ring, NULL);
++ WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
++ ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL);
+ }
+
+ #define NEXT(skb) ((skb)->prev)
+diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
+index 4ef2944a68bc9..a2e702f8c5826 100644
+--- a/drivers/net/wireguard/queueing.h
++++ b/drivers/net/wireguard/queueing.h
+@@ -23,7 +23,7 @@ struct sk_buff;
+ /* queueing.c APIs: */
+ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ unsigned int len);
+-void wg_packet_queue_free(struct crypt_queue *queue);
++void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
+ struct multicore_worker __percpu *
+ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
+
+@@ -119,20 +119,17 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
+ return cpu;
+ }
+
+-/* This function is racy, in the sense that next is unlocked, so it could return
+- * the same CPU twice. A race-free version of this would be to instead store an
+- * atomic sequence number, do an increment-and-return, and then iterate through
+- * every possible CPU until we get to that index -- choose_cpu. However that's
+- * a bit slower, and it doesn't seem like this potential race actually
+- * introduces any performance loss, so we live with it.
++/* This function is racy, in the sense that it's called while last_cpu is
++ * unlocked, so it could return the same CPU twice. Adding locking or using
++ * atomic sequence numbers is slower though, and the consequences of racing are
++ * harmless, so live with it.
+ */
+-static inline int wg_cpumask_next_online(int *next)
++static inline int wg_cpumask_next_online(int *last_cpu)
+ {
+- int cpu = *next;
+-
+- while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
+- cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+- *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
++ int cpu = cpumask_next(*last_cpu, cpu_online_mask);
++ if (cpu >= nr_cpu_ids)
++ cpu = cpumask_first(cpu_online_mask);
++ *last_cpu = cpu;
+ return cpu;
+ }
+
+@@ -161,7 +158,7 @@ static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+
+ static inline int wg_queue_enqueue_per_device_and_peer(
+ struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+- struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
++ struct sk_buff *skb, struct workqueue_struct *wq)
+ {
+ int cpu;
+
+@@ -175,7 +172,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
+ /* Then we queue it up in the device queue, which consumes the
+ * packet as soon as it can.
+ */
+- cpu = wg_cpumask_next_online(next_cpu);
++ cpu = wg_cpumask_next_online(&device_queue->last_cpu);
+ if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+ return -EPIPE;
+ queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
+diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c
+index 3fedd1d21f5ee..dd55e5c26f468 100644
+--- a/drivers/net/wireguard/ratelimiter.c
++++ b/drivers/net/wireguard/ratelimiter.c
+@@ -176,12 +176,12 @@ int wg_ratelimiter_init(void)
+ (1U << 14) / sizeof(struct hlist_head)));
+ max_entries = table_size * 8;
+
+- table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
++ table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
+ if (unlikely(!table_v4))
+ goto err_kmemcache;
+
+ #if IS_ENABLED(CONFIG_IPV6)
+- table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
++ table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
+ if (unlikely(!table_v6)) {
+ kvfree(table_v4);
+ goto err_kmemcache;
+diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
+index 7dc84bcca2613..f500aaf678370 100644
+--- a/drivers/net/wireguard/receive.c
++++ b/drivers/net/wireguard/receive.c
+@@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
+ return;
+ }
+
+- under_load = skb_queue_len(&wg->incoming_handshakes) >=
+- MAX_QUEUED_INCOMING_HANDSHAKES / 8;
++ under_load = atomic_read(&wg->handshake_queue_len) >=
++ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+ if (under_load) {
+ last_under_load = ktime_get_coarse_boottime_ns();
+ } else if (last_under_load) {
+@@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
+
+ void wg_packet_handshake_receive_worker(struct work_struct *work)
+ {
+- struct wg_device *wg = container_of(work, struct multicore_worker,
+- work)->ptr;
++ struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
++ struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue);
+ struct sk_buff *skb;
+
+- while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
++ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ wg_receive_handshake_packet(wg, skb);
+ dev_kfree_skb(skb);
++ atomic_dec(&wg->handshake_queue_len);
+ cond_resched();
+ }
+ }
+@@ -530,7 +531,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb,
+- wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
++ wg->packet_crypt_wq);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD);
+ if (likely(!ret || ret == -EPIPE)) {
+@@ -553,22 +554,28 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+- int cpu;
+-
+- if (skb_queue_len(&wg->incoming_handshakes) >
+- MAX_QUEUED_INCOMING_HANDSHAKES ||
+- unlikely(!rng_is_initialized())) {
++ int cpu, ret = -EBUSY;
++
++ if (unlikely(!rng_is_initialized()))
++ goto drop;
++ if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) {
++ if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) {
++ ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb);
++ spin_unlock_bh(&wg->handshake_queue.ring.producer_lock);
++ }
++ } else
++ ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb);
++ if (ret) {
++ drop:
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+- skb_queue_tail(&wg->incoming_handshakes, skb);
+- /* Queues up a call to packet_process_queued_handshake_
+- * packets(skb):
+- */
+- cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
++ atomic_inc(&wg->handshake_queue_len);
++ cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
++ /* Queues up a call to packet_process_queued_handshake_packets(skb): */
+ queue_work_on(cpu, wg->handshake_receive_wq,
+- &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
++ &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_DATA):
+diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
+index e173204ae7d78..2c9eec24eec45 100644
+--- a/drivers/net/wireguard/selftest/allowedips.c
++++ b/drivers/net/wireguard/selftest/allowedips.c
+@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
+ wg_allowedips_remove_by_peer(&t, a, &mutex);
+ test_negative(4, a, 192, 168, 0, 1);
+
+- /* These will hit the WARN_ON(len >= 128) in free_node if something
+- * goes wrong.
++ /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
++ * if something goes wrong.
+ */
+- for (i = 0; i < 128; ++i) {
+- part = cpu_to_be64(~(1LLU << (i % 64)));
+- memset(&ip, 0xff, 16);
+- memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
++ for (i = 0; i < 64; ++i) {
++ part = cpu_to_be64(~0LLU << i);
++ memset(&ip, 0xff, 8);
++ memcpy((u8 *)&ip + 8, &part, 8);
++ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
++ memcpy(&ip, &part, 8);
++ memset((u8 *)&ip + 8, 0, 8);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ }
+-
++ memset(&ip, 0, 16);
++ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ wg_allowedips_free(&t, &mutex);
+
+ wg_allowedips_init(&t);
+diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
+index 007cd4457c5f6..d4bb40a695ab6 100644
+--- a/drivers/net/wireguard/selftest/ratelimiter.c
++++ b/drivers/net/wireguard/selftest/ratelimiter.c
+@@ -167,7 +167,7 @@ bool __init wg_ratelimiter_selftest(void)
+ ++test;
+ #endif
+
+- for (trials = TRIALS_BEFORE_GIVING_UP;;) {
++ for (trials = TRIALS_BEFORE_GIVING_UP; IS_ENABLED(DEBUG_RATELIMITER_TIMINGS);) {
+ int test_count = 0, ret;
+
+ ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
+@@ -176,7 +176,6 @@ bool __init wg_ratelimiter_selftest(void)
+ test += test_count;
+ goto err;
+ }
+- msleep(500);
+ continue;
+ } else if (ret < 0) {
+ test += test_count;
+@@ -195,7 +194,6 @@ bool __init wg_ratelimiter_selftest(void)
+ test += test_count;
+ goto err;
+ }
+- msleep(50);
+ continue;
+ }
+ test += test_count;
+diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
+index 5368f7c35b4bf..95c853b59e1da 100644
+--- a/drivers/net/wireguard/send.c
++++ b/drivers/net/wireguard/send.c
+@@ -318,7 +318,7 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
+- wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
++ wg->packet_crypt_wq);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
+ err:
+diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
+index 8c496b7471082..0414d7a6ce741 100644
+--- a/drivers/net/wireguard/socket.c
++++ b/drivers/net/wireguard/socket.c
+@@ -160,6 +160,7 @@ out:
+ rcu_read_unlock_bh();
+ return ret;
+ #else
++ kfree_skb(skb);
+ return -EAFNOSUPPORT;
+ #endif
+ }
+@@ -241,7 +242,7 @@ int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
+ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+ endpoint->src_if4 = skb->skb_iif;
+- } else if (skb->protocol == htons(ETH_P_IPV6)) {
++ } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) {
+ endpoint->addr6.sin6_family = AF_INET6;
+ endpoint->addr6.sin6_port = udp_hdr(skb)->source;
+ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
+@@ -284,7 +285,7 @@ void wg_socket_set_peer_endpoint(struct wg_peer *peer,
+ peer->endpoint.addr4 = endpoint->addr4;
+ peer->endpoint.src4 = endpoint->src4;
+ peer->endpoint.src_if4 = endpoint->src_if4;
+- } else if (endpoint->addr.sa_family == AF_INET6) {
++ } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) {
+ peer->endpoint.addr6 = endpoint->addr6;
+ peer->endpoint.src6 = endpoint->src6;
+ } else {
+@@ -308,7 +309,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
+ {
+ write_lock_bh(&peer->endpoint_lock);
+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
+- dst_cache_reset(&peer->endpoint_cache);
++ dst_cache_reset_now(&peer->endpoint_cache);
+ write_unlock_bh(&peer->endpoint_lock);
+ }
+
+diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c
+index d54d32ac9bc41..91f5d6d2d4e2d 100644
+--- a/drivers/net/wireguard/timers.c
++++ b/drivers/net/wireguard/timers.c
+@@ -46,7 +46,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer)
+ if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
+ peer->device->dev->name, peer->internal_id,
+- &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
++ &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2);
+
+ del_timer(&peer->timer_send_keepalive);
+ /* We drop all packets without a keypair and don't try again,
+@@ -64,7 +64,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer)
+ ++peer->timer_handshake_attempts;
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
+ peer->device->dev->name, peer->internal_id,
+- &peer->endpoint.addr, REKEY_TIMEOUT,
++ &peer->endpoint.addr, (int)REKEY_TIMEOUT,
+ peer->timer_handshake_attempts + 1);
+
+ /* We clear the endpoint address src address, in case this is
+@@ -94,7 +94,7 @@ static void wg_expired_new_handshake(struct timer_list *timer)
+
+ pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+- &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
++ &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT));
+ /* We clear the endpoint address src address, in case this is the cause
+ * of trouble.
+ */
+@@ -126,7 +126,7 @@ static void wg_queued_expired_zero_key_material(struct work_struct *work)
+
+ pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+- &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
++ &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ wg_peer_put(peer);
+diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
+index 49cc4b7ed5163..efe38b2c1df73 100644
+--- a/drivers/net/wireless/ath/ar5523/ar5523.c
++++ b/drivers/net/wireless/ath/ar5523/ar5523.c
+@@ -153,6 +153,10 @@ static void ar5523_cmd_rx_cb(struct urb *urb)
+ ar5523_err(ar, "Invalid reply to WDCMSG_TARGET_START");
+ return;
+ }
++ if (!cmd->odata) {
++ ar5523_err(ar, "Unexpected WDCMSG_TARGET_START reply");
++ return;
++ }
+ memcpy(cmd->odata, hdr + 1, sizeof(u32));
+ cmd->olen = sizeof(u32);
+ cmd->res = 0;
+@@ -237,6 +241,11 @@ static void ar5523_cmd_tx_cb(struct urb *urb)
+ }
+ }
+
++static void ar5523_cancel_tx_cmd(struct ar5523 *ar)
++{
++ usb_kill_urb(ar->tx_cmd.urb_tx);
++}
++
+ static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata,
+ int ilen, void *odata, int olen, int flags)
+ {
+@@ -276,6 +285,7 @@ static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata,
+ }
+
+ if (!wait_for_completion_timeout(&cmd->done, 2 * HZ)) {
++ ar5523_cancel_tx_cmd(ar);
+ cmd->odata = NULL;
+ ar5523_err(ar, "timeout waiting for command %02x reply\n",
+ code);
+diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h
+index f083fb9038c36..f02a308a9ffc5 100644
+--- a/drivers/net/wireless/ath/ath.h
++++ b/drivers/net/wireless/ath/ath.h
+@@ -96,11 +96,13 @@ struct ath_keyval {
+ u8 kv_type;
+ u8 kv_pad;
+ u16 kv_len;
+- u8 kv_val[16]; /* TK */
+- u8 kv_mic[8]; /* Michael MIC key */
+- u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware
+- * supports both MIC keys in the same key cache entry;
+- * in that case, kv_mic is the RX key) */
++ struct_group(kv_values,
++ u8 kv_val[16]; /* TK */
++ u8 kv_mic[8]; /* Michael MIC key */
++ u8 kv_txmic[8]; /* Michael MIC TX key (used only if the hardware
++ * supports both MIC keys in the same key cache entry;
++ * in that case, kv_mic is the RX key) */
++ );
+ };
+
+ enum ath_cipher {
+diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
+index 2f9be182fbfbb..58e86e662ab83 100644
+--- a/drivers/net/wireless/ath/ath10k/core.c
++++ b/drivers/net/wireless/ath/ath10k/core.c
+@@ -89,6 +89,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = true,
+ .dynamic_sar_support = false,
+ },
+@@ -124,6 +125,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = true,
+ .dynamic_sar_support = false,
+ },
+@@ -160,6 +162,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -190,6 +193,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .num_wds_entries = 0x20,
+ .uart_pin_workaround = true,
+ .tx_stats_over_pktlog = false,
++ .credit_size_workaround = false,
+ .bmi_large_size_download = true,
+ .supports_peer_stats_info = true,
+ .dynamic_sar_support = true,
+@@ -226,6 +230,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -261,6 +266,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -296,6 +302,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -334,6 +341,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = true,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .supports_peer_stats_info = true,
+ .dynamic_sar_support = true,
+@@ -376,6 +384,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -424,6 +433,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -469,6 +479,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -504,6 +515,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -541,6 +553,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = true,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -570,6 +583,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .ast_skid_limit = 0x10,
+ .num_wds_entries = 0x20,
+ .uart_pin_workaround = true,
++ .credit_size_workaround = true,
+ .dynamic_sar_support = false,
+ },
+ {
+@@ -611,6 +625,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = false,
+ .hw_filter_reset_required = true,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = false,
+ },
+@@ -639,6 +654,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
+ .rri_on_ddr = true,
+ .hw_filter_reset_required = false,
+ .fw_diag_ce_download = false,
++ .credit_size_workaround = false,
+ .tx_stats_over_pktlog = false,
+ .dynamic_sar_support = true,
+ },
+@@ -714,6 +730,7 @@ static void ath10k_send_suspend_complete(struct ath10k *ar)
+
+ static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode)
+ {
++ bool mtu_workaround = ar->hw_params.credit_size_workaround;
+ int ret;
+ u32 param = 0;
+
+@@ -731,7 +748,7 @@ static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode)
+
+ param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET;
+
+- if (mode == ATH10K_FIRMWARE_MODE_NORMAL)
++ if (mode == ATH10K_FIRMWARE_MODE_NORMAL && !mtu_workaround)
+ param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE;
+ else
+ param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE;
+@@ -2690,9 +2707,16 @@ static int ath10k_core_copy_target_iram(struct ath10k *ar)
+ int i, ret;
+ u32 len, remaining_len;
+
+- hw_mem = ath10k_coredump_get_mem_layout(ar);
++ /* copy target iram feature must work also when
++ * ATH10K_FW_CRASH_DUMP_RAM_DATA is disabled, so
++ * use _ath10k_coredump_get_mem_layout() to accomplish that
++ */
++ hw_mem = _ath10k_coredump_get_mem_layout(ar);
+ if (!hw_mem)
+- return -ENOMEM;
++ /* if CONFIG_DEV_COREDUMP is disabled we get NULL, then
++ * just silently disable the feature by doing nothing
++ */
++ return 0;
+
+ for (i = 0; i < hw_mem->region_table.size; i++) {
+ tmp = &hw_mem->region_table.regions[i];
+diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c
+index 7eb72290a925c..55e7e11d06d94 100644
+--- a/drivers/net/wireless/ath/ath10k/coredump.c
++++ b/drivers/net/wireless/ath/ath10k/coredump.c
+@@ -1447,11 +1447,17 @@ static u32 ath10k_coredump_get_ramdump_size(struct ath10k *ar)
+
+ const struct ath10k_hw_mem_layout *ath10k_coredump_get_mem_layout(struct ath10k *ar)
+ {
+- int i;
+-
+ if (!test_bit(ATH10K_FW_CRASH_DUMP_RAM_DATA, &ath10k_coredump_mask))
+ return NULL;
+
++ return _ath10k_coredump_get_mem_layout(ar);
++}
++EXPORT_SYMBOL(ath10k_coredump_get_mem_layout);
++
++const struct ath10k_hw_mem_layout *_ath10k_coredump_get_mem_layout(struct ath10k *ar)
++{
++ int i;
++
+ if (WARN_ON(ar->target_version == 0))
+ return NULL;
+
+@@ -1464,7 +1470,6 @@ const struct ath10k_hw_mem_layout *ath10k_coredump_get_mem_layout(struct ath10k
+
+ return NULL;
+ }
+-EXPORT_SYMBOL(ath10k_coredump_get_mem_layout);
+
+ struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar)
+ {
+diff --git a/drivers/net/wireless/ath/ath10k/coredump.h b/drivers/net/wireless/ath/ath10k/coredump.h
+index 42404e246e0e9..240d705150888 100644
+--- a/drivers/net/wireless/ath/ath10k/coredump.h
++++ b/drivers/net/wireless/ath/ath10k/coredump.h
+@@ -176,6 +176,7 @@ int ath10k_coredump_register(struct ath10k *ar);
+ void ath10k_coredump_unregister(struct ath10k *ar);
+ void ath10k_coredump_destroy(struct ath10k *ar);
+
++const struct ath10k_hw_mem_layout *_ath10k_coredump_get_mem_layout(struct ath10k *ar);
+ const struct ath10k_hw_mem_layout *ath10k_coredump_get_mem_layout(struct ath10k *ar);
+
+ #else /* CONFIG_DEV_COREDUMP */
+@@ -214,6 +215,12 @@ ath10k_coredump_get_mem_layout(struct ath10k *ar)
+ return NULL;
+ }
+
++static inline const struct ath10k_hw_mem_layout *
++_ath10k_coredump_get_mem_layout(struct ath10k *ar)
++{
++ return NULL;
++}
++
+ #endif /* CONFIG_DEV_COREDUMP */
+
+ #endif /* _COREDUMP_H_ */
+diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
+index d6b8bdcef4160..b793eac2cfac8 100644
+--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
++++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
+@@ -147,6 +147,9 @@ void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt)
+ htt->num_pending_tx--;
+ if (htt->num_pending_tx == htt->max_num_pending_tx - 1)
+ ath10k_mac_tx_unlock(htt->ar, ATH10K_TX_PAUSE_Q_FULL);
++
++ if (htt->num_pending_tx == 0)
++ wake_up(&htt->empty_tx_wq);
+ }
+
+ int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt)
+diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
+index 6b03c7787e36a..591ef7416b613 100644
+--- a/drivers/net/wireless/ath/ath10k/hw.h
++++ b/drivers/net/wireless/ath/ath10k/hw.h
+@@ -618,6 +618,9 @@ struct ath10k_hw_params {
+ */
+ bool uart_pin_workaround;
+
++ /* Workaround for the credit size calculation */
++ bool credit_size_workaround;
++
+ /* tx stats support over pktlog */
+ bool tx_stats_over_pktlog;
+
+diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
+index c272b290fa73d..8208434d7d2b2 100644
+--- a/drivers/net/wireless/ath/ath10k/mac.c
++++ b/drivers/net/wireless/ath/ath10k/mac.c
+@@ -864,11 +864,36 @@ static int ath10k_peer_delete(struct ath10k *ar, u32 vdev_id, const u8 *addr)
+ return 0;
+ }
+
++static void ath10k_peer_map_cleanup(struct ath10k *ar, struct ath10k_peer *peer)
++{
++ int peer_id, i;
++
++ lockdep_assert_held(&ar->conf_mutex);
++
++ for_each_set_bit(peer_id, peer->peer_ids,
++ ATH10K_MAX_NUM_PEER_IDS) {
++ ar->peer_map[peer_id] = NULL;
++ }
++
++ /* Double check that peer is properly un-referenced from
++ * the peer_map
++ */
++ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) {
++ if (ar->peer_map[i] == peer) {
++ ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n",
++ peer->addr, peer, i);
++ ar->peer_map[i] = NULL;
++ }
++ }
++
++ list_del(&peer->list);
++ kfree(peer);
++ ar->num_peers--;
++}
++
+ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id)
+ {
+ struct ath10k_peer *peer, *tmp;
+- int peer_id;
+- int i;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+@@ -880,25 +905,7 @@ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id)
+ ath10k_warn(ar, "removing stale peer %pM from vdev_id %d\n",
+ peer->addr, vdev_id);
+
+- for_each_set_bit(peer_id, peer->peer_ids,
+- ATH10K_MAX_NUM_PEER_IDS) {
+- ar->peer_map[peer_id] = NULL;
+- }
+-
+- /* Double check that peer is properly un-referenced from
+- * the peer_map
+- */
+- for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) {
+- if (ar->peer_map[i] == peer) {
+- ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n",
+- peer->addr, peer, i);
+- ar->peer_map[i] = NULL;
+- }
+- }
+-
+- list_del(&peer->list);
+- kfree(peer);
+- ar->num_peers--;
++ ath10k_peer_map_cleanup(ar, peer);
+ }
+ spin_unlock_bh(&ar->data_lock);
+ }
+@@ -993,8 +1000,12 @@ static void ath10k_mac_vif_beacon_cleanup(struct ath10k_vif *arvif)
+ ath10k_mac_vif_beacon_free(arvif);
+
+ if (arvif->beacon_buf) {
+- dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN,
+- arvif->beacon_buf, arvif->beacon_paddr);
++ if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL)
++ kfree(arvif->beacon_buf);
++ else
++ dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN,
++ arvif->beacon_buf,
++ arvif->beacon_paddr);
+ arvif->beacon_buf = NULL;
+ }
+ }
+@@ -1048,7 +1059,7 @@ static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id)
+ arg.channel.min_power = 0;
+ arg.channel.max_power = channel->max_power * 2;
+ arg.channel.max_reg_power = channel->max_reg_power * 2;
+- arg.channel.max_antenna_gain = channel->max_antenna_gain * 2;
++ arg.channel.max_antenna_gain = channel->max_antenna_gain;
+
+ reinit_completion(&ar->vdev_setup_done);
+ reinit_completion(&ar->vdev_delete_done);
+@@ -1494,7 +1505,7 @@ static int ath10k_vdev_start_restart(struct ath10k_vif *arvif,
+ arg.channel.min_power = 0;
+ arg.channel.max_power = chandef->chan->max_power * 2;
+ arg.channel.max_reg_power = chandef->chan->max_reg_power * 2;
+- arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain * 2;
++ arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain;
+
+ if (arvif->vdev_type == WMI_VDEV_TYPE_AP) {
+ arg.ssid = arvif->u.ap.ssid;
+@@ -3422,7 +3433,7 @@ static int ath10k_update_channel_list(struct ath10k *ar)
+ ch->min_power = 0;
+ ch->max_power = channel->max_power * 2;
+ ch->max_reg_power = channel->max_reg_power * 2;
+- ch->max_antenna_gain = channel->max_antenna_gain * 2;
++ ch->max_antenna_gain = channel->max_antenna_gain;
+ ch->reg_class_id = 0; /* FIXME */
+
+ /* FIXME: why use only legacy modes, why not any
+@@ -5335,13 +5346,29 @@ err:
+ static void ath10k_stop(struct ieee80211_hw *hw)
+ {
+ struct ath10k *ar = hw->priv;
++ u32 opt;
+
+ ath10k_drain_tx(ar);
+
+ mutex_lock(&ar->conf_mutex);
+ if (ar->state != ATH10K_STATE_OFF) {
+- if (!ar->hw_rfkill_on)
+- ath10k_halt(ar);
++ if (!ar->hw_rfkill_on) {
++ /* If the current driver state is RESTARTING but not yet
++ * fully RESTARTED because of an incoming suspend event,
++ * then ath10k_halt() is already called via
++ * ath10k_core_restart() and should not be called here.
++ */
++ if (ar->state != ATH10K_STATE_RESTARTING) {
++ ath10k_halt(ar);
++ } else {
++ /* Suspending here, because when in RESTARTING
++ * state, ath10k_core_stop() skips
++ * ath10k_wait_for_suspend().
++ */
++ opt = WMI_PDEV_SUSPEND_AND_DISABLE_INTR;
++ ath10k_wait_for_suspend(ar, opt);
++ }
++ }
+ ar->state = ATH10K_STATE_OFF;
+ }
+ mutex_unlock(&ar->conf_mutex);
+@@ -5576,10 +5603,25 @@ static int ath10k_add_interface(struct ieee80211_hw *hw,
+ if (vif->type == NL80211_IFTYPE_ADHOC ||
+ vif->type == NL80211_IFTYPE_MESH_POINT ||
+ vif->type == NL80211_IFTYPE_AP) {
+- arvif->beacon_buf = dma_alloc_coherent(ar->dev,
+- IEEE80211_MAX_FRAME_LEN,
+- &arvif->beacon_paddr,
+- GFP_ATOMIC);
++ if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) {
++ arvif->beacon_buf = kmalloc(IEEE80211_MAX_FRAME_LEN,
++ GFP_KERNEL);
++
++ /* Using a kernel pointer in place of a dma_addr_t
++ * token can lead to undefined behavior if that
++ * makes it into cache management functions. Use a
++ * known-invalid address token instead, which
++ * avoids the warning and makes it easier to catch
++ * bugs if it does end up getting used.
++ */
++ arvif->beacon_paddr = DMA_MAPPING_ERROR;
++ } else {
++ arvif->beacon_buf =
++ dma_alloc_coherent(ar->dev,
++ IEEE80211_MAX_FRAME_LEN,
++ &arvif->beacon_paddr,
++ GFP_ATOMIC);
++ }
+ if (!arvif->beacon_buf) {
+ ret = -ENOMEM;
+ ath10k_warn(ar, "failed to allocate beacon buffer: %d\n",
+@@ -5794,8 +5836,12 @@ err_vdev_delete:
+
+ err:
+ if (arvif->beacon_buf) {
+- dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN,
+- arvif->beacon_buf, arvif->beacon_paddr);
++ if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL)
++ kfree(arvif->beacon_buf);
++ else
++ dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN,
++ arvif->beacon_buf,
++ arvif->beacon_paddr);
+ arvif->beacon_buf = NULL;
+ }
+
+@@ -7541,10 +7587,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw,
+ /* Clean up the peer object as well since we
+ * must have failed to do this above.
+ */
+- list_del(&peer->list);
+- ar->peer_map[i] = NULL;
+- kfree(peer);
+- ar->num_peers--;
++ ath10k_peer_map_cleanup(ar, peer);
+ }
+ }
+ spin_unlock_bh(&ar->data_lock);
+diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
+index 4d4e2f91e15cf..3a62f66973137 100644
+--- a/drivers/net/wireless/ath/ath10k/pci.c
++++ b/drivers/net/wireless/ath/ath10k/pci.c
+@@ -1963,8 +1963,9 @@ static int ath10k_pci_hif_start(struct ath10k *ar)
+ ath10k_pci_irq_enable(ar);
+ ath10k_pci_rx_post(ar);
+
+- pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+- ar_pci->link_ctl);
++ pcie_capability_clear_and_set_word(ar_pci->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_ASPMC,
++ ar_pci->link_ctl & PCI_EXP_LNKCTL_ASPMC);
+
+ return 0;
+ }
+@@ -2821,8 +2822,8 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar,
+
+ pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ &ar_pci->link_ctl);
+- pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+- ar_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC);
++ pcie_capability_clear_word(ar_pci->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_ASPMC);
+
+ /*
+ * Bring the target up cleanly.
+@@ -3793,18 +3794,22 @@ static struct pci_driver ath10k_pci_driver = {
+
+ static int __init ath10k_pci_init(void)
+ {
+- int ret;
++ int ret1, ret2;
+
+- ret = pci_register_driver(&ath10k_pci_driver);
+- if (ret)
++ ret1 = pci_register_driver(&ath10k_pci_driver);
++ if (ret1)
+ printk(KERN_ERR "failed to register ath10k pci driver: %d\n",
+- ret);
++ ret1);
+
+- ret = ath10k_ahb_init();
+- if (ret)
+- printk(KERN_ERR "ahb init failed: %d\n", ret);
++ ret2 = ath10k_ahb_init();
++ if (ret2)
++ printk(KERN_ERR "ahb init failed: %d\n", ret2);
+
+- return ret;
++ if (ret1 && ret2)
++ return ret1;
++
++ /* registered to at least one bus */
++ return 0;
+ }
+ module_init(ath10k_pci_init);
+
+diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c
+index 07e478f9a808c..80fcb917fe4e1 100644
+--- a/drivers/net/wireless/ath/ath10k/qmi.c
++++ b/drivers/net/wireless/ath/ath10k/qmi.c
+@@ -864,7 +864,8 @@ static void ath10k_qmi_event_server_exit(struct ath10k_qmi *qmi)
+
+ ath10k_qmi_remove_msa_permission(qmi);
+ ath10k_core_free_board_files(ar);
+- if (!test_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags))
++ if (!test_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags) &&
++ !test_bit(ATH10K_SNOC_FLAG_MODEM_STOPPED, &ar_snoc->flags))
+ ath10k_snoc_fw_crashed_dump(ar);
+
+ ath10k_snoc_fw_indication(ar, ATH10K_QMI_EVENT_FW_DOWN_IND);
+diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
+index b746052737e0b..eb705214f3f0a 100644
+--- a/drivers/net/wireless/ath/ath10k/sdio.c
++++ b/drivers/net/wireless/ath/ath10k/sdio.c
+@@ -1363,8 +1363,11 @@ static void ath10k_rx_indication_async_work(struct work_struct *work)
+ ep->ep_ops.ep_rx_complete(ar, skb);
+ }
+
+- if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags))
++ if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) {
++ local_bh_disable();
+ napi_schedule(&ar->napi);
++ local_bh_enable();
++ }
+ }
+
+ static int ath10k_sdio_read_rtc_state(struct ath10k_sdio *ar_sdio, unsigned char *state)
+diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
+index ea00fbb156015..73fe77e7824b4 100644
+--- a/drivers/net/wireless/ath/ath10k/snoc.c
++++ b/drivers/net/wireless/ath/ath10k/snoc.c
+@@ -12,6 +12,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/property.h>
+ #include <linux/regulator/consumer.h>
++#include <linux/remoteproc/qcom_rproc.h>
+ #include <linux/of_address.h>
+ #include <linux/iommu.h>
+
+@@ -1248,13 +1249,12 @@ static void ath10k_snoc_init_napi(struct ath10k *ar)
+ static int ath10k_snoc_request_irq(struct ath10k *ar)
+ {
+ struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+- int irqflags = IRQF_TRIGGER_RISING;
+ int ret, id;
+
+ for (id = 0; id < CE_COUNT_MAX; id++) {
+ ret = request_irq(ar_snoc->ce_irqs[id].irq_line,
+- ath10k_snoc_per_engine_handler,
+- irqflags, ce_name[id], ar);
++ ath10k_snoc_per_engine_handler, 0,
++ ce_name[id], ar);
+ if (ret) {
+ ath10k_err(ar,
+ "failed to register IRQ handler for CE %d: %d\n",
+@@ -1477,6 +1477,74 @@ void ath10k_snoc_fw_crashed_dump(struct ath10k *ar)
+ mutex_unlock(&ar->dump_mutex);
+ }
+
++static int ath10k_snoc_modem_notify(struct notifier_block *nb, unsigned long action,
++ void *data)
++{
++ struct ath10k_snoc *ar_snoc = container_of(nb, struct ath10k_snoc, nb);
++ struct ath10k *ar = ar_snoc->ar;
++ struct qcom_ssr_notify_data *notify_data = data;
++
++ switch (action) {
++ case QCOM_SSR_BEFORE_POWERUP:
++ ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem starting event\n");
++ clear_bit(ATH10K_SNOC_FLAG_MODEM_STOPPED, &ar_snoc->flags);
++ break;
++
++ case QCOM_SSR_AFTER_POWERUP:
++ ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem running event\n");
++ break;
++
++ case QCOM_SSR_BEFORE_SHUTDOWN:
++ ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem %s event\n",
++ notify_data->crashed ? "crashed" : "stopping");
++ if (!notify_data->crashed)
++ set_bit(ATH10K_SNOC_FLAG_MODEM_STOPPED, &ar_snoc->flags);
++ else
++ clear_bit(ATH10K_SNOC_FLAG_MODEM_STOPPED, &ar_snoc->flags);
++ break;
++
++ case QCOM_SSR_AFTER_SHUTDOWN:
++ ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem offline event\n");
++ break;
++
++ default:
++ ath10k_err(ar, "received unrecognized event %lu\n", action);
++ break;
++ }
++
++ return NOTIFY_OK;
++}
++
++static int ath10k_modem_init(struct ath10k *ar)
++{
++ struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
++ void *notifier;
++ int ret;
++
++ ar_snoc->nb.notifier_call = ath10k_snoc_modem_notify;
++
++ notifier = qcom_register_ssr_notifier("mpss", &ar_snoc->nb);
++ if (IS_ERR(notifier)) {
++ ret = PTR_ERR(notifier);
++ ath10k_err(ar, "failed to initialize modem notifier: %d\n", ret);
++ return ret;
++ }
++
++ ar_snoc->notifier = notifier;
++
++ return 0;
++}
++
++static void ath10k_modem_deinit(struct ath10k *ar)
++{
++ int ret;
++ struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
++
++ ret = qcom_unregister_ssr_notifier(ar_snoc->notifier, &ar_snoc->nb);
++ if (ret)
++ ath10k_err(ar, "error %d unregistering notifier\n", ret);
++}
++
+ static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size)
+ {
+ struct device *dev = ar->dev;
+@@ -1487,11 +1555,11 @@ static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size)
+ node = of_parse_phandle(dev->of_node, "memory-region", 0);
+ if (node) {
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret) {
+ dev_err(dev, "failed to resolve msa fixed region\n");
+ return ret;
+ }
+- of_node_put(node);
+
+ ar->msa.paddr = r.start;
+ ar->msa.mem_size = resource_size(&r);
+@@ -1740,10 +1808,17 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
+ goto err_fw_deinit;
+ }
+
++ ret = ath10k_modem_init(ar);
++ if (ret)
++ goto err_qmi_deinit;
++
+ ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc probe\n");
+
+ return 0;
+
++err_qmi_deinit:
++ ath10k_qmi_deinit(ar);
++
+ err_fw_deinit:
+ ath10k_fw_deinit(ar);
+
+@@ -1771,6 +1846,7 @@ static int ath10k_snoc_free_resources(struct ath10k *ar)
+ ath10k_fw_deinit(ar);
+ ath10k_snoc_free_irq(ar);
+ ath10k_snoc_release_resource(ar);
++ ath10k_modem_deinit(ar);
+ ath10k_qmi_deinit(ar);
+ ath10k_core_destroy(ar);
+
+diff --git a/drivers/net/wireless/ath/ath10k/snoc.h b/drivers/net/wireless/ath/ath10k/snoc.h
+index 5095d1893681b..d4bce17076960 100644
+--- a/drivers/net/wireless/ath/ath10k/snoc.h
++++ b/drivers/net/wireless/ath/ath10k/snoc.h
+@@ -6,6 +6,8 @@
+ #ifndef _SNOC_H_
+ #define _SNOC_H_
+
++#include <linux/notifier.h>
++
+ #include "hw.h"
+ #include "ce.h"
+ #include "qmi.h"
+@@ -45,6 +47,7 @@ struct ath10k_snoc_ce_irq {
+ enum ath10k_snoc_flags {
+ ATH10K_SNOC_FLAG_REGISTERED,
+ ATH10K_SNOC_FLAG_UNREGISTERING,
++ ATH10K_SNOC_FLAG_MODEM_STOPPED,
+ ATH10K_SNOC_FLAG_RECOVERY,
+ ATH10K_SNOC_FLAG_8BIT_HOST_CAP_QUIRK,
+ };
+@@ -75,6 +78,8 @@ struct ath10k_snoc {
+ struct clk_bulk_data *clks;
+ size_t num_clks;
+ struct ath10k_qmi *qmi;
++ struct notifier_block nb;
++ void *notifier;
+ unsigned long flags;
+ bool xo_cal_supported;
+ u32 xo_cal_data;
+diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
+index 7c9ea0c073d8b..6f8b642188941 100644
+--- a/drivers/net/wireless/ath/ath10k/txrx.c
++++ b/drivers/net/wireless/ath/ath10k/txrx.c
+@@ -82,8 +82,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt,
+ flags = skb_cb->flags;
+ ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id);
+ ath10k_htt_tx_dec_pending(htt);
+- if (htt->num_pending_tx == 0)
+- wake_up(&htt->empty_tx_wq);
+ spin_unlock_bh(&htt->tx_lock);
+
+ rcu_read_lock();
+diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
+index 19b9c27e30e20..3d98f19c6ec8a 100644
+--- a/drivers/net/wireless/ath/ath10k/usb.c
++++ b/drivers/net/wireless/ath/ath10k/usb.c
+@@ -525,7 +525,7 @@ static int ath10k_usb_submit_ctrl_in(struct ath10k *ar,
+ req,
+ USB_DIR_IN | USB_TYPE_VENDOR |
+ USB_RECIP_DEVICE, value, index, buf,
+- size, 2 * HZ);
++ size, 2000);
+
+ if (ret < 0) {
+ ath10k_warn(ar, "Failed to read usb control message: %d\n",
+@@ -853,6 +853,11 @@ static int ath10k_usb_setup_pipe_resources(struct ath10k *ar,
+ le16_to_cpu(endpoint->wMaxPacketSize),
+ endpoint->bInterval);
+ }
++
++ /* Ignore broken descriptors. */
++ if (usb_endpoint_maxp(endpoint) == 0)
++ continue;
++
+ urbcount = 0;
+
+ pipe_num =
+diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
+index b8a4bbfe10b87..7c1c2658cb5f8 100644
+--- a/drivers/net/wireless/ath/ath10k/wmi.c
++++ b/drivers/net/wireless/ath/ath10k/wmi.c
+@@ -2610,6 +2610,10 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
+ if (ieee80211_is_beacon(hdr->frame_control))
+ ath10k_mac_handle_beacon(ar, skb);
+
++ if (ieee80211_is_beacon(hdr->frame_control) ||
++ ieee80211_is_probe_resp(hdr->frame_control))
++ status->boottime_ns = ktime_get_boottime_ns();
++
+ ath10k_dbg(ar, ATH10K_DBG_MGMT,
+ "event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
+ skb, skb->len,
+diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
+index 41c1a3d339c25..01bfd09a9d88c 100644
+--- a/drivers/net/wireless/ath/ath10k/wmi.h
++++ b/drivers/net/wireless/ath/ath10k/wmi.h
+@@ -2066,7 +2066,9 @@ struct wmi_channel {
+ union {
+ __le32 reginfo1;
+ struct {
++ /* note: power unit is 1 dBm */
+ u8 antenna_max;
++ /* note: power unit is 0.5 dBm */
+ u8 max_tx_power;
+ } __packed;
+ } __packed;
+@@ -2086,6 +2088,7 @@ struct wmi_channel_arg {
+ u32 min_power;
+ u32 max_power;
+ u32 max_reg_power;
++ /* note: power unit is 1 dBm */
+ u32 max_antenna_gain;
+ u32 reg_class_id;
+ enum wmi_phy_mode mode;
+diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c
+index 7d65c115669fe..20b9aa8ddf7d5 100644
+--- a/drivers/net/wireless/ath/ath10k/wow.c
++++ b/drivers/net/wireless/ath/ath10k/wow.c
+@@ -337,14 +337,15 @@ static int ath10k_vif_wow_set_wakeups(struct ath10k_vif *arvif,
+ if (patterns[i].mask[j / 8] & BIT(j % 8))
+ bitmask[j] = 0xff;
+ old_pattern.mask = bitmask;
+- new_pattern = old_pattern;
+
+ if (ar->wmi.rx_decap_mode == ATH10K_HW_TXRX_NATIVE_WIFI) {
+- if (patterns[i].pkt_offset < ETH_HLEN)
++ if (patterns[i].pkt_offset < ETH_HLEN) {
+ ath10k_wow_convert_8023_to_80211(&new_pattern,
+ &old_pattern);
+- else
++ } else {
++ new_pattern = old_pattern;
+ new_pattern.pkt_offset += WOW_HDR_LEN - ETH_HLEN;
++ }
+ }
+
+ if (WARN_ON(new_pattern.pattern_len > WOW_MAX_PATTERN_SIZE))
+diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
+index 8c9c781afc3e5..24bd0520926bf 100644
+--- a/drivers/net/wireless/ath/ath11k/ahb.c
++++ b/drivers/net/wireless/ath/ath11k/ahb.c
+@@ -175,8 +175,11 @@ static void __ath11k_ahb_ext_irq_disable(struct ath11k_base *ab)
+
+ ath11k_ahb_ext_grp_disable(irq_grp);
+
+- napi_synchronize(&irq_grp->napi);
+- napi_disable(&irq_grp->napi);
++ if (irq_grp->napi_enabled) {
++ napi_synchronize(&irq_grp->napi);
++ napi_disable(&irq_grp->napi);
++ irq_grp->napi_enabled = false;
++ }
+ }
+ }
+
+@@ -206,13 +209,13 @@ static void ath11k_ahb_clearbit32(struct ath11k_base *ab, u8 bit, u32 offset)
+
+ static void ath11k_ahb_ce_irq_enable(struct ath11k_base *ab, u16 ce_id)
+ {
+- const struct ce_pipe_config *ce_config;
++ const struct ce_attr *ce_attr;
+
+- ce_config = &ab->hw_params.target_ce_config[ce_id];
+- if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_OUT)
++ ce_attr = &ab->hw_params.host_ce_config[ce_id];
++ if (ce_attr->src_nentries)
+ ath11k_ahb_setbit32(ab, ce_id, CE_HOST_IE_ADDRESS);
+
+- if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_IN) {
++ if (ce_attr->dest_nentries) {
+ ath11k_ahb_setbit32(ab, ce_id, CE_HOST_IE_2_ADDRESS);
+ ath11k_ahb_setbit32(ab, ce_id + CE_HOST_IE_3_SHIFT,
+ CE_HOST_IE_3_ADDRESS);
+@@ -221,13 +224,13 @@ static void ath11k_ahb_ce_irq_enable(struct ath11k_base *ab, u16 ce_id)
+
+ static void ath11k_ahb_ce_irq_disable(struct ath11k_base *ab, u16 ce_id)
+ {
+- const struct ce_pipe_config *ce_config;
++ const struct ce_attr *ce_attr;
+
+- ce_config = &ab->hw_params.target_ce_config[ce_id];
+- if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_OUT)
++ ce_attr = &ab->hw_params.host_ce_config[ce_id];
++ if (ce_attr->src_nentries)
+ ath11k_ahb_clearbit32(ab, ce_id, CE_HOST_IE_ADDRESS);
+
+- if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_IN) {
++ if (ce_attr->dest_nentries) {
+ ath11k_ahb_clearbit32(ab, ce_id, CE_HOST_IE_2_ADDRESS);
+ ath11k_ahb_clearbit32(ab, ce_id + CE_HOST_IE_3_SHIFT,
+ CE_HOST_IE_3_ADDRESS);
+@@ -300,7 +303,10 @@ static void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
+ for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) {
+ struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
+
+- napi_enable(&irq_grp->napi);
++ if (!irq_grp->napi_enabled) {
++ napi_enable(&irq_grp->napi);
++ irq_grp->napi_enabled = true;
++ }
+ ath11k_ahb_ext_grp_enable(irq_grp);
+ }
+ }
+@@ -385,6 +391,8 @@ static void ath11k_ahb_free_ext_irq(struct ath11k_base *ab)
+
+ for (j = 0; j < irq_grp->num_irq; j++)
+ free_irq(ab->irq_num[irq_grp->irqs[j]], irq_grp);
++
++ netif_napi_del(&irq_grp->napi);
+ }
+ }
+
+diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
+index 969bf1a590d99..48a449fbd2bcc 100644
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -71,6 +71,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .supports_suspend = false,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+ .fix_l1ss = true,
++ .wakeup_mhi = false,
+ },
+ {
+ .hw_rev = ATH11K_HW_IPQ6018_HW10,
+@@ -112,6 +113,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .supports_suspend = false,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+ .fix_l1ss = true,
++ .wakeup_mhi = false,
+ },
+ {
+ .name = "qca6390 hw2.0",
+@@ -152,6 +154,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .supports_suspend = true,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+ .fix_l1ss = true,
++ .wakeup_mhi = true,
+ },
+ {
+ .name = "qcn9074 hw1.0",
+@@ -190,6 +193,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .supports_suspend = false,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
+ .fix_l1ss = true,
++ .wakeup_mhi = false,
+ },
+ {
+ .name = "wcn6855 hw2.0",
+@@ -230,6 +234,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .supports_suspend = true,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
+ .fix_l1ss = false,
++ .wakeup_mhi = true,
+ },
+ };
+
+@@ -347,11 +352,26 @@ static int ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+ scnprintf(variant, sizeof(variant), ",variant=%s",
+ ab->qmi.target.bdf_ext);
+
+- scnprintf(name, name_len,
+- "bus=%s,qmi-chip-id=%d,qmi-board-id=%d%s",
+- ath11k_bus_str(ab->hif.bus),
+- ab->qmi.target.chip_id,
+- ab->qmi.target.board_id, variant);
++ switch (ab->id.bdf_search) {
++ case ATH11K_BDF_SEARCH_BUS_AND_BOARD:
++ scnprintf(name, name_len,
++ "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
++ ath11k_bus_str(ab->hif.bus),
++ ab->id.vendor, ab->id.device,
++ ab->id.subsystem_vendor,
++ ab->id.subsystem_device,
++ ab->qmi.target.chip_id,
++ ab->qmi.target.board_id,
++ variant);
++ break;
++ default:
++ scnprintf(name, name_len,
++ "bus=%s,qmi-chip-id=%d,qmi-board-id=%d%s",
++ ath11k_bus_str(ab->hif.bus),
++ ab->qmi.target.chip_id,
++ ab->qmi.target.board_id, variant);
++ break;
++ }
+
+ ath11k_dbg(ab, ATH11K_DBG_BOOT, "boot using board name '%s'\n", name);
+
+@@ -588,7 +608,7 @@ static int ath11k_core_fetch_board_data_api_1(struct ath11k_base *ab,
+ return 0;
+ }
+
+-#define BOARD_NAME_SIZE 100
++#define BOARD_NAME_SIZE 200
+ int ath11k_core_fetch_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd)
+ {
+ char boardname[BOARD_NAME_SIZE];
+@@ -679,23 +699,23 @@ static int ath11k_core_pdev_create(struct ath11k_base *ab)
+ return ret;
+ }
+
+- ret = ath11k_mac_register(ab);
++ ret = ath11k_dp_pdev_alloc(ab);
+ if (ret) {
+- ath11k_err(ab, "failed register the radio with mac80211: %d\n", ret);
++ ath11k_err(ab, "failed to attach DP pdev: %d\n", ret);
+ goto err_pdev_debug;
+ }
+
+- ret = ath11k_dp_pdev_alloc(ab);
++ ret = ath11k_mac_register(ab);
+ if (ret) {
+- ath11k_err(ab, "failed to attach DP pdev: %d\n", ret);
+- goto err_mac_unregister;
++ ath11k_err(ab, "failed register the radio with mac80211: %d\n", ret);
++ goto err_dp_pdev_free;
+ }
+
+ ret = ath11k_thermal_register(ab);
+ if (ret) {
+ ath11k_err(ab, "could not register thermal device: %d\n",
+ ret);
+- goto err_dp_pdev_free;
++ goto err_mac_unregister;
+ }
+
+ ret = ath11k_spectral_init(ab);
+@@ -708,10 +728,10 @@ static int ath11k_core_pdev_create(struct ath11k_base *ab)
+
+ err_thermal_unregister:
+ ath11k_thermal_unregister(ab);
+-err_dp_pdev_free:
+- ath11k_dp_pdev_free(ab);
+ err_mac_unregister:
+ ath11k_mac_unregister(ab);
++err_dp_pdev_free:
++ ath11k_dp_pdev_free(ab);
+ err_pdev_debug:
+ ath11k_debugfs_pdev_destroy(ab);
+
+diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
+index 018fb2385f2a3..fda1c2db05d0a 100644
+--- a/drivers/net/wireless/ath/ath11k/core.h
++++ b/drivers/net/wireless/ath/ath11k/core.h
+@@ -47,6 +47,11 @@ enum ath11k_supported_bw {
+ ATH11K_BW_160 = 3,
+ };
+
++enum ath11k_bdf_search {
++ ATH11K_BDF_SEARCH_DEFAULT,
++ ATH11K_BDF_SEARCH_BUS_AND_BOARD,
++};
++
+ enum wme_ac {
+ WME_AC_BE,
+ WME_AC_BK,
+@@ -132,6 +137,7 @@ struct ath11k_ext_irq_grp {
+ u32 num_irq;
+ u32 grp_id;
+ u64 timestamp;
++ bool napi_enabled;
+ struct napi_struct napi;
+ struct net_device napi_ndev;
+ };
+@@ -701,7 +707,6 @@ struct ath11k_base {
+ u32 wlan_init_status;
+ int irq_num[ATH11K_IRQ_NUM_MAX];
+ struct ath11k_ext_irq_grp ext_irq_grp[ATH11K_EXT_IRQ_GRP_NUM_MAX];
+- struct napi_struct *napi;
+ struct ath11k_targ_cap target_caps;
+ u32 ext_service_bitmap[WMI_SERVICE_EXT_BM_SIZE];
+ bool pdevs_macaddr_valid;
+@@ -726,7 +731,6 @@ struct ath11k_base {
+ enum ath11k_dfs_region dfs_region;
+ #ifdef CONFIG_ATH11K_DEBUGFS
+ struct dentry *debugfs_soc;
+- struct dentry *debugfs_ath11k;
+ #endif
+ struct ath11k_soc_dp_stats soc_stats;
+
+@@ -747,6 +751,14 @@ struct ath11k_base {
+
+ struct completion htc_suspend;
+
++ struct {
++ enum ath11k_bdf_search bdf_search;
++ u32 vendor;
++ u32 device;
++ u32 subsystem_vendor;
++ u32 subsystem_device;
++ } id;
++
+ /* must be last */
+ u8 drv_priv[0] __aligned(sizeof(void *));
+ };
+diff --git a/drivers/net/wireless/ath/ath11k/dbring.c b/drivers/net/wireless/ath/ath11k/dbring.c
+index 5e1f5437b4185..fd98ba5b1130b 100644
+--- a/drivers/net/wireless/ath/ath11k/dbring.c
++++ b/drivers/net/wireless/ath/ath11k/dbring.c
+@@ -8,8 +8,7 @@
+
+ static int ath11k_dbring_bufs_replenish(struct ath11k *ar,
+ struct ath11k_dbring *ring,
+- struct ath11k_dbring_element *buff,
+- gfp_t gfp)
++ struct ath11k_dbring_element *buff)
+ {
+ struct ath11k_base *ab = ar->ab;
+ struct hal_srng *srng;
+@@ -35,7 +34,7 @@ static int ath11k_dbring_bufs_replenish(struct ath11k *ar,
+ goto err;
+
+ spin_lock_bh(&ring->idr_lock);
+- buf_id = idr_alloc(&ring->bufs_idr, buff, 0, ring->bufs_max, gfp);
++ buf_id = idr_alloc(&ring->bufs_idr, buff, 0, ring->bufs_max, GFP_ATOMIC);
+ spin_unlock_bh(&ring->idr_lock);
+ if (buf_id < 0) {
+ ret = -ENOBUFS;
+@@ -72,8 +71,7 @@ err:
+ }
+
+ static int ath11k_dbring_fill_bufs(struct ath11k *ar,
+- struct ath11k_dbring *ring,
+- gfp_t gfp)
++ struct ath11k_dbring *ring)
+ {
+ struct ath11k_dbring_element *buff;
+ struct hal_srng *srng;
+@@ -92,11 +90,11 @@ static int ath11k_dbring_fill_bufs(struct ath11k *ar,
+ size = sizeof(*buff) + ring->buf_sz + align - 1;
+
+ while (num_remain > 0) {
+- buff = kzalloc(size, gfp);
++ buff = kzalloc(size, GFP_ATOMIC);
+ if (!buff)
+ break;
+
+- ret = ath11k_dbring_bufs_replenish(ar, ring, buff, gfp);
++ ret = ath11k_dbring_bufs_replenish(ar, ring, buff);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to replenish db ring num_remain %d req_ent %d\n",
+ num_remain, req_entries);
+@@ -176,7 +174,7 @@ int ath11k_dbring_buf_setup(struct ath11k *ar,
+ ring->hp_addr = ath11k_hal_srng_get_hp_addr(ar->ab, srng);
+ ring->tp_addr = ath11k_hal_srng_get_tp_addr(ar->ab, srng);
+
+- ret = ath11k_dbring_fill_bufs(ar, ring, GFP_KERNEL);
++ ret = ath11k_dbring_fill_bufs(ar, ring);
+
+ return ret;
+ }
+@@ -322,7 +320,7 @@ int ath11k_dbring_buffer_release_event(struct ath11k_base *ab,
+ }
+
+ memset(buff, 0, size);
+- ath11k_dbring_bufs_replenish(ar, ring, buff, GFP_ATOMIC);
++ ath11k_dbring_bufs_replenish(ar, ring, buff);
+ }
+
+ spin_unlock_bh(&srng->lock);
+diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h
+index 659a275e2eb38..694ebba17fad7 100644
+--- a/drivers/net/wireless/ath/ath11k/debug.h
++++ b/drivers/net/wireless/ath/ath11k/debug.h
+@@ -23,8 +23,8 @@ enum ath11k_debug_mask {
+ ATH11K_DBG_TESTMODE = 0x00000400,
+ ATH11k_DBG_HAL = 0x00000800,
+ ATH11K_DBG_PCI = 0x00001000,
+- ATH11K_DBG_DP_TX = 0x00001000,
+- ATH11K_DBG_DP_RX = 0x00002000,
++ ATH11K_DBG_DP_TX = 0x00002000,
++ ATH11K_DBG_DP_RX = 0x00004000,
+ ATH11K_DBG_ANY = 0xffffffff,
+ };
+
+diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c
+index 554feaf1ed5cd..f827035f0dd2e 100644
+--- a/drivers/net/wireless/ath/ath11k/debugfs.c
++++ b/drivers/net/wireless/ath/ath11k/debugfs.c
+@@ -836,10 +836,6 @@ int ath11k_debugfs_pdev_create(struct ath11k_base *ab)
+ if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags))
+ return 0;
+
+- ab->debugfs_soc = debugfs_create_dir(ab->hw_params.name, ab->debugfs_ath11k);
+- if (IS_ERR(ab->debugfs_soc))
+- return PTR_ERR(ab->debugfs_soc);
+-
+ debugfs_create_file("simulate_fw_crash", 0600, ab->debugfs_soc, ab,
+ &fops_simulate_fw_crash);
+
+@@ -857,15 +853,51 @@ void ath11k_debugfs_pdev_destroy(struct ath11k_base *ab)
+
+ int ath11k_debugfs_soc_create(struct ath11k_base *ab)
+ {
+- ab->debugfs_ath11k = debugfs_create_dir("ath11k", NULL);
++ struct dentry *root;
++ bool dput_needed;
++ char name[64];
++ int ret;
++
++ root = debugfs_lookup("ath11k", NULL);
++ if (!root) {
++ root = debugfs_create_dir("ath11k", NULL);
++ if (IS_ERR_OR_NULL(root))
++ return PTR_ERR(root);
++
++ dput_needed = false;
++ } else {
++ /* a dentry from lookup() needs dput() after we don't use it */
++ dput_needed = true;
++ }
++
++ scnprintf(name, sizeof(name), "%s-%s", ath11k_bus_str(ab->hif.bus),
++ dev_name(ab->dev));
++
++ ab->debugfs_soc = debugfs_create_dir(name, root);
++ if (IS_ERR_OR_NULL(ab->debugfs_soc)) {
++ ret = PTR_ERR(ab->debugfs_soc);
++ goto out;
++ }
++
++ ret = 0;
+
+- return PTR_ERR_OR_ZERO(ab->debugfs_ath11k);
++out:
++ if (dput_needed)
++ dput(root);
++
++ return ret;
+ }
+
+ void ath11k_debugfs_soc_destroy(struct ath11k_base *ab)
+ {
+- debugfs_remove_recursive(ab->debugfs_ath11k);
+- ab->debugfs_ath11k = NULL;
++ debugfs_remove_recursive(ab->debugfs_soc);
++ ab->debugfs_soc = NULL;
++
++ /* We are not removing ath11k directory on purpose, even if it
++ * would be empty. This simplifies the directory handling and it's
++ * a minor cosmetic issue to leave an empty ath11k directory to
++ * debugfs.
++ */
+ }
+ EXPORT_SYMBOL(ath11k_debugfs_soc_destroy);
+
+diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
+index ee768ccce46e1..d3e50e34f23dd 100644
+--- a/drivers/net/wireless/ath/ath11k/dp.h
++++ b/drivers/net/wireless/ath/ath11k/dp.h
+@@ -515,7 +515,8 @@ struct htt_ppdu_stats_cfg_cmd {
+ } __packed;
+
+ #define HTT_PPDU_STATS_CFG_MSG_TYPE GENMASK(7, 0)
+-#define HTT_PPDU_STATS_CFG_PDEV_ID GENMASK(15, 8)
++#define HTT_PPDU_STATS_CFG_SOC_STATS BIT(8)
++#define HTT_PPDU_STATS_CFG_PDEV_ID GENMASK(15, 9)
+ #define HTT_PPDU_STATS_CFG_TLV_TYPE_BITMASK GENMASK(31, 16)
+
+ enum htt_ppdu_stats_tag_type {
+diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
+index 9a224817630ae..357abd87d5491 100644
+--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
++++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
+@@ -354,10 +354,10 @@ int ath11k_dp_rxbufs_replenish(struct ath11k_base *ab, int mac_id,
+ goto fail_free_skb;
+
+ spin_lock_bh(&rx_ring->idr_lock);
+- buf_id = idr_alloc(&rx_ring->bufs_idr, skb, 0,
+- rx_ring->bufs_max * 3, GFP_ATOMIC);
++ buf_id = idr_alloc(&rx_ring->bufs_idr, skb, 1,
++ (rx_ring->bufs_max * 3) + 1, GFP_ATOMIC);
+ spin_unlock_bh(&rx_ring->idr_lock);
+- if (buf_id < 0)
++ if (buf_id <= 0)
+ goto fail_dma_unmap;
+
+ desc = ath11k_hal_srng_src_get_next_entry(ab, srng);
+@@ -2337,8 +2337,10 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
+ channel_num = meta_data;
+ center_freq = meta_data >> 16;
+
+- if (center_freq >= 5935 && center_freq <= 7105) {
++ if (center_freq >= ATH11K_MIN_6G_FREQ &&
++ center_freq <= ATH11K_MAX_6G_FREQ) {
+ rx_status->band = NL80211_BAND_6GHZ;
++ rx_status->freq = center_freq;
+ } else if (channel_num >= 1 && channel_num <= 14) {
+ rx_status->band = NL80211_BAND_2GHZ;
+ } else if (channel_num >= 36 && channel_num <= 173) {
+@@ -2356,8 +2358,9 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
+ rx_desc, sizeof(struct hal_rx_desc));
+ }
+
+- rx_status->freq = ieee80211_channel_to_frequency(channel_num,
+- rx_status->band);
++ if (rx_status->band != NL80211_BAND_6GHZ)
++ rx_status->freq = ieee80211_channel_to_frequency(channel_num,
++ rx_status->band);
+
+ ath11k_dp_rx_h_rate(ar, rx_desc, rx_status);
+ }
+@@ -2599,6 +2602,9 @@ try_again:
+ cookie);
+ mac_id = FIELD_GET(DP_RXDMA_BUF_COOKIE_PDEV_ID, cookie);
+
++ if (unlikely(buf_id == 0))
++ continue;
++
+ ar = ab->pdevs[mac_id].ar;
+ rx_ring = &ar->dp.rx_refill_buf_ring;
+ spin_lock_bh(&rx_ring->idr_lock);
+@@ -3053,6 +3059,7 @@ int ath11k_peer_rx_frag_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id
+ if (!peer) {
+ ath11k_warn(ab, "failed to find the peer to set up fragment info\n");
+ spin_unlock_bh(&ab->base_lock);
++ crypto_free_shash(tfm);
+ return -ENOENT;
+ }
+
+@@ -3310,7 +3317,7 @@ static int ath11k_dp_rx_h_defrag_reo_reinject(struct ath11k *ar, struct dp_rx_ti
+
+ paddr = dma_map_single(ab->dev, defrag_skb->data,
+ defrag_skb->len + skb_tailroom(defrag_skb),
+- DMA_FROM_DEVICE);
++ DMA_TO_DEVICE);
+ if (dma_mapping_error(ab->dev, paddr))
+ return -ENOMEM;
+
+@@ -3375,7 +3382,7 @@ err_free_idr:
+ spin_unlock_bh(&rx_refill_ring->idr_lock);
+ err_unmap_dma:
+ dma_unmap_single(ab->dev, paddr, defrag_skb->len + skb_tailroom(defrag_skb),
+- DMA_FROM_DEVICE);
++ DMA_TO_DEVICE);
+ return ret;
+ }
+
+diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
+index 8bba5234f81fc..bb8744ccfa00c 100644
+--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
++++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
+@@ -895,7 +895,7 @@ int ath11k_dp_tx_htt_h2t_ppdu_stats_req(struct ath11k *ar, u32 mask)
+ cmd->msg = FIELD_PREP(HTT_PPDU_STATS_CFG_MSG_TYPE,
+ HTT_H2T_MSG_TYPE_PPDU_STATS_CFG);
+
+- pdev_mask = 1 << (i + 1);
++ pdev_mask = 1 << (ar->pdev_idx + i);
+ cmd->msg |= FIELD_PREP(HTT_PPDU_STATS_CFG_PDEV_ID, pdev_mask);
+ cmd->msg |= FIELD_PREP(HTT_PPDU_STATS_CFG_TLV_TYPE_BITMASK, mask);
+
+diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
+index eaa0edca55761..5dbf5596c9e8e 100644
+--- a/drivers/net/wireless/ath/ath11k/hal.c
++++ b/drivers/net/wireless/ath/ath11k/hal.c
+@@ -947,6 +947,7 @@ int ath11k_hal_srng_setup(struct ath11k_base *ab, enum hal_ring_type type,
+ srng->msi_data = params->msi_data;
+ srng->initialized = 1;
+ spin_lock_init(&srng->lock);
++ lockdep_set_class(&srng->lock, hal->srng_key + ring_id);
+
+ for (i = 0; i < HAL_SRNG_NUM_REG_GRP; i++) {
+ srng->hwreg_base[i] = srng_config->reg_start[i] +
+@@ -1233,6 +1234,24 @@ static int ath11k_hal_srng_create_config(struct ath11k_base *ab)
+ return 0;
+ }
+
++static void ath11k_hal_register_srng_key(struct ath11k_base *ab)
++{
++ struct ath11k_hal *hal = &ab->hal;
++ u32 ring_id;
++
++ for (ring_id = 0; ring_id < HAL_SRNG_RING_ID_MAX; ring_id++)
++ lockdep_register_key(hal->srng_key + ring_id);
++}
++
++static void ath11k_hal_unregister_srng_key(struct ath11k_base *ab)
++{
++ struct ath11k_hal *hal = &ab->hal;
++ u32 ring_id;
++
++ for (ring_id = 0; ring_id < HAL_SRNG_RING_ID_MAX; ring_id++)
++ lockdep_unregister_key(hal->srng_key + ring_id);
++}
++
+ int ath11k_hal_srng_init(struct ath11k_base *ab)
+ {
+ struct ath11k_hal *hal = &ab->hal;
+@@ -1252,6 +1271,8 @@ int ath11k_hal_srng_init(struct ath11k_base *ab)
+ if (ret)
+ goto err_free_cont_rdp;
+
++ ath11k_hal_register_srng_key(ab);
++
+ return 0;
+
+ err_free_cont_rdp:
+@@ -1266,6 +1287,7 @@ void ath11k_hal_srng_deinit(struct ath11k_base *ab)
+ {
+ struct ath11k_hal *hal = &ab->hal;
+
++ ath11k_hal_unregister_srng_key(ab);
+ ath11k_hal_free_cont_rdp(ab);
+ ath11k_hal_free_cont_wrp(ab);
+ kfree(hal->srng_config);
+diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
+index 35ed3a14e200a..7fdcd8bbf7e98 100644
+--- a/drivers/net/wireless/ath/ath11k/hal.h
++++ b/drivers/net/wireless/ath/ath11k/hal.h
+@@ -901,6 +901,8 @@ struct ath11k_hal {
+ /* shadow register configuration */
+ u32 shadow_reg_addr[HAL_SHADOW_NUM_REGS];
+ int num_shadow_reg_configured;
++
++ struct lock_class_key srng_key[HAL_SRNG_RING_ID_MAX];
+ };
+
+ u32 ath11k_hal_reo_qdesc_size(u32 ba_window_size, u8 tid);
+diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
+index d9596903b0a58..3e92cc7cfe4c9 100644
+--- a/drivers/net/wireless/ath/ath11k/hw.c
++++ b/drivers/net/wireless/ath/ath11k/hw.c
+@@ -1015,8 +1015,6 @@ const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_ipq8074 = {
+ const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qca6390 = {
+ .tx = {
+ ATH11K_TX_RING_MASK_0,
+- ATH11K_TX_RING_MASK_1,
+- ATH11K_TX_RING_MASK_2,
+ },
+ .rx_mon_status = {
+ 0, 0, 0, 0,
+diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
+index 62f5978b30055..4fe051625edfb 100644
+--- a/drivers/net/wireless/ath/ath11k/hw.h
++++ b/drivers/net/wireless/ath/ath11k/hw.h
+@@ -163,6 +163,7 @@ struct ath11k_hw_params {
+ bool supports_suspend;
+ u32 hal_desc_sz;
+ bool fix_l1ss;
++ bool wakeup_mhi;
+ };
+
+ struct ath11k_hw_ops {
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index e9b3689331ec2..ae6e14fe03c72 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: BSD-3-Clause-Clear
+ /*
+ * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
++ * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+ #include <net/mac80211.h>
+@@ -767,11 +768,15 @@ static int ath11k_mac_setup_bcn_tmpl(struct ath11k_vif *arvif)
+
+ if (cfg80211_find_ie(WLAN_EID_RSN, ies, (skb_tail_pointer(bcn) - ies)))
+ arvif->rsnie_present = true;
++ else
++ arvif->rsnie_present = false;
+
+ if (cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT,
+ WLAN_OUI_TYPE_MICROSOFT_WPA,
+ ies, (skb_tail_pointer(bcn) - ies)))
+ arvif->wpaie_present = true;
++ else
++ arvif->wpaie_present = false;
+
+ ret = ath11k_wmi_bcn_tmpl(ar, arvif->vdev_id, &offs, bcn);
+
+@@ -2151,6 +2156,19 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
+ if (ret)
+ ath11k_warn(ar->ab, "failed to update bcn template: %d\n",
+ ret);
++ if (vif->bss_conf.he_support) {
++ ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
++ WMI_VDEV_PARAM_BA_MODE,
++ WMI_BA_MODE_BUFFER_SIZE_256);
++ if (ret)
++ ath11k_warn(ar->ab,
++ "failed to set BA BUFFER SIZE 256 for vdev: %d\n",
++ arvif->vdev_id);
++ else
++ ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
++ "Set BA BUFFER SIZE 256 for VDEV: %d\n",
++ arvif->vdev_id);
++ }
+ }
+
+ if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) {
+@@ -2186,14 +2204,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw,
+
+ if (arvif->is_up && vif->bss_conf.he_support &&
+ vif->bss_conf.he_oper.params) {
+- ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
+- WMI_VDEV_PARAM_BA_MODE,
+- WMI_BA_MODE_BUFFER_SIZE_256);
+- if (ret)
+- ath11k_warn(ar->ab,
+- "failed to set BA BUFFER SIZE 256 for vdev: %d\n",
+- arvif->vdev_id);
+-
+ param_id = WMI_VDEV_PARAM_HEOPS_0_31;
+ param_value = vif->bss_conf.he_oper.params;
+ ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
+@@ -2576,9 +2586,12 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw,
+ arg.scan_id = ATH11K_SCAN_ID;
+
+ if (req->ie_len) {
++ arg.extraie.ptr = kmemdup(req->ie, req->ie_len, GFP_KERNEL);
++ if (!arg.extraie.ptr) {
++ ret = -ENOMEM;
++ goto exit;
++ }
+ arg.extraie.len = req->ie_len;
+- arg.extraie.ptr = kzalloc(req->ie_len, GFP_KERNEL);
+- memcpy(arg.extraie.ptr, req->ie, req->ie_len);
+ }
+
+ if (req->n_ssids) {
+@@ -2655,9 +2668,7 @@ static int ath11k_install_key(struct ath11k_vif *arvif,
+ return 0;
+
+ if (cmd == DISABLE_KEY) {
+- /* TODO: Check if FW expects value other than NONE for del */
+- /* arg.key_cipher = WMI_CIPHER_NONE; */
+- arg.key_len = 0;
++ arg.key_cipher = WMI_CIPHER_NONE;
+ arg.key_data = NULL;
+ goto install;
+ }
+@@ -2789,7 +2800,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
+ /* flush the fragments cache during key (re)install to
+ * ensure all frags in the new frag list belong to the same key.
+ */
+- if (peer && cmd == SET_KEY)
++ if (peer && sta && cmd == SET_KEY)
+ ath11k_peer_frags_flush(ar, peer);
+ spin_unlock_bh(&ab->base_lock);
+
+@@ -3673,6 +3684,8 @@ static int ath11k_mac_set_txbf_conf(struct ath11k_vif *arvif)
+ if (vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE)) {
+ nsts = vht_cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
+ nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT;
++ if (nsts > (ar->num_rx_chains - 1))
++ nsts = ar->num_rx_chains - 1;
+ value |= SM(nsts, WMI_TXBF_STS_CAP_OFFSET);
+ }
+
+@@ -3713,7 +3726,7 @@ static int ath11k_mac_set_txbf_conf(struct ath11k_vif *arvif)
+ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap)
+ {
+ bool subfer, subfee;
+- int sound_dim = 0;
++ int sound_dim = 0, nsts = 0;
+
+ subfer = !!(*vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE));
+ subfee = !!(*vht_cap & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE));
+@@ -3723,6 +3736,11 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap)
+ subfer = false;
+ }
+
++ if (ar->num_rx_chains < 2) {
++ *vht_cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
++ subfee = false;
++ }
++
+ /* If SU Beaformer is not set, then disable MU Beamformer Capability */
+ if (!subfer)
+ *vht_cap &= ~(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE);
+@@ -3735,7 +3753,9 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap)
+ sound_dim >>= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT;
+ *vht_cap &= ~IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK;
+
+- /* TODO: Need to check invalid STS and Sound_dim values set by FW? */
++ nsts = (*vht_cap & IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK);
++ nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT;
++ *vht_cap &= ~IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
+
+ /* Enable Sounding Dimension Field only if SU BF is enabled */
+ if (subfer) {
+@@ -3747,9 +3767,15 @@ static void ath11k_set_vht_txbf_cap(struct ath11k *ar, u32 *vht_cap)
+ *vht_cap |= sound_dim;
+ }
+
+- /* Use the STS advertised by FW unless SU Beamformee is not supported*/
+- if (!subfee)
+- *vht_cap &= ~(IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK);
++ /* Enable Beamformee STS Field only if SU BF is enabled */
++ if (subfee) {
++ if (nsts > (ar->num_rx_chains - 1))
++ nsts = ar->num_rx_chains - 1;
++
++ nsts <<= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT;
++ nsts &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
++ *vht_cap |= nsts;
++ }
+ }
+
+ static struct ieee80211_sta_vht_cap
+@@ -4131,23 +4157,32 @@ static int __ath11k_set_antenna(struct ath11k *ar, u32 tx_ant, u32 rx_ant)
+ return 0;
+ }
+
+-int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx)
++static void ath11k_mac_tx_mgmt_free(struct ath11k *ar, int buf_id)
+ {
+- struct sk_buff *msdu = skb;
++ struct sk_buff *msdu;
+ struct ieee80211_tx_info *info;
+- struct ath11k *ar = ctx;
+- struct ath11k_base *ab = ar->ab;
+
+ spin_lock_bh(&ar->txmgmt_idr_lock);
+- idr_remove(&ar->txmgmt_idr, buf_id);
++ msdu = idr_remove(&ar->txmgmt_idr, buf_id);
+ spin_unlock_bh(&ar->txmgmt_idr_lock);
+- dma_unmap_single(ab->dev, ATH11K_SKB_CB(msdu)->paddr, msdu->len,
++
++ if (!msdu)
++ return;
++
++ dma_unmap_single(ar->ab->dev, ATH11K_SKB_CB(msdu)->paddr, msdu->len,
+ DMA_TO_DEVICE);
+
+ info = IEEE80211_SKB_CB(msdu);
+ memset(&info->status, 0, sizeof(info->status));
+
+ ieee80211_free_txskb(ar->hw, msdu);
++}
++
++int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx)
++{
++ struct ath11k *ar = ctx;
++
++ ath11k_mac_tx_mgmt_free(ar, buf_id);
+
+ return 0;
+ }
+@@ -4156,17 +4191,10 @@ static int ath11k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx)
+ {
+ struct ieee80211_vif *vif = ctx;
+ struct ath11k_skb_cb *skb_cb = ATH11K_SKB_CB((struct sk_buff *)skb);
+- struct sk_buff *msdu = skb;
+ struct ath11k *ar = skb_cb->ar;
+- struct ath11k_base *ab = ar->ab;
+
+- if (skb_cb->vif == vif) {
+- spin_lock_bh(&ar->txmgmt_idr_lock);
+- idr_remove(&ar->txmgmt_idr, buf_id);
+- spin_unlock_bh(&ar->txmgmt_idr_lock);
+- dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len,
+- DMA_TO_DEVICE);
+- }
++ if (skb_cb->vif == vif)
++ ath11k_mac_tx_mgmt_free(ar, buf_id);
+
+ return 0;
+ }
+@@ -4181,6 +4209,8 @@ static int ath11k_mac_mgmt_tx_wmi(struct ath11k *ar, struct ath11k_vif *arvif,
+ int buf_id;
+ int ret;
+
++ ATH11K_SKB_CB(skb)->ar = ar;
++
+ spin_lock_bh(&ar->txmgmt_idr_lock);
+ buf_id = idr_alloc(&ar->txmgmt_idr, skb, 0,
+ ATH11K_TX_MGMT_NUM_PENDING_MAX, GFP_ATOMIC);
+@@ -4251,8 +4281,8 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work)
+ }
+
+ arvif = ath11k_vif_to_arvif(skb_cb->vif);
+- if (ar->allocated_vdev_map & (1LL << arvif->vdev_id) &&
+- arvif->is_started) {
++ mutex_lock(&ar->conf_mutex);
++ if (ar->allocated_vdev_map & (1LL << arvif->vdev_id)) {
+ ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb);
+ if (ret) {
+ ath11k_warn(ar->ab, "failed to tx mgmt frame, vdev_id %d :%d\n",
+@@ -4268,6 +4298,7 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work)
+ arvif->is_started);
+ ieee80211_free_txskb(ar->hw, skb);
+ }
++ mutex_unlock(&ar->conf_mutex);
+ }
+ }
+
+@@ -5577,6 +5608,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
+ struct ath11k *ar = hw->priv;
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = (void *)vif->drv_priv;
++ struct ath11k_peer *peer;
+ int ret;
+
+ mutex_lock(&ar->conf_mutex);
+@@ -5588,9 +5620,13 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
+ WARN_ON(!arvif->is_started);
+
+ if (ab->hw_params.vdev_start_delay &&
+- arvif->vdev_type == WMI_VDEV_TYPE_MONITOR &&
+- ath11k_peer_find_by_addr(ab, ar->mac_addr))
+- ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr);
++ arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) {
++ spin_lock_bh(&ab->base_lock);
++ peer = ath11k_peer_find_by_addr(ab, ar->mac_addr);
++ spin_unlock_bh(&ab->base_lock);
++ if (peer)
++ ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr);
++ }
+
+ ret = ath11k_mac_vdev_stop(arvif);
+ if (ret)
+@@ -6545,7 +6581,7 @@ static int __ath11k_mac_register(struct ath11k *ar)
+ ar->hw->queues = ATH11K_HW_MAX_QUEUES;
+ ar->hw->wiphy->tx_queue_len = ATH11K_QUEUE_LEN;
+ ar->hw->offchannel_tx_hw_queue = ATH11K_HW_MAX_QUEUES - 1;
+- ar->hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
++ ar->hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+
+ ar->hw->vif_data_size = sizeof(struct ath11k_vif);
+ ar->hw->sta_data_size = sizeof(struct ath11k_sta);
+@@ -6590,7 +6626,7 @@ static int __ath11k_mac_register(struct ath11k *ar)
+ ar->hw->wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MONITOR);
+
+ /* Apply the regd received during initialization */
+- ret = ath11k_regd_update(ar, true);
++ ret = ath11k_regd_update(ar);
+ if (ret) {
+ ath11k_err(ar->ab, "ath11k regd update failed: %d\n", ret);
+ goto err_unregister_hw;
+diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
+index 26c7ae242db67..f2149241fb131 100644
+--- a/drivers/net/wireless/ath/ath11k/mhi.c
++++ b/drivers/net/wireless/ath/ath11k/mhi.c
+@@ -519,7 +519,7 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
+ ret = 0;
+ break;
+ case ATH11K_MHI_POWER_ON:
+- ret = mhi_async_power_up(ab_pci->mhi_ctrl);
++ ret = mhi_sync_power_up(ab_pci->mhi_ctrl);
+ break;
+ case ATH11K_MHI_POWER_OFF:
+ mhi_power_down(ab_pci->mhi_ctrl, true);
+@@ -533,7 +533,11 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
+ ret = mhi_pm_suspend(ab_pci->mhi_ctrl);
+ break;
+ case ATH11K_MHI_RESUME:
+- ret = mhi_pm_resume(ab_pci->mhi_ctrl);
++ /* Do force MHI resume as some devices like QCA6390, WCN6855
++ * are not in M3 state but they are functional. So just ignore
++ * the MHI state while resuming.
++ */
++ ret = mhi_pm_resume_force(ab_pci->mhi_ctrl);
+ break;
+ case ATH11K_MHI_TRIGGER_RDDM:
+ ret = mhi_force_rddm_mode(ab_pci->mhi_ctrl);
+diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
+index 5abb38cc3b55f..b8e4c74254642 100644
+--- a/drivers/net/wireless/ath/ath11k/pci.c
++++ b/drivers/net/wireless/ath/ath11k/pci.c
+@@ -182,7 +182,8 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value)
+ /* for offset beyond BAR + 4K - 32, may
+ * need to wakeup MHI to access.
+ */
+- if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
++ if (ab->hw_params.wakeup_mhi &&
++ test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
+ offset >= ACCESS_ALWAYS_OFF)
+ mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev);
+
+@@ -206,7 +207,8 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value)
+ }
+ }
+
+- if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
++ if (ab->hw_params.wakeup_mhi &&
++ test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
+ offset >= ACCESS_ALWAYS_OFF)
+ mhi_device_put(ab_pci->mhi_ctrl->mhi_dev);
+ }
+@@ -219,7 +221,8 @@ u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset)
+ /* for offset beyond BAR + 4K - 32, may
+ * need to wakeup MHI to access.
+ */
+- if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
++ if (ab->hw_params.wakeup_mhi &&
++ test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
+ offset >= ACCESS_ALWAYS_OFF)
+ mhi_device_get_sync(ab_pci->mhi_ctrl->mhi_dev);
+
+@@ -243,7 +246,8 @@ u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset)
+ }
+ }
+
+- if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
++ if (ab->hw_params.wakeup_mhi &&
++ test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
+ offset >= ACCESS_ALWAYS_OFF)
+ mhi_device_put(ab_pci->mhi_ctrl->mhi_dev);
+
+@@ -632,8 +636,11 @@ static void __ath11k_pci_ext_irq_disable(struct ath11k_base *sc)
+
+ ath11k_pci_ext_grp_disable(irq_grp);
+
+- napi_synchronize(&irq_grp->napi);
+- napi_disable(&irq_grp->napi);
++ if (irq_grp->napi_enabled) {
++ napi_synchronize(&irq_grp->napi);
++ napi_disable(&irq_grp->napi);
++ irq_grp->napi_enabled = false;
++ }
+ }
+ }
+
+@@ -652,7 +659,10 @@ static void ath11k_pci_ext_irq_enable(struct ath11k_base *ab)
+ for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) {
+ struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
+
+- napi_enable(&irq_grp->napi);
++ if (!irq_grp->napi_enabled) {
++ napi_enable(&irq_grp->napi);
++ irq_grp->napi_enabled = true;
++ }
+ ath11k_pci_ext_grp_enable(irq_grp);
+ }
+ }
+@@ -990,8 +1000,8 @@ static void ath11k_pci_aspm_disable(struct ath11k_pci *ab_pci)
+ u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1));
+
+ /* disable L0s and L1 */
+- pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL,
+- ab_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC);
++ pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_ASPMC);
+
+ set_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags);
+ }
+@@ -999,8 +1009,10 @@ static void ath11k_pci_aspm_disable(struct ath11k_pci *ab_pci)
+ static void ath11k_pci_aspm_restore(struct ath11k_pci *ab_pci)
+ {
+ if (test_and_clear_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags))
+- pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL,
+- ab_pci->link_ctl);
++ pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_ASPMC,
++ ab_pci->link_ctl &
++ PCI_EXP_LNKCTL_ASPMC);
+ }
+
+ static int ath11k_pci_power_up(struct ath11k_base *ab)
+@@ -1218,6 +1230,15 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+ goto err_free_core;
+ }
+
++ ath11k_dbg(ab, ATH11K_DBG_BOOT, "pci probe %04x:%04x %04x:%04x\n",
++ pdev->vendor, pdev->device,
++ pdev->subsystem_vendor, pdev->subsystem_device);
++
++ ab->id.vendor = pdev->vendor;
++ ab->id.device = pdev->device;
++ ab->id.subsystem_vendor = pdev->subsystem_vendor;
++ ab->id.subsystem_device = pdev->subsystem_device;
++
+ switch (pci_dev->device) {
+ case QCA6390_DEVICE_ID:
+ ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
+@@ -1240,6 +1261,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
+ ab->hw_rev = ATH11K_HW_QCN9074_HW10;
+ break;
+ case WCN6855_DEVICE_ID:
++ ab->id.bdf_search = ATH11K_BDF_SEARCH_BUS_AND_BOARD;
+ ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
+ &soc_hw_version_minor);
+ switch (soc_hw_version_major) {
+@@ -1366,11 +1388,16 @@ static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev)
+ struct ath11k_base *ab = dev_get_drvdata(dev);
+ int ret;
+
++ if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) {
++ ath11k_dbg(ab, ATH11K_DBG_BOOT, "boot skipping pci suspend as qmi is not initialised\n");
++ return 0;
++ }
++
+ ret = ath11k_core_suspend(ab);
+ if (ret)
+ ath11k_warn(ab, "failed to suspend core: %d\n", ret);
+
+- return ret;
++ return 0;
+ }
+
+ static __maybe_unused int ath11k_pci_pm_resume(struct device *dev)
+@@ -1378,6 +1405,11 @@ static __maybe_unused int ath11k_pci_pm_resume(struct device *dev)
+ struct ath11k_base *ab = dev_get_drvdata(dev);
+ int ret;
+
++ if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) {
++ ath11k_dbg(ab, ATH11K_DBG_BOOT, "boot skipping pci resume as qmi is not initialised\n");
++ return 0;
++ }
++
+ ret = ath11k_core_resume(ab);
+ if (ret)
+ ath11k_warn(ab, "failed to resume core: %d\n", ret);
+diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
+index b5e34d670715e..e4a65513a1bfd 100644
+--- a/drivers/net/wireless/ath/ath11k/qmi.c
++++ b/drivers/net/wireless/ath/ath11k/qmi.c
+@@ -1770,7 +1770,7 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab)
+ chunk->vaddr = dma_alloc_coherent(ab->dev,
+ chunk->size,
+ &chunk->paddr,
+- GFP_KERNEL);
++ GFP_KERNEL | __GFP_NOWARN);
+ if (!chunk->vaddr) {
+ if (ab->qmi.mem_seg_count <= ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT) {
+ ath11k_dbg(ab, ATH11K_DBG_QMI,
+@@ -2707,8 +2707,10 @@ static void ath11k_qmi_driver_event_work(struct work_struct *work)
+ list_del(&event->list);
+ spin_unlock(&qmi->event_lock);
+
+- if (test_bit(ATH11K_FLAG_UNREGISTERING, &ab->dev_flags))
++ if (test_bit(ATH11K_FLAG_UNREGISTERING, &ab->dev_flags)) {
++ kfree(event);
+ return;
++ }
+
+ switch (event->type) {
+ case ATH11K_QMI_EVENT_SERVER_ARRIVE:
+diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h
+index 3d59303307032..25940b683ea45 100644
+--- a/drivers/net/wireless/ath/ath11k/qmi.h
++++ b/drivers/net/wireless/ath/ath11k/qmi.h
+@@ -27,7 +27,7 @@
+ #define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01 52
+ #define ATH11K_QMI_CALDB_SIZE 0x480000
+ #define ATH11K_QMI_BDF_EXT_STR_LENGTH 0x20
+-#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 3
++#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 5
+
+ #define QMI_WLFW_REQUEST_MEM_IND_V01 0x0035
+ #define QMI_WLFW_FW_MEM_READY_IND_V01 0x0037
+diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c
+index e1a1df169034b..562ecfd50742f 100644
+--- a/drivers/net/wireless/ath/ath11k/reg.c
++++ b/drivers/net/wireless/ath/ath11k/reg.c
+@@ -198,7 +198,7 @@ static void ath11k_copy_regd(struct ieee80211_regdomain *regd_orig,
+ sizeof(struct ieee80211_reg_rule));
+ }
+
+-int ath11k_regd_update(struct ath11k *ar, bool init)
++int ath11k_regd_update(struct ath11k *ar)
+ {
+ struct ieee80211_regdomain *regd, *regd_copy = NULL;
+ int ret, regd_len, pdev_id;
+@@ -209,7 +209,10 @@ int ath11k_regd_update(struct ath11k *ar, bool init)
+
+ spin_lock_bh(&ab->base_lock);
+
+- if (init) {
++ /* Prefer the latest regd update over default if it's available */
++ if (ab->new_regd[pdev_id]) {
++ regd = ab->new_regd[pdev_id];
++ } else {
+ /* Apply the regd received during init through
+ * WMI_REG_CHAN_LIST_CC event. In case of failure to
+ * receive the regd, initialize with a default world
+@@ -222,8 +225,6 @@ int ath11k_regd_update(struct ath11k *ar, bool init)
+ "failed to receive default regd during init\n");
+ regd = (struct ieee80211_regdomain *)&ath11k_world_regd;
+ }
+- } else {
+- regd = ab->new_regd[pdev_id];
+ }
+
+ if (!regd) {
+@@ -246,11 +247,7 @@ int ath11k_regd_update(struct ath11k *ar, bool init)
+ goto err;
+ }
+
+- rtnl_lock();
+- wiphy_lock(ar->hw->wiphy);
+- ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy);
+- wiphy_unlock(ar->hw->wiphy);
+- rtnl_unlock();
++ ret = regulatory_set_wiphy_regd(ar->hw->wiphy, regd_copy);
+
+ kfree(regd_copy);
+
+@@ -458,6 +455,9 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw)
+ {
+ u16 bw;
+
++ if (end_freq <= start_freq)
++ return 0;
++
+ bw = end_freq - start_freq;
+ bw = min_t(u16, bw, max_bw);
+
+@@ -465,8 +465,10 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw)
+ bw = 80;
+ else if (bw >= 40 && bw < 80)
+ bw = 40;
+- else if (bw < 40)
++ else if (bw >= 20 && bw < 40)
+ bw = 20;
++ else
++ bw = 0;
+
+ return bw;
+ }
+@@ -490,73 +492,77 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab,
+ struct cur_reg_rule *reg_rule,
+ u8 *rule_idx, u32 flags, u16 max_bw)
+ {
++ u32 start_freq;
+ u32 end_freq;
+ u16 bw;
+ u8 i;
+
+ i = *rule_idx;
+
++ /* there might be situations when even the input rule must be dropped */
++ i--;
++
++ /* frequencies below weather radar */
+ bw = ath11k_reg_adjust_bw(reg_rule->start_freq,
+ ETSI_WEATHER_RADAR_BAND_LOW, max_bw);
++ if (bw > 0) {
++ i++;
+
+- ath11k_reg_update_rule(regd->reg_rules + i, reg_rule->start_freq,
+- ETSI_WEATHER_RADAR_BAND_LOW, bw,
+- reg_rule->ant_gain, reg_rule->reg_power,
+- flags);
++ ath11k_reg_update_rule(regd->reg_rules + i,
++ reg_rule->start_freq,
++ ETSI_WEATHER_RADAR_BAND_LOW, bw,
++ reg_rule->ant_gain, reg_rule->reg_power,
++ flags);
+
+- ath11k_dbg(ab, ATH11K_DBG_REG,
+- "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
+- i + 1, reg_rule->start_freq, ETSI_WEATHER_RADAR_BAND_LOW,
+- bw, reg_rule->ant_gain, reg_rule->reg_power,
+- regd->reg_rules[i].dfs_cac_ms,
+- flags);
+-
+- if (reg_rule->end_freq > ETSI_WEATHER_RADAR_BAND_HIGH)
+- end_freq = ETSI_WEATHER_RADAR_BAND_HIGH;
+- else
+- end_freq = reg_rule->end_freq;
++ ath11k_dbg(ab, ATH11K_DBG_REG,
++ "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
++ i + 1, reg_rule->start_freq,
++ ETSI_WEATHER_RADAR_BAND_LOW, bw, reg_rule->ant_gain,
++ reg_rule->reg_power, regd->reg_rules[i].dfs_cac_ms,
++ flags);
++ }
+
+- bw = ath11k_reg_adjust_bw(ETSI_WEATHER_RADAR_BAND_LOW, end_freq,
+- max_bw);
++ /* weather radar frequencies */
++ start_freq = max_t(u32, reg_rule->start_freq,
++ ETSI_WEATHER_RADAR_BAND_LOW);
++ end_freq = min_t(u32, reg_rule->end_freq, ETSI_WEATHER_RADAR_BAND_HIGH);
+
+- i++;
++ bw = ath11k_reg_adjust_bw(start_freq, end_freq, max_bw);
++ if (bw > 0) {
++ i++;
+
+- ath11k_reg_update_rule(regd->reg_rules + i,
+- ETSI_WEATHER_RADAR_BAND_LOW, end_freq, bw,
+- reg_rule->ant_gain, reg_rule->reg_power,
+- flags);
++ ath11k_reg_update_rule(regd->reg_rules + i, start_freq,
++ end_freq, bw, reg_rule->ant_gain,
++ reg_rule->reg_power, flags);
+
+- regd->reg_rules[i].dfs_cac_ms = ETSI_WEATHER_RADAR_BAND_CAC_TIMEOUT;
++ regd->reg_rules[i].dfs_cac_ms = ETSI_WEATHER_RADAR_BAND_CAC_TIMEOUT;
+
+- ath11k_dbg(ab, ATH11K_DBG_REG,
+- "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
+- i + 1, ETSI_WEATHER_RADAR_BAND_LOW, end_freq,
+- bw, reg_rule->ant_gain, reg_rule->reg_power,
+- regd->reg_rules[i].dfs_cac_ms,
+- flags);
+-
+- if (end_freq == reg_rule->end_freq) {
+- regd->n_reg_rules--;
+- *rule_idx = i;
+- return;
++ ath11k_dbg(ab, ATH11K_DBG_REG,
++ "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
++ i + 1, start_freq, end_freq, bw,
++ reg_rule->ant_gain, reg_rule->reg_power,
++ regd->reg_rules[i].dfs_cac_ms, flags);
+ }
+
++ /* frequencies above weather radar */
+ bw = ath11k_reg_adjust_bw(ETSI_WEATHER_RADAR_BAND_HIGH,
+ reg_rule->end_freq, max_bw);
++ if (bw > 0) {
++ i++;
+
+- i++;
+-
+- ath11k_reg_update_rule(regd->reg_rules + i, ETSI_WEATHER_RADAR_BAND_HIGH,
+- reg_rule->end_freq, bw,
+- reg_rule->ant_gain, reg_rule->reg_power,
+- flags);
++ ath11k_reg_update_rule(regd->reg_rules + i,
++ ETSI_WEATHER_RADAR_BAND_HIGH,
++ reg_rule->end_freq, bw,
++ reg_rule->ant_gain, reg_rule->reg_power,
++ flags);
+
+- ath11k_dbg(ab, ATH11K_DBG_REG,
+- "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
+- i + 1, ETSI_WEATHER_RADAR_BAND_HIGH, reg_rule->end_freq,
+- bw, reg_rule->ant_gain, reg_rule->reg_power,
+- regd->reg_rules[i].dfs_cac_ms,
+- flags);
++ ath11k_dbg(ab, ATH11K_DBG_REG,
++ "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n",
++ i + 1, ETSI_WEATHER_RADAR_BAND_HIGH,
++ reg_rule->end_freq, bw, reg_rule->ant_gain,
++ reg_rule->reg_power, regd->reg_rules[i].dfs_cac_ms,
++ flags);
++ }
+
+ *rule_idx = i;
+ }
+@@ -683,7 +689,7 @@ void ath11k_regd_update_work(struct work_struct *work)
+ regd_update_work);
+ int ret;
+
+- ret = ath11k_regd_update(ar, false);
++ ret = ath11k_regd_update(ar);
+ if (ret) {
+ /* Firmware has already moved to the new regd. We need
+ * to maintain channel consistency across FW, Host driver
+diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h
+index 65d56d44796f6..5fb9dc03a74e8 100644
+--- a/drivers/net/wireless/ath/ath11k/reg.h
++++ b/drivers/net/wireless/ath/ath11k/reg.h
+@@ -31,6 +31,6 @@ void ath11k_regd_update_work(struct work_struct *work);
+ struct ieee80211_regdomain *
+ ath11k_reg_build_regd(struct ath11k_base *ab,
+ struct cur_regulatory_info *reg_info, bool intersect);
+-int ath11k_regd_update(struct ath11k *ar, bool init);
++int ath11k_regd_update(struct ath11k *ar);
+ int ath11k_reg_update_chan_list(struct ath11k *ar);
+ #endif
+diff --git a/drivers/net/wireless/ath/ath11k/spectral.c b/drivers/net/wireless/ath/ath11k/spectral.c
+index 1afe677596594..e5af9358e6105 100644
+--- a/drivers/net/wireless/ath/ath11k/spectral.c
++++ b/drivers/net/wireless/ath/ath11k/spectral.c
+@@ -214,7 +214,10 @@ static int ath11k_spectral_scan_config(struct ath11k *ar,
+ return -ENODEV;
+
+ arvif->spectral_enabled = (mode != ATH11K_SPECTRAL_DISABLED);
++
++ spin_lock_bh(&ar->spectral.lock);
+ ar->spectral.mode = mode;
++ spin_unlock_bh(&ar->spectral.lock);
+
+ ret = ath11k_wmi_vdev_spectral_enable(ar, arvif->vdev_id,
+ ATH11K_WMI_SPECTRAL_TRIGGER_CMD_CLEAR,
+@@ -829,9 +832,6 @@ static inline void ath11k_spectral_ring_free(struct ath11k *ar)
+ {
+ struct ath11k_spectral *sp = &ar->spectral;
+
+- if (!sp->enabled)
+- return;
+-
+ ath11k_dbring_srng_cleanup(ar, &sp->rx_ring);
+ ath11k_dbring_buf_cleanup(ar, &sp->rx_ring);
+ }
+@@ -883,15 +883,16 @@ void ath11k_spectral_deinit(struct ath11k_base *ab)
+ if (!sp->enabled)
+ continue;
+
+- ath11k_spectral_debug_unregister(ar);
+- ath11k_spectral_ring_free(ar);
++ mutex_lock(&ar->conf_mutex);
++ ath11k_spectral_scan_config(ar, ATH11K_SPECTRAL_DISABLED);
++ mutex_unlock(&ar->conf_mutex);
+
+ spin_lock_bh(&sp->lock);
+-
+- sp->mode = ATH11K_SPECTRAL_DISABLED;
+ sp->enabled = false;
+-
+ spin_unlock_bh(&sp->lock);
++
++ ath11k_spectral_debug_unregister(ar);
++ ath11k_spectral_ring_free(ar);
+ }
+ }
+
+diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
+index 6c253eae9d069..b11070cf159cc 100644
+--- a/drivers/net/wireless/ath/ath11k/wmi.c
++++ b/drivers/net/wireless/ath/ath11k/wmi.c
+@@ -1339,6 +1339,7 @@ int ath11k_wmi_pdev_bss_chan_info_request(struct ath11k *ar,
+ WMI_TAG_PDEV_BSS_CHAN_INFO_REQUEST) |
+ FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+ cmd->req_type = type;
++ cmd->pdev_id = ar->pdev->pdev_id;
+
+ ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+ "WMI bss chan info req type %d\n", type);
+@@ -1670,7 +1671,8 @@ int ath11k_wmi_vdev_install_key(struct ath11k *ar,
+ tlv = (struct wmi_tlv *)(skb->data + sizeof(*cmd));
+ tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) |
+ FIELD_PREP(WMI_TLV_LEN, key_len_aligned);
+- memcpy(tlv->value, (u8 *)arg->key_data, key_len_aligned);
++ if (arg->key_data)
++ memcpy(tlv->value, (u8 *)arg->key_data, key_len_aligned);
+
+ ret = ath11k_wmi_cmd_send(wmi, skb, WMI_VDEV_INSTALL_KEY_CMDID);
+ if (ret) {
+@@ -2050,7 +2052,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar,
+ void *ptr;
+ int i, ret, len;
+ u32 *tmp_ptr;
+- u8 extraie_len_with_pad = 0;
++ u16 extraie_len_with_pad = 0;
+ struct hint_short_ssid *s_ssid = NULL;
+ struct hint_bssid *hint_bssid = NULL;
+
+@@ -2069,7 +2071,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar,
+ len += sizeof(*bssid) * params->num_bssid;
+
+ len += TLV_HDR_SIZE;
+- if (params->extraie.len)
++ if (params->extraie.len && params->extraie.len <= 0xFFFF)
+ extraie_len_with_pad =
+ roundup(params->extraie.len, sizeof(u32));
+ len += extraie_len_with_pad;
+@@ -2176,7 +2178,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar,
+ FIELD_PREP(WMI_TLV_LEN, len);
+ ptr += TLV_HDR_SIZE;
+
+- if (params->extraie.len)
++ if (extraie_len_with_pad)
+ memcpy(ptr, params->extraie.ptr,
+ params->extraie.len);
+
+@@ -5792,6 +5794,17 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk
+
+ pdev_idx = reg_info->phy_id;
+
++ /* Avoid default reg rule updates sent during FW recovery if
++ * it is already available
++ */
++ spin_lock(&ab->base_lock);
++ if (test_bit(ATH11K_FLAG_RECOVERY, &ab->dev_flags) &&
++ ab->default_regd[pdev_idx]) {
++ spin_unlock(&ab->base_lock);
++ goto mem_free;
++ }
++ spin_unlock(&ab->base_lock);
++
+ if (pdev_idx >= ab->num_radios) {
+ /* Process the event for phy0 only if single_pdev_only
+ * is true. If pdev_idx is valid but not 0, discard the
+@@ -5829,10 +5842,10 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk
+ }
+
+ spin_lock(&ab->base_lock);
+- if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags)) {
+- /* Once mac is registered, ar is valid and all CC events from
+- * fw is considered to be received due to user requests
+- * currently.
++ if (ab->default_regd[pdev_idx]) {
++ /* The initial rules from FW after WMI Init is to build
++ * the default regd. From then on, any rules updated for
++ * the pdev could be due to user reg changes.
+ * Free previously built regd before assigning the newly
+ * generated regd to ar. NULL pointer handling will be
+ * taken care by kfree itself.
+@@ -5840,15 +5853,11 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk
+ ar = ab->pdevs[pdev_idx].ar;
+ kfree(ab->new_regd[pdev_idx]);
+ ab->new_regd[pdev_idx] = regd;
+- ieee80211_queue_work(ar->hw, &ar->regd_update_work);
++ queue_work(ab->workqueue, &ar->regd_update_work);
+ } else {
+- /* Multiple events for the same *ar is not expected. But we
+- * can still clear any previously stored default_regd if we
+- * are receiving this event for the same radio by mistake.
+- * NULL pointer handling will be taken care by kfree itself.
++ /* This regd would be applied during mac registration and is
++ * held constant throughout for regd intersection purpose
+ */
+- kfree(ab->default_regd[pdev_idx]);
+- /* This regd would be applied during mac registration */
+ ab->default_regd[pdev_idx] = regd;
+ }
+ ab->dfs_region = reg_info->dfs_region;
+@@ -6119,8 +6128,10 @@ static void ath11k_mgmt_rx_event(struct ath11k_base *ab, struct sk_buff *skb)
+ if (rx_ev.status & WMI_RX_STATUS_ERR_MIC)
+ status->flag |= RX_FLAG_MMIC_ERROR;
+
+- if (rx_ev.chan_freq >= ATH11K_MIN_6G_FREQ) {
++ if (rx_ev.chan_freq >= ATH11K_MIN_6G_FREQ &&
++ rx_ev.chan_freq <= ATH11K_MAX_6G_FREQ) {
+ status->band = NL80211_BAND_6GHZ;
++ status->freq = rx_ev.chan_freq;
+ } else if (rx_ev.channel >= 1 && rx_ev.channel <= 14) {
+ status->band = NL80211_BAND_2GHZ;
+ } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH11K_MAX_5G_CHAN) {
+@@ -6141,8 +6152,10 @@ static void ath11k_mgmt_rx_event(struct ath11k_base *ab, struct sk_buff *skb)
+
+ sband = &ar->mac.sbands[status->band];
+
+- status->freq = ieee80211_channel_to_frequency(rx_ev.channel,
+- status->band);
++ if (status->band != NL80211_BAND_6GHZ)
++ status->freq = ieee80211_channel_to_frequency(rx_ev.channel,
++ status->band);
++
+ status->signal = rx_ev.snr + ATH11K_DEFAULT_NOISE_FLOOR;
+ status->rate_idx = ath11k_mac_bitrate_to_idx(sband, rx_ev.rate / 100);
+
+@@ -6301,6 +6314,8 @@ static void ath11k_scan_event(struct ath11k_base *ab, struct sk_buff *skb)
+ ath11k_wmi_event_scan_start_failed(ar);
+ break;
+ case WMI_SCAN_EVENT_DEQUEUED:
++ __ath11k_mac_scan_finish(ar);
++ break;
+ case WMI_SCAN_EVENT_PREEMPTED:
+ case WMI_SCAN_EVENT_RESTARTED:
+ case WMI_SCAN_EVENT_FOREIGN_CHAN_EXIT:
+diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
+index d35c47e0b19d4..0b7d337b36930 100644
+--- a/drivers/net/wireless/ath/ath11k/wmi.h
++++ b/drivers/net/wireless/ath/ath11k/wmi.h
+@@ -2960,6 +2960,7 @@ struct wmi_pdev_bss_chan_info_req_cmd {
+ u32 tlv_header;
+ /* ref wmi_bss_chan_info_req_type */
+ u32 req_type;
++ u32 pdev_id;
+ } __packed;
+
+ struct wmi_ap_ps_peer_cmd {
+@@ -4056,7 +4057,6 @@ struct wmi_vdev_stopped_event {
+ } __packed;
+
+ struct wmi_pdev_bss_chan_info_event {
+- u32 pdev_id;
+ u32 freq; /* Units in MHz */
+ u32 noise_floor; /* units are dBm */
+ /* rx clear - how often the channel was unused */
+@@ -4074,6 +4074,7 @@ struct wmi_pdev_bss_chan_info_event {
+ /*rx_cycle cnt for my bss in 64bits format */
+ u32 rx_bss_cycle_count_low;
+ u32 rx_bss_cycle_count_high;
++ u32 pdev_id;
+ } __packed;
+
+ #define WMI_VDEV_INSTALL_KEY_COMPL_STATUS_SUCCESS 0
+diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c
+index 1fbc2c19848f2..58d3e86f6256d 100644
+--- a/drivers/net/wireless/ath/ath5k/eeprom.c
++++ b/drivers/net/wireless/ath/ath5k/eeprom.c
+@@ -529,7 +529,7 @@ ath5k_eeprom_read_freq_list(struct ath5k_hw *ah, int *offset, int max,
+ ee->ee_n_piers[mode]++;
+
+ freq2 = (val >> 8) & 0xff;
+- if (!freq2)
++ if (!freq2 || i >= max)
+ break;
+
+ pc[i++].freq = ath5k_eeprom_bin2freq(ee,
+@@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode,
+ }
+ }
+
++ if (idx == AR5K_EEPROM_N_PD_CURVES)
++ goto err_out;
++
+ ee->ee_pd_gains[mode] = 1;
+
+ pd = &chinfo[pier].pd_curves[idx];
+diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c
+index bde5a10d470c8..af98e871199d3 100644
+--- a/drivers/net/wireless/ath/ath6kl/bmi.c
++++ b/drivers/net/wireless/ath/ath6kl/bmi.c
+@@ -246,7 +246,7 @@ int ath6kl_bmi_execute(struct ath6kl *ar, u32 addr, u32 *param)
+ return -EACCES;
+ }
+
+- size = sizeof(cid) + sizeof(addr) + sizeof(param);
++ size = sizeof(cid) + sizeof(addr) + sizeof(*param);
+ if (size > ar->bmi.max_cmd_size) {
+ WARN_ON(1);
+ return -EINVAL;
+diff --git a/drivers/net/wireless/ath/ath6kl/htc.h b/drivers/net/wireless/ath/ath6kl/htc.h
+index 112d8a9b8d431..d3534a29c4f05 100644
+--- a/drivers/net/wireless/ath/ath6kl/htc.h
++++ b/drivers/net/wireless/ath/ath6kl/htc.h
+@@ -153,12 +153,19 @@
+ * implementations.
+ */
+ struct htc_frame_hdr {
+- u8 eid;
+- u8 flags;
+-
+- /* length of data (including trailer) that follows the header */
+- __le16 payld_len;
+-
++ struct_group_tagged(htc_frame_look_ahead, header,
++ union {
++ struct {
++ u8 eid;
++ u8 flags;
++
++ /* length of data (including trailer) that follows the header */
++ __le16 payld_len;
++
++ };
++ u32 word;
++ };
++ );
+ /* end of 4-byte lookahead */
+
+ u8 ctrl[2];
+diff --git a/drivers/net/wireless/ath/ath6kl/htc_mbox.c b/drivers/net/wireless/ath/ath6kl/htc_mbox.c
+index 998947ef63b6e..e3874421c4c0c 100644
+--- a/drivers/net/wireless/ath/ath6kl/htc_mbox.c
++++ b/drivers/net/wireless/ath/ath6kl/htc_mbox.c
+@@ -2260,19 +2260,16 @@ int ath6kl_htc_rxmsg_pending_handler(struct htc_target *target,
+ static struct htc_packet *htc_wait_for_ctrl_msg(struct htc_target *target)
+ {
+ struct htc_packet *packet = NULL;
+- struct htc_frame_hdr *htc_hdr;
+- u32 look_ahead;
++ struct htc_frame_look_ahead look_ahead;
+
+- if (ath6kl_hif_poll_mboxmsg_rx(target->dev, &look_ahead,
++ if (ath6kl_hif_poll_mboxmsg_rx(target->dev, &look_ahead.word,
+ HTC_TARGET_RESPONSE_TIMEOUT))
+ return NULL;
+
+ ath6kl_dbg(ATH6KL_DBG_HTC,
+- "htc rx wait ctrl look_ahead 0x%X\n", look_ahead);
+-
+- htc_hdr = (struct htc_frame_hdr *)&look_ahead;
++ "htc rx wait ctrl look_ahead 0x%X\n", look_ahead.word);
+
+- if (htc_hdr->eid != ENDPOINT_0)
++ if (look_ahead.eid != ENDPOINT_0)
+ return NULL;
+
+ packet = htc_get_control_buf(target, false);
+@@ -2281,8 +2278,8 @@ static struct htc_packet *htc_wait_for_ctrl_msg(struct htc_target *target)
+ return NULL;
+
+ packet->info.rx.rx_flags = 0;
+- packet->info.rx.exp_hdr = look_ahead;
+- packet->act_len = le16_to_cpu(htc_hdr->payld_len) + HTC_HDR_LENGTH;
++ packet->info.rx.exp_hdr = look_ahead.word;
++ packet->act_len = le16_to_cpu(look_ahead.payld_len) + HTC_HDR_LENGTH;
+
+ if (packet->act_len > packet->buf_len)
+ goto fail_ctrl_rx;
+diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+index c68848819a52d..9b88d96bfe96c 100644
+--- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c
++++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+@@ -960,8 +960,8 @@ static int ath6kl_htc_pipe_rx_complete(struct ath6kl *ar, struct sk_buff *skb,
+ * Thus the possibility of ar->htc_target being NULL
+ * via ath6kl_recv_complete -> ath6kl_usb_io_comp_work.
+ */
+- if (WARN_ON_ONCE(!target)) {
+- ath6kl_err("Target not yet initialized\n");
++ if (!target) {
++ ath6kl_dbg(ATH6KL_DBG_HTC, "Target not yet initialized\n");
+ status = -EINVAL;
+ goto free_skb;
+ }
+diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c
+index 5372e948e761d..aba70f35e574b 100644
+--- a/drivers/net/wireless/ath/ath6kl/usb.c
++++ b/drivers/net/wireless/ath/ath6kl/usb.c
+@@ -340,6 +340,11 @@ static int ath6kl_usb_setup_pipe_resources(struct ath6kl_usb *ar_usb)
+ le16_to_cpu(endpoint->wMaxPacketSize),
+ endpoint->bInterval);
+ }
++
++ /* Ignore broken descriptors. */
++ if (usb_endpoint_maxp(endpoint) == 0)
++ continue;
++
+ urbcount = 0;
+
+ pipe_num =
+@@ -907,7 +912,7 @@ static int ath6kl_usb_submit_ctrl_in(struct ath6kl_usb *ar_usb,
+ req,
+ USB_DIR_IN | USB_TYPE_VENDOR |
+ USB_RECIP_DEVICE, value, index, buf,
+- size, 2 * HZ);
++ size, 2000);
+
+ if (ret < 0) {
+ ath6kl_warn("Failed to read usb control message: %d\n", ret);
+diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+index b0a4ca3559fd8..abed1effd95ca 100644
+--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
++++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+@@ -5615,7 +5615,7 @@ unsigned int ar9003_get_paprd_scale_factor(struct ath_hw *ah,
+
+ static u8 ar9003_get_eepmisc(struct ath_hw *ah)
+ {
+- return ah->eeprom.map4k.baseEepHeader.eepMisc;
++ return ah->eeprom.ar9300_eep.baseEepHeader.opCapFlags.eepMisc;
+ }
+
+ const struct eeprom_ops eep_ar9300_ops = {
+diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
+index 42f00a2a8c800..cf5648188459c 100644
+--- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
++++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
+@@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue)
+ {
+ u32 dma_dbg_chain, dma_dbg_complete;
+ u8 dcu_chain_state, dcu_complete_state;
++ unsigned int dbg_reg, reg_offset;
+ int i;
+
+- for (i = 0; i < NUM_STATUS_READS; i++) {
+- if (queue < 6)
+- dma_dbg_chain = REG_READ(ah, AR_DMADBG_4);
+- else
+- dma_dbg_chain = REG_READ(ah, AR_DMADBG_5);
++ if (queue < 6) {
++ dbg_reg = AR_DMADBG_4;
++ reg_offset = queue * 5;
++ } else {
++ dbg_reg = AR_DMADBG_5;
++ reg_offset = (queue - 6) * 5;
++ }
+
++ for (i = 0; i < NUM_STATUS_READS; i++) {
++ dma_dbg_chain = REG_READ(ah, dbg_reg);
+ dma_dbg_complete = REG_READ(ah, AR_DMADBG_6);
+
+- dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f;
++ dcu_chain_state = (dma_dbg_chain >> reg_offset) & 0x1f;
+ dcu_complete_state = dma_dbg_complete & 0x3;
+
+ if ((dcu_chain_state != 0x6) || (dcu_complete_state != 0x1))
+@@ -1128,6 +1133,7 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
+ u8 dcu_chain_state, dcu_complete_state;
+ bool dcu_wait_frdone = false;
+ unsigned long chk_dcu = 0;
++ unsigned int reg_offset;
+ unsigned int i = 0;
+
+ dma_dbg_4 = REG_READ(ah, AR_DMADBG_4);
+@@ -1139,12 +1145,15 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah)
+ goto exit;
+
+ for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
+- if (i < 6)
++ if (i < 6) {
+ chk_dbg = dma_dbg_4;
+- else
++ reg_offset = i * 5;
++ } else {
+ chk_dbg = dma_dbg_5;
++ reg_offset = (i - 6) * 5;
++ }
+
+- dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f;
++ dcu_chain_state = (chk_dbg >> reg_offset) & 0x1f;
+ if (dcu_chain_state == 0x6) {
+ dcu_wait_frdone = true;
+ chk_dcu |= BIT(i);
+diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h
+index a171dbb29fbb6..ad949eb02f3d2 100644
+--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h
++++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h
+@@ -720,7 +720,7 @@
+ #define AR_CH0_TOP2 (AR_SREV_9300(ah) ? 0x1628c : \
+ (AR_SREV_9462(ah) ? 0x16290 : 0x16284))
+ #define AR_CH0_TOP2_XPABIASLVL (AR_SREV_9561(ah) ? 0x1e00 : 0xf000)
+-#define AR_CH0_TOP2_XPABIASLVL_S 12
++#define AR_CH0_TOP2_XPABIASLVL_S (AR_SREV_9561(ah) ? 9 : 12)
+
+ #define AR_CH0_XTAL (AR_SREV_9300(ah) ? 0x16294 : \
+ ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x16298 : \
+diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
+index 860da13bfb6ac..e0130beb304df 100644
+--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
++++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
+@@ -244,11 +244,11 @@ static inline void ath9k_skb_queue_complete(struct hif_device_usb *hif_dev,
+ ath9k_htc_txcompletion_cb(hif_dev->htc_handle,
+ skb, txok);
+ if (txok) {
+- TX_STAT_INC(skb_success);
+- TX_STAT_ADD(skb_success_bytes, ln);
++ TX_STAT_INC(hif_dev, skb_success);
++ TX_STAT_ADD(hif_dev, skb_success_bytes, ln);
+ }
+ else
+- TX_STAT_INC(skb_failed);
++ TX_STAT_INC(hif_dev, skb_failed);
+ }
+ }
+
+@@ -302,7 +302,7 @@ static void hif_usb_tx_cb(struct urb *urb)
+ hif_dev->tx.tx_buf_cnt++;
+ if (!(hif_dev->tx.flags & HIF_USB_TX_STOP))
+ __hif_usb_tx(hif_dev); /* Check for pending SKBs */
+- TX_STAT_INC(buf_completed);
++ TX_STAT_INC(hif_dev, buf_completed);
+ spin_unlock(&hif_dev->tx.tx_lock);
+ }
+
+@@ -353,7 +353,7 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
+ tx_buf->len += tx_buf->offset;
+
+ __skb_queue_tail(&tx_buf->skb_queue, nskb);
+- TX_STAT_INC(skb_queued);
++ TX_STAT_INC(hif_dev, skb_queued);
+ }
+
+ usb_fill_bulk_urb(tx_buf->urb, hif_dev->udev,
+@@ -368,11 +368,10 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
+ __skb_queue_head_init(&tx_buf->skb_queue);
+ list_move_tail(&tx_buf->list, &hif_dev->tx.tx_buf);
+ hif_dev->tx.tx_buf_cnt++;
++ } else {
++ TX_STAT_INC(hif_dev, buf_queued);
+ }
+
+- if (!ret)
+- TX_STAT_INC(buf_queued);
+-
+ return ret;
+ }
+
+@@ -515,7 +514,7 @@ static void hif_usb_sta_drain(void *hif_handle, u8 idx)
+ ath9k_htc_txcompletion_cb(hif_dev->htc_handle,
+ skb, false);
+ hif_dev->tx.tx_skb_cnt--;
+- TX_STAT_INC(skb_failed);
++ TX_STAT_INC(hif_dev, skb_failed);
+ }
+ }
+
+@@ -535,6 +534,24 @@ static struct ath9k_htc_hif hif_usb = {
+ .send = hif_usb_send,
+ };
+
++/* Need to free remain_skb allocated in ath9k_hif_usb_rx_stream
++ * in case ath9k_hif_usb_rx_stream wasn't called next time to
++ * process the buffer and subsequently free it.
++ */
++static void ath9k_hif_usb_free_rx_remain_skb(struct hif_device_usb *hif_dev)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&hif_dev->rx_lock, flags);
++ if (hif_dev->remain_skb) {
++ dev_kfree_skb_any(hif_dev->remain_skb);
++ hif_dev->remain_skb = NULL;
++ hif_dev->rx_remain_len = 0;
++ RX_STAT_INC(hif_dev, skb_dropped);
++ }
++ spin_unlock_irqrestore(&hif_dev->rx_lock, flags);
++}
++
+ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+ struct sk_buff *skb)
+ {
+@@ -562,11 +579,11 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+ memcpy(ptr, skb->data, rx_remain_len);
+
+ rx_pkt_len += rx_remain_len;
+- hif_dev->rx_remain_len = 0;
+ skb_put(remain_skb, rx_pkt_len);
+
+ skb_pool[pool_index++] = remain_skb;
+-
++ hif_dev->remain_skb = NULL;
++ hif_dev->rx_remain_len = 0;
+ } else {
+ index = rx_remain_len;
+ }
+@@ -585,9 +602,21 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+ pkt_len = get_unaligned_le16(ptr + index);
+ pkt_tag = get_unaligned_le16(ptr + index + 2);
+
++ /* It is supposed that if we have an invalid pkt_tag or
++ * pkt_len then the whole input SKB is considered invalid
++ * and dropped; the associated packets already in skb_pool
++ * are dropped, too.
++ */
+ if (pkt_tag != ATH_USB_RX_STREAM_MODE_TAG) {
+- RX_STAT_INC(skb_dropped);
+- return;
++ RX_STAT_INC(hif_dev, skb_dropped);
++ goto invalid_pkt;
++ }
++
++ if (pkt_len > 2 * MAX_RX_BUF_SIZE) {
++ dev_err(&hif_dev->udev->dev,
++ "ath9k_htc: invalid pkt_len (%x)\n", pkt_len);
++ RX_STAT_INC(hif_dev, skb_dropped);
++ goto invalid_pkt;
+ }
+
+ pad_len = 4 - (pkt_len & 0x3);
+@@ -599,11 +628,6 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+
+ if (index > MAX_RX_BUF_SIZE) {
+ spin_lock(&hif_dev->rx_lock);
+- hif_dev->rx_remain_len = index - MAX_RX_BUF_SIZE;
+- hif_dev->rx_transfer_len =
+- MAX_RX_BUF_SIZE - chk_idx - 4;
+- hif_dev->rx_pad_len = pad_len;
+-
+ nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC);
+ if (!nskb) {
+ dev_err(&hif_dev->udev->dev,
+@@ -611,8 +635,14 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+ spin_unlock(&hif_dev->rx_lock);
+ goto err;
+ }
++
++ hif_dev->rx_remain_len = index - MAX_RX_BUF_SIZE;
++ hif_dev->rx_transfer_len =
++ MAX_RX_BUF_SIZE - chk_idx - 4;
++ hif_dev->rx_pad_len = pad_len;
++
+ skb_reserve(nskb, 32);
+- RX_STAT_INC(skb_allocated);
++ RX_STAT_INC(hif_dev, skb_allocated);
+
+ memcpy(nskb->data, &(skb->data[chk_idx+4]),
+ hif_dev->rx_transfer_len);
+@@ -633,7 +663,7 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+ goto err;
+ }
+ skb_reserve(nskb, 32);
+- RX_STAT_INC(skb_allocated);
++ RX_STAT_INC(hif_dev, skb_allocated);
+
+ memcpy(nskb->data, &(skb->data[chk_idx+4]), pkt_len);
+ skb_put(nskb, pkt_len);
+@@ -643,11 +673,18 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
+
+ err:
+ for (i = 0; i < pool_index; i++) {
+- RX_STAT_ADD(skb_completed_bytes, skb_pool[i]->len);
++ RX_STAT_ADD(hif_dev, skb_completed_bytes, skb_pool[i]->len);
+ ath9k_htc_rx_msg(hif_dev->htc_handle, skb_pool[i],
+ skb_pool[i]->len, USB_WLAN_RX_PIPE);
+- RX_STAT_INC(skb_completed);
++ RX_STAT_INC(hif_dev, skb_completed);
+ }
++ return;
++invalid_pkt:
++ for (i = 0; i < pool_index; i++) {
++ dev_kfree_skb_any(skb_pool[i]);
++ RX_STAT_INC(hif_dev, skb_dropped);
++ }
++ return;
+ }
+
+ static void ath9k_hif_usb_rx_cb(struct urb *urb)
+@@ -702,14 +739,13 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb)
+ struct rx_buf *rx_buf = (struct rx_buf *)urb->context;
+ struct hif_device_usb *hif_dev = rx_buf->hif_dev;
+ struct sk_buff *skb = rx_buf->skb;
+- struct sk_buff *nskb;
+ int ret;
+
+ if (!skb)
+ return;
+
+ if (!hif_dev)
+- goto free;
++ goto free_skb;
+
+ switch (urb->status) {
+ case 0:
+@@ -718,7 +754,7 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb)
+ case -ECONNRESET:
+ case -ENODEV:
+ case -ESHUTDOWN:
+- goto free;
++ goto free_skb;
+ default:
+ skb_reset_tail_pointer(skb);
+ skb_trim(skb, 0);
+@@ -729,25 +765,27 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb)
+ if (likely(urb->actual_length != 0)) {
+ skb_put(skb, urb->actual_length);
+
+- /* Process the command first */
++ /*
++ * Process the command first.
++ * skb is either freed here or passed to be
++ * managed to another callback function.
++ */
+ ath9k_htc_rx_msg(hif_dev->htc_handle, skb,
+ skb->len, USB_REG_IN_PIPE);
+
+-
+- nskb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC);
+- if (!nskb) {
++ skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC);
++ if (!skb) {
+ dev_err(&hif_dev->udev->dev,
+ "ath9k_htc: REG_IN memory allocation failure\n");
+- urb->context = NULL;
+- return;
++ goto free_rx_buf;
+ }
+
+- rx_buf->skb = nskb;
++ rx_buf->skb = skb;
+
+ usb_fill_int_urb(urb, hif_dev->udev,
+ usb_rcvintpipe(hif_dev->udev,
+ USB_REG_IN_PIPE),
+- nskb->data, MAX_REG_IN_BUF_SIZE,
++ skb->data, MAX_REG_IN_BUF_SIZE,
+ ath9k_hif_usb_reg_in_cb, rx_buf, 1);
+ }
+
+@@ -756,12 +794,13 @@ resubmit:
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (ret) {
+ usb_unanchor_urb(urb);
+- goto free;
++ goto free_skb;
+ }
+
+ return;
+-free:
++free_skb:
+ kfree_skb(skb);
++free_rx_buf:
+ kfree(rx_buf);
+ urb->context = NULL;
+ }
+@@ -774,14 +813,10 @@ static void ath9k_hif_usb_dealloc_tx_urbs(struct hif_device_usb *hif_dev)
+ spin_lock_irqsave(&hif_dev->tx.tx_lock, flags);
+ list_for_each_entry_safe(tx_buf, tx_buf_tmp,
+ &hif_dev->tx.tx_buf, list) {
+- usb_get_urb(tx_buf->urb);
+- spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags);
+- usb_kill_urb(tx_buf->urb);
+ list_del(&tx_buf->list);
+ usb_free_urb(tx_buf->urb);
+ kfree(tx_buf->buf);
+ kfree(tx_buf);
+- spin_lock_irqsave(&hif_dev->tx.tx_lock, flags);
+ }
+ spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags);
+
+@@ -851,6 +886,7 @@ err:
+ static void ath9k_hif_usb_dealloc_rx_urbs(struct hif_device_usb *hif_dev)
+ {
+ usb_kill_anchored_urbs(&hif_dev->rx_submitted);
++ ath9k_hif_usb_free_rx_remain_skb(hif_dev);
+ }
+
+ static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev)
+@@ -1323,10 +1359,24 @@ static int send_eject_command(struct usb_interface *interface)
+ static int ath9k_hif_usb_probe(struct usb_interface *interface,
+ const struct usb_device_id *id)
+ {
++ struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in, *int_out;
+ struct usb_device *udev = interface_to_usbdev(interface);
++ struct usb_host_interface *alt;
+ struct hif_device_usb *hif_dev;
+ int ret = 0;
+
++ /* Verify the expected endpoints are present */
++ alt = interface->cur_altsetting;
++ if (usb_find_common_endpoints(alt, &bulk_in, &bulk_out, &int_in, &int_out) < 0 ||
++ usb_endpoint_num(bulk_in) != USB_WLAN_RX_PIPE ||
++ usb_endpoint_num(bulk_out) != USB_WLAN_TX_PIPE ||
++ usb_endpoint_num(int_in) != USB_REG_IN_PIPE ||
++ usb_endpoint_num(int_out) != USB_REG_OUT_PIPE) {
++ dev_err(&udev->dev,
++ "ath9k_htc: Device endpoint numbers are not the expected ones\n");
++ return -ENODEV;
++ }
++
+ if (id->driver_info == STORAGE_DEVICE)
+ return send_eject_command(interface);
+
+@@ -1393,8 +1443,6 @@ static void ath9k_hif_usb_disconnect(struct usb_interface *interface)
+
+ if (hif_dev->flags & HIF_USB_READY) {
+ ath9k_htc_hw_deinit(hif_dev->htc_handle, unplugged);
+- ath9k_hif_usb_dev_deinit(hif_dev);
+- ath9k_destroy_wmi(hif_dev->htc_handle->drv_priv);
+ ath9k_htc_hw_free(hif_dev->htc_handle);
+ }
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
+index 0a1634238e673..237f4ec2cffd7 100644
+--- a/drivers/net/wireless/ath/ath9k/htc.h
++++ b/drivers/net/wireless/ath/ath9k/htc.h
+@@ -281,6 +281,7 @@ struct ath9k_htc_rxbuf {
+ struct ath9k_htc_rx {
+ struct list_head rxbuf;
+ spinlock_t rxbuflock;
++ bool initialized;
+ };
+
+ #define ATH9K_HTC_TX_CLEANUP_INTERVAL 50 /* ms */
+@@ -305,6 +306,7 @@ struct ath9k_htc_tx {
+ DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM);
+ struct timer_list cleanup_timer;
+ spinlock_t tx_lock;
++ bool initialized;
+ };
+
+ struct ath9k_htc_tx_ctl {
+@@ -325,14 +327,18 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb)
+ }
+
+ #ifdef CONFIG_ATH9K_HTC_DEBUGFS
+-
+-#define TX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c++)
+-#define TX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c += a)
+-#define RX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c++)
+-#define RX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c += a)
+-#define CAB_STAT_INC priv->debug.tx_stats.cab_queued++
+-
+-#define TX_QSTAT_INC(q) (priv->debug.tx_stats.queue_stats[q]++)
++#define __STAT_SAFE(hif_dev, expr) do { ((hif_dev)->htc_handle->drv_priv ? (expr) : 0); } while (0)
++#define CAB_STAT_INC(priv) do { ((priv)->debug.tx_stats.cab_queued++); } while (0)
++#define TX_QSTAT_INC(priv, q) do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0)
++
++#define TX_STAT_INC(hif_dev, c) \
++ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++)
++#define TX_STAT_ADD(hif_dev, c, a) \
++ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c += a)
++#define RX_STAT_INC(hif_dev, c) \
++ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.skbrx_stats.c++)
++#define RX_STAT_ADD(hif_dev, c, a) \
++ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.skbrx_stats.c += a)
+
+ void ath9k_htc_err_stat_rx(struct ath9k_htc_priv *priv,
+ struct ath_rx_status *rs);
+@@ -372,13 +378,13 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw,
+ struct ethtool_stats *stats, u64 *data);
+ #else
+
+-#define TX_STAT_INC(c) do { } while (0)
+-#define TX_STAT_ADD(c, a) do { } while (0)
+-#define RX_STAT_INC(c) do { } while (0)
+-#define RX_STAT_ADD(c, a) do { } while (0)
+-#define CAB_STAT_INC do { } while (0)
++#define TX_STAT_INC(hif_dev, c) do { } while (0)
++#define TX_STAT_ADD(hif_dev, c, a) do { } while (0)
++#define RX_STAT_INC(hif_dev, c) do { } while (0)
++#define RX_STAT_ADD(hif_dev, c, a) do { } while (0)
+
+-#define TX_QSTAT_INC(c) do { } while (0)
++#define CAB_STAT_INC(priv)
++#define TX_QSTAT_INC(priv, c)
+
+ static inline void ath9k_htc_err_stat_rx(struct ath9k_htc_priv *priv,
+ struct ath_rx_status *rs)
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
+index b3ed65e5c4da8..c55aab01fff5d 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
+@@ -491,7 +491,7 @@ int ath9k_htc_init_debug(struct ath_hw *ah)
+
+ priv->debug.debugfs_phy = debugfs_create_dir(KBUILD_MODNAME,
+ priv->hw->wiphy->debugfsdir);
+- if (!priv->debug.debugfs_phy)
++ if (IS_ERR(priv->debug.debugfs_phy))
+ return -ENOMEM;
+
+ ath9k_cmn_spectral_init_debug(&priv->spec_priv, priv->debug.debugfs_phy);
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+index ff61ae34ecdf0..96a3185a96d75 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+@@ -944,7 +944,6 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev,
+ priv->hw = hw;
+ priv->htc = htc_handle;
+ priv->dev = dev;
+- htc_handle->drv_priv = priv;
+ SET_IEEE80211_DEV(hw, priv->dev);
+
+ ret = ath9k_htc_wait_for_target(priv);
+@@ -965,6 +964,8 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev,
+ if (ret)
+ goto err_init;
+
++ htc_handle->drv_priv = priv;
++
+ return 0;
+
+ err_init:
+@@ -987,6 +988,8 @@ void ath9k_htc_disconnect_device(struct htc_target *htc_handle, bool hotunplug)
+
+ ath9k_deinit_device(htc_handle->drv_priv);
+ ath9k_stop_wmi(htc_handle->drv_priv);
++ ath9k_hif_usb_dealloc_urbs((struct hif_device_usb *)htc_handle->hif_dev);
++ ath9k_destroy_wmi(htc_handle->drv_priv);
+ ieee80211_free_hw(htc_handle->drv_priv->hw);
+ }
+ }
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index 8e69e8989f6d3..672789e3c55d0 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -106,20 +106,20 @@ static inline enum htc_endpoint_id get_htc_epid(struct ath9k_htc_priv *priv,
+
+ switch (qnum) {
+ case 0:
+- TX_QSTAT_INC(IEEE80211_AC_VO);
++ TX_QSTAT_INC(priv, IEEE80211_AC_VO);
+ epid = priv->data_vo_ep;
+ break;
+ case 1:
+- TX_QSTAT_INC(IEEE80211_AC_VI);
++ TX_QSTAT_INC(priv, IEEE80211_AC_VI);
+ epid = priv->data_vi_ep;
+ break;
+ case 2:
+- TX_QSTAT_INC(IEEE80211_AC_BE);
++ TX_QSTAT_INC(priv, IEEE80211_AC_BE);
+ epid = priv->data_be_ep;
+ break;
+ case 3:
+ default:
+- TX_QSTAT_INC(IEEE80211_AC_BK);
++ TX_QSTAT_INC(priv, IEEE80211_AC_BK);
+ epid = priv->data_bk_ep;
+ break;
+ }
+@@ -328,7 +328,7 @@ static void ath9k_htc_tx_data(struct ath9k_htc_priv *priv,
+ memcpy(tx_fhdr, (u8 *) &tx_hdr, sizeof(tx_hdr));
+
+ if (is_cab) {
+- CAB_STAT_INC;
++ CAB_STAT_INC(priv);
+ tx_ctl->epid = priv->cab_ep;
+ return;
+ }
+@@ -813,6 +813,11 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv)
+ skb_queue_head_init(&priv->tx.data_vi_queue);
+ skb_queue_head_init(&priv->tx.data_vo_queue);
+ skb_queue_head_init(&priv->tx.tx_failed);
++
++ /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */
++ smp_wmb();
++ priv->tx.initialized = true;
++
+ return 0;
+ }
+
+@@ -1011,6 +1016,14 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ goto rx_next;
+ }
+
++ if (rxstatus->rs_keyix >= ATH_KEYMAX &&
++ rxstatus->rs_keyix != ATH9K_RXKEYIX_INVALID) {
++ ath_dbg(common, ANY,
++ "Invalid keyix, dropping (keyix: %d)\n",
++ rxstatus->rs_keyix);
++ goto rx_next;
++ }
++
+ /* Get the RX status information */
+
+ memset(rx_status, 0, sizeof(struct ieee80211_rx_status));
+@@ -1130,6 +1143,10 @@ void ath9k_htc_rxep(void *drv_priv, struct sk_buff *skb,
+ struct ath9k_htc_rxbuf *rxbuf = NULL, *tmp_buf = NULL;
+ unsigned long flags;
+
++ /* Check if ath9k_rx_init() completed. */
++ if (!data_race(priv->rx.initialized))
++ goto err;
++
+ spin_lock_irqsave(&priv->rx.rxbuflock, flags);
+ list_for_each_entry(tmp_buf, &priv->rx.rxbuf, list) {
+ if (!tmp_buf->in_process) {
+@@ -1185,6 +1202,10 @@ int ath9k_rx_init(struct ath9k_htc_priv *priv)
+ list_add_tail(&rxbuf->list, &priv->rx.rxbuf);
+ }
+
++ /* Allow ath9k_htc_rxep() to operate. */
++ smp_wmb();
++ priv->rx.initialized = true;
++
+ return 0;
+
+ err:
+diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c
+index 510e61e97dbcb..99667aba289df 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_hst.c
++++ b/drivers/net/wireless/ath/ath9k/htc_hst.c
+@@ -30,6 +30,7 @@ static int htc_issue_send(struct htc_target *target, struct sk_buff* skb,
+ hdr->endpoint_id = epid;
+ hdr->flags = flags;
+ hdr->payload_len = cpu_to_be16(len);
++ memset(hdr->control, 0, sizeof(hdr->control));
+
+ status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb);
+
+@@ -113,7 +114,13 @@ static void htc_process_conn_rsp(struct htc_target *target,
+
+ if (svc_rspmsg->status == HTC_SERVICE_SUCCESS) {
+ epid = svc_rspmsg->endpoint_id;
+- if (epid < 0 || epid >= ENDPOINT_MAX)
++
++ /* Check that the received epid for the endpoint to attach
++ * a new service is valid. ENDPOINT0 can't be used here as it
++ * is already reserved for HTC_CTRL_RSVD_SVC service and thus
++ * should not be modified.
++ */
++ if (epid <= ENDPOINT0 || epid >= ENDPOINT_MAX)
+ return;
+
+ service_id = be16_to_cpu(svc_rspmsg->service_id);
+@@ -272,6 +279,10 @@ int htc_connect_service(struct htc_target *target,
+ conn_msg->dl_pipeid = endpoint->dl_pipeid;
+ conn_msg->ul_pipeid = endpoint->ul_pipeid;
+
++ /* To prevent infoleak */
++ conn_msg->svc_meta_len = 0;
++ conn_msg->pad = 0;
++
+ ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0);
+ if (ret)
+ goto err;
+@@ -359,40 +370,34 @@ ret:
+ }
+
+ static void ath9k_htc_fw_panic_report(struct htc_target *htc_handle,
+- struct sk_buff *skb)
++ struct sk_buff *skb, u32 len)
+ {
+ uint32_t *pattern = (uint32_t *)skb->data;
+
+- switch (*pattern) {
+- case 0x33221199:
+- {
++ if (*pattern == 0x33221199 && len >= sizeof(struct htc_panic_bad_vaddr)) {
+ struct htc_panic_bad_vaddr *htc_panic;
+ htc_panic = (struct htc_panic_bad_vaddr *) skb->data;
+ dev_err(htc_handle->dev, "ath: firmware panic! "
+ "exccause: 0x%08x; pc: 0x%08x; badvaddr: 0x%08x.\n",
+ htc_panic->exccause, htc_panic->pc,
+ htc_panic->badvaddr);
+- break;
+- }
+- case 0x33221299:
+- {
++ return;
++ }
++ if (*pattern == 0x33221299) {
+ struct htc_panic_bad_epid *htc_panic;
+ htc_panic = (struct htc_panic_bad_epid *) skb->data;
+ dev_err(htc_handle->dev, "ath: firmware panic! "
+ "bad epid: 0x%08x\n", htc_panic->epid);
+- break;
+- }
+- default:
+- dev_err(htc_handle->dev, "ath: unknown panic pattern!\n");
+- break;
++ return;
+ }
++ dev_err(htc_handle->dev, "ath: unknown panic pattern!\n");
+ }
+
+ /*
+ * HTC Messages are handled directly here and the obtained SKB
+ * is freed.
+ *
+- * Service messages (Data, WMI) passed to the corresponding
++ * Service messages (Data, WMI) are passed to the corresponding
+ * endpoint RX handlers, which have to free the SKB.
+ */
+ void ath9k_htc_rx_msg(struct htc_target *htc_handle,
+@@ -406,16 +411,26 @@ void ath9k_htc_rx_msg(struct htc_target *htc_handle,
+ if (!htc_handle || !skb)
+ return;
+
++ /* A valid message requires len >= 8.
++ *
++ * sizeof(struct htc_frame_hdr) == 8
++ * sizeof(struct htc_ready_msg) == 8
++ * sizeof(struct htc_panic_bad_vaddr) == 16
++ * sizeof(struct htc_panic_bad_epid) == 8
++ */
++ if (unlikely(len < sizeof(struct htc_frame_hdr)))
++ goto invalid;
+ htc_hdr = (struct htc_frame_hdr *) skb->data;
+ epid = htc_hdr->endpoint_id;
+
+ if (epid == 0x99) {
+- ath9k_htc_fw_panic_report(htc_handle, skb);
++ ath9k_htc_fw_panic_report(htc_handle, skb, len);
+ kfree_skb(skb);
+ return;
+ }
+
+ if (epid < 0 || epid >= ENDPOINT_MAX) {
++invalid:
+ if (pipe_id != USB_REG_IN_PIPE)
+ dev_kfree_skb_any(skb);
+ else
+@@ -427,21 +442,30 @@ void ath9k_htc_rx_msg(struct htc_target *htc_handle,
+
+ /* Handle trailer */
+ if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) {
+- if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000)
++ if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000) {
+ /* Move past the Watchdog pattern */
+ htc_hdr = (struct htc_frame_hdr *)(skb->data + 4);
++ len -= 4;
++ }
+ }
+
+ /* Get the message ID */
++ if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(__be16)))
++ goto invalid;
+ msg_id = (__be16 *) ((void *) htc_hdr +
+ sizeof(struct htc_frame_hdr));
+
+ /* Now process HTC messages */
+ switch (be16_to_cpu(*msg_id)) {
+ case HTC_MSG_READY_ID:
++ if (unlikely(len < sizeof(struct htc_ready_msg)))
++ goto invalid;
+ htc_process_target_rdy(htc_handle, htc_hdr);
+ break;
+ case HTC_MSG_CONNECT_SERVICE_RESPONSE_ID:
++ if (unlikely(len < sizeof(struct htc_frame_hdr) +
++ sizeof(struct htc_conn_svc_rspmsg)))
++ goto invalid;
+ htc_process_conn_rsp(htc_handle, htc_hdr);
+ break;
+ default:
+@@ -460,6 +484,8 @@ void ath9k_htc_rx_msg(struct htc_target *htc_handle,
+ if (endpoint->ep_callbacks.rx)
+ endpoint->ep_callbacks.rx(endpoint->ep_callbacks.priv,
+ skb, epid);
++ else
++ goto invalid;
+ }
+ }
+
+diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
+index 139831539da37..9e6d088bd2818 100644
+--- a/drivers/net/wireless/ath/ath9k/main.c
++++ b/drivers/net/wireless/ath/ath9k/main.c
+@@ -203,7 +203,7 @@ void ath_cancel_work(struct ath_softc *sc)
+ void ath_restart_work(struct ath_softc *sc)
+ {
+ ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work,
+- ATH_HW_CHECK_POLL_INT);
++ msecs_to_jiffies(ATH_HW_CHECK_POLL_INT));
+
+ if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9330(sc->sc_ah))
+ ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work,
+@@ -533,8 +533,10 @@ irqreturn_t ath_isr(int irq, void *dev)
+ ath9k_debug_sync_cause(sc, sync_cause);
+ status &= ah->imask; /* discard unasked-for bits */
+
+- if (test_bit(ATH_OP_HW_RESET, &common->op_flags))
++ if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) {
++ ath9k_hw_kill_interrupts(sc->sc_ah);
+ return IRQ_HANDLED;
++ }
+
+ /*
+ * If there are no status bits set, then this interrupt was not
+@@ -837,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix)
+ continue;
+
+ txinfo = IEEE80211_SKB_CB(bf->bf_mpdu);
+- fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0];
++ fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0];
+ if (fi->keyix == keyix)
+ return true;
+ }
+@@ -848,7 +850,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix)
+ static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix)
+ {
+ struct ath_hw *ah = sc->sc_ah;
+- int i;
++ int i, j;
+ struct ath_txq *txq;
+ bool key_in_use = false;
+
+@@ -866,8 +868,9 @@ static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix)
+ if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
+ int idx = txq->txq_tailidx;
+
+- while (!key_in_use &&
+- !list_empty(&txq->txq_fifo[idx])) {
++ for (j = 0; !key_in_use &&
++ !list_empty(&txq->txq_fifo[idx]) &&
++ j < ATH_TXFIFO_DEPTH; j++) {
+ key_in_use = ath9k_txq_list_has_key(
+ &txq->txq_fifo[idx], keyix);
+ INCR(idx, ATH_TXFIFO_DEPTH);
+@@ -2236,7 +2239,7 @@ void __ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop,
+ }
+
+ ieee80211_queue_delayed_work(hw, &sc->hw_check_work,
+- ATH_HW_CHECK_POLL_INT);
++ msecs_to_jiffies(ATH_HW_CHECK_POLL_INT));
+ }
+
+ static bool ath9k_tx_frames_pending(struct ieee80211_hw *hw)
+diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
+index fe29ad4b9023c..1476b42b52a91 100644
+--- a/drivers/net/wireless/ath/ath9k/wmi.c
++++ b/drivers/net/wireless/ath/ath9k/wmi.c
+@@ -169,6 +169,10 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t)
+ &wmi->drv_priv->fatal_work);
+ break;
+ case WMI_TXSTATUS_EVENTID:
++ /* Check if ath9k_tx_init() completed. */
++ if (!data_race(priv->tx.initialized))
++ break;
++
+ spin_lock_bh(&priv->tx.tx_lock);
+ if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) {
+ spin_unlock_bh(&priv->tx.tx_lock);
+@@ -217,6 +221,10 @@ static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb,
+ if (unlikely(wmi->stopped))
+ goto free_skb;
+
++ /* Validate the obtained SKB. */
++ if (unlikely(skb->len < sizeof(struct wmi_cmd_hdr)))
++ goto free_skb;
++
+ hdr = (struct wmi_cmd_hdr *) skb->data;
+ cmd_id = be16_to_cpu(hdr->command_id);
+
+@@ -234,10 +242,10 @@ static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb,
+ spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+ goto free_skb;
+ }
+- spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+
+ /* WMI command response */
+ ath9k_wmi_rsp_callback(wmi, skb);
++ spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+
+ free_skb:
+ kfree_skb(skb);
+@@ -275,7 +283,8 @@ int ath9k_wmi_connect(struct htc_target *htc, struct wmi *wmi,
+
+ static int ath9k_wmi_cmd_issue(struct wmi *wmi,
+ struct sk_buff *skb,
+- enum wmi_cmd_id cmd, u16 len)
++ enum wmi_cmd_id cmd, u16 len,
++ u8 *rsp_buf, u32 rsp_len)
+ {
+ struct wmi_cmd_hdr *hdr;
+ unsigned long flags;
+@@ -285,6 +294,11 @@ static int ath9k_wmi_cmd_issue(struct wmi *wmi,
+ hdr->seq_no = cpu_to_be16(++wmi->tx_seq_id);
+
+ spin_lock_irqsave(&wmi->wmi_lock, flags);
++
++ /* record the rsp buffer and length */
++ wmi->cmd_rsp_buf = rsp_buf;
++ wmi->cmd_rsp_len = rsp_len;
++
+ wmi->last_seq_id = wmi->tx_seq_id;
+ spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+
+@@ -300,8 +314,8 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
+ struct ath_common *common = ath9k_hw_common(ah);
+ u16 headroom = sizeof(struct htc_frame_hdr) +
+ sizeof(struct wmi_cmd_hdr);
++ unsigned long time_left, flags;
+ struct sk_buff *skb;
+- unsigned long time_left;
+ int ret = 0;
+
+ if (ah->ah_flags & AH_UNPLUGGED)
+@@ -325,11 +339,7 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
+ goto out;
+ }
+
+- /* record the rsp buffer and length */
+- wmi->cmd_rsp_buf = rsp_buf;
+- wmi->cmd_rsp_len = rsp_len;
+-
+- ret = ath9k_wmi_cmd_issue(wmi, skb, cmd_id, cmd_len);
++ ret = ath9k_wmi_cmd_issue(wmi, skb, cmd_id, cmd_len, rsp_buf, rsp_len);
+ if (ret)
+ goto out;
+
+@@ -337,6 +347,9 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
+ if (!time_left) {
+ ath_dbg(common, WMI, "Timeout waiting for WMI command: %s\n",
+ wmi_cmd_to_name(cmd_id));
++ spin_lock_irqsave(&wmi->wmi_lock, flags);
++ wmi->last_seq_id = 0;
++ spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+ mutex_unlock(&wmi->op_mutex);
+ return -ETIMEDOUT;
+ }
+diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
+index 5691bd6eb82c2..6555abf02f18b 100644
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb)
+ {
+ struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+ BUILD_BUG_ON(sizeof(struct ath_frame_info) >
+- sizeof(tx_info->rate_driver_data));
+- return (struct ath_frame_info *) &tx_info->rate_driver_data[0];
++ sizeof(tx_info->status.status_driver_data));
++ return (struct ath_frame_info *) &tx_info->status.status_driver_data[0];
+ }
+
+ static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno)
+@@ -2501,6 +2501,16 @@ skip_tx_complete:
+ spin_unlock_irqrestore(&sc->tx.txbuflock, flags);
+ }
+
++static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info)
++{
++ void *ptr = &tx_info->status;
++
++ memset(ptr + sizeof(tx_info->status.rates), 0,
++ sizeof(tx_info->status) -
++ sizeof(tx_info->status.rates) -
++ sizeof(tx_info->status.status_driver_data));
++}
++
+ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
+ struct ath_tx_status *ts, int nframes, int nbad,
+ int txok)
+@@ -2512,6 +2522,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
+ struct ath_hw *ah = sc->sc_ah;
+ u8 i, tx_rateindex;
+
++ ath_clear_tx_status(tx_info);
++
+ if (txok)
+ tx_info->status.ack_signal = ts->ts_rssi;
+
+@@ -2526,6 +2538,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
+ tx_info->status.ampdu_len = nframes;
+ tx_info->status.ampdu_ack_len = nframes - nbad;
+
++ tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
++
++ for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
++ tx_info->status.rates[i].count = 0;
++ tx_info->status.rates[i].idx = -1;
++ }
++
+ if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 &&
+ (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) {
+ /*
+@@ -2547,16 +2566,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
+ tx_info->status.rates[tx_rateindex].count =
+ hw->max_rate_tries;
+ }
+-
+- for (i = tx_rateindex + 1; i < hw->max_rates; i++) {
+- tx_info->status.rates[i].count = 0;
+- tx_info->status.rates[i].idx = -1;
+- }
+-
+- tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1;
+-
+- /* we report airtime in ath_tx_count_airtime(), don't report twice */
+- tx_info->status.tx_time = 0;
+ }
+
+ static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq)
+diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
+index cca3b086aa701..a87476383c540 100644
+--- a/drivers/net/wireless/ath/carl9170/main.c
++++ b/drivers/net/wireless/ath/carl9170/main.c
+@@ -1915,7 +1915,7 @@ static int carl9170_parse_eeprom(struct ar9170 *ar)
+ WARN_ON(!(tx_streams >= 1 && tx_streams <=
+ IEEE80211_HT_MCS_TX_MAX_STREAMS));
+
+- tx_params = (tx_streams - 1) <<
++ tx_params |= (tx_streams - 1) <<
+ IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT;
+
+ carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params;
+diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c
+index 88444fe6d1c64..f9e1306ac74fe 100644
+--- a/drivers/net/wireless/ath/carl9170/tx.c
++++ b/drivers/net/wireless/ath/carl9170/tx.c
+@@ -1558,6 +1558,9 @@ static struct carl9170_vif_info *carl9170_pick_beaconing_vif(struct ar9170 *ar)
+ goto out;
+ }
+ } while (ar->beacon_enabled && i--);
++
++ /* no entry found in list */
++ return NULL;
+ }
+
+ out:
+diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c
+index 80390495ea250..75cb53a3ec15e 100644
+--- a/drivers/net/wireless/ath/dfs_pattern_detector.c
++++ b/drivers/net/wireless/ath/dfs_pattern_detector.c
+@@ -183,10 +183,12 @@ static void channel_detector_exit(struct dfs_pattern_detector *dpd,
+ if (cd == NULL)
+ return;
+ list_del(&cd->head);
+- for (i = 0; i < dpd->num_radar_types; i++) {
+- struct pri_detector *de = cd->detectors[i];
+- if (de != NULL)
+- de->exit(de);
++ if (cd->detectors) {
++ for (i = 0; i < dpd->num_radar_types; i++) {
++ struct pri_detector *de = cd->detectors[i];
++ if (de != NULL)
++ de->exit(de);
++ }
+ }
+ kfree(cd->detectors);
+ kfree(cd);
+diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c
+index 61b59a804e308..b7b61d4f02bae 100644
+--- a/drivers/net/wireless/ath/key.c
++++ b/drivers/net/wireless/ath/key.c
+@@ -503,7 +503,7 @@ int ath_key_config(struct ath_common *common,
+
+ hk.kv_len = key->keylen;
+ if (key->keylen)
+- memcpy(hk.kv_val, key->key, key->keylen);
++ memcpy(&hk.kv_values, key->key, key->keylen);
+
+ if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ switch (vif->type) {
+diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c
+index b2400e2417a55..f15e7bd690b5b 100644
+--- a/drivers/net/wireless/ath/regd.c
++++ b/drivers/net/wireless/ath/regd.c
+@@ -667,14 +667,14 @@ ath_regd_init_wiphy(struct ath_regulatory *reg,
+
+ /*
+ * Some users have reported their EEPROM programmed with
+- * 0x8000 or 0x0 set, this is not a supported regulatory
+- * domain but since we have more than one user with it we
+- * need a solution for them. We default to 0x64, which is
+- * the default Atheros world regulatory domain.
++ * 0x8000 set, this is not a supported regulatory domain
++ * but since we have more than one user with it we need
++ * a solution for them. We default to 0x64, which is the
++ * default Atheros world regulatory domain.
+ */
+ static void ath_regd_sanitize(struct ath_regulatory *reg)
+ {
+- if (reg->current_rd != COUNTRY_ERD_FLAG && reg->current_rd != 0)
++ if (reg->current_rd != COUNTRY_ERD_FLAG)
+ return;
+ printk(KERN_DEBUG "ath: EEPROM regdomain sanitized\n");
+ reg->current_rd = 0x64;
+diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
+index 8e1dbfda65386..e1a35c2eadb6c 100644
+--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
++++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
+@@ -272,6 +272,21 @@ static int wcn36xx_dxe_enable_ch_int(struct wcn36xx *wcn, u16 wcn_ch)
+ return 0;
+ }
+
++static void wcn36xx_dxe_disable_ch_int(struct wcn36xx *wcn, u16 wcn_ch)
++{
++ int reg_data = 0;
++
++ wcn36xx_dxe_read_register(wcn,
++ WCN36XX_DXE_INT_MASK_REG,
++ &reg_data);
++
++ reg_data &= ~wcn_ch;
++
++ wcn36xx_dxe_write_register(wcn,
++ WCN36XX_DXE_INT_MASK_REG,
++ (int)reg_data);
++}
++
+ static int wcn36xx_dxe_fill_skb(struct device *dev,
+ struct wcn36xx_dxe_ctl *ctl,
+ gfp_t gfp)
+@@ -403,8 +418,21 @@ static void reap_tx_dxes(struct wcn36xx *wcn, struct wcn36xx_dxe_ch *ch)
+ dma_unmap_single(wcn->dev, ctl->desc->src_addr_l,
+ ctl->skb->len, DMA_TO_DEVICE);
+ info = IEEE80211_SKB_CB(ctl->skb);
+- if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) {
+- /* Keep frame until TX status comes */
++ if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
++ if (info->flags & IEEE80211_TX_CTL_NO_ACK) {
++ info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
++ ieee80211_tx_status_irqsafe(wcn->hw, ctl->skb);
++ } else {
++ /* Wait for the TX ack indication or timeout... */
++ spin_lock(&wcn->dxe_lock);
++ if (WARN_ON(wcn->tx_ack_skb))
++ ieee80211_free_txskb(wcn->hw, wcn->tx_ack_skb);
++ wcn->tx_ack_skb = ctl->skb; /* Tracking ref */
++ mod_timer(&wcn->tx_ack_timer, jiffies + HZ / 10);
++ spin_unlock(&wcn->dxe_lock);
++ }
++ /* do not free, ownership transferred to mac80211 status cb */
++ } else {
+ ieee80211_free_txskb(wcn->hw, ctl->skb);
+ }
+
+@@ -426,7 +454,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
+ {
+ struct wcn36xx *wcn = (struct wcn36xx *)dev;
+ int int_src, int_reason;
+- bool transmitted = false;
+
+ wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_INT_SRC_RAW_REG, &int_src);
+
+@@ -466,7 +493,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
+ if (int_reason & (WCN36XX_CH_STAT_INT_DONE_MASK |
+ WCN36XX_CH_STAT_INT_ED_MASK)) {
+ reap_tx_dxes(wcn, &wcn->dxe_tx_h_ch);
+- transmitted = true;
+ }
+ }
+
+@@ -479,7 +505,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
+ WCN36XX_DXE_0_INT_CLR,
+ WCN36XX_INT_MASK_CHAN_TX_L);
+
+-
+ if (int_reason & WCN36XX_CH_STAT_INT_ERR_MASK ) {
+ wcn36xx_dxe_write_register(wcn,
+ WCN36XX_DXE_0_INT_ERR_CLR,
+@@ -507,25 +532,8 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
+ if (int_reason & (WCN36XX_CH_STAT_INT_DONE_MASK |
+ WCN36XX_CH_STAT_INT_ED_MASK)) {
+ reap_tx_dxes(wcn, &wcn->dxe_tx_l_ch);
+- transmitted = true;
+- }
+- }
+-
+- spin_lock(&wcn->dxe_lock);
+- if (wcn->tx_ack_skb && transmitted) {
+- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(wcn->tx_ack_skb);
+-
+- /* TX complete, no need to wait for 802.11 ack indication */
+- if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS &&
+- info->flags & IEEE80211_TX_CTL_NO_ACK) {
+- info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
+- del_timer(&wcn->tx_ack_timer);
+- ieee80211_tx_status_irqsafe(wcn->hw, wcn->tx_ack_skb);
+- wcn->tx_ack_skb = NULL;
+- ieee80211_wake_queues(wcn->hw);
+ }
+ }
+- spin_unlock(&wcn->dxe_lock);
+
+ return IRQ_HANDLED;
+ }
+@@ -613,6 +621,10 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn,
+ dxe = ctl->desc;
+
+ while (!(READ_ONCE(dxe->ctrl) & WCN36xx_DXE_CTRL_VLD)) {
++ /* do not read until we own DMA descriptor */
++ dma_rmb();
++
++ /* read/modify DMA descriptor */
+ skb = ctl->skb;
+ dma_addr = dxe->dst_addr_l;
+ ret = wcn36xx_dxe_fill_skb(wcn->dev, ctl, GFP_ATOMIC);
+@@ -623,9 +635,15 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn,
+ dma_unmap_single(wcn->dev, dma_addr, WCN36XX_PKT_SIZE,
+ DMA_FROM_DEVICE);
+ wcn36xx_rx_skb(wcn, skb);
+- } /* else keep old skb not submitted and use it for rx DMA */
++ }
++ /* else keep old skb not submitted and reuse it for rx DMA
++ * (dropping the packet that it contained)
++ */
+
++ /* flush descriptor changes before re-marking as valid */
++ dma_wmb();
+ dxe->ctrl = ctrl;
++
+ ctl = ctl->next;
+ dxe = ctl->desc;
+ }
+@@ -866,7 +884,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn)
+ WCN36XX_DXE_WQ_TX_L);
+
+ wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, &reg_data);
+- wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L);
+
+ /***************************************/
+ /* Init descriptors for TX HIGH channel */
+@@ -890,9 +907,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn)
+
+ wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, &reg_data);
+
+- /* Enable channel interrupts */
+- wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H);
+-
+ /***************************************/
+ /* Init descriptors for RX LOW channel */
+ /***************************************/
+@@ -902,7 +916,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn)
+ goto out_err_rxl_ch;
+ }
+
+-
+ /* For RX we need to preallocated buffers */
+ wcn36xx_dxe_ch_alloc_skb(wcn, &wcn->dxe_rx_l_ch);
+
+@@ -925,9 +938,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn)
+ WCN36XX_DXE_REG_CTL_RX_L,
+ WCN36XX_DXE_CH_DEFAULT_CTL_RX_L);
+
+- /* Enable channel interrupts */
+- wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L);
+-
+ /***************************************/
+ /* Init descriptors for RX HIGH channel */
+ /***************************************/
+@@ -959,15 +969,18 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn)
+ WCN36XX_DXE_REG_CTL_RX_H,
+ WCN36XX_DXE_CH_DEFAULT_CTL_RX_H);
+
+- /* Enable channel interrupts */
+- wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H);
+-
+ ret = wcn36xx_dxe_request_irqs(wcn);
+ if (ret < 0)
+ goto out_err_irq;
+
+ timer_setup(&wcn->tx_ack_timer, wcn36xx_dxe_tx_timer, 0);
+
++ /* Enable channel interrupts */
++ wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L);
++ wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H);
++ wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L);
++ wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H);
++
+ return 0;
+
+ out_err_irq:
+@@ -984,6 +997,14 @@ out_err_txh_ch:
+
+ void wcn36xx_dxe_deinit(struct wcn36xx *wcn)
+ {
++ int reg_data = 0;
++
++ /* Disable channel interrupts */
++ wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H);
++ wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L);
++ wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H);
++ wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L);
++
+ free_irq(wcn->tx_irq, wcn);
+ free_irq(wcn->rx_irq, wcn);
+ del_timer(&wcn->tx_ack_timer);
+@@ -993,6 +1014,15 @@ void wcn36xx_dxe_deinit(struct wcn36xx *wcn)
+ wcn->tx_ack_skb = NULL;
+ }
+
++ /* Put the DXE block into reset before freeing memory */
++ reg_data = WCN36XX_DXE_REG_RESET;
++ wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_REG_CSR_RESET, reg_data);
++
+ wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_l_ch);
+ wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_h_ch);
++
++ wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_l_ch);
++ wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_h_ch);
++ wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_l_ch);
++ wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_h_ch);
+ }
+diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
+index 455143c4164ee..de3bca043c2b3 100644
+--- a/drivers/net/wireless/ath/wcn36xx/hal.h
++++ b/drivers/net/wireless/ath/wcn36xx/hal.h
+@@ -359,6 +359,8 @@ enum wcn36xx_hal_host_msg_type {
+ WCN36XX_HAL_START_SCAN_OFFLOAD_RSP = 205,
+ WCN36XX_HAL_STOP_SCAN_OFFLOAD_REQ = 206,
+ WCN36XX_HAL_STOP_SCAN_OFFLOAD_RSP = 207,
++ WCN36XX_HAL_UPDATE_CHANNEL_LIST_REQ = 208,
++ WCN36XX_HAL_UPDATE_CHANNEL_LIST_RSP = 209,
+ WCN36XX_HAL_SCAN_OFFLOAD_IND = 210,
+
+ WCN36XX_HAL_AVOID_FREQ_RANGE_IND = 233,
+@@ -1353,6 +1355,36 @@ struct wcn36xx_hal_stop_scan_offload_rsp_msg {
+ u32 status;
+ } __packed;
+
++#define WCN36XX_HAL_CHAN_REG1_MIN_PWR_MASK 0x000000ff
++#define WCN36XX_HAL_CHAN_REG1_MAX_PWR_MASK 0x0000ff00
++#define WCN36XX_HAL_CHAN_REG1_REG_PWR_MASK 0x00ff0000
++#define WCN36XX_HAL_CHAN_REG1_CLASS_ID_MASK 0xff000000
++#define WCN36XX_HAL_CHAN_REG2_ANT_GAIN_MASK 0x000000ff
++#define WCN36XX_HAL_CHAN_INFO_FLAG_PASSIVE BIT(7)
++#define WCN36XX_HAL_CHAN_INFO_FLAG_DFS BIT(10)
++#define WCN36XX_HAL_CHAN_INFO_FLAG_HT BIT(11)
++#define WCN36XX_HAL_CHAN_INFO_FLAG_VHT BIT(12)
++#define WCN36XX_HAL_CHAN_INFO_PHY_11A 0
++#define WCN36XX_HAL_CHAN_INFO_PHY_11BG 1
++#define WCN36XX_HAL_DEFAULT_ANT_GAIN 6
++#define WCN36XX_HAL_DEFAULT_MIN_POWER 6
++
++struct wcn36xx_hal_channel_param {
++ u32 mhz;
++ u32 band_center_freq1;
++ u32 band_center_freq2;
++ u32 channel_info;
++ u32 reg_info_1;
++ u32 reg_info_2;
++} __packed;
++
++struct wcn36xx_hal_update_channel_list_req_msg {
++ struct wcn36xx_hal_msg_header header;
++
++ u8 num_channel;
++ struct wcn36xx_hal_channel_param channels[80];
++} __packed;
++
+ enum wcn36xx_hal_rate_index {
+ HW_RATE_INDEX_1MBPS = 0x82,
+ HW_RATE_INDEX_2MBPS = 0x84,
+diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
+index ec913ec991f3f..d51a783301359 100644
+--- a/drivers/net/wireless/ath/wcn36xx/main.c
++++ b/drivers/net/wireless/ath/wcn36xx/main.c
+@@ -135,7 +135,9 @@ static struct ieee80211_supported_band wcn_band_2ghz = {
+ .cap = IEEE80211_HT_CAP_GRN_FLD |
+ IEEE80211_HT_CAP_SGI_20 |
+ IEEE80211_HT_CAP_DSSSCCK40 |
+- IEEE80211_HT_CAP_LSIG_TXOP_PROT,
++ IEEE80211_HT_CAP_LSIG_TXOP_PROT |
++ IEEE80211_HT_CAP_SGI_40 |
++ IEEE80211_HT_CAP_SUP_WIDTH_20_40,
+ .ht_supported = true,
+ .ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K,
+ .ampdu_density = IEEE80211_HT_MPDU_DENSITY_16,
+@@ -398,6 +400,7 @@ static void wcn36xx_change_opchannel(struct wcn36xx *wcn, int ch)
+ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
+ {
+ struct wcn36xx *wcn = hw->priv;
++ int ret;
+
+ wcn36xx_dbg(WCN36XX_DBG_MAC, "mac config changed 0x%08x\n", changed);
+
+@@ -413,17 +416,31 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
+ * want to receive/transmit regular data packets, then
+ * simply stop the scan session and exit PS mode.
+ */
+- wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN,
+- wcn->sw_scan_vif);
+- wcn->sw_scan_channel = 0;
++ if (wcn->sw_scan_channel)
++ wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel);
++ if (wcn->sw_scan_init) {
++ wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN,
++ wcn->sw_scan_vif);
++ }
+ } else if (wcn->sw_scan) {
+ /* A scan is ongoing, do not change the operating
+ * channel, but start a scan session on the channel.
+ */
+- wcn36xx_smd_init_scan(wcn, HAL_SYS_MODE_SCAN,
+- wcn->sw_scan_vif);
++ if (wcn->sw_scan_channel)
++ wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel);
++ if (!wcn->sw_scan_init) {
++ /* This can fail if we are unable to notify the
++ * operating channel.
++ */
++ ret = wcn36xx_smd_init_scan(wcn,
++ HAL_SYS_MODE_SCAN,
++ wcn->sw_scan_vif);
++ if (ret) {
++ mutex_unlock(&wcn->conf_mutex);
++ return -EIO;
++ }
++ }
+ wcn36xx_smd_start_scan(wcn, ch);
+- wcn->sw_scan_channel = ch;
+ } else {
+ wcn36xx_change_opchannel(wcn, ch);
+ }
+@@ -569,12 +586,14 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
+ if (IEEE80211_KEY_FLAG_PAIRWISE & key_conf->flags) {
+ sta_priv->is_data_encrypted = true;
+ /* Reconfigure bss with encrypt_type */
+- if (NL80211_IFTYPE_STATION == vif->type)
++ if (NL80211_IFTYPE_STATION == vif->type) {
+ wcn36xx_smd_config_bss(wcn,
+ vif,
+ sta,
+ sta->addr,
+ true);
++ wcn36xx_smd_config_sta(wcn, vif, sta);
++ }
+
+ wcn36xx_smd_set_stakey(wcn,
+ vif_priv->encrypt_type,
+@@ -604,15 +623,6 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
+ }
+ }
+ }
+- /* FIXME: Only enable bmps support when encryption is enabled.
+- * For any reasons, when connected to open/no-security BSS,
+- * the wcn36xx controller in bmps mode does not forward
+- * 'wake-up' beacons despite AP sends DTIM with station AID.
+- * It could be due to a firmware issue or to the way driver
+- * configure the station.
+- */
+- if (vif->type == NL80211_IFTYPE_STATION)
+- vif_priv->allow_bmps = true;
+ break;
+ case DISABLE_KEY:
+ if (!(IEEE80211_KEY_FLAG_PAIRWISE & key_conf->flags)) {
+@@ -676,6 +686,7 @@ static int wcn36xx_hw_scan(struct ieee80211_hw *hw,
+
+ mutex_unlock(&wcn->scan_lock);
+
++ wcn36xx_smd_update_channel_list(wcn, &hw_req->req);
+ return wcn36xx_smd_start_hw_scan(wcn, vif, &hw_req->req);
+ }
+
+@@ -717,7 +728,12 @@ static void wcn36xx_sw_scan_complete(struct ieee80211_hw *hw,
+ struct wcn36xx *wcn = hw->priv;
+
+ /* ensure that any scan session is finished */
+- wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN, wcn->sw_scan_vif);
++ if (wcn->sw_scan_channel)
++ wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel);
++ if (wcn->sw_scan_init) {
++ wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN,
++ wcn->sw_scan_vif);
++ }
+ wcn->sw_scan = false;
+ wcn->sw_scan_opchannel = 0;
+ }
+@@ -913,7 +929,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
+ vif->addr,
+ bss_conf->aid);
+ vif_priv->sta_assoc = false;
+- vif_priv->allow_bmps = false;
+ wcn36xx_smd_set_link_st(wcn,
+ bss_conf->bssid,
+ vif->addr,
+@@ -1123,6 +1138,13 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
+ goto out;
+ ret = wcn36xx_smd_wlan_host_suspend_ind(wcn);
+ }
++
++ /* Disable IRQ, we don't want to handle any packet before mac80211 is
++ * resumed and ready to receive packets.
++ */
++ disable_irq(wcn->tx_irq);
++ disable_irq(wcn->rx_irq);
++
+ out:
+ mutex_unlock(&wcn->conf_mutex);
+ return ret;
+@@ -1145,6 +1167,10 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
+ wcn36xx_smd_ipv6_ns_offload(wcn, vif, false);
+ wcn36xx_smd_arp_offload(wcn, vif, false);
+ }
++
++ enable_irq(wcn->tx_irq);
++ enable_irq(wcn->rx_irq);
++
+ mutex_unlock(&wcn->conf_mutex);
+
+ return 0;
+@@ -1338,7 +1364,6 @@ static int wcn36xx_init_ieee80211(struct wcn36xx *wcn)
+ ieee80211_hw_set(wcn->hw, HAS_RATE_CONTROL);
+ ieee80211_hw_set(wcn->hw, SINGLE_SCAN_ON_ALL_BANDS);
+ ieee80211_hw_set(wcn->hw, REPORTS_TX_ACK_STATUS);
+- ieee80211_hw_set(wcn->hw, CONNECTION_MONITOR);
+
+ wcn->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_AP) |
+@@ -1449,6 +1474,9 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn,
+ if (iris_node) {
+ if (of_device_is_compatible(iris_node, "qcom,wcn3620"))
+ wcn->rf_id = RF_IRIS_WCN3620;
++ if (of_device_is_compatible(iris_node, "qcom,wcn3660") ||
++ of_device_is_compatible(iris_node, "qcom,wcn3660b"))
++ wcn->rf_id = RF_IRIS_WCN3660;
+ if (of_device_is_compatible(iris_node, "qcom,wcn3680"))
+ wcn->rf_id = RF_IRIS_WCN3680;
+ of_node_put(iris_node);
+diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c
+index 2d0780fefd477..2936aaf532738 100644
+--- a/drivers/net/wireless/ath/wcn36xx/pmc.c
++++ b/drivers/net/wireless/ath/wcn36xx/pmc.c
+@@ -23,10 +23,7 @@ int wcn36xx_pmc_enter_bmps_state(struct wcn36xx *wcn,
+ {
+ int ret = 0;
+ struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+-
+- if (!vif_priv->allow_bmps)
+- return -ENOTSUPP;
+-
++ /* TODO: Make sure the TX chain clean */
+ ret = wcn36xx_smd_enter_bmps(wcn, vif);
+ if (!ret) {
+ wcn36xx_dbg(WCN36XX_DBG_PMC, "Entered BMPS\n");
+diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
+index 57fa857b290b7..c056fae1d6418 100644
+--- a/drivers/net/wireless/ath/wcn36xx/smd.c
++++ b/drivers/net/wireless/ath/wcn36xx/smd.c
+@@ -16,6 +16,7 @@
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
++#include <linux/bitfield.h>
+ #include <linux/etherdevice.h>
+ #include <linux/firmware.h>
+ #include <linux/bitops.h>
+@@ -720,6 +721,7 @@ int wcn36xx_smd_init_scan(struct wcn36xx *wcn, enum wcn36xx_hal_sys_mode mode,
+ wcn36xx_err("hal_init_scan response failed err=%d\n", ret);
+ goto out;
+ }
++ wcn->sw_scan_init = true;
+ out:
+ mutex_unlock(&wcn->hal_mutex);
+ return ret;
+@@ -750,6 +752,7 @@ int wcn36xx_smd_start_scan(struct wcn36xx *wcn, u8 scan_channel)
+ wcn36xx_err("hal_start_scan response failed err=%d\n", ret);
+ goto out;
+ }
++ wcn->sw_scan_channel = scan_channel;
+ out:
+ mutex_unlock(&wcn->hal_mutex);
+ return ret;
+@@ -780,6 +783,7 @@ int wcn36xx_smd_end_scan(struct wcn36xx *wcn, u8 scan_channel)
+ wcn36xx_err("hal_end_scan response failed err=%d\n", ret);
+ goto out;
+ }
++ wcn->sw_scan_channel = 0;
+ out:
+ mutex_unlock(&wcn->hal_mutex);
+ return ret;
+@@ -821,6 +825,7 @@ int wcn36xx_smd_finish_scan(struct wcn36xx *wcn,
+ wcn36xx_err("hal_finish_scan response failed err=%d\n", ret);
+ goto out;
+ }
++ wcn->sw_scan_init = false;
+ out:
+ mutex_unlock(&wcn->hal_mutex);
+ return ret;
+@@ -927,6 +932,86 @@ out:
+ return ret;
+ }
+
++int wcn36xx_smd_update_channel_list(struct wcn36xx *wcn, struct cfg80211_scan_request *req)
++{
++ struct wcn36xx_hal_update_channel_list_req_msg *msg_body;
++ int ret, i;
++
++ msg_body = kzalloc(sizeof(*msg_body), GFP_KERNEL);
++ if (!msg_body)
++ return -ENOMEM;
++
++ INIT_HAL_MSG((*msg_body), WCN36XX_HAL_UPDATE_CHANNEL_LIST_REQ);
++
++ msg_body->num_channel = min_t(u8, req->n_channels, ARRAY_SIZE(msg_body->channels));
++ for (i = 0; i < msg_body->num_channel; i++) {
++ struct wcn36xx_hal_channel_param *param = &msg_body->channels[i];
++ u32 min_power = WCN36XX_HAL_DEFAULT_MIN_POWER;
++ u32 ant_gain = WCN36XX_HAL_DEFAULT_ANT_GAIN;
++
++ param->mhz = req->channels[i]->center_freq;
++ param->band_center_freq1 = req->channels[i]->center_freq;
++ param->band_center_freq2 = 0;
++
++ if (req->channels[i]->flags & IEEE80211_CHAN_NO_IR)
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_FLAG_PASSIVE;
++
++ if (req->channels[i]->flags & IEEE80211_CHAN_RADAR)
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_FLAG_DFS;
++
++ if (req->channels[i]->band == NL80211_BAND_5GHZ) {
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_FLAG_HT;
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_FLAG_VHT;
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_PHY_11A;
++ } else {
++ param->channel_info |= WCN36XX_HAL_CHAN_INFO_PHY_11BG;
++ }
++
++ if (min_power > req->channels[i]->max_power)
++ min_power = req->channels[i]->max_power;
++
++ if (req->channels[i]->max_antenna_gain)
++ ant_gain = req->channels[i]->max_antenna_gain;
++
++ u32p_replace_bits(&param->reg_info_1, min_power,
++ WCN36XX_HAL_CHAN_REG1_MIN_PWR_MASK);
++ u32p_replace_bits(&param->reg_info_1, req->channels[i]->max_power,
++ WCN36XX_HAL_CHAN_REG1_MAX_PWR_MASK);
++ u32p_replace_bits(&param->reg_info_1, req->channels[i]->max_reg_power,
++ WCN36XX_HAL_CHAN_REG1_REG_PWR_MASK);
++ u32p_replace_bits(&param->reg_info_1, 0,
++ WCN36XX_HAL_CHAN_REG1_CLASS_ID_MASK);
++ u32p_replace_bits(&param->reg_info_2, ant_gain,
++ WCN36XX_HAL_CHAN_REG2_ANT_GAIN_MASK);
++
++ wcn36xx_dbg(WCN36XX_DBG_HAL,
++ "%s: freq=%u, channel_info=%08x, reg_info1=%08x, reg_info2=%08x\n",
++ __func__, param->mhz, param->channel_info, param->reg_info_1,
++ param->reg_info_2);
++ }
++
++ mutex_lock(&wcn->hal_mutex);
++
++ PREPARE_HAL_BUF(wcn->hal_buf, (*msg_body));
++
++ ret = wcn36xx_smd_send_and_wait(wcn, msg_body->header.len);
++ if (ret) {
++ wcn36xx_err("Sending hal_update_channel_list failed\n");
++ goto out;
++ }
++
++ ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
++ if (ret) {
++ wcn36xx_err("hal_update_channel_list response failed err=%d\n", ret);
++ goto out;
++ }
++
++out:
++ kfree(msg_body);
++ mutex_unlock(&wcn->hal_mutex);
++ return ret;
++}
++
+ static int wcn36xx_smd_switch_channel_rsp(void *buf, size_t len)
+ {
+ struct wcn36xx_hal_switch_channel_rsp_msg *rsp;
+@@ -2594,7 +2679,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn,
+ wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n",
+ tmp->bss_index);
+ vif = wcn36xx_priv_to_vif(tmp);
+- ieee80211_connection_loss(vif);
++ ieee80211_beacon_loss(vif);
+ }
+ return 0;
+ }
+@@ -2609,7 +2694,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn,
+ wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n",
+ rsp->bss_index);
+ vif = wcn36xx_priv_to_vif(tmp);
+- ieee80211_connection_loss(vif);
++ ieee80211_beacon_loss(vif);
+ return 0;
+ }
+ }
+@@ -2623,30 +2708,52 @@ static int wcn36xx_smd_delete_sta_context_ind(struct wcn36xx *wcn,
+ size_t len)
+ {
+ struct wcn36xx_hal_delete_sta_context_ind_msg *rsp = buf;
+- struct wcn36xx_vif *tmp;
++ struct wcn36xx_vif *vif_priv;
++ struct ieee80211_vif *vif;
++ struct ieee80211_bss_conf *bss_conf;
+ struct ieee80211_sta *sta;
++ bool found = false;
+
+ if (len != sizeof(*rsp)) {
+ wcn36xx_warn("Corrupted delete sta indication\n");
+ return -EIO;
+ }
+
+- wcn36xx_dbg(WCN36XX_DBG_HAL, "delete station indication %pM index %d\n",
+- rsp->addr2, rsp->sta_id);
++ wcn36xx_dbg(WCN36XX_DBG_HAL,
++ "delete station indication %pM index %d reason %d\n",
++ rsp->addr2, rsp->sta_id, rsp->reason_code);
+
+- list_for_each_entry(tmp, &wcn->vif_list, list) {
++ list_for_each_entry(vif_priv, &wcn->vif_list, list) {
+ rcu_read_lock();
+- sta = ieee80211_find_sta(wcn36xx_priv_to_vif(tmp), rsp->addr2);
+- if (sta)
+- ieee80211_report_low_ack(sta, 0);
++ vif = wcn36xx_priv_to_vif(vif_priv);
++
++ if (vif->type == NL80211_IFTYPE_STATION) {
++ /* We could call ieee80211_find_sta too, but checking
++ * bss_conf is clearer.
++ */
++ bss_conf = &vif->bss_conf;
++ if (vif_priv->sta_assoc &&
++ !memcmp(bss_conf->bssid, rsp->addr2, ETH_ALEN)) {
++ found = true;
++ wcn36xx_dbg(WCN36XX_DBG_HAL,
++ "connection loss bss_index %d\n",
++ vif_priv->bss_index);
++ ieee80211_connection_loss(vif);
++ }
++ } else {
++ sta = ieee80211_find_sta(vif, rsp->addr2);
++ if (sta) {
++ found = true;
++ ieee80211_report_low_ack(sta, 0);
++ }
++ }
++
+ rcu_read_unlock();
+- if (sta)
++ if (found)
+ return 0;
+ }
+
+- wcn36xx_warn("STA with addr %pM and index %d not found\n",
+- rsp->addr2,
+- rsp->sta_id);
++ wcn36xx_warn("BSS or STA with addr %pM not found\n", rsp->addr2);
+ return -ENOENT;
+ }
+
+@@ -3060,6 +3167,7 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
+ case WCN36XX_HAL_GTK_OFFLOAD_RSP:
+ case WCN36XX_HAL_GTK_OFFLOAD_GETINFO_RSP:
+ case WCN36XX_HAL_HOST_RESUME_RSP:
++ case WCN36XX_HAL_UPDATE_CHANNEL_LIST_RSP:
+ memcpy(wcn->hal_buf, buf, len);
+ wcn->hal_rsp_len = len;
+ complete(&wcn->hal_rsp_compl);
+diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
+index d8bded03945d4..d3774568d885e 100644
+--- a/drivers/net/wireless/ath/wcn36xx/smd.h
++++ b/drivers/net/wireless/ath/wcn36xx/smd.h
+@@ -70,6 +70,7 @@ int wcn36xx_smd_update_scan_params(struct wcn36xx *wcn, u8 *channels, size_t cha
+ int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+ struct cfg80211_scan_request *req);
+ int wcn36xx_smd_stop_hw_scan(struct wcn36xx *wcn);
++int wcn36xx_smd_update_channel_list(struct wcn36xx *wcn, struct cfg80211_scan_request *req);
+ int wcn36xx_smd_add_sta_self(struct wcn36xx *wcn, struct ieee80211_vif *vif);
+ int wcn36xx_smd_delete_sta_self(struct wcn36xx *wcn, u8 *addr);
+ int wcn36xx_smd_delete_sta(struct wcn36xx *wcn, u8 sta_index);
+diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.c b/drivers/net/wireless/ath/wcn36xx/txrx.c
+index cab196bb38cd4..f33e7228a1010 100644
+--- a/drivers/net/wireless/ath/wcn36xx/txrx.c
++++ b/drivers/net/wireless/ath/wcn36xx/txrx.c
+@@ -31,6 +31,13 @@ struct wcn36xx_rate {
+ enum rate_info_bw bw;
+ };
+
++/* Buffer descriptor rx_ch field is limited to 5-bit (4+1), a mapping is used
++ * for 11A Channels.
++ */
++static const u8 ab_rx_ch_map[] = { 36, 40, 44, 48, 52, 56, 60, 64, 100, 104,
++ 108, 112, 116, 120, 124, 128, 132, 136, 140,
++ 149, 153, 157, 161, 165, 144 };
++
+ static const struct wcn36xx_rate wcn36xx_rate_table[] = {
+ /* 11b rates */
+ { 10, 0, RX_ENC_LEGACY, 0, RATE_INFO_BW_20 },
+@@ -230,7 +237,6 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb)
+ const struct wcn36xx_rate *rate;
+ struct ieee80211_hdr *hdr;
+ struct wcn36xx_rx_bd *bd;
+- struct ieee80211_supported_band *sband;
+ u16 fc, sn;
+
+ /*
+@@ -252,8 +258,6 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb)
+ fc = __le16_to_cpu(hdr->frame_control);
+ sn = IEEE80211_SEQ_TO_SN(__le16_to_cpu(hdr->seq_ctrl));
+
+- status.freq = WCN36XX_CENTER_FREQ(wcn);
+- status.band = WCN36XX_BAND(wcn);
+ status.mactime = 10;
+ status.signal = -get_rssi0(bd);
+ status.antenna = 1;
+@@ -265,18 +269,36 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb)
+
+ wcn36xx_dbg(WCN36XX_DBG_RX, "status.flags=%x\n", status.flag);
+
++ if (bd->scan_learn) {
++ /* If packet originate from hardware scanning, extract the
++ * band/channel from bd descriptor.
++ */
++ u8 hwch = (bd->reserved0 << 4) + bd->rx_ch;
++
++ if (bd->rf_band != 1 && hwch <= sizeof(ab_rx_ch_map) && hwch >= 1) {
++ status.band = NL80211_BAND_5GHZ;
++ status.freq = ieee80211_channel_to_frequency(ab_rx_ch_map[hwch - 1],
++ status.band);
++ } else {
++ status.band = NL80211_BAND_2GHZ;
++ status.freq = ieee80211_channel_to_frequency(hwch, status.band);
++ }
++ } else {
++ status.band = WCN36XX_BAND(wcn);
++ status.freq = WCN36XX_CENTER_FREQ(wcn);
++ }
++
+ if (bd->rate_id < ARRAY_SIZE(wcn36xx_rate_table)) {
+ rate = &wcn36xx_rate_table[bd->rate_id];
+ status.encoding = rate->encoding;
+ status.enc_flags = rate->encoding_flags;
+ status.bw = rate->bw;
+ status.rate_idx = rate->mcs_or_legacy_index;
+- sband = wcn->hw->wiphy->bands[status.band];
+ status.nss = 1;
+
+ if (status.band == NL80211_BAND_5GHZ &&
+ status.encoding == RX_ENC_LEGACY &&
+- status.rate_idx >= sband->n_bitrates) {
++ status.rate_idx >= 4) {
+ /* no dsss rates in 5Ghz rates table */
+ status.rate_idx -= 4;
+ }
+@@ -321,8 +343,6 @@ static void wcn36xx_set_tx_pdu(struct wcn36xx_tx_bd *bd,
+ bd->pdu.mpdu_header_off;
+ bd->pdu.mpdu_len = len;
+ bd->pdu.tid = tid;
+- /* Use seq number generated by mac80211 */
+- bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_HOST;
+ }
+
+ static inline struct wcn36xx_vif *get_vif_by_addr(struct wcn36xx *wcn,
+@@ -419,6 +439,9 @@ static void wcn36xx_set_tx_data(struct wcn36xx_tx_bd *bd,
+ tid = ieee80211_get_tid(hdr);
+ /* TID->QID is one-to-one mapping */
+ bd->queue_id = tid;
++ bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_QOS;
++ } else {
++ bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_NON_QOS;
+ }
+
+ if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT ||
+@@ -429,6 +452,9 @@ static void wcn36xx_set_tx_data(struct wcn36xx_tx_bd *bd,
+ if (ieee80211_is_any_nullfunc(hdr->frame_control)) {
+ /* Don't use a regular queue for null packet (no ampdu) */
+ bd->queue_id = WCN36XX_TX_U_WQ_ID;
++ bd->bd_rate = WCN36XX_BD_RATE_CTRL;
++ if (ieee80211_is_qos_nullfunc(hdr->frame_control))
++ bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_HOST;
+ }
+
+ if (bcast) {
+@@ -488,6 +514,8 @@ static void wcn36xx_set_tx_mgmt(struct wcn36xx_tx_bd *bd,
+ bd->queue_id = WCN36XX_TX_U_WQ_ID;
+ *vif_priv = __vif_priv;
+
++ bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_NON_QOS;
++
+ wcn36xx_set_tx_pdu(bd,
+ ieee80211_is_data_qos(hdr->frame_control) ?
+ sizeof(struct ieee80211_qos_hdr) :
+@@ -502,10 +530,11 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct wcn36xx_vif *vif_priv = NULL;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+- unsigned long flags;
+ bool is_low = ieee80211_is_data(hdr->frame_control);
+ bool bcast = is_broadcast_ether_addr(hdr->addr1) ||
+ is_multicast_ether_addr(hdr->addr1);
++ bool ack_ind = (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) &&
++ !(info->flags & IEEE80211_TX_CTL_NO_ACK);
+ struct wcn36xx_tx_bd bd;
+ int ret;
+
+@@ -521,30 +550,16 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
+
+ bd.dpu_rf = WCN36XX_BMU_WQ_TX;
+
+- if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
++ if (unlikely(ack_ind)) {
+ wcn36xx_dbg(WCN36XX_DBG_DXE, "TX_ACK status requested\n");
+
+- spin_lock_irqsave(&wcn->dxe_lock, flags);
+- if (wcn->tx_ack_skb) {
+- spin_unlock_irqrestore(&wcn->dxe_lock, flags);
+- wcn36xx_warn("tx_ack_skb already set\n");
+- return -EINVAL;
+- }
+-
+- wcn->tx_ack_skb = skb;
+- spin_unlock_irqrestore(&wcn->dxe_lock, flags);
+-
+ /* Only one at a time is supported by fw. Stop the TX queues
+ * until the ack status gets back.
+ */
+ ieee80211_stop_queues(wcn->hw);
+
+- /* TX watchdog if no TX irq or ack indication received */
+- mod_timer(&wcn->tx_ack_timer, jiffies + HZ / 10);
+-
+ /* Request ack indication from the firmware */
+- if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
+- bd.tx_comp = 1;
++ bd.tx_comp = 1;
+ }
+
+ /* Data frames served first*/
+@@ -558,14 +573,8 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
+ bd.tx_bd_sign = 0xbdbdbdbd;
+
+ ret = wcn36xx_dxe_tx_frame(wcn, vif_priv, &bd, skb, is_low);
+- if (ret && (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) {
+- /* If the skb has not been transmitted,
+- * don't keep a reference to it.
+- */
+- spin_lock_irqsave(&wcn->dxe_lock, flags);
+- wcn->tx_ack_skb = NULL;
+- spin_unlock_irqrestore(&wcn->dxe_lock, flags);
+-
++ if (unlikely(ret && ack_ind)) {
++ /* If the skb has not been transmitted, resume TX queue */
+ ieee80211_wake_queues(wcn->hw);
+ }
+
+diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.h b/drivers/net/wireless/ath/wcn36xx/txrx.h
+index 032216e82b2be..b54311ffde9c5 100644
+--- a/drivers/net/wireless/ath/wcn36xx/txrx.h
++++ b/drivers/net/wireless/ath/wcn36xx/txrx.h
+@@ -110,7 +110,8 @@ struct wcn36xx_rx_bd {
+ /* 0x44 */
+ u32 exp_seq_num:12;
+ u32 cur_seq_num:12;
+- u32 fr_type_subtype:8;
++ u32 rf_band:2;
++ u32 fr_type_subtype:6;
+
+ /* 0x48 */
+ u32 msdu_size:16;
+diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+index add6e527e8330..597f740f3c256 100644
+--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
++++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+@@ -97,6 +97,7 @@ enum wcn36xx_ampdu_state {
+
+ #define RF_UNKNOWN 0x0000
+ #define RF_IRIS_WCN3620 0x3620
++#define RF_IRIS_WCN3660 0x3660
+ #define RF_IRIS_WCN3680 0x3680
+
+ static inline void buff_to_be(u32 *buf, size_t len)
+@@ -128,7 +129,6 @@ struct wcn36xx_vif {
+ enum wcn36xx_hal_bss_type bss_type;
+
+ /* Power management */
+- bool allow_bmps;
+ enum wcn36xx_power_state pw_state;
+
+ u8 bss_index;
+@@ -247,6 +247,7 @@ struct wcn36xx {
+ struct cfg80211_scan_request *scan_req;
+ bool sw_scan;
+ u8 sw_scan_opchannel;
++ bool sw_scan_init;
+ u8 sw_scan_channel;
+ struct ieee80211_vif *sw_scan_vif;
+ struct mutex scan_lock;
+diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
+index 4c944e595978b..ac7787e1a7f61 100644
+--- a/drivers/net/wireless/ath/wil6210/debugfs.c
++++ b/drivers/net/wireless/ath/wil6210/debugfs.c
+@@ -1010,20 +1010,14 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf,
+ void *cmd;
+ int cmdlen = len - sizeof(struct wmi_cmd_hdr);
+ u16 cmdid;
+- int rc, rc1;
++ int rc1;
+
+- if (cmdlen < 0)
++ if (cmdlen < 0 || *ppos != 0)
+ return -EINVAL;
+
+- wmi = kmalloc(len, GFP_KERNEL);
+- if (!wmi)
+- return -ENOMEM;
+-
+- rc = simple_write_to_buffer(wmi, len, ppos, buf, len);
+- if (rc < 0) {
+- kfree(wmi);
+- return rc;
+- }
++ wmi = memdup_user(buf, len);
++ if (IS_ERR(wmi))
++ return PTR_ERR(wmi);
+
+ cmd = (cmdlen > 0) ? &wmi[1] : NULL;
+ cmdid = le16_to_cpu(wmi->command_id);
+@@ -1033,7 +1027,7 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf,
+
+ wil_info(wil, "0x%04x[%d] -> %d\n", cmdid, cmdlen, rc1);
+
+- return rc;
++ return len;
+ }
+
+ static const struct file_operations fops_wmi = {
+diff --git a/drivers/net/wireless/atmel/atmel_cs.c b/drivers/net/wireless/atmel/atmel_cs.c
+index 453bb84cb3386..58bba9875d366 100644
+--- a/drivers/net/wireless/atmel/atmel_cs.c
++++ b/drivers/net/wireless/atmel/atmel_cs.c
+@@ -72,6 +72,7 @@ struct local_info {
+ static int atmel_probe(struct pcmcia_device *p_dev)
+ {
+ struct local_info *local;
++ int ret;
+
+ dev_dbg(&p_dev->dev, "atmel_attach()\n");
+
+@@ -82,8 +83,16 @@ static int atmel_probe(struct pcmcia_device *p_dev)
+
+ p_dev->priv = local;
+
+- return atmel_config(p_dev);
+-} /* atmel_attach */
++ ret = atmel_config(p_dev);
++ if (ret)
++ goto err_free_priv;
++
++ return 0;
++
++err_free_priv:
++ kfree(p_dev->priv);
++ return ret;
++}
+
+ static void atmel_detach(struct pcmcia_device *link)
+ {
+diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h
+index 9fc7c088a539e..67b4bac048e58 100644
+--- a/drivers/net/wireless/broadcom/b43/b43.h
++++ b/drivers/net/wireless/broadcom/b43/b43.h
+@@ -651,7 +651,7 @@ struct b43_iv {
+ union {
+ __be16 d16;
+ __be32 d32;
+- } data __packed;
++ } __packed data;
+ } __packed;
+
+
+diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c
+index d5a1a5c582366..ac72ca39e409b 100644
+--- a/drivers/net/wireless/broadcom/b43/phy_g.c
++++ b/drivers/net/wireless/broadcom/b43/phy_g.c
+@@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_scan(struct b43_wldev *dev)
+ b43_phy_mask(dev, B43_PHY_G_CRS, 0x7FFF);
+ b43_set_all_gains(dev, 3, 8, 1);
+
+- start = (channel - 5 > 0) ? channel - 5 : 1;
++ start = (channel > 5) ? channel - 5 : 1;
+ end = (channel + 5 < 14) ? channel + 5 : 13;
+
+ for (i = start; i <= end; i++) {
+diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c
+index cf3ccf4ddfe72..aa5c994656749 100644
+--- a/drivers/net/wireless/broadcom/b43/phy_n.c
++++ b/drivers/net/wireless/broadcom/b43/phy_n.c
+@@ -582,7 +582,7 @@ static void b43_nphy_adjust_lna_gain_table(struct b43_wldev *dev)
+ u16 data[4];
+ s16 gain[2];
+ u16 minmax[2];
+- static const u16 lna_gain[4] = { -2, 10, 19, 25 };
++ static const s16 lna_gain[4] = { -2, 10, 19, 25 };
+
+ if (nphy->hang_avoid)
+ b43_nphy_stay_in_carrier_search(dev, 1);
+diff --git a/drivers/net/wireless/broadcom/b43legacy/b43legacy.h b/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
+index 6b0cec467938f..f49365d14619f 100644
+--- a/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
++++ b/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
+@@ -379,7 +379,7 @@ struct b43legacy_iv {
+ union {
+ __be16 d16;
+ __be32 d32;
+- } data __packed;
++ } __packed data;
+ } __packed;
+
+ #define B43legacy_PHYMODE(phytype) (1 << (phytype))
+diff --git a/drivers/net/wireless/broadcom/b43legacy/phy.c b/drivers/net/wireless/broadcom/b43legacy/phy.c
+index 05404fbd1e70b..c1395e622759e 100644
+--- a/drivers/net/wireless/broadcom/b43legacy/phy.c
++++ b/drivers/net/wireless/broadcom/b43legacy/phy.c
+@@ -1123,7 +1123,7 @@ void b43legacy_phy_lo_b_measure(struct b43legacy_wldev *dev)
+ struct b43legacy_phy *phy = &dev->phy;
+ u16 regstack[12] = { 0 };
+ u16 mls;
+- u16 fval;
++ s16 fval;
+ int i;
+ int j;
+
+diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c
+index 06891b4f837b9..fdf78c10a05c2 100644
+--- a/drivers/net/wireless/broadcom/b43legacy/radio.c
++++ b/drivers/net/wireless/broadcom/b43legacy/radio.c
+@@ -283,7 +283,7 @@ u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev)
+ & 0x7FFF);
+ b43legacy_set_all_gains(dev, 3, 8, 1);
+
+- start = (channel - 5 > 0) ? channel - 5 : 1;
++ start = (channel > 5) ? channel - 5 : 1;
+ end = (channel + 5 < 14) ? channel + 5 : 13;
+
+ for (i = start; i <= end; i++) {
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+index 9db12ffd2ff80..b14c54da56ed9 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -90,6 +90,9 @@
+ #define BRCMF_ASSOC_PARAMS_FIXED_SIZE \
+ (sizeof(struct brcmf_assoc_params_le) - sizeof(u16))
+
++#define BRCMF_MAX_CHANSPEC_LIST \
++ (BRCMF_DCMD_MEDLEN / sizeof(__le32) - 1)
++
+ static bool check_vif_up(struct brcmf_cfg80211_vif *vif)
+ {
+ if (!test_bit(BRCMF_VIF_STATUS_READY, &vif->sme_state)) {
+@@ -1347,13 +1350,14 @@ static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
+ {
+ struct brcmf_pub *drvr = ifp->drvr;
+ struct brcmf_wsec_pmk_le pmk;
+- int i, err;
++ int err;
+
+- /* convert to firmware key format */
+- pmk.key_len = cpu_to_le16(pmk_len << 1);
+- pmk.flags = cpu_to_le16(BRCMF_WSEC_PASSPHRASE);
+- for (i = 0; i < pmk_len; i++)
+- snprintf(&pmk.key[2 * i], 3, "%02x", pmk_data[i]);
++ memset(&pmk, 0, sizeof(pmk));
++
++ /* pass pmk directly */
++ pmk.key_len = cpu_to_le16(pmk_len);
++ pmk.flags = cpu_to_le16(0);
++ memcpy(pmk.key, pmk_data, pmk_len);
+
+ /* store psk in firmware */
+ err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK,
+@@ -5887,6 +5891,11 @@ static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg,
+ (struct brcmf_cfg80211_assoc_ielen_le *)cfg->extra_buf;
+ req_len = le32_to_cpu(assoc_info->req_len);
+ resp_len = le32_to_cpu(assoc_info->resp_len);
++ if (req_len > WL_EXTRA_BUF_MAX || resp_len > WL_EXTRA_BUF_MAX) {
++ bphy_err(drvr, "invalid lengths in assoc info: req %u resp %u\n",
++ req_len, resp_len);
++ return -EINVAL;
++ }
+ if (req_len) {
+ err = brcmf_fil_iovar_data_get(ifp, "assoc_req_ies",
+ cfg->extra_buf,
+@@ -6204,18 +6213,20 @@ static s32 brcmf_notify_rssi(struct brcmf_if *ifp,
+ {
+ struct brcmf_cfg80211_vif *vif = ifp->vif;
+ struct brcmf_rssi_be *info = data;
+- s32 rssi, snr, noise;
++ s32 rssi, snr = 0, noise = 0;
+ s32 low, high, last;
+
+- if (e->datalen < sizeof(*info)) {
++ if (e->datalen >= sizeof(*info)) {
++ rssi = be32_to_cpu(info->rssi);
++ snr = be32_to_cpu(info->snr);
++ noise = be32_to_cpu(info->noise);
++ } else if (e->datalen >= sizeof(rssi)) {
++ rssi = be32_to_cpu(*(__be32 *)data);
++ } else {
+ brcmf_err("insufficient RSSI event data\n");
+ return 0;
+ }
+
+- rssi = be32_to_cpu(info->rssi);
+- snr = be32_to_cpu(info->snr);
+- noise = be32_to_cpu(info->noise);
+-
+ low = vif->cqm_rssi_low;
+ high = vif->cqm_rssi_high;
+ last = vif->cqm_rssi_last;
+@@ -6557,6 +6568,13 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg,
+ band->channels[i].flags = IEEE80211_CHAN_DISABLED;
+
+ total = le32_to_cpu(list->count);
++ if (total > BRCMF_MAX_CHANSPEC_LIST) {
++ bphy_err(drvr, "Invalid count of channel Spec. (%u)\n",
++ total);
++ err = -EINVAL;
++ goto fail_pbuf;
++ }
++
+ for (i = 0; i < total; i++) {
+ ch.chspec = (u16)le32_to_cpu(list->element[i]);
+ cfg->d11inf.decchspec(&ch);
+@@ -6702,6 +6720,13 @@ static int brcmf_enable_bw40_2g(struct brcmf_cfg80211_info *cfg)
+ band = cfg_to_wiphy(cfg)->bands[NL80211_BAND_2GHZ];
+ list = (struct brcmf_chanspec_list *)pbuf;
+ num_chan = le32_to_cpu(list->count);
++ if (num_chan > BRCMF_MAX_CHANSPEC_LIST) {
++ bphy_err(drvr, "Invalid count of channel Spec. (%u)\n",
++ num_chan);
++ kfree(pbuf);
++ return -EINVAL;
++ }
++
+ for (i = 0; i < num_chan; i++) {
+ ch.chspec = (u16)le32_to_cpu(list->element[i]);
+ cfg->d11inf.decchspec(&ch);
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+index e3758bd86acf0..f29de630908d7 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+@@ -264,6 +264,7 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
+ err);
+ goto done;
+ }
++ buf[sizeof(buf) - 1] = '\0';
+ ptr = (char *)buf;
+ strsep(&ptr, "\n");
+
+@@ -280,15 +281,17 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
+ if (err) {
+ brcmf_dbg(TRACE, "retrieving clmver failed, %d\n", err);
+ } else {
++ buf[sizeof(buf) - 1] = '\0';
+ clmver = (char *)buf;
+- /* store CLM version for adding it to revinfo debugfs file */
+- memcpy(ifp->drvr->clmver, clmver, sizeof(ifp->drvr->clmver));
+
+ /* Replace all newline/linefeed characters with space
+ * character
+ */
+ strreplace(clmver, '\n', ' ');
+
++ /* store CLM version for adding it to revinfo debugfs file */
++ memcpy(ifp->drvr->clmver, clmver, sizeof(ifp->drvr->clmver));
++
+ brcmf_dbg(INFO, "CLM version = %s\n", clmver);
+ }
+
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+index db5f8535fdb57..f03fc6f1f8333 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+@@ -295,6 +295,7 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb,
+ struct brcmf_pub *drvr = ifp->drvr;
+ struct ethhdr *eh;
+ int head_delta;
++ unsigned int tx_bytes = skb->len;
+
+ brcmf_dbg(DATA, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx);
+
+@@ -337,6 +338,7 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb,
+ bphy_err(drvr, "%s: failed to expand headroom\n",
+ brcmf_ifname(ifp));
+ atomic_inc(&drvr->bus_if->stats.pktcow_failed);
++ dev_kfree_skb(skb);
+ goto done;
+ }
+ }
+@@ -369,7 +371,7 @@ done:
+ ndev->stats.tx_dropped++;
+ } else {
+ ndev->stats.tx_packets++;
+- ndev->stats.tx_bytes += skb->len;
++ ndev->stats.tx_bytes += tx_bytes;
+ }
+
+ /* Return ok: we always eat the packet */
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
+index 6d5188b78f2de..0af452dca7664 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c
+@@ -75,6 +75,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
+ },
+ .driver_data = (void *)&acepc_t8_data,
+ },
++ {
++ /* Cyberbook T116 rugged tablet */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"),
++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"),
++ },
++ /* The factory image nvram file is identical to the ACEPC T8 one */
++ .driver_data = (void *)&acepc_t8_data,
++ },
+ {
+ /* Match for the GPDwin which unfortunately uses somewhat
+ * generic dmi strings, which is why we test for 4 strings.
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+index 0eb13e5df5177..c54d8722e755c 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+@@ -207,6 +207,8 @@ static int brcmf_init_nvram_parser(struct nvram_parser *nvp,
+ size = BRCMF_FW_MAX_NVRAM_SIZE;
+ else
+ size = data_len;
++ /* Add space for properties we may add */
++ size += strlen(BRCMF_FW_DEFAULT_BOARDREV) + 1;
+ /* Alloc for extra 0 byte + roundup by 4 + length field */
+ size += 1 + 3 + sizeof(u32);
+ nvp->nvram = kzalloc(size, GFP_KERNEL);
+@@ -693,7 +695,7 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
+ {
+ struct brcmf_fw_item *first = &req->items[0];
+ struct brcmf_fw *fwctx;
+- char *alt_path;
++ char *alt_path = NULL;
+ int ret;
+
+ brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
+@@ -712,7 +714,9 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
+ fwctx->done = fw_cb;
+
+ /* First try alternative board-specific path if any */
+- alt_path = brcm_alt_fw_path(first->path, fwctx->req->board_type);
++ if (fwctx->req->board_type)
++ alt_path = brcm_alt_fw_path(first->path,
++ fwctx->req->board_type);
+ if (alt_path) {
+ ret = request_firmware_nowait(THIS_MODULE, true, alt_path,
+ fwctx->dev, GFP_KERNEL, fwctx,
+@@ -742,6 +746,11 @@ brcmf_fw_alloc_request(u32 chip, u32 chiprev,
+ u32 i, j;
+ char end = '\0';
+
++ if (chiprev >= BITS_PER_TYPE(u32)) {
++ brcmf_err("Invalid chip revision %u\n", chiprev);
++ return NULL;
++ }
++
+ for (i = 0; i < table_size; i++) {
+ if (mapping_table[i].chipid == chip &&
+ mapping_table[i].revmask & BIT(chiprev))
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
+index bc3f4e4edcdf9..dac7eb77799bd 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
+@@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work)
+ brcmf_fweh_event_name(event->code), event->code,
+ event->emsg.ifidx, event->emsg.bsscfgidx,
+ event->emsg.addr);
++ if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) {
++ bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx);
++ goto event_free;
++ }
+
+ /* convert event message */
+ emsg_be = &event->emsg;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+index ff2ef557f0ead..2a1590cc73ab2 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+@@ -383,7 +383,12 @@ struct brcmf_scan_params_le {
+ * fixed parameter portion is assumed, otherwise
+ * ssid in the fixed portion is ignored
+ */
+- __le16 channel_list[1]; /* list of chanspecs */
++ union {
++ __le16 padding; /* Reserve space for at least 1 entry for abort
++ * which uses an on stack brcmf_scan_params_le
++ */
++ DECLARE_FLEX_ARRAY(__le16, channel_list); /* chanspecs */
++ };
+ };
+
+ struct brcmf_scan_results {
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
+index 7c8e08ee8f0ff..bd3b234b78038 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
+@@ -346,8 +346,11 @@ brcmf_msgbuf_alloc_pktid(struct device *dev,
+ count++;
+ } while (count < pktids->array_size);
+
+- if (count == pktids->array_size)
++ if (count == pktids->array_size) {
++ dma_unmap_single(dev, *physaddr, skb->len - data_offset,
++ pktids->direction);
+ return -ENOMEM;
++ }
+
+ array[*idx].data_offset = data_offset;
+ array[*idx].physaddr = *physaddr;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+index 8b149996fc000..6d8a042170182 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+@@ -12,6 +12,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/bcma/bcma.h>
+ #include <linux/sched.h>
++#include <linux/io.h>
+ #include <asm/unaligned.h>
+
+ #include <soc.h>
+@@ -59,6 +60,13 @@ BRCMF_FW_DEF(4366B, "brcmfmac4366b-pcie");
+ BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie");
+ BRCMF_FW_DEF(4371, "brcmfmac4371-pcie");
+
++/* firmware config files */
++MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.txt");
++MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
++
++/* per-board firmware binaries */
++MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.bin");
++
+ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
+ BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602),
+ BRCMF_FW_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C),
+@@ -447,47 +455,6 @@ brcmf_pcie_write_ram32(struct brcmf_pciedev_info *devinfo, u32 mem_offset,
+ }
+
+
+-static void
+-brcmf_pcie_copy_mem_todev(struct brcmf_pciedev_info *devinfo, u32 mem_offset,
+- void *srcaddr, u32 len)
+-{
+- void __iomem *address = devinfo->tcm + mem_offset;
+- __le32 *src32;
+- __le16 *src16;
+- u8 *src8;
+-
+- if (((ulong)address & 4) || ((ulong)srcaddr & 4) || (len & 4)) {
+- if (((ulong)address & 2) || ((ulong)srcaddr & 2) || (len & 2)) {
+- src8 = (u8 *)srcaddr;
+- while (len) {
+- iowrite8(*src8, address);
+- address++;
+- src8++;
+- len--;
+- }
+- } else {
+- len = len / 2;
+- src16 = (__le16 *)srcaddr;
+- while (len) {
+- iowrite16(le16_to_cpu(*src16), address);
+- address += 2;
+- src16++;
+- len--;
+- }
+- }
+- } else {
+- len = len / 4;
+- src32 = (__le32 *)srcaddr;
+- while (len) {
+- iowrite32(le32_to_cpu(*src32), address);
+- address += 4;
+- src32++;
+- len--;
+- }
+- }
+-}
+-
+-
+ static void
+ brcmf_pcie_copy_dev_tomem(struct brcmf_pciedev_info *devinfo, u32 mem_offset,
+ void *dstaddr, u32 len)
+@@ -659,7 +626,7 @@ static int brcmf_pcie_exit_download_state(struct brcmf_pciedev_info *devinfo,
+ }
+
+ if (!brcmf_chip_set_active(devinfo->ci, resetintr))
+- return -EINVAL;
++ return -EIO;
+ return 0;
+ }
+
+@@ -1151,6 +1118,10 @@ static int brcmf_pcie_init_ringbuffers(struct brcmf_pciedev_info *devinfo)
+ BRCMF_NROF_H2D_COMMON_MSGRINGS;
+ max_completionrings = BRCMF_NROF_D2H_COMMON_MSGRINGS;
+ }
++ if (max_flowrings > 512) {
++ brcmf_err(bus, "invalid max_flowrings(%d)\n", max_flowrings);
++ return -EIO;
++ }
+
+ if (devinfo->dma_idx_sz != 0) {
+ bufsz = (max_submissionrings + max_completionrings) *
+@@ -1348,6 +1319,18 @@ static void brcmf_pcie_down(struct device *dev)
+ {
+ }
+
++static int brcmf_pcie_preinit(struct device *dev)
++{
++ struct brcmf_bus *bus_if = dev_get_drvdata(dev);
++ struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
++
++ brcmf_dbg(PCIE, "Enter\n");
++
++ brcmf_pcie_intr_enable(buspub->devinfo);
++ brcmf_pcie_hostready(buspub->devinfo);
++
++ return 0;
++}
+
+ static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb)
+ {
+@@ -1456,6 +1439,7 @@ static int brcmf_pcie_reset(struct device *dev)
+ }
+
+ static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
++ .preinit = brcmf_pcie_preinit,
+ .txdata = brcmf_pcie_tx,
+ .stop = brcmf_pcie_down,
+ .txctl = brcmf_pcie_tx_ctlpkt,
+@@ -1563,8 +1547,8 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo,
+ return err;
+
+ brcmf_dbg(PCIE, "Download FW %s\n", devinfo->fw_name);
+- brcmf_pcie_copy_mem_todev(devinfo, devinfo->ci->rambase,
+- (void *)fw->data, fw->size);
++ memcpy_toio(devinfo->tcm + devinfo->ci->rambase,
++ (void *)fw->data, fw->size);
+
+ resetintr = get_unaligned_le32(fw->data);
+ release_firmware(fw);
+@@ -1578,7 +1562,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo,
+ brcmf_dbg(PCIE, "Download NVRAM %s\n", devinfo->nvram_name);
+ address = devinfo->ci->rambase + devinfo->ci->ramsize -
+ nvram_len;
+- brcmf_pcie_copy_mem_todev(devinfo, address, nvram, nvram_len);
++ memcpy_toio(devinfo->tcm + address, nvram, nvram_len);
+ brcmf_fw_nvram_free(nvram);
+ } else {
+ brcmf_dbg(PCIE, "No matching NVRAM file found %s\n",
+@@ -1777,6 +1761,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
+ ret = brcmf_chip_get_raminfo(devinfo->ci);
+ if (ret) {
+ brcmf_err(bus, "Failed to get RAM info\n");
++ release_firmware(fw);
++ brcmf_fw_nvram_free(nvram);
+ goto fail;
+ }
+
+@@ -1826,9 +1812,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
+
+ init_waitqueue_head(&devinfo->mbdata_resp_wait);
+
+- brcmf_pcie_intr_enable(devinfo);
+- brcmf_pcie_hostready(devinfo);
+-
+ ret = brcmf_attach(&devinfo->pdev->dev);
+ if (ret)
+ goto fail;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c
+index fabfbb0b40b0c..d0a7465be586d 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c
+@@ -158,12 +158,12 @@ static int brcmf_pno_set_random(struct brcmf_if *ifp, struct brcmf_pno_info *pi)
+ struct brcmf_pno_macaddr_le pfn_mac;
+ u8 *mac_addr = NULL;
+ u8 *mac_mask = NULL;
+- int err, i;
++ int err, i, ri;
+
+- for (i = 0; i < pi->n_reqs; i++)
+- if (pi->reqs[i]->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+- mac_addr = pi->reqs[i]->mac_addr;
+- mac_mask = pi->reqs[i]->mac_addr_mask;
++ for (ri = 0; ri < pi->n_reqs; ri++)
++ if (pi->reqs[ri]->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
++ mac_addr = pi->reqs[ri]->mac_addr;
++ mac_mask = pi->reqs[ri]->mac_addr_mask;
+ break;
+ }
+
+@@ -185,7 +185,7 @@ static int brcmf_pno_set_random(struct brcmf_if *ifp, struct brcmf_pno_info *pi)
+ pfn_mac.mac[0] |= 0x02;
+
+ brcmf_dbg(SCAN, "enabling random mac: reqid=%llu mac=%pM\n",
+- pi->reqs[i]->reqid, pfn_mac.mac);
++ pi->reqs[ri]->reqid, pfn_mac.mac);
+ err = brcmf_fil_iovar_data_set(ifp, "pfn_macaddr", &pfn_mac,
+ sizeof(pfn_mac));
+ if (err)
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+index 8effeb7a7269b..5006aa8317513 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+@@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype {
+ BRCMF_SDIO_FT_SUB,
+ };
+
+-#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu))
++#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu))
+
+ /* SDIO Pad drive strength to select value mappings */
+ struct sdiod_drive_str {
+@@ -629,7 +629,6 @@ BRCMF_FW_CLM_DEF(43752, "brcmfmac43752-sdio");
+
+ /* firmware config files */
+ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt");
+-MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
+
+ /* per-board firmware binaries */
+ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin");
+@@ -3412,6 +3411,7 @@ static int brcmf_sdio_download_firmware(struct brcmf_sdio *bus,
+ /* Take arm out of reset */
+ if (!brcmf_chip_set_active(bus->ci, rstvec)) {
+ brcmf_err("error getting out of ARM core reset\n");
++ bcmerror = -EIO;
+ goto err;
+ }
+
+diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
+index 65dd8cff1b011..bcbf197fa9372 100644
+--- a/drivers/net/wireless/cisco/airo.c
++++ b/drivers/net/wireless/cisco/airo.c
+@@ -5233,7 +5233,7 @@ static int get_wep_tx_idx(struct airo_info *ai)
+ return -1;
+ }
+
+-static int set_wep_key(struct airo_info *ai, u16 index, const char *key,
++static int set_wep_key(struct airo_info *ai, u16 index, const u8 *key,
+ u16 keylen, int perm, int lock)
+ {
+ static const unsigned char macaddr[ETH_ALEN] = { 0x01, 0, 0, 0, 0, 0 };
+@@ -5284,7 +5284,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file)
+ struct net_device *dev = PDE_DATA(inode);
+ struct airo_info *ai = dev->ml_priv;
+ int i, rc;
+- char key[16];
++ u8 key[16];
+ u16 index = 0;
+ int j = 0;
+
+@@ -5312,12 +5312,22 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file)
+ }
+
+ for (i = 0; i < 16*3 && data->wbuffer[i+j]; i++) {
++ int val;
++
++ if (i % 3 == 2)
++ continue;
++
++ val = hex_to_bin(data->wbuffer[i+j]);
++ if (val < 0) {
++ airo_print_err(ai->dev->name, "WebKey passed invalid key hex");
++ return;
++ }
+ switch(i%3) {
+ case 0:
+- key[i/3] = hex_to_bin(data->wbuffer[i+j])<<4;
++ key[i/3] = (u8)val << 4;
+ break;
+ case 1:
+- key[i/3] |= hex_to_bin(data->wbuffer[i+j]);
++ key[i/3] |= (u8)val;
+ break;
+ }
+ }
+@@ -6137,8 +6147,11 @@ static int airo_get_rate(struct net_device *dev,
+ {
+ struct airo_info *local = dev->ml_priv;
+ StatusRid status_rid; /* Card status info */
++ int ret;
+
+- readStatusRid(local, &status_rid, 1);
++ ret = readStatusRid(local, &status_rid, 1);
++ if (ret)
++ return -EBUSY;
+
+ vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000;
+ /* If more than one rate, set auto */
+diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+index ada6ce32c1f19..bb728fb24b8a4 100644
+--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
++++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+@@ -3444,7 +3444,7 @@ static void ipw_rx_queue_reset(struct ipw_priv *priv,
+ dma_unmap_single(&priv->pci_dev->dev,
+ rxq->pool[i].dma_addr,
+ IPW_RX_BUF_SIZE, DMA_FROM_DEVICE);
+- dev_kfree_skb(rxq->pool[i].skb);
++ dev_kfree_skb_irq(rxq->pool[i].skb);
+ rxq->pool[i].skb = NULL;
+ }
+ list_add_tail(&rxq->pool[i].list, &rxq->rx_used);
+@@ -11400,9 +11400,14 @@ static int ipw_wdev_init(struct net_device *dev)
+ set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);
+
+ /* With that information in place, we can now register the wiphy... */
+- if (wiphy_register(wdev->wiphy))
+- rc = -EIO;
++ rc = wiphy_register(wdev->wiphy);
++ if (rc)
++ goto out;
++
++ return 0;
+ out:
++ kfree(priv->ieee->a_band.channels);
++ kfree(priv->ieee->bg_band.channels);
+ return rc;
+ }
+
+diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+index 36d1e6b2568db..4aec1fce1ae29 100644
+--- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
++++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c
+@@ -383,7 +383,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ /* Each fragment may need to have room for encryption
+ * pre/postfix */
+- if (host_encrypt)
++ if (host_encrypt && crypt && crypt->ops)
+ bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len +
+ crypt->ops->extra_mpdu_postfix_len;
+
+diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+index 45abb25b65a9f..04c149ff745e9 100644
+--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
++++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+@@ -3378,10 +3378,12 @@ static DEVICE_ATTR(dump_errors, 0200, NULL, il3945_dump_error_log);
+ *
+ *****************************************************************************/
+
+-static void
++static int
+ il3945_setup_deferred_work(struct il_priv *il)
+ {
+ il->workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!il->workqueue)
++ return -ENOMEM;
+
+ init_waitqueue_head(&il->wait_command_queue);
+
+@@ -3398,6 +3400,8 @@ il3945_setup_deferred_work(struct il_priv *il)
+ timer_setup(&il->watchdog, il_bg_watchdog, 0);
+
+ tasklet_setup(&il->irq_tasklet, il3945_irq_tasklet);
++
++ return 0;
+ }
+
+ static void
+@@ -3717,7 +3721,10 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ }
+
+ il_set_rxon_channel(il, &il->bands[NL80211_BAND_2GHZ].channels[5]);
+- il3945_setup_deferred_work(il);
++ err = il3945_setup_deferred_work(il);
++ if (err)
++ goto out_remove_sysfs;
++
+ il3945_setup_handlers(il);
+ il_power_initialize(il);
+
+@@ -3729,7 +3736,7 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ err = il3945_setup_mac(il);
+ if (err)
+- goto out_remove_sysfs;
++ goto out_destroy_workqueue;
+
+ il_dbgfs_register(il, DRV_NAME);
+
+@@ -3738,9 +3745,10 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ return 0;
+
+-out_remove_sysfs:
++out_destroy_workqueue:
+ destroy_workqueue(il->workqueue);
+ il->workqueue = NULL;
++out_remove_sysfs:
+ sysfs_remove_group(&pdev->dev.kobj, &il3945_attribute_group);
+ out_release_irq:
+ free_irq(il->pci_dev->irq, il);
+diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+index 0223532fd56a0..ff04282e3db03 100644
+--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
++++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+@@ -6211,10 +6211,12 @@ out:
+ mutex_unlock(&il->mutex);
+ }
+
+-static void
++static int
+ il4965_setup_deferred_work(struct il_priv *il)
+ {
+ il->workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!il->workqueue)
++ return -ENOMEM;
+
+ init_waitqueue_head(&il->wait_command_queue);
+
+@@ -6233,6 +6235,8 @@ il4965_setup_deferred_work(struct il_priv *il)
+ timer_setup(&il->watchdog, il_bg_watchdog, 0);
+
+ tasklet_setup(&il->irq_tasklet, il4965_irq_tasklet);
++
++ return 0;
+ }
+
+ static void
+@@ -6617,7 +6621,10 @@ il4965_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ goto out_disable_msi;
+ }
+
+- il4965_setup_deferred_work(il);
++ err = il4965_setup_deferred_work(il);
++ if (err)
++ goto out_free_irq;
++
+ il4965_setup_handlers(il);
+
+ /*********************************************
+@@ -6655,6 +6662,7 @@ il4965_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ out_destroy_workqueue:
+ destroy_workqueue(il->workqueue);
+ il->workqueue = NULL;
++out_free_irq:
+ free_irq(il->pci_dev->irq, il);
+ out_disable_msi:
+ pci_disable_msi(il->pci_dev);
+diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
+index 9a491e5db75bd..150805aec4071 100644
+--- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c
++++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
+@@ -2403,7 +2403,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta,
+ /* Repeat initial/next rate.
+ * For legacy IL_NUMBER_TRY == 1, this loop will not execute.
+ * For HT IL_HT_NUMBER_TRY == 3, this executes twice. */
+- while (repeat_rate > 0 && idx < LINK_QUAL_MAX_RETRY_NUM) {
++ while (repeat_rate > 0 && idx < (LINK_QUAL_MAX_RETRY_NUM - 1)) {
+ if (is_legacy(tbl_type.lq_type)) {
+ if (ant_toggle_cnt < NUM_TRY_BEFORE_ANT_TOGGLE)
+ ant_toggle_cnt++;
+diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
+index 683b632981ed3..83c1ff0d660f7 100644
+--- a/drivers/net/wireless/intel/iwlegacy/common.c
++++ b/drivers/net/wireless/intel/iwlegacy/common.c
+@@ -5173,7 +5173,7 @@ il_mac_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+ memset(&il->current_ht_config, 0, sizeof(struct il_ht_config));
+
+ /* new association get rid of ibss beacon skb */
+- dev_kfree_skb(il->beacon_skb);
++ dev_consume_skb_irq(il->beacon_skb);
+ il->beacon_skb = NULL;
+ il->timestamp = 0;
+
+@@ -5292,7 +5292,7 @@ il_beacon_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+ }
+
+ spin_lock_irqsave(&il->lock, flags);
+- dev_kfree_skb(il->beacon_skb);
++ dev_consume_skb_irq(il->beacon_skb);
+ il->beacon_skb = skb;
+
+ timestamp = ((struct ieee80211_mgmt *)skb->data)->u.beacon.timestamp;
+diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
+index 75e7665773c52..90fe4adca4926 100644
+--- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
++++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c
+@@ -304,7 +304,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw)
+
+ priv->is_open = 1;
+ IWL_DEBUG_MAC80211(priv, "leave\n");
+- return 0;
++ return ret;
+ }
+
+ static void iwlagn_mac_stop(struct ieee80211_hw *hw)
+diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/sta.c b/drivers/net/wireless/intel/iwlwifi/dvm/sta.c
+index ddc14059b07d1..7c3168145e58a 100644
+--- a/drivers/net/wireless/intel/iwlwifi/dvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/dvm/sta.c
+@@ -1086,6 +1086,7 @@ static int iwlagn_send_sta_key(struct iwl_priv *priv,
+ {
+ __le16 key_flags;
+ struct iwl_addsta_cmd sta_cmd;
++ size_t to_copy;
+ int i;
+
+ spin_lock_bh(&priv->sta_lock);
+@@ -1105,7 +1106,9 @@ static int iwlagn_send_sta_key(struct iwl_priv *priv,
+ sta_cmd.key.tkip_rx_tsc_byte2 = tkip_iv32;
+ for (i = 0; i < 5; i++)
+ sta_cmd.key.tkip_rx_ttak[i] = cpu_to_le16(tkip_p1k[i]);
+- memcpy(sta_cmd.key.key, keyconf->key, keyconf->keylen);
++ /* keyconf may contain MIC rx/tx keys which iwl does not use */
++ to_copy = min_t(size_t, sizeof(sta_cmd.key.key), keyconf->keylen);
++ memcpy(sta_cmd.key.key, keyconf->key, to_copy);
+ break;
+ case WLAN_CIPHER_SUITE_WEP104:
+ key_flags |= STA_KEY_FLG_KEY_SIZE_MSK;
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+index 1efac0b2a94d7..9e00d1d7e1468 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+ /*
+ * Copyright (C) 2017 Intel Deutschland GmbH
+- * Copyright (C) 2019-2021 Intel Corporation
++ * Copyright (C) 2019-2022 Intel Corporation
+ */
+ #include <linux/uuid.h>
+ #include "iwl-drv.h"
+@@ -814,10 +814,11 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
+ * only one using version 36, so skip this version entirely.
+ */
+ return IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) >= 38 ||
+- IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 ||
+- (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
+- ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+- CSR_HW_REV_TYPE_7265D));
++ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 17 &&
++ fwrt->trans->hw_rev != CSR_HW_REV_TYPE_3160) ||
++ (IWL_UCODE_SERIAL(fwrt->fw->ucode_ver) == 29 &&
++ ((fwrt->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
++ CSR_HW_REV_TYPE_7265D));
+ }
+ IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+index 6dcafd0a3d4b1..c69f3fb833327 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+@@ -1022,7 +1022,7 @@ iwl_dump_ini_prph_mac_iter(struct iwl_fw_runtime *fwrt,
+ range->range_data_size = reg->dev_addr.size;
+ for (i = 0; i < le32_to_cpu(reg->dev_addr.size); i += 4) {
+ prph_val = iwl_read_prph(fwrt->trans, addr + i);
+- if (prph_val == 0x5a5a5a5a)
++ if ((prph_val & ~0xf) == 0xa5a5a5a0)
+ return -EBUSY;
+ *val++ = cpu_to_le32(prph_val);
+ }
+@@ -1362,13 +1362,13 @@ static void iwl_ini_get_rxf_data(struct iwl_fw_runtime *fwrt,
+ if (!data)
+ return;
+
++ memset(data, 0, sizeof(*data));
++
+ /* make sure only one bit is set in only one fid */
+ if (WARN_ONCE(hweight_long(fid1) + hweight_long(fid2) != 1,
+ "fid1=%x, fid2=%x\n", fid1, fid2))
+ return;
+
+- memset(data, 0, sizeof(*data));
+-
+ if (fid1) {
+ fifo_idx = ffs(fid1) - 1;
+ if (WARN_ONCE(fifo_idx >= MAX_NUM_LMAC, "fifo_idx=%d\n",
+@@ -1532,13 +1532,11 @@ iwl_dump_ini_dbgi_sram_iter(struct iwl_fw_runtime *fwrt,
+ return -EBUSY;
+
+ range->range_data_size = reg->dev_addr.size;
+- iwl_write_prph_no_grab(fwrt->trans, DBGI_SRAM_TARGET_ACCESS_CFG,
+- DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK);
+ for (i = 0; i < (le32_to_cpu(reg->dev_addr.size) / 4); i++) {
+ prph_data = iwl_read_prph(fwrt->trans, (i % 2) ?
+ DBGI_SRAM_TARGET_ACCESS_RDATA_MSB :
+ DBGI_SRAM_TARGET_ACCESS_RDATA_LSB);
+- if (prph_data == 0x5a5a5a5a) {
++ if ((prph_data & ~0xf) == 0xa5a5a5a0) {
+ iwl_trans_release_nic_access(fwrt->trans);
+ return -EBUSY;
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
+index a152ce3064759..e372f935f6983 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
+@@ -317,8 +317,10 @@ static void *iwl_dbgfs_fw_info_seq_next(struct seq_file *seq,
+ const struct iwl_fw *fw = priv->fwrt->fw;
+
+ *pos = ++state->pos;
+- if (*pos >= fw->ucode_capa.n_cmd_versions)
++ if (*pos >= fw->ucode_capa.n_cmd_versions) {
++ kfree(state);
+ return NULL;
++ }
+
+ return state;
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+index dde22bdc87039..069fcbc46d2ba 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+@@ -284,16 +284,19 @@ int iwl_pnvm_load(struct iwl_trans *trans,
+ /* First attempt to get the PNVM from BIOS */
+ package = iwl_uefi_get_pnvm(trans, &len);
+ if (!IS_ERR_OR_NULL(package)) {
+- data = kmemdup(package->data, len, GFP_KERNEL);
++ if (len >= sizeof(*package)) {
++ /* we need only the data */
++ len -= sizeof(*package);
++ data = kmemdup(package->data, len, GFP_KERNEL);
++ } else {
++ data = NULL;
++ }
+
+ /* free package regardless of whether kmemdup succeeded */
+ kfree(package);
+
+- if (data) {
+- /* we need only the data size */
+- len -= sizeof(*package);
++ if (data)
+ goto parse;
+- }
+ }
+
+ /* If it's not available, try from the filesystem */
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+index c875bf35533ce..009dd4be597b0 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
++++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+@@ -86,6 +86,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+ if (len < tlv_len) {
+ IWL_ERR(trans, "invalid TLV len: %zd/%u\n",
+ len, tlv_len);
++ kfree(reduce_power_data);
+ reduce_power_data = ERR_PTR(-EINVAL);
+ goto out;
+ }
+@@ -105,6 +106,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+ IWL_DEBUG_FW(trans,
+ "Couldn't allocate (more) reduce_power_data\n");
+
++ kfree(reduce_power_data);
+ reduce_power_data = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+@@ -134,6 +136,10 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+ done:
+ if (!size) {
+ IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n");
++ /* Better safe than sorry, but 'reduce_power_data' should
++ * always be NULL if !size.
++ */
++ kfree(reduce_power_data);
+ reduce_power_data = ERR_PTR(-ENOENT);
+ goto out;
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
+index 45d0b36d79b5a..d552c656ac9fe 100644
+--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
++++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
+@@ -2,7 +2,8 @@
+ /*
+ * Copyright(c) 2021 Intel Corporation
+ */
+-
++#ifndef __iwl_fw_uefi__
++#define __iwl_fw_uefi__
+
+ #define IWL_UEFI_OEM_PNVM_NAME L"UefiCnvWlanOemSignedPnvm"
+ #define IWL_UEFI_REDUCED_POWER_NAME L"UefiCnvWlanReducedPower"
+@@ -40,3 +41,5 @@ void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ #endif /* CONFIG_EFI */
++
++#endif /* __iwl_fw_uefi__ */
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+index cf796403c45c0..c8dff76ac03c1 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+ /*
+- * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
++ * Copyright (C) 2005-2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2016 Intel Deutschland GmbH
+ */
+@@ -104,9 +104,10 @@
+ /* GIO Chicken Bits (PCI Express bus link power management) */
+ #define CSR_GIO_CHICKEN_BITS (CSR_BASE+0x100)
+
+-/* Doorbell NMI (since Bz) */
++/* Doorbell - since Bz
++ * connected to UREG_DOORBELL_TO_ISR6 (lower 16 bits only)
++ */
+ #define CSR_DOORBELL_VECTOR (CSR_BASE + 0x130)
+-#define CSR_DOORBELL_VECTOR_NMI BIT(1)
+
+ /* host chicken bits */
+ #define CSR_HOST_CHICKEN (CSR_BASE + 0x204)
+@@ -318,6 +319,7 @@ enum {
+ #define CSR_HW_REV_TYPE_2x00 (0x0000100)
+ #define CSR_HW_REV_TYPE_105 (0x0000110)
+ #define CSR_HW_REV_TYPE_135 (0x0000120)
++#define CSR_HW_REV_TYPE_3160 (0x0000164)
+ #define CSR_HW_REV_TYPE_7265D (0x0000210)
+ #define CSR_HW_REV_TYPE_NONE (0x00001F0)
+ #define CSR_HW_REV_TYPE_QNJ (0x0000360)
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+index 125479b5c0d61..f9bd081dd9e08 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+@@ -134,6 +134,12 @@ static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
+ alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1)
+ goto err;
+
++ if (buf_location == IWL_FW_INI_LOCATION_DRAM_PATH &&
++ alloc->req_size == 0) {
++ IWL_ERR(trans, "WRT: Invalid DRAM buffer allocation requested size (0)\n");
++ return -EINVAL;
++ }
++
+ trans->dbg.fw_mon_cfg[alloc_id] = *alloc;
+
+ return 0;
+@@ -322,7 +328,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans)
+ struct iwl_dbg_tlv_timer_node *node, *tmp;
+
+ list_for_each_entry_safe(node, tmp, timer_list, list) {
+- del_timer(&node->timer);
++ del_timer_sync(&node->timer);
+ list_del(&node->list);
+ kfree(node);
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+index 77124b8b235ee..524b0ad873578 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+@@ -131,6 +131,9 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv)
+
+ for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
+ iwl_free_fw_img(drv, drv->fw.img + i);
++
++ /* clear the data for the aborted load case */
++ memset(&drv->fw, 0, sizeof(drv->fw));
+ }
+
+ static int iwl_alloc_fw_desc(struct iwl_drv *drv, struct fw_desc *desc,
+@@ -1271,23 +1274,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op)
+ const struct iwl_op_mode_ops *ops = op->ops;
+ struct dentry *dbgfs_dir = NULL;
+ struct iwl_op_mode *op_mode = NULL;
++ int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY;
++
++ for (retry = 0; retry <= max_retry; retry++) {
+
+ #ifdef CONFIG_IWLWIFI_DEBUGFS
+- drv->dbgfs_op_mode = debugfs_create_dir(op->name,
+- drv->dbgfs_drv);
+- dbgfs_dir = drv->dbgfs_op_mode;
++ drv->dbgfs_op_mode = debugfs_create_dir(op->name,
++ drv->dbgfs_drv);
++ dbgfs_dir = drv->dbgfs_op_mode;
+ #endif
+
+- op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir);
++ op_mode = ops->start(drv->trans, drv->trans->cfg,
++ &drv->fw, dbgfs_dir);
++
++ if (op_mode)
++ return op_mode;
++
++ IWL_ERR(drv, "retry init count %d\n", retry);
+
+ #ifdef CONFIG_IWLWIFI_DEBUGFS
+- if (!op_mode) {
+ debugfs_remove_recursive(drv->dbgfs_op_mode);
+ drv->dbgfs_op_mode = NULL;
+- }
+ #endif
++ }
+
+- return op_mode;
++ return NULL;
+ }
+
+ static void _iwl_op_mode_stop(struct iwl_drv *drv)
+@@ -1325,6 +1336,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
+ int i;
+ bool load_module = false;
+ bool usniffer_images = false;
++ bool failure = true;
+
+ fw->ucode_capa.max_probe_length = IWL_DEFAULT_MAX_PROBE_LENGTH;
+ fw->ucode_capa.standard_phy_calibration_size =
+@@ -1585,15 +1597,9 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
+ * else from proceeding if the module fails to load
+ * or hangs loading.
+ */
+- if (load_module) {
++ if (load_module)
+ request_module("%s", op->name);
+-#ifdef CONFIG_IWLWIFI_OPMODE_MODULAR
+- if (err)
+- IWL_ERR(drv,
+- "failed to load module %s (error %d), is dynamic loading enabled?\n",
+- op->name, err);
+-#endif
+- }
++ failure = false;
+ goto free;
+
+ try_again:
+@@ -1608,7 +1614,12 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
+ out_unbind:
+ complete(&drv->request_firmware_complete);
+ device_release_driver(drv->trans->dev);
++ /* drv has just been freed by the release */
++ failure = false;
+ free:
++ if (failure)
++ iwl_dealloc_ucode(drv);
++
+ if (pieces) {
+ for (i = 0; i < ARRAY_SIZE(pieces->img); i++)
+ kfree(pieces->img[i].sec);
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
+index b6442df0c6439..56f2fd3b94906 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
+@@ -90,4 +90,7 @@ void iwl_drv_stop(struct iwl_drv *drv);
+ #define IWL_EXPORT_SYMBOL(sym)
+ #endif
+
++/* max retry for init flow */
++#define IWL_MAX_INIT_RETRY 2
++
+ #endif /* __iwl_drv_h__ */
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+index 2517c4ae07ab3..5e76ab6c8ad0a 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+@@ -218,7 +218,7 @@ void iwl_force_nmi(struct iwl_trans *trans)
+ UREG_DOORBELL_TO_ISR6_NMI_BIT);
+ else
+ iwl_write32(trans, CSR_DOORBELL_VECTOR,
+- CSR_DOORBELL_VECTOR_NMI);
++ UREG_DOORBELL_TO_ISR6_NMI_BIT);
+ }
+ IWL_EXPORT_SYMBOL(iwl_force_nmi);
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+index 475f951d4b1ef..fc40cca096c26 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+@@ -541,8 +541,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
+ .has_he = true,
+ .he_cap_elem = {
+ .mac_cap_info[0] =
+- IEEE80211_HE_MAC_CAP0_HTC_HE |
+- IEEE80211_HE_MAC_CAP0_TWT_REQ,
++ IEEE80211_HE_MAC_CAP0_HTC_HE,
+ .mac_cap_info[1] =
+ IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
+ IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+index d0a7d58336a9e..6c4f1c949541a 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+@@ -350,8 +350,6 @@
+ #define WFPM_GP2 0xA030B4
+
+ /* DBGI SRAM Register details */
+-#define DBGI_SRAM_TARGET_ACCESS_CFG 0x00A2E14C
+-#define DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK 0x10000
+ #define DBGI_SRAM_TARGET_ACCESS_RDATA_LSB 0x00A2E154
+ #define DBGI_SRAM_TARGET_ACCESS_RDATA_MSB 0x00A2E158
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+index 9f706fffb5922..6dde3bd8f4416 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+@@ -564,6 +564,7 @@ static void iwl_mvm_wowlan_get_tkip_data(struct ieee80211_hw *hw,
+ }
+
+ for (i = 0; i < IWL_NUM_RSC; i++) {
++ ieee80211_get_key_rx_seq(key, i, &seq);
+ /* wrapping isn't allowed, AP must rekey */
+ if (seq.tkip.iv32 > cur_rx_iv32)
+ cur_rx_iv32 = seq.tkip.iv32;
+@@ -2336,7 +2337,6 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
+ iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert,
+ false, 0);
+ ret = 1;
+- mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
+ goto err;
+ }
+
+@@ -2385,6 +2385,7 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
+ }
+ }
+
++ /* after the successful handshake, we're out of D3 */
+ mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
+
+ /*
+@@ -2455,6 +2456,9 @@ out:
+ */
+ set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
+
++ /* regardless of what happened, we're now out of D3 */
++ mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
++
+ return 1;
+ }
+
+@@ -2496,7 +2500,9 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file)
+
+ /* start pseudo D3 */
+ rtnl_lock();
++ wiphy_lock(mvm->hw->wiphy);
+ err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true);
++ wiphy_unlock(mvm->hw->wiphy);
+ rtnl_unlock();
+ if (err > 0)
+ err = -EINVAL;
+@@ -2552,7 +2558,9 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
+ iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt);
+
+ rtnl_lock();
++ wiphy_lock(mvm->hw->wiphy);
+ __iwl_mvm_resume(mvm, true);
++ wiphy_unlock(mvm->hw->wiphy);
+ rtnl_unlock();
+
+ iwl_mvm_resume_tcm(mvm);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+index 5dc39fbb74d67..0f5c4c2510ef1 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ */
+ #include <linux/vmalloc.h>
++#include <linux/err.h>
+ #include <linux/ieee80211.h>
+ #include <linux/netdevice.h>
+
+@@ -1930,6 +1931,11 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf,
+ if (ret < 0)
+ return ret;
+
++ if (iwl_rx_packet_payload_len(hcmd.resp_pkt) < sizeof(*rsp)) {
++ ret = -EIO;
++ goto out;
++ }
++
+ rsp = (void *)hcmd.resp_pkt->data;
+ if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) {
+ ret = -ENXIO;
+@@ -2007,6 +2013,11 @@ static ssize_t iwl_dbgfs_mem_write(struct file *file,
+ if (ret < 0)
+ return ret;
+
++ if (iwl_rx_packet_payload_len(hcmd.resp_pkt) < sizeof(*rsp)) {
++ ret = -EIO;
++ goto out;
++ }
++
+ rsp = (void *)hcmd.resp_pkt->data;
+ if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) {
+ ret = -ENXIO;
+@@ -2044,7 +2055,6 @@ void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw,
+ void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm)
+ {
+ struct dentry *bcast_dir __maybe_unused;
+- char buf[100];
+
+ spin_lock_init(&mvm->drv_stats_lock);
+
+@@ -2140,6 +2150,11 @@ void iwl_mvm_dbgfs_register(struct iwl_mvm *mvm)
+ * Create a symlink with mac80211. It will be removed when mac80211
+ * exists (before the opmode exists which removes the target.)
+ */
+- snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent);
+- debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf);
++ if (!IS_ERR(mvm->debugfs_dir)) {
++ char buf[100];
++
++ snprintf(buf, 100, "../../%pd2", mvm->debugfs_dir->d_parent);
++ debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir,
++ buf);
++ }
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+index 03e5bf5cb9094..bb5fff8174435 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+@@ -499,7 +499,7 @@ iwl_mvm_ftm_put_target(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ rcu_read_lock();
+
+ sta = rcu_dereference(mvm->fw_id_to_mac_id[mvmvif->ap_sta_id]);
+- if (sta->mfp)
++ if (sta->mfp && (peer->ftm.trigger_based || peer->ftm.non_trigger_based))
+ FTM_PUT_FLAG(PMF);
+
+ rcu_read_unlock();
+@@ -1054,7 +1054,7 @@ static void iwl_mvm_ftm_rtt_smoothing(struct iwl_mvm *mvm,
+ overshoot = IWL_MVM_FTM_INITIATOR_SMOOTH_OVERSHOOT;
+ alpha = IWL_MVM_FTM_INITIATOR_SMOOTH_ALPHA;
+
+- rtt_avg = (alpha * rtt + (100 - alpha) * resp->rtt_avg) / 100;
++ rtt_avg = div_s64(alpha * rtt + (100 - alpha) * resp->rtt_avg, 100);
+
+ IWL_DEBUG_INFO(mvm,
+ "%pM: prev rtt_avg=%lld, new rtt_avg=%lld, rtt=%lld\n",
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+index 74404c96063bc..6d439ae7b50b1 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+@@ -1489,8 +1489,10 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
+ while (!sband && i < NUM_NL80211_BANDS)
+ sband = mvm->hw->wiphy->bands[i++];
+
+- if (WARN_ON_ONCE(!sband))
++ if (WARN_ON_ONCE(!sband)) {
++ ret = -ENODEV;
+ goto error;
++ }
+
+ chan = &sband->channels[0];
+
+@@ -1572,7 +1574,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
+ ret = iwl_mvm_sar_init(mvm);
+ if (ret == 0)
+ ret = iwl_mvm_sar_geo_init(mvm);
+- else if (ret < 0)
++ if (ret < 0)
+ goto error;
+
+ iwl_mvm_tas_init(mvm);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+index 3a4585222d6d4..fa7de3e47b8cc 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+@@ -16,6 +16,7 @@
+ #include <net/ieee80211_radiotap.h>
+ #include <net/tcp.h>
+
++#include "iwl-drv.h"
+ #include "iwl-op-mode.h"
+ #include "iwl-io.h"
+ #include "mvm.h"
+@@ -294,7 +295,6 @@ static const u8 he_if_types_ext_capa_sta[] = {
+ [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING,
+ [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT,
+ [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF,
+- [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT,
+ };
+
+ static const struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = {
+@@ -820,7 +820,10 @@ void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+
+ rcu_read_lock();
+ do {
+- while (likely(!mvmtxq->stopped &&
++ while (likely(!test_bit(IWL_MVM_TXQ_STATE_STOP_FULL,
++ &mvmtxq->state) &&
++ !test_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT,
++ &mvmtxq->state) &&
+ !test_bit(IWL_MVM_STATUS_IN_D3, &mvm->status))) {
+ skb = ieee80211_tx_dequeue(hw, txq);
+
+@@ -1116,9 +1119,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw)
+ {
+ struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ int ret;
++ int retry, max_retry = 0;
+
+ mutex_lock(&mvm->mutex);
+- ret = __iwl_mvm_mac_start(mvm);
++
++ /* we are starting the mac not in error flow, and restart is enabled */
++ if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) &&
++ iwlwifi_mod_params.fw_restart) {
++ max_retry = IWL_MAX_INIT_RETRY;
++ /*
++ * This will prevent mac80211 recovery flows to trigger during
++ * init failures
++ */
++ set_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
++ }
++
++ for (retry = 0; retry <= max_retry; retry++) {
++ ret = __iwl_mvm_mac_start(mvm);
++ if (!ret)
++ break;
++
++ IWL_ERR(mvm, "mac start retry %d\n", retry);
++ }
++ clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
++
+ mutex_unlock(&mvm->mutex);
+
+ return ret;
+@@ -1665,6 +1689,7 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm)
+ struct iwl_mvm_mc_iter_data iter_data = {
+ .mvm = mvm,
+ };
++ int ret;
+
+ lockdep_assert_held(&mvm->mutex);
+
+@@ -1674,6 +1699,22 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm)
+ ieee80211_iterate_active_interfaces_atomic(
+ mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
+ iwl_mvm_mc_iface_iterator, &iter_data);
++
++ /*
++ * Send a (synchronous) ech command so that we wait for the
++ * multiple asynchronous MCAST_FILTER_CMD commands sent by
++ * the interface iterator. Otherwise, we might get here over
++ * and over again (by userspace just sending a lot of these)
++ * and the CPU can send them faster than the firmware can
++ * process them.
++ * Note that the CPU is still faster - but with this we'll
++ * actually send fewer commands overall because the CPU will
++ * not schedule the work in mac80211 as frequently if it's
++ * still running when rescheduled (possibly multiple times).
++ */
++ ret = iwl_mvm_send_cmd_pdu(mvm, ECHO_CMD, 0, 0, NULL);
++ if (ret)
++ IWL_ERR(mvm, "Failed to synchronize multicast groups update\n");
+ }
+
+ static u64 iwl_mvm_prepare_multicast(struct ieee80211_hw *hw,
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+index f877d86b038e3..6b59425dbdb19 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+@@ -727,7 +727,9 @@ struct iwl_mvm_txq {
+ struct list_head list;
+ u16 txq_id;
+ atomic_t tx_request;
+- bool stopped;
++#define IWL_MVM_TXQ_STATE_STOP_FULL 0
++#define IWL_MVM_TXQ_STATE_STOP_REDIRECT 1
++ unsigned long state;
+ };
+
+ static inline struct iwl_mvm_txq *
+@@ -1121,6 +1123,8 @@ struct iwl_mvm {
+ * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
+ * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA
+ * @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it)
++ * @IWL_MVM_STATUS_STARTING: starting mac,
++ * used to disable restart flow while in STARTING state
+ */
+ enum iwl_mvm_status {
+ IWL_MVM_STATUS_HW_RFKILL,
+@@ -1132,6 +1136,7 @@ enum iwl_mvm_status {
+ IWL_MVM_STATUS_FIRMWARE_RUNNING,
+ IWL_MVM_STATUS_NEED_FLUSH_P2P,
+ IWL_MVM_STATUS_IN_D3,
++ IWL_MVM_STATUS_STARTING,
+ };
+
+ /* Keep track of completed init configuration */
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+index da705fcaf0fcc..8a9732b5b9652 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+@@ -445,6 +445,11 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
+ struct iwl_mcc_update_resp *mcc_resp = (void *)pkt->data;
+
+ n_channels = __le32_to_cpu(mcc_resp->n_channels);
++ if (iwl_rx_packet_payload_len(pkt) !=
++ struct_size(mcc_resp, channels, n_channels)) {
++ resp_cp = ERR_PTR(-EINVAL);
++ goto exit;
++ }
+ resp_len = sizeof(struct iwl_mcc_update_resp) +
+ n_channels * sizeof(__le32);
+ resp_cp = kmemdup(mcc_resp, resp_len, GFP_KERNEL);
+@@ -456,6 +461,11 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
+ struct iwl_mcc_update_resp_v3 *mcc_resp_v3 = (void *)pkt->data;
+
+ n_channels = __le32_to_cpu(mcc_resp_v3->n_channels);
++ if (iwl_rx_packet_payload_len(pkt) !=
++ struct_size(mcc_resp_v3, channels, n_channels)) {
++ resp_cp = ERR_PTR(-EINVAL);
++ goto exit;
++ }
+ resp_len = sizeof(struct iwl_mcc_update_resp) +
+ n_channels * sizeof(__le32);
+ resp_cp = kzalloc(resp_len, GFP_KERNEL);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+index 77ea2d0a30916..01f65c9789e72 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+@@ -238,7 +238,8 @@ static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm,
+ */
+ mvm->fw_static_smps_request =
+ req->event == cpu_to_le32(THERMAL_DUAL_CHAIN_REQ_DISABLE);
+- ieee80211_iterate_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
++ ieee80211_iterate_interfaces(mvm->hw,
++ IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER,
+ iwl_mvm_intf_dual_chain_req, NULL);
+ }
+
+@@ -687,6 +688,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
+ int ret;
+
+ rtnl_lock();
++ wiphy_lock(mvm->hw->wiphy);
+ mutex_lock(&mvm->mutex);
+
+ ret = iwl_run_init_mvm_ucode(mvm);
+@@ -702,6 +704,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
+ iwl_mvm_stop_device(mvm);
+
+ mutex_unlock(&mvm->mutex);
++ wiphy_unlock(mvm->hw->wiphy);
+ rtnl_unlock();
+
+ if (ret < 0)
+@@ -758,12 +761,12 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
+ if (!hw)
+ return NULL;
+
+- hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
++ hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+
+ if (cfg->max_tx_agg_size)
+ hw->max_tx_aggregation_subframes = cfg->max_tx_agg_size;
+ else
+- hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
++ hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+
+ op_mode = hw->priv;
+
+@@ -1301,10 +1304,16 @@ static void iwl_mvm_queue_state_change(struct iwl_op_mode *op_mode,
+
+ txq = sta->txq[tid];
+ mvmtxq = iwl_mvm_txq_from_mac80211(txq);
+- mvmtxq->stopped = !start;
++ if (start)
++ clear_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state);
++ else
++ set_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state);
+
+- if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST)
++ if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) {
++ local_bh_disable();
+ iwl_mvm_mac_itxq_xmit(mvm->hw, txq);
++ local_bh_enable();
++ }
+ }
+
+ out:
+@@ -1424,6 +1433,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
+ */
+ if (!mvm->fw_restart && fw_error) {
+ iwl_fw_error_collect(&mvm->fwrt, false);
++ } else if (test_bit(IWL_MVM_STATUS_STARTING,
++ &mvm->status)) {
++ IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n");
+ } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+ struct iwl_mvm_reprobe *reprobe;
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+index 035336a9e755e..6d82725cb87d0 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+ /*
+- * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
++ * Copyright (C) 2012-2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2017 Intel Deutschland GmbH
+ */
+@@ -295,18 +295,31 @@ void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt)
+ * otherwise we might not be able to reuse this phy.
+ */
+ if (ctxt->ref == 0) {
+- struct ieee80211_channel *chan;
++ struct ieee80211_channel *chan = NULL;
+ struct cfg80211_chan_def chandef;
+- struct ieee80211_supported_band *sband = NULL;
+- enum nl80211_band band = NL80211_BAND_2GHZ;
++ struct ieee80211_supported_band *sband;
++ enum nl80211_band band;
++ int channel;
+
+- while (!sband && band < NUM_NL80211_BANDS)
+- sband = mvm->hw->wiphy->bands[band++];
++ for (band = NL80211_BAND_2GHZ; band < NUM_NL80211_BANDS; band++) {
++ sband = mvm->hw->wiphy->bands[band];
+
+- if (WARN_ON(!sband))
+- return;
++ if (!sband)
++ continue;
++
++ for (channel = 0; channel < sband->n_channels; channel++)
++ if (!(sband->channels[channel].flags &
++ IEEE80211_CHAN_DISABLED)) {
++ chan = &sband->channels[channel];
++ break;
++ }
+
+- chan = &sband->channels[0];
++ if (chan)
++ break;
++ }
++
++ if (WARN_ON(!chan))
++ return;
+
+ cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_NO_HT);
+ iwl_mvm_phy_ctxt_changed(mvm, ctxt, &chandef, 1, 1);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
+index f2b090be38980..3d6008da4f9db 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
+@@ -563,6 +563,9 @@ static void iwl_mvm_power_get_vifs_iterator(void *_data, u8 *mac,
+ struct iwl_power_vifs *power_iterator = _data;
+ bool active = mvmvif->phy_ctxt && mvmvif->phy_ctxt->id < NUM_PHY_CTX;
+
++ if (!mvmvif->uploaded)
++ return;
++
+ switch (ieee80211_vif_type_p2p(vif)) {
+ case NL80211_IFTYPE_P2P_DEVICE:
+ break;
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+index c12f303cf652c..411254e9e603f 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+@@ -121,12 +121,39 @@ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
+ struct iwl_rx_mpdu_desc *desc = (void *)pkt->data;
+ unsigned int headlen, fraglen, pad_len = 0;
+ unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control);
++ u8 mic_crc_len = u8_get_bits(desc->mac_flags1,
++ IWL_RX_MPDU_MFLG1_MIC_CRC_LEN_MASK) << 1;
+
+ if (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) {
+ len -= 2;
+ pad_len = 2;
+ }
+
++ /*
++ * For non monitor interface strip the bytes the RADA might not have
++ * removed. As monitor interface cannot exist with other interfaces
++ * this removal is safe.
++ */
++ if (mic_crc_len && !ieee80211_hw_check(mvm->hw, RX_INCLUDES_FCS)) {
++ u32 pkt_flags = le32_to_cpu(pkt->len_n_flags);
++
++ /*
++ * If RADA was not enabled then decryption was not performed so
++ * the MIC cannot be removed.
++ */
++ if (!(pkt_flags & FH_RSCSR_RADA_EN)) {
++ if (WARN_ON(crypt_len > mic_crc_len))
++ return -EINVAL;
++
++ mic_crc_len -= crypt_len;
++ }
++
++ if (WARN_ON(mic_crc_len > len))
++ return -EINVAL;
++
++ len -= mic_crc_len;
++ }
++
+ /* If frame is small enough to fit in skb->head, pull it completely.
+ * If not, only pull ieee80211_hdr (including crypto if present, and
+ * an additional 8 bytes for SNAP/ethertype, see below) so that
+@@ -275,7 +302,8 @@ static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm,
+ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
+ struct ieee80211_hdr *hdr,
+ struct iwl_rx_mpdu_desc *desc,
+- u32 status)
++ u32 status,
++ struct ieee80211_rx_status *stats)
+ {
+ struct iwl_mvm_sta *mvmsta;
+ struct iwl_mvm_vif *mvmvif;
+@@ -304,8 +332,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
+
+ /* good cases */
+ if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK &&
+- !(status & IWL_RX_MPDU_STATUS_REPLAY_ERROR)))
++ !(status & IWL_RX_MPDU_STATUS_REPLAY_ERROR))) {
++ stats->flag |= RX_FLAG_DECRYPTED;
+ return 0;
++ }
+
+ if (!sta)
+ return -1;
+@@ -374,7 +404,7 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
+
+ if (unlikely(ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_has_protected(hdr->frame_control)))
+- return iwl_mvm_rx_mgmt_prot(sta, hdr, desc, status);
++ return iwl_mvm_rx_mgmt_prot(sta, hdr, desc, status, stats);
+
+ if (!ieee80211_has_protected(hdr->frame_control) ||
+ (status & IWL_RX_MPDU_STATUS_SEC_MASK) ==
+@@ -1880,7 +1910,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
+ RCU_INIT_POINTER(mvm->csa_tx_blocked_vif, NULL);
+ /* Unblock BCAST / MCAST station */
+ iwl_mvm_modify_all_sta_disable_tx(mvm, mvmvif, false);
+- cancel_delayed_work_sync(&mvm->cs_tx_unblock_dwork);
++ cancel_delayed_work(&mvm->cs_tx_unblock_dwork);
+ }
+ }
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+index d78e436fa8b53..65e382756de68 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+@@ -1890,7 +1890,10 @@ static u8 iwl_mvm_scan_umac_chan_flags_v2(struct iwl_mvm *mvm,
+ IWL_SCAN_CHANNEL_FLAG_CACHE_ADD;
+
+ /* set fragmented ebs for fragmented scan on HB channels */
+- if (iwl_mvm_is_scan_fragmented(params->hb_type))
++ if ((!iwl_mvm_is_cdb_supported(mvm) &&
++ iwl_mvm_is_scan_fragmented(params->type)) ||
++ (iwl_mvm_is_cdb_supported(mvm) &&
++ iwl_mvm_is_scan_fragmented(params->hb_type)))
+ flags |= IWL_SCAN_CHANNEL_FLAG_EBS_FRAG;
+
+ return flags;
+@@ -1924,22 +1927,19 @@ static void iwl_mvm_scan_6ghz_passive_scan(struct iwl_mvm *mvm,
+ }
+
+ /*
+- * 6GHz passive scan is allowed while associated in a defined time
+- * interval following HW reset or resume flow
++ * 6GHz passive scan is allowed in a defined time interval following HW
++ * reset or resume flow, or while not associated and a large interval
++ * has passed since the last 6GHz passive scan.
+ */
+- if (vif->bss_conf.assoc &&
++ if ((vif->bss_conf.assoc ||
++ time_after(mvm->last_6ghz_passive_scan_jiffies +
++ (IWL_MVM_6GHZ_PASSIVE_SCAN_TIMEOUT * HZ), jiffies)) &&
+ (time_before(mvm->last_reset_or_resume_time_jiffies +
+ (IWL_MVM_6GHZ_PASSIVE_SCAN_ASSOC_TIMEOUT * HZ),
+ jiffies))) {
+- IWL_DEBUG_SCAN(mvm, "6GHz passive scan: associated\n");
+- return;
+- }
+-
+- /* No need for 6GHz passive scan if not enough time elapsed */
+- if (time_after(mvm->last_6ghz_passive_scan_jiffies +
+- (IWL_MVM_6GHZ_PASSIVE_SCAN_TIMEOUT * HZ), jiffies)) {
+- IWL_DEBUG_SCAN(mvm,
+- "6GHz passive scan: timeout did not expire\n");
++ IWL_DEBUG_SCAN(mvm, "6GHz passive scan: %s\n",
++ vif->bss_conf.assoc ? "associated" :
++ "timeout did not expire");
+ return;
+ }
+
+@@ -2490,7 +2490,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
+ return -EIO;
+ }
+
+-#define SCAN_TIMEOUT 20000
++#define SCAN_TIMEOUT 30000
+
+ void iwl_mvm_scan_timeout_wk(struct work_struct *work)
+ {
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+index a64874c05cede..45dfee3ad8c60 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -640,7 +640,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid,
+ queue, iwl_mvm_ac_to_tx_fifo[ac]);
+
+ /* Stop the queue and wait for it to empty */
+- txq->stopped = true;
++ set_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state);
+
+ ret = iwl_trans_wait_tx_queues_empty(mvm->trans, BIT(queue));
+ if (ret) {
+@@ -683,7 +683,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid,
+
+ out:
+ /* Continue using the queue */
+- txq->stopped = false;
++ clear_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state);
+
+ return ret;
+ }
+@@ -1794,6 +1794,7 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm,
+ iwl_mvm_txq_from_mac80211(sta->txq[i]);
+
+ mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE;
++ list_del_init(&mvmtxq->list);
+ }
+ }
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+index e91f8e889df70..ab06dcda1462a 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+@@ -49,14 +49,13 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
+ struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, roc_done_wk);
+
+ /*
+- * Clear the ROC_RUNNING /ROC_AUX_RUNNING status bit.
++ * Clear the ROC_RUNNING status bit.
+ * This will cause the TX path to drop offchannel transmissions.
+ * That would also be done by mac80211, but it is racy, in particular
+ * in the case that the time event actually completed in the firmware
+ * (which is handled in iwl_mvm_te_handle_notif).
+ */
+ clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status);
+- clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status);
+
+ synchronize_net();
+
+@@ -82,9 +81,19 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
+ mvmvif = iwl_mvm_vif_from_mac80211(mvm->p2p_device_vif);
+ iwl_mvm_flush_sta(mvm, &mvmvif->bcast_sta, true);
+ }
+- } else {
++ }
++
++ /*
++ * Clear the ROC_AUX_RUNNING status bit.
++ * This will cause the TX path to drop offchannel transmissions.
++ * That would also be done by mac80211, but it is racy, in particular
++ * in the case that the time event actually completed in the firmware
++ * (which is handled in iwl_mvm_te_handle_notif).
++ */
++ if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) {
+ /* do the same in case of hot spot 2.0 */
+ iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true);
++
+ /* In newer version of this command an aux station is added only
+ * in cases of dedicated tx queue and need to be removed in end
+ * of use */
+@@ -687,11 +696,14 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
+ iwl_mvm_te_clear_data(mvm, te_data);
+ spin_unlock_bh(&mvm->time_event_lock);
+
+- /* When session protection is supported, the te_data->id field
++ /* When session protection is used, the te_data->id field
+ * is reused to save session protection's configuration.
++ * For AUX ROC, HOT_SPOT_CMD is used and the te_data->id field is set
++ * to HOT_SPOT_CMD.
+ */
+ if (fw_has_capa(&mvm->fw->ucode_capa,
+- IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
++ IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD) &&
++ id != HOT_SPOT_CMD) {
+ if (mvmvif && id < SESSION_PROTECT_CONF_MAX_ID) {
+ /* Session protection is still ongoing. Cancel it */
+ iwl_mvm_cancel_session_protection(mvm, mvmvif, id);
+@@ -1027,7 +1039,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
+ iwl_mvm_p2p_roc_finished(mvm);
+ } else {
+ iwl_mvm_remove_aux_roc_te(mvm, mvmvif,
+- &mvmvif->time_event_data);
++ &mvmvif->hs_time_event_data);
+ iwl_mvm_roc_finished(mvm);
+ }
+
+@@ -1158,15 +1170,10 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm,
+ cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
+ mvmvif->color)),
+ .action = cpu_to_le32(FW_CTXT_ACTION_ADD),
++ .conf_id = cpu_to_le32(SESSION_PROTECT_CONF_ASSOC),
+ .duration_tu = cpu_to_le32(MSEC_TO_TU(duration)),
+ };
+
+- /* The time_event_data.id field is reused to save session
+- * protection's configuration.
+- */
+- mvmvif->time_event_data.id = SESSION_PROTECT_CONF_ASSOC;
+- cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id);
+-
+ lockdep_assert_held(&mvm->mutex);
+
+ spin_lock_bh(&mvm->time_event_lock);
+@@ -1180,6 +1187,11 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm,
+ }
+
+ iwl_mvm_te_clear_data(mvm, te_data);
++ /*
++ * The time_event_data.id field is reused to save session
++ * protection's configuration.
++ */
++ te_data->id = le32_to_cpu(cmd.conf_id);
+ te_data->duration = le32_to_cpu(cmd.duration_tu);
+ te_data->vif = vif;
+ spin_unlock_bh(&mvm->time_event_lock);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index 0a13c2bda2eed..e354918c2480f 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -268,7 +268,6 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm,
+ int rate_idx = -1;
+ u8 rate_plcp;
+ u32 rate_flags = 0;
+- struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
+
+ /* info->control is only relevant for non HW rate control */
+ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) {
+@@ -278,7 +277,8 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm,
+ "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n",
+ info->control.rates[0].flags,
+ info->control.rates[0].idx,
+- le16_to_cpu(fc), mvmsta->sta_state);
++ le16_to_cpu(fc),
++ sta ? iwl_mvm_sta_from_mac80211(sta)->sta_state : -1);
+
+ rate_idx = info->control.rates[0].idx;
+ }
+@@ -1150,6 +1150,7 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
+ struct sk_buff_head mpdus_skbs;
+ unsigned int payload_len;
+ int ret;
++ struct sk_buff *orig_skb = skb;
+
+ if (WARN_ON_ONCE(!mvmsta))
+ return -1;
+@@ -1182,8 +1183,17 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
+
+ ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+ if (ret) {
++ /* Free skbs created as part of TSO logic that have not yet been dequeued */
+ __skb_queue_purge(&mpdus_skbs);
+- return ret;
++ /* skb here is not necessarily same as skb that entered this method,
++ * so free it explicitly.
++ */
++ if (skb == orig_skb)
++ ieee80211_free_txskb(mvm->hw, skb);
++ else
++ kfree_skb(skb);
++ /* there was error, but we consumed skb one way or another, so return 0 */
++ return 0;
+ }
+ }
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+index 4a3d2971a98b7..ec8a223f90e85 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+@@ -405,6 +405,9 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm,
+
+ lockdep_assert_held(&mvm->mutex);
+
++ if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM)
++ return false;
++
+ if (num_of_ant(iwl_mvm_get_valid_rx_ant(mvm)) == 1)
+ return false;
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index e3996ff99bad5..5d324d64c8799 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -931,9 +931,9 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+ iwl_qu_b0_hr1_b0, iwl_ax101_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+- IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP,
++ IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP,
+ IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY,
+- IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
++ IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+ iwl_qu_b0_hr_b0, iwl_ax203_name),
+
+ /* Qu C step */
+@@ -945,7 +945,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP,
+ IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY,
+- IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
++ IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+ iwl_qu_c0_hr_b0, iwl_ax203_name),
+
+ /* QuZ */
+@@ -1380,6 +1380,9 @@ static void iwl_pci_remove(struct pci_dev *pdev)
+ {
+ struct iwl_trans *trans = pci_get_drvdata(pdev);
+
++ if (!trans)
++ return;
++
+ iwl_drv_stop(trans->drv);
+
+ iwl_trans_pcie_free(trans);
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+index 8e45eb38304b2..6c6512158813b 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+@@ -1601,18 +1601,22 @@ irqreturn_t iwl_pcie_irq_rx_msix_handler(int irq, void *dev_id)
+ struct msix_entry *entry = dev_id;
+ struct iwl_trans_pcie *trans_pcie = iwl_pcie_get_trans_pcie(entry);
+ struct iwl_trans *trans = trans_pcie->trans;
+- struct iwl_rxq *rxq = &trans_pcie->rxq[entry->entry];
++ struct iwl_rxq *rxq;
+
+ trace_iwlwifi_dev_irq_msix(trans->dev, entry, false, 0, 0);
+
+ if (WARN_ON(entry->entry >= trans->num_rx_queues))
+ return IRQ_NONE;
+
+- if (WARN_ONCE(!rxq,
+- "[%d] Got MSI-X interrupt before we have Rx queues",
+- entry->entry))
++ if (!trans_pcie->rxq) {
++ if (net_ratelimit())
++ IWL_ERR(trans,
++ "[%d] Got MSI-X interrupt before we have Rx queues\n",
++ entry->entry);
+ return IRQ_NONE;
++ }
+
++ rxq = &trans_pcie->rxq[entry->entry];
+ lock_map_acquire(&trans->sync_cmd_lockdep_map);
+ IWL_DEBUG_ISR(trans, "[%d] Got interrupt\n", entry->entry);
+
+@@ -2261,7 +2265,12 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
+ }
+ }
+
+- if (inta_hw & MSIX_HW_INT_CAUSES_REG_WAKEUP) {
++ /*
++ * In some rare cases when the HW is in a bad state, we may
++ * get this interrupt too early, when prph_info is still NULL.
++ * So make sure that it's not NULL to prevent crashing.
++ */
++ if (inta_hw & MSIX_HW_INT_CAUSES_REG_WAKEUP && trans_pcie->prph_info) {
+ u32 sleep_notif =
+ le32_to_cpu(trans_pcie->prph_info->sleep_notif);
+ if (sleep_notif == IWL_D3_SLEEP_STATUS_SUSPEND ||
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+index bf0c32a74ca47..a9c19be29e92e 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+@@ -408,8 +408,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
+ /* This may fail if AMT took ownership of the device */
+ if (iwl_pcie_prepare_card_hw(trans)) {
+ IWL_WARN(trans, "Exit HW not ready\n");
+- ret = -EIO;
+- goto out;
++ return -EIO;
+ }
+
+ iwl_enable_rfkill_int(trans);
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+index f252680f18e88..04e1f3829e96b 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+@@ -581,7 +581,6 @@ static int iwl_pcie_set_hw_ready(struct iwl_trans *trans)
+ int iwl_pcie_prepare_card_hw(struct iwl_trans *trans)
+ {
+ int ret;
+- int t = 0;
+ int iter;
+
+ IWL_DEBUG_INFO(trans, "iwl_trans_prepare_card_hw enter\n");
+@@ -596,6 +595,8 @@ int iwl_pcie_prepare_card_hw(struct iwl_trans *trans)
+ usleep_range(1000, 2000);
+
+ for (iter = 0; iter < 10; iter++) {
++ int t = 0;
++
+ /* If HW is not ready, prepare the conditions to check again */
+ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG,
+ CSR_HW_IF_CONFIG_REG_PREPARE);
+@@ -1273,8 +1274,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+ /* This may fail if AMT took ownership of the device */
+ if (iwl_pcie_prepare_card_hw(trans)) {
+ IWL_WARN(trans, "Exit HW not ready\n");
+- ret = -EIO;
+- goto out;
++ return -EIO;
+ }
+
+ iwl_enable_rfkill_int(trans);
+@@ -2778,7 +2778,7 @@ static bool iwl_write_to_user_buf(char __user *user_buf, ssize_t count,
+ void *buf, ssize_t *size,
+ ssize_t *bytes_copied)
+ {
+- int buf_size_left = count - *bytes_copied;
++ ssize_t buf_size_left = count - *bytes_copied;
+
+ buf_size_left = buf_size_left - (buf_size_left % sizeof(u32));
+ if (*size > buf_size_left)
+diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+index 451b060693501..0f3526b0c5b00 100644
+--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+@@ -1072,6 +1072,7 @@ int iwl_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num,
+ return 0;
+ err_free_tfds:
+ dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
++ txq->tfds = NULL;
+ error:
+ if (txq->entries && cmd_queue)
+ for (i = 0; i < slots_num; i++)
+diff --git a/drivers/net/wireless/intersil/orinoco/hw.c b/drivers/net/wireless/intersil/orinoco/hw.c
+index 0aea35c9c11c7..4fcca08e50de2 100644
+--- a/drivers/net/wireless/intersil/orinoco/hw.c
++++ b/drivers/net/wireless/intersil/orinoco/hw.c
+@@ -931,6 +931,8 @@ int __orinoco_hw_setup_enc(struct orinoco_private *priv)
+ err = hermes_write_wordrec(hw, USER_BAP,
+ HERMES_RID_CNFAUTHENTICATION_AGERE,
+ auth_flag);
++ if (err)
++ return err;
+ }
+ err = hermes_write_wordrec(hw, USER_BAP,
+ HERMES_RID_CNFWEPENABLED_AGERE,
+diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_cs.c b/drivers/net/wireless/intersil/orinoco/orinoco_cs.c
+index a956f965a1e5e..03bfd2482656c 100644
+--- a/drivers/net/wireless/intersil/orinoco/orinoco_cs.c
++++ b/drivers/net/wireless/intersil/orinoco/orinoco_cs.c
+@@ -96,6 +96,7 @@ orinoco_cs_probe(struct pcmcia_device *link)
+ {
+ struct orinoco_private *priv;
+ struct orinoco_pccard *card;
++ int ret;
+
+ priv = alloc_orinocodev(sizeof(*card), &link->dev,
+ orinoco_cs_hard_reset, NULL);
+@@ -107,8 +108,16 @@ orinoco_cs_probe(struct pcmcia_device *link)
+ card->p_dev = link;
+ link->priv = priv;
+
+- return orinoco_cs_config(link);
+-} /* orinoco_cs_attach */
++ ret = orinoco_cs_config(link);
++ if (ret)
++ goto err_free_orinocodev;
++
++ return 0;
++
++err_free_orinocodev:
++ free_orinocodev(priv);
++ return ret;
++}
+
+ static void orinoco_cs_detach(struct pcmcia_device *link)
+ {
+diff --git a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
+index 291ef97ed45ec..841d623c621ac 100644
+--- a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
++++ b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
+@@ -157,6 +157,7 @@ spectrum_cs_probe(struct pcmcia_device *link)
+ {
+ struct orinoco_private *priv;
+ struct orinoco_pccard *card;
++ int ret;
+
+ priv = alloc_orinocodev(sizeof(*card), &link->dev,
+ spectrum_cs_hard_reset,
+@@ -169,8 +170,16 @@ spectrum_cs_probe(struct pcmcia_device *link)
+ card->p_dev = link;
+ link->priv = priv;
+
+- return spectrum_cs_config(link);
+-} /* spectrum_cs_attach */
++ ret = spectrum_cs_config(link);
++ if (ret)
++ goto err_free_orinocodev;
++
++ return 0;
++
++err_free_orinocodev:
++ free_orinocodev(priv);
++ return ret;
++}
+
+ static void spectrum_cs_detach(struct pcmcia_device *link)
+ {
+diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
+index a3ca6620dc0c6..8fa3ec71603e3 100644
+--- a/drivers/net/wireless/intersil/p54/main.c
++++ b/drivers/net/wireless/intersil/p54/main.c
+@@ -682,7 +682,7 @@ static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
+ * queues have already been stopped and no new frames can sneak
+ * up from behind.
+ */
+- while ((total = p54_flush_count(priv) && i--)) {
++ while ((total = p54_flush_count(priv)) && i--) {
+ /* waste time */
+ msleep(20);
+ }
+diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c
+index ab0fe85658518..cdb57819684ae 100644
+--- a/drivers/net/wireless/intersil/p54/p54spi.c
++++ b/drivers/net/wireless/intersil/p54/p54spi.c
+@@ -164,7 +164,7 @@ static int p54spi_request_firmware(struct ieee80211_hw *dev)
+
+ ret = p54_parse_firmware(dev, priv->firmware);
+ if (ret) {
+- release_firmware(priv->firmware);
++ /* the firmware is released by the caller */
+ return ret;
+ }
+
+@@ -659,6 +659,7 @@ static int p54spi_probe(struct spi_device *spi)
+ return 0;
+
+ err_free_common:
++ release_firmware(priv->firmware);
+ free_irq(gpio_to_irq(p54spi_gpio_irq), spi);
+ err_free_gpio_irq:
+ gpio_free(p54spi_gpio_irq);
+diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
+index 0adae76eb8df1..c3c3b5aa87b0d 100644
+--- a/drivers/net/wireless/mac80211_hwsim.c
++++ b/drivers/net/wireless/mac80211_hwsim.c
+@@ -663,7 +663,7 @@ struct mac80211_hwsim_data {
+ bool ps_poll_pending;
+ struct dentry *debugfs;
+
+- uintptr_t pending_cookie;
++ atomic_t pending_cookie;
+ struct sk_buff_head pending; /* packets pending */
+ /*
+ * Only radios in the same group can communicate together (the
+@@ -845,6 +845,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
+ struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+ struct sk_buff *skb;
+ struct ieee80211_hdr *hdr;
++ struct ieee80211_tx_info *cb;
+
+ if (!vp->assoc)
+ return;
+@@ -866,6 +867,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
+ memcpy(hdr->addr2, mac, ETH_ALEN);
+ memcpy(hdr->addr3, vp->bssid, ETH_ALEN);
+
++ cb = IEEE80211_SKB_CB(skb);
++ cb->control.rates[0].count = 1;
++ cb->control.rates[1].idx = -1;
++
+ rcu_read_lock();
+ mac80211_hwsim_tx_frame(data->hw, skb,
+ rcu_dereference(vif->chanctx_conf)->def.chan);
+@@ -1339,8 +1344,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
+ goto nla_put_failure;
+
+ /* We create a cookie to identify this skb */
+- data->pending_cookie++;
+- cookie = data->pending_cookie;
++ cookie = atomic_inc_return(&data->pending_cookie);
+ info->rate_driver_data[0] = (void *)cookie;
+ if (nla_put_u64_64bit(skb, HWSIM_ATTR_COOKIE, cookie, HWSIM_ATTR_PAD))
+ goto nla_put_failure;
+@@ -2336,9 +2340,21 @@ static void hw_scan_work(struct work_struct *work)
+ if (req->ie_len)
+ skb_put_data(probe, req->ie, req->ie_len);
+
++ rcu_read_lock();
++ if (!ieee80211_tx_prepare_skb(hwsim->hw,
++ hwsim->hw_scan_vif,
++ probe,
++ hwsim->tmp_chan->band,
++ NULL)) {
++ rcu_read_unlock();
++ kfree_skb(probe);
++ continue;
++ }
++
+ local_bh_disable();
+ mac80211_hwsim_tx_frame(hwsim->hw, probe,
+ hwsim->tmp_chan);
++ rcu_read_unlock();
+ local_bh_enable();
+ }
+ }
+@@ -3570,6 +3586,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
+ const u8 *src;
+ unsigned int hwsim_flags;
+ int i;
++ unsigned long flags;
+ bool found = false;
+
+ if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] ||
+@@ -3597,18 +3614,20 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
+ }
+
+ /* look for the skb matching the cookie passed back from user */
++ spin_lock_irqsave(&data2->pending.lock, flags);
+ skb_queue_walk_safe(&data2->pending, skb, tmp) {
+- u64 skb_cookie;
++ uintptr_t skb_cookie;
+
+ txi = IEEE80211_SKB_CB(skb);
+- skb_cookie = (u64)(uintptr_t)txi->rate_driver_data[0];
++ skb_cookie = (uintptr_t)txi->rate_driver_data[0];
+
+ if (skb_cookie == ret_skb_cookie) {
+- skb_unlink(skb, &data2->pending);
++ __skb_unlink(skb, &data2->pending);
+ found = true;
+ break;
+ }
+ }
++ spin_unlock_irqrestore(&data2->pending.lock, flags);
+
+ /* not found */
+ if (!found)
+@@ -3641,6 +3660,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
+ }
+ txi->flags |= IEEE80211_TX_STAT_ACK;
+ }
++
++ if (hwsim_flags & HWSIM_TX_CTL_NO_ACK)
++ txi->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
++
+ ieee80211_tx_status_irqsafe(data2->hw, skb);
+ return 0;
+ out:
+@@ -3731,6 +3754,8 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
+
+ rx_status.band = channel->band;
+ rx_status.rate_idx = nla_get_u32(info->attrs[HWSIM_ATTR_RX_RATE]);
++ if (rx_status.rate_idx >= data2->hw->wiphy->bands[rx_status.band]->n_bitrates)
++ goto out;
+ rx_status.signal = nla_get_u32(info->attrs[HWSIM_ATTR_SIGNAL]);
+
+ hdr = (void *)skb->data;
+@@ -4260,6 +4285,10 @@ static int hwsim_virtio_handle_cmd(struct sk_buff *skb)
+
+ nlh = nlmsg_hdr(skb);
+ gnlh = nlmsg_data(nlh);
++
++ if (skb->len < nlh->nlmsg_len)
++ return -EINVAL;
++
+ err = genlmsg_parse(nlh, &hwsim_genl_family, tb, HWSIM_ATTR_MAX,
+ hwsim_genl_policy, NULL);
+ if (err) {
+@@ -4302,7 +4331,8 @@ static void hwsim_virtio_rx_work(struct work_struct *work)
+ spin_unlock_irqrestore(&hwsim_virtio_lock, flags);
+
+ skb->data = skb->head;
+- skb_set_tail_pointer(skb, len);
++ skb_reset_tail_pointer(skb);
++ skb_put(skb, len);
+ hwsim_virtio_handle_cmd(skb);
+
+ spin_lock_irqsave(&hwsim_virtio_lock, flags);
+diff --git a/drivers/net/wireless/marvell/libertas/cmdresp.c b/drivers/net/wireless/marvell/libertas/cmdresp.c
+index cb515c5584c1f..74cb7551f4275 100644
+--- a/drivers/net/wireless/marvell/libertas/cmdresp.c
++++ b/drivers/net/wireless/marvell/libertas/cmdresp.c
+@@ -48,7 +48,7 @@ void lbs_mac_event_disconnected(struct lbs_private *priv,
+
+ /* Free Tx and Rx packets */
+ spin_lock_irqsave(&priv->driver_lock, flags);
+- kfree_skb(priv->currenttxskb);
++ dev_kfree_skb_irq(priv->currenttxskb);
+ priv->currenttxskb = NULL;
+ priv->tx_pending_len = 0;
+ spin_unlock_irqrestore(&priv->driver_lock, flags);
+diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c
+index 20436a289d5cd..2240b4db8c036 100644
+--- a/drivers/net/wireless/marvell/libertas/if_usb.c
++++ b/drivers/net/wireless/marvell/libertas/if_usb.c
+@@ -287,11 +287,13 @@ static int if_usb_probe(struct usb_interface *intf,
+ return 0;
+
+ err_get_fw:
++ usb_put_dev(udev);
+ lbs_remove_card(priv);
+ err_add_card:
+ if_usb_reset_device(cardp);
+ dealloc:
+ if_usb_free(cardp);
++ kfree(cardp);
+
+ error:
+ return r;
+@@ -316,6 +318,7 @@ static void if_usb_disconnect(struct usb_interface *intf)
+
+ /* Unlink and free urb */
+ if_usb_free(cardp);
++ kfree(cardp);
+
+ usb_set_intfdata(intf, NULL);
+ usb_put_dev(interface_to_usbdev(intf));
+@@ -634,7 +637,7 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
+ priv->resp_len[i] = (recvlength - MESSAGE_HEADER_LEN);
+ memcpy(priv->resp_buf[i], recvbuff + MESSAGE_HEADER_LEN,
+ priv->resp_len[i]);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ lbs_notify_command_response(priv, i);
+
+ spin_unlock_irqrestore(&priv->driver_lock, flags);
+diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c
+index 64fc5e4108648..46877773a36de 100644
+--- a/drivers/net/wireless/marvell/libertas/main.c
++++ b/drivers/net/wireless/marvell/libertas/main.c
+@@ -217,7 +217,7 @@ int lbs_stop_iface(struct lbs_private *priv)
+
+ spin_lock_irqsave(&priv->driver_lock, flags);
+ priv->iface_running = false;
+- kfree_skb(priv->currenttxskb);
++ dev_kfree_skb_irq(priv->currenttxskb);
+ priv->currenttxskb = NULL;
+ priv->tx_pending_len = 0;
+ spin_unlock_irqrestore(&priv->driver_lock, flags);
+@@ -870,6 +870,7 @@ static int lbs_init_adapter(struct lbs_private *priv)
+ ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL);
+ if (ret) {
+ pr_err("Out of memory allocating event FIFO buffer\n");
++ lbs_free_cmd_buffer(priv);
+ goto out;
+ }
+
+diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+index fe0a69e804d8c..1750f5e93de21 100644
+--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
++++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+@@ -230,6 +230,7 @@ static int if_usb_probe(struct usb_interface *intf,
+
+ dealloc:
+ if_usb_free(cardp);
++ kfree(cardp);
+ error:
+ lbtf_deb_leave(LBTF_DEB_MAIN);
+ return -ENOMEM;
+@@ -254,6 +255,7 @@ static void if_usb_disconnect(struct usb_interface *intf)
+
+ /* Unlink and free urb */
+ if_usb_free(cardp);
++ kfree(cardp);
+
+ usb_set_intfdata(intf, NULL);
+ usb_put_dev(interface_to_usbdev(intf));
+@@ -611,7 +613,7 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
+ spin_lock_irqsave(&priv->driver_lock, flags);
+ memcpy(priv->cmd_resp_buff, recvbuff + MESSAGE_HEADER_LEN,
+ recvlength - MESSAGE_HEADER_LEN);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ lbtf_cmd_response_rx(priv);
+ spin_unlock_irqrestore(&priv->driver_lock, flags);
+ }
+diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c
+index d2ee6469e67bb..3fa25cd64cda0 100644
+--- a/drivers/net/wireless/marvell/mwifiex/11h.c
++++ b/drivers/net/wireless/marvell/mwifiex/11h.c
+@@ -303,5 +303,7 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work)
+
+ mwifiex_dbg(priv->adapter, MSG,
+ "indicating channel switch completion to kernel\n");
++ mutex_lock(&priv->wdev.mtx);
+ cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef);
++ mutex_unlock(&priv->wdev.mtx);
+ }
+diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
+index 6696bce561786..b99381ebb82a1 100644
+--- a/drivers/net/wireless/marvell/mwifiex/11n.c
++++ b/drivers/net/wireless/marvell/mwifiex/11n.c
+@@ -657,14 +657,15 @@ int mwifiex_send_delba(struct mwifiex_private *priv, int tid, u8 *peer_mac,
+ uint16_t del_ba_param_set;
+
+ memset(&delba, 0, sizeof(delba));
+- delba.del_ba_param_set = cpu_to_le16(tid << DELBA_TID_POS);
+
+- del_ba_param_set = le16_to_cpu(delba.del_ba_param_set);
++ del_ba_param_set = tid << DELBA_TID_POS;
++
+ if (initiator)
+ del_ba_param_set |= IEEE80211_DELBA_PARAM_INITIATOR_MASK;
+ else
+ del_ba_param_set &= ~IEEE80211_DELBA_PARAM_INITIATOR_MASK;
+
++ delba.del_ba_param_set = cpu_to_le16(del_ba_param_set);
+ memcpy(&delba.peer_mac_addr, peer_mac, ETH_ALEN);
+
+ /* We don't wait for the response of this command */
+@@ -889,7 +890,7 @@ mwifiex_send_delba_txbastream_tbl(struct mwifiex_private *priv, u8 tid)
+ */
+ void mwifiex_update_ampdu_txwinsize(struct mwifiex_adapter *adapter)
+ {
+- u8 i;
++ u8 i, j;
+ u32 tx_win_size;
+ struct mwifiex_private *priv;
+
+@@ -920,8 +921,8 @@ void mwifiex_update_ampdu_txwinsize(struct mwifiex_adapter *adapter)
+ if (tx_win_size != priv->add_ba_param.tx_win_size) {
+ if (!priv->media_connected)
+ continue;
+- for (i = 0; i < MAX_NUM_TID; i++)
+- mwifiex_send_delba_txbastream_tbl(priv, i);
++ for (j = 0; j < MAX_NUM_TID; j++)
++ mwifiex_send_delba_txbastream_tbl(priv, j);
+ }
+ }
+ }
+diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+index 0961f4a5e415c..97f0f39364d67 100644
+--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
++++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+@@ -908,16 +908,20 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv,
+ switch (type) {
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_ADHOC:
+- priv->bss_role = MWIFIEX_BSS_ROLE_STA;
++ priv->bss_role = MWIFIEX_BSS_ROLE_STA;
++ priv->bss_type = MWIFIEX_BSS_TYPE_STA;
+ break;
+ case NL80211_IFTYPE_P2P_CLIENT:
+- priv->bss_role = MWIFIEX_BSS_ROLE_STA;
++ priv->bss_role = MWIFIEX_BSS_ROLE_STA;
++ priv->bss_type = MWIFIEX_BSS_TYPE_P2P;
+ break;
+ case NL80211_IFTYPE_P2P_GO:
+- priv->bss_role = MWIFIEX_BSS_ROLE_UAP;
++ priv->bss_role = MWIFIEX_BSS_ROLE_UAP;
++ priv->bss_type = MWIFIEX_BSS_TYPE_P2P;
+ break;
+ case NL80211_IFTYPE_AP:
+ priv->bss_role = MWIFIEX_BSS_ROLE_UAP;
++ priv->bss_type = MWIFIEX_BSS_TYPE_UAP;
+ break;
+ default:
+ mwifiex_dbg(adapter, ERROR,
+@@ -1229,29 +1233,15 @@ mwifiex_cfg80211_change_virtual_intf(struct wiphy *wiphy,
+ break;
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_P2P_GO:
++ if (mwifiex_cfg80211_deinit_p2p(priv))
++ return -EFAULT;
++
+ switch (type) {
+- case NL80211_IFTYPE_STATION:
+- if (mwifiex_cfg80211_deinit_p2p(priv))
+- return -EFAULT;
+- priv->adapter->curr_iface_comb.p2p_intf--;
+- priv->adapter->curr_iface_comb.sta_intf++;
+- dev->ieee80211_ptr->iftype = type;
+- if (mwifiex_deinit_priv_params(priv))
+- return -1;
+- if (mwifiex_init_new_priv_params(priv, dev, type))
+- return -1;
+- if (mwifiex_sta_init_cmd(priv, false, false))
+- return -1;
+- break;
+ case NL80211_IFTYPE_ADHOC:
+- if (mwifiex_cfg80211_deinit_p2p(priv))
+- return -EFAULT;
++ case NL80211_IFTYPE_STATION:
+ return mwifiex_change_vif_to_sta_adhoc(dev, curr_iftype,
+ type, params);
+- break;
+ case NL80211_IFTYPE_AP:
+- if (mwifiex_cfg80211_deinit_p2p(priv))
+- return -EFAULT;
+ return mwifiex_change_vif_to_ap(dev, curr_iftype, type,
+ params);
+ case NL80211_IFTYPE_UNSPECIFIED:
+diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c
+index dded92db1f373..1e7dc724c6a94 100644
+--- a/drivers/net/wireless/marvell/mwifiex/debugfs.c
++++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c
+@@ -265,8 +265,11 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf,
+ if (!p)
+ return -ENOMEM;
+
+- if (!priv || !priv->hist_data)
+- return -EFAULT;
++ if (!priv || !priv->hist_data) {
++ ret = -EFAULT;
++ goto free_and_exit;
++ }
++
+ phist_data = priv->hist_data;
+
+ p += sprintf(p, "\n"
+@@ -321,6 +324,8 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf,
+ ret = simple_read_from_buffer(ubuf, count, ppos, (char *)page,
+ (unsigned long)p - page);
+
++free_and_exit:
++ free_page(page);
+ return ret;
+ }
+
+diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
+index 5923c5c14c8df..f4e3dce10d654 100644
+--- a/drivers/net/wireless/marvell/mwifiex/main.h
++++ b/drivers/net/wireless/marvell/mwifiex/main.h
+@@ -1054,6 +1054,8 @@ struct mwifiex_adapter {
+ void *devdump_data;
+ int devdump_len;
+ struct timer_list devdump_timer;
++
++ bool ignore_btcoex_events;
+ };
+
+ void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter);
+diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
+index c6ccce426b496..8b3f46586654a 100644
+--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
++++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
+@@ -17,6 +17,7 @@
+ * this warranty disclaimer.
+ */
+
++#include <linux/iopoll.h>
+ #include <linux/firmware.h>
+
+ #include "decl.h"
+@@ -183,7 +184,7 @@ static const struct mwifiex_pcie_device mwifiex_pcie8997 = {
+ .can_ext_scan = true,
+ };
+
+-static const struct of_device_id mwifiex_pcie_of_match_table[] = {
++static const struct of_device_id mwifiex_pcie_of_match_table[] __maybe_unused = {
+ { .compatible = "pci11ab,2b42" },
+ { .compatible = "pci1b4b,2b42" },
+ { }
+@@ -200,6 +201,8 @@ static int mwifiex_pcie_probe_of(struct device *dev)
+ }
+
+ static void mwifiex_pcie_work(struct work_struct *work);
++static int mwifiex_pcie_delete_rxbd_ring(struct mwifiex_adapter *adapter);
++static int mwifiex_pcie_delete_evtbd_ring(struct mwifiex_adapter *adapter);
+
+ static int
+ mwifiex_map_pci_memory(struct mwifiex_adapter *adapter, struct sk_buff *skb,
+@@ -647,11 +650,15 @@ static void mwifiex_delay_for_sleep_cookie(struct mwifiex_adapter *adapter,
+ "max count reached while accessing sleep cookie\n");
+ }
+
++#define N_WAKEUP_TRIES_SHORT_INTERVAL 15
++#define N_WAKEUP_TRIES_LONG_INTERVAL 35
++
+ /* This function wakes up the card by reading fw_status register. */
+ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter)
+ {
+ struct pcie_service_card *card = adapter->card;
+ const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
++ int retval;
+
+ mwifiex_dbg(adapter, EVENT,
+ "event: Wakeup device...\n");
+@@ -659,11 +666,24 @@ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter)
+ if (reg->sleep_cookie)
+ mwifiex_pcie_dev_wakeup_delay(adapter);
+
+- /* Accessing fw_status register will wakeup device */
+- if (mwifiex_write_reg(adapter, reg->fw_status, FIRMWARE_READY_PCIE)) {
+- mwifiex_dbg(adapter, ERROR,
+- "Writing fw_status register failed\n");
+- return -1;
++ /* The 88W8897 PCIe+USB firmware (latest version 15.68.19.p21) sometimes
++ * appears to ignore or miss our wakeup request, so we continue trying
++ * until we receive an interrupt from the card.
++ */
++ if (read_poll_timeout(mwifiex_write_reg, retval,
++ READ_ONCE(adapter->int_status) != 0,
++ 500, 500 * N_WAKEUP_TRIES_SHORT_INTERVAL,
++ false,
++ adapter, reg->fw_status, FIRMWARE_READY_PCIE)) {
++ if (read_poll_timeout(mwifiex_write_reg, retval,
++ READ_ONCE(adapter->int_status) != 0,
++ 10000, 10000 * N_WAKEUP_TRIES_LONG_INTERVAL,
++ false,
++ adapter, reg->fw_status, FIRMWARE_READY_PCIE)) {
++ mwifiex_dbg(adapter, ERROR,
++ "Firmware didn't wake up\n");
++ return -EIO;
++ }
+ }
+
+ if (reg->sleep_cookie) {
+@@ -786,14 +806,15 @@ static int mwifiex_init_rxq_ring(struct mwifiex_adapter *adapter)
+ if (!skb) {
+ mwifiex_dbg(adapter, ERROR,
+ "Unable to allocate skb for RX ring.\n");
+- kfree(card->rxbd_ring_vbase);
+ return -ENOMEM;
+ }
+
+ if (mwifiex_map_pci_memory(adapter, skb,
+ MWIFIEX_RX_DATA_BUF_SIZE,
+- DMA_FROM_DEVICE))
+- return -1;
++ DMA_FROM_DEVICE)) {
++ kfree_skb(skb);
++ return -ENOMEM;
++ }
+
+ buf_pa = MWIFIEX_SKB_DMA_ADDR(skb);
+
+@@ -843,7 +864,6 @@ static int mwifiex_pcie_init_evt_ring(struct mwifiex_adapter *adapter)
+ if (!skb) {
+ mwifiex_dbg(adapter, ERROR,
+ "Unable to allocate skb for EVENT buf.\n");
+- kfree(card->evtbd_ring_vbase);
+ return -ENOMEM;
+ }
+ skb_put(skb, MAX_EVENT_SIZE);
+@@ -851,8 +871,7 @@ static int mwifiex_pcie_init_evt_ring(struct mwifiex_adapter *adapter)
+ if (mwifiex_map_pci_memory(adapter, skb, MAX_EVENT_SIZE,
+ DMA_FROM_DEVICE)) {
+ kfree_skb(skb);
+- kfree(card->evtbd_ring_vbase);
+- return -1;
++ return -ENOMEM;
+ }
+
+ buf_pa = MWIFIEX_SKB_DMA_ADDR(skb);
+@@ -1052,6 +1071,7 @@ static int mwifiex_pcie_delete_txbd_ring(struct mwifiex_adapter *adapter)
+ */
+ static int mwifiex_pcie_create_rxbd_ring(struct mwifiex_adapter *adapter)
+ {
++ int ret;
+ struct pcie_service_card *card = adapter->card;
+ const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+@@ -1090,7 +1110,10 @@ static int mwifiex_pcie_create_rxbd_ring(struct mwifiex_adapter *adapter)
+ (u32)((u64)card->rxbd_ring_pbase >> 32),
+ card->rxbd_ring_size);
+
+- return mwifiex_init_rxq_ring(adapter);
++ ret = mwifiex_init_rxq_ring(adapter);
++ if (ret)
++ mwifiex_pcie_delete_rxbd_ring(adapter);
++ return ret;
+ }
+
+ /*
+@@ -1121,6 +1144,7 @@ static int mwifiex_pcie_delete_rxbd_ring(struct mwifiex_adapter *adapter)
+ */
+ static int mwifiex_pcie_create_evtbd_ring(struct mwifiex_adapter *adapter)
+ {
++ int ret;
+ struct pcie_service_card *card = adapter->card;
+ const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+@@ -1155,7 +1179,10 @@ static int mwifiex_pcie_create_evtbd_ring(struct mwifiex_adapter *adapter)
+ (u32)((u64)card->evtbd_ring_pbase >> 32),
+ card->evtbd_ring_size);
+
+- return mwifiex_pcie_init_evt_ring(adapter);
++ ret = mwifiex_pcie_init_evt_ring(adapter);
++ if (ret)
++ mwifiex_pcie_delete_evtbd_ring(adapter);
++ return ret;
+ }
+
+ /*
+@@ -1490,6 +1517,14 @@ mwifiex_pcie_send_data(struct mwifiex_adapter *adapter, struct sk_buff *skb,
+ ret = -1;
+ goto done_unmap;
+ }
++
++ /* The firmware (latest version 15.68.19.p21) of the 88W8897 PCIe+USB card
++ * seems to crash randomly after setting the TX ring write pointer when
++ * ASPM powersaving is enabled. A workaround seems to be keeping the bus
++ * busy by reading a random register afterwards.
++ */
++ mwifiex_read_reg(adapter, PCI_VENDOR_ID, &rx_val);
++
+ if ((mwifiex_pcie_txbd_not_full(card)) &&
+ tx_param->next_pkt_len) {
+ /* have more packets and TxBD still can hold more */
+@@ -3126,6 +3161,9 @@ static int mwifiex_init_pcie(struct mwifiex_adapter *adapter)
+ if (ret)
+ goto err_alloc_buffers;
+
++ if (pdev->device == PCIE_DEVICE_ID_MARVELL_88W8897)
++ adapter->ignore_btcoex_events = true;
++
+ return 0;
+
+ err_alloc_buffers:
+diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
+index 0b877f3f6b974..5ec8a42e7150a 100644
+--- a/drivers/net/wireless/marvell/mwifiex/scan.c
++++ b/drivers/net/wireless/marvell/mwifiex/scan.c
+@@ -2199,9 +2199,9 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv,
+
+ if (nd_config) {
+ adapter->nd_info =
+- kzalloc(sizeof(struct cfg80211_wowlan_nd_match) +
+- sizeof(struct cfg80211_wowlan_nd_match *) *
+- scan_rsp->number_of_sets, GFP_ATOMIC);
++ kzalloc(struct_size(adapter->nd_info, matches,
++ scan_rsp->number_of_sets),
++ GFP_ATOMIC);
+
+ if (adapter->nd_info)
+ adapter->nd_info->n_matches = scan_rsp->number_of_sets;
+diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
+index bde9e4bbfffe7..b09e60fedeb16 100644
+--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
++++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
+@@ -484,7 +484,8 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = {
+ {"EXTLAST", NULL, 0, 0xFE},
+ };
+
+-static const struct of_device_id mwifiex_sdio_of_match_table[] = {
++static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = {
++ { .compatible = "marvell,sd8787" },
+ { .compatible = "marvell,sd8897" },
+ { .compatible = "marvell,sd8997" },
+ { }
+diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c
+index 68c63268e2e6b..7d42c5d2dbf65 100644
+--- a/drivers/net/wireless/marvell/mwifiex/sta_event.c
++++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c
+@@ -365,10 +365,12 @@ static void mwifiex_process_uap_tx_pause(struct mwifiex_private *priv,
+ sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac);
+ if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) {
+ sta_ptr->tx_pause = tp->tx_pause;
++ spin_unlock_bh(&priv->sta_list_spinlock);
+ mwifiex_update_ralist_tx_pause(priv, tp->peermac,
+ tp->tx_pause);
++ } else {
++ spin_unlock_bh(&priv->sta_list_spinlock);
+ }
+- spin_unlock_bh(&priv->sta_list_spinlock);
+ }
+ }
+
+@@ -400,11 +402,13 @@ static void mwifiex_process_sta_tx_pause(struct mwifiex_private *priv,
+ sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac);
+ if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) {
+ sta_ptr->tx_pause = tp->tx_pause;
++ spin_unlock_bh(&priv->sta_list_spinlock);
+ mwifiex_update_ralist_tx_pause(priv,
+ tp->peermac,
+ tp->tx_pause);
++ } else {
++ spin_unlock_bh(&priv->sta_list_spinlock);
+ }
+- spin_unlock_bh(&priv->sta_list_spinlock);
+ }
+ }
+ }
+@@ -1058,6 +1062,9 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv)
+ break;
+ case EVENT_BT_COEX_WLAN_PARA_CHANGE:
+ dev_dbg(adapter->dev, "EVENT: BT coex wlan param update\n");
++ if (adapter->ignore_btcoex_events)
++ break;
++
+ mwifiex_bt_coex_wlan_param_update_event(priv,
+ adapter->event_skb);
+ break;
+diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
+index 0d2adf8879005..3c555946cb2cc 100644
+--- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c
++++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
+@@ -98,6 +98,15 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv,
+ rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length);
+ rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off;
+
++ if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) {
++ mwifiex_dbg(priv->adapter, ERROR,
++ "wrong rx packet offset: len=%d, rx_pkt_off=%d\n",
++ skb->len, rx_pkt_off);
++ priv->stats.rx_dropped++;
++ dev_kfree_skb_any(skb);
++ return -1;
++ }
++
+ if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
+ sizeof(bridge_tunnel_header))) ||
+ (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
+@@ -206,7 +215,8 @@ int mwifiex_process_sta_rx_packet(struct mwifiex_private *priv,
+
+ rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_offset;
+
+- if ((rx_pkt_offset + rx_pkt_length) > (u16) skb->len) {
++ if ((rx_pkt_offset + rx_pkt_length) > skb->len ||
++ sizeof(rx_pkt_hdr->eth803_hdr) + rx_pkt_offset > skb->len) {
+ mwifiex_dbg(adapter, ERROR,
+ "wrong rx packet: len=%d, rx_pkt_offset=%d, rx_pkt_length=%d\n",
+ skb->len, rx_pkt_offset, rx_pkt_length);
+diff --git a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
+index 245ff644f81e3..8a5d0125a1abd 100644
+--- a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
++++ b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
+@@ -115,6 +115,16 @@ static void mwifiex_uap_queue_bridged_pkt(struct mwifiex_private *priv,
+ return;
+ }
+
++ if (sizeof(*rx_pkt_hdr) +
++ le16_to_cpu(uap_rx_pd->rx_pkt_offset) > skb->len) {
++ mwifiex_dbg(adapter, ERROR,
++ "wrong rx packet offset: len=%d,rx_pkt_offset=%d\n",
++ skb->len, le16_to_cpu(uap_rx_pd->rx_pkt_offset));
++ priv->stats.rx_dropped++;
++ dev_kfree_skb_any(skb);
++ return;
++ }
++
+ if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
+ sizeof(bridge_tunnel_header))) ||
+ (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
+@@ -255,7 +265,15 @@ int mwifiex_handle_uap_rx_forward(struct mwifiex_private *priv,
+
+ if (is_multicast_ether_addr(ra)) {
+ skb_uap = skb_copy(skb, GFP_ATOMIC);
+- mwifiex_uap_queue_bridged_pkt(priv, skb_uap);
++ if (likely(skb_uap)) {
++ mwifiex_uap_queue_bridged_pkt(priv, skb_uap);
++ } else {
++ mwifiex_dbg(adapter, ERROR,
++ "failed to copy skb for uAP\n");
++ priv->stats.rx_dropped++;
++ dev_kfree_skb_any(skb);
++ return -1;
++ }
+ } else {
+ if (mwifiex_get_sta_entry(priv, ra)) {
+ /* Requeue Intra-BSS packet */
+@@ -379,6 +397,16 @@ int mwifiex_process_uap_rx_packet(struct mwifiex_private *priv,
+ rx_pkt_type = le16_to_cpu(uap_rx_pd->rx_pkt_type);
+ rx_pkt_hdr = (void *)uap_rx_pd + le16_to_cpu(uap_rx_pd->rx_pkt_offset);
+
++ if (le16_to_cpu(uap_rx_pd->rx_pkt_offset) +
++ sizeof(rx_pkt_hdr->eth803_hdr) > skb->len) {
++ mwifiex_dbg(adapter, ERROR,
++ "wrong rx packet for struct ethhdr: len=%d, offset=%d\n",
++ skb->len, le16_to_cpu(uap_rx_pd->rx_pkt_offset));
++ priv->stats.rx_dropped++;
++ dev_kfree_skb_any(skb);
++ return 0;
++ }
++
+ ether_addr_copy(ta, rx_pkt_hdr->eth803_hdr.h_source);
+
+ if ((le16_to_cpu(uap_rx_pd->rx_pkt_offset) +
+diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
+index 426e39d4ccf0f..8f01fcbe93961 100644
+--- a/drivers/net/wireless/marvell/mwifiex/usb.c
++++ b/drivers/net/wireless/marvell/mwifiex/usb.c
+@@ -130,7 +130,8 @@ static int mwifiex_usb_recv(struct mwifiex_adapter *adapter,
+ default:
+ mwifiex_dbg(adapter, ERROR,
+ "unknown recv_type %#x\n", recv_type);
+- return -1;
++ ret = -1;
++ goto exit_restore_skb;
+ }
+ break;
+ case MWIFIEX_USB_EP_DATA:
+@@ -505,6 +506,22 @@ static int mwifiex_usb_probe(struct usb_interface *intf,
+ }
+ }
+
++ switch (card->usb_boot_state) {
++ case USB8XXX_FW_DNLD:
++ /* Reject broken descriptors. */
++ if (!card->rx_cmd_ep || !card->tx_cmd_ep)
++ return -ENODEV;
++ if (card->bulk_out_maxpktsize == 0)
++ return -ENODEV;
++ break;
++ case USB8XXX_FW_READY:
++ /* Assume the driver can handle missing endpoints for now. */
++ break;
++ default:
++ WARN_ON(1);
++ return -ENODEV;
++ }
++
+ usb_set_intfdata(intf, card);
+
+ ret = mwifiex_add_card(card, &card->fw_done, &usb_ops,
+diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c
+index d583fa600a296..1f5a6dab9ce55 100644
+--- a/drivers/net/wireless/marvell/mwifiex/util.c
++++ b/drivers/net/wireless/marvell/mwifiex/util.c
+@@ -405,11 +405,15 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv,
+ }
+
+ rx_pd = (struct rxpd *)skb->data;
++ pkt_len = le16_to_cpu(rx_pd->rx_pkt_length);
++ if (pkt_len < sizeof(struct ieee80211_hdr) + sizeof(pkt_len)) {
++ mwifiex_dbg(priv->adapter, ERROR, "invalid rx_pkt_length");
++ return -1;
++ }
+
+ skb_pull(skb, le16_to_cpu(rx_pd->rx_pkt_offset));
+ skb_pull(skb, sizeof(pkt_len));
+-
+- pkt_len = le16_to_cpu(rx_pd->rx_pkt_length);
++ pkt_len -= sizeof(pkt_len);
+
+ ieee_hdr = (void *)skb->data;
+ if (ieee80211_is_mgmt(ieee_hdr->frame_control)) {
+@@ -422,7 +426,7 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv,
+ skb->data + sizeof(struct ieee80211_hdr),
+ pkt_len - sizeof(struct ieee80211_hdr));
+
+- pkt_len -= ETH_ALEN + sizeof(pkt_len);
++ pkt_len -= ETH_ALEN;
+ rx_pd->rx_pkt_length = cpu_to_le16(pkt_len);
+
+ cfg80211_rx_mgmt(&priv->wdev, priv->roc_cfg.chan.center_freq,
+diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
+index 3bf6571f41490..529e325498cdb 100644
+--- a/drivers/net/wireless/marvell/mwl8k.c
++++ b/drivers/net/wireless/marvell/mwl8k.c
+@@ -5800,8 +5800,8 @@ static void mwl8k_fw_state_machine(const struct firmware *fw, void *context)
+ fail:
+ priv->fw_state = FW_STATE_ERROR;
+ complete(&priv->firmware_loading_complete);
+- device_release_driver(&priv->pdev->dev);
+ mwl8k_release_firmware(priv);
++ device_release_driver(&priv->pdev->dev);
+ }
+
+ #define MAX_RESTART_ATTEMPTS 1
+diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
+index 72622220051bb..6c8b441945791 100644
+--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
++++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
+@@ -162,8 +162,9 @@ void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
+ if (!sta)
+ return;
+
+- if (!status->aggr && !(status->flag & RX_FLAG_8023)) {
+- mt76_rx_aggr_check_ctl(skb, frames);
++ if (!status->aggr) {
++ if (!(status->flag & RX_FLAG_8023))
++ mt76_rx_aggr_check_ctl(skb, frames);
+ return;
+ }
+
+diff --git a/drivers/net/wireless/mediatek/mt76/debugfs.c b/drivers/net/wireless/mediatek/mt76/debugfs.c
+index fa48cc3a7a8f7..ad97308c78534 100644
+--- a/drivers/net/wireless/mediatek/mt76/debugfs.c
++++ b/drivers/net/wireless/mediatek/mt76/debugfs.c
+@@ -116,8 +116,11 @@ static int mt76_read_rate_txpower(struct seq_file *s, void *data)
+ return 0;
+ }
+
+-struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
++struct dentry *
++mt76_register_debugfs_fops(struct mt76_dev *dev,
++ const struct file_operations *ops)
+ {
++ const struct file_operations *fops = ops ? ops : &fops_regval;
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("mt76", dev->hw->wiphy->debugfsdir);
+@@ -126,8 +129,7 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
+
+ debugfs_create_u8("led_pin", 0600, dir, &dev->led_pin);
+ debugfs_create_u32("regidx", 0600, dir, &dev->debugfs_reg);
+- debugfs_create_file_unsafe("regval", 0600, dir, dev,
+- &fops_regval);
++ debugfs_create_file_unsafe("regval", 0600, dir, dev, fops);
+ debugfs_create_file_unsafe("napi_threaded", 0600, dir, dev,
+ &fops_napi_threaded);
+ debugfs_create_blob("eeprom", 0400, dir, &dev->eeprom);
+@@ -140,4 +142,4 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
+
+ return dir;
+ }
+-EXPORT_SYMBOL_GPL(mt76_register_debugfs);
++EXPORT_SYMBOL_GPL(mt76_register_debugfs_fops);
+diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
+index 5e1c1506a4c65..69e0e68757f53 100644
+--- a/drivers/net/wireless/mediatek/mt76/dma.c
++++ b/drivers/net/wireless/mediatek/mt76/dma.c
+@@ -434,7 +434,9 @@ free:
+ free_skb:
+ status.skb = tx_info.skb;
+ hw = mt76_tx_status_get_hw(dev, tx_info.skb);
++ spin_lock_bh(&dev->rx_lock);
+ ieee80211_tx_status_ext(hw, &status);
++ spin_unlock_bh(&dev->rx_lock);
+
+ return ret;
+ }
+@@ -465,6 +467,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
+
+ qbuf.addr = addr + offset;
+ qbuf.len = len - offset;
++ qbuf.skip_unmap = false;
+ mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL);
+ frames++;
+ }
+@@ -485,6 +488,7 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
+ bool more;
+
+ spin_lock_bh(&q->lock);
++
+ do {
+ buf = mt76_dma_dequeue(dev, q, true, NULL, NULL, &more);
+ if (!buf)
+@@ -492,6 +496,12 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
+
+ skb_free_frag(buf);
+ } while (1);
++
++ if (q->rx_head) {
++ dev_kfree_skb(q->rx_head);
++ q->rx_head = NULL;
++ }
++
+ spin_unlock_bh(&q->lock);
+
+ if (!q->rx_page.va)
+@@ -514,12 +524,6 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
+ mt76_dma_rx_cleanup(dev, q);
+ mt76_dma_sync_idx(dev, q);
+ mt76_dma_rx_fill(dev, q);
+-
+- if (!q->rx_head)
+- return;
+-
+- dev_kfree_skb(q->rx_head);
+- q->rx_head = NULL;
+ }
+
+ static void
+diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c
+index 3b47e85e95e7c..db0cd56c8dc7f 100644
+--- a/drivers/net/wireless/mediatek/mt76/eeprom.c
++++ b/drivers/net/wireless/mediatek/mt76/eeprom.c
+@@ -146,10 +146,13 @@ mt76_find_power_limits_node(struct mt76_dev *dev)
+ }
+
+ if (mt76_string_prop_find(country, dev->alpha2) ||
+- mt76_string_prop_find(regd, region_name))
++ mt76_string_prop_find(regd, region_name)) {
++ of_node_put(np);
+ return cur;
++ }
+ }
+
++ of_node_put(np);
+ return fallback;
+ }
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
+index d03aedc3286bb..028519a739fd1 100644
+--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
++++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
+@@ -123,6 +123,7 @@ static int mt76_led_init(struct mt76_dev *dev)
+ if (!of_property_read_u32(np, "led-sources", &led_pin))
+ dev->led_pin = led_pin;
+ dev->led_al = of_property_read_bool(np, "led-active-low");
++ of_node_put(np);
+ }
+
+ return led_classdev_register(dev->dev, &dev->led_cdev);
+@@ -1100,7 +1101,7 @@ mt76_sta_add(struct mt76_dev *dev, struct ieee80211_vif *vif,
+ continue;
+
+ mtxq = (struct mt76_txq *)sta->txq[i]->drv_priv;
+- mtxq->wcid = wcid;
++ mtxq->wcid = wcid->idx;
+ }
+
+ ewma_signal_init(&wcid->rssi);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
+index 25c5ceef52577..eb1fb955b7777 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76.h
++++ b/drivers/net/wireless/mediatek/mt76/mt76.h
+@@ -19,7 +19,7 @@
+
+ #define MT_MCU_RING_SIZE 32
+ #define MT_RX_BUF_SIZE 2048
+-#define MT_SKB_HEAD_LEN 128
++#define MT_SKB_HEAD_LEN 256
+
+ #define MT_MAX_NON_AQL_PKT 16
+ #define MT_TXQ_FREE_THR 32
+@@ -263,7 +263,7 @@ struct mt76_wcid {
+ };
+
+ struct mt76_txq {
+- struct mt76_wcid *wcid;
++ u16 wcid;
+
+ u16 agg_ssn;
+ bool send_bar;
+@@ -820,10 +820,11 @@ bool __mt76_poll(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
+
+ #define mt76_poll(dev, ...) __mt76_poll(&((dev)->mt76), __VA_ARGS__)
+
+-bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
+- int timeout);
+-
+-#define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
++bool ____mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
++ int timeout, int kick);
++#define __mt76_poll_msec(...) ____mt76_poll_msec(__VA_ARGS__, 10)
++#define mt76_poll_msec(dev, ...) ____mt76_poll_msec(&((dev)->mt76), __VA_ARGS__, 10)
++#define mt76_poll_msec_tick(dev, ...) ____mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
+
+ void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
+ void mt76_pci_disable_aspm(struct pci_dev *pdev);
+@@ -869,7 +870,13 @@ struct mt76_phy *mt76_alloc_phy(struct mt76_dev *dev, unsigned int size,
+ int mt76_register_phy(struct mt76_phy *phy, bool vht,
+ struct ieee80211_rate *rates, int n_rates);
+
+-struct dentry *mt76_register_debugfs(struct mt76_dev *dev);
++struct dentry *mt76_register_debugfs_fops(struct mt76_dev *dev,
++ const struct file_operations *ops);
++static inline struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
++{
++ return mt76_register_debugfs_fops(dev, NULL);
++}
++
+ int mt76_queues_read(struct seq_file *s, void *data);
+ void mt76_seq_puts_array(struct seq_file *file, const char *str,
+ s8 *val, int len);
+@@ -1015,8 +1022,9 @@ static inline bool mt76_is_skb_pktid(u8 pktid)
+ static inline u8 mt76_tx_power_nss_delta(u8 nss)
+ {
+ static const u8 nss_delta[4] = { 0, 6, 9, 12 };
++ u8 idx = nss - 1;
+
+- return nss_delta[nss - 1];
++ return (idx < ARRAY_SIZE(nss_delta)) ? nss_delta[idx] : 0;
+ }
+
+ static inline bool mt76_testmode_enabled(struct mt76_phy *phy)
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+index 3972c56136a20..3745512b1eb3c 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+@@ -525,6 +525,10 @@ mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb)
+ if (rxd2 & MT_RXD2_NORMAL_TKIP_MIC_ERR)
+ status->flag |= RX_FLAG_MMIC_ERROR;
+
++ /* ICV error or CCMP/BIP/WPI MIC error */
++ if (rxd2 & MT_RXD2_NORMAL_ICV_ERR)
++ status->flag |= RX_FLAG_ONLY_MONITOR;
++
+ if (FIELD_GET(MT_RXD2_NORMAL_SEC_MODE, rxd2) != 0 &&
+ !(rxd2 & (MT_RXD2_NORMAL_CLM | MT_RXD2_NORMAL_CM))) {
+ status->flag |= RX_FLAG_DECRYPTED;
+@@ -1280,8 +1284,11 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data)
+ if (wcidx >= MT7603_WTBL_STA || !sta)
+ goto out;
+
+- if (mt7603_fill_txs(dev, msta, &info, txs_data))
++ if (mt7603_fill_txs(dev, msta, &info, txs_data)) {
++ spin_lock_bh(&dev->mt76.rx_lock);
+ ieee80211_tx_status_noskb(mt76_hw(dev), sta, &info);
++ spin_unlock_bh(&dev->mt76.rx_lock);
++ }
+
+ out:
+ rcu_read_unlock();
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+index 8edea1e7a602f..0b7b87b4cc21c 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+@@ -74,7 +74,7 @@ mt7603_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+ mt7603_wtbl_init(dev, idx, mvif->idx, bc_addr);
+
+ mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+- mtxq->wcid = &mvif->sta.wcid;
++ mtxq->wcid = idx;
+ rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
+
+ out:
+@@ -620,6 +620,9 @@ mt7603_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates);
+ int i;
+
++ if (!sta_rates)
++ return;
++
+ spin_lock_bh(&dev->mt76.lock);
+ for (i = 0; i < ARRAY_SIZE(msta->rates); i++) {
+ msta->rates[i].idx = sta_rates->rate[i].idx;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+index cb4659771fd97..bda22ca0bd714 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+@@ -2,6 +2,33 @@
+
+ #include "mt7615.h"
+
++static int
++mt7615_reg_set(void *data, u64 val)
++{
++ struct mt7615_dev *dev = data;
++
++ mt7615_mutex_acquire(dev);
++ mt76_wr(dev, dev->mt76.debugfs_reg, val);
++ mt7615_mutex_release(dev);
++
++ return 0;
++}
++
++static int
++mt7615_reg_get(void *data, u64 *val)
++{
++ struct mt7615_dev *dev = data;
++
++ mt7615_mutex_acquire(dev);
++ *val = mt76_rr(dev, dev->mt76.debugfs_reg);
++ mt7615_mutex_release(dev);
++
++ return 0;
++}
++
++DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mt7615_reg_get, mt7615_reg_set,
++ "0x%08llx\n");
++
+ static int
+ mt7615_radar_pattern_set(void *data, u64 val)
+ {
+@@ -506,7 +533,7 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
+ {
+ struct dentry *dir;
+
+- dir = mt76_register_debugfs(&dev->mt76);
++ dir = mt76_register_debugfs_fops(&dev->mt76, &fops_regval);
+ if (!dir)
+ return -ENOMEM;
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+index 6dbaaf95ee385..2092aa373ab32 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+@@ -123,12 +123,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
+ case MT_EE_5GHZ:
+ dev->mphy.cap.has_5ghz = true;
+ break;
+- case MT_EE_2GHZ:
+- dev->mphy.cap.has_2ghz = true;
+- break;
+ case MT_EE_DBDC:
+ dev->dbdc_support = true;
+ fallthrough;
++ case MT_EE_2GHZ:
++ dev->mphy.cap.has_2ghz = true;
++ break;
+ default:
+ dev->mphy.cap.has_2ghz = true;
+ dev->mphy.cap.has_5ghz = true;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+index 2f1ac644e018e..47f23ac905a3c 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+@@ -49,12 +49,14 @@ int mt7615_thermal_init(struct mt7615_dev *dev)
+ {
+ struct wiphy *wiphy = mt76_hw(dev)->wiphy;
+ struct device *hwmon;
++ const char *name;
+
+ if (!IS_REACHABLE(CONFIG_HWMON))
+ return 0;
+
+- hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev,
+- wiphy_name(wiphy), dev,
++ name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7615_%s",
++ wiphy_name(wiphy));
++ hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, dev,
+ mt7615_hwmon_groups);
+ if (IS_ERR(hwmon))
+ return PTR_ERR(hwmon);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+index ff3f85e4087c9..2f0ba8a75d71b 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+@@ -286,9 +286,16 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
+ if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+ return -EINVAL;
+
++ hdr_trans = rxd1 & MT_RXD1_NORMAL_HDR_TRANS;
++ if (hdr_trans && (rxd2 & MT_RXD2_NORMAL_CM))
++ return -EINVAL;
++
++ /* ICV error or CCMP/BIP/WPI MIC error */
++ if (rxd2 & MT_RXD2_NORMAL_ICV_ERR)
++ status->flag |= RX_FLAG_ONLY_MONITOR;
++
+ unicast = (rxd1 & MT_RXD1_NORMAL_ADDR_TYPE) == MT_RXD1_NORMAL_U2M;
+ idx = FIELD_GET(MT_RXD2_NORMAL_WLAN_IDX, rxd2);
+- hdr_trans = rxd1 & MT_RXD1_NORMAL_HDR_TRANS;
+ status->wcid = mt7615_rx_get_wcid(dev, idx, unicast);
+
+ if (status->wcid) {
+@@ -755,12 +762,15 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
+ if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+ txwi[3] |= cpu_to_le32(MT_TXD3_NO_ACK);
+
+- txwi[7] = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
+- FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype) |
+- FIELD_PREP(MT_TXD7_SPE_IDX, 0x18);
+- if (!is_mmio)
+- txwi[8] = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) |
+- FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype);
++ val = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
++ FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype) |
++ FIELD_PREP(MT_TXD7_SPE_IDX, 0x18);
++ txwi[7] = cpu_to_le32(val);
++ if (!is_mmio) {
++ val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) |
++ FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype);
++ txwi[8] = cpu_to_le32(val);
++ }
+
+ return 0;
+ }
+@@ -859,7 +869,10 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
+
+ msta = list_first_entry(&sta_poll_list, struct mt7615_sta,
+ poll_list);
++
++ spin_lock_bh(&dev->sta_poll_lock);
+ list_del_init(&msta->poll_list);
++ spin_unlock_bh(&dev->sta_poll_lock);
+
+ addr = mt7615_mac_wtbl_addr(dev, msta->wcid.idx) + 19 * 4;
+
+@@ -1028,7 +1041,7 @@ u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid)
+ offset %= 32;
+
+ val = mt76_rr(dev, addr);
+- val >>= (tid % 32);
++ val >>= offset;
+
+ if (offset > 20) {
+ addr += 4;
+@@ -1486,40 +1499,52 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data)
+ if (wcid->ext_phy && dev->mt76.phy2)
+ mphy = dev->mt76.phy2;
+
+- if (mt7615_fill_txs(dev, msta, &info, txs_data))
++ if (mt7615_fill_txs(dev, msta, &info, txs_data)) {
++ spin_lock_bh(&dev->mt76.rx_lock);
+ ieee80211_tx_status_noskb(mphy->hw, sta, &info);
++ spin_unlock_bh(&dev->mt76.rx_lock);
++ }
+
+ out:
+ rcu_read_unlock();
+ }
+
+ static void
+-mt7615_mac_tx_free_token(struct mt7615_dev *dev, u16 token)
++mt7615_txwi_free(struct mt7615_dev *dev, struct mt76_txwi_cache *txwi)
+ {
+ struct mt76_dev *mdev = &dev->mt76;
+- struct mt76_txwi_cache *txwi;
+ __le32 *txwi_data;
+ u32 val;
+ u8 wcid;
+
+- trace_mac_tx_free(dev, token);
+- txwi = mt76_token_put(mdev, token);
+- if (!txwi)
+- return;
++ mt7615_txp_skb_unmap(mdev, txwi);
++ if (!txwi->skb)
++ goto out;
+
+ txwi_data = (__le32 *)mt76_get_txwi_ptr(mdev, txwi);
+ val = le32_to_cpu(txwi_data[1]);
+ wcid = FIELD_GET(MT_TXD1_WLAN_IDX, val);
++ mt76_tx_complete_skb(mdev, wcid, txwi->skb);
+
+- mt7615_txp_skb_unmap(mdev, txwi);
+- if (txwi->skb) {
+- mt76_tx_complete_skb(mdev, wcid, txwi->skb);
+- txwi->skb = NULL;
+- }
+-
++out:
++ txwi->skb = NULL;
+ mt76_put_txwi(mdev, txwi);
+ }
+
++static void
++mt7615_mac_tx_free_token(struct mt7615_dev *dev, u16 token)
++{
++ struct mt76_dev *mdev = &dev->mt76;
++ struct mt76_txwi_cache *txwi;
++
++ trace_mac_tx_free(dev, token);
++ txwi = mt76_token_put(mdev, token);
++ if (!txwi)
++ return;
++
++ mt7615_txwi_free(dev, txwi);
++}
++
+ static void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb)
+ {
+ struct mt7615_tx_free *free = (struct mt7615_tx_free *)skb->data;
+@@ -1713,7 +1738,7 @@ mt7615_mac_adjust_sensitivity(struct mt7615_phy *phy,
+ struct mt7615_dev *dev = phy->dev;
+ int false_cca = ofdm ? phy->false_cca_ofdm : phy->false_cca_cck;
+ bool ext_phy = phy != &dev->phy;
+- u16 def_th = ofdm ? -98 : -110;
++ s16 def_th = ofdm ? -98 : -110;
+ bool update = false;
+ s8 *sensitivity;
+ int signal;
+@@ -1981,6 +2006,14 @@ void mt7615_pm_power_save_work(struct work_struct *work)
+ test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state))
+ goto out;
+
++ if (mutex_is_locked(&dev->mt76.mutex))
++ /* if mt76 mutex is held we should not put the device
++ * to sleep since we are currently accessing device
++ * register map. We need to wait for the next power_save
++ * trigger.
++ */
++ goto out;
++
+ if (time_is_after_jiffies(dev->pm.last_activity + delta)) {
+ delta = dev->pm.last_activity + delta - jiffies;
+ goto out;
+@@ -2026,16 +2059,8 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
+ int id;
+
+ spin_lock_bh(&dev->mt76.token_lock);
+- idr_for_each_entry(&dev->mt76.token, txwi, id) {
+- mt7615_txp_skb_unmap(&dev->mt76, txwi);
+- if (txwi->skb) {
+- struct ieee80211_hw *hw;
+-
+- hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb);
+- ieee80211_free_txskb(hw, txwi->skb);
+- }
+- mt76_put_txwi(&dev->mt76, txwi);
+- }
++ idr_for_each_entry(&dev->mt76.token, txwi, id)
++ mt7615_txwi_free(dev, txwi);
+ spin_unlock_bh(&dev->mt76.token_lock);
+ idr_destroy(&dev->mt76.token);
+ }
+@@ -2251,7 +2276,7 @@ void mt7615_coredump_work(struct work_struct *work)
+ break;
+
+ skb_pull(skb, sizeof(struct mt7615_mcu_rxd));
+- if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
++ if (!dump || data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
+ dev_kfree_skb(skb);
+ continue;
+ }
+@@ -2261,6 +2286,8 @@ void mt7615_coredump_work(struct work_struct *work)
+
+ dev_kfree_skb(skb);
+ }
+- dev_coredumpv(dev->mt76.dev, dump, MT76_CONNAC_COREDUMP_SZ,
+- GFP_KERNEL);
++
++ if (dump)
++ dev_coredumpv(dev->mt76.dev, dump, MT76_CONNAC_COREDUMP_SZ,
++ GFP_KERNEL);
+ }
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+index dada43d6d879e..96667b7d722d5 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+@@ -135,8 +135,6 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask)
+ int i;
+
+ switch (type) {
+- case NL80211_IFTYPE_MESH_POINT:
+- case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_STATION:
+ /* prefer hw bssid slot 1-3 */
+ i = get_free_idx(mask, HW_BSSID_1, HW_BSSID_3);
+@@ -160,6 +158,8 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask)
+ return HW_BSSID_0;
+
+ break;
++ case NL80211_IFTYPE_ADHOC:
++ case NL80211_IFTYPE_MESH_POINT:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_AP:
+ /* ap uses hw bssid 0 and ext bssid */
+@@ -211,11 +211,9 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
+ mvif->mt76.omac_idx = idx;
+
+ mvif->mt76.band_idx = ext_phy;
+- if (mt7615_ext_phy(dev))
+- mvif->mt76.wmm_idx = ext_phy * (MT7615_MAX_WMM_SETS / 2) +
+- mvif->mt76.idx % (MT7615_MAX_WMM_SETS / 2);
+- else
+- mvif->mt76.wmm_idx = mvif->mt76.idx % MT7615_MAX_WMM_SETS;
++ mvif->mt76.wmm_idx = vif->type != NL80211_IFTYPE_AP;
++ if (ext_phy)
++ mvif->mt76.wmm_idx += 2;
+
+ dev->mt76.vif_mask |= BIT(mvif->mt76.idx);
+ dev->omac_mask |= BIT_ULL(mvif->mt76.omac_idx);
+@@ -237,7 +235,7 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
+ rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
+ if (vif->txq) {
+ mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+- mtxq->wcid = &mvif->sta.wcid;
++ mtxq->wcid = idx;
+ }
+
+ ret = mt7615_mcu_add_dev_info(phy, vif, true);
+@@ -292,7 +290,8 @@ static void mt7615_init_dfs_state(struct mt7615_phy *phy)
+ if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
+ return;
+
+- if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR))
++ if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR) &&
++ !(mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ return;
+
+ if (mphy->chandef.chan->center_freq == chandef->chan->center_freq &&
+@@ -684,6 +683,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw,
+ struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates);
+ int i;
+
++ if (!sta_rates)
++ return;
++
+ spin_lock_bh(&dev->mt76.lock);
+ for (i = 0; i < ARRAY_SIZE(msta->rates); i++) {
+ msta->rates[i].idx = sta_rates->rate[i].idx;
+@@ -1185,12 +1187,16 @@ static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw,
+ struct mt7615_dev *dev = mt7615_hw_dev(hw);
+ struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+
++ mt7615_mutex_acquire(dev);
++
+ if (enabled)
+ set_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+ else
+ clear_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+
+ mt7615_mcu_set_sta_decap_offload(dev, vif, sta);
++
++ mt7615_mutex_release(dev);
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+index f8a09692d3e4c..bde65af72feda 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+@@ -385,10 +385,11 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
+ }
+
+ mt7622_trigger_hif_int(dev, false);
+-
+- pm->stats.last_doze_event = jiffies;
+- pm->stats.awake_time += pm->stats.last_doze_event -
+- pm->stats.last_wake_event;
++ if (!err) {
++ pm->stats.last_doze_event = jiffies;
++ pm->stats.awake_time += pm->stats.last_doze_event -
++ pm->stats.last_wake_event;
++ }
+ out:
+ mutex_unlock(&pm->mutex);
+
+@@ -808,7 +809,8 @@ mt7615_mcu_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
+
+ static int
+ mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+- struct ieee80211_sta *sta, bool enable)
++ struct ieee80211_sta *sta, struct mt7615_phy *phy,
++ bool enable)
+ {
+ struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+ u32 type = vif->p2p ? NETWORK_P2P : NETWORK_INFRA;
+@@ -821,6 +823,7 @@ mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+ switch (vif->type) {
+ case NL80211_IFTYPE_MESH_POINT:
+ case NL80211_IFTYPE_AP:
++ case NL80211_IFTYPE_MONITOR:
+ break;
+ case NL80211_IFTYPE_STATION:
+ /* TODO: enable BSS_INFO_UAPSD & BSS_INFO_PM */
+@@ -840,14 +843,19 @@ mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+ }
+
+ bss = (struct bss_info_basic *)tlv;
+- memcpy(bss->bssid, vif->bss_conf.bssid, ETH_ALEN);
+- bss->bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int);
+ bss->network_type = cpu_to_le32(type);
+- bss->dtim_period = vif->bss_conf.dtim_period;
+ bss->bmc_tx_wlan_idx = wlan_idx;
+ bss->wmm_idx = mvif->mt76.wmm_idx;
+ bss->active = enable;
+
++ if (vif->type != NL80211_IFTYPE_MONITOR) {
++ memcpy(bss->bssid, vif->bss_conf.bssid, ETH_ALEN);
++ bss->bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int);
++ bss->dtim_period = vif->bss_conf.dtim_period;
++ } else {
++ memcpy(bss->bssid, phy->mt76->macaddr, ETH_ALEN);
++ }
++
+ return 0;
+ }
+
+@@ -863,6 +871,7 @@ mt7615_mcu_bss_omac_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
+ tlv = mt76_connac_mcu_add_tlv(skb, BSS_INFO_OMAC, sizeof(*omac));
+
+ switch (vif->type) {
++ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_MESH_POINT:
+ case NL80211_IFTYPE_AP:
+ if (vif->p2p)
+@@ -929,7 +938,7 @@ mt7615_mcu_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+ if (enable)
+ mt7615_mcu_bss_omac_tlv(skb, vif);
+
+- mt7615_mcu_bss_basic_tlv(skb, vif, sta, enable);
++ mt7615_mcu_bss_basic_tlv(skb, vif, sta, phy, enable);
+
+ if (enable && mvif->mt76.omac_idx >= EXT_BSSID_START &&
+ mvif->mt76.omac_idx < REPEATER_BSSID_START)
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+index a2465b49ecd0c..87b4aa52ee0f9 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+@@ -28,8 +28,6 @@ static void mt7615_pci_init_work(struct work_struct *work)
+ return;
+
+ mt7615_init_work(dev);
+- if (dev->dbdc_support)
+- mt7615_register_ext_phy(dev);
+ }
+
+ static int mt7615_init_hardware(struct mt7615_dev *dev)
+@@ -160,6 +158,12 @@ int mt7615_register_device(struct mt7615_dev *dev)
+ mt7615_init_txpower(dev, &dev->mphy.sband_2g.sband);
+ mt7615_init_txpower(dev, &dev->mphy.sband_5g.sband);
+
++ if (dev->dbdc_support) {
++ ret = mt7615_register_ext_phy(dev);
++ if (ret)
++ return ret;
++ }
++
+ return mt7615_init_debugfs(dev);
+ }
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+index af43bcb545781..306e9eaea9177 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+@@ -7,9 +7,6 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
+ {
+ struct mt76_dev *dev = phy->dev;
+
+- if (!pm->enable)
+- return 0;
+-
+ if (mt76_is_usb(dev))
+ return 0;
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+index 5c3a81e5f559d..98f651fec3bf3 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+@@ -295,7 +295,7 @@ mt76_connac_mcu_alloc_wtbl_req(struct mt76_dev *dev, struct mt76_wcid *wcid,
+ }
+
+ if (sta_hdr)
+- sta_hdr->len = cpu_to_le16(sizeof(hdr));
++ le16_add_cpu(&sta_hdr->len, sizeof(hdr));
+
+ return skb_put_data(nskb, &hdr, sizeof(hdr));
+ }
+@@ -689,7 +689,7 @@ mt76_connac_get_phy_mode_v2(struct mt76_phy *mphy, struct ieee80211_vif *vif,
+ if (ht_cap->ht_supported)
+ mode |= PHY_TYPE_BIT_HT;
+
+- if (he_cap->has_he)
++ if (he_cap && he_cap->has_he)
+ mode |= PHY_TYPE_BIT_HE;
+ } else if (band == NL80211_BAND_5GHZ) {
+ mode |= PHY_TYPE_BIT_OFDM;
+@@ -700,7 +700,7 @@ mt76_connac_get_phy_mode_v2(struct mt76_phy *mphy, struct ieee80211_vif *vif,
+ if (vht_cap->vht_supported)
+ mode |= PHY_TYPE_BIT_VHT;
+
+- if (he_cap->has_he)
++ if (he_cap && he_cap->has_he)
+ mode |= PHY_TYPE_BIT_HE;
+ }
+
+@@ -719,6 +719,7 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
+ struct sta_rec_state *state;
+ struct sta_rec_phy *phy;
+ struct tlv *tlv;
++ u16 supp_rates;
+
+ /* starec ht */
+ if (sta->ht_cap.ht_supported) {
+@@ -767,7 +768,15 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
+
+ tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra_info));
+ ra_info = (struct sta_rec_ra_info *)tlv;
+- ra_info->legacy = cpu_to_le16((u16)sta->supp_rates[band]);
++
++ supp_rates = sta->supp_rates[band];
++ if (band == NL80211_BAND_2GHZ)
++ supp_rates = FIELD_PREP(RA_LEGACY_OFDM, supp_rates >> 4) |
++ FIELD_PREP(RA_LEGACY_CCK, supp_rates & 0xf);
++ else
++ supp_rates = FIELD_PREP(RA_LEGACY_OFDM, supp_rates);
++
++ ra_info->legacy = cpu_to_le16(supp_rates);
+
+ if (sta->ht_cap.ht_supported)
+ memcpy(ra_info->rx_mcs_bitmask, sta->ht_cap.mcs.rx_mask,
+@@ -1437,8 +1446,16 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
+ req->channel_min_dwell_time = cpu_to_le16(duration);
+ req->channel_dwell_time = cpu_to_le16(duration);
+
+- req->channels_num = min_t(u8, sreq->n_channels, 32);
+- req->ext_channels_num = min_t(u8, ext_channels_num, 32);
++ if (sreq->n_channels == 0 || sreq->n_channels > 64) {
++ req->channel_type = 0;
++ req->channels_num = 0;
++ req->ext_channels_num = 0;
++ } else {
++ req->channel_type = 4;
++ req->channels_num = min_t(u8, sreq->n_channels, 32);
++ req->ext_channels_num = min_t(u8, ext_channels_num, 32);
++ }
++
+ for (i = 0; i < req->channels_num + req->ext_channels_num; i++) {
+ if (i >= 32)
+ chan = &req->ext_channels[i - 32];
+@@ -1448,7 +1465,6 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
+ chan->band = scan_list[i]->band == NL80211_BAND_2GHZ ? 1 : 2;
+ chan->channel_num = scan_list[i]->hw_value;
+ }
+- req->channel_type = sreq->n_channels ? 4 : 0;
+
+ if (sreq->ie_len > 0) {
+ memcpy(req->ies, sreq->ie, sreq->ie_len);
+@@ -1929,19 +1945,22 @@ mt76_connac_mcu_key_iter(struct ieee80211_hw *hw,
+ key->cipher != WLAN_CIPHER_SUITE_TKIP)
+ return;
+
+- if (key->cipher == WLAN_CIPHER_SUITE_TKIP) {
+- gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_1);
++ if (key->cipher == WLAN_CIPHER_SUITE_TKIP)
+ cipher = BIT(3);
+- } else {
+- gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_2);
++ else
+ cipher = BIT(4);
+- }
+
+ /* we are assuming here to have a single pairwise key */
+ if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
++ if (key->cipher == WLAN_CIPHER_SUITE_TKIP)
++ gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_1);
++ else
++ gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_2);
++
+ gtk_tlv->pairwise_cipher = cpu_to_le32(cipher);
+- gtk_tlv->group_cipher = cpu_to_le32(cipher);
+ gtk_tlv->keyid = key->keyidx;
++ } else {
++ gtk_tlv->group_cipher = cpu_to_le32(cipher);
+ }
+ }
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+index 1c73beb226771..72a70a7046fbc 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
++++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+@@ -124,6 +124,8 @@ struct sta_rec_state {
+ u8 rsv[1];
+ } __packed;
+
++#define RA_LEGACY_OFDM GENMASK(13, 6)
++#define RA_LEGACY_CCK GENMASK(3, 0)
+ #define HT_MCS_MASK_NUM 10
+ struct sta_rec_ra_info {
+ __le16 tag;
+@@ -554,7 +556,7 @@ enum {
+ MCU_CMD_SET_BSS_CONNECTED = MCU_CE_PREFIX | 0x16,
+ MCU_CMD_SET_BSS_ABORT = MCU_CE_PREFIX | 0x17,
+ MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
+- MCU_CMD_SET_ROC = MCU_CE_PREFIX | 0x1d,
++ MCU_CMD_SET_ROC = MCU_CE_PREFIX | 0x1c,
+ MCU_CMD_SET_P2P_OPPPS = MCU_CE_PREFIX | 0x33,
+ MCU_CMD_SET_RATE_TX_POWER = MCU_CE_PREFIX | 0x5d,
+ MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
+@@ -844,14 +846,14 @@ struct mt76_connac_gtk_rekey_tlv {
+ * 2: rekey update
+ */
+ u8 keyid;
+- u8 pad[2];
++ u8 option; /* 1: rekey data update without enabling offload */
++ u8 pad[1];
+ __le32 proto; /* WPA-RSN-WAPI-OPSN */
+ __le32 pairwise_cipher;
+ __le32 group_cipher;
+ __le32 key_mgmt; /* NONE-PSK-IEEE802.1X */
+ __le32 mgmt_group_cipher;
+- u8 option; /* 1: rekey data update without enabling offload */
+- u8 reserverd[3];
++ u8 reserverd[4];
+ } __packed;
+
+ #define MT76_CONNAC_WOW_MASK_MAX_LEN 16
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+index c32e6dc687739..eacc5d39d5834 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+@@ -176,7 +176,7 @@ void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop)
+ mt76_wr(dev, MT_WCID_DROP(idx), (val & ~bit) | (bit * drop));
+ }
+
+-static __le16
++static u16
+ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev,
+ const struct ieee80211_tx_rate *rate, u8 *nss_val)
+ {
+@@ -222,14 +222,14 @@ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev,
+ rateval |= MT_RXWI_RATE_SGI;
+
+ *nss_val = nss;
+- return cpu_to_le16(rateval);
++ return rateval;
+ }
+
+ void mt76x02_mac_wcid_set_rate(struct mt76x02_dev *dev, struct mt76_wcid *wcid,
+ const struct ieee80211_tx_rate *rate)
+ {
+ s8 max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate);
+- __le16 rateval;
++ u16 rateval;
+ u32 tx_info;
+ s8 nss;
+
+@@ -342,7 +342,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
+ struct ieee80211_key_conf *key = info->control.hw_key;
+ u32 wcid_tx_info;
+ u16 rate_ht_mask = FIELD_PREP(MT_RXWI_RATE_PHY, BIT(1) | BIT(2));
+- u16 txwi_flags = 0;
++ u16 txwi_flags = 0, rateval;
+ u8 nss;
+ s8 txpwr_adj, max_txpwr_adj;
+ u8 ccmp_pn[8], nstreams = dev->mphy.chainmask & 0xf;
+@@ -380,14 +380,15 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
+
+ if (wcid && (rate->idx < 0 || !rate->count)) {
+ wcid_tx_info = wcid->tx_info;
+- txwi->rate = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info);
++ rateval = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info);
+ max_txpwr_adj = FIELD_GET(MT_WCID_TX_INFO_TXPWR_ADJ,
+ wcid_tx_info);
+ nss = FIELD_GET(MT_WCID_TX_INFO_NSS, wcid_tx_info);
+ } else {
+- txwi->rate = mt76x02_mac_tx_rate_val(dev, rate, &nss);
++ rateval = mt76x02_mac_tx_rate_val(dev, rate, &nss);
+ max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate);
+ }
++ txwi->rate = cpu_to_le16(rateval);
+
+ txpwr_adj = mt76x02_tx_get_txpwr_adj(dev, dev->txpower_conf,
+ max_txpwr_adj);
+@@ -630,8 +631,11 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
+
+ mt76_tx_status_unlock(mdev, &list);
+
+- if (!status.skb)
++ if (!status.skb) {
++ spin_lock_bh(&dev->mt76.rx_lock);
+ ieee80211_tx_status_ext(mt76_hw(dev), &status);
++ spin_unlock_bh(&dev->mt76.rx_lock);
++ }
+
+ if (!len)
+ goto out;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
+index 2953df7d8388d..c6c16fe8ee859 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
+@@ -108,7 +108,7 @@ __mt76x02u_mcu_send_msg(struct mt76_dev *dev, struct sk_buff *skb,
+ ret = mt76u_bulk_msg(dev, skb->data, skb->len, NULL, 500,
+ MT_EP_OUT_INBAND_CMD);
+ if (ret)
+- return ret;
++ goto out;
+
+ if (wait_resp)
+ ret = mt76x02u_mcu_wait_resp(dev, seq);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+index ccdbab3412714..db7a4ffcad558 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+@@ -288,7 +288,8 @@ mt76x02_vif_init(struct mt76x02_dev *dev, struct ieee80211_vif *vif,
+ mvif->group_wcid.idx = MT_VIF_WCID(idx);
+ mvif->group_wcid.hw_key_idx = -1;
+ mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+- mtxq->wcid = &mvif->group_wcid;
++ rcu_assign_pointer(dev->mt76.wcid[MT_VIF_WCID(idx)], &mvif->group_wcid);
++ mtxq->wcid = MT_VIF_WCID(idx);
+ }
+
+ int
+@@ -341,6 +342,7 @@ void mt76x02_remove_interface(struct ieee80211_hw *hw,
+ struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv;
+
+ dev->mt76.vif_mask &= ~BIT(mvif->idx);
++ rcu_assign_pointer(dev->mt76.wcid[mvif->group_wcid.idx], NULL);
+ }
+ EXPORT_SYMBOL_GPL(mt76x02_remove_interface);
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+index adf288e50e212..5cd0379d86de8 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
++++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+@@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);
+
+ /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
+- mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);
++ mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf);
+
+ /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
+ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+index 64048243e34b2..31c1d4bc78dd1 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+@@ -12,9 +12,9 @@ mt7915_implicit_txbf_set(void *data, u64 val)
+ {
+ struct mt7915_dev *dev = data;
+
+- if (test_bit(MT76_STATE_RUNNING, &dev->mphy.state))
+- return -EBUSY;
+-
++ /* The existing connected stations shall reconnect to apply
++ * new implicit txbf configuration.
++ */
+ dev->ibf = !!val;
+
+ return mt7915_mcu_set_txbf(dev, MT_BF_TYPE_UPDATE);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+index 4798d6344305d..1ae42ef147c8a 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+@@ -130,9 +130,12 @@ static int mt7915_thermal_init(struct mt7915_phy *phy)
+ struct wiphy *wiphy = phy->mt76->hw->wiphy;
+ struct thermal_cooling_device *cdev;
+ struct device *hwmon;
++ const char *name;
+
+- cdev = thermal_cooling_device_register(wiphy_name(wiphy), phy,
+- &mt7915_thermal_ops);
++ name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7915_%s",
++ wiphy_name(wiphy));
++
++ cdev = thermal_cooling_device_register(name, phy, &mt7915_thermal_ops);
+ if (!IS_ERR(cdev)) {
+ if (sysfs_create_link(&wiphy->dev.kobj, &cdev->device.kobj,
+ "cooling_device") < 0)
+@@ -144,8 +147,7 @@ static int mt7915_thermal_init(struct mt7915_phy *phy)
+ if (!IS_REACHABLE(CONFIG_HWMON))
+ return 0;
+
+- hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev,
+- wiphy_name(wiphy), phy,
++ hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy,
+ mt7915_hwmon_groups);
+ if (IS_ERR(hwmon))
+ return PTR_ERR(hwmon);
+@@ -215,8 +217,8 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
+ struct wiphy *wiphy = hw->wiphy;
+
+ hw->queues = 4;
+- hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+- hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
++ hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
++ hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+ hw->netdev_features = NETIF_F_RXCSUM;
+
+ hw->radiotap_timestamp.units_pos =
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+index 2462704094b0a..a8a0e6af51f85 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+@@ -349,9 +349,16 @@ mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
+ if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+ return -EINVAL;
+
++ hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
++ if (hdr_trans && (rxd1 & MT_RXD1_NORMAL_CM))
++ return -EINVAL;
++
++ /* ICV error or CCMP/BIP/WPI MIC error */
++ if (rxd1 & MT_RXD1_NORMAL_ICV_ERR)
++ status->flag |= RX_FLAG_ONLY_MONITOR;
++
+ unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
+ idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
+- hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
+ status->wcid = mt7915_rx_get_wcid(dev, idx, unicast);
+
+ if (status->wcid) {
+@@ -792,6 +799,7 @@ mt7915_mac_write_txwi_8023(struct mt7915_dev *dev, __le32 *txwi,
+
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+ u8 fc_type, fc_stype;
++ u16 ethertype;
+ bool wmm = false;
+ u32 val;
+
+@@ -805,7 +813,8 @@ mt7915_mac_write_txwi_8023(struct mt7915_dev *dev, __le32 *txwi,
+ val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) |
+ FIELD_PREP(MT_TXD1_TID, tid);
+
+- if (be16_to_cpu(skb->protocol) >= ETH_P_802_3_MIN)
++ ethertype = get_unaligned_be16(&skb->data[12]);
++ if (ethertype >= ETH_P_802_3_MIN)
+ val |= MT_TXD1_ETH_802_3;
+
+ txwi[1] |= cpu_to_le32(val);
+@@ -892,6 +901,7 @@ mt7915_mac_write_txwi_80211(struct mt7915_dev *dev, __le32 *txwi,
+ val = MT_TXD3_SN_VALID |
+ FIELD_PREP(MT_TXD3_SEQ, IEEE80211_SEQ_TO_SN(seqno));
+ txwi[3] |= cpu_to_le32(val);
++ txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU);
+ }
+
+ val = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
+@@ -1232,7 +1242,7 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
+ goto out;
+
+ info = IEEE80211_SKB_CB(skb);
+- if (!(txs_data[0] & le32_to_cpu(MT_TXS0_ACK_ERROR_MASK)))
++ if (!(txs_data[0] & cpu_to_le32(MT_TXS0_ACK_ERROR_MASK)))
+ info->flags |= IEEE80211_TX_STAT_ACK;
+
+ info->status.ampdu_len = 1;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+index eb1885f4bd8eb..fee7741b5d421 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+@@ -272,7 +272,8 @@ enum tx_mcu_port_q_idx {
+ #define MT_TX_RATE_MODE GENMASK(9, 6)
+ #define MT_TX_RATE_SU_EXT_TONE BIT(5)
+ #define MT_TX_RATE_DCM BIT(4)
+-#define MT_TX_RATE_IDX GENMASK(3, 0)
++/* VHT/HE only use bits 0-3 */
++#define MT_TX_RATE_IDX GENMASK(5, 0)
+
+ #define MT_TXP_MAX_BUF_NUM 6
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+index c25f8da590dd9..09ea97a81fb4f 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+@@ -243,7 +243,7 @@ static int mt7915_add_interface(struct ieee80211_hw *hw,
+ rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
+ if (vif->txq) {
+ mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+- mtxq->wcid = &mvif->sta.wcid;
++ mtxq->wcid = idx;
+ }
+
+ if (vif->type != NL80211_IFTYPE_AP &&
+@@ -302,7 +302,8 @@ static void mt7915_init_dfs_state(struct mt7915_phy *phy)
+ if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
+ return;
+
+- if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR))
++ if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR) &&
++ !(mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ return;
+
+ if (mphy->chandef.chan->center_freq == chandef->chan->center_freq &&
+@@ -440,7 +441,8 @@ static int mt7915_config(struct ieee80211_hw *hw, u32 changed)
+ ieee80211_wake_queues(hw);
+ }
+
+- if (changed & IEEE80211_CONF_CHANGE_POWER) {
++ if (changed & (IEEE80211_CONF_CHANGE_POWER |
++ IEEE80211_CONF_CHANGE_CHANNEL)) {
+ ret = mt7915_mcu_set_txpower_sku(phy);
+ if (ret)
+ return ret;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+index 43960770a9af2..1c900454cf58c 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+@@ -176,7 +176,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta)
+ if (ht_cap->ht_supported)
+ mode |= PHY_MODE_GN;
+
+- if (he_cap->has_he)
++ if (he_cap && he_cap->has_he)
+ mode |= PHY_MODE_AX_24G;
+ } else if (band == NL80211_BAND_5GHZ) {
+ mode |= PHY_MODE_A;
+@@ -187,7 +187,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta)
+ if (vht_cap->vht_supported)
+ mode |= PHY_MODE_AC;
+
+- if (he_cap->has_he)
++ if (he_cap && he_cap->has_he)
+ mode |= PHY_MODE_AX_5G;
+ }
+
+@@ -721,7 +721,7 @@ mt7915_mcu_alloc_sta_req(struct mt7915_dev *dev, struct mt7915_vif *mvif,
+ .bss_idx = mvif->idx,
+ .wlan_idx_lo = msta ? to_wcid_lo(msta->wcid.idx) : 0,
+ .wlan_idx_hi = msta ? to_wcid_hi(msta->wcid.idx) : 0,
+- .muar_idx = msta ? mvif->omac_idx : 0,
++ .muar_idx = msta && msta->wcid.sta ? mvif->omac_idx : 0xe,
+ .is_tlv_append = 1,
+ };
+ struct sk_buff *skb;
+@@ -757,7 +757,7 @@ mt7915_mcu_alloc_wtbl_req(struct mt7915_dev *dev, struct mt7915_sta *msta,
+ }
+
+ if (sta_hdr)
+- sta_hdr->len = cpu_to_le16(sizeof(hdr));
++ le16_add_cpu(&sta_hdr->len, sizeof(hdr));
+
+ return skb_put_data(nskb, &hdr, sizeof(hdr));
+ }
+@@ -925,7 +925,7 @@ static void mt7915_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy,
+
+ elem = ieee80211_bss_get_elem(bss, WLAN_EID_EXT_CAPABILITY);
+
+- if (!elem || elem->datalen < 10 ||
++ if (!elem || elem->datalen <= 10 ||
+ !(elem->data[10] &
+ WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT))
+ data->tolerated = false;
+@@ -1201,7 +1201,7 @@ mt7915_mcu_sta_key_tlv(struct mt7915_sta *msta, struct sk_buff *skb,
+ u8 cipher;
+
+ cipher = mt7915_mcu_get_cipher(key->cipher);
+- if (cipher == MT_CIPHER_NONE)
++ if (cipher == MCU_CIPHER_NONE)
+ return -EOPNOTSUPP;
+
+ sec_key = &sec->key[0];
+@@ -1396,8 +1396,11 @@ mt7915_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+ generic = (struct wtbl_generic *)tlv;
+
+ if (sta) {
++ if (vif->type == NL80211_IFTYPE_STATION)
++ generic->partial_aid = cpu_to_le16(vif->bss_conf.aid);
++ else
++ generic->partial_aid = cpu_to_le16(sta->aid);
+ memcpy(generic->peer_addr, sta->addr, ETH_ALEN);
+- generic->partial_aid = cpu_to_le16(sta->aid);
+ generic->muar_idx = mvif->omac_idx;
+ generic->qos = sta->wme;
+ } else {
+@@ -1451,12 +1454,15 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+ case NL80211_IFTYPE_MESH_POINT:
+ case NL80211_IFTYPE_AP:
+ basic->conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
++ basic->aid = cpu_to_le16(sta->aid);
+ break;
+ case NL80211_IFTYPE_STATION:
+ basic->conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
++ basic->aid = cpu_to_le16(vif->bss_conf.aid);
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ basic->conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
++ basic->aid = cpu_to_le16(sta->aid);
+ break;
+ default:
+ WARN_ON(1);
+@@ -1464,7 +1470,6 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+ }
+
+ memcpy(basic->peer_addr, sta->addr, ETH_ALEN);
+- basic->aid = cpu_to_le16(sta->aid);
+ basic->qos = sta->wme;
+ }
+
+@@ -2684,7 +2689,7 @@ static int mt7915_driver_own(struct mt7915_dev *dev)
+ {
+ mt76_wr(dev, MT_TOP_LPCR_HOST_BAND0, MT_TOP_LPCR_HOST_DRV_OWN);
+ if (!mt76_poll_msec(dev, MT_TOP_LPCR_HOST_BAND0,
+- MT_TOP_LPCR_HOST_FW_OWN, 0, 500)) {
++ MT_TOP_LPCR_HOST_FW_OWN_STAT, 0, 500)) {
+ dev_err(dev->mt76.dev, "Timeout for driver own\n");
+ return -EIO;
+ }
+@@ -2790,7 +2795,7 @@ out:
+ default:
+ ret = -EAGAIN;
+ dev_err(dev->mt76.dev, "Failed to release patch semaphore\n");
+- goto out;
++ break;
+ }
+ release_firmware(fw);
+
+@@ -3391,20 +3396,20 @@ int mt7915_mcu_set_chan_info(struct mt7915_phy *phy, int cmd)
+
+ static int mt7915_mcu_set_eeprom_flash(struct mt7915_dev *dev)
+ {
+-#define TOTAL_PAGE_MASK GENMASK(7, 5)
++#define MAX_PAGE_IDX_MASK GENMASK(7, 5)
+ #define PAGE_IDX_MASK GENMASK(4, 2)
+ #define PER_PAGE_SIZE 0x400
+ struct mt7915_mcu_eeprom req = { .buffer_mode = EE_MODE_BUFFER };
+- u8 total = MT7915_EEPROM_SIZE / PER_PAGE_SIZE;
++ u8 total = DIV_ROUND_UP(MT7915_EEPROM_SIZE, PER_PAGE_SIZE);
+ u8 *eep = (u8 *)dev->mt76.eeprom.data;
+ int eep_len;
+ int i;
+
+- for (i = 0; i <= total; i++, eep += eep_len) {
++ for (i = 0; i < total; i++, eep += eep_len) {
+ struct sk_buff *skb;
+ int ret;
+
+- if (i == total)
++ if (i == total - 1 && !!(MT7915_EEPROM_SIZE % PER_PAGE_SIZE))
+ eep_len = MT7915_EEPROM_SIZE % PER_PAGE_SIZE;
+ else
+ eep_len = PER_PAGE_SIZE;
+@@ -3414,7 +3419,7 @@ static int mt7915_mcu_set_eeprom_flash(struct mt7915_dev *dev)
+ if (!skb)
+ return -ENOMEM;
+
+- req.format = FIELD_PREP(TOTAL_PAGE_MASK, total) |
++ req.format = FIELD_PREP(MAX_PAGE_IDX_MASK, total - 1) |
+ FIELD_PREP(PAGE_IDX_MASK, i) | EE_FORMAT_WHOLE;
+ req.len = cpu_to_le16(eep_len);
+
+@@ -3481,7 +3486,7 @@ static int mt7915_mcu_set_pre_cal(struct mt7915_dev *dev, u8 idx,
+ u8 idx;
+ u8 rsv[4];
+ __le32 len;
+- } req;
++ } req = {};
+ struct sk_buff *skb;
+
+ skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, sizeof(req) + len);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+index a213b5cb82f81..f4101cc9f9eb1 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+@@ -426,6 +426,7 @@
+ #define MT_TOP_LPCR_HOST_BAND0 MT_TOP(0x10)
+ #define MT_TOP_LPCR_HOST_FW_OWN BIT(0)
+ #define MT_TOP_LPCR_HOST_DRV_OWN BIT(1)
++#define MT_TOP_LPCR_HOST_FW_OWN_STAT BIT(2)
+
+ #define MT_TOP_MISC MT_TOP(0xf0)
+ #define MT_TOP_MISC_FW_STATE GENMASK(2, 0)
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+index 77468bdae460b..cfcf7964c6881 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+@@ -4,6 +4,32 @@
+ #include "mt7921.h"
+ #include "eeprom.h"
+
++static int
++mt7921_reg_set(void *data, u64 val)
++{
++ struct mt7921_dev *dev = data;
++
++ mt7921_mutex_acquire(dev);
++ mt76_wr(dev, dev->mt76.debugfs_reg, val);
++ mt7921_mutex_release(dev);
++
++ return 0;
++}
++
++static int
++mt7921_reg_get(void *data, u64 *val)
++{
++ struct mt7921_dev *dev = data;
++
++ mt7921_mutex_acquire(dev);
++ *val = mt76_rr(dev, dev->mt76.debugfs_reg);
++ mt7921_mutex_release(dev);
++
++ return 0;
++}
++
++DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mt7921_reg_get, mt7921_reg_set,
++ "0x%08llx\n");
+ static int
+ mt7921_fw_debug_set(void *data, u64 val)
+ {
+@@ -69,6 +95,8 @@ mt7921_tx_stats_show(struct seq_file *file, void *data)
+ struct mt7921_dev *dev = file->private;
+ int stat[8], i, n;
+
++ mt7921_mutex_acquire(dev);
++
+ mt7921_ampdu_stat_read_phy(&dev->phy, file);
+
+ /* Tx amsdu info */
+@@ -78,6 +106,8 @@ mt7921_tx_stats_show(struct seq_file *file, void *data)
+ n += stat[i];
+ }
+
++ mt7921_mutex_release(dev);
++
+ for (i = 0; i < ARRAY_SIZE(stat); i++) {
+ seq_printf(file, "AMSDU pack count of %d MSDU in TXD: 0x%x ",
+ i + 1, stat[i]);
+@@ -98,25 +128,28 @@ mt7921_queues_acq(struct seq_file *s, void *data)
+ struct mt7921_dev *dev = dev_get_drvdata(s->private);
+ int i;
+
+- for (i = 0; i < 16; i++) {
+- int j, acs = i / 4, index = i % 4;
++ mt7921_mutex_acquire(dev);
++
++ for (i = 0; i < 4; i++) {
+ u32 ctrl, val, qlen = 0;
++ int j;
+
+- val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, index));
+- ctrl = BIT(31) | BIT(15) | (acs << 8);
++ val = mt76_rr(dev, MT_PLE_AC_QEMPTY(i));
++ ctrl = BIT(31) | BIT(11) | (i << 24);
+
+ for (j = 0; j < 32; j++) {
+ if (val & BIT(j))
+ continue;
+
+- mt76_wr(dev, MT_PLE_FL_Q0_CTRL,
+- ctrl | (j + (index << 5)));
++ mt76_wr(dev, MT_PLE_FL_Q0_CTRL, ctrl | j);
+ qlen += mt76_get_field(dev, MT_PLE_FL_Q3_CTRL,
+ GENMASK(11, 0));
+ }
+- seq_printf(s, "AC%d%d: queued=%d\n", acs, index, qlen);
++ seq_printf(s, "AC%d: queued=%d\n", i, qlen);
+ }
+
++ mt7921_mutex_release(dev);
++
+ return 0;
+ }
+
+@@ -229,31 +262,44 @@ mt7921_txpwr(struct seq_file *s, void *data)
+ return 0;
+ }
+
++static void
++mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
++{
++ struct mt7921_dev *dev = priv;
++
++ mt7921_mcu_set_beacon_filter(dev, vif, dev->pm.enable);
++}
++
+ static int
+ mt7921_pm_set(void *data, u64 val)
+ {
+ struct mt7921_dev *dev = data;
+ struct mt76_connac_pm *pm = &dev->pm;
+- struct mt76_phy *mphy = dev->phy.mt76;
+
+- if (val == pm->enable)
+- return 0;
++ mutex_lock(&dev->mt76.mutex);
+
+- mt7921_mutex_acquire(dev);
++ if (val == pm->enable)
++ goto out;
+
+ if (!pm->enable) {
+ pm->stats.last_wake_event = jiffies;
+ pm->stats.last_doze_event = jiffies;
+ }
+- pm->enable = val;
++ /* make sure the chip is awake here and ps_work is scheduled
++ * just at end of the this routine.
++ */
++ pm->enable = false;
++ mt76_connac_pm_wake(&dev->mphy, pm);
+
+- ieee80211_iterate_active_interfaces(mphy->hw,
++ pm->enable = val;
++ ieee80211_iterate_active_interfaces(mt76_hw(dev),
+ IEEE80211_IFACE_ITER_RESUME_ALL,
+- mt7921_pm_interface_iter, mphy->priv);
++ mt7921_pm_interface_iter, dev);
+
+ mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable);
+-
+- mt7921_mutex_release(dev);
++ mt76_connac_power_save_sched(&dev->mphy, pm);
++out:
++ mutex_unlock(&dev->mt76.mutex);
+
+ return 0;
+ }
+@@ -373,7 +419,7 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
+ {
+ struct dentry *dir;
+
+- dir = mt76_register_debugfs(&dev->mt76);
++ dir = mt76_register_debugfs_fops(&dev->mt76, &fops_regval);
+ if (!dir)
+ return -ENOMEM;
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+index 7d7d43a5422f8..983861edc6834 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+@@ -118,111 +118,26 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev)
+ mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4));
+ }
+
+-static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr)
+-{
+- static const struct {
+- u32 phys;
+- u32 mapped;
+- u32 size;
+- } fixed_map[] = {
+- { 0x00400000, 0x80000, 0x10000}, /* WF_MCU_SYSRAM */
+- { 0x00410000, 0x90000, 0x10000}, /* WF_MCU_SYSRAM (configure register) */
+- { 0x40000000, 0x70000, 0x10000}, /* WF_UMAC_SYSRAM */
+- { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */
+- { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */
+- { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */
+- { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */
+- { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */
+- { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */
+- { 0x7c060000, 0xe0000, 0x10000}, /* CONN_INFRA, conn_host_csr_top */
+- { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */
+- { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */
+- { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */
+- { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */
+- { 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */
+- { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */
+- { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */
+- { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */
+- { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */
+- { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */
+- { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */
+- { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */
+- { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */
+- { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */
+- { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */
+- { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */
+- { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */
+- { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */
+- { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */
+- { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */
+- { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */
+- { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */
+- { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */
+- { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */
+- { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */
+- { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */
+- { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */
+- { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */
+- { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */
+- { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */
+- { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */
+- { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */
+- };
+- int i;
+-
+- if (addr < 0x100000)
+- return addr;
+-
+- for (i = 0; i < ARRAY_SIZE(fixed_map); i++) {
+- u32 ofs;
+-
+- if (addr < fixed_map[i].phys)
+- continue;
+-
+- ofs = addr - fixed_map[i].phys;
+- if (ofs > fixed_map[i].size)
+- continue;
+-
+- return fixed_map[i].mapped + ofs;
+- }
+-
+- if ((addr >= 0x18000000 && addr < 0x18c00000) ||
+- (addr >= 0x70000000 && addr < 0x78000000) ||
+- (addr >= 0x7c000000 && addr < 0x7c400000))
+- return mt7921_reg_map_l1(dev, addr);
+-
+- dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n",
+- addr);
+-
+- return 0;
+-}
+-
+-static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset)
+-{
+- struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+- u32 addr = __mt7921_reg_addr(dev, offset);
+-
+- return dev->bus_ops->rr(mdev, addr);
+-}
+-
+-static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val)
++static int mt7921_dma_disable(struct mt7921_dev *dev, bool force)
+ {
+- struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+- u32 addr = __mt7921_reg_addr(dev, offset);
+-
+- dev->bus_ops->wr(mdev, addr, val);
+-}
++ /* disable WFDMA0 */
++ mt76_clear(dev, MT_WFDMA0_GLO_CFG,
++ MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
++ MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
++ MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
++ MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
++ MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+
+-static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
+-{
+- struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+- u32 addr = __mt7921_reg_addr(dev, offset);
++ if (!mt76_poll_msec_tick(dev, MT_WFDMA0_GLO_CFG,
++ MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
++ MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 100, 1))
++ return -ETIMEDOUT;
+
+- return dev->bus_ops->rmw(mdev, addr, mask, val);
+-}
++ /* disable dmashdl */
++ mt76_clear(dev, MT_WFDMA0_GLO_CFG_EXT0,
++ MT_WFDMA0_CSR_TX_DMASHDL_ENABLE);
++ mt76_set(dev, MT_DMASHDL_SW_CONTROL, MT_DMASHDL_DMASHDL_BYPASS);
+
+-static int mt7921_dma_disable(struct mt7921_dev *dev, bool force)
+-{
+ if (force) {
+ /* reset */
+ mt76_clear(dev, MT_WFDMA0_RST,
+@@ -234,24 +149,6 @@ static int mt7921_dma_disable(struct mt7921_dev *dev, bool force)
+ MT_WFDMA0_RST_LOGIC_RST);
+ }
+
+- /* disable dmashdl */
+- mt76_clear(dev, MT_WFDMA0_GLO_CFG_EXT0,
+- MT_WFDMA0_CSR_TX_DMASHDL_ENABLE);
+- mt76_set(dev, MT_DMASHDL_SW_CONTROL, MT_DMASHDL_DMASHDL_BYPASS);
+-
+- /* disable WFDMA0 */
+- mt76_clear(dev, MT_WFDMA0_GLO_CFG,
+- MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
+- MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
+- MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+- MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
+- MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+-
+- if (!mt76_poll(dev, MT_WFDMA0_GLO_CFG,
+- MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
+- MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000))
+- return -ETIMEDOUT;
+-
+ return 0;
+ }
+
+@@ -380,20 +277,8 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev)
+
+ int mt7921_dma_init(struct mt7921_dev *dev)
+ {
+- struct mt76_bus_ops *bus_ops;
+ int ret;
+
+- dev->bus_ops = dev->mt76.bus;
+- bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops),
+- GFP_KERNEL);
+- if (!bus_ops)
+- return -ENOMEM;
+-
+- bus_ops->rr = mt7921_rr;
+- bus_ops->wr = mt7921_wr;
+- bus_ops->rmw = mt7921_rmw;
+- dev->mt76.bus = bus_ops;
+-
+ mt76_dma_attach(&dev->mt76);
+
+ ret = mt7921_dma_disable(dev, true);
+@@ -469,6 +354,10 @@ void mt7921_dma_cleanup(struct mt7921_dev *dev)
+ MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
+ MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+
++ mt76_poll_msec_tick(dev, MT_WFDMA0_GLO_CFG,
++ MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
++ MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 100, 1);
++
+ /* reset */
+ mt76_clear(dev, MT_WFDMA0_RST,
+ MT_WFDMA0_RST_DMASHDL_ALL_RST |
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+index a9ce10b988273..c059cb419efd8 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+@@ -49,8 +49,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
+ struct wiphy *wiphy = hw->wiphy;
+
+ hw->queues = 4;
+- hw->max_rx_aggregation_subframes = 64;
+- hw->max_tx_aggregation_subframes = 128;
++ hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
++ hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE;
+ hw->netdev_features = NETIF_F_RXCSUM;
+
+ hw->radiotap_timestamp.units_pos =
+@@ -106,6 +106,10 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
+ mt76_set(dev, MT_WF_RMAC_MIB_TIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
+ mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
+
++ /* enable MIB tx-rx time reporting */
++ mt76_set(dev, MT_MIB_SCR1(band), MT_MIB_TXDUR_EN);
++ mt76_set(dev, MT_MIB_SCR1(band), MT_MIB_RXDUR_EN);
++
+ mt76_rmw_field(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_MAX_RX_LEN, 1536);
+ /* disable rx rate report by default due to hw issues */
+ mt76_clear(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_RXD_G5_EN);
+@@ -215,7 +219,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
+ IEEE80211_HT_CAP_LDPC_CODING |
+ IEEE80211_HT_CAP_MAX_AMSDU;
+ dev->mphy.sband_5g.sband.vht_cap.cap |=
+- IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
++ IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+ IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
+ IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+ IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE |
+@@ -247,8 +251,17 @@ int mt7921_register_device(struct mt7921_dev *dev)
+
+ void mt7921_unregister_device(struct mt7921_dev *dev)
+ {
++ int i;
++ struct mt76_connac_pm *pm = &dev->pm;
++
+ mt76_unregister_device(&dev->mt76);
++ mt76_for_each_q_rx(&dev->mt76, i)
++ napi_disable(&dev->mt76.napi[i]);
++ cancel_delayed_work_sync(&pm->ps_work);
++ cancel_work_sync(&pm->wake_work);
++
+ mt7921_tx_token_put(dev);
++ mt7921_mcu_drv_pmctrl(dev);
+ mt7921_dma_cleanup(dev);
+ mt7921_mcu_exit(dev);
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+index 7fe2e3a50428f..6cf0c9b1b8b98 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+@@ -180,12 +180,56 @@ mt7921_mac_decode_he_radiotap_ru(struct mt76_rx_status *status,
+ IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET);
+ }
+
++static void
++mt7921_mac_decode_he_mu_radiotap(struct sk_buff *skb,
++ struct mt76_rx_status *status,
++ __le32 *rxv)
++{
++ static const struct ieee80211_radiotap_he_mu mu_known = {
++ .flags1 = HE_BITS(MU_FLAGS1_SIG_B_MCS_KNOWN) |
++ HE_BITS(MU_FLAGS1_SIG_B_DCM_KNOWN) |
++ HE_BITS(MU_FLAGS1_CH1_RU_KNOWN) |
++ HE_BITS(MU_FLAGS1_SIG_B_SYMS_USERS_KNOWN) |
++ HE_BITS(MU_FLAGS1_SIG_B_COMP_KNOWN),
++ .flags2 = HE_BITS(MU_FLAGS2_BW_FROM_SIG_A_BW_KNOWN) |
++ HE_BITS(MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN),
++ };
++ struct ieee80211_radiotap_he_mu *he_mu = NULL;
++
++ he_mu = skb_push(skb, sizeof(mu_known));
++ memcpy(he_mu, &mu_known, sizeof(mu_known));
++
++#define MU_PREP(f, v) le16_encode_bits(v, IEEE80211_RADIOTAP_HE_MU_##f)
++
++ he_mu->flags1 |= MU_PREP(FLAGS1_SIG_B_MCS, status->rate_idx);
++ if (status->he_dcm)
++ he_mu->flags1 |= MU_PREP(FLAGS1_SIG_B_DCM, status->he_dcm);
++
++ he_mu->flags2 |= MU_PREP(FLAGS2_BW_FROM_SIG_A_BW, status->bw) |
++ MU_PREP(FLAGS2_SIG_B_SYMS_USERS,
++ le32_get_bits(rxv[2], MT_CRXV_HE_NUM_USER));
++
++ he_mu->ru_ch1[0] = FIELD_GET(MT_CRXV_HE_RU0, cpu_to_le32(rxv[3]));
++
++ if (status->bw >= RATE_INFO_BW_40) {
++ he_mu->flags1 |= HE_BITS(MU_FLAGS1_CH2_RU_KNOWN);
++ he_mu->ru_ch2[0] =
++ FIELD_GET(MT_CRXV_HE_RU1, cpu_to_le32(rxv[3]));
++ }
++
++ if (status->bw >= RATE_INFO_BW_80) {
++ he_mu->ru_ch1[1] =
++ FIELD_GET(MT_CRXV_HE_RU2, cpu_to_le32(rxv[3]));
++ he_mu->ru_ch2[1] =
++ FIELD_GET(MT_CRXV_HE_RU3, cpu_to_le32(rxv[3]));
++ }
++}
++
+ static void
+ mt7921_mac_decode_he_radiotap(struct sk_buff *skb,
+ struct mt76_rx_status *status,
+ __le32 *rxv, u32 phy)
+ {
+- /* TODO: struct ieee80211_radiotap_he_mu */
+ static const struct ieee80211_radiotap_he known = {
+ .data1 = HE_BITS(DATA1_DATA_MCS_KNOWN) |
+ HE_BITS(DATA1_DATA_DCM_KNOWN) |
+@@ -193,6 +237,7 @@ mt7921_mac_decode_he_radiotap(struct sk_buff *skb,
+ HE_BITS(DATA1_CODING_KNOWN) |
+ HE_BITS(DATA1_LDPC_XSYMSEG_KNOWN) |
+ HE_BITS(DATA1_DOPPLER_KNOWN) |
++ HE_BITS(DATA1_SPTL_REUSE_KNOWN) |
+ HE_BITS(DATA1_BSS_COLOR_KNOWN),
+ .data2 = HE_BITS(DATA2_GI_KNOWN) |
+ HE_BITS(DATA2_TXBF_KNOWN) |
+@@ -207,9 +252,12 @@ mt7921_mac_decode_he_radiotap(struct sk_buff *skb,
+
+ he->data3 = HE_PREP(DATA3_BSS_COLOR, BSS_COLOR, rxv[14]) |
+ HE_PREP(DATA3_LDPC_XSYMSEG, LDPC_EXT_SYM, rxv[2]);
++ he->data4 = HE_PREP(DATA4_SU_MU_SPTL_REUSE, SR_MASK, rxv[11]);
+ he->data5 = HE_PREP(DATA5_PE_DISAMBIG, PE_DISAMBIG, rxv[2]) |
+ le16_encode_bits(ltf_size,
+ IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE);
++ if (cpu_to_le32(rxv[0]) & MT_PRXV_TXBF)
++ he->data5 |= HE_BITS(DATA5_TXBF);
+ he->data6 = HE_PREP(DATA6_TXOP, TXOP_DUR, rxv[14]) |
+ HE_PREP(DATA6_DOPPLER, DOPPLER, rxv[14]);
+
+@@ -217,8 +265,7 @@ mt7921_mac_decode_he_radiotap(struct sk_buff *skb,
+ case MT_PHY_TYPE_HE_SU:
+ he->data1 |= HE_BITS(DATA1_FORMAT_SU) |
+ HE_BITS(DATA1_UL_DL_KNOWN) |
+- HE_BITS(DATA1_BEAM_CHANGE_KNOWN) |
+- HE_BITS(DATA1_SPTL_REUSE_KNOWN);
++ HE_BITS(DATA1_BEAM_CHANGE_KNOWN);
+
+ he->data3 |= HE_PREP(DATA3_BEAM_CHANGE, BEAM_CHNG, rxv[14]) |
+ HE_PREP(DATA3_UL_DL, UPLINK, rxv[2]);
+@@ -232,17 +279,15 @@ mt7921_mac_decode_he_radiotap(struct sk_buff *skb,
+ break;
+ case MT_PHY_TYPE_HE_MU:
+ he->data1 |= HE_BITS(DATA1_FORMAT_MU) |
+- HE_BITS(DATA1_UL_DL_KNOWN) |
+- HE_BITS(DATA1_SPTL_REUSE_KNOWN);
++ HE_BITS(DATA1_UL_DL_KNOWN);
+
+ he->data3 |= HE_PREP(DATA3_UL_DL, UPLINK, rxv[2]);
+- he->data4 |= HE_PREP(DATA4_SU_MU_SPTL_REUSE, SR_MASK, rxv[11]);
++ he->data4 |= HE_PREP(DATA4_MU_STA_ID, MU_AID, rxv[7]);
+
+ mt7921_mac_decode_he_radiotap_ru(status, he, rxv);
+ break;
+ case MT_PHY_TYPE_HE_TB:
+ he->data1 |= HE_BITS(DATA1_FORMAT_TRIG) |
+- HE_BITS(DATA1_SPTL_REUSE_KNOWN) |
+ HE_BITS(DATA1_SPTL_REUSE2_KNOWN) |
+ HE_BITS(DATA1_SPTL_REUSE3_KNOWN) |
+ HE_BITS(DATA1_SPTL_REUSE4_KNOWN);
+@@ -338,10 +383,17 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
+ if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+ return -EINVAL;
+
++ hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
++ if (hdr_trans && (rxd1 & MT_RXD1_NORMAL_CM))
++ return -EINVAL;
++
++ /* ICV error or CCMP/BIP/WPI MIC error */
++ if (rxd1 & MT_RXD1_NORMAL_ICV_ERR)
++ status->flag |= RX_FLAG_ONLY_MONITOR;
++
+ chfreq = FIELD_GET(MT_RXD3_NORMAL_CH_FREQ, rxd3);
+ unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
+ idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
+- hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
+ status->wcid = mt7921_rx_get_wcid(dev, idx, unicast);
+
+ if (status->wcid) {
+@@ -511,7 +563,7 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
+ status->nss =
+ FIELD_GET(MT_PRXV_NSTS, v0) + 1;
+ status->encoding = RX_ENC_VHT;
+- if (i > 9)
++ if (i > 11)
+ return -EINVAL;
+ break;
+ case MT_PHY_TYPE_HE_MU:
+@@ -606,9 +658,13 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
+
+ mt7921_mac_assoc_rssi(dev, skb);
+
+- if (rxv && status->flag & RX_FLAG_RADIOTAP_HE)
++ if (rxv && status->flag & RX_FLAG_RADIOTAP_HE) {
+ mt7921_mac_decode_he_radiotap(skb, status, rxv, mode);
+
++ if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
++ mt7921_mac_decode_he_mu_radiotap(skb, status, rxv);
++ }
++
+ if (!status->wcid || !ieee80211_is_data_qos(fc))
+ return 0;
+
+@@ -625,6 +681,7 @@ mt7921_mac_write_txwi_8023(struct mt7921_dev *dev, __le32 *txwi,
+ {
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+ u8 fc_type, fc_stype;
++ u16 ethertype;
+ bool wmm = false;
+ u32 val;
+
+@@ -638,7 +695,8 @@ mt7921_mac_write_txwi_8023(struct mt7921_dev *dev, __le32 *txwi,
+ val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) |
+ FIELD_PREP(MT_TXD1_TID, tid);
+
+- if (be16_to_cpu(skb->protocol) >= ETH_P_802_3_MIN)
++ ethertype = get_unaligned_be16(&skb->data[12]);
++ if (ethertype >= ETH_P_802_3_MIN)
+ val |= MT_TXD1_ETH_802_3;
+
+ txwi[1] |= cpu_to_le32(val);
+@@ -735,8 +793,9 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi,
+ static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
+ {
+ struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
+- u32 pid, frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]);
++ u32 pid, frame_type;
+
++ frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, le32_to_cpu(txwi[2]));
+ if (!(frame_type & (IEEE80211_FTYPE_DATA >> 2)))
+ return;
+
+@@ -1417,7 +1476,7 @@ mt7921_mac_update_mib_stats(struct mt7921_phy *phy)
+ mib->rts_retries_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR1(0),
+ MT_MIB_RTS_FAIL_COUNT_MASK);
+
+- for (i = 0, aggr1 = aggr0 + 4; i < 4; i++) {
++ for (i = 0, aggr1 = aggr0 + 8; i < 4; i++) {
+ u32 val, val2;
+
+ val = mt76_rr(dev, MT_TX_AGG_CNT(0, i));
+@@ -1493,6 +1552,14 @@ void mt7921_pm_power_save_work(struct work_struct *work)
+ test_bit(MT76_HW_SCHED_SCANNING, &mphy->state))
+ goto out;
+
++ if (mutex_is_locked(&dev->mt76.mutex))
++ /* if mt76 mutex is held we should not put the device
++ * to sleep since we are currently accessing device
++ * register map. We need to wait for the next power_save
++ * trigger.
++ */
++ goto out;
++
+ if (time_is_after_jiffies(dev->pm.last_activity + delta)) {
+ delta = dev->pm.last_activity + delta - jiffies;
+ goto out;
+@@ -1506,34 +1573,6 @@ out:
+ queue_delayed_work(dev->mt76.wq, &dev->pm.ps_work, delta);
+ }
+
+-int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy,
+- struct ieee80211_vif *vif,
+- bool enable)
+-{
+- struct mt7921_dev *dev = phy->dev;
+- bool ext_phy = phy != &dev->phy;
+- int err;
+-
+- if (!dev->pm.enable)
+- return -EOPNOTSUPP;
+-
+- err = mt7921_mcu_set_bss_pm(dev, vif, enable);
+- if (err)
+- return err;
+-
+- if (enable) {
+- vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
+- mt76_set(dev, MT_WF_RFCR(ext_phy),
+- MT_WF_RFCR_DROP_OTHER_BEACON);
+- } else {
+- vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
+- mt76_clear(dev, MT_WF_RFCR(ext_phy),
+- MT_WF_RFCR_DROP_OTHER_BEACON);
+- }
+-
+- return 0;
+-}
+-
+ void mt7921_coredump_work(struct work_struct *work)
+ {
+ struct mt7921_dev *dev;
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
+index 3af67fac213df..f0194c8780372 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
+@@ -116,6 +116,7 @@ enum rx_pkt_type {
+ #define MT_PRXV_TX_DCM BIT(4)
+ #define MT_PRXV_TX_ER_SU_106T BIT(5)
+ #define MT_PRXV_NSTS GENMASK(9, 7)
++#define MT_PRXV_TXBF BIT(10)
+ #define MT_PRXV_HT_AD_CODE BIT(11)
+ #define MT_PRXV_FRAME_MODE GENMASK(14, 12)
+ #define MT_PRXV_SGI GENMASK(16, 15)
+@@ -138,8 +139,15 @@ enum rx_pkt_type {
+ #define MT_CRXV_HE_LTF_SIZE GENMASK(18, 17)
+ #define MT_CRXV_HE_LDPC_EXT_SYM BIT(20)
+ #define MT_CRXV_HE_PE_DISAMBIG BIT(23)
++#define MT_CRXV_HE_NUM_USER GENMASK(30, 24)
+ #define MT_CRXV_HE_UPLINK BIT(31)
+
++#define MT_CRXV_HE_RU0 GENMASK(7, 0)
++#define MT_CRXV_HE_RU1 GENMASK(15, 8)
++#define MT_CRXV_HE_RU2 GENMASK(23, 16)
++#define MT_CRXV_HE_RU3 GENMASK(31, 24)
++#define MT_CRXV_HE_MU_AID GENMASK(30, 20)
++
+ #define MT_CRXV_HE_SR_MASK GENMASK(11, 8)
+ #define MT_CRXV_HE_SR1_MASK GENMASK(16, 12)
+ #define MT_CRXV_HE_SR2_MASK GENMASK(20, 17)
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+index 63ec140c9c372..b34f9e6500ccd 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+@@ -224,6 +224,7 @@ static void mt7921_stop(struct ieee80211_hw *hw)
+
+ cancel_delayed_work_sync(&dev->pm.ps_work);
+ cancel_work_sync(&dev->pm.wake_work);
++ cancel_work_sync(&dev->reset_work);
+ mt76_connac_free_pending_tx_skbs(&dev->pm, NULL);
+
+ mt7921_mutex_acquire(dev);
+@@ -282,15 +283,9 @@ static int mt7921_add_interface(struct ieee80211_hw *hw,
+ rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
+ if (vif->txq) {
+ mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+- mtxq->wcid = &mvif->sta.wcid;
++ mtxq->wcid = idx;
+ }
+
+- if (vif->type != NL80211_IFTYPE_AP &&
+- (!mvif->mt76.omac_idx || mvif->mt76.omac_idx > 3))
+- vif->offload_flags = 0;
+-
+- vif->offload_flags |= IEEE80211_OFFLOAD_ENCAP_4ADDR;
+-
+ out:
+ mt7921_mutex_release(dev);
+
+@@ -533,36 +528,6 @@ static void mt7921_configure_filter(struct ieee80211_hw *hw,
+ mt7921_mutex_release(dev);
+ }
+
+-static int
+-mt7921_bss_bcnft_apply(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+- bool assoc)
+-{
+- int ret;
+-
+- if (!dev->pm.enable)
+- return 0;
+-
+- if (assoc) {
+- ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true);
+- if (ret)
+- return ret;
+-
+- vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
+- mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
+-
+- return 0;
+- }
+-
+- ret = mt7921_mcu_set_bss_pm(dev, vif, false);
+- if (ret)
+- return ret;
+-
+- vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
+- mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
+-
+- return 0;
+-}
+-
+ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *info,
+@@ -592,7 +557,8 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
+ if (changed & BSS_CHANGED_ASSOC) {
+ mt7921_mcu_sta_update(dev, NULL, vif, true,
+ MT76_STA_INFO_STATE_ASSOC);
+- mt7921_bss_bcnft_apply(dev, vif, info->assoc);
++ if (dev->pm.enable)
++ mt7921_mcu_set_beacon_filter(dev, vif, info->assoc);
+ }
+
+ if (changed & BSS_CHANGED_ARP_FILTER) {
+@@ -661,6 +627,7 @@ void mt7921_mac_sta_assoc(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+
+ mt7921_mac_wtbl_update(dev, msta->wcid.idx,
+ MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
++ memset(msta->airtime_ac, 0, sizeof(msta->airtime_ac));
+
+ mt7921_mcu_sta_update(dev, sta, vif, true, MT76_STA_INFO_STATE_ASSOC);
+
+@@ -1027,7 +994,7 @@ mt7921_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
+ return -EINVAL;
+
+ if ((BIT(hweight8(tx_ant)) - 1) != tx_ant)
+- tx_ant = BIT(ffs(tx_ant) - 1) - 1;
++ return -EINVAL;
+
+ mt7921_mutex_acquire(dev);
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+index 9fbaacc67cfad..9b490ff36bd6b 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+@@ -157,6 +157,7 @@ mt7921_mcu_parse_response(struct mt76_dev *mdev, int cmd,
+ struct sk_buff *skb, int seq)
+ {
+ struct mt7921_mcu_rxd *rxd;
++ int mcu_cmd = cmd & MCU_CMD_MASK;
+ int ret = 0;
+
+ if (!skb) {
+@@ -194,6 +195,9 @@ mt7921_mcu_parse_response(struct mt76_dev *mdev, int cmd,
+ skb_pull(skb, sizeof(*rxd));
+ event = (struct mt7921_mcu_uni_event *)skb->data;
+ ret = le32_to_cpu(event->status);
++ /* skip invalid event */
++ if (mcu_cmd != event->cid)
++ ret = -EAGAIN;
+ break;
+ }
+ case MCU_CMD_REG_READ: {
+@@ -316,11 +320,13 @@ mt7921_mcu_tx_rate_parse(struct mt76_phy *mphy,
+ struct rate_info *rate, u16 r)
+ {
+ struct ieee80211_supported_band *sband;
+- u16 flags = 0;
++ u16 flags = 0, rate_idx;
+ u8 txmode = FIELD_GET(MT_WTBL_RATE_TX_MODE, r);
+ u8 gi = 0;
+ u8 bw = 0;
++ bool cck = false;
+
++ memset(rate, 0, sizeof(*rate));
+ rate->mcs = FIELD_GET(MT_WTBL_RATE_MCS, r);
+ rate->nss = FIELD_GET(MT_WTBL_RATE_NSS, r) + 1;
+
+@@ -345,13 +351,18 @@ mt7921_mcu_tx_rate_parse(struct mt76_phy *mphy,
+
+ switch (txmode) {
+ case MT_PHY_TYPE_CCK:
++ cck = true;
++ fallthrough;
+ case MT_PHY_TYPE_OFDM:
+ if (mphy->chandef.chan->band == NL80211_BAND_5GHZ)
+ sband = &mphy->sband_5g.sband;
+ else
+ sband = &mphy->sband_2g.sband;
+
+- rate->legacy = sband->bitrates[rate->mcs].bitrate;
++ rate_idx = FIELD_GET(MT_TX_RATE_IDX, r);
++ rate_idx = mt76_get_rate(mphy->dev, sband, rate_idx,
++ cck);
++ rate->legacy = sband->bitrates[rate_idx].bitrate;
+ break;
+ case MT_PHY_TYPE_HT:
+ case MT_PHY_TYPE_HT_GF:
+@@ -532,7 +543,8 @@ mt7921_mcu_tx_done_event(struct mt7921_dev *dev, struct sk_buff *skb)
+ peer.g8 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80);
+ peer.g16 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_160);
+ mt7921_mcu_tx_rate_parse(mphy->mt76, &peer,
+- &msta->stats.tx_rate, event->tx_rate);
++ &msta->stats.tx_rate,
++ le16_to_cpu(event->tx_rate));
+
+ spin_lock_bh(&dev->sta_poll_lock);
+ break;
+@@ -619,7 +631,7 @@ mt7921_mcu_sta_key_tlv(struct mt7921_sta *msta, struct sk_buff *skb,
+ u8 cipher;
+
+ cipher = mt7921_mcu_get_cipher(key->cipher);
+- if (cipher == MT_CIPHER_NONE)
++ if (cipher == MCU_CIPHER_NONE)
+ return -EOPNOTSUPP;
+
+ sec_key = &sec->key[0];
+@@ -815,7 +827,7 @@ out:
+ default:
+ ret = -EAGAIN;
+ dev_err(dev->mt76.dev, "Failed to release patch semaphore\n");
+- goto out;
++ break;
+ }
+ release_firmware(fw);
+
+@@ -1193,8 +1205,9 @@ int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif)
+ &ps_req, sizeof(ps_req), true);
+ }
+
+-int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+- bool enable)
++static int
++mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
++ bool enable)
+ {
+ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+ struct {
+@@ -1228,8 +1241,9 @@ int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+ &bcnft_req, sizeof(bcnft_req), true);
+ }
+
+-int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+- bool enable)
++static int
++mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
++ bool enable)
+ {
+ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+ struct {
+@@ -1292,10 +1306,8 @@ int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta,
+ return mt76_connac_mcu_sta_cmd(&dev->mphy, &info);
+ }
+
+-int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
++int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev)
+ {
+- struct mt76_phy *mphy = &dev->mt76.phy;
+- struct mt76_connac_pm *pm = &dev->pm;
+ int i, err = 0;
+
+ for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
+@@ -1308,9 +1320,21 @@ int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
+ if (i == MT7921_DRV_OWN_RETRY_COUNT) {
+ dev_err(dev->mt76.dev, "driver own failed\n");
+ err = -EIO;
+- goto out;
+ }
+
++ return err;
++}
++
++int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
++{
++ struct mt76_phy *mphy = &dev->mt76.phy;
++ struct mt76_connac_pm *pm = &dev->pm;
++ int err;
++
++ err = __mt7921e_mcu_drv_pmctrl(dev);
++ if (err < 0)
++ goto out;
++
+ mt7921_wpdma_reinit_cond(dev);
+ clear_bit(MT76_STATE_PM, &mphy->state);
+
+@@ -1378,31 +1402,34 @@ out:
+ return err;
+ }
+
+-void
+-mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
++int mt7921_mcu_set_beacon_filter(struct mt7921_dev *dev,
++ struct ieee80211_vif *vif,
++ bool enable)
+ {
+- struct mt7921_phy *phy = priv;
+- struct mt7921_dev *dev = phy->dev;
+ struct ieee80211_hw *hw = mt76_hw(dev);
+- int ret;
+-
+- if (dev->pm.enable)
+- ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true);
+- else
+- ret = mt7921_mcu_set_bss_pm(dev, vif, false);
++ int err;
+
+- if (ret)
+- return;
++ if (enable) {
++ err = mt7921_mcu_uni_bss_bcnft(dev, vif, true);
++ if (err)
++ return err;
+
+- if (dev->pm.enable) {
+ vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
+ ieee80211_hw_set(hw, CONNECTION_MONITOR);
+ mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
+- } else {
+- vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
+- __clear_bit(IEEE80211_HW_CONNECTION_MONITOR, hw->flags);
+- mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
++
++ return 0;
+ }
++
++ err = mt7921_mcu_set_bss_pm(dev, vif, false);
++ if (err)
++ return err;
++
++ vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
++ __clear_bit(IEEE80211_HW_CONNECTION_MONITOR, hw->flags);
++ mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
++
++ return 0;
+ }
+
+ int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr)
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+index de3c091f67368..42e7271848956 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+@@ -296,11 +296,11 @@ struct mt7921_txpwr_event {
+ struct mt7921_mcu_tx_done_event {
+ u8 pid;
+ u8 status;
+- u16 seq;
++ __le16 seq;
+
+ u8 wlan_idx;
+ u8 tx_cnt;
+- u16 tx_rate;
++ __le16 tx_rate;
+
+ u8 flag;
+ u8 tid;
+@@ -312,9 +312,9 @@ struct mt7921_mcu_tx_done_event {
+ u8 reason;
+ u8 rsv0[1];
+
+- u32 delay;
+- u32 timestamp;
+- u32 applied_flag;
++ __le32 delay;
++ __le32 timestamp;
++ __le32 applied_flag;
+
+ u8 txs[28];
+
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+index 2d8bd6bfc820a..6eb03d6705a1f 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+@@ -363,6 +363,9 @@ void mt7921_set_stream_he_caps(struct mt7921_phy *phy);
+ void mt7921_update_channel(struct mt76_phy *mphy);
+ int mt7921_init_debugfs(struct mt7921_dev *dev);
+
++int mt7921_mcu_set_beacon_filter(struct mt7921_dev *dev,
++ struct ieee80211_vif *vif,
++ bool enable);
+ int mt7921_mcu_uni_tx_ba(struct mt7921_dev *dev,
+ struct ieee80211_ampdu_params *params,
+ bool enable);
+@@ -371,20 +374,13 @@ int mt7921_mcu_uni_rx_ba(struct mt7921_dev *dev,
+ bool enable);
+ void mt7921_scan_work(struct work_struct *work);
+ int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif);
+-int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+- bool enable);
+-int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+- bool enable);
++int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev);
+ int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev);
+ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev);
+ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev);
+ void mt7921_pm_wake_work(struct work_struct *work);
+ void mt7921_pm_power_save_work(struct work_struct *work);
+ bool mt7921_wait_for_mcu_init(struct mt7921_dev *dev);
+-int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy,
+- struct ieee80211_vif *vif,
+- bool enable);
+-void mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
+ void mt7921_coredump_work(struct work_struct *work);
+ int mt7921_wfsys_reset(struct mt7921_dev *dev);
+ int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+index c3905bcab3604..7effee4978e99 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+@@ -88,6 +88,110 @@ static void mt7921_irq_tasklet(unsigned long data)
+ napi_schedule(&dev->mt76.napi[MT_RXQ_MAIN]);
+ }
+
++static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr)
++{
++ static const struct {
++ u32 phys;
++ u32 mapped;
++ u32 size;
++ } fixed_map[] = {
++ { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */
++ { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */
++ { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */
++ { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */
++ { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */
++ { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */
++ { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */
++ { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */
++ { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */
++ { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */
++ { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */
++ { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */
++ { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */
++ { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */
++ { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */
++ { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */
++ { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */
++ { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */
++ { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */
++ { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */
++ { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */
++ { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */
++ { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */
++ { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */
++ { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */
++ { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */
++ { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */
++ { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */
++ { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */
++ { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */
++ { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */
++ { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */
++ { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */
++ { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */
++ { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */
++ { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */
++ { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */
++ { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */
++ { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */
++ { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */
++ { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */
++ { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */
++ { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */
++ };
++ int i;
++
++ if (addr < 0x100000)
++ return addr;
++
++ for (i = 0; i < ARRAY_SIZE(fixed_map); i++) {
++ u32 ofs;
++
++ if (addr < fixed_map[i].phys)
++ continue;
++
++ ofs = addr - fixed_map[i].phys;
++ if (ofs > fixed_map[i].size)
++ continue;
++
++ return fixed_map[i].mapped + ofs;
++ }
++
++ if ((addr >= 0x18000000 && addr < 0x18c00000) ||
++ (addr >= 0x70000000 && addr < 0x78000000) ||
++ (addr >= 0x7c000000 && addr < 0x7c400000))
++ return mt7921_reg_map_l1(dev, addr);
++
++ dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n",
++ addr);
++
++ return 0;
++}
++
++static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset)
++{
++ struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
++ u32 addr = __mt7921_reg_addr(dev, offset);
++
++ return dev->bus_ops->rr(mdev, addr);
++}
++
++static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val)
++{
++ struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
++ u32 addr = __mt7921_reg_addr(dev, offset);
++
++ dev->bus_ops->wr(mdev, addr, val);
++}
++
++static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
++{
++ struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
++ u32 addr = __mt7921_reg_addr(dev, offset);
++
++ return dev->bus_ops->rmw(mdev, addr, mask, val);
++}
++
+ static int mt7921_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+ {
+@@ -110,9 +214,11 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
+ .sta_remove = mt7921_mac_sta_remove,
+ .update_survey = mt7921_update_channel,
+ };
++ struct mt76_bus_ops *bus_ops;
+ struct mt7921_dev *dev;
+ struct mt76_dev *mdev;
+ int ret;
++ u16 cmd;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+@@ -122,6 +228,11 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
+ if (ret)
+ return ret;
+
++ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
++ if (!(cmd & PCI_COMMAND_MEMORY)) {
++ cmd |= PCI_COMMAND_MEMORY;
++ pci_write_config_word(pdev, PCI_COMMAND, cmd);
++ }
+ pci_set_master(pdev);
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+@@ -145,6 +256,24 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
+
+ mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]);
+ tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev);
++
++ dev->bus_ops = dev->mt76.bus;
++ bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops),
++ GFP_KERNEL);
++ if (!bus_ops) {
++ ret = -ENOMEM;
++ goto err_free_dev;
++ }
++
++ bus_ops->rr = mt7921_rr;
++ bus_ops->wr = mt7921_wr;
++ bus_ops->rmw = mt7921_rmw;
++ dev->mt76.bus = bus_ops;
++
++ ret = __mt7921e_mcu_drv_pmctrl(dev);
++ if (ret)
++ goto err_free_dev;
++
+ mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) |
+ (mt7921_l1_rr(dev, MT_HW_REV) & 0xff);
+ dev_err(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
+diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+index b6944c867a573..9266fb3909ca3 100644
+--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
++++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+@@ -14,19 +14,18 @@
+ #define MT_MCU_INT_EVENT_SER_TRIGGER BIT(2)
+ #define MT_MCU_INT_EVENT_RESET_DONE BIT(3)
+
+-#define MT_PLE_BASE 0x8000
++#define MT_PLE_BASE 0x820c0000
+ #define MT_PLE(ofs) (MT_PLE_BASE + (ofs))
+
+-#define MT_PLE_FL_Q0_CTRL MT_PLE(0x1b0)
+-#define MT_PLE_FL_Q1_CTRL MT_PLE(0x1b4)
+-#define MT_PLE_FL_Q2_CTRL MT_PLE(0x1b8)
+-#define MT_PLE_FL_Q3_CTRL MT_PLE(0x1bc)
++#define MT_PLE_FL_Q0_CTRL MT_PLE(0x3e0)
++#define MT_PLE_FL_Q1_CTRL MT_PLE(0x3e4)
++#define MT_PLE_FL_Q2_CTRL MT_PLE(0x3e8)
++#define MT_PLE_FL_Q3_CTRL MT_PLE(0x3ec)
+
+-#define MT_PLE_AC_QEMPTY(ac, n) MT_PLE(0x300 + 0x10 * (ac) + \
+- ((n) << 2))
++#define MT_PLE_AC_QEMPTY(_n) MT_PLE(0x500 + 0x40 * (_n))
+ #define MT_PLE_AMSDU_PACK_MSDU_CNT(n) MT_PLE(0x10e0 + ((n) << 2))
+
+-#define MT_MDP_BASE 0xf000
++#define MT_MDP_BASE 0x820cd000
+ #define MT_MDP(ofs) (MT_MDP_BASE + (ofs))
+
+ #define MT_MDP_DCR0 MT_MDP(0x000)
+@@ -49,7 +48,7 @@
+ #define MT_MDP_TO_WM 1
+
+ /* TMAC: band 0(0x21000), band 1(0xa1000) */
+-#define MT_WF_TMAC_BASE(_band) ((_band) ? 0xa1000 : 0x21000)
++#define MT_WF_TMAC_BASE(_band) ((_band) ? 0x820f4000 : 0x820e4000)
+ #define MT_WF_TMAC(_band, ofs) (MT_WF_TMAC_BASE(_band) + (ofs))
+
+ #define MT_TMAC_TCR0(_band) MT_WF_TMAC(_band, 0)
+@@ -74,7 +73,7 @@
+ #define MT_TMAC_TRCR0(_band) MT_WF_TMAC(_band, 0x09c)
+ #define MT_TMAC_TFCR0(_band) MT_WF_TMAC(_band, 0x1e0)
+
+-#define MT_WF_DMA_BASE(_band) ((_band) ? 0xa1e00 : 0x21e00)
++#define MT_WF_DMA_BASE(_band) ((_band) ? 0x820f7000 : 0x820e7000)
+ #define MT_WF_DMA(_band, ofs) (MT_WF_DMA_BASE(_band) + (ofs))
+
+ #define MT_DMA_DCR0(_band) MT_WF_DMA(_band, 0x000)
+@@ -82,7 +81,7 @@
+ #define MT_DMA_DCR0_RXD_G5_EN BIT(23)
+
+ /* LPON: band 0(0x24200), band 1(0xa4200) */
+-#define MT_WF_LPON_BASE(_band) ((_band) ? 0xa4200 : 0x24200)
++#define MT_WF_LPON_BASE(_band) ((_band) ? 0x820fb000 : 0x820eb000)
+ #define MT_WF_LPON(_band, ofs) (MT_WF_LPON_BASE(_band) + (ofs))
+
+ #define MT_LPON_UTTR0(_band) MT_WF_LPON(_band, 0x080)
+@@ -93,9 +92,13 @@
+ #define MT_LPON_TCR_SW_WRITE BIT(0)
+
+ /* MIB: band 0(0x24800), band 1(0xa4800) */
+-#define MT_WF_MIB_BASE(_band) ((_band) ? 0xa4800 : 0x24800)
++#define MT_WF_MIB_BASE(_band) ((_band) ? 0x820fd000 : 0x820ed000)
+ #define MT_WF_MIB(_band, ofs) (MT_WF_MIB_BASE(_band) + (ofs))
+
++#define MT_MIB_SCR1(_band) MT_WF_MIB(_band, 0x004)
++#define MT_MIB_TXDUR_EN BIT(8)
++#define MT_MIB_RXDUR_EN BIT(9)
++
+ #define MT_MIB_SDR3(_band) MT_WF_MIB(_band, 0x698)
+ #define MT_MIB_SDR3_FCS_ERR_MASK GENMASK(31, 16)
+
+@@ -108,9 +111,9 @@
+ #define MT_MIB_SDR34(_band) MT_WF_MIB(_band, 0x090)
+ #define MT_MIB_MU_BF_TX_CNT GENMASK(15, 0)
+
+-#define MT_MIB_SDR36(_band) MT_WF_MIB(_band, 0x098)
++#define MT_MIB_SDR36(_band) MT_WF_MIB(_band, 0x054)
+ #define MT_MIB_SDR36_TXTIME_MASK GENMASK(23, 0)
+-#define MT_MIB_SDR37(_band) MT_WF_MIB(_band, 0x09c)
++#define MT_MIB_SDR37(_band) MT_WF_MIB(_band, 0x058)
+ #define MT_MIB_SDR37_RXTIME_MASK GENMASK(23, 0)
+
+ #define MT_MIB_DR8(_band) MT_WF_MIB(_band, 0x0c0)
+@@ -138,7 +141,7 @@
+ #define MT_MIB_ARNG(_band, n) MT_WF_MIB(_band, 0x0b0 + ((n) << 2))
+ #define MT_MIB_ARNCR_RANGE(val, n) (((val) >> ((n) << 3)) & GENMASK(7, 0))
+
+-#define MT_WTBLON_TOP_BASE 0x34000
++#define MT_WTBLON_TOP_BASE 0x820d4000
+ #define MT_WTBLON_TOP(ofs) (MT_WTBLON_TOP_BASE + (ofs))
+ #define MT_WTBLON_TOP_WDUCR MT_WTBLON_TOP(0x200)
+ #define MT_WTBLON_TOP_WDUCR_GROUP GENMASK(2, 0)
+@@ -148,7 +151,7 @@
+ #define MT_WTBL_UPDATE_ADM_COUNT_CLEAR BIT(12)
+ #define MT_WTBL_UPDATE_BUSY BIT(31)
+
+-#define MT_WTBL_BASE 0x38000
++#define MT_WTBL_BASE 0x820d8000
+ #define MT_WTBL_LMAC_ID GENMASK(14, 8)
+ #define MT_WTBL_LMAC_DW GENMASK(7, 2)
+ #define MT_WTBL_LMAC_OFFS(_id, _dw) (MT_WTBL_BASE | \
+@@ -156,7 +159,7 @@
+ FIELD_PREP(MT_WTBL_LMAC_DW, _dw))
+
+ /* AGG: band 0(0x20800), band 1(0xa0800) */
+-#define MT_WF_AGG_BASE(_band) ((_band) ? 0xa0800 : 0x20800)
++#define MT_WF_AGG_BASE(_band) ((_band) ? 0x820f2000 : 0x820e2000)
+ #define MT_WF_AGG(_band, ofs) (MT_WF_AGG_BASE(_band) + (ofs))
+
+ #define MT_AGG_AWSCR0(_band, _n) MT_WF_AGG(_band, 0x05c + (_n) * 4)
+@@ -187,7 +190,7 @@
+ #define MT_AGG_ATCR3(_band) MT_WF_AGG(_band, 0x0f4)
+
+ /* ARB: band 0(0x20c00), band 1(0xa0c00) */
+-#define MT_WF_ARB_BASE(_band) ((_band) ? 0xa0c00 : 0x20c00)
++#define MT_WF_ARB_BASE(_band) ((_band) ? 0x820f3000 : 0x820e3000)
+ #define MT_WF_ARB(_band, ofs) (MT_WF_ARB_BASE(_band) + (ofs))
+
+ #define MT_ARB_SCR(_band) MT_WF_ARB(_band, 0x080)
+@@ -197,7 +200,7 @@
+ #define MT_ARB_DRNGR0(_band, _n) MT_WF_ARB(_band, 0x194 + (_n) * 4)
+
+ /* RMAC: band 0(0x21400), band 1(0xa1400) */
+-#define MT_WF_RMAC_BASE(_band) ((_band) ? 0xa1400 : 0x21400)
++#define MT_WF_RMAC_BASE(_band) ((_band) ? 0x820f5000 : 0x820e5000)
+ #define MT_WF_RMAC(_band, ofs) (MT_WF_RMAC_BASE(_band) + (ofs))
+
+ #define MT_WF_RFCR(_band) MT_WF_RMAC(_band, 0x000)
+diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
+index 783a15635ec52..9e639d0b9c631 100644
+--- a/drivers/net/wireless/mediatek/mt76/sdio.c
++++ b/drivers/net/wireless/mediatek/mt76/sdio.c
+@@ -213,7 +213,7 @@ static void mt76s_status_worker(struct mt76_worker *w)
+ } while (nframes > 0);
+
+ if (resched)
+- mt76_worker_schedule(&dev->sdio.txrx_worker);
++ mt76_worker_schedule(&dev->tx_worker);
+ }
+
+ static void mt76s_tx_status_data(struct work_struct *work)
+diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
+index f73ffbd6e622d..0109433e8c2fe 100644
+--- a/drivers/net/wireless/mediatek/mt76/testmode.c
++++ b/drivers/net/wireless/mediatek/mt76/testmode.c
+@@ -6,6 +6,7 @@ static const struct nla_policy mt76_tm_policy[NUM_MT76_TM_ATTRS] = {
+ [MT76_TM_ATTR_RESET] = { .type = NLA_FLAG },
+ [MT76_TM_ATTR_STATE] = { .type = NLA_U8 },
+ [MT76_TM_ATTR_TX_COUNT] = { .type = NLA_U32 },
++ [MT76_TM_ATTR_TX_LENGTH] = { .type = NLA_U32 },
+ [MT76_TM_ATTR_TX_RATE_MODE] = { .type = NLA_U8 },
+ [MT76_TM_ATTR_TX_RATE_NSS] = { .type = NLA_U8 },
+ [MT76_TM_ATTR_TX_RATE_IDX] = { .type = NLA_U8 },
+diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
+index f0f7a913eaabf..7d126634547f1 100644
+--- a/drivers/net/wireless/mediatek/mt76/tx.c
++++ b/drivers/net/wireless/mediatek/mt76/tx.c
+@@ -68,7 +68,9 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
+ status.sta = wcid_to_sta(wcid);
+
+ hw = mt76_tx_status_get_hw(dev, skb);
++ spin_lock_bh(&dev->rx_lock);
+ ieee80211_tx_status_ext(hw, &status);
++ spin_unlock_bh(&dev->rx_lock);
+ }
+ rcu_read_unlock();
+ }
+@@ -229,7 +231,9 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *
+ if (!skb->prev) {
+ hw = mt76_tx_status_get_hw(dev, skb);
+ status.sta = wcid_to_sta(wcid);
++ spin_lock_bh(&dev->rx_lock);
+ ieee80211_tx_status_ext(hw, &status);
++ spin_unlock_bh(&dev->rx_lock);
+ goto out;
+ }
+
+@@ -406,12 +410,11 @@ mt76_txq_stopped(struct mt76_queue *q)
+
+ static int
+ mt76_txq_send_burst(struct mt76_phy *phy, struct mt76_queue *q,
+- struct mt76_txq *mtxq)
++ struct mt76_txq *mtxq, struct mt76_wcid *wcid)
+ {
+ struct mt76_dev *dev = phy->dev;
+ struct ieee80211_txq *txq = mtxq_to_txq(mtxq);
+ enum mt76_txq_id qid = mt76_txq_get_qid(txq);
+- struct mt76_wcid *wcid = mtxq->wcid;
+ struct ieee80211_tx_info *info;
+ struct sk_buff *skb;
+ int n_frames = 1;
+@@ -491,8 +494,8 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
+ break;
+
+ mtxq = (struct mt76_txq *)txq->drv_priv;
+- wcid = mtxq->wcid;
+- if (wcid && test_bit(MT_WCID_FLAG_PS, &wcid->flags))
++ wcid = rcu_dereference(dev->wcid[mtxq->wcid]);
++ if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags))
+ continue;
+
+ spin_lock_bh(&q->lock);
+@@ -511,7 +514,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
+ }
+
+ if (!mt76_txq_stopped(q))
+- n_frames = mt76_txq_send_burst(phy, q, mtxq);
++ n_frames = mt76_txq_send_burst(phy, q, mtxq, wcid);
+
+ spin_unlock_bh(&q->lock);
+
+diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
+index 1e9f60bb811ad..b47343e321b81 100644
+--- a/drivers/net/wireless/mediatek/mt76/usb.c
++++ b/drivers/net/wireless/mediatek/mt76/usb.c
+@@ -814,6 +814,9 @@ static void mt76u_status_worker(struct mt76_worker *w)
+ struct mt76_queue *q;
+ int i;
+
++ if (!test_bit(MT76_STATE_RUNNING, &dev->phy.state))
++ return;
++
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ q = dev->phy.q_tx[i];
+ if (!q)
+@@ -833,11 +836,11 @@ static void mt76u_status_worker(struct mt76_worker *w)
+ wake_up(&dev->tx_wait);
+
+ mt76_worker_schedule(&dev->tx_worker);
+-
+- if (dev->drv->tx_status_data &&
+- !test_and_set_bit(MT76_READING_STATS, &dev->phy.state))
+- queue_work(dev->wq, &dev->usb.stat_work);
+ }
++
++ if (dev->drv->tx_status_data &&
++ !test_and_set_bit(MT76_READING_STATS, &dev->phy.state))
++ queue_work(dev->wq, &dev->usb.stat_work);
+ }
+
+ static void mt76u_tx_status_data(struct work_struct *work)
+diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c
+index 581964425468f..fc76c66ff1a5a 100644
+--- a/drivers/net/wireless/mediatek/mt76/util.c
++++ b/drivers/net/wireless/mediatek/mt76/util.c
+@@ -24,23 +24,23 @@ bool __mt76_poll(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
+ }
+ EXPORT_SYMBOL_GPL(__mt76_poll);
+
+-bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
+- int timeout)
++bool ____mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
++ int timeout, int tick)
+ {
+ u32 cur;
+
+- timeout /= 10;
++ timeout /= tick;
+ do {
+ cur = __mt76_rr(dev, offset) & mask;
+ if (cur == val)
+ return true;
+
+- usleep_range(10000, 20000);
++ usleep_range(1000 * tick, 2000 * tick);
+ } while (timeout-- > 0);
+
+ return false;
+ }
+-EXPORT_SYMBOL_GPL(__mt76_poll_msec);
++EXPORT_SYMBOL_GPL(____mt76_poll_msec);
+
+ int mt76_wcid_alloc(u32 *mask, int size)
+ {
+diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
+index ed78d2cb35e3c..fd3b768ca92bd 100644
+--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
++++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
+@@ -123,7 +123,8 @@ static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len)
+ if (data_len < min_seg_len ||
+ WARN_ON_ONCE(!dma_len) ||
+ WARN_ON_ONCE(dma_len + MT_DMA_HDRS > data_len) ||
+- WARN_ON_ONCE(dma_len & 0x3))
++ WARN_ON_ONCE(dma_len & 0x3) ||
++ WARN_ON_ONCE(dma_len < min_seg_len))
+ return 0;
+
+ return MT_DMA_HDRS + dma_len;
+diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+index 96973ec7bd9ac..1688144d78475 100644
+--- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
++++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+@@ -129,8 +129,7 @@ static void cfg_scan_result(enum scan_event scan_event,
+ info->frame_len,
+ (s32)info->rssi * 100,
+ GFP_KERNEL);
+- if (!bss)
+- cfg80211_put_bss(wiphy, bss);
++ cfg80211_put_bss(wiphy, bss);
+ } else if (scan_event == SCAN_EVENT_DONE) {
+ mutex_lock(&priv->scan_req_lock);
+
+@@ -940,30 +939,52 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch)
+ return;
+
+ while (index + sizeof(*e) <= len) {
++ u16 attr_size;
++
+ e = (struct wilc_attr_entry *)&buf[index];
+- if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST)
++ attr_size = le16_to_cpu(e->attr_len);
++
++ if (index + sizeof(*e) + attr_size > len)
++ return;
++
++ if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST &&
++ attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e)))
+ ch_list_idx = index;
+- else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL)
++ else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL &&
++ attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e)))
+ op_ch_idx = index;
++
+ if (ch_list_idx && op_ch_idx)
+ break;
+- index += le16_to_cpu(e->attr_len) + sizeof(*e);
++
++ index += sizeof(*e) + attr_size;
+ }
+
+ if (ch_list_idx) {
+- u16 attr_size;
+- struct wilc_ch_list_elem *e;
+- int i;
++ unsigned int i;
++ u16 elem_size;
+
+ ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx];
+- attr_size = le16_to_cpu(ch_list->attr_len);
+- for (i = 0; i < attr_size;) {
++ /* the number of bytes following the final 'elem' member */
++ elem_size = le16_to_cpu(ch_list->attr_len) -
++ (sizeof(*ch_list) - sizeof(struct wilc_attr_entry));
++ for (i = 0; i < elem_size;) {
++ struct wilc_ch_list_elem *e;
++
+ e = (struct wilc_ch_list_elem *)(ch_list->elem + i);
++
++ i += sizeof(*e);
++ if (i > elem_size)
++ break;
++
++ i += e->no_of_channels;
++ if (i > elem_size)
++ break;
++
+ if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) {
+ memset(e->ch_list, sta_ch, e->no_of_channels);
+ break;
+ }
+- i += e->no_of_channels;
+ }
+ }
+
+diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
+index a133736a78215..a7bca0475e1ee 100644
+--- a/drivers/net/wireless/microchip/wilc1000/hif.c
++++ b/drivers/net/wireless/microchip/wilc1000/hif.c
+@@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
+
+ rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len);
+ if (rsn_ie) {
++ int rsn_ie_len = sizeof(struct element) + rsn_ie[1];
+ int offset = 8;
+
+ param->mode_802_11i = 2;
+ param->rsn_found = true;
++
+ /* extract RSN capabilities */
+- offset += (rsn_ie[offset] * 4) + 2;
+- offset += (rsn_ie[offset] * 4) + 2;
+- memcpy(param->rsn_cap, &rsn_ie[offset], 2);
++ if (offset < rsn_ie_len) {
++ /* skip over pairwise suites */
++ offset += (rsn_ie[offset] * 4) + 2;
++
++ if (offset < rsn_ie_len) {
++ /* skip over authentication suites */
++ offset += (rsn_ie[offset] * 4) + 2;
++
++ if (offset + 1 < rsn_ie_len)
++ memcpy(param->rsn_cap, &rsn_ie[offset], 2);
++ }
++ }
+ }
+
+ if (param->rsn_found) {
+diff --git a/drivers/net/wireless/microchip/wilc1000/mon.c b/drivers/net/wireless/microchip/wilc1000/mon.c
+index 6bd63934c2d84..b5a1b65c087ca 100644
+--- a/drivers/net/wireless/microchip/wilc1000/mon.c
++++ b/drivers/net/wireless/microchip/wilc1000/mon.c
+@@ -233,7 +233,7 @@ struct net_device *wilc_wfi_init_mon_interface(struct wilc *wl,
+ wl->monitor_dev->netdev_ops = &wilc_wfi_netdev_ops;
+ wl->monitor_dev->needs_free_netdev = true;
+
+- if (cfg80211_register_netdevice(wl->monitor_dev)) {
++ if (register_netdevice(wl->monitor_dev)) {
+ netdev_err(real_dev, "register_netdevice failed\n");
+ free_netdev(wl->monitor_dev);
+ return NULL;
+@@ -251,7 +251,7 @@ void wilc_wfi_deinit_mon_interface(struct wilc *wl, bool rtnl_locked)
+ return;
+
+ if (rtnl_locked)
+- cfg80211_unregister_netdevice(wl->monitor_dev);
++ unregister_netdevice(wl->monitor_dev);
+ else
+ unregister_netdev(wl->monitor_dev);
+ wl->monitor_dev = NULL;
+diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
+index 7e4d9235251cb..5e3ec20e24dad 100644
+--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
++++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
+@@ -724,6 +724,7 @@ netdev_tx_t wilc_mac_xmit(struct sk_buff *skb, struct net_device *ndev)
+
+ if (skb->dev != ndev) {
+ netdev_err(ndev, "Packet not destined to this device\n");
++ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+@@ -901,7 +902,6 @@ void wilc_netdev_cleanup(struct wilc *wilc)
+
+ wilc_wlan_cfg_deinit(wilc);
+ wlan_deinit_locks(wilc);
+- kfree(wilc->bus_data);
+ wiphy_unregister(wilc->wiphy);
+ wiphy_free(wilc->wiphy);
+ }
+diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h
+index 86209b391a3d6..e6e23fc585ee8 100644
+--- a/drivers/net/wireless/microchip/wilc1000/netdev.h
++++ b/drivers/net/wireless/microchip/wilc1000/netdev.h
+@@ -252,6 +252,7 @@ struct wilc {
+ u8 *rx_buffer;
+ u32 rx_buffer_offset;
+ u8 *tx_buffer;
++ u32 *vmm_table;
+
+ struct txq_handle txq[NQUEUES];
+ int txq_entries;
+diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c
+index 42e03a701ae16..cb4efbfd0811f 100644
+--- a/drivers/net/wireless/microchip/wilc1000/sdio.c
++++ b/drivers/net/wireless/microchip/wilc1000/sdio.c
+@@ -20,6 +20,7 @@ static const struct sdio_device_id wilc_sdio_ids[] = {
+ { SDIO_DEVICE(SDIO_VENDOR_ID_MICROCHIP_WILC, SDIO_DEVICE_ID_MICROCHIP_WILC1000) },
+ { },
+ };
++MODULE_DEVICE_TABLE(sdio, wilc_sdio_ids);
+
+ #define WILC_SDIO_BLOCK_SIZE 512
+
+@@ -27,6 +28,7 @@ struct wilc_sdio {
+ bool irq_gpio;
+ u32 block_size;
+ int has_thrpt_enh3;
++ u8 *cmd53_buf;
+ };
+
+ struct sdio_cmd52 {
+@@ -46,6 +48,7 @@ struct sdio_cmd53 {
+ u32 count: 9;
+ u8 *buffer;
+ u32 block_size;
++ bool use_global_buf;
+ };
+
+ static const struct wilc_hif_func wilc_hif_sdio;
+@@ -90,6 +93,8 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd)
+ {
+ struct sdio_func *func = container_of(wilc->dev, struct sdio_func, dev);
+ int size, ret;
++ struct wilc_sdio *sdio_priv = wilc->bus_data;
++ u8 *buf = cmd->buffer;
+
+ sdio_claim_host(func);
+
+@@ -100,12 +105,23 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd)
+ else
+ size = cmd->count;
+
++ if (cmd->use_global_buf) {
++ if (size > sizeof(u32))
++ return -EINVAL;
++
++ buf = sdio_priv->cmd53_buf;
++ }
++
+ if (cmd->read_write) { /* write */
+- ret = sdio_memcpy_toio(func, cmd->address,
+- (void *)cmd->buffer, size);
++ if (cmd->use_global_buf)
++ memcpy(buf, cmd->buffer, size);
++
++ ret = sdio_memcpy_toio(func, cmd->address, buf, size);
+ } else { /* read */
+- ret = sdio_memcpy_fromio(func, (void *)cmd->buffer,
+- cmd->address, size);
++ ret = sdio_memcpy_fromio(func, buf, cmd->address, size);
++
++ if (cmd->use_global_buf)
++ memcpy(cmd->buffer, buf, size);
+ }
+
+ sdio_release_host(func);
+@@ -127,6 +143,12 @@ static int wilc_sdio_probe(struct sdio_func *func,
+ if (!sdio_priv)
+ return -ENOMEM;
+
++ sdio_priv->cmd53_buf = kzalloc(sizeof(u32), GFP_KERNEL);
++ if (!sdio_priv->cmd53_buf) {
++ ret = -ENOMEM;
++ goto free;
++ }
++
+ ret = wilc_cfg80211_init(&wilc, &func->dev, WILC_HIF_SDIO,
+ &wilc_hif_sdio);
+ if (ret)
+@@ -160,6 +182,7 @@ dispose_irq:
+ irq_dispose_mapping(wilc->dev_irq_num);
+ wilc_netdev_cleanup(wilc);
+ free:
++ kfree(sdio_priv->cmd53_buf);
+ kfree(sdio_priv);
+ return ret;
+ }
+@@ -167,9 +190,12 @@ free:
+ static void wilc_sdio_remove(struct sdio_func *func)
+ {
+ struct wilc *wilc = sdio_get_drvdata(func);
++ struct wilc_sdio *sdio_priv = wilc->bus_data;
+
+ clk_disable_unprepare(wilc->rtc_clk);
+ wilc_netdev_cleanup(wilc);
++ kfree(sdio_priv->cmd53_buf);
++ kfree(sdio_priv);
+ }
+
+ static int wilc_sdio_reset(struct wilc *wilc)
+@@ -365,8 +391,9 @@ static int wilc_sdio_write_reg(struct wilc *wilc, u32 addr, u32 data)
+ cmd.address = WILC_SDIO_FBR_DATA_REG;
+ cmd.block_mode = 0;
+ cmd.increment = 1;
+- cmd.count = 4;
++ cmd.count = sizeof(u32);
+ cmd.buffer = (u8 *)&data;
++ cmd.use_global_buf = true;
+ cmd.block_size = sdio_priv->block_size;
+ ret = wilc_sdio_cmd53(wilc, &cmd);
+ if (ret)
+@@ -404,6 +431,7 @@ static int wilc_sdio_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size)
+ nblk = size / block_size;
+ nleft = size % block_size;
+
++ cmd.use_global_buf = false;
+ if (nblk > 0) {
+ cmd.block_mode = 1;
+ cmd.increment = 1;
+@@ -482,8 +510,9 @@ static int wilc_sdio_read_reg(struct wilc *wilc, u32 addr, u32 *data)
+ cmd.address = WILC_SDIO_FBR_DATA_REG;
+ cmd.block_mode = 0;
+ cmd.increment = 1;
+- cmd.count = 4;
++ cmd.count = sizeof(u32);
+ cmd.buffer = (u8 *)data;
++ cmd.use_global_buf = true;
+
+ cmd.block_size = sdio_priv->block_size;
+ ret = wilc_sdio_cmd53(wilc, &cmd);
+@@ -525,6 +554,7 @@ static int wilc_sdio_read(struct wilc *wilc, u32 addr, u8 *buf, u32 size)
+ nblk = size / block_size;
+ nleft = size % block_size;
+
++ cmd.use_global_buf = false;
+ if (nblk > 0) {
+ cmd.block_mode = 1;
+ cmd.increment = 1;
+diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
+index dd481dc0b5ce0..c98c0999a6b67 100644
+--- a/drivers/net/wireless/microchip/wilc1000/spi.c
++++ b/drivers/net/wireless/microchip/wilc1000/spi.c
+@@ -182,9 +182,11 @@ free:
+ static int wilc_bus_remove(struct spi_device *spi)
+ {
+ struct wilc *wilc = spi_get_drvdata(spi);
++ struct wilc_spi *spi_priv = wilc->bus_data;
+
+ clk_disable_unprepare(wilc->rtc_clk);
+ wilc_netdev_cleanup(wilc);
++ kfree(spi_priv);
+
+ return 0;
+ }
+diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
+index 200a103a0a858..380699983a75b 100644
+--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
++++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
+@@ -701,7 +701,7 @@ int wilc_wlan_handle_txq(struct wilc *wilc, u32 *txq_count)
+ int ret = 0;
+ int counter;
+ int timeout;
+- u32 vmm_table[WILC_VMM_TBL_SIZE];
++ u32 *vmm_table = wilc->vmm_table;
+ u8 ac_pkt_num_to_chip[NQUEUES] = {0, 0, 0, 0};
+ const struct wilc_hif_func *func;
+ int srcu_idx;
+@@ -1220,6 +1220,8 @@ void wilc_wlan_cleanup(struct net_device *dev)
+ while ((rqe = wilc_wlan_rxq_remove(wilc)))
+ kfree(rqe);
+
++ kfree(wilc->vmm_table);
++ wilc->vmm_table = NULL;
+ kfree(wilc->rx_buffer);
+ wilc->rx_buffer = NULL;
+ kfree(wilc->tx_buffer);
+@@ -1455,6 +1457,14 @@ int wilc_wlan_init(struct net_device *dev)
+ goto fail;
+ }
+
++ if (!wilc->vmm_table)
++ wilc->vmm_table = kzalloc(WILC_VMM_TBL_SIZE, GFP_KERNEL);
++
++ if (!wilc->vmm_table) {
++ ret = -ENOBUFS;
++ goto fail;
++ }
++
+ if (!wilc->tx_buffer)
+ wilc->tx_buffer = kmalloc(WILC_TX_BUFF_SIZE, GFP_KERNEL);
+
+@@ -1479,7 +1489,8 @@ int wilc_wlan_init(struct net_device *dev)
+ return 0;
+
+ fail:
+-
++ kfree(wilc->vmm_table);
++ wilc->vmm_table = NULL;
+ kfree(wilc->rx_buffer);
+ wilc->rx_buffer = NULL;
+ kfree(wilc->tx_buffer);
+diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+index deddb0afd3128..34788bfb34b7a 100644
+--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
++++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+@@ -4164,7 +4164,10 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev,
+ rt2800_bbp_write(rt2x00dev, 62, 0x37 - rt2x00dev->lna_gain);
+ rt2800_bbp_write(rt2x00dev, 63, 0x37 - rt2x00dev->lna_gain);
+ rt2800_bbp_write(rt2x00dev, 64, 0x37 - rt2x00dev->lna_gain);
+- rt2800_bbp_write(rt2x00dev, 86, 0);
++ if (rt2x00_rt(rt2x00dev, RT6352))
++ rt2800_bbp_write(rt2x00dev, 86, 0x38);
++ else
++ rt2800_bbp_write(rt2x00dev, 86, 0);
+ }
+
+ if (rf->channel <= 14) {
+@@ -4365,7 +4368,8 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev,
+ reg = (rf->channel <= 14 ? 0x1c : 0x24) + 2*rt2x00dev->lna_gain;
+ rt2800_bbp_write_with_rx_chain(rt2x00dev, 66, reg);
+
+- rt2800_iq_calibrate(rt2x00dev, rf->channel);
++ if (rt2x00_rt(rt2x00dev, RT5592))
++ rt2800_iq_calibrate(rt2x00dev, rf->channel);
+ }
+
+ bbp = rt2800_bbp_read(rt2x00dev, 4);
+@@ -5644,7 +5648,8 @@ static inline void rt2800_set_vgc(struct rt2x00_dev *rt2x00dev,
+ if (qual->vgc_level != vgc_level) {
+ if (rt2x00_rt(rt2x00dev, RT3572) ||
+ rt2x00_rt(rt2x00dev, RT3593) ||
+- rt2x00_rt(rt2x00dev, RT3883)) {
++ rt2x00_rt(rt2x00dev, RT3883) ||
++ rt2x00_rt(rt2x00dev, RT6352)) {
+ rt2800_bbp_write_with_rx_chain(rt2x00dev, 66,
+ vgc_level);
+ } else if (rt2x00_rt(rt2x00dev, RT5592)) {
+@@ -5867,7 +5872,7 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev)
+ rt2800_register_write(rt2x00dev, TX_SW_CFG0, 0x00000404);
+ } else if (rt2x00_rt(rt2x00dev, RT6352)) {
+ rt2800_register_write(rt2x00dev, TX_SW_CFG0, 0x00000401);
+- rt2800_register_write(rt2x00dev, TX_SW_CFG1, 0x000C0000);
++ rt2800_register_write(rt2x00dev, TX_SW_CFG1, 0x000C0001);
+ rt2800_register_write(rt2x00dev, TX_SW_CFG2, 0x00000000);
+ rt2800_register_write(rt2x00dev, TX_ALC_VGA3, 0x00000000);
+ rt2800_register_write(rt2x00dev, TX0_BB_GAIN_ATTEN, 0x0);
+@@ -6129,6 +6134,27 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev)
+ reg = rt2800_register_read(rt2x00dev, US_CYC_CNT);
+ rt2x00_set_field32(&reg, US_CYC_CNT_CLOCK_CYCLE, 125);
+ rt2800_register_write(rt2x00dev, US_CYC_CNT, reg);
++ } else if (rt2x00_is_soc(rt2x00dev)) {
++ struct clk *clk = clk_get_sys("bus", NULL);
++ int rate;
++
++ if (IS_ERR(clk)) {
++ clk = clk_get_sys("cpu", NULL);
++
++ if (IS_ERR(clk)) {
++ rate = 125;
++ } else {
++ rate = clk_get_rate(clk) / 3000000;
++ clk_put(clk);
++ }
++ } else {
++ rate = clk_get_rate(clk) / 1000000;
++ clk_put(clk);
++ }
++
++ reg = rt2800_register_read(rt2x00dev, US_CYC_CNT);
++ rt2x00_set_field32(&reg, US_CYC_CNT_CLOCK_CYCLE, rate);
++ rt2800_register_write(rt2x00dev, US_CYC_CNT, reg);
+ }
+
+ reg = rt2800_register_read(rt2x00dev, HT_FBK_CFG0);
+diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
+index e95c101c27111..388675d073ce2 100644
+--- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
++++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
+@@ -1091,6 +1091,7 @@ static void rt2x00lib_remove_hw(struct rt2x00_dev *rt2x00dev)
+ }
+
+ kfree(rt2x00dev->spec.channels_info);
++ kfree(rt2x00dev->chan_survey);
+ }
+
+ static int rt2x00lib_probe_hw(struct rt2x00_dev *rt2x00dev)
+diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
+index e4473a5512415..74c3d8cb31002 100644
+--- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
++++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
+@@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status)
+ if (status == -ENODEV || status == -ENOENT)
+ return true;
+
++ if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags))
++ return false;
++
+ if (status == -EPROTO || status == -ETIMEDOUT)
+ rt2x00dev->num_proto_errs++;
+ else
+diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
+index 0f5009c47cd0a..96330a6c066be 100644
+--- a/drivers/net/wireless/ray_cs.c
++++ b/drivers/net/wireless/ray_cs.c
+@@ -270,13 +270,14 @@ static int ray_probe(struct pcmcia_device *p_dev)
+ {
+ ray_dev_t *local;
+ struct net_device *dev;
++ int ret;
+
+ dev_dbg(&p_dev->dev, "ray_attach()\n");
+
+ /* Allocate space for private device-specific data */
+ dev = alloc_etherdev(sizeof(ray_dev_t));
+ if (!dev)
+- goto fail_alloc_dev;
++ return -ENOMEM;
+
+ local = netdev_priv(dev);
+ local->finder = p_dev;
+@@ -313,11 +314,16 @@ static int ray_probe(struct pcmcia_device *p_dev)
+ timer_setup(&local->timer, NULL, 0);
+
+ this_device = p_dev;
+- return ray_config(p_dev);
++ ret = ray_config(p_dev);
++ if (ret)
++ goto err_free_dev;
++
++ return 0;
+
+-fail_alloc_dev:
+- return -ENOMEM;
+-} /* ray_attach */
++err_free_dev:
++ free_netdev(dev);
++ return ret;
++}
+
+ static void ray_detach(struct pcmcia_device *link)
+ {
+@@ -382,6 +388,8 @@ static int ray_config(struct pcmcia_device *link)
+ goto failed;
+ local->sram = ioremap(link->resource[2]->start,
+ resource_size(link->resource[2]));
++ if (!local->sram)
++ goto failed;
+
+ /*** Set up 16k window for shared memory (receive buffer) ***************/
+ link->resource[3]->flags |=
+@@ -396,6 +404,8 @@ static int ray_config(struct pcmcia_device *link)
+ goto failed;
+ local->rmem = ioremap(link->resource[3]->start,
+ resource_size(link->resource[3]));
++ if (!local->rmem)
++ goto failed;
+
+ /*** Set up window for attribute memory ***********************************/
+ link->resource[4]->flags |=
+@@ -410,6 +420,8 @@ static int ray_config(struct pcmcia_device *link)
+ goto failed;
+ local->amem = ioremap(link->resource[4]->start,
+ resource_size(link->resource[4]));
++ if (!local->amem)
++ goto failed;
+
+ dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram);
+ dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem);
+@@ -1637,38 +1649,34 @@ static void authenticate_timeout(struct timer_list *t)
+ /*===========================================================================*/
+ static int parse_addr(char *in_str, UCHAR *out)
+ {
++ int i, k;
+ int len;
+- int i, j, k;
+- int status;
+
+ if (in_str == NULL)
+ return 0;
+- if ((len = strlen(in_str)) < 2)
++ len = strnlen(in_str, ADDRLEN * 2 + 1) - 1;
++ if (len < 1)
+ return 0;
+ memset(out, 0, ADDRLEN);
+
+- status = 1;
+- j = len - 1;
+- if (j > 12)
+- j = 12;
+ i = 5;
+
+- while (j > 0) {
+- if ((k = hex_to_bin(in_str[j--])) != -1)
++ while (len > 0) {
++ if ((k = hex_to_bin(in_str[len--])) != -1)
+ out[i] = k;
+ else
+ return 0;
+
+- if (j == 0)
++ if (len == 0)
+ break;
+- if ((k = hex_to_bin(in_str[j--])) != -1)
++ if ((k = hex_to_bin(in_str[len--])) != -1)
+ out[i] += k << 4;
+ else
+ return 0;
+ if (!i--)
+ break;
+ }
+- return status;
++ return 1;
+ }
+
+ /*===========================================================================*/
+diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
+index 2477e18c7caec..025619cd14e82 100644
+--- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
++++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c
+@@ -460,8 +460,10 @@ static void rtl8180_tx(struct ieee80211_hw *dev,
+ struct rtl8180_priv *priv = dev->priv;
+ struct rtl8180_tx_ring *ring;
+ struct rtl8180_tx_desc *entry;
++ unsigned int prio = 0;
+ unsigned long flags;
+- unsigned int idx, prio, hw_prio;
++ unsigned int idx, hw_prio;
++
+ dma_addr_t mapping;
+ u32 tx_flags;
+ u8 rc_flags;
+@@ -470,7 +472,9 @@ static void rtl8180_tx(struct ieee80211_hw *dev,
+ /* do arithmetic and then convert to le16 */
+ u16 frame_duration = 0;
+
+- prio = skb_get_queue_mapping(skb);
++ /* rtl8180/rtl8185 only has one useable tx queue */
++ if (dev->queues > IEEE80211_AC_BK)
++ prio = skb_get_queue_mapping(skb);
+ ring = &priv->tx_ring[prio];
+
+ mapping = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
+diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
+index 585784258c665..4efab907a3ac6 100644
+--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
++++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
+@@ -28,7 +28,7 @@ u8 rtl818x_ioread8_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0),
+ RTL8187_REQ_GET_REG, RTL8187_REQT_READ,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits8, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits8, sizeof(val), 500);
+
+ val = priv->io_dmabuf->bits8;
+ mutex_unlock(&priv->io_mutex);
+@@ -45,7 +45,7 @@ u16 rtl818x_ioread16_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0),
+ RTL8187_REQ_GET_REG, RTL8187_REQT_READ,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits16, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits16, sizeof(val), 500);
+
+ val = priv->io_dmabuf->bits16;
+ mutex_unlock(&priv->io_mutex);
+@@ -62,7 +62,7 @@ u32 rtl818x_ioread32_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0),
+ RTL8187_REQ_GET_REG, RTL8187_REQT_READ,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits32, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits32, sizeof(val), 500);
+
+ val = priv->io_dmabuf->bits32;
+ mutex_unlock(&priv->io_mutex);
+@@ -79,7 +79,7 @@ void rtl818x_iowrite8_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0),
+ RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits8, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits8, sizeof(val), 500);
+
+ mutex_unlock(&priv->io_mutex);
+ }
+@@ -93,7 +93,7 @@ void rtl818x_iowrite16_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0),
+ RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits16, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits16, sizeof(val), 500);
+
+ mutex_unlock(&priv->io_mutex);
+ }
+@@ -107,7 +107,7 @@ void rtl818x_iowrite32_idx(struct rtl8187_priv *priv,
+ usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0),
+ RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE,
+ (unsigned long)addr, idx & 0x03,
+- &priv->io_dmabuf->bits32, sizeof(val), HZ / 2);
++ &priv->io_dmabuf->bits32, sizeof(val), 500);
+
+ mutex_unlock(&priv->io_mutex);
+ }
+@@ -183,7 +183,7 @@ static void rtl8225_write_8051(struct ieee80211_hw *dev, u8 addr, __le16 data)
+ usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0),
+ RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE,
+ addr, 0x8225, &priv->io_dmabuf->bits16, sizeof(data),
+- HZ / 2);
++ 500);
+
+ mutex_unlock(&priv->io_mutex);
+
+diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+index 7ddce3c3f0c48..079fdbf838efc 100644
+--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+@@ -1190,7 +1190,7 @@ struct rtl8723bu_c2h {
+ u8 bw;
+ } __packed ra_report;
+ };
+-};
++} __packed;
+
+ struct rtl8xxxu_fileops;
+
+@@ -1346,6 +1346,7 @@ struct rtl8xxxu_priv {
+ u32 rege9c;
+ u32 regeb4;
+ u32 regebc;
++ u32 regrcr;
+ int next_mbox;
+ int nr_out_eps;
+
+diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+index b06508d0cdf8f..761aeec07cdd9 100644
+--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+@@ -1669,6 +1669,11 @@ static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv)
+ val8 = rtl8xxxu_read8(priv, REG_PAD_CTRL1);
+ val8 &= ~BIT(0);
+ rtl8xxxu_write8(priv, REG_PAD_CTRL1, val8);
++
++ /*
++ * Fix transmission failure of rtl8192e.
++ */
++ rtl8xxxu_write8(priv, REG_TXPAUSE, 0x00);
+ }
+
+ struct rtl8xxxu_fileops rtl8192eu_fops = {
+@@ -1695,6 +1700,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = {
+ .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24),
+ .has_s0s1 = 0,
+ .gen2_thermal_meter = 1,
++ .needs_full_init = 1,
+ .adda_1t_init = 0x0fc01616,
+ .adda_1t_path_on = 0x0fc01616,
+ .adda_2t_path_on_a = 0x0fc01616,
+diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+index 774341b0005a3..be93ffa5086a2 100644
+--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+@@ -1607,18 +1607,18 @@ static void rtl8xxxu_print_chipinfo(struct rtl8xxxu_priv *priv)
+ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
+ {
+ struct device *dev = &priv->udev->dev;
+- u32 val32, bonding;
++ u32 val32, bonding, sys_cfg;
+ u16 val16;
+
+- val32 = rtl8xxxu_read32(priv, REG_SYS_CFG);
+- priv->chip_cut = (val32 & SYS_CFG_CHIP_VERSION_MASK) >>
++ sys_cfg = rtl8xxxu_read32(priv, REG_SYS_CFG);
++ priv->chip_cut = (sys_cfg & SYS_CFG_CHIP_VERSION_MASK) >>
+ SYS_CFG_CHIP_VERSION_SHIFT;
+- if (val32 & SYS_CFG_TRP_VAUX_EN) {
++ if (sys_cfg & SYS_CFG_TRP_VAUX_EN) {
+ dev_info(dev, "Unsupported test chip\n");
+ return -ENOTSUPP;
+ }
+
+- if (val32 & SYS_CFG_BT_FUNC) {
++ if (sys_cfg & SYS_CFG_BT_FUNC) {
+ if (priv->chip_cut >= 3) {
+ sprintf(priv->chip_name, "8723BU");
+ priv->rtl_chip = RTL8723B;
+@@ -1640,7 +1640,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
+ if (val32 & MULTI_GPS_FUNC_EN)
+ priv->has_gps = 1;
+ priv->is_multi_func = 1;
+- } else if (val32 & SYS_CFG_TYPE_ID) {
++ } else if (sys_cfg & SYS_CFG_TYPE_ID) {
+ bonding = rtl8xxxu_read32(priv, REG_HPON_FSM);
+ bonding &= HPON_FSM_BONDING_MASK;
+ if (priv->fops->tx_desc_size ==
+@@ -1688,7 +1688,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
+ case RTL8188E:
+ case RTL8192E:
+ case RTL8723B:
+- switch (val32 & SYS_CFG_VENDOR_EXT_MASK) {
++ switch (sys_cfg & SYS_CFG_VENDOR_EXT_MASK) {
+ case SYS_CFG_VENDOR_ID_TSMC:
+ sprintf(priv->chip_vendor, "TSMC");
+ break;
+@@ -1705,7 +1705,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
+ }
+ break;
+ default:
+- if (val32 & SYS_CFG_VENDOR_ID) {
++ if (sys_cfg & SYS_CFG_VENDOR_ID) {
+ sprintf(priv->chip_vendor, "UMC");
+ priv->vendor_umc = 1;
+ } else {
+@@ -1874,13 +1874,6 @@ static int rtl8xxxu_read_efuse(struct rtl8xxxu_priv *priv)
+
+ /* We have 8 bits to indicate validity */
+ map_addr = offset * 8;
+- if (map_addr >= EFUSE_MAP_LEN) {
+- dev_warn(dev, "%s: Illegal map_addr (%04x), "
+- "efuse corrupt!\n",
+- __func__, map_addr);
+- ret = -EINVAL;
+- goto exit;
+- }
+ for (i = 0; i < EFUSE_MAX_WORD_UNIT; i++) {
+ /* Check word enable condition in the section */
+ if (word_mask & BIT(i)) {
+@@ -1891,6 +1884,13 @@ static int rtl8xxxu_read_efuse(struct rtl8xxxu_priv *priv)
+ ret = rtl8xxxu_read_efuse8(priv, efuse_addr++, &val8);
+ if (ret)
+ goto exit;
++ if (map_addr >= EFUSE_MAP_LEN - 1) {
++ dev_warn(dev, "%s: Illegal map_addr (%04x), "
++ "efuse corrupt!\n",
++ __func__, map_addr);
++ ret = -EINVAL;
++ goto exit;
++ }
+ priv->efuse_wifi.raw[map_addr++] = val8;
+
+ ret = rtl8xxxu_read_efuse8(priv, efuse_addr++, &val8);
+@@ -2925,12 +2925,12 @@ bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv,
+ }
+
+ if (!(simubitmap & 0x30) && priv->tx_paths > 1) {
+- /* path B RX OK */
++ /* path B TX OK */
+ for (i = 4; i < 6; i++)
+ result[3][i] = result[c1][i];
+ }
+
+- if (!(simubitmap & 0x30) && priv->tx_paths > 1) {
++ if (!(simubitmap & 0xc0) && priv->tx_paths > 1) {
+ /* path B RX OK */
+ for (i = 6; i < 8; i++)
+ result[3][i] = result[c1][i];
+@@ -4045,6 +4045,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
+ RCR_ACCEPT_MGMT_FRAME | RCR_HTC_LOC_CTRL |
+ RCR_APPEND_PHYSTAT | RCR_APPEND_ICV | RCR_APPEND_MIC;
+ rtl8xxxu_write32(priv, REG_RCR, val32);
++ priv->regrcr = val32;
+
+ /*
+ * Accept all multicast
+@@ -4338,15 +4339,14 @@ void rtl8xxxu_gen2_update_rate_mask(struct rtl8xxxu_priv *priv,
+ h2c.b_macid_cfg.ramask2 = (ramask >> 16) & 0xff;
+ h2c.b_macid_cfg.ramask3 = (ramask >> 24) & 0xff;
+
+- h2c.ramask.arg = 0x80;
+ h2c.b_macid_cfg.data1 = rateid;
+ if (sgi)
+ h2c.b_macid_cfg.data1 |= BIT(7);
+
+ h2c.b_macid_cfg.data2 = bw;
+
+- dev_dbg(&priv->udev->dev, "%s: rate mask %08x, arg %02x, size %zi\n",
+- __func__, ramask, h2c.ramask.arg, sizeof(h2c.b_macid_cfg));
++ dev_dbg(&priv->udev->dev, "%s: rate mask %08x, rateid %02x, sgi %d, size %zi\n",
++ __func__, ramask, rateid, sgi, sizeof(h2c.b_macid_cfg));
+ rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.b_macid_cfg));
+ }
+
+@@ -4370,12 +4370,9 @@ void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv,
+ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv,
+ u8 macid, bool connect)
+ {
+-#ifdef RTL8XXXU_GEN2_REPORT_CONNECT
+ /*
+- * Barry Day reports this causes issues with 8192eu and 8723bu
+- * devices reconnecting. The reason for this is unclear, but
+- * until it is better understood, leave the code in place but
+- * disabled, so it is not lost.
++ * The firmware turns on the rate control when it knows it's
++ * connected to a network.
+ */
+ struct h2c_cmd h2c;
+
+@@ -4388,7 +4385,6 @@ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv,
+ h2c.media_status_rpt.parm &= ~BIT(0);
+
+ rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.media_status_rpt));
+-#endif
+ }
+
+ void rtl8xxxu_gen1_init_aggregation(struct rtl8xxxu_priv *priv)
+@@ -4508,6 +4504,53 @@ rtl8xxxu_wireless_mode(struct ieee80211_hw *hw, struct ieee80211_sta *sta)
+ return network_type;
+ }
+
++static void rtl8xxxu_set_aifs(struct rtl8xxxu_priv *priv, u8 slot_time)
++{
++ u32 reg_edca_param[IEEE80211_NUM_ACS] = {
++ [IEEE80211_AC_VO] = REG_EDCA_VO_PARAM,
++ [IEEE80211_AC_VI] = REG_EDCA_VI_PARAM,
++ [IEEE80211_AC_BE] = REG_EDCA_BE_PARAM,
++ [IEEE80211_AC_BK] = REG_EDCA_BK_PARAM,
++ };
++ u32 val32;
++ u16 wireless_mode = 0;
++ u8 aifs, aifsn, sifs;
++ int i;
++
++ if (priv->vif) {
++ struct ieee80211_sta *sta;
++
++ rcu_read_lock();
++ sta = ieee80211_find_sta(priv->vif, priv->vif->bss_conf.bssid);
++ if (sta)
++ wireless_mode = rtl8xxxu_wireless_mode(priv->hw, sta);
++ rcu_read_unlock();
++ }
++
++ if (priv->hw->conf.chandef.chan->band == NL80211_BAND_5GHZ ||
++ (wireless_mode & WIRELESS_MODE_N_24G))
++ sifs = 16;
++ else
++ sifs = 10;
++
++ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
++ val32 = rtl8xxxu_read32(priv, reg_edca_param[i]);
++
++ /* It was set in conf_tx. */
++ aifsn = val32 & 0xff;
++
++ /* aifsn not set yet or already fixed */
++ if (aifsn < 2 || aifsn > 15)
++ continue;
++
++ aifs = aifsn * slot_time + sifs;
++
++ val32 &= ~0xff;
++ val32 |= aifs;
++ rtl8xxxu_write32(priv, reg_edca_param[i], val32);
++ }
++}
++
+ static void
+ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *bss_conf, u32 changed)
+@@ -4593,6 +4636,8 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ else
+ val8 = 20;
+ rtl8xxxu_write8(priv, REG_SLOT, val8);
++
++ rtl8xxxu_set_aifs(priv, val8);
+ }
+
+ if (changed & BSS_CHANGED_BSSID) {
+@@ -4984,6 +5029,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
+ if (control && control->sta)
+ sta = control->sta;
+
++ queue = rtl8xxxu_queue_select(hw, skb);
++
+ tx_desc = skb_push(skb, tx_desc_size);
+
+ memset(tx_desc, 0, tx_desc_size);
+@@ -4996,7 +5043,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
+ is_broadcast_ether_addr(ieee80211_get_DA(hdr)))
+ tx_desc->txdw0 |= TXDESC_BROADMULTICAST;
+
+- queue = rtl8xxxu_queue_select(hw, skb);
+ tx_desc->txdw1 = cpu_to_le32(queue << TXDESC_QUEUE_SHIFT);
+
+ if (tx_info->control.hw_key) {
+@@ -5139,7 +5185,7 @@ static void rtl8xxxu_queue_rx_urb(struct rtl8xxxu_priv *priv,
+ pending = priv->rx_urb_pending_count;
+ } else {
+ skb = (struct sk_buff *)rx_urb->urb.context;
+- dev_kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ usb_free_urb(&rx_urb->urb);
+ }
+
+@@ -5445,9 +5491,6 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
+ btcoex = &priv->bt_coex;
+ rarpt = &priv->ra_report;
+
+- if (priv->rf_paths > 1)
+- goto out;
+-
+ while (!skb_queue_empty(&priv->c2hcmd_queue)) {
+ skb = skb_dequeue(&priv->c2hcmd_queue);
+
+@@ -5468,7 +5511,6 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
+ rarpt->txrate.flags = 0;
+ rate = c2h->ra_report.rate;
+ sgi = c2h->ra_report.sgi;
+- bw = c2h->ra_report.bw;
+
+ if (rate < DESC_RATE_MCS0) {
+ rarpt->txrate.legacy =
+@@ -5485,8 +5527,13 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
+ RATE_INFO_FLAGS_SHORT_GI;
+ }
+
+- if (bw == RATE_INFO_BW_20)
+- rarpt->txrate.bw |= RATE_INFO_BW_20;
++ if (skb->len >= offsetofend(typeof(*c2h), ra_report.bw)) {
++ if (c2h->ra_report.bw == RTL8XXXU_CHANNEL_WIDTH_40)
++ bw = RATE_INFO_BW_40;
++ else
++ bw = RATE_INFO_BW_20;
++ rarpt->txrate.bw = bw;
++ }
+ }
+ bit_rate = cfg80211_calculate_bitrate(&rarpt->txrate);
+ rarpt->bit_rate = bit_rate;
+@@ -5495,10 +5542,9 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
+ default:
+ break;
+ }
+- }
+
+-out:
+- dev_kfree_skb(skb);
++ dev_kfree_skb(skb);
++ }
+ }
+
+ static void rtl8723bu_handle_c2h(struct rtl8xxxu_priv *priv,
+@@ -5864,7 +5910,6 @@ static int rtl8xxxu_config(struct ieee80211_hw *hw, u32 changed)
+ {
+ struct rtl8xxxu_priv *priv = hw->priv;
+ struct device *dev = &priv->udev->dev;
+- u16 val16;
+ int ret = 0, channel;
+ bool ht40;
+
+@@ -5874,14 +5919,6 @@ static int rtl8xxxu_config(struct ieee80211_hw *hw, u32 changed)
+ __func__, hw->conf.chandef.chan->hw_value,
+ changed, hw->conf.chandef.width);
+
+- if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) {
+- val16 = ((hw->conf.long_frame_max_tx_count <<
+- RETRY_LIMIT_LONG_SHIFT) & RETRY_LIMIT_LONG_MASK) |
+- ((hw->conf.short_frame_max_tx_count <<
+- RETRY_LIMIT_SHORT_SHIFT) & RETRY_LIMIT_SHORT_MASK);
+- rtl8xxxu_write16(priv, REG_RETRY_LIMIT, val16);
+- }
+-
+ if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
+ switch (hw->conf.chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+@@ -5964,7 +6001,7 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
+ unsigned int *total_flags, u64 multicast)
+ {
+ struct rtl8xxxu_priv *priv = hw->priv;
+- u32 rcr = rtl8xxxu_read32(priv, REG_RCR);
++ u32 rcr = priv->regrcr;
+
+ dev_dbg(&priv->udev->dev, "%s: changed_flags %08x, total_flags %08x\n",
+ __func__, changed_flags, *total_flags);
+@@ -6010,6 +6047,7 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
+ */
+
+ rtl8xxxu_write32(priv, REG_RCR, rcr);
++ priv->regrcr = rcr;
+
+ *total_flags &= (FIF_ALLMULTI | FIF_FCSFAIL | FIF_BCN_PRBRESP_PROMISC |
+ FIF_CONTROL | FIF_OTHER_BSS | FIF_PSPOLL |
+diff --git a/drivers/net/wireless/realtek/rtlwifi/debug.c b/drivers/net/wireless/realtek/rtlwifi/debug.c
+index 901cdfe3723cf..9eb26dfe4ca92 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/debug.c
++++ b/drivers/net/wireless/realtek/rtlwifi/debug.c
+@@ -278,8 +278,8 @@ static ssize_t rtl_debugfs_set_write_reg(struct file *filp,
+
+ tmp_len = (count > sizeof(tmp) - 1 ? sizeof(tmp) - 1 : count);
+
+- if (!buffer || copy_from_user(tmp, buffer, tmp_len))
+- return count;
++ if (copy_from_user(tmp, buffer, tmp_len))
++ return -EFAULT;
+
+ tmp[tmp_len] = '\0';
+
+@@ -287,7 +287,7 @@ static ssize_t rtl_debugfs_set_write_reg(struct file *filp,
+ num = sscanf(tmp, "%x %x %x", &addr, &val, &len);
+
+ if (num != 3)
+- return count;
++ return -EINVAL;
+
+ switch (len) {
+ case 1:
+@@ -329,8 +329,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp,
+
+ tmp_len = (count > sizeof(tmp) - 1 ? sizeof(tmp) - 1 : count);
+
+- if (!buffer || copy_from_user(tmp, buffer, tmp_len))
+- return count;
++ if (copy_from_user(tmp, buffer, tmp_len))
++ return -EFAULT;
+
+ tmp[tmp_len] = '\0';
+
+@@ -340,8 +340,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp,
+ &h2c_data[4], &h2c_data[5],
+ &h2c_data[6], &h2c_data[7]);
+
+- if (h2c_len <= 0)
+- return count;
++ if (h2c_len == 0)
++ return -EINVAL;
+
+ for (i = 0; i < h2c_len; i++)
+ h2c_data_packed[i] = (u8)h2c_data[i];
+@@ -375,8 +375,8 @@ static ssize_t rtl_debugfs_set_write_rfreg(struct file *filp,
+
+ tmp_len = (count > sizeof(tmp) - 1 ? sizeof(tmp) - 1 : count);
+
+- if (!buffer || copy_from_user(tmp, buffer, tmp_len))
+- return count;
++ if (copy_from_user(tmp, buffer, tmp_len))
++ return -EFAULT;
+
+ tmp[tmp_len] = '\0';
+
+@@ -386,7 +386,7 @@ static ssize_t rtl_debugfs_set_write_rfreg(struct file *filp,
+ if (num != 4) {
+ rtl_dbg(rtlpriv, COMP_ERR, DBG_DMESG,
+ "Format is <path> <addr> <mask> <data>\n");
+- return count;
++ return -EINVAL;
+ }
+
+ rtl_set_rfreg(hw, path, addr, bitmask, data);
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
+index bf686a916acb8..13e9717a1ce82 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
+@@ -68,8 +68,10 @@ static void _rtl88ee_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+ struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[BEACON_QUEUE];
++ struct sk_buff_head free_list;
+ unsigned long flags;
+
++ skb_queue_head_init(&free_list);
+ spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
+ while (skb_queue_len(&ring->queue)) {
+ struct rtl_tx_desc *entry = &ring->desc[ring->idx];
+@@ -79,10 +81,12 @@ static void _rtl88ee_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry,
+ true, HW_DESC_TXBUFF_ADDR),
+ skb->len, DMA_TO_DEVICE);
+- kfree_skb(skb);
++ __skb_queue_tail(&free_list, skb);
+ ring->idx = (ring->idx + 1) % ring->entries;
+ }
+ spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags);
++
++ __skb_queue_purge(&free_list);
+ }
+
+ static void _rtl88ee_disable_bcn_sub_func(struct ieee80211_hw *hw)
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+index 6312fddd9c00a..eaba661133280 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+@@ -1000,6 +1000,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
+ _initpabias(hw);
+ rtl92c_dm_init(hw);
+ exit:
++ local_irq_disable();
+ local_irq_restore(flags);
+ return err;
+ }
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
+index 9b83c710c9b86..4d153bd62c53f 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
+@@ -2386,14 +2386,10 @@ void rtl92d_phy_reload_iqk_setting(struct ieee80211_hw *hw, u8 channel)
+ rtl_dbg(rtlpriv, COMP_SCAN, DBG_LOUD,
+ "Just Read IQK Matrix reg for channel:%d....\n",
+ channel);
+- if ((rtlphy->iqk_matrix[indexforchannel].
+- value[0] != NULL)
+- /*&&(regea4 != 0) */)
++ if (rtlphy->iqk_matrix[indexforchannel].value[0][0] != 0)
+ _rtl92d_phy_patha_fill_iqk_matrix(hw, true,
+- rtlphy->iqk_matrix[
+- indexforchannel].value, 0,
+- (rtlphy->iqk_matrix[
+- indexforchannel].value[0][2] == 0));
++ rtlphy->iqk_matrix[indexforchannel].value, 0,
++ rtlphy->iqk_matrix[indexforchannel].value[0][2] == 0);
+ if (IS_92D_SINGLEPHY(rtlhal->version)) {
+ if ((rtlphy->iqk_matrix[
+ indexforchannel].value[0][4] != 0)
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+index 0748aedce2adb..ccbb082d5e928 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+@@ -30,8 +30,10 @@ static void _rtl8723be_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+ struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[BEACON_QUEUE];
++ struct sk_buff_head free_list;
+ unsigned long flags;
+
++ skb_queue_head_init(&free_list);
+ spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
+ while (skb_queue_len(&ring->queue)) {
+ struct rtl_tx_desc *entry = &ring->desc[ring->idx];
+@@ -41,10 +43,12 @@ static void _rtl8723be_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry,
+ true, HW_DESC_TXBUFF_ADDR),
+ skb->len, DMA_TO_DEVICE);
+- kfree_skb(skb);
++ __skb_queue_tail(&free_list, skb);
+ ring->idx = (ring->idx + 1) % ring->entries;
+ }
+ spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags);
++
++ __skb_queue_purge(&free_list);
+ }
+
+ static void _rtl8723be_set_bcn_ctrl_reg(struct ieee80211_hw *hw,
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+index 33ffc24d36759..c4ee65cc2d5e6 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+@@ -26,8 +26,10 @@ static void _rtl8821ae_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+ struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[BEACON_QUEUE];
++ struct sk_buff_head free_list;
+ unsigned long flags;
+
++ skb_queue_head_init(&free_list);
+ spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
+ while (skb_queue_len(&ring->queue)) {
+ struct rtl_tx_desc *entry = &ring->desc[ring->idx];
+@@ -37,10 +39,12 @@ static void _rtl8821ae_return_beacon_queue_skb(struct ieee80211_hw *hw)
+ rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry,
+ true, HW_DESC_TXBUFF_ADDR),
+ skb->len, DMA_TO_DEVICE);
+- kfree_skb(skb);
++ __skb_queue_tail(&free_list, skb);
+ ring->idx = (ring->idx + 1) % ring->entries;
+ }
+ spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags);
++
++ __skb_queue_purge(&free_list);
+ }
+
+ static void _rtl8821ae_set_bcn_ctrl_reg(struct ieee80211_hw *hw,
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
+index a29321e2fa72f..5323ead30db03 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
+@@ -1598,18 +1598,6 @@ static bool _rtl8812ae_get_integer_from_string(const char *str, u8 *pint)
+ return true;
+ }
+
+-static bool _rtl8812ae_eq_n_byte(const char *str1, const char *str2, u32 num)
+-{
+- if (num == 0)
+- return false;
+- while (num > 0) {
+- num--;
+- if (str1[num] != str2[num])
+- return false;
+- }
+- return true;
+-}
+-
+ static s8 _rtl8812ae_phy_get_chnl_idx_of_txpwr_lmt(struct ieee80211_hw *hw,
+ u8 band, u8 channel)
+ {
+@@ -1659,42 +1647,42 @@ static void _rtl8812ae_phy_set_txpower_limit(struct ieee80211_hw *hw,
+ power_limit = power_limit > MAX_POWER_INDEX ?
+ MAX_POWER_INDEX : power_limit;
+
+- if (_rtl8812ae_eq_n_byte(pregulation, "FCC", 3))
++ if (strcmp(pregulation, "FCC") == 0)
+ regulation = 0;
+- else if (_rtl8812ae_eq_n_byte(pregulation, "MKK", 3))
++ else if (strcmp(pregulation, "MKK") == 0)
+ regulation = 1;
+- else if (_rtl8812ae_eq_n_byte(pregulation, "ETSI", 4))
++ else if (strcmp(pregulation, "ETSI") == 0)
+ regulation = 2;
+- else if (_rtl8812ae_eq_n_byte(pregulation, "WW13", 4))
++ else if (strcmp(pregulation, "WW13") == 0)
+ regulation = 3;
+
+- if (_rtl8812ae_eq_n_byte(prate_section, "CCK", 3))
++ if (strcmp(prate_section, "CCK") == 0)
+ rate_section = 0;
+- else if (_rtl8812ae_eq_n_byte(prate_section, "OFDM", 4))
++ else if (strcmp(prate_section, "OFDM") == 0)
+ rate_section = 1;
+- else if (_rtl8812ae_eq_n_byte(prate_section, "HT", 2) &&
+- _rtl8812ae_eq_n_byte(prf_path, "1T", 2))
++ else if (strcmp(prate_section, "HT") == 0 &&
++ strcmp(prf_path, "1T") == 0)
+ rate_section = 2;
+- else if (_rtl8812ae_eq_n_byte(prate_section, "HT", 2) &&
+- _rtl8812ae_eq_n_byte(prf_path, "2T", 2))
++ else if (strcmp(prate_section, "HT") == 0 &&
++ strcmp(prf_path, "2T") == 0)
+ rate_section = 3;
+- else if (_rtl8812ae_eq_n_byte(prate_section, "VHT", 3) &&
+- _rtl8812ae_eq_n_byte(prf_path, "1T", 2))
++ else if (strcmp(prate_section, "VHT") == 0 &&
++ strcmp(prf_path, "1T") == 0)
+ rate_section = 4;
+- else if (_rtl8812ae_eq_n_byte(prate_section, "VHT", 3) &&
+- _rtl8812ae_eq_n_byte(prf_path, "2T", 2))
++ else if (strcmp(prate_section, "VHT") == 0 &&
++ strcmp(prf_path, "2T") == 0)
+ rate_section = 5;
+
+- if (_rtl8812ae_eq_n_byte(pbandwidth, "20M", 3))
++ if (strcmp(pbandwidth, "20M") == 0)
+ bandwidth = 0;
+- else if (_rtl8812ae_eq_n_byte(pbandwidth, "40M", 3))
++ else if (strcmp(pbandwidth, "40M") == 0)
+ bandwidth = 1;
+- else if (_rtl8812ae_eq_n_byte(pbandwidth, "80M", 3))
++ else if (strcmp(pbandwidth, "80M") == 0)
+ bandwidth = 2;
+- else if (_rtl8812ae_eq_n_byte(pbandwidth, "160M", 4))
++ else if (strcmp(pbandwidth, "160M") == 0)
+ bandwidth = 3;
+
+- if (_rtl8812ae_eq_n_byte(pband, "2.4G", 4)) {
++ if (strcmp(pband, "2.4G") == 0) {
+ ret = _rtl8812ae_phy_get_chnl_idx_of_txpwr_lmt(hw,
+ BAND_ON_2_4G,
+ channel);
+@@ -1718,7 +1706,7 @@ static void _rtl8812ae_phy_set_txpower_limit(struct ieee80211_hw *hw,
+ regulation, bandwidth, rate_section, channel_index,
+ rtlphy->txpwr_limit_2_4g[regulation][bandwidth]
+ [rate_section][channel_index][RF90_PATH_A]);
+- } else if (_rtl8812ae_eq_n_byte(pband, "5G", 2)) {
++ } else if (strcmp(pband, "5G") == 0) {
+ ret = _rtl8812ae_phy_get_chnl_idx_of_txpwr_lmt(hw,
+ BAND_ON_5G,
+ channel);
+diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
+index 86a2368732547..a8eebafb9a7ee 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
++++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
+@@ -1014,7 +1014,7 @@ int rtl_usb_probe(struct usb_interface *intf,
+ hw = ieee80211_alloc_hw(sizeof(struct rtl_priv) +
+ sizeof(struct rtl_usb_priv), &rtl_ops);
+ if (!hw) {
+- WARN_ONCE(true, "rtl_usb: ieee80211 alloc failed\n");
++ pr_warn("rtl_usb: ieee80211 alloc failed\n");
+ return -ENOMEM;
+ }
+ rtlpriv = hw->priv;
+diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
+index e6399519584bd..a384fc3a4f2b0 100644
+--- a/drivers/net/wireless/realtek/rtw88/fw.c
++++ b/drivers/net/wireless/realtek/rtw88/fw.c
+@@ -1556,12 +1556,10 @@ static void rtw_fw_read_fifo_page(struct rtw_dev *rtwdev, u32 offset, u32 size,
+ u32 i;
+ u16 idx = 0;
+ u16 ctl;
+- u8 rcr;
+
+- rcr = rtw_read8(rtwdev, REG_RCR + 2);
+ ctl = rtw_read16(rtwdev, REG_PKTBUF_DBG_CTRL) & 0xf000;
+ /* disable rx clock gate */
+- rtw_write8(rtwdev, REG_RCR, rcr | BIT(3));
++ rtw_write32_set(rtwdev, REG_RCR, BIT_DISGCLK);
+
+ do {
+ rtw_write16(rtwdev, REG_PKTBUF_DBG_CTRL, start_pg | ctl);
+@@ -1580,7 +1578,8 @@ static void rtw_fw_read_fifo_page(struct rtw_dev *rtwdev, u32 offset, u32 size,
+
+ out:
+ rtw_write16(rtwdev, REG_PKTBUF_DBG_CTRL, ctl);
+- rtw_write8(rtwdev, REG_RCR + 2, rcr);
++ /* restore rx clock gate */
++ rtw_write32_clr(rtwdev, REG_RCR, BIT_DISGCLK);
+ }
+
+ static void rtw_fw_read_fifo(struct rtw_dev *rtwdev, enum rtw_fw_fifo_sel sel,
+diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
+index d1678aed9d9cb..a0576cc0c8452 100644
+--- a/drivers/net/wireless/realtek/rtw88/mac.c
++++ b/drivers/net/wireless/realtek/rtw88/mac.c
+@@ -233,7 +233,7 @@ static int rtw_pwr_seq_parser(struct rtw_dev *rtwdev,
+
+ ret = rtw_sub_pwr_seq_parser(rtwdev, intf_mask, cut_mask, cmd);
+ if (ret)
+- return -EBUSY;
++ return ret;
+
+ idx++;
+ } while (1);
+@@ -247,6 +247,7 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
+ const struct rtw_pwr_seq_cmd **pwr_seq;
+ u8 rpwm;
+ bool cur_pwr;
++ int ret;
+
+ if (rtw_chip_wcpu_11ac(rtwdev)) {
+ rpwm = rtw_read8(rtwdev, rtwdev->hci.rpwm_addr);
+@@ -270,8 +271,9 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
+ return -EALREADY;
+
+ pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq;
+- if (rtw_pwr_seq_parser(rtwdev, pwr_seq))
+- return -EINVAL;
++ ret = rtw_pwr_seq_parser(rtwdev, pwr_seq);
++ if (ret)
++ return ret;
+
+ return 0;
+ }
+diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
+index 6bb55e663fc36..d7b7b2cce9746 100644
+--- a/drivers/net/wireless/realtek/rtw88/main.c
++++ b/drivers/net/wireless/realtek/rtw88/main.c
+@@ -1819,6 +1819,10 @@ int rtw_core_init(struct rtw_dev *rtwdev)
+ timer_setup(&rtwdev->tx_report.purge_timer,
+ rtw_tx_report_purge_timer, 0);
+ rtwdev->tx_wq = alloc_workqueue("rtw_tx_wq", WQ_UNBOUND | WQ_HIGHPRI, 0);
++ if (!rtwdev->tx_wq) {
++ rtw_warn(rtwdev, "alloc_workqueue rtw_tx_wq failed\n");
++ return -ENOMEM;
++ }
+
+ INIT_DELAYED_WORK(&rtwdev->watch_dog_work, rtw_watch_dog_work);
+ INIT_DELAYED_WORK(&coex->bt_relink_work, rtw_coex_bt_relink_work);
+@@ -1859,13 +1863,13 @@ int rtw_core_init(struct rtw_dev *rtwdev)
+
+ /* default rx filter setting */
+ rtwdev->hal.rcr = BIT_APP_FCS | BIT_APP_MIC | BIT_APP_ICV |
+- BIT_HTC_LOC_CTRL | BIT_APP_PHYSTS |
++ BIT_PKTCTL_DLEN | BIT_HTC_LOC_CTRL | BIT_APP_PHYSTS |
+ BIT_AB | BIT_AM | BIT_APM;
+
+ ret = rtw_load_firmware(rtwdev, RTW_NORMAL_FW);
+ if (ret) {
+ rtw_warn(rtwdev, "no firmware loaded\n");
+- return ret;
++ goto out;
+ }
+
+ if (chip->wow_fw_name) {
+@@ -1875,11 +1879,15 @@ int rtw_core_init(struct rtw_dev *rtwdev)
+ wait_for_completion(&rtwdev->fw.completion);
+ if (rtwdev->fw.firmware)
+ release_firmware(rtwdev->fw.firmware);
+- return ret;
++ goto out;
+ }
+ }
+
+ return 0;
++
++out:
++ destroy_workqueue(rtwdev->tx_wq);
++ return ret;
+ }
+ EXPORT_SYMBOL(rtw_core_init);
+
+diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
+index a7a6ebfaa203c..08cf66141889b 100644
+--- a/drivers/net/wireless/realtek/rtw88/pci.c
++++ b/drivers/net/wireless/realtek/rtw88/pci.c
+@@ -2,7 +2,6 @@
+ /* Copyright(c) 2018-2019 Realtek Corporation
+ */
+
+-#include <linux/dmi.h>
+ #include <linux/module.h>
+ #include <linux/pci.h>
+ #include "main.h"
+@@ -1409,7 +1408,11 @@ static void rtw_pci_link_ps(struct rtw_dev *rtwdev, bool enter)
+ * throughput. This is probably because the ASPM behavior slightly
+ * varies from different SOC.
+ */
+- if (rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1)
++ if (!(rtwpci->link_ctrl & PCI_EXP_LNKCTL_ASPM_L1))
++ return;
++
++ if ((enter && atomic_dec_if_positive(&rtwpci->link_usage) == 0) ||
++ (!enter && atomic_inc_return(&rtwpci->link_usage) == 1))
+ rtw_pci_aspm_set(rtwdev, enter);
+ }
+
+@@ -1658,6 +1661,9 @@ static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
+ priv);
+ int work_done = 0;
+
++ if (rtwpci->rx_no_aspm)
++ rtw_pci_link_ps(rtwdev, false);
++
+ while (work_done < budget) {
+ u32 work_done_once;
+
+@@ -1681,6 +1687,8 @@ static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
+ if (rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci))
+ napi_schedule(napi);
+ }
++ if (rtwpci->rx_no_aspm)
++ rtw_pci_link_ps(rtwdev, true);
+
+ return work_done;
+ }
+@@ -1702,50 +1710,13 @@ static void rtw_pci_napi_deinit(struct rtw_dev *rtwdev)
+ netif_napi_del(&rtwpci->napi);
+ }
+
+-enum rtw88_quirk_dis_pci_caps {
+- QUIRK_DIS_PCI_CAP_MSI,
+- QUIRK_DIS_PCI_CAP_ASPM,
+-};
+-
+-static int disable_pci_caps(const struct dmi_system_id *dmi)
+-{
+- uintptr_t dis_caps = (uintptr_t)dmi->driver_data;
+-
+- if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_MSI))
+- rtw_disable_msi = true;
+- if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_ASPM))
+- rtw_pci_disable_aspm = true;
+-
+- return 1;
+-}
+-
+-static const struct dmi_system_id rtw88_pci_quirks[] = {
+- {
+- .callback = disable_pci_caps,
+- .ident = "Protempo Ltd L116HTN6SPW",
+- .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Protempo Ltd"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "L116HTN6SPW"),
+- },
+- .driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
+- },
+- {
+- .callback = disable_pci_caps,
+- .ident = "HP HP Pavilion Laptop 14-ce0xxx",
+- .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Laptop 14-ce0xxx"),
+- },
+- .driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
+- },
+- {}
+-};
+-
+ int rtw_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+ {
++ struct pci_dev *bridge = pci_upstream_bridge(pdev);
+ struct ieee80211_hw *hw;
+ struct rtw_dev *rtwdev;
++ struct rtw_pci *rtwpci;
+ int drv_data_size;
+ int ret;
+
+@@ -1763,6 +1734,9 @@ int rtw_pci_probe(struct pci_dev *pdev,
+ rtwdev->hci.ops = &rtw_pci_ops;
+ rtwdev->hci.type = RTW_HCI_TYPE_PCIE;
+
++ rtwpci = (struct rtw_pci *)rtwdev->priv;
++ atomic_set(&rtwpci->link_usage, 1);
++
+ ret = rtw_core_init(rtwdev);
+ if (ret)
+ goto err_release_hw;
+@@ -1791,7 +1765,10 @@ int rtw_pci_probe(struct pci_dev *pdev,
+ goto err_destroy_pci;
+ }
+
+- dmi_check_system(rtw88_pci_quirks);
++ /* Disable PCIe ASPM L1 while doing NAPI poll for 8821CE */
++ if (pdev->device == 0xc821 && bridge->vendor == PCI_VENDOR_ID_INTEL)
++ rtwpci->rx_no_aspm = true;
++
+ rtw_pci_phy_cfg(rtwdev);
+
+ ret = rtw_register_hw(rtwdev, hw);
+diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
+index 66f78eb7757c5..0c37efd8c66fa 100644
+--- a/drivers/net/wireless/realtek/rtw88/pci.h
++++ b/drivers/net/wireless/realtek/rtw88/pci.h
+@@ -223,6 +223,8 @@ struct rtw_pci {
+ struct rtw_pci_tx_ring tx_rings[RTK_MAX_TX_QUEUE_NUM];
+ struct rtw_pci_rx_ring rx_rings[RTK_MAX_RX_QUEUE_NUM];
+ u16 link_ctrl;
++ atomic_t link_usage;
++ bool rx_no_aspm;
+ DECLARE_BITMAP(flags, NUM_OF_RTW_PCI_FLAGS);
+
+ void __iomem *mmap;
+diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
+index f5ce75095e904..c0fb1e446245f 100644
+--- a/drivers/net/wireless/realtek/rtw88/reg.h
++++ b/drivers/net/wireless/realtek/rtw88/reg.h
+@@ -406,6 +406,7 @@
+ #define BIT_MFBEN BIT(22)
+ #define BIT_DISCHKPPDLLEN BIT(21)
+ #define BIT_PKTCTL_DLEN BIT(20)
++#define BIT_DISGCLK BIT(19)
+ #define BIT_TIM_PARSER_EN BIT(18)
+ #define BIT_BC_MD_EN BIT(17)
+ #define BIT_UC_MD_EN BIT(16)
+diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+index 785b8181513f1..280602a34fe67 100644
+--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
++++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+@@ -40,7 +40,7 @@ static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
+
+ map = (struct rtw8821c_efuse *)log_map;
+
+- efuse->rfe_option = map->rfe_option;
++ efuse->rfe_option = map->rfe_option & 0x1f;
+ efuse->rf_board_option = map->rf_board_option;
+ efuse->crystal_cap = map->xtal_k;
+ efuse->pa_type_2g = map->pa_type;
+@@ -304,7 +304,8 @@ static void rtw8821c_set_channel_rf(struct rtw_dev *rtwdev, u8 channel, u8 bw)
+ if (channel <= 14) {
+ if (rtwdev->efuse.rfe_option == 0)
+ rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_WLG);
+- else if (rtwdev->efuse.rfe_option == 2)
++ else if (rtwdev->efuse.rfe_option == 2 ||
++ rtwdev->efuse.rfe_option == 4)
+ rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_BTG);
+ rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTDBG, BIT(6), 0x1);
+ rtw_write_rf(rtwdev, RF_PATH_A, 0x64, 0xf, 0xf);
+@@ -506,6 +507,7 @@ static s8 get_cck_rx_pwr(struct rtw_dev *rtwdev, u8 lna_idx, u8 vga_idx)
+ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status,
+ struct rtw_rx_pkt_stat *pkt_stat)
+ {
++ struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+ s8 rx_power;
+ u8 lna_idx = 0;
+ u8 vga_idx = 0;
+@@ -517,6 +519,7 @@ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status,
+
+ pkt_stat->rx_power[RF_PATH_A] = rx_power;
+ pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1);
++ dm_info->rssi[RF_PATH_A] = pkt_stat->rssi;
+ pkt_stat->bw = RTW_CHANNEL_WIDTH_20;
+ pkt_stat->signal_power = rx_power;
+ }
+@@ -524,6 +527,7 @@ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status,
+ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
+ struct rtw_rx_pkt_stat *pkt_stat)
+ {
++ struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+ u8 rxsc, bw;
+ s8 min_rx_power = -120;
+
+@@ -543,6 +547,7 @@ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
+
+ pkt_stat->rx_power[RF_PATH_A] = GET_PHY_STAT_P1_PWDB_A(phy_status) - 110;
+ pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1);
++ dm_info->rssi[RF_PATH_A] = pkt_stat->rssi;
+ pkt_stat->bw = bw;
+ pkt_stat->signal_power = max(pkt_stat->rx_power[RF_PATH_A],
+ min_rx_power);
+@@ -773,6 +778,15 @@ static void rtw8821c_coex_cfg_ant_switch(struct rtw_dev *rtwdev, u8 ctrl_type,
+ if (switch_status == coex_dm->cur_switch_status)
+ return;
+
++ if (coex_rfe->wlg_at_btg) {
++ ctrl_type = COEX_SWITCH_CTRL_BY_BBSW;
++
++ if (coex_rfe->ant_switch_polarity)
++ pos_type = COEX_SWITCH_TO_WLA;
++ else
++ pos_type = COEX_SWITCH_TO_WLG_BT;
++ }
++
+ coex_dm->cur_switch_status = switch_status;
+
+ if (coex_rfe->ant_switch_diversity &&
+@@ -1498,6 +1512,8 @@ static const struct rtw_intf_phy_para_table phy_para_table_8821c = {
+ static const struct rtw_rfe_def rtw8821c_rfe_defs[] = {
+ [0] = RTW_DEF_RFE(8821c, 0, 0),
+ [2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
++ [4] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
++ [6] = RTW_DEF_RFE(8821c, 0, 0),
+ };
+
+ static struct rtw_hw_reg rtw8821c_dig[] = {
+diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h
+index 112faa60f653e..d9fbddd7b0f35 100644
+--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h
++++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h
+@@ -131,7 +131,7 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data)
+ #define WLAN_TX_FUNC_CFG2 0x30
+ #define WLAN_MAC_OPT_NORM_FUNC1 0x98
+ #define WLAN_MAC_OPT_LB_FUNC1 0x80
+-#define WLAN_MAC_OPT_FUNC2 0x30810041
++#define WLAN_MAC_OPT_FUNC2 0xb0810041
+
+ #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \
+ (WLAN_SIFS_OFDM_CONT_TX << BIT_SHIFT_SIFS_OFDM_CTX) | \
+diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+index f1789155e9016..247f26e3e8192 100644
+--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
++++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+@@ -204,7 +204,7 @@ static void rtw8822b_phy_set_param(struct rtw_dev *rtwdev)
+ #define WLAN_TX_FUNC_CFG2 0x30
+ #define WLAN_MAC_OPT_NORM_FUNC1 0x98
+ #define WLAN_MAC_OPT_LB_FUNC1 0x80
+-#define WLAN_MAC_OPT_FUNC2 0x30810041
++#define WLAN_MAC_OPT_FUNC2 0xb0810041
+
+ #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \
+ (WLAN_SIFS_OFDM_CONT_TX << BIT_SHIFT_SIFS_OFDM_CTX) | \
+diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+index f3ad079967a68..bc87e3cb9cdce 100644
+--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
++++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+@@ -1962,7 +1962,7 @@ static void rtw8822c_phy_set_param(struct rtw_dev *rtwdev)
+ #define WLAN_TX_FUNC_CFG2 0x30
+ #define WLAN_MAC_OPT_NORM_FUNC1 0x98
+ #define WLAN_MAC_OPT_LB_FUNC1 0x80
+-#define WLAN_MAC_OPT_FUNC2 0x30810041
++#define WLAN_MAC_OPT_FUNC2 0xb0810041
+ #define WLAN_MAC_INT_MIG_CFG 0x33330000
+
+ #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \
+diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
+index 63ce2443f1364..70841d131d724 100644
+--- a/drivers/net/wireless/rndis_wlan.c
++++ b/drivers/net/wireless/rndis_wlan.c
+@@ -694,8 +694,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len)
+ struct rndis_query *get;
+ struct rndis_query_c *get_c;
+ } u;
+- int ret, buflen;
+- int resplen, respoffs, copylen;
++ int ret;
++ size_t buflen, resplen, respoffs, copylen;
+
+ buflen = *len + sizeof(*u.get);
+ if (buflen < CONTROL_BUFFER_SIZE)
+@@ -730,22 +730,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len)
+
+ if (respoffs > buflen) {
+ /* Device returned data offset outside buffer, error. */
+- netdev_dbg(dev->net, "%s(%s): received invalid "
+- "data offset: %d > %d\n", __func__,
+- oid_to_string(oid), respoffs, buflen);
++ netdev_dbg(dev->net,
++ "%s(%s): received invalid data offset: %zu > %zu\n",
++ __func__, oid_to_string(oid), respoffs, buflen);
+
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+- if ((resplen + respoffs) > buflen) {
+- /* Device would have returned more data if buffer would
+- * have been big enough. Copy just the bits that we got.
+- */
+- copylen = buflen - respoffs;
+- } else {
+- copylen = resplen;
+- }
++ copylen = min(resplen, buflen - respoffs);
+
+ if (copylen > *len)
+ copylen = *len;
+diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
+index a0c5d02ae88cf..7395359b43b77 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_coex.c
++++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
+@@ -160,6 +160,7 @@ int rsi_coex_attach(struct rsi_common *common)
+ rsi_coex_scheduler_thread,
+ "Coex-Tx-Thread")) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to init tx thrd\n", __func__);
++ kfree(coex_cb);
+ return -EINVAL;
+ }
+ return 0;
+diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c
+index a48e616e0fb91..6b64a103f39f0 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_core.c
++++ b/drivers/net/wireless/rsi/rsi_91x_core.c
+@@ -399,6 +399,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
+
+ info = IEEE80211_SKB_CB(skb);
+ tx_params = (struct skb_info *)info->driver_data;
++ /* info->driver_data and info->control part of union so make copy */
++ tx_params->have_key = !!info->control.hw_key;
+ wh = (struct ieee80211_hdr *)&skb->data[0];
+ tx_params->sta_id = 0;
+
+@@ -463,7 +465,9 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
+ tid, 0);
+ }
+ }
+- if (skb->protocol == cpu_to_be16(ETH_P_PAE)) {
++
++ if (IEEE80211_SKB_CB(skb)->control.flags &
++ IEEE80211_TX_CTRL_PORT_CTRL_PROTO) {
+ q_num = MGMT_SOFT_Q;
+ skb->priority = q_num;
+ }
+diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
+index f4a26f16f00f4..30d2eccbcadd5 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
++++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
+@@ -162,12 +162,16 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
+ u8 header_size;
+ u8 vap_id = 0;
+ u8 dword_align_bytes;
++ bool tx_eapol;
+ u16 seq_num;
+
+ info = IEEE80211_SKB_CB(skb);
+ vif = info->control.vif;
+ tx_params = (struct skb_info *)info->driver_data;
+
++ tx_eapol = IEEE80211_SKB_CB(skb)->control.flags &
++ IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
++
+ header_size = FRAME_DESC_SZ + sizeof(struct rsi_xtended_desc);
+ if (header_size > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__);
+@@ -203,7 +207,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
+ wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE);
+
+ if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) &&
+- info->control.hw_key) {
++ tx_params->have_key) {
+ if (rsi_is_cipher_wep(common))
+ ieee80211_size += 4;
+ else
+@@ -214,22 +218,24 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
+ RSI_WIFI_DATA_Q);
+ data_desc->header_len = ieee80211_size;
+
+- if (common->min_rate != RSI_RATE_AUTO) {
++ if (common->rate_config[common->band].fixed_enabled) {
+ /* Send fixed rate */
++ u16 fixed_rate = common->rate_config[common->band].fixed_hw_rate;
++
+ data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE);
+- data_desc->rate_info = cpu_to_le16(common->min_rate);
++ data_desc->rate_info = cpu_to_le16(fixed_rate);
+
+ if (conf_is_ht40(&common->priv->hw->conf))
+ data_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE);
+
+- if ((common->vif_info[0].sgi) && (common->min_rate & 0x100)) {
++ if (common->vif_info[0].sgi && (fixed_rate & 0x100)) {
+ /* Only MCS rates */
+ data_desc->rate_info |=
+ cpu_to_le16(ENABLE_SHORTGI_RATE);
+ }
+ }
+
+- if (skb->protocol == cpu_to_be16(ETH_P_PAE)) {
++ if (tx_eapol) {
+ rsi_dbg(INFO_ZONE, "*** Tx EAPOL ***\n");
+
+ data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE);
+diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+index b66975f545675..e70c1c7fdf595 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
++++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+@@ -510,7 +510,6 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw,
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
+ rsi_send_rx_filter_frame(common, DISALLOW_BEACONS);
+- common->min_rate = RSI_RATE_AUTO;
+ for (i = 0; i < common->max_stations; i++)
+ common->stations[i].sta = NULL;
+ }
+@@ -1228,20 +1227,32 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ const struct cfg80211_bitrate_mask *mask)
+ {
++ const unsigned int mcs_offset = ARRAY_SIZE(rsi_rates);
+ struct rsi_hw *adapter = hw->priv;
+ struct rsi_common *common = adapter->priv;
+- enum nl80211_band band = hw->conf.chandef.chan->band;
++ int i;
+
+ mutex_lock(&common->mutex);
+- common->fixedrate_mask[band] = 0;
+
+- if (mask->control[band].legacy == 0xfff) {
+- common->fixedrate_mask[band] =
+- (mask->control[band].ht_mcs[0] << 12);
+- } else {
+- common->fixedrate_mask[band] =
+- mask->control[band].legacy;
++ for (i = 0; i < ARRAY_SIZE(common->rate_config); i++) {
++ struct rsi_rate_config *cfg = &common->rate_config[i];
++ u32 bm;
++
++ bm = mask->control[i].legacy | (mask->control[i].ht_mcs[0] << mcs_offset);
++ if (hweight32(bm) == 1) { /* single rate */
++ int rate_index = ffs(bm) - 1;
++
++ if (rate_index < mcs_offset)
++ cfg->fixed_hw_rate = rsi_rates[rate_index].hw_value;
++ else
++ cfg->fixed_hw_rate = rsi_mcsrates[rate_index - mcs_offset];
++ cfg->fixed_enabled = true;
++ } else {
++ cfg->configured_mask = bm;
++ cfg->fixed_enabled = false;
++ }
+ }
++
+ mutex_unlock(&common->mutex);
+
+ return 0;
+@@ -1378,46 +1389,6 @@ void rsi_indicate_pkt_to_os(struct rsi_common *common,
+ ieee80211_rx_irqsafe(hw, skb);
+ }
+
+-static void rsi_set_min_rate(struct ieee80211_hw *hw,
+- struct ieee80211_sta *sta,
+- struct rsi_common *common)
+-{
+- u8 band = hw->conf.chandef.chan->band;
+- u8 ii;
+- u32 rate_bitmap;
+- bool matched = false;
+-
+- common->bitrate_mask[band] = sta->supp_rates[band];
+-
+- rate_bitmap = (common->fixedrate_mask[band] & sta->supp_rates[band]);
+-
+- if (rate_bitmap & 0xfff) {
+- /* Find out the min rate */
+- for (ii = 0; ii < ARRAY_SIZE(rsi_rates); ii++) {
+- if (rate_bitmap & BIT(ii)) {
+- common->min_rate = rsi_rates[ii].hw_value;
+- matched = true;
+- break;
+- }
+- }
+- }
+-
+- common->vif_info[0].is_ht = sta->ht_cap.ht_supported;
+-
+- if ((common->vif_info[0].is_ht) && (rate_bitmap >> 12)) {
+- for (ii = 0; ii < ARRAY_SIZE(rsi_mcsrates); ii++) {
+- if ((rate_bitmap >> 12) & BIT(ii)) {
+- common->min_rate = rsi_mcsrates[ii];
+- matched = true;
+- break;
+- }
+- }
+- }
+-
+- if (!matched)
+- common->min_rate = 0xffff;
+-}
+-
+ /**
+ * rsi_mac80211_sta_add() - This function notifies driver about a peer getting
+ * connected.
+@@ -1516,9 +1487,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
+
+ if ((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) {
+- rsi_set_min_rate(hw, sta, common);
++ common->bitrate_mask[common->band] = sta->supp_rates[common->band];
++ common->vif_info[0].is_ht = sta->ht_cap.ht_supported;
+ if (sta->ht_cap.ht_supported) {
+- common->vif_info[0].is_ht = true;
+ common->bitrate_mask[NL80211_BAND_2GHZ] =
+ sta->supp_rates[NL80211_BAND_2GHZ];
+ if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) ||
+@@ -1592,7 +1563,6 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
+ bss->qos = sta->wme;
+ common->bitrate_mask[NL80211_BAND_2GHZ] = 0;
+ common->bitrate_mask[NL80211_BAND_5GHZ] = 0;
+- common->min_rate = 0xffff;
+ common->vif_info[0].is_ht = false;
+ common->vif_info[0].sgi = false;
+ common->vif_info[0].seq_start = 0;
+diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
+index d98483298555c..5d1490fc32db4 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_main.c
++++ b/drivers/net/wireless/rsi/rsi_91x_main.c
+@@ -23,6 +23,7 @@
+ #include "rsi_common.h"
+ #include "rsi_coex.h"
+ #include "rsi_hal.h"
++#include "rsi_usb.h"
+
+ u32 rsi_zone_enabled = /* INFO_ZONE |
+ INIT_ZONE |
+@@ -168,6 +169,9 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
+ frame_desc = &rx_pkt[index];
+ actual_length = *(u16 *)&frame_desc[0];
+ offset = *(u16 *)&frame_desc[2];
++ if (!rcv_pkt_len && offset >
++ RSI_MAX_RX_USB_PKT_SIZE - FRAME_DESC_SZ)
++ goto fail;
+
+ queueno = rsi_get_queueno(frame_desc, offset);
+ length = rsi_get_length(frame_desc, offset);
+@@ -211,9 +215,10 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
+ bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST];
+ if (bt_pkt_type == BT_CARD_READY_IND) {
+ rsi_dbg(INFO_ZONE, "BT Card ready recvd\n");
+- if (rsi_bt_ops.attach(common, &g_proto_ops))
+- rsi_dbg(ERR_ZONE,
+- "Failed to attach BT module\n");
++ if (common->fsm_state == FSM_MAC_INIT_DONE)
++ rsi_attach_bt(common);
++ else
++ common->bt_defer_attach = true;
+ } else {
+ if (common->bt_adapter)
+ rsi_bt_ops.recv_pkt(common->bt_adapter,
+@@ -278,6 +283,15 @@ void rsi_set_bt_context(void *priv, void *bt_context)
+ }
+ #endif
+
++void rsi_attach_bt(struct rsi_common *common)
++{
++#ifdef CONFIG_RSI_COEX
++ if (rsi_bt_ops.attach(common, &g_proto_ops))
++ rsi_dbg(ERR_ZONE,
++ "Failed to attach BT module\n");
++#endif
++}
++
+ /**
+ * rsi_91x_init() - This function initializes os interface operations.
+ * @oper_mode: One of DEV_OPMODE_*.
+@@ -359,6 +373,7 @@ struct rsi_hw *rsi_91x_init(u16 oper_mode)
+ if (common->coex_mode > 1) {
+ if (rsi_coex_attach(common)) {
+ rsi_dbg(ERR_ZONE, "Failed to init coex module\n");
++ rsi_kill_thread(&common->tx_thread);
+ goto err;
+ }
+ }
+diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+index 891fd5f0fa765..0848f7a7e76c6 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
++++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+@@ -276,7 +276,7 @@ static void rsi_set_default_parameters(struct rsi_common *common)
+ common->channel_width = BW_20MHZ;
+ common->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
+ common->channel = 1;
+- common->min_rate = 0xffff;
++ memset(&common->rate_config, 0, sizeof(common->rate_config));
+ common->fsm_state = FSM_CARD_NOT_READY;
+ common->iface_down = true;
+ common->endpoint = EP_2GHZ_20MHZ;
+@@ -1314,7 +1314,7 @@ static int rsi_send_auto_rate_request(struct rsi_common *common,
+ u8 band = hw->conf.chandef.chan->band;
+ u8 num_supported_rates = 0;
+ u8 rate_table_offset, rate_offset = 0;
+- u32 rate_bitmap;
++ u32 rate_bitmap, configured_rates;
+ u16 *selected_rates, min_rate;
+ bool is_ht = false, is_sgi = false;
+ u16 frame_len = sizeof(struct rsi_auto_rate);
+@@ -1364,6 +1364,10 @@ static int rsi_send_auto_rate_request(struct rsi_common *common,
+ is_sgi = true;
+ }
+
++ /* Limit to any rates administratively configured by cfg80211 */
++ configured_rates = common->rate_config[band].configured_mask ?: 0xffffffff;
++ rate_bitmap &= configured_rates;
++
+ if (band == NL80211_BAND_2GHZ) {
+ if ((rate_bitmap == 0) && (is_ht))
+ min_rate = RSI_RATE_MCS0;
+@@ -1389,10 +1393,13 @@ static int rsi_send_auto_rate_request(struct rsi_common *common,
+ num_supported_rates = jj;
+
+ if (is_ht) {
+- for (ii = 0; ii < ARRAY_SIZE(mcs); ii++)
+- selected_rates[jj++] = mcs[ii];
+- num_supported_rates += ARRAY_SIZE(mcs);
+- rate_offset += ARRAY_SIZE(mcs);
++ for (ii = 0; ii < ARRAY_SIZE(mcs); ii++) {
++ if (configured_rates & BIT(ii + ARRAY_SIZE(rsi_rates))) {
++ selected_rates[jj++] = mcs[ii];
++ num_supported_rates++;
++ rate_offset++;
++ }
++ }
+ }
+
+ sort(selected_rates, jj, sizeof(u16), &rsi_compare, NULL);
+@@ -1482,7 +1489,7 @@ void rsi_inform_bss_status(struct rsi_common *common,
+ qos_enable,
+ aid, sta_id,
+ vif);
+- if (common->min_rate == 0xffff)
++ if (!common->rate_config[common->band].fixed_enabled)
+ rsi_send_auto_rate_request(common, sta, sta_id, vif);
+ if (opmode == RSI_OPMODE_STA &&
+ !(assoc_cap & WLAN_CAPABILITY_PRIVACY) &&
+@@ -2071,6 +2078,9 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common,
+ if (common->reinit_hw) {
+ complete(&common->wlan_init_completion);
+ } else {
++ if (common->bt_defer_attach)
++ rsi_attach_bt(common);
++
+ return rsi_mac80211_attach(common);
+ }
+ }
+diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
+index e0c502bc42707..670de56c69a26 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
++++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
+@@ -24,10 +24,7 @@
+ /* Default operating mode is wlan STA + BT */
+ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+ module_param(dev_oper_mode, ushort, 0444);
+-MODULE_PARM_DESC(dev_oper_mode,
+- "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+- "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+- "6[AP + BT classic], 14[AP + BT classic + BT LE]");
++MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC);
+
+ /**
+ * rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg.
+@@ -1466,10 +1463,8 @@ static void rsi_shutdown(struct device *dev)
+
+ rsi_dbg(ERR_ZONE, "SDIO Bus shutdown =====>\n");
+
+- if (hw) {
+- struct cfg80211_wowlan *wowlan = hw->wiphy->wowlan_config;
+-
+- if (rsi_config_wowlan(adapter, wowlan))
++ if (hw && hw->wiphy && hw->wiphy->wowlan_config) {
++ if (rsi_config_wowlan(adapter, hw->wiphy->wowlan_config))
+ rsi_dbg(ERR_ZONE, "Failed to configure WoWLAN\n");
+ }
+
+@@ -1484,9 +1479,6 @@ static void rsi_shutdown(struct device *dev)
+ if (sdev->write_fail)
+ rsi_dbg(INFO_ZONE, "###### Device is not ready #######\n");
+
+- if (rsi_set_sdio_pm_caps(adapter))
+- rsi_dbg(INFO_ZONE, "Setting power management caps failed\n");
+-
+ rsi_dbg(INFO_ZONE, "***** RSI module shut down *****\n");
+ }
+
+diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
+index 416976f098882..66fe386ec9cc6 100644
+--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
++++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
+@@ -25,10 +25,7 @@
+ /* Default operating mode is wlan STA + BT */
+ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+ module_param(dev_oper_mode, ushort, 0444);
+-MODULE_PARM_DESC(dev_oper_mode,
+- "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+- "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+- "6[AP + BT classic], 14[AP + BT classic + BT LE]");
++MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC);
+
+ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t flags);
+
+@@ -61,7 +58,7 @@ static int rsi_usb_card_write(struct rsi_hw *adapter,
+ (void *)seg,
+ (int)len,
+ &transfer,
+- HZ * 5);
++ USB_CTRL_SET_TIMEOUT);
+
+ if (status < 0) {
+ rsi_dbg(ERR_ZONE,
+@@ -272,8 +269,12 @@ static void rsi_rx_done_handler(struct urb *urb)
+ struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data;
+ int status = -EINVAL;
+
++ if (!rx_cb->rx_skb)
++ return;
++
+ if (urb->status) {
+ dev_kfree_skb(rx_cb->rx_skb);
++ rx_cb->rx_skb = NULL;
+ return;
+ }
+
+@@ -297,8 +298,10 @@ out:
+ if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num, GFP_ATOMIC))
+ rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__);
+
+- if (status)
++ if (status) {
+ dev_kfree_skb(rx_cb->rx_skb);
++ rx_cb->rx_skb = NULL;
++ }
+ }
+
+ static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num)
+@@ -327,7 +330,6 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t mem_flags)
+ struct sk_buff *skb;
+ u8 dword_align_bytes = 0;
+
+-#define RSI_MAX_RX_USB_PKT_SIZE 3000
+ skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE);
+ if (!skb)
+ return -ENOMEM;
+diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
+index d044a440fa080..5b07262a97408 100644
+--- a/drivers/net/wireless/rsi/rsi_hal.h
++++ b/drivers/net/wireless/rsi/rsi_hal.h
+@@ -28,6 +28,17 @@
+ #define DEV_OPMODE_AP_BT 6
+ #define DEV_OPMODE_AP_BT_DUAL 14
+
++#define DEV_OPMODE_PARAM_DESC \
++ __stringify(DEV_OPMODE_WIFI_ALONE) "[Wi-Fi alone], " \
++ __stringify(DEV_OPMODE_BT_ALONE) "[BT classic alone], " \
++ __stringify(DEV_OPMODE_BT_LE_ALONE) "[BT LE alone], " \
++ __stringify(DEV_OPMODE_BT_DUAL) "[BT classic + BT LE alone], " \
++ __stringify(DEV_OPMODE_STA_BT) "[Wi-Fi STA + BT classic], " \
++ __stringify(DEV_OPMODE_STA_BT_LE) "[Wi-Fi STA + BT LE], " \
++ __stringify(DEV_OPMODE_STA_BT_DUAL) "[Wi-Fi STA + BT classic + BT LE], " \
++ __stringify(DEV_OPMODE_AP_BT) "[Wi-Fi AP + BT classic], " \
++ __stringify(DEV_OPMODE_AP_BT_DUAL) "[Wi-Fi AP + BT classic + BT LE]"
++
+ #define FLASH_WRITE_CHUNK_SIZE (4 * 1024)
+ #define FLASH_SECTOR_SIZE (4 * 1024)
+
+diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
+index 0f535850a3836..dcf8fb40698b7 100644
+--- a/drivers/net/wireless/rsi/rsi_main.h
++++ b/drivers/net/wireless/rsi/rsi_main.h
+@@ -61,6 +61,7 @@ enum RSI_FSM_STATES {
+ extern u32 rsi_zone_enabled;
+ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
+
++#define RSI_MAX_BANDS 2
+ #define RSI_MAX_VIFS 3
+ #define NUM_EDCA_QUEUES 4
+ #define IEEE80211_ADDR_LEN 6
+@@ -139,6 +140,7 @@ struct skb_info {
+ u8 internal_hdr_size;
+ struct ieee80211_vif *vif;
+ u8 vap_id;
++ bool have_key;
+ };
+
+ enum edca_queue {
+@@ -229,6 +231,12 @@ struct rsi_9116_features {
+ u32 ps_options;
+ };
+
++struct rsi_rate_config {
++ u32 configured_mask; /* configured by mac80211 bits 0-11=legacy 12+ mcs */
++ u16 fixed_hw_rate;
++ bool fixed_enabled;
++};
++
+ struct rsi_common {
+ struct rsi_hw *priv;
+ struct vif_priv vif_info[RSI_MAX_VIFS];
+@@ -254,8 +262,8 @@ struct rsi_common {
+ u8 channel_width;
+
+ u16 rts_threshold;
+- u16 bitrate_mask[2];
+- u32 fixedrate_mask[2];
++ u32 bitrate_mask[RSI_MAX_BANDS];
++ struct rsi_rate_config rate_config[RSI_MAX_BANDS];
+
+ u8 rf_reset;
+ struct transmit_q_stats tx_stats;
+@@ -276,7 +284,6 @@ struct rsi_common {
+ u8 mac_id;
+ u8 radio_id;
+ u16 rate_pwr[20];
+- u16 min_rate;
+
+ /* WMM algo related */
+ u8 selected_qnum;
+@@ -320,6 +327,7 @@ struct rsi_common {
+ struct ieee80211_vif *roc_vif;
+
+ bool eapol4_confirm;
++ bool bt_defer_attach;
+ void *bt_adapter;
+
+ struct cfg80211_scan_request *hwscan;
+@@ -401,5 +409,6 @@ struct rsi_host_intf_ops {
+
+ enum rsi_host_intf rsi_get_host_intf(void *priv);
+ void rsi_set_bt_context(void *priv, void *bt_context);
++void rsi_attach_bt(struct rsi_common *common);
+
+ #endif
+diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
+index 254d19b664123..961851748bc4c 100644
+--- a/drivers/net/wireless/rsi/rsi_usb.h
++++ b/drivers/net/wireless/rsi/rsi_usb.h
+@@ -44,6 +44,8 @@
+ #define RSI_USB_BUF_SIZE 4096
+ #define RSI_USB_CTRL_BUF_SIZE 0x04
+
++#define RSI_MAX_RX_USB_PKT_SIZE 3000
++
+ struct rx_usb_ctrl_block {
+ u8 *data;
+ struct urb *rx_urb;
+diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
+index 672f5d5f3f2c7..4c408fd7c1594 100644
+--- a/drivers/net/wireless/wl3501_cs.c
++++ b/drivers/net/wireless/wl3501_cs.c
+@@ -1328,7 +1328,7 @@ static netdev_tx_t wl3501_hard_start_xmit(struct sk_buff *skb,
+ } else {
+ ++dev->stats.tx_packets;
+ dev->stats.tx_bytes += skb->len;
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+
+ if (this->tx_buffer_cnt < 2)
+ netif_stop_queue(dev);
+@@ -1862,6 +1862,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
+ {
+ struct net_device *dev;
+ struct wl3501_card *this;
++ int ret;
+
+ /* The io structure describes IO port mapping */
+ p_dev->resource[0]->end = 16;
+@@ -1873,8 +1874,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
+
+ dev = alloc_etherdev(sizeof(struct wl3501_card));
+ if (!dev)
+- goto out_link;
+-
++ return -ENOMEM;
+
+ dev->netdev_ops = &wl3501_netdev_ops;
+ dev->watchdog_timeo = 5 * HZ;
+@@ -1887,9 +1887,15 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
+ netif_stop_queue(dev);
+ p_dev->priv = dev;
+
+- return wl3501_config(p_dev);
+-out_link:
+- return -ENOMEM;
++ ret = wl3501_config(p_dev);
++ if (ret)
++ goto out_free_etherdev;
++
++ return 0;
++
++out_free_etherdev:
++ free_netdev(dev);
++ return ret;
+ }
+
+ static int wl3501_config(struct pcmcia_device *link)
+@@ -1945,8 +1951,7 @@ static int wl3501_config(struct pcmcia_device *link)
+ goto failed;
+ }
+
+- for (i = 0; i < 6; i++)
+- dev->dev_addr[i] = ((char *)&this->mac_addr)[i];
++ eth_hw_addr_set(dev, this->mac_addr);
+
+ /* print probe information */
+ printk(KERN_INFO "%s: wl3501 @ 0x%3.3x, IRQ %d, "
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+index bdb2d32cdb6d7..e323fe1ae5380 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+@@ -830,8 +830,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+ ipc_mux->ul_data_pend_bytes);
+
+ /* Reset the skb settings. */
+- skb->tail = 0;
+- skb->len = 0;
++ skb_trim(skb, 0);
+
+ /* Add the consumed ADB to the free list. */
+ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb);
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+index 2fe88b8be3481..8b4222b137d14 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+@@ -232,6 +232,7 @@ static void ipc_pcie_config_init(struct iosm_pcie *ipc_pcie)
+ */
+ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev)
+ {
++ enum ipc_pcie_sleep_state sleep_state = IPC_PCIE_D0L12;
+ union acpi_object *object;
+ acpi_handle handle_acpi;
+
+@@ -242,12 +243,16 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev)
+ }
+
+ object = acpi_evaluate_dsm(handle_acpi, &wwan_acpi_guid, 0, 3, NULL);
++ if (!object)
++ goto default_ret;
++
++ if (object->integer.value == 3)
++ sleep_state = IPC_PCIE_D3L2;
+
+- if (object && object->integer.value == 3)
+- return IPC_PCIE_D3L2;
++ ACPI_FREE(object);
+
+ default_ret:
+- return IPC_PCIE_D0L12;
++ return sleep_state;
+ }
+
+ static int ipc_pcie_probe(struct pci_dev *pci,
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
+index 9b3a6d86ece7a..289397c4ea6ce 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h
++++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
+@@ -122,7 +122,7 @@ struct iosm_protocol {
+ struct iosm_imem *imem;
+ struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES];
+ struct device *dev;
+- phys_addr_t phy_ap_shm;
++ dma_addr_t phy_ap_shm;
+ u32 old_msg_tail;
+ };
+
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
+index c6b032f95d2e4..4627847c6daab 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
+@@ -372,8 +372,6 @@ bool ipc_protocol_dl_td_prepare(struct iosm_protocol *ipc_protocol,
+ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
+ struct ipc_pipe *pipe)
+ {
+- u32 tail =
+- le32_to_cpu(ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr]);
+ struct ipc_protocol_td *p_td;
+ struct sk_buff *skb;
+
+@@ -403,14 +401,6 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
+ goto ret;
+ }
+
+- if (!IPC_CB(skb)) {
+- dev_err(ipc_protocol->dev, "pipe# %d, tail: %d skb_cb is NULL",
+- pipe->pipe_nr, tail);
+- ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+- skb = NULL;
+- goto ret;
+- }
+-
+ if (p_td->buffer.address != IPC_CB(skb)->mapping) {
+ dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p",
+ (unsigned long long)p_td->buffer.address, skb->data);
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+index b571d9cedba49..3449f877e19f0 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+@@ -167,6 +167,7 @@ static void ipc_wwan_setup(struct net_device *iosm_dev)
+ iosm_dev->max_mtu = ETH_MAX_MTU;
+
+ iosm_dev->flags = IFF_POINTOPOINT | IFF_NOARP;
++ iosm_dev->needs_free_netdev = true;
+
+ iosm_dev->netdev_ops = &ipc_inm_ops;
+ }
+@@ -322,15 +323,16 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev)
+ ipc_wwan->dev = dev;
+ ipc_wwan->ipc_imem = ipc_imem;
+
++ mutex_init(&ipc_wwan->if_mutex);
++
+ /* WWAN core will create a netdev for the default IP MUX channel */
+ if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan,
+ IP_MUX_SESSION_DEFAULT)) {
++ mutex_destroy(&ipc_wwan->if_mutex);
+ kfree(ipc_wwan);
+ return NULL;
+ }
+
+- mutex_init(&ipc_wwan->if_mutex);
+-
+ return ipc_wwan;
+ }
+
+diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
+index 71bf9b4f769f5..ef70bb7c88ad6 100644
+--- a/drivers/net/wwan/mhi_wwan_mbim.c
++++ b/drivers/net/wwan/mhi_wwan_mbim.c
+@@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
+ int err;
+
+ while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+- struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
++ struct sk_buff *skb = alloc_skb(mbim->mru, GFP_KERNEL);
+
+ if (unlikely(!skb))
+ break;
+
+ err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
+- MHI_DEFAULT_MRU, MHI_EOT);
++ mbim->mru, MHI_EOT);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ break;
+@@ -582,6 +582,7 @@ static void mhi_mbim_setup(struct net_device *ndev)
+ ndev->min_mtu = ETH_MIN_MTU;
+ ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+ ndev->tx_queue_len = 1000;
++ ndev->needs_free_netdev = true;
+ }
+
+ static const struct wwan_ops mhi_mbim_wwan_ops = {
+diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
+index 5b62cf3b3c422..a4230a7376dfd 100644
+--- a/drivers/net/wwan/wwan_hwsim.c
++++ b/drivers/net/wwan/wwan_hwsim.c
+@@ -310,7 +310,7 @@ err_unreg_dev:
+ return ERR_PTR(err);
+
+ err_free_dev:
+- kfree(dev);
++ put_device(&dev->dev);
+
+ return ERR_PTR(err);
+ }
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 4a16d6e33c093..f315bddacba3f 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -48,7 +48,6 @@
+ #include <linux/debugfs.h>
+
+ typedef unsigned int pending_ring_idx_t;
+-#define INVALID_PENDING_RING_IDX (~0U)
+
+ struct pending_tx_info {
+ struct xen_netif_tx_request req; /* tx request */
+@@ -82,8 +81,6 @@ struct xenvif_rx_meta {
+ /* Discriminate from any valid pending_idx value. */
+ #define INVALID_PENDING_IDX 0xFFFF
+
+-#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE
+-
+ #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE
+
+ /* The maximum number of frags is derived from the size of a grant (same
+@@ -169,7 +166,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+
+- struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
++ struct gnttab_copy tx_copy_ops[2 * MAX_PENDING_REQS];
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ /* passed to gnttab_[un]map_refs with pages under (un)mapping */
+@@ -203,6 +200,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
+ unsigned int rx_queue_max;
+ unsigned int rx_queue_len;
+ unsigned long last_rx_time;
++ unsigned int rx_slots_needed;
+ bool stalled;
+
+ struct xenvif_copy_state rx_copy;
+@@ -366,11 +364,6 @@ void xenvif_free(struct xenvif *vif);
+ int xenvif_xenbus_init(void);
+ void xenvif_xenbus_fini(void);
+
+-int xenvif_schedulable(struct xenvif *vif);
+-
+-int xenvif_queue_stopped(struct xenvif_queue *queue);
+-void xenvif_wake_queue(struct xenvif_queue *queue);
+-
+ /* (Un)Map communication rings. */
+ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue);
+ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
+@@ -393,8 +386,7 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+-void xenvif_rx_action(struct xenvif_queue *queue);
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
+ void xenvif_carrier_on(struct xenvif *vif);
+
+@@ -402,9 +394,6 @@ void xenvif_carrier_on(struct xenvif *vif);
+ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
+ bool zerocopy_success);
+
+-/* Unmap a pending page and release it back to the guest */
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
+-
+ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
+ {
+ return MAX_PENDING_REQS -
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index c58996c1e2309..e1a5610b1747e 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+ wake_up(&queue->dealloc_wq);
+ }
+
+-int xenvif_schedulable(struct xenvif *vif)
++static int xenvif_schedulable(struct xenvif *vif)
+ {
+ return netif_running(vif->dev) &&
+ test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+@@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-int xenvif_queue_stopped(struct xenvif_queue *queue)
+-{
+- struct net_device *dev = queue->vif->dev;
+- unsigned int id = queue->id;
+- return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id));
+-}
+-
+-void xenvif_wake_queue(struct xenvif_queue *queue)
+-{
+- struct net_device *dev = queue->vif->dev;
+- unsigned int id = queue->id;
+- netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
+-}
+-
+ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+ {
+@@ -269,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
+ skb_clear_hash(skb);
+
+- xenvif_rx_queue_tail(queue, skb);
++ if (!xenvif_rx_queue_tail(queue, skb))
++ goto drop;
++
+ xenvif_kick_thread(queue);
+
+ return NETDEV_TX_OK;
+
+ drop:
+ vif->dev->stats.tx_dropped++;
+- dev_kfree_skb(skb);
++ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index 32d5bc4919d8c..5017033c705ae 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue,
+ s8 st);
+ static void push_tx_responses(struct xenvif_queue *queue);
+
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
++
+ static inline int tx_work_todo(struct xenvif_queue *queue);
+
+ static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
+@@ -330,10 +332,14 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
+
+
+ struct xenvif_tx_cb {
+- u16 pending_idx;
++ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
++ u8 copy_count;
++ u32 split_mask;
+ };
+
+ #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
++#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
++#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
+
+ static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
+ u16 pending_idx,
+@@ -356,6 +362,8 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+ struct sk_buff *skb =
+ alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
++
++ BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb));
+ if (unlikely(skb == NULL))
+ return NULL;
+
+@@ -368,39 +376,112 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+ return skb;
+ }
+
+-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
+- struct sk_buff *skb,
+- struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *gop,
+- unsigned int frag_overflow,
+- struct sk_buff *nskb)
++static void xenvif_get_requests(struct xenvif_queue *queue,
++ struct sk_buff *skb,
++ struct xen_netif_tx_request *first,
++ struct xen_netif_tx_request *txfrags,
++ unsigned *copy_ops,
++ unsigned *map_ops,
++ unsigned int frag_overflow,
++ struct sk_buff *nskb,
++ unsigned int extra_count,
++ unsigned int data_len)
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+- int start;
++ u16 pending_idx;
+ pending_ring_idx_t index;
+ unsigned int nr_slots;
++ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
++ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
++ struct xen_netif_tx_request *txp = first;
++
++ nr_slots = shinfo->nr_frags + frag_overflow + 1;
++
++ copy_count(skb) = 0;
++ XENVIF_TX_CB(skb)->split_mask = 0;
++
++ /* Create copy ops for exactly data_len bytes into the skb head. */
++ __skb_put(skb, data_len);
++ while (data_len > 0) {
++ int amount = data_len > txp->size ? txp->size : data_len;
++ bool split = false;
++
++ cop->source.u.ref = txp->gref;
++ cop->source.domid = queue->vif->domid;
++ cop->source.offset = txp->offset;
++
++ cop->dest.domid = DOMID_SELF;
++ cop->dest.offset = (offset_in_page(skb->data +
++ skb_headlen(skb) -
++ data_len)) & ~XEN_PAGE_MASK;
++ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
++ - data_len);
++
++ /* Don't cross local page boundary! */
++ if (cop->dest.offset + amount > XEN_PAGE_SIZE) {
++ amount = XEN_PAGE_SIZE - cop->dest.offset;
++ XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb);
++ split = true;
++ }
+
+- nr_slots = shinfo->nr_frags;
++ cop->len = amount;
++ cop->flags = GNTCOPY_source_gref;
++
++ index = pending_index(queue->pending_cons);
++ pending_idx = queue->pending_ring[index];
++ callback_param(queue, pending_idx).ctx = NULL;
++ copy_pending_idx(skb, copy_count(skb)) = pending_idx;
++ if (!split)
++ copy_count(skb)++;
+
+- /* Skip first skb fragment if it is on same page as header fragment. */
+- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
++ cop++;
++ data_len -= amount;
++
++ if (amount == txp->size) {
++ /* The copy op covered the full tx_request */
++
++ memcpy(&queue->pending_tx_info[pending_idx].req,
++ txp, sizeof(*txp));
++ queue->pending_tx_info[pending_idx].extra_count =
++ (txp == first) ? extra_count : 0;
++
++ if (txp == first)
++ txp = txfrags;
++ else
++ txp++;
++ queue->pending_cons++;
++ nr_slots--;
++ } else {
++ /* The copy op partially covered the tx_request.
++ * The remainder will be mapped or copied in the next
++ * iteration.
++ */
++ txp->offset += amount;
++ txp->size -= amount;
++ }
++ }
+
+- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+- shinfo->nr_frags++, txp++, gop++) {
++ for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
++ shinfo->nr_frags++, gop++, nr_slots--) {
+ index = pending_index(queue->pending_cons++);
+ pending_idx = queue->pending_ring[index];
+- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
++ xenvif_tx_create_map_op(queue, pending_idx, txp,
++ txp == first ? extra_count : 0, gop);
+ frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
++
++ if (txp == first)
++ txp = txfrags;
++ else
++ txp++;
+ }
+
+- if (frag_overflow) {
++ if (nr_slots > 0) {
+
+ shinfo = skb_shinfo(nskb);
+ frags = shinfo->frags;
+
+- for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
++ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
+ shinfo->nr_frags++, txp++, gop++) {
+ index = pending_index(queue->pending_cons++);
+ pending_idx = queue->pending_ring[index];
+@@ -411,9 +492,15 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
+ }
+
+ skb_shinfo(skb)->frag_list = nskb;
++ } else if (nskb) {
++ /* A frag_list skb was allocated but it is no longer needed
++ * because enough slots were converted to copy ops above.
++ */
++ kfree_skb(nskb);
+ }
+
+- return gop;
++ (*copy_ops) = cop - queue->tx_copy_ops;
++ (*map_ops) = gop - queue->tx_map_ops;
+ }
+
+ static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
+@@ -449,7 +536,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ struct gnttab_copy **gopp_copy)
+ {
+ struct gnttab_map_grant_ref *gop_map = *gopp_map;
+- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++ u16 pending_idx;
+ /* This always points to the shinfo of the skb being checked, which
+ * could be either the first or the one on the frag_list
+ */
+@@ -460,24 +547,44 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ struct skb_shared_info *first_shinfo = NULL;
+ int nr_frags = shinfo->nr_frags;
+ const bool sharedslot = nr_frags &&
+- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
+- int i, err;
++ frag_get_pending_idx(&shinfo->frags[0]) ==
++ copy_pending_idx(skb, copy_count(skb) - 1);
++ int i, err = 0;
+
+- /* Check status of header. */
+- err = (*gopp_copy)->status;
+- if (unlikely(err)) {
+- if (net_ratelimit())
+- netdev_dbg(queue->vif->dev,
+- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+- (*gopp_copy)->status,
+- pending_idx,
+- (*gopp_copy)->source.u.ref);
+- /* The first frag might still have this slot mapped */
+- if (!sharedslot)
+- xenvif_idx_release(queue, pending_idx,
+- XEN_NETIF_RSP_ERROR);
++ for (i = 0; i < copy_count(skb); i++) {
++ int newerr;
++
++ /* Check status of header. */
++ pending_idx = copy_pending_idx(skb, i);
++
++ newerr = (*gopp_copy)->status;
++
++ /* Split copies need to be handled together. */
++ if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) {
++ (*gopp_copy)++;
++ if (!newerr)
++ newerr = (*gopp_copy)->status;
++ }
++ if (likely(!newerr)) {
++ /* The first frag might still have this slot mapped */
++ if (i < copy_count(skb) - 1 || !sharedslot)
++ xenvif_idx_release(queue, pending_idx,
++ XEN_NETIF_RSP_OKAY);
++ } else {
++ err = newerr;
++ if (net_ratelimit())
++ netdev_dbg(queue->vif->dev,
++ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
++ (*gopp_copy)->status,
++ pending_idx,
++ (*gopp_copy)->source.u.ref);
++ /* The first frag might still have this slot mapped */
++ if (i < copy_count(skb) - 1 || !sharedslot)
++ xenvif_idx_release(queue, pending_idx,
++ XEN_NETIF_RSP_ERROR);
++ }
++ (*gopp_copy)++;
+ }
+- (*gopp_copy)++;
+
+ check_frags:
+ for (i = 0; i < nr_frags; i++, gop_map++) {
+@@ -524,14 +631,6 @@ check_frags:
+ if (err)
+ continue;
+
+- /* First error: if the header haven't shared a slot with the
+- * first frag, release it as well.
+- */
+- if (!sharedslot)
+- xenvif_idx_release(queue,
+- XENVIF_TX_CB(skb)->pending_idx,
+- XEN_NETIF_RSP_OKAY);
+-
+ /* Invalidate preceding fragments of this skb. */
+ for (j = 0; j < i; j++) {
+ pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+@@ -801,7 +900,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ unsigned *copy_ops,
+ unsigned *map_ops)
+ {
+- struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
+ struct sk_buff *skb, *nskb;
+ int ret;
+ unsigned int frag_overflow;
+@@ -883,8 +981,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ continue;
+ }
+
++ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
++ XEN_NETBACK_TX_COPY_LEN : txreq.size;
++
+ ret = xenvif_count_requests(queue, &txreq, extra_count,
+ txfrags, work_to_do);
++
+ if (unlikely(ret < 0))
+ break;
+
+@@ -899,10 +1001,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+
+ /* No crossing a page as the payload mustn't fragment. */
+ if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
+- netdev_err(queue->vif->dev,
+- "txreq.offset: %u, size: %u, end: %lu\n",
+- txreq.offset, txreq.size,
+- (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
++ netdev_err(queue->vif->dev, "Cross page boundary, txreq.offset: %u, size: %u\n",
++ txreq.offset, txreq.size);
+ xenvif_fatal_tx_err(queue->vif);
+ break;
+ }
+@@ -910,9 +1010,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ index = pending_index(queue->pending_cons);
+ pending_idx = queue->pending_ring[index];
+
+- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
+- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
+- XEN_NETBACK_TX_COPY_LEN : txreq.size;
++ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
++ data_len = txreq.size;
+
+ skb = xenvif_alloc_skb(data_len);
+ if (unlikely(skb == NULL)) {
+@@ -923,8 +1022,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ }
+
+ skb_shinfo(skb)->nr_frags = ret;
+- if (data_len < txreq.size)
+- skb_shinfo(skb)->nr_frags++;
+ /* At this point shinfo->nr_frags is in fact the number of
+ * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
+ */
+@@ -986,54 +1083,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
+ type);
+ }
+
+- XENVIF_TX_CB(skb)->pending_idx = pending_idx;
+-
+- __skb_put(skb, data_len);
+- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
+- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
+- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
+-
+- queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
+- virt_to_gfn(skb->data);
+- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
+- queue->tx_copy_ops[*copy_ops].dest.offset =
+- offset_in_page(skb->data) & ~XEN_PAGE_MASK;
+-
+- queue->tx_copy_ops[*copy_ops].len = data_len;
+- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+-
+- (*copy_ops)++;
+-
+- if (data_len < txreq.size) {
+- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+- pending_idx);
+- xenvif_tx_create_map_op(queue, pending_idx, &txreq,
+- extra_count, gop);
+- gop++;
+- } else {
+- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+- INVALID_PENDING_IDX);
+- memcpy(&queue->pending_tx_info[pending_idx].req,
+- &txreq, sizeof(txreq));
+- queue->pending_tx_info[pending_idx].extra_count =
+- extra_count;
+- }
+-
+- queue->pending_cons++;
+-
+- gop = xenvif_get_requests(queue, skb, txfrags, gop,
+- frag_overflow, nskb);
++ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
++ map_ops, frag_overflow, nskb, extra_count,
++ data_len);
+
+ __skb_queue_tail(&queue->tx_queue, skb);
+
+ queue->tx.req_cons = idx;
+
+- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
++ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
+ (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
+ break;
+ }
+
+- (*map_ops) = gop - queue->tx_map_ops;
+ return;
+ }
+
+@@ -1112,9 +1174,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+ while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ u16 pending_idx;
+- unsigned data_len;
+
+- pending_idx = XENVIF_TX_CB(skb)->pending_idx;
++ pending_idx = copy_pending_idx(skb, 0);
+ txp = &queue->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+@@ -1133,18 +1194,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
+ continue;
+ }
+
+- data_len = skb->len;
+- callback_param(queue, pending_idx).ctx = NULL;
+- if (data_len < txp->size) {
+- /* Append the packet payload as a fragment. */
+- txp->offset += data_len;
+- txp->size -= data_len;
+- } else {
+- /* Schedule a response immediately. */
+- xenvif_idx_release(queue, pending_idx,
+- XEN_NETIF_RSP_OKAY);
+- }
+-
+ if (txp->flags & XEN_NETTXF_csum_blank)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else if (txp->flags & XEN_NETTXF_data_validated)
+@@ -1331,7 +1380,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
+ /* Called after netfront has transmitted */
+ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
+ {
+- unsigned nr_mops, nr_cops = 0;
++ unsigned nr_mops = 0, nr_cops = 0;
+ int work_done, ret;
+
+ if (unlikely(!tx_work_todo(queue)))
+@@ -1418,7 +1467,7 @@ static void push_tx_responses(struct xenvif_queue *queue)
+ notify_remote_via_irq(queue->tx_irq);
+ }
+
+-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
+ {
+ int ret;
+ struct gnttab_unmap_grant_ref tx_unmap_op;
+diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
+index accc991d153f7..0ba754ebc5baa 100644
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -33,28 +33,36 @@
+ #include <xen/xen.h>
+ #include <xen/events.h>
+
+-static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
++/*
++ * Update the needed ring page slots for the first SKB queued.
++ * Note that any call sequence outside the RX thread calling this function
++ * needs to wake up the RX thread via a call of xenvif_kick_thread()
++ * afterwards in order to avoid a race with putting the thread to sleep.
++ */
++static void xenvif_update_needed_slots(struct xenvif_queue *queue,
++ const struct sk_buff *skb)
+ {
+- RING_IDX prod, cons;
+- struct sk_buff *skb;
+- int needed;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&queue->rx_queue.lock, flags);
++ unsigned int needed = 0;
+
+- skb = skb_peek(&queue->rx_queue);
+- if (!skb) {
+- spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+- return false;
++ if (skb) {
++ needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
++ if (skb_is_gso(skb))
++ needed++;
++ if (skb->sw_hash)
++ needed++;
+ }
+
+- needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
+- if (skb_is_gso(skb))
+- needed++;
+- if (skb->sw_hash)
+- needed++;
++ WRITE_ONCE(queue->rx_slots_needed, needed);
++}
+
+- spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
++static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
++{
++ RING_IDX prod, cons;
++ unsigned int needed;
++
++ needed = READ_ONCE(queue->rx_slots_needed);
++ if (!needed)
++ return false;
+
+ do {
+ prod = queue->rx.sring->req_prod;
+@@ -74,22 +82,30 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
+ return false;
+ }
+
+-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+ {
+ unsigned long flags;
++ bool ret = true;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+- __skb_queue_tail(&queue->rx_queue, skb);
+-
+- queue->rx_queue_len += skb->len;
+- if (queue->rx_queue_len > queue->rx_queue_max) {
++ if (queue->rx_queue_len >= queue->rx_queue_max) {
+ struct net_device *dev = queue->vif->dev;
+
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
++ ret = false;
++ } else {
++ if (skb_queue_empty(&queue->rx_queue))
++ xenvif_update_needed_slots(queue, skb);
++
++ __skb_queue_tail(&queue->rx_queue, skb);
++
++ queue->rx_queue_len += skb->len;
+ }
+
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
++
++ return ret;
+ }
+
+ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+@@ -100,6 +116,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+
+ skb = __skb_dequeue(&queue->rx_queue);
+ if (skb) {
++ xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue));
++
+ queue->rx_queue_len -= skb->len;
+ if (queue->rx_queue_len < queue->rx_queue_max) {
+ struct netdev_queue *txq;
+@@ -134,6 +152,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+ break;
+ xenvif_rx_dequeue(queue);
+ kfree_skb(skb);
++ queue->vif->dev->stats.rx_dropped++;
+ }
+ }
+
+@@ -469,7 +488,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue)
+
+ #define RX_BATCH_SIZE 64
+
+-void xenvif_rx_action(struct xenvif_queue *queue)
++static void xenvif_rx_action(struct xenvif_queue *queue)
+ {
+ struct sk_buff_head completed_skbs;
+ unsigned int work_done = 0;
+@@ -478,6 +497,7 @@ void xenvif_rx_action(struct xenvif_queue *queue)
+ queue->rx_copy.completed = &completed_skbs;
+
+ while (xenvif_rx_ring_slots_available(queue) &&
++ !skb_queue_empty(&queue->rx_queue) &&
+ work_done < RX_BATCH_SIZE) {
+ xenvif_rx_skb(queue);
+ work_done++;
+@@ -487,27 +507,31 @@ void xenvif_rx_action(struct xenvif_queue *queue)
+ xenvif_rx_copy_flush(queue);
+ }
+
+-static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
++static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue)
+ {
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
++ return prod - cons;
++}
++
++static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue)
++{
++ unsigned int needed = READ_ONCE(queue->rx_slots_needed);
++
+ return !queue->stalled &&
+- prod - cons < 1 &&
++ xenvif_rx_queue_slots(queue) < needed &&
+ time_after(jiffies,
+ queue->last_rx_time + queue->vif->stall_timeout);
+ }
+
+ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+ {
+- RING_IDX prod, cons;
+-
+- prod = queue->rx.sring->req_prod;
+- cons = queue->rx.req_cons;
++ unsigned int needed = READ_ONCE(queue->rx_slots_needed);
+
+- return queue->stalled && prod - cons >= 1;
++ return queue->stalled && xenvif_rx_queue_slots(queue) >= needed;
+ }
+
+ bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+index d24b7a7993aa0..e85b3c5d4acce 100644
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -675,7 +675,6 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
+
+ /* Not interested in this watch anymore. */
+ unregister_hotplug_status_watch(be);
+- xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
+ }
+ kfree(str);
+ }
+@@ -824,15 +823,11 @@ static void connect(struct backend_info *be)
+ xenvif_carrier_on(be->vif);
+
+ unregister_hotplug_status_watch(be);
+- if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) {
+- err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+- NULL, hotplug_status_changed,
+- "%s/%s", dev->nodename,
+- "hotplug-status");
+- if (err)
+- goto err;
++ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL,
++ hotplug_status_changed,
++ "%s/%s", dev->nodename, "hotplug-status");
++ if (!err)
+ be->have_hotplug_status_watch = 1;
+- }
+
+ netif_tx_wake_all_queues(be->vif->dev);
+
+@@ -988,6 +983,7 @@ static int netback_remove(struct xenbus_device *dev)
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ unregister_hotplug_status_watch(be);
++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ if (be->vif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ backend_disconnect(be);
+diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
+index fc41ba95f81d0..6e73d3a00eecd 100644
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
+ MODULE_PARM_DESC(max_queues,
+ "Maximum number of queues per virtual interface");
+
++static bool __read_mostly xennet_trusted = true;
++module_param_named(trusted, xennet_trusted, bool, 0644);
++MODULE_PARM_DESC(trusted, "Is the backend trusted");
++
+ #define XENNET_TIMEOUT (5 * HZ)
+
+ static const struct ethtool_ops xennet_ethtool_ops;
+@@ -148,6 +152,9 @@ struct netfront_queue {
+ grant_ref_t gref_rx_head;
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+
++ unsigned int rx_rsp_unconsumed;
++ spinlock_t rx_cons_lock;
++
+ struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
+ };
+@@ -172,6 +179,9 @@ struct netfront_info {
+ /* Is device behaving sane? */
+ bool broken;
+
++ /* Should skbs be bounced into a zeroed buffer? */
++ bool bounce;
++
+ atomic_t rx_gso_checksum_fixup;
+ };
+
+@@ -270,7 +280,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
+ if (unlikely(!skb))
+ return NULL;
+
+- page = page_pool_dev_alloc_pages(queue->page_pool);
++ page = page_pool_alloc_pages(queue->page_pool,
++ GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
+ if (unlikely(!page)) {
+ kfree_skb(skb);
+ return NULL;
+@@ -376,12 +387,13 @@ static int xennet_open(struct net_device *dev)
+ return 0;
+ }
+
+-static void xennet_tx_buf_gc(struct netfront_queue *queue)
++static bool xennet_tx_buf_gc(struct netfront_queue *queue)
+ {
+ RING_IDX cons, prod;
+ unsigned short id;
+ struct sk_buff *skb;
+ bool more_to_do;
++ bool work_done = false;
+ const struct device *dev = &queue->info->netdev->dev;
+
+ BUG_ON(!netif_carrier_ok(queue->info->netdev));
+@@ -398,6 +410,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
+ for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
+ struct xen_netif_tx_response txrsp;
+
++ work_done = true;
++
+ RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
+ if (txrsp.status == XEN_NETIF_RSP_NULL)
+ continue;
+@@ -418,14 +432,12 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
+ queue->tx_link[id] = TX_LINK_NONE;
+ skb = queue->tx_skbs[id];
+ queue->tx_skbs[id] = NULL;
+- if (unlikely(gnttab_query_foreign_access(
+- queue->grant_tx_ref[id]) != 0)) {
++ if (unlikely(!gnttab_end_foreign_access_ref(
++ queue->grant_tx_ref[id], GNTMAP_readonly))) {
+ dev_alert(dev,
+ "Grant still in use by backend domain\n");
+ goto err;
+ }
+- gnttab_end_foreign_access_ref(
+- queue->grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &queue->gref_tx_head, queue->grant_tx_ref[id]);
+ queue->grant_tx_ref[id] = GRANT_INVALID_REF;
+@@ -441,11 +453,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
+
+ xennet_maybe_wake_tx(queue);
+
+- return;
++ return work_done;
+
+ err:
+ queue->info->broken = true;
+ dev_alert(dev, "Disabled for further use\n");
++
++ return work_done;
+ }
+
+ struct xennet_gnttab_make_txreq {
+@@ -661,6 +675,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
+ return nxmit;
+ }
+
++struct sk_buff *bounce_skb(const struct sk_buff *skb)
++{
++ unsigned int headerlen = skb_headroom(skb);
++ /* Align size to allocate full pages and avoid contiguous data leaks */
++ unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
++ XEN_PAGE_SIZE);
++ struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
++
++ if (!n)
++ return NULL;
++
++ if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
++ WARN_ONCE(1, "misaligned skb allocated\n");
++ kfree_skb(n);
++ return NULL;
++ }
++
++ /* Set the data pointer */
++ skb_reserve(n, headerlen);
++ /* Set the tail pointer and length */
++ skb_put(n, skb->len);
++
++ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
++
++ skb_copy_header(n, skb);
++ return n;
++}
+
+ #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
+
+@@ -714,9 +755,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
+
+ /* The first req should be at least ETH_HLEN size or the packet will be
+ * dropped by netback.
++ *
++ * If the backend is not trusted bounce all data to zeroed pages to
++ * avoid exposing contiguous data on the granted page not belonging to
++ * the skb.
+ */
+- if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+- nskb = skb_copy(skb, GFP_ATOMIC);
++ if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
++ nskb = bounce_skb(skb);
+ if (!nskb)
+ goto drop;
+ dev_consume_skb_any(skb);
+@@ -834,6 +879,38 @@ static int xennet_close(struct net_device *dev)
+ return 0;
+ }
+
++static void xennet_destroy_queues(struct netfront_info *info)
++{
++ unsigned int i;
++
++ for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
++ struct netfront_queue *queue = &info->queues[i];
++
++ if (netif_running(info->netdev))
++ napi_disable(&queue->napi);
++ netif_napi_del(&queue->napi);
++ }
++
++ kfree(info->queues);
++ info->queues = NULL;
++}
++
++static void xennet_uninit(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ xennet_destroy_queues(np);
++}
++
++static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&queue->rx_cons_lock, flags);
++ queue->rx.rsp_cons = val;
++ queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
++ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
++}
++
+ static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
+ grant_ref_t ref)
+ {
+@@ -885,7 +962,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
+ xennet_move_rx_slot(queue, skb, ref);
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+- queue->rx.rsp_cons = cons;
++ xennet_set_rx_rsp_cons(queue, cons);
+ return err;
+ }
+
+@@ -950,7 +1027,6 @@ static int xennet_get_responses(struct netfront_queue *queue,
+ struct device *dev = &queue->info->netdev->dev;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+- unsigned long ret;
+ int slots = 1;
+ int err = 0;
+ u32 verdict;
+@@ -992,8 +1068,13 @@ static int xennet_get_responses(struct netfront_queue *queue,
+ goto next;
+ }
+
+- ret = gnttab_end_foreign_access_ref(ref, 0);
+- BUG_ON(!ret);
++ if (!gnttab_end_foreign_access_ref(ref, 0)) {
++ dev_alert(dev,
++ "Grant still in use by backend domain\n");
++ queue->info->broken = true;
++ dev_alert(dev, "Disabled for further use\n");
++ return -EINVAL;
++ }
+
+ gnttab_release_grant_reference(&queue->gref_rx_head, ref);
+
+@@ -1013,8 +1094,10 @@ static int xennet_get_responses(struct netfront_queue *queue,
+ }
+ }
+ rcu_read_unlock();
+-next:
++
+ __skb_queue_tail(list, skb);
++
++next:
+ if (!(rx->flags & XEN_NETRXF_more_data))
+ break;
+
+@@ -1039,7 +1122,7 @@ next:
+ }
+
+ if (unlikely(err))
+- queue->rx.rsp_cons = cons + slots;
++ xennet_set_rx_rsp_cons(queue, cons + slots);
+
+ return err;
+ }
+@@ -1093,7 +1176,8 @@ static int xennet_fill_frags(struct netfront_queue *queue,
+ __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
+ }
+ if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
+- queue->rx.rsp_cons = ++cons + skb_queue_len(list);
++ xennet_set_rx_rsp_cons(queue,
++ ++cons + skb_queue_len(list));
+ kfree_skb(nskb);
+ return -ENOENT;
+ }
+@@ -1106,7 +1190,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
+ kfree_skb(nskb);
+ }
+
+- queue->rx.rsp_cons = cons;
++ xennet_set_rx_rsp_cons(queue, cons);
+
+ return 0;
+ }
+@@ -1213,6 +1297,10 @@ static int xennet_poll(struct napi_struct *napi, int budget)
+ &need_xdp_flush);
+
+ if (unlikely(err)) {
++ if (queue->info->broken) {
++ spin_unlock(&queue->rx_lock);
++ return 0;
++ }
+ err:
+ while ((skb = __skb_dequeue(&tmpq)))
+ __skb_queue_tail(&errq, skb);
+@@ -1229,7 +1317,9 @@ err:
+
+ if (unlikely(xennet_set_skb_gso(skb, gso))) {
+ __skb_queue_head(&tmpq, skb);
+- queue->rx.rsp_cons += skb_queue_len(&tmpq);
++ xennet_set_rx_rsp_cons(queue,
++ queue->rx.rsp_cons +
++ skb_queue_len(&tmpq));
+ goto err;
+ }
+ }
+@@ -1253,7 +1343,8 @@ err:
+
+ __skb_queue_tail(&rxq, skb);
+
+- i = ++queue->rx.rsp_cons;
++ i = queue->rx.rsp_cons + 1;
++ xennet_set_rx_rsp_cons(queue, i);
+ work_done++;
+ }
+ if (need_xdp_flush)
+@@ -1417,40 +1508,79 @@ static int xennet_set_features(struct net_device *dev,
+ return 0;
+ }
+
+-static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
++static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
+ {
+- struct netfront_queue *queue = dev_id;
+ unsigned long flags;
+
+- if (queue->info->broken)
+- return IRQ_HANDLED;
++ if (unlikely(queue->info->broken))
++ return false;
+
+ spin_lock_irqsave(&queue->tx_lock, flags);
+- xennet_tx_buf_gc(queue);
++ if (xennet_tx_buf_gc(queue))
++ *eoi = 0;
+ spin_unlock_irqrestore(&queue->tx_lock, flags);
+
++ return true;
++}
++
++static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
++{
++ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
++
++ if (likely(xennet_handle_tx(dev_id, &eoiflag)))
++ xen_irq_lateeoi(irq, eoiflag);
++
+ return IRQ_HANDLED;
+ }
+
+-static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
++static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
+ {
+- struct netfront_queue *queue = dev_id;
+- struct net_device *dev = queue->info->netdev;
++ unsigned int work_queued;
++ unsigned long flags;
++
++ if (unlikely(queue->info->broken))
++ return false;
+
+- if (queue->info->broken)
+- return IRQ_HANDLED;
++ spin_lock_irqsave(&queue->rx_cons_lock, flags);
++ work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
++ if (work_queued > queue->rx_rsp_unconsumed) {
++ queue->rx_rsp_unconsumed = work_queued;
++ *eoi = 0;
++ } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
++ const struct device *dev = &queue->info->netdev->dev;
+
+- if (likely(netif_carrier_ok(dev) &&
+- RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
++ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
++ dev_alert(dev, "RX producer index going backwards\n");
++ dev_alert(dev, "Disabled for further use\n");
++ queue->info->broken = true;
++ return false;
++ }
++ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
++
++ if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
+ napi_schedule(&queue->napi);
+
++ return true;
++}
++
++static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
++{
++ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
++
++ if (likely(xennet_handle_rx(dev_id, &eoiflag)))
++ xen_irq_lateeoi(irq, eoiflag);
++
+ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+ {
+- xennet_tx_interrupt(irq, dev_id);
+- xennet_rx_interrupt(irq, dev_id);
++ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
++
++ if (xennet_handle_tx(dev_id, &eoiflag) &&
++ xennet_handle_rx(dev_id, &eoiflag))
++ xen_irq_lateeoi(irq, eoiflag);
++
+ return IRQ_HANDLED;
+ }
+
+@@ -1550,6 +1680,7 @@ static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+ }
+
+ static const struct net_device_ops xennet_netdev_ops = {
++ .ndo_uninit = xennet_uninit,
+ .ndo_open = xennet_open,
+ .ndo_stop = xennet_close,
+ .ndo_start_xmit = xennet_start_xmit,
+@@ -1735,6 +1866,12 @@ static int netfront_resume(struct xenbus_device *dev)
+ netif_tx_unlock_bh(info->netdev);
+
+ xennet_disconnect_backend(info);
++
++ rtnl_lock();
++ if (info->queues)
++ xennet_destroy_queues(info);
++ rtnl_unlock();
++
+ return 0;
+ }
+
+@@ -1768,9 +1905,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
+ if (err < 0)
+ goto fail;
+
+- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
+- xennet_interrupt,
+- 0, queue->info->netdev->name, queue);
++ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
++ xennet_interrupt, 0,
++ queue->info->netdev->name,
++ queue);
+ if (err < 0)
+ goto bind_fail;
+ queue->rx_evtchn = queue->tx_evtchn;
+@@ -1798,18 +1936,18 @@ static int setup_netfront_split(struct netfront_queue *queue)
+
+ snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
+ "%s-tx", queue->name);
+- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
+- xennet_tx_interrupt,
+- 0, queue->tx_irq_name, queue);
++ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
++ xennet_tx_interrupt, 0,
++ queue->tx_irq_name, queue);
+ if (err < 0)
+ goto bind_tx_fail;
+ queue->tx_irq = err;
+
+ snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
+ "%s-rx", queue->name);
+- err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
+- xennet_rx_interrupt,
+- 0, queue->rx_irq_name, queue);
++ err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
++ xennet_rx_interrupt, 0,
++ queue->rx_irq_name, queue);
+ if (err < 0)
+ goto bind_rx_fail;
+ queue->rx_irq = err;
+@@ -1833,7 +1971,7 @@ static int setup_netfront(struct xenbus_device *dev,
+ struct netfront_queue *queue, unsigned int feature_split_evtchn)
+ {
+ struct xen_netif_tx_sring *txs;
+- struct xen_netif_rx_sring *rxs;
++ struct xen_netif_rx_sring *rxs = NULL;
+ grant_ref_t gref;
+ int err;
+
+@@ -1853,21 +1991,21 @@ static int setup_netfront(struct xenbus_device *dev,
+
+ err = xenbus_grant_ring(dev, txs, 1, &gref);
+ if (err < 0)
+- goto grant_tx_ring_fail;
++ goto fail;
+ queue->tx_ring_ref = gref;
+
+ rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+ if (!rxs) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(dev, err, "allocating rx ring page");
+- goto alloc_rx_ring_fail;
++ goto fail;
+ }
+ SHARED_RING_INIT(rxs);
+ FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, rxs, 1, &gref);
+ if (err < 0)
+- goto grant_rx_ring_fail;
++ goto fail;
+ queue->rx_ring_ref = gref;
+
+ if (feature_split_evtchn)
+@@ -1880,22 +2018,28 @@ static int setup_netfront(struct xenbus_device *dev,
+ err = setup_netfront_single(queue);
+
+ if (err)
+- goto alloc_evtchn_fail;
++ goto fail;
+
+ return 0;
+
+ /* If we fail to setup netfront, it is safe to just revoke access to
+ * granted pages because backend is not accessing it at this point.
+ */
+-alloc_evtchn_fail:
+- gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
+-grant_rx_ring_fail:
+- free_page((unsigned long)rxs);
+-alloc_rx_ring_fail:
+- gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
+-grant_tx_ring_fail:
+- free_page((unsigned long)txs);
+-fail:
++ fail:
++ if (queue->rx_ring_ref != GRANT_INVALID_REF) {
++ gnttab_end_foreign_access(queue->rx_ring_ref, 0,
++ (unsigned long)rxs);
++ queue->rx_ring_ref = GRANT_INVALID_REF;
++ } else {
++ free_page((unsigned long)rxs);
++ }
++ if (queue->tx_ring_ref != GRANT_INVALID_REF) {
++ gnttab_end_foreign_access(queue->tx_ring_ref, 0,
++ (unsigned long)txs);
++ queue->tx_ring_ref = GRANT_INVALID_REF;
++ } else {
++ free_page((unsigned long)txs);
++ }
+ return err;
+ }
+
+@@ -1911,6 +2055,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
+
+ spin_lock_init(&queue->tx_lock);
+ spin_lock_init(&queue->rx_lock);
++ spin_lock_init(&queue->rx_cons_lock);
+
+ timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
+
+@@ -2040,22 +2185,6 @@ error:
+ return err;
+ }
+
+-static void xennet_destroy_queues(struct netfront_info *info)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
+- struct netfront_queue *queue = &info->queues[i];
+-
+- if (netif_running(info->netdev))
+- napi_disable(&queue->napi);
+- netif_napi_del(&queue->napi);
+- }
+-
+- kfree(info->queues);
+- info->queues = NULL;
+-}
+-
+
+
+ static int xennet_create_page_pool(struct netfront_queue *queue)
+@@ -2164,6 +2293,10 @@ static int talk_to_netback(struct xenbus_device *dev,
+
+ info->netdev->irq = 0;
+
++ /* Check if backend is trusted. */
++ info->bounce = !xennet_trusted ||
++ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
++
+ /* Check if backend supports multiple queues */
+ max_queues = xenbus_read_unsigned(info->xbdev->otherend,
+ "multi-queue-max-queues", 1);
+@@ -2330,6 +2463,9 @@ static int xennet_connect(struct net_device *dev)
+ return err;
+ if (np->netback_has_xdp_headroom)
+ pr_info("backend supports XDP headroom\n");
++ if (np->bounce)
++ dev_info(&np->xbdev->dev,
++ "bouncing transmitted data to zeroed pages\n");
+
+ /* talk_to_netback() sets the correct number of queues */
+ num_queues = dev->real_num_tx_queues;
+diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
+index c6b3334f24c9e..f12f903a9dd13 100644
+--- a/drivers/nfc/fdp/fdp.c
++++ b/drivers/nfc/fdp/fdp.c
+@@ -249,11 +249,19 @@ static int fdp_nci_close(struct nci_dev *ndev)
+ static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
+ {
+ struct fdp_nci_info *info = nci_get_drvdata(ndev);
++ int ret;
+
+ if (atomic_dec_and_test(&info->data_pkt_counter))
+ info->data_pkt_counter_cb(ndev);
+
+- return info->phy_ops->write(info->phy, skb);
++ ret = info->phy_ops->write(info->phy, skb);
++ if (ret < 0) {
++ kfree_skb(skb);
++ return ret;
++ }
++
++ consume_skb(skb);
++ return 0;
+ }
+
+ static int fdp_nci_request_firmware(struct nci_dev *ndev)
+diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
+index 051c43a2a52f8..5f97dcf08dd07 100644
+--- a/drivers/nfc/fdp/i2c.c
++++ b/drivers/nfc/fdp/i2c.c
+@@ -249,6 +249,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
+ len, sizeof(**fw_vsc_cfg),
+ GFP_KERNEL);
+
++ if (!*fw_vsc_cfg)
++ goto alloc_err;
++
+ r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME,
+ *fw_vsc_cfg, len);
+
+@@ -262,6 +265,7 @@ vsc_read_err:
+ *fw_vsc_cfg = NULL;
+ }
+
++alloc_err:
+ dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s",
+ *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
+ }
+diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
+index ceef81d93ac99..a902720cd8493 100644
+--- a/drivers/nfc/nfcmrvl/i2c.c
++++ b/drivers/nfc/nfcmrvl/i2c.c
+@@ -132,10 +132,15 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv,
+ ret = -EREMOTEIO;
+ } else
+ ret = 0;
++ }
++
++ if (ret) {
+ kfree_skb(skb);
++ return ret;
+ }
+
+- return ret;
++ consume_skb(skb);
++ return 0;
+ }
+
+ static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv,
+@@ -167,9 +172,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node,
+ pdata->irq_polarity = IRQF_TRIGGER_RISING;
+
+ ret = irq_of_parse_and_map(node, 0);
+- if (ret < 0) {
+- pr_err("Unable to get irq, error: %d\n", ret);
+- return ret;
++ if (!ret) {
++ pr_err("Unable to get irq\n");
++ return -EINVAL;
+ }
+ pdata->irq = ret;
+
+diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
+index 2fcf545012b16..1a5284de4341b 100644
+--- a/drivers/nfc/nfcmrvl/main.c
++++ b/drivers/nfc/nfcmrvl/main.c
+@@ -183,6 +183,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
+ {
+ struct nci_dev *ndev = priv->ndev;
+
++ nci_unregister_device(ndev);
+ if (priv->ndev->nfc_dev->fw_download_in_progress)
+ nfcmrvl_fw_dnld_abort(priv);
+
+@@ -191,7 +192,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
+ if (gpio_is_valid(priv->config.reset_n_io))
+ gpio_free(priv->config.reset_n_io);
+
+- nci_unregister_device(ndev);
+ nci_free_device(ndev);
+ kfree(priv);
+ }
+diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
+index 5b833a9a83f80..abd75779194cd 100644
+--- a/drivers/nfc/nfcmrvl/spi.c
++++ b/drivers/nfc/nfcmrvl/spi.c
+@@ -115,9 +115,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node,
+ }
+
+ ret = irq_of_parse_and_map(node, 0);
+- if (ret < 0) {
+- pr_err("Unable to get irq, error: %d\n", ret);
+- return ret;
++ if (!ret) {
++ pr_err("Unable to get irq\n");
++ return -EINVAL;
+ }
+ pdata->irq = ret;
+
+diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
+index a99aedff795dc..ea73094530968 100644
+--- a/drivers/nfc/nfcmrvl/usb.c
++++ b/drivers/nfc/nfcmrvl/usb.c
+@@ -388,13 +388,25 @@ static void nfcmrvl_play_deferred(struct nfcmrvl_usb_drv_data *drv_data)
+ int err;
+
+ while ((urb = usb_get_from_anchor(&drv_data->deferred))) {
++ usb_anchor_urb(urb, &drv_data->tx_anchor);
++
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+- if (err)
++ if (err) {
++ kfree(urb->setup_packet);
++ usb_unanchor_urb(urb);
++ usb_free_urb(urb);
+ break;
++ }
+
+ drv_data->tx_in_flight++;
++ usb_free_urb(urb);
++ }
++
++	/* Clean up the remaining deferred urbs. */
++ while ((urb = usb_get_from_anchor(&drv_data->deferred))) {
++ kfree(urb->setup_packet);
++ usb_free_urb(urb);
+ }
+- usb_scuttle_anchored_urbs(&drv_data->deferred);
+ }
+
+ static int nfcmrvl_resume(struct usb_interface *intf)
+diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
+index 85bf8d586c707..0f6befe8be1e2 100644
+--- a/drivers/nfc/nfcsim.c
++++ b/drivers/nfc/nfcsim.c
+@@ -336,10 +336,6 @@ static struct dentry *nfcsim_debugfs_root;
+ static void nfcsim_debugfs_init(void)
+ {
+ nfcsim_debugfs_root = debugfs_create_dir("nfcsim", NULL);
+-
+- if (!nfcsim_debugfs_root)
+- pr_err("Could not create debugfs entry\n");
+-
+ }
+
+ static void nfcsim_debugfs_remove(void)
+diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
+index 518e2afb43a8d..13c433eb694dc 100644
+--- a/drivers/nfc/nxp-nci/core.c
++++ b/drivers/nfc/nxp-nci/core.c
+@@ -77,10 +77,13 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
+ return -EINVAL;
+
+ r = info->phy_ops->write(info->phy_id, skb);
+- if (r < 0)
++ if (r < 0) {
+ kfree_skb(skb);
++ return r;
++ }
+
+- return r;
++ consume_skb(skb);
++ return 0;
+ }
+
+ static const struct nci_ops nxp_nci_ops = {
+diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
+index 7e451c10985df..ae2ba08d8ac3f 100644
+--- a/drivers/nfc/nxp-nci/i2c.c
++++ b/drivers/nfc/nxp-nci/i2c.c
+@@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy,
+ skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN);
+
+ r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len);
+- if (r != frame_len) {
++ if (r < 0) {
++ goto fw_read_exit_free_skb;
++ } else if (r != frame_len) {
+ nfc_err(&client->dev,
+ "Invalid frame length: %u (expected %zu)\n",
+ r, frame_len);
+@@ -162,8 +164,13 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy,
+
+ skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE);
+
++ if (!header.plen)
++ return 0;
++
+ r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen);
+- if (r != header.plen) {
++ if (r < 0) {
++ goto nci_read_exit_free_skb;
++ } else if (r != header.plen) {
+ nfc_err(&client->dev,
+ "Invalid frame payload length: %u (expected %u)\n",
+ r, header.plen);
+diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
+index 2f3f3fe9a0baa..939d27652a4c9 100644
+--- a/drivers/nfc/pn533/pn533.c
++++ b/drivers/nfc/pn533/pn533.c
+@@ -1297,6 +1297,8 @@ static int pn533_poll_dep_complete(struct pn533 *dev, void *arg,
+ if (IS_ERR(resp))
+ return PTR_ERR(resp);
+
++ memset(&nfc_target, 0, sizeof(struct nfc_target));
++
+ rsp = (struct pn533_cmd_jump_dep_response *)resp->data;
+
+ rc = rsp->status & PN533_CMD_RET_MASK;
+@@ -1928,6 +1930,8 @@ static int pn533_in_dep_link_up_complete(struct pn533 *dev, void *arg,
+
+ dev_dbg(dev->dev, "Creating new target\n");
+
++ memset(&nfc_target, 0, sizeof(struct nfc_target));
++
+ nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK;
+ nfc_target.nfcid1_len = 10;
+ memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len);
+@@ -2218,7 +2222,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb)
+ frag = pn533_alloc_skb(dev, frag_size);
+ if (!frag) {
+ skb_queue_purge(&dev->fragment_skb);
+- break;
++ return -ENOMEM;
+ }
+
+ if (!dev->tgt_mode) {
+@@ -2287,7 +2291,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev,
+ /* jumbo frame ? */
+ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) {
+ rc = pn533_fill_fragment_skbs(dev, skb);
+- if (rc <= 0)
++ if (rc < 0)
+ goto error;
+
+ skb = skb_dequeue(&dev->fragment_skb);
+@@ -2355,7 +2359,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb)
+ /* let's split in multiple chunks if size's too big */
+ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) {
+ rc = pn533_fill_fragment_skbs(dev, skb);
+- if (rc <= 0)
++ if (rc < 0)
+ goto error;
+
+ /* get the first skb */
+@@ -2789,13 +2793,14 @@ void pn53x_common_clean(struct pn533 *priv)
+ {
+ struct pn533_cmd *cmd, *n;
+
++	/* delete the timer before cleaning up the worker */
++ del_timer_sync(&priv->listen_timer);
++
+ flush_delayed_work(&priv->poll_work);
+ destroy_workqueue(priv->wq);
+
+ skb_queue_purge(&priv->resp_q);
+
+- del_timer(&priv->listen_timer);
+-
+ list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) {
+ list_del(&cmd->queue);
+ kfree(cmd);
+diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
+index 7bdaf82630706..7ad98973648cc 100644
+--- a/drivers/nfc/pn533/uart.c
++++ b/drivers/nfc/pn533/uart.c
+@@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev)
+ pn53x_unregister_nfc(pn532->priv);
+ serdev_device_close(serdev);
+ pn53x_common_clean(pn532->priv);
++ del_timer_sync(&pn532->cmd_timeout);
+ kfree_skb(pn532->recv_skb);
+ kfree(pn532);
+ }
+diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
+index bd7f7478d1892..47d423cc26081 100644
+--- a/drivers/nfc/pn533/usb.c
++++ b/drivers/nfc/pn533/usb.c
+@@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags)
+ return usb_submit_urb(phy->ack_urb, flags);
+ }
+
++struct pn533_out_arg {
++ struct pn533_usb_phy *phy;
++ struct completion done;
++};
++
+ static int pn533_usb_send_frame(struct pn533 *dev,
+ struct sk_buff *out)
+ {
+ struct pn533_usb_phy *phy = dev->phy;
++ struct pn533_out_arg arg;
++ void *cntx;
+ int rc;
+
+ if (phy->priv == NULL)
+@@ -168,10 +175,18 @@ static int pn533_usb_send_frame(struct pn533 *dev,
+ print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
+ out->data, out->len, false);
+
++ arg.phy = phy;
++ init_completion(&arg.done);
++ cntx = phy->out_urb->context;
++ phy->out_urb->context = &arg;
++
+ rc = usb_submit_urb(phy->out_urb, GFP_KERNEL);
+ if (rc)
+ return rc;
+
++ wait_for_completion(&arg.done);
++ phy->out_urb->context = cntx;
++
+ if (dev->protocol_type == PN533_PROTO_REQ_RESP) {
+ /* request for response for sent packet directly */
+ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL);
+@@ -408,7 +423,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy)
+ return arg.rc;
+ }
+
+-static void pn533_send_complete(struct urb *urb)
++static void pn533_out_complete(struct urb *urb)
++{
++ struct pn533_out_arg *arg = urb->context;
++ struct pn533_usb_phy *phy = arg->phy;
++
++ switch (urb->status) {
++ case 0:
++ break; /* success */
++ case -ECONNRESET:
++ case -ENOENT:
++ dev_dbg(&phy->udev->dev,
++ "The urb has been stopped (status %d)\n",
++ urb->status);
++ break;
++ case -ESHUTDOWN:
++ default:
++ nfc_err(&phy->udev->dev,
++ "Urb failure (status %d)\n",
++ urb->status);
++ }
++
++ complete(&arg->done);
++}
++
++static void pn533_ack_complete(struct urb *urb)
+ {
+ struct pn533_usb_phy *phy = urb->context;
+
+@@ -496,10 +535,10 @@ static int pn533_usb_probe(struct usb_interface *interface,
+
+ usb_fill_bulk_urb(phy->out_urb, phy->udev,
+ usb_sndbulkpipe(phy->udev, out_endpoint),
+- NULL, 0, pn533_send_complete, phy);
++ NULL, 0, pn533_out_complete, phy);
+ usb_fill_bulk_urb(phy->ack_urb, phy->udev,
+ usb_sndbulkpipe(phy->udev, out_endpoint),
+- NULL, 0, pn533_send_complete, phy);
++ NULL, 0, pn533_ack_complete, phy);
+
+ switch (id->driver_info) {
+ case PN533_DEVICE_STD:
+diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
+index 16ceb763594fc..90e30e2f15125 100644
+--- a/drivers/nfc/port100.c
++++ b/drivers/nfc/port100.c
+@@ -1612,7 +1612,9 @@ free_nfc_dev:
+ nfc_digital_free_device(dev->nfc_digital_dev);
+
+ error:
++ usb_kill_urb(dev->in_urb);
+ usb_free_urb(dev->in_urb);
++ usb_kill_urb(dev->out_urb);
+ usb_free_urb(dev->out_urb);
+ usb_put_dev(dev->udev);
+
+diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c
+index 1c412007fabb6..0270e05b68dff 100644
+--- a/drivers/nfc/s3fwrn5/core.c
++++ b/drivers/nfc/s3fwrn5/core.c
+@@ -110,11 +110,15 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
+ }
+
+ ret = s3fwrn5_write(info, skb);
+- if (ret < 0)
++ if (ret < 0) {
+ kfree_skb(skb);
++ mutex_unlock(&info->mutex);
++ return ret;
++ }
+
++ consume_skb(skb);
+ mutex_unlock(&info->mutex);
+- return ret;
++ return 0;
+ }
+
+ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev)
+diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c
+index e9dc313b333e2..3564e3335a988 100644
+--- a/drivers/nfc/st-nci/ndlc.c
++++ b/drivers/nfc/st-nci/ndlc.c
+@@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe);
+
+ void ndlc_remove(struct llt_ndlc *ndlc)
+ {
+- st_nci_remove(ndlc->ndev);
+-
+ /* cancel timers */
+ del_timer_sync(&ndlc->t1_timer);
+ del_timer_sync(&ndlc->t2_timer);
+ ndlc->t2_active = false;
+ ndlc->t1_active = false;
++ /* cancel work */
++ cancel_work_sync(&ndlc->sm_work);
++
++ st_nci_remove(ndlc->ndev);
+
+ skb_queue_purge(&ndlc->rcv_q);
+ skb_queue_purge(&ndlc->send_q);
+diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c
+index 5fd89f72969d9..57d09dbf627b4 100644
+--- a/drivers/nfc/st-nci/se.c
++++ b/drivers/nfc/st-nci/se.c
+@@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev,
+ int r = 0;
+ struct device *dev = &ndev->nfc_dev->dev;
+ struct nfc_evt_transaction *transaction;
++ u32 aid_len;
++ u8 params_len;
+
+ pr_debug("connectivity gate event: %x\n", event);
+
+@@ -325,26 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev,
+ * Description Tag Length
+ * AID 81 5 to 16
+ * PARAMETERS 82 0 to 255
++ *
++		 * The key differences are that the aid storage length is
++		 * variably sized in the packet but fixed in nfc_evt_transaction,
++		 * that aid_len is u8 in the packet but u32 in the structure,
++		 * and that the tags in the packet are not included in
++		 * nfc_evt_transaction.
++ *
++ * size(b): 1 1 5-16 1 1 0-255
++ * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4
++ * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params
++ * example: 0x81 5-16 X 0x82 0-255 X
+ */
+- if (skb->len < NFC_MIN_AID_LENGTH + 2 &&
+- skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
++ if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
+ return -EPROTO;
+
+- transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL);
+- if (!transaction)
+- return -ENOMEM;
++ aid_len = skb->data[1];
+
+- transaction->aid_len = skb->data[1];
+- memcpy(transaction->aid, &skb->data[2], transaction->aid_len);
++ if (skb->len < aid_len + 4 ||
++ aid_len > sizeof(transaction->aid))
++ return -EPROTO;
+
+- /* Check next byte is PARAMETERS tag (82) */
+- if (skb->data[transaction->aid_len + 2] !=
+- NFC_EVT_TRANSACTION_PARAMS_TAG)
++ params_len = skb->data[aid_len + 3];
++
++		/* Verify that the PARAMETERS tag is (82) and, as a final check,
++		 * that there is enough space in the packet to read everything.
++ */
++ if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG ||
++ skb->len < aid_len + 4 + params_len)
+ return -EPROTO;
+
+- transaction->params_len = skb->data[transaction->aid_len + 3];
+- memcpy(transaction->params, skb->data +
+- transaction->aid_len + 4, transaction->params_len);
++ transaction = devm_kzalloc(dev, sizeof(*transaction) +
++ params_len, GFP_KERNEL);
++ if (!transaction)
++ return -ENOMEM;
++
++ transaction->aid_len = aid_len;
++ transaction->params_len = params_len;
++
++ memcpy(transaction->aid, &skb->data[2], aid_len);
++ memcpy(transaction->params, &skb->data[aid_len + 4],
++ params_len);
+
+ r = nfc_se_transaction(ndev->nfc_dev, host, transaction);
+ break;
+@@ -651,6 +674,12 @@ int st_nci_se_io(struct nci_dev *ndev, u32 se_idx,
+ ST_NCI_EVT_TRANSMIT_DATA, apdu,
+ apdu_length);
+ default:
++		/* Need to free cb_context here, as at the moment we can't
++		 * clearly indicate to the caller whether the callback function
++		 * (which would free it) will be called or not. In both cases a
++		 * negative value may be returned to the caller.
++ */
++ kfree(cb_context);
+ return -ENODEV;
+ }
+ }
+diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
+index 279d88128b2e4..d56bc24709b5c 100644
+--- a/drivers/nfc/st21nfca/i2c.c
++++ b/drivers/nfc/st21nfca/i2c.c
+@@ -528,7 +528,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+ phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+ if (IS_ERR(phy->gpiod_ena)) {
+ nfc_err(dev, "Unable to get ENABLE GPIO\n");
+- return PTR_ERR(phy->gpiod_ena);
++ r = PTR_ERR(phy->gpiod_ena);
++ goto out_free;
+ }
+
+ phy->se_status.is_ese_present =
+@@ -539,7 +540,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+ r = st21nfca_hci_platform_init(phy);
+ if (r < 0) {
+ nfc_err(&client->dev, "Unable to reboot st21nfca\n");
+- return r;
++ goto out_free;
+ }
+
+ r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+@@ -548,15 +549,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+ ST21NFCA_HCI_DRIVER_NAME, phy);
+ if (r < 0) {
+ nfc_err(&client->dev, "Unable to register IRQ handler\n");
+- return r;
++ goto out_free;
+ }
+
+- return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+- ST21NFCA_FRAME_HEADROOM,
+- ST21NFCA_FRAME_TAILROOM,
+- ST21NFCA_HCI_LLC_MAX_PAYLOAD,
+- &phy->hdev,
+- &phy->se_status);
++ r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
++ ST21NFCA_FRAME_HEADROOM,
++ ST21NFCA_FRAME_TAILROOM,
++ ST21NFCA_HCI_LLC_MAX_PAYLOAD,
++ &phy->hdev,
++ &phy->se_status);
++ if (r)
++ goto out_free;
++
++ return 0;
++
++out_free:
++ kfree_skb(phy->pending_skb);
++ return r;
+ }
+
+ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+@@ -567,6 +576,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+
+ if (phy->powered)
+ st21nfca_hci_i2c_disable(phy);
++ if (phy->pending_skb)
++ kfree_skb(phy->pending_skb);
+
+ return 0;
+ }
+diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c
+index c8bdf078d1115..6a1d3b2752fbf 100644
+--- a/drivers/nfc/st21nfca/se.c
++++ b/drivers/nfc/st21nfca/se.c
+@@ -236,12 +236,18 @@ int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx,
+ ST21NFCA_EVT_TRANSMIT_DATA,
+ apdu, apdu_length);
+ default:
++		/* Need to free cb_context here, as at the moment we can't
++		 * clearly indicate to the caller whether the callback function
++		 * (which would free it) will be called or not. In both cases a
++		 * negative value may be returned to the caller.
++ */
++ kfree(cb_context);
+ return -ENODEV;
+ }
+ }
+ EXPORT_SYMBOL(st21nfca_hci_se_io);
+
+-static void st21nfca_se_wt_timeout(struct timer_list *t)
++static void st21nfca_se_wt_work(struct work_struct *work)
+ {
+ /*
+ * No answer from the secure element
+@@ -254,8 +260,9 @@ static void st21nfca_se_wt_timeout(struct timer_list *t)
+ */
+ /* hardware reset managed through VCC_UICC_OUT power supply */
+ u8 param = 0x01;
+- struct st21nfca_hci_info *info = from_timer(info, t,
+- se_info.bwi_timer);
++ struct st21nfca_hci_info *info = container_of(work,
++ struct st21nfca_hci_info,
++ se_info.timeout_work);
+
+ pr_debug("\n");
+
+@@ -273,6 +280,13 @@ static void st21nfca_se_wt_timeout(struct timer_list *t)
+ info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME);
+ }
+
++static void st21nfca_se_wt_timeout(struct timer_list *t)
++{
++ struct st21nfca_hci_info *info = from_timer(info, t, se_info.bwi_timer);
++
++ schedule_work(&info->se_info.timeout_work);
++}
++
+ static void st21nfca_se_activation_timeout(struct timer_list *t)
+ {
+ struct st21nfca_hci_info *info = from_timer(info, t,
+@@ -296,6 +310,8 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
+ int r = 0;
+ struct device *dev = &hdev->ndev->dev;
+ struct nfc_evt_transaction *transaction;
++ u32 aid_len;
++ u8 params_len;
+
+ pr_debug("connectivity gate event: %x\n", event);
+
+@@ -304,33 +320,48 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
+ r = nfc_se_connectivity(hdev->ndev, host);
+ break;
+ case ST21NFCA_EVT_TRANSACTION:
+- /*
+- * According to specification etsi 102 622
++ /* According to specification etsi 102 622
+ * 11.2.2.4 EVT_TRANSACTION Table 52
+ * Description Tag Length
+ * AID 81 5 to 16
+ * PARAMETERS 82 0 to 255
++ *
++		 * The key differences are that the aid storage length is variably
++		 * sized in the packet but fixed in nfc_evt_transaction, that the
++		 * aid_len is u8 in the packet but u32 in the structure, and that
++		 * the tags in the packet are not included in nfc_evt_transaction.
++ *
++ * size in bytes: 1 1 5-16 1 1 0-255
++ * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4
++ * member name: aid_tag(M) aid_len aid params_tag(M) params_len params
++ * example: 0x81 5-16 X 0x82 0-255 X
+ */
+- if (skb->len < NFC_MIN_AID_LENGTH + 2 &&
+- skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
++ if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
+ return -EPROTO;
+
+- transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL);
+- if (!transaction)
+- return -ENOMEM;
++ aid_len = skb->data[1];
+
+- transaction->aid_len = skb->data[1];
+- memcpy(transaction->aid, &skb->data[2],
+- transaction->aid_len);
++ if (skb->len < aid_len + 4 || aid_len > sizeof(transaction->aid))
++ return -EPROTO;
+
+- /* Check next byte is PARAMETERS tag (82) */
+- if (skb->data[transaction->aid_len + 2] !=
+- NFC_EVT_TRANSACTION_PARAMS_TAG)
++ params_len = skb->data[aid_len + 3];
++
++		/* Verify that the PARAMETERS tag is (82) and, as a final check, that
++		 * there is enough space in the packet to read everything.
++ */
++ if ((skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG) ||
++ (skb->len < aid_len + 4 + params_len))
+ return -EPROTO;
+
+- transaction->params_len = skb->data[transaction->aid_len + 3];
+- memcpy(transaction->params, skb->data +
+- transaction->aid_len + 4, transaction->params_len);
++ transaction = devm_kzalloc(dev, sizeof(*transaction) + params_len, GFP_KERNEL);
++ if (!transaction)
++ return -ENOMEM;
++
++ transaction->aid_len = aid_len;
++ transaction->params_len = params_len;
++
++ memcpy(transaction->aid, &skb->data[2], aid_len);
++ memcpy(transaction->params, &skb->data[aid_len + 4], params_len);
+
+ r = nfc_se_transaction(hdev->ndev, host, transaction);
+ break;
+@@ -354,6 +385,7 @@ int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev,
+ switch (event) {
+ case ST21NFCA_EVT_TRANSMIT_DATA:
+ del_timer_sync(&info->se_info.bwi_timer);
++ cancel_work_sync(&info->se_info.timeout_work);
+ info->se_info.bwi_active = false;
+ r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+ ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0);
+@@ -383,6 +415,7 @@ void st21nfca_se_init(struct nfc_hci_dev *hdev)
+ struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+ init_completion(&info->se_info.req_completion);
++ INIT_WORK(&info->se_info.timeout_work, st21nfca_se_wt_work);
+ /* initialize timers */
+ timer_setup(&info->se_info.bwi_timer, st21nfca_se_wt_timeout, 0);
+ info->se_info.bwi_active = false;
+@@ -410,6 +443,7 @@ void st21nfca_se_deinit(struct nfc_hci_dev *hdev)
+ if (info->se_info.se_active)
+ del_timer_sync(&info->se_info.se_active_timer);
+
++ cancel_work_sync(&info->se_info.timeout_work);
+ info->se_info.bwi_active = false;
+ info->se_info.se_active = false;
+ }
+diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
+index cb6ad916be911..ae6771cc9894a 100644
+--- a/drivers/nfc/st21nfca/st21nfca.h
++++ b/drivers/nfc/st21nfca/st21nfca.h
+@@ -141,6 +141,7 @@ struct st21nfca_se_info {
+
+ se_io_cb_t cb;
+ void *cb_context;
++ struct work_struct timeout_work;
+ };
+
+ struct st21nfca_hci_info {
+diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c
+index 221fa3bb8705e..6317e8505aaad 100644
+--- a/drivers/nfc/virtual_ncidev.c
++++ b/drivers/nfc/virtual_ncidev.c
+@@ -54,16 +54,19 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
+ mutex_lock(&nci_mutex);
+ if (state != virtual_ncidev_enabled) {
+ mutex_unlock(&nci_mutex);
++ kfree_skb(skb);
+ return 0;
+ }
+
+ if (send_buff) {
+ mutex_unlock(&nci_mutex);
++ kfree_skb(skb);
+ return -1;
+ }
+ send_buff = skb_copy(skb, GFP_KERNEL);
+ mutex_unlock(&nci_mutex);
+ wake_up_interruptible(&wq);
++ consume_skb(skb);
+
+ return 0;
+ }
+diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
+index 87847c3800516..1c03a78c125b0 100644
+--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
++++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
+@@ -1336,12 +1336,17 @@ static struct pci_driver amd_ntb_pci_driver = {
+
+ static int __init amd_ntb_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+- return pci_register_driver(&amd_ntb_pci_driver);
++ ret = pci_register_driver(&amd_ntb_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(debugfs_dir);
++
++ return ret;
+ }
+ module_init(amd_ntb_pci_driver_init);
+
+diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
+index 733557231ed0b..72060acb9cafc 100644
+--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
++++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
+@@ -2891,6 +2891,7 @@ static struct pci_driver idt_pci_driver = {
+
+ static int __init idt_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ /* Create the top DebugFS directory if the FS is initialized */
+@@ -2898,7 +2899,11 @@ static int __init idt_pci_driver_init(void)
+ dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ /* Register the NTB hardware driver to handle the PCI device */
+- return pci_register_driver(&idt_pci_driver);
++ ret = pci_register_driver(&idt_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(dbgfs_topdir);
++
++ return ret;
+ }
+ module_init(idt_pci_driver_init);
+
+diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
+index e5f14e20a9ff7..41897167abc71 100644
+--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
++++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
+@@ -2060,12 +2060,17 @@ static struct pci_driver intel_ntb_pci_driver = {
+
+ static int __init intel_ntb_pci_driver_init(void)
+ {
++ int ret;
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+- return pci_register_driver(&intel_ntb_pci_driver);
++ ret = pci_register_driver(&intel_ntb_pci_driver);
++ if (ret)
++ debugfs_remove_recursive(debugfs_dir);
++
++ return ret;
+ }
+ module_init(intel_ntb_pci_driver_init);
+
+diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c
+index fede05151f698..4081fc538ff45 100644
+--- a/drivers/ntb/hw/intel/ntb_hw_gen4.c
++++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c
+@@ -168,6 +168,18 @@ static enum ntb_topo gen4_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
+ return NTB_TOPO_NONE;
+ }
+
++static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
++{
++ switch (ppd & SPR_PPD_TOPO_MASK) {
++ case SPR_PPD_TOPO_B2B_USD:
++ return NTB_TOPO_B2B_USD;
++ case SPR_PPD_TOPO_B2B_DSD:
++ return NTB_TOPO_B2B_DSD;
++ }
++
++ return NTB_TOPO_NONE;
++}
++
+ int gen4_init_dev(struct intel_ntb_dev *ndev)
+ {
+ struct pci_dev *pdev = ndev->ntb.pdev;
+@@ -183,7 +195,10 @@ int gen4_init_dev(struct intel_ntb_dev *ndev)
+ }
+
+ ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET);
+- ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1);
++ if (pdev_is_ICX(pdev))
++ ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1);
++ else if (pdev_is_SPR(pdev))
++ ndev->ntb.topo = spr_ppd_topo(ndev, ppd1);
+ dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1,
+ ntb_topo_string(ndev->ntb.topo));
+ if (ndev->ntb.topo == NTB_TOPO_NONE)
+diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h
+index 3fcd3fdce9edf..f91323eaf5ce4 100644
+--- a/drivers/ntb/hw/intel/ntb_hw_gen4.h
++++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h
+@@ -49,10 +49,14 @@
+ #define GEN4_PPD_CLEAR_TRN 0x0001
+ #define GEN4_PPD_LINKTRN 0x0008
+ #define GEN4_PPD_CONN_MASK 0x0300
++#define SPR_PPD_CONN_MASK 0x0700
+ #define GEN4_PPD_CONN_B2B 0x0200
+ #define GEN4_PPD_DEV_MASK 0x1000
+ #define GEN4_PPD_DEV_DSD 0x1000
+ #define GEN4_PPD_DEV_USD 0x0000
++#define SPR_PPD_DEV_MASK 0x4000
++#define SPR_PPD_DEV_DSD 0x4000
++#define SPR_PPD_DEV_USD 0x0000
+ #define GEN4_LINK_CTRL_LINK_DISABLE 0x0010
+
+ #define GEN4_SLOTSTS 0xb05a
+@@ -62,6 +66,10 @@
+ #define GEN4_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_USD)
+ #define GEN4_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_DSD)
+
++#define SPR_PPD_TOPO_MASK (SPR_PPD_CONN_MASK | SPR_PPD_DEV_MASK)
++#define SPR_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_USD)
++#define SPR_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_DSD)
++
+ #define GEN4_DB_COUNT 32
+ #define GEN4_DB_LINK 32
+ #define GEN4_DB_LINK_BIT BIT_ULL(GEN4_DB_LINK)
+@@ -112,4 +120,12 @@ static inline int pdev_is_ICX(struct pci_dev *pdev)
+ return 0;
+ }
+
++static inline int pdev_is_SPR(struct pci_dev *pdev)
++{
++ if (pdev_is_gen4(pdev) &&
++ pdev->revision > PCI_DEVICE_REVISION_ICX_MAX)
++ return 1;
++ return 0;
++}
++
+ #endif
+diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+index 4c6eb61a6ac62..ec9cb6c81edae 100644
+--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
++++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+@@ -419,8 +419,8 @@ static void switchtec_ntb_part_link_speed(struct switchtec_ntb *sndev,
+ enum ntb_width *width)
+ {
+ struct switchtec_dev *stdev = sndev->stdev;
+-
+- u32 pff = ioread32(&stdev->mmio_part_cfg[partition].vep_pff_inst_id);
++ u32 pff =
++ ioread32(&stdev->mmio_part_cfg_all[partition].vep_pff_inst_id);
+ u32 linksta = ioread32(&stdev->mmio_pff_csr[pff].pci_cap_region[13]);
+
+ if (speed)
+@@ -840,7 +840,6 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
+ u64 tpart_vec;
+ int self;
+ u64 part_map;
+- int bit;
+
+ sndev->ntb.pdev = sndev->stdev->pdev;
+ sndev->ntb.topo = NTB_TOPO_SWITCH;
+@@ -861,29 +860,28 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
+ part_map = ioread64(&sndev->mmio_ntb->ep_map);
+ part_map &= ~(1 << sndev->self_partition);
+
+- if (!ffs(tpart_vec)) {
++ if (!tpart_vec) {
+ if (sndev->stdev->partition_count != 2) {
+ dev_err(&sndev->stdev->dev,
+ "ntb target partition not defined\n");
+ return -ENODEV;
+ }
+
+- bit = ffs(part_map);
+- if (!bit) {
++ if (!part_map) {
+ dev_err(&sndev->stdev->dev,
+ "peer partition is not NT partition\n");
+ return -ENODEV;
+ }
+
+- sndev->peer_partition = bit - 1;
++ sndev->peer_partition = __ffs64(part_map);
+ } else {
+- if (ffs(tpart_vec) != fls(tpart_vec)) {
++ if (__ffs64(tpart_vec) != (fls64(tpart_vec) - 1)) {
+ dev_err(&sndev->stdev->dev,
+ "ntb driver only supports 1 pair of 1-1 ntb mapping\n");
+ return -ENODEV;
+ }
+
+- sndev->peer_partition = ffs(tpart_vec) - 1;
++ sndev->peer_partition = __ffs64(tpart_vec);
+ if (!(part_map & (1ULL << sndev->peer_partition))) {
+ dev_err(&sndev->stdev->dev,
+ "ntb target partition is not NT partition\n");
+diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
+index a9b97ebc71ac5..9532108d2dce1 100644
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -410,7 +410,7 @@ int ntb_transport_register_client_dev(char *device_name)
+
+ rc = device_register(dev);
+ if (rc) {
+- kfree(client_dev);
++ put_device(dev);
+ goto err;
+ }
+
+@@ -909,7 +909,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
+ return 0;
+ }
+
+-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
++static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp)
+ {
+ qp->link_is_up = false;
+ qp->active = false;
+@@ -932,6 +932,13 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+ qp->tx_async = 0;
+ }
+
++static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
++{
++ ntb_qp_link_context_reset(qp);
++ if (qp->remote_rx_info)
++ qp->remote_rx_info->entry = qp->rx_max_entry - 1;
++}
++
+ static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+ {
+ struct ntb_transport_ctx *nt = qp->transport;
+@@ -1174,7 +1181,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+ qp->ndev = nt->ndev;
+ qp->client_ready = false;
+ qp->event_handler = NULL;
+- ntb_qp_link_down_reset(qp);
++ ntb_qp_link_context_reset(qp);
+
+ if (mw_num < qp_count % mw_count)
+ num_qps_mw = qp_count / mw_count + 1;
+@@ -2276,9 +2283,13 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+ struct ntb_queue_entry *entry;
+ int rc;
+
+- if (!qp || !qp->link_is_up || !len)
++ if (!qp || !len)
+ return -EINVAL;
+
++ /* If the qp link is down already, just ignore. */
++ if (!qp->link_is_up)
++ return 0;
++
+ entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+ if (!entry) {
+ qp->tx_err_no_buf++;
+@@ -2418,7 +2429,7 @@ unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+ unsigned int head = qp->tx_index;
+ unsigned int tail = qp->remote_rx_info->entry;
+
+- return tail > head ? tail - head : qp->tx_max_entry + tail - head;
++ return tail >= head ? tail - head : qp->tx_max_entry + tail - head;
+ }
+ EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
+
+diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
+index b7bf3f863d79b..eeeb4b1c97d2c 100644
+--- a/drivers/ntb/test/ntb_tool.c
++++ b/drivers/ntb/test/ntb_tool.c
+@@ -367,14 +367,16 @@ static ssize_t tool_fn_write(struct tool_ctx *tc,
+ u64 bits;
+ int n;
+
++ if (*offp)
++ return 0;
++
+ buf = kmalloc(size + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+- ret = simple_write_to_buffer(buf, size, offp, ubuf, size);
+- if (ret < 0) {
++ if (copy_from_user(buf, ubuf, size)) {
+ kfree(buf);
+- return ret;
++ return -EFAULT;
+ }
+
+ buf[size] = 0;
+@@ -996,6 +998,8 @@ static int tool_init_mws(struct tool_ctx *tc)
+ tc->peers[pidx].outmws =
+ devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt,
+ sizeof(*tc->peers[pidx].outmws), GFP_KERNEL);
++ if (tc->peers[pidx].outmws == NULL)
++ return -ENOMEM;
+
+ for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) {
+ tc->peers[pidx].outmws[widx].pidx = pidx;
+diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
+index 88e1f9a0faafd..78cf0e7b53d5b 100644
+--- a/drivers/nubus/proc.c
++++ b/drivers/nubus/proc.c
+@@ -137,6 +137,18 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v)
+ return 0;
+ }
+
++static int nubus_rsrc_proc_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, nubus_proc_rsrc_show, inode);
++}
++
++static const struct proc_ops nubus_rsrc_proc_ops = {
++ .proc_open = nubus_rsrc_proc_open,
++ .proc_read = seq_read,
++ .proc_lseek = seq_lseek,
++ .proc_release = single_release,
++};
++
+ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
+ const struct nubus_dirent *ent,
+ unsigned int size)
+@@ -152,8 +164,8 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
+ pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
+ else
+ pde_data = NULL;
+- proc_create_single_data(name, S_IFREG | 0444, procdir,
+- nubus_proc_rsrc_show, pde_data);
++ proc_create_data(name, S_IFREG | 0444, procdir,
++ &nubus_rsrc_proc_ops, pde_data);
+ }
+
+ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
+@@ -166,9 +178,9 @@ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
+ return;
+
+ snprintf(name, sizeof(name), "%x", ent->type);
+- proc_create_single_data(name, S_IFREG | 0444, procdir,
+- nubus_proc_rsrc_show,
+- nubus_proc_alloc_pde_data(data, 0));
++ proc_create_data(name, S_IFREG | 0444, procdir,
++ &nubus_rsrc_proc_ops,
++ nubus_proc_alloc_pde_data(data, 0));
+ }
+
+ /*
+diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
+index 92dec49522972..3fd1bdb9fc05b 100644
+--- a/drivers/nvdimm/btt.c
++++ b/drivers/nvdimm/btt.c
+@@ -1538,7 +1538,6 @@ static int btt_blk_init(struct btt *btt)
+ int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
+
+ if (rc) {
+- del_gendisk(btt->btt_disk);
+ blk_cleanup_disk(btt->btt_disk);
+ return rc;
+ }
+diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
+index 9dc7f3edd42b1..84d197cc09f8d 100644
+--- a/drivers/nvdimm/bus.c
++++ b/drivers/nvdimm/bus.c
+@@ -185,8 +185,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
+ ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;
+
+ /* make sure we are in the region */
+- if (ctx->phys < nd_region->ndr_start
+- || (ctx->phys + ctx->cleared) > ndr_end)
++ if (ctx->phys < nd_region->ndr_start ||
++ (ctx->phys + ctx->cleared - 1) > ndr_end)
+ return 0;
+
+ sector = (ctx->phys - nd_region->ndr_start) / 512;
+diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
+index 7de592d7eff45..47625fe4276ee 100644
+--- a/drivers/nvdimm/core.c
++++ b/drivers/nvdimm/core.c
+@@ -399,9 +399,7 @@ static ssize_t capability_show(struct device *dev,
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+- nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+- nvdimm_bus_unlock(dev);
+
+ switch (cap) {
+ case NVDIMM_FWA_CAP_QUIESCE:
+@@ -426,10 +424,8 @@ static ssize_t activate_show(struct device *dev,
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+- nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+- nvdimm_bus_unlock(dev);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return -EOPNOTSUPP;
+@@ -474,7 +470,6 @@ static ssize_t activate_store(struct device *dev,
+ else
+ return -EINVAL;
+
+- nvdimm_bus_lock(dev);
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+
+ switch (state) {
+@@ -492,7 +487,6 @@ static ssize_t activate_store(struct device *dev,
+ default:
+ rc = -ENXIO;
+ }
+- nvdimm_bus_unlock(dev);
+
+ if (rc == 0)
+ rc = len;
+@@ -515,10 +509,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut
+ if (!nd_desc->fw_ops)
+ return 0;
+
+- nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+- nvdimm_bus_unlock(dev);
+-
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return 0;
+
+diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
+index 054154c22899a..2721dd2ead0a7 100644
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -429,8 +429,10 @@ static int pmem_attach_disk(struct device *dev,
+ bb_range.end = res->end;
+ }
+
+- if (IS_ERR(addr))
+- return PTR_ERR(addr);
++ if (IS_ERR(addr)) {
++ rc = PTR_ERR(addr);
++ goto out;
++ }
+ pmem->virt_addr = addr;
+
+ blk_queue_write_cache(q, true, fua);
+@@ -455,7 +457,8 @@ static int pmem_attach_disk(struct device *dev,
+ flags = DAXDEV_F_SYNC;
+ dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
+ if (IS_ERR(dax_dev)) {
+- return PTR_ERR(dax_dev);
++ rc = PTR_ERR(dax_dev);
++ goto out;
+ }
+ dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
+ pmem->dax_dev = dax_dev;
+@@ -470,8 +473,10 @@ static int pmem_attach_disk(struct device *dev,
+ "badblocks");
+ if (!pmem->bb_state)
+ dev_warn(dev, "'badblocks' notification disabled\n");
+-
+ return 0;
++out:
++ blk_cleanup_disk(pmem->disk);
++ return rc;
+ }
+
+ static int nd_pmem_probe(struct device *dev)
+diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
+index 9ccf3d6087993..70ad891a76bae 100644
+--- a/drivers/nvdimm/region_devs.c
++++ b/drivers/nvdimm/region_devs.c
+@@ -1025,6 +1025,9 @@ static unsigned long default_align(struct nd_region *nd_region)
+ }
+ }
+
++ if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
++ align = PAGE_SIZE;
++
+ mappings = max_t(u16, 1, nd_region->ndr_mappings);
+ div_u64_rem(align, mappings, &remainder);
+ if (remainder)
+diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
+index 4b80150e4afa7..b5aa55c614616 100644
+--- a/drivers/nvdimm/security.c
++++ b/drivers/nvdimm/security.c
+@@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
+ || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+- if (dev->driver == NULL) {
+- dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
+- return -EINVAL;
+- }
+-
+ rc = check_security_state(nvdimm);
+ if (rc)
+ return rc;
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index f8dd664b2eda5..98a7649a0f061 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -131,7 +131,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
+ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ return;
+
+- blk_set_queue_dying(ns->queue);
++ blk_mark_disk_dead(ns->disk);
+ blk_mq_unquiesce_queue(ns->queue);
+
+ set_capacity_and_notify(ns->disk, 0);
+@@ -853,16 +853,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+ range = page_address(ns->ctrl->discard_page);
+ }
+
+- __rq_for_each_bio(bio, req) {
+- u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
+- u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
++ if (queue_max_discard_segments(req->q) == 1) {
++ u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
++ u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
+
+- if (n < segments) {
+- range[n].cattr = cpu_to_le32(0);
+- range[n].nlb = cpu_to_le32(nlb);
+- range[n].slba = cpu_to_le64(slba);
++ range[0].cattr = cpu_to_le32(0);
++ range[0].nlb = cpu_to_le32(nlb);
++ range[0].slba = cpu_to_le64(slba);
++ n = 1;
++ } else {
++ __rq_for_each_bio(bio, req) {
++ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
++ u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
++
++ if (n < segments) {
++ range[n].cattr = cpu_to_le32(0);
++ range[n].nlb = cpu_to_le32(nlb);
++ range[n].slba = cpu_to_le64(slba);
++ }
++ n++;
+ }
+- n++;
+ }
+
+ if (WARN_ON_ONCE(n != segments)) {
+@@ -1113,6 +1123,18 @@ static u32 nvme_known_admin_effects(u8 opcode)
+ return 0;
+ }
+
++static u32 nvme_known_nvm_effects(u8 opcode)
++{
++ switch (opcode) {
++ case nvme_cmd_write:
++ case nvme_cmd_write_zeroes:
++ case nvme_cmd_write_uncor:
++ return NVME_CMD_EFFECTS_LBCC;
++ default:
++ return 0;
++ }
++}
++
+ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+ {
+ u32 effects = 0;
+@@ -1120,16 +1142,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+ if (ns) {
+ if (ns->head->effects)
+ effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
++ if (ns->head->ids.csi == NVME_CSI_NVM)
++ effects |= nvme_known_nvm_effects(opcode);
+ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
+ dev_warn_once(ctrl->device,
+- "IO command:%02x has unhandled effects:%08x\n",
++ "IO command:%02x has unusual effects:%08x\n",
+ opcode, effects);
+- return 0;
+- }
+
+- if (ctrl->effects)
+- effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+- effects |= nvme_known_admin_effects(opcode);
++ /*
++		 * NVME_CMD_EFFECTS_CSE_MASK causes a freeze of all I/O queues,
++		 * which would deadlock when done on an I/O command. Note that
++		 * we already warn about an unusual effect above.
++ */
++ effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
++ } else {
++ if (ctrl->effects)
++ effects = le32_to_cpu(ctrl->effects->acs[opcode]);
++ effects |= nvme_known_admin_effects(opcode);
++ }
+
+ return effects;
+ }
+@@ -1217,9 +1247,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
+ * The host should send Keep Alive commands at half of the Keep Alive Timeout
+ * accounting for transport roundtrip times [..].
+ */
++static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
++{
++ unsigned long delay = ctrl->kato * HZ / 2;
++
++ /*
++ * When using Traffic Based Keep Alive, we need to run
++ * nvme_keep_alive_work at twice the normal frequency, as one
++ * command completion can postpone sending a keep alive command
++ * by up to twice the delay between runs.
++ */
++ if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
++ delay /= 2;
++ return delay;
++}
++
+ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
+ {
+- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
++ queue_delayed_work(nvme_wq, &ctrl->ka_work,
++ nvme_keep_alive_work_period(ctrl));
+ }
+
+ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
+@@ -1354,6 +1400,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
+ warn_str, cur->nidl);
+ return -1;
+ }
++ if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
++ return NVME_NIDT_EUI64_LEN;
+ memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN);
+ return NVME_NIDT_EUI64_LEN;
+ case NVME_NIDT_NGUID:
+@@ -1362,6 +1410,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
+ warn_str, cur->nidl);
+ return -1;
+ }
++ if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
++ return NVME_NIDT_NGUID_LEN;
+ memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN);
+ return NVME_NIDT_NGUID_LEN;
+ case NVME_NIDT_UUID:
+@@ -1370,6 +1420,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
+ warn_str, cur->nidl);
+ return -1;
+ }
++ if (ctrl->quirks & NVME_QUIRK_BOGUS_NID)
++ return NVME_NIDT_UUID_LEN;
+ uuid_copy(&ids->uuid, data + sizeof(*cur));
+ return NVME_NIDT_UUID_LEN;
+ case NVME_NIDT_CSI:
+@@ -1466,12 +1518,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+ if ((*id)->ncap == 0) /* namespace not allocated or attached */
+ goto out_free_id;
+
+- if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+- !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+- memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
+- if (ctrl->vs >= NVME_VS(1, 2, 0) &&
+- !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+- memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
++
++ if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) {
++ dev_info(ctrl->device,
++ "Ignoring bogus Namespace Identifiers\n");
++ } else {
++ if (ctrl->vs >= NVME_VS(1, 1, 0) &&
++ !memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
++ memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64));
++ if (ctrl->vs >= NVME_VS(1, 2, 0) &&
++ !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
++ memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid));
++ }
+
+ return 0;
+
+@@ -1674,13 +1732,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
+ blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
+ }
+
+-static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
+-{
+- return !uuid_is_null(&ids->uuid) ||
+- memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) ||
+- memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
+-}
+-
+ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
+ {
+ return uuid_equal(&a->uuid, &b->uuid) &&
+@@ -1714,7 +1765,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ return 0;
+ }
+
+-static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
++static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+ {
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+@@ -1730,7 +1781,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+
+ ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+ if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+- return 0;
++ return;
++
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
+ /*
+ * The NVMe over Fabrics specification only supports metadata as
+@@ -1738,10 +1790,21 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+ * remap the separate metadata buffer from the block layer.
+ */
+ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
+- return -EINVAL;
+- if (ctrl->max_integrity_segments)
+- ns->features |=
+- (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
++ return;
++
++ ns->features |= NVME_NS_EXT_LBAS;
++
++ /*
++ * The current fabrics transport drivers support namespace
++ * metadata formats only if nvme_ns_has_pi() returns true.
++ * Suppress support for all other formats so the namespace will
++ * have a 0 capacity and not be usable through the block stack.
++ *
++ * Note, this check will need to be modified if any drivers
++ * gain the ability to use other metadata formats.
++ */
++ if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
++ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ } else {
+ /*
+ * For PCIe controllers, we can't easily remap the separate
+@@ -1754,8 +1817,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+ else
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ }
+-
+- return 0;
+ }
+
+ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+@@ -1772,7 +1833,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+ blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
+ }
+ blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
+- blk_queue_dma_alignment(q, 7);
++ blk_queue_dma_alignment(q, 3);
+ blk_queue_write_cache(q, vwc, vwc);
+ }
+
+@@ -1845,9 +1906,6 @@ static void nvme_update_disk_info(struct gendisk *disk,
+ nvme_config_discard(disk, ns);
+ blk_queue_max_write_zeroes_sectors(disk->queue,
+ ns->ctrl->max_zeroes_sectors);
+-
+- set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
+- test_bit(NVME_NS_FORCE_RO, &ns->flags));
+ }
+
+ static inline bool nvme_first_scan(struct gendisk *disk)
+@@ -1896,18 +1954,20 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
+ ns->lba_shift = id->lbaf[lbaf].ds;
+ nvme_set_queue_limits(ns->ctrl, ns->queue);
+
+- ret = nvme_configure_metadata(ns, id);
+- if (ret)
+- goto out_unfreeze;
++ nvme_configure_metadata(ns, id);
+ nvme_set_chunk_sectors(ns, id);
+ nvme_update_disk_info(ns->disk, ns, id);
+
+ if (ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_update_zone_info(ns, lbaf);
+- if (ret)
+- goto out_unfreeze;
++ if (ret) {
++ blk_mq_unfreeze_queue(ns->disk->queue);
++ goto out;
++ }
+ }
+
++ set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) ||
++ test_bit(NVME_NS_FORCE_RO, &ns->flags));
+ set_bit(NVME_NS_READY, &ns->flags);
+ blk_mq_unfreeze_queue(ns->disk->queue);
+
+@@ -1920,16 +1980,17 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
+ if (nvme_ns_head_multipath(ns->head)) {
+ blk_mq_freeze_queue(ns->head->disk->queue);
+ nvme_update_disk_info(ns->head->disk, ns, id);
++ set_disk_ro(ns->head->disk,
++ (id->nsattr & NVME_NS_ATTR_RO) ||
++ test_bit(NVME_NS_FORCE_RO, &ns->flags));
+ nvme_mpath_revalidate_paths(ns);
+ blk_stack_limits(&ns->head->disk->queue->limits,
+ &ns->queue->limits, 0);
+ disk_update_readahead(ns->head->disk);
+ blk_mq_unfreeze_queue(ns->head->disk->queue);
+ }
+- return 0;
+
+-out_unfreeze:
+- blk_mq_unfreeze_queue(ns->disk->queue);
++ ret = 0;
+ out:
+ /*
+ * If probing fails due an unsupported feature, hide the block device,
+@@ -1937,6 +1998,7 @@ out:
+ */
+ if (ret == -ENODEV) {
+ ns->disk->flags |= GENHD_FL_HIDDEN;
++ set_bit(NVME_NS_READY, &ns->flags);
+ ret = 0;
+ }
+ return ret;
+@@ -2040,14 +2102,14 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+
+ static int nvme_pr_clear(struct block_device *bdev, u64 key)
+ {
+- u32 cdw10 = 1 | (key ? 1 << 3 : 0);
++ u32 cdw10 = 1 | (key ? 0 : 1 << 3);
+
+- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
++ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+ }
+
+ static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+ {
+- u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
++ u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3);
+
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+ }
+@@ -2095,6 +2157,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector,
+ static const struct block_device_operations nvme_bdev_ops = {
+ .owner = THIS_MODULE,
+ .ioctl = nvme_ioctl,
++ .compat_ioctl = blkdev_compat_ptr_ioctl,
+ .open = nvme_open,
+ .release = nvme_release,
+ .getgeo = nvme_getgeo,
+@@ -2468,6 +2531,34 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
+ .vid = 0x14a4,
+ .fr = "22301111",
+ .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
++ },
++ {
++ /*
++ * This Kioxia CD6-V Series / HPE PE8030 device times out and
++ * aborts I/O during any load, but more easily reproducible
++ * with discards (fstrim).
++ *
++ * The device is left in a state where it is also not possible
++ * to use "nvme set-feature" to disable APST, but booting with
++ * nvme_core.default_ps_max_latency=0 works.
++ */
++ .vid = 0x1e0f,
++ .mn = "KCD6XVUL6T40",
++ .quirks = NVME_QUIRK_NO_APST,
++ },
++ {
++ /*
++ * The external Samsung X5 SSD fails initialization without a
++ * delay before checking if it is ready and has a whole set of
++ * other problems. To make this even more interesting, it
++ * shares the PCI ID with internal Samsung 970 Evo Plus that
++ * does not need or want these quirks.
++ */
++ .vid = 0x144d,
++ .mn = "Samsung Portable SSD X5",
++ .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
++ NVME_QUIRK_NO_DEEPEST_PS |
++ NVME_QUIRK_IGNORE_DEV_SUBNQN,
+ }
+ };
+
+@@ -2687,7 +2778,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+ nvme_init_subnqn(subsys, ctrl, id);
+ memcpy(subsys->serial, id->sn, sizeof(subsys->serial));
+ memcpy(subsys->model, id->mn, sizeof(subsys->model));
+- memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
+ subsys->vendor_id = le16_to_cpu(id->vid);
+ subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
+@@ -2830,7 +2920,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+- return 0;
++ return -ENOMEM;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = NVME_ID_CNS_CS_CTRL;
+@@ -2877,10 +2967,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
+ if (!ctrl->identified) {
+ unsigned int i;
+
+- ret = nvme_init_subsystem(ctrl, id);
+- if (ret)
+- goto out_free;
+-
+ /*
+ * Check for quirks. Quirk can depend on firmware version,
+ * so, in principle, the set of quirks present can change
+@@ -2893,7 +2979,13 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
+ if (quirk_matches(id, &core_quirks[i]))
+ ctrl->quirks |= core_quirks[i].quirks;
+ }
++
++ ret = nvme_init_subsystem(ctrl, id);
++ if (ret)
++ goto out_free;
+ }
++ memcpy(ctrl->subsys->firmware_rev, id->fr,
++ sizeof(ctrl->subsys->firmware_rev));
+
+ if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
+ dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+@@ -3025,10 +3117,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+ if (ret)
+ return ret;
+
+- ret = nvme_init_non_mdts_limits(ctrl);
+- if (ret < 0)
+- return ret;
+-
+ ret = nvme_configure_apst(ctrl);
+ if (ret < 0)
+ return ret;
+@@ -3046,8 +3134,12 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+ return ret;
+
+ if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
++ /*
++		 * Do not return errors unless we are in a controller reset;
++		 * the controller works perfectly fine without hwmon.
++ */
+ ret = nvme_hwmon_init(ctrl);
+- if (ret < 0)
++ if (ret == -EINTR)
+ return ret;
+ }
+
+@@ -3179,8 +3271,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ids->uuid)) {
+- printk_ratelimited(KERN_WARNING
+- "No UUID available providing old NGUID\n");
++ dev_warn_ratelimited(dev,
++ "No UUID available providing old NGUID\n");
+ return sysfs_emit(buf, "%pU\n", ids->nguid);
+ }
+ return sysfs_emit(buf, "%pU\n", &ids->uuid);
+@@ -3517,15 +3609,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = {
+ NULL,
+ };
+
+-static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
++static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
+ unsigned nsid)
+ {
+ struct nvme_ns_head *h;
+
+- lockdep_assert_held(&subsys->lock);
++ lockdep_assert_held(&ctrl->subsys->lock);
+
+- list_for_each_entry(h, &subsys->nsheads, entry) {
+- if (h->ns_id != nsid)
++ list_for_each_entry(h, &ctrl->subsys->nsheads, entry) {
++ /*
++ * Private namespaces can share NSIDs under some conditions.
++ * In that case we can't use the same ns_head for namespaces
++ * with the same NSID.
++ */
++ if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h))
+ continue;
+ if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
+ return h;
+@@ -3534,16 +3631,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
+ return NULL;
+ }
+
+-static int __nvme_check_ids(struct nvme_subsystem *subsys,
+- struct nvme_ns_head *new)
++static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys,
++ struct nvme_ns_ids *ids)
+ {
++ bool has_uuid = !uuid_is_null(&ids->uuid);
++ bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid));
++ bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
+ struct nvme_ns_head *h;
+
+ lockdep_assert_held(&subsys->lock);
+
+ list_for_each_entry(h, &subsys->nsheads, entry) {
+- if (nvme_ns_ids_valid(&new->ids) &&
+- nvme_ns_ids_equal(&new->ids, &h->ids))
++ if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid))
++ return -EINVAL;
++ if (has_nguid &&
++ memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0)
++ return -EINVAL;
++ if (has_eui64 &&
++ memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0)
+ return -EINVAL;
+ }
+
+@@ -3616,7 +3721,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
+ }
+
+ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
+- unsigned nsid, struct nvme_ns_ids *ids)
++ unsigned nsid, struct nvme_ns_ids *ids, bool is_shared)
+ {
+ struct nvme_ns_head *head;
+ size_t size = sizeof(*head);
+@@ -3640,15 +3745,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
+ head->subsys = ctrl->subsys;
+ head->ns_id = nsid;
+ head->ids = *ids;
++ head->shared = is_shared;
+ kref_init(&head->ref);
+
+- ret = __nvme_check_ids(ctrl->subsys, head);
+- if (ret) {
+- dev_err(ctrl->device,
+- "duplicate IDs for nsid %d\n", nsid);
+- goto out_cleanup_srcu;
+- }
+-
+ if (head->ids.csi) {
+ ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects);
+ if (ret)
+@@ -3685,14 +3784,19 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
+ int ret = 0;
+
+ mutex_lock(&ctrl->subsys->lock);
+- head = nvme_find_ns_head(ctrl->subsys, nsid);
++ head = nvme_find_ns_head(ctrl, nsid);
+ if (!head) {
+- head = nvme_alloc_ns_head(ctrl, nsid, ids);
++ ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
++ if (ret) {
++ dev_err(ctrl->device,
++ "duplicate IDs for nsid %d\n", nsid);
++ goto out_unlock;
++ }
++ head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared);
+ if (IS_ERR(head)) {
+ ret = PTR_ERR(head);
+ goto out_unlock;
+ }
+- head->shared = is_shared;
+ } else {
+ ret = -EINVAL;
+ if (!is_shared || !head->shared) {
+@@ -3862,7 +3966,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
+ mutex_unlock(&ns->ctrl->subsys->lock);
+
+ /* guarantee not available in head->list */
+- synchronize_rcu();
++ synchronize_srcu(&ns->head->srcu);
+
+ /* wait for concurrent submissions */
+ if (nvme_mpath_clear_current_path(ns))
+@@ -4076,11 +4180,26 @@ static void nvme_scan_work(struct work_struct *work)
+ {
+ struct nvme_ctrl *ctrl =
+ container_of(work, struct nvme_ctrl, scan_work);
++ int ret;
+
+ /* No tagset on a live ctrl means IO queues could not created */
+ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
+ return;
+
++ /*
++ * Identify controller limits can change at controller reset due to
++	 * new firmware download; even though it is not common, we cannot ignore
++	 * such a scenario. Controller's non-mdts limits are reported in the unit
++ * of logical blocks that is dependent on the format of attached
++ * namespace. Hence re-read the limits at the time of ns allocation.
++ */
++ ret = nvme_init_non_mdts_limits(ctrl);
++ if (ret < 0) {
++ dev_warn(ctrl->device,
++ "reading non-mdts-limits failed: %d\n", ret);
++ return;
++ }
++
+ if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
+ dev_info(ctrl->device, "rescanning namespaces.\n");
+ nvme_clear_changed_ns_log(ctrl);
+@@ -4187,7 +4306,14 @@ static void nvme_async_event_work(struct work_struct *work)
+ container_of(work, struct nvme_ctrl, async_event_work);
+
+ nvme_aen_uevent(ctrl);
+- ctrl->ops->submit_async_event(ctrl);
++
++ /*
++ * The transport drivers must guarantee AER submission here is safe by
++ * flushing ctrl async_event_work after changing the controller state
++ * from LIVE and before freeing the admin queue.
++ */
++ if (ctrl->state == NVME_CTRL_LIVE)
++ ctrl->ops->submit_async_event(ctrl);
+ }
+
+ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
+@@ -4250,11 +4376,19 @@ static void nvme_fw_act_work(struct work_struct *work)
+ nvme_get_fw_slot_info(ctrl);
+ }
+
+-static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
++static u32 nvme_aer_type(u32 result)
+ {
+- u32 aer_notice_type = (result & 0xff00) >> 8;
++ return result & 0x7;
++}
+
+- trace_nvme_async_event(ctrl, aer_notice_type);
++static u32 nvme_aer_subtype(u32 result)
++{
++ return (result & 0xff00) >> 8;
++}
++
++static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
++{
++ u32 aer_notice_type = nvme_aer_subtype(result);
+
+ switch (aer_notice_type) {
+ case NVME_AER_NOTICE_NS_CHANGED:
+@@ -4285,24 +4419,40 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
+ }
+ }
+
++static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
++{
++ dev_warn(ctrl->device, "resetting controller due to AER\n");
++ nvme_reset_ctrl(ctrl);
++}
++
+ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
+ volatile union nvme_result *res)
+ {
+ u32 result = le32_to_cpu(res->u32);
+- u32 aer_type = result & 0x07;
++ u32 aer_type = nvme_aer_type(result);
++ u32 aer_subtype = nvme_aer_subtype(result);
+
+ if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
+ return;
+
++ trace_nvme_async_event(ctrl, result);
+ switch (aer_type) {
+ case NVME_AER_NOTICE:
+ nvme_handle_aen_notice(ctrl, result);
+ break;
+ case NVME_AER_ERROR:
++ /*
++ * For a persistent internal error, don't run async_event_work
++ * to submit a new AER. The controller reset will do it.
++ */
++ if (aer_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
++ nvme_handle_aer_persistent_error(ctrl);
++ return;
++ }
++ fallthrough;
+ case NVME_AER_SMART:
+ case NVME_AER_CSS:
+ case NVME_AER_VS:
+- trace_nvme_async_event(ctrl, aer_type);
+ ctrl->aen_result = result;
+ break;
+ default:
+@@ -4319,6 +4469,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
+ nvme_stop_failfast_work(ctrl);
+ flush_work(&ctrl->async_event_work);
+ cancel_work_sync(&ctrl->fw_act_work);
++ if (ctrl->ops->stop_ctrl)
++ ctrl->ops->stop_ctrl(ctrl);
+ }
+ EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
+
+@@ -4331,6 +4483,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+ if (ctrl->queue_count > 1) {
+ nvme_queue_scan(ctrl);
+ nvme_start_queues(ctrl);
++ nvme_mpath_update(ctrl);
+ }
+ }
+ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
+index a146cb903869c..561c2abd3892b 100644
+--- a/drivers/nvme/host/fabrics.h
++++ b/drivers/nvme/host/fabrics.h
+@@ -169,6 +169,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
+ struct nvmf_ctrl_options *opts)
+ {
+ if (ctrl->state == NVME_CTRL_DELETING ||
++ ctrl->state == NVME_CTRL_DELETING_NOIO ||
+ ctrl->state == NVME_CTRL_DEAD ||
+ strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
+ strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
+diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
+index 0a586d7129201..316f3e4ca7cc6 100644
+--- a/drivers/nvme/host/hwmon.c
++++ b/drivers/nvme/host/hwmon.c
+@@ -12,7 +12,7 @@
+
+ struct nvme_hwmon_data {
+ struct nvme_ctrl *ctrl;
+- struct nvme_smart_log log;
++ struct nvme_smart_log *log;
+ struct mutex read_lock;
+ };
+
+@@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+ {
+ return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+- NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
++ NVME_CSI_NVM, data->log, sizeof(*data->log), 0);
+ }
+
+ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+ {
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+- struct nvme_smart_log *log = &data->log;
++ struct nvme_smart_log *log = data->log;
+ int temp;
+ int err;
+
+@@ -163,7 +163,9 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ if ((!channel && data->ctrl->wctemp) ||
+- (channel && data->log.temp_sensor[channel - 1])) {
++ (channel && data->log->temp_sensor[channel - 1] &&
++ !(data->ctrl->quirks &
++ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH))) {
+ if (data->ctrl->quirks &
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+ return 0444;
+@@ -176,7 +178,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
+ break;
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+- if (!channel || data->log.temp_sensor[channel - 1])
++ if (!channel || data->log->temp_sensor[channel - 1])
+ return 0444;
+ break;
+ default:
+@@ -230,7 +232,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+- return 0;
++ return -ENOMEM;
++
++ data->log = kzalloc(sizeof(*data->log), GFP_KERNEL);
++ if (!data->log) {
++ err = -ENOMEM;
++ goto err_free_data;
++ }
+
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+@@ -238,8 +246,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+- kfree(data);
+- return err;
++ goto err_free_log;
+ }
+
+ hwmon = hwmon_device_register_with_info(dev, "nvme",
+@@ -247,11 +254,17 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+- kfree(data);
+- return PTR_ERR(hwmon);
++ err = PTR_ERR(hwmon);
++ goto err_free_log;
+ }
+ ctrl->hwmon_device = hwmon;
+ return 0;
++
++err_free_log:
++ kfree(data->log);
++err_free_data:
++ kfree(data);
++ return err;
+ }
+
+ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
+@@ -262,6 +275,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
+
+ hwmon_device_unregister(ctrl->hwmon_device);
+ ctrl->hwmon_device = NULL;
++ kfree(data->log);
+ kfree(data);
+ }
+ }
+diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
+index 22314962842da..7397fad4c96ff 100644
+--- a/drivers/nvme/host/ioctl.c
++++ b/drivers/nvme/host/ioctl.c
+@@ -484,11 +484,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+ case NVME_IOCTL_IO_CMD:
+ return nvme_dev_user_cmd(ctrl, argp);
+ case NVME_IOCTL_RESET:
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
+ dev_warn(ctrl->device, "resetting controller\n");
+ return nvme_reset_ctrl_sync(ctrl);
+ case NVME_IOCTL_SUBSYS_RESET:
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
+ return nvme_reset_subsystem(ctrl);
+ case NVME_IOCTL_RESCAN:
++ if (!capable(CAP_SYS_ADMIN))
++ return -EACCES;
+ nvme_queue_scan(ctrl);
+ return 0;
+ default:
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index fba06618c6c23..73eddb67f0d24 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -138,13 +138,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
+ {
+ struct nvme_ns *ns;
+
+- mutex_lock(&ctrl->scan_lock);
+ down_read(&ctrl->namespaces_rwsem);
+- list_for_each_entry(ns, &ctrl->namespaces, list)
+- if (nvme_mpath_clear_current_path(ns))
+- kblockd_schedule_work(&ns->head->requeue_work);
++ list_for_each_entry(ns, &ctrl->namespaces, list) {
++ nvme_mpath_clear_current_path(ns);
++ kblockd_schedule_work(&ns->head->requeue_work);
++ }
+ up_read(&ctrl->namespaces_rwsem);
+- mutex_unlock(&ctrl->scan_lock);
+ }
+
+ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+@@ -152,14 +151,18 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+ struct nvme_ns_head *head = ns->head;
+ sector_t capacity = get_capacity(head->disk);
+ int node;
++ int srcu_idx;
+
++ srcu_idx = srcu_read_lock(&head->srcu);
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (capacity != get_capacity(ns->disk))
+ clear_bit(NVME_NS_READY, &ns->flags);
+ }
++ srcu_read_unlock(&head->srcu, srcu_idx);
+
+ for_each_node(node)
+ rcu_assign_pointer(head->current_path[node], NULL);
++ kblockd_schedule_work(&head->requeue_work);
+ }
+
+ static bool nvme_path_is_disabled(struct nvme_ns *ns)
+@@ -326,6 +329,8 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
+ * pool from the original queue to allocate the bvecs from.
+ */
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+@@ -389,6 +394,7 @@ const struct block_device_operations nvme_ns_head_ops = {
+ .open = nvme_ns_head_open,
+ .release = nvme_ns_head_release,
+ .ioctl = nvme_ns_head_ioctl,
++ .compat_ioctl = blkdev_compat_ptr_ioctl,
+ .getgeo = nvme_getgeo,
+ .report_zones = nvme_ns_head_report_zones,
+ .pr_ops = &nvme_pr_ops,
+@@ -463,10 +469,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
+
+ /*
+ * Add a multipath node if the subsystems supports multiple controllers.
+- * We also do this for private namespaces as the namespace sharing data could
+- * change after a rescan.
++ * We also do this for private namespaces as the namespace sharing flag
++ * could change after a rescan.
+ */
+- if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
++ if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
++ !nvme_is_unique_nsid(ctrl, head) || !multipath)
+ return 0;
+
+ head->disk = blk_alloc_disk(ctrl->numa_node);
+@@ -574,8 +581,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
+ ns->ana_grpid = le32_to_cpu(desc->grpid);
+ ns->ana_state = desc->state;
+ clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
+-
+- if (nvme_state_is_live(ns->ana_state))
++ /*
++ * nvme_mpath_set_live() will trigger I/O to the multipath path device
++ * and in turn to this path device. However we cannot accept this I/O
++ * if the controller is not live. This may deadlock if called from
++ * nvme_mpath_init_identify() and the ctrl will never complete
++ * initialization, preventing I/O from completing. For this case we
++ * will reprocess the ANA log page in nvme_mpath_update() once the
++ * controller is ready.
++ */
++ if (nvme_state_is_live(ns->ana_state) &&
++ ns->ctrl->state == NVME_CTRL_LIVE)
+ nvme_mpath_set_live(ns);
+ }
+
+@@ -662,6 +678,18 @@ static void nvme_ana_work(struct work_struct *work)
+ nvme_read_ana_log(ctrl);
+ }
+
++void nvme_mpath_update(struct nvme_ctrl *ctrl)
++{
++ u32 nr_change_groups = 0;
++
++ if (!ctrl->ana_log_buf)
++ return;
++
++ mutex_lock(&ctrl->ana_lock);
++ nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state);
++ mutex_unlock(&ctrl->ana_lock);
++}
++
+ static void nvme_anatt_timeout(struct timer_list *t)
+ {
+ struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
+@@ -793,7 +821,6 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+ {
+ if (!head->disk)
+ return;
+- blk_set_queue_dying(head->disk->queue);
+ /* make sure all pending bios are cleaned up */
+ kblockd_schedule_work(&head->requeue_work);
+ flush_work(&head->requeue_work);
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index ed79a6c7e8043..590ffa3e1c497 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -144,6 +144,16 @@ enum nvme_quirks {
+ * encoding the generation sequence number.
+ */
+ NVME_QUIRK_SKIP_CID_GEN = (1 << 17),
++
++ /*
++ * Reports garbage in the namespace identifiers (eui64, nguid, uuid).
++ */
++ NVME_QUIRK_BOGUS_NID = (1 << 18),
++
++ /*
++ * No temperature thresholds for channels other than 0 (Composite).
++ */
++ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19),
+ };
+
+ /*
+@@ -490,6 +500,7 @@ struct nvme_ctrl_ops {
+ void (*free_ctrl)(struct nvme_ctrl *ctrl);
+ void (*submit_async_event)(struct nvme_ctrl *ctrl);
+ void (*delete_ctrl)(struct nvme_ctrl *ctrl);
++ void (*stop_ctrl)(struct nvme_ctrl *ctrl);
+ int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
+ };
+
+@@ -552,11 +563,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj)
+ static inline void nvme_should_fail(struct request *req) {}
+ #endif
+
++bool nvme_wait_reset(struct nvme_ctrl *ctrl);
++int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
++
+ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
+ {
++ int ret;
++
+ if (!ctrl->subsystem)
+ return -ENOTTY;
+- return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
++ if (!nvme_wait_reset(ctrl))
++ return -EBUSY;
++
++ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
++ if (ret)
++ return ret;
++
++ return nvme_try_sched_reset(ctrl);
+ }
+
+ /*
+@@ -644,7 +667,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
+ void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl);
+ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
+ enum nvme_ctrl_state new_state);
+-bool nvme_wait_reset(struct nvme_ctrl *ctrl);
+ int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
+ int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
+ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
+@@ -693,6 +715,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ return true;
+ return __nvme_check_ready(ctrl, rq, queue_live);
+ }
++
++/*
++ * NSID shall be unique for all shared namespaces, or if at least one of the
++ * following conditions is met:
++ * 1. Namespace Management is supported by the controller
++ * 2. ANA is supported by the controller
++ * 3. NVM Sets are supported by the controller
++ *
++ * Otherwise, private namespaces are not required to report a unique NSID.
++ */
++static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl,
++ struct nvme_ns_head *head)
++{
++ return head->shared ||
++ (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) ||
++ (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) ||
++ (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS);
++}
++
+ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buf, unsigned bufflen);
+ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+@@ -709,7 +750,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
+ int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
+ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+-int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
+ int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
+ void nvme_queue_scan(struct nvme_ctrl *ctrl);
+ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
+@@ -752,6 +792,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
+ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
+ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
++void nvme_mpath_update(struct nvme_ctrl *ctrl);
+ void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
+ void nvme_mpath_stop(struct nvme_ctrl *ctrl);
+ bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+@@ -763,7 +804,7 @@ static inline void nvme_trace_bio_complete(struct request *req)
+ {
+ struct nvme_ns *ns = req->q->queuedata;
+
+- if (req->cmd_flags & REQ_NVME_MPATH)
++ if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio)
+ trace_block_bio_complete(ns->head->disk->queue, req->bio);
+ }
+
+@@ -826,6 +867,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
+ "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
+ return 0;
+ }
++static inline void nvme_mpath_update(struct nvme_ctrl *ctrl)
++{
++}
+ static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
+ {
+ }
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 149ecf73df384..bb3813e8474f4 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -33,7 +33,7 @@
+ #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes)
+ #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion))
+
+-#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
++#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc))
+
+ /*
+ * These can be higher, but we need to ensure that any command doesn't
+@@ -142,9 +142,9 @@ struct nvme_dev {
+ mempool_t *iod_mempool;
+
+ /* shadow doorbell buffer support: */
+- u32 *dbbuf_dbs;
++ __le32 *dbbuf_dbs;
+ dma_addr_t dbbuf_dbs_dma_addr;
+- u32 *dbbuf_eis;
++ __le32 *dbbuf_eis;
+ dma_addr_t dbbuf_eis_dma_addr;
+
+ /* host memory buffer support: */
+@@ -208,10 +208,10 @@ struct nvme_queue {
+ #define NVMEQ_SQ_CMB 1
+ #define NVMEQ_DELETE_ERROR 2
+ #define NVMEQ_POLLED 3
+- u32 *dbbuf_sq_db;
+- u32 *dbbuf_cq_db;
+- u32 *dbbuf_sq_ei;
+- u32 *dbbuf_cq_ei;
++ __le32 *dbbuf_sq_db;
++ __le32 *dbbuf_cq_db;
++ __le32 *dbbuf_sq_ei;
++ __le32 *dbbuf_cq_ei;
+ struct completion delete_done;
+ };
+
+@@ -224,7 +224,6 @@ struct nvme_queue {
+ struct nvme_iod {
+ struct nvme_request req;
+ struct nvme_command cmd;
+- struct nvme_queue *nvmeq;
+ bool use_sgl;
+ int aborted;
+ int npages; /* In the PRP list. 0 means small pool in use */
+@@ -332,11 +331,11 @@ static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
+ }
+
+ /* Update dbbuf and return true if an MMIO is required */
+-static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+- volatile u32 *dbbuf_ei)
++static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
++ volatile __le32 *dbbuf_ei)
+ {
+ if (dbbuf_db) {
+- u16 old_value;
++ u16 old_value, event_idx;
+
+ /*
+ * Ensure that the queue is written before updating
+@@ -344,8 +343,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+ */
+ wmb();
+
+- old_value = *dbbuf_db;
+- *dbbuf_db = value;
++ old_value = le32_to_cpu(*dbbuf_db);
++ *dbbuf_db = cpu_to_le32(value);
+
+ /*
+ * Ensure that the doorbell is updated before reading the event
+@@ -355,7 +354,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+ */
+ mb();
+
+- if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
++ event_idx = le32_to_cpu(*dbbuf_ei);
++ if (!nvme_dbbuf_need_event(event_idx, value, old_value))
+ return false;
+ }
+
+@@ -369,9 +369,9 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+ */
+ static int nvme_pci_npages_prp(void)
+ {
+- unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE,
+- NVME_CTRL_PAGE_SIZE);
+- return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
++ unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
++ unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
++ return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
+ }
+
+ /*
+@@ -381,7 +381,7 @@ static int nvme_pci_npages_prp(void)
+ static int nvme_pci_npages_sgl(void)
+ {
+ return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
+- PAGE_SIZE);
++ NVME_CTRL_PAGE_SIZE);
+ }
+
+ static size_t nvme_pci_iod_alloc_size(void)
+@@ -421,11 +421,6 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
+ {
+ struct nvme_dev *dev = set->driver_data;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+- int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+- struct nvme_queue *nvmeq = &dev->queues[queue_idx];
+-
+- BUG_ON(!nvmeq);
+- iod->nvmeq = nvmeq;
+
+ nvme_req(req)->ctrl = &dev->ctrl;
+ nvme_req(req)->cmd = &iod->cmd;
+@@ -528,7 +523,7 @@ static void **nvme_pci_iod_list(struct request *req)
+
+ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+ {
+- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
++ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ int nseg = blk_rq_nr_phys_segments(req);
+ unsigned int avg_seg_size;
+
+@@ -536,7 +531,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+
+ if (!nvme_ctrl_sgl_supported(&dev->ctrl))
+ return false;
+- if (!iod->nvmeq->qid)
++ if (!nvmeq->qid)
+ return false;
+ if (!sgl_threshold || avg_seg_size < sgl_threshold)
+ return false;
+@@ -731,7 +726,7 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
+ sge->length = cpu_to_le32(entries * sizeof(*sge));
+ sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
+ } else {
+- sge->length = cpu_to_le32(PAGE_SIZE);
++ sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE);
+ sge->type = NVME_SGL_FMT_SEG_DESC << 4;
+ }
+ }
+@@ -814,6 +809,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
+ cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
+ if (bv->bv_len > first_prp_len)
+ cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
++ else
++ cmnd->dptr.prp2 = 0;
+ return BLK_STS_OK;
+ }
+
+@@ -843,6 +840,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
+ int nr_mapped;
+
+ if (blk_rq_nr_phys_segments(req) == 1) {
++ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ struct bio_vec bv = req_bvec(req);
+
+ if (!is_pci_p2pdma_page(bv.bv_page)) {
+@@ -850,7 +848,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
+ return nvme_setup_prp_simple(dev, req,
+ &cmnd->rw, &bv);
+
+- if (iod->nvmeq->qid && sgl_threshold &&
++ if (nvmeq->qid && sgl_threshold &&
+ nvme_ctrl_sgl_supported(&dev->ctrl))
+ return nvme_setup_sgl_simple(dev, req,
+ &cmnd->rw, &bv);
+@@ -960,12 +958,16 @@ out_free_cmd:
+
+ static void nvme_pci_complete_rq(struct request *req)
+ {
+- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+- struct nvme_dev *dev = iod->nvmeq->dev;
++ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
++ struct nvme_dev *dev = nvmeq->dev;
++
++ if (blk_integrity_rq(req)) {
++ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+- if (blk_integrity_rq(req))
+ dma_unmap_page(dev->dev, iod->meta_dma,
+- rq_integrity_vec(req)->bv_len, rq_data_dir(req));
++ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
++ }
++
+ if (blk_rq_nr_phys_segments(req))
+ nvme_unmap_data(dev, req);
+ nvme_complete_rq(req);
+@@ -1191,8 +1193,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
+
+ static void abort_endio(struct request *req, blk_status_t error)
+ {
+- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+- struct nvme_queue *nvmeq = iod->nvmeq;
++ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+
+ dev_warn(nvmeq->dev->ctrl.device,
+ "Abort status: 0x%x", nvme_req(req)->status);
+@@ -1246,7 +1247,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
+ {
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+- struct nvme_queue *nvmeq = iod->nvmeq;
++ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ struct nvme_dev *dev = nvmeq->dev;
+ struct request *abort_req;
+ struct nvme_command cmd = { };
+@@ -1277,7 +1278,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
+ else
+ nvme_poll_irqdisable(nvmeq);
+
+- if (blk_mq_request_completed(req)) {
++ if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
+ dev_warn(dev->ctrl.device,
+ "I/O %d QID %d timeout, completion polled\n",
+ req->tag, nvmeq->qid);
+@@ -1680,6 +1681,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
+ dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+ if (IS_ERR(dev->ctrl.admin_q)) {
+ blk_mq_free_tag_set(&dev->admin_tagset);
++ dev->ctrl.admin_q = NULL;
+ return -ENOMEM;
+ }
+ if (!blk_get_queue(dev->ctrl.admin_q)) {
+@@ -2731,6 +2733,8 @@ static void nvme_reset_work(struct work_struct *work)
+ if (result)
+ goto out_unlock;
+
++ dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
++
+ /*
+ * Limit the max command size to prevent iod->sg allocations going
+ * over a single page.
+@@ -2743,7 +2747,6 @@ static void nvme_reset_work(struct work_struct *work)
+ * Don't limit the IOMMU merged segment size.
+ */
+ dma_set_max_seg_size(dev->dev, 0xffffffff);
+- dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
+
+ mutex_unlock(&dev->shutdown_lock);
+
+@@ -3300,7 +3303,8 @@ static const struct pci_device_id nvme_id_table[] = {
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
+ { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */
+ .driver_data = NVME_QUIRK_STRIPE_SIZE |
+- NVME_QUIRK_DEALLOCATE_ZEROES, },
++ NVME_QUIRK_DEALLOCATE_ZEROES |
++ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */
+ .driver_data = NVME_QUIRK_STRIPE_SIZE |
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
+@@ -3313,7 +3317,10 @@ static const struct pci_device_id nvme_id_table[] = {
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
+ .driver_data = NVME_QUIRK_IDENTIFY_CNS |
+- NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ NVME_QUIRK_DISABLE_WRITE_ZEROES |
++ NVME_QUIRK_BOGUS_NID, },
++ { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
+ { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
+@@ -3332,15 +3339,23 @@ static const struct pci_device_id nvme_id_table[] = {
+ NVME_QUIRK_DISABLE_WRITE_ZEROES|
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
+- .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
++ NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ { PCI_DEVICE(0x1cc1, 0x33f8), /* ADATA IM2P33F8ABR1 1 TB */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
+- .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
++ NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */
++ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN },
++ { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
+@@ -3351,6 +3366,49 @@ static const struct pci_device_id nvme_id_table[] = {
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
++ { PCI_DEVICE(0x2646, 0x5013), /* Kingston KC3000, Kingston FURY Renegade */
++ .driver_data = NVME_QUIRK_NO_SECONDARY_TEMP_THRESH, },
++ { PCI_DEVICE(0x2646, 0x5018), /* KINGSTON OM8SFP4xxxxP OS21012 NVMe SSD */
++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ { PCI_DEVICE(0x2646, 0x5016), /* KINGSTON OM3PGP4xxxxP OS21011 NVMe SSD */
++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ { PCI_DEVICE(0x2646, 0x501A), /* KINGSTON OM8PGP4xxxxP OS21005 NVMe SSD */
++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ { PCI_DEVICE(0x2646, 0x501B), /* KINGSTON OM8PGP4xxxxQ OS21005 NVMe SSD */
++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */
++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
++ { PCI_DEVICE(0x1f40, 0x1202), /* Netac Technologies Co. NV3000 NVMe SSD */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. NV7000 NVMe SSD */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e4B, 0x1602), /* MAXIO MAP1602 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */
++ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
++ { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
++ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
++ { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
++ .driver_data = NVME_QUIRK_BOGUS_NID |
++ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x1e4b, 0x1602), /* HS-SSD-FUTURE 2048G */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
++ { PCI_DEVICE(0x10ec, 0x5765), /* TEAMGROUP MP33 2TB SSD */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
+ .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
+@@ -3371,7 +3429,6 @@ static const struct pci_device_id nvme_id_table[] = {
+ NVME_QUIRK_128_BYTES_SQES |
+ NVME_QUIRK_SHARED_TAGS |
+ NVME_QUIRK_SKIP_CID_GEN },
+-
+ { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+ { 0, }
+ };
+diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
+index 042c594bc57e2..b76e1d4adcc77 100644
+--- a/drivers/nvme/host/rdma.c
++++ b/drivers/nvme/host/rdma.c
+@@ -989,6 +989,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
+ goto out_cleanup_connect_q;
+
+ if (!new) {
++ nvme_start_freeze(&ctrl->ctrl);
+ nvme_start_queues(&ctrl->ctrl);
+ if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
+ /*
+@@ -997,6 +998,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
+ * to be safe.
+ */
+ ret = -ENODEV;
++ nvme_unfreeze(&ctrl->ctrl);
+ goto out_wait_freeze_timed_out;
+ }
+ blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
+@@ -1038,7 +1040,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
+ bool remove)
+ {
+ if (ctrl->ctrl.queue_count > 1) {
+- nvme_start_freeze(&ctrl->ctrl);
+ nvme_stop_queues(&ctrl->ctrl);
+ nvme_sync_io_queues(&ctrl->ctrl);
+ nvme_rdma_stop_io_queues(ctrl);
+@@ -1049,6 +1050,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
+ }
+ }
+
++static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
++{
++ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
++
++ cancel_work_sync(&ctrl->err_work);
++ cancel_delayed_work_sync(&ctrl->reconnect_work);
++}
++
+ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
+ {
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+@@ -1095,11 +1104,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
+ return ret;
+
+ if (ctrl->ctrl.icdoff) {
++ ret = -EOPNOTSUPP;
+ dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+ goto destroy_admin;
+ }
+
+ if (!(ctrl->ctrl.sgls & (1 << 2))) {
++ ret = -EOPNOTSUPP;
+ dev_err(ctrl->ctrl.device,
+ "Mandatory keyed sgls are not supported!\n");
+ goto destroy_admin;
+@@ -1190,6 +1201,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
+ struct nvme_rdma_ctrl, err_work);
+
+ nvme_stop_keep_alive(&ctrl->ctrl);
++ flush_work(&ctrl->ctrl.async_event_work);
+ nvme_rdma_teardown_io_queues(ctrl, false);
+ nvme_start_queues(&ctrl->ctrl);
+ nvme_rdma_teardown_admin_queue(ctrl, false);
+@@ -2227,9 +2239,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
+
+ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+ {
+- cancel_work_sync(&ctrl->err_work);
+- cancel_delayed_work_sync(&ctrl->reconnect_work);
+-
+ nvme_rdma_teardown_io_queues(ctrl, shutdown);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ if (shutdown)
+@@ -2279,6 +2288,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
+ .submit_async_event = nvme_rdma_submit_async_event,
+ .delete_ctrl = nvme_rdma_delete_ctrl,
+ .get_address = nvmf_get_address,
++ .stop_ctrl = nvme_rdma_stop_ctrl,
+ };
+
+ /*
+diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
+index 4ae562d30d2b9..4ca7ef9416002 100644
+--- a/drivers/nvme/host/tcp.c
++++ b/drivers/nvme/host/tcp.c
+@@ -30,6 +30,44 @@ static int so_priority;
+ module_param(so_priority, int, 0644);
+ MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++/* lockdep can detect a circular dependency of the form
++ * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
++ * because dependencies are tracked for both nvme-tcp and user contexts. Using
++ * a separate class prevents lockdep from conflating nvme-tcp socket use with
++ * user-space socket API use.
++ */
++static struct lock_class_key nvme_tcp_sk_key[2];
++static struct lock_class_key nvme_tcp_slock_key[2];
++
++static void nvme_tcp_reclassify_socket(struct socket *sock)
++{
++ struct sock *sk = sock->sk;
++
++ if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
++ return;
++
++ switch (sk->sk_family) {
++ case AF_INET:
++ sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
++ &nvme_tcp_slock_key[0],
++ "sk_lock-AF_INET-NVME",
++ &nvme_tcp_sk_key[0]);
++ break;
++ case AF_INET6:
++ sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
++ &nvme_tcp_slock_key[1],
++ "sk_lock-AF_INET6-NVME",
++ &nvme_tcp_sk_key[1]);
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ }
++}
++#else
++static void nvme_tcp_reclassify_socket(struct socket *sock) { }
++#endif
++
+ enum nvme_tcp_send_state {
+ NVME_TCP_SEND_CMD_PDU = 0,
+ NVME_TCP_SEND_H2C_PDU,
+@@ -81,7 +119,6 @@ struct nvme_tcp_queue {
+ struct mutex send_mutex;
+ struct llist_head req_list;
+ struct list_head send_list;
+- bool more_requests;
+
+ /* recv state */
+ void *pdu;
+@@ -277,7 +314,7 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
+ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+ {
+ return !list_empty(&queue->send_list) ||
+- !llist_empty(&queue->req_list) || queue->more_requests;
++ !llist_empty(&queue->req_list);
+ }
+
+ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+@@ -296,9 +333,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+ */
+ if (queue->io_cpu == raw_smp_processor_id() &&
+ sync && empty && mutex_trylock(&queue->send_mutex)) {
+- queue->more_requests = !last;
+ nvme_tcp_send_all(queue);
+- queue->more_requests = false;
+ mutex_unlock(&queue->send_mutex);
+ }
+
+@@ -920,7 +955,15 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
+
+ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
+ {
+- nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
++ if (nvme_tcp_async_req(req)) {
++ union nvme_result res = {};
++
++ nvme_complete_async_event(&req->queue->ctrl->ctrl,
++ cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
++ } else {
++ nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
++ NVME_SC_HOST_PATH_ERROR);
++ }
+ }
+
+ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
+@@ -1116,8 +1159,7 @@ done:
+ } else if (ret < 0) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to send request %d\n", ret);
+- if (ret != -EPIPE && ret != -ECONNRESET)
+- nvme_tcp_fail_request(queue->request);
++ nvme_tcp_fail_request(queue->request);
+ nvme_tcp_done_send_req(queue);
+ }
+ return ret;
+@@ -1164,7 +1206,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
+ else if (unlikely(result < 0))
+ return;
+
+- if (!pending)
++ if (!pending || !queue->rd_enabled)
+ return;
+
+ } while (!time_after(jiffies, deadline)); /* quota is exhausted */
+@@ -1428,6 +1470,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
+ goto err_destroy_mutex;
+ }
+
++ nvme_tcp_reclassify_socket(queue->sock);
++
+ /* Single syn retry */
+ tcp_sock_set_syncnt(queue->sock->sk, 1);
+
+@@ -1519,22 +1563,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
+ if (ret)
+ goto err_init_connect;
+
+- queue->rd_enabled = true;
+ set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
+- nvme_tcp_init_recv_ctx(queue);
+-
+- write_lock_bh(&queue->sock->sk->sk_callback_lock);
+- queue->sock->sk->sk_user_data = queue;
+- queue->state_change = queue->sock->sk->sk_state_change;
+- queue->data_ready = queue->sock->sk->sk_data_ready;
+- queue->write_space = queue->sock->sk->sk_write_space;
+- queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+- queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+- queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+-#ifdef CONFIG_NET_RX_BUSY_POLL
+- queue->sock->sk->sk_ll_usec = 1;
+-#endif
+- write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+
+ return 0;
+
+@@ -1554,7 +1583,7 @@ err_destroy_mutex:
+ return ret;
+ }
+
+-static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
++static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
+ {
+ struct socket *sock = queue->sock;
+
+@@ -1569,7 +1598,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
+ {
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+- nvme_tcp_restore_sock_calls(queue);
++ nvme_tcp_restore_sock_ops(queue);
+ cancel_work_sync(&queue->io_work);
+ }
+
+@@ -1584,21 +1613,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+ mutex_unlock(&queue->queue_lock);
+ }
+
++static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
++{
++ write_lock_bh(&queue->sock->sk->sk_callback_lock);
++ queue->sock->sk->sk_user_data = queue;
++ queue->state_change = queue->sock->sk->sk_state_change;
++ queue->data_ready = queue->sock->sk->sk_data_ready;
++ queue->write_space = queue->sock->sk->sk_write_space;
++ queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
++ queue->sock->sk->sk_state_change = nvme_tcp_state_change;
++ queue->sock->sk->sk_write_space = nvme_tcp_write_space;
++#ifdef CONFIG_NET_RX_BUSY_POLL
++ queue->sock->sk->sk_ll_usec = 1;
++#endif
++ write_unlock_bh(&queue->sock->sk->sk_callback_lock);
++}
++
+ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
+ {
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
++ struct nvme_tcp_queue *queue = &ctrl->queues[idx];
+ int ret;
+
++ queue->rd_enabled = true;
++ nvme_tcp_init_recv_ctx(queue);
++ nvme_tcp_setup_sock_ops(queue);
++
+ if (idx)
+ ret = nvmf_connect_io_queue(nctrl, idx);
+ else
+ ret = nvmf_connect_admin_queue(nctrl);
+
+ if (!ret) {
+- set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
++ set_bit(NVME_TCP_Q_LIVE, &queue->flags);
+ } else {
+- if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
+- __nvme_tcp_stop_queue(&ctrl->queues[idx]);
++ if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
++ __nvme_tcp_stop_queue(queue);
+ dev_err(nctrl->device,
+ "failed to connect queue: %d ret=%d\n", idx, ret);
+ }
+@@ -1838,6 +1888,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
+ goto out_cleanup_connect_q;
+
+ if (!new) {
++ nvme_start_freeze(ctrl);
+ nvme_start_queues(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ /*
+@@ -1846,6 +1897,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
+ * to be safe.
+ */
+ ret = -ENODEV;
++ nvme_unfreeze(ctrl);
+ goto out_wait_freeze_timed_out;
+ }
+ blk_mq_update_nr_hw_queues(ctrl->tagset,
+@@ -1964,7 +2016,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
+ if (ctrl->queue_count <= 1)
+ return;
+ blk_mq_quiesce_queue(ctrl->admin_q);
+- nvme_start_freeze(ctrl);
+ nvme_stop_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvme_tcp_stop_io_queues(ctrl);
+@@ -2097,6 +2148,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
+ struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+
+ nvme_stop_keep_alive(ctrl);
++ flush_work(&ctrl->async_event_work);
+ nvme_tcp_teardown_io_queues(ctrl, false);
+ /* unquiesce to fail fast pending requests */
+ nvme_start_queues(ctrl);
+@@ -2115,9 +2167,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
+
+ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
+ {
+- cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+- cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+-
+ nvme_tcp_teardown_io_queues(ctrl, shutdown);
+ blk_mq_quiesce_queue(ctrl->admin_q);
+ if (shutdown)
+@@ -2157,6 +2206,12 @@ out_fail:
+ nvme_tcp_reconnect_or_remove(ctrl);
+ }
+
++static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
++{
++ cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
++ cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
++}
++
+ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
+ {
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+@@ -2480,6 +2535,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
+ .submit_async_event = nvme_tcp_submit_async_event,
+ .delete_ctrl = nvme_tcp_delete_ctrl,
+ .get_address = nvmf_get_address,
++ .stop_ctrl = nvme_tcp_stop_ctrl,
+ };
+
+ static bool
+diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
+index 35bac7a254227..b258f7b8788e1 100644
+--- a/drivers/nvme/host/trace.h
++++ b/drivers/nvme/host/trace.h
+@@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
+ TP_fast_assign(
+ __entry->ctrl_id = nvme_req(req)->ctrl->instance;
+ __entry->qid = nvme_req_qid(req);
+- __entry->cid = req->tag;
++ __entry->cid = nvme_req(req)->cmd->common.command_id;
+ __entry->result = le64_to_cpu(nvme_req(req)->result.u64);
+ __entry->retries = nvme_req(req)->retries;
+ __entry->flags = nvme_req(req)->flags;
+@@ -127,15 +127,12 @@ TRACE_EVENT(nvme_async_event,
+ ),
+ TP_printk("nvme%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+- __print_symbolic(__entry->result,
+- aer_name(NVME_AER_NOTICE_NS_CHANGED),
+- aer_name(NVME_AER_NOTICE_ANA),
+- aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+- aer_name(NVME_AER_NOTICE_DISC_CHANGED),
+- aer_name(NVME_AER_ERROR),
+- aer_name(NVME_AER_SMART),
+- aer_name(NVME_AER_CSS),
+- aer_name(NVME_AER_VS))
++ __print_symbolic(__entry->result & 0x7,
++ aer_name(NVME_AER_ERROR),
++ aer_name(NVME_AER_SMART),
++ aer_name(NVME_AER_NOTICE),
++ aer_name(NVME_AER_CSS),
++ aer_name(NVME_AER_VS))
+ )
+ );
+
+diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
+index aa6d84d8848e7..ec13f568785e5 100644
+--- a/drivers/nvme/target/admin-cmd.c
++++ b/drivers/nvme/target/admin-cmd.c
+@@ -164,26 +164,29 @@ out:
+
+ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
+ {
+- log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0);
+- log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0);
+-
+- log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
++ log->acs[nvme_admin_get_log_page] =
++ log->acs[nvme_admin_identify] =
++ log->acs[nvme_admin_abort_cmd] =
++ log->acs[nvme_admin_set_features] =
++ log->acs[nvme_admin_get_features] =
++ log->acs[nvme_admin_async_event] =
++ log->acs[nvme_admin_keep_alive] =
++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
++
++ log->iocs[nvme_cmd_read] =
++ log->iocs[nvme_cmd_write] =
++ log->iocs[nvme_cmd_flush] =
++ log->iocs[nvme_cmd_dsm] =
++ log->iocs[nvme_cmd_write_zeroes] =
++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+ }
+
+ static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log)
+ {
+- log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0);
+- log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0);
++ log->iocs[nvme_cmd_zone_append] =
++ log->iocs[nvme_cmd_zone_mgmt_send] =
++ log->iocs[nvme_cmd_zone_mgmt_recv] =
++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+ }
+
+ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
+@@ -505,7 +508,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
+ goto done;
+ }
+
+- nvmet_ns_revalidate(req->ns);
++ if (nvmet_ns_revalidate(req->ns)) {
++ mutex_lock(&req->ns->subsys->lock);
++ nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
++ mutex_unlock(&req->ns->subsys->lock);
++ }
+
+ /*
+ * nuse = ncap = nsze isn't always true, but we have no way to find
+@@ -673,6 +680,13 @@ static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
+ }
+ }
+
++static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
++{
++ /* Not supported: return zeroes */
++ nvmet_req_complete(req,
++ nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
++}
++
+ static void nvmet_execute_identify(struct nvmet_req *req)
+ {
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
+@@ -680,13 +694,8 @@ static void nvmet_execute_identify(struct nvmet_req *req)
+
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_NS:
+- switch (req->cmd->identify.csi) {
+- case NVME_CSI_NVM:
+- return nvmet_execute_identify_ns(req);
+- default:
+- break;
+- }
+- break;
++ nvmet_execute_identify_ns(req);
++ return;
+ case NVME_ID_CNS_CS_NS:
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+ switch (req->cmd->identify.csi) {
+@@ -698,29 +707,24 @@ static void nvmet_execute_identify(struct nvmet_req *req)
+ }
+ break;
+ case NVME_ID_CNS_CTRL:
+- switch (req->cmd->identify.csi) {
+- case NVME_CSI_NVM:
+- return nvmet_execute_identify_ctrl(req);
+- }
+- break;
++ nvmet_execute_identify_ctrl(req);
++ return;
+ case NVME_ID_CNS_CS_CTRL:
+- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+- switch (req->cmd->identify.csi) {
+- case NVME_CSI_ZNS:
+- return nvmet_execute_identify_cns_cs_ctrl(req);
+- default:
+- break;
+- }
+- }
+- break;
+- case NVME_ID_CNS_NS_ACTIVE_LIST:
+ switch (req->cmd->identify.csi) {
+ case NVME_CSI_NVM:
+- return nvmet_execute_identify_nslist(req);
+- default:
++ nvmet_execute_identify_ctrl_nvm(req);
++ return;
++ case NVME_CSI_ZNS:
++ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
++ nvmet_execute_identify_ctrl_zns(req);
++ return;
++ }
+ break;
+ }
+ break;
++ case NVME_ID_CNS_NS_ACTIVE_LIST:
++ nvmet_execute_identify_nslist(req);
++ return;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ if (nvmet_handle_identify_desclist(req) == true)
+ return;
+@@ -978,7 +982,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
+ ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
+ mutex_unlock(&ctrl->lock);
+
+- schedule_work(&ctrl->async_event_work);
++ queue_work(nvmet_wq, &ctrl->async_event_work);
+ }
+
+ void nvmet_execute_keep_alive(struct nvmet_req *req)
+diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
+index be5d82421e3a4..5bdc3ba51f7ef 100644
+--- a/drivers/nvme/target/configfs.c
++++ b/drivers/nvme/target/configfs.c
+@@ -586,7 +586,8 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+- nvmet_ns_revalidate(ns);
++ if (nvmet_ns_revalidate(ns))
++ nvmet_ns_changed(ns->subsys, ns->nsid);
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+ }
+@@ -1189,6 +1190,7 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
+ const char *page, size_t count)
+ {
+ int pos = 0, len;
++ char *val;
+
+ if (subsys->subsys_discovered) {
+ pr_err("Can't set model number. %s is already assigned\n",
+@@ -1211,9 +1213,11 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
+ return -EINVAL;
+ }
+
+- subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL);
+- if (!subsys->model_number)
++ val = kmemdup_nul(page, len, GFP_KERNEL);
++ if (!val)
+ return -ENOMEM;
++ kfree(subsys->model_number);
++ subsys->model_number = val;
+ return count;
+ }
+
+@@ -1553,6 +1557,8 @@ static void nvmet_port_release(struct config_item *item)
+ {
+ struct nvmet_port *port = to_nvmet_port(item);
+
++ /* Let inflight controllers teardown complete */
++ flush_workqueue(nvmet_wq);
+ list_del(&port->global_entry);
+
+ kfree(port->ana_state);
+diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
+index b8425fa34300f..2c44d5a95c8d6 100644
+--- a/drivers/nvme/target/core.c
++++ b/drivers/nvme/target/core.c
+@@ -15,11 +15,15 @@
+
+ #include "nvmet.h"
+
++struct kmem_cache *nvmet_bvec_cache;
+ struct workqueue_struct *buffered_io_wq;
+ struct workqueue_struct *zbd_wq;
+ static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+ static DEFINE_IDA(cntlid_ida);
+
++struct workqueue_struct *nvmet_wq;
++EXPORT_SYMBOL_GPL(nvmet_wq);
++
+ /*
+ * This read/write semaphore is used to synchronize access to configuration
+ * information on a target system that will result in discovery log page
+@@ -205,7 +209,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
+ list_add_tail(&aen->entry, &ctrl->async_events);
+ mutex_unlock(&ctrl->lock);
+
+- schedule_work(&ctrl->async_event_work);
++ queue_work(nvmet_wq, &ctrl->async_event_work);
+ }
+
+ static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
+@@ -385,7 +389,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
+ if (reset_tbkas) {
+ pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
+ ctrl->cntlid);
+- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
++ queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ return;
+ }
+
+@@ -403,7 +407,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
+ pr_debug("ctrl %d start keep-alive timer for %d secs\n",
+ ctrl->cntlid, ctrl->kato);
+
+- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
++ queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ }
+
+ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
+@@ -531,7 +535,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
+ ns->nsid);
+ }
+
+-void nvmet_ns_revalidate(struct nvmet_ns *ns)
++bool nvmet_ns_revalidate(struct nvmet_ns *ns)
+ {
+ loff_t oldsize = ns->size;
+
+@@ -540,8 +544,7 @@ void nvmet_ns_revalidate(struct nvmet_ns *ns)
+ else
+ nvmet_file_ns_revalidate(ns);
+
+- if (oldsize != ns->size)
+- nvmet_ns_changed(ns->subsys, ns->nsid);
++ return oldsize != ns->size;
+ }
+
+ int nvmet_ns_enable(struct nvmet_ns *ns)
+@@ -733,6 +736,8 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
+
+ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+ {
++ struct nvmet_ns *ns = req->ns;
++
+ if (!req->sq->sqhd_disabled)
+ nvmet_update_sq_head(req);
+ req->cqe->sq_id = cpu_to_le16(req->sq->qid);
+@@ -743,15 +748,17 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+
+ trace_nvmet_req_complete(req);
+
+- if (req->ns)
+- nvmet_put_namespace(req->ns);
+ req->ops->queue_response(req);
++ if (ns)
++ nvmet_put_namespace(ns);
+ }
+
+ void nvmet_req_complete(struct nvmet_req *req, u16 status)
+ {
++ struct nvmet_sq *sq = req->sq;
++
+ __nvmet_req_complete(req, status);
+- percpu_ref_put(&req->sq->ref);
++ percpu_ref_put(&sq->ref);
+ }
+ EXPORT_SYMBOL_GPL(nvmet_req_complete);
+
+@@ -1163,7 +1170,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
+ * reset the keep alive timer when the controller is enabled.
+ */
+ if (ctrl->kato)
+- mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
++ mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ }
+
+ static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+@@ -1477,7 +1484,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
+ mutex_lock(&ctrl->lock);
+ if (!(ctrl->csts & NVME_CSTS_CFS)) {
+ ctrl->csts |= NVME_CSTS_CFS;
+- schedule_work(&ctrl->fatal_err_work);
++ queue_work(nvmet_wq, &ctrl->fatal_err_work);
+ }
+ mutex_unlock(&ctrl->lock);
+ }
+@@ -1602,24 +1609,32 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys)
+
+ static int __init nvmet_init(void)
+ {
+- int error;
++ int error = -ENOMEM;
+
+ nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+
++ nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
++ NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!nvmet_bvec_cache)
++ return -ENOMEM;
++
+ zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
+ if (!zbd_wq)
+- return -ENOMEM;
++ goto out_destroy_bvec_cache;
+
+ buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
+ WQ_MEM_RECLAIM, 0);
+- if (!buffered_io_wq) {
+- error = -ENOMEM;
++ if (!buffered_io_wq)
+ goto out_free_zbd_work_queue;
+- }
++
++ nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0);
++ if (!nvmet_wq)
++ goto out_free_buffered_work_queue;
+
+ error = nvmet_init_discovery();
+ if (error)
+- goto out_free_work_queue;
++ goto out_free_nvmet_work_queue;
+
+ error = nvmet_init_configfs();
+ if (error)
+@@ -1628,10 +1643,14 @@ static int __init nvmet_init(void)
+
+ out_exit_discovery:
+ nvmet_exit_discovery();
+-out_free_work_queue:
++out_free_nvmet_work_queue:
++ destroy_workqueue(nvmet_wq);
++out_free_buffered_work_queue:
+ destroy_workqueue(buffered_io_wq);
+ out_free_zbd_work_queue:
+ destroy_workqueue(zbd_wq);
++out_destroy_bvec_cache:
++ kmem_cache_destroy(nvmet_bvec_cache);
+ return error;
+ }
+
+@@ -1640,8 +1659,10 @@ static void __exit nvmet_exit(void)
+ nvmet_exit_configfs();
+ nvmet_exit_discovery();
+ ida_destroy(&cntlid_ida);
++ destroy_workqueue(nvmet_wq);
+ destroy_workqueue(buffered_io_wq);
+ destroy_workqueue(zbd_wq);
++ kmem_cache_destroy(nvmet_bvec_cache);
+
+ BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
+ BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
+diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
+index 22b5108168a6a..00a2a591f5c1f 100644
+--- a/drivers/nvme/target/fc.c
++++ b/drivers/nvme/target/fc.c
+@@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
+ list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
+ if (!nvmet_fc_tgt_a_get(assoc))
+ continue;
+- if (!schedule_work(&assoc->del_work))
++ if (!queue_work(nvmet_wq, &assoc->del_work))
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+@@ -1546,7 +1546,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
+ continue;
+ assoc->hostport->invalid = 1;
+ noassoc = false;
+- if (!schedule_work(&assoc->del_work))
++ if (!queue_work(nvmet_wq, &assoc->del_work))
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+@@ -1592,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
+ nvmet_fc_tgtport_put(tgtport);
+
+ if (found_ctrl) {
+- if (!schedule_work(&assoc->del_work))
++ if (!queue_work(nvmet_wq, &assoc->del_work))
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ return;
+@@ -1685,8 +1685,10 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
+ else {
+ queue = nvmet_fc_alloc_target_queue(iod->assoc, 0,
+ be16_to_cpu(rqst->assoc_cmd.sqsize));
+- if (!queue)
++ if (!queue) {
+ ret = VERR_QUEUE_ALLOC_FAIL;
++ nvmet_fc_tgt_a_put(iod->assoc);
++ }
+ }
+ }
+
+@@ -2060,7 +2062,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port,
+ iod->rqstdatalen = lsreqbuf_len;
+ iod->hosthandle = hosthandle;
+
+- schedule_work(&iod->work);
++ queue_work(nvmet_wq, &iod->work);
+
+ return 0;
+ }
+diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
+index 54606f1872b4a..c780af36c1d4a 100644
+--- a/drivers/nvme/target/fcloop.c
++++ b/drivers/nvme/target/fcloop.c
+@@ -360,7 +360,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
+ spin_lock(&rport->lock);
+ list_add_tail(&rport->ls_list, &tls_req->ls_list);
+ spin_unlock(&rport->lock);
+- schedule_work(&rport->ls_work);
++ queue_work(nvmet_wq, &rport->ls_work);
+ return ret;
+ }
+
+@@ -393,7 +393,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
+ spin_lock(&rport->lock);
+ list_add_tail(&rport->ls_list, &tls_req->ls_list);
+ spin_unlock(&rport->lock);
+- schedule_work(&rport->ls_work);
++ queue_work(nvmet_wq, &rport->ls_work);
+ }
+
+ return 0;
+@@ -448,7 +448,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
+ spin_lock(&tport->lock);
+ list_add_tail(&tport->ls_list, &tls_req->ls_list);
+ spin_unlock(&tport->lock);
+- schedule_work(&tport->ls_work);
++ queue_work(nvmet_wq, &tport->ls_work);
+ return ret;
+ }
+
+@@ -480,7 +480,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ spin_lock(&tport->lock);
+ list_add_tail(&tport->ls_list, &tls_req->ls_list);
+ spin_unlock(&tport->lock);
+- schedule_work(&tport->ls_work);
++ queue_work(nvmet_wq, &tport->ls_work);
+ }
+
+ return 0;
+@@ -520,7 +520,7 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport)
+ tgt_rscn->tport = tgtport->private;
+ INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work);
+
+- schedule_work(&tgt_rscn->work);
++ queue_work(nvmet_wq, &tgt_rscn->work);
+ }
+
+ static void
+@@ -614,10 +614,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
+ struct fcloop_fcpreq *tfcp_req =
+ container_of(work, struct fcloop_fcpreq, fcp_rcv_work);
+ struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
++ unsigned long flags;
+ int ret = 0;
+ bool aborted = false;
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ switch (tfcp_req->inistate) {
+ case INI_IO_START:
+ tfcp_req->inistate = INI_IO_ACTIVE;
+@@ -626,11 +627,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
+ aborted = true;
+ break;
+ default:
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+ WARN_ON(1);
+ return;
+ }
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ if (unlikely(aborted))
+ ret = -ECANCELED;
+@@ -655,8 +656,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
+ container_of(work, struct fcloop_fcpreq, abort_rcv_work);
+ struct nvmefc_fcp_req *fcpreq;
+ bool completed = false;
++ unsigned long flags;
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ fcpreq = tfcp_req->fcpreq;
+ switch (tfcp_req->inistate) {
+ case INI_IO_ABORTED:
+@@ -665,11 +667,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
+ completed = true;
+ break;
+ default:
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+ WARN_ON(1);
+ return;
+ }
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ if (unlikely(completed)) {
+ /* remove reference taken in original abort downcall */
+@@ -681,9 +683,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
+ nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
+ &tfcp_req->tgt_fcp_req);
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ tfcp_req->fcpreq = NULL;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
+ /* call_host_done releases reference for abort downcall */
+@@ -699,11 +701,12 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
+ struct fcloop_fcpreq *tfcp_req =
+ container_of(work, struct fcloop_fcpreq, tio_done_work);
+ struct nvmefc_fcp_req *fcpreq;
++ unsigned long flags;
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ fcpreq = tfcp_req->fcpreq;
+ tfcp_req->inistate = INI_IO_COMPLETED;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
+ }
+@@ -739,7 +742,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
+ INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work);
+ kref_init(&tfcp_req->ref);
+
+- schedule_work(&tfcp_req->fcp_rcv_work);
++ queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work);
+
+ return 0;
+ }
+@@ -807,13 +810,14 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
+ u32 rsplen = 0, xfrlen = 0;
+ int fcp_err = 0, active, aborted;
+ u8 op = tgt_fcpreq->op;
++ unsigned long flags;
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ fcpreq = tfcp_req->fcpreq;
+ active = tfcp_req->active;
+ aborted = tfcp_req->aborted;
+ tfcp_req->active = true;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ if (unlikely(active))
+ /* illegal - call while i/o active */
+@@ -821,9 +825,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
+
+ if (unlikely(aborted)) {
+ /* target transport has aborted i/o prior */
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ tfcp_req->active = false;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+ tgt_fcpreq->transferred_length = 0;
+ tgt_fcpreq->fcp_error = -ECANCELED;
+ tgt_fcpreq->done(tgt_fcpreq);
+@@ -880,9 +884,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
+ break;
+ }
+
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ tfcp_req->active = false;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ tgt_fcpreq->transferred_length = xfrlen;
+ tgt_fcpreq->fcp_error = fcp_err;
+@@ -896,15 +900,16 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_tgt_fcp_req *tgt_fcpreq)
+ {
+ struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
++ unsigned long flags;
+
+ /*
+ * mark aborted only in case there were 2 threads in transport
+ * (one doing io, other doing abort) and only kills ops posted
+ * after the abort request
+ */
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ tfcp_req->aborted = true;
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ tfcp_req->status = NVME_SC_INTERNAL;
+
+@@ -921,7 +926,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
+ {
+ struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+
+- schedule_work(&tfcp_req->tio_done_work);
++ queue_work(nvmet_wq, &tfcp_req->tio_done_work);
+ }
+
+ static void
+@@ -946,6 +951,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
+ struct fcloop_ini_fcpreq *inireq = fcpreq->private;
+ struct fcloop_fcpreq *tfcp_req;
+ bool abortio = true;
++ unsigned long flags;
+
+ spin_lock(&inireq->inilock);
+ tfcp_req = inireq->tfcp_req;
+@@ -958,7 +964,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
+ return;
+
+ /* break initiator/target relationship for io */
+- spin_lock_irq(&tfcp_req->reqlock);
++ spin_lock_irqsave(&tfcp_req->reqlock, flags);
+ switch (tfcp_req->inistate) {
+ case INI_IO_START:
+ case INI_IO_ACTIVE:
+@@ -968,15 +974,15 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
+ abortio = false;
+ break;
+ default:
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+ WARN_ON(1);
+ return;
+ }
+- spin_unlock_irq(&tfcp_req->reqlock);
++ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
+
+ if (abortio)
+ /* leave the reference while the work item is scheduled */
+- WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work));
++ WARN_ON(!queue_work(nvmet_wq, &tfcp_req->abort_rcv_work));
+ else {
+ /*
+ * as the io has already had the done callback made,
+diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
+index 1dd1a0fe2e819..098b6bf12cd0a 100644
+--- a/drivers/nvme/target/io-cmd-file.c
++++ b/drivers/nvme/target/io-cmd-file.c
+@@ -8,21 +8,14 @@
+ #include <linux/uio.h>
+ #include <linux/falloc.h>
+ #include <linux/file.h>
++#include <linux/fs.h>
+ #include "nvmet.h"
+
+-#define NVMET_MAX_MPOOL_BVEC 16
+ #define NVMET_MIN_MPOOL_OBJ 16
+
+-int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
++void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+ {
+- struct kstat stat;
+- int ret;
+-
+- ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
+- AT_STATX_FORCE_SYNC);
+- if (!ret)
+- ns->size = stat.size;
+- return ret;
++ ns->size = i_size_read(ns->file->f_mapping->host);
+ }
+
+ void nvmet_file_ns_disable(struct nvmet_ns *ns)
+@@ -32,8 +25,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
+ flush_workqueue(buffered_io_wq);
+ mempool_destroy(ns->bvec_pool);
+ ns->bvec_pool = NULL;
+- kmem_cache_destroy(ns->bvec_cache);
+- ns->bvec_cache = NULL;
+ fput(ns->file);
+ ns->file = NULL;
+ }
+@@ -42,7 +33,7 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
+ int nvmet_file_ns_enable(struct nvmet_ns *ns)
+ {
+ int flags = O_RDWR | O_LARGEFILE;
+- int ret;
++ int ret = 0;
+
+ if (!ns->buffered_io)
+ flags |= O_DIRECT;
+@@ -56,9 +47,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
+ return ret;
+ }
+
+- ret = nvmet_file_ns_revalidate(ns);
+- if (ret)
+- goto err;
++ nvmet_file_ns_revalidate(ns);
+
+ /*
+ * i_blkbits can be greater than the universally accepted upper bound,
+@@ -67,16 +56,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
+ ns->blksize_shift = min_t(u8,
+ file_inode(ns->file)->i_blkbits, 12);
+
+- ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+- NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+- 0, SLAB_HWCACHE_ALIGN, NULL);
+- if (!ns->bvec_cache) {
+- ret = -ENOMEM;
+- goto err;
+- }
+-
+ ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+- mempool_free_slab, ns->bvec_cache);
++ mempool_free_slab, nvmet_bvec_cache);
+
+ if (!ns->bvec_pool) {
+ ret = -ENOMEM;
+@@ -85,9 +66,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
+
+ return ret;
+ err:
++ fput(ns->file);
++ ns->file = NULL;
+ ns->size = 0;
+ ns->blksize_shift = 0;
+- nvmet_file_ns_disable(ns);
+ return ret;
+ }
+
+@@ -266,7 +248,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
+
+ if (req->ns->buffered_io) {
+ if (likely(!req->f.mpool_alloc) &&
+- nvmet_file_execute_io(req, IOCB_NOWAIT))
++ (req->ns->file->f_mode & FMODE_NOWAIT) &&
++ nvmet_file_execute_io(req, IOCB_NOWAIT))
+ return;
+ nvmet_file_submit_buffered_io(req);
+ } else
+@@ -290,7 +273,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req)
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+ INIT_WORK(&req->f.work, nvmet_file_flush_work);
+- schedule_work(&req->f.work);
++ queue_work(nvmet_wq, &req->f.work);
+ }
+
+ static void nvmet_file_execute_discard(struct nvmet_req *req)
+@@ -350,7 +333,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req)
+ if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
+ return;
+ INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+- schedule_work(&req->f.work);
++ queue_work(nvmet_wq, &req->f.work);
+ }
+
+ static void nvmet_file_write_zeroes_work(struct work_struct *w)
+@@ -380,7 +363,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+ if (!nvmet_check_transfer_len(req, 0))
+ return;
+ INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+- schedule_work(&req->f.work);
++ queue_work(nvmet_wq, &req->f.work);
+ }
+
+ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
+index 0285ccc7541f6..2553f487c9f24 100644
+--- a/drivers/nvme/target/loop.c
++++ b/drivers/nvme/target/loop.c
+@@ -166,7 +166,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+ iod->req.transfer_len = blk_rq_payload_bytes(req);
+ }
+
+- schedule_work(&iod->work);
++ queue_work(nvmet_wq, &iod->work);
+ return BLK_STS_OK;
+ }
+
+@@ -187,7 +187,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg)
+ return;
+ }
+
+- schedule_work(&iod->work);
++ queue_work(nvmet_wq, &iod->work);
+ }
+
+ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
+diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
+index 7143c7fa74641..17dd845514588 100644
+--- a/drivers/nvme/target/nvmet.h
++++ b/drivers/nvme/target/nvmet.h
+@@ -77,7 +77,6 @@ struct nvmet_ns {
+
+ struct completion disable_done;
+ mempool_t *bvec_pool;
+- struct kmem_cache *bvec_cache;
+
+ int use_p2pmem;
+ struct pci_dev *p2p_dev;
+@@ -363,8 +362,11 @@ struct nvmet_req {
+ u64 error_slba;
+ };
+
++#define NVMET_MAX_MPOOL_BVEC 16
++extern struct kmem_cache *nvmet_bvec_cache;
+ extern struct workqueue_struct *buffered_io_wq;
+ extern struct workqueue_struct *zbd_wq;
++extern struct workqueue_struct *nvmet_wq;
+
+ static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
+ {
+@@ -540,12 +542,12 @@ u16 nvmet_bdev_flush(struct nvmet_req *req);
+ u16 nvmet_file_flush(struct nvmet_req *req);
+ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
+ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
+-int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+-void nvmet_ns_revalidate(struct nvmet_ns *ns);
++void nvmet_file_ns_revalidate(struct nvmet_ns *ns);
++bool nvmet_ns_revalidate(struct nvmet_ns *ns);
+ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
+
+ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
+-void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
++void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req);
+ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
+ void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
+ void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
+diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
+index f0efb35379898..9b5929754195b 100644
+--- a/drivers/nvme/target/passthru.c
++++ b/drivers/nvme/target/passthru.c
+@@ -271,17 +271,16 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
+ }
+
+ /*
+- * If there are effects for the command we are about to execute, or
+- * an end_req function we need to use nvme_execute_passthru_rq()
+- * synchronously in a work item seeing the end_req function and
+- * nvme_passthru_end() can't be called in the request done callback
+- * which is typically in interrupt context.
++ * If a command needs post-execution fixups, or there are any
++ * non-trivial effects, make sure to execute the command synchronously
++ * in a workqueue so that nvme_passthru_end gets called.
+ */
+ effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
+- if (req->p.use_workqueue || effects) {
++ if (req->p.use_workqueue ||
++ (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
+ INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
+ req->p.rq = rq;
+- schedule_work(&req->p.work);
++ queue_work(nvmet_wq, &req->p.work);
+ } else {
+ rq->end_io_data = req;
+ blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
+diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
+index 891174ccd44bb..18e082091c821 100644
+--- a/drivers/nvme/target/rdma.c
++++ b/drivers/nvme/target/rdma.c
+@@ -1583,7 +1583,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
+
+ if (queue->host_qid == 0) {
+ /* Let inflight controller teardown complete */
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ }
+
+ ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
+@@ -1668,7 +1668,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
+
+ if (disconnect) {
+ rdma_disconnect(queue->cm_id);
+- schedule_work(&queue->release_work);
++ queue_work(nvmet_wq, &queue->release_work);
+ }
+ }
+
+@@ -1698,7 +1698,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+ pr_err("failed to connect queue %d\n", queue->idx);
+- schedule_work(&queue->release_work);
++ queue_work(nvmet_wq, &queue->release_work);
+ }
+
+ /**
+@@ -1772,7 +1772,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ if (!queue) {
+ struct nvmet_rdma_port *port = cm_id->context;
+
+- schedule_delayed_work(&port->repair_work, 0);
++ queue_delayed_work(nvmet_wq, &port->repair_work, 0);
+ break;
+ }
+ fallthrough;
+@@ -1818,12 +1818,36 @@ restart:
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+ }
+
++static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port)
++{
++ struct nvmet_rdma_queue *queue, *tmp;
++ struct nvmet_port *nport = port->nport;
++
++ mutex_lock(&nvmet_rdma_queue_mutex);
++ list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
++ queue_list) {
++ if (queue->port != nport)
++ continue;
++
++ list_del_init(&queue->queue_list);
++ __nvmet_rdma_queue_disconnect(queue);
++ }
++ mutex_unlock(&nvmet_rdma_queue_mutex);
++}
++
+ static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
+ {
+ struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
+
+ if (cm_id)
+ rdma_destroy_id(cm_id);
++
++ /*
++	 * Destroy the remaining queues, which do not belong to any
++	 * controller yet. Doing it here, after the RDMA-CM was
++	 * destroyed, guarantees that no new queue will be created.
++ */
++ nvmet_rdma_destroy_port_queues(port);
+ }
+
+ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
+@@ -1878,7 +1902,7 @@ static void nvmet_rdma_repair_port_work(struct work_struct *w)
+ nvmet_rdma_disable_port(port);
+ ret = nvmet_rdma_enable_port(port);
+ if (ret)
+- schedule_delayed_work(&port->repair_work, 5 * HZ);
++ queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ);
+ }
+
+ static int nvmet_rdma_add_port(struct nvmet_port *nport)
+@@ -2022,7 +2046,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ }
+
+ static struct ib_client nvmet_rdma_ib_client = {
+diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
+index 46c3b3be7e033..2add26637c87e 100644
+--- a/drivers/nvme/target/tcp.c
++++ b/drivers/nvme/target/tcp.c
+@@ -700,10 +700,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
+ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
+ {
+ struct nvmet_tcp_queue *queue = cmd->queue;
++ int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+ struct kvec iov = {
+ .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
+- .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
++ .iov_len = left
+ };
+ int ret;
+
+@@ -717,6 +718,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
+ return ret;
+
+ cmd->offset += ret;
++ left -= ret;
++
++ if (left)
++ return -EAGAIN;
+
+ if (queue->nvme_sq.sqhd_disabled) {
+ cmd->queue->snd_cmd = NULL;
+@@ -929,10 +934,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
+ struct nvme_tcp_data_pdu *data = &queue->pdu.data;
+ struct nvmet_tcp_cmd *cmd;
+
+- if (likely(queue->nr_cmds))
++ if (likely(queue->nr_cmds)) {
++ if (unlikely(data->ttag >= queue->nr_cmds)) {
++ pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
++ queue->idx, data->ttag, queue->nr_cmds);
++ nvmet_tcp_fatal_error(queue);
++ return -EPROTO;
++ }
+ cmd = &queue->cmds[data->ttag];
+- else
++ } else {
+ cmd = &queue->connect;
++ }
+
+ if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) {
+ pr_err("ttag %u unexpected data offset %u (expected %u)\n",
+@@ -1246,7 +1258,7 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
+ spin_lock(&queue->state_lock);
+ if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+- schedule_work(&queue->release_work);
++ queue_work(nvmet_wq, &queue->release_work);
+ }
+ spin_unlock(&queue->state_lock);
+ }
+@@ -1496,6 +1508,9 @@ static void nvmet_tcp_state_change(struct sock *sk)
+ goto done;
+
+ switch (sk->sk_state) {
++ case TCP_FIN_WAIT2:
++ case TCP_LAST_ACK:
++ break;
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+@@ -1657,7 +1672,7 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk)
+ goto out;
+
+ if (sk->sk_state == TCP_LISTEN)
+- schedule_work(&port->accept_work);
++ queue_work(nvmet_wq, &port->accept_work);
+ out:
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+@@ -1740,6 +1755,17 @@ err_port:
+ return ret;
+ }
+
++static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port)
++{
++ struct nvmet_tcp_queue *queue;
++
++ mutex_lock(&nvmet_tcp_queue_mutex);
++ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
++ if (queue->port == port)
++ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
++ mutex_unlock(&nvmet_tcp_queue_mutex);
++}
++
+ static void nvmet_tcp_remove_port(struct nvmet_port *nport)
+ {
+ struct nvmet_tcp_port *port = nport->priv;
+@@ -1749,6 +1775,11 @@ static void nvmet_tcp_remove_port(struct nvmet_port *nport)
+ port->sock->sk->sk_user_data = NULL;
+ write_unlock_bh(&port->sock->sk->sk_callback_lock);
+ cancel_work_sync(&port->accept_work);
++ /*
++	 * Destroy the remaining queues, which do not belong to any
++	 * controller yet.
++ */
++ nvmet_tcp_destroy_port_queues(port);
+
+ sock_release(port->sock);
+ kfree(port);
+@@ -1772,7 +1803,7 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
+
+ if (sq->qid == 0) {
+ /* Let inflight controller teardown complete */
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ }
+
+ queue->nr_cmds = sq->size * 2;
+@@ -1813,7 +1844,8 @@ static int __init nvmet_tcp_init(void)
+ {
+ int ret;
+
+- nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
++ nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
++ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ if (!nvmet_tcp_wq)
+ return -ENOMEM;
+
+@@ -1833,12 +1865,12 @@ static void __exit nvmet_tcp_exit(void)
+
+ nvmet_unregister_transport(&nvmet_tcp_ops);
+
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+- flush_scheduled_work();
++ flush_workqueue(nvmet_wq);
+
+ destroy_workqueue(nvmet_tcp_wq);
+ }
+diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
+index 46bc30fe85d2b..ae617d66b1378 100644
+--- a/drivers/nvme/target/zns.c
++++ b/drivers/nvme/target/zns.c
+@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z,
+
+ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+ {
+- struct request_queue *q = ns->bdev->bd_disk->queue;
+- u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
++ u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
+ struct gendisk *bd_disk = ns->bdev->bd_disk;
+ int ret;
+
+@@ -71,7 +70,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+ return true;
+ }
+
+-void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
++void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req)
+ {
+ u8 zasl = req->sq->ctrl->subsys->zasl;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+@@ -98,9 +97,10 @@ out:
+
+ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
+ {
+- struct nvme_id_ns_zns *id_zns;
++ struct nvme_id_ns_zns *id_zns = NULL;
+ u64 zsze;
+ u16 status;
++ u32 mar, mor;
+
+ if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
+ req->error_loc = offsetof(struct nvme_identify, nsid);
+@@ -118,22 +118,40 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
+ if (status)
+ goto done;
+
++ if (nvmet_ns_revalidate(req->ns)) {
++ mutex_lock(&req->ns->subsys->lock);
++ nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
++ mutex_unlock(&req->ns->subsys->lock);
++ }
++
+ if (!bdev_is_zoned(req->ns->bdev)) {
++ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_identify, nsid);
+- goto done;
++ goto out;
+ }
+
+- nvmet_ns_revalidate(req->ns);
+ zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
+ req->ns->blksize_shift;
+ id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
+- id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
+- id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
++
++ mor = bdev_max_open_zones(req->ns->bdev);
++ if (!mor)
++ mor = U32_MAX;
++ else
++ mor--;
++ id_zns->mor = cpu_to_le32(mor);
++
++ mar = bdev_max_active_zones(req->ns->bdev);
++ if (!mar)
++ mar = U32_MAX;
++ else
++ mar--;
++ id_zns->mar = cpu_to_le32(mar);
+
+ done:
+ status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
+- kfree(id_zns);
+ out:
++ kfree(id_zns);
+ nvmet_req_complete(req, status);
+ }
+
+diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
+index 8976da38b375a..47c1487dcf8cc 100644
+--- a/drivers/nvmem/core.c
++++ b/drivers/nvmem/core.c
+@@ -307,6 +307,8 @@ static umode_t nvmem_bin_attr_is_visible(struct kobject *kobj,
+ struct device *dev = kobj_to_dev(kobj);
+ struct nvmem_device *nvmem = to_nvmem_device(dev);
+
++ attr->size = nvmem->size;
++
+ return nvmem_bin_attr_get_umode(nvmem);
+ }
+
+@@ -764,31 +766,32 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
+ return ERR_PTR(rval);
+ }
+
+- if (config->wp_gpio)
+- nvmem->wp_gpio = config->wp_gpio;
+- else
++ nvmem->id = rval;
++
++ nvmem->dev.type = &nvmem_provider_type;
++ nvmem->dev.bus = &nvmem_bus_type;
++ nvmem->dev.parent = config->dev;
++
++ device_initialize(&nvmem->dev);
++
++ if (!config->ignore_wp)
+ nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp",
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(nvmem->wp_gpio)) {
+- ida_free(&nvmem_ida, nvmem->id);
+ rval = PTR_ERR(nvmem->wp_gpio);
+- kfree(nvmem);
+- return ERR_PTR(rval);
++ nvmem->wp_gpio = NULL;
++ goto err_put_device;
+ }
+
+ kref_init(&nvmem->refcnt);
+ INIT_LIST_HEAD(&nvmem->cells);
+
+- nvmem->id = rval;
+ nvmem->owner = config->owner;
+ if (!nvmem->owner && config->dev->driver)
+ nvmem->owner = config->dev->driver->owner;
+ nvmem->stride = config->stride ?: 1;
+ nvmem->word_size = config->word_size ?: 1;
+ nvmem->size = config->size;
+- nvmem->dev.type = &nvmem_provider_type;
+- nvmem->dev.bus = &nvmem_bus_type;
+- nvmem->dev.parent = config->dev;
+ nvmem->root_only = config->root_only;
+ nvmem->priv = config->priv;
+ nvmem->type = config->type;
+@@ -803,18 +806,21 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
+
+ switch (config->id) {
+ case NVMEM_DEVID_NONE:
+- dev_set_name(&nvmem->dev, "%s", config->name);
++ rval = dev_set_name(&nvmem->dev, "%s", config->name);
+ break;
+ case NVMEM_DEVID_AUTO:
+- dev_set_name(&nvmem->dev, "%s%d", config->name, nvmem->id);
++ rval = dev_set_name(&nvmem->dev, "%s%d", config->name, nvmem->id);
+ break;
+ default:
+- dev_set_name(&nvmem->dev, "%s%d",
++ rval = dev_set_name(&nvmem->dev, "%s%d",
+ config->name ? : "nvmem",
+ config->name ? config->id : nvmem->id);
+ break;
+ }
+
++ if (rval)
++ goto err_put_device;
++
+ nvmem->read_only = device_property_present(config->dev, "read-only") ||
+ config->read_only || !nvmem->reg_write;
+
+@@ -824,29 +830,20 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
+
+ if (nvmem->nkeepout) {
+ rval = nvmem_validate_keepouts(nvmem);
+- if (rval) {
+- ida_free(&nvmem_ida, nvmem->id);
+- kfree(nvmem);
+- return ERR_PTR(rval);
+- }
++ if (rval)
++ goto err_put_device;
+ }
+
+- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
+-
+- rval = device_register(&nvmem->dev);
+- if (rval)
+- goto err_put_device;
+-
+ if (config->compat) {
+ rval = nvmem_sysfs_setup_compat(nvmem, config);
+ if (rval)
+- goto err_device_del;
++ goto err_put_device;
+ }
+
+ if (config->cells) {
+ rval = nvmem_add_cells(nvmem, config->cells, config->ncells);
+ if (rval)
+- goto err_teardown_compat;
++ goto err_remove_cells;
+ }
+
+ rval = nvmem_add_cells_from_table(nvmem);
+@@ -857,17 +854,20 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
+ if (rval)
+ goto err_remove_cells;
+
++ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name);
++
++ rval = device_add(&nvmem->dev);
++ if (rval)
++ goto err_remove_cells;
++
+ blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
+
+ return nvmem;
+
+ err_remove_cells:
+ nvmem_device_remove_all_cells(nvmem);
+-err_teardown_compat:
+ if (config->compat)
+ nvmem_sysfs_remove_compat(nvmem, config);
+-err_device_del:
+- device_del(&nvmem->dev);
+ err_put_device:
+ put_device(&nvmem->dev);
+
+diff --git a/drivers/nvmem/qcom-spmi-sdam.c b/drivers/nvmem/qcom-spmi-sdam.c
+index 4fcb63507ecd1..8499892044b7b 100644
+--- a/drivers/nvmem/qcom-spmi-sdam.c
++++ b/drivers/nvmem/qcom-spmi-sdam.c
+@@ -166,6 +166,7 @@ static const struct of_device_id sdam_match_table[] = {
+ { .compatible = "qcom,spmi-sdam" },
+ {},
+ };
++MODULE_DEVICE_TABLE(of, sdam_match_table);
+
+ static struct platform_driver sdam_driver = {
+ .driver = {
+diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c
+index b11c3c974b3d6..752d0bf4445ee 100644
+--- a/drivers/nvmem/rmem.c
++++ b/drivers/nvmem/rmem.c
+@@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset,
+ * but as of Dec 2020 this isn't possible on arm64.
+ */
+ addr = memremap(priv->mem->base, available, MEMREMAP_WB);
+- if (IS_ERR(addr)) {
++ if (!addr) {
+ dev_err(priv->dev, "Failed to remap memory region\n");
+- return PTR_ERR(addr);
++ return -ENOMEM;
+ }
+
+ count = memory_read_from_buffer(val, bytes, &off, addr, available);
+@@ -71,6 +71,7 @@ static int rmem_probe(struct platform_device *pdev)
+ config.dev = dev;
+ config.priv = priv;
+ config.name = "rmem";
++ config.id = NVMEM_DEVID_AUTO;
+ config.size = mem->size;
+ config.reg_read = rmem_read;
+
+diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
+index 3dfeae8912dfc..80b5fd44ab1c7 100644
+--- a/drivers/of/Kconfig
++++ b/drivers/of/Kconfig
+@@ -70,10 +70,6 @@ config OF_IRQ
+ def_bool y
+ depends on !SPARC && IRQ_DOMAIN
+
+-config OF_NET
+- depends on NETDEVICES
+- def_bool y
+-
+ config OF_RESERVED_MEM
+ def_bool OF_EARLY_FLATTREE
+
+diff --git a/drivers/of/Makefile b/drivers/of/Makefile
+index c13b982084a3a..e0360a44306e2 100644
+--- a/drivers/of/Makefile
++++ b/drivers/of/Makefile
+@@ -7,7 +7,6 @@ obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
+ obj-$(CONFIG_OF_PROMTREE) += pdt.o
+ obj-$(CONFIG_OF_ADDRESS) += address.o
+ obj-$(CONFIG_OF_IRQ) += irq.o
+-obj-$(CONFIG_OF_NET) += of_net.o
+ obj-$(CONFIG_OF_UNITTEST) += unittest.o
+ obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
+ obj-$(CONFIG_OF_RESOLVE) += resolver.o
+diff --git a/drivers/of/address.c b/drivers/of/address.c
+index 94f017d808c44..586fb94005e26 100644
+--- a/drivers/of/address.c
++++ b/drivers/of/address.c
+@@ -963,8 +963,19 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
+ }
+
+ of_dma_range_parser_init(&parser, node);
+- for_each_of_range(&parser, &range)
++ for_each_of_range(&parser, &range) {
++ if (range.cpu_addr == OF_BAD_ADDR) {
++ pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n",
++ range.bus_addr, node);
++ continue;
++ }
+ num_ranges++;
++ }
++
++ if (!num_ranges) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+ r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL);
+ if (!r) {
+@@ -973,18 +984,16 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
+ }
+
+ /*
+- * Record all info in the generic DMA ranges array for struct device.
++ * Record all info in the generic DMA ranges array for struct device,
++ * returning an error if we don't find any parsable ranges.
+ */
+ *map = r;
+ of_dma_range_parser_init(&parser, node);
+ for_each_of_range(&parser, &range) {
+ pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
+ range.bus_addr, range.cpu_addr, range.size);
+- if (range.cpu_addr == OF_BAD_ADDR) {
+- pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n",
+- range.bus_addr, node);
++ if (range.cpu_addr == OF_BAD_ADDR)
+ continue;
+- }
+ r->cpu_start = range.cpu_addr;
+ r->dma_start = range.bus_addr;
+ r->size = range.size;
+diff --git a/drivers/of/base.c b/drivers/of/base.c
+index 0ac17256258d5..54719f8156ed1 100644
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -1327,9 +1327,14 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it)
+ * property data length
+ */
+ if (it->cur + count > it->list_end) {
+- pr_err("%pOF: %s = %d found %d\n",
+- it->parent, it->cells_name,
+- count, it->cell_count);
++ if (it->cells_name)
++ pr_err("%pOF: %s = %d found %td\n",
++ it->parent, it->cells_name,
++ count, it->list_end - it->cur);
++ else
++ pr_err("%pOF: phandle %s needs %d, found %td\n",
++ it->parent, of_node_full_name(it->node),
++ count, it->list_end - it->cur);
+ goto err;
+ }
+ }
+diff --git a/drivers/of/device.c b/drivers/of/device.c
+index b0800c260f64a..19c42a9dcba91 100644
+--- a/drivers/of/device.c
++++ b/drivers/of/device.c
+@@ -81,8 +81,11 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np)
+ * restricted-dma-pool region is allowed.
+ */
+ if (of_device_is_compatible(node, "restricted-dma-pool") &&
+- of_device_is_available(node))
++ of_device_is_available(node)) {
++ of_node_put(node);
+ break;
++ }
++ of_node_put(node);
+ }
+
+ /*
+@@ -287,12 +290,15 @@ int of_device_request_module(struct device *dev)
+ if (size < 0)
+ return size;
+
+- str = kmalloc(size + 1, GFP_KERNEL);
++ /* Reserve an additional byte for the trailing '\0' */
++ size++;
++
++ str = kmalloc(size, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+
+ of_device_get_modalias(dev, str, size);
+- str[size] = '\0';
++ str[size - 1] = '\0';
+ ret = request_module(str);
+ kfree(str);
+
+diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
+index cd3821a6444f0..4e436f2d13aeb 100644
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb)
+ }
+ EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
+
+-#ifdef DEBUG
+-const char *action_names[] = {
++static const char *action_names[] = {
++ [0] = "INVALID",
+ [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE",
+ [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE",
+ [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY",
+ [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY",
+ [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY",
+ };
+-#endif
+
+ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
+ {
+@@ -594,21 +593,9 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+ }
+
+ ret = __of_add_property(ce->np, ce->prop);
+- if (ret) {
+- pr_err("changeset: add_property failed @%pOF/%s\n",
+- ce->np,
+- ce->prop->name);
+- break;
+- }
+ break;
+ case OF_RECONFIG_REMOVE_PROPERTY:
+ ret = __of_remove_property(ce->np, ce->prop);
+- if (ret) {
+- pr_err("changeset: remove_property failed @%pOF/%s\n",
+- ce->np,
+- ce->prop->name);
+- break;
+- }
+ break;
+
+ case OF_RECONFIG_UPDATE_PROPERTY:
+@@ -622,20 +609,17 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+ }
+
+ ret = __of_update_property(ce->np, ce->prop, &old_prop);
+- if (ret) {
+- pr_err("changeset: update_property failed @%pOF/%s\n",
+- ce->np,
+- ce->prop->name);
+- break;
+- }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+- if (ret)
++ if (ret) {
++ pr_err("changeset: apply failed: %-15s %pOF:%s\n",
++ action_names[ce->action], ce->np, ce->prop->name);
+ return ret;
++ }
+
+ switch (ce->action) {
+ case OF_RECONFIG_ATTACH_NODE:
+@@ -921,6 +905,9 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+ if (!ce)
+ return -ENOMEM;
+
++ if (WARN_ON(action >= ARRAY_SIZE(action_names)))
++ return -EINVAL;
++
+ /* get a reference to the node */
+ ce->action = action;
+ ce->np = of_node_get(np);
+diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
+index 4546572af24bb..338171c978cc1 100644
+--- a/drivers/of/fdt.c
++++ b/drivers/of/fdt.c
+@@ -245,7 +245,7 @@ static int populate_node(const void *blob,
+ }
+
+ *pnp = np;
+- return true;
++ return 0;
+ }
+
+ static void reverse_nodes(struct device_node *parent)
+@@ -313,7 +313,7 @@ static int unflatten_dt_nodes(const void *blob,
+ for (offset = 0;
+ offset >= 0 && depth >= initial_depth;
+ offset = fdt_next_node(blob, offset, &depth)) {
+- if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))
++ if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH - 1))
+ continue;
+
+ if (!IS_ENABLED(CONFIG_OF_KOBJ) &&
+@@ -482,9 +482,11 @@ static int __init early_init_dt_reserve_memory_arch(phys_addr_t base,
+ if (nomap) {
+ /*
+ * If the memory is already reserved (by another region), we
+- * should not allow it to be marked nomap.
++ * should not allow it to be marked nomap, but don't worry
++ * if the region isn't memory as it won't be mapped.
+ */
+- if (memblock_is_region_reserved(base, size))
++ if (memblock_overlaps_region(&memblock.memory, base, size) &&
++ memblock_is_region_reserved(base, size))
+ return -EBUSY;
+
+ return memblock_mark_nomap(base, size);
+@@ -969,18 +971,22 @@ static void __init early_init_dt_check_for_elfcorehdr(unsigned long node)
+ elfcorehdr_addr, elfcorehdr_size);
+ }
+
+-static phys_addr_t cap_mem_addr;
+-static phys_addr_t cap_mem_size;
++static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND;
+
+ /**
+ * early_init_dt_check_for_usable_mem_range - Decode usable memory range
+ * location from flat tree
+- * @node: reference to node containing usable memory range location ('chosen')
+ */
+-static void __init early_init_dt_check_for_usable_mem_range(unsigned long node)
++void __init early_init_dt_check_for_usable_mem_range(void)
+ {
+ const __be32 *prop;
+ int len;
++ phys_addr_t cap_mem_addr;
++ phys_addr_t cap_mem_size;
++ unsigned long node = chosen_node_offset;
++
++ if ((long)node < 0)
++ return;
+
+ pr_debug("Looking for usable-memory-range property... ");
+
+@@ -993,6 +999,8 @@ static void __init early_init_dt_check_for_usable_mem_range(unsigned long node)
+
+ pr_debug("cap_mem_start=%pa cap_mem_size=%pa\n", &cap_mem_addr,
+ &cap_mem_size);
++
++ memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
+ }
+
+ #ifdef CONFIG_SERIAL_EARLYCON
+@@ -1141,9 +1149,10 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
+ (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+ return 0;
+
++ chosen_node_offset = node;
++
+ early_init_dt_check_for_initrd(node);
+ early_init_dt_check_for_elfcorehdr(node);
+- early_init_dt_check_for_usable_mem_range(node);
+
+ /* Retrieve command line */
+ p = of_get_flat_dt_prop(node, "bootargs", &l);
+@@ -1279,7 +1288,7 @@ void __init early_init_dt_scan_nodes(void)
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+
+ /* Handle linux,usable-memory-range property */
+- memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
++ early_init_dt_check_for_usable_mem_range();
+ }
+
+ bool __init early_init_dt_scan(void *params)
+diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
+index 761fd870d1db2..d10fd54415c2c 100644
+--- a/drivers/of/kexec.c
++++ b/drivers/of/kexec.c
+@@ -125,6 +125,7 @@ int ima_get_kexec_buffer(void **addr, size_t *size)
+ {
+ int ret, len;
+ unsigned long tmp_addr;
++ unsigned long start_pfn, end_pfn;
+ size_t tmp_size;
+ const void *prop;
+
+@@ -139,6 +140,22 @@ int ima_get_kexec_buffer(void **addr, size_t *size)
+ if (ret)
+ return ret;
+
++	/* Do a basic sanity check on the returned size for the ima-kexec buffer */
++ if (!tmp_size)
++ return -ENOENT;
++
++ /*
++ * Calculate the PFNs for the buffer and ensure
++	 * they are within addressable memory.
++ */
++ start_pfn = PHYS_PFN(tmp_addr);
++ end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1);
++ if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) {
++ pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n",
++ tmp_addr, tmp_size);
++ return -EINVAL;
++ }
++
+ *addr = __va(tmp_addr);
+ *size = tmp_size;
+
+@@ -148,7 +165,7 @@ int ima_get_kexec_buffer(void **addr, size_t *size)
+ /**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ */
+-int ima_free_kexec_buffer(void)
++int __init ima_free_kexec_buffer(void)
+ {
+ int ret;
+ unsigned long addr;
+@@ -170,8 +187,8 @@ int ima_free_kexec_buffer(void)
+ if (ret)
+ return ret;
+
+- return memblock_free(addr, size);
+-
++ memblock_free_late(addr, size);
++ return 0;
+ }
+
+ /**
+@@ -267,7 +284,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
+ const char *cmdline, size_t extra_fdt_size)
+ {
+ void *fdt;
+- int ret, chosen_node;
++ int ret, chosen_node, len;
+ const void *prop;
+ size_t fdt_size;
+
+@@ -310,19 +327,19 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
+ goto out;
+
+ /* Did we boot using an initrd? */
+- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
++ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", &len);
+ if (prop) {
+ u64 tmp_start, tmp_end, tmp_size;
+
+- tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
++ tmp_start = of_read_number(prop, len / 4);
+
+- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
++ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", &len);
+ if (!prop) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+- tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
++ tmp_end = of_read_number(prop, len / 4);
+
+ /*
+ * kexec reserves exact initrd size, while firmware may
+@@ -386,6 +403,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
+ crashk_res.end - crashk_res.start + 1);
+ if (ret)
+ goto out;
++
++ if (crashk_low_res.end) {
++ ret = fdt_appendprop_addrrange(fdt, 0, chosen_node,
++ "linux,usable-memory-range",
++ crashk_low_res.start,
++ crashk_low_res.end - crashk_low_res.start + 1);
++ if (ret)
++ goto out;
++ }
+ }
+
+ /* add bootargs */
+diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
+deleted file mode 100644
+index dbac3a172a11e..0000000000000
+--- a/drivers/of/of_net.c
++++ /dev/null
+@@ -1,145 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * OF helpers for network devices.
+- *
+- * Initially copied out of arch/powerpc/kernel/prom_parse.c
+- */
+-#include <linux/etherdevice.h>
+-#include <linux/kernel.h>
+-#include <linux/of_net.h>
+-#include <linux/of_platform.h>
+-#include <linux/phy.h>
+-#include <linux/export.h>
+-#include <linux/device.h>
+-#include <linux/nvmem-consumer.h>
+-
+-/**
+- * of_get_phy_mode - Get phy mode for given device_node
+- * @np: Pointer to the given device_node
+- * @interface: Pointer to the result
+- *
+- * The function gets phy interface string from property 'phy-mode' or
+- * 'phy-connection-type'. The index in phy_modes table is set in
+- * interface and 0 returned. In case of error interface is set to
+- * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
+- */
+-int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
+-{
+- const char *pm;
+- int err, i;
+-
+- *interface = PHY_INTERFACE_MODE_NA;
+-
+- err = of_property_read_string(np, "phy-mode", &pm);
+- if (err < 0)
+- err = of_property_read_string(np, "phy-connection-type", &pm);
+- if (err < 0)
+- return err;
+-
+- for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
+- if (!strcasecmp(pm, phy_modes(i))) {
+- *interface = i;
+- return 0;
+- }
+-
+- return -ENODEV;
+-}
+-EXPORT_SYMBOL_GPL(of_get_phy_mode);
+-
+-static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
+-{
+- struct property *pp = of_find_property(np, name, NULL);
+-
+- if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+- memcpy(addr, pp->value, ETH_ALEN);
+- return 0;
+- }
+- return -ENODEV;
+-}
+-
+-static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+-{
+- struct platform_device *pdev = of_find_device_by_node(np);
+- struct nvmem_cell *cell;
+- const void *mac;
+- size_t len;
+- int ret;
+-
+- /* Try lookup by device first, there might be a nvmem_cell_lookup
+- * associated with a given device.
+- */
+- if (pdev) {
+- ret = nvmem_get_mac_address(&pdev->dev, addr);
+- put_device(&pdev->dev);
+- return ret;
+- }
+-
+- cell = of_nvmem_cell_get(np, "mac-address");
+- if (IS_ERR(cell))
+- return PTR_ERR(cell);
+-
+- mac = nvmem_cell_read(cell, &len);
+- nvmem_cell_put(cell);
+-
+- if (IS_ERR(mac))
+- return PTR_ERR(mac);
+-
+- if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+- kfree(mac);
+- return -EINVAL;
+- }
+-
+- memcpy(addr, mac, ETH_ALEN);
+- kfree(mac);
+-
+- return 0;
+-}
+-
+-/**
+- * of_get_mac_address()
+- * @np: Caller's Device Node
+- * @addr: Pointer to a six-byte array for the result
+- *
+- * Search the device tree for the best MAC address to use. 'mac-address' is
+- * checked first, because that is supposed to contain to "most recent" MAC
+- * address. If that isn't set, then 'local-mac-address' is checked next,
+- * because that is the default address. If that isn't set, then the obsolete
+- * 'address' is checked, just in case we're using an old device tree. If any
+- * of the above isn't set, then try to get MAC address from nvmem cell named
+- * 'mac-address'.
+- *
+- * Note that the 'address' property is supposed to contain a virtual address of
+- * the register set, but some DTS files have redefined that property to be the
+- * MAC address.
+- *
+- * All-zero MAC addresses are rejected, because those could be properties that
+- * exist in the device tree, but were not set by U-Boot. For example, the
+- * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+- * addresses. Some older U-Boots only initialized 'local-mac-address'. In
+- * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+- * but is all zeros.
+- *
+- * Return: 0 on success and errno in case of error.
+-*/
+-int of_get_mac_address(struct device_node *np, u8 *addr)
+-{
+- int ret;
+-
+- if (!np)
+- return -ENODEV;
+-
+- ret = of_get_mac_addr(np, "mac-address", addr);
+- if (!ret)
+- return 0;
+-
+- ret = of_get_mac_addr(np, "local-mac-address", addr);
+- if (!ret)
+- return 0;
+-
+- ret = of_get_mac_addr(np, "address", addr);
+- if (!ret)
+- return 0;
+-
+- return of_get_mac_addr_nvmem(np, addr);
+-}
+-EXPORT_SYMBOL(of_get_mac_address);
+diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
+index 9da8835ba5a58..9e949ddcb1464 100644
+--- a/drivers/of/of_reserved_mem.c
++++ b/drivers/of/of_reserved_mem.c
+@@ -47,9 +47,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
+ err = memblock_mark_nomap(base, size);
+ if (err)
+ memblock_free(base, size);
+- kmemleak_ignore_phys(base);
+ }
+
++ kmemleak_ignore_phys(base);
++
+ return err;
+ }
+
+diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
+index d80160cf34bb7..ee7f3659b353c 100644
+--- a/drivers/of/overlay.c
++++ b/drivers/of/overlay.c
+@@ -57,8 +57,10 @@ struct fragment {
+ * struct overlay_changeset
+ * @id: changeset identifier
+ * @ovcs_list: list on which we are located
+- * @fdt: base of memory allocated to hold aligned FDT that was unflattened to create @overlay_tree
+- * @overlay_tree: expanded device tree that contains the fragment nodes
++ * @new_fdt: Memory allocated to hold unflattened aligned FDT
++ * @overlay_mem: the memory chunk that contains @overlay_root
++ * @overlay_root: expanded device tree that contains the fragment nodes
++ * @notify_state: most recent notify action used on overlay
+ * @count: count of fragment structures
+ * @fragments: fragment nodes in the overlay expanded device tree
+ * @symbols_fragment: last element of @fragments[] is the __symbols__ node
+@@ -67,8 +69,10 @@ struct fragment {
+ struct overlay_changeset {
+ int id;
+ struct list_head ovcs_list;
+- const void *fdt;
+- struct device_node *overlay_tree;
++ const void *new_fdt;
++ const void *overlay_mem;
++ struct device_node *overlay_root;
++ enum of_overlay_notify_action notify_state;
+ int count;
+ struct fragment *fragments;
+ bool symbols_fragment;
+@@ -115,7 +119,6 @@ void of_overlay_mutex_unlock(void)
+ mutex_unlock(&of_overlay_phandle_mutex);
+ }
+
+-
+ static LIST_HEAD(ovcs_list);
+ static DEFINE_IDR(ovcs_idr);
+
+@@ -149,19 +152,14 @@ int of_overlay_notifier_unregister(struct notifier_block *nb)
+ }
+ EXPORT_SYMBOL_GPL(of_overlay_notifier_unregister);
+
+-static char *of_overlay_action_name[] = {
+- "pre-apply",
+- "post-apply",
+- "pre-remove",
+- "post-remove",
+-};
+-
+ static int overlay_notify(struct overlay_changeset *ovcs,
+ enum of_overlay_notify_action action)
+ {
+ struct of_overlay_notify_data nd;
+ int i, ret;
+
++ ovcs->notify_state = action;
++
+ for (i = 0; i < ovcs->count; i++) {
+ struct fragment *fragment = &ovcs->fragments[i];
+
+@@ -170,12 +168,10 @@ static int overlay_notify(struct overlay_changeset *ovcs,
+
+ ret = blocking_notifier_call_chain(&overlay_notify_chain,
+ action, &nd);
+- if (ret == NOTIFY_OK || ret == NOTIFY_STOP)
+- return 0;
+- if (ret) {
++ if (notifier_to_errno(ret)) {
+ ret = notifier_to_errno(ret);
+ pr_err("overlay changeset %s notifier error %d, target: %pOF\n",
+- of_overlay_action_name[action], ret, nd.target);
++ of_overlay_action_name(action), ret, nd.target);
+ return ret;
+ }
+ }
+@@ -185,7 +181,7 @@ static int overlay_notify(struct overlay_changeset *ovcs,
+
+ /*
+ * The values of properties in the "/__symbols__" node are paths in
+- * the ovcs->overlay_tree. When duplicating the properties, the paths
++ * the ovcs->overlay_root. When duplicating the properties, the paths
+ * need to be adjusted to be the correct path for the live device tree.
+ *
+ * The paths refer to a node in the subtree of a fragment node's "__overlay__"
+@@ -221,7 +217,7 @@ static struct property *dup_and_fixup_symbol_prop(
+
+ if (path_len < 1)
+ return NULL;
+- fragment_node = __of_find_node_by_path(ovcs->overlay_tree, path + 1);
++ fragment_node = __of_find_node_by_path(ovcs->overlay_root, path + 1);
+ overlay_node = __of_find_node_by_path(fragment_node, "__overlay__/");
+ of_node_put(fragment_node);
+ of_node_put(overlay_node);
+@@ -549,7 +545,7 @@ static int find_dup_cset_node_entry(struct overlay_changeset *ovcs,
+
+ fn_1 = kasprintf(GFP_KERNEL, "%pOF", ce_1->np);
+ fn_2 = kasprintf(GFP_KERNEL, "%pOF", ce_2->np);
+- node_path_match = !strcmp(fn_1, fn_2);
++ node_path_match = !fn_1 || !fn_2 || !strcmp(fn_1, fn_2);
+ kfree(fn_1);
+ kfree(fn_2);
+ if (node_path_match) {
+@@ -584,7 +580,7 @@ static int find_dup_cset_prop(struct overlay_changeset *ovcs,
+
+ fn_1 = kasprintf(GFP_KERNEL, "%pOF", ce_1->np);
+ fn_2 = kasprintf(GFP_KERNEL, "%pOF", ce_2->np);
+- node_path_match = !strcmp(fn_1, fn_2);
++ node_path_match = !fn_1 || !fn_2 || !strcmp(fn_1, fn_2);
+ kfree(fn_1);
+ kfree(fn_2);
+ if (node_path_match &&
+@@ -718,53 +714,48 @@ static struct device_node *find_target(struct device_node *info_node)
+
+ /**
+ * init_overlay_changeset() - initialize overlay changeset from overlay tree
+- * @ovcs: Overlay changeset to build
+- * @fdt: base of memory allocated to hold aligned FDT that was unflattened to create @tree
+- * @tree: Contains the overlay fragments and overlay fixup nodes
++ * @ovcs: Overlay changeset to build
+ *
+ * Initialize @ovcs. Populate @ovcs->fragments with node information from
+- * the top level of @tree. The relevant top level nodes are the fragment
+- * nodes and the __symbols__ node. Any other top level node will be ignored.
++ * the top level of @overlay_root. The relevant top level nodes are the
++ * fragment nodes and the __symbols__ node. Any other top level node will
++ * be ignored. Populate other @ovcs fields.
+ *
+ * Return: 0 on success, -ENOMEM if memory allocation failure, -EINVAL if error
+- * detected in @tree, or -ENOSPC if idr_alloc() error.
++ * detected in @overlay_root. On error return, the caller of
++ * init_overlay_changeset() must call free_overlay_changeset().
+ */
+-static int init_overlay_changeset(struct overlay_changeset *ovcs,
+- const void *fdt, struct device_node *tree)
++static int init_overlay_changeset(struct overlay_changeset *ovcs)
+ {
+ struct device_node *node, *overlay_node;
+ struct fragment *fragment;
+ struct fragment *fragments;
+- int cnt, id, ret;
++ int cnt, ret;
++
++ /*
++ * None of the resources allocated by this function will be freed in
++ * the error paths. Instead the caller of this function is required
++ * to call free_overlay_changeset() (which will free the resources)
++ * if error return.
++ */
+
+ /*
+ * Warn for some issues. Can not return -EINVAL for these until
+ * of_unittest_apply_overlay() is fixed to pass these checks.
+ */
+- if (!of_node_check_flag(tree, OF_DYNAMIC))
+- pr_debug("%s() tree is not dynamic\n", __func__);
+-
+- if (!of_node_check_flag(tree, OF_DETACHED))
+- pr_debug("%s() tree is not detached\n", __func__);
+-
+- if (!of_node_is_root(tree))
+- pr_debug("%s() tree is not root\n", __func__);
+-
+- ovcs->overlay_tree = tree;
+- ovcs->fdt = fdt;
+-
+- INIT_LIST_HEAD(&ovcs->ovcs_list);
++ if (!of_node_check_flag(ovcs->overlay_root, OF_DYNAMIC))
++ pr_debug("%s() ovcs->overlay_root is not dynamic\n", __func__);
+
+- of_changeset_init(&ovcs->cset);
++ if (!of_node_check_flag(ovcs->overlay_root, OF_DETACHED))
++ pr_debug("%s() ovcs->overlay_root is not detached\n", __func__);
+
+- id = idr_alloc(&ovcs_idr, ovcs, 1, 0, GFP_KERNEL);
+- if (id <= 0)
+- return id;
++ if (!of_node_is_root(ovcs->overlay_root))
++ pr_debug("%s() ovcs->overlay_root is not root\n", __func__);
+
+ cnt = 0;
+
+ /* fragment nodes */
+- for_each_child_of_node(tree, node) {
++ for_each_child_of_node(ovcs->overlay_root, node) {
+ overlay_node = of_get_child_by_name(node, "__overlay__");
+ if (overlay_node) {
+ cnt++;
+@@ -772,7 +763,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ }
+ }
+
+- node = of_get_child_by_name(tree, "__symbols__");
++ node = of_get_child_by_name(ovcs->overlay_root, "__symbols__");
+ if (node) {
+ cnt++;
+ of_node_put(node);
+@@ -781,11 +772,12 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ fragments = kcalloc(cnt, sizeof(*fragments), GFP_KERNEL);
+ if (!fragments) {
+ ret = -ENOMEM;
+- goto err_free_idr;
++ goto err_out;
+ }
++ ovcs->fragments = fragments;
+
+ cnt = 0;
+- for_each_child_of_node(tree, node) {
++ for_each_child_of_node(ovcs->overlay_root, node) {
+ overlay_node = of_get_child_by_name(node, "__overlay__");
+ if (!overlay_node)
+ continue;
+@@ -797,7 +789,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ of_node_put(fragment->overlay);
+ ret = -EINVAL;
+ of_node_put(node);
+- goto err_free_fragments;
++ goto err_out;
+ }
+
+ cnt++;
+@@ -807,7 +799,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ * if there is a symbols fragment in ovcs->fragments[i] it is
+ * the final element in the array
+ */
+- node = of_get_child_by_name(tree, "__symbols__");
++ node = of_get_child_by_name(ovcs->overlay_root, "__symbols__");
+ if (node) {
+ ovcs->symbols_fragment = 1;
+ fragment = &fragments[cnt];
+@@ -817,7 +809,8 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ if (!fragment->target) {
+ pr_err("symbols in overlay, but not in live tree\n");
+ ret = -EINVAL;
+- goto err_free_fragments;
++ of_node_put(node);
++ goto err_out;
+ }
+
+ cnt++;
+@@ -826,20 +819,14 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs,
+ if (!cnt) {
+ pr_err("no fragments or symbols in overlay\n");
+ ret = -EINVAL;
+- goto err_free_fragments;
++ goto err_out;
+ }
+
+- ovcs->id = id;
+ ovcs->count = cnt;
+- ovcs->fragments = fragments;
+
+ return 0;
+
+-err_free_fragments:
+- kfree(fragments);
+-err_free_idr:
+- idr_remove(&ovcs_idr, id);
+-
++err_out:
+ pr_err("%s() failed, ret = %d\n", __func__, ret);
+
+ return ret;
+@@ -852,21 +839,34 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
+ if (ovcs->cset.entries.next)
+ of_changeset_destroy(&ovcs->cset);
+
+- if (ovcs->id)
++ if (ovcs->id) {
+ idr_remove(&ovcs_idr, ovcs->id);
++ list_del(&ovcs->ovcs_list);
++ ovcs->id = 0;
++ }
++
+
+ for (i = 0; i < ovcs->count; i++) {
+ of_node_put(ovcs->fragments[i].target);
+ of_node_put(ovcs->fragments[i].overlay);
+ }
+ kfree(ovcs->fragments);
++
+ /*
+- * There should be no live pointers into ovcs->overlay_tree and
+- * ovcs->fdt due to the policy that overlay notifiers are not allowed
+- * to retain pointers into the overlay devicetree.
++ * There should be no live pointers into ovcs->overlay_mem and
++ * ovcs->new_fdt due to the policy that overlay notifiers are not
++ * allowed to retain pointers into the overlay devicetree other
++ * than during the window from OF_OVERLAY_PRE_APPLY overlay
++ * notifiers until the OF_OVERLAY_POST_REMOVE overlay notifiers.
++ *
++ * A memory leak will occur here if within the window.
+ */
+- kfree(ovcs->overlay_tree);
+- kfree(ovcs->fdt);
++
++ if (ovcs->notify_state == OF_OVERLAY_INIT ||
++ ovcs->notify_state == OF_OVERLAY_POST_REMOVE) {
++ kfree(ovcs->overlay_mem);
++ kfree(ovcs->new_fdt);
++ }
+ kfree(ovcs);
+ }
+
+@@ -874,28 +874,13 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
+ * internal documentation
+ *
+ * of_overlay_apply() - Create and apply an overlay changeset
+- * @fdt: base of memory allocated to hold the aligned FDT
+- * @tree: Expanded overlay device tree
+- * @ovcs_id: Pointer to overlay changeset id
++ * @ovcs: overlay changeset
+ *
+ * Creates and applies an overlay changeset.
+ *
+- * If an error occurs in a pre-apply notifier, then no changes are made
+- * to the device tree.
+- *
+-
+- * A non-zero return value will not have created the changeset if error is from:
+- * - parameter checks
+- * - building the changeset
+- * - overlay changeset pre-apply notifier
+- *
+ * If an error is returned by an overlay changeset pre-apply notifier
+ * then no further overlay changeset pre-apply notifier will be called.
+ *
+- * A non-zero return value will have created the changeset if error is from:
+- * - overlay changeset entry notifier
+- * - overlay changeset post-apply notifier
+- *
+ * If an error is returned by an overlay changeset post-apply notifier
+ * then no further overlay changeset post-apply notifier will be called.
+ *
+@@ -909,64 +894,35 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
+ * following attempt to apply or remove an overlay changeset will be
+ * refused.
+ *
+- * Returns 0 on success, or a negative error number. Overlay changeset
+- * id is returned to *ovcs_id.
++ * Returns 0 on success, or a negative error number. On error return,
++ * the caller of of_overlay_apply() must call free_overlay_changeset().
+ */
+
+-static int of_overlay_apply(const void *fdt, struct device_node *tree,
+- int *ovcs_id)
++static int of_overlay_apply(struct overlay_changeset *ovcs)
+ {
+- struct overlay_changeset *ovcs;
+ int ret = 0, ret_revert, ret_tmp;
+
+- /*
+- * As of this point, fdt and tree belong to the overlay changeset.
+- * overlay changeset code is responsible for freeing them.
+- */
+-
+ if (devicetree_corrupt()) {
+ pr_err("devicetree state suspect, refuse to apply overlay\n");
+- kfree(fdt);
+- kfree(tree);
+ ret = -EBUSY;
+ goto out;
+ }
+
+- ovcs = kzalloc(sizeof(*ovcs), GFP_KERNEL);
+- if (!ovcs) {
+- kfree(fdt);
+- kfree(tree);
+- ret = -ENOMEM;
+- goto out;
+- }
+-
+- of_overlay_mutex_lock();
+- mutex_lock(&of_mutex);
+-
+- ret = of_resolve_phandles(tree);
++ ret = of_resolve_phandles(ovcs->overlay_root);
+ if (ret)
+- goto err_free_tree;
++ goto out;
+
+- ret = init_overlay_changeset(ovcs, fdt, tree);
++ ret = init_overlay_changeset(ovcs);
+ if (ret)
+- goto err_free_tree;
++ goto out;
+
+- /*
+- * after overlay_notify(), ovcs->overlay_tree related pointers may have
+- * leaked to drivers, so can not kfree() tree, aka ovcs->overlay_tree;
+- * and can not free memory containing aligned fdt. The aligned fdt
+- * is contained within the memory at ovcs->fdt, possibly at an offset
+- * from ovcs->fdt.
+- */
+ ret = overlay_notify(ovcs, OF_OVERLAY_PRE_APPLY);
+- if (ret) {
+- pr_err("overlay changeset pre-apply notify error %d\n", ret);
+- goto err_free_overlay_changeset;
+- }
++ if (ret)
++ goto out;
+
+ ret = build_changeset(ovcs);
+ if (ret)
+- goto err_free_overlay_changeset;
++ goto out;
+
+ ret_revert = 0;
+ ret = __of_changeset_apply_entries(&ovcs->cset, &ret_revert);
+@@ -976,7 +932,7 @@ static int of_overlay_apply(const void *fdt, struct device_node *tree,
+ ret_revert);
+ devicetree_state_flags |= DTSF_APPLY_FAIL;
+ }
+- goto err_free_overlay_changeset;
++ goto out;
+ }
+
+ ret = __of_changeset_apply_notify(&ovcs->cset);
+@@ -984,29 +940,10 @@ static int of_overlay_apply(const void *fdt, struct device_node *tree,
+ pr_err("overlay apply changeset entry notify error %d\n", ret);
+ /* notify failure is not fatal, continue */
+
+- list_add_tail(&ovcs->ovcs_list, &ovcs_list);
+- *ovcs_id = ovcs->id;
+-
+ ret_tmp = overlay_notify(ovcs, OF_OVERLAY_POST_APPLY);
+- if (ret_tmp) {
+- pr_err("overlay changeset post-apply notify error %d\n",
+- ret_tmp);
++ if (ret_tmp)
+ if (!ret)
+ ret = ret_tmp;
+- }
+-
+- goto out_unlock;
+-
+-err_free_tree:
+- kfree(fdt);
+- kfree(tree);
+-
+-err_free_overlay_changeset:
+- free_overlay_changeset(ovcs);
+-
+-out_unlock:
+- mutex_unlock(&of_mutex);
+- of_overlay_mutex_unlock();
+
+ out:
+ pr_debug("%s() err=%d\n", __func__, ret);
+@@ -1015,15 +952,16 @@ out:
+ }
+
+ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+- int *ovcs_id)
++ int *ret_ovcs_id)
+ {
+ void *new_fdt;
+ void *new_fdt_align;
++ void *overlay_mem;
+ int ret;
+ u32 size;
+- struct device_node *overlay_root = NULL;
++ struct overlay_changeset *ovcs;
+
+- *ovcs_id = 0;
++ *ret_ovcs_id = 0;
+
+ if (overlay_fdt_size < sizeof(struct fdt_header) ||
+ fdt_check_header(overlay_fdt)) {
+@@ -1035,41 +973,68 @@ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+ if (overlay_fdt_size < size)
+ return -EINVAL;
+
++ ovcs = kzalloc(sizeof(*ovcs), GFP_KERNEL);
++ if (!ovcs)
++ return -ENOMEM;
++
++ of_overlay_mutex_lock();
++ mutex_lock(&of_mutex);
++
++ /*
++ * ovcs->notify_state must be set to OF_OVERLAY_INIT before allocating
++ * ovcs resources, implicitly set by kzalloc() of ovcs
++ */
++
++ ovcs->id = idr_alloc(&ovcs_idr, ovcs, 1, 0, GFP_KERNEL);
++ if (ovcs->id <= 0) {
++ ret = ovcs->id;
++ goto err_free_ovcs;
++ }
++
++ INIT_LIST_HEAD(&ovcs->ovcs_list);
++ list_add_tail(&ovcs->ovcs_list, &ovcs_list);
++ of_changeset_init(&ovcs->cset);
++
+ /*
+ * Must create permanent copy of FDT because of_fdt_unflatten_tree()
+ * will create pointers to the passed in FDT in the unflattened tree.
+ */
+ new_fdt = kmalloc(size + FDT_ALIGN_SIZE, GFP_KERNEL);
+- if (!new_fdt)
+- return -ENOMEM;
++ if (!new_fdt) {
++ ret = -ENOMEM;
++ goto err_free_ovcs;
++ }
++ ovcs->new_fdt = new_fdt;
+
+ new_fdt_align = PTR_ALIGN(new_fdt, FDT_ALIGN_SIZE);
+ memcpy(new_fdt_align, overlay_fdt, size);
+
+- of_fdt_unflatten_tree(new_fdt_align, NULL, &overlay_root);
+- if (!overlay_root) {
++ overlay_mem = of_fdt_unflatten_tree(new_fdt_align, NULL,
++ &ovcs->overlay_root);
++ if (!overlay_mem) {
+ pr_err("unable to unflatten overlay_fdt\n");
+ ret = -EINVAL;
+- goto out_free_new_fdt;
++ goto err_free_ovcs;
+ }
++ ovcs->overlay_mem = overlay_mem;
+
+- ret = of_overlay_apply(new_fdt, overlay_root, ovcs_id);
+- if (ret < 0) {
+- /*
+- * new_fdt and overlay_root now belong to the overlay
+- * changeset.
+- * overlay changeset code is responsible for freeing them.
+- */
+- goto out;
+- }
++ ret = of_overlay_apply(ovcs);
++ if (ret < 0)
++ goto err_free_ovcs;
++
++ mutex_unlock(&of_mutex);
++ of_overlay_mutex_unlock();
++
++ *ret_ovcs_id = ovcs->id;
+
+ return 0;
+
++err_free_ovcs:
++ free_overlay_changeset(ovcs);
+
+-out_free_new_fdt:
+- kfree(new_fdt);
++ mutex_unlock(&of_mutex);
++ of_overlay_mutex_unlock();
+
+-out:
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(of_overlay_fdt_apply);
+@@ -1206,28 +1171,24 @@ int of_overlay_remove(int *ovcs_id)
+ if (!ovcs) {
+ ret = -ENODEV;
+ pr_err("remove: Could not find overlay #%d\n", *ovcs_id);
+- goto out_unlock;
++ goto err_unlock;
+ }
+
+ if (!overlay_removal_is_ok(ovcs)) {
+ ret = -EBUSY;
+- goto out_unlock;
++ goto err_unlock;
+ }
+
+ ret = overlay_notify(ovcs, OF_OVERLAY_PRE_REMOVE);
+- if (ret) {
+- pr_err("overlay changeset pre-remove notify error %d\n", ret);
+- goto out_unlock;
+- }
+-
+- list_del(&ovcs->ovcs_list);
++ if (ret)
++ goto err_unlock;
+
+ ret_apply = 0;
+ ret = __of_changeset_revert_entries(&ovcs->cset, &ret_apply);
+ if (ret) {
+ if (ret_apply)
+ devicetree_state_flags |= DTSF_REVERT_FAIL;
+- goto out_unlock;
++ goto err_unlock;
+ }
+
+ ret = __of_changeset_revert_notify(&ovcs->cset);
+@@ -1237,17 +1198,24 @@ int of_overlay_remove(int *ovcs_id)
+
+ *ovcs_id = 0;
+
++ /*
++ * Note that the overlay memory will be kfree()ed by
++ * free_overlay_changeset() even if the notifier for
++ * OF_OVERLAY_POST_REMOVE returns an error.
++ */
+ ret_tmp = overlay_notify(ovcs, OF_OVERLAY_POST_REMOVE);
+- if (ret_tmp) {
+- pr_err("overlay changeset post-remove notify error %d\n",
+- ret_tmp);
++ if (ret_tmp)
+ if (!ret)
+ ret = ret_tmp;
+- }
+
+ free_overlay_changeset(ovcs);
+
+-out_unlock:
++err_unlock:
++ /*
++ * If jumped over free_overlay_changeset(), then did not kfree()
++ * overlay related memory. This is a memory leak unless a subsequent
++ * of_overlay_remove() of this overlay is successful.
++ */
+ mutex_unlock(&of_mutex);
+
+ out:
+diff --git a/drivers/of/property.c b/drivers/of/property.c
+index a3483484a5a2a..acf0d3110357c 100644
+--- a/drivers/of/property.c
++++ b/drivers/of/property.c
+@@ -975,8 +975,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
+ nargs, index, &of_args);
+ if (ret < 0)
+ return ret;
+- if (!args)
++ if (!args) {
++ of_node_put(of_args.np);
+ return 0;
++ }
+
+ args->nargs = of_args.args_count;
+ args->fwnode = of_fwnode_handle(of_args.np);
+diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
+index 8c056972a6ddc..073a3f44c4049 100644
+--- a/drivers/of/unittest.c
++++ b/drivers/of/unittest.c
+@@ -70,7 +70,7 @@ static void __init of_unittest_find_node_by_name(void)
+
+ np = of_find_node_by_path("/testcase-data");
+ name = kasprintf(GFP_KERNEL, "%pOF", np);
+- unittest(np && !strcmp("/testcase-data", name),
++ unittest(np && name && !strcmp("/testcase-data", name),
+ "find /testcase-data failed\n");
+ of_node_put(np);
+ kfree(name);
+@@ -81,14 +81,14 @@ static void __init of_unittest_find_node_by_name(void)
+
+ np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
+ name = kasprintf(GFP_KERNEL, "%pOF", np);
+- unittest(np && !strcmp("/testcase-data/phandle-tests/consumer-a", name),
++ unittest(np && name && !strcmp("/testcase-data/phandle-tests/consumer-a", name),
+ "find /testcase-data/phandle-tests/consumer-a failed\n");
+ of_node_put(np);
+ kfree(name);
+
+ np = of_find_node_by_path("testcase-alias");
+ name = kasprintf(GFP_KERNEL, "%pOF", np);
+- unittest(np && !strcmp("/testcase-data", name),
++ unittest(np && name && !strcmp("/testcase-data", name),
+ "find testcase-alias failed\n");
+ of_node_put(np);
+ kfree(name);
+@@ -99,7 +99,7 @@ static void __init of_unittest_find_node_by_name(void)
+
+ np = of_find_node_by_path("testcase-alias/phandle-tests/consumer-a");
+ name = kasprintf(GFP_KERNEL, "%pOF", np);
+- unittest(np && !strcmp("/testcase-data/phandle-tests/consumer-a", name),
++ unittest(np && name && !strcmp("/testcase-data/phandle-tests/consumer-a", name),
+ "find testcase-alias/phandle-tests/consumer-a failed\n");
+ of_node_put(np);
+ kfree(name);
+@@ -657,12 +657,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
+ memset(&args, 0, sizeof(args));
+
+ EXPECT_BEGIN(KERN_INFO,
+- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
++ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
+
+ rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle",
+ "phandle", 0, &args);
+ EXPECT_END(KERN_INFO,
+- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
++ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
+
+ unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+
+@@ -911,11 +911,18 @@ static void __init of_unittest_dma_ranges_one(const char *path,
+ if (!rc) {
+ phys_addr_t paddr;
+ dma_addr_t dma_addr;
+- struct device dev_bogus;
++ struct device *dev_bogus;
+
+- dev_bogus.dma_range_map = map;
+- paddr = dma_to_phys(&dev_bogus, expect_dma_addr);
+- dma_addr = phys_to_dma(&dev_bogus, expect_paddr);
++ dev_bogus = kzalloc(sizeof(struct device), GFP_KERNEL);
++ if (!dev_bogus) {
++ unittest(0, "kzalloc() failed\n");
++ kfree(map);
++ return;
++ }
++
++ dev_bogus->dma_range_map = map;
++ paddr = dma_to_phys(dev_bogus, expect_dma_addr);
++ dma_addr = phys_to_dma(dev_bogus, expect_paddr);
+
+ unittest(paddr == expect_paddr,
+ "of_dma_get_range: wrong phys addr %pap (expecting %llx) on node %pOF\n",
+@@ -925,6 +932,7 @@ static void __init of_unittest_dma_ranges_one(const char *path,
+ &dma_addr, expect_dma_addr, np);
+
+ kfree(map);
++ kfree(dev_bogus);
+ }
+ of_node_put(np);
+ #endif
+@@ -934,8 +942,9 @@ static void __init of_unittest_parse_dma_ranges(void)
+ {
+ of_unittest_dma_ranges_one("/testcase-data/address-tests/device@70000000",
+ 0x0, 0x20000000);
+- of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000",
+- 0x100000000, 0x20000000);
++ if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
++ of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000",
++ 0x100000000, 0x20000000);
+ of_unittest_dma_ranges_one("/testcase-data/address-tests/pci@90000000",
+ 0x80000000, 0x20000000);
+ }
+@@ -1364,6 +1373,8 @@ static void attach_node_and_children(struct device_node *np)
+ const char *full_name;
+
+ full_name = kasprintf(GFP_KERNEL, "%pOF", np);
++ if (!full_name)
++ return;
+
+ if (!strcmp(full_name, "/__local_fixups__") ||
+ !strcmp(full_name, "/__fixups__")) {
+@@ -1688,19 +1699,19 @@ static void __init of_unittest_overlay_gpio(void)
+ */
+
+ EXPECT_BEGIN(KERN_INFO,
+- "GPIO line <<int>> (line-B-input) hogged as input\n");
++ "gpio-<<int>> (line-B-input): hogged as input\n");
+
+ EXPECT_BEGIN(KERN_INFO,
+- "GPIO line <<int>> (line-A-input) hogged as input\n");
++ "gpio-<<int>> (line-A-input): hogged as input\n");
+
+ ret = platform_driver_register(&unittest_gpio_driver);
+ if (unittest(ret == 0, "could not register unittest gpio driver\n"))
+ return;
+
+ EXPECT_END(KERN_INFO,
+- "GPIO line <<int>> (line-A-input) hogged as input\n");
++ "gpio-<<int>> (line-A-input): hogged as input\n");
+ EXPECT_END(KERN_INFO,
+- "GPIO line <<int>> (line-B-input) hogged as input\n");
++ "gpio-<<int>> (line-B-input): hogged as input\n");
+
+ unittest(probe_pass_count + 2 == unittest_gpio_probe_pass_count,
+ "unittest_gpio_probe() failed or not called\n");
+@@ -1727,7 +1738,7 @@ static void __init of_unittest_overlay_gpio(void)
+ chip_request_count = unittest_gpio_chip_request_count;
+
+ EXPECT_BEGIN(KERN_INFO,
+- "GPIO line <<int>> (line-D-input) hogged as input\n");
++ "gpio-<<int>> (line-D-input): hogged as input\n");
+
+ /* overlay_gpio_03 contains gpio node and child gpio hog node */
+
+@@ -1735,7 +1746,7 @@ static void __init of_unittest_overlay_gpio(void)
+ "Adding overlay 'overlay_gpio_03' failed\n");
+
+ EXPECT_END(KERN_INFO,
+- "GPIO line <<int>> (line-D-input) hogged as input\n");
++ "gpio-<<int>> (line-D-input): hogged as input\n");
+
+ unittest(probe_pass_count + 1 == unittest_gpio_probe_pass_count,
+ "unittest_gpio_probe() failed or not called\n");
+@@ -1774,7 +1785,7 @@ static void __init of_unittest_overlay_gpio(void)
+ */
+
+ EXPECT_BEGIN(KERN_INFO,
+- "GPIO line <<int>> (line-C-input) hogged as input\n");
++ "gpio-<<int>> (line-C-input): hogged as input\n");
+
+ /* overlay_gpio_04b contains child gpio hog node */
+
+@@ -1782,7 +1793,7 @@ static void __init of_unittest_overlay_gpio(void)
+ "Adding overlay 'overlay_gpio_04b' failed\n");
+
+ EXPECT_END(KERN_INFO,
+- "GPIO line <<int>> (line-C-input) hogged as input\n");
++ "gpio-<<int>> (line-C-input): hogged as input\n");
+
+ unittest(chip_request_count + 1 == unittest_gpio_chip_request_count,
+ "unittest_gpio_chip_request() called %d times (expected 1 time)\n",
+@@ -2062,7 +2073,7 @@ static int __init of_unittest_apply_revert_overlay_check(int overlay_nr,
+ of_unittest_untrack_overlay(save_id);
+
+ /* unittest device must be again in before state */
+- if (of_unittest_device_exists(unittest_nr, PDEV_OVERLAY) != before) {
++ if (of_unittest_device_exists(unittest_nr, ovtype) != before) {
+ unittest(0, "%s with device @\"%s\" %s\n",
+ overlay_name_from_nr(overlay_nr),
+ unittest_path(unittest_nr, ovtype),
+diff --git a/drivers/opp/core.c b/drivers/opp/core.c
+index 04b4691a8aac7..dbd69d8e44e42 100644
+--- a/drivers/opp/core.c
++++ b/drivers/opp/core.c
+@@ -1249,7 +1249,10 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
+ return opp_table;
+
+ remove_opp_dev:
++ _of_clear_opp_table(opp_table);
+ _remove_opp_dev(opp_dev, opp_table);
++ mutex_destroy(&opp_table->genpd_virt_dev_lock);
++ mutex_destroy(&opp_table->lock);
+ err:
+ kfree(opp_table);
+ return ERR_PTR(ret);
+@@ -2388,8 +2391,8 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev,
+ }
+
+ virt_dev = dev_pm_domain_attach_by_name(dev, *name);
+- if (IS_ERR(virt_dev)) {
+- ret = PTR_ERR(virt_dev);
++ if (IS_ERR_OR_NULL(virt_dev)) {
++ ret = virt_dev ? PTR_ERR(virt_dev) : -ENODEV;
+ dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret);
+ goto err;
+ }
+diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c
+index 596c185b5dda4..9eb71f47487b2 100644
+--- a/drivers/opp/debugfs.c
++++ b/drivers/opp/debugfs.c
+@@ -10,6 +10,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/device.h>
+ #include <linux/err.h>
++#include <linux/of.h>
+ #include <linux/init.h>
+ #include <linux/limits.h>
+ #include <linux/slab.h>
+@@ -131,9 +132,13 @@ void opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
+ debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend);
+ debugfs_create_u32("performance_state", S_IRUGO, d, &opp->pstate);
+ debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate);
++ debugfs_create_u32("level", S_IRUGO, d, &opp->level);
+ debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
+ &opp->clock_latency_ns);
+
++ opp->of_name = of_node_full_name(opp->np);
++ debugfs_create_str("of_name", S_IRUGO, d, (char **)&opp->of_name);
++
+ opp_debug_create_supplies(opp, opp_table, d);
+ opp_debug_create_bw(opp, opp_table, d);
+
+@@ -204,7 +209,7 @@ static void opp_migrate_dentry(struct opp_device *opp_dev,
+
+ dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir,
+ opp_table->dentry_name);
+- if (!dentry) {
++ if (IS_ERR(dentry)) {
+ dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
+ __func__, dev_name(opp_dev->dev), dev_name(dev));
+ return;
+diff --git a/drivers/opp/of.c b/drivers/opp/of.c
+index 2a97c6535c4c6..3028353afece3 100644
+--- a/drivers/opp/of.c
++++ b/drivers/opp/of.c
+@@ -437,11 +437,11 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table)
+
+ /* Checking only first OPP is sufficient */
+ np = of_get_next_available_child(opp_np, NULL);
++ of_node_put(opp_np);
+ if (!np) {
+ dev_err(dev, "OPP table empty\n");
+ return -EINVAL;
+ }
+- of_node_put(opp_np);
+
+ prop = of_find_property(np, "opp-peak-kBps", NULL);
+ of_node_put(np);
+@@ -921,7 +921,7 @@ free_required_opps:
+ free_opp:
+ _opp_free(new_opp);
+
+- return ERR_PTR(ret);
++ return ret ? ERR_PTR(ret) : NULL;
+ }
+
+ /* Initializes OPP tables based on new bindings */
+diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
+index 407c3bfe51d96..45e3a55239a13 100644
+--- a/drivers/opp/opp.h
++++ b/drivers/opp/opp.h
+@@ -96,6 +96,7 @@ struct dev_pm_opp {
+
+ #ifdef CONFIG_DEBUG_FS
+ struct dentry *dentry;
++ const char *of_name;
+ #endif
+ };
+
+diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
+index 059566f544291..6052f264bbb0a 100644
+--- a/drivers/parisc/ccio-dma.c
++++ b/drivers/parisc/ccio-dma.c
+@@ -1003,7 +1003,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ ioc->usg_calls++;
+ #endif
+
+- while(sg_dma_len(sglist) && nents--) {
++ while (nents && sg_dma_len(sglist)) {
+
+ #ifdef CCIO_COLLECT_STATS
+ ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
+@@ -1011,6 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ ccio_unmap_page(dev, sg_dma_address(sglist),
+ sg_dma_len(sglist), direction, 0);
+ ++sglist;
++ nents--;
+ }
+
+ DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
+@@ -1379,15 +1380,17 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
+ }
+ }
+
+-static void __init ccio_init_resources(struct ioc *ioc)
++static int __init ccio_init_resources(struct ioc *ioc)
+ {
+ struct resource *res = ioc->mmio_region;
+ char *name = kmalloc(14, GFP_KERNEL);
+-
++ if (unlikely(!name))
++ return -ENOMEM;
+ snprintf(name, 14, "GSC Bus [%d/]", ioc->hw_path);
+
+ ccio_init_resource(res, name, &ioc->ioc_regs->io_io_low);
+ ccio_init_resource(res + 1, name, &ioc->ioc_regs->io_io_low_hv);
++ return 0;
+ }
+
+ static int new_ioc_area(struct resource *res, unsigned long size,
+@@ -1542,7 +1545,11 @@ static int __init ccio_probe(struct parisc_device *dev)
+ return -ENOMEM;
+ }
+ ccio_ioc_init(ioc);
+- ccio_init_resources(ioc);
++ if (ccio_init_resources(ioc)) {
++ iounmap(ioc->ioc_regs);
++ kfree(ioc);
++ return -ENOMEM;
++ }
+ hppa_dma_ops = &ccio_ops;
+
+ hba = kzalloc(sizeof(*hba), GFP_KERNEL);
+diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
+index 952a92504df69..e33036281327d 100644
+--- a/drivers/parisc/dino.c
++++ b/drivers/parisc/dino.c
+@@ -142,9 +142,8 @@ struct dino_device
+ {
+ struct pci_hba_data hba; /* 'C' inheritance - must be first */
+ spinlock_t dinosaur_pen;
+- unsigned long txn_addr; /* EIR addr to generate interrupt */
+- u32 txn_data; /* EIR data assign to each dino */
+ u32 imr; /* IRQ's which are enabled */
++ struct gsc_irq gsc_irq;
+ int global_irq[DINO_LOCAL_IRQS]; /* map IMR bit to global irq */
+ #ifdef DINO_DEBUG
+ unsigned int dino_irr0; /* save most recent IRQ line stat */
+@@ -339,14 +338,43 @@ static void dino_unmask_irq(struct irq_data *d)
+ if (tmp & DINO_MASK_IRQ(local_irq)) {
+ DBG(KERN_WARNING "%s(): IRQ asserted! (ILR 0x%x)\n",
+ __func__, tmp);
+- gsc_writel(dino_dev->txn_data, dino_dev->txn_addr);
++ gsc_writel(dino_dev->gsc_irq.txn_data, dino_dev->gsc_irq.txn_addr);
+ }
+ }
+
++#ifdef CONFIG_SMP
++static int dino_set_affinity_irq(struct irq_data *d, const struct cpumask *dest,
++ bool force)
++{
++ struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
++ struct cpumask tmask;
++ int cpu_irq;
++ u32 eim;
++
++ if (!cpumask_and(&tmask, dest, cpu_online_mask))
++ return -EINVAL;
++
++ cpu_irq = cpu_check_affinity(d, &tmask);
++ if (cpu_irq < 0)
++ return cpu_irq;
++
++ dino_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq);
++ eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data;
++ __raw_writel(eim, dino_dev->hba.base_addr+DINO_IAR0);
++
++ irq_data_update_effective_affinity(d, &tmask);
++
++ return IRQ_SET_MASK_OK;
++}
++#endif
++
+ static struct irq_chip dino_interrupt_type = {
+ .name = "GSC-PCI",
+ .irq_unmask = dino_unmask_irq,
+ .irq_mask = dino_mask_irq,
++#ifdef CONFIG_SMP
++ .irq_set_affinity = dino_set_affinity_irq,
++#endif
+ };
+
+
+@@ -806,7 +834,6 @@ static int __init dino_common_init(struct parisc_device *dev,
+ {
+ int status;
+ u32 eim;
+- struct gsc_irq gsc_irq;
+ struct resource *res;
+
+ pcibios_register_hba(&dino_dev->hba);
+@@ -821,10 +848,8 @@ static int __init dino_common_init(struct parisc_device *dev,
+ ** still only has 11 IRQ input lines - just map some of them
+ ** to a different processor.
+ */
+- dev->irq = gsc_alloc_irq(&gsc_irq);
+- dino_dev->txn_addr = gsc_irq.txn_addr;
+- dino_dev->txn_data = gsc_irq.txn_data;
+- eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
++ dev->irq = gsc_alloc_irq(&dino_dev->gsc_irq);
++ eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data;
+
+ /*
+ ** Dino needs a PA "IRQ" to get a processor's attention.
+diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
+index ed9371acf37eb..ec175ae998733 100644
+--- a/drivers/parisc/gsc.c
++++ b/drivers/parisc/gsc.c
+@@ -135,10 +135,41 @@ static void gsc_asic_unmask_irq(struct irq_data *d)
+ */
+ }
+
++#ifdef CONFIG_SMP
++static int gsc_set_affinity_irq(struct irq_data *d, const struct cpumask *dest,
++ bool force)
++{
++ struct gsc_asic *gsc_dev = irq_data_get_irq_chip_data(d);
++ struct cpumask tmask;
++ int cpu_irq;
++
++ if (!cpumask_and(&tmask, dest, cpu_online_mask))
++ return -EINVAL;
++
++ cpu_irq = cpu_check_affinity(d, &tmask);
++ if (cpu_irq < 0)
++ return cpu_irq;
++
++ gsc_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq);
++ gsc_dev->eim = ((u32) gsc_dev->gsc_irq.txn_addr) | gsc_dev->gsc_irq.txn_data;
++
++ /* switch IRQ's for devices below LASI/WAX to other CPU */
++ gsc_writel(gsc_dev->eim, gsc_dev->hpa + OFFSET_IAR);
++
++ irq_data_update_effective_affinity(d, &tmask);
++
++ return IRQ_SET_MASK_OK;
++}
++#endif
++
++
+ static struct irq_chip gsc_asic_interrupt_type = {
+ .name = "GSC-ASIC",
+ .irq_unmask = gsc_asic_unmask_irq,
+ .irq_mask = gsc_asic_mask_irq,
++#ifdef CONFIG_SMP
++ .irq_set_affinity = gsc_set_affinity_irq,
++#endif
+ };
+
+ int gsc_assign_irq(struct irq_chip *type, void *data)
+diff --git a/drivers/parisc/gsc.h b/drivers/parisc/gsc.h
+index 86abad3fa2150..73cbd0bb1975a 100644
+--- a/drivers/parisc/gsc.h
++++ b/drivers/parisc/gsc.h
+@@ -31,6 +31,7 @@ struct gsc_asic {
+ int version;
+ int type;
+ int eim;
++ struct gsc_irq gsc_irq;
+ int global_irq[32];
+ };
+
+diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
+index 8a3b0c3a1e92b..93ea922618c3d 100644
+--- a/drivers/parisc/iosapic.c
++++ b/drivers/parisc/iosapic.c
+@@ -677,7 +677,7 @@ static int iosapic_set_affinity_irq(struct irq_data *d,
+ if (dest_cpu < 0)
+ return -1;
+
+- cpumask_copy(irq_data_get_affinity_mask(d), cpumask_of(dest_cpu));
++ irq_data_update_affinity(d, cpumask_of(dest_cpu));
+ vi->txn_addr = txn_affinity_addr(d->irq, dest_cpu);
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+@@ -875,6 +875,7 @@ int iosapic_serial_irq(struct parisc_device *dev)
+
+ return vi->txn_irq;
+ }
++EXPORT_SYMBOL(iosapic_serial_irq);
+ #endif
+
+
+diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c
+index 4e4fd12c2112e..6ef621adb63a8 100644
+--- a/drivers/parisc/lasi.c
++++ b/drivers/parisc/lasi.c
+@@ -163,7 +163,6 @@ static int __init lasi_init_chip(struct parisc_device *dev)
+ {
+ extern void (*chassis_power_off)(void);
+ struct gsc_asic *lasi;
+- struct gsc_irq gsc_irq;
+ int ret;
+
+ lasi = kzalloc(sizeof(*lasi), GFP_KERNEL);
+@@ -185,7 +184,7 @@ static int __init lasi_init_chip(struct parisc_device *dev)
+ lasi_init_irq(lasi);
+
+ /* the IRQ lasi should use */
+- dev->irq = gsc_alloc_irq(&gsc_irq);
++ dev->irq = gsc_alloc_irq(&lasi->gsc_irq);
+ if (dev->irq < 0) {
+ printk(KERN_ERR "%s(): cannot get GSC irq\n",
+ __func__);
+@@ -193,9 +192,9 @@ static int __init lasi_init_chip(struct parisc_device *dev)
+ return -EBUSY;
+ }
+
+- lasi->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
++ lasi->eim = ((u32) lasi->gsc_irq.txn_addr) | lasi->gsc_irq.txn_data;
+
+- ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi);
++ ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi);
+ if (ret < 0) {
+ kfree(lasi);
+ return ret;
+diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
+index 732b516c7bf84..afc6e66ddc31c 100644
+--- a/drivers/parisc/lba_pci.c
++++ b/drivers/parisc/lba_pci.c
+@@ -1476,9 +1476,13 @@ lba_driver_probe(struct parisc_device *dev)
+ u32 func_class;
+ void *tmp_obj;
+ char *version;
+- void __iomem *addr = ioremap(dev->hpa.start, 4096);
++ void __iomem *addr;
+ int max;
+
++ addr = ioremap(dev->hpa.start, 4096);
++ if (addr == NULL)
++ return -ENOMEM;
++
+ /* Read HW Rev First */
+ func_class = READ_REG32(addr + LBA_FCLASS);
+
+diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
+index cf91cb024be30..d4dc7f6addf26 100644
+--- a/drivers/parisc/led.c
++++ b/drivers/parisc/led.c
+@@ -56,8 +56,8 @@
+ static int led_type __read_mostly = -1;
+ static unsigned char lastleds; /* LED state from most recent update */
+ static unsigned int led_heartbeat __read_mostly = 1;
+-static unsigned int led_diskio __read_mostly = 1;
+-static unsigned int led_lanrxtx __read_mostly = 1;
++static unsigned int led_diskio __read_mostly;
++static unsigned int led_lanrxtx __read_mostly;
+ static char lcd_text[32] __read_mostly;
+ static char lcd_text_default[32] __read_mostly;
+ static int lcd_no_led_support __read_mostly = 0; /* KittyHawk doesn't support LED on its LCD */
+@@ -137,6 +137,9 @@ static int start_task(void)
+
+ /* Create the work queue and queue the LED task */
+ led_wq = create_singlethread_workqueue("led_wq");
++ if (!led_wq)
++ return -ENOMEM;
++
+ queue_delayed_work(led_wq, &led_task, 0);
+
+ return 0;
+diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
+index e090978518f1a..4760f82def6ec 100644
+--- a/drivers/parisc/pdc_stable.c
++++ b/drivers/parisc/pdc_stable.c
+@@ -979,8 +979,10 @@ pdcs_register_pathentries(void)
+ entry->kobj.kset = paths_kset;
+ err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL,
+ "%s", entry->name);
+- if (err)
++ if (err) {
++ kobject_put(&entry->kobj);
+ return err;
++ }
+
+ /* kobject is now registered */
+ write_lock(&entry->rw_lock);
+diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
+index 456776bd8ee66..6f5e5f0230d39 100644
+--- a/drivers/parisc/power.c
++++ b/drivers/parisc/power.c
+@@ -37,7 +37,6 @@
+ #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+-#include <linux/notifier.h>
+ #include <linux/panic_notifier.h>
+ #include <linux/reboot.h>
+ #include <linux/sched/signal.h>
+@@ -175,16 +174,21 @@ static void powerfail_interrupt(int code, void *x)
+
+
+
+-/* parisc_panic_event() is called by the panic handler.
+- * As soon as a panic occurs, our tasklets above will not be
+- * executed any longer. This function then re-enables the
+- * soft-power switch and allows the user to switch off the system
++/*
++ * parisc_panic_event() is called by the panic handler.
++ *
++ * As soon as a panic occurs, our tasklets above will not
++ * be executed any longer. This function then re-enables
++ * the soft-power switch and allows the user to switch off
++ * the system. We rely in pdc_soft_power_button_panic()
++ * since this version spin_trylocks (instead of regular
++ * spinlock), preventing deadlocks on panic path.
+ */
+ static int parisc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+ {
+ /* re-enable the soft-power switch */
+- pdc_soft_power_button(0);
++ pdc_soft_power_button_panic(0);
+ return NOTIFY_DONE;
+ }
+
+diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
+index e60690d38d677..374b9199878d4 100644
+--- a/drivers/parisc/sba_iommu.c
++++ b/drivers/parisc/sba_iommu.c
+@@ -1047,7 +1047,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ spin_unlock_irqrestore(&ioc->res_lock, flags);
+ #endif
+
+- while (sg_dma_len(sglist) && nents--) {
++ while (nents && sg_dma_len(sglist)) {
+
+ sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist),
+ direction, 0);
+@@ -1056,6 +1056,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ ioc->usingle_calls--; /* kluge since call is unmap_sg() */
+ #endif
+ ++sglist;
++ nents--;
+ }
+
+ DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
+diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c
+index 5b6df15162354..73a2b01f8d9ca 100644
+--- a/drivers/parisc/wax.c
++++ b/drivers/parisc/wax.c
+@@ -68,7 +68,6 @@ static int __init wax_init_chip(struct parisc_device *dev)
+ {
+ struct gsc_asic *wax;
+ struct parisc_device *parent;
+- struct gsc_irq gsc_irq;
+ int ret;
+
+ wax = kzalloc(sizeof(*wax), GFP_KERNEL);
+@@ -85,7 +84,7 @@ static int __init wax_init_chip(struct parisc_device *dev)
+ wax_init_irq(wax);
+
+ /* the IRQ wax should use */
+- dev->irq = gsc_claim_irq(&gsc_irq, WAX_GSC_IRQ);
++ dev->irq = gsc_claim_irq(&wax->gsc_irq, WAX_GSC_IRQ);
+ if (dev->irq < 0) {
+ printk(KERN_ERR "%s(): cannot get GSC irq\n",
+ __func__);
+@@ -93,9 +92,9 @@ static int __init wax_init_chip(struct parisc_device *dev)
+ return -EBUSY;
+ }
+
+- wax->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data;
++ wax->eim = ((u32) wax->gsc_irq.txn_addr) | wax->gsc_irq.txn_data;
+
+- ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "wax", wax);
++ ret = request_irq(wax->gsc_irq.irq, gsc_asic_intr, 0, "wax", wax);
+ if (ret < 0) {
+ kfree(wax);
+ return ret;
+diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
+index eda4ded4d5e52..925be41eeebec 100644
+--- a/drivers/parport/parport_pc.c
++++ b/drivers/parport/parport_pc.c
+@@ -468,7 +468,7 @@ static size_t parport_pc_fifo_write_block_pio(struct parport *port,
+ const unsigned char *bufp = buf;
+ size_t left = length;
+ unsigned long expire = jiffies + port->physport->cad->timeout;
+- const int fifo = FIFO(port);
++ const unsigned long fifo = FIFO(port);
+ int poll_for = 8; /* 80 usecs */
+ const struct parport_pc_private *priv = port->physport->private_data;
+ const int fifo_depth = priv->fifo_depth;
+diff --git a/drivers/pci/access.c b/drivers/pci/access.c
+index 46935695cfb90..8d0d1f61c650d 100644
+--- a/drivers/pci/access.c
++++ b/drivers/pci/access.c
+@@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn,
+ * write happen to have any RW1C (write-one-to-clear) bits set, we
+ * just inadvertently cleared something we shouldn't have.
+ */
+- dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
+- size, pci_domain_nr(bus), bus->number,
+- PCI_SLOT(devfn), PCI_FUNC(devfn), where);
++ if (!bus->unsafe_warn) {
++ dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
++ size, pci_domain_nr(bus), bus->number,
++ PCI_SLOT(devfn), PCI_FUNC(devfn), where);
++ bus->unsafe_warn = 1;
++ }
+
+ mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+ tmp = readl(addr) & mask;
+diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
+index 3cef835b375fd..feafa378bf8ea 100644
+--- a/drivers/pci/bus.c
++++ b/drivers/pci/bus.c
+@@ -76,6 +76,27 @@ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
+ }
+ EXPORT_SYMBOL_GPL(pci_bus_resource_n);
+
++void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res)
++{
++ struct pci_bus_resource *bus_res, *tmp;
++ int i;
++
++ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
++ if (bus->resource[i] == res) {
++ bus->resource[i] = NULL;
++ return;
++ }
++ }
++
++ list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
++ if (bus_res->res == res) {
++ list_del(&bus_res->list);
++ kfree(bus_res);
++ return;
++ }
++ }
++}
++
+ void pci_bus_remove_resources(struct pci_bus *bus)
+ {
+ int i;
+diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
+index ffb176d288cd9..918e11082e6a7 100644
+--- a/drivers/pci/controller/cadence/pci-j721e.c
++++ b/drivers/pci/controller/cadence/pci-j721e.c
+@@ -474,7 +474,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ dev_err(dev, "failed to enable pcie_refclk\n");
+- goto err_get_sync;
++ goto err_pcie_setup;
+ }
+ pcie->refclk = clk;
+
+diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c
+index 88e05b9c2e5b8..18e32b8ffd5ef 100644
+--- a/drivers/pci/controller/cadence/pcie-cadence-ep.c
++++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c
+@@ -187,8 +187,7 @@ static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+ struct cdns_pcie *pcie = &ep->pcie;
+ u32 r;
+
+- r = find_first_zero_bit(&ep->ob_region_map,
+- sizeof(ep->ob_region_map) * BITS_PER_LONG);
++ r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG);
+ if (r >= ep->max_regions - 1) {
+ dev_err(&epc->dev, "no free outbound region\n");
+ return -EINVAL;
+diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c
+index fb96d37a135c1..4d8d15ac51ef4 100644
+--- a/drivers/pci/controller/cadence/pcie-cadence-host.c
++++ b/drivers/pci/controller/cadence/pcie-cadence-host.c
+@@ -12,6 +12,8 @@
+
+ #include "pcie-cadence.h"
+
++#define LINK_RETRAIN_TIMEOUT HZ
++
+ static u64 bar_max_size[] = {
+ [RP_BAR0] = _ULL(128 * SZ_2G),
+ [RP_BAR1] = SZ_2G,
+@@ -77,6 +79,27 @@ static struct pci_ops cdns_pcie_host_ops = {
+ .write = pci_generic_config_write,
+ };
+
++static int cdns_pcie_host_training_complete(struct cdns_pcie *pcie)
++{
++ u32 pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET;
++ unsigned long end_jiffies;
++ u16 lnk_stat;
++
++ /* Wait for link training to complete. Exit after timeout. */
++ end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
++ do {
++ lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA);
++ if (!(lnk_stat & PCI_EXP_LNKSTA_LT))
++ break;
++ usleep_range(0, 1000);
++ } while (time_before(jiffies, end_jiffies));
++
++ if (!(lnk_stat & PCI_EXP_LNKSTA_LT))
++ return 0;
++
++ return -ETIMEDOUT;
++}
++
+ static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie)
+ {
+ struct device *dev = pcie->dev;
+@@ -118,6 +141,10 @@ static int cdns_pcie_retrain(struct cdns_pcie *pcie)
+ cdns_pcie_rp_writew(pcie, pcie_cap_off + PCI_EXP_LNKCTL,
+ lnk_ctl);
+
++ ret = cdns_pcie_host_training_complete(pcie);
++ if (ret)
++ return ret;
++
+ ret = cdns_pcie_host_wait_for_link(pcie);
+ }
+ return ret;
+diff --git a/drivers/pci/controller/cadence/pcie-cadence-plat.c b/drivers/pci/controller/cadence/pcie-cadence-plat.c
+index 5fee0f89ab594..a224afadbcc00 100644
+--- a/drivers/pci/controller/cadence/pcie-cadence-plat.c
++++ b/drivers/pci/controller/cadence/pcie-cadence-plat.c
+@@ -127,6 +127,8 @@ static int cdns_plat_pcie_probe(struct platform_device *pdev)
+ goto err_init;
+ }
+
++ return 0;
++
+ err_init:
+ err_get_sync:
+ pm_runtime_put_sync(dev);
+diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
+index 80fc98acf097f..6a3336f2105b8 100644
+--- a/drivers/pci/controller/dwc/pci-imx6.c
++++ b/drivers/pci/controller/dwc/pci-imx6.c
+@@ -403,6 +403,11 @@ static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
+ dev_err(dev, "failed to disable vpcie regulator: %d\n",
+ ret);
+ }
++
++ /* Some boards don't have PCIe reset GPIO. */
++ if (gpio_is_valid(imx6_pcie->reset_gpio))
++ gpio_set_value_cansleep(imx6_pcie->reset_gpio,
++ imx6_pcie->gpio_active_high);
+ }
+
+ static unsigned int imx6_pcie_grp_offset(const struct imx6_pcie *imx6_pcie)
+@@ -525,15 +530,6 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie)
+ /* allow the clocks to stabilize */
+ usleep_range(200, 500);
+
+- /* Some boards don't have PCIe reset GPIO. */
+- if (gpio_is_valid(imx6_pcie->reset_gpio)) {
+- gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+- imx6_pcie->gpio_active_high);
+- msleep(100);
+- gpio_set_value_cansleep(imx6_pcie->reset_gpio,
+- !imx6_pcie->gpio_active_high);
+- }
+-
+ switch (imx6_pcie->drvdata->variant) {
+ case IMX8MQ:
+ reset_control_deassert(imx6_pcie->pciephy_reset);
+@@ -576,6 +572,15 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie)
+ break;
+ }
+
++ /* Some boards don't have PCIe reset GPIO. */
++ if (gpio_is_valid(imx6_pcie->reset_gpio)) {
++ msleep(100);
++ gpio_set_value_cansleep(imx6_pcie->reset_gpio,
++ !imx6_pcie->gpio_active_high);
++ /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */
++ msleep(100);
++ }
++
+ return;
+
+ err_ref_clk:
+@@ -779,9 +784,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci)
+ /* Start LTSSM. */
+ imx6_pcie_ltssm_enable(dev);
+
+- ret = dw_pcie_wait_for_link(pci);
+- if (ret)
+- goto err_reset_phy;
++ dw_pcie_wait_for_link(pci);
+
+ if (pci->link_gen == 2) {
+ /* Allow Gen2 mode after the link is up. */
+@@ -817,11 +820,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci)
+ }
+
+ /* Make sure link training is finished as well! */
+- ret = dw_pcie_wait_for_link(pci);
+- if (ret) {
+- dev_err(dev, "Failed to bring link up!\n");
+- goto err_reset_phy;
+- }
++ dw_pcie_wait_for_link(pci);
+ } else {
+ dev_info(dev, "Link: Gen2 disabled\n");
+ }
+@@ -1259,6 +1258,13 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, 0xabcd,
+ static int __init imx6_pcie_init(void)
+ {
+ #ifdef CONFIG_ARM
++ struct device_node *np;
++
++ np = of_find_matching_node(NULL, imx6_pcie_of_match);
++ if (!np)
++ return -ENODEV;
++ of_node_put(np);
++
+ /*
+ * Since probe() can be deferred we need to make sure that
+ * hook_fault_code is not called after __init memory is freed
+diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
+index 39f4664bd84c7..dd7d74fecc48e 100644
+--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
++++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
+@@ -18,6 +18,20 @@
+
+ #include "pcie-designware.h"
+
++#define PEX_PF0_CONFIG 0xC0014
++#define PEX_PF0_CFG_READY BIT(0)
++
++/* PEX PFa PCIE PME and message interrupt registers*/
++#define PEX_PF0_PME_MES_DR 0xC0020
++#define PEX_PF0_PME_MES_DR_LUD BIT(7)
++#define PEX_PF0_PME_MES_DR_LDD BIT(9)
++#define PEX_PF0_PME_MES_DR_HRD BIT(10)
++
++#define PEX_PF0_PME_MES_IER 0xC0028
++#define PEX_PF0_PME_MES_IER_LUDIE BIT(7)
++#define PEX_PF0_PME_MES_IER_LDDIE BIT(9)
++#define PEX_PF0_PME_MES_IER_HRDIE BIT(10)
++
+ #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev)
+
+ struct ls_pcie_ep_drvdata {
+@@ -30,16 +44,98 @@ struct ls_pcie_ep {
+ struct dw_pcie *pci;
+ struct pci_epc_features *ls_epc;
+ const struct ls_pcie_ep_drvdata *drvdata;
++ int irq;
++ u32 lnkcap;
++ bool big_endian;
+ };
+
+-static int ls_pcie_establish_link(struct dw_pcie *pci)
++static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset)
+ {
+- return 0;
++ struct dw_pcie *pci = pcie->pci;
++
++ if (pcie->big_endian)
++ return ioread32be(pci->dbi_base + offset);
++ else
++ return ioread32(pci->dbi_base + offset);
+ }
+
+-static const struct dw_pcie_ops dw_ls_pcie_ep_ops = {
+- .start_link = ls_pcie_establish_link,
+-};
++static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value)
++{
++ struct dw_pcie *pci = pcie->pci;
++
++ if (pcie->big_endian)
++ iowrite32be(value, pci->dbi_base + offset);
++ else
++ iowrite32(value, pci->dbi_base + offset);
++}
++
++static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id)
++{
++ struct ls_pcie_ep *pcie = dev_id;
++ struct dw_pcie *pci = pcie->pci;
++ u32 val, cfg;
++ u8 offset;
++
++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR);
++ ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val);
++
++ if (!val)
++ return IRQ_NONE;
++
++ if (val & PEX_PF0_PME_MES_DR_LUD) {
++
++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++
++ /*
++ * The values of the Maximum Link Width and Supported Link
++ * Speed from the Link Capabilities Register will be lost
++ * during link down or hot reset. Restore initial value
++ * that configured by the Reset Configuration Word (RCW).
++ */
++ dw_pcie_dbi_ro_wr_en(pci);
++ dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap);
++ dw_pcie_dbi_ro_wr_dis(pci);
++
++ cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG);
++ cfg |= PEX_PF0_CFG_READY;
++ ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg);
++ dw_pcie_ep_linkup(&pci->ep);
++
++ dev_dbg(pci->dev, "Link up\n");
++ } else if (val & PEX_PF0_PME_MES_DR_LDD) {
++ dev_dbg(pci->dev, "Link down\n");
++ } else if (val & PEX_PF0_PME_MES_DR_HRD) {
++ dev_dbg(pci->dev, "Hot reset\n");
++ }
++
++ return IRQ_HANDLED;
++}
++
++static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie,
++ struct platform_device *pdev)
++{
++ u32 val;
++ int ret;
++
++ pcie->irq = platform_get_irq_byname(pdev, "pme");
++ if (pcie->irq < 0)
++ return pcie->irq;
++
++ ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler,
++ IRQF_SHARED, pdev->name, pcie);
++ if (ret) {
++ dev_err(&pdev->dev, "Can't register PCIe IRQ\n");
++ return ret;
++ }
++
++ /* Enable interrupts */
++ val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER);
++ val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE |
++ PEX_PF0_PME_MES_IER_LUDIE;
++ ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val);
++
++ return 0;
++}
+
+ static const struct pci_epc_features*
+ ls_pcie_ep_get_features(struct dw_pcie_ep *ep)
+@@ -106,19 +202,16 @@ static const struct dw_pcie_ep_ops ls_pcie_ep_ops = {
+
+ static const struct ls_pcie_ep_drvdata ls1_ep_drvdata = {
+ .ops = &ls_pcie_ep_ops,
+- .dw_pcie_ops = &dw_ls_pcie_ep_ops,
+ };
+
+ static const struct ls_pcie_ep_drvdata ls2_ep_drvdata = {
+ .func_offset = 0x20000,
+ .ops = &ls_pcie_ep_ops,
+- .dw_pcie_ops = &dw_ls_pcie_ep_ops,
+ };
+
+ static const struct ls_pcie_ep_drvdata lx2_ep_drvdata = {
+ .func_offset = 0x8000,
+ .ops = &ls_pcie_ep_ops,
+- .dw_pcie_ops = &dw_ls_pcie_ep_ops,
+ };
+
+ static const struct of_device_id ls_pcie_ep_of_match[] = {
+@@ -136,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+ struct ls_pcie_ep *pcie;
+ struct pci_epc_features *ls_epc;
+ struct resource *dbi_base;
++ u8 offset;
++ int ret;
+
+ pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
+ if (!pcie)
+@@ -155,6 +250,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+ pci->ops = pcie->drvdata->dw_pcie_ops;
+
+ ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
++ ls_epc->linkup_notifier = true;
+
+ pcie->pci = pci;
+ pcie->ls_epc = ls_epc;
+@@ -166,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
+
+ pci->ep.ops = &ls_pcie_ep_ops;
+
++ pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
++
+ platform_set_drvdata(pdev, pcie);
+
+- return dw_pcie_ep_init(&pci->ep);
++ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++ pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP);
++
++ ret = dw_pcie_ep_init(&pci->ep);
++ if (ret)
++ return ret;
++
++ return ls_pcie_ep_interrupt_init(pcie, pdev);
+ }
+
+ static struct platform_driver ls_pcie_ep_driver = {
+diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
+index 998b698f40858..5023b7f704d2f 100644
+--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
++++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
+@@ -434,8 +434,7 @@ static void dw_pcie_ep_stop(struct pci_epc *epc)
+ struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+- if (pci->ops && pci->ops->stop_link)
+- pci->ops->stop_link(pci);
++ dw_pcie_stop_link(pci);
+ }
+
+ static int dw_pcie_ep_start(struct pci_epc *epc)
+@@ -443,10 +442,7 @@ static int dw_pcie_ep_start(struct pci_epc *epc)
+ struct dw_pcie_ep *ep = epc_get_drvdata(epc);
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+- if (!pci->ops || !pci->ops->start_link)
+- return -EINVAL;
+-
+- return pci->ops->start_link(pci);
++ return dw_pcie_start_link(pci);
+ }
+
+ static const struct pci_epc_features*
+@@ -777,8 +773,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
+ ep->msi_mem = pci_epc_mem_alloc_addr(epc, &ep->msi_mem_phys,
+ epc->mem->window.page_size);
+ if (!ep->msi_mem) {
++ ret = -ENOMEM;
+ dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n");
+- return -ENOMEM;
++ goto err_exit_epc_mem;
+ }
+
+ if (ep->ops->get_features) {
+@@ -787,6 +784,19 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
+ return 0;
+ }
+
+- return dw_pcie_ep_init_complete(ep);
++ ret = dw_pcie_ep_init_complete(ep);
++ if (ret)
++ goto err_free_epc_mem;
++
++ return 0;
++
++err_free_epc_mem:
++ pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem,
++ epc->mem->window.page_size);
++
++err_exit_epc_mem:
++ pci_epc_mem_exit(epc);
++
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(dw_pcie_ep_init);
+diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
+index d1d9b8344ec9c..f561e87cd5f6e 100644
+--- a/drivers/pci/controller/dwc/pcie-designware-host.c
++++ b/drivers/pci/controller/dwc/pcie-designware-host.c
+@@ -380,7 +380,8 @@ int dw_pcie_host_init(struct pcie_port *pp)
+ sizeof(pp->msi_msg),
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+- if (dma_mapping_error(pci->dev, pp->msi_data)) {
++ ret = dma_mapping_error(pci->dev, pp->msi_data);
++ if (ret) {
+ dev_err(pci->dev, "Failed to map MSI data\n");
+ pp->msi_data = 0;
+ goto err_free_msi;
+@@ -401,8 +402,8 @@ int dw_pcie_host_init(struct pcie_port *pp)
+
+ dw_pcie_setup_rc(pp);
+
+- if (!dw_pcie_link_up(pci) && pci->ops && pci->ops->start_link) {
+- ret = pci->ops->start_link(pci);
++ if (!dw_pcie_link_up(pci)) {
++ ret = dw_pcie_start_link(pci);
+ if (ret)
+ goto err_free_msi;
+ }
+@@ -413,8 +414,13 @@ int dw_pcie_host_init(struct pcie_port *pp)
+ bridge->sysdata = pp;
+
+ ret = pci_host_probe(bridge);
+- if (!ret)
+- return 0;
++ if (ret)
++ goto err_stop_link;
++
++ return 0;
++
++err_stop_link:
++ dw_pcie_stop_link(pci);
+
+ err_free_msi:
+ if (pp->has_msi_ctrl)
+@@ -425,8 +431,13 @@ EXPORT_SYMBOL_GPL(dw_pcie_host_init);
+
+ void dw_pcie_host_deinit(struct pcie_port *pp)
+ {
++ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
++
+ pci_stop_root_bus(pp->bridge->bus);
+ pci_remove_root_bus(pp->bridge->bus);
++
++ dw_pcie_stop_link(pci);
++
+ if (pp->has_msi_ctrl)
+ dw_pcie_free_msi(pp);
+ }
+@@ -523,7 +534,6 @@ static struct pci_ops dw_pcie_ops = {
+
+ void dw_pcie_setup_rc(struct pcie_port *pp)
+ {
+- int i;
+ u32 val, ctrl, num_ctrls;
+ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+
+@@ -575,19 +585,22 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
+ PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
+ dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+- /* Ensure all outbound windows are disabled so there are multiple matches */
+- for (i = 0; i < pci->num_ob_windows; i++)
+- dw_pcie_disable_atu(pci, i, DW_PCIE_REGION_OUTBOUND);
+-
+ /*
+ * If the platform provides its own child bus config accesses, it means
+ * the platform uses its own address translation component rather than
+ * ATU, so we should not program the ATU here.
+ */
+ if (pp->bridge->child_ops == &dw_child_pcie_ops) {
+- int atu_idx = 0;
++ int i, atu_idx = 0;
+ struct resource_entry *entry;
+
++ /*
++ * Disable all outbound windows to make sure a transaction
++ * can't match multiple windows.
++ */
++ for (i = 0; i < pci->num_ob_windows; i++)
++ dw_pcie_disable_atu(pci, i, DW_PCIE_REGION_OUTBOUND);
++
+ /* Get last memory resource entry */
+ resource_list_for_each_entry(entry, &pp->bridge->windows) {
+ if (resource_type(entry->res) != IORESOURCE_MEM)
+diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
+index 8851eb161a0eb..107318ad22817 100644
+--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
++++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
+@@ -36,15 +36,6 @@ static const struct of_device_id dw_plat_pcie_of_match[];
+ static const struct dw_pcie_host_ops dw_plat_pcie_host_ops = {
+ };
+
+-static int dw_plat_pcie_establish_link(struct dw_pcie *pci)
+-{
+- return 0;
+-}
+-
+-static const struct dw_pcie_ops dw_pcie_ops = {
+- .start_link = dw_plat_pcie_establish_link,
+-};
+-
+ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
+ {
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+@@ -142,7 +133,6 @@ static int dw_plat_pcie_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ pci->dev = dev;
+- pci->ops = &dw_pcie_ops;
+
+ dw_plat_pcie->pci = pci;
+ dw_plat_pcie->mode = mode;
+diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
+index a945f0c0e73dc..00972a7bc9768 100644
+--- a/drivers/pci/controller/dwc/pcie-designware.c
++++ b/drivers/pci/controller/dwc/pcie-designware.c
+@@ -287,8 +287,8 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, u8 func_no,
+ dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_UPPER_TARGET,
+ upper_32_bits(pci_addr));
+ val = type | PCIE_ATU_FUNC_NUM(func_no);
+- val = upper_32_bits(size - 1) ?
+- val | PCIE_ATU_INCREASE_REGION_SIZE : val;
++ if (upper_32_bits(limit_addr) > upper_32_bits(cpu_addr))
++ val |= PCIE_ATU_INCREASE_REGION_SIZE;
+ if (pci->version == 0x490A)
+ val = dw_pcie_enable_ecrc(val);
+ dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL1, val);
+@@ -315,6 +315,7 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
+ u64 pci_addr, u64 size)
+ {
+ u32 retries, val;
++ u64 limit_addr;
+
+ if (pci->ops && pci->ops->cpu_addr_fixup)
+ cpu_addr = pci->ops->cpu_addr_fixup(pci, cpu_addr);
+@@ -325,6 +326,8 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
+ return;
+ }
+
++ limit_addr = cpu_addr + size - 1;
++
+ dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT,
+ PCIE_ATU_REGION_OUTBOUND | index);
+ dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_BASE,
+@@ -332,17 +335,18 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
+ dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_BASE,
+ upper_32_bits(cpu_addr));
+ dw_pcie_writel_dbi(pci, PCIE_ATU_LIMIT,
+- lower_32_bits(cpu_addr + size - 1));
++ lower_32_bits(limit_addr));
+ if (pci->version >= 0x460A)
+ dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_LIMIT,
+- upper_32_bits(cpu_addr + size - 1));
++ upper_32_bits(limit_addr));
+ dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_TARGET,
+ lower_32_bits(pci_addr));
+ dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_TARGET,
+ upper_32_bits(pci_addr));
+ val = type | PCIE_ATU_FUNC_NUM(func_no);
+- val = ((upper_32_bits(size - 1)) && (pci->version >= 0x460A)) ?
+- val | PCIE_ATU_INCREASE_REGION_SIZE : val;
++ if (upper_32_bits(limit_addr) > upper_32_bits(cpu_addr) &&
++ pci->version >= 0x460A)
++ val |= PCIE_ATU_INCREASE_REGION_SIZE;
+ if (pci->version == 0x490A)
+ val = dw_pcie_enable_ecrc(val);
+ dw_pcie_writel_dbi(pci, PCIE_ATU_CR1, val);
+@@ -491,7 +495,7 @@ int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+ void dw_pcie_disable_atu(struct dw_pcie *pci, int index,
+ enum dw_pcie_region_type type)
+ {
+- int region;
++ u32 region;
+
+ switch (type) {
+ case DW_PCIE_REGION_INBOUND:
+@@ -504,8 +508,18 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, int index,
+ return;
+ }
+
+- dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, region | index);
+- dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, ~(u32)PCIE_ATU_ENABLE);
++ if (pci->iatu_unroll_enabled) {
++ if (region == PCIE_ATU_REGION_INBOUND) {
++ dw_pcie_writel_ib_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2,
++ ~(u32)PCIE_ATU_ENABLE);
++ } else {
++ dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2,
++ ~(u32)PCIE_ATU_ENABLE);
++ }
++ } else {
++ dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, region | index);
++ dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, ~(u32)PCIE_ATU_ENABLE);
++ }
+ }
+
+ int dw_pcie_wait_for_link(struct dw_pcie *pci)
+@@ -671,10 +685,11 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci)
+ if (!pci->atu_base) {
+ struct resource *res =
+ platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
+- if (res)
++ if (res) {
+ pci->atu_size = resource_size(res);
+- pci->atu_base = devm_ioremap_resource(dev, res);
+- if (IS_ERR(pci->atu_base))
++ pci->atu_base = devm_ioremap_resource(dev, res);
++ }
++ if (!pci->atu_base || IS_ERR(pci->atu_base))
+ pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
+ }
+
+@@ -715,7 +730,7 @@ void dw_pcie_setup(struct dw_pcie *pci)
+ if (pci->n_fts[1]) {
+ val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+ val &= ~PORT_LOGIC_N_FTS_MASK;
+- val |= pci->n_fts[pci->link_gen - 1];
++ val |= pci->n_fts[1];
+ dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+ }
+
+@@ -724,6 +739,13 @@ void dw_pcie_setup(struct dw_pcie *pci)
+ val |= PORT_LINK_DLL_LINK_EN;
+ dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
++ if (of_property_read_bool(np, "snps,enable-cdm-check")) {
++ val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
++ val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
++ PCIE_PL_CHK_REG_CHK_REG_START;
++ dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
++ }
++
+ of_property_read_u32(np, "num-lanes", &pci->num_lanes);
+ if (!pci->num_lanes) {
+ dev_dbg(pci->dev, "Using h/w default number of lanes\n");
+@@ -770,11 +792,4 @@ void dw_pcie_setup(struct dw_pcie *pci)
+ break;
+ }
+ dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+-
+- if (of_property_read_bool(np, "snps,enable-cdm-check")) {
+- val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+- val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
+- PCIE_PL_CHK_REG_CHK_REG_START;
+- dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+- }
+ }
+diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
+index 7d6e9b7576be5..8ba2392926346 100644
+--- a/drivers/pci/controller/dwc/pcie-designware.h
++++ b/drivers/pci/controller/dwc/pcie-designware.h
+@@ -365,6 +365,20 @@ static inline void dw_pcie_dbi_ro_wr_dis(struct dw_pcie *pci)
+ dw_pcie_writel_dbi(pci, reg, val);
+ }
+
++static inline int dw_pcie_start_link(struct dw_pcie *pci)
++{
++ if (pci->ops && pci->ops->start_link)
++ return pci->ops->start_link(pci);
++
++ return 0;
++}
++
++static inline void dw_pcie_stop_link(struct dw_pcie *pci)
++{
++ if (pci->ops && pci->ops->stop_link)
++ pci->ops->stop_link(pci);
++}
++
+ #ifdef CONFIG_PCIE_DW_HOST
+ irqreturn_t dw_handle_msi_irq(struct pcie_port *pp);
+ void dw_pcie_setup_rc(struct pcie_port *pp);
+diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c
+index 00cde9a248b5a..78d002be4f821 100644
+--- a/drivers/pci/controller/dwc/pcie-fu740.c
++++ b/drivers/pci/controller/dwc/pcie-fu740.c
+@@ -181,10 +181,59 @@ static int fu740_pcie_start_link(struct dw_pcie *pci)
+ {
+ struct device *dev = pci->dev;
+ struct fu740_pcie *afp = dev_get_drvdata(dev);
++ u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++ int ret;
++ u32 orig, tmp;
++
++ /*
++ * Force 2.5GT/s when starting the link, due to some devices not
++ * probing at higher speeds. This happens with the PCIe switch
++ * on the Unmatched board when U-Boot has not initialised the PCIe.
++ * The fix in U-Boot is to force 2.5GT/s, which then gets cleared
++ * by the soft reset done by this driver.
++ */
++ dev_dbg(dev, "cap_exp at %x\n", cap_exp);
++ dw_pcie_dbi_ro_wr_en(pci);
++
++ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
++ orig = tmp & PCI_EXP_LNKCAP_SLS;
++ tmp &= ~PCI_EXP_LNKCAP_SLS;
++ tmp |= PCI_EXP_LNKCAP_SLS_2_5GB;
++ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
+
+ /* Enable LTSSM */
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE);
+- return 0;
++
++ ret = dw_pcie_wait_for_link(pci);
++ if (ret) {
++ dev_err(dev, "error: link did not start\n");
++ goto err;
++ }
++
++ tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP);
++ if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) {
++ dev_dbg(dev, "changing speed back to original\n");
++
++ tmp &= ~PCI_EXP_LNKCAP_SLS;
++ tmp |= orig;
++ dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp);
++
++ tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
++ tmp |= PORT_LOGIC_SPEED_CHANGE;
++ dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp);
++
++ ret = dw_pcie_wait_for_link(pci);
++ if (ret) {
++ dev_err(dev, "error: link did not start at new speed\n");
++ goto err;
++ }
++ }
++
++ ret = 0;
++err:
++ WARN_ON(ret); /* we assume that errors will be very rare */
++ dw_pcie_dbi_ro_wr_dis(pci);
++ return ret;
+ }
+
+ static int fu740_pcie_host_init(struct pcie_port *pp)
+diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
+index 8a7a300163e5c..9402fe8dddabf 100644
+--- a/drivers/pci/controller/dwc/pcie-qcom.c
++++ b/drivers/pci/controller/dwc/pcie-qcom.c
+@@ -325,8 +325,6 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
+ reset_control_assert(res->ext_reset);
+ reset_control_assert(res->phy_reset);
+
+- writel(1, pcie->parf + PCIE20_PARF_PHY_CTRL);
+-
+ ret = regulator_bulk_enable(ARRAY_SIZE(res->supplies), res->supplies);
+ if (ret < 0) {
+ dev_err(dev, "cannot enable regulators\n");
+@@ -369,15 +367,15 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
+ goto err_deassert_axi;
+ }
+
+- ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+- if (ret)
+- goto err_clks;
+-
+ /* enable PCIe clocks and resets */
+ val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL);
+ val &= ~BIT(0);
+ writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL);
+
++ ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
++ if (ret)
++ goto err_clks;
++
+ if (of_device_is_compatible(node, "qcom,pcie-ipq8064") ||
+ of_device_is_compatible(node, "qcom,pcie-ipq8064-v2")) {
+ writel(PCS_DEEMPH_TX_DEEMPH_GEN1(24) |
+@@ -1026,9 +1024,7 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+ struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+ struct dw_pcie *pci = pcie->pci;
+ struct device *dev = pci->dev;
+- u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+ int i, ret;
+- u32 val;
+
+ for (i = 0; i < ARRAY_SIZE(res->rst); i++) {
+ ret = reset_control_assert(res->rst[i]);
+@@ -1085,6 +1081,33 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+ goto err_clk_aux;
+ }
+
++ return 0;
++
++err_clk_aux:
++ clk_disable_unprepare(res->ahb_clk);
++err_clk_ahb:
++ clk_disable_unprepare(res->axi_s_clk);
++err_clk_axi_s:
++ clk_disable_unprepare(res->axi_m_clk);
++err_clk_axi_m:
++ clk_disable_unprepare(res->iface);
++err_clk_iface:
++ /*
++ * Not checking for failure, will anyway return
++ * the original failure in 'ret'.
++ */
++ for (i = 0; i < ARRAY_SIZE(res->rst); i++)
++ reset_control_assert(res->rst[i]);
++
++ return ret;
++}
++
++static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie)
++{
++ struct dw_pcie *pci = pcie->pci;
++ u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++ u32 val;
++
+ writel(SLV_ADDR_SPACE_SZ,
+ pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE);
+
+@@ -1111,25 +1134,9 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+ writel(PCI_EXP_DEVCTL2_COMP_TMOUT_DIS, pci->dbi_base + offset +
+ PCI_EXP_DEVCTL2);
+
+- return 0;
++ dw_pcie_dbi_ro_wr_dis(pci);
+
+-err_clk_aux:
+- clk_disable_unprepare(res->ahb_clk);
+-err_clk_ahb:
+- clk_disable_unprepare(res->axi_s_clk);
+-err_clk_axi_s:
+- clk_disable_unprepare(res->axi_m_clk);
+-err_clk_axi_m:
+- clk_disable_unprepare(res->iface);
+-err_clk_iface:
+- /*
+- * Not checking for failure, will anyway return
+- * the original failure in 'ret'.
+- */
+- for (i = 0; i < ARRAY_SIZE(res->rst); i++)
+- reset_control_assert(res->rst[i]);
+-
+- return ret;
++ return 0;
+ }
+
+ static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
+@@ -1203,12 +1210,6 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
+ goto err_disable_clocks;
+ }
+
+- ret = clk_prepare_enable(res->pipe_clk);
+- if (ret) {
+- dev_err(dev, "cannot prepare/enable pipe clock\n");
+- goto err_disable_clocks;
+- }
+-
+ /* configure PCIe to RC mode */
+ writel(DEVICE_TYPE_RC, pcie->parf + PCIE20_PARF_DEVICE_TYPE);
+
+@@ -1229,11 +1230,9 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
+ val |= BIT(4);
+ writel(val, pcie->parf + PCIE20_PARF_MHI_CLOCK_RESET_CTRL);
+
+- if (IS_ENABLED(CONFIG_PCI_MSI)) {
+- val = readl(pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT);
+- val |= BIT(31);
+- writel(val, pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT);
+- }
++ val = readl(pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT_V2);
++ val |= BIT(31);
++ writel(val, pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT_V2);
+
+ return 0;
+ err_disable_clocks:
+@@ -1431,6 +1430,7 @@ static const struct qcom_pcie_ops ops_2_4_0 = {
+ static const struct qcom_pcie_ops ops_2_3_3 = {
+ .get_resources = qcom_pcie_get_resources_2_3_3,
+ .init = qcom_pcie_init_2_3_3,
++ .post_init = qcom_pcie_post_init_2_3_3,
+ .deinit = qcom_pcie_deinit_2_3_3,
+ .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+ };
+@@ -1521,22 +1521,21 @@ static int qcom_pcie_probe(struct platform_device *pdev)
+ pp->ops = &qcom_pcie_dw_ops;
+
+ ret = phy_init(pcie->phy);
+- if (ret) {
+- pm_runtime_disable(&pdev->dev);
++ if (ret)
+ goto err_pm_runtime_put;
+- }
+
+ platform_set_drvdata(pdev, pcie);
+
+ ret = dw_pcie_host_init(pp);
+ if (ret) {
+ dev_err(dev, "cannot initialize host\n");
+- pm_runtime_disable(&pdev->dev);
+- goto err_pm_runtime_put;
++ goto err_phy_exit;
+ }
+
+ return 0;
+
++err_phy_exit:
++ phy_exit(pcie->phy);
+ err_pm_runtime_put:
+ pm_runtime_put(dev);
+ pm_runtime_disable(dev);
+diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
+index 9049769130819..765abe0732282 100644
+--- a/drivers/pci/controller/dwc/pcie-tegra194.c
++++ b/drivers/pci/controller/dwc/pcie-tegra194.c
+@@ -239,6 +239,7 @@
+ #define EP_STATE_ENABLED 1
+
+ static const unsigned int pcie_gen_freq[] = {
++ GEN1_CORE_CLK_FREQ, /* PCI_EXP_LNKSTA_CLS == 0; undefined */
+ GEN1_CORE_CLK_FREQ,
+ GEN2_CORE_CLK_FREQ,
+ GEN3_CORE_CLK_FREQ,
+@@ -352,15 +353,14 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
+ struct tegra_pcie_dw *pcie = arg;
+ struct dw_pcie *pci = &pcie->pci;
+ struct pcie_port *pp = &pci->pp;
+- u32 val, tmp;
++ u32 val, status_l0, status_l1;
+ u16 val_w;
+
+- val = appl_readl(pcie, APPL_INTR_STATUS_L0);
+- if (val & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
+- val = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
+- if (val & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+- appl_writel(pcie, val, APPL_INTR_STATUS_L1_0_0);
+-
++ status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0);
++ if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) {
++ status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0);
++ appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_0_0);
++ if (status_l1 & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) {
+ /* SBR & Surprise Link Down WAR */
+ val = appl_readl(pcie, APPL_CAR_RESET_OVRD);
+ val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N;
+@@ -376,15 +376,15 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
+ }
+ }
+
+- if (val & APPL_INTR_STATUS_L0_INT_INT) {
+- val = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0);
+- if (val & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
++ if (status_l0 & APPL_INTR_STATUS_L0_INT_INT) {
++ status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0);
++ if (status_l1 & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) {
+ appl_writel(pcie,
+ APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS,
+ APPL_INTR_STATUS_L1_8_0);
+ apply_bad_link_workaround(pp);
+ }
+- if (val & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
++ if (status_l1 & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) {
+ appl_writel(pcie,
+ APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS,
+ APPL_INTR_STATUS_L1_8_0);
+@@ -396,25 +396,24 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg)
+ }
+ }
+
+- val = appl_readl(pcie, APPL_INTR_STATUS_L0);
+- if (val & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
+- val = appl_readl(pcie, APPL_INTR_STATUS_L1_18);
+- tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+- if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
++ if (status_l0 & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) {
++ status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_18);
++ val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
++ if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) {
+ dev_info(pci->dev, "CDM check complete\n");
+- tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE;
++ val |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE;
+ }
+- if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
++ if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) {
+ dev_err(pci->dev, "CDM comparison mismatch\n");
+- tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
++ val |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR;
+ }
+- if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
++ if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) {
+ dev_err(pci->dev, "CDM Logic error\n");
+- tmp |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
++ val |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR;
+ }
+- dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, tmp);
+- tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR);
+- dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", tmp);
++ dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
++ val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR);
++ dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", val);
+ }
+
+ return IRQ_HANDLED;
+@@ -454,7 +453,11 @@ static irqreturn_t tegra_pcie_ep_irq_thread(int irq, void *arg)
+
+ speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) &
+ PCI_EXP_LNKSTA_CLS;
+- clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
++
++ if (speed >= ARRAY_SIZE(pcie_gen_freq))
++ speed = 0;
++
++ clk_set_rate(pcie->core_clk, pcie_gen_freq[speed]);
+
+ /* If EP doesn't advertise L1SS, just return */
+ val = dw_pcie_readl_dbi(pci, pcie->cfg_link_cap_l1sub);
+@@ -980,7 +983,7 @@ retry_link:
+ offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF);
+ val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP);
+ val &= ~PCI_DLF_EXCHANGE_ENABLE;
+- dw_pcie_writel_dbi(pci, offset, val);
++ dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val);
+
+ tegra_pcie_dw_host_init(pp);
+ dw_pcie_setup_rc(pp);
+@@ -991,7 +994,11 @@ retry_link:
+
+ speed = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA) &
+ PCI_EXP_LNKSTA_CLS;
+- clk_set_rate(pcie->core_clk, pcie_gen_freq[speed - 1]);
++
++ if (speed >= ARRAY_SIZE(pcie_gen_freq))
++ speed = 0;
++
++ clk_set_rate(pcie->core_clk, pcie_gen_freq[speed]);
+
+ tegra_pcie_enable_interrupts(pp);
+
+@@ -1951,6 +1958,7 @@ static int tegra_pcie_config_ep(struct tegra_pcie_dw *pcie,
+ if (ret) {
+ dev_err(dev, "Failed to initialize DWC Endpoint subsystem: %d\n",
+ ret);
++ pm_runtime_disable(dev);
+ return ret;
+ }
+
+diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c
+index d842fd0181299..d05be942956e2 100644
+--- a/drivers/pci/controller/dwc/pcie-uniphier.c
++++ b/drivers/pci/controller/dwc/pcie-uniphier.c
+@@ -168,30 +168,21 @@ static void uniphier_pcie_irq_enable(struct uniphier_pcie_priv *priv)
+ writel(PCL_RCV_INTX_ALL_ENABLE, priv->base + PCL_RCV_INTX);
+ }
+
+-static void uniphier_pcie_irq_ack(struct irq_data *d)
+-{
+- struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+- struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+- struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
+- u32 val;
+-
+- val = readl(priv->base + PCL_RCV_INTX);
+- val &= ~PCL_RCV_INTX_ALL_STATUS;
+- val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_STATUS_SHIFT);
+- writel(val, priv->base + PCL_RCV_INTX);
+-}
+-
+ static void uniphier_pcie_irq_mask(struct irq_data *d)
+ {
+ struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+ struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
++ unsigned long flags;
+ u32 val;
+
++ raw_spin_lock_irqsave(&pp->lock, flags);
++
+ val = readl(priv->base + PCL_RCV_INTX);
+- val &= ~PCL_RCV_INTX_ALL_MASK;
+ val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
+ writel(val, priv->base + PCL_RCV_INTX);
++
++ raw_spin_unlock_irqrestore(&pp->lock, flags);
+ }
+
+ static void uniphier_pcie_irq_unmask(struct irq_data *d)
+@@ -199,17 +190,20 @@ static void uniphier_pcie_irq_unmask(struct irq_data *d)
+ struct pcie_port *pp = irq_data_get_irq_chip_data(d);
+ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+ struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci);
++ unsigned long flags;
+ u32 val;
+
++ raw_spin_lock_irqsave(&pp->lock, flags);
++
+ val = readl(priv->base + PCL_RCV_INTX);
+- val &= ~PCL_RCV_INTX_ALL_MASK;
+ val &= ~BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT);
+ writel(val, priv->base + PCL_RCV_INTX);
++
++ raw_spin_unlock_irqrestore(&pp->lock, flags);
+ }
+
+ static struct irq_chip uniphier_pcie_irq_chip = {
+ .name = "PCI",
+- .irq_ack = uniphier_pcie_irq_ack,
+ .irq_mask = uniphier_pcie_irq_mask,
+ .irq_unmask = uniphier_pcie_irq_unmask,
+ };
+diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
+index 596ebcfcc82dc..215f7510de9a7 100644
+--- a/drivers/pci/controller/pci-aardvark.c
++++ b/drivers/pci/controller/pci-aardvark.c
+@@ -8,6 +8,7 @@
+ * Author: Hezi Shahmoon <hezi.shahmoon@marvell.com>
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/interrupt.h>
+@@ -31,9 +32,6 @@
+ /* PCIe core registers */
+ #define PCIE_CORE_DEV_ID_REG 0x0
+ #define PCIE_CORE_CMD_STATUS_REG 0x4
+-#define PCIE_CORE_CMD_IO_ACCESS_EN BIT(0)
+-#define PCIE_CORE_CMD_MEM_ACCESS_EN BIT(1)
+-#define PCIE_CORE_CMD_MEM_IO_REQ_EN BIT(2)
+ #define PCIE_CORE_DEV_REV_REG 0x8
+ #define PCIE_CORE_PCIEXP_CAP 0xc0
+ #define PCIE_CORE_ERR_CAPCTL_REG 0x118
+@@ -41,10 +39,6 @@
+ #define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN BIT(6)
+ #define PCIE_CORE_ERR_CAPCTL_ECRC_CHCK BIT(7)
+ #define PCIE_CORE_ERR_CAPCTL_ECRC_CHCK_RCV BIT(8)
+-#define PCIE_CORE_INT_A_ASSERT_ENABLE 1
+-#define PCIE_CORE_INT_B_ASSERT_ENABLE 2
+-#define PCIE_CORE_INT_C_ASSERT_ENABLE 3
+-#define PCIE_CORE_INT_D_ASSERT_ENABLE 4
+ /* PIO registers base address and register offsets */
+ #define PIO_BASE_ADDR 0x4000
+ #define PIO_CTRL (PIO_BASE_ADDR + 0x0)
+@@ -99,25 +93,32 @@
+ #define PCIE_CORE_CTRL2_MSI_ENABLE BIT(10)
+ #define PCIE_CORE_REF_CLK_REG (CONTROL_BASE_ADDR + 0x14)
+ #define PCIE_CORE_REF_CLK_TX_ENABLE BIT(1)
++#define PCIE_CORE_REF_CLK_RX_ENABLE BIT(2)
+ #define PCIE_MSG_LOG_REG (CONTROL_BASE_ADDR + 0x30)
+ #define PCIE_ISR0_REG (CONTROL_BASE_ADDR + 0x40)
+ #define PCIE_MSG_PM_PME_MASK BIT(7)
+ #define PCIE_ISR0_MASK_REG (CONTROL_BASE_ADDR + 0x44)
+ #define PCIE_ISR0_MSI_INT_PENDING BIT(24)
++#define PCIE_ISR0_CORR_ERR BIT(11)
++#define PCIE_ISR0_NFAT_ERR BIT(12)
++#define PCIE_ISR0_FAT_ERR BIT(13)
++#define PCIE_ISR0_ERR_MASK GENMASK(13, 11)
+ #define PCIE_ISR0_INTX_ASSERT(val) BIT(16 + (val))
+ #define PCIE_ISR0_INTX_DEASSERT(val) BIT(20 + (val))
+-#define PCIE_ISR0_ALL_MASK GENMASK(26, 0)
++#define PCIE_ISR0_ALL_MASK GENMASK(31, 0)
+ #define PCIE_ISR1_REG (CONTROL_BASE_ADDR + 0x48)
+ #define PCIE_ISR1_MASK_REG (CONTROL_BASE_ADDR + 0x4C)
+ #define PCIE_ISR1_POWER_STATE_CHANGE BIT(4)
+ #define PCIE_ISR1_FLUSH BIT(5)
+ #define PCIE_ISR1_INTX_ASSERT(val) BIT(8 + (val))
+-#define PCIE_ISR1_ALL_MASK GENMASK(11, 4)
++#define PCIE_ISR1_ALL_MASK GENMASK(31, 0)
+ #define PCIE_MSI_ADDR_LOW_REG (CONTROL_BASE_ADDR + 0x50)
+ #define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54)
+ #define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58)
+ #define PCIE_MSI_MASK_REG (CONTROL_BASE_ADDR + 0x5C)
++#define PCIE_MSI_ALL_MASK GENMASK(31, 0)
+ #define PCIE_MSI_PAYLOAD_REG (CONTROL_BASE_ADDR + 0x9C)
++#define PCIE_MSI_DATA_MASK GENMASK(15, 0)
+
+ /* PCIe window configuration */
+ #define OB_WIN_BASE_ADDR 0x4c00
+@@ -164,8 +165,50 @@
+ #define CFG_REG (LMI_BASE_ADDR + 0x0)
+ #define LTSSM_SHIFT 24
+ #define LTSSM_MASK 0x3f
+-#define LTSSM_L0 0x10
+ #define RC_BAR_CONFIG 0x300
++
++/* LTSSM values in CFG_REG */
++enum {
++ LTSSM_DETECT_QUIET = 0x0,
++ LTSSM_DETECT_ACTIVE = 0x1,
++ LTSSM_POLLING_ACTIVE = 0x2,
++ LTSSM_POLLING_COMPLIANCE = 0x3,
++ LTSSM_POLLING_CONFIGURATION = 0x4,
++ LTSSM_CONFIG_LINKWIDTH_START = 0x5,
++ LTSSM_CONFIG_LINKWIDTH_ACCEPT = 0x6,
++ LTSSM_CONFIG_LANENUM_ACCEPT = 0x7,
++ LTSSM_CONFIG_LANENUM_WAIT = 0x8,
++ LTSSM_CONFIG_COMPLETE = 0x9,
++ LTSSM_CONFIG_IDLE = 0xa,
++ LTSSM_RECOVERY_RCVR_LOCK = 0xb,
++ LTSSM_RECOVERY_SPEED = 0xc,
++ LTSSM_RECOVERY_RCVR_CFG = 0xd,
++ LTSSM_RECOVERY_IDLE = 0xe,
++ LTSSM_L0 = 0x10,
++ LTSSM_RX_L0S_ENTRY = 0x11,
++ LTSSM_RX_L0S_IDLE = 0x12,
++ LTSSM_RX_L0S_FTS = 0x13,
++ LTSSM_TX_L0S_ENTRY = 0x14,
++ LTSSM_TX_L0S_IDLE = 0x15,
++ LTSSM_TX_L0S_FTS = 0x16,
++ LTSSM_L1_ENTRY = 0x17,
++ LTSSM_L1_IDLE = 0x18,
++ LTSSM_L2_IDLE = 0x19,
++ LTSSM_L2_TRANSMIT_WAKE = 0x1a,
++ LTSSM_DISABLED = 0x20,
++ LTSSM_LOOPBACK_ENTRY_MASTER = 0x21,
++ LTSSM_LOOPBACK_ACTIVE_MASTER = 0x22,
++ LTSSM_LOOPBACK_EXIT_MASTER = 0x23,
++ LTSSM_LOOPBACK_ENTRY_SLAVE = 0x24,
++ LTSSM_LOOPBACK_ACTIVE_SLAVE = 0x25,
++ LTSSM_LOOPBACK_EXIT_SLAVE = 0x26,
++ LTSSM_HOT_RESET = 0x27,
++ LTSSM_RECOVERY_EQUALIZATION_PHASE0 = 0x28,
++ LTSSM_RECOVERY_EQUALIZATION_PHASE1 = 0x29,
++ LTSSM_RECOVERY_EQUALIZATION_PHASE2 = 0x2a,
++ LTSSM_RECOVERY_EQUALIZATION_PHASE3 = 0x2b,
++};
++
+ #define VENDOR_ID_REG (LMI_BASE_ADDR + 0x44)
+
+ /* PCIe core controller registers */
+@@ -198,7 +241,7 @@
+ #define PCIE_IRQ_MSI_INT2_DET BIT(21)
+ #define PCIE_IRQ_RC_DBELL_DET BIT(22)
+ #define PCIE_IRQ_EP_STATUS BIT(23)
+-#define PCIE_IRQ_ALL_MASK 0xfff0fb
++#define PCIE_IRQ_ALL_MASK GENMASK(31, 0)
+ #define PCIE_IRQ_ENABLE_INTS_MASK PCIE_IRQ_CORE_INT
+
+ /* Transaction types */
+@@ -230,17 +273,15 @@ struct advk_pcie {
+ u32 actions;
+ } wins[OB_WIN_COUNT];
+ u8 wins_count;
++ struct irq_domain *rp_irq_domain;
+ struct irq_domain *irq_domain;
+ struct irq_chip irq_chip;
+ raw_spinlock_t irq_lock;
+ struct irq_domain *msi_domain;
+ struct irq_domain *msi_inner_domain;
+- struct irq_chip msi_bottom_irq_chip;
+- struct irq_chip msi_irq_chip;
+- struct msi_domain_info msi_domain_info;
++ raw_spinlock_t msi_irq_lock;
+ DECLARE_BITMAP(msi_used, MSI_IRQ_NUM);
+ struct mutex msi_used_lock;
+- u16 msi_msg;
+ int link_gen;
+ struct pci_bridge_emul bridge;
+ struct gpio_desc *reset_gpio;
+@@ -257,18 +298,49 @@ static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg)
+ return readl(pcie->base + reg);
+ }
+
+-static inline u16 advk_read16(struct advk_pcie *pcie, u64 reg)
++static u8 advk_pcie_ltssm_state(struct advk_pcie *pcie)
+ {
+- return advk_readl(pcie, (reg & ~0x3)) >> ((reg & 0x3) * 8);
++ u32 val;
++ u8 ltssm_state;
++
++ val = advk_readl(pcie, CFG_REG);
++ ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK;
++ return ltssm_state;
+ }
+
+-static int advk_pcie_link_up(struct advk_pcie *pcie)
++static inline bool advk_pcie_link_up(struct advk_pcie *pcie)
+ {
+- u32 val, ltssm_state;
++ /* check if LTSSM is in normal operation - some L* state */
++ u8 ltssm_state = advk_pcie_ltssm_state(pcie);
++ return ltssm_state >= LTSSM_L0 && ltssm_state < LTSSM_DISABLED;
++}
+
+- val = advk_readl(pcie, CFG_REG);
+- ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK;
+- return ltssm_state >= LTSSM_L0;
++static inline bool advk_pcie_link_active(struct advk_pcie *pcie)
++{
++ /*
++ * According to PCIe Base specification 3.0, Table 4-14: Link
++ * Status Mapped to the LTSSM, and 4.2.6.3.6 Configuration.Idle
++ * is Link Up mapped to LTSSM Configuration.Idle, Recovery, L0,
++ * L0s, L1 and L2 states. And according to 3.2.1. Data Link
++ * Control and Management State Machine Rules is DL Up status
++ * reported in DL Active state.
++ */
++ u8 ltssm_state = advk_pcie_ltssm_state(pcie);
++ return ltssm_state >= LTSSM_CONFIG_IDLE && ltssm_state < LTSSM_DISABLED;
++}
++
++static inline bool advk_pcie_link_training(struct advk_pcie *pcie)
++{
++ /*
++ * According to PCIe Base specification 3.0, Table 4-14: Link
++ * Status Mapped to the LTSSM is Link Training mapped to LTSSM
++ * Configuration and Recovery states.
++ */
++ u8 ltssm_state = advk_pcie_ltssm_state(pcie);
++ return ((ltssm_state >= LTSSM_CONFIG_LINKWIDTH_START &&
++ ltssm_state < LTSSM_L0) ||
++ (ltssm_state >= LTSSM_RECOVERY_EQUALIZATION_PHASE0 &&
++ ltssm_state <= LTSSM_RECOVERY_EQUALIZATION_PHASE3));
+ }
+
+ static int advk_pcie_wait_for_link(struct advk_pcie *pcie)
+@@ -291,7 +363,7 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie)
+ size_t retries;
+
+ for (retries = 0; retries < RETRAIN_WAIT_MAX_RETRIES; ++retries) {
+- if (!advk_pcie_link_up(pcie))
++ if (advk_pcie_link_training(pcie))
+ break;
+ udelay(RETRAIN_WAIT_USLEEP_US);
+ }
+@@ -299,23 +371,9 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie)
+
+ static void advk_pcie_issue_perst(struct advk_pcie *pcie)
+ {
+- u32 reg;
+-
+ if (!pcie->reset_gpio)
+ return;
+
+- /*
+- * As required by PCI Express spec (PCI Express Base Specification, REV.
+- * 4.0 PCI Express, February 19 2014, 6.6.1 Conventional Reset) a delay
+- * for at least 100ms after de-asserting PERST# signal is needed before
+- * link training is enabled. So ensure that link training is disabled
+- * prior de-asserting PERST# signal to fulfill that PCI Express spec
+- * requirement.
+- */
+- reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+- reg &= ~LINK_TRAINING_EN;
+- advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+-
+ /* 10ms delay is needed for some cards */
+ dev_info(&pcie->pdev->dev, "issuing PERST via reset GPIO for 10ms\n");
+ gpiod_set_value_cansleep(pcie->reset_gpio, 1);
+@@ -323,53 +381,46 @@ static void advk_pcie_issue_perst(struct advk_pcie *pcie)
+ gpiod_set_value_cansleep(pcie->reset_gpio, 0);
+ }
+
+-static int advk_pcie_train_at_gen(struct advk_pcie *pcie, int gen)
++static void advk_pcie_train_link(struct advk_pcie *pcie)
+ {
+- int ret, neg_gen;
++ struct device *dev = &pcie->pdev->dev;
+ u32 reg;
++ int ret;
+
+- /* Setup link speed */
++ /*
++ * Setup PCIe rev / gen compliance based on device tree property
++ * 'max-link-speed' which also forces maximal link speed.
++ */
+ reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+ reg &= ~PCIE_GEN_SEL_MSK;
+- if (gen == 3)
++ if (pcie->link_gen == 3)
+ reg |= SPEED_GEN_3;
+- else if (gen == 2)
++ else if (pcie->link_gen == 2)
+ reg |= SPEED_GEN_2;
+ else
+ reg |= SPEED_GEN_1;
+ advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+ /*
+- * Enable link training. This is not needed in every call to this
+- * function, just once suffices, but it does not break anything either.
++ * Set maximal link speed value also into PCIe Link Control 2 register.
++ * Armada 3700 Functional Specification says that default value is based
++ * on SPEED_GEN but tests showed that default value is always 8.0 GT/s.
+ */
++ reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
++ reg &= ~PCI_EXP_LNKCTL2_TLS;
++ if (pcie->link_gen == 3)
++ reg |= PCI_EXP_LNKCTL2_TLS_8_0GT;
++ else if (pcie->link_gen == 2)
++ reg |= PCI_EXP_LNKCTL2_TLS_5_0GT;
++ else
++ reg |= PCI_EXP_LNKCTL2_TLS_2_5GT;
++ advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
++
++ /* Enable link training after selecting PCIe generation */
+ reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
+ reg |= LINK_TRAINING_EN;
+ advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
+- /*
+- * Start link training immediately after enabling it.
+- * This solves problems for some buggy cards.
+- */
+- reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL);
+- reg |= PCI_EXP_LNKCTL_RL;
+- advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL);
+-
+- ret = advk_pcie_wait_for_link(pcie);
+- if (ret)
+- return ret;
+-
+- reg = advk_read16(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKSTA);
+- neg_gen = reg & PCI_EXP_LNKSTA_CLS;
+-
+- return neg_gen;
+-}
+-
+-static void advk_pcie_train_link(struct advk_pcie *pcie)
+-{
+- struct device *dev = &pcie->pdev->dev;
+- int neg_gen = -1, gen;
+-
+ /*
+ * Reset PCIe card via PERST# signal. Some cards are not detected
+ * during link training when they are in some non-initial state.
+@@ -380,41 +431,18 @@ static void advk_pcie_train_link(struct advk_pcie *pcie)
+ * PERST# signal could have been asserted by pinctrl subsystem before
+ * probe() callback has been called or issued explicitly by reset gpio
+ * function advk_pcie_issue_perst(), making the endpoint going into
+- * fundamental reset. As required by PCI Express spec a delay for at
+- * least 100ms after such a reset before link training is needed.
+- */
+- msleep(PCI_PM_D3COLD_WAIT);
+-
+- /*
+- * Try link training at link gen specified by device tree property
+- * 'max-link-speed'. If this fails, iteratively train at lower gen.
++ * fundamental reset. As required by PCI Express spec (PCI Express
++ * Base Specification, REV. 4.0 PCI Express, February 19 2014, 6.6.1
++ * Conventional Reset) a delay for at least 100ms after such a reset
++ * before sending a Configuration Request to the device is needed.
++ * So wait until PCIe link is up. Function advk_pcie_wait_for_link()
++ * waits for link at least 900ms.
+ */
+- for (gen = pcie->link_gen; gen > 0; --gen) {
+- neg_gen = advk_pcie_train_at_gen(pcie, gen);
+- if (neg_gen > 0)
+- break;
+- }
+-
+- if (neg_gen < 0)
+- goto err;
+-
+- /*
+- * After successful training if negotiated gen is lower than requested,
+- * train again on negotiated gen. This solves some stability issues for
+- * some buggy gen1 cards.
+- */
+- if (neg_gen < gen) {
+- gen = neg_gen;
+- neg_gen = advk_pcie_train_at_gen(pcie, gen);
+- }
+-
+- if (neg_gen == gen) {
+- dev_info(dev, "link up at gen %i\n", gen);
+- return;
+- }
+-
+-err:
+- dev_err(dev, "link never came up\n");
++ ret = advk_pcie_wait_for_link(pcie);
++ if (ret < 0)
++ dev_err(dev, "link never came up\n");
++ else
++ dev_info(dev, "link up\n");
+ }
+
+ /*
+@@ -448,12 +476,19 @@ static void advk_pcie_disable_ob_win(struct advk_pcie *pcie, u8 win_num)
+
+ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+ {
++ phys_addr_t msi_addr;
+ u32 reg;
+ int i;
+
+- /* Enable TX */
++ /*
++ * Configure PCIe Reference clock. Direction is from the PCIe
++ * controller to the endpoint card, so enable transmitting of
++ * Reference clock differential signal off-chip and disable
++ * receiving off-chip differential signal.
++ */
+ reg = advk_readl(pcie, PCIE_CORE_REF_CLK_REG);
+ reg |= PCIE_CORE_REF_CLK_TX_ENABLE;
++ reg &= ~PCIE_CORE_REF_CLK_RX_ENABLE;
+ advk_writel(pcie, reg, PCIE_CORE_REF_CLK_REG);
+
+ /* Set to Direct mode */
+@@ -477,6 +512,31 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+ reg = (PCI_VENDOR_ID_MARVELL << 16) | PCI_VENDOR_ID_MARVELL;
+ advk_writel(pcie, reg, VENDOR_ID_REG);
+
++ /*
++ * Change Class Code of PCI Bridge device to PCI Bridge (0x600400),
++ * because the default value is Mass storage controller (0x010400).
++ *
++ * Note that this Aardvark PCI Bridge does not have compliant Type 1
++ * Configuration Space and it even cannot be accessed via Aardvark's
++ * PCI config space access method. Something like config space is
++ * available in internal Aardvark registers starting at offset 0x0
++ * and is reported as Type 0. In range 0x10 - 0x34 it has totally
++ * different registers.
++ *
++ * Therefore driver uses emulation of PCI Bridge which emulates
++ * access to configuration space via internal Aardvark registers or
++ * emulated configuration buffer.
++ */
++ reg = advk_readl(pcie, PCIE_CORE_DEV_REV_REG);
++ reg &= ~0xffffff00;
++ reg |= (PCI_CLASS_BRIDGE_PCI << 8) << 8;
++ advk_writel(pcie, reg, PCIE_CORE_DEV_REV_REG);
++
++ /* Disable Root Bridge I/O space, memory space and bus mastering */
++ reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
++ reg &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
++ advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG);
++
+ /* Set Advanced Error Capabilities and Control PF0 register */
+ reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX |
+ PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN |
+@@ -488,8 +548,9 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+ reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
+ reg &= ~PCI_EXP_DEVCTL_RELAX_EN;
+ reg &= ~PCI_EXP_DEVCTL_NOSNOOP_EN;
++ reg &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ reg &= ~PCI_EXP_DEVCTL_READRQ;
+- reg |= PCI_EXP_DEVCTL_PAYLOAD; /* Set max payload size */
++ reg |= PCI_EXP_DEVCTL_PAYLOAD_512B;
+ reg |= PCI_EXP_DEVCTL_READRQ_512B;
+ advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
+
+@@ -504,25 +565,36 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+ reg |= LANE_COUNT_1;
+ advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
+
++ /* Set MSI address */
++ msi_addr = virt_to_phys(pcie);
++ advk_writel(pcie, lower_32_bits(msi_addr), PCIE_MSI_ADDR_LOW_REG);
++ advk_writel(pcie, upper_32_bits(msi_addr), PCIE_MSI_ADDR_HIGH_REG);
++
+ /* Enable MSI */
+ reg = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
+ reg |= PCIE_CORE_CTRL2_MSI_ENABLE;
+ advk_writel(pcie, reg, PCIE_CORE_CTRL2_REG);
+
+ /* Clear all interrupts */
++ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
+ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
+ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
+ advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
+
+- /* Disable All ISR0/1 Sources */
+- reg = PCIE_ISR0_ALL_MASK;
++ /* Disable All ISR0/1 and MSI Sources */
++ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_MASK_REG);
++ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
++ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
++
++ /* Unmask summary MSI interrupt */
++ reg = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+ reg &= ~PCIE_ISR0_MSI_INT_PENDING;
+ advk_writel(pcie, reg, PCIE_ISR0_MASK_REG);
+
+- advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
+-
+- /* Unmask all MSIs */
+- advk_writel(pcie, 0, PCIE_MSI_MASK_REG);
++ /* Unmask PME interrupt for processing of PME requester */
++ reg = advk_readl(pcie, PCIE_ISR0_MASK_REG);
++ reg &= ~PCIE_MSG_PM_PME_MASK;
++ advk_writel(pcie, reg, PCIE_ISR0_MASK_REG);
+
+ /* Enable summary interrupt for GIC SPI source */
+ reg = PCIE_IRQ_ALL_MASK & (~PCIE_IRQ_ENABLE_INTS_MASK);
+@@ -574,19 +646,6 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
+ advk_pcie_disable_ob_win(pcie, i);
+
+ advk_pcie_train_link(pcie);
+-
+- /*
+- * FIXME: The following register update is suspicious. This register is
+- * applicable only when the PCI controller is configured for Endpoint
+- * mode, not as a Root Complex. But apparently when this code is
+- * removed, some cards stop working. This should be investigated and
+- * a comment explaining this should be put here.
+- */
+- reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+- reg |= PCIE_CORE_CMD_MEM_ACCESS_EN |
+- PCIE_CORE_CMD_IO_ACCESS_EN |
+- PCIE_CORE_CMD_MEM_IO_REQ_EN;
+- advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG);
+ }
+
+ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val)
+@@ -595,6 +654,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
+ u32 reg;
+ unsigned int status;
+ char *strcomp_status, *str_posted;
++ int ret;
+
+ reg = advk_readl(pcie, PIO_STAT);
+ status = (reg & PIO_COMPLETION_STATUS_MASK) >>
+@@ -619,6 +679,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
+ case PIO_COMPLETION_STATUS_OK:
+ if (reg & PIO_ERR_STATUS) {
+ strcomp_status = "COMP_ERR";
++ ret = -EFAULT;
+ break;
+ }
+ /* Get the read result */
+@@ -626,9 +687,11 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
+ *val = advk_readl(pcie, PIO_RD_DATA);
+ /* No error */
+ strcomp_status = NULL;
++ ret = 0;
+ break;
+ case PIO_COMPLETION_STATUS_UR:
+ strcomp_status = "UR";
++ ret = -EOPNOTSUPP;
+ break;
+ case PIO_COMPLETION_STATUS_CRS:
+ if (allow_crs && val) {
+@@ -646,6 +709,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
+ */
+ *val = CFG_RD_CRS_VAL;
+ strcomp_status = NULL;
++ ret = 0;
+ break;
+ }
+ /* PCIe r4.0, sec 2.3.2, says:
+@@ -661,31 +725,34 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
+ * Request and taking appropriate action, e.g., complete the
+ * Request to the host as a failed transaction.
+ *
+- * To simplify implementation do not re-issue the Configuration
+- * Request and complete the Request as a failed transaction.
++ * So return -EAGAIN and caller (pci-aardvark.c driver) will
++ * re-issue request again up to the PIO_RETRY_CNT retries.
+ */
+ strcomp_status = "CRS";
++ ret = -EAGAIN;
+ break;
+ case PIO_COMPLETION_STATUS_CA:
+ strcomp_status = "CA";
++ ret = -ECANCELED;
+ break;
+ default:
+ strcomp_status = "Unknown";
++ ret = -EINVAL;
+ break;
+ }
+
+ if (!strcomp_status)
+- return 0;
++ return ret;
+
+ if (reg & PIO_NON_POSTED_REQ)
+ str_posted = "Non-posted";
+ else
+ str_posted = "Posted";
+
+- dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
++ dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
+ str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
+
+- return -EFAULT;
++ return ret;
+ }
+
+ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
+@@ -693,13 +760,13 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
+ struct device *dev = &pcie->pdev->dev;
+ int i;
+
+- for (i = 0; i < PIO_RETRY_CNT; i++) {
++ for (i = 1; i <= PIO_RETRY_CNT; i++) {
+ u32 start, isr;
+
+ start = advk_readl(pcie, PIO_START);
+ isr = advk_readl(pcie, PIO_ISR);
+ if (!start && isr)
+- return 0;
++ return i;
+ udelay(PIO_RETRY_DELAY);
+ }
+
+@@ -707,30 +774,106 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
+ return -ETIMEDOUT;
+ }
+
+-
+ static pci_bridge_emul_read_status_t
+-advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
++advk_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge,
+ int reg, u32 *value)
+ {
+ struct advk_pcie *pcie = bridge->data;
+
+-
+ switch (reg) {
+- case PCI_EXP_SLTCTL:
+- *value = PCI_EXP_SLTSTA_PDS << 16;
++ case PCI_COMMAND:
++ *value = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
+ return PCI_BRIDGE_EMUL_HANDLED;
+
+- case PCI_EXP_RTCTL: {
+- u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG);
+- *value = (val & PCIE_MSG_PM_PME_MASK) ? 0 : PCI_EXP_RTCTL_PMEIE;
+- *value |= PCI_EXP_RTCAP_CRSVIS << 16;
++ case PCI_INTERRUPT_LINE: {
++ /*
++ * From the whole 32bit register we support reading from HW only
++ * two bits: PCI_BRIDGE_CTL_BUS_RESET and PCI_BRIDGE_CTL_SERR.
++ * Other bits are retrieved only from emulated config buffer.
++ */
++ __le32 *cfgspace = (__le32 *)&bridge->conf;
++ u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]);
++ if (advk_readl(pcie, PCIE_ISR0_MASK_REG) & PCIE_ISR0_ERR_MASK)
++ val &= ~(PCI_BRIDGE_CTL_SERR << 16);
++ else
++ val |= PCI_BRIDGE_CTL_SERR << 16;
++ if (advk_readl(pcie, PCIE_CORE_CTRL1_REG) & HOT_RESET_GEN)
++ val |= PCI_BRIDGE_CTL_BUS_RESET << 16;
++ else
++ val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16);
++ *value = val;
+ return PCI_BRIDGE_EMUL_HANDLED;
+ }
+
+- case PCI_EXP_RTSTA: {
+- u32 isr0 = advk_readl(pcie, PCIE_ISR0_REG);
+- u32 msglog = advk_readl(pcie, PCIE_MSG_LOG_REG);
+- *value = (isr0 & PCIE_MSG_PM_PME_MASK) << 16 | (msglog >> 16);
++ default:
++ return PCI_BRIDGE_EMUL_NOT_HANDLED;
++ }
++}
++
++static void
++advk_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
++ int reg, u32 old, u32 new, u32 mask)
++{
++ struct advk_pcie *pcie = bridge->data;
++
++ switch (reg) {
++ case PCI_COMMAND:
++ advk_writel(pcie, new, PCIE_CORE_CMD_STATUS_REG);
++ break;
++
++ case PCI_INTERRUPT_LINE:
++ /*
++ * According to Figure 6-3: Pseudo Logic Diagram for Error
++ * Message Controls in PCIe base specification, SERR# Enable bit
++ * in Bridge Control register enable receiving of ERR_* messages
++ */
++ if (mask & (PCI_BRIDGE_CTL_SERR << 16)) {
++ u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG);
++ if (new & (PCI_BRIDGE_CTL_SERR << 16))
++ val &= ~PCIE_ISR0_ERR_MASK;
++ else
++ val |= PCIE_ISR0_ERR_MASK;
++ advk_writel(pcie, val, PCIE_ISR0_MASK_REG);
++ }
++ if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) {
++ u32 val = advk_readl(pcie, PCIE_CORE_CTRL1_REG);
++ if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16))
++ val |= HOT_RESET_GEN;
++ else
++ val &= ~HOT_RESET_GEN;
++ advk_writel(pcie, val, PCIE_CORE_CTRL1_REG);
++ }
++ break;
++
++ default:
++ break;
++ }
++}
++
++static pci_bridge_emul_read_status_t
++advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
++ int reg, u32 *value)
++{
++ struct advk_pcie *pcie = bridge->data;
++
++
++ switch (reg) {
++ /*
++ * PCI_EXP_SLTCAP, PCI_EXP_SLTCTL, PCI_EXP_RTCTL and PCI_EXP_RTSTA are
++ * also supported, but do not need to be handled here, because their
++ * values are stored in emulated config space buffer, and we read them
++ * from there when needed.
++ */
++
++ case PCI_EXP_LNKCAP: {
++ u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
++ /*
++ * PCI_EXP_LNKCAP_DLLLARC bit is hardwired in aardvark HW to 0.
++ * But support for PCI_EXP_LNKSTA_DLLLA is emulated via ltssm
++ * state so explicitly enable PCI_EXP_LNKCAP_DLLLARC flag.
++ */
++ val |= PCI_EXP_LNKCAP_DLLLARC;
++ *value = val;
+ return PCI_BRIDGE_EMUL_HANDLED;
+ }
+
+@@ -738,18 +881,23 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+ /* u32 contains both PCI_EXP_LNKCTL and PCI_EXP_LNKSTA */
+ u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg) &
+ ~(PCI_EXP_LNKSTA_LT << 16);
+- if (!advk_pcie_link_up(pcie))
++ if (advk_pcie_link_training(pcie))
+ val |= (PCI_EXP_LNKSTA_LT << 16);
++ if (advk_pcie_link_active(pcie))
++ val |= (PCI_EXP_LNKSTA_DLLLA << 16);
+ *value = val;
+ return PCI_BRIDGE_EMUL_HANDLED;
+ }
+
+- case PCI_CAP_LIST_ID:
+ case PCI_EXP_DEVCAP:
+ case PCI_EXP_DEVCTL:
+- case PCI_EXP_LNKCAP:
++ case PCI_EXP_DEVCAP2:
++ case PCI_EXP_DEVCTL2:
++ case PCI_EXP_LNKCAP2:
++ case PCI_EXP_LNKCTL2:
+ *value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
+ return PCI_BRIDGE_EMUL_HANDLED;
++
+ default:
+ return PCI_BRIDGE_EMUL_NOT_HANDLED;
+ }
+@@ -763,10 +911,6 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+ struct advk_pcie *pcie = bridge->data;
+
+ switch (reg) {
+- case PCI_EXP_DEVCTL:
+- advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+- break;
+-
+ case PCI_EXP_LNKCTL:
+ advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+ if (new & PCI_EXP_LNKCTL_RL)
+@@ -774,18 +918,23 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+ break;
+
+ case PCI_EXP_RTCTL: {
+- /* Only mask/unmask PME interrupt */
+- u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG) &
+- ~PCIE_MSG_PM_PME_MASK;
+- if ((new & PCI_EXP_RTCTL_PMEIE) == 0)
+- val |= PCIE_MSG_PM_PME_MASK;
+- advk_writel(pcie, val, PCIE_ISR0_MASK_REG);
++ u16 rootctl = le16_to_cpu(bridge->pcie_conf.rootctl);
++ /* Only emulation of PMEIE and CRSSVE bits is provided */
++ rootctl &= PCI_EXP_RTCTL_PMEIE | PCI_EXP_RTCTL_CRSSVE;
++ bridge->pcie_conf.rootctl = cpu_to_le16(rootctl);
+ break;
+ }
+
+- case PCI_EXP_RTSTA:
+- new = (new & PCI_EXP_RTSTA_PME) >> 9;
+- advk_writel(pcie, new, PCIE_ISR0_REG);
++ /*
++ * PCI_EXP_RTSTA is also supported, but does not need to be handled
++ * here, because its value is stored in emulated config space buffer,
++ * and we write it there when needed.
++ */
++
++ case PCI_EXP_DEVCTL:
++ case PCI_EXP_DEVCTL2:
++ case PCI_EXP_LNKCTL2:
++ advk_writel(pcie, new, PCIE_CORE_PCIEXP_CAP + reg);
+ break;
+
+ default:
+@@ -794,6 +943,8 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+ }
+
+ static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = {
++ .read_base = advk_pci_bridge_emul_base_conf_read,
++ .write_base = advk_pci_bridge_emul_base_conf_write,
+ .read_pcie = advk_pci_bridge_emul_pcie_conf_read,
+ .write_pcie = advk_pci_bridge_emul_pcie_conf_write,
+ };
+@@ -805,7 +956,6 @@ static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = {
+ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
+ {
+ struct pci_bridge_emul *bridge = &pcie->bridge;
+- int ret;
+
+ bridge->conf.vendor =
+ cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff);
+@@ -823,21 +973,36 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
+ bridge->conf.pref_mem_limit = cpu_to_le16(PCI_PREF_RANGE_TYPE_64);
+
+ /* Support interrupt A for MSI feature */
+- bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE;
++ bridge->conf.intpin = PCI_INTERRUPT_INTA;
+
+- bridge->has_pcie = true;
+- bridge->data = pcie;
+- bridge->ops = &advk_pci_bridge_emul_ops;
++ /*
++ * Aardvark HW provides PCIe Capability structure in version 2 and
++ * indicate slot support, which is emulated.
++ */
++ bridge->pcie_conf.cap = cpu_to_le16(2 | PCI_EXP_FLAGS_SLOT);
+
+- /* PCIe config space can be initialized after pci_bridge_emul_init() */
+- ret = pci_bridge_emul_init(bridge, 0);
+- if (ret < 0)
+- return ret;
++ /*
++ * Set Presence Detect State bit permanently since there is no support
++ * for unplugging the card nor detecting whether it is plugged. (If a
++ * platform exists in the future that supports it, via a GPIO for
++ * example, it should be implemented via this bit.)
++ *
++ * Set physical slot number to 1 since there is only one port and zero
++ * value is reserved for ports within the same silicon as Root Port
++ * which is not our case.
++ */
++ bridge->pcie_conf.slotcap = cpu_to_le32(FIELD_PREP(PCI_EXP_SLTCAP_PSN,
++ 1));
++ bridge->pcie_conf.slotsta = cpu_to_le16(PCI_EXP_SLTSTA_PDS);
+
+ /* Indicates supports for Completion Retry Status */
+ bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS);
+
+- return 0;
++ bridge->has_pcie = true;
++ bridge->data = pcie;
++ bridge->ops = &advk_pci_bridge_emul_ops;
++
++ return pci_bridge_emul_init(bridge, 0);
+ }
+
+ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
+@@ -847,8 +1012,12 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
+ return false;
+
+ /*
+- * If the link goes down after we check for link-up, nothing bad
+- * happens but the config access times out.
++ * If the link goes down after we check for link-up, we have a problem:
++ * if a PIO request is executed while link-down, the whole controller
++ * gets stuck in a non-functional state, and even after link comes up
++ * again, PIO requests won't work anymore, and a reset of the whole PCIe
++ * controller is needed. Therefore we need to prevent sending PIO
++ * requests while the link is down.
+ */
+ if (!pci_is_root_bus(bus) && !advk_pcie_link_up(pcie))
+ return false;
+@@ -889,6 +1058,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
+ int where, int size, u32 *val)
+ {
+ struct advk_pcie *pcie = bus->sysdata;
++ int retry_count;
+ bool allow_crs;
+ u32 reg;
+ int ret;
+@@ -911,18 +1081,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
+ (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) &
+ PCI_EXP_RTCTL_CRSSVE);
+
+- if (advk_pcie_pio_is_running(pcie)) {
+- /*
+- * If it is possible return Completion Retry Status so caller
+- * tries to issue the request again instead of failing.
+- */
+- if (allow_crs) {
+- *val = CFG_RD_CRS_VAL;
+- return PCIBIOS_SUCCESSFUL;
+- }
+- *val = 0xffffffff;
+- return PCIBIOS_SET_FAILED;
+- }
++ if (advk_pcie_pio_is_running(pcie))
++ goto try_crs;
+
+ /* Program the control register */
+ reg = advk_readl(pcie, PIO_CTRL);
+@@ -941,30 +1101,24 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
+ /* Program the data strobe */
+ advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
+
+- /* Clear PIO DONE ISR and start the transfer */
+- advk_writel(pcie, 1, PIO_ISR);
+- advk_writel(pcie, 1, PIO_START);
++ retry_count = 0;
++ do {
++ /* Clear PIO DONE ISR and start the transfer */
++ advk_writel(pcie, 1, PIO_ISR);
++ advk_writel(pcie, 1, PIO_START);
+
+- ret = advk_pcie_wait_pio(pcie);
+- if (ret < 0) {
+- /*
+- * If it is possible return Completion Retry Status so caller
+- * tries to issue the request again instead of failing.
+- */
+- if (allow_crs) {
+- *val = CFG_RD_CRS_VAL;
+- return PCIBIOS_SUCCESSFUL;
+- }
+- *val = 0xffffffff;
+- return PCIBIOS_SET_FAILED;
+- }
++ ret = advk_pcie_wait_pio(pcie);
++ if (ret < 0)
++ goto try_crs;
+
+- /* Check PIO status and get the read result */
+- ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
+- if (ret < 0) {
+- *val = 0xffffffff;
+- return PCIBIOS_SET_FAILED;
+- }
++ retry_count += ret;
++
++ /* Check PIO status and get the read result */
++ ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
++ } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
++
++ if (ret < 0)
++ goto fail;
+
+ if (size == 1)
+ *val = (*val >> (8 * (where & 3))) & 0xff;
+@@ -972,6 +1126,20 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
+ *val = (*val >> (8 * (where & 3))) & 0xffff;
+
+ return PCIBIOS_SUCCESSFUL;
++
++try_crs:
++ /*
++ * If it is possible, return Completion Retry Status so that caller
++ * tries to issue the request again instead of failing.
++ */
++ if (allow_crs) {
++ *val = CFG_RD_CRS_VAL;
++ return PCIBIOS_SUCCESSFUL;
++ }
++
++fail:
++ *val = 0xffffffff;
++ return PCIBIOS_SET_FAILED;
+ }
+
+ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+@@ -980,6 +1148,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+ struct advk_pcie *pcie = bus->sysdata;
+ u32 reg;
+ u32 data_strobe = 0x0;
++ int retry_count;
+ int offset;
+ int ret;
+
+@@ -1021,19 +1190,22 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+ /* Program the data strobe */
+ advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
+
+- /* Clear PIO DONE ISR and start the transfer */
+- advk_writel(pcie, 1, PIO_ISR);
+- advk_writel(pcie, 1, PIO_START);
++ retry_count = 0;
++ do {
++ /* Clear PIO DONE ISR and start the transfer */
++ advk_writel(pcie, 1, PIO_ISR);
++ advk_writel(pcie, 1, PIO_START);
+
+- ret = advk_pcie_wait_pio(pcie);
+- if (ret < 0)
+- return PCIBIOS_SET_FAILED;
++ ret = advk_pcie_wait_pio(pcie);
++ if (ret < 0)
++ return PCIBIOS_SET_FAILED;
+
+- ret = advk_pcie_check_pio_status(pcie, false, NULL);
+- if (ret < 0)
+- return PCIBIOS_SET_FAILED;
++ retry_count += ret;
+
+- return PCIBIOS_SUCCESSFUL;
++ ret = advk_pcie_check_pio_status(pcie, false, NULL);
++ } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
++
++ return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+ }
+
+ static struct pci_ops advk_pcie_ops = {
+@@ -1045,11 +1217,11 @@ static void advk_msi_irq_compose_msi_msg(struct irq_data *data,
+ struct msi_msg *msg)
+ {
+ struct advk_pcie *pcie = irq_data_get_irq_chip_data(data);
+- phys_addr_t msi_msg = virt_to_phys(&pcie->msi_msg);
++ phys_addr_t msi_addr = virt_to_phys(pcie);
+
+- msg->address_lo = lower_32_bits(msi_msg);
+- msg->address_hi = upper_32_bits(msi_msg);
+- msg->data = data->irq;
++ msg->address_lo = lower_32_bits(msi_addr);
++ msg->address_hi = upper_32_bits(msi_addr);
++ msg->data = data->hwirq;
+ }
+
+ static int advk_msi_set_affinity(struct irq_data *irq_data,
+@@ -1058,6 +1230,54 @@ static int advk_msi_set_affinity(struct irq_data *irq_data,
+ return -EINVAL;
+ }
+
++static void advk_msi_irq_mask(struct irq_data *d)
++{
++ struct advk_pcie *pcie = d->domain->host_data;
++ irq_hw_number_t hwirq = irqd_to_hwirq(d);
++ unsigned long flags;
++ u32 mask;
++
++ raw_spin_lock_irqsave(&pcie->msi_irq_lock, flags);
++ mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
++ mask |= BIT(hwirq);
++ advk_writel(pcie, mask, PCIE_MSI_MASK_REG);
++ raw_spin_unlock_irqrestore(&pcie->msi_irq_lock, flags);
++}
++
++static void advk_msi_irq_unmask(struct irq_data *d)
++{
++ struct advk_pcie *pcie = d->domain->host_data;
++ irq_hw_number_t hwirq = irqd_to_hwirq(d);
++ unsigned long flags;
++ u32 mask;
++
++ raw_spin_lock_irqsave(&pcie->msi_irq_lock, flags);
++ mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
++ mask &= ~BIT(hwirq);
++ advk_writel(pcie, mask, PCIE_MSI_MASK_REG);
++ raw_spin_unlock_irqrestore(&pcie->msi_irq_lock, flags);
++}
++
++static void advk_msi_top_irq_mask(struct irq_data *d)
++{
++ pci_msi_mask_irq(d);
++ irq_chip_mask_parent(d);
++}
++
++static void advk_msi_top_irq_unmask(struct irq_data *d)
++{
++ pci_msi_unmask_irq(d);
++ irq_chip_unmask_parent(d);
++}
++
++static struct irq_chip advk_msi_bottom_irq_chip = {
++ .name = "MSI",
++ .irq_compose_msi_msg = advk_msi_irq_compose_msi_msg,
++ .irq_set_affinity = advk_msi_set_affinity,
++ .irq_mask = advk_msi_irq_mask,
++ .irq_unmask = advk_msi_irq_unmask,
++};
++
+ static int advk_msi_irq_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs, void *args)
+@@ -1066,23 +1286,19 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain,
+ int hwirq, i;
+
+ mutex_lock(&pcie->msi_used_lock);
+- hwirq = bitmap_find_next_zero_area(pcie->msi_used, MSI_IRQ_NUM,
+- 0, nr_irqs, 0);
+- if (hwirq >= MSI_IRQ_NUM) {
+- mutex_unlock(&pcie->msi_used_lock);
+- return -ENOSPC;
+- }
+-
+- bitmap_set(pcie->msi_used, hwirq, nr_irqs);
++ hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM,
++ order_base_2(nr_irqs));
+ mutex_unlock(&pcie->msi_used_lock);
++ if (hwirq < 0)
++ return -ENOSPC;
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+- &pcie->msi_bottom_irq_chip,
++ &advk_msi_bottom_irq_chip,
+ domain->host_data, handle_simple_irq,
+ NULL, NULL);
+
+- return hwirq;
++ return 0;
+ }
+
+ static void advk_msi_irq_domain_free(struct irq_domain *domain,
+@@ -1092,7 +1308,7 @@ static void advk_msi_irq_domain_free(struct irq_domain *domain,
+ struct advk_pcie *pcie = domain->host_data;
+
+ mutex_lock(&pcie->msi_used_lock);
+- bitmap_clear(pcie->msi_used, d->hwirq, nr_irqs);
++ bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs));
+ mutex_unlock(&pcie->msi_used_lock);
+ }
+
+@@ -1134,7 +1350,6 @@ static int advk_pcie_irq_map(struct irq_domain *h,
+ {
+ struct advk_pcie *pcie = h->host_data;
+
+- advk_pcie_irq_mask(irq_get_irq_data(virq));
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &pcie->irq_chip,
+ handle_level_irq);
+@@ -1148,37 +1363,25 @@ static const struct irq_domain_ops advk_pcie_irq_domain_ops = {
+ .xlate = irq_domain_xlate_onecell,
+ };
+
++static struct irq_chip advk_msi_irq_chip = {
++ .name = "advk-MSI",
++ .irq_mask = advk_msi_top_irq_mask,
++ .irq_unmask = advk_msi_top_irq_unmask,
++};
++
++static struct msi_domain_info advk_msi_domain_info = {
++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
++ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
++ .chip = &advk_msi_irq_chip,
++};
++
+ static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie)
+ {
+ struct device *dev = &pcie->pdev->dev;
+- struct device_node *node = dev->of_node;
+- struct irq_chip *bottom_ic, *msi_ic;
+- struct msi_domain_info *msi_di;
+- phys_addr_t msi_msg_phys;
+
++ raw_spin_lock_init(&pcie->msi_irq_lock);
+ mutex_init(&pcie->msi_used_lock);
+
+- bottom_ic = &pcie->msi_bottom_irq_chip;
+-
+- bottom_ic->name = "MSI";
+- bottom_ic->irq_compose_msi_msg = advk_msi_irq_compose_msi_msg;
+- bottom_ic->irq_set_affinity = advk_msi_set_affinity;
+-
+- msi_ic = &pcie->msi_irq_chip;
+- msi_ic->name = "advk-MSI";
+-
+- msi_di = &pcie->msi_domain_info;
+- msi_di->flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+- MSI_FLAG_MULTI_PCI_MSI;
+- msi_di->chip = msi_ic;
+-
+- msi_msg_phys = virt_to_phys(&pcie->msi_msg);
+-
+- advk_writel(pcie, lower_32_bits(msi_msg_phys),
+- PCIE_MSI_ADDR_LOW_REG);
+- advk_writel(pcie, upper_32_bits(msi_msg_phys),
+- PCIE_MSI_ADDR_HIGH_REG);
+-
+ pcie->msi_inner_domain =
+ irq_domain_add_linear(NULL, MSI_IRQ_NUM,
+ &advk_msi_domain_ops, pcie);
+@@ -1186,8 +1389,9 @@ static int advk_pcie_init_msi_irq_domain(struct advk_pcie *pcie)
+ return -ENOMEM;
+
+ pcie->msi_domain =
+- pci_msi_create_irq_domain(of_node_to_fwnode(node),
+- msi_di, pcie->msi_inner_domain);
++ pci_msi_create_irq_domain(dev_fwnode(dev),
++ &advk_msi_domain_info,
++ pcie->msi_inner_domain);
+ if (!pcie->msi_domain) {
+ irq_domain_remove(pcie->msi_inner_domain);
+ return -ENOMEM;
+@@ -1228,7 +1432,6 @@ static int advk_pcie_init_irq_domain(struct advk_pcie *pcie)
+ }
+
+ irq_chip->irq_mask = advk_pcie_irq_mask;
+- irq_chip->irq_mask_ack = advk_pcie_irq_mask;
+ irq_chip->irq_unmask = advk_pcie_irq_unmask;
+
+ pcie->irq_domain =
+@@ -1250,22 +1453,85 @@ static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie)
+ irq_domain_remove(pcie->irq_domain);
+ }
+
++static struct irq_chip advk_rp_irq_chip = {
++ .name = "advk-RP",
++};
++
++static int advk_pcie_rp_irq_map(struct irq_domain *h,
++ unsigned int virq, irq_hw_number_t hwirq)
++{
++ struct advk_pcie *pcie = h->host_data;
++
++ irq_set_chip_and_handler(virq, &advk_rp_irq_chip, handle_simple_irq);
++ irq_set_chip_data(virq, pcie);
++
++ return 0;
++}
++
++static const struct irq_domain_ops advk_pcie_rp_irq_domain_ops = {
++ .map = advk_pcie_rp_irq_map,
++ .xlate = irq_domain_xlate_onecell,
++};
++
++static int advk_pcie_init_rp_irq_domain(struct advk_pcie *pcie)
++{
++ pcie->rp_irq_domain = irq_domain_add_linear(NULL, 1,
++ &advk_pcie_rp_irq_domain_ops,
++ pcie);
++ if (!pcie->rp_irq_domain) {
++ dev_err(&pcie->pdev->dev, "Failed to add Root Port IRQ domain\n");
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++static void advk_pcie_remove_rp_irq_domain(struct advk_pcie *pcie)
++{
++ irq_domain_remove(pcie->rp_irq_domain);
++}
++
++static void advk_pcie_handle_pme(struct advk_pcie *pcie)
++{
++ u32 requester = advk_readl(pcie, PCIE_MSG_LOG_REG) >> 16;
++
++ advk_writel(pcie, PCIE_MSG_PM_PME_MASK, PCIE_ISR0_REG);
++
++ /*
++ * PCIE_MSG_LOG_REG contains the last inbound message, so store
++ * the requester ID only when PME was not asserted yet.
++ * Also do not trigger PME interrupt when PME is still asserted.
++ */
++ if (!(le32_to_cpu(pcie->bridge.pcie_conf.rootsta) & PCI_EXP_RTSTA_PME)) {
++ pcie->bridge.pcie_conf.rootsta = cpu_to_le32(requester | PCI_EXP_RTSTA_PME);
++
++ /*
++ * Trigger PME interrupt only if PMEIE bit in Root Control is set.
++ * Aardvark HW returns zero for PCI_EXP_FLAGS_IRQ, so use PCIe interrupt 0.
++ */
++ if (!(le16_to_cpu(pcie->bridge.pcie_conf.rootctl) & PCI_EXP_RTCTL_PMEIE))
++ return;
++
++ if (generic_handle_domain_irq(pcie->rp_irq_domain, 0) == -EINVAL)
++ dev_err_ratelimited(&pcie->pdev->dev, "unhandled PME IRQ\n");
++ }
++}
++
+ static void advk_pcie_handle_msi(struct advk_pcie *pcie)
+ {
+ u32 msi_val, msi_mask, msi_status, msi_idx;
+- u16 msi_data;
+
+ msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG);
+ msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG);
+- msi_status = msi_val & ~msi_mask;
++ msi_status = msi_val & ((~msi_mask) & PCIE_MSI_ALL_MASK);
+
+ for (msi_idx = 0; msi_idx < MSI_IRQ_NUM; msi_idx++) {
+ if (!(BIT(msi_idx) & msi_status))
+ continue;
+
+ advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG);
+- msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & 0xFF;
+- generic_handle_irq(msi_data);
++ if (generic_handle_domain_irq(pcie->msi_inner_domain, msi_idx) == -EINVAL)
++ dev_err_ratelimited(&pcie->pdev->dev, "unexpected MSI 0x%02x\n", msi_idx);
+ }
+
+ advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING,
+@@ -1286,10 +1552,20 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
+ isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG);
+ isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK);
+
+- if (!isr0_status && !isr1_status) {
+- advk_writel(pcie, isr0_val, PCIE_ISR0_REG);
+- advk_writel(pcie, isr1_val, PCIE_ISR1_REG);
+- return;
++ /* Process PME interrupt as the first one to do not miss PME requester id */
++ if (isr0_status & PCIE_MSG_PM_PME_MASK)
++ advk_pcie_handle_pme(pcie);
++
++ /* Process ERR interrupt */
++ if (isr0_status & PCIE_ISR0_ERR_MASK) {
++ advk_writel(pcie, PCIE_ISR0_ERR_MASK, PCIE_ISR0_REG);
++
++ /*
++ * Aardvark HW returns zero for PCI_ERR_ROOT_AER_IRQ, so use
++ * PCIe interrupt 0
++ */
++ if (generic_handle_domain_irq(pcie->rp_irq_domain, 0) == -EINVAL)
++ dev_err_ratelimited(&pcie->pdev->dev, "unhandled ERR IRQ\n");
+ }
+
+ /* Process MSI interrupts */
+@@ -1304,7 +1580,9 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
+ advk_writel(pcie, PCIE_ISR1_INTX_ASSERT(i),
+ PCIE_ISR1_REG);
+
+- generic_handle_domain_irq(pcie->irq_domain, i);
++ if (generic_handle_domain_irq(pcie->irq_domain, i) == -EINVAL)
++ dev_err_ratelimited(&pcie->pdev->dev, "unexpected INT%c IRQ\n",
++ (char)i + 'A');
+ }
+ }
+
+@@ -1325,7 +1603,22 @@ static irqreturn_t advk_pcie_irq_handler(int irq, void *arg)
+ return IRQ_HANDLED;
+ }
+
+-static void __maybe_unused advk_pcie_disable_phy(struct advk_pcie *pcie)
++static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
++{
++ struct advk_pcie *pcie = dev->bus->sysdata;
++
++ /*
++ * Emulated root bridge has its own emulated irq chip and irq domain.
++ * Argument pin is the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD) and
++ * hwirq for irq_create_mapping() is indexed from zero.
++ */
++ if (pci_is_root_bus(dev->bus))
++ return irq_create_mapping(pcie->rp_irq_domain, pin - 1);
++ else
++ return of_irq_parse_and_map_pci(dev, slot, pin);
++}
++
++static void advk_pcie_disable_phy(struct advk_pcie *pcie)
+ {
+ phy_power_off(pcie->phy);
+ phy_exit(pcie->phy);
+@@ -1411,8 +1704,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
+ * only PIO for issuing configuration transfers which does
+ * not use PCIe window configuration.
+ */
+- if (type != IORESOURCE_MEM && type != IORESOURCE_MEM_64 &&
+- type != IORESOURCE_IO)
++ if (type != IORESOURCE_MEM && type != IORESOURCE_IO)
+ continue;
+
+ /*
+@@ -1420,8 +1712,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
+ * configuration is set to transparent memory access so it
+ * does not need window configuration.
+ */
+- if ((type == IORESOURCE_MEM || type == IORESOURCE_MEM_64) &&
+- entry->offset == 0)
++ if (type == IORESOURCE_MEM && entry->offset == 0)
+ continue;
+
+ /*
+@@ -1536,11 +1827,21 @@ static int advk_pcie_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ ret = advk_pcie_init_rp_irq_domain(pcie);
++ if (ret) {
++ dev_err(dev, "Failed to initialize irq\n");
++ advk_pcie_remove_msi_irq_domain(pcie);
++ advk_pcie_remove_irq_domain(pcie);
++ return ret;
++ }
++
+ bridge->sysdata = pcie;
+ bridge->ops = &advk_pcie_ops;
++ bridge->map_irq = advk_pcie_map_irq;
+
+ ret = pci_host_probe(bridge);
+ if (ret < 0) {
++ advk_pcie_remove_rp_irq_domain(pcie);
+ advk_pcie_remove_msi_irq_domain(pcie);
+ advk_pcie_remove_irq_domain(pcie);
+ return ret;
+@@ -1553,20 +1854,65 @@ static int advk_pcie_remove(struct platform_device *pdev)
+ {
+ struct advk_pcie *pcie = platform_get_drvdata(pdev);
+ struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
++ u32 val;
+ int i;
+
++ /* Remove PCI bus with all devices */
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(bridge->bus);
+ pci_remove_root_bus(bridge->bus);
+ pci_unlock_rescan_remove();
+
++ /* Disable Root Bridge I/O space, memory space and bus mastering */
++ val = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG);
++ val &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
++ advk_writel(pcie, val, PCIE_CORE_CMD_STATUS_REG);
++
++ /* Disable MSI */
++ val = advk_readl(pcie, PCIE_CORE_CTRL2_REG);
++ val &= ~PCIE_CORE_CTRL2_MSI_ENABLE;
++ advk_writel(pcie, val, PCIE_CORE_CTRL2_REG);
++
++ /* Clear MSI address */
++ advk_writel(pcie, 0, PCIE_MSI_ADDR_LOW_REG);
++ advk_writel(pcie, 0, PCIE_MSI_ADDR_HIGH_REG);
++
++ /* Mask all interrupts */
++ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_MASK_REG);
++ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_MASK_REG);
++ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_MASK_REG);
++ advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_MASK_REG);
++
++ /* Clear all interrupts */
++ advk_writel(pcie, PCIE_MSI_ALL_MASK, PCIE_MSI_STATUS_REG);
++ advk_writel(pcie, PCIE_ISR0_ALL_MASK, PCIE_ISR0_REG);
++ advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
++ advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
++
++ /* Remove IRQ domains */
++ advk_pcie_remove_rp_irq_domain(pcie);
+ advk_pcie_remove_msi_irq_domain(pcie);
+ advk_pcie_remove_irq_domain(pcie);
+
++ /* Free config space for emulated root bridge */
++ pci_bridge_emul_cleanup(&pcie->bridge);
++
++ /* Assert PERST# signal which prepares PCIe card for power down */
++ if (pcie->reset_gpio)
++ gpiod_set_value_cansleep(pcie->reset_gpio, 1);
++
++ /* Disable link training */
++ val = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
++ val &= ~LINK_TRAINING_EN;
++ advk_writel(pcie, val, PCIE_CORE_CTRL0_REG);
++
+ /* Disable outbound address windows mapping */
+ for (i = 0; i < OB_WIN_COUNT; i++)
+ advk_pcie_disable_ob_win(pcie, i);
+
++ /* Disable phy */
++ advk_pcie_disable_phy(pcie);
++
+ return 0;
+ }
+
+diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
+index 88980a44461df..ca8de44045bbe 100644
+--- a/drivers/pci/controller/pci-ftpci100.c
++++ b/drivers/pci/controller/pci-ftpci100.c
+@@ -442,22 +442,12 @@ static int faraday_pci_probe(struct platform_device *pdev)
+ p->dev = dev;
+
+ /* Retrieve and enable optional clocks */
+- clk = devm_clk_get(dev, "PCLK");
++ clk = devm_clk_get_enabled(dev, "PCLK");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+- ret = clk_prepare_enable(clk);
+- if (ret) {
+- dev_err(dev, "could not prepare PCLK\n");
+- return ret;
+- }
+- p->bus_clk = devm_clk_get(dev, "PCICLK");
++ p->bus_clk = devm_clk_get_enabled(dev, "PCICLK");
+ if (IS_ERR(p->bus_clk))
+ return PTR_ERR(p->bus_clk);
+- ret = clk_prepare_enable(p->bus_clk);
+- if (ret) {
+- dev_err(dev, "could not prepare PCICLK\n");
+- return ret;
+- }
+
+ p->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(p->base))
+diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
+index 67c46e52c0dc3..9b15c0130bbbd 100644
+--- a/drivers/pci/controller/pci-hyperv.c
++++ b/drivers/pci/controller/pci-hyperv.c
+@@ -483,7 +483,10 @@ struct hv_pcibus_device {
+ struct fwnode_handle *fwnode;
+ /* Protocol version negotiated with the host */
+ enum pci_protocol_version_t protocol_version;
++
++ struct mutex state_lock;
+ enum hv_pcibus_state state;
++
+ struct hv_device *hdev;
+ resource_size_t low_mmio_space;
+ resource_size_t high_mmio_space;
+@@ -547,19 +550,10 @@ struct hv_dr_state {
+ struct hv_pcidev_description func[];
+ };
+
+-enum hv_pcichild_state {
+- hv_pcichild_init = 0,
+- hv_pcichild_requirements,
+- hv_pcichild_resourced,
+- hv_pcichild_ejecting,
+- hv_pcichild_maximum
+-};
+-
+ struct hv_pci_dev {
+ /* List protected by pci_rescan_remove_lock */
+ struct list_head list_entry;
+ refcount_t refs;
+- enum hv_pcichild_state state;
+ struct pci_slot *pci_slot;
+ struct hv_pcidev_description desc;
+ bool reported_missing;
+@@ -1142,6 +1136,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev,
+ u8 buffer[sizeof(struct pci_delete_interrupt)];
+ } ctxt;
+
++ if (!int_desc->vector_count) {
++ kfree(int_desc);
++ return;
++ }
+ memset(&ctxt, 0, sizeof(ctxt));
+ int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
+ int_pkt->message_type.type =
+@@ -1204,6 +1202,28 @@ static void hv_irq_mask(struct irq_data *data)
+ pci_msi_mask_irq(data);
+ }
+
++static unsigned int hv_msi_get_int_vector(struct irq_data *data)
++{
++ struct irq_cfg *cfg = irqd_cfg(data);
++
++ return cfg->vector;
++}
++
++static int hv_msi_prepare(struct irq_domain *domain, struct device *dev,
++ int nvec, msi_alloc_info_t *info)
++{
++ int ret = pci_msi_prepare(domain, dev, nvec, info);
++
++ /*
++ * By using the interrupt remapper in the hypervisor IOMMU, contiguous
++ * CPU vectors is not needed for multi-MSI
++ */
++ if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
++ info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
++
++ return ret;
++}
++
+ /**
+ * hv_irq_unmask() - "Unmask" the IRQ by setting its current
+ * affinity.
+@@ -1219,6 +1239,7 @@ static void hv_irq_unmask(struct irq_data *data)
+ struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct hv_retarget_device_interrupt *params;
++ struct tran_int_desc *int_desc;
+ struct hv_pcibus_device *hbus;
+ struct cpumask *dest;
+ cpumask_var_t tmp;
+@@ -1233,6 +1254,12 @@ static void hv_irq_unmask(struct irq_data *data)
+ pdev = msi_desc_to_pci_dev(msi_desc);
+ pbus = pdev->bus;
+ hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
++ int_desc = data->chip_data;
++ if (!int_desc) {
++ dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
++ __func__, data->irq);
++ return;
++ }
+
+ spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
+
+@@ -1240,7 +1267,8 @@ static void hv_irq_unmask(struct irq_data *data)
+ memset(params, 0, sizeof(*params));
+ params->partition_id = HV_PARTITION_ID_SELF;
+ params->int_entry.source = HV_INTERRUPT_SOURCE_MSI;
+- hv_set_msi_entry_from_desc(&params->int_entry.msi_entry, msi_desc);
++ params->int_entry.msi_entry.address.as_uint32 = int_desc->address & 0xffffffff;
++ params->int_entry.msi_entry.data.as_uint32 = int_desc->data;
+ params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
+ (hbus->hdev->dev_instance.b[4] << 16) |
+ (hbus->hdev->dev_instance.b[7] << 8) |
+@@ -1341,12 +1369,12 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
+
+ static u32 hv_compose_msi_req_v1(
+ struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
+- u32 slot, u8 vector)
++ u32 slot, u8 vector, u8 vector_count)
+ {
+ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+- int_pkt->int_desc.vector_count = 1;
++ int_pkt->int_desc.vector_count = vector_count;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+
+ /*
+@@ -1369,14 +1397,14 @@ static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
+
+ static u32 hv_compose_msi_req_v2(
+ struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
+- u32 slot, u8 vector)
++ u32 slot, u8 vector, u8 vector_count)
+ {
+ int cpu;
+
+ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+- int_pkt->int_desc.vector_count = 1;
++ int_pkt->int_desc.vector_count = vector_count;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ cpu = hv_compose_msi_req_get_cpu(affinity);
+ int_pkt->int_desc.processor_array[0] =
+@@ -1388,7 +1416,7 @@ static u32 hv_compose_msi_req_v2(
+
+ static u32 hv_compose_msi_req_v3(
+ struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity,
+- u32 slot, u32 vector)
++ u32 slot, u32 vector, u8 vector_count)
+ {
+ int cpu;
+
+@@ -1396,7 +1424,7 @@ static u32 hv_compose_msi_req_v3(
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+ int_pkt->int_desc.reserved = 0;
+- int_pkt->int_desc.vector_count = 1;
++ int_pkt->int_desc.vector_count = vector_count;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ cpu = hv_compose_msi_req_get_cpu(affinity);
+ int_pkt->int_desc.processor_array[0] =
+@@ -1419,7 +1447,6 @@ static u32 hv_compose_msi_req_v3(
+ */
+ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ {
+- struct irq_cfg *cfg = irqd_cfg(data);
+ struct hv_pcibus_device *hbus;
+ struct vmbus_channel *channel;
+ struct hv_pci_dev *hpdev;
+@@ -1428,6 +1455,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ struct cpumask *dest;
+ struct compose_comp_ctxt comp;
+ struct tran_int_desc *int_desc;
++ struct msi_desc *msi_desc;
++ u8 vector, vector_count;
+ struct {
+ struct pci_packet pci_pkt;
+ union {
+@@ -1440,7 +1469,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ u32 size;
+ int ret;
+
+- pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
++ /* Reuse the previous allocation */
++ if (data->chip_data) {
++ int_desc = data->chip_data;
++ msg->address_hi = int_desc->address >> 32;
++ msg->address_lo = int_desc->address & 0xffffffff;
++ msg->data = int_desc->data;
++ return;
++ }
++
++ msi_desc = irq_data_get_msi_desc(data);
++ pdev = msi_desc_to_pci_dev(msi_desc);
+ dest = irq_data_get_effective_affinity_mask(data);
+ pbus = pdev->bus;
+ hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
+@@ -1449,17 +1488,40 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ if (!hpdev)
+ goto return_null_message;
+
+- /* Free any previous message that might have already been composed. */
+- if (data->chip_data) {
+- int_desc = data->chip_data;
+- data->chip_data = NULL;
+- hv_int_desc_free(hpdev, int_desc);
+- }
+-
+ int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
+ if (!int_desc)
+ goto drop_reference;
+
++ if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) {
++ /*
++ * If this is not the first MSI of Multi MSI, we already have
++ * a mapping. Can exit early.
++ */
++ if (msi_desc->irq != data->irq) {
++ data->chip_data = int_desc;
++ int_desc->address = msi_desc->msg.address_lo |
++ (u64)msi_desc->msg.address_hi << 32;
++ int_desc->data = msi_desc->msg.data +
++ (data->irq - msi_desc->irq);
++ msg->address_hi = msi_desc->msg.address_hi;
++ msg->address_lo = msi_desc->msg.address_lo;
++ msg->data = int_desc->data;
++ put_pcichild(hpdev);
++ return;
++ }
++ /*
++ * The vector we select here is a dummy value. The correct
++ * value gets sent to the hypervisor in unmask(). This needs
++ * to be aligned with the count, and also not zero. Multi-msi
++ * is powers of 2 up to 32, so 32 will always work here.
++ */
++ vector = 32;
++ vector_count = msi_desc->nvec_used;
++ } else {
++ vector = hv_msi_get_int_vector(data);
++ vector_count = 1;
++ }
++
+ memset(&ctxt, 0, sizeof(ctxt));
+ init_completion(&comp.comp_pkt.host_event);
+ ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
+@@ -1470,7 +1532,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
+ dest,
+ hpdev->desc.win_slot.slot,
+- cfg->vector);
++ vector,
++ vector_count);
+ break;
+
+ case PCI_PROTOCOL_VERSION_1_2:
+@@ -1478,14 +1541,16 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
+ dest,
+ hpdev->desc.win_slot.slot,
+- cfg->vector);
++ vector,
++ vector_count);
+ break;
+
+ case PCI_PROTOCOL_VERSION_1_4:
+ size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
+ dest,
+ hpdev->desc.win_slot.slot,
+- cfg->vector);
++ vector,
++ vector_count);
+ break;
+
+ default:
+@@ -1544,12 +1609,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+ hv_pci_onchannelcallback(hbus);
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
+
+- if (hpdev->state == hv_pcichild_ejecting) {
+- dev_err_once(&hbus->hdev->device,
+- "the device is being ejected\n");
+- goto enable_tasklet;
+- }
+-
+ udelay(100);
+ }
+
+@@ -1601,7 +1660,7 @@ static struct irq_chip hv_msi_irq_chip = {
+ };
+
+ static struct msi_domain_ops hv_msi_ops = {
+- .msi_prepare = pci_msi_prepare,
++ .msi_prepare = hv_msi_prepare,
+ .msi_free = hv_msi_free,
+ };
+
+@@ -1899,8 +1958,17 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
+ if (!hv_dev)
+ continue;
+
+- if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
+- set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node);
++ if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY &&
++ hv_dev->desc.virtual_numa_node < num_possible_nodes())
++ /*
++ * The kernel may boot with some NUMA nodes offline
++ * (e.g. in a KDUMP kernel) or with NUMA disabled via
++ * "numa=off". In those cases, adjust the host provided
++ * NUMA node to a valid NUMA node used by the kernel.
++ */
++ set_dev_node(&dev->dev,
++ numa_map_to_online_node(
++ hv_dev->desc.virtual_numa_node));
+
+ put_pcichild(hv_dev);
+ }
+@@ -2126,6 +2194,8 @@ static void pci_devices_present_work(struct work_struct *work)
+ if (!dr)
+ return;
+
++ mutex_lock(&hbus->state_lock);
++
+ /* First, mark all existing children as reported missing. */
+ spin_lock_irqsave(&hbus->device_list_lock, flags);
+ list_for_each_entry(hpdev, &hbus->children, list_entry) {
+@@ -2207,6 +2277,8 @@ static void pci_devices_present_work(struct work_struct *work)
+ break;
+ }
+
++ mutex_unlock(&hbus->state_lock);
++
+ kfree(dr);
+ }
+
+@@ -2355,7 +2427,7 @@ static void hv_eject_device_work(struct work_struct *work)
+ hpdev = container_of(work, struct hv_pci_dev, wrk);
+ hbus = hpdev->hbus;
+
+- WARN_ON(hpdev->state != hv_pcichild_ejecting);
++ mutex_lock(&hbus->state_lock);
+
+ /*
+ * Ejection can come before or after the PCI bus has been set up, so
+@@ -2393,6 +2465,8 @@ static void hv_eject_device_work(struct work_struct *work)
+ put_pcichild(hpdev);
+ put_pcichild(hpdev);
+ /* hpdev has been freed. Do not use it any more. */
++
++ mutex_unlock(&hbus->state_lock);
+ }
+
+ /**
+@@ -2413,7 +2487,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
+ return;
+ }
+
+- hpdev->state = hv_pcichild_ejecting;
+ get_pcichild(hpdev);
+ INIT_WORK(&hpdev->wrk, hv_eject_device_work);
+ queue_work(hbus->wq, &hpdev->wrk);
+@@ -2814,8 +2887,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
+ struct pci_bus_d0_entry *d0_entry;
+ struct hv_pci_compl comp_pkt;
+ struct pci_packet *pkt;
++ bool retry = true;
+ int ret;
+
++enter_d0_retry:
+ /*
+ * Tell the host that the bus is ready to use, and moved into the
+ * powered-on state. This includes telling the host which region
+@@ -2842,6 +2917,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
+ if (ret)
+ goto exit;
+
++ /*
++ * In certain case (Kdump) the pci device of interest was
++ * not cleanly shut down and resource is still held on host
++ * side, the host could return invalid device status.
++ * We need to explicitly request host to release the resource
++ * and try to enter D0 again.
++ */
++ if (comp_pkt.completion_status < 0 && retry) {
++ retry = false;
++
++ dev_err(&hdev->device, "Retrying D0 Entry\n");
++
++ /*
++ * Hv_pci_bus_exit() calls hv_send_resource_released()
++ * to free up resources of its child devices.
++ * In the kdump kernel we need to set the
++ * wslot_res_allocated to 255 so it scans all child
++ * devices to release resources allocated in the
++ * normal kernel before panic happened.
++ */
++ hbus->wslot_res_allocated = 255;
++
++ ret = hv_pci_bus_exit(hdev, true);
++
++ if (ret == 0) {
++ kfree(pkt);
++ goto enter_d0_retry;
++ }
++ dev_err(&hdev->device,
++ "Retrying D0 failed with ret %d\n", ret);
++ }
++
+ if (comp_pkt.completion_status < 0) {
+ dev_err(&hdev->device,
+ "PCI Pass-through VSP failed D0 Entry with status %x\n",
+@@ -2884,6 +2991,24 @@ static int hv_pci_query_relations(struct hv_device *hdev)
+ if (!ret)
+ ret = wait_for_response(hdev, &comp);
+
++ /*
++ * In the case of fast device addition/removal, it's possible that
++ * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
++ * already got a PCI_BUS_RELATIONS* message from the host and the
++ * channel callback already scheduled a work to hbus->wq, which can be
++ * running pci_devices_present_work() -> survey_child_resources() ->
++ * complete(&hbus->survey_event), even after hv_pci_query_relations()
++ * exits and the stack variable 'comp' is no longer valid; as a result,
++ * a hang or a page fault may happen when the complete() calls
++ * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
++ * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
++ * -ENODEV, there can't be any more work item scheduled to hbus->wq
++ * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
++ * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
++ * channel->rescind = true.
++ */
++ flush_workqueue(hbus->wq);
++
+ return ret;
+ }
+
+@@ -3069,7 +3194,6 @@ static int hv_pci_probe(struct hv_device *hdev,
+ struct hv_pcibus_device *hbus;
+ u16 dom_req, dom;
+ char *name;
+- bool enter_d0_retry = true;
+ int ret;
+
+ /*
+@@ -3105,6 +3229,7 @@ static int hv_pci_probe(struct hv_device *hdev,
+ return -ENOMEM;
+
+ hbus->bridge = bridge;
++ mutex_init(&hbus->state_lock);
+ hbus->state = hv_pcibus_init;
+ hbus->wslot_res_allocated = -1;
+
+@@ -3139,6 +3264,15 @@ static int hv_pci_probe(struct hv_device *hdev,
+ hbus->bridge->domain_nr = dom;
+ #ifdef CONFIG_X86
+ hbus->sysdata.domain = dom;
++#elif defined(CONFIG_ARM64)
++ /*
++ * Set the PCI bus parent to be the corresponding VMbus
++ * device. Then the VMbus device will be assigned as the
++ * ACPI companion in pcibios_root_bridge_prepare() and
++ * pci_dma_configure() will propagate device coherence
++ * information to devices created on the bus.
++ */
++ hbus->sysdata.parent = hdev->device.parent;
+ #endif
+
+ hbus->hdev = hdev;
+@@ -3196,49 +3330,15 @@ static int hv_pci_probe(struct hv_device *hdev,
+ if (ret)
+ goto free_fwnode;
+
+-retry:
+ ret = hv_pci_query_relations(hdev);
+ if (ret)
+ goto free_irq_domain;
+
+- ret = hv_pci_enter_d0(hdev);
+- /*
+- * In certain case (Kdump) the pci device of interest was
+- * not cleanly shut down and resource is still held on host
+- * side, the host could return invalid device status.
+- * We need to explicitly request host to release the resource
+- * and try to enter D0 again.
+- * Since the hv_pci_bus_exit() call releases structures
+- * of all its child devices, we need to start the retry from
+- * hv_pci_query_relations() call, requesting host to send
+- * the synchronous child device relations message before this
+- * information is needed in hv_send_resources_allocated()
+- * call later.
+- */
+- if (ret == -EPROTO && enter_d0_retry) {
+- enter_d0_retry = false;
+-
+- dev_err(&hdev->device, "Retrying D0 Entry\n");
+-
+- /*
+- * Hv_pci_bus_exit() calls hv_send_resources_released()
+- * to free up resources of its child devices.
+- * In the kdump kernel we need to set the
+- * wslot_res_allocated to 255 so it scans all child
+- * devices to release resources allocated in the
+- * normal kernel before panic happened.
+- */
+- hbus->wslot_res_allocated = 255;
+- ret = hv_pci_bus_exit(hdev, true);
++ mutex_lock(&hbus->state_lock);
+
+- if (ret == 0)
+- goto retry;
+-
+- dev_err(&hdev->device,
+- "Retrying D0 failed with ret %d\n", ret);
+- }
++ ret = hv_pci_enter_d0(hdev);
+ if (ret)
+- goto free_irq_domain;
++ goto release_state_lock;
+
+ ret = hv_pci_allocate_bridge_windows(hbus);
+ if (ret)
+@@ -3256,12 +3356,15 @@ retry:
+ if (ret)
+ goto free_windows;
+
++ mutex_unlock(&hbus->state_lock);
+ return 0;
+
+ free_windows:
+ hv_pci_free_bridge_windows(hbus);
+ exit_d0:
+ (void) hv_pci_bus_exit(hdev, true);
++release_state_lock:
++ mutex_unlock(&hbus->state_lock);
+ free_irq_domain:
+ irq_domain_remove(hbus->irq_domain);
+ free_fwnode:
+@@ -3494,20 +3597,26 @@ static int hv_pci_resume(struct hv_device *hdev)
+ if (ret)
+ goto out;
+
++ mutex_lock(&hbus->state_lock);
++
+ ret = hv_pci_enter_d0(hdev);
+ if (ret)
+- goto out;
++ goto release_state_lock;
+
+ ret = hv_send_resources_allocated(hdev);
+ if (ret)
+- goto out;
++ goto release_state_lock;
+
+ prepopulate_bars(hbus);
+
+ hv_pci_restore_msi_state(hbus);
+
+ hbus->state = hv_pcibus_installed;
++ mutex_unlock(&hbus->state_lock);
+ return 0;
++
++release_state_lock:
++ mutex_unlock(&hbus->state_lock);
+ out:
+ vmbus_close(hdev->channel);
+ return ret;
+diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c
+index 48169b1e38171..e73e18a73833b 100644
+--- a/drivers/pci/controller/pci-loongson.c
++++ b/drivers/pci/controller/pci-loongson.c
+@@ -13,9 +13,14 @@
+ #include "../pci.h"
+
+ /* Device IDs */
+-#define DEV_PCIE_PORT_0 0x7a09
+-#define DEV_PCIE_PORT_1 0x7a19
+-#define DEV_PCIE_PORT_2 0x7a29
++#define DEV_LS2K_PCIE_PORT0 0x1a05
++#define DEV_LS7A_PCIE_PORT0 0x7a09
++#define DEV_LS7A_PCIE_PORT1 0x7a19
++#define DEV_LS7A_PCIE_PORT2 0x7a29
++#define DEV_LS7A_PCIE_PORT3 0x7a39
++#define DEV_LS7A_PCIE_PORT4 0x7a49
++#define DEV_LS7A_PCIE_PORT5 0x7a59
++#define DEV_LS7A_PCIE_PORT6 0x7a69
+
+ #define DEV_LS2K_APB 0x7a02
+ #define DEV_LS7A_CONF 0x7a10
+@@ -38,11 +43,11 @@ static void bridge_class_quirk(struct pci_dev *dev)
+ dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+ }
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+- DEV_PCIE_PORT_0, bridge_class_quirk);
++ DEV_LS7A_PCIE_PORT0, bridge_class_quirk);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+- DEV_PCIE_PORT_1, bridge_class_quirk);
++ DEV_LS7A_PCIE_PORT1, bridge_class_quirk);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+- DEV_PCIE_PORT_2, bridge_class_quirk);
++ DEV_LS7A_PCIE_PORT2, bridge_class_quirk);
+
+ static void system_bus_quirk(struct pci_dev *pdev)
+ {
+@@ -60,37 +65,33 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
+ DEV_LS7A_LPC, system_bus_quirk);
+
+-static void loongson_mrrs_quirk(struct pci_dev *dev)
++static void loongson_mrrs_quirk(struct pci_dev *pdev)
+ {
+- struct pci_bus *bus = dev->bus;
+- struct pci_dev *bridge;
+- static const struct pci_device_id bridge_devids[] = {
+- { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_0) },
+- { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_1) },
+- { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_2) },
+- { 0, },
+- };
+-
+- /* look for the matching bridge */
+- while (!pci_is_root_bus(bus)) {
+- bridge = bus->self;
+- bus = bus->parent;
+- /*
+- * Some Loongson PCIe ports have a h/w limitation of
+- * 256 bytes maximum read request size. They can't handle
+- * anything larger than this. So force this limit on
+- * any devices attached under these ports.
+- */
+- if (pci_match_id(bridge_devids, bridge)) {
+- if (pcie_get_readrq(dev) > 256) {
+- pci_info(dev, "limiting MRRS to 256\n");
+- pcie_set_readrq(dev, 256);
+- }
+- break;
+- }
+- }
++ /*
++ * Some Loongson PCIe ports have h/w limitations of maximum read
++ * request size. They can't handle anything larger than this. So
++ * force this limit on any devices attached under these ports.
++ */
++ struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus);
++
++ bridge->no_inc_mrrs = 1;
+ }
+-DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS2K_PCIE_PORT0, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT0, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT1, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT2, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT3, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT4, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT5, loongson_mrrs_quirk);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
++ DEV_LS7A_PCIE_PORT6, loongson_mrrs_quirk);
+
+ static void __iomem *cfg1_map(struct loongson_pci *priv, int bus,
+ unsigned int devfn, int where)
+diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
+index ed13e81cd691d..2a3bf82aa4e26 100644
+--- a/drivers/pci/controller/pci-mvebu.c
++++ b/drivers/pci/controller/pci-mvebu.c
+@@ -51,10 +51,14 @@
+ PCIE_CONF_FUNC(PCI_FUNC(devfn)) | PCIE_CONF_REG(where) | \
+ PCIE_CONF_ADDR_EN)
+ #define PCIE_CONF_DATA_OFF 0x18fc
++#define PCIE_INT_CAUSE_OFF 0x1900
++#define PCIE_INT_PM_PME BIT(28)
+ #define PCIE_MASK_OFF 0x1910
+ #define PCIE_MASK_ENABLE_INTS 0x0f000000
+ #define PCIE_CTRL_OFF 0x1a00
+ #define PCIE_CTRL_X1_MODE 0x0001
++#define PCIE_CTRL_RC_MODE BIT(1)
++#define PCIE_CTRL_MASTER_HOT_RESET BIT(24)
+ #define PCIE_STAT_OFF 0x1a04
+ #define PCIE_STAT_BUS 0xff00
+ #define PCIE_STAT_DEV 0x1f0000
+@@ -125,6 +129,11 @@ static bool mvebu_pcie_link_up(struct mvebu_pcie_port *port)
+ return !(mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_LINK_DOWN);
+ }
+
++static u8 mvebu_pcie_get_local_bus_nr(struct mvebu_pcie_port *port)
++{
++ return (mvebu_readl(port, PCIE_STAT_OFF) & PCIE_STAT_BUS) >> 8;
++}
++
+ static void mvebu_pcie_set_local_bus_nr(struct mvebu_pcie_port *port, int nr)
+ {
+ u32 stat;
+@@ -213,18 +222,21 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
+
+ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
+ {
+- u32 cmd, mask;
++ u32 ctrl, cmd, mask;
+
+- /* Point PCIe unit MBUS decode windows to DRAM space. */
+- mvebu_pcie_setup_wins(port);
++ /* Setup PCIe controller to Root Complex mode. */
++ ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
++ ctrl |= PCIE_CTRL_RC_MODE;
++ mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
+
+- /* Master + slave enable. */
++ /* Disable Root Bridge I/O space, memory space and bus mastering. */
+ cmd = mvebu_readl(port, PCIE_CMD_OFF);
+- cmd |= PCI_COMMAND_IO;
+- cmd |= PCI_COMMAND_MEMORY;
+- cmd |= PCI_COMMAND_MASTER;
++ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ mvebu_writel(port, cmd, PCIE_CMD_OFF);
+
++ /* Point PCIe unit MBUS decode windows to DRAM space. */
++ mvebu_pcie_setup_wins(port);
++
+ /* Enable interrupt lines A-D. */
+ mask = mvebu_readl(port, PCIE_MASK_OFF);
+ mask |= PCIE_MASK_ENABLE_INTS;
+@@ -371,8 +383,7 @@ static void mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port)
+
+ /* Are the new iobase/iolimit values invalid? */
+ if (conf->iolimit < conf->iobase ||
+- conf->iolimitupper < conf->iobaseupper ||
+- !(conf->command & PCI_COMMAND_IO)) {
++ conf->iolimitupper < conf->iobaseupper) {
+ mvebu_pcie_set_window(port, port->io_target, port->io_attr,
+ &desired, &port->iowin);
+ return;
+@@ -409,8 +420,7 @@ static void mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
+ struct pci_bridge_emul_conf *conf = &port->bridge.conf;
+
+ /* Are the new membase/memlimit values invalid? */
+- if (conf->memlimit < conf->membase ||
+- !(conf->command & PCI_COMMAND_MEMORY)) {
++ if (conf->memlimit < conf->membase) {
+ mvebu_pcie_set_window(port, port->mem_target, port->mem_attr,
+ &desired, &port->memwin);
+ return;
+@@ -430,6 +440,54 @@ static void mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port)
+ &port->memwin);
+ }
+
++static pci_bridge_emul_read_status_t
++mvebu_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge,
++ int reg, u32 *value)
++{
++ struct mvebu_pcie_port *port = bridge->data;
++
++ switch (reg) {
++ case PCI_COMMAND:
++ *value = mvebu_readl(port, PCIE_CMD_OFF);
++ break;
++
++ case PCI_PRIMARY_BUS: {
++ /*
++ * From the whole 32bit register we support reading from HW only
++ * secondary bus number which is mvebu local bus number.
++ * Other bits are retrieved only from emulated config buffer.
++ */
++ __le32 *cfgspace = (__le32 *)&bridge->conf;
++ u32 val = le32_to_cpu(cfgspace[PCI_PRIMARY_BUS / 4]);
++ val &= ~0xff00;
++ val |= mvebu_pcie_get_local_bus_nr(port) << 8;
++ *value = val;
++ break;
++ }
++
++ case PCI_INTERRUPT_LINE: {
++ /*
++ * From the whole 32bit register we support reading from HW only
++ * one bit: PCI_BRIDGE_CTL_BUS_RESET.
++ * Other bits are retrieved only from emulated config buffer.
++ */
++ __le32 *cfgspace = (__le32 *)&bridge->conf;
++ u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]);
++ if (mvebu_readl(port, PCIE_CTRL_OFF) & PCIE_CTRL_MASTER_HOT_RESET)
++ val |= PCI_BRIDGE_CTL_BUS_RESET << 16;
++ else
++ val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16);
++ *value = val;
++ break;
++ }
++
++ default:
++ return PCI_BRIDGE_EMUL_NOT_HANDLED;
++ }
++
++ return PCI_BRIDGE_EMUL_HANDLED;
++}
++
+ static pci_bridge_emul_read_status_t
+ mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+ int reg, u32 *value)
+@@ -442,9 +500,7 @@ mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+ break;
+
+ case PCI_EXP_DEVCTL:
+- *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL) &
+- ~(PCI_EXP_DEVCTL_URRE | PCI_EXP_DEVCTL_FERE |
+- PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_CERE);
++ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
+ break;
+
+ case PCI_EXP_LNKCAP:
+@@ -468,6 +524,18 @@ mvebu_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
+ *value = mvebu_readl(port, PCIE_RC_RTSTA);
+ break;
+
++ case PCI_EXP_DEVCAP2:
++ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCAP2);
++ break;
++
++ case PCI_EXP_DEVCTL2:
++ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
++ break;
++
++ case PCI_EXP_LNKCTL2:
++ *value = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
++ break;
++
+ default:
+ return PCI_BRIDGE_EMUL_NOT_HANDLED;
+ }
+@@ -484,26 +552,16 @@ mvebu_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
+
+ switch (reg) {
+ case PCI_COMMAND:
+- {
+- if (!mvebu_has_ioport(port))
+- conf->command &= ~PCI_COMMAND_IO;
+-
+- if ((old ^ new) & PCI_COMMAND_IO)
+- mvebu_pcie_handle_iobase_change(port);
+- if ((old ^ new) & PCI_COMMAND_MEMORY)
+- mvebu_pcie_handle_membase_change(port);
++ if (!mvebu_has_ioport(port)) {
++ conf->command = cpu_to_le16(
++ le16_to_cpu(conf->command) & ~PCI_COMMAND_IO);
++ new &= ~PCI_COMMAND_IO;
++ }
+
++ mvebu_writel(port, new, PCIE_CMD_OFF);
+ break;
+- }
+
+ case PCI_IO_BASE:
+- /*
+- * We keep bit 1 set, it is a read-only bit that
+- * indicates we support 32 bits addressing for the
+- * I/O
+- */
+- conf->iobase |= PCI_IO_RANGE_TYPE_32;
+- conf->iolimit |= PCI_IO_RANGE_TYPE_32;
+ mvebu_pcie_handle_iobase_change(port);
+ break;
+
+@@ -516,7 +574,19 @@ mvebu_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge,
+ break;
+
+ case PCI_PRIMARY_BUS:
+- mvebu_pcie_set_local_bus_nr(port, conf->secondary_bus);
++ if (mask & 0xff00)
++ mvebu_pcie_set_local_bus_nr(port, conf->secondary_bus);
++ break;
++
++ case PCI_INTERRUPT_LINE:
++ if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) {
++ u32 ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
++ if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16))
++ ctrl |= PCIE_CTRL_MASTER_HOT_RESET;
++ else
++ ctrl &= ~PCIE_CTRL_MASTER_HOT_RESET;
++ mvebu_writel(port, ctrl, PCIE_CTRL_OFF);
++ }
+ break;
+
+ default:
+@@ -532,13 +602,6 @@ mvebu_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+
+ switch (reg) {
+ case PCI_EXP_DEVCTL:
+- /*
+- * Armada370 data says these bits must always
+- * be zero when in root complex mode.
+- */
+- new &= ~(PCI_EXP_DEVCTL_URRE | PCI_EXP_DEVCTL_FERE |
+- PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_CERE);
+-
+ mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL);
+ break;
+
+@@ -555,12 +618,31 @@ mvebu_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge,
+ break;
+
+ case PCI_EXP_RTSTA:
+- mvebu_writel(port, new, PCIE_RC_RTSTA);
++ /*
++ * PME Status bit in Root Status Register (PCIE_RC_RTSTA)
++ * is read-only and can be cleared only by writing 0b to the
++ * Interrupt Cause RW0C register (PCIE_INT_CAUSE_OFF). So
++ * clear PME via Interrupt Cause.
++ */
++ if (new & PCI_EXP_RTSTA_PME)
++ mvebu_writel(port, ~PCIE_INT_PM_PME, PCIE_INT_CAUSE_OFF);
++ break;
++
++ case PCI_EXP_DEVCTL2:
++ mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_DEVCTL2);
++ break;
++
++ case PCI_EXP_LNKCTL2:
++ mvebu_writel(port, new, PCIE_CAP_PCIEXP + PCI_EXP_LNKCTL2);
++ break;
++
++ default:
+ break;
+ }
+ }
+
+ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
++ .read_base = mvebu_pci_bridge_emul_base_conf_read,
+ .write_base = mvebu_pci_bridge_emul_base_conf_write,
+ .read_pcie = mvebu_pci_bridge_emul_pcie_conf_read,
+ .write_pcie = mvebu_pci_bridge_emul_pcie_conf_write,
+@@ -570,9 +652,11 @@ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
+ * Initialize the configuration space of the PCI-to-PCI bridge
+ * associated with the given PCIe interface.
+ */
+-static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
++static int mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
+ {
+ struct pci_bridge_emul *bridge = &port->bridge;
++ u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP);
++ u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS);
+
+ bridge->conf.vendor = PCI_VENDOR_ID_MARVELL;
+ bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16;
+@@ -585,11 +669,17 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
+ bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32;
+ }
+
++ /*
++ * Older mvebu hardware provides PCIe Capability structure only in
++ * version 1. New hardware provides it in version 2.
++ */
++ bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver);
++
+ bridge->has_pcie = true;
+ bridge->data = port;
+ bridge->ops = &mvebu_pci_bridge_emul_ops;
+
+- pci_bridge_emul_init(bridge, PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR);
++ return pci_bridge_emul_init(bridge, PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR);
+ }
+
+ static inline struct mvebu_pcie *sys_to_pcie(struct pci_sys_data *sys)
+@@ -1112,9 +1202,94 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
+ continue;
+ }
+
++ ret = mvebu_pci_bridge_emul_init(port);
++ if (ret < 0) {
++ dev_err(dev, "%s: cannot init emulated bridge\n",
++ port->name);
++ devm_iounmap(dev, port->base);
++ port->base = NULL;
++ mvebu_pcie_powerdown(port);
++ continue;
++ }
++
++ /*
++ * PCIe topology exported by mvebu hw is quite complicated. In
++ * reality has something like N fully independent host bridges
++ * where each host bridge has one PCIe Root Port (which acts as
++ * PCI Bridge device). Each host bridge has its own independent
++ * internal registers, independent access to PCI config space,
++ * independent interrupt lines, independent window and memory
++ * access configuration. But additionally there is some kind of
++ * peer-to-peer support between PCIe devices behind different
++ * host bridges limited just to forwarding of memory and I/O
++ * transactions (forwarding of error messages and config cycles
++ * is not supported). So we could say there are N independent
++ * PCIe Root Complexes.
++ *
++ * For this kind of setup DT should have been structured into
++ * N independent PCIe controllers / host bridges. But instead
++ * structure in past was defined to put PCIe Root Ports of all
++ * host bridges into one bus zero, like in classic multi-port
++ * Root Complex setup with just one host bridge.
++ *
++ * This means that pci-mvebu.c driver provides "virtual" bus 0
++ * on which registers all PCIe Root Ports (PCI Bridge devices)
++ * specified in DT by their BDF addresses and virtually routes
++ * PCI config access of each PCI bridge device to specific PCIe
++ * host bridge.
++ *
++ * Normally PCI Bridge should choose between Type 0 and Type 1
++ * config requests based on primary and secondary bus numbers
++ * configured on the bridge itself. But because mvebu PCI Bridge
++ * does not have registers for primary and secondary bus numbers
++ * in its config space, it determinates type of config requests
++ * via its own custom way.
++ *
++ * There are two options how mvebu determinate type of config
++ * request.
++ *
++ * 1. If Secondary Bus Number Enable bit is not set or is not
++ * available (applies for pre-XP PCIe controllers) then Type 0
++ * is used if target bus number equals Local Bus Number (bits
++ * [15:8] in register 0x1a04) and target device number differs
++ * from Local Device Number (bits [20:16] in register 0x1a04).
++ * Type 1 is used if target bus number differs from Local Bus
++ * Number. And when target bus number equals Local Bus Number
++ * and target device equals Local Device Number then request is
++ * routed to Local PCI Bridge (PCIe Root Port).
++ *
++ * 2. If Secondary Bus Number Enable bit is set (bit 7 in
++ * register 0x1a2c) then mvebu hw determinate type of config
++ * request like compliant PCI Bridge based on primary bus number
++ * which is configured via Local Bus Number (bits [15:8] in
++ * register 0x1a04) and secondary bus number which is configured
++ * via Secondary Bus Number (bits [7:0] in register 0x1a2c).
++ * Local PCI Bridge (PCIe Root Port) is available on primary bus
++ * as device with Local Device Number (bits [20:16] in register
++ * 0x1a04).
++ *
++ * Secondary Bus Number Enable bit is disabled by default and
++ * option 2. is not available on pre-XP PCIe controllers. Hence
++ * this driver always use option 1.
++ *
++ * Basically it means that primary and secondary buses shares
++ * one virtual number configured via Local Bus Number bits and
++ * Local Device Number bits determinates if accessing primary
++ * or secondary bus. Set Local Device Number to 1 and redirect
++ * all writes of PCI Bridge Secondary Bus Number register to
++ * Local Bus Number (bits [15:8] in register 0x1a04).
++ *
++ * So when accessing devices on buses behind secondary bus
++ * number it would work correctly. And also when accessing
++ * device 0 at secondary bus number via config space would be
++ * correctly routed to secondary bus. Due to issues described
++ * in mvebu_pcie_setup_hw(), PCI Bridges at primary bus (zero)
++ * are not accessed directly via PCI config space but rarher
++ * indirectly via kernel emulated PCI bridge driver.
++ */
+ mvebu_pcie_setup_hw(port);
+ mvebu_pcie_set_local_dev_nr(port, 1);
+- mvebu_pci_bridge_emul_init(port);
++ mvebu_pcie_set_local_bus_nr(port, 0);
+ }
+
+ pcie->nports = i;
+diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
+index e64536047b651..4641e57487cfd 100644
+--- a/drivers/pci/controller/pci-xgene.c
++++ b/drivers/pci/controller/pci-xgene.c
+@@ -480,28 +480,27 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size)
+ }
+
+ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port,
+- struct resource_entry *entry,
+- u8 *ib_reg_mask)
++ struct of_pci_range *range, u8 *ib_reg_mask)
+ {
+ void __iomem *cfg_base = port->cfg_base;
+ struct device *dev = port->dev;
+ void __iomem *bar_addr;
+ u32 pim_reg;
+- u64 cpu_addr = entry->res->start;
+- u64 pci_addr = cpu_addr - entry->offset;
+- u64 size = resource_size(entry->res);
++ u64 cpu_addr = range->cpu_addr;
++ u64 pci_addr = range->pci_addr;
++ u64 size = range->size;
+ u64 mask = ~(size - 1) | EN_REG;
+ u32 flags = PCI_BASE_ADDRESS_MEM_TYPE_64;
+ u32 bar_low;
+ int region;
+
+- region = xgene_pcie_select_ib_reg(ib_reg_mask, size);
++ region = xgene_pcie_select_ib_reg(ib_reg_mask, range->size);
+ if (region < 0) {
+ dev_warn(dev, "invalid pcie dma-range config\n");
+ return;
+ }
+
+- if (entry->res->flags & IORESOURCE_PREFETCH)
++ if (range->flags & IORESOURCE_PREFETCH)
+ flags |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ bar_low = pcie_bar_low_val((u32)cpu_addr, flags);
+@@ -532,13 +531,25 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port,
+
+ static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie_port *port)
+ {
+- struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port);
+- struct resource_entry *entry;
++ struct device_node *np = port->node;
++ struct of_pci_range range;
++ struct of_pci_range_parser parser;
++ struct device *dev = port->dev;
+ u8 ib_reg_mask = 0;
+
+- resource_list_for_each_entry(entry, &bridge->dma_ranges)
+- xgene_pcie_setup_ib_reg(port, entry, &ib_reg_mask);
++ if (of_pci_dma_range_parser_init(&parser, np)) {
++ dev_err(dev, "missing dma-ranges property\n");
++ return -EINVAL;
++ }
++
++ /* Get the dma-ranges from DT */
++ for_each_of_pci_range(&parser, &range) {
++ u64 end = range.cpu_addr + range.size - 1;
+
++ dev_dbg(dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n",
++ range.flags, range.cpu_addr, end, range.pci_addr);
++ xgene_pcie_setup_ib_reg(port, &range, &ib_reg_mask);
++ }
+ return 0;
+ }
+
+diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
+index 17c59b0d6978b..36c8702439e95 100644
+--- a/drivers/pci/controller/pcie-mediatek-gen3.c
++++ b/drivers/pci/controller/pcie-mediatek-gen3.c
+@@ -79,6 +79,9 @@
+ #define PCIE_ICMD_PM_REG 0x198
+ #define PCIE_TURN_OFF_LINK BIT(4)
+
++#define PCIE_MISC_CTRL_REG 0x348
++#define PCIE_DISABLE_DVFSRC_VLT_REQ BIT(1)
++
+ #define PCIE_TRANS_TABLE_BASE_REG 0x800
+ #define PCIE_ATR_SRC_ADDR_MSB_OFFSET 0x4
+ #define PCIE_ATR_TRSL_ADDR_LSB_OFFSET 0x8
+@@ -297,6 +300,11 @@ static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
+ val &= ~PCIE_INTX_ENABLE;
+ writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+
++ /* Disable DVFSRC voltage request */
++ val = readl_relaxed(port->base + PCIE_MISC_CTRL_REG);
++ val |= PCIE_DISABLE_DVFSRC_VLT_REQ;
++ writel_relaxed(val, port->base + PCIE_MISC_CTRL_REG);
++
+ /* Assert all reset signals */
+ val = readl_relaxed(port->base + PCIE_RST_CTRL_REG);
+ val |= PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB;
+@@ -592,7 +600,8 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
+ &intx_domain_ops, port);
+ if (!port->intx_domain) {
+ dev_err(dev, "failed to create INTx IRQ domain\n");
+- return -ENODEV;
++ ret = -ENODEV;
++ goto out_put_node;
+ }
+
+ /* Setup MSI */
+@@ -615,6 +624,7 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
+ goto err_msi_domain;
+ }
+
++ of_node_put(intc_node);
+ return 0;
+
+ err_msi_domain:
+@@ -622,6 +632,8 @@ err_msi_domain:
+ err_msi_bottom_domain:
+ irq_domain_remove(port->intx_domain);
+
++out_put_node:
++ of_node_put(intc_node);
+ return ret;
+ }
+
+diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
+index 2f3f974977a36..5273cb5ede0fd 100644
+--- a/drivers/pci/controller/pcie-mediatek.c
++++ b/drivers/pci/controller/pcie-mediatek.c
+@@ -1008,6 +1008,7 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie)
+ "mediatek,generic-pciecfg");
+ if (cfg_node) {
+ pcie->cfg = syscon_node_to_regmap(cfg_node);
++ of_node_put(cfg_node);
+ if (IS_ERR(pcie->cfg))
+ return PTR_ERR(pcie->cfg);
+ }
+diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
+index 329f930d17aa7..8eb049c839ca7 100644
+--- a/drivers/pci/controller/pcie-microchip-host.c
++++ b/drivers/pci/controller/pcie-microchip-host.c
+@@ -167,12 +167,12 @@
+ #define EVENT_PCIE_DLUP_EXIT 2
+ #define EVENT_SEC_TX_RAM_SEC_ERR 3
+ #define EVENT_SEC_RX_RAM_SEC_ERR 4
+-#define EVENT_SEC_AXI2PCIE_RAM_SEC_ERR 5
+-#define EVENT_SEC_PCIE2AXI_RAM_SEC_ERR 6
++#define EVENT_SEC_PCIE2AXI_RAM_SEC_ERR 5
++#define EVENT_SEC_AXI2PCIE_RAM_SEC_ERR 6
+ #define EVENT_DED_TX_RAM_DED_ERR 7
+ #define EVENT_DED_RX_RAM_DED_ERR 8
+-#define EVENT_DED_AXI2PCIE_RAM_DED_ERR 9
+-#define EVENT_DED_PCIE2AXI_RAM_DED_ERR 10
++#define EVENT_DED_PCIE2AXI_RAM_DED_ERR 9
++#define EVENT_DED_AXI2PCIE_RAM_DED_ERR 10
+ #define EVENT_LOCAL_DMA_END_ENGINE_0 11
+ #define EVENT_LOCAL_DMA_END_ENGINE_1 12
+ #define EVENT_LOCAL_DMA_ERROR_ENGINE_0 13
+@@ -416,6 +416,7 @@ static void mc_handle_msi(struct irq_desc *desc)
+
+ status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+ if (status & PM_MSI_INT_MSI_MASK) {
++ writel_relaxed(status & PM_MSI_INT_MSI_MASK, bridge_base_addr + ISTATUS_LOCAL);
+ status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
+ for_each_set_bit(bit, &status, msi->num_vectors) {
+ ret = generic_handle_domain_irq(msi->dev_domain, bit);
+@@ -432,13 +433,8 @@ static void mc_msi_bottom_irq_ack(struct irq_data *data)
+ void __iomem *bridge_base_addr =
+ port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+ u32 bitpos = data->hwirq;
+- unsigned long status;
+
+ writel_relaxed(BIT(bitpos), bridge_base_addr + ISTATUS_MSI);
+- status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
+- if (!status)
+- writel_relaxed(BIT(PM_MSI_INT_MSI_SHIFT),
+- bridge_base_addr + ISTATUS_LOCAL);
+ }
+
+ static void mc_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+@@ -898,6 +894,7 @@ static int mc_pcie_init_irq_domains(struct mc_port *port)
+ &event_domain_ops, port);
+ if (!port->event_domain) {
+ dev_err(dev, "failed to get event domain\n");
++ of_node_put(pcie_intc_node);
+ return -ENOMEM;
+ }
+
+@@ -907,6 +904,7 @@ static int mc_pcie_init_irq_domains(struct mc_port *port)
+ &intx_domain_ops, port);
+ if (!port->intx_domain) {
+ dev_err(dev, "failed to get an INTx IRQ domain\n");
++ of_node_put(pcie_intc_node);
+ return -ENOMEM;
+ }
+
+diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c
+index 8f3131844e777..bfb13f358d073 100644
+--- a/drivers/pci/controller/pcie-rcar-host.c
++++ b/drivers/pci/controller/pcie-rcar-host.c
+@@ -52,10 +52,10 @@ struct rcar_msi {
+ */
+ static void __iomem *pcie_base;
+ /*
+- * Static copy of bus clock pointer, so we can check whether the clock
+- * is enabled or not.
++ * Static copy of PCIe device pointer, so we can check whether the
++ * device is runtime suspended or not.
+ */
+-static struct clk *pcie_bus_clk;
++static struct device *pcie_dev;
+ #endif
+
+ /* Structure representing the PCIe interface */
+@@ -794,7 +794,7 @@ static int rcar_pcie_get_resources(struct rcar_pcie_host *host)
+ #ifdef CONFIG_ARM
+ /* Cache static copy for L1 link state fixup hook on aarch32 */
+ pcie_base = pcie->base;
+- pcie_bus_clk = host->bus_clk;
++ pcie_dev = pcie->dev;
+ #endif
+
+ return 0;
+@@ -1064,7 +1064,7 @@ static int rcar_pcie_aarch32_abort_handler(unsigned long addr,
+
+ spin_lock_irqsave(&pmsr_lock, flags);
+
+- if (!pcie_base || !__clk_is_enabled(pcie_bus_clk)) {
++ if (!pcie_base || pm_runtime_suspended(pcie_dev)) {
+ ret = 1;
+ goto unlock_exit;
+ }
+diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
+index 5fb9ce6e536e0..0af0e965fb57e 100644
+--- a/drivers/pci/controller/pcie-rockchip-ep.c
++++ b/drivers/pci/controller/pcie-rockchip-ep.c
+@@ -61,70 +61,38 @@ static void rockchip_pcie_clear_ep_ob_atu(struct rockchip_pcie *rockchip,
+ ROCKCHIP_PCIE_AT_OB_REGION_DESC0(region));
+ rockchip_pcie_write(rockchip, 0,
+ ROCKCHIP_PCIE_AT_OB_REGION_DESC1(region));
+- rockchip_pcie_write(rockchip, 0,
+- ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR0(region));
+- rockchip_pcie_write(rockchip, 0,
+- ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR1(region));
+ }
+
+ static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn,
+- u32 r, u32 type, u64 cpu_addr,
+- u64 pci_addr, size_t size)
++ u32 r, u64 cpu_addr, u64 pci_addr,
++ size_t size)
+ {
+- u64 sz = 1ULL << fls64(size - 1);
+- int num_pass_bits = ilog2(sz);
+- u32 addr0, addr1, desc0, desc1;
+- bool is_nor_msg = (type == AXI_WRAPPER_NOR_MSG);
++ int num_pass_bits = fls64(size - 1);
++ u32 addr0, addr1, desc0;
+
+- /* The minimal region size is 1MB */
+ if (num_pass_bits < 8)
+ num_pass_bits = 8;
+
+- cpu_addr -= rockchip->mem_res->start;
+- addr0 = ((is_nor_msg ? 0x10 : (num_pass_bits - 1)) &
+- PCIE_CORE_OB_REGION_ADDR0_NUM_BITS) |
+- (lower_32_bits(cpu_addr) & PCIE_CORE_OB_REGION_ADDR0_LO_ADDR);
+- addr1 = upper_32_bits(is_nor_msg ? cpu_addr : pci_addr);
+- desc0 = ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN(fn) | type;
+- desc1 = 0;
+-
+- if (is_nor_msg) {
+- rockchip_pcie_write(rockchip, 0,
+- ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r));
+- rockchip_pcie_write(rockchip, 0,
+- ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r));
+- rockchip_pcie_write(rockchip, desc0,
+- ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r));
+- rockchip_pcie_write(rockchip, desc1,
+- ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r));
+- } else {
+- /* PCI bus address region */
+- rockchip_pcie_write(rockchip, addr0,
+- ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r));
+- rockchip_pcie_write(rockchip, addr1,
+- ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r));
+- rockchip_pcie_write(rockchip, desc0,
+- ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r));
+- rockchip_pcie_write(rockchip, desc1,
+- ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r));
+-
+- addr0 =
+- ((num_pass_bits - 1) & PCIE_CORE_OB_REGION_ADDR0_NUM_BITS) |
+- (lower_32_bits(cpu_addr) &
+- PCIE_CORE_OB_REGION_ADDR0_LO_ADDR);
+- addr1 = upper_32_bits(cpu_addr);
+- }
++ addr0 = ((num_pass_bits - 1) & PCIE_CORE_OB_REGION_ADDR0_NUM_BITS) |
++ (lower_32_bits(pci_addr) & PCIE_CORE_OB_REGION_ADDR0_LO_ADDR);
++ addr1 = upper_32_bits(pci_addr);
++ desc0 = ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN(fn) | AXI_WRAPPER_MEM_WRITE;
+
+- /* CPU bus address region */
++ /* PCI bus address region */
+ rockchip_pcie_write(rockchip, addr0,
+- ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR0(r));
++ ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r));
+ rockchip_pcie_write(rockchip, addr1,
+- ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR1(r));
++ ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r));
++ rockchip_pcie_write(rockchip, desc0,
++ ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r));
++ rockchip_pcie_write(rockchip, 0,
++ ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r));
+ }
+
+ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
+ struct pci_epf_header *hdr)
+ {
++ u32 reg;
+ struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+
+@@ -137,8 +105,9 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn,
+ PCIE_CORE_CONFIG_VENDOR);
+ }
+
+- rockchip_pcie_write(rockchip, hdr->deviceid << 16,
+- ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + PCI_VENDOR_ID);
++ reg = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_DID_VID);
++ reg = (reg & 0xFFFF) | (hdr->deviceid << 16);
++ rockchip_pcie_write(rockchip, reg, PCIE_EP_CONFIG_DID_VID);
+
+ rockchip_pcie_write(rockchip,
+ hdr->revid |
+@@ -256,27 +225,20 @@ static void rockchip_pcie_ep_clear_bar(struct pci_epc *epc, u8 fn, u8 vfn,
+ ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar));
+ }
+
++static inline u32 rockchip_ob_region(phys_addr_t addr)
++{
++ return (addr >> ilog2(SZ_1M)) & 0x1f;
++}
++
+ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+ phys_addr_t addr, u64 pci_addr,
+ size_t size)
+ {
+ struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+ struct rockchip_pcie *pcie = &ep->rockchip;
+- u32 r;
+-
+- r = find_first_zero_bit(&ep->ob_region_map,
+- sizeof(ep->ob_region_map) * BITS_PER_LONG);
+- /*
+- * Region 0 is reserved for configuration space and shouldn't
+- * be used elsewhere per TRM, so leave it out.
+- */
+- if (r >= ep->max_regions - 1) {
+- dev_err(&epc->dev, "no free outbound region\n");
+- return -EINVAL;
+- }
++ u32 r = rockchip_ob_region(addr);
+
+- rockchip_pcie_prog_ep_ob_atu(pcie, fn, r, AXI_WRAPPER_MEM_WRITE, addr,
+- pci_addr, size);
++ rockchip_pcie_prog_ep_ob_atu(pcie, fn, r, addr, pci_addr, size);
+
+ set_bit(r, &ep->ob_region_map);
+ ep->ob_addr[r] = addr;
+@@ -291,15 +253,11 @@ static void rockchip_pcie_ep_unmap_addr(struct pci_epc *epc, u8 fn, u8 vfn,
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+ u32 r;
+
+- for (r = 0; r < ep->max_regions - 1; r++)
++ for (r = 0; r < ep->max_regions; r++)
+ if (ep->ob_addr[r] == addr)
+ break;
+
+- /*
+- * Region 0 is reserved for configuration space and shouldn't
+- * be used elsewhere per TRM, so leave it out.
+- */
+- if (r == ep->max_regions - 1)
++ if (r == ep->max_regions)
+ return;
+
+ rockchip_pcie_clear_ep_ob_atu(rockchip, r);
+@@ -313,15 +271,15 @@ static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn,
+ {
+ struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+- u16 flags;
++ u32 flags;
+
+ flags = rockchip_pcie_read(rockchip,
+ ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+ ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+ flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK;
+ flags |=
+- ((multi_msg_cap << 1) << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) |
+- PCI_MSI_FLAGS_64BIT;
++ (multi_msg_cap << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) |
++ (PCI_MSI_FLAGS_64BIT << ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET);
+ flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP;
+ rockchip_pcie_write(rockchip, flags,
+ ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+@@ -333,7 +291,7 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
+ {
+ struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+- u16 flags;
++ u32 flags;
+
+ flags = rockchip_pcie_read(rockchip,
+ ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+@@ -346,48 +304,25 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn)
+ }
+
+ static void rockchip_pcie_ep_assert_intx(struct rockchip_pcie_ep *ep, u8 fn,
+- u8 intx, bool is_asserted)
++ u8 intx, bool do_assert)
+ {
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+- u32 r = ep->max_regions - 1;
+- u32 offset;
+- u32 status;
+- u8 msg_code;
+-
+- if (unlikely(ep->irq_pci_addr != ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR ||
+- ep->irq_pci_fn != fn)) {
+- rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r,
+- AXI_WRAPPER_NOR_MSG,
+- ep->irq_phys_addr, 0, 0);
+- ep->irq_pci_addr = ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR;
+- ep->irq_pci_fn = fn;
+- }
+
+ intx &= 3;
+- if (is_asserted) {
++
++ if (do_assert) {
+ ep->irq_pending |= BIT(intx);
+- msg_code = ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA + intx;
++ rockchip_pcie_write(rockchip,
++ PCIE_CLIENT_INT_IN_ASSERT |
++ PCIE_CLIENT_INT_PEND_ST_PEND,
++ PCIE_CLIENT_LEGACY_INT_CTRL);
+ } else {
+ ep->irq_pending &= ~BIT(intx);
+- msg_code = ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTA + intx;
++ rockchip_pcie_write(rockchip,
++ PCIE_CLIENT_INT_IN_DEASSERT |
++ PCIE_CLIENT_INT_PEND_ST_NORMAL,
++ PCIE_CLIENT_LEGACY_INT_CTRL);
+ }
+-
+- status = rockchip_pcie_read(rockchip,
+- ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+- ROCKCHIP_PCIE_EP_CMD_STATUS);
+- status &= ROCKCHIP_PCIE_EP_CMD_STATUS_IS;
+-
+- if ((status != 0) ^ (ep->irq_pending != 0)) {
+- status ^= ROCKCHIP_PCIE_EP_CMD_STATUS_IS;
+- rockchip_pcie_write(rockchip, status,
+- ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+- ROCKCHIP_PCIE_EP_CMD_STATUS);
+- }
+-
+- offset =
+- ROCKCHIP_PCIE_MSG_ROUTING(ROCKCHIP_PCIE_MSG_ROUTING_LOCAL_INTX) |
+- ROCKCHIP_PCIE_MSG_CODE(msg_code) | ROCKCHIP_PCIE_MSG_NO_DATA;
+- writel(0, ep->irq_cpu_addr + offset);
+ }
+
+ static int rockchip_pcie_ep_send_legacy_irq(struct rockchip_pcie_ep *ep, u8 fn,
+@@ -417,9 +352,10 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
+ u8 interrupt_num)
+ {
+ struct rockchip_pcie *rockchip = &ep->rockchip;
+- u16 flags, mme, data, data_mask;
++ u32 flags, mme, data, data_mask;
+ u8 msi_count;
+- u64 pci_addr, pci_addr_mask = 0xff;
++ u64 pci_addr;
++ u32 r;
+
+ /* Check MSI enable bit */
+ flags = rockchip_pcie_read(&ep->rockchip,
+@@ -453,21 +389,20 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
+ ROCKCHIP_PCIE_EP_FUNC_BASE(fn) +
+ ROCKCHIP_PCIE_EP_MSI_CTRL_REG +
+ PCI_MSI_ADDRESS_LO);
+- pci_addr &= GENMASK_ULL(63, 2);
+
+ /* Set the outbound region if needed. */
+- if (unlikely(ep->irq_pci_addr != (pci_addr & ~pci_addr_mask) ||
++ if (unlikely(ep->irq_pci_addr != (pci_addr & PCIE_ADDR_MASK) ||
+ ep->irq_pci_fn != fn)) {
+- rockchip_pcie_prog_ep_ob_atu(rockchip, fn, ep->max_regions - 1,
+- AXI_WRAPPER_MEM_WRITE,
++ r = rockchip_ob_region(ep->irq_phys_addr);
++ rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r,
+ ep->irq_phys_addr,
+- pci_addr & ~pci_addr_mask,
+- pci_addr_mask + 1);
+- ep->irq_pci_addr = (pci_addr & ~pci_addr_mask);
++ pci_addr & PCIE_ADDR_MASK,
++ ~PCIE_ADDR_MASK + 1);
++ ep->irq_pci_addr = (pci_addr & PCIE_ADDR_MASK);
+ ep->irq_pci_fn = fn;
+ }
+
+- writew(data, ep->irq_cpu_addr + (pci_addr & pci_addr_mask));
++ writew(data, ep->irq_cpu_addr + (pci_addr & ~PCIE_ADDR_MASK));
+ return 0;
+ }
+
+@@ -507,6 +442,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = {
+ .linkup_notifier = false,
+ .msi_capable = true,
+ .msix_capable = false,
++ .align = 256,
+ };
+
+ static const struct pci_epc_features*
+@@ -548,6 +484,8 @@ static int rockchip_pcie_parse_ep_dt(struct rockchip_pcie *rockchip,
+ if (err < 0 || ep->max_regions > MAX_REGION_LIMIT)
+ ep->max_regions = MAX_REGION_LIMIT;
+
++ ep->ob_region_map = 0;
++
+ err = of_property_read_u8(dev->of_node, "max-functions",
+ &ep->epc->max_functions);
+ if (err < 0)
+@@ -568,7 +506,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
+ struct rockchip_pcie *rockchip;
+ struct pci_epc *epc;
+ size_t max_regions;
+- int err;
++ struct pci_epc_mem_window *windows = NULL;
++ int err, i;
++ u32 cfg_msi, cfg_msix_cp;
+
+ ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+@@ -615,15 +555,27 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
+ /* Only enable function 0 by default */
+ rockchip_pcie_write(rockchip, BIT(0), PCIE_CORE_PHY_FUNC_CFG);
+
+- err = pci_epc_mem_init(epc, rockchip->mem_res->start,
+- resource_size(rockchip->mem_res), PAGE_SIZE);
++ windows = devm_kcalloc(dev, ep->max_regions,
++ sizeof(struct pci_epc_mem_window), GFP_KERNEL);
++ if (!windows) {
++ err = -ENOMEM;
++ goto err_uninit_port;
++ }
++ for (i = 0; i < ep->max_regions; i++) {
++ windows[i].phys_base = rockchip->mem_res->start + (SZ_1M * i);
++ windows[i].size = SZ_1M;
++ windows[i].page_size = SZ_1M;
++ }
++ err = pci_epc_multi_mem_init(epc, windows, ep->max_regions);
++ devm_kfree(dev, windows);
++
+ if (err < 0) {
+ dev_err(dev, "failed to initialize the memory space\n");
+ goto err_uninit_port;
+ }
+
+ ep->irq_cpu_addr = pci_epc_mem_alloc_addr(epc, &ep->irq_phys_addr,
+- SZ_128K);
++ SZ_1M);
+ if (!ep->irq_cpu_addr) {
+ dev_err(dev, "failed to reserve memory space for MSI\n");
+ err = -ENOMEM;
+@@ -632,6 +584,32 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
+
+ ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR;
+
++ /*
++ * MSI-X is not supported but the controller still advertises the MSI-X
++ * capability by default, which can lead to the Root Complex side
++ * allocating MSI-X vectors which cannot be used. Avoid this by skipping
++ * the MSI-X capability entry in the PCIe capabilities linked-list: get
++ * the next pointer from the MSI-X entry and set that in the MSI
++ * capability entry (which is the previous entry). This way the MSI-X
++ * entry is skipped (left out of the linked-list) and not advertised.
++ */
++ cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
++ ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
++
++ cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK;
++
++ cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
++ ROCKCHIP_PCIE_EP_MSIX_CAP_REG) &
++ ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK;
++
++ cfg_msi |= cfg_msix_cp;
++
++ rockchip_pcie_write(rockchip, cfg_msi,
++ PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
++
++ rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE,
++ PCIE_CLIENT_CONFIG);
++
+ return 0;
+ err_epc_mem_exit:
+ pci_epc_mem_exit(epc);
+diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c
+index 990a00e08bc5b..1aa84035a8bc7 100644
+--- a/drivers/pci/controller/pcie-rockchip.c
++++ b/drivers/pci/controller/pcie-rockchip.c
+@@ -14,6 +14,7 @@
+ #include <linux/clk.h>
+ #include <linux/delay.h>
+ #include <linux/gpio/consumer.h>
++#include <linux/iopoll.h>
+ #include <linux/of_pci.h>
+ #include <linux/phy/phy.h>
+ #include <linux/platform_device.h>
+@@ -153,6 +154,12 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
+ }
+ EXPORT_SYMBOL_GPL(rockchip_pcie_parse_dt);
+
++#define rockchip_pcie_read_addr(addr) rockchip_pcie_read(rockchip, addr)
++/* 100 ms max wait time for PHY PLLs to lock */
++#define RK_PHY_PLL_LOCK_TIMEOUT_US 100000
++/* Sleep should be less than 20ms */
++#define RK_PHY_PLL_LOCK_SLEEP_US 1000
++
+ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
+ {
+ struct device *dev = rockchip->dev;
+@@ -254,6 +261,16 @@ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
+ }
+ }
+
++ err = readx_poll_timeout(rockchip_pcie_read_addr,
++ PCIE_CLIENT_SIDE_BAND_STATUS,
++ regs, !(regs & PCIE_CLIENT_PHY_ST),
++ RK_PHY_PLL_LOCK_SLEEP_US,
++ RK_PHY_PLL_LOCK_TIMEOUT_US);
++ if (err) {
++ dev_err(dev, "PHY PLLs could not lock, %d\n", err);
++ goto err_power_off_phy;
++ }
++
+ /*
+ * Please don't reorder the deassert sequence of the following
+ * four reset pins.
+diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h
+index 1650a5087450b..e3a9292f2dbe5 100644
+--- a/drivers/pci/controller/pcie-rockchip.h
++++ b/drivers/pci/controller/pcie-rockchip.h
+@@ -38,6 +38,13 @@
+ #define PCIE_CLIENT_MODE_EP HIWORD_UPDATE(0x0040, 0)
+ #define PCIE_CLIENT_GEN_SEL_1 HIWORD_UPDATE(0x0080, 0)
+ #define PCIE_CLIENT_GEN_SEL_2 HIWORD_UPDATE_BIT(0x0080)
++#define PCIE_CLIENT_LEGACY_INT_CTRL (PCIE_CLIENT_BASE + 0x0c)
++#define PCIE_CLIENT_INT_IN_ASSERT HIWORD_UPDATE_BIT(0x0002)
++#define PCIE_CLIENT_INT_IN_DEASSERT HIWORD_UPDATE(0x0002, 0)
++#define PCIE_CLIENT_INT_PEND_ST_PEND HIWORD_UPDATE_BIT(0x0001)
++#define PCIE_CLIENT_INT_PEND_ST_NORMAL HIWORD_UPDATE(0x0001, 0)
++#define PCIE_CLIENT_SIDE_BAND_STATUS (PCIE_CLIENT_BASE + 0x20)
++#define PCIE_CLIENT_PHY_ST BIT(12)
+ #define PCIE_CLIENT_DEBUG_OUT_0 (PCIE_CLIENT_BASE + 0x3c)
+ #define PCIE_CLIENT_DEBUG_LTSSM_MASK GENMASK(5, 0)
+ #define PCIE_CLIENT_DEBUG_LTSSM_L1 0x18
+@@ -132,7 +139,10 @@
+
+ #define PCIE_RC_RP_ATS_BASE 0x400000
+ #define PCIE_RC_CONFIG_NORMAL_BASE 0x800000
++#define PCIE_EP_PF_CONFIG_REGS_BASE 0x800000
+ #define PCIE_RC_CONFIG_BASE 0xa00000
++#define PCIE_EP_CONFIG_BASE 0xa00000
++#define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00)
+ #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08)
+ #define PCIE_RC_CONFIG_SCC_SHIFT 16
+ #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4)
+@@ -149,10 +159,13 @@
+ #define PCIE_RC_CONFIG_THP_CAP (PCIE_RC_CONFIG_BASE + 0x274)
+ #define PCIE_RC_CONFIG_THP_CAP_NEXT_MASK GENMASK(31, 20)
+
++#define MAX_AXI_IB_ROOTPORT_REGION_NUM 3
++#define MIN_AXI_ADDR_BITS_PASSED 8
++#define PCIE_ADDR_MASK GENMASK_ULL(63, MIN_AXI_ADDR_BITS_PASSED)
+ #define PCIE_CORE_AXI_CONF_BASE 0xc00000
+ #define PCIE_CORE_OB_REGION_ADDR0 (PCIE_CORE_AXI_CONF_BASE + 0x0)
+ #define PCIE_CORE_OB_REGION_ADDR0_NUM_BITS 0x3f
+-#define PCIE_CORE_OB_REGION_ADDR0_LO_ADDR 0xffffff00
++#define PCIE_CORE_OB_REGION_ADDR0_LO_ADDR PCIE_ADDR_MASK
+ #define PCIE_CORE_OB_REGION_ADDR1 (PCIE_CORE_AXI_CONF_BASE + 0x4)
+ #define PCIE_CORE_OB_REGION_DESC0 (PCIE_CORE_AXI_CONF_BASE + 0x8)
+ #define PCIE_CORE_OB_REGION_DESC1 (PCIE_CORE_AXI_CONF_BASE + 0xc)
+@@ -160,7 +173,7 @@
+ #define PCIE_CORE_AXI_INBOUND_BASE 0xc00800
+ #define PCIE_RP_IB_ADDR0 (PCIE_CORE_AXI_INBOUND_BASE + 0x0)
+ #define PCIE_CORE_IB_REGION_ADDR0_NUM_BITS 0x3f
+-#define PCIE_CORE_IB_REGION_ADDR0_LO_ADDR 0xffffff00
++#define PCIE_CORE_IB_REGION_ADDR0_LO_ADDR PCIE_ADDR_MASK
+ #define PCIE_RP_IB_ADDR1 (PCIE_CORE_AXI_INBOUND_BASE + 0x4)
+
+ /* Size of one AXI Region (not Region 0) */
+@@ -175,8 +188,6 @@
+ #define AXI_WRAPPER_TYPE1_CFG 0xb
+ #define AXI_WRAPPER_NOR_MSG 0xc
+
+-#define MAX_AXI_IB_ROOTPORT_REGION_NUM 3
+-#define MIN_AXI_ADDR_BITS_PASSED 8
+ #define PCIE_RC_SEND_PME_OFF 0x11960
+ #define ROCKCHIP_VENDOR_ID 0x1d87
+ #define PCIE_LINK_IS_L2(x) \
+@@ -217,21 +228,28 @@
+ #define ROCKCHIP_PCIE_EP_CMD_STATUS 0x4
+ #define ROCKCHIP_PCIE_EP_CMD_STATUS_IS BIT(19)
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_REG 0x90
++#define ROCKCHIP_PCIE_EP_MSI_CP1_OFFSET 8
++#define ROCKCHIP_PCIE_EP_MSI_CP1_MASK GENMASK(15, 8)
++#define ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET 16
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET 17
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK GENMASK(19, 17)
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET 20
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_MME_MASK GENMASK(22, 20)
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_ME BIT(16)
+ #define ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP BIT(24)
++#define ROCKCHIP_PCIE_EP_MSIX_CAP_REG 0xb0
++#define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_OFFSET 8
++#define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK GENMASK(15, 8)
+ #define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1
+ #define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3
+-#define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) (((fn) << 12) & GENMASK(19, 12))
++#define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) \
++ (PCIE_EP_PF_CONFIG_REGS_BASE + (((fn) << 12) & GENMASK(19, 12)))
++#define ROCKCHIP_PCIE_EP_VIRT_FUNC_BASE(fn) \
++ (PCIE_EP_PF_CONFIG_REGS_BASE + 0x10000 + (((fn) << 12) & GENMASK(19, 12)))
+ #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \
+- (PCIE_RC_RP_ATS_BASE + 0x0840 + (fn) * 0x0040 + (bar) * 0x0008)
++ (PCIE_CORE_AXI_CONF_BASE + 0x0828 + (fn) * 0x0040 + (bar) * 0x0008)
+ #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR1(fn, bar) \
+- (PCIE_RC_RP_ATS_BASE + 0x0844 + (fn) * 0x0040 + (bar) * 0x0008)
+-#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x0000 + ((r) & 0x1f) * 0x0020)
++ (PCIE_CORE_AXI_CONF_BASE + 0x082c + (fn) * 0x0040 + (bar) * 0x0008)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN_MASK GENMASK(19, 12)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_DEVFN(devfn) \
+ (((devfn) << 12) & \
+@@ -239,20 +257,21 @@
+ #define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK GENMASK(27, 20)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS(bus) \
+ (((bus) << 20) & ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0_BUS_MASK)
++#define PCIE_RC_EP_ATR_OB_REGIONS_1_32 (PCIE_CORE_AXI_CONF_BASE + 0x0020)
++#define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR0(r) \
++ (PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0000 + ((r) & 0x1f) * 0x0020)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_PCI_ADDR1(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x0004 + ((r) & 0x1f) * 0x0020)
++ (PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0004 + ((r) & 0x1f) * 0x0020)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_HARDCODED_RID BIT(23)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK GENMASK(31, 24)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN(devfn) \
+ (((devfn) << 24) & ROCKCHIP_PCIE_AT_OB_REGION_DESC0_DEVFN_MASK)
+ #define ROCKCHIP_PCIE_AT_OB_REGION_DESC0(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x0008 + ((r) & 0x1f) * 0x0020)
+-#define ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x000c + ((r) & 0x1f) * 0x0020)
+-#define ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR0(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x0018 + ((r) & 0x1f) * 0x0020)
+-#define ROCKCHIP_PCIE_AT_OB_REGION_CPU_ADDR1(r) \
+- (PCIE_RC_RP_ATS_BASE + 0x001c + ((r) & 0x1f) * 0x0020)
++ (PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0008 + ((r) & 0x1f) * 0x0020)
++#define ROCKCHIP_PCIE_AT_OB_REGION_DESC1(r) \
++ (PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x000c + ((r) & 0x1f) * 0x0020)
++#define ROCKCHIP_PCIE_AT_OB_REGION_DESC2(r) \
++ (PCIE_RC_EP_ATR_OB_REGIONS_1_32 + 0x0010 + ((r) & 0x1f) * 0x0020)
+
+ #define ROCKCHIP_PCIE_CORE_EP_FUNC_BAR_CFG0(fn) \
+ (PCIE_CORE_CTRL_MGMT_BASE + 0x0240 + (fn) * 0x0008)
+diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
+index a5987e52700e3..f49001ba96c75 100644
+--- a/drivers/pci/controller/vmd.c
++++ b/drivers/pci/controller/vmd.c
+@@ -881,6 +881,13 @@ static void vmd_remove(struct pci_dev *dev)
+ vmd_remove_irq_domain(vmd);
+ }
+
++static void vmd_shutdown(struct pci_dev *dev)
++{
++ struct vmd_dev *vmd = pci_get_drvdata(dev);
++
++ vmd_remove_irq_domain(vmd);
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static int vmd_suspend(struct device *dev)
+ {
+@@ -900,6 +907,11 @@ static int vmd_resume(struct device *dev)
+ struct vmd_dev *vmd = pci_get_drvdata(pdev);
+ int err, i;
+
++ if (vmd->irq_domain)
++ vmd_set_msi_remapping(vmd, true);
++ else
++ vmd_set_msi_remapping(vmd, false);
++
+ for (i = 0; i < vmd->msix_count; i++) {
+ err = devm_request_irq(dev, pci_irq_vector(pdev, i),
+ vmd_irq, IRQF_NO_THREAD,
+@@ -941,6 +953,7 @@ static struct pci_driver vmd_drv = {
+ .id_table = vmd_ids,
+ .probe = vmd_probe,
+ .remove = vmd_remove,
++ .shutdown = vmd_shutdown,
+ .driver = {
+ .pm = &vmd_dev_pm_ops,
+ },
+diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
+index 90d84d3bc868f..45535d4ae6445 100644
+--- a/drivers/pci/endpoint/functions/pci-epf-test.c
++++ b/drivers/pci/endpoint/functions/pci-epf-test.c
+@@ -285,7 +285,17 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test)
+ if (ret)
+ dev_err(dev, "Data transfer failed\n");
+ } else {
+- memcpy(dst_addr, src_addr, reg->size);
++ void *buf;
++
++ buf = kzalloc(reg->size, GFP_KERNEL);
++ if (!buf) {
++ ret = -ENOMEM;
++ goto err_map_addr;
++ }
++
++ memcpy_fromio(buf, src_addr, reg->size);
++ memcpy_toio(dst_addr, buf, reg->size);
++ kfree(buf);
+ }
+ ktime_get_ts64(&end);
+ pci_epf_test_print_rate("COPY", reg->size, &start, &end, use_dma);
+@@ -441,7 +451,7 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test)
+ if (!epf_test->dma_supported) {
+ dev_err(dev, "Cannot transfer data using DMA\n");
+ ret = -EINVAL;
+- goto err_map_addr;
++ goto err_dma_map;
+ }
+
+ src_phys_addr = dma_map_single(dma_dev, buf, reg->size,
+@@ -617,7 +627,6 @@ static void pci_epf_test_unbind(struct pci_epf *epf)
+
+ cancel_delayed_work(&epf_test->cmd_handler);
+ pci_epf_test_clean_dma_chan(epf_test);
+- pci_epc_stop(epc);
+ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+ epf_bar = &epf->bar[bar];
+
+@@ -874,7 +883,7 @@ static int pci_epf_test_bind(struct pci_epf *epf)
+ if (ret)
+ epf_test->dma_supported = false;
+
+- if (linkup_notifier) {
++ if (linkup_notifier || core_init_notifier) {
+ epf->nb.notifier_call = pci_epf_test_notifier;
+ pci_epc_register_notifier(epc, &epf->nb);
+ } else {
+diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
+index f031302ad4019..0a37967b0a939 100644
+--- a/drivers/pci/hotplug/acpiphp_glue.c
++++ b/drivers/pci/hotplug/acpiphp_glue.c
+@@ -503,12 +503,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
+ if (pass && dev->subordinate) {
+ check_hotplug_bridge(slot, dev);
+ pcibios_resource_survey_bus(dev->subordinate);
+- __pci_bus_size_bridges(dev->subordinate,
+- &add_list);
++ if (pci_is_root_bus(bus))
++ __pci_bus_size_bridges(dev->subordinate, &add_list);
+ }
+ }
+ }
+- __pci_bus_assign_resources(bus, &add_list, NULL);
++ if (pci_is_root_bus(bus))
++ __pci_bus_assign_resources(bus, &add_list, NULL);
++ else
++ pci_assign_unassigned_bridge_resources(bus->self);
+ }
+
+ acpiphp_sanitize_bus(bus);
+diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
+index 69fd401691be6..e0a614acee059 100644
+--- a/drivers/pci/hotplug/pciehp.h
++++ b/drivers/pci/hotplug/pciehp.h
+@@ -75,6 +75,8 @@ extern int pciehp_poll_time;
+ * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
+ * Link Status register and to the Presence Detect State bit in the Slot
+ * Status register during a slot reset which may cause them to flap
++ * @depth: Number of additional hotplug ports in the path to the root bus,
++ * used as lock subclass for @reset_lock
+ * @ist_running: flag to keep user request waiting while IRQ thread is running
+ * @request_result: result of last user request submitted to the IRQ thread
+ * @requester: wait queue to wake up on completion of user request,
+@@ -106,6 +108,7 @@ struct controller {
+
+ struct hotplug_slot hotplug_slot; /* hotplug core interface */
+ struct rw_semaphore reset_lock;
++ unsigned int depth;
+ unsigned int ist_running;
+ int request_result;
+ wait_queue_head_t requester;
+@@ -189,6 +192,8 @@ int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
+ int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status);
+ int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
+
++int pciehp_slot_reset(struct pcie_device *dev);
++
+ static inline const char *slot_name(struct controller *ctrl)
+ {
+ return hotplug_slot_name(&ctrl->hotplug_slot);
+diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
+index ad3393930ecb4..4042d87d539dd 100644
+--- a/drivers/pci/hotplug/pciehp_core.c
++++ b/drivers/pci/hotplug/pciehp_core.c
+@@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl)
+ {
+ int occupied;
+
+- down_read(&ctrl->reset_lock);
++ down_read_nested(&ctrl->reset_lock, ctrl->depth);
+ mutex_lock(&ctrl->state_lock);
+
+ occupied = pciehp_card_present_or_link_active(ctrl);
+@@ -351,6 +351,8 @@ static struct pcie_port_service_driver hpdriver_portdrv = {
+ .runtime_suspend = pciehp_runtime_suspend,
+ .runtime_resume = pciehp_runtime_resume,
+ #endif /* PM */
++
++ .slot_reset = pciehp_slot_reset,
+ };
+
+ int __init pcie_hp_init(void)
+diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
+index 529c348084401..32baba1b7f131 100644
+--- a/drivers/pci/hotplug/pciehp_ctrl.c
++++ b/drivers/pci/hotplug/pciehp_ctrl.c
+@@ -256,6 +256,14 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
+ present = pciehp_card_present(ctrl);
+ link_active = pciehp_check_link_active(ctrl);
+ if (present <= 0 && link_active <= 0) {
++ if (ctrl->state == BLINKINGON_STATE) {
++ ctrl->state = OFF_STATE;
++ cancel_delayed_work(&ctrl->button_work);
++ pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
++ INDICATOR_NOOP);
++ ctrl_info(ctrl, "Slot(%s): Card not present\n",
++ slot_name(ctrl));
++ }
+ mutex_unlock(&ctrl->state_lock);
+ return;
+ }
+diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
+index 3024d7e85e6a7..7773009b8b32e 100644
+--- a/drivers/pci/hotplug/pciehp_hpc.c
++++ b/drivers/pci/hotplug/pciehp_hpc.c
+@@ -98,6 +98,8 @@ static int pcie_poll_cmd(struct controller *ctrl, int timeout)
+ if (slot_status & PCI_EXP_SLTSTA_CC) {
+ pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_CC);
++ ctrl->cmd_busy = 0;
++ smp_mb();
+ return 1;
+ }
+ msleep(10);
+@@ -330,17 +332,11 @@ int pciehp_check_link_status(struct controller *ctrl)
+ static int __pciehp_link_set(struct controller *ctrl, bool enable)
+ {
+ struct pci_dev *pdev = ctrl_dev(ctrl);
+- u16 lnk_ctrl;
+
+- pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnk_ctrl);
++ pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_LD,
++ enable ? 0 : PCI_EXP_LNKCTL_LD);
+
+- if (enable)
+- lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;
+- else
+- lnk_ctrl |= PCI_EXP_LNKCTL_LD;
+-
+- pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnk_ctrl);
+- ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl);
+ return 0;
+ }
+
+@@ -583,7 +579,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl,
+ * the corresponding link change may have been ignored above.
+ * Synthesize it to ensure that it is acted on.
+ */
+- down_read(&ctrl->reset_lock);
++ down_read_nested(&ctrl->reset_lock, ctrl->depth);
+ if (!pciehp_check_link_active(ctrl))
+ pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
+ up_read(&ctrl->reset_lock);
+@@ -642,6 +638,8 @@ read_status:
+ */
+ if (ctrl->power_fault_detected)
+ status &= ~PCI_EXP_SLTSTA_PFD;
++ else if (status & PCI_EXP_SLTSTA_PFD)
++ ctrl->power_fault_detected = true;
+
+ events |= status;
+ if (!events) {
+@@ -651,7 +649,7 @@ read_status:
+ }
+
+ if (status) {
+- pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);
++ pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status);
+
+ /*
+ * In MSI mode, all event bits must be zero before the port
+@@ -725,8 +723,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
+ }
+
+ /* Check Power Fault Detected */
+- if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
+- ctrl->power_fault_detected = 1;
++ if (events & PCI_EXP_SLTSTA_PFD) {
+ ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
+ pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+ PCI_EXP_SLTCTL_ATTN_IND_ON);
+@@ -746,7 +743,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
+ * Disable requests have higher priority than Presence Detect Changed
+ * or Data Link Layer State Changed events.
+ */
+- down_read(&ctrl->reset_lock);
++ down_read_nested(&ctrl->reset_lock, ctrl->depth);
+ if (events & DISABLE_SLOT)
+ pciehp_handle_disable_request(ctrl);
+ else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
+@@ -862,6 +859,32 @@ void pcie_disable_interrupt(struct controller *ctrl)
+ pcie_write_cmd(ctrl, 0, mask);
+ }
+
++/**
++ * pciehp_slot_reset() - ignore link event caused by error-induced hot reset
++ * @dev: PCI Express port service device
++ *
++ * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset
++ * further up in the hierarchy to recover from an error. The reset was
++ * propagated down to this hotplug port. Ignore the resulting link flap.
++ * If the link failed to retrain successfully, synthesize the ignored event.
++ * Surprise removal during reset is detected through Presence Detect Changed.
++ */
++int pciehp_slot_reset(struct pcie_device *dev)
++{
++ struct controller *ctrl = get_service_data(dev);
++
++ if (ctrl->state != ON_STATE)
++ return 0;
++
++ pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA,
++ PCI_EXP_SLTSTA_DLLSC);
++
++ if (!pciehp_check_link_active(ctrl))
++ pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
++
++ return 0;
++}
++
+ /*
+ * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary
+ * bus reset of the bridge, but at the same time we want to ensure that it is
+@@ -880,7 +903,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
+ if (probe)
+ return 0;
+
+- down_write(&ctrl->reset_lock);
++ down_write_nested(&ctrl->reset_lock, ctrl->depth);
+
+ if (!ATTN_BUTTN(ctrl)) {
+ ctrl_mask |= PCI_EXP_SLTCTL_PDCE;
+@@ -936,6 +959,20 @@ static inline void dbg_ctrl(struct controller *ctrl)
+
+ #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
+
++static inline int pcie_hotplug_depth(struct pci_dev *dev)
++{
++ struct pci_bus *bus = dev->bus;
++ int depth = 0;
++
++ while (bus->parent) {
++ bus = bus->parent;
++ if (bus->self && bus->self->is_hotplug_bridge)
++ depth++;
++ }
++
++ return depth;
++}
++
+ struct controller *pcie_init(struct pcie_device *dev)
+ {
+ struct controller *ctrl;
+@@ -949,6 +986,7 @@ struct controller *pcie_init(struct pcie_device *dev)
+ return NULL;
+
+ ctrl->pcie = dev;
++ ctrl->depth = pcie_hotplug_depth(dev->port);
+ pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
+
+ if (pdev->hotplug_user_indicators)
+@@ -1042,6 +1080,8 @@ static void quirk_cmd_compl(struct pci_dev *pdev)
+ }
+ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
++DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110,
++ PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400,
+ PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
+ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401,
+diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
+index d17f3bf36f709..ad12515a4a121 100644
+--- a/drivers/pci/hotplug/pciehp_pci.c
++++ b/drivers/pci/hotplug/pciehp_pci.c
+@@ -63,7 +63,14 @@ int pciehp_configure_device(struct controller *ctrl)
+
+ pci_assign_unassigned_bridge_resources(bridge);
+ pcie_bus_configure_settings(parent);
++
++ /*
++ * Release reset_lock during driver binding
++ * to avoid AB-BA deadlock with device_lock.
++ */
++ up_read(&ctrl->reset_lock);
+ pci_bus_add_devices(parent);
++ down_read_nested(&ctrl->reset_lock, ctrl->depth);
+
+ out:
+ pci_unlock_rescan_remove();
+@@ -104,7 +111,15 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
+ list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
+ bus_list) {
+ pci_dev_get(dev);
++
++ /*
++ * Release reset_lock during driver unbinding
++ * to avoid AB-BA deadlock with device_lock.
++ */
++ up_read(&ctrl->reset_lock);
+ pci_stop_and_remove_bus_device(dev);
++ down_read_nested(&ctrl->reset_lock, ctrl->depth);
++
+ /*
+ * Ensure that no new Requests will be generated from
+ * the device.
+diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
+index dafdc652fcd06..ef71c1a204004 100644
+--- a/drivers/pci/iov.c
++++ b/drivers/pci/iov.c
+@@ -14,7 +14,7 @@
+ #include <linux/delay.h>
+ #include "pci.h"
+
+-#define VIRTFN_ID_LEN 16
++#define VIRTFN_ID_LEN 17 /* "virtfn%u\0" for 2^32 - 1 */
+
+ int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
+ {
+diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
+index 12ecd0aaa28d6..0050e8f6814ed 100644
+--- a/drivers/pci/irq.c
++++ b/drivers/pci/irq.c
+@@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler,
+ va_start(ap, fmt);
+ devname = kvasprintf(GFP_KERNEL, fmt, ap);
+ va_end(ap);
++ if (!devname)
++ return -ENOMEM;
+
+ ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn,
+ irqflags, devname, dev_id);
+diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
+index 4b4792940e869..cc4c2b8a5efd7 100644
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -148,6 +148,9 @@ static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 s
+ raw_spinlock_t *lock = &desc->dev->msi_lock;
+ unsigned long flags;
+
++ if (!desc->msi_attrib.can_mask)
++ return;
++
+ raw_spin_lock_irqsave(lock, flags);
+ desc->msi_mask &= ~clear;
+ desc->msi_mask |= set;
+@@ -181,7 +184,8 @@ static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
+ {
+ void __iomem *desc_addr = pci_msix_desc_addr(desc);
+
+- writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
++ if (desc->msi_attrib.can_mask)
++ writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+ }
+
+ static inline void pci_msix_mask(struct msi_desc *desc)
+@@ -200,23 +204,17 @@ static inline void pci_msix_unmask(struct msi_desc *desc)
+
+ static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
+ {
+- if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
+- return;
+-
+ if (desc->msi_attrib.is_msix)
+ pci_msix_mask(desc);
+- else if (desc->msi_attrib.maskbit)
++ else
+ pci_msi_mask(desc, mask);
+ }
+
+ static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
+ {
+- if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
+- return;
+-
+ if (desc->msi_attrib.is_msix)
+ pci_msix_unmask(desc);
+- else if (desc->msi_attrib.maskbit)
++ else
+ pci_msi_unmask(desc, mask);
+ }
+
+@@ -370,6 +368,11 @@ static void free_msi_irqs(struct pci_dev *dev)
+ for (i = 0; i < entry->nvec_used; i++)
+ BUG_ON(irq_has_action(entry->irq + i));
+
++ if (dev->msi_irq_groups) {
++ msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
++ dev->msi_irq_groups = NULL;
++ }
++
+ pci_msi_teardown_msi_irqs(dev);
+
+ list_for_each_entry_safe(entry, tmp, msi_list, list) {
+@@ -381,11 +384,6 @@ static void free_msi_irqs(struct pci_dev *dev)
+ list_del(&entry->list);
+ free_msi_entry(entry);
+ }
+-
+- if (dev->msi_irq_groups) {
+- msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
+- dev->msi_irq_groups = NULL;
+- }
+ }
+
+ static void pci_intx_for_msi(struct pci_dev *dev, int enable)
+@@ -479,12 +477,16 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
+ goto out;
+
+ pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
++ /* Lies, damned lies, and MSIs */
++ if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
++ control |= PCI_MSI_FLAGS_MASKBIT;
+
+ entry->msi_attrib.is_msix = 0;
+ entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
+ entry->msi_attrib.is_virtual = 0;
+ entry->msi_attrib.entry_nr = 0;
+- entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
++ entry->msi_attrib.can_mask = !pci_msi_ignore_mask &&
++ !!(control & PCI_MSI_FLAGS_MASKBIT);
+ entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
+ entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
+ entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
+@@ -495,7 +497,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
+ entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
+
+ /* Save the initial mask status */
+- if (entry->msi_attrib.maskbit)
++ if (entry->msi_attrib.can_mask)
+ pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);
+
+ out:
+@@ -638,10 +640,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
+ entry->msi_attrib.is_virtual =
+ entry->msi_attrib.entry_nr >= vec_count;
+
++ entry->msi_attrib.can_mask = !pci_msi_ignore_mask &&
++ !entry->msi_attrib.is_virtual;
++
+ entry->msi_attrib.default_irq = dev->irq;
+ entry->mask_base = base;
+
+- if (!entry->msi_attrib.is_virtual) {
++ if (entry->msi_attrib.can_mask) {
+ addr = pci_msix_desc_addr(entry);
+ entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+ }
+@@ -716,9 +721,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+ goto out_disable;
+ }
+
+- /* Ensure that all table entries are masked. */
+- msix_mask_all(base, tsize);
+-
+ ret = msix_setup_entries(dev, base, entries, nvec, affd);
+ if (ret)
+ goto out_disable;
+@@ -745,6 +747,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+ /* Set MSI-X enabled bits and unmask the function */
+ pci_intx_for_msi(dev, 0);
+ dev->msix_enabled = 1;
++
++ /*
++ * Ensure that all table entries are masked to prevent
++ * stale entries from firing in a crash kernel.
++ *
++ * Done late to deal with a broken Marvell NVME device
++ * which takes the MSI-X mask bits into account even
++ * when MSI-X is disabled, which prevents MSI delivery.
++ */
++ msix_mask_all(base, tsize);
+ pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
+
+ pcibios_free_irq(dev);
+@@ -771,7 +783,7 @@ out_free:
+ free_msi_irqs(dev);
+
+ out_disable:
+- pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
++ pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
+
+ return ret;
+ }
+@@ -1181,19 +1193,24 @@ EXPORT_SYMBOL(pci_free_irq_vectors);
+
+ /**
+ * pci_irq_vector - return Linux IRQ number of a device vector
+- * @dev: PCI device to operate on
+- * @nr: device-relative interrupt vector index (0-based).
++ * @dev: PCI device to operate on
++ * @nr: Interrupt vector index (0-based)
++ *
++ * @nr has the following meanings depending on the interrupt mode:
++ * MSI-X: The index in the MSI-X vector table
++ * MSI: The index of the enabled MSI vectors
++ * INTx: Must be 0
++ *
++ * Return: The Linux interrupt number or -EINVAL if @nr is out of range.
+ */
+ int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
+ {
+ if (dev->msix_enabled) {
+ struct msi_desc *entry;
+- int i = 0;
+
+ for_each_pci_msi_entry(entry, dev) {
+- if (i == nr)
++ if (entry->msi_attrib.entry_nr == nr)
+ return entry->irq;
+- i++;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+@@ -1217,17 +1234,22 @@ EXPORT_SYMBOL(pci_irq_vector);
+ * pci_irq_get_affinity - return the affinity of a particular MSI vector
+ * @dev: PCI device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
++ *
++ * @nr has the following meanings depending on the interrupt mode:
++ * MSI-X: The index in the MSI-X vector table
++ * MSI: The index of the enabled MSI vectors
++ * INTx: Must be 0
++ *
++ * Return: A cpumask pointer or NULL if @nr is out of range
+ */
+ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+ {
+ if (dev->msix_enabled) {
+ struct msi_desc *entry;
+- int i = 0;
+
+ for_each_pci_msi_entry(entry, dev) {
+- if (i == nr)
++ if (entry->msi_attrib.entry_nr == nr)
+ return &entry->affinity->mask;
+- i++;
+ }
+ WARN_ON_ONCE(1);
+ return NULL;
+diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
+index 50cdde3e9a8b2..316fd2f44df45 100644
+--- a/drivers/pci/p2pdma.c
++++ b/drivers/pci/p2pdma.c
+@@ -219,7 +219,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+ error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
+ pci_bus_address(pdev, bar) + offset,
+ range_len(&pgmap->range), dev_to_node(&pdev->dev),
+- pgmap->ref);
++ &pgmap->ref);
+ if (error)
+ goto pages_free;
+
+diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
+index 260a06fb78a61..813e0d25e841e 100644
+--- a/drivers/pci/pci-acpi.c
++++ b/drivers/pci/pci-acpi.c
+@@ -976,9 +976,11 @@ static bool acpi_pci_power_manageable(struct pci_dev *dev)
+
+ static bool acpi_pci_bridge_d3(struct pci_dev *dev)
+ {
+- const union acpi_object *obj;
+- struct acpi_device *adev;
+ struct pci_dev *rpdev;
++ struct acpi_device *adev;
++ acpi_status status;
++ unsigned long long state;
++ const union acpi_object *obj;
+
+ if (!dev->is_hotplug_bridge)
+ return false;
+@@ -987,12 +989,6 @@ static bool acpi_pci_bridge_d3(struct pci_dev *dev)
+ if (acpi_pci_power_manageable(dev))
+ return true;
+
+- /*
+- * The ACPI firmware will provide the device-specific properties through
+- * _DSD configuration object. Look for the 'HotPlugSupportInD3' property
+- * for the root port and if it is set we know the hierarchy behind it
+- * supports D3 just fine.
+- */
+ rpdev = pcie_find_root_port(dev);
+ if (!rpdev)
+ return false;
+@@ -1001,11 +997,34 @@ static bool acpi_pci_bridge_d3(struct pci_dev *dev)
+ if (!adev)
+ return false;
+
+- if (acpi_dev_get_property(adev, "HotPlugSupportInD3",
+- ACPI_TYPE_INTEGER, &obj) < 0)
++ /*
++ * If the Root Port cannot signal wakeup signals at all, i.e., it
++ * doesn't supply a wakeup GPE via _PRW, it cannot signal hotplug
++ * events from low-power states including D3hot and D3cold.
++ */
++ if (!adev->wakeup.flags.valid)
+ return false;
+
+- return obj->integer.value == 1;
++ /*
++ * If the Root Port cannot wake itself from D3hot or D3cold, we
++ * can't use D3.
++ */
++ status = acpi_evaluate_integer(adev->handle, "_S0W", NULL, &state);
++ if (ACPI_SUCCESS(status) && state < ACPI_STATE_D3_HOT)
++ return false;
++
++ /*
++ * The "HotPlugSupportInD3" property in a Root Port _DSD indicates
++ * the Port can signal hotplug events while in D3. We assume any
++ * bridges *below* that Root Port can also signal hotplug events
++ * while in D3.
++ */
++ if (!acpi_dev_get_property(adev, "HotPlugSupportInD3",
++ ACPI_TYPE_INTEGER, &obj) &&
++ obj->integer.value == 1)
++ return true;
++
++ return false;
+ }
+
+ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
+index fdaf86a888b73..c994ebec23603 100644
+--- a/drivers/pci/pci-bridge-emul.c
++++ b/drivers/pci/pci-bridge-emul.c
+@@ -139,8 +139,13 @@ struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = {
+ .ro = GENMASK(7, 0),
+ },
+
++ /*
++ * If expansion ROM is unsupported then ROM Base Address register must
++ * be implemented as a read-only register that returns 0 when read, same
++ * as for unused Base Address registers.
++ */
+ [PCI_ROM_ADDRESS1 / 4] = {
+- .rw = GENMASK(31, 11) | BIT(0),
++ .ro = ~0,
+ },
+
+ /*
+@@ -171,41 +176,55 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] =
+ [PCI_CAP_LIST_ID / 4] = {
+ /*
+ * Capability ID, Next Capability Pointer and
+- * Capabilities register are all read-only.
++ * bits [14:0] of Capabilities register are all read-only.
++ * Bit 15 of Capabilities register is reserved.
+ */
+- .ro = ~0,
++ .ro = GENMASK(30, 0),
+ },
+
+ [PCI_EXP_DEVCAP / 4] = {
+- .ro = ~0,
++ /*
++ * Bits [31:29] and [17:16] are reserved.
++ * Bits [27:18] are reserved for non-upstream ports.
++ * Bits 28 and [14:6] are reserved for non-endpoint devices.
++ * Other bits are read-only.
++ */
++ .ro = BIT(15) | GENMASK(5, 0),
+ },
+
+ [PCI_EXP_DEVCTL / 4] = {
+- /* Device control register is RW */
+- .rw = GENMASK(15, 0),
++ /*
++ * Device control register is RW, except bit 15 which is
++ * reserved for non-endpoints or non-PCIe-to-PCI/X bridges.
++ */
++ .rw = GENMASK(14, 0),
+
+ /*
+ * Device status register has bits 6 and [3:0] W1C, [5:4] RO,
+- * the rest is reserved
++ * the rest is reserved. Also bit 6 is reserved for non-upstream
++ * ports.
+ */
+- .w1c = (BIT(6) | GENMASK(3, 0)) << 16,
++ .w1c = GENMASK(3, 0) << 16,
+ .ro = GENMASK(5, 4) << 16,
+ },
+
+ [PCI_EXP_LNKCAP / 4] = {
+- /* All bits are RO, except bit 23 which is reserved */
+- .ro = lower_32_bits(~BIT(23)),
++ /*
++ * All bits are RO, except bit 23 which is reserved and
++ * bit 18 which is reserved for non-upstream ports.
++ */
++ .ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)),
+ },
+
+ [PCI_EXP_LNKCTL / 4] = {
+ /*
+ * Link control has bits [15:14], [11:3] and [1:0] RW, the
+- * rest is reserved.
++ * rest is reserved. Bit 8 is reserved for non-upstream ports.
+ *
+ * Link status has bits [13:0] RO, and bits [15:14]
+ * W1C.
+ */
+- .rw = GENMASK(15, 14) | GENMASK(11, 3) | GENMASK(1, 0),
++ .rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0),
+ .ro = GENMASK(13, 0) << 16,
+ .w1c = GENMASK(15, 14) << 16,
+ },
+@@ -251,6 +270,49 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] =
+ .ro = GENMASK(15, 0) | PCI_EXP_RTSTA_PENDING,
+ .w1c = PCI_EXP_RTSTA_PME,
+ },
++
++ [PCI_EXP_DEVCAP2 / 4] = {
++ /*
++ * Device capabilities 2 register has reserved bits [30:27].
++ * Also bits [26:24] are reserved for non-upstream ports.
++ */
++ .ro = BIT(31) | GENMASK(23, 0),
++ },
++
++ [PCI_EXP_DEVCTL2 / 4] = {
++ /*
++ * Device control 2 register is RW. Bit 11 is reserved for
++ * non-upstream ports.
++ *
++ * Device status 2 register is reserved.
++ */
++ .rw = GENMASK(15, 12) | GENMASK(10, 0),
++ },
++
++ [PCI_EXP_LNKCAP2 / 4] = {
++ /* Link capabilities 2 register has reserved bits [30:25] and 0. */
++ .ro = BIT(31) | GENMASK(24, 1),
++ },
++
++ [PCI_EXP_LNKCTL2 / 4] = {
++ /*
++ * Link control 2 register is RW.
++ *
++ * Link status 2 register has bits 5, 15 W1C;
++ * bits 10, 11 reserved and others are RO.
++ */
++ .rw = GENMASK(15, 0),
++ .w1c = (BIT(15) | BIT(5)) << 16,
++ .ro = (GENMASK(14, 12) | GENMASK(9, 6) | GENMASK(4, 0)) << 16,
++ },
++
++ [PCI_EXP_SLTCAP2 / 4] = {
++ /* Slot capabilities 2 register is reserved. */
++ },
++
++ [PCI_EXP_SLTCTL2 / 4] = {
++ /* Both Slot control 2 and Slot status 2 registers are reserved. */
++ },
+ };
+
+ /*
+@@ -265,7 +327,11 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
+ {
+ BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END);
+
+- bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16);
++ /*
++ * class_revision: Class is the high 24 bits and revision the low 8 bits of this member,
++ * while class for PCI Bridge Normal Decode has the 24-bit value: PCI_CLASS_BRIDGE_PCI << 8
++ */
++ bridge->conf.class_revision |= cpu_to_le32((PCI_CLASS_BRIDGE_PCI << 8) << 8);
+ bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE;
+ bridge->conf.cache_line_size = 0x10;
+ bridge->conf.status = cpu_to_le16(PCI_STATUS_CAP_LIST);
+@@ -277,11 +343,9 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
+
+ if (bridge->has_pcie) {
+ bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START;
++ bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST);
+ bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP;
+- /* Set PCIe v2, root port, slot support */
+- bridge->pcie_conf.cap =
+- cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4 | 2 |
+- PCI_EXP_FLAGS_SLOT);
++ bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4);
+ bridge->pcie_cap_regs_behavior =
+ kmemdup(pcie_cap_regs_behavior,
+ sizeof(pcie_cap_regs_behavior),
+@@ -290,6 +354,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
+ kfree(bridge->pci_regs_behavior);
+ return -ENOMEM;
+ }
++ /* These bits are applicable only for PCI and reserved on PCIe */
++ bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &=
++ ~GENMASK(15, 8);
++ bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &=
++ ~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE |
++ PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT |
++ PCI_COMMAND_FAST_BACK) |
++ (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
++ PCI_STATUS_DEVSEL_MASK) << 16);
++ bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &=
++ ~GENMASK(31, 24);
++ bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &=
++ ~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
++ PCI_STATUS_DEVSEL_MASK) << 16);
++ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &=
++ ~((PCI_BRIDGE_CTL_MASTER_ABORT |
++ BIT(8) | BIT(9) | BIT(11)) << 16);
++ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &=
++ ~((PCI_BRIDGE_CTL_FAST_BACK) << 16);
++ bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &=
++ ~(BIT(10) << 16);
+ }
+
+ if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) {
+@@ -431,8 +516,21 @@ int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where,
+ /* Clear the W1C bits */
+ new &= ~((value << shift) & (behavior[reg / 4].w1c & mask));
+
++ /* Save the new value with the cleared W1C bits into the cfgspace */
+ cfgspace[reg / 4] = cpu_to_le32(new);
+
++ /*
++ * Clear the W1C bits not specified by the write mask, so that the
++ * write_op() does not clear them.
++ */
++ new &= ~(behavior[reg / 4].w1c & ~mask);
++
++ /*
++ * Set the W1C bits specified by the write mask, so that write_op()
++ * knows that they are to be cleared.
++ */
++ new |= (value << shift) & (behavior[reg / 4].w1c & mask);
++
+ if (write_op)
+ write_op(bridge, reg, old, new, mask);
+
+diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
+index 2761ab86490d1..f44c0667a83c6 100644
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -925,7 +925,7 @@ static int pci_pm_resume_noirq(struct device *dev)
+ pcie_pme_root_status_cleanup(pci_dev);
+
+ if (!skip_bus_pm && prev_state == PCI_D3cold)
+- pci_bridge_wait_for_secondary_bus(pci_dev);
++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+
+ if (pci_has_legacy_pm_support(pci_dev))
+ return 0;
+@@ -1312,7 +1312,7 @@ static int pci_pm_runtime_resume(struct device *dev)
+ pci_pm_default_resume(pci_dev);
+
+ if (prev_state == PCI_D3cold)
+- pci_bridge_wait_for_secondary_bus(pci_dev);
++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+
+ if (pm && pm->runtime_resume)
+ error = pm->runtime_resume(dev);
+diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
+index 7fb5cd17cc981..f2909ae93f2f8 100644
+--- a/drivers/pci/pci-sysfs.c
++++ b/drivers/pci/pci-sysfs.c
+@@ -1179,11 +1179,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
+
+ sysfs_bin_attr_init(res_attr);
+ if (write_combine) {
+- pdev->res_attr_wc[num] = res_attr;
+ sprintf(res_attr_name, "resource%d_wc", num);
+ res_attr->mmap = pci_mmap_resource_wc;
+ } else {
+- pdev->res_attr[num] = res_attr;
+ sprintf(res_attr_name, "resource%d", num);
+ if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
+ res_attr->read = pci_read_resource_io;
+@@ -1201,10 +1199,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
+ res_attr->size = pci_resource_len(pdev, num);
+ res_attr->private = (void *)(unsigned long)num;
+ retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+- if (retval)
++ if (retval) {
+ kfree(res_attr);
++ return retval;
++ }
++
++ if (write_combine)
++ pdev->res_attr_wc[num] = res_attr;
++ else
++ pdev->res_attr[num] = res_attr;
+
+- return retval;
++ return 0;
+ }
+
+ /**
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index ce2ab62b64cfa..244c1c2e08767 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -163,9 +163,6 @@ static int __init pcie_port_pm_setup(char *str)
+ }
+ __setup("pcie_port_pm=", pcie_port_pm_setup);
+
+-/* Time to wait after a reset for device to become responsive */
+-#define PCIE_RESET_READY_POLL_MS 60000
+-
+ /**
+ * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
+ * @bus: pointer to PCI bus structure to search
+@@ -1143,9 +1140,6 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
+ if (need_restore)
+ pci_restore_bars(dev);
+
+- if (dev->bus->self)
+- pcie_aspm_pm_state_change(dev->bus->self);
+-
+ return 0;
+ }
+
+@@ -1255,7 +1249,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+ return -ENOTTY;
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, reset_type);
+
+@@ -1264,7 +1258,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
+
+@@ -2889,6 +2883,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = {
+ DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
+ },
+ },
++ {
++ /*
++ * Downstream device is not accessible after putting a root port
++ * into D3cold and back into D0 on Elo Continental Z2 board
++ */
++ .ident = "Elo Continental Z2",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Elo Touch Solutions"),
++ DMI_MATCH(DMI_BOARD_NAME, "Geminilake"),
++ DMI_MATCH(DMI_BOARD_VERSION, "Continental Z2"),
++ },
++ },
+ #endif
+ { }
+ };
+@@ -3719,6 +3725,14 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
+ struct pci_dev *bridge;
+ u32 cap, ctl2;
+
++ /*
++ * Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit
++ * in Device Control 2 is reserved in VFs and the PF value applies
++ * to all associated VFs.
++ */
++ if (dev->is_virtfn)
++ return -EINVAL;
++
+ if (!pci_is_pcie(dev))
+ return -EINVAL;
+
+@@ -4866,24 +4880,31 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
+ /**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
++ * @reset_type: reset type in human-readable form
++ * @timeout: maximum time to wait for devices on secondary bus (milliseconds)
+ *
+ * Handle necessary delays before access to the devices on the secondary
+- * side of the bridge are permitted after D3cold to D0 transition.
++ * side of the bridge are permitted after D3cold to D0 transition
++ * or Conventional Reset.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
++ *
++ * Return 0 on success or -ENOTTY if the first device on the secondary bus
++ * failed to become accessible.
+ */
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout)
+ {
+ struct pci_dev *child;
+ int delay;
+
+ if (pci_dev_is_disconnected(dev))
+- return;
++ return 0;
+
+- if (!pci_is_bridge(dev) || !dev->bridge_d3)
+- return;
++ if (!pci_is_bridge(dev))
++ return 0;
+
+ down_read(&pci_bus_sem);
+
+@@ -4895,14 +4916,14 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ */
+ if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ /* Take d3cold_delay requirements into account */
+ delay = pci_bus_max_d3cold_delay(dev->subordinate);
+ if (!delay) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+@@ -4911,14 +4932,12 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+
+ /*
+ * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+- * accessing the device after reset (that is 1000 ms + 100 ms). In
+- * practice this should not be needed because we don't do power
+- * management for them (see pci_bridge_d3_possible()).
++ * accessing the device after reset (that is 1000 ms + 100 ms).
+ */
+ if (!pci_is_pcie(dev)) {
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+ msleep(1000 + delay);
+- return;
++ return 0;
+ }
+
+ /*
+@@ -4935,11 +4954,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ * configuration requests if we only wait for 100 ms (see
+ * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+ *
+- * Therefore we wait for 100 ms and check for the device presence.
+- * If it is still not present give it an additional 100 ms.
++ * Therefore we wait for 100 ms and check for the device presence
++ * until the timeout expires.
+ */
+ if (!pcie_downstream_port(dev))
+- return;
++ return 0;
+
+ if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+ pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+@@ -4950,14 +4969,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ /* Did not train, no need to wait any further */
+ pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
+- return;
++ return -ENOTTY;
+ }
+ }
+
+- if (!pci_device_is_present(child)) {
+- pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+- msleep(delay);
+- }
++ return pci_dev_wait(child, reset_type, timeout - delay);
+ }
+
+ void pci_reset_secondary_bus(struct pci_dev *dev)
+@@ -4976,15 +4992,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev)
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+-
+- /*
+- * Trhfa for conventional PCI is 2^25 clock cycles.
+- * Assuming a minimum 33MHz clock this results in a 1s
+- * delay before we can consider subordinate devices to
+- * be re-initialized. PCIe has some ways to shorten this,
+- * but we don't make use of them yet.
+- */
+- ssleep(1);
+ }
+
+ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
+@@ -5003,7 +5010,8 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
+ {
+ pcibios_reset_secondary_bus(dev);
+
+- return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
++ return pci_bridge_wait_for_secondary_bus(dev, "bus reset",
++ PCIE_RESET_READY_POLL_MS);
+ }
+ EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
+
+@@ -5061,18 +5069,18 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
+
+ static void pci_dev_lock(struct pci_dev *dev)
+ {
+- pci_cfg_access_lock(dev);
+ /* block PM suspend, driver probe, etc. */
+ device_lock(&dev->dev);
++ pci_cfg_access_lock(dev);
+ }
+
+ /* Return 1 on successful lock, 0 on contention */
+ int pci_dev_trylock(struct pci_dev *dev)
+ {
+- if (pci_cfg_access_trylock(dev)) {
+- if (device_trylock(&dev->dev))
++ if (device_trylock(&dev->dev)) {
++ if (pci_cfg_access_trylock(dev))
+ return 1;
+- pci_cfg_access_unlock(dev);
++ device_unlock(&dev->dev);
+ }
+
+ return 0;
+@@ -5081,8 +5089,8 @@ EXPORT_SYMBOL_GPL(pci_dev_trylock);
+
+ void pci_dev_unlock(struct pci_dev *dev)
+ {
+- device_unlock(&dev->dev);
+ pci_cfg_access_unlock(dev);
++ device_unlock(&dev->dev);
+ }
+ EXPORT_SYMBOL_GPL(pci_dev_unlock);
+
+@@ -5950,6 +5958,7 @@ int pcie_set_readrq(struct pci_dev *dev, int rq)
+ {
+ u16 v;
+ int ret;
++ struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+
+ if (rq < 128 || rq > 4096 || !is_power_of_2(rq))
+ return -EINVAL;
+@@ -5968,6 +5977,15 @@ int pcie_set_readrq(struct pci_dev *dev, int rq)
+
+ v = (ffs(rq) - 8) << 12;
+
++ if (bridge->no_inc_mrrs) {
++ int max_mrrs = pcie_get_readrq(dev);
++
++ if (rq > max_mrrs) {
++ pci_info(dev, "can't set Max_Read_Request_Size to %d; max is %d\n", rq, max_mrrs);
++ return -EINVAL;
++ }
++ }
++
+ ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_READRQ, v);
+
+@@ -6363,6 +6381,8 @@ bool pci_device_is_present(struct pci_dev *pdev)
+ {
+ u32 v;
+
++ /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */
++ pdev = pci_physfn(pdev);
+ if (pci_dev_is_disconnected(pdev))
+ return false;
+ return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index 1cce56c2aea01..e6ea6e9504280 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -63,6 +63,19 @@ struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
+ #define PCI_PM_D3HOT_WAIT 10 /* msec */
+ #define PCI_PM_D3COLD_WAIT 100 /* msec */
+
++/*
++ * Following exit from Conventional Reset, devices must be ready within 1 sec
++ * (PCIe r6.0 sec 6.6.1). A D3cold to D0 transition implies a Conventional
++ * Reset (PCIe r6.0 sec 5.8).
++ */
++#define PCI_RESET_WAIT 1000 /* msec */
++/*
++ * Devices may extend the 1 sec period through Request Retry Status completions
++ * (PCIe r6.0 sec 2.3.1). The spec does not provide an upper limit, but 60 sec
++ * ought to be enough for any device to become responsive.
++ */
++#define PCIE_RESET_READY_POLL_MS 60000 /* msec */
++
+ /**
+ * struct pci_platform_pm_ops - Firmware PM callbacks
+ *
+@@ -124,7 +137,8 @@ void pci_msi_init(struct pci_dev *dev);
+ void pci_msix_init(struct pci_dev *dev);
+ bool pci_bridge_d3_possible(struct pci_dev *dev);
+ void pci_bridge_d3_update(struct pci_dev *dev);
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout);
+
+ static inline void pci_wakeup_event(struct pci_dev *dev)
+ {
+@@ -347,53 +361,36 @@ struct pci_sriov {
+ * @dev: PCI device to set new error_state
+ * @new: the state we want dev to be in
+ *
+- * Must be called with device_lock held.
++ * If the device is experiencing perm_failure, it has to remain in that state.
++ * Any other transition is allowed.
+ *
+ * Returns true if state has been changed to the requested state.
+ */
+ static inline bool pci_dev_set_io_state(struct pci_dev *dev,
+ pci_channel_state_t new)
+ {
+- bool changed = false;
++ pci_channel_state_t old;
+
+- device_lock_assert(&dev->dev);
+ switch (new) {
+ case pci_channel_io_perm_failure:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- case pci_channel_io_perm_failure:
+- changed = true;
+- break;
+- }
+- break;
++ xchg(&dev->error_state, pci_channel_io_perm_failure);
++ return true;
+ case pci_channel_io_frozen:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_normal,
++ pci_channel_io_frozen);
++ return old != pci_channel_io_perm_failure;
+ case pci_channel_io_normal:
+- switch (dev->error_state) {
+- case pci_channel_io_frozen:
+- case pci_channel_io_normal:
+- changed = true;
+- break;
+- }
+- break;
++ old = cmpxchg(&dev->error_state, pci_channel_io_frozen,
++ pci_channel_io_normal);
++ return old != pci_channel_io_perm_failure;
++ default:
++ return false;
+ }
+- if (changed)
+- dev->error_state = new;
+- return changed;
+ }
+
+ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
+ {
+- device_lock(&dev->dev);
+ pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
+- device_unlock(&dev->dev);
+
+ return 0;
+ }
+@@ -598,12 +595,10 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
+ #ifdef CONFIG_PCIEASPM
+ void pcie_aspm_init_link_state(struct pci_dev *pdev);
+ void pcie_aspm_exit_link_state(struct pci_dev *pdev);
+-void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+ void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
+ #else
+ static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
+ static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
+-static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
+ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
+ #endif
+
+diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
+index 9784fdcf30061..ca9ac8c6a2021 100644
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -101,6 +101,11 @@ struct aer_stats {
+ #define ERR_COR_ID(d) (d & 0xffff)
+ #define ERR_UNCOR_ID(d) (d >> 16)
+
++#define AER_ERR_STATUS_MASK (PCI_ERR_ROOT_UNCOR_RCV | \
++ PCI_ERR_ROOT_COR_RCV | \
++ PCI_ERR_ROOT_MULTI_COR_RCV | \
++ PCI_ERR_ROOT_MULTI_UNCOR_RCV)
++
+ static int pcie_aer_disable;
+ static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
+
+@@ -533,7 +538,7 @@ static const char *aer_agent_string[] = {
+ u64 *stats = pdev->aer_stats->stats_array; \
+ size_t len = 0; \
+ \
+- for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \
++ for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+ if (strings_array[i]) \
+ len += sysfs_emit_at(buf, len, "%s %llu\n", \
+ strings_array[i], \
+@@ -1196,7 +1201,7 @@ static irqreturn_t aer_irq(int irq, void *context)
+ struct aer_err_source e_src = {};
+
+ pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
+- if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
++ if (!(e_src.status & AER_ERR_STATUS_MASK))
+ return IRQ_NONE;
+
+ pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
+@@ -1342,6 +1347,11 @@ static int aer_probe(struct pcie_device *dev)
+ struct device *device = &dev->device;
+ struct pci_dev *port = dev->port;
+
++ BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
++ AER_MAX_TYPEOF_COR_ERRS);
++ BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
++ AER_MAX_TYPEOF_UNCOR_ERRS);
++
+ /* Limit to Root Ports or Root Complex Event Collectors */
+ if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+ (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
+index 013a47f587cea..4a2c229205fd0 100644
+--- a/drivers/pci/pcie/aspm.c
++++ b/drivers/pci/pcie/aspm.c
+@@ -192,12 +192,39 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist)
+ link->clkpm_disable = blacklist ? 1 : 0;
+ }
+
+-static bool pcie_retrain_link(struct pcie_link_state *link)
++static int pcie_wait_for_retrain(struct pci_dev *pdev)
+ {
+- struct pci_dev *parent = link->pdev;
+ unsigned long end_jiffies;
+ u16 reg16;
+
++ /* Wait for Link Training to be cleared by hardware */
++ end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
++ do {
++ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &reg16);
++ if (!(reg16 & PCI_EXP_LNKSTA_LT))
++ return 0;
++ msleep(1);
++ } while (time_before(jiffies, end_jiffies));
++
++ return -ETIMEDOUT;
++}
++
++static int pcie_retrain_link(struct pcie_link_state *link)
++{
++ struct pci_dev *parent = link->pdev;
++ int rc;
++ u16 reg16;
++
++ /*
++ * Ensure the updated LNKCTL parameters are used during link
++ * training by checking that there is no ongoing link training to
++ * avoid LTSSM race as recommended in Implementation Note at the
++ * end of PCIe r6.0.1 sec 7.5.3.7.
++ */
++ rc = pcie_wait_for_retrain(parent);
++ if (rc)
++ return rc;
++
+ pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
+ reg16 |= PCI_EXP_LNKCTL_RL;
+ pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
+@@ -211,15 +238,7 @@ static bool pcie_retrain_link(struct pcie_link_state *link)
+ pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
+ }
+
+- /* Wait for link training end. Break out after waiting for timeout */
+- end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT;
+- do {
+- pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16);
+- if (!(reg16 & PCI_EXP_LNKSTA_LT))
+- break;
+- msleep(1);
+- } while (time_before(jiffies, end_jiffies));
+- return !(reg16 & PCI_EXP_LNKSTA_LT);
++ return pcie_wait_for_retrain(parent);
+ }
+
+ /*
+@@ -230,7 +249,7 @@ static bool pcie_retrain_link(struct pcie_link_state *link)
+ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
+ {
+ int same_clock = 1;
+- u16 reg16, parent_reg, child_reg[8];
++ u16 reg16, ccc, parent_old_ccc, child_old_ccc[8];
+ struct pci_dev *child, *parent = link->pdev;
+ struct pci_bus *linkbus = parent->subordinate;
+ /*
+@@ -252,6 +271,7 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
+
+ /* Port might be already in common clock mode */
+ pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
++ parent_old_ccc = reg16 & PCI_EXP_LNKCTL_CCC;
+ if (same_clock && (reg16 & PCI_EXP_LNKCTL_CCC)) {
+ bool consistent = true;
+
+@@ -268,35 +288,30 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link)
+ pci_info(parent, "ASPM: current common clock configuration is inconsistent, reconfiguring\n");
+ }
+
++ ccc = same_clock ? PCI_EXP_LNKCTL_CCC : 0;
+ /* Configure downstream component, all functions */
+ list_for_each_entry(child, &linkbus->devices, bus_list) {
+ pcie_capability_read_word(child, PCI_EXP_LNKCTL, &reg16);
+- child_reg[PCI_FUNC(child->devfn)] = reg16;
+- if (same_clock)
+- reg16 |= PCI_EXP_LNKCTL_CCC;
+- else
+- reg16 &= ~PCI_EXP_LNKCTL_CCC;
+- pcie_capability_write_word(child, PCI_EXP_LNKCTL, reg16);
++ child_old_ccc[PCI_FUNC(child->devfn)] = reg16 & PCI_EXP_LNKCTL_CCC;
++ pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_CCC, ccc);
+ }
+
+ /* Configure upstream component */
+- pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16);
+- parent_reg = reg16;
+- if (same_clock)
+- reg16 |= PCI_EXP_LNKCTL_CCC;
+- else
+- reg16 &= ~PCI_EXP_LNKCTL_CCC;
+- pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16);
++ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_CCC, ccc);
+
+- if (pcie_retrain_link(link))
+- return;
++ if (pcie_retrain_link(link)) {
+
+- /* Training failed. Restore common clock configurations */
+- pci_err(parent, "ASPM: Could not configure common clock\n");
+- list_for_each_entry(child, &linkbus->devices, bus_list)
+- pcie_capability_write_word(child, PCI_EXP_LNKCTL,
+- child_reg[PCI_FUNC(child->devfn)]);
+- pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg);
++ /* Training failed. Restore common clock configurations */
++ pci_err(parent, "ASPM: Could not configure common clock\n");
++ list_for_each_entry(child, &linkbus->devices, bus_list)
++ pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_CCC,
++ child_old_ccc[PCI_FUNC(child->devfn)]);
++ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
++ PCI_EXP_LNKCTL_CCC, parent_old_ccc);
++ }
+ }
+
+ /* Convert L0s latency encoding to ns */
+@@ -993,21 +1008,24 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
+
+ down_read(&pci_bus_sem);
+ mutex_lock(&aspm_lock);
+- /*
+- * All PCIe functions are in one slot, remove one function will remove
+- * the whole slot, so just wait until we are the last function left.
+- */
+- if (!list_empty(&parent->subordinate->devices))
+- goto out;
+
+ link = parent->link_state;
+ root = link->root;
+ parent_link = link->parent;
+
+- /* All functions are removed, so just disable ASPM for the link */
++ /*
++ * link->downstream is a pointer to the pci_dev of function 0. If
++ * we remove that function, the pci_dev is about to be deallocated,
++ * so we can't use link->downstream again. Free the link state to
++ * avoid this.
++ *
++ * If we're removing a non-0 function, it's possible we could
++ * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends
++ * programming the same ASPM Control value for all functions of
++ * multi-function devices, so disable ASPM for all of them.
++ */
+ pcie_config_aspm_link(link, 0);
+ list_del(&link->sibling);
+- /* Clock PM is for endpoint device */
+ free_link_state(link);
+
+ /* Recheck latencies and configure upstream links */
+@@ -1015,26 +1033,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
+ pcie_update_aspm_capable(root);
+ pcie_config_aspm_path(parent_link);
+ }
+-out:
+- mutex_unlock(&aspm_lock);
+- up_read(&pci_bus_sem);
+-}
+-
+-/* @pdev: the root port or switch downstream port */
+-void pcie_aspm_pm_state_change(struct pci_dev *pdev)
+-{
+- struct pcie_link_state *link = pdev->link_state;
+
+- if (aspm_disabled || !link)
+- return;
+- /*
+- * Devices changed PM state, we should recheck if latency
+- * meets all functions' requirement
+- */
+- down_read(&pci_bus_sem);
+- mutex_lock(&aspm_lock);
+- pcie_update_aspm_capable(link->root);
+- pcie_config_aspm_path(link);
+ mutex_unlock(&aspm_lock);
+ up_read(&pci_bus_sem);
+ }
+diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
+index c556e7beafe38..f21d64ae4ffcc 100644
+--- a/drivers/pci/pcie/dpc.c
++++ b/drivers/pci/pcie/dpc.c
+@@ -170,8 +170,8 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
+ pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+ PCI_EXP_DPC_STATUS_TRIGGER);
+
+- if (!pcie_wait_for_link(pdev, true)) {
+- pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n");
++ if (pci_bridge_wait_for_secondary_bus(pdev, "DPC",
++ PCIE_RESET_READY_POLL_MS)) {
+ clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+ ret = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c
+index a6b9b479b97ad..87734e4c3c204 100644
+--- a/drivers/pci/pcie/edr.c
++++ b/drivers/pci/pcie/edr.c
+@@ -193,6 +193,7 @@ send_ost:
+ */
+ if (estate == PCI_ERS_RESULT_RECOVERED) {
+ pci_dbg(edev, "DPC port successfully recovered\n");
++ pcie_clear_device_status(edev);
+ acpi_send_edr_status(pdev, edev, EDR_OST_SUCCESS);
+ } else {
+ pci_dbg(edev, "DPC port recovery failed\n");
+diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
+index 2ff5724b8f13f..41fe1ffd59078 100644
+--- a/drivers/pci/pcie/portdrv.h
++++ b/drivers/pci/pcie/portdrv.h
+@@ -85,6 +85,8 @@ struct pcie_port_service_driver {
+ int (*runtime_suspend)(struct pcie_device *dev);
+ int (*runtime_resume)(struct pcie_device *dev);
+
++ int (*slot_reset)(struct pcie_device *dev);
++
+ /* Device driver may resume normal operations */
+ void (*error_resume)(struct pci_dev *dev);
+
+@@ -110,6 +112,7 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new);
+
+ extern struct bus_type pcie_port_bus_type;
+ int pcie_port_device_register(struct pci_dev *dev);
++int pcie_port_device_iter(struct device *dev, void *data);
+ #ifdef CONFIG_PM
+ int pcie_port_device_suspend(struct device *dev);
+ int pcie_port_device_resume_noirq(struct device *dev);
+diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
+index 3ee63968deaa5..604feeb84ee40 100644
+--- a/drivers/pci/pcie/portdrv_core.c
++++ b/drivers/pci/pcie/portdrv_core.c
+@@ -367,24 +367,24 @@ error_disable:
+ return status;
+ }
+
+-#ifdef CONFIG_PM
+-typedef int (*pcie_pm_callback_t)(struct pcie_device *);
++typedef int (*pcie_callback_t)(struct pcie_device *);
+
+-static int pm_iter(struct device *dev, void *data)
++int pcie_port_device_iter(struct device *dev, void *data)
+ {
+ struct pcie_port_service_driver *service_driver;
+ size_t offset = *(size_t *)data;
+- pcie_pm_callback_t cb;
++ pcie_callback_t cb;
+
+ if ((dev->bus == &pcie_port_bus_type) && dev->driver) {
+ service_driver = to_service_driver(dev->driver);
+- cb = *(pcie_pm_callback_t *)((void *)service_driver + offset);
++ cb = *(pcie_callback_t *)((void *)service_driver + offset);
+ if (cb)
+ return cb(to_pcie_device(dev));
+ }
+ return 0;
+ }
+
++#ifdef CONFIG_PM
+ /**
+ * pcie_port_device_suspend - suspend port services associated with a PCIe port
+ * @dev: PCI Express port to handle
+@@ -392,13 +392,13 @@ static int pm_iter(struct device *dev, void *data)
+ int pcie_port_device_suspend(struct device *dev)
+ {
+ size_t off = offsetof(struct pcie_port_service_driver, suspend);
+- return device_for_each_child(dev, &off, pm_iter);
++ return device_for_each_child(dev, &off, pcie_port_device_iter);
+ }
+
+ int pcie_port_device_resume_noirq(struct device *dev)
+ {
+ size_t off = offsetof(struct pcie_port_service_driver, resume_noirq);
+- return device_for_each_child(dev, &off, pm_iter);
++ return device_for_each_child(dev, &off, pcie_port_device_iter);
+ }
+
+ /**
+@@ -408,7 +408,7 @@ int pcie_port_device_resume_noirq(struct device *dev)
+ int pcie_port_device_resume(struct device *dev)
+ {
+ size_t off = offsetof(struct pcie_port_service_driver, resume);
+- return device_for_each_child(dev, &off, pm_iter);
++ return device_for_each_child(dev, &off, pcie_port_device_iter);
+ }
+
+ /**
+@@ -418,7 +418,7 @@ int pcie_port_device_resume(struct device *dev)
+ int pcie_port_device_runtime_suspend(struct device *dev)
+ {
+ size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend);
+- return device_for_each_child(dev, &off, pm_iter);
++ return device_for_each_child(dev, &off, pcie_port_device_iter);
+ }
+
+ /**
+@@ -428,7 +428,7 @@ int pcie_port_device_runtime_suspend(struct device *dev)
+ int pcie_port_device_runtime_resume(struct device *dev)
+ {
+ size_t off = offsetof(struct pcie_port_service_driver, runtime_resume);
+- return device_for_each_child(dev, &off, pm_iter);
++ return device_for_each_child(dev, &off, pcie_port_device_iter);
+ }
+ #endif /* PM */
+
+diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
+index c7ff1eea225ab..1af74c3d9d5db 100644
+--- a/drivers/pci/pcie/portdrv_pci.c
++++ b/drivers/pci/pcie/portdrv_pci.c
+@@ -160,6 +160,9 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
+
+ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
+ {
++ size_t off = offsetof(struct pcie_port_service_driver, slot_reset);
++ device_for_each_child(&dev->dev, &off, pcie_port_device_iter);
++
+ pci_restore_state(dev);
+ pci_save_state(dev);
+ return PCI_ERS_RESULT_RECOVERED;
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 4537d1ea14fdc..ec17d42c2a155 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -1811,6 +1811,18 @@ static void quirk_alder_ioapic(struct pci_dev *pdev)
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic);
+ #endif
+
++static void quirk_no_msi(struct pci_dev *dev)
++{
++ pci_info(dev, "avoiding MSI to work around a hardware defect\n");
++ dev->no_msi = 1;
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4386, quirk_no_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4387, quirk_no_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4388, quirk_no_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4389, quirk_no_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438a, quirk_no_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438b, quirk_no_msi);
++
+ static void quirk_pcie_mch(struct pci_dev *pdev)
+ {
+ pdev->no_msi = 1;
+@@ -3612,6 +3624,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0033, quirk_no_bus_reset);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset);
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
+
+ /*
+ * Root port on some Cavium CN8xxx chips do not successfully complete a bus
+@@ -4102,6 +4115,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120,
+ quirk_dma_func1_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123,
+ quirk_dma_func1_alias);
++/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c136 */
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9125,
++ quirk_dma_func1_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128,
+ quirk_dma_func1_alias);
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
+@@ -4133,6 +4149,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220,
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
+ quirk_dma_func1_alias);
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9235,
++ quirk_dma_func1_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
+ quirk_dma_func1_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645,
+@@ -4807,6 +4825,26 @@ static int pci_quirk_brcm_acs(struct pci_dev *dev, u16 acs_flags)
+ PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
+ }
+
++/*
++ * Wangxun 10G/1G NICs have no ACS capability, and on multi-function
++ * devices, peer-to-peer transactions are not used between the functions.
++ * So add an ACS quirk for the devices below to isolate functions.
++ * SFxxx 1G NICs(em).
++ * RP1000/RP2000 10G NICs(sp).
++ */
++static int pci_quirk_wangxun_nic_acs(struct pci_dev *dev, u16 acs_flags)
++{
++ switch (dev->device) {
++ case 0x0100 ... 0x010F:
++ case 0x1001:
++ case 0x2001:
++ return pci_acs_ctrl_enabled(acs_flags,
++ PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF);
++ }
++
++ return false;
++}
++
+ static const struct pci_dev_acs_enabled {
+ u16 vendor;
+ u16 device;
+@@ -4907,6 +4945,9 @@ static const struct pci_dev_acs_enabled {
+ { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+ /* Broadcom multi-function device */
+ { PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs },
++ { PCI_VENDOR_ID_BROADCOM, 0x1750, pci_quirk_mf_endpoint_acs },
++ { PCI_VENDOR_ID_BROADCOM, 0x1751, pci_quirk_mf_endpoint_acs },
++ { PCI_VENDOR_ID_BROADCOM, 0x1752, pci_quirk_mf_endpoint_acs },
+ { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
+ /* Amazon Annapurna Labs */
+ { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
+@@ -4949,6 +4990,8 @@ static const struct pci_dev_acs_enabled {
+ { PCI_VENDOR_ID_NXP, 0x8d9b, pci_quirk_nxp_rp_acs },
+ /* Zhaoxin Root/Downstream Ports */
+ { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
++ /* Wangxun nics */
++ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+ { 0 }
+ };
+
+@@ -5309,6 +5352,7 @@ static void quirk_no_flr(struct pci_dev *dev)
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1487, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x148c, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr);
++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr);
+
+@@ -5340,11 +5384,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
+ */
+ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
+ {
+- if ((pdev->device == 0x7312 && pdev->revision != 0x00) ||
+- (pdev->device == 0x7340 && pdev->revision != 0xc5) ||
+- (pdev->device == 0x7341 && pdev->revision != 0x00))
+- return;
+-
+ if (pdev->device == 0x15d8) {
+ if (pdev->revision == 0xcf &&
+ pdev->subsystem_vendor == 0xea50 &&
+@@ -5366,10 +5405,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats);
+ /* AMD Iceland dGPU */
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
+ /* AMD Navi10 dGPU */
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731f, quirk_amd_harvest_no_ats);
+ /* AMD Navi14 dGPU */
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats);
+ /* AMD Raven platform iGPU */
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
+ #endif /* CONFIG_PCI_ATS */
+@@ -5795,3 +5843,9 @@ static void apex_pci_fixup_class(struct pci_dev *pdev)
+ }
+ DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a,
+ PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class);
++
++static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
++{
++ pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
+diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
+index 2ce636937c6ea..16d291e10627b 100644
+--- a/drivers/pci/setup-bus.c
++++ b/drivers/pci/setup-bus.c
+@@ -1878,12 +1878,67 @@ static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res,
+ add_size = size - new_size;
+ pci_dbg(bridge, "bridge window %pR shrunken by %pa\n", res,
+ &add_size);
++ } else {
++ return;
+ }
+
+ res->end = res->start + new_size - 1;
+ remove_from_list(add_list, res);
+ }
+
++static void remove_dev_resource(struct resource *avail, struct pci_dev *dev,
++ struct resource *res)
++{
++ resource_size_t size, align, tmp;
++
++ size = resource_size(res);
++ if (!size)
++ return;
++
++ align = pci_resource_alignment(dev, res);
++ align = align ? ALIGN(avail->start, align) - avail->start : 0;
++ tmp = align + size;
++ avail->start = min(avail->start + tmp, avail->end + 1);
++}
++
++static void remove_dev_resources(struct pci_dev *dev, struct resource *io,
++ struct resource *mmio,
++ struct resource *mmio_pref)
++{
++ int i;
++
++ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++ struct resource *res = &dev->resource[i];
++
++ if (resource_type(res) == IORESOURCE_IO) {
++ remove_dev_resource(io, dev, res);
++ } else if (resource_type(res) == IORESOURCE_MEM) {
++
++ /*
++ * Make sure prefetchable memory is reduced from
++ * the correct resource. Specifically we put 32-bit
++ * prefetchable memory in non-prefetchable window
++			 * if there is a 64-bit prefetchable window.
++ *
++ * See comments in __pci_bus_size_bridges() for
++ * more information.
++ */
++ if ((res->flags & IORESOURCE_PREFETCH) &&
++ ((res->flags & IORESOURCE_MEM_64) ==
++ (mmio_pref->flags & IORESOURCE_MEM_64)))
++ remove_dev_resource(mmio_pref, dev, res);
++ else
++ remove_dev_resource(mmio, dev, res);
++ }
++ }
++}
++
++/*
++ * io, mmio and mmio_pref contain the total amount of bridge window space
++ * available. This includes the minimal space needed to cover all the
++ * existing devices on the bus and the possible extra space that can be
++ * shared with the bridges.
++ */
+ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
+ struct list_head *add_list,
+ struct resource io,
+@@ -1893,7 +1948,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
+ unsigned int normal_bridges = 0, hotplug_bridges = 0;
+ struct resource *io_res, *mmio_res, *mmio_pref_res;
+ struct pci_dev *dev, *bridge = bus->self;
+- resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align;
++ resource_size_t io_per_b, mmio_per_b, mmio_pref_per_b, align;
+
+ io_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW];
+ mmio_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW];
+@@ -1937,94 +1992,88 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
+ normal_bridges++;
+ }
+
++ if (!(hotplug_bridges + normal_bridges))
++ return;
++
+ /*
+- * There is only one bridge on the bus so it gets all available
+- * resources which it can then distribute to the possible hotplug
+- * bridges below.
++ * Calculate the amount of space we can forward from "bus" to any
++ * downstream buses, i.e., the space left over after assigning the
++ * BARs and windows on "bus".
+ */
+- if (hotplug_bridges + normal_bridges == 1) {
+- dev = list_first_entry(&bus->devices, struct pci_dev, bus_list);
+- if (dev->subordinate)
+- pci_bus_distribute_available_resources(dev->subordinate,
+- add_list, io, mmio, mmio_pref);
+- return;
++ list_for_each_entry(dev, &bus->devices, bus_list) {
++ if (!dev->is_virtfn)
++ remove_dev_resources(dev, &io, &mmio, &mmio_pref);
+ }
+
+- if (hotplug_bridges == 0)
+- return;
+-
+ /*
+- * Calculate the total amount of extra resource space we can
+- * pass to bridges below this one. This is basically the
+- * extra space reduced by the minimal required space for the
+- * non-hotplug bridges.
++ * If there is at least one hotplug bridge on this bus it gets all
++ * the extra resource space that was left after the reductions
++ * above.
++ *
++ * If there are no hotplug bridges the extra resource space is
++ * split between non-hotplug bridges. This is to allow possible
++ * hotplug bridges below them to get the extra space as well.
+ */
++ if (hotplug_bridges) {
++ io_per_b = div64_ul(resource_size(&io), hotplug_bridges);
++ mmio_per_b = div64_ul(resource_size(&mmio), hotplug_bridges);
++ mmio_pref_per_b = div64_ul(resource_size(&mmio_pref),
++ hotplug_bridges);
++ } else {
++ io_per_b = div64_ul(resource_size(&io), normal_bridges);
++ mmio_per_b = div64_ul(resource_size(&mmio), normal_bridges);
++ mmio_pref_per_b = div64_ul(resource_size(&mmio_pref),
++ normal_bridges);
++ }
++
+ for_each_pci_bridge(dev, bus) {
+- resource_size_t used_size;
+ struct resource *res;
++ struct pci_bus *b;
+
+- if (dev->is_hotplug_bridge)
++ b = dev->subordinate;
++ if (!b)
++ continue;
++ if (hotplug_bridges && !dev->is_hotplug_bridge)
+ continue;
+
++ res = &dev->resource[PCI_BRIDGE_IO_WINDOW];
++
+ /*
+- * Reduce the available resource space by what the
+- * bridge and devices below it occupy.
++ * Make sure the split resource space is properly aligned
++ * for bridge windows (align it down to avoid going above
++ * what is available).
+ */
+- res = &dev->resource[PCI_BRIDGE_IO_WINDOW];
+ align = pci_resource_alignment(dev, res);
+- align = align ? ALIGN(io.start, align) - io.start : 0;
+- used_size = align + resource_size(res);
+- if (!res->parent)
+- io.start = min(io.start + used_size, io.end + 1);
++ io.end = align ? io.start + ALIGN_DOWN(io_per_b, align) - 1
++ : io.start + io_per_b - 1;
++
++ /*
++		 * The x_per_b holds the extra resource space that can be
++		 * added for each bridge, but the minimal required space is
++		 * already reserved as well, so adjust x.start down
++		 * accordingly to cover the whole space.
++ */
++ io.start -= resource_size(res);
+
+ res = &dev->resource[PCI_BRIDGE_MEM_WINDOW];
+ align = pci_resource_alignment(dev, res);
+- align = align ? ALIGN(mmio.start, align) - mmio.start : 0;
+- used_size = align + resource_size(res);
+- if (!res->parent)
+- mmio.start = min(mmio.start + used_size, mmio.end + 1);
++ mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_b, align) - 1
++ : mmio.start + mmio_per_b - 1;
++ mmio.start -= resource_size(res);
+
+ res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW];
+ align = pci_resource_alignment(dev, res);
+- align = align ? ALIGN(mmio_pref.start, align) -
+- mmio_pref.start : 0;
+- used_size = align + resource_size(res);
+- if (!res->parent)
+- mmio_pref.start = min(mmio_pref.start + used_size,
+- mmio_pref.end + 1);
+- }
+-
+- io_per_hp = div64_ul(resource_size(&io), hotplug_bridges);
+- mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges);
+- mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref),
+- hotplug_bridges);
+-
+- /*
+- * Go over devices on this bus and distribute the remaining
+- * resource space between hotplug bridges.
+- */
+- for_each_pci_bridge(dev, bus) {
+- struct pci_bus *b;
+-
+- b = dev->subordinate;
+- if (!b || !dev->is_hotplug_bridge)
+- continue;
+-
+- /*
+- * Distribute available extra resources equally between
+- * hotplug-capable downstream ports taking alignment into
+- * account.
+- */
+- io.end = io.start + io_per_hp - 1;
+- mmio.end = mmio.start + mmio_per_hp - 1;
+- mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1;
++ mmio_pref.end = align ? mmio_pref.start +
++ ALIGN_DOWN(mmio_pref_per_b, align) - 1
++ : mmio_pref.start + mmio_pref_per_b - 1;
++ mmio_pref.start -= resource_size(res);
+
+ pci_bus_distribute_available_resources(b, add_list, io, mmio,
+ mmio_pref);
+
+- io.start += io_per_hp;
+- mmio.start += mmio_per_hp;
+- mmio_pref.start += mmio_pref_per_hp;
++ io.start += io.end + 1;
++ mmio.start += mmio.end + 1;
++ mmio_pref.start += mmio_pref.end + 1;
+ }
+ }
+
+diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
+index 7f1acb3918d0c..875d50c16f19d 100644
+--- a/drivers/pci/setup-res.c
++++ b/drivers/pci/setup-res.c
+@@ -210,6 +210,17 @@ static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
+
+ root = pci_find_parent_resource(dev, res);
+ if (!root) {
++ /*
++ * If dev is behind a bridge, accesses will only reach it
++ * if res is inside the relevant bridge window.
++ */
++ if (pci_upstream_bridge(dev))
++ return -ENXIO;
++
++ /*
++ * On the root bus, assume the host bridge will forward
++ * everything.
++ */
+ if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
+ else
+diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
+index 0b301f8be9ed5..d021ef3fb165b 100644
+--- a/drivers/pci/switch/switchtec.c
++++ b/drivers/pci/switch/switchtec.c
+@@ -552,21 +552,20 @@ static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
+ rc = copy_to_user(data, &stuser->return_code,
+ sizeof(stuser->return_code));
+ if (rc) {
+- rc = -EFAULT;
+- goto out;
++ mutex_unlock(&stdev->mrpc_mutex);
++ return -EFAULT;
+ }
+
+ data += sizeof(stuser->return_code);
+ rc = copy_to_user(data, &stuser->data,
+ size - sizeof(stuser->return_code));
+ if (rc) {
+- rc = -EFAULT;
+- goto out;
++ mutex_unlock(&stdev->mrpc_mutex);
++ return -EFAULT;
+ }
+
+ stuser_set_state(stuser, MRPC_IDLE);
+
+-out:
+ mutex_unlock(&stdev->mrpc_mutex);
+
+ if (stuser->status == SWITCHTEC_MRPC_STATUS_DONE)
+diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
+index d13b8d1a780aa..4868ec03e32f7 100644
+--- a/drivers/pcmcia/Kconfig
++++ b/drivers/pcmcia/Kconfig
+@@ -151,7 +151,7 @@ config TCIC
+
+ config PCMCIA_ALCHEMY_DEVBOARD
+ tristate "Alchemy Db/Pb1xxx PCMCIA socket services"
+- depends on MIPS_ALCHEMY && PCMCIA
++ depends on MIPS_DB1XXX && PCMCIA
+ help
+ Enable this driver of you want PCMCIA support on your Alchemy
+ Db1000, Db/Pb1100, Db/Pb1500, Db/Pb1550, Db/Pb1200, DB1300
+diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
+index e211e2619680c..f70197154a362 100644
+--- a/drivers/pcmcia/cs.c
++++ b/drivers/pcmcia/cs.c
+@@ -666,18 +666,16 @@ static int pccardd(void *__skt)
+ if (events || sysfs_events)
+ continue;
+
++ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+
+- set_current_state(TASK_INTERRUPTIBLE);
+-
+ schedule();
+
+- /* make sure we are running */
+- __set_current_state(TASK_RUNNING);
+-
+ try_to_freeze();
+ }
++ /* make sure we are running before we exit */
++ __set_current_state(TASK_RUNNING);
+
+ /* shut down socket, if a device is still present */
+ if (skt->state & SOCKET_PRESENT) {
+diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
+index bb15a8bdbaab5..e6c90c0bb7646 100644
+--- a/drivers/pcmcia/rsrc_nonstatic.c
++++ b/drivers/pcmcia/rsrc_nonstatic.c
+@@ -690,6 +690,9 @@ static struct resource *__nonstatic_find_io_region(struct pcmcia_socket *s,
+ unsigned long min = base;
+ int ret;
+
++ if (!res)
++ return NULL;
++
+ data.mask = align - 1;
+ data.offset = base & data.mask;
+ data.map = &s_data->io_db;
+@@ -809,6 +812,9 @@ static struct resource *nonstatic_find_mem_region(u_long base, u_long num,
+ unsigned long min, max;
+ int ret, i, j;
+
++ if (!res)
++ return NULL;
++
+ low = low || !(s->features & SS_CAP_PAGE_REGS);
+
+ data.mask = align - 1;
+@@ -1047,6 +1053,8 @@ static void nonstatic_release_resource_db(struct pcmcia_socket *s)
+ q = p->next;
+ kfree(p);
+ }
++
++ kfree(data);
+ }
+
+
+diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
+index bc3cba5f8c5dc..40945343c4cc1 100644
+--- a/drivers/perf/arm-cmn.c
++++ b/drivers/perf/arm-cmn.c
+@@ -1254,9 +1254,10 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
+ if (dtc->irq < 0)
+ return dtc->irq;
+
+- writel_relaxed(0, dtc->base + CMN_DT_PMCR);
++ writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
++ writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
++ writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
+ writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+- writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
+
+ /* We do at least know that a DTC's XP must be in that DTC's domain */
+ xp = arm_cmn_node_to_xp(dn);
+@@ -1303,7 +1304,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
+ dn->type = CMN_TYPE_RNI;
+ }
+
+- writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL);
++ arm_cmn_set_state(cmn, CMN_STATE_DISABLED);
+
+ return 0;
+ }
+@@ -1561,7 +1562,8 @@ static int arm_cmn_probe(struct platform_device *pdev)
+
+ err = perf_pmu_register(&cmn->pmu, name, -1);
+ if (err)
+- cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
++ cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
++
+ return err;
+ }
+
+@@ -1572,7 +1574,7 @@ static int arm_cmn_remove(struct platform_device *pdev)
+ writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+
+ perf_pmu_unregister(&cmn->pmu);
+- cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
++ cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
+ return 0;
+ }
+
+diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
+index 280a6ae3e27cf..54aa4658fb36e 100644
+--- a/drivers/perf/arm_dmc620_pmu.c
++++ b/drivers/perf/arm_dmc620_pmu.c
+@@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = {
+
+ static int __init dmc620_pmu_init(void)
+ {
++ int ret;
++
+ cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ DMC620_DRVNAME,
+ NULL,
+@@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void)
+ if (cpuhp_state_num < 0)
+ return cpuhp_state_num;
+
+- return platform_driver_register(&dmc620_pmu_driver);
++ ret = platform_driver_register(&dmc620_pmu_driver);
++ if (ret)
++ cpuhp_remove_multi_state(cpuhp_state_num);
++
++ return ret;
+ }
+
+ static void __exit dmc620_pmu_exit(void)
+diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
+index a36698a90d2f2..54b8ba032c787 100644
+--- a/drivers/perf/arm_dsu_pmu.c
++++ b/drivers/perf/arm_dsu_pmu.c
+@@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void)
+ if (ret < 0)
+ return ret;
+ dsu_pmu_cpuhp_state = ret;
+- return platform_driver_register(&dsu_pmu_driver);
++ ret = platform_driver_register(&dsu_pmu_driver);
++ if (ret)
++ cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
++
++ return ret;
+ }
+
+ static void __exit dsu_pmu_exit(void)
+diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
+index 295cc7952d0ed..57d20cf3da7a3 100644
+--- a/drivers/perf/arm_pmu.c
++++ b/drivers/perf/arm_pmu.c
+@@ -398,6 +398,9 @@ validate_group(struct perf_event *event)
+ if (!validate_event(event->pmu, &fake_pmu, leader))
+ return -EINVAL;
+
++ if (event == leader)
++ return 0;
++
+ for_each_sibling_event(sibling, leader) {
+ if (!validate_event(event->pmu, &fake_pmu, sibling))
+ return -EINVAL;
+@@ -487,12 +490,7 @@ __hw_perf_event_init(struct perf_event *event)
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+- if (event->group_leader != event) {
+- if (validate_group(event) != 0)
+- return -EINVAL;
+- }
+-
+- return 0;
++ return validate_group(event);
+ }
+
+ static int armpmu_event_init(struct perf_event *event)
+diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
+index 513de1f54e2d7..933b96e243b84 100644
+--- a/drivers/perf/arm_pmu_platform.c
++++ b/drivers/perf/arm_pmu_platform.c
+@@ -117,7 +117,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
+
+ if (num_irqs == 1) {
+ int irq = platform_get_irq(pdev, 0);
+- if (irq && irq_is_percpu_devid(irq))
++ if ((irq > 0) && irq_is_percpu_devid(irq))
+ return pmu_parse_percpu_irq(pmu, irq);
+ }
+
+diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
+index 226348822ab39..5933ad151f869 100644
+--- a/drivers/perf/arm_smmuv3_pmu.c
++++ b/drivers/perf/arm_smmuv3_pmu.c
+@@ -896,6 +896,8 @@ static struct platform_driver smmu_pmu_driver = {
+
+ static int __init arm_smmu_pmu_init(void)
+ {
++ int ret;
++
+ cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/arm/pmcg:online",
+ NULL,
+@@ -903,7 +905,11 @@ static int __init arm_smmu_pmu_init(void)
+ if (cpuhp_state_num < 0)
+ return cpuhp_state_num;
+
+- return platform_driver_register(&smmu_pmu_driver);
++ ret = platform_driver_register(&smmu_pmu_driver);
++ if (ret)
++ cpuhp_remove_multi_state(cpuhp_state_num);
++
++ return ret;
+ }
+ module_init(arm_smmu_pmu_init);
+
+diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
+index d44bcc29d99c8..cd5945e17fdf7 100644
+--- a/drivers/perf/arm_spe_pmu.c
++++ b/drivers/perf/arm_spe_pmu.c
+@@ -39,6 +39,24 @@
+ #include <asm/mmu.h>
+ #include <asm/sysreg.h>
+
++/*
++ * Cache if the event is allowed to trace Context information.
++ * This allows us to perform the check, i.e., perfmon_capable(),
++ * in the context of the event owner, once, during the event_init().
++ */
++#define SPE_PMU_HW_FLAGS_CX BIT(0)
++
++static void set_spe_event_has_cx(struct perf_event *event)
++{
++ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
++ event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
++}
++
++static bool get_spe_event_has_cx(struct perf_event *event)
++{
++ return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX);
++}
++
+ #define ARM_SPE_BUF_PAD_BYTE 0
+
+ struct arm_spe_pmu_buf {
+@@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
+ if (!attr->exclude_kernel)
+ reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
+
+- if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
++ if (get_spe_event_has_cx(event))
+ reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
+
+ return reg;
+@@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
+ !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
+ return -EOPNOTSUPP;
+
++ set_spe_event_has_cx(event);
+ reg = arm_spe_event_to_pmscr(event);
+ if (!perfmon_capable() &&
+ (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
+- BIT(SYS_PMSCR_EL1_CX_SHIFT) |
+ BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
+ return -EACCES;
+
+diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
+index 94ebc1ecace7c..4daa782c48df0 100644
+--- a/drivers/perf/fsl_imx8_ddr_perf.c
++++ b/drivers/perf/fsl_imx8_ddr_perf.c
+@@ -29,7 +29,7 @@
+ #define CNTL_OVER_MASK 0xFFFFFFFE
+
+ #define CNTL_CSV_SHIFT 24
+-#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
++#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT)
+
+ #define EVENT_CYCLES_ID 0
+ #define EVENT_CYCLES_COUNTER 0
+@@ -102,6 +102,7 @@ struct ddr_pmu {
+ const struct fsl_ddr_devtype_data *devtype_data;
+ int irq;
+ int id;
++ int active_counter;
+ };
+
+ static ssize_t ddr_perf_identifier_show(struct device *dev,
+@@ -496,6 +497,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
+
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+
++ if (!pmu->active_counter++)
++ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
++ EVENT_CYCLES_COUNTER, true);
++
+ hwc->state = 0;
+ }
+
+@@ -550,6 +555,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags)
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
+ ddr_perf_event_update(event);
+
++ if (!--pmu->active_counter)
++ ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
++ EVENT_CYCLES_COUNTER, false);
++
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+@@ -568,25 +577,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
+
+ static void ddr_perf_pmu_enable(struct pmu *pmu)
+ {
+- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+-
+- /* enable cycle counter if cycle is not active event list */
+- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+- ddr_perf_counter_enable(ddr_pmu,
+- EVENT_CYCLES_ID,
+- EVENT_CYCLES_COUNTER,
+- true);
+ }
+
+ static void ddr_perf_pmu_disable(struct pmu *pmu)
+ {
+- struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+-
+- if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+- ddr_perf_counter_enable(ddr_pmu,
+- EVENT_CYCLES_ID,
+- EVENT_CYCLES_COUNTER,
+- false);
+ }
+
+ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
+index 5b093badd0f65..f60e79fac2021 100644
+--- a/drivers/perf/qcom_l2_pmu.c
++++ b/drivers/perf/qcom_l2_pmu.c
+@@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
+ {
+ u64 mpidr;
+ int cpu_cluster_id;
+- struct cluster_pmu *cluster = NULL;
++ struct cluster_pmu *cluster;
+
+ /*
+ * This assumes that the cluster_id is in MPIDR[aff1] for
+@@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
+ cluster->cluster_id);
+ cpumask_set_cpu(cpu, &cluster->cluster_cpus);
+ *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
+- break;
++ return cluster;
+ }
+
+- return cluster;
++ return NULL;
+ }
+
+ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
+index cd2332bf0e31a..fdbd64c03e12b 100644
+--- a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
++++ b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c
+@@ -9,6 +9,7 @@
+
+ #include <linux/bitfield.h>
+ #include <linux/bitops.h>
++#include <linux/bits.h>
+ #include <linux/clk.h>
+ #include <linux/delay.h>
+ #include <linux/io.h>
+@@ -250,7 +251,7 @@ static int phy_meson_axg_mipi_dphy_power_on(struct phy *phy)
+ (DIV_ROUND_UP(priv->config.clk_zero, temp) << 16) |
+ (DIV_ROUND_UP(priv->config.clk_prepare, temp) << 24));
+ regmap_write(priv->regmap, MIPI_DSI_CLK_TIM1,
+- DIV_ROUND_UP(priv->config.clk_pre, temp));
++ DIV_ROUND_UP(priv->config.clk_pre, BITS_PER_BYTE));
+
+ regmap_write(priv->regmap, MIPI_DSI_HS_TIM,
+ DIV_ROUND_UP(priv->config.hs_exit, temp) |
+diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c b/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
+index 1027ece6ca123..a3e1108b736d6 100644
+--- a/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
++++ b/drivers/phy/amlogic/phy-meson-axg-mipi-pcie-analog.c
+@@ -197,7 +197,7 @@ static int phy_axg_mipi_pcie_analog_probe(struct platform_device *pdev)
+ struct phy_provider *phy;
+ struct device *dev = &pdev->dev;
+ struct phy_axg_mipi_pcie_analog_priv *priv;
+- struct device_node *np = dev->of_node;
++ struct device_node *np = dev->of_node, *parent_np;
+ struct regmap *map;
+ int ret;
+
+@@ -206,7 +206,9 @@ static int phy_axg_mipi_pcie_analog_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ /* Get the hhi system controller node */
+- map = syscon_node_to_regmap(of_get_parent(dev->of_node));
++ parent_np = of_get_parent(dev->of_node);
++ map = syscon_node_to_regmap(parent_np);
++ of_node_put(parent_np);
+ if (IS_ERR(map)) {
+ dev_err(dev,
+ "failed to get HHI regmap\n");
+diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
+index 5b471ab80fe28..54d65a6f0fccf 100644
+--- a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
++++ b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
+@@ -414,19 +414,19 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
+
+ ret = clk_prepare_enable(priv->clk_ref);
+ if (ret)
+- goto err_disable_clk_ref;
++ return ret;
+
+ priv->reset = devm_reset_control_array_get_exclusive(dev);
+- if (IS_ERR(priv->reset))
+- return PTR_ERR(priv->reset);
++ if (IS_ERR(priv->reset)) {
++ ret = PTR_ERR(priv->reset);
++ goto err_disable_clk_ref;
++ }
+
+ priv->phy = devm_phy_create(dev, np, &phy_g12a_usb3_pcie_ops);
+ if (IS_ERR(priv->phy)) {
+ ret = PTR_ERR(priv->phy);
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "failed to create PHY\n");
+-
+- return ret;
++ dev_err_probe(dev, ret, "failed to create PHY\n");
++ goto err_disable_clk_ref;
+ }
+
+ phy_set_drvdata(priv->phy, priv);
+@@ -434,8 +434,12 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
+
+ phy_provider = devm_of_phy_provider_register(dev,
+ phy_g12a_usb3_pcie_xlate);
++ if (IS_ERR(phy_provider)) {
++ ret = PTR_ERR(phy_provider);
++ goto err_disable_clk_ref;
++ }
+
+- return PTR_ERR_OR_ZERO(phy_provider);
++ return 0;
+
+ err_disable_clk_ref:
+ clk_disable_unprepare(priv->clk_ref);
+diff --git a/drivers/phy/amlogic/phy-meson-gxl-usb2.c b/drivers/phy/amlogic/phy-meson-gxl-usb2.c
+index 2b3c0d730f20f..db17c3448bfed 100644
+--- a/drivers/phy/amlogic/phy-meson-gxl-usb2.c
++++ b/drivers/phy/amlogic/phy-meson-gxl-usb2.c
+@@ -114,8 +114,10 @@ static int phy_meson_gxl_usb2_init(struct phy *phy)
+ return ret;
+
+ ret = clk_prepare_enable(priv->clk);
+- if (ret)
++ if (ret) {
++ reset_control_rearm(priv->reset);
+ return ret;
++ }
+
+ return 0;
+ }
+@@ -125,6 +127,7 @@ static int phy_meson_gxl_usb2_exit(struct phy *phy)
+ struct phy_meson_gxl_usb2_priv *priv = phy_get_drvdata(phy);
+
+ clk_disable_unprepare(priv->clk);
++ reset_control_rearm(priv->reset);
+
+ return 0;
+ }
+diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c
+index cf10bed40528a..dd96763911b8b 100644
+--- a/drivers/phy/amlogic/phy-meson8b-usb2.c
++++ b/drivers/phy/amlogic/phy-meson8b-usb2.c
+@@ -154,6 +154,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy)
+ ret = clk_prepare_enable(priv->clk_usb_general);
+ if (ret) {
+ dev_err(&phy->dev, "Failed to enable USB general clock\n");
++ reset_control_rearm(priv->reset);
+ return ret;
+ }
+
+@@ -161,6 +162,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy)
+ if (ret) {
+ dev_err(&phy->dev, "Failed to enable USB DDR clock\n");
+ clk_disable_unprepare(priv->clk_usb_general);
++ reset_control_rearm(priv->reset);
+ return ret;
+ }
+
+@@ -199,6 +201,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy)
+ dev_warn(&phy->dev, "USB ID detect failed!\n");
+ clk_disable_unprepare(priv->clk_usb);
+ clk_disable_unprepare(priv->clk_usb_general);
++ reset_control_rearm(priv->reset);
+ return -EINVAL;
+ }
+ }
+@@ -218,6 +221,7 @@ static int phy_meson8b_usb2_power_off(struct phy *phy)
+
+ clk_disable_unprepare(priv->clk_usb);
+ clk_disable_unprepare(priv->clk_usb_general);
++ reset_control_rearm(priv->reset);
+
+ /* power off the PHY by putting it into reset mode */
+ regmap_update_bits(priv->regmap, REG_CTRL, REG_CTRL_POWER_ON_RESET,
+@@ -265,8 +269,9 @@ static int phy_meson8b_usb2_probe(struct platform_device *pdev)
+ return PTR_ERR(priv->clk_usb);
+
+ priv->reset = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
+- if (PTR_ERR(priv->reset) == -EPROBE_DEFER)
+- return PTR_ERR(priv->reset);
++ if (IS_ERR(priv->reset))
++ return dev_err_probe(&pdev->dev, PTR_ERR(priv->reset),
++ "Failed to get the reset line");
+
+ priv->dr_mode = of_usb_get_dr_mode_by_phy(pdev->dev.of_node, -1);
+ if (priv->dr_mode == USB_DR_MODE_UNKNOWN) {
+diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
+index fd92b73b71095..1dcfa3bd1442d 100644
+--- a/drivers/phy/broadcom/Kconfig
++++ b/drivers/phy/broadcom/Kconfig
+@@ -95,8 +95,7 @@ config PHY_BRCM_USB
+ depends on OF
+ select GENERIC_PHY
+ select SOC_BRCMSTB if ARCH_BRCMSTB
+- default ARCH_BCM4908
+- default ARCH_BRCMSTB
++ default ARCH_BCM4908 || ARCH_BRCMSTB
+ help
+ Enable this to support the Broadcom STB USB PHY.
+ This driver is required by the USB XHCI, EHCI and OHCI
+diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c
+index 9391ab42a12b3..dd0f66288fbdd 100644
+--- a/drivers/phy/broadcom/phy-brcm-usb-init.c
++++ b/drivers/phy/broadcom/phy-brcm-usb-init.c
+@@ -79,6 +79,7 @@
+
+ enum brcm_family_type {
+ BRCM_FAMILY_3390A0,
++ BRCM_FAMILY_4908,
+ BRCM_FAMILY_7250B0,
+ BRCM_FAMILY_7271A0,
+ BRCM_FAMILY_7364A0,
+@@ -96,6 +97,7 @@ enum brcm_family_type {
+
+ static const char *family_names[BRCM_FAMILY_COUNT] = {
+ USB_BRCM_FAMILY(3390A0),
++ USB_BRCM_FAMILY(4908),
+ USB_BRCM_FAMILY(7250B0),
+ USB_BRCM_FAMILY(7271A0),
+ USB_BRCM_FAMILY(7364A0),
+@@ -203,6 +205,27 @@ usb_reg_bits_map_table[BRCM_FAMILY_COUNT][USB_CTRL_SELECTOR_COUNT] = {
+ USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK,
+ ENDIAN_SETTINGS, /* USB_CTRL_SETUP ENDIAN bits */
+ },
++ /* 4908 */
++ [BRCM_FAMILY_4908] = {
++ 0, /* USB_CTRL_SETUP_SCB1_EN_MASK */
++ 0, /* USB_CTRL_SETUP_SCB2_EN_MASK */
++ 0, /* USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK */
++ 0, /* USB_CTRL_SETUP_STRAP_IPP_SEL_MASK */
++ 0, /* USB_CTRL_SETUP_OC3_DISABLE_MASK */
++ 0, /* USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK */
++ 0, /* USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK */
++ USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK,
++ USB_CTRL_USB_PM_USB_PWRDN_MASK,
++ 0, /* USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK */
++ 0, /* USB_CTRL_USB30_CTL1_USB3_IOC_MASK */
++ 0, /* USB_CTRL_USB30_CTL1_USB3_IPP_MASK */
++ 0, /* USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK */
++ 0, /* USB_CTRL_USB_PM_SOFT_RESET_MASK */
++ 0, /* USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK */
++ 0, /* USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK */
++ 0, /* USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK */
++ 0, /* USB_CTRL_SETUP ENDIAN bits */
++ },
+ /* 7250b0 */
+ [BRCM_FAMILY_7250B0] = {
+ USB_CTRL_SETUP_SCB1_EN_MASK,
+@@ -559,6 +582,7 @@ static void brcmusb_usb3_pll_54mhz(struct brcm_usb_init_params *params)
+ */
+ switch (params->selected_family) {
+ case BRCM_FAMILY_3390A0:
++ case BRCM_FAMILY_4908:
+ case BRCM_FAMILY_7250B0:
+ case BRCM_FAMILY_7366C0:
+ case BRCM_FAMILY_74371A0:
+@@ -1004,6 +1028,18 @@ static const struct brcm_usb_init_ops bcm7445_ops = {
+ .set_dual_select = usb_set_dual_select,
+ };
+
++void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params)
++{
++ int fam;
++
++ fam = BRCM_FAMILY_4908;
++ params->selected_family = fam;
++ params->usb_reg_bits_map =
++ &usb_reg_bits_map_table[fam][0];
++ params->family_name = family_names[fam];
++ params->ops = &bcm7445_ops;
++}
++
+ void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params)
+ {
+ int fam;
+diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h
+index a39f30fa2e991..1ccb5ddab865c 100644
+--- a/drivers/phy/broadcom/phy-brcm-usb-init.h
++++ b/drivers/phy/broadcom/phy-brcm-usb-init.h
+@@ -64,6 +64,7 @@ struct brcm_usb_init_params {
+ bool suspend_with_clocks;
+ };
+
++void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params);
+ void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params);
+ void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params);
+ void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params);
+diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
+index 116fb23aebd99..c0c3ab9b2a153 100644
+--- a/drivers/phy/broadcom/phy-brcm-usb.c
++++ b/drivers/phy/broadcom/phy-brcm-usb.c
+@@ -18,6 +18,7 @@
+ #include <linux/soc/brcmstb/brcmstb.h>
+ #include <dt-bindings/phy/phy.h>
+ #include <linux/mfd/syscon.h>
++#include <linux/suspend.h>
+
+ #include "phy-brcm-usb-init.h"
+
+@@ -70,17 +71,40 @@ struct brcm_usb_phy_data {
+ int init_count;
+ int wake_irq;
+ struct brcm_usb_phy phys[BRCM_USB_PHY_ID_MAX];
++ struct notifier_block pm_notifier;
++ bool pm_active;
+ };
+
+ static s8 *node_reg_names[BRCM_REGS_MAX] = {
+ "crtl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec"
+ };
+
++static int brcm_pm_notifier(struct notifier_block *notifier,
++ unsigned long pm_event,
++ void *unused)
++{
++ struct brcm_usb_phy_data *priv =
++ container_of(notifier, struct brcm_usb_phy_data, pm_notifier);
++
++ switch (pm_event) {
++ case PM_HIBERNATION_PREPARE:
++ case PM_SUSPEND_PREPARE:
++ priv->pm_active = true;
++ break;
++ case PM_POST_RESTORE:
++ case PM_POST_HIBERNATION:
++ case PM_POST_SUSPEND:
++ priv->pm_active = false;
++ break;
++ }
++ return NOTIFY_DONE;
++}
++
+ static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id)
+ {
+- struct phy *gphy = dev_id;
++ struct device *dev = dev_id;
+
+- pm_wakeup_event(&gphy->dev, 0);
++ pm_wakeup_event(dev, 0);
+
+ return IRQ_HANDLED;
+ }
+@@ -91,6 +115,9 @@ static int brcm_usb_phy_init(struct phy *gphy)
+ struct brcm_usb_phy_data *priv =
+ container_of(phy, struct brcm_usb_phy_data, phys[phy->id]);
+
++ if (priv->pm_active)
++ return 0;
++
+ /*
+ * Use a lock to make sure a second caller waits until
+ * the base phy is inited before using it.
+@@ -120,6 +147,9 @@ static int brcm_usb_phy_exit(struct phy *gphy)
+ struct brcm_usb_phy_data *priv =
+ container_of(phy, struct brcm_usb_phy_data, phys[phy->id]);
+
++ if (priv->pm_active)
++ return 0;
++
+ dev_dbg(&gphy->dev, "EXIT\n");
+ if (phy->id == BRCM_USB_PHY_2_0)
+ brcm_usb_uninit_eohci(&priv->ini);
+@@ -253,6 +283,15 @@ static const struct attribute_group brcm_usb_phy_group = {
+ .attrs = brcm_usb_phy_attrs,
+ };
+
++static const struct match_chip_info chip_info_4908 = {
++ .init_func = &brcm_usb_dvr_init_4908,
++ .required_regs = {
++ BRCM_REGS_CTRL,
++ BRCM_REGS_XHCI_EC,
++ -1,
++ },
++};
++
+ static const struct match_chip_info chip_info_7216 = {
+ .init_func = &brcm_usb_dvr_init_7216,
+ .required_regs = {
+@@ -288,7 +327,7 @@ static const struct match_chip_info chip_info_7445 = {
+ static const struct of_device_id brcm_usb_dt_ids[] = {
+ {
+ .compatible = "brcm,bcm4908-usb-phy",
+- .data = &chip_info_7445,
++ .data = &chip_info_4908,
+ },
+ {
+ .compatible = "brcm,bcm7216-usb-phy",
+@@ -412,7 +451,7 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev,
+ if (priv->wake_irq >= 0) {
+ err = devm_request_irq(dev, priv->wake_irq,
+ brcm_usb_phy_wake_isr, 0,
+- dev_name(dev), gphy);
++ dev_name(dev), dev);
+ if (err < 0)
+ return err;
+ device_set_wakeup_capable(dev, 1);
+@@ -488,6 +527,9 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
+ if (err)
+ return err;
+
++ priv->pm_notifier.notifier_call = brcm_pm_notifier;
++ register_pm_notifier(&priv->pm_notifier);
++
+ mutex_init(&priv->mutex);
+
+ /* make sure invert settings are correct */
+@@ -528,7 +570,10 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
+
+ static int brcm_usb_phy_remove(struct platform_device *pdev)
+ {
++ struct brcm_usb_phy_data *priv = dev_get_drvdata(&pdev->dev);
++
+ sysfs_remove_group(&pdev->dev.kobj, &brcm_usb_phy_group);
++ unregister_pm_notifier(&priv->pm_notifier);
+
+ return 0;
+ }
+@@ -539,6 +584,7 @@ static int brcm_usb_phy_suspend(struct device *dev)
+ struct brcm_usb_phy_data *priv = dev_get_drvdata(dev);
+
+ if (priv->init_count) {
++ dev_dbg(dev, "SUSPEND\n");
+ priv->ini.wake_enabled = device_may_wakeup(dev);
+ if (priv->phys[BRCM_USB_PHY_3_0].inited)
+ brcm_usb_uninit_xhci(&priv->ini);
+@@ -578,6 +624,7 @@ static int brcm_usb_phy_resume(struct device *dev)
+ * Uninitialize anything that wasn't previously initialized.
+ */
+ if (priv->init_count) {
++ dev_dbg(dev, "RESUME\n");
+ if (priv->wake_irq >= 0)
+ disable_irq_wake(priv->wake_irq);
+ brcm_usb_init_common(&priv->ini);
+diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c
+index e93818e3991fd..3e2d096d54fd7 100644
+--- a/drivers/phy/cadence/phy-cadence-sierra.c
++++ b/drivers/phy/cadence/phy-cadence-sierra.c
+@@ -215,7 +215,10 @@ static const int pll_mux_parent_index[][SIERRA_NUM_CMN_PLLC_PARENTS] = {
+ [CMN_PLLLC1] = { PLL1_REFCLK, PLL0_REFCLK },
+ };
+
+-static u32 cdns_sierra_pll_mux_table[] = { 0, 1 };
++static u32 cdns_sierra_pll_mux_table[][SIERRA_NUM_CMN_PLLC_PARENTS] = {
++ [CMN_PLLLC] = { 0, 1 },
++ [CMN_PLLLC1] = { 1, 0 },
++};
+
+ struct cdns_sierra_inst {
+ struct phy *phy;
+@@ -436,11 +439,25 @@ static const struct phy_ops ops = {
+ static u8 cdns_sierra_pll_mux_get_parent(struct clk_hw *hw)
+ {
+ struct cdns_sierra_pll_mux *mux = to_cdns_sierra_pll_mux(hw);
++ struct regmap_field *plllc1en_field = mux->plllc1en_field;
++ struct regmap_field *termen_field = mux->termen_field;
+ struct regmap_field *field = mux->pfdclk_sel_preg;
+ unsigned int val;
++ int index;
+
+ regmap_field_read(field, &val);
+- return clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table, 0, val);
++
++ if (strstr(clk_hw_get_name(hw), clk_names[CDNS_SIERRA_PLL_CMNLC1])) {
++ index = clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table[CMN_PLLLC1], 0, val);
++ if (index == 1) {
++ regmap_field_write(plllc1en_field, 1);
++ regmap_field_write(termen_field, 1);
++ }
++ } else {
++ index = clk_mux_val_to_index(hw, cdns_sierra_pll_mux_table[CMN_PLLLC], 0, val);
++ }
++
++ return index;
+ }
+
+ static int cdns_sierra_pll_mux_set_parent(struct clk_hw *hw, u8 index)
+@@ -458,7 +475,11 @@ static int cdns_sierra_pll_mux_set_parent(struct clk_hw *hw, u8 index)
+ ret |= regmap_field_write(termen_field, 1);
+ }
+
+- val = cdns_sierra_pll_mux_table[index];
++ if (strstr(clk_hw_get_name(hw), clk_names[CDNS_SIERRA_PLL_CMNLC1]))
++ val = cdns_sierra_pll_mux_table[CMN_PLLLC1][index];
++ else
++ val = cdns_sierra_pll_mux_table[CMN_PLLLC][index];
++
+ ret |= regmap_field_write(field, val);
+
+ return ret;
+@@ -496,8 +517,8 @@ static int cdns_sierra_pll_mux_register(struct cdns_sierra_phy *sp,
+ for (i = 0; i < num_parents; i++) {
+ clk = sp->input_clks[pll_mux_parent_index[clk_index][i]];
+ if (IS_ERR_OR_NULL(clk)) {
+- dev_err(dev, "No parent clock for derived_refclk\n");
+- return PTR_ERR(clk);
++ dev_err(dev, "No parent clock for PLL mux clocks\n");
++ return IS_ERR(clk) ? PTR_ERR(clk) : -ENOENT;
+ }
+ parent_names[i] = __clk_get_name(clk);
+ }
+diff --git a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
+index 34a6a9a1ceb25..897c6bb4cbb8c 100644
+--- a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
++++ b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
+@@ -153,7 +153,7 @@ static int hisi_inno_phy_probe(struct platform_device *pdev)
+ phy_set_drvdata(phy, &priv->ports[i]);
+ i++;
+
+- if (i > INNO_PHY_PORT_NUM) {
++ if (i >= INNO_PHY_PORT_NUM) {
+ dev_warn(dev, "Support %d ports in maximum\n", i);
+ break;
+ }
+diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
+index 28ad9403c4414..67b005d5b9e35 100644
+--- a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
++++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
+@@ -146,6 +146,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ mipi_tx->driver_data = of_device_get_match_data(dev);
++ if (!mipi_tx->driver_data)
++ return -ENODEV;
+
+ mipi_tx->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mipi_tx->regs))
+diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c
+index cdcef865fe9e5..0649c08fe3108 100644
+--- a/drivers/phy/mediatek/phy-mtk-tphy.c
++++ b/drivers/phy/mediatek/phy-mtk-tphy.c
+@@ -12,6 +12,7 @@
+ #include <linux/iopoll.h>
+ #include <linux/mfd/syscon.h>
+ #include <linux/module.h>
++#include <linux/nvmem-consumer.h>
+ #include <linux/of_address.h>
+ #include <linux/of_device.h>
+ #include <linux/phy/phy.h>
+@@ -41,6 +42,9 @@
+ #define SSUSB_SIFSLV_V2_U3PHYD 0x200
+ #define SSUSB_SIFSLV_V2_U3PHYA 0x400
+
++#define U3P_MISC_REG1 0x04
++#define MR1_EFUSE_AUTO_LOAD_DIS BIT(6)
++
+ #define U3P_USBPHYACR0 0x000
+ #define PA0_RG_U2PLL_FORCE_ON BIT(15)
+ #define PA0_USB20_PLL_PREDIV GENMASK(7, 6)
+@@ -133,6 +137,8 @@
+ #define P3C_RG_SWRST_U3_PHYD_FORCE_EN BIT(24)
+
+ #define U3P_U3_PHYA_REG0 0x000
++#define P3A_RG_IEXT_INTR GENMASK(15, 10)
++#define P3A_RG_IEXT_INTR_VAL(x) ((0x3f & (x)) << 10)
+ #define P3A_RG_CLKDRV_OFF GENMASK(3, 2)
+ #define P3A_RG_CLKDRV_OFF_VAL(x) ((0x3 & (x)) << 2)
+
+@@ -187,6 +193,19 @@
+ #define P3D_RG_FWAKE_TH GENMASK(21, 16)
+ #define P3D_RG_FWAKE_TH_VAL(x) ((0x3f & (x)) << 16)
+
++#define U3P_U3_PHYD_IMPCAL0 0x010
++#define P3D_RG_FORCE_TX_IMPEL BIT(31)
++#define P3D_RG_TX_IMPEL GENMASK(28, 24)
++#define P3D_RG_TX_IMPEL_VAL(x) ((0x1f & (x)) << 24)
++
++#define U3P_U3_PHYD_IMPCAL1 0x014
++#define P3D_RG_FORCE_RX_IMPEL BIT(31)
++#define P3D_RG_RX_IMPEL GENMASK(28, 24)
++#define P3D_RG_RX_IMPEL_VAL(x) ((0x1f & (x)) << 24)
++
++#define U3P_U3_PHYD_RSV 0x054
++#define P3D_RG_EFUSE_AUTO_LOAD_DIS BIT(12)
++
+ #define U3P_U3_PHYD_CDR1 0x05c
+ #define P3D_RG_CDR_BIR_LTD1 GENMASK(28, 24)
+ #define P3D_RG_CDR_BIR_LTD1_VAL(x) ((0x1f & (x)) << 24)
+@@ -307,6 +326,11 @@ struct mtk_phy_pdata {
+ * 48M PLL, fix it by switching PLL to 26M from default 48M
+ */
+ bool sw_pll_48m_to_26m;
++ /*
++ * Some SoCs (e.g. mt8195) drop a bit when use auto load efuse,
++ * support sw way, also support it for v2/v3 optionally.
++ */
++ bool sw_efuse_supported;
+ enum mtk_phy_version version;
+ };
+
+@@ -336,6 +360,10 @@ struct mtk_phy_instance {
+ struct regmap *type_sw;
+ u32 type_sw_reg;
+ u32 type_sw_index;
++ u32 efuse_sw_en;
++ u32 efuse_intr;
++ u32 efuse_tx_imp;
++ u32 efuse_rx_imp;
+ int eye_src;
+ int eye_vrt;
+ int eye_term;
+@@ -1011,7 +1039,7 @@ static int phy_type_syscon_get(struct mtk_phy_instance *instance,
+ static int phy_type_set(struct mtk_phy_instance *instance)
+ {
+ int type;
+- u32 mask;
++ u32 offset;
+
+ if (!instance->type_sw)
+ return 0;
+@@ -1034,12 +1062,137 @@ static int phy_type_set(struct mtk_phy_instance *instance)
+ return 0;
+ }
+
+- mask = RG_PHY_SW_TYPE << (instance->type_sw_index * BITS_PER_BYTE);
+- regmap_update_bits(instance->type_sw, instance->type_sw_reg, mask, type);
++ offset = instance->type_sw_index * BITS_PER_BYTE;
++ regmap_update_bits(instance->type_sw, instance->type_sw_reg,
++ RG_PHY_SW_TYPE << offset, type << offset);
+
+ return 0;
+ }
+
++static int phy_efuse_get(struct mtk_tphy *tphy, struct mtk_phy_instance *instance)
++{
++ struct device *dev = &instance->phy->dev;
++ int ret = 0;
++
++ /* tphy v1 doesn't support sw efuse, skip it */
++ if (!tphy->pdata->sw_efuse_supported) {
++ instance->efuse_sw_en = 0;
++ return 0;
++ }
++
++ /* software efuse is optional */
++ instance->efuse_sw_en = device_property_read_bool(dev, "nvmem-cells");
++ if (!instance->efuse_sw_en)
++ return 0;
++
++ switch (instance->type) {
++ case PHY_TYPE_USB2:
++ ret = nvmem_cell_read_variable_le_u32(dev, "intr", &instance->efuse_intr);
++ if (ret) {
++ dev_err(dev, "fail to get u2 intr efuse, %d\n", ret);
++ break;
++ }
++
++ /* no efuse, ignore it */
++ if (!instance->efuse_intr) {
++ dev_warn(dev, "no u2 intr efuse, but dts enable it\n");
++ instance->efuse_sw_en = 0;
++ break;
++ }
++
++ dev_dbg(dev, "u2 efuse - intr %x\n", instance->efuse_intr);
++ break;
++
++ case PHY_TYPE_USB3:
++ case PHY_TYPE_PCIE:
++ ret = nvmem_cell_read_variable_le_u32(dev, "intr", &instance->efuse_intr);
++ if (ret) {
++ dev_err(dev, "fail to get u3 intr efuse, %d\n", ret);
++ break;
++ }
++
++ ret = nvmem_cell_read_variable_le_u32(dev, "rx_imp", &instance->efuse_rx_imp);
++ if (ret) {
++ dev_err(dev, "fail to get u3 rx_imp efuse, %d\n", ret);
++ break;
++ }
++
++ ret = nvmem_cell_read_variable_le_u32(dev, "tx_imp", &instance->efuse_tx_imp);
++ if (ret) {
++ dev_err(dev, "fail to get u3 tx_imp efuse, %d\n", ret);
++ break;
++ }
++
++ /* no efuse, ignore it */
++ if (!instance->efuse_intr &&
++ !instance->efuse_rx_imp &&
++ !instance->efuse_tx_imp) {
++ dev_warn(dev, "no u3 intr efuse, but dts enable it\n");
++ instance->efuse_sw_en = 0;
++ break;
++ }
++
++ dev_dbg(dev, "u3 efuse - intr %x, rx_imp %x, tx_imp %x\n",
++ instance->efuse_intr, instance->efuse_rx_imp,instance->efuse_tx_imp);
++ break;
++ default:
++ dev_err(dev, "no sw efuse for type %d\n", instance->type);
++ ret = -EINVAL;
++ }
++
++ return ret;
++}
++
++static void phy_efuse_set(struct mtk_phy_instance *instance)
++{
++ struct device *dev = &instance->phy->dev;
++ struct u2phy_banks *u2_banks = &instance->u2_banks;
++ struct u3phy_banks *u3_banks = &instance->u3_banks;
++ u32 tmp;
++
++ if (!instance->efuse_sw_en)
++ return;
++
++ switch (instance->type) {
++ case PHY_TYPE_USB2:
++ tmp = readl(u2_banks->misc + U3P_MISC_REG1);
++ tmp |= MR1_EFUSE_AUTO_LOAD_DIS;
++ writel(tmp, u2_banks->misc + U3P_MISC_REG1);
++
++ tmp = readl(u2_banks->com + U3P_USBPHYACR1);
++ tmp &= ~PA1_RG_INTR_CAL;
++ tmp |= PA1_RG_INTR_CAL_VAL(instance->efuse_intr);
++ writel(tmp, u2_banks->com + U3P_USBPHYACR1);
++ break;
++ case PHY_TYPE_USB3:
++ case PHY_TYPE_PCIE:
++ tmp = readl(u3_banks->phyd + U3P_U3_PHYD_RSV);
++ tmp |= P3D_RG_EFUSE_AUTO_LOAD_DIS;
++ writel(tmp, u3_banks->phyd + U3P_U3_PHYD_RSV);
++
++ tmp = readl(u3_banks->phyd + U3P_U3_PHYD_IMPCAL0);
++ tmp &= ~P3D_RG_TX_IMPEL;
++ tmp |= P3D_RG_TX_IMPEL_VAL(instance->efuse_tx_imp);
++ tmp |= P3D_RG_FORCE_TX_IMPEL;
++ writel(tmp, u3_banks->phyd + U3P_U3_PHYD_IMPCAL0);
++
++ tmp = readl(u3_banks->phyd + U3P_U3_PHYD_IMPCAL1);
++ tmp &= ~P3D_RG_RX_IMPEL;
++ tmp |= P3D_RG_RX_IMPEL_VAL(instance->efuse_rx_imp);
++ tmp |= P3D_RG_FORCE_RX_IMPEL;
++ writel(tmp, u3_banks->phyd + U3P_U3_PHYD_IMPCAL1);
++
++ tmp = readl(u3_banks->phya + U3P_U3_PHYA_REG0);
++ tmp &= ~P3A_RG_IEXT_INTR;
++ tmp |= P3A_RG_IEXT_INTR_VAL(instance->efuse_intr);
++ writel(tmp, u3_banks->phya + U3P_U3_PHYA_REG0);
++ break;
++ default:
++ dev_warn(dev, "no sw efuse for type %d\n", instance->type);
++ break;
++ }
++}
++
+ static int mtk_phy_init(struct phy *phy)
+ {
+ struct mtk_phy_instance *instance = phy_get_drvdata(phy);
+@@ -1050,6 +1203,8 @@ static int mtk_phy_init(struct phy *phy)
+ if (ret)
+ return ret;
+
++ phy_efuse_set(instance);
++
+ switch (instance->type) {
+ case PHY_TYPE_USB2:
+ u2_phy_instance_init(tphy, instance);
+@@ -1134,6 +1289,7 @@ static struct phy *mtk_phy_xlate(struct device *dev,
+ struct mtk_phy_instance *instance = NULL;
+ struct device_node *phy_np = args->np;
+ int index;
++ int ret;
+
+ if (args->args_count != 1) {
+ dev_err(dev, "invalid number of cells in 'phy' property\n");
+@@ -1174,6 +1330,10 @@ static struct phy *mtk_phy_xlate(struct device *dev,
+ return ERR_PTR(-EINVAL);
+ }
+
++ ret = phy_efuse_get(tphy, instance);
++ if (ret)
++ return ERR_PTR(ret);
++
+ phy_parse_property(tphy, instance);
+ phy_type_set(instance);
+
+@@ -1196,10 +1356,12 @@ static const struct mtk_phy_pdata tphy_v1_pdata = {
+
+ static const struct mtk_phy_pdata tphy_v2_pdata = {
+ .avoid_rx_sen_degradation = false,
++ .sw_efuse_supported = true,
+ .version = MTK_PHY_V2,
+ };
+
+ static const struct mtk_phy_pdata tphy_v3_pdata = {
++ .sw_efuse_supported = true,
+ .version = MTK_PHY_V3,
+ };
+
+@@ -1210,6 +1372,7 @@ static const struct mtk_phy_pdata mt8173_pdata = {
+
+ static const struct mtk_phy_pdata mt8195_pdata = {
+ .sw_pll_48m_to_26m = true,
++ .sw_efuse_supported = true,
+ .version = MTK_PHY_V3,
+ };
+
+diff --git a/drivers/phy/microchip/sparx5_serdes.c b/drivers/phy/microchip/sparx5_serdes.c
+index 4076580fc2cd9..ab1b0986aa671 100644
+--- a/drivers/phy/microchip/sparx5_serdes.c
++++ b/drivers/phy/microchip/sparx5_serdes.c
+@@ -2475,10 +2475,10 @@ static int sparx5_serdes_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+ iomem = devm_ioremap(priv->dev, iores->start, resource_size(iores));
+- if (IS_ERR(iomem)) {
++ if (!iomem) {
+ dev_err(priv->dev, "Unable to get serdes registers: %s\n",
+ iores->name);
+- return PTR_ERR(iomem);
++ return -ENOMEM;
+ }
+ for (idx = 0; idx < ARRAY_SIZE(sparx5_serdes_iomap); idx++) {
+ struct sparx5_serdes_io_resource *iomap = &sparx5_serdes_iomap[idx];
+diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
+index 5172971f4c360..3cd4d51c247c3 100644
+--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
++++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
+@@ -629,7 +629,8 @@ idle:
+ cleanup:
+ if (error < 0)
+ phy_mdm6600_device_power_off(ddata);
+-
++ pm_runtime_disable(ddata->dev);
++ pm_runtime_dont_use_autosuspend(ddata->dev);
+ return error;
+ }
+
+diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c
+index c2cb93b4df71c..4525d3fd903a4 100644
+--- a/drivers/phy/phy-can-transceiver.c
++++ b/drivers/phy/phy-can-transceiver.c
+@@ -87,6 +87,7 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
+ struct gpio_desc *standby_gpio;
+ struct gpio_desc *enable_gpio;
+ u32 max_bitrate = 0;
++ int err;
+
+ can_transceiver_phy = devm_kzalloc(dev, sizeof(struct can_transceiver_phy), GFP_KERNEL);
+ if (!can_transceiver_phy)
+@@ -102,8 +103,8 @@ static int can_transceiver_phy_probe(struct platform_device *pdev)
+ return PTR_ERR(phy);
+ }
+
+- device_property_read_u32(dev, "max-bitrate", &max_bitrate);
+- if (!max_bitrate)
++ err = device_property_read_u32(dev, "max-bitrate", &max_bitrate);
++ if ((err != -EINVAL) && !max_bitrate)
+ dev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit\n");
+ phy->attrs.max_link_rate = max_bitrate;
+
+diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c
+index 288c9c67aa748..929e86d6558e0 100644
+--- a/drivers/phy/phy-core-mipi-dphy.c
++++ b/drivers/phy/phy-core-mipi-dphy.c
+@@ -36,7 +36,7 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock,
+
+ cfg->clk_miss = 0;
+ cfg->clk_post = 60000 + 52 * ui;
+- cfg->clk_pre = 8000;
++ cfg->clk_pre = 8;
+ cfg->clk_prepare = 38000;
+ cfg->clk_settle = 95000;
+ cfg->clk_term_en = 0;
+@@ -64,10 +64,10 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock,
+ cfg->hs_trail = max(4 * 8 * ui, 60000 + 4 * 4 * ui);
+
+ cfg->init = 100;
+- cfg->lpx = 60000;
++ cfg->lpx = 50000;
+ cfg->ta_get = 5 * cfg->lpx;
+ cfg->ta_go = 4 * cfg->lpx;
+- cfg->ta_sure = 2 * cfg->lpx;
++ cfg->ta_sure = cfg->lpx;
+ cfg->wakeup = 1000;
+
+ cfg->hs_clk_rate = hs_clk_rate;
+@@ -97,7 +97,7 @@ int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg)
+ if (cfg->clk_post < (60000 + 52 * ui))
+ return -EINVAL;
+
+- if (cfg->clk_pre < 8000)
++ if (cfg->clk_pre < 8)
+ return -EINVAL;
+
+ if (cfg->clk_prepare < 38000 || cfg->clk_prepare > 95000)
+diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
+index f14032170b1c1..eef863108bfe2 100644
+--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
++++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
+@@ -2919,6 +2919,7 @@ struct qcom_qmp {
+ struct regulator_bulk_data *vregs;
+
+ struct qmp_phy **phys;
++ struct qmp_phy *usb_phy;
+
+ struct mutex phy_mutex;
+ int init_count;
+@@ -3417,8 +3418,8 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
+
+ .clk_list = qmp_v3_phy_clk_l,
+ .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l),
+- .reset_list = sc7180_usb3phy_reset_l,
+- .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l),
++ .reset_list = msm8996_usb3phy_reset_l,
++ .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+ .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l),
+ .regs = qmp_v3_usb3phy_regs_layout,
+@@ -3632,7 +3633,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
+ .nlanes = 1,
+
+ .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl,
+- .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
++ .serdes_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl),
+ .tx_tbl = sc8180x_qmp_pcie_tx_tbl,
+ .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl),
+ .rx_tbl = sc8180x_qmp_pcie_rx_tbl,
+@@ -3805,8 +3806,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
+ .serdes_tbl_hbr3 = qmp_v4_dp_serdes_tbl_hbr3,
+ .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_hbr3),
+
+- .clk_list = qmp_v4_phy_clk_l,
+- .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l),
++ .clk_list = qmp_v4_sm8250_usbphy_clk_l,
++ .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
+ .reset_list = msm8996_usb3phy_reset_l,
+ .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l),
+ .vreg_list = qmp_phy_vreg_l,
+@@ -4554,7 +4555,7 @@ static int qcom_qmp_phy_com_init(struct qmp_phy *qphy)
+ struct qcom_qmp *qmp = qphy->qmp;
+ const struct qmp_phy_cfg *cfg = qphy->cfg;
+ void __iomem *serdes = qphy->serdes;
+- void __iomem *pcs = qphy->pcs;
++ struct qmp_phy *usb_phy = qmp->usb_phy;
+ void __iomem *dp_com = qmp->dp_com;
+ int ret, i;
+
+@@ -4620,13 +4621,13 @@ static int qcom_qmp_phy_com_init(struct qmp_phy *qphy)
+ qphy_setbits(serdes, cfg->regs[QPHY_COM_POWER_DOWN_CONTROL],
+ SW_PWRDN);
+ } else {
+- if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
+- qphy_setbits(pcs,
+- cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+- cfg->pwrdn_ctrl);
++ if (usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
++ qphy_setbits(usb_phy->pcs,
++ usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
++ usb_phy->cfg->pwrdn_ctrl);
+ else
+- qphy_setbits(pcs, QPHY_POWER_DOWN_CONTROL,
+- cfg->pwrdn_ctrl);
++ qphy_setbits(usb_phy->pcs, QPHY_POWER_DOWN_CONTROL,
++ usb_phy->cfg->pwrdn_ctrl);
+ }
+
+ mutex_unlock(&qmp->phy_mutex);
+@@ -4802,7 +4803,7 @@ static int qcom_qmp_phy_power_on(struct phy *phy)
+
+ ret = reset_control_deassert(qmp->ufs_reset);
+ if (ret)
+- goto err_lane_rst;
++ goto err_pcs_ready;
+
+ qcom_qmp_phy_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl,
+ cfg->pcs_misc_tbl_num);
+@@ -4984,7 +4985,7 @@ static void qcom_qmp_phy_disable_autonomous_mode(struct qmp_phy *qphy)
+ static int __maybe_unused qcom_qmp_phy_runtime_suspend(struct device *dev)
+ {
+ struct qcom_qmp *qmp = dev_get_drvdata(dev);
+- struct qmp_phy *qphy = qmp->phys[0];
++ struct qmp_phy *qphy = qmp->usb_phy;
+ const struct qmp_phy_cfg *cfg = qphy->cfg;
+
+ dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode);
+@@ -5009,7 +5010,7 @@ static int __maybe_unused qcom_qmp_phy_runtime_suspend(struct device *dev)
+ static int __maybe_unused qcom_qmp_phy_runtime_resume(struct device *dev)
+ {
+ struct qcom_qmp *qmp = dev_get_drvdata(dev);
+- struct qmp_phy *qphy = qmp->phys[0];
++ struct qmp_phy *qphy = qmp->usb_phy;
+ const struct qmp_phy_cfg *cfg = qphy->cfg;
+ int ret = 0;
+
+@@ -5382,6 +5383,26 @@ static const struct phy_ops qcom_qmp_pcie_ufs_ops = {
+ .owner = THIS_MODULE,
+ };
+
++static void qcom_qmp_reset_control_put(void *data)
++{
++ reset_control_put(data);
++}
++
++static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
++ int index, bool exclusive)
++{
++ struct resource res;
++
++ if (!exclusive) {
++ if (of_address_to_resource(np, index, &res))
++ return IOMEM_ERR_PTR(-EINVAL);
++
++ return devm_ioremap(dev, res.start, resource_size(&res));
++ }
++
++ return devm_of_iomap(dev, np, index, NULL);
++}
++
+ static
+ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+@@ -5391,8 +5412,18 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ struct qmp_phy *qphy;
+ const struct phy_ops *ops;
+ char prop_name[MAX_PROP_NAME];
++ bool exclusive = true;
+ int ret;
+
++ /*
++ * FIXME: These bindings should be fixed to not rely on overlapping
++ * mappings for PCS.
++ */
++ if (of_device_is_compatible(dev->of_node, "qcom,sdx65-qmp-usb3-uni-phy"))
++ exclusive = false;
++ if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy"))
++ exclusive = false;
++
+ qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
+ if (!qphy)
+ return -ENOMEM;
+@@ -5405,17 +5436,17 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
+ * For single lane PHYs: pcs_misc (optional) -> 3.
+ */
+- qphy->tx = of_iomap(np, 0);
+- if (!qphy->tx)
+- return -ENOMEM;
++ qphy->tx = devm_of_iomap(dev, np, 0, NULL);
++ if (IS_ERR(qphy->tx))
++ return PTR_ERR(qphy->tx);
+
+- qphy->rx = of_iomap(np, 1);
+- if (!qphy->rx)
+- return -ENOMEM;
++ qphy->rx = devm_of_iomap(dev, np, 1, NULL);
++ if (IS_ERR(qphy->rx))
++ return PTR_ERR(qphy->rx);
+
+- qphy->pcs = of_iomap(np, 2);
+- if (!qphy->pcs)
+- return -ENOMEM;
++ qphy->pcs = qmp_usb_iomap(dev, np, 2, exclusive);
++ if (IS_ERR(qphy->pcs))
++ return PTR_ERR(qphy->pcs);
+
+ /*
+ * If this is a dual-lane PHY, then there should be registers for the
+@@ -5424,9 +5455,9 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ * offset from the first lane.
+ */
+ if (cfg->is_dual_lane_phy) {
+- qphy->tx2 = of_iomap(np, 3);
+- qphy->rx2 = of_iomap(np, 4);
+- if (!qphy->tx2 || !qphy->rx2) {
++ qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
++ qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
++ if (IS_ERR(qphy->tx2) || IS_ERR(qphy->rx2)) {
+ dev_warn(dev,
+ "Underspecified device tree, falling back to legacy register regions\n");
+
+@@ -5436,15 +5467,17 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ qphy->rx2 = qphy->rx + QMP_PHY_LEGACY_LANE_STRIDE;
+
+ } else {
+- qphy->pcs_misc = of_iomap(np, 5);
++ qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+ }
+
+ } else {
+- qphy->pcs_misc = of_iomap(np, 3);
++ qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
+ }
+
+- if (!qphy->pcs_misc)
++ if (IS_ERR(qphy->pcs_misc)) {
+ dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
++ qphy->pcs_misc = NULL;
++ }
+
+ /*
+ * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3
+@@ -5454,7 +5487,7 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ * all phys that don't need this.
+ */
+ snprintf(prop_name, sizeof(prop_name), "pipe%d", id);
+- qphy->pipe_clk = of_clk_get_by_name(np, prop_name);
++ qphy->pipe_clk = devm_get_clk_from_child(dev, np, prop_name);
+ if (IS_ERR(qphy->pipe_clk)) {
+ if (cfg->type == PHY_TYPE_PCIE ||
+ cfg->type == PHY_TYPE_USB3) {
+@@ -5476,6 +5509,10 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id,
+ dev_err(dev, "failed to get lane%d reset\n", id);
+ return PTR_ERR(qphy->lane_rst);
+ }
++ ret = devm_add_action_or_reset(dev, qcom_qmp_reset_control_put,
++ qphy->lane_rst);
++ if (ret)
++ return ret;
+ }
+
+ if (cfg->type == PHY_TYPE_UFS || cfg->type == PHY_TYPE_PCIE)
+@@ -5731,7 +5768,9 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ pm_runtime_set_active(dev);
+- pm_runtime_enable(dev);
++ ret = devm_pm_runtime_enable(dev);
++ if (ret)
++ return ret;
+ /*
+ * Prevent runtime pm from being ON by default. Users can enable
+ * it using power/control in sysfs.
+@@ -5756,6 +5795,9 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev)
+ goto err_node_put;
+ }
+
++ if (cfg->type != PHY_TYPE_DP)
++ qmp->usb_phy = qmp->phys[id];
++
+ /*
+ * Register the pipe clock provided by phy.
+ * See function description to see details of this pipe clock.
+@@ -5778,16 +5820,16 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev)
+ id++;
+ }
+
++ if (!qmp->usb_phy)
++ return -EINVAL;
++
+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+ if (!IS_ERR(phy_provider))
+ dev_info(dev, "Registered Qcom-QMP phy\n");
+- else
+- pm_runtime_disable(dev);
+
+ return PTR_ERR_OR_ZERO(phy_provider);
+
+ err_node_put:
+- pm_runtime_disable(dev);
+ of_node_put(child);
+ return ret;
+ }
+diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
+index 3c1d3b71c825b..f1d97fbd13318 100644
+--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
++++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
+@@ -561,7 +561,7 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
+ {
+ struct device *dev = &qphy->phy->dev;
+ const struct qusb2_phy_cfg *cfg = qphy->cfg;
+- u8 *val;
++ u8 *val, hstx_trim;
+
+ /* efuse register is optional */
+ if (!qphy->cell)
+@@ -575,7 +575,13 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
+ * set while configuring the phy.
+ */
+ val = nvmem_cell_read(qphy->cell, NULL);
+- if (IS_ERR(val) || !val[0]) {
++ if (IS_ERR(val)) {
++ dev_dbg(dev, "failed to read a valid hs-tx trim value\n");
++ return;
++ }
++ hstx_trim = val[0];
++ kfree(val);
++ if (!hstx_trim) {
+ dev_dbg(dev, "failed to read a valid hs-tx trim value\n");
+ return;
+ }
+@@ -583,12 +589,10 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
+ /* Fused TUNE1/2 value is the higher nibble only */
+ if (cfg->update_tune1_with_efuse)
+ qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
+- val[0] << HSTX_TRIM_SHIFT,
+- HSTX_TRIM_MASK);
++ hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK);
+ else
+ qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2],
+- val[0] << HSTX_TRIM_SHIFT,
+- HSTX_TRIM_MASK);
++ hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK);
+ }
+
+ static int qusb2_phy_set_mode(struct phy *phy,
+diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
+index ae4bac024c7b1..173d166ed8295 100644
+--- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
++++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
+@@ -33,7 +33,7 @@
+
+ #define USB2_PHY_USB_PHY_HS_PHY_CTRL_COMMON0 (0x54)
+ #define RETENABLEN BIT(3)
+-#define FSEL_MASK GENMASK(7, 5)
++#define FSEL_MASK GENMASK(6, 4)
+ #define FSEL_DEFAULT (0x3 << 4)
+
+ #define USB2_PHY_USB_PHY_HS_PHY_CTRL_COMMON1 (0x58)
+@@ -68,23 +68,27 @@ static const char * const qcom_snps_hsphy_vreg_names[] = {
+ /**
+ * struct qcom_snps_hsphy - snps hs phy attributes
+ *
++ * @dev: device structure
++ *
+ * @phy: generic phy
+ * @base: iomapped memory space for snps hs phy
+ *
+- * @cfg_ahb_clk: AHB2PHY interface clock
+- * @ref_clk: phy reference clock
+- * @iface_clk: phy interface clock
++ * @num_clks: number of clocks
++ * @clks: array of clocks
+ * @phy_reset: phy reset control
+ * @vregs: regulator supplies bulk data
+ * @phy_initialized: if PHY has been initialized correctly
+ * @mode: contains the current mode the PHY is in
++ * @update_seq_cfg: tuning parameters for phy init
+ */
+ struct qcom_snps_hsphy {
++ struct device *dev;
++
+ struct phy *phy;
+ void __iomem *base;
+
+- struct clk *cfg_ahb_clk;
+- struct clk *ref_clk;
++ int num_clks;
++ struct clk_bulk_data *clks;
+ struct reset_control *phy_reset;
+ struct regulator_bulk_data vregs[SNPS_HS_NUM_VREGS];
+
+@@ -92,6 +96,34 @@ struct qcom_snps_hsphy {
+ enum phy_mode mode;
+ };
+
++static int qcom_snps_hsphy_clk_init(struct qcom_snps_hsphy *hsphy)
++{
++ struct device *dev = hsphy->dev;
++
++ hsphy->num_clks = 2;
++ hsphy->clks = devm_kcalloc(dev, hsphy->num_clks, sizeof(*hsphy->clks), GFP_KERNEL);
++ if (!hsphy->clks)
++ return -ENOMEM;
++
++ /*
++ * TODO: Currently no device tree instantiation of the PHY is using the clock.
++ * This needs to be fixed in order for this code to be able to use devm_clk_bulk_get().
++ */
++ hsphy->clks[0].id = "cfg_ahb";
++ hsphy->clks[0].clk = devm_clk_get_optional(dev, "cfg_ahb");
++ if (IS_ERR(hsphy->clks[0].clk))
++ return dev_err_probe(dev, PTR_ERR(hsphy->clks[0].clk),
++ "failed to get cfg_ahb clk\n");
++
++ hsphy->clks[1].id = "ref";
++ hsphy->clks[1].clk = devm_clk_get(dev, "ref");
++ if (IS_ERR(hsphy->clks[1].clk))
++ return dev_err_probe(dev, PTR_ERR(hsphy->clks[1].clk),
++ "failed to get ref clk\n");
++
++ return 0;
++}
++
+ static inline void qcom_snps_hsphy_write_mask(void __iomem *base, u32 offset,
+ u32 mask, u32 val)
+ {
+@@ -122,22 +154,13 @@ static int qcom_snps_hsphy_suspend(struct qcom_snps_hsphy *hsphy)
+ 0, USB2_AUTO_RESUME);
+ }
+
+- clk_disable_unprepare(hsphy->cfg_ahb_clk);
+ return 0;
+ }
+
+ static int qcom_snps_hsphy_resume(struct qcom_snps_hsphy *hsphy)
+ {
+- int ret;
+-
+ dev_dbg(&hsphy->phy->dev, "Resume QCOM SNPS PHY, mode\n");
+
+- ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
+- if (ret) {
+- dev_err(&hsphy->phy->dev, "failed to enable cfg ahb clock\n");
+- return ret;
+- }
+-
+ return 0;
+ }
+
+@@ -148,8 +171,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_suspend(struct device *dev)
+ if (!hsphy->phy_initialized)
+ return 0;
+
+- qcom_snps_hsphy_suspend(hsphy);
+- return 0;
++ return qcom_snps_hsphy_suspend(hsphy);
+ }
+
+ static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
+@@ -159,8 +181,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
+ if (!hsphy->phy_initialized)
+ return 0;
+
+- qcom_snps_hsphy_resume(hsphy);
+- return 0;
++ return qcom_snps_hsphy_resume(hsphy);
+ }
+
+ static int qcom_snps_hsphy_set_mode(struct phy *phy, enum phy_mode mode,
+@@ -183,16 +204,16 @@ static int qcom_snps_hsphy_init(struct phy *phy)
+ if (ret)
+ return ret;
+
+- ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
++ ret = clk_bulk_prepare_enable(hsphy->num_clks, hsphy->clks);
+ if (ret) {
+- dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret);
++ dev_err(&phy->dev, "failed to enable clocks, %d\n", ret);
+ goto poweroff_phy;
+ }
+
+ ret = reset_control_assert(hsphy->phy_reset);
+ if (ret) {
+ dev_err(&phy->dev, "failed to assert phy_reset, %d\n", ret);
+- goto disable_ahb_clk;
++ goto disable_clks;
+ }
+
+ usleep_range(100, 150);
+@@ -200,7 +221,7 @@ static int qcom_snps_hsphy_init(struct phy *phy)
+ ret = reset_control_deassert(hsphy->phy_reset);
+ if (ret) {
+ dev_err(&phy->dev, "failed to de-assert phy_reset, %d\n", ret);
+- goto disable_ahb_clk;
++ goto disable_clks;
+ }
+
+ qcom_snps_hsphy_write_mask(hsphy->base, USB2_PHY_USB_PHY_CFG0,
+@@ -246,8 +267,8 @@ static int qcom_snps_hsphy_init(struct phy *phy)
+
+ return 0;
+
+-disable_ahb_clk:
+- clk_disable_unprepare(hsphy->cfg_ahb_clk);
++disable_clks:
++ clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
+ poweroff_phy:
+ regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
+
+@@ -259,7 +280,7 @@ static int qcom_snps_hsphy_exit(struct phy *phy)
+ struct qcom_snps_hsphy *hsphy = phy_get_drvdata(phy);
+
+ reset_control_assert(hsphy->phy_reset);
+- clk_disable_unprepare(hsphy->cfg_ahb_clk);
++ clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
+ regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
+ hsphy->phy_initialized = false;
+
+@@ -299,17 +320,15 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev)
+ if (!hsphy)
+ return -ENOMEM;
+
++ hsphy->dev = dev;
++
+ hsphy->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(hsphy->base))
+ return PTR_ERR(hsphy->base);
+
+- hsphy->ref_clk = devm_clk_get(dev, "ref");
+- if (IS_ERR(hsphy->ref_clk)) {
+- ret = PTR_ERR(hsphy->ref_clk);
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "failed to get ref clk, %d\n", ret);
+- return ret;
+- }
++ ret = qcom_snps_hsphy_clk_init(hsphy);
++ if (ret)
++ return dev_err_probe(dev, ret, "failed to initialize clocks\n");
+
+ hsphy->phy_reset = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(hsphy->phy_reset)) {
+@@ -322,12 +341,9 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev)
+ hsphy->vregs[i].supply = qcom_snps_hsphy_vreg_names[i];
+
+ ret = devm_regulator_bulk_get(dev, num, hsphy->vregs);
+- if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "failed to get regulator supplies: %d\n",
+- ret);
+- return ret;
+- }
++ if (ret)
++ return dev_err_probe(dev, ret,
++ "failed to get regulator supplies\n");
+
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c
+index 04d18d52f700d..d4741c2dbbb56 100644
+--- a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c
++++ b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c
+@@ -54,8 +54,10 @@ static int qcom_usb_hsic_phy_power_on(struct phy *phy)
+
+ /* Configure pins for HSIC functionality */
+ pins_default = pinctrl_lookup_state(uphy->pctl, PINCTRL_STATE_DEFAULT);
+- if (IS_ERR(pins_default))
+- return PTR_ERR(pins_default);
++ if (IS_ERR(pins_default)) {
++ ret = PTR_ERR(pins_default);
++ goto err_ulpi;
++ }
+
+ ret = pinctrl_select_state(uphy->pctl, pins_default);
+ if (ret)
+diff --git a/drivers/phy/ralink/phy-mt7621-pci.c b/drivers/phy/ralink/phy-mt7621-pci.c
+index 5e6530f545b5c..85888ab2d307a 100644
+--- a/drivers/phy/ralink/phy-mt7621-pci.c
++++ b/drivers/phy/ralink/phy-mt7621-pci.c
+@@ -280,7 +280,8 @@ static struct phy *mt7621_pcie_phy_of_xlate(struct device *dev,
+ }
+
+ static const struct soc_device_attribute mt7621_pci_quirks_match[] = {
+- { .soc_id = "mt7621", .revision = "E2" }
++ { .soc_id = "mt7621", .revision = "E2" },
++ { /* sentinel */ }
+ };
+
+ static const struct regmap_config mt7621_pci_phy_regmap_config = {
+diff --git a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
+index 347dc79a18c18..630e01b5c19b9 100644
+--- a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
++++ b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
+@@ -5,6 +5,7 @@
+ * Author: Wyon Bi <bivvy.bi@rock-chips.com>
+ */
+
++#include <linux/bits.h>
+ #include <linux/kernel.h>
+ #include <linux/clk.h>
+ #include <linux/iopoll.h>
+@@ -364,7 +365,7 @@ static void inno_dsidphy_mipi_mode_enable(struct inno_dsidphy *inno)
+ * The value of counter for HS Tclk-pre
+ * Tclk-pre = Tpin_txbyteclkhs * value
+ */
+- clk_pre = DIV_ROUND_UP(cfg->clk_pre, t_txbyteclkhs);
++ clk_pre = DIV_ROUND_UP(cfg->clk_pre, BITS_PER_BYTE);
+
+ /*
+ * The value of counter for HS Tlpx Time
+diff --git a/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c b/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c
+index 80acca4e9e146..2556caf475c0c 100644
+--- a/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c
++++ b/drivers/phy/rockchip/phy-rockchip-inno-hdmi.c
+@@ -745,10 +745,12 @@ unsigned long inno_hdmi_phy_rk3328_clk_recalc_rate(struct clk_hw *hw,
+ do_div(vco, (nd * (no_a == 1 ? no_b : no_a) * no_d * 2));
+ }
+
+- inno->pixclock = vco;
+- dev_dbg(inno->dev, "%s rate %lu\n", __func__, inno->pixclock);
++ inno->pixclock = DIV_ROUND_CLOSEST((unsigned long)vco, 1000) * 1000;
+
+- return vco;
++ dev_dbg(inno->dev, "%s rate %lu vco %llu\n",
++ __func__, inno->pixclock, vco);
++
++ return inno->pixclock;
+ }
+
+ static long inno_hdmi_phy_rk3328_clk_round_rate(struct clk_hw *hw,
+@@ -790,8 +792,8 @@ static int inno_hdmi_phy_rk3328_clk_set_rate(struct clk_hw *hw,
+ RK3328_PRE_PLL_POWER_DOWN);
+
+ /* Configure pre-pll */
+- inno_update_bits(inno, 0xa0, RK3228_PCLK_VCO_DIV_5_MASK,
+- RK3228_PCLK_VCO_DIV_5(cfg->vco_div_5_en));
++ inno_update_bits(inno, 0xa0, RK3328_PCLK_VCO_DIV_5_MASK,
++ RK3328_PCLK_VCO_DIV_5(cfg->vco_div_5_en));
+ inno_write(inno, 0xa1, RK3328_PRE_PLL_PRE_DIV(cfg->prediv));
+
+ val = RK3328_SPREAD_SPECTRUM_MOD_DISABLE;
+@@ -1021,9 +1023,10 @@ inno_hdmi_phy_rk3328_power_on(struct inno_hdmi_phy *inno,
+
+ inno_write(inno, 0xac, RK3328_POST_PLL_FB_DIV_7_0(cfg->fbdiv));
+ if (cfg->postdiv == 1) {
+- inno_write(inno, 0xaa, RK3328_POST_PLL_REFCLK_SEL_TMDS);
+ inno_write(inno, 0xab, RK3328_POST_PLL_FB_DIV_8(cfg->fbdiv) |
+ RK3328_POST_PLL_PRE_DIV(cfg->prediv));
++ inno_write(inno, 0xaa, RK3328_POST_PLL_REFCLK_SEL_TMDS |
++ RK3328_POST_PLL_POWER_DOWN);
+ } else {
+ v = (cfg->postdiv / 2) - 1;
+ v &= RK3328_POST_PLL_POST_DIV_MASK;
+@@ -1031,7 +1034,8 @@ inno_hdmi_phy_rk3328_power_on(struct inno_hdmi_phy *inno,
+ inno_write(inno, 0xab, RK3328_POST_PLL_FB_DIV_8(cfg->fbdiv) |
+ RK3328_POST_PLL_PRE_DIV(cfg->prediv));
+ inno_write(inno, 0xaa, RK3328_POST_PLL_POST_DIV_ENABLE |
+- RK3328_POST_PLL_REFCLK_SEL_TMDS);
++ RK3328_POST_PLL_REFCLK_SEL_TMDS |
++ RK3328_POST_PLL_POWER_DOWN);
+ }
+
+ for (v = 0; v < 14; v++)
+diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+index 4f569d9307b9e..c167b8c5cc860 100644
+--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
++++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c
+@@ -467,8 +467,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy)
+ return ret;
+
+ ret = property_enable(base, &rport->port_cfg->phy_sus, false);
+- if (ret)
++ if (ret) {
++ clk_disable_unprepare(rphy->clk480m);
+ return ret;
++ }
+
+ /* waiting for the utmi_clk to become stable */
+ usleep_range(1500, 2000);
+diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c
+index d2bbdc96a1672..062821410ee4f 100644
+--- a/drivers/phy/rockchip/phy-rockchip-typec.c
++++ b/drivers/phy/rockchip/phy-rockchip-typec.c
+@@ -808,9 +808,8 @@ static int tcphy_get_mode(struct rockchip_typec_phy *tcphy)
+ struct extcon_dev *edev = tcphy->extcon;
+ union extcon_property_value property;
+ unsigned int id;
+- bool ufp, dp;
+ u8 mode;
+- int ret;
++ int ret, ufp, dp;
+
+ if (!edev)
+ return MODE_DFP_USB;
+@@ -821,10 +820,10 @@ static int tcphy_get_mode(struct rockchip_typec_phy *tcphy)
+ mode = MODE_DFP_USB;
+ id = EXTCON_USB_HOST;
+
+- if (ufp) {
++ if (ufp > 0) {
+ mode = MODE_UFP_USB;
+ id = EXTCON_USB;
+- } else if (dp) {
++ } else if (dp > 0) {
+ mode = MODE_DFP_DP;
+ id = EXTCON_DISP_DP;
+
+diff --git a/drivers/phy/samsung/phy-exynos-pcie.c b/drivers/phy/samsung/phy-exynos-pcie.c
+index 578cfe07d07ab..53c9230c29078 100644
+--- a/drivers/phy/samsung/phy-exynos-pcie.c
++++ b/drivers/phy/samsung/phy-exynos-pcie.c
+@@ -51,6 +51,13 @@ static int exynos5433_pcie_phy_init(struct phy *phy)
+ {
+ struct exynos_pcie_phy *ep = phy_get_drvdata(phy);
+
++ regmap_update_bits(ep->pmureg, EXYNOS5433_PMU_PCIE_PHY_OFFSET,
++ BIT(0), 1);
++ regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_GLOBAL_RESET,
++ PCIE_APP_REQ_EXIT_L1_MODE, 0);
++ regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_L1SUB_CM_CON,
++ PCIE_REFCLK_GATING_EN, 0);
++
+ regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_COMMON_RESET,
+ PCIE_PHY_RESET, 1);
+ regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_MAC_RESET,
+@@ -109,20 +116,7 @@ static int exynos5433_pcie_phy_init(struct phy *phy)
+ return 0;
+ }
+
+-static int exynos5433_pcie_phy_power_on(struct phy *phy)
+-{
+- struct exynos_pcie_phy *ep = phy_get_drvdata(phy);
+-
+- regmap_update_bits(ep->pmureg, EXYNOS5433_PMU_PCIE_PHY_OFFSET,
+- BIT(0), 1);
+- regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_GLOBAL_RESET,
+- PCIE_APP_REQ_EXIT_L1_MODE, 0);
+- regmap_update_bits(ep->fsysreg, PCIE_EXYNOS5433_PHY_L1SUB_CM_CON,
+- PCIE_REFCLK_GATING_EN, 0);
+- return 0;
+-}
+-
+-static int exynos5433_pcie_phy_power_off(struct phy *phy)
++static int exynos5433_pcie_phy_exit(struct phy *phy)
+ {
+ struct exynos_pcie_phy *ep = phy_get_drvdata(phy);
+
+@@ -135,8 +129,7 @@ static int exynos5433_pcie_phy_power_off(struct phy *phy)
+
+ static const struct phy_ops exynos5433_phy_ops = {
+ .init = exynos5433_pcie_phy_init,
+- .power_on = exynos5433_pcie_phy_power_on,
+- .power_off = exynos5433_pcie_phy_power_off,
++ .exit = exynos5433_pcie_phy_exit,
+ .owner = THIS_MODULE,
+ };
+
+diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c
+index 9ec234243f7c6..595adba5fb8f1 100644
+--- a/drivers/phy/samsung/phy-exynos5250-sata.c
++++ b/drivers/phy/samsung/phy-exynos5250-sata.c
+@@ -187,6 +187,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
+ return -EINVAL;
+
+ sata_phy->client = of_find_i2c_device_by_node(node);
++ of_node_put(node);
+ if (!sata_phy->client)
+ return -EPROBE_DEFER;
+
+@@ -195,20 +196,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
+ sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl");
+ if (IS_ERR(sata_phy->phyclk)) {
+ dev_err(dev, "failed to get clk for PHY\n");
+- return PTR_ERR(sata_phy->phyclk);
++ ret = PTR_ERR(sata_phy->phyclk);
++ goto put_dev;
+ }
+
+ ret = clk_prepare_enable(sata_phy->phyclk);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable source clk\n");
+- return ret;
++ goto put_dev;
+ }
+
+ sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops);
+ if (IS_ERR(sata_phy->phy)) {
+- clk_disable_unprepare(sata_phy->phyclk);
+ dev_err(dev, "failed to create PHY\n");
+- return PTR_ERR(sata_phy->phy);
++ ret = PTR_ERR(sata_phy->phy);
++ goto clk_disable;
+ }
+
+ phy_set_drvdata(sata_phy->phy, sata_phy);
+@@ -216,11 +218,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
+ phy_provider = devm_of_phy_provider_register(dev,
+ of_phy_simple_xlate);
+ if (IS_ERR(phy_provider)) {
+- clk_disable_unprepare(sata_phy->phyclk);
+- return PTR_ERR(phy_provider);
++ ret = PTR_ERR(phy_provider);
++ goto clk_disable;
+ }
+
+ return 0;
++
++clk_disable:
++ clk_disable_unprepare(sata_phy->phyclk);
++put_dev:
++ put_device(&sata_phy->client->dev);
++
++ return ret;
+ }
+
+ static const struct of_device_id exynos_sata_phy_of_match[] = {
+diff --git a/drivers/phy/samsung/phy-exynosautov9-ufs.c b/drivers/phy/samsung/phy-exynosautov9-ufs.c
+index 36398a15c2db7..d043dfdb598a2 100644
+--- a/drivers/phy/samsung/phy-exynosautov9-ufs.c
++++ b/drivers/phy/samsung/phy-exynosautov9-ufs.c
+@@ -31,22 +31,22 @@ static const struct samsung_ufs_phy_cfg exynosautov9_pre_init_cfg[] = {
+ PHY_COMN_REG_CFG(0x023, 0xc0, PWR_MODE_ANY),
+ PHY_COMN_REG_CFG(0x023, 0x00, PWR_MODE_ANY),
+
+- PHY_TRSV_REG_CFG(0x042, 0x5d, PWR_MODE_ANY),
+- PHY_TRSV_REG_CFG(0x043, 0x80, PWR_MODE_ANY),
++ PHY_TRSV_REG_CFG_AUTOV9(0x042, 0x5d, PWR_MODE_ANY),
++ PHY_TRSV_REG_CFG_AUTOV9(0x043, 0x80, PWR_MODE_ANY),
+
+ END_UFS_PHY_CFG,
+ };
+
+ /* Calibration for HS mode series A/B */
+ static const struct samsung_ufs_phy_cfg exynosautov9_pre_pwr_hs_cfg[] = {
+- PHY_TRSV_REG_CFG(0x032, 0xbc, PWR_MODE_HS_ANY),
+- PHY_TRSV_REG_CFG(0x03c, 0x7f, PWR_MODE_HS_ANY),
+- PHY_TRSV_REG_CFG(0x048, 0xc0, PWR_MODE_HS_ANY),
++ PHY_TRSV_REG_CFG_AUTOV9(0x032, 0xbc, PWR_MODE_HS_ANY),
++ PHY_TRSV_REG_CFG_AUTOV9(0x03c, 0x7f, PWR_MODE_HS_ANY),
++ PHY_TRSV_REG_CFG_AUTOV9(0x048, 0xc0, PWR_MODE_HS_ANY),
+
+- PHY_TRSV_REG_CFG(0x04a, 0x00, PWR_MODE_HS_G3_SER_B),
+- PHY_TRSV_REG_CFG(0x04b, 0x10, PWR_MODE_HS_G1_SER_B |
+- PWR_MODE_HS_G3_SER_B),
+- PHY_TRSV_REG_CFG(0x04d, 0x63, PWR_MODE_HS_G3_SER_B),
++ PHY_TRSV_REG_CFG_AUTOV9(0x04a, 0x00, PWR_MODE_HS_G3_SER_B),
++ PHY_TRSV_REG_CFG_AUTOV9(0x04b, 0x10, PWR_MODE_HS_G1_SER_B |
++ PWR_MODE_HS_G3_SER_B),
++ PHY_TRSV_REG_CFG_AUTOV9(0x04d, 0x63, PWR_MODE_HS_G3_SER_B),
+
+ END_UFS_PHY_CFG,
+ };
+diff --git a/drivers/phy/socionext/phy-uniphier-usb3ss.c b/drivers/phy/socionext/phy-uniphier-usb3ss.c
+index 6700645bcbe6b..3b5ffc16a6947 100644
+--- a/drivers/phy/socionext/phy-uniphier-usb3ss.c
++++ b/drivers/phy/socionext/phy-uniphier-usb3ss.c
+@@ -22,11 +22,13 @@
+ #include <linux/reset.h>
+
+ #define SSPHY_TESTI 0x0
+-#define SSPHY_TESTO 0x4
+ #define TESTI_DAT_MASK GENMASK(13, 6)
+ #define TESTI_ADR_MASK GENMASK(5, 1)
+ #define TESTI_WR_EN BIT(0)
+
++#define SSPHY_TESTO 0x4
++#define TESTO_DAT_MASK GENMASK(7, 0)
++
+ #define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) }
+
+ #define CDR_CPD_TRIM PHY_F(7, 3, 0) /* RxPLL charge pump current */
+@@ -84,12 +86,12 @@ static void uniphier_u3ssphy_set_param(struct uniphier_u3ssphy_priv *priv,
+ val = FIELD_PREP(TESTI_DAT_MASK, 1);
+ val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
+ uniphier_u3ssphy_testio_write(priv, val);
+- val = readl(priv->base + SSPHY_TESTO);
++ val = readl(priv->base + SSPHY_TESTO) & TESTO_DAT_MASK;
+
+ /* update value */
+- val &= ~FIELD_PREP(TESTI_DAT_MASK, field_mask);
++ val &= ~field_mask;
+ data = field_mask & (p->value << p->field.lsb);
+- val = FIELD_PREP(TESTI_DAT_MASK, data);
++ val = FIELD_PREP(TESTI_DAT_MASK, data | val);
+ val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no);
+ uniphier_u3ssphy_testio_write(priv, val);
+ uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN);
+diff --git a/drivers/phy/st/phy-miphy28lp.c b/drivers/phy/st/phy-miphy28lp.c
+index 068160a34f5cc..e30305b77f0d1 100644
+--- a/drivers/phy/st/phy-miphy28lp.c
++++ b/drivers/phy/st/phy-miphy28lp.c
+@@ -9,6 +9,7 @@
+
+ #include <linux/platform_device.h>
+ #include <linux/io.h>
++#include <linux/iopoll.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/of.h>
+@@ -484,19 +485,11 @@ static inline void miphy28lp_pcie_config_gen(struct miphy28lp_phy *miphy_phy)
+
+ static inline int miphy28lp_wait_compensation(struct miphy28lp_phy *miphy_phy)
+ {
+- unsigned long finish = jiffies + 5 * HZ;
+ u8 val;
+
+ /* Waiting for Compensation to complete */
+- do {
+- val = readb_relaxed(miphy_phy->base + MIPHY_COMP_FSM_6);
+-
+- if (time_after_eq(jiffies, finish))
+- return -EBUSY;
+- cpu_relax();
+- } while (!(val & COMP_DONE));
+-
+- return 0;
++ return readb_relaxed_poll_timeout(miphy_phy->base + MIPHY_COMP_FSM_6,
++ val, val & COMP_DONE, 1, 5 * USEC_PER_SEC);
+ }
+
+
+@@ -805,7 +798,6 @@ static inline void miphy28lp_configure_usb3(struct miphy28lp_phy *miphy_phy)
+
+ static inline int miphy_is_ready(struct miphy28lp_phy *miphy_phy)
+ {
+- unsigned long finish = jiffies + 5 * HZ;
+ u8 mask = HFC_PLL | HFC_RDY;
+ u8 val;
+
+@@ -816,21 +808,14 @@ static inline int miphy_is_ready(struct miphy28lp_phy *miphy_phy)
+ if (miphy_phy->type == PHY_TYPE_SATA)
+ mask |= PHY_RDY;
+
+- do {
+- val = readb_relaxed(miphy_phy->base + MIPHY_STATUS_1);
+- if ((val & mask) != mask)
+- cpu_relax();
+- else
+- return 0;
+- } while (!time_after_eq(jiffies, finish));
+-
+- return -EBUSY;
++ return readb_relaxed_poll_timeout(miphy_phy->base + MIPHY_STATUS_1,
++ val, (val & mask) == mask, 1,
++ 5 * USEC_PER_SEC);
+ }
+
+ static int miphy_osc_is_ready(struct miphy28lp_phy *miphy_phy)
+ {
+ struct miphy28lp_dev *miphy_dev = miphy_phy->phydev;
+- unsigned long finish = jiffies + 5 * HZ;
+ u32 val;
+
+ if (!miphy_phy->osc_rdy)
+@@ -839,17 +824,10 @@ static int miphy_osc_is_ready(struct miphy28lp_phy *miphy_phy)
+ if (!miphy_phy->syscfg_reg[SYSCFG_STATUS])
+ return -EINVAL;
+
+- do {
+- regmap_read(miphy_dev->regmap,
+- miphy_phy->syscfg_reg[SYSCFG_STATUS], &val);
+-
+- if ((val & MIPHY_OSC_RDY) != MIPHY_OSC_RDY)
+- cpu_relax();
+- else
+- return 0;
+- } while (!time_after_eq(jiffies, finish));
+-
+- return -EBUSY;
++ return regmap_read_poll_timeout(miphy_dev->regmap,
++ miphy_phy->syscfg_reg[SYSCFG_STATUS],
++ val, val & MIPHY_OSC_RDY, 1,
++ 5 * USEC_PER_SEC);
+ }
+
+ static int miphy28lp_get_resource_byname(struct device_node *child,
+diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
+index 937a14fa7448a..27f7e2292cf0b 100644
+--- a/drivers/phy/st/phy-stm32-usbphyc.c
++++ b/drivers/phy/st/phy-stm32-usbphyc.c
+@@ -225,7 +225,7 @@ static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc)
+
+ ret = __stm32_usbphyc_pll_disable(usbphyc);
+ if (ret)
+- return ret;
++ goto dec_n_pll_cons;
+ }
+
+ ret = stm32_usbphyc_regulators_enable(usbphyc);
+@@ -279,7 +279,9 @@ static int stm32_usbphyc_phy_init(struct phy *phy)
+ return 0;
+
+ pll_disable:
+- return stm32_usbphyc_pll_disable(usbphyc);
++ stm32_usbphyc_pll_disable(usbphyc);
++
++ return ret;
+ }
+
+ static int stm32_usbphyc_phy_exit(struct phy *phy)
+@@ -530,6 +532,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
+ ret = of_property_read_u32(child, "reg", &index);
+ if (ret || index > usbphyc->nphys) {
+ dev_err(&phy->dev, "invalid reg property: %d\n", ret);
++ if (!ret)
++ ret = -EINVAL;
+ goto put_child;
+ }
+
+diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
+index 963de5913e505..119e2c039225f 100644
+--- a/drivers/phy/tegra/xusb.c
++++ b/drivers/phy/tegra/xusb.c
+@@ -562,6 +562,7 @@ static void tegra_xusb_port_unregister(struct tegra_xusb_port *port)
+ usb_role_switch_unregister(port->usb_role_sw);
+ cancel_work_sync(&port->usb_phy_work);
+ usb_remove_phy(&port->usb_phy);
++ port->usb_phy.dev->driver = NULL;
+ }
+
+ if (port->ops->remove)
+@@ -668,6 +669,9 @@ static int tegra_xusb_setup_usb_role_switch(struct tegra_xusb_port *port)
+ port->dev.driver = devm_kzalloc(&port->dev,
+ sizeof(struct device_driver),
+ GFP_KERNEL);
++ if (!port->dev.driver)
++ return -ENOMEM;
++
+ port->dev.driver->owner = THIS_MODULE;
+
+ port->usb_role_sw = usb_role_switch_register(&port->dev,
+@@ -781,6 +785,7 @@ static int tegra_xusb_add_usb2_port(struct tegra_xusb_padctl *padctl,
+ usb2->base.lane = usb2->base.ops->map(&usb2->base);
+ if (IS_ERR(usb2->base.lane)) {
+ err = PTR_ERR(usb2->base.lane);
++ tegra_xusb_port_unregister(&usb2->base);
+ goto out;
+ }
+
+@@ -847,6 +852,7 @@ static int tegra_xusb_add_ulpi_port(struct tegra_xusb_padctl *padctl,
+ ulpi->base.lane = ulpi->base.ops->map(&ulpi->base);
+ if (IS_ERR(ulpi->base.lane)) {
+ err = PTR_ERR(ulpi->base.lane);
++ tegra_xusb_port_unregister(&ulpi->base);
+ goto out;
+ }
+
+diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig
+index 15a3bcf323086..b905902d57508 100644
+--- a/drivers/phy/ti/Kconfig
++++ b/drivers/phy/ti/Kconfig
+@@ -23,7 +23,7 @@ config PHY_DM816X_USB
+
+ config PHY_AM654_SERDES
+ tristate "TI AM654 SERDES support"
+- depends on OF && ARCH_K3 || COMPILE_TEST
++ depends on OF && (ARCH_K3 || COMPILE_TEST)
+ depends on COMMON_CLK
+ select GENERIC_PHY
+ select MULTIPLEXER
+@@ -35,7 +35,7 @@ config PHY_AM654_SERDES
+
+ config PHY_J721E_WIZ
+ tristate "TI J721E WIZ (SERDES Wrapper) support"
+- depends on OF && ARCH_K3 || COMPILE_TEST
++ depends on OF && (ARCH_K3 || COMPILE_TEST)
+ depends on HAS_IOMEM && OF_ADDRESS
+ depends on COMMON_CLK
+ select GENERIC_PHY
+diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c
+index 2ff56ce77b307..21c0088f5ca9e 100644
+--- a/drivers/phy/ti/phy-am654-serdes.c
++++ b/drivers/phy/ti/phy-am654-serdes.c
+@@ -838,7 +838,7 @@ static int serdes_am654_probe(struct platform_device *pdev)
+
+ clk_err:
+ of_clk_del_provider(node);
+-
++ pm_runtime_disable(dev);
+ return ret;
+ }
+
+diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c
+index 5fd2e8a08bfcf..d0ab69750c6b4 100644
+--- a/drivers/phy/ti/phy-gmii-sel.c
++++ b/drivers/phy/ti/phy-gmii-sel.c
+@@ -320,6 +320,8 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv)
+ u64 size;
+
+ offset = of_get_address(dev->of_node, 0, &size, NULL);
++ if (!offset)
++ return -EINVAL;
+ priv->num_ports = size / sizeof(u32);
+ if (!priv->num_ports)
+ return -EINVAL;
+diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
+index 126f5b8735cc1..8963fbf7aa73b 100644
+--- a/drivers/phy/ti/phy-j721e-wiz.c
++++ b/drivers/phy/ti/phy-j721e-wiz.c
+@@ -233,6 +233,7 @@ static const struct clk_div_table clk_div_table[] = {
+ { .val = 1, .div = 2, },
+ { .val = 2, .div = 4, },
+ { .val = 3, .div = 8, },
++ { /* sentinel */ },
+ };
+
+ static const struct wiz_clk_div_sel clk_div_sel[] = {
+diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c
+index ebceb1520ce88..ca8532a3f1931 100644
+--- a/drivers/phy/ti/phy-omap-usb2.c
++++ b/drivers/phy/ti/phy-omap-usb2.c
+@@ -215,7 +215,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy)
+ return 0;
+
+ err1:
+- clk_disable(phy->wkupclk);
++ clk_disable_unprepare(phy->wkupclk);
+
+ err0:
+ return ret;
+diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c
+index f478d8a17115b..9be9535ad7ab7 100644
+--- a/drivers/phy/xilinx/phy-zynqmp.c
++++ b/drivers/phy/xilinx/phy-zynqmp.c
+@@ -134,7 +134,8 @@
+ #define PROT_BUS_WIDTH_10 0x0
+ #define PROT_BUS_WIDTH_20 0x1
+ #define PROT_BUS_WIDTH_40 0x2
+-#define PROT_BUS_WIDTH_SHIFT 2
++#define PROT_BUS_WIDTH_SHIFT(n) ((n) * 2)
++#define PROT_BUS_WIDTH_MASK(n) GENMASK((n) * 2 + 1, (n) * 2)
+
+ /* Number of GT lanes */
+ #define NUM_LANES 4
+@@ -445,12 +446,12 @@ static void xpsgtr_phy_init_sata(struct xpsgtr_phy *gtr_phy)
+ static void xpsgtr_phy_init_sgmii(struct xpsgtr_phy *gtr_phy)
+ {
+ struct xpsgtr_dev *gtr_dev = gtr_phy->dev;
++ u32 mask = PROT_BUS_WIDTH_MASK(gtr_phy->lane);
++ u32 val = PROT_BUS_WIDTH_10 << PROT_BUS_WIDTH_SHIFT(gtr_phy->lane);
+
+ /* Set SGMII protocol TX and RX bus width to 10 bits. */
+- xpsgtr_write(gtr_dev, TX_PROT_BUS_WIDTH,
+- PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT));
+- xpsgtr_write(gtr_dev, RX_PROT_BUS_WIDTH,
+- PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT));
++ xpsgtr_clr_set(gtr_dev, TX_PROT_BUS_WIDTH, mask, val);
++ xpsgtr_clr_set(gtr_dev, RX_PROT_BUS_WIDTH, mask, val);
+
+ xpsgtr_bypass_scrambler_8b10b(gtr_phy);
+ }
+diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+index a3fa03bcd9a30..54064714d73fb 100644
+--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
++++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+@@ -1236,18 +1236,12 @@ FUNC_GROUP_DECL(SALT8, AA12);
+ FUNC_GROUP_DECL(WDTRST4, AA12);
+
+ #define AE12 196
+-SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID,
+- SIG_DESC_SET(SCU438, 4));
+ SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4);
+-PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2),
+- SIG_EXPR_LIST_PTR(AE12, GPIOY4));
++PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, GPIOY4));
+
+ #define AF12 197
+-SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID,
+- SIG_DESC_SET(SCU438, 5));
+ SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5);
+-PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3),
+- SIG_EXPR_LIST_PTR(AF12, GPIOY5));
++PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, GPIOY5));
+
+ #define AC12 198
+ SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6));
+@@ -1520,9 +1514,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
+ PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
+
+ GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
+-GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
+ GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
+-FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
++FUNC_DECL_1(FWSPID, FWSPID);
+ FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
+ FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
+ /*
+@@ -1918,7 +1911,6 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
+ ASPEED_PINCTRL_GROUP(FSI2),
+ ASPEED_PINCTRL_GROUP(FWSPIABR),
+ ASPEED_PINCTRL_GROUP(FWSPID),
+- ASPEED_PINCTRL_GROUP(FWQSPID),
+ ASPEED_PINCTRL_GROUP(FWSPIWP),
+ ASPEED_PINCTRL_GROUP(GPIT0),
+ ASPEED_PINCTRL_GROUP(GPIT1),
+diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+index c94e24aadf922..5a12fc7cf91fb 100644
+--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
++++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+@@ -122,7 +122,7 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx,
+ int ret = 0;
+
+ if (!exprs)
+- return true;
++ return -EINVAL;
+
+ while (*exprs && !ret) {
+ ret = aspeed_sig_expr_disable(ctx, *exprs);
+@@ -236,11 +236,11 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
+ const struct aspeed_sig_expr **funcs;
+ const struct aspeed_sig_expr ***prios;
+
+- pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name);
+-
+ if (!pdesc)
+ return -EINVAL;
+
++ pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name);
++
+ prios = pdesc->prios;
+
+ if (!prios)
+diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig
+index c9c5efc927311..5973a279e6b8c 100644
+--- a/drivers/pinctrl/bcm/Kconfig
++++ b/drivers/pinctrl/bcm/Kconfig
+@@ -35,6 +35,7 @@ config PINCTRL_BCM63XX
+ select PINCONF
+ select GENERIC_PINCONF
+ select GPIOLIB
++ select REGMAP
+ select GPIO_REGMAP
+
+ config PINCTRL_BCM6318
+diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+index 6e6fefeb21ead..7904ae5406da1 100644
+--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
++++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+@@ -351,6 +351,18 @@ static int bcm2835_gpio_direction_output(struct gpio_chip *chip,
+ return pinctrl_gpio_direction_output(chip->base + offset);
+ }
+
++static int bcm2835_of_gpio_ranges_fallback(struct gpio_chip *gc,
++ struct device_node *np)
++{
++ struct pinctrl_dev *pctldev = of_pinctrl_get(np);
++
++ if (!pctldev)
++ return 0;
++
++ return gpiochip_add_pin_range(gc, pinctrl_dev_get_devname(pctldev), 0, 0,
++ gc->ngpio);
++}
++
+ static const struct gpio_chip bcm2835_gpio_chip = {
+ .label = MODULE_NAME,
+ .owner = THIS_MODULE,
+@@ -365,6 +377,7 @@ static const struct gpio_chip bcm2835_gpio_chip = {
+ .base = -1,
+ .ngpio = BCM2835_NUM_GPIOS,
+ .can_sleep = false,
++ .of_gpio_ranges_fallback = bcm2835_of_gpio_ranges_fallback,
+ };
+
+ static const struct gpio_chip bcm2711_gpio_chip = {
+@@ -381,6 +394,7 @@ static const struct gpio_chip bcm2711_gpio_chip = {
+ .base = -1,
+ .ngpio = BCM2711_NUM_GPIOS,
+ .can_sleep = false,
++ .of_gpio_ranges_fallback = bcm2835_of_gpio_ranges_fallback,
+ };
+
+ static void bcm2835_gpio_irq_handle_bank(struct bcm2835_pinctrl *pc,
+@@ -1243,6 +1257,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
+ raw_spin_lock_init(&pc->irq_lock[i]);
+ }
+
++ pc->pctl_desc = *pdata->pctl_desc;
++ pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc);
++ if (IS_ERR(pc->pctl_dev)) {
++ gpiochip_remove(&pc->gpio_chip);
++ return PTR_ERR(pc->pctl_dev);
++ }
++
++ pc->gpio_range = *pdata->gpio_range;
++ pc->gpio_range.base = pc->gpio_chip.base;
++ pc->gpio_range.gc = &pc->gpio_chip;
++ pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range);
++
+ girq = &pc->gpio_chip.irq;
+ girq->chip = &bcm2835_gpio_irq_chip;
+ girq->parent_handler = bcm2835_gpio_irq_handler;
+@@ -1250,15 +1276,19 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
+ girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS,
+ sizeof(*girq->parents),
+ GFP_KERNEL);
+- if (!girq->parents)
+- return -ENOMEM;
++ if (!girq->parents) {
++ err = -ENOMEM;
++ goto out_remove;
++ }
+
+ if (is_7211) {
+ pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS,
+ sizeof(*pc->wake_irq),
+ GFP_KERNEL);
+- if (!pc->wake_irq)
+- return -ENOMEM;
++ if (!pc->wake_irq) {
++ err = -ENOMEM;
++ goto out_remove;
++ }
+ }
+
+ /*
+@@ -1286,8 +1316,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
+
+ len = strlen(dev_name(pc->dev)) + 16;
+ name = devm_kzalloc(pc->dev, len, GFP_KERNEL);
+- if (!name)
+- return -ENOMEM;
++ if (!name) {
++ err = -ENOMEM;
++ goto out_remove;
++ }
+
+ snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i);
+
+@@ -1306,22 +1338,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev)
+ err = gpiochip_add_data(&pc->gpio_chip, pc);
+ if (err) {
+ dev_err(dev, "could not add GPIO chip\n");
+- return err;
+- }
+-
+- pc->pctl_desc = *pdata->pctl_desc;
+- pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc);
+- if (IS_ERR(pc->pctl_dev)) {
+- gpiochip_remove(&pc->gpio_chip);
+- return PTR_ERR(pc->pctl_dev);
++ goto out_remove;
+ }
+
+- pc->gpio_range = *pdata->gpio_range;
+- pc->gpio_range.base = pc->gpio_chip.base;
+- pc->gpio_range.gc = &pc->gpio_chip;
+- pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range);
+-
+ return 0;
++
++out_remove:
++ pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range);
++ return err;
+ }
+
+ static struct platform_driver bcm2835_pinctrl_driver = {
+diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
+index 5082102d7d0d9..ffe39336fcaca 100644
+--- a/drivers/pinctrl/core.c
++++ b/drivers/pinctrl/core.c
+@@ -2100,6 +2100,8 @@ int pinctrl_enable(struct pinctrl_dev *pctldev)
+ if (error) {
+ dev_err(pctldev->dev, "could not claim hogs: %i\n",
+ error);
++ pinctrl_free_pindescs(pctldev, pctldev->desc->pins,
++ pctldev->desc->npins);
+ mutex_destroy(&pctldev->mutex);
+ kfree(pctldev);
+
+diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c
+index 3fb2387147189..eac55fee5281c 100644
+--- a/drivers/pinctrl/devicetree.c
++++ b/drivers/pinctrl/devicetree.c
+@@ -220,6 +220,8 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev)
+ for (state = 0; ; state++) {
+ /* Retrieve the pinctrl-* property */
+ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state);
++ if (!propname)
++ return -ENOMEM;
+ prop = of_find_property(np, propname, &size);
+ kfree(propname);
+ if (!prop) {
+diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
+index 980099028cf8a..1b993b33d60f0 100644
+--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
++++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
+@@ -945,11 +945,6 @@ static int chv_config_get(struct pinctrl_dev *pctldev, unsigned int pin,
+
+ break;
+
+- case PIN_CONFIG_DRIVE_OPEN_DRAIN:
+- if (!(ctrl1 & CHV_PADCTRL1_ODEN))
+- return -EINVAL;
+- break;
+-
+ case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: {
+ u32 cfg;
+
+@@ -959,6 +954,16 @@ static int chv_config_get(struct pinctrl_dev *pctldev, unsigned int pin,
+ return -EINVAL;
+
+ break;
++
++ case PIN_CONFIG_DRIVE_PUSH_PULL:
++ if (ctrl1 & CHV_PADCTRL1_ODEN)
++ return -EINVAL;
++ break;
++
++ case PIN_CONFIG_DRIVE_OPEN_DRAIN:
++ if (!(ctrl1 & CHV_PADCTRL1_ODEN))
++ return -EINVAL;
++ break;
+ }
+
+ default:
+@@ -1619,7 +1624,6 @@ static int chv_pinctrl_probe(struct platform_device *pdev)
+ const struct intel_pinctrl_soc_data *soc_data;
+ struct intel_community *community;
+ struct device *dev = &pdev->dev;
+- struct acpi_device *adev = ACPI_COMPANION(dev);
+ struct intel_pinctrl *pctrl;
+ acpi_status status;
+ int ret, irq;
+@@ -1682,7 +1686,7 @@ static int chv_pinctrl_probe(struct platform_device *pdev)
+ if (ret)
+ return ret;
+
+- status = acpi_install_address_space_handler(adev->handle,
++ status = acpi_install_address_space_handler(ACPI_HANDLE(dev),
+ community->acpi_space_id,
+ chv_pinctrl_mmio_access_handler,
+ NULL, pctrl);
+@@ -1699,7 +1703,7 @@ static int chv_pinctrl_remove(struct platform_device *pdev)
+ struct intel_pinctrl *pctrl = platform_get_drvdata(pdev);
+ const struct intel_community *community = &pctrl->communities[0];
+
+- acpi_remove_address_space_handler(ACPI_COMPANION(&pdev->dev),
++ acpi_remove_address_space_handler(ACPI_HANDLE(&pdev->dev),
+ community->acpi_space_id,
+ chv_pinctrl_mmio_access_handler);
+
+diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
+index 85750974d1825..cc64eda155f57 100644
+--- a/drivers/pinctrl/intel/pinctrl-intel.c
++++ b/drivers/pinctrl/intel/pinctrl-intel.c
+@@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input)
+ writel(value, padcfg0);
+ }
+
++static int __intel_gpio_get_gpio_mode(u32 value)
++{
++ return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
++}
++
+ static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
+ {
+- return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
++ return __intel_gpio_get_gpio_mode(readl(padcfg0));
+ }
+
+ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
+@@ -451,8 +456,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
+ value &= ~PADCFG0_PMODE_MASK;
+ value |= PADCFG0_PMODE_GPIO;
+
+- /* Disable input and output buffers */
+- value |= PADCFG0_GPIORXDIS;
++ /* Disable TX buffer and enable RX (this will be input) */
++ value &= ~PADCFG0_GPIORXDIS;
+ value |= PADCFG0_GPIOTXDIS;
+
+ /* Disable SCI/SMI/NMI generation */
+@@ -497,9 +502,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev,
+
+ intel_gpio_set_gpio_mode(padcfg0);
+
+- /* Disable TX buffer and enable RX (this will be input) */
+- __intel_gpio_set_direction(padcfg0, true);
+-
+ raw_spin_unlock_irqrestore(&pctrl->lock, flags);
+
+ return 0;
+@@ -1115,9 +1117,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type)
+
+ intel_gpio_set_gpio_mode(reg);
+
+- /* Disable TX buffer and enable RX (this will be input) */
+- __intel_gpio_set_direction(reg, true);
+-
+ value = readl(reg);
+
+ value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV);
+@@ -1216,6 +1215,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data)
+ return IRQ_RETVAL(ret);
+ }
+
++static void intel_gpio_irq_init(struct intel_pinctrl *pctrl)
++{
++ int i;
++
++ for (i = 0; i < pctrl->ncommunities; i++) {
++ const struct intel_community *community;
++ void __iomem *base;
++ unsigned int gpp;
++
++ community = &pctrl->communities[i];
++ base = community->regs;
++
++ for (gpp = 0; gpp < community->ngpps; gpp++) {
++ /* Mask and clear all interrupts */
++ writel(0, base + community->ie_offset + gpp * 4);
++ writel(0xffff, base + community->is_offset + gpp * 4);
++ }
++ }
++}
++
++static int intel_gpio_irq_init_hw(struct gpio_chip *gc)
++{
++ struct intel_pinctrl *pctrl = gpiochip_get_data(gc);
++
++ /*
++ * Make sure the interrupt lines are in a proper state before
++ * further configuration.
++ */
++ intel_gpio_irq_init(pctrl);
++
++ return 0;
++}
++
+ static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl,
+ const struct intel_community *community)
+ {
+@@ -1320,6 +1352,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
+ girq->num_parents = 0;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
++ girq->init_hw = intel_gpio_irq_init_hw;
+
+ ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
+ if (ret) {
+@@ -1598,16 +1631,14 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_probe_by_uid);
+
+ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_device *pdev)
+ {
++ const struct intel_pinctrl_soc_data * const *table;
+ const struct intel_pinctrl_soc_data *data = NULL;
+- const struct intel_pinctrl_soc_data **table;
+- struct acpi_device *adev;
+- unsigned int i;
+
+- adev = ACPI_COMPANION(&pdev->dev);
+- if (adev) {
+- const void *match = device_get_match_data(&pdev->dev);
++ table = device_get_match_data(&pdev->dev);
++ if (table) {
++ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
++ unsigned int i;
+
+- table = (const struct intel_pinctrl_soc_data **)match;
+ for (i = 0; table[i]; i++) {
+ if (!strcmp(adev->pnp.unique_id, table[i]->uid)) {
+ data = table[i];
+@@ -1621,7 +1652,7 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_
+ if (!id)
+ return ERR_PTR(-ENODEV);
+
+- table = (const struct intel_pinctrl_soc_data **)id->driver_data;
++ table = (const struct intel_pinctrl_soc_data * const *)id->driver_data;
+ data = table[pdev->id];
+ }
+
+@@ -1630,9 +1661,16 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_
+ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data);
+
+ #ifdef CONFIG_PM_SLEEP
++static bool __intel_gpio_is_direct_irq(u32 value)
++{
++ return (value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) &&
++ (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO);
++}
++
+ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin)
+ {
+ const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin);
++ u32 value;
+
+ if (!pd || !intel_pad_usable(pctrl, pin))
+ return false;
+@@ -1647,6 +1685,24 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int
+ gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin)))
+ return true;
+
++ /*
++ * The firmware on some systems may configure GPIO pins to be
++ * an interrupt source in so called "direct IRQ" mode. In such
++ * cases the GPIO controller driver has no idea if those pins
++ * are being used or not. At the same time, there is a known bug
++ * in the firmwares that don't restore the pin settings correctly
++ * after suspend, i.e. by an unknown reason the Rx value becomes
++ * inverted.
++ *
++ * Hence, let's save and restore the pins that are configured
++ * as GPIOs in the input mode with GPIROUTIOXAPIC bit set.
++ *
++ * See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
++ */
++ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0));
++ if (__intel_gpio_is_direct_irq(value))
++ return true;
++
+ return false;
+ }
+
+@@ -1695,26 +1751,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev)
+ }
+ EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq);
+
+-static void intel_gpio_irq_init(struct intel_pinctrl *pctrl)
+-{
+- size_t i;
+-
+- for (i = 0; i < pctrl->ncommunities; i++) {
+- const struct intel_community *community;
+- void __iomem *base;
+- unsigned int gpp;
+-
+- community = &pctrl->communities[i];
+- base = community->regs;
+-
+- for (gpp = 0; gpp < community->ngpps; gpp++) {
+- /* Mask and clear all interrupts */
+- writel(0, base + community->ie_offset + gpp * 4);
+- writel(0xffff, base + community->is_offset + gpp * 4);
+- }
+- }
+-}
+-
+ static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value)
+ {
+ u32 curr, updated;
+@@ -1794,7 +1830,12 @@ int intel_pinctrl_resume_noirq(struct device *dev)
+ for (i = 0; i < pctrl->soc->npins; i++) {
+ const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i];
+
+- if (!intel_pinctrl_should_save(pctrl, desc->number))
++ if (!(intel_pinctrl_should_save(pctrl, desc->number) ||
++ /*
++ * If the firmware mangled the register contents too much,
++ * check the saved value for the Direct IRQ mode.
++ */
++ __intel_gpio_is_direct_irq(pads[i].padcfg0)))
+ continue;
+
+ intel_restore_padcfg(pctrl, desc->number, PADCFG0, pads[i].padcfg0);
+diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c
+index 0bcd19597e4ad..3ddaeffc04150 100644
+--- a/drivers/pinctrl/intel/pinctrl-tigerlake.c
++++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c
+@@ -749,7 +749,6 @@ static const struct acpi_device_id tgl_pinctrl_acpi_match[] = {
+ { "INT34C5", (kernel_ulong_t)&tgllp_soc_data },
+ { "INT34C6", (kernel_ulong_t)&tglh_soc_data },
+ { "INTC1055", (kernel_ulong_t)&tgllp_soc_data },
+- { "INTC1057", (kernel_ulong_t)&tgllp_soc_data },
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match);
+diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig
+index 7040a7a7bd5d1..8a1706c8bb6ec 100644
+--- a/drivers/pinctrl/mediatek/Kconfig
++++ b/drivers/pinctrl/mediatek/Kconfig
+@@ -30,6 +30,7 @@ config PINCTRL_MTK_MOORE
+ select GENERIC_PINMUX_FUNCTIONS
+ select GPIOLIB
+ select OF_GPIO
++ select EINT_MTK
+ select PINCTRL_MTK_V2
+
+ config PINCTRL_MTK_PARIS
+@@ -151,6 +152,7 @@ config PINCTRL_MT8195
+ bool "Mediatek MT8195 pin control"
+ depends on OF
+ depends on ARM64 || COMPILE_TEST
++ default ARM64 && ARCH_MEDIATEK
+ select PINCTRL_MTK_PARIS
+
+ config PINCTRL_MT8365
+diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
+index f7b54a5517641..c24583bffa99d 100644
+--- a/drivers/pinctrl/mediatek/mtk-eint.c
++++ b/drivers/pinctrl/mediatek/mtk-eint.c
+@@ -287,12 +287,15 @@ static struct irq_chip mtk_eint_irq_chip = {
+
+ static unsigned int mtk_eint_hw_init(struct mtk_eint *eint)
+ {
+- void __iomem *reg = eint->base + eint->regs->dom_en;
++ void __iomem *dom_en = eint->base + eint->regs->dom_en;
++ void __iomem *mask_set = eint->base + eint->regs->mask_set;
+ unsigned int i;
+
+ for (i = 0; i < eint->hw->ap_num; i += 32) {
+- writel(0xffffffff, reg);
+- reg += 4;
++ writel(0xffffffff, dom_en);
++ writel(0xffffffff, mask_set);
++ dom_en += 4;
++ mask_set += 4;
+ }
+
+ return 0;
+diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c
+index a7500e18bb1de..c32884fc7de79 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c
++++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c
+@@ -659,7 +659,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = {
+ PIN_FIELD_BASE(10, 10, 4, 0x010, 0x10, 9, 3),
+ PIN_FIELD_BASE(11, 11, 4, 0x000, 0x10, 24, 3),
+ PIN_FIELD_BASE(12, 12, 4, 0x010, 0x10, 12, 3),
+- PIN_FIELD_BASE(13, 13, 4, 0x010, 0x10, 27, 3),
++ PIN_FIELD_BASE(13, 13, 4, 0x000, 0x10, 27, 3),
+ PIN_FIELD_BASE(14, 14, 4, 0x010, 0x10, 15, 3),
+ PIN_FIELD_BASE(15, 15, 4, 0x010, 0x10, 0, 3),
+ PIN_FIELD_BASE(16, 16, 4, 0x010, 0x10, 18, 3),
+@@ -708,7 +708,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = {
+ PIN_FIELD_BASE(78, 78, 3, 0x000, 0x10, 15, 3),
+ PIN_FIELD_BASE(79, 79, 3, 0x000, 0x10, 18, 3),
+ PIN_FIELD_BASE(80, 80, 3, 0x000, 0x10, 21, 3),
+- PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 28, 3),
++ PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 24, 3),
+ PIN_FIELD_BASE(82, 82, 3, 0x000, 0x10, 27, 3),
+ PIN_FIELD_BASE(83, 83, 3, 0x010, 0x10, 0, 3),
+ PIN_FIELD_BASE(84, 84, 3, 0x010, 0x10, 3, 3),
+diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
+index 79b1fee5a1eba..ddee0db72d264 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c
++++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
+@@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = {
+ MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13),
+ MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14),
+ MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15),
+- MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16),
++ MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16),
+ MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17),
+ MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18),
+ MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19),
+diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
+index 45ebdeba985ae..12163d3c4bcb0 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
++++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
+@@ -285,8 +285,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n,
+ desc = (const struct mtk_pin_desc *)hw->soc->pins;
+ *gpio_chip = &hw->chip;
+
+- /* Be greedy to guess first gpio_n is equal to eint_n */
+- if (desc[eint_n].eint.eint_n == eint_n)
++ /*
++ * Be greedy to guess first gpio_n is equal to eint_n.
++ * Only eint virtual eint number is greater than gpio number.
++ */
++ if (hw->soc->npins > eint_n &&
++ desc[eint_n].eint.eint_n == eint_n)
+ *gpio_n = eint_n;
+ else
+ *gpio_n = mtk_xt_find_eint_num(hw, eint_n);
+diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+index 5f7c421ab6e76..334cb85855a93 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
++++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+@@ -1038,6 +1038,7 @@ int mtk_pctrl_init(struct platform_device *pdev,
+ node = of_parse_phandle(np, "mediatek,pctl-regmap", 0);
+ if (node) {
+ pctl->regmap1 = syscon_node_to_regmap(node);
++ of_node_put(node);
+ if (IS_ERR(pctl->regmap1))
+ return PTR_ERR(pctl->regmap1);
+ } else if (regmap) {
+@@ -1051,6 +1052,7 @@ int mtk_pctrl_init(struct platform_device *pdev,
+ node = of_parse_phandle(np, "mediatek,pctl-regmap", 1);
+ if (node) {
+ pctl->regmap2 = syscon_node_to_regmap(node);
++ of_node_put(node);
+ if (IS_ERR(pctl->regmap2))
+ return PTR_ERR(pctl->regmap2);
+ }
+diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c
+index 85db2e4377f0c..0fa1c36148c23 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-paris.c
++++ b/drivers/pinctrl/mediatek/pinctrl-paris.c
+@@ -96,20 +96,16 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev,
+ err = hw->soc->bias_get_combo(hw, desc, &pullup, &ret);
+ if (err)
+ goto out;
++ if (ret == MTK_PUPD_SET_R1R0_00)
++ ret = MTK_DISABLE;
+ if (param == PIN_CONFIG_BIAS_DISABLE) {
+- if (ret == MTK_PUPD_SET_R1R0_00)
+- ret = MTK_DISABLE;
++ if (ret != MTK_DISABLE)
++ err = -EINVAL;
+ } else if (param == PIN_CONFIG_BIAS_PULL_UP) {
+- /* When desire to get pull-up value, return
+- * error if current setting is pull-down
+- */
+- if (!pullup)
++ if (!pullup || ret == MTK_DISABLE)
+ err = -EINVAL;
+ } else if (param == PIN_CONFIG_BIAS_PULL_DOWN) {
+- /* When desire to get pull-down value, return
+- * error if current setting is pull-up
+- */
+- if (pullup)
++ if (pullup || ret == MTK_DISABLE)
+ err = -EINVAL;
+ }
+ } else {
+@@ -188,8 +184,7 @@ out:
+ }
+
+ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+- enum pin_config_param param,
+- enum pin_config_param arg)
++ enum pin_config_param param, u32 arg)
+ {
+ struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev);
+ const struct mtk_pin_desc *desc;
+@@ -577,14 +572,17 @@ static int mtk_hw_get_value_wrap(struct mtk_pinctrl *hw, unsigned int gpio, int
+ mtk_hw_get_value_wrap(hw, gpio, PINCTRL_PIN_REG_DRV)
+
+ ssize_t mtk_pctrl_show_one_pin(struct mtk_pinctrl *hw,
+- unsigned int gpio, char *buf, unsigned int bufLen)
++ unsigned int gpio, char *buf, unsigned int buf_len)
+ {
+- int pinmux, pullup, pullen, len = 0, r1 = -1, r0 = -1;
++ int pinmux, pullup = 0, pullen = 0, len = 0, r1 = -1, r0 = -1;
+ const struct mtk_pin_desc *desc;
+
+ if (gpio >= hw->soc->npins)
+ return -EINVAL;
+
++ if (mtk_is_virt_gpio(hw, gpio))
++ return -EINVAL;
++
+ desc = (const struct mtk_pin_desc *)&hw->soc->pins[gpio];
+ pinmux = mtk_pctrl_get_pinmux(hw, gpio);
+ if (pinmux >= hw->soc->nfuncs)
+@@ -610,7 +608,7 @@ ssize_t mtk_pctrl_show_one_pin(struct mtk_pinctrl *hw,
+ } else if (pullen != MTK_DISABLE && pullen != MTK_ENABLE) {
+ pullen = 0;
+ }
+- len += scnprintf(buf + len, bufLen - len,
++ len += scnprintf(buf + len, buf_len - len,
+ "%03d: %1d%1d%1d%1d%02d%1d%1d%1d%1d",
+ gpio,
+ pinmux,
+@@ -624,10 +622,10 @@ ssize_t mtk_pctrl_show_one_pin(struct mtk_pinctrl *hw,
+ pullup);
+
+ if (r1 != -1) {
+- len += scnprintf(buf + len, bufLen - len, " (%1d %1d)\n",
++ len += scnprintf(buf + len, buf_len - len, " (%1d %1d)\n",
+ r1, r0);
+ } else {
+- len += scnprintf(buf + len, bufLen - len, "\n");
++ len += scnprintf(buf + len, buf_len - len, "\n");
+ }
+
+ return len;
+@@ -639,7 +637,7 @@ static void mtk_pctrl_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s,
+ unsigned int gpio)
+ {
+ struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev);
+- char buf[PIN_DBG_BUF_SZ];
++ char buf[PIN_DBG_BUF_SZ] = { 0 };
+
+ (void)mtk_pctrl_show_one_pin(hw, gpio, buf, PIN_DBG_BUF_SZ);
+
+@@ -719,10 +717,10 @@ static int mtk_pconf_group_get(struct pinctrl_dev *pctldev, unsigned group,
+ unsigned long *config)
+ {
+ struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev);
++ struct mtk_pinctrl_group *grp = &hw->groups[group];
+
+- *config = hw->groups[group].config;
+-
+- return 0;
++ /* One pin per group only */
++ return mtk_pinconf_get(pctldev, grp->pin, config);
+ }
+
+ static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group,
+@@ -738,8 +736,6 @@ static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group,
+ pinconf_to_config_argument(configs[i]));
+ if (ret < 0)
+ return ret;
+-
+- grp->config = configs[i];
+ }
+
+ return 0;
+diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c
+index 7bfecdfba1779..d249a035c2b9b 100644
+--- a/drivers/pinctrl/meson/pinctrl-meson-axg.c
++++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c
+@@ -400,6 +400,7 @@ static struct meson_pmx_group meson_axg_periphs_groups[] = {
+ GPIO_GROUP(GPIOA_15),
+ GPIO_GROUP(GPIOA_16),
+ GPIO_GROUP(GPIOA_17),
++ GPIO_GROUP(GPIOA_18),
+ GPIO_GROUP(GPIOA_19),
+ GPIO_GROUP(GPIOA_20),
+
+diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+index 5cb018f988003..7338bc353347e 100644
+--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
++++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+@@ -341,12 +341,12 @@ static int armada_37xx_pmx_set_by_name(struct pinctrl_dev *pctldev,
+ struct armada_37xx_pin_group *grp)
+ {
+ struct armada_37xx_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
++ struct device *dev = info->dev;
+ unsigned int reg = SELECTION;
+ unsigned int mask = grp->reg_mask;
+ int func, val;
+
+- dev_dbg(info->dev, "enable function %s group %s\n",
+- name, grp->name);
++ dev_dbg(dev, "enable function %s group %s\n", name, grp->name);
+
+ func = match_string(grp->funcs, NB_FUNCS, name);
+ if (func < 0)
+@@ -722,25 +722,22 @@ static unsigned int armada_37xx_irq_startup(struct irq_data *d)
+ static int armada_37xx_irqchip_register(struct platform_device *pdev,
+ struct armada_37xx_pinctrl *info)
+ {
+- struct device_node *np = info->dev->of_node;
+ struct gpio_chip *gc = &info->gpio_chip;
+ struct irq_chip *irqchip = &info->irq_chip;
+ struct gpio_irq_chip *girq = &gc->irq;
+ struct device *dev = &pdev->dev;
+- struct resource res;
++ struct device_node *np;
+ int ret = -ENODEV, i, nr_irq_parent;
+
+ /* Check if we have at least one gpio-controller child node */
+- for_each_child_of_node(info->dev->of_node, np) {
++ for_each_child_of_node(dev->of_node, np) {
+ if (of_property_read_bool(np, "gpio-controller")) {
+ ret = 0;
+ break;
+ }
+ }
+- if (ret) {
+- dev_err(dev, "no gpio-controller child node\n");
+- return ret;
+- }
++ if (ret)
++ return dev_err_probe(dev, ret, "no gpio-controller child node\n");
+
+ nr_irq_parent = of_irq_count(np);
+ spin_lock_init(&info->irq_lock);
+@@ -750,12 +747,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev,
+ return 0;
+ }
+
+- if (of_address_to_resource(info->dev->of_node, 1, &res)) {
+- dev_err(dev, "cannot find IO resource\n");
+- return -ENOENT;
+- }
+-
+- info->base = devm_ioremap_resource(info->dev, &res);
++ info->base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(info->base))
+ return PTR_ERR(info->base);
+
+@@ -774,14 +766,13 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev,
+ * the chained irq with all of them.
+ */
+ girq->num_parents = nr_irq_parent;
+- girq->parents = devm_kcalloc(&pdev->dev, nr_irq_parent,
+- sizeof(*girq->parents), GFP_KERNEL);
++ girq->parents = devm_kcalloc(dev, nr_irq_parent, sizeof(*girq->parents), GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ for (i = 0; i < nr_irq_parent; i++) {
+ int irq = irq_of_parse_and_map(np, i);
+
+- if (irq < 0)
++ if (!irq)
+ continue;
+ girq->parents[i] = irq;
+ }
+@@ -794,11 +785,12 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev,
+ static int armada_37xx_gpiochip_register(struct platform_device *pdev,
+ struct armada_37xx_pinctrl *info)
+ {
++ struct device *dev = &pdev->dev;
+ struct device_node *np;
+ struct gpio_chip *gc;
+ int ret = -ENODEV;
+
+- for_each_child_of_node(info->dev->of_node, np) {
++ for_each_child_of_node(dev->of_node, np) {
+ if (of_find_property(np, "gpio-controller", NULL)) {
+ ret = 0;
+ break;
+@@ -811,19 +803,16 @@ static int armada_37xx_gpiochip_register(struct platform_device *pdev,
+
+ gc = &info->gpio_chip;
+ gc->ngpio = info->data->nr_pins;
+- gc->parent = &pdev->dev;
++ gc->parent = dev;
+ gc->base = -1;
+ gc->of_node = np;
+ gc->label = info->data->name;
+
+ ret = armada_37xx_irqchip_register(pdev, info);
+- if (ret)
+- return ret;
+- ret = devm_gpiochip_add_data(&pdev->dev, gc, info);
+ if (ret)
+ return ret;
+
+- return 0;
++ return devm_gpiochip_add_data(dev, gc, info);
+ }
+
+ /**
+@@ -874,13 +863,13 @@ static int armada_37xx_add_function(struct armada_37xx_pmx_func *funcs,
+ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info)
+ {
+ int n, num = 0, funcsize = info->data->nr_pins;
++ struct device *dev = info->dev;
+
+ for (n = 0; n < info->ngroups; n++) {
+ struct armada_37xx_pin_group *grp = &info->groups[n];
+ int i, j, f;
+
+- grp->pins = devm_kcalloc(info->dev,
+- grp->npins + grp->extra_npins,
++ grp->pins = devm_kcalloc(dev, grp->npins + grp->extra_npins,
+ sizeof(*grp->pins),
+ GFP_KERNEL);
+ if (!grp->pins)
+@@ -898,8 +887,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info)
+ ret = armada_37xx_add_function(info->funcs, &funcsize,
+ grp->funcs[f]);
+ if (ret == -EOVERFLOW)
+- dev_err(info->dev,
+- "More functions than pins(%d)\n",
++ dev_err(dev, "More functions than pins(%d)\n",
+ info->data->nr_pins);
+ if (ret < 0)
+ continue;
+@@ -925,6 +913,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info)
+ static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info)
+ {
+ struct armada_37xx_pmx_func *funcs = info->funcs;
++ struct device *dev = info->dev;
+ int n;
+
+ for (n = 0; n < info->nfuncs; n++) {
+@@ -932,8 +921,7 @@ static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info)
+ const char **groups;
+ int g;
+
+- funcs[n].groups = devm_kcalloc(info->dev,
+- funcs[n].ngroups,
++ funcs[n].groups = devm_kcalloc(dev, funcs[n].ngroups,
+ sizeof(*(funcs[n].groups)),
+ GFP_KERNEL);
+ if (!funcs[n].groups)
+@@ -962,6 +950,7 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev,
+ const struct armada_37xx_pin_data *pin_data = info->data;
+ struct pinctrl_desc *ctrldesc = &info->pctl;
+ struct pinctrl_pin_desc *pindesc, *pdesc;
++ struct device *dev = &pdev->dev;
+ int pin, ret;
+
+ info->groups = pin_data->groups;
+@@ -973,9 +962,7 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev,
+ ctrldesc->pmxops = &armada_37xx_pmx_ops;
+ ctrldesc->confops = &armada_37xx_pinconf_ops;
+
+- pindesc = devm_kcalloc(&pdev->dev,
+- pin_data->nr_pins, sizeof(*pindesc),
+- GFP_KERNEL);
++ pindesc = devm_kcalloc(dev, pin_data->nr_pins, sizeof(*pindesc), GFP_KERNEL);
+ if (!pindesc)
+ return -ENOMEM;
+
+@@ -994,14 +981,10 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev,
+ * we allocate functions for number of pins and hope there are
+ * fewer unique functions than pins available
+ */
+- info->funcs = devm_kcalloc(&pdev->dev,
+- pin_data->nr_pins,
+- sizeof(struct armada_37xx_pmx_func),
+- GFP_KERNEL);
++ info->funcs = devm_kcalloc(dev, pin_data->nr_pins, sizeof(*info->funcs), GFP_KERNEL);
+ if (!info->funcs)
+ return -ENOMEM;
+
+-
+ ret = armada_37xx_fill_group(info);
+ if (ret)
+ return ret;
+@@ -1010,11 +993,9 @@ static int armada_37xx_pinctrl_register(struct platform_device *pdev,
+ if (ret)
+ return ret;
+
+- info->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, info);
+- if (IS_ERR(info->pctl_dev)) {
+- dev_err(&pdev->dev, "could not register pinctrl driver\n");
+- return PTR_ERR(info->pctl_dev);
+- }
++ info->pctl_dev = devm_pinctrl_register(dev, ctrldesc, info);
++ if (IS_ERR(info->pctl_dev))
++ return dev_err_probe(dev, PTR_ERR(info->pctl_dev), "could not register pinctrl driver\n");
+
+ return 0;
+ }
+@@ -1135,28 +1116,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = {
+ { },
+ };
+
++static const struct regmap_config armada_37xx_pinctrl_regmap_config = {
++ .reg_bits = 32,
++ .val_bits = 32,
++ .reg_stride = 4,
++ .use_raw_spinlock = true,
++};
++
+ static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev)
+ {
+ struct armada_37xx_pinctrl *info;
+ struct device *dev = &pdev->dev;
+- struct device_node *np = dev->of_node;
+ struct regmap *regmap;
++ void __iomem *base;
+ int ret;
+
+- info = devm_kzalloc(dev, sizeof(struct armada_37xx_pinctrl),
+- GFP_KERNEL);
+- if (!info)
+- return -ENOMEM;
+-
+- info->dev = dev;
++ base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
++ if (IS_ERR(base)) {
++ dev_err(dev, "failed to ioremap base address: %pe\n", base);
++ return PTR_ERR(base);
++ }
+
+- regmap = syscon_node_to_regmap(np);
++ regmap = devm_regmap_init_mmio(dev, base,
++ &armada_37xx_pinctrl_regmap_config);
+ if (IS_ERR(regmap)) {
+- dev_err(&pdev->dev, "cannot get regmap\n");
++ dev_err(dev, "failed to create regmap: %pe\n", regmap);
+ return PTR_ERR(regmap);
+ }
+- info->regmap = regmap;
+
++ info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
++ if (!info)
++ return -ENOMEM;
++
++ info->dev = dev;
++ info->regmap = regmap;
+ info->data = of_device_get_match_data(dev);
+
+ ret = armada_37xx_pinctrl_register(pdev, info);
+diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+index 39828e9c3120a..6dd930a839ecc 100644
+--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
++++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+@@ -1421,8 +1421,10 @@ static int nmk_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+
+ has_config = nmk_pinctrl_dt_get_config(np, &configs);
+ np_config = of_parse_phandle(np, "ste,config", 0);
+- if (np_config)
++ if (np_config) {
+ has_config |= nmk_pinctrl_dt_get_config(np_config, &configs);
++ of_node_put(np_config);
++ }
+ if (has_config) {
+ const char *gpio_name;
+ const char *pin;
+@@ -1883,8 +1885,10 @@ static int nmk_pinctrl_probe(struct platform_device *pdev)
+ }
+
+ prcm_np = of_parse_phandle(np, "prcm", 0);
+- if (prcm_np)
++ if (prcm_np) {
+ npct->prcm_base = of_iomap(prcm_np, 0);
++ of_node_put(prcm_np);
++ }
+ if (!npct->prcm_base) {
+ if (version == PINCTRL_NMK_STN8815) {
+ dev_info(&pdev->dev,
+diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+index 4d81908d6725d..e4a0d16b58cc8 100644
+--- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
++++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+@@ -78,7 +78,6 @@ struct npcm7xx_gpio {
+ struct gpio_chip gc;
+ int irqbase;
+ int irq;
+- void *priv;
+ struct irq_chip irq_chip;
+ u32 pinctrl_id;
+ int (*direction_input)(struct gpio_chip *chip, unsigned offset);
+@@ -105,12 +104,12 @@ static void npcm_gpio_set(struct gpio_chip *gc, void __iomem *reg,
+ unsigned long flags;
+ unsigned long val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ val = ioread32(reg) | pinmask;
+ iowrite32(val, reg);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void npcm_gpio_clr(struct gpio_chip *gc, void __iomem *reg,
+@@ -119,12 +118,12 @@ static void npcm_gpio_clr(struct gpio_chip *gc, void __iomem *reg,
+ unsigned long flags;
+ unsigned long val;
+
+- spin_lock_irqsave(&gc->bgpio_lock, flags);
++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags);
+
+ val = ioread32(reg) & ~pinmask;
+ iowrite32(val, reg);
+
+- spin_unlock_irqrestore(&gc->bgpio_lock, flags);
++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags);
+ }
+
+ static void npcmgpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+@@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc)
+ chained_irq_enter(chip, desc);
+ sts = ioread32(bank->base + NPCM7XX_GP_N_EVST);
+ en = ioread32(bank->base + NPCM7XX_GP_N_EVEN);
+- dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts,
++ dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts,
+ en);
+
+ sts &= en;
+@@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type)
+ gpiochip_get_data(irq_data_get_irq_chip_data(d));
+ unsigned int gpio = BIT(d->hwirq);
+
+- dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio,
++ dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio,
+ d->irq, type);
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+- dev_dbg(d->chip->parent_device, "edge.rising\n");
++ dev_dbg(bank->gc.parent, "edge.rising\n");
+ npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
+ npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+- dev_dbg(d->chip->parent_device, "edge.falling\n");
++ dev_dbg(bank->gc.parent, "edge.falling\n");
+ npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
+ npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
+ break;
+ case IRQ_TYPE_EDGE_BOTH:
+- dev_dbg(d->chip->parent_device, "edge.both\n");
++ dev_dbg(bank->gc.parent, "edge.both\n");
+ npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+- dev_dbg(d->chip->parent_device, "level.low\n");
++ dev_dbg(bank->gc.parent, "level.low\n");
+ npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+- dev_dbg(d->chip->parent_device, "level.high\n");
++ dev_dbg(bank->gc.parent, "level.high\n");
+ npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
+ break;
+ default:
+- dev_dbg(d->chip->parent_device, "invalid irq type\n");
++ dev_dbg(bank->gc.parent, "invalid irq type\n");
+ return -EINVAL;
+ }
+
+@@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d)
+ gpiochip_get_data(irq_data_get_irq_chip_data(d));
+ unsigned int gpio = d->hwirq;
+
+- dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq);
++ dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq);
+ iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST);
+ }
+
+@@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d)
+ unsigned int gpio = d->hwirq;
+
+ /* Clear events */
+- dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq);
++ dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq);
+ iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC);
+ }
+
+@@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d)
+ unsigned int gpio = d->hwirq;
+
+ /* Enable events */
+- dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq);
++ dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq);
+ iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS);
+ }
+
+@@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d)
+ unsigned int gpio = d->hwirq;
+
+ /* active-high, input, clear interrupt, enable interrupt */
+- dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq);
++ dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq);
+ npcmgpio_direction_input(gc, gpio);
+ npcmgpio_irq_ack(d);
+ npcmgpio_irq_unmask(d);
+@@ -905,7 +904,7 @@ static struct npcm7xx_func npcm7xx_funcs[] = {
+ #define DRIVE_STRENGTH_HI_SHIFT 12
+ #define DRIVE_STRENGTH_MASK 0x0000FF00
+
+-#define DS(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \
++#define DSTR(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \
+ ((hi) << DRIVE_STRENGTH_HI_SHIFT))
+ #define DSLO(x) (((x) >> DRIVE_STRENGTH_LO_SHIFT) & 0xF)
+ #define DSHI(x) (((x) >> DRIVE_STRENGTH_HI_SHIFT) & 0xF)
+@@ -925,31 +924,31 @@ struct npcm7xx_pincfg {
+ static const struct npcm7xx_pincfg pincfg[] = {
+ /* PIN FUNCTION 1 FUNCTION 2 FUNCTION 3 FLAGS */
+ NPCM7XX_PINCFG(0, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(3, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(4, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(5, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(6, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(7, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW),
+- NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(12, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(13, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(14, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(15, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW),
+- NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(20, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(21, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(22, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(23, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(26, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(27, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(28, smb4, MFSEL1, 1, none, NONE, 0, none, NONE, 0, 0),
+@@ -965,12 +964,12 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(39, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(40, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(41, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DS(2, 4) | GPO),
++ NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DSTR(2, 4) | GPO),
+ NPCM7XX_PINCFG(43, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0),
+ NPCM7XX_PINCFG(44, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0),
+ NPCM7XX_PINCFG(45, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)),
+- NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)),
++ NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)),
++ NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)),
+ NPCM7XX_PINCFG(48, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, GPO),
+ NPCM7XX_PINCFG(49, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(50, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0),
+@@ -980,8 +979,8 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(54, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(55, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(56, r1err, MFSEL1, 12, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)),
+- NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)),
++ NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)),
++ NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)),
+ NPCM7XX_PINCFG(59, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(60, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(61, uart1, MFSEL1, 10, none, NONE, 0, none, NONE, 0, GPO),
+@@ -1004,19 +1003,19 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(77, fanin13, MFSEL2, 13, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(78, fanin14, MFSEL2, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(79, fanin15, MFSEL2, 15, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(87, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(88, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(89, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(90, r2err, MFSEL1, 15, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)),
+- NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)),
++ NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)),
++ NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)),
+ NPCM7XX_PINCFG(93, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(94, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(95, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0),
+@@ -1062,34 +1061,34 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(133, smb10, MFSEL4, 13, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(134, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(135, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(141, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(143, sd1, MFSEL3, 12, sd1pwr, MFSEL4, 5, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(153, mmcwp, FLOCKR1, 24, none, NONE, 0, none, NONE, 0, 0), /* Z1/A1 */
+- NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(155, mmccd, MFSEL3, 25, mmcrst, MFSEL4, 6, none, NONE, 0, 0), /* Z1/A1 */
+- NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+-
+- NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DS(8, 12)),
+- NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++
++ NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DSTR(8, 12)),
++ NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(163, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0),
+ NPCM7XX_PINCFG(164, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC),
+ NPCM7XX_PINCFG(165, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC),
+@@ -1102,25 +1101,25 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(172, smb6, MFSEL3, 1, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(173, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(174, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
++ NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
+ NPCM7XX_PINCFG(181, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(182, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO),
+- NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO),
+- NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DS(8, 12)),
+- NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DS(2, 4)),
+- NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */
+-
+- NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */
++ NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO),
++ NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO),
++ NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
++ NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DSTR(2, 4)),
++ NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */
++
++ NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */
+ NPCM7XX_PINCFG(193, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(194, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(195, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0),
+@@ -1131,11 +1130,11 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(200, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(201, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(202, smb0c, I2CSEGSEL, 1, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(204, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW),
+ NPCM7XX_PINCFG(205, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW),
+- NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)),
+- NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)),
++ NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)),
++ NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)),
+ NPCM7XX_PINCFG(208, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(209, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(210, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0),
+@@ -1147,20 +1146,20 @@ static const struct npcm7xx_pincfg pincfg[] = {
+ NPCM7XX_PINCFG(216, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(217, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(218, wdog1, MFSEL3, 19, none, NONE, 0, none, NONE, 0, 0),
+- NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)),
++ NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)),
+ NPCM7XX_PINCFG(220, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(221, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(222, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0),
+ NPCM7XX_PINCFG(223, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0),
+
+ NPCM7XX_PINCFG(224, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, SLEW),
+- NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO),
+- NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO),
+- NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW),
+- NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DS(8, 12)),
++ NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO),
++ NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO),
++ NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW),
++ NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12)),
+ NPCM7XX_PINCFG(253, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC1 power */
+ NPCM7XX_PINCFG(254, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC2 power */
+ NPCM7XX_PINCFG(255, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* DACOSEL */
+@@ -1561,7 +1560,7 @@ static int npcm7xx_get_groups_count(struct pinctrl_dev *pctldev)
+ {
+ struct npcm7xx_pinctrl *npcm = pinctrl_dev_get_drvdata(pctldev);
+
+- dev_dbg(npcm->dev, "group size: %d\n", ARRAY_SIZE(npcm7xx_groups));
++ dev_dbg(npcm->dev, "group size: %zu\n", ARRAY_SIZE(npcm7xx_groups));
+ return ARRAY_SIZE(npcm7xx_groups);
+ }
+
+diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
+index 22e8d4c4040e1..e6fe1330eab9f 100644
+--- a/drivers/pinctrl/pinconf-generic.c
++++ b/drivers/pinctrl/pinconf-generic.c
+@@ -30,10 +30,10 @@ static const struct pin_config_item conf_items[] = {
+ PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false),
+ PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL, false),
+ PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL, false),
+- PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL, false),
++ PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", "ohms", true),
+ PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
+- "input bias pull to pin specific state", NULL, false),
+- PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false),
++ "input bias pull to pin specific state", "ohms", true),
++ PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", "ohms", true),
+ PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false),
+ PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false),
+ PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false),
+@@ -393,8 +393,10 @@ int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev,
+ for_each_available_child_of_node(np_config, np) {
+ ret = pinconf_generic_dt_subnode_to_map(pctldev, np, map,
+ &reserved_maps, num_maps, type);
+- if (ret < 0)
++ if (ret < 0) {
++ of_node_put(np);
+ goto exit;
++ }
+ }
+ return 0;
+
+diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
+index bae9d429b813e..7e41b4842c1f4 100644
+--- a/drivers/pinctrl/pinctrl-amd.c
++++ b/drivers/pinctrl/pinctrl-amd.c
+@@ -126,6 +126,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
+ struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+
+ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
++
++ /* Use special handling for Pin0 debounce */
++ if (offset == 0) {
++ pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
++ if (pin_reg & INTERNAL_GPIO0_DEBOUNCE)
++ debounce = 0;
++ }
++
+ pin_reg = readl(gpio_dev->base + offset * 4);
+
+ if (debounce) {
+@@ -181,18 +189,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
+ return ret;
+ }
+
+-static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset,
+- unsigned long config)
+-{
+- u32 debounce;
+-
+- if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+- return -ENOTSUPP;
+-
+- debounce = pinconf_to_config_argument(config);
+- return amd_gpio_set_debounce(gc, offset, debounce);
+-}
+-
+ #ifdef CONFIG_DEBUG_FS
+ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
+ {
+@@ -223,6 +219,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
+ char debounce_value[40];
+ char *debounce_enable;
+
++ seq_printf(s, "WAKE_INT_MASTER_REG: 0x%08x\n", readl(gpio_dev->base + WAKE_INT_MASTER_REG));
+ for (bank = 0; bank < gpio_dev->hwbank_num; bank++) {
+ seq_printf(s, "GPIO bank%d\t", bank);
+
+@@ -598,14 +595,14 @@ static struct irq_chip amd_gpio_irqchip = {
+
+ #define PIN_IRQ_PENDING (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
+
+-static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
++static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
+ {
+ struct amd_gpio *gpio_dev = dev_id;
+ struct gpio_chip *gc = &gpio_dev->gc;
+- irqreturn_t ret = IRQ_NONE;
+ unsigned int i, irqnr;
+ unsigned long flags;
+ u32 __iomem *regs;
++ bool ret = false;
+ u32 regval;
+ u64 status, mask;
+
+@@ -627,6 +624,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
+ /* Each status bit covers four pins */
+ for (i = 0; i < 4; i++) {
+ regval = readl(regs + i);
++
++ if (regval & PIN_IRQ_PENDING)
++ dev_dbg(&gpio_dev->pdev->dev,
++ "GPIO %d is active: 0x%x",
++ irqnr + i, regval);
++
++ /* caused wake on resume context for shared IRQ */
++ if (irq < 0 && (regval & BIT(WAKE_STS_OFF)))
++ return true;
++
+ if (!(regval & PIN_IRQ_PENDING) ||
+ !(regval & BIT(INTERRUPT_MASK_OFF)))
+ continue;
+@@ -636,23 +643,26 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
+ * We must read the pin register again, in case the
+ * value was changed while executing
+ * generic_handle_domain_irq() above.
+- * If we didn't find a mapping for the interrupt,
+- * disable it in order to avoid a system hang caused
+- * by an interrupt storm.
++ * If the line is not an irq, disable it in order to
++ * avoid a system hang caused by an interrupt storm.
+ */
+ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+ regval = readl(regs + i);
+- if (irq == 0) {
+- regval &= ~BIT(INTERRUPT_ENABLE_OFF);
++ if (!gpiochip_line_is_irq(gc, irqnr + i)) {
++ regval &= ~BIT(INTERRUPT_MASK_OFF);
+ dev_dbg(&gpio_dev->pdev->dev,
+ "Disabling spurious GPIO IRQ %d\n",
+ irqnr + i);
++ } else {
++ ret = true;
+ }
+ writel(regval, regs + i);
+ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+- ret = IRQ_HANDLED;
+ }
+ }
++ /* did not cause wake on resume context for shared IRQ */
++ if (irq < 0)
++ return false;
+
+ /* Signal EOI to the GPIO unit */
+ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+@@ -664,6 +674,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
+ return ret;
+ }
+
++static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
++{
++ return IRQ_RETVAL(do_amd_gpio_irq_handler(irq, dev_id));
++}
++
++static bool __maybe_unused amd_gpio_check_wake(void *dev_id)
++{
++ return do_amd_gpio_irq_handler(-1, dev_id);
++}
++
+ static int amd_get_groups_count(struct pinctrl_dev *pctldev)
+ {
+ struct amd_gpio *gpio_dev = pinctrl_dev_get_drvdata(pctldev);
+@@ -732,7 +752,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev,
+ break;
+
+ default:
+- dev_err(&gpio_dev->pdev->dev, "Invalid config param %04x\n",
++ dev_dbg(&gpio_dev->pdev->dev, "Invalid config param %04x\n",
+ param);
+ return -ENOTSUPP;
+ }
+@@ -743,7 +763,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev,
+ }
+
+ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+- unsigned long *configs, unsigned num_configs)
++ unsigned long *configs, unsigned int num_configs)
+ {
+ int i;
+ u32 arg;
+@@ -785,7 +805,7 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+ break;
+
+ default:
+- dev_err(&gpio_dev->pdev->dev,
++ dev_dbg(&gpio_dev->pdev->dev,
+ "Invalid config param %04x\n", param);
+ ret = -ENOTSUPP;
+ }
+@@ -833,6 +853,20 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev,
+ return 0;
+ }
+
++static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin,
++ unsigned long config)
++{
++ struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
++
++ if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) {
++ u32 debounce = pinconf_to_config_argument(config);
++
++ return amd_gpio_set_debounce(gc, pin, debounce);
++ }
++
++ return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1);
++}
++
+ static const struct pinconf_ops amd_pinconf_ops = {
+ .pin_config_get = amd_pinconf_get,
+ .pin_config_set = amd_pinconf_set,
+@@ -860,9 +894,9 @@ static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+
+ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+- pin_reg = readl(gpio_dev->base + i * 4);
++ pin_reg = readl(gpio_dev->base + pin * 4);
+ pin_reg &= ~mask;
+- writel(pin_reg, gpio_dev->base + i * 4);
++ writel(pin_reg, gpio_dev->base + pin * 4);
+
+ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+ }
+@@ -891,6 +925,7 @@ static int amd_gpio_suspend(struct device *dev)
+ {
+ struct amd_gpio *gpio_dev = dev_get_drvdata(dev);
+ struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
++ unsigned long flags;
+ int i;
+
+ for (i = 0; i < desc->npins; i++) {
+@@ -899,7 +934,9 @@ static int amd_gpio_suspend(struct device *dev)
+ if (!amd_gpio_should_save(gpio_dev, pin))
+ continue;
+
+- gpio_dev->saved_regs[i] = readl(gpio_dev->base + pin*4);
++ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
++ gpio_dev->saved_regs[i] = readl(gpio_dev->base + pin * 4) & ~PIN_IRQ_PENDING;
++ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+ }
+
+ return 0;
+@@ -909,6 +946,7 @@ static int amd_gpio_resume(struct device *dev)
+ {
+ struct amd_gpio *gpio_dev = dev_get_drvdata(dev);
+ struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
++ unsigned long flags;
+ int i;
+
+ for (i = 0; i < desc->npins; i++) {
+@@ -917,7 +955,10 @@ static int amd_gpio_resume(struct device *dev)
+ if (!amd_gpio_should_save(gpio_dev, pin))
+ continue;
+
+- writel(gpio_dev->saved_regs[i], gpio_dev->base + pin*4);
++ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
++ gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING;
++ writel(gpio_dev->saved_regs[i], gpio_dev->base + pin * 4);
++ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+ }
+
+ return 0;
+@@ -1033,6 +1074,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
+ goto out2;
+
+ platform_set_drvdata(pdev, gpio_dev);
++ acpi_register_wakeup_handler(gpio_dev->irq, amd_gpio_check_wake, gpio_dev);
+
+ dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
+ return ret;
+@@ -1050,6 +1092,7 @@ static int amd_gpio_remove(struct platform_device *pdev)
+ gpio_dev = platform_get_drvdata(pdev);
+
+ gpiochip_remove(&gpio_dev->gc);
++ acpi_unregister_wakeup_handler(amd_gpio_check_wake, gpio_dev);
+
+ return 0;
+ }
+diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h
+index 1d43170736545..04ae23c46152d 100644
+--- a/drivers/pinctrl/pinctrl-amd.h
++++ b/drivers/pinctrl/pinctrl-amd.h
+@@ -17,6 +17,7 @@
+ #define AMD_GPIO_PINS_BANK3 32
+
+ #define WAKE_INT_MASTER_REG 0xfc
++#define INTERNAL_GPIO0_DEBOUNCE (1 << 15)
+ #define EOI_MASK (1 << 29)
+
+ #define WAKE_INT_STATUS_REG0 0x2f8
+diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
+index 03c32b2c5d303..62b9a94c10baa 100644
+--- a/drivers/pinctrl/pinctrl-at91-pio4.c
++++ b/drivers/pinctrl/pinctrl-at91-pio4.c
+@@ -1126,8 +1126,10 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
+
+ pin_desc[i].number = i;
+ /* Pin naming convention: P(bank_name)(bank_pin_number). */
+- pin_desc[i].name = kasprintf(GFP_KERNEL, "P%c%d",
+- bank + 'A', line);
++ pin_desc[i].name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "P%c%d",
++ bank + 'A', line);
++ if (!pin_desc[i].name)
++ return -ENOMEM;
+
+ group->name = group_names[i] = pin_desc[i].name;
+ group->pin = pin_desc[i].number;
+@@ -1184,7 +1186,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
+ dev_err(dev, "can't add the irq domain\n");
+ return -ENODEV;
+ }
+- atmel_pioctrl->irq_domain->name = "atmel gpio";
+
+ for (i = 0; i < atmel_pioctrl->npins; i++) {
+ int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i);
+diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
+index 6022496bb6a98..3b0341c730ee0 100644
+--- a/drivers/pinctrl/pinctrl-at91.c
++++ b/drivers/pinctrl/pinctrl-at91.c
+@@ -1891,7 +1891,7 @@ static int at91_gpio_probe(struct platform_device *pdev)
+ }
+
+ for (i = 0; i < chip->ngpio; i++)
+- names[i] = kasprintf(GFP_KERNEL, "pio%c%d", alias_idx + 'A', i);
++ names[i] = devm_kasprintf(&pdev->dev, GFP_KERNEL, "pio%c%d", alias_idx + 'A', i);
+
+ chip->names = (const char *const *)names;
+
+diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c
+index fb713f9c53d0e..3f0143087cc77 100644
+--- a/drivers/pinctrl/pinctrl-equilibrium.c
++++ b/drivers/pinctrl/pinctrl-equilibrium.c
+@@ -675,6 +675,11 @@ static int eqbr_build_functions(struct eqbr_pinctrl_drv_data *drvdata)
+ return ret;
+
+ for (i = 0; i < nr_funcs; i++) {
++
++ /* Ignore the same function with multiple groups */
++ if (funcs[i].name == NULL)
++ continue;
++
+ ret = pinmux_generic_add_function(drvdata->pctl_dev,
+ funcs[i].name,
+ funcs[i].groups,
+@@ -815,7 +820,7 @@ static int pinctrl_reg(struct eqbr_pinctrl_drv_data *drvdata)
+
+ ret = eqbr_build_functions(drvdata);
+ if (ret) {
+- dev_err(dev, "Failed to build groups\n");
++ dev_err(dev, "Failed to build functions\n");
+ return ret;
+ }
+
+diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c
+index 2712f51eb2381..c973123e6de9e 100644
+--- a/drivers/pinctrl/pinctrl-ingenic.c
++++ b/drivers/pinctrl/pinctrl-ingenic.c
+@@ -119,6 +119,8 @@ struct ingenic_chip_info {
+ unsigned int num_functions;
+
+ const u32 *pull_ups, *pull_downs;
++
++ const struct regmap_access_table *access_table;
+ };
+
+ struct ingenic_pinctrl {
+@@ -641,7 +643,7 @@ static u8 jz4755_lcd_24bit_funcs[] = { 1, 1, 1, 1, 0, 0, };
+ static const struct group_desc jz4755_groups[] = {
+ INGENIC_PIN_GROUP("uart0-data", jz4755_uart0_data, 0),
+ INGENIC_PIN_GROUP("uart0-hwflow", jz4755_uart0_hwflow, 0),
+- INGENIC_PIN_GROUP("uart1-data", jz4755_uart1_data, 0),
++ INGENIC_PIN_GROUP("uart1-data", jz4755_uart1_data, 1),
+ INGENIC_PIN_GROUP("uart2-data", jz4755_uart2_data, 1),
+ INGENIC_PIN_GROUP("ssi-dt-b", jz4755_ssi_dt_b, 0),
+ INGENIC_PIN_GROUP("ssi-dt-f", jz4755_ssi_dt_f, 0),
+@@ -695,7 +697,7 @@ static const char *jz4755_ssi_groups[] = {
+ "ssi-ce1-b", "ssi-ce1-f",
+ };
+ static const char *jz4755_mmc0_groups[] = { "mmc0-1bit", "mmc0-4bit", };
+-static const char *jz4755_mmc1_groups[] = { "mmc0-1bit", "mmc0-4bit", };
++static const char *jz4755_mmc1_groups[] = { "mmc1-1bit", "mmc1-4bit", };
+ static const char *jz4755_i2c_groups[] = { "i2c-data", };
+ static const char *jz4755_cim_groups[] = { "cim-data", };
+ static const char *jz4755_lcd_groups[] = {
+@@ -2179,6 +2181,17 @@ static const struct function_desc x1000_functions[] = {
+ { "mac", x1000_mac_groups, ARRAY_SIZE(x1000_mac_groups), },
+ };
+
++static const struct regmap_range x1000_access_ranges[] = {
++ regmap_reg_range(0x000, 0x400 - 4),
++ regmap_reg_range(0x700, 0x800 - 4),
++};
++
++/* shared with X1500 */
++static const struct regmap_access_table x1000_access_table = {
++ .yes_ranges = x1000_access_ranges,
++ .n_yes_ranges = ARRAY_SIZE(x1000_access_ranges),
++};
++
+ static const struct ingenic_chip_info x1000_chip_info = {
+ .num_chips = 4,
+ .reg_offset = 0x100,
+@@ -2189,6 +2202,7 @@ static const struct ingenic_chip_info x1000_chip_info = {
+ .num_functions = ARRAY_SIZE(x1000_functions),
+ .pull_ups = x1000_pull_ups,
+ .pull_downs = x1000_pull_downs,
++ .access_table = &x1000_access_table,
+ };
+
+ static int x1500_uart0_data_pins[] = { 0x4a, 0x4b, };
+@@ -2300,6 +2314,7 @@ static const struct ingenic_chip_info x1500_chip_info = {
+ .num_functions = ARRAY_SIZE(x1500_functions),
+ .pull_ups = x1000_pull_ups,
+ .pull_downs = x1000_pull_downs,
++ .access_table = &x1000_access_table,
+ };
+
+ static const u32 x1830_pull_ups[4] = {
+@@ -2506,6 +2521,16 @@ static const struct function_desc x1830_functions[] = {
+ { "mac", x1830_mac_groups, ARRAY_SIZE(x1830_mac_groups), },
+ };
+
++static const struct regmap_range x1830_access_ranges[] = {
++ regmap_reg_range(0x0000, 0x4000 - 4),
++ regmap_reg_range(0x7000, 0x8000 - 4),
++};
++
++static const struct regmap_access_table x1830_access_table = {
++ .yes_ranges = x1830_access_ranges,
++ .n_yes_ranges = ARRAY_SIZE(x1830_access_ranges),
++};
++
+ static const struct ingenic_chip_info x1830_chip_info = {
+ .num_chips = 4,
+ .reg_offset = 0x1000,
+@@ -2516,6 +2541,7 @@ static const struct ingenic_chip_info x1830_chip_info = {
+ .num_functions = ARRAY_SIZE(x1830_functions),
+ .pull_ups = x1830_pull_ups,
+ .pull_downs = x1830_pull_downs,
++ .access_table = &x1830_access_table,
+ };
+
+ static const u32 x2000_pull_ups[5] = {
+@@ -2969,6 +2995,17 @@ static const struct function_desc x2000_functions[] = {
+ { "otg", x2000_otg_groups, ARRAY_SIZE(x2000_otg_groups), },
+ };
+
++static const struct regmap_range x2000_access_ranges[] = {
++ regmap_reg_range(0x000, 0x500 - 4),
++ regmap_reg_range(0x700, 0x800 - 4),
++};
++
++/* shared with X2100 */
++static const struct regmap_access_table x2000_access_table = {
++ .yes_ranges = x2000_access_ranges,
++ .n_yes_ranges = ARRAY_SIZE(x2000_access_ranges),
++};
++
+ static const struct ingenic_chip_info x2000_chip_info = {
+ .num_chips = 5,
+ .reg_offset = 0x100,
+@@ -2979,6 +3016,7 @@ static const struct ingenic_chip_info x2000_chip_info = {
+ .num_functions = ARRAY_SIZE(x2000_functions),
+ .pull_ups = x2000_pull_ups,
+ .pull_downs = x2000_pull_downs,
++ .access_table = &x2000_access_table,
+ };
+
+ static const u32 x2100_pull_ups[5] = {
+@@ -3189,6 +3227,7 @@ static const struct ingenic_chip_info x2100_chip_info = {
+ .num_functions = ARRAY_SIZE(x2100_functions),
+ .pull_ups = x2100_pull_ups,
+ .pull_downs = x2100_pull_downs,
++ .access_table = &x2000_access_table,
+ };
+
+ static u32 ingenic_gpio_read_reg(struct ingenic_gpio_chip *jzgc, u8 reg)
+@@ -4168,7 +4207,12 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
+ return PTR_ERR(base);
+
+ regmap_config = ingenic_pinctrl_regmap_config;
+- regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset;
++ if (chip_info->access_table) {
++ regmap_config.rd_table = chip_info->access_table;
++ regmap_config.wr_table = chip_info->access_table;
++ } else {
++ regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset - 4;
++ }
+
+ jzpc->map = devm_regmap_init_mmio(dev, base, &regmap_config);
+ if (IS_ERR(jzpc->map)) {
+diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c
+index 49e32684dbb25..ad4db99094a79 100644
+--- a/drivers/pinctrl/pinctrl-k210.c
++++ b/drivers/pinctrl/pinctrl-k210.c
+@@ -482,7 +482,7 @@ static int k210_pinconf_get_drive(unsigned int max_strength_ua)
+ {
+ int i;
+
+- for (i = K210_PC_DRIVE_MAX; i; i--) {
++ for (i = K210_PC_DRIVE_MAX; i >= 0; i--) {
+ if (k210_pinconf_drive_strength[i] <= max_strength_ua)
+ return i;
+ }
+@@ -527,7 +527,7 @@ static int k210_pinconf_set_param(struct pinctrl_dev *pctldev,
+ case PIN_CONFIG_BIAS_PULL_UP:
+ if (!arg)
+ return -EINVAL;
+- val |= K210_PC_PD;
++ val |= K210_PC_PU;
+ break;
+ case PIN_CONFIG_DRIVE_STRENGTH:
+ arg *= 1000;
+@@ -862,8 +862,10 @@ static int k210_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev,
+ for_each_available_child_of_node(np_config, np) {
+ ret = k210_pinctrl_dt_subnode_to_map(pctldev, np, map,
+ &reserved_maps, num_maps);
+- if (ret < 0)
++ if (ret < 0) {
++ of_node_put(np);
+ goto err;
++ }
+ }
+ return 0;
+
+diff --git a/drivers/pinctrl/pinctrl-mcp23s08_spi.c b/drivers/pinctrl/pinctrl-mcp23s08_spi.c
+index 9ae10318f6f35..ea059b9c5542e 100644
+--- a/drivers/pinctrl/pinctrl-mcp23s08_spi.c
++++ b/drivers/pinctrl/pinctrl-mcp23s08_spi.c
+@@ -91,18 +91,28 @@ static int mcp23s08_spi_regmap_init(struct mcp23s08 *mcp, struct device *dev,
+ mcp->reg_shift = 0;
+ mcp->chip.ngpio = 8;
+ mcp->chip.label = devm_kasprintf(dev, GFP_KERNEL, "mcp23s08.%d", addr);
++ if (!mcp->chip.label)
++ return -ENOMEM;
+
+ config = &mcp23x08_regmap;
+ name = devm_kasprintf(dev, GFP_KERNEL, "%d", addr);
++ if (!name)
++ return -ENOMEM;
++
+ break;
+
+ case MCP_TYPE_S17:
+ mcp->reg_shift = 1;
+ mcp->chip.ngpio = 16;
+ mcp->chip.label = devm_kasprintf(dev, GFP_KERNEL, "mcp23s17.%d", addr);
++ if (!mcp->chip.label)
++ return -ENOMEM;
+
+ config = &mcp23x17_regmap;
+ name = devm_kasprintf(dev, GFP_KERNEL, "%d", addr);
++ if (!name)
++ return -ENOMEM;
++
+ break;
+
+ case MCP_TYPE_S18:
+diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c
+index 072bccdea2a5d..aceadc9ec0244 100644
+--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
++++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
+@@ -17,6 +17,8 @@
+ #include <linux/pinctrl/pinmux.h>
+ #include <linux/platform_device.h>
+ #include <linux/property.h>
++#include <linux/reset.h>
++#include <linux/spinlock.h>
+
+ #include "core.h"
+ #include "pinconf.h"
+@@ -114,6 +116,7 @@ struct sgpio_priv {
+ u32 clock;
+ u32 __iomem *regs;
+ const struct sgpio_properties *properties;
++ spinlock_t lock;
+ };
+
+ struct sgpio_port_addr {
+@@ -215,6 +218,7 @@ static void sgpio_output_set(struct sgpio_priv *priv,
+ int value)
+ {
+ unsigned int bit = SGPIO_SRC_BITS * addr->bit;
++ unsigned long flags;
+ u32 clr, set;
+
+ switch (priv->properties->arch) {
+@@ -233,7 +237,10 @@ static void sgpio_output_set(struct sgpio_priv *priv,
+ default:
+ return;
+ }
++
++ spin_lock_irqsave(&priv->lock, flags);
+ sgpio_clrsetbits(priv, REG_PORT_CONFIG, addr->port, clr, set);
++ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+ static int sgpio_output_get(struct sgpio_priv *priv,
+@@ -561,10 +568,13 @@ static void microchip_sgpio_irq_settype(struct irq_data *data,
+ struct sgpio_bank *bank = gpiochip_get_data(chip);
+ unsigned int gpio = irqd_to_hwirq(data);
+ struct sgpio_port_addr addr;
++ unsigned long flags;
+ u32 ena;
+
+ sgpio_pin_to_addr(bank->priv, gpio, &addr);
+
++ spin_lock_irqsave(&bank->priv->lock, flags);
++
+ /* Disable interrupt while changing type */
+ ena = sgpio_readl(bank->priv, REG_INT_ENABLE, addr.bit);
+ sgpio_writel(bank->priv, ena & ~BIT(addr.port), REG_INT_ENABLE, addr.bit);
+@@ -581,6 +591,8 @@ static void microchip_sgpio_irq_settype(struct irq_data *data,
+
+ /* Possibly re-enable interrupts */
+ sgpio_writel(bank->priv, ena, REG_INT_ENABLE, addr.bit);
++
++ spin_unlock_irqrestore(&bank->priv->lock, flags);
+ }
+
+ static void microchip_sgpio_irq_setreg(struct irq_data *data,
+@@ -591,13 +603,16 @@ static void microchip_sgpio_irq_setreg(struct irq_data *data,
+ struct sgpio_bank *bank = gpiochip_get_data(chip);
+ unsigned int gpio = irqd_to_hwirq(data);
+ struct sgpio_port_addr addr;
++ unsigned long flags;
+
+ sgpio_pin_to_addr(bank->priv, gpio, &addr);
+
++ spin_lock_irqsave(&bank->priv->lock, flags);
+ if (clear)
+ sgpio_clrsetbits(bank->priv, reg, addr.bit, BIT(addr.port), 0);
+ else
+ sgpio_clrsetbits(bank->priv, reg, addr.bit, 0, BIT(addr.port));
++ spin_unlock_irqrestore(&bank->priv->lock, flags);
+ }
+
+ static void microchip_sgpio_irq_mask(struct irq_data *data)
+@@ -714,6 +729,9 @@ static int microchip_sgpio_register_bank(struct device *dev,
+ pctl_desc->name = devm_kasprintf(dev, GFP_KERNEL, "%s-%sput",
+ dev_name(dev),
+ bank->is_input ? "in" : "out");
++ if (!pctl_desc->name)
++ return -ENOMEM;
++
+ pctl_desc->pctlops = &sgpio_pctl_ops;
+ pctl_desc->pmxops = &sgpio_pmx_ops;
+ pctl_desc->confops = &sgpio_confops;
+@@ -803,6 +821,7 @@ static int microchip_sgpio_probe(struct platform_device *pdev)
+ int div_clock = 0, ret, port, i, nbanks;
+ struct device *dev = &pdev->dev;
+ struct fwnode_handle *fwnode;
++ struct reset_control *reset;
+ struct sgpio_priv *priv;
+ struct clk *clk;
+ u32 val;
+@@ -812,6 +831,12 @@ static int microchip_sgpio_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ priv->dev = dev;
++ spin_lock_init(&priv->lock);
++
++ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch");
++ if (IS_ERR(reset))
++ return dev_err_probe(dev, PTR_ERR(reset), "Failed to get reset\n");
++ reset_control_reset(reset);
+
+ clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(clk))
+diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
+index 0a36ec8775a38..b14f1b7a625ec 100644
+--- a/drivers/pinctrl/pinctrl-ocelot.c
++++ b/drivers/pinctrl/pinctrl-ocelot.c
+@@ -739,7 +739,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
+ regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
+ BIT(p), f << p);
+ regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
+- BIT(p), f << (p - 1));
++ BIT(p), (f >> 1) << p);
+
+ return 0;
+ }
+diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
+index 8d271c6b0ca41..5de691c630b4f 100644
+--- a/drivers/pinctrl/pinctrl-pistachio.c
++++ b/drivers/pinctrl/pinctrl-pistachio.c
+@@ -1374,10 +1374,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl)
+ }
+
+ irq = irq_of_parse_and_map(child, 0);
+- if (irq < 0) {
+- dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq);
++ if (!irq) {
++ dev_err(pctl->dev, "No IRQ for bank %u\n", i);
+ of_node_put(child);
+- ret = irq;
++ ret = -EINVAL;
+ goto err;
+ }
+
+diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
+index 5ce260f152ce5..a6f4aca9c61c4 100644
+--- a/drivers/pinctrl/pinctrl-rockchip.c
++++ b/drivers/pinctrl/pinctrl-rockchip.c
+@@ -285,6 +285,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev,
+ {
+ struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const struct rockchip_pin_group *grp;
++ struct device *dev = info->dev;
+ struct pinctrl_map *new_map;
+ struct device_node *parent;
+ int map_num = 1;
+@@ -296,8 +297,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev,
+ */
+ grp = pinctrl_name_to_group(info, np->name);
+ if (!grp) {
+- dev_err(info->dev, "unable to find group for node %pOFn\n",
+- np);
++ dev_err(dev, "unable to find group for node %pOFn\n", np);
+ return -EINVAL;
+ }
+
+@@ -331,7 +331,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev,
+ new_map[i].data.configs.num_configs = grp->data[i].nconfigs;
+ }
+
+- dev_dbg(pctldev->dev, "maps: function %s group %s num %d\n",
++ dev_dbg(dev, "maps: function %s group %s num %d\n",
+ (*map)->data.mux.function, (*map)->data.mux.group, map_num);
+
+ return 0;
+@@ -455,95 +455,110 @@ static struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = {
+
+ static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = {
+ {
++ /* gpio1b6_sel */
+ .num = 1,
+ .pin = 14,
+ .reg = 0x28,
+ .bit = 12,
+ .mask = 0xf
+ }, {
++ /* gpio1b7_sel */
+ .num = 1,
+ .pin = 15,
+ .reg = 0x2c,
+ .bit = 0,
+ .mask = 0x3
+ }, {
++ /* gpio1c2_sel */
+ .num = 1,
+ .pin = 18,
+ .reg = 0x30,
+ .bit = 4,
+ .mask = 0xf
+ }, {
++ /* gpio1c3_sel */
+ .num = 1,
+ .pin = 19,
+ .reg = 0x30,
+ .bit = 8,
+ .mask = 0xf
+ }, {
++ /* gpio1c4_sel */
+ .num = 1,
+ .pin = 20,
+ .reg = 0x30,
+ .bit = 12,
+ .mask = 0xf
+ }, {
++ /* gpio1c5_sel */
+ .num = 1,
+ .pin = 21,
+ .reg = 0x34,
+ .bit = 0,
+ .mask = 0xf
+ }, {
++ /* gpio1c6_sel */
+ .num = 1,
+ .pin = 22,
+ .reg = 0x34,
+ .bit = 4,
+ .mask = 0xf
+ }, {
++ /* gpio1c7_sel */
+ .num = 1,
+ .pin = 23,
+ .reg = 0x34,
+ .bit = 8,
+ .mask = 0xf
+ }, {
++ /* gpio3b4_sel */
+ .num = 3,
+ .pin = 12,
+ .reg = 0x68,
+ .bit = 8,
+ .mask = 0xf
+ }, {
++ /* gpio3b5_sel */
+ .num = 3,
+ .pin = 13,
+ .reg = 0x68,
+ .bit = 12,
+ .mask = 0xf
+ }, {
++ /* gpio2a2_sel */
+ .num = 2,
+ .pin = 2,
+- .reg = 0x608,
+- .bit = 0,
+- .mask = 0x7
++ .reg = 0x40,
++ .bit = 4,
++ .mask = 0x3
+ }, {
++ /* gpio2a3_sel */
+ .num = 2,
+ .pin = 3,
+- .reg = 0x608,
+- .bit = 4,
+- .mask = 0x7
++ .reg = 0x40,
++ .bit = 6,
++ .mask = 0x3
+ }, {
++ /* gpio2c0_sel */
+ .num = 2,
+ .pin = 16,
+- .reg = 0x610,
+- .bit = 8,
+- .mask = 0x7
++ .reg = 0x50,
++ .bit = 0,
++ .mask = 0x3
+ }, {
++ /* gpio3b2_sel */
+ .num = 3,
+ .pin = 10,
+- .reg = 0x610,
+- .bit = 0,
+- .mask = 0x7
++ .reg = 0x68,
++ .bit = 4,
++ .mask = 0x3
+ }, {
++ /* gpio3b3_sel */
+ .num = 3,
+ .pin = 11,
+- .reg = 0x610,
+- .bit = 4,
+- .mask = 0x7
++ .reg = 0x68,
++ .bit = 6,
++ .mask = 0x3
+ },
+ };
+
+@@ -593,14 +608,54 @@ static void rockchip_get_recalced_mux(struct rockchip_pin_bank *bank, int pin,
+ }
+
+ static struct rockchip_mux_route_data px30_mux_route_data[] = {
++ RK_MUXROUTE_SAME(2, RK_PB4, 1, 0x184, BIT(16 + 7)), /* cif-d0m0 */
++ RK_MUXROUTE_SAME(3, RK_PA1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d0m1 */
++ RK_MUXROUTE_SAME(2, RK_PB6, 1, 0x184, BIT(16 + 7)), /* cif-d1m0 */
++ RK_MUXROUTE_SAME(3, RK_PA2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d1m1 */
+ RK_MUXROUTE_SAME(2, RK_PA0, 1, 0x184, BIT(16 + 7)), /* cif-d2m0 */
+ RK_MUXROUTE_SAME(3, RK_PA3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d2m1 */
++ RK_MUXROUTE_SAME(2, RK_PA1, 1, 0x184, BIT(16 + 7)), /* cif-d3m0 */
++ RK_MUXROUTE_SAME(3, RK_PA5, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d3m1 */
++ RK_MUXROUTE_SAME(2, RK_PA2, 1, 0x184, BIT(16 + 7)), /* cif-d4m0 */
++ RK_MUXROUTE_SAME(3, RK_PA7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d4m1 */
++ RK_MUXROUTE_SAME(2, RK_PA3, 1, 0x184, BIT(16 + 7)), /* cif-d5m0 */
++ RK_MUXROUTE_SAME(3, RK_PB0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d5m1 */
++ RK_MUXROUTE_SAME(2, RK_PA4, 1, 0x184, BIT(16 + 7)), /* cif-d6m0 */
++ RK_MUXROUTE_SAME(3, RK_PB1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d6m1 */
++ RK_MUXROUTE_SAME(2, RK_PA5, 1, 0x184, BIT(16 + 7)), /* cif-d7m0 */
++ RK_MUXROUTE_SAME(3, RK_PB4, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d7m1 */
++ RK_MUXROUTE_SAME(2, RK_PA6, 1, 0x184, BIT(16 + 7)), /* cif-d8m0 */
++ RK_MUXROUTE_SAME(3, RK_PB6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d8m1 */
++ RK_MUXROUTE_SAME(2, RK_PA7, 1, 0x184, BIT(16 + 7)), /* cif-d9m0 */
++ RK_MUXROUTE_SAME(3, RK_PB7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d9m1 */
++ RK_MUXROUTE_SAME(2, RK_PB7, 1, 0x184, BIT(16 + 7)), /* cif-d10m0 */
++ RK_MUXROUTE_SAME(3, RK_PC6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d10m1 */
++ RK_MUXROUTE_SAME(2, RK_PC0, 1, 0x184, BIT(16 + 7)), /* cif-d11m0 */
++ RK_MUXROUTE_SAME(3, RK_PC7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d11m1 */
++ RK_MUXROUTE_SAME(2, RK_PB0, 1, 0x184, BIT(16 + 7)), /* cif-vsyncm0 */
++ RK_MUXROUTE_SAME(3, RK_PD1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-vsyncm1 */
++ RK_MUXROUTE_SAME(2, RK_PB1, 1, 0x184, BIT(16 + 7)), /* cif-hrefm0 */
++ RK_MUXROUTE_SAME(3, RK_PD2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-hrefm1 */
++ RK_MUXROUTE_SAME(2, RK_PB2, 1, 0x184, BIT(16 + 7)), /* cif-clkinm0 */
++ RK_MUXROUTE_SAME(3, RK_PD3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkinm1 */
++ RK_MUXROUTE_SAME(2, RK_PB3, 1, 0x184, BIT(16 + 7)), /* cif-clkoutm0 */
++ RK_MUXROUTE_SAME(3, RK_PD0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkoutm1 */
+ RK_MUXROUTE_SAME(3, RK_PC6, 2, 0x184, BIT(16 + 8)), /* pdm-m0 */
+ RK_MUXROUTE_SAME(2, RK_PC6, 1, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-m1 */
++ RK_MUXROUTE_SAME(3, RK_PD3, 2, 0x184, BIT(16 + 8)), /* pdm-sdi0m0 */
++ RK_MUXROUTE_SAME(2, RK_PC5, 2, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-sdi0m1 */
+ RK_MUXROUTE_SAME(1, RK_PD3, 2, 0x184, BIT(16 + 10)), /* uart2-rxm0 */
+ RK_MUXROUTE_SAME(2, RK_PB6, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-rxm1 */
++ RK_MUXROUTE_SAME(1, RK_PD2, 2, 0x184, BIT(16 + 10)), /* uart2-txm0 */
++ RK_MUXROUTE_SAME(2, RK_PB4, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-txm1 */
+ RK_MUXROUTE_SAME(0, RK_PC1, 2, 0x184, BIT(16 + 9)), /* uart3-rxm0 */
+ RK_MUXROUTE_SAME(1, RK_PB7, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rxm1 */
++ RK_MUXROUTE_SAME(0, RK_PC0, 2, 0x184, BIT(16 + 9)), /* uart3-txm0 */
++ RK_MUXROUTE_SAME(1, RK_PB6, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-txm1 */
++ RK_MUXROUTE_SAME(0, RK_PC2, 2, 0x184, BIT(16 + 9)), /* uart3-ctsm0 */
++ RK_MUXROUTE_SAME(1, RK_PB4, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-ctsm1 */
++ RK_MUXROUTE_SAME(0, RK_PC3, 2, 0x184, BIT(16 + 9)), /* uart3-rtsm0 */
++ RK_MUXROUTE_SAME(1, RK_PB5, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rtsm1 */
+ };
+
+ static struct rockchip_mux_route_data rk3128_mux_route_data[] = {
+@@ -703,19 +758,19 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_PMU(0, RK_PB5, 4, 0x0110, WRITE_MASK_VAL(3, 2, 1)), /* PWM1 IO mux M1 */
+ RK_MUXROUTE_PMU(0, RK_PC1, 1, 0x0110, WRITE_MASK_VAL(5, 4, 0)), /* PWM2 IO mux M0 */
+ RK_MUXROUTE_PMU(0, RK_PB6, 4, 0x0110, WRITE_MASK_VAL(5, 4, 1)), /* PWM2 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PA1, 4, 0x0300, WRITE_MASK_VAL(0, 0, 1)), /* CAN0 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA1, 3, 0x0300, WRITE_MASK_VAL(2, 2, 0)), /* CAN1 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PC3, 3, 0x0300, WRITE_MASK_VAL(2, 2, 1)), /* CAN1 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PB5, 3, 0x0300, WRITE_MASK_VAL(4, 4, 0)), /* CAN2 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PB2, 4, 0x0300, WRITE_MASK_VAL(4, 4, 1)), /* CAN2 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PC4, 1, 0x0300, WRITE_MASK_VAL(6, 6, 0)), /* HPDIN IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PB1, 3, 0x0300, WRITE_MASK_VAL(8, 8, 0)), /* GMAC1 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PA7, 3, 0x0300, WRITE_MASK_VAL(8, 8, 1)), /* GMAC1 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PD1, 1, 0x0300, WRITE_MASK_VAL(10, 10, 0)), /* HDMITX IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PB4, 1, 0x0300, WRITE_MASK_VAL(14, 14, 1)), /* I2C2 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA0, 1, 0x0304, WRITE_MASK_VAL(0, 0, 0)), /* I2C3 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PB6, 4, 0x0304, WRITE_MASK_VAL(0, 0, 1)), /* I2C3 IO mux M1 */
+@@ -741,7 +796,7 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(4, RK_PC3, 1, 0x0308, WRITE_MASK_VAL(12, 12, 1)), /* PWM15 IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PD2, 3, 0x0308, WRITE_MASK_VAL(14, 14, 0)), /* SDMMC2 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PA5, 5, 0x0308, WRITE_MASK_VAL(14, 14, 1)), /* SDMMC2 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD3, 3, 0x030c, WRITE_MASK_VAL(0, 0, 1)), /* SPI0 IO mux M1 */
+ RK_MUXROUTE_GRF(2, RK_PB5, 3, 0x030c, WRITE_MASK_VAL(2, 2, 0)), /* SPI1 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PC3, 3, 0x030c, WRITE_MASK_VAL(2, 2, 1)), /* SPI1 IO mux M1 */
+@@ -750,8 +805,8 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(4, RK_PB3, 4, 0x030c, WRITE_MASK_VAL(6, 6, 0)), /* SPI3 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PC2, 2, 0x030c, WRITE_MASK_VAL(6, 6, 1)), /* SPI3 IO mux M1 */
+ RK_MUXROUTE_GRF(2, RK_PB4, 2, 0x030c, WRITE_MASK_VAL(8, 8, 0)), /* UART1 IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
++ RK_MUXROUTE_GRF(3, RK_PD6, 4, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
+ RK_MUXROUTE_GRF(1, RK_PD5, 2, 0x030c, WRITE_MASK_VAL(10, 10, 1)), /* UART2 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA1, 2, 0x030c, WRITE_MASK_VAL(12, 12, 0)), /* UART3 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PB7, 4, 0x030c, WRITE_MASK_VAL(12, 12, 1)), /* UART3 IO mux M1 */
+@@ -781,13 +836,13 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(3, RK_PD6, 5, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PA0, 4, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PC4, 5, 0x0314, WRITE_MASK_VAL(1, 0, 2)), /* PDM IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 1)), /* PCIE20 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PB0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 2)), /* PCIE20 IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD2, 4, 0x0314, WRITE_MASK_VAL(5, 4, 1)), /* PCIE30X1 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA5, 4, 0x0314, WRITE_MASK_VAL(5, 4, 2)), /* PCIE30X1 IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD4, 4, 0x0314, WRITE_MASK_VAL(7, 6, 1)), /* PCIE30X2 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PC2, 4, 0x0314, WRITE_MASK_VAL(7, 6, 2)), /* PCIE30X2 IO mux M2 */
+ };
+@@ -872,20 +927,20 @@ static int rockchip_verify_mux(struct rockchip_pin_bank *bank,
+ int pin, int mux)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
++ struct device *dev = info->dev;
+ int iomux_num = (pin / 8);
+
+ if (iomux_num > 3)
+ return -EINVAL;
+
+ if (bank->iomux[iomux_num].type & IOMUX_UNROUTED) {
+- dev_err(info->dev, "pin %d is unrouted\n", pin);
++ dev_err(dev, "pin %d is unrouted\n", pin);
+ return -EINVAL;
+ }
+
+ if (bank->iomux[iomux_num].type & IOMUX_GPIO_ONLY) {
+ if (mux != RK_FUNC_GPIO) {
+- dev_err(info->dev,
+- "pin %d only supports a gpio mux\n", pin);
++ dev_err(dev, "pin %d only supports a gpio mux\n", pin);
+ return -ENOTSUPP;
+ }
+ }
+@@ -909,6 +964,7 @@ static int rockchip_verify_mux(struct rockchip_pin_bank *bank,
+ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
++ struct device *dev = info->dev;
+ int iomux_num = (pin / 8);
+ struct regmap *regmap;
+ int reg, ret, mask, mux_type;
+@@ -922,8 +978,7 @@ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ if (bank->iomux[iomux_num].type & IOMUX_GPIO_ONLY)
+ return 0;
+
+- dev_dbg(info->dev, "setting mux of GPIO%d-%d to %d\n",
+- bank->bank_num, pin, mux);
++ dev_dbg(dev, "setting mux of GPIO%d-%d to %d\n", bank->bank_num, pin, mux);
+
+ regmap = (bank->iomux[iomux_num].type & IOMUX_SOURCE_PMU)
+ ? info->regmap_pmu : info->regmap_base;
+@@ -984,9 +1039,9 @@ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ #define PX30_PULL_PINS_PER_REG 8
+ #define PX30_PULL_BANK_STRIDE 16
+
+-static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1006,6 +1061,8 @@ static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / PX30_PULL_PINS_PER_REG) * 4);
+ *bit = (pin_num % PX30_PULL_PINS_PER_REG);
+ *bit *= PX30_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define PX30_DRV_PMU_OFFSET 0x20
+@@ -1014,9 +1071,9 @@ static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define PX30_DRV_PINS_PER_REG 8
+ #define PX30_DRV_BANK_STRIDE 16
+
+-static void px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1036,6 +1093,8 @@ static void px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / PX30_DRV_PINS_PER_REG) * 4);
+ *bit = (pin_num % PX30_DRV_PINS_PER_REG);
+ *bit *= PX30_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define PX30_SCHMITT_PMU_OFFSET 0x38
+@@ -1075,9 +1134,9 @@ static int px30_calc_schmitt_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RV1108_PULL_BITS_PER_PIN 2
+ #define RV1108_PULL_BANK_STRIDE 16
+
+-static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1096,6 +1155,8 @@ static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RV1108_PULL_PINS_PER_REG) * 4);
+ *bit = (pin_num % RV1108_PULL_PINS_PER_REG);
+ *bit *= RV1108_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RV1108_DRV_PMU_OFFSET 0x20
+@@ -1104,9 +1165,9 @@ static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RV1108_DRV_PINS_PER_REG 8
+ #define RV1108_DRV_BANK_STRIDE 16
+
+-static void rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1126,6 +1187,8 @@ static void rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RV1108_DRV_PINS_PER_REG) * 4);
+ *bit = pin_num % RV1108_DRV_PINS_PER_REG;
+ *bit *= RV1108_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RV1108_SCHMITT_PMU_OFFSET 0x30
+@@ -1182,9 +1245,9 @@ static int rk3308_calc_schmitt_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK2928_PULL_PINS_PER_REG 16
+ #define RK2928_PULL_BANK_STRIDE 8
+
+-static void rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1194,13 +1257,15 @@ static void rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += (pin_num / RK2928_PULL_PINS_PER_REG) * 4;
+
+ *bit = pin_num % RK2928_PULL_PINS_PER_REG;
++
++ return 0;
+ };
+
+ #define RK3128_PULL_OFFSET 0x118
+
+-static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1210,6 +1275,8 @@ static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RK2928_PULL_PINS_PER_REG) * 4);
+
+ *bit = pin_num % RK2928_PULL_PINS_PER_REG;
++
++ return 0;
+ }
+
+ #define RK3188_PULL_OFFSET 0x164
+@@ -1218,9 +1285,9 @@ static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3188_PULL_BANK_STRIDE 16
+ #define RK3188_PULL_PMU_OFFSET 0x64
+
+-static void rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1250,12 +1317,14 @@ static void rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = 7 - (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3288_PULL_OFFSET 0x140
+-static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1279,6 +1348,8 @@ static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3288_DRV_PMU_OFFSET 0x70
+@@ -1287,9 +1358,9 @@ static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3288_DRV_PINS_PER_REG 8
+ #define RK3288_DRV_BANK_STRIDE 16
+
+-static void rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1313,13 +1384,15 @@ static void rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3228_PULL_OFFSET 0x100
+
+-static void rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1330,13 +1403,15 @@ static void rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3228_DRV_GRF_OFFSET 0x200
+
+-static void rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1347,13 +1422,15 @@ static void rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3308_PULL_OFFSET 0xa0
+
+-static void rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1364,13 +1441,15 @@ static void rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3308_DRV_GRF_OFFSET 0x100
+
+-static void rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1381,14 +1460,16 @@ static void rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3368_PULL_GRF_OFFSET 0x100
+ #define RK3368_PULL_PMU_OFFSET 0x10
+
+-static void rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1412,14 +1493,16 @@ static void rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3368_DRV_PMU_OFFSET 0x20
+ #define RK3368_DRV_GRF_OFFSET 0x200
+
+-static void rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1443,15 +1526,17 @@ static void rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3399_PULL_GRF_OFFSET 0xe040
+ #define RK3399_PULL_PMU_OFFSET 0x40
+ #define RK3399_DRV_3BITS_PER_PIN 3
+
+-static void rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1477,11 +1562,13 @@ static void rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+-static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ int drv_num = (pin_num / 8);
+@@ -1498,6 +1585,8 @@ static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % 8) * 3;
+ else
+ *bit = (pin_num % 8) * 2;
++
++ return 0;
+ }
+
+ #define RK3568_PULL_PMU_OFFSET 0x20
+@@ -1506,9 +1595,9 @@ static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3568_PULL_PINS_PER_REG 8
+ #define RK3568_PULL_BANK_STRIDE 0x10
+
+-static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1529,6 +1618,8 @@ static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3568_PULL_PINS_PER_REG);
+ *bit *= RK3568_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3568_DRV_PMU_OFFSET 0x70
+@@ -1537,9 +1628,9 @@ static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3568_DRV_PINS_PER_REG 2
+ #define RK3568_DRV_BANK_STRIDE 0x40
+
+-static void rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1560,6 +1651,8 @@ static void rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3568_DRV_PINS_PER_REG);
+ *bit *= RK3568_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ static int rockchip_perpin_drv_list[DRV_TYPE_MAX][8] = {
+@@ -1575,13 +1668,16 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret;
+ u32 data, temp, rmask_bits;
+ u8 bit;
+ int drv_type = bank->drv[pin_num / 8].drv_type;
+
+- ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ switch (drv_type) {
+ case DRV_TYPE_IO_1V8_3V0_AUTO:
+@@ -1620,7 +1716,7 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ bit -= 16;
+ break;
+ default:
+- dev_err(info->dev, "unsupported bit: %d for pinctrl drive type: %d\n",
++ dev_err(dev, "unsupported bit: %d for pinctrl drive type: %d\n",
+ bit, drv_type);
+ return -EINVAL;
+ }
+@@ -1632,8 +1728,7 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ rmask_bits = RK3288_DRV_BITS_PER_PIN;
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl drive type: %d\n",
+- drv_type);
++ dev_err(dev, "unsupported pinctrl drive type: %d\n", drv_type);
+ return -EINVAL;
+ }
+
+@@ -1652,16 +1747,19 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, i;
+ u32 data, rmask, rmask_bits, temp;
+ u8 bit;
+ int drv_type = bank->drv[pin_num / 8].drv_type;
+
+- dev_dbg(info->dev, "setting drive of GPIO%d-%d to %d\n",
++ dev_dbg(dev, "setting drive of GPIO%d-%d to %d\n",
+ bank->bank_num, pin_num, strength);
+
+- ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+ if (ctrl->type == RK3568) {
+ rmask_bits = RK3568_DRV_BITS_PER_PIN;
+ ret = (1 << (strength + 1)) - 1;
+@@ -1680,8 +1778,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ }
+
+ if (ret < 0) {
+- dev_err(info->dev, "unsupported driver strength %d\n",
+- strength);
++ dev_err(dev, "unsupported driver strength %d\n", strength);
+ return ret;
+ }
+
+@@ -1720,7 +1817,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ bit -= 16;
+ break;
+ default:
+- dev_err(info->dev, "unsupported bit: %d for pinctrl drive type: %d\n",
++ dev_err(dev, "unsupported bit: %d for pinctrl drive type: %d\n",
+ bit, drv_type);
+ return -EINVAL;
+ }
+@@ -1731,8 +1828,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ rmask_bits = RK3288_DRV_BITS_PER_PIN;
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl drive type: %d\n",
+- drv_type);
++ dev_err(dev, "unsupported pinctrl drive type: %d\n", drv_type);
+ return -EINVAL;
+ }
+
+@@ -1766,6 +1862,7 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, pull_type;
+ u8 bit;
+@@ -1775,7 +1872,9 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ if (ctrl->type == RK3066B)
+ return PIN_CONFIG_BIAS_DISABLE;
+
+- ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ ret = regmap_read(regmap, reg, &data);
+ if (ret)
+@@ -1794,13 +1893,22 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ case RK3308:
+ case RK3368:
+ case RK3399:
++ case RK3568:
+ pull_type = bank->pull_type[pin_num / 8];
+ data >>= bit;
+ data &= (1 << RK3188_PULL_BITS_PER_PIN) - 1;
++ /*
++ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
++ * where that pull up value becomes 3.
++ */
++ if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
++ if (data == 3)
++ data = 1;
++ }
+
+ return rockchip_pull_list[pull_type][data];
+ default:
+- dev_err(info->dev, "unsupported pinctrl type\n");
++ dev_err(dev, "unsupported pinctrl type\n");
+ return -EINVAL;
+ };
+ }
+@@ -1810,19 +1918,21 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, i, pull_type;
+ u8 bit;
+ u32 data, rmask;
+
+- dev_dbg(info->dev, "setting pull of GPIO%d-%d to %d\n",
+- bank->bank_num, pin_num, pull);
++ dev_dbg(dev, "setting pull of GPIO%d-%d to %d\n", bank->bank_num, pin_num, pull);
+
+ /* rk3066b does support any pulls */
+ if (ctrl->type == RK3066B)
+ return pull ? -EINVAL : 0;
+
+- ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ switch (ctrl->type) {
+ case RK2928:
+@@ -1850,7 +1960,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ }
+ }
+ /*
+- * In the TRM, pull-up being 1 for everything except the GPIO0_D0-D6,
++ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
+ * where that pull up value becomes 3.
+ */
+ if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
+@@ -1859,8 +1969,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ }
+
+ if (ret < 0) {
+- dev_err(info->dev, "unsupported pull setting %d\n",
+- pull);
++ dev_err(dev, "unsupported pull setting %d\n", pull);
+ return ret;
+ }
+
+@@ -1872,7 +1981,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ ret = regmap_update_bits(regmap, reg, rmask, data);
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl type\n");
++ dev_err(dev, "unsupported pinctrl type\n");
+ return -EINVAL;
+ }
+
+@@ -1963,12 +2072,13 @@ static int rockchip_set_schmitt(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret;
+ u8 bit;
+ u32 data, rmask;
+
+- dev_dbg(info->dev, "setting input schmitt of GPIO%d-%d to %d\n",
++ dev_dbg(dev, "setting input schmitt of GPIO%d-%d to %d\n",
+ bank->bank_num, pin_num, enable);
+
+ ret = ctrl->schmitt_calc_reg(bank, pin_num, &regmap, &reg, &bit);
+@@ -2028,10 +2138,11 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+ struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const unsigned int *pins = info->groups[group].pins;
+ const struct rockchip_pin_config *data = info->groups[group].data;
++ struct device *dev = info->dev;
+ struct rockchip_pin_bank *bank;
+ int cnt, ret = 0;
+
+- dev_dbg(info->dev, "enable function %s group %s\n",
++ dev_dbg(dev, "enable function %s group %s\n",
+ info->functions[selector].name, info->groups[group].name);
+
+ /*
+@@ -2057,11 +2168,24 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+ return 0;
+ }
+
++static int rockchip_pmx_gpio_set_direction(struct pinctrl_dev *pctldev,
++ struct pinctrl_gpio_range *range,
++ unsigned offset,
++ bool input)
++{
++ struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
++ struct rockchip_pin_bank *bank;
++
++ bank = pin_to_bank(info, offset);
++ return rockchip_set_mux(bank, offset - bank->pin_base, RK_FUNC_GPIO);
++}
++
+ static const struct pinmux_ops rockchip_pmx_ops = {
+ .get_functions_count = rockchip_pmx_get_funcs_count,
+ .get_function_name = rockchip_pmx_get_func_name,
+ .get_function_groups = rockchip_pmx_get_groups,
+ .set_mux = rockchip_pmx_set,
++ .gpio_set_direction = rockchip_pmx_gpio_set_direction,
+ };
+
+ /*
+@@ -2092,19 +2216,20 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl,
+ return false;
+ }
+
+-static int rockchip_pinconf_defer_output(struct rockchip_pin_bank *bank,
+- unsigned int pin, u32 arg)
++static int rockchip_pinconf_defer_pin(struct rockchip_pin_bank *bank,
++ unsigned int pin, u32 param, u32 arg)
+ {
+- struct rockchip_pin_output_deferred *cfg;
++ struct rockchip_pin_deferred *cfg;
+
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ cfg->pin = pin;
++ cfg->param = param;
+ cfg->arg = arg;
+
+- list_add_tail(&cfg->head, &bank->deferred_output);
++ list_add_tail(&cfg->head, &bank->deferred_pins);
+
+ return 0;
+ }
+@@ -2125,6 +2250,25 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+ param = pinconf_to_config_param(configs[i]);
+ arg = pinconf_to_config_argument(configs[i]);
+
++ if (param == PIN_CONFIG_OUTPUT || param == PIN_CONFIG_INPUT_ENABLE) {
++ /*
++ * Check for gpio driver not being probed yet.
++ * The lock makes sure that either gpio-probe has completed
++ * or the gpio driver hasn't probed yet.
++ */
++ mutex_lock(&bank->deferred_lock);
++ if (!gpio || !gpio->direction_output) {
++ rc = rockchip_pinconf_defer_pin(bank, pin - bank->pin_base, param,
++ arg);
++ mutex_unlock(&bank->deferred_lock);
++ if (rc)
++ return rc;
++
++ break;
++ }
++ mutex_unlock(&bank->deferred_lock);
++ }
++
+ switch (param) {
+ case PIN_CONFIG_BIAS_DISABLE:
+ rc = rockchip_set_pull(bank, pin - bank->pin_base,
+@@ -2153,27 +2297,21 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+ if (rc != RK_FUNC_GPIO)
+ return -EINVAL;
+
+- /*
+- * Check for gpio driver not being probed yet.
+- * The lock makes sure that either gpio-probe has completed
+- * or the gpio driver hasn't probed yet.
+- */
+- mutex_lock(&bank->deferred_lock);
+- if (!gpio || !gpio->direction_output) {
+- rc = rockchip_pinconf_defer_output(bank, pin - bank->pin_base, arg);
+- mutex_unlock(&bank->deferred_lock);
+- if (rc)
+- return rc;
+-
+- break;
+- }
+- mutex_unlock(&bank->deferred_lock);
+-
+ rc = gpio->direction_output(gpio, pin - bank->pin_base,
+ arg);
+ if (rc)
+ return rc;
+ break;
++ case PIN_CONFIG_INPUT_ENABLE:
++ rc = rockchip_set_mux(bank, pin - bank->pin_base,
++ RK_FUNC_GPIO);
++ if (rc != RK_FUNC_GPIO)
++ return -EINVAL;
++
++ rc = gpio->direction_input(gpio, pin - bank->pin_base);
++ if (rc)
++ return rc;
++ break;
+ case PIN_CONFIG_DRIVE_STRENGTH:
+ /* rk3288 is the first with per-pin drive-strength */
+ if (!info->ctrl->drv_calc_reg)
+@@ -2310,6 +2448,7 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ struct rockchip_pinctrl *info,
+ u32 index)
+ {
++ struct device *dev = info->dev;
+ struct rockchip_pin_bank *bank;
+ int size;
+ const __be32 *list;
+@@ -2317,7 +2456,7 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ int i, j;
+ int ret;
+
+- dev_dbg(info->dev, "group(%d): %pOFn\n", index, np);
++ dev_dbg(dev, "group(%d): %pOFn\n", index, np);
+
+ /* Initialise group */
+ grp->name = np->name;
+@@ -2330,18 +2469,14 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ /* we do not check return since it's safe node passed down */
+ size /= sizeof(*list);
+ if (!size || size % 4) {
+- dev_err(info->dev, "wrong pins number or pins and configs should be by 4\n");
++ dev_err(dev, "wrong pins number or pins and configs should be by 4\n");
+ return -EINVAL;
+ }
+
+ grp->npins = size / 4;
+
+- grp->pins = devm_kcalloc(info->dev, grp->npins, sizeof(unsigned int),
+- GFP_KERNEL);
+- grp->data = devm_kcalloc(info->dev,
+- grp->npins,
+- sizeof(struct rockchip_pin_config),
+- GFP_KERNEL);
++ grp->pins = devm_kcalloc(dev, grp->npins, sizeof(*grp->pins), GFP_KERNEL);
++ grp->data = devm_kcalloc(dev, grp->npins, sizeof(*grp->data), GFP_KERNEL);
+ if (!grp->pins || !grp->data)
+ return -ENOMEM;
+
+@@ -2364,6 +2499,7 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ np_config = of_find_node_by_phandle(be32_to_cpup(phandle));
+ ret = pinconf_generic_parse_dt_config(np_config, NULL,
+ &grp->data[j].configs, &grp->data[j].nconfigs);
++ of_node_put(np_config);
+ if (ret)
+ return ret;
+ }
+@@ -2375,6 +2511,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ struct rockchip_pinctrl *info,
+ u32 index)
+ {
++ struct device *dev = info->dev;
+ struct device_node *child;
+ struct rockchip_pmx_func *func;
+ struct rockchip_pin_group *grp;
+@@ -2382,7 +2519,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ static u32 grp_index;
+ u32 i = 0;
+
+- dev_dbg(info->dev, "parse function(%d): %pOFn\n", index, np);
++ dev_dbg(dev, "parse function(%d): %pOFn\n", index, np);
+
+ func = &info->functions[index];
+
+@@ -2392,8 +2529,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ if (func->ngroups <= 0)
+ return 0;
+
+- func->groups = devm_kcalloc(info->dev,
+- func->ngroups, sizeof(char *), GFP_KERNEL);
++ func->groups = devm_kcalloc(dev, func->ngroups, sizeof(*func->groups), GFP_KERNEL);
+ if (!func->groups)
+ return -ENOMEM;
+
+@@ -2421,20 +2557,14 @@ static int rockchip_pinctrl_parse_dt(struct platform_device *pdev,
+
+ rockchip_pinctrl_child_count(info, np);
+
+- dev_dbg(&pdev->dev, "nfunctions = %d\n", info->nfunctions);
+- dev_dbg(&pdev->dev, "ngroups = %d\n", info->ngroups);
++ dev_dbg(dev, "nfunctions = %d\n", info->nfunctions);
++ dev_dbg(dev, "ngroups = %d\n", info->ngroups);
+
+- info->functions = devm_kcalloc(dev,
+- info->nfunctions,
+- sizeof(struct rockchip_pmx_func),
+- GFP_KERNEL);
++ info->functions = devm_kcalloc(dev, info->nfunctions, sizeof(*info->functions), GFP_KERNEL);
+ if (!info->functions)
+ return -ENOMEM;
+
+- info->groups = devm_kcalloc(dev,
+- info->ngroups,
+- sizeof(struct rockchip_pin_group),
+- GFP_KERNEL);
++ info->groups = devm_kcalloc(dev, info->ngroups, sizeof(*info->groups), GFP_KERNEL);
+ if (!info->groups)
+ return -ENOMEM;
+
+@@ -2446,7 +2576,7 @@ static int rockchip_pinctrl_parse_dt(struct platform_device *pdev,
+
+ ret = rockchip_pinctrl_parse_functions(child, info, i++);
+ if (ret) {
+- dev_err(&pdev->dev, "failed to parse function\n");
++ dev_err(dev, "failed to parse function\n");
+ of_node_put(child);
+ return ret;
+ }
+@@ -2461,6 +2591,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ struct pinctrl_desc *ctrldesc = &info->pctl;
+ struct pinctrl_pin_desc *pindesc, *pdesc;
+ struct rockchip_pin_bank *pin_bank;
++ struct device *dev = &pdev->dev;
+ int pin, bank, ret;
+ int k;
+
+@@ -2470,9 +2601,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ ctrldesc->pmxops = &rockchip_pmx_ops;
+ ctrldesc->confops = &rockchip_pinconf_ops;
+
+- pindesc = devm_kcalloc(&pdev->dev,
+- info->ctrl->nr_pins, sizeof(*pindesc),
+- GFP_KERNEL);
++ pindesc = devm_kcalloc(dev, info->ctrl->nr_pins, sizeof(*pindesc), GFP_KERNEL);
+ if (!pindesc)
+ return -ENOMEM;
+
+@@ -2489,7 +2618,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ pdesc++;
+ }
+
+- INIT_LIST_HEAD(&pin_bank->deferred_output);
++ INIT_LIST_HEAD(&pin_bank->deferred_pins);
+ mutex_init(&pin_bank->deferred_lock);
+ }
+
+@@ -2497,9 +2626,9 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ if (ret)
+ return ret;
+
+- info->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, info);
++ info->pctl_dev = devm_pinctrl_register(dev, ctrldesc, info);
+ if (IS_ERR(info->pctl_dev)) {
+- dev_err(&pdev->dev, "could not register pinctrl driver\n");
++ dev_err(dev, "could not register pinctrl driver\n");
+ return PTR_ERR(info->pctl_dev);
+ }
+
+@@ -2513,8 +2642,9 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
+ struct rockchip_pinctrl *d,
+ struct platform_device *pdev)
+ {
++ struct device *dev = &pdev->dev;
++ struct device_node *node = dev->of_node;
+ const struct of_device_id *match;
+- struct device_node *node = pdev->dev.of_node;
+ struct rockchip_pin_ctrl *ctrl;
+ struct rockchip_pin_bank *bank;
+ int grf_offs, pmu_offs, drv_grf_offs, drv_pmu_offs, i, j;
+@@ -2566,7 +2696,7 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
+ drv_pmu_offs : drv_grf_offs;
+ }
+
+- dev_dbg(d->dev, "bank %d, iomux %d has iom_offset 0x%x drv_offset 0x%x\n",
++ dev_dbg(dev, "bank %d, iomux %d has iom_offset 0x%x drv_offset 0x%x\n",
+ i, j, iom->offset, drv->offset);
+
+ /*
+@@ -2675,8 +2805,8 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ {
+ struct rockchip_pinctrl *info;
+ struct device *dev = &pdev->dev;
++ struct device_node *np = dev->of_node, *node;
+ struct rockchip_pin_ctrl *ctrl;
+- struct device_node *np = pdev->dev.of_node, *node;
+ struct resource *res;
+ void __iomem *base;
+ int ret;
+@@ -2702,6 +2832,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ node = of_parse_phandle(np, "rockchip,grf", 0);
+ if (node) {
+ info->regmap_base = syscon_node_to_regmap(node);
++ of_node_put(node);
+ if (IS_ERR(info->regmap_base))
+ return PTR_ERR(info->regmap_base);
+ } else {
+@@ -2712,8 +2843,8 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+
+ rockchip_regmap_config.max_register = resource_size(res) - 4;
+ rockchip_regmap_config.name = "rockchip,pinctrl";
+- info->regmap_base = devm_regmap_init_mmio(&pdev->dev, base,
+- &rockchip_regmap_config);
++ info->regmap_base =
++ devm_regmap_init_mmio(dev, base, &rockchip_regmap_config);
+
+ /* to check for the old dt-bindings */
+ info->reg_size = resource_size(res);
+@@ -2725,12 +2856,10 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+- rockchip_regmap_config.max_register =
+- resource_size(res) - 4;
++ rockchip_regmap_config.max_register = resource_size(res) - 4;
+ rockchip_regmap_config.name = "rockchip,pinctrl-pull";
+- info->regmap_pull = devm_regmap_init_mmio(&pdev->dev,
+- base,
+- &rockchip_regmap_config);
++ info->regmap_pull =
++ devm_regmap_init_mmio(dev, base, &rockchip_regmap_config);
+ }
+ }
+
+@@ -2738,6 +2867,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ node = of_parse_phandle(np, "rockchip,pmu", 0);
+ if (node) {
+ info->regmap_pmu = syscon_node_to_regmap(node);
++ of_node_put(node);
+ if (IS_ERR(info->regmap_pmu))
+ return PTR_ERR(info->regmap_pmu);
+ }
+@@ -2748,9 +2878,9 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, info);
+
+- ret = of_platform_populate(np, rockchip_bank_match, NULL, NULL);
++ ret = of_platform_populate(np, NULL, NULL, &pdev->dev);
+ if (ret) {
+- dev_err(&pdev->dev, "failed to register gpio device\n");
++ dev_err(dev, "failed to register gpio device\n");
+ return ret;
+ }
+
+@@ -2761,7 +2891,7 @@ static int rockchip_pinctrl_remove(struct platform_device *pdev)
+ {
+ struct rockchip_pinctrl *info = platform_get_drvdata(pdev);
+ struct rockchip_pin_bank *bank;
+- struct rockchip_pin_output_deferred *cfg;
++ struct rockchip_pin_deferred *cfg;
+ int i;
+
+ of_platform_depopulate(&pdev->dev);
+@@ -2770,9 +2900,9 @@ static int rockchip_pinctrl_remove(struct platform_device *pdev)
+ bank = &info->ctrl->pin_banks[i];
+
+ mutex_lock(&bank->deferred_lock);
+- while (!list_empty(&bank->deferred_output)) {
+- cfg = list_first_entry(&bank->deferred_output,
+- struct rockchip_pin_output_deferred, head);
++ while (!list_empty(&bank->deferred_pins)) {
++ cfg = list_first_entry(&bank->deferred_pins,
++ struct rockchip_pin_deferred, head);
+ list_del(&cfg->head);
+ kfree(cfg);
+ }
+diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h
+index 91f10279d0844..59116e13758d0 100644
+--- a/drivers/pinctrl/pinctrl-rockchip.h
++++ b/drivers/pinctrl/pinctrl-rockchip.h
+@@ -171,7 +171,7 @@ struct rockchip_pin_bank {
+ u32 toggle_edge_mode;
+ u32 recalced_mask;
+ u32 route_mask;
+- struct list_head deferred_output;
++ struct list_head deferred_pins;
+ struct mutex deferred_lock;
+ };
+
+@@ -230,10 +230,10 @@ struct rockchip_pin_ctrl {
+ struct rockchip_mux_route_data *iomux_routes;
+ u32 niomux_routes;
+
+- void (*pull_calc_reg)(struct rockchip_pin_bank *bank,
++ int (*pull_calc_reg)(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit);
+- void (*drv_calc_reg)(struct rockchip_pin_bank *bank,
++ int (*drv_calc_reg)(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit);
+ int (*schmitt_calc_reg)(struct rockchip_pin_bank *bank,
+@@ -247,9 +247,12 @@ struct rockchip_pin_config {
+ unsigned int nconfigs;
+ };
+
+-struct rockchip_pin_output_deferred {
++enum pin_config_param;
++
++struct rockchip_pin_deferred {
+ struct list_head head;
+ unsigned int pin;
++ enum pin_config_param param;
+ u32 arg;
+ };
+
+diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
+index 67bec7ea0f8b0..9ad8f70206142 100644
+--- a/drivers/pinctrl/pinctrl-single.c
++++ b/drivers/pinctrl/pinctrl-single.c
+@@ -372,6 +372,8 @@ static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector,
+ if (!pcs->fmask)
+ return 0;
+ function = pinmux_generic_get_function(pctldev, fselector);
++ if (!function)
++ return -EINVAL;
+ func = function->data;
+ if (!func)
+ return -EINVAL;
+@@ -727,7 +729,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs)
+
+ mux_bytes = pcs->width / BITS_PER_BYTE;
+
+- if (pcs->bits_per_mux) {
++ if (pcs->bits_per_mux && pcs->fmask) {
+ pcs->bits_per_pin = fls(pcs->fmask);
+ nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin;
+ } else {
+diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
+index 5ff4207df66e1..f1b5176a5085b 100644
+--- a/drivers/pinctrl/qcom/Kconfig
++++ b/drivers/pinctrl/qcom/Kconfig
+@@ -189,6 +189,7 @@ config PINCTRL_QCOM_SPMI_PMIC
+ select PINMUX
+ select PINCONF
+ select GENERIC_PINCONF
++ select GPIOLIB
+ select GPIOLIB_IRQCHIP
+ select IRQ_DOMAIN_HIERARCHY
+ help
+@@ -203,6 +204,7 @@ config PINCTRL_QCOM_SSBI_PMIC
+ select PINMUX
+ select PINCONF
+ select GENERIC_PINCONF
++ select GPIOLIB
+ select GPIOLIB_IRQCHIP
+ select IRQ_DOMAIN_HIERARCHY
+ help
+diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
+index 2f19ab4db7208..dd1c9fd733c8f 100644
+--- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
++++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
+@@ -452,6 +452,15 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
+ }
+ }
+
++ /*
++ * As per Hardware Programming Guide, when configuring pin as output,
++ * set the pin value before setting output-enable (OE).
++ */
++ if (output_enabled) {
++ val = u32_encode_bits(value ? 1 : 0, LPI_GPIO_VALUE_OUT_MASK);
++ lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val);
++ }
++
+ val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG);
+
+ u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK);
+@@ -461,11 +470,6 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
+
+ lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val);
+
+- if (output_enabled) {
+- val = u32_encode_bits(value ? 1 : 0, LPI_GPIO_VALUE_OUT_MASK);
+- lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val);
+- }
+-
+ return 0;
+ }
+
+diff --git a/drivers/pinctrl/qcom/pinctrl-msm8916.c b/drivers/pinctrl/qcom/pinctrl-msm8916.c
+index 396db12ae9048..bf68913ba8212 100644
+--- a/drivers/pinctrl/qcom/pinctrl-msm8916.c
++++ b/drivers/pinctrl/qcom/pinctrl-msm8916.c
+@@ -844,8 +844,8 @@ static const struct msm_pingroup msm8916_groups[] = {
+ PINGROUP(28, pwr_modem_enabled_a, NA, NA, NA, NA, NA, qdss_tracedata_b, NA, atest_combodac),
+ PINGROUP(29, cci_i2c, NA, NA, NA, NA, NA, qdss_tracedata_b, NA, atest_combodac),
+ PINGROUP(30, cci_i2c, NA, NA, NA, NA, NA, NA, NA, qdss_tracedata_b),
+- PINGROUP(31, cci_timer0, NA, NA, NA, NA, NA, NA, NA, NA),
+- PINGROUP(32, cci_timer1, NA, NA, NA, NA, NA, NA, NA, NA),
++ PINGROUP(31, cci_timer0, flash_strobe, NA, NA, NA, NA, NA, NA, NA),
++ PINGROUP(32, cci_timer1, flash_strobe, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(33, cci_async, NA, NA, NA, NA, NA, NA, NA, qdss_tracedata_b),
+ PINGROUP(34, pwr_nav_enabled_a, NA, NA, NA, NA, NA, NA, NA, qdss_tracedata_b),
+ PINGROUP(35, pwr_crypto_enabled_a, NA, NA, NA, NA, NA, NA, NA, qdss_tracedata_b),
+diff --git a/drivers/pinctrl/qcom/pinctrl-msm8976.c b/drivers/pinctrl/qcom/pinctrl-msm8976.c
+index ec43edf9b660a..e11d845847190 100644
+--- a/drivers/pinctrl/qcom/pinctrl-msm8976.c
++++ b/drivers/pinctrl/qcom/pinctrl-msm8976.c
+@@ -733,7 +733,7 @@ static const char * const codec_int2_groups[] = {
+ "gpio74",
+ };
+ static const char * const wcss_bt_groups[] = {
+- "gpio39", "gpio47", "gpio88",
++ "gpio39", "gpio47", "gpio48",
+ };
+ static const char * const sdc3_groups[] = {
+ "gpio39", "gpio40", "gpio41",
+@@ -958,9 +958,9 @@ static const struct msm_pingroup msm8976_groups[] = {
+ PINGROUP(37, NA, NA, NA, qdss_tracedata_b, NA, NA, NA, NA, NA),
+ PINGROUP(38, NA, NA, NA, NA, NA, NA, NA, qdss_tracedata_b, NA),
+ PINGROUP(39, wcss_bt, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
+- PINGROUP(40, wcss_wlan, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
+- PINGROUP(41, wcss_wlan, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
+- PINGROUP(42, wcss_wlan, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
++ PINGROUP(40, wcss_wlan2, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
++ PINGROUP(41, wcss_wlan1, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
++ PINGROUP(42, wcss_wlan0, sdc3, NA, qdss_tracedata_a, NA, NA, NA, NA, NA),
+ PINGROUP(43, wcss_wlan, sdc3, NA, NA, qdss_tracedata_a, NA, NA, NA, NA),
+ PINGROUP(44, wcss_wlan, sdc3, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(45, wcss_fm, NA, qdss_tracectl_a, NA, NA, NA, NA, NA, NA),
+diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c
+index 0d9654b4ab60b..a4725ff12da01 100644
+--- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c
++++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c
+@@ -530,10 +530,10 @@ DECLARE_MSM_GPIO_PINS(187);
+ DECLARE_MSM_GPIO_PINS(188);
+ DECLARE_MSM_GPIO_PINS(189);
+
+-static const unsigned int sdc2_clk_pins[] = { 190 };
+-static const unsigned int sdc2_cmd_pins[] = { 191 };
+-static const unsigned int sdc2_data_pins[] = { 192 };
+-static const unsigned int ufs_reset_pins[] = { 193 };
++static const unsigned int ufs_reset_pins[] = { 190 };
++static const unsigned int sdc2_clk_pins[] = { 191 };
++static const unsigned int sdc2_cmd_pins[] = { 192 };
++static const unsigned int sdc2_data_pins[] = { 193 };
+
+ enum sc8180x_functions {
+ msm_mux_adsp_ext,
+@@ -1582,7 +1582,7 @@ static const int sc8180x_acpi_reserved_gpios[] = {
+ static const struct msm_gpio_wakeirq_map sc8180x_pdc_map[] = {
+ { 3, 31 }, { 5, 32 }, { 8, 33 }, { 9, 34 }, { 10, 100 }, { 12, 104 },
+ { 24, 37 }, { 26, 38 }, { 27, 41 }, { 28, 42 }, { 30, 39 }, { 36, 43 },
+- { 37, 43 }, { 38, 45 }, { 39, 118 }, { 39, 125 }, { 41, 47 },
++ { 37, 44 }, { 38, 45 }, { 39, 118 }, { 39, 125 }, { 41, 47 },
+ { 42, 48 }, { 46, 50 }, { 47, 49 }, { 48, 51 }, { 49, 53 }, { 50, 52 },
+ { 51, 116 }, { 51, 123 }, { 53, 54 }, { 54, 55 }, { 55, 56 },
+ { 56, 57 }, { 58, 58 }, { 60, 60 }, { 68, 62 }, { 70, 63 }, { 76, 86 },
+diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c
+index c51793f6546f1..fdfd7b8f3a76d 100644
+--- a/drivers/pinctrl/qcom/pinctrl-sdm845.c
++++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c
+@@ -1310,6 +1310,7 @@ static const struct msm_pinctrl_soc_data sdm845_pinctrl = {
+ .ngpios = 151,
+ .wakeirq_map = sdm845_pdc_map,
+ .nwakeirq_map = ARRAY_SIZE(sdm845_pdc_map),
++ .wakeirq_dual_edge_errata = true,
+ };
+
+ static const struct msm_pinctrl_soc_data sdm845_acpi_pinctrl = {
+diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c
+index af144e724bd9c..3bd7f9fedcc34 100644
+--- a/drivers/pinctrl/qcom/pinctrl-sm8250.c
++++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c
+@@ -1316,7 +1316,7 @@ static const struct msm_pingroup sm8250_groups[] = {
+ static const struct msm_gpio_wakeirq_map sm8250_pdc_map[] = {
+ { 0, 79 }, { 1, 84 }, { 2, 80 }, { 3, 82 }, { 4, 107 }, { 7, 43 },
+ { 11, 42 }, { 14, 44 }, { 15, 52 }, { 19, 67 }, { 23, 68 }, { 24, 105 },
+- { 27, 92 }, { 28, 106 }, { 31, 69 }, { 35, 70 }, { 39, 37 },
++ { 27, 92 }, { 28, 106 }, { 31, 69 }, { 35, 70 }, { 39, 73 },
+ { 40, 108 }, { 43, 71 }, { 45, 72 }, { 47, 83 }, { 51, 74 }, { 55, 77 },
+ { 59, 78 }, { 63, 75 }, { 64, 81 }, { 65, 87 }, { 66, 88 }, { 67, 89 },
+ { 68, 54 }, { 70, 85 }, { 77, 46 }, { 80, 90 }, { 81, 91 }, { 83, 97 },
+diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c
+index 4d8f8636c2b39..1c042d39380c6 100644
+--- a/drivers/pinctrl/qcom/pinctrl-sm8350.c
++++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c
+@@ -1597,10 +1597,10 @@ static const struct msm_pingroup sm8350_groups[] = {
+ [200] = PINGROUP(200, qdss_gpio, _, _, _, _, _, _, _, _),
+ [201] = PINGROUP(201, _, _, _, _, _, _, _, _, _),
+ [202] = PINGROUP(202, _, _, _, _, _, _, _, _, _),
+- [203] = UFS_RESET(ufs_reset, 0x1d8000),
+- [204] = SDC_PINGROUP(sdc2_clk, 0x1cf000, 14, 6),
+- [205] = SDC_PINGROUP(sdc2_cmd, 0x1cf000, 11, 3),
+- [206] = SDC_PINGROUP(sdc2_data, 0x1cf000, 9, 0),
++ [203] = UFS_RESET(ufs_reset, 0xd8000),
++ [204] = SDC_PINGROUP(sdc2_clk, 0xcf000, 14, 6),
++ [205] = SDC_PINGROUP(sdc2_cmd, 0xcf000, 11, 3),
++ [206] = SDC_PINGROUP(sdc2_data, 0xcf000, 9, 0),
+ };
+
+ static const struct msm_gpio_wakeirq_map sm8350_pdc_map[] = {
+diff --git a/drivers/pinctrl/ralink/Kconfig b/drivers/pinctrl/ralink/Kconfig
+index a76ee3deb8c31..d0f0a8f2b9b7d 100644
+--- a/drivers/pinctrl/ralink/Kconfig
++++ b/drivers/pinctrl/ralink/Kconfig
+@@ -3,37 +3,33 @@ menu "Ralink pinctrl drivers"
+ depends on RALINK
+
+ config PINCTRL_RALINK
+- bool "Ralink pin control support"
+- default y if RALINK
+-
+-config PINCTRL_RT2880
+- bool "RT2880 pinctrl driver for RALINK/Mediatek SOCs"
++ bool "Ralink pinctrl driver"
+ select PINMUX
+ select GENERIC_PINCONF
+
+ config PINCTRL_MT7620
+ bool "mt7620 pinctrl driver for RALINK/Mediatek SOCs"
+ depends on RALINK && SOC_MT7620
+- select PINCTRL_RT2880
++ select PINCTRL_RALINK
+
+ config PINCTRL_MT7621
+ bool "mt7621 pinctrl driver for RALINK/Mediatek SOCs"
+ depends on RALINK && SOC_MT7621
+- select PINCTRL_RT2880
++ select PINCTRL_RALINK
+
+ config PINCTRL_RT288X
+ bool "RT288X pinctrl driver for RALINK/Mediatek SOCs"
+ depends on RALINK && SOC_RT288X
+- select PINCTRL_RT2880
++ select PINCTRL_RALINK
+
+ config PINCTRL_RT305X
+ bool "RT305X pinctrl driver for RALINK/Mediatek SOCs"
+ depends on RALINK && SOC_RT305X
+- select PINCTRL_RT2880
++ select PINCTRL_RALINK
+
+ config PINCTRL_RT3883
+ bool "RT3883 pinctrl driver for RALINK/Mediatek SOCs"
+ depends on RALINK && SOC_RT3883
+- select PINCTRL_RT2880
++ select PINCTRL_RALINK
+
+ endmenu
+diff --git a/drivers/pinctrl/ralink/Makefile b/drivers/pinctrl/ralink/Makefile
+index a15610206ced4..2c1323b74e96f 100644
+--- a/drivers/pinctrl/ralink/Makefile
++++ b/drivers/pinctrl/ralink/Makefile
+@@ -1,5 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0
+-obj-$(CONFIG_PINCTRL_RT2880) += pinctrl-rt2880.o
++obj-$(CONFIG_PINCTRL_RALINK) += pinctrl-ralink.o
+
+ obj-$(CONFIG_PINCTRL_MT7620) += pinctrl-mt7620.o
+ obj-$(CONFIG_PINCTRL_MT7621) += pinctrl-mt7621.o
+diff --git a/drivers/pinctrl/ralink/pinctrl-mt7620.c b/drivers/pinctrl/ralink/pinctrl-mt7620.c
+index 425d55a2ee19f..51b863d85c51e 100644
+--- a/drivers/pinctrl/ralink/pinctrl-mt7620.c
++++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c
+@@ -1,10 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+
++#include <asm/mach-ralink/ralink_regs.h>
+ #include <asm/mach-ralink/mt7620.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+-#include "pinmux.h"
++#include "pinctrl-ralink.h"
+
+ #define MT7620_GPIO_MODE_UART0_SHIFT 2
+ #define MT7620_GPIO_MODE_UART0_MASK 0x7
+@@ -53,20 +54,20 @@
+ #define MT7620_GPIO_MODE_EPHY 15
+ #define MT7620_GPIO_MODE_PA 20
+
+-static struct rt2880_pmx_func i2c_grp[] = { FUNC("i2c", 0, 1, 2) };
+-static struct rt2880_pmx_func spi_grp[] = { FUNC("spi", 0, 3, 4) };
+-static struct rt2880_pmx_func uartlite_grp[] = { FUNC("uartlite", 0, 15, 2) };
+-static struct rt2880_pmx_func mdio_grp[] = {
++static struct ralink_pmx_func i2c_grp[] = { FUNC("i2c", 0, 1, 2) };
++static struct ralink_pmx_func spi_grp[] = { FUNC("spi", 0, 3, 4) };
++static struct ralink_pmx_func uartlite_grp[] = { FUNC("uartlite", 0, 15, 2) };
++static struct ralink_pmx_func mdio_grp[] = {
+ FUNC("mdio", MT7620_GPIO_MODE_MDIO, 22, 2),
+ FUNC("refclk", MT7620_GPIO_MODE_MDIO_REFCLK, 22, 2),
+ };
+-static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 24, 12) };
+-static struct rt2880_pmx_func refclk_grp[] = { FUNC("spi refclk", 0, 37, 3) };
+-static struct rt2880_pmx_func ephy_grp[] = { FUNC("ephy", 0, 40, 5) };
+-static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 60, 12) };
+-static struct rt2880_pmx_func wled_grp[] = { FUNC("wled", 0, 72, 1) };
+-static struct rt2880_pmx_func pa_grp[] = { FUNC("pa", 0, 18, 4) };
+-static struct rt2880_pmx_func uartf_grp[] = {
++static struct ralink_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 24, 12) };
++static struct ralink_pmx_func refclk_grp[] = { FUNC("spi refclk", 0, 37, 3) };
++static struct ralink_pmx_func ephy_grp[] = { FUNC("ephy", 0, 40, 5) };
++static struct ralink_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 60, 12) };
++static struct ralink_pmx_func wled_grp[] = { FUNC("wled", 0, 72, 1) };
++static struct ralink_pmx_func pa_grp[] = { FUNC("pa", 0, 18, 4) };
++static struct ralink_pmx_func uartf_grp[] = {
+ FUNC("uartf", MT7620_GPIO_MODE_UARTF, 7, 8),
+ FUNC("pcm uartf", MT7620_GPIO_MODE_PCM_UARTF, 7, 8),
+ FUNC("pcm i2s", MT7620_GPIO_MODE_PCM_I2S, 7, 8),
+@@ -75,20 +76,20 @@ static struct rt2880_pmx_func uartf_grp[] = {
+ FUNC("gpio uartf", MT7620_GPIO_MODE_GPIO_UARTF, 7, 4),
+ FUNC("gpio i2s", MT7620_GPIO_MODE_GPIO_I2S, 7, 4),
+ };
+-static struct rt2880_pmx_func wdt_grp[] = {
++static struct ralink_pmx_func wdt_grp[] = {
+ FUNC("wdt rst", 0, 17, 1),
+ FUNC("wdt refclk", 0, 17, 1),
+ };
+-static struct rt2880_pmx_func pcie_rst_grp[] = {
++static struct ralink_pmx_func pcie_rst_grp[] = {
+ FUNC("pcie rst", MT7620_GPIO_MODE_PCIE_RST, 36, 1),
+ FUNC("pcie refclk", MT7620_GPIO_MODE_PCIE_REF, 36, 1)
+ };
+-static struct rt2880_pmx_func nd_sd_grp[] = {
++static struct ralink_pmx_func nd_sd_grp[] = {
+ FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15),
+ FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13)
+ };
+
+-static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
++static struct ralink_pmx_group mt7620a_pinmux_data[] = {
+ GRP("i2c", i2c_grp, 1, MT7620_GPIO_MODE_I2C),
+ GRP("uartf", uartf_grp, MT7620_GPIO_MODE_UART0_MASK,
+ MT7620_GPIO_MODE_UART0_SHIFT),
+@@ -111,262 +112,262 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
+ { 0 }
+ };
+
+-static struct rt2880_pmx_func pwm1_grp_mt7628[] = {
++static struct ralink_pmx_func pwm1_grp_mt76x8[] = {
+ FUNC("sdxc d6", 3, 19, 1),
+ FUNC("utif", 2, 19, 1),
+ FUNC("gpio", 1, 19, 1),
+ FUNC("pwm1", 0, 19, 1),
+ };
+
+-static struct rt2880_pmx_func pwm0_grp_mt7628[] = {
++static struct ralink_pmx_func pwm0_grp_mt76x8[] = {
+ FUNC("sdxc d7", 3, 18, 1),
+ FUNC("utif", 2, 18, 1),
+ FUNC("gpio", 1, 18, 1),
+ FUNC("pwm0", 0, 18, 1),
+ };
+
+-static struct rt2880_pmx_func uart2_grp_mt7628[] = {
++static struct ralink_pmx_func uart2_grp_mt76x8[] = {
+ FUNC("sdxc d5 d4", 3, 20, 2),
+ FUNC("pwm", 2, 20, 2),
+ FUNC("gpio", 1, 20, 2),
+ FUNC("uart2", 0, 20, 2),
+ };
+
+-static struct rt2880_pmx_func uart1_grp_mt7628[] = {
++static struct ralink_pmx_func uart1_grp_mt76x8[] = {
+ FUNC("sw_r", 3, 45, 2),
+ FUNC("pwm", 2, 45, 2),
+ FUNC("gpio", 1, 45, 2),
+ FUNC("uart1", 0, 45, 2),
+ };
+
+-static struct rt2880_pmx_func i2c_grp_mt7628[] = {
++static struct ralink_pmx_func i2c_grp_mt76x8[] = {
+ FUNC("-", 3, 4, 2),
+ FUNC("debug", 2, 4, 2),
+ FUNC("gpio", 1, 4, 2),
+ FUNC("i2c", 0, 4, 2),
+ };
+
+-static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("refclk", 0, 37, 1) };
+-static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 36, 1) };
+-static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
+-static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
++static struct ralink_pmx_func refclk_grp_mt76x8[] = { FUNC("refclk", 0, 37, 1) };
++static struct ralink_pmx_func perst_grp_mt76x8[] = { FUNC("perst", 0, 36, 1) };
++static struct ralink_pmx_func wdt_grp_mt76x8[] = { FUNC("wdt", 0, 38, 1) };
++static struct ralink_pmx_func spi_grp_mt76x8[] = { FUNC("spi", 0, 7, 4) };
+
+-static struct rt2880_pmx_func sd_mode_grp_mt7628[] = {
++static struct ralink_pmx_func sd_mode_grp_mt76x8[] = {
+ FUNC("jtag", 3, 22, 8),
+ FUNC("utif", 2, 22, 8),
+ FUNC("gpio", 1, 22, 8),
+ FUNC("sdxc", 0, 22, 8),
+ };
+
+-static struct rt2880_pmx_func uart0_grp_mt7628[] = {
++static struct ralink_pmx_func uart0_grp_mt76x8[] = {
+ FUNC("-", 3, 12, 2),
+ FUNC("-", 2, 12, 2),
+ FUNC("gpio", 1, 12, 2),
+ FUNC("uart0", 0, 12, 2),
+ };
+
+-static struct rt2880_pmx_func i2s_grp_mt7628[] = {
++static struct ralink_pmx_func i2s_grp_mt76x8[] = {
+ FUNC("antenna", 3, 0, 4),
+ FUNC("pcm", 2, 0, 4),
+ FUNC("gpio", 1, 0, 4),
+ FUNC("i2s", 0, 0, 4),
+ };
+
+-static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
++static struct ralink_pmx_func spi_cs1_grp_mt76x8[] = {
+ FUNC("-", 3, 6, 1),
+ FUNC("refclk", 2, 6, 1),
+ FUNC("gpio", 1, 6, 1),
+ FUNC("spi cs1", 0, 6, 1),
+ };
+
+-static struct rt2880_pmx_func spis_grp_mt7628[] = {
++static struct ralink_pmx_func spis_grp_mt76x8[] = {
+ FUNC("pwm_uart2", 3, 14, 4),
+ FUNC("utif", 2, 14, 4),
+ FUNC("gpio", 1, 14, 4),
+ FUNC("spis", 0, 14, 4),
+ };
+
+-static struct rt2880_pmx_func gpio_grp_mt7628[] = {
++static struct ralink_pmx_func gpio_grp_mt76x8[] = {
+ FUNC("pcie", 3, 11, 1),
+ FUNC("refclk", 2, 11, 1),
+ FUNC("gpio", 1, 11, 1),
+ FUNC("gpio", 0, 11, 1),
+ };
+
+-static struct rt2880_pmx_func p4led_kn_grp_mt7628[] = {
++static struct ralink_pmx_func p4led_kn_grp_mt76x8[] = {
+ FUNC("jtag", 3, 30, 1),
+ FUNC("utif", 2, 30, 1),
+ FUNC("gpio", 1, 30, 1),
+ FUNC("p4led_kn", 0, 30, 1),
+ };
+
+-static struct rt2880_pmx_func p3led_kn_grp_mt7628[] = {
++static struct ralink_pmx_func p3led_kn_grp_mt76x8[] = {
+ FUNC("jtag", 3, 31, 1),
+ FUNC("utif", 2, 31, 1),
+ FUNC("gpio", 1, 31, 1),
+ FUNC("p3led_kn", 0, 31, 1),
+ };
+
+-static struct rt2880_pmx_func p2led_kn_grp_mt7628[] = {
++static struct ralink_pmx_func p2led_kn_grp_mt76x8[] = {
+ FUNC("jtag", 3, 32, 1),
+ FUNC("utif", 2, 32, 1),
+ FUNC("gpio", 1, 32, 1),
+ FUNC("p2led_kn", 0, 32, 1),
+ };
+
+-static struct rt2880_pmx_func p1led_kn_grp_mt7628[] = {
++static struct ralink_pmx_func p1led_kn_grp_mt76x8[] = {
+ FUNC("jtag", 3, 33, 1),
+ FUNC("utif", 2, 33, 1),
+ FUNC("gpio", 1, 33, 1),
+ FUNC("p1led_kn", 0, 33, 1),
+ };
+
+-static struct rt2880_pmx_func p0led_kn_grp_mt7628[] = {
++static struct ralink_pmx_func p0led_kn_grp_mt76x8[] = {
+ FUNC("jtag", 3, 34, 1),
+ FUNC("rsvd", 2, 34, 1),
+ FUNC("gpio", 1, 34, 1),
+ FUNC("p0led_kn", 0, 34, 1),
+ };
+
+-static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
++static struct ralink_pmx_func wled_kn_grp_mt76x8[] = {
+ FUNC("rsvd", 3, 35, 1),
+ FUNC("rsvd", 2, 35, 1),
+ FUNC("gpio", 1, 35, 1),
+ FUNC("wled_kn", 0, 35, 1),
+ };
+
+-static struct rt2880_pmx_func p4led_an_grp_mt7628[] = {
++static struct ralink_pmx_func p4led_an_grp_mt76x8[] = {
+ FUNC("jtag", 3, 39, 1),
+ FUNC("utif", 2, 39, 1),
+ FUNC("gpio", 1, 39, 1),
+ FUNC("p4led_an", 0, 39, 1),
+ };
+
+-static struct rt2880_pmx_func p3led_an_grp_mt7628[] = {
++static struct ralink_pmx_func p3led_an_grp_mt76x8[] = {
+ FUNC("jtag", 3, 40, 1),
+ FUNC("utif", 2, 40, 1),
+ FUNC("gpio", 1, 40, 1),
+ FUNC("p3led_an", 0, 40, 1),
+ };
+
+-static struct rt2880_pmx_func p2led_an_grp_mt7628[] = {
++static struct ralink_pmx_func p2led_an_grp_mt76x8[] = {
+ FUNC("jtag", 3, 41, 1),
+ FUNC("utif", 2, 41, 1),
+ FUNC("gpio", 1, 41, 1),
+ FUNC("p2led_an", 0, 41, 1),
+ };
+
+-static struct rt2880_pmx_func p1led_an_grp_mt7628[] = {
++static struct ralink_pmx_func p1led_an_grp_mt76x8[] = {
+ FUNC("jtag", 3, 42, 1),
+ FUNC("utif", 2, 42, 1),
+ FUNC("gpio", 1, 42, 1),
+ FUNC("p1led_an", 0, 42, 1),
+ };
+
+-static struct rt2880_pmx_func p0led_an_grp_mt7628[] = {
++static struct ralink_pmx_func p0led_an_grp_mt76x8[] = {
+ FUNC("jtag", 3, 43, 1),
+ FUNC("rsvd", 2, 43, 1),
+ FUNC("gpio", 1, 43, 1),
+ FUNC("p0led_an", 0, 43, 1),
+ };
+
+-static struct rt2880_pmx_func wled_an_grp_mt7628[] = {
++static struct ralink_pmx_func wled_an_grp_mt76x8[] = {
+ FUNC("rsvd", 3, 44, 1),
+ FUNC("rsvd", 2, 44, 1),
+ FUNC("gpio", 1, 44, 1),
+ FUNC("wled_an", 0, 44, 1),
+ };
+
+-#define MT7628_GPIO_MODE_MASK 0x3
+-
+-#define MT7628_GPIO_MODE_P4LED_KN 58
+-#define MT7628_GPIO_MODE_P3LED_KN 56
+-#define MT7628_GPIO_MODE_P2LED_KN 54
+-#define MT7628_GPIO_MODE_P1LED_KN 52
+-#define MT7628_GPIO_MODE_P0LED_KN 50
+-#define MT7628_GPIO_MODE_WLED_KN 48
+-#define MT7628_GPIO_MODE_P4LED_AN 42
+-#define MT7628_GPIO_MODE_P3LED_AN 40
+-#define MT7628_GPIO_MODE_P2LED_AN 38
+-#define MT7628_GPIO_MODE_P1LED_AN 36
+-#define MT7628_GPIO_MODE_P0LED_AN 34
+-#define MT7628_GPIO_MODE_WLED_AN 32
+-#define MT7628_GPIO_MODE_PWM1 30
+-#define MT7628_GPIO_MODE_PWM0 28
+-#define MT7628_GPIO_MODE_UART2 26
+-#define MT7628_GPIO_MODE_UART1 24
+-#define MT7628_GPIO_MODE_I2C 20
+-#define MT7628_GPIO_MODE_REFCLK 18
+-#define MT7628_GPIO_MODE_PERST 16
+-#define MT7628_GPIO_MODE_WDT 14
+-#define MT7628_GPIO_MODE_SPI 12
+-#define MT7628_GPIO_MODE_SDMODE 10
+-#define MT7628_GPIO_MODE_UART0 8
+-#define MT7628_GPIO_MODE_I2S 6
+-#define MT7628_GPIO_MODE_CS1 4
+-#define MT7628_GPIO_MODE_SPIS 2
+-#define MT7628_GPIO_MODE_GPIO 0
+-
+-static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
+- GRP_G("pwm1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_PWM1),
+- GRP_G("pwm0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_PWM0),
+- GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_UART2),
+- GRP_G("uart1", uart1_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_UART1),
+- GRP_G("i2c", i2c_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_I2C),
+- GRP("refclk", refclk_grp_mt7628, 1, MT7628_GPIO_MODE_REFCLK),
+- GRP("perst", perst_grp_mt7628, 1, MT7628_GPIO_MODE_PERST),
+- GRP("wdt", wdt_grp_mt7628, 1, MT7628_GPIO_MODE_WDT),
+- GRP("spi", spi_grp_mt7628, 1, MT7628_GPIO_MODE_SPI),
+- GRP_G("sdmode", sd_mode_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_SDMODE),
+- GRP_G("uart0", uart0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_UART0),
+- GRP_G("i2s", i2s_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_I2S),
+- GRP_G("spi cs1", spi_cs1_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_CS1),
+- GRP_G("spis", spis_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_SPIS),
+- GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_GPIO),
+- GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_WLED_AN),
+- GRP_G("p0led_an", p0led_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P0LED_AN),
+- GRP_G("p1led_an", p1led_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P1LED_AN),
+- GRP_G("p2led_an", p2led_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P2LED_AN),
+- GRP_G("p3led_an", p3led_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P3LED_AN),
+- GRP_G("p4led_an", p4led_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P4LED_AN),
+- GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_WLED_KN),
+- GRP_G("p0led_kn", p0led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P0LED_KN),
+- GRP_G("p1led_kn", p1led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P1LED_KN),
+- GRP_G("p2led_kn", p2led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P2LED_KN),
+- GRP_G("p3led_kn", p3led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P3LED_KN),
+- GRP_G("p4led_kn", p4led_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+- 1, MT7628_GPIO_MODE_P4LED_KN),
++#define MT76X8_GPIO_MODE_MASK 0x3
++
++#define MT76X8_GPIO_MODE_P4LED_KN 58
++#define MT76X8_GPIO_MODE_P3LED_KN 56
++#define MT76X8_GPIO_MODE_P2LED_KN 54
++#define MT76X8_GPIO_MODE_P1LED_KN 52
++#define MT76X8_GPIO_MODE_P0LED_KN 50
++#define MT76X8_GPIO_MODE_WLED_KN 48
++#define MT76X8_GPIO_MODE_P4LED_AN 42
++#define MT76X8_GPIO_MODE_P3LED_AN 40
++#define MT76X8_GPIO_MODE_P2LED_AN 38
++#define MT76X8_GPIO_MODE_P1LED_AN 36
++#define MT76X8_GPIO_MODE_P0LED_AN 34
++#define MT76X8_GPIO_MODE_WLED_AN 32
++#define MT76X8_GPIO_MODE_PWM1 30
++#define MT76X8_GPIO_MODE_PWM0 28
++#define MT76X8_GPIO_MODE_UART2 26
++#define MT76X8_GPIO_MODE_UART1 24
++#define MT76X8_GPIO_MODE_I2C 20
++#define MT76X8_GPIO_MODE_REFCLK 18
++#define MT76X8_GPIO_MODE_PERST 16
++#define MT76X8_GPIO_MODE_WDT 14
++#define MT76X8_GPIO_MODE_SPI 12
++#define MT76X8_GPIO_MODE_SDMODE 10
++#define MT76X8_GPIO_MODE_UART0 8
++#define MT76X8_GPIO_MODE_I2S 6
++#define MT76X8_GPIO_MODE_CS1 4
++#define MT76X8_GPIO_MODE_SPIS 2
++#define MT76X8_GPIO_MODE_GPIO 0
++
++static struct ralink_pmx_group mt76x8_pinmux_data[] = {
++ GRP_G("pwm1", pwm1_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_PWM1),
++ GRP_G("pwm0", pwm0_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_PWM0),
++ GRP_G("uart2", uart2_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_UART2),
++ GRP_G("uart1", uart1_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_UART1),
++ GRP_G("i2c", i2c_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_I2C),
++ GRP("refclk", refclk_grp_mt76x8, 1, MT76X8_GPIO_MODE_REFCLK),
++ GRP("perst", perst_grp_mt76x8, 1, MT76X8_GPIO_MODE_PERST),
++ GRP("wdt", wdt_grp_mt76x8, 1, MT76X8_GPIO_MODE_WDT),
++ GRP("spi", spi_grp_mt76x8, 1, MT76X8_GPIO_MODE_SPI),
++ GRP_G("sdmode", sd_mode_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_SDMODE),
++ GRP_G("uart0", uart0_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_UART0),
++ GRP_G("i2s", i2s_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_I2S),
++ GRP_G("spi cs1", spi_cs1_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_CS1),
++ GRP_G("spis", spis_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_SPIS),
++ GRP_G("gpio", gpio_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_GPIO),
++ GRP_G("wled_an", wled_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_WLED_AN),
++ GRP_G("p0led_an", p0led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P0LED_AN),
++ GRP_G("p1led_an", p1led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P1LED_AN),
++ GRP_G("p2led_an", p2led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P2LED_AN),
++ GRP_G("p3led_an", p3led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P3LED_AN),
++ GRP_G("p4led_an", p4led_an_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P4LED_AN),
++ GRP_G("wled_kn", wled_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_WLED_KN),
++ GRP_G("p0led_kn", p0led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P0LED_KN),
++ GRP_G("p1led_kn", p1led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P1LED_KN),
++ GRP_G("p2led_kn", p2led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P2LED_KN),
++ GRP_G("p3led_kn", p3led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P3LED_KN),
++ GRP_G("p4led_kn", p4led_kn_grp_mt76x8, MT76X8_GPIO_MODE_MASK,
++ 1, MT76X8_GPIO_MODE_P4LED_KN),
+ { 0 }
+ };
+
+ static int mt7620_pinmux_probe(struct platform_device *pdev)
+ {
+ if (is_mt76x8())
+- return rt2880_pinmux_init(pdev, mt7628an_pinmux_data);
++ return ralink_pinmux_init(pdev, mt76x8_pinmux_data);
+ else
+- return rt2880_pinmux_init(pdev, mt7620a_pinmux_data);
++ return ralink_pinmux_init(pdev, mt7620a_pinmux_data);
+ }
+
+ static const struct of_device_id mt7620_pinmux_match[] = {
+diff --git a/drivers/pinctrl/ralink/pinctrl-mt7621.c b/drivers/pinctrl/ralink/pinctrl-mt7621.c
+index 7d96144c474e7..14b89cb43d4cb 100644
+--- a/drivers/pinctrl/ralink/pinctrl-mt7621.c
++++ b/drivers/pinctrl/ralink/pinctrl-mt7621.c
+@@ -3,7 +3,7 @@
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+-#include "pinmux.h"
++#include "pinctrl-ralink.h"
+
+ #define MT7621_GPIO_MODE_UART1 1
+ #define MT7621_GPIO_MODE_I2C 2
+@@ -34,40 +34,40 @@
+ #define MT7621_GPIO_MODE_SDHCI_SHIFT 18
+ #define MT7621_GPIO_MODE_SDHCI_GPIO 1
+
+-static struct rt2880_pmx_func uart1_grp[] = { FUNC("uart1", 0, 1, 2) };
+-static struct rt2880_pmx_func i2c_grp[] = { FUNC("i2c", 0, 3, 2) };
+-static struct rt2880_pmx_func uart3_grp[] = {
++static struct ralink_pmx_func uart1_grp[] = { FUNC("uart1", 0, 1, 2) };
++static struct ralink_pmx_func i2c_grp[] = { FUNC("i2c", 0, 3, 2) };
++static struct ralink_pmx_func uart3_grp[] = {
+ FUNC("uart3", 0, 5, 4),
+ FUNC("i2s", 2, 5, 4),
+ FUNC("spdif3", 3, 5, 4),
+ };
+-static struct rt2880_pmx_func uart2_grp[] = {
++static struct ralink_pmx_func uart2_grp[] = {
+ FUNC("uart2", 0, 9, 4),
+ FUNC("pcm", 2, 9, 4),
+ FUNC("spdif2", 3, 9, 4),
+ };
+-static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
+-static struct rt2880_pmx_func wdt_grp[] = {
++static struct ralink_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
++static struct ralink_pmx_func wdt_grp[] = {
+ FUNC("wdt rst", 0, 18, 1),
+ FUNC("wdt refclk", 2, 18, 1),
+ };
+-static struct rt2880_pmx_func pcie_rst_grp[] = {
++static struct ralink_pmx_func pcie_rst_grp[] = {
+ FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1),
+ FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1)
+ };
+-static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
+-static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
+-static struct rt2880_pmx_func spi_grp[] = {
++static struct ralink_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
++static struct ralink_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
++static struct ralink_pmx_func spi_grp[] = {
+ FUNC("spi", 0, 34, 7),
+ FUNC("nand1", 2, 34, 7),
+ };
+-static struct rt2880_pmx_func sdhci_grp[] = {
++static struct ralink_pmx_func sdhci_grp[] = {
+ FUNC("sdhci", 0, 41, 8),
+ FUNC("nand2", 2, 41, 8),
+ };
+-static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
++static struct ralink_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
+
+-static struct rt2880_pmx_group mt7621_pinmux_data[] = {
++static struct ralink_pmx_group mt7621_pinmux_data[] = {
+ GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1),
+ GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C),
+ GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK,
+@@ -92,7 +92,7 @@ static struct rt2880_pmx_group mt7621_pinmux_data[] = {
+
+ static int mt7621_pinmux_probe(struct platform_device *pdev)
+ {
+- return rt2880_pinmux_init(pdev, mt7621_pinmux_data);
++ return ralink_pinmux_init(pdev, mt7621_pinmux_data);
+ }
+
+ static const struct of_device_id mt7621_pinmux_match[] = {
+diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c
+new file mode 100644
+index 0000000000000..3a8268a43d74a
+--- /dev/null
++++ b/drivers/pinctrl/ralink/pinctrl-ralink.c
+@@ -0,0 +1,351 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
++ */
++
++#include <linux/module.h>
++#include <linux/device.h>
++#include <linux/io.h>
++#include <linux/platform_device.h>
++#include <linux/slab.h>
++#include <linux/of.h>
++#include <linux/pinctrl/pinctrl.h>
++#include <linux/pinctrl/pinconf.h>
++#include <linux/pinctrl/pinconf-generic.h>
++#include <linux/pinctrl/pinmux.h>
++#include <linux/pinctrl/consumer.h>
++#include <linux/pinctrl/machine.h>
++
++#include <asm/mach-ralink/ralink_regs.h>
++#include <asm/mach-ralink/mt7620.h>
++
++#include "pinctrl-ralink.h"
++#include "../core.h"
++#include "../pinctrl-utils.h"
++
++#define SYSC_REG_GPIO_MODE 0x60
++#define SYSC_REG_GPIO_MODE2 0x64
++
++struct ralink_priv {
++ struct device *dev;
++
++ struct pinctrl_pin_desc *pads;
++ struct pinctrl_desc *desc;
++
++ struct ralink_pmx_func **func;
++ int func_count;
++
++ struct ralink_pmx_group *groups;
++ const char **group_names;
++ int group_count;
++
++ u8 *gpio;
++ int max_pins;
++};
++
++static int ralink_get_group_count(struct pinctrl_dev *pctrldev)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ return p->group_count;
++}
++
++static const char *ralink_get_group_name(struct pinctrl_dev *pctrldev,
++ unsigned int group)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ return (group >= p->group_count) ? NULL : p->group_names[group];
++}
++
++static int ralink_get_group_pins(struct pinctrl_dev *pctrldev,
++ unsigned int group,
++ const unsigned int **pins,
++ unsigned int *num_pins)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ if (group >= p->group_count)
++ return -EINVAL;
++
++ *pins = p->groups[group].func[0].pins;
++ *num_pins = p->groups[group].func[0].pin_count;
++
++ return 0;
++}
++
++static const struct pinctrl_ops ralink_pctrl_ops = {
++ .get_groups_count = ralink_get_group_count,
++ .get_group_name = ralink_get_group_name,
++ .get_group_pins = ralink_get_group_pins,
++ .dt_node_to_map = pinconf_generic_dt_node_to_map_all,
++ .dt_free_map = pinconf_generic_dt_free_map,
++};
++
++static int ralink_pmx_func_count(struct pinctrl_dev *pctrldev)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ return p->func_count;
++}
++
++static const char *ralink_pmx_func_name(struct pinctrl_dev *pctrldev,
++ unsigned int func)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ return p->func[func]->name;
++}
++
++static int ralink_pmx_group_get_groups(struct pinctrl_dev *pctrldev,
++ unsigned int func,
++ const char * const **groups,
++ unsigned int * const num_groups)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ if (p->func[func]->group_count == 1)
++ *groups = &p->group_names[p->func[func]->groups[0]];
++ else
++ *groups = p->group_names;
++
++ *num_groups = p->func[func]->group_count;
++
++ return 0;
++}
++
++static int ralink_pmx_group_enable(struct pinctrl_dev *pctrldev,
++ unsigned int func, unsigned int group)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++ u32 mode = 0;
++ u32 reg = SYSC_REG_GPIO_MODE;
++ int i;
++ int shift;
++
++ /* dont allow double use */
++ if (p->groups[group].enabled) {
++ dev_err(p->dev, "%s is already enabled\n",
++ p->groups[group].name);
++ return 0;
++ }
++
++ p->groups[group].enabled = 1;
++ p->func[func]->enabled = 1;
++
++ shift = p->groups[group].shift;
++ if (shift >= 32) {
++ shift -= 32;
++ reg = SYSC_REG_GPIO_MODE2;
++ }
++ mode = rt_sysc_r32(reg);
++ mode &= ~(p->groups[group].mask << shift);
++
++ /* mark the pins as gpio */
++ for (i = 0; i < p->groups[group].func[0].pin_count; i++)
++ p->gpio[p->groups[group].func[0].pins[i]] = 1;
++
++ /* function 0 is gpio and needs special handling */
++ if (func == 0) {
++ mode |= p->groups[group].gpio << shift;
++ } else {
++ for (i = 0; i < p->func[func]->pin_count; i++)
++ p->gpio[p->func[func]->pins[i]] = 0;
++ mode |= p->func[func]->value << shift;
++ }
++ rt_sysc_w32(mode, reg);
++
++ return 0;
++}
++
++static int ralink_pmx_group_gpio_request_enable(struct pinctrl_dev *pctrldev,
++ struct pinctrl_gpio_range *range,
++ unsigned int pin)
++{
++ struct ralink_priv *p = pinctrl_dev_get_drvdata(pctrldev);
++
++ if (!p->gpio[pin]) {
++ dev_err(p->dev, "pin %d is not set to gpio mux\n", pin);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static const struct pinmux_ops ralink_pmx_group_ops = {
++ .get_functions_count = ralink_pmx_func_count,
++ .get_function_name = ralink_pmx_func_name,
++ .get_function_groups = ralink_pmx_group_get_groups,
++ .set_mux = ralink_pmx_group_enable,
++ .gpio_request_enable = ralink_pmx_group_gpio_request_enable,
++};
++
++static struct pinctrl_desc ralink_pctrl_desc = {
++ .owner = THIS_MODULE,
++ .name = "ralink-pinmux",
++ .pctlops = &ralink_pctrl_ops,
++ .pmxops = &ralink_pmx_group_ops,
++};
++
++static struct ralink_pmx_func gpio_func = {
++ .name = "gpio",
++};
++
++static int ralink_pinmux_index(struct ralink_priv *p)
++{
++ struct ralink_pmx_group *mux = p->groups;
++ int i, j, c = 0;
++
++ /* count the mux functions */
++ while (mux->name) {
++ p->group_count++;
++ mux++;
++ }
++
++ /* allocate the group names array needed by the gpio function */
++ p->group_names = devm_kcalloc(p->dev, p->group_count,
++ sizeof(char *), GFP_KERNEL);
++ if (!p->group_names)
++ return -ENOMEM;
++
++ for (i = 0; i < p->group_count; i++) {
++ p->group_names[i] = p->groups[i].name;
++ p->func_count += p->groups[i].func_count;
++ }
++
++ /* we have a dummy function[0] for gpio */
++ p->func_count++;
++
++ /* allocate our function and group mapping index buffers */
++ p->func = devm_kcalloc(p->dev, p->func_count,
++ sizeof(*p->func), GFP_KERNEL);
++ gpio_func.groups = devm_kcalloc(p->dev, p->group_count, sizeof(int),
++ GFP_KERNEL);
++ if (!p->func || !gpio_func.groups)
++ return -ENOMEM;
++
++ /* add a backpointer to the function so it knows its group */
++ gpio_func.group_count = p->group_count;
++ for (i = 0; i < gpio_func.group_count; i++)
++ gpio_func.groups[i] = i;
++
++ p->func[c] = &gpio_func;
++ c++;
++
++ /* add remaining functions */
++ for (i = 0; i < p->group_count; i++) {
++ for (j = 0; j < p->groups[i].func_count; j++) {
++ p->func[c] = &p->groups[i].func[j];
++ p->func[c]->groups = devm_kzalloc(p->dev, sizeof(int),
++ GFP_KERNEL);
++ if (!p->func[c]->groups)
++ return -ENOMEM;
++ p->func[c]->groups[0] = i;
++ p->func[c]->group_count = 1;
++ c++;
++ }
++ }
++ return 0;
++}
++
++static int ralink_pinmux_pins(struct ralink_priv *p)
++{
++ int i, j;
++
++ /*
++ * loop over the functions and initialize the pins array.
++ * also work out the highest pin used.
++ */
++ for (i = 0; i < p->func_count; i++) {
++ int pin;
++
++ if (!p->func[i]->pin_count)
++ continue;
++
++ p->func[i]->pins = devm_kcalloc(p->dev,
++ p->func[i]->pin_count,
++ sizeof(int),
++ GFP_KERNEL);
++ if (!p->func[i]->pins)
++ return -ENOMEM;
++ for (j = 0; j < p->func[i]->pin_count; j++)
++ p->func[i]->pins[j] = p->func[i]->pin_first + j;
++
++ pin = p->func[i]->pin_first + p->func[i]->pin_count;
++ if (pin > p->max_pins)
++ p->max_pins = pin;
++ }
++
++ /* the buffer that tells us which pins are gpio */
++ p->gpio = devm_kcalloc(p->dev, p->max_pins, sizeof(u8), GFP_KERNEL);
++ /* the pads needed to tell pinctrl about our pins */
++ p->pads = devm_kcalloc(p->dev, p->max_pins,
++ sizeof(struct pinctrl_pin_desc), GFP_KERNEL);
++ if (!p->pads || !p->gpio)
++ return -ENOMEM;
++
++ memset(p->gpio, 1, sizeof(u8) * p->max_pins);
++ for (i = 0; i < p->func_count; i++) {
++ if (!p->func[i]->pin_count)
++ continue;
++
++ for (j = 0; j < p->func[i]->pin_count; j++)
++ p->gpio[p->func[i]->pins[j]] = 0;
++ }
++
++ /* pin 0 is always a gpio */
++ p->gpio[0] = 1;
++
++ /* set the pads */
++ for (i = 0; i < p->max_pins; i++) {
++ /* strlen("ioXY") + 1 = 5 */
++ char *name = devm_kzalloc(p->dev, 5, GFP_KERNEL);
++
++ if (!name)
++ return -ENOMEM;
++ snprintf(name, 5, "io%d", i);
++ p->pads[i].number = i;
++ p->pads[i].name = name;
++ }
++ p->desc->pins = p->pads;
++ p->desc->npins = p->max_pins;
++
++ return 0;
++}
++
++int ralink_pinmux_init(struct platform_device *pdev,
++ struct ralink_pmx_group *data)
++{
++ struct ralink_priv *p;
++ struct pinctrl_dev *dev;
++ int err;
++
++ if (!data)
++ return -ENOTSUPP;
++
++ /* setup the private data */
++ p = devm_kzalloc(&pdev->dev, sizeof(struct ralink_priv), GFP_KERNEL);
++ if (!p)
++ return -ENOMEM;
++
++ p->dev = &pdev->dev;
++ p->desc = &ralink_pctrl_desc;
++ p->groups = data;
++ platform_set_drvdata(pdev, p);
++
++ /* init the device */
++ err = ralink_pinmux_index(p);
++ if (err) {
++ dev_err(&pdev->dev, "failed to load index\n");
++ return err;
++ }
++
++ err = ralink_pinmux_pins(p);
++ if (err) {
++ dev_err(&pdev->dev, "failed to load pins\n");
++ return err;
++ }
++ dev = pinctrl_register(p->desc, &pdev->dev, p);
++
++ return PTR_ERR_OR_ZERO(dev);
++}
+diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.h b/drivers/pinctrl/ralink/pinctrl-ralink.h
+new file mode 100644
+index 0000000000000..1349694095852
+--- /dev/null
++++ b/drivers/pinctrl/ralink/pinctrl-ralink.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2012 John Crispin <john@phrozen.org>
++ */
++
++#ifndef _PINCTRL_RALINK_H__
++#define _PINCTRL_RALINK_H__
++
++#define FUNC(name, value, pin_first, pin_count) \
++ { name, value, pin_first, pin_count }
++
++#define GRP(_name, _func, _mask, _shift) \
++ { .name = _name, .mask = _mask, .shift = _shift, \
++ .func = _func, .gpio = _mask, \
++ .func_count = ARRAY_SIZE(_func) }
++
++#define GRP_G(_name, _func, _mask, _gpio, _shift) \
++ { .name = _name, .mask = _mask, .shift = _shift, \
++ .func = _func, .gpio = _gpio, \
++ .func_count = ARRAY_SIZE(_func) }
++
++struct ralink_pmx_group;
++
++struct ralink_pmx_func {
++ const char *name;
++ const char value;
++
++ int pin_first;
++ int pin_count;
++ int *pins;
++
++ int *groups;
++ int group_count;
++
++ int enabled;
++};
++
++struct ralink_pmx_group {
++ const char *name;
++ int enabled;
++
++ const u32 shift;
++ const char mask;
++ const char gpio;
++
++ struct ralink_pmx_func *func;
++ int func_count;
++};
++
++int ralink_pinmux_init(struct platform_device *pdev,
++ struct ralink_pmx_group *data);
++
++#endif
+diff --git a/drivers/pinctrl/ralink/pinctrl-rt2880.c b/drivers/pinctrl/ralink/pinctrl-rt2880.c
+deleted file mode 100644
+index 96fc06d1b8b92..0000000000000
+--- a/drivers/pinctrl/ralink/pinctrl-rt2880.c
++++ /dev/null
+@@ -1,349 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+- */
+-
+-#include <linux/module.h>
+-#include <linux/device.h>
+-#include <linux/io.h>
+-#include <linux/platform_device.h>
+-#include <linux/slab.h>
+-#include <linux/of.h>
+-#include <linux/pinctrl/pinctrl.h>
+-#include <linux/pinctrl/pinconf.h>
+-#include <linux/pinctrl/pinconf-generic.h>
+-#include <linux/pinctrl/pinmux.h>
+-#include <linux/pinctrl/consumer.h>
+-#include <linux/pinctrl/machine.h>
+-
+-#include <asm/mach-ralink/ralink_regs.h>
+-#include <asm/mach-ralink/mt7620.h>
+-
+-#include "pinmux.h"
+-#include "../core.h"
+-#include "../pinctrl-utils.h"
+-
+-#define SYSC_REG_GPIO_MODE 0x60
+-#define SYSC_REG_GPIO_MODE2 0x64
+-
+-struct rt2880_priv {
+- struct device *dev;
+-
+- struct pinctrl_pin_desc *pads;
+- struct pinctrl_desc *desc;
+-
+- struct rt2880_pmx_func **func;
+- int func_count;
+-
+- struct rt2880_pmx_group *groups;
+- const char **group_names;
+- int group_count;
+-
+- u8 *gpio;
+- int max_pins;
+-};
+-
+-static int rt2880_get_group_count(struct pinctrl_dev *pctrldev)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- return p->group_count;
+-}
+-
+-static const char *rt2880_get_group_name(struct pinctrl_dev *pctrldev,
+- unsigned int group)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- return (group >= p->group_count) ? NULL : p->group_names[group];
+-}
+-
+-static int rt2880_get_group_pins(struct pinctrl_dev *pctrldev,
+- unsigned int group,
+- const unsigned int **pins,
+- unsigned int *num_pins)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- if (group >= p->group_count)
+- return -EINVAL;
+-
+- *pins = p->groups[group].func[0].pins;
+- *num_pins = p->groups[group].func[0].pin_count;
+-
+- return 0;
+-}
+-
+-static const struct pinctrl_ops rt2880_pctrl_ops = {
+- .get_groups_count = rt2880_get_group_count,
+- .get_group_name = rt2880_get_group_name,
+- .get_group_pins = rt2880_get_group_pins,
+- .dt_node_to_map = pinconf_generic_dt_node_to_map_all,
+- .dt_free_map = pinconf_generic_dt_free_map,
+-};
+-
+-static int rt2880_pmx_func_count(struct pinctrl_dev *pctrldev)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- return p->func_count;
+-}
+-
+-static const char *rt2880_pmx_func_name(struct pinctrl_dev *pctrldev,
+- unsigned int func)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- return p->func[func]->name;
+-}
+-
+-static int rt2880_pmx_group_get_groups(struct pinctrl_dev *pctrldev,
+- unsigned int func,
+- const char * const **groups,
+- unsigned int * const num_groups)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- if (p->func[func]->group_count == 1)
+- *groups = &p->group_names[p->func[func]->groups[0]];
+- else
+- *groups = p->group_names;
+-
+- *num_groups = p->func[func]->group_count;
+-
+- return 0;
+-}
+-
+-static int rt2880_pmx_group_enable(struct pinctrl_dev *pctrldev,
+- unsigned int func, unsigned int group)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+- u32 mode = 0;
+- u32 reg = SYSC_REG_GPIO_MODE;
+- int i;
+- int shift;
+-
+- /* dont allow double use */
+- if (p->groups[group].enabled) {
+- dev_err(p->dev, "%s is already enabled\n",
+- p->groups[group].name);
+- return 0;
+- }
+-
+- p->groups[group].enabled = 1;
+- p->func[func]->enabled = 1;
+-
+- shift = p->groups[group].shift;
+- if (shift >= 32) {
+- shift -= 32;
+- reg = SYSC_REG_GPIO_MODE2;
+- }
+- mode = rt_sysc_r32(reg);
+- mode &= ~(p->groups[group].mask << shift);
+-
+- /* mark the pins as gpio */
+- for (i = 0; i < p->groups[group].func[0].pin_count; i++)
+- p->gpio[p->groups[group].func[0].pins[i]] = 1;
+-
+- /* function 0 is gpio and needs special handling */
+- if (func == 0) {
+- mode |= p->groups[group].gpio << shift;
+- } else {
+- for (i = 0; i < p->func[func]->pin_count; i++)
+- p->gpio[p->func[func]->pins[i]] = 0;
+- mode |= p->func[func]->value << shift;
+- }
+- rt_sysc_w32(mode, reg);
+-
+- return 0;
+-}
+-
+-static int rt2880_pmx_group_gpio_request_enable(struct pinctrl_dev *pctrldev,
+- struct pinctrl_gpio_range *range,
+- unsigned int pin)
+-{
+- struct rt2880_priv *p = pinctrl_dev_get_drvdata(pctrldev);
+-
+- if (!p->gpio[pin]) {
+- dev_err(p->dev, "pin %d is not set to gpio mux\n", pin);
+- return -EINVAL;
+- }
+-
+- return 0;
+-}
+-
+-static const struct pinmux_ops rt2880_pmx_group_ops = {
+- .get_functions_count = rt2880_pmx_func_count,
+- .get_function_name = rt2880_pmx_func_name,
+- .get_function_groups = rt2880_pmx_group_get_groups,
+- .set_mux = rt2880_pmx_group_enable,
+- .gpio_request_enable = rt2880_pmx_group_gpio_request_enable,
+-};
+-
+-static struct pinctrl_desc rt2880_pctrl_desc = {
+- .owner = THIS_MODULE,
+- .name = "rt2880-pinmux",
+- .pctlops = &rt2880_pctrl_ops,
+- .pmxops = &rt2880_pmx_group_ops,
+-};
+-
+-static struct rt2880_pmx_func gpio_func = {
+- .name = "gpio",
+-};
+-
+-static int rt2880_pinmux_index(struct rt2880_priv *p)
+-{
+- struct rt2880_pmx_group *mux = p->groups;
+- int i, j, c = 0;
+-
+- /* count the mux functions */
+- while (mux->name) {
+- p->group_count++;
+- mux++;
+- }
+-
+- /* allocate the group names array needed by the gpio function */
+- p->group_names = devm_kcalloc(p->dev, p->group_count,
+- sizeof(char *), GFP_KERNEL);
+- if (!p->group_names)
+- return -ENOMEM;
+-
+- for (i = 0; i < p->group_count; i++) {
+- p->group_names[i] = p->groups[i].name;
+- p->func_count += p->groups[i].func_count;
+- }
+-
+- /* we have a dummy function[0] for gpio */
+- p->func_count++;
+-
+- /* allocate our function and group mapping index buffers */
+- p->func = devm_kcalloc(p->dev, p->func_count,
+- sizeof(*p->func), GFP_KERNEL);
+- gpio_func.groups = devm_kcalloc(p->dev, p->group_count, sizeof(int),
+- GFP_KERNEL);
+- if (!p->func || !gpio_func.groups)
+- return -ENOMEM;
+-
+- /* add a backpointer to the function so it knows its group */
+- gpio_func.group_count = p->group_count;
+- for (i = 0; i < gpio_func.group_count; i++)
+- gpio_func.groups[i] = i;
+-
+- p->func[c] = &gpio_func;
+- c++;
+-
+- /* add remaining functions */
+- for (i = 0; i < p->group_count; i++) {
+- for (j = 0; j < p->groups[i].func_count; j++) {
+- p->func[c] = &p->groups[i].func[j];
+- p->func[c]->groups = devm_kzalloc(p->dev, sizeof(int),
+- GFP_KERNEL);
+- if (!p->func[c]->groups)
+- return -ENOMEM;
+- p->func[c]->groups[0] = i;
+- p->func[c]->group_count = 1;
+- c++;
+- }
+- }
+- return 0;
+-}
+-
+-static int rt2880_pinmux_pins(struct rt2880_priv *p)
+-{
+- int i, j;
+-
+- /*
+- * loop over the functions and initialize the pins array.
+- * also work out the highest pin used.
+- */
+- for (i = 0; i < p->func_count; i++) {
+- int pin;
+-
+- if (!p->func[i]->pin_count)
+- continue;
+-
+- p->func[i]->pins = devm_kcalloc(p->dev,
+- p->func[i]->pin_count,
+- sizeof(int),
+- GFP_KERNEL);
+- for (j = 0; j < p->func[i]->pin_count; j++)
+- p->func[i]->pins[j] = p->func[i]->pin_first + j;
+-
+- pin = p->func[i]->pin_first + p->func[i]->pin_count;
+- if (pin > p->max_pins)
+- p->max_pins = pin;
+- }
+-
+- /* the buffer that tells us which pins are gpio */
+- p->gpio = devm_kcalloc(p->dev, p->max_pins, sizeof(u8), GFP_KERNEL);
+- /* the pads needed to tell pinctrl about our pins */
+- p->pads = devm_kcalloc(p->dev, p->max_pins,
+- sizeof(struct pinctrl_pin_desc), GFP_KERNEL);
+- if (!p->pads || !p->gpio)
+- return -ENOMEM;
+-
+- memset(p->gpio, 1, sizeof(u8) * p->max_pins);
+- for (i = 0; i < p->func_count; i++) {
+- if (!p->func[i]->pin_count)
+- continue;
+-
+- for (j = 0; j < p->func[i]->pin_count; j++)
+- p->gpio[p->func[i]->pins[j]] = 0;
+- }
+-
+- /* pin 0 is always a gpio */
+- p->gpio[0] = 1;
+-
+- /* set the pads */
+- for (i = 0; i < p->max_pins; i++) {
+- /* strlen("ioXY") + 1 = 5 */
+- char *name = devm_kzalloc(p->dev, 5, GFP_KERNEL);
+-
+- if (!name)
+- return -ENOMEM;
+- snprintf(name, 5, "io%d", i);
+- p->pads[i].number = i;
+- p->pads[i].name = name;
+- }
+- p->desc->pins = p->pads;
+- p->desc->npins = p->max_pins;
+-
+- return 0;
+-}
+-
+-int rt2880_pinmux_init(struct platform_device *pdev,
+- struct rt2880_pmx_group *data)
+-{
+- struct rt2880_priv *p;
+- struct pinctrl_dev *dev;
+- int err;
+-
+- if (!data)
+- return -ENOTSUPP;
+-
+- /* setup the private data */
+- p = devm_kzalloc(&pdev->dev, sizeof(struct rt2880_priv), GFP_KERNEL);
+- if (!p)
+- return -ENOMEM;
+-
+- p->dev = &pdev->dev;
+- p->desc = &rt2880_pctrl_desc;
+- p->groups = data;
+- platform_set_drvdata(pdev, p);
+-
+- /* init the device */
+- err = rt2880_pinmux_index(p);
+- if (err) {
+- dev_err(&pdev->dev, "failed to load index\n");
+- return err;
+- }
+-
+- err = rt2880_pinmux_pins(p);
+- if (err) {
+- dev_err(&pdev->dev, "failed to load pins\n");
+- return err;
+- }
+- dev = pinctrl_register(p->desc, &pdev->dev, p);
+-
+- return PTR_ERR_OR_ZERO(dev);
+-}
+diff --git a/drivers/pinctrl/ralink/pinctrl-rt288x.c b/drivers/pinctrl/ralink/pinctrl-rt288x.c
+index 0744aebbace52..40c45140ff8a3 100644
+--- a/drivers/pinctrl/ralink/pinctrl-rt288x.c
++++ b/drivers/pinctrl/ralink/pinctrl-rt288x.c
+@@ -4,7 +4,7 @@
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+-#include "pinmux.h"
++#include "pinctrl-ralink.h"
+
+ #define RT2880_GPIO_MODE_I2C BIT(0)
+ #define RT2880_GPIO_MODE_UART0 BIT(1)
+@@ -15,15 +15,15 @@
+ #define RT2880_GPIO_MODE_SDRAM BIT(6)
+ #define RT2880_GPIO_MODE_PCI BIT(7)
+
+-static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
+-static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
+-static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 7, 8) };
+-static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
+-static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
+-static struct rt2880_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) };
+-static struct rt2880_pmx_func pci_func[] = { FUNC("pci", 0, 40, 32) };
++static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
++static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
++static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 7, 8) };
++static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
++static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
++static struct ralink_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) };
++static struct ralink_pmx_func pci_func[] = { FUNC("pci", 0, 40, 32) };
+
+-static struct rt2880_pmx_group rt2880_pinmux_data_act[] = {
++static struct ralink_pmx_group rt2880_pinmux_data_act[] = {
+ GRP("i2c", i2c_func, 1, RT2880_GPIO_MODE_I2C),
+ GRP("spi", spi_func, 1, RT2880_GPIO_MODE_SPI),
+ GRP("uartlite", uartlite_func, 1, RT2880_GPIO_MODE_UART0),
+@@ -36,7 +36,7 @@ static struct rt2880_pmx_group rt2880_pinmux_data_act[] = {
+
+ static int rt288x_pinmux_probe(struct platform_device *pdev)
+ {
+- return rt2880_pinmux_init(pdev, rt2880_pinmux_data_act);
++ return ralink_pinmux_init(pdev, rt2880_pinmux_data_act);
+ }
+
+ static const struct of_device_id rt288x_pinmux_match[] = {
+diff --git a/drivers/pinctrl/ralink/pinctrl-rt305x.c b/drivers/pinctrl/ralink/pinctrl-rt305x.c
+index 5d8fa156c0037..25527ca1ccaae 100644
+--- a/drivers/pinctrl/ralink/pinctrl-rt305x.c
++++ b/drivers/pinctrl/ralink/pinctrl-rt305x.c
+@@ -5,7 +5,7 @@
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+-#include "pinmux.h"
++#include "pinctrl-ralink.h"
+
+ #define RT305X_GPIO_MODE_UART0_SHIFT 2
+ #define RT305X_GPIO_MODE_UART0_MASK 0x7
+@@ -31,9 +31,9 @@
+ #define RT3352_GPIO_MODE_LNA 18
+ #define RT3352_GPIO_MODE_PA 20
+
+-static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
+-static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
+-static struct rt2880_pmx_func uartf_func[] = {
++static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
++static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
++static struct ralink_pmx_func uartf_func[] = {
+ FUNC("uartf", RT305X_GPIO_MODE_UARTF, 7, 8),
+ FUNC("pcm uartf", RT305X_GPIO_MODE_PCM_UARTF, 7, 8),
+ FUNC("pcm i2s", RT305X_GPIO_MODE_PCM_I2S, 7, 8),
+@@ -42,28 +42,28 @@ static struct rt2880_pmx_func uartf_func[] = {
+ FUNC("gpio uartf", RT305X_GPIO_MODE_GPIO_UARTF, 7, 4),
+ FUNC("gpio i2s", RT305X_GPIO_MODE_GPIO_I2S, 7, 4),
+ };
+-static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
+-static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
+-static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
+-static struct rt2880_pmx_func rt5350_led_func[] = { FUNC("led", 0, 22, 5) };
+-static struct rt2880_pmx_func rt5350_cs1_func[] = {
++static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
++static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
++static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
++static struct ralink_pmx_func rt5350_led_func[] = { FUNC("led", 0, 22, 5) };
++static struct ralink_pmx_func rt5350_cs1_func[] = {
+ FUNC("spi_cs1", 0, 27, 1),
+ FUNC("wdg_cs1", 1, 27, 1),
+ };
+-static struct rt2880_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) };
+-static struct rt2880_pmx_func rt3352_rgmii_func[] = {
++static struct ralink_pmx_func sdram_func[] = { FUNC("sdram", 0, 24, 16) };
++static struct ralink_pmx_func rt3352_rgmii_func[] = {
+ FUNC("rgmii", 0, 24, 12)
+ };
+-static struct rt2880_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) };
+-static struct rt2880_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) };
+-static struct rt2880_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) };
+-static struct rt2880_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) };
+-static struct rt2880_pmx_func rt3352_cs1_func[] = {
++static struct ralink_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) };
++static struct ralink_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) };
++static struct ralink_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) };
++static struct ralink_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) };
++static struct ralink_pmx_func rt3352_cs1_func[] = {
+ FUNC("spi_cs1", 0, 45, 1),
+ FUNC("wdg_cs1", 1, 45, 1),
+ };
+
+-static struct rt2880_pmx_group rt3050_pinmux_data[] = {
++static struct ralink_pmx_group rt3050_pinmux_data[] = {
+ GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C),
+ GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI),
+ GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK,
+@@ -76,7 +76,7 @@ static struct rt2880_pmx_group rt3050_pinmux_data[] = {
+ { 0 }
+ };
+
+-static struct rt2880_pmx_group rt3352_pinmux_data[] = {
++static struct ralink_pmx_group rt3352_pinmux_data[] = {
+ GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C),
+ GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI),
+ GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK,
+@@ -92,7 +92,7 @@ static struct rt2880_pmx_group rt3352_pinmux_data[] = {
+ { 0 }
+ };
+
+-static struct rt2880_pmx_group rt5350_pinmux_data[] = {
++static struct ralink_pmx_group rt5350_pinmux_data[] = {
+ GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C),
+ GRP("spi", spi_func, 1, RT305X_GPIO_MODE_SPI),
+ GRP("uartf", uartf_func, RT305X_GPIO_MODE_UART0_MASK,
+@@ -107,11 +107,11 @@ static struct rt2880_pmx_group rt5350_pinmux_data[] = {
+ static int rt305x_pinmux_probe(struct platform_device *pdev)
+ {
+ if (soc_is_rt5350())
+- return rt2880_pinmux_init(pdev, rt5350_pinmux_data);
++ return ralink_pinmux_init(pdev, rt5350_pinmux_data);
+ else if (soc_is_rt305x() || soc_is_rt3350())
+- return rt2880_pinmux_init(pdev, rt3050_pinmux_data);
++ return ralink_pinmux_init(pdev, rt3050_pinmux_data);
+ else if (soc_is_rt3352())
+- return rt2880_pinmux_init(pdev, rt3352_pinmux_data);
++ return ralink_pinmux_init(pdev, rt3352_pinmux_data);
+ else
+ return -EINVAL;
+ }
+diff --git a/drivers/pinctrl/ralink/pinctrl-rt3883.c b/drivers/pinctrl/ralink/pinctrl-rt3883.c
+index 3e0e1b4caa647..0b8674dbe1880 100644
+--- a/drivers/pinctrl/ralink/pinctrl-rt3883.c
++++ b/drivers/pinctrl/ralink/pinctrl-rt3883.c
+@@ -3,7 +3,7 @@
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+-#include "pinmux.h"
++#include "pinctrl-ralink.h"
+
+ #define RT3883_GPIO_MODE_UART0_SHIFT 2
+ #define RT3883_GPIO_MODE_UART0_MASK 0x7
+@@ -39,9 +39,9 @@
+ #define RT3883_GPIO_MODE_LNA_G_GPIO 0x3
+ #define RT3883_GPIO_MODE_LNA_G _RT3883_GPIO_MODE_LNA_G(RT3883_GPIO_MODE_LNA_G_MASK)
+
+-static struct rt2880_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
+-static struct rt2880_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
+-static struct rt2880_pmx_func uartf_func[] = {
++static struct ralink_pmx_func i2c_func[] = { FUNC("i2c", 0, 1, 2) };
++static struct ralink_pmx_func spi_func[] = { FUNC("spi", 0, 3, 4) };
++static struct ralink_pmx_func uartf_func[] = {
+ FUNC("uartf", RT3883_GPIO_MODE_UARTF, 7, 8),
+ FUNC("pcm uartf", RT3883_GPIO_MODE_PCM_UARTF, 7, 8),
+ FUNC("pcm i2s", RT3883_GPIO_MODE_PCM_I2S, 7, 8),
+@@ -50,21 +50,21 @@ static struct rt2880_pmx_func uartf_func[] = {
+ FUNC("gpio uartf", RT3883_GPIO_MODE_GPIO_UARTF, 7, 4),
+ FUNC("gpio i2s", RT3883_GPIO_MODE_GPIO_I2S, 7, 4),
+ };
+-static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
+-static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
+-static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
+-static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) };
+-static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) };
+-static struct rt2880_pmx_func pci_func[] = {
++static struct ralink_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
++static struct ralink_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
++static struct ralink_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
++static struct ralink_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) };
++static struct ralink_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) };
++static struct ralink_pmx_func pci_func[] = {
+ FUNC("pci-dev", 0, 40, 32),
+ FUNC("pci-host2", 1, 40, 32),
+ FUNC("pci-host1", 2, 40, 32),
+ FUNC("pci-fnc", 3, 40, 32)
+ };
+-static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) };
+-static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) };
++static struct ralink_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) };
++static struct ralink_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) };
+
+-static struct rt2880_pmx_group rt3883_pinmux_data[] = {
++static struct ralink_pmx_group rt3883_pinmux_data[] = {
+ GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C),
+ GRP("spi", spi_func, 1, RT3883_GPIO_MODE_SPI),
+ GRP("uartf", uartf_func, RT3883_GPIO_MODE_UART0_MASK,
+@@ -83,7 +83,7 @@ static struct rt2880_pmx_group rt3883_pinmux_data[] = {
+
+ static int rt3883_pinmux_probe(struct platform_device *pdev)
+ {
+- return rt2880_pinmux_init(pdev, rt3883_pinmux_data);
++ return ralink_pinmux_init(pdev, rt3883_pinmux_data);
+ }
+
+ static const struct of_device_id rt3883_pinmux_match[] = {
+diff --git a/drivers/pinctrl/ralink/pinmux.h b/drivers/pinctrl/ralink/pinmux.h
+deleted file mode 100644
+index 0046abe3bcc79..0000000000000
+--- a/drivers/pinctrl/ralink/pinmux.h
++++ /dev/null
+@@ -1,53 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-only */
+-/*
+- * Copyright (C) 2012 John Crispin <john@phrozen.org>
+- */
+-
+-#ifndef _RT288X_PINMUX_H__
+-#define _RT288X_PINMUX_H__
+-
+-#define FUNC(name, value, pin_first, pin_count) \
+- { name, value, pin_first, pin_count }
+-
+-#define GRP(_name, _func, _mask, _shift) \
+- { .name = _name, .mask = _mask, .shift = _shift, \
+- .func = _func, .gpio = _mask, \
+- .func_count = ARRAY_SIZE(_func) }
+-
+-#define GRP_G(_name, _func, _mask, _gpio, _shift) \
+- { .name = _name, .mask = _mask, .shift = _shift, \
+- .func = _func, .gpio = _gpio, \
+- .func_count = ARRAY_SIZE(_func) }
+-
+-struct rt2880_pmx_group;
+-
+-struct rt2880_pmx_func {
+- const char *name;
+- const char value;
+-
+- int pin_first;
+- int pin_count;
+- int *pins;
+-
+- int *groups;
+- int group_count;
+-
+- int enabled;
+-};
+-
+-struct rt2880_pmx_group {
+- const char *name;
+- int enabled;
+-
+- const u32 shift;
+- const char mask;
+- const char gpio;
+-
+- struct rt2880_pmx_func *func;
+- int func_count;
+-};
+-
+-int rt2880_pinmux_init(struct platform_device *pdev,
+- struct rt2880_pmx_group *data);
+-
+-#endif
+diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c
+index f2ab02225837e..75fc420b6bdf1 100644
+--- a/drivers/pinctrl/renesas/core.c
++++ b/drivers/pinctrl/renesas/core.c
+@@ -71,12 +71,11 @@ static int sh_pfc_map_resources(struct sh_pfc *pfc,
+
+ /* Fill them. */
+ for (i = 0; i < num_windows; i++) {
+- res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+- windows->phys = res->start;
+- windows->size = resource_size(res);
+- windows->virt = devm_ioremap_resource(pfc->dev, res);
++ windows->virt = devm_platform_get_and_ioremap_resource(pdev, i, &res);
+ if (IS_ERR(windows->virt))
+ return -ENOMEM;
++ windows->phys = res->start;
++ windows->size = resource_size(res);
+ windows++;
+ }
+ for (i = 0; i < num_irqs; i++)
+@@ -739,7 +738,7 @@ static int sh_pfc_suspend_init(struct sh_pfc *pfc) { return 0; }
+
+ #ifdef DEBUG
+ #define SH_PFC_MAX_REGS 300
+-#define SH_PFC_MAX_ENUMS 3000
++#define SH_PFC_MAX_ENUMS 5000
+
+ static unsigned int sh_pfc_errors __initdata = 0;
+ static unsigned int sh_pfc_warnings __initdata = 0;
+@@ -851,7 +850,8 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname,
+ sh_pfc_check_reg(drvname, cfg_reg->reg);
+
+ if (cfg_reg->field_width) {
+- n = cfg_reg->reg_width / cfg_reg->field_width;
++ fw = cfg_reg->field_width;
++ n = (cfg_reg->reg_width / fw) << fw;
+ /* Skip field checks (done at build time) */
+ goto check_enum_ids;
+ }
+@@ -890,7 +890,7 @@ static void __init sh_pfc_check_drive_reg(const struct sh_pfc_soc_info *info,
+ if (!field->pin && !field->offset && !field->size)
+ continue;
+
+- mask = GENMASK(field->offset + field->size, field->offset);
++ mask = GENMASK(field->offset + field->size - 1, field->offset);
+ if (mask & seen)
+ sh_pfc_err("drive_reg 0x%x: field %u overlap\n",
+ drive->reg, i);
+diff --git a/drivers/pinctrl/renesas/pfc-r8a77470.c b/drivers/pinctrl/renesas/pfc-r8a77470.c
+index e6e5487691c16..cf7153d06a953 100644
+--- a/drivers/pinctrl/renesas/pfc-r8a77470.c
++++ b/drivers/pinctrl/renesas/pfc-r8a77470.c
+@@ -2140,7 +2140,7 @@ static const unsigned int vin0_clk_mux[] = {
+ VI0_CLK_MARK,
+ };
+ /* - VIN1 ------------------------------------------------------------------- */
+-static const union vin_data vin1_data_pins = {
++static const union vin_data12 vin1_data_pins = {
+ .data12 = {
+ RCAR_GP_PIN(3, 1), RCAR_GP_PIN(3, 2),
+ RCAR_GP_PIN(3, 3), RCAR_GP_PIN(3, 4),
+@@ -2150,7 +2150,7 @@ static const union vin_data vin1_data_pins = {
+ RCAR_GP_PIN(3, 15), RCAR_GP_PIN(3, 16),
+ },
+ };
+-static const union vin_data vin1_data_mux = {
++static const union vin_data12 vin1_data_mux = {
+ .data12 = {
+ VI1_DATA0_MARK, VI1_DATA1_MARK,
+ VI1_DATA2_MARK, VI1_DATA3_MARK,
+diff --git a/drivers/pinctrl/renesas/pfc-r8a779a0.c b/drivers/pinctrl/renesas/pfc-r8a779a0.c
+index ad6532443a785..aa4fd56e0250d 100644
+--- a/drivers/pinctrl/renesas/pfc-r8a779a0.c
++++ b/drivers/pinctrl/renesas/pfc-r8a779a0.c
+@@ -629,7 +629,36 @@ enum {
+ };
+
+ static const u16 pinmux_data[] = {
++/* Using GP_2_[2-15] requires disabling I2C in MOD_SEL2 */
++#define GP_2_2_FN GP_2_2_FN, FN_SEL_I2C0_0
++#define GP_2_3_FN GP_2_3_FN, FN_SEL_I2C0_0
++#define GP_2_4_FN GP_2_4_FN, FN_SEL_I2C1_0
++#define GP_2_5_FN GP_2_5_FN, FN_SEL_I2C1_0
++#define GP_2_6_FN GP_2_6_FN, FN_SEL_I2C2_0
++#define GP_2_7_FN GP_2_7_FN, FN_SEL_I2C2_0
++#define GP_2_8_FN GP_2_8_FN, FN_SEL_I2C3_0
++#define GP_2_9_FN GP_2_9_FN, FN_SEL_I2C3_0
++#define GP_2_10_FN GP_2_10_FN, FN_SEL_I2C4_0
++#define GP_2_11_FN GP_2_11_FN, FN_SEL_I2C4_0
++#define GP_2_12_FN GP_2_12_FN, FN_SEL_I2C5_0
++#define GP_2_13_FN GP_2_13_FN, FN_SEL_I2C5_0
++#define GP_2_14_FN GP_2_14_FN, FN_SEL_I2C6_0
++#define GP_2_15_FN GP_2_15_FN, FN_SEL_I2C6_0
+ PINMUX_DATA_GP_ALL(),
++#undef GP_2_2_FN
++#undef GP_2_3_FN
++#undef GP_2_4_FN
++#undef GP_2_5_FN
++#undef GP_2_6_FN
++#undef GP_2_7_FN
++#undef GP_2_8_FN
++#undef GP_2_9_FN
++#undef GP_2_10_FN
++#undef GP_2_11_FN
++#undef GP_2_12_FN
++#undef GP_2_13_FN
++#undef GP_2_14_FN
++#undef GP_2_15_FN
+
+ PINMUX_SINGLE(MMC_D7),
+ PINMUX_SINGLE(MMC_D6),
+@@ -681,16 +710,8 @@ static const u16 pinmux_data[] = {
+ PINMUX_SINGLE(PCIE0_CLKREQ_N),
+
+ PINMUX_SINGLE(AVB0_PHY_INT),
+- PINMUX_SINGLE(AVB0_MAGIC),
+- PINMUX_SINGLE(AVB0_MDC),
+- PINMUX_SINGLE(AVB0_MDIO),
+- PINMUX_SINGLE(AVB0_TXCREFCLK),
+
+ PINMUX_SINGLE(AVB1_PHY_INT),
+- PINMUX_SINGLE(AVB1_MAGIC),
+- PINMUX_SINGLE(AVB1_MDC),
+- PINMUX_SINGLE(AVB1_MDIO),
+- PINMUX_SINGLE(AVB1_TXCREFCLK),
+
+ PINMUX_SINGLE(AVB2_AVTP_PPS),
+ PINMUX_SINGLE(AVB2_AVTP_CAPTURE),
+diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c
+index 32829eb9656c9..ddd8ee6b604ef 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rza2.c
++++ b/drivers/pinctrl/renesas/pinctrl-rza2.c
+@@ -14,6 +14,7 @@
+ #include <linux/gpio/driver.h>
+ #include <linux/io.h>
+ #include <linux/module.h>
++#include <linux/mutex.h>
+ #include <linux/of_device.h>
+ #include <linux/pinctrl/pinmux.h>
+
+@@ -46,6 +47,7 @@ struct rza2_pinctrl_priv {
+ struct pinctrl_dev *pctl;
+ struct pinctrl_gpio_range gpio_range;
+ int npins;
++ struct mutex mutex; /* serialize adding groups and functions */
+ };
+
+ #define RZA2_PDR(port) (0x0000 + (port) * 2) /* Direction 16-bit */
+@@ -359,10 +361,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
+ psel_val[i] = MUX_FUNC(value);
+ }
+
++ mutex_lock(&priv->mutex);
++
+ /* Register a single pin group listing all the pins we read from DT */
+ gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL);
+- if (gsel < 0)
+- return gsel;
++ if (gsel < 0) {
++ ret = gsel;
++ goto unlock;
++ }
+
+ /*
+ * Register a single group function where the 'data' is an array PSEL
+@@ -391,6 +397,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
+ (*map)->data.mux.function = np->name;
+ *num_maps = 1;
+
++ mutex_unlock(&priv->mutex);
++
+ return 0;
+
+ remove_function:
+@@ -399,6 +407,9 @@ remove_function:
+ remove_group:
+ pinctrl_generic_remove_group(pctldev, gsel);
+
++unlock:
++ mutex_unlock(&priv->mutex);
++
+ dev_err(priv->dev, "Unable to parse DT node %s\n", np->name);
+
+ return ret;
+@@ -474,6 +485,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
++ mutex_init(&priv->mutex);
++
+ platform_set_drvdata(pdev, priv);
+
+ priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) *
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+index dbf2f521bb272..20b2af889ca96 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+@@ -852,7 +852,7 @@ static const u32 rzg2l_gpio_configs[] = {
+ RZG2L_GPIO_PORT_PACK(2, 0x1e, RZG2L_MPXED_PIN_FUNCS),
+ RZG2L_GPIO_PORT_PACK(2, 0x1f, RZG2L_MPXED_PIN_FUNCS),
+ RZG2L_GPIO_PORT_PACK(2, 0x20, RZG2L_MPXED_PIN_FUNCS),
+- RZG2L_GPIO_PORT_PACK(3, 0x22, RZG2L_MPXED_PIN_FUNCS),
++ RZG2L_GPIO_PORT_PACK(3, 0x21, RZG2L_MPXED_PIN_FUNCS),
+ RZG2L_GPIO_PORT_PACK(2, 0x22, RZG2L_MPXED_PIN_FUNCS),
+ RZG2L_GPIO_PORT_PACK(2, 0x23, RZG2L_MPXED_PIN_FUNCS),
+ RZG2L_GPIO_PORT_PACK(3, 0x24, RZG2L_MPXED_ETH_PIN_FUNCS(PIN_CFG_IOLH_ETH0)),
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzn1.c b/drivers/pinctrl/renesas/pinctrl-rzn1.c
+index ef5fb25b6016d..849d091205d4d 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzn1.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzn1.c
+@@ -865,17 +865,15 @@ static int rzn1_pinctrl_probe(struct platform_device *pdev)
+ ipctl->mdio_func[0] = -1;
+ ipctl->mdio_func[1] = -1;
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- ipctl->lev1_protect_phys = (u32)res->start + 0x400;
+- ipctl->lev1 = devm_ioremap_resource(&pdev->dev, res);
++ ipctl->lev1 = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(ipctl->lev1))
+ return PTR_ERR(ipctl->lev1);
++ ipctl->lev1_protect_phys = (u32)res->start + 0x400;
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+- ipctl->lev2_protect_phys = (u32)res->start + 0x400;
+- ipctl->lev2 = devm_ioremap_resource(&pdev->dev, res);
++ ipctl->lev2 = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
+ if (IS_ERR(ipctl->lev2))
+ return PTR_ERR(ipctl->lev2);
++ ipctl->lev2_protect_phys = (u32)res->start + 0x400;
+
+ ipctl->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(ipctl->clk))
+diff --git a/drivers/pinctrl/samsung/Kconfig b/drivers/pinctrl/samsung/Kconfig
+index dfd805e768624..7b0576f71376e 100644
+--- a/drivers/pinctrl/samsung/Kconfig
++++ b/drivers/pinctrl/samsung/Kconfig
+@@ -4,14 +4,13 @@
+ #
+ config PINCTRL_SAMSUNG
+ bool
+- depends on OF_GPIO
++ select GPIOLIB
+ select PINMUX
+ select PINCONF
+
+ config PINCTRL_EXYNOS
+ bool "Pinctrl common driver part for Samsung Exynos SoCs"
+- depends on OF_GPIO
+- depends on ARCH_EXYNOS || ARCH_S5PV210 || COMPILE_TEST
++ depends on ARCH_EXYNOS || ARCH_S5PV210 || (COMPILE_TEST && OF)
+ select PINCTRL_SAMSUNG
+ select PINCTRL_EXYNOS_ARM if ARM && (ARCH_EXYNOS || ARCH_S5PV210)
+ select PINCTRL_EXYNOS_ARM64 if ARM64 && ARCH_EXYNOS
+@@ -26,12 +25,10 @@ config PINCTRL_EXYNOS_ARM64
+
+ config PINCTRL_S3C24XX
+ bool "Samsung S3C24XX SoC pinctrl driver"
+- depends on OF_GPIO
+- depends on ARCH_S3C24XX || COMPILE_TEST
++ depends on ARCH_S3C24XX || (COMPILE_TEST && OF)
+ select PINCTRL_SAMSUNG
+
+ config PINCTRL_S3C64XX
+ bool "Samsung S3C64XX SoC pinctrl driver"
+- depends on OF_GPIO
+- depends on ARCH_S3C64XX || COMPILE_TEST
++ depends on ARCH_S3C64XX || (COMPILE_TEST && OF)
+ select PINCTRL_SAMSUNG
+diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
+index fe5f6046fbd52..cc66f852ef7bc 100644
+--- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
++++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
+@@ -504,13 +504,11 @@ static const struct samsung_pin_ctrl exynos850_pin_ctrl[] __initconst = {
+ /* pin-controller instance 0 ALIVE data */
+ .pin_banks = exynos850_pin_banks0,
+ .nr_banks = ARRAY_SIZE(exynos850_pin_banks0),
+- .eint_gpio_init = exynos_eint_gpio_init,
+ .eint_wkup_init = exynos_eint_wkup_init,
+ }, {
+ /* pin-controller instance 1 CMGP data */
+ .pin_banks = exynos850_pin_banks1,
+ .nr_banks = ARRAY_SIZE(exynos850_pin_banks1),
+- .eint_gpio_init = exynos_eint_gpio_init,
+ .eint_wkup_init = exynos_eint_wkup_init,
+ }, {
+ /* pin-controller instance 2 AUD data */
+diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
+index 2a0fc63516f12..463b9e578237e 100644
+--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
++++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
+@@ -1002,6 +1002,16 @@ samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev)
+ return &(of_data->ctrl[id]);
+ }
+
++static void samsung_banks_of_node_put(struct samsung_pinctrl_drv_data *d)
++{
++ struct samsung_pin_bank *bank;
++ unsigned int i;
++
++ bank = d->pin_banks;
++ for (i = 0; i < d->nr_banks; ++i, ++bank)
++ of_node_put(bank->of_node);
++}
++
+ /* retrieve the soc specific data */
+ static const struct samsung_pin_ctrl *
+ samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d,
+@@ -1116,19 +1126,19 @@ static int samsung_pinctrl_probe(struct platform_device *pdev)
+ if (ctrl->retention_data) {
+ drvdata->retention_ctrl = ctrl->retention_data->init(drvdata,
+ ctrl->retention_data);
+- if (IS_ERR(drvdata->retention_ctrl))
+- return PTR_ERR(drvdata->retention_ctrl);
++ if (IS_ERR(drvdata->retention_ctrl)) {
++ ret = PTR_ERR(drvdata->retention_ctrl);
++ goto err_put_banks;
++ }
+ }
+
+ ret = samsung_pinctrl_register(pdev, drvdata);
+ if (ret)
+- return ret;
++ goto err_put_banks;
+
+ ret = samsung_gpiolib_register(pdev, drvdata);
+- if (ret) {
+- samsung_pinctrl_unregister(pdev, drvdata);
+- return ret;
+- }
++ if (ret)
++ goto err_unregister;
+
+ if (ctrl->eint_gpio_init)
+ ctrl->eint_gpio_init(drvdata);
+@@ -1138,6 +1148,12 @@ static int samsung_pinctrl_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, drvdata);
+
+ return 0;
++
++err_unregister:
++ samsung_pinctrl_unregister(pdev, drvdata);
++err_put_banks:
++ samsung_banks_of_node_put(drvdata);
++ return ret;
+ }
+
+ /*
+diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
+index dfd8888a222a4..abb12a5c3c329 100644
+--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
++++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
+@@ -225,6 +225,13 @@ static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset)
+ pinctrl_gpio_free(chip->base + offset);
+ }
+
++static int stm32_gpio_get_noclk(struct gpio_chip *chip, unsigned int offset)
++{
++ struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
++
++ return !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
++}
++
+ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
+ {
+ struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
+@@ -232,7 +239,7 @@ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
+
+ clk_enable(bank->clk);
+
+- ret = !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
++ ret = stm32_gpio_get_noclk(chip, offset);
+
+ clk_disable(bank->clk);
+
+@@ -311,8 +318,12 @@ static void stm32_gpio_irq_trigger(struct irq_data *d)
+ struct stm32_gpio_bank *bank = d->domain->host_data;
+ int level;
+
++ /* Do not access the GPIO if this is not LEVEL triggered IRQ. */
++ if (!(bank->irq_type[d->hwirq] & IRQ_TYPE_LEVEL_MASK))
++ return;
++
+ /* If level interrupt type then retrig */
+- level = stm32_gpio_get(&bank->gpio_chip, d->hwirq);
++ level = stm32_gpio_get_noclk(&bank->gpio_chip, d->hwirq);
+ if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) ||
+ (level == 1 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_HIGH))
+ irq_chip_retrigger_hierarchy(d);
+@@ -354,6 +365,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
+ {
+ struct stm32_gpio_bank *bank = irq_data->domain->host_data;
+ struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
++ unsigned long flags;
+ int ret;
+
+ ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq);
+@@ -367,6 +379,10 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
+ return ret;
+ }
+
++ flags = irqd_get_trigger_type(irq_data);
++ if (flags & IRQ_TYPE_LEVEL_MASK)
++ clk_enable(bank->clk);
++
+ return 0;
+ }
+
+@@ -374,6 +390,9 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data)
+ {
+ struct stm32_gpio_bank *bank = irq_data->domain->host_data;
+
++ if (bank->irq_type[irq_data->hwirq] & IRQ_TYPE_LEVEL_MASK)
++ clk_disable(bank->clk);
++
+ gpiochip_unlock_as_irq(&bank->gpio_chip, irq_data->hwirq);
+ }
+
+@@ -1251,10 +1270,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
+ bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK;
+ bank->gpio_chip.base = args.args[1];
+
+- npins = args.args[2];
+- while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
+- ++i, &args))
+- npins += args.args[2];
++ /* get the last defined gpio line (offset + nb of pins) */
++ npins = args.args[0] + args.args[2];
++ while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args))
++ npins = max(npins, (int)(args.args[0] + args.args[2]));
+ } else {
+ bank_nr = pctl->nbanks;
+ bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
+@@ -1280,15 +1299,17 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
+ bank->bank_ioport_nr = bank_ioport_nr;
+ spin_lock_init(&bank->lock);
+
+- /* create irq hierarchical domain */
+- bank->fwnode = of_node_to_fwnode(np);
++ if (pctl->domain) {
++ /* create irq hierarchical domain */
++ bank->fwnode = of_node_to_fwnode(np);
+
+- bank->domain = irq_domain_create_hierarchy(pctl->domain, 0,
+- STM32_GPIO_IRQ_LINE, bank->fwnode,
+- &stm32_gpio_domain_ops, bank);
++ bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE,
++ bank->fwnode, &stm32_gpio_domain_ops,
++ bank);
+
+- if (!bank->domain)
+- return -ENODEV;
++ if (!bank->domain)
++ return -ENODEV;
++ }
+
+ err = gpiochip_add_data(&bank->gpio_chip, bank);
+ if (err) {
+@@ -1313,6 +1334,7 @@ static struct irq_domain *stm32_pctrl_get_irq_domain(struct device_node *np)
+ return ERR_PTR(-ENXIO);
+
+ domain = irq_find_host(parent);
++ of_node_put(parent);
+ if (!domain)
+ /* domain not registered yet */
+ return ERR_PTR(-EPROBE_DEFER);
+@@ -1458,6 +1480,8 @@ int stm32_pctl_probe(struct platform_device *pdev)
+ pctl->domain = stm32_pctrl_get_irq_domain(np);
+ if (IS_ERR(pctl->domain))
+ return PTR_ERR(pctl->domain);
++ if (!pctl->domain)
++ dev_warn(dev, "pinctrl without interrupt support\n");
+
+ /* hwspinlock is optional */
+ hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0);
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c
+index 21054fcacd345..18088f6f44b23 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c
+@@ -98,7 +98,7 @@ MODULE_DEVICE_TABLE(of, a100_r_pinctrl_match);
+ static struct platform_driver a100_r_pinctrl_driver = {
+ .probe = a100_r_pinctrl_probe,
+ .driver = {
+- .name = "sun50iw10p1-r-pinctrl",
++ .name = "sun50i-a100-r-pinctrl",
+ .of_match_table = a100_r_pinctrl_match,
+ },
+ };
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6-r.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6-r.c
+index c7d90c44e87aa..7b4b9f3d45558 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6-r.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6-r.c
+@@ -107,6 +107,7 @@ static const struct sunxi_pinctrl_desc sun50i_h6_r_pinctrl_data = {
+ .npins = ARRAY_SIZE(sun50i_h6_r_pins),
+ .pin_base = PL_BASE,
+ .irq_banks = 2,
++ .io_bias_cfg_variant = BIAS_VOLTAGE_PIO_POW_MODE_SEL,
+ };
+
+ static int sun50i_h6_r_pinctrl_probe(struct platform_device *pdev)
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
+index ce1917e230f41..152b71226a807 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c
+@@ -363,16 +363,16 @@ static const struct sunxi_desc_pin h616_pins[] = {
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart2"), /* CTS */
+- SUNXI_FUNCTION(0x3, "i2s3"), /* DO0 */
++ SUNXI_FUNCTION(0x3, "i2s3_dout0"), /* DO0 */
+ SUNXI_FUNCTION(0x4, "spi1"), /* MISO */
+- SUNXI_FUNCTION(0x5, "i2s3"), /* DI1 */
++ SUNXI_FUNCTION(0x5, "i2s3_din1"), /* DI1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 8)), /* PH_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x3, "i2s3"), /* DI0 */
++ SUNXI_FUNCTION(0x3, "i2s3_din0"), /* DI0 */
+ SUNXI_FUNCTION(0x4, "spi1"), /* CS1 */
+- SUNXI_FUNCTION(0x3, "i2s3"), /* DO1 */
++ SUNXI_FUNCTION(0x5, "i2s3_dout1"), /* DO1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 9)), /* PH_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
+index 4ada80317a3bd..b5c1a8f363f32 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
+@@ -158,26 +158,26 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = {
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */
++ SUNXI_FUNCTION(0x2, "nand0"), /* DQ6 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */
++ SUNXI_FUNCTION(0x2, "nand0"), /* DQ7 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "nand"), /* DQS */
++ SUNXI_FUNCTION(0x2, "nand0"), /* DQS */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* RST */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 17),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "nand")), /* CE2 */
++ SUNXI_FUNCTION(0x2, "nand0")), /* CE2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 18),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "nand")), /* CE3 */
++ SUNXI_FUNCTION(0x2, "nand0")), /* CE3 */
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
+index 2801ca7062732..68a5b627fb9b2 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
++++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
+@@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "lcd"), /* D20 */
+- SUNXI_FUNCTION(0x3, "lvds1"), /* RX */
++ SUNXI_FUNCTION(0x3, "uart2"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)),
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+index 862c84efb718f..30ca0fe5c31a4 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+@@ -36,6 +36,13 @@
+ #include "../core.h"
+ #include "pinctrl-sunxi.h"
+
++/*
++ * These lock classes tell lockdep that GPIO IRQs are in a different
++ * category than their parents, so it won't report false recursion.
++ */
++static struct lock_class_key sunxi_pinctrl_irq_lock_class;
++static struct lock_class_key sunxi_pinctrl_irq_request_class;
++
+ static struct irq_chip sunxi_pinctrl_edge_irq_chip;
+ static struct irq_chip sunxi_pinctrl_level_irq_chip;
+
+@@ -537,6 +544,8 @@ static int sunxi_pconf_set(struct pinctrl_dev *pctldev, unsigned pin,
+ struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ int i;
+
++ pin -= pctl->desc->pin_base;
++
+ for (i = 0; i < num_configs; i++) {
+ enum pin_config_param param;
+ unsigned long flags;
+@@ -615,7 +624,7 @@ static int sunxi_pinctrl_set_io_bias_cfg(struct sunxi_pinctrl *pctl,
+ unsigned pin,
+ struct regulator *supply)
+ {
+- unsigned short bank = pin / PINS_PER_BANK;
++ unsigned short bank;
+ unsigned long flags;
+ u32 val, reg;
+ int uV;
+@@ -631,6 +640,9 @@ static int sunxi_pinctrl_set_io_bias_cfg(struct sunxi_pinctrl *pctl,
+ if (uV == 0)
+ return 0;
+
++ pin -= pctl->desc->pin_base;
++ bank = pin / PINS_PER_BANK;
++
+ switch (pctl->desc->io_bias_cfg_variant) {
+ case BIAS_VOLTAGE_GRP_CONFIG:
+ /*
+@@ -648,8 +660,6 @@ static int sunxi_pinctrl_set_io_bias_cfg(struct sunxi_pinctrl *pctl,
+ else
+ val = 0xD; /* 3.3V */
+
+- pin -= pctl->desc->pin_base;
+-
+ reg = readl(pctl->membase + sunxi_grp_config_reg(pin));
+ reg &= ~IO_BIAS_MASK;
+ writel(reg | val, pctl->membase + sunxi_grp_config_reg(pin));
+@@ -1551,6 +1561,8 @@ int sunxi_pinctrl_init_with_variant(struct platform_device *pdev,
+ for (i = 0; i < (pctl->desc->irq_banks * IRQ_PER_BANK); i++) {
+ int irqno = irq_create_mapping(pctl->domain, i);
+
++ irq_set_lockdep_class(irqno, &sunxi_pinctrl_irq_lock_class,
++ &sunxi_pinctrl_irq_request_class);
+ irq_set_chip_and_handler(irqno, &sunxi_pinctrl_edge_irq_chip,
+ handle_edge_irq);
+ irq_set_chip_data(irqno, pctl);
+diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
+index f901d2e43166c..88cbc434c06b2 100644
+--- a/drivers/platform/chrome/Makefile
++++ b/drivers/platform/chrome/Makefile
+@@ -2,6 +2,7 @@
+
+ # tell define_trace.h where to find the cros ec trace header
+ CFLAGS_cros_ec_trace.o:= -I$(src)
++CFLAGS_cros_ec_sensorhub_ring.o:= -I$(src)
+
+ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o
+ obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o
+@@ -20,7 +21,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o
+ obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o
+ obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o
+ obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o
+-cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o
++cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o
+ obj-$(CONFIG_CROS_EC_SENSORHUB) += cros-ec-sensorhub.o
+ obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o
+ obj-$(CONFIG_CROS_USBPD_LOGGER) += cros_usbpd_logger.o
+diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
+index 4e14b4d6635d7..a2cdbfbaeae6b 100644
+--- a/drivers/platform/chrome/chromeos_laptop.c
++++ b/drivers/platform/chrome/chromeos_laptop.c
+@@ -740,6 +740,7 @@ static int __init
+ chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop,
+ const struct chromeos_laptop *src)
+ {
++ struct i2c_peripheral *i2c_peripherals;
+ struct i2c_peripheral *i2c_dev;
+ struct i2c_board_info *info;
+ int i;
+@@ -748,17 +749,15 @@ chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop,
+ if (!src->num_i2c_peripherals)
+ return 0;
+
+- cros_laptop->i2c_peripherals = kmemdup(src->i2c_peripherals,
+- src->num_i2c_peripherals *
+- sizeof(*src->i2c_peripherals),
+- GFP_KERNEL);
+- if (!cros_laptop->i2c_peripherals)
++ i2c_peripherals = kmemdup(src->i2c_peripherals,
++ src->num_i2c_peripherals *
++ sizeof(*src->i2c_peripherals),
++ GFP_KERNEL);
++ if (!i2c_peripherals)
+ return -ENOMEM;
+
+- cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals;
+-
+- for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) {
+- i2c_dev = &cros_laptop->i2c_peripherals[i];
++ for (i = 0; i < src->num_i2c_peripherals; i++) {
++ i2c_dev = &i2c_peripherals[i];
+ info = &i2c_dev->board_info;
+
+ error = chromeos_laptop_setup_irq(i2c_dev);
+@@ -775,16 +774,19 @@ chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop,
+ }
+ }
+
++ cros_laptop->i2c_peripherals = i2c_peripherals;
++ cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals;
++
+ return 0;
+
+ err_out:
+ while (--i >= 0) {
+- i2c_dev = &cros_laptop->i2c_peripherals[i];
++ i2c_dev = &i2c_peripherals[i];
+ info = &i2c_dev->board_info;
+ if (!IS_ERR_OR_NULL(info->fwnode))
+ fwnode_remove_software_node(info->fwnode);
+ }
+- kfree(cros_laptop->i2c_peripherals);
++ kfree(i2c_peripherals);
+ return error;
+ }
+
+diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c
+index fc5aa1525d13c..9664e13ded590 100644
+--- a/drivers/platform/chrome/cros_ec.c
++++ b/drivers/platform/chrome/cros_ec.c
+@@ -135,16 +135,16 @@ static int cros_ec_sleep_event(struct cros_ec_device *ec_dev, u8 sleep_event)
+ buf.msg.command = EC_CMD_HOST_SLEEP_EVENT;
+
+ ret = cros_ec_cmd_xfer_status(ec_dev, &buf.msg);
+-
+- /* For now, report failure to transition to S0ix with a warning. */
++ /* Report failure to transition to system wide suspend with a warning. */
+ if (ret >= 0 && ec_dev->host_sleep_v1 &&
+- (sleep_event == HOST_SLEEP_EVENT_S0IX_RESUME)) {
++ (sleep_event == HOST_SLEEP_EVENT_S0IX_RESUME ||
++ sleep_event == HOST_SLEEP_EVENT_S3_RESUME)) {
+ ec_dev->last_resume_result =
+ buf.u.resp1.resume_response.sleep_transitions;
+
+ WARN_ONCE(buf.u.resp1.resume_response.sleep_transitions &
+ EC_HOST_RESUME_SLEEP_TIMEOUT,
+- "EC detected sleep transition timeout. Total slp_s0 transitions: %d",
++ "EC detected sleep transition timeout. Total sleep transitions: %d",
+ buf.u.resp1.resume_response.sleep_transitions &
+ EC_HOST_RESUME_SLEEP_TRANSITIONS_MASK);
+ }
+@@ -189,6 +189,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
+ ec_dev->max_request = sizeof(struct ec_params_hello);
+ ec_dev->max_response = sizeof(struct ec_response_get_protocol_info);
+ ec_dev->max_passthru = 0;
++ ec_dev->ec = NULL;
++ ec_dev->pd = NULL;
+
+ ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL);
+ if (!ec_dev->din)
+@@ -245,18 +247,16 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
+ if (IS_ERR(ec_dev->pd)) {
+ dev_err(ec_dev->dev,
+ "Failed to create CrOS PD platform device\n");
+- platform_device_unregister(ec_dev->ec);
+- return PTR_ERR(ec_dev->pd);
++ err = PTR_ERR(ec_dev->pd);
++ goto exit;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
+ err = devm_of_platform_populate(dev);
+ if (err) {
+- platform_device_unregister(ec_dev->pd);
+- platform_device_unregister(ec_dev->ec);
+ dev_err(dev, "Failed to register sub-devices\n");
+- return err;
++ goto exit;
+ }
+ }
+
+@@ -278,7 +278,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
+ err = blocking_notifier_chain_register(&ec_dev->event_notifier,
+ &ec_dev->notifier_ready);
+ if (err)
+- return err;
++ goto exit;
+ }
+
+ dev_info(dev, "Chrome EC device registered\n");
+@@ -291,6 +291,10 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
+ cros_ec_irq_thread(0, ec_dev);
+
+ return 0;
++exit:
++ platform_device_unregister(ec_dev->ec);
++ platform_device_unregister(ec_dev->pd);
++ return err;
+ }
+ EXPORT_SYMBOL(cros_ec_register);
+
+@@ -349,10 +353,16 @@ EXPORT_SYMBOL(cros_ec_suspend);
+
+ static void cros_ec_report_events_during_suspend(struct cros_ec_device *ec_dev)
+ {
++ bool wake_event;
++
+ while (ec_dev->mkbp_event_supported &&
+- cros_ec_get_next_event(ec_dev, NULL, NULL) > 0)
++ cros_ec_get_next_event(ec_dev, &wake_event, NULL) > 0) {
+ blocking_notifier_call_chain(&ec_dev->event_notifier,
+ 1, ec_dev);
++
++ if (wake_event && device_may_wakeup(ec_dev->dev))
++ pm_wakeup_event(ec_dev->dev, 0);
++ }
+ }
+
+ /**
+diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c
+index e0bce869c49a9..d6de5a2941282 100644
+--- a/drivers/platform/chrome/cros_ec_chardev.c
++++ b/drivers/platform/chrome/cros_ec_chardev.c
+@@ -284,7 +284,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
+ u_cmd.insize > EC_MAX_MSG_BYTES)
+ return -EINVAL;
+
+- s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize),
++ s_cmd = kzalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize),
+ GFP_KERNEL);
+ if (!s_cmd)
+ return -ENOMEM;
+@@ -301,7 +301,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
+ }
+
+ s_cmd->command += ec->cmd_offset;
+- ret = cros_ec_cmd_xfer_status(ec->ec_dev, s_cmd);
++ ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
+ /* Only copy data to userland if data was received. */
+ if (ret < 0)
+ goto exit;
+@@ -327,6 +327,9 @@ static long cros_ec_chardev_ioctl_readmem(struct cros_ec_dev *ec,
+ if (copy_from_user(&s_mem, arg, sizeof(s_mem)))
+ return -EFAULT;
+
++ if (s_mem.bytes > sizeof(s_mem.buffer))
++ return -EINVAL;
++
+ num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes,
+ s_mem.buffer);
+ if (num <= 0)
+diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
+index 272c89837d745..0dbceee87a4b1 100644
+--- a/drivers/platform/chrome/cros_ec_debugfs.c
++++ b/drivers/platform/chrome/cros_ec_debugfs.c
+@@ -25,6 +25,9 @@
+
+ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1))
+
++/* waitqueue for log readers */
++static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq);
++
+ /**
+ * struct cros_ec_debugfs - EC debugging information.
+ *
+@@ -33,7 +36,6 @@
+ * @log_buffer: circular buffer for console log information
+ * @read_msg: preallocated EC command and buffer to read console log
+ * @log_mutex: mutex to protect circular buffer
+- * @log_wq: waitqueue for log readers
+ * @log_poll_work: recurring task to poll EC for new console log data
+ * @panicinfo_blob: panicinfo debugfs blob
+ */
+@@ -44,7 +46,6 @@ struct cros_ec_debugfs {
+ struct circ_buf log_buffer;
+ struct cros_ec_command *read_msg;
+ struct mutex log_mutex;
+- wait_queue_head_t log_wq;
+ struct delayed_work log_poll_work;
+ /* EC panicinfo */
+ struct debugfs_blob_wrapper panicinfo_blob;
+@@ -107,7 +108,7 @@ static void cros_ec_console_log_work(struct work_struct *__work)
+ buf_space--;
+ }
+
+- wake_up(&debug_info->log_wq);
++ wake_up(&cros_ec_debugfs_log_wq);
+ }
+
+ mutex_unlock(&debug_info->log_mutex);
+@@ -141,7 +142,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
+
+ mutex_unlock(&debug_info->log_mutex);
+
+- ret = wait_event_interruptible(debug_info->log_wq,
++ ret = wait_event_interruptible(cros_ec_debugfs_log_wq,
+ CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
+ if (ret < 0)
+ return ret;
+@@ -173,7 +174,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file,
+ struct cros_ec_debugfs *debug_info = file->private_data;
+ __poll_t mask = 0;
+
+- poll_wait(file, &debug_info->log_wq, wait);
++ poll_wait(file, &cros_ec_debugfs_log_wq, wait);
+
+ mutex_lock(&debug_info->log_mutex);
+ if (CIRC_CNT(debug_info->log_buffer.head,
+@@ -377,7 +378,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
+ debug_info->log_buffer.tail = 0;
+
+ mutex_init(&debug_info->log_mutex);
+- init_waitqueue_head(&debug_info->log_wq);
+
+ debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir,
+ debug_info, &cros_ec_console_log_fops);
+diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
+index a7404d69b2d32..9d1f431bdc244 100644
+--- a/drivers/platform/chrome/cros_ec_proto.c
++++ b/drivers/platform/chrome/cros_ec_proto.c
+@@ -507,13 +507,13 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev)
+ ret = cros_ec_get_host_command_version_mask(ec_dev,
+ EC_CMD_GET_NEXT_EVENT,
+ &ver_mask);
+- if (ret < 0 || ver_mask == 0)
++ if (ret < 0 || ver_mask == 0) {
+ ec_dev->mkbp_event_supported = 0;
+- else
++ } else {
+ ec_dev->mkbp_event_supported = fls(ver_mask);
+
+- dev_dbg(ec_dev->dev, "MKBP support version %u\n",
+- ec_dev->mkbp_event_supported - 1);
++ dev_dbg(ec_dev->dev, "MKBP support version %u\n", ec_dev->mkbp_event_supported - 1);
++ }
+
+ /* Probe if host sleep v1 is supported for S0ix failure detection. */
+ ret = cros_ec_get_host_command_version_mask(ec_dev,
+@@ -560,22 +560,28 @@ exit:
+ EXPORT_SYMBOL(cros_ec_query_all);
+
+ /**
+- * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC.
++ * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC.
+ * @ec_dev: EC device.
+ * @msg: Message to write.
+ *
+- * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's
+- * cmd_xfer() callback directly. It returns success status only if both the command was transmitted
+- * successfully and the EC replied with success status.
++ * Call this to send a command to the ChromeOS EC. This should be used instead
++ * of calling the EC's cmd_xfer() callback directly. This function does not
++ * convert EC command execution error codes to Linux error codes. Most
++ * in-kernel users will want to use cros_ec_cmd_xfer_status() instead since
++ * that function implements the conversion.
+ *
+ * Return:
+- * >=0 - The number of bytes transferred
+- * <0 - Linux error code
++ * >0 - EC command was executed successfully. The return value is the number
++ * of bytes returned by the EC (excluding the header).
++ * =0 - EC communication was successful. EC command execution results are
++ * reported in msg->result. The result will be EC_RES_SUCCESS if the
++ * command was executed successfully or report an EC command execution
++ * error.
++ * <0 - EC communication error. Return value is the Linux error code.
+ */
+-int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
+- struct cros_ec_command *msg)
++int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg)
+ {
+- int ret, mapped;
++ int ret;
+
+ mutex_lock(&ec_dev->lock);
+ if (ec_dev->proto_version == EC_PROTO_VERSION_UNKNOWN) {
+@@ -616,6 +622,32 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
+ ret = send_command(ec_dev, msg);
+ mutex_unlock(&ec_dev->lock);
+
++ return ret;
++}
++EXPORT_SYMBOL(cros_ec_cmd_xfer);
++
++/**
++ * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC.
++ * @ec_dev: EC device.
++ * @msg: Message to write.
++ *
++ * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's
++ * cmd_xfer() callback directly. It returns success status only if both the command was transmitted
++ * successfully and the EC replied with success status.
++ *
++ * Return:
++ * >=0 - The number of bytes transferred.
++ * <0 - Linux error code
++ */
++int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
++ struct cros_ec_command *msg)
++{
++ int ret, mapped;
++
++ ret = cros_ec_cmd_xfer(ec_dev, msg);
++ if (ret < 0)
++ return ret;
++
+ mapped = cros_ec_map_error(msg->result);
+ if (mapped) {
+ dev_dbg(ec_dev->dev, "Command result (err: %d [%d])\n",
+@@ -716,6 +748,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev,
+ u8 event_type;
+ u32 host_event;
+ int ret;
++ u32 ver_mask;
+
+ /*
+ * Default value for wake_event.
+@@ -737,6 +770,37 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev,
+ return get_keyboard_state_event(ec_dev);
+
+ ret = get_next_event(ec_dev);
++ /*
++ * -ENOPROTOOPT is returned when EC returns EC_RES_INVALID_VERSION.
++ * This can occur when EC based device (e.g. Fingerprint MCU) jumps to
++ * the RO image which doesn't support newer version of the command. In
++ * this case we will attempt to update maximum supported version of the
++ * EC_CMD_GET_NEXT_EVENT.
++ */
++ if (ret == -ENOPROTOOPT) {
++ dev_dbg(ec_dev->dev,
++ "GET_NEXT_EVENT returned invalid version error.\n");
++ ret = cros_ec_get_host_command_version_mask(ec_dev,
++ EC_CMD_GET_NEXT_EVENT,
++ &ver_mask);
++ if (ret < 0 || ver_mask == 0)
++ /*
++ * Do not change the MKBP supported version if we can't
++ * obtain supported version correctly. Please note that
++ * calling EC_CMD_GET_NEXT_EVENT returned
++ * EC_RES_INVALID_VERSION which means that the command
++ * is present.
++ */
++ return -ENOPROTOOPT;
++
++ ec_dev->mkbp_event_supported = fls(ver_mask);
++ dev_dbg(ec_dev->dev, "MKBP support version changed to %u\n",
++ ec_dev->mkbp_event_supported - 1);
++
++ /* Try to get next event with new MKBP support version set. */
++ ret = get_next_event(ec_dev);
++ }
++
+ if (ret <= 0)
+ return ret;
+
+diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c
+index 98e37080f7609..71948dade0e2a 100644
+--- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c
++++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c
+@@ -17,7 +17,8 @@
+ #include <linux/sort.h>
+ #include <linux/slab.h>
+
+-#include "cros_ec_trace.h"
++#define CREATE_TRACE_POINTS
++#include "cros_ec_sensorhub_trace.h"
+
+ /* Precision of fixed point for the m values from the filter */
+ #define M_PRECISION BIT(23)
+diff --git a/drivers/platform/chrome/cros_ec_sensorhub_trace.h b/drivers/platform/chrome/cros_ec_sensorhub_trace.h
+new file mode 100644
+index 0000000000000..57d9b47859692
+--- /dev/null
++++ b/drivers/platform/chrome/cros_ec_sensorhub_trace.h
+@@ -0,0 +1,123 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Trace events for the ChromeOS Sensorhub kernel module
++ *
++ * Copyright 2021 Google LLC.
++ */
++
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM cros_ec
++
++#if !defined(_CROS_EC_SENSORHUB_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
++#define _CROS_EC_SENSORHUB_TRACE_H_
++
++#include <linux/types.h>
++#include <linux/platform_data/cros_ec_sensorhub.h>
++
++#include <linux/tracepoint.h>
++
++TRACE_EVENT(cros_ec_sensorhub_timestamp,
++ TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp,
++ s64 current_timestamp, s64 current_time),
++ TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp,
++ current_time),
++ TP_STRUCT__entry(
++ __field(u32, ec_sample_timestamp)
++ __field(u32, ec_fifo_timestamp)
++ __field(s64, fifo_timestamp)
++ __field(s64, current_timestamp)
++ __field(s64, current_time)
++ __field(s64, delta)
++ ),
++ TP_fast_assign(
++ __entry->ec_sample_timestamp = ec_sample_timestamp;
++ __entry->ec_fifo_timestamp = ec_fifo_timestamp;
++ __entry->fifo_timestamp = fifo_timestamp;
++ __entry->current_timestamp = current_timestamp;
++ __entry->current_time = current_time;
++ __entry->delta = current_timestamp - current_time;
++ ),
++ TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
++ __entry->ec_sample_timestamp,
++ __entry->ec_fifo_timestamp,
++ __entry->fifo_timestamp,
++ __entry->current_timestamp,
++ __entry->current_time,
++ __entry->delta
++ )
++);
++
++TRACE_EVENT(cros_ec_sensorhub_data,
++ TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp,
++ s64 current_timestamp, s64 current_time),
++ TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time),
++ TP_STRUCT__entry(
++ __field(u32, ec_sensor_num)
++ __field(u32, ec_fifo_timestamp)
++ __field(s64, fifo_timestamp)
++ __field(s64, current_timestamp)
++ __field(s64, current_time)
++ __field(s64, delta)
++ ),
++ TP_fast_assign(
++ __entry->ec_sensor_num = ec_sensor_num;
++ __entry->ec_fifo_timestamp = ec_fifo_timestamp;
++ __entry->fifo_timestamp = fifo_timestamp;
++ __entry->current_timestamp = current_timestamp;
++ __entry->current_time = current_time;
++ __entry->delta = current_timestamp - current_time;
++ ),
++ TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
++ __entry->ec_sensor_num,
++ __entry->ec_fifo_timestamp,
++ __entry->fifo_timestamp,
++ __entry->current_timestamp,
++ __entry->current_time,
++ __entry->delta
++ )
++);
++
++TRACE_EVENT(cros_ec_sensorhub_filter,
++ TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy),
++ TP_ARGS(state, dx, dy),
++ TP_STRUCT__entry(
++ __field(s64, dx)
++ __field(s64, dy)
++ __field(s64, median_m)
++ __field(s64, median_error)
++ __field(s64, history_len)
++ __field(s64, x)
++ __field(s64, y)
++ ),
++ TP_fast_assign(
++ __entry->dx = dx;
++ __entry->dy = dy;
++ __entry->median_m = state->median_m;
++ __entry->median_error = state->median_error;
++ __entry->history_len = state->history_len;
++ __entry->x = state->x_offset;
++ __entry->y = state->y_offset;
++ ),
++ TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld",
++ __entry->dx,
++ __entry->dy,
++ __entry->median_m,
++ __entry->median_error,
++ __entry->history_len,
++ __entry->x,
++ __entry->y
++ )
++);
++
++
++#endif /* _CROS_EC_SENSORHUB_TRACE_H_ */
++
++/* this part must be outside header guard */
++
++#undef TRACE_INCLUDE_PATH
++#define TRACE_INCLUDE_PATH .
++
++#undef TRACE_INCLUDE_FILE
++#define TRACE_INCLUDE_FILE cros_ec_sensorhub_trace
++
++#include <trace/define_trace.h>
+diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h
+index 7e7cfc98657a4..9bb5cd2c98b8b 100644
+--- a/drivers/platform/chrome/cros_ec_trace.h
++++ b/drivers/platform/chrome/cros_ec_trace.h
+@@ -15,7 +15,6 @@
+ #include <linux/types.h>
+ #include <linux/platform_data/cros_ec_commands.h>
+ #include <linux/platform_data/cros_ec_proto.h>
+-#include <linux/platform_data/cros_ec_sensorhub.h>
+
+ #include <linux/tracepoint.h>
+
+@@ -71,100 +70,6 @@ TRACE_EVENT(cros_ec_request_done,
+ __entry->retval)
+ );
+
+-TRACE_EVENT(cros_ec_sensorhub_timestamp,
+- TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp,
+- s64 current_timestamp, s64 current_time),
+- TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp,
+- current_time),
+- TP_STRUCT__entry(
+- __field(u32, ec_sample_timestamp)
+- __field(u32, ec_fifo_timestamp)
+- __field(s64, fifo_timestamp)
+- __field(s64, current_timestamp)
+- __field(s64, current_time)
+- __field(s64, delta)
+- ),
+- TP_fast_assign(
+- __entry->ec_sample_timestamp = ec_sample_timestamp;
+- __entry->ec_fifo_timestamp = ec_fifo_timestamp;
+- __entry->fifo_timestamp = fifo_timestamp;
+- __entry->current_timestamp = current_timestamp;
+- __entry->current_time = current_time;
+- __entry->delta = current_timestamp - current_time;
+- ),
+- TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
+- __entry->ec_sample_timestamp,
+- __entry->ec_fifo_timestamp,
+- __entry->fifo_timestamp,
+- __entry->current_timestamp,
+- __entry->current_time,
+- __entry->delta
+- )
+-);
+-
+-TRACE_EVENT(cros_ec_sensorhub_data,
+- TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp,
+- s64 current_timestamp, s64 current_time),
+- TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time),
+- TP_STRUCT__entry(
+- __field(u32, ec_sensor_num)
+- __field(u32, ec_fifo_timestamp)
+- __field(s64, fifo_timestamp)
+- __field(s64, current_timestamp)
+- __field(s64, current_time)
+- __field(s64, delta)
+- ),
+- TP_fast_assign(
+- __entry->ec_sensor_num = ec_sensor_num;
+- __entry->ec_fifo_timestamp = ec_fifo_timestamp;
+- __entry->fifo_timestamp = fifo_timestamp;
+- __entry->current_timestamp = current_timestamp;
+- __entry->current_time = current_time;
+- __entry->delta = current_timestamp - current_time;
+- ),
+- TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
+- __entry->ec_sensor_num,
+- __entry->ec_fifo_timestamp,
+- __entry->fifo_timestamp,
+- __entry->current_timestamp,
+- __entry->current_time,
+- __entry->delta
+- )
+-);
+-
+-TRACE_EVENT(cros_ec_sensorhub_filter,
+- TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy),
+- TP_ARGS(state, dx, dy),
+- TP_STRUCT__entry(
+- __field(s64, dx)
+- __field(s64, dy)
+- __field(s64, median_m)
+- __field(s64, median_error)
+- __field(s64, history_len)
+- __field(s64, x)
+- __field(s64, y)
+- ),
+- TP_fast_assign(
+- __entry->dx = dx;
+- __entry->dy = dy;
+- __entry->median_m = state->median_m;
+- __entry->median_error = state->median_error;
+- __entry->history_len = state->history_len;
+- __entry->x = state->x_offset;
+- __entry->y = state->y_offset;
+- ),
+- TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld",
+- __entry->dx,
+- __entry->dy,
+- __entry->median_m,
+- __entry->median_error,
+- __entry->history_len,
+- __entry->x,
+- __entry->y
+- )
+-);
+-
+-
+ #endif /* _CROS_EC_TRACE_H_ */
+
+ /* this part must be outside header guard */
+diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
+index 262a891eded34..b94abb8f7706a 100644
+--- a/drivers/platform/chrome/cros_ec_typec.c
++++ b/drivers/platform/chrome/cros_ec_typec.c
+@@ -156,12 +156,12 @@ static int cros_typec_get_switch_handles(struct cros_typec_port *port,
+ return 0;
+
+ role_sw_err:
+- usb_role_switch_put(port->role_sw);
+-ori_sw_err:
+ typec_switch_put(port->ori_sw);
+-mux_err:
++ port->ori_sw = NULL;
++ori_sw_err:
+ typec_mux_put(port->mux);
+-
++ port->mux = NULL;
++mux_err:
+ return -ENODEV;
+ }
+
+@@ -691,7 +691,7 @@ static int cros_typec_register_altmodes(struct cros_typec_data *typec, int port_
+ for (j = 0; j < sop_disc->svids[i].mode_count; j++) {
+ memset(&desc, 0, sizeof(desc));
+ desc.svid = sop_disc->svids[i].svid;
+- desc.mode = j;
++ desc.mode = j + 1;
+ desc.vdo = sop_disc->svids[i].mode_vdo[j];
+
+ if (is_partner)
+@@ -1106,7 +1106,13 @@ static int cros_typec_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ typec->dev = dev;
++
+ typec->ec = dev_get_drvdata(pdev->dev.parent);
++ if (!typec->ec) {
++ dev_err(dev, "couldn't find parent EC device\n");
++ return -ENODEV;
++ }
++
+ platform_set_drvdata(pdev, typec);
+
+ ret = cros_typec_get_cmd_version(typec);
+diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
+index 48a6617aa12f3..de76de6f50900 100644
+--- a/drivers/platform/chrome/cros_usbpd_notify.c
++++ b/drivers/platform/chrome/cros_usbpd_notify.c
+@@ -285,7 +285,11 @@ static int __init cros_usbpd_notify_init(void)
+ return ret;
+
+ #ifdef CONFIG_ACPI
+- platform_driver_register(&cros_usbpd_notify_acpi_driver);
++ ret = platform_driver_register(&cros_usbpd_notify_acpi_driver);
++ if (ret) {
++ platform_driver_unregister(&cros_usbpd_notify_plat_driver);
++ return ret;
++ }
+ #endif
+ return 0;
+ }
+diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
+index 04bc3b50aa7a4..2d4bbe99959ef 100644
+--- a/drivers/platform/mellanox/mlxbf-pmc.c
++++ b/drivers/platform/mellanox/mlxbf-pmc.c
+@@ -191,6 +191,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_smgen_events[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_trio_events_1[] = {
++ { 0x0, "DISABLE" },
+ { 0xa0, "TPIO_DATA_BEAT" },
+ { 0xa1, "TDMA_DATA_BEAT" },
+ { 0xa2, "MAP_DATA_BEAT" },
+@@ -214,6 +215,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_trio_events_1[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_trio_events_2[] = {
++ { 0x0, "DISABLE" },
+ { 0xa0, "TPIO_DATA_BEAT" },
+ { 0xa1, "TDMA_DATA_BEAT" },
+ { 0xa2, "MAP_DATA_BEAT" },
+@@ -246,6 +248,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_trio_events_2[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_ecc_events[] = {
++ { 0x0, "DISABLE" },
+ { 0x100, "ECC_SINGLE_ERROR_CNT" },
+ { 0x104, "ECC_DOUBLE_ERROR_CNT" },
+ { 0x114, "SERR_INJ" },
+@@ -258,6 +261,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_ecc_events[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_mss_events[] = {
++ { 0x0, "DISABLE" },
+ { 0xc0, "RXREQ_MSS" },
+ { 0xc1, "RXDAT_MSS" },
+ { 0xc2, "TXRSP_MSS" },
+@@ -265,6 +269,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_mss_events[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_hnf_events[] = {
++ { 0x0, "DISABLE" },
+ { 0x45, "HNF_REQUESTS" },
+ { 0x46, "HNF_REJECTS" },
+ { 0x47, "ALL_BUSY" },
+@@ -323,6 +328,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_hnf_events[] = {
+ };
+
+ static const struct mlxbf_pmc_events mlxbf_pmc_hnfnet_events[] = {
++ { 0x0, "DISABLE" },
+ { 0x12, "CDN_REQ" },
+ { 0x13, "DDN_REQ" },
+ { 0x14, "NDN_REQ" },
+@@ -358,7 +364,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_hnfnet_events[] = {
+ { 0x32, "DDN_DIAG_W_INGRESS" },
+ { 0x33, "DDN_DIAG_C_INGRESS" },
+ { 0x34, "DDN_DIAG_CORE_SENT" },
+- { 0x35, "NDN_DIAG_S_OUT_OF_CRED" },
++ { 0x35, "NDN_DIAG_N_OUT_OF_CRED" },
+ { 0x36, "NDN_DIAG_S_OUT_OF_CRED" },
+ { 0x37, "NDN_DIAG_E_OUT_OF_CRED" },
+ { 0x38, "NDN_DIAG_W_OUT_OF_CRED" },
+@@ -892,7 +898,7 @@ static int mlxbf_pmc_read_event(int blk_num, uint32_t cnt_num, bool is_l3,
+ uint64_t *result)
+ {
+ uint32_t perfcfg_offset, perfval_offset;
+- uint64_t perfmon_cfg, perfevt, perfctl;
++ uint64_t perfmon_cfg, perfevt;
+
+ if (cnt_num >= pmc->block[blk_num].counters)
+ return -EINVAL;
+@@ -904,25 +910,6 @@ static int mlxbf_pmc_read_event(int blk_num, uint32_t cnt_num, bool is_l3,
+ perfval_offset = perfcfg_offset +
+ pmc->block[blk_num].counters * MLXBF_PMC_REG_SIZE;
+
+- /* Set counter in "read" mode */
+- perfmon_cfg = FIELD_PREP(MLXBF_PMC_PERFMON_CONFIG_ADDR,
+- MLXBF_PMC_PERFCTL);
+- perfmon_cfg |= FIELD_PREP(MLXBF_PMC_PERFMON_CONFIG_STROBE, 1);
+- perfmon_cfg |= FIELD_PREP(MLXBF_PMC_PERFMON_CONFIG_WR_R_B, 0);
+-
+- if (mlxbf_pmc_write(pmc->block[blk_num].mmio_base + perfcfg_offset,
+- MLXBF_PMC_WRITE_REG_64, perfmon_cfg))
+- return -EFAULT;
+-
+- /* Check if the counter is enabled */
+-
+- if (mlxbf_pmc_read(pmc->block[blk_num].mmio_base + perfval_offset,
+- MLXBF_PMC_READ_REG_64, &perfctl))
+- return -EFAULT;
+-
+- if (!FIELD_GET(MLXBF_PMC_PERFCTL_EN0, perfctl))
+- return -EINVAL;
+-
+ /* Set counter in "read" mode */
+ perfmon_cfg = FIELD_PREP(MLXBF_PMC_PERFMON_CONFIG_ADDR,
+ MLXBF_PMC_PERFEVT);
+@@ -1008,7 +995,7 @@ static ssize_t mlxbf_pmc_counter_show(struct device *dev,
+ } else
+ return -EINVAL;
+
+- return sprintf(buf, "0x%llx\n", value);
++ return sysfs_emit(buf, "0x%llx\n", value);
+ }
+
+ /* Store function for "counter" sysfs files */
+@@ -1078,13 +1065,13 @@ static ssize_t mlxbf_pmc_event_show(struct device *dev,
+
+ err = mlxbf_pmc_read_event(blk_num, cnt_num, is_l3, &evt_num);
+ if (err)
+- return sprintf(buf, "No event being monitored\n");
++ return sysfs_emit(buf, "No event being monitored\n");
+
+ evt_name = mlxbf_pmc_get_event_name(pmc->block_name[blk_num], evt_num);
+ if (!evt_name)
+ return -EINVAL;
+
+- return sprintf(buf, "0x%llx: %s\n", evt_num, evt_name);
++ return sysfs_emit(buf, "0x%llx: %s\n", evt_num, evt_name);
+ }
+
+ /* Store function for "event" sysfs files */
+@@ -1139,9 +1126,9 @@ static ssize_t mlxbf_pmc_event_list_show(struct device *dev,
+ return -EINVAL;
+
+ for (i = 0, buf[0] = '\0'; i < size; ++i) {
+- len += sprintf(e_info, "0x%x: %s\n", events[i].evt_num,
+- events[i].evt_name);
+- if (len > PAGE_SIZE)
++ len += snprintf(e_info, sizeof(e_info), "0x%x: %s\n",
++ events[i].evt_num, events[i].evt_name);
++ if (len >= PAGE_SIZE)
+ break;
+ strcat(buf, e_info);
+ ret = len;
+@@ -1168,7 +1155,7 @@ static ssize_t mlxbf_pmc_enable_show(struct device *dev,
+
+ value = FIELD_GET(MLXBF_PMC_L3C_PERF_CNT_CFG_EN, perfcnt_cfg);
+
+- return sprintf(buf, "%d\n", value);
++ return sysfs_emit(buf, "%d\n", value);
+ }
+
+ /* Store function for "enable" sysfs files - only for l3cache */
+@@ -1348,9 +1335,8 @@ static int mlxbf_pmc_map_counters(struct device *dev)
+
+ for (i = 0; i < pmc->total_blocks; ++i) {
+ if (strstr(pmc->block_name[i], "tile")) {
+- ret = sscanf(pmc->block_name[i], "tile%d", &tile_num);
+- if (ret < 0)
+- return ret;
++ if (sscanf(pmc->block_name[i], "tile%d", &tile_num) != 1)
++ return -EINVAL;
+
+ if (tile_num >= pmc->tile_count)
+ continue;
+@@ -1374,8 +1360,8 @@ static int mlxbf_pmc_map_counters(struct device *dev)
+ pmc->block[i].counters = info[2];
+ pmc->block[i].type = info[3];
+
+- if (IS_ERR(pmc->block[i].mmio_base))
+- return PTR_ERR(pmc->block[i].mmio_base);
++ if (!pmc->block[i].mmio_base)
++ return -ENOMEM;
+
+ ret = mlxbf_pmc_create_groups(dev, i);
+ if (ret)
+diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
+index 38800e86ed8ad..194f3205e5597 100644
+--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
++++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
+@@ -56,6 +56,7 @@ struct mlxbf_tmfifo;
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
++ * @drop_desc: dummy desc for packet dropping
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+@@ -72,6 +73,7 @@ struct mlxbf_tmfifo_vring {
+ struct virtqueue *vq;
+ struct vring_desc *desc;
+ struct vring_desc *desc_head;
++ struct vring_desc drop_desc;
+ int cur_len;
+ int rem_len;
+ u32 pkt_len;
+@@ -83,6 +85,14 @@ struct mlxbf_tmfifo_vring {
+ struct mlxbf_tmfifo *fifo;
+ };
+
++/* Check whether vring is in drop mode. */
++#define IS_VRING_DROP(_r) ({ \
++ typeof(_r) (r) = (_r); \
++ (r->desc_head == &r->drop_desc ? true : false); })
++
++/* A stub length to drop maximum length packet. */
++#define VRING_DROP_DESC_MAX_LEN GENMASK(15, 0)
++
+ /* Interrupt types. */
+ enum {
+ MLXBF_TM_RX_LWM_IRQ,
+@@ -195,7 +205,7 @@ static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+ static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+ /* Maximum L2 header length. */
+-#define MLXBF_TMFIFO_NET_L2_OVERHEAD 36
++#define MLXBF_TMFIFO_NET_L2_OVERHEAD (ETH_HLEN + VLAN_HLEN)
+
+ /* Supported virtio-net features. */
+ #define MLXBF_TMFIFO_NET_FEATURES \
+@@ -243,6 +253,7 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+ vring->align = SMP_CACHE_BYTES;
+ vring->index = i;
+ vring->vdev_id = tm_vdev->vdev.id.device;
++ vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN;
+ dev = &tm_vdev->vdev.dev;
+
+ size = vring_size(vring->num, vring->align);
+@@ -348,7 +359,7 @@ static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+ return len;
+ }
+
+-static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
++static void mlxbf_tmfifo_release_pkt(struct mlxbf_tmfifo_vring *vring)
+ {
+ struct vring_desc *desc_head;
+ u32 len = 0;
+@@ -577,19 +588,25 @@ static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+
+ if (vring->cur_len + sizeof(u64) <= len) {
+ /* The whole word. */
+- if (is_rx)
+- memcpy(addr + vring->cur_len, &data, sizeof(u64));
+- else
+- memcpy(&data, addr + vring->cur_len, sizeof(u64));
++ if (!IS_VRING_DROP(vring)) {
++ if (is_rx)
++ memcpy(addr + vring->cur_len, &data,
++ sizeof(u64));
++ else
++ memcpy(&data, addr + vring->cur_len,
++ sizeof(u64));
++ }
+ vring->cur_len += sizeof(u64);
+ } else {
+ /* Leftover bytes. */
+- if (is_rx)
+- memcpy(addr + vring->cur_len, &data,
+- len - vring->cur_len);
+- else
+- memcpy(&data, addr + vring->cur_len,
+- len - vring->cur_len);
++ if (!IS_VRING_DROP(vring)) {
++ if (is_rx)
++ memcpy(addr + vring->cur_len, &data,
++ len - vring->cur_len);
++ else
++ memcpy(&data, addr + vring->cur_len,
++ len - vring->cur_len);
++ }
+ vring->cur_len = len;
+ }
+
+@@ -606,13 +623,14 @@ static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+ * flag is set.
+ */
+ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+- struct vring_desc *desc,
++ struct vring_desc **desc,
+ bool is_rx, bool *vring_change)
+ {
+ struct mlxbf_tmfifo *fifo = vring->fifo;
+ struct virtio_net_config *config;
+ struct mlxbf_tmfifo_msg_hdr hdr;
+ int vdev_id, hdr_len;
++ bool drop_rx = false;
+
+ /* Read/Write packet header. */
+ if (is_rx) {
+@@ -632,8 +650,8 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+ if (ntohs(hdr.len) >
+ __virtio16_to_cpu(virtio_legacy_is_little_endian(),
+ config->mtu) +
+- MLXBF_TMFIFO_NET_L2_OVERHEAD)
+- return;
++ MLXBF_TMFIFO_NET_L2_OVERHEAD)
++ drop_rx = true;
+ } else {
+ vdev_id = VIRTIO_ID_CONSOLE;
+ hdr_len = 0;
+@@ -648,16 +666,25 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+
+ if (!tm_dev2)
+ return;
+- vring->desc = desc;
++ vring->desc = *desc;
+ vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+ *vring_change = true;
+ }
++
++ if (drop_rx && !IS_VRING_DROP(vring)) {
++ if (vring->desc_head)
++ mlxbf_tmfifo_release_pkt(vring);
++ *desc = &vring->drop_desc;
++ vring->desc_head = *desc;
++ vring->desc = *desc;
++ }
++
+ vring->pkt_len = ntohs(hdr.len) + hdr_len;
+ } else {
+ /* Network virtio has an extra header. */
+ hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+ sizeof(struct virtio_net_hdr) : 0;
+- vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
++ vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, *desc);
+ hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+ VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+ hdr.len = htons(vring->pkt_len - hdr_len);
+@@ -690,15 +717,23 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+ /* Get the descriptor of the next packet. */
+ if (!vring->desc) {
+ desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+- if (!desc)
+- return false;
++ if (!desc) {
++ /* Drop next Rx packet to avoid stuck. */
++ if (is_rx) {
++ desc = &vring->drop_desc;
++ vring->desc_head = desc;
++ vring->desc = desc;
++ } else {
++ return false;
++ }
++ }
+ } else {
+ desc = vring->desc;
+ }
+
+ /* Beginning of a packet. Start to Rx/Tx packet header. */
+ if (vring->pkt_len == 0) {
+- mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
++ mlxbf_tmfifo_rxtx_header(vring, &desc, is_rx, &vring_change);
+ (*avail)--;
+
+ /* Return if new packet is for another ring. */
+@@ -724,17 +759,24 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+ vring->rem_len -= len;
+
+ /* Get the next desc on the chain. */
+- if (vring->rem_len > 0 &&
++ if (!IS_VRING_DROP(vring) && vring->rem_len > 0 &&
+ (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+ idx = virtio16_to_cpu(vdev, desc->next);
+ desc = &vr->desc[idx];
+ goto mlxbf_tmfifo_desc_done;
+ }
+
+- /* Done and release the pending packet. */
+- mlxbf_tmfifo_release_pending_pkt(vring);
++ /* Done and release the packet. */
+ desc = NULL;
+ fifo->vring[is_rx] = NULL;
++ if (!IS_VRING_DROP(vring)) {
++ mlxbf_tmfifo_release_pkt(vring);
++ } else {
++ vring->pkt_len = 0;
++ vring->desc_head = NULL;
++ vring->desc = NULL;
++ return false;
++ }
+
+ /*
+ * Make sure the load/store are in order before
+@@ -868,6 +910,7 @@ static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+ tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+ mlxbf_tmfifo_console_output(tm_vdev, vring);
+ spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
++ set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+ } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+ &fifo->pend_events)) {
+ return true;
+@@ -913,7 +956,7 @@ static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+
+ /* Release the pending packet. */
+ if (vring->desc)
+- mlxbf_tmfifo_release_pending_pkt(vring);
++ mlxbf_tmfifo_release_pkt(vring);
+ vq = vring->vq;
+ if (vq) {
+ vring->vq = NULL;
+diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig
+index 8ac149173c64b..495da331ca2db 100644
+--- a/drivers/platform/mips/Kconfig
++++ b/drivers/platform/mips/Kconfig
+@@ -17,7 +17,7 @@ menuconfig MIPS_PLATFORM_DEVICES
+ if MIPS_PLATFORM_DEVICES
+
+ config CPU_HWMON
+- tristate "Loongson-3 CPU HWMon Driver"
++ bool "Loongson-3 CPU HWMon Driver"
+ depends on MACH_LOONGSON64
+ select HWMON
+ default y
+diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c
+index 386389ffec419..d8c5f9195f85f 100644
+--- a/drivers/platform/mips/cpu_hwmon.c
++++ b/drivers/platform/mips/cpu_hwmon.c
+@@ -55,55 +55,6 @@ out:
+ static int nr_packages;
+ static struct device *cpu_hwmon_dev;
+
+-static SENSOR_DEVICE_ATTR(name, 0444, NULL, NULL, 0);
+-
+-static struct attribute *cpu_hwmon_attributes[] = {
+- &sensor_dev_attr_name.dev_attr.attr,
+- NULL
+-};
+-
+-/* Hwmon device attribute group */
+-static struct attribute_group cpu_hwmon_attribute_group = {
+- .attrs = cpu_hwmon_attributes,
+-};
+-
+-static ssize_t get_cpu_temp(struct device *dev,
+- struct device_attribute *attr, char *buf);
+-static ssize_t cpu_temp_label(struct device *dev,
+- struct device_attribute *attr, char *buf);
+-
+-static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1);
+-static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1);
+-static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2);
+-static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2);
+-static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3);
+-static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3);
+-static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4);
+-static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4);
+-
+-static const struct attribute *hwmon_cputemp[4][3] = {
+- {
+- &sensor_dev_attr_temp1_input.dev_attr.attr,
+- &sensor_dev_attr_temp1_label.dev_attr.attr,
+- NULL
+- },
+- {
+- &sensor_dev_attr_temp2_input.dev_attr.attr,
+- &sensor_dev_attr_temp2_label.dev_attr.attr,
+- NULL
+- },
+- {
+- &sensor_dev_attr_temp3_input.dev_attr.attr,
+- &sensor_dev_attr_temp3_label.dev_attr.attr,
+- NULL
+- },
+- {
+- &sensor_dev_attr_temp4_input.dev_attr.attr,
+- &sensor_dev_attr_temp4_label.dev_attr.attr,
+- NULL
+- }
+-};
+-
+ static ssize_t cpu_temp_label(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+@@ -121,24 +72,47 @@ static ssize_t get_cpu_temp(struct device *dev,
+ return sprintf(buf, "%d\n", value);
+ }
+
+-static int create_sysfs_cputemp_files(struct kobject *kobj)
+-{
+- int i, ret = 0;
+-
+- for (i = 0; i < nr_packages; i++)
+- ret = sysfs_create_files(kobj, hwmon_cputemp[i]);
++static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1);
++static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1);
++static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2);
++static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2);
++static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3);
++static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3);
++static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4);
++static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4);
+
+- return ret;
+-}
++static struct attribute *cpu_hwmon_attributes[] = {
++ &sensor_dev_attr_temp1_input.dev_attr.attr,
++ &sensor_dev_attr_temp1_label.dev_attr.attr,
++ &sensor_dev_attr_temp2_input.dev_attr.attr,
++ &sensor_dev_attr_temp2_label.dev_attr.attr,
++ &sensor_dev_attr_temp3_input.dev_attr.attr,
++ &sensor_dev_attr_temp3_label.dev_attr.attr,
++ &sensor_dev_attr_temp4_input.dev_attr.attr,
++ &sensor_dev_attr_temp4_label.dev_attr.attr,
++ NULL
++};
+
+-static void remove_sysfs_cputemp_files(struct kobject *kobj)
++static umode_t cpu_hwmon_is_visible(struct kobject *kobj,
++ struct attribute *attr, int i)
+ {
+- int i;
++ int id = i / 2;
+
+- for (i = 0; i < nr_packages; i++)
+- sysfs_remove_files(kobj, hwmon_cputemp[i]);
++ if (id < nr_packages)
++ return attr->mode;
++ return 0;
+ }
+
++static struct attribute_group cpu_hwmon_group = {
++ .attrs = cpu_hwmon_attributes,
++ .is_visible = cpu_hwmon_is_visible,
++};
++
++static const struct attribute_group *cpu_hwmon_groups[] = {
++ &cpu_hwmon_group,
++ NULL
++};
++
+ #define CPU_THERMAL_THRESHOLD 90000
+ static struct delayed_work thermal_work;
+
+@@ -159,50 +133,31 @@ static void do_thermal_timer(struct work_struct *work)
+
+ static int __init loongson_hwmon_init(void)
+ {
+- int ret;
+-
+ pr_info("Loongson Hwmon Enter...\n");
+
+ if (cpu_has_csr())
+ csr_temp_enable = csr_readl(LOONGSON_CSR_FEATURES) &
+ LOONGSON_CSRF_TEMP;
+
+- cpu_hwmon_dev = hwmon_device_register_with_info(NULL, "cpu_hwmon", NULL, NULL, NULL);
+- if (IS_ERR(cpu_hwmon_dev)) {
+- ret = PTR_ERR(cpu_hwmon_dev);
+- pr_err("hwmon_device_register fail!\n");
+- goto fail_hwmon_device_register;
+- }
+-
+ nr_packages = loongson_sysconf.nr_cpus /
+ loongson_sysconf.cores_per_package;
+
+- ret = create_sysfs_cputemp_files(&cpu_hwmon_dev->kobj);
+- if (ret) {
+- pr_err("fail to create cpu temperature interface!\n");
+- goto fail_create_sysfs_cputemp_files;
++ cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon",
++ NULL, cpu_hwmon_groups);
++ if (IS_ERR(cpu_hwmon_dev)) {
++ pr_err("hwmon_device_register fail!\n");
++ return PTR_ERR(cpu_hwmon_dev);
+ }
+
+ INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer);
+ schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000));
+
+- return ret;
+-
+-fail_create_sysfs_cputemp_files:
+- sysfs_remove_group(&cpu_hwmon_dev->kobj,
+- &cpu_hwmon_attribute_group);
+- hwmon_device_unregister(cpu_hwmon_dev);
+-
+-fail_hwmon_device_register:
+- return ret;
++ return 0;
+ }
+
+ static void __exit loongson_hwmon_exit(void)
+ {
+ cancel_delayed_work_sync(&thermal_work);
+- remove_sysfs_cputemp_files(&cpu_hwmon_dev->kobj);
+- sysfs_remove_group(&cpu_hwmon_dev->kobj,
+- &cpu_hwmon_attribute_group);
+ hwmon_device_unregister(cpu_hwmon_dev);
+ }
+
+diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
+index 4ff5c3a12991c..921520475ff68 100644
+--- a/drivers/platform/olpc/olpc-ec.c
++++ b/drivers/platform/olpc/olpc-ec.c
+@@ -264,7 +264,7 @@ static ssize_t ec_dbgfs_cmd_write(struct file *file, const char __user *buf,
+ int i, m;
+ unsigned char ec_cmd[EC_MAX_CMD_ARGS];
+ unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
+- char cmdbuf[64];
++ char cmdbuf[64] = "";
+ int ec_cmd_bytes;
+
+ mutex_lock(&ec_dbgfs_lock);
+diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
+index b8c377b3f9321..5542b768890c9 100644
+--- a/drivers/platform/surface/aggregator/controller.c
++++ b/drivers/platform/surface/aggregator/controller.c
+@@ -825,7 +825,7 @@ static int ssam_cplt_init(struct ssam_cplt *cplt, struct device *dev)
+
+ cplt->dev = dev;
+
+- cplt->wq = create_workqueue(SSAM_CPLT_WQ_NAME);
++ cplt->wq = alloc_workqueue(SSAM_CPLT_WQ_NAME, WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ if (!cplt->wq)
+ return -ENOMEM;
+
+@@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl,
+ return status;
+
+ status = ssam_request_sync_init(rqst, spec->flags);
+- if (status)
++ if (status) {
++ ssam_request_sync_free(rqst);
+ return status;
++ }
+
+ ssam_request_sync_set_resp(rqst, rsp);
+
+diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
+index c61bbeeec2dfd..54f86df77a37b 100644
+--- a/drivers/platform/surface/aggregator/core.c
++++ b/drivers/platform/surface/aggregator/core.c
+@@ -816,7 +816,7 @@ err_cpkg:
+ err_bus:
+ return status;
+ }
+-module_init(ssam_core_init);
++subsys_initcall(ssam_core_init);
+
+ static void __exit ssam_core_exit(void)
+ {
+diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c
+index 8a4451c1ffe57..a652c2763175e 100644
+--- a/drivers/platform/surface/aggregator/ssh_packet_layer.c
++++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c
+@@ -1596,16 +1596,32 @@ static void ssh_ptl_timeout_reap(struct work_struct *work)
+ ssh_ptl_tx_wakeup_packet(ptl);
+ }
+
+-static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq)
++static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, const struct ssh_frame *frame)
+ {
+ int i;
+
++ /*
++ * Ignore unsequenced packets. On some devices (notably Surface Pro 9),
++ * unsequenced events will always be sent with SEQ=0x00. Attempting to
++ * detect retransmission would thus just block all events.
++ *
++ * While sequence numbers would also allow detection of retransmitted
++ * packets in unsequenced communication, they have only ever been used
++ * to cover edge-cases in sequenced transmission. In particular, the
++ * only instance of packets being retransmitted (that we are aware of)
++ * is due to an ACK timeout. As this does not happen in unsequenced
++ * communication, skip the retransmission check for those packets
++ * entirely.
++ */
++ if (frame->type == SSH_FRAME_TYPE_DATA_NSQ)
++ return false;
++
+ /*
+ * Check if SEQ has been seen recently (i.e. packet was
+ * re-transmitted and we should ignore it).
+ */
+ for (i = 0; i < ARRAY_SIZE(ptl->rx.blocked.seqs); i++) {
+- if (likely(ptl->rx.blocked.seqs[i] != seq))
++ if (likely(ptl->rx.blocked.seqs[i] != frame->seq))
+ continue;
+
+ ptl_dbg(ptl, "ptl: ignoring repeated data packet\n");
+@@ -1613,7 +1629,7 @@ static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq)
+ }
+
+ /* Update list of blocked sequence IDs. */
+- ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = seq;
++ ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = frame->seq;
+ ptl->rx.blocked.offset = (ptl->rx.blocked.offset + 1)
+ % ARRAY_SIZE(ptl->rx.blocked.seqs);
+
+@@ -1624,7 +1640,7 @@ static void ssh_ptl_rx_dataframe(struct ssh_ptl *ptl,
+ const struct ssh_frame *frame,
+ const struct ssam_span *payload)
+ {
+- if (ssh_ptl_rx_retransmit_check(ptl, frame->seq))
++ if (ssh_ptl_rx_retransmit_check(ptl, frame))
+ return;
+
+ ptl->ops.data_received(ptl, payload);
+diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c
+index 790f7f0eee98b..7c0b637c91fc8 100644
+--- a/drivers/platform/surface/aggregator/ssh_request_layer.c
++++ b/drivers/platform/surface/aggregator/ssh_request_layer.c
+@@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data)
+ if (sshp_parse_command(dev, data, &command, &command_data))
+ return;
+
++ /*
++ * Check if the message was intended for us. If not, drop it.
++ *
++ * Note: We will need to change this to handle debug messages. On newer
++ * generation devices, these seem to be sent to tid_out=0x03. We as
++ * host can still receive them as they can be forwarded via an override
++ * option on SAM, but doing so does not change tid_out=0x00.
++ */
++ if (command->tid_out != 0x00) {
++ rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n",
++ command->tid_out);
++ return;
++ }
++
+ if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid)))
+ ssh_rtl_rx_event(rtl, command, &command_data);
+ else
+diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c
+index 90c1568ea4e09..3cc004c68bdba 100644
+--- a/drivers/platform/surface/surface3_power.c
++++ b/drivers/platform/surface/surface3_power.c
+@@ -233,14 +233,21 @@ static int mshw0011_bix(struct mshw0011_data *cdata, struct bix *bix)
+ }
+ bix->last_full_charg_capacity = ret;
+
+- /* get serial number */
++ /*
++ * Get serial number, on some devices (with unofficial replacement
++ * battery?) reading any of the serial number range addresses gets
++ * nacked in this case just leave the serial number empty.
++ */
+ ret = i2c_smbus_read_i2c_block_data(client, MSHW0011_BAT0_REG_SERIAL_NO,
+ sizeof(buf), buf);
+- if (ret != sizeof(buf)) {
++ if (ret == -EREMOTEIO) {
++ /* no serial number available */
++ } else if (ret != sizeof(buf)) {
+ dev_err(&client->dev, "Error reading serial no: %d\n", ret);
+ return ret;
++ } else {
++ snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf);
+ }
+- snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf);
+
+ /* get cycle count */
+ ret = i2c_smbus_read_word_data(client, MSHW0011_BAT0_REG_CYCLE_CNT);
+diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
+index 4428c4330229a..5c0451c56ea83 100644
+--- a/drivers/platform/surface/surface_aggregator_registry.c
++++ b/drivers/platform/surface/surface_aggregator_registry.c
+@@ -77,6 +77,42 @@ static const struct software_node ssam_node_bas_dtx = {
+ .parent = &ssam_node_root,
+ };
+
++/* HID keyboard (TID1). */
++static const struct software_node ssam_node_hid_tid1_keyboard = {
++ .name = "ssam:01:15:01:01:00",
++ .parent = &ssam_node_root,
++};
++
++/* HID pen stash (TID1; pen taken / stashed away evens). */
++static const struct software_node ssam_node_hid_tid1_penstash = {
++ .name = "ssam:01:15:01:02:00",
++ .parent = &ssam_node_root,
++};
++
++/* HID touchpad (TID1). */
++static const struct software_node ssam_node_hid_tid1_touchpad = {
++ .name = "ssam:01:15:01:03:00",
++ .parent = &ssam_node_root,
++};
++
++/* HID device instance 6 (TID1, unknown HID device). */
++static const struct software_node ssam_node_hid_tid1_iid6 = {
++ .name = "ssam:01:15:01:06:00",
++ .parent = &ssam_node_root,
++};
++
++/* HID device instance 7 (TID1, unknown HID device). */
++static const struct software_node ssam_node_hid_tid1_iid7 = {
++ .name = "ssam:01:15:01:07:00",
++ .parent = &ssam_node_root,
++};
++
++/* HID system controls (TID1). */
++static const struct software_node ssam_node_hid_tid1_sysctrl = {
++ .name = "ssam:01:15:01:08:00",
++ .parent = &ssam_node_root,
++};
++
+ /* HID keyboard. */
+ static const struct software_node ssam_node_hid_main_keyboard = {
+ .name = "ssam:01:15:02:01:00",
+@@ -159,6 +195,21 @@ static const struct software_node *ssam_node_group_sl3[] = {
+ NULL,
+ };
+
++/* Devices for Surface Laptop Studio. */
++static const struct software_node *ssam_node_group_sls[] = {
++ &ssam_node_root,
++ &ssam_node_bat_ac,
++ &ssam_node_bat_main,
++ &ssam_node_tmp_pprof,
++ &ssam_node_hid_tid1_keyboard,
++ &ssam_node_hid_tid1_penstash,
++ &ssam_node_hid_tid1_touchpad,
++ &ssam_node_hid_tid1_iid6,
++ &ssam_node_hid_tid1_iid7,
++ &ssam_node_hid_tid1_sysctrl,
++ NULL,
++};
++
+ /* Devices for Surface Laptop Go. */
+ static const struct software_node *ssam_node_group_slg1[] = {
+ &ssam_node_root,
+@@ -507,6 +558,12 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
+ /* Surface Laptop Go 1 */
+ { "MSHW0118", (unsigned long)ssam_node_group_slg1 },
+
++ /* Surface Laptop Go 2 */
++ { "MSHW0290", (unsigned long)ssam_node_group_slg1 },
++
++ /* Surface Laptop Studio */
++ { "MSHW0123", (unsigned long)ssam_node_group_sls },
++
+ { },
+ };
+ MODULE_DEVICE_TABLE(acpi, ssam_platform_hub_match);
+diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
+index e21ea3d23e6f2..50abcf0c483c3 100644
+--- a/drivers/platform/x86/Kconfig
++++ b/drivers/platform/x86/Kconfig
+@@ -171,6 +171,7 @@ config ACER_WMI
+ config AMD_PMC
+ tristate "AMD SoC PMC driver"
+ depends on ACPI && PCI
++ select SERIO
+ help
+ The driver provides support for AMD Power Management Controller
+ primarily responsible for S2Idle transactions that are driven from
+@@ -388,24 +389,7 @@ config GPD_POCKET_FAN
+ of the CPU temperature. Say Y or M if the kernel may be used on a
+ GPD pocket.
+
+-config HP_ACCEL
+- tristate "HP laptop accelerometer"
+- depends on INPUT && ACPI
+- depends on SERIO_I8042
+- select SENSORS_LIS3LV02D
+- select NEW_LEDS
+- select LEDS_CLASS
+- help
+- This driver provides support for the "Mobile Data Protection System 3D"
+- or "3D DriveGuard" feature of HP laptops. On such systems the driver
+- should load automatically (via ACPI alias).
+-
+- Support for a led indicating disk protection will be provided as
+- hp::hddprotect. For more information on the feature, refer to
+- Documentation/misc-devices/lis3lv02d.rst.
+-
+- To compile this driver as a module, choose M here: the module will
+- be called hp_accel.
++source "drivers/platform/x86/hp/Kconfig"
+
+ config WIRELESS_HOTKEY
+ tristate "Wireless hotkey button"
+@@ -419,29 +403,6 @@ config WIRELESS_HOTKEY
+ To compile this driver as a module, choose M here: the module will
+ be called wireless-hotkey.
+
+-config HP_WMI
+- tristate "HP WMI extras"
+- depends on ACPI_WMI
+- depends on INPUT
+- depends on RFKILL || RFKILL = n
+- select INPUT_SPARSEKMAP
+- select ACPI_PLATFORM_PROFILE
+- help
+- Say Y here if you want to support WMI-based hotkeys on HP laptops and
+- to read data from WMI such as docking or ambient light sensor state.
+-
+- To compile this driver as a module, choose M here: the module will
+- be called hp-wmi.
+-
+-config TC1100_WMI
+- tristate "HP Compaq TC1100 Tablet WMI Extras"
+- depends on !X86_64
+- depends on ACPI
+- depends on ACPI_WMI
+- help
+- This is a driver for the WMI extensions (wireless and bluetooth power
+- control) of the HP Compaq TC1100 tablet.
+-
+ config IBM_RTL
+ tristate "Device driver to enable PRTL support"
+ depends on PCI
+@@ -871,6 +832,8 @@ config PANASONIC_LAPTOP
+ tristate "Panasonic Laptop Extras"
+ depends on INPUT && ACPI
+ depends on BACKLIGHT_CLASS_DEVICE
++ depends on ACPI_VIDEO=n || ACPI_VIDEO
++ depends on SERIO_I8042 || SERIO_I8042 = n
+ select INPUT_SPARSEKMAP
+ help
+ This driver adds support for access to backlight control and hotkeys
+@@ -940,7 +903,8 @@ config I2C_MULTI_INSTANTIATE
+
+ config MLX_PLATFORM
+ tristate "Mellanox Technologies platform support"
+- depends on I2C && REGMAP
++ depends on I2C
++ select REGMAP
+ help
+ This option enables system support for the Mellanox Technologies
+ platform. The Mellanox systems provide data center networking
+diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
+index 69690e26bb6d4..5dba9fe23fb15 100644
+--- a/drivers/platform/x86/Makefile
++++ b/drivers/platform/x86/Makefile
+@@ -52,9 +52,7 @@ obj-$(CONFIG_FUJITSU_TABLET) += fujitsu-tablet.o
+ obj-$(CONFIG_GPD_POCKET_FAN) += gpd-pocket-fan.o
+
+ # Hewlett Packard
+-obj-$(CONFIG_HP_ACCEL) += hp_accel.o
+-obj-$(CONFIG_HP_WMI) += hp-wmi.o
+-obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
++obj-$(CONFIG_X86_PLATFORM_DRIVERS_HP) += hp/
+
+ # Hewlett Packard Enterprise
+ obj-$(CONFIG_UV_SYSFS) += uv_sysfs.o
+@@ -67,7 +65,7 @@ obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o
+ obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o
+
+ # Intel
+-obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL) += intel/
++obj-y += intel/
+
+ # MSI
+ obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
+diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
+index 694b45ed06a21..82516796a53b0 100644
+--- a/drivers/platform/x86/acer-wmi.c
++++ b/drivers/platform/x86/acer-wmi.c
+@@ -99,6 +99,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = {
+ {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */
+ {KE_KEY, 0x23, {KEY_PROG3} }, /* P_Key */
+ {KE_KEY, 0x24, {KEY_PROG4} }, /* Social networking_Key */
++ {KE_KEY, 0x27, {KEY_HELP} },
+ {KE_KEY, 0x29, {KEY_PROG3} }, /* P_Key for TM8372 */
+ {KE_IGNORE, 0x41, {KEY_MUTE} },
+ {KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} },
+@@ -112,7 +113,13 @@ static const struct key_entry acer_wmi_keymap[] __initconst = {
+ {KE_IGNORE, 0x48, {KEY_VOLUMEUP} },
+ {KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} },
+ {KE_IGNORE, 0x4a, {KEY_VOLUMEDOWN} },
+- {KE_IGNORE, 0x61, {KEY_SWITCHVIDEOMODE} },
++ /*
++ * 0x61 is KEY_SWITCHVIDEOMODE. Usually this is a duplicate input event
++ * with the "Video Bus" input device events. But sometimes it is not
++ * a dup. Map it to KEY_UNKNOWN instead of using KE_IGNORE so that
++ * udev/hwdb can override it on systems where it is not a dup.
++ */
++ {KE_KEY, 0x61, {KEY_UNKNOWN} },
+ {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} },
+ {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} },
+ {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */
+@@ -557,6 +564,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
+ },
+ .driver_data = (void *)ACER_CAP_KBD_DOCK,
+ },
++ {
++ .callback = set_force_caps,
++ .ident = "Acer Aspire Switch V 10 SW5-017",
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"),
++ },
++ .driver_data = (void *)ACER_CAP_KBD_DOCK,
++ },
+ {
+ .callback = set_force_caps,
+ .ident = "Acer One 10 (S1003)",
+diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
+index fc95620101e85..83fea28bbb4f7 100644
+--- a/drivers/platform/x86/amd-pmc.c
++++ b/drivers/platform/x86/amd-pmc.c
+@@ -20,6 +20,7 @@
+ #include <linux/module.h>
+ #include <linux/pci.h>
+ #include <linux/platform_device.h>
++#include <linux/serio.h>
+ #include <linux/suspend.h>
+ #include <linux/seq_file.h>
+ #include <linux/uaccess.h>
+@@ -29,6 +30,10 @@
+ #define AMD_PMC_REGISTER_RESPONSE 0x980
+ #define AMD_PMC_REGISTER_ARGUMENT 0x9BC
+
++/* PMC Scratch Registers */
++#define AMD_PMC_SCRATCH_REG_CZN 0x94
++#define AMD_PMC_SCRATCH_REG_YC 0xD14
++
+ /* Base address of SMU for mapping physical address to virtual address */
+ #define AMD_PMC_SMU_INDEX_ADDRESS 0xB8
+ #define AMD_PMC_SMU_INDEX_DATA 0xBC
+@@ -70,7 +75,7 @@
+ #define AMD_CPU_ID_CZN AMD_CPU_ID_RN
+ #define AMD_CPU_ID_YC 0x14B5
+
+-#define PMC_MSG_DELAY_MIN_US 100
++#define PMC_MSG_DELAY_MIN_US 50
+ #define RESPONSE_REGISTER_LOOP_MAX 20000
+
+ #define SOC_SUBSYSTEM_IP_MAX 12
+@@ -110,6 +115,11 @@ struct amd_pmc_dev {
+ u32 base_addr;
+ u32 cpu_id;
+ u32 active_ips;
++/* SMU version information */
++ u8 smu_program;
++ u8 major;
++ u8 minor;
++ u8 rev;
+ struct device *dev;
+ struct mutex lock; /* generic mutex lock */
+ #if IS_ENABLED(CONFIG_DEBUG_FS)
+@@ -147,6 +157,51 @@ struct smu_metrics {
+ u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
+ } __packed;
+
++static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
++{
++ int rc;
++ u32 val;
++
++ rc = amd_pmc_send_cmd(dev, 0, &val, SMU_MSG_GETSMUVERSION, 1);
++ if (rc)
++ return rc;
++
++ dev->smu_program = (val >> 24) & GENMASK(7, 0);
++ dev->major = (val >> 16) & GENMASK(7, 0);
++ dev->minor = (val >> 8) & GENMASK(7, 0);
++ dev->rev = (val >> 0) & GENMASK(7, 0);
++
++ dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n",
++ dev->smu_program, dev->major, dev->minor, dev->rev);
++
++ return 0;
++}
++
++static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
++ struct seq_file *s)
++{
++ u32 val;
++
++ switch (pdev->cpu_id) {
++ case AMD_CPU_ID_CZN:
++ val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_CZN);
++ break;
++ case AMD_CPU_ID_YC:
++ val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC);
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ if (dev)
++ dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
++
++ if (s)
++ seq_printf(s, "SMU idlemask : 0x%x\n", val);
++
++ return 0;
++}
++
+ #ifdef CONFIG_DEBUG_FS
+ static int smu_fw_info_show(struct seq_file *s, void *unused)
+ {
+@@ -201,6 +256,23 @@ static int s0ix_stats_show(struct seq_file *s, void *unused)
+ }
+ DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
+
++static int amd_pmc_idlemask_show(struct seq_file *s, void *unused)
++{
++ struct amd_pmc_dev *dev = s->private;
++ int rc;
++
++ if (dev->major > 56 || (dev->major >= 55 && dev->minor >= 37)) {
++ rc = amd_pmc_idlemask_read(dev, NULL, s);
++ if (rc)
++ return rc;
++ } else {
++ seq_puts(s, "Unsupported SMU version for Idlemask\n");
++ }
++
++ return 0;
++}
++DEFINE_SHOW_ATTRIBUTE(amd_pmc_idlemask);
++
+ static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
+ {
+ debugfs_remove_recursive(dev->dbgfs_dir);
+@@ -213,6 +285,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
+ &smu_fw_info_fops);
+ debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
+ &s0ix_stats_fops);
++ debugfs_create_file("amd_pmc_idlemask", 0644, dev->dbgfs_dir, dev,
++ &amd_pmc_idlemask_fops);
+ }
+ #else
+ static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
+@@ -339,16 +413,54 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
+ return -EINVAL;
+ }
+
++static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
++{
++ struct device *d;
++ int rc;
++
++ if (!pdev->major) {
++ rc = amd_pmc_get_smu_version(pdev);
++ if (rc)
++ return rc;
++ }
++
++ if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65))
++ return 0;
++
++ d = bus_find_device_by_name(&serio_bus, NULL, "serio0");
++ if (!d)
++ return 0;
++ if (device_may_wakeup(d)) {
++ dev_info_once(d, "Disabling IRQ1 wakeup source to avoid platform firmware bug\n");
++ disable_irq_wake(1);
++ device_set_wakeup_enable(d, false);
++ }
++ put_device(d);
++
++ return 0;
++}
++
+ static int __maybe_unused amd_pmc_suspend(struct device *dev)
+ {
+ struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
+ int rc;
+ u8 msg;
+
++ if (pdev->cpu_id == AMD_CPU_ID_CZN) {
++ int rc = amd_pmc_czn_wa_irq1(pdev);
++
++ if (rc) {
++ dev_err(pdev->dev, "failed to adjust keyboard wakeup: %d\n", rc);
++ return rc;
++ }
++ }
++
+ /* Reset and Start SMU logging - to monitor the s0i3 stats */
+ amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+ amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
+
++ /* Dump the IdleMask before we send hint to SMU */
++ amd_pmc_idlemask_read(pdev, dev, NULL);
+ msg = amd_pmc_get_os_hint(pdev);
+ rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
+ if (rc)
+@@ -371,11 +483,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
+ if (rc)
+ dev_err(pdev->dev, "resume failed\n");
+
++ /* Dump the IdleMask to see the blockers */
++ amd_pmc_idlemask_read(pdev, dev, NULL);
++
+ return 0;
+ }
+
+ static const struct dev_pm_ops amd_pmc_pm_ops = {
+- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume)
++ .suspend_noirq = amd_pmc_suspend,
++ .resume_noirq = amd_pmc_resume,
+ };
+
+ static const struct pci_device_id pmc_pci_ids[] = {
+@@ -457,6 +573,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
+ if (err)
+ dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
+
++ amd_pmc_get_smu_version(dev);
+ platform_set_drvdata(pdev, dev);
+ amd_pmc_dbgfs_register(dev);
+ return 0;
+diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
+index 9aae45a452002..57553f9b4d1dc 100644
+--- a/drivers/platform/x86/apple-gmux.c
++++ b/drivers/platform/x86/apple-gmux.c
+@@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
+ }
+
+ gmux_data->iostart = res->start;
+- gmux_data->iolen = res->end - res->start;
++ gmux_data->iolen = resource_size(res);
+
+ if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
+ pr_err("gmux I/O region too small (%lu < %u)\n",
+diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
+index a81dc4b191b77..2c43801a18a28 100644
+--- a/drivers/platform/x86/asus-nb-wmi.c
++++ b/drivers/platform/x86/asus-nb-wmi.c
+@@ -521,6 +521,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
+ { KE_KEY, 0x30, { KEY_VOLUMEUP } },
+ { KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
+ { KE_KEY, 0x32, { KEY_MUTE } },
++ { KE_KEY, 0x33, { KEY_SCREENLOCK } },
+ { KE_KEY, 0x35, { KEY_SCREENLOCK } },
+ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
+ { KE_KEY, 0x41, { KEY_NEXTSONG } },
+@@ -549,6 +550,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
+ { KE_KEY, 0x71, { KEY_F13 } }, /* General-purpose button */
+ { KE_IGNORE, 0x79, }, /* Charger type dectection notification */
+ { KE_KEY, 0x7a, { KEY_ALS_TOGGLE } }, /* Ambient Light Sensor Toggle */
++ { KE_IGNORE, 0x7B, }, /* Charger connect/disconnect notification */
+ { KE_KEY, 0x7c, { KEY_MICMUTE } },
+ { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
+ { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
+@@ -574,6 +576,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
+ { KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */
+ { KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */
+ { KE_KEY, 0xB5, { KEY_CALC } },
++ { KE_IGNORE, 0xC0, }, /* External display connect/disconnect notification */
+ { KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
+ { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
+ { KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */
+diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
+index e14fb5fa73240..f030ea97f1266 100644
+--- a/drivers/platform/x86/asus-wmi.c
++++ b/drivers/platform/x86/asus-wmi.c
+@@ -1511,6 +1511,8 @@ static void asus_wmi_set_xusb2pr(struct asus_wmi *asus)
+ pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR,
+ cpu_to_le32(ports_available));
+
++ pci_dev_put(xhci_pdev);
++
+ pr_info("set USB_INTEL_XUSB2PR old: 0x%04x, new: 0x%04x\n",
+ orig_ports_available, ports_available);
+ }
+diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
+index 2fffa57e596e4..fe224a54f24c0 100644
+--- a/drivers/platform/x86/dell/Kconfig
++++ b/drivers/platform/x86/dell/Kconfig
+@@ -187,7 +187,7 @@ config DELL_WMI_AIO
+
+ config DELL_WMI_DESCRIPTOR
+ tristate
+- default m
++ default n
+ depends on ACPI_WMI
+
+ config DELL_WMI_LED
+diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
+index 089c125e18f70..b83d6fa6e39b3 100644
+--- a/drivers/platform/x86/dell/dell-wmi-base.c
++++ b/drivers/platform/x86/dell/dell-wmi-base.c
+@@ -260,6 +260,9 @@ static const struct key_entry dell_wmi_keymap_type_0010[] = {
+ { KE_KEY, 0x57, { KEY_BRIGHTNESSDOWN } },
+ { KE_KEY, 0x58, { KEY_BRIGHTNESSUP } },
+
++ /*Speaker Mute*/
++ { KE_KEY, 0x109, { KEY_MUTE} },
++
+ /* Mic mute */
+ { KE_KEY, 0x150, { KEY_MICMUTE } },
+
+diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c
+index 074b7e68c227c..7b79e987ca088 100644
+--- a/drivers/platform/x86/dell/dell-wmi-privacy.c
++++ b/drivers/platform/x86/dell/dell-wmi-privacy.c
+@@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = {
+ /* privacy mic mute */
+ { KE_KEY, 0x0001, { KEY_MICMUTE } },
+ /* privacy camera mute */
+- { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } },
++ { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } },
+ { KE_END, 0},
+ };
+
+@@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status)
+
+ switch (code) {
+ case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */
+- case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
+ priv->last_status = status;
+ sparse_keymap_report_entry(priv->input_dev, key, 1, true);
+ ret = true;
+ break;
++ case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
++ priv->last_status = status;
++ sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false);
++ ret = true;
++ break;
+ default:
+ dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code);
+ }
+@@ -295,7 +299,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
+ {
+ struct privacy_wmi_data *priv;
+ struct key_entry *keymap;
+- int ret, i;
++ int ret, i, j;
+
+ ret = wmi_has_guid(DELL_PRIVACY_GUID);
+ if (!ret)
+@@ -307,6 +311,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
+
+ dev_set_drvdata(&wdev->dev, priv);
+ priv->wdev = wdev;
++
++ ret = get_current_status(priv->wdev);
++ if (ret)
++ return ret;
++
+ /* create evdev passing interface */
+ priv->input_dev = devm_input_allocate_device(&wdev->dev);
+ if (!priv->input_dev)
+@@ -321,9 +330,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
+ /* remap the keymap code with Dell privacy key type 0x12 as prefix
+ * KEY_MICMUTE scancode will be reported as 0x120001
+ */
+- for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
+- keymap[i] = dell_wmi_keymap_type_0012[i];
+- keymap[i].code |= (0x0012 << 16);
++ for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
++ /*
++ * Unlike keys where only presses matter, userspace may act
++ * on switches in both of their positions. Only register
++ * SW_CAMERA_LENS_COVER if it is actually there.
++ */
++ if (dell_wmi_keymap_type_0012[i].type == KE_VSW &&
++ dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER &&
++ !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)))
++ continue;
++
++ keymap[j] = dell_wmi_keymap_type_0012[i];
++ keymap[j].code |= (0x0012 << 16);
++ j++;
+ }
+ ret = sparse_keymap_setup(priv->input_dev, keymap, NULL);
+ kfree(keymap);
+@@ -334,11 +354,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
+ priv->input_dev->name = "Dell Privacy Driver";
+ priv->input_dev->id.bustype = BUS_HOST;
+
+- ret = input_register_device(priv->input_dev);
+- if (ret)
+- return ret;
++ /* Report initial camera-cover status */
++ if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))
++ input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER,
++ !(priv->last_status & CAMERA_STATUS));
+
+- ret = get_current_status(priv->wdev);
++ ret = input_register_device(priv->input_dev);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+index 636bdfa83284d..907fde53e95c4 100644
+--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
++++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+@@ -396,6 +396,7 @@ static int init_bios_attributes(int attr_type, const char *guid)
+ struct kobject *attr_name_kobj; //individual attribute names
+ union acpi_object *obj = NULL;
+ union acpi_object *elements;
++ struct kobject *duplicate;
+ struct kset *tmp_set;
+ int min_elements;
+
+@@ -454,9 +455,11 @@ static int init_bios_attributes(int attr_type, const char *guid)
+ else
+ tmp_set = wmi_priv.main_dir_kset;
+
+- if (kset_find_obj(tmp_set, elements[ATTR_NAME].string.pointer)) {
+- pr_debug("duplicate attribute name found - %s\n",
+- elements[ATTR_NAME].string.pointer);
++ duplicate = kset_find_obj(tmp_set, elements[ATTR_NAME].string.pointer);
++ if (duplicate) {
++ pr_debug("Duplicate attribute name found - %s\n",
++ elements[ATTR_NAME].string.pointer);
++ kobject_put(duplicate);
+ goto nextobj;
+ }
+
+diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
+index 658bab4b79648..bf1b98dd00b99 100644
+--- a/drivers/platform/x86/gigabyte-wmi.c
++++ b/drivers/platform/x86/gigabyte-wmi.c
+@@ -140,6 +140,9 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
+ }}
+
+ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("A320M-S2H V2-CF"),
++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"),
++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H WIFI-CF"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
+@@ -153,6 +156,8 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570S AORUS ELITE"),
++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"),
+ { }
+ };
+
+diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
+deleted file mode 100644
+index 027a1467d009f..0000000000000
+--- a/drivers/platform/x86/hp-wmi.c
++++ /dev/null
+@@ -1,1121 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * HP WMI hotkeys
+- *
+- * Copyright (C) 2008 Red Hat <mjg@redhat.com>
+- * Copyright (C) 2010, 2011 Anssi Hannula <anssi.hannula@iki.fi>
+- *
+- * Portions based on wistron_btns.c:
+- * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+- * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+- * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+- */
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/init.h>
+-#include <linux/slab.h>
+-#include <linux/types.h>
+-#include <linux/input.h>
+-#include <linux/input/sparse-keymap.h>
+-#include <linux/platform_device.h>
+-#include <linux/platform_profile.h>
+-#include <linux/acpi.h>
+-#include <linux/rfkill.h>
+-#include <linux/string.h>
+-
+-MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
+-MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
+-MODULE_LICENSE("GPL");
+-
+-MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
+-MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
+-
+-static int enable_tablet_mode_sw = -1;
+-module_param(enable_tablet_mode_sw, int, 0444);
+-MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)");
+-
+-#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
+-#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
+-
+-enum hp_wmi_radio {
+- HPWMI_WIFI = 0x0,
+- HPWMI_BLUETOOTH = 0x1,
+- HPWMI_WWAN = 0x2,
+- HPWMI_GPS = 0x3,
+-};
+-
+-enum hp_wmi_event_ids {
+- HPWMI_DOCK_EVENT = 0x01,
+- HPWMI_PARK_HDD = 0x02,
+- HPWMI_SMART_ADAPTER = 0x03,
+- HPWMI_BEZEL_BUTTON = 0x04,
+- HPWMI_WIRELESS = 0x05,
+- HPWMI_CPU_BATTERY_THROTTLE = 0x06,
+- HPWMI_LOCK_SWITCH = 0x07,
+- HPWMI_LID_SWITCH = 0x08,
+- HPWMI_SCREEN_ROTATION = 0x09,
+- HPWMI_COOLSENSE_SYSTEM_MOBILE = 0x0A,
+- HPWMI_COOLSENSE_SYSTEM_HOT = 0x0B,
+- HPWMI_PROXIMITY_SENSOR = 0x0C,
+- HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D,
+- HPWMI_PEAKSHIFT_PERIOD = 0x0F,
+- HPWMI_BATTERY_CHARGE_PERIOD = 0x10,
+-};
+-
+-struct bios_args {
+- u32 signature;
+- u32 command;
+- u32 commandtype;
+- u32 datasize;
+- u8 data[128];
+-};
+-
+-enum hp_wmi_commandtype {
+- HPWMI_DISPLAY_QUERY = 0x01,
+- HPWMI_HDDTEMP_QUERY = 0x02,
+- HPWMI_ALS_QUERY = 0x03,
+- HPWMI_HARDWARE_QUERY = 0x04,
+- HPWMI_WIRELESS_QUERY = 0x05,
+- HPWMI_BATTERY_QUERY = 0x07,
+- HPWMI_BIOS_QUERY = 0x09,
+- HPWMI_FEATURE_QUERY = 0x0b,
+- HPWMI_HOTKEY_QUERY = 0x0c,
+- HPWMI_FEATURE2_QUERY = 0x0d,
+- HPWMI_WIRELESS2_QUERY = 0x1b,
+- HPWMI_POSTCODEERROR_QUERY = 0x2a,
+- HPWMI_THERMAL_PROFILE_QUERY = 0x4c,
+-};
+-
+-enum hp_wmi_command {
+- HPWMI_READ = 0x01,
+- HPWMI_WRITE = 0x02,
+- HPWMI_ODM = 0x03,
+-};
+-
+-enum hp_wmi_hardware_mask {
+- HPWMI_DOCK_MASK = 0x01,
+- HPWMI_TABLET_MASK = 0x04,
+-};
+-
+-struct bios_return {
+- u32 sigpass;
+- u32 return_code;
+-};
+-
+-enum hp_return_value {
+- HPWMI_RET_WRONG_SIGNATURE = 0x02,
+- HPWMI_RET_UNKNOWN_COMMAND = 0x03,
+- HPWMI_RET_UNKNOWN_CMDTYPE = 0x04,
+- HPWMI_RET_INVALID_PARAMETERS = 0x05,
+-};
+-
+-enum hp_wireless2_bits {
+- HPWMI_POWER_STATE = 0x01,
+- HPWMI_POWER_SOFT = 0x02,
+- HPWMI_POWER_BIOS = 0x04,
+- HPWMI_POWER_HARD = 0x08,
+- HPWMI_POWER_FW_OR_HW = HPWMI_POWER_BIOS | HPWMI_POWER_HARD,
+-};
+-
+-enum hp_thermal_profile {
+- HP_THERMAL_PROFILE_PERFORMANCE = 0x00,
+- HP_THERMAL_PROFILE_DEFAULT = 0x01,
+- HP_THERMAL_PROFILE_COOL = 0x02
+-};
+-
+-#define IS_HWBLOCKED(x) ((x & HPWMI_POWER_FW_OR_HW) != HPWMI_POWER_FW_OR_HW)
+-#define IS_SWBLOCKED(x) !(x & HPWMI_POWER_SOFT)
+-
+-struct bios_rfkill2_device_state {
+- u8 radio_type;
+- u8 bus_type;
+- u16 vendor_id;
+- u16 product_id;
+- u16 subsys_vendor_id;
+- u16 subsys_product_id;
+- u8 rfkill_id;
+- u8 power;
+- u8 unknown[4];
+-};
+-
+-/* 7 devices fit into the 128 byte buffer */
+-#define HPWMI_MAX_RFKILL2_DEVICES 7
+-
+-struct bios_rfkill2_state {
+- u8 unknown[7];
+- u8 count;
+- u8 pad[8];
+- struct bios_rfkill2_device_state device[HPWMI_MAX_RFKILL2_DEVICES];
+-};
+-
+-static const struct key_entry hp_wmi_keymap[] = {
+- { KE_KEY, 0x02, { KEY_BRIGHTNESSUP } },
+- { KE_KEY, 0x03, { KEY_BRIGHTNESSDOWN } },
+- { KE_KEY, 0x20e6, { KEY_PROG1 } },
+- { KE_KEY, 0x20e8, { KEY_MEDIA } },
+- { KE_KEY, 0x2142, { KEY_MEDIA } },
+- { KE_KEY, 0x213b, { KEY_INFO } },
+- { KE_KEY, 0x2169, { KEY_ROTATE_DISPLAY } },
+- { KE_KEY, 0x216a, { KEY_SETUP } },
+- { KE_KEY, 0x231b, { KEY_HELP } },
+- { KE_END, 0 }
+-};
+-
+-static struct input_dev *hp_wmi_input_dev;
+-static struct platform_device *hp_wmi_platform_dev;
+-static struct platform_profile_handler platform_profile_handler;
+-static bool platform_profile_support;
+-
+-static struct rfkill *wifi_rfkill;
+-static struct rfkill *bluetooth_rfkill;
+-static struct rfkill *wwan_rfkill;
+-
+-struct rfkill2_device {
+- u8 id;
+- int num;
+- struct rfkill *rfkill;
+-};
+-
+-static int rfkill2_count;
+-static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES];
+-
+-/* map output size to the corresponding WMI method id */
+-static inline int encode_outsize_for_pvsz(int outsize)
+-{
+- if (outsize > 4096)
+- return -EINVAL;
+- if (outsize > 1024)
+- return 5;
+- if (outsize > 128)
+- return 4;
+- if (outsize > 4)
+- return 3;
+- if (outsize > 0)
+- return 2;
+- return 1;
+-}
+-
+-/*
+- * hp_wmi_perform_query
+- *
+- * query: The commandtype (enum hp_wmi_commandtype)
+- * write: The command (enum hp_wmi_command)
+- * buffer: Buffer used as input and/or output
+- * insize: Size of input buffer
+- * outsize: Size of output buffer
+- *
+- * returns zero on success
+- * an HP WMI query specific error code (which is positive)
+- * -EINVAL if the query was not successful at all
+- * -EINVAL if the output buffer size exceeds buffersize
+- *
+- * Note: The buffersize must at least be the maximum of the input and output
+- * size. E.g. Battery info query is defined to have 1 byte input
+- * and 128 byte output. The caller would do:
+- * buffer = kzalloc(128, GFP_KERNEL);
+- * ret = hp_wmi_perform_query(HPWMI_BATTERY_QUERY, HPWMI_READ, buffer, 1, 128)
+- */
+-static int hp_wmi_perform_query(int query, enum hp_wmi_command command,
+- void *buffer, int insize, int outsize)
+-{
+- int mid;
+- struct bios_return *bios_return;
+- int actual_outsize;
+- union acpi_object *obj;
+- struct bios_args args = {
+- .signature = 0x55434553,
+- .command = command,
+- .commandtype = query,
+- .datasize = insize,
+- .data = { 0 },
+- };
+- struct acpi_buffer input = { sizeof(struct bios_args), &args };
+- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+- int ret = 0;
+-
+- mid = encode_outsize_for_pvsz(outsize);
+- if (WARN_ON(mid < 0))
+- return mid;
+-
+- if (WARN_ON(insize > sizeof(args.data)))
+- return -EINVAL;
+- memcpy(&args.data[0], buffer, insize);
+-
+- wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output);
+-
+- obj = output.pointer;
+-
+- if (!obj)
+- return -EINVAL;
+-
+- if (obj->type != ACPI_TYPE_BUFFER) {
+- ret = -EINVAL;
+- goto out_free;
+- }
+-
+- bios_return = (struct bios_return *)obj->buffer.pointer;
+- ret = bios_return->return_code;
+-
+- if (ret) {
+- if (ret != HPWMI_RET_UNKNOWN_COMMAND &&
+- ret != HPWMI_RET_UNKNOWN_CMDTYPE)
+- pr_warn("query 0x%x returned error 0x%x\n", query, ret);
+- goto out_free;
+- }
+-
+- /* Ignore output data of zero size */
+- if (!outsize)
+- goto out_free;
+-
+- actual_outsize = min(outsize, (int)(obj->buffer.length - sizeof(*bios_return)));
+- memcpy(buffer, obj->buffer.pointer + sizeof(*bios_return), actual_outsize);
+- memset(buffer + actual_outsize, 0, outsize - actual_outsize);
+-
+-out_free:
+- kfree(obj);
+- return ret;
+-}
+-
+-static int hp_wmi_read_int(int query)
+-{
+- int val = 0, ret;
+-
+- ret = hp_wmi_perform_query(query, HPWMI_READ, &val,
+- sizeof(val), sizeof(val));
+-
+- if (ret)
+- return ret < 0 ? ret : -EINVAL;
+-
+- return val;
+-}
+-
+-static int hp_wmi_hw_state(int mask)
+-{
+- int state = hp_wmi_read_int(HPWMI_HARDWARE_QUERY);
+-
+- if (state < 0)
+- return state;
+-
+- return !!(state & mask);
+-}
+-
+-static int __init hp_wmi_bios_2008_later(void)
+-{
+- int state = 0;
+- int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state,
+- sizeof(state), sizeof(state));
+- if (!ret)
+- return 1;
+-
+- return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO;
+-}
+-
+-static int __init hp_wmi_bios_2009_later(void)
+-{
+- u8 state[128];
+- int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state,
+- sizeof(state), sizeof(state));
+- if (!ret)
+- return 1;
+-
+- return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO;
+-}
+-
+-static int __init hp_wmi_enable_hotkeys(void)
+-{
+- int value = 0x6e;
+- int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, HPWMI_WRITE, &value,
+- sizeof(value), 0);
+-
+- return ret <= 0 ? ret : -EINVAL;
+-}
+-
+-static int hp_wmi_set_block(void *data, bool blocked)
+-{
+- enum hp_wmi_radio r = (enum hp_wmi_radio) data;
+- int query = BIT(r + 8) | ((!blocked) << r);
+- int ret;
+-
+- ret = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, HPWMI_WRITE,
+- &query, sizeof(query), 0);
+-
+- return ret <= 0 ? ret : -EINVAL;
+-}
+-
+-static const struct rfkill_ops hp_wmi_rfkill_ops = {
+- .set_block = hp_wmi_set_block,
+-};
+-
+-static bool hp_wmi_get_sw_state(enum hp_wmi_radio r)
+-{
+- int mask = 0x200 << (r * 8);
+-
+- int wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
+-
+- /* TBD: Pass error */
+- WARN_ONCE(wireless < 0, "error executing HPWMI_WIRELESS_QUERY");
+-
+- return !(wireless & mask);
+-}
+-
+-static bool hp_wmi_get_hw_state(enum hp_wmi_radio r)
+-{
+- int mask = 0x800 << (r * 8);
+-
+- int wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
+-
+- /* TBD: Pass error */
+- WARN_ONCE(wireless < 0, "error executing HPWMI_WIRELESS_QUERY");
+-
+- return !(wireless & mask);
+-}
+-
+-static int hp_wmi_rfkill2_set_block(void *data, bool blocked)
+-{
+- int rfkill_id = (int)(long)data;
+- char buffer[4] = { 0x01, 0x00, rfkill_id, !blocked };
+- int ret;
+-
+- ret = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_WRITE,
+- buffer, sizeof(buffer), 0);
+-
+- return ret <= 0 ? ret : -EINVAL;
+-}
+-
+-static const struct rfkill_ops hp_wmi_rfkill2_ops = {
+- .set_block = hp_wmi_rfkill2_set_block,
+-};
+-
+-static int hp_wmi_rfkill2_refresh(void)
+-{
+- struct bios_rfkill2_state state;
+- int err, i;
+-
+- err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
+- sizeof(state), sizeof(state));
+- if (err)
+- return err;
+-
+- for (i = 0; i < rfkill2_count; i++) {
+- int num = rfkill2[i].num;
+- struct bios_rfkill2_device_state *devstate;
+- devstate = &state.device[num];
+-
+- if (num >= state.count ||
+- devstate->rfkill_id != rfkill2[i].id) {
+- pr_warn("power configuration of the wireless devices unexpectedly changed\n");
+- continue;
+- }
+-
+- rfkill_set_states(rfkill2[i].rfkill,
+- IS_SWBLOCKED(devstate->power),
+- IS_HWBLOCKED(devstate->power));
+- }
+-
+- return 0;
+-}
+-
+-static ssize_t display_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- int value = hp_wmi_read_int(HPWMI_DISPLAY_QUERY);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "%d\n", value);
+-}
+-
+-static ssize_t hddtemp_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- int value = hp_wmi_read_int(HPWMI_HDDTEMP_QUERY);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "%d\n", value);
+-}
+-
+-static ssize_t als_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- int value = hp_wmi_read_int(HPWMI_ALS_QUERY);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "%d\n", value);
+-}
+-
+-static ssize_t dock_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- int value = hp_wmi_hw_state(HPWMI_DOCK_MASK);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "%d\n", value);
+-}
+-
+-static ssize_t tablet_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- int value = hp_wmi_hw_state(HPWMI_TABLET_MASK);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "%d\n", value);
+-}
+-
+-static ssize_t postcode_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- /* Get the POST error code of previous boot failure. */
+- int value = hp_wmi_read_int(HPWMI_POSTCODEERROR_QUERY);
+- if (value < 0)
+- return value;
+- return sprintf(buf, "0x%x\n", value);
+-}
+-
+-static ssize_t als_store(struct device *dev, struct device_attribute *attr,
+- const char *buf, size_t count)
+-{
+- u32 tmp;
+- int ret;
+-
+- ret = kstrtou32(buf, 10, &tmp);
+- if (ret)
+- return ret;
+-
+- ret = hp_wmi_perform_query(HPWMI_ALS_QUERY, HPWMI_WRITE, &tmp,
+- sizeof(tmp), sizeof(tmp));
+- if (ret)
+- return ret < 0 ? ret : -EINVAL;
+-
+- return count;
+-}
+-
+-static ssize_t postcode_store(struct device *dev, struct device_attribute *attr,
+- const char *buf, size_t count)
+-{
+- u32 tmp = 1;
+- bool clear;
+- int ret;
+-
+- ret = kstrtobool(buf, &clear);
+- if (ret)
+- return ret;
+-
+- if (clear == false)
+- return -EINVAL;
+-
+- /* Clear the POST error code. It is kept until until cleared. */
+- ret = hp_wmi_perform_query(HPWMI_POSTCODEERROR_QUERY, HPWMI_WRITE, &tmp,
+- sizeof(tmp), sizeof(tmp));
+- if (ret)
+- return ret < 0 ? ret : -EINVAL;
+-
+- return count;
+-}
+-
+-static DEVICE_ATTR_RO(display);
+-static DEVICE_ATTR_RO(hddtemp);
+-static DEVICE_ATTR_RW(als);
+-static DEVICE_ATTR_RO(dock);
+-static DEVICE_ATTR_RO(tablet);
+-static DEVICE_ATTR_RW(postcode);
+-
+-static struct attribute *hp_wmi_attrs[] = {
+- &dev_attr_display.attr,
+- &dev_attr_hddtemp.attr,
+- &dev_attr_als.attr,
+- &dev_attr_dock.attr,
+- &dev_attr_tablet.attr,
+- &dev_attr_postcode.attr,
+- NULL,
+-};
+-ATTRIBUTE_GROUPS(hp_wmi);
+-
+-static void hp_wmi_notify(u32 value, void *context)
+-{
+- struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
+- u32 event_id, event_data;
+- union acpi_object *obj;
+- acpi_status status;
+- u32 *location;
+- int key_code;
+-
+- status = wmi_get_event_data(value, &response);
+- if (status != AE_OK) {
+- pr_info("bad event status 0x%x\n", status);
+- return;
+- }
+-
+- obj = (union acpi_object *)response.pointer;
+-
+- if (!obj)
+- return;
+- if (obj->type != ACPI_TYPE_BUFFER) {
+- pr_info("Unknown response received %d\n", obj->type);
+- kfree(obj);
+- return;
+- }
+-
+- /*
+- * Depending on ACPI version the concatenation of id and event data
+- * inside _WED function will result in a 8 or 16 byte buffer.
+- */
+- location = (u32 *)obj->buffer.pointer;
+- if (obj->buffer.length == 8) {
+- event_id = *location;
+- event_data = *(location + 1);
+- } else if (obj->buffer.length == 16) {
+- event_id = *location;
+- event_data = *(location + 2);
+- } else {
+- pr_info("Unknown buffer length %d\n", obj->buffer.length);
+- kfree(obj);
+- return;
+- }
+- kfree(obj);
+-
+- switch (event_id) {
+- case HPWMI_DOCK_EVENT:
+- if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit))
+- input_report_switch(hp_wmi_input_dev, SW_DOCK,
+- hp_wmi_hw_state(HPWMI_DOCK_MASK));
+- if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit))
+- input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
+- hp_wmi_hw_state(HPWMI_TABLET_MASK));
+- input_sync(hp_wmi_input_dev);
+- break;
+- case HPWMI_PARK_HDD:
+- break;
+- case HPWMI_SMART_ADAPTER:
+- break;
+- case HPWMI_BEZEL_BUTTON:
+- key_code = hp_wmi_read_int(HPWMI_HOTKEY_QUERY);
+- if (key_code < 0)
+- break;
+-
+- if (!sparse_keymap_report_event(hp_wmi_input_dev,
+- key_code, 1, true))
+- pr_info("Unknown key code - 0x%x\n", key_code);
+- break;
+- case HPWMI_WIRELESS:
+- if (rfkill2_count) {
+- hp_wmi_rfkill2_refresh();
+- break;
+- }
+-
+- if (wifi_rfkill)
+- rfkill_set_states(wifi_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WIFI),
+- hp_wmi_get_hw_state(HPWMI_WIFI));
+- if (bluetooth_rfkill)
+- rfkill_set_states(bluetooth_rfkill,
+- hp_wmi_get_sw_state(HPWMI_BLUETOOTH),
+- hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
+- if (wwan_rfkill)
+- rfkill_set_states(wwan_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WWAN),
+- hp_wmi_get_hw_state(HPWMI_WWAN));
+- break;
+- case HPWMI_CPU_BATTERY_THROTTLE:
+- pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
+- break;
+- case HPWMI_LOCK_SWITCH:
+- break;
+- case HPWMI_LID_SWITCH:
+- break;
+- case HPWMI_SCREEN_ROTATION:
+- break;
+- case HPWMI_COOLSENSE_SYSTEM_MOBILE:
+- break;
+- case HPWMI_COOLSENSE_SYSTEM_HOT:
+- break;
+- case HPWMI_PROXIMITY_SENSOR:
+- break;
+- case HPWMI_BACKLIT_KB_BRIGHTNESS:
+- break;
+- case HPWMI_PEAKSHIFT_PERIOD:
+- break;
+- case HPWMI_BATTERY_CHARGE_PERIOD:
+- break;
+- default:
+- pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data);
+- break;
+- }
+-}
+-
+-static int __init hp_wmi_input_setup(void)
+-{
+- acpi_status status;
+- int err, val;
+-
+- hp_wmi_input_dev = input_allocate_device();
+- if (!hp_wmi_input_dev)
+- return -ENOMEM;
+-
+- hp_wmi_input_dev->name = "HP WMI hotkeys";
+- hp_wmi_input_dev->phys = "wmi/input0";
+- hp_wmi_input_dev->id.bustype = BUS_HOST;
+-
+- __set_bit(EV_SW, hp_wmi_input_dev->evbit);
+-
+- /* Dock */
+- val = hp_wmi_hw_state(HPWMI_DOCK_MASK);
+- if (!(val < 0)) {
+- __set_bit(SW_DOCK, hp_wmi_input_dev->swbit);
+- input_report_switch(hp_wmi_input_dev, SW_DOCK, val);
+- }
+-
+- /* Tablet mode */
+- if (enable_tablet_mode_sw > 0) {
+- val = hp_wmi_hw_state(HPWMI_TABLET_MASK);
+- if (val >= 0) {
+- __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit);
+- input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val);
+- }
+- }
+-
+- err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL);
+- if (err)
+- goto err_free_dev;
+-
+- /* Set initial hardware state */
+- input_sync(hp_wmi_input_dev);
+-
+- if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later())
+- hp_wmi_enable_hotkeys();
+-
+- status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL);
+- if (ACPI_FAILURE(status)) {
+- err = -EIO;
+- goto err_free_dev;
+- }
+-
+- err = input_register_device(hp_wmi_input_dev);
+- if (err)
+- goto err_uninstall_notifier;
+-
+- return 0;
+-
+- err_uninstall_notifier:
+- wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+- err_free_dev:
+- input_free_device(hp_wmi_input_dev);
+- return err;
+-}
+-
+-static void hp_wmi_input_destroy(void)
+-{
+- wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+- input_unregister_device(hp_wmi_input_dev);
+-}
+-
+-static int __init hp_wmi_rfkill_setup(struct platform_device *device)
+-{
+- int err, wireless;
+-
+- wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
+- if (wireless < 0)
+- return wireless;
+-
+- err = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, HPWMI_WRITE, &wireless,
+- sizeof(wireless), 0);
+- if (err)
+- return err;
+-
+- if (wireless & 0x1) {
+- wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev,
+- RFKILL_TYPE_WLAN,
+- &hp_wmi_rfkill_ops,
+- (void *) HPWMI_WIFI);
+- if (!wifi_rfkill)
+- return -ENOMEM;
+- rfkill_init_sw_state(wifi_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WIFI));
+- rfkill_set_hw_state(wifi_rfkill,
+- hp_wmi_get_hw_state(HPWMI_WIFI));
+- err = rfkill_register(wifi_rfkill);
+- if (err)
+- goto register_wifi_error;
+- }
+-
+- if (wireless & 0x2) {
+- bluetooth_rfkill = rfkill_alloc("hp-bluetooth", &device->dev,
+- RFKILL_TYPE_BLUETOOTH,
+- &hp_wmi_rfkill_ops,
+- (void *) HPWMI_BLUETOOTH);
+- if (!bluetooth_rfkill) {
+- err = -ENOMEM;
+- goto register_bluetooth_error;
+- }
+- rfkill_init_sw_state(bluetooth_rfkill,
+- hp_wmi_get_sw_state(HPWMI_BLUETOOTH));
+- rfkill_set_hw_state(bluetooth_rfkill,
+- hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
+- err = rfkill_register(bluetooth_rfkill);
+- if (err)
+- goto register_bluetooth_error;
+- }
+-
+- if (wireless & 0x4) {
+- wwan_rfkill = rfkill_alloc("hp-wwan", &device->dev,
+- RFKILL_TYPE_WWAN,
+- &hp_wmi_rfkill_ops,
+- (void *) HPWMI_WWAN);
+- if (!wwan_rfkill) {
+- err = -ENOMEM;
+- goto register_wwan_error;
+- }
+- rfkill_init_sw_state(wwan_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WWAN));
+- rfkill_set_hw_state(wwan_rfkill,
+- hp_wmi_get_hw_state(HPWMI_WWAN));
+- err = rfkill_register(wwan_rfkill);
+- if (err)
+- goto register_wwan_error;
+- }
+-
+- return 0;
+-
+-register_wwan_error:
+- rfkill_destroy(wwan_rfkill);
+- wwan_rfkill = NULL;
+- if (bluetooth_rfkill)
+- rfkill_unregister(bluetooth_rfkill);
+-register_bluetooth_error:
+- rfkill_destroy(bluetooth_rfkill);
+- bluetooth_rfkill = NULL;
+- if (wifi_rfkill)
+- rfkill_unregister(wifi_rfkill);
+-register_wifi_error:
+- rfkill_destroy(wifi_rfkill);
+- wifi_rfkill = NULL;
+- return err;
+-}
+-
+-static int __init hp_wmi_rfkill2_setup(struct platform_device *device)
+-{
+- struct bios_rfkill2_state state;
+- int err, i;
+-
+- err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
+- sizeof(state), sizeof(state));
+- if (err)
+- return err < 0 ? err : -EINVAL;
+-
+- if (state.count > HPWMI_MAX_RFKILL2_DEVICES) {
+- pr_warn("unable to parse 0x1b query output\n");
+- return -EINVAL;
+- }
+-
+- for (i = 0; i < state.count; i++) {
+- struct rfkill *rfkill;
+- enum rfkill_type type;
+- char *name;
+- switch (state.device[i].radio_type) {
+- case HPWMI_WIFI:
+- type = RFKILL_TYPE_WLAN;
+- name = "hp-wifi";
+- break;
+- case HPWMI_BLUETOOTH:
+- type = RFKILL_TYPE_BLUETOOTH;
+- name = "hp-bluetooth";
+- break;
+- case HPWMI_WWAN:
+- type = RFKILL_TYPE_WWAN;
+- name = "hp-wwan";
+- break;
+- case HPWMI_GPS:
+- type = RFKILL_TYPE_GPS;
+- name = "hp-gps";
+- break;
+- default:
+- pr_warn("unknown device type 0x%x\n",
+- state.device[i].radio_type);
+- continue;
+- }
+-
+- if (!state.device[i].vendor_id) {
+- pr_warn("zero device %d while %d reported\n",
+- i, state.count);
+- continue;
+- }
+-
+- rfkill = rfkill_alloc(name, &device->dev, type,
+- &hp_wmi_rfkill2_ops, (void *)(long)i);
+- if (!rfkill) {
+- err = -ENOMEM;
+- goto fail;
+- }
+-
+- rfkill2[rfkill2_count].id = state.device[i].rfkill_id;
+- rfkill2[rfkill2_count].num = i;
+- rfkill2[rfkill2_count].rfkill = rfkill;
+-
+- rfkill_init_sw_state(rfkill,
+- IS_SWBLOCKED(state.device[i].power));
+- rfkill_set_hw_state(rfkill,
+- IS_HWBLOCKED(state.device[i].power));
+-
+- if (!(state.device[i].power & HPWMI_POWER_BIOS))
+- pr_info("device %s blocked by BIOS\n", name);
+-
+- err = rfkill_register(rfkill);
+- if (err) {
+- rfkill_destroy(rfkill);
+- goto fail;
+- }
+-
+- rfkill2_count++;
+- }
+-
+- return 0;
+-fail:
+- for (; rfkill2_count > 0; rfkill2_count--) {
+- rfkill_unregister(rfkill2[rfkill2_count - 1].rfkill);
+- rfkill_destroy(rfkill2[rfkill2_count - 1].rfkill);
+- }
+- return err;
+-}
+-
+-static int thermal_profile_get(void)
+-{
+- return hp_wmi_read_int(HPWMI_THERMAL_PROFILE_QUERY);
+-}
+-
+-static int thermal_profile_set(int thermal_profile)
+-{
+- return hp_wmi_perform_query(HPWMI_THERMAL_PROFILE_QUERY, HPWMI_WRITE, &thermal_profile,
+- sizeof(thermal_profile), 0);
+-}
+-
+-static int platform_profile_get(struct platform_profile_handler *pprof,
+- enum platform_profile_option *profile)
+-{
+- int tp;
+-
+- tp = thermal_profile_get();
+- if (tp < 0)
+- return tp;
+-
+- switch (tp) {
+- case HP_THERMAL_PROFILE_PERFORMANCE:
+- *profile = PLATFORM_PROFILE_PERFORMANCE;
+- break;
+- case HP_THERMAL_PROFILE_DEFAULT:
+- *profile = PLATFORM_PROFILE_BALANCED;
+- break;
+- case HP_THERMAL_PROFILE_COOL:
+- *profile = PLATFORM_PROFILE_COOL;
+- break;
+- default:
+- return -EINVAL;
+- }
+-
+- return 0;
+-}
+-
+-static int platform_profile_set(struct platform_profile_handler *pprof,
+- enum platform_profile_option profile)
+-{
+- int err, tp;
+-
+- switch (profile) {
+- case PLATFORM_PROFILE_PERFORMANCE:
+- tp = HP_THERMAL_PROFILE_PERFORMANCE;
+- break;
+- case PLATFORM_PROFILE_BALANCED:
+- tp = HP_THERMAL_PROFILE_DEFAULT;
+- break;
+- case PLATFORM_PROFILE_COOL:
+- tp = HP_THERMAL_PROFILE_COOL;
+- break;
+- default:
+- return -EOPNOTSUPP;
+- }
+-
+- err = thermal_profile_set(tp);
+- if (err)
+- return err;
+-
+- return 0;
+-}
+-
+-static int thermal_profile_setup(void)
+-{
+- int err, tp;
+-
+- tp = thermal_profile_get();
+- if (tp < 0)
+- return tp;
+-
+- /*
+- * call thermal profile write command to ensure that the firmware correctly
+- * sets the OEM variables for the DPTF
+- */
+- err = thermal_profile_set(tp);
+- if (err)
+- return err;
+-
+- platform_profile_handler.profile_get = platform_profile_get,
+- platform_profile_handler.profile_set = platform_profile_set,
+-
+- set_bit(PLATFORM_PROFILE_COOL, platform_profile_handler.choices);
+- set_bit(PLATFORM_PROFILE_BALANCED, platform_profile_handler.choices);
+- set_bit(PLATFORM_PROFILE_PERFORMANCE, platform_profile_handler.choices);
+-
+- err = platform_profile_register(&platform_profile_handler);
+- if (err)
+- return err;
+-
+- platform_profile_support = true;
+-
+- return 0;
+-}
+-
+-static int __init hp_wmi_bios_setup(struct platform_device *device)
+-{
+- /* clear detected rfkill devices */
+- wifi_rfkill = NULL;
+- bluetooth_rfkill = NULL;
+- wwan_rfkill = NULL;
+- rfkill2_count = 0;
+-
+- if (hp_wmi_rfkill_setup(device))
+- hp_wmi_rfkill2_setup(device);
+-
+- thermal_profile_setup();
+-
+- return 0;
+-}
+-
+-static int __exit hp_wmi_bios_remove(struct platform_device *device)
+-{
+- int i;
+-
+- for (i = 0; i < rfkill2_count; i++) {
+- rfkill_unregister(rfkill2[i].rfkill);
+- rfkill_destroy(rfkill2[i].rfkill);
+- }
+-
+- if (wifi_rfkill) {
+- rfkill_unregister(wifi_rfkill);
+- rfkill_destroy(wifi_rfkill);
+- }
+- if (bluetooth_rfkill) {
+- rfkill_unregister(bluetooth_rfkill);
+- rfkill_destroy(bluetooth_rfkill);
+- }
+- if (wwan_rfkill) {
+- rfkill_unregister(wwan_rfkill);
+- rfkill_destroy(wwan_rfkill);
+- }
+-
+- if (platform_profile_support)
+- platform_profile_remove();
+-
+- return 0;
+-}
+-
+-static int hp_wmi_resume_handler(struct device *device)
+-{
+- /*
+- * Hardware state may have changed while suspended, so trigger
+- * input events for the current state. As this is a switch,
+- * the input layer will only actually pass it on if the state
+- * changed.
+- */
+- if (hp_wmi_input_dev) {
+- if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit))
+- input_report_switch(hp_wmi_input_dev, SW_DOCK,
+- hp_wmi_hw_state(HPWMI_DOCK_MASK));
+- if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit))
+- input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
+- hp_wmi_hw_state(HPWMI_TABLET_MASK));
+- input_sync(hp_wmi_input_dev);
+- }
+-
+- if (rfkill2_count)
+- hp_wmi_rfkill2_refresh();
+-
+- if (wifi_rfkill)
+- rfkill_set_states(wifi_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WIFI),
+- hp_wmi_get_hw_state(HPWMI_WIFI));
+- if (bluetooth_rfkill)
+- rfkill_set_states(bluetooth_rfkill,
+- hp_wmi_get_sw_state(HPWMI_BLUETOOTH),
+- hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
+- if (wwan_rfkill)
+- rfkill_set_states(wwan_rfkill,
+- hp_wmi_get_sw_state(HPWMI_WWAN),
+- hp_wmi_get_hw_state(HPWMI_WWAN));
+-
+- return 0;
+-}
+-
+-static const struct dev_pm_ops hp_wmi_pm_ops = {
+- .resume = hp_wmi_resume_handler,
+- .restore = hp_wmi_resume_handler,
+-};
+-
+-static struct platform_driver hp_wmi_driver = {
+- .driver = {
+- .name = "hp-wmi",
+- .pm = &hp_wmi_pm_ops,
+- .dev_groups = hp_wmi_groups,
+- },
+- .remove = __exit_p(hp_wmi_bios_remove),
+-};
+-
+-static int __init hp_wmi_init(void)
+-{
+- int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
+- int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID);
+- int err;
+-
+- if (!bios_capable && !event_capable)
+- return -ENODEV;
+-
+- if (event_capable) {
+- err = hp_wmi_input_setup();
+- if (err)
+- return err;
+- }
+-
+- if (bios_capable) {
+- hp_wmi_platform_dev =
+- platform_device_register_simple("hp-wmi", -1, NULL, 0);
+- if (IS_ERR(hp_wmi_platform_dev)) {
+- err = PTR_ERR(hp_wmi_platform_dev);
+- goto err_destroy_input;
+- }
+-
+- err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup);
+- if (err)
+- goto err_unregister_device;
+- }
+-
+- return 0;
+-
+-err_unregister_device:
+- platform_device_unregister(hp_wmi_platform_dev);
+-err_destroy_input:
+- if (event_capable)
+- hp_wmi_input_destroy();
+-
+- return err;
+-}
+-module_init(hp_wmi_init);
+-
+-static void __exit hp_wmi_exit(void)
+-{
+- if (wmi_has_guid(HPWMI_EVENT_GUID))
+- hp_wmi_input_destroy();
+-
+- if (hp_wmi_platform_dev) {
+- platform_device_unregister(hp_wmi_platform_dev);
+- platform_driver_unregister(&hp_wmi_driver);
+- }
+-}
+-module_exit(hp_wmi_exit);
+diff --git a/drivers/platform/x86/hp/Kconfig b/drivers/platform/x86/hp/Kconfig
+new file mode 100644
+index 0000000000000..ae165955311ce
+--- /dev/null
++++ b/drivers/platform/x86/hp/Kconfig
+@@ -0,0 +1,63 @@
++# SPDX-License-Identifier: GPL-2.0-only
++#
++# X86 Platform Specific Drivers
++#
++menuconfig X86_PLATFORM_DRIVERS_HP
++ bool "HP X86 Platform Specific Device Drivers"
++ depends on X86_PLATFORM_DEVICES
++ help
++ Say Y here to get to see options for device drivers for various
++ HP x86 platforms, including vendor-specific laptop extension drivers.
++ This option alone does not add any kernel code.
++
++ If you say N, all options in this submenu will be skipped and disabled.
++
++if X86_PLATFORM_DRIVERS_HP
++
++config HP_ACCEL
++ tristate "HP laptop accelerometer"
++ default m
++ depends on INPUT && ACPI
++ depends on SERIO_I8042
++ select SENSORS_LIS3LV02D
++ select NEW_LEDS
++ select LEDS_CLASS
++ help
++ This driver provides support for the "Mobile Data Protection System 3D"
++ or "3D DriveGuard" feature of HP laptops. On such systems the driver
++ should load automatically (via ACPI alias).
++
++ Support for a led indicating disk protection will be provided as
++ hp::hddprotect. For more information on the feature, refer to
++ Documentation/misc-devices/lis3lv02d.rst.
++
++ To compile this driver as a module, choose M here: the module will
++ be called hp_accel.
++
++config HP_WMI
++ tristate "HP WMI extras"
++ default m
++ depends on ACPI_WMI
++ depends on INPUT
++ depends on RFKILL || RFKILL = n
++ select INPUT_SPARSEKMAP
++ select ACPI_PLATFORM_PROFILE
++ select HWMON
++ help
++ Say Y here if you want to support WMI-based hotkeys on HP laptops and
++ to read data from WMI such as docking or ambient light sensor state.
++
++ To compile this driver as a module, choose M here: the module will
++ be called hp-wmi.
++
++config TC1100_WMI
++ tristate "HP Compaq TC1100 Tablet WMI Extras"
++ default m
++ depends on !X86_64
++ depends on ACPI
++ depends on ACPI_WMI
++ help
++ This is a driver for the WMI extensions (wireless and bluetooth power
++ control) of the HP Compaq TC1100 tablet.
++
++endif # X86_PLATFORM_DRIVERS_HP
+diff --git a/drivers/platform/x86/hp/Makefile b/drivers/platform/x86/hp/Makefile
+new file mode 100644
+index 0000000000000..db1eed4cd7c7d
+--- /dev/null
++++ b/drivers/platform/x86/hp/Makefile
+@@ -0,0 +1,10 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Makefile for linux/drivers/platform/x86/hp
++# HP x86 Platform-Specific Drivers
++#
++
++# Hewlett Packard
++obj-$(CONFIG_HP_ACCEL) += hp_accel.o
++obj-$(CONFIG_HP_WMI) += hp-wmi.o
++obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
+diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c
+new file mode 100644
+index 0000000000000..8c845d263429f
+--- /dev/null
++++ b/drivers/platform/x86/hp/hp-wmi.c
+@@ -0,0 +1,1138 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * HP WMI hotkeys
++ *
++ * Copyright (C) 2008 Red Hat <mjg@redhat.com>
++ * Copyright (C) 2010, 2011 Anssi Hannula <anssi.hannula@iki.fi>
++ *
++ * Portions based on wistron_btns.c:
++ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
++ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
++ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/types.h>
++#include <linux/input.h>
++#include <linux/input/sparse-keymap.h>
++#include <linux/platform_device.h>
++#include <linux/platform_profile.h>
++#include <linux/acpi.h>
++#include <linux/rfkill.h>
++#include <linux/string.h>
++
++MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
++MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
++MODULE_LICENSE("GPL");
++
++MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
++MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
++
++static int enable_tablet_mode_sw = -1;
++module_param(enable_tablet_mode_sw, int, 0444);
++MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)");
++
++#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
++#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
++
++enum hp_wmi_radio {
++ HPWMI_WIFI = 0x0,
++ HPWMI_BLUETOOTH = 0x1,
++ HPWMI_WWAN = 0x2,
++ HPWMI_GPS = 0x3,
++};
++
++enum hp_wmi_event_ids {
++ HPWMI_DOCK_EVENT = 0x01,
++ HPWMI_PARK_HDD = 0x02,
++ HPWMI_SMART_ADAPTER = 0x03,
++ HPWMI_BEZEL_BUTTON = 0x04,
++ HPWMI_WIRELESS = 0x05,
++ HPWMI_CPU_BATTERY_THROTTLE = 0x06,
++ HPWMI_LOCK_SWITCH = 0x07,
++ HPWMI_LID_SWITCH = 0x08,
++ HPWMI_SCREEN_ROTATION = 0x09,
++ HPWMI_COOLSENSE_SYSTEM_MOBILE = 0x0A,
++ HPWMI_COOLSENSE_SYSTEM_HOT = 0x0B,
++ HPWMI_PROXIMITY_SENSOR = 0x0C,
++ HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D,
++ HPWMI_PEAKSHIFT_PERIOD = 0x0F,
++ HPWMI_BATTERY_CHARGE_PERIOD = 0x10,
++ HPWMI_SANITIZATION_MODE = 0x17,
++ HPWMI_SMART_EXPERIENCE_APP = 0x21,
++};
++
++struct bios_args {
++ u32 signature;
++ u32 command;
++ u32 commandtype;
++ u32 datasize;
++ u8 data[128];
++};
++
++enum hp_wmi_commandtype {
++ HPWMI_DISPLAY_QUERY = 0x01,
++ HPWMI_HDDTEMP_QUERY = 0x02,
++ HPWMI_ALS_QUERY = 0x03,
++ HPWMI_HARDWARE_QUERY = 0x04,
++ HPWMI_WIRELESS_QUERY = 0x05,
++ HPWMI_BATTERY_QUERY = 0x07,
++ HPWMI_BIOS_QUERY = 0x09,
++ HPWMI_FEATURE_QUERY = 0x0b,
++ HPWMI_HOTKEY_QUERY = 0x0c,
++ HPWMI_FEATURE2_QUERY = 0x0d,
++ HPWMI_WIRELESS2_QUERY = 0x1b,
++ HPWMI_POSTCODEERROR_QUERY = 0x2a,
++ HPWMI_THERMAL_PROFILE_QUERY = 0x4c,
++};
++
++enum hp_wmi_command {
++ HPWMI_READ = 0x01,
++ HPWMI_WRITE = 0x02,
++ HPWMI_ODM = 0x03,
++};
++
++enum hp_wmi_hardware_mask {
++ HPWMI_DOCK_MASK = 0x01,
++ HPWMI_TABLET_MASK = 0x04,
++};
++
++struct bios_return {
++ u32 sigpass;
++ u32 return_code;
++};
++
++enum hp_return_value {
++ HPWMI_RET_WRONG_SIGNATURE = 0x02,
++ HPWMI_RET_UNKNOWN_COMMAND = 0x03,
++ HPWMI_RET_UNKNOWN_CMDTYPE = 0x04,
++ HPWMI_RET_INVALID_PARAMETERS = 0x05,
++};
++
++enum hp_wireless2_bits {
++ HPWMI_POWER_STATE = 0x01,
++ HPWMI_POWER_SOFT = 0x02,
++ HPWMI_POWER_BIOS = 0x04,
++ HPWMI_POWER_HARD = 0x08,
++ HPWMI_POWER_FW_OR_HW = HPWMI_POWER_BIOS | HPWMI_POWER_HARD,
++};
++
++enum hp_thermal_profile {
++ HP_THERMAL_PROFILE_PERFORMANCE = 0x00,
++ HP_THERMAL_PROFILE_DEFAULT = 0x01,
++ HP_THERMAL_PROFILE_COOL = 0x02
++};
++
++#define IS_HWBLOCKED(x) ((x & HPWMI_POWER_FW_OR_HW) != HPWMI_POWER_FW_OR_HW)
++#define IS_SWBLOCKED(x) !(x & HPWMI_POWER_SOFT)
++
++struct bios_rfkill2_device_state {
++ u8 radio_type;
++ u8 bus_type;
++ u16 vendor_id;
++ u16 product_id;
++ u16 subsys_vendor_id;
++ u16 subsys_product_id;
++ u8 rfkill_id;
++ u8 power;
++ u8 unknown[4];
++};
++
++/* 7 devices fit into the 128 byte buffer */
++#define HPWMI_MAX_RFKILL2_DEVICES 7
++
++struct bios_rfkill2_state {
++ u8 unknown[7];
++ u8 count;
++ u8 pad[8];
++ struct bios_rfkill2_device_state device[HPWMI_MAX_RFKILL2_DEVICES];
++};
++
++static const struct key_entry hp_wmi_keymap[] = {
++ { KE_KEY, 0x02, { KEY_BRIGHTNESSUP } },
++ { KE_KEY, 0x03, { KEY_BRIGHTNESSDOWN } },
++ { KE_KEY, 0x270, { KEY_MICMUTE } },
++ { KE_KEY, 0x20e6, { KEY_PROG1 } },
++ { KE_KEY, 0x20e8, { KEY_MEDIA } },
++ { KE_KEY, 0x2142, { KEY_MEDIA } },
++ { KE_KEY, 0x213b, { KEY_INFO } },
++ { KE_KEY, 0x2169, { KEY_ROTATE_DISPLAY } },
++ { KE_KEY, 0x216a, { KEY_SETUP } },
++ { KE_KEY, 0x21a9, { KEY_TOUCHPAD_OFF } },
++ { KE_KEY, 0x121a9, { KEY_TOUCHPAD_ON } },
++ { KE_KEY, 0x231b, { KEY_HELP } },
++ { KE_END, 0 }
++};
++
++static struct input_dev *hp_wmi_input_dev;
++static struct platform_device *hp_wmi_platform_dev;
++static struct platform_profile_handler platform_profile_handler;
++static bool platform_profile_support;
++
++static struct rfkill *wifi_rfkill;
++static struct rfkill *bluetooth_rfkill;
++static struct rfkill *wwan_rfkill;
++
++struct rfkill2_device {
++ u8 id;
++ int num;
++ struct rfkill *rfkill;
++};
++
++static int rfkill2_count;
++static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES];
++
++/* map output size to the corresponding WMI method id */
++static inline int encode_outsize_for_pvsz(int outsize)
++{
++ if (outsize > 4096)
++ return -EINVAL;
++ if (outsize > 1024)
++ return 5;
++ if (outsize > 128)
++ return 4;
++ if (outsize > 4)
++ return 3;
++ if (outsize > 0)
++ return 2;
++ return 1;
++}
++
++/*
++ * hp_wmi_perform_query
++ *
++ * query: The commandtype (enum hp_wmi_commandtype)
++ * write: The command (enum hp_wmi_command)
++ * buffer: Buffer used as input and/or output
++ * insize: Size of input buffer
++ * outsize: Size of output buffer
++ *
++ * returns zero on success
++ * an HP WMI query specific error code (which is positive)
++ * -EINVAL if the query was not successful at all
++ * -EINVAL if the output buffer size exceeds buffersize
++ *
++ * Note: The buffersize must at least be the maximum of the input and output
++ * size. E.g. Battery info query is defined to have 1 byte input
++ * and 128 byte output. The caller would do:
++ * buffer = kzalloc(128, GFP_KERNEL);
++ * ret = hp_wmi_perform_query(HPWMI_BATTERY_QUERY, HPWMI_READ, buffer, 1, 128)
++ */
++static int hp_wmi_perform_query(int query, enum hp_wmi_command command,
++ void *buffer, int insize, int outsize)
++{
++ int mid;
++ struct bios_return *bios_return;
++ int actual_outsize;
++ union acpi_object *obj;
++ struct bios_args args = {
++ .signature = 0x55434553,
++ .command = command,
++ .commandtype = query,
++ .datasize = insize,
++ .data = { 0 },
++ };
++ struct acpi_buffer input = { sizeof(struct bios_args), &args };
++ struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
++ int ret = 0;
++
++ mid = encode_outsize_for_pvsz(outsize);
++ if (WARN_ON(mid < 0))
++ return mid;
++
++ if (WARN_ON(insize > sizeof(args.data)))
++ return -EINVAL;
++ memcpy(&args.data[0], buffer, insize);
++
++ wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output);
++
++ obj = output.pointer;
++
++ if (!obj)
++ return -EINVAL;
++
++ if (obj->type != ACPI_TYPE_BUFFER) {
++ ret = -EINVAL;
++ goto out_free;
++ }
++
++ bios_return = (struct bios_return *)obj->buffer.pointer;
++ ret = bios_return->return_code;
++
++ if (ret) {
++ if (ret != HPWMI_RET_UNKNOWN_COMMAND &&
++ ret != HPWMI_RET_UNKNOWN_CMDTYPE)
++ pr_warn("query 0x%x returned error 0x%x\n", query, ret);
++ goto out_free;
++ }
++
++ /* Ignore output data of zero size */
++ if (!outsize)
++ goto out_free;
++
++ actual_outsize = min(outsize, (int)(obj->buffer.length - sizeof(*bios_return)));
++ memcpy(buffer, obj->buffer.pointer + sizeof(*bios_return), actual_outsize);
++ memset(buffer + actual_outsize, 0, outsize - actual_outsize);
++
++out_free:
++ kfree(obj);
++ return ret;
++}
++
++static int hp_wmi_read_int(int query)
++{
++ int val = 0, ret;
++
++ ret = hp_wmi_perform_query(query, HPWMI_READ, &val,
++ sizeof(val), sizeof(val));
++
++ if (ret)
++ return ret < 0 ? ret : -EINVAL;
++
++ return val;
++}
++
++static int hp_wmi_hw_state(int mask)
++{
++ int state = hp_wmi_read_int(HPWMI_HARDWARE_QUERY);
++
++ if (state < 0)
++ return state;
++
++ return !!(state & mask);
++}
++
++static int __init hp_wmi_bios_2008_later(void)
++{
++ int state = 0;
++ int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state,
++ sizeof(state), sizeof(state));
++ if (!ret)
++ return 1;
++
++ return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO;
++}
++
++static int __init hp_wmi_bios_2009_later(void)
++{
++ u8 state[128];
++ int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state,
++ sizeof(state), sizeof(state));
++ if (!ret)
++ return 1;
++
++ return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO;
++}
++
++static int __init hp_wmi_enable_hotkeys(void)
++{
++ int value = 0x6e;
++ int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, HPWMI_WRITE, &value,
++ sizeof(value), 0);
++
++ return ret <= 0 ? ret : -EINVAL;
++}
++
++static int hp_wmi_set_block(void *data, bool blocked)
++{
++ enum hp_wmi_radio r = (enum hp_wmi_radio) data;
++ int query = BIT(r + 8) | ((!blocked) << r);
++ int ret;
++
++ ret = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, HPWMI_WRITE,
++ &query, sizeof(query), 0);
++
++ return ret <= 0 ? ret : -EINVAL;
++}
++
++static const struct rfkill_ops hp_wmi_rfkill_ops = {
++ .set_block = hp_wmi_set_block,
++};
++
++static bool hp_wmi_get_sw_state(enum hp_wmi_radio r)
++{
++ int mask = 0x200 << (r * 8);
++
++ int wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
++
++ /* TBD: Pass error */
++ WARN_ONCE(wireless < 0, "error executing HPWMI_WIRELESS_QUERY");
++
++ return !(wireless & mask);
++}
++
++static bool hp_wmi_get_hw_state(enum hp_wmi_radio r)
++{
++ int mask = 0x800 << (r * 8);
++
++ int wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
++
++ /* TBD: Pass error */
++ WARN_ONCE(wireless < 0, "error executing HPWMI_WIRELESS_QUERY");
++
++ return !(wireless & mask);
++}
++
++static int hp_wmi_rfkill2_set_block(void *data, bool blocked)
++{
++ int rfkill_id = (int)(long)data;
++ char buffer[4] = { 0x01, 0x00, rfkill_id, !blocked };
++ int ret;
++
++ ret = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_WRITE,
++ buffer, sizeof(buffer), 0);
++
++ return ret <= 0 ? ret : -EINVAL;
++}
++
++static const struct rfkill_ops hp_wmi_rfkill2_ops = {
++ .set_block = hp_wmi_rfkill2_set_block,
++};
++
++static int hp_wmi_rfkill2_refresh(void)
++{
++ struct bios_rfkill2_state state;
++ int err, i;
++
++ err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
++ sizeof(state), sizeof(state));
++ if (err)
++ return err;
++
++ for (i = 0; i < rfkill2_count; i++) {
++ int num = rfkill2[i].num;
++ struct bios_rfkill2_device_state *devstate;
++ devstate = &state.device[num];
++
++ if (num >= state.count ||
++ devstate->rfkill_id != rfkill2[i].id) {
++ pr_warn("power configuration of the wireless devices unexpectedly changed\n");
++ continue;
++ }
++
++ rfkill_set_states(rfkill2[i].rfkill,
++ IS_SWBLOCKED(devstate->power),
++ IS_HWBLOCKED(devstate->power));
++ }
++
++ return 0;
++}
++
++static ssize_t display_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ int value = hp_wmi_read_int(HPWMI_DISPLAY_QUERY);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "%d\n", value);
++}
++
++static ssize_t hddtemp_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ int value = hp_wmi_read_int(HPWMI_HDDTEMP_QUERY);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "%d\n", value);
++}
++
++static ssize_t als_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ int value = hp_wmi_read_int(HPWMI_ALS_QUERY);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "%d\n", value);
++}
++
++static ssize_t dock_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ int value = hp_wmi_hw_state(HPWMI_DOCK_MASK);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "%d\n", value);
++}
++
++static ssize_t tablet_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ int value = hp_wmi_hw_state(HPWMI_TABLET_MASK);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "%d\n", value);
++}
++
++static ssize_t postcode_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ /* Get the POST error code of previous boot failure. */
++ int value = hp_wmi_read_int(HPWMI_POSTCODEERROR_QUERY);
++ if (value < 0)
++ return value;
++ return sprintf(buf, "0x%x\n", value);
++}
++
++static ssize_t als_store(struct device *dev, struct device_attribute *attr,
++ const char *buf, size_t count)
++{
++ u32 tmp;
++ int ret;
++
++ ret = kstrtou32(buf, 10, &tmp);
++ if (ret)
++ return ret;
++
++ ret = hp_wmi_perform_query(HPWMI_ALS_QUERY, HPWMI_WRITE, &tmp,
++ sizeof(tmp), sizeof(tmp));
++ if (ret)
++ return ret < 0 ? ret : -EINVAL;
++
++ return count;
++}
++
++static ssize_t postcode_store(struct device *dev, struct device_attribute *attr,
++ const char *buf, size_t count)
++{
++ u32 tmp = 1;
++ bool clear;
++ int ret;
++
++ ret = kstrtobool(buf, &clear);
++ if (ret)
++ return ret;
++
++ if (clear == false)
++ return -EINVAL;
++
++ /* Clear the POST error code. It is kept until until cleared. */
++ ret = hp_wmi_perform_query(HPWMI_POSTCODEERROR_QUERY, HPWMI_WRITE, &tmp,
++ sizeof(tmp), sizeof(tmp));
++ if (ret)
++ return ret < 0 ? ret : -EINVAL;
++
++ return count;
++}
++
++static DEVICE_ATTR_RO(display);
++static DEVICE_ATTR_RO(hddtemp);
++static DEVICE_ATTR_RW(als);
++static DEVICE_ATTR_RO(dock);
++static DEVICE_ATTR_RO(tablet);
++static DEVICE_ATTR_RW(postcode);
++
++static struct attribute *hp_wmi_attrs[] = {
++ &dev_attr_display.attr,
++ &dev_attr_hddtemp.attr,
++ &dev_attr_als.attr,
++ &dev_attr_dock.attr,
++ &dev_attr_tablet.attr,
++ &dev_attr_postcode.attr,
++ NULL,
++};
++ATTRIBUTE_GROUPS(hp_wmi);
++
++static void hp_wmi_notify(u32 value, void *context)
++{
++ struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
++ u32 event_id, event_data;
++ union acpi_object *obj;
++ acpi_status status;
++ u32 *location;
++ int key_code;
++
++ status = wmi_get_event_data(value, &response);
++ if (status != AE_OK) {
++ pr_info("bad event status 0x%x\n", status);
++ return;
++ }
++
++ obj = (union acpi_object *)response.pointer;
++
++ if (!obj)
++ return;
++ if (obj->type != ACPI_TYPE_BUFFER) {
++ pr_info("Unknown response received %d\n", obj->type);
++ kfree(obj);
++ return;
++ }
++
++ /*
++ * Depending on ACPI version the concatenation of id and event data
++ * inside _WED function will result in a 8 or 16 byte buffer.
++ */
++ location = (u32 *)obj->buffer.pointer;
++ if (obj->buffer.length == 8) {
++ event_id = *location;
++ event_data = *(location + 1);
++ } else if (obj->buffer.length == 16) {
++ event_id = *location;
++ event_data = *(location + 2);
++ } else {
++ pr_info("Unknown buffer length %d\n", obj->buffer.length);
++ kfree(obj);
++ return;
++ }
++ kfree(obj);
++
++ switch (event_id) {
++ case HPWMI_DOCK_EVENT:
++ if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit))
++ input_report_switch(hp_wmi_input_dev, SW_DOCK,
++ hp_wmi_hw_state(HPWMI_DOCK_MASK));
++ if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit))
++ input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
++ hp_wmi_hw_state(HPWMI_TABLET_MASK));
++ input_sync(hp_wmi_input_dev);
++ break;
++ case HPWMI_PARK_HDD:
++ break;
++ case HPWMI_SMART_ADAPTER:
++ break;
++ case HPWMI_BEZEL_BUTTON:
++ key_code = hp_wmi_read_int(HPWMI_HOTKEY_QUERY);
++ if (key_code < 0)
++ break;
++
++ if (!sparse_keymap_report_event(hp_wmi_input_dev,
++ key_code, 1, true))
++ pr_info("Unknown key code - 0x%x\n", key_code);
++ break;
++ case HPWMI_WIRELESS:
++ if (rfkill2_count) {
++ hp_wmi_rfkill2_refresh();
++ break;
++ }
++
++ if (wifi_rfkill)
++ rfkill_set_states(wifi_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WIFI),
++ hp_wmi_get_hw_state(HPWMI_WIFI));
++ if (bluetooth_rfkill)
++ rfkill_set_states(bluetooth_rfkill,
++ hp_wmi_get_sw_state(HPWMI_BLUETOOTH),
++ hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
++ if (wwan_rfkill)
++ rfkill_set_states(wwan_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WWAN),
++ hp_wmi_get_hw_state(HPWMI_WWAN));
++ break;
++ case HPWMI_CPU_BATTERY_THROTTLE:
++ pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
++ break;
++ case HPWMI_LOCK_SWITCH:
++ break;
++ case HPWMI_LID_SWITCH:
++ break;
++ case HPWMI_SCREEN_ROTATION:
++ break;
++ case HPWMI_COOLSENSE_SYSTEM_MOBILE:
++ break;
++ case HPWMI_COOLSENSE_SYSTEM_HOT:
++ break;
++ case HPWMI_PROXIMITY_SENSOR:
++ break;
++ case HPWMI_BACKLIT_KB_BRIGHTNESS:
++ break;
++ case HPWMI_PEAKSHIFT_PERIOD:
++ break;
++ case HPWMI_BATTERY_CHARGE_PERIOD:
++ break;
++ case HPWMI_SANITIZATION_MODE:
++ break;
++ case HPWMI_SMART_EXPERIENCE_APP:
++ break;
++ default:
++ pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data);
++ break;
++ }
++}
++
++static int __init hp_wmi_input_setup(void)
++{
++ acpi_status status;
++ int err, val;
++
++ hp_wmi_input_dev = input_allocate_device();
++ if (!hp_wmi_input_dev)
++ return -ENOMEM;
++
++ hp_wmi_input_dev->name = "HP WMI hotkeys";
++ hp_wmi_input_dev->phys = "wmi/input0";
++ hp_wmi_input_dev->id.bustype = BUS_HOST;
++
++ __set_bit(EV_SW, hp_wmi_input_dev->evbit);
++
++ /* Dock */
++ val = hp_wmi_hw_state(HPWMI_DOCK_MASK);
++ if (!(val < 0)) {
++ __set_bit(SW_DOCK, hp_wmi_input_dev->swbit);
++ input_report_switch(hp_wmi_input_dev, SW_DOCK, val);
++ }
++
++ /* Tablet mode */
++ if (enable_tablet_mode_sw > 0) {
++ val = hp_wmi_hw_state(HPWMI_TABLET_MASK);
++ if (val >= 0) {
++ __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit);
++ input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val);
++ }
++ }
++
++ err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL);
++ if (err)
++ goto err_free_dev;
++
++ /* Set initial hardware state */
++ input_sync(hp_wmi_input_dev);
++
++ if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later())
++ hp_wmi_enable_hotkeys();
++
++ status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL);
++ if (ACPI_FAILURE(status)) {
++ err = -EIO;
++ goto err_free_dev;
++ }
++
++ err = input_register_device(hp_wmi_input_dev);
++ if (err)
++ goto err_uninstall_notifier;
++
++ return 0;
++
++ err_uninstall_notifier:
++ wmi_remove_notify_handler(HPWMI_EVENT_GUID);
++ err_free_dev:
++ input_free_device(hp_wmi_input_dev);
++ return err;
++}
++
++static void hp_wmi_input_destroy(void)
++{
++ wmi_remove_notify_handler(HPWMI_EVENT_GUID);
++ input_unregister_device(hp_wmi_input_dev);
++}
++
++static int __init hp_wmi_rfkill_setup(struct platform_device *device)
++{
++ int err, wireless;
++
++ wireless = hp_wmi_read_int(HPWMI_WIRELESS_QUERY);
++ if (wireless < 0)
++ return wireless;
++
++ err = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, HPWMI_WRITE, &wireless,
++ sizeof(wireless), 0);
++ if (err)
++ return err;
++
++ if (wireless & 0x1) {
++ wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev,
++ RFKILL_TYPE_WLAN,
++ &hp_wmi_rfkill_ops,
++ (void *) HPWMI_WIFI);
++ if (!wifi_rfkill)
++ return -ENOMEM;
++ rfkill_init_sw_state(wifi_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WIFI));
++ rfkill_set_hw_state(wifi_rfkill,
++ hp_wmi_get_hw_state(HPWMI_WIFI));
++ err = rfkill_register(wifi_rfkill);
++ if (err)
++ goto register_wifi_error;
++ }
++
++ if (wireless & 0x2) {
++ bluetooth_rfkill = rfkill_alloc("hp-bluetooth", &device->dev,
++ RFKILL_TYPE_BLUETOOTH,
++ &hp_wmi_rfkill_ops,
++ (void *) HPWMI_BLUETOOTH);
++ if (!bluetooth_rfkill) {
++ err = -ENOMEM;
++ goto register_bluetooth_error;
++ }
++ rfkill_init_sw_state(bluetooth_rfkill,
++ hp_wmi_get_sw_state(HPWMI_BLUETOOTH));
++ rfkill_set_hw_state(bluetooth_rfkill,
++ hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
++ err = rfkill_register(bluetooth_rfkill);
++ if (err)
++ goto register_bluetooth_error;
++ }
++
++ if (wireless & 0x4) {
++ wwan_rfkill = rfkill_alloc("hp-wwan", &device->dev,
++ RFKILL_TYPE_WWAN,
++ &hp_wmi_rfkill_ops,
++ (void *) HPWMI_WWAN);
++ if (!wwan_rfkill) {
++ err = -ENOMEM;
++ goto register_wwan_error;
++ }
++ rfkill_init_sw_state(wwan_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WWAN));
++ rfkill_set_hw_state(wwan_rfkill,
++ hp_wmi_get_hw_state(HPWMI_WWAN));
++ err = rfkill_register(wwan_rfkill);
++ if (err)
++ goto register_wwan_error;
++ }
++
++ return 0;
++
++register_wwan_error:
++ rfkill_destroy(wwan_rfkill);
++ wwan_rfkill = NULL;
++ if (bluetooth_rfkill)
++ rfkill_unregister(bluetooth_rfkill);
++register_bluetooth_error:
++ rfkill_destroy(bluetooth_rfkill);
++ bluetooth_rfkill = NULL;
++ if (wifi_rfkill)
++ rfkill_unregister(wifi_rfkill);
++register_wifi_error:
++ rfkill_destroy(wifi_rfkill);
++ wifi_rfkill = NULL;
++ return err;
++}
++
++static int __init hp_wmi_rfkill2_setup(struct platform_device *device)
++{
++ struct bios_rfkill2_state state;
++ int err, i;
++
++ err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
++ sizeof(state), sizeof(state));
++ if (err)
++ return err < 0 ? err : -EINVAL;
++
++ if (state.count > HPWMI_MAX_RFKILL2_DEVICES) {
++ pr_warn("unable to parse 0x1b query output\n");
++ return -EINVAL;
++ }
++
++ for (i = 0; i < state.count; i++) {
++ struct rfkill *rfkill;
++ enum rfkill_type type;
++ char *name;
++ switch (state.device[i].radio_type) {
++ case HPWMI_WIFI:
++ type = RFKILL_TYPE_WLAN;
++ name = "hp-wifi";
++ break;
++ case HPWMI_BLUETOOTH:
++ type = RFKILL_TYPE_BLUETOOTH;
++ name = "hp-bluetooth";
++ break;
++ case HPWMI_WWAN:
++ type = RFKILL_TYPE_WWAN;
++ name = "hp-wwan";
++ break;
++ case HPWMI_GPS:
++ type = RFKILL_TYPE_GPS;
++ name = "hp-gps";
++ break;
++ default:
++ pr_warn("unknown device type 0x%x\n",
++ state.device[i].radio_type);
++ continue;
++ }
++
++ if (!state.device[i].vendor_id) {
++ pr_warn("zero device %d while %d reported\n",
++ i, state.count);
++ continue;
++ }
++
++ rfkill = rfkill_alloc(name, &device->dev, type,
++ &hp_wmi_rfkill2_ops, (void *)(long)i);
++ if (!rfkill) {
++ err = -ENOMEM;
++ goto fail;
++ }
++
++ rfkill2[rfkill2_count].id = state.device[i].rfkill_id;
++ rfkill2[rfkill2_count].num = i;
++ rfkill2[rfkill2_count].rfkill = rfkill;
++
++ rfkill_init_sw_state(rfkill,
++ IS_SWBLOCKED(state.device[i].power));
++ rfkill_set_hw_state(rfkill,
++ IS_HWBLOCKED(state.device[i].power));
++
++ if (!(state.device[i].power & HPWMI_POWER_BIOS))
++ pr_info("device %s blocked by BIOS\n", name);
++
++ err = rfkill_register(rfkill);
++ if (err) {
++ rfkill_destroy(rfkill);
++ goto fail;
++ }
++
++ rfkill2_count++;
++ }
++
++ return 0;
++fail:
++ for (; rfkill2_count > 0; rfkill2_count--) {
++ rfkill_unregister(rfkill2[rfkill2_count - 1].rfkill);
++ rfkill_destroy(rfkill2[rfkill2_count - 1].rfkill);
++ }
++ return err;
++}
++
++static int thermal_profile_get(void)
++{
++ return hp_wmi_read_int(HPWMI_THERMAL_PROFILE_QUERY);
++}
++
++static int thermal_profile_set(int thermal_profile)
++{
++ return hp_wmi_perform_query(HPWMI_THERMAL_PROFILE_QUERY, HPWMI_WRITE, &thermal_profile,
++ sizeof(thermal_profile), 0);
++}
++
++static int platform_profile_get(struct platform_profile_handler *pprof,
++ enum platform_profile_option *profile)
++{
++ int tp;
++
++ tp = thermal_profile_get();
++ if (tp < 0)
++ return tp;
++
++ switch (tp) {
++ case HP_THERMAL_PROFILE_PERFORMANCE:
++ *profile = PLATFORM_PROFILE_PERFORMANCE;
++ break;
++ case HP_THERMAL_PROFILE_DEFAULT:
++ *profile = PLATFORM_PROFILE_BALANCED;
++ break;
++ case HP_THERMAL_PROFILE_COOL:
++ *profile = PLATFORM_PROFILE_COOL;
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int platform_profile_set(struct platform_profile_handler *pprof,
++ enum platform_profile_option profile)
++{
++ int err, tp;
++
++ switch (profile) {
++ case PLATFORM_PROFILE_PERFORMANCE:
++ tp = HP_THERMAL_PROFILE_PERFORMANCE;
++ break;
++ case PLATFORM_PROFILE_BALANCED:
++ tp = HP_THERMAL_PROFILE_DEFAULT;
++ break;
++ case PLATFORM_PROFILE_COOL:
++ tp = HP_THERMAL_PROFILE_COOL;
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ err = thermal_profile_set(tp);
++ if (err)
++ return err;
++
++ return 0;
++}
++
++static int thermal_profile_setup(void)
++{
++ int err, tp;
++
++ tp = thermal_profile_get();
++ if (tp < 0)
++ return tp;
++
++ /*
++ * call thermal profile write command to ensure that the firmware correctly
++ * sets the OEM variables for the DPTF
++ */
++ err = thermal_profile_set(tp);
++ if (err)
++ return err;
++
++ platform_profile_handler.profile_get = platform_profile_get,
++ platform_profile_handler.profile_set = platform_profile_set,
++
++ set_bit(PLATFORM_PROFILE_COOL, platform_profile_handler.choices);
++ set_bit(PLATFORM_PROFILE_BALANCED, platform_profile_handler.choices);
++ set_bit(PLATFORM_PROFILE_PERFORMANCE, platform_profile_handler.choices);
++
++ err = platform_profile_register(&platform_profile_handler);
++ if (err)
++ return err;
++
++ platform_profile_support = true;
++
++ return 0;
++}
++
++static int __init hp_wmi_bios_setup(struct platform_device *device)
++{
++ /* clear detected rfkill devices */
++ wifi_rfkill = NULL;
++ bluetooth_rfkill = NULL;
++ wwan_rfkill = NULL;
++ rfkill2_count = 0;
++
++ /*
++ * In pre-2009 BIOS, command 1Bh return 0x4 to indicate that
++ * BIOS no longer controls the power for the wireless
++ * devices. All features supported by this command will no
++ * longer be supported.
++ */
++ if (!hp_wmi_bios_2009_later()) {
++ if (hp_wmi_rfkill_setup(device))
++ hp_wmi_rfkill2_setup(device);
++ }
++
++ thermal_profile_setup();
++
++ return 0;
++}
++
++static int __exit hp_wmi_bios_remove(struct platform_device *device)
++{
++ int i;
++
++ for (i = 0; i < rfkill2_count; i++) {
++ rfkill_unregister(rfkill2[i].rfkill);
++ rfkill_destroy(rfkill2[i].rfkill);
++ }
++
++ if (wifi_rfkill) {
++ rfkill_unregister(wifi_rfkill);
++ rfkill_destroy(wifi_rfkill);
++ }
++ if (bluetooth_rfkill) {
++ rfkill_unregister(bluetooth_rfkill);
++ rfkill_destroy(bluetooth_rfkill);
++ }
++ if (wwan_rfkill) {
++ rfkill_unregister(wwan_rfkill);
++ rfkill_destroy(wwan_rfkill);
++ }
++
++ if (platform_profile_support)
++ platform_profile_remove();
++
++ return 0;
++}
++
++static int hp_wmi_resume_handler(struct device *device)
++{
++ /*
++ * Hardware state may have changed while suspended, so trigger
++ * input events for the current state. As this is a switch,
++ * the input layer will only actually pass it on if the state
++ * changed.
++ */
++ if (hp_wmi_input_dev) {
++ if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit))
++ input_report_switch(hp_wmi_input_dev, SW_DOCK,
++ hp_wmi_hw_state(HPWMI_DOCK_MASK));
++ if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit))
++ input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
++ hp_wmi_hw_state(HPWMI_TABLET_MASK));
++ input_sync(hp_wmi_input_dev);
++ }
++
++ if (rfkill2_count)
++ hp_wmi_rfkill2_refresh();
++
++ if (wifi_rfkill)
++ rfkill_set_states(wifi_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WIFI),
++ hp_wmi_get_hw_state(HPWMI_WIFI));
++ if (bluetooth_rfkill)
++ rfkill_set_states(bluetooth_rfkill,
++ hp_wmi_get_sw_state(HPWMI_BLUETOOTH),
++ hp_wmi_get_hw_state(HPWMI_BLUETOOTH));
++ if (wwan_rfkill)
++ rfkill_set_states(wwan_rfkill,
++ hp_wmi_get_sw_state(HPWMI_WWAN),
++ hp_wmi_get_hw_state(HPWMI_WWAN));
++
++ return 0;
++}
++
++static const struct dev_pm_ops hp_wmi_pm_ops = {
++ .resume = hp_wmi_resume_handler,
++ .restore = hp_wmi_resume_handler,
++};
++
++static struct platform_driver hp_wmi_driver = {
++ .driver = {
++ .name = "hp-wmi",
++ .pm = &hp_wmi_pm_ops,
++ .dev_groups = hp_wmi_groups,
++ },
++ .remove = __exit_p(hp_wmi_bios_remove),
++};
++
++static int __init hp_wmi_init(void)
++{
++ int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
++ int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID);
++ int err;
++
++ if (!bios_capable && !event_capable)
++ return -ENODEV;
++
++ if (event_capable) {
++ err = hp_wmi_input_setup();
++ if (err)
++ return err;
++ }
++
++ if (bios_capable) {
++ hp_wmi_platform_dev =
++ platform_device_register_simple("hp-wmi", -1, NULL, 0);
++ if (IS_ERR(hp_wmi_platform_dev)) {
++ err = PTR_ERR(hp_wmi_platform_dev);
++ goto err_destroy_input;
++ }
++
++ err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup);
++ if (err)
++ goto err_unregister_device;
++ }
++
++ return 0;
++
++err_unregister_device:
++ platform_device_unregister(hp_wmi_platform_dev);
++err_destroy_input:
++ if (event_capable)
++ hp_wmi_input_destroy();
++
++ return err;
++}
++module_init(hp_wmi_init);
++
++static void __exit hp_wmi_exit(void)
++{
++ if (wmi_has_guid(HPWMI_EVENT_GUID))
++ hp_wmi_input_destroy();
++
++ if (hp_wmi_platform_dev) {
++ platform_device_unregister(hp_wmi_platform_dev);
++ platform_driver_unregister(&hp_wmi_driver);
++ }
++}
++module_exit(hp_wmi_exit);
+diff --git a/drivers/platform/x86/hp/hp_accel.c b/drivers/platform/x86/hp/hp_accel.c
+new file mode 100644
+index 0000000000000..62a1d93464750
+--- /dev/null
++++ b/drivers/platform/x86/hp/hp_accel.c
+@@ -0,0 +1,405 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * hp_accel.c - Interface between LIS3LV02DL driver and HP ACPI BIOS
++ *
++ * Copyright (C) 2007-2008 Yan Burman
++ * Copyright (C) 2008 Eric Piel
++ * Copyright (C) 2008-2009 Pavel Machek
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/dmi.h>
++#include <linux/module.h>
++#include <linux/types.h>
++#include <linux/platform_device.h>
++#include <linux/interrupt.h>
++#include <linux/delay.h>
++#include <linux/wait.h>
++#include <linux/poll.h>
++#include <linux/freezer.h>
++#include <linux/uaccess.h>
++#include <linux/leds.h>
++#include <linux/atomic.h>
++#include <linux/acpi.h>
++#include <linux/i8042.h>
++#include <linux/serio.h>
++#include "../../../misc/lis3lv02d/lis3lv02d.h"
++
++/* Delayed LEDs infrastructure ------------------------------------ */
++
++/* Special LED class that can defer work */
++struct delayed_led_classdev {
++ struct led_classdev led_classdev;
++ struct work_struct work;
++ enum led_brightness new_brightness;
++
++ unsigned int led; /* For driver */
++ void (*set_brightness)(struct delayed_led_classdev *data, enum led_brightness value);
++};
++
++static inline void delayed_set_status_worker(struct work_struct *work)
++{
++ struct delayed_led_classdev *data =
++ container_of(work, struct delayed_led_classdev, work);
++
++ data->set_brightness(data, data->new_brightness);
++}
++
++static inline void delayed_sysfs_set(struct led_classdev *led_cdev,
++ enum led_brightness brightness)
++{
++ struct delayed_led_classdev *data = container_of(led_cdev,
++ struct delayed_led_classdev, led_classdev);
++ data->new_brightness = brightness;
++ schedule_work(&data->work);
++}
++
++/* HP-specific accelerometer driver ------------------------------------ */
++
++/* e0 25, e0 26, e0 27, e0 28 are scan codes that the accelerometer with acpi id
++ * HPQ6000 sends through the keyboard bus */
++#define ACCEL_1 0x25
++#define ACCEL_2 0x26
++#define ACCEL_3 0x27
++#define ACCEL_4 0x28
++
++/* For automatic insertion of the module */
++static const struct acpi_device_id lis3lv02d_device_ids[] = {
++ {"HPQ0004", 0}, /* HP Mobile Data Protection System PNP */
++ {"HPQ6000", 0}, /* HP Mobile Data Protection System PNP */
++ {"HPQ6007", 0}, /* HP Mobile Data Protection System PNP */
++ {"", 0},
++};
++MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids);
++
++/**
++ * lis3lv02d_acpi_init - initialize the device for ACPI
++ * @lis3: pointer to the device struct
++ *
++ * Returns 0 on success.
++ */
++static int lis3lv02d_acpi_init(struct lis3lv02d *lis3)
++{
++ return 0;
++}
++
++/**
++ * lis3lv02d_acpi_read - ACPI ALRD method: read a register
++ * @lis3: pointer to the device struct
++ * @reg: the register to read
++ * @ret: result of the operation
++ *
++ * Returns 0 on success.
++ */
++static int lis3lv02d_acpi_read(struct lis3lv02d *lis3, int reg, u8 *ret)
++{
++ struct acpi_device *dev = lis3->bus_priv;
++ union acpi_object arg0 = { ACPI_TYPE_INTEGER };
++ struct acpi_object_list args = { 1, &arg0 };
++ unsigned long long lret;
++ acpi_status status;
++
++ arg0.integer.value = reg;
++
++ status = acpi_evaluate_integer(dev->handle, "ALRD", &args, &lret);
++ if (ACPI_FAILURE(status))
++ return -EINVAL;
++ *ret = lret;
++ return 0;
++}
++
++/**
++ * lis3lv02d_acpi_write - ACPI ALWR method: write to a register
++ * @lis3: pointer to the device struct
++ * @reg: the register to write to
++ * @val: the value to write
++ *
++ * Returns 0 on success.
++ */
++static int lis3lv02d_acpi_write(struct lis3lv02d *lis3, int reg, u8 val)
++{
++ struct acpi_device *dev = lis3->bus_priv;
++ unsigned long long ret; /* Not used when writing */
++ union acpi_object in_obj[2];
++ struct acpi_object_list args = { 2, in_obj };
++
++ in_obj[0].type = ACPI_TYPE_INTEGER;
++ in_obj[0].integer.value = reg;
++ in_obj[1].type = ACPI_TYPE_INTEGER;
++ in_obj[1].integer.value = val;
++
++ if (acpi_evaluate_integer(dev->handle, "ALWR", &args, &ret) != AE_OK)
++ return -EINVAL;
++
++ return 0;
++}
++
++static int lis3lv02d_dmi_matched(const struct dmi_system_id *dmi)
++{
++ lis3_dev.ac = *((union axis_conversion *)dmi->driver_data);
++ pr_info("hardware type %s found\n", dmi->ident);
++
++ return 1;
++}
++
++/* Represents, for each axis seen by userspace, the corresponding hw axis (+1).
++ * If the value is negative, the opposite of the hw value is used. */
++#define DEFINE_CONV(name, x, y, z) \
++ static union axis_conversion lis3lv02d_axis_##name = \
++ { .as_array = { x, y, z } }
++DEFINE_CONV(normal, 1, 2, 3);
++DEFINE_CONV(y_inverted, 1, -2, 3);
++DEFINE_CONV(x_inverted, -1, 2, 3);
++DEFINE_CONV(x_inverted_usd, -1, 2, -3);
++DEFINE_CONV(z_inverted, 1, 2, -3);
++DEFINE_CONV(xy_swap, 2, 1, 3);
++DEFINE_CONV(xy_rotated_left, -2, 1, 3);
++DEFINE_CONV(xy_rotated_left_usd, -2, 1, -3);
++DEFINE_CONV(xy_swap_inverted, -2, -1, 3);
++DEFINE_CONV(xy_rotated_right, 2, -1, 3);
++DEFINE_CONV(xy_swap_yz_inverted, 2, -1, -3);
++
++#define AXIS_DMI_MATCH(_ident, _name, _axis) { \
++ .ident = _ident, \
++ .callback = lis3lv02d_dmi_matched, \
++ .matches = { \
++ DMI_MATCH(DMI_PRODUCT_NAME, _name) \
++ }, \
++ .driver_data = &lis3lv02d_axis_##_axis \
++}
++
++#define AXIS_DMI_MATCH2(_ident, _class1, _name1, \
++ _class2, _name2, \
++ _axis) { \
++ .ident = _ident, \
++ .callback = lis3lv02d_dmi_matched, \
++ .matches = { \
++ DMI_MATCH(DMI_##_class1, _name1), \
++ DMI_MATCH(DMI_##_class2, _name2), \
++ }, \
++ .driver_data = &lis3lv02d_axis_##_axis \
++}
++static const struct dmi_system_id lis3lv02d_dmi_ids[] = {
++ /* product names are truncated to match all kinds of a same model */
++ AXIS_DMI_MATCH("NC64x0", "HP Compaq nc64", x_inverted),
++ AXIS_DMI_MATCH("NC84x0", "HP Compaq nc84", z_inverted),
++ AXIS_DMI_MATCH("NX9420", "HP Compaq nx9420", x_inverted),
++ AXIS_DMI_MATCH("NW9440", "HP Compaq nw9440", x_inverted),
++ AXIS_DMI_MATCH("NC2510", "HP Compaq 2510", y_inverted),
++ AXIS_DMI_MATCH("NC2710", "HP Compaq 2710", xy_swap),
++ AXIS_DMI_MATCH("NC8510", "HP Compaq 8510", xy_swap_inverted),
++ AXIS_DMI_MATCH("HP2133", "HP 2133", xy_rotated_left),
++ AXIS_DMI_MATCH("HP2140", "HP 2140", xy_swap_inverted),
++ AXIS_DMI_MATCH("NC653x", "HP Compaq 653", xy_rotated_left_usd),
++ AXIS_DMI_MATCH("NC6730b", "HP Compaq 6730b", xy_rotated_left_usd),
++ AXIS_DMI_MATCH("NC6730s", "HP Compaq 6730s", xy_swap),
++ AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right),
++ AXIS_DMI_MATCH("NC6710x", "HP Compaq 6710", xy_swap_yz_inverted),
++ AXIS_DMI_MATCH("NC6715x", "HP Compaq 6715", y_inverted),
++ AXIS_DMI_MATCH("NC693xx", "HP EliteBook 693", xy_rotated_right),
++ AXIS_DMI_MATCH("NC693xx", "HP EliteBook 853", xy_swap),
++ AXIS_DMI_MATCH("NC854xx", "HP EliteBook 854", y_inverted),
++ AXIS_DMI_MATCH("NC273xx", "HP EliteBook 273", y_inverted),
++ /* Intel-based HP Pavilion dv5 */
++ AXIS_DMI_MATCH2("HPDV5_I",
++ PRODUCT_NAME, "HP Pavilion dv5",
++ BOARD_NAME, "3603",
++ x_inverted),
++ /* AMD-based HP Pavilion dv5 */
++ AXIS_DMI_MATCH2("HPDV5_A",
++ PRODUCT_NAME, "HP Pavilion dv5",
++ BOARD_NAME, "3600",
++ y_inverted),
++ AXIS_DMI_MATCH("DV7", "HP Pavilion dv7", x_inverted),
++ AXIS_DMI_MATCH("HP8710", "HP Compaq 8710", y_inverted),
++ AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted),
++ AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left),
++ AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd),
++ AXIS_DMI_MATCH("HPB440G4", "HP ProBook 440 G4", x_inverted),
++ AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left),
++ AXIS_DMI_MATCH("HPB450G0", "HP ProBook 450 G0", x_inverted),
++ AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted),
++ AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap),
++ AXIS_DMI_MATCH("HPB532x", "HP ProBook 532", y_inverted),
++ AXIS_DMI_MATCH("HPB655x", "HP ProBook 655", xy_swap_inverted),
++ AXIS_DMI_MATCH("Mini510x", "HP Mini 510", xy_rotated_left_usd),
++ AXIS_DMI_MATCH("HPB63xx", "HP ProBook 63", xy_swap),
++ AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap),
++ AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap),
++ AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted),
++ AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted),
++ AXIS_DMI_MATCH("HPZBook17G5", "HP ZBook 17 G5", x_inverted),
++ AXIS_DMI_MATCH("HPZBook17", "HP ZBook 17", xy_swap_yz_inverted),
++ { NULL, }
++/* Laptop models without axis info (yet):
++ * "NC6910" "HP Compaq 6910"
++ * "NC2400" "HP Compaq nc2400"
++ * "NX74x0" "HP Compaq nx74"
++ * "NX6325" "HP Compaq nx6325"
++ * "NC4400" "HP Compaq nc4400"
++ */
++};
++
++static void hpled_set(struct delayed_led_classdev *led_cdev, enum led_brightness value)
++{
++ struct acpi_device *dev = lis3_dev.bus_priv;
++ unsigned long long ret; /* Not used when writing */
++ union acpi_object in_obj[1];
++ struct acpi_object_list args = { 1, in_obj };
++
++ in_obj[0].type = ACPI_TYPE_INTEGER;
++ in_obj[0].integer.value = !!value;
++
++ acpi_evaluate_integer(dev->handle, "ALED", &args, &ret);
++}
++
++static struct delayed_led_classdev hpled_led = {
++ .led_classdev = {
++ .name = "hp::hddprotect",
++ .default_trigger = "none",
++ .brightness_set = delayed_sysfs_set,
++ .flags = LED_CORE_SUSPENDRESUME,
++ },
++ .set_brightness = hpled_set,
++};
++
++static bool hp_accel_i8042_filter(unsigned char data, unsigned char str,
++ struct serio *port)
++{
++ static bool extended;
++
++ if (str & I8042_STR_AUXDATA)
++ return false;
++
++ if (data == 0xe0) {
++ extended = true;
++ return true;
++ } else if (unlikely(extended)) {
++ extended = false;
++
++ switch (data) {
++ case ACCEL_1:
++ case ACCEL_2:
++ case ACCEL_3:
++ case ACCEL_4:
++ return true;
++ default:
++ serio_interrupt(port, 0xe0, 0);
++ return false;
++ }
++ }
++
++ return false;
++}
++
++static int lis3lv02d_probe(struct platform_device *device)
++{
++ int ret;
++
++ lis3_dev.bus_priv = ACPI_COMPANION(&device->dev);
++ lis3_dev.init = lis3lv02d_acpi_init;
++ lis3_dev.read = lis3lv02d_acpi_read;
++ lis3_dev.write = lis3lv02d_acpi_write;
++
++ /* obtain IRQ number of our device from ACPI */
++ ret = platform_get_irq_optional(device, 0);
++ if (ret > 0)
++ lis3_dev.irq = ret;
++
++ /* If possible use a "standard" axes order */
++ if (lis3_dev.ac.x && lis3_dev.ac.y && lis3_dev.ac.z) {
++ pr_info("Using custom axes %d,%d,%d\n",
++ lis3_dev.ac.x, lis3_dev.ac.y, lis3_dev.ac.z);
++ } else if (dmi_check_system(lis3lv02d_dmi_ids) == 0) {
++ pr_info("laptop model unknown, using default axes configuration\n");
++ lis3_dev.ac = lis3lv02d_axis_normal;
++ }
++
++ /* call the core layer to do its init */
++ ret = lis3lv02d_init_device(&lis3_dev);
++ if (ret)
++ return ret;
++
++ /* filter to remove HPQ6000 accelerometer data
++ * from keyboard bus stream */
++ if (strstr(dev_name(&device->dev), "HPQ6000"))
++ i8042_install_filter(hp_accel_i8042_filter);
++
++ INIT_WORK(&hpled_led.work, delayed_set_status_worker);
++ ret = led_classdev_register(NULL, &hpled_led.led_classdev);
++ if (ret) {
++ i8042_remove_filter(hp_accel_i8042_filter);
++ lis3lv02d_joystick_disable(&lis3_dev);
++ lis3lv02d_poweroff(&lis3_dev);
++ flush_work(&hpled_led.work);
++ lis3lv02d_remove_fs(&lis3_dev);
++ return ret;
++ }
++
++ return ret;
++}
++
++static int lis3lv02d_remove(struct platform_device *device)
++{
++ i8042_remove_filter(hp_accel_i8042_filter);
++ lis3lv02d_joystick_disable(&lis3_dev);
++ lis3lv02d_poweroff(&lis3_dev);
++
++ led_classdev_unregister(&hpled_led.led_classdev);
++ flush_work(&hpled_led.work);
++
++ return lis3lv02d_remove_fs(&lis3_dev);
++}
++
++#ifdef CONFIG_PM_SLEEP
++static int lis3lv02d_suspend(struct device *dev)
++{
++ /* make sure the device is off when we suspend */
++ lis3lv02d_poweroff(&lis3_dev);
++ return 0;
++}
++
++static int lis3lv02d_resume(struct device *dev)
++{
++ lis3lv02d_poweron(&lis3_dev);
++ return 0;
++}
++
++static int lis3lv02d_restore(struct device *dev)
++{
++ lis3lv02d_poweron(&lis3_dev);
++ return 0;
++}
++
++static const struct dev_pm_ops hp_accel_pm = {
++ .suspend = lis3lv02d_suspend,
++ .resume = lis3lv02d_resume,
++ .freeze = lis3lv02d_suspend,
++ .thaw = lis3lv02d_resume,
++ .poweroff = lis3lv02d_suspend,
++ .restore = lis3lv02d_restore,
++};
++
++#define HP_ACCEL_PM (&hp_accel_pm)
++#else
++#define HP_ACCEL_PM NULL
++#endif
++
++/* For the HP MDPS aka 3D Driveguard */
++static struct platform_driver lis3lv02d_driver = {
++ .probe = lis3lv02d_probe,
++ .remove = lis3lv02d_remove,
++ .driver = {
++ .name = "hp_accel",
++ .pm = HP_ACCEL_PM,
++ .acpi_match_table = lis3lv02d_device_ids,
++ },
++};
++module_platform_driver(lis3lv02d_driver);
++
++MODULE_DESCRIPTION("Glue between LIS3LV02Dx and HP ACPI BIOS and support for disk protection LED.");
++MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/platform/x86/hp/tc1100-wmi.c b/drivers/platform/x86/hp/tc1100-wmi.c
+new file mode 100644
+index 0000000000000..9072eb3026185
+--- /dev/null
++++ b/drivers/platform/x86/hp/tc1100-wmi.c
+@@ -0,0 +1,265 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * HP Compaq TC1100 Tablet WMI Extras Driver
++ *
++ * Copyright (C) 2007 Carlos Corbacho <carlos@strangeworlds.co.uk>
++ * Copyright (C) 2004 Jamey Hicks <jamey.hicks@hp.com>
++ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
++ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/acpi.h>
++#include <linux/platform_device.h>
++
++#define GUID "C364AC71-36DB-495A-8494-B439D472A505"
++
++#define TC1100_INSTANCE_WIRELESS 1
++#define TC1100_INSTANCE_JOGDIAL 2
++
++MODULE_AUTHOR("Jamey Hicks, Carlos Corbacho");
++MODULE_DESCRIPTION("HP Compaq TC1100 Tablet WMI Extras");
++MODULE_LICENSE("GPL");
++MODULE_ALIAS("wmi:C364AC71-36DB-495A-8494-B439D472A505");
++
++static struct platform_device *tc1100_device;
++
++struct tc1100_data {
++ u32 wireless;
++ u32 jogdial;
++};
++
++#ifdef CONFIG_PM
++static struct tc1100_data suspend_data;
++#endif
++
++/* --------------------------------------------------------------------------
++ Device Management
++ -------------------------------------------------------------------------- */
++
++static int get_state(u32 *out, u8 instance)
++{
++ u32 tmp;
++ acpi_status status;
++ struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
++ union acpi_object *obj;
++
++ if (!out)
++ return -EINVAL;
++
++ if (instance > 2)
++ return -ENODEV;
++
++ status = wmi_query_block(GUID, instance, &result);
++ if (ACPI_FAILURE(status))
++ return -ENODEV;
++
++ obj = (union acpi_object *) result.pointer;
++ if (obj && obj->type == ACPI_TYPE_INTEGER) {
++ tmp = obj->integer.value;
++ } else {
++ tmp = 0;
++ }
++
++ if (result.length > 0)
++ kfree(result.pointer);
++
++ switch (instance) {
++ case TC1100_INSTANCE_WIRELESS:
++ *out = (tmp == 3) ? 1 : 0;
++ return 0;
++ case TC1100_INSTANCE_JOGDIAL:
++ *out = (tmp == 1) ? 0 : 1;
++ return 0;
++ default:
++ return -ENODEV;
++ }
++}
++
++static int set_state(u32 *in, u8 instance)
++{
++ u32 value;
++ acpi_status status;
++ struct acpi_buffer input;
++
++ if (!in)
++ return -EINVAL;
++
++ if (instance > 2)
++ return -ENODEV;
++
++ switch (instance) {
++ case TC1100_INSTANCE_WIRELESS:
++ value = (*in) ? 1 : 2;
++ break;
++ case TC1100_INSTANCE_JOGDIAL:
++ value = (*in) ? 0 : 1;
++ break;
++ default:
++ return -ENODEV;
++ }
++
++ input.length = sizeof(u32);
++ input.pointer = &value;
++
++ status = wmi_set_block(GUID, instance, &input);
++ if (ACPI_FAILURE(status))
++ return -ENODEV;
++
++ return 0;
++}
++
++/* --------------------------------------------------------------------------
++ FS Interface (/sys)
++ -------------------------------------------------------------------------- */
++
++/*
++ * Read/ write bool sysfs macro
++ */
++#define show_set_bool(value, instance) \
++static ssize_t \
++show_bool_##value(struct device *dev, struct device_attribute *attr, \
++ char *buf) \
++{ \
++ u32 result; \
++ acpi_status status = get_state(&result, instance); \
++ if (ACPI_SUCCESS(status)) \
++ return sprintf(buf, "%d\n", result); \
++ return sprintf(buf, "Read error\n"); \
++} \
++\
++static ssize_t \
++set_bool_##value(struct device *dev, struct device_attribute *attr, \
++ const char *buf, size_t count) \
++{ \
++ u32 tmp = simple_strtoul(buf, NULL, 10); \
++ acpi_status status = set_state(&tmp, instance); \
++ if (ACPI_FAILURE(status)) \
++ return -EINVAL; \
++ return count; \
++} \
++static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, \
++ show_bool_##value, set_bool_##value);
++
++show_set_bool(wireless, TC1100_INSTANCE_WIRELESS);
++show_set_bool(jogdial, TC1100_INSTANCE_JOGDIAL);
++
++static struct attribute *tc1100_attributes[] = {
++ &dev_attr_wireless.attr,
++ &dev_attr_jogdial.attr,
++ NULL
++};
++
++static const struct attribute_group tc1100_attribute_group = {
++ .attrs = tc1100_attributes,
++};
++
++/* --------------------------------------------------------------------------
++ Driver Model
++ -------------------------------------------------------------------------- */
++
++static int __init tc1100_probe(struct platform_device *device)
++{
++ return sysfs_create_group(&device->dev.kobj, &tc1100_attribute_group);
++}
++
++
++static int tc1100_remove(struct platform_device *device)
++{
++ sysfs_remove_group(&device->dev.kobj, &tc1100_attribute_group);
++
++ return 0;
++}
++
++#ifdef CONFIG_PM
++static int tc1100_suspend(struct device *dev)
++{
++ int ret;
++
++ ret = get_state(&suspend_data.wireless, TC1100_INSTANCE_WIRELESS);
++ if (ret)
++ return ret;
++
++ ret = get_state(&suspend_data.jogdial, TC1100_INSTANCE_JOGDIAL);
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
++static int tc1100_resume(struct device *dev)
++{
++ int ret;
++
++ ret = set_state(&suspend_data.wireless, TC1100_INSTANCE_WIRELESS);
++ if (ret)
++ return ret;
++
++ ret = set_state(&suspend_data.jogdial, TC1100_INSTANCE_JOGDIAL);
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
++static const struct dev_pm_ops tc1100_pm_ops = {
++ .suspend = tc1100_suspend,
++ .resume = tc1100_resume,
++ .freeze = tc1100_suspend,
++ .restore = tc1100_resume,
++};
++#endif
++
++static struct platform_driver tc1100_driver = {
++ .driver = {
++ .name = "tc1100-wmi",
++#ifdef CONFIG_PM
++ .pm = &tc1100_pm_ops,
++#endif
++ },
++ .remove = tc1100_remove,
++};
++
++static int __init tc1100_init(void)
++{
++ int error;
++
++ if (!wmi_has_guid(GUID))
++ return -ENODEV;
++
++ tc1100_device = platform_device_alloc("tc1100-wmi", -1);
++ if (!tc1100_device)
++ return -ENOMEM;
++
++ error = platform_device_add(tc1100_device);
++ if (error)
++ goto err_device_put;
++
++ error = platform_driver_probe(&tc1100_driver, tc1100_probe);
++ if (error)
++ goto err_device_del;
++
++ pr_info("HP Compaq TC1100 Tablet WMI Extras loaded\n");
++ return 0;
++
++ err_device_del:
++ platform_device_del(tc1100_device);
++ err_device_put:
++ platform_device_put(tc1100_device);
++ return error;
++}
++
++static void __exit tc1100_exit(void)
++{
++ platform_device_unregister(tc1100_device);
++ platform_driver_unregister(&tc1100_driver);
++}
++
++module_init(tc1100_init);
++module_exit(tc1100_exit);
+diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
+deleted file mode 100644
+index cc53f725c0419..0000000000000
+--- a/drivers/platform/x86/hp_accel.c
++++ /dev/null
+@@ -1,403 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * hp_accel.c - Interface between LIS3LV02DL driver and HP ACPI BIOS
+- *
+- * Copyright (C) 2007-2008 Yan Burman
+- * Copyright (C) 2008 Eric Piel
+- * Copyright (C) 2008-2009 Pavel Machek
+- */
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <linux/dmi.h>
+-#include <linux/module.h>
+-#include <linux/types.h>
+-#include <linux/platform_device.h>
+-#include <linux/interrupt.h>
+-#include <linux/delay.h>
+-#include <linux/wait.h>
+-#include <linux/poll.h>
+-#include <linux/freezer.h>
+-#include <linux/uaccess.h>
+-#include <linux/leds.h>
+-#include <linux/atomic.h>
+-#include <linux/acpi.h>
+-#include <linux/i8042.h>
+-#include <linux/serio.h>
+-#include "../../misc/lis3lv02d/lis3lv02d.h"
+-
+-/* Delayed LEDs infrastructure ------------------------------------ */
+-
+-/* Special LED class that can defer work */
+-struct delayed_led_classdev {
+- struct led_classdev led_classdev;
+- struct work_struct work;
+- enum led_brightness new_brightness;
+-
+- unsigned int led; /* For driver */
+- void (*set_brightness)(struct delayed_led_classdev *data, enum led_brightness value);
+-};
+-
+-static inline void delayed_set_status_worker(struct work_struct *work)
+-{
+- struct delayed_led_classdev *data =
+- container_of(work, struct delayed_led_classdev, work);
+-
+- data->set_brightness(data, data->new_brightness);
+-}
+-
+-static inline void delayed_sysfs_set(struct led_classdev *led_cdev,
+- enum led_brightness brightness)
+-{
+- struct delayed_led_classdev *data = container_of(led_cdev,
+- struct delayed_led_classdev, led_classdev);
+- data->new_brightness = brightness;
+- schedule_work(&data->work);
+-}
+-
+-/* HP-specific accelerometer driver ------------------------------------ */
+-
+-/* e0 25, e0 26, e0 27, e0 28 are scan codes that the accelerometer with acpi id
+- * HPQ6000 sends through the keyboard bus */
+-#define ACCEL_1 0x25
+-#define ACCEL_2 0x26
+-#define ACCEL_3 0x27
+-#define ACCEL_4 0x28
+-
+-/* For automatic insertion of the module */
+-static const struct acpi_device_id lis3lv02d_device_ids[] = {
+- {"HPQ0004", 0}, /* HP Mobile Data Protection System PNP */
+- {"HPQ6000", 0}, /* HP Mobile Data Protection System PNP */
+- {"HPQ6007", 0}, /* HP Mobile Data Protection System PNP */
+- {"", 0},
+-};
+-MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids);
+-
+-/**
+- * lis3lv02d_acpi_init - initialize the device for ACPI
+- * @lis3: pointer to the device struct
+- *
+- * Returns 0 on success.
+- */
+-static int lis3lv02d_acpi_init(struct lis3lv02d *lis3)
+-{
+- return 0;
+-}
+-
+-/**
+- * lis3lv02d_acpi_read - ACPI ALRD method: read a register
+- * @lis3: pointer to the device struct
+- * @reg: the register to read
+- * @ret: result of the operation
+- *
+- * Returns 0 on success.
+- */
+-static int lis3lv02d_acpi_read(struct lis3lv02d *lis3, int reg, u8 *ret)
+-{
+- struct acpi_device *dev = lis3->bus_priv;
+- union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+- struct acpi_object_list args = { 1, &arg0 };
+- unsigned long long lret;
+- acpi_status status;
+-
+- arg0.integer.value = reg;
+-
+- status = acpi_evaluate_integer(dev->handle, "ALRD", &args, &lret);
+- if (ACPI_FAILURE(status))
+- return -EINVAL;
+- *ret = lret;
+- return 0;
+-}
+-
+-/**
+- * lis3lv02d_acpi_write - ACPI ALWR method: write to a register
+- * @lis3: pointer to the device struct
+- * @reg: the register to write to
+- * @val: the value to write
+- *
+- * Returns 0 on success.
+- */
+-static int lis3lv02d_acpi_write(struct lis3lv02d *lis3, int reg, u8 val)
+-{
+- struct acpi_device *dev = lis3->bus_priv;
+- unsigned long long ret; /* Not used when writting */
+- union acpi_object in_obj[2];
+- struct acpi_object_list args = { 2, in_obj };
+-
+- in_obj[0].type = ACPI_TYPE_INTEGER;
+- in_obj[0].integer.value = reg;
+- in_obj[1].type = ACPI_TYPE_INTEGER;
+- in_obj[1].integer.value = val;
+-
+- if (acpi_evaluate_integer(dev->handle, "ALWR", &args, &ret) != AE_OK)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int lis3lv02d_dmi_matched(const struct dmi_system_id *dmi)
+-{
+- lis3_dev.ac = *((union axis_conversion *)dmi->driver_data);
+- pr_info("hardware type %s found\n", dmi->ident);
+-
+- return 1;
+-}
+-
+-/* Represents, for each axis seen by userspace, the corresponding hw axis (+1).
+- * If the value is negative, the opposite of the hw value is used. */
+-#define DEFINE_CONV(name, x, y, z) \
+- static union axis_conversion lis3lv02d_axis_##name = \
+- { .as_array = { x, y, z } }
+-DEFINE_CONV(normal, 1, 2, 3);
+-DEFINE_CONV(y_inverted, 1, -2, 3);
+-DEFINE_CONV(x_inverted, -1, 2, 3);
+-DEFINE_CONV(x_inverted_usd, -1, 2, -3);
+-DEFINE_CONV(z_inverted, 1, 2, -3);
+-DEFINE_CONV(xy_swap, 2, 1, 3);
+-DEFINE_CONV(xy_rotated_left, -2, 1, 3);
+-DEFINE_CONV(xy_rotated_left_usd, -2, 1, -3);
+-DEFINE_CONV(xy_swap_inverted, -2, -1, 3);
+-DEFINE_CONV(xy_rotated_right, 2, -1, 3);
+-DEFINE_CONV(xy_swap_yz_inverted, 2, -1, -3);
+-
+-#define AXIS_DMI_MATCH(_ident, _name, _axis) { \
+- .ident = _ident, \
+- .callback = lis3lv02d_dmi_matched, \
+- .matches = { \
+- DMI_MATCH(DMI_PRODUCT_NAME, _name) \
+- }, \
+- .driver_data = &lis3lv02d_axis_##_axis \
+-}
+-
+-#define AXIS_DMI_MATCH2(_ident, _class1, _name1, \
+- _class2, _name2, \
+- _axis) { \
+- .ident = _ident, \
+- .callback = lis3lv02d_dmi_matched, \
+- .matches = { \
+- DMI_MATCH(DMI_##_class1, _name1), \
+- DMI_MATCH(DMI_##_class2, _name2), \
+- }, \
+- .driver_data = &lis3lv02d_axis_##_axis \
+-}
+-static const struct dmi_system_id lis3lv02d_dmi_ids[] = {
+- /* product names are truncated to match all kinds of a same model */
+- AXIS_DMI_MATCH("NC64x0", "HP Compaq nc64", x_inverted),
+- AXIS_DMI_MATCH("NC84x0", "HP Compaq nc84", z_inverted),
+- AXIS_DMI_MATCH("NX9420", "HP Compaq nx9420", x_inverted),
+- AXIS_DMI_MATCH("NW9440", "HP Compaq nw9440", x_inverted),
+- AXIS_DMI_MATCH("NC2510", "HP Compaq 2510", y_inverted),
+- AXIS_DMI_MATCH("NC2710", "HP Compaq 2710", xy_swap),
+- AXIS_DMI_MATCH("NC8510", "HP Compaq 8510", xy_swap_inverted),
+- AXIS_DMI_MATCH("HP2133", "HP 2133", xy_rotated_left),
+- AXIS_DMI_MATCH("HP2140", "HP 2140", xy_swap_inverted),
+- AXIS_DMI_MATCH("NC653x", "HP Compaq 653", xy_rotated_left_usd),
+- AXIS_DMI_MATCH("NC6730b", "HP Compaq 6730b", xy_rotated_left_usd),
+- AXIS_DMI_MATCH("NC6730s", "HP Compaq 6730s", xy_swap),
+- AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right),
+- AXIS_DMI_MATCH("NC6710x", "HP Compaq 6710", xy_swap_yz_inverted),
+- AXIS_DMI_MATCH("NC6715x", "HP Compaq 6715", y_inverted),
+- AXIS_DMI_MATCH("NC693xx", "HP EliteBook 693", xy_rotated_right),
+- AXIS_DMI_MATCH("NC693xx", "HP EliteBook 853", xy_swap),
+- AXIS_DMI_MATCH("NC854xx", "HP EliteBook 854", y_inverted),
+- AXIS_DMI_MATCH("NC273xx", "HP EliteBook 273", y_inverted),
+- /* Intel-based HP Pavilion dv5 */
+- AXIS_DMI_MATCH2("HPDV5_I",
+- PRODUCT_NAME, "HP Pavilion dv5",
+- BOARD_NAME, "3603",
+- x_inverted),
+- /* AMD-based HP Pavilion dv5 */
+- AXIS_DMI_MATCH2("HPDV5_A",
+- PRODUCT_NAME, "HP Pavilion dv5",
+- BOARD_NAME, "3600",
+- y_inverted),
+- AXIS_DMI_MATCH("DV7", "HP Pavilion dv7", x_inverted),
+- AXIS_DMI_MATCH("HP8710", "HP Compaq 8710", y_inverted),
+- AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted),
+- AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left),
+- AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd),
+- AXIS_DMI_MATCH("HPB440G4", "HP ProBook 440 G4", x_inverted),
+- AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left),
+- AXIS_DMI_MATCH("HPB450G0", "HP ProBook 450 G0", x_inverted),
+- AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted),
+- AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap),
+- AXIS_DMI_MATCH("HPB532x", "HP ProBook 532", y_inverted),
+- AXIS_DMI_MATCH("HPB655x", "HP ProBook 655", xy_swap_inverted),
+- AXIS_DMI_MATCH("Mini510x", "HP Mini 510", xy_rotated_left_usd),
+- AXIS_DMI_MATCH("HPB63xx", "HP ProBook 63", xy_swap),
+- AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap),
+- AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap),
+- AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted),
+- AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted),
+- AXIS_DMI_MATCH("HPZBook17G5", "HP ZBook 17 G5", x_inverted),
+- AXIS_DMI_MATCH("HPZBook17", "HP ZBook 17", xy_swap_yz_inverted),
+- { NULL, }
+-/* Laptop models without axis info (yet):
+- * "NC6910" "HP Compaq 6910"
+- * "NC2400" "HP Compaq nc2400"
+- * "NX74x0" "HP Compaq nx74"
+- * "NX6325" "HP Compaq nx6325"
+- * "NC4400" "HP Compaq nc4400"
+- */
+-};
+-
+-static void hpled_set(struct delayed_led_classdev *led_cdev, enum led_brightness value)
+-{
+- struct acpi_device *dev = lis3_dev.bus_priv;
+- unsigned long long ret; /* Not used when writing */
+- union acpi_object in_obj[1];
+- struct acpi_object_list args = { 1, in_obj };
+-
+- in_obj[0].type = ACPI_TYPE_INTEGER;
+- in_obj[0].integer.value = !!value;
+-
+- acpi_evaluate_integer(dev->handle, "ALED", &args, &ret);
+-}
+-
+-static struct delayed_led_classdev hpled_led = {
+- .led_classdev = {
+- .name = "hp::hddprotect",
+- .default_trigger = "none",
+- .brightness_set = delayed_sysfs_set,
+- .flags = LED_CORE_SUSPENDRESUME,
+- },
+- .set_brightness = hpled_set,
+-};
+-
+-static bool hp_accel_i8042_filter(unsigned char data, unsigned char str,
+- struct serio *port)
+-{
+- static bool extended;
+-
+- if (str & I8042_STR_AUXDATA)
+- return false;
+-
+- if (data == 0xe0) {
+- extended = true;
+- return true;
+- } else if (unlikely(extended)) {
+- extended = false;
+-
+- switch (data) {
+- case ACCEL_1:
+- case ACCEL_2:
+- case ACCEL_3:
+- case ACCEL_4:
+- return true;
+- default:
+- serio_interrupt(port, 0xe0, 0);
+- return false;
+- }
+- }
+-
+- return false;
+-}
+-
+-static int lis3lv02d_probe(struct platform_device *device)
+-{
+- int ret;
+-
+- lis3_dev.bus_priv = ACPI_COMPANION(&device->dev);
+- lis3_dev.init = lis3lv02d_acpi_init;
+- lis3_dev.read = lis3lv02d_acpi_read;
+- lis3_dev.write = lis3lv02d_acpi_write;
+-
+- /* obtain IRQ number of our device from ACPI */
+- ret = platform_get_irq_optional(device, 0);
+- if (ret > 0)
+- lis3_dev.irq = ret;
+-
+- /* If possible use a "standard" axes order */
+- if (lis3_dev.ac.x && lis3_dev.ac.y && lis3_dev.ac.z) {
+- pr_info("Using custom axes %d,%d,%d\n",
+- lis3_dev.ac.x, lis3_dev.ac.y, lis3_dev.ac.z);
+- } else if (dmi_check_system(lis3lv02d_dmi_ids) == 0) {
+- pr_info("laptop model unknown, using default axes configuration\n");
+- lis3_dev.ac = lis3lv02d_axis_normal;
+- }
+-
+- /* call the core layer do its init */
+- ret = lis3lv02d_init_device(&lis3_dev);
+- if (ret)
+- return ret;
+-
+- /* filter to remove HPQ6000 accelerometer data
+- * from keyboard bus stream */
+- if (strstr(dev_name(&device->dev), "HPQ6000"))
+- i8042_install_filter(hp_accel_i8042_filter);
+-
+- INIT_WORK(&hpled_led.work, delayed_set_status_worker);
+- ret = led_classdev_register(NULL, &hpled_led.led_classdev);
+- if (ret) {
+- lis3lv02d_joystick_disable(&lis3_dev);
+- lis3lv02d_poweroff(&lis3_dev);
+- flush_work(&hpled_led.work);
+- return ret;
+- }
+-
+- return ret;
+-}
+-
+-static int lis3lv02d_remove(struct platform_device *device)
+-{
+- i8042_remove_filter(hp_accel_i8042_filter);
+- lis3lv02d_joystick_disable(&lis3_dev);
+- lis3lv02d_poweroff(&lis3_dev);
+-
+- led_classdev_unregister(&hpled_led.led_classdev);
+- flush_work(&hpled_led.work);
+-
+- return lis3lv02d_remove_fs(&lis3_dev);
+-}
+-
+-#ifdef CONFIG_PM_SLEEP
+-static int lis3lv02d_suspend(struct device *dev)
+-{
+- /* make sure the device is off when we suspend */
+- lis3lv02d_poweroff(&lis3_dev);
+- return 0;
+-}
+-
+-static int lis3lv02d_resume(struct device *dev)
+-{
+- lis3lv02d_poweron(&lis3_dev);
+- return 0;
+-}
+-
+-static int lis3lv02d_restore(struct device *dev)
+-{
+- lis3lv02d_poweron(&lis3_dev);
+- return 0;
+-}
+-
+-static const struct dev_pm_ops hp_accel_pm = {
+- .suspend = lis3lv02d_suspend,
+- .resume = lis3lv02d_resume,
+- .freeze = lis3lv02d_suspend,
+- .thaw = lis3lv02d_resume,
+- .poweroff = lis3lv02d_suspend,
+- .restore = lis3lv02d_restore,
+-};
+-
+-#define HP_ACCEL_PM (&hp_accel_pm)
+-#else
+-#define HP_ACCEL_PM NULL
+-#endif
+-
+-/* For the HP MDPS aka 3D Driveguard */
+-static struct platform_driver lis3lv02d_driver = {
+- .probe = lis3lv02d_probe,
+- .remove = lis3lv02d_remove,
+- .driver = {
+- .name = "hp_accel",
+- .pm = HP_ACCEL_PM,
+- .acpi_match_table = lis3lv02d_device_ids,
+- },
+-};
+-module_platform_driver(lis3lv02d_driver);
+-
+-MODULE_DESCRIPTION("Glue between LIS3LV02Dx and HP ACPI BIOS and support for disk protection LED.");
+-MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
+-MODULE_LICENSE("GPL");
+diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c
+index a2d846c4a7eef..23ebd0c046e16 100644
+--- a/drivers/platform/x86/huawei-wmi.c
++++ b/drivers/platform/x86/huawei-wmi.c
+@@ -86,6 +86,8 @@ static const struct key_entry huawei_wmi_keymap[] = {
+ { KE_IGNORE, 0x293, { KEY_KBDILLUMTOGGLE } },
+ { KE_IGNORE, 0x294, { KEY_KBDILLUMUP } },
+ { KE_IGNORE, 0x295, { KEY_KBDILLUMUP } },
++ // Ignore Ambient Light Sensing
++ { KE_KEY, 0x2c1, { KEY_RESERVED } },
+ { KE_END, 0 }
+ };
+
+@@ -470,10 +472,17 @@ static DEVICE_ATTR_RW(charge_control_thresholds);
+
+ static int huawei_wmi_battery_add(struct power_supply *battery)
+ {
+- device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold);
+- device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold);
++ int err = 0;
+
+- return 0;
++ err = device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold);
++ if (err)
++ return err;
++
++ err = device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold);
++ if (err)
++ device_remove_file(&battery->dev, &dev_attr_charge_control_start_threshold);
++
++ return err;
+ }
+
+ static int huawei_wmi_battery_remove(struct power_supply *battery)
+@@ -753,6 +762,9 @@ static int huawei_wmi_input_setup(struct device *dev,
+ const char *guid,
+ struct input_dev **idev)
+ {
++ acpi_status status;
++ int err;
++
+ *idev = devm_input_allocate_device(dev);
+ if (!*idev)
+ return -ENOMEM;
+@@ -762,10 +774,19 @@ static int huawei_wmi_input_setup(struct device *dev,
+ (*idev)->id.bustype = BUS_HOST;
+ (*idev)->dev.parent = dev;
+
+- return sparse_keymap_setup(*idev, huawei_wmi_keymap, NULL) ||
+- input_register_device(*idev) ||
+- wmi_install_notify_handler(guid, huawei_wmi_input_notify,
+- *idev);
++ err = sparse_keymap_setup(*idev, huawei_wmi_keymap, NULL);
++ if (err)
++ return err;
++
++ err = input_register_device(*idev);
++ if (err)
++ return err;
++
++ status = wmi_install_notify_handler(guid, huawei_wmi_input_notify, *idev);
++ if (ACPI_FAILURE(status))
++ return -EIO;
++
++ return 0;
+ }
+
+ static void huawei_wmi_input_exit(struct device *dev, const char *guid)
+diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
+index e7a1299e37766..e75b09a144a32 100644
+--- a/drivers/platform/x86/ideapad-laptop.c
++++ b/drivers/platform/x86/ideapad-laptop.c
+@@ -136,6 +136,7 @@ struct ideapad_private {
+ bool dytc : 1;
+ bool fan_mode : 1;
+ bool fn_lock : 1;
++ bool set_fn_lock_led : 1;
+ bool hw_rfkill_switch : 1;
+ bool kbd_bl : 1;
+ bool touchpad_ctrl_via_ec : 1;
+@@ -1467,6 +1468,9 @@ static void ideapad_wmi_notify(u32 value, void *context)
+ ideapad_input_report(priv, value);
+ break;
+ case 208:
++ if (!priv->features.set_fn_lock_led)
++ break;
++
+ if (!eval_hals(priv->adev->handle, &result)) {
+ bool state = test_bit(HALS_FNLOCK_STATE_BIT, &result);
+
+@@ -1480,6 +1484,24 @@ static void ideapad_wmi_notify(u32 value, void *context)
+ }
+ #endif
+
++/* On some models we need to call exec_sals(SALS_FNLOCK_ON/OFF) to set the LED */
++static const struct dmi_system_id set_fn_lock_led_list[] = {
++ {
++ /* https://bugzilla.kernel.org/show_bug.cgi?id=212671 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"),
++ }
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"),
++ }
++ },
++ {}
++};
++
+ /*
+ * Some ideapads have a hardware rfkill switch, but most do not have one.
+ * Reading VPCCMD_R_RF always results in 0 on models without a hardware rfkill,
+@@ -1499,15 +1521,39 @@ static const struct dmi_system_id hw_rfkill_list[] = {
+ {}
+ };
+
++static const struct dmi_system_id no_touchpad_switch_list[] = {
++ {
++ .ident = "Lenovo Yoga 3 Pro 1370",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"),
++ },
++ },
++ {
++ .ident = "ZhaoYang K4e-IML",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ZhaoYang K4e-IML"),
++ },
++ },
++ {}
++};
++
+ static void ideapad_check_features(struct ideapad_private *priv)
+ {
+ acpi_handle handle = priv->adev->handle;
+ unsigned long val;
+
++ priv->features.set_fn_lock_led = dmi_check_system(set_fn_lock_led_list);
+ priv->features.hw_rfkill_switch = dmi_check_system(hw_rfkill_list);
+
+ /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */
+- priv->features.touchpad_ctrl_via_ec = !acpi_dev_present("ELAN0634", NULL, -1);
++ if (acpi_dev_present("ELAN0634", NULL, -1))
++ priv->features.touchpad_ctrl_via_ec = 0;
++ else if (dmi_check_system(no_touchpad_switch_list))
++ priv->features.touchpad_ctrl_via_ec = 0;
++ else
++ priv->features.touchpad_ctrl_via_ec = 1;
+
+ if (!read_ec_data(handle, VPCCMD_R_FAN, &val))
+ priv->features.fan_mode = true;
+diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig
+index 0b21468e1bd01..02e4481b384e4 100644
+--- a/drivers/platform/x86/intel/Kconfig
++++ b/drivers/platform/x86/intel/Kconfig
+@@ -3,19 +3,6 @@
+ # Intel x86 Platform Specific Drivers
+ #
+
+-menuconfig X86_PLATFORM_DRIVERS_INTEL
+- bool "Intel x86 Platform Specific Device Drivers"
+- default y
+- help
+- Say Y here to get to see options for device drivers for
+- various Intel x86 platforms, including vendor-specific
+- drivers. This option alone does not add any kernel code.
+-
+- If you say N, all options in this submenu will be skipped
+- and disabled.
+-
+-if X86_PLATFORM_DRIVERS_INTEL
+-
+ source "drivers/platform/x86/intel/atomisp2/Kconfig"
+ source "drivers/platform/x86/intel/int1092/Kconfig"
+ source "drivers/platform/x86/intel/int33fe/Kconfig"
+@@ -167,5 +154,3 @@ config INTEL_UNCORE_FREQ_CONTROL
+
+ To compile this driver as a module, choose M here: the module
+ will be called intel-uncore-frequency.
+-
+-endif # X86_PLATFORM_DRIVERS_INTEL
+diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
+index 08598942a6d78..f59a3cc9767b9 100644
+--- a/drivers/platform/x86/intel/hid.c
++++ b/drivers/platform/x86/intel/hid.c
+@@ -27,6 +27,9 @@ static const struct acpi_device_id intel_hid_ids[] = {
+ {"INTC1051", 0},
+ {"INTC1054", 0},
+ {"INTC1070", 0},
++ {"INTC1076", 0},
++ {"INTC1077", 0},
++ {"INTC1078", 0},
+ {"", 0},
+ };
+ MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
+@@ -99,6 +102,20 @@ static const struct dmi_system_id button_array_table[] = {
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"),
+ },
+ },
++ {
++ .ident = "Microsoft Surface Go 3",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
++ },
++ },
++ {
++ .ident = "Microsoft Surface Go 3",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
++ },
++ },
+ { }
+ };
+
+@@ -115,6 +132,18 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible 15-df0xxx"),
+ },
+ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"),
++ },
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite Dragonfly G2 Notebook PC"),
++ },
++ },
+ { }
+ };
+
+@@ -231,7 +260,7 @@ static bool intel_hid_evaluate_method(acpi_handle handle,
+
+ method_name = (char *)intel_hid_dsm_fn_to_method[fn_index];
+
+- if (!(intel_hid_dsm_fn_mask & fn_index))
++ if (!(intel_hid_dsm_fn_mask & BIT(fn_index)))
+ goto skip_dsm_eval;
+
+ obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid,
+@@ -585,7 +614,7 @@ static bool button_array_present(struct platform_device *device)
+ static int intel_hid_probe(struct platform_device *device)
+ {
+ acpi_handle handle = ACPI_HANDLE(&device->dev);
+- unsigned long long mode;
++ unsigned long long mode, dummy;
+ struct intel_hid_priv *priv;
+ acpi_status status;
+ int err;
+@@ -650,18 +679,15 @@ static int intel_hid_probe(struct platform_device *device)
+ if (err)
+ goto err_remove_notify;
+
+- if (priv->array) {
+- unsigned long long dummy;
++ intel_button_array_enable(&device->dev, true);
+
+- intel_button_array_enable(&device->dev, true);
+-
+- /* Call button load method to enable HID power button */
+- if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN,
+- &dummy)) {
+- dev_warn(&device->dev,
+- "failed to enable HID power button\n");
+- }
+- }
++ /*
++ * Call button load method to enable HID power button
++ * Always do this since it activates events on some devices without
++ * a button array too.
++ */
++ if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN, &dummy))
++ dev_warn(&device->dev, "failed to enable HID power button\n");
+
+ device_init_wakeup(&device->dev, true);
+ /*
+diff --git a/drivers/platform/x86/intel/int3472/Makefile b/drivers/platform/x86/intel/int3472/Makefile
+index 2362e04db18d5..771e720528a06 100644
+--- a/drivers/platform/x86/intel/int3472/Makefile
++++ b/drivers/platform/x86/intel/int3472/Makefile
+@@ -1,5 +1,4 @@
+-obj-$(CONFIG_INTEL_SKL_INT3472) += intel_skl_int3472.o
+-intel_skl_int3472-y := intel_skl_int3472_common.o \
+- intel_skl_int3472_discrete.o \
+- intel_skl_int3472_tps68470.o \
+- intel_skl_int3472_clk_and_regulator.o
++obj-$(CONFIG_INTEL_SKL_INT3472) += intel_skl_int3472_discrete.o \
++ intel_skl_int3472_tps68470.o
++intel_skl_int3472_discrete-y := discrete.o clk_and_regulator.o common.o
++intel_skl_int3472_tps68470-y := tps68470.o common.o
+diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
+new file mode 100644
+index 0000000000000..28353addffa7f
+--- /dev/null
++++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
+@@ -0,0 +1,210 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Author: Dan Scally <djrscally@gmail.com> */
++
++#include <linux/acpi.h>
++#include <linux/clkdev.h>
++#include <linux/clk-provider.h>
++#include <linux/device.h>
++#include <linux/gpio/consumer.h>
++#include <linux/regulator/driver.h>
++#include <linux/slab.h>
++
++#include "common.h"
++
++/*
++ * The regulators have to have .ops to be valid, but the only ops we actually
++ * support are .enable and .disable which are handled via .ena_gpiod. Pass an
++ * empty struct to clear the check without lying about capabilities.
++ */
++static const struct regulator_ops int3472_gpio_regulator_ops;
++
++static int skl_int3472_clk_prepare(struct clk_hw *hw)
++{
++ struct int3472_gpio_clock *clk = to_int3472_clk(hw);
++
++ gpiod_set_value_cansleep(clk->ena_gpio, 1);
++ gpiod_set_value_cansleep(clk->led_gpio, 1);
++
++ return 0;
++}
++
++static void skl_int3472_clk_unprepare(struct clk_hw *hw)
++{
++ struct int3472_gpio_clock *clk = to_int3472_clk(hw);
++
++ gpiod_set_value_cansleep(clk->ena_gpio, 0);
++ gpiod_set_value_cansleep(clk->led_gpio, 0);
++}
++
++static int skl_int3472_clk_enable(struct clk_hw *hw)
++{
++ /*
++ * We're just turning a GPIO on to enable the clock, which operation
++ * has the potential to sleep. Given .enable() cannot sleep, but
++ * .prepare() can, we toggle the GPIO in .prepare() instead. Thus,
++ * nothing to do here.
++ */
++ return 0;
++}
++
++static void skl_int3472_clk_disable(struct clk_hw *hw)
++{
++ /* Likewise, nothing to do here... */
++}
++
++static unsigned int skl_int3472_get_clk_frequency(struct int3472_discrete_device *int3472)
++{
++ union acpi_object *obj;
++ unsigned int freq;
++
++ obj = skl_int3472_get_acpi_buffer(int3472->sensor, "SSDB");
++ if (IS_ERR(obj))
++ return 0; /* report rate as 0 on error */
++
++ if (obj->buffer.length < CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET + sizeof(u32)) {
++ dev_err(int3472->dev, "The buffer is too small\n");
++ kfree(obj);
++ return 0;
++ }
++
++ freq = *(u32 *)(obj->buffer.pointer + CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET);
++
++ kfree(obj);
++ return freq;
++}
++
++static unsigned long skl_int3472_clk_recalc_rate(struct clk_hw *hw,
++ unsigned long parent_rate)
++{
++ struct int3472_gpio_clock *clk = to_int3472_clk(hw);
++
++ return clk->frequency;
++}
++
++static const struct clk_ops skl_int3472_clock_ops = {
++ .prepare = skl_int3472_clk_prepare,
++ .unprepare = skl_int3472_clk_unprepare,
++ .enable = skl_int3472_clk_enable,
++ .disable = skl_int3472_clk_disable,
++ .recalc_rate = skl_int3472_clk_recalc_rate,
++};
++
++int skl_int3472_register_clock(struct int3472_discrete_device *int3472)
++{
++ struct clk_init_data init = {
++ .ops = &skl_int3472_clock_ops,
++ .flags = CLK_GET_RATE_NOCACHE,
++ };
++ int ret;
++
++ init.name = kasprintf(GFP_KERNEL, "%s-clk",
++ acpi_dev_name(int3472->adev));
++ if (!init.name)
++ return -ENOMEM;
++
++ int3472->clock.frequency = skl_int3472_get_clk_frequency(int3472);
++
++ int3472->clock.clk_hw.init = &init;
++ int3472->clock.clk = clk_register(&int3472->adev->dev,
++ &int3472->clock.clk_hw);
++ if (IS_ERR(int3472->clock.clk)) {
++ ret = PTR_ERR(int3472->clock.clk);
++ goto out_free_init_name;
++ }
++
++ int3472->clock.cl = clkdev_create(int3472->clock.clk, NULL,
++ int3472->sensor_name);
++ if (!int3472->clock.cl) {
++ ret = -ENOMEM;
++ goto err_unregister_clk;
++ }
++
++ kfree(init.name);
++ return 0;
++
++err_unregister_clk:
++ clk_unregister(int3472->clock.clk);
++out_free_init_name:
++ kfree(init.name);
++
++ return ret;
++}
++
++void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472)
++{
++ clkdev_drop(int3472->clock.cl);
++ clk_unregister(int3472->clock.clk);
++}
++
++int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
++ struct acpi_resource_gpio *agpio)
++{
++ const struct int3472_sensor_config *sensor_config;
++ char *path = agpio->resource_source.string_ptr;
++ struct regulator_consumer_supply supply_map;
++ struct regulator_init_data init_data = { };
++ struct regulator_config cfg = { };
++ int ret;
++
++ sensor_config = int3472->sensor_config;
++ if (IS_ERR(sensor_config)) {
++ dev_err(int3472->dev, "No sensor module config\n");
++ return PTR_ERR(sensor_config);
++ }
++
++ if (!sensor_config->supply_map.supply) {
++ dev_err(int3472->dev, "No supply name defined\n");
++ return -ENODEV;
++ }
++
++ init_data.constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
++ init_data.num_consumer_supplies = 1;
++ supply_map = sensor_config->supply_map;
++ supply_map.dev_name = int3472->sensor_name;
++ init_data.consumer_supplies = &supply_map;
++
++ snprintf(int3472->regulator.regulator_name,
++ sizeof(int3472->regulator.regulator_name), "%s-regulator",
++ acpi_dev_name(int3472->adev));
++ snprintf(int3472->regulator.supply_name,
++ GPIO_REGULATOR_SUPPLY_NAME_LENGTH, "supply-0");
++
++ int3472->regulator.rdesc = INT3472_REGULATOR(
++ int3472->regulator.regulator_name,
++ int3472->regulator.supply_name,
++ &int3472_gpio_regulator_ops);
++
++ int3472->regulator.gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0],
++ "int3472,regulator");
++ if (IS_ERR(int3472->regulator.gpio)) {
++ dev_err(int3472->dev, "Failed to get regulator GPIO line\n");
++ return PTR_ERR(int3472->regulator.gpio);
++ }
++
++ /* Ensure the pin is in output mode and non-active state */
++ gpiod_direction_output(int3472->regulator.gpio, 0);
++
++ cfg.dev = &int3472->adev->dev;
++ cfg.init_data = &init_data;
++ cfg.ena_gpiod = int3472->regulator.gpio;
++
++ int3472->regulator.rdev = regulator_register(&int3472->regulator.rdesc,
++ &cfg);
++ if (IS_ERR(int3472->regulator.rdev)) {
++ ret = PTR_ERR(int3472->regulator.rdev);
++ goto err_free_gpio;
++ }
++
++ return 0;
++
++err_free_gpio:
++ gpiod_put(int3472->regulator.gpio);
++
++ return ret;
++}
++
++void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472)
++{
++ regulator_unregister(int3472->regulator.rdev);
++ gpiod_put(int3472->regulator.gpio);
++}
+diff --git a/drivers/platform/x86/intel/int3472/common.c b/drivers/platform/x86/intel/int3472/common.c
+new file mode 100644
+index 0000000000000..350655a9515b1
+--- /dev/null
++++ b/drivers/platform/x86/intel/int3472/common.c
+@@ -0,0 +1,54 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Author: Dan Scally <djrscally@gmail.com> */
++
++#include <linux/acpi.h>
++#include <linux/slab.h>
++
++#include "common.h"
++
++union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev, char *id)
++{
++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++ acpi_handle handle = adev->handle;
++ union acpi_object *obj;
++ acpi_status status;
++
++ status = acpi_evaluate_object(handle, id, NULL, &buffer);
++ if (ACPI_FAILURE(status))
++ return ERR_PTR(-ENODEV);
++
++ obj = buffer.pointer;
++ if (!obj)
++ return ERR_PTR(-ENODEV);
++
++ if (obj->type != ACPI_TYPE_BUFFER) {
++ acpi_handle_err(handle, "%s object is not an ACPI buffer\n", id);
++ kfree(obj);
++ return ERR_PTR(-EINVAL);
++ }
++
++ return obj;
++}
++
++int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb)
++{
++ union acpi_object *obj;
++ int ret;
++
++ obj = skl_int3472_get_acpi_buffer(adev, "CLDB");
++ if (IS_ERR(obj))
++ return PTR_ERR(obj);
++
++ if (obj->buffer.length > sizeof(*cldb)) {
++ acpi_handle_err(adev->handle, "The CLDB buffer is too large\n");
++ ret = -EINVAL;
++ goto out_free_obj;
++ }
++
++ memcpy(cldb, obj->buffer.pointer, obj->buffer.length);
++ ret = 0;
++
++out_free_obj:
++ kfree(obj);
++ return ret;
++}
+diff --git a/drivers/platform/x86/intel/int3472/common.h b/drivers/platform/x86/intel/int3472/common.h
+new file mode 100644
+index 0000000000000..d14944ee85861
+--- /dev/null
++++ b/drivers/platform/x86/intel/int3472/common.h
+@@ -0,0 +1,119 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Author: Dan Scally <djrscally@gmail.com> */
++
++#ifndef _INTEL_SKL_INT3472_H
++#define _INTEL_SKL_INT3472_H
++
++#include <linux/clk-provider.h>
++#include <linux/gpio/machine.h>
++#include <linux/regulator/driver.h>
++#include <linux/regulator/machine.h>
++#include <linux/types.h>
++
++/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */
++#ifndef I2C_DEV_NAME_FORMAT
++#define I2C_DEV_NAME_FORMAT "i2c-%s"
++#endif
++
++/* PMIC GPIO Types */
++#define INT3472_GPIO_TYPE_RESET 0x00
++#define INT3472_GPIO_TYPE_POWERDOWN 0x01
++#define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b
++#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c
++#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d
++
++#define INT3472_PDEV_MAX_NAME_LEN 23
++#define INT3472_MAX_SENSOR_GPIOS 3
++
++#define GPIO_REGULATOR_NAME_LENGTH 21
++#define GPIO_REGULATOR_SUPPLY_NAME_LENGTH 9
++
++#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET 86
++
++#define INT3472_REGULATOR(_name, _supply, _ops) \
++ (const struct regulator_desc) { \
++ .name = _name, \
++ .supply_name = _supply, \
++ .type = REGULATOR_VOLTAGE, \
++ .ops = _ops, \
++ .owner = THIS_MODULE, \
++ }
++
++#define to_int3472_clk(hw) \
++ container_of(hw, struct int3472_gpio_clock, clk_hw)
++
++#define to_int3472_device(clk) \
++ container_of(clk, struct int3472_discrete_device, clock)
++
++struct acpi_device;
++struct i2c_client;
++struct platform_device;
++
++struct int3472_cldb {
++ u8 version;
++ /*
++ * control logic type
++ * 0: UNKNOWN
++ * 1: DISCRETE(CRD-D)
++ * 2: PMIC TPS68470
++ * 3: PMIC uP6641
++ */
++ u8 control_logic_type;
++ u8 control_logic_id;
++ u8 sensor_card_sku;
++ u8 reserved[28];
++};
++
++struct int3472_gpio_function_remap {
++ const char *documented;
++ const char *actual;
++};
++
++struct int3472_sensor_config {
++ const char *sensor_module_name;
++ struct regulator_consumer_supply supply_map;
++ const struct int3472_gpio_function_remap *function_maps;
++};
++
++struct int3472_discrete_device {
++ struct acpi_device *adev;
++ struct device *dev;
++ struct acpi_device *sensor;
++ const char *sensor_name;
++
++ const struct int3472_sensor_config *sensor_config;
++
++ struct int3472_gpio_regulator {
++ char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
++ char supply_name[GPIO_REGULATOR_SUPPLY_NAME_LENGTH];
++ struct gpio_desc *gpio;
++ struct regulator_dev *rdev;
++ struct regulator_desc rdesc;
++ } regulator;
++
++ struct int3472_gpio_clock {
++ struct clk *clk;
++ struct clk_hw clk_hw;
++ struct clk_lookup *cl;
++ struct gpio_desc *ena_gpio;
++ struct gpio_desc *led_gpio;
++ u32 frequency;
++ } clock;
++
++ unsigned int ngpios; /* how many GPIOs have we seen */
++ unsigned int n_sensor_gpios; /* how many have we mapped to sensor */
++ struct gpiod_lookup_table gpios;
++};
++
++union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
++ char *id);
++int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
++
++int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
++void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472);
++
++int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
++ struct acpi_resource_gpio *agpio);
++void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
++
++#endif
+diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
+new file mode 100644
+index 0000000000000..401fa8f223d62
+--- /dev/null
++++ b/drivers/platform/x86/intel/int3472/discrete.c
+@@ -0,0 +1,439 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Author: Dan Scally <djrscally@gmail.com> */
++
++#include <linux/acpi.h>
++#include <linux/clkdev.h>
++#include <linux/clk-provider.h>
++#include <linux/device.h>
++#include <linux/gpio/consumer.h>
++#include <linux/gpio/machine.h>
++#include <linux/i2c.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/overflow.h>
++#include <linux/platform_device.h>
++#include <linux/uuid.h>
++
++#include "common.h"
++
++/*
++ * 79234640-9e10-4fea-a5c1-b5aa8b19756f
++ * This _DSM GUID returns information about the GPIO lines mapped to a
++ * discrete INT3472 device. Function number 1 returns a count of the GPIO
++ * lines that are mapped. Subsequent functions return 32 bit ints encoding
++ * information about the GPIO line, including its purpose.
++ */
++static const guid_t int3472_gpio_guid =
++ GUID_INIT(0x79234640, 0x9e10, 0x4fea,
++ 0xa5, 0xc1, 0xb5, 0xaa, 0x8b, 0x19, 0x75, 0x6f);
++
++/*
++ * 822ace8f-2814-4174-a56b-5f029fe079ee
++ * This _DSM GUID returns a string from the sensor device, which acts as a
++ * module identifier.
++ */
++static const guid_t cio2_sensor_module_guid =
++ GUID_INIT(0x822ace8f, 0x2814, 0x4174,
++ 0xa5, 0x6b, 0x5f, 0x02, 0x9f, 0xe0, 0x79, 0xee);
++
++/*
++ * Here follows platform specific mapping information that we can pass to
++ * the functions mapping resources to the sensors. Where the sensors have
++ * a power enable pin defined in DSDT we need to provide a supply name so
++ * the sensor drivers can find the regulator. The device name will be derived
++ * from the sensor's ACPI device within the code. Optionally, we can provide a
++ * NULL terminated array of function name mappings to deal with any platform
++ * specific deviations from the documented behaviour of GPIOs.
++ *
++ * Map a GPIO function name to NULL to prevent the driver from mapping that
++ * GPIO at all.
++ */
++
++static const struct int3472_gpio_function_remap ov2680_gpio_function_remaps[] = {
++ { "reset", NULL },
++ { "powerdown", "reset" },
++ { }
++};
++
++static const struct int3472_sensor_config int3472_sensor_configs[] = {
++ /* Lenovo Miix 510-12ISK - OV2680, Front */
++ { "GNDF140809R", { 0 }, ov2680_gpio_function_remaps },
++ /* Lenovo Miix 510-12ISK - OV5648, Rear */
++ { "GEFF150023R", REGULATOR_SUPPLY("avdd", NULL), NULL },
++ /* Surface Go 1&2 - OV5693, Front */
++ { "YHCU", REGULATOR_SUPPLY("avdd", NULL), NULL },
++};
++
++static const struct int3472_sensor_config *
++skl_int3472_get_sensor_module_config(struct int3472_discrete_device *int3472)
++{
++ union acpi_object *obj;
++ unsigned int i;
++
++ obj = acpi_evaluate_dsm_typed(int3472->sensor->handle,
++ &cio2_sensor_module_guid, 0x00,
++ 0x01, NULL, ACPI_TYPE_STRING);
++
++ if (!obj) {
++ dev_err(int3472->dev,
++ "Failed to get sensor module string from _DSM\n");
++ return ERR_PTR(-ENODEV);
++ }
++
++ if (obj->string.type != ACPI_TYPE_STRING) {
++ dev_err(int3472->dev,
++ "Sensor _DSM returned a non-string value\n");
++
++ ACPI_FREE(obj);
++ return ERR_PTR(-EINVAL);
++ }
++
++ for (i = 0; i < ARRAY_SIZE(int3472_sensor_configs); i++) {
++ if (!strcmp(int3472_sensor_configs[i].sensor_module_name,
++ obj->string.pointer))
++ break;
++ }
++
++ ACPI_FREE(obj);
++
++ if (i >= ARRAY_SIZE(int3472_sensor_configs))
++ return ERR_PTR(-EINVAL);
++
++ return &int3472_sensor_configs[i];
++}
++
++static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472,
++ struct acpi_resource_gpio *agpio,
++ const char *func, u32 polarity)
++{
++ const struct int3472_sensor_config *sensor_config;
++ char *path = agpio->resource_source.string_ptr;
++ struct gpiod_lookup *table_entry;
++ struct acpi_device *adev;
++ acpi_handle handle;
++ acpi_status status;
++ int ret;
++
++ if (int3472->n_sensor_gpios >= INT3472_MAX_SENSOR_GPIOS) {
++ dev_warn(int3472->dev, "Too many GPIOs mapped\n");
++ return -EINVAL;
++ }
++
++ sensor_config = int3472->sensor_config;
++ if (!IS_ERR(sensor_config) && sensor_config->function_maps) {
++ const struct int3472_gpio_function_remap *remap;
++
++ for (remap = sensor_config->function_maps; remap->documented; remap++) {
++ if (!strcmp(func, remap->documented)) {
++ func = remap->actual;
++ break;
++ }
++ }
++ }
++
++ /* Functions mapped to NULL should not be mapped to the sensor */
++ if (!func)
++ return 0;
++
++ status = acpi_get_handle(NULL, path, &handle);
++ if (ACPI_FAILURE(status))
++ return -EINVAL;
++
++ ret = acpi_bus_get_device(handle, &adev);
++ if (ret)
++ return -ENODEV;
++
++ table_entry = &int3472->gpios.table[int3472->n_sensor_gpios];
++ table_entry->key = acpi_dev_name(adev);
++ table_entry->chip_hwnum = agpio->pin_table[0];
++ table_entry->con_id = func;
++ table_entry->idx = 0;
++ table_entry->flags = polarity;
++
++ int3472->n_sensor_gpios++;
++
++ return 0;
++}
++
++static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
++ struct acpi_resource_gpio *agpio, u8 type)
++{
++ char *path = agpio->resource_source.string_ptr;
++ u16 pin = agpio->pin_table[0];
++ struct gpio_desc *gpio;
++
++ switch (type) {
++ case INT3472_GPIO_TYPE_CLK_ENABLE:
++ gpio = acpi_get_and_request_gpiod(path, pin, "int3472,clk-enable");
++ if (IS_ERR(gpio))
++ return PTR_ERR(gpio);
++
++ int3472->clock.ena_gpio = gpio;
++ /* Ensure the pin is in output mode and non-active state */
++ gpiod_direction_output(int3472->clock.ena_gpio, 0);
++ break;
++ case INT3472_GPIO_TYPE_PRIVACY_LED:
++ gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led");
++ if (IS_ERR(gpio))
++ return PTR_ERR(gpio);
++
++ int3472->clock.led_gpio = gpio;
++ /* Ensure the pin is in output mode and non-active state */
++ gpiod_direction_output(int3472->clock.led_gpio, 0);
++ break;
++ default:
++ dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type);
++ break;
++ }
++
++ return 0;
++}
++
++/**
++ * skl_int3472_handle_gpio_resources: Map PMIC resources to consuming sensor
++ * @ares: A pointer to a &struct acpi_resource
++ * @data: A pointer to a &struct int3472_discrete_device
++ *
++ * This function handles the GPIO resources listed against an INT3472
++ * ACPI device by checking the value of the corresponding _DSM entry.
++ * The _DSM returns a 32-bit int, where the lowest byte represents the
++ * function of the GPIO pin:
++ *
++ * 0x00 Reset
++ * 0x01 Power down
++ * 0x0b Power enable
++ * 0x0c Clock enable
++ * 0x0d Privacy LED
++ *
++ * There are some known platform specific quirks where that does not quite
++ * hold up; for example where a pin with type 0x01 (Power down) is mapped to
++ * a sensor pin that performs a reset function or entries in _CRS and _DSM that
++ * do not actually correspond to a physical connection. These will be handled
++ * by the mapping sub-functions.
++ *
++ * GPIOs will either be mapped directly to the sensor device or else used
++ * to create clocks and regulators via the usual frameworks.
++ *
++ * Return:
++ * * 1 - To continue the loop
++ * * 0 - When all resources found are handled properly.
++ * * -EINVAL - If the resource is not a GPIO IO resource
++ * * -ENODEV - If the resource has no corresponding _DSM entry
++ * * -Other - Errors propagated from one of the sub-functions.
++ */
++static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
++ void *data)
++{
++ struct int3472_discrete_device *int3472 = data;
++ struct acpi_resource_gpio *agpio;
++ union acpi_object *obj;
++ const char *err_msg;
++ int ret;
++ u8 type;
++
++ if (!acpi_gpio_get_io_resource(ares, &agpio))
++ return 1;
++
++ /*
++ * ngpios + 2 because the index of this _DSM function is 1-based and
++ * the first function is just a count.
++ */
++ obj = acpi_evaluate_dsm_typed(int3472->adev->handle,
++ &int3472_gpio_guid, 0x00,
++ int3472->ngpios + 2,
++ NULL, ACPI_TYPE_INTEGER);
++
++ if (!obj) {
++ dev_warn(int3472->dev, "No _DSM entry for GPIO pin %u\n",
++ agpio->pin_table[0]);
++ return 1;
++ }
++
++ type = obj->integer.value & 0xff;
++
++ switch (type) {
++ case INT3472_GPIO_TYPE_RESET:
++ ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "reset",
++ GPIO_ACTIVE_LOW);
++ if (ret)
++ err_msg = "Failed to map reset pin to sensor\n";
++
++ break;
++ case INT3472_GPIO_TYPE_POWERDOWN:
++ ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "powerdown",
++ GPIO_ACTIVE_LOW);
++ if (ret)
++ err_msg = "Failed to map powerdown pin to sensor\n";
++
++ break;
++ case INT3472_GPIO_TYPE_CLK_ENABLE:
++ case INT3472_GPIO_TYPE_PRIVACY_LED:
++ ret = skl_int3472_map_gpio_to_clk(int3472, agpio, type);
++ if (ret)
++ err_msg = "Failed to map GPIO to clock\n";
++
++ break;
++ case INT3472_GPIO_TYPE_POWER_ENABLE:
++ ret = skl_int3472_register_regulator(int3472, agpio);
++ if (ret)
++ err_msg = "Failed to map regulator to sensor\n";
++
++ break;
++ default:
++ dev_warn(int3472->dev,
++ "GPIO type 0x%02x unknown; the sensor may not work\n",
++ type);
++ ret = 1;
++ break;
++ }
++
++ int3472->ngpios++;
++ ACPI_FREE(obj);
++
++ if (ret < 0)
++ return dev_err_probe(int3472->dev, ret, err_msg);
++
++ return ret;
++}
++
++static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
++{
++ LIST_HEAD(resource_list);
++ int ret;
++
++ /*
++ * No error check, because not having a sensor config is not necessarily
++ * a failure mode.
++ */
++ int3472->sensor_config = skl_int3472_get_sensor_module_config(int3472);
++
++ ret = acpi_dev_get_resources(int3472->adev, &resource_list,
++ skl_int3472_handle_gpio_resources,
++ int3472);
++ if (ret < 0)
++ return ret;
++
++ acpi_dev_free_resource_list(&resource_list);
++
++ /*
++ * If we find no clock enable GPIO pin then the privacy LED won't work.
++ * We've never seen that situation, but it's possible. Warn the user so
++ * it's clear what's happened.
++ */
++ if (int3472->clock.ena_gpio) {
++ ret = skl_int3472_register_clock(int3472);
++ if (ret)
++ return ret;
++ } else {
++ if (int3472->clock.led_gpio)
++ dev_warn(int3472->dev,
++ "No clk GPIO. The privacy LED won't work\n");
++ }
++
++ int3472->gpios.dev_id = int3472->sensor_name;
++ gpiod_add_lookup_table(&int3472->gpios);
++
++ return 0;
++}
++
++static int skl_int3472_discrete_remove(struct platform_device *pdev);
++
++static int skl_int3472_discrete_probe(struct platform_device *pdev)
++{
++ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
++ struct int3472_discrete_device *int3472;
++ struct int3472_cldb cldb;
++ int ret;
++
++ ret = skl_int3472_fill_cldb(adev, &cldb);
++ if (ret) {
++ dev_err(&pdev->dev, "Couldn't fill CLDB structure\n");
++ return ret;
++ }
++
++ if (cldb.control_logic_type != 1) {
++ dev_err(&pdev->dev, "Unsupported control logic type %u\n",
++ cldb.control_logic_type);
++ return -EINVAL;
++ }
++
++ /* Max num GPIOs we've seen plus a terminator */
++ int3472 = devm_kzalloc(&pdev->dev, struct_size(int3472, gpios.table,
++ INT3472_MAX_SENSOR_GPIOS + 1), GFP_KERNEL);
++ if (!int3472)
++ return -ENOMEM;
++
++ int3472->adev = adev;
++ int3472->dev = &pdev->dev;
++ platform_set_drvdata(pdev, int3472);
++
++ int3472->sensor = acpi_dev_get_first_consumer_dev(adev);
++ if (!int3472->sensor) {
++ dev_err(&pdev->dev, "INT3472 seems to have no dependents.\n");
++ return -ENODEV;
++ }
++
++ int3472->sensor_name = devm_kasprintf(int3472->dev, GFP_KERNEL,
++ I2C_DEV_NAME_FORMAT,
++ acpi_dev_name(int3472->sensor));
++ if (!int3472->sensor_name) {
++ ret = -ENOMEM;
++ goto err_put_sensor;
++ }
++
++ /*
++ * Initialising this list means we can call gpiod_remove_lookup_table()
++ * in failure paths without issue.
++ */
++ INIT_LIST_HEAD(&int3472->gpios.list);
++
++ ret = skl_int3472_parse_crs(int3472);
++ if (ret) {
++ skl_int3472_discrete_remove(pdev);
++ return ret;
++ }
++
++ return 0;
++
++err_put_sensor:
++ acpi_dev_put(int3472->sensor);
++
++ return ret;
++}
++
++static int skl_int3472_discrete_remove(struct platform_device *pdev)
++{
++ struct int3472_discrete_device *int3472 = platform_get_drvdata(pdev);
++
++ gpiod_remove_lookup_table(&int3472->gpios);
++
++ if (int3472->clock.cl)
++ skl_int3472_unregister_clock(int3472);
++
++ gpiod_put(int3472->clock.ena_gpio);
++ gpiod_put(int3472->clock.led_gpio);
++
++ skl_int3472_unregister_regulator(int3472);
++
++ return 0;
++}
++
++static const struct acpi_device_id int3472_device_id[] = {
++ { "INT3472", 0 },
++ { }
++};
++MODULE_DEVICE_TABLE(acpi, int3472_device_id);
++
++static struct platform_driver int3472_discrete = {
++ .driver = {
++ .name = "int3472-discrete",
++ .acpi_match_table = int3472_device_id,
++ },
++ .probe = skl_int3472_discrete_probe,
++ .remove = skl_int3472_discrete_remove,
++};
++module_platform_driver(int3472_discrete);
++
++MODULE_DESCRIPTION("Intel SkyLake INT3472 ACPI Discrete Device Driver");
++MODULE_AUTHOR("Daniel Scally <djrscally@gmail.com>");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_clk_and_regulator.c
+deleted file mode 100644
+index 1700e7557a824..0000000000000
+--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_clk_and_regulator.c
++++ /dev/null
+@@ -1,207 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/* Author: Dan Scally <djrscally@gmail.com> */
+-
+-#include <linux/acpi.h>
+-#include <linux/clkdev.h>
+-#include <linux/clk-provider.h>
+-#include <linux/device.h>
+-#include <linux/gpio/consumer.h>
+-#include <linux/regulator/driver.h>
+-#include <linux/slab.h>
+-
+-#include "intel_skl_int3472_common.h"
+-
+-/*
+- * The regulators have to have .ops to be valid, but the only ops we actually
+- * support are .enable and .disable which are handled via .ena_gpiod. Pass an
+- * empty struct to clear the check without lying about capabilities.
+- */
+-static const struct regulator_ops int3472_gpio_regulator_ops;
+-
+-static int skl_int3472_clk_prepare(struct clk_hw *hw)
+-{
+- struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+-
+- gpiod_set_value_cansleep(clk->ena_gpio, 1);
+- gpiod_set_value_cansleep(clk->led_gpio, 1);
+-
+- return 0;
+-}
+-
+-static void skl_int3472_clk_unprepare(struct clk_hw *hw)
+-{
+- struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+-
+- gpiod_set_value_cansleep(clk->ena_gpio, 0);
+- gpiod_set_value_cansleep(clk->led_gpio, 0);
+-}
+-
+-static int skl_int3472_clk_enable(struct clk_hw *hw)
+-{
+- /*
+- * We're just turning a GPIO on to enable the clock, which operation
+- * has the potential to sleep. Given .enable() cannot sleep, but
+- * .prepare() can, we toggle the GPIO in .prepare() instead. Thus,
+- * nothing to do here.
+- */
+- return 0;
+-}
+-
+-static void skl_int3472_clk_disable(struct clk_hw *hw)
+-{
+- /* Likewise, nothing to do here... */
+-}
+-
+-static unsigned int skl_int3472_get_clk_frequency(struct int3472_discrete_device *int3472)
+-{
+- union acpi_object *obj;
+- unsigned int freq;
+-
+- obj = skl_int3472_get_acpi_buffer(int3472->sensor, "SSDB");
+- if (IS_ERR(obj))
+- return 0; /* report rate as 0 on error */
+-
+- if (obj->buffer.length < CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET + sizeof(u32)) {
+- dev_err(int3472->dev, "The buffer is too small\n");
+- kfree(obj);
+- return 0;
+- }
+-
+- freq = *(u32 *)(obj->buffer.pointer + CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET);
+-
+- kfree(obj);
+- return freq;
+-}
+-
+-static unsigned long skl_int3472_clk_recalc_rate(struct clk_hw *hw,
+- unsigned long parent_rate)
+-{
+- struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+-
+- return clk->frequency;
+-}
+-
+-static const struct clk_ops skl_int3472_clock_ops = {
+- .prepare = skl_int3472_clk_prepare,
+- .unprepare = skl_int3472_clk_unprepare,
+- .enable = skl_int3472_clk_enable,
+- .disable = skl_int3472_clk_disable,
+- .recalc_rate = skl_int3472_clk_recalc_rate,
+-};
+-
+-int skl_int3472_register_clock(struct int3472_discrete_device *int3472)
+-{
+- struct clk_init_data init = {
+- .ops = &skl_int3472_clock_ops,
+- .flags = CLK_GET_RATE_NOCACHE,
+- };
+- int ret;
+-
+- init.name = kasprintf(GFP_KERNEL, "%s-clk",
+- acpi_dev_name(int3472->adev));
+- if (!init.name)
+- return -ENOMEM;
+-
+- int3472->clock.frequency = skl_int3472_get_clk_frequency(int3472);
+-
+- int3472->clock.clk_hw.init = &init;
+- int3472->clock.clk = clk_register(&int3472->adev->dev,
+- &int3472->clock.clk_hw);
+- if (IS_ERR(int3472->clock.clk)) {
+- ret = PTR_ERR(int3472->clock.clk);
+- goto out_free_init_name;
+- }
+-
+- int3472->clock.cl = clkdev_create(int3472->clock.clk, NULL,
+- int3472->sensor_name);
+- if (!int3472->clock.cl) {
+- ret = -ENOMEM;
+- goto err_unregister_clk;
+- }
+-
+- kfree(init.name);
+- return 0;
+-
+-err_unregister_clk:
+- clk_unregister(int3472->clock.clk);
+-out_free_init_name:
+- kfree(init.name);
+-
+- return ret;
+-}
+-
+-void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472)
+-{
+- clkdev_drop(int3472->clock.cl);
+- clk_unregister(int3472->clock.clk);
+-}
+-
+-int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
+- struct acpi_resource_gpio *agpio)
+-{
+- const struct int3472_sensor_config *sensor_config;
+- char *path = agpio->resource_source.string_ptr;
+- struct regulator_consumer_supply supply_map;
+- struct regulator_init_data init_data = { };
+- struct regulator_config cfg = { };
+- int ret;
+-
+- sensor_config = int3472->sensor_config;
+- if (IS_ERR(sensor_config)) {
+- dev_err(int3472->dev, "No sensor module config\n");
+- return PTR_ERR(sensor_config);
+- }
+-
+- if (!sensor_config->supply_map.supply) {
+- dev_err(int3472->dev, "No supply name defined\n");
+- return -ENODEV;
+- }
+-
+- init_data.constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+- init_data.num_consumer_supplies = 1;
+- supply_map = sensor_config->supply_map;
+- supply_map.dev_name = int3472->sensor_name;
+- init_data.consumer_supplies = &supply_map;
+-
+- snprintf(int3472->regulator.regulator_name,
+- sizeof(int3472->regulator.regulator_name), "%s-regulator",
+- acpi_dev_name(int3472->adev));
+- snprintf(int3472->regulator.supply_name,
+- GPIO_REGULATOR_SUPPLY_NAME_LENGTH, "supply-0");
+-
+- int3472->regulator.rdesc = INT3472_REGULATOR(
+- int3472->regulator.regulator_name,
+- int3472->regulator.supply_name,
+- &int3472_gpio_regulator_ops);
+-
+- int3472->regulator.gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0],
+- "int3472,regulator");
+- if (IS_ERR(int3472->regulator.gpio)) {
+- dev_err(int3472->dev, "Failed to get regulator GPIO line\n");
+- return PTR_ERR(int3472->regulator.gpio);
+- }
+-
+- cfg.dev = &int3472->adev->dev;
+- cfg.init_data = &init_data;
+- cfg.ena_gpiod = int3472->regulator.gpio;
+-
+- int3472->regulator.rdev = regulator_register(&int3472->regulator.rdesc,
+- &cfg);
+- if (IS_ERR(int3472->regulator.rdev)) {
+- ret = PTR_ERR(int3472->regulator.rdev);
+- goto err_free_gpio;
+- }
+-
+- return 0;
+-
+-err_free_gpio:
+- gpiod_put(int3472->regulator.gpio);
+-
+- return ret;
+-}
+-
+-void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472)
+-{
+- regulator_unregister(int3472->regulator.rdev);
+- gpiod_put(int3472->regulator.gpio);
+-}
+diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c
+deleted file mode 100644
+index 497e74fba75fb..0000000000000
+--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c
++++ /dev/null
+@@ -1,106 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/* Author: Dan Scally <djrscally@gmail.com> */
+-
+-#include <linux/acpi.h>
+-#include <linux/i2c.h>
+-#include <linux/platform_device.h>
+-#include <linux/slab.h>
+-
+-#include "intel_skl_int3472_common.h"
+-
+-union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev, char *id)
+-{
+- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+- acpi_handle handle = adev->handle;
+- union acpi_object *obj;
+- acpi_status status;
+-
+- status = acpi_evaluate_object(handle, id, NULL, &buffer);
+- if (ACPI_FAILURE(status))
+- return ERR_PTR(-ENODEV);
+-
+- obj = buffer.pointer;
+- if (!obj)
+- return ERR_PTR(-ENODEV);
+-
+- if (obj->type != ACPI_TYPE_BUFFER) {
+- acpi_handle_err(handle, "%s object is not an ACPI buffer\n", id);
+- kfree(obj);
+- return ERR_PTR(-EINVAL);
+- }
+-
+- return obj;
+-}
+-
+-int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb)
+-{
+- union acpi_object *obj;
+- int ret;
+-
+- obj = skl_int3472_get_acpi_buffer(adev, "CLDB");
+- if (IS_ERR(obj))
+- return PTR_ERR(obj);
+-
+- if (obj->buffer.length > sizeof(*cldb)) {
+- acpi_handle_err(adev->handle, "The CLDB buffer is too large\n");
+- ret = -EINVAL;
+- goto out_free_obj;
+- }
+-
+- memcpy(cldb, obj->buffer.pointer, obj->buffer.length);
+- ret = 0;
+-
+-out_free_obj:
+- kfree(obj);
+- return ret;
+-}
+-
+-static const struct acpi_device_id int3472_device_id[] = {
+- { "INT3472", 0 },
+- { }
+-};
+-MODULE_DEVICE_TABLE(acpi, int3472_device_id);
+-
+-static struct platform_driver int3472_discrete = {
+- .driver = {
+- .name = "int3472-discrete",
+- .acpi_match_table = int3472_device_id,
+- },
+- .probe = skl_int3472_discrete_probe,
+- .remove = skl_int3472_discrete_remove,
+-};
+-
+-static struct i2c_driver int3472_tps68470 = {
+- .driver = {
+- .name = "int3472-tps68470",
+- .acpi_match_table = int3472_device_id,
+- },
+- .probe_new = skl_int3472_tps68470_probe,
+-};
+-
+-static int skl_int3472_init(void)
+-{
+- int ret;
+-
+- ret = platform_driver_register(&int3472_discrete);
+- if (ret)
+- return ret;
+-
+- ret = i2c_register_driver(THIS_MODULE, &int3472_tps68470);
+- if (ret)
+- platform_driver_unregister(&int3472_discrete);
+-
+- return ret;
+-}
+-module_init(skl_int3472_init);
+-
+-static void skl_int3472_exit(void)
+-{
+- platform_driver_unregister(&int3472_discrete);
+- i2c_del_driver(&int3472_tps68470);
+-}
+-module_exit(skl_int3472_exit);
+-
+-MODULE_DESCRIPTION("Intel SkyLake INT3472 ACPI Device Driver");
+-MODULE_AUTHOR("Daniel Scally <djrscally@gmail.com>");
+-MODULE_LICENSE("GPL v2");
+diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.h
+deleted file mode 100644
+index 714fde73b5247..0000000000000
+--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.h
++++ /dev/null
+@@ -1,122 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/* Author: Dan Scally <djrscally@gmail.com> */
+-
+-#ifndef _INTEL_SKL_INT3472_H
+-#define _INTEL_SKL_INT3472_H
+-
+-#include <linux/clk-provider.h>
+-#include <linux/gpio/machine.h>
+-#include <linux/regulator/driver.h>
+-#include <linux/regulator/machine.h>
+-#include <linux/types.h>
+-
+-/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */
+-#ifndef I2C_DEV_NAME_FORMAT
+-#define I2C_DEV_NAME_FORMAT "i2c-%s"
+-#endif
+-
+-/* PMIC GPIO Types */
+-#define INT3472_GPIO_TYPE_RESET 0x00
+-#define INT3472_GPIO_TYPE_POWERDOWN 0x01
+-#define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b
+-#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c
+-#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d
+-
+-#define INT3472_PDEV_MAX_NAME_LEN 23
+-#define INT3472_MAX_SENSOR_GPIOS 3
+-
+-#define GPIO_REGULATOR_NAME_LENGTH 21
+-#define GPIO_REGULATOR_SUPPLY_NAME_LENGTH 9
+-
+-#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET 86
+-
+-#define INT3472_REGULATOR(_name, _supply, _ops) \
+- (const struct regulator_desc) { \
+- .name = _name, \
+- .supply_name = _supply, \
+- .type = REGULATOR_VOLTAGE, \
+- .ops = _ops, \
+- .owner = THIS_MODULE, \
+- }
+-
+-#define to_int3472_clk(hw) \
+- container_of(hw, struct int3472_gpio_clock, clk_hw)
+-
+-#define to_int3472_device(clk) \
+- container_of(clk, struct int3472_discrete_device, clock)
+-
+-struct acpi_device;
+-struct i2c_client;
+-struct platform_device;
+-
+-struct int3472_cldb {
+- u8 version;
+- /*
+- * control logic type
+- * 0: UNKNOWN
+- * 1: DISCRETE(CRD-D)
+- * 2: PMIC TPS68470
+- * 3: PMIC uP6641
+- */
+- u8 control_logic_type;
+- u8 control_logic_id;
+- u8 sensor_card_sku;
+- u8 reserved[28];
+-};
+-
+-struct int3472_gpio_function_remap {
+- const char *documented;
+- const char *actual;
+-};
+-
+-struct int3472_sensor_config {
+- const char *sensor_module_name;
+- struct regulator_consumer_supply supply_map;
+- const struct int3472_gpio_function_remap *function_maps;
+-};
+-
+-struct int3472_discrete_device {
+- struct acpi_device *adev;
+- struct device *dev;
+- struct acpi_device *sensor;
+- const char *sensor_name;
+-
+- const struct int3472_sensor_config *sensor_config;
+-
+- struct int3472_gpio_regulator {
+- char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
+- char supply_name[GPIO_REGULATOR_SUPPLY_NAME_LENGTH];
+- struct gpio_desc *gpio;
+- struct regulator_dev *rdev;
+- struct regulator_desc rdesc;
+- } regulator;
+-
+- struct int3472_gpio_clock {
+- struct clk *clk;
+- struct clk_hw clk_hw;
+- struct clk_lookup *cl;
+- struct gpio_desc *ena_gpio;
+- struct gpio_desc *led_gpio;
+- u32 frequency;
+- } clock;
+-
+- unsigned int ngpios; /* how many GPIOs have we seen */
+- unsigned int n_sensor_gpios; /* how many have we mapped to sensor */
+- struct gpiod_lookup_table gpios;
+-};
+-
+-int skl_int3472_discrete_probe(struct platform_device *pdev);
+-int skl_int3472_discrete_remove(struct platform_device *pdev);
+-int skl_int3472_tps68470_probe(struct i2c_client *client);
+-union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
+- char *id);
+-int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
+-
+-int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
+-void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472);
+-
+-int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
+- struct acpi_resource_gpio *agpio);
+-void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
+-
+-#endif
+diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
+deleted file mode 100644
+index e59d79c7e82f8..0000000000000
+--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
++++ /dev/null
+@@ -1,413 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/* Author: Dan Scally <djrscally@gmail.com> */
+-
+-#include <linux/acpi.h>
+-#include <linux/clkdev.h>
+-#include <linux/clk-provider.h>
+-#include <linux/device.h>
+-#include <linux/gpio/consumer.h>
+-#include <linux/gpio/machine.h>
+-#include <linux/i2c.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/overflow.h>
+-#include <linux/platform_device.h>
+-#include <linux/uuid.h>
+-
+-#include "intel_skl_int3472_common.h"
+-
+-/*
+- * 79234640-9e10-4fea-a5c1-b5aa8b19756f
+- * This _DSM GUID returns information about the GPIO lines mapped to a
+- * discrete INT3472 device. Function number 1 returns a count of the GPIO
+- * lines that are mapped. Subsequent functions return 32 bit ints encoding
+- * information about the GPIO line, including its purpose.
+- */
+-static const guid_t int3472_gpio_guid =
+- GUID_INIT(0x79234640, 0x9e10, 0x4fea,
+- 0xa5, 0xc1, 0xb5, 0xaa, 0x8b, 0x19, 0x75, 0x6f);
+-
+-/*
+- * 822ace8f-2814-4174-a56b-5f029fe079ee
+- * This _DSM GUID returns a string from the sensor device, which acts as a
+- * module identifier.
+- */
+-static const guid_t cio2_sensor_module_guid =
+- GUID_INIT(0x822ace8f, 0x2814, 0x4174,
+- 0xa5, 0x6b, 0x5f, 0x02, 0x9f, 0xe0, 0x79, 0xee);
+-
+-/*
+- * Here follows platform specific mapping information that we can pass to
+- * the functions mapping resources to the sensors. Where the sensors have
+- * a power enable pin defined in DSDT we need to provide a supply name so
+- * the sensor drivers can find the regulator. The device name will be derived
+- * from the sensor's ACPI device within the code. Optionally, we can provide a
+- * NULL terminated array of function name mappings to deal with any platform
+- * specific deviations from the documented behaviour of GPIOs.
+- *
+- * Map a GPIO function name to NULL to prevent the driver from mapping that
+- * GPIO at all.
+- */
+-
+-static const struct int3472_gpio_function_remap ov2680_gpio_function_remaps[] = {
+- { "reset", NULL },
+- { "powerdown", "reset" },
+- { }
+-};
+-
+-static const struct int3472_sensor_config int3472_sensor_configs[] = {
+- /* Lenovo Miix 510-12ISK - OV2680, Front */
+- { "GNDF140809R", { 0 }, ov2680_gpio_function_remaps },
+- /* Lenovo Miix 510-12ISK - OV5648, Rear */
+- { "GEFF150023R", REGULATOR_SUPPLY("avdd", NULL), NULL },
+- /* Surface Go 1&2 - OV5693, Front */
+- { "YHCU", REGULATOR_SUPPLY("avdd", NULL), NULL },
+-};
+-
+-static const struct int3472_sensor_config *
+-skl_int3472_get_sensor_module_config(struct int3472_discrete_device *int3472)
+-{
+- union acpi_object *obj;
+- unsigned int i;
+-
+- obj = acpi_evaluate_dsm_typed(int3472->sensor->handle,
+- &cio2_sensor_module_guid, 0x00,
+- 0x01, NULL, ACPI_TYPE_STRING);
+-
+- if (!obj) {
+- dev_err(int3472->dev,
+- "Failed to get sensor module string from _DSM\n");
+- return ERR_PTR(-ENODEV);
+- }
+-
+- if (obj->string.type != ACPI_TYPE_STRING) {
+- dev_err(int3472->dev,
+- "Sensor _DSM returned a non-string value\n");
+-
+- ACPI_FREE(obj);
+- return ERR_PTR(-EINVAL);
+- }
+-
+- for (i = 0; i < ARRAY_SIZE(int3472_sensor_configs); i++) {
+- if (!strcmp(int3472_sensor_configs[i].sensor_module_name,
+- obj->string.pointer))
+- break;
+- }
+-
+- ACPI_FREE(obj);
+-
+- if (i >= ARRAY_SIZE(int3472_sensor_configs))
+- return ERR_PTR(-EINVAL);
+-
+- return &int3472_sensor_configs[i];
+-}
+-
+-static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472,
+- struct acpi_resource_gpio *agpio,
+- const char *func, u32 polarity)
+-{
+- const struct int3472_sensor_config *sensor_config;
+- char *path = agpio->resource_source.string_ptr;
+- struct gpiod_lookup *table_entry;
+- struct acpi_device *adev;
+- acpi_handle handle;
+- acpi_status status;
+- int ret;
+-
+- if (int3472->n_sensor_gpios >= INT3472_MAX_SENSOR_GPIOS) {
+- dev_warn(int3472->dev, "Too many GPIOs mapped\n");
+- return -EINVAL;
+- }
+-
+- sensor_config = int3472->sensor_config;
+- if (!IS_ERR(sensor_config) && sensor_config->function_maps) {
+- const struct int3472_gpio_function_remap *remap;
+-
+- for (remap = sensor_config->function_maps; remap->documented; remap++) {
+- if (!strcmp(func, remap->documented)) {
+- func = remap->actual;
+- break;
+- }
+- }
+- }
+-
+- /* Functions mapped to NULL should not be mapped to the sensor */
+- if (!func)
+- return 0;
+-
+- status = acpi_get_handle(NULL, path, &handle);
+- if (ACPI_FAILURE(status))
+- return -EINVAL;
+-
+- ret = acpi_bus_get_device(handle, &adev);
+- if (ret)
+- return -ENODEV;
+-
+- table_entry = &int3472->gpios.table[int3472->n_sensor_gpios];
+- table_entry->key = acpi_dev_name(adev);
+- table_entry->chip_hwnum = agpio->pin_table[0];
+- table_entry->con_id = func;
+- table_entry->idx = 0;
+- table_entry->flags = polarity;
+-
+- int3472->n_sensor_gpios++;
+-
+- return 0;
+-}
+-
+-static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
+- struct acpi_resource_gpio *agpio, u8 type)
+-{
+- char *path = agpio->resource_source.string_ptr;
+- u16 pin = agpio->pin_table[0];
+- struct gpio_desc *gpio;
+-
+- switch (type) {
+- case INT3472_GPIO_TYPE_CLK_ENABLE:
+- gpio = acpi_get_and_request_gpiod(path, pin, "int3472,clk-enable");
+- if (IS_ERR(gpio))
+- return (PTR_ERR(gpio));
+-
+- int3472->clock.ena_gpio = gpio;
+- break;
+- case INT3472_GPIO_TYPE_PRIVACY_LED:
+- gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led");
+- if (IS_ERR(gpio))
+- return (PTR_ERR(gpio));
+-
+- int3472->clock.led_gpio = gpio;
+- break;
+- default:
+- dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type);
+- break;
+- }
+-
+- return 0;
+-}
+-
+-/**
+- * skl_int3472_handle_gpio_resources: Map PMIC resources to consuming sensor
+- * @ares: A pointer to a &struct acpi_resource
+- * @data: A pointer to a &struct int3472_discrete_device
+- *
+- * This function handles GPIO resources that are against an INT3472
+- * ACPI device, by checking the value of the corresponding _DSM entry.
+- * This will return a 32bit int, where the lowest byte represents the
+- * function of the GPIO pin:
+- *
+- * 0x00 Reset
+- * 0x01 Power down
+- * 0x0b Power enable
+- * 0x0c Clock enable
+- * 0x0d Privacy LED
+- *
+- * There are some known platform specific quirks where that does not quite
+- * hold up; for example where a pin with type 0x01 (Power down) is mapped to
+- * a sensor pin that performs a reset function or entries in _CRS and _DSM that
+- * do not actually correspond to a physical connection. These will be handled
+- * by the mapping sub-functions.
+- *
+- * GPIOs will either be mapped directly to the sensor device or else used
+- * to create clocks and regulators via the usual frameworks.
+- *
+- * Return:
+- * * 1 - To continue the loop
+- * * 0 - When all resources found are handled properly.
+- * * -EINVAL - If the resource is not a GPIO IO resource
+- * * -ENODEV - If the resource has no corresponding _DSM entry
+- * * -Other - Errors propagated from one of the sub-functions.
+- */
+-static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
+- void *data)
+-{
+- struct int3472_discrete_device *int3472 = data;
+- struct acpi_resource_gpio *agpio;
+- union acpi_object *obj;
+- const char *err_msg;
+- int ret;
+- u8 type;
+-
+- if (!acpi_gpio_get_io_resource(ares, &agpio))
+- return 1;
+-
+- /*
+- * ngpios + 2 because the index of this _DSM function is 1-based and
+- * the first function is just a count.
+- */
+- obj = acpi_evaluate_dsm_typed(int3472->adev->handle,
+- &int3472_gpio_guid, 0x00,
+- int3472->ngpios + 2,
+- NULL, ACPI_TYPE_INTEGER);
+-
+- if (!obj) {
+- dev_warn(int3472->dev, "No _DSM entry for GPIO pin %u\n",
+- agpio->pin_table[0]);
+- return 1;
+- }
+-
+- type = obj->integer.value & 0xff;
+-
+- switch (type) {
+- case INT3472_GPIO_TYPE_RESET:
+- ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "reset",
+- GPIO_ACTIVE_LOW);
+- if (ret)
+- err_msg = "Failed to map reset pin to sensor\n";
+-
+- break;
+- case INT3472_GPIO_TYPE_POWERDOWN:
+- ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "powerdown",
+- GPIO_ACTIVE_LOW);
+- if (ret)
+- err_msg = "Failed to map powerdown pin to sensor\n";
+-
+- break;
+- case INT3472_GPIO_TYPE_CLK_ENABLE:
+- case INT3472_GPIO_TYPE_PRIVACY_LED:
+- ret = skl_int3472_map_gpio_to_clk(int3472, agpio, type);
+- if (ret)
+- err_msg = "Failed to map GPIO to clock\n";
+-
+- break;
+- case INT3472_GPIO_TYPE_POWER_ENABLE:
+- ret = skl_int3472_register_regulator(int3472, agpio);
+- if (ret)
+- err_msg = "Failed to map regulator to sensor\n";
+-
+- break;
+- default:
+- dev_warn(int3472->dev,
+- "GPIO type 0x%02x unknown; the sensor may not work\n",
+- type);
+- ret = 1;
+- break;
+- }
+-
+- int3472->ngpios++;
+- ACPI_FREE(obj);
+-
+- if (ret < 0)
+- return dev_err_probe(int3472->dev, ret, err_msg);
+-
+- return ret;
+-}
+-
+-static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
+-{
+- LIST_HEAD(resource_list);
+- int ret;
+-
+- /*
+- * No error check, because not having a sensor config is not necessarily
+- * a failure mode.
+- */
+- int3472->sensor_config = skl_int3472_get_sensor_module_config(int3472);
+-
+- ret = acpi_dev_get_resources(int3472->adev, &resource_list,
+- skl_int3472_handle_gpio_resources,
+- int3472);
+- if (ret < 0)
+- return ret;
+-
+- acpi_dev_free_resource_list(&resource_list);
+-
+- /*
+- * If we find no clock enable GPIO pin then the privacy LED won't work.
+- * We've never seen that situation, but it's possible. Warn the user so
+- * it's clear what's happened.
+- */
+- if (int3472->clock.ena_gpio) {
+- ret = skl_int3472_register_clock(int3472);
+- if (ret)
+- return ret;
+- } else {
+- if (int3472->clock.led_gpio)
+- dev_warn(int3472->dev,
+- "No clk GPIO. The privacy LED won't work\n");
+- }
+-
+- int3472->gpios.dev_id = int3472->sensor_name;
+- gpiod_add_lookup_table(&int3472->gpios);
+-
+- return 0;
+-}
+-
+-int skl_int3472_discrete_probe(struct platform_device *pdev)
+-{
+- struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+- struct int3472_discrete_device *int3472;
+- struct int3472_cldb cldb;
+- int ret;
+-
+- ret = skl_int3472_fill_cldb(adev, &cldb);
+- if (ret) {
+- dev_err(&pdev->dev, "Couldn't fill CLDB structure\n");
+- return ret;
+- }
+-
+- if (cldb.control_logic_type != 1) {
+- dev_err(&pdev->dev, "Unsupported control logic type %u\n",
+- cldb.control_logic_type);
+- return -EINVAL;
+- }
+-
+- /* Max num GPIOs we've seen plus a terminator */
+- int3472 = devm_kzalloc(&pdev->dev, struct_size(int3472, gpios.table,
+- INT3472_MAX_SENSOR_GPIOS + 1), GFP_KERNEL);
+- if (!int3472)
+- return -ENOMEM;
+-
+- int3472->adev = adev;
+- int3472->dev = &pdev->dev;
+- platform_set_drvdata(pdev, int3472);
+-
+- int3472->sensor = acpi_dev_get_first_consumer_dev(adev);
+- if (!int3472->sensor) {
+- dev_err(&pdev->dev, "INT3472 seems to have no dependents.\n");
+- return -ENODEV;
+- }
+-
+- int3472->sensor_name = devm_kasprintf(int3472->dev, GFP_KERNEL,
+- I2C_DEV_NAME_FORMAT,
+- acpi_dev_name(int3472->sensor));
+- if (!int3472->sensor_name) {
+- ret = -ENOMEM;
+- goto err_put_sensor;
+- }
+-
+- /*
+- * Initialising this list means we can call gpiod_remove_lookup_table()
+- * in failure paths without issue.
+- */
+- INIT_LIST_HEAD(&int3472->gpios.list);
+-
+- ret = skl_int3472_parse_crs(int3472);
+- if (ret) {
+- skl_int3472_discrete_remove(pdev);
+- return ret;
+- }
+-
+- return 0;
+-
+-err_put_sensor:
+- acpi_dev_put(int3472->sensor);
+-
+- return ret;
+-}
+-
+-int skl_int3472_discrete_remove(struct platform_device *pdev)
+-{
+- struct int3472_discrete_device *int3472 = platform_get_drvdata(pdev);
+-
+- gpiod_remove_lookup_table(&int3472->gpios);
+-
+- if (int3472->clock.cl)
+- skl_int3472_unregister_clock(int3472);
+-
+- gpiod_put(int3472->clock.ena_gpio);
+- gpiod_put(int3472->clock.led_gpio);
+-
+- skl_int3472_unregister_regulator(int3472);
+-
+- return 0;
+-}
+diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_tps68470.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_tps68470.c
+deleted file mode 100644
+index c05b4cf502fef..0000000000000
+--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_tps68470.c
++++ /dev/null
+@@ -1,137 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/* Author: Dan Scally <djrscally@gmail.com> */
+-
+-#include <linux/i2c.h>
+-#include <linux/mfd/core.h>
+-#include <linux/mfd/tps68470.h>
+-#include <linux/platform_device.h>
+-#include <linux/regmap.h>
+-
+-#include "intel_skl_int3472_common.h"
+-
+-#define DESIGNED_FOR_CHROMEOS 1
+-#define DESIGNED_FOR_WINDOWS 2
+-
+-static const struct mfd_cell tps68470_cros[] = {
+- { .name = "tps68470-gpio" },
+- { .name = "tps68470_pmic_opregion" },
+-};
+-
+-static const struct mfd_cell tps68470_win[] = {
+- { .name = "tps68470-gpio" },
+- { .name = "tps68470-clk" },
+- { .name = "tps68470-regulator" },
+-};
+-
+-static const struct regmap_config tps68470_regmap_config = {
+- .reg_bits = 8,
+- .val_bits = 8,
+- .max_register = TPS68470_REG_MAX,
+-};
+-
+-static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
+-{
+- unsigned int version;
+- int ret;
+-
+- /* Force software reset */
+- ret = regmap_write(regmap, TPS68470_REG_RESET, TPS68470_REG_RESET_MASK);
+- if (ret)
+- return ret;
+-
+- ret = regmap_read(regmap, TPS68470_REG_REVID, &version);
+- if (ret) {
+- dev_err(dev, "Failed to read revision register: %d\n", ret);
+- return ret;
+- }
+-
+- dev_info(dev, "TPS68470 REVID: 0x%02x\n", version);
+-
+- return 0;
+-}
+-
+-/** skl_int3472_tps68470_calc_type: Check what platform a device is designed for
+- * @adev: A pointer to a &struct acpi_device
+- *
+- * Check CLDB buffer against the PMIC's adev. If present, then we check
+- * the value of control_logic_type field and follow one of the
+- * following scenarios:
+- *
+- * 1. No CLDB - likely ACPI tables designed for ChromeOS. We
+- * create platform devices for the GPIOs and OpRegion drivers.
+- *
+- * 2. CLDB, with control_logic_type = 2 - probably ACPI tables
+- * made for Windows 2-in-1 platforms. Register pdevs for GPIO,
+- * Clock and Regulator drivers to bind to.
+- *
+- * 3. Any other value in control_logic_type, we should never have
+- * gotten to this point; fail probe and return.
+- *
+- * Return:
+- * * 1 Device intended for ChromeOS
+- * * 2 Device intended for Windows
+- * * -EINVAL Where @adev has an object named CLDB but it does not conform to
+- * our expectations
+- */
+-static int skl_int3472_tps68470_calc_type(struct acpi_device *adev)
+-{
+- struct int3472_cldb cldb = { 0 };
+- int ret;
+-
+- /*
+- * A CLDB buffer that exists, but which does not match our expectations
+- * should trigger an error so we don't blindly continue.
+- */
+- ret = skl_int3472_fill_cldb(adev, &cldb);
+- if (ret && ret != -ENODEV)
+- return ret;
+-
+- if (ret)
+- return DESIGNED_FOR_CHROMEOS;
+-
+- if (cldb.control_logic_type != 2)
+- return -EINVAL;
+-
+- return DESIGNED_FOR_WINDOWS;
+-}
+-
+-int skl_int3472_tps68470_probe(struct i2c_client *client)
+-{
+- struct acpi_device *adev = ACPI_COMPANION(&client->dev);
+- struct regmap *regmap;
+- int device_type;
+- int ret;
+-
+- regmap = devm_regmap_init_i2c(client, &tps68470_regmap_config);
+- if (IS_ERR(regmap)) {
+- dev_err(&client->dev, "Failed to create regmap: %ld\n", PTR_ERR(regmap));
+- return PTR_ERR(regmap);
+- }
+-
+- i2c_set_clientdata(client, regmap);
+-
+- ret = tps68470_chip_init(&client->dev, regmap);
+- if (ret < 0) {
+- dev_err(&client->dev, "TPS68470 init error %d\n", ret);
+- return ret;
+- }
+-
+- device_type = skl_int3472_tps68470_calc_type(adev);
+- switch (device_type) {
+- case DESIGNED_FOR_WINDOWS:
+- ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
+- tps68470_win, ARRAY_SIZE(tps68470_win),
+- NULL, 0, NULL);
+- break;
+- case DESIGNED_FOR_CHROMEOS:
+- ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
+- tps68470_cros, ARRAY_SIZE(tps68470_cros),
+- NULL, 0, NULL);
+- break;
+- default:
+- dev_err(&client->dev, "Failed to add MFD devices\n");
+- return device_type;
+- }
+-
+- return ret;
+-}
+diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c
+new file mode 100644
+index 0000000000000..fd3bef449137c
+--- /dev/null
++++ b/drivers/platform/x86/intel/int3472/tps68470.c
+@@ -0,0 +1,156 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Author: Dan Scally <djrscally@gmail.com> */
++
++#include <linux/i2c.h>
++#include <linux/mfd/core.h>
++#include <linux/mfd/tps68470.h>
++#include <linux/platform_device.h>
++#include <linux/regmap.h>
++
++#include "common.h"
++
++#define DESIGNED_FOR_CHROMEOS 1
++#define DESIGNED_FOR_WINDOWS 2
++
++static const struct mfd_cell tps68470_cros[] = {
++ { .name = "tps68470-gpio" },
++ { .name = "tps68470_pmic_opregion" },
++};
++
++static const struct mfd_cell tps68470_win[] = {
++ { .name = "tps68470-gpio" },
++ { .name = "tps68470-clk" },
++ { .name = "tps68470-regulator" },
++};
++
++static const struct regmap_config tps68470_regmap_config = {
++ .reg_bits = 8,
++ .val_bits = 8,
++ .max_register = TPS68470_REG_MAX,
++};
++
++static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
++{
++ unsigned int version;
++ int ret;
++
++ /* Force software reset */
++ ret = regmap_write(regmap, TPS68470_REG_RESET, TPS68470_REG_RESET_MASK);
++ if (ret)
++ return ret;
++
++ ret = regmap_read(regmap, TPS68470_REG_REVID, &version);
++ if (ret) {
++ dev_err(dev, "Failed to read revision register: %d\n", ret);
++ return ret;
++ }
++
++ dev_info(dev, "TPS68470 REVID: 0x%02x\n", version);
++
++ return 0;
++}
++
++/** skl_int3472_tps68470_calc_type: Check what platform a device is designed for
++ * @adev: A pointer to a &struct acpi_device
++ *
++ * Check for a CLDB buffer against the PMIC's adev. If one is present,
++ * we check the value of the control_logic_type field and follow one of
++ * the following scenarios:
++ *
++ * 1. No CLDB - likely ACPI tables designed for ChromeOS. We
++ * create platform devices for the GPIOs and OpRegion drivers.
++ *
++ * 2. CLDB, with control_logic_type = 2 - probably ACPI tables
++ * made for Windows 2-in-1 platforms. Register pdevs for GPIO,
++ * Clock and Regulator drivers to bind to.
++ *
++ * 3. Any other value in control_logic_type, we should never have
++ * gotten to this point; fail probe and return.
++ *
++ * Return:
++ * * 1 Device intended for ChromeOS
++ * * 2 Device intended for Windows
++ * * -EINVAL Where @adev has an object named CLDB but it does not conform to
++ * our expectations
++ */
++static int skl_int3472_tps68470_calc_type(struct acpi_device *adev)
++{
++ struct int3472_cldb cldb = { 0 };
++ int ret;
++
++ /*
++ * A CLDB buffer that exists, but which does not match our expectations
++ * should trigger an error so we don't blindly continue.
++ */
++ ret = skl_int3472_fill_cldb(adev, &cldb);
++ if (ret && ret != -ENODEV)
++ return ret;
++
++ if (ret)
++ return DESIGNED_FOR_CHROMEOS;
++
++ if (cldb.control_logic_type != 2)
++ return -EINVAL;
++
++ return DESIGNED_FOR_WINDOWS;
++}
++
++static int skl_int3472_tps68470_probe(struct i2c_client *client)
++{
++ struct acpi_device *adev = ACPI_COMPANION(&client->dev);
++ struct regmap *regmap;
++ int device_type;
++ int ret;
++
++ regmap = devm_regmap_init_i2c(client, &tps68470_regmap_config);
++ if (IS_ERR(regmap)) {
++ dev_err(&client->dev, "Failed to create regmap: %ld\n", PTR_ERR(regmap));
++ return PTR_ERR(regmap);
++ }
++
++ i2c_set_clientdata(client, regmap);
++
++ ret = tps68470_chip_init(&client->dev, regmap);
++ if (ret < 0) {
++ dev_err(&client->dev, "TPS68470 init error %d\n", ret);
++ return ret;
++ }
++
++ device_type = skl_int3472_tps68470_calc_type(adev);
++ switch (device_type) {
++ case DESIGNED_FOR_WINDOWS:
++ ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
++ tps68470_win, ARRAY_SIZE(tps68470_win),
++ NULL, 0, NULL);
++ break;
++ case DESIGNED_FOR_CHROMEOS:
++ ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
++ tps68470_cros, ARRAY_SIZE(tps68470_cros),
++ NULL, 0, NULL);
++ break;
++ default:
++ dev_err(&client->dev, "Failed to add MFD devices\n");
++ return device_type;
++ }
++
++ return ret;
++}
++
++static const struct acpi_device_id int3472_device_id[] = {
++ { "INT3472", 0 },
++ { }
++};
++MODULE_DEVICE_TABLE(acpi, int3472_device_id);
++
++static struct i2c_driver int3472_tps68470 = {
++ .driver = {
++ .name = "int3472-tps68470",
++ .acpi_match_table = int3472_device_id,
++ },
++ .probe_new = skl_int3472_tps68470_probe,
++};
++module_i2c_driver(int3472_tps68470);
++
++MODULE_DESCRIPTION("Intel SkyLake INT3472 ACPI TPS68470 Device Driver");
++MODULE_AUTHOR("Daniel Scally <djrscally@gmail.com>");
++MODULE_LICENSE("GPL v2");
+diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
+index ac19fcc9abbf5..b8d67bc4acb0a 100644
+--- a/drivers/platform/x86/intel/pmc/core.c
++++ b/drivers/platform/x86/intel/pmc/core.c
+@@ -958,7 +958,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset,
+
+ static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value)
+ {
+- return (u64)value * pmcdev->map->slp_s0_res_counter_step;
++ /*
++ * The ADL PCH does not have the SLP_S0 counter, so the LPM residency
++ * counters, which use a 30.5 usec tick, are used as a workaround. All
++ * other client platforms have the legacy SLP_S0 residency counter,
++ * which uses a 122 usec tick.
++ */
++ const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2;
++
++ if (pmcdev->map == &adl_reg_map)
++ return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2);
++ else
++ return (u64)value * pmcdev->map->slp_s0_res_counter_step;
+ }
+
+ static int set_etr3(struct pmc_dev *pmcdev)
+diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c
+index 73797680b895c..ddfba38c21044 100644
+--- a/drivers/platform/x86/intel/pmc/pltdrv.c
++++ b/drivers/platform/x86/intel/pmc/pltdrv.c
+@@ -18,6 +18,8 @@
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
+
++#include <xen/xen.h>
++
+ static void intel_pmc_core_release(struct device *dev)
+ {
+ kfree(dev);
+@@ -53,6 +55,13 @@ static int __init pmc_core_platform_init(void)
+ if (acpi_dev_present("INT33A1", NULL, -1))
+ return -ENODEV;
+
++ /*
++ * Skip forcefully attaching the device for VMs. Make an exception for
++ * Xen dom0, which does have full hardware access.
++ */
++ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR) && !xen_initial_domain())
++ return -ENODEV;
++
+ if (!x86_match_cpu(intel_pmc_core_platform_ids))
+ return -ENODEV;
+
+@@ -65,7 +74,7 @@ static int __init pmc_core_platform_init(void)
+
+ retval = platform_device_register(pmc_core_device);
+ if (retval)
+- kfree(pmc_core_device);
++ platform_device_put(pmc_core_device);
+
+ return retval;
+ }
+diff --git a/drivers/platform/x86/intel/pmt/class.c b/drivers/platform/x86/intel/pmt/class.c
+index 659b1073033c2..586a5877422b5 100644
+--- a/drivers/platform/x86/intel/pmt/class.c
++++ b/drivers/platform/x86/intel/pmt/class.c
+@@ -9,6 +9,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/io-64-nonatomic-lo-hi.h>
+ #include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/pci.h>
+@@ -18,6 +19,7 @@
+ #define PMT_XA_START 0
+ #define PMT_XA_MAX INT_MAX
+ #define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX)
++#define GUID_SPR_PUNIT 0x9956f43f
+
+ /*
+ * Early implementations of PMT on client platforms have some
+@@ -41,6 +43,29 @@ bool intel_pmt_is_early_client_hw(struct device *dev)
+ }
+ EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw);
+
++static inline int
++pmt_memcpy64_fromio(void *to, const u64 __iomem *from, size_t count)
++{
++ int i, remain;
++ u64 *buf = to;
++
++ if (!IS_ALIGNED((unsigned long)from, 8))
++ return -EFAULT;
++
++ for (i = 0; i < count/8; i++)
++ buf[i] = readq(&from[i]);
++
++ /* Copy any remaining bytes */
++ remain = count % 8;
++ if (remain) {
++ u64 tmp = readq(&from[i]);
++
++ memcpy(&buf[i], &tmp, remain);
++ }
++
++ return count;
++}
++
+ /*
+ * sysfs
+ */
+@@ -62,7 +87,11 @@ intel_pmt_read(struct file *filp, struct kobject *kobj,
+ if (count > entry->size - off)
+ count = entry->size - off;
+
+- memcpy_fromio(buf, entry->base + off, count);
++ if (entry->guid == GUID_SPR_PUNIT)
++ /* PUNIT on SPR only supports aligned 64-bit read */
++ count = pmt_memcpy64_fromio(buf, entry->base + off, count);
++ else
++ memcpy_fromio(buf, entry->base + off, count);
+
+ return count;
+ }
+diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+index c9a85eb2e8600..f6b32d31c5110 100644
+--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
++++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+@@ -277,39 +277,46 @@ static int isst_if_get_platform_info(void __user *argp)
+ return 0;
+ }
+
++#define ISST_MAX_BUS_NUMBER 2
+
+ struct isst_if_cpu_info {
+ /* For BUS 0 and BUS 1 only, which we need for PUNIT interface */
+- int bus_info[2];
+- struct pci_dev *pci_dev[2];
++ int bus_info[ISST_MAX_BUS_NUMBER];
++ struct pci_dev *pci_dev[ISST_MAX_BUS_NUMBER];
+ int punit_cpu_id;
+ int numa_node;
+ };
+
++struct isst_if_pkg_info {
++ struct pci_dev *pci_dev[ISST_MAX_BUS_NUMBER];
++};
++
+ static struct isst_if_cpu_info *isst_cpu_info;
+-#define ISST_MAX_PCI_DOMAINS 8
++static struct isst_if_pkg_info *isst_pkg_info;
+
+ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
+ {
+ struct pci_dev *matched_pci_dev = NULL;
+ struct pci_dev *pci_dev = NULL;
+- int no_matches = 0;
+- int i, bus_number;
++ struct pci_dev *_pci_dev = NULL;
++ int no_matches = 0, pkg_id;
++ int bus_number;
+
+- if (bus_no < 0 || bus_no > 1 || cpu < 0 || cpu >= nr_cpu_ids ||
+- cpu >= num_possible_cpus())
++ if (bus_no < 0 || bus_no >= ISST_MAX_BUS_NUMBER || cpu < 0 ||
++ cpu >= nr_cpu_ids || cpu >= num_possible_cpus())
+ return NULL;
+
++ pkg_id = topology_physical_package_id(cpu);
++
+ bus_number = isst_cpu_info[cpu].bus_info[bus_no];
+ if (bus_number < 0)
+ return NULL;
+
+- for (i = 0; i < ISST_MAX_PCI_DOMAINS; ++i) {
+- struct pci_dev *_pci_dev;
++ for_each_pci_dev(_pci_dev) {
+ int node;
+
+- _pci_dev = pci_get_domain_bus_and_slot(i, bus_number, PCI_DEVFN(dev, fn));
+- if (!_pci_dev)
++ if (_pci_dev->bus->number != bus_number ||
++ _pci_dev->devfn != PCI_DEVFN(dev, fn))
+ continue;
+
+ ++no_matches;
+@@ -324,6 +331,8 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
+ }
+
+ if (node == isst_cpu_info[cpu].numa_node) {
++ isst_pkg_info[pkg_id].pci_dev[bus_no] = _pci_dev;
++
+ pci_dev = _pci_dev;
+ break;
+ }
+@@ -342,6 +351,10 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
+ if (!pci_dev && no_matches == 1)
+ pci_dev = matched_pci_dev;
+
++ /* Return pci_dev pointer for any matched CPU in the package */
++ if (!pci_dev)
++ pci_dev = isst_pkg_info[pkg_id].pci_dev[bus_no];
++
+ return pci_dev;
+ }
+
+@@ -361,8 +374,8 @@ struct pci_dev *isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
+ {
+ struct pci_dev *pci_dev;
+
+- if (bus_no < 0 || bus_no > 1 || cpu < 0 || cpu >= nr_cpu_ids ||
+- cpu >= num_possible_cpus())
++ if (bus_no < 0 || bus_no >= ISST_MAX_BUS_NUMBER || cpu < 0 ||
++ cpu >= nr_cpu_ids || cpu >= num_possible_cpus())
+ return NULL;
+
+ pci_dev = isst_cpu_info[cpu].pci_dev[bus_no];
+@@ -417,10 +430,19 @@ static int isst_if_cpu_info_init(void)
+ if (!isst_cpu_info)
+ return -ENOMEM;
+
++ isst_pkg_info = kcalloc(topology_max_packages(),
++ sizeof(*isst_pkg_info),
++ GFP_KERNEL);
++ if (!isst_pkg_info) {
++ kfree(isst_cpu_info);
++ return -ENOMEM;
++ }
++
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "platform/x86/isst-if:online",
+ isst_if_cpu_online, NULL);
+ if (ret < 0) {
++ kfree(isst_pkg_info);
+ kfree(isst_cpu_info);
+ return ret;
+ }
+@@ -433,6 +455,7 @@ static int isst_if_cpu_info_init(void)
+ static void isst_if_cpu_info_exit(void)
+ {
+ cpuhp_remove_state(isst_if_online_id);
++ kfree(isst_pkg_info);
+ kfree(isst_cpu_info);
+ };
+
+@@ -596,7 +619,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd,
+ return ret;
+ }
+
+-static DEFINE_MUTEX(punit_misc_dev_lock);
++/* Lock to prevent module registration when already opened by user space */
++static DEFINE_MUTEX(punit_misc_dev_open_lock);
++/* Lock to allow one share misc device for all ISST interace */
++static DEFINE_MUTEX(punit_misc_dev_reg_lock);
+ static int misc_usage_count;
+ static int misc_device_ret;
+ static int misc_device_open;
+@@ -606,7 +632,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
+ int i, ret = 0;
+
+ /* Fail open, if a module is going away */
+- mutex_lock(&punit_misc_dev_lock);
++ mutex_lock(&punit_misc_dev_open_lock);
+ for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
+ struct isst_if_cmd_cb *cb = &punit_callbacks[i];
+
+@@ -628,7 +654,7 @@ static int isst_if_open(struct inode *inode, struct file *file)
+ } else {
+ misc_device_open++;
+ }
+- mutex_unlock(&punit_misc_dev_lock);
++ mutex_unlock(&punit_misc_dev_open_lock);
+
+ return ret;
+ }
+@@ -637,7 +663,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
+ {
+ int i;
+
+- mutex_lock(&punit_misc_dev_lock);
++ mutex_lock(&punit_misc_dev_open_lock);
+ misc_device_open--;
+ for (i = 0; i < ISST_IF_DEV_MAX; ++i) {
+ struct isst_if_cmd_cb *cb = &punit_callbacks[i];
+@@ -645,7 +671,7 @@ static int isst_if_relase(struct inode *inode, struct file *f)
+ if (cb->registered)
+ module_put(cb->owner);
+ }
+- mutex_unlock(&punit_misc_dev_lock);
++ mutex_unlock(&punit_misc_dev_open_lock);
+
+ return 0;
+ }
+@@ -662,6 +688,43 @@ static struct miscdevice isst_if_char_driver = {
+ .fops = &isst_if_char_driver_ops,
+ };
+
++static int isst_misc_reg(void)
++{
++ mutex_lock(&punit_misc_dev_reg_lock);
++ if (misc_device_ret)
++ goto unlock_exit;
++
++ if (!misc_usage_count) {
++ misc_device_ret = isst_if_cpu_info_init();
++ if (misc_device_ret)
++ goto unlock_exit;
++
++ misc_device_ret = misc_register(&isst_if_char_driver);
++ if (misc_device_ret) {
++ isst_if_cpu_info_exit();
++ goto unlock_exit;
++ }
++ }
++ misc_usage_count++;
++
++unlock_exit:
++ mutex_unlock(&punit_misc_dev_reg_lock);
++
++ return misc_device_ret;
++}
++
++static void isst_misc_unreg(void)
++{
++ mutex_lock(&punit_misc_dev_reg_lock);
++ if (misc_usage_count)
++ misc_usage_count--;
++ if (!misc_usage_count && !misc_device_ret) {
++ misc_deregister(&isst_if_char_driver);
++ isst_if_cpu_info_exit();
++ }
++ mutex_unlock(&punit_misc_dev_reg_lock);
++}
++
+ /**
+ * isst_if_cdev_register() - Register callback for IOCTL
+ * @device_type: The device type this callback handling.
+@@ -679,38 +742,31 @@ static struct miscdevice isst_if_char_driver = {
+ */
+ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb)
+ {
+- if (misc_device_ret)
+- return misc_device_ret;
++ int ret;
+
+ if (device_type >= ISST_IF_DEV_MAX)
+ return -EINVAL;
+
+- mutex_lock(&punit_misc_dev_lock);
++ mutex_lock(&punit_misc_dev_open_lock);
++ /* Device is already open, we don't want to add new callbacks */
+ if (misc_device_open) {
+- mutex_unlock(&punit_misc_dev_lock);
++ mutex_unlock(&punit_misc_dev_open_lock);
+ return -EAGAIN;
+ }
+- if (!misc_usage_count) {
+- int ret;
+-
+- misc_device_ret = misc_register(&isst_if_char_driver);
+- if (misc_device_ret)
+- goto unlock_exit;
+-
+- ret = isst_if_cpu_info_init();
+- if (ret) {
+- misc_deregister(&isst_if_char_driver);
+- misc_device_ret = ret;
+- goto unlock_exit;
+- }
+- }
+ memcpy(&punit_callbacks[device_type], cb, sizeof(*cb));
+ punit_callbacks[device_type].registered = 1;
+- misc_usage_count++;
+-unlock_exit:
+- mutex_unlock(&punit_misc_dev_lock);
++ mutex_unlock(&punit_misc_dev_open_lock);
+
+- return misc_device_ret;
++ ret = isst_misc_reg();
++ if (ret) {
++ /*
++ * No need of mutex as the misc device register failed
++ * as no one can open device yet. Hence no contention.
++ */
++ punit_callbacks[device_type].registered = 0;
++ return ret;
++ }
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(isst_if_cdev_register);
+
+@@ -725,16 +781,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register);
+ */
+ void isst_if_cdev_unregister(int device_type)
+ {
+- mutex_lock(&punit_misc_dev_lock);
+- misc_usage_count--;
++ isst_misc_unreg();
++ mutex_lock(&punit_misc_dev_open_lock);
+ punit_callbacks[device_type].registered = 0;
+ if (device_type == ISST_IF_DEV_MBOX)
+ isst_delete_hash();
+- if (!misc_usage_count && !misc_device_ret) {
+- misc_deregister(&isst_if_char_driver);
+- isst_if_cpu_info_exit();
+- }
+- mutex_unlock(&punit_misc_dev_lock);
++ mutex_unlock(&punit_misc_dev_open_lock);
+ }
+ EXPORT_SYMBOL_GPL(isst_if_cdev_unregister);
+
+diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
+index 7cc9089d1e14f..e7a3e34028178 100644
+--- a/drivers/platform/x86/intel_scu_ipc.c
++++ b/drivers/platform/x86/intel_scu_ipc.c
+@@ -583,7 +583,6 @@ __intel_scu_ipc_register(struct device *parent,
+ scu->dev.parent = parent;
+ scu->dev.class = &intel_scu_ipc_class;
+ scu->dev.release = intel_scu_ipc_release;
+- dev_set_name(&scu->dev, "intel_scu_ipc");
+
+ if (!request_mem_region(scu_data->mem.start, resource_size(&scu_data->mem),
+ "intel_scu_ipc")) {
+@@ -612,6 +611,7 @@ __intel_scu_ipc_register(struct device *parent,
+ * After this point intel_scu_ipc_release() takes care of
+ * releasing the SCU IPC resources once refcount drops to zero.
+ */
++ dev_set_name(&scu->dev, "intel_scu_ipc");
+ err = device_register(&scu->dev);
+ if (err) {
+ put_device(&scu->dev);
+diff --git a/drivers/platform/x86/intel_scu_pcidrv.c b/drivers/platform/x86/intel_scu_pcidrv.c
+index 80abc708e4f2f..d904fad499aa5 100644
+--- a/drivers/platform/x86/intel_scu_pcidrv.c
++++ b/drivers/platform/x86/intel_scu_pcidrv.c
+@@ -34,6 +34,7 @@ static int intel_scu_pci_probe(struct pci_dev *pdev,
+
+ static const struct pci_device_id pci_ids[] = {
+ { PCI_VDEVICE(INTEL, 0x080e) },
++ { PCI_VDEVICE(INTEL, 0x082a) },
+ { PCI_VDEVICE(INTEL, 0x08ea) },
+ { PCI_VDEVICE(INTEL, 0x0a94) },
+ { PCI_VDEVICE(INTEL, 0x11a0) },
+diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
+index 24ffc8e2d2d1e..dfb4af759aa75 100644
+--- a/drivers/platform/x86/msi-laptop.c
++++ b/drivers/platform/x86/msi-laptop.c
+@@ -210,7 +210,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
+ return -EINVAL;
+
+ if (quirks->ec_read_only)
+- return -EOPNOTSUPP;
++ return 0;
+
+ /* read current device state */
+ result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata);
+@@ -596,11 +596,10 @@ static const struct dmi_system_id msi_dmi_table[] __initconst = {
+ {
+ .ident = "MSI S270",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT'L CO.,LTD"),
++ DMI_MATCH(DMI_SYS_VENDOR, "MICRO-STAR INT"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MS-1013"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "0131"),
+- DMI_MATCH(DMI_CHASSIS_VENDOR,
+- "MICRO-STAR INT'L CO.,LTD")
++ DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT")
+ },
+ .driver_data = &quirk_old_ec_model,
+ .callback = dmi_check_cb
+@@ -633,8 +632,7 @@ static const struct dmi_system_id msi_dmi_table[] __initconst = {
+ DMI_MATCH(DMI_SYS_VENDOR, "NOTEBOOK"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "SAM2000"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "0131"),
+- DMI_MATCH(DMI_CHASSIS_VENDOR,
+- "MICRO-STAR INT'L CO.,LTD")
++ DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT")
+ },
+ .driver_data = &quirk_old_ec_model,
+ .callback = dmi_check_cb
+@@ -843,15 +841,15 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
+ static void msi_init_rfkill(struct work_struct *ignored)
+ {
+ if (rfk_wlan) {
+- rfkill_set_sw_state(rfk_wlan, !wlan_s);
++ msi_rfkill_set_state(rfk_wlan, !wlan_s);
+ rfkill_wlan_set(NULL, !wlan_s);
+ }
+ if (rfk_bluetooth) {
+- rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s);
++ msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s);
+ rfkill_bluetooth_set(NULL, !bluetooth_s);
+ }
+ if (rfk_threeg) {
+- rfkill_set_sw_state(rfk_threeg, !threeg_s);
++ msi_rfkill_set_state(rfk_threeg, !threeg_s);
+ rfkill_threeg_set(NULL, !threeg_s);
+ }
+ }
+@@ -1048,8 +1046,7 @@ static int __init msi_init(void)
+ return -EINVAL;
+
+ /* Register backlight stuff */
+-
+- if (quirks->old_ec_model ||
++ if (quirks->old_ec_model &&
+ acpi_video_get_backlight_type() == acpi_backlight_vendor) {
+ struct backlight_properties props;
+ memset(&props, 0, sizeof(struct backlight_properties));
+@@ -1117,6 +1114,8 @@ fail_create_attr:
+ fail_create_group:
+ if (quirks->load_scm_model) {
+ i8042_remove_filter(msi_laptop_i8042_filter);
++ cancel_delayed_work_sync(&msi_touchpad_dwork);
++ input_unregister_device(msi_laptop_input_dev);
+ cancel_delayed_work_sync(&msi_rfkill_dwork);
+ cancel_work_sync(&msi_rfkill_work);
+ rfkill_cleanup();
+@@ -1137,6 +1136,7 @@ static void __exit msi_cleanup(void)
+ {
+ if (quirks->load_scm_model) {
+ i8042_remove_filter(msi_laptop_i8042_filter);
++ cancel_delayed_work_sync(&msi_touchpad_dwork);
+ input_unregister_device(msi_laptop_input_dev);
+ cancel_delayed_work_sync(&msi_rfkill_dwork);
+ cancel_work_sync(&msi_rfkill_work);
+diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c
+index 9a19fbd2f7341..9a457956025a5 100644
+--- a/drivers/platform/x86/mxm-wmi.c
++++ b/drivers/platform/x86/mxm-wmi.c
+@@ -35,13 +35,11 @@ int mxm_wmi_call_mxds(int adapter)
+ .xarg = 1,
+ };
+ struct acpi_buffer input = { (acpi_size)sizeof(args), &args };
+- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_status status;
+
+ printk("calling mux switch %d\n", adapter);
+
+- status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input,
+- &output);
++ status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, NULL);
+
+ if (ACPI_FAILURE(status))
+ return status;
+@@ -60,13 +58,11 @@ int mxm_wmi_call_mxmx(int adapter)
+ .xarg = 1,
+ };
+ struct acpi_buffer input = { (acpi_size)sizeof(args), &args };
+- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_status status;
+
+ printk("calling mux switch %d\n", adapter);
+
+- status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input,
+- &output);
++ status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, NULL);
+
+ if (ACPI_FAILURE(status))
+ return status;
+diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
+index d4f444401496e..7ca49b3fc6c28 100644
+--- a/drivers/platform/x86/panasonic-laptop.c
++++ b/drivers/platform/x86/panasonic-laptop.c
+@@ -119,20 +119,22 @@
+ * - v0.1 start from toshiba_acpi driver written by John Belmonte
+ */
+
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/init.h>
+-#include <linux/types.h>
++#include <linux/acpi.h>
+ #include <linux/backlight.h>
+ #include <linux/ctype.h>
+-#include <linux/seq_file.h>
+-#include <linux/uaccess.h>
+-#include <linux/slab.h>
+-#include <linux/acpi.h>
++#include <linux/i8042.h>
++#include <linux/init.h>
+ #include <linux/input.h>
+ #include <linux/input/sparse-keymap.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
+ #include <linux/platform_device.h>
+-
++#include <linux/seq_file.h>
++#include <linux/serio.h>
++#include <linux/slab.h>
++#include <linux/types.h>
++#include <linux/uaccess.h>
++#include <acpi/video.h>
+
+ MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
+ MODULE_AUTHOR("David Bronaugh <dbronaugh@linuxboxen.org>");
+@@ -241,6 +243,42 @@ struct pcc_acpi {
+ struct platform_device *platform;
+ };
+
++/*
++ * On some Panasonic models the volume up / down / mute keys send duplicate
++ * keypress events over the PS/2 kbd interface, filter these out.
++ */
++static bool panasonic_i8042_filter(unsigned char data, unsigned char str,
++ struct serio *port)
++{
++ static bool extended;
++
++ if (str & I8042_STR_AUXDATA)
++ return false;
++
++ if (data == 0xe0) {
++ extended = true;
++ return true;
++ } else if (extended) {
++ extended = false;
++
++ switch (data & 0x7f) {
++ case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */
++ case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */
++ case 0x30: /* e0 30 / e0 b0, Volume Up press / release */
++ return true;
++ default:
++ /*
++ * Report the previously filtered e0 before continuing
++ * with the next non-filtered byte.
++ */
++ serio_interrupt(port, 0xe0, 0);
++ return false;
++ }
++ }
++
++ return false;
++}
++
+ /* method access functions */
+ static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val)
+ {
+@@ -762,6 +800,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc)
+ struct input_dev *hotk_input_dev = pcc->input_dev;
+ int rc;
+ unsigned long long result;
++ unsigned int key;
++ unsigned int updown;
+
+ rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY,
+ NULL, &result);
+@@ -770,20 +810,27 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc)
+ return;
+ }
+
++ key = result & 0xf;
++ updown = result & 0x80; /* 0x80 == key down; 0x00 = key up */
++
+ /* hack: some firmware sends no key down for sleep / hibernate */
+- if ((result & 0xf) == 0x7 || (result & 0xf) == 0xa) {
+- if (result & 0x80)
++ if (key == 7 || key == 10) {
++ if (updown)
+ sleep_keydown_seen = 1;
+ if (!sleep_keydown_seen)
+ sparse_keymap_report_event(hotk_input_dev,
+- result & 0xf, 0x80, false);
++ key, 0x80, false);
+ }
+
+- if ((result & 0xf) == 0x7 || (result & 0xf) == 0x9 || (result & 0xf) == 0xa) {
+- if (!sparse_keymap_report_event(hotk_input_dev,
+- result & 0xf, result & 0x80, false))
+- pr_err("Unknown hotkey event: 0x%04llx\n", result);
+- }
++ /*
++ * Don't report brightness key-presses if they are also reported
++ * by the ACPI video bus.
++ */
++ if ((key == 1 || key == 2) && acpi_video_handles_brightness_key_presses())
++ return;
++
++ if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false))
++ pr_err("Unknown hotkey event: 0x%04llx\n", result);
+ }
+
+ static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event)
+@@ -997,6 +1044,7 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
+ pcc->platform = NULL;
+ }
+
++ i8042_install_filter(panasonic_i8042_filter);
+ return 0;
+
+ out_platform:
+@@ -1020,6 +1068,8 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device)
+ if (!device || !pcc)
+ return -EINVAL;
+
++ i8042_remove_filter(panasonic_i8042_filter);
++
+ if (pcc->platform) {
+ device_remove_file(&pcc->platform->dev, &dev_attr_cdpower);
+ platform_device_unregister(pcc->platform);
+diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
+index a9d2a4b98e570..4b0739f95f8b9 100644
+--- a/drivers/platform/x86/pmc_atom.c
++++ b/drivers/platform/x86/pmc_atom.c
+@@ -244,7 +244,7 @@ static void pmc_power_off(void)
+ pm1_cnt_port = acpi_base_addr + PM1_CNT;
+
+ pm1_cnt_value = inl(pm1_cnt_port);
+- pm1_cnt_value &= SLEEP_TYPE_MASK;
++ pm1_cnt_value &= ~SLEEP_TYPE_MASK;
+ pm1_cnt_value |= SLEEP_TYPE_S5;
+ pm1_cnt_value |= SLEEP_ENABLE;
+
+diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
+index 7ee010aa740aa..404bdb4cbfae4 100644
+--- a/drivers/platform/x86/samsung-laptop.c
++++ b/drivers/platform/x86/samsung-laptop.c
+@@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev,
+
+ if (value > samsung->kbd_led.max_brightness)
+ value = samsung->kbd_led.max_brightness;
+- else if (value < 0)
+- value = 0;
+
+ samsung->kbd_led_wk = value;
+ queue_work(samsung->led_workqueue, &samsung->kbd_led_work);
+diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
+index 7048133749221..336dee9485d4b 100644
+--- a/drivers/platform/x86/sony-laptop.c
++++ b/drivers/platform/x86/sony-laptop.c
+@@ -1892,14 +1892,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd,
+ break;
+ }
+
+- ret = sony_call_snc_handle(handle, probe_base, &result);
+- if (ret)
+- return ret;
++ /*
++ * Only probe if there is a separate probe_base, otherwise the probe call
++ * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in
++ * the keyboard backlight being turned off.
++ */
++ if (probe_base) {
++ ret = sony_call_snc_handle(handle, probe_base, &result);
++ if (ret)
++ return ret;
+
+- if ((handle == 0x0137 && !(result & 0x02)) ||
+- !(result & 0x01)) {
+- dprintk("no backlight keyboard found\n");
+- return 0;
++ if ((handle == 0x0137 && !(result & 0x02)) ||
++ !(result & 0x01)) {
++ dprintk("no backlight keyboard found\n");
++ return 0;
++ }
+ }
+
+ kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL);
+diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
+deleted file mode 100644
+index 9072eb3026185..0000000000000
+--- a/drivers/platform/x86/tc1100-wmi.c
++++ /dev/null
+@@ -1,265 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * HP Compaq TC1100 Tablet WMI Extras Driver
+- *
+- * Copyright (C) 2007 Carlos Corbacho <carlos@strangeworlds.co.uk>
+- * Copyright (C) 2004 Jamey Hicks <jamey.hicks@hp.com>
+- * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+- */
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/slab.h>
+-#include <linux/init.h>
+-#include <linux/types.h>
+-#include <linux/acpi.h>
+-#include <linux/platform_device.h>
+-
+-#define GUID "C364AC71-36DB-495A-8494-B439D472A505"
+-
+-#define TC1100_INSTANCE_WIRELESS 1
+-#define TC1100_INSTANCE_JOGDIAL 2
+-
+-MODULE_AUTHOR("Jamey Hicks, Carlos Corbacho");
+-MODULE_DESCRIPTION("HP Compaq TC1100 Tablet WMI Extras");
+-MODULE_LICENSE("GPL");
+-MODULE_ALIAS("wmi:C364AC71-36DB-495A-8494-B439D472A505");
+-
+-static struct platform_device *tc1100_device;
+-
+-struct tc1100_data {
+- u32 wireless;
+- u32 jogdial;
+-};
+-
+-#ifdef CONFIG_PM
+-static struct tc1100_data suspend_data;
+-#endif
+-
+-/* --------------------------------------------------------------------------
+- Device Management
+- -------------------------------------------------------------------------- */
+-
+-static int get_state(u32 *out, u8 instance)
+-{
+- u32 tmp;
+- acpi_status status;
+- struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
+- union acpi_object *obj;
+-
+- if (!out)
+- return -EINVAL;
+-
+- if (instance > 2)
+- return -ENODEV;
+-
+- status = wmi_query_block(GUID, instance, &result);
+- if (ACPI_FAILURE(status))
+- return -ENODEV;
+-
+- obj = (union acpi_object *) result.pointer;
+- if (obj && obj->type == ACPI_TYPE_INTEGER) {
+- tmp = obj->integer.value;
+- } else {
+- tmp = 0;
+- }
+-
+- if (result.length > 0)
+- kfree(result.pointer);
+-
+- switch (instance) {
+- case TC1100_INSTANCE_WIRELESS:
+- *out = (tmp == 3) ? 1 : 0;
+- return 0;
+- case TC1100_INSTANCE_JOGDIAL:
+- *out = (tmp == 1) ? 0 : 1;
+- return 0;
+- default:
+- return -ENODEV;
+- }
+-}
+-
+-static int set_state(u32 *in, u8 instance)
+-{
+- u32 value;
+- acpi_status status;
+- struct acpi_buffer input;
+-
+- if (!in)
+- return -EINVAL;
+-
+- if (instance > 2)
+- return -ENODEV;
+-
+- switch (instance) {
+- case TC1100_INSTANCE_WIRELESS:
+- value = (*in) ? 1 : 2;
+- break;
+- case TC1100_INSTANCE_JOGDIAL:
+- value = (*in) ? 0 : 1;
+- break;
+- default:
+- return -ENODEV;
+- }
+-
+- input.length = sizeof(u32);
+- input.pointer = &value;
+-
+- status = wmi_set_block(GUID, instance, &input);
+- if (ACPI_FAILURE(status))
+- return -ENODEV;
+-
+- return 0;
+-}
+-
+-/* --------------------------------------------------------------------------
+- FS Interface (/sys)
+- -------------------------------------------------------------------------- */
+-
+-/*
+- * Read/ write bool sysfs macro
+- */
+-#define show_set_bool(value, instance) \
+-static ssize_t \
+-show_bool_##value(struct device *dev, struct device_attribute *attr, \
+- char *buf) \
+-{ \
+- u32 result; \
+- acpi_status status = get_state(&result, instance); \
+- if (ACPI_SUCCESS(status)) \
+- return sprintf(buf, "%d\n", result); \
+- return sprintf(buf, "Read error\n"); \
+-} \
+-\
+-static ssize_t \
+-set_bool_##value(struct device *dev, struct device_attribute *attr, \
+- const char *buf, size_t count) \
+-{ \
+- u32 tmp = simple_strtoul(buf, NULL, 10); \
+- acpi_status status = set_state(&tmp, instance); \
+- if (ACPI_FAILURE(status)) \
+- return -EINVAL; \
+- return count; \
+-} \
+-static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, \
+- show_bool_##value, set_bool_##value);
+-
+-show_set_bool(wireless, TC1100_INSTANCE_WIRELESS);
+-show_set_bool(jogdial, TC1100_INSTANCE_JOGDIAL);
+-
+-static struct attribute *tc1100_attributes[] = {
+- &dev_attr_wireless.attr,
+- &dev_attr_jogdial.attr,
+- NULL
+-};
+-
+-static const struct attribute_group tc1100_attribute_group = {
+- .attrs = tc1100_attributes,
+-};
+-
+-/* --------------------------------------------------------------------------
+- Driver Model
+- -------------------------------------------------------------------------- */
+-
+-static int __init tc1100_probe(struct platform_device *device)
+-{
+- return sysfs_create_group(&device->dev.kobj, &tc1100_attribute_group);
+-}
+-
+-
+-static int tc1100_remove(struct platform_device *device)
+-{
+- sysfs_remove_group(&device->dev.kobj, &tc1100_attribute_group);
+-
+- return 0;
+-}
+-
+-#ifdef CONFIG_PM
+-static int tc1100_suspend(struct device *dev)
+-{
+- int ret;
+-
+- ret = get_state(&suspend_data.wireless, TC1100_INSTANCE_WIRELESS);
+- if (ret)
+- return ret;
+-
+- ret = get_state(&suspend_data.jogdial, TC1100_INSTANCE_JOGDIAL);
+- if (ret)
+- return ret;
+-
+- return 0;
+-}
+-
+-static int tc1100_resume(struct device *dev)
+-{
+- int ret;
+-
+- ret = set_state(&suspend_data.wireless, TC1100_INSTANCE_WIRELESS);
+- if (ret)
+- return ret;
+-
+- ret = set_state(&suspend_data.jogdial, TC1100_INSTANCE_JOGDIAL);
+- if (ret)
+- return ret;
+-
+- return 0;
+-}
+-
+-static const struct dev_pm_ops tc1100_pm_ops = {
+- .suspend = tc1100_suspend,
+- .resume = tc1100_resume,
+- .freeze = tc1100_suspend,
+- .restore = tc1100_resume,
+-};
+-#endif
+-
+-static struct platform_driver tc1100_driver = {
+- .driver = {
+- .name = "tc1100-wmi",
+-#ifdef CONFIG_PM
+- .pm = &tc1100_pm_ops,
+-#endif
+- },
+- .remove = tc1100_remove,
+-};
+-
+-static int __init tc1100_init(void)
+-{
+- int error;
+-
+- if (!wmi_has_guid(GUID))
+- return -ENODEV;
+-
+- tc1100_device = platform_device_alloc("tc1100-wmi", -1);
+- if (!tc1100_device)
+- return -ENOMEM;
+-
+- error = platform_device_add(tc1100_device);
+- if (error)
+- goto err_device_put;
+-
+- error = platform_driver_probe(&tc1100_driver, tc1100_probe);
+- if (error)
+- goto err_device_del;
+-
+- pr_info("HP Compaq TC1100 Tablet WMI Extras loaded\n");
+- return 0;
+-
+- err_device_del:
+- platform_device_del(tc1100_device);
+- err_device_put:
+- platform_device_put(tc1100_device);
+- return error;
+-}
+-
+-static void __exit tc1100_exit(void)
+-{
+- platform_device_unregister(tc1100_device);
+- platform_driver_unregister(&tc1100_driver);
+-}
+-
+-module_init(tc1100_init);
+-module_exit(tc1100_exit);
+diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
+index 9472aae72df29..76f0d04e17f30 100644
+--- a/drivers/platform/x86/think-lmi.c
++++ b/drivers/platform/x86/think-lmi.c
+@@ -504,7 +504,7 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at
+ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+ {
+ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+- char *item, *value;
++ char *item, *value, *p;
+ int ret;
+
+ ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID);
+@@ -514,10 +514,15 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
+ /* validate and split from `item,value` -> `value` */
+ value = strpbrk(item, ",");
+ if (!value || value == item || !strlen(value + 1))
+- return -EINVAL;
+-
+- ret = sysfs_emit(buf, "%s\n", value + 1);
++ ret = -EINVAL;
++ else {
++ /* On Workstations remove the Options part after the value */
++ p = strchrnul(value, ';');
++ *p = '\0';
++ ret = sysfs_emit(buf, "%s\n", value + 1);
++ }
+ kfree(item);
++
+ return ret;
+ }
+
+@@ -525,12 +530,23 @@ static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute
+ {
+ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+
+- if (!tlmi_priv.can_get_bios_selections)
+- return -EOPNOTSUPP;
+-
+ return sysfs_emit(buf, "%s\n", setting->possible_values);
+ }
+
++static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *buf)
++{
++ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
++
++ if (setting->possible_values) {
++ /* Figure out what setting type is as BIOS does not return this */
++ if (strchr(setting->possible_values, ';'))
++ return sysfs_emit(buf, "enumeration\n");
++ }
++ /* Anything else is going to be a string */
++ return sysfs_emit(buf, "string\n");
++}
++
+ static ssize_t current_value_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+@@ -601,14 +617,30 @@ static struct kobj_attribute attr_possible_values = __ATTR_RO(possible_values);
+
+ static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 0600);
+
++static struct kobj_attribute attr_type = __ATTR_RO(type);
++
++static umode_t attr_is_visible(struct kobject *kobj,
++ struct attribute *attr, int n)
++{
++ struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
++
++ /* We don't want to display possible_values attributes if not available */
++ if ((attr == &attr_possible_values.attr) && (!setting->possible_values))
++ return 0;
++
++ return attr->mode;
++}
++
+ static struct attribute *tlmi_attrs[] = {
+ &attr_displ_name.attr,
+ &attr_current_val.attr,
+ &attr_possible_values.attr,
++ &attr_type.attr,
+ NULL
+ };
+
+ static const struct attribute_group tlmi_attr_group = {
++ .is_visible = attr_is_visible,
+ .attrs = tlmi_attrs,
+ };
+
+@@ -888,8 +920,10 @@ static int tlmi_analyze(void)
+ break;
+ if (!item)
+ break;
+- if (!*item)
++ if (!*item) {
++ kfree(item);
+ continue;
++ }
+
+ /* It is not allowed to have '/' for file name. Convert it into '\'. */
+ strreplace(item, '/', '\\');
+@@ -902,6 +936,7 @@ static int tlmi_analyze(void)
+ setting = kzalloc(sizeof(*setting), GFP_KERNEL);
+ if (!setting) {
+ ret = -ENOMEM;
++ kfree(item);
+ goto fail_clear_attr;
+ }
+ setting->index = i;
+@@ -913,10 +948,37 @@ static int tlmi_analyze(void)
+ if (ret || !setting->possible_values)
+ pr_info("Error retrieving possible values for %d : %s\n",
+ i, setting->display_name);
++ } else {
++ /*
++ * Older Thinkstations don't support the bios_selections API.
++ * Instead they store this as a [Optional:Option1,Option2] section of the
++ * name string.
++ * Try and pull that out if it's available.
++ */
++ char *optitem, *optstart, *optend;
++
++ if (!tlmi_setting(setting->index, &optitem, LENOVO_BIOS_SETTING_GUID)) {
++ optstart = strstr(optitem, "[Optional:");
++ if (optstart) {
++ optstart += strlen("[Optional:");
++ optend = strstr(optstart, "]");
++ if (optend)
++ setting->possible_values =
++ kstrndup(optstart, optend - optstart,
++ GFP_KERNEL);
++ }
++ kfree(optitem);
++ }
+ }
++ /*
++ * firmware-attributes requires that possible_values are separated by ';' but
++ * Lenovo FW uses ','. Replace appropriately.
++ */
++ if (setting->possible_values)
++ strreplace(setting->possible_values, ',', ';');
++
+ kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
+ tlmi_priv.setting[i] = setting;
+- tlmi_priv.settings_count++;
+ kfree(item);
+ }
+
+@@ -983,7 +1045,12 @@ static void tlmi_remove(struct wmi_device *wdev)
+
+ static int tlmi_probe(struct wmi_device *wdev, const void *context)
+ {
+- tlmi_analyze();
++ int ret;
++
++ ret = tlmi_analyze();
++ if (ret)
++ return ret;
++
+ return tlmi_sysfs_init();
+ }
+
+diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
+index f8e26823075fd..2ce5086a5af27 100644
+--- a/drivers/platform/x86/think-lmi.h
++++ b/drivers/platform/x86/think-lmi.h
+@@ -55,7 +55,6 @@ struct tlmi_attr_setting {
+ struct think_lmi {
+ struct wmi_device *wmi_device;
+
+- int settings_count;
+ bool can_set_bios_settings;
+ bool can_get_bios_selections;
+ bool can_set_bios_password;
+diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
+index 50ff04c84650c..3dc055ce6e61b 100644
+--- a/drivers/platform/x86/thinkpad_acpi.c
++++ b/drivers/platform/x86/thinkpad_acpi.c
+@@ -1178,15 +1178,6 @@ static int tpacpi_rfk_update_swstate(const struct tpacpi_rfk *tp_rfk)
+ return status;
+ }
+
+-/* Query FW and update rfkill sw state for all rfkill switches */
+-static void tpacpi_rfk_update_swstate_all(void)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < TPACPI_RFK_SW_MAX; i++)
+- tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]);
+-}
+-
+ /*
+ * Sync the HW-blocking state of all rfkill switches,
+ * do notice it causes the rfkill core to schedule uevents
+@@ -3129,9 +3120,6 @@ static void tpacpi_send_radiosw_update(void)
+ if (wlsw == TPACPI_RFK_RADIO_OFF)
+ tpacpi_rfk_update_hwblock_state(true);
+
+- /* Sync sw blocking state */
+- tpacpi_rfk_update_swstate_all();
+-
+ /* Sync hw blocking state last if it is hw-unblocked */
+ if (wlsw == TPACPI_RFK_RADIO_ON)
+ tpacpi_rfk_update_hwblock_state(false);
+@@ -8853,6 +8841,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
+ TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */
+ TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */
+ TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (3nd gen) */
++ TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */
+ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */
+ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */
+ };
+@@ -9145,7 +9134,7 @@ static int fan_write_cmd_level(const char *cmd, int *rc)
+
+ if (strlencmp(cmd, "level auto") == 0)
+ level = TP_EC_FAN_AUTO;
+- else if ((strlencmp(cmd, "level disengaged") == 0) |
++ else if ((strlencmp(cmd, "level disengaged") == 0) ||
+ (strlencmp(cmd, "level full-speed") == 0))
+ level = TP_EC_FAN_FULLSPEED;
+ else if (sscanf(cmd, "level %d", &level) != 1)
+diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
+index 033f797861d8a..90aee8b87bbe0 100644
+--- a/drivers/platform/x86/touchscreen_dmi.c
++++ b/drivers/platform/x86/touchscreen_dmi.c
+@@ -255,6 +255,23 @@ static const struct ts_dmi_data connect_tablet9_data = {
+ .properties = connect_tablet9_props,
+ };
+
++static const struct property_entry csl_panther_tab_hd_props[] = {
++ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
++ PROPERTY_ENTRY_U32("touchscreen-min-y", 20),
++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1526),
++ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
++ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"),
++ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
++ { }
++};
++
++static const struct ts_dmi_data csl_panther_tab_hd_data = {
++ .acpi_name = "MSSL1680:00",
++ .properties = csl_panther_tab_hd_props,
++};
++
+ static const struct property_entry cube_iwork8_air_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
+@@ -310,6 +327,22 @@ static const struct ts_dmi_data dexp_ursus_7w_data = {
+ .properties = dexp_ursus_7w_props,
+ };
+
++static const struct property_entry dexp_ursus_kx210i_props[] = {
++ PROPERTY_ENTRY_U32("touchscreen-min-x", 5),
++ PROPERTY_ENTRY_U32("touchscreen-min-y", 2),
++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1720),
++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1137),
++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-dexp-ursus-kx210i.fw"),
++ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
++ PROPERTY_ENTRY_BOOL("silead,home-button"),
++ { }
++};
++
++static const struct ts_dmi_data dexp_ursus_kx210i_data = {
++ .acpi_name = "MSSL1680:00",
++ .properties = dexp_ursus_kx210i_props,
++};
++
+ static const struct property_entry digma_citi_e200_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
+@@ -364,6 +397,11 @@ static const struct ts_dmi_data glavey_tm800a550l_data = {
+ .properties = glavey_tm800a550l_props,
+ };
+
++static const struct ts_dmi_data gdix1002_00_upside_down_data = {
++ .acpi_name = "GDIX1002:00",
++ .properties = gdix1001_upside_down_props,
++};
++
+ static const struct property_entry gp_electronic_t701_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 960),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 640),
+@@ -773,6 +811,37 @@ static const struct ts_dmi_data predia_basic_data = {
+ .properties = predia_basic_props,
+ };
+
++static const struct property_entry rca_cambio_w101_v2_props[] = {
++ PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
++ PROPERTY_ENTRY_U32("touchscreen-min-y", 20),
++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1644),
++ PROPERTY_ENTRY_U32("touchscreen-size-y", 874),
++ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rca-cambio-w101-v2.fw"),
++ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
++ { }
++};
++
++static const struct ts_dmi_data rca_cambio_w101_v2_data = {
++ .acpi_name = "MSSL1680:00",
++ .properties = rca_cambio_w101_v2_props,
++};
++
++static const struct property_entry rwc_nanote_p8_props[] = {
++ PROPERTY_ENTRY_U32("touchscreen-min-y", 46),
++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
++ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"),
++ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
++ { }
++};
++
++static const struct ts_dmi_data rwc_nanote_p8_data = {
++ .acpi_name = "MSSL1680:00",
++ .properties = rwc_nanote_p8_props,
++};
++
+ static const struct property_entry schneider_sct101ctm_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1715),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
+@@ -1025,6 +1094,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_MATCH(DMI_BIOS_DATE, "05/07/2016"),
+ },
+ },
++ {
++ /* Chuwi Vi8 (CWI501) */
++ .driver_data = (void *)&chuwi_vi8_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "i86"),
++ DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.W86JLBNR01"),
++ },
++ },
+ {
+ /* Chuwi Vi8 (CWI506) */
+ .driver_data = (void *)&chuwi_vi8_data,
+@@ -1069,6 +1147,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Tablet 9"),
+ },
+ },
++ {
++ /* CSL Panther Tab HD */
++ .driver_data = (void *)&csl_panther_tab_hd_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"),
++ },
++ },
+ {
+ /* CUBE iwork8 Air */
+ .driver_data = (void *)&cube_iwork8_air_data,
+@@ -1096,6 +1182,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "7W"),
+ },
+ },
++ {
++ /* DEXP Ursus KX210i */
++ .driver_data = (void *)&dexp_ursus_kx210i_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "INSYDE Corp."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "S107I"),
++ },
++ },
+ {
+ /* Digma Citi E200 */
+ .driver_data = (void *)&digma_citi_e200_data,
+@@ -1215,6 +1309,18 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"),
+ },
+ },
++ {
++ /* Juno Tablet */
++ .driver_data = (void *)&gdix1002_00_upside_down_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Default string"),
++ /* Both product- and board-name being "Default string" is somewhat rare */
++ DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
++ DMI_MATCH(DMI_BOARD_NAME, "Default string"),
++ /* Above matches are too generic, add partial bios-version match */
++ DMI_MATCH(DMI_BIOS_VERSION, "JP2V1."),
++ },
++ },
+ {
+ /* Mediacom WinPad 7.0 W700 (same hw as Wintron surftab 7") */
+ .driver_data = (void *)&trekstor_surftab_wintron70_data,
+@@ -1379,6 +1485,24 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"),
+ },
+ },
++ {
++ /* RCA Cambio W101 v2 */
++ /* https://github.com/onitake/gsl-firmware/discussions/193 */
++ .driver_data = (void *)&rca_cambio_w101_v2_data,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "RCA"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "W101SA23T1"),
++ },
++ },
++ {
++ /* RWC NANOTE P8 */
++ .driver_data = (void *)&rwc_nanote_p8_data,
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Default string"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"),
++ DMI_MATCH(DMI_PRODUCT_SKU, "0001")
++ },
++ },
+ {
+ /* Schneider SCT101CTM */
+ .driver_data = (void *)&schneider_sct101ctm_data,
+diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
+index a76313006bdc4..7ce0408d3bfdd 100644
+--- a/drivers/platform/x86/wmi.c
++++ b/drivers/platform/x86/wmi.c
+@@ -39,7 +39,7 @@ MODULE_LICENSE("GPL");
+ static LIST_HEAD(wmi_block_list);
+
+ struct guid_block {
+- char guid[16];
++ guid_t guid;
+ union {
+ char object_id[2];
+ struct {
+@@ -51,6 +51,11 @@ struct guid_block {
+ u8 flags;
+ };
+
++enum { /* wmi_block flags */
++ WMI_READ_TAKES_NO_ARGS,
++ WMI_PROBED,
++};
++
+ struct wmi_block {
+ struct wmi_device dev;
+ struct list_head list;
+@@ -61,8 +66,7 @@ struct wmi_block {
+ wmi_notify_handler handler;
+ void *handler_data;
+ u64 req_buf_size;
+-
+- bool read_takes_no_args;
++ unsigned long flags;
+ };
+
+
+@@ -120,7 +124,7 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
+ list_for_each_entry(wblock, &wmi_block_list, list) {
+ block = &wblock->gblock;
+
+- if (memcmp(block->guid, &guid_input, 16) == 0) {
++ if (guid_equal(&block->guid, &guid_input)) {
+ if (out)
+ *out = wblock;
+ return true;
+@@ -129,11 +133,20 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
+ return false;
+ }
+
++static bool guid_parse_and_compare(const char *string, const guid_t *guid)
++{
++ guid_t guid_input;
++
++ if (guid_parse(string, &guid_input))
++ return false;
++
++ return guid_equal(&guid_input, guid);
++}
++
+ static const void *find_guid_context(struct wmi_block *wblock,
+ struct wmi_driver *wdriver)
+ {
+ const struct wmi_device_id *id;
+- guid_t guid_input;
+
+ if (wblock == NULL || wdriver == NULL)
+ return NULL;
+@@ -142,9 +155,7 @@ static const void *find_guid_context(struct wmi_block *wblock,
+
+ id = wdriver->id_table;
+ while (*id->guid_string) {
+- if (guid_parse(id->guid_string, &guid_input))
+- continue;
+- if (!memcmp(wblock->gblock.guid, &guid_input, 16))
++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
+ return id->context;
+ id++;
+ }
+@@ -325,7 +336,7 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
+ wq_params[0].type = ACPI_TYPE_INTEGER;
+ wq_params[0].integer.value = instance;
+
+- if (instance == 0 && wblock->read_takes_no_args)
++ if (instance == 0 && test_bit(WMI_READ_TAKES_NO_ARGS, &wblock->flags))
+ input.count = 0;
+
+ /*
+@@ -353,7 +364,14 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance,
+ * the WQxx method failed - we should disable collection anyway.
+ */
+ if ((block->flags & ACPI_WMI_EXPENSIVE) && ACPI_SUCCESS(wc_status)) {
+- status = acpi_execute_simple_method(handle, wc_method, 0);
++ /*
++ * Ignore whether this WCxx call succeeds or not since
++ * the previously executed WQxx method call might have
++ * succeeded, and returning the failing status code
++ * of this call would throw away the result of the WQxx
++ * call, potentially leaking memory.
++ */
++ acpi_execute_simple_method(handle, wc_method, 0);
+ }
+
+ return status;
+@@ -449,7 +467,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block);
+
+ static void wmi_dump_wdg(const struct guid_block *g)
+ {
+- pr_info("%pUL:\n", g->guid);
++ pr_info("%pUL:\n", &g->guid);
+ if (g->flags & ACPI_WMI_EVENT)
+ pr_info("\tnotify_id: 0x%02X\n", g->notify_id);
+ else
+@@ -531,7 +549,7 @@ wmi_notify_handler handler, void *data)
+ list_for_each_entry(block, &wmi_block_list, list) {
+ acpi_status wmi_status;
+
+- if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
++ if (guid_equal(&block->gblock.guid, &guid_input)) {
+ if (block->handler &&
+ block->handler != wmi_notify_debug)
+ return AE_ALREADY_ACQUIRED;
+@@ -571,7 +589,7 @@ acpi_status wmi_remove_notify_handler(const char *guid)
+ list_for_each_entry(block, &wmi_block_list, list) {
+ acpi_status wmi_status;
+
+- if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
++ if (guid_equal(&block->gblock.guid, &guid_input)) {
+ if (!block->handler ||
+ block->handler == wmi_notify_debug)
+ return AE_NULL_ENTRY;
+@@ -607,7 +625,6 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
+ {
+ struct acpi_object_list input;
+ union acpi_object params[1];
+- struct guid_block *gblock;
+ struct wmi_block *wblock;
+
+ input.count = 1;
+@@ -616,7 +633,7 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
+ params[0].integer.value = event;
+
+ list_for_each_entry(wblock, &wmi_block_list, list) {
+- gblock = &wblock->gblock;
++ struct guid_block *gblock = &wblock->gblock;
+
+ if ((gblock->flags & ACPI_WMI_EVENT) &&
+ (gblock->notify_id == event))
+@@ -669,6 +686,11 @@ static struct wmi_device *dev_to_wdev(struct device *dev)
+ return container_of(dev, struct wmi_device, dev);
+ }
+
++static inline struct wmi_driver *drv_to_wdrv(struct device_driver *drv)
++{
++ return container_of(drv, struct wmi_driver, driver);
++}
++
+ /*
+ * sysfs interface
+ */
+@@ -677,7 +699,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ {
+ struct wmi_block *wblock = dev_to_wblock(dev);
+
+- return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid);
++ return sprintf(buf, "wmi:%pUL\n", &wblock->gblock.guid);
+ }
+ static DEVICE_ATTR_RO(modalias);
+
+@@ -686,7 +708,7 @@ static ssize_t guid_show(struct device *dev, struct device_attribute *attr,
+ {
+ struct wmi_block *wblock = dev_to_wblock(dev);
+
+- return sprintf(buf, "%pUL\n", wblock->gblock.guid);
++ return sprintf(buf, "%pUL\n", &wblock->gblock.guid);
+ }
+ static DEVICE_ATTR_RO(guid);
+
+@@ -769,10 +791,10 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
+ {
+ struct wmi_block *wblock = dev_to_wblock(dev);
+
+- if (add_uevent_var(env, "MODALIAS=wmi:%pUL", wblock->gblock.guid))
++ if (add_uevent_var(env, "MODALIAS=wmi:%pUL", &wblock->gblock.guid))
+ return -ENOMEM;
+
+- if (add_uevent_var(env, "WMI_GUID=%pUL", wblock->gblock.guid))
++ if (add_uevent_var(env, "WMI_GUID=%pUL", &wblock->gblock.guid))
+ return -ENOMEM;
+
+ return 0;
+@@ -787,8 +809,7 @@ static void wmi_dev_release(struct device *dev)
+
+ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
+ {
+- struct wmi_driver *wmi_driver =
+- container_of(driver, struct wmi_driver, driver);
++ struct wmi_driver *wmi_driver = drv_to_wdrv(driver);
+ struct wmi_block *wblock = dev_to_wblock(dev);
+ const struct wmi_device_id *id = wmi_driver->id_table;
+
+@@ -796,11 +817,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
+ return 0;
+
+ while (*id->guid_string) {
+- guid_t driver_guid;
+-
+- if (WARN_ON(guid_parse(id->guid_string, &driver_guid)))
+- continue;
+- if (!memcmp(&driver_guid, wblock->gblock.guid, 16))
++ if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
+ return 1;
+
+ id++;
+@@ -885,8 +902,7 @@ static long wmi_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ }
+
+ /* let the driver do any filtering and do the call */
+- wdriver = container_of(wblock->dev.dev.driver,
+- struct wmi_driver, driver);
++ wdriver = drv_to_wdrv(wblock->dev.dev.driver);
+ if (!try_module_get(wdriver->driver.owner)) {
+ ret = -EBUSY;
+ goto out_ioctl;
+@@ -919,8 +935,7 @@ static const struct file_operations wmi_fops = {
+ static int wmi_dev_probe(struct device *dev)
+ {
+ struct wmi_block *wblock = dev_to_wblock(dev);
+- struct wmi_driver *wdriver =
+- container_of(dev->driver, struct wmi_driver, driver);
++ struct wmi_driver *wdriver = drv_to_wdrv(dev->driver);
+ int ret = 0;
+ char *buf;
+
+@@ -968,6 +983,7 @@ static int wmi_dev_probe(struct device *dev)
+ }
+ }
+
++ set_bit(WMI_PROBED, &wblock->flags);
+ return 0;
+
+ probe_misc_failure:
+@@ -983,8 +999,9 @@ probe_failure:
+ static void wmi_dev_remove(struct device *dev)
+ {
+ struct wmi_block *wblock = dev_to_wblock(dev);
+- struct wmi_driver *wdriver =
+- container_of(dev->driver, struct wmi_driver, driver);
++ struct wmi_driver *wdriver = drv_to_wdrv(dev->driver);
++
++ clear_bit(WMI_PROBED, &wblock->flags);
+
+ if (wdriver->filter_callback) {
+ misc_deregister(&wblock->char_dev);
+@@ -1031,7 +1048,6 @@ static const struct device_type wmi_type_data = {
+ };
+
+ static int wmi_create_device(struct device *wmi_bus_dev,
+- const struct guid_block *gblock,
+ struct wmi_block *wblock,
+ struct acpi_device *device)
+ {
+@@ -1039,12 +1055,12 @@ static int wmi_create_device(struct device *wmi_bus_dev,
+ char method[5];
+ int result;
+
+- if (gblock->flags & ACPI_WMI_EVENT) {
++ if (wblock->gblock.flags & ACPI_WMI_EVENT) {
+ wblock->dev.dev.type = &wmi_type_event;
+ goto out_init;
+ }
+
+- if (gblock->flags & ACPI_WMI_METHOD) {
++ if (wblock->gblock.flags & ACPI_WMI_METHOD) {
+ wblock->dev.dev.type = &wmi_type_method;
+ mutex_init(&wblock->char_mutex);
+ goto out_init;
+@@ -1079,7 +1095,7 @@ static int wmi_create_device(struct device *wmi_bus_dev,
+ * laptops, WQxx may not be a method at all.)
+ */
+ if (info->type != ACPI_TYPE_METHOD || info->param_count == 0)
+- wblock->read_takes_no_args = true;
++ set_bit(WMI_READ_TAKES_NO_ARGS, &wblock->flags);
+
+ kfree(info);
+
+@@ -1094,7 +1110,7 @@ static int wmi_create_device(struct device *wmi_bus_dev,
+ wblock->dev.dev.bus = &wmi_bus_type;
+ wblock->dev.dev.parent = wmi_bus_dev;
+
+- dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid);
++ dev_set_name(&wblock->dev.dev, "%pUL", &wblock->gblock.guid);
+
+ device_initialize(&wblock->dev.dev);
+
+@@ -1114,12 +1130,12 @@ static void wmi_free_devices(struct acpi_device *device)
+ }
+ }
+
+-static bool guid_already_parsed(struct acpi_device *device, const u8 *guid)
++static bool guid_already_parsed(struct acpi_device *device, const guid_t *guid)
+ {
+ struct wmi_block *wblock;
+
+ list_for_each_entry(wblock, &wmi_block_list, list) {
+- if (memcmp(wblock->gblock.guid, guid, 16) == 0) {
++ if (guid_equal(&wblock->gblock.guid, guid)) {
+ /*
+ * Because we historically didn't track the relationship
+ * between GUIDs and ACPI nodes, we don't know whether
+@@ -1174,7 +1190,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
+ * case yet, so for now, we'll just ignore the duplicate
+ * for device creation.
+ */
+- if (guid_already_parsed(device, gblock[i].guid))
++ if (guid_already_parsed(device, &gblock[i].guid))
+ continue;
+
+ wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL);
+@@ -1186,7 +1202,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
+ wblock->acpi_device = device;
+ wblock->gblock = gblock[i];
+
+- retval = wmi_create_device(wmi_bus_dev, &gblock[i], wblock, device);
++ retval = wmi_create_device(wmi_bus_dev, wblock, device);
+ if (retval) {
+ kfree(wblock);
+ continue;
+@@ -1211,7 +1227,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
+ retval = device_add(&wblock->dev.dev);
+ if (retval) {
+ dev_err(wmi_bus_dev, "failed to register %pUL\n",
+- wblock->gblock.guid);
++ &wblock->gblock.guid);
+ if (debug_event)
+ wmi_method_enable(wblock, 0);
+ list_del(&wblock->list);
+@@ -1268,12 +1284,11 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address,
+ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
+ void *context)
+ {
+- struct guid_block *block;
+ struct wmi_block *wblock;
+ bool found_it = false;
+
+ list_for_each_entry(wblock, &wmi_block_list, list) {
+- block = &wblock->gblock;
++ struct guid_block *block = &wblock->gblock;
+
+ if (wblock->acpi_device->handle == handle &&
+ (block->flags & ACPI_WMI_EVENT) &&
+@@ -1288,16 +1303,13 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
+ return;
+
+ /* If a driver is bound, then notify the driver. */
+- if (wblock->dev.dev.driver) {
+- struct wmi_driver *driver;
++ if (test_bit(WMI_PROBED, &wblock->flags) && wblock->dev.dev.driver) {
++ struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver);
+ struct acpi_object_list input;
+ union acpi_object params[1];
+ struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_status status;
+
+- driver = container_of(wblock->dev.dev.driver,
+- struct wmi_driver, driver);
+-
+ input.count = 1;
+ input.pointer = params;
+ params[0].type = ACPI_TYPE_INTEGER;
+@@ -1322,7 +1334,7 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
+ }
+
+ if (debug_event)
+- pr_info("DEBUG Event GUID: %pUL\n", wblock->gblock.guid);
++ pr_info("DEBUG Event GUID: %pUL\n", &wblock->gblock.guid);
+
+ acpi_bus_generate_netlink_event(
+ wblock->acpi_device->pnp.device_class,
+diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
+index 4df5aa6a309c3..6a60c5d83383b 100644
+--- a/drivers/pnp/core.c
++++ b/drivers/pnp/core.c
+@@ -148,14 +148,14 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id,
+ dev->dev.coherent_dma_mask = dev->dma_mask;
+ dev->dev.release = &pnp_release_device;
+
+- dev_set_name(&dev->dev, "%02x:%02x", dev->protocol->number, dev->number);
+-
+ dev_id = pnp_add_id(dev, pnpid);
+ if (!dev_id) {
+ kfree(dev);
+ return NULL;
+ }
+
++ dev_set_name(&dev->dev, "%02x:%02x", dev->protocol->number, dev->number);
++
+ return dev;
+ }
+
+diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c
+index 08d0a07b58ef2..c7624d7611a7e 100644
+--- a/drivers/power/reset/arm-versatile-reboot.c
++++ b/drivers/power/reset/arm-versatile-reboot.c
+@@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void)
+ versatile_reboot_type = (enum versatile_reboot)reboot_id->data;
+
+ syscon_regmap = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(syscon_regmap))
+ return PTR_ERR(syscon_regmap);
+
+diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c
+index 026649409135c..64def79d557a8 100644
+--- a/drivers/power/reset/at91-reset.c
++++ b/drivers/power/reset/at91-reset.c
+@@ -193,7 +193,7 @@ static int __init at91_reset_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ reset->rstc_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL);
+- if (!reset->rstc_base) {
++ if (IS_ERR(reset->rstc_base)) {
+ dev_err(&pdev->dev, "Could not map reset controller address\n");
+ return -ENODEV;
+ }
+@@ -203,7 +203,7 @@ static int __init at91_reset_probe(struct platform_device *pdev)
+ for_each_matching_node_and_match(np, at91_ramc_of_match, &match) {
+ reset->ramc_lpr = (u32)match->data;
+ reset->ramc_base[idx] = devm_of_iomap(&pdev->dev, np, 0, NULL);
+- if (!reset->ramc_base[idx]) {
++ if (IS_ERR(reset->ramc_base[idx])) {
+ dev_err(&pdev->dev, "Could not map ram controller address\n");
+ of_node_put(np);
+ return -ENODEV;
+diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c
+index 90e35c07240ae..b7f7a8225f22e 100644
+--- a/drivers/power/reset/gemini-poweroff.c
++++ b/drivers/power/reset/gemini-poweroff.c
+@@ -107,8 +107,8 @@ static int gemini_poweroff_probe(struct platform_device *pdev)
+ return PTR_ERR(gpw->base);
+
+ irq = platform_get_irq(pdev, 0);
+- if (!irq)
+- return -EINVAL;
++ if (irq < 0)
++ return irq;
+
+ gpw->dev = dev;
+
+diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
+index 8688c8ba8894c..81be33c041d6b 100644
+--- a/drivers/power/reset/ltc2952-poweroff.c
++++ b/drivers/power/reset/ltc2952-poweroff.c
+@@ -161,8 +161,8 @@ static void ltc2952_poweroff_kill(void)
+
+ static void ltc2952_poweroff_default(struct ltc2952_poweroff *data)
+ {
+- data->wde_interval = 300L * 1E6L;
+- data->trigger_delay = ktime_set(2, 500L*1E6L);
++ data->wde_interval = 300L * NSEC_PER_MSEC;
++ data->trigger_delay = ktime_set(2, 500L * NSEC_PER_MSEC);
+
+ hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ data->timer_trigger.function = ltc2952_poweroff_timer_trigger;
+diff --git a/drivers/power/reset/mt6323-poweroff.c b/drivers/power/reset/mt6323-poweroff.c
+index 0532803e6cbc4..d90e76fcb9383 100644
+--- a/drivers/power/reset/mt6323-poweroff.c
++++ b/drivers/power/reset/mt6323-poweroff.c
+@@ -57,6 +57,9 @@ static int mt6323_pwrc_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -EINVAL;
++
+ pwrc->base = res->start;
+ pwrc->regmap = mt6397_chip->regmap;
+ pwrc->dev = &pdev->dev;
+diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c
+index b6c9111d77d7d..896309d3cadfe 100644
+--- a/drivers/power/supply/ab8500_btemp.c
++++ b/drivers/power/supply/ab8500_btemp.c
+@@ -902,10 +902,8 @@ static int ab8500_btemp_get_ext_psy_data(struct device *dev, void *data)
+ */
+ static void ab8500_btemp_external_power_changed(struct power_supply *psy)
+ {
+- struct ab8500_btemp *di = power_supply_get_drvdata(psy);
+-
+- class_for_each_device(power_supply_class, NULL,
+- di->btemp_psy, ab8500_btemp_get_ext_psy_data);
++ class_for_each_device(power_supply_class, NULL, psy,
++ ab8500_btemp_get_ext_psy_data);
+ }
+
+ /* ab8500 btemp driver interrupts and their respective isr */
+diff --git a/drivers/power/supply/ab8500_chargalg.c b/drivers/power/supply/ab8500_chargalg.c
+index ff4b26b1cecae..b809fa5abbbaf 100644
+--- a/drivers/power/supply/ab8500_chargalg.c
++++ b/drivers/power/supply/ab8500_chargalg.c
+@@ -2019,11 +2019,11 @@ static int ab8500_chargalg_probe(struct platform_device *pdev)
+ psy_cfg.drv_data = di;
+
+ /* Initilialize safety timer */
+- hrtimer_init(&di->safety_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
++ hrtimer_init(&di->safety_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ di->safety_timer.function = ab8500_chargalg_safety_timer_expired;
+
+ /* Initilialize maintenance timer */
+- hrtimer_init(&di->maintenance_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
++ hrtimer_init(&di->maintenance_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ di->maintenance_timer.function =
+ ab8500_chargalg_maintenance_timer_expired;
+
+diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
+index 15eadaf46f144..a4f766fc7c9d7 100644
+--- a/drivers/power/supply/ab8500_charger.c
++++ b/drivers/power/supply/ab8500_charger.c
+@@ -3726,7 +3726,14 @@ static int __init ab8500_charger_init(void)
+ if (ret)
+ return ret;
+
+- return platform_driver_register(&ab8500_charger_driver);
++ ret = platform_driver_register(&ab8500_charger_driver);
++ if (ret) {
++ platform_unregister_drivers(ab8500_charger_component_drivers,
++ ARRAY_SIZE(ab8500_charger_component_drivers));
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static void __exit ab8500_charger_exit(void)
+diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
+index 05fe9724ba508..eb7eac23da70f 100644
+--- a/drivers/power/supply/ab8500_fg.c
++++ b/drivers/power/supply/ab8500_fg.c
+@@ -2384,10 +2384,8 @@ out:
+ */
+ static void ab8500_fg_external_power_changed(struct power_supply *psy)
+ {
+- struct ab8500_fg *di = power_supply_get_drvdata(psy);
+-
+- class_for_each_device(power_supply_class, NULL,
+- di->fg_psy, ab8500_fg_get_ext_psy_data);
++ class_for_each_device(power_supply_class, NULL, psy,
++ ab8500_fg_get_ext_psy_data);
+ }
+
+ /**
+@@ -2545,8 +2543,10 @@ static int ab8500_fg_sysfs_init(struct ab8500_fg *di)
+ ret = kobject_init_and_add(&di->fg_kobject,
+ &ab8500_fg_ktype,
+ NULL, "battery");
+- if (ret < 0)
++ if (ret < 0) {
++ kobject_put(&di->fg_kobject);
+ dev_err(di->dev, "failed to create sysfs entry\n");
++ }
+
+ return ret;
+ }
+diff --git a/drivers/power/supply/adp5061.c b/drivers/power/supply/adp5061.c
+index 003557043ab3a..daee1161c3059 100644
+--- a/drivers/power/supply/adp5061.c
++++ b/drivers/power/supply/adp5061.c
+@@ -427,11 +427,11 @@ static int adp5061_get_chg_type(struct adp5061_state *st,
+ if (ret < 0)
+ return ret;
+
+- chg_type = adp5061_chg_type[ADP5061_CHG_STATUS_1_CHG_STATUS(status1)];
+- if (chg_type > ADP5061_CHG_FAST_CV)
++ chg_type = ADP5061_CHG_STATUS_1_CHG_STATUS(status1);
++ if (chg_type >= ARRAY_SIZE(adp5061_chg_type))
+ val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+ else
+- val->intval = chg_type;
++ val->intval = adp5061_chg_type[chg_type];
+
+ return ret;
+ }
+diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c
+index 18a9db0df4b1f..335e12cc5e2f9 100644
+--- a/drivers/power/supply/axp20x_battery.c
++++ b/drivers/power/supply/axp20x_battery.c
+@@ -186,7 +186,6 @@ static int axp20x_battery_get_prop(struct power_supply *psy,
+ union power_supply_propval *val)
+ {
+ struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy);
+- struct iio_channel *chan;
+ int ret = 0, reg, val1;
+
+ switch (psp) {
+@@ -266,12 +265,12 @@ static int axp20x_battery_get_prop(struct power_supply *psy,
+ if (ret)
+ return ret;
+
+- if (reg & AXP20X_PWR_STATUS_BAT_CHARGING)
+- chan = axp20x_batt->batt_chrg_i;
+- else
+- chan = axp20x_batt->batt_dischrg_i;
+-
+- ret = iio_read_channel_processed(chan, &val->intval);
++ if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) {
++ ret = iio_read_channel_processed(axp20x_batt->batt_chrg_i, &val->intval);
++ } else {
++ ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i, &val1);
++ val->intval = -val1;
++ }
+ if (ret)
+ return ret;
+
+diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c
+index b9553be9bed56..22378dad4d9fc 100644
+--- a/drivers/power/supply/axp288_charger.c
++++ b/drivers/power/supply/axp288_charger.c
+@@ -41,11 +41,11 @@
+ #define VBUS_ISPOUT_CUR_LIM_1500MA 0x1 /* 1500mA */
+ #define VBUS_ISPOUT_CUR_LIM_2000MA 0x2 /* 2000mA */
+ #define VBUS_ISPOUT_CUR_NO_LIM 0x3 /* 2500mA */
+-#define VBUS_ISPOUT_VHOLD_SET_MASK 0x31
++#define VBUS_ISPOUT_VHOLD_SET_MASK 0x38
+ #define VBUS_ISPOUT_VHOLD_SET_BIT_POS 0x3
+ #define VBUS_ISPOUT_VHOLD_SET_OFFSET 4000 /* 4000mV */
+ #define VBUS_ISPOUT_VHOLD_SET_LSB_RES 100 /* 100mV */
+-#define VBUS_ISPOUT_VHOLD_SET_4300MV 0x3 /* 4300mV */
++#define VBUS_ISPOUT_VHOLD_SET_4400MV 0x4 /* 4400mV */
+ #define VBUS_ISPOUT_VBUS_PATH_DIS BIT(7)
+
+ #define CHRG_CCCV_CC_MASK 0xf /* 4 bits */
+@@ -744,6 +744,16 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info)
+ ret = axp288_charger_vbus_path_select(info, true);
+ if (ret < 0)
+ return ret;
++ } else {
++ /* Set Vhold to the factory default / recommended 4.4V */
++ val = VBUS_ISPOUT_VHOLD_SET_4400MV << VBUS_ISPOUT_VHOLD_SET_BIT_POS;
++ ret = regmap_update_bits(info->regmap, AXP20X_VBUS_IPSOUT_MGMT,
++ VBUS_ISPOUT_VHOLD_SET_MASK, val);
++ if (ret < 0) {
++ dev_err(&info->pdev->dev, "register(%x) write error(%d)\n",
++ AXP20X_VBUS_IPSOUT_MGMT, ret);
++ return ret;
++ }
+ }
+
+ /* Read current charge voltage and current limit */
+@@ -822,17 +832,20 @@ static int axp288_charger_probe(struct platform_device *pdev)
+ info->regmap_irqc = axp20x->regmap_irqc;
+
+ info->cable.edev = extcon_get_extcon_dev(AXP288_EXTCON_DEV_NAME);
+- if (info->cable.edev == NULL) {
+- dev_dbg(dev, "%s is not ready, probe deferred\n",
+- AXP288_EXTCON_DEV_NAME);
+- return -EPROBE_DEFER;
++ if (IS_ERR(info->cable.edev)) {
++ dev_err_probe(dev, PTR_ERR(info->cable.edev),
++ "extcon_get_extcon_dev(%s) failed\n",
++ AXP288_EXTCON_DEV_NAME);
++ return PTR_ERR(info->cable.edev);
+ }
+
+ if (acpi_dev_present(USB_HOST_EXTCON_HID, NULL, -1)) {
+ info->otg.cable = extcon_get_extcon_dev(USB_HOST_EXTCON_NAME);
+- if (info->otg.cable == NULL) {
+- dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n");
+- return -EPROBE_DEFER;
++ if (IS_ERR(info->otg.cable)) {
++ dev_err_probe(dev, PTR_ERR(info->otg.cable),
++ "extcon_get_extcon_dev(%s) failed\n",
++ USB_HOST_EXTCON_NAME);
++ return PTR_ERR(info->otg.cable);
+ }
+ dev_info(dev, "Using " USB_HOST_EXTCON_HID " extcon for usb-id\n");
+ }
+diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c
+index c1da217fdb0e2..97e8663c08dfc 100644
+--- a/drivers/power/supply/axp288_fuel_gauge.c
++++ b/drivers/power/supply/axp288_fuel_gauge.c
+@@ -605,7 +605,6 @@ static const struct dmi_system_id axp288_no_battery_list[] = {
+ DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"),
+ DMI_MATCH(DMI_CHASSIS_TYPE, "3"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+- DMI_MATCH(DMI_BIOS_VERSION, "5.11"),
+ },
+ },
+ {}
+diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
+index 35ff0c8fe96f5..90ac5e59a5d6f 100644
+--- a/drivers/power/supply/bq24190_charger.c
++++ b/drivers/power/supply/bq24190_charger.c
+@@ -39,6 +39,7 @@
+ #define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0
+ #define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1
+ #define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2
++#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3
+ #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1))
+ #define BQ24190_REG_POC_SYS_MIN_SHIFT 1
+ #define BQ24190_REG_POC_SYS_MIN_MIN 3000
+@@ -445,11 +446,9 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
+ if (!info)
+ return -EINVAL;
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v);
+ if (ret)
+@@ -480,11 +479,9 @@ static ssize_t bq24190_sysfs_store(struct device *dev,
+ if (ret < 0)
+ return ret;
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ ret = bq24190_write_mask(bdi, info->reg, info->mask, info->shift, v);
+ if (ret)
+@@ -503,10 +500,9 @@ static int bq24190_set_charge_mode(struct regulator_dev *dev, u8 val)
+ struct bq24190_dev_info *bdi = rdev_get_drvdata(dev);
+ int ret;
+
+- ret = pm_runtime_get_sync(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
+ if (ret < 0) {
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret);
+- pm_runtime_put_noidle(bdi->dev);
+ return ret;
+ }
+
+@@ -536,10 +532,9 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev)
+ int ret;
+ u8 val;
+
+- ret = pm_runtime_get_sync(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
+ if (ret < 0) {
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret);
+- pm_runtime_put_noidle(bdi->dev);
+ return ret;
+ }
+
+@@ -550,7 +545,11 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev)
+ pm_runtime_mark_last_busy(bdi->dev);
+ pm_runtime_put_autosuspend(bdi->dev);
+
+- return ret ? ret : val == BQ24190_REG_POC_CHG_CONFIG_OTG;
++ if (ret)
++ return ret;
++
++ return (val == BQ24190_REG_POC_CHG_CONFIG_OTG ||
++ val == BQ24190_REG_POC_CHG_CONFIG_OTG_ALT);
+ }
+
+ static const struct regulator_ops bq24190_vbus_ops = {
+@@ -1076,11 +1075,9 @@ static int bq24190_charger_get_property(struct power_supply *psy,
+
+ dev_dbg(bdi->dev, "prop: %d\n", psp);
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_CHARGE_TYPE:
+@@ -1150,11 +1147,9 @@ static int bq24190_charger_set_property(struct power_supply *psy,
+
+ dev_dbg(bdi->dev, "prop: %d\n", psp);
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+@@ -1206,8 +1201,19 @@ static void bq24190_input_current_limit_work(struct work_struct *work)
+ struct bq24190_dev_info *bdi =
+ container_of(work, struct bq24190_dev_info,
+ input_current_limit_work.work);
++ union power_supply_propval val;
++ int ret;
+
+- power_supply_set_input_current_limit_from_supplier(bdi->charger);
++ ret = power_supply_get_property_from_supplier(bdi->charger,
++ POWER_SUPPLY_PROP_CURRENT_MAX,
++ &val);
++ if (ret)
++ return;
++
++ bq24190_charger_set_property(bdi->charger,
++ POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
++ &val);
++ power_supply_changed(bdi->charger);
+ }
+
+ /* Sync the input-current-limit with our parent supply (if we have one) */
+@@ -1413,11 +1419,9 @@ static int bq24190_battery_get_property(struct power_supply *psy,
+ dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
+ dev_dbg(bdi->dev, "prop: %d\n", psp);
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+@@ -1461,11 +1465,9 @@ static int bq24190_battery_set_property(struct power_supply *psy,
+ dev_warn(bdi->dev, "warning: /sys/class/power_supply/bq24190-battery is deprecated\n");
+ dev_dbg(bdi->dev, "prop: %d\n", psp);
+
+- ret = pm_runtime_get_sync(bdi->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(bdi->dev);
++ ret = pm_runtime_resume_and_get(bdi->dev);
++ if (ret < 0)
+ return ret;
+- }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+@@ -1619,10 +1621,9 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
+ int error;
+
+ bdi->irq_event = true;
+- error = pm_runtime_get_sync(bdi->dev);
++ error = pm_runtime_resume_and_get(bdi->dev);
+ if (error < 0) {
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+- pm_runtime_put_noidle(bdi->dev);
+ return IRQ_NONE;
+ }
+ bq24190_check_status(bdi);
+@@ -1842,11 +1843,10 @@ static int bq24190_remove(struct i2c_client *client)
+ struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+ int error;
+
+- error = pm_runtime_get_sync(bdi->dev);
+- if (error < 0) {
++ cancel_delayed_work_sync(&bdi->input_current_limit_work);
++ error = pm_runtime_resume_and_get(bdi->dev);
++ if (error < 0)
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+- pm_runtime_put_noidle(bdi->dev);
+- }
+
+ bq24190_register_reset(bdi);
+ if (bdi->battery)
+@@ -1895,11 +1895,9 @@ static __maybe_unused int bq24190_pm_suspend(struct device *dev)
+ struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+ int error;
+
+- error = pm_runtime_get_sync(bdi->dev);
+- if (error < 0) {
++ error = pm_runtime_resume_and_get(bdi->dev);
++ if (error < 0)
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+- pm_runtime_put_noidle(bdi->dev);
+- }
+
+ bq24190_register_reset(bdi);
+
+@@ -1920,11 +1918,9 @@ static __maybe_unused int bq24190_pm_resume(struct device *dev)
+ bdi->f_reg = 0;
+ bdi->ss_reg = BQ24190_REG_SS_VBUS_STAT_MASK; /* impossible state */
+
+- error = pm_runtime_get_sync(bdi->dev);
+- if (error < 0) {
++ error = pm_runtime_resume_and_get(bdi->dev);
++ if (error < 0)
+ dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+- pm_runtime_put_noidle(bdi->dev);
+- }
+
+ bq24190_register_reset(bdi);
+ bq24190_set_config(bdi);
+diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
+index 945c3257ca931..fe814805c68b5 100644
+--- a/drivers/power/supply/bq25890_charger.c
++++ b/drivers/power/supply/bq25890_charger.c
+@@ -581,12 +581,12 @@ static irqreturn_t __bq25890_handle_irq(struct bq25890_device *bq)
+
+ if (!new_state.online && bq->state.online) { /* power removed */
+ /* disable ADC */
+- ret = bq25890_field_write(bq, F_CONV_START, 0);
++ ret = bq25890_field_write(bq, F_CONV_RATE, 0);
+ if (ret < 0)
+ goto error;
+ } else if (new_state.online && !bq->state.online) { /* power inserted */
+ /* enable ADC, to have control of charge current/voltage */
+- ret = bq25890_field_write(bq, F_CONV_START, 1);
++ ret = bq25890_field_write(bq, F_CONV_RATE, 1);
+ if (ret < 0)
+ goto error;
+ }
+diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
+index 7e5e24b585d8a..7c1295986b594 100644
+--- a/drivers/power/supply/bq27xxx_battery.c
++++ b/drivers/power/supply/bq27xxx_battery.c
+@@ -1083,10 +1083,8 @@ static int poll_interval_param_set(const char *val, const struct kernel_param *k
+ return ret;
+
+ mutex_lock(&bq27xxx_list_lock);
+- list_for_each_entry(di, &bq27xxx_battery_devices, list) {
+- cancel_delayed_work_sync(&di->work);
+- schedule_delayed_work(&di->work, 0);
+- }
++ list_for_each_entry(di, &bq27xxx_battery_devices, list)
++ mod_delayed_work(system_wq, &di->work, 0);
+ mutex_unlock(&bq27xxx_list_lock);
+
+ return ret;
+@@ -1572,14 +1570,6 @@ static int bq27xxx_battery_read_charge(struct bq27xxx_device_info *di, u8 reg)
+ */
+ static inline int bq27xxx_battery_read_nac(struct bq27xxx_device_info *di)
+ {
+- int flags;
+-
+- if (di->opts & BQ27XXX_O_ZERO) {
+- flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, true);
+- if (flags >= 0 && (flags & BQ27000_FLAG_CI))
+- return -ENODATA;
+- }
+-
+ return bq27xxx_battery_read_charge(di, BQ27XXX_REG_NAC);
+ }
+
+@@ -1742,6 +1732,18 @@ static bool bq27xxx_battery_dead(struct bq27xxx_device_info *di, u16 flags)
+ return flags & (BQ27XXX_FLAG_SOC1 | BQ27XXX_FLAG_SOCF);
+ }
+
++/*
++ * Returns true if reported battery capacity is inaccurate
++ */
++static bool bq27xxx_battery_capacity_inaccurate(struct bq27xxx_device_info *di,
++ u16 flags)
++{
++ if (di->opts & BQ27XXX_O_HAS_CI)
++ return (flags & BQ27000_FLAG_CI);
++ else
++ return false;
++}
++
+ static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di)
+ {
+ /* Unlikely but important to return first */
+@@ -1751,76 +1753,12 @@ static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di)
+ return POWER_SUPPLY_HEALTH_COLD;
+ if (unlikely(bq27xxx_battery_dead(di, di->cache.flags)))
+ return POWER_SUPPLY_HEALTH_DEAD;
++ if (unlikely(bq27xxx_battery_capacity_inaccurate(di, di->cache.flags)))
++ return POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED;
+
+ return POWER_SUPPLY_HEALTH_GOOD;
+ }
+
+-void bq27xxx_battery_update(struct bq27xxx_device_info *di)
+-{
+- struct bq27xxx_reg_cache cache = {0, };
+- bool has_ci_flag = di->opts & BQ27XXX_O_HAS_CI;
+- bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
+-
+- cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
+- if ((cache.flags & 0xff) == 0xff)
+- cache.flags = -1; /* read error */
+- if (cache.flags >= 0) {
+- cache.temperature = bq27xxx_battery_read_temperature(di);
+- if (has_ci_flag && (cache.flags & BQ27000_FLAG_CI)) {
+- dev_info_once(di->dev, "battery is not calibrated! ignoring capacity values\n");
+- cache.capacity = -ENODATA;
+- cache.energy = -ENODATA;
+- cache.time_to_empty = -ENODATA;
+- cache.time_to_empty_avg = -ENODATA;
+- cache.time_to_full = -ENODATA;
+- cache.charge_full = -ENODATA;
+- cache.health = -ENODATA;
+- } else {
+- if (di->regs[BQ27XXX_REG_TTE] != INVALID_REG_ADDR)
+- cache.time_to_empty = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTE);
+- if (di->regs[BQ27XXX_REG_TTECP] != INVALID_REG_ADDR)
+- cache.time_to_empty_avg = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTECP);
+- if (di->regs[BQ27XXX_REG_TTF] != INVALID_REG_ADDR)
+- cache.time_to_full = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTF);
+-
+- cache.charge_full = bq27xxx_battery_read_fcc(di);
+- cache.capacity = bq27xxx_battery_read_soc(di);
+- if (di->regs[BQ27XXX_REG_AE] != INVALID_REG_ADDR)
+- cache.energy = bq27xxx_battery_read_energy(di);
+- di->cache.flags = cache.flags;
+- cache.health = bq27xxx_battery_read_health(di);
+- }
+- if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR)
+- cache.cycle_count = bq27xxx_battery_read_cyct(di);
+-
+- /* We only have to read charge design full once */
+- if (di->charge_design_full <= 0)
+- di->charge_design_full = bq27xxx_battery_read_dcap(di);
+- }
+-
+- if ((di->cache.capacity != cache.capacity) ||
+- (di->cache.flags != cache.flags))
+- power_supply_changed(di->bat);
+-
+- if (memcmp(&di->cache, &cache, sizeof(cache)) != 0)
+- di->cache = cache;
+-
+- di->last_update = jiffies;
+-}
+-EXPORT_SYMBOL_GPL(bq27xxx_battery_update);
+-
+-static void bq27xxx_battery_poll(struct work_struct *work)
+-{
+- struct bq27xxx_device_info *di =
+- container_of(work, struct bq27xxx_device_info,
+- work.work);
+-
+- bq27xxx_battery_update(di);
+-
+- if (poll_interval > 0)
+- schedule_delayed_work(&di->work, poll_interval * HZ);
+-}
+-
+ static bool bq27xxx_battery_is_full(struct bq27xxx_device_info *di, int flags)
+ {
+ if (di->opts & BQ27XXX_O_ZERO)
+@@ -1839,7 +1777,8 @@ static bool bq27xxx_battery_is_full(struct bq27xxx_device_info *di, int flags)
+ static int bq27xxx_battery_current_and_status(
+ struct bq27xxx_device_info *di,
+ union power_supply_propval *val_curr,
+- union power_supply_propval *val_status)
++ union power_supply_propval *val_status,
++ struct bq27xxx_reg_cache *cache)
+ {
+ bool single_flags = (di->opts & BQ27XXX_O_ZERO);
+ int curr;
+@@ -1851,10 +1790,14 @@ static int bq27xxx_battery_current_and_status(
+ return curr;
+ }
+
+- flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, single_flags);
+- if (flags < 0) {
+- dev_err(di->dev, "error reading flags\n");
+- return flags;
++ if (cache) {
++ flags = cache->flags;
++ } else {
++ flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, single_flags);
++ if (flags < 0) {
++ dev_err(di->dev, "error reading flags\n");
++ return flags;
++ }
+ }
+
+ if (di->opts & BQ27XXX_O_ZERO) {
+@@ -1889,6 +1832,78 @@ static int bq27xxx_battery_current_and_status(
+ return 0;
+ }
+
++static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di)
++{
++ union power_supply_propval status = di->last_status;
++ struct bq27xxx_reg_cache cache = {0, };
++ bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
++
++ cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
++ if ((cache.flags & 0xff) == 0xff)
++ cache.flags = -1; /* read error */
++ if (cache.flags >= 0) {
++ cache.temperature = bq27xxx_battery_read_temperature(di);
++ if (di->regs[BQ27XXX_REG_TTE] != INVALID_REG_ADDR)
++ cache.time_to_empty = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTE);
++ if (di->regs[BQ27XXX_REG_TTECP] != INVALID_REG_ADDR)
++ cache.time_to_empty_avg = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTECP);
++ if (di->regs[BQ27XXX_REG_TTF] != INVALID_REG_ADDR)
++ cache.time_to_full = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTF);
++
++ cache.charge_full = bq27xxx_battery_read_fcc(di);
++ cache.capacity = bq27xxx_battery_read_soc(di);
++ if (di->regs[BQ27XXX_REG_AE] != INVALID_REG_ADDR)
++ cache.energy = bq27xxx_battery_read_energy(di);
++ di->cache.flags = cache.flags;
++ cache.health = bq27xxx_battery_read_health(di);
++ if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR)
++ cache.cycle_count = bq27xxx_battery_read_cyct(di);
++
++ /*
++ * On gauges with signed current reporting the current must be
++ * checked to detect charging <-> discharging status changes.
++ */
++ if (!(di->opts & BQ27XXX_O_ZERO))
++ bq27xxx_battery_current_and_status(di, NULL, &status, &cache);
++
++ /* We only have to read charge design full once */
++ if (di->charge_design_full <= 0)
++ di->charge_design_full = bq27xxx_battery_read_dcap(di);
++ }
++
++ if ((di->cache.capacity != cache.capacity) ||
++ (di->cache.flags != cache.flags) ||
++ (di->last_status.intval != status.intval)) {
++ di->last_status.intval = status.intval;
++ power_supply_changed(di->bat);
++ }
++
++ if (memcmp(&di->cache, &cache, sizeof(cache)) != 0)
++ di->cache = cache;
++
++ di->last_update = jiffies;
++
++ if (!di->removed && poll_interval > 0)
++ mod_delayed_work(system_wq, &di->work, poll_interval * HZ);
++}
++
++void bq27xxx_battery_update(struct bq27xxx_device_info *di)
++{
++ mutex_lock(&di->lock);
++ bq27xxx_battery_update_unlocked(di);
++ mutex_unlock(&di->lock);
++}
++EXPORT_SYMBOL_GPL(bq27xxx_battery_update);
++
++static void bq27xxx_battery_poll(struct work_struct *work)
++{
++ struct bq27xxx_device_info *di =
++ container_of(work, struct bq27xxx_device_info,
++ work.work);
++
++ bq27xxx_battery_update(di);
++}
++
+ /*
+ * Get the average power in µW
+ * Return < 0 if something fails.
+@@ -1991,10 +2006,8 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
+ struct bq27xxx_device_info *di = power_supply_get_drvdata(psy);
+
+ mutex_lock(&di->lock);
+- if (time_is_before_jiffies(di->last_update + 5 * HZ)) {
+- cancel_delayed_work_sync(&di->work);
+- bq27xxx_battery_poll(&di->work.work);
+- }
++ if (time_is_before_jiffies(di->last_update + 5 * HZ))
++ bq27xxx_battery_update_unlocked(di);
+ mutex_unlock(&di->lock);
+
+ if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0)
+@@ -2002,7 +2015,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+- ret = bq27xxx_battery_current_and_status(di, NULL, val);
++ ret = bq27xxx_battery_current_and_status(di, NULL, val, NULL);
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ ret = bq27xxx_battery_voltage(di, val);
+@@ -2011,7 +2024,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
+ val->intval = di->cache.flags < 0 ? 0 : 1;
+ break;
+ case POWER_SUPPLY_PROP_CURRENT_NOW:
+- ret = bq27xxx_battery_current_and_status(di, val, NULL);
++ ret = bq27xxx_battery_current_and_status(di, val, NULL, NULL);
+ break;
+ case POWER_SUPPLY_PROP_CAPACITY:
+ ret = bq27xxx_simple_value(di->cache.capacity, val);
+@@ -2084,8 +2097,8 @@ static void bq27xxx_external_power_changed(struct power_supply *psy)
+ {
+ struct bq27xxx_device_info *di = power_supply_get_drvdata(psy);
+
+- cancel_delayed_work_sync(&di->work);
+- schedule_delayed_work(&di->work, 0);
++ /* After charger plug in/out wait 0.5s for things to stabilize */
++ mod_delayed_work(system_wq, &di->work, HZ / 2);
+ }
+
+ int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
+@@ -2133,22 +2146,18 @@ EXPORT_SYMBOL_GPL(bq27xxx_battery_setup);
+
+ void bq27xxx_battery_teardown(struct bq27xxx_device_info *di)
+ {
+- /*
+- * power_supply_unregister call bq27xxx_battery_get_property which
+- * call bq27xxx_battery_poll.
+- * Make sure that bq27xxx_battery_poll will not call
+- * schedule_delayed_work again after unregister (which cause OOPS).
+- */
+- poll_interval = 0;
+-
+- cancel_delayed_work_sync(&di->work);
+-
+- power_supply_unregister(di->bat);
+-
+ mutex_lock(&bq27xxx_list_lock);
+ list_del(&di->list);
+ mutex_unlock(&bq27xxx_list_lock);
+
++ /* Set removed to avoid bq27xxx_battery_update() re-queuing the work */
++ mutex_lock(&di->lock);
++ di->removed = true;
++ mutex_unlock(&di->lock);
++
++ cancel_delayed_work_sync(&di->work);
++
++ power_supply_unregister(di->bat);
+ mutex_destroy(&di->lock);
+ }
+ EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown);
+diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
+index 46f078350fd3f..b722ee2d7e142 100644
+--- a/drivers/power/supply/bq27xxx_battery_i2c.c
++++ b/drivers/power/supply/bq27xxx_battery_i2c.c
+@@ -179,7 +179,7 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
+ i2c_set_clientdata(client, di);
+
+ if (client->irq) {
+- ret = devm_request_threaded_irq(&client->dev, client->irq,
++ ret = request_threaded_irq(client->irq,
+ NULL, bq27xxx_battery_irq_handler_thread,
+ IRQF_ONESHOT,
+ di->name, di);
+@@ -187,7 +187,8 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
+ dev_err(&client->dev,
+ "Unable to register IRQ %d error %d\n",
+ client->irq, ret);
+- return ret;
++ bq27xxx_battery_teardown(di);
++ goto err_failed;
+ }
+ }
+
+@@ -208,6 +209,7 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
+ {
+ struct bq27xxx_device_info *di = i2c_get_clientdata(client);
+
++ free_irq(client->irq, di);
+ bq27xxx_battery_teardown(di);
+
+ mutex_lock(&battery_mutex);
+diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c
+index d67edb760c948..92db79400a6ad 100644
+--- a/drivers/power/supply/charger-manager.c
++++ b/drivers/power/supply/charger-manager.c
+@@ -985,13 +985,10 @@ static int charger_extcon_init(struct charger_manager *cm,
+ cable->nb.notifier_call = charger_extcon_notifier;
+
+ cable->extcon_dev = extcon_get_extcon_dev(cable->extcon_name);
+- if (IS_ERR_OR_NULL(cable->extcon_dev)) {
++ if (IS_ERR(cable->extcon_dev)) {
+ pr_err("Cannot find extcon_dev for %s (cable: %s)\n",
+ cable->extcon_name, cable->name);
+- if (cable->extcon_dev == NULL)
+- return -EPROBE_DEFER;
+- else
+- return PTR_ERR(cable->extcon_dev);
++ return PTR_ERR(cable->extcon_dev);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(extcon_mapping); i++) {
+diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c
+index d89e08efd2ad0..0a4f02e4ae7ba 100644
+--- a/drivers/power/supply/cros_usbpd-charger.c
++++ b/drivers/power/supply/cros_usbpd-charger.c
+@@ -276,7 +276,7 @@ static int cros_usbpd_charger_get_power_info(struct port_data *port)
+ port->psy_current_max = 0;
+ break;
+ default:
+- dev_err(dev, "Port %d: default case!\n", port->port_number);
++ dev_dbg(dev, "Port %d: default case!\n", port->port_number);
+ port->psy_usb_type = POWER_SUPPLY_USB_TYPE_SDP;
+ }
+
+diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c
+index f9314cc0cd75f..6b987da586556 100644
+--- a/drivers/power/supply/da9150-charger.c
++++ b/drivers/power/supply/da9150-charger.c
+@@ -662,6 +662,7 @@ static int da9150_charger_remove(struct platform_device *pdev)
+
+ if (!IS_ERR_OR_NULL(charger->usb_phy))
+ usb_unregister_notifier(charger->usb_phy, &charger->otg_nb);
++ cancel_work_sync(&charger->otg_work);
+
+ power_supply_unregister(charger->battery);
+ power_supply_unregister(charger->usb);
+diff --git a/drivers/power/supply/generic-adc-battery.c b/drivers/power/supply/generic-adc-battery.c
+index 66039c665dd1e..0af536f4932f1 100644
+--- a/drivers/power/supply/generic-adc-battery.c
++++ b/drivers/power/supply/generic-adc-battery.c
+@@ -135,6 +135,9 @@ static int read_channel(struct gab *adc_bat, enum power_supply_property psp,
+ result);
+ if (ret < 0)
+ pr_err("read channel error\n");
++ else
++ *result *= 1000;
++
+ return ret;
+ }
+
+diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
+index 3cea92e28dc3e..a9aef1e8b186e 100644
+--- a/drivers/power/supply/max17040_battery.c
++++ b/drivers/power/supply/max17040_battery.c
+@@ -449,6 +449,8 @@ static int max17040_probe(struct i2c_client *client,
+
+ chip->client = client;
+ chip->regmap = devm_regmap_init_i2c(client, &max17040_regmap);
++ if (IS_ERR(chip->regmap))
++ return PTR_ERR(chip->regmap);
+ chip_id = (enum chip_id) id->driver_data;
+ if (client->dev.of_node) {
+ ret = max17040_get_of_data(chip);
+diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
+index 8dffae76b6a31..aaf6f1bd3d298 100644
+--- a/drivers/power/supply/max17042_battery.c
++++ b/drivers/power/supply/max17042_battery.c
+@@ -313,7 +313,10 @@ static int max17042_get_property(struct power_supply *psy,
+ val->intval = data * 625 / 8;
+ break;
+ case POWER_SUPPLY_PROP_CAPACITY:
+- ret = regmap_read(map, MAX17042_RepSOC, &data);
++ if (chip->pdata->enable_current_sense)
++ ret = regmap_read(map, MAX17042_RepSOC, &data);
++ else
++ ret = regmap_read(map, MAX17042_VFSOC, &data);
+ if (ret < 0)
+ return ret;
+
+@@ -857,7 +860,8 @@ static void max17042_set_soc_threshold(struct max17042_chip *chip, u16 off)
+ regmap_read(map, MAX17042_RepSOC, &soc);
+ soc >>= 8;
+ soc_tr = (soc + off) << 8;
+- soc_tr |= (soc - off);
++ if (off < soc)
++ soc_tr |= soc - off;
+ regmap_write(map, MAX17042_SALRT_Th, soc_tr);
+ }
+
+@@ -876,6 +880,10 @@ static irqreturn_t max17042_thread_handler(int id, void *dev)
+ max17042_set_soc_threshold(chip, 1);
+ }
+
++ /* we implicitly handle all alerts via power_supply_changed */
++ regmap_clear_bits(chip->regmap, MAX17042_STATUS,
++ 0xFFFF & ~(STATUS_POR_BIT | STATUS_BST_BIT));
++
+ power_supply_changed(chip->battery);
+ return IRQ_HANDLED;
+ }
+diff --git a/drivers/power/supply/max8997_charger.c b/drivers/power/supply/max8997_charger.c
+index 25207fe2aa68e..bfa7a576523df 100644
+--- a/drivers/power/supply/max8997_charger.c
++++ b/drivers/power/supply/max8997_charger.c
+@@ -248,10 +248,10 @@ static int max8997_battery_probe(struct platform_device *pdev)
+ dev_info(&pdev->dev, "couldn't get charger regulator\n");
+ }
+ charger->edev = extcon_get_extcon_dev("max8997-muic");
+- if (IS_ERR_OR_NULL(charger->edev)) {
+- if (!charger->edev)
+- return -EPROBE_DEFER;
+- dev_info(charger->dev, "couldn't get extcon device\n");
++ if (IS_ERR(charger->edev)) {
++ dev_err_probe(charger->dev, PTR_ERR(charger->edev),
++ "couldn't get extcon device: max8997-muic\n");
++ return PTR_ERR(charger->edev);
+ }
+
+ if (!IS_ERR(charger->reg) && !IS_ERR_OR_NULL(charger->edev)) {
+diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c
+index 3abaa72e06683..f1248faf59058 100644
+--- a/drivers/power/supply/mt6360_charger.c
++++ b/drivers/power/supply/mt6360_charger.c
+@@ -799,7 +799,9 @@ static int mt6360_charger_probe(struct platform_device *pdev)
+ mci->vinovp = 6500000;
+ mutex_init(&mci->chgdet_lock);
+ platform_set_drvdata(pdev, mci);
+- devm_work_autocancel(&pdev->dev, &mci->chrdet_work, mt6360_chrdet_work);
++ ret = devm_work_autocancel(&pdev->dev, &mci->chrdet_work, mt6360_chrdet_work);
++ if (ret)
++ return dev_err_probe(&pdev->dev, ret, "Failed to set delayed work\n");
+
+ ret = device_property_read_u32(&pdev->dev, "richtek,vinovp-microvolt", &mci->vinovp);
+ if (ret)
+diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
+index 0c2132c7f5d40..8b2cd63016160 100644
+--- a/drivers/power/supply/power_supply_core.c
++++ b/drivers/power/supply/power_supply_core.c
+@@ -347,6 +347,10 @@ static int __power_supply_is_system_supplied(struct device *dev, void *data)
+ struct power_supply *psy = dev_get_drvdata(dev);
+ unsigned int *count = data;
+
++ if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, &ret))
++ if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE)
++ return 0;
++
+ (*count)++;
+ if (psy->desc->type != POWER_SUPPLY_TYPE_BATTERY)
+ if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE,
+@@ -365,8 +369,8 @@ int power_supply_is_system_supplied(void)
+ __power_supply_is_system_supplied);
+
+ /*
+- * If no power class device was found at all, most probably we are
+- * running on a desktop system, so assume we are on mains power.
++ * If no system scope power class device was found at all, most probably we
++ * are running on a desktop system, so assume we are on mains power.
+ */
+ if (count == 0)
+ return 1;
+@@ -375,46 +379,49 @@ int power_supply_is_system_supplied(void)
+ }
+ EXPORT_SYMBOL_GPL(power_supply_is_system_supplied);
+
+-static int __power_supply_get_supplier_max_current(struct device *dev,
+- void *data)
++struct psy_get_supplier_prop_data {
++ struct power_supply *psy;
++ enum power_supply_property psp;
++ union power_supply_propval *val;
++};
++
++static int __power_supply_get_supplier_property(struct device *dev, void *_data)
+ {
+- union power_supply_propval ret = {0,};
+ struct power_supply *epsy = dev_get_drvdata(dev);
+- struct power_supply *psy = data;
++ struct psy_get_supplier_prop_data *data = _data;
+
+- if (__power_supply_is_supplied_by(epsy, psy))
+- if (!epsy->desc->get_property(epsy,
+- POWER_SUPPLY_PROP_CURRENT_MAX,
+- &ret))
+- return ret.intval;
++ if (__power_supply_is_supplied_by(epsy, data->psy))
++ if (!epsy->desc->get_property(epsy, data->psp, data->val))
++ return 1; /* Success */
+
+- return 0;
++ return 0; /* Continue iterating */
+ }
+
+-int power_supply_set_input_current_limit_from_supplier(struct power_supply *psy)
++int power_supply_get_property_from_supplier(struct power_supply *psy,
++ enum power_supply_property psp,
++ union power_supply_propval *val)
+ {
+- union power_supply_propval val = {0,};
+- int curr;
+-
+- if (!psy->desc->set_property)
+- return -EINVAL;
++ struct psy_get_supplier_prop_data data = {
++ .psy = psy,
++ .psp = psp,
++ .val = val,
++ };
++ int ret;
+
+ /*
+ * This function is not intended for use with a supply with multiple
+- * suppliers, we simply pick the first supply to report a non 0
+- * max-current.
++ * suppliers, we simply pick the first supply to report the psp.
+ */
+- curr = class_for_each_device(power_supply_class, NULL, psy,
+- __power_supply_get_supplier_max_current);
+- if (curr <= 0)
+- return (curr == 0) ? -ENODEV : curr;
+-
+- val.intval = curr;
++ ret = class_for_each_device(power_supply_class, NULL, &data,
++ __power_supply_get_supplier_property);
++ if (ret < 0)
++ return ret;
++ if (ret == 0)
++ return -ENODEV;
+
+- return psy->desc->set_property(psy,
+- POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, &val);
++ return 0;
+ }
+-EXPORT_SYMBOL_GPL(power_supply_set_input_current_limit_from_supplier);
++EXPORT_SYMBOL_GPL(power_supply_get_property_from_supplier);
+
+ int power_supply_set_battery_charged(struct power_supply *psy)
+ {
+@@ -696,6 +703,11 @@ int power_supply_get_battery_info(struct power_supply *psy,
+ int i, tab_len, size;
+
+ propname = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index);
++ if (!propname) {
++ power_supply_put_battery_info(psy, info);
++ err = -ENOMEM;
++ goto out_put_node;
++ }
+ list = of_get_property(battery_np, propname, &size);
+ if (!list || !size) {
+ dev_err(&psy->dev, "failed to get %s\n", propname);
+@@ -853,6 +865,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info,
+ return NULL;
+
+ for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) {
++ /* Out of capacity tables */
++ if (!info->ocv_table[i])
++ break;
++
+ temp_diff = abs(info->ocv_temp[i] - temp);
+
+ if (temp_diff < best_temp_diff) {
+@@ -1005,87 +1021,6 @@ static void psy_unregister_thermal(struct power_supply *psy)
+ thermal_zone_device_unregister(psy->tzd);
+ }
+
+-/* thermal cooling device callbacks */
+-static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
+- unsigned long *state)
+-{
+- struct power_supply *psy;
+- union power_supply_propval val;
+- int ret;
+-
+- psy = tcd->devdata;
+- ret = power_supply_get_property(psy,
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+- if (ret)
+- return ret;
+-
+- *state = val.intval;
+-
+- return ret;
+-}
+-
+-static int ps_get_cur_charge_cntl_limit(struct thermal_cooling_device *tcd,
+- unsigned long *state)
+-{
+- struct power_supply *psy;
+- union power_supply_propval val;
+- int ret;
+-
+- psy = tcd->devdata;
+- ret = power_supply_get_property(psy,
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+- if (ret)
+- return ret;
+-
+- *state = val.intval;
+-
+- return ret;
+-}
+-
+-static int ps_set_cur_charge_cntl_limit(struct thermal_cooling_device *tcd,
+- unsigned long state)
+-{
+- struct power_supply *psy;
+- union power_supply_propval val;
+- int ret;
+-
+- psy = tcd->devdata;
+- val.intval = state;
+- ret = psy->desc->set_property(psy,
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+-
+- return ret;
+-}
+-
+-static const struct thermal_cooling_device_ops psy_tcd_ops = {
+- .get_max_state = ps_get_max_charge_cntl_limit,
+- .get_cur_state = ps_get_cur_charge_cntl_limit,
+- .set_cur_state = ps_set_cur_charge_cntl_limit,
+-};
+-
+-static int psy_register_cooler(struct power_supply *psy)
+-{
+- int i;
+-
+- /* Register for cooling device if psy can control charging */
+- for (i = 0; i < psy->desc->num_properties; i++) {
+- if (psy->desc->properties[i] ==
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT) {
+- psy->tcd = thermal_cooling_device_register(
+- (char *)psy->desc->name,
+- psy, &psy_tcd_ops);
+- return PTR_ERR_OR_ZERO(psy->tcd);
+- }
+- }
+- return 0;
+-}
+-
+-static void psy_unregister_cooler(struct power_supply *psy)
+-{
+- if (IS_ERR_OR_NULL(psy->tcd))
+- return;
+- thermal_cooling_device_unregister(psy->tcd);
+-}
+ #else
+ static int psy_register_thermal(struct power_supply *psy)
+ {
+@@ -1095,15 +1030,6 @@ static int psy_register_thermal(struct power_supply *psy)
+ static void psy_unregister_thermal(struct power_supply *psy)
+ {
+ }
+-
+-static int psy_register_cooler(struct power_supply *psy)
+-{
+- return 0;
+-}
+-
+-static void psy_unregister_cooler(struct power_supply *psy)
+-{
+-}
+ #endif
+
+ static struct power_supply *__must_check
+@@ -1179,10 +1105,6 @@ __power_supply_register(struct device *parent,
+ if (rc)
+ goto register_thermal_failed;
+
+- rc = psy_register_cooler(psy);
+- if (rc)
+- goto register_cooler_failed;
+-
+ rc = power_supply_create_triggers(psy);
+ if (rc)
+ goto create_triggers_failed;
+@@ -1212,12 +1134,10 @@ __power_supply_register(struct device *parent,
+ add_hwmon_sysfs_failed:
+ power_supply_remove_triggers(psy);
+ create_triggers_failed:
+- psy_unregister_cooler(psy);
+-register_cooler_failed:
+ psy_unregister_thermal(psy);
+ register_thermal_failed:
+- device_del(dev);
+ wakeup_init_failed:
++ device_del(dev);
+ device_add_failed:
+ check_supplies_failed:
+ dev_set_name_failed:
+@@ -1365,7 +1285,6 @@ void power_supply_unregister(struct power_supply *psy)
+ sysfs_remove_link(&psy->dev.kobj, "powers");
+ power_supply_remove_hwmon_sysfs(psy);
+ power_supply_remove_triggers(psy);
+- psy_unregister_cooler(psy);
+ psy_unregister_thermal(psy);
+ device_init_wakeup(&psy->dev, false);
+ device_unregister(&psy->dev);
+diff --git a/drivers/power/supply/power_supply_leds.c b/drivers/power/supply/power_supply_leds.c
+index d69880cc35931..b7a2778f878de 100644
+--- a/drivers/power/supply/power_supply_leds.c
++++ b/drivers/power/supply/power_supply_leds.c
+@@ -34,8 +34,9 @@ static void power_supply_update_bat_leds(struct power_supply *psy)
+ led_trigger_event(psy->charging_full_trig, LED_FULL);
+ led_trigger_event(psy->charging_trig, LED_OFF);
+ led_trigger_event(psy->full_trig, LED_FULL);
+- led_trigger_event(psy->charging_blink_full_solid_trig,
+- LED_FULL);
++ /* Going from blink to LED on requires a LED_OFF event to stop blink */
++ led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF);
++ led_trigger_event(psy->charging_blink_full_solid_trig, LED_FULL);
+ break;
+ case POWER_SUPPLY_STATUS_CHARGING:
+ led_trigger_event(psy->charging_full_trig, LED_FULL);
+diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
+index c3d7cbcd4fad5..7a0485c35ba9d 100644
+--- a/drivers/power/supply/power_supply_sysfs.c
++++ b/drivers/power/supply/power_supply_sysfs.c
+@@ -276,7 +276,8 @@ static ssize_t power_supply_show_property(struct device *dev,
+
+ if (ret < 0) {
+ if (ret == -ENODATA)
+- dev_dbg(dev, "driver has no data for `%s' property\n",
++ dev_dbg_ratelimited(dev,
++ "driver has no data for `%s' property\n",
+ attr->attr.name);
+ else if (ret != -ENODEV && ret != -EAGAIN)
+ dev_err_ratelimited(dev,
+diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c
+index 9ad0afe83d1b7..7a23c70f48791 100644
+--- a/drivers/power/supply/rt5033_battery.c
++++ b/drivers/power/supply/rt5033_battery.c
+@@ -60,7 +60,7 @@ static int rt5033_battery_get_watt_prop(struct i2c_client *client,
+ regmap_read(battery->regmap, regh, &msb);
+ regmap_read(battery->regmap, regl, &lsb);
+
+- ret = ((msb << 4) + (lsb >> 4)) * 1250 / 1000;
++ ret = ((msb << 4) + (lsb >> 4)) * 1250;
+
+ return ret;
+ }
+diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c
+index 6fa65d118ec12..bc927c0ddd343 100644
+--- a/drivers/power/supply/sbs-charger.c
++++ b/drivers/power/supply/sbs-charger.c
+@@ -18,12 +18,13 @@
+ #include <linux/interrupt.h>
+ #include <linux/regmap.h>
+ #include <linux/bitops.h>
++#include <linux/devm-helpers.h>
+
+ #define SBS_CHARGER_REG_SPEC_INFO 0x11
+ #define SBS_CHARGER_REG_STATUS 0x13
+ #define SBS_CHARGER_REG_ALARM_WARNING 0x16
+
+-#define SBS_CHARGER_STATUS_CHARGE_INHIBITED BIT(1)
++#define SBS_CHARGER_STATUS_CHARGE_INHIBITED BIT(0)
+ #define SBS_CHARGER_STATUS_RES_COLD BIT(9)
+ #define SBS_CHARGER_STATUS_RES_HOT BIT(10)
+ #define SBS_CHARGER_STATUS_BATTERY_PRESENT BIT(14)
+@@ -209,7 +210,12 @@ static int sbs_probe(struct i2c_client *client,
+ if (ret)
+ return dev_err_probe(&client->dev, ret, "Failed to request irq\n");
+ } else {
+- INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
++ ret = devm_delayed_work_autocancel(&client->dev, &chip->work,
++ sbs_delayed_work);
++ if (ret)
++ return dev_err_probe(&client->dev, ret,
++ "Failed to init work for polling\n");
++
+ schedule_delayed_work(&chip->work,
+ msecs_to_jiffies(SBS_CHARGER_POLL_TIME));
+ }
+@@ -220,15 +226,6 @@ static int sbs_probe(struct i2c_client *client,
+ return 0;
+ }
+
+-static int sbs_remove(struct i2c_client *client)
+-{
+- struct sbs_info *chip = i2c_get_clientdata(client);
+-
+- cancel_delayed_work_sync(&chip->work);
+-
+- return 0;
+-}
+-
+ #ifdef CONFIG_OF
+ static const struct of_device_id sbs_dt_ids[] = {
+ { .compatible = "sbs,sbs-charger" },
+@@ -245,7 +242,6 @@ MODULE_DEVICE_TABLE(i2c, sbs_id);
+
+ static struct i2c_driver sbs_driver = {
+ .probe = sbs_probe,
+- .remove = sbs_remove,
+ .id_table = sbs_id,
+ .driver = {
+ .name = "sbs-charger",
+diff --git a/drivers/power/supply/sc27xx_fuel_gauge.c b/drivers/power/supply/sc27xx_fuel_gauge.c
+index ae45069bd5e1b..3d8a85df87f4d 100644
+--- a/drivers/power/supply/sc27xx_fuel_gauge.c
++++ b/drivers/power/supply/sc27xx_fuel_gauge.c
+@@ -733,13 +733,6 @@ static int sc27xx_fgu_set_property(struct power_supply *psy,
+ return ret;
+ }
+
+-static void sc27xx_fgu_external_power_changed(struct power_supply *psy)
+-{
+- struct sc27xx_fgu_data *data = power_supply_get_drvdata(psy);
+-
+- power_supply_changed(data->battery);
+-}
+-
+ static int sc27xx_fgu_property_is_writeable(struct power_supply *psy,
+ enum power_supply_property psp)
+ {
+@@ -774,7 +767,7 @@ static const struct power_supply_desc sc27xx_fgu_desc = {
+ .num_properties = ARRAY_SIZE(sc27xx_fgu_props),
+ .get_property = sc27xx_fgu_get_property,
+ .set_property = sc27xx_fgu_set_property,
+- .external_power_changed = sc27xx_fgu_external_power_changed,
++ .external_power_changed = power_supply_changed,
+ .property_is_writeable = sc27xx_fgu_property_is_writeable,
+ .no_thermal = true,
+ };
+diff --git a/drivers/power/supply/wm8350_power.c b/drivers/power/supply/wm8350_power.c
+index e05cee457471b..908cfd45d2624 100644
+--- a/drivers/power/supply/wm8350_power.c
++++ b/drivers/power/supply/wm8350_power.c
+@@ -408,44 +408,112 @@ static const struct power_supply_desc wm8350_usb_desc = {
+ * Initialisation
+ *********************************************************************/
+
+-static void wm8350_init_charger(struct wm8350 *wm8350)
++static int wm8350_init_charger(struct wm8350 *wm8350)
+ {
++ int ret;
++
+ /* register our interest in charger events */
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
+ wm8350_charger_handler, 0, "Battery hot", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
++ if (ret)
++ goto err;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
+ wm8350_charger_handler, 0, "Battery cold", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
++ if (ret)
++ goto free_chg_bat_hot;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
+ wm8350_charger_handler, 0, "Battery fail", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
++ if (ret)
++ goto free_chg_bat_cold;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
+ wm8350_charger_handler, 0,
+ "Charger timeout", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
++ if (ret)
++ goto free_chg_bat_fail;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
+ wm8350_charger_handler, 0,
+ "Charge end", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
++ if (ret)
++ goto free_chg_to;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
+ wm8350_charger_handler, 0,
+ "Charge start", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
++ if (ret)
++ goto free_chg_end;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
+ wm8350_charger_handler, 0,
+ "Fast charge ready", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
++ if (ret)
++ goto free_chg_start;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
+ wm8350_charger_handler, 0,
+ "Battery <3.9V", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
++ if (ret)
++ goto free_chg_fast_rdy;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
+ wm8350_charger_handler, 0,
+ "Battery <3.1V", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
++ if (ret)
++ goto free_chg_vbatt_lt_3p9;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
+ wm8350_charger_handler, 0,
+ "Battery <2.85V", wm8350);
++ if (ret)
++ goto free_chg_vbatt_lt_3p1;
+
+ /* and supply change events */
+- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
+ wm8350_charger_handler, 0, "USB", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
++ if (ret)
++ goto free_chg_vbatt_lt_2p85;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
+ wm8350_charger_handler, 0, "Wall", wm8350);
+- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
++ if (ret)
++ goto free_ext_usb_fb;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
+ wm8350_charger_handler, 0, "Battery", wm8350);
++ if (ret)
++ goto free_ext_wall_fb;
++
++ return 0;
++
++free_ext_wall_fb:
++ wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350);
++free_ext_usb_fb:
++ wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350);
++free_chg_vbatt_lt_2p85:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350);
++free_chg_vbatt_lt_3p1:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350);
++free_chg_vbatt_lt_3p9:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350);
++free_chg_fast_rdy:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350);
++free_chg_start:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350);
++free_chg_end:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350);
++free_chg_to:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350);
++free_chg_bat_fail:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350);
++free_chg_bat_cold:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350);
++free_chg_bat_hot:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350);
++err:
++ return ret;
+ }
+
+ static void free_charger_irq(struct wm8350 *wm8350)
+@@ -456,6 +524,7 @@ static void free_charger_irq(struct wm8350 *wm8350)
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350);
++ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350);
+diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c
+index 7ed4e4bb26eca..fd33cdf9cf12c 100644
+--- a/drivers/power/supply/z2_battery.c
++++ b/drivers/power/supply/z2_battery.c
+@@ -206,10 +206,12 @@ static int z2_batt_probe(struct i2c_client *client,
+
+ charger->charge_gpiod = devm_gpiod_get_optional(&client->dev,
+ NULL, GPIOD_IN);
+- if (IS_ERR(charger->charge_gpiod))
+- return dev_err_probe(&client->dev,
++ if (IS_ERR(charger->charge_gpiod)) {
++ ret = dev_err_probe(&client->dev,
+ PTR_ERR(charger->charge_gpiod),
+ "failed to get charge GPIO\n");
++ goto err;
++ }
+
+ if (charger->charge_gpiod) {
+ gpiod_set_consumer_name(charger->charge_gpiod, "BATT CHRG");
+diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
+index 8242e8c5ed77e..503797b2a1c69 100644
+--- a/drivers/powercap/Kconfig
++++ b/drivers/powercap/Kconfig
+@@ -18,10 +18,12 @@ if POWERCAP
+ # Client driver configurations go here.
+ config INTEL_RAPL_CORE
+ tristate
++ depends on PCI
++ select IOSF_MBI
+
+ config INTEL_RAPL
+ tristate "Intel RAPL Support via MSR Interface"
+- depends on X86 && IOSF_MBI
++ depends on X86 && PCI
+ select INTEL_RAPL_CORE
+ help
+ This enables support for the Intel Running Average Power Limit (RAPL)
+diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
+index 7c0099e7a6d72..9dfc053878fda 100644
+--- a/drivers/powercap/intel_rapl_common.c
++++ b/drivers/powercap/intel_rapl_common.c
+@@ -938,6 +938,9 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
+ y = value & 0x1f;
+ value = (1 << y) * (4 + f) * rp->time_unit / 4;
+ } else {
++ if (value < rp->time_unit)
++ return 0;
++
+ do_div(value, rp->time_unit);
+ y = ilog2(value);
+ f = div64_u64(4 * (value - (1 << y)), 1 << y);
+@@ -979,7 +982,6 @@ static const struct rapl_defaults rapl_defaults_spr_server = {
+ .check_unit = rapl_check_unit_core,
+ .set_floor_freq = set_floor_freq_default,
+ .compute_time_window = rapl_compute_time_window_core,
+- .dram_domain_energy_unit = 15300,
+ .psys_domain_energy_unit = 1000000000,
+ };
+
+diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
+index 1be45f36ab6cd..c19e69e77093b 100644
+--- a/drivers/powercap/intel_rapl_msr.c
++++ b/drivers/powercap/intel_rapl_msr.c
+@@ -22,7 +22,6 @@
+ #include <linux/processor.h>
+ #include <linux/platform_device.h>
+
+-#include <asm/iosf_mbi.h>
+ #include <asm/cpu_device_id.h>
+ #include <asm/intel-family.h>
+
+diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
+index f0654a932b372..ff736b006198f 100644
+--- a/drivers/powercap/powercap_sys.c
++++ b/drivers/powercap/powercap_sys.c
+@@ -529,9 +529,6 @@ struct powercap_zone *powercap_register_zone(
+ power_zone->name = kstrdup(name, GFP_KERNEL);
+ if (!power_zone->name)
+ goto err_name_alloc;
+- dev_set_name(&power_zone->dev, "%s:%x",
+- dev_name(power_zone->dev.parent),
+- power_zone->id);
+ power_zone->constraints = kcalloc(nr_constraints,
+ sizeof(*power_zone->constraints),
+ GFP_KERNEL);
+@@ -554,9 +551,16 @@ struct powercap_zone *powercap_register_zone(
+ power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group;
+ power_zone->dev_attr_groups[1] = NULL;
+ power_zone->dev.groups = power_zone->dev_attr_groups;
++ dev_set_name(&power_zone->dev, "%s:%x",
++ dev_name(power_zone->dev.parent),
++ power_zone->id);
+ result = device_register(&power_zone->dev);
+- if (result)
+- goto err_dev_ret;
++ if (result) {
++ put_device(&power_zone->dev);
++ mutex_unlock(&control_type->lock);
++
++ return ERR_PTR(result);
++ }
+
+ control_type->nr_zones++;
+ mutex_unlock(&control_type->lock);
+diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
+index 35799e6401c99..2f4b11b4dfcd9 100644
+--- a/drivers/pps/clients/pps-gpio.c
++++ b/drivers/pps/clients/pps-gpio.c
+@@ -169,7 +169,7 @@ static int pps_gpio_probe(struct platform_device *pdev)
+ /* GPIO setup */
+ ret = pps_gpio_setup(dev);
+ if (ret)
+- return -EINVAL;
++ return ret;
+
+ /* IRQ setup */
+ ret = gpiod_to_irq(data->gpio_pin);
+diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
+index f9b2d66b04433..8a652a367625b 100644
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -317,11 +317,18 @@ no_memory:
+ }
+ EXPORT_SYMBOL(ptp_clock_register);
+
++static int unregister_vclock(struct device *dev, void *data)
++{
++ struct ptp_clock *ptp = dev_get_drvdata(dev);
++
++ ptp_vclock_unregister(info_to_vclock(ptp->info));
++ return 0;
++}
++
+ int ptp_clock_unregister(struct ptp_clock *ptp)
+ {
+ if (ptp_vclock_in_use(ptp)) {
+- pr_err("ptp: virtual clock in use\n");
+- return -EBUSY;
++ device_for_each_child(&ptp->dev, NULL, unregister_vclock);
+ }
+
+ ptp->defunct = 1;
+diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
+index caf9b37c5eb1e..e238ae8e94709 100644
+--- a/drivers/ptp/ptp_ocp.c
++++ b/drivers/ptp/ptp_ocp.c
+@@ -1049,10 +1049,11 @@ ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+ if (!ext)
+ return -ENOMEM;
+
+- err = -EINVAL;
+ ext->mem = ptp_ocp_get_mem(bp, r);
+- if (!ext->mem)
++ if (IS_ERR(ext->mem)) {
++ err = PTR_ERR(ext->mem);
+ goto out;
++ }
+
+ ext->bp = bp;
+ ext->info = r->extra;
+@@ -1122,8 +1123,8 @@ ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+ void __iomem *mem;
+
+ mem = ptp_ocp_get_mem(bp, r);
+- if (!mem)
+- return -EINVAL;
++ if (IS_ERR(mem))
++ return PTR_ERR(mem);
+
+ bp_assign_entry(bp, r, mem);
+
+diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
+index 08f4cf0ad9e3c..8fa9772acf79b 100644
+--- a/drivers/ptp/ptp_qoriq.c
++++ b/drivers/ptp/ptp_qoriq.c
+@@ -601,7 +601,7 @@ static int ptp_qoriq_probe(struct platform_device *dev)
+ return 0;
+
+ no_clock:
+- iounmap(ptp_qoriq->base);
++ iounmap(base);
+ no_ioremap:
+ release_resource(ptp_qoriq->rsrc);
+ no_resource:
+diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
+index 41b92dc2f011a..9233bfedeb174 100644
+--- a/drivers/ptp/ptp_sysfs.c
++++ b/drivers/ptp/ptp_sysfs.c
+@@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+ {
+ struct ptp_clock *ptp = dev_get_drvdata(dev);
+- return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
++ return sysfs_emit(page, "%s\n", ptp->info->name);
+ }
+ static DEVICE_ATTR_RO(clock_name);
+
+@@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr,
+
+ mutex_unlock(&ptp->pincfg_mux);
+
+- return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan);
++ return sysfs_emit(page, "%u %u\n", func, chan);
+ }
+
+ static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr,
+diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c
+index baee0379482bc..ab1d233173e13 100644
+--- a/drivers/ptp/ptp_vclock.c
++++ b/drivers/ptp/ptp_vclock.c
+@@ -185,8 +185,8 @@ out:
+ }
+ EXPORT_SYMBOL(ptp_get_vclocks_index);
+
+-void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
+- int vclock_index)
++ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps,
++ int vclock_index)
+ {
+ char name[PTP_CLOCK_NAME_LEN] = "";
+ struct ptp_vclock *vclock;
+@@ -198,12 +198,12 @@ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
+ snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", vclock_index);
+ dev = class_find_device_by_name(ptp_class, name);
+ if (!dev)
+- return;
++ return 0;
+
+ ptp = dev_get_drvdata(dev);
+ if (!ptp->is_virtual_clock) {
+ put_device(dev);
+- return;
++ return 0;
+ }
+
+ vclock = info_to_vclock(ptp->info);
+@@ -215,7 +215,7 @@ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
+ spin_unlock_irqrestore(&vclock->lock, flags);
+
+ put_device(dev);
+- hwtstamps->hwtstamp = ns_to_ktime(ns);
++ return ns_to_ktime(ns);
+ }
+ EXPORT_SYMBOL(ptp_convert_timestamp);
+ #endif
+diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c
+index ad37bc46f2721..5fa91f4cda7ac 100644
+--- a/drivers/pwm/pwm-ab8500.c
++++ b/drivers/pwm/pwm-ab8500.c
+@@ -96,7 +96,7 @@ static int ab8500_pwm_probe(struct platform_device *pdev)
+ int err;
+
+ if (pdev->id < 1 || pdev->id > 31)
+- return dev_err_probe(&pdev->dev, EINVAL, "Invalid device id %d\n", pdev->id);
++ return dev_err_probe(&pdev->dev, -EINVAL, "Invalid device id %d\n", pdev->id);
+
+ /*
+ * Nothing to be done in probe, this is required to get the
+diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
+index 36f7ea381838d..bb415be73bbe4 100644
+--- a/drivers/pwm/pwm-atmel-tcb.c
++++ b/drivers/pwm/pwm-atmel-tcb.c
+@@ -422,13 +422,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+ struct atmel_tcb_pwm_chip *tcbpwm;
+ const struct atmel_tcb_config *config;
+ struct device_node *np = pdev->dev.of_node;
+- struct regmap *regmap;
+- struct clk *clk, *gclk = NULL;
+- struct clk *slow_clk;
+ char clk_name[] = "t0_clk";
+ int err;
+ int channel;
+
++ tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
++ if (tcbpwm == NULL)
++ return -ENOMEM;
++
+ err = of_property_read_u32(np, "reg", &channel);
+ if (err < 0) {
+ dev_err(&pdev->dev,
+@@ -437,49 +438,43 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+ return err;
+ }
+
+- regmap = syscon_node_to_regmap(np->parent);
+- if (IS_ERR(regmap))
+- return PTR_ERR(regmap);
++ tcbpwm->regmap = syscon_node_to_regmap(np->parent);
++ if (IS_ERR(tcbpwm->regmap))
++ return PTR_ERR(tcbpwm->regmap);
+
+- slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+- if (IS_ERR(slow_clk))
+- return PTR_ERR(slow_clk);
++ tcbpwm->slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
++ if (IS_ERR(tcbpwm->slow_clk))
++ return PTR_ERR(tcbpwm->slow_clk);
+
+ clk_name[1] += channel;
+- clk = of_clk_get_by_name(np->parent, clk_name);
+- if (IS_ERR(clk))
+- clk = of_clk_get_by_name(np->parent, "t0_clk");
+- if (IS_ERR(clk))
+- return PTR_ERR(clk);
++ tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
++ if (IS_ERR(tcbpwm->clk))
++ tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
++ if (IS_ERR(tcbpwm->clk)) {
++ err = PTR_ERR(tcbpwm->clk);
++ goto err_slow_clk;
++ }
+
+ match = of_match_node(atmel_tcb_of_match, np->parent);
+ config = match->data;
+
+ if (config->has_gclk) {
+- gclk = of_clk_get_by_name(np->parent, "gclk");
+- if (IS_ERR(gclk))
+- return PTR_ERR(gclk);
+- }
+-
+- tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
+- if (tcbpwm == NULL) {
+- err = -ENOMEM;
+- goto err_slow_clk;
++ tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
++ if (IS_ERR(tcbpwm->gclk)) {
++ err = PTR_ERR(tcbpwm->gclk);
++ goto err_clk;
++ }
+ }
+
+ tcbpwm->chip.dev = &pdev->dev;
+ tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
+ tcbpwm->chip.npwm = NPWM;
+ tcbpwm->channel = channel;
+- tcbpwm->regmap = regmap;
+- tcbpwm->clk = clk;
+- tcbpwm->gclk = gclk;
+- tcbpwm->slow_clk = slow_clk;
+ tcbpwm->width = config->counter_width;
+
+- err = clk_prepare_enable(slow_clk);
++ err = clk_prepare_enable(tcbpwm->slow_clk);
+ if (err)
+- goto err_slow_clk;
++ goto err_gclk;
+
+ spin_lock_init(&tcbpwm->lock);
+
+@@ -494,23 +489,28 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+ err_disable_clk:
+ clk_disable_unprepare(tcbpwm->slow_clk);
+
++err_gclk:
++ clk_put(tcbpwm->gclk);
++
++err_clk:
++ clk_put(tcbpwm->clk);
++
+ err_slow_clk:
+- clk_put(slow_clk);
++ clk_put(tcbpwm->slow_clk);
+
+ return err;
+ }
+
+-static int atmel_tcb_pwm_remove(struct platform_device *pdev)
++static void atmel_tcb_pwm_remove(struct platform_device *pdev)
+ {
+ struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
+
+ pwmchip_remove(&tcbpwm->chip);
+
+ clk_disable_unprepare(tcbpwm->slow_clk);
+- clk_put(tcbpwm->slow_clk);
++ clk_put(tcbpwm->gclk);
+ clk_put(tcbpwm->clk);
+-
+- return 0;
++ clk_put(tcbpwm->slow_clk);
+ }
+
+ static const struct of_device_id atmel_tcb_pwm_dt_ids[] = {
+@@ -564,7 +564,7 @@ static struct platform_driver atmel_tcb_pwm_driver = {
+ .pm = &atmel_tcb_pwm_pm_ops,
+ },
+ .probe = atmel_tcb_pwm_probe,
+- .remove = atmel_tcb_pwm_remove,
++ .remove_new = atmel_tcb_pwm_remove,
+ };
+ module_platform_driver(atmel_tcb_pwm_driver);
+
+diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
+index 5e29d9c682c34..adfd03c11e18c 100644
+--- a/drivers/pwm/pwm-cros-ec.c
++++ b/drivers/pwm/pwm-cros-ec.c
+@@ -157,6 +157,7 @@ static void cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+
+ state->enabled = (ret > 0);
+ state->period = EC_PWM_MAX_DUTY;
++ state->polarity = PWM_POLARITY_NORMAL;
+
+ /*
+ * Note that "disabled" and "duty cycle == 0" are treated the same. If
+diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
+index 333f1b18ff4e6..54035563fc0ef 100644
+--- a/drivers/pwm/pwm-hibvt.c
++++ b/drivers/pwm/pwm-hibvt.c
+@@ -146,6 +146,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+
+ value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm));
+ state->enabled = (PWM_ENABLE_MASK & value);
++ state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
+ }
+
+ static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c
+index e5e7b7c339a8f..7a53bf51964f2 100644
+--- a/drivers/pwm/pwm-imx-tpm.c
++++ b/drivers/pwm/pwm-imx-tpm.c
+@@ -397,6 +397,13 @@ static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev)
+ if (tpm->enable_count > 0)
+ return -EBUSY;
+
++ /*
++ * Force 'real_period' to be zero to force period update code
++ * can be executed after system resume back, since suspend causes
++ * the period related registers to become their reset values.
++ */
++ tpm->real_period = 0;
++
+ clk_disable_unprepare(tpm->clk);
+
+ return 0;
+diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c
+index 54bd95a5cab0c..8cee8f626d4e7 100644
+--- a/drivers/pwm/pwm-iqs620a.c
++++ b/drivers/pwm/pwm-iqs620a.c
+@@ -126,6 +126,7 @@ static void iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ mutex_unlock(&iqs620_pwm->lock);
+
+ state->period = IQS620_PWM_PERIOD_NS;
++ state->polarity = PWM_POLARITY_NORMAL;
+ }
+
+ static int iqs620_pwm_notifier(struct notifier_block *notifier,
+diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c
+index ea17d446a6276..2bd04ecb508cf 100644
+--- a/drivers/pwm/pwm-lp3943.c
++++ b/drivers/pwm/pwm-lp3943.c
+@@ -125,6 +125,7 @@ static int lp3943_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+ if (err)
+ return err;
+
++ duty_ns = min(duty_ns, period_ns);
+ val = (u8)(duty_ns * LP3943_MAX_DUTY / period_ns);
+
+ return lp3943_write_byte(lp3943, reg_duty, val);
+diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c
+index 8e461f3baa05a..43b5509dde513 100644
+--- a/drivers/pwm/pwm-lpc18xx-sct.c
++++ b/drivers/pwm/pwm-lpc18xx-sct.c
+@@ -76,6 +76,8 @@
+ #define LPC18XX_PWM_EVENT_PERIOD 0
+ #define LPC18XX_PWM_EVENT_MAX 16
+
++#define LPC18XX_NUM_PWMS 16
++
+ /* SCT conflict resolution */
+ enum lpc18xx_pwm_res_action {
+ LPC18XX_PWM_RES_NONE,
+@@ -96,11 +98,12 @@ struct lpc18xx_pwm_chip {
+ unsigned long clk_rate;
+ unsigned int period_ns;
+ unsigned int min_period_ns;
+- unsigned int max_period_ns;
++ u64 max_period_ns;
+ unsigned int period_event;
+ unsigned long event_map;
+ struct mutex res_lock;
+ struct mutex period_lock;
++ struct lpc18xx_pwm_data channeldata[LPC18XX_NUM_PWMS];
+ };
+
+ static inline struct lpc18xx_pwm_chip *
+@@ -142,40 +145,48 @@ static void lpc18xx_pwm_set_conflict_res(struct lpc18xx_pwm_chip *lpc18xx_pwm,
+ mutex_unlock(&lpc18xx_pwm->res_lock);
+ }
+
+-static void lpc18xx_pwm_config_period(struct pwm_chip *chip, int period_ns)
++static void lpc18xx_pwm_config_period(struct pwm_chip *chip, u64 period_ns)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- u64 val;
++ u32 val;
+
+- val = (u64)period_ns * lpc18xx_pwm->clk_rate;
+- do_div(val, NSEC_PER_SEC);
++ /*
++ * With clk_rate < NSEC_PER_SEC this cannot overflow.
++ * With period_ns < max_period_ns this also fits into an u32.
++ * As period_ns >= min_period_ns = DIV_ROUND_UP(NSEC_PER_SEC, lpc18xx_pwm->clk_rate);
++ * we have val >= 1.
++ */
++ val = mul_u64_u64_div_u64(period_ns, lpc18xx_pwm->clk_rate, NSEC_PER_SEC);
+
+ lpc18xx_pwm_writel(lpc18xx_pwm,
+ LPC18XX_PWM_MATCH(lpc18xx_pwm->period_event),
+- (u32)val - 1);
++ val - 1);
+
+ lpc18xx_pwm_writel(lpc18xx_pwm,
+ LPC18XX_PWM_MATCHREL(lpc18xx_pwm->period_event),
+- (u32)val - 1);
++ val - 1);
+ }
+
+ static void lpc18xx_pwm_config_duty(struct pwm_chip *chip,
+- struct pwm_device *pwm, int duty_ns)
++ struct pwm_device *pwm, u64 duty_ns)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- struct lpc18xx_pwm_data *lpc18xx_data = pwm_get_chip_data(pwm);
+- u64 val;
++ struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm];
++ u32 val;
+
+- val = (u64)duty_ns * lpc18xx_pwm->clk_rate;
+- do_div(val, NSEC_PER_SEC);
++ /*
++ * With clk_rate < NSEC_PER_SEC this cannot overflow.
++ * With duty_ns <= period_ns < max_period_ns this also fits into an u32.
++ */
++ val = mul_u64_u64_div_u64(duty_ns, lpc18xx_pwm->clk_rate, NSEC_PER_SEC);
+
+ lpc18xx_pwm_writel(lpc18xx_pwm,
+ LPC18XX_PWM_MATCH(lpc18xx_data->duty_event),
+- (u32)val);
++ val);
+
+ lpc18xx_pwm_writel(lpc18xx_pwm,
+ LPC18XX_PWM_MATCHREL(lpc18xx_data->duty_event),
+- (u32)val);
++ val);
+ }
+
+ static int lpc18xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+@@ -233,7 +244,7 @@ static int lpc18xx_pwm_set_polarity(struct pwm_chip *chip,
+ static int lpc18xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- struct lpc18xx_pwm_data *lpc18xx_data = pwm_get_chip_data(pwm);
++ struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm];
+ enum lpc18xx_pwm_res_action res_action;
+ unsigned int set_event, clear_event;
+
+@@ -268,7 +279,7 @@ static int lpc18xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+ static void lpc18xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- struct lpc18xx_pwm_data *lpc18xx_data = pwm_get_chip_data(pwm);
++ struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm];
+
+ lpc18xx_pwm_writel(lpc18xx_pwm,
+ LPC18XX_PWM_EVCTRL(lpc18xx_data->duty_event), 0);
+@@ -279,7 +290,7 @@ static void lpc18xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+ static int lpc18xx_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- struct lpc18xx_pwm_data *lpc18xx_data = pwm_get_chip_data(pwm);
++ struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm];
+ unsigned long event;
+
+ event = find_first_zero_bit(&lpc18xx_pwm->event_map,
+@@ -300,7 +311,7 @@ static int lpc18xx_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
+ static void lpc18xx_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip);
+- struct lpc18xx_pwm_data *lpc18xx_data = pwm_get_chip_data(pwm);
++ struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm];
+
+ clear_bit(lpc18xx_data->duty_event, &lpc18xx_pwm->event_map);
+ }
+@@ -324,8 +335,7 @@ MODULE_DEVICE_TABLE(of, lpc18xx_pwm_of_match);
+ static int lpc18xx_pwm_probe(struct platform_device *pdev)
+ {
+ struct lpc18xx_pwm_chip *lpc18xx_pwm;
+- struct pwm_device *pwm;
+- int ret, i;
++ int ret;
+ u64 val;
+
+ lpc18xx_pwm = devm_kzalloc(&pdev->dev, sizeof(*lpc18xx_pwm),
+@@ -358,19 +368,34 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
+ goto disable_pwmclk;
+ }
+
++ /*
++ * If clkrate is too fast, the calculations in .apply() might overflow.
++ */
++ if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) {
++ ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
++ goto disable_pwmclk;
++ }
++
++ /*
++ * If clkrate is too fast, the calculations in .apply() might overflow.
++ */
++ if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) {
++ ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
++ goto disable_pwmclk;
++ }
++
+ mutex_init(&lpc18xx_pwm->res_lock);
+ mutex_init(&lpc18xx_pwm->period_lock);
+
+- val = (u64)NSEC_PER_SEC * LPC18XX_PWM_TIMER_MAX;
+- do_div(val, lpc18xx_pwm->clk_rate);
+- lpc18xx_pwm->max_period_ns = val;
++ lpc18xx_pwm->max_period_ns =
++ mul_u64_u64_div_u64(NSEC_PER_SEC, LPC18XX_PWM_TIMER_MAX, lpc18xx_pwm->clk_rate);
+
+ lpc18xx_pwm->min_period_ns = DIV_ROUND_UP(NSEC_PER_SEC,
+ lpc18xx_pwm->clk_rate);
+
+ lpc18xx_pwm->chip.dev = &pdev->dev;
+ lpc18xx_pwm->chip.ops = &lpc18xx_pwm_ops;
+- lpc18xx_pwm->chip.npwm = 16;
++ lpc18xx_pwm->chip.npwm = LPC18XX_NUM_PWMS;
+
+ /* SCT counter must be in unify (32 bit) mode */
+ lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CONFIG,
+@@ -395,40 +420,23 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
+ lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_LIMIT,
+ BIT(lpc18xx_pwm->period_event));
+
++ val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
++ val &= ~LPC18XX_PWM_BIDIR;
++ val &= ~LPC18XX_PWM_CTRL_HALT;
++ val &= ~LPC18XX_PWM_PRE_MASK;
++ val |= LPC18XX_PWM_PRE(0);
++ lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
++
+ ret = pwmchip_add(&lpc18xx_pwm->chip);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret);
+ goto disable_pwmclk;
+ }
+
+- for (i = 0; i < lpc18xx_pwm->chip.npwm; i++) {
+- struct lpc18xx_pwm_data *data;
+-
+- pwm = &lpc18xx_pwm->chip.pwms[i];
+-
+- data = devm_kzalloc(lpc18xx_pwm->dev, sizeof(*data),
+- GFP_KERNEL);
+- if (!data) {
+- ret = -ENOMEM;
+- goto remove_pwmchip;
+- }
+-
+- pwm_set_chip_data(pwm, data);
+- }
+-
+ platform_set_drvdata(pdev, lpc18xx_pwm);
+
+- val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
+- val &= ~LPC18XX_PWM_BIDIR;
+- val &= ~LPC18XX_PWM_CTRL_HALT;
+- val &= ~LPC18XX_PWM_PRE_MASK;
+- val |= LPC18XX_PWM_PRE(0);
+- lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
+-
+ return 0;
+
+-remove_pwmchip:
+- pwmchip_remove(&lpc18xx_pwm->chip);
+ disable_pwmclk:
+ clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
+ return ret;
+diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
+index ddeab5687cb81..45b613dbc1c7b 100644
+--- a/drivers/pwm/pwm-lpc32xx.c
++++ b/drivers/pwm/pwm-lpc32xx.c
+@@ -51,10 +51,10 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+ if (duty_cycles > 255)
+ duty_cycles = 255;
+
+- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++ val = readl(lpc32xx->base);
+ val &= ~0xFFFF;
+ val |= (period_cycles << 8) | duty_cycles;
+- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++ writel(val, lpc32xx->base);
+
+ return 0;
+ }
+@@ -69,9 +69,9 @@ static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+ if (ret)
+ return ret;
+
+- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++ val = readl(lpc32xx->base);
+ val |= PWM_ENABLE;
+- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++ writel(val, lpc32xx->base);
+
+ return 0;
+ }
+@@ -81,9 +81,9 @@ static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+ struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
+ u32 val;
+
+- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++ val = readl(lpc32xx->base);
+ val &= ~PWM_ENABLE;
+- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++ writel(val, lpc32xx->base);
+
+ clk_disable_unprepare(lpc32xx->clk);
+ }
+@@ -118,9 +118,9 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
+ lpc32xx->chip.npwm = 1;
+
+ /* If PWM is disabled, configure the output to the default value */
+- val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
++ val = readl(lpc32xx->base);
+ val &= ~PWM_PIN_LEVEL;
+- writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
++ writel(val, lpc32xx->base);
+
+ ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
+ if (ret < 0) {
+diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
+index 0d4dd80e9f07f..f8f9a74891293 100644
+--- a/drivers/pwm/pwm-mediatek.c
++++ b/drivers/pwm/pwm-mediatek.c
+@@ -275,7 +275,7 @@ static const struct pwm_mediatek_of_data mt2712_pwm_data = {
+ static const struct pwm_mediatek_of_data mt7622_pwm_data = {
+ .num_pwms = 6,
+ .pwm45_fixup = false,
+- .has_ck_26m_sel = false,
++ .has_ck_26m_sel = true,
+ };
+
+ static const struct pwm_mediatek_of_data mt7623_pwm_data = {
+diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
+index 3cf3bcf5ddfcc..ec6a544d6f526 100644
+--- a/drivers/pwm/pwm-meson.c
++++ b/drivers/pwm/pwm-meson.c
+@@ -147,12 +147,13 @@ static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
+ return err;
+ }
+
+- return pwm_set_chip_data(pwm, channel);
++ return 0;
+ }
+
+ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
+ {
+- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
++ struct meson_pwm *meson = to_meson_pwm(chip);
++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
+
+ if (channel)
+ clk_disable_unprepare(channel->clk);
+@@ -161,13 +162,20 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
+ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
+ const struct pwm_state *state)
+ {
+- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
+- unsigned int duty, period, pre_div, cnt, duty_cnt;
++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
++ unsigned int pre_div, cnt, duty_cnt;
+ unsigned long fin_freq;
++ u64 duty, period;
+
+ duty = state->duty_cycle;
+ period = state->period;
+
++ /*
++ * Note this is wrong. The result is an output wave that isn't really
++ * inverted and so is wrongly identified by .get_state as normal.
++ * Fixing this needs some care however as some machines might rely on
++ * this.
++ */
+ if (state->polarity == PWM_POLARITY_INVERSED)
+ duty = period - duty;
+
+@@ -179,19 +187,19 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
+
+ dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq);
+
+- pre_div = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * 0xffffLL);
++ pre_div = div64_u64(fin_freq * period, NSEC_PER_SEC * 0xffffLL);
+ if (pre_div > MISC_CLK_DIV_MASK) {
+ dev_err(meson->chip.dev, "unable to get period pre_div\n");
+ return -EINVAL;
+ }
+
+- cnt = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * (pre_div + 1));
++ cnt = div64_u64(fin_freq * period, NSEC_PER_SEC * (pre_div + 1));
+ if (cnt > 0xffff) {
+ dev_err(meson->chip.dev, "unable to get period cnt\n");
+ return -EINVAL;
+ }
+
+- dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period,
++ dev_dbg(meson->chip.dev, "period=%llu pre_div=%u cnt=%u\n", period,
+ pre_div, cnt);
+
+ if (duty == period) {
+@@ -204,14 +212,13 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
+ channel->lo = cnt;
+ } else {
+ /* Then check is we can have the duty with the same pre_div */
+- duty_cnt = div64_u64(fin_freq * (u64)duty,
+- NSEC_PER_SEC * (pre_div + 1));
++ duty_cnt = div64_u64(fin_freq * duty, NSEC_PER_SEC * (pre_div + 1));
+ if (duty_cnt > 0xffff) {
+ dev_err(meson->chip.dev, "unable to get duty cycle\n");
+ return -EINVAL;
+ }
+
+- dev_dbg(meson->chip.dev, "duty=%u pre_div=%u duty_cnt=%u\n",
++ dev_dbg(meson->chip.dev, "duty=%llu pre_div=%u duty_cnt=%u\n",
+ duty, pre_div, duty_cnt);
+
+ channel->pre_div = pre_div;
+@@ -224,7 +231,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
+
+ static void meson_pwm_enable(struct meson_pwm *meson, struct pwm_device *pwm)
+ {
+- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
+ struct meson_pwm_channel_data *channel_data;
+ unsigned long flags;
+ u32 value;
+@@ -267,8 +274,8 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm)
+ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+ {
+- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
+ struct meson_pwm *meson = to_meson_pwm(chip);
++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm];
+ int err = 0;
+
+ if (!state)
+@@ -366,6 +373,7 @@ static void meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ state->period = 0;
+ state->duty_cycle = 0;
+ }
++ state->polarity = PWM_POLARITY_NORMAL;
+ }
+
+ static const struct pwm_ops meson_pwm_ops = {
+@@ -417,7 +425,7 @@ static const struct meson_pwm_data pwm_axg_ee_data = {
+ };
+
+ static const char * const pwm_axg_ao_parent_names[] = {
+- "aoclk81", "xtal", "fclk_div4", "fclk_div5"
++ "xtal", "axg_ao_clk81", "fclk_div4", "fclk_div5"
+ };
+
+ static const struct meson_pwm_data pwm_axg_ao_data = {
+@@ -426,7 +434,7 @@ static const struct meson_pwm_data pwm_axg_ao_data = {
+ };
+
+ static const char * const pwm_g12a_ao_ab_parent_names[] = {
+- "xtal", "aoclk81", "fclk_div4", "fclk_div5"
++ "xtal", "g12a_ao_clk81", "fclk_div4", "fclk_div5"
+ };
+
+ static const struct meson_pwm_data pwm_g12a_ao_ab_data = {
+@@ -435,7 +443,7 @@ static const struct meson_pwm_data pwm_g12a_ao_ab_data = {
+ };
+
+ static const char * const pwm_g12a_ao_cd_parent_names[] = {
+- "xtal", "aoclk81",
++ "xtal", "g12a_ao_clk81",
+ };
+
+ static const struct meson_pwm_data pwm_g12a_ao_cd_data = {
+diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c
+index c605013e4114c..a581d8adab59c 100644
+--- a/drivers/pwm/pwm-mtk-disp.c
++++ b/drivers/pwm/pwm-mtk-disp.c
+@@ -79,14 +79,11 @@ static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+- if (!state->enabled) {
+- mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+- 0x0);
+-
+- if (mdp->enabled) {
+- clk_disable_unprepare(mdp->clk_mm);
+- clk_disable_unprepare(mdp->clk_main);
+- }
++ if (!state->enabled && mdp->enabled) {
++ mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN,
++ mdp->data->enable_mask, 0x0);
++ clk_disable_unprepare(mdp->clk_mm);
++ clk_disable_unprepare(mdp->clk_main);
+
+ mdp->enabled = false;
+ return 0;
+@@ -138,6 +135,19 @@ static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ high_width = mul_u64_u64_div_u64(state->duty_cycle, rate, div);
+ value = period | (high_width << PWM_HIGH_WIDTH_SHIFT);
+
++ if (mdp->data->bls_debug && !mdp->data->has_commit) {
++ /*
++ * For MT2701, disable double buffer before writing register
++ * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
++ */
++ mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
++ mdp->data->bls_debug_mask,
++ mdp->data->bls_debug_mask);
++ mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
++ mdp->data->con0_sel,
++ mdp->data->con0_sel);
++ }
++
+ mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
+ PWM_CLKDIV_MASK,
+ clk_div << PWM_CLKDIV_SHIFT);
+@@ -152,17 +162,6 @@ static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ mtk_disp_pwm_update_bits(mdp, mdp->data->commit,
+ mdp->data->commit_mask,
+ 0x0);
+- } else {
+- /*
+- * For MT2701, disable double buffer before writing register
+- * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
+- */
+- mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
+- mdp->data->bls_debug_mask,
+- mdp->data->bls_debug_mask);
+- mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
+- mdp->data->con0_sel,
+- mdp->data->con0_sel);
+ }
+
+ mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+@@ -178,7 +177,7 @@ static void mtk_disp_pwm_get_state(struct pwm_chip *chip,
+ {
+ struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
+ u64 rate, period, high_width;
+- u32 clk_div, con0, con1;
++ u32 clk_div, pwm_en, con0, con1;
+ int err;
+
+ err = clk_prepare_enable(mdp->clk_main);
+@@ -194,10 +193,21 @@ static void mtk_disp_pwm_get_state(struct pwm_chip *chip,
+ return;
+ }
+
++ /*
++ * Apply DISP_PWM_DEBUG settings to choose whether to enable or disable
++ * registers double buffer and manual commit to working register before
++ * performing any read/write operation
++ */
++ if (mdp->data->bls_debug)
++ mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
++ mdp->data->bls_debug_mask,
++ mdp->data->bls_debug_mask);
++
+ rate = clk_get_rate(mdp->clk_main);
+ con0 = readl(mdp->base + mdp->data->con0);
+ con1 = readl(mdp->base + mdp->data->con1);
+- state->enabled = !!(con0 & BIT(0));
++ pwm_en = readl(mdp->base + DISP_PWM_EN);
++ state->enabled = !!(pwm_en & mdp->data->enable_mask);
+ clk_div = FIELD_GET(PWM_CLKDIV_MASK, con0);
+ period = FIELD_GET(PWM_PERIOD_MASK, con1);
+ /*
+diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c
+index 579a15240e0a8..c877de37734d9 100644
+--- a/drivers/pwm/pwm-raspberrypi-poe.c
++++ b/drivers/pwm/pwm-raspberrypi-poe.c
+@@ -66,7 +66,7 @@ static int raspberrypi_pwm_get_property(struct rpi_firmware *firmware,
+ u32 reg, u32 *val)
+ {
+ struct raspberrypi_pwm_prop msg = {
+- .reg = reg
++ .reg = cpu_to_le32(reg),
+ };
+ int ret;
+
+diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c
+index 253c4a17d2553..41a6fc47cc163 100644
+--- a/drivers/pwm/pwm-sifive.c
++++ b/drivers/pwm/pwm-sifive.c
+@@ -23,7 +23,7 @@
+ #define PWM_SIFIVE_PWMCFG 0x0
+ #define PWM_SIFIVE_PWMCOUNT 0x8
+ #define PWM_SIFIVE_PWMS 0x10
+-#define PWM_SIFIVE_PWMCMP0 0x20
++#define PWM_SIFIVE_PWMCMP(i) (0x20 + 4 * (i))
+
+ /* PWMCFG fields */
+ #define PWM_SIFIVE_PWMCFG_SCALE GENMASK(3, 0)
+@@ -36,14 +36,12 @@
+ #define PWM_SIFIVE_PWMCFG_GANG BIT(24)
+ #define PWM_SIFIVE_PWMCFG_IP BIT(28)
+
+-/* PWM_SIFIVE_SIZE_PWMCMP is used to calculate offset for pwmcmpX registers */
+-#define PWM_SIFIVE_SIZE_PWMCMP 4
+ #define PWM_SIFIVE_CMPWIDTH 16
+ #define PWM_SIFIVE_DEFAULT_PERIOD 10000000
+
+ struct pwm_sifive_ddata {
+ struct pwm_chip chip;
+- struct mutex lock; /* lock to protect user_count */
++ struct mutex lock; /* lock to protect user_count and approx_period */
+ struct notifier_block notifier;
+ struct clk *clk;
+ void __iomem *regs;
+@@ -78,6 +76,7 @@ static void pwm_sifive_free(struct pwm_chip *chip, struct pwm_device *pwm)
+ mutex_unlock(&ddata->lock);
+ }
+
++/* Called holding ddata->lock */
+ static void pwm_sifive_update_clock(struct pwm_sifive_ddata *ddata,
+ unsigned long rate)
+ {
+@@ -112,8 +111,7 @@ static void pwm_sifive_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+ u32 duty, val;
+
+- duty = readl(ddata->regs + PWM_SIFIVE_PWMCMP0 +
+- pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP);
++ duty = readl(ddata->regs + PWM_SIFIVE_PWMCMP(pwm->hwpwm));
+
+ state->enabled = duty > 0;
+
+@@ -166,7 +164,6 @@ static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ return ret;
+ }
+
+- mutex_lock(&ddata->lock);
+ cur_state = pwm->state;
+ enabled = cur_state.enabled;
+
+@@ -185,24 +182,31 @@ static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ /* The hardware cannot generate a 100% duty cycle */
+ frac = min(frac, (1U << PWM_SIFIVE_CMPWIDTH) - 1);
+
++ mutex_lock(&ddata->lock);
+ if (state->period != ddata->approx_period) {
+- if (ddata->user_count != 1) {
++ /*
++ * Don't let a 2nd user change the period underneath the 1st user.
++ * However if ddate->approx_period == 0 this is the first time we set
++ * any period, so let whoever gets here first set the period so other
++ * users who agree on the period won't fail.
++ */
++ if (ddata->user_count != 1 && ddata->approx_period) {
++ mutex_unlock(&ddata->lock);
+ ret = -EBUSY;
+ goto exit;
+ }
+ ddata->approx_period = state->period;
+ pwm_sifive_update_clock(ddata, clk_get_rate(ddata->clk));
+ }
++ mutex_unlock(&ddata->lock);
+
+- writel(frac, ddata->regs + PWM_SIFIVE_PWMCMP0 +
+- pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP);
++ writel(frac, ddata->regs + PWM_SIFIVE_PWMCMP(pwm->hwpwm));
+
+ if (state->enabled != enabled)
+ pwm_sifive_enable(chip, state->enabled);
+
+ exit:
+ clk_disable(ddata->clk);
+- mutex_unlock(&ddata->lock);
+ return ret;
+ }
+
+@@ -221,8 +225,11 @@ static int pwm_sifive_clock_notifier(struct notifier_block *nb,
+ struct pwm_sifive_ddata *ddata =
+ container_of(nb, struct pwm_sifive_ddata, notifier);
+
+- if (event == POST_RATE_CHANGE)
++ if (event == POST_RATE_CHANGE) {
++ mutex_lock(&ddata->lock);
+ pwm_sifive_update_clock(ddata, ndata->new_rate);
++ mutex_unlock(&ddata->lock);
++ }
+
+ return NOTIFY_OK;
+ }
+@@ -233,6 +240,8 @@ static int pwm_sifive_probe(struct platform_device *pdev)
+ struct pwm_sifive_ddata *ddata;
+ struct pwm_chip *chip;
+ int ret;
++ u32 val;
++ unsigned int enabled_pwms = 0, enabled_clks = 1;
+
+ ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
+ if (!ddata)
+@@ -259,6 +268,33 @@ static int pwm_sifive_probe(struct platform_device *pdev)
+ return ret;
+ }
+
++ val = readl(ddata->regs + PWM_SIFIVE_PWMCFG);
++ if (val & PWM_SIFIVE_PWMCFG_EN_ALWAYS) {
++ unsigned int i;
++
++ for (i = 0; i < chip->npwm; ++i) {
++ val = readl(ddata->regs + PWM_SIFIVE_PWMCMP(i));
++ if (val > 0)
++ ++enabled_pwms;
++ }
++ }
++
++ /* The clk should be on once for each running PWM. */
++ if (enabled_pwms) {
++ while (enabled_clks < enabled_pwms) {
++ /* This is not expected to fail as the clk is already on */
++ ret = clk_enable(ddata->clk);
++ if (unlikely(ret)) {
++ dev_err_probe(dev, ret, "Failed to enable clk\n");
++ goto disable_clk;
++ }
++ ++enabled_clks;
++ }
++ } else {
++ clk_disable(ddata->clk);
++ enabled_clks = 0;
++ }
++
+ /* Watch for changes to underlying clock frequency */
+ ddata->notifier.notifier_call = pwm_sifive_clock_notifier;
+ ret = clk_notifier_register(ddata->clk, &ddata->notifier);
+@@ -281,7 +317,11 @@ static int pwm_sifive_probe(struct platform_device *pdev)
+ unregister_clk:
+ clk_notifier_unregister(ddata->clk, &ddata->notifier);
+ disable_clk:
+- clk_disable_unprepare(ddata->clk);
++ while (enabled_clks) {
++ clk_disable(ddata->clk);
++ --enabled_clks;
++ }
++ clk_unprepare(ddata->clk);
+
+ return ret;
+ }
+@@ -289,23 +329,19 @@ disable_clk:
+ static int pwm_sifive_remove(struct platform_device *dev)
+ {
+ struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev);
+- bool is_enabled = false;
+ struct pwm_device *pwm;
+ int ch;
+
++ pwmchip_remove(&ddata->chip);
++ clk_notifier_unregister(ddata->clk, &ddata->notifier);
++
+ for (ch = 0; ch < ddata->chip.npwm; ch++) {
+ pwm = &ddata->chip.pwms[ch];
+- if (pwm->state.enabled) {
+- is_enabled = true;
+- break;
+- }
++ if (pwm->state.enabled)
++ clk_disable(ddata->clk);
+ }
+- if (is_enabled)
+- clk_disable(ddata->clk);
+
+- clk_disable_unprepare(ddata->clk);
+- pwmchip_remove(&ddata->chip);
+- clk_notifier_unregister(ddata->clk, &ddata->notifier);
++ clk_unprepare(ddata->clk);
+
+ return 0;
+ }
+diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
+index 7004f55bbf115..869e696a503f0 100644
+--- a/drivers/pwm/pwm-sprd.c
++++ b/drivers/pwm/pwm-sprd.c
+@@ -109,6 +109,7 @@ static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ duty = val & SPRD_PWM_DUTY_MSK;
+ tmp = (prescale + 1) * NSEC_PER_SEC * duty;
+ state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
++ state->polarity = PWM_POLARITY_NORMAL;
+
+ /* Disable PWM clocks if the PWM channel is not in enable state. */
+ if (!state->enabled)
+diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
+index 3115abb3f52ab..61a1c87cd5016 100644
+--- a/drivers/pwm/pwm-stm32-lp.c
++++ b/drivers/pwm/pwm-stm32-lp.c
+@@ -127,7 +127,7 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+
+ /* ensure CMP & ARR registers are properly written */
+ ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val,
+- (val & STM32_LPTIM_CMPOK_ARROK),
++ (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK,
+ 100, 1000);
+ if (ret) {
+ dev_err(priv->chip.dev, "ARR/CMP registers write issue\n");
+diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
+index 11a10b575ace9..6a1ff9d42f795 100644
+--- a/drivers/pwm/pwm-tegra.c
++++ b/drivers/pwm/pwm-tegra.c
+@@ -142,8 +142,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+ * source clock rate as required_clk_rate, PWM controller will
+ * be able to configure the requested period.
+ */
+- required_clk_rate =
+- (NSEC_PER_SEC / period_ns) << PWM_DUTY_WIDTH;
++ required_clk_rate = DIV_ROUND_UP_ULL((u64)NSEC_PER_SEC << PWM_DUTY_WIDTH,
++ period_ns);
+
+ err = clk_set_rate(pc->clk, required_clk_rate);
+ if (err < 0)
+diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
+index 9903c3a7ecedc..b8417a8d2ef97 100644
+--- a/drivers/pwm/sysfs.c
++++ b/drivers/pwm/sysfs.c
+@@ -424,6 +424,13 @@ static int pwm_class_resume_npwm(struct device *parent, unsigned int npwm)
+ if (!export)
+ continue;
+
++ /* If pwmchip was not enabled before suspend, do nothing. */
++ if (!export->suspend.enabled) {
++ /* release lock taken in pwm_class_get_state */
++ mutex_unlock(&export->lock);
++ continue;
++ }
++
+ state.enabled = export->suspend.enabled;
+ ret = pwm_class_apply_state(export, pwm, &state);
+ if (ret < 0)
+@@ -448,7 +455,17 @@ static int __maybe_unused pwm_class_suspend(struct device *parent)
+ if (!export)
+ continue;
+
++ /*
++ * If pwmchip was not enabled before suspend, save
++ * state for resume time and do nothing else.
++ */
+ export->suspend = state;
++ if (!state.enabled) {
++ /* release lock taken in pwm_class_get_state */
++ mutex_unlock(&export->lock);
++ continue;
++ }
++
+ state.enabled = false;
+ ret = pwm_class_apply_state(export, pwm, &state);
+ if (ret < 0) {
+diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
+index 94331d999d273..5ac2dc1e2abd8 100644
+--- a/drivers/rapidio/devices/rio_mport_cdev.c
++++ b/drivers/rapidio/devices/rio_mport_cdev.c
+@@ -1803,8 +1803,11 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
+ rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
+ 0, 0xffff);
+ err = rio_add_device(rdev);
+- if (err)
+- goto cleanup;
++ if (err) {
++ put_device(&rdev->dev);
++ return err;
++ }
++
+ rio_dev_get(rdev);
+
+ return 0;
+@@ -1900,10 +1903,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
+
+ priv->md = chdev;
+
+- mutex_lock(&chdev->file_mutex);
+- list_add_tail(&priv->list, &chdev->file_list);
+- mutex_unlock(&chdev->file_mutex);
+-
+ INIT_LIST_HEAD(&priv->db_filters);
+ INIT_LIST_HEAD(&priv->pw_filters);
+ spin_lock_init(&priv->fifo_lock);
+@@ -1912,6 +1911,7 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
+ sizeof(struct rio_event) * MPORT_EVENT_DEPTH,
+ GFP_KERNEL);
+ if (ret < 0) {
++ put_device(&chdev->dev);
+ dev_err(&chdev->dev, DRV_NAME ": kfifo_alloc failed\n");
+ ret = -ENOMEM;
+ goto err_fifo;
+@@ -1922,6 +1922,9 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
+ spin_lock_init(&priv->req_lock);
+ mutex_init(&priv->dma_lock);
+ #endif
++ mutex_lock(&chdev->file_mutex);
++ list_add_tail(&priv->list, &chdev->file_list);
++ mutex_unlock(&chdev->file_mutex);
+
+ filp->private_data = priv;
+ goto out;
+diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
+index 19b0c33f4a62a..fdcf742b2adbc 100644
+--- a/drivers/rapidio/rio-scan.c
++++ b/drivers/rapidio/rio-scan.c
+@@ -454,8 +454,12 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
+ 0, 0xffff);
+
+ ret = rio_add_device(rdev);
+- if (ret)
+- goto cleanup;
++ if (ret) {
++ if (rswitch)
++ kfree(rswitch->route_table);
++ put_device(&rdev->dev);
++ return NULL;
++ }
+
+ rio_dev_get(rdev);
+
+diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
+index e74cf09eeff07..9544b8ee0c963 100644
+--- a/drivers/rapidio/rio.c
++++ b/drivers/rapidio/rio.c
+@@ -2186,11 +2186,16 @@ int rio_register_mport(struct rio_mport *port)
+ atomic_set(&port->state, RIO_DEVICE_RUNNING);
+
+ res = device_register(&port->dev);
+- if (res)
++ if (res) {
+ dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
+ port->id, res);
+- else
++ mutex_lock(&rio_mport_list_lock);
++ list_del(&port->node);
++ mutex_unlock(&rio_mport_list_lock);
++ put_device(&port->dev);
++ } else {
+ dev_dbg(&port->dev, "RIO: registered mport%d\n", port->id);
++ }
+
+ return res;
+ }
+diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c
+index 05147d2c38428..485e58b264c04 100644
+--- a/drivers/regulator/atc260x-regulator.c
++++ b/drivers/regulator/atc260x-regulator.c
+@@ -292,6 +292,7 @@ enum atc2603c_reg_ids {
+ .bypass_mask = BIT(5), \
+ .active_discharge_reg = ATC2603C_PMU_SWITCH_CTL, \
+ .active_discharge_mask = BIT(1), \
++ .active_discharge_on = BIT(1), \
+ .owner = THIS_MODULE, \
+ }
+
+diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
+index ca6caba8a191a..ebde10e744343 100644
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -206,6 +206,78 @@ static void regulator_unlock(struct regulator_dev *rdev)
+ mutex_unlock(&regulator_nesting_mutex);
+ }
+
++/**
++ * regulator_lock_two - lock two regulators
++ * @rdev1: first regulator
++ * @rdev2: second regulator
++ * @ww_ctx: w/w mutex acquire context
++ *
++ * Locks both rdevs using the regulator_ww_class.
++ */
++static void regulator_lock_two(struct regulator_dev *rdev1,
++ struct regulator_dev *rdev2,
++ struct ww_acquire_ctx *ww_ctx)
++{
++ struct regulator_dev *tmp;
++ int ret;
++
++ ww_acquire_init(ww_ctx, &regulator_ww_class);
++
++ /* Try to just grab both of them */
++ ret = regulator_lock_nested(rdev1, ww_ctx);
++ WARN_ON(ret);
++ ret = regulator_lock_nested(rdev2, ww_ctx);
++ if (ret != -EDEADLOCK) {
++ WARN_ON(ret);
++ goto exit;
++ }
++
++ while (true) {
++ /*
++ * Start of loop: rdev1 was locked and rdev2 was contended.
++ * Need to unlock rdev1, slowly lock rdev2, then try rdev1
++ * again.
++ */
++ regulator_unlock(rdev1);
++
++ ww_mutex_lock_slow(&rdev2->mutex, ww_ctx);
++ rdev2->ref_cnt++;
++ rdev2->mutex_owner = current;
++ ret = regulator_lock_nested(rdev1, ww_ctx);
++
++ if (ret == -EDEADLOCK) {
++ /* More contention; swap which needs to be slow */
++ tmp = rdev1;
++ rdev1 = rdev2;
++ rdev2 = tmp;
++ } else {
++ WARN_ON(ret);
++ break;
++ }
++ }
++
++exit:
++ ww_acquire_done(ww_ctx);
++}
++
++/**
++ * regulator_unlock_two - unlock two regulators
++ * @rdev1: first regulator
++ * @rdev2: second regulator
++ * @ww_ctx: w/w mutex acquire context
++ *
++ * The inverse of regulator_lock_two().
++ */
++
++static void regulator_unlock_two(struct regulator_dev *rdev1,
++ struct regulator_dev *rdev2,
++ struct ww_acquire_ctx *ww_ctx)
++{
++ regulator_unlock(rdev2);
++ regulator_unlock(rdev1);
++ ww_acquire_fini(ww_ctx);
++}
++
+ static bool regulator_supply_is_couple(struct regulator_dev *rdev)
+ {
+ struct regulator_dev *c_rdev;
+@@ -333,6 +405,7 @@ static void regulator_lock_dependent(struct regulator_dev *rdev,
+ ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx);
+ old_contended_rdev = new_contended_rdev;
+ old_contended_rdev->ref_cnt++;
++ old_contended_rdev->mutex_owner = current;
+ }
+
+ err = regulator_lock_recursive(rdev,
+@@ -962,7 +1035,7 @@ static int drms_uA_update(struct regulator_dev *rdev)
+ /* get input voltage */
+ input_uV = 0;
+ if (rdev->supply)
+- input_uV = regulator_get_voltage(rdev->supply);
++ input_uV = regulator_get_voltage_rdev(rdev->supply->rdev);
+ if (input_uV <= 0)
+ input_uV = rdev->constraints->input_uV;
+ if (input_uV <= 0) {
+@@ -1521,6 +1594,24 @@ static int set_machine_constraints(struct regulator_dev *rdev)
+ }
+ }
+
++ /*
++ * If there is no mechanism for controlling the regulator then
++ * flag it as always_on so we don't end up duplicating checks
++ * for this so much. Note that we could control the state of
++ * a supply to control the output on a regulator that has no
++ * direct control.
++ */
++ if (!rdev->ena_pin && !ops->enable) {
++ if (rdev->supply_name && !rdev->supply)
++ return -EPROBE_DEFER;
++
++ if (rdev->supply)
++ rdev->constraints->always_on =
++ rdev->supply->rdev->constraints->always_on;
++ else
++ rdev->constraints->always_on = true;
++ }
++
+ /* If the constraints say the regulator should be on at this point
+ * and we have control then make sure it is enabled.
+ */
+@@ -1531,7 +1622,13 @@ static int set_machine_constraints(struct regulator_dev *rdev)
+ if (rdev->supply_name && !rdev->supply)
+ return -EPROBE_DEFER;
+
+- if (rdev->supply) {
++ /* If supplying regulator has already been enabled,
++ * it's not intended to have use_count increment
++ * when rdev is only boot-on.
++ */
++ if (rdev->supply &&
++ (rdev->constraints->always_on ||
++ !regulator_is_enabled(rdev->supply))) {
+ ret = regulator_enable(rdev->supply);
+ if (ret < 0) {
+ _regulator_put(rdev->supply);
+@@ -1558,8 +1655,8 @@ static int set_machine_constraints(struct regulator_dev *rdev)
+
+ /**
+ * set_supply - set regulator supply regulator
+- * @rdev: regulator name
+- * @supply_rdev: supply regulator name
++ * @rdev: regulator (locked)
++ * @supply_rdev: supply regulator (locked))
+ *
+ * Called by platform initialisation code to set the supply regulator for this
+ * regulator. This ensures that a regulators supply will also be enabled by the
+@@ -1577,6 +1674,7 @@ static int set_supply(struct regulator_dev *rdev,
+
+ rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY");
+ if (rdev->supply == NULL) {
++ module_put(supply_rdev->owner);
+ err = -ENOMEM;
+ return err;
+ }
+@@ -1730,6 +1828,8 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
+ struct regulator *regulator;
+ int err = 0;
+
++ lockdep_assert_held_once(&rdev->mutex.base);
++
+ if (dev) {
+ char buf[REG_STR_SIZE];
+ int size;
+@@ -1750,16 +1850,14 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
+
+ regulator = kzalloc(sizeof(*regulator), GFP_KERNEL);
+ if (regulator == NULL) {
+- kfree(supply_name);
++ kfree_const(supply_name);
+ return NULL;
+ }
+
+ regulator->rdev = rdev;
+ regulator->supply_name = supply_name;
+
+- regulator_lock(rdev);
+ list_add(&regulator->list, &rdev->consumer_list);
+- regulator_unlock(rdev);
+
+ if (dev) {
+ regulator->dev = dev;
+@@ -1776,19 +1874,17 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
+
+ if (err != -EEXIST)
+ regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs);
+- if (!regulator->debugfs) {
++ if (IS_ERR(regulator->debugfs))
+ rdev_dbg(rdev, "Failed to create debugfs directory\n");
+- } else {
+- debugfs_create_u32("uA_load", 0444, regulator->debugfs,
+- &regulator->uA_load);
+- debugfs_create_u32("min_uV", 0444, regulator->debugfs,
+- &regulator->voltage[PM_SUSPEND_ON].min_uV);
+- debugfs_create_u32("max_uV", 0444, regulator->debugfs,
+- &regulator->voltage[PM_SUSPEND_ON].max_uV);
+- debugfs_create_file("constraint_flags", 0444,
+- regulator->debugfs, regulator,
+- &constraint_flags_fops);
+- }
++
++ debugfs_create_u32("uA_load", 0444, regulator->debugfs,
++ &regulator->uA_load);
++ debugfs_create_u32("min_uV", 0444, regulator->debugfs,
++ &regulator->voltage[PM_SUSPEND_ON].min_uV);
++ debugfs_create_u32("max_uV", 0444, regulator->debugfs,
++ &regulator->voltage[PM_SUSPEND_ON].max_uV);
++ debugfs_create_file("constraint_flags", 0444, regulator->debugfs,
++ regulator, &constraint_flags_fops);
+
+ /*
+ * Check now if the regulator is an always on regulator - if
+@@ -1880,6 +1976,7 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev,
+ node = of_get_regulator(dev, supply);
+ if (node) {
+ r = of_find_regulator_by_node(node);
++ of_node_put(node);
+ if (r)
+ return r;
+
+@@ -1924,6 +2021,7 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
+ {
+ struct regulator_dev *r;
+ struct device *dev = rdev->dev.parent;
++ struct ww_acquire_ctx ww_ctx;
+ int ret = 0;
+
+ /* No supply to resolve? */
+@@ -1990,23 +2088,23 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
+ * between rdev->supply null check and setting rdev->supply in
+ * set_supply() from concurrent tasks.
+ */
+- regulator_lock(rdev);
++ regulator_lock_two(rdev, r, &ww_ctx);
+
+ /* Supply just resolved by a concurrent task? */
+ if (rdev->supply) {
+- regulator_unlock(rdev);
++ regulator_unlock_two(rdev, r, &ww_ctx);
+ put_device(&r->dev);
+ goto out;
+ }
+
+ ret = set_supply(rdev, r);
+ if (ret < 0) {
+- regulator_unlock(rdev);
++ regulator_unlock_two(rdev, r, &ww_ctx);
+ put_device(&r->dev);
+ goto out;
+ }
+
+- regulator_unlock(rdev);
++ regulator_unlock_two(rdev, r, &ww_ctx);
+
+ /*
+ * In set_machine_constraints() we may have turned this regulator on
+@@ -2119,7 +2217,9 @@ struct regulator *_regulator_get(struct device *dev, const char *id,
+ return regulator;
+ }
+
++ regulator_lock(rdev);
+ regulator = create_regulator(rdev, dev, id);
++ regulator_unlock(rdev);
+ if (regulator == NULL) {
+ regulator = ERR_PTR(-ENOMEM);
+ module_put(rdev->owner);
+@@ -2132,10 +2232,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id,
+ rdev->exclusive = 1;
+
+ ret = _regulator_is_enabled(rdev);
+- if (ret > 0)
++ if (ret > 0) {
+ rdev->use_count = 1;
+- else
++ regulator->enable_count = 1;
++ } else {
+ rdev->use_count = 0;
++ regulator->enable_count = 0;
++ }
+ }
+
+ link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS);
+@@ -2594,12 +2697,12 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
+
+ trace_regulator_enable(rdev_get_name(rdev));
+
+- if (rdev->desc->off_on_delay && rdev->last_off) {
++ if (rdev->desc->off_on_delay) {
+ /* if needed, keep a distance of off_on_delay from last time
+ * this regulator was disabled.
+ */
+ ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay);
+- s64 remaining = ktime_us_delta(end, ktime_get());
++ s64 remaining = ktime_us_delta(end, ktime_get_boottime());
+
+ if (remaining > 0)
+ _regulator_enable_delay(remaining);
+@@ -2633,7 +2736,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
+ * expired, return -ETIMEDOUT.
+ */
+ if (rdev->desc->poll_enabled_time) {
+- unsigned int time_remaining = delay;
++ int time_remaining = delay;
+
+ while (time_remaining > 0) {
+ _regulator_enable_delay(rdev->desc->poll_enabled_time);
+@@ -2685,13 +2788,18 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
+ */
+ static int _regulator_handle_consumer_enable(struct regulator *regulator)
+ {
++ int ret;
+ struct regulator_dev *rdev = regulator->rdev;
+
+ lockdep_assert_held_once(&rdev->mutex.base);
+
+ regulator->enable_count++;
+- if (regulator->uA_load && regulator->enable_count == 1)
+- return drms_uA_update(rdev);
++ if (regulator->uA_load && regulator->enable_count == 1) {
++ ret = drms_uA_update(rdev);
++ if (ret)
++ regulator->enable_count--;
++ return ret;
++ }
+
+ return 0;
+ }
+@@ -2833,7 +2941,7 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
+ }
+
+ if (rdev->desc->off_on_delay)
+- rdev->last_off = ktime_get();
++ rdev->last_off = ktime_get_boottime();
+
+ trace_regulator_disable_complete(rdev_get_name(rdev));
+
+@@ -5063,6 +5171,7 @@ static void regulator_dev_release(struct device *dev)
+ {
+ struct regulator_dev *rdev = dev_get_drvdata(dev);
+
++ debugfs_remove_recursive(rdev->debugfs);
+ kfree(rdev->constraints);
+ of_node_put(rdev->dev.of_node);
+ kfree(rdev);
+@@ -5082,10 +5191,8 @@ static void rdev_init_debugfs(struct regulator_dev *rdev)
+ }
+
+ rdev->debugfs = debugfs_create_dir(rname, debugfs_root);
+- if (!rdev->debugfs) {
+- rdev_warn(rdev, "Failed to create debugfs directory\n");
+- return;
+- }
++ if (IS_ERR(rdev->debugfs))
++ rdev_dbg(rdev, "Failed to create debugfs directory\n");
+
+ debugfs_create_u32("use_count", 0444, rdev->debugfs,
+ &rdev->use_count);
+@@ -5537,15 +5644,20 @@ unset_supplies:
+ regulator_remove_coupling(rdev);
+ mutex_unlock(&regulator_list_mutex);
+ wash:
++ regulator_put(rdev->supply);
+ kfree(rdev->coupling_desc.coupled_rdevs);
+ mutex_lock(&regulator_list_mutex);
+ regulator_ena_gpio_free(rdev);
+ mutex_unlock(&regulator_list_mutex);
++ put_device(&rdev->dev);
++ rdev = NULL;
+ clean:
+ if (dangling_of_gpiod)
+ gpiod_put(config->ena_gpiod);
++ if (rdev && rdev->dev.of_node)
++ of_node_put(rdev->dev.of_node);
++ kfree(rdev);
+ kfree(config);
+- put_device(&rdev->dev);
+ rinse:
+ if (dangling_cfg_gpiod)
+ gpiod_put(cfg->ena_gpiod);
+@@ -5574,7 +5686,6 @@ void regulator_unregister(struct regulator_dev *rdev)
+
+ mutex_lock(&regulator_list_mutex);
+
+- debugfs_remove_recursive(rdev->debugfs);
+ WARN_ON(rdev->open_count);
+ regulator_remove_coupling(rdev);
+ unset_regulator_supplies(rdev);
+@@ -5927,6 +6038,7 @@ static void regulator_summary_lock(struct ww_acquire_ctx *ww_ctx)
+ ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx);
+ old_contended_rdev = new_contended_rdev;
+ old_contended_rdev->ref_cnt++;
++ old_contended_rdev->mutex_owner = current;
+ }
+
+ err = regulator_summary_lock_all(ww_ctx,
+@@ -5987,8 +6099,8 @@ static int __init regulator_init(void)
+ ret = class_register(&regulator_class);
+
+ debugfs_root = debugfs_create_dir("regulator", NULL);
+- if (!debugfs_root)
+- pr_warn("regulator: Failed to create debugfs directory\n");
++ if (IS_ERR(debugfs_root))
++ pr_debug("regulator: Failed to create debugfs directory\n");
+
+ #ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("supply_map", 0444, debugfs_root, NULL,
+@@ -6010,9 +6122,8 @@ core_initcall(regulator_init);
+ static int regulator_late_cleanup(struct device *dev, void *data)
+ {
+ struct regulator_dev *rdev = dev_to_rdev(dev);
+- const struct regulator_ops *ops = rdev->desc->ops;
+ struct regulation_constraints *c = rdev->constraints;
+- int enabled, ret;
++ int ret;
+
+ if (c && c->always_on)
+ return 0;
+@@ -6025,14 +6136,8 @@ static int regulator_late_cleanup(struct device *dev, void *data)
+ if (rdev->use_count)
+ goto unlock;
+
+- /* If we can't read the status assume it's always on. */
+- if (ops->is_enabled)
+- enabled = ops->is_enabled(rdev);
+- else
+- enabled = 1;
+-
+- /* But if reading the status failed, assume that it's off. */
+- if (enabled <= 0)
++ /* If reading the status failed, assume that it's off. */
++ if (_regulator_is_enabled(rdev) <= 0)
+ goto unlock;
+
+ if (have_full_constraints()) {
+diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c
+index e669250902580..3315994d7e311 100644
+--- a/drivers/regulator/da9121-regulator.c
++++ b/drivers/regulator/da9121-regulator.c
+@@ -253,6 +253,11 @@ static int da9121_set_current_limit(struct regulator_dev *rdev,
+ goto error;
+ }
+
++ if (rdev->desc->ops->is_enabled(rdev)) {
++ ret = -EBUSY;
++ goto error;
++ }
++
+ ret = da9121_ceiling_selector(rdev, min_ua, max_ua, &sel);
+ if (ret < 0)
+ goto error;
+@@ -931,6 +936,8 @@ static int da9121_assign_chip_model(struct i2c_client *i2c,
+ chip->variant_id = DA9121_TYPE_DA9220_DA9132;
+ regmap = &da9121_2ch_regmap_config;
+ break;
++ default:
++ return -EINVAL;
+ }
+
+ /* Set these up for of_regulator_match call which may want .of_map_modes */
+diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
+index e01b32d1fa17d..00828f5baa972 100644
+--- a/drivers/regulator/da9211-regulator.c
++++ b/drivers/regulator/da9211-regulator.c
+@@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
+
+ chip->chip_irq = i2c->irq;
+
++ ret = da9211_regulator_init(chip);
++ if (ret < 0) {
++ dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
++ return ret;
++ }
++
+ if (chip->chip_irq != 0) {
+ ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL,
+ da9211_irq_handler,
+@@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c)
+ dev_warn(chip->dev, "No IRQ configured\n");
+ }
+
+- ret = da9211_regulator_init(chip);
+-
+- if (ret < 0)
+- dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret);
+-
+ return ret;
+ }
+
+diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
+index dac1fb584fa35..ecd5a50c61660 100644
+--- a/drivers/regulator/fan53555.c
++++ b/drivers/regulator/fan53555.c
+@@ -8,18 +8,19 @@
+ // Copyright (c) 2012 Marvell Technology Ltd.
+ // Yunfan Zhang <yfzhang@marvell.com>
+
++#include <linux/bits.h>
++#include <linux/err.h>
++#include <linux/i2c.h>
+ #include <linux/module.h>
++#include <linux/of_device.h>
+ #include <linux/param.h>
+-#include <linux/err.h>
+ #include <linux/platform_device.h>
++#include <linux/regmap.h>
+ #include <linux/regulator/driver.h>
++#include <linux/regulator/fan53555.h>
+ #include <linux/regulator/machine.h>
+ #include <linux/regulator/of_regulator.h>
+-#include <linux/of_device.h>
+-#include <linux/i2c.h>
+ #include <linux/slab.h>
+-#include <linux/regmap.h>
+-#include <linux/regulator/fan53555.h>
+
+ /* Voltage setting */
+ #define FAN53555_VSEL0 0x00
+@@ -60,7 +61,7 @@
+ #define TCS_VSEL1_MODE (1 << 6)
+
+ #define TCS_SLEW_SHIFT 3
+-#define TCS_SLEW_MASK (0x3 < 3)
++#define TCS_SLEW_MASK GENMASK(4, 3)
+
+ enum fan53555_vendor {
+ FAN53526_VENDOR_FAIRCHILD = 0,
+diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
+index 599ad201dca75..fb163458337fc 100644
+--- a/drivers/regulator/fixed.c
++++ b/drivers/regulator/fixed.c
+@@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
+ drvdata->enable_clock = devm_clk_get(dev, NULL);
+ if (IS_ERR(drvdata->enable_clock)) {
+ dev_err(dev, "Can't get enable-clock from devicetree\n");
+- return -ENOENT;
++ return PTR_ERR(drvdata->enable_clock);
+ }
+ } else if (drvtype && drvtype->has_performance_state) {
+ drvdata->desc.ops = &fixed_voltage_domain_ops;
+diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
+index 21e0eb0f43f94..befe5f319819b 100644
+--- a/drivers/regulator/max77802-regulator.c
++++ b/drivers/regulator/max77802-regulator.c
+@@ -94,9 +94,11 @@ static int max77802_set_suspend_disable(struct regulator_dev *rdev)
+ {
+ unsigned int val = MAX77802_OFF_PWRREQ;
+ struct max77802_regulator_prv *max77802 = rdev_get_drvdata(rdev);
+- int id = rdev_get_id(rdev);
++ unsigned int id = rdev_get_id(rdev);
+ int shift = max77802_get_opmode_shift(id);
+
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(max77802->opmode)))
++ return -EINVAL;
+ max77802->opmode[id] = val;
+ return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+ rdev->desc->enable_mask, val << shift);
+@@ -110,7 +112,7 @@ static int max77802_set_suspend_disable(struct regulator_dev *rdev)
+ static int max77802_set_mode(struct regulator_dev *rdev, unsigned int mode)
+ {
+ struct max77802_regulator_prv *max77802 = rdev_get_drvdata(rdev);
+- int id = rdev_get_id(rdev);
++ unsigned int id = rdev_get_id(rdev);
+ unsigned int val;
+ int shift = max77802_get_opmode_shift(id);
+
+@@ -127,6 +129,9 @@ static int max77802_set_mode(struct regulator_dev *rdev, unsigned int mode)
+ return -EINVAL;
+ }
+
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(max77802->opmode)))
++ return -EINVAL;
++
+ max77802->opmode[id] = val;
+ return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+ rdev->desc->enable_mask, val << shift);
+@@ -135,8 +140,10 @@ static int max77802_set_mode(struct regulator_dev *rdev, unsigned int mode)
+ static unsigned max77802_get_mode(struct regulator_dev *rdev)
+ {
+ struct max77802_regulator_prv *max77802 = rdev_get_drvdata(rdev);
+- int id = rdev_get_id(rdev);
++ unsigned int id = rdev_get_id(rdev);
+
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(max77802->opmode)))
++ return -EINVAL;
+ return max77802_map_mode(max77802->opmode[id]);
+ }
+
+@@ -160,10 +167,13 @@ static int max77802_set_suspend_mode(struct regulator_dev *rdev,
+ unsigned int mode)
+ {
+ struct max77802_regulator_prv *max77802 = rdev_get_drvdata(rdev);
+- int id = rdev_get_id(rdev);
++ unsigned int id = rdev_get_id(rdev);
+ unsigned int val;
+ int shift = max77802_get_opmode_shift(id);
+
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(max77802->opmode)))
++ return -EINVAL;
++
+ /*
+ * If the regulator has been disabled for suspend
+ * then is invalid to try setting a suspend mode.
+@@ -209,9 +219,11 @@ static int max77802_set_suspend_mode(struct regulator_dev *rdev,
+ static int max77802_enable(struct regulator_dev *rdev)
+ {
+ struct max77802_regulator_prv *max77802 = rdev_get_drvdata(rdev);
+- int id = rdev_get_id(rdev);
++ unsigned int id = rdev_get_id(rdev);
+ int shift = max77802_get_opmode_shift(id);
+
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(max77802->opmode)))
++ return -EINVAL;
+ if (max77802->opmode[id] == MAX77802_OFF_PWRREQ)
+ max77802->opmode[id] = MAX77802_OPMODE_NORMAL;
+
+@@ -495,7 +507,7 @@ static int max77802_pmic_probe(struct platform_device *pdev)
+
+ for (i = 0; i < MAX77802_REG_MAX; i++) {
+ struct regulator_dev *rdev;
+- int id = regulators[i].id;
++ unsigned int id = regulators[i].id;
+ int shift = max77802_get_opmode_shift(id);
+ int ret;
+
+@@ -513,10 +525,12 @@ static int max77802_pmic_probe(struct platform_device *pdev)
+ * the hardware reports OFF as the regulator operating mode.
+ * Default to operating mode NORMAL in that case.
+ */
+- if (val == MAX77802_STATUS_OFF)
+- max77802->opmode[id] = MAX77802_OPMODE_NORMAL;
+- else
+- max77802->opmode[id] = val;
++ if (id < ARRAY_SIZE(max77802->opmode)) {
++ if (val == MAX77802_STATUS_OFF)
++ max77802->opmode[id] = MAX77802_OPMODE_NORMAL;
++ else
++ max77802->opmode[id] = val;
++ }
+
+ rdev = devm_regulator_register(&pdev->dev,
+ &regulators[i], &config);
+diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
+index de3b0462832cd..f94f87c5407ae 100644
+--- a/drivers/regulator/mt6359-regulator.c
++++ b/drivers/regulator/mt6359-regulator.c
+@@ -951,9 +951,12 @@ static int mt6359_regulator_probe(struct platform_device *pdev)
+ struct regulator_config config = {};
+ struct regulator_dev *rdev;
+ struct mt6359_regulator_info *mt6359_info;
+- int i, hw_ver;
++ int i, hw_ver, ret;
++
++ ret = regmap_read(mt6397->regmap, MT6359P_HWCID, &hw_ver);
++ if (ret)
++ return ret;
+
+- regmap_read(mt6397->regmap, MT6359P_HWCID, &hw_ver);
+ if (hw_ver >= MT6359P_CHIP_VER)
+ mt6359_info = mt6359p_regulators;
+ else
+diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
+index f54d4f176882a..e12b681c72e5e 100644
+--- a/drivers/regulator/of_regulator.c
++++ b/drivers/regulator/of_regulator.c
+@@ -264,8 +264,12 @@ static int of_get_regulation_constraints(struct device *dev,
+ }
+
+ suspend_np = of_get_child_by_name(np, regulator_states[i]);
+- if (!suspend_np || !suspend_state)
++ if (!suspend_np)
+ continue;
++ if (!suspend_state) {
++ of_node_put(suspend_np);
++ continue;
++ }
+
+ if (!of_property_read_u32(suspend_np, "regulator-mode",
+ &pval)) {
+diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
+index 64e5f5f0cc841..556074d7fe242 100644
+--- a/drivers/regulator/pca9450-regulator.c
++++ b/drivers/regulator/pca9450-regulator.c
+@@ -256,7 +256,7 @@ static const struct pca9450_regulator_desc pca9450a_regulators[] = {
+ .vsel_reg = PCA9450_REG_BUCK2OUT_DVS0,
+ .vsel_mask = BUCK2OUT_DVS0_MASK,
+ .enable_reg = PCA9450_REG_BUCK2CTRL,
+- .enable_mask = BUCK1_ENMODE_MASK,
++ .enable_mask = BUCK2_ENMODE_MASK,
+ .ramp_reg = PCA9450_REG_BUCK2CTRL,
+ .ramp_mask = BUCK2_RAMP_MASK,
+ .ramp_delay_table = pca9450_dvs_buck_ramp_table,
+@@ -494,7 +494,7 @@ static const struct pca9450_regulator_desc pca9450bc_regulators[] = {
+ .vsel_reg = PCA9450_REG_BUCK2OUT_DVS0,
+ .vsel_mask = BUCK2OUT_DVS0_MASK,
+ .enable_reg = PCA9450_REG_BUCK2CTRL,
+- .enable_mask = BUCK1_ENMODE_MASK,
++ .enable_mask = BUCK2_ENMODE_MASK,
+ .ramp_reg = PCA9450_REG_BUCK2CTRL,
+ .ramp_mask = BUCK2_RAMP_MASK,
+ .ramp_delay_table = pca9450_dvs_buck_ramp_table,
+diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
+index d60d7d1b7fa25..a9a0bd918d1e2 100644
+--- a/drivers/regulator/pfuze100-regulator.c
++++ b/drivers/regulator/pfuze100-regulator.c
+@@ -521,6 +521,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
+ parent = of_get_child_by_name(np, "regulators");
+ if (!parent) {
+ dev_err(dev, "regulators node not found\n");
++ of_node_put(np);
+ return -EINVAL;
+ }
+
+@@ -550,6 +551,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
+ }
+
+ of_node_put(parent);
++ of_node_put(np);
+ if (ret < 0) {
+ dev_err(dev, "Error parsing regulator init data: %d\n",
+ ret);
+@@ -761,7 +763,7 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
+ ((pfuze_chip->chip_id == PFUZE3000) ? "3000" : "3001"))));
+
+ memcpy(pfuze_chip->regulator_descs, pfuze_chip->pfuze_regulators,
+- sizeof(pfuze_chip->regulator_descs));
++ regulator_num * sizeof(struct pfuze_regulator));
+
+ ret = pfuze_parse_regulators_dt(pfuze_chip);
+ if (ret)
+diff --git a/drivers/regulator/qcom-labibb-regulator.c b/drivers/regulator/qcom-labibb-regulator.c
+index b3da0dc58782f..bcf7140f3bc98 100644
+--- a/drivers/regulator/qcom-labibb-regulator.c
++++ b/drivers/regulator/qcom-labibb-regulator.c
+@@ -260,7 +260,7 @@ static irqreturn_t qcom_labibb_ocp_isr(int irq, void *chip)
+
+ /* If the regulator is not enabled, this is a fake event */
+ if (!ops->is_enabled(vreg->rdev))
+- return 0;
++ return IRQ_HANDLED;
+
+ /* If we tried to recover for too many times it's not getting better */
+ if (vreg->ocp_irq_count > LABIBB_MAX_OCP_COUNT)
+@@ -822,6 +822,7 @@ static int qcom_labibb_regulator_probe(struct platform_device *pdev)
+ if (irq == 0)
+ irq = -EINVAL;
+
++ of_node_put(reg_node);
+ return dev_err_probe(vreg->dev, irq,
+ "Short-circuit irq not found.\n");
+ }
+diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
+index 7f458d510483f..27efdbbd90d9a 100644
+--- a/drivers/regulator/qcom-rpmh-regulator.c
++++ b/drivers/regulator/qcom-rpmh-regulator.c
+@@ -1108,7 +1108,7 @@ static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
+ static const struct rpmh_vreg_init_data pmr735a_vreg_data[] = {
+ RPMH_VREG("smps1", "smp%s1", &pmic5_ftsmps520, "vdd-s1"),
+ RPMH_VREG("smps2", "smp%s2", &pmic5_ftsmps520, "vdd-s2"),
+- RPMH_VREG("smps3", "smp%s3", &pmic5_hfsmps510, "vdd-s3"),
++ RPMH_VREG("smps3", "smp%s3", &pmic5_hfsmps515, "vdd-s3"),
+ RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1-l2"),
+ RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l1-l2"),
+ RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"),
+diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c
+index 7f9d66ac37ff8..3c41b71a1f529 100644
+--- a/drivers/regulator/qcom_rpm-regulator.c
++++ b/drivers/regulator/qcom_rpm-regulator.c
+@@ -802,6 +802,12 @@ static const struct rpm_regulator_data rpm_pm8018_regulators[] = {
+ };
+
+ static const struct rpm_regulator_data rpm_pm8058_regulators[] = {
++ { "s0", QCOM_RPM_PM8058_SMPS0, &pm8058_smps, "vdd_s0" },
++ { "s1", QCOM_RPM_PM8058_SMPS1, &pm8058_smps, "vdd_s1" },
++ { "s2", QCOM_RPM_PM8058_SMPS2, &pm8058_smps, "vdd_s2" },
++ { "s3", QCOM_RPM_PM8058_SMPS3, &pm8058_smps, "vdd_s3" },
++ { "s4", QCOM_RPM_PM8058_SMPS4, &pm8058_smps, "vdd_s4" },
++
+ { "l0", QCOM_RPM_PM8058_LDO0, &pm8058_nldo, "vdd_l0_l1_lvs" },
+ { "l1", QCOM_RPM_PM8058_LDO1, &pm8058_nldo, "vdd_l0_l1_lvs" },
+ { "l2", QCOM_RPM_PM8058_LDO2, &pm8058_pldo, "vdd_l2_l11_l12" },
+@@ -829,12 +835,6 @@ static const struct rpm_regulator_data rpm_pm8058_regulators[] = {
+ { "l24", QCOM_RPM_PM8058_LDO24, &pm8058_nldo, "vdd_l23_l24_l25" },
+ { "l25", QCOM_RPM_PM8058_LDO25, &pm8058_nldo, "vdd_l23_l24_l25" },
+
+- { "s0", QCOM_RPM_PM8058_SMPS0, &pm8058_smps, "vdd_s0" },
+- { "s1", QCOM_RPM_PM8058_SMPS1, &pm8058_smps, "vdd_s1" },
+- { "s2", QCOM_RPM_PM8058_SMPS2, &pm8058_smps, "vdd_s2" },
+- { "s3", QCOM_RPM_PM8058_SMPS3, &pm8058_smps, "vdd_s3" },
+- { "s4", QCOM_RPM_PM8058_SMPS4, &pm8058_smps, "vdd_s4" },
+-
+ { "lvs0", QCOM_RPM_PM8058_LVS0, &pm8058_switch, "vdd_l0_l1_lvs" },
+ { "lvs1", QCOM_RPM_PM8058_LVS1, &pm8058_switch, "vdd_l0_l1_lvs" },
+
+@@ -843,6 +843,12 @@ static const struct rpm_regulator_data rpm_pm8058_regulators[] = {
+ };
+
+ static const struct rpm_regulator_data rpm_pm8901_regulators[] = {
++ { "s0", QCOM_RPM_PM8901_SMPS0, &pm8901_ftsmps, "vdd_s0" },
++ { "s1", QCOM_RPM_PM8901_SMPS1, &pm8901_ftsmps, "vdd_s1" },
++ { "s2", QCOM_RPM_PM8901_SMPS2, &pm8901_ftsmps, "vdd_s2" },
++ { "s3", QCOM_RPM_PM8901_SMPS3, &pm8901_ftsmps, "vdd_s3" },
++ { "s4", QCOM_RPM_PM8901_SMPS4, &pm8901_ftsmps, "vdd_s4" },
++
+ { "l0", QCOM_RPM_PM8901_LDO0, &pm8901_nldo, "vdd_l0" },
+ { "l1", QCOM_RPM_PM8901_LDO1, &pm8901_pldo, "vdd_l1" },
+ { "l2", QCOM_RPM_PM8901_LDO2, &pm8901_pldo, "vdd_l2" },
+@@ -851,12 +857,6 @@ static const struct rpm_regulator_data rpm_pm8901_regulators[] = {
+ { "l5", QCOM_RPM_PM8901_LDO5, &pm8901_pldo, "vdd_l5" },
+ { "l6", QCOM_RPM_PM8901_LDO6, &pm8901_pldo, "vdd_l6" },
+
+- { "s0", QCOM_RPM_PM8901_SMPS0, &pm8901_ftsmps, "vdd_s0" },
+- { "s1", QCOM_RPM_PM8901_SMPS1, &pm8901_ftsmps, "vdd_s1" },
+- { "s2", QCOM_RPM_PM8901_SMPS2, &pm8901_ftsmps, "vdd_s2" },
+- { "s3", QCOM_RPM_PM8901_SMPS3, &pm8901_ftsmps, "vdd_s3" },
+- { "s4", QCOM_RPM_PM8901_SMPS4, &pm8901_ftsmps, "vdd_s4" },
+-
+ { "lvs0", QCOM_RPM_PM8901_LVS0, &pm8901_switch, "lvs0_in" },
+ { "lvs1", QCOM_RPM_PM8901_LVS1, &pm8901_switch, "lvs1_in" },
+ { "lvs2", QCOM_RPM_PM8901_LVS2, &pm8901_switch, "lvs2_in" },
+diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
+index 198fcc6551f6d..2fe13c765effb 100644
+--- a/drivers/regulator/qcom_smd-regulator.c
++++ b/drivers/regulator/qcom_smd-regulator.c
+@@ -9,6 +9,7 @@
+ #include <linux/of_device.h>
+ #include <linux/platform_device.h>
+ #include <linux/regulator/driver.h>
++#include <linux/regulator/of_regulator.h>
+ #include <linux/soc/qcom/smd-rpm.h>
+
+ struct qcom_rpm_reg {
+@@ -356,10 +357,10 @@ static const struct regulator_desc pm8941_switch = {
+
+ static const struct regulator_desc pm8916_pldo = {
+ .linear_ranges = (struct linear_range[]) {
+- REGULATOR_LINEAR_RANGE(750000, 0, 208, 12500),
++ REGULATOR_LINEAR_RANGE(1750000, 0, 127, 12500),
+ },
+ .n_linear_ranges = 1,
+- .n_voltages = 209,
++ .n_voltages = 128,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+@@ -925,32 +926,31 @@ static const struct rpm_regulator_data rpm_pm8950_regulators[] = {
+ { "s2", QCOM_SMD_RPM_SMPA, 2, &pm8950_hfsmps, "vdd_s2" },
+ { "s3", QCOM_SMD_RPM_SMPA, 3, &pm8950_hfsmps, "vdd_s3" },
+ { "s4", QCOM_SMD_RPM_SMPA, 4, &pm8950_hfsmps, "vdd_s4" },
+- { "s5", QCOM_SMD_RPM_SMPA, 5, &pm8950_ftsmps2p5, "vdd_s5" },
++ /* S5 is managed via SPMI. */
+ { "s6", QCOM_SMD_RPM_SMPA, 6, &pm8950_hfsmps, "vdd_s6" },
+
+ { "l1", QCOM_SMD_RPM_LDOA, 1, &pm8950_ult_nldo, "vdd_l1_l19" },
+ { "l2", QCOM_SMD_RPM_LDOA, 2, &pm8950_ult_nldo, "vdd_l2_l23" },
+ { "l3", QCOM_SMD_RPM_LDOA, 3, &pm8950_ult_nldo, "vdd_l3" },
+- { "l4", QCOM_SMD_RPM_LDOA, 4, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16" },
+- { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
+- { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
+- { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" },
++ /* L4 seems not to exist. */
++ { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
++ { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
++ { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" },
+ { "l8", QCOM_SMD_RPM_LDOA, 8, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
+ { "l9", QCOM_SMD_RPM_LDOA, 9, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
+ { "l10", QCOM_SMD_RPM_LDOA, 10, &pm8950_ult_nldo, "vdd_l9_l10_l13_l14_l15_l18"},
+- { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
+- { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
+- { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
+- { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
+- { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
+- { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16"},
+- { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"},
+- { "l18", QCOM_SMD_RPM_LDOA, 18, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"},
+- { "l19", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l1_l19"},
+- { "l20", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l20"},
+- { "l21", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l21"},
+- { "l22", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22"},
+- { "l23", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l2_l23"},
++ { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
++ { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
++ { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
++ { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
++ { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" },
++ { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l5_l6_l7_l16" },
++ { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" },
++ /* L18 seems not to exist. */
++ { "l19", QCOM_SMD_RPM_LDOA, 19, &pm8950_pldo, "vdd_l1_l19" },
++ /* L20 & L21 seem not to exist. */
++ { "l22", QCOM_SMD_RPM_LDOA, 22, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22" },
++ { "l23", QCOM_SMD_RPM_LDOA, 23, &pm8950_pldo, "vdd_l2_l23" },
+ {}
+ };
+
+@@ -1190,52 +1190,93 @@ static const struct of_device_id rpm_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, rpm_of_match);
+
+-static int rpm_reg_probe(struct platform_device *pdev)
++/**
++ * rpm_regulator_init_vreg() - initialize all attributes of a qcom_smd-regulator
++ * @vreg: Pointer to the individual qcom_smd-regulator resource
++ * @dev: Pointer to the top level qcom_smd-regulator PMIC device
++ * @node: Pointer to the individual qcom_smd-regulator resource
++ * device node
++ * @rpm: Pointer to the rpm bus node
++ * @pmic_rpm_data: Pointer to a null-terminated array of qcom_smd-regulator
++ * resources defined for the top level PMIC device
++ *
++ * Return: 0 on success, errno on failure
++ */
++static int rpm_regulator_init_vreg(struct qcom_rpm_reg *vreg, struct device *dev,
++ struct device_node *node, struct qcom_smd_rpm *rpm,
++ const struct rpm_regulator_data *pmic_rpm_data)
+ {
+- const struct rpm_regulator_data *reg;
+- const struct of_device_id *match;
+- struct regulator_config config = { };
++ struct regulator_config config = {};
++ const struct rpm_regulator_data *rpm_data;
+ struct regulator_dev *rdev;
++ int ret;
++
++ for (rpm_data = pmic_rpm_data; rpm_data->name; rpm_data++)
++ if (of_node_name_eq(node, rpm_data->name))
++ break;
++
++ if (!rpm_data->name) {
++ dev_err(dev, "Unknown regulator %pOFn\n", node);
++ return -EINVAL;
++ }
++
++ vreg->dev = dev;
++ vreg->rpm = rpm;
++ vreg->type = rpm_data->type;
++ vreg->id = rpm_data->id;
++
++ memcpy(&vreg->desc, rpm_data->desc, sizeof(vreg->desc));
++ vreg->desc.name = rpm_data->name;
++ vreg->desc.supply_name = rpm_data->supply;
++ vreg->desc.owner = THIS_MODULE;
++ vreg->desc.type = REGULATOR_VOLTAGE;
++ vreg->desc.of_match = rpm_data->name;
++
++ config.dev = dev;
++ config.of_node = node;
++ config.driver_data = vreg;
++
++ rdev = devm_regulator_register(dev, &vreg->desc, &config);
++ if (IS_ERR(rdev)) {
++ ret = PTR_ERR(rdev);
++ dev_err(dev, "%pOFn: devm_regulator_register() failed, ret=%d\n", node, ret);
++ return ret;
++ }
++
++ return 0;
++}
++
++static int rpm_reg_probe(struct platform_device *pdev)
++{
++ struct device *dev = &pdev->dev;
++ const struct rpm_regulator_data *vreg_data;
++ struct device_node *node;
+ struct qcom_rpm_reg *vreg;
+ struct qcom_smd_rpm *rpm;
++ int ret;
+
+ rpm = dev_get_drvdata(pdev->dev.parent);
+ if (!rpm) {
+- dev_err(&pdev->dev, "unable to retrieve handle to rpm\n");
++ dev_err(&pdev->dev, "Unable to retrieve handle to rpm\n");
+ return -ENODEV;
+ }
+
+- match = of_match_device(rpm_of_match, &pdev->dev);
+- if (!match) {
+- dev_err(&pdev->dev, "failed to match device\n");
++ vreg_data = of_device_get_match_data(dev);
++ if (!vreg_data)
+ return -ENODEV;
+- }
+
+- for (reg = match->data; reg->name; reg++) {
++ for_each_available_child_of_node(dev->of_node, node) {
+ vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
+- if (!vreg)
++ if (!vreg) {
++ of_node_put(node);
+ return -ENOMEM;
++ }
++
++ ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data);
+
+- vreg->dev = &pdev->dev;
+- vreg->type = reg->type;
+- vreg->id = reg->id;
+- vreg->rpm = rpm;
+-
+- memcpy(&vreg->desc, reg->desc, sizeof(vreg->desc));
+-
+- vreg->desc.id = -1;
+- vreg->desc.owner = THIS_MODULE;
+- vreg->desc.type = REGULATOR_VOLTAGE;
+- vreg->desc.name = reg->name;
+- vreg->desc.supply_name = reg->supply;
+- vreg->desc.of_match = reg->name;
+-
+- config.dev = &pdev->dev;
+- config.driver_data = vreg;
+- rdev = devm_regulator_register(&pdev->dev, &vreg->desc, &config);
+- if (IS_ERR(rdev)) {
+- dev_err(&pdev->dev, "failed to register %s\n", reg->name);
+- return PTR_ERR(rdev);
++ if (ret < 0) {
++ of_node_put(node);
++ return ret;
+ }
+ }
+
+diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
+index ee46bfbf5eee7..991b4730d7687 100644
+--- a/drivers/regulator/rpi-panel-attiny-regulator.c
++++ b/drivers/regulator/rpi-panel-attiny-regulator.c
+@@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = {
+ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
+ {
+ unsigned int data;
++ int ret, i;
+
+ regmap_write(rdev->regmap, REG_POWERON, 1);
++ msleep(80);
++
+ /* Wait for nPWRDWN to go low to indicate poweron is done. */
+- regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data,
+- data & BIT(0), 10, 1000000);
++ for (i = 0; i < 20; i++) {
++ ret = regmap_read(rdev->regmap, REG_PORTB, &data);
++ if (!ret) {
++ if (data & BIT(0))
++ break;
++ }
++ usleep_range(10000, 12000);
++ }
++ usleep_range(10000, 12000);
++
++ if (ret)
++ pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret);
+
+ /* Default to the same orientation as the closed source
+ * firmware used for the panel. Runtime rotation
+@@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
+ {
+ regmap_write(rdev->regmap, REG_PWM, 0);
+ regmap_write(rdev->regmap, REG_POWERON, 0);
+- udelay(1);
++ msleep(30);
+ return 0;
+ }
+
+ static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
+ {
+ unsigned int data;
+- int ret;
++ int ret, i;
+
+- ret = regmap_read(rdev->regmap, REG_POWERON, &data);
++ for (i = 0; i < 10; i++) {
++ ret = regmap_read(rdev->regmap, REG_POWERON, &data);
++ if (!ret)
++ break;
++ usleep_range(10000, 12000);
++ }
+ if (ret < 0)
+ return ret;
+
+ if (!(data & BIT(0)))
+ return 0;
+
+- ret = regmap_read(rdev->regmap, REG_PORTB, &data);
++ for (i = 0; i < 10; i++) {
++ ret = regmap_read(rdev->regmap, REG_PORTB, &data);
++ if (!ret)
++ break;
++ usleep_range(10000, 12000);
++ }
++
+ if (ret < 0)
+ return ret;
+
+@@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl)
+ {
+ struct regmap *regmap = bl_get_data(bl);
+ int brightness = bl->props.brightness;
++ int ret, i;
+
+ if (bl->props.power != FB_BLANK_UNBLANK ||
+ bl->props.fb_blank != FB_BLANK_UNBLANK)
+ brightness = 0;
+
+- return regmap_write(regmap, REG_PWM, brightness);
++ for (i = 0; i < 10; i++) {
++ ret = regmap_write(regmap, REG_PWM, brightness);
++ if (!ret)
++ break;
++ }
++
++ return ret;
+ }
+
+ static int attiny_get_brightness(struct backlight_device *bl)
+ {
+ struct regmap *regmap = bl_get_data(bl);
+- int ret, brightness;
++ int ret, brightness, i;
++
++ for (i = 0; i < 10; i++) {
++ ret = regmap_read(regmap, REG_PWM, &brightness);
++ if (!ret)
++ break;
++ }
+
+- ret = regmap_read(regmap, REG_PWM, &brightness);
+ if (ret)
+ return ret;
+
+@@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
+ }
+
+ regmap_write(regmap, REG_POWERON, 0);
+- mdelay(1);
++ msleep(30);
+
+ config.dev = &i2c->dev;
+ config.regmap = regmap;
+diff --git a/drivers/regulator/rtq2134-regulator.c b/drivers/regulator/rtq2134-regulator.c
+index f21e3f8b21f23..8e13dea354a21 100644
+--- a/drivers/regulator/rtq2134-regulator.c
++++ b/drivers/regulator/rtq2134-regulator.c
+@@ -285,6 +285,7 @@ static const unsigned int rtq2134_buck_ramp_delay_table[] = {
+ .enable_mask = RTQ2134_VOUTEN_MASK, \
+ .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \
+ .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \
++ .active_discharge_on = RTQ2134_ACTDISCHG_MASK, \
+ .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \
+ .ramp_mask = RTQ2134_RSPUP_MASK, \
+ .ramp_delay_table = rtq2134_buck_ramp_delay_table, \
+diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
+index 7c111bbdc2afa..754c6fcc6e642 100644
+--- a/drivers/regulator/s5m8767.c
++++ b/drivers/regulator/s5m8767.c
+@@ -850,18 +850,15 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
+ /* DS4 GPIO */
+ gpio_direction_output(pdata->buck_ds[2], 0x0);
+
+- if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
+- pdata->buck4_gpiodvs) {
+- regmap_update_bits(s5m8767->iodev->regmap_pmic,
+- S5M8767_REG_BUCK2CTRL, 1 << 1,
+- (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1));
+- regmap_update_bits(s5m8767->iodev->regmap_pmic,
+- S5M8767_REG_BUCK3CTRL, 1 << 1,
+- (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1));
+- regmap_update_bits(s5m8767->iodev->regmap_pmic,
+- S5M8767_REG_BUCK4CTRL, 1 << 1,
+- (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1));
+- }
++ regmap_update_bits(s5m8767->iodev->regmap_pmic,
++ S5M8767_REG_BUCK2CTRL, 1 << 1,
++ (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1));
++ regmap_update_bits(s5m8767->iodev->regmap_pmic,
++ S5M8767_REG_BUCK3CTRL, 1 << 1,
++ (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1));
++ regmap_update_bits(s5m8767->iodev->regmap_pmic,
++ S5M8767_REG_BUCK4CTRL, 1 << 1,
++ (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1));
+
+ /* Initialize GPIO DVS registers */
+ for (i = 0; i < 8; i++) {
+@@ -926,10 +923,14 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
+
+ for (i = 0; i < pdata->num_regulators; i++) {
+ const struct sec_voltage_desc *desc;
+- int id = pdata->regulators[i].id;
++ unsigned int id = pdata->regulators[i].id;
+ int enable_reg, enable_val;
+ struct regulator_dev *rdev;
+
++ BUILD_BUG_ON(ARRAY_SIZE(regulators) != ARRAY_SIZE(reg_voltage_map));
++ if (WARN_ON_ONCE(id >= ARRAY_SIZE(regulators)))
++ continue;
++
+ desc = reg_voltage_map[id];
+ if (desc) {
+ regulators[id].n_voltages =
+diff --git a/drivers/regulator/scmi-regulator.c b/drivers/regulator/scmi-regulator.c
+index 1f02f60ad1366..41ae7ac27ff6a 100644
+--- a/drivers/regulator/scmi-regulator.c
++++ b/drivers/regulator/scmi-regulator.c
+@@ -352,7 +352,7 @@ static int scmi_regulator_probe(struct scmi_device *sdev)
+ return ret;
+ }
+ }
+-
++ of_node_put(np);
+ /*
+ * Register a regulator for each valid regulator-DT-entry that we
+ * can successfully reach via SCMI and has a valid associated voltage
+diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c
+index 75a941fb3c2bd..1b2eee95ad3f9 100644
+--- a/drivers/regulator/slg51000-regulator.c
++++ b/drivers/regulator/slg51000-regulator.c
+@@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client)
+ chip->cs_gpiod = cs_gpiod;
+ }
+
++ usleep_range(10000, 11000);
++
+ i2c_set_clientdata(client, chip);
+ chip->chip_irq = client->irq;
+ chip->dev = dev;
+diff --git a/drivers/regulator/stm32-pwr.c b/drivers/regulator/stm32-pwr.c
+index 2a42acb7c24e9..e5dd4db6403b2 100644
+--- a/drivers/regulator/stm32-pwr.c
++++ b/drivers/regulator/stm32-pwr.c
+@@ -129,17 +129,16 @@ static const struct regulator_desc stm32_pwr_desc[] = {
+
+ static int stm32_pwr_regulator_probe(struct platform_device *pdev)
+ {
+- struct device_node *np = pdev->dev.of_node;
+ struct stm32_pwr_reg *priv;
+ void __iomem *base;
+ struct regulator_dev *rdev;
+ struct regulator_config config = { };
+ int i, ret = 0;
+
+- base = of_iomap(np, 0);
+- if (!base) {
++ base = devm_platform_ioremap_resource(pdev, 0);
++ if (IS_ERR(base)) {
+ dev_err(&pdev->dev, "Unable to map IO memory\n");
+- return -ENOMEM;
++ return PTR_ERR(base);
+ }
+
+ config.dev = &pdev->dev;
+diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
+index 430265c404d65..f3856750944f4 100644
+--- a/drivers/regulator/twl6030-regulator.c
++++ b/drivers/regulator/twl6030-regulator.c
+@@ -67,6 +67,7 @@ struct twlreg_info {
+ #define TWL6030_CFG_STATE_SLEEP 0x03
+ #define TWL6030_CFG_STATE_GRP_SHIFT 5
+ #define TWL6030_CFG_STATE_APP_SHIFT 2
++#define TWL6030_CFG_STATE_MASK 0x03
+ #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT)
+ #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\
+ TWL6030_CFG_STATE_APP_SHIFT)
+@@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev)
+ if (grp < 0)
+ return grp;
+ grp &= P1_GRP_6030;
++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++ val = TWL6030_CFG_STATE_APP(val);
+ } else {
++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
++ val &= TWL6030_CFG_STATE_MASK;
+ grp = 1;
+ }
+
+- val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+- val = TWL6030_CFG_STATE_APP(val);
+-
+ return grp && (val == TWL6030_CFG_STATE_ON);
+ }
+
+@@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev)
+
+ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE);
+
+- switch (TWL6030_CFG_STATE_APP(val)) {
++ if (info->features & TWL6032_SUBCLASS)
++ val &= TWL6030_CFG_STATE_MASK;
++ else
++ val = TWL6030_CFG_STATE_APP(val);
++
++ switch (val) {
+ case TWL6030_CFG_STATE_ON:
+ return REGULATOR_STATUS_NORMAL;
+
+@@ -530,6 +537,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \
+ #define TWL6032_ADJUSTABLE_LDO(label, offset) \
+ static const struct twlreg_info TWL6032_INFO_##label = { \
+ .base = offset, \
++ .features = TWL6032_SUBCLASS, \
+ .desc = { \
+ .name = #label, \
+ .id = TWL6032_REG_##label, \
+@@ -562,6 +570,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \
+ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \
+ static const struct twlreg_info TWLSMPS_INFO_##label = { \
+ .base = offset, \
++ .features = TWL6032_SUBCLASS, \
+ .desc = { \
+ .name = #label, \
+ .id = TWL6032_REG_##label, \
+diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c
+index cadea0344486f..40befdd9dfa92 100644
+--- a/drivers/regulator/wm8994-regulator.c
++++ b/drivers/regulator/wm8994-regulator.c
+@@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = {
+ };
+
+ static const struct regulator_desc wm8994_ldo_desc[] = {
++ {
++ .name = "LDO1",
++ .id = 1,
++ .type = REGULATOR_VOLTAGE,
++ .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1,
++ .vsel_reg = WM8994_LDO_1,
++ .vsel_mask = WM8994_LDO1_VSEL_MASK,
++ .ops = &wm8994_ldo1_ops,
++ .min_uV = 2400000,
++ .uV_step = 100000,
++ .enable_time = 3000,
++ .off_on_delay = 36000,
++ .owner = THIS_MODULE,
++ },
++ {
++ .name = "LDO2",
++ .id = 2,
++ .type = REGULATOR_VOLTAGE,
++ .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1,
++ .vsel_reg = WM8994_LDO_2,
++ .vsel_mask = WM8994_LDO2_VSEL_MASK,
++ .ops = &wm8994_ldo2_ops,
++ .enable_time = 3000,
++ .off_on_delay = 36000,
++ .owner = THIS_MODULE,
++ },
++};
++
++static const struct regulator_desc wm8958_ldo_desc[] = {
+ {
+ .name = "LDO1",
+ .id = 1,
+@@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device *pdev)
+ * regulator core and we need not worry about it on the
+ * error path.
+ */
+- ldo->regulator = devm_regulator_register(&pdev->dev,
+- &wm8994_ldo_desc[id],
+- &config);
++ if (ldo->wm8994->type == WM8994) {
++ ldo->regulator = devm_regulator_register(&pdev->dev,
++ &wm8994_ldo_desc[id],
++ &config);
++ } else {
++ ldo->regulator = devm_regulator_register(&pdev->dev,
++ &wm8958_ldo_desc[id],
++ &config);
++ }
++
+ if (IS_ERR(ldo->regulator)) {
+ ret = PTR_ERR(ldo->regulator);
+ dev_err(wm8994->dev, "Failed to register LDO%d: %d\n",
+diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
+index d88f76f5305eb..c4e1ad813e097 100644
+--- a/drivers/remoteproc/imx_rproc.c
++++ b/drivers/remoteproc/imx_rproc.c
+@@ -71,6 +71,7 @@ struct imx_rproc_mem {
+ /* att flags */
+ /* M4 own area. Can be mapped at probe */
+ #define ATT_OWN BIT(1)
++#define ATT_IOMEM BIT(2)
+
+ /* address translation table */
+ struct imx_rproc_att {
+@@ -117,7 +118,7 @@ struct imx_rproc {
+ static const struct imx_rproc_att imx_rproc_att_imx8mn[] = {
+ /* dev addr , sys addr , size , flags */
+ /* ITCM */
+- { 0x00000000, 0x007E0000, 0x00020000, ATT_OWN },
++ { 0x00000000, 0x007E0000, 0x00020000, ATT_OWN | ATT_IOMEM },
+ /* OCRAM_S */
+ { 0x00180000, 0x00180000, 0x00009000, 0 },
+ /* OCRAM */
+@@ -131,7 +132,7 @@ static const struct imx_rproc_att imx_rproc_att_imx8mn[] = {
+ /* DDR (Code) - alias */
+ { 0x10000000, 0x40000000, 0x0FFE0000, 0 },
+ /* DTCM */
+- { 0x20000000, 0x00800000, 0x00020000, ATT_OWN },
++ { 0x20000000, 0x00800000, 0x00020000, ATT_OWN | ATT_IOMEM },
+ /* OCRAM_S - alias */
+ { 0x20180000, 0x00180000, 0x00008000, ATT_OWN },
+ /* OCRAM */
+@@ -147,7 +148,7 @@ static const struct imx_rproc_att imx_rproc_att_imx8mn[] = {
+ static const struct imx_rproc_att imx_rproc_att_imx8mq[] = {
+ /* dev addr , sys addr , size , flags */
+ /* TCML - alias */
+- { 0x00000000, 0x007e0000, 0x00020000, 0 },
++ { 0x00000000, 0x007e0000, 0x00020000, ATT_IOMEM},
+ /* OCRAM_S */
+ { 0x00180000, 0x00180000, 0x00008000, 0 },
+ /* OCRAM */
+@@ -159,9 +160,9 @@ static const struct imx_rproc_att imx_rproc_att_imx8mq[] = {
+ /* DDR (Code) - alias */
+ { 0x10000000, 0x80000000, 0x0FFE0000, 0 },
+ /* TCML */
+- { 0x1FFE0000, 0x007E0000, 0x00020000, ATT_OWN },
++ { 0x1FFE0000, 0x007E0000, 0x00020000, ATT_OWN | ATT_IOMEM},
+ /* TCMU */
+- { 0x20000000, 0x00800000, 0x00020000, ATT_OWN },
++ { 0x20000000, 0x00800000, 0x00020000, ATT_OWN | ATT_IOMEM},
+ /* OCRAM_S */
+ { 0x20180000, 0x00180000, 0x00008000, ATT_OWN },
+ /* OCRAM */
+@@ -199,12 +200,12 @@ static const struct imx_rproc_att imx_rproc_att_imx7d[] = {
+ /* OCRAM_PXP (Code) - alias */
+ { 0x00940000, 0x00940000, 0x00008000, 0 },
+ /* TCML (Code) */
+- { 0x1FFF8000, 0x007F8000, 0x00008000, ATT_OWN },
++ { 0x1FFF8000, 0x007F8000, 0x00008000, ATT_OWN | ATT_IOMEM },
+ /* DDR (Code) - alias, first part of DDR (Data) */
+ { 0x10000000, 0x80000000, 0x0FFF0000, 0 },
+
+ /* TCMU (Data) */
+- { 0x20000000, 0x00800000, 0x00008000, ATT_OWN },
++ { 0x20000000, 0x00800000, 0x00008000, ATT_OWN | ATT_IOMEM },
+ /* OCRAM (Data) */
+ { 0x20200000, 0x00900000, 0x00020000, 0 },
+ /* OCRAM_EPDC (Data) */
+@@ -218,18 +219,18 @@ static const struct imx_rproc_att imx_rproc_att_imx7d[] = {
+ static const struct imx_rproc_att imx_rproc_att_imx6sx[] = {
+ /* dev addr , sys addr , size , flags */
+ /* TCML (M4 Boot Code) - alias */
+- { 0x00000000, 0x007F8000, 0x00008000, 0 },
++ { 0x00000000, 0x007F8000, 0x00008000, ATT_IOMEM },
+ /* OCRAM_S (Code) */
+ { 0x00180000, 0x008F8000, 0x00004000, 0 },
+ /* OCRAM_S (Code) - alias */
+ { 0x00180000, 0x008FC000, 0x00004000, 0 },
+ /* TCML (Code) */
+- { 0x1FFF8000, 0x007F8000, 0x00008000, ATT_OWN },
++ { 0x1FFF8000, 0x007F8000, 0x00008000, ATT_OWN | ATT_IOMEM },
+ /* DDR (Code) - alias, first part of DDR (Data) */
+ { 0x10000000, 0x80000000, 0x0FFF8000, 0 },
+
+ /* TCMU (Data) */
+- { 0x20000000, 0x00800000, 0x00008000, ATT_OWN },
++ { 0x20000000, 0x00800000, 0x00008000, ATT_OWN | ATT_IOMEM },
+ /* OCRAM_S (Data) - alias? */
+ { 0x208F8000, 0x008F8000, 0x00004000, 0 },
+ /* DDR (Data) */
+@@ -341,7 +342,7 @@ static int imx_rproc_stop(struct rproc *rproc)
+ }
+
+ static int imx_rproc_da_to_sys(struct imx_rproc *priv, u64 da,
+- size_t len, u64 *sys)
++ size_t len, u64 *sys, bool *is_iomem)
+ {
+ const struct imx_rproc_dcfg *dcfg = priv->dcfg;
+ int i;
+@@ -354,6 +355,8 @@ static int imx_rproc_da_to_sys(struct imx_rproc *priv, u64 da,
+ unsigned int offset = da - att->da;
+
+ *sys = att->sa + offset;
++ if (is_iomem)
++ *is_iomem = att->flags & ATT_IOMEM;
+ return 0;
+ }
+ }
+@@ -377,7 +380,7 @@ static void *imx_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *i
+ * On device side we have many aliases, so we need to convert device
+ * address (M4) to system bus address first.
+ */
+- if (imx_rproc_da_to_sys(priv, da, len, &sys))
++ if (imx_rproc_da_to_sys(priv, da, len, &sys, is_iomem))
+ return NULL;
+
+ for (i = 0; i < IMX_RPROC_MEM_MAX; i++) {
+@@ -444,8 +447,12 @@ static int imx_rproc_prepare(struct rproc *rproc)
+ if (!strcmp(it.node->name, "vdev0buffer"))
+ continue;
+
++ if (!strcmp(it.node->name, "rsc-table"))
++ continue;
++
+ rmem = of_reserved_mem_lookup(it.node);
+ if (!rmem) {
++ of_node_put(it.node);
+ dev_err(priv->dev, "unable to acquire memory-region\n");
+ return -EINVAL;
+ }
+@@ -458,10 +465,12 @@ static int imx_rproc_prepare(struct rproc *rproc)
+ imx_rproc_mem_alloc, imx_rproc_mem_release,
+ it.node->name);
+
+- if (mem)
++ if (mem) {
+ rproc_coredump_add_segment(rproc, da, rmem->size);
+- else
++ } else {
++ of_node_put(it.node);
+ return -ENOMEM;
++ }
+
+ rproc_add_carveout(rproc, mem);
+ }
+@@ -553,8 +562,12 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+ if (b >= IMX_RPROC_MEM_MAX)
+ break;
+
+- priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev,
+- att->sa, att->size);
++ if (att->flags & ATT_IOMEM)
++ priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev,
++ att->sa, att->size);
++ else
++ priv->mem[b].cpu_addr = devm_ioremap_wc(&pdev->dev,
++ att->sa, att->size);
+ if (!priv->mem[b].cpu_addr) {
+ dev_err(dev, "failed to remap %#x bytes from %#x\n", att->size, att->sa);
+ return -ENOMEM;
+@@ -575,17 +588,18 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+ struct resource res;
+
+ node = of_parse_phandle(np, "memory-region", a);
+- /* Not map vdev region */
+- if (!strcmp(node->name, "vdev"))
++ /* Not map vdevbuffer, vdevring region */
++ if (!strncmp(node->name, "vdev", strlen("vdev"))) {
++ of_node_put(node);
+ continue;
++ }
+ err = of_address_to_resource(node, 0, &res);
++ of_node_put(node);
+ if (err) {
+ dev_err(dev, "unable to resolve memory region\n");
+ return err;
+ }
+
+- of_node_put(node);
+-
+ if (b >= IMX_RPROC_MEM_MAX)
+ break;
+
+@@ -597,7 +611,7 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+ }
+ priv->mem[b].sys_addr = res.start;
+ priv->mem[b].size = resource_size(&res);
+- if (!strcmp(node->name, "rsc_table"))
++ if (!strcmp(node->name, "rsc-table"))
+ priv->rsc_table = priv->mem[b].cpu_addr;
+ b++;
+ }
+@@ -823,6 +837,7 @@ static int imx_rproc_remove(struct platform_device *pdev)
+ clk_disable_unprepare(priv->clk);
+ rproc_del(rproc);
+ imx_rproc_free_mbox(rproc);
++ destroy_workqueue(priv->workqueue);
+ rproc_free(rproc);
+
+ return 0;
+diff --git a/drivers/remoteproc/mtk_scp_ipi.c b/drivers/remoteproc/mtk_scp_ipi.c
+index 6dc955ecab80f..968128b78e59c 100644
+--- a/drivers/remoteproc/mtk_scp_ipi.c
++++ b/drivers/remoteproc/mtk_scp_ipi.c
+@@ -164,21 +164,21 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len,
+ WARN_ON(len > sizeof(send_obj->share_buf)) || WARN_ON(!buf))
+ return -EINVAL;
+
+- mutex_lock(&scp->send_lock);
+-
+ ret = clk_prepare_enable(scp->clk);
+ if (ret) {
+ dev_err(scp->dev, "failed to enable clock\n");
+- goto unlock_mutex;
++ return ret;
+ }
+
++ mutex_lock(&scp->send_lock);
++
+ /* Wait until SCP receives the last command */
+ timeout = jiffies + msecs_to_jiffies(2000);
+ do {
+ if (time_after(jiffies, timeout)) {
+ dev_err(scp->dev, "%s: IPI timeout!\n", __func__);
+ ret = -ETIMEDOUT;
+- goto clock_disable;
++ goto unlock_mutex;
+ }
+ } while (readl(scp->reg_base + scp->data->host_to_scp_reg));
+
+@@ -205,10 +205,9 @@ int scp_ipi_send(struct mtk_scp *scp, u32 id, void *buf, unsigned int len,
+ ret = 0;
+ }
+
+-clock_disable:
+- clk_disable_unprepare(scp->clk);
+ unlock_mutex:
+ mutex_unlock(&scp->send_lock);
++ clk_disable_unprepare(scp->clk);
+
+ return ret;
+ }
+diff --git a/drivers/remoteproc/qcom_pil_info.c b/drivers/remoteproc/qcom_pil_info.c
+index 7c007dd7b2000..aca21560e20b8 100644
+--- a/drivers/remoteproc/qcom_pil_info.c
++++ b/drivers/remoteproc/qcom_pil_info.c
+@@ -104,7 +104,7 @@ int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size)
+ return -ENOMEM;
+
+ found_unused:
+- memcpy_toio(entry, image, PIL_RELOC_NAME_LEN);
++ memcpy_toio(entry, image, strnlen(image, PIL_RELOC_NAME_LEN));
+ found_existing:
+ /* Use two writel() as base is only aligned to 4 bytes on odd entries */
+ writel(base, entry + PIL_RELOC_NAME_LEN);
+diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c
+index 8b0d8bbacd2e4..c10d452c97032 100644
+--- a/drivers/remoteproc/qcom_q6v5_adsp.c
++++ b/drivers/remoteproc/qcom_q6v5_adsp.c
+@@ -406,6 +406,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
+ }
+
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
+index 423b31dfa5741..93eefefd514c7 100644
+--- a/drivers/remoteproc/qcom_q6v5_mss.c
++++ b/drivers/remoteproc/qcom_q6v5_mss.c
+@@ -17,6 +17,7 @@
+ #include <linux/module.h>
+ #include <linux/of_address.h>
+ #include <linux/of_device.h>
++#include <linux/of_reserved_mem.h>
+ #include <linux/platform_device.h>
+ #include <linux/pm_domain.h>
+ #include <linux/pm_runtime.h>
+@@ -192,6 +193,9 @@ struct q6v5 {
+ size_t mba_size;
+ size_t dp_size;
+
++ phys_addr_t mdata_phys;
++ size_t mdata_size;
++
+ phys_addr_t mpss_phys;
+ phys_addr_t mpss_reloc;
+ size_t mpss_size;
+@@ -832,15 +836,35 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw)
+ if (IS_ERR(metadata))
+ return PTR_ERR(metadata);
+
+- ptr = dma_alloc_attrs(qproc->dev, size, &phys, GFP_KERNEL, dma_attrs);
+- if (!ptr) {
+- kfree(metadata);
+- dev_err(qproc->dev, "failed to allocate mdt buffer\n");
+- return -ENOMEM;
++ if (qproc->mdata_phys) {
++ if (size > qproc->mdata_size) {
++ ret = -EINVAL;
++ dev_err(qproc->dev, "metadata size outside memory range\n");
++ goto free_metadata;
++ }
++
++ phys = qproc->mdata_phys;
++ ptr = memremap(qproc->mdata_phys, size, MEMREMAP_WC);
++ if (!ptr) {
++ ret = -EBUSY;
++ dev_err(qproc->dev, "unable to map memory region: %pa+%zx\n",
++ &qproc->mdata_phys, size);
++ goto free_metadata;
++ }
++ } else {
++ ptr = dma_alloc_attrs(qproc->dev, size, &phys, GFP_KERNEL, dma_attrs);
++ if (!ptr) {
++ ret = -ENOMEM;
++ dev_err(qproc->dev, "failed to allocate mdt buffer\n");
++ goto free_metadata;
++ }
+ }
+
+ memcpy(ptr, metadata, size);
+
++ if (qproc->mdata_phys)
++ memunmap(ptr);
++
+ /* Hypervisor mapping to access metadata by modem */
+ mdata_perm = BIT(QCOM_SCM_VMID_HLOS);
+ ret = q6v5_xfer_mem_ownership(qproc, &mdata_perm, false, true,
+@@ -869,7 +893,9 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw)
+ "mdt buffer not reclaimed system may become unstable\n");
+
+ free_dma_attrs:
+- dma_free_attrs(qproc->dev, size, ptr, phys, dma_attrs);
++ if (!qproc->mdata_phys)
++ dma_free_attrs(qproc->dev, size, ptr, phys, dma_attrs);
++free_metadata:
+ kfree(metadata);
+
+ return ret < 0 ? ret : 0;
+@@ -1615,6 +1641,7 @@ static int q6v5_init_reset(struct q6v5 *qproc)
+ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
+ {
+ struct device_node *child;
++ struct reserved_mem *rmem;
+ struct device_node *node;
+ struct resource r;
+ int ret;
+@@ -1624,18 +1651,20 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
+ * reserved memory regions from device's memory-region property.
+ */
+ child = of_get_child_by_name(qproc->dev->of_node, "mba");
+- if (!child)
++ if (!child) {
+ node = of_parse_phandle(qproc->dev->of_node,
+ "memory-region", 0);
+- else
++ } else {
+ node = of_parse_phandle(child, "memory-region", 0);
++ of_node_put(child);
++ }
+
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret) {
+ dev_err(qproc->dev, "unable to resolve mba region\n");
+ return ret;
+ }
+- of_node_put(node);
+
+ qproc->mba_phys = r.start;
+ qproc->mba_size = resource_size(&r);
+@@ -1646,18 +1675,39 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
+ } else {
+ child = of_get_child_by_name(qproc->dev->of_node, "mpss");
+ node = of_parse_phandle(child, "memory-region", 0);
++ of_node_put(child);
+ }
+
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret) {
+ dev_err(qproc->dev, "unable to resolve mpss region\n");
+ return ret;
+ }
+- of_node_put(node);
+
+ qproc->mpss_phys = qproc->mpss_reloc = r.start;
+ qproc->mpss_size = resource_size(&r);
+
++ if (!child) {
++ node = of_parse_phandle(qproc->dev->of_node, "memory-region", 2);
++ } else {
++ child = of_get_child_by_name(qproc->dev->of_node, "metadata");
++ node = of_parse_phandle(child, "memory-region", 0);
++ of_node_put(child);
++ }
++
++ if (!node)
++ return 0;
++
++ rmem = of_reserved_mem_lookup(node);
++ if (!rmem) {
++ dev_err(qproc->dev, "unable to resolve metadata region\n");
++ return -EINVAL;
++ }
++
++ qproc->mdata_phys = rmem->base;
++ qproc->mdata_size = rmem->size;
++
+ return 0;
+ }
+
+diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c
+index 401b1ec907852..fbcbc00f2e645 100644
+--- a/drivers/remoteproc/qcom_q6v5_pas.c
++++ b/drivers/remoteproc/qcom_q6v5_pas.c
+@@ -87,6 +87,9 @@ static void adsp_minidump(struct rproc *rproc)
+ {
+ struct qcom_adsp *adsp = rproc->priv;
+
++ if (rproc->dump_conf == RPROC_COREDUMP_DISABLED)
++ return;
++
+ qcom_minidump(rproc, adsp->minidump_id);
+ }
+
+@@ -383,6 +386,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
+ }
+
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret)
+ return ret;
+
+@@ -495,6 +499,7 @@ detach_proxy_pds:
+ detach_active_pds:
+ adsp_pds_detach(adsp, adsp->active_pds, adsp->active_pd_count);
+ free_rproc:
++ device_init_wakeup(adsp->dev, false);
+ rproc_free(rproc);
+
+ return ret;
+@@ -510,6 +515,8 @@ static int adsp_remove(struct platform_device *pdev)
+ qcom_remove_sysmon_subdev(adsp->sysmon);
+ qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev);
+ qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
++ adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count);
++ device_init_wakeup(adsp->dev, false);
+ rproc_free(adsp->rproc);
+
+ return 0;
+@@ -661,6 +668,7 @@ static const struct adsp_data sm8350_cdsp_resource = {
+ },
+ .proxy_pd_names = (char*[]){
+ "cx",
++ "mxc",
+ NULL
+ },
+ .ssr_name = "cdsp",
+diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c b/drivers/remoteproc/qcom_q6v5_wcss.c
+index 20d50ec7eff1b..cfd34ffcbb121 100644
+--- a/drivers/remoteproc/qcom_q6v5_wcss.c
++++ b/drivers/remoteproc/qcom_q6v5_wcss.c
+@@ -351,7 +351,7 @@ static int q6v5_wcss_qcs404_power_on(struct q6v5_wcss *wcss)
+ if (ret) {
+ dev_err(wcss->dev,
+ "xo cbcr enabling timed out (rc:%d)\n", ret);
+- return ret;
++ goto disable_xo_cbcr_clk;
+ }
+
+ writel(0, wcss->reg_base + Q6SS_CGC_OVERRIDE);
+@@ -417,6 +417,7 @@ disable_sleep_cbcr_clk:
+ val = readl(wcss->reg_base + Q6SS_SLEEP_CBCR);
+ val &= ~Q6SS_CLK_ENABLE;
+ writel(val, wcss->reg_base + Q6SS_SLEEP_CBCR);
++disable_xo_cbcr_clk:
+ val = readl(wcss->reg_base + Q6SS_XO_CBCR);
+ val &= ~Q6SS_CLK_ENABLE;
+ writel(val, wcss->reg_base + Q6SS_XO_CBCR);
+@@ -827,6 +828,9 @@ static int q6v5_wcss_init_mmio(struct q6v5_wcss *wcss,
+ int ret;
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qdsp6");
++ if (!res)
++ return -EINVAL;
++
+ wcss->reg_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!wcss->reg_base)
+diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
+index 9fca814928635..fbfaf2637a91a 100644
+--- a/drivers/remoteproc/qcom_sysmon.c
++++ b/drivers/remoteproc/qcom_sysmon.c
+@@ -41,6 +41,7 @@ struct qcom_sysmon {
+ struct completion comp;
+ struct completion ind_comp;
+ struct completion shutdown_comp;
++ struct completion ssctl_comp;
+ struct mutex lock;
+
+ bool ssr_ack;
+@@ -445,6 +446,8 @@ static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc)
+
+ svc->priv = sysmon;
+
++ complete(&sysmon->ssctl_comp);
++
+ return 0;
+ }
+
+@@ -501,6 +504,7 @@ static int sysmon_start(struct rproc_subdev *subdev)
+ .ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
+ };
+
++ reinit_completion(&sysmon->ssctl_comp);
+ mutex_lock(&sysmon->state_lock);
+ sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
+ blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
+@@ -545,6 +549,11 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
+ if (crashed)
+ return;
+
++ if (sysmon->ssctl_instance) {
++ if (!wait_for_completion_timeout(&sysmon->ssctl_comp, HZ / 2))
++ dev_err(sysmon->dev, "timeout waiting for ssctl service\n");
++ }
++
+ if (sysmon->ssctl_version)
+ sysmon->shutdown_acked = ssctl_request_shutdown(sysmon);
+ else if (sysmon->ept)
+@@ -631,6 +640,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+ init_completion(&sysmon->comp);
+ init_completion(&sysmon->ind_comp);
+ init_completion(&sysmon->shutdown_comp);
++ init_completion(&sysmon->ssctl_comp);
+ mutex_init(&sysmon->lock);
+ mutex_init(&sysmon->state_lock);
+
+@@ -640,7 +650,9 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+ if (sysmon->shutdown_irq != -ENODATA) {
+ dev_err(sysmon->dev,
+ "failed to retrieve shutdown-ack IRQ\n");
+- return ERR_PTR(sysmon->shutdown_irq);
++ ret = sysmon->shutdown_irq;
++ kfree(sysmon);
++ return ERR_PTR(ret);
+ }
+ } else {
+ ret = devm_request_threaded_irq(sysmon->dev,
+@@ -651,6 +663,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+ if (ret) {
+ dev_err(sysmon->dev,
+ "failed to acquire shutdown-ack IRQ\n");
++ kfree(sysmon);
+ return ERR_PTR(ret);
+ }
+ }
+diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
+index ebadc6c08e116..97a0c0dc4c77a 100644
+--- a/drivers/remoteproc/qcom_wcnss.c
++++ b/drivers/remoteproc/qcom_wcnss.c
+@@ -468,6 +468,7 @@ static int wcnss_request_irq(struct qcom_wcnss *wcnss,
+ irq_handler_t thread_fn)
+ {
+ int ret;
++ int irq_number;
+
+ ret = platform_get_irq_byname(pdev, name);
+ if (ret < 0 && optional) {
+@@ -478,14 +479,19 @@ static int wcnss_request_irq(struct qcom_wcnss *wcnss,
+ return ret;
+ }
+
++ irq_number = ret;
++
+ ret = devm_request_threaded_irq(&pdev->dev, ret,
+ NULL, thread_fn,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "wcnss", wcnss);
+- if (ret)
++ if (ret) {
+ dev_err(&pdev->dev, "request %s IRQ failed\n", name);
++ return ret;
++ }
+
+- return ret;
++ /* Return the IRQ number if the IRQ was successfully acquired */
++ return irq_number;
+ }
+
+ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss)
+@@ -501,6 +507,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss)
+ }
+
+ ret = of_address_to_resource(node, 0, &r);
++ of_node_put(node);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
+index 502b6604b757b..97e59f7461261 100644
+--- a/drivers/remoteproc/remoteproc_core.c
++++ b/drivers/remoteproc/remoteproc_core.c
+@@ -556,9 +556,6 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr,
+ /* Initialise vdev subdevice */
+ snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index);
+ rvdev->dev.parent = &rproc->dev;
+- ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent);
+- if (ret)
+- return ret;
+ rvdev->dev.release = rproc_rvdev_release;
+ dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name);
+ dev_set_drvdata(&rvdev->dev, rvdev);
+@@ -568,6 +565,11 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr,
+ put_device(&rvdev->dev);
+ return ret;
+ }
++
++ ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent);
++ if (ret)
++ goto free_rvdev;
++
+ /* Make device dma capable by inheriting from parent's capabilities */
+ set_dma_ops(&rvdev->dev, get_dma_ops(rproc->dev.parent));
+
+@@ -1953,12 +1955,18 @@ static void rproc_crash_handler_work(struct work_struct *work)
+
+ mutex_lock(&rproc->lock);
+
+- if (rproc->state == RPROC_CRASHED || rproc->state == RPROC_OFFLINE) {
++ if (rproc->state == RPROC_CRASHED) {
+ /* handle only the first crash detected */
+ mutex_unlock(&rproc->lock);
+ return;
+ }
+
++ if (rproc->state == RPROC_OFFLINE) {
++ /* Don't recover if the remote processor was stopped */
++ mutex_unlock(&rproc->lock);
++ goto out;
++ }
++
+ rproc->state = RPROC_CRASHED;
+ dev_err(dev, "handling crash #%u in %s\n", ++rproc->crash_cnt,
+ rproc->name);
+@@ -1968,6 +1976,7 @@ static void rproc_crash_handler_work(struct work_struct *work)
+ if (!rproc->recovery_disabled)
+ rproc_trigger_recovery(rproc);
+
++out:
+ pm_relax(rproc->dev.parent);
+ }
+
+diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c
+index aee657cc08c6a..c892f433a323e 100644
+--- a/drivers/remoteproc/remoteproc_coredump.c
++++ b/drivers/remoteproc/remoteproc_coredump.c
+@@ -152,8 +152,8 @@ static void rproc_copy_segment(struct rproc *rproc, void *dest,
+ struct rproc_dump_segment *segment,
+ size_t offset, size_t size)
+ {
++ bool is_iomem = false;
+ void *ptr;
+- bool is_iomem;
+
+ if (segment->dump) {
+ segment->dump(rproc, segment, dest, offset, size);
+diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
+index b5a1e3b697d9f..581930483ef84 100644
+--- a/drivers/remoteproc/remoteproc_debugfs.c
++++ b/drivers/remoteproc/remoteproc_debugfs.c
+@@ -76,7 +76,7 @@ static ssize_t rproc_coredump_write(struct file *filp,
+ int ret, err = 0;
+ char buf[20];
+
+- if (count > sizeof(buf))
++ if (count < 1 || count > sizeof(buf))
+ return -EINVAL;
+
+ ret = copy_from_user(buf, user_buf, count);
+diff --git a/drivers/remoteproc/remoteproc_elf_loader.c b/drivers/remoteproc/remoteproc_elf_loader.c
+index 469c52e62faff..d635d19a5aa8a 100644
+--- a/drivers/remoteproc/remoteproc_elf_loader.c
++++ b/drivers/remoteproc/remoteproc_elf_loader.c
+@@ -178,8 +178,8 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
+ u64 filesz = elf_phdr_get_p_filesz(class, phdr);
+ u64 offset = elf_phdr_get_p_offset(class, phdr);
+ u32 type = elf_phdr_get_p_type(class, phdr);
++ bool is_iomem = false;
+ void *ptr;
+- bool is_iomem;
+
+ if (type != PT_LOAD)
+ continue;
+@@ -220,7 +220,7 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
+ /* put the segment where the remote processor expects it */
+ if (filesz) {
+ if (is_iomem)
+- memcpy_fromio(ptr, (void __iomem *)(elf_data + offset), filesz);
++ memcpy_toio((void __iomem *)ptr, elf_data + offset, filesz);
+ else
+ memcpy(ptr, elf_data + offset, filesz);
+ }
+diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c
+index a3268d95a50e6..e6bd3c7a950a2 100644
+--- a/drivers/remoteproc/st_remoteproc.c
++++ b/drivers/remoteproc/st_remoteproc.c
+@@ -129,6 +129,7 @@ static int st_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
+ while (of_phandle_iterator_next(&it) == 0) {
+ rmem = of_reserved_mem_lookup(it.node);
+ if (!rmem) {
++ of_node_put(it.node);
+ dev_err(dev, "unable to acquire memory-region\n");
+ return -EINVAL;
+ }
+@@ -150,8 +151,10 @@ static int st_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
+ it.node->name);
+ }
+
+- if (!mem)
++ if (!mem) {
++ of_node_put(it.node);
+ return -ENOMEM;
++ }
+
+ rproc_add_carveout(rproc, mem);
+ index++;
+diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c
+index b643efcf995a1..aba3df1d1bf52 100644
+--- a/drivers/remoteproc/stm32_rproc.c
++++ b/drivers/remoteproc/stm32_rproc.c
+@@ -223,11 +223,13 @@ static int stm32_rproc_prepare(struct rproc *rproc)
+ while (of_phandle_iterator_next(&it) == 0) {
+ rmem = of_reserved_mem_lookup(it.node);
+ if (!rmem) {
++ of_node_put(it.node);
+ dev_err(dev, "unable to acquire memory-region\n");
+ return -EINVAL;
+ }
+
+ if (stm32_rproc_pa_to_da(rproc, rmem->base, &da) < 0) {
++ of_node_put(it.node);
+ dev_err(dev, "memory region not valid %pa\n",
+ &rmem->base);
+ return -EINVAL;
+@@ -254,8 +256,10 @@ static int stm32_rproc_prepare(struct rproc *rproc)
+ it.node->name);
+ }
+
+- if (!mem)
++ if (!mem) {
++ of_node_put(it.node);
+ return -ENOMEM;
++ }
+
+ rproc_add_carveout(rproc, mem);
+ index++;
+@@ -287,8 +291,16 @@ static void stm32_rproc_mb_vq_work(struct work_struct *work)
+ struct stm32_mbox *mb = container_of(work, struct stm32_mbox, vq_work);
+ struct rproc *rproc = dev_get_drvdata(mb->client.dev);
+
++ mutex_lock(&rproc->lock);
++
++ if (rproc->state != RPROC_RUNNING)
++ goto unlock_mutex;
++
+ if (rproc_vq_interrupt(rproc, mb->vq_id) == IRQ_NONE)
+ dev_dbg(&rproc->dev, "no message found in vq%d\n", mb->vq_id);
++
++unlock_mutex:
++ mutex_unlock(&rproc->lock);
+ }
+
+ static void stm32_rproc_mb_callback(struct mbox_client *cl, void *data)
+diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
+index 71615210df3e0..54266ea69c84d 100644
+--- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
++++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
+@@ -1430,6 +1430,7 @@ static int k3_r5_cluster_of_init(struct platform_device *pdev)
+ if (!cpdev) {
+ ret = -ENODEV;
+ dev_err(dev, "could not get R5 core platform device\n");
++ of_node_put(child);
+ goto fail;
+ }
+
+@@ -1438,6 +1439,7 @@ static int k3_r5_cluster_of_init(struct platform_device *pdev)
+ dev_err(dev, "k3_r5_core_of_init failed, ret = %d\n",
+ ret);
+ put_device(&cpdev->dev);
++ of_node_put(child);
+ goto fail;
+ }
+
+diff --git a/drivers/reset/reset-imx7.c b/drivers/reset/reset-imx7.c
+index 185a333df66c5..d2408725eb2c3 100644
+--- a/drivers/reset/reset-imx7.c
++++ b/drivers/reset/reset-imx7.c
+@@ -329,6 +329,7 @@ static int imx8mp_reset_set(struct reset_controller_dev *rcdev,
+ break;
+
+ case IMX8MP_RESET_PCIE_CTRL_APPS_EN:
++ case IMX8MP_RESET_PCIEPHY_PERST:
+ value = assert ? 0 : bit;
+ break;
+ }
+diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c
+index e0704fd2b5336..a8dde46063602 100644
+--- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c
++++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c
+@@ -121,7 +121,9 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev)
+ return dev_err_probe(dev, PTR_ERR(priv->rstc),
+ "failed to get reset\n");
+
+- reset_control_deassert(priv->rstc);
++ error = reset_control_deassert(priv->rstc);
++ if (error)
++ return error;
+
+ priv->rcdev.ops = &rzg2l_usbphy_ctrl_reset_ops;
+ priv->rcdev.of_reset_n_cells = 1;
+@@ -137,7 +139,12 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev)
+ dev_set_drvdata(dev, priv);
+
+ pm_runtime_enable(&pdev->dev);
+- pm_runtime_resume_and_get(&pdev->dev);
++ error = pm_runtime_resume_and_get(&pdev->dev);
++ if (error < 0) {
++ pm_runtime_disable(&pdev->dev);
++ reset_control_assert(priv->rstc);
++ return dev_err_probe(&pdev->dev, error, "pm_runtime_resume_and_get failed");
++ }
+
+ /* put pll and phy into reset state */
+ spin_lock_irqsave(&priv->lock, flags);
+diff --git a/drivers/reset/reset-uniphier-glue.c b/drivers/reset/reset-uniphier-glue.c
+index 027990b79f61b..7493e9618837e 100644
+--- a/drivers/reset/reset-uniphier-glue.c
++++ b/drivers/reset/reset-uniphier-glue.c
+@@ -23,7 +23,7 @@ struct uniphier_glue_reset_soc_data {
+
+ struct uniphier_glue_reset_priv {
+ struct clk_bulk_data clk[MAX_CLKS];
+- struct reset_control *rst[MAX_RSTS];
++ struct reset_control_bulk_data rst[MAX_RSTS];
+ struct reset_simple_data rdata;
+ const struct uniphier_glue_reset_soc_data *data;
+ };
+@@ -33,9 +33,7 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ struct uniphier_glue_reset_priv *priv;
+ struct resource *res;
+- resource_size_t size;
+- const char *name;
+- int i, ret, nr;
++ int i, ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+@@ -47,7 +45,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
+ return -EINVAL;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- size = resource_size(res);
+ priv->rdata.membase = devm_ioremap_resource(dev, res);
+ if (IS_ERR(priv->rdata.membase))
+ return PTR_ERR(priv->rdata.membase);
+@@ -58,26 +55,24 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
+ if (ret)
+ return ret;
+
+- for (i = 0; i < priv->data->nrsts; i++) {
+- name = priv->data->reset_names[i];
+- priv->rst[i] = devm_reset_control_get_shared(dev, name);
+- if (IS_ERR(priv->rst[i]))
+- return PTR_ERR(priv->rst[i]);
+- }
++ for (i = 0; i < priv->data->nrsts; i++)
++ priv->rst[i].id = priv->data->reset_names[i];
++ ret = devm_reset_control_bulk_get_shared(dev, priv->data->nrsts,
++ priv->rst);
++ if (ret)
++ return ret;
+
+ ret = clk_bulk_prepare_enable(priv->data->nclks, priv->clk);
+ if (ret)
+ return ret;
+
+- for (nr = 0; nr < priv->data->nrsts; nr++) {
+- ret = reset_control_deassert(priv->rst[nr]);
+- if (ret)
+- goto out_rst_assert;
+- }
++ ret = reset_control_bulk_deassert(priv->data->nrsts, priv->rst);
++ if (ret)
++ goto out_clk_disable;
+
+ spin_lock_init(&priv->rdata.lock);
+ priv->rdata.rcdev.owner = THIS_MODULE;
+- priv->rdata.rcdev.nr_resets = size * BITS_PER_BYTE;
++ priv->rdata.rcdev.nr_resets = resource_size(res) * BITS_PER_BYTE;
+ priv->rdata.rcdev.ops = &reset_simple_ops;
+ priv->rdata.rcdev.of_node = dev->of_node;
+ priv->rdata.active_low = true;
+@@ -91,9 +86,9 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev)
+ return 0;
+
+ out_rst_assert:
+- while (nr--)
+- reset_control_assert(priv->rst[nr]);
++ reset_control_bulk_assert(priv->data->nrsts, priv->rst);
+
++out_clk_disable:
+ clk_bulk_disable_unprepare(priv->data->nclks, priv->clk);
+
+ return ret;
+@@ -102,10 +97,8 @@ out_rst_assert:
+ static int uniphier_glue_reset_remove(struct platform_device *pdev)
+ {
+ struct uniphier_glue_reset_priv *priv = platform_get_drvdata(pdev);
+- int i;
+
+- for (i = 0; i < priv->data->nrsts; i++)
+- reset_control_assert(priv->rst[i]);
++ reset_control_bulk_assert(priv->data->nrsts, priv->rst);
+
+ clk_bulk_disable_unprepare(priv->data->nclks, priv->clk);
+
+diff --git a/drivers/rpmsg/mtk_rpmsg.c b/drivers/rpmsg/mtk_rpmsg.c
+index 96a17ec291401..2d8cb596ad691 100644
+--- a/drivers/rpmsg/mtk_rpmsg.c
++++ b/drivers/rpmsg/mtk_rpmsg.c
+@@ -234,7 +234,9 @@ static void mtk_register_device_work_function(struct work_struct *register_work)
+ if (info->registered)
+ continue;
+
++ mutex_unlock(&subdev->channels_lock);
+ ret = mtk_rpmsg_register_device(subdev, &info->info);
++ mutex_lock(&subdev->channels_lock);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't create rpmsg_device\n");
+ continue;
+diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
+index 05533c71b10ed..d37fd1f431fe4 100644
+--- a/drivers/rpmsg/qcom_glink_native.c
++++ b/drivers/rpmsg/qcom_glink_native.c
+@@ -222,6 +222,10 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
+
+ channel->glink = glink;
+ channel->name = kstrdup(name, GFP_KERNEL);
++ if (!channel->name) {
++ kfree(channel);
++ return ERR_PTR(-ENOMEM);
++ }
+
+ init_completion(&channel->open_req);
+ init_completion(&channel->open_ack);
+@@ -929,6 +933,7 @@ static void qcom_glink_handle_intent(struct qcom_glink *glink,
+ spin_unlock_irqrestore(&glink->idr_lock, flags);
+ if (!channel) {
+ dev_err(glink->dev, "intents for non-existing channel\n");
++ qcom_glink_rx_advance(glink, ALIGN(msglen, 8));
+ return;
+ }
+
+@@ -1488,7 +1493,7 @@ static void qcom_glink_rx_close(struct qcom_glink *glink, unsigned int rcid)
+ cancel_work_sync(&channel->intent_work);
+
+ if (channel->rpdev) {
+- strncpy(chinfo.name, channel->name, sizeof(chinfo.name));
++ strscpy_pad(chinfo.name, channel->name, sizeof(chinfo.name));
+ chinfo.src = RPMSG_ADDR_ANY;
+ chinfo.dst = RPMSG_ADDR_ANY;
+
+diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
+index 8da1b5cb31b31..56bc622de25e5 100644
+--- a/drivers/rpmsg/qcom_smd.c
++++ b/drivers/rpmsg/qcom_smd.c
+@@ -1089,7 +1089,7 @@ static int qcom_smd_create_device(struct qcom_smd_channel *channel)
+
+ /* Assign public information to the rpmsg_device */
+ rpdev = &qsdev->rpdev;
+- strncpy(rpdev->id.name, channel->name, RPMSG_NAME_SIZE);
++ strscpy_pad(rpdev->id.name, channel->name, RPMSG_NAME_SIZE);
+ rpdev->src = RPMSG_ADDR_ANY;
+ rpdev->dst = RPMSG_ADDR_ANY;
+
+@@ -1320,7 +1320,7 @@ static void qcom_channel_state_worker(struct work_struct *work)
+
+ spin_unlock_irqrestore(&edge->channels_lock, flags);
+
+- strncpy(chinfo.name, channel->name, sizeof(chinfo.name));
++ strscpy_pad(chinfo.name, channel->name, sizeof(chinfo.name));
+ chinfo.src = RPMSG_ADDR_ANY;
+ chinfo.dst = RPMSG_ADDR_ANY;
+ rpmsg_unregister_device(&edge->dev, &chinfo);
+@@ -1380,6 +1380,7 @@ static int qcom_smd_parse_edge(struct device *dev,
+ }
+
+ edge->ipc_regmap = syscon_node_to_regmap(syscon_np);
++ of_node_put(syscon_np);
+ if (IS_ERR(edge->ipc_regmap)) {
+ ret = PTR_ERR(edge->ipc_regmap);
+ goto put_node;
+@@ -1404,9 +1405,9 @@ static int qcom_smd_parse_edge(struct device *dev,
+ edge->name = node->name;
+
+ irq = irq_of_parse_and_map(node, 0);
+- if (irq < 0) {
++ if (!irq) {
+ dev_err(dev, "required smd interrupt missing\n");
+- ret = irq;
++ ret = -EINVAL;
+ goto put_node;
+ }
+
+diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
+index 2bebc9b2d1634..88c985f9e73ab 100644
+--- a/drivers/rpmsg/rpmsg_char.c
++++ b/drivers/rpmsg/rpmsg_char.c
+@@ -92,7 +92,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data)
+ /* wake up any blocked readers */
+ wake_up_interruptible(&eptdev->readq);
+
+- device_del(&eptdev->dev);
++ cdev_device_del(&eptdev->cdev, &eptdev->dev);
+ put_device(&eptdev->dev);
+
+ return 0;
+@@ -127,8 +127,11 @@ static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
+ struct rpmsg_device *rpdev = eptdev->rpdev;
+ struct device *dev = &eptdev->dev;
+
+- if (eptdev->ept)
++ mutex_lock(&eptdev->ept_lock);
++ if (eptdev->ept) {
++ mutex_unlock(&eptdev->ept_lock);
+ return -EBUSY;
++ }
+
+ get_device(dev);
+
+@@ -136,11 +139,13 @@ static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
+ if (!ept) {
+ dev_err(dev, "failed to open %s\n", eptdev->chinfo.name);
+ put_device(dev);
++ mutex_unlock(&eptdev->ept_lock);
+ return -EINVAL;
+ }
+
+ eptdev->ept = ept;
+ filp->private_data = eptdev;
++ mutex_unlock(&eptdev->ept_lock);
+
+ return 0;
+ }
+@@ -335,7 +340,6 @@ static void rpmsg_eptdev_release_device(struct device *dev)
+
+ ida_simple_remove(&rpmsg_ept_ida, dev->id);
+ ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt));
+- cdev_del(&eptdev->cdev);
+ kfree(eptdev);
+ }
+
+@@ -380,19 +384,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev,
+ dev->id = ret;
+ dev_set_name(dev, "rpmsg%d", ret);
+
+- ret = cdev_add(&eptdev->cdev, dev->devt, 1);
++ ret = cdev_device_add(&eptdev->cdev, &eptdev->dev);
+ if (ret)
+ goto free_ept_ida;
+
+ /* We can now rely on the release function for cleanup */
+ dev->release = rpmsg_eptdev_release_device;
+
+- ret = device_add(dev);
+- if (ret) {
+- dev_err(dev, "device_add failed: %d\n", ret);
+- put_device(dev);
+- }
+-
+ return ret;
+
+ free_ept_ida:
+@@ -461,7 +459,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev)
+
+ ida_simple_remove(&rpmsg_ctrl_ida, dev->id);
+ ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt));
+- cdev_del(&ctrldev->cdev);
+ kfree(ctrldev);
+ }
+
+@@ -496,19 +493,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev)
+ dev->id = ret;
+ dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret);
+
+- ret = cdev_add(&ctrldev->cdev, dev->devt, 1);
++ ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev);
+ if (ret)
+ goto free_ctrl_ida;
+
+ /* We can now rely on the release function for cleanup */
+ dev->release = rpmsg_ctrldev_release_device;
+
+- ret = device_add(dev);
+- if (ret) {
+- dev_err(&rpdev->dev, "device_add failed: %d\n", ret);
+- put_device(dev);
+- }
+-
+ dev_set_drvdata(&rpdev->dev, ctrldev);
+
+ return ret;
+@@ -534,7 +525,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev)
+ if (ret)
+ dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret);
+
+- device_del(&ctrldev->dev);
++ cdev_device_del(&ctrldev->cdev, &ctrldev->dev);
+ put_device(&ctrldev->dev);
+ }
+
+diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
+index 9151836190ce3..a71de08acc7b9 100644
+--- a/drivers/rpmsg/rpmsg_core.c
++++ b/drivers/rpmsg/rpmsg_core.c
+@@ -519,13 +519,25 @@ static int rpmsg_dev_probe(struct device *dev)
+ err = rpdrv->probe(rpdev);
+ if (err) {
+ dev_err(dev, "%s: failed: %d\n", __func__, err);
+- if (ept)
+- rpmsg_destroy_ept(ept);
+- goto out;
++ goto destroy_ept;
+ }
+
+- if (ept && rpdev->ops->announce_create)
++ if (ept && rpdev->ops->announce_create) {
+ err = rpdev->ops->announce_create(rpdev);
++ if (err) {
++ dev_err(dev, "failed to announce creation\n");
++ goto remove_rpdev;
++ }
++ }
++
++ return 0;
++
++remove_rpdev:
++ if (rpdrv->remove)
++ rpdrv->remove(rpdev);
++destroy_ept:
++ if (ept)
++ rpmsg_destroy_ept(ept);
+ out:
+ return err;
+ }
+diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
+index 8e49a3bacfc78..b03e7404212f4 100644
+--- a/drivers/rpmsg/virtio_rpmsg_bus.c
++++ b/drivers/rpmsg/virtio_rpmsg_bus.c
+@@ -842,7 +842,7 @@ static struct rpmsg_device *rpmsg_virtio_add_ctrl_dev(struct virtio_device *vdev
+
+ err = rpmsg_chrdev_register_device(rpdev_ctrl);
+ if (err) {
+- kfree(vch);
++		/* vch will be freed in virtio_rpmsg_release_device() */
+ return ERR_PTR(err);
+ }
+
+@@ -853,7 +853,7 @@ static void rpmsg_virtio_del_ctrl_dev(struct rpmsg_device *rpdev_ctrl)
+ {
+ if (!rpdev_ctrl)
+ return;
+- kfree(to_virtio_rpmsg_channel(rpdev_ctrl));
++ device_unregister(&rpdev_ctrl->dev);
+ }
+
+ static int rpmsg_probe(struct virtio_device *vdev)
+@@ -964,7 +964,8 @@ static int rpmsg_probe(struct virtio_device *vdev)
+
+ err = rpmsg_ns_register_device(rpdev_ns);
+ if (err)
+- goto free_vch;
++		/* vch will be freed in virtio_rpmsg_release_device() */
++ goto free_ctrldev;
+ }
+
+ /*
+@@ -988,8 +989,6 @@ static int rpmsg_probe(struct virtio_device *vdev)
+
+ return 0;
+
+-free_vch:
+- kfree(vch);
+ free_ctrldev:
+ rpmsg_virtio_del_ctrl_dev(rpdev_ctrl);
+ free_coherent:
+diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
+index f77bc089eb6b7..0aef7df2ea704 100644
+--- a/drivers/rtc/class.c
++++ b/drivers/rtc/class.c
+@@ -26,6 +26,15 @@ struct class *rtc_class;
+ static void rtc_device_release(struct device *dev)
+ {
+ struct rtc_device *rtc = to_rtc_device(dev);
++ struct timerqueue_head *head = &rtc->timerqueue;
++ struct timerqueue_node *node;
++
++ mutex_lock(&rtc->ops_lock);
++ while ((node = timerqueue_getnext(head)))
++ timerqueue_del(head, node);
++ mutex_unlock(&rtc->ops_lock);
++
++ cancel_work_sync(&rtc->irqwork);
+
+ ida_simple_remove(&rtc_ida, rtc->id);
+ mutex_destroy(&rtc->ops_lock);
+diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
+index 9a2bd4947007c..f49ab45455d7c 100644
+--- a/drivers/rtc/interface.c
++++ b/drivers/rtc/interface.c
+@@ -392,7 +392,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+ return err;
+ if (!rtc->ops) {
+ err = -ENODEV;
+- } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->read_alarm) {
++ } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) {
+ err = -EINVAL;
+ } else {
+ memset(alarm, 0, sizeof(struct rtc_wkalrm));
+@@ -793,9 +793,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
+ struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
+ struct rtc_time tm;
+ ktime_t now;
++ int err;
++
++ err = __rtc_read_time(rtc, &tm);
++ if (err)
++ return err;
+
+ timer->enabled = 1;
+- __rtc_read_time(rtc, &tm);
+ now = rtc_tm_to_ktime(tm);
+
+ /* Skip over expired timers */
+@@ -809,7 +813,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
+ trace_rtc_timer_enqueue(timer);
+ if (!next || ktime_before(timer->node.expires, next->expires)) {
+ struct rtc_wkalrm alarm;
+- int err;
+
+ alarm.time = rtc_ktime_to_tm(timer->node.expires);
+ alarm.enabled = 1;
+diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
+index 4eb53412b8085..00e2ca7374ecf 100644
+--- a/drivers/rtc/rtc-cmos.c
++++ b/drivers/rtc/rtc-cmos.c
+@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+
+ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ {
++ int ret;
++
+ /*
+ * If pm_trace abused the RTC for storage, set the timespec to 0,
+ * which tells the caller that this RTC value is unusable.
+@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
+ if (!pm_trace_rtc_valid())
+ return -EIO;
+
+- mc146818_get_time(t);
++ ret = mc146818_get_time(t);
++ if (ret < 0) {
++ dev_err_ratelimited(dev, "unable to read current time\n");
++ return ret;
++ }
++
+ return 0;
+ }
+
+@@ -242,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
+ return mc146818_set_time(t);
+ }
+
++struct cmos_read_alarm_callback_param {
++ struct cmos_rtc *cmos;
++ struct rtc_time *time;
++ unsigned char rtc_control;
++};
++
++static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
++ void *param_in)
++{
++ struct cmos_read_alarm_callback_param *p =
++ (struct cmos_read_alarm_callback_param *)param_in;
++ struct rtc_time *time = p->time;
++
++ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
++ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
++ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
++
++ if (p->cmos->day_alrm) {
++ /* ignore upper bits on readback per ACPI spec */
++ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
++ if (!time->tm_mday)
++ time->tm_mday = -1;
++
++ if (p->cmos->mon_alrm) {
++ time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
++ if (!time->tm_mon)
++ time->tm_mon = -1;
++ }
++ }
++
++ p->rtc_control = CMOS_READ(RTC_CONTROL);
++}
++
+ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- unsigned char rtc_control;
++ struct cmos_read_alarm_callback_param p = {
++ .cmos = cmos,
++ .time = &t->time,
++ };
+
+ /* This not only a rtc_op, but also called directly */
+ if (!is_valid_irq(cmos->irq))
+@@ -256,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ * the future.
+ */
+
+- spin_lock_irq(&rtc_lock);
+- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+-
+- if (cmos->day_alrm) {
+- /* ignore upper bits on readback per ACPI spec */
+- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
+- if (!t->time.tm_mday)
+- t->time.tm_mday = -1;
+-
+- if (cmos->mon_alrm) {
+- t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
+- if (!t->time.tm_mon)
+- t->time.tm_mon = -1;
+- }
+- }
+-
+- rtc_control = CMOS_READ(RTC_CONTROL);
+- spin_unlock_irq(&rtc_lock);
++ /* Some Intel chipsets disconnect the alarm registers when the clock
++ * update is in progress - during this time reads return bogus values
++ * and writes may fail silently. See for example "7th Generation Intel®
++ * Processor Family I/O for U/Y Platforms [...] Datasheet", section
++ * 27.7.1
++ *
++ * Use the mc146818_avoid_UIP() function to avoid this.
++ */
++ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
++ return -EIO;
+
+- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
++ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ if (((unsigned)t->time.tm_sec) < 0x60)
+ t->time.tm_sec = bcd2bin(t->time.tm_sec);
+ else
+@@ -306,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ }
+ }
+
+- t->enabled = !!(rtc_control & RTC_AIE);
++ t->enabled = !!(p.rtc_control & RTC_AIE);
+ t->pending = 0;
+
+ return 0;
+@@ -437,10 +470,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
+ return 0;
+ }
+
++struct cmos_set_alarm_callback_param {
++ struct cmos_rtc *cmos;
++ unsigned char mon, mday, hrs, min, sec;
++ struct rtc_wkalrm *t;
++};
++
++/* Note: this function may be executed by mc146818_avoid_UIP() more than
++ * once
++ */
++static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
++ void *param_in)
++{
++ struct cmos_set_alarm_callback_param *p =
++ (struct cmos_set_alarm_callback_param *)param_in;
++
++ /* next rtc irq must not be from previous alarm setting */
++ cmos_irq_disable(p->cmos, RTC_AIE);
++
++ /* update alarm */
++ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
++ CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
++ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
++
++ /* the system may support an "enhanced" alarm */
++ if (p->cmos->day_alrm) {
++ CMOS_WRITE(p->mday, p->cmos->day_alrm);
++ if (p->cmos->mon_alrm)
++ CMOS_WRITE(p->mon, p->cmos->mon_alrm);
++ }
++
++ if (use_hpet_alarm()) {
++ /*
++ * FIXME the HPET alarm glue currently ignores day_alrm
++ * and mon_alrm ...
++ */
++ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
++ p->t->time.tm_sec);
++ }
++
++ if (p->t->enabled)
++ cmos_irq_enable(p->cmos, RTC_AIE);
++}
++
+ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ {
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- unsigned char mon, mday, hrs, min, sec, rtc_control;
++ struct cmos_set_alarm_callback_param p = {
++ .cmos = cmos,
++ .t = t
++ };
++ unsigned char rtc_control;
+ int ret;
+
+ /* This not only a rtc_op, but also called directly */
+@@ -451,52 +531,33 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ if (ret < 0)
+ return ret;
+
+- mon = t->time.tm_mon + 1;
+- mday = t->time.tm_mday;
+- hrs = t->time.tm_hour;
+- min = t->time.tm_min;
+- sec = t->time.tm_sec;
++ p.mon = t->time.tm_mon + 1;
++ p.mday = t->time.tm_mday;
++ p.hrs = t->time.tm_hour;
++ p.min = t->time.tm_min;
++ p.sec = t->time.tm_sec;
+
++ spin_lock_irq(&rtc_lock);
+ rtc_control = CMOS_READ(RTC_CONTROL);
++ spin_unlock_irq(&rtc_lock);
++
+ if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ /* Writing 0xff means "don't care" or "match all". */
+- mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
+- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
+- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
+- min = (min < 60) ? bin2bcd(min) : 0xff;
+- sec = (sec < 60) ? bin2bcd(sec) : 0xff;
+- }
+-
+- spin_lock_irq(&rtc_lock);
+-
+- /* next rtc irq must not be from previous alarm setting */
+- cmos_irq_disable(cmos, RTC_AIE);
+-
+- /* update alarm */
+- CMOS_WRITE(hrs, RTC_HOURS_ALARM);
+- CMOS_WRITE(min, RTC_MINUTES_ALARM);
+- CMOS_WRITE(sec, RTC_SECONDS_ALARM);
+-
+- /* the system may support an "enhanced" alarm */
+- if (cmos->day_alrm) {
+- CMOS_WRITE(mday, cmos->day_alrm);
+- if (cmos->mon_alrm)
+- CMOS_WRITE(mon, cmos->mon_alrm);
++ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
++ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
++ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
++ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
++ p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
+ }
+
+- if (use_hpet_alarm()) {
+- /*
+- * FIXME the HPET alarm glue currently ignores day_alrm
+- * and mon_alrm ...
+- */
+- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
+- t->time.tm_sec);
+- }
+-
+- if (t->enabled)
+- cmos_irq_enable(cmos, RTC_AIE);
+-
+- spin_unlock_irq(&rtc_lock);
++ /*
++ * Some Intel chipsets disconnect the alarm registers when the clock
++ * update is in progress - during this time writes fail silently.
++ *
++ * Use mc146818_avoid_UIP() to avoid this.
++ */
++ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
++ return -EIO;
+
+ cmos->alarm_expires = rtc_tm_to_time64(&t->time);
+
+@@ -683,6 +744,168 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
+ return IRQ_NONE;
+ }
+
++#ifdef CONFIG_ACPI
++
++#include <linux/acpi.h>
++
++static u32 rtc_handler(void *context)
++{
++ struct device *dev = context;
++ struct cmos_rtc *cmos = dev_get_drvdata(dev);
++ unsigned char rtc_control = 0;
++ unsigned char rtc_intr;
++ unsigned long flags;
++
++
++ /*
++ * Always update rtc irq when ACPI is used as RTC Alarm.
++ * Or else, ACPI SCI is enabled during suspend/resume only,
++ * update rtc irq in that case.
++ */
++ if (cmos_use_acpi_alarm())
++ cmos_interrupt(0, (void *)cmos->rtc);
++ else {
++ /* Fix me: can we use cmos_interrupt() here as well? */
++ spin_lock_irqsave(&rtc_lock, flags);
++ if (cmos_rtc.suspend_ctrl)
++ rtc_control = CMOS_READ(RTC_CONTROL);
++ if (rtc_control & RTC_AIE) {
++ cmos_rtc.suspend_ctrl &= ~RTC_AIE;
++ CMOS_WRITE(rtc_control, RTC_CONTROL);
++ rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
++ rtc_update_irq(cmos->rtc, 1, rtc_intr);
++ }
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ }
++
++ pm_wakeup_hard_event(dev);
++ acpi_clear_event(ACPI_EVENT_RTC);
++ acpi_disable_event(ACPI_EVENT_RTC, 0);
++ return ACPI_INTERRUPT_HANDLED;
++}
++
++static void acpi_rtc_event_setup(struct device *dev)
++{
++ if (acpi_disabled)
++ return;
++
++ acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
++ /*
++ * After the RTC handler is installed, the Fixed_RTC event should
++ * be disabled. Only when the RTC alarm is set will it be enabled.
++ */
++ acpi_clear_event(ACPI_EVENT_RTC);
++ acpi_disable_event(ACPI_EVENT_RTC, 0);
++}
++
++static void acpi_rtc_event_cleanup(void)
++{
++ if (acpi_disabled)
++ return;
++
++ acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler);
++}
++
++static void rtc_wake_on(struct device *dev)
++{
++ acpi_clear_event(ACPI_EVENT_RTC);
++ acpi_enable_event(ACPI_EVENT_RTC, 0);
++}
++
++static void rtc_wake_off(struct device *dev)
++{
++ acpi_disable_event(ACPI_EVENT_RTC, 0);
++}
++
++#ifdef CONFIG_X86
++/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */
++static void use_acpi_alarm_quirks(void)
++{
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
++ return;
++
++ if (!is_hpet_enabled())
++ return;
++
++ if (dmi_get_bios_year() < 2015)
++ return;
++
++ use_acpi_alarm = true;
++}
++#else
++static inline void use_acpi_alarm_quirks(void) { }
++#endif
++
++static void acpi_cmos_wake_setup(struct device *dev)
++{
++ if (acpi_disabled)
++ return;
++
++ use_acpi_alarm_quirks();
++
++ cmos_rtc.wake_on = rtc_wake_on;
++ cmos_rtc.wake_off = rtc_wake_off;
++
++ /* ACPI tables bug workaround. */
++ if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) {
++ dev_dbg(dev, "bogus FADT month_alarm (%d)\n",
++ acpi_gbl_FADT.month_alarm);
++ acpi_gbl_FADT.month_alarm = 0;
++ }
++
++ cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm;
++ cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm;
++ cmos_rtc.century = acpi_gbl_FADT.century;
++
++ if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE)
++ dev_info(dev, "RTC can wake from S4\n");
++
++ /* RTC always wakes from S1/S2/S3, and often S4/STD */
++ device_init_wakeup(dev, 1);
++}
++
++static void cmos_check_acpi_rtc_status(struct device *dev,
++ unsigned char *rtc_control)
++{
++ struct cmos_rtc *cmos = dev_get_drvdata(dev);
++ acpi_event_status rtc_status;
++ acpi_status status;
++
++ if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC)
++ return;
++
++ status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status);
++ if (ACPI_FAILURE(status)) {
++ dev_err(dev, "Could not get RTC status\n");
++ } else if (rtc_status & ACPI_EVENT_FLAG_SET) {
++ unsigned char mask;
++ *rtc_control &= ~RTC_AIE;
++ CMOS_WRITE(*rtc_control, RTC_CONTROL);
++ mask = CMOS_READ(RTC_INTR_FLAGS);
++ rtc_update_irq(cmos->rtc, 1, mask);
++ }
++}
++
++#else /* !CONFIG_ACPI */
++
++static inline void acpi_rtc_event_setup(struct device *dev)
++{
++}
++
++static inline void acpi_rtc_event_cleanup(void)
++{
++}
++
++static inline void acpi_cmos_wake_setup(struct device *dev)
++{
++}
++
++static inline void cmos_check_acpi_rtc_status(struct device *dev,
++ unsigned char *rtc_control)
++{
++}
++#endif /* CONFIG_ACPI */
++
+ #ifdef CONFIG_PNP
+ #define INITSECTION
+
+@@ -766,19 +989,27 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+ if (info->address_space)
+ address_space = info->address_space;
+
+- if (info->rtc_day_alarm && info->rtc_day_alarm < 128)
+- cmos_rtc.day_alrm = info->rtc_day_alarm;
+- if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128)
+- cmos_rtc.mon_alrm = info->rtc_mon_alarm;
+- if (info->rtc_century && info->rtc_century < 128)
+- cmos_rtc.century = info->rtc_century;
++ cmos_rtc.day_alrm = info->rtc_day_alarm;
++ cmos_rtc.mon_alrm = info->rtc_mon_alarm;
++ cmos_rtc.century = info->rtc_century;
+
+ if (info->wake_on && info->wake_off) {
+ cmos_rtc.wake_on = info->wake_on;
+ cmos_rtc.wake_off = info->wake_off;
+ }
++ } else {
++ acpi_cmos_wake_setup(dev);
+ }
+
++ if (cmos_rtc.day_alrm >= 128)
++ cmos_rtc.day_alrm = 0;
++
++ if (cmos_rtc.mon_alrm >= 128)
++ cmos_rtc.mon_alrm = 0;
++
++ if (cmos_rtc.century >= 128)
++ cmos_rtc.century = 0;
++
+ cmos_rtc.dev = dev;
+ dev_set_drvdata(dev, &cmos_rtc);
+
+@@ -790,16 +1021,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+
+ rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
+
+- spin_lock_irq(&rtc_lock);
+-
+- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+- if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
+- spin_unlock_irq(&rtc_lock);
+- dev_warn(dev, "not accessible\n");
++ if (!mc146818_does_rtc_work()) {
++ dev_warn(dev, "broken or not accessible\n");
+ retval = -ENXIO;
+ goto cleanup1;
+ }
+
++ spin_lock_irq(&rtc_lock);
++
+ if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+ /* force periodic irq to CMOS reset default of 1024Hz;
+ *
+@@ -869,6 +1098,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
+ nvmem_cfg.size = address_space - NVRAM_OFFSET;
+ devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg);
+
++ /*
++ * Everything has gone well so far, so by default register a handler for
++ * the ACPI RTC fixed event.
++ */
++ if (!info)
++ acpi_rtc_event_setup(dev);
++
+ dev_info(dev, "%s%s, %d bytes nvram%s\n",
+ !is_valid_irq(rtc_irq) ? "no alarms" :
+ cmos_rtc.mon_alrm ? "alarms up to one year" :
+@@ -914,6 +1150,9 @@ static void cmos_do_remove(struct device *dev)
+ hpet_unregister_irq_handler(cmos_interrupt);
+ }
+
++ if (!dev_get_platdata(dev))
++ acpi_rtc_event_cleanup();
++
+ cmos->rtc = NULL;
+
+ ports = cmos->iomem;
+@@ -1063,9 +1302,6 @@ static void cmos_check_wkalrm(struct device *dev)
+ }
+ }
+
+-static void cmos_check_acpi_rtc_status(struct device *dev,
+- unsigned char *rtc_control);
+-
+ static int __maybe_unused cmos_resume(struct device *dev)
+ {
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+@@ -1132,174 +1368,16 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume);
+ * predate even PNPBIOS should set up platform_bus devices.
+ */
+
+-#ifdef CONFIG_ACPI
+-
+-#include <linux/acpi.h>
+-
+-static u32 rtc_handler(void *context)
+-{
+- struct device *dev = context;
+- struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- unsigned char rtc_control = 0;
+- unsigned char rtc_intr;
+- unsigned long flags;
+-
+-
+- /*
+- * Always update rtc irq when ACPI is used as RTC Alarm.
+- * Or else, ACPI SCI is enabled during suspend/resume only,
+- * update rtc irq in that case.
+- */
+- if (cmos_use_acpi_alarm())
+- cmos_interrupt(0, (void *)cmos->rtc);
+- else {
+- /* Fix me: can we use cmos_interrupt() here as well? */
+- spin_lock_irqsave(&rtc_lock, flags);
+- if (cmos_rtc.suspend_ctrl)
+- rtc_control = CMOS_READ(RTC_CONTROL);
+- if (rtc_control & RTC_AIE) {
+- cmos_rtc.suspend_ctrl &= ~RTC_AIE;
+- CMOS_WRITE(rtc_control, RTC_CONTROL);
+- rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
+- rtc_update_irq(cmos->rtc, 1, rtc_intr);
+- }
+- spin_unlock_irqrestore(&rtc_lock, flags);
+- }
+-
+- pm_wakeup_hard_event(dev);
+- acpi_clear_event(ACPI_EVENT_RTC);
+- acpi_disable_event(ACPI_EVENT_RTC, 0);
+- return ACPI_INTERRUPT_HANDLED;
+-}
+-
+-static inline void rtc_wake_setup(struct device *dev)
+-{
+- acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
+- /*
+- * After the RTC handler is installed, the Fixed_RTC event should
+- * be disabled. Only when the RTC alarm is set will it be enabled.
+- */
+- acpi_clear_event(ACPI_EVENT_RTC);
+- acpi_disable_event(ACPI_EVENT_RTC, 0);
+-}
+-
+-static void rtc_wake_on(struct device *dev)
+-{
+- acpi_clear_event(ACPI_EVENT_RTC);
+- acpi_enable_event(ACPI_EVENT_RTC, 0);
+-}
+-
+-static void rtc_wake_off(struct device *dev)
+-{
+- acpi_disable_event(ACPI_EVENT_RTC, 0);
+-}
+-
+-#ifdef CONFIG_X86
+-/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */
+-static void use_acpi_alarm_quirks(void)
+-{
+- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+- return;
+-
+- if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
+- return;
+-
+- if (!is_hpet_enabled())
+- return;
+-
+- if (dmi_get_bios_year() < 2015)
+- return;
+-
+- use_acpi_alarm = true;
+-}
+-#else
+-static inline void use_acpi_alarm_quirks(void) { }
+-#endif
+-
+-/* Every ACPI platform has a mc146818 compatible "cmos rtc". Here we find
+- * its device node and pass extra config data. This helps its driver use
+- * capabilities that the now-obsolete mc146818 didn't have, and informs it
+- * that this board's RTC is wakeup-capable (per ACPI spec).
+- */
+-static struct cmos_rtc_board_info acpi_rtc_info;
+-
+-static void cmos_wake_setup(struct device *dev)
+-{
+- if (acpi_disabled)
+- return;
+-
+- use_acpi_alarm_quirks();
+-
+- rtc_wake_setup(dev);
+- acpi_rtc_info.wake_on = rtc_wake_on;
+- acpi_rtc_info.wake_off = rtc_wake_off;
+-
+- /* workaround bug in some ACPI tables */
+- if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) {
+- dev_dbg(dev, "bogus FADT month_alarm (%d)\n",
+- acpi_gbl_FADT.month_alarm);
+- acpi_gbl_FADT.month_alarm = 0;
+- }
+-
+- acpi_rtc_info.rtc_day_alarm = acpi_gbl_FADT.day_alarm;
+- acpi_rtc_info.rtc_mon_alarm = acpi_gbl_FADT.month_alarm;
+- acpi_rtc_info.rtc_century = acpi_gbl_FADT.century;
+-
+- /* NOTE: S4_RTC_WAKE is NOT currently useful to Linux */
+- if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE)
+- dev_info(dev, "RTC can wake from S4\n");
+-
+- dev->platform_data = &acpi_rtc_info;
+-
+- /* RTC always wakes from S1/S2/S3, and often S4/STD */
+- device_init_wakeup(dev, 1);
+-}
+-
+-static void cmos_check_acpi_rtc_status(struct device *dev,
+- unsigned char *rtc_control)
+-{
+- struct cmos_rtc *cmos = dev_get_drvdata(dev);
+- acpi_event_status rtc_status;
+- acpi_status status;
+-
+- if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC)
+- return;
+-
+- status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status);
+- if (ACPI_FAILURE(status)) {
+- dev_err(dev, "Could not get RTC status\n");
+- } else if (rtc_status & ACPI_EVENT_FLAG_SET) {
+- unsigned char mask;
+- *rtc_control &= ~RTC_AIE;
+- CMOS_WRITE(*rtc_control, RTC_CONTROL);
+- mask = CMOS_READ(RTC_INTR_FLAGS);
+- rtc_update_irq(cmos->rtc, 1, mask);
+- }
+-}
+-
+-#else
+-
+-static void cmos_wake_setup(struct device *dev)
+-{
+-}
+-
+-static void cmos_check_acpi_rtc_status(struct device *dev,
+- unsigned char *rtc_control)
+-{
+-}
+-
+-#endif
+-
+ #ifdef CONFIG_PNP
+
+ #include <linux/pnp.h>
+
+ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
+ {
+- cmos_wake_setup(&pnp->dev);
++ int irq;
+
+ if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) {
+- unsigned int irq = 0;
++ irq = 0;
+ #ifdef CONFIG_X86
+ /* Some machines contain a PNP entry for the RTC, but
+ * don't define the IRQ. It should always be safe to
+@@ -1308,13 +1386,11 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
+ if (nr_legacy_irqs())
+ irq = RTC_IRQ;
+ #endif
+- return cmos_do_probe(&pnp->dev,
+- pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
+ } else {
+- return cmos_do_probe(&pnp->dev,
+- pnp_get_resource(pnp, IORESOURCE_IO, 0),
+- pnp_irq(pnp, 0));
++ irq = pnp_irq(pnp, 0);
+ }
++
++ return cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
+ }
+
+ static void cmos_pnp_remove(struct pnp_dev *pnp)
+@@ -1401,7 +1477,6 @@ static int __init cmos_platform_probe(struct platform_device *pdev)
+ int irq;
+
+ cmos_of_init(pdev);
+- cmos_wake_setup(&pdev->dev);
+
+ if (RTC_IOMAPPED)
+ resource = platform_get_resource(pdev, IORESOURCE_IO, 0);
+diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
+index b3de6d2e680a4..2f83adef966eb 100644
+--- a/drivers/rtc/rtc-ds1302.c
++++ b/drivers/rtc/rtc-ds1302.c
+@@ -199,11 +199,18 @@ static const struct of_device_id ds1302_dt_ids[] = {
+ MODULE_DEVICE_TABLE(of, ds1302_dt_ids);
+ #endif
+
++static const struct spi_device_id ds1302_spi_ids[] = {
++ { .name = "ds1302", },
++ { /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(spi, ds1302_spi_ids);
++
+ static struct spi_driver ds1302_driver = {
+ .driver.name = "rtc-ds1302",
+ .driver.of_match_table = of_match_ptr(ds1302_dt_ids),
+ .probe = ds1302_probe,
+ .remove = ds1302_remove,
++ .id_table = ds1302_spi_ids,
+ };
+
+ module_spi_driver(ds1302_driver);
+diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
+index 157bf5209ac40..a40c1a52df659 100644
+--- a/drivers/rtc/rtc-ds1347.c
++++ b/drivers/rtc/rtc-ds1347.c
+@@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
+ return err;
+
+ century = (dt->tm_year / 100) + 19;
+- err = regmap_write(map, DS1347_CENTURY_REG, century);
++ err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century));
+ if (err)
+ return err;
+
+diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
+index 66fc8617d07ee..93ce72b9ae59e 100644
+--- a/drivers/rtc/rtc-ds1390.c
++++ b/drivers/rtc/rtc-ds1390.c
+@@ -219,12 +219,19 @@ static const struct of_device_id ds1390_of_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, ds1390_of_match);
+
++static const struct spi_device_id ds1390_spi_ids[] = {
++ { .name = "ds1390" },
++ {}
++};
++MODULE_DEVICE_TABLE(spi, ds1390_spi_ids);
++
+ static struct spi_driver ds1390_driver = {
+ .driver = {
+ .name = "rtc-ds1390",
+ .of_match_table = of_match_ptr(ds1390_of_match),
+ },
+ .probe = ds1390_probe,
++ .id_table = ds1390_spi_ids,
+ };
+
+ module_spi_driver(ds1390_driver);
+diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
+index 75db7ab654a5a..1e09c9a228d6c 100644
+--- a/drivers/rtc/rtc-ds1685.c
++++ b/drivers/rtc/rtc-ds1685.c
+@@ -1438,7 +1438,7 @@ ds1685_rtc_poweroff(struct platform_device *pdev)
+ unreachable();
+ }
+ }
+-EXPORT_SYMBOL(ds1685_rtc_poweroff);
++EXPORT_SYMBOL_GPL(ds1685_rtc_poweroff);
+ /* ----------------------------------------------------------------------- */
+
+
+diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
+index ad3add5db4c82..25c6e7d9570f0 100644
+--- a/drivers/rtc/rtc-ftrtc010.c
++++ b/drivers/rtc/rtc-ftrtc010.c
+@@ -137,28 +137,34 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(rtc->extclk);
+ if (ret) {
+ dev_err(dev, "failed to enable EXTCLK\n");
+- return ret;
++ goto err_disable_pclk;
+ }
+ }
+
+- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+- if (!res)
+- return -ENODEV;
+-
+- rtc->rtc_irq = res->start;
++ rtc->rtc_irq = platform_get_irq(pdev, 0);
++ if (rtc->rtc_irq < 0) {
++ ret = rtc->rtc_irq;
++ goto err_disable_extclk;
++ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- if (!res)
+- return -ENODEV;
++ if (!res) {
++ ret = -ENODEV;
++ goto err_disable_extclk;
++ }
+
+ rtc->rtc_base = devm_ioremap(dev, res->start,
+ resource_size(res));
+- if (!rtc->rtc_base)
+- return -ENOMEM;
++ if (!rtc->rtc_base) {
++ ret = -ENOMEM;
++ goto err_disable_extclk;
++ }
+
+ rtc->rtc_dev = devm_rtc_allocate_device(dev);
+- if (IS_ERR(rtc->rtc_dev))
+- return PTR_ERR(rtc->rtc_dev);
++ if (IS_ERR(rtc->rtc_dev)) {
++ ret = PTR_ERR(rtc->rtc_dev);
++ goto err_disable_extclk;
++ }
+
+ rtc->rtc_dev->ops = &ftrtc010_rtc_ops;
+
+@@ -174,9 +180,15 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
+ ret = devm_request_irq(dev, rtc->rtc_irq, ftrtc010_rtc_interrupt,
+ IRQF_SHARED, pdev->name, dev);
+ if (unlikely(ret))
+- return ret;
++ goto err_disable_extclk;
+
+ return devm_rtc_register_device(rtc->rtc_dev);
++
++err_disable_extclk:
++ clk_disable_unprepare(rtc->extclk);
++err_disable_pclk:
++ clk_disable_unprepare(rtc->pclk);
++ return ret;
+ }
+
+ static int ftrtc010_rtc_remove(struct platform_device *pdev)
+diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
+index dcfaf09946ee3..347655d24b5d3 100644
+--- a/drivers/rtc/rtc-mc146818-lib.c
++++ b/drivers/rtc/rtc-mc146818-lib.c
+@@ -8,10 +8,106 @@
+ #include <linux/acpi.h>
+ #endif
+
+-unsigned int mc146818_get_time(struct rtc_time *time)
++/*
++ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
++ * unset.
++ *
++ * Warning: callback may be executed more than once.
++ */
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++ void *param)
++{
++ int i;
++ unsigned long flags;
++ unsigned char seconds;
++
++ for (i = 0; i < 10; i++) {
++ spin_lock_irqsave(&rtc_lock, flags);
++
++ /*
++ * Check whether there is an update in progress during which the
++ * readout is unspecified. The maximum update time is ~2ms. Poll
++ * every msec for completion.
++ *
++ * Store the second value before checking UIP so a long lasting
++ * NMI which happens to hit after the UIP check cannot make
++ * an update cycle invisible.
++ */
++ seconds = CMOS_READ(RTC_SECONDS);
++
++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ mdelay(1);
++ continue;
++ }
++
++ /* Revalidate the above readout */
++ if (seconds != CMOS_READ(RTC_SECONDS)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ continue;
++ }
++
++ if (callback)
++ callback(seconds, param);
++
++ /*
++ * Check for the UIP bit again. If it is set now then
++ * the above values may contain garbage.
++ */
++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ mdelay(1);
++ continue;
++ }
++
++ /*
++		 * An NMI might have interrupted the above sequence so check
++ * whether the seconds value has changed which indicates that
++ * the NMI took longer than the UIP bit was set. Unlikely, but
++ * possible and there is also virt...
++ */
++ if (seconds != CMOS_READ(RTC_SECONDS)) {
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ continue;
++ }
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++ return true;
++ }
++ return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
++
++/*
++ * If the UIP (Update-in-progress) bit of the RTC is set for more than
++ * 10ms, the RTC is apparently broken or not present.
++ */
++bool mc146818_does_rtc_work(void)
++{
++ int i;
++ unsigned char val;
++ unsigned long flags;
++
++ for (i = 0; i < 10; i++) {
++ spin_lock_irqsave(&rtc_lock, flags);
++ val = CMOS_READ(RTC_FREQ_SELECT);
++ spin_unlock_irqrestore(&rtc_lock, flags);
++
++ if ((val & RTC_UIP) == 0)
++ return true;
++
++ mdelay(1);
++ }
++
++ return false;
++}
++EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
++
++int mc146818_get_time(struct rtc_time *time)
+ {
+ unsigned char ctrl;
+ unsigned long flags;
++ unsigned int iter_count = 0;
+ unsigned char century = 0;
+ bool retry;
+
+@@ -20,13 +116,13 @@ unsigned int mc146818_get_time(struct rtc_time *time)
+ #endif
+
+ again:
+- spin_lock_irqsave(&rtc_lock, flags);
+- /* Ensure that the RTC is accessible. Bit 6 must be 0! */
+- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
+- spin_unlock_irqrestore(&rtc_lock, flags);
+- memset(time, 0xff, sizeof(*time));
+- return 0;
++ if (iter_count > 10) {
++ memset(time, 0, sizeof(*time));
++ return -EIO;
+ }
++ iter_count++;
++
++ spin_lock_irqsave(&rtc_lock, flags);
+
+ /*
+ * Check whether there is an update in progress during which the
+@@ -104,7 +200,7 @@ again:
+ time->tm_year += real_year - 72;
+ #endif
+
+- if (century > 20)
++ if (century > 19)
+ time->tm_year += (century - 19) * 100;
+
+ /*
+@@ -116,10 +212,21 @@ again:
+
+ time->tm_mon--;
+
+- return RTC_24H;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(mc146818_get_time);
+
++/* AMD systems don't allow access to AltCentury with DV1 */
++static bool apply_amd_register_a_behavior(void)
++{
++#ifdef CONFIG_X86
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
++ return true;
++#endif
++ return false;
++}
++
+ /* Set the current date and time in the real time clock. */
+ int mc146818_set_time(struct rtc_time *time)
+ {
+@@ -176,8 +283,10 @@ int mc146818_set_time(struct rtc_time *time)
+ if (yrs >= 100)
+ yrs -= 100;
+
+- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
+- || RTC_ALWAYS_BCD) {
++ spin_lock_irqsave(&rtc_lock, flags);
++ save_control = CMOS_READ(RTC_CONTROL);
++ spin_unlock_irqrestore(&rtc_lock, flags);
++ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hrs = bin2bcd(hrs);
+@@ -191,7 +300,10 @@ int mc146818_set_time(struct rtc_time *time)
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
++ if (apply_amd_register_a_behavior())
++ CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT);
++ else
++ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ #ifdef CONFIG_MACH_DECSTATION
+ CMOS_WRITE(real_yrs, RTC_DEC_YEAR);
+diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
+index bad7792b6ca58..0d515b3df5710 100644
+--- a/drivers/rtc/rtc-mcp795.c
++++ b/drivers/rtc/rtc-mcp795.c
+@@ -430,12 +430,19 @@ static const struct of_device_id mcp795_of_match[] = {
+ MODULE_DEVICE_TABLE(of, mcp795_of_match);
+ #endif
+
++static const struct spi_device_id mcp795_spi_ids[] = {
++ { .name = "mcp795" },
++ { }
++};
++MODULE_DEVICE_TABLE(spi, mcp795_spi_ids);
++
+ static struct spi_driver mcp795_driver = {
+ .driver = {
+ .name = "rtc-mcp795",
+ .of_match_table = of_match_ptr(mcp795_of_match),
+ },
+ .probe = mcp795_probe,
++ .id_table = mcp795_spi_ids,
+ };
+
+ module_spi_driver(mcp795_driver);
+diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c
+index 1463c86215615..648fa362ec447 100644
+--- a/drivers/rtc/rtc-meson-vrtc.c
++++ b/drivers/rtc/rtc-meson-vrtc.c
+@@ -23,7 +23,7 @@ static int meson_vrtc_read_time(struct device *dev, struct rtc_time *tm)
+ struct timespec64 time;
+
+ dev_dbg(dev, "%s\n", __func__);
+- ktime_get_raw_ts64(&time);
++ ktime_get_real_ts64(&time);
+ rtc_time64_to_tm(time.tv_sec, tm);
+
+ return 0;
+@@ -96,7 +96,7 @@ static int __maybe_unused meson_vrtc_suspend(struct device *dev)
+ long alarm_secs;
+ struct timespec64 time;
+
+- ktime_get_raw_ts64(&time);
++ ktime_get_real_ts64(&time);
+ local_time = time.tv_sec;
+
+ dev_dbg(dev, "alarm_time = %lus, local_time=%lus\n",
+diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c
+index 80dc479a6ff02..1d297af80f878 100644
+--- a/drivers/rtc/rtc-mt6397.c
++++ b/drivers/rtc/rtc-mt6397.c
+@@ -269,6 +269,8 @@ static int mtk_rtc_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -EINVAL;
+ rtc->addr_base = res->start;
+
+ rtc->data = of_device_get_match_data(&pdev->dev);
+diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
+index 5e03834016294..f6d2ad91ff7a9 100644
+--- a/drivers/rtc/rtc-mxc_v2.c
++++ b/drivers/rtc/rtc-mxc_v2.c
+@@ -336,8 +336,10 @@ static int mxc_rtc_probe(struct platform_device *pdev)
+ }
+
+ pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
+- if (IS_ERR(pdata->rtc))
++ if (IS_ERR(pdata->rtc)) {
++ clk_disable_unprepare(pdata->clk);
+ return PTR_ERR(pdata->rtc);
++ }
+
+ pdata->rtc->ops = &mxc_rtc_ops;
+ pdata->rtc->range_max = U32_MAX;
+diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
+index d46e0f0cc5020..3ff832a5af37c 100644
+--- a/drivers/rtc/rtc-omap.c
++++ b/drivers/rtc/rtc-omap.c
+@@ -25,6 +25,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/rtc.h>
++#include <linux/rtc/rtc-omap.h>
+
+ /*
+ * The OMAP RTC is a year/month/day/hours/minutes/seconds BCD clock
+diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
+index 0f58cac81d8c0..7473e6c8a183b 100644
+--- a/drivers/rtc/rtc-pcf2123.c
++++ b/drivers/rtc/rtc-pcf2123.c
+@@ -451,12 +451,21 @@ static const struct of_device_id pcf2123_dt_ids[] = {
+ MODULE_DEVICE_TABLE(of, pcf2123_dt_ids);
+ #endif
+
++static const struct spi_device_id pcf2123_spi_ids[] = {
++ { .name = "pcf2123", },
++ { .name = "rv2123", },
++ { .name = "rtc-pcf2123", },
++ { /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(spi, pcf2123_spi_ids);
++
+ static struct spi_driver pcf2123_driver = {
+ .driver = {
+ .name = "rtc-pcf2123",
+ .of_match_table = of_match_ptr(pcf2123_dt_ids),
+ },
+ .probe = pcf2123_probe,
++ .id_table = pcf2123_spi_ids,
+ };
+
+ module_spi_driver(pcf2123_driver);
+diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
+index 56c58b055dfff..43f8011070952 100644
+--- a/drivers/rtc/rtc-pcf2127.c
++++ b/drivers/rtc/rtc-pcf2127.c
+@@ -374,7 +374,8 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
+ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+ {
+ struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+- unsigned int buf[5], ctrl2;
++ u8 buf[5];
++ unsigned int ctrl2;
+ int ret;
+
+ ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
+diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
+index 14da4ab301044..bf2e370907b73 100644
+--- a/drivers/rtc/rtc-pcf85063.c
++++ b/drivers/rtc/rtc-pcf85063.c
+@@ -167,10 +167,10 @@ static int pcf85063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+ if (ret)
+ return ret;
+
+- alrm->time.tm_sec = bcd2bin(buf[0]);
+- alrm->time.tm_min = bcd2bin(buf[1]);
+- alrm->time.tm_hour = bcd2bin(buf[2]);
+- alrm->time.tm_mday = bcd2bin(buf[3]);
++ alrm->time.tm_sec = bcd2bin(buf[0] & 0x7f);
++ alrm->time.tm_min = bcd2bin(buf[1] & 0x7f);
++ alrm->time.tm_hour = bcd2bin(buf[2] & 0x3f);
++ alrm->time.tm_mday = bcd2bin(buf[3] & 0x3f);
+
+ ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &val);
+ if (ret)
+@@ -422,7 +422,7 @@ static int pcf85063_clkout_control(struct clk_hw *hw, bool enable)
+ unsigned int buf;
+ int ret;
+
+- ret = regmap_read(pcf85063->regmap, PCF85063_REG_OFFSET, &buf);
++ ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &buf);
+ if (ret < 0)
+ return ret;
+ buf &= PCF85063_REG_CLKO_F_MASK;
+diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c
+index 7fb9145c43bd5..fa351ac201587 100644
+--- a/drivers/rtc/rtc-pic32.c
++++ b/drivers/rtc/rtc-pic32.c
+@@ -324,16 +324,16 @@ static int pic32_rtc_probe(struct platform_device *pdev)
+
+ spin_lock_init(&pdata->alarm_lock);
+
++ pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
++ if (IS_ERR(pdata->rtc))
++ return PTR_ERR(pdata->rtc);
++
+ clk_prepare_enable(pdata->clk);
+
+ pic32_rtc_enable(pdata, 1);
+
+ device_init_wakeup(&pdev->dev, 1);
+
+- pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
+- if (IS_ERR(pdata->rtc))
+- return PTR_ERR(pdata->rtc);
+-
+ pdata->rtc->ops = &pic32_rtcops;
+ pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+ pdata->rtc->range_max = RTC_TIMESTAMP_END_2099;
+diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
+index e38ee88483855..bad6a5d9c6839 100644
+--- a/drivers/rtc/rtc-pl031.c
++++ b/drivers/rtc/rtc-pl031.c
+@@ -350,9 +350,6 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
+ }
+ }
+
+- if (!adev->irq[0])
+- clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features);
+-
+ device_init_wakeup(&adev->dev, true);
+ ldata->rtc = devm_rtc_allocate_device(&adev->dev);
+ if (IS_ERR(ldata->rtc)) {
+@@ -360,6 +357,9 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
+ goto out;
+ }
+
++ if (!adev->irq[0])
++ clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features);
++
+ ldata->rtc->ops = ops;
+ ldata->rtc->range_min = vendor->range_min;
+ ldata->rtc->range_max = vendor->range_max;
+diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
+index 29a1c65661e99..b1fb870c570dd 100644
+--- a/drivers/rtc/rtc-pm8xxx.c
++++ b/drivers/rtc/rtc-pm8xxx.c
+@@ -220,7 +220,6 @@ static int pm8xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+ {
+ int rc, i;
+ u8 value[NUM_8_BIT_RTC_REGS];
+- unsigned int ctrl_reg;
+ unsigned long secs, irq_flags;
+ struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
+ const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
+@@ -232,6 +231,11 @@ static int pm8xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+ secs >>= 8;
+ }
+
++ rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
++ regs->alarm_en, 0);
++ if (rc)
++ return rc;
++
+ spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
+
+ rc = regmap_bulk_write(rtc_dd->regmap, regs->alarm_rw, value,
+@@ -241,19 +245,11 @@ static int pm8xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+ goto rtc_rw_fail;
+ }
+
+- rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
+- if (rc)
+- goto rtc_rw_fail;
+-
+- if (alarm->enabled)
+- ctrl_reg |= regs->alarm_en;
+- else
+- ctrl_reg &= ~regs->alarm_en;
+-
+- rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
+- if (rc) {
+- dev_err(dev, "Write to RTC alarm control register failed\n");
+- goto rtc_rw_fail;
++ if (alarm->enabled) {
++ rc = regmap_update_bits(rtc_dd->regmap, regs->alarm_ctrl,
++ regs->alarm_en, regs->alarm_en);
++ if (rc)
++ goto rtc_rw_fail;
+ }
+
+ dev_dbg(dev, "Alarm Set for h:m:s=%ptRt, y-m-d=%ptRdr\n",
+diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
+index d2f1d8f754bf3..cf8119b6d3204 100644
+--- a/drivers/rtc/rtc-pxa.c
++++ b/drivers/rtc/rtc-pxa.c
+@@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
+ if (sa1100_rtc->irq_alarm < 0)
+ return -ENXIO;
+
++ sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
++ if (IS_ERR(sa1100_rtc->rtc))
++ return PTR_ERR(sa1100_rtc->rtc);
++
+ pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
+ resource_size(pxa_rtc->ress));
+ if (!pxa_rtc->base) {
+diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c
+index d63102d5cb1e4..1b62ed2f14594 100644
+--- a/drivers/rtc/rtc-rv3032.c
++++ b/drivers/rtc/rtc-rv3032.c
+@@ -617,11 +617,11 @@ static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate,
+
+ ret = rv3032_enter_eerd(rv3032, &eerd);
+ if (ret)
+- goto exit_eerd;
++ return ret;
+
+ ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff);
+ if (ret)
+- return ret;
++ goto exit_eerd;
+
+ ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS |
+ FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8));
+diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
+index d38aaf08108c2..dc9221393080a 100644
+--- a/drivers/rtc/rtc-rx8025.c
++++ b/drivers/rtc/rtc-rx8025.c
+@@ -55,6 +55,8 @@
+ #define RX8025_BIT_CTRL2_XST BIT(5)
+ #define RX8025_BIT_CTRL2_VDET BIT(6)
+
++#define RX8035_BIT_HOUR_1224 BIT(7)
++
+ /* Clock precision adjustment */
+ #define RX8025_ADJ_RESOLUTION 3050 /* in ppb */
+ #define RX8025_ADJ_DATA_MAX 62
+@@ -78,6 +80,7 @@ struct rx8025_data {
+ struct rtc_device *rtc;
+ enum rx_model model;
+ u8 ctrl1;
++ int is_24;
+ };
+
+ static s32 rx8025_read_reg(const struct i2c_client *client, u8 number)
+@@ -226,7 +229,7 @@ static int rx8025_get_time(struct device *dev, struct rtc_time *dt)
+
+ dt->tm_sec = bcd2bin(date[RX8025_REG_SEC] & 0x7f);
+ dt->tm_min = bcd2bin(date[RX8025_REG_MIN] & 0x7f);
+- if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
++ if (rx8025->is_24)
+ dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x3f);
+ else
+ dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x1f) % 12
+@@ -257,7 +260,7 @@ static int rx8025_set_time(struct device *dev, struct rtc_time *dt)
+ */
+ date[RX8025_REG_SEC] = bin2bcd(dt->tm_sec);
+ date[RX8025_REG_MIN] = bin2bcd(dt->tm_min);
+- if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
++ if (rx8025->is_24)
+ date[RX8025_REG_HOUR] = bin2bcd(dt->tm_hour);
+ else
+ date[RX8025_REG_HOUR] = (dt->tm_hour >= 12 ? 0x20 : 0)
+@@ -282,6 +285,7 @@ static int rx8025_init_client(struct i2c_client *client)
+ struct rx8025_data *rx8025 = i2c_get_clientdata(client);
+ u8 ctrl[2], ctrl2;
+ int need_clear = 0;
++ int hour_reg;
+ int err;
+
+ err = rx8025_read_regs(client, RX8025_REG_CTRL1, 2, ctrl);
+@@ -306,6 +310,16 @@ static int rx8025_init_client(struct i2c_client *client)
+
+ err = rx8025_write_reg(client, RX8025_REG_CTRL2, ctrl2);
+ }
++
++ if (rx8025->model == model_rx_8035) {
++ /* In RX-8035, 12/24 flag is in the hour register */
++ hour_reg = rx8025_read_reg(client, RX8025_REG_HOUR);
++ if (hour_reg < 0)
++ return hour_reg;
++ rx8025->is_24 = (hour_reg & RX8035_BIT_HOUR_1224);
++ } else {
++ rx8025->is_24 = (ctrl[1] & RX8025_BIT_CTRL1_1224);
++ }
+ out:
+ return err;
+ }
+@@ -335,7 +349,7 @@ static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+ /* Hardware alarms precision is 1 minute! */
+ t->time.tm_sec = 0;
+ t->time.tm_min = bcd2bin(ald[0] & 0x7f);
+- if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
++ if (rx8025->is_24)
+ t->time.tm_hour = bcd2bin(ald[1] & 0x3f);
+ else
+ t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12
+@@ -370,7 +384,7 @@ static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+ }
+
+ ald[0] = bin2bcd(t->time.tm_min);
+- if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
++ if (rx8025->is_24)
+ ald[1] = bin2bcd(t->time.tm_hour);
+ else
+ ald[1] = (t->time.tm_hour >= 12 ? 0x20 : 0)
+diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
+index bd929b0e7d7de..d82acf1af1fae 100644
+--- a/drivers/rtc/rtc-snvs.c
++++ b/drivers/rtc/rtc-snvs.c
+@@ -32,6 +32,14 @@
+ #define SNVS_LPPGDR_INIT 0x41736166
+ #define CNTR_TO_SECS_SH 15
+
++/* The maximum number of RTC clock cycles that are allowed to pass between
++ * two consecutive clock counter register reads. If the values are corrupted,
++ * a bigger difference is expected. The RTC frequency is 32kHz. With 320
++ * cycles we end up at 10ms, which should be enough for most cases. If it
++ * ever takes longer than expected, we retry.
++ */
++#define MAX_RTC_READ_DIFF_CYCLES 320
++
+ struct snvs_rtc_data {
+ struct rtc_device *rtc;
+ struct regmap *regmap;
+@@ -56,6 +64,7 @@ static u64 rtc_read_lpsrt(struct snvs_rtc_data *data)
+ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
+ {
+ u64 read1, read2;
++ s64 diff;
+ unsigned int timeout = 100;
+
+ /* As expected, the registers might update between the read of the LSB
+@@ -66,7 +75,8 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
+ do {
+ read2 = read1;
+ read1 = rtc_read_lpsrt(data);
+- } while (read1 != read2 && --timeout);
++ diff = read1 - read2;
++ } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout);
+ if (!timeout)
+ dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n");
+
+@@ -78,13 +88,15 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
+ static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb)
+ {
+ u32 count1, count2;
++ s32 diff;
+ unsigned int timeout = 100;
+
+ regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1);
+ do {
+ count2 = count1;
+ regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1);
+- } while (count1 != count2 && --timeout);
++ diff = count1 - count2;
++ } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout);
+ if (!timeout) {
+ dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n");
+ return -ETIMEDOUT;
+diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
+index bdb20f63254e2..d04d46f9cc65a 100644
+--- a/drivers/rtc/rtc-st-lpc.c
++++ b/drivers/rtc/rtc-st-lpc.c
+@@ -228,7 +228,7 @@ static int st_rtc_probe(struct platform_device *pdev)
+ enable_irq_wake(rtc->irq);
+ disable_irq(rtc->irq);
+
+- rtc->clk = clk_get(&pdev->dev, NULL);
++ rtc->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(rtc->clk)) {
+ dev_err(&pdev->dev, "Unable to request clock\n");
+ return PTR_ERR(rtc->clk);
+@@ -238,6 +238,7 @@ static int st_rtc_probe(struct platform_device *pdev)
+
+ rtc->clkrate = clk_get_rate(rtc->clk);
+ if (!rtc->clkrate) {
++ clk_disable_unprepare(rtc->clk);
+ dev_err(&pdev->dev, "Unable to fetch clock rate\n");
+ return -EINVAL;
+ }
+diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
+index adec1b14a8deb..536bd023c4800 100644
+--- a/drivers/rtc/rtc-sun6i.c
++++ b/drivers/rtc/rtc-sun6i.c
+@@ -128,7 +128,6 @@ struct sun6i_rtc_clk_data {
+ unsigned int fixed_prescaler : 16;
+ unsigned int has_prescaler : 1;
+ unsigned int has_out_clk : 1;
+- unsigned int export_iosc : 1;
+ unsigned int has_losc_en : 1;
+ unsigned int has_auto_swt : 1;
+ };
+@@ -138,7 +137,7 @@ struct sun6i_rtc_dev {
+ const struct sun6i_rtc_clk_data *data;
+ void __iomem *base;
+ int irq;
+- unsigned long alarm;
++ time64_t alarm;
+
+ struct clk_hw hw;
+ struct clk_hw *int_osc;
+@@ -260,10 +259,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
+ /* Yes, I know, this is ugly. */
+ sun6i_rtc = rtc;
+
+- /* Only read IOSC name from device tree if it is exported */
+- if (rtc->data->export_iosc)
+- of_property_read_string_index(node, "clock-output-names", 2,
+- &iosc_name);
++ of_property_read_string_index(node, "clock-output-names", 2,
++ &iosc_name);
+
+ rtc->int_osc = clk_hw_register_fixed_rate_with_accuracy(NULL,
+ iosc_name,
+@@ -304,13 +301,10 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
+ goto err_register;
+ }
+
+- clk_data->num = 2;
++ clk_data->num = 3;
+ clk_data->hws[0] = &rtc->hw;
+ clk_data->hws[1] = __clk_get_hw(rtc->ext_losc);
+- if (rtc->data->export_iosc) {
+- clk_data->hws[2] = rtc->int_osc;
+- clk_data->num = 3;
+- }
++ clk_data->hws[2] = rtc->int_osc;
+ of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
+ return;
+
+@@ -350,7 +344,6 @@ static const struct sun6i_rtc_clk_data sun8i_h3_rtc_data = {
+ .fixed_prescaler = 32,
+ .has_prescaler = 1,
+ .has_out_clk = 1,
+- .export_iosc = 1,
+ };
+
+ static void __init sun8i_h3_rtc_clk_init(struct device_node *node)
+@@ -368,7 +361,6 @@ static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
+ .fixed_prescaler = 32,
+ .has_prescaler = 1,
+ .has_out_clk = 1,
+- .export_iosc = 1,
+ .has_losc_en = 1,
+ .has_auto_swt = 1,
+ };
+@@ -510,10 +502,8 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
+ struct sun6i_rtc_dev *chip = dev_get_drvdata(dev);
+ struct rtc_time *alrm_tm = &wkalrm->time;
+ struct rtc_time tm_now;
+- unsigned long time_now = 0;
+- unsigned long time_set = 0;
+- unsigned long time_gap = 0;
+- int ret = 0;
++ time64_t time_now, time_set;
++ int ret;
+
+ ret = sun6i_rtc_gettime(dev, &tm_now);
+ if (ret < 0) {
+@@ -528,9 +518,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
+ return -EINVAL;
+ }
+
+- time_gap = time_set - time_now;
+-
+- if (time_gap > U32_MAX) {
++ if ((time_set - time_now) > U32_MAX) {
+ dev_err(dev, "Date too far in the future\n");
+ return -EINVAL;
+ }
+@@ -539,7 +527,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
+ writel(0, chip->base + SUN6I_ALRM_COUNTER);
+ usleep_range(100, 300);
+
+- writel(time_gap, chip->base + SUN6I_ALRM_COUNTER);
++ writel(time_set - time_now, chip->base + SUN6I_ALRM_COUNTER);
+ chip->alarm = time_set;
+
+ sun6i_rtc_setaie(wkalrm->enabled, chip);
+diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
+index 2018614f258f6..6eaa9321c0741 100644
+--- a/drivers/rtc/rtc-wm8350.c
++++ b/drivers/rtc/rtc-wm8350.c
+@@ -432,14 +432,21 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
+ wm8350_rtc_update_handler, 0,
+ "RTC Seconds", wm8350);
++ if (ret)
++ return ret;
++
+ wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
+
+- wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM,
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM,
+ wm8350_rtc_alarm_handler, 0,
+ "RTC Alarm", wm8350);
++ if (ret) {
++ wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350);
++ return ret;
++ }
+
+ return 0;
+ }
+diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
+index e34c6cc61983b..0c6ab288201e5 100644
+--- a/drivers/s390/block/dasd.c
++++ b/drivers/s390/block/dasd.c
+@@ -1422,6 +1422,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
+ if (!cqr->lpm)
+ cqr->lpm = dasd_path_get_opm(device);
+ }
++ /*
++ * remember the amount of formatted tracks to prevent double format on
++ * ESE devices
++ */
++ if (cqr->block)
++ cqr->trkcount = atomic_read(&cqr->block->trkcount);
++
+ if (cqr->cpmode == 1) {
+ rc = ccw_device_tm_start(device->cdev, cqr->cpaddr,
+ (long) cqr, cqr->lpm);
+@@ -1639,6 +1646,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
+ unsigned long now;
+ int nrf_suppressed = 0;
+ int fp_suppressed = 0;
++ struct request *req;
+ u8 *sense = NULL;
+ int expires;
+
+@@ -1739,7 +1747,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
+ }
+
+ if (dasd_ese_needs_format(cqr->block, irb)) {
+- if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
++ req = dasd_get_callback_data(cqr);
++ if (!req) {
++ cqr->status = DASD_CQR_ERROR;
++ return;
++ }
++ if (rq_data_dir(req) == READ) {
+ device->discipline->ese_read(cqr, irb);
+ cqr->status = DASD_CQR_SUCCESS;
+ cqr->stopclk = now;
+@@ -2762,8 +2775,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
+ * complete a request partially.
+ */
+ if (proc_bytes) {
+- blk_update_request(req, BLK_STS_OK,
+- blk_rq_bytes(req) - proc_bytes);
++ blk_update_request(req, BLK_STS_OK, proc_bytes);
+ blk_mq_requeue_request(req, true);
+ } else if (likely(!blk_should_fake_timeout(req->q))) {
+ blk_mq_complete_request(req);
+@@ -2936,41 +2948,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
+ * Requeue a request back to the block request queue
+ * only works for block requests
+ */
+-static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
++static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
+ {
+- struct dasd_block *block = cqr->block;
+ struct request *req;
+
+- if (!block)
+- return -EINVAL;
+ /*
+ * If the request is an ERP request there is nothing to requeue.
+ * This will be done with the remaining original request.
+ */
+ if (cqr->refers)
+- return 0;
++ return;
+ spin_lock_irq(&cqr->dq->lock);
+ req = (struct request *) cqr->callback_data;
+- blk_mq_requeue_request(req, false);
++ blk_mq_requeue_request(req, true);
+ spin_unlock_irq(&cqr->dq->lock);
+
+- return 0;
++ return;
+ }
+
+-/*
+- * Go through all request on the dasd_block request queue, cancel them
+- * on the respective dasd_device, and return them to the generic
+- * block layer.
+- */
+-static int dasd_flush_block_queue(struct dasd_block *block)
++static int _dasd_requests_to_flushqueue(struct dasd_block *block,
++ struct list_head *flush_queue)
+ {
+ struct dasd_ccw_req *cqr, *n;
+- int rc, i;
+- struct list_head flush_queue;
+ unsigned long flags;
++ int rc, i;
+
+- INIT_LIST_HEAD(&flush_queue);
+- spin_lock_bh(&block->queue_lock);
++ spin_lock_irqsave(&block->queue_lock, flags);
+ rc = 0;
+ restart:
+ list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
+@@ -2985,13 +2988,32 @@ restart:
+ * is returned from the dasd_device layer.
+ */
+ cqr->callback = _dasd_wake_block_flush_cb;
+- for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
+- list_move_tail(&cqr->blocklist, &flush_queue);
++ for (i = 0; cqr; cqr = cqr->refers, i++)
++ list_move_tail(&cqr->blocklist, flush_queue);
+ if (i > 1)
+ /* moved more than one request - need to restart */
+ goto restart;
+ }
+- spin_unlock_bh(&block->queue_lock);
++ spin_unlock_irqrestore(&block->queue_lock, flags);
++
++ return rc;
++}
++
++/*
++ * Go through all request on the dasd_block request queue, cancel them
++ * on the respective dasd_device, and return them to the generic
++ * block layer.
++ */
++static int dasd_flush_block_queue(struct dasd_block *block)
++{
++ struct dasd_ccw_req *cqr, *n;
++ struct list_head flush_queue;
++ unsigned long flags;
++ int rc;
++
++ INIT_LIST_HEAD(&flush_queue);
++ rc = _dasd_requests_to_flushqueue(block, &flush_queue);
++
+ /* Now call the callback function of flushed requests */
+ restart_cb:
+ list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
+@@ -3914,75 +3936,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
+ */
+ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
+ {
++ struct dasd_block *block = device->block;
+ struct list_head requeue_queue;
+ struct dasd_ccw_req *cqr, *n;
+- struct dasd_ccw_req *refers;
+ int rc;
+
+- INIT_LIST_HEAD(&requeue_queue);
+- spin_lock_irq(get_ccwdev_lock(device->cdev));
+- rc = 0;
+- list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
+- /* Check status and move request to flush_queue */
+- if (cqr->status == DASD_CQR_IN_IO) {
+- rc = device->discipline->term_IO(cqr);
+- if (rc) {
+- /* unable to terminate requeust */
+- dev_err(&device->cdev->dev,
+- "Unable to terminate request %p "
+- "on suspend\n", cqr);
+- spin_unlock_irq(get_ccwdev_lock(device->cdev));
+- dasd_put_device(device);
+- return rc;
+- }
+- }
+- list_move_tail(&cqr->devlist, &requeue_queue);
+- }
+- spin_unlock_irq(get_ccwdev_lock(device->cdev));
+-
+- list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
+- wait_event(dasd_flush_wq,
+- (cqr->status != DASD_CQR_CLEAR_PENDING));
++ if (!block)
++ return 0;
+
+- /*
+- * requeue requests to blocklayer will only work
+- * for block device requests
+- */
+- if (_dasd_requeue_request(cqr))
+- continue;
++ INIT_LIST_HEAD(&requeue_queue);
++ rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
+
+- /* remove requests from device and block queue */
+- list_del_init(&cqr->devlist);
+- while (cqr->refers != NULL) {
+- refers = cqr->refers;
+- /* remove the request from the block queue */
+- list_del(&cqr->blocklist);
+- /* free the finished erp request */
+- dasd_free_erp_request(cqr, cqr->memdev);
+- cqr = refers;
++ /* Now call the callback function of flushed requests */
++restart_cb:
++ list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
++ wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
++ /* Process finished ERP request. */
++ if (cqr->refers) {
++ spin_lock_bh(&block->queue_lock);
++ __dasd_process_erp(block->base, cqr);
++ spin_unlock_bh(&block->queue_lock);
++ /* restart list_for_xx loop since dasd_process_erp
++ * might remove multiple elements
++ */
++ goto restart_cb;
+ }
+-
+- /*
+- * _dasd_requeue_request already checked for a valid
+- * blockdevice, no need to check again
+- * all erp requests (cqr->refers) have a cqr->block
+- * pointer copy from the original cqr
+- */
++ _dasd_requeue_request(cqr);
+ list_del_init(&cqr->blocklist);
+ cqr->block->base->discipline->free_cp(
+ cqr, (struct request *) cqr->callback_data);
+ }
+-
+- /*
+- * if requests remain then they are internal request
+- * and go back to the device queue
+- */
+- if (!list_empty(&requeue_queue)) {
+- /* move freeze_queue to start of the ccw_queue */
+- spin_lock_irq(get_ccwdev_lock(device->cdev));
+- list_splice_tail(&requeue_queue, &device->ccw_queue);
+- spin_unlock_irq(get_ccwdev_lock(device->cdev));
+- }
+ dasd_schedule_device_bh(device);
+ return rc;
+ }
+diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
+index 4691a3c35d725..c2d4ea74e0d00 100644
+--- a/drivers/s390/block/dasd_3990_erp.c
++++ b/drivers/s390/block/dasd_3990_erp.c
+@@ -2436,7 +2436,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr)
+ erp->block = cqr->block;
+ erp->magic = cqr->magic;
+ erp->expires = cqr->expires;
+- erp->retries = 256;
++ erp->retries = device->default_retries;
+ erp->buildclk = get_tod_clock();
+ erp->status = DASD_CQR_FILLED;
+
+diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
+index dc78a523a69f2..b6b938aa66158 100644
+--- a/drivers/s390/block/dasd_alias.c
++++ b/drivers/s390/block/dasd_alias.c
+@@ -675,12 +675,12 @@ int dasd_alias_remove_device(struct dasd_device *device)
+ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device)
+ {
+ struct dasd_eckd_private *alias_priv, *private = base_device->private;
+- struct alias_pav_group *group = private->pavgroup;
+ struct alias_lcu *lcu = private->lcu;
+ struct dasd_device *alias_device;
++ struct alias_pav_group *group;
+ unsigned long flags;
+
+- if (!group || !lcu)
++ if (!lcu)
+ return NULL;
+ if (lcu->pav == NO_PAV ||
+ lcu->flags & (NEED_UAC_UPDATE | UPDATE_PENDING))
+@@ -697,6 +697,11 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device)
+ }
+
+ spin_lock_irqsave(&lcu->lock, flags);
++ group = private->pavgroup;
++ if (!group) {
++ spin_unlock_irqrestore(&lcu->lock, flags);
++ return NULL;
++ }
+ alias_device = group->next;
+ if (!alias_device) {
+ if (list_empty(&group->aliaslist)) {
+diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
+index 460e0f1cca533..56ab74aa07f42 100644
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -3095,13 +3095,24 @@ static int dasd_eckd_format_device(struct dasd_device *base,
+ }
+
+ static bool test_and_set_format_track(struct dasd_format_entry *to_format,
+- struct dasd_block *block)
++ struct dasd_ccw_req *cqr)
+ {
++ struct dasd_block *block = cqr->block;
+ struct dasd_format_entry *format;
+ unsigned long flags;
+ bool rc = false;
+
+ spin_lock_irqsave(&block->format_lock, flags);
++ if (cqr->trkcount != atomic_read(&block->trkcount)) {
++ /*
++ * The number of formatted tracks has changed after request
++ * start and we can not tell if the current track was involved.
++ * To avoid data corruption treat it as if the current track is
++ * involved
++ */
++ rc = true;
++ goto out;
++ }
+ list_for_each_entry(format, &block->format_list, list) {
+ if (format->track == to_format->track) {
+ rc = true;
+@@ -3121,6 +3132,7 @@ static void clear_format_track(struct dasd_format_entry *format,
+ unsigned long flags;
+
+ spin_lock_irqsave(&block->format_lock, flags);
++ atomic_inc(&block->trkcount);
+ list_del_init(&format->list);
+ spin_unlock_irqrestore(&block->format_lock, flags);
+ }
+@@ -3157,7 +3169,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
+ sector_t curr_trk;
+ int rc;
+
+- req = cqr->callback_data;
++ req = dasd_get_callback_data(cqr);
+ block = cqr->block;
+ base = block->base;
+ private = base->private;
+@@ -3182,8 +3194,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
+ }
+ format->track = curr_trk;
+ /* test if track is already in formatting by another thread */
+- if (test_and_set_format_track(format, block))
++ if (test_and_set_format_track(format, cqr)) {
++ /* this is no real error so do not count down retries */
++ cqr->retries++;
+ return ERR_PTR(-EEXIST);
++ }
+
+ fdata.start_unit = curr_trk;
+ fdata.stop_unit = curr_trk;
+@@ -3282,12 +3297,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
+ cqr->proc_bytes = blk_count * blksize;
+ return 0;
+ }
+- if (dst && !skip_block) {
+- dst += off;
++ if (dst && !skip_block)
+ memset(dst, 0, blksize);
+- } else {
++ else
+ skip_block--;
+- }
++ dst += blksize;
+ blk_count++;
+ }
+ }
+@@ -4682,7 +4696,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
+ struct dasd_device *basedev;
+ struct req_iterator iter;
+ struct dasd_ccw_req *cqr;
+- unsigned int first_offs;
+ unsigned int trkcount;
+ unsigned long *idaws;
+ unsigned int size;
+@@ -4716,7 +4729,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
+ last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) /
+ DASD_RAW_SECTORS_PER_TRACK;
+ trkcount = last_trk - first_trk + 1;
+- first_offs = 0;
+
+ if (rq_data_dir(req) == READ)
+ cmd = DASD_ECKD_CCW_READ_TRACK;
+@@ -4760,13 +4772,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
+
+ if (use_prefix) {
+ prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev,
+- startdev, 1, first_offs + 1, trkcount, 0, 0);
++ startdev, 1, 0, trkcount, 0, 0);
+ } else {
+ define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0);
+ ccw[-1].flags |= CCW_FLAG_CC;
+
+ data += sizeof(struct DE_eckd_data);
+- locate_record_ext(ccw++, data, first_trk, first_offs + 1,
++ locate_record_ext(ccw++, data, first_trk, 0,
+ trkcount, cmd, basedev, 0, 0);
+ }
+
+@@ -6717,8 +6729,10 @@ dasd_eckd_init(void)
+ return -ENOMEM;
+ dasd_vol_info_req = kmalloc(sizeof(*dasd_vol_info_req),
+ GFP_KERNEL | GFP_DMA);
+- if (!dasd_vol_info_req)
++ if (!dasd_vol_info_req) {
++ kfree(dasd_reserve_req);
+ return -ENOMEM;
++ }
+ pe_handler_worker = kmalloc(sizeof(*pe_handler_worker),
+ GFP_KERNEL | GFP_DMA);
+ if (!pe_handler_worker) {
+diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
+index fa966e0db6ca9..3a6f3af240fa7 100644
+--- a/drivers/s390/block/dasd_genhd.c
++++ b/drivers/s390/block/dasd_genhd.c
+@@ -14,6 +14,7 @@
+ #define KMSG_COMPONENT "dasd"
+
+ #include <linux/interrupt.h>
++#include <linux/major.h>
+ #include <linux/fs.h>
+ #include <linux/blkpg.h>
+
+diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
+index 155428bfed8ac..d94ae067f085e 100644
+--- a/drivers/s390/block/dasd_int.h
++++ b/drivers/s390/block/dasd_int.h
+@@ -188,6 +188,7 @@ struct dasd_ccw_req {
+ void (*callback)(struct dasd_ccw_req *, void *data);
+ void *callback_data;
+ unsigned int proc_bytes; /* bytes for partial completion */
++ unsigned int trkcount; /* count formatted tracks */
+ };
+
+ /*
+@@ -611,6 +612,7 @@ struct dasd_block {
+
+ struct list_head format_list;
+ spinlock_t format_lock;
++ atomic_t trkcount;
+ };
+
+ struct dasd_attention_data {
+@@ -757,6 +759,18 @@ dasd_check_blocksize(int bsize)
+ return 0;
+ }
+
++/*
++ * return the callback data of the original request in case there are
++ * ERP requests build on top of it
++ */
++static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr)
++{
++ while (cqr->refers)
++ cqr = cqr->refers;
++
++ return cqr->callback_data;
++}
++
+ /* externals in dasd.c */
+ #define DASD_PROFILE_OFF 0
+ #define DASD_PROFILE_ON 1
+diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
+index 468cbeb539ff0..7dad0428d73ce 100644
+--- a/drivers/s390/block/dasd_ioctl.c
++++ b/drivers/s390/block/dasd_ioctl.c
+@@ -131,6 +131,7 @@ static int dasd_ioctl_resume(struct dasd_block *block)
+ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
+
+ dasd_schedule_block_bh(block);
++ dasd_schedule_device_bh(base);
+ return 0;
+ }
+
+@@ -502,10 +503,10 @@ static int __dasd_ioctl_information(struct dasd_block *block,
+
+ memcpy(dasd_info->type, base->discipline->name, 4);
+
+- spin_lock_irqsave(&block->queue_lock, flags);
++ spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
+ list_for_each(l, &base->ccw_queue)
+ dasd_info->chanq_len++;
+- spin_unlock_irqrestore(&block->queue_lock, flags);
++ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
+ return 0;
+ }
+
+diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
+index 5be3d1c39a78e..54176c073547b 100644
+--- a/drivers/s390/block/dcssblk.c
++++ b/drivers/s390/block/dcssblk.c
+@@ -866,6 +866,8 @@ dcssblk_submit_bio(struct bio *bio)
+ unsigned long bytes_done;
+
+ blk_queue_split(&bio);
++ if (!bio)
++ return BLK_QC_T_NONE;
+
+ bytes_done = 0;
+ dev_info = bio->bi_bdev->bd_disk->private_data;
+diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h
+index c467589c7f452..c06d399b9b1f1 100644
+--- a/drivers/s390/char/keyboard.h
++++ b/drivers/s390/char/keyboard.h
+@@ -56,7 +56,7 @@ static inline void
+ kbd_put_queue(struct tty_port *port, int ch)
+ {
+ tty_insert_flip_char(port, ch, 0);
+- tty_schedule_flip(port);
++ tty_flip_buffer_push(port);
+ }
+
+ static inline void
+@@ -64,5 +64,5 @@ kbd_puts_queue(struct tty_port *port, char *cp)
+ {
+ while (*cp)
+ tty_insert_flip_char(port, *cp++, 0);
+- tty_schedule_flip(port);
++ tty_flip_buffer_push(port);
+ }
+diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c
+index 1f5fab617b679..f7e75d9fedf61 100644
+--- a/drivers/s390/char/tape_std.c
++++ b/drivers/s390/char/tape_std.c
+@@ -53,7 +53,6 @@ int
+ tape_std_assign(struct tape_device *device)
+ {
+ int rc;
+- struct timer_list timeout;
+ struct tape_request *request;
+
+ request = tape_alloc_request(2, 11);
+@@ -70,7 +69,7 @@ tape_std_assign(struct tape_device *device)
+ * So we set up a timeout for this call.
+ */
+ timer_setup(&request->timer, tape_std_assign_timeout, 0);
+- mod_timer(&timeout, jiffies + 2 * HZ);
++ mod_timer(&request->timer, jiffies + msecs_to_jiffies(2000));
+
+ rc = tape_do_io_interruptible(device, request);
+
+diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
+index 3ba2d934a3e89..92b32ce645b95 100644
+--- a/drivers/s390/char/zcore.c
++++ b/drivers/s390/char/zcore.c
+@@ -50,6 +50,7 @@ static struct dentry *zcore_reipl_file;
+ static struct dentry *zcore_hsa_file;
+ static struct ipl_parameter_block *zcore_ipl_block;
+
++static DEFINE_MUTEX(hsa_buf_mutex);
+ static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+ /*
+@@ -66,19 +67,24 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
+ if (!hsa_available)
+ return -ENODATA;
+
++ mutex_lock(&hsa_buf_mutex);
+ while (count) {
+ if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
+ TRACE("sclp_sdias_copy() failed\n");
++ mutex_unlock(&hsa_buf_mutex);
+ return -EIO;
+ }
+ offset = src % PAGE_SIZE;
+ bytes = min(PAGE_SIZE - offset, count);
+- if (copy_to_user(dest, hsa_buf + offset, bytes))
++ if (copy_to_user(dest, hsa_buf + offset, bytes)) {
++ mutex_unlock(&hsa_buf_mutex);
+ return -EFAULT;
++ }
+ src += bytes;
+ dest += bytes;
+ count -= bytes;
+ }
++ mutex_unlock(&hsa_buf_mutex);
+ return 0;
+ }
+
+@@ -96,9 +102,11 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count)
+ if (!hsa_available)
+ return -ENODATA;
+
++ mutex_lock(&hsa_buf_mutex);
+ while (count) {
+ if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
+ TRACE("sclp_sdias_copy() failed\n");
++ mutex_unlock(&hsa_buf_mutex);
+ return -EIO;
+ }
+ offset = src % PAGE_SIZE;
+@@ -108,6 +116,7 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count)
+ dest += bytes;
+ count -= bytes;
+ }
++ mutex_unlock(&hsa_buf_mutex);
+ return 0;
+ }
+
+@@ -229,8 +238,7 @@ static int __init zcore_reipl_init(void)
+ rc = memcpy_hsa_kernel(zcore_ipl_block, ipib_info.ipib,
+ PAGE_SIZE);
+ else
+- rc = memcpy_real(zcore_ipl_block, (void *) ipib_info.ipib,
+- PAGE_SIZE);
++ rc = memcpy_real(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE);
+ if (rc || (__force u32)csum_partial(zcore_ipl_block, zcore_ipl_block->hdr.len, 0) !=
+ ipib_info.checksum) {
+ TRACE("Checksum does not match\n");
+diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
+index 297fb399363cc..620a917cd3a15 100644
+--- a/drivers/s390/cio/chsc.c
++++ b/drivers/s390/cio/chsc.c
+@@ -1255,7 +1255,7 @@ exit:
+ EXPORT_SYMBOL_GPL(css_general_characteristics);
+ EXPORT_SYMBOL_GPL(css_chsc_characteristics);
+
+-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta)
++int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta)
+ {
+ struct {
+ struct chsc_header request;
+@@ -1266,7 +1266,7 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta)
+ unsigned int rsvd2[5];
+ struct chsc_header response;
+ unsigned int rsvd3[3];
+- u64 clock_delta;
++ s64 clock_delta;
+ unsigned int rsvd4[2];
+ } *rr;
+ int rc;
+diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
+index 44461928aab8a..2ba9e01355659 100644
+--- a/drivers/s390/cio/css.c
++++ b/drivers/s390/cio/css.c
+@@ -437,8 +437,8 @@ static ssize_t dev_busid_show(struct device *dev,
+ struct subchannel *sch = to_subchannel(dev);
+ struct pmcw *pmcw = &sch->schib.pmcw;
+
+- if ((pmcw->st == SUBCHANNEL_TYPE_IO ||
+- pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv)
++ if ((pmcw->st == SUBCHANNEL_TYPE_IO && pmcw->dnv) ||
++ (pmcw->st == SUBCHANNEL_TYPE_MSG && pmcw->w))
+ return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid,
+ pmcw->dev);
+ else
+@@ -792,9 +792,8 @@ static int __unset_online(struct device *dev, void *data)
+ {
+ struct idset *set = data;
+ struct subchannel *sch = to_subchannel(dev);
+- struct ccw_device *cdev = sch_get_cdev(sch);
+
+- if (cdev && cdev->online)
++ if (sch->st == SUBCHANNEL_TYPE_IO && sch->config.ena)
+ idset_sch_del(set, sch->schid);
+
+ return 0;
+diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
+index 8d14569823d73..a111154a90465 100644
+--- a/drivers/s390/cio/device.c
++++ b/drivers/s390/cio/device.c
+@@ -1116,6 +1116,8 @@ static void io_subchannel_verify(struct subchannel *sch)
+ cdev = sch_get_cdev(sch);
+ if (cdev)
+ dev_fsm_event(cdev, DEV_EVENT_VERIFY);
++ else
++ css_schedule_eval(sch->schid);
+ }
+
+ static void io_subchannel_terminate_path(struct subchannel *sch, u8 mask)
+@@ -1194,7 +1196,7 @@ static int io_subchannel_chp_event(struct subchannel *sch,
+ else
+ path_event[chpid] = PE_NONE;
+ }
+- if (cdev)
++ if (cdev && cdev->drv && cdev->drv->path_event)
+ cdev->drv->path_event(cdev, path_event);
+ break;
+ }
+@@ -1377,6 +1379,7 @@ void ccw_device_set_notoper(struct ccw_device *cdev)
+ enum io_sch_action {
+ IO_SCH_UNREG,
+ IO_SCH_ORPH_UNREG,
++ IO_SCH_UNREG_CDEV,
+ IO_SCH_ATTACH,
+ IO_SCH_UNREG_ATTACH,
+ IO_SCH_ORPH_ATTACH,
+@@ -1409,7 +1412,7 @@ static enum io_sch_action sch_get_action(struct subchannel *sch)
+ }
+ if ((sch->schib.pmcw.pam & sch->opm) == 0) {
+ if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK)
+- return IO_SCH_UNREG;
++ return IO_SCH_UNREG_CDEV;
+ return IO_SCH_DISC;
+ }
+ if (device_is_disconnected(cdev))
+@@ -1471,6 +1474,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process)
+ case IO_SCH_ORPH_ATTACH:
+ ccw_device_set_disconnected(cdev);
+ break;
++ case IO_SCH_UNREG_CDEV:
+ case IO_SCH_UNREG_ATTACH:
+ case IO_SCH_UNREG:
+ if (!cdev)
+@@ -1504,6 +1508,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process)
+ if (rc)
+ goto out;
+ break;
++ case IO_SCH_UNREG_CDEV:
+ case IO_SCH_UNREG_ATTACH:
+ spin_lock_irqsave(sch->lock, flags);
+ sch_set_cdev(sch, NULL);
+diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
+index 0fe7b2f2e7f52..c533d1dadc6bb 100644
+--- a/drivers/s390/cio/device_ops.c
++++ b/drivers/s390/cio/device_ops.c
+@@ -825,13 +825,23 @@ EXPORT_SYMBOL_GPL(ccw_device_get_chid);
+ */
+ void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
+ {
+- return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
++ void *addr;
++
++ if (!get_device(&cdev->dev))
++ return NULL;
++ addr = cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
++ if (IS_ERR_OR_NULL(addr))
++ put_device(&cdev->dev);
++ return addr;
+ }
+ EXPORT_SYMBOL(ccw_device_dma_zalloc);
+
+ void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
+ {
++ if (!cpu_addr)
++ return;
+ cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
++ put_device(&cdev->dev);
+ }
+ EXPORT_SYMBOL(ccw_device_dma_free);
+
+diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
+index 99c2212dc6a6b..a3230c5093b01 100644
+--- a/drivers/s390/cio/qdio.h
++++ b/drivers/s390/cio/qdio.h
+@@ -95,7 +95,7 @@ static inline int do_sqbs(u64 token, unsigned char state, int queue,
+ " lgr 1,%[token]\n"
+ " .insn rsy,0xeb000000008a,%[qs],%[ccq],0(%[state])"
+ : [ccq] "+&d" (_ccq), [qs] "+&d" (_queuestart)
+- : [state] "d" ((unsigned long)state), [token] "d" (token)
++ : [state] "a" ((unsigned long)state), [token] "d" (token)
+ : "memory", "cc", "1");
+ *count = _ccq & 0xff;
+ *start = _queuestart & 0xff;
+diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
+index 76099bcb765b4..b9091e22ca572 100644
+--- a/drivers/s390/cio/vfio_ccw_drv.c
++++ b/drivers/s390/cio/vfio_ccw_drv.c
+@@ -287,19 +287,11 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)
+ if (work_pending(&sch->todo_work))
+ goto out_unlock;
+
+- if (cio_update_schib(sch)) {
+- vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+- rc = 0;
+- goto out_unlock;
+- }
+-
+- private = dev_get_drvdata(&sch->dev);
+- if (private->state == VFIO_CCW_STATE_NOT_OPER) {
+- private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
+- VFIO_CCW_STATE_STANDBY;
+- }
+ rc = 0;
+
++ if (cio_update_schib(sch))
++ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
++
+ out_unlock:
+ spin_unlock_irqrestore(sch->lock, flags);
+
+diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
+index 9ea48bf0ee40d..032bf7b282bab 100644
+--- a/drivers/s390/crypto/ap_queue.c
++++ b/drivers/s390/crypto/ap_queue.c
+@@ -157,6 +157,8 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ aq->queue_count = max_t(int, 0, aq->queue_count - 1);
++ if (!status.queue_empty && !aq->queue_count)
++ aq->queue_count++;
+ if (aq->queue_count > 0)
+ mod_timer(&aq->timeout,
+ jiffies + aq->request_timeout);
+diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
+index cf23ce1b11465..34e1d1b339c12 100644
+--- a/drivers/s390/crypto/pkey_api.c
++++ b/drivers/s390/crypto/pkey_api.c
+@@ -744,7 +744,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
+ if (ktype)
+ *ktype = PKEY_TYPE_EP11;
+ if (ksize)
+- *ksize = kb->head.keybitlen;
++ *ksize = kb->head.bitlen;
+
+ rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
+ ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
+@@ -1286,6 +1286,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
+ return PTR_ERR(kkey);
+ rc = pkey_keyblob2pkey(kkey, ktp.keylen, &ktp.protkey);
+ DEBUG_DBG("%s pkey_keyblob2pkey()=%d\n", __func__, rc);
++ memzero_explicit(kkey, ktp.keylen);
+ kfree(kkey);
+ if (rc)
+ break;
+@@ -1419,6 +1420,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
+ kkey, ktp.keylen, &ktp.protkey);
+ DEBUG_DBG("%s pkey_keyblob2pkey2()=%d\n", __func__, rc);
+ kfree(apqns);
++ memzero_explicit(kkey, ktp.keylen);
+ kfree(kkey);
+ if (rc)
+ break;
+@@ -1545,6 +1547,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
+ protkey, &protkeylen);
+ DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc);
+ kfree(apqns);
++ memzero_explicit(kkey, ktp.keylen);
+ kfree(kkey);
+ if (rc) {
+ kfree(protkey);
+diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
+index 4d2556bc7fe58..5196c9ac5a81f 100644
+--- a/drivers/s390/crypto/vfio_ap_drv.c
++++ b/drivers/s390/crypto/vfio_ap_drv.c
+@@ -86,8 +86,9 @@ static struct ap_driver vfio_ap_drv = {
+
+ static void vfio_ap_matrix_dev_release(struct device *dev)
+ {
+- struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev);
++ struct ap_matrix_dev *matrix_dev;
+
++ matrix_dev = container_of(dev, struct ap_matrix_dev, device);
+ kfree(matrix_dev);
+ }
+
+diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
+index 356318746dd16..17b3f1ea3a5c2 100644
+--- a/drivers/s390/crypto/zcrypt_api.c
++++ b/drivers/s390/crypto/zcrypt_api.c
+@@ -398,6 +398,7 @@ static int zcdn_create(const char *name)
+ ZCRYPT_NAME "_%d", (int) MINOR(devt));
+ nodename[sizeof(nodename)-1] = '\0';
+ if (dev_set_name(&zcdndev->device, nodename)) {
++ kfree(zcdndev);
+ rc = -EINVAL;
+ goto unlockout;
+ }
+diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
+index 9ce5a71da69b8..3daf259ba10e7 100644
+--- a/drivers/s390/crypto/zcrypt_ep11misc.c
++++ b/drivers/s390/crypto/zcrypt_ep11misc.c
+@@ -788,7 +788,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
+ kb->head.type = TOKTYPE_NON_CCA;
+ kb->head.len = rep_pl->data_len;
+ kb->head.version = TOKVER_EP11_AES;
+- kb->head.keybitlen = keybitsize;
++ kb->head.bitlen = keybitsize;
+
+ out:
+ kfree(req);
+@@ -1056,7 +1056,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
+ kb->head.type = TOKTYPE_NON_CCA;
+ kb->head.len = rep_pl->data_len;
+ kb->head.version = TOKVER_EP11_AES;
+- kb->head.keybitlen = keybitsize;
++ kb->head.bitlen = keybitsize;
+
+ out:
+ kfree(req);
+diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
+index 1e02b197c0035..d424fa901f1b0 100644
+--- a/drivers/s390/crypto/zcrypt_ep11misc.h
++++ b/drivers/s390/crypto/zcrypt_ep11misc.h
+@@ -29,14 +29,7 @@ struct ep11keyblob {
+ union {
+ u8 session[32];
+ /* only used for PKEY_TYPE_EP11: */
+- struct {
+- u8 type; /* 0x00 (TOKTYPE_NON_CCA) */
+- u8 res0; /* unused */
+- u16 len; /* total length in bytes of this blob */
+- u8 version; /* 0x03 (TOKVER_EP11_AES) */
+- u8 res1; /* unused */
+- u16 keybitlen; /* clear key bit len, 0 for unknown */
+- } head;
++ struct ep11kblob_header head;
+ };
+ u8 wkvp[16]; /* wrapping key verification pattern */
+ u64 attr; /* boolean key attributes */
+diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
+index fd705429708e8..f91b6cfd7ed01 100644
+--- a/drivers/s390/net/ctcm_main.c
++++ b/drivers/s390/net/ctcm_main.c
+@@ -825,16 +825,9 @@ done:
+ /**
+ * Start transmission of a packet.
+ * Called from generic network device layer.
+- *
+- * skb Pointer to buffer containing the packet.
+- * dev Pointer to interface struct.
+- *
+- * returns 0 if packet consumed, !0 if packet rejected.
+- * Note: If we return !0, then the packet is free'd by
+- * the generic network layer.
+ */
+ /* first merge version - leaving both functions separated */
+-static int ctcm_tx(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t ctcm_tx(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct ctcm_priv *priv = dev->ml_priv;
+
+@@ -877,7 +870,7 @@ static int ctcm_tx(struct sk_buff *skb, struct net_device *dev)
+ }
+
+ /* unmerged MPC variant of ctcm_tx */
+-static int ctcmpc_tx(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t ctcmpc_tx(struct sk_buff *skb, struct net_device *dev)
+ {
+ int len = 0;
+ struct ctcm_priv *priv = dev->ml_priv;
+diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
+index f0436f555c62a..be03cb123ef48 100644
+--- a/drivers/s390/net/ctcm_mpc.c
++++ b/drivers/s390/net/ctcm_mpc.c
+@@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo)
+ ctcm_clear_busy_do(dev);
+ }
+
+- kfree(mpcginfo);
+-
+ return;
+
+ }
+@@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
+ CTCM_FUNTAIL, dev->name);
+ priv->stats.rx_dropped++;
+ /* mpcginfo only used for non-data transfers */
+- kfree(mpcginfo);
+ if (do_debug_data)
+ ctcmpc_dump_skb(pskb, -8);
+ }
++ kfree(mpcginfo);
+ }
+ done:
+
+@@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg)
+ }
+ break;
+ }
+- kfree(mpcginfo);
+
+ CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n",
+ __func__, ch->id, grp->outstanding_xid2,
+@@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg)
+ mpc_validate_xid(mpcginfo);
+ break;
+ }
+- kfree(mpcginfo);
+ return;
+ }
+
+diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c
+index ded1930a00b2d..e3813a7aa5e68 100644
+--- a/drivers/s390/net/ctcm_sysfs.c
++++ b/drivers/s390/net/ctcm_sysfs.c
+@@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev,
+ struct ctcm_priv *priv = dev_get_drvdata(dev);
+ int rc;
+
+- ndev = priv->channel[CTCM_READ]->netdev;
+- if (!(priv && priv->channel[CTCM_READ] && ndev)) {
++ if (!(priv && priv->channel[CTCM_READ] &&
++ priv->channel[CTCM_READ]->netdev)) {
+ CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev");
+ return -ENODEV;
+ }
++ ndev = priv->channel[CTCM_READ]->netdev;
+
+ rc = kstrtouint(buf, 0, &bs1);
+ if (rc)
+diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
+index 440219bcaa2be..7e743f4717a91 100644
+--- a/drivers/s390/net/lcs.c
++++ b/drivers/s390/net/lcs.c
+@@ -1518,9 +1518,8 @@ lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer)
+ /**
+ * Packet transmit function called by network stack
+ */
+-static int
+-__lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
+- struct net_device *dev)
++static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
++ struct net_device *dev)
+ {
+ struct lcs_header *header;
+ int rc = NETDEV_TX_OK;
+@@ -1581,8 +1580,7 @@ out:
+ return rc;
+ }
+
+-static int
+-lcs_start_xmit(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct lcs_card *card;
+ int rc;
+@@ -1735,10 +1733,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
+ lcs_schedule_recovery(card);
+ break;
+ case LCS_CMD_STOPLAN:
+- pr_warn("Stoplan for %s initiated by LGW\n",
+- card->dev->name);
+- if (card->dev)
++ if (card->dev) {
++ pr_warn("Stoplan for %s initiated by LGW\n",
++ card->dev->name);
+ netif_carrier_off(card->dev);
++ }
+ break;
+ default:
+ LCS_DBF_TEXT(5, trace, "noLGWcmd");
+diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
+index 5a0c2f07a3a25..ce5f0ffd6cc8d 100644
+--- a/drivers/s390/net/netiucv.c
++++ b/drivers/s390/net/netiucv.c
+@@ -1252,15 +1252,8 @@ static int netiucv_close(struct net_device *dev)
+ /**
+ * Start transmission of a packet.
+ * Called from generic network device layer.
+- *
+- * @param skb Pointer to buffer containing the packet.
+- * @param dev Pointer to interface struct.
+- *
+- * @return 0 if packet consumed, !0 if packet rejected.
+- * Note: If we return !0, then the packet is free'd by
+- * the generic network layer.
+ */
+-static int netiucv_tx(struct sk_buff *skb, struct net_device *dev)
++static netdev_tx_t netiucv_tx(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct netiucv_priv *privptr = netdev_priv(dev);
+ int rc;
+diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
+index a5aa0bdc61d69..e8c360879883b 100644
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -717,7 +717,6 @@ struct qeth_card_info {
+ u16 chid;
+ u8 ids_valid:1; /* cssid,iid,chid */
+ u8 dev_addr_is_registered:1;
+- u8 open_when_online:1;
+ u8 promisc_mode:1;
+ u8 use_v1_blkt:1;
+ u8 is_vm_nic:1;
+diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
+index e9807d2996a9d..62e7576bff536 100644
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -5459,8 +5459,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+ qeth_clear_ipacmd_list(card);
+
+ rtnl_lock();
+- card->info.open_when_online = card->dev->flags & IFF_UP;
+- dev_close(card->dev);
+ netif_device_detach(card->dev);
+ netif_carrier_off(card->dev);
+ rtnl_unlock();
+diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
+index dc6c00768d919..7cdf3274cf964 100644
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -661,13 +661,13 @@ static void qeth_l2_dev2br_fdb_notify(struct qeth_card *card, u8 code,
+ card->dev, &info.info, NULL);
+ QETH_CARD_TEXT(card, 4, "andelmac");
+ QETH_CARD_TEXT_(card, 4,
+- "mc%012lx", ether_addr_to_u64(ntfy_mac));
++ "mc%012llx", ether_addr_to_u64(ntfy_mac));
+ } else {
+ call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
+ card->dev, &info.info, NULL);
+ QETH_CARD_TEXT(card, 4, "anaddmac");
+ QETH_CARD_TEXT_(card, 4,
+- "mc%012lx", ether_addr_to_u64(ntfy_mac));
++ "mc%012llx", ether_addr_to_u64(ntfy_mac));
+ }
+ }
+
+@@ -764,9 +764,8 @@ static void qeth_l2_br2dev_worker(struct work_struct *work)
+ struct list_head *iter;
+ int err = 0;
+
+- kfree(br2dev_event_work);
+- QETH_CARD_TEXT_(card, 4, "b2dw%04x", event);
+- QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr));
++ QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event);
++ QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr));
+
+ rcu_read_lock();
+ /* Verify preconditions are still valid: */
+@@ -795,7 +794,7 @@ static void qeth_l2_br2dev_worker(struct work_struct *work)
+ if (err) {
+ QETH_CARD_TEXT(card, 2, "b2derris");
+ QETH_CARD_TEXT_(card, 2,
+- "err%02x%03d", event,
++ "err%02lx%03d", event,
+ lowerdev->ifindex);
+ }
+ }
+@@ -813,7 +812,7 @@ static void qeth_l2_br2dev_worker(struct work_struct *work)
+ break;
+ }
+ if (err)
+- QETH_CARD_TEXT_(card, 2, "b2derr%02x", event);
++ QETH_CARD_TEXT_(card, 2, "b2derr%02lx", event);
+ }
+
+ unlock:
+@@ -821,6 +820,7 @@ unlock:
+ dev_put(brdev);
+ dev_put(lsyncdev);
+ dev_put(dstdev);
++ kfree(br2dev_event_work);
+ }
+
+ static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+@@ -878,7 +878,7 @@ static int qeth_l2_switchdev_event(struct notifier_block *unused,
+ while (lowerdev) {
+ if (qeth_l2_must_learn(lowerdev, dstdev)) {
+ card = lowerdev->ml_priv;
+- QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
++ QETH_CARD_TEXT_(card, 4, "b2dqw%03lx", event);
+ rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+ dstdev, event,
+ fdb_info->addr);
+@@ -2373,9 +2373,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
+ qeth_enable_hw_features(dev);
+ qeth_l2_enable_brport_features(card);
+
+- if (card->info.open_when_online) {
+- card->info.open_when_online = 0;
+- dev_open(dev, NULL);
++ if (netif_running(dev)) {
++ local_bh_disable();
++ napi_schedule(&card->napi);
++ /* kick-start the NAPI softirq: */
++ local_bh_enable();
++ qeth_l2_set_rx_mode(dev);
+ }
+ rtnl_unlock();
+ }
+diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
+index 6fd3e288f0595..93f55c7348026 100644
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2029,9 +2029,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
+ netif_device_attach(dev);
+ qeth_enable_hw_features(dev);
+
+- if (card->info.open_when_online) {
+- card->info.open_when_online = 0;
+- dev_open(dev, NULL);
++ if (netif_running(dev)) {
++ local_bh_disable();
++ napi_schedule(&card->napi);
++ /* kick-start the NAPI softirq: */
++ local_bh_enable();
+ }
+ rtnl_unlock();
+ }
+diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
+index 1082380b21f85..dba9b307020cb 100644
+--- a/drivers/s390/net/qeth_l3_sys.c
++++ b/drivers/s390/net/qeth_l3_sys.c
+@@ -652,7 +652,7 @@ static QETH_DEVICE_ATTR(vipa_add4, add4, 0644,
+ static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+ {
+- return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4);
++ return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV4);
+ }
+
+ static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL,
+diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
+index d24cafe02708f..d323f9985c482 100644
+--- a/drivers/s390/scsi/zfcp_fc.c
++++ b/drivers/s390/scsi/zfcp_fc.c
+@@ -145,27 +145,33 @@ void zfcp_fc_enqueue_event(struct zfcp_adapter *adapter,
+
+ static int zfcp_fc_wka_port_get(struct zfcp_fc_wka_port *wka_port)
+ {
++ int ret = -EIO;
++
+ if (mutex_lock_interruptible(&wka_port->mutex))
+ return -ERESTARTSYS;
+
+ if (wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE ||
+ wka_port->status == ZFCP_FC_WKA_PORT_CLOSING) {
+ wka_port->status = ZFCP_FC_WKA_PORT_OPENING;
+- if (zfcp_fsf_open_wka_port(wka_port))
++ if (zfcp_fsf_open_wka_port(wka_port)) {
++ /* could not even send request, nothing to wait for */
+ wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE;
++ goto out;
++ }
+ }
+
+- mutex_unlock(&wka_port->mutex);
+-
+- wait_event(wka_port->completion_wq,
++ wait_event(wka_port->opened,
+ wka_port->status == ZFCP_FC_WKA_PORT_ONLINE ||
+ wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE);
+
+ if (wka_port->status == ZFCP_FC_WKA_PORT_ONLINE) {
+ atomic_inc(&wka_port->refcount);
+- return 0;
++ ret = 0;
++ goto out;
+ }
+- return -EIO;
++out:
++ mutex_unlock(&wka_port->mutex);
++ return ret;
+ }
+
+ static void zfcp_fc_wka_port_offline(struct work_struct *work)
+@@ -181,9 +187,12 @@ static void zfcp_fc_wka_port_offline(struct work_struct *work)
+
+ wka_port->status = ZFCP_FC_WKA_PORT_CLOSING;
+ if (zfcp_fsf_close_wka_port(wka_port)) {
++ /* could not even send request, nothing to wait for */
+ wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE;
+- wake_up(&wka_port->completion_wq);
++ goto out;
+ }
++ wait_event(wka_port->closed,
++ wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE);
+ out:
+ mutex_unlock(&wka_port->mutex);
+ }
+@@ -193,13 +202,15 @@ static void zfcp_fc_wka_port_put(struct zfcp_fc_wka_port *wka_port)
+ if (atomic_dec_return(&wka_port->refcount) != 0)
+ return;
+ /* wait 10 milliseconds, other reqs might pop in */
+- schedule_delayed_work(&wka_port->work, HZ / 100);
++ queue_delayed_work(wka_port->adapter->work_queue, &wka_port->work,
++ msecs_to_jiffies(10));
+ }
+
+ static void zfcp_fc_wka_port_init(struct zfcp_fc_wka_port *wka_port, u32 d_id,
+ struct zfcp_adapter *adapter)
+ {
+- init_waitqueue_head(&wka_port->completion_wq);
++ init_waitqueue_head(&wka_port->opened);
++ init_waitqueue_head(&wka_port->closed);
+
+ wka_port->adapter = adapter;
+ wka_port->d_id = d_id;
+@@ -521,8 +532,9 @@ static void zfcp_fc_adisc_handler(void *data)
+ goto out;
+ }
+
+- /* port is good, unblock rport without going through erp */
+- zfcp_scsi_schedule_rport_register(port);
++ /* re-init to undo drop from zfcp_fc_adisc() */
++ port->d_id = ntoh24(adisc_resp->adisc_port_id);
++ /* port is still good, nothing to do */
+ out:
+ atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+ put_device(&port->dev);
+@@ -534,6 +546,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
+ struct zfcp_fc_req *fc_req;
+ struct zfcp_adapter *adapter = port->adapter;
+ struct Scsi_Host *shost = adapter->scsi_host;
++ u32 d_id;
+ int ret;
+
+ fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC);
+@@ -558,7 +571,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port)
+ fc_req->u.adisc.req.adisc_cmd = ELS_ADISC;
+ hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost));
+
+- ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els,
++ d_id = port->d_id; /* remember as destination for send els below */
++ /*
++ * Force fresh GID_PN lookup on next port recovery.
++ * Must happen after request setup and before sending request,
++ * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler().
++ */
++ port->d_id = 0;
++
++ ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els,
+ ZFCP_FC_CTELS_TMO);
+ if (ret)
+ kmem_cache_free(zfcp_fc_req_cache, fc_req);
+@@ -573,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
+ int retval;
+
+ set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
+- get_device(&port->dev);
+- port->rport_task = RPORT_DEL;
+- zfcp_scsi_rport_work(&port->rport_work);
+
+ /* only issue one test command at one time per port */
+ if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
+diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h
+index 8aaf409ce9cba..97755407ce1b5 100644
+--- a/drivers/s390/scsi/zfcp_fc.h
++++ b/drivers/s390/scsi/zfcp_fc.h
+@@ -185,7 +185,8 @@ enum zfcp_fc_wka_status {
+ /**
+ * struct zfcp_fc_wka_port - representation of well-known-address (WKA) FC port
+ * @adapter: Pointer to adapter structure this WKA port belongs to
+- * @completion_wq: Wait for completion of open/close command
++ * @opened: Wait for completion of open command
++ * @closed: Wait for completion of close command
+ * @status: Current status of WKA port
+ * @refcount: Reference count to keep port open as long as it is in use
+ * @d_id: FC destination id or well-known-address
+@@ -195,7 +196,8 @@ enum zfcp_fc_wka_status {
+ */
+ struct zfcp_fc_wka_port {
+ struct zfcp_adapter *adapter;
+- wait_queue_head_t completion_wq;
++ wait_queue_head_t opened;
++ wait_queue_head_t closed;
+ enum zfcp_fc_wka_status status;
+ atomic_t refcount;
+ u32 d_id;
+diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
+index c1f979296c1a3..e37e1cd1d67f6 100644
+--- a/drivers/s390/scsi/zfcp_fsf.c
++++ b/drivers/s390/scsi/zfcp_fsf.c
+@@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
+ const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req);
+ struct zfcp_adapter *adapter = req->adapter;
+ struct zfcp_qdio *qdio = adapter->qdio;
+- int req_id = req->req_id;
++ unsigned long req_id = req->req_id;
+
+ zfcp_reqlist_add(adapter->req_list, req);
+
+@@ -1907,7 +1907,7 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req)
+ wka_port->status = ZFCP_FC_WKA_PORT_ONLINE;
+ }
+ out:
+- wake_up(&wka_port->completion_wq);
++ wake_up(&wka_port->opened);
+ }
+
+ /**
+@@ -1966,7 +1966,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req)
+ }
+
+ wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE;
+- wake_up(&wka_port->completion_wq);
++ wake_up(&wka_port->closed);
+ }
+
+ /**
+diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
+index e41cc354cc8af..6da591508f238 100644
+--- a/drivers/scsi/3w-9xxx.c
++++ b/drivers/scsi/3w-9xxx.c
+@@ -2006,7 +2006,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
+ retval = pci_enable_device(pdev);
+ if (retval) {
+ TW_PRINTK(host, TW_DRIVER, 0x34, "Failed to enable pci device");
+- goto out_disable_device;
++ return -ENODEV;
+ }
+
+ pci_set_master(pdev);
+diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
+index 4ee485ab27148..678c8ca4f699c 100644
+--- a/drivers/scsi/3w-xxxx.c
++++ b/drivers/scsi/3w-xxxx.c
+@@ -2305,8 +2305,10 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
+ TW_DISABLE_INTERRUPTS(tw_dev);
+
+ /* Initialize the card */
+- if (tw_reset_sequence(tw_dev))
++ if (tw_reset_sequence(tw_dev)) {
++ retval = -EINVAL;
+ goto out_release_mem_region;
++ }
+
+ /* Set host specific parameters */
+ host->max_id = TW_MAX_UNITS;
+diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
+index a12e3525977d8..2f810dac7b8bf 100644
+--- a/drivers/scsi/53c700.c
++++ b/drivers/scsi/53c700.c
+@@ -1599,7 +1599,7 @@ NCR_700_intr(int irq, void *dev_id)
+ printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG)));
+ #endif
+ resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch;
+- } else if(dsp >= to32bit(&slot->pSG[0].ins) &&
++ } else if (slot && dsp >= to32bit(&slot->pSG[0].ins) &&
+ dsp <= to32bit(&slot->pSG[NCR_700_SG_SEGMENTS].ins)) {
+ int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff;
+ int SGcount = (dsp - to32bit(&slot->pSG[0].ins))/sizeof(struct NCR_700_SG_List);
+diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
+index 6e3a04107bb65..eea574e89b872 100644
+--- a/drivers/scsi/Kconfig
++++ b/drivers/scsi/Kconfig
+@@ -460,7 +460,7 @@ config SCSI_MVUMI
+
+ config SCSI_DPT_I2O
+ tristate "Adaptec I2O RAID support "
+- depends on SCSI && PCI && VIRT_TO_BUS
++ depends on SCSI && PCI
+ help
+ This driver supports all of Adaptec's I2O based RAID controllers as
+ well as the DPT SmartRaid V cards. This is an Adaptec maintained
+diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
+index ffb3919675734..97be0765cb9bd 100644
+--- a/drivers/scsi/advansys.c
++++ b/drivers/scsi/advansys.c
+@@ -3308,8 +3308,8 @@ static void asc_prt_adv_board_info(struct seq_file *m, struct Scsi_Host *shost)
+ shost->host_no);
+
+ seq_printf(m,
+- " iop_base 0x%lx, cable_detect: %X, err_code %u\n",
+- (unsigned long)v->iop_base,
++ " iop_base 0x%p, cable_detect: %X, err_code %u\n",
++ v->iop_base,
+ AdvReadWordRegister(iop_base,IOPW_SCSI_CFG1) & CABLE_DETECT,
+ v->err_code);
+
+diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
+index b13b5c85f3de0..75a5a4765f425 100644
+--- a/drivers/scsi/aha152x.c
++++ b/drivers/scsi/aha152x.c
+@@ -3370,13 +3370,11 @@ static int __init aha152x_setup(char *str)
+ setup[setup_count].synchronous = ints[0] >= 6 ? ints[6] : 1;
+ setup[setup_count].delay = ints[0] >= 7 ? ints[7] : DELAY_DEFAULT;
+ setup[setup_count].ext_trans = ints[0] >= 8 ? ints[8] : 0;
+- if (ints[0] > 8) { /*}*/
++ if (ints[0] > 8)
+ printk(KERN_NOTICE "aha152x: usage: aha152x=<IOBASE>[,<IRQ>[,<SCSI ID>"
+ "[,<RECONNECT>[,<PARITY>[,<SYNCHRONOUS>[,<DELAY>[,<EXT_TRANS>]]]]]]]\n");
+- } else {
++ else
+ setup_count++;
+- return 0;
+- }
+
+ return 1;
+ }
+diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
+index c6b63eae28f51..ce48f34f412f0 100644
+--- a/drivers/scsi/aic94xx/aic94xx_task.c
++++ b/drivers/scsi/aic94xx/aic94xx_task.c
+@@ -50,6 +50,9 @@ static int asd_map_scatterlist(struct sas_task *task,
+ dma_addr_t dma = dma_map_single(&asd_ha->pcidev->dev, p,
+ task->total_xfer_len,
+ task->data_dir);
++ if (dma_mapping_error(&asd_ha->pcidev->dev, dma))
++ return -ENOMEM;
++
+ sg_arr[0].bus_addr = cpu_to_le64((u64)dma);
+ sg_arr[0].size = cpu_to_le32(task->total_xfer_len);
+ sg_arr[0].flags |= ASD_SG_EL_LIST_EOL;
+diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
+index 8aeaddc93b167..8d374ae863ba2 100644
+--- a/drivers/scsi/be2iscsi/be_iscsi.c
++++ b/drivers/scsi/be2iscsi/be_iscsi.c
+@@ -450,6 +450,10 @@ int beiscsi_iface_set_param(struct Scsi_Host *shost,
+ }
+
+ nla_for_each_attr(attrib, data, dt_len, rm_len) {
++ /* ignore nla_type as it is never used */
++ if (nla_len(attrib) < sizeof(*iface_param))
++ return -EINVAL;
++
+ iface_param = nla_data(attrib);
+
+ if (iface_param->param_type != ISCSI_NET_PARAM)
+diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
+index e70f69f791db6..7974c1326d461 100644
+--- a/drivers/scsi/be2iscsi/be_main.c
++++ b/drivers/scsi/be2iscsi/be_main.c
+@@ -5741,7 +5741,7 @@ static void beiscsi_remove(struct pci_dev *pcidev)
+ cancel_work_sync(&phba->sess_work);
+
+ beiscsi_iface_destroy_default(phba);
+- iscsi_host_remove(phba->shost);
++ iscsi_host_remove(phba->shost, false);
+ beiscsi_disable_port(phba, 1);
+
+ /* after cancelling boot_work */
+diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c
+index 5ae1e3f789101..e049cdb3c286c 100644
+--- a/drivers/scsi/bfa/bfad_attr.c
++++ b/drivers/scsi/bfa/bfad_attr.c
+@@ -711,7 +711,7 @@ bfad_im_serial_num_show(struct device *dev, struct device_attribute *attr,
+ char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN];
+
+ bfa_get_adapter_serial_num(&bfad->bfa, serial_num);
+- return snprintf(buf, PAGE_SIZE, "%s\n", serial_num);
++ return sysfs_emit(buf, "%s\n", serial_num);
+ }
+
+ static ssize_t
+@@ -725,7 +725,7 @@ bfad_im_model_show(struct device *dev, struct device_attribute *attr,
+ char model[BFA_ADAPTER_MODEL_NAME_LEN];
+
+ bfa_get_adapter_model(&bfad->bfa, model);
+- return snprintf(buf, PAGE_SIZE, "%s\n", model);
++ return sysfs_emit(buf, "%s\n", model);
+ }
+
+ static ssize_t
+@@ -805,7 +805,7 @@ bfad_im_model_desc_show(struct device *dev, struct device_attribute *attr,
+ snprintf(model_descr, BFA_ADAPTER_MODEL_DESCR_LEN,
+ "Invalid Model");
+
+- return snprintf(buf, PAGE_SIZE, "%s\n", model_descr);
++ return sysfs_emit(buf, "%s\n", model_descr);
+ }
+
+ static ssize_t
+@@ -819,7 +819,7 @@ bfad_im_node_name_show(struct device *dev, struct device_attribute *attr,
+ u64 nwwn;
+
+ nwwn = bfa_fcs_lport_get_nwwn(port->fcs_port);
+- return snprintf(buf, PAGE_SIZE, "0x%llx\n", cpu_to_be64(nwwn));
++ return sysfs_emit(buf, "0x%llx\n", cpu_to_be64(nwwn));
+ }
+
+ static ssize_t
+@@ -836,7 +836,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr,
+ bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr);
+ strlcpy(symname, port_attr.port_cfg.sym_name.symname,
+ BFA_SYMNAME_MAXLEN);
+- return snprintf(buf, PAGE_SIZE, "%s\n", symname);
++ return sysfs_emit(buf, "%s\n", symname);
+ }
+
+ static ssize_t
+@@ -850,14 +850,14 @@ bfad_im_hw_version_show(struct device *dev, struct device_attribute *attr,
+ char hw_ver[BFA_VERSION_LEN];
+
+ bfa_get_pci_chip_rev(&bfad->bfa, hw_ver);
+- return snprintf(buf, PAGE_SIZE, "%s\n", hw_ver);
++ return sysfs_emit(buf, "%s\n", hw_ver);
+ }
+
+ static ssize_t
+ bfad_im_drv_version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+- return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_VERSION);
++ return sysfs_emit(buf, "%s\n", BFAD_DRIVER_VERSION);
+ }
+
+ static ssize_t
+@@ -871,7 +871,7 @@ bfad_im_optionrom_version_show(struct device *dev,
+ char optrom_ver[BFA_VERSION_LEN];
+
+ bfa_get_adapter_optrom_ver(&bfad->bfa, optrom_ver);
+- return snprintf(buf, PAGE_SIZE, "%s\n", optrom_ver);
++ return sysfs_emit(buf, "%s\n", optrom_ver);
+ }
+
+ static ssize_t
+@@ -885,7 +885,7 @@ bfad_im_fw_version_show(struct device *dev, struct device_attribute *attr,
+ char fw_ver[BFA_VERSION_LEN];
+
+ bfa_get_adapter_fw_ver(&bfad->bfa, fw_ver);
+- return snprintf(buf, PAGE_SIZE, "%s\n", fw_ver);
++ return sysfs_emit(buf, "%s\n", fw_ver);
+ }
+
+ static ssize_t
+@@ -897,7 +897,7 @@ bfad_im_num_of_ports_show(struct device *dev, struct device_attribute *attr,
+ (struct bfad_im_port_s *) shost->hostdata[0];
+ struct bfad_s *bfad = im_port->bfad;
+
+- return snprintf(buf, PAGE_SIZE, "%d\n",
++ return sysfs_emit(buf, "%d\n",
+ bfa_get_nports(&bfad->bfa));
+ }
+
+@@ -905,7 +905,7 @@ static ssize_t
+ bfad_im_drv_name_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+- return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_NAME);
++ return sysfs_emit(buf, "%s\n", BFAD_DRIVER_NAME);
+ }
+
+ static ssize_t
+@@ -924,14 +924,14 @@ bfad_im_num_of_discovered_ports_show(struct device *dev,
+ rports = kcalloc(nrports, sizeof(struct bfa_rport_qualifier_s),
+ GFP_ATOMIC);
+ if (rports == NULL)
+- return snprintf(buf, PAGE_SIZE, "Failed\n");
++ return sysfs_emit(buf, "Failed\n");
+
+ spin_lock_irqsave(&bfad->bfad_lock, flags);
+ bfa_fcs_lport_get_rport_quals(port->fcs_port, rports, &nrports);
+ spin_unlock_irqrestore(&bfad->bfad_lock, flags);
+ kfree(rports);
+
+- return snprintf(buf, PAGE_SIZE, "%d\n", nrports);
++ return sysfs_emit(buf, "%d\n", nrports);
+ }
+
+ static DEVICE_ATTR(serial_number, S_IRUGO,
+diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+index 8863a74e6c57d..e2586472ecad4 100644
+--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
++++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+@@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba);
+ static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba);
+ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
+ struct device *parent, int npiv);
+-static void bnx2fc_destroy_work(struct work_struct *work);
++static void bnx2fc_port_destroy(struct fcoe_port *port);
+
+ static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev);
+ static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device
+@@ -508,7 +508,8 @@ static int bnx2fc_l2_rcv_thread(void *arg)
+
+ static void bnx2fc_recv_frame(struct sk_buff *skb)
+ {
+- u32 fr_len;
++ u64 crc_err;
++ u32 fr_len, fr_crc;
+ struct fc_lport *lport;
+ struct fcoe_rcv_info *fr;
+ struct fc_stats *stats;
+@@ -542,6 +543,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
+ skb_pull(skb, sizeof(struct fcoe_hdr));
+ fr_len = skb->len - sizeof(struct fcoe_crc_eof);
+
++ stats = per_cpu_ptr(lport->stats, get_cpu());
++ stats->RxFrames++;
++ stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
++ put_cpu();
++
+ fp = (struct fc_frame *)skb;
+ fc_frame_init(fp);
+ fr_dev(fp) = lport;
+@@ -624,16 +630,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
+ return;
+ }
+
+- stats = per_cpu_ptr(lport->stats, smp_processor_id());
+- stats->RxFrames++;
+- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
++ fr_crc = le32_to_cpu(fr_crc(fp));
+
+- if (le32_to_cpu(fr_crc(fp)) !=
+- ~crc32(~0, skb->data, fr_len)) {
+- if (stats->InvalidCRCCount < 5)
++ if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) {
++ stats = per_cpu_ptr(lport->stats, get_cpu());
++ crc_err = (stats->InvalidCRCCount++);
++ put_cpu();
++ if (crc_err < 5)
+ printk(KERN_WARNING PFX "dropping frame with "
+ "CRC error\n");
+- stats->InvalidCRCCount++;
+ kfree_skb(skb);
+ return;
+ }
+@@ -907,9 +912,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event,
+ __bnx2fc_destroy(interface);
+ }
+ mutex_unlock(&bnx2fc_dev_lock);
+-
+- /* Ensure ALL destroy work has been completed before return */
+- flush_workqueue(bnx2fc_wq);
+ return;
+
+ default:
+@@ -1215,8 +1217,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport)
+ mutex_unlock(&n_port->lp_mutex);
+ bnx2fc_free_vport(interface->hba, port->lport);
+ bnx2fc_port_shutdown(port->lport);
++ bnx2fc_port_destroy(port);
+ bnx2fc_interface_put(interface);
+- queue_work(bnx2fc_wq, &port->destroy_work);
+ return 0;
+ }
+
+@@ -1525,7 +1527,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
+ port->lport = lport;
+ port->priv = interface;
+ port->get_netdev = bnx2fc_netdev;
+- INIT_WORK(&port->destroy_work, bnx2fc_destroy_work);
+
+ /* Configure fcoe_port */
+ rc = bnx2fc_lport_config(lport);
+@@ -1653,8 +1654,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface)
+ bnx2fc_interface_cleanup(interface);
+ bnx2fc_stop(interface);
+ list_del(&interface->list);
++ bnx2fc_port_destroy(port);
+ bnx2fc_interface_put(interface);
+- queue_work(bnx2fc_wq, &port->destroy_work);
+ }
+
+ /**
+@@ -1694,15 +1695,12 @@ netdev_err:
+ return rc;
+ }
+
+-static void bnx2fc_destroy_work(struct work_struct *work)
++static void bnx2fc_port_destroy(struct fcoe_port *port)
+ {
+- struct fcoe_port *port;
+ struct fc_lport *lport;
+
+- port = container_of(work, struct fcoe_port, destroy_work);
+ lport = port->lport;
+-
+- BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n");
++ BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport);
+
+ bnx2fc_if_destroy(lport);
+ }
+@@ -2556,9 +2554,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev)
+ __bnx2fc_destroy(interface);
+ mutex_unlock(&bnx2fc_dev_lock);
+
+- /* Ensure ALL destroy work has been completed before return */
+- flush_workqueue(bnx2fc_wq);
+-
+ bnx2fc_ulp_stop(hba);
+ /* unregister cnic device */
+ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic))
+diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
+index 5521469ce678b..e16327a4b4c96 100644
+--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
++++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
+@@ -1977,7 +1977,7 @@ static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
+ if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
+ break;
+
+- if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx))) {
++ if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
+ if (nopin->op_code == ISCSI_OP_NOOP_IN &&
+ nopin->itt == (u16) RESERVED_ITT) {
+ printk(KERN_ALERT "bnx2i: Unsolicited "
+diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
+index 1b5f3e143f071..85b5aca4b4977 100644
+--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
++++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
+@@ -909,7 +909,7 @@ void bnx2i_free_hba(struct bnx2i_hba *hba)
+ {
+ struct Scsi_Host *shost = hba->shost;
+
+- iscsi_host_remove(shost);
++ iscsi_host_remove(shost, false);
+ INIT_LIST_HEAD(&hba->ep_ofld_list);
+ INIT_LIST_HEAD(&hba->ep_active_list);
+ INIT_LIST_HEAD(&hba->ep_destroy_list);
+@@ -1721,7 +1721,7 @@ static int bnx2i_tear_down_conn(struct bnx2i_hba *hba,
+ struct iscsi_conn *conn = ep->conn->cls_conn->dd_data;
+
+ /* Must suspend all rx queue activity for this ep */
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
+ }
+ /* CONN_DISCONNECT timeout may or may not be an issue depending
+ * on what transcribed in TCP layer, different targets behave
+diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c
+index dc98f51f466fb..d5ac938970232 100644
+--- a/drivers/scsi/csiostor/csio_lnode.c
++++ b/drivers/scsi/csiostor/csio_lnode.c
+@@ -619,7 +619,7 @@ csio_ln_vnp_read_cbfn(struct csio_hw *hw, struct csio_mb *mbp)
+ struct fc_els_csp *csp;
+ struct fc_els_cssp *clsp;
+ enum fw_retval retval;
+- __be32 nport_id;
++ __be32 nport_id = 0;
+
+ retval = FW_CMD_RETVAL_G(ntohl(rsp->alloc_to_len16));
+ if (retval != FW_SUCCESS) {
+diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
+index 8c7d4dda4cf29..af281e271f886 100644
+--- a/drivers/scsi/cxgbi/libcxgbi.c
++++ b/drivers/scsi/cxgbi/libcxgbi.c
+@@ -328,7 +328,7 @@ void cxgbi_hbas_remove(struct cxgbi_device *cdev)
+ chba = cdev->hbas[i];
+ if (chba) {
+ cdev->hbas[i] = NULL;
+- iscsi_host_remove(chba->shost);
++ iscsi_host_remove(chba->shost, false);
+ pci_dev_put(cdev->pdev);
+ iscsi_host_free(chba->shost);
+ }
+@@ -1455,7 +1455,7 @@ void cxgbi_conn_tx_open(struct cxgbi_sock *csk)
+ if (conn) {
+ log_debug(1 << CXGBI_DBG_SOCK,
+ "csk 0x%p, cid %d.\n", csk, conn->id);
+- iscsi_conn_queue_work(conn);
++ iscsi_conn_queue_xmit(conn);
+ }
+ }
+ EXPORT_SYMBOL_GPL(cxgbi_conn_tx_open);
+@@ -1634,11 +1634,11 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
+ log_debug(1 << CXGBI_DBG_PDU_RX,
+ "csk 0x%p, conn 0x%p.\n", csk, conn);
+
+- if (unlikely(!conn || conn->suspend_rx)) {
++ if (unlikely(!conn || test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
+ log_debug(1 << CXGBI_DBG_PDU_RX,
+- "csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n",
++ "csk 0x%p, conn 0x%p, id %d, conn flags 0x%lx!\n",
+ csk, conn, conn ? conn->id : 0xFF,
+- conn ? conn->suspend_rx : 0xFF);
++ conn ? conn->flags : 0xFF);
+ return;
+ }
+
+diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
+index 24c7cefb0b78a..d5623253826f1 100644
+--- a/drivers/scsi/dc395x.c
++++ b/drivers/scsi/dc395x.c
+@@ -3590,10 +3590,19 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb,
+ #endif
+ if (dcb->target_lun != 0) {
+ /* Copy settings */
+- struct DeviceCtlBlk *p;
+- list_for_each_entry(p, &acb->dcb_list, list)
+- if (p->target_id == dcb->target_id)
++ struct DeviceCtlBlk *p = NULL, *iter;
++
++ list_for_each_entry(iter, &acb->dcb_list, list)
++ if (iter->target_id == dcb->target_id) {
++ p = iter;
+ break;
++ }
++
++ if (!p) {
++ kfree(dcb);
++ return NULL;
++ }
++
+ dprintkdbg(DBG_1,
+ "device_alloc: <%02i-%i> copy from <%02i-%i>\n",
+ dcb->target_id, dcb->target_lun,
+@@ -4618,6 +4627,7 @@ static int dc395x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+ /* initialise the adapter and everything we need */
+ if (adapter_init(acb, io_port_base, io_port_len, irq)) {
+ dprintkl(KERN_INFO, "adapter init failed\n");
++ acb = NULL;
+ goto fail;
+ }
+
+diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
+index 37d06f993b761..a9c4a5e2ccb90 100644
+--- a/drivers/scsi/device_handler/scsi_dh_alua.c
++++ b/drivers/scsi/device_handler/scsi_dh_alua.c
+@@ -1117,10 +1117,12 @@ static int alua_activate(struct scsi_device *sdev,
+ rcu_read_unlock();
+ mutex_unlock(&h->init_mutex);
+
+- if (alua_rtpg_queue(pg, sdev, qdata, true))
++ if (alua_rtpg_queue(pg, sdev, qdata, true)) {
+ fn = NULL;
+- else
++ } else {
++ kfree(qdata);
+ err = SCSI_DH_DEV_OFFLINED;
++ }
+ kref_put(&pg->kref, release_port_group);
+ out:
+ if (fn)
+@@ -1172,9 +1174,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
+ case SCSI_ACCESS_STATE_OPTIMAL:
+ case SCSI_ACCESS_STATE_ACTIVE:
+ case SCSI_ACCESS_STATE_LBA:
+- return BLK_STS_OK;
+ case SCSI_ACCESS_STATE_TRANSITIONING:
+- return BLK_STS_AGAIN;
++ return BLK_STS_OK;
+ default:
+ req->rq_flags |= RQF_QUIET;
+ return BLK_STS_IOERR;
+diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
+index 7af96d14c9bca..f1e05d12bc528 100644
+--- a/drivers/scsi/dpt_i2o.c
++++ b/drivers/scsi/dpt_i2o.c
+@@ -56,7 +56,7 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver");
+ #include <linux/mutex.h>
+
+ #include <asm/processor.h> /* for boot_cpu_data */
+-#include <asm/io.h> /* for virt_to_bus, etc. */
++#include <asm/io.h>
+
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+@@ -582,51 +582,6 @@ static int adpt_show_info(struct seq_file *m, struct Scsi_Host *host)
+ return 0;
+ }
+
+-/*
+- * Turn a pointer to ioctl reply data into an u32 'context'
+- */
+-static u32 adpt_ioctl_to_context(adpt_hba * pHba, void *reply)
+-{
+-#if BITS_PER_LONG == 32
+- return (u32)(unsigned long)reply;
+-#else
+- ulong flags = 0;
+- u32 nr, i;
+-
+- spin_lock_irqsave(pHba->host->host_lock, flags);
+- nr = ARRAY_SIZE(pHba->ioctl_reply_context);
+- for (i = 0; i < nr; i++) {
+- if (pHba->ioctl_reply_context[i] == NULL) {
+- pHba->ioctl_reply_context[i] = reply;
+- break;
+- }
+- }
+- spin_unlock_irqrestore(pHba->host->host_lock, flags);
+- if (i >= nr) {
+- printk(KERN_WARNING"%s: Too many outstanding "
+- "ioctl commands\n", pHba->name);
+- return (u32)-1;
+- }
+-
+- return i;
+-#endif
+-}
+-
+-/*
+- * Go from an u32 'context' to a pointer to ioctl reply data.
+- */
+-static void *adpt_ioctl_from_context(adpt_hba *pHba, u32 context)
+-{
+-#if BITS_PER_LONG == 32
+- return (void *)(unsigned long)context;
+-#else
+- void *p = pHba->ioctl_reply_context[context];
+- pHba->ioctl_reply_context[context] = NULL;
+-
+- return p;
+-#endif
+-}
+-
+ /*===========================================================================
+ * Error Handling routines
+ *===========================================================================
+@@ -1648,208 +1603,6 @@ static int adpt_close(struct inode *inode, struct file *file)
+ return 0;
+ }
+
+-
+-static int adpt_i2o_passthru(adpt_hba* pHba, u32 __user *arg)
+-{
+- u32 msg[MAX_MESSAGE_SIZE];
+- u32* reply = NULL;
+- u32 size = 0;
+- u32 reply_size = 0;
+- u32 __user *user_msg = arg;
+- u32 __user * user_reply = NULL;
+- void **sg_list = NULL;
+- u32 sg_offset = 0;
+- u32 sg_count = 0;
+- int sg_index = 0;
+- u32 i = 0;
+- u32 rcode = 0;
+- void *p = NULL;
+- dma_addr_t addr;
+- ulong flags = 0;
+-
+- memset(&msg, 0, MAX_MESSAGE_SIZE*4);
+- // get user msg size in u32s
+- if(get_user(size, &user_msg[0])){
+- return -EFAULT;
+- }
+- size = size>>16;
+-
+- user_reply = &user_msg[size];
+- if(size > MAX_MESSAGE_SIZE){
+- return -EFAULT;
+- }
+- size *= 4; // Convert to bytes
+-
+- /* Copy in the user's I2O command */
+- if(copy_from_user(msg, user_msg, size)) {
+- return -EFAULT;
+- }
+- get_user(reply_size, &user_reply[0]);
+- reply_size = reply_size>>16;
+- if(reply_size > REPLY_FRAME_SIZE){
+- reply_size = REPLY_FRAME_SIZE;
+- }
+- reply_size *= 4;
+- reply = kzalloc(REPLY_FRAME_SIZE*4, GFP_KERNEL);
+- if(reply == NULL) {
+- printk(KERN_WARNING"%s: Could not allocate reply buffer\n",pHba->name);
+- return -ENOMEM;
+- }
+- sg_offset = (msg[0]>>4)&0xf;
+- msg[2] = 0x40000000; // IOCTL context
+- msg[3] = adpt_ioctl_to_context(pHba, reply);
+- if (msg[3] == (u32)-1) {
+- rcode = -EBUSY;
+- goto free;
+- }
+-
+- sg_list = kcalloc(pHba->sg_tablesize, sizeof(*sg_list), GFP_KERNEL);
+- if (!sg_list) {
+- rcode = -ENOMEM;
+- goto free;
+- }
+- if(sg_offset) {
+- // TODO add 64 bit API
+- struct sg_simple_element *sg = (struct sg_simple_element*) (msg+sg_offset);
+- sg_count = (size - sg_offset*4) / sizeof(struct sg_simple_element);
+- if (sg_count > pHba->sg_tablesize){
+- printk(KERN_DEBUG"%s:IOCTL SG List too large (%u)\n", pHba->name,sg_count);
+- rcode = -EINVAL;
+- goto free;
+- }
+-
+- for(i = 0; i < sg_count; i++) {
+- int sg_size;
+-
+- if (!(sg[i].flag_count & 0x10000000 /*I2O_SGL_FLAGS_SIMPLE_ADDRESS_ELEMENT*/)) {
+- printk(KERN_DEBUG"%s:Bad SG element %d - not simple (%x)\n",pHba->name,i, sg[i].flag_count);
+- rcode = -EINVAL;
+- goto cleanup;
+- }
+- sg_size = sg[i].flag_count & 0xffffff;
+- /* Allocate memory for the transfer */
+- p = dma_alloc_coherent(&pHba->pDev->dev, sg_size, &addr, GFP_KERNEL);
+- if(!p) {
+- printk(KERN_DEBUG"%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
+- pHba->name,sg_size,i,sg_count);
+- rcode = -ENOMEM;
+- goto cleanup;
+- }
+- sg_list[sg_index++] = p; // sglist indexed with input frame, not our internal frame.
+- /* Copy in the user's SG buffer if necessary */
+- if(sg[i].flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR*/) {
+- // sg_simple_element API is 32 bit
+- if (copy_from_user(p,(void __user *)(ulong)sg[i].addr_bus, sg_size)) {
+- printk(KERN_DEBUG"%s: Could not copy SG buf %d FROM user\n",pHba->name,i);
+- rcode = -EFAULT;
+- goto cleanup;
+- }
+- }
+- /* sg_simple_element API is 32 bit, but addr < 4GB */
+- sg[i].addr_bus = addr;
+- }
+- }
+-
+- do {
+- /*
+- * Stop any new commands from enterring the
+- * controller while processing the ioctl
+- */
+- if (pHba->host) {
+- scsi_block_requests(pHba->host);
+- spin_lock_irqsave(pHba->host->host_lock, flags);
+- }
+- rcode = adpt_i2o_post_wait(pHba, msg, size, FOREVER);
+- if (rcode != 0)
+- printk("adpt_i2o_passthru: post wait failed %d %p\n",
+- rcode, reply);
+- if (pHba->host) {
+- spin_unlock_irqrestore(pHba->host->host_lock, flags);
+- scsi_unblock_requests(pHba->host);
+- }
+- } while (rcode == -ETIMEDOUT);
+-
+- if(rcode){
+- goto cleanup;
+- }
+-
+- if(sg_offset) {
+- /* Copy back the Scatter Gather buffers back to user space */
+- u32 j;
+- // TODO add 64 bit API
+- struct sg_simple_element* sg;
+- int sg_size;
+-
+- // re-acquire the original message to handle correctly the sg copy operation
+- memset(&msg, 0, MAX_MESSAGE_SIZE*4);
+- // get user msg size in u32s
+- if(get_user(size, &user_msg[0])){
+- rcode = -EFAULT;
+- goto cleanup;
+- }
+- size = size>>16;
+- size *= 4;
+- if (size > MAX_MESSAGE_SIZE) {
+- rcode = -EINVAL;
+- goto cleanup;
+- }
+- /* Copy in the user's I2O command */
+- if (copy_from_user (msg, user_msg, size)) {
+- rcode = -EFAULT;
+- goto cleanup;
+- }
+- sg_count = (size - sg_offset*4) / sizeof(struct sg_simple_element);
+-
+- // TODO add 64 bit API
+- sg = (struct sg_simple_element*)(msg + sg_offset);
+- for (j = 0; j < sg_count; j++) {
+- /* Copy out the SG list to user's buffer if necessary */
+- if(! (sg[j].flag_count & 0x4000000 /*I2O_SGL_FLAGS_DIR*/)) {
+- sg_size = sg[j].flag_count & 0xffffff;
+- // sg_simple_element API is 32 bit
+- if (copy_to_user((void __user *)(ulong)sg[j].addr_bus,sg_list[j], sg_size)) {
+- printk(KERN_WARNING"%s: Could not copy %p TO user %x\n",pHba->name, sg_list[j], sg[j].addr_bus);
+- rcode = -EFAULT;
+- goto cleanup;
+- }
+- }
+- }
+- }
+-
+- /* Copy back the reply to user space */
+- if (reply_size) {
+- // we wrote our own values for context - now restore the user supplied ones
+- if(copy_from_user(reply+2, user_msg+2, sizeof(u32)*2)) {
+- printk(KERN_WARNING"%s: Could not copy message context FROM user\n",pHba->name);
+- rcode = -EFAULT;
+- }
+- if(copy_to_user(user_reply, reply, reply_size)) {
+- printk(KERN_WARNING"%s: Could not copy reply TO user\n",pHba->name);
+- rcode = -EFAULT;
+- }
+- }
+-
+-
+-cleanup:
+- if (rcode != -ETIME && rcode != -EINTR) {
+- struct sg_simple_element *sg =
+- (struct sg_simple_element*) (msg +sg_offset);
+- while(sg_index) {
+- if(sg_list[--sg_index]) {
+- dma_free_coherent(&pHba->pDev->dev,
+- sg[sg_index].flag_count & 0xffffff,
+- sg_list[sg_index],
+- sg[sg_index].addr_bus);
+- }
+- }
+- }
+-
+-free:
+- kfree(sg_list);
+- kfree(reply);
+- return rcode;
+-}
+-
+ #if defined __ia64__
+ static void adpt_ia64_info(sysInfo_S* si)
+ {
+@@ -1976,8 +1729,6 @@ static int adpt_ioctl(struct inode *inode, struct file *file, uint cmd, ulong ar
+ return -EFAULT;
+ }
+ break;
+- case I2OUSRCMD:
+- return adpt_i2o_passthru(pHba, argp);
+
+ case DPT_CTRLINFO:{
+ drvrHBAinfo_S HbaInfo;
+@@ -2114,7 +1865,7 @@ static irqreturn_t adpt_isr(int irq, void *dev_id)
+ } else {
+ /* Ick, we should *never* be here */
+ printk(KERN_ERR "dpti: reply frame not from pool\n");
+- reply = (u8 *)bus_to_virt(m);
++ continue;
+ }
+
+ if (readl(reply) & MSG_FAIL) {
+@@ -2134,13 +1885,6 @@ static irqreturn_t adpt_isr(int irq, void *dev_id)
+ adpt_send_nop(pHba, old_m);
+ }
+ context = readl(reply+8);
+- if(context & 0x40000000){ // IOCTL
+- void *p = adpt_ioctl_from_context(pHba, readl(reply+12));
+- if( p != NULL) {
+- memcpy_fromio(p, reply, REPLY_FRAME_SIZE * 4);
+- }
+- // All IOCTLs will also be post wait
+- }
+ if(context & 0x80000000){ // Post wait message
+ status = readl(reply+16);
+ if(status >> 24){
+@@ -2148,16 +1892,14 @@ static irqreturn_t adpt_isr(int irq, void *dev_id)
+ } else {
+ status = I2O_POST_WAIT_OK;
+ }
+- if(!(context & 0x40000000)) {
+- /*
+- * The request tag is one less than the command tag
+- * as the firmware might treat a 0 tag as invalid
+- */
+- cmd = scsi_host_find_tag(pHba->host,
+- readl(reply + 12) - 1);
+- if(cmd != NULL) {
+- printk(KERN_WARNING"%s: Apparent SCSI cmd in Post Wait Context - cmd=%p context=%x\n", pHba->name, cmd, context);
+- }
++ /*
++ * The request tag is one less than the command tag
++ * as the firmware might treat a 0 tag as invalid
++ */
++ cmd = scsi_host_find_tag(pHba->host,
++ readl(reply + 12) - 1);
++ if(cmd != NULL) {
++ printk(KERN_WARNING"%s: Apparent SCSI cmd in Post Wait Context - cmd=%p context=%x\n", pHba->name, cmd, context);
+ }
+ adpt_i2o_post_wait_complete(context, status);
+ } else { // SCSI message
+diff --git a/drivers/scsi/dpti.h b/drivers/scsi/dpti.h
+index 8a079e8d7f65f..0565533e8095a 100644
+--- a/drivers/scsi/dpti.h
++++ b/drivers/scsi/dpti.h
+@@ -248,7 +248,6 @@ typedef struct _adpt_hba {
+ void __iomem *FwDebugBLEDflag_P;// Virtual Addr Of FW Debug BLED
+ void __iomem *FwDebugBLEDvalue_P;// Virtual Addr Of FW Debug BLED
+ u32 FwDebugFlags;
+- u32 *ioctl_reply_context[4];
+ } adpt_hba;
+
+ struct sg_simple_element {
+diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c
+index eab68fd9337ac..37e1ab96ee5be 100644
+--- a/drivers/scsi/elx/efct/efct_driver.c
++++ b/drivers/scsi/elx/efct/efct_driver.c
+@@ -42,6 +42,7 @@ efct_device_init(void)
+
+ rc = efct_scsi_reg_fc_transport();
+ if (rc) {
++ efct_scsi_tgt_driver_exit();
+ pr_err("failed to register to FC host\n");
+ return rc;
+ }
+diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c
+index 24db0accb256e..5f690378fe9a9 100644
+--- a/drivers/scsi/elx/libefc/efc_els.c
++++ b/drivers/scsi/elx/libefc/efc_els.c
+@@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
+
+ efc = node->efc;
+
+- spin_lock_irqsave(&node->els_ios_lock, flags);
+-
+ if (!node->els_io_enabled) {
+ efc_log_err(efc, "els io alloc disabled\n");
+- spin_unlock_irqrestore(&node->els_ios_lock, flags);
+ return NULL;
+ }
+
+ els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC);
+ if (!els) {
+ atomic_add_return(1, &efc->els_io_alloc_failed_count);
+- spin_unlock_irqrestore(&node->els_ios_lock, flags);
+ return NULL;
+ }
+
+@@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
+ &els->io.req.phys, GFP_DMA);
+ if (!els->io.req.virt) {
+ mempool_free(els, efc->els_io_pool);
+- spin_unlock_irqrestore(&node->els_ios_lock, flags);
+ return NULL;
+ }
+
+@@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen)
+
+ /* add els structure to ELS IO list */
+ INIT_LIST_HEAD(&els->list_entry);
++ spin_lock_irqsave(&node->els_ios_lock, flags);
+ list_add_tail(&els->list_entry, &node->els_ios_list);
++ spin_unlock_irqrestore(&node->els_ios_lock, flags);
+ }
+
+- spin_unlock_irqrestore(&node->els_ios_lock, flags);
+ return els;
+ }
+
+diff --git a/drivers/scsi/elx/libefc/efclib.h b/drivers/scsi/elx/libefc/efclib.h
+index ee291cabf7e05..b14e516be7d53 100644
+--- a/drivers/scsi/elx/libefc/efclib.h
++++ b/drivers/scsi/elx/libefc/efclib.h
+@@ -58,10 +58,12 @@ enum efc_node_send_ls_acc {
+ #define EFC_LINK_STATUS_UP 0
+ #define EFC_LINK_STATUS_DOWN 1
+
++enum efc_sm_event;
++
+ /* State machine context header */
+ struct efc_sm_ctx {
+ void (*current_state)(struct efc_sm_ctx *ctx,
+- u32 evt, void *arg);
++ enum efc_sm_event evt, void *arg);
+
+ const char *description;
+ void *app;
+@@ -364,7 +366,7 @@ struct efc_node {
+ int prev_evt;
+
+ void (*nodedb_state)(struct efc_sm_ctx *ctx,
+- u32 evt, void *arg);
++ enum efc_sm_event evt, void *arg);
+ struct timer_list gidpt_delay_timer;
+ u64 time_last_gidpt_msec;
+
+diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
+index 5ae6c207d3ac3..76dbdae0e9874 100644
+--- a/drivers/scsi/fcoe/fcoe.c
++++ b/drivers/scsi/fcoe/fcoe.c
+@@ -2501,6 +2501,7 @@ static int __init fcoe_init(void)
+
+ out_free:
+ mutex_unlock(&fcoe_config_mutex);
++ fcoe_transport_detach(&fcoe_sw_transport);
+ out_destroy:
+ destroy_workqueue(fcoe_wq);
+ return rc;
+diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
+index 1756a0ac6f083..303ecbd86b68a 100644
+--- a/drivers/scsi/fcoe/fcoe_ctlr.c
++++ b/drivers/scsi/fcoe/fcoe_ctlr.c
+@@ -319,16 +319,17 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip)
+ {
+ struct fcoe_fcf *sel;
+ struct fcoe_fcf *fcf;
++ unsigned long flags;
+
+ mutex_lock(&fip->ctlr_mutex);
+- spin_lock_bh(&fip->ctlr_lock);
++ spin_lock_irqsave(&fip->ctlr_lock, flags);
+
+ kfree_skb(fip->flogi_req);
+ fip->flogi_req = NULL;
+ list_for_each_entry(fcf, &fip->fcfs, list)
+ fcf->flogi_sent = 0;
+
+- spin_unlock_bh(&fip->ctlr_lock);
++ spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ sel = fip->sel_fcf;
+
+ if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr))
+@@ -699,6 +700,7 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
+ {
+ struct fc_frame *fp;
+ struct fc_frame_header *fh;
++ unsigned long flags;
+ u16 old_xid;
+ u8 op;
+ u8 mac[ETH_ALEN];
+@@ -732,11 +734,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
+ op = FIP_DT_FLOGI;
+ if (fip->mode == FIP_MODE_VN2VN)
+ break;
+- spin_lock_bh(&fip->ctlr_lock);
++ spin_lock_irqsave(&fip->ctlr_lock, flags);
+ kfree_skb(fip->flogi_req);
+ fip->flogi_req = skb;
+ fip->flogi_req_send = 1;
+- spin_unlock_bh(&fip->ctlr_lock);
++ spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ schedule_work(&fip->timer_work);
+ return -EINPROGRESS;
+ case ELS_FDISC:
+@@ -1713,10 +1715,11 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip)
+ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
+ {
+ struct fcoe_fcf *fcf;
++ unsigned long flags;
+ int error;
+
+ mutex_lock(&fip->ctlr_mutex);
+- spin_lock_bh(&fip->ctlr_lock);
++ spin_lock_irqsave(&fip->ctlr_lock, flags);
+ LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n");
+ fcf = fcoe_ctlr_select(fip);
+ if (!fcf || fcf->flogi_sent) {
+@@ -1727,7 +1730,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
+ fcoe_ctlr_solicit(fip, NULL);
+ error = fcoe_ctlr_flogi_send_locked(fip);
+ }
+- spin_unlock_bh(&fip->ctlr_lock);
++ spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ mutex_unlock(&fip->ctlr_mutex);
+ return error;
+ }
+@@ -1744,8 +1747,9 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
+ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
+ {
+ struct fcoe_fcf *fcf;
++ unsigned long flags;
+
+- spin_lock_bh(&fip->ctlr_lock);
++ spin_lock_irqsave(&fip->ctlr_lock, flags);
+ fcf = fip->sel_fcf;
+ if (!fcf || !fip->flogi_req_send)
+ goto unlock;
+@@ -1772,7 +1776,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
+ } else /* XXX */
+ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n");
+ unlock:
+- spin_unlock_bh(&fip->ctlr_lock);
++ spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+ }
+
+ /**
+@@ -1969,7 +1973,7 @@ EXPORT_SYMBOL(fcoe_ctlr_recv_flogi);
+ *
+ * Returns: u64 fc world wide name
+ */
+-u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN],
++u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN],
+ unsigned int scheme, unsigned int port)
+ {
+ u64 wwn;
+diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c
+index af658aa38fedf..6260aa5ea6af8 100644
+--- a/drivers/scsi/fcoe/fcoe_sysfs.c
++++ b/drivers/scsi/fcoe/fcoe_sysfs.c
+@@ -830,14 +830,15 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent,
+
+ dev_set_name(&ctlr->dev, "ctlr_%d", ctlr->id);
+ error = device_register(&ctlr->dev);
+- if (error)
+- goto out_del_q2;
++ if (error) {
++ destroy_workqueue(ctlr->devloss_work_q);
++ destroy_workqueue(ctlr->work_q);
++ put_device(&ctlr->dev);
++ return NULL;
++ }
+
+ return ctlr;
+
+-out_del_q2:
+- destroy_workqueue(ctlr->devloss_work_q);
+- ctlr->devloss_work_q = NULL;
+ out_del_q:
+ destroy_workqueue(ctlr->work_q);
+ ctlr->work_q = NULL;
+@@ -1036,16 +1037,16 @@ struct fcoe_fcf_device *fcoe_fcf_device_add(struct fcoe_ctlr_device *ctlr,
+ fcf->selected = new_fcf->selected;
+
+ error = device_register(&fcf->dev);
+- if (error)
+- goto out_del;
++ if (error) {
++ put_device(&fcf->dev);
++ goto out;
++ }
+
+ fcf->state = FCOE_FCF_STATE_CONNECTED;
+ list_add_tail(&fcf->peers, &ctlr->fcfs);
+
+ return fcf;
+
+-out_del:
+- kfree(fcf);
+ out:
+ return NULL;
+ }
+diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
+index f8afbfb468dca..e6c36b5b07394 100644
+--- a/drivers/scsi/fnic/fnic_scsi.c
++++ b/drivers/scsi/fnic/fnic_scsi.c
+@@ -604,7 +604,7 @@ out:
+
+ FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
+ tag, sc, io_req, sg_count, cmd_trace,
+- (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc)));
++ (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
+ /* if only we issued IO, will we have the io lock */
+ if (io_lock_acquired)
+@@ -2172,7 +2172,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
+ bool new_sc)
+
+ {
+- int ret = SUCCESS;
++ int ret = 0;
+ struct fnic_pending_aborts_iter_data iter_data = {
+ .fnic = fnic,
+ .lun_dev = lr_sc->device,
+@@ -2192,9 +2192,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
+
+ /* walk again to check, if IOs are still pending in fw */
+ if (fnic_is_abts_pending(fnic, lr_sc))
+- ret = FAILED;
++ ret = 1;
+
+ clean_pending_aborts_end:
++ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
++ "%s: exit status: %d\n", __func__, ret);
+ return ret;
+ }
+
+diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
+index 9515c45affa5e..7d93783c09a50 100644
+--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
++++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
+@@ -1414,7 +1414,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba)
+ device->linkrate = phy->sas_phy.linkrate;
+
+ hisi_hba->hw->setup_itct(hisi_hba, sas_dev);
+- } else
++ } else if (!port->port_attached)
+ port->id = 0xff;
+ }
+ }
+diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+index b0b2361e63fef..c40588ed68a54 100644
+--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
++++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+@@ -2026,6 +2026,11 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
+ u16 dma_tx_err_type = le16_to_cpu(err_record->dma_tx_err_type);
+ u16 sipc_rx_err_type = le16_to_cpu(err_record->sipc_rx_err_type);
+ u32 dma_rx_err_type = le32_to_cpu(err_record->dma_rx_err_type);
++ struct hisi_sas_complete_v2_hdr *complete_queue =
++ hisi_hba->complete_hdr[slot->cmplt_queue];
++ struct hisi_sas_complete_v2_hdr *complete_hdr =
++ &complete_queue[slot->cmplt_queue_slot];
++ u32 dw0 = le32_to_cpu(complete_hdr->dw0);
+ int error = -1;
+
+ if (err_phase == 1) {
+@@ -2310,7 +2315,8 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
+ break;
+ }
+ }
+- hisi_sas_sata_done(task, slot);
++ if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)
++ hisi_sas_sata_done(task, slot);
+ }
+ break;
+ default:
+@@ -2442,7 +2448,8 @@ static void slot_complete_v2_hw(struct hisi_hba *hisi_hba,
+ case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+ {
+ ts->stat = SAS_SAM_STAT_GOOD;
+- hisi_sas_sata_done(task, slot);
++ if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)
++ hisi_sas_sata_done(task, slot);
+ break;
+ }
+ default:
+diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+index 3ab669dc806f6..b8a12d3ad5f27 100644
+--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
++++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+@@ -405,6 +405,8 @@
+ #define CMPLT_HDR_ERROR_PHASE_MSK (0xff << CMPLT_HDR_ERROR_PHASE_OFF)
+ #define CMPLT_HDR_RSPNS_XFRD_OFF 10
+ #define CMPLT_HDR_RSPNS_XFRD_MSK (0x1 << CMPLT_HDR_RSPNS_XFRD_OFF)
++#define CMPLT_HDR_RSPNS_GOOD_OFF 11
++#define CMPLT_HDR_RSPNS_GOOD_MSK (0x1 << CMPLT_HDR_RSPNS_GOOD_OFF)
+ #define CMPLT_HDR_ERX_OFF 12
+ #define CMPLT_HDR_ERX_MSK (0x1 << CMPLT_HDR_ERX_OFF)
+ #define CMPLT_HDR_ABORT_STAT_OFF 13
+@@ -478,6 +480,9 @@ struct hisi_sas_err_record_v3 {
+ #define RX_DATA_LEN_UNDERFLOW_OFF 6
+ #define RX_DATA_LEN_UNDERFLOW_MSK (1 << RX_DATA_LEN_UNDERFLOW_OFF)
+
++#define RX_FIS_STATUS_ERR_OFF 0
++#define RX_FIS_STATUS_ERR_MSK (1 << RX_FIS_STATUS_ERR_OFF)
++
+ #define HISI_SAS_COMMAND_ENTRIES_V3_HW 4096
+ #define HISI_SAS_MSI_COUNT_V3_HW 32
+
+@@ -518,6 +523,8 @@ struct hisi_sas_err_record_v3 {
+ #define CHNL_INT_STS_INT2_MSK BIT(3)
+ #define CHNL_WIDTH 4
+
++#define BAR_NO_V3_HW 5
++
+ enum {
+ DSM_FUNC_ERR_HANDLE_MSI = 0,
+ };
+@@ -527,7 +534,7 @@ MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)");
+
+ /* permit overriding the host protection capabilities mask (EEDP/T10 PI) */
+ static int prot_mask;
+-module_param(prot_mask, int, 0);
++module_param(prot_mask, int, 0444);
+ MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 ");
+
+ static void debugfs_work_handler_v3_hw(struct work_struct *work);
+@@ -2134,7 +2141,7 @@ static irqreturn_t fatal_axi_int_v3_hw(int irq_no, void *p)
+ return IRQ_HANDLED;
+ }
+
+-static void
++static bool
+ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
+ struct hisi_sas_slot *slot)
+ {
+@@ -2147,11 +2154,22 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
+ hisi_sas_status_buf_addr_mem(slot);
+ u32 dma_rx_err_type = le32_to_cpu(record->dma_rx_err_type);
+ u32 trans_tx_fail_type = le32_to_cpu(record->trans_tx_fail_type);
++ u16 sipc_rx_err_type = le16_to_cpu(record->sipc_rx_err_type);
+ u32 dw3 = le32_to_cpu(complete_hdr->dw3);
++ u32 dw0 = le32_to_cpu(complete_hdr->dw0);
+
+ switch (task->task_proto) {
+ case SAS_PROTOCOL_SSP:
+ if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
++ /*
++ * If returned response frame is incorrect because of data underflow,
++ * but I/O information has been written to the host memory, we examine
++ * response IU.
++ */
++ if (!(dw0 & CMPLT_HDR_RSPNS_GOOD_MSK) &&
++ (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK))
++ return false;
++
+ ts->residual = trans_tx_fail_type;
+ ts->stat = SAS_DATA_UNDERRUN;
+ } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
+@@ -2165,7 +2183,10 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
+ case SAS_PROTOCOL_SATA:
+ case SAS_PROTOCOL_STP:
+ case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+- if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
++ if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
++ (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
++ ts->stat = SAS_PROTO_RESPONSE;
++ } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
+ ts->residual = trans_tx_fail_type;
+ ts->stat = SAS_DATA_UNDERRUN;
+ } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
+@@ -2175,7 +2196,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
+ ts->stat = SAS_OPEN_REJECT;
+ ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+ }
+- hisi_sas_sata_done(task, slot);
++ if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)
++ hisi_sas_sata_done(task, slot);
+ break;
+ case SAS_PROTOCOL_SMP:
+ ts->stat = SAS_SAM_STAT_CHECK_CONDITION;
+@@ -2183,6 +2205,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
+ default:
+ break;
+ }
++ return true;
+ }
+
+ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba,
+@@ -2257,19 +2280,20 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba,
+ if ((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) {
+ u32 *error_info = hisi_sas_status_buf_addr_mem(slot);
+
+- slot_err_v3_hw(hisi_hba, task, slot);
+- if (ts->stat != SAS_DATA_UNDERRUN)
+- dev_info(dev, "erroneous completion iptt=%d task=%pK dev id=%d addr=%016llx CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
+- slot->idx, task, sas_dev->device_id,
+- SAS_ADDR(device->sas_addr),
+- dw0, dw1, complete_hdr->act, dw3,
+- error_info[0], error_info[1],
+- error_info[2], error_info[3]);
+- if (unlikely(slot->abort)) {
+- sas_task_abort(task);
+- return;
++ if (slot_err_v3_hw(hisi_hba, task, slot)) {
++ if (ts->stat != SAS_DATA_UNDERRUN)
++ dev_info(dev, "erroneous completion iptt=%d task=%pK dev id=%d addr=%016llx CQ hdr: 0x%x 0x%x 0x%x 0x%x Error info: 0x%x 0x%x 0x%x 0x%x\n",
++ slot->idx, task, sas_dev->device_id,
++ SAS_ADDR(device->sas_addr),
++ dw0, dw1, complete_hdr->act, dw3,
++ error_info[0], error_info[1],
++ error_info[2], error_info[3]);
++ if (unlikely(slot->abort)) {
++ sas_task_abort(task);
++ return;
++ }
++ goto out;
+ }
+- goto out;
+ }
+
+ switch (task->task_proto) {
+@@ -2299,7 +2323,8 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba,
+ case SAS_PROTOCOL_STP:
+ case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+ ts->stat = SAS_SAM_STAT_GOOD;
+- hisi_sas_sata_done(task, slot);
++ if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)
++ hisi_sas_sata_done(task, slot);
+ break;
+ default:
+ ts->stat = SAS_SAM_STAT_CHECK_CONDITION;
+@@ -2392,17 +2417,25 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p)
+ return IRQ_WAKE_THREAD;
+ }
+
++static void hisi_sas_v3_free_vectors(void *data)
++{
++ struct pci_dev *pdev = data;
++
++ pci_free_irq_vectors(pdev);
++}
++
+ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
+ {
+ int vectors;
+ int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi;
+ struct Scsi_Host *shost = hisi_hba->shost;
++ struct pci_dev *pdev = hisi_hba->pci_dev;
+ struct irq_affinity desc = {
+ .pre_vectors = BASE_VECTORS_V3_HW,
+ };
+
+ min_msi = MIN_AFFINE_VECTORS_V3_HW;
+- vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev,
++ vectors = pci_alloc_irq_vectors_affinity(pdev,
+ min_msi, max_msi,
+ PCI_IRQ_MSI |
+ PCI_IRQ_AFFINITY,
+@@ -2414,7 +2447,7 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
+ hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW;
+ shost->nr_hw_queues = hisi_hba->cq_nvecs;
+
+- return 0;
++ return devm_add_action(&pdev->dev, hisi_sas_v3_free_vectors, pdev);
+ }
+
+ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
+@@ -2749,6 +2782,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev)
+ struct hisi_hba *hisi_hba = shost_priv(shost);
+ struct device *dev = hisi_hba->dev;
+ int ret = sas_slave_configure(sdev);
++ unsigned int max_sectors;
+
+ if (ret)
+ return ret;
+@@ -2766,6 +2800,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev)
+ }
+ }
+
++ /* Set according to IOMMU IOVA caching limit */
++ max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue),
++ (PAGE_SIZE * 32) >> SECTOR_SHIFT);
++
++ blk_queue_max_hw_sectors(sdev->request_queue, max_sectors);
++
+ return 0;
+ }
+
+@@ -3959,6 +3999,54 @@ static const struct file_operations debugfs_bist_phy_v3_hw_fops = {
+ .owner = THIS_MODULE,
+ };
+
++static ssize_t debugfs_bist_cnt_v3_hw_write(struct file *filp,
++ const char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ struct seq_file *m = filp->private_data;
++ struct hisi_hba *hisi_hba = m->private;
++ unsigned int cnt;
++ int val;
++
++ if (hisi_hba->debugfs_bist_enable)
++ return -EPERM;
++
++ val = kstrtouint_from_user(buf, count, 0, &cnt);
++ if (val)
++ return val;
++
++ if (cnt)
++ return -EINVAL;
++
++ hisi_hba->debugfs_bist_cnt = 0;
++ return count;
++}
++
++static int debugfs_bist_cnt_v3_hw_show(struct seq_file *s, void *p)
++{
++ struct hisi_hba *hisi_hba = s->private;
++
++ seq_printf(s, "%u\n", hisi_hba->debugfs_bist_cnt);
++
++ return 0;
++}
++
++static int debugfs_bist_cnt_v3_hw_open(struct inode *inode,
++ struct file *filp)
++{
++ return single_open(filp, debugfs_bist_cnt_v3_hw_show,
++ inode->i_private);
++}
++
++static const struct file_operations debugfs_bist_cnt_v3_hw_ops = {
++ .open = debugfs_bist_cnt_v3_hw_open,
++ .read = seq_read,
++ .write = debugfs_bist_cnt_v3_hw_write,
++ .llseek = seq_lseek,
++ .release = single_release,
++ .owner = THIS_MODULE,
++};
++
+ static const struct {
+ int value;
+ char *name;
+@@ -4596,8 +4684,8 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba)
+ debugfs_create_file("phy_id", 0600, hisi_hba->debugfs_bist_dentry,
+ hisi_hba, &debugfs_bist_phy_v3_hw_fops);
+
+- debugfs_create_u32("cnt", 0600, hisi_hba->debugfs_bist_dentry,
+- &hisi_hba->debugfs_bist_cnt);
++ debugfs_create_file("cnt", 0600, hisi_hba->debugfs_bist_dentry,
++ hisi_hba, &debugfs_bist_cnt_v3_hw_ops);
+
+ debugfs_create_file("loopback_mode", 0600,
+ hisi_hba->debugfs_bist_dentry,
+@@ -4676,15 +4764,15 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ struct sas_ha_struct *sha;
+ int rc, phy_nr, port_nr, i;
+
+- rc = pci_enable_device(pdev);
++ rc = pcim_enable_device(pdev);
+ if (rc)
+ goto err_out;
+
+ pci_set_master(pdev);
+
+- rc = pci_request_regions(pdev, DRV_NAME);
++ rc = pcim_iomap_regions(pdev, 1 << BAR_NO_V3_HW, DRV_NAME);
+ if (rc)
+- goto err_out_disable_device;
++ goto err_out;
+
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc)
+@@ -4692,20 +4780,20 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ if (rc) {
+ dev_err(dev, "No usable DMA addressing method\n");
+ rc = -ENODEV;
+- goto err_out_regions;
++ goto err_out;
+ }
+
+ shost = hisi_sas_shost_alloc_pci(pdev);
+ if (!shost) {
+ rc = -ENOMEM;
+- goto err_out_regions;
++ goto err_out;
+ }
+
+ sha = SHOST_TO_SAS_HA(shost);
+ hisi_hba = shost_priv(shost);
+ dev_set_drvdata(dev, sha);
+
+- hisi_hba->regs = pcim_iomap(pdev, 5, 0);
++ hisi_hba->regs = pcim_iomap_table(pdev)[BAR_NO_V3_HW];
+ if (!hisi_hba->regs) {
+ dev_err(dev, "cannot map register\n");
+ rc = -ENOMEM;
+@@ -4763,7 +4851,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ dev_err(dev, "%d hw queues\n", shost->nr_hw_queues);
+ rc = scsi_add_host(shost, dev);
+ if (rc)
+- goto err_out_free_irq_vectors;
++ goto err_out_debugfs;
+
+ rc = sas_register_ha(sha);
+ if (rc)
+@@ -4792,17 +4880,11 @@ err_out_hw_init:
+ sas_unregister_ha(sha);
+ err_out_register_ha:
+ scsi_remove_host(shost);
+-err_out_free_irq_vectors:
+- pci_free_irq_vectors(pdev);
+ err_out_debugfs:
+ debugfs_exit_v3_hw(hisi_hba);
+ err_out_ha:
+ hisi_sas_free(hisi_hba);
+ scsi_host_put(shost);
+-err_out_regions:
+- pci_release_regions(pdev);
+-err_out_disable_device:
+- pci_disable_device(pdev);
+ err_out:
+ return rc;
+ }
+@@ -4821,7 +4903,6 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba)
+
+ devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq);
+ }
+- pci_free_irq_vectors(pdev);
+ }
+
+ static void hisi_sas_v3_remove(struct pci_dev *pdev)
+@@ -4840,8 +4921,6 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev)
+ sas_remove_host(sha->core.shost);
+
+ hisi_sas_v3_destroy_irqs(pdev, hisi_hba);
+- pci_release_regions(pdev);
+- pci_disable_device(pdev);
+ hisi_sas_free(hisi_hba);
+ debugfs_exit_v3_hw(hisi_hba);
+ scsi_host_put(shost);
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index 24b72ee4246fb..1b285ce62f8ae 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,6 +180,7 @@ void scsi_remove_host(struct Scsi_Host *shost)
+ scsi_forget_host(shost);
+ mutex_unlock(&shost->scan_mutex);
+ scsi_proc_host_rm(shost);
++ scsi_proc_hostdir_rm(shost->hostt);
+
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (scsi_host_set_state(shost, SHOST_DEL))
+@@ -321,8 +322,6 @@ static void scsi_host_dev_release(struct device *dev)
+ struct Scsi_Host *shost = dev_to_shost(dev);
+ struct device *parent = dev->parent;
+
+- scsi_proc_hostdir_rm(shost->hostt);
+-
+ /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */
+ rcu_barrier();
+
+@@ -388,6 +387,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
+ shost->shost_state = SHOST_CREATED;
+ INIT_LIST_HEAD(&shost->__devices);
+ INIT_LIST_HEAD(&shost->__targets);
++ INIT_LIST_HEAD(&shost->eh_abort_list);
+ INIT_LIST_HEAD(&shost->eh_cmd_q);
+ INIT_LIST_HEAD(&shost->starved_list);
+ init_waitqueue_head(&shost->host_wait);
+@@ -518,7 +518,7 @@ EXPORT_SYMBOL(scsi_host_alloc);
+ static int __scsi_host_match(struct device *dev, const void *data)
+ {
+ struct Scsi_Host *p;
+- const unsigned short *hostnum = data;
++ const unsigned int *hostnum = data;
+
+ p = class_to_shost(dev);
+ return p->host_no == *hostnum;
+@@ -535,7 +535,7 @@ static int __scsi_host_match(struct device *dev, const void *data)
+ * that scsi_host_get() took. The put_device() below dropped
+ * the reference from class_find_device().
+ **/
+-struct Scsi_Host *scsi_host_lookup(unsigned short hostnum)
++struct Scsi_Host *scsi_host_lookup(unsigned int hostnum)
+ {
+ struct device *cdev;
+ struct Scsi_Host *shost = NULL;
+diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
+index 3faa87fa296a2..8aa5c22ae3ff9 100644
+--- a/drivers/scsi/hpsa.c
++++ b/drivers/scsi/hpsa.c
+@@ -5848,7 +5848,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h)
+ {
+ struct Scsi_Host *sh;
+
+- sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h));
++ sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info));
+ if (sh == NULL) {
+ dev_err(&h->pdev->dev, "scsi_host_alloc failed\n");
+ return -ENOMEM;
+@@ -8927,7 +8927,7 @@ clean1: /* wq/aer/h */
+ destroy_workqueue(h->monitor_ctlr_wq);
+ h->monitor_ctlr_wq = NULL;
+ }
+- kfree(h);
++ hpda_free_ctlr_info(h);
+ return rc;
+ }
+
+@@ -9788,7 +9788,8 @@ static int hpsa_add_sas_host(struct ctlr_info *h)
+ return 0;
+
+ free_sas_phy:
+- hpsa_free_sas_phy(hpsa_sas_phy);
++ sas_phy_free(hpsa_sas_phy->phy);
++ kfree(hpsa_sas_phy);
+ free_sas_port:
+ hpsa_free_sas_port(hpsa_sas_port);
+ free_sas_node:
+@@ -9824,10 +9825,12 @@ static int hpsa_add_sas_device(struct hpsa_sas_node *hpsa_sas_node,
+
+ rc = hpsa_sas_port_add_rphy(hpsa_sas_port, rphy);
+ if (rc)
+- goto free_sas_port;
++ goto free_sas_rphy;
+
+ return 0;
+
++free_sas_rphy:
++ sas_rphy_free(rphy);
+ free_sas_port:
+ hpsa_free_sas_port(hpsa_sas_port);
+ device->sas_port = NULL;
+diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
+index 01f79991bf4a2..45ef78f388dc9 100644
+--- a/drivers/scsi/ibmvscsi/ibmvfc.c
++++ b/drivers/scsi/ibmvscsi/ibmvfc.c
+@@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
+ static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
+ static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
+
+-static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+-static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
++static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *);
++static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *);
+
+ static const char *unknown_error = "unknown error";
+
+@@ -708,8 +708,13 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost)
+ memset(vhost->async_crq.msgs.async, 0, PAGE_SIZE);
+ vhost->async_crq.cur = 0;
+
+- list_for_each_entry(tgt, &vhost->targets, queue)
+- ibmvfc_del_tgt(tgt);
++ list_for_each_entry(tgt, &vhost->targets, queue) {
++ if (vhost->client_migrated)
++ tgt->need_login = 1;
++ else
++ ibmvfc_del_tgt(tgt);
++ }
++
+ scsi_block_requests(vhost->host);
+ ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
+ vhost->job_step = ibmvfc_npiv_login;
+@@ -917,7 +922,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
+ struct vio_dev *vdev = to_vio_dev(vhost->dev);
+ unsigned long flags;
+
+- ibmvfc_release_sub_crqs(vhost);
++ ibmvfc_dereg_sub_crqs(vhost);
+
+ /* Re-enable the CRQ */
+ do {
+@@ -936,7 +941,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
+ spin_unlock(vhost->crq.q_lock);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+- ibmvfc_init_sub_crqs(vhost);
++ ibmvfc_reg_sub_crqs(vhost);
+
+ return rc;
+ }
+@@ -955,7 +960,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
+ struct vio_dev *vdev = to_vio_dev(vhost->dev);
+ struct ibmvfc_queue *crq = &vhost->crq;
+
+- ibmvfc_release_sub_crqs(vhost);
++ ibmvfc_dereg_sub_crqs(vhost);
+
+ /* Close the CRQ */
+ do {
+@@ -988,7 +993,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
+ spin_unlock(vhost->crq.q_lock);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+- ibmvfc_init_sub_crqs(vhost);
++ ibmvfc_reg_sub_crqs(vhost);
+
+ return rc;
+ }
+@@ -3235,9 +3240,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
+ /* We need to re-setup the interpartition connection */
+ dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n");
+ vhost->client_migrated = 1;
++
++ scsi_block_requests(vhost->host);
+ ibmvfc_purge_requests(vhost, DID_REQUEUE);
+- ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
++ ibmvfc_set_host_state(vhost, IBMVFC_LINK_DOWN);
+ ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE);
++ wake_up(&vhost->work_wait_q);
+ } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) {
+ dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format);
+ ibmvfc_purge_requests(vhost, DID_ERROR);
+@@ -5680,6 +5688,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost,
+ queue->cur = 0;
+ queue->fmt = fmt;
+ queue->size = PAGE_SIZE / fmt_size;
++
++ queue->vhost = vhost;
+ return 0;
+ }
+
+@@ -5755,9 +5765,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
+
+ ENTER;
+
+- if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT))
+- return -ENOMEM;
+-
+ rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
+ &scrq->cookie, &scrq->hw_irq);
+
+@@ -5788,7 +5795,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
+ }
+
+ scrq->hwq_id = index;
+- scrq->vhost = vhost;
+
+ LEAVE;
+ return 0;
+@@ -5798,7 +5804,6 @@ irq_failed:
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+ } while (rtas_busy_delay(rc));
+ reg_failed:
+- ibmvfc_free_queue(vhost, scrq);
+ LEAVE;
+ return rc;
+ }
+@@ -5824,12 +5829,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
+ if (rc)
+ dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
+
+- ibmvfc_free_queue(vhost, scrq);
++ /* Clean out the queue */
++ memset(scrq->msgs.crq, 0, PAGE_SIZE);
++ scrq->cur = 0;
++
++ LEAVE;
++}
++
++static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost)
++{
++ int i, j;
++
++ ENTER;
++ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
++ return;
++
++ for (i = 0; i < nr_scsi_hw_queues; i++) {
++ if (ibmvfc_register_scsi_channel(vhost, i)) {
++ for (j = i; j > 0; j--)
++ ibmvfc_deregister_scsi_channel(vhost, j - 1);
++ vhost->do_enquiry = 0;
++ return;
++ }
++ }
++
++ LEAVE;
++}
++
++static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost)
++{
++ int i;
++
++ ENTER;
++ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
++ return;
++
++ for (i = 0; i < nr_scsi_hw_queues; i++)
++ ibmvfc_deregister_scsi_channel(vhost, i);
++
+ LEAVE;
+ }
+
+ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+ {
++ struct ibmvfc_queue *scrq;
+ int i, j;
+
+ ENTER;
+@@ -5845,30 +5888,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+ }
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+- if (ibmvfc_register_scsi_channel(vhost, i)) {
+- for (j = i; j > 0; j--)
+- ibmvfc_deregister_scsi_channel(vhost, j - 1);
++ scrq = &vhost->scsi_scrqs.scrqs[i];
++ if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) {
++ for (j = i; j > 0; j--) {
++ scrq = &vhost->scsi_scrqs.scrqs[j - 1];
++ ibmvfc_free_queue(vhost, scrq);
++ }
+ kfree(vhost->scsi_scrqs.scrqs);
+ vhost->scsi_scrqs.scrqs = NULL;
+ vhost->scsi_scrqs.active_queues = 0;
+ vhost->do_enquiry = 0;
+- break;
++ vhost->mq_enabled = 0;
++ return;
+ }
+ }
+
++ ibmvfc_reg_sub_crqs(vhost);
++
+ LEAVE;
+ }
+
+ static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
+ {
++ struct ibmvfc_queue *scrq;
+ int i;
+
+ ENTER;
+ if (!vhost->scsi_scrqs.scrqs)
+ return;
+
+- for (i = 0; i < nr_scsi_hw_queues; i++)
+- ibmvfc_deregister_scsi_channel(vhost, i);
++ ibmvfc_dereg_sub_crqs(vhost);
++
++ for (i = 0; i < nr_scsi_hw_queues; i++) {
++ scrq = &vhost->scsi_scrqs.scrqs[i];
++ ibmvfc_free_queue(vhost, scrq);
++ }
+
+ kfree(vhost->scsi_scrqs.scrqs);
+ vhost->scsi_scrqs.scrqs = NULL;
+diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
+index 3718406e09887..c39a245f43d02 100644
+--- a/drivers/scsi/ibmvscsi/ibmvfc.h
++++ b/drivers/scsi/ibmvscsi/ibmvfc.h
+@@ -789,6 +789,7 @@ struct ibmvfc_queue {
+ spinlock_t _lock;
+ spinlock_t *q_lock;
+
++ struct ibmvfc_host *vhost;
+ struct ibmvfc_event_pool evt_pool;
+ struct list_head sent;
+ struct list_head free;
+@@ -797,7 +798,6 @@ struct ibmvfc_queue {
+ union ibmvfc_iu cancel_rsp;
+
+ /* Sub-CRQ fields */
+- struct ibmvfc_host *vhost;
+ unsigned long cookie;
+ unsigned long vios_cookie;
+ unsigned long hw_irq;
+diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+index 10b6c6daaacda..d43bb18f58fd5 100644
+--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
++++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+@@ -36,7 +36,7 @@
+
+ #define IBMVSCSIS_VERSION "v0.2"
+
+-#define INITIAL_SRP_LIMIT 800
++#define INITIAL_SRP_LIMIT 1024
+ #define DEFAULT_MAX_SECTORS 256
+ #define MAX_TXU 1024 * 1024
+
+diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
+index 5d78f7e939a36..e5e38431c5c73 100644
+--- a/drivers/scsi/ipr.c
++++ b/drivers/scsi/ipr.c
+@@ -1516,23 +1516,22 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd)
+ }
+
+ /**
+- * strip_and_pad_whitespace - Strip and pad trailing whitespace.
+- * @i: index into buffer
+- * @buf: string to modify
++ * strip_whitespace - Strip and pad trailing whitespace.
++ * @i: size of buffer
++ * @buf: string to modify
+ *
+- * This function will strip all trailing whitespace, pad the end
+- * of the string with a single space, and NULL terminate the string.
++ * This function will strip all trailing whitespace and
++ * NUL terminate the string.
+ *
+- * Return value:
+- * new length of string
+ **/
+-static int strip_and_pad_whitespace(int i, char *buf)
++static void strip_whitespace(int i, char *buf)
+ {
++ if (i < 1)
++ return;
++ i--;
+ while (i && buf[i] == ' ')
+ i--;
+- buf[i+1] = ' ';
+- buf[i+2] = '\0';
+- return i + 2;
++ buf[i+1] = '\0';
+ }
+
+ /**
+@@ -1547,19 +1546,21 @@ static int strip_and_pad_whitespace(int i, char *buf)
+ static void ipr_log_vpd_compact(char *prefix, struct ipr_hostrcb *hostrcb,
+ struct ipr_vpd *vpd)
+ {
+- char buffer[IPR_VENDOR_ID_LEN + IPR_PROD_ID_LEN + IPR_SERIAL_NUM_LEN + 3];
+- int i = 0;
++ char vendor_id[IPR_VENDOR_ID_LEN + 1];
++ char product_id[IPR_PROD_ID_LEN + 1];
++ char sn[IPR_SERIAL_NUM_LEN + 1];
+
+- memcpy(buffer, vpd->vpids.vendor_id, IPR_VENDOR_ID_LEN);
+- i = strip_and_pad_whitespace(IPR_VENDOR_ID_LEN - 1, buffer);
++ memcpy(vendor_id, vpd->vpids.vendor_id, IPR_VENDOR_ID_LEN);
++ strip_whitespace(IPR_VENDOR_ID_LEN, vendor_id);
+
+- memcpy(&buffer[i], vpd->vpids.product_id, IPR_PROD_ID_LEN);
+- i = strip_and_pad_whitespace(i + IPR_PROD_ID_LEN - 1, buffer);
++ memcpy(product_id, vpd->vpids.product_id, IPR_PROD_ID_LEN);
++ strip_whitespace(IPR_PROD_ID_LEN, product_id);
+
+- memcpy(&buffer[i], vpd->sn, IPR_SERIAL_NUM_LEN);
+- buffer[IPR_SERIAL_NUM_LEN + i] = '\0';
++ memcpy(sn, vpd->sn, IPR_SERIAL_NUM_LEN);
++ strip_whitespace(IPR_SERIAL_NUM_LEN, sn);
+
+- ipr_hcam_err(hostrcb, "%s VPID/SN: %s\n", prefix, buffer);
++ ipr_hcam_err(hostrcb, "%s VPID/SN: %s %s %s\n", prefix,
++ vendor_id, product_id, sn);
+ }
+
+ /**
+@@ -9791,7 +9792,7 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
+ GFP_KERNEL);
+
+ if (!ioa_cfg->hrrq[i].host_rrq) {
+- while (--i > 0)
++ while (--i >= 0)
+ dma_free_coherent(&pdev->dev,
+ sizeof(u32) * ioa_cfg->hrrq[i].size,
+ ioa_cfg->hrrq[i].host_rrq,
+@@ -10064,7 +10065,7 @@ static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg,
+ ioa_cfg->vectors_info[i].desc,
+ &ioa_cfg->hrrq[i]);
+ if (rc) {
+- while (--i >= 0)
++ while (--i > 0)
+ free_irq(pci_irq_vector(pdev, i),
+ &ioa_cfg->hrrq[i]);
+ return rc;
+@@ -10869,11 +10870,19 @@ static struct notifier_block ipr_notifier = {
+ **/
+ static int __init ipr_init(void)
+ {
++ int rc;
++
+ ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
+ IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
+
+ register_reboot_notifier(&ipr_notifier);
+- return pci_register_driver(&ipr_driver);
++ rc = pci_register_driver(&ipr_driver);
++ if (rc) {
++ unregister_reboot_notifier(&ipr_notifier);
++ return rc;
++ }
++
++ return 0;
+ }
+
+ /**
+diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
+index 1bc37593c88ff..fe705b8bf4643 100644
+--- a/drivers/scsi/iscsi_tcp.c
++++ b/drivers/scsi/iscsi_tcp.c
+@@ -52,6 +52,10 @@ static struct iscsi_transport iscsi_sw_tcp_transport;
+ static unsigned int iscsi_max_lun = ~0;
+ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
+
++static bool iscsi_recv_from_iscsi_q;
++module_param_named(recv_from_iscsi_q, iscsi_recv_from_iscsi_q, bool, 0644);
++MODULE_PARM_DESC(recv_from_iscsi_q, "Set to true to read iSCSI data/headers from the iscsi_q workqueue. The default is false which will perform reads from the network softirq context.");
++
+ static int iscsi_sw_tcp_dbg;
+ module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int,
+ S_IRUGO | S_IWUSR);
+@@ -122,20 +126,13 @@ static inline int iscsi_sw_sk_state_check(struct sock *sk)
+ return 0;
+ }
+
+-static void iscsi_sw_tcp_data_ready(struct sock *sk)
++static void iscsi_sw_tcp_recv_data(struct iscsi_conn *conn)
+ {
+- struct iscsi_conn *conn;
+- struct iscsi_tcp_conn *tcp_conn;
++ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++ struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
++ struct sock *sk = tcp_sw_conn->sock->sk;
+ read_descriptor_t rd_desc;
+
+- read_lock_bh(&sk->sk_callback_lock);
+- conn = sk->sk_user_data;
+- if (!conn) {
+- read_unlock_bh(&sk->sk_callback_lock);
+- return;
+- }
+- tcp_conn = conn->dd_data;
+-
+ /*
+ * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
+ * We set count to 1 because we want the network layer to
+@@ -144,13 +141,48 @@ static void iscsi_sw_tcp_data_ready(struct sock *sk)
+ */
+ rd_desc.arg.data = conn;
+ rd_desc.count = 1;
+- tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
+
+- iscsi_sw_sk_state_check(sk);
++ tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
+
+ /* If we had to (atomically) map a highmem page,
+ * unmap it now. */
+ iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
++
++ iscsi_sw_sk_state_check(sk);
++}
++
++static void iscsi_sw_tcp_recv_data_work(struct work_struct *work)
++{
++ struct iscsi_conn *conn = container_of(work, struct iscsi_conn,
++ recvwork);
++ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++ struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
++ struct sock *sk = tcp_sw_conn->sock->sk;
++
++ lock_sock(sk);
++ iscsi_sw_tcp_recv_data(conn);
++ release_sock(sk);
++}
++
++static void iscsi_sw_tcp_data_ready(struct sock *sk)
++{
++ struct iscsi_sw_tcp_conn *tcp_sw_conn;
++ struct iscsi_tcp_conn *tcp_conn;
++ struct iscsi_conn *conn;
++
++ read_lock_bh(&sk->sk_callback_lock);
++ conn = sk->sk_user_data;
++ if (!conn) {
++ read_unlock_bh(&sk->sk_callback_lock);
++ return;
++ }
++ tcp_conn = conn->dd_data;
++ tcp_sw_conn = tcp_conn->dd_data;
++
++ if (tcp_sw_conn->queue_recv)
++ iscsi_conn_queue_recv(conn);
++ else
++ iscsi_sw_tcp_recv_data(conn);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+
+@@ -205,7 +237,7 @@ static void iscsi_sw_tcp_write_space(struct sock *sk)
+ old_write_space(sk);
+
+ ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n");
+- iscsi_conn_queue_work(conn);
++ iscsi_conn_queue_xmit(conn);
+ }
+
+ static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
+@@ -276,6 +308,9 @@ static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
+ if (segment->total_copied + segment->size < segment->total_size)
+ flags |= MSG_MORE;
+
++ if (tcp_sw_conn->queue_recv)
++ flags |= MSG_DONTWAIT;
++
+ /* Use sendpage if we can; else fall back to sendmsg */
+ if (!segment->data) {
+ sg = segment->sg;
+@@ -557,6 +592,10 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
+ conn = cls_conn->dd_data;
+ tcp_conn = conn->dd_data;
+ tcp_sw_conn = tcp_conn->dd_data;
++ INIT_WORK(&conn->recvwork, iscsi_sw_tcp_recv_data_work);
++ tcp_sw_conn->queue_recv = iscsi_recv_from_iscsi_q;
++
++ mutex_init(&tcp_sw_conn->sock_lock);
+
+ tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+@@ -592,11 +631,15 @@ free_conn:
+
+ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
+ {
+- struct iscsi_session *session = conn->session;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
+ struct socket *sock = tcp_sw_conn->sock;
+
++ /*
++ * The iscsi transport class will make sure we are not called in
++ * parallel with start, stop, bind and destroys. However, this can be
++ * called twice if userspace does a stop then a destroy.
++ */
+ if (!sock)
+ return;
+
+@@ -610,9 +653,11 @@ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
+ iscsi_sw_tcp_conn_restore_callbacks(conn);
+ sock_put(sock->sk);
+
+- spin_lock_bh(&session->frwd_lock);
++ iscsi_suspend_rx(conn);
++
++ mutex_lock(&tcp_sw_conn->sock_lock);
+ tcp_sw_conn->sock = NULL;
+- spin_unlock_bh(&session->frwd_lock);
++ mutex_unlock(&tcp_sw_conn->sock_lock);
+ sockfd_put(sock);
+ }
+
+@@ -664,7 +709,6 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
+ struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+ int is_leading)
+ {
+- struct iscsi_session *session = cls_session->dd_data;
+ struct iscsi_conn *conn = cls_conn->dd_data;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
+@@ -684,10 +728,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
+ if (err)
+ goto free_socket;
+
+- spin_lock_bh(&session->frwd_lock);
++ mutex_lock(&tcp_sw_conn->sock_lock);
+ /* bind iSCSI connection and socket */
+ tcp_sw_conn->sock = sock;
+- spin_unlock_bh(&session->frwd_lock);
++ mutex_unlock(&tcp_sw_conn->sock_lock);
+
+ /* setup Socket parameters */
+ sk = sock->sk;
+@@ -723,9 +767,15 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
+ iscsi_set_param(cls_conn, param, buf, buflen);
+ break;
+ case ISCSI_PARAM_DATADGST_EN:
++ mutex_lock(&tcp_sw_conn->sock_lock);
++ if (!tcp_sw_conn->sock) {
++ mutex_unlock(&tcp_sw_conn->sock_lock);
++ return -ENOTCONN;
++ }
+ iscsi_set_param(cls_conn, param, buf, buflen);
+ tcp_sw_conn->sendpage = conn->datadgst_en ?
+ sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
++ mutex_unlock(&tcp_sw_conn->sock_lock);
+ break;
+ case ISCSI_PARAM_MAX_R2T:
+ return iscsi_tcp_set_max_r2t(conn, buf);
+@@ -740,8 +790,8 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
+ enum iscsi_param param, char *buf)
+ {
+ struct iscsi_conn *conn = cls_conn->dd_data;
+- struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+- struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
++ struct iscsi_sw_tcp_conn *tcp_sw_conn;
++ struct iscsi_tcp_conn *tcp_conn;
+ struct sockaddr_in6 addr;
+ struct socket *sock;
+ int rc;
+@@ -751,21 +801,36 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
+ case ISCSI_PARAM_CONN_ADDRESS:
+ case ISCSI_PARAM_LOCAL_PORT:
+ spin_lock_bh(&conn->session->frwd_lock);
+- if (!tcp_sw_conn || !tcp_sw_conn->sock) {
++ if (!conn->session->leadconn) {
+ spin_unlock_bh(&conn->session->frwd_lock);
+ return -ENOTCONN;
+ }
+- sock = tcp_sw_conn->sock;
+- sock_hold(sock->sk);
++ /*
++ * The conn has been setup and bound, so just grab a ref
++	 * in case a destroy runs while we are in the net layer.
++ */
++ iscsi_get_conn(conn->cls_conn);
+ spin_unlock_bh(&conn->session->frwd_lock);
+
++ tcp_conn = conn->dd_data;
++ tcp_sw_conn = tcp_conn->dd_data;
++
++ mutex_lock(&tcp_sw_conn->sock_lock);
++ sock = tcp_sw_conn->sock;
++ if (!sock) {
++ rc = -ENOTCONN;
++ goto sock_unlock;
++ }
++
+ if (param == ISCSI_PARAM_LOCAL_PORT)
+ rc = kernel_getsockname(sock,
+ (struct sockaddr *)&addr);
+ else
+ rc = kernel_getpeername(sock,
+ (struct sockaddr *)&addr);
+- sock_put(sock->sk);
++sock_unlock:
++ mutex_unlock(&tcp_sw_conn->sock_lock);
++ iscsi_put_conn(conn->cls_conn);
+ if (rc < 0)
+ return rc;
+
+@@ -782,7 +847,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
+ enum iscsi_host_param param, char *buf)
+ {
+ struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost);
+- struct iscsi_session *session = tcp_sw_host->session;
++ struct iscsi_session *session;
+ struct iscsi_conn *conn;
+ struct iscsi_tcp_conn *tcp_conn;
+ struct iscsi_sw_tcp_conn *tcp_sw_conn;
+@@ -792,6 +857,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
+
+ switch (param) {
+ case ISCSI_HOST_PARAM_IPADDRESS:
++ session = tcp_sw_host->session;
+ if (!session)
+ return -ENOTCONN;
+
+@@ -803,17 +869,21 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
+ }
+ tcp_conn = conn->dd_data;
+ tcp_sw_conn = tcp_conn->dd_data;
+- sock = tcp_sw_conn->sock;
+- if (!sock) {
+- spin_unlock_bh(&session->frwd_lock);
+- return -ENOTCONN;
+- }
+- sock_hold(sock->sk);
++ /*
++ * The conn has been setup and bound, so just grab a ref
++	 * in case a destroy runs while we are in the net layer.
++ */
++ iscsi_get_conn(conn->cls_conn);
+ spin_unlock_bh(&session->frwd_lock);
+
+- rc = kernel_getsockname(sock,
+- (struct sockaddr *)&addr);
+- sock_put(sock->sk);
++ mutex_lock(&tcp_sw_conn->sock_lock);
++ sock = tcp_sw_conn->sock;
++ if (!sock)
++ rc = -ENOTCONN;
++ else
++ rc = kernel_getsockname(sock, (struct sockaddr *)&addr);
++ mutex_unlock(&tcp_sw_conn->sock_lock);
++ iscsi_put_conn(conn->cls_conn);
+ if (rc < 0)
+ return rc;
+
+@@ -888,17 +958,19 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
+ if (!cls_session)
+ goto remove_host;
+ session = cls_session->dd_data;
+- tcp_sw_host = iscsi_host_priv(shost);
+- tcp_sw_host->session = session;
+
+ if (iscsi_tcp_r2tpool_alloc(session))
+ goto remove_session;
++
++ /* We are now fully setup so expose the session to sysfs. */
++ tcp_sw_host = iscsi_host_priv(shost);
++ tcp_sw_host->session = session;
+ return cls_session;
+
+ remove_session:
+ iscsi_session_teardown(cls_session);
+ remove_host:
+- iscsi_host_remove(shost);
++ iscsi_host_remove(shost, false);
+ free_host:
+ iscsi_host_free(shost);
+ return NULL;
+@@ -912,10 +984,17 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
+ if (WARN_ON_ONCE(session->leadconn))
+ return;
+
++ iscsi_session_remove(cls_session);
++ /*
++ * Our get_host_param needs to access the session, so remove the
++ * host from sysfs before freeing the session to make sure userspace
++ * is no longer accessing the callout.
++ */
++ iscsi_host_remove(shost, false);
++
+ iscsi_tcp_r2tpool_free(cls_session->dd_data);
+- iscsi_session_teardown(cls_session);
+
+- iscsi_host_remove(shost);
++ iscsi_session_free(cls_session);
+ iscsi_host_free(shost);
+ }
+
+diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
+index 791453195099c..68e14a344904f 100644
+--- a/drivers/scsi/iscsi_tcp.h
++++ b/drivers/scsi/iscsi_tcp.h
+@@ -28,6 +28,11 @@ struct iscsi_sw_tcp_send {
+
+ struct iscsi_sw_tcp_conn {
+ struct socket *sock;
++ /* Taken when accessing the sock from the netlink/sysfs interface */
++ struct mutex sock_lock;
++
++ struct work_struct recvwork;
++ bool queue_recv;
+
+ struct iscsi_sw_tcp_send out;
+ /* old values for socket callbacks */
+diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
+index 841000445b9a1..aa223db4cf53c 100644
+--- a/drivers/scsi/libfc/fc_exch.c
++++ b/drivers/scsi/libfc/fc_exch.c
+@@ -1701,6 +1701,7 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp)
+ if (cancel_delayed_work_sync(&ep->timeout_work)) {
+ FC_EXCH_DBG(ep, "Exchange timer canceled due to ABTS response\n");
+ fc_exch_release(ep); /* release from pending timer hold */
++ return;
+ }
+
+ spin_lock_bh(&ep->ex_lock);
+diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
+index 5bc91d34df634..d422e8fd7137f 100644
+--- a/drivers/scsi/libiscsi.c
++++ b/drivers/scsi/libiscsi.c
+@@ -83,7 +83,7 @@ MODULE_PARM_DESC(debug_libiscsi_eh,
+ "%s " dbg_fmt, __func__, ##arg); \
+ } while (0);
+
+-inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
++inline void iscsi_conn_queue_xmit(struct iscsi_conn *conn)
+ {
+ struct Scsi_Host *shost = conn->session->host;
+ struct iscsi_host *ihost = shost_priv(shost);
+@@ -91,7 +91,17 @@ inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
+ if (ihost->workq)
+ queue_work(ihost->workq, &conn->xmitwork);
+ }
+-EXPORT_SYMBOL_GPL(iscsi_conn_queue_work);
++EXPORT_SYMBOL_GPL(iscsi_conn_queue_xmit);
++
++inline void iscsi_conn_queue_recv(struct iscsi_conn *conn)
++{
++ struct Scsi_Host *shost = conn->session->host;
++ struct iscsi_host *ihost = shost_priv(shost);
++
++ if (ihost->workq && !test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))
++ queue_work(ihost->workq, &conn->recvwork);
++}
++EXPORT_SYMBOL_GPL(iscsi_conn_queue_recv);
+
+ static void __iscsi_update_cmdsn(struct iscsi_session *session,
+ uint32_t exp_cmdsn, uint32_t max_cmdsn)
+@@ -678,7 +688,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ struct iscsi_task *task;
+ itt_t itt;
+
+- if (session->state == ISCSI_STATE_TERMINATE)
++ if (session->state == ISCSI_STATE_TERMINATE ||
++ !test_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags))
+ return NULL;
+
+ if (opcode == ISCSI_OP_LOGIN || opcode == ISCSI_OP_TEXT) {
+@@ -764,7 +775,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ goto free_task;
+ } else {
+ list_add_tail(&task->running, &conn->mgmtqueue);
+- iscsi_conn_queue_work(conn);
++ iscsi_conn_queue_xmit(conn);
+ }
+
+ return task;
+@@ -1392,8 +1403,8 @@ static bool iscsi_set_conn_failed(struct iscsi_conn *conn)
+ if (conn->stop_stage == 0)
+ session->state = ISCSI_STATE_FAILED;
+
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
+ return true;
+ }
+
+@@ -1454,7 +1465,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
+ * Do this after dropping the extra ref because if this was a requeue
+ * it's removed from that list and cleanup_queued_task would miss it.
+ */
+- if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
++ if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
+ /*
+ * Save the task and ref in case we weren't cleaning up this
+ * task and get woken up again.
+@@ -1512,7 +1523,7 @@ void iscsi_requeue_task(struct iscsi_task *task)
+ */
+ iscsi_put_task(task);
+ }
+- iscsi_conn_queue_work(conn);
++ iscsi_conn_queue_xmit(conn);
+ spin_unlock_bh(&conn->session->frwd_lock);
+ }
+ EXPORT_SYMBOL_GPL(iscsi_requeue_task);
+@@ -1532,7 +1543,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
+ int rc = 0;
+
+ spin_lock_bh(&conn->session->frwd_lock);
+- if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
++ if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
+ ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n");
+ spin_unlock_bh(&conn->session->frwd_lock);
+ return -ENODATA;
+@@ -1746,7 +1757,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
+ goto fault;
+ }
+
+- if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
++ if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) {
+ reason = FAILURE_SESSION_IN_RECOVERY;
+ sc->result = DID_REQUEUE << 16;
+ goto fault;
+@@ -1781,7 +1792,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
+ }
+ } else {
+ list_add_tail(&task->running, &conn->cmdqueue);
+- iscsi_conn_queue_work(conn);
++ iscsi_conn_queue_xmit(conn);
+ }
+
+ session->queued_cmdsn++;
+@@ -1935,14 +1946,14 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)
+ void iscsi_suspend_queue(struct iscsi_conn *conn)
+ {
+ spin_lock_bh(&conn->session->frwd_lock);
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
+ spin_unlock_bh(&conn->session->frwd_lock);
+ }
+ EXPORT_SYMBOL_GPL(iscsi_suspend_queue);
+
+ /**
+ * iscsi_suspend_tx - suspend iscsi_data_xmit
+- * @conn: iscsi conn tp stop processing IO on.
++ * @conn: iscsi conn to stop processing IO on.
+ *
+ * This function sets the suspend bit to prevent iscsi_data_xmit
+ * from sending new IO, and if work is queued on the xmit thread
+@@ -1953,17 +1964,32 @@ void iscsi_suspend_tx(struct iscsi_conn *conn)
+ struct Scsi_Host *shost = conn->session->host;
+ struct iscsi_host *ihost = shost_priv(shost);
+
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
+ if (ihost->workq)
+- flush_workqueue(ihost->workq);
++ flush_work(&conn->xmitwork);
+ }
+ EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
+
+ static void iscsi_start_tx(struct iscsi_conn *conn)
+ {
+- clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+- iscsi_conn_queue_work(conn);
++ clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
++ iscsi_conn_queue_xmit(conn);
++}
++
++/**
++ * iscsi_suspend_rx - Prevent recvwork from running again.
++ * @conn: iscsi conn to stop.
++ */
++void iscsi_suspend_rx(struct iscsi_conn *conn)
++{
++ struct Scsi_Host *shost = conn->session->host;
++ struct iscsi_host *ihost = shost_priv(shost);
++
++ set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
++ if (ihost->workq)
++ flush_work(&conn->recvwork);
+ }
++EXPORT_SYMBOL_GPL(iscsi_suspend_rx);
+
+ /*
+ * We want to make sure a ping is in flight. It has timed out.
+@@ -2214,6 +2240,8 @@ void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active)
+ iscsi_suspend_tx(conn);
+
+ spin_lock_bh(&session->frwd_lock);
++ clear_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags);
++
+ if (!is_active) {
+ /*
+ * if logout timed out before userspace could even send a PDU
+@@ -2827,11 +2855,12 @@ static void iscsi_notify_host_removed(struct iscsi_cls_session *cls_session)
+ /**
+ * iscsi_host_remove - remove host and sessions
+ * @shost: scsi host
++ * @is_shutdown: true if called from a driver shutdown callout
+ *
+ * If there are any sessions left, this will initiate the removal and wait
+ * for the completion.
+ */
+-void iscsi_host_remove(struct Scsi_Host *shost)
++void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown)
+ {
+ struct iscsi_host *ihost = shost_priv(shost);
+ unsigned long flags;
+@@ -2840,7 +2869,11 @@ void iscsi_host_remove(struct Scsi_Host *shost)
+ ihost->state = ISCSI_HOST_REMOVED;
+ spin_unlock_irqrestore(&ihost->lock, flags);
+
+- iscsi_host_for_each_session(shost, iscsi_notify_host_removed);
++ if (!is_shutdown)
++ iscsi_host_for_each_session(shost, iscsi_notify_host_removed);
++ else
++ iscsi_host_for_each_session(shost, iscsi_force_destroy_session);
++
+ wait_event_interruptible(ihost->session_removal_wq,
+ ihost->num_sessions == 0);
+ if (signal_pending(current))
+@@ -2991,17 +3024,32 @@ dec_session_count:
+ }
+ EXPORT_SYMBOL_GPL(iscsi_session_setup);
+
+-/**
+- * iscsi_session_teardown - destroy session, host, and cls_session
+- * @cls_session: iscsi session
++/*
++ * iscsi_session_remove - Remove session from iSCSI class.
+ */
+-void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
++void iscsi_session_remove(struct iscsi_cls_session *cls_session)
+ {
+ struct iscsi_session *session = cls_session->dd_data;
+- struct module *owner = cls_session->transport->owner;
+ struct Scsi_Host *shost = session->host;
+
+ iscsi_remove_session(cls_session);
++ /*
++ * host removal only has to wait for its children to be removed from
++ * sysfs, and iscsi_tcp needs to do iscsi_host_remove before freeing
++ * the session, so drop the session count here.
++ */
++ iscsi_host_dec_session_cnt(shost);
++}
++EXPORT_SYMBOL_GPL(iscsi_session_remove);
++
++/**
++ * iscsi_session_free - Free iscsi session and its resources
++ * @cls_session: iscsi session
++ */
++void iscsi_session_free(struct iscsi_cls_session *cls_session)
++{
++ struct iscsi_session *session = cls_session->dd_data;
++ struct module *owner = cls_session->transport->owner;
+
+ iscsi_pool_free(&session->cmdpool);
+ kfree(session->password);
+@@ -3019,10 +3067,19 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
+ kfree(session->discovery_parent_type);
+
+ iscsi_free_session(cls_session);
+-
+- iscsi_host_dec_session_cnt(shost);
+ module_put(owner);
+ }
++EXPORT_SYMBOL_GPL(iscsi_session_free);
++
++/**
++ * iscsi_session_teardown - destroy session and cls_session
++ * @cls_session: iscsi session
++ */
++void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
++{
++ iscsi_session_remove(cls_session);
++ iscsi_session_free(cls_session);
++}
+ EXPORT_SYMBOL_GPL(iscsi_session_teardown);
+
+ /**
+@@ -3101,6 +3158,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
+ {
+ struct iscsi_conn *conn = cls_conn->dd_data;
+ struct iscsi_session *session = conn->session;
++ char *tmp_persistent_address = conn->persistent_address;
++ char *tmp_local_ipaddr = conn->local_ipaddr;
+
+ del_timer_sync(&conn->transport_timer);
+
+@@ -3122,8 +3181,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
+ spin_lock_bh(&session->frwd_lock);
+ free_pages((unsigned long) conn->data,
+ get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
+- kfree(conn->persistent_address);
+- kfree(conn->local_ipaddr);
+ /* regular RX path uses back_lock */
+ spin_lock_bh(&session->back_lock);
+ kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
+@@ -3135,6 +3192,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
+ mutex_unlock(&session->eh_mutex);
+
+ iscsi_destroy_conn(cls_conn);
++ kfree(tmp_persistent_address);
++ kfree(tmp_local_ipaddr);
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_teardown);
+
+@@ -3310,6 +3369,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
+ spin_lock_bh(&session->frwd_lock);
+ if (is_leading)
+ session->leadconn = conn;
++
++ set_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags);
+ spin_unlock_bh(&session->frwd_lock);
+
+ /*
+@@ -3322,8 +3383,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
+ /*
+ * Unblock xmitworker(), Login Phase will pass through.
+ */
+- clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+- clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
++ clear_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags);
++ clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_bind);
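Note on the libiscsi rework above: the old per-direction suspend_tx/suspend_rx words are folded into a single conn->flags bitmask, the xmit queueing helper is renamed to iscsi_conn_queue_xmit(), a matching iscsi_conn_queue_recv()/iscsi_suspend_rx() pair is added, and iscsi_suspend_tx() switches from flush_workqueue() to flush_work() so it only waits for this connection's work item. A compact sketch of the flag-plus-flush pattern follows, with hypothetical names and the work items assumed to be initialized at setup time.

#include <linux/workqueue.h>
#include <linux/bitops.h>

enum {
	DEMO_FLAG_SUSPEND_TX,
	DEMO_FLAG_SUSPEND_RX,
};

struct demo_conn {
	unsigned long flags;
	struct work_struct xmitwork;
	struct work_struct recvwork;
};

/* Park one direction and wait only for that work item; flushing the
 * whole workqueue would also stall on unrelated connections' work. */
static void demo_suspend_rx(struct demo_conn *conn)
{
	set_bit(DEMO_FLAG_SUSPEND_RX, &conn->flags);
	flush_work(&conn->recvwork);
}

static void demo_queue_rx(struct demo_conn *conn)
{
	if (!test_bit(DEMO_FLAG_SUSPEND_RX, &conn->flags))
		schedule_work(&conn->recvwork);
}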
+diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
+index 2e9ffe3d1a55e..883005757ddb8 100644
+--- a/drivers/scsi/libiscsi_tcp.c
++++ b/drivers/scsi/libiscsi_tcp.c
+@@ -927,7 +927,7 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
+ */
+ conn->last_recv = jiffies;
+
+- if (unlikely(conn->suspend_rx)) {
++ if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) {
+ ISCSI_DBG_TCP(conn, "Rx suspended!\n");
+ *status = ISCSI_TCP_SUSPENDED;
+ return 0;
+diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
+index a315715b36227..7e0cde710fc3c 100644
+--- a/drivers/scsi/libsas/sas_ata.c
++++ b/drivers/scsi/libsas/sas_ata.c
+@@ -197,7 +197,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
+ task->total_xfer_len = qc->nbytes;
+ task->num_scatter = qc->n_elem;
+ task->data_dir = qc->dma_dir;
+- } else if (qc->tf.protocol == ATA_PROT_NODATA) {
++ } else if (!ata_is_data(qc->tf.protocol)) {
+ task->data_dir = DMA_NONE;
+ } else {
+ for_each_sg(qc->sg, sg, qc->n_elem, si)
+diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
+index c2150a8184237..9ae35631135d8 100644
+--- a/drivers/scsi/libsas/sas_expander.c
++++ b/drivers/scsi/libsas/sas_expander.c
+@@ -85,7 +85,7 @@ static int smp_execute_task_sg(struct domain_device *dev,
+ res = i->dft->lldd_execute_task(task, GFP_KERNEL);
+
+ if (res) {
+- del_timer(&task->slow_task->timer);
++ del_timer_sync(&task->slow_task->timer);
+ pr_notice("executing SMP task failed:%d\n", res);
+ break;
+ }
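Note on the sas_expander change above: del_timer() is replaced by del_timer_sync() on the error path so a timeout handler still executing on another CPU has finished before the task is torn down. The fragment below is only an illustration of why the synchronous variant matters when the object is about to be freed; the structure is hypothetical.

#include <linux/timer.h>
#include <linux/slab.h>

struct demo_task {
	struct timer_list timer;
	/* ... */
};

static void demo_task_destroy(struct demo_task *task)
{
	/*
	 * del_timer() only deactivates the timer; a handler that is
	 * already running on another CPU can still dereference 'task'.
	 * del_timer_sync() additionally waits for such a handler to
	 * finish, so the free below cannot race with it.
	 */
	del_timer_sync(&task->timer);
	kfree(task);
}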
+diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
+index befeb7c342903..457ff86e02b30 100644
+--- a/drivers/scsi/lpfc/lpfc.h
++++ b/drivers/scsi/lpfc/lpfc.h
+@@ -593,6 +593,7 @@ struct lpfc_vport {
+ #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */
+ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */
+ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/
++#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */
+ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */
+ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */
+ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */
+@@ -1022,12 +1023,12 @@ struct lpfc_hba {
+ #define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */
+ #define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */
+ #define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */
+-#define HBA_FW_DUMP_OP 0x10000 /* Skips fn reset before FW dump */
+ #define HBA_RECOVERABLE_UE 0x20000 /* Firmware supports recoverable UE */
+ #define HBA_FORCED_LINK_SPEED 0x40000 /*
+ * Firmware supports Forced Link Speed
+ * capability
+ */
++#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */
+ #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
+ #define HBA_CGN_RSVD1 0x200000 /* Reserved CGN flag */
+ #define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */
+@@ -1038,6 +1039,7 @@ struct lpfc_hba {
+ #define HBA_HBEAT_TMO 0x8000000 /* HBEAT initiated after timeout */
+ #define HBA_FLOGI_OUTSTANDING 0x10000000 /* FLOGI is outstanding */
+
++ struct completion *fw_dump_cmpl; /* cmpl event tracker for fw_dump */
+ uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
+ struct lpfc_dmabuf slim2p;
+
+@@ -1163,6 +1165,16 @@ struct lpfc_hba {
+ uint32_t cfg_hostmem_hgp;
+ uint32_t cfg_log_verbose;
+ uint32_t cfg_enable_fc4_type;
++#define LPFC_ENABLE_FCP 1
++#define LPFC_ENABLE_NVME 2
++#define LPFC_ENABLE_BOTH 3
++#if (IS_ENABLED(CONFIG_NVME_FC))
++#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
++#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH
++#else
++#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP
++#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP
++#endif
+ uint32_t cfg_aer_support;
+ uint32_t cfg_sriov_nr_virtfn;
+ uint32_t cfg_request_firmware_upgrade;
+@@ -1184,9 +1196,6 @@ struct lpfc_hba {
+ uint32_t cfg_ras_fwlog_func;
+ uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */
+ uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */
+-#define LPFC_ENABLE_FCP 1
+-#define LPFC_ENABLE_NVME 2
+-#define LPFC_ENABLE_BOTH 3
+ uint32_t cfg_enable_pbde;
+ uint32_t cfg_enable_mi;
+ struct nvmet_fc_target_port *targetport;
+@@ -1549,10 +1558,7 @@ struct lpfc_hba {
+ u32 cgn_acqe_cnt;
+
+ /* RX monitor handling for CMF */
+- struct rxtable_entry *rxtable; /* RX_monitor information */
+- atomic_t rxtable_idx_head;
+-#define LPFC_RXMONITOR_TABLE_IN_USE (LPFC_MAX_RXMONITOR_ENTRY + 73)
+- atomic_t rxtable_idx_tail;
++ struct lpfc_rx_info_monitor *rx_monitor;
+ atomic_t rx_max_read_cnt; /* Maximum read bytes */
+ uint64_t rx_block_cnt;
+
+@@ -1601,7 +1607,8 @@ struct lpfc_hba {
+
+ #define LPFC_MAX_RXMONITOR_ENTRY 800
+ #define LPFC_MAX_RXMONITOR_DUMP 32
+-struct rxtable_entry {
++struct rx_info_entry {
++ uint64_t cmf_bytes; /* Total no of read bytes for CMF_SYNC_WQE */
+ uint64_t total_bytes; /* Total no of read bytes requested */
+ uint64_t rcv_bytes; /* Total no of read bytes completed */
+ uint64_t avg_io_size;
+@@ -1615,6 +1622,13 @@ struct rxtable_entry {
+ uint32_t timer_interval;
+ };
+
++struct lpfc_rx_info_monitor {
++ struct rx_info_entry *ring; /* info organized in a circular buffer */
++ u32 head_idx, tail_idx; /* index to head/tail of ring */
++ spinlock_t lock; /* spinlock for ring */
++ u32 entries; /* storing number entries/size of ring */
++};
++
+ static inline struct Scsi_Host *
+ lpfc_shost_from_vport(struct lpfc_vport *vport)
+ {
+diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
+index ebe417921dac0..9f3f7805f1f95 100644
+--- a/drivers/scsi/lpfc/lpfc_attr.c
++++ b/drivers/scsi/lpfc/lpfc_attr.c
+@@ -1315,6 +1315,9 @@ lpfc_issue_lip(struct Scsi_Host *shost)
+ pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK;
+ pmboxq->u.mb.mbxOwner = OWN_HOST;
+
++ if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME))
++ vport->fc_flag &= ~FC_PT2PT_NO_NVME;
++
+ mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
+
+ if ((mbxstatus == MBX_SUCCESS) &&
+@@ -1709,25 +1712,25 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
+ before_fc_flag = phba->pport->fc_flag;
+ sriov_nr_virtfn = phba->cfg_sriov_nr_virtfn;
+
+- /* Disable SR-IOV virtual functions if enabled */
+- if (phba->cfg_sriov_nr_virtfn) {
+- pci_disable_sriov(pdev);
+- phba->cfg_sriov_nr_virtfn = 0;
+- }
++ if (opcode == LPFC_FW_DUMP) {
++ init_completion(&online_compl);
++ phba->fw_dump_cmpl = &online_compl;
++ } else {
++ /* Disable SR-IOV virtual functions if enabled */
++ if (phba->cfg_sriov_nr_virtfn) {
++ pci_disable_sriov(pdev);
++ phba->cfg_sriov_nr_virtfn = 0;
++ }
+
+- if (opcode == LPFC_FW_DUMP)
+- phba->hba_flag |= HBA_FW_DUMP_OP;
++ status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
+
+- status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
++ if (status != 0)
++ return status;
+
+- if (status != 0) {
+- phba->hba_flag &= ~HBA_FW_DUMP_OP;
+- return status;
++ /* wait for the device to be quiesced before firmware reset */
++ msleep(100);
+ }
+
+- /* wait for the device to be quiesced before firmware reset */
+- msleep(100);
+-
+ reg_val = readl(phba->sli4_hba.conf_regs_memmap_p +
+ LPFC_CTL_PDEV_CTL_OFFSET);
+
+@@ -1756,24 +1759,42 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "3153 Fail to perform the requested "
+ "access: x%x\n", reg_val);
++ if (phba->fw_dump_cmpl)
++ phba->fw_dump_cmpl = NULL;
+ return rc;
+ }
+
+ /* keep the original port state */
+- if (before_fc_flag & FC_OFFLINE_MODE)
+- goto out;
+-
+- init_completion(&online_compl);
+- job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
+- LPFC_EVT_ONLINE);
+- if (!job_posted)
++ if (before_fc_flag & FC_OFFLINE_MODE) {
++ if (phba->fw_dump_cmpl)
++ phba->fw_dump_cmpl = NULL;
+ goto out;
++ }
+
+- wait_for_completion(&online_compl);
++ /* Firmware dump will trigger an HA_ERATT event, and
++ * lpfc_handle_eratt_s4 routine already handles bringing the port back
++ * online.
++ */
++ if (opcode == LPFC_FW_DUMP) {
++ wait_for_completion(phba->fw_dump_cmpl);
++ } else {
++ init_completion(&online_compl);
++ job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
++ LPFC_EVT_ONLINE);
++ if (!job_posted)
++ goto out;
+
++ wait_for_completion(&online_compl);
++ }
+ out:
+ /* in any case, restore the virtual functions enabled as before */
+ if (sriov_nr_virtfn) {
++ /* If fw_dump was performed, first disable to clean up */
++ if (opcode == LPFC_FW_DUMP) {
++ pci_disable_sriov(pdev);
++ phba->cfg_sriov_nr_virtfn = 0;
++ }
++
+ sriov_err =
+ lpfc_sli_probe_sriov_nr_virtfn(phba, sriov_nr_virtfn);
+ if (!sriov_err)
+@@ -3960,8 +3981,8 @@ LPFC_ATTR_R(nvmet_mrq_post,
+ * 3 - register both FCP and NVME
+ * Supported values are [1,3]. Default value is 3
+ */
+-LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH,
+- LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
++LPFC_ATTR_R(enable_fc4_type, LPFC_DEF_ENBL_FC4_TYPE,
++ LPFC_ENABLE_FCP, LPFC_MAX_ENBL_FC4_TYPE,
+ "Enable FC4 Protocol support - FCP / NVME");
+
+ /*
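Note on the lpfc_attr.c hunks above: for LPFC_FW_DUMP the driver no longer offlines the port itself; it publishes a completion in phba->fw_dump_cmpl and waits, relying on the error-attention handling (see the lpfc_work_done hunk later in this patch) to complete it once the port has been brought back online. A minimal sketch of that handshake, not lpfc's actual code and with illustrative names:

#include <linux/completion.h>

struct demo_hba {
	struct completion *fw_dump_cmpl;
};

/* Requesting thread: publish a completion, trigger the dump, sleep. */
static void demo_request_fw_dump(struct demo_hba *phba)
{
	DECLARE_COMPLETION_ONSTACK(done);

	phba->fw_dump_cmpl = &done;
	/* ... write the dump request to the port control register ... */
	wait_for_completion(&done);
}

/* Recovery path, running in another context: wake the requester. */
static void demo_recovery_done(struct demo_hba *phba)
{
	if (phba->fw_dump_cmpl) {
		complete(phba->fw_dump_cmpl);
		phba->fw_dump_cmpl = NULL;
	}
}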
+diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
+index fdf08cb572071..ed827f198cb68 100644
+--- a/drivers/scsi/lpfc/lpfc_bsg.c
++++ b/drivers/scsi/lpfc/lpfc_bsg.c
+@@ -911,7 +911,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ struct lpfc_iocbq *piocbq)
+ {
+ uint32_t evt_req_id = 0;
+- uint32_t cmd;
++ u16 cmd;
+ struct lpfc_dmabuf *dmabuf = NULL;
+ struct lpfc_bsg_event *evt;
+ struct event_data *evt_dat = NULL;
+@@ -936,7 +936,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+
+ ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt;
+ evt_req_id = ct_req->FsType;
+- cmd = ct_req->CommandResponse.bits.CmdRsp;
++ cmd = be16_to_cpu(ct_req->CommandResponse.bits.CmdRsp);
+
+ spin_lock_irqsave(&phba->ct_ev_lock, flags);
+ list_for_each_entry(evt, &phba->ct_ev_waiters, node) {
+@@ -3243,8 +3243,8 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job)
+ ctreq->RevisionId.bits.InId = 0;
+ ctreq->FsType = SLI_CT_ELX_LOOPBACK;
+ ctreq->FsSubType = 0;
+- ctreq->CommandResponse.bits.CmdRsp = ELX_LOOPBACK_DATA;
+- ctreq->CommandResponse.bits.Size = size;
++ ctreq->CommandResponse.bits.CmdRsp = cpu_to_be16(ELX_LOOPBACK_DATA);
++ ctreq->CommandResponse.bits.Size = cpu_to_be16(size);
+ segment_offset = ELX_LOOPBACK_HEADER_SZ;
+ } else
+ segment_offset = 0;
+diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
+index c512f41991429..470239394e649 100644
+--- a/drivers/scsi/lpfc/lpfc_crtn.h
++++ b/drivers/scsi/lpfc/lpfc_crtn.h
+@@ -90,6 +90,14 @@ void lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba);
+ void lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag);
+ void lpfc_unblock_requests(struct lpfc_hba *phba);
+ void lpfc_block_requests(struct lpfc_hba *phba);
++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor,
++ u32 entries);
++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor);
++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor,
++ struct rx_info_entry *entry);
++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba,
++ struct lpfc_rx_info_monitor *rx_monitor, char *buf,
++ u32 buf_len, u32 max_read_entries);
+
+ void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+@@ -119,6 +127,8 @@ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did);
+ struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
+ int lpfc_nlp_put(struct lpfc_nodelist *);
++void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp);
+ void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb);
+ int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
+diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
+index bd6d459afce54..560b2504e674d 100644
+--- a/drivers/scsi/lpfc/lpfc_debugfs.c
++++ b/drivers/scsi/lpfc/lpfc_debugfs.c
+@@ -2157,10 +2157,13 @@ lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+ char mybuf[64];
+ char *pbuf;
+ int i;
++ size_t bsize;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+- if (copy_from_user(mybuf, buf, nbytes))
++ bsize = min(nbytes, (sizeof(mybuf) - 1));
++
++ if (copy_from_user(mybuf, buf, bsize))
+ return -EFAULT;
+ pbuf = &mybuf[0];
+
+@@ -2181,7 +2184,7 @@ lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+ qp->lock_conflict.wq_access = 0;
+ }
+ }
+- return nbytes;
++ return bsize;
+ }
+ #endif
+
+@@ -2607,8 +2610,8 @@ lpfc_debugfs_multixripools_write(struct file *file, const char __user *buf,
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_multixri_pool *multixri_pool;
+
+- if (nbytes > 64)
+- nbytes = 64;
++ if (nbytes > sizeof(mybuf) - 1)
++ nbytes = sizeof(mybuf) - 1;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+@@ -2688,8 +2691,8 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
+ if (!phba->targetport)
+ return -ENXIO;
+
+- if (nbytes > 64)
+- nbytes = 64;
++ if (nbytes > sizeof(mybuf) - 1)
++ nbytes = sizeof(mybuf) - 1;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+@@ -2826,8 +2829,8 @@ lpfc_debugfs_ioktime_write(struct file *file, const char __user *buf,
+ char mybuf[64];
+ char *pbuf;
+
+- if (nbytes > 64)
+- nbytes = 64;
++ if (nbytes > sizeof(mybuf) - 1)
++ nbytes = sizeof(mybuf) - 1;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+@@ -2954,8 +2957,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf,
+ char mybuf[64];
+ char *pbuf;
+
+- if (nbytes > 64)
+- nbytes = 64;
++ if (nbytes > sizeof(mybuf) - 1)
++ nbytes = sizeof(mybuf) - 1;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+@@ -3060,8 +3063,8 @@ lpfc_debugfs_hdwqstat_write(struct file *file, const char __user *buf,
+ char *pbuf;
+ int i;
+
+- if (nbytes > 64)
+- nbytes = 64;
++ if (nbytes > sizeof(mybuf) - 1)
++ nbytes = sizeof(mybuf) - 1;
+
+ memset(mybuf, 0, sizeof(mybuf));
+
+@@ -5520,7 +5523,7 @@ lpfc_rx_monitor_open(struct inode *inode, struct file *file)
+ if (!debug)
+ goto out;
+
+- debug->buffer = vmalloc(MAX_DEBUGFS_RX_TABLE_SIZE);
++ debug->buffer = vmalloc(MAX_DEBUGFS_RX_INFO_SIZE);
+ if (!debug->buffer) {
+ kfree(debug);
+ goto out;
+@@ -5541,55 +5544,18 @@ lpfc_rx_monitor_read(struct file *file, char __user *buf, size_t nbytes,
+ struct lpfc_rx_monitor_debug *debug = file->private_data;
+ struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+ char *buffer = debug->buffer;
+- struct rxtable_entry *entry;
+- int i, len = 0, head, tail, last, start;
+-
+- head = atomic_read(&phba->rxtable_idx_head);
+- while (head == LPFC_RXMONITOR_TABLE_IN_USE) {
+- /* Table is getting updated */
+- msleep(20);
+- head = atomic_read(&phba->rxtable_idx_head);
+- }
+
+- tail = atomic_xchg(&phba->rxtable_idx_tail, head);
+- if (!phba->rxtable || head == tail) {
+- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+- "Rxtable is empty\n");
+- goto out;
+- }
+- last = (head > tail) ? head : LPFC_MAX_RXMONITOR_ENTRY;
+- start = tail;
+-
+- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+- " MaxBPI\t Total Data Cmd Total Data Cmpl "
+- " Latency(us) Avg IO Size\tMax IO Size IO cnt "
+- "Info BWutil(ms)\n");
+-get_table:
+- for (i = start; i < last; i++) {
+- entry = &phba->rxtable[i];
+- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len,
+- "%3d:%12lld %12lld\t%12lld\t"
+- "%8lldus\t%8lld\t%10lld "
+- "%8d %2d %2d(%2d)\n",
+- i, entry->max_bytes_per_interval,
+- entry->total_bytes,
+- entry->rcv_bytes,
+- entry->avg_io_latency,
+- entry->avg_io_size,
+- entry->max_read_cnt,
+- entry->io_cnt,
+- entry->cmf_info,
+- entry->timer_utilization,
+- entry->timer_interval);
++ if (!phba->rx_monitor) {
++ scnprintf(buffer, MAX_DEBUGFS_RX_INFO_SIZE,
++ "Rx Monitor Info is empty.\n");
++ } else {
++ lpfc_rx_monitor_report(phba, phba->rx_monitor, buffer,
++ MAX_DEBUGFS_RX_INFO_SIZE,
++ LPFC_MAX_RXMONITOR_ENTRY);
+ }
+
+- if (head != last) {
+- start = 0;
+- last = head;
+- goto get_table;
+- }
+-out:
+- return simple_read_from_buffer(buf, nbytes, ppos, buffer, len);
++ return simple_read_from_buffer(buf, nbytes, ppos, buffer,
++ strlen(buffer));
+ }
+
+ static int
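Note on the lpfc_debugfs.c hunks above: the write handlers now clamp the copy length to the local buffer (sizeof(mybuf) - 1) before calling copy_from_user(), and the lockstat handler returns the clamped size instead of the caller's nbytes. The sketch below shows the general shape of such a bounded debugfs write; the buffer size and names are illustrative only.

#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/minmax.h>

/* Cap the user copy to the local buffer, leaving room for the NUL
 * terminator, and report how many bytes were actually consumed. */
static ssize_t demo_debugfs_write(struct file *file, const char __user *buf,
				  size_t nbytes, loff_t *ppos)
{
	char mybuf[64] = { 0 };
	size_t bsize = min(nbytes, sizeof(mybuf) - 1);

	if (copy_from_user(mybuf, buf, bsize))
		return -EFAULT;

	/* ... parse mybuf ... */
	return bsize;
}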
+diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
+index a5bf71b349720..f71e5b6311ac0 100644
+--- a/drivers/scsi/lpfc/lpfc_debugfs.h
++++ b/drivers/scsi/lpfc/lpfc_debugfs.h
+@@ -282,7 +282,7 @@ struct lpfc_idiag {
+ void *ptr_private;
+ };
+
+-#define MAX_DEBUGFS_RX_TABLE_SIZE (100 * LPFC_MAX_RXMONITOR_ENTRY)
++#define MAX_DEBUGFS_RX_INFO_SIZE (128 * LPFC_MAX_RXMONITOR_ENTRY)
+ struct lpfc_rx_monitor_debug {
+ char *i_private;
+ char *buffer;
+diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
+index 871b665bd72e3..37a4b79010bfc 100644
+--- a/drivers/scsi/lpfc/lpfc_disc.h
++++ b/drivers/scsi/lpfc/lpfc_disc.h
+@@ -85,6 +85,13 @@ enum lpfc_fc4_xpt_flags {
+ NLP_XPT_HAS_HH = 0x10
+ };
+
++enum lpfc_nlp_save_flags {
++ /* devloss occurred during recovery */
++ NLP_IN_RECOV_POST_DEV_LOSS = 0x1,
++ /* wait for outstanding LOGO to cmpl */
++ NLP_WAIT_FOR_LOGO = 0x2,
++};
++
+ struct lpfc_nodelist {
+ struct list_head nlp_listp;
+ struct serv_parm fc_sparam; /* buffer for service params */
+@@ -144,8 +151,9 @@ struct lpfc_nodelist {
+ unsigned long *active_rrqs_xri_bitmap;
+ struct lpfc_scsicmd_bkt *lat_data; /* Latency data */
+ uint32_t fc4_prli_sent;
+- u32 upcall_flags;
+-#define NLP_WAIT_FOR_LOGO 0x2
++
++ /* flags to keep ndlp alive until special conditions are met */
++ enum lpfc_nlp_save_flags save_flags;
+
+ enum lpfc_fc4_xpt_flags fc4_xpt_flags;
+
+diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
+index 052c0e5b11195..5f44a0763f37d 100644
+--- a/drivers/scsi/lpfc/lpfc_els.c
++++ b/drivers/scsi/lpfc/lpfc_els.c
+@@ -1059,9 +1059,10 @@ stop_rr_fcf_flogi:
+
+ lpfc_printf_vlog(vport, KERN_WARNING, LOG_TRACE_EVENT,
+ "0150 FLOGI failure Status:x%x/x%x "
+- "xri x%x TMO:x%x\n",
++ "xri x%x TMO:x%x refcnt %d\n",
+ irsp->ulpStatus, irsp->un.ulpWord[4],
+- cmdiocb->sli4_xritag, irsp->ulpTimeout);
++ cmdiocb->sli4_xritag, irsp->ulpTimeout,
++ kref_read(&ndlp->kref));
+
+ /* If this is not a loop open failure, bail out */
+ if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+@@ -1071,7 +1072,8 @@ stop_rr_fcf_flogi:
+
+ /* FLOGI failed, so there is no fabric */
+ spin_lock_irq(shost->host_lock);
+- vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP |
++ FC_PT2PT_NO_NVME);
+ spin_unlock_irq(shost->host_lock);
+
+ /* If private loop, then allow max outstanding els to be
+@@ -1122,12 +1124,12 @@ stop_rr_fcf_flogi:
+ /* FLOGI completes successfully */
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+ "0101 FLOGI completes successfully, I/O tag:x%x, "
+- "xri x%x Data: x%x x%x x%x x%x x%x x%x x%x\n",
++ "xri x%x Data: x%x x%x x%x x%x x%x x%x x%x %d\n",
+ cmdiocb->iotag, cmdiocb->sli4_xritag,
+ irsp->un.ulpWord[4], sp->cmn.e_d_tov,
+ sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution,
+ vport->port_state, vport->fc_flag,
+- sp->cmn.priority_tagging);
++ sp->cmn.priority_tagging, kref_read(&ndlp->kref));
+
+ if (sp->cmn.priority_tagging)
+ vport->vmid_flag |= LPFC_VMID_ISSUE_QFPA;
+@@ -1205,8 +1207,6 @@ flogifail:
+ phba->fcf.fcf_flag &= ~FCF_DISCOVERY;
+ spin_unlock_irq(&phba->hbalock);
+
+- if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)))
+- lpfc_nlp_put(ndlp);
+ if (!lpfc_error_lost_link(irsp)) {
+ /* FLOGI failed, so just use loop map to make discovery list */
+ lpfc_disc_list_loopmap(vport);
+@@ -2899,9 +2899,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ irsp = &(rspiocb->iocb);
+ spin_lock_irq(&ndlp->lock);
+ ndlp->nlp_flag &= ~NLP_LOGO_SND;
+- if (ndlp->upcall_flags & NLP_WAIT_FOR_LOGO) {
++ if (ndlp->save_flags & NLP_WAIT_FOR_LOGO) {
+ wake_up_waiter = 1;
+- ndlp->upcall_flags &= ~NLP_WAIT_FOR_LOGO;
++ ndlp->save_flags &= ~NLP_WAIT_FOR_LOGO;
+ }
+ spin_unlock_irq(&ndlp->lock);
+
+@@ -2955,18 +2955,10 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ spin_unlock_irq(&ndlp->lock);
+ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_DEVICE_RM);
+- lpfc_els_free_iocb(phba, cmdiocb);
+- lpfc_nlp_put(ndlp);
+-
+- /* Presume the node was released. */
+- return;
++ goto out_rsrc_free;
+ }
+
+ out:
+- /* Driver is done with the IO. */
+- lpfc_els_free_iocb(phba, cmdiocb);
+- lpfc_nlp_put(ndlp);
+-
+ /* At this point, the LOGO processing is complete. NOTE: For a
+ * pt2pt topology, we are assuming the NPortID will only change
+ * on link up processing. For a LOGO / PLOGI initiated by the
+@@ -2993,6 +2985,10 @@ out:
+ ndlp->nlp_DID, irsp->ulpStatus,
+ irsp->un.ulpWord[4], irsp->ulpTimeout,
+ vport->num_disc_nodes);
++
++ lpfc_els_free_iocb(phba, cmdiocb);
++ lpfc_nlp_put(ndlp);
++
+ lpfc_disc_start(vport);
+ return;
+ }
+@@ -3009,6 +3005,10 @@ out:
+ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_DEVICE_RM);
+ }
++out_rsrc_free:
++ /* Driver is done with the I/O. */
++ lpfc_els_free_iocb(phba, cmdiocb);
++ lpfc_nlp_put(ndlp);
+ }
+
+ /**
+@@ -3532,11 +3532,6 @@ lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry)
+ return 1;
+ }
+
+- /* This will cause the callback-function lpfc_cmpl_els_cmd to
+- * trigger the release of node.
+- */
+- if (!(vport->fc_flag & FC_PT2PT))
+- lpfc_nlp_put(ndlp);
+ return 0;
+ }
+
+@@ -3782,9 +3777,6 @@ lpfc_least_capable_settings(struct lpfc_hba *phba,
+ {
+ u32 rsp_sig_cap = 0, drv_sig_cap = 0;
+ u32 rsp_sig_freq_cyc = 0, rsp_sig_freq_scale = 0;
+- struct lpfc_cgn_info *cp;
+- u32 crc;
+- u16 sig_freq;
+
+ /* Get rsp signal and frequency capabilities. */
+ rsp_sig_cap = be32_to_cpu(pcgd->xmt_signal_capability);
+@@ -3840,25 +3832,7 @@ lpfc_least_capable_settings(struct lpfc_hba *phba,
+ }
+ }
+
+- if (!phba->cgn_i)
+- return;
+-
+- /* Update signal frequency in congestion info buffer */
+- cp = (struct lpfc_cgn_info *)phba->cgn_i->virt;
+-
+- /* Frequency (in ms) Signal Warning/Signal Congestion Notifications
+- * are received by the HBA
+- */
+- sig_freq = phba->cgn_sig_freq;
+-
+- if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY)
+- cp->cgn_warn_freq = cpu_to_le16(sig_freq);
+- if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+- cp->cgn_alarm_freq = cpu_to_le16(sig_freq);
+- cp->cgn_warn_freq = cpu_to_le16(sig_freq);
+- }
+- crc = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED);
+- cp->cgn_info_crc = cpu_to_le32(crc);
++ /* We are NOT recording signal frequency in congestion info buffer */
+ return;
+
+ out_no_support:
+@@ -4593,6 +4567,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ /* Added for Vendor specifc support
+ * Just keep retrying for these Rsn / Exp codes
+ */
++ if ((vport->fc_flag & FC_PT2PT) &&
++ cmd == ELS_CMD_NVMEPRLI) {
++ switch (stat.un.b.lsRjtRsnCode) {
++ case LSRJT_UNABLE_TPC:
++ case LSRJT_INVALID_CMD:
++ case LSRJT_LOGICAL_ERR:
++ case LSRJT_CMD_UNSUPPORTED:
++ lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
++ "0168 NVME PRLI LS_RJT "
++ "reason %x port doesn't "
++ "support NVME, disabling NVME\n",
++ stat.un.b.lsRjtRsnCode);
++ retry = 0;
++ vport->fc_flag |= FC_PT2PT_NO_NVME;
++ goto out_retry;
++ }
++ }
+ switch (stat.un.b.lsRjtRsnCode) {
+ case LSRJT_UNABLE_TPC:
+ /* The driver has a VALID PLOGI but the rport has
+@@ -5076,14 +5067,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ /* NPort Recovery mode or node is just allocated */
+ if (!lpfc_nlp_not_used(ndlp)) {
+ /* A LOGO is completing and the node is in NPR state.
+- * If this a fabric node that cleared its transport
+- * registration, release the rpi.
++ * Just unregister the RPI because the node is still
++ * required.
+ */
+- spin_lock_irq(&ndlp->lock);
+- ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+- if (phba->sli_rev == LPFC_SLI_REV4)
+- ndlp->nlp_flag |= NLP_RELEASE_RPI;
+- spin_unlock_irq(&ndlp->lock);
+ lpfc_unreg_rpi(vport, ndlp);
+ } else {
+ /* Indicate the node has already released, should
+@@ -6216,6 +6202,7 @@ lpfc_els_disc_adisc(struct lpfc_vport *vport)
+ * from backend
+ */
+ lpfc_nlp_unreg_node(vport, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ continue;
+ }
+
+@@ -6882,6 +6869,7 @@ static int
+ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context)
+ {
+ LPFC_MBOXQ_t *mbox = NULL;
++ struct lpfc_dmabuf *mp;
+ int rc;
+
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+@@ -6897,8 +6885,11 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context)
+ mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0;
+ mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+- if (rc == MBX_NOT_FINISHED)
++ if (rc == MBX_NOT_FINISHED) {
++ mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ goto issue_mbox_fail;
++ }
+
+ return 0;
+
+@@ -9572,11 +9563,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
+ /* Take action here for an Alarm event */
+ if (phba->cmf_active_mode != LPFC_CFG_OFF) {
+ if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) {
+- /* Track of alarm cnt for cgn_info */
+- atomic_inc(&phba->cgn_fabric_alarm_cnt);
+ /* Track of alarm cnt for SYNC_WQE */
+ atomic_inc(&phba->cgn_sync_alarm_cnt);
+ }
++ /* Track alarm cnt for cgn_info regardless
++ * of whether CMF is configured for Signals
++ * or FPINs.
++ */
++ atomic_inc(&phba->cgn_fabric_alarm_cnt);
+ goto cleanup;
+ }
+ break;
+@@ -9584,11 +9578,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
+ /* Take action here for a Warning event */
+ if (phba->cmf_active_mode != LPFC_CFG_OFF) {
+ if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) {
+- /* Track of warning cnt for cgn_info */
+- atomic_inc(&phba->cgn_fabric_warn_cnt);
+ /* Track of warning cnt for SYNC_WQE */
+ atomic_inc(&phba->cgn_sync_warn_cnt);
+ }
++ /* Track warning cnt and freq for cgn_info
++ * regardless of whether CMF is configured for
++ * Signals or FPINs.
++ */
++ atomic_inc(&phba->cgn_fabric_warn_cnt);
+ cleanup:
+ /* Save frequency in ms */
+ phba->cgn_fpin_frequency =
+@@ -9597,14 +9594,10 @@ cleanup:
+ if (phba->cgn_i) {
+ cp = (struct lpfc_cgn_info *)
+ phba->cgn_i->virt;
+- if (phba->cgn_reg_fpin &
+- LPFC_CGN_FPIN_ALARM)
+- cp->cgn_alarm_freq =
+- cpu_to_le16(value);
+- if (phba->cgn_reg_fpin &
+- LPFC_CGN_FPIN_WARN)
+- cp->cgn_warn_freq =
+- cpu_to_le16(value);
++ cp->cgn_alarm_freq =
++ cpu_to_le16(value);
++ cp->cgn_warn_freq =
++ cpu_to_le16(value);
+ crc = lpfc_cgn_calc_crc32
+ (cp,
+ LPFC_CGN_INFO_SZ,
+@@ -10713,6 +10706,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ irsp->ulpStatus, irsp->un.ulpWord[4]);
+ goto fdisc_failed;
+ }
++
++ lpfc_check_nlp_post_devloss(vport, ndlp);
++
+ spin_lock_irq(shost->host_lock);
+ vport->fc_flag &= ~FC_VPORT_CVL_RCVD;
+ vport->fc_flag &= ~FC_VPORT_LOGO_RCVD;
+diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
+index 7195ca0275f93..4bb0a15cfcc01 100644
+--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
++++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
+@@ -209,7 +209,12 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
+
+ spin_lock_irqsave(&ndlp->lock, iflags);
+ ndlp->nlp_flag |= NLP_IN_DEV_LOSS;
+- ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
++
++ /* If there is a PLOGI in progress, and we are in a
++ * NLP_NPR_2B_DISC state, don't turn off the flag.
++ */
++ if (ndlp->nlp_state != NLP_STE_PLOGI_ISSUE)
++ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+
+ /*
+ * The backend does not expect any more calls associated with this
+@@ -340,6 +345,37 @@ static void lpfc_check_inactive_vmid(struct lpfc_hba *phba)
+ lpfc_destroy_vport_work_array(phba, vports);
+ }
+
++/**
++ * lpfc_check_nlp_post_devloss - Check to restore ndlp refcnt after devloss
++ * @vport: Pointer to vport object.
++ * @ndlp: Pointer to remote node object.
++ *
++ * If NLP_IN_RECOV_POST_DEV_LOSS flag was set due to outstanding recovery of
++ * node during dev_loss_tmo processing, then this function restores the nlp_put
++ * kref decrement from lpfc_dev_loss_tmo_handler.
++ **/
++void
++lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp)
++{
++ unsigned long iflags;
++
++ spin_lock_irqsave(&ndlp->lock, iflags);
++ if (ndlp->save_flags & NLP_IN_RECOV_POST_DEV_LOSS) {
++ ndlp->save_flags &= ~NLP_IN_RECOV_POST_DEV_LOSS;
++ spin_unlock_irqrestore(&ndlp->lock, iflags);
++ lpfc_nlp_get(ndlp);
++ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE,
++ "8438 Devloss timeout reversed on DID x%x "
++ "refcnt %d ndlp %p flag x%x "
++ "port_state = x%x\n",
++ ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp,
++ ndlp->nlp_flag, vport->port_state);
++ spin_lock_irqsave(&ndlp->lock, iflags);
++ }
++ spin_unlock_irqrestore(&ndlp->lock, iflags);
++}
++
+ /**
+ * lpfc_dev_loss_tmo_handler - Remote node devloss timeout handler
+ * @ndlp: Pointer to remote node object.
+@@ -358,6 +394,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
+ uint8_t *name;
+ int warn_on = 0;
+ int fcf_inuse = 0;
++ bool recovering = false;
++ struct fc_vport *fc_vport = NULL;
+ unsigned long iflags;
+
+ vport = ndlp->vport;
+@@ -394,6 +432,64 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
+
+ /* Fabric nodes are done. */
+ if (ndlp->nlp_type & NLP_FABRIC) {
++ spin_lock_irqsave(&ndlp->lock, iflags);
++ /* In massive vport configuration settings, it's possible
++ * dev_loss_tmo fired during node recovery. So, check if
++ * fabric nodes are in discovery states outstanding.
++ */
++ switch (ndlp->nlp_DID) {
++ case Fabric_DID:
++ fc_vport = vport->fc_vport;
++ if (fc_vport &&
++ fc_vport->vport_state == FC_VPORT_INITIALIZING)
++ recovering = true;
++ break;
++ case Fabric_Cntl_DID:
++ if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND)
++ recovering = true;
++ break;
++ case FDMI_DID:
++ fallthrough;
++ case NameServer_DID:
++ if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE &&
++ ndlp->nlp_state <= NLP_STE_REG_LOGIN_ISSUE)
++ recovering = true;
++ break;
++ }
++ spin_unlock_irqrestore(&ndlp->lock, iflags);
++
++ /* Mark an NLP_IN_RECOV_POST_DEV_LOSS flag to know if reversing
++ * the following lpfc_nlp_put is necessary after fabric node is
++ * recovered.
++ */
++ if (recovering) {
++ lpfc_printf_vlog(vport, KERN_INFO,
++ LOG_DISCOVERY | LOG_NODE,
++ "8436 Devloss timeout marked on "
++ "DID x%x refcnt %d ndlp %p "
++ "flag x%x port_state = x%x\n",
++ ndlp->nlp_DID, kref_read(&ndlp->kref),
++ ndlp, ndlp->nlp_flag,
++ vport->port_state);
++ spin_lock_irqsave(&ndlp->lock, iflags);
++ ndlp->save_flags |= NLP_IN_RECOV_POST_DEV_LOSS;
++ spin_unlock_irqrestore(&ndlp->lock, iflags);
++ } else if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
++ /* Fabric node fully recovered before this dev_loss_tmo
++ * queue work is processed. Thus, ignore the
++ * dev_loss_tmo event.
++ */
++ lpfc_printf_vlog(vport, KERN_INFO,
++ LOG_DISCOVERY | LOG_NODE,
++ "8437 Devloss timeout ignored on "
++ "DID x%x refcnt %d ndlp %p "
++ "flag x%x port_state = x%x\n",
++ ndlp->nlp_DID, kref_read(&ndlp->kref),
++ ndlp, ndlp->nlp_flag,
++ vport->port_state);
++ return fcf_inuse;
++ }
++
+ lpfc_nlp_put(ndlp);
+ return fcf_inuse;
+ }
+@@ -423,6 +519,14 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
+ ndlp->nlp_state, ndlp->nlp_rpi);
+ }
+
++ /* If we are devloss, but we are in the process of rediscovering the
++ * ndlp, don't issue a NLP_EVT_DEVICE_RM event.
++ */
++ if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE &&
++ ndlp->nlp_state <= NLP_STE_PRLI_ISSUE) {
++ return fcf_inuse;
++ }
++
+ if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
+ lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+
+@@ -765,10 +869,16 @@ lpfc_work_done(struct lpfc_hba *phba)
+ if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+ lpfc_sli4_post_async_mbox(phba);
+
+- if (ha_copy & HA_ERATT)
++ if (ha_copy & HA_ERATT) {
+ /* Handle the error attention event */
+ lpfc_handle_eratt(phba);
+
++ if (phba->fw_dump_cmpl) {
++ complete(phba->fw_dump_cmpl);
++ phba->fw_dump_cmpl = NULL;
++ }
++ }
++
+ if (ha_copy & HA_MBATT)
+ lpfc_sli_handle_mb_event(phba);
+
+@@ -4351,6 +4461,8 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ goto out;
+ }
+
++ lpfc_check_nlp_post_devloss(vport, ndlp);
++
+ if (phba->sli_rev < LPFC_SLI_REV4)
+ ndlp->nlp_rpi = mb->un.varWords[0];
+
+@@ -4360,6 +4472,7 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ ndlp->nlp_state);
+
+ ndlp->nlp_flag |= NLP_RPI_REGISTERED;
++ ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
+ ndlp->nlp_type |= NLP_FABRIC;
+ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+
+@@ -4449,8 +4562,9 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ fc_remote_port_rolechg(rport, rport_ids.roles);
+
+ lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+- "3183 %s rport x%px DID x%x, role x%x\n",
+- __func__, rport, rport->port_id, rport->roles);
++ "3183 %s rport x%px DID x%x, role x%x refcnt %d\n",
++ __func__, rport, rport->port_id, rport->roles,
++ kref_read(&ndlp->kref));
+
+ if ((rport->scsi_target_id != -1) &&
+ (rport->scsi_target_id < LPFC_MAX_TARGET)) {
+@@ -4475,8 +4589,9 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
+ "3184 rport unregister x%06x, rport x%px "
+- "xptflg x%x\n",
+- ndlp->nlp_DID, rport, ndlp->fc4_xpt_flags);
++ "xptflg x%x refcnt %d\n",
++ ndlp->nlp_DID, rport, ndlp->fc4_xpt_flags,
++ kref_read(&ndlp->kref));
+
+ fc_remote_port_delete(rport);
+ lpfc_nlp_put(ndlp);
+@@ -4525,9 +4640,10 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
+ void
+ lpfc_nlp_reg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
+-
+ unsigned long iflags;
+
++ lpfc_check_nlp_post_devloss(vport, ndlp);
++
+ spin_lock_irqsave(&ndlp->lock, iflags);
+ if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) {
+ /* Already registered with backend, trigger rescan */
+@@ -5233,6 +5349,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+ if (rc == MBX_NOT_FINISHED) {
++ ndlp->nlp_flag &= ~NLP_UNREG_INP;
+ mempool_free(mbox, phba->mbox_mem_pool);
+ acc_plogi = 1;
+ }
+diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
+index 7359505e60419..824fc8c08840b 100644
+--- a/drivers/scsi/lpfc/lpfc_hw4.h
++++ b/drivers/scsi/lpfc/lpfc_hw4.h
+@@ -4448,6 +4448,9 @@ struct wqe_common {
+ #define wqe_sup_SHIFT 6
+ #define wqe_sup_MASK 0x00000001
+ #define wqe_sup_WORD word11
++#define wqe_ffrq_SHIFT 6
++#define wqe_ffrq_MASK 0x00000001
++#define wqe_ffrq_WORD word11
+ #define wqe_wqec_SHIFT 7
+ #define wqe_wqec_MASK 0x00000001
+ #define wqe_wqec_WORD word11
+diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
+index 195169badb372..2ca4cf1b58c4f 100644
+--- a/drivers/scsi/lpfc/lpfc_init.c
++++ b/drivers/scsi/lpfc/lpfc_init.c
+@@ -1606,6 +1606,11 @@ void
+ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
+ {
+ spin_lock_irq(&phba->hbalock);
++ if (phba->link_state == LPFC_HBA_ERROR &&
++ phba->hba_flag & HBA_PCI_ERR) {
++ spin_unlock_irq(&phba->hbalock);
++ return;
++ }
+ phba->link_state = LPFC_HBA_ERROR;
+ spin_unlock_irq(&phba->hbalock);
+
+@@ -1945,7 +1950,6 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
+ if (pci_channel_offline(phba->pcidev)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ "3166 pci channel is offline\n");
+- lpfc_sli4_offline_eratt(phba);
+ return;
+ }
+
+@@ -2055,7 +2059,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
+ }
+ if (reg_err1 == SLIPORT_ERR1_REG_ERR_CODE_2 &&
+ reg_err2 == SLIPORT_ERR2_REG_FW_RESTART) {
+- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "3143 Port Down: Firmware Update "
+ "Detected\n");
+ en_rn_msg = false;
+@@ -3643,6 +3647,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
+ struct lpfc_vport **vports;
+ struct Scsi_Host *shost;
+ int i;
++ int offline = 0;
+
+ if (vport->fc_flag & FC_OFFLINE_MODE)
+ return;
+@@ -3651,6 +3656,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
+
+ lpfc_linkdown(phba);
+
++ offline = pci_channel_offline(phba->pcidev);
++
+ /* Issue an unreg_login to all nodes on all vports */
+ vports = lpfc_create_vport_work_array(phba);
+ if (vports != NULL) {
+@@ -3673,7 +3680,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+ spin_unlock_irq(&ndlp->lock);
+
+- lpfc_unreg_rpi(vports[i], ndlp);
++ if (offline) {
++ spin_lock_irq(&ndlp->lock);
++ ndlp->nlp_flag &= ~(NLP_UNREG_INP |
++ NLP_RPI_REGISTERED);
++ spin_unlock_irq(&ndlp->lock);
++ } else {
++ lpfc_unreg_rpi(vports[i], ndlp);
++ }
+ /*
+ * Whenever an SLI4 port goes offline, free the
+ * RPI. Get a new RPI when the adapter port
+@@ -3694,12 +3708,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
+ lpfc_disc_state_machine(vports[i], ndlp,
+ NULL, NLP_EVT_DEVICE_RECOVERY);
+
+- /* Don't remove the node unless the
++ /* Don't remove the node unless the node
+ * has been unregistered with the
+- * transport. If so, let dev_loss
+- * take care of the node.
++ * transport, and we're not in recovery
++ * before dev_loss_tmo triggered.
++ * Otherwise, let dev_loss take care of
++ * the node.
+ */
+- if (!(ndlp->fc4_xpt_flags &
++ if (!(ndlp->save_flags &
++ NLP_IN_RECOV_POST_DEV_LOSS) &&
++ !(ndlp->fc4_xpt_flags &
+ (NVME_XPT_REGD | SCSI_XPT_REGD)))
+ lpfc_disc_state_machine
+ (vports[i], ndlp,
+@@ -4649,7 +4667,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
+ rc = lpfc_vmid_res_alloc(phba, vport);
+
+ if (rc)
+- goto out;
++ goto out_put_shost;
+
+ /* Initialize all internally managed lists. */
+ INIT_LIST_HEAD(&vport->fc_nodes);
+@@ -4667,16 +4685,17 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
+
+ error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
+ if (error)
+- goto out_put_shost;
++ goto out_free_vmid;
+
+ spin_lock_irq(&phba->port_list_lock);
+ list_add_tail(&vport->listentry, &phba->port_list);
+ spin_unlock_irq(&phba->port_list_lock);
+ return vport;
+
+-out_put_shost:
++out_free_vmid:
+ kfree(vport->vmid);
+ bitmap_free(vport->vmid_priority_range);
++out_put_shost:
+ scsi_host_put(shost);
+ out:
+ return NULL;
+@@ -5310,8 +5329,10 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
+ */
+ if (!(phba->hba_flag & HBA_FCOE_MODE)) {
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+- if (rc == MBX_NOT_FINISHED)
++ if (rc == MBX_NOT_FINISHED) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ goto out_free_dmabuf;
++ }
+ return;
+ }
+ /*
+@@ -5407,38 +5428,12 @@ lpfc_async_link_speed_to_read_top(struct lpfc_hba *phba, uint8_t speed_code)
+ void
+ lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba)
+ {
+- struct rxtable_entry *entry;
+- int cnt = 0, head, tail, last, start;
+-
+- head = atomic_read(&phba->rxtable_idx_head);
+- tail = atomic_read(&phba->rxtable_idx_tail);
+- if (!phba->rxtable || head == tail) {
+- lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
+- "4411 Rxtable is empty\n");
+- return;
+- }
+- last = tail;
+- start = head;
+-
+- /* Display the last LPFC_MAX_RXMONITOR_DUMP entries from the rxtable */
+- while (start != last) {
+- if (start)
+- start--;
+- else
+- start = LPFC_MAX_RXMONITOR_ENTRY - 1;
+- entry = &phba->rxtable[start];
++ if (!phba->rx_monitor) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+- "4410 %02d: MBPI %lld Xmit %lld Cmpl %lld "
+- "Lat %lld ASz %lld Info %02d BWUtil %d "
+- "Int %d slot %d\n",
+- cnt, entry->max_bytes_per_interval,
+- entry->total_bytes, entry->rcv_bytes,
+- entry->avg_io_latency, entry->avg_io_size,
+- entry->cmf_info, entry->timer_utilization,
+- entry->timer_interval, start);
+- cnt++;
+- if (cnt >= LPFC_MAX_RXMONITOR_DUMP)
+- return;
++ "4411 Rx Monitor Info is empty.\n");
++ } else {
++ lpfc_rx_monitor_report(phba, phba->rx_monitor, NULL, 0,
++ LPFC_MAX_RXMONITOR_DUMP);
+ }
+ }
+
+@@ -5789,21 +5784,8 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba)
+
+ /* Use the frequency found in the last rcv'ed FPIN */
+ value = phba->cgn_fpin_frequency;
+- if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN)
+- cp->cgn_warn_freq = cpu_to_le16(value);
+- if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM)
+- cp->cgn_alarm_freq = cpu_to_le16(value);
+-
+- /* Frequency (in ms) Signal Warning/Signal Congestion Notifications
+- * are received by the HBA
+- */
+- value = phba->cgn_sig_freq;
+-
+- if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+- phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+- cp->cgn_warn_freq = cpu_to_le16(value);
+- if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM)
+- cp->cgn_alarm_freq = cpu_to_le16(value);
++ cp->cgn_warn_freq = cpu_to_le16(value);
++ cp->cgn_alarm_freq = cpu_to_le16(value);
+
+ lvalue = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ,
+ LPFC_CGN_CRC32_SEED);
+@@ -5858,11 +5840,10 @@ lpfc_cmf_timer(struct hrtimer *timer)
+ {
+ struct lpfc_hba *phba = container_of(timer, struct lpfc_hba,
+ cmf_timer);
+- struct rxtable_entry *entry;
++ struct rx_info_entry entry;
+ uint32_t io_cnt;
+- uint32_t head, tail;
+ uint32_t busy, max_read;
+- uint64_t total, rcv, lat, mbpi;
++ uint64_t total, rcv, lat, mbpi, extra, cnt;
+ int timer_interval = LPFC_CMF_INTERVAL;
+ uint32_t ms;
+ struct lpfc_cgn_stat *cgs;
+@@ -5929,12 +5910,27 @@ lpfc_cmf_timer(struct hrtimer *timer)
+ phba->hba_flag & HBA_SETUP) {
+ mbpi = phba->cmf_last_sync_bw;
+ phba->cmf_last_sync_bw = 0;
+- lpfc_issue_cmf_sync_wqe(phba, LPFC_CMF_INTERVAL, total);
++ extra = 0;
++
++ /* Calculate any extra bytes needed to account for the
++ * timer accuracy. If we are less than LPFC_CMF_INTERVAL
++ * calculate the adjustment needed for total to reflect
++ * a full LPFC_CMF_INTERVAL.
++ */
++ if (ms && ms < LPFC_CMF_INTERVAL) {
++ cnt = div_u64(total, ms); /* bytes per ms */
++ cnt *= LPFC_CMF_INTERVAL; /* what total should be */
++ if (cnt > mbpi)
++ cnt = mbpi;
++ extra = cnt - total;
++ }
++ lpfc_issue_cmf_sync_wqe(phba, LPFC_CMF_INTERVAL, total + extra);
+ } else {
+ /* For Monitor mode or link down we want mbpi
+ * to be the full link speed
+ */
+ mbpi = phba->cmf_link_byte_count;
++ extra = 0;
+ }
+ phba->cmf_timer_cnt++;
+
+@@ -5960,39 +5956,30 @@ lpfc_cmf_timer(struct hrtimer *timer)
+ }
+
+ /* Save rxmonitor information for debug */
+- if (phba->rxtable) {
+- head = atomic_xchg(&phba->rxtable_idx_head,
+- LPFC_RXMONITOR_TABLE_IN_USE);
+- entry = &phba->rxtable[head];
+- entry->total_bytes = total;
+- entry->rcv_bytes = rcv;
+- entry->cmf_busy = busy;
+- entry->cmf_info = phba->cmf_active_info;
++ if (phba->rx_monitor) {
++ entry.total_bytes = total;
++ entry.cmf_bytes = total + extra;
++ entry.rcv_bytes = rcv;
++ entry.cmf_busy = busy;
++ entry.cmf_info = phba->cmf_active_info;
+ if (io_cnt) {
+- entry->avg_io_latency = div_u64(lat, io_cnt);
+- entry->avg_io_size = div_u64(rcv, io_cnt);
++ entry.avg_io_latency = div_u64(lat, io_cnt);
++ entry.avg_io_size = div_u64(rcv, io_cnt);
+ } else {
+- entry->avg_io_latency = 0;
+- entry->avg_io_size = 0;
++ entry.avg_io_latency = 0;
++ entry.avg_io_size = 0;
+ }
+- entry->max_read_cnt = max_read;
+- entry->io_cnt = io_cnt;
+- entry->max_bytes_per_interval = mbpi;
++ entry.max_read_cnt = max_read;
++ entry.io_cnt = io_cnt;
++ entry.max_bytes_per_interval = mbpi;
+ if (phba->cmf_active_mode == LPFC_CFG_MANAGED)
+- entry->timer_utilization = phba->cmf_last_ts;
++ entry.timer_utilization = phba->cmf_last_ts;
+ else
+- entry->timer_utilization = ms;
+- entry->timer_interval = ms;
++ entry.timer_utilization = ms;
++ entry.timer_interval = ms;
+ phba->cmf_last_ts = 0;
+
+- /* Increment rxtable index */
+- head = (head + 1) % LPFC_MAX_RXMONITOR_ENTRY;
+- tail = atomic_read(&phba->rxtable_idx_tail);
+- if (head == tail) {
+- tail = (tail + 1) % LPFC_MAX_RXMONITOR_ENTRY;
+- atomic_set(&phba->rxtable_idx_tail, tail);
+- }
+- atomic_set(&phba->rxtable_idx_head, head);
++ lpfc_rx_monitor_record(phba->rx_monitor, &entry);
+ }
+
+ if (phba->cmf_active_mode == LPFC_CFG_MONITOR) {
+@@ -6262,8 +6249,10 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
+ }
+
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+- if (rc == MBX_NOT_FINISHED)
++ if (rc == MBX_NOT_FINISHED) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ goto out_free_dmabuf;
++ }
+ return;
+
+ out_free_dmabuf:
+@@ -6485,9 +6474,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
+ /* Alarm overrides warning, so check that first */
+ if (cgn_signal->alarm_cnt) {
+ if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+- /* Keep track of alarm cnt for cgn_info */
+- atomic_add(cgn_signal->alarm_cnt,
+- &phba->cgn_fabric_alarm_cnt);
+ /* Keep track of alarm cnt for CMF_SYNC_WQE */
+ atomic_add(cgn_signal->alarm_cnt,
+ &phba->cgn_sync_alarm_cnt);
+@@ -6496,8 +6482,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
+ /* signal action needs to be taken */
+ if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY ||
+ phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) {
+- /* Keep track of warning cnt for cgn_info */
+- atomic_add(cnt, &phba->cgn_fabric_warn_cnt);
+ /* Keep track of warning cnt for CMF_SYNC_WQE */
+ atomic_add(cnt, &phba->cgn_sync_warn_cnt);
+ }
+@@ -7072,6 +7056,8 @@ lpfc_sli4_cgn_params_read(struct lpfc_hba *phba)
+ /* Find out if the FW has a new set of congestion parameters. */
+ len = sizeof(struct lpfc_cgn_param);
+ pdata = kzalloc(len, GFP_KERNEL);
++ if (!pdata)
++ return -ENOMEM;
+ ret = lpfc_read_object(phba, (char *)LPFC_PORT_CFG_NAME,
+ pdata, len);
+
+@@ -7890,7 +7876,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
+ /* Allocate device driver memory */
+ rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ);
+ if (rc)
+- return -ENOMEM;
++ goto out_destroy_workqueue;
+
+ /* IF Type 2 ports get initialized now. */
+ if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
+@@ -8306,6 +8292,9 @@ out_free_bsmbx:
+ lpfc_destroy_bootstrap_mbox(phba);
+ out_free_mem:
+ lpfc_mem_free(phba);
++out_destroy_workqueue:
++ destroy_workqueue(phba->wq);
++ phba->wq = NULL;
+ return rc;
+ }
+
+@@ -11749,7 +11738,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
+ goto out_iounmap_all;
+ } else {
+ error = -ENOMEM;
+- goto out_iounmap_all;
++ goto out_iounmap_ctrl;
+ }
+ }
+
+@@ -11767,7 +11756,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
+ dev_err(&pdev->dev,
+ "ioremap failed for SLI4 HBA dpp registers.\n");
+ error = -ENOMEM;
+- goto out_iounmap_ctrl;
++ goto out_iounmap_all;
+ }
+ phba->pci_bar4_memmap_p = phba->sli4_hba.dpp_regs_memmap_p;
+ }
+@@ -11792,9 +11781,11 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
+ return 0;
+
+ out_iounmap_all:
+- iounmap(phba->sli4_hba.drbl_regs_memmap_p);
++ if (phba->sli4_hba.drbl_regs_memmap_p)
++ iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+ out_iounmap_ctrl:
+- iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
++ if (phba->sli4_hba.ctrl_regs_memmap_p)
++ iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+ out_iounmap_conf:
+ iounmap(phba->sli4_hba.conf_regs_memmap_p);
+
+@@ -14080,6 +14071,10 @@ lpfc_pci_resume_one_s3(struct device *dev_d)
+ return error;
+ }
+
++ /* Init cpu_map array */
++ lpfc_cpu_map_array_init(phba);
++ /* Init hba_eq_hdl array */
++ lpfc_hba_eq_hdl_array_init(phba);
+ /* Configure and enable interrupt */
+ intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
+ if (intr_mode == LPFC_INTR_ERROR) {
+@@ -15033,14 +15028,17 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
+ lpfc_sli4_prep_dev_for_recover(phba);
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
++ phba->hba_flag |= HBA_PCI_ERR;
+ /* Fatal error, prepare for slot reset */
+ lpfc_sli4_prep_dev_for_reset(phba);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
++ phba->hba_flag |= HBA_PCI_ERR;
+ /* Permanent failure, prepare for device down */
+ lpfc_sli4_prep_dev_for_perm_failure(phba);
+ return PCI_ERS_RESULT_DISCONNECT;
+ default:
++ phba->hba_flag |= HBA_PCI_ERR;
+ /* Unknown state, prepare and request slot reset */
+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ "2825 Unknown PCI error state: x%x\n", state);
+@@ -15084,6 +15082,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
+
+ pci_restore_state(pdev);
+
++ phba->hba_flag &= ~HBA_PCI_ERR;
+ /*
+ * As the new kernel behavior of pci_restore_state() API call clears
+ * device saved_state flag, need to save the restored state again.
+@@ -15097,6 +15096,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
+ psli->sli_flag &= ~LPFC_SLI_ACTIVE;
+ spin_unlock_irq(&phba->hbalock);
+
++ /* Init cpu_map array */
++ lpfc_cpu_map_array_init(phba);
+ /* Configure and enable interrupt */
+ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
+ if (intr_mode == LPFC_INTR_ERROR) {
+@@ -15106,6 +15107,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
+ return PCI_ERS_RESULT_DISCONNECT;
+ } else
+ phba->intr_mode = intr_mode;
++ lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
+
+ /* Log the current active interrupt mode */
+ lpfc_log_intr_mode(phba, phba->intr_mode);
+@@ -15307,6 +15309,10 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+ struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+ pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+
++ if (phba->link_state == LPFC_HBA_ERROR &&
++ phba->hba_flag & HBA_IOQ_FLUSH)
++ return PCI_ERS_RESULT_NEED_RESET;
++
+ switch (phba->pci_dev_grp) {
+ case LPFC_PCI_DEV_LP:
+ rc = lpfc_io_error_detected_s3(pdev, state);
+@@ -15554,34 +15560,7 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba)
+ unsigned int temp_idx;
+ int i;
+ int j = 0;
+- unsigned long rem_nsec, iflags;
+- bool log_verbose = false;
+- struct lpfc_vport *port_iterator;
+-
+- /* Don't dump messages if we explicitly set log_verbose for the
+- * physical port or any vport.
+- */
+- if (phba->cfg_log_verbose)
+- return;
+-
+- spin_lock_irqsave(&phba->port_list_lock, iflags);
+- list_for_each_entry(port_iterator, &phba->port_list, listentry) {
+- if (port_iterator->load_flag & FC_UNLOADING)
+- continue;
+- if (scsi_host_get(lpfc_shost_from_vport(port_iterator))) {
+- if (port_iterator->cfg_log_verbose)
+- log_verbose = true;
+-
+- scsi_host_put(lpfc_shost_from_vport(port_iterator));
+-
+- if (log_verbose) {
+- spin_unlock_irqrestore(&phba->port_list_lock,
+- iflags);
+- return;
+- }
+- }
+- }
+- spin_unlock_irqrestore(&phba->port_list_lock, iflags);
++ unsigned long rem_nsec;
+
+ if (atomic_cmpxchg(&phba->dbg_log_dmping, 0, 1) != 0)
+ return;
+diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h
+index 7d480c7987942..a5aafe230c74f 100644
+--- a/drivers/scsi/lpfc/lpfc_logmsg.h
++++ b/drivers/scsi/lpfc/lpfc_logmsg.h
+@@ -73,7 +73,7 @@ do { \
+ #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \
+ do { \
+ { if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '3')) { \
+- if ((mask) & LOG_TRACE_EVENT) \
++ if ((mask) & LOG_TRACE_EVENT && !(vport)->cfg_log_verbose) \
+ lpfc_dmp_dbg((vport)->phba); \
+ dev_printk(level, &((vport)->phba->pcidev)->dev, "%d:(%d):" \
+ fmt, (vport)->phba->brd_no, vport->vpi, ##arg); \
+@@ -89,11 +89,11 @@ do { \
+ (phba)->pport->cfg_log_verbose : \
+ (phba)->cfg_log_verbose; \
+ if (((mask) & log_verbose) || (level[1] <= '3')) { \
+- if ((mask) & LOG_TRACE_EVENT) \
++ if ((mask) & LOG_TRACE_EVENT && !log_verbose) \
+ lpfc_dmp_dbg(phba); \
+ dev_printk(level, &((phba)->pcidev)->dev, "%d:" \
+ fmt, phba->brd_no, ##arg); \
+- } else if (!(phba)->cfg_log_verbose)\
++ } else if (!log_verbose)\
+ lpfc_dbg_print(phba, "%d:" fmt, phba->brd_no, ##arg); \
+ } \
+ } while (0)
+diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
+index 870e53b8f81dd..5d36b35148646 100644
+--- a/drivers/scsi/lpfc/lpfc_mem.c
++++ b/drivers/scsi/lpfc/lpfc_mem.c
+@@ -344,9 +344,12 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
+ phba->cgn_i = NULL;
+ }
+
+- /* Free RX table */
+- kfree(phba->rxtable);
+- phba->rxtable = NULL;
++ /* Free RX Monitor */
++ if (phba->rx_monitor) {
++ lpfc_rx_monitor_destroy_ring(phba->rx_monitor);
++ kfree(phba->rx_monitor);
++ phba->rx_monitor = NULL;
++ }
+
+ /* Free the iocb lookup array */
+ kfree(psli->iocbq_lookup);
+diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
+index 27263f02ab9f6..2bd35a7424c25 100644
+--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
++++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
+@@ -322,6 +322,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ {
+ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *pcmd;
++ struct lpfc_dmabuf *mp;
+ uint64_t nlp_portwwn = 0;
+ uint32_t *lp;
+ IOCB_t *icmd;
+@@ -571,6 +572,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ * a default RPI.
+ */
+ if (phba->sli_rev == LPFC_SLI_REV4) {
++ mp = (struct lpfc_dmabuf *)login_mbox->ctx_buf;
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
+ mempool_free(login_mbox, phba->mbox_mem_pool);
+ login_mbox = NULL;
+ } else {
+@@ -804,7 +810,8 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
+ if (ndlp->nlp_DID == Fabric_DID) {
+- if (vport->port_state <= LPFC_FDISC)
++ if (vport->port_state <= LPFC_FDISC ||
++ vport->fc_flag & FC_PT2PT)
+ goto out;
+ lpfc_linkdown_port(vport);
+ spin_lock_irq(shost->host_lock);
+@@ -1955,8 +1962,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
+ * is configured try it.
+ */
+ ndlp->nlp_fc4_type |= NLP_FC4_FCP;
+- if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
+- (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
++ if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) &&
++ (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH ||
++ vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
+ ndlp->nlp_fc4_type |= NLP_FC4_NVME;
+ /* We need to update the localport also */
+ lpfc_nvme_update_localport(vport);
+diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
+index 479b3eed62085..4e0c0b273e5fe 100644
+--- a/drivers/scsi/lpfc/lpfc_nvme.c
++++ b/drivers/scsi/lpfc/lpfc_nvme.c
+@@ -209,8 +209,9 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
+ * calling state machine to remove the node.
+ */
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
+- "6146 remoteport delete of remoteport x%px\n",
+- remoteport);
++ "6146 remoteport delete of remoteport x%px, ndlp x%px "
++ "DID x%x xflags x%x\n",
++ remoteport, ndlp, ndlp->nlp_DID, ndlp->fc4_xpt_flags);
+ spin_lock_irq(&ndlp->lock);
+
+ /* The register rebind might have occurred before the delete
+@@ -936,6 +937,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ int cpu;
+ #endif
++ int offline = 0;
+
+ /* Sanity check on return of outstanding command */
+ if (!lpfc_ncmd) {
+@@ -1097,11 +1099,12 @@ out_err:
+ nCmd->transferred_length = 0;
+ nCmd->rcv_rsplen = 0;
+ nCmd->status = NVME_SC_INTERNAL;
++ offline = pci_channel_offline(vport->phba->pcidev);
+ }
+ }
+
+ /* pick up SLI4 exhange busy condition */
+- if (bf_get(lpfc_wcqe_c_xb, wcqe))
++ if (bf_get(lpfc_wcqe_c_xb, wcqe) && !offline)
+ lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
+ else
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+@@ -1181,7 +1184,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
+ {
+ struct lpfc_hba *phba = vport->phba;
+ struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
+- struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq);
++ struct nvme_common_command *sqe;
++ struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq;
+ union lpfc_wqe128 *wqe = &pwqeq->wqe;
+ uint32_t req_len;
+
+@@ -1238,8 +1242,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
+ cstat->control_requests++;
+ }
+
+- if (pnode->nlp_nvme_info & NLP_NVME_NSLER)
++ if (pnode->nlp_nvme_info & NLP_NVME_NSLER) {
+ bf_set(wqe_erp, &wqe->generic.wqe_com, 1);
++ sqe = &((struct nvme_fc_cmd_iu *)
++ nCmd->cmdaddr)->sqe.common;
++ if (sqe->opcode == nvme_admin_async_event)
++ bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1);
++ }
++
+ /*
+ * Finish initializing those WQE fields that are independent
+ * of the nvme_cmnd request_buffer
+@@ -2166,6 +2176,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
+ abts_nvme = 0;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ qp = &phba->sli4_hba.hdwq[i];
++ if (!vport || !vport->localport ||
++ !qp || !qp->io_wq)
++ return;
++
+ pring = qp->io_wq->pring;
+ if (!pring)
+ continue;
+@@ -2173,6 +2187,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
+ abts_scsi += qp->abts_scsi_io_bufs;
+ abts_nvme += qp->abts_nvme_io_bufs;
+ }
++ if (!vport || !vport->localport ||
++ vport->phba->hba_flag & HBA_PCI_ERR)
++ return;
++
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
+ "6176 Lport x%px Localport x%px wait "
+ "timed out. Pending %d [%d:%d]. "
+@@ -2212,6 +2230,8 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
+ return;
+
+ localport = vport->localport;
++ if (!localport)
++ return;
+ lport = (struct lpfc_nvme_lport *)localport->private;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
+@@ -2528,7 +2548,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ * return values is ignored. The upcall is a courtesy to the
+ * transport.
+ */
+- if (vport->load_flag & FC_UNLOADING)
++ if (vport->load_flag & FC_UNLOADING ||
++ unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
+ (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
+
+ ret = nvme_fc_unregister_remoteport(remoteport);
+@@ -2556,6 +2577,42 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ vport->localport, ndlp->rport, ndlp->nlp_DID);
+ }
+
++/**
++ * lpfc_sli4_nvme_pci_offline_aborted - Fast-path process of NVME xri abort
++ * @phba: pointer to lpfc hba data structure.
++ * @lpfc_ncmd: The nvme job structure for the request being aborted.
++ *
++ * This routine is invoked by the worker thread to process a SLI4 fast-path
++ * NVME aborted xri. Aborted NVME IO commands are completed to the transport
++ * here.
++ **/
++void
++lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
++ struct lpfc_io_buf *lpfc_ncmd)
++{
++ struct nvmefc_fcp_req *nvme_cmd = NULL;
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
++ "6533 %s nvme_cmd %p tag x%x abort complete and "
++ "xri released\n", __func__,
++ lpfc_ncmd->nvmeCmd,
++ lpfc_ncmd->cur_iocbq.iotag);
++
++ /* Aborted NVME commands are required to not complete
++ * before the abort exchange command fully completes.
++ * Once completed, it is available via the put list.
++ */
++ if (lpfc_ncmd->nvmeCmd) {
++ nvme_cmd = lpfc_ncmd->nvmeCmd;
++ nvme_cmd->transferred_length = 0;
++ nvme_cmd->rcv_rsplen = 0;
++ nvme_cmd->status = NVME_SC_INTERNAL;
++ nvme_cmd->done(nvme_cmd);
++ lpfc_ncmd->nvmeCmd = NULL;
++ }
++ lpfc_release_nvme_buf(phba, lpfc_ncmd);
++}
++
+ /**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
+index befdf864c43bd..4813adec0301d 100644
+--- a/drivers/scsi/lpfc/lpfc_scsi.c
++++ b/drivers/scsi/lpfc/lpfc_scsi.c
+@@ -117,8 +117,6 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
+ }
+ }
+
+-#define LPFC_INVALID_REFTAG ((u32)-1)
+-
+ /**
+ * lpfc_update_stats - Update statistical data for the command completion
+ * @vport: The virtual port on which this call is executing.
+@@ -493,8 +491,8 @@ void
+ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri, int idx)
+ {
+- uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+- uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
++ u16 xri = 0;
++ u16 rxid = 0;
+ struct lpfc_io_buf *psb, *next_psb;
+ struct lpfc_sli4_hdw_queue *qp;
+ unsigned long iflag = 0;
+@@ -504,15 +502,22 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+ int rrq_empty = 0;
+ struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring;
+ struct scsi_cmnd *cmd;
++ int offline = 0;
+
+ if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
+ return;
+-
++ offline = pci_channel_offline(phba->pcidev);
++ if (!offline) {
++ xri = bf_get(lpfc_wcqe_xa_xri, axri);
++ rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
++ }
+ qp = &phba->sli4_hba.hdwq[idx];
+ spin_lock_irqsave(&phba->hbalock, iflag);
+ spin_lock(&qp->abts_io_buf_list_lock);
+ list_for_each_entry_safe(psb, next_psb,
+ &qp->lpfc_abts_io_buf_list, list) {
++ if (offline)
++ xri = psb->cur_iocbq.sli4_xritag;
+ if (psb->cur_iocbq.sli4_xritag == xri) {
+ list_del_init(&psb->list);
+ psb->flags &= ~LPFC_SBUF_XBUSY;
+@@ -521,8 +526,15 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+ qp->abts_nvme_io_bufs--;
+ spin_unlock(&qp->abts_io_buf_list_lock);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+- lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
+- return;
++ if (!offline) {
++ lpfc_sli4_nvme_xri_aborted(phba, axri,
++ psb);
++ return;
++ }
++ lpfc_sli4_nvme_pci_offline_aborted(phba, psb);
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ spin_lock(&qp->abts_io_buf_list_lock);
++ continue;
+ }
+ qp->abts_scsi_io_bufs--;
+ spin_unlock(&qp->abts_io_buf_list_lock);
+@@ -534,13 +546,13 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+
+ rrq_empty = list_empty(&phba->active_rrq_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+- if (ndlp) {
++ if (ndlp && !offline) {
+ lpfc_set_rrq_active(phba, ndlp,
+ psb->cur_iocbq.sli4_lxritag, rxid, 1);
+ lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+ }
+
+- if (phba->cfg_fcp_wait_abts_rsp) {
++ if (phba->cfg_fcp_wait_abts_rsp || offline) {
+ spin_lock_irqsave(&psb->buf_lock, iflag);
+ cmd = psb->pCmd;
+ psb->pCmd = NULL;
+@@ -567,25 +579,30 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
+ lpfc_release_scsi_buf_s4(phba, psb);
+ if (rrq_empty)
+ lpfc_worker_wake_up(phba);
+- return;
++ if (!offline)
++ return;
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ spin_lock(&qp->abts_io_buf_list_lock);
++ continue;
+ }
+ }
+ spin_unlock(&qp->abts_io_buf_list_lock);
+- for (i = 1; i <= phba->sli.last_iotag; i++) {
+- iocbq = phba->sli.iocbq_lookup[i];
+-
+- if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+- (iocbq->iocb_flag & LPFC_IO_LIBDFC))
+- continue;
+- if (iocbq->sli4_xritag != xri)
+- continue;
+- psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+- psb->flags &= ~LPFC_SBUF_XBUSY;
+- spin_unlock_irqrestore(&phba->hbalock, iflag);
+- if (!list_empty(&pring->txq))
+- lpfc_worker_wake_up(phba);
+- return;
++ if (!offline) {
++ for (i = 1; i <= phba->sli.last_iotag; i++) {
++ iocbq = phba->sli.iocbq_lookup[i];
+
++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
++ (iocbq->iocb_flag & LPFC_IO_LIBDFC))
++ continue;
++ if (iocbq->sli4_xritag != xri)
++ continue;
++ psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
++ psb->flags &= ~LPFC_SBUF_XBUSY;
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
++ if (!list_empty(&pring->txq))
++ lpfc_worker_wake_up(phba);
++ return;
++ }
+ }
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+@@ -1023,8 +1040,6 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
+
+ sgpe = scsi_prot_sglist(sc);
+ lba = scsi_prot_ref_tag(sc);
+- if (lba == LPFC_INVALID_REFTAG)
+- return 0;
+
+ /* First check if we need to match the LBA */
+ if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) {
+@@ -1605,8 +1620,6 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
+
+ /* extract some info from the scsi command for pde*/
+ reftag = scsi_prot_ref_tag(sc);
+- if (reftag == LPFC_INVALID_REFTAG)
+- goto out;
+
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
+@@ -1768,8 +1781,6 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
+ /* extract some info from the scsi command */
+ blksize = scsi_prot_interval(sc);
+ reftag = scsi_prot_ref_tag(sc);
+- if (reftag == LPFC_INVALID_REFTAG)
+- goto out;
+
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
+@@ -1999,8 +2010,6 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
+
+ /* extract some info from the scsi command for pde*/
+ reftag = scsi_prot_ref_tag(sc);
+- if (reftag == LPFC_INVALID_REFTAG)
+- goto out;
+
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
+@@ -2200,8 +2209,6 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
+ /* extract some info from the scsi command */
+ blksize = scsi_prot_interval(sc);
+ reftag = scsi_prot_ref_tag(sc);
+- if (reftag == LPFC_INVALID_REFTAG)
+- goto out;
+
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
+@@ -2793,8 +2800,6 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
+
+ src = (struct scsi_dif_tuple *)sg_virt(sgpe);
+ start_ref_tag = scsi_prot_ref_tag(cmd);
+- if (start_ref_tag == LPFC_INVALID_REFTAG)
+- goto out;
+ start_app_tag = src->app_tag;
+ len = sgpe->length;
+ while (src && protsegcnt) {
+@@ -3641,11 +3646,11 @@ err:
+ scsi_cmnd->sc_data_direction);
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+- "9084 Cannot setup S/G List for HBA"
+- "IO segs %d/%d SGL %d SCSI %d: %d %d\n",
++ "9084 Cannot setup S/G List for HBA "
++ "IO segs %d/%d SGL %d SCSI %d: %d %d %d\n",
+ lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
+ phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
+- prot_group_type, num_sge);
++ prot_group_type, num_sge, ret);
+
+ lpfc_cmd->seg_cnt = 0;
+ lpfc_cmd->prot_seg_cnt = 0;
+@@ -3917,7 +3922,7 @@ lpfc_update_cmf_cmpl(struct lpfc_hba *phba,
+ else
+ time = div_u64(time + 500, 1000); /* round it */
+
+- cgs = this_cpu_ptr(phba->cmf_stat);
++ cgs = per_cpu_ptr(phba->cmf_stat, raw_smp_processor_id());
+ atomic64_add(size, &cgs->rcv_bytes);
+ atomic64_add(time, &cgs->rx_latency);
+ atomic_inc(&cgs->rx_io_cnt);
+@@ -3960,7 +3965,7 @@ lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t size)
+ atomic_set(&phba->rx_max_read_cnt, size);
+ }
+
+- cgs = this_cpu_ptr(phba->cmf_stat);
++ cgs = per_cpu_ptr(phba->cmf_stat, raw_smp_processor_id());
+ atomic64_add(size, &cgs->total_bytes);
+ return 0;
+ }
+@@ -4374,7 +4379,7 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
+ lpfc_cmd->result == IOERR_NO_RESOURCES ||
+ lpfc_cmd->result == IOERR_ABORT_REQUESTED ||
+ lpfc_cmd->result == IOERR_SLER_CMD_RCV_FAILURE) {
+- cmd->result = DID_REQUEUE << 16;
++ cmd->result = DID_TRANSPORT_DISRUPTED << 16;
+ break;
+ }
+ if ((lpfc_cmd->result == IOERR_RX_DMA_FAILED ||
+@@ -4661,7 +4666,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
+ lpfc_cmd->result == IOERR_NO_RESOURCES ||
+ lpfc_cmd->result == IOERR_ABORT_REQUESTED ||
+ lpfc_cmd->result == IOERR_SLER_CMD_RCV_FAILURE) {
+- cmd->result = DID_REQUEUE << 16;
++ cmd->result = DID_TRANSPORT_DISRUPTED << 16;
+ break;
+ }
+ if ((lpfc_cmd->result == IOERR_RX_DMA_FAILED ||
+@@ -5732,7 +5737,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
+ }
+ }
+
+- atomic_inc(&ndlp->cmd_pending);
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
+ this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
+@@ -5885,25 +5889,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ if (!lpfc_cmd)
+ return ret;
+
+- spin_lock_irqsave(&phba->hbalock, flags);
++ /* Guard against IO completion being called at same time */
++ spin_lock_irqsave(&lpfc_cmd->buf_lock, flags);
++
++ spin_lock(&phba->hbalock);
+ /* driver queued commands are in process of being flushed */
+ if (phba->hba_flag & HBA_IOQ_FLUSH) {
+ lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+ "3168 SCSI Layer abort requested I/O has been "
+ "flushed by LLD.\n");
+ ret = FAILED;
+- goto out_unlock;
++ goto out_unlock_hba;
+ }
+
+- /* Guard against IO completion being called at same time */
+- spin_lock(&lpfc_cmd->buf_lock);
+-
+ if (!lpfc_cmd->pCmd) {
+ lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+ "2873 SCSI Layer I/O Abort Request IO CMPL Status "
+ "x%x ID %d LUN %llu\n",
+ SUCCESS, cmnd->device->id, cmnd->device->lun);
+- goto out_unlock_buf;
++ goto out_unlock_hba;
+ }
+
+ iocb = &lpfc_cmd->cur_iocbq;
+@@ -5911,7 +5915,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring;
+ if (!pring_s4) {
+ ret = FAILED;
+- goto out_unlock_buf;
++ goto out_unlock_hba;
+ }
+ spin_lock(&pring_s4->ring_lock);
+ }
+@@ -5944,8 +5948,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ "3389 SCSI Layer I/O Abort Request is pending\n");
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ spin_unlock(&pring_s4->ring_lock);
+- spin_unlock(&lpfc_cmd->buf_lock);
+- spin_unlock_irqrestore(&phba->hbalock, flags);
++ spin_unlock(&phba->hbalock);
++ spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags);
+ goto wait_for_cmpl;
+ }
+
+@@ -5966,15 +5970,13 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ if (ret_val != IOCB_SUCCESS) {
+ /* Indicate the IO is not being aborted by the driver. */
+ lpfc_cmd->waitq = NULL;
+- spin_unlock(&lpfc_cmd->buf_lock);
+- spin_unlock_irqrestore(&phba->hbalock, flags);
+ ret = FAILED;
+- goto out;
++ goto out_unlock_hba;
+ }
+
+ /* no longer need the lock after this point */
+- spin_unlock(&lpfc_cmd->buf_lock);
+- spin_unlock_irqrestore(&phba->hbalock, flags);
++ spin_unlock(&phba->hbalock);
++ spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags);
+
+ if (phba->cfg_poll & DISABLE_FCP_RING_INT)
+ lpfc_sli_handle_fast_ring_event(phba,
+@@ -6009,10 +6011,9 @@ wait_for_cmpl:
+ out_unlock_ring:
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ spin_unlock(&pring_s4->ring_lock);
+-out_unlock_buf:
+- spin_unlock(&lpfc_cmd->buf_lock);
+-out_unlock:
+- spin_unlock_irqrestore(&phba->hbalock, flags);
++out_unlock_hba:
++ spin_unlock(&phba->hbalock);
++ spin_unlock_irqrestore(&lpfc_cmd->buf_lock, flags);
+ out:
+ lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+ "0749 SCSI Layer I/O Abort Request Status x%x ID %d "
+@@ -6455,28 +6456,28 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
+
+ /* Issue LOGO, if no LOGO is outstanding */
+ spin_lock_irqsave(&pnode->lock, flags);
+- if (!(pnode->upcall_flags & NLP_WAIT_FOR_LOGO) &&
++ if (!(pnode->save_flags & NLP_WAIT_FOR_LOGO) &&
+ !pnode->logo_waitq) {
+ pnode->logo_waitq = &waitq;
+ pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
+ pnode->nlp_flag |= NLP_ISSUE_LOGO;
+- pnode->upcall_flags |= NLP_WAIT_FOR_LOGO;
++ pnode->save_flags |= NLP_WAIT_FOR_LOGO;
+ spin_unlock_irqrestore(&pnode->lock, flags);
+ lpfc_unreg_rpi(vport, pnode);
+ wait_event_timeout(waitq,
+- (!(pnode->upcall_flags &
++ (!(pnode->save_flags &
+ NLP_WAIT_FOR_LOGO)),
+ msecs_to_jiffies(dev_loss_tmo *
+ 1000));
+
+- if (pnode->upcall_flags & NLP_WAIT_FOR_LOGO) {
++ if (pnode->save_flags & NLP_WAIT_FOR_LOGO) {
+ lpfc_printf_vlog(vport, KERN_ERR, logit,
+ "0725 SCSI layer TGTRST "
+ "failed & LOGO TMO (%d, %llu) "
+ "return x%x\n",
+ tgt_id, lun_id, status);
+ spin_lock_irqsave(&pnode->lock, flags);
+- pnode->upcall_flags &= ~NLP_WAIT_FOR_LOGO;
++ pnode->save_flags &= ~NLP_WAIT_FOR_LOGO;
+ } else {
+ spin_lock_irqsave(&pnode->lock, flags);
+ }
+@@ -6628,6 +6629,13 @@ lpfc_host_reset_handler(struct scsi_cmnd *cmnd)
+ if (rc)
+ goto error;
+
++ /* Wait for successful restart of adapter */
++ if (phba->sli_rev < LPFC_SLI_REV4) {
++ rc = lpfc_sli_chipset_init(phba);
++ if (rc)
++ goto error;
++ }
++
+ rc = lpfc_online(phba);
+ if (rc)
+ goto error;
+diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
+index 026a1196a54d5..30bc72324f068 100644
+--- a/drivers/scsi/lpfc/lpfc_sli.c
++++ b/drivers/scsi/lpfc/lpfc_sli.c
+@@ -1404,7 +1404,8 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+ }
+
+ if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) &&
+- (sglq->state != SGL_XRI_ABORTED)) {
++ (!(unlikely(pci_channel_offline(phba->pcidev)))) &&
++ sglq->state != SGL_XRI_ABORTED) {
+ spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock,
+ iflag);
+
+@@ -2007,10 +2008,12 @@ initpath:
+
+ sync_buf->iocb_flag |= LPFC_IO_CMF;
+ ret_val = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], sync_buf);
+- if (ret_val)
++ if (ret_val) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+ "6214 Cannot issue CMF_SYNC_WQE: x%x\n",
+ ret_val);
++ __lpfc_sli_release_iocbq(phba, sync_buf);
++ }
+ out_unlock:
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ return ret_val;
+@@ -4583,10 +4586,12 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
+ lpfc_sli_cancel_iocbs(phba, &txq,
+ IOSTAT_LOCAL_REJECT,
+ IOERR_SLI_DOWN);
+- /* Flush the txcmpq */
++ /* Flush the txcmplq */
+ lpfc_sli_cancel_iocbs(phba, &txcmplq,
+ IOSTAT_LOCAL_REJECT,
+ IOERR_SLI_DOWN);
++ if (unlikely(pci_channel_offline(phba->pcidev)))
++ lpfc_sli4_io_xri_aborted(phba, NULL, 0);
+ }
+ } else {
+ pring = &psli->sli3_ring[LPFC_FCP_RING];
+@@ -5043,12 +5048,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba)
+ phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
+
+- /* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */
+- if (phba->hba_flag & HBA_FW_DUMP_OP) {
+- phba->hba_flag &= ~HBA_FW_DUMP_OP;
+- return rc;
+- }
+-
+ /* Now physically reset the device */
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "0389 Performing PCI function reset!\n");
+@@ -7879,6 +7878,172 @@ static void lpfc_sli4_dip(struct lpfc_hba *phba)
+ }
+ }
+
++/**
++ * lpfc_rx_monitor_create_ring - Initialize ring buffer for rx_monitor
++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object
++ * @entries: Number of rx_info_entry objects to allocate in ring
++ *
++ * Return:
++ * 0 - Success
++ * ENOMEM - Failure to kmalloc
++ **/
++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor,
++ u32 entries)
++{
++ rx_monitor->ring = kmalloc_array(entries, sizeof(struct rx_info_entry),
++ GFP_KERNEL);
++ if (!rx_monitor->ring)
++ return -ENOMEM;
++
++ rx_monitor->head_idx = 0;
++ rx_monitor->tail_idx = 0;
++ spin_lock_init(&rx_monitor->lock);
++ rx_monitor->entries = entries;
++
++ return 0;
++}
++
++/**
++ * lpfc_rx_monitor_destroy_ring - Free ring buffer for rx_monitor
++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object
++ **/
++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor)
++{
++ spin_lock(&rx_monitor->lock);
++ kfree(rx_monitor->ring);
++ rx_monitor->ring = NULL;
++ rx_monitor->entries = 0;
++ rx_monitor->head_idx = 0;
++ rx_monitor->tail_idx = 0;
++ spin_unlock(&rx_monitor->lock);
++}
++
++/**
++ * lpfc_rx_monitor_record - Insert an entry into rx_monitor's ring
++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object
++ * @entry: Pointer to rx_info_entry
++ *
++ * Used to insert an rx_info_entry into rx_monitor's ring. Note that this is a
++ * deep copy of rx_info_entry not a shallow copy of the rx_info_entry ptr.
++ *
++ * This is called from lpfc_cmf_timer, which is in timer/softirq context.
++ *
++ * In cases of old data overflow, we do a best effort of FIFO order.
++ **/
++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor,
++ struct rx_info_entry *entry)
++{
++ struct rx_info_entry *ring = rx_monitor->ring;
++ u32 *head_idx = &rx_monitor->head_idx;
++ u32 *tail_idx = &rx_monitor->tail_idx;
++ spinlock_t *ring_lock = &rx_monitor->lock;
++ u32 ring_size = rx_monitor->entries;
++
++ spin_lock(ring_lock);
++ memcpy(&ring[*tail_idx], entry, sizeof(*entry));
++ *tail_idx = (*tail_idx + 1) % ring_size;
++
++ /* Best effort of FIFO saved data */
++ if (*tail_idx == *head_idx)
++ *head_idx = (*head_idx + 1) % ring_size;
++
++ spin_unlock(ring_lock);
++}
++
++/**
++ * lpfc_rx_monitor_report - Read out rx_monitor's ring
++ * @phba: Pointer to lpfc_hba object
++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object
++ * @buf: Pointer to char buffer that will contain rx monitor info data
++ * @buf_len: Length buf including null char
++ * @max_read_entries: Maximum number of entries to read out of ring
++ *
++ * Used to dump/read what's in rx_monitor's ring buffer.
++ *
++ * If buf is NULL || buf_len == 0, then it is implied that we want to log the
++ * information to kmsg instead of filling out buf.
++ *
++ * Return:
++ * Number of entries read out of the ring
++ **/
++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba,
++ struct lpfc_rx_info_monitor *rx_monitor, char *buf,
++ u32 buf_len, u32 max_read_entries)
++{
++ struct rx_info_entry *ring = rx_monitor->ring;
++ struct rx_info_entry *entry;
++ u32 *head_idx = &rx_monitor->head_idx;
++ u32 *tail_idx = &rx_monitor->tail_idx;
++ spinlock_t *ring_lock = &rx_monitor->lock;
++ u32 ring_size = rx_monitor->entries;
++ u32 cnt = 0;
++ char tmp[DBG_LOG_STR_SZ] = {0};
++ bool log_to_kmsg = (!buf || !buf_len) ? true : false;
++
++ if (!log_to_kmsg) {
++ /* clear the buffer to be sure */
++ memset(buf, 0, buf_len);
++
++ scnprintf(buf, buf_len, "\t%-16s%-16s%-16s%-16s%-8s%-8s%-8s"
++ "%-8s%-8s%-8s%-16s\n",
++ "MaxBPI", "Tot_Data_CMF",
++ "Tot_Data_Cmd", "Tot_Data_Cmpl",
++ "Lat(us)", "Avg_IO", "Max_IO", "Bsy",
++ "IO_cnt", "Info", "BWutil(ms)");
++ }
++
++ /* Needs to be _irq because record is called from timer interrupt
++ * context
++ */
++ spin_lock_irq(ring_lock);
++ while (*head_idx != *tail_idx) {
++ entry = &ring[*head_idx];
++
++ /* Read out this entry's data. */
++ if (!log_to_kmsg) {
++ /* If !log_to_kmsg, then store to buf. */
++ scnprintf(tmp, sizeof(tmp),
++ "%03d:\t%-16llu%-16llu%-16llu%-16llu%-8llu"
++ "%-8llu%-8llu%-8u%-8u%-8u%u(%u)\n",
++ *head_idx, entry->max_bytes_per_interval,
++ entry->cmf_bytes, entry->total_bytes,
++ entry->rcv_bytes, entry->avg_io_latency,
++ entry->avg_io_size, entry->max_read_cnt,
++ entry->cmf_busy, entry->io_cnt,
++ entry->cmf_info, entry->timer_utilization,
++ entry->timer_interval);
++
++ /* Check for buffer overflow */
++ if ((strlen(buf) + strlen(tmp)) >= buf_len)
++ break;
++
++ /* Append entry's data to buffer */
++ strlcat(buf, tmp, buf_len);
++ } else {
++ lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
++ "4410 %02u: MBPI %llu Xmit %llu "
++ "Cmpl %llu Lat %llu ASz %llu Info %02u "
++ "BWUtil %u Int %u slot %u\n",
++ cnt, entry->max_bytes_per_interval,
++ entry->total_bytes, entry->rcv_bytes,
++ entry->avg_io_latency,
++ entry->avg_io_size, entry->cmf_info,
++ entry->timer_utilization,
++ entry->timer_interval, *head_idx);
++ }
++
++ *head_idx = (*head_idx + 1) % ring_size;
++
++ /* Don't feed more than max_read_entries */
++ cnt++;
++ if (cnt >= max_read_entries)
++ break;
++ }
++ spin_unlock_irq(ring_lock);
++
++ return cnt;
++}
++
+ /**
+ * lpfc_cmf_setup - Initialize idle_stat tracking
+ * @phba: Pointer to HBA context object.
+@@ -8070,19 +8235,29 @@ no_cmf:
+ phba->cmf_interval_rate = LPFC_CMF_INTERVAL;
+
+ /* Allocate RX Monitor Buffer */
+- if (!phba->rxtable) {
+- phba->rxtable = kmalloc_array(LPFC_MAX_RXMONITOR_ENTRY,
+- sizeof(struct rxtable_entry),
+- GFP_KERNEL);
+- if (!phba->rxtable) {
++ if (!phba->rx_monitor) {
++ phba->rx_monitor = kzalloc(sizeof(*phba->rx_monitor),
++ GFP_KERNEL);
++
++ if (!phba->rx_monitor) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "2644 Failed to alloc memory "
+ "for RX Monitor Buffer\n");
+ return -ENOMEM;
+ }
++
++ /* Instruct the rx_monitor object to instantiate its ring */
++ if (lpfc_rx_monitor_create_ring(phba->rx_monitor,
++ LPFC_MAX_RXMONITOR_ENTRY)) {
++ kfree(phba->rx_monitor);
++ phba->rx_monitor = NULL;
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "2645 Failed to alloc memory "
++ "for RX Monitor's Ring\n");
++ return -ENOMEM;
++ }
+ }
+- atomic_set(&phba->rxtable_idx_head, 0);
+- atomic_set(&phba->rxtable_idx_tail, 0);
++
+ return 0;
+ }
+
+@@ -8153,6 +8328,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
+ struct lpfc_vport *vport = phba->pport;
+ struct lpfc_dmabuf *mp;
+ struct lpfc_rqb *rqbp;
++ u32 flg;
+
+ /* Perform a PCI function reset to start from clean */
+ rc = lpfc_pci_function_reset(phba);
+@@ -8166,7 +8342,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
+ else {
+ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag |= LPFC_SLI_ACTIVE;
++ flg = phba->sli.sli_flag;
+ spin_unlock_irq(&phba->hbalock);
++ /* Allow a little time after setting SLI_ACTIVE for any polled
++ * MBX commands to complete via BSG.
++ */
++ for (i = 0; i < 50 && (flg & LPFC_SLI_MBOX_ACTIVE); i++) {
++ msleep(20);
++ spin_lock_irq(&phba->hbalock);
++ flg = phba->sli.sli_flag;
++ spin_unlock_irq(&phba->hbalock);
++ }
+ }
+
+ lpfc_sli4_dip(phba);
+@@ -9750,7 +9936,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+ "(%d):2541 Mailbox command x%x "
+ "(x%x/x%x) failure: "
+ "mqe_sta: x%x mcqe_sta: x%x/x%x "
+- "Data: x%x x%x\n,",
++ "Data: x%x x%x\n",
+ mboxq->vport ? mboxq->vport->vpi : 0,
+ mboxq->u.mb.mbxCommand,
+ lpfc_sli_config_mbox_subsys_get(phba,
+@@ -9784,7 +9970,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+ "(%d):2597 Sync Mailbox command "
+ "x%x (x%x/x%x) failure: "
+ "mqe_sta: x%x mcqe_sta: x%x/x%x "
+- "Data: x%x x%x\n,",
++ "Data: x%x x%x\n",
+ mboxq->vport ? mboxq->vport->vpi : 0,
+ mboxq->u.mb.mbxCommand,
+ lpfc_sli_config_mbox_subsys_get(phba,
+@@ -12404,17 +12590,17 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+
+ /* ABTS WQE must go to the same WQ as the WQE to be aborted */
+ abtsiocbp->hba_wqidx = cmdiocb->hba_wqidx;
+- if (cmdiocb->iocb_flag & LPFC_IO_FCP) {
+- abtsiocbp->iocb_flag |= LPFC_IO_FCP;
+- abtsiocbp->iocb_flag |= LPFC_USE_FCPWQIDX;
+- }
++ if (cmdiocb->iocb_flag & LPFC_IO_FCP)
++ abtsiocbp->iocb_flag |= (LPFC_IO_FCP | LPFC_USE_FCPWQIDX);
+ if (cmdiocb->iocb_flag & LPFC_IO_FOF)
+ abtsiocbp->iocb_flag |= LPFC_IO_FOF;
+
+- if (phba->link_state >= LPFC_LINK_UP)
+- iabt->ulpCommand = CMD_ABORT_XRI_CN;
+- else
++ if (phba->link_state < LPFC_LINK_UP ||
++ (phba->sli_rev == LPFC_SLI_REV4 &&
++ phba->sli4_hba.link_state.status == LPFC_FC_LA_TYPE_LINK_DOWN))
+ iabt->ulpCommand = CMD_CLOSE_XRI_CN;
++ else
++ iabt->ulpCommand = CMD_ABORT_XRI_CN;
+
+ if (cmpl)
+ abtsiocbp->iocb_cmpl = cmpl;
+@@ -12488,15 +12674,54 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba)
+ }
+
+ /**
+- * lpfc_sli_validate_fcp_iocb - find commands associated with a vport or LUN
++ * lpfc_sli_validate_fcp_iocb_for_abort - filter iocbs appropriate for FCP aborts
++ * @iocbq: Pointer to iocb object.
++ * @vport: Pointer to driver virtual port object.
++ *
++ * This function acts as an iocb filter for functions which abort FCP iocbs.
++ *
++ * Return values
++ * -ENODEV, if a null iocb or vport ptr is encountered
++ * -EINVAL, if the iocb is not an FCP I/O, not on the TX cmpl queue, premarked as
++ * driver already started the abort process, or is an abort iocb itself
++ * 0, passes criteria for aborting the FCP I/O iocb
++ **/
++static int
++lpfc_sli_validate_fcp_iocb_for_abort(struct lpfc_iocbq *iocbq,
++ struct lpfc_vport *vport)
++{
++ IOCB_t *icmd = NULL;
++
++ /* No null ptr vports */
++ if (!iocbq || iocbq->vport != vport)
++ return -ENODEV;
++
++ /* iocb must be for FCP IO, already exists on the TX cmpl queue,
++ * can't be premarked as driver aborted, nor be an ABORT iocb itself
++ */
++ icmd = &iocbq->iocb;
++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
++ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ) ||
++ (iocbq->iocb_flag & LPFC_DRIVER_ABORTED) ||
++ (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
++ icmd->ulpCommand == CMD_CLOSE_XRI_CN))
++ return -EINVAL;
++
++ return 0;
++}
++
++/**
++ * lpfc_sli_validate_fcp_iocb - validate commands associated with a SCSI target
+ * @iocbq: Pointer to driver iocb object.
+ * @vport: Pointer to driver virtual port object.
+ * @tgt_id: SCSI ID of the target.
+ * @lun_id: LUN ID of the scsi device.
+ * @ctx_cmd: LPFC_CTX_LUN/LPFC_CTX_TGT/LPFC_CTX_HOST
+ *
+- * This function acts as an iocb filter for functions which abort or count
+- * all FCP iocbs pending on a lun/SCSI target/SCSI host. It will return
++ * This function acts as an iocb filter for validating a lun/SCSI target/SCSI
++ * host.
++ *
++ * It will return
+ * 0 if the filtering criteria is met for the given iocb and will return
+ * 1 if the filtering criteria is not met.
+ * If ctx_cmd == LPFC_CTX_LUN, the function returns 0 only if the
+@@ -12515,22 +12740,8 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
+ lpfc_ctx_cmd ctx_cmd)
+ {
+ struct lpfc_io_buf *lpfc_cmd;
+- IOCB_t *icmd = NULL;
+ int rc = 1;
+
+- if (!iocbq || iocbq->vport != vport)
+- return rc;
+-
+- if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+- !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ) ||
+- iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+- return rc;
+-
+- icmd = &iocbq->iocb;
+- if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
+- icmd->ulpCommand == CMD_CLOSE_XRI_CN)
+- return rc;
+-
+ lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+
+ if (lpfc_cmd->pCmd == NULL)
+@@ -12585,17 +12796,33 @@ lpfc_sli_sum_iocb(struct lpfc_vport *vport, uint16_t tgt_id, uint64_t lun_id,
+ {
+ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *iocbq;
++ IOCB_t *icmd = NULL;
+ int sum, i;
++ unsigned long iflags;
+
+- spin_lock_irq(&phba->hbalock);
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ for (i = 1, sum = 0; i <= phba->sli.last_iotag; i++) {
+ iocbq = phba->sli.iocbq_lookup[i];
+
+- if (lpfc_sli_validate_fcp_iocb (iocbq, vport, tgt_id, lun_id,
+- ctx_cmd) == 0)
++ if (!iocbq || iocbq->vport != vport)
++ continue;
++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
++ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
++ continue;
++
++ /* Include counting outstanding aborts */
++ icmd = &iocbq->iocb;
++ if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
++ icmd->ulpCommand == CMD_CLOSE_XRI_CN) {
++ sum++;
++ continue;
++ }
++
++ if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id,
++ ctx_cmd) == 0)
+ sum++;
+ }
+- spin_unlock_irq(&phba->hbalock);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+ return sum;
+ }
+@@ -12662,7 +12889,11 @@ lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ *
+ * This function sends an abort command for every SCSI command
+ * associated with the given virtual port pending on the ring
+- * filtered by lpfc_sli_validate_fcp_iocb function.
++ * filtered by lpfc_sli_validate_fcp_iocb_for_abort and then
++ * lpfc_sli_validate_fcp_iocb function. The ordering for validation before
++ * submitting abort iocbs must be lpfc_sli_validate_fcp_iocb_for_abort
++ * followed by lpfc_sli_validate_fcp_iocb.
++ *
+ * When abort_cmd == LPFC_CTX_LUN, the function sends abort only to the
+ * FCP iocbs associated with lun specified by tgt_id and lun_id
+ * parameters
+@@ -12694,6 +12925,9 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id,
+ for (i = 1; i <= phba->sli.last_iotag; i++) {
+ iocbq = phba->sli.iocbq_lookup[i];
+
++ if (lpfc_sli_validate_fcp_iocb_for_abort(iocbq, vport))
++ continue;
++
+ if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id,
+ abort_cmd) != 0)
+ continue;
+@@ -12726,7 +12960,11 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id,
+ *
+ * This function sends an abort command for every SCSI command
+ * associated with the given virtual port pending on the ring
+- * filtered by lpfc_sli_validate_fcp_iocb function.
++ * filtered by lpfc_sli_validate_fcp_iocb_for_abort and then
++ * lpfc_sli_validate_fcp_iocb function. The ordering for validation before
++ * submitting abort iocbs must be lpfc_sli_validate_fcp_iocb_for_abort
++ * followed by lpfc_sli_validate_fcp_iocb.
++ *
+ * When taskmgmt_cmd == LPFC_CTX_LUN, the function sends abort only to the
+ * FCP iocbs associated with lun specified by tgt_id and lun_id
+ * parameters
+@@ -12764,6 +13002,9 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
+ for (i = 1; i <= phba->sli.last_iotag; i++) {
+ iocbq = phba->sli.iocbq_lookup[i];
+
++ if (lpfc_sli_validate_fcp_iocb_for_abort(iocbq, vport))
++ continue;
++
+ if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id,
+ cmd) != 0)
+ continue;
+@@ -13317,6 +13558,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba)
+ uint32_t uerr_sta_hi, uerr_sta_lo;
+ uint32_t if_type, portsmphr;
+ struct lpfc_register portstat_reg;
++ u32 logmask;
+
+ /*
+ * For now, use the SLI4 device internal unrecoverable error
+@@ -13367,7 +13609,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba)
+ readl(phba->sli4_hba.u.if_type2.ERR1regaddr);
+ phba->work_status[1] =
+ readl(phba->sli4_hba.u.if_type2.ERR2regaddr);
+- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
++ logmask = LOG_TRACE_EVENT;
++ if (phba->work_status[0] ==
++ SLIPORT_ERR1_REG_ERR_CODE_2 &&
++ phba->work_status[1] == SLIPORT_ERR2_REG_FW_RESTART)
++ logmask = LOG_SLI;
++ lpfc_printf_log(phba, KERN_ERR, logmask,
+ "2885 Port Status Event: "
+ "port status reg 0x%x, "
+ "port smphr reg 0x%x, "
+@@ -18389,7 +18636,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
+ case FC_RCTL_ELS_REP: /* extended link services reply */
+ case FC_RCTL_ELS4_REQ: /* FC-4 ELS request */
+ case FC_RCTL_ELS4_REP: /* FC-4 ELS reply */
+- case FC_RCTL_BA_NOP: /* basic link service NOP */
+ case FC_RCTL_BA_ABTS: /* basic link service abort */
+ case FC_RCTL_BA_RMC: /* remove connection */
+ case FC_RCTL_BA_ACC: /* basic accept */
+@@ -18410,6 +18656,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
+ fc_vft_hdr = (struct fc_vft_header *)fc_hdr;
+ fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1];
+ return lpfc_fc_frame_check(phba, fc_hdr);
++ case FC_RCTL_BA_NOP: /* basic link service NOP */
+ default:
+ goto drop;
+ }
+@@ -19222,12 +19469,14 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport,
+ if (!lpfc_complete_unsol_iocb(phba,
+ phba->sli4_hba.els_wq->pring,
+ iocbq, fc_hdr->fh_r_ctl,
+- fc_hdr->fh_type))
++ fc_hdr->fh_type)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ "2540 Ring %d handler: unexpected Rctl "
+ "x%x Type x%x received\n",
+ LPFC_ELS_RING,
+ fc_hdr->fh_r_ctl, fc_hdr->fh_type);
++ lpfc_in_buf_free(phba, &seq_dmabuf->dbuf);
++ }
+
+ /* Free iocb created in lpfc_prep_seq */
+ list_for_each_entry_safe(curr_iocb, next_iocb,
+@@ -20817,6 +21066,7 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
+ struct lpfc_mbx_wr_object *wr_object;
+ LPFC_MBOXQ_t *mbox;
+ int rc = 0, i = 0;
++ int mbox_status = 0;
+ uint32_t shdr_status, shdr_add_status, shdr_add_status_2;
+ uint32_t shdr_change_status = 0, shdr_csf = 0;
+ uint32_t mbox_tmo;
+@@ -20862,11 +21112,15 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
+ wr_object->u.request.bde_count = i;
+ bf_set(lpfc_wr_object_write_length, &wr_object->u.request, written);
+ if (!phba->sli4_hba.intr_enable)
+- rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
++ mbox_status = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+ else {
+ mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
+- rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
++ mbox_status = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+ }
++
++ /* The mbox status needs to be maintained to detect MBOX_TIMEOUT. */
++ rc = mbox_status;
++
+ /* The IOCTL status is embedded in the mailbox subheader. */
+ shdr_status = bf_get(lpfc_mbox_hdr_status,
+ &wr_object->header.cfg_shdr.response);
+@@ -20881,10 +21135,6 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
+ &wr_object->u.response);
+ }
+
+- if (!phba->sli4_hba.intr_enable)
+- mempool_free(mbox, phba->mbox_mem_pool);
+- else if (rc != MBX_TIMEOUT)
+- mempool_free(mbox, phba->mbox_mem_pool);
+ if (shdr_status || shdr_add_status || shdr_add_status_2 || rc) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
+ "3025 Write Object mailbox failed with "
+@@ -20902,6 +21152,12 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list,
+ lpfc_log_fw_write_cmpl(phba, shdr_status, shdr_add_status,
+ shdr_add_status_2, shdr_change_status,
+ shdr_csf);
++
++ if (!phba->sli4_hba.intr_enable)
++ mempool_free(mbox, phba->mbox_mem_pool);
++ else if (mbox_status != MBX_TIMEOUT)
++ mempool_free(mbox, phba->mbox_mem_pool);
++
+ return rc;
+ }
+
+@@ -21107,6 +21363,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
+ fail_msg,
+ piocbq->iotag, piocbq->sli4_xritag);
+ list_add_tail(&piocbq->list, &completions);
++ fail_msg = NULL;
+ }
+ spin_unlock_irqrestore(&pring->ring_lock, iflags);
+ }
+@@ -21909,20 +22166,20 @@ lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+ static struct lpfc_io_buf *
+ lpfc_get_io_buf_from_expedite_pool(struct lpfc_hba *phba)
+ {
+- struct lpfc_io_buf *lpfc_ncmd;
++ struct lpfc_io_buf *lpfc_ncmd = NULL, *iter;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ struct lpfc_epd_pool *epd_pool;
+
+ epd_pool = &phba->epd_pool;
+- lpfc_ncmd = NULL;
+
+ spin_lock_irqsave(&epd_pool->lock, iflag);
+ if (epd_pool->count > 0) {
+- list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
++ list_for_each_entry_safe(iter, lpfc_ncmd_next,
+ &epd_pool->list, list) {
+- list_del(&lpfc_ncmd->list);
++ list_del(&iter->list);
+ epd_pool->count--;
++ lpfc_ncmd = iter;
+ break;
+ }
+ }
+@@ -21966,8 +22223,26 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ lpfc_ncmd = NULL;
++ if (!qp) {
++ lpfc_printf_log(phba, KERN_INFO,
++ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
++ "5556 NULL qp for hwqid x%x\n", hwqid);
++ return lpfc_ncmd;
++ }
+ multixri_pool = qp->p_multixri_pool;
++ if (!multixri_pool) {
++ lpfc_printf_log(phba, KERN_INFO,
++ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
++ "5557 NULL multixri for hwqid x%x\n", hwqid);
++ return lpfc_ncmd;
++ }
+ pvt_pool = &multixri_pool->pvt_pool;
++ if (!pvt_pool) {
++ lpfc_printf_log(phba, KERN_INFO,
++ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
++ "5558 NULL pvt_pool for hwqid x%x\n", hwqid);
++ return lpfc_ncmd;
++ }
+ multixri_pool->io_req_count++;
+
+ /* If pvt_pool is empty, move some XRIs from public to private pool */
+@@ -22043,6 +22318,12 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ lpfc_cmd = NULL;
++ if (!qp) {
++ lpfc_printf_log(phba, KERN_WARNING,
++ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
++ "5555 NULL qp for hwqid x%x\n", hwqid);
++ return lpfc_cmd;
++ }
+
+ if (phba->cfg_xri_rebalancing)
+ lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
+@@ -22095,10 +22376,6 @@ lpfc_read_object(struct lpfc_hba *phba, char *rdobject, uint32_t *datap,
+ struct lpfc_dmabuf *pcmd;
+ u32 rd_object_name[LPFC_MBX_OBJECT_NAME_LEN_DW] = {0};
+
+- /* sanity check on queue memory */
+- if (!datap)
+- return -ENODEV;
+-
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!mbox)
+ return -ENOMEM;
+diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
+index 99c5d1e4da5ef..5962cf508842f 100644
+--- a/drivers/scsi/lpfc/lpfc_sli4.h
++++ b/drivers/scsi/lpfc/lpfc_sli4.h
+@@ -1116,6 +1116,8 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
+ int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
+ void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
+ void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba);
++void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
++ struct lpfc_io_buf *lpfc_ncmd);
+ void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri,
+ struct lpfc_io_buf *lpfc_ncmd);
+diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
+index 56910e94dbf2a..6122cc60a8b3d 100644
+--- a/drivers/scsi/megaraid.c
++++ b/drivers/scsi/megaraid.c
+@@ -1443,6 +1443,7 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
+ */
+ if (cmdid == CMDID_INT_CMDS) {
+ scb = &adapter->int_scb;
++ cmd = scb->cmd;
+
+ list_del_init(&scb->list);
+ scb->state = SCB_FREE;
+@@ -4628,7 +4629,7 @@ static int __init megaraid_init(void)
+ * major number allocation.
+ */
+ major = register_chrdev(0, "megadev_legacy", &megadev_fops);
+- if (!major) {
++ if (major < 0) {
+ printk(KERN_WARNING
+ "megaraid: failed to register char device\n");
+ }
+diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
+index 7af2c23652b01..02d7ab119f806 100644
+--- a/drivers/scsi/megaraid/megaraid_sas.h
++++ b/drivers/scsi/megaraid/megaraid_sas.h
+@@ -1517,6 +1517,8 @@ struct megasas_ctrl_info {
+ #define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \
+ MEGASAS_MAX_DEV_PER_CHANNEL)
+
++#define MEGASAS_MAX_SUPPORTED_LD_IDS 240
++
+ #define MEGASAS_MAX_SECTORS (2*1024)
+ #define MEGASAS_MAX_SECTORS_IEEE (2*128)
+ #define MEGASAS_DBG_LVL 1
+@@ -2558,6 +2560,9 @@ struct megasas_instance_template {
+ #define MEGASAS_IS_LOGICAL(sdev) \
+ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1)
+
++#define MEGASAS_IS_LUN_VALID(sdev) \
++ (((sdev)->lun == 0) ? 1 : 0)
++
+ #define MEGASAS_DEV_INDEX(scp) \
+ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \
+ scp->device->id)
+diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
+index 39d8754e63acf..f7da1876e7a38 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev)
+ goto scan_target;
+ }
+ return -ENXIO;
++ } else if (!MEGASAS_IS_LUN_VALID(sdev)) {
++ sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
++ return -ENXIO;
+ }
+
+ scan_target:
+@@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev)
+ instance = megasas_lookup_instance(sdev->host->host_no);
+
+ if (MEGASAS_IS_LOGICAL(sdev)) {
++ if (!MEGASAS_IS_LUN_VALID(sdev)) {
++ sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__);
++ return;
++ }
+ ld_tgt_id = MEGASAS_TARGET_ID(sdev);
+ instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED;
+ if (megasas_dbg_lvl & LD_PD_DEBUG)
+@@ -3189,6 +3196,9 @@ static int megasas_map_queues(struct Scsi_Host *shost)
+ qoff += map->nr_queues;
+ offset += map->nr_queues;
+
++ /* we never use READ queue, so can't cheat blk-mq */
++ shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0;
++
+ /* Setup Poll hctx */
+ map = &shost->tag_set.map[HCTX_TYPE_POLL];
+ map->nr_queues = instance->iopoll_q_count;
+@@ -3292,7 +3302,7 @@ fw_crash_buffer_show(struct device *cdev,
+
+ spin_lock_irqsave(&instance->crashdump_lock, flags);
+ buff_offset = instance->fw_crash_buffer_offset;
+- if (!instance->crash_dump_buf &&
++ if (!instance->crash_dump_buf ||
+ !((instance->fw_crash_state == AVAILABLE) ||
+ (instance->fw_crash_state == COPYING))) {
+ dev_err(&instance->pdev->dev,
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c
+index 83f69c33b01a9..ec10d35b4685a 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fp.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c
+@@ -358,7 +358,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id)
+ ld = MR_TargetIdToLdGet(i, drv_map);
+
+ /* For non existing VDs, iterate to next VD*/
+- if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1))
++ if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS)
+ continue;
+
+ raid = MR_LdRaidGet(ld, drv_map);
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+index 26d0cf9353dd6..c254254aa72f8 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+@@ -3530,6 +3530,9 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
+ if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
+ return IRQ_HANDLED;
+
++ if (irq_context && !atomic_add_unless(&irq_context->in_used, 1, 1))
++ return 0;
++
+ desc = fusion->reply_frames_desc[MSIxIndex] +
+ fusion->last_reply_idx[MSIxIndex];
+
+@@ -3540,11 +3543,11 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
+ reply_descript_type = reply_desc->ReplyFlags &
+ MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
+
+- if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
++ if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) {
++ if (irq_context)
++ atomic_dec(&irq_context->in_used);
+ return IRQ_NONE;
+-
+- if (irq_context && !atomic_add_unless(&irq_context->in_used, 1, 1))
+- return 0;
++ }
+
+ num_completed = 0;
+
+@@ -4734,7 +4737,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd)
+ devhandle = megasas_get_tm_devhandle(scmd->device);
+
+ if (devhandle == (u16)ULONG_MAX) {
+- ret = SUCCESS;
++ ret = FAILED;
+ sdev_printk(KERN_INFO, scmd->device,
+ "task abort issued for invalid devhandle\n");
+ mutex_unlock(&instance->reset_mutex);
+@@ -4804,7 +4807,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd)
+ devhandle = megasas_get_tm_devhandle(scmd->device);
+
+ if (devhandle == (u16)ULONG_MAX) {
+- ret = SUCCESS;
++ ret = FAILED;
+ sdev_printk(KERN_INFO, scmd->device,
+ "target reset issued for invalid devhandle\n");
+ mutex_unlock(&instance->reset_mutex);
+@@ -5276,7 +5279,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance)
+ if (!fusion->log_to_span) {
+ dev_err(&instance->pdev->dev, "Failed from %s %d\n",
+ __func__, __LINE__);
+- kfree(instance->ctrl_context);
+ return -ENOMEM;
+ }
+ }
+diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile
+index 7c2063e04c818..7ebca0ba538da 100644
+--- a/drivers/scsi/mpi3mr/Makefile
++++ b/drivers/scsi/mpi3mr/Makefile
+@@ -1,4 +1,4 @@
+ # mpi3mr makefile
+-obj-m += mpi3mr.o
++obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o
+ mpi3mr-y += mpi3mr_os.o \
+ mpi3mr_fw.o \
+diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
+index 9787b53a2b598..2cc42432bd0c0 100644
+--- a/drivers/scsi/mpi3mr/mpi3mr.h
++++ b/drivers/scsi/mpi3mr/mpi3mr.h
+@@ -79,7 +79,8 @@ extern int prot_mask;
+
+ /* Operational queue management definitions */
+ #define MPI3MR_OP_REQ_Q_QD 512
+-#define MPI3MR_OP_REP_Q_QD 4096
++#define MPI3MR_OP_REP_Q_QD 1024
++#define MPI3MR_OP_REP_Q_QD4K 4096
+ #define MPI3MR_OP_REQ_Q_SEG_SIZE 4096
+ #define MPI3MR_OP_REP_Q_SEG_SIZE 4096
+ #define MPI3MR_MAX_SEG_LIST_SIZE 4096
+diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
+index 4a8316c6bd41a..3ef6b6edef46d 100644
+--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
++++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
+@@ -1275,10 +1275,10 @@ static void mpi3mr_free_op_req_q_segments(struct mpi3mr_ioc *mrioc, u16 q_idx)
+ MPI3MR_MAX_SEG_LIST_SIZE,
+ mrioc->req_qinfo[q_idx].q_segment_list,
+ mrioc->req_qinfo[q_idx].q_segment_list_dma);
+- mrioc->op_reply_qinfo[q_idx].q_segment_list = NULL;
++ mrioc->req_qinfo[q_idx].q_segment_list = NULL;
+ }
+ } else
+- size = mrioc->req_qinfo[q_idx].num_requests *
++ size = mrioc->req_qinfo[q_idx].segment_qd *
+ mrioc->facts.op_req_sz;
+
+ for (j = 0; j < mrioc->req_qinfo[q_idx].num_segments; j++) {
+@@ -1565,6 +1565,8 @@ static int mpi3mr_create_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx)
+
+ reply_qid = qidx + 1;
+ op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD;
++ if (!mrioc->pdev->revision)
++ op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD4K;
+ op_reply_q->ci = 0;
+ op_reply_q->ephase = 1;
+ atomic_set(&op_reply_q->pend_ios, 0);
+diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
+index 3cae8803383b6..b2c650542bac5 100644
+--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
++++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
+@@ -2204,6 +2204,8 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc,
+ scmd->result = DID_OK << 16;
+ goto out_success;
+ }
++
++ scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_count);
+ if (ioc_status == MPI3_IOCSTATUS_SCSI_DATA_UNDERRUN &&
+ xfer_count == 0 && (scsi_status == MPI3_SCSI_STATUS_BUSY ||
+ scsi_status == MPI3_SCSI_STATUS_RESERVATION_CONFLICT ||
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
+index 27eb652b564f5..d7dabf53a0d5f 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
+@@ -139,6 +139,9 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc);
+ static void
+ _base_clear_outstanding_commands(struct MPT3SAS_ADAPTER *ioc);
+
++static u32
++_base_readl_ext_retry(const volatile void __iomem *addr);
++
+ /**
+ * mpt3sas_base_check_cmd_timeout - Function
+ * to check timeout and command termination due
+@@ -214,6 +217,20 @@ _base_readl_aero(const volatile void __iomem *addr)
+ return ret_val;
+ }
+
++static u32
++_base_readl_ext_retry(const volatile void __iomem *addr)
++{
++ u32 i, ret_val;
++
++ for (i = 0 ; i < 30 ; i++) {
++ ret_val = readl(addr);
++ if (ret_val == 0)
++ continue;
++ }
++
++ return ret_val;
++}
++
+ static inline u32
+ _base_readl(const volatile void __iomem *addr)
+ {
+@@ -639,8 +656,8 @@ static void _base_sync_drv_fw_timestamp(struct MPT3SAS_ADAPTER *ioc)
+ mpi_request->IOCParameter = MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP;
+ current_time = ktime_get_real();
+ TimeStamp = ktime_to_ms(current_time);
+- mpi_request->Reserved7 = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
+- mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp >> 32);
++ mpi_request->Reserved7 = cpu_to_le32(TimeStamp >> 32);
++ mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
+ init_completion(&ioc->scsih_cmds.done);
+ ioc->put_smid_default(ioc, smid);
+ dinitprintk(ioc, ioc_info(ioc,
+@@ -941,7 +958,7 @@ mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc)
+
+ dump_stack();
+
+- doorbell = ioc->base_readl(&ioc->chip->Doorbell);
++ doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
+ if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
+ mpt3sas_print_fault_code(ioc, doorbell &
+ MPI2_DOORBELL_DATA_MASK);
+@@ -2011,9 +2028,10 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
+ enable_irq(reply_q->os_irq);
+ }
+ }
++
++ if (poll)
++ _base_process_reply_queue(reply_q);
+ }
+- if (poll)
+- _base_process_reply_queue(reply_q);
+ }
+
+ /**
+@@ -2593,12 +2611,8 @@ _base_check_pcie_native_sgl(struct MPT3SAS_ADAPTER *ioc,
+
+ /* Get the SG list pointer and info. */
+ sges_left = scsi_dma_map(scmd);
+- if (sges_left < 0) {
+- sdev_printk(KERN_ERR, scmd->device,
+- "scsi_dma_map failed: request for %d bytes!\n",
+- scsi_bufflen(scmd));
++ if (sges_left < 0)
+ return 1;
+- }
+
+ /* Check if we need to build a native SG list. */
+ if (!base_is_prp_possible(ioc, pcie_device,
+@@ -2705,12 +2719,8 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc,
+
+ sg_scmd = scsi_sglist(scmd);
+ sges_left = scsi_dma_map(scmd);
+- if (sges_left < 0) {
+- sdev_printk(KERN_ERR, scmd->device,
+- "scsi_dma_map failed: request for %d bytes!\n",
+- scsi_bufflen(scmd));
++ if (sges_left < 0)
+ return -ENOMEM;
+- }
+
+ sg_local = &mpi_request->SGL;
+ sges_in_segment = ioc->max_sges_in_main_message;
+@@ -2853,12 +2863,8 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
+
+ sg_scmd = scsi_sglist(scmd);
+ sges_left = scsi_dma_map(scmd);
+- if (sges_left < 0) {
+- sdev_printk(KERN_ERR, scmd->device,
+- "scsi_dma_map failed: request for %d bytes!\n",
+- scsi_bufflen(scmd));
++ if (sges_left < 0)
+ return -ENOMEM;
+- }
+
+ sg_local = &mpi_request->SGL;
+ sges_in_segment = (ioc->request_sz -
+@@ -3001,19 +3007,25 @@ static int
+ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev)
+ {
+ struct sysinfo s;
++ u64 coherent_dma_mask, dma_mask;
+
+- if (ioc->is_mcpu_endpoint ||
+- sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma ||
+- dma_get_required_mask(&pdev->dev) <= 32)
++ if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4) {
+ ioc->dma_mask = 32;
++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(32);
+ /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */
+- else if (ioc->hba_mpi_version_belonged > MPI2_VERSION)
++ } else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) {
+ ioc->dma_mask = 63;
+- else
++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(63);
++ } else {
+ ioc->dma_mask = 64;
++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(64);
++ }
+
+- if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)) ||
+- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)))
++ if (ioc->use_32bit_dma)
++ coherent_dma_mask = DMA_BIT_MASK(32);
++
++ if (dma_set_mask(&pdev->dev, dma_mask) ||
++ dma_set_coherent_mask(&pdev->dev, coherent_dma_mask))
+ return -ENODEV;
+
+ if (ioc->dma_mask > 32) {
+@@ -5380,6 +5392,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
+ Mpi2ConfigReply_t mpi_reply;
+ Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL;
+ Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1;
++ u16 depth;
+ int sz;
+ int rc = 0;
+
+@@ -5391,7 +5404,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
+ goto out;
+ /* sas iounit page 1 */
+ sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData);
+- sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL);
++ sas_iounit_pg1 = kzalloc(sizeof(Mpi2SasIOUnitPage1_t), GFP_KERNEL);
+ if (!sas_iounit_pg1) {
+ pr_err("%s: failure at %s:%d/%s()!\n",
+ ioc->name, __FILE__, __LINE__, __func__);
+@@ -5404,16 +5417,16 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc)
+ ioc->name, __FILE__, __LINE__, __func__);
+ goto out;
+ }
+- ioc->max_wideport_qd =
+- (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ?
+- le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) :
+- MPT3SAS_SAS_QUEUE_DEPTH;
+- ioc->max_narrowport_qd =
+- (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ?
+- le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) :
+- MPT3SAS_SAS_QUEUE_DEPTH;
+- ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ?
+- sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH;
++
++ depth = le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth);
++ ioc->max_wideport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
++
++ depth = le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth);
++ ioc->max_narrowport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
++
++ depth = sas_iounit_pg1->SATAMaxQDepth;
++ ioc->max_sata_qd = (depth ? depth : MPT3SAS_SATA_QUEUE_DEPTH);
++
+ /* pcie iounit page 1 */
+ rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply,
+ &pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t));
+@@ -5692,6 +5705,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
+ }
+ dma_pool_destroy(ioc->pcie_sgl_dma_pool);
+ }
++ kfree(ioc->pcie_sg_lookup);
++ ioc->pcie_sg_lookup = NULL;
++
+ if (ioc->config_page) {
+ dexitprintk(ioc,
+ ioc_info(ioc, "config_page(0x%p): free\n",
+@@ -5736,14 +5752,13 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
+ */
+
+ static int
+-mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz)
++mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz)
+ {
+- long reply_pool_end_address;
++ dma_addr_t end_address;
+
+- reply_pool_end_address = reply_pool_start_address + pool_sz;
++ end_address = start_address + pool_sz - 1;
+
+- if (upper_32_bits(reply_pool_start_address) ==
+- upper_32_bits(reply_pool_end_address))
++ if (upper_32_bits(start_address) == upper_32_bits(end_address))
+ return 1;
+ else
+ return 0;
+@@ -5804,7 +5819,7 @@ _base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+ }
+
+ if (!mpt3sas_check_same_4gb_region(
+- (long)ioc->pcie_sg_lookup[i].pcie_sgl, sz)) {
++ ioc->pcie_sg_lookup[i].pcie_sgl_dma, sz)) {
+ ioc_err(ioc, "PCIE SGLs are not in same 4G !! pcie sgl (0x%p) dma = (0x%llx)\n",
+ ioc->pcie_sg_lookup[i].pcie_sgl,
+ (unsigned long long)
+@@ -5859,8 +5874,8 @@ _base_allocate_chain_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+ GFP_KERNEL, &ctr->chain_buffer_dma);
+ if (!ctr->chain_buffer)
+ return -EAGAIN;
+- if (!mpt3sas_check_same_4gb_region((long)
+- ctr->chain_buffer, ioc->chain_segment_sz)) {
++ if (!mpt3sas_check_same_4gb_region(
++ ctr->chain_buffer_dma, ioc->chain_segment_sz)) {
+ ioc_err(ioc,
+ "Chain buffers are not in same 4G !!! Chain buff (0x%p) dma = (0x%llx)\n",
+ ctr->chain_buffer,
+@@ -5896,7 +5911,7 @@ _base_allocate_sense_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+ GFP_KERNEL, &ioc->sense_dma);
+ if (!ioc->sense)
+ return -EAGAIN;
+- if (!mpt3sas_check_same_4gb_region((long)ioc->sense, sz)) {
++ if (!mpt3sas_check_same_4gb_region(ioc->sense_dma, sz)) {
+ dinitprintk(ioc, pr_err(
+ "Bad Sense Pool! sense (0x%p) sense_dma = (0x%llx)\n",
+ ioc->sense, (unsigned long long) ioc->sense_dma));
+@@ -5929,7 +5944,7 @@ _base_allocate_reply_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+ &ioc->reply_dma);
+ if (!ioc->reply)
+ return -EAGAIN;
+- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) {
++ if (!mpt3sas_check_same_4gb_region(ioc->reply_dma, sz)) {
+ dinitprintk(ioc, pr_err(
+ "Bad Reply Pool! Reply (0x%p) Reply dma = (0x%llx)\n",
+ ioc->reply, (unsigned long long) ioc->reply_dma));
+@@ -5964,7 +5979,7 @@ _base_allocate_reply_free_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+ GFP_KERNEL, &ioc->reply_free_dma);
+ if (!ioc->reply_free)
+ return -EAGAIN;
+- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) {
++ if (!mpt3sas_check_same_4gb_region(ioc->reply_free_dma, sz)) {
+ dinitprintk(ioc,
+ pr_err("Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n",
+ ioc->reply_free, (unsigned long long) ioc->reply_free_dma));
+@@ -6003,7 +6018,7 @@ _base_allocate_reply_post_free_array(struct MPT3SAS_ADAPTER *ioc,
+ GFP_KERNEL, &ioc->reply_post_free_array_dma);
+ if (!ioc->reply_post_free_array)
+ return -EAGAIN;
+- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_post_free_array,
++ if (!mpt3sas_check_same_4gb_region(ioc->reply_post_free_array_dma,
+ reply_post_free_array_sz)) {
+ dinitprintk(ioc, pr_err(
+ "Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n",
+@@ -6068,7 +6083,7 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz)
+ * resources and set DMA mask to 32 and allocate.
+ */
+ if (!mpt3sas_check_same_4gb_region(
+- (long)ioc->reply_post[i].reply_post_free, sz)) {
++ ioc->reply_post[i].reply_post_free_dma, sz)) {
+ dinitprintk(ioc,
+ ioc_err(ioc, "bad Replypost free pool(0x%p)"
+ "reply_post_free_dma = (0x%llx)\n",
+@@ -6458,11 +6473,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
+ else if (rc == -EAGAIN)
+ goto try_32bit_dma;
+ total_sz += sense_sz;
+- ioc_info(ioc,
+- "sense pool(0x%p)- dma(0x%llx): depth(%d),"
+- "element_size(%d), pool_size(%d kB)\n",
+- ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth,
+- SCSI_SENSE_BUFFERSIZE, sz / 1024);
+ /* reply pool, 4 byte align */
+ sz = ioc->reply_free_queue_depth * ioc->reply_sz;
+ rc = _base_allocate_reply_pool(ioc, sz);
+@@ -6544,7 +6554,7 @@ mpt3sas_base_get_iocstate(struct MPT3SAS_ADAPTER *ioc, int cooked)
+ {
+ u32 s, sc;
+
+- s = ioc->base_readl(&ioc->chip->Doorbell);
++ s = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
+ sc = s & MPI2_IOC_STATE_MASK;
+ return cooked ? sc : s;
+ }
+@@ -6689,7 +6699,7 @@ _base_wait_for_doorbell_ack(struct MPT3SAS_ADAPTER *ioc, int timeout)
+ __func__, count, timeout));
+ return 0;
+ } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
+- doorbell = ioc->base_readl(&ioc->chip->Doorbell);
++ doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
+ if ((doorbell & MPI2_IOC_STATE_MASK) ==
+ MPI2_IOC_STATE_FAULT) {
+ mpt3sas_print_fault_code(ioc, doorbell);
+@@ -6729,7 +6739,7 @@ _base_wait_for_doorbell_not_used(struct MPT3SAS_ADAPTER *ioc, int timeout)
+ count = 0;
+ cntdn = 1000 * timeout;
+ do {
+- doorbell_reg = ioc->base_readl(&ioc->chip->Doorbell);
++ doorbell_reg = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
+ if (!(doorbell_reg & MPI2_DOORBELL_USED)) {
+ dhsprintk(ioc,
+ ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
+@@ -6877,7 +6887,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
+ __le32 *mfp;
+
+ /* make sure doorbell is not in use */
+- if ((ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
++ if ((ioc->base_readl_ext_retry(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
+ ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__);
+ return -EFAULT;
+ }
+@@ -6926,7 +6936,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
+ }
+
+ /* read the first two 16-bits, it gives the total length of the reply */
+- reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
++ reply[0] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
+ & MPI2_DOORBELL_DATA_MASK);
+ writel(0, &ioc->chip->HostInterruptStatus);
+ if ((_base_wait_for_doorbell_int(ioc, 5))) {
+@@ -6934,7 +6944,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
+ __LINE__);
+ return -EFAULT;
+ }
+- reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
++ reply[1] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
+ & MPI2_DOORBELL_DATA_MASK);
+ writel(0, &ioc->chip->HostInterruptStatus);
+
+@@ -6945,10 +6955,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
+ return -EFAULT;
+ }
+ if (i >= reply_bytes/2) /* overflow case */
+- ioc->base_readl(&ioc->chip->Doorbell);
++ ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
+ else
+ reply[i] = le16_to_cpu(
+- ioc->base_readl(&ioc->chip->Doorbell)
++ ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
+ & MPI2_DOORBELL_DATA_MASK);
+ writel(0, &ioc->chip->HostInterruptStatus);
+ }
+@@ -7807,7 +7817,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
+ goto out;
+ }
+
+- host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
++ host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
+ drsprintk(ioc,
+ ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
+ count, host_diagnostic));
+@@ -7827,7 +7837,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
+ for (count = 0; count < (300000000 /
+ MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
+
+- host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
++ host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
+
+ if (host_diagnostic == 0xFFFFFFFF) {
+ ioc_info(ioc,
+@@ -8217,10 +8227,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
+ ioc->rdpq_array_enable_assigned = 0;
+ ioc->use_32bit_dma = false;
+ ioc->dma_mask = 64;
+- if (ioc->is_aero_ioc)
++ if (ioc->is_aero_ioc) {
+ ioc->base_readl = &_base_readl_aero;
+- else
++ ioc->base_readl_ext_retry = &_base_readl_ext_retry;
++ } else {
+ ioc->base_readl = &_base_readl;
++ ioc->base_readl_ext_retry = &_base_readl;
++ }
+ r = mpt3sas_base_map_resources(ioc);
+ if (r)
+ goto out_free_resources;
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
+index f87c0911f66ad..0ff208a41a476 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
+@@ -142,6 +142,8 @@
+
+ #define MPT_MAX_CALLBACKS 32
+
++#define MPT_MAX_HBA_NUM_PHYS 32
++
+ #define INTERNAL_CMDS_COUNT 10 /* reserved cmds */
+ /* reserved for issuing internally framed scsi io cmds */
+ #define INTERNAL_SCSIIO_CMDS_COUNT 3
+@@ -798,6 +800,7 @@ struct _sas_phy {
+ * @enclosure_handle: handle for this a member of an enclosure
+ * @device_info: bitwise defining capabilities of this sas_host/expander
+ * @responding: used in _scsih_expander_device_mark_responding
++ * @nr_phys_allocated: Allocated memory for this many count phys
+ * @phy: a list of phys that make up this sas_host/expander
+ * @sas_port_list: list of ports attached to this sas_host/expander
+ * @port: hba port entry containing node's port number info
+@@ -813,6 +816,7 @@ struct _sas_node {
+ u16 enclosure_handle;
+ u64 enclosure_logical_id;
+ u8 responding;
++ u8 nr_phys_allocated;
+ struct hba_port *port;
+ struct _sas_phy *phy;
+ struct list_head sas_port_list;
+@@ -1614,6 +1618,7 @@ struct MPT3SAS_ADAPTER {
+ u8 diag_trigger_active;
+ u8 atomic_desc_capable;
+ BASE_READ_REG base_readl;
++ BASE_READ_REG base_readl_ext_retry;
+ struct SL_WH_MASTER_TRIGGER_T diag_trigger_master;
+ struct SL_WH_EVENT_TRIGGERS_T diag_trigger_event;
+ struct SL_WH_SCSI_TRIGGERS_T diag_trigger_scsi;
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c
+index 0563078227de6..a8dd14c91efdb 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_config.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_config.c
+@@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t
+ retry_count++;
+ if (ioc->config_cmds.smid == smid)
+ mpt3sas_base_free_smid(ioc, smid);
+- if ((ioc->shost_recovery) || (ioc->config_cmds.status &
+- MPT3_CMD_RESET) || ioc->pci_error_recovery)
++ if (ioc->config_cmds.status & MPT3_CMD_RESET)
+ goto retry_config;
+- issue_host_reset = 1;
++ if (ioc->shost_recovery || ioc->pci_error_recovery) {
++ issue_host_reset = 0;
++ r = -EFAULT;
++ } else
++ issue_host_reset = 1;
+ goto free_mem;
+ }
+
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+index ad1b6c2b37a74..9eb3d0b4891dd 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -3670,6 +3670,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc)
+ fw_event = list_first_entry(&ioc->fw_event_list,
+ struct fw_event_work, list);
+ list_del_init(&fw_event->list);
++ fw_event_work_put(fw_event);
+ }
+ spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+@@ -3751,7 +3752,6 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc)
+ if (cancel_work_sync(&fw_event->work))
+ fw_event_work_put(fw_event);
+
+- fw_event_work_put(fw_event);
+ }
+ ioc->fw_events_cleanup = 0;
+ }
+@@ -3869,7 +3869,7 @@ _scsih_ublock_io_device(struct MPT3SAS_ADAPTER *ioc,
+
+ shost_for_each_device(sdev, ioc->shost) {
+ sas_device_priv_data = sdev->hostdata;
+- if (!sas_device_priv_data)
++ if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
+ continue;
+ if (sas_device_priv_data->sas_target->sas_address
+ != sas_address)
+@@ -6406,11 +6406,26 @@ _scsih_sas_port_refresh(struct MPT3SAS_ADAPTER *ioc)
+ int i, j, count = 0, lcount = 0;
+ int ret;
+ u64 sas_addr;
++ u8 num_phys;
+
+ drsprintk(ioc, ioc_info(ioc,
+ "updating ports for sas_host(0x%016llx)\n",
+ (unsigned long long)ioc->sas_hba.sas_address));
+
++ mpt3sas_config_get_number_hba_phys(ioc, &num_phys);
++ if (!num_phys) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ return;
++ }
++
++ if (num_phys > ioc->sas_hba.nr_phys_allocated) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ return;
++ }
++ ioc->sas_hba.num_phys = num_phys;
++
+ port_table = kcalloc(ioc->sas_hba.num_phys,
+ sizeof(struct hba_port), GFP_KERNEL);
+ if (!port_table)
+@@ -6611,6 +6626,30 @@ _scsih_sas_host_refresh(struct MPT3SAS_ADAPTER *ioc)
+ ioc->sas_hba.phy[i].hba_vphy = 1;
+ }
+
++ /*
++ * Add new HBA phys to STL if these new phys got added as part
++ * of HBA Firmware upgrade/downgrade operation.
++ */
++ if (!ioc->sas_hba.phy[i].phy) {
++ if ((mpt3sas_config_get_phy_pg0(ioc, &mpi_reply,
++ &phy_pg0, i))) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ continue;
++ }
++ ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
++ MPI2_IOCSTATUS_MASK;
++ if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ continue;
++ }
++ ioc->sas_hba.phy[i].phy_id = i;
++ mpt3sas_transport_add_host_phy(ioc,
++ &ioc->sas_hba.phy[i], phy_pg0,
++ ioc->sas_hba.parent_dev);
++ continue;
++ }
+ ioc->sas_hba.phy[i].handle = ioc->sas_hba.handle;
+ attached_handle = le16_to_cpu(sas_iounit_pg0->PhyData[i].
+ AttachedDevHandle);
+@@ -6622,6 +6661,19 @@ _scsih_sas_host_refresh(struct MPT3SAS_ADAPTER *ioc)
+ attached_handle, i, link_rate,
+ ioc->sas_hba.phy[i].port);
+ }
++ /*
++ * Clear the phy details if this phy got disabled as part of
++ * HBA Firmware upgrade/downgrade operation.
++ */
++ for (i = ioc->sas_hba.num_phys;
++ i < ioc->sas_hba.nr_phys_allocated; i++) {
++ if (ioc->sas_hba.phy[i].phy &&
++ ioc->sas_hba.phy[i].phy->negotiated_linkrate >=
++ SAS_LINK_RATE_1_5_GBPS)
++ mpt3sas_transport_update_links(ioc,
++ ioc->sas_hba.sas_address, 0, i,
++ MPI2_SAS_NEG_LINK_RATE_PHY_DISABLED, NULL);
++ }
+ out:
+ kfree(sas_iounit_pg0);
+ }
+@@ -6654,7 +6706,10 @@ _scsih_sas_host_add(struct MPT3SAS_ADAPTER *ioc)
+ __FILE__, __LINE__, __func__);
+ return;
+ }
+- ioc->sas_hba.phy = kcalloc(num_phys,
++
++ ioc->sas_hba.nr_phys_allocated = max_t(u8,
++ MPT_MAX_HBA_NUM_PHYS, num_phys);
++ ioc->sas_hba.phy = kcalloc(ioc->sas_hba.nr_phys_allocated,
+ sizeof(struct _sas_phy), GFP_KERNEL);
+ if (!ioc->sas_hba.phy) {
+ ioc_err(ioc, "failure at %s:%d/%s()!\n",
+@@ -10980,6 +11035,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
+ {
+ struct _sas_port *mpt3sas_port, *next;
+ unsigned long flags;
++ int port_id;
+
+ /* remove sibling ports attached to this expander */
+ list_for_each_entry_safe(mpt3sas_port, next,
+@@ -11000,6 +11056,8 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
+ mpt3sas_port->hba_port);
+ }
+
++ port_id = sas_expander->port->port_id;
++
+ mpt3sas_transport_port_remove(ioc, sas_expander->sas_address,
+ sas_expander->sas_address_parent, sas_expander->port);
+
+@@ -11007,7 +11065,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc,
+ "expander_remove: handle(0x%04x), sas_addr(0x%016llx), port:%d\n",
+ sas_expander->handle, (unsigned long long)
+ sas_expander->sas_address,
+- sas_expander->port->port_id);
++ port_id);
+
+ spin_lock_irqsave(&ioc->sas_node_lock, flags);
+ list_del(&sas_expander->list);
+@@ -11328,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev)
+ _scsih_ir_shutdown(ioc);
+ _scsih_nvme_shutdown(ioc);
+ mpt3sas_base_mask_interrupts(ioc);
++ mpt3sas_base_stop_watchdog(ioc);
+ ioc->shost_recovery = 1;
+ mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+ ioc->shost_recovery = 0;
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
+index 0681daee6c149..e8a4750f6ec47 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
+@@ -785,7 +785,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ goto out_fail;
+ }
+ port = sas_port_alloc_num(sas_node->parent_dev);
+- if ((sas_port_add(port))) {
++ if (!port || (sas_port_add(port))) {
+ ioc_err(ioc, "failure at %s:%d/%s()!\n",
+ __FILE__, __LINE__, __func__);
+ goto out_fail;
+@@ -824,11 +824,20 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ mpt3sas_port->remote_identify.sas_address;
+ }
+
++ if (!rphy) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ goto out_delete_port;
++ }
++
+ rphy->identify = mpt3sas_port->remote_identify;
+
+ if ((sas_rphy_add(rphy))) {
+ ioc_err(ioc, "failure at %s:%d/%s()!\n",
+ __FILE__, __LINE__, __func__);
++ sas_rphy_free(rphy);
++ rphy = NULL;
++ goto out_delete_port;
+ }
+
+ if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
+@@ -855,7 +864,10 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ rphy_to_expander_device(rphy), hba_port->port_id);
+ return mpt3sas_port;
+
+- out_fail:
++out_delete_port:
++ sas_port_delete(port);
++
++out_fail:
+ list_for_each_entry_safe(mpt3sas_phy, next, &mpt3sas_port->phy_list,
+ port_siblings)
+ list_del(&mpt3sas_phy->port_siblings);
+diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
+index f18dd97035955..f6f8ca3c8c7f5 100644
+--- a/drivers/scsi/mvsas/mv_init.c
++++ b/drivers/scsi/mvsas/mv_init.c
+@@ -646,6 +646,7 @@ static struct pci_device_id mvs_pci_table[] = {
+ { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 },
+ { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 },
+ { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 },
++ { PCI_VDEVICE(TTI, 0x2640), chip_6440 },
+ { PCI_VDEVICE(TTI, 0x2710), chip_9480 },
+ { PCI_VDEVICE(TTI, 0x2720), chip_9480 },
+ { PCI_VDEVICE(TTI, 0x2721), chip_9480 },
+@@ -696,7 +697,7 @@ static struct pci_driver mvs_pci_driver = {
+ static ssize_t driver_version_show(struct device *cdev,
+ struct device_attribute *attr, char *buffer)
+ {
+- return snprintf(buffer, PAGE_SIZE, "%s\n", DRV_VERSION);
++ return sysfs_emit(buffer, "%s\n", DRV_VERSION);
+ }
+
+ static DEVICE_ATTR_RO(driver_version);
+@@ -744,7 +745,7 @@ static ssize_t interrupt_coalescing_store(struct device *cdev,
+ static ssize_t interrupt_coalescing_show(struct device *cdev,
+ struct device_attribute *attr, char *buffer)
+ {
+- return snprintf(buffer, PAGE_SIZE, "%d\n", interrupt_coalescing);
++ return sysfs_emit(buffer, "%d\n", interrupt_coalescing);
+ }
+
+ static DEVICE_ATTR_RW(interrupt_coalescing);
+diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
+index a4a88323e0209..386256369dfc3 100644
+--- a/drivers/scsi/myrb.c
++++ b/drivers/scsi/myrb.c
+@@ -1239,7 +1239,8 @@ static void myrb_cleanup(struct myrb_hba *cb)
+ myrb_unmap(cb);
+
+ if (cb->mmio_base) {
+- cb->disable_intr(cb->io_base);
++ if (cb->disable_intr)
++ cb->disable_intr(cb->io_base);
+ iounmap(cb->mmio_base);
+ }
+ if (cb->irq)
+@@ -3409,9 +3410,13 @@ static struct myrb_hba *myrb_detect(struct pci_dev *pdev,
+ mutex_init(&cb->dcmd_mutex);
+ mutex_init(&cb->dma_mutex);
+ cb->pdev = pdev;
++ cb->host = shost;
+
+- if (pci_enable_device(pdev))
+- goto failure;
++ if (pci_enable_device(pdev)) {
++ dev_err(&pdev->dev, "Failed to enable PCI device\n");
++ scsi_host_put(shost);
++ return NULL;
++ }
+
+ if (privdata->hw_init == DAC960_PD_hw_init ||
+ privdata->hw_init == DAC960_P_hw_init) {
+diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
+index 07f274afd7e5e..a4d244ee4548c 100644
+--- a/drivers/scsi/myrs.c
++++ b/drivers/scsi/myrs.c
+@@ -2265,7 +2265,8 @@ static void myrs_cleanup(struct myrs_hba *cs)
+ myrs_unmap(cs);
+
+ if (cs->mmio_base) {
+- cs->disable_intr(cs);
++ if (cs->disable_intr)
++ cs->disable_intr(cs);
+ iounmap(cs->mmio_base);
+ cs->mmio_base = NULL;
+ }
+diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
+index 63690508313b7..32fc450bf84b4 100644
+--- a/drivers/scsi/pm8001/pm8001_hwi.c
++++ b/drivers/scsi/pm8001/pm8001_hwi.c
+@@ -1325,7 +1325,9 @@ int pm8001_mpi_build_cmd(struct pm8001_hba_info *pm8001_ha,
+ int q_index = circularQ - pm8001_ha->inbnd_q_tbl;
+ int rv;
+
+- WARN_ON(q_index >= PM8001_MAX_INB_NUM);
++ if (WARN_ON(q_index >= pm8001_ha->max_q_num))
++ return -EINVAL;
++
+ spin_lock_irqsave(&circularQ->iq_lock, flags);
+ rv = pm8001_mpi_msg_free_get(circularQ, pm8001_ha->iomb_size,
+ &pMessage);
+@@ -1765,7 +1767,6 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
+ }
+
+ task = sas_alloc_slow_task(GFP_ATOMIC);
+-
+ if (!task) {
+ pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n");
+ return;
+@@ -1774,13 +1775,16 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
+ task->task_done = pm8001_task_done;
+
+ res = pm8001_tag_alloc(pm8001_ha, &ccb_tag);
+- if (res)
++ if (res) {
++ sas_free_task(task);
+ return;
++ }
+
+ ccb = &pm8001_ha->ccb_info[ccb_tag];
+ ccb->device = pm8001_ha_dev;
+ ccb->ccb_tag = ccb_tag;
+ ccb->task = task;
++ ccb->n_elem = 0;
+
+ circularQ = &pm8001_ha->inbnd_q_tbl[0];
+
+@@ -1791,8 +1795,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
+
+ ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort,
+ sizeof(task_abort), 0);
+- if (ret)
++ if (ret) {
++ sas_free_task(task);
+ pm8001_tag_free(pm8001_ha, ccb_tag);
++ }
+
+ }
+
+@@ -1842,6 +1848,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha,
+ ccb->device = pm8001_ha_dev;
+ ccb->ccb_tag = ccb_tag;
+ ccb->task = task;
++ ccb->n_elem = 0;
+ pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG;
+ pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG;
+
+@@ -1858,7 +1865,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha,
+
+ sata_cmd.tag = cpu_to_le32(ccb_tag);
+ sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
+- sata_cmd.ncqtag_atap_dir_m |= ((0x1 << 7) | (0x5 << 9));
++ sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9));
+ memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
+
+ res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd,
+@@ -2419,7 +2426,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ len = sizeof(struct pio_setup_fis);
+ pm8001_dbg(pm8001_ha, IO,
+ "PIO read len = %d\n", len);
+- } else if (t->ata_task.use_ncq) {
++ } else if (t->ata_task.use_ncq &&
++ t->data_dir != DMA_NONE) {
+ len = sizeof(struct set_dev_bits_fis);
+ pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n",
+ len);
+@@ -2693,7 +2701,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ u32 tag = le32_to_cpu(psataPayload->tag);
+ u32 port_id = le32_to_cpu(psataPayload->port_id);
+ u32 dev_id = le32_to_cpu(psataPayload->device_id);
+- unsigned long flags;
+
+ ccb = &pm8001_ha->ccb_info[tag];
+
+@@ -2733,8 +2740,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ ts->resp = SAS_TASK_COMPLETE;
+ ts->stat = SAS_DATA_OVERRUN;
+ ts->residual = 0;
+- if (pm8001_dev)
+- atomic_dec(&pm8001_dev->running_req);
+ break;
+ case IO_XFER_ERROR_BREAK:
+ pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
+@@ -2776,7 +2781,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+ ts->resp = SAS_TASK_COMPLETE;
+ ts->stat = SAS_QUEUE_FULL;
+- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
+ return;
+ }
+ break;
+@@ -2862,20 +2866,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ ts->stat = SAS_OPEN_TO;
+ break;
+ }
+- spin_lock_irqsave(&t->task_state_lock, flags);
+- t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+- t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+- t->task_state_flags |= SAS_TASK_STATE_DONE;
+- if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+- spin_unlock_irqrestore(&t->task_state_lock, flags);
+- pm8001_dbg(pm8001_ha, FAIL,
+- "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
+- t, event, ts->resp, ts->stat);
+- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+- } else {
+- spin_unlock_irqrestore(&t->task_state_lock, flags);
+- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
+- }
+ }
+
+ /*See the comments for mpi_ssp_completion */
+@@ -3169,7 +3159,7 @@ pm8001_mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ * fw_control_context->usrAddr
+ */
+ complete(pm8001_ha->nvmd_completion);
+- pm8001_dbg(pm8001_ha, MSG, "Set nvm data complete!\n");
++ pm8001_dbg(pm8001_ha, MSG, "Get nvmd data complete!\n");
+ ccb->task = NULL;
+ ccb->ccb_tag = 0xFFFFFFFF;
+ pm8001_tag_free(pm8001_ha, tag);
+@@ -3726,12 +3716,11 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ mb();
+
+ if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) {
+- pm8001_tag_free(pm8001_ha, tag);
+ sas_free_task(t);
+- /* clear the flag */
+- pm8001_dev->id &= 0xBFFFFFFF;
+- } else
++ pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG;
++ } else {
+ t->task_done(t);
++ }
+
+ return 0;
+ }
+@@ -4294,22 +4283,22 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ u32 opc = OPC_INB_SATA_HOST_OPSTART;
+ memset(&sata_cmd, 0, sizeof(sata_cmd));
+ circularQ = &pm8001_ha->inbnd_q_tbl[0];
+- if (task->data_dir == DMA_NONE) {
++
++ if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) {
+ ATAP = 0x04; /* no data*/
+ pm8001_dbg(pm8001_ha, IO, "no data\n");
+ } else if (likely(!task->ata_task.device_control_reg_update)) {
+- if (task->ata_task.dma_xfer) {
++ if (task->ata_task.use_ncq &&
++ dev->sata_dev.class != ATA_DEV_ATAPI) {
++ ATAP = 0x07; /* FPDMA */
++ pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
++ } else if (task->ata_task.dma_xfer) {
+ ATAP = 0x06; /* DMA */
+ pm8001_dbg(pm8001_ha, IO, "DMA\n");
+ } else {
+ ATAP = 0x05; /* PIO*/
+ pm8001_dbg(pm8001_ha, IO, "PIO\n");
+ }
+- if (task->ata_task.use_ncq &&
+- dev->sata_dev.class != ATA_DEV_ATAPI) {
+- ATAP = 0x07; /* FPDMA */
+- pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
+- }
+ }
+ if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) {
+ task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
+@@ -4499,6 +4488,9 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha,
+ SAS_ADDR_SIZE);
+ rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
+ sizeof(payload), 0);
++ if (rc)
++ pm8001_tag_free(pm8001_ha, tag);
++
+ return rc;
+ }
+
+@@ -4649,7 +4641,7 @@ int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha,
+ memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8);
+ sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag);
+ if (pm8001_ha->chip_id != chip_8001)
+- sspTMCmd.ds_ads_m = 0x08;
++ sspTMCmd.ds_ads_m = cpu_to_le32(0x08);
+ circularQ = &pm8001_ha->inbnd_q_tbl[0];
+ ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd,
+ sizeof(sspTMCmd), 0);
+@@ -4911,6 +4903,11 @@ pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha,
+ ccb->ccb_tag = tag;
+ rc = pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info,
+ tag);
++ if (rc) {
++ kfree(fw_control_context);
++ pm8001_tag_free(pm8001_ha, tag);
++ }
++
+ return rc;
+ }
+
+@@ -5015,6 +5012,9 @@ pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha,
+ payload.nds = cpu_to_le32(state);
+ rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
+ sizeof(payload), 0);
++ if (rc)
++ pm8001_tag_free(pm8001_ha, tag);
++
+ return rc;
+
+ }
+diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
+index 47db7e0beae6f..a25a34535b7a4 100644
+--- a/drivers/scsi/pm8001/pm8001_init.c
++++ b/drivers/scsi/pm8001/pm8001_init.c
+@@ -281,12 +281,12 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
+ if (rc) {
+ pm8001_dbg(pm8001_ha, FAIL,
+ "pm8001_setup_irq failed [ret: %d]\n", rc);
+- goto err_out_shost;
++ goto err_out;
+ }
+ /* Request Interrupt */
+ rc = pm8001_request_irq(pm8001_ha);
+ if (rc)
+- goto err_out_shost;
++ goto err_out;
+
+ count = pm8001_ha->max_q_num;
+ /* Queues are chosen based on the number of cores/msix availability */
+@@ -422,8 +422,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
+ pm8001_tag_init(pm8001_ha);
+ return 0;
+
+-err_out_shost:
+- scsi_remove_host(pm8001_ha->shost);
+ err_out_nodev:
+ for (i = 0; i < pm8001_ha->max_memcnt; i++) {
+ if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) {
+@@ -1198,6 +1196,7 @@ pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha, struct Scsi_Host *shost,
+ goto err_out;
+
+ /* Memory region for ccb_info*/
++ pm8001_ha->ccb_count = ccb_count;
+ pm8001_ha->ccb_info =
+ kcalloc(ccb_count, sizeof(struct pm8001_ccb_info), GFP_KERNEL);
+ if (!pm8001_ha->ccb_info) {
+@@ -1259,6 +1258,16 @@ static void pm8001_pci_remove(struct pci_dev *pdev)
+ tasklet_kill(&pm8001_ha->tasklet[j]);
+ #endif
+ scsi_host_put(pm8001_ha->shost);
++
++ for (i = 0; i < pm8001_ha->ccb_count; i++) {
++ dma_free_coherent(&pm8001_ha->pdev->dev,
++ sizeof(struct pm8001_prd) * PM8001_MAX_DMA_SG,
++ pm8001_ha->ccb_info[i].buf_prd,
++ pm8001_ha->ccb_info[i].ccb_dma_handle);
++ }
++ kfree(pm8001_ha->ccb_info);
++ kfree(pm8001_ha->devices);
++
+ pm8001_free(pm8001_ha);
+ kfree(sha->sas_phy);
+ kfree(sha->sas_port);
+diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
+index 32e60f0c3b148..5fb08acbc0e5e 100644
+--- a/drivers/scsi/pm8001/pm8001_sas.c
++++ b/drivers/scsi/pm8001/pm8001_sas.c
+@@ -753,8 +753,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
+ res = -TMF_RESP_FUNC_FAILED;
+ /* Even TMF timed out, return direct. */
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
++ struct pm8001_ccb_info *ccb = task->lldd_task;
++
+ pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+ tmf->tmf);
++
++ if (ccb)
++ ccb->task = NULL;
+ goto ex_err;
+ }
+
+@@ -826,10 +831,10 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
+
+ res = PM8001_CHIP_DISP->task_abort(pm8001_ha,
+ pm8001_dev, flag, task_tag, ccb_tag);
+-
+ if (res) {
+ del_timer(&task->slow_task->timer);
+ pm8001_dbg(pm8001_ha, FAIL, "Executing internal task failed\n");
++ pm8001_tag_free(pm8001_ha, ccb_tag);
+ goto ex_err;
+ }
+ wait_for_completion(&task->slow_task->completion);
+diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
+index 62d08b535a4b6..f40a41f450d9b 100644
+--- a/drivers/scsi/pm8001/pm8001_sas.h
++++ b/drivers/scsi/pm8001/pm8001_sas.h
+@@ -457,6 +457,7 @@ struct outbound_queue_table {
+ __le32 producer_index;
+ u32 consumer_idx;
+ spinlock_t oq_lock;
++ unsigned long lock_flags;
+ };
+ struct pm8001_hba_memspace {
+ void __iomem *memvirtaddr;
+@@ -516,6 +517,7 @@ struct pm8001_hba_info {
+ u32 iomb_size; /* SPC and SPCV IOMB size */
+ struct pm8001_device *devices;
+ struct pm8001_ccb_info *ccb_info;
++ u32 ccb_count;
+ #ifdef PM8001_USE_MSIX
+ int number_of_intr;/*will be used in remove()*/
+ char intr_drvname[PM8001_MAX_MSIX_VEC]
+@@ -738,9 +740,7 @@ pm8001_ccb_task_free_done(struct pm8001_hba_info *pm8001_ha,
+ {
+ pm8001_ccb_task_free(pm8001_ha, task, ccb, ccb_idx);
+ smp_mb(); /*in order to force CPU ordering*/
+- spin_unlock(&pm8001_ha->lock);
+ task->task_done(task);
+- spin_lock(&pm8001_ha->lock);
+ }
+
+ #endif
+diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
+index 6ffe17b849ae8..04746df26c6c9 100644
+--- a/drivers/scsi/pm8001/pm80xx_hwi.c
++++ b/drivers/scsi/pm8001/pm80xx_hwi.c
+@@ -66,18 +66,16 @@ int pm80xx_bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shift_value)
+ }
+
+ static void pm80xx_pci_mem_copy(struct pm8001_hba_info *pm8001_ha, u32 soffset,
+- const void *destination,
++ __le32 *destination,
+ u32 dw_count, u32 bus_base_number)
+ {
+ u32 index, value, offset;
+- u32 *destination1;
+- destination1 = (u32 *)destination;
+
+- for (index = 0; index < dw_count; index += 4, destination1++) {
++ for (index = 0; index < dw_count; index += 4, destination++) {
+ offset = (soffset + index);
+ if (offset < (64 * 1024)) {
+ value = pm8001_cr32(pm8001_ha, bus_base_number, offset);
+- *destination1 = cpu_to_le32(value);
++ *destination = cpu_to_le32(value);
+ }
+ }
+ return;
+@@ -767,6 +765,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
+ pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity = 0x01;
+ pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt = 0x01;
+
++ /* Enable higher IQs and OQs, 32 to 63, bit 16 */
++ if (pm8001_ha->max_q_num > 32)
++ pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |=
++ 1 << 16;
+ /* Disable end to end CRC checking */
+ pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16);
+
+@@ -1028,6 +1030,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha)
+ if (0x0000 != gst_len_mpistate)
+ return -EBUSY;
+
++ /*
++ * As per controller datasheet, after successful MPI
++ * initialization minimum 500ms delay is required before
++ * issuing commands.
++ */
++ msleep(500);
++
+ return 0;
+ }
+
+@@ -1202,9 +1211,11 @@ pm80xx_set_thermal_config(struct pm8001_hba_info *pm8001_ha)
+ else
+ page_code = THERMAL_PAGE_CODE_8H;
+
+- payload.cfg_pg[0] = (THERMAL_LOG_ENABLE << 9) |
+- (THERMAL_ENABLE << 8) | page_code;
+- payload.cfg_pg[1] = (LTEMPHIL << 24) | (RTEMPHIL << 8);
++ payload.cfg_pg[0] =
++ cpu_to_le32((THERMAL_LOG_ENABLE << 9) |
++ (THERMAL_ENABLE << 8) | page_code);
++ payload.cfg_pg[1] =
++ cpu_to_le32((LTEMPHIL << 24) | (RTEMPHIL << 8));
+
+ pm8001_dbg(pm8001_ha, DEV,
+ "Setting up thermal config. cfg_pg 0 0x%x cfg_pg 1 0x%x\n",
+@@ -1244,43 +1255,41 @@ pm80xx_set_sas_protocol_timer_config(struct pm8001_hba_info *pm8001_ha)
+ circularQ = &pm8001_ha->inbnd_q_tbl[0];
+ payload.tag = cpu_to_le32(tag);
+
+- SASConfigPage.pageCode = SAS_PROTOCOL_TIMER_CONFIG_PAGE;
+- SASConfigPage.MST_MSI = 3 << 15;
+- SASConfigPage.STP_SSP_MCT_TMO = (STP_MCT_TMO << 16) | SSP_MCT_TMO;
+- SASConfigPage.STP_FRM_TMO = (SAS_MAX_OPEN_TIME << 24) |
+- (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER;
+- SASConfigPage.STP_IDLE_TMO = STP_IDLE_TIME;
+-
+- if (SASConfigPage.STP_IDLE_TMO > 0x3FFFFFF)
+- SASConfigPage.STP_IDLE_TMO = 0x3FFFFFF;
+-
+-
+- SASConfigPage.OPNRJT_RTRY_INTVL = (SAS_MFD << 16) |
+- SAS_OPNRJT_RTRY_INTVL;
+- SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO = (SAS_DOPNRJT_RTRY_TMO << 16)
+- | SAS_COPNRJT_RTRY_TMO;
+- SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR = (SAS_DOPNRJT_RTRY_THR << 16)
+- | SAS_COPNRJT_RTRY_THR;
+- SASConfigPage.MAX_AIP = SAS_MAX_AIP;
++ SASConfigPage.pageCode = cpu_to_le32(SAS_PROTOCOL_TIMER_CONFIG_PAGE);
++ SASConfigPage.MST_MSI = cpu_to_le32(3 << 15);
++ SASConfigPage.STP_SSP_MCT_TMO =
++ cpu_to_le32((STP_MCT_TMO << 16) | SSP_MCT_TMO);
++ SASConfigPage.STP_FRM_TMO =
++ cpu_to_le32((SAS_MAX_OPEN_TIME << 24) |
++ (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER);
++ SASConfigPage.STP_IDLE_TMO = cpu_to_le32(STP_IDLE_TIME);
++
++ SASConfigPage.OPNRJT_RTRY_INTVL =
++ cpu_to_le32((SAS_MFD << 16) | SAS_OPNRJT_RTRY_INTVL);
++ SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO =
++ cpu_to_le32((SAS_DOPNRJT_RTRY_TMO << 16) | SAS_COPNRJT_RTRY_TMO);
++ SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR =
++ cpu_to_le32((SAS_DOPNRJT_RTRY_THR << 16) | SAS_COPNRJT_RTRY_THR);
++ SASConfigPage.MAX_AIP = cpu_to_le32(SAS_MAX_AIP);
+
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.pageCode 0x%08x\n",
+- SASConfigPage.pageCode);
++ le32_to_cpu(SASConfigPage.pageCode));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MST_MSI 0x%08x\n",
+- SASConfigPage.MST_MSI);
++ le32_to_cpu(SASConfigPage.MST_MSI));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_SSP_MCT_TMO 0x%08x\n",
+- SASConfigPage.STP_SSP_MCT_TMO);
++ le32_to_cpu(SASConfigPage.STP_SSP_MCT_TMO));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_FRM_TMO 0x%08x\n",
+- SASConfigPage.STP_FRM_TMO);
++ le32_to_cpu(SASConfigPage.STP_FRM_TMO));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_IDLE_TMO 0x%08x\n",
+- SASConfigPage.STP_IDLE_TMO);
++ le32_to_cpu(SASConfigPage.STP_IDLE_TMO));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.OPNRJT_RTRY_INTVL 0x%08x\n",
+- SASConfigPage.OPNRJT_RTRY_INTVL);
++ le32_to_cpu(SASConfigPage.OPNRJT_RTRY_INTVL));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO 0x%08x\n",
+- SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO);
++ le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR 0x%08x\n",
+- SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR);
++ le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR));
+ pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MAX_AIP 0x%08x\n",
+- SASConfigPage.MAX_AIP);
++ le32_to_cpu(SASConfigPage.MAX_AIP));
+
+ memcpy(&payload.cfg_pg, &SASConfigPage,
+ sizeof(SASProtocolTimerConfig_t));
+@@ -1406,12 +1415,13 @@ static int pm80xx_encrypt_update(struct pm8001_hba_info *pm8001_ha)
+ /* Currently only one key is used. New KEK index is 1.
+ * Current KEK index is 1. Store KEK to NVRAM is 1.
+ */
+- payload.new_curidx_ksop = ((1 << 24) | (1 << 16) | (1 << 8) |
+- KEK_MGMT_SUBOP_KEYCARDUPDATE);
++ payload.new_curidx_ksop =
++ cpu_to_le32(((1 << 24) | (1 << 16) | (1 << 8) |
++ KEK_MGMT_SUBOP_KEYCARDUPDATE));
+
+ pm8001_dbg(pm8001_ha, DEV,
+ "Saving Encryption info to flash. payload 0x%x\n",
+- payload.new_curidx_ksop);
++ le32_to_cpu(payload.new_curidx_ksop));
+
+ rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
+ sizeof(payload), 0);
+@@ -1734,10 +1744,11 @@ static void
+ pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ {
+ #ifdef PM8001_USE_MSIX
+- u32 mask;
+- mask = (u32)(1 << vec);
+-
+- pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF));
++ if (vec < 32)
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec);
++ else
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U,
++ 1U << (vec - 32));
+ return;
+ #endif
+ pm80xx_chip_intx_interrupt_enable(pm8001_ha);
+@@ -1753,12 +1764,15 @@ static void
+ pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ {
+ #ifdef PM8001_USE_MSIX
+- u32 mask;
+- if (vec == 0xFF)
+- mask = 0xFFFFFFFF;
++ if (vec == 0xFF) {
++ /* disable all vectors 0-31, 32-63 */
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF);
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF);
++ } else if (vec < 32)
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec);
+ else
+- mask = (u32)(1 << vec);
+- pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF));
++ pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U,
++ 1U << (vec - 32));
+ return;
+ #endif
+ pm80xx_chip_intx_interrupt_disable(pm8001_ha);
+@@ -1800,6 +1814,7 @@ static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha,
+ ccb->device = pm8001_ha_dev;
+ ccb->ccb_tag = ccb_tag;
+ ccb->task = task;
++ ccb->n_elem = 0;
+
+ circularQ = &pm8001_ha->inbnd_q_tbl[0];
+
+@@ -1881,7 +1896,7 @@ static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha,
+
+ sata_cmd.tag = cpu_to_le32(ccb_tag);
+ sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
+- sata_cmd.ncqtag_atap_dir_m_dad |= ((0x1 << 7) | (0x5 << 9));
++ sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9)));
+ memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
+
+ res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd,
+@@ -2184,9 +2199,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ pm8001_dbg(pm8001_ha, FAIL,
+ "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
+ t, status, ts->resp, ts->stat);
++ pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+ if (t->slow_task)
+ complete(&t->slow_task->completion);
+- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+ } else {
+ spin_unlock_irqrestore(&t->task_state_lock, flags);
+ pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+@@ -2379,7 +2394,8 @@ static void mpi_ssp_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+
+ /*See the comments for mpi_ssp_completion */
+ static void
+-mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
++mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
++ struct outbound_queue_table *circularQ, void *piomb)
+ {
+ struct sas_task *t;
+ struct pm8001_ccb_info *ccb;
+@@ -2516,7 +2532,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ len = sizeof(struct pio_setup_fis);
+ pm8001_dbg(pm8001_ha, IO,
+ "PIO read len = %d\n", len);
+- } else if (t->ata_task.use_ncq) {
++ } else if (t->ata_task.use_ncq &&
++ t->data_dir != DMA_NONE) {
+ len = sizeof(struct set_dev_bits_fis);
+ pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n",
+ len);
+@@ -2616,7 +2633,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_QUEUE_FULL;
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ return;
+ }
+ break;
+@@ -2632,7 +2653,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_QUEUE_FULL;
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ return;
+ }
+ break;
+@@ -2656,7 +2681,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY);
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_QUEUE_FULL;
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ return;
+ }
+ break;
+@@ -2727,7 +2756,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_DS_NON_OPERATIONAL);
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_QUEUE_FULL;
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ return;
+ }
+ break;
+@@ -2747,7 +2780,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_DS_IN_ERROR);
+ ts->resp = SAS_TASK_UNDELIVERED;
+ ts->stat = SAS_QUEUE_FULL;
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ return;
+ }
+ break;
+@@ -2780,17 +2817,22 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ pm8001_dbg(pm8001_ha, FAIL,
+ "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
+ t, status, ts->resp, ts->stat);
++ pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+ if (t->slow_task)
+ complete(&t->slow_task->completion);
+- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+ } else {
+ spin_unlock_irqrestore(&t->task_state_lock, flags);
++ spin_unlock_irqrestore(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
++ spin_lock_irqsave(&circularQ->oq_lock,
++ circularQ->lock_flags);
+ }
+ }
+
+ /*See the comments for mpi_ssp_completion */
+-static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
++static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
++ struct outbound_queue_table *circularQ, void *piomb)
+ {
+ struct sas_task *t;
+ struct task_status_struct *ts;
+@@ -2802,7 +2844,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ u32 tag = le32_to_cpu(psataPayload->tag);
+ u32 port_id = le32_to_cpu(psataPayload->port_id);
+ u32 dev_id = le32_to_cpu(psataPayload->device_id);
+- unsigned long flags;
+
+ ccb = &pm8001_ha->ccb_info[tag];
+
+@@ -2840,8 +2881,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ ts->resp = SAS_TASK_COMPLETE;
+ ts->stat = SAS_DATA_OVERRUN;
+ ts->residual = 0;
+- if (pm8001_dev)
+- atomic_dec(&pm8001_dev->running_req);
+ break;
+ case IO_XFER_ERROR_BREAK:
+ pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
+@@ -2890,7 +2929,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
+ ts->resp = SAS_TASK_COMPLETE;
+ ts->stat = SAS_QUEUE_FULL;
+- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
+ return;
+ }
+ break;
+@@ -2990,20 +3028,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ ts->stat = SAS_OPEN_TO;
+ break;
+ }
+- spin_lock_irqsave(&t->task_state_lock, flags);
+- t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+- t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
+- t->task_state_flags |= SAS_TASK_STATE_DONE;
+- if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+- spin_unlock_irqrestore(&t->task_state_lock, flags);
+- pm8001_dbg(pm8001_ha, FAIL,
+- "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
+- t, event, ts->resp, ts->stat);
+- pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
+- } else {
+- spin_unlock_irqrestore(&t->task_state_lock, flags);
+- pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
+- }
+ }
+
+ /*See the comments for mpi_ssp_completion */
+@@ -3902,7 +3926,8 @@ static int ssp_coalesced_comp_resp(struct pm8001_hba_info *pm8001_ha,
+ * @pm8001_ha: our hba card information
+ * @piomb: IO message buffer
+ */
+-static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb)
++static void process_one_iomb(struct pm8001_hba_info *pm8001_ha,
++ struct outbound_queue_table *circularQ, void *piomb)
+ {
+ __le32 pHeader = *(__le32 *)piomb;
+ u32 opc = (u32)((le32_to_cpu(pHeader)) & 0xFFF);
+@@ -3944,11 +3969,11 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb)
+ break;
+ case OPC_OUB_SATA_COMP:
+ pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SATA_COMP\n");
+- mpi_sata_completion(pm8001_ha, piomb);
++ mpi_sata_completion(pm8001_ha, circularQ, piomb);
+ break;
+ case OPC_OUB_SATA_EVENT:
+ pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SATA_EVENT\n");
+- mpi_sata_event(pm8001_ha, piomb);
++ mpi_sata_event(pm8001_ha, circularQ, piomb);
+ break;
+ case OPC_OUB_SSP_EVENT:
+ pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SSP_EVENT\n");
+@@ -4117,13 +4142,24 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ void *pMsg1 = NULL;
+ u8 bc;
+ u32 ret = MPI_IO_STATUS_FAIL;
+- unsigned long flags;
+ u32 regval;
+
++ /*
++ * Fatal errors are programmed to be signalled in irq vector
++ * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl.
++ * fatal_err_interrupt
++ */
+ if (vec == (pm8001_ha->max_q_num - 1)) {
++ u32 mipsall_ready;
++
++ if (pm8001_ha->chip_id == chip_8008 ||
++ pm8001_ha->chip_id == chip_8009)
++ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT;
++ else
++ mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT;
++
+ regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
+- if ((regval & SCRATCH_PAD_MIPSALL_READY) !=
+- SCRATCH_PAD_MIPSALL_READY) {
++ if ((regval & mipsall_ready) != mipsall_ready) {
+ pm8001_ha->controller_fatal_error = true;
+ pm8001_dbg(pm8001_ha, FAIL,
+ "Firmware Fatal error! Regval:0x%x\n",
+@@ -4134,7 +4170,7 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ }
+ }
+ circularQ = &pm8001_ha->outbnd_q_tbl[vec];
+- spin_lock_irqsave(&circularQ->oq_lock, flags);
++ spin_lock_irqsave(&circularQ->oq_lock, circularQ->lock_flags);
+ do {
+ /* spurious interrupt during setup if kexec-ing and
+ * driver doing a doorbell access w/ the pre-kexec oq
+@@ -4145,7 +4181,8 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ ret = pm8001_mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc);
+ if (MPI_IO_STATUS_SUCCESS == ret) {
+ /* process the outbound message */
+- process_one_iomb(pm8001_ha, (void *)(pMsg1 - 4));
++ process_one_iomb(pm8001_ha, circularQ,
++ (void *)(pMsg1 - 4));
+ /* free the message from the outbound circular buffer */
+ pm8001_mpi_msg_free_set(pm8001_ha, pMsg1,
+ circularQ, bc);
+@@ -4160,7 +4197,7 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec)
+ break;
+ }
+ } while (1);
+- spin_unlock_irqrestore(&circularQ->oq_lock, flags);
++ spin_unlock_irqrestore(&circularQ->oq_lock, circularQ->lock_flags);
+ return ret;
+ }
+
+@@ -4359,13 +4396,15 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ struct ssp_ini_io_start_req ssp_cmd;
+ u32 tag = ccb->ccb_tag;
+ int ret;
+- u64 phys_addr, start_addr, end_addr;
++ u64 phys_addr, end_addr;
+ u32 end_addr_high, end_addr_low;
+ struct inbound_queue_table *circularQ;
+ u32 q_index, cpu_id;
+ u32 opc = OPC_INB_SSPINIIOSTART;
++
+ memset(&ssp_cmd, 0, sizeof(ssp_cmd));
+ memcpy(ssp_cmd.ssp_iu.lun, task->ssp_task.LUN, 8);
++
+ /* data address domain added for spcv; set to 0 by host,
+ * used internally by controller
+ * 0 for SAS 1.1 and SAS 2.0 compatible TLR
+@@ -4376,7 +4415,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ ssp_cmd.device_id = cpu_to_le32(pm8001_dev->device_id);
+ ssp_cmd.tag = cpu_to_le32(tag);
+ if (task->ssp_task.enable_first_burst)
+- ssp_cmd.ssp_iu.efb_prio_attr |= 0x80;
++ ssp_cmd.ssp_iu.efb_prio_attr = 0x80;
+ ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_prio << 3);
+ ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_attr & 7);
+ memcpy(ssp_cmd.ssp_iu.cdb, task->ssp_task.cmd->cmnd,
+@@ -4408,21 +4447,24 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ ssp_cmd.enc_esgl = cpu_to_le32(1<<31);
+ } else if (task->num_scatter == 1) {
+ u64 dma_addr = sg_dma_address(task->scatter);
++
+ ssp_cmd.enc_addr_low =
+ cpu_to_le32(lower_32_bits(dma_addr));
+ ssp_cmd.enc_addr_high =
+ cpu_to_le32(upper_32_bits(dma_addr));
+ ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
+ ssp_cmd.enc_esgl = 0;
++
+ /* Check 4G Boundary */
+- start_addr = cpu_to_le64(dma_addr);
+- end_addr = (start_addr + ssp_cmd.enc_len) - 1;
+- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
+- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
+- if (end_addr_high != ssp_cmd.enc_addr_high) {
++ end_addr = dma_addr + le32_to_cpu(ssp_cmd.enc_len) - 1;
++ end_addr_low = lower_32_bits(end_addr);
++ end_addr_high = upper_32_bits(end_addr);
++
++ if (end_addr_high != le32_to_cpu(ssp_cmd.enc_addr_high)) {
+ pm8001_dbg(pm8001_ha, FAIL,
+ "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
+- start_addr, ssp_cmd.enc_len,
++ dma_addr,
++ le32_to_cpu(ssp_cmd.enc_len),
+ end_addr_high, end_addr_low);
+ pm8001_chip_make_sg(task->scatter, 1,
+ ccb->buf_prd);
+@@ -4431,7 +4473,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ cpu_to_le32(lower_32_bits(phys_addr));
+ ssp_cmd.enc_addr_high =
+ cpu_to_le32(upper_32_bits(phys_addr));
+- ssp_cmd.enc_esgl = cpu_to_le32(1<<31);
++ ssp_cmd.enc_esgl = cpu_to_le32(1U<<31);
+ }
+ } else if (task->num_scatter == 0) {
+ ssp_cmd.enc_addr_low = 0;
+@@ -4439,8 +4481,10 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
+ ssp_cmd.enc_esgl = 0;
+ }
++
+ /* XTS mode. All other fields are 0 */
+- ssp_cmd.key_cmode = 0x6 << 4;
++ ssp_cmd.key_cmode = cpu_to_le32(0x6 << 4);
++
+ /* set tweak values. Should be the start lba */
+ ssp_cmd.twk_val0 = cpu_to_le32((task->ssp_task.cmd->cmnd[2] << 24) |
+ (task->ssp_task.cmd->cmnd[3] << 16) |
+@@ -4462,20 +4506,22 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
+ ssp_cmd.esgl = cpu_to_le32(1<<31);
+ } else if (task->num_scatter == 1) {
+ u64 dma_addr = sg_dma_address(task->scatter);
++
+ ssp_cmd.addr_low = cpu_to_le32(lower_32_bits(dma_addr));
+ ssp_cmd.addr_high =
+ cpu_to_le32(upper_32_bits(dma_addr));
+ ssp_cmd.len = cpu_to_le32(task->total_xfer_len);
+ ssp_cmd.esgl = 0;
++
+ /* Check 4G Boundary */
+- start_addr = cpu_to_le64(dma_addr);
+- end_addr = (start_addr + ssp_cmd.len) - 1;
+- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
+- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
+- if (end_addr_high != ssp_cmd.addr_high) {
++ end_addr = dma_addr + le32_to_cpu(ssp_cmd.len) - 1;
++ end_addr_low = lower_32_bits(end_addr);
++ end_addr_high = upper_32_bits(end_addr);
++ if (end_addr_high != le32_to_cpu(ssp_cmd.addr_high)) {
+ pm8001_dbg(pm8001_ha, FAIL,
+ "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
+- start_addr, ssp_cmd.len,
++ dma_addr,
++ le32_to_cpu(ssp_cmd.len),
+ end_addr_high, end_addr_low);
+ pm8001_chip_make_sg(task->scatter, 1,
+ ccb->buf_prd);
+@@ -4509,7 +4555,7 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ u32 q_index, cpu_id;
+ struct sata_start_req sata_cmd;
+ u32 hdr_tag, ncg_tag = 0;
+- u64 phys_addr, start_addr, end_addr;
++ u64 phys_addr, end_addr;
+ u32 end_addr_high, end_addr_low;
+ u32 ATAP = 0x0;
+ u32 dir;
+@@ -4521,22 +4567,21 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ q_index = (u32) (cpu_id) % (pm8001_ha->max_q_num);
+ circularQ = &pm8001_ha->inbnd_q_tbl[q_index];
+
+- if (task->data_dir == DMA_NONE) {
++ if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) {
+ ATAP = 0x04; /* no data*/
+ pm8001_dbg(pm8001_ha, IO, "no data\n");
+ } else if (likely(!task->ata_task.device_control_reg_update)) {
+- if (task->ata_task.dma_xfer) {
++ if (task->ata_task.use_ncq &&
++ dev->sata_dev.class != ATA_DEV_ATAPI) {
++ ATAP = 0x07; /* FPDMA */
++ pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
++ } else if (task->ata_task.dma_xfer) {
+ ATAP = 0x06; /* DMA */
+ pm8001_dbg(pm8001_ha, IO, "DMA\n");
+ } else {
+ ATAP = 0x05; /* PIO*/
+ pm8001_dbg(pm8001_ha, IO, "PIO\n");
+ }
+- if (task->ata_task.use_ncq &&
+- dev->sata_dev.class != ATA_DEV_ATAPI) {
+- ATAP = 0x07; /* FPDMA */
+- pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
+- }
+ }
+ if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) {
+ task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
+@@ -4570,32 +4615,38 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ pm8001_chip_make_sg(task->scatter,
+ ccb->n_elem, ccb->buf_prd);
+ phys_addr = ccb->ccb_dma_handle;
+- sata_cmd.enc_addr_low = lower_32_bits(phys_addr);
+- sata_cmd.enc_addr_high = upper_32_bits(phys_addr);
++ sata_cmd.enc_addr_low =
++ cpu_to_le32(lower_32_bits(phys_addr));
++ sata_cmd.enc_addr_high =
++ cpu_to_le32(upper_32_bits(phys_addr));
+ sata_cmd.enc_esgl = cpu_to_le32(1 << 31);
+ } else if (task->num_scatter == 1) {
+ u64 dma_addr = sg_dma_address(task->scatter);
+- sata_cmd.enc_addr_low = lower_32_bits(dma_addr);
+- sata_cmd.enc_addr_high = upper_32_bits(dma_addr);
++
++ sata_cmd.enc_addr_low =
++ cpu_to_le32(lower_32_bits(dma_addr));
++ sata_cmd.enc_addr_high =
++ cpu_to_le32(upper_32_bits(dma_addr));
+ sata_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
+ sata_cmd.enc_esgl = 0;
++
+ /* Check 4G Boundary */
+- start_addr = cpu_to_le64(dma_addr);
+- end_addr = (start_addr + sata_cmd.enc_len) - 1;
+- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
+- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
+- if (end_addr_high != sata_cmd.enc_addr_high) {
++ end_addr = dma_addr + le32_to_cpu(sata_cmd.enc_len) - 1;
++ end_addr_low = lower_32_bits(end_addr);
++ end_addr_high = upper_32_bits(end_addr);
++ if (end_addr_high != le32_to_cpu(sata_cmd.enc_addr_high)) {
+ pm8001_dbg(pm8001_ha, FAIL,
+ "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
+- start_addr, sata_cmd.enc_len,
++ dma_addr,
++ le32_to_cpu(sata_cmd.enc_len),
+ end_addr_high, end_addr_low);
+ pm8001_chip_make_sg(task->scatter, 1,
+ ccb->buf_prd);
+ phys_addr = ccb->ccb_dma_handle;
+ sata_cmd.enc_addr_low =
+- lower_32_bits(phys_addr);
++ cpu_to_le32(lower_32_bits(phys_addr));
+ sata_cmd.enc_addr_high =
+- upper_32_bits(phys_addr);
++ cpu_to_le32(upper_32_bits(phys_addr));
+ sata_cmd.enc_esgl =
+ cpu_to_le32(1 << 31);
+ }
+@@ -4606,7 +4657,8 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ sata_cmd.enc_esgl = 0;
+ }
+ /* XTS mode. All other fields are 0 */
+- sata_cmd.key_index_mode = 0x6 << 4;
++ sata_cmd.key_index_mode = cpu_to_le32(0x6 << 4);
++
+ /* set tweak values. Should be the start lba */
+ sata_cmd.twk_val0 =
+ cpu_to_le32((sata_cmd.sata_fis.lbal_exp << 24) |
+@@ -4632,31 +4684,31 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ phys_addr = ccb->ccb_dma_handle;
+ sata_cmd.addr_low = lower_32_bits(phys_addr);
+ sata_cmd.addr_high = upper_32_bits(phys_addr);
+- sata_cmd.esgl = cpu_to_le32(1 << 31);
++ sata_cmd.esgl = cpu_to_le32(1U << 31);
+ } else if (task->num_scatter == 1) {
+ u64 dma_addr = sg_dma_address(task->scatter);
++
+ sata_cmd.addr_low = lower_32_bits(dma_addr);
+ sata_cmd.addr_high = upper_32_bits(dma_addr);
+ sata_cmd.len = cpu_to_le32(task->total_xfer_len);
+ sata_cmd.esgl = 0;
++
+ /* Check 4G Boundary */
+- start_addr = cpu_to_le64(dma_addr);
+- end_addr = (start_addr + sata_cmd.len) - 1;
+- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
+- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
++ end_addr = dma_addr + le32_to_cpu(sata_cmd.len) - 1;
++ end_addr_low = lower_32_bits(end_addr);
++ end_addr_high = upper_32_bits(end_addr);
+ if (end_addr_high != sata_cmd.addr_high) {
+ pm8001_dbg(pm8001_ha, FAIL,
+ "The sg list address start_addr=0x%016llx data_len=0x%xend_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
+- start_addr, sata_cmd.len,
++ dma_addr,
++ le32_to_cpu(sata_cmd.len),
+ end_addr_high, end_addr_low);
+ pm8001_chip_make_sg(task->scatter, 1,
+ ccb->buf_prd);
+ phys_addr = ccb->ccb_dma_handle;
+- sata_cmd.addr_low =
+- lower_32_bits(phys_addr);
+- sata_cmd.addr_high =
+- upper_32_bits(phys_addr);
+- sata_cmd.esgl = cpu_to_le32(1 << 31);
++ sata_cmd.addr_low = lower_32_bits(phys_addr);
++ sata_cmd.addr_high = upper_32_bits(phys_addr);
++ sata_cmd.esgl = cpu_to_le32(1U << 31);
+ }
+ } else if (task->num_scatter == 0) {
+ sata_cmd.addr_low = 0;
+@@ -4664,27 +4716,28 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
+ sata_cmd.len = cpu_to_le32(task->total_xfer_len);
+ sata_cmd.esgl = 0;
+ }
++
+ /* scsi cdb */
+ sata_cmd.atapi_scsi_cdb[0] =
+ cpu_to_le32(((task->ata_task.atapi_packet[0]) |
+- (task->ata_task.atapi_packet[1] << 8) |
+- (task->ata_task.atapi_packet[2] << 16) |
+- (task->ata_task.atapi_packet[3] << 24)));
++ (task->ata_task.atapi_packet[1] << 8) |
++ (task->ata_task.atapi_packet[2] << 16) |
++ (task->ata_task.atapi_packet[3] << 24)));
+ sata_cmd.atapi_scsi_cdb[1] =
+ cpu_to_le32(((task->ata_task.atapi_packet[4]) |
+- (task->ata_task.atapi_packet[5] << 8) |
+- (task->ata_task.atapi_packet[6] << 16) |
+- (task->ata_task.atapi_packet[7] << 24)));
++ (task->ata_task.atapi_packet[5] << 8) |
++ (task->ata_task.atapi_packet[6] << 16) |
++ (task->ata_task.atapi_packet[7] << 24)));
+ sata_cmd.atapi_scsi_cdb[2] =
+ cpu_to_le32(((task->ata_task.atapi_packet[8]) |
+- (task->ata_task.atapi_packet[9] << 8) |
+- (task->ata_task.atapi_packet[10] << 16) |
+- (task->ata_task.atapi_packet[11] << 24)));
++ (task->ata_task.atapi_packet[9] << 8) |
++ (task->ata_task.atapi_packet[10] << 16) |
++ (task->ata_task.atapi_packet[11] << 24)));
+ sata_cmd.atapi_scsi_cdb[3] =
+ cpu_to_le32(((task->ata_task.atapi_packet[12]) |
+- (task->ata_task.atapi_packet[13] << 8) |
+- (task->ata_task.atapi_packet[14] << 16) |
+- (task->ata_task.atapi_packet[15] << 24)));
++ (task->ata_task.atapi_packet[13] << 8) |
++ (task->ata_task.atapi_packet[14] << 16) |
++ (task->ata_task.atapi_packet[15] << 24)));
+ }
+
+ /* Check for read log for failed drive and return */
+@@ -4882,8 +4935,13 @@ static int pm80xx_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha,
+ payload.tag = cpu_to_le32(tag);
+ payload.phyop_phyid =
+ cpu_to_le32(((phy_op & 0xFF) << 8) | (phyId & 0xFF));
+- return pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
+- sizeof(payload), 0);
++
++ rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
++ sizeof(payload), 0);
++ if (rc)
++ pm8001_tag_free(pm8001_ha, tag);
++
++ return rc;
+ }
+
+ static u32 pm80xx_chip_is_our_interrupt(struct pm8001_hba_info *pm8001_ha)
+diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
+index c7e5d93bea924..c41ed039c92ac 100644
+--- a/drivers/scsi/pm8001/pm80xx_hwi.h
++++ b/drivers/scsi/pm8001/pm80xx_hwi.h
+@@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t;
+ #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0
+ #define SCRATCH_PAD_IOP0_READY 0xC00
+ #define SCRATCH_PAD_IOP1_READY 0x3000
+-#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \
++#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \
+ SCRATCH_PAD_IOP0_READY | \
++ SCRATCH_PAD_ILA_READY | \
++ SCRATCH_PAD_RAAE_READY)
++#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \
++ SCRATCH_PAD_ILA_READY | \
+ SCRATCH_PAD_RAAE_READY)
+
+ /* boot loader state */
+diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
+index bffd9a9349e72..9660c4f4de40b 100644
+--- a/drivers/scsi/pmcraid.c
++++ b/drivers/scsi/pmcraid.c
+@@ -4526,7 +4526,7 @@ pmcraid_register_interrupt_handler(struct pmcraid_instance *pinstance)
+ return 0;
+
+ out_unwind:
+- while (--i > 0)
++ while (--i >= 0)
+ free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]);
+ pci_free_irq_vectors(pdev);
+ return rc;
+diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h
+index f4d81127239eb..5ec2b817c694a 100644
+--- a/drivers/scsi/qedf/qedf_dbg.h
++++ b/drivers/scsi/qedf/qedf_dbg.h
+@@ -59,6 +59,8 @@ extern uint qedf_debug;
+ #define QEDF_LOG_NOTICE 0x40000000 /* Notice logs */
+ #define QEDF_LOG_WARN 0x80000000 /* Warning logs */
+
++#define QEDF_DEBUGFS_LOG_LEN (2 * PAGE_SIZE)
++
+ /* Debug context structure */
+ struct qedf_dbg_ctx {
+ unsigned int host_no;
+diff --git a/drivers/scsi/qedf/qedf_debugfs.c b/drivers/scsi/qedf/qedf_debugfs.c
+index a3ed681c8ce3f..451fd236bfd05 100644
+--- a/drivers/scsi/qedf/qedf_debugfs.c
++++ b/drivers/scsi/qedf/qedf_debugfs.c
+@@ -8,6 +8,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/debugfs.h>
+ #include <linux/module.h>
++#include <linux/vmalloc.h>
+
+ #include "qedf.h"
+ #include "qedf_dbg.h"
+@@ -98,7 +99,9 @@ static ssize_t
+ qedf_dbg_fp_int_cmd_read(struct file *filp, char __user *buffer, size_t count,
+ loff_t *ppos)
+ {
++ ssize_t ret;
+ size_t cnt = 0;
++ char *cbuf;
+ int id;
+ struct qedf_fastpath *fp = NULL;
+ struct qedf_dbg_ctx *qedf_dbg =
+@@ -108,19 +111,25 @@ qedf_dbg_fp_int_cmd_read(struct file *filp, char __user *buffer, size_t count,
+
+ QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n");
+
+- cnt = sprintf(buffer, "\nFastpath I/O completions\n\n");
++ cbuf = vmalloc(QEDF_DEBUGFS_LOG_LEN);
++ if (!cbuf)
++ return 0;
++
++ cnt += scnprintf(cbuf + cnt, QEDF_DEBUGFS_LOG_LEN - cnt, "\nFastpath I/O completions\n\n");
+
+ for (id = 0; id < qedf->num_queues; id++) {
+ fp = &(qedf->fp_array[id]);
+ if (fp->sb_id == QEDF_SB_ID_NULL)
+ continue;
+- cnt += sprintf((buffer + cnt), "#%d: %lu\n", id,
+- fp->completions);
++ cnt += scnprintf(cbuf + cnt, QEDF_DEBUGFS_LOG_LEN - cnt,
++ "#%d: %lu\n", id, fp->completions);
+ }
+
+- cnt = min_t(int, count, cnt - *ppos);
+- *ppos += cnt;
+- return cnt;
++ ret = simple_read_from_buffer(buffer, count, ppos, cbuf, cnt);
++
++ vfree(cbuf);
++
++ return ret;
+ }
+
+ static ssize_t
+@@ -138,15 +147,14 @@ qedf_dbg_debug_cmd_read(struct file *filp, char __user *buffer, size_t count,
+ loff_t *ppos)
+ {
+ int cnt;
++ char cbuf[32];
+ struct qedf_dbg_ctx *qedf_dbg =
+ (struct qedf_dbg_ctx *)filp->private_data;
+
+ QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "debug mask=0x%x\n", qedf_debug);
+- cnt = sprintf(buffer, "debug mask = 0x%x\n", qedf_debug);
++ cnt = scnprintf(cbuf, sizeof(cbuf), "debug mask = 0x%x\n", qedf_debug);
+
+- cnt = min_t(int, count, cnt - *ppos);
+- *ppos += cnt;
+- return cnt;
++ return simple_read_from_buffer(buffer, count, ppos, cbuf, cnt);
+ }
+
+ static ssize_t
+@@ -185,18 +193,17 @@ qedf_dbg_stop_io_on_error_cmd_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+ {
+ int cnt;
++ char cbuf[7];
+ struct qedf_dbg_ctx *qedf_dbg =
+ (struct qedf_dbg_ctx *)filp->private_data;
+ struct qedf_ctx *qedf = container_of(qedf_dbg,
+ struct qedf_ctx, dbg_ctx);
+
+ QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n");
+- cnt = sprintf(buffer, "%s\n",
++ cnt = scnprintf(cbuf, sizeof(cbuf), "%s\n",
+ qedf->stop_io_on_error ? "true" : "false");
+
+- cnt = min_t(int, count, cnt - *ppos);
+- *ppos += cnt;
+- return cnt;
++ return simple_read_from_buffer(buffer, count, ppos, cbuf, cnt);
+ }
+
+ static ssize_t
+diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
+index 3404782988d58..bb5761ed3f511 100644
+--- a/drivers/scsi/qedf/qedf_io.c
++++ b/drivers/scsi/qedf/qedf_io.c
+@@ -2257,6 +2257,7 @@ process_els:
+ io_req->tm_flags == FCP_TMF_TGT_RESET) {
+ clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags);
+ io_req->sc_cmd = NULL;
++ kref_put(&io_req->refcount, qedf_release_cmd);
+ complete(&io_req->tm_done);
+ }
+
+diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
+index 42d0d941dba5c..61959dd2237fc 100644
+--- a/drivers/scsi/qedf/qedf_main.c
++++ b/drivers/scsi/qedf/qedf_main.c
+@@ -31,6 +31,7 @@ static void qedf_remove(struct pci_dev *pdev);
+ static void qedf_shutdown(struct pci_dev *pdev);
+ static void qedf_schedule_recovery_handler(void *dev);
+ static void qedf_recovery_handler(struct work_struct *work);
++static int qedf_suspend(struct pci_dev *pdev, pm_message_t state);
+
+ /*
+ * Driver module parameters.
+@@ -911,7 +912,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
+ struct qed_link_output if_link;
+
+ if (lport->vport) {
+- QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n");
++ printk_ratelimited("Cannot issue host reset on NPIV port.\n");
+ return;
+ }
+
+@@ -1862,6 +1863,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
+ vport_qedf->cmd_mgr = base_qedf->cmd_mgr;
+ init_completion(&vport_qedf->flogi_compl);
+ INIT_LIST_HEAD(&vport_qedf->fcports);
++ INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work);
+
+ rc = qedf_vport_libfc_config(vport, vn_port);
+ if (rc) {
+@@ -1920,6 +1922,27 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
+ fc_vport_setlink(vn_port);
+ }
+
++ /* Set symbolic node name */
++ if (base_qedf->pdev->device == QL45xxx)
++ snprintf(fc_host_symbolic_name(vn_port->host), 256,
++ "Marvell FastLinQ 45xxx FCoE v%s", QEDF_VERSION);
++
++ if (base_qedf->pdev->device == QL41xxx)
++ snprintf(fc_host_symbolic_name(vn_port->host), 256,
++ "Marvell FastLinQ 41xxx FCoE v%s", QEDF_VERSION);
++
++ /* Set supported speed */
++ fc_host_supported_speeds(vn_port->host) = n_port->link_supported_speeds;
++
++ /* Set speed */
++ vn_port->link_speed = n_port->link_speed;
++
++ /* Set port type */
++ fc_host_port_type(vn_port->host) = FC_PORTTYPE_NPIV;
++
++ /* Set maxframe size */
++ fc_host_maxframe_size(vn_port->host) = n_port->mfs;
++
+ QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n",
+ vn_port);
+
+@@ -3024,9 +3047,8 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf)
+ * addresses of our queues
+ */
+ if (!qedf->p_cpuq) {
+- status = -EINVAL;
+ QEDF_ERR(&qedf->dbg_ctx, "p_cpuq is NULL.\n");
+- goto mem_alloc_failure;
++ return -EINVAL;
+ }
+
+ qedf->global_queues = kzalloc((sizeof(struct global_queue *)
+@@ -3255,6 +3277,7 @@ static struct pci_driver qedf_pci_driver = {
+ .probe = qedf_probe,
+ .remove = qedf_remove,
+ .shutdown = qedf_shutdown,
++ .suspend = qedf_suspend,
+ };
+
+ static int __qedf_probe(struct pci_dev *pdev, int mode)
+@@ -3416,7 +3439,9 @@ retry_probe:
+ qedf->devlink = qed_ops->common->devlink_register(qedf->cdev);
+ if (IS_ERR(qedf->devlink)) {
+ QEDF_ERR(&qedf->dbg_ctx, "Cannot register devlink\n");
++ rc = PTR_ERR(qedf->devlink);
+ qedf->devlink = NULL;
++ goto err2;
+ }
+ }
+
+@@ -3683,11 +3708,6 @@ err2:
+ err1:
+ scsi_host_put(lport->host);
+ err0:
+- if (qedf) {
+- QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, "Probe done.\n");
+-
+- clear_bit(QEDF_PROBING, &qedf->flags);
+- }
+ return rc;
+ }
+
+@@ -3976,7 +3996,9 @@ void qedf_stag_change_work(struct work_struct *work)
+ struct qedf_ctx *qedf =
+ container_of(work, struct qedf_ctx, stag_work.work);
+
+- QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n");
++ printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.",
++ dev_name(&qedf->pdev->dev), __func__, __LINE__,
++ qedf->dbg_ctx.host_no);
+ qedf_ctx_soft_reset(qedf->lport);
+ }
+
+@@ -3985,6 +4007,22 @@ static void qedf_shutdown(struct pci_dev *pdev)
+ __qedf_remove(pdev, QEDF_MODE_NORMAL);
+ }
+
++static int qedf_suspend(struct pci_dev *pdev, pm_message_t state)
++{
++ struct qedf_ctx *qedf;
++
++ if (!pdev) {
++ QEDF_ERR(NULL, "pdev is NULL.\n");
++ return -ENODEV;
++ }
++
++ qedf = pci_get_drvdata(pdev);
++
++ QEDF_ERR(&qedf->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
++
++ return -EPERM;
++}
++
+ /*
+ * Recovery handler code
+ */
+diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
+index d01cd829ef975..df9ce6ed52bf9 100644
+--- a/drivers/scsi/qedi/qedi_fw.c
++++ b/drivers/scsi/qedi/qedi_fw.c
+@@ -772,11 +772,10 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
+ qedi_cmd->list_tmf_work = NULL;
+ }
+ }
++ spin_unlock_bh(&qedi_conn->tmf_work_lock);
+
+- if (!found) {
+- spin_unlock_bh(&qedi_conn->tmf_work_lock);
++ if (!found)
+ goto check_cleanup_reqs;
+- }
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
+ "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
+@@ -807,7 +806,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
+ qedi_cmd->state = CLEANUP_RECV;
+ unlock:
+ spin_unlock_bh(&conn->session->back_lock);
+- spin_unlock_bh(&qedi_conn->tmf_work_lock);
+ wake_up_interruptible(&qedi_conn->wait_queue);
+ return;
+
+diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
+index c5260429c637e..04b40a6c1afff 100644
+--- a/drivers/scsi/qedi/qedi_iscsi.c
++++ b/drivers/scsi/qedi/qedi_iscsi.c
+@@ -859,6 +859,37 @@ static int qedi_task_xmit(struct iscsi_task *task)
+ return qedi_iscsi_send_ioreq(task);
+ }
+
++static void qedi_offload_work(struct work_struct *work)
++{
++ struct qedi_endpoint *qedi_ep =
++ container_of(work, struct qedi_endpoint, offload_work);
++ struct qedi_ctx *qedi;
++ int wait_delay = 5 * HZ;
++ int ret;
++
++ qedi = qedi_ep->qedi;
++
++ ret = qedi_iscsi_offload_conn(qedi_ep);
++ if (ret) {
++ QEDI_ERR(&qedi->dbg_ctx,
++ "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
++ qedi_ep->iscsi_cid, qedi_ep, ret);
++ qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
++ return;
++ }
++
++ ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
++ (qedi_ep->state ==
++ EP_STATE_OFLDCONN_COMPL),
++ wait_delay);
++ if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) {
++ qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
++ QEDI_ERR(&qedi->dbg_ctx,
++ "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
++ qedi_ep->iscsi_cid, qedi_ep);
++ }
++}
++
+ static struct iscsi_endpoint *
+ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
+ int non_blocking)
+@@ -907,6 +938,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
+ }
+ qedi_ep = ep->dd_data;
+ memset(qedi_ep, 0, sizeof(struct qedi_endpoint));
++ INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
+ qedi_ep->state = EP_STATE_IDLE;
+ qedi_ep->iscsi_cid = (u32)-1;
+ qedi_ep->qedi = qedi;
+@@ -1055,12 +1087,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
+ qedi_ep = ep->dd_data;
+ qedi = qedi_ep->qedi;
+
++ flush_work(&qedi_ep->offload_work);
++
+ if (qedi_ep->state == EP_STATE_OFLDCONN_START)
+ goto ep_exit_recover;
+
+- if (qedi_ep->state != EP_STATE_OFLDCONN_NONE)
+- flush_work(&qedi_ep->offload_work);
+-
+ if (qedi_ep->conn) {
+ qedi_conn = qedi_ep->conn;
+ abrt_conn = qedi_conn->abrt_conn;
+@@ -1234,37 +1265,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid)
+ return rc;
+ }
+
+-static void qedi_offload_work(struct work_struct *work)
+-{
+- struct qedi_endpoint *qedi_ep =
+- container_of(work, struct qedi_endpoint, offload_work);
+- struct qedi_ctx *qedi;
+- int wait_delay = 5 * HZ;
+- int ret;
+-
+- qedi = qedi_ep->qedi;
+-
+- ret = qedi_iscsi_offload_conn(qedi_ep);
+- if (ret) {
+- QEDI_ERR(&qedi->dbg_ctx,
+- "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n",
+- qedi_ep->iscsi_cid, qedi_ep, ret);
+- qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+- return;
+- }
+-
+- ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait,
+- (qedi_ep->state ==
+- EP_STATE_OFLDCONN_COMPL),
+- wait_delay);
+- if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) {
+- qedi_ep->state = EP_STATE_OFLDCONN_FAILED;
+- QEDI_ERR(&qedi->dbg_ctx,
+- "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n",
+- qedi_ep->iscsi_cid, qedi_ep);
+- }
+-}
+-
+ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
+ {
+ struct qedi_ctx *qedi;
+@@ -1380,7 +1380,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
+ qedi_ep->dst_addr, qedi_ep->dst_port);
+ }
+
+- INIT_WORK(&qedi_ep->offload_work, qedi_offload_work);
+ queue_work(qedi->offload_thread, &qedi_ep->offload_work);
+
+ ret = 0;
+diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
+index e6dc0b495a829..b36edbef5b82f 100644
+--- a/drivers/scsi/qedi/qedi_main.c
++++ b/drivers/scsi/qedi/qedi_main.c
+@@ -69,6 +69,7 @@ static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
+ static void qedi_recovery_handler(struct work_struct *work);
+ static void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
++static int qedi_suspend(struct pci_dev *pdev, pm_message_t state);
+
+ static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
+ {
+@@ -1978,8 +1979,9 @@ static int qedi_cpu_offline(unsigned int cpu)
+ struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
+ struct qedi_work *work, *tmp;
+ struct task_struct *thread;
++ unsigned long flags;
+
+- spin_lock_bh(&p->p_work_lock);
++ spin_lock_irqsave(&p->p_work_lock, flags);
+ thread = p->iothread;
+ p->iothread = NULL;
+
+@@ -1990,7 +1992,7 @@ static int qedi_cpu_offline(unsigned int cpu)
+ kfree(work);
+ }
+
+- spin_unlock_bh(&p->p_work_lock);
++ spin_unlock_irqrestore(&p->p_work_lock, flags);
+ if (thread)
+ kthread_stop(thread);
+ return 0;
+@@ -2417,9 +2419,12 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
+ int rval;
+ u16 retry = 10;
+
+- if (mode == QEDI_MODE_NORMAL || mode == QEDI_MODE_SHUTDOWN) {
+- iscsi_host_remove(qedi->shost);
++ if (mode == QEDI_MODE_NORMAL)
++ iscsi_host_remove(qedi->shost, false);
++ else if (mode == QEDI_MODE_SHUTDOWN)
++ iscsi_host_remove(qedi->shost, true);
+
++ if (mode == QEDI_MODE_NORMAL || mode == QEDI_MODE_SHUTDOWN) {
+ if (qedi->tmf_thread) {
+ flush_workqueue(qedi->tmf_thread);
+ destroy_workqueue(qedi->tmf_thread);
+@@ -2452,6 +2457,9 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
+ qedi_ops->ll2->stop(qedi->cdev);
+ }
+
++ cancel_delayed_work_sync(&qedi->recovery_work);
++ cancel_delayed_work_sync(&qedi->board_disable_work);
++
+ qedi_free_iscsi_pf_param(qedi);
+
+ rval = qedi_ops->common->update_drv_state(qedi->cdev, false);
+@@ -2509,6 +2517,22 @@ static void qedi_shutdown(struct pci_dev *pdev)
+ __qedi_remove(pdev, QEDI_MODE_SHUTDOWN);
+ }
+
++static int qedi_suspend(struct pci_dev *pdev, pm_message_t state)
++{
++ struct qedi_ctx *qedi;
++
++ if (!pdev) {
++ QEDI_ERR(NULL, "pdev is NULL.\n");
++ return -ENODEV;
++ }
++
++ qedi = pci_get_drvdata(pdev);
++
++ QEDI_ERR(&qedi->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
++
++ return -EPERM;
++}
++
+ static int __qedi_probe(struct pci_dev *pdev, int mode)
+ {
+ struct qedi_ctx *qedi;
+@@ -2796,7 +2820,7 @@ remove_host:
+ #ifdef CONFIG_DEBUG_FS
+ qedi_dbg_host_exit(&qedi->dbg_ctx);
+ #endif
+- iscsi_host_remove(qedi->shost);
++ iscsi_host_remove(qedi->shost, false);
+ stop_iscsi_func:
+ qedi_ops->stop(qedi->cdev);
+ stop_slowpath:
+@@ -2867,6 +2891,7 @@ static struct pci_driver qedi_pci_driver = {
+ .remove = qedi_remove,
+ .shutdown = qedi_shutdown,
+ .err_handler = &qedi_err_handler,
++ .suspend = qedi_suspend,
+ };
+
+ static int __init qedi_init(void)
+diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
+index d09776b77af2e..fe0e8b23a805e 100644
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -555,7 +555,7 @@ qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj,
+ if (!capable(CAP_SYS_ADMIN))
+ return -EINVAL;
+
+- if (IS_NOCACHE_VPD_TYPE(ha))
++ if (!IS_NOCACHE_VPD_TYPE(ha))
+ goto skip;
+
+ faddr = ha->flt_region_vpd << 2;
+@@ -745,7 +745,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
+ ql_log(ql_log_info, vha, 0x706f,
+ "Issuing MPI reset.\n");
+
+- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
++ if (IS_QLA83XX(ha)) {
+ uint32_t idc_control;
+
+ qla83xx_idc_lock(vha, 0);
+@@ -1056,9 +1056,6 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon)
+ continue;
+ if (iter->type == 3 && !(IS_CNA_CAPABLE(ha)))
+ continue;
+- if (iter->type == 0x27 &&
+- (!IS_QLA27XX(ha) || !IS_QLA28XX(ha)))
+- continue;
+
+ sysfs_remove_bin_file(&host->shost_gendev.kobj,
+ iter->attr);
+@@ -1868,6 +1865,18 @@ qla2x00_port_speed_store(struct device *dev, struct device_attribute *attr,
+ return strlen(buf);
+ }
+
++static const struct {
++ u16 rate;
++ char *str;
++} port_speed_str[] = {
++ { PORT_SPEED_4GB, "4" },
++ { PORT_SPEED_8GB, "8" },
++ { PORT_SPEED_16GB, "16" },
++ { PORT_SPEED_32GB, "32" },
++ { PORT_SPEED_64GB, "64" },
++ { PORT_SPEED_10GB, "10" },
++};
++
+ static ssize_t
+ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+@@ -1875,7 +1884,8 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr,
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(dev));
+ struct qla_hw_data *ha = vha->hw;
+ ssize_t rval;
+- char *spd[7] = {"0", "0", "0", "4", "8", "16", "32"};
++ u16 i;
++ char *speed = "Unknown";
+
+ rval = qla2x00_get_data_rate(vha);
+ if (rval != QLA_SUCCESS) {
+@@ -1884,7 +1894,14 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr,
+ return -EINVAL;
+ }
+
+- return scnprintf(buf, PAGE_SIZE, "%s\n", spd[ha->link_data_rate]);
++ for (i = 0; i < ARRAY_SIZE(port_speed_str); i++) {
++ if (port_speed_str[i].rate != ha->link_data_rate)
++ continue;
++ speed = port_speed_str[i].str;
++ break;
++ }
++
++ return scnprintf(buf, PAGE_SIZE, "%s\n", speed);
+ }
+
+ static ssize_t
+@@ -2688,17 +2705,27 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
+ if (!fcport)
+ return;
+
+- /* Now that the rport has been deleted, set the fcport state to
+- FCS_DEVICE_DEAD */
+- qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
++ ql_dbg(ql_dbg_async, fcport->vha, 0x5101,
++ DBG_FCPORT_PRFMT(fcport, "dev_loss_tmo expiry, rport_state=%d",
++ rport->port_state));
++
++ /*
++ * Now that the rport has been deleted, set the fcport state to
++ * FCS_DEVICE_DEAD, if the fcport is still lost.
++ */
++ if (fcport->scan_state != QLA_FCPORT_FOUND)
++ qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
+
+ /*
+ * Transport has effectively 'deleted' the rport, clear
+ * all local references.
+ */
+ spin_lock_irqsave(host->host_lock, flags);
+- fcport->rport = fcport->drport = NULL;
+- *((fc_port_t **)rport->dd_data) = NULL;
++ /* Confirm port has not reappeared before clearing pointers. */
++ if (rport->port_state != FC_PORTSTATE_ONLINE) {
++ fcport->rport = fcport->drport = NULL;
++ *((fc_port_t **)rport->dd_data) = NULL;
++ }
+ spin_unlock_irqrestore(host->host_lock, flags);
+
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+@@ -2714,6 +2741,7 @@ static void
+ qla2x00_terminate_rport_io(struct fc_rport *rport)
+ {
+ fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
++ scsi_qla_host_t *vha;
+
+ if (!fcport)
+ return;
+@@ -2723,25 +2751,45 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
+
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+ return;
++ vha = fcport->vha;
+
+ if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
+ qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
++ qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
++ 0, WAIT_TARGET);
+ return;
+ }
+ /*
+ * At this point all fcport's software-states are cleared. Perform any
+ * final cleanup of firmware resources (PCBs and XCBs).
++ *
++ * Attempt to cleanup only lost devices.
+ */
+ if (fcport->loop_id != FC_NO_LOOP_ID) {
+- if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
++ if (IS_FWI2_CAPABLE(fcport->vha->hw) &&
++ fcport->scan_state != QLA_FCPORT_FOUND) {
+ if (fcport->loop_id != FC_NO_LOOP_ID)
+ fcport->logout_on_delete = 1;
+
+- qlt_schedule_sess_for_deletion(fcport);
+- } else {
++ if (!EDIF_NEGOTIATION_PENDING(fcport)) {
++ ql_dbg(ql_dbg_disc, fcport->vha, 0x911e,
++ "%s %d schedule session deletion\n", __func__,
++ __LINE__);
++ qlt_schedule_sess_for_deletion(fcport);
++ }
++ } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) {
+ qla2x00_port_logout(fcport->vha, fcport);
+ }
+ }
++
++ /* check for any straggling io left behind */
++ if (qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET)) {
++ ql_log(ql_log_warn, vha, 0x300b,
++ "IO not return. Resetting. \n");
++ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
++ qla2xxx_wake_dpc(vha);
++ qla2x00_wait_for_chip_reset(vha);
++ }
+ }
+
+ static int
+@@ -3036,8 +3084,6 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
+ vha->flags.difdix_supported = 1;
+ ql_dbg(ql_dbg_user, vha, 0x7082,
+ "Registered for DIF/DIX type 1 and 3 protection.\n");
+- if (ql2xenabledif == 1)
+- prot = SHOST_DIX_TYPE0_PROTECTION;
+ scsi_host_set_prot(vha->host,
+ prot | SHOST_DIF_TYPE1_PROTECTION
+ | SHOST_DIF_TYPE2_PROTECTION
+@@ -3286,11 +3332,34 @@ struct fc_function_template qla2xxx_transport_vport_functions = {
+ .bsg_timeout = qla24xx_bsg_timeout,
+ };
+
++static uint
++qla2x00_get_host_supported_speeds(scsi_qla_host_t *vha, uint speeds)
++{
++ uint supported_speeds = FC_PORTSPEED_UNKNOWN;
++
++ if (speeds & FDMI_PORT_SPEED_64GB)
++ supported_speeds |= FC_PORTSPEED_64GBIT;
++ if (speeds & FDMI_PORT_SPEED_32GB)
++ supported_speeds |= FC_PORTSPEED_32GBIT;
++ if (speeds & FDMI_PORT_SPEED_16GB)
++ supported_speeds |= FC_PORTSPEED_16GBIT;
++ if (speeds & FDMI_PORT_SPEED_8GB)
++ supported_speeds |= FC_PORTSPEED_8GBIT;
++ if (speeds & FDMI_PORT_SPEED_4GB)
++ supported_speeds |= FC_PORTSPEED_4GBIT;
++ if (speeds & FDMI_PORT_SPEED_2GB)
++ supported_speeds |= FC_PORTSPEED_2GBIT;
++ if (speeds & FDMI_PORT_SPEED_1GB)
++ supported_speeds |= FC_PORTSPEED_1GBIT;
++
++ return supported_speeds;
++}
++
+ void
+ qla2x00_init_host_attr(scsi_qla_host_t *vha)
+ {
+ struct qla_hw_data *ha = vha->hw;
+- u32 speeds = FC_PORTSPEED_UNKNOWN;
++ u32 speeds = 0, fdmi_speed = 0;
+
+ fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count;
+ fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name);
+@@ -3300,7 +3369,8 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha)
+ fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports;
+ fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count;
+
+- speeds = qla25xx_fdmi_port_speed_capability(ha);
++ fdmi_speed = qla25xx_fdmi_port_speed_capability(ha);
++ speeds = qla2x00_get_host_supported_speeds(vha, fdmi_speed);
+
+ fc_host_supported_speeds(vha->host) = speeds;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
+index 655cf5de604b8..5db43b6b76c52 100644
+--- a/drivers/scsi/qla2xxx/qla_bsg.c
++++ b/drivers/scsi/qla2xxx/qla_bsg.c
+@@ -29,7 +29,8 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
+ "%s: sp hdl %x, result=%x bsg ptr %p\n",
+ __func__, sp->handle, res, bsg_job);
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+ bsg_reply->result = res;
+ bsg_job_done(bsg_job, bsg_reply->result,
+@@ -277,11 +278,15 @@ qla2x00_process_els(struct bsg_job *bsg_job)
+ const char *type;
+ int req_sg_cnt, rsp_sg_cnt;
+ int rval = (DID_ERROR << 16);
+- uint16_t nextlid = 0;
+ uint32_t els_cmd = 0;
++ int qla_port_allocated = 0;
+
+ if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
+ rport = fc_bsg_to_rport(bsg_job);
++ if (!rport) {
++ rval = -ENOMEM;
++ goto done;
++ }
+ fcport = *(fc_port_t **) rport->dd_data;
+ host = rport_to_shost(rport);
+ vha = shost_priv(host);
+@@ -328,9 +333,9 @@ qla2x00_process_els(struct bsg_job *bsg_job)
+ /* make sure the rport is logged in,
+ * if not perform fabric login
+ */
+- if (qla2x00_fabric_login(vha, fcport, &nextlid)) {
++ if (atomic_read(&fcport->state) != FCS_ONLINE) {
+ ql_dbg(ql_dbg_user, vha, 0x7003,
+- "Failed to login port %06X for ELS passthru.\n",
++ "Port %06X is not online for ELS passthru.\n",
+ fcport->d_id.b24);
+ rval = -EIO;
+ goto done;
+@@ -347,6 +352,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
+ goto done;
+ }
+
++ qla_port_allocated = 1;
+ /* Initialize all required fields of fcport */
+ fcport->vha = vha;
+ fcport->d_id.b.al_pa =
+@@ -431,7 +437,7 @@ done_unmap_sg:
+ goto done_free_fcport;
+
+ done_free_fcport:
+- if (bsg_request->msgcode != FC_BSG_RPT_ELS)
++ if (qla_port_allocated)
+ qla2x00_free_fcport(fcport);
+ done:
+ return rval;
+@@ -2897,6 +2903,8 @@ qla24xx_bsg_request(struct bsg_job *bsg_job)
+
+ if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
+ rport = fc_bsg_to_rport(bsg_job);
++ if (!rport)
++ return ret;
+ host = rport_to_shost(rport);
+ vha = shost_priv(host);
+ } else {
+@@ -2971,6 +2979,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+
+ ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
+ __func__, bsg_job);
++
++ if (qla2x00_isp_reg_stat(ha)) {
++ ql_log(ql_log_info, vha, 0x9007,
++ "PCI/Register disconnect.\n");
++ qla_pci_set_eeh_busy(vha);
++ }
++
+ /* find the bsg job from the active list of commands */
+ spin_lock_irqsave(&ha->hardware_lock, flags);
+ for (que = 0; que < ha->max_req_queues; que++) {
+@@ -2988,7 +3003,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+ sp->u.bsg_job == bsg_job) {
+ req->outstanding_cmds[cnt] = NULL;
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+- if (ha->isp_ops->abort_command(sp)) {
++
++ if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
+ ql_log(ql_log_warn, vha, 0x7089,
+ "mbx abort_command failed.\n");
+ bsg_reply->result = -EIO;
+@@ -3010,6 +3026,7 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+
+ done:
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return 0;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
+index 25549a8a2d72d..8e9ffbec6643f 100644
+--- a/drivers/scsi/qla2xxx/qla_dbg.c
++++ b/drivers/scsi/qla2xxx/qla_dbg.c
+@@ -18,7 +18,7 @@
+ * | Queue Command and IO tracing | 0x3074 | 0x300b |
+ * | | | 0x3027-0x3028 |
+ * | | | 0x303d-0x3041 |
+- * | | | 0x302d,0x3033 |
++ * | | | 0x302e,0x3033 |
+ * | | | 0x3036,0x3038 |
+ * | | | 0x303a |
+ * | DPC Thread | 0x4023 | 0x4002,0x4013 |
+@@ -2491,6 +2491,9 @@ ql_dbg(uint level, scsi_qla_host_t *vha, uint id, const char *fmt, ...)
+ struct va_format vaf;
+ char pbuf[64];
+
++ if (!ql_mask_match(level) && !trace_ql_dbg_log_enabled())
++ return;
++
+ va_start(va, fmt);
+
+ vaf.fmt = fmt;
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index be2eb75ee1a37..d70c2f4ba718e 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -452,6 +452,16 @@ static inline be_id_t port_id_to_be_id(port_id_t port_id)
+ return res;
+ }
+
++struct tmf_arg {
++ struct list_head tmf_elem;
++ struct qla_qpair *qpair;
++ struct fc_port *fcport;
++ struct scsi_qla_host *vha;
++ u64 lun;
++ u32 flags;
++ uint8_t modifier;
++};
++
+ struct els_logo_payload {
+ uint8_t opcode;
+ uint8_t rsvd[3];
+@@ -531,6 +541,10 @@ struct srb_iocb {
+ uint32_t data;
+ struct completion comp;
+ __le16 comp_status;
++
++ uint8_t modifier;
++ uint8_t vp_index;
++ uint16_t loop_id;
+ } tmf;
+ struct {
+ #define SRB_FXDISC_REQ_DMA_VALID BIT_0
+@@ -634,6 +648,7 @@ struct srb_iocb {
+ #define SRB_SA_UPDATE 25
+ #define SRB_ELS_CMD_HST_NOLOGIN 26
+ #define SRB_SA_REPLACE 27
++#define SRB_MARKER 28
+
+ struct qla_els_pt_arg {
+ u8 els_opcode;
+@@ -655,7 +670,7 @@ enum {
+
+ struct iocb_resource {
+ u8 res_type;
+- u8 pad;
++ u8 exch_cnt;
+ u16 iocb_cnt;
+ };
+
+@@ -676,7 +691,6 @@ typedef struct srb {
+ struct iocb_resource iores;
+ struct kref cmd_kref; /* need to migrate ref_count over to this */
+ void *priv;
+- wait_queue_head_t nvme_ls_waitq;
+ struct fc_port *fcport;
+ struct scsi_qla_host *vha;
+ unsigned int start_timer:1;
+@@ -726,6 +740,11 @@ typedef struct srb {
+ * code.
+ */
+ void (*put_fn)(struct kref *kref);
++
++ /*
++ * Report completion for asynchronous commands.
++ */
++ void (*async_done)(struct srb *sp, int res);
+ } srb_t;
+
+ #define GET_CMD_SP(sp) (sp->u.scmd.cmd)
+@@ -2153,6 +2172,11 @@ typedef struct {
+ #define CS_IOCB_ERROR 0x31 /* Generic error for IOCB request
+ failure */
+ #define CS_REJECT_RECEIVED 0x4E /* Reject received */
++#define CS_EDIF_AUTH_ERROR 0x63 /* decrypt error */
++#define CS_EDIF_PAD_LEN_ERROR 0x65 /* pad > frame size, not 4byte align */
++#define CS_EDIF_INV_REQ 0x66 /* invalid request */
++#define CS_EDIF_SPI_ERROR 0x67 /* rx frame unable to locate sa */
++#define CS_EDIF_HDR_ERROR 0x69 /* data frame != expected len */
+ #define CS_BAD_PAYLOAD 0x80 /* Driver defined */
+ #define CS_UNKNOWN 0x81 /* Driver defined */
+ #define CS_RETRY 0x82 /* Driver defined */
+@@ -2520,7 +2544,6 @@ typedef struct fc_port {
+ unsigned int do_prli_nvme:1;
+
+ uint8_t nvme_flag;
+-
+ uint8_t node_name[WWN_SIZE];
+ uint8_t port_name[WWN_SIZE];
+ port_id_t d_id;
+@@ -2886,7 +2909,11 @@ struct ct_fdmi2_hba_attributes {
+ #define FDMI_PORT_SPEED_8GB 0x10
+ #define FDMI_PORT_SPEED_16GB 0x20
+ #define FDMI_PORT_SPEED_32GB 0x40
+-#define FDMI_PORT_SPEED_64GB 0x80
++#define FDMI_PORT_SPEED_20GB 0x80
++#define FDMI_PORT_SPEED_40GB 0x100
++#define FDMI_PORT_SPEED_128GB 0x200
++#define FDMI_PORT_SPEED_64GB 0x400
++#define FDMI_PORT_SPEED_256GB 0x800
+ #define FDMI_PORT_SPEED_UNKNOWN 0x8000
+
+ #define FC_CLASS_2 0x04
+@@ -3192,6 +3219,8 @@ struct ct_sns_rsp {
+ #define GFF_NVME_OFFSET 23 /* type = 28h */
+ struct {
+ uint8_t fc4_features[128];
++#define FC4_FF_TARGET BIT_0
++#define FC4_FF_INITIATOR BIT_1
+ } gff_id;
+ struct {
+ uint8_t reserved;
+@@ -3691,6 +3720,20 @@ struct qla_fw_resources {
+ u16 iocbs_limit;
+ u16 iocbs_qp_limit;
+ u16 iocbs_used;
++ u16 exch_total;
++ u16 exch_limit;
++ u16 exch_used;
++ u16 pad;
++};
++
++struct qla_fw_res {
++ u16 iocb_total;
++ u16 iocb_limit;
++ atomic_t iocb_used;
++
++ u16 exch_total;
++ u16 exch_limit;
++ atomic_t exch_used;
+ };
+
+ #define QLA_IOCB_PCT_LIMIT 95
+@@ -3750,6 +3793,7 @@ struct qla_qpair {
+ struct qla_fw_resources fwres ____cacheline_aligned;
+ u32 cmd_cnt;
+ u32 cmd_completion_cnt;
++ u32 prev_completion_cnt;
+ };
+
+ /* Place holder for FW buffer parameters */
+@@ -3962,6 +4006,7 @@ struct qla_hw_data {
+ /* SRB cache. */
+ #define SRB_MIN_REQ 128
+ mempool_t *srb_mempool;
++ u8 port_name[WWN_SIZE];
+
+ volatile struct {
+ uint32_t mbox_int :1;
+@@ -4027,6 +4072,9 @@ struct qla_hw_data {
+ uint32_t n2n_fw_acc_sec:1;
+ uint32_t plogi_template_valid:1;
+ uint32_t port_isolated:1;
++ uint32_t eeh_flush:2;
++#define EEH_FLUSH_RDY 1
++#define EEH_FLUSH_DONE 2
+ } flags;
+
+ uint16_t max_exchg;
+@@ -4061,6 +4109,7 @@ struct qla_hw_data {
+ uint32_t rsp_que_len;
+ uint32_t req_que_off;
+ uint32_t rsp_que_off;
++ unsigned long eeh_jif;
+
+ /* Multi queue data structs */
+ device_reg_t *mqiobase;
+@@ -4243,8 +4292,8 @@ struct qla_hw_data {
+ #define IS_OEM_001(ha) ((ha)->device_type & DT_OEM_001)
+ #define HAS_EXTENDED_IDS(ha) ((ha)->device_type & DT_EXTENDED_IDS)
+ #define IS_CT6_SUPPORTED(ha) ((ha)->device_type & DT_CT6_SUPPORTED)
+-#define IS_MQUE_CAPABLE(ha) ((ha)->mqenable || IS_QLA83XX(ha) || \
+- IS_QLA27XX(ha) || IS_QLA28XX(ha))
++#define IS_MQUE_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
++ IS_QLA28XX(ha))
+ #define IS_BIDI_CAPABLE(ha) \
+ (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+ /* Bit 21 of fw_attributes decides the MCTP capabilities */
+@@ -4261,8 +4310,10 @@ struct qla_hw_data {
+ #define QLA_ABTS_WAIT_ENABLED(_sp) \
+ (QLA_NVME_IOS(_sp) && QLA_ABTS_FW_ENABLED(_sp->fcport->vha->hw))
+
+-#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha))
+-#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha))
++#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
++ IS_QLA28XX(ha))
++#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
++ IS_QLA28XX(ha))
+ #define IS_PI_DIFB_DIX0_CAPABLE(ha) (0)
+ #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+ IS_QLA28XX(ha))
+@@ -4312,7 +4363,6 @@ struct qla_hw_data {
+ uint8_t aen_mbx_count;
+ atomic_t num_pend_mbx_stage1;
+ atomic_t num_pend_mbx_stage2;
+- atomic_t num_pend_mbx_stage3;
+ uint16_t frame_payload_size;
+
+ uint32_t login_retry_count;
+@@ -4582,6 +4632,8 @@ struct qla_hw_data {
+ uint32_t flt_region_aux_img_status_sec;
+ };
+ uint8_t active_image;
++ uint8_t active_tmf;
++#define MAX_ACTIVE_TMF 8
+
+ /* Needed for BEACON */
+ uint16_t beacon_blink_led;
+@@ -4596,6 +4648,8 @@ struct qla_hw_data {
+
+ struct qla_msix_entry *msix_entries;
+
++ struct list_head tmf_pending;
++ struct list_head tmf_active;
+ struct list_head vp_list; /* list of VP */
+ unsigned long vp_idx_map[(MAX_MULTI_ID_FABRIC / 8) /
+ sizeof(unsigned long)];
+@@ -4607,7 +4661,9 @@ struct qla_hw_data {
+ struct qla_chip_state_84xx *cs84xx;
+ struct isp_operations *isp_ops;
+ struct workqueue_struct *wq;
++ struct work_struct heartbeat_work;
+ struct qlfc_fw fw_buf;
++ unsigned long last_heartbeat_run_jiffies;
+
+ /* FCP_CMND priority support */
+ struct qla_fcp_prio_cfg *fcp_prio_cfg;
+@@ -4708,7 +4764,6 @@ struct qla_hw_data {
+
+ struct qla_hw_data_stat stat;
+ pci_error_state_t pci_error_state;
+- u64 prev_cmd_cnt;
+ struct dma_pool *purex_dma_pool;
+ struct btree_head32 host_map;
+
+@@ -4723,6 +4778,7 @@ struct qla_hw_data {
+ spinlock_t sadb_lock; /* protects list */
+ struct els_reject elsrej;
+ u8 edif_post_stop_cnt_down;
++ struct qla_fw_res fwres ____cacheline_aligned;
+ };
+
+ #define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
+@@ -4854,7 +4910,6 @@ typedef struct scsi_qla_host {
+ #define SET_ZIO_THRESHOLD_NEEDED 32
+ #define ISP_ABORT_TO_ROM 33
+ #define VPORT_DELETE 34
+-#define HEARTBEAT_CHK 38
+
+ #define PROCESS_PUREX_IOCB 63
+
+@@ -5093,17 +5148,17 @@ struct secure_flash_update_block_pk {
+ (test_bit(ISP_ABORT_NEEDED, &ha->dpc_flags) || \
+ test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags))
+
+-#define QLA_VHA_MARK_BUSY(__vha, __bail) do { \
+- atomic_inc(&__vha->vref_count); \
+- mb(); \
+- if (__vha->flags.delete_progress) { \
+- atomic_dec(&__vha->vref_count); \
+- wake_up(&__vha->vref_waitq); \
+- __bail = 1; \
+- } else { \
+- __bail = 0; \
+- } \
+-} while (0)
++static inline bool qla_vha_mark_busy(scsi_qla_host_t *vha)
++{
++ atomic_inc(&vha->vref_count);
++ mb();
++ if (vha->flags.delete_progress) {
++ atomic_dec(&vha->vref_count);
++ wake_up(&vha->vref_waitq);
++ return true;
++ }
++ return false;
++}
+
+ #define QLA_VHA_MARK_NOT_BUSY(__vha) do { \
+ atomic_dec(&__vha->vref_count); \
+@@ -5427,4 +5482,18 @@ struct ql_vnd_tgt_stats_resp {
+ #include "qla_gbl.h"
+ #include "qla_dbg.h"
+ #include "qla_inline.h"
++
++#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \
++ _fcport->disc_state == DSC_DELETED)
++
++#define DBG_FCPORT_PRFMT(_fp, _fmt, _args...) \
++ "%s: %8phC: " _fmt " (state=%d disc_state=%d scan_state=%d loopid=0x%x deleted=%d flags=0x%x)\n", \
++ __func__, _fp->port_name, ##_args, atomic_read(&_fp->state), \
++ _fp->disc_state, _fp->scan_state, _fp->loop_id, _fp->deleted, \
++ _fp->flags
++
++#define TMF_NOT_READY(_fcport) \
++ (!_fcport || IS_SESSION_DELETED(_fcport) || atomic_read(&_fcport->state) != FCS_ONLINE || \
++ !_fcport->vha->hw->flags.fw_started)
++
+ #endif
+diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
+index 85bd0e468d43e..aa9d69e5274d8 100644
+--- a/drivers/scsi/qla2xxx/qla_dfs.c
++++ b/drivers/scsi/qla2xxx/qla_dfs.c
+@@ -235,7 +235,7 @@ qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
+ uint16_t mb[MAX_IOCB_MB_REG];
+ int rc;
+ struct qla_hw_data *ha = vha->hw;
+- u16 iocbs_used, i;
++ u16 iocbs_used, i, exch_used;
+
+ rc = qla24xx_res_count_wait(vha, mb, SIZEOF_IOCB_MB_REG);
+ if (rc != QLA_SUCCESS) {
+@@ -263,13 +263,29 @@ qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
+ if (ql2xenforce_iocb_limit) {
+ /* lock is not require. It's an estimate. */
+ iocbs_used = ha->base_qpair->fwres.iocbs_used;
++ exch_used = ha->base_qpair->fwres.exch_used;
+ for (i = 0; i < ha->max_qpairs; i++) {
+- if (ha->queue_pair_map[i])
++ if (ha->queue_pair_map[i]) {
+ iocbs_used += ha->queue_pair_map[i]->fwres.iocbs_used;
++ exch_used += ha->queue_pair_map[i]->fwres.exch_used;
++ }
+ }
+
+ seq_printf(s, "Driver: estimate iocb used [%d] high water limit [%d]\n",
+ iocbs_used, ha->base_qpair->fwres.iocbs_limit);
++
++		seq_printf(s, "estimate exchange used[%d] high water limit [%d]\n",
++ exch_used, ha->base_qpair->fwres.exch_limit);
++
++ if (ql2xenforce_iocb_limit == 2) {
++ iocbs_used = atomic_read(&ha->fwres.iocb_used);
++ exch_used = atomic_read(&ha->fwres.exch_used);
++ seq_printf(s, " estimate iocb2 used [%d] high water limit [%d]\n",
++ iocbs_used, ha->fwres.iocb_limit);
++
++			seq_printf(s, " estimate exchange2 used[%d] high water limit [%d]\n",
++ exch_used, ha->fwres.exch_limit);
++ }
+ }
+
+ return 0;
+diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
+index ad746c62f0d44..40a03f9c2d21f 100644
+--- a/drivers/scsi/qla2xxx/qla_edif.c
++++ b/drivers/scsi/qla2xxx/qla_edif.c
+@@ -218,7 +218,7 @@ fc_port_t *fcport)
+ "%s edif not enabled\n", __func__);
+ goto done;
+ }
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ ql_dbg(ql_dbg_edif, vha, 0x09102,
+ "%s doorbell not enabled\n", __func__);
+ goto done;
+@@ -290,63 +290,6 @@ qla_edif_app_check(scsi_qla_host_t *vha, struct app_id appid)
+ return false;
+ }
+
+-static void qla_edif_reset_auth_wait(struct fc_port *fcport, int state,
+- int waitonly)
+-{
+- int cnt, max_cnt = 200;
+- bool traced = false;
+-
+- fcport->keep_nport_handle = 1;
+-
+- if (!waitonly) {
+- qla2x00_set_fcport_disc_state(fcport, state);
+- qlt_schedule_sess_for_deletion(fcport);
+- } else {
+- qla2x00_set_fcport_disc_state(fcport, state);
+- }
+-
+- ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+- "%s: waiting for session, max_cnt=%u\n",
+- __func__, max_cnt);
+-
+- cnt = 0;
+-
+- if (waitonly) {
+- /* Marker wait min 10 msecs. */
+- msleep(50);
+- cnt += 50;
+- }
+- while (1) {
+- if (!traced) {
+- ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+- "%s: session sleep.\n",
+- __func__);
+- traced = true;
+- }
+- msleep(20);
+- cnt++;
+- if (waitonly && (fcport->disc_state == state ||
+- fcport->disc_state == DSC_LOGIN_COMPLETE))
+- break;
+- if (fcport->disc_state == DSC_LOGIN_AUTH_PEND)
+- break;
+- if (cnt > max_cnt)
+- break;
+- }
+-
+- if (!waitonly) {
+- ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+- "%s: waited for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
+- __func__, fcport->port_name, fcport->loop_id,
+- fcport->d_id.b24, fcport, fcport->disc_state, cnt);
+- } else {
+- ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
+- "%s: waited ONLY for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
+- __func__, fcport->port_name, fcport->loop_id,
+- fcport->d_id.b24, fcport, fcport->disc_state, cnt);
+- }
+-}
+-
+ static void
+ qla_edif_free_sa_ctl(fc_port_t *fcport, struct edif_sa_ctl *sa_ctl,
+ int index)
+@@ -529,7 +472,8 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ struct app_start_reply appreply;
+ struct fc_port *fcport, *tf;
+
+- ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app start\n", __func__);
++ ql_log(ql_log_info, vha, 0x1313,
++ "EDIF application registration with driver, FC device connections will be re-established.\n");
+
+ sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+ bsg_job->request_payload.sg_cnt, &appstart,
+@@ -538,15 +482,17 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app_vid=%x app_start_flags %x\n",
+ __func__, appstart.app_info.app_vid, appstart.app_start_flags);
+
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ /* mark doorbell as active since an app is now present */
+- vha->e_dbell.db_flags = EDB_ACTIVE;
++ vha->e_dbell.db_flags |= EDB_ACTIVE;
+ } else {
+- ql_dbg(ql_dbg_edif, vha, 0x911e, "%s doorbell already active\n",
+- __func__);
++ goto out;
+ }
+
+ if (N2N_TOPO(vha->hw)) {
++ list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list)
++ fcport->n2n_link_reset_cnt = 0;
++
+ if (vha->hw->flags.n2n_fw_acc_sec)
+ set_bit(N2N_LINK_RESET, &vha->dpc_flags);
+ else
+@@ -554,39 +500,51 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ qla2xxx_wake_dpc(vha);
+ } else {
+ list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
++ ql_dbg(ql_dbg_edif, vha, 0x2058,
++ "FCSP - nn %8phN pn %8phN portid=%06x.\n",
++ fcport->node_name, fcport->port_name,
++ fcport->d_id.b24);
+ ql_dbg(ql_dbg_edif, vha, 0xf084,
+- "%s: sess %p %8phC lid %#04x s_id %06x logout %d\n",
+- __func__, fcport, fcport->port_name,
+- fcport->loop_id, fcport->d_id.b24,
+- fcport->logout_on_delete);
+-
+- ql_dbg(ql_dbg_edif, vha, 0xf084,
+- "keep %d els_logo %d disc state %d auth state %d stop state %d\n",
+- fcport->keep_nport_handle,
+- fcport->send_els_logo, fcport->disc_state,
+- fcport->edif.auth_state, fcport->edif.app_stop);
++ "%s: se_sess %p / sess %p from port %8phC "
++ "loop_id %#04x s_id %06x logout %d "
++			    "keep %d els_logo %d disc state %d auth state %d "
++ "stop state %d\n",
++ __func__, fcport->se_sess, fcport,
++ fcport->port_name, fcport->loop_id,
++ fcport->d_id.b24, fcport->logout_on_delete,
++ fcport->keep_nport_handle, fcport->send_els_logo,
++ fcport->disc_state, fcport->edif.auth_state,
++ fcport->edif.app_stop);
+
+ if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+ break;
+- if (!(fcport->flags & FCF_FCSP_DEVICE))
+- continue;
+
++ fcport->login_retry = vha->hw->login_retry_count;
++
++ fcport->edif.app_stop = 0;
++ fcport->edif.app_sess_online = 0;
+ fcport->edif.app_started = 1;
+- if (fcport->edif.app_stop ||
+- (fcport->disc_state != DSC_LOGIN_COMPLETE &&
+- fcport->disc_state != DSC_LOGIN_PEND &&
+- fcport->disc_state != DSC_DELETED)) {
+- /* no activity */
+- fcport->edif.app_stop = 0;
+-
+- ql_dbg(ql_dbg_edif, vha, 0x911e,
+- "%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+- __func__, fcport->port_name);
+- fcport->edif.app_sess_online = 1;
+- qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
+- }
++
++ if (fcport->scan_state != QLA_FCPORT_FOUND)
++ continue;
++
++ if (fcport->port_type == FCT_UNKNOWN &&
++ !fcport->fc4_features)
++ rval = qla24xx_async_gffid(vha, fcport, true);
++
++ if (!rval && !(fcport->fc4_features & FC4_FF_TARGET ||
++ fcport->port_type & (FCT_TARGET|FCT_NVME_TARGET)))
++ continue;
++
++ rval = 0;
++
++ ql_dbg(ql_dbg_edif, vha, 0x911e,
++			       "%s wwpn %8phC calling qlt_schedule_sess_for_deletion\n",
++ __func__, fcport->port_name);
++ qlt_schedule_sess_for_deletion(fcport);
+ qla_edif_sa_ctl_init(vha, fcport);
+ }
++ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ }
+
+ if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+@@ -597,18 +555,19 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ __func__);
+ }
+
++out:
+ appreply.host_support_edif = vha->hw->flags.edif_enabled;
+ appreply.edif_enode_active = vha->pur_cinfo.enode_flags;
+ appreply.edif_edb_active = vha->e_dbell.db_flags;
+
+- bsg_job->reply_len = sizeof(struct fc_bsg_reply) +
+- sizeof(struct app_start_reply);
++ bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+
+ SET_DID_STATUS(bsg_reply->result, DID_OK);
+
+- sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+- bsg_job->reply_payload.sg_cnt, &appreply,
+- sizeof(struct app_start_reply));
++ bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
++ bsg_job->reply_payload.sg_cnt,
++ &appreply,
++ sizeof(struct app_start_reply));
+
+ ql_dbg(ql_dbg_edif, vha, 0x911d,
+ "%s app start completed with 0x%x\n",
+@@ -725,6 +684,11 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ bsg_job->request_payload.sg_cnt, &appplogiok,
+ sizeof(struct auth_complete_cmd));
+
++	/* silence unaligned access warning */
++ portid.b.domain = appplogiok.u.d_id.b.domain;
++ portid.b.area = appplogiok.u.d_id.b.area;
++ portid.b.al_pa = appplogiok.u.d_id.b.al_pa;
++
+ switch (appplogiok.type) {
+ case PL_TYPE_WWPN:
+ fcport = qla2x00_find_fcport_by_wwpn(vha,
+@@ -735,7 +699,7 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ __func__, appplogiok.u.wwpn);
+ break;
+ case PL_TYPE_DID:
+- fcport = qla2x00_find_fcport_by_pid(vha, &appplogiok.u.d_id);
++ fcport = qla2x00_find_fcport_by_pid(vha, &portid);
+ if (!fcport)
+ ql_dbg(ql_dbg_edif, vha, 0x911d,
+ "%s d_id lookup failed: %x\n", __func__,
+@@ -800,15 +764,15 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ ql_dbg(ql_dbg_edif, vha, 0x911e,
+ "%s AUTH complete - RESUME with prli for wwpn %8phC\n",
+ __func__, fcport->port_name);
+- qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 1);
+ qla24xx_post_prli_work(vha, fcport);
+ }
+
+ errstate_exit:
+ bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+- sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+- bsg_job->reply_payload.sg_cnt, &appplogireply,
+- sizeof(struct app_plogi_reply));
++ bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
++ bsg_job->reply_payload.sg_cnt,
++ &appplogireply,
++ sizeof(struct app_plogi_reply));
+
+ return rval;
+ }
+@@ -834,6 +798,11 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ bsg_job->request_payload.sg_cnt, &appplogifail,
+ sizeof(struct auth_complete_cmd));
+
++	/* silence unaligned access warning */
++ portid.b.domain = appplogifail.u.d_id.b.domain;
++ portid.b.area = appplogifail.u.d_id.b.area;
++ portid.b.al_pa = appplogifail.u.d_id.b.al_pa;
++
+ /*
+ * TODO: edif: app has failed this plogi. Inform driver to
+ * take any action (if any).
+@@ -845,7 +814,7 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ SET_DID_STATUS(bsg_reply->result, DID_OK);
+ break;
+ case PL_TYPE_DID:
+- fcport = qla2x00_find_fcport_by_pid(vha, &appplogifail.u.d_id);
++ fcport = qla2x00_find_fcport_by_pid(vha, &portid);
+ if (!fcport)
+ ql_dbg(ql_dbg_edif, vha, 0x911d,
+ "%s d_id lookup failed: %x\n", __func__,
+@@ -873,7 +842,7 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+
+ if (qla_ini_mode_enabled(fcport->vha)) {
+ fcport->send_els_logo = 1;
+- qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
++ qlt_schedule_sess_for_deletion(fcport);
+ }
+ }
+
+@@ -891,7 +860,7 @@ static int
+ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ {
+ int32_t rval = 0;
+- int32_t num_cnt;
++ int32_t pcnt = 0;
+ struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+ struct app_pinfo_req app_req;
+ struct app_pinfo_reply *app_reply;
+@@ -903,16 +872,14 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ bsg_job->request_payload.sg_cnt, &app_req,
+ sizeof(struct app_pinfo_req));
+
+- num_cnt = app_req.num_ports; /* num of ports alloc'd by app */
+-
+ app_reply = kzalloc((sizeof(struct app_pinfo_reply) +
+- sizeof(struct app_pinfo) * num_cnt), GFP_KERNEL);
++ sizeof(struct app_pinfo) * app_req.num_ports), GFP_KERNEL);
++
+ if (!app_reply) {
+ SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+ rval = -1;
+ } else {
+ struct fc_port *fcport = NULL, *tf;
+- uint32_t pcnt = 0;
+
+ list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+ if (!(fcport->flags & FCF_FCSP_DEVICE))
+@@ -924,7 +891,7 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ "APP request entry - portid=%06x.\n", tdid.b24);
+
+ /* Ran out of space */
+- if (pcnt > app_req.num_ports)
++ if (pcnt >= app_req.num_ports)
+ break;
+
+ if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24)
+@@ -933,6 +900,20 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ app_reply->ports[pcnt].rekey_count =
+ fcport->edif.rekey_cnt;
+
++ if (fcport->scan_state != QLA_FCPORT_FOUND)
++ continue;
++
++ if (fcport->port_type == FCT_UNKNOWN && !fcport->fc4_features)
++ rval = qla24xx_async_gffid(vha, fcport, true);
++
++ if (!rval &&
++ !(fcport->fc4_features & FC4_FF_TARGET ||
++ fcport->port_type &
++ (FCT_TARGET | FCT_NVME_TARGET)))
++ continue;
++
++ rval = 0;
++
+ app_reply->ports[pcnt].remote_type =
+ VND_CMD_RTYPE_UNKNOWN;
+ if (fcport->port_type & (FCT_NVME_TARGET | FCT_TARGET))
+@@ -981,9 +962,11 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ SET_DID_STATUS(bsg_reply->result, DID_OK);
+ }
+
+- sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+- bsg_job->reply_payload.sg_cnt, app_reply,
+- sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * num_cnt);
++ bsg_job->reply_len = sizeof(struct fc_bsg_reply);
++ bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
++ bsg_job->reply_payload.sg_cnt,
++ app_reply,
++ sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * pcnt);
+
+ kfree(app_reply);
+
+@@ -1000,10 +983,11 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ {
+ int32_t rval = 0;
+ struct fc_bsg_reply *bsg_reply = bsg_job->reply;
+- uint32_t ret_size, size;
++ uint32_t size;
+
+ struct app_sinfo_req app_req;
+ struct app_stats_reply *app_reply;
++ uint32_t pcnt = 0;
+
+ sg_copy_to_buffer(bsg_job->request_payload.sg_list,
+ bsg_job->request_payload.sg_cnt, &app_req,
+@@ -1019,18 +1003,12 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ size = sizeof(struct app_stats_reply) +
+ (sizeof(struct app_sinfo) * app_req.num_ports);
+
+- if (size > bsg_job->reply_payload.payload_len)
+- ret_size = bsg_job->reply_payload.payload_len;
+- else
+- ret_size = size;
+-
+ app_reply = kzalloc(size, GFP_KERNEL);
+ if (!app_reply) {
+ SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+ rval = -1;
+ } else {
+ struct fc_port *fcport = NULL, *tf;
+- uint32_t pcnt = 0;
+
+ list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+ if (fcport->edif.enable) {
+@@ -1054,9 +1032,11 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ SET_DID_STATUS(bsg_reply->result, DID_OK);
+ }
+
++ bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+ bsg_reply->reply_payload_rcv_len =
+ sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+- bsg_job->reply_payload.sg_cnt, app_reply, ret_size);
++ bsg_job->reply_payload.sg_cnt, app_reply,
++ sizeof(struct app_stats_reply) + (sizeof(struct app_sinfo) * pcnt));
+
+ kfree(app_reply);
+
+@@ -1130,8 +1110,7 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job)
+ __func__,
+ bsg_request->rqst_data.h_vendor.vendor_cmd[1]);
+ rval = EXT_STATUS_INVALID_PARAM;
+- bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+- SET_DID_STATUS(bsg_reply->result, DID_ERROR);
++ done = false;
+ break;
+ }
+
+@@ -1299,6 +1278,8 @@ qla24xx_check_sadb_avail_slot(struct bsg_job *bsg_job, fc_port_t *fcport,
+
+ #define QLA_SA_UPDATE_FLAGS_RX_KEY 0x0
+ #define QLA_SA_UPDATE_FLAGS_TX_KEY 0x2
++#define EDIF_MSLEEP_INTERVAL 100
++#define EDIF_RETRY_COUNT 50
+
+ int
+ qla24xx_sadb_update(struct bsg_job *bsg_job)
+@@ -1311,9 +1292,10 @@ qla24xx_sadb_update(struct bsg_job *bsg_job)
+ struct edif_list_entry *edif_entry = NULL;
+ int found = 0;
+ int rval = 0;
+- int result = 0;
++ int result = 0, cnt;
+ struct qla_sa_update_frame sa_frame;
+ struct srb_iocb *iocb_cmd;
++ port_id_t portid;
+
+ ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d,
+ "%s entered, vha: 0x%p\n", __func__, vha);
+@@ -1330,14 +1312,19 @@ qla24xx_sadb_update(struct bsg_job *bsg_job)
+ goto done;
+ }
+
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ ql_log(ql_log_warn, vha, 0x70a1, "App not started\n");
+ rval = -EIO;
+ SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+ goto done;
+ }
+
+- fcport = qla2x00_find_fcport_by_pid(vha, &sa_frame.port_id);
++	/* silence unaligned access warning */
++ portid.b.domain = sa_frame.port_id.b.domain;
++ portid.b.area = sa_frame.port_id.b.area;
++ portid.b.al_pa = sa_frame.port_id.b.al_pa;
++
++ fcport = qla2x00_find_fcport_by_pid(vha, &portid);
+ if (fcport) {
+ found = 1;
+ if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_TX_KEY)
+@@ -1546,11 +1533,23 @@ force_rx_delete:
+ sp->done = qla2x00_bsg_job_done;
+ iocb_cmd = &sp->u.iocb_cmd;
+ iocb_cmd->u.sa_update.sa_frame = sa_frame;
+-
++ cnt = 0;
++retry:
+ rval = qla2x00_start_sp(sp);
+- if (rval != QLA_SUCCESS) {
++ switch (rval) {
++ case QLA_SUCCESS:
++ break;
++ case EAGAIN:
++ msleep(EDIF_MSLEEP_INTERVAL);
++ cnt++;
++ if (cnt < EDIF_RETRY_COUNT)
++ goto retry;
++
++ fallthrough;
++ default:
+ ql_log(ql_dbg_edif, vha, 0x70e3,
+- "qla2x00_start_sp failed=%d.\n", rval);
++ "%s qla2x00_start_sp failed=%d.\n",
++ __func__, rval);
+
+ qla2x00_rel_sp(sp);
+ rval = -EIO;
+@@ -1651,6 +1650,40 @@ qla_enode_stop(scsi_qla_host_t *vha)
+ spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
+ }
+
++static void qla_enode_clear(scsi_qla_host_t *vha, port_id_t portid)
++{
++ unsigned long flags;
++ struct enode *e, *tmp;
++ struct purexevent *purex;
++ LIST_HEAD(enode_list);
++
++ if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
++ ql_dbg(ql_dbg_edif, vha, 0x09102,
++ "%s enode not active\n", __func__);
++ return;
++ }
++ spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
++ list_for_each_entry_safe(e, tmp, &vha->pur_cinfo.head, list) {
++ purex = &e->u.purexinfo;
++ if (purex->pur_info.pur_sid.b24 == portid.b24) {
++ ql_dbg(ql_dbg_edif, vha, 0x911d,
++ "%s free ELS sid=%06x. xchg %x, nb=%xh\n",
++ __func__, portid.b24,
++ purex->pur_info.pur_rx_xchg_address,
++ purex->pur_info.pur_bytes_rcvd);
++
++ list_del_init(&e->list);
++ list_add_tail(&e->list, &enode_list);
++ }
++ }
++ spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
++
++ list_for_each_entry_safe(e, tmp, &enode_list, list) {
++ list_del_init(&e->list);
++ qla_enode_free(vha, e);
++ }
++}
++
+ /*
+ * allocate enode struct and populate buffer
+ * returns: enode pointer with buffers
+@@ -1695,41 +1728,25 @@ static struct enode *
+ qla_enode_find(scsi_qla_host_t *vha, uint32_t ntype, uint32_t p1, uint32_t p2)
+ {
+ struct enode *node_rtn = NULL;
+- struct enode *list_node = NULL;
++ struct enode *list_node, *q;
+ unsigned long flags;
+- struct list_head *pos, *q;
+ uint32_t sid;
+- uint32_t rw_flag;
+ struct purexevent *purex;
+
+ /* secure the list from moving under us */
+ spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
+
+- list_for_each_safe(pos, q, &vha->pur_cinfo.head) {
+- list_node = list_entry(pos, struct enode, list);
++ list_for_each_entry_safe(list_node, q, &vha->pur_cinfo.head, list) {
+
+ /* node type determines what p1 and p2 are */
+ purex = &list_node->u.purexinfo;
+ sid = p1;
+- rw_flag = p2;
+
+ if (purex->pur_info.pur_sid.b24 == sid) {
+- if (purex->pur_info.pur_pend == 1 &&
+- rw_flag == PUR_GET) {
+- /*
+- * if the receive is in progress
+- * and its a read/get then can't
+- * transfer yet
+- */
+- ql_dbg(ql_dbg_edif, vha, 0x9106,
+- "%s purex xfer in progress for sid=%x\n",
+- __func__, sid);
+- } else {
+- /* found it and its complete */
+- node_rtn = list_node;
+- list_del(pos);
+- break;
+- }
++ /* found it and its complete */
++ node_rtn = list_node;
++ list_del(&list_node->list);
++ break;
+ }
+ }
+
+@@ -1814,7 +1831,7 @@ qla_els_reject_iocb(scsi_qla_host_t *vha, struct qla_qpair *qp,
+ void
+ qla_edb_init(scsi_qla_host_t *vha)
+ {
+- if (vha->e_dbell.db_flags == EDB_ACTIVE) {
++ if (DBELL_ACTIVE(vha)) {
+ /* list already init'd - error */
+ ql_dbg(ql_dbg_edif, vha, 0x09102,
+ "edif db already initialized, cannot reinit\n");
+@@ -1850,6 +1867,57 @@ qla_edb_node_free(scsi_qla_host_t *vha, struct edb_node *node)
+ node->ntype = N_UNDEF;
+ }
+
++static void qla_edb_clear(scsi_qla_host_t *vha, port_id_t portid)
++{
++ unsigned long flags;
++ struct edb_node *e, *tmp;
++ port_id_t sid;
++ LIST_HEAD(edb_list);
++
++ if (DBELL_INACTIVE(vha)) {
++ /* doorbell list not enabled */
++ ql_dbg(ql_dbg_edif, vha, 0x09102,
++ "%s doorbell not enabled\n", __func__);
++ return;
++ }
++
++ /* grab lock so list doesn't move */
++ spin_lock_irqsave(&vha->e_dbell.db_lock, flags);
++ list_for_each_entry_safe(e, tmp, &vha->e_dbell.head, list) {
++ switch (e->ntype) {
++ case VND_CMD_AUTH_STATE_NEEDED:
++ case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
++ sid = e->u.plogi_did;
++ break;
++ case VND_CMD_AUTH_STATE_ELS_RCVD:
++ sid = e->u.els_sid;
++ break;
++ case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
++ /* app wants to see this */
++ continue;
++ default:
++ ql_log(ql_log_warn, vha, 0x09102,
++ "%s unknown node type: %x\n", __func__, e->ntype);
++ sid.b24 = 0;
++ break;
++ }
++ if (sid.b24 == portid.b24) {
++ ql_dbg(ql_dbg_edif, vha, 0x910f,
++ "%s free doorbell event : node type = %x %p\n",
++ __func__, e->ntype, e);
++ list_del_init(&e->list);
++ list_add_tail(&e->list, &edb_list);
++ }
++ }
++ spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags);
++
++ list_for_each_entry_safe(e, tmp, &edb_list, list) {
++ qla_edb_node_free(vha, e);
++ list_del_init(&e->list);
++ kfree(e);
++ }
++}
++
+ /* function called when app is stopping */
+
+ void
+@@ -1858,7 +1926,7 @@ qla_edb_stop(scsi_qla_host_t *vha)
+ unsigned long flags;
+ struct edb_node *node, *q;
+
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ /* doorbell list not enabled */
+ ql_dbg(ql_dbg_edif, vha, 0x09102,
+ "%s doorbell not enabled\n", __func__);
+@@ -1909,7 +1977,7 @@ qla_edb_node_add(scsi_qla_host_t *vha, struct edb_node *ptr)
+ {
+ unsigned long flags;
+
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ /* doorbell list not enabled */
+ ql_dbg(ql_dbg_edif, vha, 0x09102,
+ "%s doorbell not enabled\n", __func__);
+@@ -1940,7 +2008,7 @@ qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype,
+ return;
+ }
+
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ if (fcport)
+ fcport->edif.auth_state = dbtype;
+ /* doorbell list not enabled */
+@@ -2035,7 +2103,7 @@ qla_edif_timer(scsi_qla_host_t *vha)
+ struct qla_hw_data *ha = vha->hw;
+
+ if (!vha->vp_idx && N2N_TOPO(ha) && ha->flags.n2n_fw_acc_sec) {
+- if (vha->e_dbell.db_flags != EDB_ACTIVE &&
++ if (DBELL_INACTIVE(vha) &&
+ ha->edif_post_stop_cnt_down) {
+ ha->edif_post_stop_cnt_down--;
+
+@@ -2073,7 +2141,7 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr,
+ sz = 256;
+
+ /* stop new threads from waiting if we're not init'd */
+- if (vha->e_dbell.db_flags != EDB_ACTIVE) {
++ if (DBELL_INACTIVE(vha)) {
+ ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x09122,
+ "%s error - edif db not enabled\n", __func__);
+ return 0;
+@@ -2137,7 +2205,9 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr,
+
+ static void qla_noop_sp_done(srb_t *sp, int res)
+ {
+- sp->free(sp);
++ sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ /*
+@@ -2160,7 +2230,8 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e)
+ if (!sa_ctl) {
+ ql_dbg(ql_dbg_edif, vha, 0x70e6,
+ "sa_ctl allocation failed\n");
+- return -ENOMEM;
++ rval = -ENOMEM;
++ return rval;
+ }
+
+ fcport = sa_ctl->fcport;
+@@ -2170,7 +2241,8 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e)
+ if (!sp) {
+ ql_dbg(ql_dbg_edif, vha, 0x70e6,
+ "SRB allocation failed\n");
+- return -ENOMEM;
++ rval = -ENOMEM;
++ goto done;
+ }
+
+ fcport->flags |= FCF_ASYNC_SENT;
+@@ -2199,9 +2271,16 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e)
+
+ rval = qla2x00_start_sp(sp);
+
+- if (rval != QLA_SUCCESS)
+- rval = QLA_FUNCTION_FAILED;
++ if (rval != QLA_SUCCESS) {
++ goto done_free_sp;
++ }
+
++ return rval;
++done_free_sp:
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++ fcport->flags &= ~FCF_ASYNC_SENT;
++done:
++ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+ return rval;
+ }
+
+@@ -2357,7 +2436,7 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+ return;
+ }
+
+- if (totlen > MAX_PAYLOAD) {
++ if (totlen > ELS_MAX_PAYLOAD) {
+ ql_dbg(ql_dbg_edif, vha, 0x0910d,
+ "%s WARNING: verbose ELS frame received (totlen=%x)\n",
+ __func__, totlen);
+@@ -2387,7 +2466,6 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+
+ purex = &ptr->u.purexinfo;
+ purex->pur_info.pur_sid = a.did;
+- purex->pur_info.pur_pend = 0;
+ purex->pur_info.pur_bytes_rcvd = totlen;
+ purex->pur_info.pur_rx_xchg_address = le32_to_cpu(p->rx_xchg_addr);
+ purex->pur_info.pur_nphdl = le16_to_cpu(p->nport_handle);
+@@ -2419,8 +2497,7 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+
+ fcport = qla2x00_find_fcport_by_pid(host, &purex->pur_info.pur_sid);
+
+- if (host->e_dbell.db_flags != EDB_ACTIVE ||
+- (fcport && EDIF_SESSION_DOWN(fcport))) {
++ if (DBELL_INACTIVE(vha)) {
+ ql_dbg(ql_dbg_edif, host, 0x0910c, "%s e_dbell.db_flags =%x %06x\n",
+ __func__, host->e_dbell.db_flags,
+ fcport ? fcport->d_id.b24 : 0);
+@@ -2430,13 +2507,29 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp)
+ return;
+ }
+
++ if (fcport && EDIF_SESSION_DOWN(fcport)) {
++ ql_dbg(ql_dbg_edif, host, 0x13b6,
++ "%s terminate exchange. Send logo to 0x%x\n",
++ __func__, a.did.b24);
++
++ a.tx_byte_count = a.tx_len = 0;
++ a.tx_addr = 0;
++ a.control_flags = EPD_RX_XCHG; /* EPD_RX_XCHG = terminate cmd */
++ qla_els_reject_iocb(host, (*rsp)->qpair, &a);
++ qla_enode_free(host, ptr);
++ /* send logo to let remote port knows to tear down session */
++ fcport->send_els_logo = 1;
++ qlt_schedule_sess_for_deletion(fcport);
++ return;
++ }
++
+ /* add the local enode to the list */
+ qla_enode_add(host, ptr);
+
+ ql_dbg(ql_dbg_edif, host, 0x0910c,
+ "%s COMPLETE purex->pur_info.pur_bytes_rcvd =%xh s:%06x -> d:%06x xchg=%xh\n",
+ __func__, purex->pur_info.pur_bytes_rcvd, purex->pur_info.pur_sid.b24,
+- purex->pur_info.pur_did.b24, p->rx_xchg_addr);
++ purex->pur_info.pur_did.b24, purex->pur_info.pur_rx_xchg_address);
+
+ qla_edb_eventcreate(host, VND_CMD_AUTH_STATE_ELS_RCVD, sid, 0, NULL);
+ }
+@@ -2805,6 +2898,13 @@ qla28xx_start_scsi_edif(srb_t *sp)
+
+ tot_dsds = nseg;
+ req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
++
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
++ sp->iores.iocb_cnt = req_cnt;
++ if (qla_get_fw_resources(sp->qpair, &sp->iores))
++ goto queuing_error;
++
+ if (req->cnt < (req_cnt + 2)) {
+ cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr :
+ rd_reg_dword(req->req_q_out);
+@@ -2996,6 +3096,7 @@ queuing_error:
+ mempool_free(sp->u.scmd.ct6_ctx, ha->ctx_mempool);
+ sp->u.scmd.ct6_ctx = NULL;
+ }
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+@@ -3139,18 +3240,14 @@ static uint16_t qla_edif_sadb_get_sa_index(fc_port_t *fcport,
+ /* release any sadb entries -- only done at teardown */
+ void qla_edif_sadb_release(struct qla_hw_data *ha)
+ {
+- struct list_head *pos;
+- struct list_head *tmp;
+- struct edif_sa_index_entry *entry;
++ struct edif_sa_index_entry *entry, *tmp;
+
+- list_for_each_safe(pos, tmp, &ha->sadb_rx_index_list) {
+- entry = list_entry(pos, struct edif_sa_index_entry, next);
++ list_for_each_entry_safe(entry, tmp, &ha->sadb_rx_index_list, next) {
+ list_del(&entry->next);
+ kfree(entry);
+ }
+
+- list_for_each_safe(pos, tmp, &ha->sadb_tx_index_list) {
+- entry = list_entry(pos, struct edif_sa_index_entry, next);
++ list_for_each_entry_safe(entry, tmp, &ha->sadb_tx_index_list, next) {
+ list_del(&entry->next);
+ kfree(entry);
+ }
+@@ -3326,7 +3423,7 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ fc_port_t *fcport = NULL;
+ struct qla_hw_data *ha = vha->hw;
+ srb_t *sp;
+- int rval = (DID_ERROR << 16);
++ int rval = (DID_ERROR << 16), cnt;
+ port_id_t d_id;
+ struct qla_bsg_auth_els_request *p =
+ (struct qla_bsg_auth_els_request *)bsg_job->request;
+@@ -3348,7 +3445,7 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ if (qla_bsg_check(vha, bsg_job, fcport))
+ return 0;
+
+- if (fcport->loop_id == FC_NO_LOOP_ID) {
++ if (EDIF_SESS_DELETE(fcport)) {
+ ql_dbg(ql_dbg_edif, vha, 0x910d,
+ "%s ELS code %x, no loop id.\n", __func__,
+ bsg_request->rqst_data.r_els.els_code);
+@@ -3417,17 +3514,26 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+ sp->free = qla2x00_bsg_sp_free;
+ sp->done = qla2x00_bsg_job_done;
+
++ cnt = 0;
++retry:
+ rval = qla2x00_start_sp(sp);
+-
+- ql_dbg(ql_dbg_edif, vha, 0x700a,
+- "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n",
+- __func__, sc_to_str(p->e.sub_cmd), fcport->port_name,
+- p->e.extra_rx_xchg_address, p->e.extra_control_flags,
+- sp->handle, sp->remap.req.len, bsg_job);
+-
+- if (rval != QLA_SUCCESS) {
++ switch (rval) {
++ case QLA_SUCCESS:
++ ql_dbg(ql_dbg_edif, vha, 0x700a,
++ "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n",
++ __func__, sc_to_str(p->e.sub_cmd), fcport->port_name,
++ p->e.extra_rx_xchg_address, p->e.extra_control_flags,
++ sp->handle, sp->remap.req.len, bsg_job);
++ break;
++ case EAGAIN:
++ msleep(EDIF_MSLEEP_INTERVAL);
++ cnt++;
++ if (cnt < EDIF_RETRY_COUNT)
++ goto retry;
++ fallthrough;
++ default:
+ ql_log(ql_log_warn, vha, 0x700e,
+- "qla2x00_start_sp failed = %d\n", rval);
++ "%s qla2x00_start_sp failed = %d\n", __func__, rval);
+ SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY);
+ rval = -EIO;
+ goto done_free_remap_rsp;
+@@ -3449,7 +3555,7 @@ done:
+
+ void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess)
+ {
+- if (sess->edif.app_sess_online && vha->e_dbell.db_flags & EDB_ACTIVE) {
++ if (sess->edif.app_sess_online && DBELL_ACTIVE(vha)) {
+ ql_dbg(ql_dbg_disc, vha, 0xf09c,
+ "%s: sess %8phN send port_offline event\n",
+ __func__, sess->port_name);
+@@ -3459,3 +3565,12 @@ void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess)
+ qla2x00_post_aen_work(vha, FCH_EVT_PORT_OFFLINE, sess->d_id.b24);
+ }
+ }
++
++void qla_edif_clear_appdata(struct scsi_qla_host *vha, struct fc_port *fcport)
++{
++ if (!(fcport->flags & FCF_FCSP_DEVICE))
++ return;
++
++ qla_edb_clear(vha, fcport->d_id);
++ qla_enode_clear(vha, fcport->d_id);
++}
+diff --git a/drivers/scsi/qla2xxx/qla_edif.h b/drivers/scsi/qla2xxx/qla_edif.h
+index 9e8f28d0caa1b..b9cedf6defd94 100644
+--- a/drivers/scsi/qla2xxx/qla_edif.h
++++ b/drivers/scsi/qla2xxx/qla_edif.h
+@@ -41,9 +41,12 @@ struct pur_core {
+ };
+
+ enum db_flags_t {
+- EDB_ACTIVE = 0x1,
++ EDB_ACTIVE = BIT_0,
+ };
+
++#define DBELL_ACTIVE(_v) (_v->e_dbell.db_flags & EDB_ACTIVE)
++#define DBELL_INACTIVE(_v) (!(_v->e_dbell.db_flags & EDB_ACTIVE))
++
+ struct edif_dbell {
+ enum db_flags_t db_flags;
+ spinlock_t db_lock;
+@@ -93,7 +96,6 @@ struct sa_update_28xx {
+ };
+
+ #define NUM_ENTRIES 256
+-#define MAX_PAYLOAD 1024
+ #define PUR_GET 1
+
+ struct dinfo {
+@@ -102,7 +104,6 @@ struct dinfo {
+ };
+
+ struct pur_ninfo {
+- unsigned int pur_pend:1;
+ port_id_t pur_sid;
+ port_id_t pur_did;
+ uint8_t vp_idx;
+@@ -128,9 +129,19 @@ struct enode {
+ } u;
+ };
+
++#define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
++
+ #define EDIF_SESSION_DOWN(_s) \
+ (qla_ini_mode_enabled(_s->vha) && (_s->disc_state == DSC_DELETE_PEND || \
+ _s->disc_state == DSC_DELETED || \
+ !_s->edif.app_sess_online))
+
++#define EDIF_NEGOTIATION_PENDING(_fcport) \
++ (DBELL_ACTIVE(_fcport->vha) && \
++ (_fcport->disc_state == DSC_LOGIN_AUTH_PEND))
++
++#define EDIF_SESS_DELETE(_s) \
++ (qla_ini_mode_enabled(_s->vha) && (_s->disc_state == DSC_DELETE_PEND || \
++ _s->disc_state == DSC_DELETED))
++
+ #endif /* __QLA_EDIF_H */
+diff --git a/drivers/scsi/qla2xxx/qla_edif_bsg.h b/drivers/scsi/qla2xxx/qla_edif_bsg.h
+index 58b718d35d19a..af9f1ffb1e4a6 100644
+--- a/drivers/scsi/qla2xxx/qla_edif_bsg.h
++++ b/drivers/scsi/qla2xxx/qla_edif_bsg.h
+@@ -8,7 +8,7 @@
+ #define __QLA_EDIF_BSG_H
+
+ /* BSG Vendor specific commands */
+-#define ELS_MAX_PAYLOAD 1024
++#define ELS_MAX_PAYLOAD 2112
+ #ifndef WWN_SIZE
+ #define WWN_SIZE 8
+ #endif
+@@ -217,4 +217,6 @@ struct auth_complete_cmd {
+
+ #define RX_DELAY_DELETE_TIMEOUT 20
+
++#define FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN 1
++
+ #endif /* QLA_EDIF_BSG_H */
+diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
+index 073d06e88c589..6faf7533958f1 100644
+--- a/drivers/scsi/qla2xxx/qla_fw.h
++++ b/drivers/scsi/qla2xxx/qla_fw.h
+@@ -807,7 +807,7 @@ struct els_entry_24xx {
+ #define EPD_ELS_COMMAND (0 << 13)
+ #define EPD_ELS_ACC (1 << 13)
+ #define EPD_ELS_RJT (2 << 13)
+-#define EPD_RX_XCHG (3 << 13)
++#define EPD_RX_XCHG (3 << 13) /* terminate exchange */
+ #define ECF_CLR_PASSTHRU_PEND BIT_12
+ #define ECF_INCL_FRAME_HDR BIT_11
+ #define ECF_SEC_LOGIN BIT_3
+diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
+index 1c3f055d41b8e..3861e41a8d2cd 100644
+--- a/drivers/scsi/qla2xxx/qla_gbl.h
++++ b/drivers/scsi/qla2xxx/qla_gbl.h
+@@ -69,9 +69,7 @@ extern int qla2x00_async_logout(struct scsi_qla_host *, fc_port_t *);
+ extern int qla2x00_async_prlo(struct scsi_qla_host *, fc_port_t *);
+ extern int qla2x00_async_adisc(struct scsi_qla_host *, fc_port_t *,
+ uint16_t *);
+-extern int qla2x00_async_tm_cmd(fc_port_t *, uint32_t, uint32_t, uint32_t);
+-extern void qla2x00_async_login_done(struct scsi_qla_host *, fc_port_t *,
+- uint16_t *);
++extern int qla2x00_async_tm_cmd(fc_port_t *, uint32_t, uint64_t, uint32_t);
+ struct qla_work_evt *qla2x00_alloc_work(struct scsi_qla_host *,
+ enum qla_work_type);
+ extern int qla24xx_async_gnl(struct scsi_qla_host *, fc_port_t *);
+@@ -142,7 +140,10 @@ void qlt_chk_edif_rx_sa_delete_pending(scsi_qla_host_t *vha, fc_port_t *fcport,
+ void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport);
+ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsgjob);
+ void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess);
++void qla_edif_clear_appdata(struct scsi_qla_host *vha,
++ struct fc_port *fcport);
+ const char *sc_to_str(uint16_t cmd);
++void qla_adjust_iocb_limit(scsi_qla_host_t *vha);
+
+ /*
+ * Global Data in qla_os.c source file.
+@@ -171,7 +172,6 @@ extern int ql2xasynctmfenable;
+ extern int ql2xgffidenable;
+ extern int ql2xenabledif;
+ extern int ql2xenablehba_err_chk;
+-extern int ql2xtargetreset;
+ extern int ql2xdontresethba;
+ extern uint64_t ql2xmaxlun;
+ extern int ql2xmdcapmask;
+@@ -191,6 +191,8 @@ extern int ql2xfulldump_on_mpifail;
+ extern int ql2xsecenable;
+ extern int ql2xenforce_iocb_limit;
+ extern int ql2xabts_wait_nvme;
++extern int ql2xrspq_follow_inptr;
++extern int ql2xrspq_follow_inptr_legacy;
+
+ extern int qla2x00_loop_reset(scsi_qla_host_t *);
+ extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
+@@ -275,7 +277,6 @@ extern int qla24xx_vport_create_req_sanity_check(struct fc_vport *);
+ extern scsi_qla_host_t *qla24xx_create_vhost(struct fc_vport *);
+
+ extern void qla2x00_sp_free_dma(srb_t *sp);
+-extern char *qla2x00_get_fw_version_str(struct scsi_qla_host *, char *);
+
+ extern void qla2x00_mark_device_lost(scsi_qla_host_t *, fc_port_t *, int);
+ extern void qla2x00_mark_all_devices_lost(scsi_qla_host_t *);
+@@ -315,7 +316,8 @@ extern int qla2x00_start_sp(srb_t *);
+ extern int qla24xx_dif_start_scsi(srb_t *);
+ extern int qla2x00_start_bidir(srb_t *, struct scsi_qla_host *, uint32_t);
+ extern int qla2xxx_dif_start_scsi_mq(srb_t *);
+-extern void qla2x00_init_timer(srb_t *sp, unsigned long tmo);
++extern void qla2x00_init_async_sp(srb_t *sp, unsigned long tmo,
++ void (*done)(struct srb *, int));
+ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *);
+
+ extern void *qla2x00_alloc_iocbs(struct scsi_qla_host *, srb_t *);
+@@ -331,6 +333,8 @@ extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
+ extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
+ extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha,
+ struct qla_work_evt *e);
++void qla2x00_sp_release(struct kref *kref);
++void qla2x00_els_dcmd2_iocb_timeout(void *data);
+
+ /*
+ * Global Function Prototypes in qla_mbx.c source file.
+@@ -429,7 +433,8 @@ extern int
+ qla2x00_get_resource_cnts(scsi_qla_host_t *);
+
+ extern int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map);
++qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map,
++ u8 *num_entries);
+
+ extern int
+ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
+@@ -604,7 +609,6 @@ void __qla_consume_iocb(struct scsi_qla_host *vha, void **pkt, struct rsp_que **
+ /*
+ * Global Function Prototypes in qla_sup.c source file.
+ */
+-extern void qla2x00_release_nvram_protection(scsi_qla_host_t *);
+ extern int qla24xx_read_flash_data(scsi_qla_host_t *, uint32_t *,
+ uint32_t, uint32_t);
+ extern uint8_t *qla2x00_read_nvram_data(scsi_qla_host_t *, void *, uint32_t,
+@@ -719,7 +723,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *, fc_port_t *);
+ void qla24xx_handle_gpsc_event(scsi_qla_host_t *, struct event_arg *);
+ int qla2x00_mgmt_svr_login(scsi_qla_host_t *);
+ void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea);
+-int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport);
++int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool);
+ int qla24xx_async_gpnft(scsi_qla_host_t *, u8, srb_t *);
+ void qla24xx_async_gpnft_done(scsi_qla_host_t *, srb_t *);
+ void qla24xx_async_gnnft_done(scsi_qla_host_t *, srb_t *);
+@@ -774,12 +778,6 @@ extern void qla2x00_init_response_q_entries(struct rsp_que *);
+ extern int qla25xx_delete_req_que(struct scsi_qla_host *, struct req_que *);
+ extern int qla25xx_delete_rsp_que(struct scsi_qla_host *, struct rsp_que *);
+ extern int qla25xx_delete_queues(struct scsi_qla_host *);
+-extern uint16_t qla24xx_rd_req_reg(struct qla_hw_data *, uint16_t);
+-extern uint16_t qla25xx_rd_req_reg(struct qla_hw_data *, uint16_t);
+-extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
+-extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
+-extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
+-extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
+
+ /* qlafx00 related functions */
+ extern int qlafx00_pci_config(struct scsi_qla_host *);
+@@ -816,7 +814,6 @@ extern void qlafx00_abort_iocb(srb_t *, struct abort_iocb_entry_fx00 *);
+ extern void qlafx00_fxdisc_iocb(srb_t *, struct fxdisc_entry_fx00 *);
+ extern void qlafx00_timer_routine(scsi_qla_host_t *);
+ extern int qlafx00_rescan_isp(scsi_qla_host_t *);
+-extern int qlafx00_loop_reset(scsi_qla_host_t *vha);
+
+ /* qla82xx related functions */
+
+@@ -865,8 +862,6 @@ extern void qla82xx_init_flags(struct qla_hw_data *);
+ extern void qla82xx_set_drv_active(scsi_qla_host_t *);
+ extern int qla82xx_wr_32(struct qla_hw_data *, ulong, u32);
+ extern int qla82xx_rd_32(struct qla_hw_data *, ulong);
+-extern int qla82xx_rdmem(struct qla_hw_data *, u64, void *, int);
+-extern int qla82xx_wrmem(struct qla_hw_data *, u64, void *, int);
+
+ /* ISP 8021 IDC */
+ extern void qla82xx_clear_drv_active(struct qla_hw_data *);
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index ebc8fdb0b43d3..d3742a83d2fd7 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -529,7 +529,6 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc)
+ if (!e)
+ goto err2;
+
+- del_timer(&sp->u.iocb_cmd.timer);
+ e->u.iosb.sp = sp;
+ qla2x00_post_work(vha, e);
+ return;
+@@ -556,8 +555,8 @@ err2:
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+
+- sp->free(sp);
+-
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return;
+ }
+
+@@ -592,13 +591,15 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id)
+ if (!vha->flags.online)
+ goto done;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_CT_PTHRU_CMD;
+ sp->name = "rft_id";
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_sns_sp_done);
+
+ sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
+ sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
+@@ -638,8 +639,6 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id)
+ sp->u.iocb_cmd.u.ctarg.req_size = RFT_ID_REQ_SIZE;
+ sp->u.iocb_cmd.u.ctarg.rsp_size = RFT_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla2x00_async_sns_sp_done;
+
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - hdl=%x portid %06x.\n",
+@@ -653,7 +652,8 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id)
+ }
+ return rval;
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -676,8 +676,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha, u8 type)
+ return (QLA_SUCCESS);
+ }
+
+- return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha),
+- FC4_TYPE_FCP_SCSI);
++ return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), type);
+ }
+
+ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
+@@ -688,13 +687,15 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
+ srb_t *sp;
+ struct ct_sns_pkt *ct_sns;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_CT_PTHRU_CMD;
+ sp->name = "rff_id";
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_sns_sp_done);
+
+ sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
+ sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
+@@ -727,13 +728,11 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
+ /* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */
+ ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id);
+ ct_req->req.rff_id.fc4_feature = fc4feature;
+- ct_req->req.rff_id.fc4_type = fc4type; /* SCSI - FCP */
++ ct_req->req.rff_id.fc4_type = fc4type; /* SCSI-FCP or FC-NVMe */
+
+ sp->u.iocb_cmd.u.ctarg.req_size = RFF_ID_REQ_SIZE;
+ sp->u.iocb_cmd.u.ctarg.rsp_size = RFF_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla2x00_async_sns_sp_done;
+
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - hdl=%x portid %06x feature %x type %x.\n",
+@@ -749,7 +748,8 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id,
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -779,13 +779,15 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id,
+ srb_t *sp;
+ struct ct_sns_pkt *ct_sns;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_CT_PTHRU_CMD;
+ sp->name = "rnid";
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_sns_sp_done);
+
+ sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
+ sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
+@@ -823,9 +825,6 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id,
+ sp->u.iocb_cmd.u.ctarg.rsp_size = RNN_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla2x00_async_sns_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - hdl=%x portid %06x\n",
+ sp->name, sp->handle, d_id->b24);
+@@ -840,7 +839,8 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id,
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -886,13 +886,15 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha)
+ srb_t *sp;
+ struct ct_sns_pkt *ct_sns;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_CT_PTHRU_CMD;
+ sp->name = "rsnn_nn";
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_sns_sp_done);
+
+ sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
+ sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma,
+@@ -936,9 +938,6 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha)
+ sp->u.iocb_cmd.u.ctarg.rsp_size = RSNN_NN_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla2x00_async_sns_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - hdl=%x.\n",
+ sp->name, sp->handle);
+@@ -953,7 +952,8 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -1595,7 +1595,6 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+ unsigned int callopt)
+ {
+ struct qla_hw_data *ha = vha->hw;
+- struct init_cb_24xx *icb24 = (void *)ha->init_cb;
+ struct new_utsname *p_sysid = utsname();
+ struct ct_fdmi_hba_attr *eiter;
+ uint16_t alen;
+@@ -1757,8 +1756,8 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+ /* MAX CT Payload Length */
+ eiter = entries + size;
+ eiter->type = cpu_to_be16(FDMI_HBA_MAXIMUM_CT_PAYLOAD_LENGTH);
+- eiter->a.max_ct_len = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
+- icb24->frame_payload_size : ha->init_cb->frame_payload_size));
++ eiter->a.max_ct_len = cpu_to_be32(ha->frame_payload_size >> 2);
++
+ alen = sizeof(eiter->a.max_ct_len);
+ alen += FDMI_ATTR_TYPELEN(eiter);
+ eiter->len = cpu_to_be16(alen);
+@@ -1850,7 +1849,6 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries,
+ unsigned int callopt)
+ {
+ struct qla_hw_data *ha = vha->hw;
+- struct init_cb_24xx *icb24 = (void *)ha->init_cb;
+ struct new_utsname *p_sysid = utsname();
+ char *hostname = p_sysid ?
+ p_sysid->nodename : fc_host_system_hostname(vha->host);
+@@ -1902,8 +1900,7 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries,
+ /* Max frame size. */
+ eiter = entries + size;
+ eiter->type = cpu_to_be16(FDMI_PORT_MAX_FRAME_SIZE);
+- eiter->a.max_frame_size = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
+- icb24->frame_payload_size : ha->init_cb->frame_payload_size));
++ eiter->a.max_frame_size = cpu_to_be32(ha->frame_payload_size);
+ alen = sizeof(eiter->a.max_frame_size);
+ alen += FDMI_ATTR_TYPELEN(eiter);
+ eiter->len = cpu_to_be16(alen);
+@@ -2892,7 +2889,8 @@ static void qla24xx_async_gpsc_sp_done(srb_t *sp, int res)
+ qla24xx_handle_gpsc_event(vha, &ea);
+
+ done:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
+@@ -2904,6 +2902,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
+ if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+ return rval;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+@@ -2912,8 +2911,8 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->name = "gpsc";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+-
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla24xx_async_gpsc_sp_done);
+
+ /* CT_IU preamble */
+ ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD,
+@@ -2931,9 +2930,6 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id;
+
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla24xx_async_gpsc_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0x205e,
+ "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n",
+ sp->name, fcport->port_name, sp->handle,
+@@ -2946,7 +2942,8 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -2995,7 +2992,8 @@ void qla24xx_sp_unmap(scsi_qla_host_t *vha, srb_t *sp)
+ break;
+ }
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
+@@ -3134,13 +3132,15 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res)
+ if (res) {
+ if (res == QLA_FUNCTION_TIMEOUT) {
+ qla24xx_post_gpnid_work(sp->vha, &ea.id);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return;
+ }
+ } else if (sp->gen1) {
+ /* There was another RSCN for this Nport ID */
+ qla24xx_post_gpnid_work(sp->vha, &ea.id);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return;
+ }
+
+@@ -3161,7 +3161,8 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res)
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return;
+ }
+
+@@ -3181,6 +3182,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
+ if (!vha->flags.online)
+ goto done;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+@@ -3189,14 +3191,16 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
+ sp->name = "gpnid";
+ sp->u.iocb_cmd.u.ctarg.id = *id;
+ sp->gen1 = 0;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_gpnid_sp_done);
+
+ spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+ list_for_each_entry(tsp, &vha->gpnid_list, elem) {
+ if (tsp->u.iocb_cmd.u.ctarg.id.b24 == id->b24) {
+ tsp->gen1++;
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ goto done;
+ }
+ }
+@@ -3237,9 +3241,6 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id)
+ sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- sp->done = qla2x00_async_gpnid_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0x2067,
+ "Async-%s hdl=%x ID %3phC.\n", sp->name,
+ sp->handle, &ct_req->req.port_id.port_id);
+@@ -3269,25 +3270,18 @@ done_free_sp:
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+-
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+
+-void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea)
+-{
+- fc_port_t *fcport = ea->fcport;
+-
+- qla24xx_post_gnl_work(vha, fcport);
+-}
+
+ void qla24xx_async_gffid_sp_done(srb_t *sp, int res)
+ {
+ struct scsi_qla_host *vha = sp->vha;
+ fc_port_t *fcport = sp->fcport;
+ struct ct_sns_rsp *ct_rsp;
+- struct event_arg ea;
+ uint8_t fc4_scsi_feat;
+ uint8_t fc4_nvme_feat;
+
+@@ -3295,10 +3289,10 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res)
+ "Async done-%s res %x ID %x. %8phC\n",
+ sp->name, res, fcport->d_id.b24, fcport->port_name);
+
+- fcport->flags &= ~FCF_ASYNC_SENT;
+- ct_rsp = &fcport->ct_desc.ct_sns->p.rsp;
++ ct_rsp = sp->u.iocb_cmd.u.ctarg.rsp;
+ fc4_scsi_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET];
+ fc4_nvme_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET];
++ sp->rc = res;
+
+ /*
+ * FC-GS-7, 5.2.3.12 FC-4 Features - format
+@@ -3319,68 +3313,129 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res)
+ }
+ }
+
+- memset(&ea, 0, sizeof(ea));
+- ea.sp = sp;
+- ea.fcport = sp->fcport;
+- ea.rc = res;
++ if (sp->flags & SRB_WAKEUP_ON_COMP) {
++ complete(sp->comp);
++ } else {
++ if (sp->u.iocb_cmd.u.ctarg.req) {
++ dma_free_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
++ sp->u.iocb_cmd.u.ctarg.req,
++ sp->u.iocb_cmd.u.ctarg.req_dma);
++ sp->u.iocb_cmd.u.ctarg.req = NULL;
++ }
+
+- qla24xx_handle_gffid_event(vha, &ea);
+- sp->free(sp);
++ if (sp->u.iocb_cmd.u.ctarg.rsp) {
++ dma_free_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
++ sp->u.iocb_cmd.u.ctarg.rsp,
++ sp->u.iocb_cmd.u.ctarg.rsp_dma);
++ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
++ }
++
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++ /* we should not be here */
++ dump_stack();
++ }
+ }
+
+ /* Get FC4 Feature with Nport ID. */
+-int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport)
++int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool wait)
+ {
+ int rval = QLA_FUNCTION_FAILED;
+ struct ct_sns_req *ct_req;
+ srb_t *sp;
++ DECLARE_COMPLETION_ONSTACK(comp);
+
+- if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
++ /* this routine does not have handling for no wait */
++ if (!vha->flags.online || !wait)
+ return rval;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ return rval;
+
+- fcport->flags |= FCF_ASYNC_SENT;
+ sp->type = SRB_CT_PTHRU_CMD;
+ sp->name = "gffid";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla24xx_async_gffid_sp_done);
++ sp->comp = &comp;
++ sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout;
++
++ if (wait)
++ sp->flags = SRB_WAKEUP_ON_COMP;
+
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ sp->u.iocb_cmd.u.ctarg.req_allocated_size = sizeof(struct ct_sns_pkt);
++ sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
++ &sp->u.iocb_cmd.u.ctarg.req_dma,
++ GFP_KERNEL);
++ if (!sp->u.iocb_cmd.u.ctarg.req) {
++ ql_log(ql_log_warn, vha, 0xd041,
++ "%s: Failed to allocate ct_sns request.\n",
++ __func__);
++ goto done_free_sp;
++ }
++
++ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = sizeof(struct ct_sns_pkt);
++ sp->u.iocb_cmd.u.ctarg.rsp = dma_alloc_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
++ &sp->u.iocb_cmd.u.ctarg.rsp_dma,
++ GFP_KERNEL);
++ if (!sp->u.iocb_cmd.u.ctarg.rsp) {
++ ql_log(ql_log_warn, vha, 0xd041,
++ "%s: Failed to allocate ct_sns response.\n",
++ __func__);
++ goto done_free_sp;
++ }
+
+ /* CT_IU preamble */
+- ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFF_ID_CMD,
+- GFF_ID_RSP_SIZE);
++ ct_req = qla2x00_prep_ct_req(sp->u.iocb_cmd.u.ctarg.req, GFF_ID_CMD, GFF_ID_RSP_SIZE);
+
+ ct_req->req.gff_id.port_id[0] = fcport->d_id.b.domain;
+ ct_req->req.gff_id.port_id[1] = fcport->d_id.b.area;
+ ct_req->req.gff_id.port_id[2] = fcport->d_id.b.al_pa;
+
+- sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns;
+- sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma;
+- sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns;
+- sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma;
+ sp->u.iocb_cmd.u.ctarg.req_size = GFF_ID_REQ_SIZE;
+ sp->u.iocb_cmd.u.ctarg.rsp_size = GFF_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->done = qla24xx_async_gffid_sp_done;
+-
+- ql_dbg(ql_dbg_disc, vha, 0x2132,
+- "Async-%s hdl=%x %8phC.\n", sp->name,
+- sp->handle, fcport->port_name);
+-
+ rval = qla2x00_start_sp(sp);
+- if (rval != QLA_SUCCESS)
++
++ if (rval != QLA_SUCCESS) {
++ rval = QLA_FUNCTION_FAILED;
+ goto done_free_sp;
++ } else {
++ ql_dbg(ql_dbg_disc, vha, 0x3074,
++ "Async-%s hdl=%x portid %06x\n",
++ sp->name, sp->handle, fcport->d_id.b24);
++ }
++
++ wait_for_completion(sp->comp);
++ rval = sp->rc;
+
+- return rval;
+ done_free_sp:
+- sp->free(sp);
+- fcport->flags &= ~FCF_ASYNC_SENT;
++ if (sp->u.iocb_cmd.u.ctarg.req) {
++ dma_free_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
++ sp->u.iocb_cmd.u.ctarg.req,
++ sp->u.iocb_cmd.u.ctarg.req_dma);
++ sp->u.iocb_cmd.u.ctarg.req = NULL;
++ }
++
++ if (sp->u.iocb_cmd.u.ctarg.rsp) {
++ dma_free_coherent(&vha->hw->pdev->dev,
++ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
++ sp->u.iocb_cmd.u.ctarg.rsp,
++ sp->u.iocb_cmd.u.ctarg.rsp_dma);
++ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
++ }
++
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return rval;
+ }
+
+@@ -3573,7 +3628,7 @@ login_logout:
+ do_delete) {
+ if (fcport->loop_id != FC_NO_LOOP_ID) {
+ if (fcport->flags & FCF_FCP2_DEVICE)
+- fcport->logout_on_delete = 0;
++ continue;
+
+ ql_log(ql_log_warn, vha, 0x20f0,
+ "%s %d %8phC post del sess\n",
+@@ -3766,7 +3821,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res)
+ "Async done-%s res %x FC4Type %x\n",
+ sp->name, res, sp->gen2);
+
+- del_timer(&sp->u.iocb_cmd.timer);
+ sp->rc = res;
+ if (res) {
+ unsigned long flags;
+@@ -3891,9 +3945,8 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp,
+ sp->name = "gnnft";
+ sp->gen1 = vha->hw->base_qpair->chip_reset;
+ sp->gen2 = fc4_type;
+-
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_gpnft_gnnft_sp_done);
+
+ memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size);
+ memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size);
+@@ -3909,8 +3962,6 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp,
+ sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->done = qla2x00_async_gpnft_gnnft_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s hdl=%x FC4Type %x.\n", sp->name,
+ sp->handle, ct_req->req.gpn_ft.port_type);
+@@ -3937,8 +3988,8 @@ done_free_sp:
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+-
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+ spin_lock_irqsave(&vha->work_lock, flags);
+ vha->scan.scan_flags &= ~SF_SCANNING;
+@@ -3990,9 +4041,12 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
+ ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff,
+ "%s: Performing FCP Scan\n", __func__);
+
+- if (sp)
+- sp->free(sp); /* should not happen */
++ if (sp) {
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++ }
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp) {
+ spin_lock_irqsave(&vha->work_lock, flags);
+@@ -4037,6 +4091,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
+ sp->u.iocb_cmd.u.ctarg.req,
+ sp->u.iocb_cmd.u.ctarg.req_dma);
+ sp->u.iocb_cmd.u.ctarg.req = NULL;
++ /* ref: INIT */
+ qla2x00_rel_sp(sp);
+ return rval;
+ }
+@@ -4056,9 +4111,8 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
+ sp->name = "gpnft";
+ sp->gen1 = vha->hw->base_qpair->chip_reset;
+ sp->gen2 = fc4_type;
+-
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_gpnft_gnnft_sp_done);
+
+ rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size;
+ memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size);
+@@ -4073,8 +4127,6 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp)
+
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->done = qla2x00_async_gpnft_gnnft_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s hdl=%x FC4Type %x.\n", sp->name,
+ sp->handle, ct_req->req.gpn_ft.port_type);
+@@ -4102,7 +4154,8 @@ done_free_sp:
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+ spin_lock_irqsave(&vha->work_lock, flags);
+ vha->scan.scan_flags &= ~SF_SCANNING;
+@@ -4166,7 +4219,8 @@ static void qla2x00_async_gnnid_sp_done(srb_t *sp, int res)
+
+ qla24xx_handle_gnnid_event(vha, &ea);
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+@@ -4179,6 +4233,7 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ return rval;
+
+ qla2x00_set_fcport_disc_state(fcport, DSC_GNN_ID);
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
+ if (!sp)
+ goto done;
+@@ -4188,9 +4243,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->name = "gnnid";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+-
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_gnnid_sp_done);
+
+ /* CT_IU preamble */
+ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GNN_ID_CMD,
+@@ -4209,8 +4263,6 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->u.iocb_cmd.u.ctarg.rsp_size = GNN_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->done = qla2x00_async_gnnid_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n",
+ sp->name, fcport->port_name,
+@@ -4222,7 +4274,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ fcport->flags &= ~FCF_ASYNC_SENT;
+ done:
+ return rval;
+@@ -4296,7 +4349,8 @@ static void qla2x00_async_gfpnid_sp_done(srb_t *sp, int res)
+
+ qla24xx_handle_gfpnid_event(vha, &ea);
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+@@ -4308,6 +4362,7 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+ return rval;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
+ if (!sp)
+ goto done;
+@@ -4316,9 +4371,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->name = "gfpnid";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+-
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_gfpnid_sp_done);
+
+ /* CT_IU preamble */
+ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFPN_ID_CMD,
+@@ -4337,8 +4391,6 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ sp->u.iocb_cmd.u.ctarg.rsp_size = GFPN_ID_RSP_SIZE;
+ sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS;
+
+- sp->done = qla2x00_async_gfpnid_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n",
+ sp->name, fcport->port_name,
+@@ -4351,7 +4403,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 5fc7697f0af4c..1a2ceef92bf07 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -47,10 +47,20 @@ qla2x00_sp_timeout(struct timer_list *t)
+ {
+ srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer);
+ struct srb_iocb *iocb;
++ scsi_qla_host_t *vha = sp->vha;
+
+ WARN_ON(irqs_disabled());
+ iocb = &sp->u.iocb_cmd;
+ iocb->timeout(sp);
++
++ /* ref: TMR */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++
++ if (vha && qla2x00_isp_reg_stat(vha->hw)) {
++ ql_log(ql_log_info, vha, 0x9008,
++ "PCI/Register disconnect.\n");
++ qla_pci_set_eeh_busy(vha);
++ }
+ }
+
+ void qla2x00_sp_free(srb_t *sp)
+@@ -100,6 +110,7 @@ static void qla24xx_abort_iocb_timeout(void *data)
+ struct qla_qpair *qpair = sp->qpair;
+ u32 handle;
+ unsigned long flags;
++ int sp_found = 0, cmdsp_found = 0;
+
+ if (sp->cmd_sp)
+ ql_dbg(ql_dbg_async, sp->vha, 0x507c,
+@@ -114,22 +125,34 @@ static void qla24xx_abort_iocb_timeout(void *data)
+ spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+ for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) {
+ if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] ==
+- sp->cmd_sp))
++ sp->cmd_sp)) {
+ qpair->req->outstanding_cmds[handle] = NULL;
++ cmdsp_found = 1;
++ qla_put_fw_resources(qpair, &sp->cmd_sp->iores);
++ }
+
+ /* removing the abort */
+ if (qpair->req->outstanding_cmds[handle] == sp) {
+ qpair->req->outstanding_cmds[handle] = NULL;
++ sp_found = 1;
++ qla_put_fw_resources(qpair, &sp->iores);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+
+- if (sp->cmd_sp)
++ if (cmdsp_found && sp->cmd_sp) {
++ /*
++ * This done function should take care of
++ * original command ref: INIT
++ */
+ sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED);
++ }
+
+- abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT);
+- sp->done(sp, QLA_OS_TIMER_EXPIRED);
++ if (sp_found) {
++ abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT);
++ sp->done(sp, QLA_OS_TIMER_EXPIRED);
++ }
+ }
+
+ static void qla24xx_abort_sp_done(srb_t *sp, int res)
+@@ -140,11 +163,11 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res)
+ if (orig_sp)
+ qla_wait_nvme_release_cmd_kref(orig_sp);
+
+- del_timer(&sp->u.iocb_cmd.timer);
+ if (sp->flags & SRB_WAKEUP_ON_COMP)
+ complete(&abt->u.abt.comp);
+ else
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+@@ -154,11 +177,13 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+ srb_t *sp;
+ int rval = QLA_FUNCTION_FAILED;
+
++ /* ref: INIT for ABTS command */
+ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
+ GFP_ATOMIC);
+ if (!sp)
+ return QLA_MEMORY_ALLOC_FAILED;
+
++ qla_vha_mark_busy(vha);
+ abt_iocb = &sp->u.iocb_cmd;
+ sp->type = SRB_ABT_CMD;
+ sp->name = "abort";
+@@ -167,23 +192,22 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+ if (wait)
+ sp->flags = SRB_WAKEUP_ON_COMP;
+
+- abt_iocb->timeout = qla24xx_abort_iocb_timeout;
+ init_completion(&abt_iocb->u.abt.comp);
+ /* FW can send 2 x ABTS's timeout/20s */
+- qla2x00_init_timer(sp, 42);
++ qla2x00_init_async_sp(sp, 42, qla24xx_abort_sp_done);
++ sp->u.iocb_cmd.timeout = qla24xx_abort_iocb_timeout;
+
+ abt_iocb->u.abt.cmd_hndl = cmd_sp->handle;
+ abt_iocb->u.abt.req_que_no = cpu_to_le16(cmd_sp->qpair->req->id);
+
+- sp->done = qla24xx_abort_sp_done;
+-
+ ql_dbg(ql_dbg_async, vha, 0x507c,
+ "Abort command issued - hdl=%x, type=%x\n", cmd_sp->handle,
+ cmd_sp->type);
+
+ rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS) {
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return rval;
+ }
+
+@@ -191,7 +215,8 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+ wait_for_completion(&abt_iocb->u.abt.comp);
+ rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
+ QLA_SUCCESS : QLA_ERR_FROM_FW;
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ return rval;
+@@ -286,10 +311,13 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res)
+ ea.iop[0] = lio->u.logio.iop[0];
+ ea.iop[1] = lio->u.logio.iop[1];
+ ea.sp = sp;
++ if (res)
++ ea.data[0] = MBS_COMMAND_ERROR;
+ qla24xx_handle_plogi_done_event(vha, &ea);
+ }
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int
+@@ -308,6 +336,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
+ return rval;
+ }
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+@@ -320,17 +349,15 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
+ sp->name = "login";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_login_sp_done);
+
+ lio = &sp->u.iocb_cmd;
+- lio->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+-
+- sp->done = qla2x00_async_login_sp_done;
+ if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) {
+ lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY;
+ } else {
+ if (vha->hw->flags.edif_enabled &&
+- vha->e_dbell.db_flags & EDB_ACTIVE) {
++ DBELL_ACTIVE(vha)) {
+ lio->u.logio.flags |=
+ (SRB_LOGIN_FCSP | SRB_LOGIN_SKIP_PRLI);
+ ql_dbg(ql_dbg_disc, vha, 0x2072,
+@@ -359,7 +386,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ fcport->flags &= ~FCF_ASYNC_SENT;
+ done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+@@ -371,29 +399,26 @@ static void qla2x00_async_logout_sp_done(srb_t *sp, int res)
+ sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ sp->fcport->login_gen++;
+ qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int
+ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
+ {
+ srb_t *sp;
+- struct srb_iocb *lio;
+ int rval = QLA_FUNCTION_FAILED;
+
+ fcport->flags |= FCF_ASYNC_SENT;
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_LOGOUT_CMD;
+ sp->name = "logout";
+-
+- lio = &sp->u.iocb_cmd;
+- lio->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+-
+- sp->done = qla2x00_async_logout_sp_done;
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_logout_sp_done),
+
+ ql_dbg(ql_dbg_disc, vha, 0x2070,
+ "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n",
+@@ -407,7 +432,8 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ return rval;
+@@ -433,29 +459,26 @@ static void qla2x00_async_prlo_sp_done(srb_t *sp, int res)
+ if (!test_bit(UNLOADING, &vha->dpc_flags))
+ qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport,
+ lio->u.logio.data);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int
+ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport)
+ {
+ srb_t *sp;
+- struct srb_iocb *lio;
+ int rval;
+
+ rval = QLA_FUNCTION_FAILED;
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_PRLO_CMD;
+ sp->name = "prlo";
+-
+- lio = &sp->u.iocb_cmd;
+- lio->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+-
+- sp->done = qla2x00_async_prlo_sp_done;
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_prlo_sp_done);
+
+ ql_dbg(ql_dbg_disc, vha, 0x2070,
+ "Async-prlo - hdl=%x loop-id=%x portid=%02x%02x%02x.\n",
+@@ -469,7 +492,8 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+ return rval;
+@@ -479,6 +503,7 @@ static
+ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
+ {
+ struct fc_port *fcport = ea->fcport;
++ unsigned long flags;
+
+ ql_dbg(ql_dbg_disc, vha, 0x20d2,
+ "%s %8phC DS %d LS %d rc %d login %d|%d rscn %d|%d lid %d\n",
+@@ -493,9 +518,15 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
+ ql_dbg(ql_dbg_disc, vha, 0x2066,
+ "%s %8phC: adisc fail: post delete\n",
+ __func__, ea->fcport->port_name);
++
++ spin_lock_irqsave(&vha->work_lock, flags);
+ /* deleted = 0 & logout_on_delete = force fw cleanup */
+- fcport->deleted = 0;
++ if (fcport->deleted == QLA_SESS_DELETED)
++ fcport->deleted = 0;
++
+ fcport->logout_on_delete = 1;
++ spin_unlock_irqrestore(&vha->work_lock, flags);
++
+ qlt_schedule_sess_for_deletion(ea->fcport);
+ return;
+ }
+@@ -552,10 +583,12 @@ static void qla2x00_async_adisc_sp_done(srb_t *sp, int res)
+ ea.iop[1] = lio->u.logio.iop[1];
+ ea.fcport = sp->fcport;
+ ea.sp = sp;
++ if (res)
++ ea.data[0] = MBS_COMMAND_ERROR;
+
+ qla24xx_handle_adisc_event(vha, &ea);
+-
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int
+@@ -566,26 +599,34 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport,
+ struct srb_iocb *lio;
+ int rval = QLA_FUNCTION_FAILED;
+
++ if (IS_SESSION_DELETED(fcport)) {
++ ql_log(ql_log_warn, vha, 0xffff,
++ "%s: %8phC is being delete - not sending command.\n",
++ __func__, fcport->port_name);
++ fcport->flags &= ~FCF_ASYNC_ACTIVE;
++ return rval;
++ }
++
+ if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+ return rval;
+
+ fcport->flags |= FCF_ASYNC_SENT;
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_ADISC_CMD;
+ sp->name = "adisc";
+-
+- lio = &sp->u.iocb_cmd;
+- lio->timeout = qla2x00_async_iocb_timeout;
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_adisc_sp_done);
+
+- sp->done = qla2x00_async_adisc_sp_done;
+- if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
++ if (data[1] & QLA_LOGIO_LOGIN_RETRIED) {
++ lio = &sp->u.iocb_cmd;
+ lio->u.logio.flags |= SRB_LOGIN_RETRIED;
++ }
+
+ ql_dbg(ql_dbg_disc, vha, 0x206f,
+ "Async-adisc - hdl=%x loopid=%x portid=%06x %8phC.\n",
+@@ -598,7 +639,8 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport,
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ qla2x00_post_async_adisc_work(vha, fcport, data);
+@@ -862,7 +904,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
+ break;
+ case DSC_LS_PLOGI_COMP:
+ if (vha->hw->flags.edif_enabled &&
+- vha->e_dbell.db_flags & EDB_ACTIVE) {
++ DBELL_ACTIVE(vha)) {
+ /* check to see if App support secure or not */
+ qla24xx_post_gpdb_work(vha, fcport, 0);
+ break;
+@@ -964,6 +1006,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
+ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ }
+ break;
++ case ISP_CFG_NL:
++ qla24xx_fcport_handle_login(vha, fcport);
++ break;
+ default:
+ break;
+ }
+@@ -987,8 +1032,6 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res)
+ sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1],
+ sp->u.iocb_cmd.u.mbx.in_mb[2]);
+
+- if (res == QLA_FUNCTION_TIMEOUT)
+- return;
+
+ sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
+ memset(&ea, 0, sizeof(ea));
+@@ -1026,8 +1069,8 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res)
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
+ list_for_each_entry_safe(fcport, tf, &h, gnl_entry) {
+- list_del_init(&fcport->gnl_entry);
+ spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
++ list_del_init(&fcport->gnl_entry);
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+ ea.fcport = fcport;
+@@ -1081,19 +1124,19 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res)
+ }
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
+ {
+ srb_t *sp;
+- struct srb_iocb *mbx;
+ int rval = QLA_FUNCTION_FAILED;
+ unsigned long flags;
+ u16 *mb;
+
+ if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT))
+- return rval;
++ goto done;
+
+ ql_dbg(ql_dbg_disc, vha, 0x20d9,
+ "Async-gnlist WWPN %8phC \n", fcport->port_name);
+@@ -1112,6 +1155,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
+ vha->gnl.sent = 1;
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+@@ -1120,10 +1164,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
+ sp->name = "gnlist";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+-
+- mbx = &sp->u.iocb_cmd;
+- mbx->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla24xx_async_gnl_sp_done);
+
+ mb = sp->u.iocb_cmd.u.mbx.out_mb;
+ mb[0] = MBC_PORT_NODE_NAME_LIST;
+@@ -1135,8 +1177,6 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
+ mb[8] = vha->gnl.size;
+ mb[9] = vha->vp_idx;
+
+- sp->done = qla24xx_async_gnl_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0x20da,
+ "Async-%s - OUT WWPN %8phC hndl %x\n",
+ sp->name, fcport->port_name, sp->handle);
+@@ -1148,9 +1188,11 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++ fcport->flags &= ~(FCF_ASYNC_SENT);
+ done:
+- fcport->flags &= ~(FCF_ASYNC_ACTIVE | FCF_ASYNC_SENT);
++ fcport->flags &= ~(FCF_ASYNC_ACTIVE);
+ return rval;
+ }
+
+@@ -1194,7 +1236,7 @@ done:
+ dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in,
+ sp->u.iocb_cmd.u.mbx.in_dma);
+
+- sp->free(sp);
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+@@ -1235,11 +1277,13 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res)
+ ea.sp = sp;
+ if (res == QLA_OS_TIMER_EXPIRED)
+ ea.data[0] = QLA_OS_TIMER_EXPIRED;
++ else if (res)
++ ea.data[0] = MBS_COMMAND_ERROR;
+
+ qla24xx_handle_prli_done_event(vha, &ea);
+ }
+
+- sp->free(sp);
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int
+@@ -1272,12 +1316,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport)
+
+ sp->type = SRB_PRLI_CMD;
+ sp->name = "prli";
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_prli_sp_done);
+
+ lio = &sp->u.iocb_cmd;
+- lio->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
+-
+- sp->done = qla2x00_async_prli_sp_done;
+ lio->u.logio.flags = 0;
+
+ if (NVME_TARGET(vha->hw, fcport))
+@@ -1299,7 +1341,8 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport)
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ fcport->flags &= ~FCF_ASYNC_SENT;
+ return rval;
+ }
+@@ -1328,14 +1371,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+ struct port_database_24xx *pd;
+ struct qla_hw_data *ha = vha->hw;
+
+- if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) ||
+- fcport->loop_id == FC_NO_LOOP_ID) {
++ if (IS_SESSION_DELETED(fcport)) {
+ ql_log(ql_log_warn, vha, 0xffff,
+- "%s: %8phC - not sending command.\n",
+- __func__, fcport->port_name);
++ "%s: %8phC is being delete - not sending command.\n",
++ __func__, fcport->port_name);
++ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+ return rval;
+ }
+
++ if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) {
++ ql_log(ql_log_warn, vha, 0xffff,
++ "%s: %8phC online %d flags %x - not sending command.\n",
++ __func__, fcport->port_name, vha->flags.online, fcport->flags);
++ goto done;
++ }
++
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+@@ -1347,10 +1397,8 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+ sp->name = "gpdb";
+ sp->gen1 = fcport->rscn_gen;
+ sp->gen2 = fcport->login_gen;
+-
+- mbx = &sp->u.iocb_cmd;
+- mbx->timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla24xx_async_gpdb_sp_done);
+
+ pd = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
+ if (pd == NULL) {
+@@ -1369,11 +1417,10 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt)
+ mb[9] = vha->vp_idx;
+ mb[10] = opt;
+
+- mbx->u.mbx.in = pd;
++ mbx = &sp->u.iocb_cmd;
++ mbx->u.mbx.in = (void *)pd;
+ mbx->u.mbx.in_dma = pd_dma;
+
+- sp->done = qla24xx_async_gpdb_sp_done;
+-
+ ql_dbg(ql_dbg_disc, vha, 0x20dc,
+ "Async-%s %8phC hndl %x opt %x\n",
+ sp->name, fcport->port_name, sp->handle, opt);
+@@ -1387,7 +1434,7 @@ done_free_sp:
+ if (pd)
+ dma_pool_free(ha->s_dma_pool, pd, pd_dma);
+
+- sp->free(sp);
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ fcport->flags &= ~FCF_ASYNC_SENT;
+ done:
+ fcport->flags &= ~FCF_ASYNC_ACTIVE;
+@@ -1402,7 +1449,6 @@ void __qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
+
+ spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+ ea->fcport->login_gen++;
+- ea->fcport->deleted = 0;
+ ea->fcport->logout_on_delete = 1;
+
+ if (!ea->fcport->login_succ && !IS_SW_RESV_ADDR(ea->fcport->d_id)) {
+@@ -1454,7 +1500,7 @@ static int qla_chk_secure_login(scsi_qla_host_t *vha, fc_port_t *fcport,
+ qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
+ fcport->d_id.b24);
+
+- if (vha->e_dbell.db_flags == EDB_ACTIVE) {
++ if (DBELL_ACTIVE(vha)) {
+ ql_dbg(ql_dbg_disc, vha, 0x20ef,
+ "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n",
+ __func__, __LINE__, fcport->port_name);
+@@ -1559,6 +1605,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport)
+ u8 login = 0;
+ int rc;
+
++ ql_dbg(ql_dbg_disc, vha, 0x307b,
++ "%s %8phC DS %d LS %d lid %d retries=%d\n",
++ __func__, fcport->port_name, fcport->disc_state,
++ fcport->fw_login_state, fcport->loop_id, fcport->login_retry);
++
+ if (qla_tgt_mode_enabled(vha))
+ return;
+
+@@ -1617,7 +1668,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
+ fcport->login_gen, fcport->loop_id, fcport->scan_state,
+ fcport->fc4_type);
+
+- if (fcport->scan_state != QLA_FCPORT_FOUND)
++ if (fcport->scan_state != QLA_FCPORT_FOUND ||
++ fcport->disc_state == DSC_DELETE_PEND)
+ return 0;
+
+ if ((fcport->loop_id != FC_NO_LOOP_ID) &&
+@@ -1638,7 +1690,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
+ if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw))
+ return 0;
+
+- if (fcport->flags & FCF_ASYNC_SENT) {
++ if (fcport->flags & (FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE)) {
+ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ return 0;
+ }
+@@ -1735,8 +1787,16 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
+ break;
+
+ case DSC_LOGIN_PEND:
+- if (fcport->fw_login_state == DSC_LS_PLOGI_COMP)
++ if (vha->hw->flags.edif_enabled)
++ break;
++
++ if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
++ ql_dbg(ql_dbg_disc, vha, 0x2118,
++ "%s %d %8phC post %s PRLI\n",
++ __func__, __LINE__, fcport->port_name,
++ NVME_TARGET(vha->hw, fcport) ? "NVME" : "FC");
+ qla24xx_post_prli_work(vha, fcport);
++ }
+ break;
+
+ case DSC_UPD_FCPORT:
+@@ -1786,16 +1846,76 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+ fc_port_t *fcport;
+ unsigned long flags;
+
+- fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+- if (fcport) {
+- if (fcport->flags & FCF_FCP2_DEVICE) {
+- ql_dbg(ql_dbg_disc, vha, 0x2115,
+- "Delaying session delete for FCP2 portid=%06x %8phC ",
+- fcport->d_id.b24, fcport->port_name);
+- return;
++ switch (ea->id.b.rsvd_1) {
++ case RSCN_PORT_ADDR:
++ fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
++ if (fcport) {
++ if (fcport->flags & FCF_FCP2_DEVICE &&
++ atomic_read(&fcport->state) == FCS_ONLINE) {
++ ql_dbg(ql_dbg_disc, vha, 0x2115,
++ "Delaying session delete for FCP2 portid=%06x %8phC ",
++ fcport->d_id.b24, fcport->port_name);
++ return;
++ }
++
++ if (vha->hw->flags.edif_enabled && DBELL_ACTIVE(vha)) {
++ /*
++ * On ipsec start by remote port, Target port
++ * may use RSCN to trigger initiator to
++ * relogin. If driver is already in the
++ * process of a relogin, then ignore the RSCN
++ * and allow the current relogin to continue.
++ * This reduces thrashing of the connection.
++ */
++ if (atomic_read(&fcport->state) == FCS_ONLINE) {
++ /*
++ * If state = online, then set scan_needed=1 to do relogin.
++ * Otherwise we're already in the middle of a relogin
++ */
++ fcport->scan_needed = 1;
++ fcport->rscn_gen++;
++ }
++ } else {
++ fcport->scan_needed = 1;
++ fcport->rscn_gen++;
++ }
++ }
++ break;
++ case RSCN_AREA_ADDR:
++ list_for_each_entry(fcport, &vha->vp_fcports, list) {
++ if (fcport->flags & FCF_FCP2_DEVICE &&
++ atomic_read(&fcport->state) == FCS_ONLINE)
++ continue;
++
++ if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) {
++ fcport->scan_needed = 1;
++ fcport->rscn_gen++;
++ }
++ }
++ break;
++ case RSCN_DOM_ADDR:
++ list_for_each_entry(fcport, &vha->vp_fcports, list) {
++ if (fcport->flags & FCF_FCP2_DEVICE &&
++ atomic_read(&fcport->state) == FCS_ONLINE)
++ continue;
++
++ if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) {
++ fcport->scan_needed = 1;
++ fcport->rscn_gen++;
++ }
+ }
+- fcport->scan_needed = 1;
+- fcport->rscn_gen++;
++ break;
++ case RSCN_FAB_ADDR:
++ default:
++ list_for_each_entry(fcport, &vha->vp_fcports, list) {
++ if (fcport->flags & FCF_FCP2_DEVICE &&
++ atomic_read(&fcport->state) == FCS_ONLINE)
++ continue;
++
++ fcport->scan_needed = 1;
++ fcport->rscn_gen++;
++ }
++ break;
+ }
+
+ spin_lock_irqsave(&vha->work_lock, flags);
+@@ -1885,12 +2005,17 @@ qla2x00_tmf_iocb_timeout(void *data)
+ int rc, h;
+ unsigned long flags;
+
+- rc = qla24xx_async_abort_cmd(sp, false);
++ if (sp->type == SRB_MARKER)
++ rc = QLA_FUNCTION_FAILED;
++ else
++ rc = qla24xx_async_abort_cmd(sp, false);
++
+ if (rc) {
+ spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
+ for (h = 1; h < sp->qpair->req->num_outstanding_cmds; h++) {
+ if (sp->qpair->req->outstanding_cmds[h] == sp) {
+ sp->qpair->req->outstanding_cmds[h] = NULL;
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ break;
+ }
+ }
+@@ -1901,45 +2026,176 @@ qla2x00_tmf_iocb_timeout(void *data)
+ }
+ }
+
+-static void qla2x00_tmf_sp_done(srb_t *sp, int res)
++static void qla_marker_sp_done(srb_t *sp, int res)
+ {
+ struct srb_iocb *tmf = &sp->u.iocb_cmd;
+
++ if (res != QLA_SUCCESS)
++ ql_dbg(ql_dbg_taskm, sp->vha, 0x8004,
++ "Async-marker fail hdl=%x portid=%06x ctrl=%x lun=%lld qp=%d.\n",
++ sp->handle, sp->fcport->d_id.b24, sp->u.iocb_cmd.u.tmf.flags,
++ sp->u.iocb_cmd.u.tmf.lun, sp->qpair->id);
++
++ sp->u.iocb_cmd.u.tmf.data = res;
+ complete(&tmf->u.tmf.comp);
+ }
+
+-int
+-qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
+- uint32_t tag)
++#define START_SP_W_RETRIES(_sp, _rval, _chip_gen, _login_gen) \
++{\
++ int cnt = 5; \
++ do { \
++ if (_chip_gen != sp->vha->hw->chip_reset || _login_gen != sp->fcport->login_gen) {\
++ _rval = EINVAL; \
++ break; \
++ } \
++ _rval = qla2x00_start_sp(_sp); \
++ if (_rval == EAGAIN) \
++ msleep(1); \
++ else \
++ break; \
++ cnt--; \
++ } while (cnt); \
++}
++
++/**
++ * qla26xx_marker: send marker IOCB and wait for the completion of it.
++ * @arg: pointer to argument list.
++ * It is assume caller will provide an fcport pointer and modifier
++ */
++static int
++qla26xx_marker(struct tmf_arg *arg)
+ {
+- struct scsi_qla_host *vha = fcport->vha;
++ struct scsi_qla_host *vha = arg->vha;
+ struct srb_iocb *tm_iocb;
+ srb_t *sp;
+ int rval = QLA_FUNCTION_FAILED;
++ fc_port_t *fcport = arg->fcport;
++ u32 chip_gen, login_gen;
+
+- sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
++ if (TMF_NOT_READY(arg->fcport)) {
++ ql_dbg(ql_dbg_taskm, vha, 0x8039,
++ "FC port not ready for marker loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
++ fcport->loop_id, fcport->d_id.b24,
++ arg->modifier, arg->lun, arg->qpair->id);
++ return QLA_SUSPENDED;
++ }
++
++ chip_gen = vha->hw->chip_reset;
++ login_gen = fcport->login_gen;
++
++ /* ref: INIT */
++ sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
++ sp->type = SRB_MARKER;
++ sp->name = "marker";
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), qla_marker_sp_done);
++ sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout;
++
+ tm_iocb = &sp->u.iocb_cmd;
++ init_completion(&tm_iocb->u.tmf.comp);
++ tm_iocb->u.tmf.modifier = arg->modifier;
++ tm_iocb->u.tmf.lun = arg->lun;
++ tm_iocb->u.tmf.loop_id = fcport->loop_id;
++ tm_iocb->u.tmf.vp_index = vha->vp_idx;
++
++ START_SP_W_RETRIES(sp, rval, chip_gen, login_gen);
++
++ ql_dbg(ql_dbg_taskm, vha, 0x8006,
++ "Async-marker hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n",
++ sp->handle, fcport->loop_id, fcport->d_id.b24,
++ arg->modifier, arg->lun, sp->qpair->id, rval);
++
++ if (rval != QLA_SUCCESS) {
++ ql_log(ql_log_warn, vha, 0x8031,
++ "Marker IOCB send failure (%x).\n", rval);
++ goto done_free_sp;
++ }
++
++ wait_for_completion(&tm_iocb->u.tmf.comp);
++ rval = tm_iocb->u.tmf.data;
++
++ if (rval != QLA_SUCCESS) {
++ ql_log(ql_log_warn, vha, 0x8019,
++ "Marker failed hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n",
++ sp->handle, fcport->loop_id, fcport->d_id.b24,
++ arg->modifier, arg->lun, sp->qpair->id, rval);
++ }
++
++done_free_sp:
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
++done:
++ return rval;
++}
++
++static void qla2x00_tmf_sp_done(srb_t *sp, int res)
++{
++ struct srb_iocb *tmf = &sp->u.iocb_cmd;
++
++ if (res)
++ tmf->u.tmf.data = res;
++ complete(&tmf->u.tmf.comp);
++}
++
++static int qla_tmf_wait(struct tmf_arg *arg)
++{
++ /* there are only 2 types of error handling that reaches here, lun or target reset */
++ if (arg->flags & (TCF_LUN_RESET | TCF_ABORT_TASK_SET | TCF_CLEAR_TASK_SET))
++ return qla2x00_eh_wait_for_pending_commands(arg->vha,
++ arg->fcport->d_id.b24, arg->lun, WAIT_LUN);
++ else
++ return qla2x00_eh_wait_for_pending_commands(arg->vha,
++ arg->fcport->d_id.b24, arg->lun, WAIT_TARGET);
++}
++
++static int
++__qla2x00_async_tm_cmd(struct tmf_arg *arg)
++{
++ struct scsi_qla_host *vha = arg->vha;
++ struct srb_iocb *tm_iocb;
++ srb_t *sp;
++ int rval = QLA_FUNCTION_FAILED;
++ fc_port_t *fcport = arg->fcport;
++ u32 chip_gen, login_gen;
++ u64 jif;
++
++ if (TMF_NOT_READY(arg->fcport)) {
++ ql_dbg(ql_dbg_taskm, vha, 0x8032,
++ "FC port not ready for TM command loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
++ fcport->loop_id, fcport->d_id.b24,
++ arg->modifier, arg->lun, arg->qpair->id);
++ return QLA_SUSPENDED;
++ }
++
++ chip_gen = vha->hw->chip_reset;
++ login_gen = fcport->login_gen;
++
++ /* ref: INIT */
++ sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
++ if (!sp)
++ goto done;
++
++ qla_vha_mark_busy(vha);
+ sp->type = SRB_TM_CMD;
+ sp->name = "tmf";
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha),
++ qla2x00_tmf_sp_done);
++ sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout;
+
+- tm_iocb->timeout = qla2x00_tmf_iocb_timeout;
++ tm_iocb = &sp->u.iocb_cmd;
+ init_completion(&tm_iocb->u.tmf.comp);
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha));
++ tm_iocb->u.tmf.flags = arg->flags;
++ tm_iocb->u.tmf.lun = arg->lun;
+
+- tm_iocb->u.tmf.flags = flags;
+- tm_iocb->u.tmf.lun = lun;
+- tm_iocb->u.tmf.data = tag;
+- sp->done = qla2x00_tmf_sp_done;
++ START_SP_W_RETRIES(sp, rval, chip_gen, login_gen);
+
+ ql_dbg(ql_dbg_taskm, vha, 0x802f,
+- "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n",
+- sp->handle, fcport->loop_id, fcport->d_id.b.domain,
+- fcport->d_id.b.area, fcport->d_id.b.al_pa);
++ "Async-tmf hdl=%x loop-id=%x portid=%06x ctrl=%x lun=%lld qp=%d rval=%x.\n",
++ sp->handle, fcport->loop_id, fcport->d_id.b24,
++ arg->flags, arg->lun, sp->qpair->id, rval);
+
+- rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS)
+ goto done_free_sp;
+ wait_for_completion(&tm_iocb->u.tmf.comp);
+@@ -1952,22 +2208,130 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
+ }
+
+ if (!test_bit(UNLOADING, &vha->dpc_flags) && !IS_QLAFX00(vha->hw)) {
+- flags = tm_iocb->u.tmf.flags;
+- lun = (uint16_t)tm_iocb->u.tmf.lun;
++ jif = jiffies;
++ if (qla_tmf_wait(arg)) {
++ ql_log(ql_log_info, vha, 0x803e,
++ "Waited %u ms Nexus=%ld:%06x:%llu.\n",
++ jiffies_to_msecs(jiffies - jif), vha->host_no,
++ fcport->d_id.b24, arg->lun);
++ }
+
+- /* Issue Marker IOCB */
+- qla2x00_marker(vha, vha->hw->base_qpair,
+- fcport->loop_id, lun,
+- flags == TCF_LUN_RESET ? MK_SYNC_ID_LUN : MK_SYNC_ID);
++ if (chip_gen == vha->hw->chip_reset && login_gen == fcport->login_gen) {
++ rval = qla26xx_marker(arg);
++ } else {
++ ql_log(ql_log_info, vha, 0x803e,
++ "Skip Marker due to disruption. Nexus=%ld:%06x:%llu.\n",
++ vha->host_no, fcport->d_id.b24, arg->lun);
++ rval = QLA_FUNCTION_FAILED;
++ }
+ }
++ if (tm_iocb->u.tmf.data)
++ rval = tm_iocb->u.tmf.data;
+
+ done_free_sp:
+- sp->free(sp);
+- fcport->flags &= ~FCF_ASYNC_SENT;
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+
++static void qla_put_tmf(struct tmf_arg *arg)
++{
++ struct scsi_qla_host *vha = arg->vha;
++ struct qla_hw_data *ha = vha->hw;
++ unsigned long flags;
++
++ spin_lock_irqsave(&ha->tgt.sess_lock, flags);
++ ha->active_tmf--;
++ list_del(&arg->tmf_elem);
++ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
++}
++
++static
++int qla_get_tmf(struct tmf_arg *arg)
++{
++ struct scsi_qla_host *vha = arg->vha;
++ struct qla_hw_data *ha = vha->hw;
++ unsigned long flags;
++ fc_port_t *fcport = arg->fcport;
++ int rc = 0;
++ struct tmf_arg *t;
++
++ spin_lock_irqsave(&ha->tgt.sess_lock, flags);
++ list_for_each_entry(t, &ha->tmf_active, tmf_elem) {
++ if (t->fcport == arg->fcport && t->lun == arg->lun) {
++ /* reject duplicate TMF */
++ ql_log(ql_log_warn, vha, 0x802c,
++ "found duplicate TMF. Nexus=%ld:%06x:%llu.\n",
++ vha->host_no, fcport->d_id.b24, arg->lun);
++ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
++ return -EINVAL;
++ }
++ }
++
++ list_add_tail(&arg->tmf_elem, &ha->tmf_pending);
++ while (ha->active_tmf >= MAX_ACTIVE_TMF) {
++ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
++
++ msleep(1);
++
++ spin_lock_irqsave(&ha->tgt.sess_lock, flags);
++ if (TMF_NOT_READY(fcport)) {
++ ql_log(ql_log_warn, vha, 0x802c,
++ "Unable to acquire TM resource due to disruption.\n");
++ rc = EIO;
++ break;
++ }
++ if (ha->active_tmf < MAX_ACTIVE_TMF &&
++ list_is_first(&arg->tmf_elem, &ha->tmf_pending))
++ break;
++ }
++
++ list_del(&arg->tmf_elem);
++
++ if (!rc) {
++ ha->active_tmf++;
++ list_add_tail(&arg->tmf_elem, &ha->tmf_active);
++ }
++
++ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
++
++ return rc;
++}
++
++int
++qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
++ uint32_t tag)
++{
++ struct scsi_qla_host *vha = fcport->vha;
++ struct tmf_arg a;
++ int rval = QLA_SUCCESS;
++
++ if (TMF_NOT_READY(fcport))
++ return QLA_SUSPENDED;
++
++ a.vha = fcport->vha;
++ a.fcport = fcport;
++ a.lun = lun;
++ a.flags = flags;
++ INIT_LIST_HEAD(&a.tmf_elem);
++
++ if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET|TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) {
++ a.modifier = MK_SYNC_ID_LUN;
++ } else {
++ a.modifier = MK_SYNC_ID;
++ }
++
++ if (qla_get_tmf(&a))
++ return QLA_FUNCTION_FAILED;
++
++ a.qpair = vha->hw->base_qpair;
++ rval = __qla2x00_async_tm_cmd(&a);
++
++ qla_put_tmf(&a);
++ return rval;
++}
++
+ int
+ qla24xx_async_abort_command(srb_t *sp)
+ {
+@@ -2021,13 +2385,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
+ qla24xx_post_gpdb_work(vha, ea->fcport, 0);
+ break;
+ default:
+- if ((ea->iop[0] == LSC_SCODE_ELS_REJECT) &&
+- (ea->iop[1] == 0x50000)) { /* reson 5=busy expl:0x0 */
+- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+- ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP;
+- break;
+- }
+-
+ sp = ea->sp;
+ ql_dbg(ql_dbg_disc, vha, 0x2118,
+ "%s %d %8phC priority %s, fc4type %x prev try %s\n",
+@@ -2047,6 +2404,13 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
+ }
+
+ if (N2N_TOPO(vha->hw)) {
++ if (ea->fcport->n2n_link_reset_cnt ==
++ vha->hw->login_retry_count &&
++ ea->fcport->flags & FCF_FCSP_DEVICE) {
++ /* remote authentication app just started */
++ ea->fcport->n2n_link_reset_cnt = 0;
++ }
++
+ if (ea->fcport->n2n_link_reset_cnt <
+ vha->hw->login_retry_count) {
+ ea->fcport->n2n_link_reset_cnt++;
+@@ -2171,12 +2535,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
+ ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n",
+ __func__, __LINE__, ea->fcport->port_name, ea->data[1]);
+
+- ea->fcport->flags &= ~FCF_ASYNC_SENT;
+- qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED);
+- if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED)
+- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+- else
+- qla2x00_mark_device_lost(vha, ea->fcport, 1);
++ qlt_schedule_sess_for_deletion(ea->fcport);
+ break;
+ case MBS_LOOP_ID_USED:
+ /* data[1] = IO PARAM 1 = nport ID */
+@@ -3419,6 +3778,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
+ struct rsp_que *rsp = ha->rsp_q_map[0];
+ struct qla2xxx_fw_dump *fw_dump;
+
++ if (ha->fw_dump) {
++ ql_dbg(ql_dbg_init, vha, 0x00bd,
++ "Firmware dump already allocated.\n");
++ return;
++ }
++
++ ha->fw_dumped = 0;
++ ha->fw_dump_cap_flags = 0;
+ dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0;
+ req_q_size = rsp_q_size = 0;
+
+@@ -3429,7 +3796,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
+ mem_size = (ha->fw_memory_size - 0x11000 + 1) *
+ sizeof(uint16_t);
+ } else if (IS_FWI2_CAPABLE(ha)) {
+- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
++ if (IS_QLA83XX(ha))
+ fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem);
+ else if (IS_QLA81XX(ha))
+ fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem);
+@@ -3441,8 +3808,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
+ mem_size = (ha->fw_memory_size - 0x100000 + 1) *
+ sizeof(uint32_t);
+ if (ha->mqenable) {
+- if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) &&
+- !IS_QLA28XX(ha))
++ if (!IS_QLA83XX(ha))
+ mq_size = sizeof(struct qla2xxx_mq_chain);
+ /*
+ * Allocate maximum buffer size for all queues - Q0.
+@@ -3818,29 +4184,61 @@ out:
+ return ha->flags.lr_detected;
+ }
+
+-void qla_init_iocb_limit(scsi_qla_host_t *vha)
++static void __qla_adjust_iocb_limit(struct qla_qpair *qpair)
+ {
+- u16 i, num_qps;
+- u32 limit;
+- struct qla_hw_data *ha = vha->hw;
++ u8 num_qps;
++ u16 limit;
++ struct qla_hw_data *ha = qpair->vha->hw;
+
+ num_qps = ha->num_qpairs + 1;
+ limit = (ha->orig_fw_iocb_count * QLA_IOCB_PCT_LIMIT) / 100;
+
+- ha->base_qpair->fwres.iocbs_total = ha->orig_fw_iocb_count;
+- ha->base_qpair->fwres.iocbs_limit = limit;
+- ha->base_qpair->fwres.iocbs_qp_limit = limit / num_qps;
++ qpair->fwres.iocbs_total = ha->orig_fw_iocb_count;
++ qpair->fwres.iocbs_limit = limit;
++ qpair->fwres.iocbs_qp_limit = limit / num_qps;
++
++ qpair->fwres.exch_total = ha->orig_fw_xcb_count;
++ qpair->fwres.exch_limit = (ha->orig_fw_xcb_count *
++ QLA_IOCB_PCT_LIMIT) / 100;
++}
++
++void qla_init_iocb_limit(scsi_qla_host_t *vha)
++{
++ u8 i;
++ struct qla_hw_data *ha = vha->hw;
++
++ __qla_adjust_iocb_limit(ha->base_qpair);
+ ha->base_qpair->fwres.iocbs_used = 0;
++ ha->base_qpair->fwres.exch_used = 0;
++
+ for (i = 0; i < ha->max_qpairs; i++) {
+ if (ha->queue_pair_map[i]) {
+- ha->queue_pair_map[i]->fwres.iocbs_total =
+- ha->orig_fw_iocb_count;
+- ha->queue_pair_map[i]->fwres.iocbs_limit = limit;
+- ha->queue_pair_map[i]->fwres.iocbs_qp_limit =
+- limit / num_qps;
++ __qla_adjust_iocb_limit(ha->queue_pair_map[i]);
+ ha->queue_pair_map[i]->fwres.iocbs_used = 0;
++ ha->queue_pair_map[i]->fwres.exch_used = 0;
+ }
+ }
++
++ ha->fwres.iocb_total = ha->orig_fw_iocb_count;
++ ha->fwres.iocb_limit = (ha->orig_fw_iocb_count * QLA_IOCB_PCT_LIMIT) / 100;
++ ha->fwres.exch_total = ha->orig_fw_xcb_count;
++ ha->fwres.exch_limit = (ha->orig_fw_xcb_count * QLA_IOCB_PCT_LIMIT) / 100;
++
++ atomic_set(&ha->fwres.iocb_used, 0);
++ atomic_set(&ha->fwres.exch_used, 0);
++}
++
++void qla_adjust_iocb_limit(scsi_qla_host_t *vha)
++{
++ u8 i;
++ struct qla_hw_data *ha = vha->hw;
++
++ __qla_adjust_iocb_limit(ha->base_qpair);
++
++ for (i = 0; i < ha->max_qpairs; i++) {
++ if (ha->queue_pair_map[i])
++ __qla_adjust_iocb_limit(ha->queue_pair_map[i]);
++ }
+ }
+
+ /**
+@@ -4003,8 +4401,7 @@ enable_82xx_npiv:
+ ha->fw_major_version, ha->fw_minor_version,
+ ha->fw_subminor_version);
+
+- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
+- IS_QLA28XX(ha)) {
++ if (IS_QLA83XX(ha)) {
+ ha->flags.fac_supported = 0;
+ rval = QLA_SUCCESS;
+ }
+@@ -4187,7 +4584,7 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha)
+ * fw shal not send PRLI after PLOGI Acc
+ */
+ if (ha->flags.edif_enabled &&
+- vha->e_dbell.db_flags & EDB_ACTIVE) {
++ DBELL_ACTIVE(vha)) {
+ ha->fw_options[3] |= BIT_15;
+ ha->flags.n2n_fw_acc_sec = 1;
+ } else {
+@@ -4431,17 +4828,24 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
+ BIT_6) != 0;
+ ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n",
+ (ha->flags.fawwpn_enabled) ? "enabled" : "disabled");
++ /* Init_cb will be reused for other command(s). Save a backup copy of port_name */
++ memcpy(ha->port_name, ha->init_cb->port_name, WWN_SIZE);
+ }
+
++ /* ELS pass through payload is limit by frame size. */
++ if (ha->flags.edif_enabled)
++ mid_init_cb->init_cb.frame_payload_size = cpu_to_le16(ELS_MAX_PAYLOAD);
++
++ QLA_FW_STARTED(ha);
+ rval = qla2x00_init_firmware(vha, ha->init_cb_size);
+ next_check:
+ if (rval) {
++ QLA_FW_STOPPED(ha);
+ ql_log(ql_log_fatal, vha, 0x00d2,
+ "Init Firmware **** FAILED ****.\n");
+ } else {
+ ql_dbg(ql_dbg_init, vha, 0x00d3,
+ "Init Firmware -- success.\n");
+- QLA_FW_STARTED(ha);
+ vha->u_ql2xexchoffld = vha->u_ql2xiniexchg = 0;
+ }
+
+@@ -5335,15 +5739,13 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
+ "LOOP READY.\n");
+ ha->flags.fw_init_done = 1;
+
+- if (ha->flags.edif_enabled &&
+- !(vha->e_dbell.db_flags & EDB_ACTIVE) &&
+- N2N_TOPO(vha->hw)) {
+- /*
+- * use port online to wake up app to get ready
+- * for authentication
+- */
+- qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE, 0);
+- }
++ /*
++ * use link up to wake up app to get ready for
++ * authentication.
++ */
++ if (ha->flags.edif_enabled && DBELL_INACTIVE(vha))
++ qla2x00_post_aen_work(vha, FCH_EVT_LINKUP,
++ ha->link_data_rate);
+
+ /*
+ * Process any ATIO queue entries that came in
+@@ -5408,6 +5810,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha)
+ return QLA_FUNCTION_FAILED;
+ }
+
++static void
++qla_reinitialize_link(scsi_qla_host_t *vha)
++{
++ int rval;
++
++ atomic_set(&vha->loop_state, LOOP_DOWN);
++ atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
++ rval = qla2x00_full_login_lip(vha);
++ if (rval == QLA_SUCCESS) {
++ ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n");
++ } else {
++ ql_dbg(ql_dbg_disc, vha, 0xd051,
++ "Link reinitialization failed (%d)\n", rval);
++ }
++}
++
+ /*
+ * qla2x00_configure_local_loop
+ * Updates Fibre Channel Device Database with local loop devices.
+@@ -5459,6 +5877,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
+ spin_unlock_irqrestore(&vha->work_lock, flags);
+
+ if (vha->scan.scan_retry < MAX_SCAN_RETRIES) {
++ u8 loop_map_entries = 0;
++ int rc;
++
++ rc = qla2x00_get_fcal_position_map(vha, NULL,
++ &loop_map_entries);
++ if (rc == QLA_SUCCESS && loop_map_entries > 1) {
++ /*
++ * There are devices that are still not logged
++ * in. Reinitialize to give them a chance.
++ */
++ qla_reinitialize_link(vha);
++ return QLA_FUNCTION_FAILED;
++ }
+ set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+ set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+ }
+@@ -5547,6 +5978,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
+ memcpy(fcport->node_name, new_fcport->node_name,
+ WWN_SIZE);
+ fcport->scan_state = QLA_FCPORT_FOUND;
++ if (fcport->login_retry == 0) {
++ fcport->login_retry = vha->hw->login_retry_count;
++ ql_dbg(ql_dbg_disc, vha, 0x2135,
++ "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n",
++ fcport->port_name, fcport->loop_id,
++ fcport->login_retry);
++ }
+ found++;
+ break;
+ }
+@@ -5739,6 +6177,8 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
+ void
+ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
+ {
++ unsigned long flags;
++
+ if (IS_SW_RESV_ADDR(fcport->d_id))
+ return;
+
+@@ -5748,7 +6188,11 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
+ qla2x00_set_fcport_disc_state(fcport, DSC_UPD_FCPORT);
+ fcport->login_retry = vha->hw->login_retry_count;
+ fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
++
++ spin_lock_irqsave(&vha->work_lock, flags);
+ fcport->deleted = 0;
++ spin_unlock_irqrestore(&vha->work_lock, flags);
++
+ if (vha->hw->current_topology == ISP_CFG_NL)
+ fcport->logout_on_delete = 0;
+ else
+@@ -7026,24 +7470,27 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
+ ha->chip_reset++;
+ ha->base_qpair->chip_reset = ha->chip_reset;
+ ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
++ ha->base_qpair->prev_completion_cnt = 0;
+ for (i = 0; i < ha->max_qpairs; i++) {
+ if (ha->queue_pair_map[i]) {
+ ha->queue_pair_map[i]->chip_reset =
+ ha->base_qpair->chip_reset;
+ ha->queue_pair_map[i]->cmd_cnt =
+ ha->queue_pair_map[i]->cmd_completion_cnt = 0;
++ ha->base_qpair->prev_completion_cnt = 0;
+ }
+ }
+
+ /* purge MBox commands */
+- if (atomic_read(&ha->num_pend_mbx_stage3)) {
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) {
+ clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+ complete(&ha->mbx_intr_comp);
+ }
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ i = 0;
+- while (atomic_read(&ha->num_pend_mbx_stage3) ||
+- atomic_read(&ha->num_pend_mbx_stage2) ||
++ while (atomic_read(&ha->num_pend_mbx_stage2) ||
+ atomic_read(&ha->num_pend_mbx_stage1)) {
+ msleep(20);
+ i++;
+@@ -9333,7 +9780,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
+ qpair->rsp->req = qpair->req;
+ qpair->rsp->qpair = qpair;
+ /* init qpair to this cpu. Will adjust at run time. */
+- qla_cpu_update(qpair, smp_processor_id());
++ qla_cpu_update(qpair, raw_smp_processor_id());
+
+ if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
+ if (ha->fw_attributes & BIT_4)
+@@ -9591,6 +10038,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host)
+
+ vha->hw->flags.port_isolated = 1;
+
++ if (qla2x00_isp_reg_stat(vha->hw)) {
++ ql_log(ql_log_info, vha, 0x9006,
++ "PCI/Register disconnect, exiting.\n");
++ qla_pci_set_eeh_busy(vha);
++ return FAILED;
++ }
+ if (qla2x00_chip_is_down(vha))
+ return 0;
+
+@@ -9606,6 +10059,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host)
+ {
+ scsi_qla_host_t *vha = shost_priv(host);
+
++ if (qla2x00_isp_reg_stat(vha->hw)) {
++ ql_log(ql_log_info, vha, 0x9001,
++ "PCI/Register disconnect, exiting.\n");
++ qla_pci_set_eeh_busy(vha);
++ return FAILED;
++ }
++
+ vha->hw->flags.port_isolated = 0;
+ /* Set the flag to 1, so that isp_abort can proceed */
+ vha->flags.online = 1;
+diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
+index 5f3b7995cc8f3..a7b5d11146827 100644
+--- a/drivers/scsi/qla2xxx/qla_inline.h
++++ b/drivers/scsi/qla2xxx/qla_inline.h
+@@ -109,11 +109,13 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state)
+ {
+ int old_val;
+ uint8_t shiftbits, mask;
++ uint8_t port_dstate_str_sz;
+
+ /* This will have to change when the max no. of states > 16 */
+ shiftbits = 4;
+ mask = (1 << shiftbits) - 1;
+
++ port_dstate_str_sz = sizeof(port_dstate_str) / sizeof(char *);
+ fcport->disc_state = state;
+ while (1) {
+ old_val = atomic_read(&fcport->shadow_disc_state);
+@@ -121,7 +123,8 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state)
+ old_val, (old_val << shiftbits) | state)) {
+ ql_dbg(ql_dbg_disc, fcport->vha, 0x2134,
+ "FCPort %8phC disc_state transition: %s to %s - portid=%06x.\n",
+- fcport->port_name, port_dstate_str[old_val & mask],
++ fcport->port_name, (old_val & mask) < port_dstate_str_sz ?
++ port_dstate_str[old_val & mask] : "Unknown",
+ port_dstate_str[state], fcport->d_id.b24);
+ return;
+ }
+@@ -184,6 +187,8 @@ static void qla2xxx_init_sp(srb_t *sp, scsi_qla_host_t *vha,
+ sp->vha = vha;
+ sp->qpair = qpair;
+ sp->cmd_type = TYPE_SRB;
++ /* ref : INIT - normal flow */
++ kref_init(&sp->cmd_kref);
+ INIT_LIST_HEAD(&sp->elem);
+ }
+
+@@ -223,11 +228,9 @@ static inline srb_t *
+ qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag)
+ {
+ srb_t *sp = NULL;
+- uint8_t bail;
+ struct qla_qpair *qpair;
+
+- QLA_VHA_MARK_BUSY(vha, bail);
+- if (unlikely(bail))
++ if (unlikely(qla_vha_mark_busy(vha)))
+ return NULL;
+
+ qpair = vha->hw->base_qpair;
+@@ -380,24 +383,27 @@ qla2xxx_get_fc4_priority(struct scsi_qla_host *vha)
+
+ enum {
+ RESOURCE_NONE,
+- RESOURCE_INI,
++ RESOURCE_IOCB = BIT_0,
++ RESOURCE_EXCH = BIT_1, /* exchange */
++ RESOURCE_FORCE = BIT_2,
++ RESOURCE_HA = BIT_3,
+ };
+
+ static inline int
+-qla_get_iocbs(struct qla_qpair *qp, struct iocb_resource *iores)
++qla_get_fw_resources(struct qla_qpair *qp, struct iocb_resource *iores)
+ {
+ u16 iocbs_used, i;
+- struct qla_hw_data *ha = qp->vha->hw;
++ u16 exch_used;
++ struct qla_hw_data *ha = qp->hw;
+
+ if (!ql2xenforce_iocb_limit) {
+ iores->res_type = RESOURCE_NONE;
+ return 0;
+ }
++ if (iores->res_type & RESOURCE_FORCE)
++ goto force;
+
+- if ((iores->iocb_cnt + qp->fwres.iocbs_used) < qp->fwres.iocbs_qp_limit) {
+- qp->fwres.iocbs_used += iores->iocb_cnt;
+- return 0;
+- } else {
++ if ((iores->iocb_cnt + qp->fwres.iocbs_used) >= qp->fwres.iocbs_qp_limit) {
+ /* no need to acquire qpair lock. It's just rough calculation */
+ iocbs_used = ha->base_qpair->fwres.iocbs_used;
+ for (i = 0; i < ha->max_qpairs; i++) {
+@@ -405,30 +411,103 @@ qla_get_iocbs(struct qla_qpair *qp, struct iocb_resource *iores)
+ iocbs_used += ha->queue_pair_map[i]->fwres.iocbs_used;
+ }
+
+- if ((iores->iocb_cnt + iocbs_used) < qp->fwres.iocbs_limit) {
+- qp->fwres.iocbs_used += iores->iocb_cnt;
+- return 0;
+- } else {
++ if ((iores->iocb_cnt + iocbs_used) >= qp->fwres.iocbs_limit) {
++ iores->res_type = RESOURCE_NONE;
++ return -ENOSPC;
++ }
++ }
++
++ if (iores->res_type & RESOURCE_EXCH) {
++ exch_used = ha->base_qpair->fwres.exch_used;
++ for (i = 0; i < ha->max_qpairs; i++) {
++ if (ha->queue_pair_map[i])
++ exch_used += ha->queue_pair_map[i]->fwres.exch_used;
++ }
++
++ if ((exch_used + iores->exch_cnt) >= qp->fwres.exch_limit) {
++ iores->res_type = RESOURCE_NONE;
++ return -ENOSPC;
++ }
++ }
++
++ if (ql2xenforce_iocb_limit == 2) {
++ if ((iores->iocb_cnt + atomic_read(&ha->fwres.iocb_used)) >=
++ ha->fwres.iocb_limit) {
+ iores->res_type = RESOURCE_NONE;
+ return -ENOSPC;
+ }
++
++ if (iores->res_type & RESOURCE_EXCH) {
++ if ((iores->exch_cnt + atomic_read(&ha->fwres.exch_used)) >=
++ ha->fwres.exch_limit) {
++ iores->res_type = RESOURCE_NONE;
++ return -ENOSPC;
++ }
++ }
++ }
++
++force:
++ qp->fwres.iocbs_used += iores->iocb_cnt;
++ qp->fwres.exch_used += iores->exch_cnt;
++ if (ql2xenforce_iocb_limit == 2) {
++ atomic_add(iores->iocb_cnt, &ha->fwres.iocb_used);
++ atomic_add(iores->exch_cnt, &ha->fwres.exch_used);
++ iores->res_type |= RESOURCE_HA;
++ }
++ return 0;
++}
++
++/*
++ * decrement to zero. This routine will not decrement below zero
++ * @v: pointer of type atomic_t
++ * @amount: amount to decrement from v
++ */
++static void qla_atomic_dtz(atomic_t *v, int amount)
++{
++ int c, old, dec;
++
++ c = atomic_read(v);
++ for (;;) {
++ dec = c - amount;
++ if (unlikely(dec < 0))
++ dec = 0;
++
++ old = atomic_cmpxchg((v), c, dec);
++ if (likely(old == c))
++ break;
++ c = old;
+ }
+ }
+
+ static inline void
+-qla_put_iocbs(struct qla_qpair *qp, struct iocb_resource *iores)
++qla_put_fw_resources(struct qla_qpair *qp, struct iocb_resource *iores)
+ {
+- switch (iores->res_type) {
+- case RESOURCE_NONE:
+- break;
+- default:
++ struct qla_hw_data *ha = qp->hw;
++
++ if (iores->res_type & RESOURCE_HA) {
++ if (iores->res_type & RESOURCE_IOCB)
++ qla_atomic_dtz(&ha->fwres.iocb_used, iores->iocb_cnt);
++
++ if (iores->res_type & RESOURCE_EXCH)
++ qla_atomic_dtz(&ha->fwres.exch_used, iores->exch_cnt);
++ }
++
++ if (iores->res_type & RESOURCE_IOCB) {
+ if (qp->fwres.iocbs_used >= iores->iocb_cnt) {
+ qp->fwres.iocbs_used -= iores->iocb_cnt;
+ } else {
+- // should not happen
++ /* should not happen */
+ qp->fwres.iocbs_used = 0;
+ }
+- break;
++ }
++
++ if (iores->res_type & RESOURCE_EXCH) {
++ if (qp->fwres.exch_used >= iores->exch_cnt) {
++ qp->fwres.exch_used -= iores->exch_cnt;
++ } else {
++ /* should not happen */
++ qp->fwres.exch_used = 0;
++ }
+ }
+ iores->res_type = RESOURCE_NONE;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
+index 9d4ad1d2b00a2..9e524d52dc862 100644
+--- a/drivers/scsi/qla2xxx/qla_iocb.c
++++ b/drivers/scsi/qla2xxx/qla_iocb.c
+@@ -522,21 +522,25 @@ __qla2x00_marker(struct scsi_qla_host *vha, struct qla_qpair *qpair,
+ return (QLA_FUNCTION_FAILED);
+ }
+
++ mrk24 = (struct mrk_entry_24xx *)mrk;
++
+ mrk->entry_type = MARKER_TYPE;
+ mrk->modifier = type;
+ if (type != MK_SYNC_ALL) {
+ if (IS_FWI2_CAPABLE(ha)) {
+- mrk24 = (struct mrk_entry_24xx *) mrk;
+ mrk24->nport_handle = cpu_to_le16(loop_id);
+ int_to_scsilun(lun, (struct scsi_lun *)&mrk24->lun);
+ host_to_fcp_swap(mrk24->lun, sizeof(mrk24->lun));
+ mrk24->vp_index = vha->vp_idx;
+- mrk24->handle = make_handle(req->id, mrk24->handle);
+ } else {
+ SET_TARGET_ID(ha, mrk->target, loop_id);
+ mrk->lun = cpu_to_le16((uint16_t)lun);
+ }
+ }
++
++ if (IS_FWI2_CAPABLE(ha))
++ mrk24->handle = QLA_SKIP_HANDLE;
++
+ wmb();
+
+ qla2x00_start_iocbs(vha, req);
+@@ -603,7 +607,8 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
+ put_unaligned_le32(COMMAND_TYPE_6, &cmd_pkt->entry_type);
+
+ /* No data transfer */
+- if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE ||
++ tot_dsds == 0) {
+ cmd_pkt->byte_count = cpu_to_le32(0);
+ return 0;
+ }
+@@ -1589,9 +1594,10 @@ qla24xx_start_scsi(srb_t *sp)
+ tot_dsds = nseg;
+ req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+- sp->iores.res_type = RESOURCE_INI;
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
+ sp->iores.iocb_cnt = req_cnt;
+- if (qla_get_iocbs(sp->qpair, &sp->iores))
++ if (qla_get_fw_resources(sp->qpair, &sp->iores))
+ goto queuing_error;
+
+ if (req->cnt < (req_cnt + 2)) {
+@@ -1678,7 +1684,7 @@ queuing_error:
+ if (tot_dsds)
+ scsi_dma_unmap(cmd);
+
+- qla_put_iocbs(sp->qpair, &sp->iores);
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+@@ -1793,9 +1799,10 @@ qla24xx_dif_start_scsi(srb_t *sp)
+ tot_prot_dsds = nseg;
+ tot_dsds += nseg;
+
+- sp->iores.res_type = RESOURCE_INI;
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
+ sp->iores.iocb_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+- if (qla_get_iocbs(sp->qpair, &sp->iores))
++ if (qla_get_fw_resources(sp->qpair, &sp->iores))
+ goto queuing_error;
+
+ if (req->cnt < (req_cnt + 2)) {
+@@ -1883,7 +1890,7 @@ queuing_error:
+ }
+ /* Cleanup will be performed by the caller (queuecommand) */
+
+- qla_put_iocbs(sp->qpair, &sp->iores);
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+@@ -1952,9 +1959,10 @@ qla2xxx_start_scsi_mq(srb_t *sp)
+ tot_dsds = nseg;
+ req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+- sp->iores.res_type = RESOURCE_INI;
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
+ sp->iores.iocb_cnt = req_cnt;
+- if (qla_get_iocbs(sp->qpair, &sp->iores))
++ if (qla_get_fw_resources(sp->qpair, &sp->iores))
+ goto queuing_error;
+
+ if (req->cnt < (req_cnt + 2)) {
+@@ -2041,7 +2049,7 @@ queuing_error:
+ if (tot_dsds)
+ scsi_dma_unmap(cmd);
+
+- qla_put_iocbs(sp->qpair, &sp->iores);
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+@@ -2171,9 +2179,10 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
+ tot_prot_dsds = nseg;
+ tot_dsds += nseg;
+
+- sp->iores.res_type = RESOURCE_INI;
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
+ sp->iores.iocb_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+- if (qla_get_iocbs(sp->qpair, &sp->iores))
++ if (qla_get_fw_resources(sp->qpair, &sp->iores))
+ goto queuing_error;
+
+ if (req->cnt < (req_cnt + 2)) {
+@@ -2260,7 +2269,7 @@ queuing_error:
+ }
+ /* Cleanup will be performed by the caller (queuecommand) */
+
+- qla_put_iocbs(sp->qpair, &sp->iores);
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+@@ -2537,7 +2546,7 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk)
+ scsi_qla_host_t *vha = fcport->vha;
+ struct qla_hw_data *ha = vha->hw;
+ struct srb_iocb *iocb = &sp->u.iocb_cmd;
+- struct req_que *req = vha->req;
++ struct req_que *req = sp->qpair->req;
+
+ flags = iocb->u.tmf.flags;
+ lun = iocb->u.tmf.lun;
+@@ -2553,18 +2562,46 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk)
+ tsk->port_id[2] = fcport->d_id.b.domain;
+ tsk->vp_index = fcport->vha->vp_idx;
+
+- if (flags == TCF_LUN_RESET) {
++ if (flags & (TCF_LUN_RESET | TCF_ABORT_TASK_SET|
++ TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) {
+ int_to_scsilun(lun, &tsk->lun);
+ host_to_fcp_swap((uint8_t *)&tsk->lun,
+ sizeof(tsk->lun));
+ }
+ }
+
+-void qla2x00_init_timer(srb_t *sp, unsigned long tmo)
++static void
++qla2x00_async_done(struct srb *sp, int res)
++{
++ if (del_timer(&sp->u.iocb_cmd.timer)) {
++ /*
++ * Successfully cancelled the timeout handler
++ * ref: TMR
++ */
++ if (kref_put(&sp->cmd_kref, qla2x00_sp_release))
++ return;
++ }
++ sp->async_done(sp, res);
++}
++
++void
++qla2x00_sp_release(struct kref *kref)
++{
++ struct srb *sp = container_of(kref, struct srb, cmd_kref);
++
++ sp->free(sp);
++}
++
++void
++qla2x00_init_async_sp(srb_t *sp, unsigned long tmo,
++ void (*done)(struct srb *sp, int res))
+ {
+ timer_setup(&sp->u.iocb_cmd.timer, qla2x00_sp_timeout, 0);
+- sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ;
++ sp->done = qla2x00_async_done;
++ sp->async_done = done;
+ sp->free = qla2x00_sp_free;
++ sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
++ sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ;
+ if (IS_QLAFX00(sp->vha->hw) && sp->type == SRB_FXIOCB_DCMD)
+ init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp);
+ sp->start_timer = 1;
+@@ -2651,7 +2688,9 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
+ return -ENOMEM;
+ }
+
+- /* Alloc SRB structure */
++ /* Alloc SRB structure
++ * ref: INIT
++ */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp) {
+ kfree(fcport);
+@@ -2672,18 +2711,19 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
+ sp->type = SRB_ELS_DCMD;
+ sp->name = "ELS_DCMD";
+ sp->fcport = fcport;
+- elsio->timeout = qla2x00_els_dcmd_iocb_timeout;
+- qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT);
+- init_completion(&sp->u.iocb_cmd.u.els_logo.comp);
+- sp->done = qla2x00_els_dcmd_sp_done;
++ qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT,
++ qla2x00_els_dcmd_sp_done);
+ sp->free = qla2x00_els_dcmd_sp_free;
++ sp->u.iocb_cmd.timeout = qla2x00_els_dcmd_iocb_timeout;
++ init_completion(&sp->u.iocb_cmd.u.els_logo.comp);
+
+ elsio->u.els_logo.els_logo_pyld = dma_alloc_coherent(&ha->pdev->dev,
+ DMA_POOL_SIZE, &elsio->u.els_logo.els_logo_pyld_dma,
+ GFP_KERNEL);
+
+ if (!elsio->u.els_logo.els_logo_pyld) {
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return QLA_FUNCTION_FAILED;
+ }
+
+@@ -2706,7 +2746,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
+
+ rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS) {
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return QLA_FUNCTION_FAILED;
+ }
+
+@@ -2717,7 +2758,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
+
+ wait_for_completion(&elsio->u.els_logo.comp);
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return rval;
+ }
+
+@@ -2787,7 +2829,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
+ sp->vha->qla_stats.control_requests++;
+ }
+
+-static void
++void
+ qla2x00_els_dcmd2_iocb_timeout(void *data)
+ {
+ srb_t *sp = data;
+@@ -2850,7 +2892,9 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
+ sp->name, res, sp->handle, fcport->d_id.b24, fcport->port_name);
+
+ fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
+- del_timer(&sp->u.iocb_cmd.timer);
++ /* For edif, set logout on delete to ensure any residual key from FW is flushed.*/
++ fcport->logout_on_delete = 1;
++ fcport->chip_reset = vha->hw->base_qpair->chip_reset;
+
+ if (sp->flags & SRB_WAKEUP_ON_COMP)
+ complete(&lio->u.els_plogi.comp);
+@@ -2927,6 +2971,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
+ set_bit(ISP_ABORT_NEEDED,
+ &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
++ break;
+ }
+ fallthrough;
+ default:
+@@ -2936,9 +2981,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
+ fw_status[0], fw_status[1], fw_status[2]);
+
+ fcport->flags &= ~FCF_ASYNC_SENT;
+- qla2x00_set_fcport_disc_state(fcport,
+- DSC_LOGIN_FAILED);
+- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
++ qlt_schedule_sess_for_deletion(fcport);
+ break;
+ }
+ break;
+@@ -2950,8 +2993,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
+ fw_status[0], fw_status[1], fw_status[2]);
+
+ sp->fcport->flags &= ~FCF_ASYNC_SENT;
+- qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_FAILED);
+- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
++ qlt_schedule_sess_for_deletion(fcport);
+ break;
+ }
+
+@@ -2960,7 +3002,8 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
+ struct srb_iocb *elsio = &sp->u.iocb_cmd;
+
+ qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return;
+ }
+ e->u.iosb.sp = sp;
+@@ -2978,7 +3021,9 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
+ int rval = QLA_SUCCESS;
+ void *ptr, *resp_ptr;
+
+- /* Alloc SRB structure */
++ /* Alloc SRB structure
++ * ref: INIT
++ */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp) {
+ ql_log(ql_log_info, vha, 0x70e6,
+@@ -2993,17 +3038,16 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
+ ql_dbg(ql_dbg_io, vha, 0x3073,
+ "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
+
+- sp->type = SRB_ELS_DCMD;
+- sp->name = "ELS_DCMD";
+- sp->fcport = fcport;
+-
+- elsio->timeout = qla2x00_els_dcmd2_iocb_timeout;
+ if (wait)
+ sp->flags = SRB_WAKEUP_ON_COMP;
+
+- qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT + 2);
++ sp->type = SRB_ELS_DCMD;
++ sp->name = "ELS_DCMD";
++ sp->fcport = fcport;
++ qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT + 2,
++ qla2x00_els_dcmd2_sp_done);
++ sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout;
+
+- sp->done = qla2x00_els_dcmd2_sp_done;
+ elsio->u.els_plogi.tx_size = elsio->u.els_plogi.rx_size = DMA_POOL_SIZE;
+
+ ptr = elsio->u.els_plogi.els_plogi_pyld =
+@@ -3034,8 +3078,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
+ elsio->u.els_plogi.els_cmd = els_opcode;
+ elsio->u.els_plogi.els_plogi_pyld->opcode = els_opcode;
+
+- if (els_opcode == ELS_DCMD_PLOGI && vha->hw->flags.edif_enabled &&
+- vha->e_dbell.db_flags & EDB_ACTIVE) {
++ if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
+ struct fc_els_flogi *p = ptr;
+
+ p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
+@@ -3069,7 +3112,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
+ out:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+@@ -3779,6 +3823,79 @@ qla24xx_prlo_iocb(srb_t *sp, struct logio_entry_24xx *logio)
+ logio->vp_index = sp->fcport->vha->vp_idx;
+ }
+
++int qla_get_iocbs_resource(struct srb *sp)
++{
++ bool get_exch;
++ bool push_it_through = false;
++
++ if (!ql2xenforce_iocb_limit) {
++ sp->iores.res_type = RESOURCE_NONE;
++ return 0;
++ }
++ sp->iores.res_type = RESOURCE_NONE;
++
++ switch (sp->type) {
++ case SRB_TM_CMD:
++ case SRB_PRLI_CMD:
++ case SRB_ADISC_CMD:
++ push_it_through = true;
++ fallthrough;
++ case SRB_LOGIN_CMD:
++ case SRB_ELS_CMD_RPT:
++ case SRB_ELS_CMD_HST:
++ case SRB_ELS_CMD_HST_NOLOGIN:
++ case SRB_CT_CMD:
++ case SRB_NVME_LS:
++ case SRB_ELS_DCMD:
++ get_exch = true;
++ break;
++
++ case SRB_FXIOCB_DCMD:
++ case SRB_FXIOCB_BCMD:
++ sp->iores.res_type = RESOURCE_NONE;
++ return 0;
++
++ case SRB_SA_UPDATE:
++ case SRB_SA_REPLACE:
++ case SRB_MB_IOCB:
++ case SRB_ABT_CMD:
++ case SRB_NACK_PLOGI:
++ case SRB_NACK_PRLI:
++ case SRB_NACK_LOGO:
++ case SRB_LOGOUT_CMD:
++ case SRB_CTRL_VP:
++ case SRB_MARKER:
++ default:
++ push_it_through = true;
++ get_exch = false;
++ }
++
++ sp->iores.res_type |= RESOURCE_IOCB;
++ sp->iores.iocb_cnt = 1;
++ if (get_exch) {
++ sp->iores.res_type |= RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
++ }
++ if (push_it_through)
++ sp->iores.res_type |= RESOURCE_FORCE;
++
++ return qla_get_fw_resources(sp->qpair, &sp->iores);
++}
++
++static void
++qla_marker_iocb(srb_t *sp, struct mrk_entry_24xx *mrk)
++{
++ mrk->entry_type = MARKER_TYPE;
++ mrk->modifier = sp->u.iocb_cmd.u.tmf.modifier;
++ mrk->handle = make_handle(sp->qpair->req->id, sp->handle);
++ if (sp->u.iocb_cmd.u.tmf.modifier != MK_SYNC_ALL) {
++ mrk->nport_handle = cpu_to_le16(sp->u.iocb_cmd.u.tmf.loop_id);
++ int_to_scsilun(sp->u.iocb_cmd.u.tmf.lun, (struct scsi_lun *)&mrk->lun);
++ host_to_fcp_swap(mrk->lun, sizeof(mrk->lun));
++ mrk->vp_index = sp->u.iocb_cmd.u.tmf.vp_index;
++ }
++}
++
+ int
+ qla2x00_start_sp(srb_t *sp)
+ {
+@@ -3793,9 +3910,15 @@ qla2x00_start_sp(srb_t *sp)
+ return -EIO;
+
+ spin_lock_irqsave(qp->qp_lock_ptr, flags);
++ rval = qla_get_iocbs_resource(sp);
++ if (rval) {
++ spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
++ return -EAGAIN;
++ }
++
+ pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
+ if (!pkt) {
+- rval = EAGAIN;
++ rval = -EAGAIN;
+ ql_log(ql_log_warn, vha, 0x700c,
+ "qla2x00_alloc_iocbs failed.\n");
+ goto done;
+@@ -3876,16 +3999,28 @@ qla2x00_start_sp(srb_t *sp)
+ case SRB_SA_REPLACE:
+ qla24xx_sa_replace_iocb(sp, pkt);
+ break;
++ case SRB_MARKER:
++ qla_marker_iocb(sp, pkt);
++ break;
+ default:
+ break;
+ }
+
+- if (sp->start_timer)
++ if (sp->start_timer) {
++ /* ref: TMR timer ref
++ * this code should be just before start_iocbs function
++ * This will make sure that the caller function doesn't need to do
++ * kref_put even on failure
++ */
++ kref_get(&sp->cmd_kref);
+ add_timer(&sp->u.iocb_cmd.timer);
++ }
+
+ wmb();
+ qla2x00_start_iocbs(vha, qp->req);
+ done:
++ if (rval)
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
+ return rval;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index b26f2699adb27..80c2dcf567b0c 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -1121,8 +1121,12 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb)
+ unsigned long flags;
+ fc_port_t *fcport = NULL;
+
+- if (!vha->hw->flags.fw_started)
++ if (!vha->hw->flags.fw_started) {
++ ql_log(ql_log_warn, vha, 0x50ff,
++ "Dropping AEN - %04x %04x %04x %04x.\n",
++ mb[0], mb[1], mb[2], mb[3]);
+ return;
++ }
+
+ /* Setup to process RIO completion. */
+ handle_cnt = 0;
+@@ -1354,9 +1358,7 @@ skip_rio:
+ if (!vha->vp_idx) {
+ if (ha->flags.fawwpn_enabled &&
+ (ha->current_topology == ISP_CFG_F)) {
+- void *wwpn = ha->init_cb->port_name;
+-
+- memcpy(vha->port_name, wwpn, WWN_SIZE);
++ memcpy(vha->port_name, ha->port_name, WWN_SIZE);
+ fc_host_port_name(vha->host) =
+ wwn_to_u64(vha->port_name);
+ ql_dbg(ql_dbg_init + ql_dbg_verbose,
+@@ -1899,6 +1901,8 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
+ }
+
+ req->outstanding_cmds[index] = NULL;
++
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ return sp;
+ }
+
+@@ -2494,6 +2498,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk)
+ iocb->u.tmf.data = QLA_FUNCTION_FAILED;
+ } else if ((le16_to_cpu(sts->scsi_status) &
+ SS_RESPONSE_INFO_LEN_VALID)) {
++ host_to_fcp_swap(sts->data, sizeof(sts->data));
+ if (le32_to_cpu(sts->rsp_data_len) < 4) {
+ ql_log(ql_log_warn, fcport->vha, 0x503b,
+ "Async-%s error - hdl=%x not enough response(%d).\n",
+@@ -2512,7 +2517,6 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk)
+ case CS_PORT_BUSY:
+ case CS_INCOMPLETE:
+ case CS_PORT_UNAVAILABLE:
+- case CS_TIMEOUT:
+ case CS_RESET:
+ if (atomic_read(&fcport->state) == FCS_ONLINE) {
+ ql_dbg(ql_dbg_disc, fcport->vha, 0x3021,
+@@ -2634,7 +2638,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
+ }
+
+ if (unlikely(logit))
+- ql_log(ql_dbg_io, fcport->vha, 0x5060,
++ ql_dbg(ql_dbg_io, fcport->vha, 0x5060,
+ "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x ox_id=%x\n",
+ sp->name, sp->handle, comp_status,
+ fd->transferred_length, le32_to_cpu(sts->residual_len),
+@@ -3185,7 +3189,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
+ }
+ return;
+ }
+- qla_put_iocbs(sp->qpair, &sp->iores);
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+
+ if (sp->cmd_type != TYPE_SRB) {
+ req->outstanding_cmds[handle] = NULL;
+@@ -3350,8 +3354,6 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
+ "Dropped frame(s) detected (0x%x of 0x%x bytes).\n",
+ resid, scsi_bufflen(cp));
+
+- vha->interface_err_cnt++;
+-
+ res = DID_ERROR << 16 | lscsi_status;
+ goto check_scsi_status;
+ }
+@@ -3421,6 +3423,7 @@ check_scsi_status:
+ case CS_PORT_UNAVAILABLE:
+ case CS_TIMEOUT:
+ case CS_RESET:
++ case CS_EDIF_INV_REQ:
+
+ /*
+ * We are going to have the fc class block the rport
+@@ -3491,7 +3494,7 @@ check_scsi_status:
+
+ out:
+ if (logit)
+- ql_log(ql_dbg_io, fcport->vha, 0x3022,
++ ql_dbg(ql_dbg_io, fcport->vha, 0x3022,
+ "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
+ comp_status, scsi_status, res, vha->host_no,
+ cp->device->id, cp->device->lun, fcport->d_id.b.domain,
+@@ -3605,7 +3608,6 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
+ default:
+ sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ if (sp) {
+- qla_put_iocbs(sp->qpair, &sp->iores);
+ sp->done(sp, res);
+ return 0;
+ }
+@@ -3707,12 +3709,11 @@ void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *vha,
+ * Return: 0 all iocbs has arrived, xx- all iocbs have not arrived.
+ */
+ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+- struct rsp_que *rsp, response_t *pkt)
++ struct rsp_que *rsp, response_t *pkt, u32 rsp_q_in)
+ {
+- int start_pkt_ring_index, end_pkt_ring_index, n_ring_index;
+- response_t *end_pkt;
++ int start_pkt_ring_index;
++ u32 iocb_cnt = 0;
+ int rc = 0;
+- u32 rsp_q_in;
+
+ if (pkt->entry_count == 1)
+ return rc;
+@@ -3723,36 +3724,42 @@ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+ else
+ start_pkt_ring_index = rsp->ring_index - 1;
+
+- if ((start_pkt_ring_index + pkt->entry_count) >= rsp->length)
+- end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count -
+- rsp->length - 1;
++ if (rsp_q_in < start_pkt_ring_index)
++ /* q in ptr is wrapped */
++ iocb_cnt = rsp->length - start_pkt_ring_index + rsp_q_in;
+ else
+- end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count - 1;
++ iocb_cnt = rsp_q_in - start_pkt_ring_index;
++
++ if (iocb_cnt < pkt->entry_count)
++ rc = -EIO;
+
+- end_pkt = rsp->ring + end_pkt_ring_index;
++ ql_dbg(ql_dbg_init, vha, 0x5091,
++ "%s - ring %p pkt %p entry count %d iocb_cnt %d rsp_q_in %d rc %d\n",
++ __func__, rsp->ring, pkt, pkt->entry_count, iocb_cnt, rsp_q_in, rc);
+
+- /* next pkt = end_pkt + 1 */
+- n_ring_index = end_pkt_ring_index + 1;
+- if (n_ring_index >= rsp->length)
+- n_ring_index = 0;
++ return rc;
++}
+
+- rsp_q_in = rsp->qpair->use_shadow_reg ? *rsp->in_ptr :
+- rd_reg_dword(rsp->rsp_q_in);
++static void qla_marker_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
++ struct mrk_entry_24xx *pkt)
++{
++ const char func[] = "MRK-IOCB";
++ srb_t *sp;
++ int res = QLA_SUCCESS;
+
+- /* rsp_q_in is either wrapped or pointing beyond endpkt */
+- if ((rsp_q_in < start_pkt_ring_index && rsp_q_in < n_ring_index) ||
+- rsp_q_in >= n_ring_index)
+- /* all IOCBs arrived. */
+- rc = 0;
+- else
+- rc = -EIO;
++ if (!IS_FWI2_CAPABLE(vha->hw))
++ return;
+
+- ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x5091,
+- "%s - ring %p pkt %p end pkt %p entry count %#x rsp_q_in %d rc %d\n",
+- __func__, rsp->ring, pkt, end_pkt, pkt->entry_count,
+- rsp_q_in, rc);
++ sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
++ if (!sp)
++ return;
+
+- return rc;
++ if (pkt->entry_status) {
++ ql_dbg(ql_dbg_taskm, vha, 0x8025, "marker failure.\n");
++ res = QLA_COMMAND_ERROR;
++ }
++ sp->u.iocb_cmd.u.tmf.data = res;
++ sp->done(sp, res);
+ }
+
+ /**
+@@ -3767,6 +3774,8 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+ struct qla_hw_data *ha = vha->hw;
+ struct purex_entry_24xx *purex_entry;
+ struct purex_item *pure_item;
++ u16 rsp_in = 0, cur_ring_index;
++ int follow_inptr, is_shadow_hba;
+
+ if (!ha->flags.fw_started)
+ return;
+@@ -3776,8 +3785,27 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+ qla_cpu_update(rsp->qpair, smp_processor_id());
+ }
+
+- while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
++#define __update_rsp_in(_update, _is_shadow_hba, _rsp, _rsp_in) \
++ do { \
++ if (_update) { \
++ _rsp_in = _is_shadow_hba ? *(_rsp)->in_ptr : \
++ rd_reg_dword_relaxed((_rsp)->rsp_q_in); \
++ } \
++ } while (0)
++
++ is_shadow_hba = IS_SHADOW_REG_CAPABLE(ha);
++ follow_inptr = is_shadow_hba ? ql2xrspq_follow_inptr :
++ ql2xrspq_follow_inptr_legacy;
++
++ __update_rsp_in(follow_inptr, is_shadow_hba, rsp, rsp_in);
++
++ while ((likely(follow_inptr &&
++ rsp->ring_index != rsp_in &&
++ rsp->ring_ptr->signature != RESPONSE_PROCESSED)) ||
++ (!follow_inptr &&
++ rsp->ring_ptr->signature != RESPONSE_PROCESSED)) {
+ pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
++ cur_ring_index = rsp->ring_index;
+
+ rsp->ring_index++;
+ if (rsp->ring_index == rsp->length) {
+@@ -3855,9 +3883,7 @@ process_err:
+ (struct nack_to_isp *)pkt);
+ break;
+ case MARKER_TYPE:
+- /* Do nothing in this case, this check is to prevent it
+- * from falling into default case
+- */
++ qla_marker_iocb_entry(vha, rsp->req, (struct mrk_entry_24xx *)pkt);
+ break;
+ case ABORT_IOCB_TYPE:
+ qla24xx_abort_iocb_entry(vha, rsp->req,
+@@ -3889,6 +3915,8 @@ process_err:
+ }
+ pure_item = qla27xx_copy_fpin_pkt(vha,
+ (void **)&pkt, &rsp);
++ __update_rsp_in(follow_inptr, is_shadow_hba,
++ rsp, rsp_in);
+ if (!pure_item)
+ break;
+ qla24xx_queue_purex_item(vha, pure_item,
+@@ -3896,7 +3924,17 @@ process_err:
+ break;
+
+ case ELS_AUTH_ELS:
+- if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt)) {
++ if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) {
++ /*
++ * ring_ptr and ring_index were
++ * pre-incremented above. Reset them
++ * back to current. Wait for next
++ * interrupt with all IOCBs to arrive
++ * and re-process.
++ */
++ rsp->ring_ptr = (response_t *)pkt;
++ rsp->ring_index = cur_ring_index;
++
+ ql_dbg(ql_dbg_init, vha, 0x5091,
+ "Defer processing ELS opcode %#x...\n",
+ purex_entry->els_frame_payload[3]);
+@@ -4415,16 +4453,12 @@ msix_register_fail:
+ }
+
+ /* Enable MSI-X vector for response queue update for queue 0 */
+- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+- if (ha->msixbase && ha->mqiobase &&
+- (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+- ql2xmqsupport))
+- ha->mqenable = 1;
+- } else
+- if (ha->mqiobase &&
+- (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+- ql2xmqsupport))
+- ha->mqenable = 1;
++ if (IS_MQUE_CAPABLE(ha) &&
++ (ha->msixbase && ha->mqiobase && ha->max_qpairs))
++ ha->mqenable = 1;
++ else
++ ha->mqenable = 0;
++
+ ql_dbg(ql_dbg_multiq, vha, 0xc005,
+ "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n",
+ ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues);
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index 7811c4952035b..511f31611aede 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -9,6 +9,12 @@
+ #include <linux/delay.h>
+ #include <linux/gfp.h>
+
++#ifdef CONFIG_PPC
++#define IS_PPCARCH true
++#else
++#define IS_PPCARCH false
++#endif
++
+ static struct mb_cmd_name {
+ uint16_t cmd;
+ const char *str;
+@@ -232,6 +238,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+ ql_dbg(ql_dbg_mbx, vha, 0x1112,
+ "mbox[%d]<-0x%04x\n", cnt, *iptr);
+ wrt_reg_word(optr, *iptr);
++ } else {
++ wrt_reg_word(optr, 0);
+ }
+
+ mboxes >>= 1;
+@@ -265,9 +273,14 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ wait_time = jiffies;
+- atomic_inc(&ha->num_pend_mbx_stage3);
+ if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
+ mcp->tov * HZ)) {
++ ql_dbg(ql_dbg_mbx, vha, 0x117a,
++ "cmd=%x Timeout.\n", command);
++ spin_lock_irqsave(&ha->hardware_lock, flags);
++ clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
++ spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
+ if (chip_reset != ha->chip_reset) {
+ eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+
+@@ -276,16 +289,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+ spin_unlock_irqrestore(&ha->hardware_lock,
+ flags);
+ atomic_dec(&ha->num_pend_mbx_stage2);
+- atomic_dec(&ha->num_pend_mbx_stage3);
+ rval = QLA_ABORTED;
+ goto premature_exit;
+ }
+- ql_dbg(ql_dbg_mbx, vha, 0x117a,
+- "cmd=%x Timeout.\n", command);
+- spin_lock_irqsave(&ha->hardware_lock, flags);
+- clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+- spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-
+ } else if (ha->flags.purge_mbox ||
+ chip_reset != ha->chip_reset) {
+ eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+@@ -294,11 +300,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+ ha->flags.mbox_busy = 0;
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+ atomic_dec(&ha->num_pend_mbx_stage2);
+- atomic_dec(&ha->num_pend_mbx_stage3);
+ rval = QLA_ABORTED;
+ goto premature_exit;
+ }
+- atomic_dec(&ha->num_pend_mbx_stage3);
+
+ if (time_after(jiffies, wait_time + 5 * HZ))
+ ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n",
+@@ -728,6 +732,9 @@ again:
+ vha->min_supported_speed =
+ nv->min_supported_speed;
+ }
++
++ if (IS_PPCARCH)
++ mcp->mb[11] |= BIT_4;
+ }
+
+ if (ha->flags.exlogins_enabled)
+@@ -1695,10 +1702,8 @@ qla2x00_get_adapter_id(scsi_qla_host_t *vha, uint16_t *id, uint8_t *al_pa,
+ mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
+ if (IS_FWI2_CAPABLE(vha->hw))
+ mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16;
+- if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) {
+- mcp->in_mb |= MBX_15;
+- mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23;
+- }
++ if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw))
++ mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23;
+
+ mcp->tov = MBX_TOV_SECONDS;
+ mcp->flags = 0;
+@@ -2198,6 +2203,9 @@ qla2x00_get_firmware_state(scsi_qla_host_t *vha, uint16_t *states)
+ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1054,
+ "Entered %s.\n", __func__);
+
++ if (!ha->flags.fw_started)
++ return QLA_FUNCTION_FAILED;
++
+ mcp->mb[0] = MBC_GET_FIRMWARE_STATE;
+ mcp->out_mb = MBX_0;
+ if (IS_FWI2_CAPABLE(vha->hw))
+@@ -3031,8 +3039,7 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha)
+ ha->orig_fw_iocb_count = mcp->mb[10];
+ if (ha->flags.npiv_supported)
+ ha->max_npiv_vports = mcp->mb[11];
+- if (IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
+- IS_QLA28XX(ha))
++ if (IS_QLA81XX(ha) || IS_QLA83XX(ha))
+ ha->fw_max_fcf_count = mcp->mb[12];
+ }
+
+@@ -3054,7 +3061,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha)
+ * Kernel context.
+ */
+ int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
++qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map,
++ u8 *num_entries)
+ {
+ int rval;
+ mbx_cmd_t mc;
+@@ -3094,6 +3102,8 @@ qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
+
+ if (pos_map)
+ memcpy(pos_map, pmap, FCAL_MAP_SIZE);
++ if (num_entries)
++ *num_entries = pmap[0];
+ }
+ dma_pool_free(ha->s_dma_pool, pmap, pmap_dma);
+
+@@ -5623,7 +5633,7 @@ qla2x00_get_data_rate(scsi_qla_host_t *vha)
+ mcp->out_mb = MBX_1|MBX_0;
+ mcp->in_mb = MBX_2|MBX_1|MBX_0;
+ if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+- mcp->in_mb |= MBX_3;
++ mcp->in_mb |= MBX_4|MBX_3;
+ mcp->tov = MBX_TOV_SECONDS;
+ mcp->flags = 0;
+ rval = qla2x00_mailbox_command(vha, mcp);
+@@ -6481,23 +6491,21 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+ if (!vha->hw->flags.fw_started)
+ goto done;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+- sp->type = SRB_MB_IOCB;
+- sp->name = mb_to_str(mcp->mb[0]);
+-
+ c = &sp->u.iocb_cmd;
+- c->timeout = qla2x00_async_iocb_timeout;
+ init_completion(&c->u.mbx.comp);
+
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ sp->type = SRB_MB_IOCB;
++ sp->name = mb_to_str(mcp->mb[0]);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_mb_sp_done);
+
+ memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG);
+
+- sp->done = qla2x00_async_mb_sp_done;
+-
+ rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS) {
+ ql_dbg(ql_dbg_mbx, vha, 0x1018,
+@@ -6529,7 +6537,8 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp)
+ }
+
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
+index 1c024055f8c50..eb43a5f1b3992 100644
+--- a/drivers/scsi/qla2xxx/qla_mid.c
++++ b/drivers/scsi/qla2xxx/qla_mid.c
+@@ -166,9 +166,13 @@ qla24xx_disable_vp(scsi_qla_host_t *vha)
+ int ret = QLA_SUCCESS;
+ fc_port_t *fcport;
+
+- if (vha->hw->flags.edif_enabled)
++ if (vha->hw->flags.edif_enabled) {
++ if (DBELL_ACTIVE(vha))
++ qla2x00_post_aen_work(vha, FCH_EVT_VENDOR_UNIQUE,
++ FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN);
+ /* delete sessions and flush sa_indexes */
+ qla2x00_wait_for_sess_deletion(vha);
++ }
+
+ if (vha->hw->flags.fw_started)
+ ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL);
+@@ -965,6 +969,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd)
+ if (vp_index == 0 || vp_index >= ha->max_npiv_vports)
+ return QLA_PARAMETER_ERROR;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(base_vha, NULL, GFP_KERNEL);
+ if (!sp)
+ return rval;
+@@ -972,9 +977,8 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd)
+ sp->type = SRB_CTRL_VP;
+ sp->name = "ctrl_vp";
+ sp->comp = &comp;
+- sp->done = qla_ctrlvp_sp_done;
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla_ctrlvp_sp_done);
+ sp->u.iocb_cmd.u.ctrlvp.cmd = cmd;
+ sp->u.iocb_cmd.u.ctrlvp.vp_index = vp_index;
+
+@@ -1008,6 +1012,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd)
+ break;
+ }
+ done:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ return rval;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
+index 6e920da64863e..f726eb8449c5e 100644
+--- a/drivers/scsi/qla2xxx/qla_mr.c
++++ b/drivers/scsi/qla2xxx/qla_mr.c
+@@ -738,29 +738,6 @@ qlafx00_lun_reset(fc_port_t *fcport, uint64_t l, int tag)
+ return qla2x00_async_tm_cmd(fcport, TCF_LUN_RESET, l, tag);
+ }
+
+-int
+-qlafx00_loop_reset(scsi_qla_host_t *vha)
+-{
+- int ret;
+- struct fc_port *fcport;
+- struct qla_hw_data *ha = vha->hw;
+-
+- if (ql2xtargetreset) {
+- list_for_each_entry(fcport, &vha->vp_fcports, list) {
+- if (fcport->port_type != FCT_TARGET)
+- continue;
+-
+- ret = ha->isp_ops->target_reset(fcport, 0, 0);
+- if (ret != QLA_SUCCESS) {
+- ql_dbg(ql_dbg_taskm, vha, 0x803d,
+- "Bus Reset failed: Reset=%d "
+- "d_id=%x.\n", ret, fcport->d_id.b24);
+- }
+- }
+- }
+- return QLA_SUCCESS;
+-}
+-
+ int
+ qlafx00_iospace_config(struct qla_hw_data *ha)
+ {
+@@ -1810,17 +1787,18 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type)
+ struct register_host_info *preg_hsi;
+ struct new_utsname *p_sysid = NULL;
+
++ /* ref: INIT */
+ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+ if (!sp)
+ goto done;
+
+ sp->type = SRB_FXIOCB_DCMD;
+ sp->name = "fxdisc";
++ qla2x00_init_async_sp(sp, FXDISC_TIMEOUT,
++ qla2x00_fxdisc_sp_done);
++ sp->u.iocb_cmd.timeout = qla2x00_fxdisc_iocb_timeout;
+
+ fdisc = &sp->u.iocb_cmd;
+- fdisc->timeout = qla2x00_fxdisc_iocb_timeout;
+- qla2x00_init_timer(sp, FXDISC_TIMEOUT);
+-
+ switch (fx_type) {
+ case FXDISC_GET_CONFIG_INFO:
+ fdisc->u.fxiocb.flags =
+@@ -1921,7 +1899,6 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type)
+ }
+
+ fdisc->u.fxiocb.req_func_type = cpu_to_le16(fx_type);
+- sp->done = qla2x00_fxdisc_sp_done;
+
+ rval = qla2x00_start_sp(sp);
+ if (rval != QLA_SUCCESS)
+@@ -1997,7 +1974,8 @@ done_unmap_req:
+ dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len,
+ fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle);
+ done_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ return rval;
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
+index 1c5da2dbd6f97..f535f478e37c8 100644
+--- a/drivers/scsi/qla2xxx/qla_nvme.c
++++ b/drivers/scsi/qla2xxx/qla_nvme.c
+@@ -127,6 +127,7 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport,
+ "Failed to allocate qpair\n");
+ return -EINVAL;
+ }
++ qla_adjust_iocb_limit(vha);
+ }
+ *handle = qpair;
+
+@@ -181,6 +182,7 @@ static void qla_nvme_release_ls_cmd_kref(struct kref *kref)
+ spin_unlock_irqrestore(&priv->cmd_lock, flags);
+
+ fd = priv->fd;
++
+ fd->done(fd, priv->comp_status);
+ out:
+ qla2x00_rel_sp(sp);
+@@ -228,6 +230,8 @@ static void qla_nvme_abort_work(struct work_struct *work)
+ fc_port_t *fcport = sp->fcport;
+ struct qla_hw_data *ha = fcport->vha->hw;
+ int rval, abts_done_called = 1;
++ bool io_wait_for_abort_done;
++ uint32_t handle;
+
+ ql_dbg(ql_dbg_io, fcport->vha, 0xffff,
+ "%s called for sp=%p, hndl=%x on fcport=%p desc=%p deleted=%d\n",
+@@ -244,12 +248,20 @@ static void qla_nvme_abort_work(struct work_struct *work)
+ goto out;
+ }
+
++ /*
++ * sp may not be valid after abort_command if return code is either
++ * SUCCESS or ERR_FROM_FW codes, so cache the value here.
++ */
++ io_wait_for_abort_done = ql2xabts_wait_nvme &&
++ QLA_ABTS_WAIT_ENABLED(sp);
++ handle = sp->handle;
++
+ rval = ha->isp_ops->abort_command(sp);
+
+ ql_dbg(ql_dbg_io, fcport->vha, 0x212b,
+ "%s: %s command for sp=%p, handle=%x on fcport=%p rval=%x\n",
+ __func__, (rval != QLA_SUCCESS) ? "Failed to abort" : "Aborted",
+- sp, sp->handle, fcport, rval);
++ sp, handle, fcport, rval);
+
+ /*
+ * If async tmf is enabled, the abort callback is called only on
+@@ -264,7 +276,7 @@ static void qla_nvme_abort_work(struct work_struct *work)
+ * are waited until ABTS complete. This kref is decreased
+ * at qla24xx_abort_sp_done function.
+ */
+- if (abts_done_called && ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
++ if (abts_done_called && io_wait_for_abort_done)
+ return;
+ out:
+ /* kref_get was done before work was schedule. */
+@@ -336,8 +348,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
+ nvme->u.nvme.rsp_len = fd->rsplen;
+ nvme->u.nvme.rsp_dma = fd->rspdma;
+ nvme->u.nvme.timeout_sec = fd->timeout;
+- nvme->u.nvme.cmd_dma = dma_map_single(&ha->pdev->dev, fd->rqstaddr,
+- fd->rqstlen, DMA_TO_DEVICE);
++ nvme->u.nvme.cmd_dma = fd->rqstdma;
+ dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma,
+ fd->rqstlen, DMA_TO_DEVICE);
+
+@@ -345,7 +356,6 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
+ if (rval != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x700e,
+ "qla2x00_start_sp failed = %d\n", rval);
+- wake_up(&sp->nvme_ls_waitq);
+ sp->priv = NULL;
+ priv->sp = NULL;
+ qla2x00_rel_sp(sp);
+@@ -411,13 +421,24 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
+ goto queuing_error;
+ }
+ req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
++
++ sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
++ sp->iores.exch_cnt = 1;
++ sp->iores.iocb_cnt = req_cnt;
++ if (qla_get_fw_resources(sp->qpair, &sp->iores)) {
++ rval = -EBUSY;
++ goto queuing_error;
++ }
++
+ if (req->cnt < (req_cnt + 2)) {
+ if (IS_SHADOW_REG_CAPABLE(ha)) {
+ cnt = *req->out_ptr;
+ } else {
+ cnt = rd_reg_dword_relaxed(req->req_q_out);
+- if (qla2x00_check_reg16_for_disconnect(vha, cnt))
++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) {
++ rval = -EBUSY;
+ goto queuing_error;
++ }
+ }
+
+ if (req->ring_index < cnt)
+@@ -562,6 +583,8 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
+ wrt_reg_dword(req->req_q_in, req->ring_index);
+
+ queuing_error:
++ if (rval)
++ qla_put_fw_resources(sp->qpair, &sp->iores);
+ spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
+ return rval;
+@@ -614,7 +637,6 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
+ if (!sp)
+ return -EBUSY;
+
+- init_waitqueue_head(&sp->nvme_ls_waitq);
+ kref_init(&sp->cmd_kref);
+ spin_lock_init(&priv->cmd_lock);
+ sp->priv = priv;
+@@ -631,9 +653,8 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
+
+ rval = qla2x00_start_nvme_mq(sp);
+ if (rval != QLA_SUCCESS) {
+- ql_log(ql_log_warn, vha, 0x212d,
++ ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x212d,
+ "qla2x00_start_nvme_mq failed = %d\n", rval);
+- wake_up(&sp->nvme_ls_waitq);
+ sp->priv = NULL;
+ priv->sp = NULL;
+ qla2xxx_rel_qpair_sp(sp->qpair, sp);
+@@ -743,7 +764,6 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
+ ha = vha->hw;
+ tmpl = &qla_nvme_fc_transport;
+
+- WARN_ON(vha->nvme_local_port);
+
+ qla_nvme_fc_transport.max_hw_queues =
+ min((uint8_t)(qla_nvme_fc_transport.max_hw_queues),
+@@ -754,13 +774,25 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha)
+ pinfo.port_role = FC_PORT_ROLE_NVME_INITIATOR;
+ pinfo.port_id = vha->d_id.b24;
+
+- ql_log(ql_log_info, vha, 0xffff,
+- "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
+- pinfo.node_name, pinfo.port_name, pinfo.port_id);
+- qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
+-
+- ret = nvme_fc_register_localport(&pinfo, tmpl,
+- get_device(&ha->pdev->dev), &vha->nvme_local_port);
++ mutex_lock(&ha->vport_lock);
++ /*
++ * Check again for nvme_local_port to see if any other thread raced
++ * with this one and finished registration.
++ */
++ if (!vha->nvme_local_port) {
++ ql_log(ql_log_info, vha, 0xffff,
++ "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n",
++ pinfo.node_name, pinfo.port_name, pinfo.port_id);
++ qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary;
++
++ ret = nvme_fc_register_localport(&pinfo, tmpl,
++ get_device(&ha->pdev->dev),
++ &vha->nvme_local_port);
++ mutex_unlock(&ha->vport_lock);
++ } else {
++ mutex_unlock(&ha->vport_lock);
++ return 0;
++ }
+ if (ret) {
+ ql_log(ql_log_warn, vha, 0xffff,
+ "register_localport failed: ret=%x\n", ret);
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 836fedcea241b..a40af9b832ab4 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -40,10 +40,11 @@ module_param(ql2xfulldump_on_mpifail, int, S_IRUGO | S_IWUSR);
+ MODULE_PARM_DESC(ql2xfulldump_on_mpifail,
+ "Set this to take full dump on MPI hang.");
+
+-int ql2xenforce_iocb_limit = 1;
++int ql2xenforce_iocb_limit = 2;
+ module_param(ql2xenforce_iocb_limit, int, S_IRUGO | S_IWUSR);
+ MODULE_PARM_DESC(ql2xenforce_iocb_limit,
+- "Enforce IOCB throttling, to avoid FW congestion. (default: 1)");
++ "Enforce IOCB throttling, to avoid FW congestion. (default: 2) "
++ "1: track usage per queue, 2: track usage per adapter");
+
+ /*
+ * CT6 CTX allocation cache
+@@ -202,12 +203,6 @@ MODULE_PARM_DESC(ql2xdbwr,
+ " 0 -- Regular doorbell.\n"
+ " 1 -- CAMRAM doorbell (faster).\n");
+
+-int ql2xtargetreset = 1;
+-module_param(ql2xtargetreset, int, S_IRUGO);
+-MODULE_PARM_DESC(ql2xtargetreset,
+- "Enable target reset."
+- "Default is 1 - use hw defaults.");
+-
+ int ql2xgffidenable;
+ module_param(ql2xgffidenable, int, S_IRUGO);
+ MODULE_PARM_DESC(ql2xgffidenable,
+@@ -339,6 +334,21 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme,
+ "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)");
+
+
++u32 ql2xdelay_before_pci_error_handling = 5;
++module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
++MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
++ "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
++
++int ql2xrspq_follow_inptr = 1;
++module_param(ql2xrspq_follow_inptr, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr,
++ "Follow RSP IN pointer for RSP updates for HBAs 27xx and newer (default: 1).");
++
++int ql2xrspq_follow_inptr_legacy = 1;
++module_param(ql2xrspq_follow_inptr_legacy, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr_legacy,
++ "Follow RSP IN pointer for RSP updates for HBAs older than 27XX. (default: 1).");
++
+ static void qla2x00_clear_drv_active(struct qla_hw_data *);
+ static void qla2x00_free_device(scsi_qla_host_t *);
+ static int qla2xxx_map_queues(struct Scsi_Host *shost);
+@@ -734,7 +744,8 @@ void qla2x00_sp_compl(srb_t *sp, int res)
+ struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+ struct completion *comp = sp->comp;
+
+- sp->free(sp);
++ /* kref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ cmd->result = res;
+ CMD_SP(cmd) = NULL;
+ cmd->scsi_done(cmd);
+@@ -825,7 +836,8 @@ void qla2xxx_qpair_sp_compl(srb_t *sp, int res)
+ struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+ struct completion *comp = sp->comp;
+
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ cmd->result = res;
+ CMD_SP(cmd) = NULL;
+ cmd->scsi_done(cmd);
+@@ -925,6 +937,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+ goto qc24_target_busy;
+
+ sp = scsi_cmd_priv(cmd);
++ /* ref: INIT */
+ qla2xxx_init_sp(sp, vha, vha->hw->base_qpair, fcport);
+
+ sp->u.scmd.cmd = cmd;
+@@ -944,7 +957,8 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+ return 0;
+
+ qc24_host_busy_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+ qc24_target_busy:
+ return SCSI_MLQUEUE_TARGET_BUSY;
+@@ -1014,6 +1028,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
+ goto qc24_target_busy;
+
+ sp = scsi_cmd_priv(cmd);
++ /* ref: INIT */
+ qla2xxx_init_sp(sp, vha, qpair, fcport);
+
+ sp->u.scmd.cmd = cmd;
+@@ -1032,7 +1047,8 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
+ return 0;
+
+ qc24_host_busy_free_sp:
+- sp->free(sp);
++ /* ref: INIT */
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+
+ qc24_target_busy:
+ return SCSI_MLQUEUE_TARGET_BUSY;
+@@ -1258,6 +1274,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ uint32_t ratov_j;
+ struct qla_qpair *qpair;
+ unsigned long flags;
++ int fast_fail_status = SUCCESS;
+
+ if (qla2x00_isp_reg_stat(ha)) {
+ ql_log(ql_log_info, vha, 0x8042,
+@@ -1266,9 +1283,10 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ return FAILED;
+ }
+
++ /* Save any FAST_IO_FAIL value to return later if abort succeeds */
+ ret = fc_block_scsi_eh(cmd);
+ if (ret != 0)
+- return ret;
++ fast_fail_status = ret;
+
+ sp = scsi_cmd_priv(cmd);
+ qpair = sp->qpair;
+@@ -1276,7 +1294,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ vha->cmd_timeout_cnt++;
+
+ if ((sp->fcport && sp->fcport->deleted) || !qpair)
+- return SUCCESS;
++ return fast_fail_status != SUCCESS ? fast_fail_status : FAILED;
+
+ spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+ sp->comp = &comp;
+@@ -1311,7 +1329,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ __func__, ha->r_a_tov/10);
+ ret = FAILED;
+ } else {
+- ret = SUCCESS;
++ ret = fast_fail_status;
+ }
+ break;
+ default:
+@@ -1331,21 +1349,20 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ /*
+ * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED.
+ */
+-int
+-qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+- uint64_t l, enum nexus_wait_type type)
++static int
++__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t,
++ uint64_t l, enum nexus_wait_type type)
+ {
+ int cnt, match, status;
+ unsigned long flags;
+- struct qla_hw_data *ha = vha->hw;
+- struct req_que *req;
++ scsi_qla_host_t *vha = qpair->vha;
++ struct req_que *req = qpair->req;
+ srb_t *sp;
+ struct scsi_cmnd *cmd;
+
+ status = QLA_SUCCESS;
+
+- spin_lock_irqsave(&ha->hardware_lock, flags);
+- req = vha->req;
++ spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+ for (cnt = 1; status == QLA_SUCCESS &&
+ cnt < req->num_outstanding_cmds; cnt++) {
+ sp = req->outstanding_cmds[cnt];
+@@ -1372,15 +1389,35 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+ if (!match)
+ continue;
+
+- spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+ status = qla2x00_eh_wait_on_command(cmd);
+- spin_lock_irqsave(&ha->hardware_lock, flags);
++ spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+ }
+- spin_unlock_irqrestore(&ha->hardware_lock, flags);
++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+
+ return status;
+ }
+
++int
++qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
++ uint64_t l, enum nexus_wait_type type)
++{
++ struct qla_qpair *qpair;
++ struct qla_hw_data *ha = vha->hw;
++ int i, status = QLA_SUCCESS;
++
++ status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l,
++ type);
++ for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) {
++ qpair = ha->queue_pair_map[i];
++ if (!qpair)
++ continue;
++ status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l,
++ type);
++ }
++ return status;
++}
++
+ static char *reset_errors[] = {
+ "HBA not online",
+ "HBA not ready",
+@@ -1414,7 +1451,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
+ return err;
+
+ if (fcport->deleted)
+- return SUCCESS;
++ return FAILED;
+
+ ql_log(ql_log_info, vha, 0x8009,
+ "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no,
+@@ -1434,8 +1471,9 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
+ goto eh_reset_failed;
+ }
+ err = 3;
+- if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+- sdev->lun, WAIT_LUN) != QLA_SUCCESS) {
++ if (qla2x00_eh_wait_for_pending_commands(vha, fcport->d_id.b24,
++ cmd->device->lun,
++ WAIT_LUN) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x800d,
+ "wait for pending cmds failed for cmd=%p.\n", cmd);
+ goto eh_reset_failed;
+@@ -1482,7 +1520,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
+ return err;
+
+ if (fcport->deleted)
+- return SUCCESS;
++ return FAILED;
+
+ ql_log(ql_log_info, vha, 0x8009,
+ "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no,
+@@ -1501,8 +1539,8 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
+ goto eh_reset_failed;
+ }
+ err = 3;
+- if (qla2x00_eh_wait_for_pending_commands(vha, sdev->id,
+- 0, WAIT_TARGET) != QLA_SUCCESS) {
++ if (qla2x00_eh_wait_for_pending_commands(vha, fcport->d_id.b24, 0,
++ WAIT_TARGET) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x800d,
+ "wait for pending cmds failed for cmd=%p.\n", cmd);
+ goto eh_reset_failed;
+@@ -1693,27 +1731,10 @@ int
+ qla2x00_loop_reset(scsi_qla_host_t *vha)
+ {
+ int ret;
+- struct fc_port *fcport;
+ struct qla_hw_data *ha = vha->hw;
+
+- if (IS_QLAFX00(ha)) {
+- return qlafx00_loop_reset(vha);
+- }
+-
+- if (ql2xtargetreset == 1 && ha->flags.enable_target_reset) {
+- list_for_each_entry(fcport, &vha->vp_fcports, list) {
+- if (fcport->port_type != FCT_TARGET)
+- continue;
+-
+- ret = ha->isp_ops->target_reset(fcport, 0, 0);
+- if (ret != QLA_SUCCESS) {
+- ql_dbg(ql_dbg_taskm, vha, 0x802c,
+- "Bus Reset failed: Reset=%d "
+- "d_id=%x.\n", ret, fcport->d_id.b24);
+- }
+- }
+- }
+-
++ if (IS_QLAFX00(ha))
++ return QLA_SUCCESS;
+
+ if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) {
+ atomic_set(&vha->loop_state, LOOP_DOWN);
+@@ -1826,6 +1847,17 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
+ for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
+ sp = req->outstanding_cmds[cnt];
+ if (sp) {
++ /*
++ * perform lockless completion during driver unload
++ */
++ if (qla2x00_chip_is_down(vha)) {
++ req->outstanding_cmds[cnt] = NULL;
++ spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
++ sp->done(sp, res);
++ spin_lock_irqsave(qp->qp_lock_ptr, flags);
++ continue;
++ }
++
+ switch (sp->cmd_type) {
+ case TYPE_SRB:
+ qla2x00_abort_srb(qp, sp, res, &flags);
+@@ -2794,6 +2826,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
+ return atomic_read(&vha->loop_state) == LOOP_READY;
+ }
+
++static void qla_heartbeat_work_fn(struct work_struct *work)
++{
++ struct qla_hw_data *ha = container_of(work,
++ struct qla_hw_data, heartbeat_work);
++ struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
++
++ if (!ha->flags.mbox_busy && base_vha->flags.init_done)
++ qla_no_op_mb(base_vha);
++}
++
+ static void qla2x00_iocb_work_fn(struct work_struct *work)
+ {
+ struct scsi_qla_host *vha = container_of(work,
+@@ -2930,9 +2972,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+ ha->max_exchg = FW_MAX_EXCHANGES_CNT;
+ atomic_set(&ha->num_pend_mbx_stage1, 0);
+ atomic_set(&ha->num_pend_mbx_stage2, 0);
+- atomic_set(&ha->num_pend_mbx_stage3, 0);
+ atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
+ ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
++ INIT_LIST_HEAD(&ha->tmf_pending);
++ INIT_LIST_HEAD(&ha->tmf_active);
+
+ /* Assign ISP specific operations. */
+ if (IS_QLA2100(ha)) {
+@@ -3209,6 +3252,13 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+ host->max_id = ha->max_fibre_devices;
+ host->cmd_per_lun = 3;
+ host->unique_id = host->host_no;
++
++ if (ql2xenabledif && ql2xenabledif != 2) {
++ ql_log(ql_log_warn, base_vha, 0x302d,
++ "Invalid value for ql2xenabledif, resetting it to default (2)\n");
++ ql2xenabledif = 2;
++ }
++
+ if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
+ host->max_cmd_len = 32;
+ else
+@@ -3232,6 +3282,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+ host->transportt, sht->vendor_id);
+
+ INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
++ INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn);
+
+ /* Set up the irqs */
+ ret = qla2x00_request_irqs(ha, rsp);
+@@ -3441,8 +3492,6 @@ skip_dpc:
+ base_vha->flags.difdix_supported = 1;
+ ql_dbg(ql_dbg_init, base_vha, 0x00f1,
+ "Registering for DIF/DIX type 1 and 3 protection.\n");
+- if (ql2xenabledif == 1)
+- prot = SHOST_DIX_TYPE0_PROTECTION;
+ if (ql2xprotmask)
+ scsi_host_set_prot(host, ql2xprotmask);
+ else
+@@ -3532,6 +3581,7 @@ skip_dpc:
+ probe_failed:
+ qla_enode_stop(base_vha);
+ qla_edb_stop(base_vha);
++ vfree(base_vha->scan.l);
+ if (base_vha->gnl.l) {
+ dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size,
+ base_vha->gnl.l, base_vha->gnl.ldma);
+@@ -3754,8 +3804,7 @@ qla2x00_unmap_iobases(struct qla_hw_data *ha)
+ if (ha->mqiobase)
+ iounmap(ha->mqiobase);
+
+- if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) &&
+- ha->msixbase)
++ if (ha->msixbase)
+ iounmap(ha->msixbase);
+ }
+ }
+@@ -3891,13 +3940,15 @@ qla2x00_remove_one(struct pci_dev *pdev)
+ static inline void
+ qla24xx_free_purex_list(struct purex_list *list)
+ {
+- struct list_head *item, *next;
++ struct purex_item *item, *next;
+ ulong flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+- list_for_each_safe(item, next, &list->head) {
+- list_del(item);
+- kfree(list_entry(item, struct purex_item, list));
++ list_for_each_entry_safe(item, next, &list->head, list) {
++ list_del(&item->list);
++ if (item == &item->vha->default_item)
++ continue;
++ kfree(item);
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+ }
+@@ -4358,7 +4409,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
+
+ /* allocate the purex dma pool */
+ ha->purex_dma_pool = dma_pool_create(name, &ha->pdev->dev,
+- MAX_PAYLOAD, 8, 0);
++ ELS_MAX_PAYLOAD, 8, 0);
+
+ if (!ha->purex_dma_pool) {
+ ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011b,
+@@ -4999,7 +5050,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
+ }
+ INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn);
+
+- sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no);
++ snprintf(vha->host_str, sizeof(vha->host_str), "%s_%lu",
++ QLA2XXX_DRIVER_NAME, vha->host_no);
+ ql_dbg(ql_dbg_init, vha, 0x0041,
+ "Allocated the host=%p hw=%p vha=%p dev_name=%s",
+ vha->host, vha->hw, vha,
+@@ -5012,13 +5064,11 @@ struct qla_work_evt *
+ qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type)
+ {
+ struct qla_work_evt *e;
+- uint8_t bail;
+
+ if (test_bit(UNLOADING, &vha->dpc_flags))
+ return NULL;
+
+- QLA_VHA_MARK_BUSY(vha, bail);
+- if (bail)
++ if (qla_vha_mark_busy(vha))
+ return NULL;
+
+ e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC);
+@@ -5469,7 +5519,7 @@ qla2x00_do_work(struct scsi_qla_host *vha)
+ e->u.fcport.fcport, false);
+ break;
+ case QLA_EVT_SA_REPLACE:
+- qla24xx_issue_sa_replace_iocb(vha, e);
++ rc = qla24xx_issue_sa_replace_iocb(vha, e);
+ break;
+ }
+
+@@ -5532,6 +5582,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha)
+ memset(&ea, 0, sizeof(ea));
+ ea.fcport = fcport;
+ qla24xx_handle_relogin_event(vha, &ea);
++ } else if (vha->hw->current_topology ==
++ ISP_CFG_NL &&
++ IS_QLA2XXX_MIDTYPE(vha->hw)) {
++ (void)qla24xx_fcport_handle_login(vha,
++ fcport);
+ } else if (vha->hw->current_topology ==
+ ISP_CFG_NL) {
+ fcport->login_retry--;
+@@ -7035,9 +7090,12 @@ qla2x00_do_dpc(void *data)
+ }
+ }
+ loop_resync_check:
+- if (test_and_clear_bit(LOOP_RESYNC_NEEDED,
++ if (!qla2x00_reset_active(base_vha) &&
++ test_and_clear_bit(LOOP_RESYNC_NEEDED,
+ &base_vha->dpc_flags)) {
+-
++ /*
++ * Allow abort_isp to complete before moving on to scanning.
++ */
+ ql_dbg(ql_dbg_dpc, base_vha, 0x400f,
+ "Loop resync scheduled.\n");
+
+@@ -7114,17 +7172,6 @@ intr_on_check:
+ qla2x00_lip_reset(base_vha);
+ }
+
+- if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) {
+- /*
+- * if there is a mb in progress then that's
+- * enough of a check to see if fw is still ticking.
+- */
+- if (!ha->flags.mbox_busy && base_vha->flags.init_done)
+- qla_no_op_mb(base_vha);
+-
+- clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags);
+- }
+-
+ ha->dpc_active = 0;
+ end_loop:
+ set_current_state(TASK_INTERRUPTIBLE);
+@@ -7183,56 +7230,99 @@ qla2x00_rst_aen(scsi_qla_host_t *vha)
+
+ static bool qla_do_heartbeat(struct scsi_qla_host *vha)
+ {
+- u64 cmd_cnt, prev_cmd_cnt;
+- bool do_hb = false;
+ struct qla_hw_data *ha = vha->hw;
+- int i;
++ u32 cmpl_cnt;
++ u16 i;
++ bool do_heartbeat = false;
+
+- /* if cmds are still pending down in fw, then do hb */
+- if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) {
+- do_hb = true;
++ /*
++ * Allow do_heartbeat only if we don't have any active interrupts,
++ * but there are still IOs outstanding with firmware.
++ */
++ cmpl_cnt = ha->base_qpair->cmd_completion_cnt;
++ if (cmpl_cnt == ha->base_qpair->prev_completion_cnt &&
++ cmpl_cnt != ha->base_qpair->cmd_cnt) {
++ do_heartbeat = true;
+ goto skip;
+ }
++ ha->base_qpair->prev_completion_cnt = cmpl_cnt;
+
+ for (i = 0; i < ha->max_qpairs; i++) {
+- if (ha->queue_pair_map[i] &&
+- ha->queue_pair_map[i]->cmd_cnt !=
+- ha->queue_pair_map[i]->cmd_completion_cnt) {
+- do_hb = true;
+- break;
++ if (ha->queue_pair_map[i]) {
++ cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt;
++ if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt &&
++ cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) {
++ do_heartbeat = true;
++ break;
++ }
++ ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt;
+ }
+ }
+
+ skip:
+- prev_cmd_cnt = ha->prev_cmd_cnt;
+- cmd_cnt = ha->base_qpair->cmd_cnt;
+- for (i = 0; i < ha->max_qpairs; i++) {
+- if (ha->queue_pair_map[i])
+- cmd_cnt += ha->queue_pair_map[i]->cmd_cnt;
+- }
+- ha->prev_cmd_cnt = cmd_cnt;
+-
+- if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50))
+- /*
+- * IOs are completing before periodic hb check.
+- * IOs seems to be running, do hb for sanity check.
+- */
+- do_hb = true;
+-
+- return do_hb;
++ return do_heartbeat;
+ }
+
+-static void qla_heart_beat(struct scsi_qla_host *vha)
++static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started)
+ {
++ struct qla_hw_data *ha = vha->hw;
++
+ if (vha->vp_idx)
+ return;
+
+ if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
+ return;
+
++ /*
++ * dpc thread cannot run if heartbeat is running at the same time.
++ * We also do not want to starve heartbeat task. Therefore, do
++ * heartbeat task at least once every 5 seconds.
++ */
++ if (dpc_started &&
++ time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ))
++ return;
++
+ if (qla_do_heartbeat(vha)) {
+- set_bit(HEARTBEAT_CHK, &vha->dpc_flags);
+- qla2xxx_wake_dpc(vha);
++ ha->last_heartbeat_run_jiffies = jiffies;
++ queue_work(ha->wq, &ha->heartbeat_work);
++ }
++}
++
++static void qla_wind_down_chip(scsi_qla_host_t *vha)
++{
++ struct qla_hw_data *ha = vha->hw;
++
++ if (!ha->flags.eeh_busy)
++ return;
++ if (ha->pci_error_state)
++ /* system is trying to recover */
++ return;
++
++ /*
++ * Current system is not handling PCIE error. At this point, this is
++ * best effort to wind down the adapter.
++ */
++ if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) &&
++ !ha->flags.eeh_flush) {
++ ql_log(ql_log_info, vha, 0x9009,
++ "PCI Error detected, attempting to reset hardware.\n");
++
++ ha->isp_ops->reset_chip(vha);
++ ha->isp_ops->disable_intrs(ha);
++
++ ha->flags.eeh_flush = EEH_FLUSH_RDY;
++ ha->eeh_jif = jiffies;
++
++ } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY &&
++ time_after_eq(jiffies, ha->eeh_jif + 5 * HZ)) {
++ pci_clear_master(ha->pdev);
++
++ /* flush all command */
++ qla2x00_abort_isp_cleanup(vha);
++ ha->flags.eeh_flush = EEH_FLUSH_DONE;
++
++ ql_log(ql_log_info, vha, 0x900a,
++ "PCI Error handling complete, all IOs aborted.\n");
+ }
+ }
+
+@@ -7259,6 +7349,8 @@ qla2x00_timer(struct timer_list *t)
+ fc_port_t *fcport = NULL;
+
+ if (ha->flags.eeh_busy) {
++ qla_wind_down_chip(vha);
++
+ ql_dbg(ql_dbg_timer, vha, 0x6000,
+ "EEH = %d, restarting timer.\n",
+ ha->flags.eeh_busy);
+@@ -7354,7 +7446,7 @@ qla2x00_timer(struct timer_list *t)
+
+ /* if the loop has been down for 4 minutes, reinit adapter */
+ if (atomic_dec_and_test(&vha->loop_down_timer) != 0) {
+- if (!(vha->device_flags & DFLG_NO_CABLE)) {
++ if (!(vha->device_flags & DFLG_NO_CABLE) && !vha->vp_idx) {
+ ql_log(ql_log_warn, vha, 0x6009,
+ "Loop down - aborting ISP.\n");
+
+@@ -7424,6 +7516,8 @@ qla2x00_timer(struct timer_list *t)
+ start_dpc++;
+ }
+
++ /* borrowing w to signify dpc will run */
++ w = 0;
+ /* Schedule the DPC routine if needed */
+ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) ||
+ test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) ||
+@@ -7456,9 +7550,10 @@ qla2x00_timer(struct timer_list *t)
+ test_bit(RELOGIN_NEEDED, &vha->dpc_flags),
+ test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags));
+ qla2xxx_wake_dpc(vha);
++ w = 1;
+ }
+
+- qla_heart_beat(vha);
++ qla_heart_beat(vha, w);
+
+ qla2x00_restart_timer(vha, WATCH_INTERVAL);
+ }
+@@ -7656,7 +7751,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+
+ switch (state) {
+ case pci_channel_io_normal:
+- ha->flags.eeh_busy = 0;
++ qla_pci_set_eeh_busy(vha);
+ if (ql2xmqsupport || ql2xnvmeenable) {
+ set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+@@ -7697,9 +7792,16 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev)
+ "mmio enabled\n");
+
+ ha->pci_error_state = QLA_PCI_MMIO_ENABLED;
++
+ if (IS_QLA82XX(ha))
+ return PCI_ERS_RESULT_RECOVERED;
+
++ if (qla2x00_isp_reg_stat(ha)) {
++ ql_log(ql_log_info, base_vha, 0x803f,
++ "During mmio enabled, PCI/Register disconnect still detected.\n");
++ goto out;
++ }
++
+ spin_lock_irqsave(&ha->hardware_lock, flags);
+ if (IS_QLA2100(ha) || IS_QLA2200(ha)){
+ stat = rd_reg_word(&reg->hccr);
+@@ -7721,6 +7823,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev)
+ "RISC paused -- mmio_enabled, Dumping firmware.\n");
+ qla2xxx_dump_fw(base_vha);
+ }
++out:
+ /* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */
+ ql_dbg(ql_dbg_aer, base_vha, 0x600d,
+ "mmio enabled returning.\n");
+@@ -7828,6 +7931,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha)
+
+ spin_lock_irqsave(&base_vha->work_lock, flags);
+ if (!ha->flags.eeh_busy) {
++ ha->eeh_jif = jiffies;
++ ha->flags.eeh_flush = 0;
++
+ ha->flags.eeh_busy = 1;
+ do_cleanup = true;
+ }
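
As an aside on the qla2xxx changes above: the reworked qla_do_heartbeat() no longer sums command counts across ticks; it flags a heartbeat only when a queue pair's completion counter has not moved since the previous timer tick while commands are still outstanding in firmware. Below is a minimal, self-contained C model of that stall check; the structure and field names are invented for illustration and are not the driver's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct qpair_model {
	uint32_t cmd_cnt;             /* commands submitted to firmware */
	uint32_t cmd_completion_cnt;  /* completions seen so far */
	uint32_t prev_completion_cnt; /* snapshot from the previous tick */
};

/*
 * Returns true when the queue pair looks stalled: nothing has completed
 * since the last tick even though commands are still outstanding.
 */
static bool qpair_needs_heartbeat(struct qpair_model *qp)
{
	uint32_t cmpl = qp->cmd_completion_cnt;

	if (cmpl == qp->prev_completion_cnt && cmpl != qp->cmd_cnt)
		return true;

	qp->prev_completion_cnt = cmpl;  /* progress was made; resample */
	return false;
}

int main(void)
{
	struct qpair_model qp = {
		.cmd_cnt = 10, .cmd_completion_cnt = 10, .prev_completion_cnt = 10,
	};

	printf("idle:     %d\n", qpair_needs_heartbeat(&qp)); /* 0: nothing pending */

	qp.cmd_cnt = 15;              /* five new commands queued */
	qp.cmd_completion_cnt = 12;   /* two completed before the next tick */
	printf("progress: %d\n", qpair_needs_heartbeat(&qp)); /* 0: completions moved */

	printf("stalled:  %d\n", qpair_needs_heartbeat(&qp)); /* 1: no movement, IO pending */
	return 0;
}
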
+diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
+index a0aeba69513d4..c092a6b1ced4f 100644
+--- a/drivers/scsi/qla2xxx/qla_sup.c
++++ b/drivers/scsi/qla2xxx/qla_sup.c
+@@ -844,7 +844,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
+ ha->flt_region_nvram = start;
+ break;
+ case FLT_REG_IMG_PRI_27XX:
+- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha))
++ if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
+ ha->flt_region_img_status_pri = start;
+ break;
+ case FLT_REG_IMG_SEC_27XX:
+@@ -1356,7 +1356,7 @@ next:
+ flash_data_addr(ha, faddr), le32_to_cpu(*dwptr));
+ if (ret) {
+ ql_dbg(ql_dbg_user, vha, 0x7006,
+- "Failed slopw write %x (%x)\n", faddr, *dwptr);
++ "Failed slow write %x (%x)\n", faddr, *dwptr);
+ break;
+ }
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
+index 7d8242c120fc7..2ce041fdec755 100644
+--- a/drivers/scsi/qla2xxx/qla_target.c
++++ b/drivers/scsi/qla2xxx/qla_target.c
+@@ -620,7 +620,7 @@ static void qla2x00_async_nack_sp_done(srb_t *sp, int res)
+ }
+ spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+
+- sp->free(sp);
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ }
+
+ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
+@@ -656,12 +656,10 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
+
+ sp->type = type;
+ sp->name = "nack";
+-
+- sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
+- qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2);
++ qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
++ qla2x00_async_nack_sp_done);
+
+ sp->u.iocb_cmd.u.nack.ntfy = ntfy;
+- sp->done = qla2x00_async_nack_sp_done;
+
+ ql_dbg(ql_dbg_disc, vha, 0x20f4,
+ "Async-%s %8phC hndl %x %s\n",
+@@ -674,7 +672,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport,
+ return rval;
+
+ done_free_sp:
+- sp->free(sp);
++ kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ done:
+ fcport->flags &= ~FCF_ASYNC_SENT;
+ return rval;
+@@ -1003,6 +1001,7 @@ void qlt_free_session_done(struct work_struct *work)
+ "%s bypassing release_all_sadb\n",
+ __func__);
+ }
++ qla_edif_clear_appdata(vha, sess);
+ qla_edif_sess_down(vha, sess);
+ }
+ qla2x00_mark_device_lost(vha, sess, 0);
+@@ -1090,10 +1089,6 @@ void qlt_free_session_done(struct work_struct *work)
+ (struct imm_ntfy_from_isp *)sess->iocb, SRB_NACK_LOGO);
+ }
+
+- spin_lock_irqsave(&vha->work_lock, flags);
+- sess->flags &= ~FCF_ASYNC_SENT;
+- spin_unlock_irqrestore(&vha->work_lock, flags);
+-
+ spin_lock_irqsave(&ha->tgt.sess_lock, flags);
+ if (sess->se_sess) {
+ sess->se_sess = NULL;
+@@ -1103,7 +1098,6 @@ void qlt_free_session_done(struct work_struct *work)
+
+ qla2x00_set_fcport_disc_state(sess, DSC_DELETED);
+ sess->fw_login_state = DSC_LS_PORT_UNAVAIL;
+- sess->deleted = QLA_SESS_DELETED;
+
+ if (sess->login_succ && !IS_SW_RESV_ADDR(sess->d_id)) {
+ vha->fcport_count--;
+@@ -1155,10 +1149,15 @@ void qlt_free_session_done(struct work_struct *work)
+
+ sess->explicit_logout = 0;
+ spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+- sess->free_pending = 0;
+
+ qla2x00_dfs_remove_rport(vha, sess);
+
++ spin_lock_irqsave(&vha->work_lock, flags);
++ sess->flags &= ~FCF_ASYNC_SENT;
++ sess->deleted = QLA_SESS_DELETED;
++ sess->free_pending = 0;
++ spin_unlock_irqrestore(&vha->work_lock, flags);
++
+ ql_dbg(ql_dbg_disc, vha, 0xf001,
+ "Unregistration of sess %p %8phC finished fcp_cnt %d\n",
+ sess, sess->port_name, vha->fcport_count);
+@@ -1207,12 +1206,12 @@ void qlt_unreg_sess(struct fc_port *sess)
+ * management from being sent.
+ */
+ sess->flags |= FCF_ASYNC_SENT;
++ sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
+ spin_unlock_irqrestore(&sess->vha->work_lock, flags);
+
+ if (sess->se_sess)
+ vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess);
+
+- sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
+ qla2x00_set_fcport_disc_state(sess, DSC_DELETE_PEND);
+ sess->last_rscn_gen = sess->rscn_gen;
+ sess->last_login_gen = sess->login_gen;
+@@ -2167,8 +2166,10 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
+
+ abort_cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess,
+ le32_to_cpu(abts->exchange_addr_to_abort));
+- if (!abort_cmd)
++ if (!abort_cmd) {
++ mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool);
+ return -EIO;
++ }
+ mcmd->unpacked_lun = abort_cmd->se_cmd.orig_fe_lun;
+
+ if (abort_cmd->qpair) {
+@@ -3319,6 +3320,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
+ "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n",
+ vha->flags.online, qla2x00_reset_active(vha),
+ cmd->reset_count, qpair->chip_reset);
++ res = 0;
+ goto out_unmap_unlock;
+ }
+
+@@ -3837,6 +3839,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
+
+ spin_lock_irqsave(&cmd->cmd_lock, flags);
+ if (cmd->aborted) {
++ if (cmd->sg_mapped)
++ qlt_unmap_sg(vha, cmd);
++
+ spin_unlock_irqrestore(&cmd->cmd_lock, flags);
+ /*
+ * It's normal to see 2 calls in this path:
+@@ -4812,7 +4817,7 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
+ }
+
+ if (vha->hw->flags.edif_enabled) {
+- if (!(vha->e_dbell.db_flags & EDB_ACTIVE)) {
++ if (DBELL_INACTIVE(vha)) {
+ ql_dbg(ql_dbg_disc, vha, 0xffff,
+ "%s %d Term INOT due to app not started lid=%d, NportID %06X ",
+ __func__, __LINE__, loop_id, port_id.b24);
+@@ -6958,14 +6963,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha)
+
+ if (ha->flags.msix_enabled) {
+ if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+- if (IS_QLA2071(ha)) {
+- /* 4 ports Baker: Enable Interrupt Handshake */
+- icb->msix_atio = 0;
+- icb->firmware_options_2 |= cpu_to_le32(BIT_26);
+- } else {
+- icb->msix_atio = cpu_to_le16(msix->entry);
+- icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
+- }
++ icb->msix_atio = cpu_to_le16(msix->entry);
++ icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
+ ql_dbg(ql_dbg_init, vha, 0xf072,
+ "Registering ICB vector 0x%x for atio que.\n",
+ msix->entry);
+@@ -7220,8 +7219,7 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
+ if (!QLA_TGT_MODE_ENABLED())
+ return;
+
+- if ((ql2xenablemsix == 0) || IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
+- IS_QLA28XX(ha)) {
++ if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+ ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in;
+ ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out;
+ } else {
+diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
+index 156b950ca7e72..aa83434448377 100644
+--- a/drivers/scsi/qla2xxx/qla_target.h
++++ b/drivers/scsi/qla2xxx/qla_target.h
+@@ -1080,8 +1080,6 @@ extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *,
+ struct init_cb_81xx *);
+ extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *,
+ struct nvram_81xx *);
+-extern int qlt_24xx_process_response_error(struct scsi_qla_host *,
+- struct sts_entry_24xx *);
+ extern void qlt_modify_vp_config(struct scsi_qla_host *,
+ struct vp_config_entry_24xx *);
+ extern void qlt_probe_one_stage1(struct scsi_qla_host *, struct qla_hw_data *);
+diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
+index 26c13a953b975..b0a74b036cf4b 100644
+--- a/drivers/scsi/qla2xxx/qla_tmpl.c
++++ b/drivers/scsi/qla2xxx/qla_tmpl.c
+@@ -435,8 +435,13 @@ qla27xx_fwdt_entry_t266(struct scsi_qla_host *vha,
+ {
+ ql_dbg(ql_dbg_misc, vha, 0xd20a,
+ "%s: reset risc [%lx]\n", __func__, *len);
+- if (buf)
+- WARN_ON_ONCE(qla24xx_soft_reset(vha->hw) != QLA_SUCCESS);
++ if (buf) {
++ if (qla24xx_soft_reset(vha->hw) != QLA_SUCCESS) {
++ ql_dbg(ql_dbg_async, vha, 0x5001,
++ "%s: unable to soft reset\n", __func__);
++ return INVALID_ENTRY;
++ }
++ }
+
+ return qla27xx_next_entry(ent);
+ }
+diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
+index f1ea65c6e5f5d..dc466a364fb1f 100644
+--- a/drivers/scsi/qla4xxx/ql4_os.c
++++ b/drivers/scsi/qla4xxx/ql4_os.c
+@@ -968,6 +968,11 @@ static int qla4xxx_set_chap_entry(struct Scsi_Host *shost, void *data, int len)
+ memset(&chap_rec, 0, sizeof(chap_rec));
+
+ nla_for_each_attr(attr, data, len, rem) {
++ if (nla_len(attr) < sizeof(*param_info)) {
++ rc = -EINVAL;
++ goto exit_set_chap;
++ }
++
+ param_info = nla_data(attr);
+
+ switch (param_info->param) {
+@@ -2750,6 +2755,11 @@ qla4xxx_iface_set_param(struct Scsi_Host *shost, void *data, uint32_t len)
+ }
+
+ nla_for_each_attr(attr, data, len, rem) {
++ if (nla_len(attr) < sizeof(*iface_param)) {
++ rval = -EINVAL;
++ goto exit_init_fw_cb;
++ }
++
+ iface_param = nla_data(attr);
+
+ if (iface_param->param_type == ISCSI_NET_PARAM) {
+@@ -8105,6 +8115,11 @@ qla4xxx_sysfs_ddb_set_param(struct iscsi_bus_flash_session *fnode_sess,
+
+ memset((void *)&chap_tbl, 0, sizeof(chap_tbl));
+ nla_for_each_attr(attr, data, len, rem) {
++ if (nla_len(attr) < sizeof(*fnode_param)) {
++ rc = -EINVAL;
++ goto exit_set_param;
++ }
++
+ fnode_param = nla_data(attr);
+
+ switch (fnode_param->param) {
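
The three qla4xxx hunks above add the same guard: before casting nla_data() to a fixed-size parameter structure, the attribute's payload length is validated with nla_len(). A kernel-style sketch of that idiom follows; struct my_param and parse_params() are placeholders rather than qla4xxx symbols, and the snippet illustrates the pattern, not the driver code.

#include <net/netlink.h>

struct my_param {
	u32 param;
	u32 value;
};

/* Walk a buffer of netlink attributes, rejecting short payloads. */
static int parse_params(void *data, int len)
{
	struct nlattr *attr;
	struct my_param *p;
	int rem;

	nla_for_each_attr(attr, data, len, rem) {
		/* Never trust userspace: the payload must hold a full struct. */
		if (nla_len(attr) < sizeof(*p))
			return -EINVAL;

		p = nla_data(attr);
		/* ... act on p->param / p->value ... */
	}
	return 0;
}
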
+diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c
+index 898a0bdf8df67..95a86e0dfd77a 100644
+--- a/drivers/scsi/raid_class.c
++++ b/drivers/scsi/raid_class.c
+@@ -209,53 +209,6 @@ raid_attr_ro_state(level);
+ raid_attr_ro_fn(resync);
+ raid_attr_ro_state_fn(state);
+
+-static void raid_component_release(struct device *dev)
+-{
+- struct raid_component *rc =
+- container_of(dev, struct raid_component, dev);
+- dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n");
+- put_device(rc->dev.parent);
+- kfree(rc);
+-}
+-
+-int raid_component_add(struct raid_template *r,struct device *raid_dev,
+- struct device *component_dev)
+-{
+- struct device *cdev =
+- attribute_container_find_class_device(&r->raid_attrs.ac,
+- raid_dev);
+- struct raid_component *rc;
+- struct raid_data *rd = dev_get_drvdata(cdev);
+- int err;
+-
+- rc = kzalloc(sizeof(*rc), GFP_KERNEL);
+- if (!rc)
+- return -ENOMEM;
+-
+- INIT_LIST_HEAD(&rc->node);
+- device_initialize(&rc->dev);
+- rc->dev.release = raid_component_release;
+- rc->dev.parent = get_device(component_dev);
+- rc->num = rd->component_count++;
+-
+- dev_set_name(&rc->dev, "component-%d", rc->num);
+- list_add_tail(&rc->node, &rd->component_list);
+- rc->dev.class = &raid_class.class;
+- err = device_add(&rc->dev);
+- if (err)
+- goto err_out;
+-
+- return 0;
+-
+-err_out:
+- list_del(&rc->node);
+- rd->component_count--;
+- put_device(component_dev);
+- kfree(rc);
+- return err;
+-}
+-EXPORT_SYMBOL(raid_component_add);
+-
+ struct raid_template *
+ raid_class_attach(struct raid_function_template *ft)
+ {
+diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
+index 291ecc33b1fe6..a499a57150720 100644
+--- a/drivers/scsi/scsi.c
++++ b/drivers/scsi/scsi.c
+@@ -209,11 +209,11 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
+
+
+ /*
+- * 1024 is big enough for saturating the fast scsi LUN now
++ * 1024 is big enough for saturating fast SCSI LUNs.
+ */
+ int scsi_device_max_queue_depth(struct scsi_device *sdev)
+ {
+- return max_t(int, sdev->host->can_queue, 1024);
++ return min_t(int, sdev->host->can_queue, 1024);
+ }
+
+ /**
+@@ -323,11 +323,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
+ if (result)
+ return -EIO;
+
+- /* Sanity check that we got the page back that we asked for */
++ /*
++ * Sanity check that we got the page back that we asked for and that
++ * the page size is not 0.
++ */
+ if (buffer[1] != page)
+ return -EIO;
+
+- return get_unaligned_be16(&buffer[2]) + 4;
++ result = get_unaligned_be16(&buffer[2]);
++ if (!result)
++ return -EIO;
++
++ return result + 4;
+ }
+
+ /**
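
The scsi.c hunk above makes scsi_vpd_inquiry() reject a zero page length in addition to a page-code mismatch, so callers never see a bogus 4-byte result for an empty VPD page. A small self-contained C model of the returned-length calculation is below (buffer layout per SPC: byte 1 is the page code, bytes 2-3 the big-endian page length); the function and variable names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Big-endian 16-bit read, equivalent to the kernel's get_unaligned_be16(). */
static uint16_t be16_at(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

/*
 * Validate a VPD INQUIRY response: byte 1 must echo the requested page,
 * and the page length in bytes 2..3 must be non-zero.  Returns the full
 * page size (header + payload) or -1 on a malformed response.
 */
static int vpd_page_size(const uint8_t *buf, uint8_t requested_page)
{
	uint16_t payload_len;

	if (buf[1] != requested_page)
		return -1;

	payload_len = be16_at(&buf[2]);
	if (payload_len == 0)
		return -1;

	return payload_len + 4;	/* 4-byte VPD header precedes the payload */
}

int main(void)
{
	uint8_t good[4]  = { 0x00, 0x80, 0x00, 0x0a };	/* page 0x80, 10 bytes */
	uint8_t empty[4] = { 0x00, 0x80, 0x00, 0x00 };	/* zero-length page */

	printf("good:  %d\n", vpd_page_size(good, 0x80));	/* 14 */
	printf("empty: %d\n", vpd_page_size(empty, 0x80));	/* -1 */
	return 0;
}
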
+diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
+index 66f507469a31a..591df0a91057e 100644
+--- a/drivers/scsi/scsi_debug.c
++++ b/drivers/scsi/scsi_debug.c
+@@ -1189,7 +1189,7 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr,
+ __func__, off_dst, scsi_bufflen(scp), act_len,
+ scsi_get_resid(scp));
+ n = scsi_bufflen(scp) - (off_dst + act_len);
+- scsi_set_resid(scp, min_t(int, scsi_get_resid(scp), n));
++ scsi_set_resid(scp, min_t(u32, scsi_get_resid(scp), n));
+ return 0;
+ }
+
+@@ -1562,7 +1562,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+ unsigned char pq_pdt;
+ unsigned char *arr;
+ unsigned char *cmd = scp->cmnd;
+- int alloc_len, n, ret;
++ u32 alloc_len, n;
++ int ret;
+ bool have_wlun, is_disk, is_zbc, is_disk_zbc;
+
+ alloc_len = get_unaligned_be16(cmd + 3);
+@@ -1585,7 +1586,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+ kfree(arr);
+ return check_condition_result;
+ } else if (0x1 & cmd[1]) { /* EVPD bit set */
+- int lu_id_num, port_group_id, target_dev_id, len;
++ int lu_id_num, port_group_id, target_dev_id;
++ u32 len;
+ char lu_id_str[6];
+ int host_no = devip->sdbg_host->shost->host_no;
+
+@@ -1676,9 +1678,9 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+ kfree(arr);
+ return check_condition_result;
+ }
+- len = min(get_unaligned_be16(arr + 2) + 4, alloc_len);
++ len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
+ ret = fill_from_dev_buffer(scp, arr,
+- min(len, SDEBUG_MAX_INQ_ARR_SZ));
++ min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
+ kfree(arr);
+ return ret;
+ }
+@@ -1714,7 +1716,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+ }
+ put_unaligned_be16(0x2100, arr + n); /* SPL-4 no version claimed */
+ ret = fill_from_dev_buffer(scp, arr,
+- min_t(int, alloc_len, SDEBUG_LONG_INQ_SZ));
++ min_t(u32, alloc_len, SDEBUG_LONG_INQ_SZ));
+ kfree(arr);
+ return ret;
+ }
+@@ -1729,8 +1731,8 @@ static int resp_requests(struct scsi_cmnd *scp,
+ unsigned char *cmd = scp->cmnd;
+ unsigned char arr[SCSI_SENSE_BUFFERSIZE]; /* assume >= 18 bytes */
+ bool dsense = !!(cmd[1] & 1);
+- int alloc_len = cmd[4];
+- int len = 18;
++ u32 alloc_len = cmd[4];
++ u32 len = 18;
+ int stopped_state = atomic_read(&devip->stopped);
+
+ memset(arr, 0, sizeof(arr));
+@@ -1774,7 +1776,7 @@ static int resp_requests(struct scsi_cmnd *scp,
+ arr[7] = 0xa;
+ }
+ }
+- return fill_from_dev_buffer(scp, arr, min_t(int, len, alloc_len));
++ return fill_from_dev_buffer(scp, arr, min_t(u32, len, alloc_len));
+ }
+
+ static int resp_start_stop(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+@@ -1856,7 +1858,7 @@ static int resp_readcap16(struct scsi_cmnd *scp,
+ {
+ unsigned char *cmd = scp->cmnd;
+ unsigned char arr[SDEBUG_READCAP16_ARR_SZ];
+- int alloc_len;
++ u32 alloc_len;
+
+ alloc_len = get_unaligned_be32(cmd + 10);
+ /* following just in case virtual_gb changed */
+@@ -1877,6 +1879,13 @@ static int resp_readcap16(struct scsi_cmnd *scp,
+ arr[14] |= 0x40;
+ }
+
++ /*
++ * Since the scsi_debug READ CAPACITY implementation always reports the
++ * total disk capacity, set RC BASIS = 1 for host-managed ZBC devices.
++ */
++ if (devip->zmodel == BLK_ZONED_HM)
++ arr[12] |= 1 << 4;
++
+ arr[15] = sdebug_lowest_aligned & 0xff;
+
+ if (have_dif_prot) {
+@@ -1885,7 +1894,7 @@ static int resp_readcap16(struct scsi_cmnd *scp,
+ }
+
+ return fill_from_dev_buffer(scp, arr,
+- min_t(int, alloc_len, SDEBUG_READCAP16_ARR_SZ));
++ min_t(u32, alloc_len, SDEBUG_READCAP16_ARR_SZ));
+ }
+
+ #define SDEBUG_MAX_TGTPGS_ARR_SZ 1412
+@@ -1896,8 +1905,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp,
+ unsigned char *cmd = scp->cmnd;
+ unsigned char *arr;
+ int host_no = devip->sdbg_host->shost->host_no;
+- int n, ret, alen, rlen;
+ int port_group_a, port_group_b, port_a, port_b;
++ u32 alen, n, rlen;
++ int ret;
+
+ alen = get_unaligned_be32(cmd + 6);
+ arr = kzalloc(SDEBUG_MAX_TGTPGS_ARR_SZ, GFP_ATOMIC);
+@@ -1959,9 +1969,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp,
+ * - The constructed command length
+ * - The maximum array size
+ */
+- rlen = min_t(int, alen, n);
++ rlen = min(alen, n);
+ ret = fill_from_dev_buffer(scp, arr,
+- min_t(int, rlen, SDEBUG_MAX_TGTPGS_ARR_SZ));
++ min_t(u32, rlen, SDEBUG_MAX_TGTPGS_ARR_SZ));
+ kfree(arr);
+ return ret;
+ }
+@@ -2311,7 +2321,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
+ {
+ int pcontrol, pcode, subpcode, bd_len;
+ unsigned char dev_spec;
+- int alloc_len, offset, len, target_dev_id;
++ u32 alloc_len, offset, len;
++ int target_dev_id;
+ int target = scp->device->id;
+ unsigned char *ap;
+ unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
+@@ -2467,7 +2478,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
+ arr[0] = offset - 1;
+ else
+ put_unaligned_be16((offset - 2), arr + 0);
+- return fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, offset));
++ return fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, offset));
+ }
+
+ #define SDEBUG_MAX_MSELECT_SZ 512
+@@ -2498,11 +2509,11 @@ static int resp_mode_select(struct scsi_cmnd *scp,
+ __func__, param_len, res);
+ md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2);
+ bd_len = mselect6 ? arr[3] : get_unaligned_be16(arr + 6);
+- if (md_len > 2) {
++ off = bd_len + (mselect6 ? 4 : 8);
++ if (md_len > 2 || off >= res) {
+ mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1);
+ return check_condition_result;
+ }
+- off = bd_len + (mselect6 ? 4 : 8);
+ mpage = arr[off] & 0x3f;
+ ps = !!(arr[off] & 0x80);
+ if (ps) {
+@@ -2582,7 +2593,8 @@ static int resp_ie_l_pg(unsigned char *arr)
+ static int resp_log_sense(struct scsi_cmnd *scp,
+ struct sdebug_dev_info *devip)
+ {
+- int ppc, sp, pcode, subpcode, alloc_len, len, n;
++ int ppc, sp, pcode, subpcode;
++ u32 alloc_len, len, n;
+ unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
+ unsigned char *cmd = scp->cmnd;
+
+@@ -2652,9 +2664,9 @@ static int resp_log_sense(struct scsi_cmnd *scp,
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
+ return check_condition_result;
+ }
+- len = min_t(int, get_unaligned_be16(arr + 2) + 4, alloc_len);
++ len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
+ return fill_from_dev_buffer(scp, arr,
+- min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ));
++ min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
+ }
+
+ static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip)
+@@ -2742,6 +2754,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
+ }
+ }
+
++static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
++ struct sdeb_zone_state *zsp)
++{
++ switch (zsp->z_cond) {
++ case ZC2_IMPLICIT_OPEN:
++ devip->nr_imp_open--;
++ break;
++ case ZC3_EXPLICIT_OPEN:
++ devip->nr_exp_open--;
++ break;
++ default:
++ WARN_ONCE(true, "Invalid zone %llu condition %x\n",
++ zsp->z_start, zsp->z_cond);
++ break;
++ }
++ zsp->z_cond = ZC5_FULL;
++}
++
+ static void zbc_inc_wp(struct sdebug_dev_info *devip,
+ unsigned long long lba, unsigned int num)
+ {
+@@ -2754,7 +2784,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
+ if (zsp->z_type == ZBC_ZONE_TYPE_SWR) {
+ zsp->z_wp += num;
+ if (zsp->z_wp >= zend)
+- zsp->z_cond = ZC5_FULL;
++ zbc_set_zone_full(devip, zsp);
+ return;
+ }
+
+@@ -2773,7 +2803,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
+ n = num;
+ }
+ if (zsp->z_wp >= zend)
+- zsp->z_cond = ZC5_FULL;
++ zbc_set_zone_full(devip, zsp);
+
+ num -= n;
+ lba += n;
+@@ -3610,7 +3640,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ return illegal_condition_result;
+ }
+- lrdp = kzalloc(lbdof_blen, GFP_ATOMIC);
++ lrdp = kzalloc(lbdof_blen, GFP_ATOMIC | __GFP_NOWARN);
+ if (lrdp == NULL)
+ return SCSI_MLQUEUE_HOST_BUSY;
+ if (sdebug_verbose)
+@@ -4258,13 +4288,15 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
+ mk_sense_invalid_opcode(scp);
+ return check_condition_result;
+ }
++ if (vnum == 0)
++ return 0; /* not an error */
+ a_num = is_bytchk3 ? 1 : vnum;
+ /* Treat following check like one for read (i.e. no write) access */
+ ret = check_device_access_params(scp, lba, a_num, false);
+ if (ret)
+ return ret;
+
+- arr = kcalloc(lb_size, vnum, GFP_ATOMIC);
++ arr = kcalloc(lb_size, vnum, GFP_ATOMIC | __GFP_NOWARN);
+ if (!arr) {
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
+ INSUFF_RES_ASCQ);
+@@ -4321,6 +4353,8 @@ static int resp_report_zones(struct scsi_cmnd *scp,
+ }
+ zs_lba = get_unaligned_be64(cmd + 2);
+ alloc_len = get_unaligned_be32(cmd + 10);
++ if (alloc_len == 0)
++ return 0; /* not an error */
+ rep_opts = cmd[14] & 0x3f;
+ partial = cmd[14] & 0x80;
+
+@@ -4333,7 +4367,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
+ rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD),
+ max_zones);
+
+- arr = kcalloc(RZONES_DESC_HD, alloc_len, GFP_ATOMIC);
++ arr = kzalloc(alloc_len, GFP_ATOMIC | __GFP_NOWARN);
+ if (!arr) {
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
+ INSUFF_RES_ASCQ);
+@@ -4425,7 +4459,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
+ put_unaligned_be64(sdebug_capacity - 1, arr + 8);
+
+ rep_len = (unsigned long)desc - (unsigned long)arr;
+- ret = fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, rep_len));
++ ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len));
+
+ fini:
+ read_unlock(macc_lckp);
+@@ -4648,6 +4682,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip,
+ struct sdeb_zone_state *zsp)
+ {
+ enum sdebug_z_cond zc;
++ struct sdeb_store_info *sip = devip2sip(devip, false);
+
+ if (zbc_zone_is_conv(zsp))
+ return;
+@@ -4659,6 +4694,10 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip,
+ if (zsp->z_cond == ZC4_CLOSED)
+ devip->nr_closed--;
+
++ if (zsp->z_wp > zsp->z_start)
++ memset(sip->storep + zsp->z_start * sdebug_sector_size, 0,
++ (zsp->z_wp - zsp->z_start) * sdebug_sector_size);
++
+ zsp->z_non_seq_resource = false;
+ zsp->z_wp = zsp->z_start;
+ zsp->z_cond = ZC1_EMPTY;
+@@ -7100,8 +7139,12 @@ static int sdebug_add_host_helper(int per_host_idx)
+ dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts);
+
+ error = device_register(&sdbg_host->dev);
+- if (error)
++ if (error) {
++ spin_lock(&sdebug_host_list_lock);
++ list_del(&sdbg_host->host_list);
++ spin_unlock(&sdebug_host_list_lock);
+ goto clean;
++ }
+
+ ++sdebug_num_hosts;
+ return 0;
+@@ -7113,7 +7156,10 @@ clean:
+ kfree(sdbg_devinfo->zstate);
+ kfree(sdbg_devinfo);
+ }
+- kfree(sdbg_host);
++ if (sdbg_host->dev.release)
++ put_device(&sdbg_host->dev);
++ else
++ kfree(sdbg_host);
+ pr_warn("%s: failed, errno=%d\n", __func__, -error);
+ return error;
+ }
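
Many of the scsi_debug hunks above change allocation-length variables from int to u32 and switch min_t(int, ...) to min_t(u32, ...): CDB allocation lengths are unsigned 16- or 32-bit values, and comparing them as signed int misbehaves once the value exceeds INT_MAX. A self-contained C illustration of the difference follows; the helper name is illustrative.

#include <stdint.h>
#include <stdio.h>

/* Clamp the response to what the initiator asked for and what fits in arr. */
static uint32_t clamp_response_len(uint32_t alloc_len, uint32_t data_len,
				   uint32_t arr_size)
{
	uint32_t len = data_len < alloc_len ? data_len : alloc_len;

	return len < arr_size ? len : arr_size;
}

int main(void)
{
	/* A 32-bit allocation length taken straight from a CDB. */
	uint32_t alloc_len = 0xffffffffu;
	uint32_t data_len = 32;

	/* Unsigned comparison: the 32 bytes of data win, as intended. */
	printf("u32 min: %u\n", (unsigned)clamp_response_len(alloc_len, data_len, 64));

	/*
	 * The same bits reinterpreted as int are negative (implementation-
	 * defined, -1 on the usual two's-complement targets), so a signed
	 * min() would pick the bogus value instead of the data length.
	 */
	int signed_alloc = (int)alloc_len;
	int signed_min = signed_alloc < 32 ? signed_alloc : 32;
	printf("int min: %d\n", signed_min);	/* -1: truncated response */
	return 0;
}
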
+diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
+index d9109771f274d..db8517f1a485a 100644
+--- a/drivers/scsi/scsi_debugfs.c
++++ b/drivers/scsi/scsi_debugfs.c
+@@ -9,6 +9,7 @@
+ static const char *const scsi_cmd_flags[] = {
+ SCSI_CMD_FLAG_NAME(TAGGED),
+ SCSI_CMD_FLAG_NAME(INITIALIZED),
++ SCSI_CMD_FLAG_NAME(LAST),
+ };
+ #undef SCSI_CMD_FLAG_NAME
+
+diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
+index c7080454aea99..bd110a93d0472 100644
+--- a/drivers/scsi/scsi_devinfo.c
++++ b/drivers/scsi/scsi_devinfo.c
+@@ -233,6 +233,7 @@ static struct {
+ {"SGI", "RAID5", "*", BLIST_SPARSELUN},
+ {"SGI", "TP9100", "*", BLIST_REPORTLUN2},
+ {"SGI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
++ {"SKhynix", "H28U74301AMR", NULL, BLIST_SKIP_VPD_PAGES},
+ {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
+index b6c86cce57bfa..dd9f5778f687d 100644
+--- a/drivers/scsi/scsi_error.c
++++ b/drivers/scsi/scsi_error.c
+@@ -135,6 +135,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd)
+ return true;
+ }
+
++static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ list_del_init(&scmd->eh_entry);
++ /*
++ * If the abort succeeds, and there is no further
++ * EH action, clear the ->last_reset time.
++ */
++ if (list_empty(&shost->eh_abort_list) &&
++ list_empty(&shost->eh_cmd_q))
++ if (shost->eh_deadline != -1)
++ shost->last_reset = 0;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++}
++
+ /**
+ * scmd_eh_abort_handler - Handle command aborts
+ * @work: command to be aborted.
+@@ -152,6 +169,7 @@ scmd_eh_abort_handler(struct work_struct *work)
+ container_of(work, struct scsi_cmnd, abort_work.work);
+ struct scsi_device *sdev = scmd->device;
+ enum scsi_disposition rtn;
++ unsigned long flags;
+
+ if (scsi_host_eh_past_deadline(sdev->host)) {
+ SCSI_LOG_ERROR_RECOVERY(3,
+@@ -175,12 +193,14 @@ scmd_eh_abort_handler(struct work_struct *work)
+ SCSI_LOG_ERROR_RECOVERY(3,
+ scmd_printk(KERN_WARNING, scmd,
+ "retry aborted command\n"));
++ scsi_eh_complete_abort(scmd, sdev->host);
+ scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+ return;
+ } else {
+ SCSI_LOG_ERROR_RECOVERY(3,
+ scmd_printk(KERN_WARNING, scmd,
+ "finish aborted command\n"));
++ scsi_eh_complete_abort(scmd, sdev->host);
+ scsi_finish_command(scmd);
+ return;
+ }
+@@ -193,6 +213,9 @@ scmd_eh_abort_handler(struct work_struct *work)
+ }
+ }
+
++ spin_lock_irqsave(sdev->host->host_lock, flags);
++ list_del_init(&scmd->eh_entry);
++ spin_unlock_irqrestore(sdev->host->host_lock, flags);
+ scsi_eh_scmd_add(scmd);
+ }
+
+@@ -223,6 +246,8 @@ scsi_abort_command(struct scsi_cmnd *scmd)
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (shost->eh_deadline != -1 && !shost->last_reset)
+ shost->last_reset = jiffies;
++ BUG_ON(!list_empty(&scmd->eh_entry));
++ list_add_tail(&scmd->eh_entry, &shost->eh_abort_list);
+ spin_unlock_irqrestore(shost->host_lock, flags);
+
+ scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
+@@ -318,19 +343,11 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
+
+ if (rtn == BLK_EH_DONE) {
+ /*
+- * Set the command to complete first in order to prevent a real
+- * completion from releasing the command while error handling
+- * is using it. If the command was already completed, then the
+- * lower level driver beat the timeout handler, and it is safe
+- * to return without escalating error recovery.
+- *
+- * If timeout handling lost the race to a real completion, the
+- * block layer may ignore that due to a fake timeout injection,
+- * so return RESET_TIMER to allow error handling another shot
+- * at this command.
++ * If scsi_done() has already set SCMD_STATE_COMPLETE, do not
++ * modify *scmd.
+ */
+ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
+- return BLK_EH_RESET_TIMER;
++ return BLK_EH_DONE;
+ if (scsi_abort_command(scmd) != SUCCESS) {
+ set_host_byte(scmd, DID_TIME_OUT);
+ scsi_eh_scmd_add(scmd);
+@@ -460,8 +477,13 @@ static void scsi_report_sense(struct scsi_device *sdev,
+
+ if (sshdr->asc == 0x29) {
+ evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED;
+- sdev_printk(KERN_WARNING, sdev,
+- "Power-on or device reset occurred\n");
++ /*
++ * Do not print message if it is an expected side-effect
++ * of runtime PM.
++ */
++ if (!sdev->silence_suspend)
++ sdev_printk(KERN_WARNING, sdev,
++ "Power-on or device reset occurred\n");
+ }
+
+ if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) {
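
The scsi_error.c timeout hunk above relies on the usual ownership handshake: both the normal completion path and the timeout handler try to set the same "complete" bit atomically, and only the path that flips it first may touch the command; the loser backs off, which is why scsi_times_out() now returns BLK_EH_DONE instead of rearming the timer. A self-contained C11 model of that handshake is below; the type and function names are invented for illustration.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct cmd_model {
	atomic_bool complete;	/* set by whoever finishes the command first */
};

/* Returns true if this caller won ownership of the command. */
static bool claim_command(struct cmd_model *cmd)
{
	/* atomic exchange == test_and_set: an old value of false means we won */
	return !atomic_exchange(&cmd->complete, true);
}

int main(void)
{
	struct cmd_model cmd;

	atomic_init(&cmd.complete, false);

	/* Normal completion arrives first and wins. */
	printf("completion owns cmd: %d\n", claim_command(&cmd));	/* 1 */

	/*
	 * The timeout handler loses the race and must back off, the
	 * equivalent of returning BLK_EH_DONE without touching the command.
	 */
	printf("timeout owns cmd:    %d\n", claim_command(&cmd));	/* 0 */
	return 0;
}
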
+diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
+index 6ff2207bd45a0..6e2f82152b4a1 100644
+--- a/drivers/scsi/scsi_ioctl.c
++++ b/drivers/scsi/scsi_ioctl.c
+@@ -347,6 +347,8 @@ static int scsi_fill_sghdr_rq(struct scsi_device *sdev, struct request *rq,
+ {
+ struct scsi_request *req = scsi_req(rq);
+
++ if (hdr->cmd_len < 6)
++ return -EMSGSIZE;
+ if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
+ return -EFAULT;
+ if (!scsi_cmd_allowed(req->cmd, mode))
+@@ -455,7 +457,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
+ goto out_free_cdb;
+
+ ret = 0;
+- if (hdr->iovec_count) {
++ if (hdr->iovec_count && hdr->dxfer_len) {
+ struct iov_iter i;
+ struct iovec *iov = NULL;
+
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index 572673873ddf8..5525e6ffee537 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -1143,6 +1143,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
+ cmd->sense_buffer = buf;
+ cmd->prot_sdb = prot;
+ cmd->flags = flags;
++ INIT_LIST_HEAD(&cmd->eh_entry);
+ INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
+ cmd->jiffies_at_alloc = jiffies_at_alloc;
+ cmd->retries = retries;
+@@ -1174,8 +1175,6 @@ static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev,
+ }
+
+ cmd->cmd_len = scsi_req(req)->cmd_len;
+- if (cmd->cmd_len == 0)
+- cmd->cmd_len = scsi_command_size(cmd->cmnd);
+ cmd->cmnd = scsi_req(req)->cmd;
+ cmd->transfersize = blk_rq_bytes(req);
+ cmd->allowed = scsi_req(req)->retries;
+@@ -1477,6 +1476,7 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+ */
+ SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
+ "queuecommand : device blocked\n"));
++ atomic_dec(&cmd->device->iorequest_cnt);
+ return SCSI_MLQUEUE_DEVICE_BUSY;
+ }
+
+@@ -1509,6 +1509,7 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+ trace_scsi_dispatch_cmd_start(cmd);
+ rtn = host->hostt->queuecommand(host, cmd);
+ if (rtn) {
++ atomic_dec(&cmd->device->iorequest_cnt);
+ trace_scsi_dispatch_cmd_error(cmd, rtn);
+ if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
+ rtn != SCSI_MLQUEUE_TARGET_BUSY)
+@@ -2075,7 +2076,7 @@ EXPORT_SYMBOL_GPL(scsi_mode_select);
+ /**
+ * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary.
+ * @sdev: SCSI device to be queried
+- * @dbd: set if mode sense will allow block descriptors to be returned
++ * @dbd: set to prevent mode sense from returning block descriptors
+ * @modepage: mode page being requested
+ * @buffer: request buffer (may not be smaller than eight bytes)
+ * @len: length of request buffer.
+@@ -2110,18 +2111,18 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
+ sshdr = &my_sshdr;
+
+ retry:
+- use_10_for_ms = sdev->use_10_for_ms;
++ use_10_for_ms = sdev->use_10_for_ms || len > 255;
+
+ if (use_10_for_ms) {
+- if (len < 8)
+- len = 8;
++ if (len < 8 || len > 65535)
++ return -EINVAL;
+
+ cmd[0] = MODE_SENSE_10;
+- cmd[8] = len;
++ put_unaligned_be16(len, &cmd[7]);
+ header_length = 8;
+ } else {
+ if (len < 4)
+- len = 4;
++ return -EINVAL;
+
+ cmd[0] = MODE_SENSE;
+ cmd[4] = len;
+@@ -2145,9 +2146,15 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
+ if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
+ (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
+ /*
+- * Invalid command operation code
++ * Invalid command operation code: retry using
++ * MODE SENSE(6) if this was a MODE SENSE(10)
++ * request, except if the request mode page is
++ * too large for MODE SENSE single byte
++ * allocation length field.
+ */
+ if (use_10_for_ms) {
++ if (len > 255)
++ return -EIO;
+ sdev->use_10_for_ms = 0;
+ goto retry;
+ }
+@@ -2171,12 +2178,11 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
+ data->longlba = 0;
+ data->block_descriptor_length = 0;
+ } else if (use_10_for_ms) {
+- data->length = buffer[0]*256 + buffer[1] + 2;
++ data->length = get_unaligned_be16(&buffer[0]) + 2;
+ data->medium_type = buffer[2];
+ data->device_specific = buffer[3];
+ data->longlba = buffer[4] & 0x01;
+- data->block_descriptor_length = buffer[6]*256
+- + buffer[7];
++ data->block_descriptor_length = get_unaligned_be16(&buffer[6]);
+ } else {
+ data->length = buffer[0] + 1;
+ data->medium_type = buffer[1];
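
The scsi_lib.c hunk above makes scsi_mode_sense() pick MODE SENSE(10) whenever the buffer exceeds 255 bytes (the 6-byte CDB only has a single-byte allocation length), store the length big-endian in bytes 7-8, and reject undersized buffers instead of silently growing them. A small self-contained sketch of the CDB construction follows; the opcode values are the standard SCSI ones (0x1a and 0x5a), everything else is illustrative.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MODE_SENSE_6   0x1a
#define MODE_SENSE_10  0x5a

/*
 * Build a MODE SENSE CDB for the given page and buffer length.
 * Returns the CDB length (6 or 10), or -1 if len cannot be encoded.
 */
static int build_mode_sense(uint8_t cdb[10], uint8_t page, int len,
			    int prefer_10)
{
	memset(cdb, 0, 10);

	if (prefer_10 || len > 255) {
		if (len < 8 || len > 65535)
			return -1;
		cdb[0] = MODE_SENSE_10;
		cdb[2] = page;
		cdb[7] = (uint8_t)(len >> 8);	/* big-endian allocation length */
		cdb[8] = (uint8_t)(len & 0xff);
		return 10;
	}

	if (len < 4)
		return -1;
	cdb[0] = MODE_SENSE_6;
	cdb[2] = page;
	cdb[4] = (uint8_t)len;			/* single-byte allocation length */
	return 6;
}

int main(void)
{
	uint8_t cdb[10];
	int n = build_mode_sense(cdb, 0x08, 512, 0);

	printf("CDB len %d, opcode 0x%02x, alloc len %u\n",
	       n, cdb[0], (unsigned)((cdb[7] << 8) | cdb[8]));
	return 0;
}
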
+diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
+index 3717eea37ecb3..e91a0a5bc7a3e 100644
+--- a/drivers/scsi/scsi_pm.c
++++ b/drivers/scsi/scsi_pm.c
+@@ -262,7 +262,7 @@ static int sdev_runtime_resume(struct device *dev)
+ blk_pre_runtime_resume(sdev->request_queue);
+ if (pm && pm->runtime_resume)
+ err = pm->runtime_resume(dev);
+- blk_post_runtime_resume(sdev->request_queue, err);
++ blk_post_runtime_resume(sdev->request_queue);
+
+ return err;
+ }
+diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
+index d6982d3557396..94603e64cc6bf 100644
+--- a/drivers/scsi/scsi_proc.c
++++ b/drivers/scsi/scsi_proc.c
+@@ -311,7 +311,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
+ size_t length, loff_t *ppos)
+ {
+ int host, channel, id, lun;
+- char *buffer, *p;
++ char *buffer, *end, *p;
+ int err;
+
+ if (!buf || length > PAGE_SIZE)
+@@ -326,10 +326,14 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
+ goto out;
+
+ err = -EINVAL;
+- if (length < PAGE_SIZE)
+- buffer[length] = '\0';
+- else if (buffer[PAGE_SIZE-1])
+- goto out;
++ if (length < PAGE_SIZE) {
++ end = buffer + length;
++ *end = '\0';
++ } else {
++ end = buffer + PAGE_SIZE - 1;
++ if (*end)
++ goto out;
++ }
+
+ /*
+ * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
+@@ -338,10 +342,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
+ if (!strncmp("scsi add-single-device", buffer, 22)) {
+ p = buffer + 23;
+
+- host = simple_strtoul(p, &p, 0);
+- channel = simple_strtoul(p + 1, &p, 0);
+- id = simple_strtoul(p + 1, &p, 0);
+- lun = simple_strtoul(p + 1, &p, 0);
++ host = (p < end) ? simple_strtoul(p, &p, 0) : 0;
++ channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
++ id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
++ lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+
+ err = scsi_add_single_device(host, channel, id, lun);
+
+@@ -352,10 +356,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
+ } else if (!strncmp("scsi remove-single-device", buffer, 25)) {
+ p = buffer + 26;
+
+- host = simple_strtoul(p, &p, 0);
+- channel = simple_strtoul(p + 1, &p, 0);
+- id = simple_strtoul(p + 1, &p, 0);
+- lun = simple_strtoul(p + 1, &p, 0);
++ host = (p < end) ? simple_strtoul(p, &p, 0) : 0;
++ channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
++ id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
++ lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+
+ err = scsi_remove_single_device(host, channel, id, lun);
+ }
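
The scsi_proc.c hunk above terminates the buffer explicitly and checks the parse pointer against the end before each simple_strtoul() call, so an "scsi add-single-device" line with missing fields can no longer read past the buffer. Below is a simplified, self-contained C model of a bounds-checked parse of the four numeric fields; it reports how many fields were actually present rather than defaulting the rest to zero, and the helper name is illustrative.

#include <stdio.h>
#include <stdlib.h>

/* Parse up to four unsigned fields, stopping cleanly at the end of buf. */
static int parse_hcid(const char *buf, const char *end, unsigned long out[4])
{
	const char *p = buf;
	char *next;
	int i;

	for (i = 0; i < 4; i++) {
		if (p >= end)		/* missing field: report what we got */
			return i;
		out[i] = strtoul(p, &next, 0);
		p = next + 1;		/* skip the single separator */
	}
	return i;
}

int main(void)
{
	const char line[] = "0 1 2 3";
	const char shortline[] = "0 1";
	unsigned long v[4] = { 0, 0, 0, 0 };

	printf("full:  %d fields\n",
	       parse_hcid(line, line + sizeof(line) - 1, v));
	printf("short: %d fields\n",
	       parse_hcid(shortline, shortline + sizeof(shortline) - 1, v));
	return 0;
}
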
+diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
+index fe22191522a3b..86c10edbb5f1e 100644
+--- a/drivers/scsi/scsi_scan.c
++++ b/drivers/scsi/scsi_scan.c
+@@ -198,6 +198,53 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
+ SCSI_TIMEOUT, 3, NULL);
+ }
+
++static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
++ unsigned int depth)
++{
++ int new_shift = sbitmap_calculate_shift(depth);
++ bool need_alloc = !sdev->budget_map.map;
++ bool need_free = false;
++ int ret;
++ struct sbitmap sb_backup;
++
++ depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev));
++
++ /*
++ * realloc if new shift is calculated, which is caused by setting
++ * up one new default queue depth after calling ->slave_configure
++ */
++ if (!need_alloc && new_shift != sdev->budget_map.shift)
++ need_alloc = need_free = true;
++
++ if (!need_alloc)
++ return 0;
++
++ /*
++ * Request queue has to be frozen for reallocating budget map,
++ * and here disk isn't added yet, so freezing is pretty fast
++ */
++ if (need_free) {
++ blk_mq_freeze_queue(sdev->request_queue);
++ sb_backup = sdev->budget_map;
++ }
++ ret = sbitmap_init_node(&sdev->budget_map,
++ scsi_device_max_queue_depth(sdev),
++ new_shift, GFP_KERNEL,
++ sdev->request_queue->node, false, true);
++ if (!ret)
++ sbitmap_resize(&sdev->budget_map, depth);
++
++ if (need_free) {
++ if (ret)
++ sdev->budget_map = sb_backup;
++ else
++ sbitmap_free(&sb_backup);
++ ret = 0;
++ blk_mq_unfreeze_queue(sdev->request_queue);
++ }
++ return ret;
++}
++
+ /**
+ * scsi_alloc_sdev - allocate and setup a scsi_Device
+ * @starget: which target to allocate a &scsi_device for
+@@ -291,11 +338,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
+ * default device queue depth to figure out sbitmap shift
+ * since we use this queue depth most of times.
+ */
+- if (sbitmap_init_node(&sdev->budget_map,
+- scsi_device_max_queue_depth(sdev),
+- sbitmap_calculate_shift(depth),
+- GFP_KERNEL, sdev->request_queue->node,
+- false, true)) {
++ if (scsi_realloc_sdev_budget_map(sdev, depth)) {
+ put_device(&starget->dev);
+ kfree(sdev);
+ goto out;
+@@ -1001,6 +1044,13 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
+ }
+ return SCSI_SCAN_NO_RESPONSE;
+ }
++
++ /*
++ * The queue_depth is often changed in ->slave_configure.
++ * Set up budget map again since memory consumption of
++ * the map depends on actual queue depth.
++ */
++ scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth);
+ }
+
+ if (sdev->scsi_level >= SCSI_3)
+@@ -1156,8 +1206,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
+ * that no LUN is present, so don't add sdev in these cases.
+ * Two specific examples are:
+ * 1) NetApp targets: return PQ=1, PDT=0x1f
+- * 2) IBM/2145 targets: return PQ=1, PDT=0
+- * 3) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
++ * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
+ * in the UFI 1.0 spec (we cannot rely on reserved bits).
+ *
+ * References:
+@@ -1171,8 +1220,8 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
+ * PDT=00h Direct-access device (floppy)
+ * PDT=1Fh none (no FDD connected to the requested logical unit)
+ */
+- if (((result[0] >> 5) == 1 ||
+- (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f)) &&
++ if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) &&
++ (result[0] & 0x1f) == 0x1f &&
+ !scsi_is_wlun(lun)) {
+ SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev,
+ "scsi scan: peripheral device type"
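
In the scsi_scan.c hunks above, scsi_realloc_sdev_budget_map() snapshots the old sbitmap, initialises a new one sized for the queue depth chosen by ->slave_configure, and rolls back to the snapshot if allocation fails, all while the request queue is frozen. The snapshot-and-rollback shape is a general one; below is a self-contained C sketch of it with a plain byte buffer standing in for the sbitmap. The names are invented and no locking or queue freezing is modelled.

#include <stdio.h>
#include <stdlib.h>

struct budget_map {
	unsigned char *bits;
	size_t depth;
};

/*
 * Resize the map to new_depth.  On allocation failure the original map
 * is restored untouched, mirroring the sb_backup dance in scsi_scan.c.
 */
static int budget_map_realloc(struct budget_map *map, size_t new_depth)
{
	struct budget_map backup = *map;	/* snapshot the old map */

	/* Initialise the new map in place. */
	map->bits = calloc(new_depth, 1);
	map->depth = new_depth;
	if (!map->bits) {
		*map = backup;			/* allocation failed: roll back */
		return -1;
	}

	free(backup.bits);			/* success: drop the old map */
	return 0;
}

int main(void)
{
	struct budget_map map = { .bits = calloc(32, 1), .depth = 32 };

	if (budget_map_realloc(&map, 128) == 0)
		printf("resized to %zu entries\n", map.depth);
	free(map.bits);
	return 0;
}
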
+diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
+index a35841b34bfd9..774864b54b97c 100644
+--- a/drivers/scsi/scsi_sysfs.c
++++ b/drivers/scsi/scsi_sysfs.c
+@@ -797,6 +797,7 @@ store_state_field(struct device *dev, struct device_attribute *attr,
+ int i, ret;
+ struct scsi_device *sdev = to_scsi_device(dev);
+ enum scsi_device_state state = 0;
++ bool rescan_dev = false;
+
+ for (i = 0; i < ARRAY_SIZE(sdev_states); i++) {
+ const int len = strlen(sdev_states[i].name);
+@@ -815,20 +816,35 @@ store_state_field(struct device *dev, struct device_attribute *attr,
+ }
+
+ mutex_lock(&sdev->state_mutex);
+- ret = scsi_device_set_state(sdev, state);
+- /*
+- * If the device state changes to SDEV_RUNNING, we need to
+- * run the queue to avoid I/O hang, and rescan the device
+- * to revalidate it. Running the queue first is necessary
+- * because another thread may be waiting inside
+- * blk_mq_freeze_queue_wait() and because that call may be
+- * waiting for pending I/O to finish.
+- */
+- if (ret == 0 && state == SDEV_RUNNING) {
++ switch (sdev->sdev_state) {
++ case SDEV_RUNNING:
++ case SDEV_OFFLINE:
++ break;
++ default:
++ mutex_unlock(&sdev->state_mutex);
++ return -EINVAL;
++ }
++ if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) {
++ ret = 0;
++ } else {
++ ret = scsi_device_set_state(sdev, state);
++ if (ret == 0 && state == SDEV_RUNNING)
++ rescan_dev = true;
++ }
++ mutex_unlock(&sdev->state_mutex);
++
++ if (rescan_dev) {
++ /*
++ * If the device state changes to SDEV_RUNNING, we need to
++ * run the queue to avoid I/O hang, and rescan the device
++ * to revalidate it. Running the queue first is necessary
++ * because another thread may be waiting inside
++ * blk_mq_freeze_queue_wait() and because that call may be
++ * waiting for pending I/O to finish.
++ */
+ blk_mq_run_hw_queues(sdev->request_queue, true);
+ scsi_rescan_device(dev);
+ }
+- mutex_unlock(&sdev->state_mutex);
+
+ return ret == 0 ? count : -EINVAL;
+ }
+@@ -1388,6 +1404,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
+ * We're treating error on bsg register as non-fatal, so
+ * pretend nothing went wrong.
+ */
++ error = PTR_ERR(sdev->bsg_dev);
+ sdev_printk(KERN_INFO, sdev,
+ "Failed to register bsg queue, errno=%d\n",
+ error);
+diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
+index 60e406bcf42a9..a2524106206db 100644
+--- a/drivers/scsi/scsi_transport_fc.c
++++ b/drivers/scsi/scsi_transport_fc.c
+@@ -34,7 +34,7 @@ static int fc_bsg_hostadd(struct Scsi_Host *, struct fc_host_attrs *);
+ static int fc_bsg_rportadd(struct Scsi_Host *, struct fc_rport *);
+ static void fc_bsg_remove(struct request_queue *);
+ static void fc_bsg_goose_queue(struct fc_rport *);
+-static void fc_li_stats_update(struct fc_fn_li_desc *li_desc,
++static void fc_li_stats_update(u16 event_type,
+ struct fc_fpin_stats *stats);
+ static void fc_delivery_stats_update(u32 reason_code,
+ struct fc_fpin_stats *stats);
+@@ -670,42 +670,34 @@ fc_find_rport_by_wwpn(struct Scsi_Host *shost, u64 wwpn)
+ EXPORT_SYMBOL(fc_find_rport_by_wwpn);
+
+ static void
+-fc_li_stats_update(struct fc_fn_li_desc *li_desc,
++fc_li_stats_update(u16 event_type,
+ struct fc_fpin_stats *stats)
+ {
+- stats->li += be32_to_cpu(li_desc->event_count);
+- switch (be16_to_cpu(li_desc->event_type)) {
++ stats->li++;
++ switch (event_type) {
+ case FPIN_LI_UNKNOWN:
+- stats->li_failure_unknown +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_failure_unknown++;
+ break;
+ case FPIN_LI_LINK_FAILURE:
+- stats->li_link_failure_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_link_failure_count++;
+ break;
+ case FPIN_LI_LOSS_OF_SYNC:
+- stats->li_loss_of_sync_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_loss_of_sync_count++;
+ break;
+ case FPIN_LI_LOSS_OF_SIG:
+- stats->li_loss_of_signals_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_loss_of_signals_count++;
+ break;
+ case FPIN_LI_PRIM_SEQ_ERR:
+- stats->li_prim_seq_err_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_prim_seq_err_count++;
+ break;
+ case FPIN_LI_INVALID_TX_WD:
+- stats->li_invalid_tx_word_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_invalid_tx_word_count++;
+ break;
+ case FPIN_LI_INVALID_CRC:
+- stats->li_invalid_crc_count +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_invalid_crc_count++;
+ break;
+ case FPIN_LI_DEVICE_SPEC:
+- stats->li_device_specific +=
+- be32_to_cpu(li_desc->event_count);
++ stats->li_device_specific++;
+ break;
+ }
+ }
+@@ -767,6 +759,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv)
+ struct fc_rport *attach_rport = NULL;
+ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
+ struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
++ u16 event_type = be16_to_cpu(li_desc->event_type);
+ u64 wwpn;
+
+ rport = fc_find_rport_by_wwpn(shost,
+@@ -775,7 +768,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv)
+ (rport->roles & FC_PORT_ROLE_FCP_TARGET ||
+ rport->roles & FC_PORT_ROLE_NVME_TARGET)) {
+ attach_rport = rport;
+- fc_li_stats_update(li_desc, &attach_rport->fpin_stats);
++ fc_li_stats_update(event_type, &attach_rport->fpin_stats);
+ }
+
+ if (be32_to_cpu(li_desc->pname_count) > 0) {
+@@ -789,14 +782,14 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv)
+ rport->roles & FC_PORT_ROLE_NVME_TARGET)) {
+ if (rport == attach_rport)
+ continue;
+- fc_li_stats_update(li_desc,
++ fc_li_stats_update(event_type,
+ &rport->fpin_stats);
+ }
+ }
+ }
+
+ if (fc_host->port_name == be64_to_cpu(li_desc->attached_wwpn))
+- fc_li_stats_update(li_desc, &fc_host->fpin_stats);
++ fc_li_stats_update(event_type, &fc_host->fpin_stats);
+ }
+
+ /*
+diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
+index 78343d3f93857..e044b65ee0d08 100644
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -86,6 +86,9 @@ struct iscsi_internal {
+ struct transport_container session_cont;
+ };
+
++static DEFINE_IDR(iscsi_ep_idr);
++static DEFINE_MUTEX(iscsi_ep_idr_mutex);
++
+ static atomic_t iscsi_session_nr; /* sysfs session id for next new session */
+ static struct workqueue_struct *iscsi_eh_timer_workq;
+
+@@ -169,6 +172,11 @@ struct device_attribute dev_attr_##_prefix##_##_name = \
+ static void iscsi_endpoint_release(struct device *dev)
+ {
+ struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
++
++ mutex_lock(&iscsi_ep_idr_mutex);
++ idr_remove(&iscsi_ep_idr, ep->id);
++ mutex_unlock(&iscsi_ep_idr_mutex);
++
+ kfree(ep);
+ }
+
+@@ -181,7 +189,7 @@ static ssize_t
+ show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
+- return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
++ return sysfs_emit(buf, "%d\n", ep->id);
+ }
+ static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
+
+@@ -194,48 +202,37 @@ static struct attribute_group iscsi_endpoint_group = {
+ .attrs = iscsi_endpoint_attrs,
+ };
+
+-#define ISCSI_MAX_EPID -1
+-
+-static int iscsi_match_epid(struct device *dev, const void *data)
+-{
+- struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
+- const uint64_t *epid = data;
+-
+- return *epid == ep->id;
+-}
+-
+ struct iscsi_endpoint *
+ iscsi_create_endpoint(int dd_size)
+ {
+- struct device *dev;
+ struct iscsi_endpoint *ep;
+- uint64_t id;
+- int err;
+-
+- for (id = 1; id < ISCSI_MAX_EPID; id++) {
+- dev = class_find_device(&iscsi_endpoint_class, NULL, &id,
+- iscsi_match_epid);
+- if (!dev)
+- break;
+- else
+- put_device(dev);
+- }
+- if (id == ISCSI_MAX_EPID) {
+- printk(KERN_ERR "Too many connections. Max supported %u\n",
+- ISCSI_MAX_EPID - 1);
+- return NULL;
+- }
++ int err, id;
+
+ ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL);
+ if (!ep)
+ return NULL;
+
++ mutex_lock(&iscsi_ep_idr_mutex);
++
++ /*
++ * First endpoint id should be 1 to comply with user space
++ * applications (iscsid).
++ */
++ id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO);
++ if (id < 0) {
++ mutex_unlock(&iscsi_ep_idr_mutex);
++ printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n",
++ id);
++ goto free_ep;
++ }
++ mutex_unlock(&iscsi_ep_idr_mutex);
++
+ ep->id = id;
+ ep->dev.class = &iscsi_endpoint_class;
+- dev_set_name(&ep->dev, "ep-%llu", (unsigned long long) id);
++ dev_set_name(&ep->dev, "ep-%d", id);
+ err = device_register(&ep->dev);
+ if (err)
+- goto free_ep;
++ goto put_dev;
+
+ err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group);
+ if (err)
+@@ -249,6 +246,12 @@ unregister_dev:
+ device_unregister(&ep->dev);
+ return NULL;
+
++put_dev:
++ mutex_lock(&iscsi_ep_idr_mutex);
++ idr_remove(&iscsi_ep_idr, id);
++ mutex_unlock(&iscsi_ep_idr_mutex);
++ put_device(&ep->dev);
++ return NULL;
+ free_ep:
+ kfree(ep);
+ return NULL;
+@@ -276,14 +279,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint);
+ */
+ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle)
+ {
+- struct device *dev;
++ struct iscsi_endpoint *ep;
+
+- dev = class_find_device(&iscsi_endpoint_class, NULL, &handle,
+- iscsi_match_epid);
+- if (!dev)
+- return NULL;
++ mutex_lock(&iscsi_ep_idr_mutex);
++ ep = idr_find(&iscsi_ep_idr, handle);
++ if (!ep)
++ goto unlock;
+
+- return iscsi_dev_to_endpoint(dev);
++ get_device(&ep->dev);
++unlock:
++ mutex_unlock(&iscsi_ep_idr_mutex);
++ return ep;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint);
+
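
The two scsi_transport_iscsi hunks above replace the linear class_find_device() scan for a free endpoint ID with an IDR protected by a mutex: idr_alloc() hands out IDs starting at 1 (so iscsid keeps seeing non-zero handles), idr_find() resolves a handle back to the endpoint, and idr_remove() runs from the release callback. A kernel-style sketch of that allocation pattern follows; struct my_endpoint and the function names are placeholders, and the real transport additionally takes a device reference on lookup, which this sketch omits.

#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_IDR(ep_idr);
static DEFINE_MUTEX(ep_idr_mutex);

struct my_endpoint {
	int id;
	/* ... */
};

static struct my_endpoint *ep_create(void)
{
	struct my_endpoint *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	int id;

	if (!ep)
		return NULL;

	mutex_lock(&ep_idr_mutex);
	/* Start at 1 so userspace never sees a handle of 0. */
	id = idr_alloc(&ep_idr, ep, 1, -1, GFP_KERNEL);
	mutex_unlock(&ep_idr_mutex);

	if (id < 0) {
		kfree(ep);
		return NULL;
	}

	ep->id = id;
	return ep;
}

static struct my_endpoint *ep_lookup(int id)
{
	struct my_endpoint *ep;

	mutex_lock(&ep_idr_mutex);
	ep = idr_find(&ep_idr, id);
	mutex_unlock(&ep_idr_mutex);
	return ep;
}

static void ep_destroy(struct my_endpoint *ep)
{
	mutex_lock(&ep_idr_mutex);
	idr_remove(&ep_idr, ep->id);
	mutex_unlock(&ep_idr_mutex);
	kfree(ep);
}
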
+@@ -763,7 +769,7 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport,
+
+ err = device_register(&iface->dev);
+ if (err)
+- goto free_iface;
++ goto put_dev;
+
+ err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group);
+ if (err)
+@@ -777,9 +783,8 @@ unreg_iface:
+ device_unregister(&iface->dev);
+ return NULL;
+
+-free_iface:
+- put_device(iface->dev.parent);
+- kfree(iface);
++put_dev:
++ put_device(&iface->dev);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_create_iface);
+@@ -1248,15 +1253,15 @@ iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index,
+
+ err = device_register(&fnode_sess->dev);
+ if (err)
+- goto free_fnode_sess;
++ goto put_dev;
+
+ if (dd_size)
+ fnode_sess->dd_data = &fnode_sess[1];
+
+ return fnode_sess;
+
+-free_fnode_sess:
+- kfree(fnode_sess);
++put_dev:
++ put_device(&fnode_sess->dev);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess);
+@@ -1296,15 +1301,15 @@ iscsi_create_flashnode_conn(struct Scsi_Host *shost,
+
+ err = device_register(&fnode_conn->dev);
+ if (err)
+- goto free_fnode_conn;
++ goto put_dev;
+
+ if (dd_size)
+ fnode_conn->dd_data = &fnode_conn[1];
+
+ return fnode_conn;
+
+-free_fnode_conn:
+- kfree(fnode_conn);
++put_dev:
++ put_device(&fnode_conn->dev);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn);
+@@ -1674,6 +1679,13 @@ static const char *iscsi_session_state_name(int state)
+ return name;
+ }
+
++static char *iscsi_session_target_state_name[] = {
++ [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND",
++ [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED",
++ [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED",
++ [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING",
++};
++
+ int iscsi_session_chkready(struct iscsi_cls_session *session)
+ {
+ int err;
+@@ -1802,9 +1814,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
+ if ((scan_data->channel == SCAN_WILD_CARD ||
+ scan_data->channel == 0) &&
+ (scan_data->id == SCAN_WILD_CARD ||
+- scan_data->id == id))
++ scan_data->id == id)) {
+ scsi_scan_target(&session->dev, 0, id,
+ scan_data->lun, scan_data->rescan);
++ spin_lock_irqsave(&session->lock, flags);
++ session->target_state = ISCSI_SESSION_TARGET_SCANNED;
++ spin_unlock_irqrestore(&session->lock, flags);
++ }
+ }
+
+ user_scan_exit:
+@@ -1899,12 +1915,12 @@ static void session_recovery_timedout(struct work_struct *work)
+ }
+ spin_unlock_irqrestore(&session->lock, flags);
+
+- if (session->transport->session_recovery_timedout)
+- session->transport->session_recovery_timedout(session);
+-
+ ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n");
+ scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE);
+ ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n");
++
++ if (session->transport->session_recovery_timedout)
++ session->transport->session_recovery_timedout(session);
+ }
+
+ static void __iscsi_unblock_session(struct work_struct *work)
+@@ -1993,31 +2009,41 @@ static void __iscsi_unbind_session(struct work_struct *work)
+ struct iscsi_cls_host *ihost = shost->shost_data;
+ unsigned long flags;
+ unsigned int target_id;
++ bool remove_target = true;
+
+ ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
+
+ /* Prevent new scans and make sure scanning is not in progress */
+ mutex_lock(&ihost->mutex);
+ spin_lock_irqsave(&session->lock, flags);
+- if (session->target_id == ISCSI_MAX_TARGET) {
++ if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) {
++ remove_target = false;
++ } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) {
+ spin_unlock_irqrestore(&session->lock, flags);
+ mutex_unlock(&ihost->mutex);
+- goto unbind_session_exit;
++ ISCSI_DBG_TRANS_SESSION(session,
++ "Skipping target unbinding: Session is unbound/unbinding.\n");
++ return;
+ }
+
++ session->target_state = ISCSI_SESSION_TARGET_UNBINDING;
+ target_id = session->target_id;
+ session->target_id = ISCSI_MAX_TARGET;
+ spin_unlock_irqrestore(&session->lock, flags);
+ mutex_unlock(&ihost->mutex);
+
+- scsi_remove_target(&session->dev);
++ if (remove_target)
++ scsi_remove_target(&session->dev);
+
+ if (session->ida_used)
+ ida_simple_remove(&iscsi_sess_ida, target_id);
+
+-unbind_session_exit:
+ iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION);
+ ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n");
++
++ spin_lock_irqsave(&session->lock, flags);
++ session->target_state = ISCSI_SESSION_TARGET_UNBOUND;
++ spin_unlock_irqrestore(&session->lock, flags);
+ }
+
+ static void __iscsi_destroy_session(struct work_struct *work)
+@@ -2086,6 +2112,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id)
+ session->ida_used = true;
+ } else
+ session->target_id = target_id;
++ spin_lock_irqsave(&session->lock, flags);
++ session->target_state = ISCSI_SESSION_TARGET_ALLOCATED;
++ spin_unlock_irqrestore(&session->lock, flags);
+
+ dev_set_name(&session->dev, "session%u", session->sid);
+ err = device_add(&session->dev);
+@@ -2221,10 +2250,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
+
+ switch (flag) {
+ case STOP_CONN_RECOVER:
+- conn->state = ISCSI_CONN_FAILED;
++ WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+ break;
+ case STOP_CONN_TERM:
+- conn->state = ISCSI_CONN_DOWN;
++ WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+ break;
+ default:
+ iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
+@@ -2236,16 +2265,51 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
+ ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n");
+ }
+
+-static int iscsi_if_stop_conn(struct iscsi_transport *transport,
+- struct iscsi_uevent *ev)
++static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
+ {
+- int flag = ev->u.stop_conn.flag;
+- struct iscsi_cls_conn *conn;
++ struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
++ struct iscsi_endpoint *ep;
+
+- conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid);
+- if (!conn)
+- return -EINVAL;
++ ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
++ WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
++
++ if (!conn->ep || !session->transport->ep_disconnect)
++ return;
++
++ ep = conn->ep;
++ conn->ep = NULL;
++
++ session->transport->unbind_conn(conn, is_active);
++ session->transport->ep_disconnect(ep);
++ ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
++}
++
++static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn,
++ struct iscsi_endpoint *ep,
++ bool is_active)
++{
++ /* Check if this was a conn error and the kernel took ownership */
++ spin_lock_irq(&conn->lock);
++ if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
++ spin_unlock_irq(&conn->lock);
++ iscsi_ep_disconnect(conn, is_active);
++ } else {
++ spin_unlock_irq(&conn->lock);
++ ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
++ mutex_unlock(&conn->ep_mutex);
++
++ flush_work(&conn->cleanup_work);
++ /*
++ * Userspace is now done with the EP so we can release the ref
++ * iscsi_cleanup_conn_work_fn took.
++ */
++ iscsi_put_endpoint(ep);
++ mutex_lock(&conn->ep_mutex);
++ }
++}
+
++static int iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag)
++{
+ ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n");
+ /*
+ * If this is a termination we have to call stop_conn with that flag
+@@ -2256,12 +2320,25 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport,
+ cancel_work_sync(&conn->cleanup_work);
+ iscsi_stop_conn(conn, flag);
+ } else {
++ /*
++ * For offload, when iscsid is restarted it won't know about
++ * existing endpoints so it can't do a ep_disconnect. We clean
++ * it up here for userspace.
++ */
++ mutex_lock(&conn->ep_mutex);
++ if (conn->ep)
++ iscsi_if_disconnect_bound_ep(conn, conn->ep, true);
++ mutex_unlock(&conn->ep_mutex);
++
+ /*
+ * Figure out if it was the kernel or userspace initiating this.
+ */
++ spin_lock_irq(&conn->lock);
+ if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
++ spin_unlock_irq(&conn->lock);
+ iscsi_stop_conn(conn, flag);
+ } else {
++ spin_unlock_irq(&conn->lock);
+ ISCSI_DBG_TRANS_CONN(conn,
+ "flush kernel conn cleanup.\n");
+ flush_work(&conn->cleanup_work);
+@@ -2270,31 +2347,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport,
+ * Only clear for recovery to avoid extra cleanup runs during
+ * termination.
+ */
++ spin_lock_irq(&conn->lock);
+ clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
++ spin_unlock_irq(&conn->lock);
+ }
+ ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n");
+ return 0;
+ }
+
+-static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
+-{
+- struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
+- struct iscsi_endpoint *ep;
+-
+- ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+- conn->state = ISCSI_CONN_FAILED;
+-
+- if (!conn->ep || !session->transport->ep_disconnect)
+- return;
+-
+- ep = conn->ep;
+- conn->ep = NULL;
+-
+- session->transport->unbind_conn(conn, is_active);
+- session->transport->ep_disconnect(ep);
+- ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
+-}
+-
+ static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
+ {
+ struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn,
+@@ -2303,18 +2363,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
+
+ mutex_lock(&conn->ep_mutex);
+ /*
+- * If we are not at least bound there is nothing for us to do. Userspace
+- * will do a ep_disconnect call if offload is used, but will not be
+- * doing a stop since there is nothing to clean up, so we have to clear
+- * the cleanup bit here.
++ * Get a ref to the ep, so we don't release its ID until after
++ * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
+ */
+- if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
+- ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
+- clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
+- mutex_unlock(&conn->ep_mutex);
+- return;
+- }
+-
++ if (conn->ep)
++ get_device(&conn->ep->dev);
+ iscsi_ep_disconnect(conn, false);
+
+ if (system_state != SYSTEM_RUNNING) {
+@@ -2332,6 +2385,55 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
+ ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n");
+ }
+
++static int iscsi_iter_force_destroy_conn_fn(struct device *dev, void *data)
++{
++ struct iscsi_transport *transport;
++ struct iscsi_cls_conn *conn;
++
++ if (!iscsi_is_conn_dev(dev))
++ return 0;
++
++ conn = iscsi_dev_to_conn(dev);
++ transport = conn->transport;
++
++ if (READ_ONCE(conn->state) != ISCSI_CONN_DOWN)
++ iscsi_if_stop_conn(conn, STOP_CONN_TERM);
++
++ transport->destroy_conn(conn);
++ return 0;
++}
++
++/**
++ * iscsi_force_destroy_session - destroy a session from the kernel
++ * @session: session to destroy
++ *
++ * Force the destruction of a session from the kernel. This should only be
++ * used when userspace is no longer running during system shutdown.
++ */
++void iscsi_force_destroy_session(struct iscsi_cls_session *session)
++{
++ struct iscsi_transport *transport = session->transport;
++ unsigned long flags;
++
++ WARN_ON_ONCE(system_state == SYSTEM_RUNNING);
++
++ spin_lock_irqsave(&sesslock, flags);
++ if (list_empty(&session->sess_list)) {
++ spin_unlock_irqrestore(&sesslock, flags);
++ /*
++ * Conn/ep is already freed. Session is being torn down via
++ * async path. For shutdown we don't care about it so return.
++ */
++ return;
++ }
++ spin_unlock_irqrestore(&sesslock, flags);
++
++ device_for_each_child(&session->dev, NULL,
++ iscsi_iter_force_destroy_conn_fn);
++ transport->destroy_session(session);
++}
++EXPORT_SYMBOL_GPL(iscsi_force_destroy_session);
++
+ void iscsi_free_session(struct iscsi_cls_session *session)
+ {
+ ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n");
+@@ -2370,11 +2472,12 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
+ conn->dd_data = &conn[1];
+
+ mutex_init(&conn->ep_mutex);
++ spin_lock_init(&conn->lock);
+ INIT_LIST_HEAD(&conn->conn_list);
+ INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
+ conn->transport = transport;
+ conn->cid = cid;
+- conn->state = ISCSI_CONN_DOWN;
++ WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+
+ /* this is released in the dev's release function */
+ if (!get_device(&session->dev))
+@@ -2561,9 +2664,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
+ struct iscsi_uevent *ev;
+ struct iscsi_internal *priv;
+ int len = nlmsg_total_size(sizeof(*ev));
++ unsigned long flags;
++ int state;
+
+- if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
+- queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
++ spin_lock_irqsave(&conn->lock, flags);
++ /*
++ * Userspace will only do a stop call if we are at least bound. And, we
++ * only need to do the in kernel cleanup if in the UP state so cmds can
++ * be released to upper layers. If in other states just wait for
++ * userspace to avoid races that can leave the cleanup_work queued.
++ */
++ state = READ_ONCE(conn->state);
++ switch (state) {
++ case ISCSI_CONN_BOUND:
++ case ISCSI_CONN_UP:
++ if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
++ &conn->flags)) {
++ queue_work(iscsi_conn_cleanup_workq,
++ &conn->cleanup_work);
++ }
++ break;
++ default:
++ ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
++ state);
++ break;
++ }
++ spin_unlock_irqrestore(&conn->lock, flags);
+
+ priv = iscsi_if_transport_lookup(conn->transport);
+ if (!priv)
+@@ -2908,14 +3034,15 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev
+ }
+
+ static int
+-iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
++iscsi_if_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen)
+ {
+ char *data = (char*)ev + sizeof(*ev);
+ struct iscsi_cls_conn *conn;
+ struct iscsi_cls_session *session;
+- int err = 0, value = 0;
++ int err = 0, value = 0, state;
+
+- if (ev->u.set_param.len > PAGE_SIZE)
++ if (ev->u.set_param.len > rlen ||
++ ev->u.set_param.len > PAGE_SIZE)
+ return -EINVAL;
+
+ session = iscsi_session_lookup(ev->u.set_param.sid);
+@@ -2923,6 +3050,10 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+ if (!conn || !session)
+ return -EINVAL;
+
++ /* data will be regarded as NULL-ended string, do length check */
++ if (strlen(data) > ev->u.set_param.len)
++ return -EINVAL;
++
+ switch (ev->u.set_param.param) {
+ case ISCSI_PARAM_SESS_RECOVERY_TMO:
+ sscanf(data, "%d", &value);
+@@ -2930,8 +3061,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+ session->recovery_tmo = value;
+ break;
+ default:
+- if ((conn->state == ISCSI_CONN_BOUND) ||
+- (conn->state == ISCSI_CONN_UP)) {
++ state = READ_ONCE(conn->state);
++ if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
+ err = transport->set_param(conn, ev->u.set_param.param,
+ data, ev->u.set_param.len);
+ } else {
+@@ -3003,16 +3134,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
+ }
+
+ mutex_lock(&conn->ep_mutex);
+- /* Check if this was a conn error and the kernel took ownership */
+- if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
+- ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
+- mutex_unlock(&conn->ep_mutex);
+-
+- flush_work(&conn->cleanup_work);
+- goto put_ep;
+- }
+-
+- iscsi_ep_disconnect(conn, false);
++ iscsi_if_disconnect_bound_ep(conn, ep, false);
+ mutex_unlock(&conn->ep_mutex);
+ put_ep:
+ iscsi_put_endpoint(ep);
+@@ -3021,7 +3143,7 @@ put_ep:
+
+ static int
+ iscsi_if_transport_ep(struct iscsi_transport *transport,
+- struct iscsi_uevent *ev, int msg_type)
++ struct iscsi_uevent *ev, int msg_type, u32 rlen)
+ {
+ struct iscsi_endpoint *ep;
+ int rc = 0;
+@@ -3029,7 +3151,10 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
+ switch (msg_type) {
+ case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
+ case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
+- rc = iscsi_if_ep_connect(transport, ev, msg_type);
++ if (rlen < sizeof(struct sockaddr))
++ rc = -EINVAL;
++ else
++ rc = iscsi_if_ep_connect(transport, ev, msg_type);
+ break;
+ case ISCSI_UEVENT_TRANSPORT_EP_POLL:
+ if (!transport->ep_poll)
+@@ -3053,12 +3178,15 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
+
+ static int
+ iscsi_tgt_dscvr(struct iscsi_transport *transport,
+- struct iscsi_uevent *ev)
++ struct iscsi_uevent *ev, u32 rlen)
+ {
+ struct Scsi_Host *shost;
+ struct sockaddr *dst_addr;
+ int err;
+
++ if (rlen < sizeof(*dst_addr))
++ return -EINVAL;
++
+ if (!transport->tgt_dscvr)
+ return -EINVAL;
+
+@@ -3079,7 +3207,7 @@ iscsi_tgt_dscvr(struct iscsi_transport *transport,
+
+ static int
+ iscsi_set_host_param(struct iscsi_transport *transport,
+- struct iscsi_uevent *ev)
++ struct iscsi_uevent *ev, u32 rlen)
+ {
+ char *data = (char*)ev + sizeof(*ev);
+ struct Scsi_Host *shost;
+@@ -3088,7 +3216,8 @@ iscsi_set_host_param(struct iscsi_transport *transport,
+ if (!transport->set_host_param)
+ return -ENOSYS;
+
+- if (ev->u.set_host_param.len > PAGE_SIZE)
++ if (ev->u.set_host_param.len > rlen ||
++ ev->u.set_host_param.len > PAGE_SIZE)
+ return -EINVAL;
+
+ shost = scsi_host_lookup(ev->u.set_host_param.host_no);
+@@ -3098,6 +3227,10 @@ iscsi_set_host_param(struct iscsi_transport *transport,
+ return -ENODEV;
+ }
+
++ /* see similar check in iscsi_if_set_param() */
++ if (strlen(data) > ev->u.set_host_param.len)
++ return -EINVAL;
++
+ err = transport->set_host_param(shost, ev->u.set_host_param.param,
+ data, ev->u.set_host_param.len);
+ scsi_host_put(shost);
+@@ -3105,12 +3238,15 @@ iscsi_set_host_param(struct iscsi_transport *transport,
+ }
+
+ static int
+-iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
++iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen)
+ {
+ struct Scsi_Host *shost;
+ struct iscsi_path *params;
+ int err;
+
++ if (rlen < sizeof(*params))
++ return -EINVAL;
++
+ if (!transport->set_path)
+ return -ENOSYS;
+
+@@ -3170,12 +3306,15 @@ iscsi_set_iface_params(struct iscsi_transport *transport,
+ }
+
+ static int
+-iscsi_send_ping(struct iscsi_transport *transport, struct iscsi_uevent *ev)
++iscsi_send_ping(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen)
+ {
+ struct Scsi_Host *shost;
+ struct sockaddr *dst_addr;
+ int err;
+
++ if (rlen < sizeof(*dst_addr))
++ return -EINVAL;
++
+ if (!transport->send_ping)
+ return -ENOSYS;
+
+@@ -3673,13 +3812,12 @@ exit_host_stats:
+ }
+
+ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+- struct nlmsghdr *nlh)
++ struct nlmsghdr *nlh, u32 pdu_len)
+ {
+ struct iscsi_uevent *ev = nlmsg_data(nlh);
+ struct iscsi_cls_session *session;
+ struct iscsi_cls_conn *conn = NULL;
+ struct iscsi_endpoint *ep;
+- uint32_t pdu_len;
+ int err = 0;
+
+ switch (nlh->nlmsg_type) {
+@@ -3688,7 +3826,12 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+ case ISCSI_UEVENT_DESTROY_CONN:
+ return iscsi_if_destroy_conn(transport, ev);
+ case ISCSI_UEVENT_STOP_CONN:
+- return iscsi_if_stop_conn(transport, ev);
++ conn = iscsi_conn_lookup(ev->u.stop_conn.sid,
++ ev->u.stop_conn.cid);
++ if (!conn)
++ return -EINVAL;
++
++ return iscsi_if_stop_conn(conn, ev->u.stop_conn.flag);
+ }
+
+ /*
+@@ -3715,24 +3858,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+ return -EINVAL;
+
+ mutex_lock(&conn->ep_mutex);
++ spin_lock_irq(&conn->lock);
+ if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
++ spin_unlock_irq(&conn->lock);
+ mutex_unlock(&conn->ep_mutex);
+ ev->r.retcode = -ENOTCONN;
+ return 0;
+ }
++ spin_unlock_irq(&conn->lock);
+
+ switch (nlh->nlmsg_type) {
+ case ISCSI_UEVENT_BIND_CONN:
+- if (conn->ep) {
+- /*
+- * For offload boot support where iscsid is restarted
+- * during the pivot root stage, the ep will be intact
+- * here when the new iscsid instance starts up and
+- * reconnects.
+- */
+- iscsi_ep_disconnect(conn, true);
+- }
+-
+ session = iscsi_session_lookup(ev->u.b_conn.sid);
+ if (!session) {
+ err = -EINVAL;
+@@ -3743,7 +3879,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+ ev->u.b_conn.transport_eph,
+ ev->u.b_conn.is_leading);
+ if (!ev->r.retcode)
+- conn->state = ISCSI_CONN_BOUND;
++ WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
+
+ if (ev->r.retcode || !transport->ep_connect)
+ break;
+@@ -3762,11 +3898,10 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
+ case ISCSI_UEVENT_START_CONN:
+ ev->r.retcode = transport->start_conn(conn);
+ if (!ev->r.retcode)
+- conn->state = ISCSI_CONN_UP;
++ WRITE_ONCE(conn->state, ISCSI_CONN_UP);
++
+ break;
+ case ISCSI_UEVENT_SEND_PDU:
+- pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+-
+ if ((ev->u.send_pdu.hdr_size > pdu_len) ||
+ (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
+ err = -EINVAL;
+@@ -3796,6 +3931,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ struct iscsi_internal *priv;
+ struct iscsi_cls_session *session;
+ struct iscsi_endpoint *ep = NULL;
++ u32 rlen;
+
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -3815,6 +3951,13 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+
+ portid = NETLINK_CB(skb).portid;
+
++ /*
++ * Even though the remaining payload may not be regarded as nlattr,
++ * (like address or something else), calculate the remaining length
++ * here to ease following length checks.
++ */
++ rlen = nlmsg_attrlen(nlh, sizeof(*ev));
++
+ switch (nlh->nlmsg_type) {
+ case ISCSI_UEVENT_CREATE_SESSION:
+ err = iscsi_if_create_session(priv, ep, ev,
+@@ -3872,7 +4015,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ err = -EINVAL;
+ break;
+ case ISCSI_UEVENT_SET_PARAM:
+- err = iscsi_set_param(transport, ev);
++ err = iscsi_if_set_param(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_CREATE_CONN:
+ case ISCSI_UEVENT_DESTROY_CONN:
+@@ -3880,7 +4023,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ case ISCSI_UEVENT_START_CONN:
+ case ISCSI_UEVENT_BIND_CONN:
+ case ISCSI_UEVENT_SEND_PDU:
+- err = iscsi_if_transport_conn(transport, nlh);
++ err = iscsi_if_transport_conn(transport, nlh, rlen);
+ break;
+ case ISCSI_UEVENT_GET_STATS:
+ err = iscsi_if_get_stats(transport, nlh);
+@@ -3889,23 +4032,22 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ case ISCSI_UEVENT_TRANSPORT_EP_POLL:
+ case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
+ case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
+- err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type);
++ err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type, rlen);
+ break;
+ case ISCSI_UEVENT_TGT_DSCVR:
+- err = iscsi_tgt_dscvr(transport, ev);
++ err = iscsi_tgt_dscvr(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_SET_HOST_PARAM:
+- err = iscsi_set_host_param(transport, ev);
++ err = iscsi_set_host_param(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_PATH_UPDATE:
+- err = iscsi_set_path(transport, ev);
++ err = iscsi_set_path(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_SET_IFACE_PARAMS:
+- err = iscsi_set_iface_params(transport, ev,
+- nlmsg_attrlen(nlh, sizeof(*ev)));
++ err = iscsi_set_iface_params(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_PING:
+- err = iscsi_send_ping(transport, ev);
++ err = iscsi_send_ping(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_GET_CHAP:
+ err = iscsi_get_chap(transport, nlh);
+@@ -3914,13 +4056,10 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ err = iscsi_delete_chap(transport, ev);
+ break;
+ case ISCSI_UEVENT_SET_FLASHNODE_PARAMS:
+- err = iscsi_set_flashnode_param(transport, ev,
+- nlmsg_attrlen(nlh,
+- sizeof(*ev)));
++ err = iscsi_set_flashnode_param(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_NEW_FLASHNODE:
+- err = iscsi_new_flashnode(transport, ev,
+- nlmsg_attrlen(nlh, sizeof(*ev)));
++ err = iscsi_new_flashnode(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_DEL_FLASHNODE:
+ err = iscsi_del_flashnode(transport, ev);
+@@ -3935,8 +4074,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
+ err = iscsi_logout_flashnode_sid(transport, ev);
+ break;
+ case ISCSI_UEVENT_SET_CHAP:
+- err = iscsi_set_chap(transport, ev,
+- nlmsg_attrlen(nlh, sizeof(*ev)));
++ err = iscsi_set_chap(transport, ev, rlen);
+ break;
+ case ISCSI_UEVENT_GET_HOST_STATS:
+ err = iscsi_get_host_stats(transport, nlh);
+@@ -4070,10 +4208,11 @@ static ssize_t show_conn_state(struct device *dev,
+ {
+ struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
+ const char *state = "unknown";
++ int conn_state = READ_ONCE(conn->state);
+
+- if (conn->state >= 0 &&
+- conn->state < ARRAY_SIZE(connection_state_names))
+- state = connection_state_names[conn->state];
++ if (conn_state >= 0 &&
++ conn_state < ARRAY_SIZE(connection_state_names))
++ state = connection_state_names[conn_state];
+
+ return sysfs_emit(buf, "%s\n", state);
+ }
+@@ -4298,6 +4437,19 @@ iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0);
+ iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0);
+ iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0);
+
++static ssize_t
++show_priv_session_target_state(struct device *dev, struct device_attribute *attr,
++ char *buf)
++{
++ struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
++
++ return sysfs_emit(buf, "%s\n",
++ iscsi_session_target_state_name[session->target_state]);
++}
++
++static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO,
++ show_priv_session_target_state, NULL);
++
+ static ssize_t
+ show_priv_session_state(struct device *dev, struct device_attribute *attr,
+ char *buf)
+@@ -4400,6 +4552,7 @@ static struct attribute *iscsi_session_attrs[] = {
+ &dev_attr_sess_boot_target.attr,
+ &dev_attr_priv_sess_recovery_tmo.attr,
+ &dev_attr_priv_sess_state.attr,
++ &dev_attr_priv_sess_target_state.attr,
+ &dev_attr_priv_sess_creator.attr,
+ &dev_attr_sess_chap_out_idx.attr,
+ &dev_attr_sess_chap_in_idx.attr,
+@@ -4513,6 +4666,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
+ return S_IRUGO | S_IWUSR;
+ else if (attr == &dev_attr_priv_sess_state.attr)
+ return S_IRUGO;
++ else if (attr == &dev_attr_priv_sess_target_state.attr)
++ return S_IRUGO;
+ else if (attr == &dev_attr_priv_sess_creator.attr)
+ return S_IRUGO;
+ else if (attr == &dev_attr_priv_sess_target_id.attr)
+@@ -4746,7 +4901,7 @@ iscsi_register_transport(struct iscsi_transport *tt)
+ dev_set_name(&priv->dev, "%s", tt->name);
+ err = device_register(&priv->dev);
+ if (err)
+- goto free_priv;
++ goto put_dev;
+
+ err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group);
+ if (err)
+@@ -4781,8 +4936,8 @@ iscsi_register_transport(struct iscsi_transport *tt)
+ unregister_dev:
+ device_unregister(&priv->dev);
+ return NULL;
+-free_priv:
+- kfree(priv);
++put_dev:
++ put_device(&priv->dev);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_register_transport);
+diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
+index 4a96fb05731d2..c6256fdc24b10 100644
+--- a/drivers/scsi/scsi_transport_sas.c
++++ b/drivers/scsi/scsi_transport_sas.c
+@@ -716,12 +716,17 @@ int sas_phy_add(struct sas_phy *phy)
+ int error;
+
+ error = device_add(&phy->dev);
+- if (!error) {
+- transport_add_device(&phy->dev);
+- transport_configure_device(&phy->dev);
++ if (error)
++ return error;
++
++ error = transport_add_device(&phy->dev);
++ if (error) {
++ device_del(&phy->dev);
++ return error;
+ }
++ transport_configure_device(&phy->dev);
+
+- return error;
++ return 0;
+ }
+ EXPORT_SYMBOL(sas_phy_add);
+
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index fce63335084ed..1e887c11e83d0 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -48,6 +48,7 @@
+ #include <linux/blkpg.h>
+ #include <linux/blk-pm.h>
+ #include <linux/delay.h>
++#include <linux/major.h>
+ #include <linux/mutex.h>
+ #include <linux/string_helpers.h>
+ #include <linux/async.h>
+@@ -1071,6 +1072,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
+ struct bio *bio = rq->bio;
+ u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
+ u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
++ unsigned int nr_bytes = blk_rq_bytes(rq);
+ blk_status_t ret;
+
+ if (sdkp->device->no_write_same)
+@@ -1107,7 +1109,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
+ */
+ rq->__data_len = sdp->sector_size;
+ ret = scsi_alloc_sgtables(cmd);
+- rq->__data_len = blk_rq_bytes(rq);
++ rq->__data_len = nr_bytes;
+
+ return ret;
+ }
+@@ -2607,6 +2609,13 @@ sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage,
+ unsigned char *buffer, int len, struct scsi_mode_data *data,
+ struct scsi_sense_hdr *sshdr)
+ {
++ /*
++ * If we must use MODE SENSE(10), make sure that the buffer length
++ * is at least 8 bytes so that the mode sense header fits.
++ */
++ if (sdkp->device->use_10_for_ms && len < 8)
++ len = 8;
++
+ return scsi_mode_sense(sdkp->device, dbd, modepage, buffer, len,
+ SD_TIMEOUT, sdkp->max_retries, data,
+ sshdr);
+@@ -3472,7 +3481,6 @@ static int sd_probe(struct device *dev)
+ out_put:
+ put_disk(gd);
+ out_free:
+- sd_zbc_release_disk(sdkp);
+ kfree(sdkp);
+ out:
+ scsi_autopm_put_device(sdp);
+@@ -3620,7 +3628,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+ return 0;
+
+ if (sdkp->WCE && sdkp->media_present) {
+- sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
++ if (!sdkp->device->silence_suspend)
++ sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
+ ret = sd_sync_cache(sdkp, &sshdr);
+
+ if (ret) {
+@@ -3642,7 +3651,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+ }
+
+ if (sdkp->device->manage_start_stop) {
+- sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
++ if (!sdkp->device->silence_suspend)
++ sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
+ /* an error is not worth aborting a system sleep */
+ ret = sd_start_stop_device(sdkp, 0);
+ if (ignore_stop_errors)
+diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
+index 0a1734f34587d..6a1428d453f3e 100644
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -433,8 +433,8 @@ int ses_match_host(struct enclosure_device *edev, void *data)
+ }
+ #endif /* 0 */
+
+-static void ses_process_descriptor(struct enclosure_component *ecomp,
+- unsigned char *desc)
++static int ses_process_descriptor(struct enclosure_component *ecomp,
++ unsigned char *desc, int max_desc_len)
+ {
+ int eip = desc[0] & 0x10;
+ int invalid = desc[0] & 0x80;
+@@ -445,22 +445,32 @@ static void ses_process_descriptor(struct enclosure_component *ecomp,
+ unsigned char *d;
+
+ if (invalid)
+- return;
++ return 0;
+
+ switch (proto) {
+ case SCSI_PROTOCOL_FCP:
+ if (eip) {
++ if (max_desc_len <= 7)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ }
+ break;
+ case SCSI_PROTOCOL_SAS:
++
+ if (eip) {
++ if (max_desc_len <= 27)
++ return 1;
+ d = desc + 4;
+ slot = d[3];
+ d = desc + 8;
+- } else
++ } else {
++ if (max_desc_len <= 23)
++ return 1;
+ d = desc + 4;
++ }
++
++
+ /* only take the phy0 addr */
+ addr = (u64)d[12] << 56 |
+ (u64)d[13] << 48 |
+@@ -477,6 +487,8 @@ static void ses_process_descriptor(struct enclosure_component *ecomp,
+ }
+ ecomp->slot = slot;
+ scomp->addr = addr;
++
++ return 0;
+ }
+
+ struct efd {
+@@ -491,9 +503,6 @@ static int ses_enclosure_find_by_addr(struct enclosure_device *edev,
+ int i;
+ struct ses_component *scomp;
+
+- if (!edev->component[0].scratch)
+- return 0;
+-
+ for (i = 0; i < edev->components; i++) {
+ scomp = edev->component[i].scratch;
+ if (scomp->addr != efd->addr)
+@@ -549,7 +558,7 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
+ /* skip past overall descriptor */
+ desc_ptr += len + 4;
+ }
+- if (ses_dev->page10)
++ if (ses_dev->page10 && ses_dev->page10_len > 9)
+ addl_desc_ptr = ses_dev->page10 + 8;
+ type_ptr = ses_dev->page1_types;
+ components = 0;
+@@ -557,17 +566,22 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
+ for (j = 0; j < type_ptr[1]; j++) {
+ char *name = NULL;
+ struct enclosure_component *ecomp;
++ int max_desc_len;
+
+ if (desc_ptr) {
+- if (desc_ptr >= buf + page7_len) {
++ if (desc_ptr + 3 >= buf + page7_len) {
+ desc_ptr = NULL;
+ } else {
+ len = (desc_ptr[2] << 8) + desc_ptr[3];
+ desc_ptr += 4;
+- /* Add trailing zero - pushes into
+- * reserved space */
+- desc_ptr[len] = '\0';
+- name = desc_ptr;
++ if (desc_ptr + len > buf + page7_len)
++ desc_ptr = NULL;
++ else {
++ /* Add trailing zero - pushes into
++ * reserved space */
++ desc_ptr[len] = '\0';
++ name = desc_ptr;
++ }
+ }
+ }
+ if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
+@@ -579,14 +593,20 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
+ components++,
+ type_ptr[0],
+ name);
+- else
++ else if (components < edev->components)
+ ecomp = &edev->component[components++];
++ else
++ ecomp = ERR_PTR(-EINVAL);
+
+ if (!IS_ERR(ecomp)) {
+- if (addl_desc_ptr)
+- ses_process_descriptor(
+- ecomp,
+- addl_desc_ptr);
++ if (addl_desc_ptr) {
++ max_desc_len = ses_dev->page10_len -
++ (addl_desc_ptr - ses_dev->page10);
++ if (ses_process_descriptor(ecomp,
++ addl_desc_ptr,
++ max_desc_len))
++ addl_desc_ptr = NULL;
++ }
+ if (create)
+ enclosure_component_register(
+ ecomp);
+@@ -603,9 +623,11 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
+ /* these elements are optional */
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
+- type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
++ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS)) {
+ addl_desc_ptr += addl_desc_ptr[1] + 2;
+-
++ if (addl_desc_ptr + 1 >= ses_dev->page10 + ses_dev->page10_len)
++ addl_desc_ptr = NULL;
++ }
+ }
+ }
+ kfree(buf);
+@@ -704,6 +726,7 @@ static int ses_intf_add(struct device *cdev,
+ type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
+ components += type_ptr[1];
+ }
++
+ ses_dev->page1 = buf;
+ ses_dev->page1_len = len;
+ buf = NULL;
+@@ -745,9 +768,11 @@ static int ses_intf_add(struct device *cdev,
+ buf = NULL;
+ }
+ page2_not_supported:
+- scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
+- if (!scomp)
+- goto err_free;
++ if (components > 0) {
++ scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
++ if (!scomp)
++ goto err_free;
++ }
+
+ edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev),
+ components, &ses_enclosure_callbacks);
+@@ -827,7 +852,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
+ kfree(ses_dev->page2);
+ kfree(ses_dev);
+
+- kfree(edev->component[0].scratch);
++ if (edev->components)
++ kfree(edev->component[0].scratch);
+
+ put_device(&edev->edev);
+ enclosure_unregister(edev);
+diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
+index 8f05248920e8e..d771a1988f942 100644
+--- a/drivers/scsi/sg.c
++++ b/drivers/scsi/sg.c
+@@ -31,6 +31,7 @@ static int sg_version_num = 30536; /* 2 digits for each component */
+ #include <linux/errno.h>
+ #include <linux/mtio.h>
+ #include <linux/ioctl.h>
++#include <linux/major.h>
+ #include <linux/slab.h>
+ #include <linux/fcntl.h>
+ #include <linux/init.h>
+@@ -190,7 +191,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
+ static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
+ static Sg_fd *sg_add_sfp(Sg_device * sdp);
+ static void sg_remove_sfp(struct kref *);
+-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
++static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
+ static Sg_request *sg_add_request(Sg_fd * sfp);
+ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
+ static Sg_device *sg_get_dev(int dev);
+@@ -444,6 +445,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
+ Sg_fd *sfp;
+ Sg_request *srp;
+ int req_pack_id = -1;
++ bool busy;
+ sg_io_hdr_t *hp;
+ struct sg_header *old_hdr;
+ int retval;
+@@ -466,20 +468,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
+ if (retval)
+ return retval;
+
+- srp = sg_get_rq_mark(sfp, req_pack_id);
++ srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
+ if (!srp) { /* now wait on packet to arrive */
+- if (atomic_read(&sdp->detaching))
+- return -ENODEV;
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+ retval = wait_event_interruptible(sfp->read_wait,
+- (atomic_read(&sdp->detaching) ||
+- (srp = sg_get_rq_mark(sfp, req_pack_id))));
+- if (atomic_read(&sdp->detaching))
+- return -ENODEV;
+- if (retval)
+- /* -ERESTARTSYS as signal hit process */
+- return retval;
++ ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
++ (!busy && atomic_read(&sdp->detaching))));
++ if (!srp)
++ /* signal or detaching */
++ return retval ? retval : -ENODEV;
+ }
+ if (srp->header.interface_id != '\0')
+ return sg_new_read(sfp, buf, count, srp);
+@@ -940,9 +938,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
+ if (result < 0)
+ return result;
+ result = wait_event_interruptible(sfp->read_wait,
+- (srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
+- if (atomic_read(&sdp->detaching))
+- return -ENODEV;
++ srp_done(sfp, srp));
+ write_lock_irq(&sfp->rq_list_lock);
+ if (srp->done) {
+ srp->done = 2;
+@@ -2055,19 +2051,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
+ }
+
+ static Sg_request *
+-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
++sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
+ {
+ Sg_request *resp;
+ unsigned long iflags;
+
++ *busy = false;
+ write_lock_irqsave(&sfp->rq_list_lock, iflags);
+ list_for_each_entry(resp, &sfp->rq_list, entry) {
+- /* look for requests that are ready + not SG_IO owned */
+- if ((1 == resp->done) && (!resp->sg_io_owned) &&
++ /* look for requests that are not SG_IO owned */
++ if ((!resp->sg_io_owned) &&
+ ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
+- resp->done = 2; /* guard against other readers */
+- write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+- return resp;
++ switch (resp->done) {
++ case 0: /* request active */
++ *busy = true;
++ break;
++ case 1: /* request done; response ready to return */
++ resp->done = 2; /* guard against other readers */
++ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
++ return resp;
++ case 2: /* response already being returned */
++ break;
++ }
+ }
+ }
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+@@ -2121,6 +2126,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
+ res = 1;
+ }
+ write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
++
++ /*
++ * If the device is detaching, wakeup any readers in case we just
++ * removed the last response, which would leave nothing for them to
++ * return other than -ENODEV.
++ */
++ if (unlikely(atomic_read(&sfp->parentdp->detaching)))
++ wake_up_interruptible_all(&sfp->read_wait);
++
+ return res;
+ }
+
+diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
+index ecb2af3f43ca3..e3d8de1159b51 100644
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -234,15 +234,46 @@ static inline bool pqi_is_hba_lunid(u8 *scsi3addr)
+ return pqi_scsi3addr_equal(scsi3addr, RAID_CTLR_LUNID);
+ }
+
++#define PQI_DRIVER_SCRATCH_PQI_MODE 0x1
++#define PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED 0x2
++
+ static inline enum pqi_ctrl_mode pqi_get_ctrl_mode(struct pqi_ctrl_info *ctrl_info)
+ {
+- return sis_read_driver_scratch(ctrl_info);
++ return sis_read_driver_scratch(ctrl_info) & PQI_DRIVER_SCRATCH_PQI_MODE ? PQI_MODE : SIS_MODE;
+ }
+
+ static inline void pqi_save_ctrl_mode(struct pqi_ctrl_info *ctrl_info,
+ enum pqi_ctrl_mode mode)
+ {
+- sis_write_driver_scratch(ctrl_info, mode);
++ u32 driver_scratch;
++
++ driver_scratch = sis_read_driver_scratch(ctrl_info);
++
++ if (mode == PQI_MODE)
++ driver_scratch |= PQI_DRIVER_SCRATCH_PQI_MODE;
++ else
++ driver_scratch &= ~PQI_DRIVER_SCRATCH_PQI_MODE;
++
++ sis_write_driver_scratch(ctrl_info, driver_scratch);
++}
++
++static inline bool pqi_is_fw_triage_supported(struct pqi_ctrl_info *ctrl_info)
++{
++ return (sis_read_driver_scratch(ctrl_info) & PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED) != 0;
++}
++
++static inline void pqi_save_fw_triage_setting(struct pqi_ctrl_info *ctrl_info, bool is_supported)
++{
++ u32 driver_scratch;
++
++ driver_scratch = sis_read_driver_scratch(ctrl_info);
++
++ if (is_supported)
++ driver_scratch |= PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED;
++ else
++ driver_scratch &= ~PQI_DRIVER_SCRATCH_FW_TRIAGE_SUPPORTED;
++
++ sis_write_driver_scratch(ctrl_info, driver_scratch);
+ }
+
+ static inline void pqi_ctrl_block_scan(struct pqi_ctrl_info *ctrl_info)
+@@ -5279,10 +5310,10 @@ static int pqi_raid_submit_scsi_cmd_with_io_request(
+ }
+
+ switch (scmd->sc_data_direction) {
+- case DMA_TO_DEVICE:
++ case DMA_FROM_DEVICE:
+ request->data_direction = SOP_READ_FLAG;
+ break;
+- case DMA_FROM_DEVICE:
++ case DMA_TO_DEVICE:
+ request->data_direction = SOP_WRITE_FLAG;
+ break;
+ case DMA_NONE:
+@@ -7301,6 +7332,7 @@ static void pqi_ctrl_update_feature_flags(struct pqi_ctrl_info *ctrl_info,
+ ctrl_info->unique_wwid_in_report_phys_lun_supported =
+ firmware_feature->enabled;
+ break;
++ pqi_save_fw_triage_setting(ctrl_info, firmware_feature->enabled);
+ }
+
+ pqi_firmware_feature_status(ctrl_info, firmware_feature);
+@@ -7621,12 +7653,32 @@ static int pqi_force_sis_mode(struct pqi_ctrl_info *ctrl_info)
+ return pqi_revert_to_sis_mode(ctrl_info);
+ }
+
++static void pqi_perform_lockup_action(void)
++{
++ switch (pqi_lockup_action) {
++ case PANIC:
++ panic("FATAL: Smart Family Controller lockup detected");
++ break;
++ case REBOOT:
++ emergency_restart();
++ break;
++ case NONE:
++ default:
++ break;
++ }
++}
++
+ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
+ {
+ int rc;
+ u32 product_id;
+
+ if (reset_devices) {
++ if (pqi_is_fw_triage_supported(ctrl_info)) {
++ rc = sis_wait_for_fw_triage_completion(ctrl_info);
++ if (rc)
++ return rc;
++ }
+ sis_soft_reset(ctrl_info);
+ msleep(PQI_POST_RESET_DELAY_SECS * PQI_HZ);
+ } else {
+@@ -7640,8 +7692,15 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
+ * commands.
+ */
+ rc = sis_wait_for_ctrl_ready(ctrl_info);
+- if (rc)
++ if (rc) {
++ if (reset_devices) {
++ dev_err(&ctrl_info->pci_dev->dev,
++ "kdump init failed with error %d\n", rc);
++ pqi_lockup_action = REBOOT;
++ pqi_perform_lockup_action();
++ }
+ return rc;
++ }
+
+ /*
+ * Get the controller properties. This allows us to determine
+@@ -8365,21 +8424,6 @@ static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info, unsigned int de
+ return pqi_ctrl_init_resume(ctrl_info);
+ }
+
+-static void pqi_perform_lockup_action(void)
+-{
+- switch (pqi_lockup_action) {
+- case PANIC:
+- panic("FATAL: Smart Family Controller lockup detected");
+- break;
+- case REBOOT:
+- emergency_restart();
+- break;
+- case NONE:
+- default:
+- break;
+- }
+-}
+-
+ static struct pqi_raid_error_info pqi_ctrl_offline_raid_error_info = {
+ .data_out_result = PQI_DATA_IN_OUT_HARDWARE_ERROR,
+ .status = SAM_STAT_CHECK_CONDITION,
+diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c
+index d63c46a8e38bb..8acd3a80f5822 100644
+--- a/drivers/scsi/smartpqi/smartpqi_sis.c
++++ b/drivers/scsi/smartpqi/smartpqi_sis.c
+@@ -51,12 +51,20 @@
+ #define SIS_BASE_STRUCT_REVISION 9
+ #define SIS_BASE_STRUCT_ALIGNMENT 16
+
++#define SIS_CTRL_KERNEL_FW_TRIAGE 0x3
+ #define SIS_CTRL_KERNEL_UP 0x80
+ #define SIS_CTRL_KERNEL_PANIC 0x100
+ #define SIS_CTRL_READY_TIMEOUT_SECS 180
+ #define SIS_CTRL_READY_RESUME_TIMEOUT_SECS 90
+ #define SIS_CTRL_READY_POLL_INTERVAL_MSECS 10
+
++enum sis_fw_triage_status {
++ FW_TRIAGE_NOT_STARTED = 0,
++ FW_TRIAGE_STARTED,
++ FW_TRIAGE_COND_INVALID,
++ FW_TRIAGE_COMPLETED
++};
++
+ #pragma pack(1)
+
+ /* for use with SIS_CMD_INIT_BASE_STRUCT_ADDRESS command */
+@@ -419,12 +427,55 @@ u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info)
+ return readl(&ctrl_info->registers->sis_driver_scratch);
+ }
+
++static inline enum sis_fw_triage_status
++ sis_read_firmware_triage_status(struct pqi_ctrl_info *ctrl_info)
++{
++ return ((enum sis_fw_triage_status)(readl(&ctrl_info->registers->sis_firmware_status) &
++ SIS_CTRL_KERNEL_FW_TRIAGE));
++}
++
+ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
+ {
+ writel(SIS_SOFT_RESET,
+ &ctrl_info->registers->sis_host_to_ctrl_doorbell);
+ }
+
++#define SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS 300
++#define SIS_FW_TRIAGE_STATUS_POLL_INTERVAL_SECS 1
++
++int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info)
++{
++ int rc;
++ enum sis_fw_triage_status status;
++ unsigned long timeout;
++
++ timeout = (SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies;
++ while (1) {
++ status = sis_read_firmware_triage_status(ctrl_info);
++ if (status == FW_TRIAGE_COND_INVALID) {
++ dev_err(&ctrl_info->pci_dev->dev,
++ "firmware triage condition invalid\n");
++ rc = -EINVAL;
++ break;
++ } else if (status == FW_TRIAGE_NOT_STARTED ||
++ status == FW_TRIAGE_COMPLETED) {
++ rc = 0;
++ break;
++ }
++
++ if (time_after(jiffies, timeout)) {
++ dev_err(&ctrl_info->pci_dev->dev,
++ "timed out waiting for firmware triage status\n");
++ rc = -ETIMEDOUT;
++ break;
++ }
++
++ ssleep(SIS_FW_TRIAGE_STATUS_POLL_INTERVAL_SECS);
++ }
++
++ return rc;
++}
++
+ static void __attribute__((unused)) verify_structures(void)
+ {
+ BUILD_BUG_ON(offsetof(struct sis_base_struct,
+diff --git a/drivers/scsi/smartpqi/smartpqi_sis.h b/drivers/scsi/smartpqi/smartpqi_sis.h
+index d29c1352a826a..c1db93054c863 100644
+--- a/drivers/scsi/smartpqi/smartpqi_sis.h
++++ b/drivers/scsi/smartpqi/smartpqi_sis.h
+@@ -28,5 +28,6 @@ void sis_write_driver_scratch(struct pqi_ctrl_info *ctrl_info, u32 value);
+ u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info);
+ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info);
+ u32 sis_get_product_id(struct pqi_ctrl_info *ctrl_info);
++int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info);
+
+ #endif /* _SMARTPQI_SIS_H */
+diff --git a/drivers/scsi/snic/snic_debugfs.c b/drivers/scsi/snic/snic_debugfs.c
+index 5e0faeba516e5..76baa4f9a06e3 100644
+--- a/drivers/scsi/snic/snic_debugfs.c
++++ b/drivers/scsi/snic/snic_debugfs.c
+@@ -451,6 +451,6 @@ void snic_trc_debugfs_init(void)
+ void
+ snic_trc_debugfs_term(void)
+ {
+- debugfs_remove(debugfs_lookup(TRC_FILE, snic_glob->trc_root));
+- debugfs_remove(debugfs_lookup(TRC_ENABLE_FILE, snic_glob->trc_root));
++ debugfs_lookup_and_remove(TRC_FILE, snic_glob->trc_root);
++ debugfs_lookup_and_remove(TRC_ENABLE_FILE, snic_glob->trc_root);
+ }
+diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c
+index e9ccfb97773f1..e362453e8d262 100644
+--- a/drivers/scsi/snic/snic_disc.c
++++ b/drivers/scsi/snic/snic_disc.c
+@@ -318,7 +318,10 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
+ ret);
+
+ put_device(&snic->shost->shost_gendev);
+- kfree(tgt);
++ spin_lock_irqsave(snic->shost->host_lock, flags);
++ list_del(&tgt->list);
++ spin_unlock_irqrestore(snic->shost->host_lock, flags);
++ put_device(&tgt->dev);
+ tgt = NULL;
+
+ return tgt;
+diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
+index 8b17b35283aa5..af210910dadf2 100644
+--- a/drivers/scsi/sr.c
++++ b/drivers/scsi/sr.c
+@@ -44,6 +44,7 @@
+ #include <linux/cdrom.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
++#include <linux/major.h>
+ #include <linux/blkdev.h>
+ #include <linux/blk-pm.h>
+ #include <linux/mutex.h>
+@@ -578,7 +579,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+
+ scsi_autopm_get_device(sdev);
+
+- if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) {
++ if (cmd != CDROMCLOSETRAY && cmd != CDROMEJECT) {
+ ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
+ if (ret != -ENOSYS)
+ goto put;
+@@ -683,9 +684,10 @@ static int sr_probe(struct device *dev)
+ disk->minors = 1;
+ sprintf(disk->disk_name, "sr%d", minor);
+ disk->fops = &sr_bdops;
+- disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
++ disk->flags = GENHD_FL_CD;
+ disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
+- disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT;
++ disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT |
++ DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE;
+
+ blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
+
+@@ -851,7 +853,7 @@ static void get_capabilities(struct scsi_cd *cd)
+
+
+ /* allocate transfer buffer */
+- buffer = kmalloc(512, GFP_KERNEL | GFP_DMA);
++ buffer = kmalloc(512, GFP_KERNEL);
+ if (!buffer) {
+ sr_printk(KERN_ERR, cd, "out of memory.\n");
+ return;
+diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
+index ddd00efc48825..fbdb5124d7f7d 100644
+--- a/drivers/scsi/sr_ioctl.c
++++ b/drivers/scsi/sr_ioctl.c
+@@ -41,7 +41,7 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
+ int result;
+ unsigned char *buffer;
+
+- buffer = kmalloc(32, GFP_KERNEL);
++ buffer = kzalloc(32, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+@@ -55,10 +55,13 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
+ cgc.data_direction = DMA_FROM_DEVICE;
+
+ result = sr_do_ioctl(cd, &cgc);
++ if (result)
++ goto err;
+
+ tochdr->cdth_trk0 = buffer[2];
+ tochdr->cdth_trk1 = buffer[3];
+
++err:
+ kfree(buffer);
+ return result;
+ }
+@@ -71,7 +74,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
+ int result;
+ unsigned char *buffer;
+
+- buffer = kmalloc(32, GFP_KERNEL);
++ buffer = kzalloc(32, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+@@ -86,6 +89,8 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
+ cgc.data_direction = DMA_FROM_DEVICE;
+
+ result = sr_do_ioctl(cd, &cgc);
++ if (result)
++ goto err;
+
+ tocentry->cdte_ctrl = buffer[5] & 0xf;
+ tocentry->cdte_adr = buffer[5] >> 4;
+@@ -98,6 +103,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
+ tocentry->cdte_addr.lba = (((((buffer[8] << 8) + buffer[9]) << 8)
+ + buffer[10]) << 8) + buffer[11];
+
++err:
+ kfree(buffer);
+ return result;
+ }
+@@ -384,7 +390,7 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
+ {
+ Scsi_CD *cd = cdi->handle;
+ struct packet_command cgc;
+- char *buffer = kmalloc(32, GFP_KERNEL);
++ char *buffer = kzalloc(32, GFP_KERNEL);
+ int result;
+
+ if (!buffer)
+@@ -400,10 +406,13 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
+ cgc.data_direction = DMA_FROM_DEVICE;
+ cgc.timeout = IOCTL_TIMEOUT;
+ result = sr_do_ioctl(cd, &cgc);
++ if (result)
++ goto err;
+
+ memcpy(mcn->medium_catalog_number, buffer + 9, 13);
+ mcn->medium_catalog_number[13] = 0;
+
++err:
+ kfree(buffer);
+ return result;
+ }
+diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c
+index 1f988a1b9166f..a61635326ae0a 100644
+--- a/drivers/scsi/sr_vendor.c
++++ b/drivers/scsi/sr_vendor.c
+@@ -131,7 +131,7 @@ int sr_set_blocklength(Scsi_CD *cd, int blocklength)
+ if (cd->vendor == VENDOR_TOSHIBA)
+ density = (blocklength > 2048) ? 0x81 : 0x83;
+
+- buffer = kmalloc(512, GFP_KERNEL | GFP_DMA);
++ buffer = kmalloc(512, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+@@ -179,7 +179,7 @@ int sr_cd_check(struct cdrom_device_info *cdi)
+ if (cd->cdi.mask & CDC_MULTI_SESSION)
+ return 0;
+
+- buffer = kmalloc(512, GFP_KERNEL | GFP_DMA);
++ buffer = kmalloc(512, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
+index ae8636d3780b6..9933722acfd96 100644
+--- a/drivers/scsi/st.c
++++ b/drivers/scsi/st.c
+@@ -32,6 +32,7 @@ static const char *verstr = "20160209";
+ #include <linux/slab.h>
+ #include <linux/errno.h>
+ #include <linux/mtio.h>
++#include <linux/major.h>
+ #include <linux/cdrom.h>
+ #include <linux/ioctl.h>
+ #include <linux/fcntl.h>
+diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
+index f1ba7f5b52a89..1ff9b8e85b09c 100644
+--- a/drivers/scsi/stex.c
++++ b/drivers/scsi/stex.c
+@@ -109,7 +109,9 @@ enum {
+ TASK_ATTRIBUTE_HEADOFQUEUE = 0x1,
+ TASK_ATTRIBUTE_ORDERED = 0x2,
+ TASK_ATTRIBUTE_ACA = 0x4,
++};
+
++enum {
+ SS_STS_NORMAL = 0x80000000,
+ SS_STS_DONE = 0x40000000,
+ SS_STS_HANDSHAKE = 0x20000000,
+@@ -121,7 +123,9 @@ enum {
+ SS_I2H_REQUEST_RESET = 0x2000,
+
+ SS_MU_OPERATIONAL = 0x80000000,
++};
+
++enum {
+ STEX_CDB_LENGTH = 16,
+ STATUS_VAR_LEN = 128,
+
+@@ -665,16 +669,17 @@ stex_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+ return 0;
+ case PASSTHRU_CMD:
+ if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) {
+- struct st_drvver ver;
++ const struct st_drvver ver = {
++ .major = ST_VER_MAJOR,
++ .minor = ST_VER_MINOR,
++ .oem = ST_OEM,
++ .build = ST_BUILD_VER,
++ .signature[0] = PASSTHRU_SIGNATURE,
++ .console_id = host->max_id - 1,
++ .host_no = hba->host->host_no,
++ };
+ size_t cp_len = sizeof(ver);
+
+- ver.major = ST_VER_MAJOR;
+- ver.minor = ST_VER_MINOR;
+- ver.oem = ST_OEM;
+- ver.build = ST_BUILD_VER;
+- ver.signature[0] = PASSTHRU_SIGNATURE;
+- ver.console_id = host->max_id - 1;
+- ver.host_no = hba->host->host_no;
+ cp_len = scsi_sg_copy_from_buffer(cmd, &ver, cp_len);
+ if (sizeof(ver) == cp_len)
+ cmd->result = DID_OK << 16;
+diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
+index 9eb1b88a29dde..5caf7bd5877f9 100644
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -344,16 +344,21 @@ enum storvsc_request_type {
+ };
+
+ /*
+- * SRB status codes and masks; a subset of the codes used here.
++ * SRB status codes and masks. In the 8-bit field, the two high order bits
++ * are flags, while the remaining 6 bits are an integer status code. The
++ * definitions here include only the subset of the integer status codes that
++ * are tested for in this driver.
+ */
+-
+ #define SRB_STATUS_AUTOSENSE_VALID 0x80
+ #define SRB_STATUS_QUEUE_FROZEN 0x40
+-#define SRB_STATUS_INVALID_LUN 0x20
+-#define SRB_STATUS_SUCCESS 0x01
+-#define SRB_STATUS_ABORTED 0x02
+-#define SRB_STATUS_ERROR 0x04
+-#define SRB_STATUS_DATA_OVERRUN 0x12
++
++/* SRB status integer codes */
++#define SRB_STATUS_SUCCESS 0x01
++#define SRB_STATUS_ABORTED 0x02
++#define SRB_STATUS_ERROR 0x04
++#define SRB_STATUS_INVALID_REQUEST 0x06
++#define SRB_STATUS_DATA_OVERRUN 0x12
++#define SRB_STATUS_INVALID_LUN 0x20
+
+ #define SRB_STATUS(status) \
+ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
+@@ -401,6 +406,7 @@ static void storvsc_on_channel_callback(void *context);
+ #define STORVSC_FC_MAX_LUNS_PER_TARGET 255
+ #define STORVSC_FC_MAX_TARGETS 128
+ #define STORVSC_FC_MAX_CHANNELS 8
++#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024))
+
+ #define STORVSC_IDE_MAX_LUNS_PER_TARGET 64
+ #define STORVSC_IDE_MAX_TARGETS 1
+@@ -1032,38 +1038,41 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
+ void (*process_err_fn)(struct work_struct *work);
+ struct hv_host_device *host_dev = shost_priv(host);
+
+- /*
+- * In some situations, Hyper-V sets multiple bits in the
+- * srb_status, such as ABORTED and ERROR. So process them
+- * individually, with the most specific bits first.
+- */
+-
+- if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
+- set_host_byte(scmnd, DID_NO_CONNECT);
+- process_err_fn = storvsc_remove_lun;
+- goto do_work;
+- }
++ switch (SRB_STATUS(vm_srb->srb_status)) {
++ case SRB_STATUS_ERROR:
++ case SRB_STATUS_ABORTED:
++ case SRB_STATUS_INVALID_REQUEST:
++ if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
++ /* Check for capacity change */
++ if ((asc == 0x2a) && (ascq == 0x9)) {
++ process_err_fn = storvsc_device_scan;
++ /* Retry the I/O that triggered this. */
++ set_host_byte(scmnd, DID_REQUEUE);
++ goto do_work;
++ }
+
+- if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
+- if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
+- /* Capacity data has changed */
+- (asc == 0x2a) && (ascq == 0x9)) {
+- process_err_fn = storvsc_device_scan;
+ /*
+- * Retry the I/O that triggered this.
++ * Check for "Operating parameters have changed"
++ * due to Hyper-V changing the VHD/VHDX BlockSize
++ * when adding/removing a differencing disk. This
++ * causes discard_granularity to change, so do a
++ * rescan to pick up the new granularity. We don't
++ * want scsi_report_sense() to output a message
++ * that a sysadmin wouldn't know what to do with.
+ */
+- set_host_byte(scmnd, DID_REQUEUE);
+- goto do_work;
+- }
+- }
++ if ((asc == 0x3f) && (ascq != 0x03) &&
++ (ascq != 0x0e)) {
++ process_err_fn = storvsc_device_scan;
++ set_host_byte(scmnd, DID_REQUEUE);
++ goto do_work;
++ }
+
+- if (vm_srb->srb_status & SRB_STATUS_ERROR) {
+- /*
+- * Let upper layer deal with error when
+- * sense message is present.
+- */
+- if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
++ /*
++ * Otherwise, let upper layer deal with the
++ * error when sense message is present
++ */
+ return;
++ }
+
+ /*
+ * If there is an error; offline the device since all
+@@ -1086,6 +1095,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
+ default:
+ set_host_byte(scmnd, DID_ERROR);
+ }
++ return;
++
++ case SRB_STATUS_INVALID_LUN:
++ set_host_byte(scmnd, DID_NO_CONNECT);
++ process_err_fn = storvsc_remove_lun;
++ goto do_work;
++
+ }
+ return;
+
+@@ -1611,6 +1627,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
+ {
+ blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
+
++ /* storvsc devices don't support MAINTENANCE_IN SCSI cmd */
++ sdevice->no_report_opcodes = 1;
+ sdevice->no_write_same = 1;
+
+ /*
+@@ -1714,10 +1732,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
+ */
+ static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
+ {
+-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
+- if (scmnd->device->host->transportt == fc_transport_template)
+- return fc_eh_timed_out(scmnd);
+-#endif
+ return BLK_EH_RESET_TIMER;
+ }
+
+@@ -1828,7 +1842,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+
+ length = scsi_bufflen(scmnd);
+ payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
+- payload_sz = sizeof(cmd_request->mpb);
++ payload_sz = 0;
+
+ if (sg_count) {
+ unsigned int hvpgoff, hvpfns_to_add;
+@@ -1836,10 +1850,10 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
+ unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
+ u64 hvpfn;
+
+- if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
++ payload_sz = (hvpg_count * sizeof(u64) +
++ sizeof(struct vmbus_packet_mpb_array));
+
+- payload_sz = (hvpg_count * sizeof(u64) +
+- sizeof(struct vmbus_packet_mpb_array));
++ if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
+ payload = kzalloc(payload_sz, GFP_ATOMIC);
+ if (!payload)
+ return SCSI_MLQUEUE_DEVICE_BUSY;
+@@ -1907,7 +1921,7 @@ static struct scsi_host_template scsi_driver = {
+ .cmd_per_lun = 2048,
+ .this_id = -1,
+ /* Ensure there are no gaps in presented sgls */
+- .virt_boundary_mask = PAGE_SIZE-1,
++ .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1,
+ .no_write_same = 1,
+ .track_queue_depth = 1,
+ .change_queue_depth = storvsc_change_queue_depth,
+@@ -1961,6 +1975,7 @@ static int storvsc_probe(struct hv_device *device,
+ int max_targets;
+ int max_channels;
+ int max_sub_channels = 0;
++ u32 max_xfer_bytes;
+
+ /*
+ * Based on the windows host we are running on,
+@@ -2049,12 +2064,31 @@ static int storvsc_probe(struct hv_device *device,
+ }
+ /* max cmd length */
+ host->max_cmd_len = STORVSC_MAX_CMD_LEN;
++ /*
++ * Any reasonable Hyper-V configuration should provide
++ * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE,
++ * protecting it from any weird value.
++ */
++ max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
++ if (is_fc)
++ max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
+
++ /* max_hw_sectors_kb */
++ host->max_sectors = max_xfer_bytes >> 9;
+ /*
+- * set the table size based on the info we got
+- * from the host.
++ * There are 2 requirements for Hyper-V storvsc sgl segments,
++ * based on which the below calculation for max segments is
++ * done:
++ *
++ * 1. Except for the first and last sgl segment, all sgl segments
++ * should be align to HV_HYP_PAGE_SIZE, that also means the
++ * maximum number of segments in a sgl can be calculated by
++ * dividing the total max transfer length by HV_HYP_PAGE_SIZE.
++ *
++ * 2. Except for the first and last, each entry in the SGL must
++ * have an offset that is a multiple of HV_HYP_PAGE_SIZE.
+ */
+- host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
++ host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
+ /*
+ * For non-IDE disks, the host supports multiple channels.
+ * Set the number of HW queues we are supporting.
+@@ -2076,7 +2110,7 @@ static int storvsc_probe(struct hv_device *device,
+ */
+ host_dev->handle_error_wq =
+ alloc_ordered_workqueue("storvsc_error_wq_%d",
+- WQ_MEM_RECLAIM,
++ 0,
+ host->host_no);
+ if (!host_dev->handle_error_wq) {
+ ret = -ENOMEM;
+diff --git a/drivers/scsi/ufs/tc-dwc-g210-pci.c b/drivers/scsi/ufs/tc-dwc-g210-pci.c
+index 679289e1a78e6..7b08e2e07cc5f 100644
+--- a/drivers/scsi/ufs/tc-dwc-g210-pci.c
++++ b/drivers/scsi/ufs/tc-dwc-g210-pci.c
+@@ -110,7 +110,6 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ return err;
+ }
+
+- pci_set_drvdata(pdev, hba);
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_allow(&pdev->dev);
+
+diff --git a/drivers/scsi/ufs/ti-j721e-ufs.c b/drivers/scsi/ufs/ti-j721e-ufs.c
+index eafe0db98d542..122d650d08102 100644
+--- a/drivers/scsi/ufs/ti-j721e-ufs.c
++++ b/drivers/scsi/ufs/ti-j721e-ufs.c
+@@ -29,11 +29,9 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev)
+ return PTR_ERR(regbase);
+
+ pm_runtime_enable(dev);
+- ret = pm_runtime_get_sync(dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(dev);
++ ret = pm_runtime_resume_and_get(dev);
++ if (ret < 0)
+ goto disable_pm;
+- }
+
+ /* Select MPHY refclk frequency */
+ clk = devm_clk_get(dev, NULL);
+diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
+index 80b3545dd17d6..a9ddb50d593cf 100644
+--- a/drivers/scsi/ufs/ufs-mediatek.c
++++ b/drivers/scsi/ufs/ufs-mediatek.c
+@@ -501,7 +501,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba)
+ struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+
+ host->reg_va09 = regulator_get(hba->dev, "va09");
+- if (!host->reg_va09)
++ if (IS_ERR(host->reg_va09))
+ dev_info(hba->dev, "failed to get va09");
+ else
+ host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL;
+@@ -949,7 +949,6 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+ * ufshcd_suspend() re-enabling regulators while vreg is still
+ * in low-power mode.
+ */
+- ufs_mtk_vreg_set_lpm(hba, true);
+ err = ufs_mtk_mphy_power_on(hba, false);
+ if (err)
+ goto fail;
+@@ -973,12 +972,13 @@ static int ufs_mtk_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+ {
+ int err;
+
++ if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
++ ufs_mtk_vreg_set_lpm(hba, false);
++
+ err = ufs_mtk_mphy_power_on(hba, true);
+ if (err)
+ goto fail;
+
+- ufs_mtk_vreg_set_lpm(hba, false);
+-
+ if (ufshcd_is_link_hibern8(hba)) {
+ err = ufs_mtk_link_set_hpm(hba);
+ if (err)
+@@ -1139,9 +1139,59 @@ static int ufs_mtk_remove(struct platform_device *pdev)
+ return 0;
+ }
+
++#ifdef CONFIG_PM_SLEEP
++int ufs_mtk_system_suspend(struct device *dev)
++{
++ struct ufs_hba *hba = dev_get_drvdata(dev);
++ int ret;
++
++ ret = ufshcd_system_suspend(dev);
++ if (ret)
++ return ret;
++
++ ufs_mtk_vreg_set_lpm(hba, true);
++
++ return 0;
++}
++
++int ufs_mtk_system_resume(struct device *dev)
++{
++ struct ufs_hba *hba = dev_get_drvdata(dev);
++
++ ufs_mtk_vreg_set_lpm(hba, false);
++
++ return ufshcd_system_resume(dev);
++}
++#endif
++
++int ufs_mtk_runtime_suspend(struct device *dev)
++{
++ struct ufs_hba *hba = dev_get_drvdata(dev);
++ int ret = 0;
++
++ ret = ufshcd_runtime_suspend(dev);
++ if (ret)
++ return ret;
++
++ ufs_mtk_vreg_set_lpm(hba, true);
++
++ return 0;
++}
++
++int ufs_mtk_runtime_resume(struct device *dev)
++{
++ struct ufs_hba *hba = dev_get_drvdata(dev);
++
++ ufs_mtk_vreg_set_lpm(hba, false);
++
++ return ufshcd_runtime_resume(dev);
++}
++
+ static const struct dev_pm_ops ufs_mtk_pm_ops = {
+- SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume)
+- SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL)
++ SET_SYSTEM_SLEEP_PM_OPS(ufs_mtk_system_suspend,
++ ufs_mtk_system_resume)
++ SET_RUNTIME_PM_OPS(ufs_mtk_runtime_suspend,
++ ufs_mtk_runtime_resume, NULL)
+ .prepare = ufshcd_suspend_prepare,
+ .complete = ufshcd_resume_complete,
+ };
+diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
+index 9d9770f1db4fb..f810b99ef5c51 100644
+--- a/drivers/scsi/ufs/ufs-qcom.c
++++ b/drivers/scsi/ufs/ufs-qcom.c
+@@ -637,12 +637,7 @@ static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+ return err;
+ }
+
+- err = ufs_qcom_ice_resume(host);
+- if (err)
+- return err;
+-
+- hba->is_sys_suspended = false;
+- return 0;
++ return ufs_qcom_ice_resume(host);
+ }
+
+ static void ufs_qcom_dev_ref_clk_ctrl(struct ufs_qcom_host *host, bool enable)
+@@ -683,8 +678,11 @@ static void ufs_qcom_dev_ref_clk_ctrl(struct ufs_qcom_host *host, bool enable)
+
+ writel_relaxed(temp, host->dev_ref_clk_ctrl_mmio);
+
+- /* ensure that ref_clk is enabled/disabled before we return */
+- wmb();
++ /*
++ * Make sure the write to ref_clk reaches the destination and
++ * not stored in a Write Buffer (WB).
++ */
++ readl(host->dev_ref_clk_ctrl_mmio);
+
+ /*
+ * If we call hibern8 exit after this, we need to make sure that
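The readl() that replaces the wmb() here is the standard MMIO read-back idiom: reading the register just written forces the posted write out of any intermediate write buffer before execution continues, which a barrier alone does not guarantee. A minimal kernel-style sketch of the idiom (not the driver's actual code; the register layout and enable bit are assumptions):

#include <linux/io.h>
#include <linux/bits.h>

/* Toggle an assumed enable bit, then read the register back so the posted
 * write is known to have reached the device before we return. */
static void toggle_dev_ref_clk(void __iomem *reg, bool enable)
{
	u32 val = readl_relaxed(reg);

	if (enable)
		val |= BIT(0);		/* assumed enable bit */
	else
		val &= ~BIT(0);

	writel_relaxed(val, reg);
	readl(reg);			/* read-back flush, as in the hunk above */
}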
+diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
+index 51424557810da..0920530a72d28 100644
+--- a/drivers/scsi/ufs/ufshcd-pci.c
++++ b/drivers/scsi/ufs/ufshcd-pci.c
+@@ -421,6 +421,19 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba)
+ return err;
+ }
+
++static int ufs_intel_adl_init(struct ufs_hba *hba)
++{
++ hba->nop_out_timeout = 200;
++ hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
++ return ufs_intel_common_init(hba);
++}
++
++static int ufs_intel_mtl_init(struct ufs_hba *hba)
++{
++ hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN;
++ return ufs_intel_common_init(hba);
++}
++
+ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
+ .name = "intel-pci",
+ .init = ufs_intel_common_init,
+@@ -449,6 +462,25 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = {
+ .device_reset = ufs_intel_device_reset,
+ };
+
++static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = {
++ .name = "intel-pci",
++ .init = ufs_intel_adl_init,
++ .exit = ufs_intel_common_exit,
++ .link_startup_notify = ufs_intel_link_startup_notify,
++ .resume = ufs_intel_resume,
++ .device_reset = ufs_intel_device_reset,
++};
++
++static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = {
++ .name = "intel-pci",
++ .init = ufs_intel_mtl_init,
++ .exit = ufs_intel_common_exit,
++ .hce_enable_notify = ufs_intel_hce_enable_notify,
++ .link_startup_notify = ufs_intel_link_startup_notify,
++ .resume = ufs_intel_resume,
++ .device_reset = ufs_intel_device_reset,
++};
++
+ #ifdef CONFIG_PM_SLEEP
+ static int ufshcd_pci_restore(struct device *dev)
+ {
+@@ -522,8 +554,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ return err;
+ }
+
+- pci_set_drvdata(pdev, hba);
+-
+ hba->vops = (struct ufs_hba_variant_ops *)id->driver_data;
+
+ err = ufshcd_init(hba, mmio_base, pdev->irq);
+@@ -563,6 +593,10 @@ static const struct pci_device_id ufshcd_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops },
+ { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_ehl_hba_vops },
+ { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops },
++ { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
++ { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
++ { PCI_VDEVICE(INTEL, 0x7E47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops },
++ { PCI_VDEVICE(INTEL, 0xA847), (kernel_ulong_t)&ufs_intel_mtl_hba_vops },
+ { } /* terminate list */
+ };
+
+diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
+index 8859c13f4e091..adc302b1a57ae 100644
+--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
+@@ -91,7 +91,12 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba)
+
+ clki->min_freq = clkfreq[i];
+ clki->max_freq = clkfreq[i+1];
+- clki->name = kstrdup(name, GFP_KERNEL);
++ clki->name = devm_kstrdup(dev, name, GFP_KERNEL);
++ if (!clki->name) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
+ if (!strcmp(name, "ref_clk"))
+ clki->keep_link_active = true;
+ dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz",
+@@ -102,9 +107,20 @@ out:
+ return ret;
+ }
+
++static bool phandle_exists(const struct device_node *np,
++ const char *phandle_name, int index)
++{
++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
++
++ if (parse_np)
++ of_node_put(parse_np);
++
++ return parse_np != NULL;
++}
++
+ #define MAX_PROP_SIZE 32
+ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+- struct ufs_vreg **out_vreg)
++ struct ufs_vreg **out_vreg)
+ {
+ char prop_name[MAX_PROP_SIZE];
+ struct ufs_vreg *vreg = NULL;
+@@ -116,7 +132,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+ }
+
+ snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
+- if (!of_parse_phandle(np, prop_name, 0)) {
++ if (!phandle_exists(np, prop_name, 0)) {
+ dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
+ __func__, prop_name);
+ goto out;
+@@ -126,7 +142,9 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+ if (!vreg)
+ return -ENOMEM;
+
+- vreg->name = kstrdup(name, GFP_KERNEL);
++ vreg->name = devm_kstrdup(dev, name, GFP_KERNEL);
++ if (!vreg->name)
++ return -ENOMEM;
+
+ snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name);
+ if (of_property_read_u32(np, prop_name, &vreg->max_uA)) {
+@@ -361,8 +379,6 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
+ goto dealloc_host;
+ }
+
+- platform_set_drvdata(pdev, hba);
+-
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
+index 41f2ff35f82b2..d00d263705e15 100644
+--- a/drivers/scsi/ufs/ufshcd.c
++++ b/drivers/scsi/ufs/ufshcd.c
+@@ -112,8 +112,13 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
+ if (!regs)
+ return -ENOMEM;
+
+- for (pos = 0; pos < len; pos += 4)
++ for (pos = 0; pos < len; pos += 4) {
++ if (offset == 0 &&
++ pos >= REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER &&
++ pos <= REG_UIC_ERROR_CODE_DME)
++ continue;
+ regs[pos / 4] = ufshcd_readl(hba, offset + pos);
++ }
+
+ ufshcd_hex_dump(prefix, regs, len);
+ kfree(regs);
+@@ -125,8 +130,9 @@ EXPORT_SYMBOL_GPL(ufshcd_dump_regs);
+ enum {
+ UFSHCD_MAX_CHANNEL = 0,
+ UFSHCD_MAX_ID = 1,
+- UFSHCD_CMD_PER_LUN = 32,
+- UFSHCD_CAN_QUEUE = 32,
++ UFSHCD_NUM_RESERVED = 1,
++ UFSHCD_CMD_PER_LUN = 32 - UFSHCD_NUM_RESERVED,
++ UFSHCD_CAN_QUEUE = 32 - UFSHCD_NUM_RESERVED,
+ };
+
+ /* UFSHCD error handling flags */
+@@ -222,8 +228,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba);
+ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd);
+ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
+ static void ufshcd_hba_exit(struct ufs_hba *hba);
+-static int ufshcd_clear_ua_wluns(struct ufs_hba *hba);
+-static int ufshcd_probe_hba(struct ufs_hba *hba, bool async);
++static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params);
+ static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on);
+ static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba);
+ static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba);
+@@ -358,7 +363,7 @@ static void ufshcd_add_uic_command_trace(struct ufs_hba *hba,
+ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
+ enum ufs_trace_str_t str_t)
+ {
+- u64 lba;
++ u64 lba = 0;
+ u8 opcode = 0, group_id = 0;
+ u32 intr, doorbell;
+ struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+@@ -375,7 +380,6 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
+ return;
+
+ opcode = cmd->cmnd[0];
+- lba = scsi_get_lba(cmd);
+
+ if (opcode == READ_10 || opcode == WRITE_10) {
+ /*
+@@ -383,6 +387,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
+ */
+ transfer_len =
+ be32_to_cpu(lrbp->ucd_req_ptr->sc.exp_data_transfer_len);
++ lba = scsi_get_lba(cmd);
+ if (opcode == WRITE_10)
+ group_id = lrbp->cmd->cmnd[6];
+ } else if (opcode == UNMAP) {
+@@ -390,6 +395,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
+ * The number of Bytes to be unmapped beginning with the lba.
+ */
+ transfer_len = blk_rq_bytes(rq);
++ lba = scsi_get_lba(cmd);
+ }
+
+ intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
+@@ -576,7 +582,12 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba)
+ "INVALID MODE",
+ };
+
+- dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
++ /*
++ * Using dev_dbg to avoid messages during runtime PM to avoid
++ * never-ending cycles of messages written back to storage by user space
++ * causing runtime resume, causing more messages and so on.
++ */
++ dev_dbg(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
+ __func__,
+ hba->pwr_info.gear_rx, hba->pwr_info.gear_tx,
+ hba->pwr_info.lane_rx, hba->pwr_info.lane_tx,
+@@ -1174,12 +1185,14 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
+ * clock scaling is in progress
+ */
+ ufshcd_scsi_block_requests(hba);
++ mutex_lock(&hba->wb_mutex);
+ down_write(&hba->clk_scaling_lock);
+
+ if (!hba->clk_scaling.is_allowed ||
+ ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
+ ret = -EBUSY;
+ up_write(&hba->clk_scaling_lock);
++ mutex_unlock(&hba->wb_mutex);
+ ufshcd_scsi_unblock_requests(hba);
+ goto out;
+ }
+@@ -1191,12 +1204,15 @@ out:
+ return ret;
+ }
+
+-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
++static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool scale_up)
+ {
+- if (writelock)
+- up_write(&hba->clk_scaling_lock);
+- else
+- up_read(&hba->clk_scaling_lock);
++ up_write(&hba->clk_scaling_lock);
++
++ /* Enable Write Booster if we have scaled up else disable it */
++ ufshcd_wb_toggle(hba, scale_up);
++
++ mutex_unlock(&hba->wb_mutex);
++
+ ufshcd_scsi_unblock_requests(hba);
+ ufshcd_release(hba);
+ }
+@@ -1213,7 +1229,6 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
+ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
+ {
+ int ret = 0;
+- bool is_writelock = true;
+
+ ret = ufshcd_clock_scaling_prepare(hba);
+ if (ret)
+@@ -1242,13 +1257,8 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
+ }
+ }
+
+- /* Enable Write Booster if we have scaled up else disable it */
+- downgrade_write(&hba->clk_scaling_lock);
+- is_writelock = false;
+- ufshcd_wb_toggle(hba, scale_up);
+-
+ out_unprepare:
+- ufshcd_clock_scaling_unprepare(hba, is_writelock);
++ ufshcd_clock_scaling_unprepare(hba, ret, scale_up);
+ return ret;
+ }
+
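The prepare/unprepare pair above fixes a lock order: wb_mutex is taken before the clk_scaling_lock rwsem, and the Write Booster toggle now runs after the rwsem has been dropped but while wb_mutex still serialises it against a concurrent allow/disallow. A kernel-style sketch of that ordering, with illustrative structure and field names standing in for the real hba members:

#include <linux/mutex.h>
#include <linux/rwsem.h>

struct scaling_locks {
	struct mutex wb_mutex;			/* stands in for hba->wb_mutex */
	struct rw_semaphore clk_scaling_lock;	/* stands in for hba->clk_scaling_lock */
};

static void scaling_prepare(struct scaling_locks *l)
{
	mutex_lock(&l->wb_mutex);
	down_write(&l->clk_scaling_lock);
}

static void scaling_unprepare(struct scaling_locks *l, bool scale_up)
{
	up_write(&l->clk_scaling_lock);
	/* the Write Booster toggle for scale_up would run here, under wb_mutex */
	mutex_unlock(&l->wb_mutex);
}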
+@@ -1658,7 +1668,8 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
+ bool flush_result;
+ unsigned long flags;
+
+- if (!ufshcd_is_clkgating_allowed(hba))
++ if (!ufshcd_is_clkgating_allowed(hba) ||
++ !hba->clk_gating.is_initialized)
+ goto out;
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ hba->clk_gating.active_reqs++;
+@@ -1818,7 +1829,7 @@ static void __ufshcd_release(struct ufs_hba *hba)
+
+ if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended ||
+ hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL ||
+- hba->outstanding_tasks ||
++ hba->outstanding_tasks || !hba->clk_gating.is_initialized ||
+ hba->active_uic_cmd || hba->uic_async_done ||
+ hba->clk_gating.state == CLKS_OFF)
+ return;
+@@ -1953,11 +1964,15 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba)
+ {
+ if (!hba->clk_gating.is_initialized)
+ return;
++
+ ufshcd_remove_clk_gating_sysfs(hba);
+- cancel_work_sync(&hba->clk_gating.ungate_work);
+- cancel_delayed_work_sync(&hba->clk_gating.gate_work);
+- destroy_workqueue(hba->clk_gating.clk_gating_workq);
++
++ /* Ungate the clock if necessary. */
++ ufshcd_hold(hba, false);
+ hba->clk_gating.is_initialized = false;
++ ufshcd_release(hba);
++
++ destroy_workqueue(hba->clk_gating.clk_gating_workq);
+ }
+
+ /* Must be called with host lock acquired */
+@@ -2181,6 +2196,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba)
+ hba->nutrs = (hba->capabilities & MASK_TRANSFER_REQUESTS_SLOTS) + 1;
+ hba->nutmrs =
+ ((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1;
++ hba->reserved_slot = hba->nutrs - 1;
+
+ /* Read crypto capabilities */
+ err = ufshcd_hba_init_crypto_capabilities(hba);
+@@ -2683,6 +2699,12 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+ if (!down_read_trylock(&hba->clk_scaling_lock))
+ return SCSI_MLQUEUE_HOST_BUSY;
+
++ /*
++ * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
++ * calls.
++ */
++ rcu_read_lock();
++
+ switch (hba->ufshcd_state) {
+ case UFSHCD_STATE_OPERATIONAL:
+ case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
+@@ -2749,7 +2771,10 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
+ }
+
+ ufshcd_send_command(hba, tag);
++
+ out:
++ rcu_read_unlock();
++
+ up_read(&hba->clk_scaling_lock);
+
+ if (ufs_trigger_eh()) {
+@@ -2906,30 +2931,15 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
+ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
+ enum dev_cmd_type cmd_type, int timeout)
+ {
+- struct request_queue *q = hba->cmd_queue;
+ DECLARE_COMPLETION_ONSTACK(wait);
+- struct request *req;
++ const u32 tag = hba->reserved_slot;
+ struct ufshcd_lrb *lrbp;
+ int err;
+- int tag;
+
+- down_read(&hba->clk_scaling_lock);
++ /* Protects use of hba->reserved_slot. */
++ lockdep_assert_held(&hba->dev_cmd.lock);
+
+- /*
+- * Get free slot, sleep if slots are unavailable.
+- * Even though we use wait_event() which sleeps indefinitely,
+- * the maximum wait time is bounded by SCSI request timeout.
+- */
+- req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+- if (IS_ERR(req)) {
+- err = PTR_ERR(req);
+- goto out_unlock;
+- }
+- tag = req->tag;
+- WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
+- /* Set the timeout such that the SCSI error handler is not activated. */
+- req->timeout = msecs_to_jiffies(2 * timeout);
+- blk_mq_start_request(req);
++ down_read(&hba->clk_scaling_lock);
+
+ lrbp = &hba->lrb[tag];
+ WARN_ON(lrbp->cmd);
+@@ -2947,8 +2957,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
+ (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
+
+ out:
+- blk_put_request(req);
+-out_unlock:
+ up_read(&hba->clk_scaling_lock);
+ return err;
+ }
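What replaces the blk_get_request() sequence above is a single reserved tag: reserved_slot is fixed at nutrs - 1, the SCSI host is told about one slot fewer, and dev_cmd.lock serialises every internal user of that tag. A kernel-style sketch of the pattern with assumed names rather than the real hba layout:

#include <linux/mutex.h>

struct dev_cmd_slot {
	struct mutex lock;	/* plays the role of hba->dev_cmd.lock */
	u32 reserved_tag;	/* nutrs - 1, never handed to the block layer */
};

/* Issue one internal command on the reserved tag; the mutex guarantees the
 * slot has a single user at a time, so no request allocation is needed. */
static int issue_dev_cmd(struct dev_cmd_slot *slot)
{
	int err;

	mutex_lock(&slot->lock);
	err = 0;		/* hypothetical submission on slot->reserved_tag */
	mutex_unlock(&slot->lock);

	return err;
}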
+@@ -4073,8 +4081,6 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
+ if (ret)
+ dev_err(hba->dev, "%s: link recovery failed, err %d",
+ __func__, ret);
+- else
+- ufshcd_clear_ua_wluns(hba);
+
+ return ret;
+ }
+@@ -4980,6 +4986,12 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
+ pm_runtime_get_noresume(&sdev->sdev_gendev);
+ else if (ufshcd_is_rpm_autosuspend_allowed(hba))
+ sdev->rpm_autosuspend = 1;
++ /*
++ * Do not print messages during runtime PM to avoid never-ending cycles
++ * of messages written back to storage by user space causing runtime
++ * resume, causing more messages and so on.
++ */
++ sdev->silence_suspend = 1;
+
+ ufshcd_crypto_setup_rq_keyslot_manager(hba, q);
+
+@@ -5634,7 +5646,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable)
+ }
+
+ hba->dev_info.wb_enabled = enable;
+- dev_info(hba->dev, "%s Write Booster %s\n",
++ dev_dbg(hba->dev, "%s Write Booster %s\n",
+ __func__, enable ? "enabled" : "disabled");
+
+ return ret;
+@@ -5896,11 +5908,21 @@ static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
+ }
+ }
+
++static void ufshcd_force_error_recovery(struct ufs_hba *hba)
++{
++ spin_lock_irq(hba->host->host_lock);
++ hba->force_reset = true;
++ ufshcd_schedule_eh_work(hba);
++ spin_unlock_irq(hba->host->host_lock);
++}
++
+ static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
+ {
++ mutex_lock(&hba->wb_mutex);
+ down_write(&hba->clk_scaling_lock);
+ hba->clk_scaling.is_allowed = allow;
+ up_write(&hba->clk_scaling_lock);
++ mutex_unlock(&hba->wb_mutex);
+ }
+
+ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
+@@ -5948,8 +5970,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
+ }
+ ufshcd_scsi_block_requests(hba);
+ /* Drain ufshcd_queuecommand() */
+- down_write(&hba->clk_scaling_lock);
+- up_write(&hba->clk_scaling_lock);
++ synchronize_rcu();
+ cancel_work_sync(&hba->eeh_work);
+ }
+
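Together with the rcu_read_lock()/rcu_read_unlock() pair added to ufshcd_queuecommand(), the synchronize_rcu() here acts as a drain barrier: it returns only after every submission that entered its read-side section beforehand has left it. A kernel-style sketch of the idiom with placeholder bodies:

#include <linux/rcupdate.h>

/* Submission side: the whole queuecommand body sits in a read-side section. */
static void submit_one_command(void)
{
	rcu_read_lock();
	/* inspect controller state and issue the command here */
	rcu_read_unlock();
}

/* Error-handler side: wait for every in-flight submit_one_command() section
 * that started before this call to finish. */
static void drain_submissions(void)
{
	synchronize_rcu();
}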
+@@ -5959,7 +5980,6 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
+ ufshcd_release(hba);
+ if (ufshcd_is_clkscaling_supported(hba))
+ ufshcd_clk_scaling_suspend(hba, false);
+- ufshcd_clear_ua_wluns(hba);
+ ufshcd_rpm_put(hba);
+ }
+
+@@ -6386,9 +6406,8 @@ static irqreturn_t ufshcd_tmc_handler(struct ufs_hba *hba)
+ irqreturn_t ret = IRQ_NONE;
+ int tag;
+
+- pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
+-
+ spin_lock_irqsave(hba->host->host_lock, flags);
++ pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
+ issued = hba->outstanding_tasks & ~pending;
+ for_each_set_bit(tag, &issued, hba->nutmrs) {
+ struct request *req = hba->tmf_rqs[tag];
+@@ -6545,11 +6564,6 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
+ err = wait_for_completion_io_timeout(&wait,
+ msecs_to_jiffies(TM_CMD_TIMEOUT));
+ if (!err) {
+- /*
+- * Make sure that ufshcd_compl_tm() does not trigger a
+- * use-after-free.
+- */
+- req->end_io_data = NULL;
+ ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_ERR);
+ dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
+ __func__, tm_function);
+@@ -6645,28 +6659,16 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
+ enum dev_cmd_type cmd_type,
+ enum query_opcode desc_op)
+ {
+- struct request_queue *q = hba->cmd_queue;
+ DECLARE_COMPLETION_ONSTACK(wait);
+- struct request *req;
++ const u32 tag = hba->reserved_slot;
+ struct ufshcd_lrb *lrbp;
+ int err = 0;
+- int tag;
+ u8 upiu_flags;
+
+- down_read(&hba->clk_scaling_lock);
+-
+- req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+- if (IS_ERR(req)) {
+- err = PTR_ERR(req);
+- goto out_unlock;
+- }
+- tag = req->tag;
+- WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
++ /* Protects use of hba->reserved_slot. */
++ lockdep_assert_held(&hba->dev_cmd.lock);
+
+- if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
+- err = -EBUSY;
+- goto out;
+- }
++ down_read(&hba->clk_scaling_lock);
+
+ lrbp = &hba->lrb[tag];
+ WARN_ON(lrbp->cmd);
+@@ -6735,9 +6737,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
+ ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
+ (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
+
+-out:
+- blk_put_request(req);
+-out_unlock:
+ up_read(&hba->clk_scaling_lock);
+ return err;
+ }
+@@ -7044,6 +7043,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
+ goto release;
+ }
+
++ lrbp->cmd = NULL;
+ err = SUCCESS;
+
+ release:
+@@ -7233,7 +7233,13 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba,
+
+ if (!hba->vreg_info.vcc || !hba->vreg_info.vccq ||
+ !hba->vreg_info.vccq2) {
+- dev_err(hba->dev,
++ /*
++ * Using dev_dbg to avoid messages during runtime PM to avoid
++ * never-ending cycles of messages written back to storage by
++ * user space causing runtime resume, causing more messages and
++ * so on.
++ */
++ dev_dbg(hba->dev,
+ "%s: Regulator capability was not set, actvIccLevel=%d",
+ __func__, icc_level);
+ goto out;
+@@ -7875,8 +7881,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
+ if (ret)
+ goto out;
+
+- ufshcd_clear_ua_wluns(hba);
+-
+ /* Initialize devfreq after UFS device is detected */
+ if (ufshcd_is_clkscaling_supported(hba)) {
+ memcpy(&hba->clk_scaling.saved_pwr_info.info,
+@@ -7902,116 +7906,6 @@ out:
+ return ret;
+ }
+
+-static void ufshcd_request_sense_done(struct request *rq, blk_status_t error)
+-{
+- if (error != BLK_STS_OK)
+- pr_err("%s: REQUEST SENSE failed (%d)\n", __func__, error);
+- kfree(rq->end_io_data);
+- blk_put_request(rq);
+-}
+-
+-static int
+-ufshcd_request_sense_async(struct ufs_hba *hba, struct scsi_device *sdev)
+-{
+- /*
+- * Some UFS devices clear unit attention condition only if the sense
+- * size used (UFS_SENSE_SIZE in this case) is non-zero.
+- */
+- static const u8 cmd[6] = {REQUEST_SENSE, 0, 0, 0, UFS_SENSE_SIZE, 0};
+- struct scsi_request *rq;
+- struct request *req;
+- char *buffer;
+- int ret;
+-
+- buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL);
+- if (!buffer)
+- return -ENOMEM;
+-
+- req = blk_get_request(sdev->request_queue, REQ_OP_DRV_IN,
+- /*flags=*/BLK_MQ_REQ_PM);
+- if (IS_ERR(req)) {
+- ret = PTR_ERR(req);
+- goto out_free;
+- }
+-
+- ret = blk_rq_map_kern(sdev->request_queue, req,
+- buffer, UFS_SENSE_SIZE, GFP_NOIO);
+- if (ret)
+- goto out_put;
+-
+- rq = scsi_req(req);
+- rq->cmd_len = ARRAY_SIZE(cmd);
+- memcpy(rq->cmd, cmd, rq->cmd_len);
+- rq->retries = 3;
+- req->timeout = 1 * HZ;
+- req->rq_flags |= RQF_PM | RQF_QUIET;
+- req->end_io_data = buffer;
+-
+- blk_execute_rq_nowait(/*bd_disk=*/NULL, req, /*at_head=*/true,
+- ufshcd_request_sense_done);
+- return 0;
+-
+-out_put:
+- blk_put_request(req);
+-out_free:
+- kfree(buffer);
+- return ret;
+-}
+-
+-static int ufshcd_clear_ua_wlun(struct ufs_hba *hba, u8 wlun)
+-{
+- struct scsi_device *sdp;
+- unsigned long flags;
+- int ret = 0;
+-
+- spin_lock_irqsave(hba->host->host_lock, flags);
+- if (wlun == UFS_UPIU_UFS_DEVICE_WLUN)
+- sdp = hba->sdev_ufs_device;
+- else if (wlun == UFS_UPIU_RPMB_WLUN)
+- sdp = hba->sdev_rpmb;
+- else
+- BUG();
+- if (sdp) {
+- ret = scsi_device_get(sdp);
+- if (!ret && !scsi_device_online(sdp)) {
+- ret = -ENODEV;
+- scsi_device_put(sdp);
+- }
+- } else {
+- ret = -ENODEV;
+- }
+- spin_unlock_irqrestore(hba->host->host_lock, flags);
+- if (ret)
+- goto out_err;
+-
+- ret = ufshcd_request_sense_async(hba, sdp);
+- scsi_device_put(sdp);
+-out_err:
+- if (ret)
+- dev_err(hba->dev, "%s: UAC clear LU=%x ret = %d\n",
+- __func__, wlun, ret);
+- return ret;
+-}
+-
+-static int ufshcd_clear_ua_wluns(struct ufs_hba *hba)
+-{
+- int ret = 0;
+-
+- if (!hba->wlun_dev_clr_ua)
+- goto out;
+-
+- ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_UFS_DEVICE_WLUN);
+- if (!ret)
+- ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_RPMB_WLUN);
+- if (!ret)
+- hba->wlun_dev_clr_ua = false;
+-out:
+- if (ret)
+- dev_err(hba->dev, "%s: Failed to clear UAC WLUNS ret = %d\n",
+- __func__, ret);
+- return ret;
+-}
+-
+ /**
+ * ufshcd_probe_hba - probe hba to detect device and initialize it
+ * @hba: per-adapter instance
+@@ -8062,8 +7956,6 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
+ /* UFS device is also active now */
+ ufshcd_set_ufs_dev_active(hba);
+ ufshcd_force_reset_auto_bkops(hba);
+- hba->wlun_dev_clr_ua = true;
+- hba->wlun_rpmb_clr_ua = true;
+
+ /* Gear up to HS gear if supported */
+ if (hba->max_pwr_info.is_valid) {
+@@ -8591,7 +8483,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
+ * @pwr_mode: device power mode to set
+ *
+ * Returns 0 if requested power mode is set successfully
+- * Returns non-zero if failed to set the requested power mode
++ * Returns < 0 if failed to set the requested power mode
+ */
+ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
+ enum ufs_dev_pwr_mode pwr_mode)
+@@ -8600,7 +8492,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
+ struct scsi_sense_hdr sshdr;
+ struct scsi_device *sdp;
+ unsigned long flags;
+- int ret;
++ int ret, retries;
+
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ sdp = hba->sdev_ufs_device;
+@@ -8625,8 +8517,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
+ * handling context.
+ */
+ hba->host->eh_noresume = 1;
+- if (hba->wlun_dev_clr_ua)
+- ufshcd_clear_ua_wlun(hba, UFS_UPIU_UFS_DEVICE_WLUN);
+
+ cmd[4] = pwr_mode << 4;
+
+@@ -8635,14 +8525,23 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
+ * callbacks hence set the RQF_PM flag so that it doesn't resume the
+ * already suspended childs.
+ */
+- ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
+- START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL);
++ for (retries = 3; retries > 0; --retries) {
++ ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
++ HZ, 0, 0, RQF_PM, NULL);
++ if (!scsi_status_is_check_condition(ret) ||
++ !scsi_sense_valid(&sshdr) ||
++ sshdr.sense_key != UNIT_ATTENTION)
++ break;
++ }
+ if (ret) {
+ sdev_printk(KERN_WARNING, sdp,
+ "START_STOP failed for power mode: %d, result %x\n",
+ pwr_mode, ret);
+- if (ret > 0 && scsi_sense_valid(&sshdr))
+- scsi_print_sense_hdr(sdp, NULL, &sshdr);
++ if (ret > 0) {
++ if (scsi_sense_valid(&sshdr))
++ scsi_print_sense_hdr(sdp, NULL, &sshdr);
++ ret = -EIO;
++ }
+ }
+
+ if (!ret)
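The retry loop added to ufshcd_set_dev_pwr_mode() covers the case where the device answers START STOP UNIT with CHECK CONDITION and UNIT ATTENTION sense, which only reports a state change; repeating the command a bounded number of times normally succeeds. A standalone sketch of that shape, with hypothetical stand-ins for the real SCSI submission and sense-key check:

#include <stdbool.h>

/* hypothetical stand-ins for scsi_execute() and the UNIT ATTENTION test */
static int send_start_stop(void) { return 0; }
static bool unit_attention(int status) { return status == 2; }

static int start_stop_with_retry(void)
{
	int ret = 0;
	int retries;

	for (retries = 3; retries > 0; --retries) {
		ret = send_start_stop();
		if (!unit_attention(ret))	/* anything but UA ends the loop */
			break;
	}
	return ret;
}

int main(void) { return start_stop_with_retry(); }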
+@@ -8857,8 +8756,16 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+ * that performance might be impacted.
+ */
+ ret = ufshcd_urgent_bkops(hba);
+- if (ret)
++ if (ret) {
++ /*
++			 * If we return an error in the suspend flow, I/O will hang.
++			 * Trigger the error handler and abort the suspend so that
++			 * error recovery can run.
++ */
++ ufshcd_force_error_recovery(hba);
++ ret = -EBUSY;
+ goto enable_scaling;
++ }
+ } else {
+ /* make sure that auto bkops is disabled */
+ ufshcd_disable_auto_bkops(hba);
+@@ -8885,6 +8792,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+
+ if (!hba->dev_info.b_rpm_dev_flush_capable) {
+ ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode);
++ if (ret && pm_op != UFS_SHUTDOWN_PM) {
++ /*
++			 * If we return an error in the suspend flow, I/O will hang.
++			 * Trigger the error handler and abort the suspend so that
++			 * error recovery can run.
++ */
++ ufshcd_force_error_recovery(hba);
++ ret = -EBUSY;
++ }
+ if (ret)
+ goto enable_scaling;
+ }
+@@ -8896,6 +8812,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
+ */
+ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba);
+ ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops);
++ if (ret && pm_op != UFS_SHUTDOWN_PM) {
++ /*
++			 * If we return an error in the suspend flow, I/O will hang.
++			 * Trigger the error handler and abort the suspend so that
++			 * error recovery can run.
++ */
++ ufshcd_force_error_recovery(hba);
++ ret = -EBUSY;
++ }
+ if (ret)
+ goto set_dev_active;
+
+@@ -9357,12 +9282,8 @@ EXPORT_SYMBOL(ufshcd_runtime_resume);
+ int ufshcd_shutdown(struct ufs_hba *hba)
+ {
+ if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba))
+- goto out;
+-
+- pm_runtime_get_sync(hba->dev);
++ ufshcd_suspend(hba);
+
+- ufshcd_suspend(hba);
+-out:
+ hba->is_powered = false;
+ /* allow force shutdown even in case of errors */
+ return 0;
+@@ -9485,6 +9406,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
+ struct device *dev = hba->dev;
+ char eh_wq_name[sizeof("ufs_eh_wq_00")];
+
++ /*
++ * dev_set_drvdata() must be called before any callbacks are registered
++ * that use dev_get_drvdata() (frequency scaling, clock scaling, hwmon,
++ * sysfs).
++ */
++ dev_set_drvdata(dev, hba);
++
+ if (!mmio_base) {
+ dev_err(hba->dev,
+ "Invalid memory reference for mmio_base is NULL\n");
+@@ -9527,8 +9455,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
+ /* Configure LRB */
+ ufshcd_host_memory_configure(hba);
+
+- host->can_queue = hba->nutrs;
+- host->cmd_per_lun = hba->nutrs;
++ host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
++ host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED;
+ host->max_id = UFSHCD_MAX_ID;
+ host->max_lun = UFS_MAX_LUNS;
+ host->max_channel = UFSHCD_MAX_CHANNEL;
+@@ -9561,6 +9489,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
+ /* Initialize mutex for exception event control */
+ mutex_init(&hba->ee_ctrl_mutex);
+
++ mutex_init(&hba->wb_mutex);
+ init_rwsem(&hba->clk_scaling_lock);
+
+ ufshcd_init_clk_gating(hba);
+@@ -9699,10 +9628,6 @@ void ufshcd_resume_complete(struct device *dev)
+ ufshcd_rpm_put(hba);
+ hba->complete_put = false;
+ }
+- if (hba->rpmb_complete_put) {
+- ufshcd_rpmb_rpm_put(hba);
+- hba->rpmb_complete_put = false;
+- }
+ }
+ EXPORT_SYMBOL_GPL(ufshcd_resume_complete);
+
+@@ -9725,10 +9650,6 @@ int ufshcd_suspend_prepare(struct device *dev)
+ }
+ hba->complete_put = true;
+ }
+- if (hba->sdev_rpmb) {
+- ufshcd_rpmb_rpm_get_sync(hba);
+- hba->rpmb_complete_put = true;
+- }
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(ufshcd_suspend_prepare);
+@@ -9797,49 +9718,6 @@ static struct scsi_driver ufs_dev_wlun_template = {
+ },
+ };
+
+-static int ufshcd_rpmb_probe(struct device *dev)
+-{
+- return is_rpmb_wlun(to_scsi_device(dev)) ? 0 : -ENODEV;
+-}
+-
+-static inline int ufshcd_clear_rpmb_uac(struct ufs_hba *hba)
+-{
+- int ret = 0;
+-
+- if (!hba->wlun_rpmb_clr_ua)
+- return 0;
+- ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_RPMB_WLUN);
+- if (!ret)
+- hba->wlun_rpmb_clr_ua = 0;
+- return ret;
+-}
+-
+-#ifdef CONFIG_PM
+-static int ufshcd_rpmb_resume(struct device *dev)
+-{
+- struct ufs_hba *hba = wlun_dev_to_hba(dev);
+-
+- if (hba->sdev_rpmb)
+- ufshcd_clear_rpmb_uac(hba);
+- return 0;
+-}
+-#endif
+-
+-static const struct dev_pm_ops ufs_rpmb_pm_ops = {
+- SET_RUNTIME_PM_OPS(NULL, ufshcd_rpmb_resume, NULL)
+- SET_SYSTEM_SLEEP_PM_OPS(NULL, ufshcd_rpmb_resume)
+-};
+-
+-/* ufs_rpmb_wlun_template - Describes UFS RPMB WLUN. Used only to send UAC. */
+-static struct scsi_driver ufs_rpmb_wlun_template = {
+- .gendrv = {
+- .name = "ufs_rpmb_wlun",
+- .owner = THIS_MODULE,
+- .probe = ufshcd_rpmb_probe,
+- .pm = &ufs_rpmb_pm_ops,
+- },
+-};
+-
+ static int __init ufshcd_core_init(void)
+ {
+ int ret;
+@@ -9848,24 +9726,13 @@ static int __init ufshcd_core_init(void)
+
+ ret = scsi_register_driver(&ufs_dev_wlun_template.gendrv);
+ if (ret)
+- goto debugfs_exit;
+-
+- ret = scsi_register_driver(&ufs_rpmb_wlun_template.gendrv);
+- if (ret)
+- goto unregister;
+-
+- return ret;
+-unregister:
+- scsi_unregister_driver(&ufs_dev_wlun_template.gendrv);
+-debugfs_exit:
+- ufs_debugfs_exit();
++ ufs_debugfs_exit();
+ return ret;
+ }
+
+ static void __exit ufshcd_core_exit(void)
+ {
+ ufs_debugfs_exit();
+- scsi_unregister_driver(&ufs_rpmb_wlun_template.gendrv);
+ scsi_unregister_driver(&ufs_dev_wlun_template.gendrv);
+ }
+
+@@ -9875,5 +9742,6 @@ module_exit(ufshcd_core_exit);
+ MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>");
+ MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>");
+ MODULE_DESCRIPTION("Generic UFS host controller driver Core");
++MODULE_SOFTDEP("pre: governor_simpleondemand");
+ MODULE_LICENSE("GPL");
+ MODULE_VERSION(UFSHCD_DRIVER_VERSION);
+diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
+index 41f6e06f91856..c8513cc6c2bdd 100644
+--- a/drivers/scsi/ufs/ufshcd.h
++++ b/drivers/scsi/ufs/ufshcd.h
+@@ -725,6 +725,7 @@ struct ufs_hba_monitor {
+ * @capabilities: UFS Controller Capabilities
+ * @nutrs: Transfer Request Queue depth supported by controller
+ * @nutmrs: Task Management Queue depth supported by controller
++ * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock.
+ * @ufs_version: UFS Version to which controller complies
+ * @vops: pointer to variant specific operations
+ * @priv: pointer to variant specific private data
+@@ -762,6 +763,7 @@ struct ufs_hba_monitor {
+ * @urgent_bkops_lvl: keeps track of urgent bkops level for device
+ * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
+ * device is known or not.
++ * @wb_mutex: used to serialize devfreq and sysfs write booster toggling
+ * @scsi_block_reqs_cnt: reference counting for scsi block requests
+ * @crypto_capabilities: Content of crypto capabilities register (0x100)
+ * @crypto_cap_array: Array of crypto capabilities
+@@ -813,6 +815,7 @@ struct ufs_hba {
+ u32 capabilities;
+ int nutrs;
+ int nutmrs;
++ u32 reserved_slot;
+ u32 ufs_version;
+ const struct ufs_hba_variant_ops *vops;
+ struct ufs_hba_variant_params *vps;
+@@ -871,9 +874,6 @@ struct ufs_hba {
+ struct ufs_vreg_info vreg_info;
+ struct list_head clk_list_head;
+
+- bool wlun_dev_clr_ua;
+- bool wlun_rpmb_clr_ua;
+-
+ /* Number of requests aborts */
+ int req_abort_count;
+
+@@ -893,6 +893,7 @@ struct ufs_hba {
+ enum bkops_status urgent_bkops_lvl;
+ bool is_urgent_bkops_lvl_checked;
+
++ struct mutex wb_mutex;
+ struct rw_semaphore clk_scaling_lock;
+ unsigned char desc_size[QUERY_DESC_IDN_MAX];
+ atomic_t scsi_block_reqs_cnt;
+@@ -920,7 +921,6 @@ struct ufs_hba {
+ #endif
+ u32 luns_avail;
+ bool complete_put;
+- bool rpmb_complete_put;
+ };
+
+ /* Returns true if clocks can be gated. Otherwise false */
+@@ -1393,14 +1393,4 @@ static inline int ufshcd_rpm_put(struct ufs_hba *hba)
+ return pm_runtime_put(&hba->sdev_ufs_device->sdev_gendev);
+ }
+
+-static inline int ufshcd_rpmb_rpm_get_sync(struct ufs_hba *hba)
+-{
+- return pm_runtime_get_sync(&hba->sdev_rpmb->sdev_gendev);
+-}
+-
+-static inline int ufshcd_rpmb_rpm_put(struct ufs_hba *hba)
+-{
+- return pm_runtime_put(&hba->sdev_rpmb->sdev_gendev);
+-}
+-
+ #endif /* End of Header */
+diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
+index de95be5d11d4e..8dbe9866ea6c6 100644
+--- a/drivers/scsi/ufs/ufshci.h
++++ b/drivers/scsi/ufs/ufshci.h
+@@ -133,16 +133,13 @@ static inline u32 ufshci_version(u32 major, u32 minor)
+
+ #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK)
+
+-#define UFSHCD_ERROR_MASK (UIC_ERROR |\
+- DEVICE_FATAL_ERROR |\
+- CONTROLLER_FATAL_ERROR |\
+- SYSTEM_BUS_FATAL_ERROR |\
+- CRYPTO_ENGINE_FATAL_ERROR)
++#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS)
+
+ #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\
+ CONTROLLER_FATAL_ERROR |\
+ SYSTEM_BUS_FATAL_ERROR |\
+- CRYPTO_ENGINE_FATAL_ERROR)
++ CRYPTO_ENGINE_FATAL_ERROR |\
++ UIC_LINK_LOST)
+
+ /* HCS - Host Controller Status 30h */
+ #define DEVICE_PRESENT 0x1
+diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
+index 026a133149dce..14300896c57fe 100644
+--- a/drivers/scsi/ufs/ufshpb.c
++++ b/drivers/scsi/ufs/ufshpb.c
+@@ -394,8 +394,6 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+ if (!ufshpb_is_supported_chunk(hpb, transfer_len))
+ return 0;
+
+- WARN_ON_ONCE(transfer_len > HPB_MULTI_CHUNK_HIGH);
+-
+ if (hpb->is_hcm) {
+ /*
+ * in host control mode, reads are the main source for
+@@ -872,12 +870,6 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb)
+ struct ufshpb_region *rgn, *victim_rgn = NULL;
+
+ list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) {
+- if (!rgn) {
+- dev_err(&hpb->sdev_ufs_lu->sdev_dev,
+- "%s: no region allocated\n",
+- __func__);
+- return NULL;
+- }
+ if (ufshpb_check_srgns_issue_state(hpb, rgn))
+ continue;
+
+@@ -893,6 +885,11 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb)
+ break;
+ }
+
++ if (!victim_rgn)
++ dev_err(&hpb->sdev_ufs_lu->sdev_dev,
++ "%s: no region allocated\n",
++ __func__);
++
+ return victim_rgn;
+ }
+
+@@ -1260,6 +1257,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+ struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr;
+ int data_seg_len;
+
++ data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
++ & MASK_RSP_UPIU_DATA_SEG_LEN;
++
++ /* If data segment length is zero, rsp_field is not valid */
++ if (!data_seg_len)
++ return;
++
+ if (unlikely(lrbp->lun != rsp_field->lun)) {
+ struct scsi_device *sdev;
+ bool found = false;
+@@ -1294,18 +1298,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+ return;
+ }
+
+- data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
+- & MASK_RSP_UPIU_DATA_SEG_LEN;
+-
+- /* To flush remained rsp_list, we queue the map_work task */
+- if (!data_seg_len) {
+- if (!ufshpb_is_general_lun(hpb->lun))
+- return;
+-
+- ufshpb_kick_map_work(hpb);
+- return;
+- }
+-
+ BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE);
+
+ if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field))
+@@ -1572,7 +1564,7 @@ static void ufshpb_lu_parameter_init(struct ufs_hba *hba,
+ if (ufshpb_is_legacy(hba))
+ hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH;
+ else
+- hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH;
++ hpb->pre_req_max_tr_len = hpb_dev_info->max_hpb_single_cmd;
+
+ hpb->lu_pinned_start = hpb_lu_info->pinned_start;
+ hpb->lu_pinned_end = hpb_lu_info->num_pinned ?
+@@ -2371,11 +2363,11 @@ static int ufshpb_get_lu_info(struct ufs_hba *hba, int lun,
+
+ ufshcd_map_desc_id_to_length(hba, QUERY_DESC_IDN_UNIT, &size);
+
+- pm_runtime_get_sync(hba->dev);
++ ufshcd_rpm_get_sync(hba);
+ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+ QUERY_DESC_IDN_UNIT, lun, 0,
+ desc_buf, &size);
+- pm_runtime_put_sync(hba->dev);
++ ufshcd_rpm_put_sync(hba);
+
+ if (ret) {
+ dev_err(hba->dev,
+@@ -2582,7 +2574,7 @@ void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf)
+ {
+ struct ufshpb_dev_info *hpb_dev_info = &hba->ufshpb_dev;
+ int version, ret;
+- u32 max_hpb_single_cmd = HPB_MULTI_CHUNK_LOW;
++ int max_single_cmd;
+
+ hpb_dev_info->control_mode = desc_buf[DEVICE_DESC_PARAM_HPB_CONTROL];
+
+@@ -2598,21 +2590,22 @@ void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf)
+ if (version == HPB_SUPPORT_LEGACY_VERSION)
+ hpb_dev_info->is_legacy = true;
+
+- pm_runtime_get_sync(hba->dev);
+- ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+- QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_hpb_single_cmd);
+- pm_runtime_put_sync(hba->dev);
+-
+- if (ret)
+- dev_err(hba->dev, "%s: idn: read max size of single hpb cmd query request failed",
+- __func__);
+- hpb_dev_info->max_hpb_single_cmd = max_hpb_single_cmd;
+-
+ /*
+ * Get the number of user logical unit to check whether all
+ * scsi_device finish initialization
+ */
+ hpb_dev_info->num_lu = desc_buf[DEVICE_DESC_PARAM_NUM_LU];
++
++ if (hpb_dev_info->is_legacy)
++ return;
++
++ ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
++ QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_single_cmd);
++
++ if (ret)
++ hpb_dev_info->max_hpb_single_cmd = HPB_LEGACY_CHUNK_HIGH;
++ else
++ hpb_dev_info->max_hpb_single_cmd = min(max_single_cmd + 1, HPB_MULTI_CHUNK_HIGH);
+ }
+
+ void ufshpb_init(struct ufs_hba *hba)
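With this change the HPB transfer-length limit comes from the device attribute when the QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD read succeeds, incremented and clamped to HPB_MULTI_CHUNK_HIGH, with the legacy chunk size as the fallback. A standalone sketch of that selection using an illustrative attribute value:

#include <stdio.h>

#define HPB_LEGACY_CHUNK_HIGH	1
#define HPB_MULTI_CHUNK_HIGH	255

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int query_failed = 0;	/* pretend the attribute read succeeded */
	int max_single_cmd = 7;	/* illustrative value returned by the query */
	int limit;

	if (query_failed)
		limit = HPB_LEGACY_CHUNK_HIGH;
	else
		limit = min_int(max_single_cmd + 1, HPB_MULTI_CHUNK_HIGH);

	printf("pre_req_max_tr_len = %d\n", limit);	/* prints 8 */
	return 0;
}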
+diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
+index f15d8fdbce2ef..b475dbd789883 100644
+--- a/drivers/scsi/ufs/ufshpb.h
++++ b/drivers/scsi/ufs/ufshpb.h
+@@ -31,7 +31,6 @@
+
+ /* hpb support chunk size */
+ #define HPB_LEGACY_CHUNK_HIGH 1
+-#define HPB_MULTI_CHUNK_LOW 7
+ #define HPB_MULTI_CHUNK_HIGH 255
+
+ /* hpb vender defined opcode */
+diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
+index ce1ba1b936298..9419d6d1d8d26 100644
+--- a/drivers/scsi/vmw_pvscsi.c
++++ b/drivers/scsi/vmw_pvscsi.c
+@@ -586,9 +586,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
+ * Commands like INQUIRY may transfer less data than
+ * requested by the initiator via bufflen. Set residual
+ * count to make upper layer aware of the actual amount
+- * of data returned.
++			 * of data returned. There are cases where the controller
++			 * returns a zero dataLen with non-zero data - do not set
++			 * the residual count in that case.
+ */
+- scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
++ if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
++ scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+ cmd->result = (DID_OK << 16);
+ break;
+
+diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h
+index 51a82f7803d3c..9d16cf9254837 100644
+--- a/drivers/scsi/vmw_pvscsi.h
++++ b/drivers/scsi/vmw_pvscsi.h
+@@ -331,8 +331,8 @@ struct PVSCSIRingReqDesc {
+ u8 tag;
+ u8 bus;
+ u8 target;
+- u8 vcpuHint;
+- u8 unused[59];
++ u16 vcpuHint;
++ u8 unused[58];
+ } __packed;
+
+ /*
+diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
+index 0204e314b4825..17b8c88840873 100644
+--- a/drivers/scsi/xen-scsifront.c
++++ b/drivers/scsi/xen-scsifront.c
+@@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info,
+ return;
+
+ for (i = 0; i < shadow->nr_grants; i++) {
+- if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) {
++ if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) {
+ shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
+ "grant still in use by backend\n");
+ BUG();
+ }
+- gnttab_end_foreign_access(shadow->gref[i], 0, 0UL);
+ }
+
+ kfree(shadow->sg);
+diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c
+index 27b9e2baab1a6..7acf9193a9e80 100644
+--- a/drivers/scsi/zorro7xx.c
++++ b/drivers/scsi/zorro7xx.c
+@@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z)
+ scsi_remove_host(host);
+
+ NCR_700_release(host);
++ if (host->base > 0x01000000)
++ iounmap(hostdata->base);
+ kfree(hostdata);
+ free_irq(host->irq, host);
+ zorro_release_device(z);
+diff --git a/drivers/sh/intc/chip.c b/drivers/sh/intc/chip.c
+index 358df75101860..828d81e02b37a 100644
+--- a/drivers/sh/intc/chip.c
++++ b/drivers/sh/intc/chip.c
+@@ -72,7 +72,7 @@ static int intc_set_affinity(struct irq_data *data,
+ if (!cpumask_intersects(cpumask, cpu_online_mask))
+ return -1;
+
+- cpumask_copy(irq_data_get_affinity_mask(data), cpumask);
++ irq_data_update_affinity(data, cpumask);
+
+ return IRQ_SET_MASK_OK_NOCOPY;
+ }
+diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
+index bd0fbcdbdefe9..e24e220e56eea 100644
+--- a/drivers/sh/maple/maple.c
++++ b/drivers/sh/maple/maple.c
+@@ -834,8 +834,10 @@ static int __init maple_bus_init(void)
+
+ maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN);
+
+- if (!maple_queue_cache)
++ if (!maple_queue_cache) {
++ retval = -ENOMEM;
+ goto cleanup_bothirqs;
++ }
+
+ INIT_LIST_HEAD(&maple_waitq);
+ INIT_LIST_HEAD(&maple_sentq);
+@@ -848,6 +850,7 @@ static int __init maple_bus_init(void)
+ if (!mdev[i]) {
+ while (i-- > 0)
+ maple_free_dev(mdev[i]);
++ retval = -ENOMEM;
+ goto cleanup_cache;
+ }
+ baseunits[i] = mdev[i];
+diff --git a/drivers/siox/siox-core.c b/drivers/siox/siox-core.c
+index 7c4f32d769666..561408583b2bf 100644
+--- a/drivers/siox/siox-core.c
++++ b/drivers/siox/siox-core.c
+@@ -839,6 +839,8 @@ static struct siox_device *siox_device_add(struct siox_master *smaster,
+
+ err_device_register:
+ /* don't care to make the buffer smaller again */
++ put_device(&sdevice->dev);
++ sdevice = NULL;
+
+ err_buf_alloc:
+ siox_master_unlock(smaster);
+diff --git a/drivers/slimbus/Kconfig b/drivers/slimbus/Kconfig
+index 1235b7dc8496c..a0fdf9d792cb4 100644
+--- a/drivers/slimbus/Kconfig
++++ b/drivers/slimbus/Kconfig
+@@ -22,7 +22,8 @@ config SLIM_QCOM_CTRL
+
+ config SLIM_QCOM_NGD_CTRL
+ tristate "Qualcomm SLIMbus Satellite Non-Generic Device Component"
+- depends on HAS_IOMEM && DMA_ENGINE && NET && QCOM_RPROC_COMMON
++ depends on HAS_IOMEM && DMA_ENGINE && NET
++ depends on QCOM_RPROC_COMMON || (COMPILE_TEST && !QCOM_RPROC_COMMON)
+ depends on ARCH_QCOM || COMPILE_TEST
+ select QCOM_QMI_HELPERS
+ select QCOM_PDR_HELPERS
+diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c
+index f04b961b96cd4..ec58091fc948a 100644
+--- a/drivers/slimbus/qcom-ctrl.c
++++ b/drivers/slimbus/qcom-ctrl.c
+@@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev)
+ }
+
+ ctrl->irq = platform_get_irq(pdev, 0);
+- if (!ctrl->irq) {
++ if (ctrl->irq < 0) {
+ dev_err(&pdev->dev, "no slimbus IRQ\n");
+- return -ENODEV;
++ return ctrl->irq;
+ }
+
+ sctrl = &ctrl->ctrl;
+diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
+index 7040293c2ee8f..21519ce05bdb8 100644
+--- a/drivers/slimbus/qcom-ngd-ctrl.c
++++ b/drivers/slimbus/qcom-ngd-ctrl.c
+@@ -1567,17 +1567,27 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev)
+ ctrl->pdr = pdr_handle_alloc(slim_pd_status, ctrl);
+ if (IS_ERR(ctrl->pdr)) {
+ dev_err(dev, "Failed to init PDR handle\n");
+- return PTR_ERR(ctrl->pdr);
++ ret = PTR_ERR(ctrl->pdr);
++ goto err_pdr_alloc;
+ }
+
+ pds = pdr_add_lookup(ctrl->pdr, "avs/audio", "msm/adsp/audio_pd");
+ if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) {
++ ret = PTR_ERR(pds);
+ dev_err(dev, "pdr add lookup failed: %d\n", ret);
+- return PTR_ERR(pds);
++ goto err_pdr_lookup;
+ }
+
+ platform_driver_register(&qcom_slim_ngd_driver);
+ return of_qcom_slim_ngd_register(dev, ctrl);
++
++err_pdr_alloc:
++ qcom_unregister_ssr_notifier(ctrl->notifier, &ctrl->nb);
++
++err_pdr_lookup:
++ pdr_handle_release(ctrl->pdr);
++
++ return ret;
+ }
+
+ static int qcom_slim_ngd_ctrl_remove(struct platform_device *pdev)
+diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c
+index 75f87b3d8b953..73a2aa3629572 100644
+--- a/drivers/slimbus/stream.c
++++ b/drivers/slimbus/stream.c
+@@ -67,10 +67,10 @@ static const int slim_presence_rate_table[] = {
+ 384000,
+ 768000,
+ 0, /* Reserved */
+- 110250,
+- 220500,
+- 441000,
+- 882000,
++ 11025,
++ 22050,
++ 44100,
++ 88200,
+ 176400,
+ 352800,
+ 705600,
+diff --git a/drivers/soc/amlogic/meson-mx-socinfo.c b/drivers/soc/amlogic/meson-mx-socinfo.c
+index 78f0f1aeca578..92125dd65f338 100644
+--- a/drivers/soc/amlogic/meson-mx-socinfo.c
++++ b/drivers/soc/amlogic/meson-mx-socinfo.c
+@@ -126,6 +126,7 @@ static int __init meson_mx_socinfo_init(void)
+ np = of_find_matching_node(NULL, meson_mx_socinfo_analog_top_ids);
+ if (np) {
+ analog_top_regmap = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(analog_top_regmap))
+ return PTR_ERR(analog_top_regmap);
+
+diff --git a/drivers/soc/amlogic/meson-secure-pwrc.c b/drivers/soc/amlogic/meson-secure-pwrc.c
+index 59bd195fa9c92..2f3ca5531fa96 100644
+--- a/drivers/soc/amlogic/meson-secure-pwrc.c
++++ b/drivers/soc/amlogic/meson-secure-pwrc.c
+@@ -104,7 +104,7 @@ static struct meson_secure_pwrc_domain_desc a1_pwrc_domains[] = {
+ SEC_PD(ACODEC, 0),
+ SEC_PD(AUDIO, 0),
+ SEC_PD(OTP, 0),
+- SEC_PD(DMA, 0),
++ SEC_PD(DMA, GENPD_FLAG_ALWAYS_ON | GENPD_FLAG_IRQ_SAFE),
+ SEC_PD(SD_EMMC, 0),
+ SEC_PD(RAMA, 0),
+ /* SRAMB is used as ATF runtime memory, and should be always on */
+@@ -139,8 +139,10 @@ static int meson_secure_pwrc_probe(struct platform_device *pdev)
+ }
+
+ pwrc = devm_kzalloc(&pdev->dev, sizeof(*pwrc), GFP_KERNEL);
+- if (!pwrc)
++ if (!pwrc) {
++ of_node_put(sm_np);
+ return -ENOMEM;
++ }
+
+ pwrc->fw = meson_sm_get(sm_np);
+ of_node_put(sm_np);
+diff --git a/drivers/soc/aspeed/aspeed-lpc-ctrl.c b/drivers/soc/aspeed/aspeed-lpc-ctrl.c
+index 72771e018c42e..258894ed234b3 100644
+--- a/drivers/soc/aspeed/aspeed-lpc-ctrl.c
++++ b/drivers/soc/aspeed/aspeed-lpc-ctrl.c
+@@ -306,10 +306,9 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev)
+ }
+
+ lpc_ctrl->clk = devm_clk_get(dev, NULL);
+- if (IS_ERR(lpc_ctrl->clk)) {
+- dev_err(dev, "couldn't get clock\n");
+- return PTR_ERR(lpc_ctrl->clk);
+- }
++ if (IS_ERR(lpc_ctrl->clk))
++ return dev_err_probe(dev, PTR_ERR(lpc_ctrl->clk),
++ "couldn't get clock\n");
+ rc = clk_prepare_enable(lpc_ctrl->clk);
+ if (rc) {
+ dev_err(dev, "couldn't enable clock\n");
+diff --git a/drivers/soc/aspeed/aspeed-socinfo.c b/drivers/soc/aspeed/aspeed-socinfo.c
+index 1ca140356a084..3f759121dc00a 100644
+--- a/drivers/soc/aspeed/aspeed-socinfo.c
++++ b/drivers/soc/aspeed/aspeed-socinfo.c
+@@ -137,6 +137,7 @@ static int __init aspeed_socinfo_init(void)
+
+ soc_dev = soc_device_register(attrs);
+ if (IS_ERR(soc_dev)) {
++ kfree(attrs->machine);
+ kfree(attrs->soc_id);
+ kfree(attrs->serial_number);
+ kfree(attrs);
+diff --git a/drivers/soc/atmel/soc.c b/drivers/soc/atmel/soc.c
+index a490ad7e090f2..9e3d370114474 100644
+--- a/drivers/soc/atmel/soc.c
++++ b/drivers/soc/atmel/soc.c
+@@ -91,14 +91,14 @@ static const struct at91_soc socs[] __initconst = {
+ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
+ AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
+ "sam9x60", "sam9x60"),
+- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D5M_EXID_MATCH,
+- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
++ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
++ AT91_CIDR_VERSION_MASK, SAM9X60_D5M_EXID_MATCH,
+ "sam9x60 64MiB DDR2 SiP", "sam9x60"),
+- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D1G_EXID_MATCH,
+- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
++ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
++ AT91_CIDR_VERSION_MASK, SAM9X60_D1G_EXID_MATCH,
+ "sam9x60 128MiB DDR2 SiP", "sam9x60"),
+- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D6K_EXID_MATCH,
+- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
++ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
++ AT91_CIDR_VERSION_MASK, SAM9X60_D6K_EXID_MATCH,
+ "sam9x60 8MiB SDRAM SiP", "sam9x60"),
+ #endif
+ #ifdef CONFIG_SOC_SAMA5
+diff --git a/drivers/soc/bcm/bcm63xx/bcm-pmb.c b/drivers/soc/bcm/bcm63xx/bcm-pmb.c
+index 774465c119be2..2ac20084e5a5b 100644
+--- a/drivers/soc/bcm/bcm63xx/bcm-pmb.c
++++ b/drivers/soc/bcm/bcm63xx/bcm-pmb.c
+@@ -314,6 +314,9 @@ static int bcm_pmb_probe(struct platform_device *pdev)
+ for (e = table; e->name; e++) {
+ struct bcm_pmb_pm_domain *pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+
++ if (!pd)
++ return -ENOMEM;
++
+ pd->pmb = pmb;
+ pd->data = e;
+ pd->genpd.name = e->name;
+diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c
+index 3cbb165d6e309..286f5d57c0cab 100644
+--- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c
++++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c
+@@ -684,13 +684,14 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ const struct of_device_id *of_id = NULL;
+ struct device_node *dn;
+ void __iomem *base;
+- int ret, i;
++ int ret, i, s;
+
+ /* AON ctrl registers */
+ base = brcmstb_ioremap_match(aon_ctrl_dt_ids, 0, NULL);
+ if (IS_ERR(base)) {
+ pr_err("error mapping AON_CTRL\n");
+- return PTR_ERR(base);
++ ret = PTR_ERR(base);
++ goto aon_err;
+ }
+ ctrl.aon_ctrl_base = base;
+
+@@ -700,8 +701,10 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ /* Assume standard offset */
+ ctrl.aon_sram = ctrl.aon_ctrl_base +
+ AON_CTRL_SYSTEM_DATA_RAM_OFS;
++ s = 0;
+ } else {
+ ctrl.aon_sram = base;
++ s = 1;
+ }
+
+ writel_relaxed(0, ctrl.aon_sram + AON_REG_PANIC);
+@@ -711,7 +714,8 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ (const void **)&ddr_phy_data);
+ if (IS_ERR(base)) {
+ pr_err("error mapping DDR PHY\n");
+- return PTR_ERR(base);
++ ret = PTR_ERR(base);
++ goto ddr_phy_err;
+ }
+ ctrl.support_warm_boot = ddr_phy_data->supports_warm_boot;
+ ctrl.pll_status_offset = ddr_phy_data->pll_status_offset;
+@@ -731,17 +735,20 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ for_each_matching_node(dn, ddr_shimphy_dt_ids) {
+ i = ctrl.num_memc;
+ if (i >= MAX_NUM_MEMC) {
++ of_node_put(dn);
+ pr_warn("too many MEMCs (max %d)\n", MAX_NUM_MEMC);
+ break;
+ }
+
+ base = of_io_request_and_map(dn, 0, dn->full_name);
+ if (IS_ERR(base)) {
++ of_node_put(dn);
+ if (!ctrl.support_warm_boot)
+ break;
+
+ pr_err("error mapping DDR SHIMPHY %d\n", i);
+- return PTR_ERR(base);
++ ret = PTR_ERR(base);
++ goto ddr_shimphy_err;
+ }
+ ctrl.memcs[i].ddr_shimphy_base = base;
+ ctrl.num_memc++;
+@@ -752,14 +759,18 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ for_each_matching_node(dn, brcmstb_memc_of_match) {
+ base = of_iomap(dn, 0);
+ if (!base) {
++ of_node_put(dn);
+ pr_err("error mapping DDR Sequencer %d\n", i);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto brcmstb_memc_err;
+ }
+
+ of_id = of_match_node(brcmstb_memc_of_match, dn);
+ if (!of_id) {
+ iounmap(base);
+- return -EINVAL;
++ of_node_put(dn);
++ ret = -EINVAL;
++ goto brcmstb_memc_err;
+ }
+
+ ddr_seq_data = of_id->data;
+@@ -779,20 +790,24 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+ dn = of_find_matching_node(NULL, sram_dt_ids);
+ if (!dn) {
+ pr_err("SRAM not found\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto brcmstb_memc_err;
+ }
+
+ ret = brcmstb_init_sram(dn);
++ of_node_put(dn);
+ if (ret) {
+ pr_err("error setting up SRAM for PM\n");
+- return ret;
++ goto brcmstb_memc_err;
+ }
+
+ ctrl.pdev = pdev;
+
+ ctrl.s3_params = kmalloc(sizeof(*ctrl.s3_params), GFP_KERNEL);
+- if (!ctrl.s3_params)
+- return -ENOMEM;
++ if (!ctrl.s3_params) {
++ ret = -ENOMEM;
++ goto s3_params_err;
++ }
+ ctrl.s3_params_pa = dma_map_single(&pdev->dev, ctrl.s3_params,
+ sizeof(*ctrl.s3_params),
+ DMA_TO_DEVICE);
+@@ -812,7 +827,21 @@ static int brcmstb_pm_probe(struct platform_device *pdev)
+
+ out:
+ kfree(ctrl.s3_params);
+-
++s3_params_err:
++ iounmap(ctrl.boot_sram);
++brcmstb_memc_err:
++ for (i--; i >= 0; i--)
++ iounmap(ctrl.memcs[i].ddr_ctrl);
++ddr_shimphy_err:
++ for (i = 0; i < ctrl.num_memc; i++)
++ iounmap(ctrl.memcs[i].ddr_shimphy_base);
++
++ iounmap(ctrl.memcs[0].ddr_phy_base);
++ddr_phy_err:
++ iounmap(ctrl.aon_ctrl_base);
++ if (s)
++ iounmap(ctrl.aon_sram);
++aon_err:
+ pr_warn("PM: initialization failed with code %d\n", ret);
+
+ return ret;
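The error paths added to brcmstb_pm_probe() follow the usual goto-unwind pattern: each failure jumps to a label that releases only what was mapped before it, in reverse order of acquisition, so nothing is leaked and nothing is unmapped twice. A standalone sketch of that shape with placeholder resources instead of the ioremapped regions:

#include <stdlib.h>

static int probe_like_setup(void)
{
	void *aon, *phy;
	int ret = -1;

	aon = malloc(32);
	if (!aon)
		goto aon_err;

	phy = malloc(32);
	if (!phy)
		goto phy_err;

	/* success: a real probe would keep both mappings; freed here only so
	 * the demo does not leak */
	free(phy);
	free(aon);
	return 0;

phy_err:
	free(aon);
aon_err:
	return ret;
}

int main(void) { return probe_like_setup() ? 1 : 0; }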
+diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
+index 4df32bc4c7a6e..c5d46152d4680 100644
+--- a/drivers/soc/fsl/Kconfig
++++ b/drivers/soc/fsl/Kconfig
+@@ -24,6 +24,7 @@ config FSL_MC_DPIO
+ tristate "QorIQ DPAA2 DPIO driver"
+ depends on FSL_MC_BUS
+ select SOC_BUS
++ select FSL_GUTS
+ help
+ Driver for the DPAA2 DPIO object. A DPIO provides queue and
+ buffer management facilities for software to interact with
+diff --git a/drivers/soc/fsl/dpaa2-console.c b/drivers/soc/fsl/dpaa2-console.c
+index 27243f706f376..53917410f2bdb 100644
+--- a/drivers/soc/fsl/dpaa2-console.c
++++ b/drivers/soc/fsl/dpaa2-console.c
+@@ -231,6 +231,7 @@ static ssize_t dpaa2_console_read(struct file *fp, char __user *buf,
+ cd->cur_ptr += bytes;
+ written += bytes;
+
++ kfree(kbuf);
+ return written;
+
+ err_free_buf:
+diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
+index 7351f30305506..779c319a4b820 100644
+--- a/drivers/soc/fsl/dpio/dpio-service.c
++++ b/drivers/soc/fsl/dpio/dpio-service.c
+@@ -59,7 +59,7 @@ static inline struct dpaa2_io *service_select_by_cpu(struct dpaa2_io *d,
+ * potentially being migrated away.
+ */
+ if (cpu < 0)
+- cpu = smp_processor_id();
++ cpu = raw_smp_processor_id();
+
+ /* If a specific cpu was requested, pick it up immediately */
+ return dpio_by_cpu[cpu];
+diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c
+index f13da4d7d1c52..3ec8ab08b9889 100644
+--- a/drivers/soc/fsl/dpio/qbman-portal.c
++++ b/drivers/soc/fsl/dpio/qbman-portal.c
+@@ -732,8 +732,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+ int i, num_enqueued = 0;
+ unsigned long irq_flags;
+
+- spin_lock(&s->access_spinlock);
+- local_irq_save(irq_flags);
++ spin_lock_irqsave(&s->access_spinlock, irq_flags);
+
+ half_mask = (s->eqcr.pi_ci_mask>>1);
+ full_mask = s->eqcr.pi_ci_mask;
+@@ -744,8 +743,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+ eqcr_ci, s->eqcr.ci);
+ if (!s->eqcr.available) {
+- local_irq_restore(irq_flags);
+- spin_unlock(&s->access_spinlock);
++ spin_unlock_irqrestore(&s->access_spinlock, irq_flags);
+ return 0;
+ }
+ }
+@@ -784,8 +782,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+ dma_wmb();
+ qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
+ (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+- local_irq_restore(irq_flags);
+- spin_unlock(&s->access_spinlock);
++ spin_unlock_irqrestore(&s->access_spinlock, irq_flags);
+
+ return num_enqueued;
+ }
+diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c
+index d5e9a5f2c0874..0b2c7fdbaa5b2 100644
+--- a/drivers/soc/fsl/guts.c
++++ b/drivers/soc/fsl/guts.c
+@@ -28,7 +28,6 @@ struct fsl_soc_die_attr {
+ static struct guts *guts;
+ static struct soc_device_attribute soc_dev_attr;
+ static struct soc_device *soc_dev;
+-static struct device_node *root;
+
+
+ /* SoC die attribute definition for QorIQ platform */
+@@ -138,11 +137,11 @@ static u32 fsl_guts_get_svr(void)
+
+ static int fsl_guts_probe(struct platform_device *pdev)
+ {
+- struct device_node *np = pdev->dev.of_node;
++ struct device_node *root, *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ const struct fsl_soc_die_attr *soc_die;
+- const char *machine;
++ const char *machine = NULL;
+ u32 svr;
+
+ /* Initialize guts */
+@@ -161,8 +160,14 @@ static int fsl_guts_probe(struct platform_device *pdev)
+ root = of_find_node_by_path("/");
+ if (of_property_read_string(root, "model", &machine))
+ of_property_read_string_index(root, "compatible", 0, &machine);
+- if (machine)
+- soc_dev_attr.machine = machine;
++ if (machine) {
++ soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL);
++ if (!soc_dev_attr.machine) {
++ of_node_put(root);
++ return -ENOMEM;
++ }
++ }
++ of_node_put(root);
+
+ svr = fsl_guts_get_svr();
+ soc_die = fsl_soc_die_match(svr, fsl_soc_die);
+@@ -197,7 +202,6 @@ static int fsl_guts_probe(struct platform_device *pdev)
+ static int fsl_guts_remove(struct platform_device *dev)
+ {
+ soc_device_unregister(soc_dev);
+- of_node_put(root);
+ return 0;
+ }
+
+diff --git a/drivers/soc/fsl/qe/Kconfig b/drivers/soc/fsl/qe/Kconfig
+index 357c5800b112f..7afa796dbbb89 100644
+--- a/drivers/soc/fsl/qe/Kconfig
++++ b/drivers/soc/fsl/qe/Kconfig
+@@ -39,6 +39,7 @@ config QE_TDM
+
+ config QE_USB
+ bool
++ depends on QUICC_ENGINE
+ default y if USB_FSL_QE
+ help
+ QE USB Controller support
+diff --git a/drivers/soc/fsl/qe/qe_io.c b/drivers/soc/fsl/qe/qe_io.c
+index e277c827bdf33..a5e2d0e5ab511 100644
+--- a/drivers/soc/fsl/qe/qe_io.c
++++ b/drivers/soc/fsl/qe/qe_io.c
+@@ -35,6 +35,8 @@ int par_io_init(struct device_node *np)
+ if (ret)
+ return ret;
+ par_io = ioremap(res.start, resource_size(&res));
++ if (!par_io)
++ return -ENOMEM;
+
+ if (!of_property_read_u32(np, "num-ports", &num_ports))
+ num_par_io_ports = num_ports;
+diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c
+index 34a9ac1f2b9b1..4dc3a3f73511e 100644
+--- a/drivers/soc/imx/gpcv2.c
++++ b/drivers/soc/imx/gpcv2.c
+@@ -237,6 +237,8 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
+ }
+ }
+
++ reset_control_assert(domain->reset);
++
+ /* Enable reset clocks for all devices in the domain */
+ ret = clk_bulk_prepare_enable(domain->num_clks, domain->clks);
+ if (ret) {
+@@ -244,6 +246,9 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
+ goto out_regulator_disable;
+ }
+
++ /* delays for reset to propagate */
++ udelay(5);
++
+ if (domain->bits.pxx) {
+ /* request the domain to power up */
+ regmap_update_bits(domain->regmap, GPC_PU_PGC_SW_PUP_REQ,
+@@ -266,8 +271,6 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
+ GPC_PGC_CTRL_PCR);
+ }
+
+- reset_control_assert(domain->reset);
+-
+ /* delay for reset to propagate */
+ udelay(5);
+
+@@ -369,7 +372,7 @@ static int imx_pgc_power_down(struct generic_pm_domain *genpd)
+ }
+ }
+
+- pm_runtime_put(domain->dev);
++ pm_runtime_put_sync_suspend(domain->dev);
+
+ return 0;
+
+diff --git a/drivers/soc/imx/soc-imx.c b/drivers/soc/imx/soc-imx.c
+index ac6d856ba228d..77bc12039c3d4 100644
+--- a/drivers/soc/imx/soc-imx.c
++++ b/drivers/soc/imx/soc-imx.c
+@@ -36,6 +36,10 @@ static int __init imx_soc_device_init(void)
+ int ret;
+ int i;
+
++ /* Return early if this is running on devices with different SoCs */
++ if (!__mxc_cpu_type)
++ return 0;
++
+ if (of_machine_is_compatible("fsl,ls1021a"))
+ return 0;
+
+diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c
+index cc57a384d74d2..32ed9dc88e455 100644
+--- a/drivers/soc/imx/soc-imx8m.c
++++ b/drivers/soc/imx/soc-imx8m.c
+@@ -11,6 +11,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/arm-smccc.h>
+ #include <linux/of.h>
++#include <linux/clk.h>
+
+ #define REV_B1 0x21
+
+@@ -56,6 +57,7 @@ static u32 __init imx8mq_soc_revision(void)
+ void __iomem *ocotp_base;
+ u32 magic;
+ u32 rev;
++ struct clk *clk;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,imx8mq-ocotp");
+ if (!np)
+@@ -63,6 +65,13 @@ static u32 __init imx8mq_soc_revision(void)
+
+ ocotp_base = of_iomap(np, 0);
+ WARN_ON(!ocotp_base);
++ clk = of_clk_get_by_name(np, NULL);
++ if (IS_ERR(clk)) {
++ WARN_ON(IS_ERR(clk));
++ return 0;
++ }
++
++ clk_prepare_enable(clk);
+
+ /*
+ * SOC revision on older imx8mq is not available in fuses so query
+@@ -79,6 +88,8 @@ static u32 __init imx8mq_soc_revision(void)
+ soc_uid <<= 32;
+ soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW);
+
++ clk_disable_unprepare(clk);
++ clk_put(clk);
+ iounmap(ocotp_base);
+ of_node_put(np);
+
+diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c
+index f490c4ca51f51..a0159805d061b 100644
+--- a/drivers/soc/ixp4xx/ixp4xx-npe.c
++++ b/drivers/soc/ixp4xx/ixp4xx-npe.c
+@@ -743,7 +743,7 @@ static const struct of_device_id ixp4xx_npe_of_match[] = {
+ static struct platform_driver ixp4xx_npe_driver = {
+ .driver = {
+ .name = "ixp4xx-npe",
+- .of_match_table = of_match_ptr(ixp4xx_npe_of_match),
++ .of_match_table = ixp4xx_npe_of_match,
+ },
+ .probe = ixp4xx_npe_probe,
+ .remove = ixp4xx_npe_remove,
+diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c
+index b762bc40f56bd..52ecde8e446cf 100644
+--- a/drivers/soc/mediatek/mtk-pm-domains.c
++++ b/drivers/soc/mediatek/mtk-pm-domains.c
+@@ -272,9 +272,9 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
+ clk_bulk_disable_unprepare(pd->num_subsys_clks, pd->subsys_clks);
+
+ /* subsys power off */
+- regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
+ regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_ISO_BIT);
+ regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_CLK_DIS_BIT);
++ regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
+ regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_2ND_BIT);
+ regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_BIT);
+
+@@ -443,6 +443,9 @@ generic_pm_domain *scpsys_add_one_domain(struct scpsys *scpsys, struct device_no
+ pd->genpd.power_off = scpsys_power_off;
+ pd->genpd.power_on = scpsys_power_on;
+
++ if (MTK_SCPD_CAPS(pd, MTK_SCPD_ACTIVE_WAKEUP))
++ pd->genpd.flags |= GENPD_FLAG_ACTIVE_WAKEUP;
++
+ if (MTK_SCPD_CAPS(pd, MTK_SCPD_KEEP_DEFAULT_OFF))
+ pm_genpd_init(&pd->genpd, NULL, true);
+ else
+diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
+index 79b568f82a1c3..6a97e8af93908 100644
+--- a/drivers/soc/qcom/Kconfig
++++ b/drivers/soc/qcom/Kconfig
+@@ -63,6 +63,7 @@ config QCOM_GSBI
+ config QCOM_LLCC
+ tristate "Qualcomm Technologies, Inc. LLCC driver"
+ depends on ARCH_QCOM || COMPILE_TEST
++ select REGMAP_MMIO
+ help
+ Qualcomm Technologies, Inc. platform specific
+ Last Level Cache Controller(LLCC) driver for platforms such as,
+@@ -129,6 +130,7 @@ config QCOM_RPMHPD
+
+ config QCOM_RPMPD
+ tristate "Qualcomm RPM Power domain driver"
++ depends on PM
+ depends on QCOM_SMD_RPM
+ help
+ QCOM RPM Power domain driver to support power-domains with
+diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
+index 475a57b435b24..8fd823b40f4be 100644
+--- a/drivers/soc/qcom/apr.c
++++ b/drivers/soc/qcom/apr.c
+@@ -15,13 +15,18 @@
+ #include <linux/rpmsg.h>
+ #include <linux/of.h>
+
+-struct apr {
++enum {
++ PR_TYPE_APR = 0,
++};
++
++struct packet_router {
+ struct rpmsg_endpoint *ch;
+ struct device *dev;
+ spinlock_t svcs_lock;
+ spinlock_t rx_lock;
+ struct idr svcs_idr;
+ int dest_domain_id;
++ int type;
+ struct pdr_handle *pdr;
+ struct workqueue_struct *rxwq;
+ struct work_struct rx_work;
+@@ -44,21 +49,21 @@ struct apr_rx_buf {
+ */
+ int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt)
+ {
+- struct apr *apr = dev_get_drvdata(adev->dev.parent);
++ struct packet_router *apr = dev_get_drvdata(adev->dev.parent);
+ struct apr_hdr *hdr;
+ unsigned long flags;
+ int ret;
+
+- spin_lock_irqsave(&adev->lock, flags);
++ spin_lock_irqsave(&adev->svc.lock, flags);
+
+ hdr = &pkt->hdr;
+ hdr->src_domain = APR_DOMAIN_APPS;
+- hdr->src_svc = adev->svc_id;
++ hdr->src_svc = adev->svc.id;
+ hdr->dest_domain = adev->domain_id;
+- hdr->dest_svc = adev->svc_id;
++ hdr->dest_svc = adev->svc.id;
+
+ ret = rpmsg_trysend(apr->ch, pkt, hdr->pkt_size);
+- spin_unlock_irqrestore(&adev->lock, flags);
++ spin_unlock_irqrestore(&adev->svc.lock, flags);
+
+ return ret ? ret : hdr->pkt_size;
+ }
+@@ -74,7 +79,7 @@ static void apr_dev_release(struct device *dev)
+ static int apr_callback(struct rpmsg_device *rpdev, void *buf,
+ int len, void *priv, u32 addr)
+ {
+- struct apr *apr = dev_get_drvdata(&rpdev->dev);
++ struct packet_router *apr = dev_get_drvdata(&rpdev->dev);
+ struct apr_rx_buf *abuf;
+ unsigned long flags;
+
+@@ -100,11 +105,11 @@ static int apr_callback(struct rpmsg_device *rpdev, void *buf,
+ return 0;
+ }
+
+-
+-static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
++static int apr_do_rx_callback(struct packet_router *apr, struct apr_rx_buf *abuf)
+ {
+ uint16_t hdr_size, msg_type, ver, svc_id;
+- struct apr_device *svc = NULL;
++ struct pkt_router_svc *svc;
++ struct apr_device *adev;
+ struct apr_driver *adrv = NULL;
+ struct apr_resp_pkt resp;
+ struct apr_hdr *hdr;
+@@ -145,12 +150,15 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
+ svc_id = hdr->dest_svc;
+ spin_lock_irqsave(&apr->svcs_lock, flags);
+ svc = idr_find(&apr->svcs_idr, svc_id);
+- if (svc && svc->dev.driver)
+- adrv = to_apr_driver(svc->dev.driver);
++ if (svc && svc->dev->driver) {
++ adev = svc_to_apr_device(svc);
++ adrv = to_apr_driver(adev->dev.driver);
++ }
+ spin_unlock_irqrestore(&apr->svcs_lock, flags);
+
+- if (!adrv) {
+- dev_err(apr->dev, "APR: service is not registered\n");
++ if (!adrv || !adev) {
++ dev_err(apr->dev, "APR: service is not registered (%d)\n",
++ svc_id);
+ return -EINVAL;
+ }
+
+@@ -164,20 +172,26 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
+ if (resp.payload_size > 0)
+ resp.payload = buf + hdr_size;
+
+- adrv->callback(svc, &resp);
++ adrv->callback(adev, &resp);
+
+ return 0;
+ }
+
+ static void apr_rxwq(struct work_struct *work)
+ {
+- struct apr *apr = container_of(work, struct apr, rx_work);
++ struct packet_router *apr = container_of(work, struct packet_router, rx_work);
+ struct apr_rx_buf *abuf, *b;
+ unsigned long flags;
+
+ if (!list_empty(&apr->rx_list)) {
+ list_for_each_entry_safe(abuf, b, &apr->rx_list, node) {
+- apr_do_rx_callback(apr, abuf);
++ switch (apr->type) {
++ case PR_TYPE_APR:
++ apr_do_rx_callback(apr, abuf);
++ break;
++ default:
++ break;
++ }
+ spin_lock_irqsave(&apr->rx_lock, flags);
+ list_del(&abuf->node);
+ spin_unlock_irqrestore(&apr->rx_lock, flags);
+@@ -201,7 +215,7 @@ static int apr_device_match(struct device *dev, struct device_driver *drv)
+
+ while (id->domain_id != 0 || id->svc_id != 0) {
+ if (id->domain_id == adev->domain_id &&
+- id->svc_id == adev->svc_id)
++ id->svc_id == adev->svc.id)
+ return 1;
+ id++;
+ }
+@@ -221,14 +235,14 @@ static void apr_device_remove(struct device *dev)
+ {
+ struct apr_device *adev = to_apr_device(dev);
+ struct apr_driver *adrv;
+- struct apr *apr = dev_get_drvdata(adev->dev.parent);
++ struct packet_router *apr = dev_get_drvdata(adev->dev.parent);
+
+ if (dev->driver) {
+ adrv = to_apr_driver(dev->driver);
+ if (adrv->remove)
+ adrv->remove(adev);
+ spin_lock(&apr->svcs_lock);
+- idr_remove(&apr->svcs_idr, adev->svc_id);
++ idr_remove(&apr->svcs_idr, adev->svc.id);
+ spin_unlock(&apr->svcs_lock);
+ }
+ }
+@@ -255,28 +269,39 @@ struct bus_type aprbus = {
+ EXPORT_SYMBOL_GPL(aprbus);
+
+ static int apr_add_device(struct device *dev, struct device_node *np,
+- const struct apr_device_id *id)
++ u32 svc_id, u32 domain_id)
+ {
+- struct apr *apr = dev_get_drvdata(dev);
++ struct packet_router *apr = dev_get_drvdata(dev);
+ struct apr_device *adev = NULL;
++ struct pkt_router_svc *svc;
+ int ret;
+
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+- spin_lock_init(&adev->lock);
++ adev->svc_id = svc_id;
++ svc = &adev->svc;
++
++ svc->id = svc_id;
++ svc->pr = apr;
++ svc->priv = adev;
++ svc->dev = dev;
++ spin_lock_init(&svc->lock);
++
++ adev->domain_id = domain_id;
+
+- adev->svc_id = id->svc_id;
+- adev->domain_id = id->domain_id;
+- adev->version = id->svc_version;
+ if (np)
+ snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np);
+- else
+- strscpy(adev->name, id->name, APR_NAME_SIZE);
+
+- dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
+- id->domain_id, id->svc_id);
++ switch (apr->type) {
++ case PR_TYPE_APR:
++ dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
++ domain_id, svc_id);
++ break;
++ default:
++ break;
++ }
+
+ adev->dev.bus = &aprbus;
+ adev->dev.parent = dev;
+@@ -285,12 +310,20 @@ static int apr_add_device(struct device *dev, struct device_node *np,
+ adev->dev.driver = NULL;
+
+ spin_lock(&apr->svcs_lock);
+- idr_alloc(&apr->svcs_idr, adev, id->svc_id,
+- id->svc_id + 1, GFP_ATOMIC);
++ ret = idr_alloc(&apr->svcs_idr, svc, svc_id, svc_id + 1, GFP_ATOMIC);
+ spin_unlock(&apr->svcs_lock);
++ if (ret < 0) {
++ dev_err(dev, "idr_alloc failed: %d\n", ret);
++ goto out;
++ }
+
+- of_property_read_string_index(np, "qcom,protection-domain",
+- 1, &adev->service_path);
++ /* Protection domain is optional, it does not exist on older platforms */
++ ret = of_property_read_string_index(np, "qcom,protection-domain",
++ 1, &adev->service_path);
++ if (ret < 0 && ret != -EINVAL) {
++ dev_err(dev, "Failed to read second value of qcom,protection-domain\n");
++ goto out;
++ }
+
+ dev_info(dev, "Adding APR dev: %s\n", dev_name(&adev->dev));
+
+@@ -300,13 +333,14 @@ static int apr_add_device(struct device *dev, struct device_node *np,
+ put_device(&adev->dev);
+ }
+
++out:
+ return ret;
+ }
+
+ static int of_apr_add_pd_lookups(struct device *dev)
+ {
+ const char *service_name, *service_path;
+- struct apr *apr = dev_get_drvdata(dev);
++ struct packet_router *apr = dev_get_drvdata(dev);
+ struct device_node *node;
+ struct pdr_service *pds;
+ int ret;
+@@ -321,12 +355,14 @@ static int of_apr_add_pd_lookups(struct device *dev)
+ 1, &service_path);
+ if (ret < 0) {
+ dev_err(dev, "pdr service path missing: %d\n", ret);
++ of_node_put(node);
+ return ret;
+ }
+
+ pds = pdr_add_lookup(apr->pdr, service_name, service_path);
+ if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) {
+ dev_err(dev, "pdr add lookup failed: %ld\n", PTR_ERR(pds));
++ of_node_put(node);
+ return PTR_ERR(pds);
+ }
+ }
+@@ -336,13 +372,14 @@ static int of_apr_add_pd_lookups(struct device *dev)
+
+ static void of_register_apr_devices(struct device *dev, const char *svc_path)
+ {
+- struct apr *apr = dev_get_drvdata(dev);
++ struct packet_router *apr = dev_get_drvdata(dev);
+ struct device_node *node;
+ const char *service_path;
+ int ret;
+
+ for_each_child_of_node(dev->of_node, node) {
+- struct apr_device_id id = { {0} };
++ u32 svc_id;
++ u32 domain_id;
+
+ /*
+ * This function is called with svc_path NULL during
+@@ -372,13 +409,13 @@ static void of_register_apr_devices(struct device *dev, const char *svc_path)
+ continue;
+ }
+
+- if (of_property_read_u32(node, "reg", &id.svc_id))
++ if (of_property_read_u32(node, "reg", &svc_id))
+ continue;
+
+- id.domain_id = apr->dest_domain_id;
++ domain_id = apr->dest_domain_id;
+
+- if (apr_add_device(dev, node, &id))
+- dev_err(dev, "Failed to add apr %d svc\n", id.svc_id);
++ if (apr_add_device(dev, node, svc_id, domain_id))
++ dev_err(dev, "Failed to add apr %d svc\n", svc_id);
+ }
+ }
+
+@@ -398,7 +435,7 @@ static int apr_remove_device(struct device *dev, void *svc_path)
+
+ static void apr_pd_status(int state, char *svc_path, void *priv)
+ {
+- struct apr *apr = (struct apr *)priv;
++ struct packet_router *apr = (struct packet_router *)priv;
+
+ switch (state) {
+ case SERVREG_SERVICE_STATE_UP:
+@@ -413,16 +450,20 @@ static void apr_pd_status(int state, char *svc_path, void *priv)
+ static int apr_probe(struct rpmsg_device *rpdev)
+ {
+ struct device *dev = &rpdev->dev;
+- struct apr *apr;
++ struct packet_router *apr;
+ int ret;
+
+ apr = devm_kzalloc(dev, sizeof(*apr), GFP_KERNEL);
+ if (!apr)
+ return -ENOMEM;
+
+- ret = of_property_read_u32(dev->of_node, "qcom,apr-domain", &apr->dest_domain_id);
++ ret = of_property_read_u32(dev->of_node, "qcom,domain", &apr->dest_domain_id);
++ if (ret) /* try deprecated apr-domain property */
++ ret = of_property_read_u32(dev->of_node, "qcom,apr-domain",
++ &apr->dest_domain_id);
++ apr->type = PR_TYPE_APR;
+ if (ret) {
+- dev_err(dev, "APR Domain ID not specified in DT\n");
++ dev_err(dev, "Domain ID not specified in DT\n");
+ return ret;
+ }
+
+@@ -465,7 +506,7 @@ destroy_wq:
+
+ static void apr_remove(struct rpmsg_device *rpdev)
+ {
+- struct apr *apr = dev_get_drvdata(&rpdev->dev);
++ struct packet_router *apr = dev_get_drvdata(&rpdev->dev);
+
+ pdr_handle_release(apr->pdr);
+ device_for_each_child(&rpdev->dev, NULL, apr_remove_device);
+@@ -502,20 +543,20 @@ void apr_driver_unregister(struct apr_driver *drv)
+ }
+ EXPORT_SYMBOL_GPL(apr_driver_unregister);
+
+-static const struct of_device_id apr_of_match[] = {
++static const struct of_device_id pkt_router_of_match[] = {
+ { .compatible = "qcom,apr"},
+ { .compatible = "qcom,apr-v2"},
+ {}
+ };
+-MODULE_DEVICE_TABLE(of, apr_of_match);
++MODULE_DEVICE_TABLE(of, pkt_router_of_match);
+
+-static struct rpmsg_driver apr_driver = {
++static struct rpmsg_driver packet_router_driver = {
+ .probe = apr_probe,
+ .remove = apr_remove,
+ .callback = apr_callback,
+ .drv = {
+ .name = "qcom,apr",
+- .of_match_table = apr_of_match,
++ .of_match_table = pkt_router_of_match,
+ },
+ };
+
+@@ -525,7 +566,7 @@ static int __init apr_init(void)
+
+ ret = bus_register(&aprbus);
+ if (!ret)
+- ret = register_rpmsg_driver(&apr_driver);
++ ret = register_rpmsg_driver(&packet_router_driver);
+ else
+ bus_unregister(&aprbus);
+
+@@ -535,7 +576,7 @@ static int __init apr_init(void)
+ static void __exit apr_exit(void)
+ {
+ bus_unregister(&aprbus);
+- unregister_rpmsg_driver(&apr_driver);
++ unregister_rpmsg_driver(&packet_router_driver);
+ }
+
+ subsys_initcall(apr_init);
+diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c
+index 4ce8e816154f9..e61cff3d9c8a6 100644
+--- a/drivers/soc/qcom/cpr.c
++++ b/drivers/soc/qcom/cpr.c
+@@ -1010,7 +1010,7 @@ static int cpr_interpolate(const struct corner *corner, int step_volt,
+ return corner->uV;
+
+ temp = f_diff * (uV_high - uV_low);
+- do_div(temp, f_high - f_low);
++ temp = div64_ul(temp, f_high - f_low);
+
+ /*
+ * max_volt_scale has units of uV/MHz while freq values
+@@ -1710,12 +1710,16 @@ static int cpr_probe(struct platform_device *pdev)
+
+ ret = of_genpd_add_provider_simple(dev->of_node, &drv->pd);
+ if (ret)
+- return ret;
++ goto err_remove_genpd;
+
+ platform_set_drvdata(pdev, drv);
+ cpr_debugfs_init(drv);
+
+ return 0;
++
++err_remove_genpd:
++ pm_genpd_remove(&drv->pd);
++ return ret;
+ }
+
+ static int cpr_remove(struct platform_device *pdev)
+diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
+index 15a36dcab990e..47d41804fdf67 100644
+--- a/drivers/soc/qcom/llcc-qcom.c
++++ b/drivers/soc/qcom/llcc-qcom.c
+@@ -115,7 +115,7 @@ static const struct llcc_slice_config sc7280_data[] = {
+ { LLCC_CMPT, 10, 768, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+ { LLCC_GPUHTW, 11, 256, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+ { LLCC_GPU, 12, 512, 1, 0, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+- { LLCC_MMUHWT, 13, 256, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 1, 0},
++ { LLCC_MMUHWT, 13, 256, 1, 1, 0x3f, 0x0, 0, 0, 0, 0, 1, 0},
+ { LLCC_MDMPNG, 21, 768, 0, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+ { LLCC_WLHW, 24, 256, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+ { LLCC_MODPE, 29, 64, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0},
+@@ -607,7 +607,7 @@ static int qcom_llcc_probe(struct platform_device *pdev)
+ if (ret)
+ goto err;
+
+- drv_data->ecc_irq = platform_get_irq(pdev, 0);
++ drv_data->ecc_irq = platform_get_irq_optional(pdev, 0);
+ if (drv_data->ecc_irq >= 0) {
+ llcc_edac = platform_device_register_data(&pdev->dev,
+ "qcom_llcc_edac", -1, drv_data,
+@@ -630,6 +630,7 @@ static const struct of_device_id qcom_llcc_of_match[] = {
+ { .compatible = "qcom,sm8250-llcc", .data = &sm8250_cfg },
+ { }
+ };
++MODULE_DEVICE_TABLE(of, qcom_llcc_of_match);
+
+ static struct platform_driver qcom_llcc_driver = {
+ .driver = {
+diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c
+index f1875dc31ae2c..8b80c8e94c77a 100644
+--- a/drivers/soc/qcom/ocmem.c
++++ b/drivers/soc/qcom/ocmem.c
+@@ -76,8 +76,12 @@ struct ocmem {
+ #define OCMEM_REG_GFX_MPU_START 0x00001004
+ #define OCMEM_REG_GFX_MPU_END 0x00001008
+
+-#define OCMEM_HW_PROFILE_NUM_PORTS(val) FIELD_PREP(0x0000000f, (val))
+-#define OCMEM_HW_PROFILE_NUM_MACROS(val) FIELD_PREP(0x00003f00, (val))
++#define OCMEM_HW_VERSION_MAJOR(val) FIELD_GET(GENMASK(31, 28), val)
++#define OCMEM_HW_VERSION_MINOR(val) FIELD_GET(GENMASK(27, 16), val)
++#define OCMEM_HW_VERSION_STEP(val) FIELD_GET(GENMASK(15, 0), val)
++
++#define OCMEM_HW_PROFILE_NUM_PORTS(val) FIELD_GET(0x0000000f, (val))
++#define OCMEM_HW_PROFILE_NUM_MACROS(val) FIELD_GET(0x00003f00, (val))
+
+ #define OCMEM_HW_PROFILE_LAST_REGN_HALFSIZE 0x00010000
+ #define OCMEM_HW_PROFILE_INTERLEAVING 0x00020000
+@@ -194,18 +198,22 @@ struct ocmem *of_get_ocmem(struct device *dev)
+ devnode = of_parse_phandle(dev->of_node, "sram", 0);
+ if (!devnode || !devnode->parent) {
+ dev_err(dev, "Cannot look up sram phandle\n");
++ of_node_put(devnode);
+ return ERR_PTR(-ENODEV);
+ }
+
+ pdev = of_find_device_by_node(devnode->parent);
+ if (!pdev) {
+ dev_err(dev, "Cannot find device node %s\n", devnode->name);
++ of_node_put(devnode);
+ return ERR_PTR(-EPROBE_DEFER);
+ }
++ of_node_put(devnode);
+
+ ocmem = platform_get_drvdata(pdev);
+ if (!ocmem) {
+ dev_err(dev, "Cannot get ocmem\n");
++ put_device(&pdev->dev);
+ return ERR_PTR(-ENODEV);
+ }
+ return ocmem;
+@@ -353,6 +361,12 @@ static int ocmem_dev_probe(struct platform_device *pdev)
+ }
+ }
+
++ reg = ocmem_read(ocmem, OCMEM_REG_HW_VERSION);
++ dev_dbg(dev, "OCMEM hardware version: %lu.%lu.%lu\n",
++ OCMEM_HW_VERSION_MAJOR(reg),
++ OCMEM_HW_VERSION_MINOR(reg),
++ OCMEM_HW_VERSION_STEP(reg));
++
+ reg = ocmem_read(ocmem, OCMEM_REG_HW_PROFILE);
+ ocmem->num_ports = OCMEM_HW_PROFILE_NUM_PORTS(reg);
+ ocmem->num_macros = OCMEM_HW_PROFILE_NUM_MACROS(reg);
+diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c
+index 536c3e4114fb9..3973accdc9820 100644
+--- a/drivers/soc/qcom/qcom_aoss.c
++++ b/drivers/soc/qcom/qcom_aoss.c
+@@ -8,10 +8,12 @@
+ #include <linux/io.h>
+ #include <linux/mailbox_client.h>
+ #include <linux/module.h>
++#include <linux/of_platform.h>
+ #include <linux/platform_device.h>
+ #include <linux/pm_domain.h>
+ #include <linux/thermal.h>
+ #include <linux/slab.h>
++#include <linux/soc/qcom/qcom_aoss.h>
+
+ #define QMP_DESC_MAGIC 0x0
+ #define QMP_DESC_VERSION 0x4
+@@ -223,11 +225,14 @@ static bool qmp_message_empty(struct qmp *qmp)
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+-static int qmp_send(struct qmp *qmp, const void *data, size_t len)
++int qmp_send(struct qmp *qmp, const void *data, size_t len)
+ {
+ long time_left;
+ int ret;
+
++ if (WARN_ON(IS_ERR_OR_NULL(qmp) || !data))
++ return -EINVAL;
++
+ if (WARN_ON(len + sizeof(u32) > qmp->size))
+ return -EINVAL;
+
+@@ -261,6 +266,7 @@ static int qmp_send(struct qmp *qmp, const void *data, size_t len)
+
+ return ret;
+ }
++EXPORT_SYMBOL(qmp_send);
+
+ static int qmp_qdss_clk_prepare(struct clk_hw *hw)
+ {
+@@ -493,8 +499,10 @@ static int qmp_cooling_devices_register(struct qmp *qmp)
+ continue;
+ ret = qmp_cooling_device_add(qmp, &qmp->cooling_devs[count++],
+ child);
+- if (ret)
++ if (ret) {
++ of_node_put(child);
+ goto unroll;
++ }
+ }
+
+ if (!count)
+@@ -519,6 +527,55 @@ static void qmp_cooling_devices_remove(struct qmp *qmp)
+ thermal_cooling_device_unregister(qmp->cooling_devs[i].cdev);
+ }
+
++/**
++ * qmp_get() - get a qmp handle from a device
++ * @dev: client device pointer
++ *
++ * Return: handle to qmp device on success, ERR_PTR() on failure
++ */
++struct qmp *qmp_get(struct device *dev)
++{
++ struct platform_device *pdev;
++ struct device_node *np;
++ struct qmp *qmp;
++
++ if (!dev || !dev->of_node)
++ return ERR_PTR(-EINVAL);
++
++ np = of_parse_phandle(dev->of_node, "qcom,qmp", 0);
++ if (!np)
++ return ERR_PTR(-ENODEV);
++
++ pdev = of_find_device_by_node(np);
++ of_node_put(np);
++ if (!pdev)
++ return ERR_PTR(-EINVAL);
++
++ qmp = platform_get_drvdata(pdev);
++
++ if (!qmp) {
++ put_device(&pdev->dev);
++ return ERR_PTR(-EPROBE_DEFER);
++ }
++ return qmp;
++}
++EXPORT_SYMBOL(qmp_get);
++
++/**
++ * qmp_put() - release a qmp handle
++ * @qmp: qmp handle obtained from qmp_get()
++ */
++void qmp_put(struct qmp *qmp)
++{
++ /*
++ * Match get_device() inside of_find_device_by_node() in
++ * qmp_get()
++ */
++ if (!IS_ERR_OR_NULL(qmp))
++ put_device(qmp->dev);
++}
++EXPORT_SYMBOL(qmp_put);
++
+ static int qmp_probe(struct platform_device *pdev)
+ {
+ struct resource *res;
+@@ -548,7 +605,7 @@ static int qmp_probe(struct platform_device *pdev)
+ }
+
+ irq = platform_get_irq(pdev, 0);
+- ret = devm_request_irq(&pdev->dev, irq, qmp_intr, IRQF_ONESHOT,
++ ret = devm_request_irq(&pdev->dev, irq, qmp_intr, 0,
+ "aoss-qmp", qmp);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to request interrupt\n");
+@@ -615,6 +672,7 @@ static struct platform_driver qmp_driver = {
+ .driver = {
+ .name = "qcom_aoss_qmp",
+ .of_match_table = qmp_dt_match,
++ .suppress_bind_attrs = true,
+ },
+ .probe = qmp_probe,
+ .remove = qmp_remove,
+diff --git a/drivers/soc/qcom/qmi_encdec.c b/drivers/soc/qcom/qmi_encdec.c
+index 328cc82371919..9f9a5ad3eb22d 100644
+--- a/drivers/soc/qcom/qmi_encdec.c
++++ b/drivers/soc/qcom/qmi_encdec.c
+@@ -534,8 +534,8 @@ static int qmi_decode_string_elem(struct qmi_elem_info *ei_array,
+ decoded_bytes += rc;
+ }
+
+- if (string_len > temp_ei->elem_len) {
+- pr_err("%s: String len %d > Max Len %d\n",
++ if (string_len >= temp_ei->elem_len) {
++ pr_err("%s: String len %d >= Max Len %d\n",
+ __func__, string_len, temp_ei->elem_len);
+ return -ETOOSMALL;
+ } else if (string_len > tlv_len) {
+diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c
+index fa209b479ab35..d98cc8c2e5d5c 100644
+--- a/drivers/soc/qcom/rpmhpd.c
++++ b/drivers/soc/qcom/rpmhpd.c
+@@ -30,6 +30,7 @@
+ * @active_only: True if it represents an Active only peer
+ * @corner: current corner
+ * @active_corner: current active corner
++ * @enable_corner: lowest non-zero corner
+ * @level: An array of level (vlvl) to corner (hlvl) mappings
+ * derived from cmd-db
+ * @level_count: Number of levels supported by the power domain. max
+@@ -47,6 +48,7 @@ struct rpmhpd {
+ const bool active_only;
+ unsigned int corner;
+ unsigned int active_corner;
++ unsigned int enable_corner;
+ u32 level[RPMH_ARC_MAX_LEVELS];
+ size_t level_count;
+ bool enabled;
+@@ -204,7 +206,7 @@ static const struct rpmhpd_desc sm8250_desc = {
+ static struct rpmhpd sm8350_mxc_ao;
+ static struct rpmhpd sm8350_mxc = {
+ .pd = { .name = "mxc", },
+- .peer = &sm8150_mmcx_ao,
++ .peer = &sm8350_mxc_ao,
+ .res_name = "mxc.lvl",
+ };
+
+@@ -385,13 +387,13 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
+ static int rpmhpd_power_on(struct generic_pm_domain *domain)
+ {
+ struct rpmhpd *pd = domain_to_rpmhpd(domain);
+- int ret = 0;
++ unsigned int corner;
++ int ret;
+
+ mutex_lock(&rpmhpd_lock);
+
+- if (pd->corner)
+- ret = rpmhpd_aggregate_corner(pd, pd->corner);
+-
++ corner = max(pd->corner, pd->enable_corner);
++ ret = rpmhpd_aggregate_corner(pd, corner);
+ if (!ret)
+ pd->enabled = true;
+
+@@ -436,6 +438,10 @@ static int rpmhpd_set_performance_state(struct generic_pm_domain *domain,
+ i--;
+
+ if (pd->enabled) {
++ /* Ensure that the domain isn't turn off */
++ if (i < pd->enable_corner)
++ i = pd->enable_corner;
++
+ ret = rpmhpd_aggregate_corner(pd, i);
+ if (ret)
+ goto out;
+@@ -472,6 +478,10 @@ static int rpmhpd_update_level_mapping(struct rpmhpd *rpmhpd)
+ for (i = 0; i < rpmhpd->level_count; i++) {
+ rpmhpd->level[i] = buf[i];
+
++ /* Remember the first corner with non-zero level */
++ if (!rpmhpd->level[rpmhpd->enable_corner] && rpmhpd->level[i])
++ rpmhpd->enable_corner = i;
++
+ /*
+ * The AUX data may be zero padded. These 0 valued entries at
+ * the end of the map must be ignored.
+diff --git a/drivers/soc/qcom/rpmpd.c b/drivers/soc/qcom/rpmpd.c
+index dbf494e925743..9f07274b0d281 100644
+--- a/drivers/soc/qcom/rpmpd.c
++++ b/drivers/soc/qcom/rpmpd.c
+@@ -546,6 +546,9 @@ static int rpmpd_probe(struct platform_device *pdev)
+
+ data->domains = devm_kcalloc(&pdev->dev, num, sizeof(*data->domains),
+ GFP_KERNEL);
++ if (!data->domains)
++ return -ENOMEM;
++
+ data->num_domains = num;
+
+ for (i = 0; i < num; i++) {
+diff --git a/drivers/soc/qcom/smem_state.c b/drivers/soc/qcom/smem_state.c
+index 31faf4aa868e6..e848cc9a3cf80 100644
+--- a/drivers/soc/qcom/smem_state.c
++++ b/drivers/soc/qcom/smem_state.c
+@@ -136,6 +136,7 @@ static void qcom_smem_state_release(struct kref *ref)
+ struct qcom_smem_state *state = container_of(ref, struct qcom_smem_state, refcount);
+
+ list_del(&state->list);
++ of_node_put(state->of_node);
+ kfree(state);
+ }
+
+@@ -205,7 +206,7 @@ struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node,
+
+ kref_init(&state->refcount);
+
+- state->of_node = of_node;
++ state->of_node = of_node_get(of_node);
+ state->ops = *ops;
+ state->priv = priv;
+
+diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c
+index 2df488333be9c..cac6b0b7b0b1b 100644
+--- a/drivers/soc/qcom/smp2p.c
++++ b/drivers/soc/qcom/smp2p.c
+@@ -421,6 +421,7 @@ static int smp2p_parse_ipc(struct qcom_smp2p *smp2p)
+ }
+
+ smp2p->ipc_regmap = syscon_node_to_regmap(syscon);
++ of_node_put(syscon);
+ if (IS_ERR(smp2p->ipc_regmap))
+ return PTR_ERR(smp2p->ipc_regmap);
+
+diff --git a/drivers/soc/qcom/smsm.c b/drivers/soc/qcom/smsm.c
+index ef15d014c03a3..3e8994d6110e6 100644
+--- a/drivers/soc/qcom/smsm.c
++++ b/drivers/soc/qcom/smsm.c
+@@ -374,6 +374,7 @@ static int smsm_parse_ipc(struct qcom_smsm *smsm, unsigned host_id)
+ return 0;
+
+ host->ipc_regmap = syscon_node_to_regmap(syscon);
++ of_node_put(syscon);
+ if (IS_ERR(host->ipc_regmap))
+ return PTR_ERR(host->ipc_regmap);
+
+@@ -525,7 +526,7 @@ static int qcom_smsm_probe(struct platform_device *pdev)
+ for (id = 0; id < smsm->num_hosts; id++) {
+ ret = smsm_parse_ipc(smsm, id);
+ if (ret < 0)
+- return ret;
++ goto out_put;
+ }
+
+ /* Acquire the main SMSM state vector */
+@@ -533,13 +534,14 @@ static int qcom_smsm_probe(struct platform_device *pdev)
+ smsm->num_entries * sizeof(u32));
+ if (ret < 0 && ret != -EEXIST) {
+ dev_err(&pdev->dev, "unable to allocate shared state entry\n");
+- return ret;
++ goto out_put;
+ }
+
+ states = qcom_smem_get(QCOM_SMEM_HOST_ANY, SMEM_SMSM_SHARED_STATE, NULL);
+ if (IS_ERR(states)) {
+ dev_err(&pdev->dev, "Unable to acquire shared state entry\n");
+- return PTR_ERR(states);
++ ret = PTR_ERR(states);
++ goto out_put;
+ }
+
+ /* Acquire the list of interrupt mask vectors */
+@@ -547,13 +549,14 @@ static int qcom_smsm_probe(struct platform_device *pdev)
+ ret = qcom_smem_alloc(QCOM_SMEM_HOST_ANY, SMEM_SMSM_CPU_INTR_MASK, size);
+ if (ret < 0 && ret != -EEXIST) {
+ dev_err(&pdev->dev, "unable to allocate smsm interrupt mask\n");
+- return ret;
++ goto out_put;
+ }
+
+ intr_mask = qcom_smem_get(QCOM_SMEM_HOST_ANY, SMEM_SMSM_CPU_INTR_MASK, NULL);
+ if (IS_ERR(intr_mask)) {
+ dev_err(&pdev->dev, "unable to acquire shared memory interrupt mask\n");
+- return PTR_ERR(intr_mask);
++ ret = PTR_ERR(intr_mask);
++ goto out_put;
+ }
+
+ /* Setup the reference to the local state bits */
+@@ -564,7 +567,8 @@ static int qcom_smsm_probe(struct platform_device *pdev)
+ smsm->state = qcom_smem_state_register(local_node, &smsm_state_ops, smsm);
+ if (IS_ERR(smsm->state)) {
+ dev_err(smsm->dev, "failed to register qcom_smem_state\n");
+- return PTR_ERR(smsm->state);
++ ret = PTR_ERR(smsm->state);
++ goto out_put;
+ }
+
+ /* Register handlers for remote processor entries of interest. */
+@@ -594,16 +598,19 @@ static int qcom_smsm_probe(struct platform_device *pdev)
+ }
+
+ platform_set_drvdata(pdev, smsm);
++ of_node_put(local_node);
+
+ return 0;
+
+ unwind_interfaces:
++ of_node_put(node);
+ for (id = 0; id < smsm->num_entries; id++)
+ if (smsm->entries[id].domain)
+ irq_domain_remove(smsm->entries[id].domain);
+
+ qcom_smem_state_unregister(smsm->state);
+-
++out_put:
++ of_node_put(local_node);
+ return ret;
+ }
+
+diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c
+index 52e5811671155..5beb452f24013 100644
+--- a/drivers/soc/qcom/socinfo.c
++++ b/drivers/soc/qcom/socinfo.c
+@@ -87,8 +87,8 @@ static const char *const pmic_models[] = {
+ [15] = "PM8901",
+ [16] = "PM8950/PM8027",
+ [17] = "PMI8950/ISL9519",
+- [18] = "PM8921",
+- [19] = "PM8018",
++ [18] = "PMK8001/PM8921",
++ [19] = "PMI8996/PM8018",
+ [20] = "PM8998/PM8015",
+ [21] = "PMI8998/PM8014",
+ [22] = "PM8821",
+diff --git a/drivers/soc/renesas/r8a779a0-sysc.c b/drivers/soc/renesas/r8a779a0-sysc.c
+index 7410b9fa9846f..7e1aba9abce24 100644
+--- a/drivers/soc/renesas/r8a779a0-sysc.c
++++ b/drivers/soc/renesas/r8a779a0-sysc.c
+@@ -83,11 +83,11 @@ static struct r8a779a0_sysc_area r8a779a0_areas[] __initdata = {
+ { "a2cv6", R8A779A0_PD_A2CV6, R8A779A0_PD_A3IR },
+ { "a2cn2", R8A779A0_PD_A2CN2, R8A779A0_PD_A3IR },
+ { "a2imp23", R8A779A0_PD_A2IMP23, R8A779A0_PD_A3IR },
+- { "a2dp1", R8A779A0_PD_A2DP0, R8A779A0_PD_A3IR },
+- { "a2cv2", R8A779A0_PD_A2CV0, R8A779A0_PD_A3IR },
+- { "a2cv3", R8A779A0_PD_A2CV1, R8A779A0_PD_A3IR },
+- { "a2cv5", R8A779A0_PD_A2CV4, R8A779A0_PD_A3IR },
+- { "a2cv7", R8A779A0_PD_A2CV6, R8A779A0_PD_A3IR },
++ { "a2dp1", R8A779A0_PD_A2DP1, R8A779A0_PD_A3IR },
++ { "a2cv2", R8A779A0_PD_A2CV2, R8A779A0_PD_A3IR },
++ { "a2cv3", R8A779A0_PD_A2CV3, R8A779A0_PD_A3IR },
++ { "a2cv5", R8A779A0_PD_A2CV5, R8A779A0_PD_A3IR },
++ { "a2cv7", R8A779A0_PD_A2CV7, R8A779A0_PD_A3IR },
+ { "a2cn1", R8A779A0_PD_A2CN1, R8A779A0_PD_A3IR },
+ { "a1cnn0", R8A779A0_PD_A1CNN0, R8A779A0_PD_A2CN0 },
+ { "a1cnn2", R8A779A0_PD_A1CNN2, R8A779A0_PD_A2CN2 },
+diff --git a/drivers/soc/rockchip/grf.c b/drivers/soc/rockchip/grf.c
+index 494cf2b5bf7b6..343ff61ccccbb 100644
+--- a/drivers/soc/rockchip/grf.c
++++ b/drivers/soc/rockchip/grf.c
+@@ -148,12 +148,14 @@ static int __init rockchip_grf_init(void)
+ return -ENODEV;
+ if (!match || !match->data) {
+ pr_err("%s: missing grf data\n", __func__);
++ of_node_put(np);
+ return -EINVAL;
+ }
+
+ grf_info = match->data;
+
+ grf = syscon_node_to_regmap(np);
++ of_node_put(np);
+ if (IS_ERR(grf)) {
+ pr_err("%s: could not get grf syscon\n", __func__);
+ return PTR_ERR(grf);
+diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig
+index 5745d7e5908e9..1f643c0f5c93f 100644
+--- a/drivers/soc/samsung/Kconfig
++++ b/drivers/soc/samsung/Kconfig
+@@ -25,6 +25,7 @@ config EXYNOS_PMU
+ bool "Exynos PMU controller driver" if COMPILE_TEST
+ depends on ARCH_EXYNOS || ((ARM || ARM64) && COMPILE_TEST)
+ select EXYNOS_PMU_ARM_DRIVERS if ARM && ARCH_EXYNOS
++ select MFD_CORE
+
+ # There is no need to enable these drivers for ARMv8
+ config EXYNOS_PMU_ARM_DRIVERS
+diff --git a/drivers/soc/sifive/sifive_l2_cache.c b/drivers/soc/sifive/sifive_l2_cache.c
+index 59640a1d0b28a..7831580704905 100644
+--- a/drivers/soc/sifive/sifive_l2_cache.c
++++ b/drivers/soc/sifive/sifive_l2_cache.c
+@@ -202,17 +202,22 @@ static int __init sifive_l2_init(void)
+ if (!np)
+ return -ENODEV;
+
+- if (of_address_to_resource(np, 0, &res))
+- return -ENODEV;
++ if (of_address_to_resource(np, 0, &res)) {
++ rc = -ENODEV;
++ goto err_node_put;
++ }
+
+ l2_base = ioremap(res.start, resource_size(&res));
+- if (!l2_base)
+- return -ENOMEM;
++ if (!l2_base) {
++ rc = -ENOMEM;
++ goto err_node_put;
++ }
+
+ intr_num = of_property_count_u32_elems(np, "interrupts");
+ if (!intr_num) {
+ pr_err("L2CACHE: no interrupts property\n");
+- return -ENODEV;
++ rc = -ENODEV;
++ goto err_unmap;
+ }
+
+ for (i = 0; i < intr_num; i++) {
+@@ -220,9 +225,10 @@ static int __init sifive_l2_init(void)
+ rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL);
+ if (rc) {
+ pr_err("L2CACHE: Could not request IRQ %d\n", g_irq[i]);
+- return rc;
++ goto err_free_irq;
+ }
+ }
++ of_node_put(np);
+
+ l2_config_read();
+
+@@ -233,5 +239,14 @@ static int __init sifive_l2_init(void)
+ setup_sifive_debug();
+ #endif
+ return 0;
++
++err_free_irq:
++ while (--i >= 0)
++ free_irq(g_irq[i], NULL);
++err_unmap:
++ iounmap(l2_base);
++err_node_put:
++ of_node_put(np);
++ return rc;
+ }
+ device_initcall(sifive_l2_init);
+diff --git a/drivers/soc/sunxi/sunxi_sram.c b/drivers/soc/sunxi/sunxi_sram.c
+index 42833e33a96cc..09754cd1d57dc 100644
+--- a/drivers/soc/sunxi/sunxi_sram.c
++++ b/drivers/soc/sunxi/sunxi_sram.c
+@@ -78,8 +78,8 @@ static struct sunxi_sram_desc sun4i_a10_sram_d = {
+
+ static struct sunxi_sram_desc sun50i_a64_sram_c = {
+ .data = SUNXI_SRAM_DATA("C", 0x4, 24, 1,
+- SUNXI_SRAM_MAP(0, 1, "cpu"),
+- SUNXI_SRAM_MAP(1, 0, "de2")),
++ SUNXI_SRAM_MAP(1, 0, "cpu"),
++ SUNXI_SRAM_MAP(0, 1, "de2")),
+ };
+
+ static const struct of_device_id sunxi_sram_dt_ids[] = {
+@@ -254,6 +254,7 @@ int sunxi_sram_claim(struct device *dev)
+ writel(val | ((device << sram_data->offset) & mask),
+ base + sram_data->reg);
+
++ sram_desc->claimed = true;
+ spin_unlock(&sram_lock);
+
+ return 0;
+@@ -329,12 +330,11 @@ static struct regmap_config sunxi_sram_emac_clock_regmap = {
+ .writeable_reg = sunxi_sram_regmap_accessible_reg,
+ };
+
+-static int sunxi_sram_probe(struct platform_device *pdev)
++static int __init sunxi_sram_probe(struct platform_device *pdev)
+ {
+- struct resource *res;
+- struct dentry *d;
+ struct regmap *emac_clock;
+ const struct sunxi_sramc_variant *variant;
++ struct device *dev = &pdev->dev;
+
+ sram_dev = &pdev->dev;
+
+@@ -342,18 +342,10 @@ static int sunxi_sram_probe(struct platform_device *pdev)
+ if (!variant)
+ return -EINVAL;
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- base = devm_ioremap_resource(&pdev->dev, res);
++ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+- of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+-
+- d = debugfs_create_file("sram", S_IRUGO, NULL, NULL,
+- &sunxi_sram_fops);
+- if (!d)
+- return -ENOMEM;
+-
+ if (variant->num_emac_clocks > 0) {
+ emac_clock = devm_regmap_init_mmio(&pdev->dev, base,
+ &sunxi_sram_emac_clock_regmap);
+@@ -362,6 +354,10 @@ static int sunxi_sram_probe(struct platform_device *pdev)
+ return PTR_ERR(emac_clock);
+ }
+
++ of_platform_populate(dev->of_node, NULL, NULL, dev);
++
++ debugfs_create_file("sram", 0444, NULL, NULL, &sunxi_sram_fops);
++
+ return 0;
+ }
+
+@@ -411,9 +407,8 @@ static struct platform_driver sunxi_sram_driver = {
+ .name = "sunxi-sram",
+ .of_match_table = sunxi_sram_dt_match,
+ },
+- .probe = sunxi_sram_probe,
+ };
+-module_platform_driver(sunxi_sram_driver);
++builtin_platform_driver_probe(sunxi_sram_driver, sunxi_sram_probe);
+
+ MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+ MODULE_DESCRIPTION("Allwinner sunXi SRAM Controller Driver");
+diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
+index 8b53ed1cc67ec..1224e1c8c2c92 100644
+--- a/drivers/soc/tegra/Kconfig
++++ b/drivers/soc/tegra/Kconfig
+@@ -136,7 +136,6 @@ config SOC_TEGRA_FUSE
+ def_bool y
+ depends on ARCH_TEGRA
+ select SOC_BUS
+- select TEGRA20_APB_DMA if ARCH_TEGRA_2x_SOC
+
+ config SOC_TEGRA_FLOWCTRL
+ bool
+diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c
+index f2151815db585..e714ed3b61bc3 100644
+--- a/drivers/soc/tegra/fuse/fuse-tegra.c
++++ b/drivers/soc/tegra/fuse/fuse-tegra.c
+@@ -320,7 +320,7 @@ static struct platform_driver tegra_fuse_driver = {
+ };
+ builtin_platform_driver(tegra_fuse_driver);
+
+-bool __init tegra_fuse_read_spare(unsigned int spare)
++u32 __init tegra_fuse_read_spare(unsigned int spare)
+ {
+ unsigned int offset = fuse->soc->info->spare + spare * 4;
+
+diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h
+index de58feba04350..ecff0c08e9595 100644
+--- a/drivers/soc/tegra/fuse/fuse.h
++++ b/drivers/soc/tegra/fuse/fuse.h
+@@ -65,7 +65,7 @@ struct tegra_fuse {
+ void tegra_init_revision(void);
+ void tegra_init_apbmisc(void);
+
+-bool __init tegra_fuse_read_spare(unsigned int spare);
++u32 __init tegra_fuse_read_spare(unsigned int spare);
+ u32 __init tegra_fuse_read_early(unsigned int offset);
+
+ u8 tegra_get_major_rev(void);
+diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
+index 50091c4ec9481..a60e142ade344 100644
+--- a/drivers/soc/tegra/pmc.c
++++ b/drivers/soc/tegra/pmc.c
+@@ -782,7 +782,7 @@ static int tegra_powergate_power_up(struct tegra_powergate *pg,
+
+ err = reset_control_deassert(pg->reset);
+ if (err)
+- goto powergate_off;
++ goto disable_clks;
+
+ usleep_range(10, 20);
+
+diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
+index 2ac3856b8d42d..52389859395c6 100644
+--- a/drivers/soc/ti/knav_qmss_queue.c
++++ b/drivers/soc/ti/knav_qmss_queue.c
+@@ -67,7 +67,7 @@ static DEFINE_MUTEX(knav_dev_lock);
+ * Newest followed by older ones. Search is done from start of the array
+ * until a firmware file is found.
+ */
+-const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"};
++static const char * const knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"};
+
+ static bool device_ready;
+ bool knav_qmss_device_ready(void)
+@@ -1785,9 +1785,9 @@ static int knav_queue_probe(struct platform_device *pdev)
+ INIT_LIST_HEAD(&kdev->pdsps);
+
+ pm_runtime_enable(&pdev->dev);
+- ret = pm_runtime_get_sync(&pdev->dev);
++ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+- pm_runtime_put_noidle(&pdev->dev);
++ pm_runtime_disable(&pdev->dev);
+ dev_err(dev, "Failed to enable QMSS\n");
+ return ret;
+ }
+diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c
+index 7bab4bbaf02dc..285302bf3ef91 100644
+--- a/drivers/soc/ti/pm33xx.c
++++ b/drivers/soc/ti/pm33xx.c
+@@ -527,7 +527,7 @@ static int am33xx_pm_probe(struct platform_device *pdev)
+
+ ret = am33xx_pm_alloc_sram();
+ if (ret)
+- return ret;
++ goto err_wkup_m3_ipc_put;
+
+ ret = am33xx_pm_rtc_setup();
+ if (ret)
+@@ -574,13 +574,14 @@ err_pm_runtime_put:
+ pm_runtime_put_sync(dev);
+ err_pm_runtime_disable:
+ pm_runtime_disable(dev);
+- wkup_m3_ipc_put(m3_ipc);
+ err_unsetup_rtc:
+ iounmap(rtc_base_virt);
+ clk_put(rtc_fck);
+ err_free_sram:
+ am33xx_pm_free_sram();
+ pm33xx_dev = NULL;
++err_wkup_m3_ipc_put:
++ wkup_m3_ipc_put(m3_ipc);
+ return ret;
+ }
+
+diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c
+index 49da387d77494..b36779309e49b 100644
+--- a/drivers/soc/ti/pruss.c
++++ b/drivers/soc/ti/pruss.c
+@@ -129,7 +129,7 @@ static int pruss_clk_init(struct pruss *pruss, struct device_node *cfg_node)
+
+ clks_np = of_get_child_by_name(cfg_node, "clocks");
+ if (!clks_np) {
+- dev_err(dev, "%pOF is missing its 'clocks' node\n", clks_np);
++ dev_err(dev, "%pOF is missing its 'clocks' node\n", cfg_node);
+ return -ENODEV;
+ }
+
+diff --git a/drivers/soc/ti/smartreflex.c b/drivers/soc/ti/smartreflex.c
+index b5b2fa538d5c3..4d15587324d4f 100644
+--- a/drivers/soc/ti/smartreflex.c
++++ b/drivers/soc/ti/smartreflex.c
+@@ -931,6 +931,7 @@ static int omap_sr_probe(struct platform_device *pdev)
+ err_debugfs:
+ debugfs_remove_recursive(sr_info->dbg_dir);
+ err_list_del:
++ pm_runtime_disable(&pdev->dev);
+ list_del(&sr_info->node);
+ clk_unprepare(sr_info->fck);
+
+diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c
+index 8afb3f45d2637..a33ec7eaf23d1 100644
+--- a/drivers/soc/ti/ti_sci_pm_domains.c
++++ b/drivers/soc/ti/ti_sci_pm_domains.c
+@@ -183,6 +183,8 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev)
+ devm_kcalloc(dev, max_id + 1,
+ sizeof(*pd_provider->data.domains),
+ GFP_KERNEL);
++ if (!pd_provider->data.domains)
++ return -ENOMEM;
+
+ pd_provider->data.num_domains = max_id + 1;
+ pd_provider->data.xlate = ti_sci_pd_xlate;
+diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
+index 09abd17065ba5..8b3ff44fd9010 100644
+--- a/drivers/soc/ti/wkup_m3_ipc.c
++++ b/drivers/soc/ti/wkup_m3_ipc.c
+@@ -449,9 +449,9 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev)
+ return PTR_ERR(m3_ipc->ipc_mem_base);
+
+ irq = platform_get_irq(pdev, 0);
+- if (!irq) {
++ if (irq < 0) {
+ dev_err(&pdev->dev, "no irq resource\n");
+- return -ENXIO;
++ return irq;
+ }
+
+ ret = devm_request_irq(dev, irq, wkup_m3_txev_handler,
+diff --git a/drivers/soc/ux500/ux500-soc-id.c b/drivers/soc/ux500/ux500-soc-id.c
+index a9472e0e5d61c..27d6e25a01153 100644
+--- a/drivers/soc/ux500/ux500-soc-id.c
++++ b/drivers/soc/ux500/ux500-soc-id.c
+@@ -167,20 +167,18 @@ ATTRIBUTE_GROUPS(ux500_soc);
+ static const char *db8500_read_soc_id(struct device_node *backupram)
+ {
+ void __iomem *base;
+- void __iomem *uid;
+ const char *retstr;
++ u32 uid[5];
+
+ base = of_iomap(backupram, 0);
+ if (!base)
+ return NULL;
+- uid = base + 0x1fc0;
++ memcpy_fromio(uid, base + 0x1fc0, sizeof(uid));
+
+ /* Throw these device-specific numbers into the entropy pool */
+- add_device_randomness(uid, 0x14);
++ add_device_randomness(uid, sizeof(uid));
+ retstr = kasprintf(GFP_KERNEL, "%08x%08x%08x%08x%08x",
+- readl((u32 *)uid+0),
+- readl((u32 *)uid+1), readl((u32 *)uid+2),
+- readl((u32 *)uid+3), readl((u32 *)uid+4));
++ uid[0], uid[1], uid[2], uid[3], uid[4]);
+ iounmap(base);
+ return retstr;
+ }
+diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
+index 1b115734a8f6b..230a3250f3154 100644
+--- a/drivers/soundwire/bus.c
++++ b/drivers/soundwire/bus.c
+@@ -7,6 +7,7 @@
+ #include <linux/pm_runtime.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/soundwire/sdw.h>
++#include <linux/soundwire/sdw_type.h>
+ #include "bus.h"
+ #include "sysfs_local.h"
+
+@@ -827,8 +828,8 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
+ "%s: initializing enumeration and init completion for Slave %d\n",
+ __func__, slave->dev_num);
+
+- init_completion(&slave->enumeration_complete);
+- init_completion(&slave->initialization_complete);
++ reinit_completion(&slave->enumeration_complete);
++ reinit_completion(&slave->initialization_complete);
+
+ } else if ((status == SDW_SLAVE_ATTACHED) &&
+ (slave->status == SDW_SLAVE_UNATTACHED)) {
+@@ -836,7 +837,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
+ "%s: signaling enumeration completion for Slave %d\n",
+ __func__, slave->dev_num);
+
+- complete(&slave->enumeration_complete);
++ complete_all(&slave->enumeration_complete);
+ }
+ slave->status = status;
+ mutex_unlock(&bus->bus_lock);
+@@ -846,15 +847,21 @@ static int sdw_slave_clk_stop_callback(struct sdw_slave *slave,
+ enum sdw_clk_stop_mode mode,
+ enum sdw_clk_stop_type type)
+ {
+- int ret;
++ int ret = 0;
+
+- if (slave->ops && slave->ops->clk_stop) {
+- ret = slave->ops->clk_stop(slave, mode, type);
+- if (ret < 0)
+- return ret;
++ mutex_lock(&slave->sdw_dev_lock);
++
++ if (slave->probed) {
++ struct device *dev = &slave->dev;
++ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
++
++ if (drv->ops && drv->ops->clk_stop)
++ ret = drv->ops->clk_stop(slave, mode, type);
+ }
+
+- return 0;
++ mutex_unlock(&slave->sdw_dev_lock);
++
++ return ret;
+ }
+
+ static int sdw_slave_clk_stop_prepare(struct sdw_slave *slave,
+@@ -1110,7 +1117,7 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus)
+ if (!simple_clk_stop) {
+ ret = sdw_bus_wait_for_clk_prep_deprep(bus, SDW_BROADCAST_DEV_NUM);
+ if (ret < 0)
+- dev_warn(&slave->dev, "clock stop deprepare wait failed:%d\n", ret);
++ dev_warn(bus->dev, "clock stop deprepare wait failed:%d\n", ret);
+ }
+
+ list_for_each_entry(slave, &bus->slaves, node) {
+@@ -1616,14 +1623,24 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
+ }
+
+ /* Update the Slave driver */
+- if (slave_notify && slave->ops &&
+- slave->ops->interrupt_callback) {
+- slave_intr.sdca_cascade = sdca_cascade;
+- slave_intr.control_port = clear;
+- memcpy(slave_intr.port, &port_status,
+- sizeof(slave_intr.port));
+-
+- slave->ops->interrupt_callback(slave, &slave_intr);
++ if (slave_notify) {
++ mutex_lock(&slave->sdw_dev_lock);
++
++ if (slave->probed) {
++ struct device *dev = &slave->dev;
++ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
++
++ if (drv->ops && drv->ops->interrupt_callback) {
++ slave_intr.sdca_cascade = sdca_cascade;
++ slave_intr.control_port = clear;
++ memcpy(slave_intr.port, &port_status,
++ sizeof(slave_intr.port));
++
++ drv->ops->interrupt_callback(slave, &slave_intr);
++ }
++ }
++
++ mutex_unlock(&slave->sdw_dev_lock);
+ }
+
+ /* Ack interrupt */
+@@ -1697,29 +1714,21 @@ io_err:
+ static int sdw_update_slave_status(struct sdw_slave *slave,
+ enum sdw_slave_status status)
+ {
+- unsigned long time;
++ int ret = 0;
+
+- if (!slave->probed) {
+- /*
+- * the slave status update is typically handled in an
+- * interrupt thread, which can race with the driver
+- * probe, e.g. when a module needs to be loaded.
+- *
+- * make sure the probe is complete before updating
+- * status.
+- */
+- time = wait_for_completion_timeout(&slave->probe_complete,
+- msecs_to_jiffies(DEFAULT_PROBE_TIMEOUT));
+- if (!time) {
+- dev_err(&slave->dev, "Probe not complete, timed out\n");
+- return -ETIMEDOUT;
+- }
++ mutex_lock(&slave->sdw_dev_lock);
++
++ if (slave->probed) {
++ struct device *dev = &slave->dev;
++ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
++
++ if (drv->ops && drv->ops->update_status)
++ ret = drv->ops->update_status(slave, status);
+ }
+
+- if (!slave->ops || !slave->ops->update_status)
+- return 0;
++ mutex_unlock(&slave->sdw_dev_lock);
+
+- return slave->ops->update_status(slave, status);
++ return ret;
+ }
+
+ /**
+@@ -1831,7 +1840,19 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
+ "%s: signaling initialization completion for Slave %d\n",
+ __func__, slave->dev_num);
+
+- complete(&slave->initialization_complete);
++ complete_all(&slave->initialization_complete);
++
++ /*
++ * If the manager became pm_runtime active, the peripherals will be
++ * restarted and attach, but their pm_runtime status may remain
++ * suspended. If the 'update_slave_status' callback initiates
++ * any sort of deferred processing, this processing would not be
++ * cancelled on pm_runtime suspend.
++ * To avoid such zombie states, we queue a request to resume.
++ * This would be a no-op in case the peripheral was being resumed
++ * by e.g. the ALSA/ASoC framework.
++ */
++ pm_request_resume(&slave->dev);
+ }
+ }
+
+diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c
+index 893296f3fe395..963498db0fd22 100644
+--- a/drivers/soundwire/bus_type.c
++++ b/drivers/soundwire/bus_type.c
+@@ -98,8 +98,6 @@ static int sdw_drv_probe(struct device *dev)
+ if (!id)
+ return -ENODEV;
+
+- slave->ops = drv->ops;
+-
+ /*
+ * attach to power domain but don't turn on (last arg)
+ */
+@@ -112,14 +110,17 @@ static int sdw_drv_probe(struct device *dev)
+ name = drv->name;
+ if (!name)
+ name = drv->driver.name;
++
+ dev_err(dev, "Probe of %s failed: %d\n", name, ret);
+ dev_pm_domain_detach(dev, false);
+ return ret;
+ }
+
++ mutex_lock(&slave->sdw_dev_lock);
++
+ /* device is probed so let's read the properties now */
+- if (slave->ops && slave->ops->read_prop)
+- slave->ops->read_prop(slave);
++ if (drv->ops && drv->ops->read_prop)
++ drv->ops->read_prop(slave);
+
+ /* init the sysfs as we have properties now */
+ ret = sdw_slave_sysfs_init(slave);
+@@ -139,7 +140,19 @@ static int sdw_drv_probe(struct device *dev)
+ slave->prop.clk_stop_timeout);
+
+ slave->probed = true;
+- complete(&slave->probe_complete);
++
++ /*
++ * if the probe happened after the bus was started, notify the codec driver
++ * of the current hardware status to e.g. start the initialization.
++ * Errors are only logged as warnings to avoid failing the probe.
++ */
++ if (drv->ops && drv->ops->update_status) {
++ ret = drv->ops->update_status(slave, slave->status);
++ if (ret < 0)
++ dev_warn(dev, "%s: update_status failed with status %d\n", __func__, ret);
++ }
++
++ mutex_unlock(&slave->sdw_dev_lock);
+
+ dev_dbg(dev, "probe complete\n");
+
+@@ -152,6 +165,10 @@ static int sdw_drv_remove(struct device *dev)
+ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
+ int ret = 0;
+
++ mutex_lock(&slave->sdw_dev_lock);
++ slave->probed = false;
++ mutex_unlock(&slave->sdw_dev_lock);
++
+ if (drv->remove)
+ ret = drv->remove(slave);
+
+@@ -193,12 +210,8 @@ int __sdw_register_driver(struct sdw_driver *drv, struct module *owner)
+
+ drv->driver.owner = owner;
+ drv->driver.probe = sdw_drv_probe;
+-
+- if (drv->remove)
+- drv->driver.remove = sdw_drv_remove;
+-
+- if (drv->shutdown)
+- drv->driver.shutdown = sdw_drv_shutdown;
++ drv->driver.remove = sdw_drv_remove;
++ drv->driver.shutdown = sdw_drv_shutdown;
+
+ return driver_register(&drv->driver);
+ }
+diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
+index 4fcc3ba93004a..7b340f3832133 100644
+--- a/drivers/soundwire/cadence_master.c
++++ b/drivers/soundwire/cadence_master.c
+@@ -127,7 +127,8 @@ MODULE_PARM_DESC(cdns_mcp_int_mask, "Cadence MCP IntMask");
+
+ #define CDNS_MCP_CMD_BASE 0x80
+ #define CDNS_MCP_RESP_BASE 0x80
+-#define CDNS_MCP_CMD_LEN 0x20
++/* FIFO can hold 8 commands */
++#define CDNS_MCP_CMD_LEN 8
+ #define CDNS_MCP_CMD_WORD_LEN 0x4
+
+ #define CDNS_MCP_CMD_SSP_TAG BIT(31)
+@@ -545,13 +546,39 @@ cdns_fill_msg_resp(struct sdw_cdns *cdns,
+ return SDW_CMD_IGNORED;
+ }
+
+- /* fill response */
+- for (i = 0; i < count; i++)
+- msg->buf[i + offset] = FIELD_GET(CDNS_MCP_RESP_RDATA, cdns->response_buf[i]);
++ if (msg->flags == SDW_MSG_FLAG_READ) {
++ /* fill response */
++ for (i = 0; i < count; i++)
++ msg->buf[i + offset] = FIELD_GET(CDNS_MCP_RESP_RDATA,
++ cdns->response_buf[i]);
++ }
+
+ return SDW_CMD_OK;
+ }
+
++static void cdns_read_response(struct sdw_cdns *cdns)
++{
++ u32 num_resp, cmd_base;
++ int i;
++
++ /* RX_FIFO_AVAIL can be 2 entries more than the FIFO size */
++ BUILD_BUG_ON(ARRAY_SIZE(cdns->response_buf) < CDNS_MCP_CMD_LEN + 2);
++
++ num_resp = cdns_readl(cdns, CDNS_MCP_FIFOSTAT);
++ num_resp &= CDNS_MCP_RX_FIFO_AVAIL;
++ if (num_resp > ARRAY_SIZE(cdns->response_buf)) {
++ dev_warn(cdns->dev, "RX AVAIL %d too long\n", num_resp);
++ num_resp = ARRAY_SIZE(cdns->response_buf);
++ }
++
++ cmd_base = CDNS_MCP_CMD_BASE;
++
++ for (i = 0; i < num_resp; i++) {
++ cdns->response_buf[i] = cdns_readl(cdns, cmd_base);
++ cmd_base += CDNS_MCP_CMD_WORD_LEN;
++ }
++}
++
+ static enum sdw_command_response
+ _cdns_xfer_msg(struct sdw_cdns *cdns, struct sdw_msg *msg, int cmd,
+ int offset, int count, bool defer)
+@@ -593,6 +620,10 @@ _cdns_xfer_msg(struct sdw_cdns *cdns, struct sdw_msg *msg, int cmd,
+ dev_err(cdns->dev, "IO transfer timed out, cmd %d device %d addr %x len %d\n",
+ cmd, msg->dev_num, msg->addr, msg->len);
+ msg->len = 0;
++
++ /* Drain anything in the RX_FIFO */
++ cdns_read_response(cdns);
++
+ return SDW_CMD_TIMEOUT;
+ }
+
+@@ -761,22 +792,6 @@ EXPORT_SYMBOL(cdns_reset_page_addr);
+ * IRQ handling
+ */
+
+-static void cdns_read_response(struct sdw_cdns *cdns)
+-{
+- u32 num_resp, cmd_base;
+- int i;
+-
+- num_resp = cdns_readl(cdns, CDNS_MCP_FIFOSTAT);
+- num_resp &= CDNS_MCP_RX_FIFO_AVAIL;
+-
+- cmd_base = CDNS_MCP_CMD_BASE;
+-
+- for (i = 0; i < num_resp; i++) {
+- cdns->response_buf[i] = cdns_readl(cdns, cmd_base);
+- cmd_base += CDNS_MCP_CMD_WORD_LEN;
+- }
+-}
+-
+ static int cdns_update_slave_status(struct sdw_cdns *cdns,
+ u64 slave_intstat)
+ {
+diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h
+index e587aede63bf0..e437a604429fa 100644
+--- a/drivers/soundwire/cadence_master.h
++++ b/drivers/soundwire/cadence_master.h
+@@ -8,6 +8,12 @@
+ #define SDW_CADENCE_GSYNC_KHZ 4 /* 4 kHz */
+ #define SDW_CADENCE_GSYNC_HZ (SDW_CADENCE_GSYNC_KHZ * 1000)
+
++/*
++ * The Cadence IP supports up to 32 entries in the FIFO, though implementations
++ * can configure the IP to have a smaller FIFO.
++ */
++#define CDNS_MCP_IP_MAX_CMD_LEN 32
++
+ /**
+ * struct sdw_cdns_pdi: PDI (Physical Data Interface) instance
+ *
+@@ -119,7 +125,12 @@ struct sdw_cdns {
+ struct sdw_bus bus;
+ unsigned int instance;
+
+- u32 response_buf[0x80];
++ /*
++ * The datasheet says the RX FIFO AVAIL can be 2 entries more
++ * than the FIFO capacity, so allow for this.
++ */
++ u32 response_buf[CDNS_MCP_IP_MAX_CMD_LEN + 2];
++
+ struct completion tx_complete;
+ struct sdw_defer *defer;
+
+diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c
+index b6cad0d59b7b9..49900cd207bc7 100644
+--- a/drivers/soundwire/debugfs.c
++++ b/drivers/soundwire/debugfs.c
+@@ -19,7 +19,7 @@ void sdw_bus_debugfs_init(struct sdw_bus *bus)
+ return;
+
+ /* create the debugfs master-N */
+- snprintf(name, sizeof(name), "master-%d", bus->link_id);
++ snprintf(name, sizeof(name), "master-%d-%d", bus->id, bus->link_id);
+ bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root);
+ }
+
+diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c
+index 0ca2a3e3a02e2..39f0cc2a5b333 100644
+--- a/drivers/soundwire/dmi-quirks.c
++++ b/drivers/soundwire/dmi-quirks.c
+@@ -59,7 +59,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = {
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"),
++ },
++ .driver_data = (void *)intel_tgl_bios,
++ },
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
++ DMI_MATCH(DMI_BOARD_NAME, "8709"),
+ },
+ .driver_data = (void *)intel_tgl_bios,
+ },
+@@ -71,6 +78,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = {
+ },
+ .driver_data = (void *)intel_tgl_bios,
+ },
++ {
++ /* quirk used for NUC15 LAPBC710 skew */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"),
++ },
++ .driver_data = (void *)intel_tgl_bios,
++ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
+index 78037ffdb09ba..90e0bf8ca37d9 100644
+--- a/drivers/soundwire/intel.c
++++ b/drivers/soundwire/intel.c
+@@ -448,8 +448,8 @@ static void intel_shim_wake(struct sdw_intel *sdw, bool wake_enable)
+
+ /* Clear wake status */
+ wake_sts = intel_readw(shim, SDW_SHIM_WAKESTS);
+- wake_sts |= (SDW_SHIM_WAKEEN_ENABLE << link_id);
+- intel_writew(shim, SDW_SHIM_WAKESTS_STATUS, wake_sts);
++ wake_sts |= (SDW_SHIM_WAKESTS_STATUS << link_id);
++ intel_writew(shim, SDW_SHIM_WAKESTS, wake_sts);
+ }
+ mutex_unlock(sdw->link_res->shim_lock);
+ }
+@@ -1065,8 +1065,8 @@ static const struct snd_soc_dai_ops intel_pcm_dai_ops = {
+ .prepare = intel_prepare,
+ .hw_free = intel_hw_free,
+ .shutdown = intel_shutdown,
+- .set_sdw_stream = intel_pcm_set_sdw_stream,
+- .get_sdw_stream = intel_get_sdw_stream,
++ .set_stream = intel_pcm_set_sdw_stream,
++ .get_stream = intel_get_sdw_stream,
+ };
+
+ static const struct snd_soc_dai_ops intel_pdm_dai_ops = {
+@@ -1075,8 +1075,8 @@ static const struct snd_soc_dai_ops intel_pdm_dai_ops = {
+ .prepare = intel_prepare,
+ .hw_free = intel_hw_free,
+ .shutdown = intel_shutdown,
+- .set_sdw_stream = intel_pdm_set_sdw_stream,
+- .get_sdw_stream = intel_get_sdw_stream,
++ .set_stream = intel_pdm_set_sdw_stream,
++ .get_stream = intel_get_sdw_stream,
+ };
+
+ static const struct snd_soc_component_driver dai_component = {
+@@ -1285,6 +1285,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
+ cdns->msg_count = 0;
+
+ bus->link_id = auxdev->id;
++ bus->clk_stop_timeout = 1;
+
+ sdw_cdns_probe(cdns);
+
+@@ -1298,6 +1299,9 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
+ /* use generic bandwidth allocation algorithm */
+ sdw->cdns.bus.compute_params = sdw_compute_params;
+
++ /* avoid resuming from pm_runtime suspend if it's not required */
++ dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
++
+ ret = sdw_bus_master_add(bus, dev, dev->fwnode);
+ if (ret) {
+ dev_err(dev, "sdw_bus_master_add fail: %d\n", ret);
+@@ -1404,7 +1408,6 @@ int intel_link_startup(struct auxiliary_device *auxdev)
+ ret = intel_register_dai(sdw);
+ if (ret) {
+ dev_err(dev, "DAI registration failed: %d\n", ret);
+- snd_soc_unregister_component(dev);
+ goto err_interrupt;
+ }
+
+diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
+index 0ef79d60e88e6..e3b52d5aa411e 100644
+--- a/drivers/soundwire/qcom.c
++++ b/drivers/soundwire/qcom.c
+@@ -97,7 +97,7 @@
+
+ #define SWRM_SPECIAL_CMD_ID 0xF
+ #define MAX_FREQ_NUM 1
+-#define TIMEOUT_MS (2 * HZ)
++#define TIMEOUT_MS 100
+ #define QCOM_SWRM_MAX_RD_LEN 0x1
+ #define QCOM_SDW_MAX_PORTS 14
+ #define DEFAULT_CLK_FREQ 9600000
+@@ -146,9 +146,10 @@ struct qcom_swrm_ctrl {
+ u32 intr_mask;
+ u8 rcmd_id;
+ u8 wcmd_id;
+- struct qcom_swrm_port_config pconfig[QCOM_SDW_MAX_PORTS];
++ /* Port numbers are 1 - 14 */
++ struct qcom_swrm_port_config pconfig[QCOM_SDW_MAX_PORTS + 1];
+ struct sdw_stream_runtime *sruntime[SWRM_MAX_DAIS];
+- enum sdw_slave_status status[SDW_MAX_DEVICES];
++ enum sdw_slave_status status[SDW_MAX_DEVICES + 1];
+ int (*reg_read)(struct qcom_swrm_ctrl *ctrl, int reg, u32 *val);
+ int (*reg_write)(struct qcom_swrm_ctrl *ctrl, int reg, int val);
+ u32 slave_status;
+@@ -315,6 +316,9 @@ static int qcom_swrm_cmd_fifo_wr_cmd(struct qcom_swrm_ctrl *swrm, u8 cmd_data,
+ if (swrm_wait_for_wr_fifo_avail(swrm))
+ return SDW_CMD_FAIL_OTHER;
+
++ if (cmd_id == SWR_BROADCAST_CMD_ID)
++ reinit_completion(&swrm->broadcast);
++
+ /* Its assumed that write is okay as we do not get any status back */
+ swrm->reg_write(swrm, SWRM_CMD_FIFO_WR_CMD, val);
+
+@@ -348,6 +352,12 @@ static int qcom_swrm_cmd_fifo_rd_cmd(struct qcom_swrm_ctrl *swrm,
+
+ val = swrm_get_packed_reg_val(&swrm->rcmd_id, len, dev_addr, reg_addr);
+
++ /*
++ * Check for outstanding cmd wrt. write fifo depth to avoid
++ * overflow as read will also increase write fifo cnt.
++ */
++ swrm_wait_for_wr_fifo_avail(swrm);
++
+ /* wait for FIFO RD to complete to avoid overflow */
+ usleep_range(100, 105);
+ swrm->reg_write(swrm, SWRM_CMD_FIFO_RD_CMD, val);
+@@ -391,11 +401,11 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl)
+
+ ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val);
+
+- for (dev_num = 0; dev_num < SDW_MAX_DEVICES; dev_num++) {
++ for (dev_num = 0; dev_num <= SDW_MAX_DEVICES; dev_num++) {
+ status = (val >> (dev_num * SWRM_MCP_SLV_STATUS_SZ));
+
+ if ((status & SWRM_MCP_SLV_STATUS_MASK) == SDW_SLAVE_ALERT) {
+- ctrl->status[dev_num] = status;
++ ctrl->status[dev_num] = status & SWRM_MCP_SLV_STATUS_MASK;
+ return dev_num;
+ }
+ }
+@@ -411,7 +421,7 @@ static void qcom_swrm_get_device_status(struct qcom_swrm_ctrl *ctrl)
+ ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val);
+ ctrl->slave_status = val;
+
+- for (i = 0; i < SDW_MAX_DEVICES; i++) {
++ for (i = 0; i <= SDW_MAX_DEVICES; i++) {
+ u32 s;
+
+ s = (val >> (i * 2));
+@@ -451,6 +461,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus)
+ char *buf1 = (char *)&val1, *buf2 = (char *)&val2;
+
+ for (i = 1; i <= SDW_MAX_DEVICES; i++) {
++ /* do not continue if the status is Not Present */
++ if (!ctrl->status[i])
++ continue;
++
+ /*SCP_Devid5 - Devid 4*/
+ ctrl->reg_read(ctrl, SWRM_ENUMERATOR_SLAVE_DEV_ID_1(i), &val1);
+
+@@ -627,7 +641,7 @@ static int qcom_swrm_init(struct qcom_swrm_ctrl *ctrl)
+
+ ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
+ /* Configure number of retries of a read/write cmd */
+- if (ctrl->version > 0x01050001) {
++ if (ctrl->version >= 0x01050001) {
+ /* Only for versions >= 1.5.1 */
+ ctrl->reg_write(ctrl, SWRM_CMD_FIFO_CFG_ADDR,
+ SWRM_RD_WR_CMD_RETRIES |
+@@ -1019,8 +1033,8 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream,
+ ctrl->sruntime[dai->id] = sruntime;
+
+ for_each_rtd_codec_dais(rtd, i, codec_dai) {
+- ret = snd_soc_dai_set_sdw_stream(codec_dai, sruntime,
+- substream->stream);
++ ret = snd_soc_dai_set_stream(codec_dai, sruntime,
++ substream->stream);
+ if (ret < 0 && ret != -ENOTSUPP) {
+ dev_err(dai->dev, "Failed to set sdw stream on %s\n",
+ codec_dai->name);
+@@ -1046,8 +1060,8 @@ static const struct snd_soc_dai_ops qcom_swrm_pdm_dai_ops = {
+ .hw_free = qcom_swrm_hw_free,
+ .startup = qcom_swrm_startup,
+ .shutdown = qcom_swrm_shutdown,
+- .set_sdw_stream = qcom_swrm_set_sdw_stream,
+- .get_sdw_stream = qcom_swrm_get_sdw_stream,
++ .set_stream = qcom_swrm_set_sdw_stream,
++ .get_stream = qcom_swrm_get_sdw_stream,
+ };
+
+ static const struct snd_soc_component_driver qcom_swrm_dai_component = {
+@@ -1129,6 +1143,9 @@ static int qcom_swrm_get_port_config(struct qcom_swrm_ctrl *ctrl)
+ ctrl->num_dout_ports = val;
+
+ nports = ctrl->num_dout_ports + ctrl->num_din_ports;
++ if (nports > QCOM_SDW_MAX_PORTS)
++ return -EINVAL;
++
+ /* Valid port numbers are from 1-14, so mask out port 0 explicitly */
+ set_bit(0, &ctrl->dout_port_mask);
+ set_bit(0, &ctrl->din_port_mask);
+diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c
+index 669d7573320b7..25e76b5d4a1a3 100644
+--- a/drivers/soundwire/slave.c
++++ b/drivers/soundwire/slave.c
+@@ -12,6 +12,7 @@ static void sdw_slave_release(struct device *dev)
+ {
+ struct sdw_slave *slave = dev_to_sdw_dev(dev);
+
++ mutex_destroy(&slave->sdw_dev_lock);
+ kfree(slave);
+ }
+
+@@ -58,9 +59,9 @@ int sdw_slave_add(struct sdw_bus *bus,
+ init_completion(&slave->enumeration_complete);
+ init_completion(&slave->initialization_complete);
+ slave->dev_num = 0;
+- init_completion(&slave->probe_complete);
+ slave->probed = false;
+ slave->first_interrupt_done = false;
++ mutex_init(&slave->sdw_dev_lock);
+
+ for (i = 0; i < SDW_MAX_PORTS; i++)
+ init_completion(&slave->port_ready[i]);
+diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
+index 5d4f6b308ef73..2a900aa302a3b 100644
+--- a/drivers/soundwire/stream.c
++++ b/drivers/soundwire/stream.c
+@@ -13,6 +13,7 @@
+ #include <linux/slab.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/soundwire/sdw.h>
++#include <linux/soundwire/sdw_type.h>
+ #include <sound/soc.h>
+ #include "bus.h"
+
+@@ -401,20 +402,26 @@ static int sdw_do_port_prep(struct sdw_slave_runtime *s_rt,
+ struct sdw_prepare_ch prep_ch,
+ enum sdw_port_prep_ops cmd)
+ {
+- const struct sdw_slave_ops *ops = s_rt->slave->ops;
+- int ret;
++ int ret = 0;
++ struct sdw_slave *slave = s_rt->slave;
+
+- if (ops->port_prep) {
+- ret = ops->port_prep(s_rt->slave, &prep_ch, cmd);
+- if (ret < 0) {
+- dev_err(&s_rt->slave->dev,
+- "Slave Port Prep cmd %d failed: %d\n",
+- cmd, ret);
+- return ret;
++ mutex_lock(&slave->sdw_dev_lock);
++
++ if (slave->probed) {
++ struct device *dev = &slave->dev;
++ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
++
++ if (drv->ops && drv->ops->port_prep) {
++ ret = drv->ops->port_prep(slave, &prep_ch, cmd);
++ if (ret < 0)
++ dev_err(dev, "Slave Port Prep cmd %d failed: %d\n",
++ cmd, ret);
+ }
+ }
+
+- return 0;
++ mutex_unlock(&slave->sdw_dev_lock);
++
++ return ret;
+ }
+
+ static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus,
+@@ -578,7 +585,7 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
+ struct sdw_slave_runtime *s_rt;
+ struct sdw_bus *bus = m_rt->bus;
+ struct sdw_slave *slave;
+- int ret = 0;
++ int ret;
+
+ if (bus->ops->set_bus_conf) {
+ ret = bus->ops->set_bus_conf(bus, &bus->params);
+@@ -589,17 +596,27 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
+ list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+ slave = s_rt->slave;
+
+- if (slave->ops->bus_config) {
+- ret = slave->ops->bus_config(slave, &bus->params);
+- if (ret < 0) {
+- dev_err(bus->dev, "Notify Slave: %d failed\n",
+- slave->dev_num);
+- return ret;
++ mutex_lock(&slave->sdw_dev_lock);
++
++ if (slave->probed) {
++ struct device *dev = &slave->dev;
++ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
++
++ if (drv->ops && drv->ops->bus_config) {
++ ret = drv->ops->bus_config(slave, &bus->params);
++ if (ret < 0) {
++ dev_err(dev, "Notify Slave: %d failed\n",
++ slave->dev_num);
++ mutex_unlock(&slave->sdw_dev_lock);
++ return ret;
++ }
+ }
+ }
++
++ mutex_unlock(&slave->sdw_dev_lock);
+ }
+
+- return ret;
++ return 0;
+ }
+
+ /**
+@@ -1863,7 +1880,7 @@ static int set_stream(struct snd_pcm_substream *substream,
+
+ /* Set stream pointer on all DAIs */
+ for_each_rtd_dais(rtd, i, dai) {
+- ret = snd_soc_dai_set_sdw_stream(dai, sdw_stream, substream->stream);
++ ret = snd_soc_dai_set_stream(dai, sdw_stream, substream->stream);
+ if (ret < 0) {
+ dev_err(rtd->dev, "failed to set stream pointer on dai %s\n", dai->name);
+ break;
+@@ -1934,7 +1951,7 @@ void sdw_shutdown_stream(void *sdw_substream)
+ /* Find stream from first CPU DAI */
+ dai = asoc_rtd_to_cpu(rtd, 0);
+
+- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream);
++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream);
+
+ if (IS_ERR(sdw_stream)) {
+ dev_err(rtd->dev, "no stream found for DAI %s\n", dai->name);
+diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
+index 83e352b0c8f9a..4fc23236d3bd2 100644
+--- a/drivers/spi/Kconfig
++++ b/drivers/spi/Kconfig
+@@ -272,7 +272,6 @@ config SPI_DW_BT1
+ tristate "Baikal-T1 SPI driver for DW SPI core"
+ depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+ select MULTIPLEXER
+- select MUX_MMIO
+ help
+ Baikal-T1 SoC is equipped with three DW APB SSI-based MMIO SPI
+ controllers. Two of them are pretty much normal: with IRQ, DMA,
+diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
+index 95d4fa32c2995..938017a60c8ed 100644
+--- a/drivers/spi/atmel-quadspi.c
++++ b/drivers/spi/atmel-quadspi.c
+@@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op)
+ static bool atmel_qspi_supports_op(struct spi_mem *mem,
+ const struct spi_mem_op *op)
+ {
++ if (!spi_mem_default_supports_op(mem, op))
++ return false;
++
+ if (atmel_qspi_find_mode(op) < 0)
+ return false;
+
+@@ -310,7 +313,7 @@ static int atmel_qspi_set_cfg(struct atmel_qspi *aq,
+ return mode;
+ ifr |= atmel_qspi_modes[mode].config;
+
+- if (op->dummy.buswidth && op->dummy.nbytes)
++ if (op->dummy.nbytes)
+ dummy_cycles = op->dummy.nbytes * 8 / op->dummy.buswidth;
+
+ /*
+diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c
+index ca40923258af3..596e181ae1368 100644
+--- a/drivers/spi/spi-altera-dfl.c
++++ b/drivers/spi/spi-altera-dfl.c
+@@ -128,9 +128,9 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
+ struct spi_master *master;
+ struct altera_spi *hw;
+ void __iomem *base;
+- int err = -ENODEV;
++ int err;
+
+- master = spi_alloc_master(dev, sizeof(struct altera_spi));
++ master = devm_spi_alloc_master(dev, sizeof(struct altera_spi));
+ if (!master)
+ return -ENOMEM;
+
+@@ -159,10 +159,9 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
+ altera_spi_init_master(master);
+
+ err = devm_spi_register_master(dev, master);
+- if (err) {
+- dev_err(dev, "%s failed to register spi master %d\n", __func__, err);
+- goto exit;
+- }
++ if (err)
++ return dev_err_probe(dev, err, "%s failed to register spi master\n",
++ __func__);
+
+ if (dfl_dev->revision == FME_FEATURE_REV_MAX10_SPI_N5010)
+ strscpy(board_info.modalias, "m10-n5010", SPI_NAME_SIZE);
+@@ -179,9 +178,6 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
+ }
+
+ return 0;
+-exit:
+- spi_master_put(master);
+- return err;
+ }
+
+ static const struct dfl_device_id dfl_spi_altera_ids[] = {
+diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
+index 3cf76096a76d8..39dbe9903da2c 100644
+--- a/drivers/spi/spi-amd.c
++++ b/drivers/spi/spi-amd.c
+@@ -28,6 +28,7 @@
+ #define AMD_SPI_RX_COUNT_REG 0x4B
+ #define AMD_SPI_STATUS_REG 0x4C
+
++#define AMD_SPI_FIFO_SIZE 70
+ #define AMD_SPI_MEM_SIZE 200
+
+ /* M_CMD OP codes for SPI */
+@@ -245,6 +246,11 @@ static int amd_spi_master_transfer(struct spi_master *master,
+ return 0;
+ }
+
++static size_t amd_spi_max_transfer_size(struct spi_device *spi)
++{
++ return AMD_SPI_FIFO_SIZE;
++}
++
+ static int amd_spi_probe(struct platform_device *pdev)
+ {
+ struct device *dev = &pdev->dev;
+@@ -275,6 +281,8 @@ static int amd_spi_probe(struct platform_device *pdev)
+ master->flags = SPI_MASTER_HALF_DUPLEX;
+ master->setup = amd_spi_master_setup;
+ master->transfer_one_message = amd_spi_master_transfer;
++ master->max_transfer_size = amd_spi_max_transfer_size;
++ master->max_message_size = amd_spi_max_transfer_size;
+
+ /* Register the controller with SPI framework */
+ err = devm_spi_register_master(dev, master);
+diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
+index 46feafe4e201c..d8cc4b270644a 100644
+--- a/drivers/spi/spi-armada-3700.c
++++ b/drivers/spi/spi-armada-3700.c
+@@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev)
+ return 0;
+
+ error_clk:
+- clk_disable_unprepare(spi->clk);
++ clk_unprepare(spi->clk);
+ error:
+ spi_master_put(master);
+ out:
+diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
+index 3043677ba2226..c79797c06cda1 100644
+--- a/drivers/spi/spi-bcm-qspi.c
++++ b/drivers/spi/spi-bcm-qspi.c
+@@ -395,7 +395,8 @@ static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi,
+ if (addrlen == BSPI_ADDRLEN_4BYTES)
+ bpp = BSPI_BPP_ADDR_SELECT_MASK;
+
+- bpp |= (op->dummy.nbytes * 8) / op->dummy.buswidth;
++ if (op->dummy.nbytes)
++ bpp |= (op->dummy.nbytes * 8) / op->dummy.buswidth;
+
+ switch (width) {
+ case SPI_NBITS_SINGLE:
+@@ -551,7 +552,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
+ u32 rd = 0;
+ u32 wr = 0;
+
+- if (qspi->base[CHIP_SELECT]) {
++ if (cs >= 0 && qspi->base[CHIP_SELECT]) {
+ rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
+ wr = (rd & ~0xff) | (1 << cs);
+ if (rd == wr)
+@@ -1032,7 +1033,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
+ addr = op->addr.val;
+ len = op->data.nbytes;
+
+- if (bcm_qspi_bspi_ver_three(qspi) == true) {
++ if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) {
+ /*
+ * The address coming into this function is a raw flash offset.
+ * But for BSPI <= V3, we need to convert it to a remapped BSPI
+@@ -1051,7 +1052,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
+ len < 4)
+ mspi_read = true;
+
+- if (mspi_read)
++ if (!has_bspi(qspi) || mspi_read)
+ return bcm_qspi_mspi_exec_mem_op(spi, op);
+
+ ret = bcm_qspi_bspi_set_mode(qspi, op, 0);
+@@ -1369,13 +1370,9 @@ int bcm_qspi_probe(struct platform_device *pdev,
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "mspi");
+
+- if (res) {
+- qspi->base[MSPI] = devm_ioremap_resource(dev, res);
+- if (IS_ERR(qspi->base[MSPI]))
+- return PTR_ERR(qspi->base[MSPI]);
+- } else {
+- return 0;
+- }
++ qspi->base[MSPI] = devm_ioremap_resource(dev, res);
++ if (IS_ERR(qspi->base[MSPI]))
++ return PTR_ERR(qspi->base[MSPI]);
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi");
+ if (res) {
+@@ -1460,7 +1457,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
+ &qspi->dev_ids[val]);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "IRQ %s not found\n", name);
+- goto qspi_probe_err;
++ goto qspi_unprepare_err;
+ }
+
+ qspi->dev_ids[val].dev = qspi;
+@@ -1475,7 +1472,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
+ if (!num_ints) {
+ dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n");
+ ret = -EINVAL;
+- goto qspi_probe_err;
++ goto qspi_unprepare_err;
+ }
+
+ bcm_qspi_hw_init(qspi);
+@@ -1499,6 +1496,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
+
+ qspi_reg_err:
+ bcm_qspi_hw_uninit(qspi);
++qspi_unprepare_err:
+ clk_disable_unprepare(qspi->clk);
+ qspi_probe_err:
+ kfree(qspi->dev_ids);
+diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
+index 775c0bf2f923d..0933948d7df3d 100644
+--- a/drivers/spi/spi-bcm2835.c
++++ b/drivers/spi/spi-bcm2835.c
+@@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr,
+ struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+
+ /* if an error occurred and we have an active dma, then terminate */
+- dmaengine_terminate_sync(ctlr->dma_tx);
+- bs->tx_dma_active = false;
+- dmaengine_terminate_sync(ctlr->dma_rx);
+- bs->rx_dma_active = false;
++ if (ctlr->dma_tx) {
++ dmaengine_terminate_sync(ctlr->dma_tx);
++ bs->tx_dma_active = false;
++ }
++ if (ctlr->dma_rx) {
++ dmaengine_terminate_sync(ctlr->dma_rx);
++ bs->rx_dma_active = false;
++ }
+ bcm2835_spi_undo_prologue(bs);
+
+ /* and reset */
+diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
+index b871fd810d801..02f56fc001b47 100644
+--- a/drivers/spi/spi-bcm63xx-hsspi.c
++++ b/drivers/spi/spi-bcm63xx-hsspi.c
+@@ -163,6 +163,7 @@ static int bcm63xx_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t)
+ int step_size = HSSPI_BUFFER_LEN;
+ const u8 *tx = t->tx_buf;
+ u8 *rx = t->rx_buf;
++ u32 val = 0;
+
+ bcm63xx_hsspi_set_clk(bs, spi, t->speed_hz);
+ bcm63xx_hsspi_set_cs(bs, spi->chip_select, true);
+@@ -178,11 +179,16 @@ static int bcm63xx_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t)
+ step_size -= HSSPI_OPCODE_LEN;
+
+ if ((opcode == HSSPI_OP_READ && t->rx_nbits == SPI_NBITS_DUAL) ||
+- (opcode == HSSPI_OP_WRITE && t->tx_nbits == SPI_NBITS_DUAL))
++ (opcode == HSSPI_OP_WRITE && t->tx_nbits == SPI_NBITS_DUAL)) {
+ opcode |= HSSPI_OP_MULTIBIT;
+
+- __raw_writel(1 << MODE_CTRL_MULTIDATA_WR_SIZE_SHIFT |
+- 1 << MODE_CTRL_MULTIDATA_RD_SIZE_SHIFT | 0xff,
++ if (t->rx_nbits == SPI_NBITS_DUAL)
++ val |= 1 << MODE_CTRL_MULTIDATA_RD_SIZE_SHIFT;
++ if (t->tx_nbits == SPI_NBITS_DUAL)
++ val |= 1 << MODE_CTRL_MULTIDATA_WR_SIZE_SHIFT;
++ }
++
++ __raw_writel(val | 0xff,
+ bs->regs + HSSPI_PROFILE_MODE_CTRL_REG(chip_select));
+
+ while (pending > 0) {
+diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
+index 80fa0ef8909ca..147199002df1e 100644
+--- a/drivers/spi/spi-bcm63xx.c
++++ b/drivers/spi/spi-bcm63xx.c
+@@ -126,7 +126,7 @@ enum bcm63xx_regs_spi {
+ SPI_MSG_DATA_SIZE,
+ };
+
+-#define BCM63XX_SPI_MAX_PREPEND 15
++#define BCM63XX_SPI_MAX_PREPEND 7
+
+ #define BCM63XX_SPI_MAX_CS 8
+ #define BCM63XX_SPI_BUS_NUM 0
+diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
+index 101cc71bffa75..5c8f198b0ae38 100644
+--- a/drivers/spi/spi-cadence-quadspi.c
++++ b/drivers/spi/spi-cadence-quadspi.c
+@@ -18,6 +18,7 @@
+ #include <linux/iopoll.h>
+ #include <linux/jiffies.h>
+ #include <linux/kernel.h>
++#include <linux/log2.h>
+ #include <linux/module.h>
+ #include <linux/of_device.h>
+ #include <linux/of.h>
+@@ -35,6 +36,7 @@
+ /* Quirks */
+ #define CQSPI_NEEDS_WR_DELAY BIT(0)
+ #define CQSPI_DISABLE_DAC_MODE BIT(1)
++#define CQSPI_NO_SUPPORT_WR_COMPLETION BIT(3)
+
+ /* Capabilities */
+ #define CQSPI_SUPPORTS_OCTAL BIT(0)
+@@ -82,6 +84,7 @@ struct cqspi_st {
+ u32 wr_delay;
+ bool use_direct_mode;
+ struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT];
++ bool wr_completion;
+ };
+
+ struct cqspi_driver_platdata {
+@@ -93,12 +96,6 @@ struct cqspi_driver_platdata {
+ #define CQSPI_TIMEOUT_MS 500
+ #define CQSPI_READ_TIMEOUT_MS 10
+
+-/* Instruction type */
+-#define CQSPI_INST_TYPE_SINGLE 0
+-#define CQSPI_INST_TYPE_DUAL 1
+-#define CQSPI_INST_TYPE_QUAD 2
+-#define CQSPI_INST_TYPE_OCTAL 3
+-
+ #define CQSPI_DUMMY_CLKS_PER_BYTE 8
+ #define CQSPI_DUMMY_BYTES_MAX 4
+ #define CQSPI_DUMMY_CLKS_MAX 31
+@@ -322,10 +319,6 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr)
+ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
+ const struct spi_mem_op *op)
+ {
+- f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE;
+- f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE;
+- f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
+-
+ /*
+ * For an op to be DTR, cmd phase along with every other non-empty
+ * phase should have dtr field set to 1. If an op phase has zero
+@@ -335,32 +328,23 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
+ (!op->addr.nbytes || op->addr.dtr) &&
+ (!op->data.nbytes || op->data.dtr);
+
+- switch (op->data.buswidth) {
+- case 0:
+- break;
+- case 1:
+- f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
+- break;
+- case 2:
+- f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
+- break;
+- case 4:
+- f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
+- break;
+- case 8:
+- f_pdata->data_width = CQSPI_INST_TYPE_OCTAL;
+- break;
+- default:
+- return -EINVAL;
+- }
++ f_pdata->inst_width = 0;
++ if (op->cmd.buswidth)
++ f_pdata->inst_width = ilog2(op->cmd.buswidth);
++
++ f_pdata->addr_width = 0;
++ if (op->addr.buswidth)
++ f_pdata->addr_width = ilog2(op->addr.buswidth);
++
++ f_pdata->data_width = 0;
++ if (op->data.buswidth)
++ f_pdata->data_width = ilog2(op->data.buswidth);
+
+ /* Right now we only support 8-8-8 DTR mode. */
+ if (f_pdata->dtr) {
+ switch (op->cmd.buswidth) {
+ case 0:
+- break;
+ case 8:
+- f_pdata->inst_width = CQSPI_INST_TYPE_OCTAL;
+ break;
+ default:
+ return -EINVAL;
+@@ -368,9 +352,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
+
+ switch (op->addr.buswidth) {
+ case 0:
+- break;
+ case 8:
+- f_pdata->addr_width = CQSPI_INST_TYPE_OCTAL;
+ break;
+ default:
+ return -EINVAL;
+@@ -378,9 +360,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
+
+ switch (op->data.buswidth) {
+ case 0:
+- break;
+ case 8:
+- f_pdata->data_width = CQSPI_INST_TYPE_OCTAL;
+ break;
+ default:
+ return -EINVAL;
+@@ -819,9 +799,11 @@ static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata,
+ * polling on the controller's side. spinand and spi-nor will take
+ * care of polling the status register.
+ */
+- reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
+- reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
+- writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
++ if (cqspi->wr_completion) {
++ reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
++ reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
++ writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
++ }
+
+ reg = readl(reg_base + CQSPI_REG_SIZE);
+ reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
+@@ -1248,9 +1230,24 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem,
+ all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
+ !op->data.dtr;
+
+- /* Mixed DTR modes not supported. */
+- if (!(all_true || all_false))
++ if (all_true) {
++ /* Right now we only support 8-8-8 DTR mode. */
++ if (op->cmd.nbytes && op->cmd.buswidth != 8)
++ return false;
++ if (op->addr.nbytes && op->addr.buswidth != 8)
++ return false;
++ if (op->data.nbytes && op->data.buswidth != 8)
++ return false;
++ } else if (all_false) {
++ /* Only 1-1-X ops are supported without DTR */
++ if (op->cmd.nbytes && op->cmd.buswidth > 1)
++ return false;
++ if (op->addr.nbytes && op->addr.buswidth > 1)
++ return false;
++ } else {
++ /* Mixed DTR modes are not supported. */
+ return false;
++ }
+
+ if (all_true)
+ return spi_mem_dtr_supports_op(mem, op);
+@@ -1539,6 +1536,10 @@ static int cqspi_probe(struct platform_device *pdev)
+
+ cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk);
+ master->max_speed_hz = cqspi->master_ref_clk_hz;
++
++ /* write completion is supported by default */
++ cqspi->wr_completion = true;
++
+ ddata = of_device_get_match_data(dev);
+ if (ddata) {
+ if (ddata->quirks & CQSPI_NEEDS_WR_DELAY)
+@@ -1548,6 +1549,8 @@ static int cqspi_probe(struct platform_device *pdev)
+ master->mode_bits |= SPI_RX_OCTAL | SPI_TX_OCTAL;
+ if (!(ddata->quirks & CQSPI_DISABLE_DAC_MODE))
+ cqspi->use_direct_mode = true;
++ if (ddata->quirks & CQSPI_NO_SUPPORT_WR_COMPLETION)
++ cqspi->wr_completion = false;
+ }
+
+ ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0,
+@@ -1616,17 +1619,30 @@ static int cqspi_remove(struct platform_device *pdev)
+ static int cqspi_suspend(struct device *dev)
+ {
+ struct cqspi_st *cqspi = dev_get_drvdata(dev);
++ struct spi_master *master = dev_get_drvdata(dev);
++ int ret;
+
++ ret = spi_master_suspend(master);
+ cqspi_controller_enable(cqspi, 0);
+- return 0;
++
++ clk_disable_unprepare(cqspi->clk);
++
++ return ret;
+ }
+
+ static int cqspi_resume(struct device *dev)
+ {
+ struct cqspi_st *cqspi = dev_get_drvdata(dev);
++ struct spi_master *master = dev_get_drvdata(dev);
+
+- cqspi_controller_enable(cqspi, 1);
+- return 0;
++ clk_prepare_enable(cqspi->clk);
++ cqspi_wait_idle(cqspi);
++ cqspi_controller_init(cqspi);
++
++ cqspi->current_cs = -1;
++ cqspi->sclk = 0;
++
++ return spi_master_resume(master);
+ }
+
+ static const struct dev_pm_ops cqspi__dev_pm_ops = {
+@@ -1656,6 +1672,10 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = {
+ .quirks = CQSPI_DISABLE_DAC_MODE,
+ };
+
++static const struct cqspi_driver_platdata socfpga_qspi = {
++ .quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_NO_SUPPORT_WR_COMPLETION,
++};
++
+ static const struct of_device_id cqspi_dt_ids[] = {
+ {
+ .compatible = "cdns,qspi-nor",
+@@ -1673,6 +1693,10 @@ static const struct of_device_id cqspi_dt_ids[] = {
+ .compatible = "intel,lgm-qspi",
+ .data = &intel_lgm_qspi,
+ },
++ {
++ .compatible = "intel,socfpga-qspi",
++ .data = (void *)&socfpga_qspi,
++ },
+ { /* end of table */ }
+ };
+
+diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c
+index 5be6b7b80c21b..7e3ff54f6616c 100644
+--- a/drivers/spi/spi-dw-bt1.c
++++ b/drivers/spi/spi-dw-bt1.c
+@@ -293,8 +293,10 @@ static int dw_spi_bt1_probe(struct platform_device *pdev)
+ pm_runtime_enable(&pdev->dev);
+
+ ret = dw_spi_add_host(&pdev->dev, dws);
+- if (ret)
++ if (ret) {
++ pm_runtime_disable(&pdev->dev);
+ goto err_disable_clk;
++ }
+
+ platform_set_drvdata(pdev, dwsbt1);
+
+diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
+index a305074c482e8..8fc598f09a728 100644
+--- a/drivers/spi/spi-dw-core.c
++++ b/drivers/spi/spi-dw-core.c
+@@ -357,7 +357,7 @@ static void dw_spi_irq_setup(struct dw_spi *dws)
+ * will be adjusted at the final stage of the IRQ-based SPI transfer
+ * execution so not to lose the leftover of the incoming data.
+ */
+- level = min_t(u16, dws->fifo_len / 2, dws->tx_len);
++ level = min_t(unsigned int, dws->fifo_len / 2, dws->tx_len);
+ dw_writel(dws, DW_SPI_TXFTLR, level);
+ dw_writel(dws, DW_SPI_RXFTLR, level - 1);
+
+@@ -416,7 +416,10 @@ static int dw_spi_transfer_one(struct spi_controller *master,
+ int ret;
+
+ dws->dma_mapped = 0;
+- dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
++ dws->n_bytes =
++ roundup_pow_of_two(DIV_ROUND_UP(transfer->bits_per_word,
++ BITS_PER_BYTE));
++
+ dws->tx = (void *)transfer->tx_buf;
+ dws->tx_len = transfer->len / dws->n_bytes;
+ dws->rx = transfer->rx_buf;
+diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c
+index a09831c62192a..32ac8f9068e87 100644
+--- a/drivers/spi/spi-dw-dma.c
++++ b/drivers/spi/spi-dw-dma.c
+@@ -127,12 +127,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
+
+ dw_spi_dma_sg_burst_init(dws);
+
++ pci_dev_put(dma_dev);
++
+ return 0;
+
+ free_rxchan:
+ dma_release_channel(dws->rxchan);
+ dws->rxchan = NULL;
+ err_exit:
++ pci_dev_put(dma_dev);
+ return -EBUSY;
+ }
+
+diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
+index ee905880769e6..7832ce330b29d 100644
+--- a/drivers/spi/spi-fsl-cpm.c
++++ b/drivers/spi/spi-fsl-cpm.c
+@@ -21,6 +21,7 @@
+ #include <linux/spi/spi.h>
+ #include <linux/types.h>
+ #include <linux/platform_device.h>
++#include <linux/byteorder/generic.h>
+
+ #include "spi-fsl-cpm.h"
+ #include "spi-fsl-lib.h"
+@@ -120,6 +121,21 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
+ mspi->rx_dma = mspi->dma_dummy_rx;
+ mspi->map_rx_dma = 0;
+ }
++ if (t->bits_per_word == 16 && t->tx_buf) {
++ const u16 *src = t->tx_buf;
++ u16 *dst;
++ int i;
++
++ dst = kmalloc(t->len, GFP_KERNEL);
++ if (!dst)
++ return -ENOMEM;
++
++ for (i = 0; i < t->len >> 1; i++)
++ dst[i] = cpu_to_le16p(src + i);
++
++ mspi->tx = dst;
++ mspi->map_tx_dma = 1;
++ }
+
+ if (mspi->map_tx_dma) {
+ void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
+@@ -173,6 +189,13 @@ void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi)
+ if (mspi->map_rx_dma)
+ dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
+ mspi->xfer_in_progress = NULL;
++
++ if (t->bits_per_word == 16 && t->rx_buf) {
++ int i;
++
++ for (i = 0; i < t->len; i += 2)
++ le16_to_cpus(t->rx_buf + i);
++ }
+ }
+ EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete);
+
+diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
+index fd004c9db9dc0..0d9201a2999de 100644
+--- a/drivers/spi/spi-fsl-dspi.c
++++ b/drivers/spi/spi-fsl-dspi.c
+@@ -975,7 +975,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
+ static int dspi_setup(struct spi_device *spi)
+ {
+ struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller);
++ u32 period_ns = DIV_ROUND_UP(NSEC_PER_SEC, spi->max_speed_hz);
+ unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
++ u32 quarter_period_ns = DIV_ROUND_UP(period_ns, 4);
+ u32 cs_sck_delay = 0, sck_cs_delay = 0;
+ struct fsl_dspi_platform_data *pdata;
+ unsigned char pasc = 0, asc = 0;
+@@ -1003,6 +1005,19 @@ static int dspi_setup(struct spi_device *spi)
+ sck_cs_delay = pdata->sck_cs_delay;
+ }
+
++ /* Since tCSC and tASC apply to continuous transfers too, avoid SCK
++ * glitches of half a cycle by never allowing tCSC + tASC to go below
++ * half a SCK period.
++ */
++ if (cs_sck_delay < quarter_period_ns)
++ cs_sck_delay = quarter_period_ns;
++ if (sck_cs_delay < quarter_period_ns)
++ sck_cs_delay = quarter_period_ns;
++
++ dev_dbg(&spi->dev,
++ "DSPI controller timing params: CS-to-SCK delay %u ns, SCK-to-CS delay %u ns\n",
++ cs_sck_delay, sck_cs_delay);
++
+ clkrate = clk_get_rate(dspi->clk);
+ hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
+
+diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
+index 5d98611dd999d..c5ff6e8c45be0 100644
+--- a/drivers/spi/spi-fsl-lpspi.c
++++ b/drivers/spi/spi-fsl-lpspi.c
+@@ -906,9 +906,14 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
+ ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
+ if (ret == -EPROBE_DEFER)
+ goto out_pm_get;
+-
+ if (ret < 0)
+ dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret);
++ else
++ /*
++ * disable LPSPI module IRQ when enable DMA mode successfully,
++ * to prevent the unexpected LPSPI module IRQ events.
++ */
++ disable_irq(irq);
+
+ ret = devm_spi_register_controller(&pdev->dev, controller);
+ if (ret < 0) {
+diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
+index 9851551ebbe05..46ae46a944c5c 100644
+--- a/drivers/spi/spi-fsl-qspi.c
++++ b/drivers/spi/spi-fsl-qspi.c
+@@ -876,6 +876,10 @@ static int fsl_qspi_probe(struct platform_device *pdev)
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "QuadSPI-memory");
++ if (!res) {
++ ret = -EINVAL;
++ goto err_put_ctrl;
++ }
+ q->memmap_phy = res->start;
+ /* Since there are 4 cs, map size required is 4 times ahb_buf_size */
+ q->ahb_addr = devm_ioremap(dev, q->memmap_phy,
+diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
+index bdf94cc7be1af..63302e21e574c 100644
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -203,24 +203,6 @@ static int mspi_apply_cpu_mode_quirks(struct spi_mpc8xxx_cs *cs,
+ return bits_per_word;
+ }
+
+-static int mspi_apply_qe_mode_quirks(struct spi_mpc8xxx_cs *cs,
+- struct spi_device *spi,
+- int bits_per_word)
+-{
+- /* QE uses Little Endian for words > 8
+- * so transform all words > 8 into 8 bits
+- * Unfortnatly that doesn't work for LSB so
+- * reject these for now */
+- /* Note: 32 bits word, LSB works iff
+- * tfcr/rfcr is set to CPMFCR_GBL */
+- if (spi->mode & SPI_LSB_FIRST &&
+- bits_per_word > 8)
+- return -EINVAL;
+- if (bits_per_word > 8)
+- return 8; /* pretend its 8 bits */
+- return bits_per_word;
+-}
+-
+ static int fsl_spi_setup_transfer(struct spi_device *spi,
+ struct spi_transfer *t)
+ {
+@@ -248,9 +230,6 @@ static int fsl_spi_setup_transfer(struct spi_device *spi,
+ bits_per_word = mspi_apply_cpu_mode_quirks(cs, spi,
+ mpc8xxx_spi,
+ bits_per_word);
+- else if (mpc8xxx_spi->flags & SPI_QE)
+- bits_per_word = mspi_apply_qe_mode_quirks(cs, spi,
+- bits_per_word);
+
+ if (bits_per_word < 0)
+ return bits_per_word;
+@@ -368,14 +347,30 @@ static int fsl_spi_do_one_msg(struct spi_master *master,
+ * In CPU mode, optimize large byte transfers to use larger
+ * bits_per_word values to reduce number of interrupts taken.
+ */
+- if (!(mpc8xxx_spi->flags & SPI_CPM_MODE)) {
+- list_for_each_entry(t, &m->transfers, transfer_list) {
++ list_for_each_entry(t, &m->transfers, transfer_list) {
++ if (!(mpc8xxx_spi->flags & SPI_CPM_MODE)) {
+ if (t->len < 256 || t->bits_per_word != 8)
+ continue;
+ if ((t->len & 3) == 0)
+ t->bits_per_word = 32;
+ else if ((t->len & 1) == 0)
+ t->bits_per_word = 16;
++ } else {
++ /*
++ * CPM/QE uses Little Endian for words > 8
++ * so transform 16 and 32 bits words into 8 bits
++ * Unfortnatly that doesn't work for LSB so
++ * reject these for now
++ * Note: 32 bits word, LSB works iff
++ * tfcr/rfcr is set to CPMFCR_GBL
++ */
++ if (m->spi->mode & SPI_LSB_FIRST && t->bits_per_word > 8)
++ return -EINVAL;
++ if (t->bits_per_word == 16 || t->bits_per_word == 32)
++ t->bits_per_word = 8; /* pretend its 8 bits */
++ if (t->bits_per_word == 8 && t->len >= 256 &&
++ (mpc8xxx_spi->flags & SPI_CPM1))
++ t->bits_per_word = 16;
+ }
+ }
+
+@@ -633,8 +628,14 @@ static struct spi_master *fsl_spi_probe(struct device *dev,
+ if (mpc8xxx_spi->type == TYPE_GRLIB)
+ fsl_spi_grlib_probe(dev);
+
+- master->bits_per_word_mask =
+- (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32)) &
++ if (mpc8xxx_spi->flags & SPI_CPM_MODE)
++ master->bits_per_word_mask =
++ (SPI_BPW_RANGE_MASK(4, 8) | SPI_BPW_MASK(16) | SPI_BPW_MASK(32));
++ else
++ master->bits_per_word_mask =
++ (SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32));
++
++ master->bits_per_word_mask &=
+ SPI_BPW_RANGE_MASK(1, mpc8xxx_spi->max_bits_per_word);
+
+ if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
+diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
+index 2f51421e2a718..4b92f119955c7 100644
+--- a/drivers/spi/spi-geni-qcom.c
++++ b/drivers/spi/spi-geni-qcom.c
+@@ -32,7 +32,7 @@
+ #define CS_DEMUX_OUTPUT_SEL GENMASK(3, 0)
+
+ #define SE_SPI_TRANS_CFG 0x25c
+-#define CS_TOGGLE BIT(0)
++#define CS_TOGGLE BIT(1)
+
+ #define SE_SPI_WORD_LEN 0x268
+ #define WORD_LEN_MSK GENMASK(9, 0)
+diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
+index 0584f4d2fde29..3ffdab6caac2a 100644
+--- a/drivers/spi/spi-gpio.c
++++ b/drivers/spi/spi-gpio.c
+@@ -244,9 +244,19 @@ static int spi_gpio_set_direction(struct spi_device *spi, bool output)
+ if (output)
+ return gpiod_direction_output(spi_gpio->mosi, 1);
+
+- ret = gpiod_direction_input(spi_gpio->mosi);
+- if (ret)
+- return ret;
++ /*
++ * Only change MOSI to an input if using 3WIRE mode.
++ * Otherwise, MOSI could be left floating if there is
++ * no pull resistor connected to the I/O pin, or could
++ * be left logic high if there is a pull-up. Transmitting
++ * logic high when only clocking MISO data in can put some
++ * SPI devices in to a bad state.
++ */
++ if (spi->mode & SPI_3WIRE) {
++ ret = gpiod_direction_input(spi_gpio->mosi);
++ if (ret)
++ return ret;
++ }
+ /*
+ * Send a turnaround high impedance cycle when switching
+ * from output to input. Theoretically there should be
+diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c
+index 58b823a16fc4d..525cc0143a305 100644
+--- a/drivers/spi/spi-hisi-kunpeng.c
++++ b/drivers/spi/spi-hisi-kunpeng.c
+@@ -127,7 +127,6 @@ struct hisi_spi {
+ void __iomem *regs;
+ int irq;
+ u32 fifo_len; /* depth of the FIFO buffer */
+- u16 bus_num;
+
+ /* Current message transfer state info */
+ const void *tx;
+@@ -165,7 +164,10 @@ static int hisi_spi_debugfs_init(struct hisi_spi *hs)
+ {
+ char name[32];
+
+- snprintf(name, 32, "hisi_spi%d", hs->bus_num);
++ struct spi_controller *master;
++
++ master = container_of(hs->dev, struct spi_controller, dev);
++ snprintf(name, 32, "hisi_spi%d", master->bus_num);
+ hs->debugfs = debugfs_create_dir(name, NULL);
+ if (!hs->debugfs)
+ return -ENOMEM;
+@@ -467,7 +469,6 @@ static int hisi_spi_probe(struct platform_device *pdev)
+ hs = spi_controller_get_devdata(master);
+ hs->dev = dev;
+ hs->irq = irq;
+- hs->bus_num = pdev->id;
+
+ hs->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(hs->regs))
+@@ -490,7 +491,7 @@ static int hisi_spi_probe(struct platform_device *pdev)
+ master->use_gpio_descriptors = true;
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+ master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+- master->bus_num = hs->bus_num;
++ master->bus_num = pdev->id;
+ master->setup = hisi_spi_setup;
+ master->cleanup = hisi_spi_cleanup;
+ master->transfer_one = hisi_spi_transfer_one;
+@@ -506,15 +507,15 @@ static int hisi_spi_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- if (hisi_spi_debugfs_init(hs))
+- dev_info(dev, "failed to create debugfs dir\n");
+-
+ ret = spi_register_controller(master);
+ if (ret) {
+ dev_err(dev, "failed to register spi master, ret=%d\n", ret);
+ return ret;
+ }
+
++ if (hisi_spi_debugfs_init(hs))
++ dev_info(dev, "failed to create debugfs dir\n");
++
+ dev_info(dev, "hw version:0x%x max-freq:%u kHz\n",
+ readl(hs->regs + HISI_SPI_VERSION),
+ master->max_speed_hz / 1000);
+diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
+index 5f05d519fbbd0..71376b6df89db 100644
+--- a/drivers/spi/spi-img-spfi.c
++++ b/drivers/spi/spi-img-spfi.c
+@@ -731,7 +731,7 @@ static int img_spfi_resume(struct device *dev)
+ int ret;
+
+ ret = pm_runtime_get_sync(dev);
+- if (ret) {
++ if (ret < 0) {
+ pm_runtime_put_noidle(dev);
+ return ret;
+ }
+diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
+index b2dd0a4d24462..f201653931d89 100644
+--- a/drivers/spi/spi-imx.c
++++ b/drivers/spi/spi-imx.c
+@@ -247,6 +247,18 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
+ return true;
+ }
+
++/*
++ * Note the number of natively supported chip selects for MX51 is 4. Some
++ * devices may have less actual SS pins but the register map supports 4. When
++ * using gpio chip selects the cs values passed into the macros below can go
++ * outside the range 0 - 3. We therefore need to limit the cs value to avoid
++ * corrupting bits outside the allocated locations.
++ *
++ * The simplest way to do this is to just mask the cs bits to 2 bits. This
++ * still allows all 4 native chip selects to work as well as gpio chip selects
++ * (which can use any of the 4 chip select configurations).
++ */
++
+ #define MX51_ECSPI_CTRL 0x08
+ #define MX51_ECSPI_CTRL_ENABLE (1 << 0)
+ #define MX51_ECSPI_CTRL_XCH (1 << 2)
+@@ -255,16 +267,16 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
+ #define MX51_ECSPI_CTRL_DRCTL(drctl) ((drctl) << 16)
+ #define MX51_ECSPI_CTRL_POSTDIV_OFFSET 8
+ #define MX51_ECSPI_CTRL_PREDIV_OFFSET 12
+-#define MX51_ECSPI_CTRL_CS(cs) ((cs) << 18)
++#define MX51_ECSPI_CTRL_CS(cs) ((cs & 3) << 18)
+ #define MX51_ECSPI_CTRL_BL_OFFSET 20
+ #define MX51_ECSPI_CTRL_BL_MASK (0xfff << 20)
+
+ #define MX51_ECSPI_CONFIG 0x0c
+-#define MX51_ECSPI_CONFIG_SCLKPHA(cs) (1 << ((cs) + 0))
+-#define MX51_ECSPI_CONFIG_SCLKPOL(cs) (1 << ((cs) + 4))
+-#define MX51_ECSPI_CONFIG_SBBCTRL(cs) (1 << ((cs) + 8))
+-#define MX51_ECSPI_CONFIG_SSBPOL(cs) (1 << ((cs) + 12))
+-#define MX51_ECSPI_CONFIG_SCLKCTL(cs) (1 << ((cs) + 20))
++#define MX51_ECSPI_CONFIG_SCLKPHA(cs) (1 << ((cs & 3) + 0))
++#define MX51_ECSPI_CONFIG_SCLKPOL(cs) (1 << ((cs & 3) + 4))
++#define MX51_ECSPI_CONFIG_SBBCTRL(cs) (1 << ((cs & 3) + 8))
++#define MX51_ECSPI_CONFIG_SSBPOL(cs) (1 << ((cs & 3) + 12))
++#define MX51_ECSPI_CONFIG_SCLKCTL(cs) (1 << ((cs & 3) + 20))
+
+ #define MX51_ECSPI_INT 0x10
+ #define MX51_ECSPI_INT_TEEN (1 << 0)
+@@ -439,8 +451,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx,
+ unsigned int pre, post;
+ unsigned int fin = spi_imx->spi_clk;
+
+- if (unlikely(fspi > fin))
+- return 0;
++ fspi = min(fspi, fin);
+
+ post = fls(fin) - fls(fspi);
+ if (fin > fspi << post)
+@@ -1553,9 +1564,8 @@ spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg)
+ struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+ int ret;
+
+- ret = pm_runtime_get_sync(spi_imx->dev);
++ ret = pm_runtime_resume_and_get(spi_imx->dev);
+ if (ret < 0) {
+- pm_runtime_put_noidle(spi_imx->dev);
+ dev_err(spi_imx->dev, "failed to enable clock\n");
+ return ret;
+ }
+@@ -1772,13 +1782,10 @@ static int spi_imx_remove(struct platform_device *pdev)
+ spi_bitbang_stop(&spi_imx->bitbang);
+
+ ret = pm_runtime_get_sync(spi_imx->dev);
+- if (ret < 0) {
+- pm_runtime_put_noidle(spi_imx->dev);
+- dev_err(spi_imx->dev, "failed to enable clock\n");
+- return ret;
+- }
+-
+- writel(0, spi_imx->base + MXC_CSPICTRL);
++ if (ret >= 0)
++ writel(0, spi_imx->base + MXC_CSPICTRL);
++ else
++ dev_warn(spi_imx->dev, "failed to enable clock, skip hw disable\n");
+
+ pm_runtime_dont_use_autosuspend(spi_imx->dev);
+ pm_runtime_put_sync(spi_imx->dev);
+diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
+index c208efeadd184..6974a1c947aad 100644
+--- a/drivers/spi/spi-meson-spicc.c
++++ b/drivers/spi/spi-meson-spicc.c
+@@ -156,6 +156,7 @@ struct meson_spicc_device {
+ void __iomem *base;
+ struct clk *core;
+ struct clk *pclk;
++ struct clk_divider pow2_div;
+ struct clk *clk;
+ struct spi_message *message;
+ struct spi_transfer *xfer;
+@@ -168,6 +169,8 @@ struct meson_spicc_device {
+ unsigned long xfer_remain;
+ };
+
++#define pow2_clk_to_spicc(_div) container_of(_div, struct meson_spicc_device, pow2_div)
++
+ static void meson_spicc_oen_enable(struct meson_spicc_device *spicc)
+ {
+ u32 conf;
+@@ -421,7 +424,7 @@ static int meson_spicc_prepare_message(struct spi_master *master,
+ {
+ struct meson_spicc_device *spicc = spi_master_get_devdata(master);
+ struct spi_device *spi = message->spi;
+- u32 conf = 0;
++ u32 conf = readl_relaxed(spicc->base + SPICC_CONREG) & SPICC_DATARATE_MASK;
+
+ /* Store current message */
+ spicc->message = message;
+@@ -458,8 +461,6 @@ static int meson_spicc_prepare_message(struct spi_master *master,
+ /* Select CS */
+ conf |= FIELD_PREP(SPICC_CS_MASK, spi->chip_select);
+
+- /* Default Clock rate core/4 */
+-
+ /* Default 8bit word */
+ conf |= FIELD_PREP(SPICC_BITLENGTH_MASK, 8 - 1);
+
+@@ -476,12 +477,16 @@ static int meson_spicc_prepare_message(struct spi_master *master,
+ static int meson_spicc_unprepare_transfer(struct spi_master *master)
+ {
+ struct meson_spicc_device *spicc = spi_master_get_devdata(master);
++ u32 conf = readl_relaxed(spicc->base + SPICC_CONREG) & SPICC_DATARATE_MASK;
+
+ /* Disable all IRQs */
+ writel(0, spicc->base + SPICC_INTREG);
+
+ device_reset_optional(&spicc->pdev->dev);
+
++ /* Set default configuration, keeping datarate field */
++ writel_relaxed(conf, spicc->base + SPICC_CONREG);
++
+ return 0;
+ }
+
+@@ -518,14 +523,60 @@ static void meson_spicc_cleanup(struct spi_device *spi)
+ * Clk path for G12A series:
+ * pclk -> pow2 fixed div -> pow2 div -> mux -> out
+ * pclk -> enh fixed div -> enh div -> mux -> out
++ *
++ * The pow2 divider is tied to the controller HW state, and the
++ * divider is only valid when the controller is initialized.
++ *
++ * A set of clock ops is added to make sure we don't read/set this
++ * clock rate while the controller is in an unknown state.
+ */
+
+-static int meson_spicc_clk_init(struct meson_spicc_device *spicc)
++static unsigned long meson_spicc_pow2_recalc_rate(struct clk_hw *hw,
++ unsigned long parent_rate)
++{
++ struct clk_divider *divider = to_clk_divider(hw);
++ struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider);
++
++ if (!spicc->master->cur_msg)
++ return 0;
++
++ return clk_divider_ops.recalc_rate(hw, parent_rate);
++}
++
++static int meson_spicc_pow2_determine_rate(struct clk_hw *hw,
++ struct clk_rate_request *req)
++{
++ struct clk_divider *divider = to_clk_divider(hw);
++ struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider);
++
++ if (!spicc->master->cur_msg)
++ return -EINVAL;
++
++ return clk_divider_ops.determine_rate(hw, req);
++}
++
++static int meson_spicc_pow2_set_rate(struct clk_hw *hw, unsigned long rate,
++ unsigned long parent_rate)
++{
++ struct clk_divider *divider = to_clk_divider(hw);
++ struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider);
++
++ if (!spicc->master->cur_msg)
++ return -EINVAL;
++
++ return clk_divider_ops.set_rate(hw, rate, parent_rate);
++}
++
++const struct clk_ops meson_spicc_pow2_clk_ops = {
++ .recalc_rate = meson_spicc_pow2_recalc_rate,
++ .determine_rate = meson_spicc_pow2_determine_rate,
++ .set_rate = meson_spicc_pow2_set_rate,
++};
++
++static int meson_spicc_pow2_clk_init(struct meson_spicc_device *spicc)
+ {
+ struct device *dev = &spicc->pdev->dev;
+- struct clk_fixed_factor *pow2_fixed_div, *enh_fixed_div;
+- struct clk_divider *pow2_div, *enh_div;
+- struct clk_mux *mux;
++ struct clk_fixed_factor *pow2_fixed_div;
+ struct clk_init_data init;
+ struct clk *clk;
+ struct clk_parent_data parent_data[2];
+@@ -560,31 +611,45 @@ static int meson_spicc_clk_init(struct meson_spicc_device *spicc)
+ if (WARN_ON(IS_ERR(clk)))
+ return PTR_ERR(clk);
+
+- pow2_div = devm_kzalloc(dev, sizeof(*pow2_div), GFP_KERNEL);
+- if (!pow2_div)
+- return -ENOMEM;
+-
+ snprintf(name, sizeof(name), "%s#pow2_div", dev_name(dev));
+ init.name = name;
+- init.ops = &clk_divider_ops;
+- init.flags = CLK_SET_RATE_PARENT;
++ init.ops = &meson_spicc_pow2_clk_ops;
++ /*
++ * Set NOCACHE here to make sure we read the actual HW value
++ * since we reset the HW after each transfer.
++ */
++ init.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE;
+ parent_data[0].hw = &pow2_fixed_div->hw;
+ init.num_parents = 1;
+
+- pow2_div->shift = 16,
+- pow2_div->width = 3,
+- pow2_div->flags = CLK_DIVIDER_POWER_OF_TWO,
+- pow2_div->reg = spicc->base + SPICC_CONREG;
+- pow2_div->hw.init = &init;
++ spicc->pow2_div.shift = 16,
++ spicc->pow2_div.width = 3,
++ spicc->pow2_div.flags = CLK_DIVIDER_POWER_OF_TWO,
++ spicc->pow2_div.reg = spicc->base + SPICC_CONREG;
++ spicc->pow2_div.hw.init = &init;
+
+- clk = devm_clk_register(dev, &pow2_div->hw);
+- if (WARN_ON(IS_ERR(clk)))
+- return PTR_ERR(clk);
++ spicc->clk = devm_clk_register(dev, &spicc->pow2_div.hw);
++ if (WARN_ON(IS_ERR(spicc->clk)))
++ return PTR_ERR(spicc->clk);
+
+- if (!spicc->data->has_enhance_clk_div) {
+- spicc->clk = clk;
+- return 0;
+- }
++ return 0;
++}
++
++static int meson_spicc_enh_clk_init(struct meson_spicc_device *spicc)
++{
++ struct device *dev = &spicc->pdev->dev;
++ struct clk_fixed_factor *enh_fixed_div;
++ struct clk_divider *enh_div;
++ struct clk_mux *mux;
++ struct clk_init_data init;
++ struct clk *clk;
++ struct clk_parent_data parent_data[2];
++ char name[64];
++
++ memset(&init, 0, sizeof(init));
++ memset(&parent_data, 0, sizeof(parent_data));
++
++ init.parent_data = parent_data;
+
+ /* algorithm for enh div: rate = freq / 2 / (N + 1) */
+
+@@ -637,7 +702,7 @@ static int meson_spicc_clk_init(struct meson_spicc_device *spicc)
+ snprintf(name, sizeof(name), "%s#sel", dev_name(dev));
+ init.name = name;
+ init.ops = &clk_mux_ops;
+- parent_data[0].hw = &pow2_div->hw;
++ parent_data[0].hw = &spicc->pow2_div.hw;
+ parent_data[1].hw = &enh_div->hw;
+ init.num_parents = 2;
+ init.flags = CLK_SET_RATE_PARENT;
+@@ -693,6 +758,11 @@ static int meson_spicc_probe(struct platform_device *pdev)
+ writel_relaxed(0, spicc->base + SPICC_INTREG);
+
+ irq = platform_get_irq(pdev, 0);
++ if (irq < 0) {
++ ret = irq;
++ goto out_master;
++ }
++
+ ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq,
+ 0, NULL, spicc);
+ if (ret) {
+@@ -749,12 +819,20 @@ static int meson_spicc_probe(struct platform_device *pdev)
+
+ meson_spicc_oen_enable(spicc);
+
+- ret = meson_spicc_clk_init(spicc);
++ ret = meson_spicc_pow2_clk_init(spicc);
+ if (ret) {
+- dev_err(&pdev->dev, "clock registration failed\n");
++ dev_err(&pdev->dev, "pow2 clock registration failed\n");
+ goto out_clk;
+ }
+
++ if (spicc->data->has_enhance_clk_div) {
++ ret = meson_spicc_enh_clk_init(spicc);
++ if (ret) {
++ dev_err(&pdev->dev, "clock registration failed\n");
++ goto out_clk;
++ }
++ }
++
+ ret = devm_spi_register_master(&pdev->dev, master);
+ if (ret) {
+ dev_err(&pdev->dev, "spi master registration failed\n");
+diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
+index 8eca6f24cb799..c8ed7815c4ba6 100644
+--- a/drivers/spi/spi-meson-spifc.c
++++ b/drivers/spi/spi-meson-spifc.c
+@@ -349,6 +349,7 @@ static int meson_spifc_probe(struct platform_device *pdev)
+ return 0;
+ out_clk:
+ clk_disable_unprepare(spifc->clk);
++ pm_runtime_disable(spifc->dev);
+ out_err:
+ spi_master_put(master);
+ return ret;
+diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
+index a15de10ee286a..49acba1dea1e7 100644
+--- a/drivers/spi/spi-mt65xx.c
++++ b/drivers/spi/spi-mt65xx.c
+@@ -43,8 +43,11 @@
+ #define SPI_CFG1_PACKET_LOOP_OFFSET 8
+ #define SPI_CFG1_PACKET_LENGTH_OFFSET 16
+ #define SPI_CFG1_GET_TICK_DLY_OFFSET 29
++#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1 30
+
+ #define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000
++#define SPI_CFG1_GET_TICK_DLY_MASK_V1 0xc0000000
++
+ #define SPI_CFG1_CS_IDLE_MASK 0xff
+ #define SPI_CFG1_PACKET_LOOP_MASK 0xff00
+ #define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000
+@@ -346,9 +349,15 @@ static int mtk_spi_prepare_message(struct spi_master *master,
+
+ /* tick delay */
+ reg_val = readl(mdata->base + SPI_CFG1_REG);
+- reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
+- reg_val |= ((chip_config->tick_delay & 0x7)
+- << SPI_CFG1_GET_TICK_DLY_OFFSET);
++ if (mdata->dev_comp->enhance_timing) {
++ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
++ reg_val |= ((chip_config->tick_delay & 0x7)
++ << SPI_CFG1_GET_TICK_DLY_OFFSET);
++ } else {
++ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1;
++ reg_val |= ((chip_config->tick_delay & 0x3)
++ << SPI_CFG1_GET_TICK_DLY_OFFSET_V1);
++ }
+ writel(reg_val, mdata->base + SPI_CFG1_REG);
+
+ /* set hw cs timing */
+@@ -624,7 +633,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
+ else
+ mdata->state = MTK_SPI_IDLE;
+
+- if (!master->can_dma(master, master->cur_msg->spi, trans)) {
++ if (!master->can_dma(master, NULL, trans)) {
+ if (trans->rx_buf) {
+ cnt = mdata->xfer_len / 4;
+ ioread32_rep(mdata->base + SPI_RX_DATA_REG,
+@@ -903,14 +912,20 @@ static int mtk_spi_remove(struct platform_device *pdev)
+ {
+ struct spi_master *master = platform_get_drvdata(pdev);
+ struct mtk_spi *mdata = spi_master_get_devdata(master);
++ int ret;
+
+- pm_runtime_disable(&pdev->dev);
++ ret = pm_runtime_resume_and_get(&pdev->dev);
++ if (ret < 0)
++ return ret;
+
+ mtk_spi_reset(mdata);
+
+ if (mdata->dev_comp->no_need_unprepare)
+ clk_unprepare(mdata->spi_clk);
+
++ pm_runtime_put_noidle(&pdev->dev);
++ pm_runtime_disable(&pdev->dev);
++
+ return 0;
+ }
+
+diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c
+index b4b9b7309b5e9..351b0ef52bbc8 100644
+--- a/drivers/spi/spi-mt7621.c
++++ b/drivers/spi/spi-mt7621.c
+@@ -340,11 +340,9 @@ static int mt7621_spi_probe(struct platform_device *pdev)
+ return PTR_ERR(base);
+
+ clk = devm_clk_get(&pdev->dev, NULL);
+- if (IS_ERR(clk)) {
+- dev_err(&pdev->dev, "unable to get SYS clock, err=%d\n",
+- status);
+- return PTR_ERR(clk);
+- }
++ if (IS_ERR(clk))
++ return dev_err_probe(&pdev->dev, PTR_ERR(clk),
++ "unable to get SYS clock\n");
+
+ status = clk_prepare_enable(clk);
+ if (status)
+diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
+index 41e7b341d2616..6d203477c04b1 100644
+--- a/drivers/spi/spi-mtk-nor.c
++++ b/drivers/spi/spi-mtk-nor.c
+@@ -160,7 +160,7 @@ static bool mtk_nor_match_read(const struct spi_mem_op *op)
+ {
+ int dummy = 0;
+
+- if (op->dummy.buswidth)
++ if (op->dummy.nbytes)
+ dummy = op->dummy.nbytes * BITS_PER_BYTE / op->dummy.buswidth;
+
+ if ((op->data.buswidth == 2) || (op->data.buswidth == 4)) {
+@@ -909,7 +909,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev)
+
+ static int __maybe_unused mtk_nor_resume(struct device *dev)
+ {
+- return pm_runtime_force_resume(dev);
++ struct spi_controller *ctlr = dev_get_drvdata(dev);
++ struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
++ int ret;
++
++ ret = pm_runtime_force_resume(dev);
++ if (ret)
++ return ret;
++
++ mtk_nor_init(sp);
++
++ return 0;
+ }
+
+ static const struct dev_pm_ops mtk_nor_pm_ops = {
+diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
+index 45889947afed8..03fce4493aa79 100644
+--- a/drivers/spi/spi-mxic.c
++++ b/drivers/spi/spi-mxic.c
+@@ -304,25 +304,21 @@ static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf,
+
+ writel(data, mxic->regs + TXD(nbytes % 4));
+
++ ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
++ sts & INT_TX_EMPTY, 0, USEC_PER_SEC);
++ if (ret)
++ return ret;
++
++ ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
++ sts & INT_RX_NOT_EMPTY, 0,
++ USEC_PER_SEC);
++ if (ret)
++ return ret;
++
++ data = readl(mxic->regs + RXD);
+ if (rxbuf) {
+- ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+- sts & INT_TX_EMPTY, 0,
+- USEC_PER_SEC);
+- if (ret)
+- return ret;
+-
+- ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+- sts & INT_RX_NOT_EMPTY, 0,
+- USEC_PER_SEC);
+- if (ret)
+- return ret;
+-
+- data = readl(mxic->regs + RXD);
+ data >>= (8 * (4 - nbytes));
+ memcpy(rxbuf + pos, &data, nbytes);
+- WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+- } else {
+- readl(mxic->regs + RXD);
+ }
+ WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+
+diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
+index 20b0471729651..061f7394e5b9b 100644
+--- a/drivers/spi/spi-omap-100k.c
++++ b/drivers/spi/spi-omap-100k.c
+@@ -412,6 +412,7 @@ static int omap1_spi100k_probe(struct platform_device *pdev)
+ return status;
+
+ err_fck:
++ pm_runtime_disable(&pdev->dev);
+ clk_disable_unprepare(spi100k->fck);
+ err_ick:
+ clk_disable_unprepare(spi100k->ick);
+diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
+index 2e134eb4bd2c9..6502fda6243e0 100644
+--- a/drivers/spi/spi-pxa2xx-pci.c
++++ b/drivers/spi/spi-pxa2xx-pci.c
+@@ -76,14 +76,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param)
+ return true;
+ }
+
++static void lpss_dma_put_device(void *dma_dev)
++{
++ pci_dev_put(dma_dev);
++}
++
+ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+ {
+ struct pci_dev *dma_dev;
++ int ret;
+
+ c->num_chipselect = 1;
+ c->max_clk_rate = 50000000;
+
+ dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
++ ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
++ if (ret)
++ return ret;
+
+ if (c->tx_param) {
+ struct dw_dma_slave *slave = c->tx_param;
+@@ -107,8 +116,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+
+ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+ {
+- struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
+ struct dw_dma_slave *tx, *rx;
++ struct pci_dev *dma_dev;
++ int ret;
+
+ switch (PCI_FUNC(dev->devfn)) {
+ case 0:
+@@ -133,6 +143,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+ return -ENODEV;
+ }
+
++ dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
++ ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
++ if (ret)
++ return ret;
++
+ tx = c->tx_param;
+ tx->dma_dev = &dma_dev->dev;
+
+diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
+index d39dec6d1c91e..2cc9bb413c108 100644
+--- a/drivers/spi/spi-qup.c
++++ b/drivers/spi/spi-qup.c
+@@ -1030,23 +1030,8 @@ static int spi_qup_probe(struct platform_device *pdev)
+ return -ENXIO;
+ }
+
+- ret = clk_prepare_enable(cclk);
+- if (ret) {
+- dev_err(dev, "cannot enable core clock\n");
+- return ret;
+- }
+-
+- ret = clk_prepare_enable(iclk);
+- if (ret) {
+- clk_disable_unprepare(cclk);
+- dev_err(dev, "cannot enable iface clock\n");
+- return ret;
+- }
+-
+ master = spi_alloc_master(dev, sizeof(struct spi_qup));
+ if (!master) {
+- clk_disable_unprepare(cclk);
+- clk_disable_unprepare(iclk);
+ dev_err(dev, "cannot allocate master\n");
+ return -ENOMEM;
+ }
+@@ -1092,6 +1077,19 @@ static int spi_qup_probe(struct platform_device *pdev)
+ spin_lock_init(&controller->lock);
+ init_completion(&controller->done);
+
++ ret = clk_prepare_enable(cclk);
++ if (ret) {
++ dev_err(dev, "cannot enable core clock\n");
++ goto error_dma;
++ }
++
++ ret = clk_prepare_enable(iclk);
++ if (ret) {
++ clk_disable_unprepare(cclk);
++ dev_err(dev, "cannot enable iface clock\n");
++ goto error_dma;
++ }
++
+ iomode = readl_relaxed(base + QUP_IO_M_MODES);
+
+ size = QUP_IO_M_OUTPUT_BLOCK_SIZE(iomode);
+@@ -1121,7 +1119,7 @@ static int spi_qup_probe(struct platform_device *pdev)
+ ret = spi_qup_set_state(controller, QUP_STATE_RESET);
+ if (ret) {
+ dev_err(dev, "cannot set RESET state\n");
+- goto error_dma;
++ goto error_clk;
+ }
+
+ writel_relaxed(0, base + QUP_OPERATIONAL);
+@@ -1145,7 +1143,7 @@ static int spi_qup_probe(struct platform_device *pdev)
+ ret = devm_request_irq(dev, irq, spi_qup_qup_irq,
+ IRQF_TRIGGER_HIGH, pdev->name, controller);
+ if (ret)
+- goto error_dma;
++ goto error_clk;
+
+ pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
+ pm_runtime_use_autosuspend(dev);
+@@ -1160,11 +1158,12 @@ static int spi_qup_probe(struct platform_device *pdev)
+
+ disable_pm:
+ pm_runtime_disable(&pdev->dev);
++error_clk:
++ clk_disable_unprepare(cclk);
++ clk_disable_unprepare(iclk);
+ error_dma:
+ spi_qup_release_dma(master);
+ error:
+- clk_disable_unprepare(cclk);
+- clk_disable_unprepare(iclk);
+ spi_master_put(master);
+ return ret;
+ }
+@@ -1199,8 +1198,10 @@ static int spi_qup_pm_resume_runtime(struct device *device)
+ return ret;
+
+ ret = clk_prepare_enable(controller->cclk);
+- if (ret)
++ if (ret) {
++ clk_disable_unprepare(controller->iclk);
+ return ret;
++ }
+
+ /* Disable clocks auto gaiting */
+ config = readl_relaxed(controller->base + QUP_CONFIG);
+@@ -1246,14 +1247,25 @@ static int spi_qup_resume(struct device *device)
+ return ret;
+
+ ret = clk_prepare_enable(controller->cclk);
+- if (ret)
++ if (ret) {
++ clk_disable_unprepare(controller->iclk);
+ return ret;
++ }
+
+ ret = spi_qup_set_state(controller, QUP_STATE_RESET);
+ if (ret)
+- return ret;
++ goto disable_clk;
++
++ ret = spi_master_resume(master);
++ if (ret)
++ goto disable_clk;
++
++ return 0;
+
+- return spi_master_resume(master);
++disable_clk:
++ clk_disable_unprepare(controller->cclk);
++ clk_disable_unprepare(controller->iclk);
++ return ret;
+ }
+ #endif /* CONFIG_PM_SLEEP */
+
+@@ -1263,18 +1275,22 @@ static int spi_qup_remove(struct platform_device *pdev)
+ struct spi_qup *controller = spi_master_get_devdata(master);
+ int ret;
+
+- ret = pm_runtime_resume_and_get(&pdev->dev);
+- if (ret < 0)
+- return ret;
++ ret = pm_runtime_get_sync(&pdev->dev);
+
+- ret = spi_qup_set_state(controller, QUP_STATE_RESET);
+- if (ret)
+- return ret;
++ if (ret >= 0) {
++ ret = spi_qup_set_state(controller, QUP_STATE_RESET);
++ if (ret)
++ dev_warn(&pdev->dev, "failed to reset controller (%pe)\n",
++ ERR_PTR(ret));
+
+- spi_qup_release_dma(master);
++ clk_disable_unprepare(controller->cclk);
++ clk_disable_unprepare(controller->iclk);
++ } else {
++ dev_warn(&pdev->dev, "failed to resume, skip hw disable (%pe)\n",
++ ERR_PTR(ret));
++ }
+
+- clk_disable_unprepare(controller->cclk);
+- clk_disable_unprepare(controller->iclk);
++ spi_qup_release_dma(master);
+
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
+index a46b385440273..014106f8f978c 100644
+--- a/drivers/spi/spi-rockchip-sfc.c
++++ b/drivers/spi/spi-rockchip-sfc.c
+@@ -634,7 +634,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(dev, "Failed to request irq\n");
+
+- return ret;
++ goto err_irq;
+ }
+
+ ret = rockchip_sfc_init(sfc);
+diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
+index 553b6b9d02222..b721b62118e12 100644
+--- a/drivers/spi/spi-rockchip.c
++++ b/drivers/spi/spi-rockchip.c
+@@ -133,7 +133,8 @@
+ #define INT_TF_OVERFLOW (1 << 1)
+ #define INT_RF_UNDERFLOW (1 << 2)
+ #define INT_RF_OVERFLOW (1 << 3)
+-#define INT_RF_FULL (1 << 4)
++#define INT_RF_FULL (1 << 4)
++#define INT_CS_INACTIVE (1 << 6)
+
+ /* Bit fields in ICR, 4bit */
+ #define ICR_MASK 0x0f
+@@ -194,6 +195,10 @@ struct rockchip_spi {
+ bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
+
+ bool slave_abort;
++ bool cs_inactive; /* spi slave transmission stop when cs inactive */
++ bool cs_high_supported; /* native CS supports active-high polarity */
++
++ struct spi_transfer *xfer; /* Store xfer temporarily */
+ };
+
+ static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
+@@ -343,6 +348,15 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
+ struct spi_controller *ctlr = dev_id;
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+
++ /* When int_cs_inactive comes, spi slave abort */
++ if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) {
++ ctlr->slave_abort(ctlr);
++ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
++ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
++
++ return IRQ_HANDLED;
++ }
++
+ if (rs->tx_left)
+ rockchip_spi_pio_writer(rs);
+
+@@ -350,6 +364,7 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
+ if (!rs->rx_left) {
+ spi_enable_chip(rs, false);
+ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
++ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
+ spi_finalize_current_transfer(ctlr);
+ }
+
+@@ -357,14 +372,18 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
+ }
+
+ static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
+- struct spi_transfer *xfer)
++ struct spi_controller *ctlr,
++ struct spi_transfer *xfer)
+ {
+ rs->tx = xfer->tx_buf;
+ rs->rx = xfer->rx_buf;
+ rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
+ rs->rx_left = xfer->len / rs->n_bytes;
+
+- writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
++ if (rs->cs_inactive)
++ writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
++ else
++ writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+ spi_enable_chip(rs, true);
+
+ if (rs->tx_left)
+@@ -383,6 +402,9 @@ static void rockchip_spi_dma_rxcb(void *data)
+ if (state & TXDMA && !rs->slave_abort)
+ return;
+
++ if (rs->cs_inactive)
++ writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
++
+ spi_enable_chip(rs, false);
+ spi_finalize_current_transfer(ctlr);
+ }
+@@ -423,14 +445,16 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
+
+ atomic_set(&rs->state, 0);
+
++ rs->tx = xfer->tx_buf;
++ rs->rx = xfer->rx_buf;
++
+ rxdesc = NULL;
+ if (xfer->rx_buf) {
+ struct dma_slave_config rxconf = {
+ .direction = DMA_DEV_TO_MEM,
+ .src_addr = rs->dma_addr_rx,
+ .src_addr_width = rs->n_bytes,
+- .src_maxburst = rockchip_spi_calc_burst_size(xfer->len /
+- rs->n_bytes),
++ .src_maxburst = rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes),
+ };
+
+ dmaengine_slave_config(ctlr->dma_rx, &rxconf);
+@@ -474,10 +498,13 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
+ /* rx must be started before tx due to spi instinct */
+ if (rxdesc) {
+ atomic_or(RXDMA, &rs->state);
+- dmaengine_submit(rxdesc);
++ ctlr->dma_rx->cookie = dmaengine_submit(rxdesc);
+ dma_async_issue_pending(ctlr->dma_rx);
+ }
+
++ if (rs->cs_inactive)
++ writel_relaxed(INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
++
+ spi_enable_chip(rs, true);
+
+ if (txdesc) {
+@@ -584,7 +611,48 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
+ static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
+ {
+ struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
++ u32 rx_fifo_left;
++ struct dma_tx_state state;
++ enum dma_status status;
++
++ /* Get current dma rx point */
++ if (atomic_read(&rs->state) & RXDMA) {
++ dmaengine_pause(ctlr->dma_rx);
++ status = dmaengine_tx_status(ctlr->dma_rx, ctlr->dma_rx->cookie, &state);
++ if (status == DMA_ERROR) {
++ rs->rx = rs->xfer->rx_buf;
++ rs->xfer->len = 0;
++ rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
++ for (; rx_fifo_left; rx_fifo_left--)
++ readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
++ goto out;
++ } else {
++ rs->rx += rs->xfer->len - rs->n_bytes * state.residue;
++ }
++ }
+
++ /* Get the valid data left in rx fifo and set rs->xfer->len real rx size */
++ if (rs->rx) {
++ rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
++ for (; rx_fifo_left; rx_fifo_left--) {
++ u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
++
++ if (rs->n_bytes == 1)
++ *(u8 *)rs->rx = (u8)rxw;
++ else
++ *(u16 *)rs->rx = (u16)rxw;
++ rs->rx += rs->n_bytes;
++ }
++ rs->xfer->len = (unsigned int)(rs->rx - rs->xfer->rx_buf);
++ }
++
++out:
++ if (atomic_read(&rs->state) & RXDMA)
++ dmaengine_terminate_sync(ctlr->dma_rx);
++ if (atomic_read(&rs->state) & TXDMA)
++ dmaengine_terminate_sync(ctlr->dma_tx);
++ atomic_set(&rs->state, 0);
++ spi_enable_chip(rs, false);
+ rs->slave_abort = true;
+ spi_finalize_current_transfer(ctlr);
+
+@@ -620,7 +688,7 @@ static int rockchip_spi_transfer_one(
+ }
+
+ rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
+-
++ rs->xfer = xfer;
+ use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
+
+ ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
+@@ -630,7 +698,7 @@ static int rockchip_spi_transfer_one(
+ if (use_dma)
+ return rockchip_spi_prepare_dma(rs, ctlr, xfer);
+
+- return rockchip_spi_prepare_irq(rs, xfer);
++ return rockchip_spi_prepare_irq(rs, ctlr, xfer);
+ }
+
+ static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
+@@ -647,6 +715,34 @@ static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
+ return xfer->len / bytes_per_word >= rs->fifo_len;
+ }
+
++static int rockchip_spi_setup(struct spi_device *spi)
++{
++ struct rockchip_spi *rs = spi_controller_get_devdata(spi->controller);
++ u32 cr0;
++
++ if (!spi->cs_gpiod && (spi->mode & SPI_CS_HIGH) && !rs->cs_high_supported) {
++ dev_warn(&spi->dev, "setup: non GPIO CS can't be active-high\n");
++ return -EINVAL;
++ }
++
++ pm_runtime_get_sync(rs->dev);
++
++ cr0 = readl_relaxed(rs->regs + ROCKCHIP_SPI_CTRLR0);
++
++ cr0 &= ~(0x3 << CR0_SCPH_OFFSET);
++ cr0 |= ((spi->mode & 0x3) << CR0_SCPH_OFFSET);
++ if (spi->mode & SPI_CS_HIGH && spi->chip_select <= 1)
++ cr0 |= BIT(spi->chip_select) << CR0_SOI_OFFSET;
++ else if (spi->chip_select <= 1)
++ cr0 &= ~(BIT(spi->chip_select) << CR0_SOI_OFFSET);
++
++ writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
++
++ pm_runtime_put(rs->dev);
++
++ return 0;
++}
++
+ static int rockchip_spi_probe(struct platform_device *pdev)
+ {
+ int ret;
+@@ -654,7 +750,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
+ struct spi_controller *ctlr;
+ struct resource *mem;
+ struct device_node *np = pdev->dev.of_node;
+- u32 rsd_nsecs;
++ u32 rsd_nsecs, num_cs;
+ bool slave_mode;
+
+ slave_mode = of_property_read_bool(np, "spi-slave");
+@@ -764,8 +860,9 @@ static int rockchip_spi_probe(struct platform_device *pdev)
+ * rk spi0 has two native cs, spi1..5 one cs only
+ * if num-cs is missing in the dts, default to 1
+ */
+- if (of_property_read_u16(np, "num-cs", &ctlr->num_chipselect))
+- ctlr->num_chipselect = 1;
++ if (of_property_read_u32(np, "num-cs", &num_cs))
++ num_cs = 1;
++ ctlr->num_chipselect = num_cs;
+ ctlr->use_gpio_descriptors = true;
+ }
+ ctlr->dev.of_node = pdev->dev.of_node;
+@@ -773,6 +870,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
+ ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
+ ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
+
++ ctlr->setup = rockchip_spi_setup;
+ ctlr->set_cs = rockchip_spi_set_cs;
+ ctlr->transfer_one = rockchip_spi_transfer_one;
+ ctlr->max_transfer_size = rockchip_spi_max_transfer_size;
+@@ -807,9 +905,15 @@ static int rockchip_spi_probe(struct platform_device *pdev)
+
+ switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) {
+ case ROCKCHIP_SPI_VER2_TYPE2:
++ rs->cs_high_supported = true;
+ ctlr->mode_bits |= SPI_CS_HIGH;
++ if (ctlr->can_dma && slave_mode)
++ rs->cs_inactive = true;
++ else
++ rs->cs_inactive = false;
+ break;
+ default:
++ rs->cs_inactive = false;
+ break;
+ }
+
+diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c
+index c53138ce00309..83796a4ead34a 100644
+--- a/drivers/spi/spi-rpc-if.c
++++ b/drivers/spi/spi-rpc-if.c
+@@ -139,7 +139,9 @@ static int rpcif_spi_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ rpc = spi_controller_get_devdata(ctlr);
+- rpcif_sw_init(rpc, parent);
++ error = rpcif_sw_init(rpc, parent);
++ if (error)
++ return error;
+
+ platform_set_drvdata(pdev, ctlr);
+
+diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
+index d16ed88802d36..f634a405382cb 100644
+--- a/drivers/spi/spi-rspi.c
++++ b/drivers/spi/spi-rspi.c
+@@ -612,6 +612,10 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx,
+ rspi->dma_callbacked, HZ);
+ if (ret > 0 && rspi->dma_callbacked) {
+ ret = 0;
++ if (tx)
++ dmaengine_synchronize(rspi->ctlr->dma_tx);
++ if (rx)
++ dmaengine_synchronize(rspi->ctlr->dma_rx);
+ } else {
+ if (!ret) {
+ dev_err(&rspi->ctlr->dev, "DMA timeout\n");
+@@ -1107,14 +1111,11 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev,
+ }
+
+ memset(&cfg, 0, sizeof(cfg));
++ cfg.dst_addr = port_addr + RSPI_SPDR;
++ cfg.src_addr = port_addr + RSPI_SPDR;
++ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
++ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+ cfg.direction = dir;
+- if (dir == DMA_MEM_TO_DEV) {
+- cfg.dst_addr = port_addr;
+- cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+- } else {
+- cfg.src_addr = port_addr;
+- cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+- }
+
+ ret = dmaengine_slave_config(chan, &cfg);
+ if (ret) {
+@@ -1145,12 +1146,12 @@ static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr,
+ }
+
+ ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id,
+- res->start + RSPI_SPDR);
++ res->start);
+ if (!ctlr->dma_tx)
+ return -ENODEV;
+
+ ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id,
+- res->start + RSPI_SPDR);
++ res->start);
+ if (!ctlr->dma_rx) {
+ dma_release_channel(ctlr->dma_tx);
+ ctlr->dma_tx = NULL;
+diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
+index 8755cd85e83ce..90c70d53e85e2 100644
+--- a/drivers/spi/spi-s3c64xx.c
++++ b/drivers/spi/spi-s3c64xx.c
+@@ -85,6 +85,7 @@
+ #define S3C64XX_SPI_ST_TX_FIFORDY (1<<0)
+
+ #define S3C64XX_SPI_PACKET_CNT_EN (1<<16)
++#define S3C64XX_SPI_PACKET_CNT_MASK GENMASK(15, 0)
+
+ #define S3C64XX_SPI_PND_TX_UNDERRUN_CLR (1<<4)
+ #define S3C64XX_SPI_PND_TX_OVERRUN_CLR (1<<3)
+@@ -661,6 +662,13 @@ static int s3c64xx_spi_prepare_message(struct spi_master *master,
+ return 0;
+ }
+
++static size_t s3c64xx_spi_max_transfer_size(struct spi_device *spi)
++{
++ struct spi_controller *ctlr = spi->controller;
++
++ return ctlr->can_dma ? S3C64XX_SPI_PACKET_CNT_MASK : SIZE_MAX;
++}
++
+ static int s3c64xx_spi_transfer_one(struct spi_master *master,
+ struct spi_device *spi,
+ struct spi_transfer *xfer)
+@@ -1130,6 +1138,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
+ master->prepare_transfer_hardware = s3c64xx_spi_prepare_transfer;
+ master->prepare_message = s3c64xx_spi_prepare_message;
+ master->transfer_one = s3c64xx_spi_transfer_one;
++ master->max_transfer_size = s3c64xx_spi_max_transfer_size;
+ master->num_chipselect = sci->num_cs;
+ master->dma_alignment = 8;
+ master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
+diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
+index 27f35aa2d746d..dd38cb8ffbc20 100644
+--- a/drivers/spi/spi-stm32-qspi.c
++++ b/drivers/spi/spi-stm32-qspi.c
+@@ -308,7 +308,8 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
+ if (!op->data.nbytes)
+ goto wait_nobusy;
+
+- if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF)
++ if ((readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) ||
++ qspi->fmode == CCR_FMODE_APM)
+ goto out;
+
+ reinit_completion(&qspi->data_completion);
+@@ -397,7 +398,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
+ ccr |= FIELD_PREP(CCR_ADSIZE_MASK, op->addr.nbytes - 1);
+ }
+
+- if (op->dummy.buswidth && op->dummy.nbytes)
++ if (op->dummy.nbytes)
+ ccr |= FIELD_PREP(CCR_DCYC_MASK,
+ op->dummy.nbytes * 8 / op->dummy.buswidth);
+
+@@ -688,7 +689,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
+ struct resource *res;
+ int ret, irq;
+
+- ctrl = spi_alloc_master(dev, sizeof(*qspi));
++ ctrl = devm_spi_alloc_master(dev, sizeof(*qspi));
+ if (!ctrl)
+ return -ENOMEM;
+
+@@ -697,58 +698,46 @@ static int stm32_qspi_probe(struct platform_device *pdev)
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi");
+ qspi->io_base = devm_ioremap_resource(dev, res);
+- if (IS_ERR(qspi->io_base)) {
+- ret = PTR_ERR(qspi->io_base);
+- goto err_master_put;
+- }
++ if (IS_ERR(qspi->io_base))
++ return PTR_ERR(qspi->io_base);
+
+ qspi->phys_base = res->start;
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm");
+ qspi->mm_base = devm_ioremap_resource(dev, res);
+- if (IS_ERR(qspi->mm_base)) {
+- ret = PTR_ERR(qspi->mm_base);
+- goto err_master_put;
+- }
++ if (IS_ERR(qspi->mm_base))
++ return PTR_ERR(qspi->mm_base);
+
+ qspi->mm_size = resource_size(res);
+- if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) {
+- ret = -EINVAL;
+- goto err_master_put;
+- }
++ if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ)
++ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+- if (irq < 0) {
+- ret = irq;
+- goto err_master_put;
+- }
++ if (irq < 0)
++ return irq;
+
+ ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
+ dev_name(dev), qspi);
+ if (ret) {
+ dev_err(dev, "failed to request irq\n");
+- goto err_master_put;
++ return ret;
+ }
+
+ init_completion(&qspi->data_completion);
+ init_completion(&qspi->match_completion);
+
+ qspi->clk = devm_clk_get(dev, NULL);
+- if (IS_ERR(qspi->clk)) {
+- ret = PTR_ERR(qspi->clk);
+- goto err_master_put;
+- }
++ if (IS_ERR(qspi->clk))
++ return PTR_ERR(qspi->clk);
+
+ qspi->clk_rate = clk_get_rate(qspi->clk);
+- if (!qspi->clk_rate) {
+- ret = -EINVAL;
+- goto err_master_put;
+- }
++ if (!qspi->clk_rate)
++ return -EINVAL;
+
+ ret = clk_prepare_enable(qspi->clk);
+ if (ret) {
+ dev_err(dev, "can not enable the clock\n");
+- goto err_master_put;
++ return ret;
+ }
+
+ rstc = devm_reset_control_get_exclusive(dev, NULL);
+@@ -784,7 +773,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
+ pm_runtime_enable(dev);
+ pm_runtime_get_noresume(dev);
+
+- ret = devm_spi_register_master(dev, ctrl);
++ ret = spi_register_master(ctrl);
+ if (ret)
+ goto err_pm_runtime_free;
+
+@@ -806,8 +795,6 @@ err_dma_free:
+ stm32_qspi_dma_free(qspi);
+ err_clk_disable:
+ clk_disable_unprepare(qspi->clk);
+-err_master_put:
+- spi_master_put(qspi->ctrl);
+
+ return ret;
+ }
+@@ -817,6 +804,7 @@ static int stm32_qspi_remove(struct platform_device *pdev)
+ struct stm32_qspi *qspi = platform_get_drvdata(pdev);
+
+ pm_runtime_get_sync(qspi->dev);
++ spi_unregister_master(qspi->ctrl);
+ /* disable qspi */
+ writel_relaxed(0, qspi->io_base + QSPI_CR);
+ stm32_qspi_dma_free(qspi);
+diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
+index 9bd3fd1652f74..3c6f201b5dd85 100644
+--- a/drivers/spi/spi-stm32.c
++++ b/drivers/spi/spi-stm32.c
+@@ -434,7 +434,7 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz,
+ u32 div, mbrdiv;
+
+ /* Ensure spi->clk_rate is even */
+- div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz);
++ div = DIV_ROUND_CLOSEST(spi->clk_rate & ~0x1, speed_hz);
+
+ /*
+ * SPI framework set xfer->speed_hz to master->max_speed_hz if
+@@ -886,6 +886,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
+ static DEFINE_RATELIMIT_STATE(rs,
+ DEFAULT_RATELIMIT_INTERVAL * 10,
+ 1);
++ ratelimit_set_flags(&rs, RATELIMIT_MSG_ON_RELEASE);
+ if (__ratelimit(&rs))
+ dev_dbg_ratelimited(spi->dev, "Communication suspended\n");
+ if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
+diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c
+index ea706d9629cb1..dc188f9202c97 100644
+--- a/drivers/spi/spi-synquacer.c
++++ b/drivers/spi/spi-synquacer.c
+@@ -472,10 +472,9 @@ static int synquacer_spi_transfer_one(struct spi_master *master,
+ read_fifo(sspi);
+ }
+
+- if (status < 0) {
+- dev_err(sspi->dev, "failed to transfer. status: 0x%x\n",
+- status);
+- return status;
++ if (status == 0) {
++ dev_err(sspi->dev, "failed to transfer. Timeout.\n");
++ return -ETIMEDOUT;
+ }
+
+ return 0;
+@@ -783,6 +782,7 @@ static int __maybe_unused synquacer_spi_resume(struct device *dev)
+
+ ret = synquacer_spi_enable(master);
+ if (ret) {
++ clk_disable_unprepare(sspi->clk);
+ dev_err(dev, "failed to enable spi (%d)\n", ret);
+ return ret;
+ }
+diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
+index e9de1d958bbd2..8f345247a8c32 100644
+--- a/drivers/spi/spi-tegra114.c
++++ b/drivers/spi/spi-tegra114.c
+@@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev)
+ tspi->phys = r->start;
+
+ spi_irq = platform_get_irq(pdev, 0);
++ if (spi_irq < 0) {
++ ret = spi_irq;
++ goto exit_free_master;
++ }
+ tspi->irq = spi_irq;
+
+ tspi->clk = devm_clk_get(&pdev->dev, "spi");
+diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
+index 2888d8a8dc6d5..6915451cc93e2 100644
+--- a/drivers/spi/spi-tegra20-sflash.c
++++ b/drivers/spi/spi-tegra20-sflash.c
+@@ -455,7 +455,11 @@ static int tegra_sflash_probe(struct platform_device *pdev)
+ goto exit_free_master;
+ }
+
+- tsd->irq = platform_get_irq(pdev, 0);
++ ret = platform_get_irq(pdev, 0);
++ if (ret < 0)
++ goto exit_free_master;
++ tsd->irq = ret;
++
+ ret = request_irq(tsd->irq, tegra_sflash_isr, 0,
+ dev_name(&pdev->dev), tsd);
+ if (ret < 0) {
+diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
+index 3226c4e1c7c0d..cf61bf302a059 100644
+--- a/drivers/spi/spi-tegra20-slink.c
++++ b/drivers/spi/spi-tegra20-slink.c
+@@ -1003,14 +1003,8 @@ static int tegra_slink_probe(struct platform_device *pdev)
+ struct resource *r;
+ int ret, spi_irq;
+ const struct tegra_slink_chip_data *cdata = NULL;
+- const struct of_device_id *match;
+
+- match = of_match_device(tegra_slink_of_match, &pdev->dev);
+- if (!match) {
+- dev_err(&pdev->dev, "Error: No device match found\n");
+- return -ENODEV;
+- }
+- cdata = match->data;
++ cdata = of_device_get_match_data(&pdev->dev);
+
+ master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
+ if (!master) {
+@@ -1136,7 +1130,7 @@ exit_free_master:
+
+ static int tegra_slink_remove(struct platform_device *pdev)
+ {
+- struct spi_master *master = platform_get_drvdata(pdev);
++ struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
+ struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+ spi_unregister_master(master);
+@@ -1151,6 +1145,7 @@ static int tegra_slink_remove(struct platform_device *pdev)
+ if (tspi->rx_dma_chan)
+ tegra_slink_deinit_dma_param(tspi, true);
+
++ spi_master_put(master);
+ return 0;
+ }
+
+diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
+index 2354ca1e38581..7967073c13545 100644
+--- a/drivers/spi/spi-tegra210-quad.c
++++ b/drivers/spi/spi-tegra210-quad.c
+@@ -1249,6 +1249,8 @@ static int tegra_qspi_probe(struct platform_device *pdev)
+
+ tqspi->phys = r->start;
+ qspi_irq = platform_get_irq(pdev, 0);
++ if (qspi_irq < 0)
++ return qspi_irq;
+ tqspi->irq = qspi_irq;
+
+ tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
+diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
+index e06aafe169e0c..081da1fd3fd7e 100644
+--- a/drivers/spi/spi-ti-qspi.c
++++ b/drivers/spi/spi-ti-qspi.c
+@@ -448,6 +448,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
+ enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+ struct dma_async_tx_descriptor *tx;
+ int ret;
++ unsigned long time_left;
+
+ tx = dmaengine_prep_dma_memcpy(chan, dma_dst, dma_src, len, flags);
+ if (!tx) {
+@@ -467,9 +468,9 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
+ }
+
+ dma_async_issue_pending(chan);
+- ret = wait_for_completion_timeout(&qspi->transfer_complete,
++ time_left = wait_for_completion_timeout(&qspi->transfer_complete,
+ msecs_to_jiffies(len));
+- if (ret <= 0) {
++ if (time_left == 0) {
+ dmaengine_terminate_sync(chan);
+ dev_err(qspi->dev, "DMA wait_for_completion_timeout\n");
+ return -ETIMEDOUT;
+diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
+index 8900e51e1a1cc..cc0da48222311 100644
+--- a/drivers/spi/spi-uniphier.c
++++ b/drivers/spi/spi-uniphier.c
+@@ -726,7 +726,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n",
+ ret);
+- goto out_disable_clk;
++ goto out_release_dma;
+ }
+ dma_tx_burst = caps.max_burst;
+ }
+@@ -735,7 +735,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
+ if (IS_ERR_OR_NULL(master->dma_rx)) {
+ if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+- goto out_disable_clk;
++ goto out_release_dma;
+ }
+ master->dma_rx = NULL;
+ dma_rx_burst = INT_MAX;
+@@ -744,7 +744,7 @@ static int uniphier_spi_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n",
+ ret);
+- goto out_disable_clk;
++ goto out_release_dma;
+ }
+ dma_rx_burst = caps.max_burst;
+ }
+@@ -753,10 +753,20 @@ static int uniphier_spi_probe(struct platform_device *pdev)
+
+ ret = devm_spi_register_master(&pdev->dev, master);
+ if (ret)
+- goto out_disable_clk;
++ goto out_release_dma;
+
+ return 0;
+
++out_release_dma:
++ if (!IS_ERR_OR_NULL(master->dma_rx)) {
++ dma_release_channel(master->dma_rx);
++ master->dma_rx = NULL;
++ }
++ if (!IS_ERR_OR_NULL(master->dma_tx)) {
++ dma_release_channel(master->dma_tx);
++ master->dma_tx = NULL;
++ }
++
+ out_disable_clk:
+ clk_disable_unprepare(priv->clk);
+
+@@ -767,12 +777,13 @@ out_master_put:
+
+ static int uniphier_spi_remove(struct platform_device *pdev)
+ {
+- struct uniphier_spi_priv *priv = platform_get_drvdata(pdev);
++ struct spi_master *master = platform_get_drvdata(pdev);
++ struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+
+- if (priv->master->dma_tx)
+- dma_release_channel(priv->master->dma_tx);
+- if (priv->master->dma_rx)
+- dma_release_channel(priv->master->dma_rx);
++ if (master->dma_tx)
++ dma_release_channel(master->dma_tx);
++ if (master->dma_rx)
++ dma_release_channel(master->dma_rx);
+
+ clk_disable_unprepare(priv->clk);
+
+diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
+index cfa222c9bd5e7..78f31b61a2aac 100644
+--- a/drivers/spi/spi-zynq-qspi.c
++++ b/drivers/spi/spi-zynq-qspi.c
+@@ -570,6 +570,9 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
+
+ if (op->dummy.nbytes) {
+ tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL);
++ if (!tmpbuf)
++ return -ENOMEM;
++
+ memset(tmpbuf, 0xff, op->dummy.nbytes);
+ reinit_completion(&xqspi->data_completion);
+ xqspi->txbuf = tmpbuf;
+diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
+index 328b6559bb19a..2b5afae8ff7fc 100644
+--- a/drivers/spi/spi-zynqmp-gqspi.c
++++ b/drivers/spi/spi-zynqmp-gqspi.c
+@@ -1172,7 +1172,10 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
+ goto clk_dis_all;
+ }
+
+- dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
++ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
++ if (ret)
++ goto clk_dis_all;
++
+ ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+ ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS;
+ ctlr->mem_ops = &zynqmp_qspi_mem_ops;
+diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
+index 926b68aa45d3e..06dd1be54925e 100644
+--- a/drivers/spi/spi.c
++++ b/drivers/spi/spi.c
+@@ -451,6 +451,47 @@ int __spi_register_driver(struct module *owner, struct spi_driver *sdrv)
+ {
+ sdrv->driver.owner = owner;
+ sdrv->driver.bus = &spi_bus_type;
++
++ /*
++ * For Really Good Reasons we use spi: modaliases not of:
++ * modaliases for DT so module autoloading won't work if we
++ * don't have a spi_device_id as well as a compatible string.
++ */
++ if (sdrv->driver.of_match_table) {
++ const struct of_device_id *of_id;
++
++ for (of_id = sdrv->driver.of_match_table; of_id->compatible[0];
++ of_id++) {
++ const char *of_name;
++
++ /* Strip off any vendor prefix */
++ of_name = strnchr(of_id->compatible,
++ sizeof(of_id->compatible), ',');
++ if (of_name)
++ of_name++;
++ else
++ of_name = of_id->compatible;
++
++ if (sdrv->id_table) {
++ const struct spi_device_id *spi_id;
++
++ for (spi_id = sdrv->id_table; spi_id->name[0];
++ spi_id++)
++ if (strcmp(spi_id->name, of_name) == 0)
++ break;
++
++ if (spi_id->name[0])
++ continue;
++ } else {
++ if (strcmp(sdrv->driver.name, of_name) == 0)
++ continue;
++ }
++
++ pr_warn("SPI driver %s has no spi_device_id for %s\n",
++ sdrv->driver.name, of_id->compatible);
++ }
++ }
++
+ return driver_register(&sdrv->driver);
+ }
+ EXPORT_SYMBOL_GPL(__spi_register_driver);
+@@ -827,12 +868,9 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
+ spi->controller->last_cs_enable = enable;
+ spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
+
+- if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
+- !spi->controller->set_cs_timing) {
+- if (activate)
+- spi_delay_exec(&spi->cs_setup, NULL);
+- else
+- spi_delay_exec(&spi->cs_hold, NULL);
++ if ((spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
++ !spi->controller->set_cs_timing) && !activate) {
++ spi_delay_exec(&spi->cs_hold, NULL);
+ }
+
+ if (spi->mode & SPI_CS_HIGH)
+@@ -874,7 +912,9 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
+
+ if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
+ !spi->controller->set_cs_timing) {
+- if (!activate)
++ if (activate)
++ spi_delay_exec(&spi->cs_setup, NULL);
++ else
+ spi_delay_exec(&spi->cs_inactive, NULL);
+ }
+ }
+@@ -902,10 +942,10 @@ int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+ int i, ret;
+
+ if (vmalloced_buf || kmap_buf) {
+- desc_len = min_t(int, max_seg_size, PAGE_SIZE);
++ desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE);
+ sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
+ } else if (virt_addr_valid(buf)) {
+- desc_len = min_t(int, max_seg_size, ctlr->max_dma_len);
++ desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len);
+ sgs = DIV_ROUND_UP(len, desc_len);
+ } else {
+ return -EINVAL;
+@@ -967,6 +1007,8 @@ void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
+ if (sgt->orig_nents) {
+ dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
+ sg_free_table(sgt);
++ sgt->orig_nents = 0;
++ sgt->nents = 0;
+ }
+ }
+
+@@ -1032,11 +1074,15 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg)
+
+ if (ctlr->dma_tx)
+ tx_dev = ctlr->dma_tx->device->dev;
++ else if (ctlr->dma_map_dev)
++ tx_dev = ctlr->dma_map_dev;
+ else
+ tx_dev = ctlr->dev.parent;
+
+ if (ctlr->dma_rx)
+ rx_dev = ctlr->dma_rx->device->dev;
++ else if (ctlr->dma_map_dev)
++ rx_dev = ctlr->dma_map_dev;
+ else
+ rx_dev = ctlr->dev.parent;
+
+@@ -2905,9 +2951,9 @@ free_bus_id:
+ }
+ EXPORT_SYMBOL_GPL(spi_register_controller);
+
+-static void devm_spi_unregister(void *ctlr)
++static void devm_spi_unregister(struct device *dev, void *res)
+ {
+- spi_unregister_controller(ctlr);
++ spi_unregister_controller(*(struct spi_controller **)res);
+ }
+
+ /**
+@@ -2926,13 +2972,22 @@ static void devm_spi_unregister(void *ctlr)
+ int devm_spi_register_controller(struct device *dev,
+ struct spi_controller *ctlr)
+ {
++ struct spi_controller **ptr;
+ int ret;
+
++ ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL);
++ if (!ptr)
++ return -ENOMEM;
++
+ ret = spi_register_controller(ctlr);
+- if (ret)
+- return ret;
++ if (!ret) {
++ *ptr = ctlr;
++ devres_add(dev, ptr);
++ } else {
++ devres_free(ptr);
++ }
+
+- return devm_add_action_or_reset(dev, devm_spi_unregister, ctlr);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(devm_spi_register_controller);
+
+@@ -2979,12 +3034,6 @@ void spi_unregister_controller(struct spi_controller *ctlr)
+
+ device_del(&ctlr->dev);
+
+- /* Release the last reference on the controller if its driver
+- * has not yet been converted to devm_spi_alloc_master/slave().
+- */
+- if (!ctlr->devm_allocated)
+- put_device(&ctlr->dev);
+-
+ /* free bus id */
+ mutex_lock(&board_lock);
+ if (found == ctlr)
+@@ -2993,6 +3042,12 @@ void spi_unregister_controller(struct spi_controller *ctlr)
+
+ if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
+ mutex_unlock(&ctlr->add_lock);
++
++ /* Release the last reference on the controller if its driver
++ * has not yet been converted to devm_spi_alloc_master/slave().
++ */
++ if (!ctlr->devm_allocated)
++ put_device(&ctlr->dev);
+ }
+ EXPORT_SYMBOL_GPL(spi_unregister_controller);
+
+diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
+index 1bd73e322b7bb..922d778df0641 100644
+--- a/drivers/spi/spidev.c
++++ b/drivers/spi/spidev.c
+@@ -376,12 +376,23 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ switch (cmd) {
+ /* read requests */
+ case SPI_IOC_RD_MODE:
+- retval = put_user(spi->mode & SPI_MODE_MASK,
+- (__u8 __user *)arg);
+- break;
+ case SPI_IOC_RD_MODE32:
+- retval = put_user(spi->mode & SPI_MODE_MASK,
+- (__u32 __user *)arg);
++ tmp = spi->mode;
++
++ {
++ struct spi_controller *ctlr = spi->controller;
++
++ if (ctlr->use_gpio_descriptors && ctlr->cs_gpiods &&
++ ctlr->cs_gpiods[spi->chip_select])
++ tmp &= ~SPI_CS_HIGH;
++ }
++
++ if (cmd == SPI_IOC_RD_MODE)
++ retval = put_user(tmp & SPI_MODE_MASK,
++ (__u8 __user *)arg);
++ else
++ retval = put_user(tmp & SPI_MODE_MASK,
++ (__u32 __user *)arg);
+ break;
+ case SPI_IOC_RD_LSB_FIRST:
+ retval = put_user((spi->mode & SPI_LSB_FIRST) ? 1 : 0,
+@@ -581,7 +592,6 @@ static int spidev_open(struct inode *inode, struct file *filp)
+ if (!spidev->tx_buffer) {
+ spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL);
+ if (!spidev->tx_buffer) {
+- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
+ status = -ENOMEM;
+ goto err_find_dev;
+ }
+@@ -590,7 +600,6 @@ static int spidev_open(struct inode *inode, struct file *filp)
+ if (!spidev->rx_buffer) {
+ spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL);
+ if (!spidev->rx_buffer) {
+- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
+ status = -ENOMEM;
+ goto err_alloc_rx_buf;
+ }
+diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
+index bbbd311eda030..e6de2aeece8d3 100644
+--- a/drivers/spmi/spmi-pmic-arb.c
++++ b/drivers/spmi/spmi-pmic-arb.c
+@@ -887,7 +887,8 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb)
+ * version 5, there is more than one APID mapped to each PPID.
+ * The owner field for each of these mappings specifies the EE which is
+ * allowed to write to the APID. The owner of the last (highest) APID
+- * for a given PPID will receive interrupts from the PPID.
++ * which has the IRQ owner bit set for a given PPID will receive
++ * interrupts from the PPID.
+ */
+ for (i = 0; ; i++, apidd++) {
+ offset = pmic_arb->ver_ops->apid_map_offset(i);
+@@ -910,16 +911,16 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb)
+ apid = pmic_arb->ppid_to_apid[ppid] & ~PMIC_ARB_APID_VALID;
+ prev_apidd = &pmic_arb->apid_data[apid];
+
+- if (valid && is_irq_ee &&
+- prev_apidd->write_ee == pmic_arb->ee) {
++ if (!valid || apidd->write_ee == pmic_arb->ee) {
++ /* First PPID mapping or one for this EE */
++ pmic_arb->ppid_to_apid[ppid] = i | PMIC_ARB_APID_VALID;
++ } else if (valid && is_irq_ee &&
++ prev_apidd->write_ee == pmic_arb->ee) {
+ /*
+ * Duplicate PPID mapping after the one for this EE;
+ * override the irq owner
+ */
+ prev_apidd->irq_ee = apidd->irq_ee;
+- } else if (!valid || is_irq_ee) {
+- /* First PPID mapping or duplicate for another EE */
+- pmic_arb->ppid_to_apid[ppid] = i | PMIC_ARB_APID_VALID;
+ }
+
+ apidd->ppid = ppid;
+diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c
+index b37ead9e2fade..38913c0f11158 100644
+--- a/drivers/spmi/spmi.c
++++ b/drivers/spmi/spmi.c
+@@ -350,7 +350,8 @@ static void spmi_drv_remove(struct device *dev)
+ const struct spmi_driver *sdrv = to_spmi_driver(dev->driver);
+
+ pm_runtime_get_sync(dev);
+- sdrv->remove(to_spmi_device(dev));
++ if (sdrv->remove)
++ sdrv->remove(to_spmi_device(dev));
+ pm_runtime_put_noidle(dev);
+
+ pm_runtime_disable(dev);
+diff --git a/drivers/staging/clocking-wizard/clk-xlnx-clock-wizard.c b/drivers/staging/clocking-wizard/clk-xlnx-clock-wizard.c
+index 39367712ef540..8c1934df70dea 100644
+--- a/drivers/staging/clocking-wizard/clk-xlnx-clock-wizard.c
++++ b/drivers/staging/clocking-wizard/clk-xlnx-clock-wizard.c
+@@ -347,7 +347,7 @@ static struct clk *clk_wzrd_register_divider(struct device *dev,
+ hw = &div->hw;
+ ret = devm_clk_hw_register(dev, hw);
+ if (ret)
+- hw = ERR_PTR(ret);
++ return ERR_PTR(ret);
+
+ return hw->clk;
+ }
+diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c
+index b6abd3770e81c..edd20a03f7a26 100644
+--- a/drivers/staging/emxx_udc/emxx_udc.c
++++ b/drivers/staging/emxx_udc/emxx_udc.c
+@@ -2590,10 +2590,15 @@ static int nbu2ss_ep_queue(struct usb_ep *_ep,
+ req->unaligned = false;
+
+ if (req->unaligned) {
+- if (!ep->virt_buf)
++ if (!ep->virt_buf) {
+ ep->virt_buf = dma_alloc_coherent(udc->dev, PAGE_SIZE,
+ &ep->phys_buf,
+ GFP_ATOMIC | GFP_DMA);
++ if (!ep->virt_buf) {
++ spin_unlock_irqrestore(&udc->lock, flags);
++ return -ENOMEM;
++ }
++ }
+ if (ep->epnum > 0) {
+ if (ep->direct == USB_DIR_IN)
+ memcpy(ep->virt_buf, req->req.buf,
+diff --git a/drivers/staging/fbtft/fb_ssd1351.c b/drivers/staging/fbtft/fb_ssd1351.c
+index cf263a58a1489..6fd549a424d53 100644
+--- a/drivers/staging/fbtft/fb_ssd1351.c
++++ b/drivers/staging/fbtft/fb_ssd1351.c
+@@ -187,7 +187,6 @@ static struct fbtft_display display = {
+ },
+ };
+
+-#ifdef CONFIG_FB_BACKLIGHT
+ static int update_onboard_backlight(struct backlight_device *bd)
+ {
+ struct fbtft_par *par = bl_get_data(bd);
+@@ -231,9 +230,6 @@ static void register_onboard_backlight(struct fbtft_par *par)
+ if (!par->fbtftops.unregister_backlight)
+ par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
+ }
+-#else
+-static void register_onboard_backlight(struct fbtft_par *par) { };
+-#endif
+
+ FBTFT_REGISTER_DRIVER(DRVNAME, "solomon,ssd1351", &display);
+
+diff --git a/drivers/staging/fbtft/fb_st7789v.c b/drivers/staging/fbtft/fb_st7789v.c
+index abe9395a0aefd..861a154144e66 100644
+--- a/drivers/staging/fbtft/fb_st7789v.c
++++ b/drivers/staging/fbtft/fb_st7789v.c
+@@ -144,6 +144,8 @@ static int init_display(struct fbtft_par *par)
+ {
+ int rc;
+
++ par->fbtftops.reset(par);
++
+ rc = init_tearing_effect_line(par);
+ if (rc)
+ return rc;
+diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c
+index ed992ca605ebe..1690358b8f018 100644
+--- a/drivers/staging/fbtft/fbtft-core.c
++++ b/drivers/staging/fbtft/fbtft-core.c
+@@ -128,7 +128,6 @@ static int fbtft_request_gpios(struct fbtft_par *par)
+ return 0;
+ }
+
+-#ifdef CONFIG_FB_BACKLIGHT
+ static int fbtft_backlight_update_status(struct backlight_device *bd)
+ {
+ struct fbtft_par *par = bl_get_data(bd);
+@@ -161,6 +160,7 @@ void fbtft_unregister_backlight(struct fbtft_par *par)
+ par->info->bl_dev = NULL;
+ }
+ }
++EXPORT_SYMBOL(fbtft_unregister_backlight);
+
+ static const struct backlight_ops fbtft_bl_ops = {
+ .get_brightness = fbtft_backlight_get_brightness,
+@@ -198,12 +198,7 @@ void fbtft_register_backlight(struct fbtft_par *par)
+ if (!par->fbtftops.unregister_backlight)
+ par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
+ }
+-#else
+-void fbtft_register_backlight(struct fbtft_par *par) { };
+-void fbtft_unregister_backlight(struct fbtft_par *par) { };
+-#endif
+ EXPORT_SYMBOL(fbtft_register_backlight);
+-EXPORT_SYMBOL(fbtft_unregister_backlight);
+
+ static void fbtft_set_addr_win(struct fbtft_par *par, int xs, int ys, int xe,
+ int ye)
+@@ -853,13 +848,11 @@ int fbtft_register_framebuffer(struct fb_info *fb_info)
+ fb_info->fix.smem_len >> 10, text1,
+ HZ / fb_info->fbdefio->delay, text2);
+
+-#ifdef CONFIG_FB_BACKLIGHT
+ /* Turn on backlight if available */
+ if (fb_info->bl_dev) {
+ fb_info->bl_dev->props.power = FB_BLANK_UNBLANK;
+ fb_info->bl_dev->ops->update_status(fb_info->bl_dev);
+ }
+-#endif
+
+ return 0;
+
+diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h
+index 76f8c090a8370..06afaa9d505ba 100644
+--- a/drivers/staging/fbtft/fbtft.h
++++ b/drivers/staging/fbtft/fbtft.h
+@@ -332,7 +332,10 @@ static int __init fbtft_driver_module_init(void) \
+ ret = spi_register_driver(&fbtft_driver_spi_driver); \
+ if (ret < 0) \
+ return ret; \
+- return platform_driver_register(&fbtft_driver_platform_driver); \
++ ret = platform_driver_register(&fbtft_driver_platform_driver); \
++ if (ret < 0) \
++ spi_unregister_driver(&fbtft_driver_spi_driver); \
++ return ret; \
+ } \
+ \
+ static void __exit fbtft_driver_module_exit(void) \
+diff --git a/drivers/staging/fieldbus/anybuss/host.c b/drivers/staging/fieldbus/anybuss/host.c
+index 8a75f6642c783..0c41d1e0204fb 100644
+--- a/drivers/staging/fieldbus/anybuss/host.c
++++ b/drivers/staging/fieldbus/anybuss/host.c
+@@ -1384,7 +1384,7 @@ anybuss_host_common_probe(struct device *dev,
+ goto err_device;
+ return cd;
+ err_device:
+- device_unregister(&cd->client->dev);
++ put_device(&cd->client->dev);
+ err_kthread:
+ kthread_stop(cd->qthread);
+ err_reset:
+diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c
+index e390c924ec1c8..3c680ed4429c1 100644
+--- a/drivers/staging/gdm724x/gdm_lte.c
++++ b/drivers/staging/gdm724x/gdm_lte.c
+@@ -76,14 +76,15 @@ static void tx_complete(void *arg)
+
+ static int gdm_lte_rx(struct sk_buff *skb, struct nic *nic, int nic_type)
+ {
+- int ret;
++ int ret, len;
+
++ len = skb->len + ETH_HLEN;
+ ret = netif_rx_ni(skb);
+ if (ret == NET_RX_DROP) {
+ nic->stats.rx_dropped++;
+ } else {
+ nic->stats.rx_packets++;
+- nic->stats.rx_bytes += skb->len + ETH_HLEN;
++ nic->stats.rx_bytes += len;
+ }
+
+ return 0;
+diff --git a/drivers/staging/greybus/audio_codec.c b/drivers/staging/greybus/audio_codec.c
+index b589cf6b1d034..e19b91e7a72ef 100644
+--- a/drivers/staging/greybus/audio_codec.c
++++ b/drivers/staging/greybus/audio_codec.c
+@@ -599,8 +599,8 @@ static int gbcodec_mute_stream(struct snd_soc_dai *dai, int mute, int stream)
+ break;
+ }
+ if (!data) {
+- dev_err(dai->dev, "%s:%s DATA connection missing\n",
+- dai->name, module->name);
++ dev_err(dai->dev, "%s DATA connection missing\n",
++ dai->name);
+ mutex_unlock(&codec->lock);
+ return -ENODEV;
+ }
+diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c
+index 1ed4772d27715..79bb2bd8e0007 100644
+--- a/drivers/staging/greybus/audio_helper.c
++++ b/drivers/staging/greybus/audio_helper.c
+@@ -3,7 +3,6 @@
+ * Greybus Audio Sound SoC helper APIs
+ */
+
+-#include <linux/debugfs.h>
+ #include <sound/core.h>
+ #include <sound/soc.h>
+ #include <sound/soc-dapm.h>
+@@ -116,10 +115,6 @@ int gbaudio_dapm_free_controls(struct snd_soc_dapm_context *dapm,
+ {
+ int i;
+ struct snd_soc_dapm_widget *w, *next_w;
+-#ifdef CONFIG_DEBUG_FS
+- struct dentry *parent = dapm->debugfs_dapm;
+- struct dentry *debugfs_w = NULL;
+-#endif
+
+ mutex_lock(&dapm->card->dapm_mutex);
+ for (i = 0; i < num; i++) {
+@@ -139,12 +134,6 @@ int gbaudio_dapm_free_controls(struct snd_soc_dapm_context *dapm,
+ continue;
+ }
+ widget++;
+-#ifdef CONFIG_DEBUG_FS
+- if (!parent)
+- debugfs_w = debugfs_lookup(w->name, parent);
+- debugfs_remove(debugfs_w);
+- debugfs_w = NULL;
+-#endif
+ gbaudio_dapm_free_widget(w);
+ }
+ mutex_unlock(&dapm->card->dapm_mutex);
+@@ -192,7 +181,11 @@ int gbaudio_remove_component_controls(struct snd_soc_component *component,
+ unsigned int num_controls)
+ {
+ struct snd_card *card = component->card->snd_card;
++ int err;
+
+- return gbaudio_remove_controls(card, component->dev, controls,
+- num_controls, component->name_prefix);
++ down_write(&card->controls_rwsem);
++ err = gbaudio_remove_controls(card, component->dev, controls,
++ num_controls, component->name_prefix);
++ up_write(&card->controls_rwsem);
++ return err;
+ }
+diff --git a/drivers/staging/greybus/audio_topology.c b/drivers/staging/greybus/audio_topology.c
+index 1e613d42d8237..62d7674852bec 100644
+--- a/drivers/staging/greybus/audio_topology.c
++++ b/drivers/staging/greybus/audio_topology.c
+@@ -147,6 +147,9 @@ static const char **gb_generate_enum_strings(struct gbaudio_module_info *gb,
+
+ items = le32_to_cpu(gbenum->items);
+ strings = devm_kcalloc(gb->dev, items, sizeof(char *), GFP_KERNEL);
++ if (!strings)
++ return NULL;
++
+ data = gbenum->names;
+
+ for (i = 0; i < items; i++) {
+@@ -655,6 +658,8 @@ static int gbaudio_tplg_create_enum_kctl(struct gbaudio_module_info *gb,
+ /* since count=1, and reg is dummy */
+ gbe->items = le32_to_cpu(gb_enum->items);
+ gbe->texts = gb_generate_enum_strings(gb, gb_enum);
++ if (!gbe->texts)
++ return -ENOMEM;
+
+ /* debug enum info */
+ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->items,
+@@ -862,6 +867,8 @@ static int gbaudio_tplg_create_enum_ctl(struct gbaudio_module_info *gb,
+ /* since count=1, and reg is dummy */
+ gbe->items = le32_to_cpu(gb_enum->items);
+ gbe->texts = gb_generate_enum_strings(gb, gb_enum);
++ if (!gbe->texts)
++ return -ENOMEM;
+
+ /* debug enum info */
+ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->items,
+@@ -974,6 +981,44 @@ static int gbaudio_widget_event(struct snd_soc_dapm_widget *w,
+ return ret;
+ }
+
++static const struct snd_soc_dapm_widget gbaudio_widgets[] = {
++ [snd_soc_dapm_spk] = SND_SOC_DAPM_SPK(NULL, gbcodec_event_spk),
++ [snd_soc_dapm_hp] = SND_SOC_DAPM_HP(NULL, gbcodec_event_hp),
++ [snd_soc_dapm_mic] = SND_SOC_DAPM_MIC(NULL, gbcodec_event_int_mic),
++ [snd_soc_dapm_output] = SND_SOC_DAPM_OUTPUT(NULL),
++ [snd_soc_dapm_input] = SND_SOC_DAPM_INPUT(NULL),
++ [snd_soc_dapm_switch] = SND_SOC_DAPM_SWITCH_E(NULL, SND_SOC_NOPM,
++ 0, 0, NULL,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++ [snd_soc_dapm_pga] = SND_SOC_DAPM_PGA_E(NULL, SND_SOC_NOPM,
++ 0, 0, NULL, 0,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++ [snd_soc_dapm_mixer] = SND_SOC_DAPM_MIXER_E(NULL, SND_SOC_NOPM,
++ 0, 0, NULL, 0,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++ [snd_soc_dapm_mux] = SND_SOC_DAPM_MUX_E(NULL, SND_SOC_NOPM,
++ 0, 0, NULL,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++ [snd_soc_dapm_aif_in] = SND_SOC_DAPM_AIF_IN_E(NULL, NULL, 0,
++ SND_SOC_NOPM, 0, 0,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++ [snd_soc_dapm_aif_out] = SND_SOC_DAPM_AIF_OUT_E(NULL, NULL, 0,
++ SND_SOC_NOPM, 0, 0,
++ gbaudio_widget_event,
++ SND_SOC_DAPM_PRE_PMU |
++ SND_SOC_DAPM_POST_PMD),
++};
++
+ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module,
+ struct snd_soc_dapm_widget *dw,
+ struct gb_audio_widget *w, int *w_size)
+@@ -1034,6 +1079,10 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module,
+ csize += le16_to_cpu(gbenum->names_length);
+ control->texts = (const char * const *)
+ gb_generate_enum_strings(module, gbenum);
++ if (!control->texts) {
++ ret = -ENOMEM;
++ goto error;
++ }
+ control->items = le32_to_cpu(gbenum->items);
+ } else {
+ csize = sizeof(struct gb_audio_control);
+@@ -1052,77 +1101,37 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module,
+
+ switch (w->type) {
+ case snd_soc_dapm_spk:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_SPK(w->name, gbcodec_event_spk);
++ *dw = gbaudio_widgets[w->type];
+ module->op_devices |= GBAUDIO_DEVICE_OUT_SPEAKER;
+ break;
+ case snd_soc_dapm_hp:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_HP(w->name, gbcodec_event_hp);
++ *dw = gbaudio_widgets[w->type];
+ module->op_devices |= (GBAUDIO_DEVICE_OUT_WIRED_HEADSET
+ | GBAUDIO_DEVICE_OUT_WIRED_HEADPHONE);
+ module->ip_devices |= GBAUDIO_DEVICE_IN_WIRED_HEADSET;
+ break;
+ case snd_soc_dapm_mic:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_MIC(w->name, gbcodec_event_int_mic);
++ *dw = gbaudio_widgets[w->type];
+ module->ip_devices |= GBAUDIO_DEVICE_IN_BUILTIN_MIC;
+ break;
+ case snd_soc_dapm_output:
+- *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_OUTPUT(w->name);
+- break;
+ case snd_soc_dapm_input:
+- *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_INPUT(w->name);
+- break;
+ case snd_soc_dapm_switch:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_SWITCH_E(w->name, SND_SOC_NOPM, 0, 0,
+- widget_kctls,
+- gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
+- break;
+ case snd_soc_dapm_pga:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_PGA_E(w->name, SND_SOC_NOPM, 0, 0, NULL, 0,
+- gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
+- break;
+ case snd_soc_dapm_mixer:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_MIXER_E(w->name, SND_SOC_NOPM, 0, 0, NULL,
+- 0, gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
+- break;
+ case snd_soc_dapm_mux:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_MUX_E(w->name, SND_SOC_NOPM, 0, 0,
+- widget_kctls, gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
++ *dw = gbaudio_widgets[w->type];
+ break;
+ case snd_soc_dapm_aif_in:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_AIF_IN_E(w->name, w->sname, 0,
+- SND_SOC_NOPM,
+- 0, 0, gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
+- break;
+ case snd_soc_dapm_aif_out:
+- *dw = (struct snd_soc_dapm_widget)
+- SND_SOC_DAPM_AIF_OUT_E(w->name, w->sname, 0,
+- SND_SOC_NOPM,
+- 0, 0, gbaudio_widget_event,
+- SND_SOC_DAPM_PRE_PMU |
+- SND_SOC_DAPM_POST_PMD);
++ *dw = gbaudio_widgets[w->type];
++ dw->sname = w->sname;
+ break;
+ default:
+ ret = -EINVAL;
+ goto error;
+ }
++ dw->name = w->name;
+
+ dev_dbg(module->dev, "%s: widget of type %d created\n", dw->name,
+ dw->id);
+@@ -1183,6 +1192,10 @@ static int gbaudio_tplg_process_kcontrols(struct gbaudio_module_info *module,
+ csize += le16_to_cpu(gbenum->names_length);
+ control->texts = (const char * const *)
+ gb_generate_enum_strings(module, gbenum);
++ if (!control->texts) {
++ ret = -ENOMEM;
++ goto error;
++ }
+ control->items = le32_to_cpu(gbenum->items);
+ } else {
+ csize = sizeof(struct gb_audio_control);
+diff --git a/drivers/staging/iio/accel/adis16203.c b/drivers/staging/iio/accel/adis16203.c
+index 1d3026dae827e..62d5397ff1f98 100644
+--- a/drivers/staging/iio/accel/adis16203.c
++++ b/drivers/staging/iio/accel/adis16203.c
+@@ -312,3 +312,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+ MODULE_DESCRIPTION("Analog Devices ADIS16203 Programmable 360 Degrees Inclinometer");
+ MODULE_LICENSE("GPL v2");
+ MODULE_ALIAS("spi:adis16203");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/staging/iio/accel/adis16240.c b/drivers/staging/iio/accel/adis16240.c
+index 2a8aa83b8d9e6..bca857eef92e2 100644
+--- a/drivers/staging/iio/accel/adis16240.c
++++ b/drivers/staging/iio/accel/adis16240.c
+@@ -440,3 +440,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+ MODULE_DESCRIPTION("Analog Devices Programmable Impact Sensor and Recorder");
+ MODULE_LICENSE("GPL v2");
+ MODULE_ALIAS("spi:adis16240");
++MODULE_IMPORT_NS(IIO_ADISLIB);
+diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c
+index fef0055b89909..20183b2ea1279 100644
+--- a/drivers/staging/iio/adc/ad7280a.c
++++ b/drivers/staging/iio/adc/ad7280a.c
+@@ -107,9 +107,9 @@
+ static unsigned int ad7280a_devaddr(unsigned int addr)
+ {
+ return ((addr & 0x1) << 4) |
+- ((addr & 0x2) << 3) |
++ ((addr & 0x2) << 2) |
+ (addr & 0x4) |
+- ((addr & 0x8) >> 3) |
++ ((addr & 0x8) >> 2) |
+ ((addr & 0x10) >> 4);
+ }
+
+diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c
+index 74adb82f37c30..a19cfb2998c93 100644
+--- a/drivers/staging/iio/resolver/ad2s1210.c
++++ b/drivers/staging/iio/resolver/ad2s1210.c
+@@ -101,7 +101,7 @@ struct ad2s1210_state {
+ static const int ad2s1210_mode_vals[4][2] = {
+ [MOD_POS] = { 0, 0 },
+ [MOD_VEL] = { 0, 1 },
+- [MOD_CONFIG] = { 1, 0 },
++ [MOD_CONFIG] = { 1, 1 },
+ };
+
+ static inline void ad2s1210_set_mode(enum ad2s1210_mode mode,
+diff --git a/drivers/staging/ks7010/Kconfig b/drivers/staging/ks7010/Kconfig
+index 0987fdc2f70db..8ea6c09286798 100644
+--- a/drivers/staging/ks7010/Kconfig
++++ b/drivers/staging/ks7010/Kconfig
+@@ -5,6 +5,9 @@ config KS7010
+ select WIRELESS_EXT
+ select WEXT_PRIV
+ select FW_LOADER
++ select CRYPTO
++ select CRYPTO_HASH
++ select CRYPTO_MICHAEL_MIC
+ help
+ This is a driver for KeyStream KS7010 based SDIO WIFI cards. It is
+ found on at least later Spectec SDW-821 (FCC-ID "S2Y-WLAN-11G-K" only,
+diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c
+index 631ad769c3d56..8d8822c60a2bc 100644
+--- a/drivers/staging/ks7010/ks_wlan_net.c
++++ b/drivers/staging/ks7010/ks_wlan_net.c
+@@ -1584,8 +1584,10 @@ static int ks_wlan_set_encode_ext(struct net_device *dev,
+ commit |= SME_WEP_FLAG;
+ }
+ if (enc->key_len) {
+- memcpy(&key->key_val[0], &enc->key[0], enc->key_len);
+- key->key_len = enc->key_len;
++ int key_len = clamp_val(enc->key_len, 0, IW_ENCODING_TOKEN_MAX);
++
++ memcpy(&key->key_val[0], &enc->key[0], key_len);
++ key->key_len = key_len;
+ commit |= (SME_WEP_VAL1 << index);
+ }
+ break;
+diff --git a/drivers/staging/media/atomisp/Kconfig b/drivers/staging/media/atomisp/Kconfig
+index aeed5803dfb1e..0031d76356c1c 100644
+--- a/drivers/staging/media/atomisp/Kconfig
++++ b/drivers/staging/media/atomisp/Kconfig
+@@ -13,6 +13,7 @@ config VIDEO_ATOMISP
+ tristate "Intel Atom Image Signal Processor Driver"
+ depends on VIDEO_V4L2 && INTEL_ATOMISP
+ depends on PMIC_OPREGION
++ select V4L2_FWNODE
+ select IOSF_MBI
+ select VIDEOBUF_VMALLOC
+ select VIDEO_V4L2_SUBDEV_API
+diff --git a/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c b/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c
+index 362ed44b4effa..e046489cd253b 100644
+--- a/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c
++++ b/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c
+@@ -835,7 +835,6 @@ static int lm3554_probe(struct i2c_client *client)
+ int err = 0;
+ struct lm3554 *flash;
+ unsigned int i;
+- int ret;
+
+ flash = kzalloc(sizeof(*flash), GFP_KERNEL);
+ if (!flash)
+@@ -844,7 +843,7 @@ static int lm3554_probe(struct i2c_client *client)
+ flash->pdata = lm3554_platform_data_func(client);
+ if (IS_ERR(flash->pdata)) {
+ err = PTR_ERR(flash->pdata);
+- goto fail1;
++ goto free_flash;
+ }
+
+ v4l2_i2c_subdev_init(&flash->sd, client, &lm3554_ops);
+@@ -852,12 +851,12 @@ static int lm3554_probe(struct i2c_client *client)
+ flash->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+ flash->mode = ATOMISP_FLASH_MODE_OFF;
+ flash->timeout = LM3554_MAX_TIMEOUT / LM3554_TIMEOUT_STEPSIZE - 1;
+- ret =
++ err =
+ v4l2_ctrl_handler_init(&flash->ctrl_handler,
+ ARRAY_SIZE(lm3554_controls));
+- if (ret) {
++ if (err) {
+ dev_err(&client->dev, "error initialize a ctrl_handler.\n");
+- goto fail3;
++ goto unregister_subdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(lm3554_controls); i++)
+@@ -866,14 +865,15 @@ static int lm3554_probe(struct i2c_client *client)
+
+ if (flash->ctrl_handler.error) {
+ dev_err(&client->dev, "ctrl_handler error.\n");
+- goto fail3;
++ err = flash->ctrl_handler.error;
++ goto free_handler;
+ }
+
+ flash->sd.ctrl_handler = &flash->ctrl_handler;
+ err = media_entity_pads_init(&flash->sd.entity, 0, NULL);
+ if (err) {
+ dev_err(&client->dev, "error initialize a media entity.\n");
+- goto fail2;
++ goto free_handler;
+ }
+
+ flash->sd.entity.function = MEDIA_ENT_F_FLASH;
+@@ -884,16 +884,27 @@ static int lm3554_probe(struct i2c_client *client)
+
+ err = lm3554_gpio_init(client);
+ if (err) {
+- dev_err(&client->dev, "gpio request/direction_output fail");
+- goto fail3;
++ dev_err(&client->dev, "gpio request/direction_output fail.\n");
++ goto cleanup_media;
++ }
++
++ err = atomisp_register_i2c_module(&flash->sd, NULL, LED_FLASH);
++ if (err) {
++ dev_err(&client->dev, "fail to register atomisp i2c module.\n");
++ goto uninit_gpio;
+ }
+- return atomisp_register_i2c_module(&flash->sd, NULL, LED_FLASH);
+-fail3:
++
++ return 0;
++
++uninit_gpio:
++ lm3554_gpio_uninit(client);
++cleanup_media:
+ media_entity_cleanup(&flash->sd.entity);
++free_handler:
+ v4l2_ctrl_handler_free(&flash->ctrl_handler);
+-fail2:
++unregister_subdev:
+ v4l2_device_unregister_subdev(&flash->sd);
+-fail1:
++free_flash:
+ kfree(flash);
+
+ return err;
+diff --git a/drivers/staging/media/atomisp/i2c/ov2680.h b/drivers/staging/media/atomisp/i2c/ov2680.h
+index 874115f35fcad..798b28e134b64 100644
+--- a/drivers/staging/media/atomisp/i2c/ov2680.h
++++ b/drivers/staging/media/atomisp/i2c/ov2680.h
+@@ -289,8 +289,6 @@ static struct ov2680_reg const ov2680_global_setting[] = {
+ */
+ static struct ov2680_reg const ov2680_QCIF_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x24},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -334,8 +332,6 @@ static struct ov2680_reg const ov2680_QCIF_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_CIF_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x24},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -377,8 +373,6 @@ static struct ov2680_reg const ov2680_CIF_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_QVGA_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x24},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -420,8 +414,6 @@ static struct ov2680_reg const ov2680_QVGA_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_656x496_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x24},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -463,8 +455,6 @@ static struct ov2680_reg const ov2680_656x496_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_720x592_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x26},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0x00}, // X_ADDR_START;
+ {0x3802, 0x00},
+@@ -508,8 +498,6 @@ static struct ov2680_reg const ov2680_720x592_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_800x600_30fps[] = {
+ {0x3086, 0x01},
+- {0x3501, 0x26},
+- {0x3502, 0x40},
+ {0x370a, 0x23},
+ {0x3801, 0x00},
+ {0x3802, 0x00},
+@@ -551,8 +539,6 @@ static struct ov2680_reg const ov2680_800x600_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_720p_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -594,8 +580,6 @@ static struct ov2680_reg const ov2680_720p_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_1296x976_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0xa0},
+ {0x3802, 0x00},
+@@ -637,8 +621,6 @@ static struct ov2680_reg const ov2680_1296x976_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_1456x1096_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0x90},
+ {0x3802, 0x00},
+@@ -682,8 +664,6 @@ static struct ov2680_reg const ov2680_1456x1096_30fps[] = {
+
+ static struct ov2680_reg const ov2680_1616x916_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0x00},
+ {0x3802, 0x00},
+@@ -726,8 +706,6 @@ static struct ov2680_reg const ov2680_1616x916_30fps[] = {
+ #if 0
+ static struct ov2680_reg const ov2680_1616x1082_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0x00},
+ {0x3802, 0x00},
+@@ -769,8 +747,6 @@ static struct ov2680_reg const ov2680_1616x1082_30fps[] = {
+ */
+ static struct ov2680_reg const ov2680_1616x1216_30fps[] = {
+ {0x3086, 0x00},
+- {0x3501, 0x48},
+- {0x3502, 0xe0},
+ {0x370a, 0x21},
+ {0x3801, 0x00},
+ {0x3802, 0x00},
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c
+index 9a1751895ab03..28cb271663c47 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_acc.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c
+@@ -439,6 +439,18 @@ int atomisp_acc_s_mapped_arg(struct atomisp_sub_device *asd,
+ return 0;
+ }
+
++static void atomisp_acc_unload_some_extensions(struct atomisp_sub_device *asd,
++ int i,
++ struct atomisp_acc_fw *acc_fw)
++{
++ while (--i >= 0) {
++ if (acc_fw->flags & acc_flag_to_pipe[i].flag) {
++ atomisp_css_unload_acc_extension(asd, acc_fw->fw,
++ acc_flag_to_pipe[i].pipe_id);
++ }
++ }
++}
++
+ /*
+ * Appends the loaded acceleration binary extensions to the
+ * current ISP mode. Must be called just before sh_css_start().
+@@ -479,16 +491,20 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd)
+ acc_fw->fw,
+ acc_flag_to_pipe[i].pipe_id,
+ acc_fw->type);
+- if (ret)
++ if (ret) {
++ atomisp_acc_unload_some_extensions(asd, i, acc_fw);
+ goto error;
++ }
+
+ ext_loaded = true;
+ }
+ }
+
+ ret = atomisp_css_set_acc_parameters(acc_fw);
+- if (ret < 0)
++ if (ret < 0) {
++ atomisp_acc_unload_some_extensions(asd, i, acc_fw);
+ goto error;
++ }
+ }
+
+ if (!ext_loaded)
+@@ -497,6 +513,7 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd)
+ ret = atomisp_css_update_stream(asd);
+ if (ret) {
+ dev_err(isp->dev, "%s: update stream failed.\n", __func__);
++ atomisp_acc_unload_extensions(asd);
+ goto error;
+ }
+
+@@ -504,13 +521,6 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd)
+ return 0;
+
+ error:
+- while (--i >= 0) {
+- if (acc_fw->flags & acc_flag_to_pipe[i].flag) {
+- atomisp_css_unload_acc_extension(asd, acc_fw->fw,
+- acc_flag_to_pipe[i].pipe_id);
+- }
+- }
+-
+ list_for_each_entry_continue_reverse(acc_fw, &asd->acc.fw, list) {
+ if (acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_OUTPUT &&
+ acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_VIEWFINDER)
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+index 366161cff5602..88db9818e0839 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+@@ -899,9 +899,9 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error,
+ int err;
+ unsigned long irqflags;
+ struct ia_css_frame *frame = NULL;
+- struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp;
+- struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp;
+- struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp;
++ struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp, *s3a_iter;
++ struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp, *dis_iter;
++ struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp, *md_iter;
+ enum atomisp_metadata_type md_type;
+ struct atomisp_device *isp = asd->isp;
+ struct v4l2_control ctrl;
+@@ -940,60 +940,75 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error,
+
+ switch (buf_type) {
+ case IA_CSS_BUFFER_TYPE_3A_STATISTICS:
+- list_for_each_entry_safe(s3a_buf, _s3a_buf_tmp,
++ list_for_each_entry_safe(s3a_iter, _s3a_buf_tmp,
+ &asd->s3a_stats_in_css, list) {
+- if (s3a_buf->s3a_data ==
++ if (s3a_iter->s3a_data ==
+ buffer.css_buffer.data.stats_3a) {
+- list_del_init(&s3a_buf->list);
+- list_add_tail(&s3a_buf->list,
++ list_del_init(&s3a_iter->list);
++ list_add_tail(&s3a_iter->list,
+ &asd->s3a_stats_ready);
++ s3a_buf = s3a_iter;
+ break;
+ }
+ }
+
+ asd->s3a_bufs_in_css[css_pipe_id]--;
+ atomisp_3a_stats_ready_event(asd, buffer.css_buffer.exp_id);
+- dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n",
+- __func__, s3a_buf->s3a_data->exp_id);
++ if (s3a_buf)
++ dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n",
++ __func__, s3a_buf->s3a_data->exp_id);
++ else
++ dev_dbg(isp->dev, "%s: s3a stat is ready with no exp_id found\n",
++ __func__);
+ break;
+ case IA_CSS_BUFFER_TYPE_METADATA:
+ if (error)
+ break;
+
+ md_type = atomisp_get_metadata_type(asd, css_pipe_id);
+- list_for_each_entry_safe(md_buf, _md_buf_tmp,
++ list_for_each_entry_safe(md_iter, _md_buf_tmp,
+ &asd->metadata_in_css[md_type], list) {
+- if (md_buf->metadata ==
++ if (md_iter->metadata ==
+ buffer.css_buffer.data.metadata) {
+- list_del_init(&md_buf->list);
+- list_add_tail(&md_buf->list,
++ list_del_init(&md_iter->list);
++ list_add_tail(&md_iter->list,
+ &asd->metadata_ready[md_type]);
++ md_buf = md_iter;
+ break;
+ }
+ }
+ asd->metadata_bufs_in_css[stream_id][css_pipe_id]--;
+ atomisp_metadata_ready_event(asd, md_type);
+- dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n",
+- __func__, md_buf->metadata->exp_id);
++ if (md_buf)
++ dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n",
++ __func__, md_buf->metadata->exp_id);
++ else
++ dev_dbg(isp->dev, "%s: metadata is ready with no exp_id found\n",
++ __func__);
+ break;
+ case IA_CSS_BUFFER_TYPE_DIS_STATISTICS:
+- list_for_each_entry_safe(dis_buf, _dis_buf_tmp,
++ list_for_each_entry_safe(dis_iter, _dis_buf_tmp,
+ &asd->dis_stats_in_css, list) {
+- if (dis_buf->dis_data ==
++ if (dis_iter->dis_data ==
+ buffer.css_buffer.data.stats_dvs) {
+ spin_lock_irqsave(&asd->dis_stats_lock,
+ irqflags);
+- list_del_init(&dis_buf->list);
+- list_add(&dis_buf->list, &asd->dis_stats);
++ list_del_init(&dis_iter->list);
++ list_add(&dis_iter->list, &asd->dis_stats);
+ asd->params.dis_proj_data_valid = true;
+ spin_unlock_irqrestore(&asd->dis_stats_lock,
+ irqflags);
++ dis_buf = dis_iter;
+ break;
+ }
+ }
+ asd->dis_bufs_in_css--;
+- dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n",
+- __func__, dis_buf->dis_data->exp_id);
++ if (dis_buf)
++ dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n",
++ __func__, dis_buf->dis_data->exp_id);
++ else
++ dev_dbg(isp->dev, "%s: dis stat is ready with no exp_id found\n",
++ __func__);
+ break;
+ case IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME:
+ case IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME:
+@@ -1715,6 +1730,12 @@ void atomisp_wdt_refresh_pipe(struct atomisp_video_pipe *pipe,
+ {
+ unsigned long next;
+
++ if (!pipe->asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, pipe->vdev.name);
++ return;
++ }
++
+ if (delay != ATOMISP_WDT_KEEP_CURRENT_DELAY)
+ pipe->wdt_duration = delay;
+
+@@ -1777,6 +1798,12 @@ void atomisp_wdt_refresh(struct atomisp_sub_device *asd, unsigned int delay)
+ /* ISP2401 */
+ void atomisp_wdt_stop_pipe(struct atomisp_video_pipe *pipe, bool sync)
+ {
++ if (!pipe->asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, pipe->vdev.name);
++ return;
++ }
++
+ if (!atomisp_is_wdt_running(pipe))
+ return;
+
+@@ -4109,6 +4136,12 @@ void atomisp_handle_parameter_and_buffer(struct atomisp_video_pipe *pipe)
+ unsigned long irqflags;
+ bool need_to_enqueue_buffer = false;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, pipe->vdev.name);
++ return;
++ }
++
+ if (atomisp_is_vf_pipe(pipe))
+ return;
+
+@@ -4196,6 +4229,12 @@ int atomisp_set_parameters(struct video_device *vdev,
+ struct atomisp_css_params *css_param = &asd->params.css_param;
+ int ret;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (!asd->stream_env[ATOMISP_INPUT_STREAM_GENERAL].stream) {
+ dev_err(asd->isp->dev, "%s: internal error!\n", __func__);
+ return -EINVAL;
+@@ -4857,6 +4896,12 @@ int atomisp_try_fmt(struct video_device *vdev, struct v4l2_pix_format *f,
+ int source_pad = atomisp_subdev_source_pad(vdev);
+ int ret;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (!isp->inputs[asd->input_curr].camera)
+ return -EINVAL;
+
+@@ -5194,10 +5239,17 @@ static int atomisp_set_fmt_to_isp(struct video_device *vdev,
+ int (*configure_pp_input)(struct atomisp_sub_device *asd,
+ unsigned int width, unsigned int height) =
+ configure_pp_input_nop;
+- u16 stream_index = atomisp_source_pad_to_stream_id(asd, source_pad);
++ u16 stream_index;
+ const struct atomisp_in_fmt_conv *fc;
+ int ret, i;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++ stream_index = atomisp_source_pad_to_stream_id(asd, source_pad);
++
+ v4l2_fh_init(&fh.vfh, vdev);
+
+ isp_sink_crop = atomisp_subdev_get_rect(
+@@ -5493,7 +5545,8 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
+ unsigned int padding_w, unsigned int padding_h,
+ unsigned int dvs_env_w, unsigned int dvs_env_h)
+ {
+- struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
++ struct atomisp_video_pipe *pipe = atomisp_to_video_pipe(vdev);
++ struct atomisp_sub_device *asd = pipe->asd;
+ const struct atomisp_format_bridge *format;
+ struct v4l2_subdev_pad_config pad_cfg;
+ struct v4l2_subdev_state pad_state = {
+@@ -5504,7 +5557,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
+ };
+ struct v4l2_mbus_framefmt *ffmt = &vformat.format;
+ struct v4l2_mbus_framefmt *req_ffmt;
+- struct atomisp_device *isp = asd->isp;
++ struct atomisp_device *isp;
+ struct atomisp_input_stream_info *stream_info =
+ (struct atomisp_input_stream_info *)ffmt->reserved;
+ u16 stream_index = ATOMISP_INPUT_STREAM_GENERAL;
+@@ -5512,6 +5565,14 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
+ struct v4l2_subdev_fh fh;
+ int ret;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
++ isp = asd->isp;
++
+ v4l2_fh_init(&fh.vfh, vdev);
+
+ stream_index = atomisp_source_pad_to_stream_id(asd, source_pad);
+@@ -5602,6 +5663,12 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
+ struct v4l2_subdev_fh fh;
+ int ret;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (source_pad >= ATOMISP_SUBDEV_PADS_NUM)
+ return -EINVAL;
+
+@@ -6034,6 +6101,12 @@ int atomisp_set_fmt_file(struct video_device *vdev, struct v4l2_format *f)
+ struct v4l2_subdev_fh fh;
+ int ret;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ v4l2_fh_init(&fh.vfh, vdev);
+
+ dev_dbg(isp->dev, "setting fmt %ux%u 0x%x for file inject\n",
+@@ -6359,6 +6432,12 @@ bool atomisp_is_vf_pipe(struct atomisp_video_pipe *pipe)
+ {
+ struct atomisp_sub_device *asd = pipe->asd;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, pipe->vdev.name);
++ return false;
++ }
++
+ if (pipe == &asd->video_out_vf)
+ return true;
+
+@@ -6572,6 +6651,12 @@ static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
+ {
+ struct atomisp_sub_device *asd = pipe->asd;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, pipe->vdev.name);
++ return -EINVAL;
++ }
++
+ if (ATOMISP_USE_YUVPP(asd)) {
+ return IA_CSS_PIPE_ID_YUVPP;
+ } else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_SCALER) {
+@@ -6609,6 +6694,12 @@ int atomisp_get_invalid_frame_num(struct video_device *vdev,
+ struct ia_css_pipe_info p_info;
+ int ret;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (asd->isp->inputs[asd->input_curr].camera_caps->
+ sensor[asd->sensor_curr].stream_num > 1) {
+ /* External ISP */
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp_fops.c
+index f82bf082aa796..18fff47bd25d2 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_fops.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_fops.c
+@@ -877,6 +877,11 @@ done:
+ else
+ pipe->users++;
+ rt_mutex_unlock(&isp->mutex);
++
++ /* Ensure that a mode is set */
++ if (asd)
++ v4l2_ctrl_s_ctrl(asd->run_mode, pipe->default_run_mode);
++
+ return 0;
+
+ css_error:
+@@ -1171,6 +1176,12 @@ static int atomisp_mmap(struct file *file, struct vm_area_struct *vma)
+ u32 origin_size, new_size;
+ int ret;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (!(vma->vm_flags & (VM_WRITE | VM_READ)))
+ return -EACCES;
+
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c
+index d8c9e31314b2e..2a8ef766b25a4 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c
+@@ -481,7 +481,7 @@ fail:
+
+ static u8 gmin_get_pmic_id_and_addr(struct device *dev)
+ {
+- struct i2c_client *power;
++ struct i2c_client *power = NULL;
+ static u8 pmic_i2c_addr;
+
+ if (pmic_id)
+@@ -729,6 +729,21 @@ static int axp_regulator_set(struct device *dev, struct gmin_subdev *gs,
+ return 0;
+ }
+
++/*
++ * Some boards contain a hw-bug where turning eldo2 back on after having turned
++ * it off causes the CPLM3218 ambient-light-sensor on the image-sensor's I2C bus
++ * to crash, hanging the bus. Do not turn eldo2 off on these systems.
++ */
++static const struct dmi_system_id axp_leave_eldo2_on_ids[] = {
++ {
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "SurfTab duo W1 10.1 (VT4)"),
++ },
++ },
++ { }
++};
++
+ static int axp_v1p8_on(struct device *dev, struct gmin_subdev *gs)
+ {
+ int ret;
+@@ -763,6 +778,9 @@ static int axp_v1p8_off(struct device *dev, struct gmin_subdev *gs)
+ if (ret)
+ return ret;
+
++ if (dmi_check_system(axp_leave_eldo2_on_ids))
++ return 0;
++
+ ret = axp_regulator_set(dev, gs, gs->eldo2_sel_reg, gs->eldo2_1p8v,
+ ELDO_CTRL_REG, gs->eldo2_ctrl_shift, false);
+ return ret;
+@@ -1180,7 +1198,7 @@ static int gmin_get_config_dsm_var(struct device *dev,
+ dev_info(dev, "found _DSM entry for '%s': %s\n", var,
+ cur->string.pointer);
+ strscpy(out, cur->string.pointer, *out_len);
+- *out_len = strlen(cur->string.pointer);
++ *out_len = strlen(out);
+
+ ACPI_FREE(obj);
+ return 0;
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
+index c8a625667e81e..b7dda4b96d49c 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
+@@ -646,6 +646,12 @@ static int atomisp_g_input(struct file *file, void *fh, unsigned int *input)
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+ struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ rt_mutex_lock(&isp->mutex);
+ *input = asd->input_curr;
+ rt_mutex_unlock(&isp->mutex);
+@@ -665,6 +671,12 @@ static int atomisp_s_input(struct file *file, void *fh, unsigned int input)
+ struct v4l2_subdev *motor;
+ int ret;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ rt_mutex_lock(&isp->mutex);
+ if (input >= ATOM_ISP_MAX_INPUTS || input >= isp->input_cnt) {
+ dev_dbg(isp->dev, "input_cnt: %d\n", isp->input_cnt);
+@@ -761,18 +773,33 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh,
+ struct video_device *vdev = video_devdata(file);
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+ struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+- struct v4l2_subdev_mbus_code_enum code = { 0 };
++ struct v4l2_subdev_mbus_code_enum code = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
++ struct v4l2_subdev *camera;
+ unsigned int i, fi = 0;
+ int rval;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
++ camera = isp->inputs[asd->input_curr].camera;
++ if(!camera) {
++ dev_err(isp->dev, "%s(): camera is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ rt_mutex_lock(&isp->mutex);
+- rval = v4l2_subdev_call(isp->inputs[asd->input_curr].camera, pad,
+- enum_mbus_code, NULL, &code);
++
++ rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code);
+ if (rval == -ENOIOCTLCMD) {
+ dev_warn(isp->dev,
+- "enum_mbus_code pad op not supported. Please fix your sensor driver!\n");
+- // rval = v4l2_subdev_call(isp->inputs[asd->input_curr].camera,
+- // video, enum_mbus_fmt, 0, &code.code);
++ "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n",
++ camera->name);
+ }
+ rt_mutex_unlock(&isp->mutex);
+
+@@ -802,6 +829,8 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh,
+ f->pixelformat = format->pixelformat;
+ return 0;
+ }
++ dev_err(isp->dev, "%s(): format for code %x not found.\n",
++ __func__, code.code);
+
+ return -EINVAL;
+ }
+@@ -834,6 +863,72 @@ static int atomisp_g_fmt_file(struct file *file, void *fh,
+ return 0;
+ }
+
++static int atomisp_adjust_fmt(struct v4l2_format *f)
++{
++ const struct atomisp_format_bridge *format_bridge;
++ u32 padded_width;
++
++ format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat);
++
++ padded_width = f->fmt.pix.width + pad_w;
++
++ if (format_bridge->planar) {
++ f->fmt.pix.bytesperline = padded_width;
++ f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height *
++ DIV_ROUND_UP(format_bridge->depth *
++ padded_width, 8));
++ } else {
++ f->fmt.pix.bytesperline = DIV_ROUND_UP(format_bridge->depth *
++ padded_width, 8);
++ f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * f->fmt.pix.bytesperline);
++ }
++
++ if (f->fmt.pix.field == V4L2_FIELD_ANY)
++ f->fmt.pix.field = V4L2_FIELD_NONE;
++
++ format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat);
++ if (!format_bridge)
++ return -EINVAL;
++
++ /* Currently, raw formats are broken!!! */
++ if (format_bridge->sh_fmt == IA_CSS_FRAME_FORMAT_RAW) {
++ f->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420;
++
++ format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat);
++ if (!format_bridge)
++ return -EINVAL;
++ }
++
++ padded_width = f->fmt.pix.width + pad_w;
++
++ if (format_bridge->planar) {
++ f->fmt.pix.bytesperline = padded_width;
++ f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height *
++ DIV_ROUND_UP(format_bridge->depth *
++ padded_width, 8));
++ } else {
++ f->fmt.pix.bytesperline = DIV_ROUND_UP(format_bridge->depth *
++ padded_width, 8);
++ f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * f->fmt.pix.bytesperline);
++ }
++
++ if (f->fmt.pix.field == V4L2_FIELD_ANY)
++ f->fmt.pix.field = V4L2_FIELD_NONE;
++
++ /*
++ * FIXME: do we need to setup this differently, depending on the
++ * sensor or the pipeline?
++ */
++ f->fmt.pix.colorspace = V4L2_COLORSPACE_REC709;
++ f->fmt.pix.ycbcr_enc = V4L2_YCBCR_ENC_709;
++ f->fmt.pix.xfer_func = V4L2_XFER_FUNC_709;
++
++ f->fmt.pix.width -= pad_w;
++ f->fmt.pix.height -= pad_h;
++
++ return 0;
++}
++
+ /* This function looks up the closest available resolution. */
+ static int atomisp_try_fmt_cap(struct file *file, void *fh,
+ struct v4l2_format *f)
+@@ -845,7 +940,11 @@ static int atomisp_try_fmt_cap(struct file *file, void *fh,
+ rt_mutex_lock(&isp->mutex);
+ ret = atomisp_try_fmt(vdev, &f->fmt.pix, NULL);
+ rt_mutex_unlock(&isp->mutex);
+- return ret;
++
++ if (ret)
++ return ret;
++
++ return atomisp_adjust_fmt(f);
+ }
+
+ static int atomisp_s_fmt_cap(struct file *file, void *fh,
+@@ -1024,9 +1123,16 @@ int __atomisp_reqbufs(struct file *file, void *fh,
+ struct ia_css_frame *frame;
+ struct videobuf_vmalloc_memory *vm_mem;
+ u16 source_pad = atomisp_subdev_source_pad(vdev);
+- u16 stream_id = atomisp_source_pad_to_stream_id(asd, source_pad);
++ u16 stream_id;
+ int ret = 0, i = 0;
+
++ if (!asd) {
++ dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++ stream_id = atomisp_source_pad_to_stream_id(asd, source_pad);
++
+ if (req->count == 0) {
+ mutex_lock(&pipe->capq.vb_lock);
+ if (!list_empty(&pipe->capq.stream))
+@@ -1154,6 +1260,12 @@ static int atomisp_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
+ u32 pgnr;
+ int ret = 0;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ rt_mutex_lock(&isp->mutex);
+ if (isp->isp_fatal_error) {
+ ret = -EIO;
+@@ -1389,6 +1501,12 @@ static int atomisp_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+ int ret = 0;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ rt_mutex_lock(&isp->mutex);
+
+ if (isp->isp_fatal_error) {
+@@ -1640,6 +1758,12 @@ static int atomisp_streamon(struct file *file, void *fh,
+ int ret = 0;
+ unsigned long irqflags;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ dev_dbg(isp->dev, "Start stream on pad %d for asd%d\n",
+ atomisp_subdev_source_pad(vdev), asd->index);
+
+@@ -1901,6 +2025,12 @@ int __atomisp_streamoff(struct file *file, void *fh, enum v4l2_buf_type type)
+ unsigned long flags;
+ bool first_streamoff = false;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ dev_dbg(isp->dev, "Stop stream on pad %d for asd%d\n",
+ atomisp_subdev_source_pad(vdev), asd->index);
+
+@@ -2150,6 +2280,12 @@ static int atomisp_g_ctrl(struct file *file, void *fh,
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+ int i, ret = -EINVAL;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ for (i = 0; i < ctrls_num; i++) {
+ if (ci_v4l2_controls[i].id == control->id) {
+ ret = 0;
+@@ -2229,6 +2365,12 @@ static int atomisp_s_ctrl(struct file *file, void *fh,
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+ int i, ret = -EINVAL;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ for (i = 0; i < ctrls_num; i++) {
+ if (ci_v4l2_controls[i].id == control->id) {
+ ret = 0;
+@@ -2310,6 +2452,12 @@ static int atomisp_queryctl(struct file *file, void *fh,
+ struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ switch (qc->id) {
+ case V4L2_CID_FOCUS_ABSOLUTE:
+ case V4L2_CID_FOCUS_RELATIVE:
+@@ -2355,6 +2503,12 @@ static int atomisp_camera_g_ext_ctrls(struct file *file, void *fh,
+ int i;
+ int ret = 0;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (!IS_ISP2401)
+ motor = isp->inputs[asd->input_curr].motor;
+ else
+@@ -2466,6 +2620,12 @@ static int atomisp_camera_s_ext_ctrls(struct file *file, void *fh,
+ int i;
+ int ret = 0;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (!IS_ISP2401)
+ motor = isp->inputs[asd->input_curr].motor;
+ else
+@@ -2591,6 +2751,12 @@ static int atomisp_g_parm(struct file *file, void *fh,
+ struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+ struct atomisp_device *isp = video_get_drvdata(vdev);
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+ dev_err(isp->dev, "unsupported v4l2 buf type\n");
+ return -EINVAL;
+@@ -2613,6 +2779,12 @@ static int atomisp_s_parm(struct file *file, void *fh,
+ int rval;
+ int fps;
+
++ if (!asd) {
++ dev_err(isp->dev, "%s(): asd is NULL, device is %s\n",
++ __func__, vdev->name);
++ return -EINVAL;
++ }
++
+ if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+ dev_err(isp->dev, "unsupported v4l2 buf type\n");
+ return -EINVAL;
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+index 12f22ad007c73..ffaf11e0b0ad8 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+@@ -1164,23 +1164,28 @@ static int isp_subdev_init_entities(struct atomisp_sub_device *asd)
+
+ atomisp_init_acc_pipe(asd, &asd->video_acc);
+
+- ret = atomisp_video_init(&asd->video_in, "MEMORY");
++ ret = atomisp_video_init(&asd->video_in, "MEMORY",
++ ATOMISP_RUN_MODE_SDV);
+ if (ret < 0)
+ return ret;
+
+- ret = atomisp_video_init(&asd->video_out_capture, "CAPTURE");
++ ret = atomisp_video_init(&asd->video_out_capture, "CAPTURE",
++ ATOMISP_RUN_MODE_STILL_CAPTURE);
+ if (ret < 0)
+ return ret;
+
+- ret = atomisp_video_init(&asd->video_out_vf, "VIEWFINDER");
++ ret = atomisp_video_init(&asd->video_out_vf, "VIEWFINDER",
++ ATOMISP_RUN_MODE_CONTINUOUS_CAPTURE);
+ if (ret < 0)
+ return ret;
+
+- ret = atomisp_video_init(&asd->video_out_preview, "PREVIEW");
++ ret = atomisp_video_init(&asd->video_out_preview, "PREVIEW",
++ ATOMISP_RUN_MODE_PREVIEW);
+ if (ret < 0)
+ return ret;
+
+- ret = atomisp_video_init(&asd->video_out_video_capture, "VIDEO");
++ ret = atomisp_video_init(&asd->video_out_video_capture, "VIDEO",
++ ATOMISP_RUN_MODE_VIDEO);
+ if (ret < 0)
+ return ret;
+
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp_subdev.h
+index d6fcfab6352d7..a8d210ea5f8be 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.h
++++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.h
+@@ -81,6 +81,9 @@ struct atomisp_video_pipe {
+ /* the link list to store per_frame parameters */
+ struct list_head per_frame_params;
+
++ /* Store here the initial run mode */
++ unsigned int default_run_mode;
++
+ unsigned int buffers_in_css;
+
+ /* irq_lock is used to protect video buffer state change operations and
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+index 1e324f1f656e5..14c39b8987c95 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+@@ -447,7 +447,8 @@ const struct atomisp_dfs_config dfs_config_cht_soc = {
+ .dfs_table_size = ARRAY_SIZE(dfs_rules_cht_soc),
+ };
+
+-int atomisp_video_init(struct atomisp_video_pipe *video, const char *name)
++int atomisp_video_init(struct atomisp_video_pipe *video, const char *name,
++ unsigned int run_mode)
+ {
+ int ret;
+ const char *direction;
+@@ -478,6 +479,7 @@ int atomisp_video_init(struct atomisp_video_pipe *video, const char *name)
+ "ATOMISP ISP %s %s", name, direction);
+ video->vdev.release = video_device_release_empty;
+ video_set_drvdata(&video->vdev, video->isp);
++ video->default_run_mode = run_mode;
+
+ return 0;
+ }
+@@ -711,15 +713,15 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
+
+ dev_dbg(isp->dev, "IUNIT power-%s.\n", enable ? "on" : "off");
+
+- /*WA:Enable DVFS*/
++ /* WA for P-Unit, if DVFS enabled, ISP timeout observed */
+ if (IS_CHT && enable)
+- punit_ddr_dvfs_enable(true);
++ punit_ddr_dvfs_enable(false);
+
+ /*
+ * FIXME:WA for ECS28A, with this sleep, CTS
+ * android.hardware.camera2.cts.CameraDeviceTest#testCameraDeviceAbort
+ * PASS, no impact on other platforms
+- */
++ */
+ if (IS_BYT && enable)
+ msleep(10);
+
+@@ -727,7 +729,7 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
+ iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, MRFLD_ISPSSPM0,
+ val, MRFLD_ISPSSPM0_ISPSSC_MASK);
+
+- /*WA:Enable DVFS*/
++ /* WA:Enable DVFS */
+ if (IS_CHT && !enable)
+ punit_ddr_dvfs_enable(true);
+
+@@ -1182,6 +1184,7 @@ static void atomisp_unregister_entities(struct atomisp_device *isp)
+
+ v4l2_device_unregister(&isp->v4l2_dev);
+ media_device_unregister(&isp->media_dev);
++ media_device_cleanup(&isp->media_dev);
+ }
+
+ static int atomisp_register_entities(struct atomisp_device *isp)
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.h b/drivers/staging/media/atomisp/pci/atomisp_v4l2.h
+index 81bb356b81720..72611b8286a4a 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.h
++++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.h
+@@ -27,7 +27,8 @@ struct v4l2_device;
+ struct atomisp_device;
+ struct firmware;
+
+-int atomisp_video_init(struct atomisp_video_pipe *video, const char *name);
++int atomisp_video_init(struct atomisp_video_pipe *video, const char *name,
++ unsigned int run_mode);
+ void atomisp_acc_init(struct atomisp_acc_pipe *video, const char *name);
+ void atomisp_video_unregister(struct atomisp_video_pipe *video);
+ void atomisp_acc_unregister(struct atomisp_acc_pipe *video);
+diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c
+index 6a5ee46070898..c1cda16f2dc01 100644
+--- a/drivers/staging/media/atomisp/pci/hmm/hmm.c
++++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c
+@@ -39,7 +39,7 @@
+ struct hmm_bo_device bo_device;
+ struct hmm_pool dynamic_pool;
+ struct hmm_pool reserved_pool;
+-static ia_css_ptr dummy_ptr;
++static ia_css_ptr dummy_ptr = mmgr_EXCEPTION;
+ static bool hmm_initialized;
+ struct _hmm_mem_stat hmm_mem_stat;
+
+@@ -209,7 +209,7 @@ int hmm_init(void)
+
+ void hmm_cleanup(void)
+ {
+- if (!dummy_ptr)
++ if (dummy_ptr == mmgr_EXCEPTION)
+ return;
+ sysfs_remove_group(&atomisp_dev->kobj, atomisp_attribute_group);
+
+@@ -288,7 +288,8 @@ void hmm_free(ia_css_ptr virt)
+
+ dev_dbg(atomisp_dev, "%s: free 0x%08x\n", __func__, virt);
+
+- WARN_ON(!virt);
++ if (WARN_ON(virt == mmgr_EXCEPTION))
++ return;
+
+ bo = hmm_bo_device_search_start(&bo_device, (unsigned int)virt);
+
+diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
+index c4b35cbab3737..ba25d0da8b811 100644
+--- a/drivers/staging/media/atomisp/pci/sh_css.c
++++ b/drivers/staging/media/atomisp/pci/sh_css.c
+@@ -522,6 +522,7 @@ ia_css_stream_input_format_bits_per_pixel(struct ia_css_stream *stream)
+ return bpp;
+ }
+
++/* TODO: move define to proper file in tools */
+ #define GP_ISEL_TPG_MODE 0x90058
+
+ #if !defined(ISP2401)
+@@ -573,12 +574,8 @@ sh_css_config_input_network(struct ia_css_stream *stream)
+ vblank_cycles = vblank_lines * (width + hblank_cycles);
+ sh_css_sp_configure_sync_gen(width, height, hblank_cycles,
+ vblank_cycles);
+- if (!IS_ISP2401) {
+- if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG) {
+- /* TODO: move define to proper file in tools */
+- ia_css_device_store_uint32(GP_ISEL_TPG_MODE, 0);
+- }
+- }
++ if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG)
++ ia_css_device_store_uint32(GP_ISEL_TPG_MODE, 0);
+ }
+ ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
+ "sh_css_config_input_network() leave:\n");
+@@ -1009,16 +1006,14 @@ static bool sh_css_translate_stream_cfg_to_isys_stream_descr(
+ * ia_css_isys_stream_capture_indication() instead of
+ * ia_css_pipeline_sp_wait_for_isys_stream_N() as isp processing of
+ * capture takes longer than getting an ISYS frame
+- *
+- * Only 2401 relevant ??
+ */
+-#if 0 // FIXME: NOT USED on Yocto Aero
+- isys_stream_descr->polling_mode
+- = early_polling ? INPUT_SYSTEM_POLL_ON_CAPTURE_REQUEST
+- : INPUT_SYSTEM_POLL_ON_WAIT_FOR_FRAME;
+- ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
+- "sh_css_translate_stream_cfg_to_isys_stream_descr() leave:\n");
+-#endif
++ if (IS_ISP2401) {
++ isys_stream_descr->polling_mode
++ = early_polling ? INPUT_SYSTEM_POLL_ON_CAPTURE_REQUEST
++ : INPUT_SYSTEM_POLL_ON_WAIT_FOR_FRAME;
++ ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
++ "sh_css_translate_stream_cfg_to_isys_stream_descr() leave:\n");
++ }
+
+ return rc;
+ }
+@@ -1433,7 +1428,7 @@ static void start_pipe(
+
+ assert(me); /* all callers are in this file and call with non null argument */
+
+- if (!IS_ISP2401) {
++ if (IS_ISP2401) {
+ coord = &me->config.internal_frame_origin_bqs_on_sctbl;
+ params = me->stream->isp_params_configs;
+ }
+diff --git a/drivers/staging/media/atomisp/pci/sh_css_mipi.c b/drivers/staging/media/atomisp/pci/sh_css_mipi.c
+index 75489f7d75eec..c1f2f6151c5f8 100644
+--- a/drivers/staging/media/atomisp/pci/sh_css_mipi.c
++++ b/drivers/staging/media/atomisp/pci/sh_css_mipi.c
+@@ -374,17 +374,17 @@ static bool buffers_needed(struct ia_css_pipe *pipe)
+ {
+ if (!IS_ISP2401) {
+ if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR)
+- return false;
+- else
+ return true;
++ else
++ return false;
+ }
+
+ if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR ||
+ pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG ||
+ pipe->stream->config.mode == IA_CSS_INPUT_MODE_PRBS)
+- return false;
++ return true;
+
+- return true;
++ return false;
+ }
+
+ int
+@@ -423,14 +423,17 @@ allocate_mipi_frames(struct ia_css_pipe *pipe,
+ return 0; /* AM TODO: Check */
+ }
+
+- if (!IS_ISP2401)
++ if (!IS_ISP2401) {
+ port = (unsigned int)pipe->stream->config.source.port.port;
+- else
+- err = ia_css_mipi_is_source_port_valid(pipe, &port);
++ } else {
++ /* Returns true if port is valid. So, invert it */
++ err = !ia_css_mipi_is_source_port_valid(pipe, &port);
++ }
+
+ assert(port < N_CSI_PORTS);
+
+- if (port >= N_CSI_PORTS || err) {
++ if ((!IS_ISP2401 && port >= N_CSI_PORTS) ||
++ (IS_ISP2401 && err)) {
+ ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
+ "allocate_mipi_frames(%p) exit: error: port is not correct (port=%d).\n",
+ pipe, port);
+@@ -552,14 +555,17 @@ free_mipi_frames(struct ia_css_pipe *pipe)
+ return err;
+ }
+
+- if (!IS_ISP2401)
++ if (!IS_ISP2401) {
+ port = (unsigned int)pipe->stream->config.source.port.port;
+- else
+- err = ia_css_mipi_is_source_port_valid(pipe, &port);
++ } else {
++ /* Returns true if port is valid. So, invert it */
++ err = !ia_css_mipi_is_source_port_valid(pipe, &port);
++ }
+
+ assert(port < N_CSI_PORTS);
+
+- if (port >= N_CSI_PORTS || err) {
++ if ((!IS_ISP2401 && port >= N_CSI_PORTS) ||
++ (IS_ISP2401 && err)) {
+ ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
+ "free_mipi_frames(%p, %d) exit: error: pipe port is not correct.\n",
+ pipe, port);
+@@ -663,14 +669,17 @@ send_mipi_frames(struct ia_css_pipe *pipe)
+ /* TODO: AM: maybe this should be returning an error. */
+ }
+
+- if (!IS_ISP2401)
++ if (!IS_ISP2401) {
+ port = (unsigned int)pipe->stream->config.source.port.port;
+- else
+- err = ia_css_mipi_is_source_port_valid(pipe, &port);
++ } else {
++ /* Returns true if port is valid. So, invert it */
++ err = !ia_css_mipi_is_source_port_valid(pipe, &port);
++ }
+
+ assert(port < N_CSI_PORTS);
+
+- if (port >= N_CSI_PORTS || err) {
++ if ((!IS_ISP2401 && port >= N_CSI_PORTS) ||
++ (IS_ISP2401 && err)) {
+ IA_CSS_ERROR("send_mipi_frames(%p) exit: invalid port specified (port=%d).\n",
+ pipe, port);
+ return err;
+diff --git a/drivers/staging/media/atomisp/pci/sh_css_params.c b/drivers/staging/media/atomisp/pci/sh_css_params.c
+index dbd3bfe3d343c..deecffd438aeb 100644
+--- a/drivers/staging/media/atomisp/pci/sh_css_params.c
++++ b/drivers/staging/media/atomisp/pci/sh_css_params.c
+@@ -962,8 +962,8 @@ sh_css_set_black_frame(struct ia_css_stream *stream,
+ params->fpn_config.data = NULL;
+ }
+ if (!params->fpn_config.data) {
+- params->fpn_config.data = kvmalloc(height * width *
+- sizeof(short), GFP_KERNEL);
++ params->fpn_config.data = kvmalloc(array3_size(height, width, sizeof(short)),
++ GFP_KERNEL);
+ if (!params->fpn_config.data) {
+ IA_CSS_ERROR("out of memory");
+ IA_CSS_LEAVE_ERR_PRIVATE(-ENOMEM);
+@@ -2431,7 +2431,7 @@ sh_css_create_isp_params(struct ia_css_stream *stream,
+ unsigned int i;
+ struct sh_css_ddr_address_map *ddr_ptrs;
+ struct sh_css_ddr_address_map_size *ddr_ptrs_size;
+- int err = 0;
++ int err;
+ size_t params_size;
+ struct ia_css_isp_parameters *params =
+ kvmalloc(sizeof(struct ia_css_isp_parameters), GFP_KERNEL);
+@@ -2473,7 +2473,11 @@ sh_css_create_isp_params(struct ia_css_stream *stream,
+ succ &= (ddr_ptrs->macc_tbl != mmgr_NULL);
+
+ *isp_params_out = params;
+- return err;
++
++ if (!succ)
++ return -ENOMEM;
++
++ return 0;
+ }
+
+ static bool
+diff --git a/drivers/staging/media/av7110/av7110_av.c b/drivers/staging/media/av7110/av7110_av.c
+index 91f4866c7e59b..964092e2f41fd 100644
+--- a/drivers/staging/media/av7110/av7110_av.c
++++ b/drivers/staging/media/av7110/av7110_av.c
+@@ -823,10 +823,10 @@ static int write_ts_to_decoder(struct av7110 *av7110, int type, const u8 *buf, s
+ av7110_ipack_flush(ipack);
+
+ if (buf[3] & ADAPT_FIELD) {
++ if (buf[4] > len - 1 - 4)
++ return 0;
+ len -= buf[4] + 1;
+ buf += buf[4] + 1;
+- if (!len)
+- return 0;
+ }
+
+ av7110_ipack_instant_repack(buf + 4, len - 4, ipack);
+diff --git a/drivers/staging/media/av7110/sp8870.c b/drivers/staging/media/av7110/sp8870.c
+index 9767159aeb9b2..abf5c72607b64 100644
+--- a/drivers/staging/media/av7110/sp8870.c
++++ b/drivers/staging/media/av7110/sp8870.c
+@@ -606,4 +606,4 @@ MODULE_DESCRIPTION("Spase SP8870 DVB-T Demodulator driver");
+ MODULE_AUTHOR("Juergen Peitz");
+ MODULE_LICENSE("GPL");
+
+-EXPORT_SYMBOL(sp8870_attach);
++EXPORT_SYMBOL_GPL(sp8870_attach);
+diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h
+index c2e2dca38628a..88792c863edc1 100644
+--- a/drivers/staging/media/hantro/hantro.h
++++ b/drivers/staging/media/hantro/hantro.h
+@@ -262,6 +262,7 @@ struct hantro_ctx {
+ * @max_depth: Maximum depth, for bitstream formats
+ * @enc_fmt: Format identifier for encoder registers.
+ * @frmsize: Supported range of frame sizes (only for bitstream formats).
++ * @postprocessed: Indicates if this format needs the post-processor.
+ */
+ struct hantro_fmt {
+ char *name;
+@@ -271,6 +272,7 @@ struct hantro_fmt {
+ int max_depth;
+ enum hantro_enc_fmt enc_fmt;
+ struct v4l2_frmsize_stepwise frmsize;
++ bool postprocessed;
+ };
+
+ struct hantro_reg {
+diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
+index 20e5081588719..281aa585e3375 100644
+--- a/drivers/staging/media/hantro/hantro_drv.c
++++ b/drivers/staging/media/hantro/hantro_drv.c
+@@ -958,7 +958,7 @@ static int hantro_probe(struct platform_device *pdev)
+ ret = clk_bulk_prepare(vpu->variant->num_clocks, vpu->clocks);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to prepare clocks\n");
+- return ret;
++ goto err_pm_disable;
+ }
+
+ ret = v4l2_device_register(&pdev->dev, &vpu->v4l2_dev);
+@@ -1014,6 +1014,7 @@ err_v4l2_unreg:
+ v4l2_device_unregister(&vpu->v4l2_dev);
+ err_clk_unprepare:
+ clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks);
++err_pm_disable:
+ pm_runtime_dont_use_autosuspend(vpu->dev);
+ pm_runtime_disable(vpu->dev);
+ return ret;
+diff --git a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c
+index 340efb57fd185..bcdfa359de7f1 100644
+--- a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c
++++ b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c
+@@ -74,7 +74,7 @@ static void prepare_tile_info_buffer(struct hantro_ctx *ctx)
+ no_chroma = 1;
+ for (j = 0, tmp_w = 0; j < num_tile_cols - 1; j++) {
+ tmp_w += pps->column_width_minus1[j] + 1;
+- *p++ = pps->column_width_minus1[j + 1];
++ *p++ = pps->column_width_minus1[j] + 1;
+ *p++ = h;
+ if (i == 0 && h == 1 && ctb_size == 16)
+ no_chroma = 1;
+@@ -194,13 +194,8 @@ static void set_params(struct hantro_ctx *ctx)
+ hantro_reg_write(vpu, &g2_max_cu_qpd_depth, 0);
+ }
+
+- if (pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT) {
+- hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset);
+- hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset);
+- } else {
+- hantro_reg_write(vpu, &g2_cb_qp_offset, 0);
+- hantro_reg_write(vpu, &g2_cr_qp_offset, 0);
+- }
++ hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset);
++ hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset);
+
+ hantro_reg_write(vpu, &g2_filt_offset_beta, pps->pps_beta_offset_div2);
+ hantro_reg_write(vpu, &g2_filt_offset_tc, pps->pps_tc_offset_div2);
+@@ -269,24 +264,11 @@ static void set_params(struct hantro_ctx *ctx)
+ hantro_reg_write(vpu, &g2_apf_threshold, 8);
+ }
+
+-static int find_ref_pic_index(const struct v4l2_hevc_dpb_entry *dpb, int pic_order_cnt)
+-{
+- int i;
+-
+- for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+- if (dpb[i].pic_order_cnt[0] == pic_order_cnt)
+- return i;
+- }
+-
+- return 0x0;
+-}
+-
+ static void set_ref_pic_list(struct hantro_ctx *ctx)
+ {
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ struct hantro_dev *vpu = ctx->dev;
+ const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+- const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
+ u32 list0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+ u32 list1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+ static const struct hantro_reg ref_pic_regs0[] = {
+@@ -330,11 +312,11 @@ static void set_ref_pic_list(struct hantro_ctx *ctx)
+ /* List 0 contains: short term before, short term after and long term */
+ j = 0;
+ for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list0); i++)
+- list0[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_before[i]);
++ list0[j++] = decode_params->poc_st_curr_before[i];
+ for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list0); i++)
+- list0[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_after[i]);
++ list0[j++] = decode_params->poc_st_curr_after[i];
+ for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list0); i++)
+- list0[j++] = find_ref_pic_index(dpb, decode_params->poc_lt_curr[i]);
++ list0[j++] = decode_params->poc_lt_curr[i];
+
+ /* Fill the list, copying over and over */
+ i = 0;
+@@ -343,11 +325,11 @@ static void set_ref_pic_list(struct hantro_ctx *ctx)
+
+ j = 0;
+ for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list1); i++)
+- list1[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_after[i]);
++ list1[j++] = decode_params->poc_st_curr_after[i];
+ for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list1); i++)
+- list1[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_before[i]);
++ list1[j++] = decode_params->poc_st_curr_before[i];
+ for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list1); i++)
+- list1[j++] = find_ref_pic_index(dpb, decode_params->poc_lt_curr[i]);
++ list1[j++] = decode_params->poc_lt_curr[i];
+
+ i = 0;
+ while (j < ARRAY_SIZE(list1))
+@@ -431,7 +413,7 @@ static int set_ref(struct hantro_ctx *ctx)
+
+ set_ref_pic_list(ctx);
+
+- /* We will only keep the references picture that are still used */
++ /* We will only keep the reference pictures that are still used */
+ ctx->hevc_dec.ref_bufs_used = 0;
+
+ /* Set up addresses of DPB buffers */
+diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
+index 56cf261a8e958..686d813f5c626 100644
+--- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
++++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
+@@ -23,7 +23,7 @@ static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu,
+
+ reg = H1_REG_IN_IMG_CTRL_ROW_LEN(pix_fmt->width)
+ | H1_REG_IN_IMG_CTRL_OVRFLR_D4(0)
+- | H1_REG_IN_IMG_CTRL_OVRFLB_D4(0)
++ | H1_REG_IN_IMG_CTRL_OVRFLB(0)
+ | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
+ vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL);
+ }
+@@ -140,7 +140,7 @@ int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
+ return 0;
+ }
+
+-void hantro_jpeg_enc_done(struct hantro_ctx *ctx)
++void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx)
+ {
+ struct hantro_dev *vpu = ctx->dev;
+ u32 bytesused = vepu_read(vpu, H1_REG_STR_BUF_LIMIT) / 8;
+diff --git a/drivers/staging/media/hantro/hantro_h1_regs.h b/drivers/staging/media/hantro/hantro_h1_regs.h
+index d6e9825bb5c7b..30e7e7b920b55 100644
+--- a/drivers/staging/media/hantro/hantro_h1_regs.h
++++ b/drivers/staging/media/hantro/hantro_h1_regs.h
+@@ -47,7 +47,7 @@
+ #define H1_REG_IN_IMG_CTRL 0x03c
+ #define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12)
+ #define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10)
+-#define H1_REG_IN_IMG_CTRL_OVRFLB_D4(x) ((x) << 6)
++#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6)
+ #define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2)
+ #define H1_REG_ENC_CTRL0 0x040
+ #define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26)
+diff --git a/drivers/staging/media/hantro/hantro_h264.c b/drivers/staging/media/hantro/hantro_h264.c
+index 0b4d2491be3b8..228629fb3cdf9 100644
+--- a/drivers/staging/media/hantro/hantro_h264.c
++++ b/drivers/staging/media/hantro/hantro_h264.c
+@@ -354,8 +354,6 @@ u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx)
+
+ if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
+ return 0;
+- if (dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
+- return dpb->pic_num;
+ return dpb->frame_num;
+ }
+
+diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
+index 5347f5a41c2ac..7ce98a2b16558 100644
+--- a/drivers/staging/media/hantro/hantro_hevc.c
++++ b/drivers/staging/media/hantro/hantro_hevc.c
+@@ -98,7 +98,7 @@ dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+ int i;
+
+- /* Find the reference buffer in already know ones */
++ /* Find the reference buffer in already known ones */
+ for (i = 0; i < NUM_REF_PICTURES; i++) {
+ if (hevc_dec->ref_bufs_poc[i] == poc) {
+ hevc_dec->ref_bufs_used |= 1 << i;
+diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
+index df7b5e3a57b9b..fd738653a5735 100644
+--- a/drivers/staging/media/hantro/hantro_hw.h
++++ b/drivers/staging/media/hantro/hantro_hw.h
+@@ -235,7 +235,8 @@ int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
+ int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx);
+ int hantro_jpeg_enc_init(struct hantro_ctx *ctx);
+ void hantro_jpeg_enc_exit(struct hantro_ctx *ctx);
+-void hantro_jpeg_enc_done(struct hantro_ctx *ctx);
++void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx);
++void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx);
+
+ dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
+ unsigned int dpb_idx);
+diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c
+index ed8916c950a4f..46434c97317bd 100644
+--- a/drivers/staging/media/hantro/hantro_postproc.c
++++ b/drivers/staging/media/hantro/hantro_postproc.c
+@@ -53,15 +53,9 @@ const struct hantro_postproc_regs hantro_g1_postproc_regs = {
+ bool hantro_needs_postproc(const struct hantro_ctx *ctx,
+ const struct hantro_fmt *fmt)
+ {
+- struct hantro_dev *vpu = ctx->dev;
+-
+ if (ctx->is_encoder)
+ return false;
+-
+- if (!vpu->variant->postproc_fmts)
+- return false;
+-
+- return fmt->fourcc != V4L2_PIX_FMT_NV12;
++ return fmt->postprocessed;
+ }
+
+ void hantro_postproc_enable(struct hantro_ctx *ctx)
+@@ -132,9 +126,10 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
+ unsigned int num_buffers = cap_queue->num_buffers;
+ unsigned int i, buf_size;
+
+- buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage +
+- hantro_h264_mv_size(ctx->dst_fmt.width,
+- ctx->dst_fmt.height);
++ buf_size = ctx->dst_fmt.plane_fmt[0].sizeimage;
++ if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
++ buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
++ ctx->dst_fmt.height);
+
+ for (i = 0; i < num_buffers; ++i) {
+ struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
+diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
+index bcb0bdff4a9a6..629bf40a5e5cb 100644
+--- a/drivers/staging/media/hantro/hantro_v4l2.c
++++ b/drivers/staging/media/hantro/hantro_v4l2.c
+@@ -647,8 +647,12 @@ static int hantro_buf_prepare(struct vb2_buffer *vb)
+ * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets
+ * it to buffer length).
+ */
+- if (V4L2_TYPE_IS_CAPTURE(vq->type))
+- vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
++ if (V4L2_TYPE_IS_CAPTURE(vq->type)) {
++ if (ctx->is_encoder)
++ vb2_set_plane_payload(vb, 0, 0);
++ else
++ vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
++ }
+
+ return 0;
+ }
+diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
+index ea919bfb9891a..b692b74b09149 100644
+--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
++++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
+@@ -82,6 +82,7 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
++ .postprocessed = true,
+ },
+ };
+
+diff --git a/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
+index 991213ce16108..5d9ff420f0b5f 100644
+--- a/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
++++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
+@@ -171,3 +171,20 @@ int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
+
+ return 0;
+ }
++
++void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx)
++{
++ struct hantro_dev *vpu = ctx->dev;
++ u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8;
++ struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
++
++ /*
++ * TODO: Rework the JPEG encoder to eliminate the need
++ * for a bounce buffer.
++ */
++ memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) +
++ ctx->vpu_dst_fmt->header_size,
++ ctx->jpeg_enc.bounce_buffer.cpu, bytesused);
++ vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
++ ctx->vpu_dst_fmt->header_size + bytesused);
++}
+diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
+index d4f52957cc534..543dc4a5486c0 100644
+--- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
++++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
+@@ -62,6 +62,7 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
++ .postprocessed = true,
+ },
+ };
+
+@@ -343,7 +344,7 @@ static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = {
+ .run = hantro_h1_jpeg_enc_run,
+ .reset = rockchip_vpu1_enc_reset,
+ .init = hantro_jpeg_enc_init,
+- .done = hantro_jpeg_enc_done,
++ .done = hantro_h1_jpeg_enc_done,
+ .exit = hantro_jpeg_enc_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+@@ -371,7 +372,7 @@ static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
+ .run = hantro_h1_jpeg_enc_run,
+ .reset = rockchip_vpu1_enc_reset,
+ .init = hantro_jpeg_enc_init,
+- .done = hantro_jpeg_enc_done,
++ .done = hantro_h1_jpeg_enc_done,
+ .exit = hantro_jpeg_enc_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+@@ -399,6 +400,7 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
+ .run = rockchip_vpu2_jpeg_enc_run,
+ .reset = rockchip_vpu2_enc_reset,
+ .init = hantro_jpeg_enc_init,
++ .done = rockchip_vpu2_jpeg_enc_done,
+ .exit = hantro_jpeg_enc_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
+index 9c3b8cd0b2394..99432008b2414 100644
+--- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c
++++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
+@@ -15,6 +15,7 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
++ .postprocessed = true,
+ },
+ };
+
+diff --git a/drivers/staging/media/imx/imx-media-capture.c b/drivers/staging/media/imx/imx-media-capture.c
+index 93ba092360105..5cc67786b9169 100644
+--- a/drivers/staging/media/imx/imx-media-capture.c
++++ b/drivers/staging/media/imx/imx-media-capture.c
+@@ -501,14 +501,14 @@ static int capture_legacy_g_parm(struct file *file, void *fh,
+ struct v4l2_streamparm *a)
+ {
+ struct capture_priv *priv = video_drvdata(file);
+- struct v4l2_subdev_frame_interval fi;
++ struct v4l2_subdev_frame_interval fi = {
++ .pad = priv->src_sd_pad,
++ };
+ int ret;
+
+ if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ return -EINVAL;
+
+- memset(&fi, 0, sizeof(fi));
+- fi.pad = priv->src_sd_pad;
+ ret = v4l2_subdev_call(priv->src_sd, video, g_frame_interval, &fi);
+ if (ret < 0)
+ return ret;
+@@ -523,14 +523,14 @@ static int capture_legacy_s_parm(struct file *file, void *fh,
+ struct v4l2_streamparm *a)
+ {
+ struct capture_priv *priv = video_drvdata(file);
+- struct v4l2_subdev_frame_interval fi;
++ struct v4l2_subdev_frame_interval fi = {
++ .pad = priv->src_sd_pad,
++ };
+ int ret;
+
+ if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ return -EINVAL;
+
+- memset(&fi, 0, sizeof(fi));
+- fi.pad = priv->src_sd_pad;
+ fi.interval = a->parm.capture.timeperframe;
+ ret = v4l2_subdev_call(priv->src_sd, video, s_frame_interval, &fi);
+ if (ret < 0)
+diff --git a/drivers/staging/media/imx/imx-media-dev-common.c b/drivers/staging/media/imx/imx-media-dev-common.c
+index d186179388d03..4d873726a461b 100644
+--- a/drivers/staging/media/imx/imx-media-dev-common.c
++++ b/drivers/staging/media/imx/imx-media-dev-common.c
+@@ -367,6 +367,8 @@ struct imx_media_dev *imx_media_dev_init(struct device *dev,
+ imxmd->v4l2_dev.notify = imx_media_notify;
+ strscpy(imxmd->v4l2_dev.name, "imx-media",
+ sizeof(imxmd->v4l2_dev.name));
++ snprintf(imxmd->md.bus_info, sizeof(imxmd->md.bus_info),
++ "platform:%s", dev_name(imxmd->md.dev));
+
+ media_device_init(&imxmd->md);
+
+diff --git a/drivers/staging/media/imx/imx-media-utils.c b/drivers/staging/media/imx/imx-media-utils.c
+index 6f90acf9c725c..49ba521dd9edd 100644
+--- a/drivers/staging/media/imx/imx-media-utils.c
++++ b/drivers/staging/media/imx/imx-media-utils.c
+@@ -432,15 +432,15 @@ int imx_media_init_cfg(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *sd_state)
+ {
+ struct v4l2_mbus_framefmt *mf_try;
+- struct v4l2_subdev_format format;
+ unsigned int pad;
+ int ret;
+
+ for (pad = 0; pad < sd->entity.num_pads; pad++) {
+- memset(&format, 0, sizeof(format));
++ struct v4l2_subdev_format format = {
++ .pad = pad,
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+
+- format.pad = pad;
+- format.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+ ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &format);
+ if (ret)
+ continue;
+diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
+index 41e33535de552..d35e523741168 100644
+--- a/drivers/staging/media/imx/imx7-mipi-csis.c
++++ b/drivers/staging/media/imx/imx7-mipi-csis.c
+@@ -32,7 +32,6 @@
+ #include <media/v4l2-subdev.h>
+
+ #define CSIS_DRIVER_NAME "imx7-mipi-csis"
+-#define CSIS_SUBDEV_NAME CSIS_DRIVER_NAME
+
+ #define CSIS_PAD_SINK 0
+ #define CSIS_PAD_SOURCE 1
+@@ -311,7 +310,6 @@ struct csi_state {
+ struct reset_control *mrst;
+ struct regulator *mipi_phy_regulator;
+ const struct mipi_csis_info *info;
+- u8 index;
+
+ struct v4l2_subdev sd;
+ struct media_pad pads[CSIS_PADS_NUM];
+@@ -1303,8 +1301,8 @@ static int mipi_csis_subdev_init(struct csi_state *state)
+
+ v4l2_subdev_init(sd, &mipi_csis_subdev_ops);
+ sd->owner = THIS_MODULE;
+- snprintf(sd->name, sizeof(sd->name), "%s.%d",
+- CSIS_SUBDEV_NAME, state->index);
++ snprintf(sd->name, sizeof(sd->name), "csis-%s",
++ dev_name(state->dev));
+
+ sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+ sd->ctrl_handler = NULL;
+diff --git a/drivers/staging/media/imx/imx8mq-mipi-csi2.c b/drivers/staging/media/imx/imx8mq-mipi-csi2.c
+index a6f562009b9a0..1d28313dbed7d 100644
+--- a/drivers/staging/media/imx/imx8mq-mipi-csi2.c
++++ b/drivers/staging/media/imx/imx8mq-mipi-csi2.c
+@@ -398,9 +398,6 @@ static int imx8mq_mipi_csi_s_stream(struct v4l2_subdev *sd, int enable)
+ struct csi_state *state = mipi_sd_to_csi2_state(sd);
+ int ret = 0;
+
+- imx8mq_mipi_csi_write(state, CSI2RX_IRQ_MASK,
+- CSI2RX_IRQ_MASK_ULPS_STATUS_CHANGE);
+-
+ if (enable) {
+ ret = pm_runtime_resume_and_get(state->dev);
+ if (ret < 0)
+@@ -696,7 +693,7 @@ err_parse:
+ * Suspend/resume
+ */
+
+-static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool runtime)
++static int imx8mq_mipi_csi_pm_suspend(struct device *dev)
+ {
+ struct v4l2_subdev *sd = dev_get_drvdata(dev);
+ struct csi_state *state = mipi_sd_to_csi2_state(sd);
+@@ -708,36 +705,21 @@ static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool runtime)
+ imx8mq_mipi_csi_stop_stream(state);
+ imx8mq_mipi_csi_clk_disable(state);
+ state->state &= ~ST_POWERED;
+- if (!runtime)
+- state->state |= ST_SUSPENDED;
+ }
+
+ mutex_unlock(&state->lock);
+
+- ret = icc_set_bw(state->icc_path, 0, 0);
+- if (ret)
+- dev_err(dev, "icc_set_bw failed with %d\n", ret);
+-
+ return ret ? -EAGAIN : 0;
+ }
+
+-static int imx8mq_mipi_csi_pm_resume(struct device *dev, bool runtime)
++static int imx8mq_mipi_csi_pm_resume(struct device *dev)
+ {
+ struct v4l2_subdev *sd = dev_get_drvdata(dev);
+ struct csi_state *state = mipi_sd_to_csi2_state(sd);
+ int ret = 0;
+
+- ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw);
+- if (ret) {
+- dev_err(dev, "icc_set_bw failed with %d\n", ret);
+- return ret;
+- }
+-
+ mutex_lock(&state->lock);
+
+- if (!runtime && !(state->state & ST_SUSPENDED))
+- goto unlock;
+-
+ if (!(state->state & ST_POWERED)) {
+ state->state |= ST_POWERED;
+ ret = imx8mq_mipi_csi_clk_enable(state);
+@@ -758,22 +740,60 @@ unlock:
+
+ static int __maybe_unused imx8mq_mipi_csi_suspend(struct device *dev)
+ {
+- return imx8mq_mipi_csi_pm_suspend(dev, false);
++ struct v4l2_subdev *sd = dev_get_drvdata(dev);
++ struct csi_state *state = mipi_sd_to_csi2_state(sd);
++ int ret;
++
++ ret = imx8mq_mipi_csi_pm_suspend(dev);
++ if (ret)
++ return ret;
++
++ state->state |= ST_SUSPENDED;
++
++ return ret;
+ }
+
+ static int __maybe_unused imx8mq_mipi_csi_resume(struct device *dev)
+ {
+- return imx8mq_mipi_csi_pm_resume(dev, false);
++ struct v4l2_subdev *sd = dev_get_drvdata(dev);
++ struct csi_state *state = mipi_sd_to_csi2_state(sd);
++
++ if (!(state->state & ST_SUSPENDED))
++ return 0;
++
++ return imx8mq_mipi_csi_pm_resume(dev);
+ }
+
+ static int __maybe_unused imx8mq_mipi_csi_runtime_suspend(struct device *dev)
+ {
+- return imx8mq_mipi_csi_pm_suspend(dev, true);
++ struct v4l2_subdev *sd = dev_get_drvdata(dev);
++ struct csi_state *state = mipi_sd_to_csi2_state(sd);
++ int ret;
++
++ ret = imx8mq_mipi_csi_pm_suspend(dev);
++ if (ret)
++ return ret;
++
++ ret = icc_set_bw(state->icc_path, 0, 0);
++ if (ret)
++ dev_err(dev, "icc_set_bw failed with %d\n", ret);
++
++ return ret;
+ }
+
+ static int __maybe_unused imx8mq_mipi_csi_runtime_resume(struct device *dev)
+ {
+- return imx8mq_mipi_csi_pm_resume(dev, true);
++ struct v4l2_subdev *sd = dev_get_drvdata(dev);
++ struct csi_state *state = mipi_sd_to_csi2_state(sd);
++ int ret;
++
++ ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw);
++ if (ret) {
++ dev_err(dev, "icc_set_bw failed with %d\n", ret);
++ return ret;
++ }
++
++ return imx8mq_mipi_csi_pm_resume(dev);
+ }
+
+ static const struct dev_pm_ops imx8mq_mipi_csi_pm_ops = {
+@@ -921,7 +941,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev)
+ /* Enable runtime PM. */
+ pm_runtime_enable(dev);
+ if (!pm_runtime_enabled(dev)) {
+- ret = imx8mq_mipi_csi_pm_resume(dev, true);
++ ret = imx8mq_mipi_csi_runtime_resume(dev);
+ if (ret < 0)
+ goto icc;
+ }
+@@ -934,7 +954,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev)
+
+ cleanup:
+ pm_runtime_disable(&pdev->dev);
+- imx8mq_mipi_csi_pm_suspend(&pdev->dev, true);
++ imx8mq_mipi_csi_runtime_suspend(&pdev->dev);
+
+ media_entity_cleanup(&state->sd.entity);
+ v4l2_async_notifier_unregister(&state->notifier);
+@@ -958,7 +978,7 @@ static int imx8mq_mipi_csi_remove(struct platform_device *pdev)
+ v4l2_async_unregister_subdev(&state->sd);
+
+ pm_runtime_disable(&pdev->dev);
+- imx8mq_mipi_csi_pm_suspend(&pdev->dev, true);
++ imx8mq_mipi_csi_runtime_suspend(&pdev->dev);
+ media_entity_cleanup(&state->sd.entity);
+ mutex_destroy(&state->lock);
+ pm_runtime_set_suspended(&pdev->dev);
+diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.c b/drivers/staging/media/ipu3/ipu3-css-fw.c
+index 45aff76198e2c..981693eed8155 100644
+--- a/drivers/staging/media/ipu3/ipu3-css-fw.c
++++ b/drivers/staging/media/ipu3/ipu3-css-fw.c
+@@ -124,12 +124,11 @@ int imgu_css_fw_init(struct imgu_css *css)
+ /* Check and display fw header info */
+
+ css->fwp = (struct imgu_fw_header *)css->fw->data;
+- if (css->fw->size < sizeof(struct imgu_fw_header *) ||
++ if (css->fw->size < struct_size(css->fwp, binary_header, 1) ||
+ css->fwp->file_header.h_size != sizeof(struct imgu_fw_bi_file_h))
+ goto bad_fw;
+- if (sizeof(struct imgu_fw_bi_file_h) +
+- css->fwp->file_header.binary_nr * sizeof(struct imgu_fw_info) >
+- css->fw->size)
++ if (struct_size(css->fwp, binary_header,
++ css->fwp->file_header.binary_nr) > css->fw->size)
+ goto bad_fw;
+
+ dev_info(dev, "loaded firmware version %.64s, %u binaries, %zu bytes\n",
+diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.h b/drivers/staging/media/ipu3/ipu3-css-fw.h
+index 3c078f15a2959..c0bc57fd678a7 100644
+--- a/drivers/staging/media/ipu3/ipu3-css-fw.h
++++ b/drivers/staging/media/ipu3/ipu3-css-fw.h
+@@ -171,7 +171,7 @@ struct imgu_fw_bi_file_h {
+
+ struct imgu_fw_header {
+ struct imgu_fw_bi_file_h file_header;
+- struct imgu_fw_info binary_header[1]; /* binary_nr items */
++ struct imgu_fw_info binary_header[]; /* binary_nr items */
+ };
+
+ /******************* Firmware functions *******************/
+diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c
+index 38a2407645096..ed091418f7e73 100644
+--- a/drivers/staging/media/ipu3/ipu3-v4l2.c
++++ b/drivers/staging/media/ipu3/ipu3-v4l2.c
+@@ -188,37 +188,50 @@ static int imgu_subdev_set_fmt(struct v4l2_subdev *sd,
+ return 0;
+ }
+
++static struct v4l2_rect *
++imgu_subdev_get_crop(struct imgu_v4l2_subdev *sd,
++ struct v4l2_subdev_state *sd_state, unsigned int pad,
++ enum v4l2_subdev_format_whence which)
++{
++ if (which == V4L2_SUBDEV_FORMAT_TRY)
++ return v4l2_subdev_get_try_crop(&sd->subdev, sd_state, pad);
++ else
++ return &sd->rect.eff;
++}
++
++static struct v4l2_rect *
++imgu_subdev_get_compose(struct imgu_v4l2_subdev *sd,
++ struct v4l2_subdev_state *sd_state, unsigned int pad,
++ enum v4l2_subdev_format_whence which)
++{
++ if (which == V4L2_SUBDEV_FORMAT_TRY)
++ return v4l2_subdev_get_try_compose(&sd->subdev, sd_state, pad);
++ else
++ return &sd->rect.bds;
++}
++
+ static int imgu_subdev_get_selection(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *sd_state,
+ struct v4l2_subdev_selection *sel)
+ {
+- struct v4l2_rect *try_sel, *r;
+- struct imgu_v4l2_subdev *imgu_sd = container_of(sd,
+- struct imgu_v4l2_subdev,
+- subdev);
++ struct imgu_v4l2_subdev *imgu_sd =
++ container_of(sd, struct imgu_v4l2_subdev, subdev);
+
+ if (sel->pad != IMGU_NODE_IN)
+ return -EINVAL;
+
+ switch (sel->target) {
+ case V4L2_SEL_TGT_CROP:
+- try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
+- r = &imgu_sd->rect.eff;
+- break;
++ sel->r = *imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad,
++ sel->which);
++ return 0;
+ case V4L2_SEL_TGT_COMPOSE:
+- try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
+- r = &imgu_sd->rect.bds;
+- break;
++ sel->r = *imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad,
++ sel->which);
++ return 0;
+ default:
+ return -EINVAL;
+ }
+-
+- if (sel->which == V4L2_SUBDEV_FORMAT_TRY)
+- sel->r = *try_sel;
+- else
+- sel->r = *r;
+-
+- return 0;
+ }
+
+ static int imgu_subdev_set_selection(struct v4l2_subdev *sd,
+@@ -226,10 +239,9 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd,
+ struct v4l2_subdev_selection *sel)
+ {
+ struct imgu_device *imgu = v4l2_get_subdevdata(sd);
+- struct imgu_v4l2_subdev *imgu_sd = container_of(sd,
+- struct imgu_v4l2_subdev,
+- subdev);
+- struct v4l2_rect *rect, *try_sel;
++ struct imgu_v4l2_subdev *imgu_sd =
++ container_of(sd, struct imgu_v4l2_subdev, subdev);
++ struct v4l2_rect *rect;
+
+ dev_dbg(&imgu->pci_dev->dev,
+ "set subdev %u sel which %u target 0x%4x rect [%ux%u]",
+@@ -241,22 +253,18 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd,
+
+ switch (sel->target) {
+ case V4L2_SEL_TGT_CROP:
+- try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
+- rect = &imgu_sd->rect.eff;
++ rect = imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad,
++ sel->which);
+ break;
+ case V4L2_SEL_TGT_COMPOSE:
+- try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
+- rect = &imgu_sd->rect.bds;
++ rect = imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad,
++ sel->which);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+- if (sel->which == V4L2_SUBDEV_FORMAT_TRY)
+- *try_sel = sel->r;
+- else
+- *rect = sel->r;
+-
++ *rect = sel->r;
+ return 0;
+ }
+
+@@ -592,11 +600,12 @@ static const struct imgu_fmt *find_format(struct v4l2_format *f, u32 type)
+ static int imgu_vidioc_querycap(struct file *file, void *fh,
+ struct v4l2_capability *cap)
+ {
+- struct imgu_video_device *node = file_to_intel_imgu_node(file);
++ struct imgu_device *imgu = video_drvdata(file);
+
+ strscpy(cap->driver, IMGU_NAME, sizeof(cap->driver));
+ strscpy(cap->card, IMGU_NAME, sizeof(cap->card));
+- snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", node->name);
++ snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s",
++ pci_name(imgu->pci_dev));
+
+ return 0;
+ }
+@@ -696,7 +705,7 @@ static int imgu_fmt(struct imgu_device *imgu, unsigned int pipe, int node,
+
+ /* CSS expects some format on OUT queue */
+ if (i != IPU3_CSS_QUEUE_OUT &&
+- !imgu_pipe->nodes[inode].enabled) {
++ !imgu_pipe->nodes[inode].enabled && !try) {
+ fmts[i] = NULL;
+ continue;
+ }
+diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c
+index db7022707ff8d..86ccc8937afca 100644
+--- a/drivers/staging/media/meson/vdec/esparser.c
++++ b/drivers/staging/media/meson/vdec/esparser.c
+@@ -328,7 +328,12 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
+
+ offset = esparser_get_offset(sess);
+
+- amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, vbuf->flags);
++ ret = amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, vbuf->flags);
++ if (ret) {
++ v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
++ return ret;
++ }
++
+ dev_dbg(core->dev, "esparser: ts = %llu pld_size = %u offset = %08X flags = %08X\n",
+ vb->timestamp, payload_size, offset, vbuf->flags);
+
+diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c
+index e51d69c4729df..040ed56eb24f3 100644
+--- a/drivers/staging/media/meson/vdec/vdec.c
++++ b/drivers/staging/media/meson/vdec/vdec.c
+@@ -1105,6 +1105,7 @@ static int vdec_probe(struct platform_device *pdev)
+
+ err_vdev_release:
+ video_device_release(vdev);
++ v4l2_device_unregister(&core->v4l2_dev);
+ return ret;
+ }
+
+@@ -1113,6 +1114,7 @@ static int vdec_remove(struct platform_device *pdev)
+ struct amvdec_core *core = platform_get_drvdata(pdev);
+
+ video_unregister_device(core->vdev_dec);
++ v4l2_device_unregister(&core->v4l2_dev);
+
+ return 0;
+ }
+diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c
+index b9125c295d1d3..06fd66539797a 100644
+--- a/drivers/staging/media/meson/vdec/vdec_helpers.c
++++ b/drivers/staging/media/meson/vdec/vdec_helpers.c
+@@ -227,13 +227,16 @@ int amvdec_set_canvases(struct amvdec_session *sess,
+ }
+ EXPORT_SYMBOL_GPL(amvdec_set_canvases);
+
+-void amvdec_add_ts(struct amvdec_session *sess, u64 ts,
+- struct v4l2_timecode tc, u32 offset, u32 vbuf_flags)
++int amvdec_add_ts(struct amvdec_session *sess, u64 ts,
++ struct v4l2_timecode tc, u32 offset, u32 vbuf_flags)
+ {
+ struct amvdec_timestamp *new_ts;
+ unsigned long flags;
+
+ new_ts = kzalloc(sizeof(*new_ts), GFP_KERNEL);
++ if (!new_ts)
++ return -ENOMEM;
++
+ new_ts->ts = ts;
+ new_ts->tc = tc;
+ new_ts->offset = offset;
+@@ -242,6 +245,7 @@ void amvdec_add_ts(struct amvdec_session *sess, u64 ts,
+ spin_lock_irqsave(&sess->ts_spinlock, flags);
+ list_add_tail(&new_ts->list, &sess->timestamps);
+ spin_unlock_irqrestore(&sess->ts_spinlock, flags);
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(amvdec_add_ts);
+
+diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.h b/drivers/staging/media/meson/vdec/vdec_helpers.h
+index cfaed52ab5265..798e5a8a9b3f1 100644
+--- a/drivers/staging/media/meson/vdec/vdec_helpers.h
++++ b/drivers/staging/media/meson/vdec/vdec_helpers.h
+@@ -55,8 +55,8 @@ void amvdec_dst_buf_done_offset(struct amvdec_session *sess,
+ * @offset: offset in the VIFIFO where the associated packet was written
+ * @flags the vb2_v4l2_buffer flags
+ */
+-void amvdec_add_ts(struct amvdec_session *sess, u64 ts,
+- struct v4l2_timecode tc, u32 offset, u32 flags);
++int amvdec_add_ts(struct amvdec_session *sess, u64 ts,
++ struct v4l2_timecode tc, u32 offset, u32 flags);
+ void amvdec_remove_ts(struct amvdec_session *sess, u64 ts);
+
+ /**
+diff --git a/drivers/staging/media/meson/vdec/vdec_hevc.c b/drivers/staging/media/meson/vdec/vdec_hevc.c
+index 9530e580e57a2..afced435c9070 100644
+--- a/drivers/staging/media/meson/vdec/vdec_hevc.c
++++ b/drivers/staging/media/meson/vdec/vdec_hevc.c
+@@ -167,8 +167,12 @@ static int vdec_hevc_start(struct amvdec_session *sess)
+
+ clk_set_rate(core->vdec_hevc_clk, 666666666);
+ ret = clk_prepare_enable(core->vdec_hevc_clk);
+- if (ret)
++ if (ret) {
++ if (core->platform->revision == VDEC_REVISION_G12A ||
++ core->platform->revision == VDEC_REVISION_SM1)
++ clk_disable_unprepare(core->vdec_hevcf_clk);
+ return ret;
++ }
+
+ if (core->platform->revision == VDEC_REVISION_SM1)
+ regmap_update_bits(core->regmap_ao, AO_RTI_GEN_PWR_SLEEP0,
+diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
+index d0da083deed53..801e145ea976a 100644
+--- a/drivers/staging/media/omap4iss/iss_video.c
++++ b/drivers/staging/media/omap4iss/iss_video.c
+@@ -244,7 +244,9 @@ static int
+ __iss_video_get_format(struct iss_video *video,
+ struct v4l2_mbus_framefmt *format)
+ {
+- struct v4l2_subdev_format fmt;
++ struct v4l2_subdev_format fmt = {
++ .which = V4L2_SUBDEV_FORMAT_ACTIVE,
++ };
+ struct v4l2_subdev *subdev;
+ u32 pad;
+ int ret;
+@@ -253,9 +255,7 @@ __iss_video_get_format(struct iss_video *video,
+ if (!subdev)
+ return -EINVAL;
+
+- memset(&fmt, 0, sizeof(fmt));
+ fmt.pad = pad;
+- fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+
+ mutex_lock(&video->mutex);
+ ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &fmt);
+diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c
+index 76e97cbe25123..438252fa19441 100644
+--- a/drivers/staging/media/rkvdec/rkvdec-h264.c
++++ b/drivers/staging/media/rkvdec/rkvdec-h264.c
+@@ -112,6 +112,7 @@ struct rkvdec_h264_run {
+ const struct v4l2_ctrl_h264_sps *sps;
+ const struct v4l2_ctrl_h264_pps *pps;
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix;
++ int ref_buf_idx[V4L2_H264_NUM_DPB_ENTRIES];
+ };
+
+ struct rkvdec_h264_ctx {
+@@ -661,8 +662,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
+ WRITE_PPS(0xff, PROFILE_IDC);
+ WRITE_PPS(1, CONSTRAINT_SET3_FLAG);
+ WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC);
+- WRITE_PPS(sps->bit_depth_luma_minus8 + 8, BIT_DEPTH_LUMA);
+- WRITE_PPS(sps->bit_depth_chroma_minus8 + 8, BIT_DEPTH_CHROMA);
++ WRITE_PPS(sps->bit_depth_luma_minus8, BIT_DEPTH_LUMA);
++ WRITE_PPS(sps->bit_depth_chroma_minus8, BIT_DEPTH_CHROMA);
+ WRITE_PPS(0, QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG);
+ WRITE_PPS(sps->log2_max_frame_num_minus4, LOG2_MAX_FRAME_NUM_MINUS4);
+ WRITE_PPS(sps->max_num_ref_frames, MAX_NUM_REF_FRAMES);
+@@ -725,6 +726,26 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx,
+ }
+ }
+
++static void lookup_ref_buf_idx(struct rkvdec_ctx *ctx,
++ struct rkvdec_h264_run *run)
++{
++ const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params;
++ u32 i;
++
++ for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) {
++ struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
++ const struct v4l2_h264_dpb_entry *dpb = run->decode_params->dpb;
++ struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q;
++ int buf_idx = -1;
++
++ if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
++ buf_idx = vb2_find_timestamp(cap_q,
++ dpb[i].reference_ts, 0);
++
++ run->ref_buf_idx[i] = buf_idx;
++ }
++}
++
+ static void assemble_hw_rps(struct rkvdec_ctx *ctx,
+ struct rkvdec_h264_run *run)
+ {
+@@ -762,7 +783,7 @@ static void assemble_hw_rps(struct rkvdec_ctx *ctx,
+
+ for (j = 0; j < RKVDEC_NUM_REFLIST; j++) {
+ for (i = 0; i < h264_ctx->reflists.num_valid; i++) {
+- u8 dpb_valid = 0;
++ bool dpb_valid = run->ref_buf_idx[i] >= 0;
+ u8 idx = 0;
+
+ switch (j) {
+@@ -779,8 +800,6 @@ static void assemble_hw_rps(struct rkvdec_ctx *ctx,
+
+ if (idx >= ARRAY_SIZE(dec_params->dpb))
+ continue;
+- dpb_valid = !!(dpb[idx].flags &
+- V4L2_H264_DPB_ENTRY_FLAG_ACTIVE);
+
+ set_ps_field(hw_rps, DPB_INFO(i, j),
+ idx | dpb_valid << 4);
+@@ -859,13 +878,8 @@ get_ref_buf(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run,
+ unsigned int dpb_idx)
+ {
+ struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
+- const struct v4l2_h264_dpb_entry *dpb = run->decode_params->dpb;
+ struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q;
+- int buf_idx = -1;
+-
+- if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
+- buf_idx = vb2_find_timestamp(cap_q,
+- dpb[dpb_idx].reference_ts, 0);
++ int buf_idx = run->ref_buf_idx[dpb_idx];
+
+ /*
+ * If a DPB entry is unused or invalid, address of current destination
+@@ -1015,8 +1029,9 @@ static int rkvdec_h264_adjust_fmt(struct rkvdec_ctx *ctx,
+ struct v4l2_pix_format_mplane *fmt = &f->fmt.pix_mp;
+
+ fmt->num_planes = 1;
+- fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height *
+- RKVDEC_H264_MAX_DEPTH_IN_BYTES;
++ if (!fmt->plane_fmt[0].sizeimage)
++ fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height *
++ RKVDEC_H264_MAX_DEPTH_IN_BYTES;
+ return 0;
+ }
+
+@@ -1101,6 +1116,7 @@ static int rkvdec_h264_run(struct rkvdec_ctx *ctx)
+
+ assemble_hw_scaling_list(ctx, &run);
+ assemble_hw_pps(ctx, &run);
++ lookup_ref_buf_idx(ctx, &run);
+ assemble_hw_rps(ctx, &run);
+ config_registers(ctx, &run);
+
+@@ -1108,8 +1124,8 @@ static int rkvdec_h264_run(struct rkvdec_ctx *ctx)
+
+ schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));
+
+- writel(0xffffffff, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN);
+- writel(0xffffffff, rkvdec->regs + RKVDEC_REG_H264_ERR_E);
++ writel(0, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN);
++ writel(0, rkvdec->regs + RKVDEC_REG_H264_ERR_E);
+ writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
+ writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);
+
+diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c
+index 7131156c1f2cf..29b68a13674ee 100644
+--- a/drivers/staging/media/rkvdec/rkvdec.c
++++ b/drivers/staging/media/rkvdec/rkvdec.c
+@@ -111,7 +111,7 @@ static const struct rkvdec_coded_fmt_desc rkvdec_coded_fmts[] = {
+ .max_width = 4096,
+ .step_width = 16,
+ .min_height = 48,
+- .max_height = 2304,
++ .max_height = 2560,
+ .step_height = 16,
+ },
+ .ctrls = &rkvdec_h264_ctrls,
+@@ -280,31 +280,20 @@ static int rkvdec_try_output_fmt(struct file *file, void *priv,
+ return 0;
+ }
+
+-static int rkvdec_s_fmt(struct file *file, void *priv,
+- struct v4l2_format *f,
+- int (*try_fmt)(struct file *, void *,
+- struct v4l2_format *))
++static int rkvdec_s_capture_fmt(struct file *file, void *priv,
++ struct v4l2_format *f)
+ {
+ struct rkvdec_ctx *ctx = fh_to_rkvdec_ctx(priv);
+ struct vb2_queue *vq;
++ int ret;
+
+- if (!try_fmt)
+- return -EINVAL;
+-
+- vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
++ /* Change not allowed if queue is busy */
++ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
++ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (vb2_is_busy(vq))
+ return -EBUSY;
+
+- return try_fmt(file, priv, f);
+-}
+-
+-static int rkvdec_s_capture_fmt(struct file *file, void *priv,
+- struct v4l2_format *f)
+-{
+- struct rkvdec_ctx *ctx = fh_to_rkvdec_ctx(priv);
+- int ret;
+-
+- ret = rkvdec_s_fmt(file, priv, f, rkvdec_try_capture_fmt);
++ ret = rkvdec_try_capture_fmt(file, priv, f);
+ if (ret)
+ return ret;
+
+@@ -319,9 +308,20 @@ static int rkvdec_s_output_fmt(struct file *file, void *priv,
+ struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
+ const struct rkvdec_coded_fmt_desc *desc;
+ struct v4l2_format *cap_fmt;
+- struct vb2_queue *peer_vq;
++ struct vb2_queue *peer_vq, *vq;
+ int ret;
+
++ /*
++ * In order to support dynamic resolution change, the decoder admits
++ * a resolution change, as long as the pixelformat remains. Can't be
++ * done if streaming.
++ */
++ vq = v4l2_m2m_get_vq(m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
++ if (vb2_is_streaming(vq) ||
++ (vb2_is_busy(vq) &&
++ f->fmt.pix_mp.pixelformat != ctx->coded_fmt.fmt.pix_mp.pixelformat))
++ return -EBUSY;
++
+ /*
+ * Since format change on the OUTPUT queue will reset the CAPTURE
+ * queue, we can't allow doing so when the CAPTURE queue has buffers
+@@ -331,7 +331,7 @@ static int rkvdec_s_output_fmt(struct file *file, void *priv,
+ if (vb2_is_busy(peer_vq))
+ return -EBUSY;
+
+- ret = rkvdec_s_fmt(file, priv, f, rkvdec_try_output_fmt);
++ ret = rkvdec_try_output_fmt(file, priv, f);
+ if (ret)
+ return ret;
+
+@@ -967,7 +967,6 @@ static const char * const rkvdec_clk_names[] = {
+ static int rkvdec_probe(struct platform_device *pdev)
+ {
+ struct rkvdec_dev *rkvdec;
+- struct resource *res;
+ unsigned int i;
+ int ret, irq;
+
+@@ -999,8 +998,7 @@ static int rkvdec_probe(struct platform_device *pdev)
+ */
+ clk_set_rate(rkvdec->clocks[0].clk, 500 * 1000 * 1000);
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- rkvdec->regs = devm_ioremap_resource(&pdev->dev, res);
++ rkvdec->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(rkvdec->regs))
+ return PTR_ERR(rkvdec->regs);
+
+@@ -1044,6 +1042,8 @@ static int rkvdec_remove(struct platform_device *pdev)
+ {
+ struct rkvdec_dev *rkvdec = platform_get_drvdata(pdev);
+
++ cancel_delayed_work_sync(&rkvdec->watchdog_work);
++
+ rkvdec_v4l2_cleanup(rkvdec);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
+index c0d005dafc6c0..b1755407547b6 100644
+--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
++++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
+@@ -369,6 +369,8 @@ static int cedrus_probe(struct platform_device *pdev)
+ if (!dev)
+ return -ENOMEM;
+
++ platform_set_drvdata(pdev, dev);
++
+ dev->vfd = cedrus_video_device;
+ dev->dev = &pdev->dev;
+ dev->pdev = pdev;
+@@ -440,8 +442,6 @@ static int cedrus_probe(struct platform_device *pdev)
+ goto err_m2m_mc;
+ }
+
+- platform_set_drvdata(pdev, dev);
+-
+ return 0;
+
+ err_m2m_mc:
+diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
+index de7442d4834dc..d3e26bfe6c90b 100644
+--- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
++++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
+@@ -38,7 +38,7 @@ struct cedrus_h264_sram_ref_pic {
+
+ #define CEDRUS_H264_FRAME_NUM 18
+
+-#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K)
++#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K)
+ #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K)
+
+ static void cedrus_h264_write_sram(struct cedrus_dev *dev,
+diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
+index ef0311a16d019..f2d9603a9ffbf 100644
+--- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
++++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
+@@ -23,7 +23,7 @@
+ * Subsequent BSP implementations seem to double the neighbor info buffer size
+ * for the H6 SoC, which may be related to 10 bit H265 support.
+ */
+-#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (397 * SZ_1K)
++#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (794 * SZ_1K)
+ #define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE (4 * SZ_1K)
+ #define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE 160
+
+@@ -147,6 +147,9 @@ static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx,
+ dpb[i].pic_order_cnt[1]
+ };
+
++ if (buffer_index < 0)
++ continue;
++
+ cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic,
+ pic_order_cnt,
+ buffer_index);
+@@ -231,8 +234,9 @@ static void cedrus_h265_skip_bits(struct cedrus_dev *dev, int num)
+ cedrus_write(dev, VE_DEC_H265_TRIGGER,
+ VE_DEC_H265_TRIGGER_FLUSH_BITS |
+ VE_DEC_H265_TRIGGER_TYPE_N_BITS(tmp));
+- while (cedrus_read(dev, VE_DEC_H265_STATUS) & VE_DEC_H265_STATUS_VLD_BUSY)
+- udelay(1);
++
++ if (cedrus_wait_for(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_VLD_BUSY))
++ dev_err_ratelimited(dev->dev, "timed out waiting to skip bits\n");
+
+ count += tmp;
+ }
+@@ -495,7 +499,6 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
+
+ reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) |
+ VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) |
+- VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(decode_params->num_poc_st_curr_after == 0) |
+ VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) |
+ VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) |
+ VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta);
+@@ -508,6 +511,9 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
+ slice_params->flags);
+
++ if (decode_params->num_poc_st_curr_after == 0)
++ reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY;
++
+ cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg);
+
+ chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom +
+diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
+index 92ace87c1c7d1..5f34e36702893 100644
+--- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
++++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
+@@ -377,13 +377,12 @@
+
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED BIT(23)
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED BIT(22)
++#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY BIT(21)
+
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(v) \
+ SHIFT_AND_MASK_BITS(v, 31, 28)
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(v) \
+ SHIFT_AND_MASK_BITS(v, 27, 24)
+-#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(v) \
+- ((v) ? BIT(21) : 0)
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(v) \
+ SHIFT_AND_MASK_BITS(v, 20, 16)
+ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(v) \
+diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
+index b26e44adb2be7..426e653bd55d5 100644
+--- a/drivers/staging/media/tegra-video/csi.c
++++ b/drivers/staging/media/tegra-video/csi.c
+@@ -433,7 +433,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi,
+ for (i = 0; i < chan->numgangports; i++)
+ chan->csi_port_nums[i] = port_num + i * CSI_PORTS_PER_BRICK;
+
+- chan->of_node = node;
++ chan->of_node = of_node_get(node);
+ chan->numpads = num_pads;
+ if (num_pads & 0x2) {
+ chan->pads[0].flags = MEDIA_PAD_FL_SINK;
+@@ -448,6 +448,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi,
+ chan->mipi = tegra_mipi_request(csi->dev, node);
+ if (IS_ERR(chan->mipi)) {
+ ret = PTR_ERR(chan->mipi);
++ chan->mipi = NULL;
+ dev_err(csi->dev, "failed to get mipi device: %d\n", ret);
+ }
+
+@@ -640,6 +641,7 @@ static void tegra_csi_channels_cleanup(struct tegra_csi *csi)
+ media_entity_cleanup(&subdev->entity);
+ }
+
++ of_node_put(chan->of_node);
+ list_del(&chan->list);
+ kfree(chan);
+ }
+diff --git a/drivers/staging/media/tegra-video/csi.h b/drivers/staging/media/tegra-video/csi.h
+index 4ee05a1785cfa..6960ea2e3d360 100644
+--- a/drivers/staging/media/tegra-video/csi.h
++++ b/drivers/staging/media/tegra-video/csi.h
+@@ -56,7 +56,7 @@ struct tegra_csi;
+ * @framerate: active framerate for TPG
+ * @h_blank: horizontal blanking for TPG active format
+ * @v_blank: vertical blanking for TPG active format
+- * @mipi: mipi device for corresponding csi channel pads
++ * @mipi: mipi device for corresponding csi channel pads, or NULL if not applicable (TPG, error)
+ * @pixel_rate: active pixel rate from the sensor on this channel
+ */
+ struct tegra_csi_channel {
+diff --git a/drivers/staging/media/zoran/zoran.h b/drivers/staging/media/zoran/zoran.h
+index b1ad2a2b914cd..50d5a7acfab6c 100644
+--- a/drivers/staging/media/zoran/zoran.h
++++ b/drivers/staging/media/zoran/zoran.h
+@@ -313,6 +313,6 @@ static inline struct zoran *to_zoran(struct v4l2_device *v4l2_dev)
+
+ #endif
+
+-int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq);
++int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir);
+ void zoran_queue_exit(struct zoran *zr);
+ int zr_set_buf(struct zoran *zr);
+diff --git a/drivers/staging/media/zoran/zoran_card.c b/drivers/staging/media/zoran/zoran_card.c
+index f259585b06897..11d415c0c05d2 100644
+--- a/drivers/staging/media/zoran/zoran_card.c
++++ b/drivers/staging/media/zoran/zoran_card.c
+@@ -803,6 +803,52 @@ int zoran_check_jpg_settings(struct zoran *zr,
+ return 0;
+ }
+
++static int zoran_init_video_device(struct zoran *zr, struct video_device *video_dev, int dir)
++{
++ int err;
++
++ /* Now add the template and register the device unit. */
++ *video_dev = zoran_template;
++ video_dev->v4l2_dev = &zr->v4l2_dev;
++ video_dev->lock = &zr->lock;
++ video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | dir;
++
++ strscpy(video_dev->name, ZR_DEVNAME(zr), sizeof(video_dev->name));
++ /*
++ * It's not a mem2mem device, but you can both capture and output from one and the same
++ * device. This should really be split up into two device nodes, but that's a job for
++ * another day.
++ */
++ video_dev->vfl_dir = VFL_DIR_M2M;
++ zoran_queue_init(zr, &zr->vq, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++
++ err = video_register_device(video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]);
++ if (err < 0)
++ return err;
++ video_set_drvdata(video_dev, zr);
++ return 0;
++}
++
++static void zoran_exit_video_devices(struct zoran *zr)
++{
++ video_unregister_device(zr->video_dev);
++ kfree(zr->video_dev);
++}
++
++static int zoran_init_video_devices(struct zoran *zr)
++{
++ int err;
++
++ zr->video_dev = video_device_alloc();
++ if (!zr->video_dev)
++ return -ENOMEM;
++
++ err = zoran_init_video_device(zr, zr->video_dev, V4L2_CAP_VIDEO_CAPTURE);
++ if (err)
++ kfree(zr->video_dev);
++ return err;
++}
++
+ void zoran_open_init_params(struct zoran *zr)
+ {
+ int i;
+@@ -874,17 +920,11 @@ static int zr36057_init(struct zoran *zr)
+ zoran_open_init_params(zr);
+
+ /* allocate memory *before* doing anything to the hardware in case allocation fails */
+- zr->video_dev = video_device_alloc();
+- if (!zr->video_dev) {
+- err = -ENOMEM;
+- goto exit;
+- }
+ zr->stat_com = dma_alloc_coherent(&zr->pci_dev->dev,
+ BUZ_NUM_STAT_COM * sizeof(u32),
+ &zr->p_sc, GFP_KERNEL);
+ if (!zr->stat_com) {
+- err = -ENOMEM;
+- goto exit_video;
++ return -ENOMEM;
+ }
+ for (j = 0; j < BUZ_NUM_STAT_COM; j++)
+ zr->stat_com[j] = cpu_to_le32(1); /* mark as unavailable to zr36057 */
+@@ -897,26 +937,9 @@ static int zr36057_init(struct zoran *zr)
+ goto exit_statcom;
+ }
+
+- /* Now add the template and register the device unit. */
+- *zr->video_dev = zoran_template;
+- zr->video_dev->v4l2_dev = &zr->v4l2_dev;
+- zr->video_dev->lock = &zr->lock;
+- zr->video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_CAPTURE;
+-
+- strscpy(zr->video_dev->name, ZR_DEVNAME(zr), sizeof(zr->video_dev->name));
+- /*
+- * It's not a mem2mem device, but you can both capture and output from one and the same
+- * device. This should really be split up into two device nodes, but that's a job for
+- * another day.
+- */
+- zr->video_dev->vfl_dir = VFL_DIR_M2M;
+-
+- zoran_queue_init(zr, &zr->vq);
+-
+- err = video_register_device(zr->video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]);
+- if (err < 0)
++ err = zoran_init_video_devices(zr);
++ if (err)
+ goto exit_statcomb;
+- video_set_drvdata(zr->video_dev, zr);
+
+ zoran_init_hardware(zr);
+ if (!pass_through) {
+@@ -931,9 +954,6 @@ exit_statcomb:
+ dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb);
+ exit_statcom:
+ dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32), zr->stat_com, zr->p_sc);
+-exit_video:
+- kfree(zr->video_dev);
+-exit:
+ return err;
+ }
+
+@@ -965,7 +985,7 @@ static void zoran_remove(struct pci_dev *pdev)
+ dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb);
+ pci_release_regions(pdev);
+ pci_disable_device(zr->pci_dev);
+- video_unregister_device(zr->video_dev);
++ zoran_exit_video_devices(zr);
+ exit_free:
+ v4l2_ctrl_handler_free(&zr->hdl);
+ v4l2_device_unregister(&zr->v4l2_dev);
+@@ -1069,8 +1089,10 @@ static int zoran_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err)
+- return -ENODEV;
+- vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32));
++ return err;
++ err = vb2_dma_contig_set_max_seg_size(&pdev->dev, U32_MAX);
++ if (err)
++ return err;
+
+ nr = zoran_num++;
+ if (nr >= BUZ_MAX) {
+diff --git a/drivers/staging/media/zoran/zoran_device.c b/drivers/staging/media/zoran/zoran_device.c
+index 5b12a730a2290..fb1f0465ca87f 100644
+--- a/drivers/staging/media/zoran/zoran_device.c
++++ b/drivers/staging/media/zoran/zoran_device.c
+@@ -814,7 +814,7 @@ static void zoran_reap_stat_com(struct zoran *zr)
+ if (zr->jpg_settings.tmp_dcm == 1)
+ i = (zr->jpg_dma_tail - zr->jpg_err_shift) & BUZ_MASK_STAT_COM;
+ else
+- i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2 + 1;
++ i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2;
+
+ stat_com = le32_to_cpu(zr->stat_com[i]);
+ if ((stat_com & 1) == 0) {
+@@ -826,6 +826,11 @@ static void zoran_reap_stat_com(struct zoran *zr)
+ size = (stat_com & GENMASK(22, 1)) >> 1;
+
+ buf = zr->inuse[i];
++ if (!buf) {
++ spin_unlock_irqrestore(&zr->queued_bufs_lock, flags);
++ pci_err(zr->pci_dev, "No buffer at slot %d\n", i);
++ return;
++ }
+ buf->vbuf.vb2_buf.timestamp = ktime_get_ns();
+
+ if (zr->codec_mode == BUZ_MODE_MOTION_COMPRESS) {
+diff --git a/drivers/staging/media/zoran/zoran_driver.c b/drivers/staging/media/zoran/zoran_driver.c
+index 46382e43f1bf7..84665637ebb79 100644
+--- a/drivers/staging/media/zoran/zoran_driver.c
++++ b/drivers/staging/media/zoran/zoran_driver.c
+@@ -255,8 +255,6 @@ static int zoran_querycap(struct file *file, void *__fh, struct v4l2_capability
+ strscpy(cap->card, ZR_DEVNAME(zr), sizeof(cap->card));
+ strscpy(cap->driver, "zoran", sizeof(cap->driver));
+ snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", pci_name(zr->pci_dev));
+- cap->device_caps = zr->video_dev->device_caps;
+- cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+ return 0;
+ }
+
+@@ -582,6 +580,9 @@ static int zoran_s_std(struct file *file, void *__fh, v4l2_std_id std)
+ struct zoran *zr = video_drvdata(file);
+ int res = 0;
+
++ if (zr->norm == std)
++ return 0;
++
+ if (zr->running != ZORAN_MAP_MODE_NONE)
+ return -EBUSY;
+
+@@ -739,6 +740,7 @@ static int zoran_g_parm(struct file *file, void *priv, struct v4l2_streamparm *p
+ if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ return -EINVAL;
+
++ parm->parm.capture.readbuffers = 9;
+ return 0;
+ }
+
+@@ -869,6 +871,10 @@ int zr_set_buf(struct zoran *zr)
+ vbuf = &buf->vbuf;
+
+ buf->vbuf.field = V4L2_FIELD_INTERLACED;
++ if (BUZ_MAX_HEIGHT < (zr->v4l_settings.height * 2))
++ buf->vbuf.field = V4L2_FIELD_INTERLACED;
++ else
++ buf->vbuf.field = V4L2_FIELD_TOP;
+ vb2_set_plane_payload(&buf->vbuf.vb2_buf, 0, zr->buffer_size);
+ vb2_buffer_done(&buf->vbuf.vb2_buf, VB2_BUF_STATE_DONE);
+ zr->inuse[0] = NULL;
+@@ -928,6 +934,7 @@ static int zr_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
+ zr->stat_com[j] = cpu_to_le32(1);
+ zr->inuse[j] = NULL;
+ }
++ zr->vbseq = 0;
+
+ if (zr->map_mode != ZORAN_MAP_MODE_RAW) {
+ pci_info(zr->pci_dev, "START JPG\n");
+@@ -1008,7 +1015,7 @@ static const struct vb2_ops zr_video_qops = {
+ .wait_finish = vb2_ops_wait_finish,
+ };
+
+-int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq)
++int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir)
+ {
+ int err;
+
+@@ -1016,8 +1023,9 @@ int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq)
+ INIT_LIST_HEAD(&zr->queued_bufs);
+
+ vq->dev = &zr->pci_dev->dev;
+- vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+- vq->io_modes = VB2_USERPTR | VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE;
++ vq->type = dir;
++
++ vq->io_modes = VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE;
+ vq->drv_priv = zr;
+ vq->buf_struct_size = sizeof(struct zr_buffer);
+ vq->ops = &zr_video_qops;
+diff --git a/drivers/staging/most/dim2/Makefile b/drivers/staging/most/dim2/Makefile
+index 861adacf6c729..5f9612af3fa3c 100644
+--- a/drivers/staging/most/dim2/Makefile
++++ b/drivers/staging/most/dim2/Makefile
+@@ -1,4 +1,4 @@
+ # SPDX-License-Identifier: GPL-2.0
+ obj-$(CONFIG_MOST_DIM2) += most_dim2.o
+
+-most_dim2-objs := dim2.o hal.o sysfs.o
++most_dim2-objs := dim2.o hal.o
+diff --git a/drivers/staging/most/dim2/dim2.c b/drivers/staging/most/dim2/dim2.c
+index 093ef9a2b2919..81e062009d271 100644
+--- a/drivers/staging/most/dim2/dim2.c
++++ b/drivers/staging/most/dim2/dim2.c
+@@ -117,7 +117,8 @@ struct dim2_platform_data {
+ (((p)[1] == 0x18) && ((p)[2] == 0x05) && ((p)[3] == 0x0C) && \
+ ((p)[13] == 0x3C) && ((p)[14] == 0x00) && ((p)[15] == 0x0A))
+
+-bool dim2_sysfs_get_state_cb(void)
++static ssize_t state_show(struct device *dev, struct device_attribute *attr,
++ char *buf)
+ {
+ bool state;
+ unsigned long flags;
+@@ -126,9 +127,18 @@ bool dim2_sysfs_get_state_cb(void)
+ state = dim_get_lock_state();
+ spin_unlock_irqrestore(&dim_lock, flags);
+
+- return state;
++ return sysfs_emit(buf, "%s\n", state ? "locked" : "");
+ }
+
++static DEVICE_ATTR_RO(state);
++
++static struct attribute *dim2_attrs[] = {
++ &dev_attr_state.attr,
++ NULL,
++};
++
++ATTRIBUTE_GROUPS(dim2);
++
+ /**
+ * dimcb_on_error - callback from HAL to report miscommunication between
+ * HDM and HAL
+@@ -716,6 +726,23 @@ static int get_dim2_clk_speed(const char *clock_speed, u8 *val)
+ return -EINVAL;
+ }
+
++static void dim2_release(struct device *d)
++{
++ struct dim2_hdm *dev = container_of(d, struct dim2_hdm, dev);
++ unsigned long flags;
++
++ kthread_stop(dev->netinfo_task);
++
++ spin_lock_irqsave(&dim_lock, flags);
++ dim_shutdown();
++ spin_unlock_irqrestore(&dim_lock, flags);
++
++ if (dev->disable_platform)
++ dev->disable_platform(to_platform_device(d->parent));
++
++ kfree(dev);
++}
++
+ /*
+ * dim2_probe - dim2 probe handler
+ * @pdev: platform device structure
+@@ -736,7 +763,7 @@ static int dim2_probe(struct platform_device *pdev)
+
+ enum { MLB_INT_IDX, AHB0_INT_IDX };
+
+- dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
++ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+@@ -748,25 +775,27 @@ static int dim2_probe(struct platform_device *pdev)
+ "microchip,clock-speed", &clock_speed);
+ if (ret) {
+ dev_err(&pdev->dev, "missing dt property clock-speed\n");
+- return ret;
++ goto err_free_dev;
+ }
+
+ ret = get_dim2_clk_speed(clock_speed, &dev->clk_speed);
+ if (ret) {
+ dev_err(&pdev->dev, "bad dt property clock-speed\n");
+- return ret;
++ goto err_free_dev;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dev->io_base = devm_ioremap_resource(&pdev->dev, res);
+- if (IS_ERR(dev->io_base))
+- return PTR_ERR(dev->io_base);
++ if (IS_ERR(dev->io_base)) {
++ ret = PTR_ERR(dev->io_base);
++ goto err_free_dev;
++ }
+
+ of_id = of_match_node(dim2_of_match, pdev->dev.of_node);
+ pdata = of_id->data;
+ ret = pdata && pdata->enable ? pdata->enable(pdev) : 0;
+ if (ret)
+- return ret;
++ goto err_free_dev;
+
+ dev->disable_platform = pdata ? pdata->disable : NULL;
+
+@@ -857,32 +886,19 @@ static int dim2_probe(struct platform_device *pdev)
+ dev->most_iface.request_netinfo = request_netinfo;
+ dev->most_iface.driver_dev = &pdev->dev;
+ dev->most_iface.dev = &dev->dev;
+- dev->dev.init_name = "dim2_state";
++ dev->dev.init_name = dev->name;
+ dev->dev.parent = &pdev->dev;
++ dev->dev.release = dim2_release;
+
+- ret = most_register_interface(&dev->most_iface);
+- if (ret) {
+- dev_err(&pdev->dev, "failed to register MOST interface\n");
+- goto err_stop_thread;
+- }
+-
+- ret = dim2_sysfs_probe(&dev->dev);
+- if (ret) {
+- dev_err(&pdev->dev, "failed to create sysfs attribute\n");
+- goto err_unreg_iface;
+- }
+-
+- return 0;
++ return most_register_interface(&dev->most_iface);
+
+-err_unreg_iface:
+- most_deregister_interface(&dev->most_iface);
+-err_stop_thread:
+- kthread_stop(dev->netinfo_task);
+ err_shutdown_dim:
+ dim_shutdown();
+ err_disable_platform:
+ if (dev->disable_platform)
+ dev->disable_platform(pdev);
++err_free_dev:
++ kfree(dev);
+
+ return ret;
+ }
+@@ -896,18 +912,8 @@ err_disable_platform:
+ static int dim2_remove(struct platform_device *pdev)
+ {
+ struct dim2_hdm *dev = platform_get_drvdata(pdev);
+- unsigned long flags;
+
+- dim2_sysfs_destroy(&dev->dev);
+ most_deregister_interface(&dev->most_iface);
+- kthread_stop(dev->netinfo_task);
+-
+- spin_lock_irqsave(&dim_lock, flags);
+- dim_shutdown();
+- spin_unlock_irqrestore(&dim_lock, flags);
+-
+- if (dev->disable_platform)
+- dev->disable_platform(pdev);
+
+ return 0;
+ }
+@@ -1082,6 +1088,7 @@ static struct platform_driver dim2_driver = {
+ .driver = {
+ .name = "hdm_dim2",
+ .of_match_table = dim2_of_match,
++ .dev_groups = dim2_groups,
+ },
+ };
+
+diff --git a/drivers/staging/most/dim2/sysfs.c b/drivers/staging/most/dim2/sysfs.c
+deleted file mode 100644
+index c85b2cdcdca3d..0000000000000
+--- a/drivers/staging/most/dim2/sysfs.c
++++ /dev/null
+@@ -1,49 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * sysfs.c - MediaLB sysfs information
+- *
+- * Copyright (C) 2015, Microchip Technology Germany II GmbH & Co. KG
+- */
+-
+-/* Author: Andrey Shvetsov <andrey.shvetsov@k2l.de> */
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/kernel.h>
+-#include "sysfs.h"
+-#include <linux/device.h>
+-
+-static ssize_t state_show(struct device *dev, struct device_attribute *attr,
+- char *buf)
+-{
+- bool state = dim2_sysfs_get_state_cb();
+-
+- return sprintf(buf, "%s\n", state ? "locked" : "");
+-}
+-
+-static DEVICE_ATTR_RO(state);
+-
+-static struct attribute *dev_attrs[] = {
+- &dev_attr_state.attr,
+- NULL,
+-};
+-
+-static struct attribute_group dev_attr_group = {
+- .attrs = dev_attrs,
+-};
+-
+-static const struct attribute_group *dev_attr_groups[] = {
+- &dev_attr_group,
+- NULL,
+-};
+-
+-int dim2_sysfs_probe(struct device *dev)
+-{
+- dev->groups = dev_attr_groups;
+- return device_register(dev);
+-}
+-
+-void dim2_sysfs_destroy(struct device *dev)
+-{
+- device_unregister(dev);
+-}
+diff --git a/drivers/staging/most/dim2/sysfs.h b/drivers/staging/most/dim2/sysfs.h
+index 24277a17cff3d..09115cf4ed00e 100644
+--- a/drivers/staging/most/dim2/sysfs.h
++++ b/drivers/staging/most/dim2/sysfs.h
+@@ -16,15 +16,4 @@ struct medialb_bus {
+ struct kobject kobj_group;
+ };
+
+-struct device;
+-
+-int dim2_sysfs_probe(struct device *dev);
+-void dim2_sysfs_destroy(struct device *dev);
+-
+-/*
+- * callback,
+- * must deliver MediaLB state as true if locked or false if unlocked
+- */
+-bool dim2_sysfs_get_state_cb(void);
+-
+ #endif /* DIM2_SYSFS_H */
+diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts
+index b65d716868146..cf5d6e9a9b548 100644
+--- a/drivers/staging/mt7621-dts/gbpc1.dts
++++ b/drivers/staging/mt7621-dts/gbpc1.dts
+@@ -11,14 +11,15 @@
+
+ memory@0 {
+ device_type = "memory";
+- reg = <0x0 0x1c000000>, <0x20000000 0x4000000>;
++ reg = <0x00000000 0x1c000000>,
++ <0x20000000 0x04000000>;
+ };
+
+ chosen {
+ bootargs = "console=ttyS0,57600";
+ };
+
+- palmbus: palmbus@1E000000 {
++ palmbus: palmbus@1e000000 {
+ i2c@900 {
+ status = "okay";
+ };
+@@ -37,24 +38,16 @@
+ gpio-leds {
+ compatible = "gpio-leds";
+
+- system {
+- label = "gb-pc1:green:system";
++ power {
++ label = "green:power";
+ gpios = <&gpio 6 GPIO_ACTIVE_LOW>;
++ linux,default-trigger = "default-on";
+ };
+
+- status {
+- label = "gb-pc1:green:status";
++ system {
++ label = "green:system";
+ gpios = <&gpio 8 GPIO_ACTIVE_LOW>;
+- };
+-
+- lan1 {
+- label = "gb-pc1:green:lan1";
+- gpios = <&gpio 24 GPIO_ACTIVE_LOW>;
+- };
+-
+- lan2 {
+- label = "gb-pc1:green:lan2";
+- gpios = <&gpio 25 GPIO_ACTIVE_LOW>;
++ linux,default-trigger = "disk-activity";
+ };
+ };
+ };
+@@ -94,9 +87,8 @@
+
+ partition@50000 {
+ label = "firmware";
+- reg = <0x50000 0x1FB0000>;
++ reg = <0x50000 0x1fb0000>;
+ };
+-
+ };
+ };
+
+@@ -105,9 +97,12 @@
+ };
+
+ &pinctrl {
+- state_default: pinctrl0 {
+- default_gpio: gpio {
+- groups = "wdt", "rgmii2", "uart3";
++ pinctrl-names = "default";
++ pinctrl-0 = <&state_default>;
++
++ state_default: state-default {
++ gpio-pinmux {
++ groups = "rgmii2", "uart3", "wdt";
+ function = "gpio";
+ };
+ };
+@@ -116,12 +111,13 @@
+ &switch0 {
+ ports {
+ port@0 {
++ status = "okay";
+ label = "ethblack";
+- status = "ok";
+ };
++
+ port@4 {
++ status = "okay";
+ label = "ethblue";
+- status = "ok";
+ };
+ };
+ };
+diff --git a/drivers/staging/mt7621-dts/gbpc2.dts b/drivers/staging/mt7621-dts/gbpc2.dts
+index 52760e7351f6c..6f6fed071dda0 100644
+--- a/drivers/staging/mt7621-dts/gbpc2.dts
++++ b/drivers/staging/mt7621-dts/gbpc2.dts
+@@ -1,21 +1,121 @@
+ /dts-v1/;
+
+-#include "gbpc1.dts"
++#include "mt7621.dtsi"
++
++#include <dt-bindings/gpio/gpio.h>
++#include <dt-bindings/input/input.h>
+
+ / {
+ compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc";
+ model = "GB-PC2";
++
++ memory@0 {
++ device_type = "memory";
++ reg = <0x00000000 0x1c000000>,
++ <0x20000000 0x04000000>;
++ };
++
++ chosen {
++ bootargs = "console=ttyS0,57600";
++ };
++
++ palmbus: palmbus@1e000000 {
++ i2c@900 {
++ status = "okay";
++ };
++ };
++
++ gpio-keys {
++ compatible = "gpio-keys";
++
++ reset {
++ label = "reset";
++ gpios = <&gpio 18 GPIO_ACTIVE_HIGH>;
++ linux,code = <KEY_RESTART>;
++ };
++ };
++};
++
++&sdhci {
++ status = "okay";
++};
++
++&spi0 {
++ status = "okay";
++
++ m25p80@0 {
++ #address-cells = <1>;
++ #size-cells = <1>;
++ compatible = "jedec,spi-nor";
++ reg = <0>;
++ spi-max-frequency = <50000000>;
++ broken-flash-reset;
++
++ partition@0 {
++ label = "u-boot";
++ reg = <0x0 0x30000>;
++ read-only;
++ };
++
++ partition@30000 {
++ label = "u-boot-env";
++ reg = <0x30000 0x10000>;
++ read-only;
++ };
++
++ factory: partition@40000 {
++ label = "factory";
++ reg = <0x40000 0x10000>;
++ read-only;
++ };
++
++ partition@50000 {
++ label = "firmware";
++ reg = <0x50000 0x1fb0000>;
++ };
++ };
+ };
+
+-&default_gpio {
+- groups = "wdt", "uart3";
+- function = "gpio";
++&pcie {
++ status = "okay";
+ };
+
+-&gmac1 {
+- status = "ok";
++&pinctrl {
++ pinctrl-names = "default";
++ pinctrl-0 = <&state_default>;
++
++ state_default: state-default {
++ gpio-pinmux {
++ groups = "wdt";
++ function = "gpio";
++ };
++ };
+ };
+
+-&phy_external {
+- status = "ok";
++&ethernet {
++ gmac1: mac@1 {
++ status = "okay";
++ phy-handle = <&ethphy7>;
++ };
++
++ mdio-bus {
++ ethphy7: ethernet-phy@7 {
++ reg = <7>;
++ phy-mode = "rgmii-rxid";
++ };
++ };
++};
++
++&switch0 {
++ ports {
++ port@0 {
++ status = "okay";
++ label = "ethblack";
++ };
++
++ port@4 {
++ status = "okay";
++ label = "ethblue";
++ };
++ };
+ };
+diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi
+index eeabe9c0f4fb8..04c4d6eeea19b 100644
+--- a/drivers/staging/mt7621-dts/mt7621.dtsi
++++ b/drivers/staging/mt7621-dts/mt7621.dtsi
+@@ -36,9 +36,9 @@
+ regulator-max-microvolt = <3300000>;
+ enable-active-high;
+ regulator-always-on;
+- };
++ };
+
+- mmc_fixed_1v8_io: fixedregulator@1 {
++ mmc_fixed_1v8_io: fixedregulator@1 {
+ compatible = "regulator-fixed";
+ regulator-name = "mmc_io";
+ regulator-min-microvolt = <1800000>;
+@@ -47,10 +47,10 @@
+ regulator-always-on;
+ };
+
+- palmbus: palmbus@1E000000 {
++ palmbus: palmbus@1e000000 {
+ compatible = "palmbus";
+- reg = <0x1E000000 0x100000>;
+- ranges = <0x0 0x1E000000 0x0FFFFF>;
++ reg = <0x1e000000 0x100000>;
++ ranges = <0x0 0x1e000000 0x0fffff>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+@@ -301,11 +301,11 @@
+ #reset-cells = <1>;
+ };
+
+- sdhci: sdhci@1E130000 {
++ sdhci: sdhci@1e130000 {
+ status = "disabled";
+
+ compatible = "mediatek,mt7620-mmc";
+- reg = <0x1E130000 0x4000>;
++ reg = <0x1e130000 0x4000>;
+
+ bus-width = <4>;
+ max-frequency = <48000000>;
+@@ -327,7 +327,7 @@
+ interrupts = <GIC_SHARED 20 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+- xhci: xhci@1E1C0000 {
++ xhci: xhci@1e1c0000 {
+ status = "okay";
+
+ compatible = "mediatek,mt8173-xhci";
+@@ -391,37 +391,32 @@
+
+ mediatek,ethsys = <&sysc>;
+
++ pinctrl-names = "default";
++ pinctrl-0 = <&mdio_pins>, <&rgmii1_pins>, <&rgmii2_pins>;
+
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
++
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
++
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ status = "off";
+ phy-mode = "rgmii-rxid";
+- phy-handle = <&phy_external>;
+ };
++
+ mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+- phy_external: ethernet-phy@5 {
+- status = "off";
+- reg = <5>;
+- phy-mode = "rgmii-rxid";
+-
+- pinctrl-names = "default";
+- pinctrl-0 = <&rgmii2_pins>;
+- };
+-
+ switch0: switch0@0 {
+ compatible = "mediatek,mt7621";
+ #address-cells = <1>;
+@@ -439,36 +434,43 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
++
+ port@0 {
+ status = "off";
+ reg = <0>;
+ label = "lan0";
+ };
++
+ port@1 {
+ status = "off";
+ reg = <1>;
+ label = "lan1";
+ };
++
+ port@2 {
+ status = "off";
+ reg = <2>;
+ label = "lan2";
+ };
++
+ port@3 {
+ status = "off";
+ reg = <3>;
+ label = "lan3";
+ };
++
+ port@4 {
+ status = "off";
+ reg = <4>;
+ label = "lan4";
+ };
++
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "trgmii";
++
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c
+index 6acfc94a16e73..b520d1e0edd14 100644
+--- a/drivers/staging/mt7621-pci/pci-mt7621.c
++++ b/drivers/staging/mt7621-pci/pci-mt7621.c
+@@ -93,8 +93,8 @@ struct mt7621_pcie_port {
+ * reset lines are inverted.
+ */
+ struct mt7621_pcie {
+- void __iomem *base;
+ struct device *dev;
++ void __iomem *base;
+ struct list_head ports;
+ bool resets_inverted;
+ };
+@@ -129,7 +129,7 @@ static inline void pcie_port_write(struct mt7621_pcie_port *port,
+ writel_relaxed(val, port->base + reg);
+ }
+
+-static inline u32 mt7621_pci_get_cfgaddr(unsigned int bus, unsigned int slot,
++static inline u32 mt7621_pcie_get_cfgaddr(unsigned int bus, unsigned int slot,
+ unsigned int func, unsigned int where)
+ {
+ return (((where & 0xF00) >> 8) << 24) | (bus << 16) | (slot << 11) |
+@@ -140,7 +140,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus,
+ unsigned int devfn, int where)
+ {
+ struct mt7621_pcie *pcie = bus->sysdata;
+- u32 address = mt7621_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn),
++ u32 address = mt7621_pcie_get_cfgaddr(bus->number, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), where);
+
+ writel_relaxed(address, pcie->base + RALINK_PCI_CONFIG_ADDR);
+@@ -148,7 +148,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus,
+ return pcie->base + RALINK_PCI_CONFIG_DATA + (where & 3);
+ }
+
+-struct pci_ops mt7621_pci_ops = {
++struct pci_ops mt7621_pcie_ops = {
+ .map_bus = mt7621_pcie_map_bus,
+ .read = pci_generic_config_read,
+ .write = pci_generic_config_write,
+@@ -156,7 +156,7 @@ struct pci_ops mt7621_pci_ops = {
+
+ static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg)
+ {
+- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg);
++ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg);
+
+ pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
+ return pcie_read(pcie, RALINK_PCI_CONFIG_DATA);
+@@ -165,7 +165,7 @@ static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg)
+ static void write_config(struct mt7621_pcie *pcie, unsigned int dev,
+ u32 reg, u32 val)
+ {
+- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg);
++ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg);
+
+ pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR);
+ pcie_write(pcie, val, RALINK_PCI_CONFIG_DATA);
+@@ -505,16 +505,17 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host)
+ {
+ struct mt7621_pcie *pcie = pci_host_bridge_priv(host);
+
+- host->ops = &mt7621_pci_ops;
++ host->ops = &mt7621_pcie_ops;
+ host->sysdata = pcie;
+ return pci_host_probe(host);
+ }
+
+-static const struct soc_device_attribute mt7621_pci_quirks_match[] = {
+- { .soc_id = "mt7621", .revision = "E2" }
++static const struct soc_device_attribute mt7621_pcie_quirks_match[] = {
++ { .soc_id = "mt7621", .revision = "E2" },
++ { /* sentinel */ }
+ };
+
+-static int mt7621_pci_probe(struct platform_device *pdev)
++static int mt7621_pcie_probe(struct platform_device *pdev)
+ {
+ struct device *dev = &pdev->dev;
+ const struct soc_device_attribute *attr;
+@@ -535,7 +536,7 @@ static int mt7621_pci_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, pcie);
+ INIT_LIST_HEAD(&pcie->ports);
+
+- attr = soc_device_match(mt7621_pci_quirks_match);
++ attr = soc_device_match(mt7621_pcie_quirks_match);
+ if (attr)
+ pcie->resets_inverted = true;
+
+@@ -572,7 +573,7 @@ remove_resets:
+ return err;
+ }
+
+-static int mt7621_pci_remove(struct platform_device *pdev)
++static int mt7621_pcie_remove(struct platform_device *pdev)
+ {
+ struct mt7621_pcie *pcie = platform_get_drvdata(pdev);
+ struct mt7621_pcie_port *port;
+@@ -583,18 +584,18 @@ static int mt7621_pci_remove(struct platform_device *pdev)
+ return 0;
+ }
+
+-static const struct of_device_id mt7621_pci_ids[] = {
++static const struct of_device_id mt7621_pcie_ids[] = {
+ { .compatible = "mediatek,mt7621-pci" },
+ {},
+ };
+-MODULE_DEVICE_TABLE(of, mt7621_pci_ids);
++MODULE_DEVICE_TABLE(of, mt7621_pcie_ids);
+
+-static struct platform_driver mt7621_pci_driver = {
+- .probe = mt7621_pci_probe,
+- .remove = mt7621_pci_remove,
++static struct platform_driver mt7621_pcie_driver = {
++ .probe = mt7621_pcie_probe,
++ .remove = mt7621_pcie_remove,
+ .driver = {
+ .name = "mt7621-pci",
+- .of_match_table = of_match_ptr(mt7621_pci_ids),
++ .of_match_table = of_match_ptr(mt7621_pcie_ids),
+ },
+ };
+-builtin_platform_driver(mt7621_pci_driver);
++builtin_platform_driver(mt7621_pcie_driver);
+diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
+index b33e34cce12e4..f9a8cdd9a1689 100644
+--- a/drivers/staging/r8188eu/core/rtw_led.c
++++ b/drivers/staging/r8188eu/core/rtw_led.c
+@@ -74,6 +74,7 @@ void DeInitLed871x(struct LED_871x *pLed)
+ _cancel_workitem_sync(&pLed->BlinkWorkItem);
+ _cancel_timer_ex(&pLed->BlinkTimer);
+ ResetLedStatus(pLed);
++ SwLedOff(pLed->padapter, pLed);
+ }
+
+ /* */
+diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c
+index 1115ff5d865ad..bd991d7ed8090 100644
+--- a/drivers/staging/r8188eu/core/rtw_mlme.c
++++ b/drivers/staging/r8188eu/core/rtw_mlme.c
+@@ -1722,6 +1722,8 @@ int rtw_set_key(struct adapter *adapter, struct security_priv *psecuritypriv, in
+ psetkeyparm->grpkey = 1;
+ break;
+ default:
++ kfree(psetkeyparm);
++ kfree(pcmd);
+ res = _FAIL;
+ goto exit;
+ }
+diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+index 5a472a4954b0f..63d312d01171e 100644
+--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
++++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+@@ -104,6 +104,7 @@ static struct rt_channel_plan_map RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
+ {0x01}, /* 0x10, RT_CHANNEL_DOMAIN_JAPAN */
+ {0x02}, /* 0x11, RT_CHANNEL_DOMAIN_FCC_NO_DFS */
+ {0x01}, /* 0x12, RT_CHANNEL_DOMAIN_JAPAN_NO_DFS */
++ {0x00}, /* 0x13 */
+ {0x02}, /* 0x14, RT_CHANNEL_DOMAIN_TAIWAN_NO_DFS */
+ {0x00}, /* 0x15, RT_CHANNEL_DOMAIN_ETSI_NO_DFS */
+ {0x00}, /* 0x16, RT_CHANNEL_DOMAIN_KOREA_NO_DFS */
+@@ -115,6 +116,7 @@ static struct rt_channel_plan_map RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
+ {0x00}, /* 0x1C, */
+ {0x00}, /* 0x1D, */
+ {0x00}, /* 0x1E, */
++ {0x00}, /* 0x1F, */
+ /* 0x20 ~ 0x7F , New Define ===== */
+ {0x00}, /* 0x20, RT_CHANNEL_DOMAIN_WORLD_NULL */
+ {0x01}, /* 0x21, RT_CHANNEL_DOMAIN_ETSI1_NULL */
+@@ -7080,12 +7082,12 @@ void report_del_sta_event(struct adapter *padapter, unsigned char *MacAddr, unsi
+ struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv;
+ struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
+
+- pcmd_obj = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL);
++ pcmd_obj = kzalloc(sizeof(*pcmd_obj), GFP_ATOMIC);
+ if (!pcmd_obj)
+ return;
+
+ cmdsz = (sizeof(struct stadel_event) + sizeof(struct C2HEvent_Header));
+- pevtcmd = kzalloc(cmdsz, GFP_KERNEL);
++ pevtcmd = kzalloc(cmdsz, GFP_ATOMIC);
+ if (!pevtcmd) {
+ kfree(pcmd_obj);
+ return;
+diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
+index e082edfbaad82..30ca9f1e03639 100644
+--- a/drivers/staging/r8188eu/core/rtw_recv.c
++++ b/drivers/staging/r8188eu/core/rtw_recv.c
+@@ -1942,8 +1942,7 @@ static int recv_func(struct adapter *padapter, struct recv_frame *rframe)
+ struct recv_frame *pending_frame;
+ int cnt = 0;
+
+- pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue);
+- while (pending_frame) {
++ while ((pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue))) {
+ cnt++;
+ recv_func_posthandle(padapter, pending_frame);
+ }
+diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
+index 46fe62c7c32ce..af13079a6d2c8 100644
+--- a/drivers/staging/r8188eu/core/rtw_xmit.c
++++ b/drivers/staging/r8188eu/core/rtw_xmit.c
+@@ -179,7 +179,11 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
+
+ pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
+
+- rtw_alloc_hwxmits(padapter);
++ if (rtw_alloc_hwxmits(padapter)) {
++ res = _FAIL;
++ goto exit;
++ }
++
+ rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
+
+ for (i = 0; i < 4; i++)
+@@ -1516,7 +1520,7 @@ exit:
+ return res;
+ }
+
+-void rtw_alloc_hwxmits(struct adapter *padapter)
++int rtw_alloc_hwxmits(struct adapter *padapter)
+ {
+ struct hw_xmit *hwxmits;
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
+@@ -1524,22 +1528,17 @@ void rtw_alloc_hwxmits(struct adapter *padapter)
+ pxmitpriv->hwxmit_entry = HWXMIT_ENTRY;
+
+ pxmitpriv->hwxmits = kzalloc(sizeof(struct hw_xmit) * pxmitpriv->hwxmit_entry, GFP_KERNEL);
++ if (!pxmitpriv->hwxmits)
++ return -ENOMEM;
+
+ hwxmits = pxmitpriv->hwxmits;
+
+- if (pxmitpriv->hwxmit_entry == 5) {
+- hwxmits[0] .sta_queue = &pxmitpriv->bm_pending;
+- hwxmits[1] .sta_queue = &pxmitpriv->vo_pending;
+- hwxmits[2] .sta_queue = &pxmitpriv->vi_pending;
+- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
+- hwxmits[4] .sta_queue = &pxmitpriv->be_pending;
+- } else if (pxmitpriv->hwxmit_entry == 4) {
+- hwxmits[0] .sta_queue = &pxmitpriv->vo_pending;
+- hwxmits[1] .sta_queue = &pxmitpriv->vi_pending;
+- hwxmits[2] .sta_queue = &pxmitpriv->be_pending;
+- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
+- } else {
+- }
++ hwxmits[0].sta_queue = &pxmitpriv->vo_pending;
++ hwxmits[1].sta_queue = &pxmitpriv->vi_pending;
++ hwxmits[2].sta_queue = &pxmitpriv->be_pending;
++ hwxmits[3].sta_queue = &pxmitpriv->bk_pending;
++
++ return 0;
+ }
+
+ void rtw_free_hwxmits(struct adapter *padapter)
+diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
+index 14758361960cc..9f2b86f9b6604 100644
+--- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
++++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
+@@ -572,10 +572,10 @@ static int load_firmware(struct rt_firmware *pFirmware, struct device *device)
+ }
+ memcpy(pFirmware->szFwBuffer, fw->data, fw->size);
+ pFirmware->ulFwLength = fw->size;
+- release_firmware(fw);
+- DBG_88E_LEVEL(_drv_info_, "+%s: !bUsedWoWLANFw, FmrmwareLen:%d+\n", __func__, pFirmware->ulFwLength);
++ dev_dbg(device, "!bUsedWoWLANFw, FmrmwareLen:%d+\n", pFirmware->ulFwLength);
+
+ Exit:
++ release_firmware(fw);
+ return rtStatus;
+ }
+
+diff --git a/drivers/staging/r8188eu/include/rtw_xmit.h b/drivers/staging/r8188eu/include/rtw_xmit.h
+index 5f6e2402e5c4d..762a2fa3bd17c 100644
+--- a/drivers/staging/r8188eu/include/rtw_xmit.h
++++ b/drivers/staging/r8188eu/include/rtw_xmit.h
+@@ -345,7 +345,7 @@ s32 rtw_txframes_sta_ac_pending(struct adapter *padapter,
+ void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry);
+ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter);
+ void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv);
+-void rtw_alloc_hwxmits(struct adapter *padapter);
++int rtw_alloc_hwxmits(struct adapter *padapter);
+ void rtw_free_hwxmits(struct adapter *padapter);
+ s32 rtw_xmit(struct adapter *padapter, struct sk_buff **pkt);
+
+diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+index 1fd3750760018..ca376f7efd42b 100644
+--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
++++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+@@ -465,12 +465,11 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
+
+ if (wep_key_len > 0) {
+ wep_key_len = wep_key_len <= 5 ? 5 : 13;
+- wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, KeyMaterial);
+- pwep = kmalloc(wep_total_len, GFP_KERNEL);
++ wep_total_len = wep_key_len + sizeof(*pwep);
++ pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ if (!pwep)
+ goto exit;
+
+- memset(pwep, 0, wep_total_len);
+ pwep->KeyLength = wep_key_len;
+ pwep->Length = wep_total_len;
+ if (wep_key_len == 13) {
+@@ -1249,9 +1248,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
+ break;
+ }
+ sec_len = *(pos++); len -= 1;
+- if (sec_len > 0 && sec_len <= len) {
++ if (sec_len > 0 &&
++ sec_len <= len &&
++ sec_len <= 32) {
+ ssid[ssid_index].SsidLength = sec_len;
+- memcpy(ssid[ssid_index].Ssid, pos, ssid[ssid_index].SsidLength);
++ memcpy(ssid[ssid_index].Ssid, pos, sec_len);
+ ssid_index++;
+ }
+ pos += sec_len;
+@@ -1978,7 +1979,7 @@ static int rtw_wx_set_enc_ext(struct net_device *dev,
+ struct ieee_param *param = NULL;
+ struct iw_point *pencoding = &wrqu->encoding;
+ struct iw_encode_ext *pext = (struct iw_encode_ext *)extra;
+- int ret = 0;
++ int ret = -1;
+
+ param_len = sizeof(struct ieee_param) + pext->key_len;
+ param = kzalloc(param_len, GFP_KERNEL);
+@@ -2004,7 +2005,7 @@ static int rtw_wx_set_enc_ext(struct net_device *dev,
+ alg_name = "CCMP";
+ break;
+ default:
+- return -1;
++ goto out;
+ }
+
+ strncpy((char *)param->u.crypt.alg, alg_name, IEEE_CRYPT_ALG_NAME_LEN);
+@@ -2031,6 +2032,7 @@ static int rtw_wx_set_enc_ext(struct net_device *dev,
+
+ ret = wpa_set_encryption(dev, param, param_len);
+
++out:
+ kfree(param);
+ return ret;
+ }
+@@ -2049,93 +2051,6 @@ static int rtw_wx_get_nick(struct net_device *dev,
+ return 0;
+ }
+
+-static int rtw_wx_read32(struct net_device *dev,
+- struct iw_request_info *info,
+- union iwreq_data *wrqu, char *extra)
+-{
+- struct adapter *padapter;
+- struct iw_point *p;
+- u16 len;
+- u32 addr;
+- u32 data32;
+- u32 bytes;
+- u8 *ptmp;
+-
+- padapter = (struct adapter *)rtw_netdev_priv(dev);
+- p = &wrqu->data;
+- len = p->length;
+- ptmp = kmalloc(len, GFP_KERNEL);
+- if (!ptmp)
+- return -ENOMEM;
+-
+- if (copy_from_user(ptmp, p->pointer, len)) {
+- kfree(ptmp);
+- return -EFAULT;
+- }
+-
+- bytes = 0;
+- addr = 0;
+- sscanf(ptmp, "%d,%x", &bytes, &addr);
+-
+- switch (bytes) {
+- case 1:
+- data32 = rtw_read8(padapter, addr);
+- sprintf(extra, "0x%02X", data32);
+- break;
+- case 2:
+- data32 = rtw_read16(padapter, addr);
+- sprintf(extra, "0x%04X", data32);
+- break;
+- case 4:
+- data32 = rtw_read32(padapter, addr);
+- sprintf(extra, "0x%08X", data32);
+- break;
+- default:
+- DBG_88E(KERN_INFO "%s: usage> read [bytes],[address(hex)]\n", __func__);
+- return -EINVAL;
+- }
+- DBG_88E(KERN_INFO "%s: addr = 0x%08X data =%s\n", __func__, addr, extra);
+-
+- kfree(ptmp);
+- return 0;
+-}
+-
+-static int rtw_wx_write32(struct net_device *dev,
+- struct iw_request_info *info,
+- union iwreq_data *wrqu, char *extra)
+-{
+- struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
+-
+- u32 addr;
+- u32 data32;
+- u32 bytes;
+-
+- bytes = 0;
+- addr = 0;
+- data32 = 0;
+- sscanf(extra, "%d,%x,%x", &bytes, &addr, &data32);
+-
+- switch (bytes) {
+- case 1:
+- rtw_write8(padapter, addr, (u8)data32);
+- DBG_88E(KERN_INFO "%s: addr = 0x%08X data = 0x%02X\n", __func__, addr, (u8)data32);
+- break;
+- case 2:
+- rtw_write16(padapter, addr, (u16)data32);
+- DBG_88E(KERN_INFO "%s: addr = 0x%08X data = 0x%04X\n", __func__, addr, (u16)data32);
+- break;
+- case 4:
+- rtw_write32(padapter, addr, data32);
+- DBG_88E(KERN_INFO "%s: addr = 0x%08X data = 0x%08X\n", __func__, addr, data32);
+- break;
+- default:
+- DBG_88E(KERN_INFO "%s: usage> write [bytes],[address(hex)],[data(hex)]\n", __func__);
+- return -EINVAL;
+- }
+-
+- return 0;
+-}
+-
+ static int rtw_wx_read_rf(struct net_device *dev,
+ struct iw_request_info *info,
+ union iwreq_data *wrqu, char *extra)
+@@ -6570,8 +6485,8 @@ static const struct iw_priv_args rtw_private_args[] = {
+ };
+
+ static iw_handler rtw_private_handler[] = {
+-rtw_wx_write32, /* 0x00 */
+-rtw_wx_read32, /* 0x01 */
++ NULL, /* 0x00 */
++ NULL, /* 0x01 */
+ rtw_drvext_hdl, /* 0x02 */
+ rtw_mp_ioctl_hdl, /* 0x03 */
+
+diff --git a/drivers/staging/r8188eu/os_dep/mlme_linux.c b/drivers/staging/r8188eu/os_dep/mlme_linux.c
+index e3ee9dc7ab900..b0d1e20edc4c2 100644
+--- a/drivers/staging/r8188eu/os_dep/mlme_linux.c
++++ b/drivers/staging/r8188eu/os_dep/mlme_linux.c
+@@ -114,7 +114,7 @@ void rtw_report_sec_ie(struct adapter *adapter, u8 authmode, u8 *sec_ie)
+
+ buff = NULL;
+ if (authmode == _WPA_IE_ID_) {
+- buff = kzalloc(IW_CUSTOM_MAX, GFP_KERNEL);
++ buff = kzalloc(IW_CUSTOM_MAX, GFP_ATOMIC);
+ if (!buff)
+ return;
+ p = buff;
+diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
+index 8d0158f4a45d0..30caa1139c8eb 100644
+--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
++++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
+@@ -17,6 +17,7 @@ MODULE_LICENSE("GPL");
+ MODULE_DESCRIPTION("Realtek Wireless Lan Driver");
+ MODULE_AUTHOR("Realtek Semiconductor Corp.");
+ MODULE_VERSION(DRIVERVERSION);
++MODULE_FIRMWARE("rtlwifi/rtl8188eufw.bin");
+
+ #define CONFIG_BR_EXT_BRNAME "br0"
+ #define RTW_NOTCH_FILTER 0 /* 0:Disable, 1:Enable, */
+diff --git a/drivers/staging/r8188eu/os_dep/usb_intf.c b/drivers/staging/r8188eu/os_dep/usb_intf.c
+index bb85ab77fd261..640f1ca2d9855 100644
+--- a/drivers/staging/r8188eu/os_dep/usb_intf.c
++++ b/drivers/staging/r8188eu/os_dep/usb_intf.c
+@@ -30,7 +30,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = {
+ /*=== Realtek demoboard ===*/
+ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0x8179)}, /* 8188EUS */
+ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0x0179)}, /* 8188ETV */
+- {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xf179)}, /* 8188FU */
++ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill USB-N150 Nano */
+ /*=== Customer ID ===*/
+ /****** 8188EUS ********/
+ {USB_DEVICE(0x07B8, 0x8179)}, /* Abocom - Abocom */
+diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+index a7dd1578b2c6a..48c696df8d015 100644
+--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
++++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+@@ -768,6 +768,7 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset)
+ else
+ netif_wake_queue(dev);
+
++ priv->bfirst_after_down = false;
+ return 0;
+ }
+
+@@ -2549,13 +2550,14 @@ static void _rtl92e_pci_disconnect(struct pci_dev *pdev)
+ free_irq(dev->irq, dev);
+ priv->irq = 0;
+ }
+- free_rtllib(dev);
+
+ if (dev->mem_start != 0) {
+ iounmap((void __iomem *)dev->mem_start);
+ release_mem_region(pci_resource_start(pdev, 1),
+ pci_resource_len(pdev, 1));
+ }
++
++ free_rtllib(dev);
+ }
+
+ pci_disable_device(pdev);
+diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+index 756d8db51937f..cd1c4f610159b 100644
+--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
++++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+@@ -185,7 +185,6 @@ static void _rtl92e_dm_init_fsync(struct net_device *dev);
+ static void _rtl92e_dm_deinit_fsync(struct net_device *dev);
+
+ static void _rtl92e_dm_check_txrateandretrycount(struct net_device *dev);
+-static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev);
+ static void _rtl92e_dm_check_fsync(struct net_device *dev);
+ static void _rtl92e_dm_check_rf_ctrl_gpio(void *data);
+ static void _rtl92e_dm_fsync_timer_callback(struct timer_list *t);
+@@ -238,8 +237,6 @@ void rtl92e_dm_watchdog(struct net_device *dev)
+ if (priv->being_init_adapter)
+ return;
+
+- _rtl92e_dm_check_ac_dc_power(dev);
+-
+ _rtl92e_dm_check_txrateandretrycount(dev);
+ _rtl92e_dm_check_edca_turbo(dev);
+
+@@ -257,30 +254,6 @@ void rtl92e_dm_watchdog(struct net_device *dev)
+ _rtl92e_dm_cts_to_self(dev);
+ }
+
+-static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev)
+-{
+- struct r8192_priv *priv = rtllib_priv(dev);
+- static const char ac_dc_script[] = "/etc/acpi/wireless-rtl-ac-dc-power.sh";
+- char *argv[] = {(char *)ac_dc_script, DRV_NAME, NULL};
+- static char *envp[] = {"HOME=/",
+- "TERM=linux",
+- "PATH=/usr/bin:/bin",
+- NULL};
+-
+- if (priv->ResetProgress == RESET_TYPE_SILENT) {
+- RT_TRACE((COMP_INIT | COMP_POWER | COMP_RF),
+- "GPIOChangeRFWorkItemCallBack(): Silent Reset!!!!!!!\n");
+- return;
+- }
+-
+- if (priv->rtllib->state != RTLLIB_LINKED)
+- return;
+- call_usermodehelper(ac_dc_script, argv, envp, UMH_WAIT_PROC);
+-
+- return;
+-};
+-
+-
+ void rtl92e_init_adaptive_rate(struct net_device *dev)
+ {
+
+@@ -1800,10 +1773,6 @@ static void _rtl92e_dm_check_rf_ctrl_gpio(void *data)
+ u8 tmp1byte;
+ enum rt_rf_power_state eRfPowerStateToSet;
+ bool bActuallySet = false;
+- char *argv[3];
+- static const char RadioPowerPath[] = "/etc/acpi/events/RadioPower.sh";
+- static char *envp[] = {"HOME=/", "TERM=linux", "PATH=/usr/bin:/bin",
+- NULL};
+
+ bActuallySet = false;
+
+@@ -1835,14 +1804,6 @@ static void _rtl92e_dm_check_rf_ctrl_gpio(void *data)
+ mdelay(1000);
+ priv->bHwRfOffAction = 1;
+ rtl92e_set_rf_state(dev, eRfPowerStateToSet, RF_CHANGE_BY_HW);
+- if (priv->bHwRadioOff)
+- argv[1] = "RFOFF";
+- else
+- argv[1] = "RFON";
+-
+- argv[0] = (char *)RadioPowerPath;
+- argv[2] = NULL;
+- call_usermodehelper(RadioPowerPath, argv, envp, UMH_WAIT_PROC);
+ }
+ }
+
+diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
+index c6f8b772335c1..c985e4ebc545a 100644
+--- a/drivers/staging/rtl8192e/rtllib.h
++++ b/drivers/staging/rtl8192e/rtllib.h
+@@ -1980,7 +1980,7 @@ void SendDisassociation(struct rtllib_device *ieee, bool deauth, u16 asRsn);
+ void rtllib_softmac_xmit(struct rtllib_txb *txb, struct rtllib_device *ieee);
+
+ void rtllib_start_ibss(struct rtllib_device *ieee);
+-void rtllib_softmac_init(struct rtllib_device *ieee);
++int rtllib_softmac_init(struct rtllib_device *ieee);
+ void rtllib_softmac_free(struct rtllib_device *ieee);
+ void rtllib_disassociate(struct rtllib_device *ieee);
+ void rtllib_stop_scan(struct rtllib_device *ieee);
+diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c
+index 64d9feee1f392..f00ac94b2639b 100644
+--- a/drivers/staging/rtl8192e/rtllib_module.c
++++ b/drivers/staging/rtl8192e/rtllib_module.c
+@@ -88,7 +88,7 @@ struct net_device *alloc_rtllib(int sizeof_priv)
+ err = rtllib_networks_allocate(ieee);
+ if (err) {
+ pr_err("Unable to allocate beacon storage: %d\n", err);
+- goto failed;
++ goto free_netdev;
+ }
+ rtllib_networks_initialize(ieee);
+
+@@ -121,11 +121,13 @@ struct net_device *alloc_rtllib(int sizeof_priv)
+ ieee->hwsec_active = 0;
+
+ memset(ieee->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
+- rtllib_softmac_init(ieee);
++ err = rtllib_softmac_init(ieee);
++ if (err)
++ goto free_crypt_info;
+
+ ieee->pHTInfo = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL);
+ if (!ieee->pHTInfo)
+- return NULL;
++ goto free_softmac;
+
+ HTUpdateDefaultSetting(ieee);
+ HTInitializeHTInfo(ieee);
+@@ -141,8 +143,14 @@ struct net_device *alloc_rtllib(int sizeof_priv)
+
+ return dev;
+
+- failed:
++free_softmac:
++ rtllib_softmac_free(ieee);
++free_crypt_info:
++ lib80211_crypt_info_free(&ieee->crypt_info);
++ rtllib_networks_free(ieee);
++free_netdev:
+ free_netdev(dev);
++
+ return NULL;
+ }
+ EXPORT_SYMBOL(alloc_rtllib);
+diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
+index e3d0a361d370d..98e90670560b5 100644
+--- a/drivers/staging/rtl8192e/rtllib_rx.c
++++ b/drivers/staging/rtl8192e/rtllib_rx.c
+@@ -1489,9 +1489,9 @@ static int rtllib_rx_Monitor(struct rtllib_device *ieee, struct sk_buff *skb,
+ hdrlen += 4;
+ }
+
+- rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen);
+ ieee->stats.rx_packets++;
+ ieee->stats.rx_bytes += skb->len;
++ rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen);
+
+ return 1;
+ }
+diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
+index d2726d01c7573..ea8bc27fce494 100644
+--- a/drivers/staging/rtl8192e/rtllib_softmac.c
++++ b/drivers/staging/rtl8192e/rtllib_softmac.c
+@@ -651,9 +651,9 @@ static void rtllib_beacons_stop(struct rtllib_device *ieee)
+ spin_lock_irqsave(&ieee->beacon_lock, flags);
+
+ ieee->beacon_txing = 0;
+- del_timer_sync(&ieee->beacon_timer);
+
+ spin_unlock_irqrestore(&ieee->beacon_lock, flags);
++ del_timer_sync(&ieee->beacon_timer);
+
+ }
+
+@@ -2952,7 +2952,7 @@ void rtllib_start_protocol(struct rtllib_device *ieee)
+ }
+ }
+
+-void rtllib_softmac_init(struct rtllib_device *ieee)
++int rtllib_softmac_init(struct rtllib_device *ieee)
+ {
+ int i;
+
+@@ -2963,7 +2963,8 @@ void rtllib_softmac_init(struct rtllib_device *ieee)
+ ieee->seq_ctrl[i] = 0;
+ ieee->dot11d_info = kzalloc(sizeof(struct rt_dot11d_info), GFP_ATOMIC);
+ if (!ieee->dot11d_info)
+- netdev_err(ieee->dev, "Can't alloc memory for DOT11D\n");
++ return -ENOMEM;
++
+ ieee->LinkDetectInfo.SlotIndex = 0;
+ ieee->LinkDetectInfo.SlotNum = 2;
+ ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0;
+@@ -3029,6 +3030,7 @@ void rtllib_softmac_init(struct rtllib_device *ieee)
+
+ tasklet_setup(&ieee->ps_task, rtllib_sta_ps);
+
++ return 0;
+ }
+
+ void rtllib_softmac_free(struct rtllib_device *ieee)
+diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+index b58e75932ecd5..3686b3c599ce7 100644
+--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
++++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+@@ -951,9 +951,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
+ #endif
+
+ if (ieee->iw_mode == IW_MODE_MONITOR) {
++ unsigned int len = skb->len;
++
+ ieee80211_monitor_rx(ieee, skb, rx_stats);
+ stats->rx_packets++;
+- stats->rx_bytes += skb->len;
++ stats->rx_bytes += len;
+ return 1;
+ }
+
+diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+index 1a193f900779d..2b06706a70717 100644
+--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
++++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+@@ -528,9 +528,9 @@ static void ieee80211_beacons_stop(struct ieee80211_device *ieee)
+ spin_lock_irqsave(&ieee->beacon_lock, flags);
+
+ ieee->beacon_txing = 0;
+- del_timer_sync(&ieee->beacon_timer);
+
+ spin_unlock_irqrestore(&ieee->beacon_lock, flags);
++ del_timer_sync(&ieee->beacon_timer);
+ }
+
+ void ieee80211_stop_send_beacons(struct ieee80211_device *ieee)
+diff --git a/drivers/staging/rtl8192u/r8192U.h b/drivers/staging/rtl8192u/r8192U.h
+index 4013107cd93a2..a23d6d41de9d4 100644
+--- a/drivers/staging/rtl8192u/r8192U.h
++++ b/drivers/staging/rtl8192u/r8192U.h
+@@ -1013,7 +1013,7 @@ typedef struct r8192_priv {
+ bool bis_any_nonbepkts;
+ bool bcurrent_turbo_EDCA;
+ bool bis_cur_rdlstate;
+- struct timer_list fsync_timer;
++ struct delayed_work fsync_work;
+ bool bfsync_processing; /* 500ms Fsync timer is active or not */
+ u32 rate_record;
+ u32 rateCountDiffRecord;
+diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
+index b6698656fc014..cf5cfee2936fd 100644
+--- a/drivers/staging/rtl8192u/r8192U_core.c
++++ b/drivers/staging/rtl8192u/r8192U_core.c
+@@ -229,7 +229,7 @@ int write_nic_byte_E(struct net_device *dev, int indx, u8 data)
+
+ status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+- indx | 0xfe00, 0, usbdata, 1, HZ / 2);
++ indx | 0xfe00, 0, usbdata, 1, 500);
+ kfree(usbdata);
+
+ if (status < 0) {
+@@ -251,7 +251,7 @@ int read_nic_byte_E(struct net_device *dev, int indx, u8 *data)
+
+ status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+- indx | 0xfe00, 0, usbdata, 1, HZ / 2);
++ indx | 0xfe00, 0, usbdata, 1, 500);
+ *data = *usbdata;
+ kfree(usbdata);
+
+@@ -279,7 +279,7 @@ int write_nic_byte(struct net_device *dev, int indx, u8 data)
+ status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 1, HZ / 2);
++ usbdata, 1, 500);
+ kfree(usbdata);
+
+ if (status < 0) {
+@@ -305,7 +305,7 @@ int write_nic_word(struct net_device *dev, int indx, u16 data)
+ status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 2, HZ / 2);
++ usbdata, 2, 500);
+ kfree(usbdata);
+
+ if (status < 0) {
+@@ -331,7 +331,7 @@ int write_nic_dword(struct net_device *dev, int indx, u32 data)
+ status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 4, HZ / 2);
++ usbdata, 4, 500);
+ kfree(usbdata);
+
+ if (status < 0) {
+@@ -355,7 +355,7 @@ int read_nic_byte(struct net_device *dev, int indx, u8 *data)
+ status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 1, HZ / 2);
++ usbdata, 1, 500);
+ *data = *usbdata;
+ kfree(usbdata);
+
+@@ -380,7 +380,7 @@ int read_nic_word(struct net_device *dev, int indx, u16 *data)
+ status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 2, HZ / 2);
++ usbdata, 2, 500);
+ *data = *usbdata;
+ kfree(usbdata);
+
+@@ -404,7 +404,7 @@ static int read_nic_word_E(struct net_device *dev, int indx, u16 *data)
+
+ status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+- indx | 0xfe00, 0, usbdata, 2, HZ / 2);
++ indx | 0xfe00, 0, usbdata, 2, 500);
+ *data = *usbdata;
+ kfree(usbdata);
+
+@@ -430,7 +430,7 @@ int read_nic_dword(struct net_device *dev, int indx, u32 *data)
+ status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f,
+- usbdata, 4, HZ / 2);
++ usbdata, 4, 500);
+ *data = *usbdata;
+ kfree(usbdata);
+
+diff --git a/drivers/staging/rtl8192u/r8192U_dm.c b/drivers/staging/rtl8192u/r8192U_dm.c
+index 725bf5ca9e34d..0fcfcaa6500bf 100644
+--- a/drivers/staging/rtl8192u/r8192U_dm.c
++++ b/drivers/staging/rtl8192u/r8192U_dm.c
+@@ -2578,19 +2578,20 @@ static void dm_init_fsync(struct net_device *dev)
+ priv->ieee80211->fsync_seconddiff_ratethreshold = 200;
+ priv->ieee80211->fsync_state = Default_Fsync;
+ priv->framesyncMonitor = 1; /* current default 0xc38 monitor on */
+- timer_setup(&priv->fsync_timer, dm_fsync_timer_callback, 0);
++ INIT_DELAYED_WORK(&priv->fsync_work, dm_fsync_work_callback);
+ }
+
+ static void dm_deInit_fsync(struct net_device *dev)
+ {
+ struct r8192_priv *priv = ieee80211_priv(dev);
+
+- del_timer_sync(&priv->fsync_timer);
++ cancel_delayed_work_sync(&priv->fsync_work);
+ }
+
+-void dm_fsync_timer_callback(struct timer_list *t)
++void dm_fsync_work_callback(struct work_struct *work)
+ {
+- struct r8192_priv *priv = from_timer(priv, t, fsync_timer);
++ struct r8192_priv *priv =
++ container_of(work, struct r8192_priv, fsync_work.work);
+ struct net_device *dev = priv->ieee80211->dev;
+ u32 rate_index, rate_count = 0, rate_count_diff = 0;
+ bool bSwitchFromCountDiff = false;
+@@ -2657,17 +2658,16 @@ void dm_fsync_timer_callback(struct timer_list *t)
+ }
+ }
+ if (bDoubleTimeInterval) {
+- if (timer_pending(&priv->fsync_timer))
+- del_timer_sync(&priv->fsync_timer);
+- priv->fsync_timer.expires = jiffies +
+- msecs_to_jiffies(priv->ieee80211->fsync_time_interval*priv->ieee80211->fsync_multiple_timeinterval);
+- add_timer(&priv->fsync_timer);
++ cancel_delayed_work_sync(&priv->fsync_work);
++ schedule_delayed_work(&priv->fsync_work,
++ msecs_to_jiffies(priv
++ ->ieee80211->fsync_time_interval *
++ priv->ieee80211->fsync_multiple_timeinterval));
+ } else {
+- if (timer_pending(&priv->fsync_timer))
+- del_timer_sync(&priv->fsync_timer);
+- priv->fsync_timer.expires = jiffies +
+- msecs_to_jiffies(priv->ieee80211->fsync_time_interval);
+- add_timer(&priv->fsync_timer);
++ cancel_delayed_work_sync(&priv->fsync_work);
++ schedule_delayed_work(&priv->fsync_work,
++ msecs_to_jiffies(priv
++ ->ieee80211->fsync_time_interval));
+ }
+ } else {
+ /* Let Register return to default value; */
+@@ -2695,7 +2695,7 @@ static void dm_EndSWFsync(struct net_device *dev)
+ struct r8192_priv *priv = ieee80211_priv(dev);
+
+ RT_TRACE(COMP_HALDM, "%s\n", __func__);
+- del_timer_sync(&(priv->fsync_timer));
++ cancel_delayed_work_sync(&priv->fsync_work);
+
+ /* Let Register return to default value; */
+ if (priv->bswitch_fsync) {
+@@ -2736,11 +2736,9 @@ static void dm_StartSWFsync(struct net_device *dev)
+ if (priv->ieee80211->fsync_rate_bitmap & rateBitmap)
+ priv->rate_record += priv->stats.received_rate_histogram[1][rateIndex];
+ }
+- if (timer_pending(&priv->fsync_timer))
+- del_timer_sync(&priv->fsync_timer);
+- priv->fsync_timer.expires = jiffies +
+- msecs_to_jiffies(priv->ieee80211->fsync_time_interval);
+- add_timer(&priv->fsync_timer);
++ cancel_delayed_work_sync(&priv->fsync_work);
++ schedule_delayed_work(&priv->fsync_work,
++ msecs_to_jiffies(priv->ieee80211->fsync_time_interval));
+
+ write_nic_dword(dev, rOFDM0_RxDetector2, 0x465c12cd);
+ }
+diff --git a/drivers/staging/rtl8192u/r8192U_dm.h b/drivers/staging/rtl8192u/r8192U_dm.h
+index 0b2a1c688597c..2159018b4e38f 100644
+--- a/drivers/staging/rtl8192u/r8192U_dm.h
++++ b/drivers/staging/rtl8192u/r8192U_dm.h
+@@ -166,7 +166,7 @@ void dm_force_tx_fw_info(struct net_device *dev,
+ void dm_init_edca_turbo(struct net_device *dev);
+ void dm_rf_operation_test_callback(unsigned long data);
+ void dm_rf_pathcheck_workitemcallback(struct work_struct *work);
+-void dm_fsync_timer_callback(struct timer_list *t);
++void dm_fsync_work_callback(struct work_struct *work);
+ void dm_cck_txpower_adjust(struct net_device *dev, bool binch14);
+ void dm_shadow_init(struct net_device *dev);
+ void dm_initialize_txpower_tracking(struct net_device *dev);
+diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c
+index 9502f6aa53060..2a4c6cf14facc 100644
+--- a/drivers/staging/rtl8712/os_intfs.c
++++ b/drivers/staging/rtl8712/os_intfs.c
+@@ -323,6 +323,7 @@ int r8712_init_drv_sw(struct _adapter *padapter)
+ mp871xinit(padapter);
+ init_default_value(padapter);
+ r8712_InitSwLeds(padapter);
++ mutex_init(&padapter->mutex_start);
+ return ret;
+ }
+
+@@ -332,7 +333,6 @@ void r8712_free_drv_sw(struct _adapter *padapter)
+ r8712_free_evt_priv(&padapter->evtpriv);
+ r8712_DeInitSwLeds(padapter);
+ r8712_free_mlme_priv(&padapter->mlmepriv);
+- r8712_free_io_queue(padapter);
+ _free_xmit_priv(&padapter->xmitpriv);
+ _r8712_free_sta_priv(&padapter->stapriv);
+ _r8712_free_recv_priv(&padapter->recvpriv);
+diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c
+index e9294e1ed06eb..eacf5efa34307 100644
+--- a/drivers/staging/rtl8712/rtl8712_cmd.c
++++ b/drivers/staging/rtl8712/rtl8712_cmd.c
+@@ -117,34 +117,6 @@ static void r871x_internal_cmd_hdl(struct _adapter *padapter, u8 *pbuf)
+ kfree(pdrvcmd->pbuf);
+ }
+
+-static u8 read_macreg_hdl(struct _adapter *padapter, u8 *pbuf)
+-{
+- void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd);
+- struct cmd_obj *pcmd = (struct cmd_obj *)pbuf;
+-
+- /* invoke cmd->callback function */
+- pcmd_callback = cmd_callback[pcmd->cmdcode].callback;
+- if (!pcmd_callback)
+- r8712_free_cmd_obj(pcmd);
+- else
+- pcmd_callback(padapter, pcmd);
+- return H2C_SUCCESS;
+-}
+-
+-static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf)
+-{
+- void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd);
+- struct cmd_obj *pcmd = (struct cmd_obj *)pbuf;
+-
+- /* invoke cmd->callback function */
+- pcmd_callback = cmd_callback[pcmd->cmdcode].callback;
+- if (!pcmd_callback)
+- r8712_free_cmd_obj(pcmd);
+- else
+- pcmd_callback(padapter, pcmd);
+- return H2C_SUCCESS;
+-}
+-
+ static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf)
+ {
+ struct cmd_obj *pcmd = (struct cmd_obj *)pbuf;
+@@ -213,14 +185,6 @@ static struct cmd_obj *cmd_hdl_filter(struct _adapter *padapter,
+ pcmd_r = NULL;
+
+ switch (pcmd->cmdcode) {
+- case GEN_CMD_CODE(_Read_MACREG):
+- read_macreg_hdl(padapter, (u8 *)pcmd);
+- pcmd_r = pcmd;
+- break;
+- case GEN_CMD_CODE(_Write_MACREG):
+- write_macreg_hdl(padapter, (u8 *)pcmd);
+- pcmd_r = pcmd;
+- break;
+ case GEN_CMD_CODE(_Read_BBREG):
+ read_bbreg_hdl(padapter, (u8 *)pcmd);
+ break;
+diff --git a/drivers/staging/rtl8712/rtl871x_xmit.c b/drivers/staging/rtl8712/rtl871x_xmit.c
+index 090345bad2230..6353dbe554d3a 100644
+--- a/drivers/staging/rtl8712/rtl871x_xmit.c
++++ b/drivers/staging/rtl8712/rtl871x_xmit.c
+@@ -21,6 +21,7 @@
+ #include "osdep_intf.h"
+ #include "usb_ops.h"
+
++#include <linux/usb.h>
+ #include <linux/ieee80211.h>
+
+ static const u8 P802_1H_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0xf8};
+@@ -55,6 +56,7 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
+ sint i;
+ struct xmit_buf *pxmitbuf;
+ struct xmit_frame *pxframe;
++ int j;
+
+ memset((unsigned char *)pxmitpriv, 0, sizeof(struct xmit_priv));
+ spin_lock_init(&pxmitpriv->lock);
+@@ -117,11 +119,8 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
+ _init_queue(&pxmitpriv->pending_xmitbuf_queue);
+ pxmitpriv->pallocated_xmitbuf =
+ kmalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4, GFP_ATOMIC);
+- if (!pxmitpriv->pallocated_xmitbuf) {
+- kfree(pxmitpriv->pallocated_frame_buf);
+- pxmitpriv->pallocated_frame_buf = NULL;
+- return -ENOMEM;
+- }
++ if (!pxmitpriv->pallocated_xmitbuf)
++ goto clean_up_frame_buf;
+ pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 -
+ ((addr_t)(pxmitpriv->pallocated_xmitbuf) & 3);
+ pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmitbuf;
+@@ -129,13 +128,17 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
+ INIT_LIST_HEAD(&pxmitbuf->list);
+ pxmitbuf->pallocated_buf =
+ kmalloc(MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ, GFP_ATOMIC);
+- if (!pxmitbuf->pallocated_buf)
+- return -ENOMEM;
++ if (!pxmitbuf->pallocated_buf) {
++ j = 0;
++ goto clean_up_alloc_buf;
++ }
+ pxmitbuf->pbuf = pxmitbuf->pallocated_buf + XMITBUF_ALIGN_SZ -
+ ((addr_t) (pxmitbuf->pallocated_buf) &
+ (XMITBUF_ALIGN_SZ - 1));
+- if (r8712_xmit_resource_alloc(padapter, pxmitbuf))
+- return -ENOMEM;
++ if (r8712_xmit_resource_alloc(padapter, pxmitbuf)) {
++ j = 1;
++ goto clean_up_alloc_buf;
++ }
+ list_add_tail(&pxmitbuf->list,
+ &(pxmitpriv->free_xmitbuf_queue.queue));
+ pxmitbuf++;
+@@ -146,6 +149,28 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
+ init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
+ tasklet_setup(&pxmitpriv->xmit_tasklet, r8712_xmit_bh);
+ return 0;
++
++clean_up_alloc_buf:
++ if (j) {
++ /* failure happened in r8712_xmit_resource_alloc()
++ * delete extra pxmitbuf->pallocated_buf
++ */
++ kfree(pxmitbuf->pallocated_buf);
++ }
++ for (j = 0; j < i; j++) {
++ int k;
++
++ pxmitbuf--; /* reset pointer */
++ kfree(pxmitbuf->pallocated_buf);
++ for (k = 0; k < 8; k++) /* delete xmit urb's */
++ usb_free_urb(pxmitbuf->pxmit_urb[k]);
++ }
++ kfree(pxmitpriv->pallocated_xmitbuf);
++ pxmitpriv->pallocated_xmitbuf = NULL;
++clean_up_frame_buf:
++ kfree(pxmitpriv->pallocated_frame_buf);
++ pxmitpriv->pallocated_frame_buf = NULL;
++ return -ENOMEM;
+ }
+
+ void _free_xmit_priv(struct xmit_priv *pxmitpriv)
+diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
+index 505ebeb643dc2..ed8e70cce68bf 100644
+--- a/drivers/staging/rtl8712/usb_intf.c
++++ b/drivers/staging/rtl8712/usb_intf.c
+@@ -265,6 +265,7 @@ static uint r8712_usb_dvobj_init(struct _adapter *padapter)
+
+ static void r8712_usb_dvobj_deinit(struct _adapter *padapter)
+ {
++ r8712_free_io_queue(padapter);
+ }
+
+ void rtl871x_intf_stop(struct _adapter *padapter)
+@@ -302,9 +303,6 @@ void r871x_dev_unload(struct _adapter *padapter)
+ rtl8712_hal_deinit(padapter);
+ }
+
+- /*s6.*/
+- if (padapter->dvobj_deinit)
+- padapter->dvobj_deinit(padapter);
+ padapter->bup = false;
+ }
+ }
+@@ -538,13 +536,13 @@ static int r871xu_drv_init(struct usb_interface *pusb_intf,
+ } else {
+ AutoloadFail = false;
+ }
+- if (((mac[0] == 0xff) && (mac[1] == 0xff) &&
++ if ((!AutoloadFail) ||
++ ((mac[0] == 0xff) && (mac[1] == 0xff) &&
+ (mac[2] == 0xff) && (mac[3] == 0xff) &&
+ (mac[4] == 0xff) && (mac[5] == 0xff)) ||
+ ((mac[0] == 0x00) && (mac[1] == 0x00) &&
+ (mac[2] == 0x00) && (mac[3] == 0x00) &&
+- (mac[4] == 0x00) && (mac[5] == 0x00)) ||
+- (!AutoloadFail)) {
++ (mac[4] == 0x00) && (mac[5] == 0x00))) {
+ mac[0] = 0x00;
+ mac[1] = 0xe0;
+ mac[2] = 0x4c;
+@@ -569,7 +567,6 @@ static int r871xu_drv_init(struct usb_interface *pusb_intf,
+ if (rtl871x_load_fw(padapter))
+ goto deinit_drv_sw;
+ spin_lock_init(&padapter->lock_rx_ff0_filter);
+- mutex_init(&padapter->mutex_start);
+ return 0;
+
+ deinit_drv_sw:
+@@ -595,18 +592,20 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf)
+
+ /* never exit with a firmware callback pending */
+ wait_for_completion(&padapter->rtl8712_fw_ready);
++ if (pnetdev->reg_state != NETREG_UNINITIALIZED)
++ unregister_netdev(pnetdev); /* will call netdev_close() */
+ usb_set_intfdata(pusb_intf, NULL);
+ release_firmware(padapter->fw);
+ if (drvpriv.drv_registered)
+ padapter->surprise_removed = true;
+- if (pnetdev->reg_state != NETREG_UNINITIALIZED)
+- unregister_netdev(pnetdev); /* will call netdev_close() */
+ r8712_flush_rwctrl_works(padapter);
+ r8712_flush_led_works(padapter);
+ udelay(1);
+ /* Stop driver mlme relation timer */
+ r8712_stop_drv_timers(padapter);
+ r871x_dev_unload(padapter);
++ if (padapter->dvobj_deinit)
++ padapter->dvobj_deinit(padapter);
+ r8712_free_drv_sw(padapter);
+ free_netdev(pnetdev);
+
+diff --git a/drivers/staging/rtl8712/usb_ops.c b/drivers/staging/rtl8712/usb_ops.c
+index e64845e6adf3d..af9966d03979c 100644
+--- a/drivers/staging/rtl8712/usb_ops.c
++++ b/drivers/staging/rtl8712/usb_ops.c
+@@ -29,7 +29,8 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr)
+ u16 wvalue;
+ u16 index;
+ u16 len;
+- __le32 data;
++ int status;
++ __le32 data = 0;
+ struct intf_priv *intfpriv = intfhdl->pintfpriv;
+
+ request = 0x05;
+@@ -37,8 +38,10 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr)
+ index = 0;
+ wvalue = (u16)(addr & 0x0000ffff);
+ len = 1;
+- r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len,
+- requesttype);
++ status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index,
++ &data, len, requesttype);
++ if (status < 0)
++ return 0;
+ return (u8)(le32_to_cpu(data) & 0x0ff);
+ }
+
+@@ -49,7 +52,8 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr)
+ u16 wvalue;
+ u16 index;
+ u16 len;
+- __le32 data;
++ int status;
++ __le32 data = 0;
+ struct intf_priv *intfpriv = intfhdl->pintfpriv;
+
+ request = 0x05;
+@@ -57,8 +61,10 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr)
+ index = 0;
+ wvalue = (u16)(addr & 0x0000ffff);
+ len = 2;
+- r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len,
+- requesttype);
++ status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index,
++ &data, len, requesttype);
++ if (status < 0)
++ return 0;
+ return (u16)(le32_to_cpu(data) & 0xffff);
+ }
+
+@@ -69,7 +75,8 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr)
+ u16 wvalue;
+ u16 index;
+ u16 len;
+- __le32 data;
++ int status;
++ __le32 data = 0;
+ struct intf_priv *intfpriv = intfhdl->pintfpriv;
+
+ request = 0x05;
+@@ -77,8 +84,10 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr)
+ index = 0;
+ wvalue = (u16)(addr & 0x0000ffff);
+ len = 4;
+- r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len,
+- requesttype);
++ status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index,
++ &data, len, requesttype);
++ if (status < 0)
++ return 0;
+ return le32_to_cpu(data);
+ }
+
+diff --git a/drivers/staging/rtl8712/usb_ops_linux.c b/drivers/staging/rtl8712/usb_ops_linux.c
+index 655497cead122..f984a5ab2c6ff 100644
+--- a/drivers/staging/rtl8712/usb_ops_linux.c
++++ b/drivers/staging/rtl8712/usb_ops_linux.c
+@@ -494,7 +494,7 @@ int r8712_usbctrl_vendorreq(struct intf_priv *pintfpriv, u8 request, u16 value,
+ memcpy(pIo_buf, pdata, len);
+ }
+ status = usb_control_msg(udev, pipe, request, reqtype, value, index,
+- pIo_buf, len, HZ / 2);
++ pIo_buf, len, 500);
+ if (status > 0) { /* Success this control transfer. */
+ if (requesttype == 0x01) {
+ /* For Control read transfer, we have to copy the read
+diff --git a/drivers/staging/rtl8712/xmit_linux.c b/drivers/staging/rtl8712/xmit_linux.c
+index 90d34cf9d2ffd..a820ce7cce713 100644
+--- a/drivers/staging/rtl8712/xmit_linux.c
++++ b/drivers/staging/rtl8712/xmit_linux.c
+@@ -118,6 +118,12 @@ int r8712_xmit_resource_alloc(struct _adapter *padapter,
+ for (i = 0; i < 8; i++) {
+ pxmitbuf->pxmit_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
+ if (!pxmitbuf->pxmit_urb[i]) {
++ int k;
++
++ for (k = i - 1; k >= 0; k--) {
++ /* handle allocation errors part way through loop */
++ usb_free_urb(pxmitbuf->pxmit_urb[k]);
++ }
+ netdev_err(padapter->pnetdev, "pxmitbuf->pxmit_urb[i] == NULL\n");
+ return -ENOMEM;
+ }
+diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c
+index 6064dd6a76b42..674592e914e26 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_ap.c
++++ b/drivers/staging/rtl8723bs/core/rtw_ap.c
+@@ -891,7 +891,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len)
+ &ie_len,
+ (pbss_network->ie_length - _BEACON_IE_OFFSET_)
+ );
+- if (p != NULL) {
++ if (p) {
+ memcpy(supportRate, p + 2, ie_len);
+ supportRateNum = ie_len;
+ }
+@@ -903,7 +903,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len)
+ &ie_len,
+ pbss_network->ie_length - _BEACON_IE_OFFSET_
+ );
+- if (p != NULL) {
++ if (p) {
+ memcpy(supportRate + supportRateNum, p + 2, ie_len);
+ supportRateNum += ie_len;
+ }
+@@ -991,7 +991,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len)
+ break;
+ }
+
+- if ((p == NULL) || (ie_len == 0))
++ if (!p || ie_len == 0)
+ break;
+ }
+
+@@ -1021,7 +1021,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len)
+ break;
+ }
+
+- if ((p == NULL) || (ie_len == 0))
++ if (!p || ie_len == 0)
+ break;
+ }
+ }
+@@ -1145,7 +1145,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len)
+ psta = rtw_get_stainfo(&padapter->stapriv, pbss_network->mac_address);
+ if (!psta) {
+ psta = rtw_alloc_stainfo(&padapter->stapriv, pbss_network->mac_address);
+- if (psta == NULL)
++ if (!psta)
+ return _FAIL;
+ }
+
+@@ -1275,7 +1275,7 @@ u8 rtw_ap_set_pairwise_key(struct adapter *padapter, struct sta_info *psta)
+ }
+
+ psetstakey_para = rtw_zmalloc(sizeof(struct set_stakey_parm));
+- if (psetstakey_para == NULL) {
++ if (!psetstakey_para) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1311,12 +1311,12 @@ static int rtw_ap_set_key(
+ int res = _SUCCESS;
+
+ pcmd = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd == NULL) {
++ if (!pcmd) {
+ res = _FAIL;
+ goto exit;
+ }
+ psetkeyparm = rtw_zmalloc(sizeof(struct setkey_parm));
+- if (psetkeyparm == NULL) {
++ if (!psetkeyparm) {
+ kfree(pcmd);
+ res = _FAIL;
+ goto exit;
+@@ -1474,11 +1474,11 @@ static void update_bcn_wps_ie(struct adapter *padapter)
+ &wps_ielen
+ );
+
+- if (pwps_ie == NULL || wps_ielen == 0)
++ if (!pwps_ie || wps_ielen == 0)
+ return;
+
+ pwps_ie_src = pmlmepriv->wps_beacon_ie;
+- if (pwps_ie_src == NULL)
++ if (!pwps_ie_src)
+ return;
+
+ wps_offset = (uint)(pwps_ie - ie);
+diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c b/drivers/staging/rtl8723bs/core/rtw_cmd.c
+index d494c06dab967..5f4f603b3b366 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_cmd.c
++++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c
+@@ -161,8 +161,6 @@ static struct cmd_hdl wlancmds[] = {
+
+ int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
+ {
+- int res = 0;
+-
+ init_completion(&pcmdpriv->cmd_queue_comp);
+ init_completion(&pcmdpriv->terminate_cmdthread_comp);
+
+@@ -174,18 +172,16 @@ int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
+
+ pcmdpriv->cmd_allocated_buf = rtw_zmalloc(MAX_CMDSZ + CMDBUFF_ALIGN_SZ);
+
+- if (!pcmdpriv->cmd_allocated_buf) {
+- res = -ENOMEM;
+- goto exit;
+- }
++ if (!pcmdpriv->cmd_allocated_buf)
++ return -ENOMEM;
+
+ pcmdpriv->cmd_buf = pcmdpriv->cmd_allocated_buf + CMDBUFF_ALIGN_SZ - ((SIZE_PTR)(pcmdpriv->cmd_allocated_buf) & (CMDBUFF_ALIGN_SZ-1));
+
+ pcmdpriv->rsp_allocated_buf = rtw_zmalloc(MAX_RSPSZ + 4);
+
+ if (!pcmdpriv->rsp_allocated_buf) {
+- res = -ENOMEM;
+- goto exit;
++ kfree(pcmdpriv->cmd_allocated_buf);
++ return -ENOMEM;
+ }
+
+ pcmdpriv->rsp_buf = pcmdpriv->rsp_allocated_buf + 4 - ((SIZE_PTR)(pcmdpriv->rsp_allocated_buf) & 3);
+@@ -195,8 +191,8 @@ int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
+ pcmdpriv->rsp_cnt = 0;
+
+ mutex_init(&pcmdpriv->sctx_mutex);
+-exit:
+- return res;
++
++ return 0;
+ }
+
+ static void c2h_wk_callback(struct work_struct *work);
+@@ -255,7 +251,7 @@ int _rtw_enqueue_cmd(struct __queue *queue, struct cmd_obj *obj)
+ {
+ unsigned long irqL;
+
+- if (obj == NULL)
++ if (!obj)
+ goto exit;
+
+ /* spin_lock_bh(&queue->lock); */
+@@ -323,7 +319,7 @@ int rtw_enqueue_cmd(struct cmd_priv *pcmdpriv, struct cmd_obj *cmd_obj)
+ int res = _FAIL;
+ struct adapter *padapter = pcmdpriv->padapter;
+
+- if (cmd_obj == NULL)
++ if (!cmd_obj)
+ goto exit;
+
+ cmd_obj->padapter = padapter;
+@@ -488,7 +484,7 @@ post_process:
+ /* call callback function for post-processed */
+ if (pcmd->cmdcode < ARRAY_SIZE(rtw_cmd_callback)) {
+ pcmd_callback = rtw_cmd_callback[pcmd->cmdcode].callback;
+- if (pcmd_callback == NULL) {
++ if (!pcmd_callback) {
+ rtw_free_cmd_obj(pcmd);
+ } else {
+ /* todo: !!! fill rsp_buf to pcmd->rsp if (pcmd->rsp!= NULL) */
+@@ -507,7 +503,7 @@ post_process:
+ /* free all cmd_obj resources */
+ do {
+ pcmd = rtw_dequeue_cmd(pcmdpriv);
+- if (pcmd == NULL) {
++ if (!pcmd) {
+ rtw_unregister_cmd_alive(padapter);
+ break;
+ }
+@@ -546,11 +542,11 @@ u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid,
+ rtw_lps_ctrl_wk_cmd(padapter, LPS_CTRL_SCAN, 1);
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL)
++ if (!ph2c)
+ return _FAIL;
+
+ psurveyPara = rtw_zmalloc(sizeof(struct sitesurvey_parm));
+- if (psurveyPara == NULL) {
++ if (!psurveyPara) {
+ kfree(ph2c);
+ return _FAIL;
+ }
+@@ -608,13 +604,13 @@ u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pbsetdataratepara = rtw_zmalloc(sizeof(struct setdatarate_parm));
+- if (pbsetdataratepara == NULL) {
++ if (!pbsetdataratepara) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -644,7 +640,7 @@ u8 rtw_createbss_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ pcmd = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd == NULL) {
++ if (!pcmd) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -677,7 +673,7 @@ int rtw_startbss_cmd(struct adapter *padapter, int flags)
+ } else {
+ /* need enqueue, prepare cmd_obj and enqueue */
+ pcmd = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd == NULL) {
++ if (!pcmd) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -729,7 +725,7 @@ u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network *pnetwork)
+ u8 *ptmp = NULL;
+
+ pcmd = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd == NULL) {
++ if (!pcmd) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -841,7 +837,7 @@ u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueu
+
+ /* prepare cmd parameter */
+ param = rtw_zmalloc(sizeof(*param));
+- if (param == NULL) {
++ if (!param) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -850,7 +846,7 @@ u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueu
+ if (enqueue) {
+ /* need enqueue, prepare cmd_obj and enqueue */
+ cmdobj = rtw_zmalloc(sizeof(*cmdobj));
+- if (cmdobj == NULL) {
++ if (!cmdobj) {
+ res = _FAIL;
+ kfree(param);
+ goto exit;
+@@ -878,7 +874,7 @@ u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infrast
+
+ psetop = rtw_zmalloc(sizeof(struct setopmode_parm));
+
+- if (psetop == NULL) {
++ if (!psetop) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -886,7 +882,7 @@ u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infrast
+
+ if (enqueue) {
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ kfree(psetop);
+ res = _FAIL;
+ goto exit;
+@@ -914,7 +910,7 @@ u8 rtw_setstakey_cmd(struct adapter *padapter, struct sta_info *sta, u8 unicast_
+ u8 res = _SUCCESS;
+
+ psetstakey_para = rtw_zmalloc(sizeof(struct set_stakey_parm));
+- if (psetstakey_para == NULL) {
++ if (!psetstakey_para) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -936,14 +932,14 @@ u8 rtw_setstakey_cmd(struct adapter *padapter, struct sta_info *sta, u8 unicast_
+
+ if (enqueue) {
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ kfree(psetstakey_para);
+ res = _FAIL;
+ goto exit;
+ }
+
+ psetstakey_rsp = rtw_zmalloc(sizeof(struct set_stakey_rsp));
+- if (psetstakey_rsp == NULL) {
++ if (!psetstakey_rsp) {
+ kfree(ph2c);
+ kfree(psetstakey_para);
+ res = _FAIL;
+@@ -981,20 +977,20 @@ u8 rtw_clearstakey_cmd(struct adapter *padapter, struct sta_info *sta, u8 enqueu
+ }
+ } else {
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ psetstakey_para = rtw_zmalloc(sizeof(struct set_stakey_parm));
+- if (psetstakey_para == NULL) {
++ if (!psetstakey_para) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+ }
+
+ psetstakey_rsp = rtw_zmalloc(sizeof(struct set_stakey_rsp));
+- if (psetstakey_rsp == NULL) {
++ if (!psetstakey_rsp) {
+ kfree(ph2c);
+ kfree(psetstakey_para);
+ res = _FAIL;
+@@ -1026,13 +1022,13 @@ u8 rtw_addbareq_cmd(struct adapter *padapter, u8 tid, u8 *addr)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ paddbareq_parm = rtw_zmalloc(sizeof(struct addBaReq_parm));
+- if (paddbareq_parm == NULL) {
++ if (!paddbareq_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1058,13 +1054,13 @@ u8 rtw_reset_securitypriv_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1093,13 +1089,13 @@ u8 rtw_free_assoc_resources_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1129,13 +1125,13 @@ u8 rtw_dynamic_chk_wk_cmd(struct adapter *padapter)
+
+ /* only primary padapter does this cmd */
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1177,7 +1173,7 @@ u8 rtw_set_chplan_cmd(struct adapter *padapter, u8 chplan, u8 enqueue, u8 swconf
+
+ /* prepare cmd parameter */
+ setChannelPlan_param = rtw_zmalloc(sizeof(struct SetChannelPlan_param));
+- if (setChannelPlan_param == NULL) {
++ if (!setChannelPlan_param) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -1186,7 +1182,7 @@ u8 rtw_set_chplan_cmd(struct adapter *padapter, u8 chplan, u8 enqueue, u8 swconf
+ if (enqueue) {
+ /* need enqueue, prepare cmd_obj and enqueue */
+ pcmdobj = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmdobj == NULL) {
++ if (!pcmdobj) {
+ kfree(setChannelPlan_param);
+ res = _FAIL;
+ goto exit;
+@@ -1436,13 +1432,13 @@ u8 rtw_lps_ctrl_wk_cmd(struct adapter *padapter, u8 lps_ctrl_type, u8 enqueue)
+
+ if (enqueue) {
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1478,13 +1474,13 @@ u8 rtw_dm_in_lps_wk_cmd(struct adapter *padapter)
+
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1544,13 +1540,13 @@ u8 rtw_dm_ra_mask_wk_cmd(struct adapter *padapter, u8 *psta)
+
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1579,13 +1575,13 @@ u8 rtw_ps_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ ppscmd = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ppscmd == NULL) {
++ if (!ppscmd) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ppscmd);
+ res = _FAIL;
+ goto exit;
+@@ -1651,13 +1647,13 @@ u8 rtw_chk_hi_queue_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1745,13 +1741,13 @@ u8 rtw_c2h_packet_wk_cmd(struct adapter *padapter, u8 *pbuf, u16 length)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1780,13 +1776,13 @@ u8 rtw_c2h_wk_cmd(struct adapter *padapter, u8 *c2h_evt)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ pdrvextra_cmd_parm = rtw_zmalloc(sizeof(struct drvextra_cmd_parm));
+- if (pdrvextra_cmd_parm == NULL) {
++ if (!pdrvextra_cmd_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -1961,7 +1957,7 @@ void rtw_createbss_cmd_callback(struct adapter *padapter, struct cmd_obj *pcmd)
+ struct wlan_bssid_ex *pnetwork = (struct wlan_bssid_ex *)pcmd->parmbuf;
+ struct wlan_network *tgt_network = &(pmlmepriv->cur_network);
+
+- if (pcmd->parmbuf == NULL)
++ if (!pcmd->parmbuf)
+ goto exit;
+
+ if (pcmd->res != H2C_SUCCESS)
+@@ -1984,9 +1980,9 @@ void rtw_createbss_cmd_callback(struct adapter *padapter, struct cmd_obj *pcmd)
+ } else {
+ pwlan = rtw_alloc_network(pmlmepriv);
+ spin_lock_bh(&(pmlmepriv->scanned_queue.lock));
+- if (pwlan == NULL) {
++ if (!pwlan) {
+ pwlan = rtw_get_oldest_wlan_network(&pmlmepriv->scanned_queue);
+- if (pwlan == NULL) {
++ if (!pwlan) {
+ spin_unlock_bh(&(pmlmepriv->scanned_queue.lock));
+ goto createbss_cmd_fail;
+ }
+diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+index 5cfde71766173..8c11daff2d590 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
++++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+@@ -370,7 +370,7 @@ u8 rtw_set_802_11_bssid_list_scan(struct adapter *padapter, struct ndis_802_11_s
+ struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
+ u8 res = true;
+
+- if (padapter == NULL) {
++ if (!padapter) {
+ res = false;
+ goto exit;
+ }
+@@ -481,7 +481,7 @@ u16 rtw_get_cur_max_rate(struct adapter *adapter)
+ return 0;
+
+ psta = rtw_get_stainfo(&adapter->stapriv, get_bssid(pmlmepriv));
+- if (psta == NULL)
++ if (!psta)
+ return 0;
+
+ short_GI = query_ra_short_GI(psta);
+diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c
+index ab6a24d70cc96..5b64980e8522f 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_mlme.c
++++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c
+@@ -439,7 +439,7 @@ struct wlan_network *rtw_get_oldest_wlan_network(struct __queue *scanned_queue)
+ pwlan = list_entry(plist, struct wlan_network, list);
+
+ if (!pwlan->fixed) {
+- if (oldest == NULL || time_after(oldest->last_scanned, pwlan->last_scanned))
++ if (!oldest || time_after(oldest->last_scanned, pwlan->last_scanned))
+ oldest = pwlan;
+ }
+ }
+@@ -542,7 +542,7 @@ void rtw_update_scanned_network(struct adapter *adapter, struct wlan_bssid_ex *t
+ /* TODO: don't select network in the same ess as oldest if it's new enough*/
+ }
+
+- if (oldest == NULL || time_after(oldest->last_scanned, pnetwork->last_scanned))
++ if (!oldest || time_after(oldest->last_scanned, pnetwork->last_scanned))
+ oldest = pnetwork;
+
+ }
+@@ -749,7 +749,9 @@ void rtw_surveydone_event_callback(struct adapter *adapter, u8 *pbuf)
+ }
+
+ if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY)) {
++ spin_unlock_bh(&pmlmepriv->lock);
+ del_timer_sync(&pmlmepriv->scan_to_timer);
++ spin_lock_bh(&pmlmepriv->lock);
+ _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY);
+ }
+
+@@ -897,7 +899,6 @@ void rtw_free_assoc_resources(struct adapter *adapter, int lock_scanned_queue)
+ {
+ struct mlme_priv *pmlmepriv = &adapter->mlmepriv;
+ struct wlan_network *tgt_network = &pmlmepriv->cur_network;
+- struct sta_priv *pstapriv = &adapter->stapriv;
+ struct dvobj_priv *psdpriv = adapter->dvobj;
+ struct debug_priv *pdbgpriv = &psdpriv->drv_dbg;
+
+@@ -905,11 +906,7 @@ void rtw_free_assoc_resources(struct adapter *adapter, int lock_scanned_queue)
+ struct sta_info *psta;
+
+ psta = rtw_get_stainfo(&adapter->stapriv, tgt_network->network.mac_address);
+- spin_lock_bh(&(pstapriv->sta_hash_lock));
+ rtw_free_stainfo(adapter, psta);
+-
+- spin_unlock_bh(&(pstapriv->sta_hash_lock));
+-
+ }
+
+ if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE|WIFI_ADHOC_MASTER_STATE|WIFI_AP_STATE)) {
+@@ -1239,16 +1236,15 @@ void rtw_joinbss_event_prehandle(struct adapter *adapter, u8 *pbuf)
+ rtw_indicate_connect(adapter);
+ }
+
++ spin_unlock_bh(&pmlmepriv->scanned_queue.lock);
++
++ spin_unlock_bh(&pmlmepriv->lock);
+ /* s5. Cancel assoc_timer */
+ del_timer_sync(&pmlmepriv->assoc_timer);
+-
++ spin_lock_bh(&pmlmepriv->lock);
+ } else {
+ spin_unlock_bh(&(pmlmepriv->scanned_queue.lock));
+- goto ignore_joinbss_callback;
+ }
+-
+- spin_unlock_bh(&(pmlmepriv->scanned_queue.lock));
+-
+ } else if (pnetwork->join_res == -4) {
+ rtw_reset_securitypriv(adapter);
+ _set_timer(&pmlmepriv->assoc_timer, 1);
+@@ -1820,7 +1816,7 @@ static int rtw_check_join_candidate(struct mlme_priv *mlme
+ goto exit;
+ }
+
+- if (*candidate == NULL || (*candidate)->network.rssi < competitor->network.rssi) {
++ if (!*candidate || (*candidate)->network.rssi < competitor->network.rssi) {
+ *candidate = competitor;
+ updated = true;
+ }
+diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+index 375d2a742dd2d..e923f306cf0c3 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
++++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+@@ -742,11 +742,11 @@ unsigned int OnAuth(struct adapter *padapter, union recv_frame *precv_frame)
+ }
+
+ pstat = rtw_get_stainfo(pstapriv, sa);
+- if (pstat == NULL) {
++ if (!pstat) {
+
+ /* allocate a new one */
+ pstat = rtw_alloc_stainfo(pstapriv, sa);
+- if (pstat == NULL) {
++ if (!pstat) {
+ status = WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA;
+ goto auth_fail;
+ }
+@@ -814,7 +814,7 @@ unsigned int OnAuth(struct adapter *padapter, union recv_frame *precv_frame)
+ p = rtw_get_ie(pframe + WLAN_HDR_A3_LEN + 4 + _AUTH_IE_OFFSET_, WLAN_EID_CHALLENGE, (int *)&ie_len,
+ len - WLAN_HDR_A3_LEN - _AUTH_IE_OFFSET_ - 4);
+
+- if ((p == NULL) || (ie_len <= 0)) {
++ if (!p || ie_len <= 0) {
+ status = WLAN_STATUS_CHALLENGE_FAIL;
+ goto auth_fail;
+ }
+@@ -1034,7 +1034,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame)
+
+ /* check if the supported rate is ok */
+ p = rtw_get_ie(pframe + WLAN_HDR_A3_LEN + ie_offset, WLAN_EID_SUPP_RATES, &ie_len, pkt_len - WLAN_HDR_A3_LEN - ie_offset);
+- if (p == NULL) {
++ if (!p) {
+ /* use our own rate set as statoin used */
+ /* memcpy(supportRate, AP_BSSRATE, AP_BSSRATE_LEN); */
+ /* supportRateNum = AP_BSSRATE_LEN; */
+@@ -1047,7 +1047,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame)
+
+ p = rtw_get_ie(pframe + WLAN_HDR_A3_LEN + ie_offset, WLAN_EID_EXT_SUPP_RATES, &ie_len,
+ pkt_len - WLAN_HDR_A3_LEN - ie_offset);
+- if (p != NULL) {
++ if (p) {
+
+ if (supportRateNum <= sizeof(supportRate)) {
+ memcpy(supportRate+supportRateNum, p+2, ie_len);
+@@ -1294,7 +1294,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame)
+ /* get a unique AID */
+ if (pstat->aid == 0) {
+ for (pstat->aid = 1; pstat->aid <= NUM_STA; pstat->aid++)
+- if (pstapriv->sta_aid[pstat->aid - 1] == NULL)
++ if (!pstapriv->sta_aid[pstat->aid - 1])
+ break;
+
+ /* if (pstat->aid > NUM_STA) { */
+@@ -1489,9 +1489,7 @@ unsigned int OnDeAuth(struct adapter *padapter, union recv_frame *precv_frame)
+ struct sta_info *psta;
+ struct sta_priv *pstapriv = &padapter->stapriv;
+
+- /* spin_lock_bh(&(pstapriv->sta_hash_lock)); */
+ /* rtw_free_stainfo(padapter, psta); */
+- /* spin_unlock_bh(&(pstapriv->sta_hash_lock)); */
+
+ netdev_dbg(padapter->pnetdev,
+ "ap recv deauth reason code(%d) sta:%pM\n", reason,
+@@ -1565,9 +1563,7 @@ unsigned int OnDisassoc(struct adapter *padapter, union recv_frame *precv_frame)
+ struct sta_info *psta;
+ struct sta_priv *pstapriv = &padapter->stapriv;
+
+- /* spin_lock_bh(&(pstapriv->sta_hash_lock)); */
+ /* rtw_free_stainfo(padapter, psta); */
+- /* spin_unlock_bh(&(pstapriv->sta_hash_lock)); */
+
+ netdev_dbg(padapter->pnetdev,
+ "ap recv disassoc reason code(%d) sta:%pM\n",
+@@ -1944,7 +1940,7 @@ static struct xmit_frame *_alloc_mgtxmitframe(struct xmit_priv *pxmitpriv, bool
+ goto exit;
+
+ pxmitbuf = rtw_alloc_xmitbuf_ext(pxmitpriv);
+- if (pxmitbuf == NULL) {
++ if (!pxmitbuf) {
+ rtw_free_xmitframe(pxmitpriv, pmgntframe);
+ pmgntframe = NULL;
+ goto exit;
+@@ -2297,7 +2293,7 @@ void issue_probersp(struct adapter *padapter, unsigned char *da, u8 is_valid_p2p
+ struct wlan_bssid_ex *cur_network = &(pmlmeinfo->network);
+ unsigned int rate_len;
+
+- if (da == NULL)
++ if (!da)
+ return;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+@@ -2621,7 +2617,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short
+ __le16 le_tmp;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL)
++ if (!pmgntframe)
+ return;
+
+ /* update attribute */
+@@ -2752,7 +2748,7 @@ void issue_asocrsp(struct adapter *padapter, unsigned short status, struct sta_i
+ __le16 lestatus, le_tmp;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL)
++ if (!pmgntframe)
+ return;
+
+ /* update attribute */
+@@ -2840,7 +2836,7 @@ void issue_asocrsp(struct adapter *padapter, unsigned short status, struct sta_i
+ break;
+ }
+
+- if ((pbuf == NULL) || (ie_len == 0)) {
++ if (!pbuf || ie_len == 0) {
+ break;
+ }
+ }
+@@ -2884,7 +2880,7 @@ void issue_assocreq(struct adapter *padapter)
+ u8 vs_ie_length = 0;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL)
++ if (!pmgntframe)
+ goto exit;
+
+ /* update attribute */
+@@ -3061,7 +3057,7 @@ static int _issue_nulldata(struct adapter *padapter, unsigned char *da,
+ pmlmeinfo = &(pmlmeext->mlmext_info);
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL)
++ if (!pmgntframe)
+ goto exit;
+
+ /* update attribute */
+@@ -3200,7 +3196,7 @@ static int _issue_qos_nulldata(struct adapter *padapter, unsigned char *da,
+ struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info);
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL)
++ if (!pmgntframe)
+ goto exit;
+
+ /* update attribute */
+@@ -3313,7 +3309,7 @@ static int _issue_deauth(struct adapter *padapter, unsigned char *da,
+ __le16 le_tmp;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL) {
++ if (!pmgntframe) {
+ goto exit;
+ }
+
+@@ -3639,7 +3635,7 @@ static void issue_action_BSSCoexistPacket(struct adapter *padapter)
+ action = ACT_PUBLIC_BSSCOEXIST;
+
+ pmgntframe = alloc_mgtxmitframe(pxmitpriv);
+- if (pmgntframe == NULL) {
++ if (!pmgntframe) {
+ return;
+ }
+
+@@ -3706,7 +3702,7 @@ static void issue_action_BSSCoexistPacket(struct adapter *padapter)
+ pbss_network = (struct wlan_bssid_ex *)&pnetwork->network;
+
+ p = rtw_get_ie(pbss_network->ies + _FIXED_IE_LENGTH_, WLAN_EID_HT_CAPABILITY, &len, pbss_network->ie_length - _FIXED_IE_LENGTH_);
+- if ((p == NULL) || (len == 0)) {/* non-HT */
++ if (!p || len == 0) {/* non-HT */
+
+ if (pbss_network->configuration.ds_config <= 0)
+ continue;
+@@ -3769,7 +3765,7 @@ unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr)
+ return _SUCCESS;
+
+ psta = rtw_get_stainfo(pstapriv, addr);
+- if (psta == NULL)
++ if (!psta)
+ return _SUCCESS;
+
+ if (initiator == 0) {/* recipient */
+@@ -4641,13 +4637,13 @@ void report_del_sta_event(struct adapter *padapter, unsigned char *MacAddr, unsi
+ struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
+
+ pcmd_obj = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd_obj == NULL) {
++ if (!pcmd_obj) {
+ return;
+ }
+
+ cmdsz = (sizeof(struct stadel_event) + sizeof(struct C2HEvent_Header));
+ pevtcmd = rtw_zmalloc(cmdsz);
+- if (pevtcmd == NULL) {
++ if (!pevtcmd) {
+ kfree(pcmd_obj);
+ return;
+ }
+@@ -4693,12 +4689,12 @@ void report_add_sta_event(struct adapter *padapter, unsigned char *MacAddr, int
+ struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
+
+ pcmd_obj = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (pcmd_obj == NULL)
++ if (!pcmd_obj)
+ return;
+
+ cmdsz = (sizeof(struct stassoc_event) + sizeof(struct C2HEvent_Header));
+ pevtcmd = rtw_zmalloc(cmdsz);
+- if (pevtcmd == NULL) {
++ if (!pevtcmd) {
+ kfree(pcmd_obj);
+ return;
+ }
+@@ -5147,12 +5143,12 @@ void survey_timer_hdl(struct timer_list *t)
+ }
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ goto exit_survey_timer_hdl;
+ }
+
+ psurveyPara = rtw_zmalloc(sizeof(struct sitesurvey_parm));
+- if (psurveyPara == NULL) {
++ if (!psurveyPara) {
+ kfree(ph2c);
+ goto exit_survey_timer_hdl;
+ }
+@@ -5781,7 +5777,7 @@ u8 chk_bmc_sleepq_cmd(struct adapter *padapter)
+ u8 res = _SUCCESS;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+@@ -5805,13 +5801,13 @@ u8 set_tx_beacon_cmd(struct adapter *padapter)
+ int len_diff = 0;
+
+ ph2c = rtw_zmalloc(sizeof(struct cmd_obj));
+- if (ph2c == NULL) {
++ if (!ph2c) {
+ res = _FAIL;
+ goto exit;
+ }
+
+ ptxBeacon_parm = rtw_zmalloc(sizeof(struct Tx_Beacon_param));
+- if (ptxBeacon_parm == NULL) {
++ if (!ptxBeacon_parm) {
+ kfree(ph2c);
+ res = _FAIL;
+ goto exit;
+@@ -5871,7 +5867,7 @@ u8 mlme_evt_hdl(struct adapter *padapter, unsigned char *pbuf)
+ void (*event_callback)(struct adapter *dev, u8 *pbuf);
+ struct evt_priv *pevt_priv = &(padapter->evtpriv);
+
+- if (pbuf == NULL)
++ if (!pbuf)
+ goto _abort_event_;
+
+ peventbuf = (uint *)pbuf;
+diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c b/drivers/staging/rtl8723bs/core/rtw_security.c
+index b050bf62e3b94..ac731415f7332 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_security.c
++++ b/drivers/staging/rtl8723bs/core/rtw_security.c
+@@ -51,7 +51,7 @@ void rtw_wep_encrypt(struct adapter *padapter, u8 *pxmitframe)
+ struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
+ struct arc4_ctx *ctx = &psecuritypriv->xmit_arc4_ctx;
+
+- if (((struct xmit_frame *)pxmitframe)->buf_addr == NULL)
++ if (!((struct xmit_frame *)pxmitframe)->buf_addr)
+ return;
+
+ hw_hdr_offset = TXDESC_OFFSET;
+@@ -476,7 +476,7 @@ u32 rtw_tkip_encrypt(struct adapter *padapter, u8 *pxmitframe)
+ struct arc4_ctx *ctx = &psecuritypriv->xmit_arc4_ctx;
+ u32 res = _SUCCESS;
+
+- if (((struct xmit_frame *)pxmitframe)->buf_addr == NULL)
++ if (!((struct xmit_frame *)pxmitframe)->buf_addr)
+ return _FAIL;
+
+ hw_hdr_offset = TXDESC_OFFSET;
+@@ -1043,7 +1043,7 @@ u32 rtw_aes_encrypt(struct adapter *padapter, u8 *pxmitframe)
+
+ u32 res = _SUCCESS;
+
+- if (((struct xmit_frame *)pxmitframe)->buf_addr == NULL)
++ if (!((struct xmit_frame *)pxmitframe)->buf_addr)
+ return _FAIL;
+
+ hw_hdr_offset = TXDESC_OFFSET;
+diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
+index 67ca219f95bf8..5eae3ccb1ff59 100644
+--- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
++++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
+@@ -263,7 +263,6 @@ exit:
+ return psta;
+ }
+
+-/* using pstapriv->sta_hash_lock to protect */
+ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
+ {
+ int i;
+@@ -332,8 +331,10 @@ u32 rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
+
+ spin_unlock_bh(&pxmitpriv->lock);
+
++ spin_lock_bh(&pstapriv->sta_hash_lock);
+ list_del_init(&psta->hash_list);
+ pstapriv->asoc_sta_count--;
++ spin_unlock_bh(&pstapriv->sta_hash_lock);
+
+ /* re-init sta_info; 20061114 will be init in alloc_stainfo */
+ /* _rtw_init_sta_xmit_priv(&psta->sta_xmitpriv); */
+@@ -428,6 +429,7 @@ void rtw_free_all_stainfo(struct adapter *padapter)
+ struct sta_info *psta = NULL;
+ struct sta_priv *pstapriv = &padapter->stapriv;
+ struct sta_info *pbcmc_stainfo = rtw_get_bcmc_stainfo(padapter);
++ LIST_HEAD(stainfo_free_list);
+
+ if (pstapriv->asoc_sta_count == 1)
+ return;
+@@ -440,11 +442,16 @@ void rtw_free_all_stainfo(struct adapter *padapter)
+ psta = list_entry(plist, struct sta_info, hash_list);
+
+ if (pbcmc_stainfo != psta)
+- rtw_free_stainfo(padapter, psta);
++ list_move(&psta->hash_list, &stainfo_free_list);
+ }
+ }
+
+ spin_unlock_bh(&pstapriv->sta_hash_lock);
++
++ list_for_each_safe(plist, tmp, &stainfo_free_list) {
++ psta = list_entry(plist, struct sta_info, hash_list);
++ rtw_free_stainfo(padapter, psta);
++ }
+ }
+
+ /* any station allocated can be searched by hash list */
+diff --git a/drivers/staging/rtl8723bs/include/rtw_security.h b/drivers/staging/rtl8723bs/include/rtw_security.h
+index a68b738584623..7587fa8885274 100644
+--- a/drivers/staging/rtl8723bs/include/rtw_security.h
++++ b/drivers/staging/rtl8723bs/include/rtw_security.h
+@@ -107,13 +107,13 @@ struct security_priv {
+
+ u32 dot118021XGrpPrivacy; /* This specify the privacy algthm. used for Grp key */
+ u32 dot118021XGrpKeyid; /* key id used for Grp Key (tx key index) */
+- union Keytype dot118021XGrpKey[BIP_MAX_KEYID]; /* 802.1x Group Key, for inx0 and inx1 */
+- union Keytype dot118021XGrptxmickey[BIP_MAX_KEYID];
+- union Keytype dot118021XGrprxmickey[BIP_MAX_KEYID];
++ union Keytype dot118021XGrpKey[BIP_MAX_KEYID + 1]; /* 802.1x Group Key, for inx0 and inx1 */
++ union Keytype dot118021XGrptxmickey[BIP_MAX_KEYID + 1];
++ union Keytype dot118021XGrprxmickey[BIP_MAX_KEYID + 1];
+ union pn48 dot11Grptxpn; /* PN48 used for Grp Key xmit. */
+ union pn48 dot11Grprxpn; /* PN48 used for Grp Key recv. */
+ u32 dot11wBIPKeyid; /* key id used for BIP Key (tx key index) */
+- union Keytype dot11wBIPKey[6]; /* BIP Key, for index4 and index5 */
++ union Keytype dot11wBIPKey[BIP_MAX_KEYID + 1]; /* BIP Key, for index4 and index5 */
+ union pn48 dot11wBIPtxpn; /* PN48 used for Grp Key xmit. */
+ union pn48 dot11wBIPrxpn; /* PN48 used for Grp Key recv. */
+
+diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+index 499ac3a775128..b33424a9e83b7 100644
+--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+@@ -113,13 +113,10 @@ static struct ieee80211_supported_band *rtw_spt_band_alloc(
+ struct ieee80211_supported_band *spt_band = NULL;
+ int n_channels, n_bitrates;
+
+- if (band == NL80211_BAND_2GHZ)
+- {
++ if (band == NL80211_BAND_2GHZ) {
+ n_channels = RTW_2G_CHANNELS_NUM;
+ n_bitrates = RTW_G_RATES_NUM;
+- }
+- else
+- {
++ } else {
+ goto exit;
+ }
+
+@@ -135,8 +132,7 @@ static struct ieee80211_supported_band *rtw_spt_band_alloc(
+ spt_band->n_channels = n_channels;
+ spt_band->n_bitrates = n_bitrates;
+
+- if (band == NL80211_BAND_2GHZ)
+- {
++ if (band == NL80211_BAND_2GHZ) {
+ rtw_2g_channels_init(spt_band->channels);
+ rtw_2g_rates_init(spt_band->bitrates);
+ }
+@@ -235,8 +231,7 @@ struct cfg80211_bss *rtw_cfg80211_inform_bss(struct adapter *padapter, struct wl
+ {
+ u16 wapi_len = 0;
+
+- if (rtw_get_wapi_ie(pnetwork->network.ies, pnetwork->network.ie_length, NULL, &wapi_len) > 0)
+- {
++ if (rtw_get_wapi_ie(pnetwork->network.ies, pnetwork->network.ie_length, NULL, &wapi_len) > 0) {
+ if (wapi_len > 0)
+ goto exit;
+ }
+@@ -244,8 +239,7 @@ struct cfg80211_bss *rtw_cfg80211_inform_bss(struct adapter *padapter, struct wl
+
+ /* To reduce PBC Overlap rate */
+ /* spin_lock_bh(&pwdev_priv->scan_req_lock); */
+- if (adapter_wdev_data(padapter)->scan_request)
+- {
++ if (adapter_wdev_data(padapter)->scan_request) {
+ u8 *psr = NULL, sr = 0;
+ struct ndis_802_11_ssid *pssid = &pnetwork->network.ssid;
+ struct cfg80211_scan_request *request = adapter_wdev_data(padapter)->scan_request;
+@@ -258,14 +252,12 @@ struct cfg80211_bss *rtw_cfg80211_inform_bss(struct adapter *padapter, struct wl
+ if (wpsie && wpsielen > 0)
+ psr = rtw_get_wps_attr_content(wpsie, wpsielen, WPS_ATTR_SELECTED_REGISTRAR, (u8 *)(&sr), NULL);
+
+- if (sr != 0)
+- {
+- if (request->n_ssids == 1 && request->n_channels == 1) /* it means under processing WPS */
+- {
++ if (sr != 0) {
++ /* it means under processing WPS */
++ if (request->n_ssids == 1 && request->n_channels == 1) {
+ if (ssids[0].ssid_len != 0 &&
+ (pssid->ssid_length != ssids[0].ssid_len ||
+- memcmp(pssid->ssid, ssids[0].ssid, ssids[0].ssid_len)))
+- {
++ memcmp(pssid->ssid, ssids[0].ssid, ssids[0].ssid_len))) {
+ if (psr)
+ *psr = 0; /* clear sr */
+ }
+@@ -358,7 +350,7 @@ int rtw_cfg80211_check_bss(struct adapter *padapter)
+ bss = cfg80211_get_bss(padapter->rtw_wdev->wiphy, notify_channel,
+ pnetwork->mac_address, pnetwork->ssid.ssid,
+ pnetwork->ssid.ssid_length,
+- WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
++ IEEE80211_BSS_TYPE_ANY, IEEE80211_PRIVACY_ANY);
+
+ cfg80211_put_bss(padapter->rtw_wdev->wiphy, bss);
+
+@@ -375,23 +367,18 @@ void rtw_cfg80211_ibss_indicate_connect(struct adapter *padapter)
+ struct ieee80211_channel *chan;
+
+ if (pwdev->iftype != NL80211_IFTYPE_ADHOC)
+- {
+ return;
+- }
+
+ if (!rtw_cfg80211_check_bss(padapter)) {
+ struct wlan_bssid_ex *pnetwork = &(padapter->mlmeextpriv.mlmext_info.network);
+ struct wlan_network *scanned = pmlmepriv->cur_network_scanned;
+
+- if (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true)
+- {
++ if (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true) {
+
+ memcpy(&cur_network->network, pnetwork, sizeof(struct wlan_bssid_ex));
+ rtw_cfg80211_inform_bss(padapter, cur_network);
+- }
+- else
+- {
+- if (scanned == NULL) {
++ } else {
++ if (!scanned) {
+ rtw_warn_on(1);
+ return;
+ }
+@@ -432,7 +419,7 @@ void rtw_cfg80211_indicate_connect(struct adapter *padapter)
+ struct wlan_bssid_ex *pnetwork = &(padapter->mlmeextpriv.mlmext_info.network);
+ struct wlan_network *scanned = pmlmepriv->cur_network_scanned;
+
+- if (scanned == NULL) {
++ if (!scanned) {
+ rtw_warn_on(1);
+ goto check_bss;
+ }
+@@ -473,9 +460,7 @@ check_bss:
+ roam_info.resp_ie_len =
+ pmlmepriv->assoc_rsp_len-sizeof(struct ieee80211_hdr_3addr)-6;
+ cfg80211_roamed(padapter->pnetdev, &roam_info, GFP_ATOMIC);
+- }
+- else
+- {
++ } else {
+ cfg80211_connect_result(padapter->pnetdev, cur_network->network.mac_address
+ , pmlmepriv->assoc_req+sizeof(struct ieee80211_hdr_3addr)+2
+ , pmlmepriv->assoc_req_len-sizeof(struct ieee80211_hdr_3addr)-2
+@@ -527,51 +512,41 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+ param->u.crypt.err = 0;
+ param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
+
+- if (param_len != sizeof(struct ieee_param) + param->u.crypt.key_len)
+- {
++ if (param_len != sizeof(struct ieee_param) + param->u.crypt.key_len) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (param->sta_addr[0] == 0xff && param->sta_addr[1] == 0xff &&
+ param->sta_addr[2] == 0xff && param->sta_addr[3] == 0xff &&
+- param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff)
+- {
+- if (param->u.crypt.idx >= WEP_KEYS)
+- {
++ param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff) {
++ if (param->u.crypt.idx >= WEP_KEYS) {
+ ret = -EINVAL;
+ goto exit;
+ }
+- }
+- else
+- {
++ } else {
+ psta = rtw_get_stainfo(pstapriv, param->sta_addr);
+ if (!psta)
+ /* ret = -EINVAL; */
+ goto exit;
+ }
+
+- if (strcmp(param->u.crypt.alg, "none") == 0 && (psta == NULL))
++ if (strcmp(param->u.crypt.alg, "none") == 0 && !psta)
+ goto exit;
+
+- if (strcmp(param->u.crypt.alg, "WEP") == 0 && (psta == NULL))
+- {
++ if (strcmp(param->u.crypt.alg, "WEP") == 0 && !psta) {
+ wep_key_idx = param->u.crypt.idx;
+ wep_key_len = param->u.crypt.key_len;
+
+- if ((wep_key_idx >= WEP_KEYS) || (wep_key_len <= 0))
+- {
++ if ((wep_key_idx >= WEP_KEYS) || (wep_key_len <= 0)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (wep_key_len > 0)
+- {
+ wep_key_len = wep_key_len <= 5 ? 5 : 13;
+- }
+
+- if (psecuritypriv->bWepDefaultKeyIdxSet == 0)
+- {
++ if (psecuritypriv->bWepDefaultKeyIdxSet == 0) {
+ /* wep default key has not been set, so use this key index as default key. */
+
+ psecuritypriv->dot11AuthAlgrthm = dot11AuthAlgrthm_Auto;
+@@ -579,8 +554,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP40_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
+
+- if (wep_key_len == 13)
+- {
++ if (wep_key_len == 13) {
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP104_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP104_;
+ }
+@@ -598,24 +572,18 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ }
+
+-
+- if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) /* group key */
+- {
+- if (param->u.crypt.set_tx == 0) /* group key */
+- {
+- if (strcmp(param->u.crypt.alg, "WEP") == 0)
+- {
++ /* group key */
++ if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
++ /* group key */
++ if (param->u.crypt.set_tx == 0) {
++ if (strcmp(param->u.crypt.alg, "WEP") == 0) {
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+
+ psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
+ if (param->u.crypt.key_len == 13)
+- {
+ psecuritypriv->dot118021XGrpPrivacy = _WEP104_;
+- }
+
+- }
+- else if (strcmp(param->u.crypt.alg, "TKIP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
+ psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
+
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+@@ -627,15 +595,11 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ psecuritypriv->busetkipkey = true;
+
+- }
+- else if (strcmp(param->u.crypt.alg, "CCMP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
+ psecuritypriv->dot118021XGrpPrivacy = _AES_;
+
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+- }
+- else
+- {
++ } else {
+ psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
+ }
+
+@@ -648,8 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+ rtw_ap_set_group_key(padapter, param->u.crypt.key, psecuritypriv->dot118021XGrpPrivacy, param->u.crypt.idx);
+
+ pbcmc_sta = rtw_get_bcmc_stainfo(padapter);
+- if (pbcmc_sta)
+- {
++ if (pbcmc_sta) {
+ pbcmc_sta->ieee8021x_blocked = false;
+ pbcmc_sta->dot118021XPrivacy = psecuritypriv->dot118021XGrpPrivacy;/* rx will use bmc_sta's dot118021XPrivacy */
+ }
+@@ -660,24 +623,16 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ }
+
+- if (psecuritypriv->dot11AuthAlgrthm == dot11AuthAlgrthm_8021X && psta) /* psk/802_1x */
+- {
+- if (check_fwstate(pmlmepriv, WIFI_AP_STATE))
+- {
+- if (param->u.crypt.set_tx == 1) /* pairwise key */
+- {
++ if (psecuritypriv->dot11AuthAlgrthm == dot11AuthAlgrthm_8021X && psta) { /* psk/802_1x */
++ if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
++ if (param->u.crypt.set_tx == 1) { /* pairwise key */
+ memcpy(psta->dot118021x_UncstKey.skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+
+- if (strcmp(param->u.crypt.alg, "WEP") == 0)
+- {
++ if (strcmp(param->u.crypt.alg, "WEP") == 0) {
+ psta->dot118021XPrivacy = _WEP40_;
+ if (param->u.crypt.key_len == 13)
+- {
+ psta->dot118021XPrivacy = _WEP104_;
+- }
+- }
+- else if (strcmp(param->u.crypt.alg, "TKIP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
+ psta->dot118021XPrivacy = _TKIP_;
+
+ /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
+@@ -687,14 +642,10 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ psecuritypriv->busetkipkey = true;
+
+- }
+- else if (strcmp(param->u.crypt.alg, "CCMP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
+
+ psta->dot118021XPrivacy = _AES_;
+- }
+- else
+- {
++ } else {
+ psta->dot118021XPrivacy = _NO_PRIVACY_;
+ }
+
+@@ -704,21 +655,14 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ psta->bpairwise_key_installed = true;
+
+- }
+- else/* group key??? */
+- {
+- if (strcmp(param->u.crypt.alg, "WEP") == 0)
+- {
++ } else { /* group key??? */
++ if (strcmp(param->u.crypt.alg, "WEP") == 0) {
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+
+ psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
+ if (param->u.crypt.key_len == 13)
+- {
+ psecuritypriv->dot118021XGrpPrivacy = _WEP104_;
+- }
+- }
+- else if (strcmp(param->u.crypt.alg, "TKIP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
+ psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
+
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+@@ -730,15 +674,11 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+
+ psecuritypriv->busetkipkey = true;
+
+- }
+- else if (strcmp(param->u.crypt.alg, "CCMP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
+ psecuritypriv->dot118021XGrpPrivacy = _AES_;
+
+ memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+- }
+- else
+- {
++ } else {
+ psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
+ }
+
+@@ -751,8 +691,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
+ rtw_ap_set_group_key(padapter, param->u.crypt.key, psecuritypriv->dot118021XGrpPrivacy, param->u.crypt.idx);
+
+ pbcmc_sta = rtw_get_bcmc_stainfo(padapter);
+- if (pbcmc_sta)
+- {
++ if (pbcmc_sta) {
+ pbcmc_sta->ieee8021x_blocked = false;
+ pbcmc_sta->dot118021XPrivacy = psecuritypriv->dot118021XGrpPrivacy;/* rx will use bmc_sta's dot118021XPrivacy */
+ }
+@@ -772,6 +711,7 @@ exit:
+ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param *param, u32 param_len)
+ {
+ int ret = 0;
++ u8 max_idx;
+ u32 wep_key_idx, wep_key_len;
+ struct adapter *padapter = rtw_netdev_priv(dev);
+ struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
+@@ -780,43 +720,39 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+ param->u.crypt.err = 0;
+ param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
+
+- if (param_len < (u32) ((u8 *) param->u.crypt.key - (u8 *) param) + param->u.crypt.key_len)
+- {
++ if (param_len < (u32) ((u8 *) param->u.crypt.key - (u8 *) param) + param->u.crypt.key_len) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+- if (param->sta_addr[0] == 0xff && param->sta_addr[1] == 0xff &&
+- param->sta_addr[2] == 0xff && param->sta_addr[3] == 0xff &&
+- param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff)
+- {
+- if (param->u.crypt.idx >= WEP_KEYS
+- || param->u.crypt.idx >= BIP_MAX_KEYID
+- )
+- {
+- ret = -EINVAL;
+- goto exit;
+- }
+- } else {
+- {
++ if (param->sta_addr[0] != 0xff || param->sta_addr[1] != 0xff ||
++ param->sta_addr[2] != 0xff || param->sta_addr[3] != 0xff ||
++ param->sta_addr[4] != 0xff || param->sta_addr[5] != 0xff) {
+ ret = -EINVAL;
+ goto exit;
+ }
+- }
+
+ if (strcmp(param->u.crypt.alg, "WEP") == 0)
+- {
++ max_idx = WEP_KEYS - 1;
++ else
++ max_idx = BIP_MAX_KEYID;
++
++ if (param->u.crypt.idx > max_idx) {
++ netdev_err(dev, "Error crypt.idx %d > %d\n", param->u.crypt.idx, max_idx);
++ ret = -EINVAL;
++ goto exit;
++ }
++
++ if (strcmp(param->u.crypt.alg, "WEP") == 0) {
+ wep_key_idx = param->u.crypt.idx;
+ wep_key_len = param->u.crypt.key_len;
+
+- if ((wep_key_idx >= WEP_KEYS) || (wep_key_len <= 0))
+- {
++ if (wep_key_len <= 0) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+- if (psecuritypriv->bWepDefaultKeyIdxSet == 0)
+- {
++ if (psecuritypriv->bWepDefaultKeyIdxSet == 0) {
+ /* wep default key has not been set, so use this key index as default key. */
+
+ wep_key_len = wep_key_len <= 5 ? 5 : 13;
+@@ -825,8 +761,7 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP40_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
+
+- if (wep_key_len == 13)
+- {
++ if (wep_key_len == 13) {
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP104_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP104_;
+ }
+@@ -843,13 +778,11 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+ goto exit;
+ }
+
+- if (padapter->securitypriv.dot11AuthAlgrthm == dot11AuthAlgrthm_8021X) /* 802_1x */
+- {
++ if (padapter->securitypriv.dot11AuthAlgrthm == dot11AuthAlgrthm_8021X) { /* 802_1x */
+ struct sta_info *psta, *pbcmc_sta;
+ struct sta_priv *pstapriv = &padapter->stapriv;
+
+- if (check_fwstate(pmlmepriv, WIFI_STATION_STATE | WIFI_MP_STATE) == true) /* sta mode */
+- {
++ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE | WIFI_MP_STATE) == true) { /* sta mode */
+ psta = rtw_get_stainfo(pstapriv, get_bssid(pmlmepriv));
+ if (psta) {
+ /* Jeff: don't disable ieee8021x_blocked while clearing key */
+@@ -858,18 +791,15 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+
+
+ if ((padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption2Enabled) ||
+- (padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption3Enabled))
+- {
++ (padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption3Enabled)) {
+ psta->dot118021XPrivacy = padapter->securitypriv.dot11PrivacyAlgrthm;
+ }
+
+- if (param->u.crypt.set_tx == 1)/* pairwise key */
+- {
++ if (param->u.crypt.set_tx == 1) { /* pairwise key */
+
+ memcpy(psta->dot118021x_UncstKey.skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+
+- if (strcmp(param->u.crypt.alg, "TKIP") == 0)/* set mic key */
+- {
++ if (strcmp(param->u.crypt.alg, "TKIP") == 0) { /* set mic key */
+ /* DEBUG_ERR(("\nset key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len)); */
+ memcpy(psta->dot11tkiptxmickey.skey, &(param->u.crypt.key[16]), 8);
+ memcpy(psta->dot11tkiprxmickey.skey, &(param->u.crypt.key[24]), 8);
+@@ -879,11 +809,8 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+ }
+
+ rtw_setstakey_cmd(padapter, psta, true, true);
+- }
+- else/* group key */
+- {
+- if (strcmp(param->u.crypt.alg, "TKIP") == 0 || strcmp(param->u.crypt.alg, "CCMP") == 0)
+- {
++ } else { /* group key */
++ if (strcmp(param->u.crypt.alg, "TKIP") == 0 || strcmp(param->u.crypt.alg, "CCMP") == 0) {
+ memcpy(padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+ memcpy(padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
+ memcpy(padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+@@ -891,9 +818,7 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+
+ padapter->securitypriv.dot118021XGrpKeyid = param->u.crypt.idx;
+ rtw_set_key(padapter, &padapter->securitypriv, param->u.crypt.idx, 1, true);
+- }
+- else if (strcmp(param->u.crypt.alg, "BIP") == 0)
+- {
++ } else if (strcmp(param->u.crypt.alg, "BIP") == 0) {
+ /* save the IGTK key, length 16 bytes */
+ memcpy(padapter->securitypriv.dot11wBIPKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+ /*
+@@ -907,25 +832,19 @@ static int rtw_cfg80211_set_encryption(struct net_device *dev, struct ieee_param
+ }
+
+ pbcmc_sta = rtw_get_bcmc_stainfo(padapter);
+- if (pbcmc_sta == NULL)
+- {
++ if (!pbcmc_sta) {
+ /* DEBUG_ERR(("Set OID_802_11_ADD_KEY: bcmc stainfo is null\n")); */
+- }
+- else
+- {
++ } else {
+ /* Jeff: don't disable ieee8021x_blocked while clearing key */
+ if (strcmp(param->u.crypt.alg, "none") != 0)
+ pbcmc_sta->ieee8021x_blocked = false;
+
+ if ((padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption2Enabled) ||
+- (padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption3Enabled))
+- {
++ (padapter->securitypriv.ndisencryptstatus == Ndis802_11Encryption3Enabled)) {
+ pbcmc_sta->dot118021XPrivacy = padapter->securitypriv.dot11PrivacyAlgrthm;
+ }
+ }
+- }
+- else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)) /* adhoc mode */
+- {
++ } else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)) { /* adhoc mode */
+ }
+ }
+
+@@ -947,7 +866,7 @@ static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct net_device *ndev,
+
+ param_len = sizeof(struct ieee_param) + params->key_len;
+ param = rtw_malloc(param_len);
+- if (param == NULL)
++ if (!param)
+ return -1;
+
+ memset(param, 0, param_len);
+@@ -983,39 +902,29 @@ static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct net_device *ndev,
+
+
+ if (!mac_addr || is_broadcast_ether_addr(mac_addr))
+- {
+ param->u.crypt.set_tx = 0; /* for wpa/wpa2 group key */
+- } else {
++ else
+ param->u.crypt.set_tx = 1; /* for wpa/wpa2 pairwise key */
+- }
+
+ param->u.crypt.idx = key_index;
+
+ if (params->seq_len && params->seq)
+- {
+ memcpy(param->u.crypt.seq, (u8 *)params->seq, params->seq_len);
+- }
+
+- if (params->key_len && params->key)
+- {
++ if (params->key_len && params->key) {
+ param->u.crypt.key_len = params->key_len;
+ memcpy(param->u.crypt.key, (u8 *)params->key, params->key_len);
+ }
+
+- if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) == true)
+- {
++ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) == true) {
+ ret = rtw_cfg80211_set_encryption(ndev, param, param_len);
+- }
+- else if (check_fwstate(pmlmepriv, WIFI_AP_STATE) == true)
+- {
++ } else if (check_fwstate(pmlmepriv, WIFI_AP_STATE) == true) {
+ if (mac_addr)
+ memcpy(param->sta_addr, (void *)mac_addr, ETH_ALEN);
+
+ ret = rtw_cfg80211_ap_set_encryption(ndev, param, param_len);
+- }
+- else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true
+- || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true)
+- {
++ } else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true
++ || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true) {
+ ret = rtw_cfg80211_set_encryption(ndev, param, param_len);
+ }
+
+@@ -1041,8 +950,7 @@ static int cfg80211_rtw_del_key(struct wiphy *wiphy, struct net_device *ndev,
+ struct adapter *padapter = rtw_netdev_priv(ndev);
+ struct security_priv *psecuritypriv = &padapter->securitypriv;
+
+- if (key_index == psecuritypriv->dot11PrivacyKeyIndex)
+- {
++ if (key_index == psecuritypriv->dot11PrivacyKeyIndex) {
+ /* clear the flag of wep default key set. */
+ psecuritypriv->bWepDefaultKeyIdxSet = 0;
+ }
+@@ -1058,16 +966,14 @@ static int cfg80211_rtw_set_default_key(struct wiphy *wiphy,
+ struct adapter *padapter = rtw_netdev_priv(ndev);
+ struct security_priv *psecuritypriv = &padapter->securitypriv;
+
+- if ((key_index < WEP_KEYS) && ((psecuritypriv->dot11PrivacyAlgrthm == _WEP40_) || (psecuritypriv->dot11PrivacyAlgrthm == _WEP104_))) /* set wep default key */
+- {
++ if ((key_index < WEP_KEYS) && ((psecuritypriv->dot11PrivacyAlgrthm == _WEP40_) || (psecuritypriv->dot11PrivacyAlgrthm == _WEP104_))) { /* set wep default key */
+ psecuritypriv->ndisencryptstatus = Ndis802_11Encryption1Enabled;
+
+ psecuritypriv->dot11PrivacyKeyIndex = key_index;
+
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP40_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
+- if (psecuritypriv->dot11DefKeylen[key_index] == 13)
+- {
++ if (psecuritypriv->dot11DefKeylen[key_index] == 13) {
+ psecuritypriv->dot11PrivacyAlgrthm = _WEP104_;
+ psecuritypriv->dot118021XGrpPrivacy = _WEP104_;
+ }
+@@ -1098,16 +1004,14 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
+ }
+
+ psta = rtw_get_stainfo(pstapriv, (u8 *)mac);
+- if (psta == NULL) {
++ if (!psta) {
+ ret = -ENOENT;
+ goto exit;
+ }
+
+ /* for infra./P2PClient mode */
+ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)
+- && check_fwstate(pmlmepriv, _FW_LINKED)
+- )
+- {
++ && check_fwstate(pmlmepriv, _FW_LINKED)) {
+ struct wlan_network *cur_network = &(pmlmepriv->cur_network);
+
+ if (memcmp((u8 *)mac, cur_network->network.mac_address, ETH_ALEN)) {
+@@ -1133,9 +1037,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
+ if ((check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)
+ || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)
+ || check_fwstate(pmlmepriv, WIFI_AP_STATE))
+- && check_fwstate(pmlmepriv, _FW_LINKED)
+- )
+- {
++ && check_fwstate(pmlmepriv, _FW_LINKED)) {
+ /* TODO: should acquire station info... */
+ }
+
+@@ -1155,8 +1057,7 @@ static int cfg80211_rtw_change_iface(struct wiphy *wiphy,
+ struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv);
+ int ret = 0;
+
+- if (adapter_to_dvobj(padapter)->processing_dev_remove == true)
+- {
++ if (adapter_to_dvobj(padapter)->processing_dev_remove == true) {
+ ret = -EPERM;
+ goto exit;
+ }
+@@ -1175,8 +1076,7 @@ static int cfg80211_rtw_change_iface(struct wiphy *wiphy,
+
+ old_type = rtw_wdev->iftype;
+
+- if (old_type != type)
+- {
++ if (old_type != type) {
+ pmlmeext->action_public_rxseq = 0xffff;
+ pmlmeext->action_public_dialog_token = 0xff;
+ }
+@@ -1198,8 +1098,7 @@ static int cfg80211_rtw_change_iface(struct wiphy *wiphy,
+
+ rtw_wdev->iftype = type;
+
+- if (rtw_set_802_11_infrastructure_mode(padapter, networkType) == false)
+- {
++ if (rtw_set_802_11_infrastructure_mode(padapter, networkType) == false) {
+ rtw_wdev->iftype = old_type;
+ ret = -EPERM;
+ goto exit;
+@@ -1239,8 +1138,8 @@ void rtw_cfg80211_unlink_bss(struct adapter *padapter, struct wlan_network *pnet
+
+ bss = cfg80211_get_bss(wiphy, NULL/*notify_channel*/,
+ select_network->mac_address, select_network->ssid.ssid,
+- select_network->ssid.ssid_length, 0/*WLAN_CAPABILITY_ESS*/,
+- 0/*WLAN_CAPABILITY_ESS*/);
++ select_network->ssid.ssid_length, IEEE80211_BSS_TYPE_ANY,
++ IEEE80211_PRIVACY_ANY);
+
+ if (bss) {
+ cfg80211_unlink_bss(wiphy, bss);
+@@ -1264,9 +1163,7 @@ void rtw_cfg80211_surveydone_event_callback(struct adapter *padapter)
+
+ /* report network only if the current channel set contains the channel to which this network belongs */
+ if (rtw_ch_set_search_ch(padapter->mlmeextpriv.channel_set, pnetwork->network.configuration.ds_config) >= 0
+- && true == rtw_validate_ssid(&(pnetwork->network.ssid))
+- )
+- {
++ && true == rtw_validate_ssid(&(pnetwork->network.ssid))) {
+ /* ev =translate_scan(padapter, a, pnetwork, ev, stop); */
+ rtw_cfg80211_inform_bss(padapter, pnetwork);
+ }
+@@ -1283,13 +1180,10 @@ static int rtw_cfg80211_set_probe_req_wpsp2pie(struct adapter *padapter, char *b
+ u8 *wps_ie;
+ struct mlme_priv *pmlmepriv = &(padapter->mlmepriv);
+
+- if (len > 0)
+- {
++ if (len > 0) {
+ wps_ie = rtw_get_wps_ie(buf, len, NULL, &wps_ielen);
+- if (wps_ie)
+- {
+- if (pmlmepriv->wps_probe_req_ie)
+- {
++ if (wps_ie) {
++ if (pmlmepriv->wps_probe_req_ie) {
+ pmlmepriv->wps_probe_req_ie_len = 0;
+ kfree(pmlmepriv->wps_probe_req_ie);
+ pmlmepriv->wps_probe_req_ie = NULL;
+@@ -1327,7 +1221,7 @@ static int cfg80211_rtw_scan(struct wiphy *wiphy
+ struct rtw_wdev_priv *pwdev_priv;
+ struct mlme_priv *pmlmepriv;
+
+- if (ndev == NULL) {
++ if (!ndev) {
+ ret = -EINVAL;
+ goto exit;
+ }
+@@ -1341,10 +1235,8 @@ static int cfg80211_rtw_scan(struct wiphy *wiphy
+ pwdev_priv->scan_request = request;
+ spin_unlock_bh(&pwdev_priv->scan_req_lock);
+
+- if (check_fwstate(pmlmepriv, WIFI_AP_STATE) == true)
+- {
+- if (check_fwstate(pmlmepriv, WIFI_UNDER_WPS|_FW_UNDER_SURVEY|_FW_UNDER_LINKING) == true)
+- {
++ if (check_fwstate(pmlmepriv, WIFI_AP_STATE) == true) {
++ if (check_fwstate(pmlmepriv, WIFI_UNDER_WPS|_FW_UNDER_SURVEY|_FW_UNDER_LINKING) == true) {
+ need_indicate_scan_done = true;
+ goto check_need_indicate_scan_done;
+ }
+@@ -1367,15 +1259,13 @@ static int cfg80211_rtw_scan(struct wiphy *wiphy
+ goto check_need_indicate_scan_done;
+ }
+
+- if (pmlmepriv->LinkDetectInfo.bBusyTraffic == true)
+- {
++ if (pmlmepriv->LinkDetectInfo.bBusyTraffic == true) {
+ static unsigned long lastscantime = 0;
+ unsigned long passtime;
+
+ passtime = jiffies_to_msecs(jiffies - lastscantime);
+ lastscantime = jiffies;
+- if (passtime > 12000)
+- {
++ if (passtime > 12000) {
+ need_indicate_scan_done = true;
+ goto check_need_indicate_scan_done;
+ }
+@@ -1414,9 +1304,7 @@ static int cfg80211_rtw_scan(struct wiphy *wiphy
+ } else if (request->n_channels <= 4) {
+ for (j = request->n_channels - 1; j >= 0; j--)
+ for (i = 0; i < survey_times; i++)
+- {
+ memcpy(&ch[j*survey_times+i], &ch[j], sizeof(struct rtw_ieee80211_channel));
+- }
+ _status = rtw_sitesurvey_cmd(padapter, ssid, RTW_SSID_SCAN_AMOUNT, ch, survey_times * request->n_channels);
+ } else {
+ _status = rtw_sitesurvey_cmd(padapter, ssid, RTW_SSID_SCAN_AMOUNT, NULL, 0);
+@@ -1425,14 +1313,11 @@ static int cfg80211_rtw_scan(struct wiphy *wiphy
+
+
+ if (_status == false)
+- {
+ ret = -1;
+- }
+
+ check_need_indicate_scan_done:
+ kfree(ssid);
+- if (need_indicate_scan_done)
+- {
++ if (need_indicate_scan_done) {
+ rtw_cfg80211_surveydone_event_callback(padapter);
+ rtw_cfg80211_indicate_scan_done(padapter, false);
+ }
+@@ -1458,9 +1343,7 @@ static int rtw_cfg80211_set_wpa_version(struct security_priv *psecuritypriv, u32
+
+
+ if (wpa_version & (NL80211_WPA_VERSION_1 | NL80211_WPA_VERSION_2))
+- {
+ psecuritypriv->ndisauthtype = Ndis802_11AuthModeWPAPSK;
+- }
+
+ return 0;
+
+@@ -1571,7 +1454,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel
+ u8 *pwpa, *pwpa2;
+ u8 null_addr[] = {0, 0, 0, 0, 0, 0};
+
+- if (pie == NULL || !ielen) {
++ if (!pie || !ielen) {
+ /* Treat this as normal case, but need to clear WIFI_UNDER_WPS */
+ _clr_fwstate_(&padapter->mlmepriv, WIFI_UNDER_WPS);
+ goto exit;
+@@ -1583,7 +1466,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel
+ }
+
+ buf = rtw_zmalloc(ielen);
+- if (buf == NULL) {
++ if (!buf) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+@@ -1619,8 +1502,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel
+ if (pairwise_cipher == 0)
+ pairwise_cipher = WPA_CIPHER_NONE;
+
+- switch (group_cipher)
+- {
++ switch (group_cipher) {
+ case WPA_CIPHER_NONE:
+ padapter->securitypriv.dot118021XGrpPrivacy = _NO_PRIVACY_;
+ padapter->securitypriv.ndisencryptstatus = Ndis802_11EncryptionDisabled;
+@@ -1643,8 +1525,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel
+ break;
+ }
+
+- switch (pairwise_cipher)
+- {
++ switch (pairwise_cipher) {
+ case WPA_CIPHER_NONE:
+ padapter->securitypriv.dot11PrivacyAlgrthm = _NO_PRIVACY_;
+ padapter->securitypriv.ndisencryptstatus = Ndis802_11EncryptionDisabled;
+@@ -1765,8 +1646,7 @@ static int cfg80211_rtw_leave_ibss(struct wiphy *wiphy, struct net_device *ndev)
+
+ rtw_wdev->iftype = NL80211_IFTYPE_STATION;
+
+- if (rtw_set_802_11_infrastructure_mode(padapter, Ndis802_11Infrastructure) == false)
+- {
++ if (rtw_set_802_11_infrastructure_mode(padapter, Ndis802_11Infrastructure) == false) {
+ rtw_wdev->iftype = old_type;
+ ret = -EPERM;
+ goto leave_ibss;
+@@ -1826,9 +1706,8 @@ static int cfg80211_rtw_connect(struct wiphy *wiphy, struct net_device *ndev,
+ ret = -EBUSY;
+ goto exit;
+ }
+- if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY) == true) {
++ if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY) == true)
+ rtw_scan_abort(padapter);
+- }
+
+ psecuritypriv->ndisencryptstatus = Ndis802_11EncryptionDisabled;
+ psecuritypriv->dot11PrivacyAlgrthm = _NO_PRIVACY_;
+@@ -1873,7 +1752,7 @@ static int cfg80211_rtw_connect(struct wiphy *wiphy, struct net_device *ndev,
+ wep_key_len = wep_key_len <= 5 ? 5 : 13;
+ wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
+ pwep = rtw_malloc(wep_total_len);
+- if (pwep == NULL) {
++ if (!pwep) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+@@ -2321,9 +2200,8 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
+ mon_ndev->ieee80211_ptr = mon_wdev;
+
+ ret = cfg80211_register_netdevice(mon_ndev);
+- if (ret) {
++ if (ret)
+ goto out;
+- }
+
+ *ndev = pwdev_priv->pmon_ndev = mon_ndev;
+ memcpy(pwdev_priv->ifname_mon, name, IFNAMSIZ+1);
+@@ -2436,11 +2314,10 @@ static int rtw_add_beacon(struct adapter *adapter, const u8 *head, size_t head_l
+ rtw_ies_remove_ie(pbuf, &len, _BEACON_IE_OFFSET_, WLAN_EID_VENDOR_SPECIFIC, P2P_OUI, 4);
+ rtw_ies_remove_ie(pbuf, &len, _BEACON_IE_OFFSET_, WLAN_EID_VENDOR_SPECIFIC, WFD_OUI, 4);
+
+- if (rtw_check_beacon_data(adapter, pbuf, len) == _SUCCESS) {
++ if (rtw_check_beacon_data(adapter, pbuf, len) == _SUCCESS)
+ ret = 0;
+- } else {
++ else
+ ret = -EINVAL;
+- }
+
+
+ kfree(pbuf);
+@@ -2708,7 +2585,7 @@ static int cfg80211_rtw_mgmt_tx(struct wiphy *wiphy,
+ struct adapter *padapter;
+ struct rtw_wdev_priv *pwdev_priv;
+
+- if (ndev == NULL) {
++ if (!ndev) {
+ ret = -EINVAL;
+ goto exit;
+ }
+diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+index 9d4a233a861e3..0d2cb3e7ea4df 100644
+--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+@@ -60,6 +60,7 @@ static int wpa_set_auth_algs(struct net_device *dev, u32 value)
+ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param, u32 param_len)
+ {
+ int ret = 0;
++ u8 max_idx;
+ u32 wep_key_idx, wep_key_len, wep_total_len;
+ struct ndis_802_11_wep *pwep = NULL;
+ struct adapter *padapter = rtw_netdev_priv(dev);
+@@ -74,19 +75,22 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
+ goto exit;
+ }
+
+- if (param->sta_addr[0] == 0xff && param->sta_addr[1] == 0xff &&
+- param->sta_addr[2] == 0xff && param->sta_addr[3] == 0xff &&
+- param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff) {
+- if (param->u.crypt.idx >= WEP_KEYS ||
+- param->u.crypt.idx >= BIP_MAX_KEYID) {
+- ret = -EINVAL;
+- goto exit;
+- }
+- } else {
+- {
+- ret = -EINVAL;
+- goto exit;
+- }
++ if (param->sta_addr[0] != 0xff || param->sta_addr[1] != 0xff ||
++ param->sta_addr[2] != 0xff || param->sta_addr[3] != 0xff ||
++ param->sta_addr[4] != 0xff || param->sta_addr[5] != 0xff) {
++ ret = -EINVAL;
++ goto exit;
++ }
++
++ if (strcmp(param->u.crypt.alg, "WEP") == 0)
++ max_idx = WEP_KEYS - 1;
++ else
++ max_idx = BIP_MAX_KEYID;
++
++ if (param->u.crypt.idx > max_idx) {
++ netdev_err(dev, "Error crypt.idx %d > %d\n", param->u.crypt.idx, max_idx);
++ ret = -EINVAL;
++ goto exit;
+ }
+
+ if (strcmp(param->u.crypt.alg, "WEP") == 0) {
+@@ -98,9 +102,6 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
+ wep_key_idx = param->u.crypt.idx;
+ wep_key_len = param->u.crypt.key_len;
+
+- if (wep_key_idx > WEP_KEYS)
+- return -EINVAL;
+-
+ if (wep_key_len > 0) {
+ wep_key_len = wep_key_len <= 5 ? 5 : 13;
+ wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
+@@ -153,7 +154,7 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
+
+ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE | WIFI_MP_STATE) == true) { /* sta mode */
+ psta = rtw_get_stainfo(pstapriv, get_bssid(pmlmepriv));
+- if (psta == NULL) {
++ if (!psta) {
+ /* DEBUG_ERR(("Set wpa_set_encryption: Obtain Sta_info fail\n")); */
+ } else {
+ /* Jeff: don't disable ieee8021x_blocked while clearing key */
+@@ -206,7 +207,7 @@ static int wpa_set_encryption(struct net_device *dev, struct ieee_param *param,
+ }
+
+ pbcmc_sta = rtw_get_bcmc_stainfo(padapter);
+- if (pbcmc_sta == NULL) {
++ if (!pbcmc_sta) {
+ /* DEBUG_ERR(("Set OID_802_11_ADD_KEY: bcmc stainfo is null\n")); */
+ } else {
+ /* Jeff: don't disable ieee8021x_blocked while clearing key */
+@@ -236,9 +237,9 @@ static int rtw_set_wpa_ie(struct adapter *padapter, char *pie, unsigned short ie
+ int ret = 0;
+ u8 null_addr[] = {0, 0, 0, 0, 0, 0};
+
+- if ((ielen > MAX_WPA_IE_LEN) || (pie == NULL)) {
++ if (ielen > MAX_WPA_IE_LEN || !pie) {
+ _clr_fwstate_(&padapter->mlmepriv, WIFI_UNDER_WPS);
+- if (pie == NULL)
++ if (!pie)
+ return ret;
+ else
+ return -EINVAL;
+@@ -246,7 +247,7 @@ static int rtw_set_wpa_ie(struct adapter *padapter, char *pie, unsigned short ie
+
+ if (ielen) {
+ buf = rtw_zmalloc(ielen);
+- if (buf == NULL) {
++ if (!buf) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+@@ -491,7 +492,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
+ return -EINVAL;
+
+ param = rtw_malloc(p->length);
+- if (param == NULL)
++ if (!param)
+ return -ENOMEM;
+
+ if (copy_from_user(param, p->pointer, p->length)) {
+@@ -571,7 +572,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
+ goto exit;
+ }
+
+- if (strcmp(param->u.crypt.alg, "none") == 0 && (psta == NULL)) {
++ if (strcmp(param->u.crypt.alg, "none") == 0 && !psta) {
+ /* todo:clear default encryption keys */
+
+ psecuritypriv->dot11AuthAlgrthm = dot11AuthAlgrthm_Open;
+@@ -583,7 +584,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
+ }
+
+
+- if (strcmp(param->u.crypt.alg, "WEP") == 0 && (psta == NULL)) {
++ if (strcmp(param->u.crypt.alg, "WEP") == 0 && !psta) {
+ wep_key_idx = param->u.crypt.idx;
+ wep_key_len = param->u.crypt.key_len;
+
+@@ -835,9 +836,7 @@ static int rtw_add_sta(struct net_device *dev, struct ieee_param *param)
+ psta = rtw_get_stainfo(pstapriv, param->sta_addr);
+ if (psta)
+ {
+- spin_lock_bh(&(pstapriv->sta_hash_lock));
+ rtw_free_stainfo(padapter, psta);
+- spin_unlock_bh(&(pstapriv->sta_hash_lock));
+
+ psta = NULL;
+ }
+@@ -1229,7 +1228,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
+ return -EINVAL;
+
+ param = rtw_malloc(p->length);
+- if (param == NULL)
++ if (!param)
+ return -ENOMEM;
+
+ if (copy_from_user(param, p->pointer, p->length)) {
+diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+index f78bf174de8e2..279347be77c40 100644
+--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
++++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+@@ -488,7 +488,7 @@ void rtw_unregister_netdevs(struct dvobj_priv *dvobj)
+
+ padapter = dvobj->padapters;
+
+- if (padapter == NULL)
++ if (!padapter)
+ return;
+
+ pnetdev = padapter->pnetdev;
+@@ -594,7 +594,7 @@ struct dvobj_priv *devobj_init(void)
+ struct dvobj_priv *pdvobj = NULL;
+
+ pdvobj = rtw_zmalloc(sizeof(*pdvobj));
+- if (pdvobj == NULL)
++ if (!pdvobj)
+ return NULL;
+
+ mutex_init(&pdvobj->hw_init_mutex);
+@@ -664,51 +664,36 @@ void rtw_reset_drv_sw(struct adapter *padapter)
+
+ u8 rtw_init_drv_sw(struct adapter *padapter)
+ {
+- u8 ret8 = _SUCCESS;
+-
+ rtw_init_default_value(padapter);
+
+ rtw_init_hal_com_default_value(padapter);
+
+- if (rtw_init_cmd_priv(&padapter->cmdpriv)) {
+- ret8 = _FAIL;
+- goto exit;
+- }
++ if (rtw_init_cmd_priv(&padapter->cmdpriv))
++ return _FAIL;
+
+ padapter->cmdpriv.padapter = padapter;
+
+- if (rtw_init_evt_priv(&padapter->evtpriv)) {
+- ret8 = _FAIL;
+- goto exit;
+- }
+-
++ if (rtw_init_evt_priv(&padapter->evtpriv))
++ goto free_cmd_priv;
+
+- if (rtw_init_mlme_priv(padapter) == _FAIL) {
+- ret8 = _FAIL;
+- goto exit;
+- }
++ if (rtw_init_mlme_priv(padapter) == _FAIL)
++ goto free_evt_priv;
+
+ init_mlme_ext_priv(padapter);
+
+- if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter) == _FAIL) {
+- ret8 = _FAIL;
+- goto exit;
+- }
++ if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter) == _FAIL)
++ goto free_mlme_ext;
+
+- if (_rtw_init_recv_priv(&padapter->recvpriv, padapter) == _FAIL) {
+- ret8 = _FAIL;
+- goto exit;
+- }
++ if (_rtw_init_recv_priv(&padapter->recvpriv, padapter) == _FAIL)
++ goto free_xmit_priv;
+ /* add for CONFIG_IEEE80211W, none 11w also can use */
+ spin_lock_init(&padapter->security_key_mutex);
+
+ /* We don't need to memset padapter->XXX to zero, because adapter is allocated by vzalloc(). */
+ /* memset((unsigned char *)&padapter->securitypriv, 0, sizeof (struct security_priv)); */
+
+- if (_rtw_init_sta_priv(&padapter->stapriv) == _FAIL) {
+- ret8 = _FAIL;
+- goto exit;
+- }
++ if (_rtw_init_sta_priv(&padapter->stapriv) == _FAIL)
++ goto free_recv_priv;
+
+ padapter->stapriv.padapter = padapter;
+ padapter->setband = GHZ24_50;
+@@ -719,9 +704,26 @@ u8 rtw_init_drv_sw(struct adapter *padapter)
+
+ rtw_hal_dm_init(padapter);
+
+-exit:
++ return _SUCCESS;
++
++free_recv_priv:
++ _rtw_free_recv_priv(&padapter->recvpriv);
++
++free_xmit_priv:
++ _rtw_free_xmit_priv(&padapter->xmitpriv);
++
++free_mlme_ext:
++ free_mlme_ext_priv(&padapter->mlmeextpriv);
+
+- return ret8;
++ rtw_free_mlme_priv(&padapter->mlmepriv);
++
++free_evt_priv:
++ rtw_free_evt_priv(&padapter->evtpriv);
++
++free_cmd_priv:
++ rtw_free_cmd_priv(&padapter->cmdpriv);
++
++ return _FAIL;
+ }
+
+ void rtw_cancel_all_timer(struct adapter *padapter)
+diff --git a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
+index 81db7fb76d6db..97893bb0a0fde 100644
+--- a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
++++ b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h
+@@ -82,7 +82,7 @@ struct vchiq_service_params_kernel {
+
+ struct vchiq_instance;
+
+-extern enum vchiq_status vchiq_initialise(struct vchiq_instance **pinstance);
++extern int vchiq_initialise(struct vchiq_instance **pinstance);
+ extern enum vchiq_status vchiq_shutdown(struct vchiq_instance *instance);
+ extern enum vchiq_status vchiq_connect(struct vchiq_instance *instance);
+ extern enum vchiq_status vchiq_open_service(struct vchiq_instance *instance,
+diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+index 967f10b9582a8..099359fc01152 100644
+--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+@@ -1033,15 +1033,27 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
+
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+
++ rcu_read_lock();
+ service = handle_to_service(handle);
+- if (WARN_ON(!service))
++ if (WARN_ON(!service)) {
++ rcu_read_unlock();
+ return VCHIQ_SUCCESS;
++ }
+
+ user_service = (struct user_service *)service->base.userdata;
+ instance = user_service->instance;
+
+- if (!instance || instance->closing)
++ if (!instance || instance->closing) {
++ rcu_read_unlock();
+ return VCHIQ_SUCCESS;
++ }
++
++ /*
++	 * Since we are hopping between different synchronization mechanisms,
++	 * taking an extra reference keeps the implementation simpler.
++ */
++ vchiq_service_get(service);
++ rcu_read_unlock();
+
+ vchiq_log_trace(vchiq_arm_log_level,
+ "%s - service %lx(%d,%p), reason %d, header %lx, instance %lx, bulk_userdata %lx",
+@@ -1074,6 +1086,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
+ NULL, user_service, bulk_userdata);
+ if (status != VCHIQ_SUCCESS) {
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
++ vchiq_service_put(service);
+ return status;
+ }
+ }
+@@ -1084,11 +1097,13 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
+ vchiq_log_info(vchiq_arm_log_level,
+ "%s interrupted", __func__);
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
++ vchiq_service_put(service);
+ return VCHIQ_RETRY;
+ } else if (instance->closing) {
+ vchiq_log_info(vchiq_arm_log_level,
+ "%s closing", __func__);
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
++ vchiq_service_put(service);
+ return VCHIQ_ERROR;
+ }
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
+@@ -1117,6 +1132,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header,
+ header = NULL;
+ }
+ DEBUG_TRACE(SERVICE_CALLBACK_LINE);
++ vchiq_service_put(service);
+
+ if (skip_completion)
+ return VCHIQ_SUCCESS;
+@@ -1173,6 +1189,9 @@ int vchiq_dump_platform_instances(void *dump_context)
+ int len;
+ int i;
+
++ if (!state)
++ return -ENOTCONN;
++
+ /*
+ * There is no list of instances, so instead scan all services,
+ * marking those that have been dumped.
+diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
+index e8e39a154c743..69f342e9bb7ab 100644
+--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
+@@ -152,10 +152,10 @@ extern struct vchiq_arm_state*
+ vchiq_platform_get_arm_state(struct vchiq_state *state);
+
+
+-extern enum vchiq_status
++extern int
+ vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service,
+ enum USE_TYPE_E use_type);
+-extern enum vchiq_status
++extern int
+ vchiq_release_internal(struct vchiq_state *state,
+ struct vchiq_service *service);
+
+diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
+index 9429b8a642fbb..630ed0dc24c39 100644
+--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c
+@@ -2421,6 +2421,9 @@ void vchiq_msg_queue_push(unsigned int handle, struct vchiq_header *header)
+ struct vchiq_service *service = find_service_by_handle(handle);
+ int pos;
+
++ if (!service)
++ return;
++
+ while (service->msg_queue_write == service->msg_queue_read +
+ VCHIQ_MAX_SLOTS) {
+ if (wait_for_completion_interruptible(&service->msg_queue_pop))
+@@ -2441,6 +2444,9 @@ struct vchiq_header *vchiq_msg_hold(unsigned int handle)
+ struct vchiq_header *header;
+ int pos;
+
++ if (!service)
++ return NULL;
++
+ if (service->msg_queue_write == service->msg_queue_read)
+ return NULL;
+
+diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
+index d40c2ac149280..775537b243aab 100644
+--- a/drivers/staging/vt6655/device_main.c
++++ b/drivers/staging/vt6655/device_main.c
+@@ -565,7 +565,7 @@ err_free_rd:
+ kfree(desc->rd_info);
+
+ err_free_desc:
+- while (--i) {
++ while (i--) {
+ desc = &priv->aRD0Ring[i];
+ device_free_rx_buf(priv, desc);
+ kfree(desc->rd_info);
+@@ -611,7 +611,7 @@ err_free_rd:
+ kfree(desc->rd_info);
+
+ err_free_desc:
+- while (--i) {
++ while (i--) {
+ desc = &priv->aRD1Ring[i];
+ device_free_rx_buf(priv, desc);
+ kfree(desc->rd_info);
+@@ -676,7 +676,7 @@ static int device_init_td0_ring(struct vnt_private *priv)
+ return 0;
+
+ err_free_desc:
+- while (--i) {
++ while (i--) {
+ desc = &priv->apTD0Rings[i];
+ kfree(desc->td_info);
+ }
+@@ -716,7 +716,7 @@ static int device_init_td1_ring(struct vnt_private *priv)
+ return 0;
+
+ err_free_desc:
+- while (--i) {
++ while (i--) {
+ desc = &priv->apTD1Rings[i];
+ kfree(desc->td_info);
+ }
+diff --git a/drivers/staging/wfx/bus_sdio.c b/drivers/staging/wfx/bus_sdio.c
+index e06d7e1ebe9c3..61b8cc05f2935 100644
+--- a/drivers/staging/wfx/bus_sdio.c
++++ b/drivers/staging/wfx/bus_sdio.c
+@@ -120,19 +120,22 @@ static int wfx_sdio_irq_subscribe(void *priv)
+ return ret;
+ }
+
++ flags = irq_get_trigger_type(bus->of_irq);
++ if (!flags)
++ flags = IRQF_TRIGGER_HIGH;
++ flags |= IRQF_ONESHOT;
++ ret = devm_request_threaded_irq(&bus->func->dev, bus->of_irq, NULL,
++ wfx_sdio_irq_handler_ext, flags,
++ "wfx", bus);
++ if (ret)
++ return ret;
+ sdio_claim_host(bus->func);
+ cccr = sdio_f0_readb(bus->func, SDIO_CCCR_IENx, NULL);
+ cccr |= BIT(0);
+ cccr |= BIT(bus->func->num);
+ sdio_f0_writeb(bus->func, cccr, SDIO_CCCR_IENx, NULL);
+ sdio_release_host(bus->func);
+- flags = irq_get_trigger_type(bus->of_irq);
+- if (!flags)
+- flags = IRQF_TRIGGER_HIGH;
+- flags |= IRQF_ONESHOT;
+- return devm_request_threaded_irq(&bus->func->dev, bus->of_irq, NULL,
+- wfx_sdio_irq_handler_ext, flags,
+- "wfx", bus);
++ return 0;
+ }
+
+ static int wfx_sdio_irq_unsubscribe(void *priv)
+diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c
+index 4b9fdf99981b1..9ff69c5e0ae97 100644
+--- a/drivers/staging/wfx/main.c
++++ b/drivers/staging/wfx/main.c
+@@ -309,7 +309,8 @@ struct wfx_dev *wfx_init_common(struct device *dev,
+ wdev->pdata.gpio_wakeup = devm_gpiod_get_optional(dev, "wakeup",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(wdev->pdata.gpio_wakeup))
+- return NULL;
++ goto err;
++
+ if (wdev->pdata.gpio_wakeup)
+ gpiod_set_consumer_name(wdev->pdata.gpio_wakeup, "wfx wakeup");
+
+@@ -328,6 +329,10 @@ struct wfx_dev *wfx_init_common(struct device *dev,
+ return NULL;
+
+ return wdev;
++
++err:
++ ieee80211_free_hw(hw);
++ return NULL;
+ }
+
+ int wfx_probe(struct wfx_dev *wdev)
+diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c
+index 8c8524679ba38..0d869b5e309c0 100644
+--- a/drivers/staging/wlan-ng/hfa384x_usb.c
++++ b/drivers/staging/wlan-ng/hfa384x_usb.c
+@@ -3778,18 +3778,18 @@ static void hfa384x_usb_throttlefn(struct timer_list *t)
+
+ spin_lock_irqsave(&hw->ctlxq.lock, flags);
+
+- /*
+- * We need to check BOTH the RX and the TX throttle controls,
+- * so we use the bitwise OR instead of the logical OR.
+- */
+ pr_debug("flags=0x%lx\n", hw->usb_flags);
+- if (!hw->wlandev->hwremoved &&
+- ((test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) &&
+- !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags)) |
+- (test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) &&
+- !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags))
+- )) {
+- schedule_work(&hw->usb_work);
++ if (!hw->wlandev->hwremoved) {
++ bool rx_throttle = test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) &&
++ !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags);
++ bool tx_throttle = test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) &&
++ !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags);
++ /*
++ * We need to check BOTH the RX and the TX throttle controls,
++ * so we use the bitwise OR instead of the logical OR.
++ */
++ if (rx_throttle | tx_throttle)
++ schedule_work(&hw->usb_work);
+ }
+
+ spin_unlock_irqrestore(&hw->ctlxq.lock, flags);
+diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
+index 2c54c5d8412d8..686a9e5918e21 100644
+--- a/drivers/target/iscsi/iscsi_target.c
++++ b/drivers/target/iscsi/iscsi_target.c
+@@ -4086,9 +4086,12 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+ list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) {
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+
+- if (se_cmd->se_tfo != NULL) {
+- spin_lock_irq(&se_cmd->t_state_lock);
+- if (se_cmd->transport_state & CMD_T_ABORTED) {
++ if (!se_cmd->se_tfo)
++ continue;
++
++ spin_lock_irq(&se_cmd->t_state_lock);
++ if (se_cmd->transport_state & CMD_T_ABORTED) {
++ if (!(se_cmd->transport_state & CMD_T_TAS))
+ /*
+ * LIO's abort path owns the cleanup for this,
+ * so put it back on the list and let
+@@ -4096,11 +4099,10 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+ */
+ list_move_tail(&cmd->i_conn_node,
+ &conn->conn_cmd_list);
+- } else {
+- se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+- }
+- spin_unlock_irq(&se_cmd->t_state_lock);
++ } else {
++ se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+ }
++ spin_unlock_irq(&se_cmd->t_state_lock);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+@@ -4385,6 +4387,9 @@ int iscsit_close_session(struct iscsi_session *sess, bool can_sleep)
+ iscsit_stop_time2retain_timer(sess);
+ spin_unlock_bh(&se_tpg->session_lock);
+
++ if (sess->sess_ops->ErrorRecoveryLevel == 2)
++ iscsit_free_connection_recovery_entries(sess);
++
+ /*
+ * transport_deregister_session_configfs() will clear the
+ * struct se_node_acl->nacl_sess pointer now as a iscsi_np process context
+@@ -4408,9 +4413,6 @@ int iscsit_close_session(struct iscsi_session *sess, bool can_sleep)
+
+ transport_deregister_session(sess->se_sess);
+
+- if (sess->sess_ops->ErrorRecoveryLevel == 2)
+- iscsit_free_connection_recovery_entries(sess);
+-
+ iscsit_free_all_ooo_cmdsns(sess);
+
+ spin_lock_bh(&se_tpg->session_lock);
+diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
+index c0ed6f8e5c5b9..32a2852352db1 100644
+--- a/drivers/target/iscsi/iscsi_target_nego.c
++++ b/drivers/target/iscsi/iscsi_target_nego.c
+@@ -1071,6 +1071,7 @@ int iscsi_target_locate_portal(
+ iscsi_target_set_sock_callbacks(conn);
+
+ login->np = np;
++ conn->tpg = NULL;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ payload_length = ntoh24(login_req->dlength);
+@@ -1138,7 +1139,6 @@ int iscsi_target_locate_portal(
+ */
+ sessiontype = strncmp(s_buf, DISCOVERY, 9);
+ if (!sessiontype) {
+- conn->tpg = iscsit_global->discovery_tpg;
+ if (!login->leading_connection)
+ goto get_target;
+
+@@ -1155,9 +1155,11 @@ int iscsi_target_locate_portal(
+ * Serialize access across the discovery struct iscsi_portal_group to
+ * process login attempt.
+ */
++ conn->tpg = iscsit_global->discovery_tpg;
+ if (iscsit_access_np(np, conn->tpg) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
++ conn->tpg = NULL;
+ ret = -1;
+ goto out;
+ }
+diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
+index 6bc3aaf655fc4..62004e3fe1ccc 100644
+--- a/drivers/target/iscsi/iscsi_target_parameters.c
++++ b/drivers/target/iscsi/iscsi_target_parameters.c
+@@ -1262,18 +1262,20 @@ static struct iscsi_param *iscsi_check_key(
+ return param;
+
+ if (!(param->phase & phase)) {
+- pr_err("Key \"%s\" may not be negotiated during ",
+- param->name);
++ char *phase_name;
++
+ switch (phase) {
+ case PHASE_SECURITY:
+- pr_debug("Security phase.\n");
++ phase_name = "Security";
+ break;
+ case PHASE_OPERATIONAL:
+- pr_debug("Operational phase.\n");
++ phase_name = "Operational";
+ break;
+ default:
+- pr_debug("Unknown phase.\n");
++ phase_name = "Unknown";
+ }
++ pr_err("Key \"%s\" may not be negotiated during %s phase.\n",
++ param->name, phase_name);
+ return NULL;
+ }
+
+diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
+index 8075f60fd02c3..2d5cf1714ae05 100644
+--- a/drivers/target/iscsi/iscsi_target_tpg.c
++++ b/drivers/target/iscsi/iscsi_target_tpg.c
+@@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal(
+ break;
+ }
+ spin_unlock(&tpg->tpg_np_lock);
++
++ if (match)
++ break;
+ }
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
+index 52db28d868d58..600a4d1ee45ff 100644
+--- a/drivers/target/loopback/tcm_loop.c
++++ b/drivers/target/loopback/tcm_loop.c
+@@ -397,6 +397,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host
+ ret = device_register(&tl_hba->dev);
+ if (ret) {
+ pr_err("device_register() failed for tl_hba->dev: %d\n", ret);
++ put_device(&tl_hba->dev);
+ return -ENODEV;
+ }
+
+@@ -1073,7 +1074,7 @@ check_len:
+ */
+ ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt);
+ if (ret)
+- goto out;
++ return ERR_PTR(ret);
+
+ sh = tl_hba->sh;
+ tcm_loop_hba_no_cnt++;
+diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
+index cb1de1ecaaa61..bd0f2ce011dd7 100644
+--- a/drivers/target/target_core_alua.c
++++ b/drivers/target/target_core_alua.c
+@@ -1674,7 +1674,6 @@ int core_alua_set_tg_pt_gp_id(
+ pr_err("Maximum ALUA alua_tg_pt_gps_count:"
+ " 0x0000ffff reached\n");
+ spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
+- kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp);
+ return -ENOSPC;
+ }
+ again:
+diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
+index 8cb1fa0c05857..e18617371a9b2 100644
+--- a/drivers/target/target_core_device.c
++++ b/drivers/target/target_core_device.c
+@@ -772,6 +772,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
+ INIT_LIST_HEAD(&dev->t10_alua.lba_map_list);
+ spin_lock_init(&dev->t10_alua.lba_map_lock);
+
++ INIT_WORK(&dev->delayed_cmd_work, target_do_delayed_work);
++ mutex_init(&dev->lun_reset_mutex);
++
+ dev->t10_wwn.t10_dev = dev;
+ /*
+ * Use OpenFabrics IEEE Company ID: 00 14 05
+@@ -848,7 +851,6 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+ attrib->unmap_granularity = q->limits.discard_granularity / block_size;
+ attrib->unmap_granularity_alignment = q->limits.discard_alignment /
+ block_size;
+- attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors);
+ return true;
+ }
+ EXPORT_SYMBOL(target_configure_unmap_from_queue);
+diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
+index ef4a8e189fba0..014860716605b 100644
+--- a/drivers/target/target_core_file.c
++++ b/drivers/target/target_core_file.c
+@@ -332,7 +332,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
+ len += sg->length;
+ }
+
+- iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
++ iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
+ if (is_write)
+ ret = vfs_iter_write(fd, &iter, &pos, 0);
+ else
+@@ -469,7 +469,7 @@ fd_execute_write_same(struct se_cmd *cmd)
+ len += se_dev->dev_attrib.block_size;
+ }
+
+- iov_iter_bvec(&iter, READ, bvec, nolb, len);
++ iov_iter_bvec(&iter, WRITE, bvec, nolb, len);
+ ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
+
+ kfree(bvec);
+diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
+index a343bcfa2180f..a889a6237d9c1 100644
+--- a/drivers/target/target_core_internal.h
++++ b/drivers/target/target_core_internal.h
+@@ -151,6 +151,7 @@ int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int);
+ void transport_clear_lun_ref(struct se_lun *);
+ sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
+ void target_qf_do_work(struct work_struct *work);
++void target_do_delayed_work(struct work_struct *work);
+ bool target_check_wce(struct se_device *dev);
+ bool target_check_fua(struct se_device *dev);
+ void __target_execute_cmd(struct se_cmd *, bool);
+diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
+index e7fcbc09f9dbc..4718db628222b 100644
+--- a/drivers/target/target_core_tmr.c
++++ b/drivers/target/target_core_tmr.c
+@@ -50,15 +50,6 @@ EXPORT_SYMBOL(core_tmr_alloc_req);
+
+ void core_tmr_release_req(struct se_tmr_req *tmr)
+ {
+- struct se_device *dev = tmr->tmr_dev;
+- unsigned long flags;
+-
+- if (dev) {
+- spin_lock_irqsave(&dev->se_tmr_lock, flags);
+- list_del_init(&tmr->tmr_list);
+- spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
+- }
+-
+ kfree(tmr);
+ }
+
+@@ -82,8 +73,8 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
+ {
+ struct se_session *sess = se_cmd->se_sess;
+
+- assert_spin_locked(&sess->sess_cmd_lock);
+- WARN_ON_ONCE(!irqs_disabled());
++ lockdep_assert_held(&sess->sess_cmd_lock);
++
+ /*
+ * If command already reached CMD_T_COMPLETE state within
+ * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
+@@ -156,13 +147,6 @@ void core_tmr_abort_task(
+ se_cmd->state_active = false;
+ spin_unlock_irqrestore(&dev->queues[i].lock, flags);
+
+- /*
+- * Ensure that this ABORT request is visible to the LU
+- * RESET code.
+- */
+- if (!tmr->tmr_dev)
+- WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0);
+-
+ if (dev->transport->tmr_notify)
+ dev->transport->tmr_notify(dev, TMR_ABORT_TASK,
+ &aborted_list);
+@@ -204,14 +188,23 @@ static void core_tmr_drain_tmr_list(
+ * LUN_RESET tmr..
+ */
+ spin_lock_irqsave(&dev->se_tmr_lock, flags);
+- if (tmr)
+- list_del_init(&tmr->tmr_list);
+ list_for_each_entry_safe(tmr_p, tmr_pp, &dev->dev_tmr_list, tmr_list) {
++ if (tmr_p == tmr)
++ continue;
++
+ cmd = tmr_p->task_cmd;
+ if (!cmd) {
+ pr_err("Unable to locate struct se_cmd for TMR\n");
+ continue;
+ }
++
++ /*
++ * We only execute one LUN_RESET at a time so we can't wait
++ * on them below.
++ */
++ if (tmr_p->function == TMR_LUN_RESET)
++ continue;
++
+ /*
+ * If this function was called with a valid pr_res_key
+ * parameter (eg: for PROUT PREEMPT_AND_ABORT service action
+@@ -234,6 +227,7 @@ static void core_tmr_drain_tmr_list(
+ }
+
+ list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);
++ tmr_p->tmr_dev = NULL;
+ }
+ spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
+
+@@ -394,14 +388,25 @@ int core_tmr_lun_reset(
+ tmr_nacl->initiatorname);
+ }
+ }
++
++
++ /*
++ * We only allow one reset or preempt and abort to execute at a time
++	 * to prevent one call from claiming all the cmds and causing a second
++	 * call to return while cmds it should have waited on are still
++ * running.
++ */
++ mutex_lock(&dev->lun_reset_mutex);
++
+ pr_debug("LUN_RESET: %s starting for [%s], tas: %d\n",
+ (preempt_and_abort_list) ? "Preempt" : "TMR",
+ dev->transport->name, tas);
+-
+ core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list);
+ core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas,
+ preempt_and_abort_list);
+
++ mutex_unlock(&dev->lun_reset_mutex);
++
+ /*
+ * Clear any legacy SPC-2 reservation when called during
+ * LOGICAL UNIT RESET
+diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
+index 14c6f2bb1b01d..72edf5bd75ee6 100644
+--- a/drivers/target/target_core_transport.c
++++ b/drivers/target/target_core_transport.c
+@@ -676,6 +676,21 @@ static void target_remove_from_state_list(struct se_cmd *cmd)
+ spin_unlock_irqrestore(&dev->queues[cmd->cpuid].lock, flags);
+ }
+
++static void target_remove_from_tmr_list(struct se_cmd *cmd)
++{
++ struct se_device *dev = NULL;
++ unsigned long flags;
++
++ if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
++ dev = cmd->se_tmr_req->tmr_dev;
++
++ if (dev) {
++ spin_lock_irqsave(&dev->se_tmr_lock, flags);
++ if (cmd->se_tmr_req->tmr_dev)
++ list_del_init(&cmd->se_tmr_req->tmr_list);
++ spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
++ }
++}
+ /*
+ * This function is called by the target core after the target core has
+ * finished processing a SCSI command or SCSI TMF. Both the regular command
+@@ -687,13 +702,6 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
+ {
+ unsigned long flags;
+
+- target_remove_from_state_list(cmd);
+-
+- /*
+- * Clear struct se_cmd->se_lun before the handoff to FE.
+- */
+- cmd->se_lun = NULL;
+-
+ spin_lock_irqsave(&cmd->t_state_lock, flags);
+ /*
+ * Determine if frontend context caller is requesting the stopping of
+@@ -728,8 +736,16 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
+ if (!lun)
+ return;
+
++ target_remove_from_state_list(cmd);
++ target_remove_from_tmr_list(cmd);
++
+ if (cmpxchg(&cmd->lun_ref_active, true, false))
+ percpu_ref_put(&lun->lun_ref);
++
++ /*
++ * Clear struct se_cmd->se_lun before the handoff to FE.
++ */
++ cmd->se_lun = NULL;
+ }
+
+ static void target_complete_failure_work(struct work_struct *work)
+@@ -2173,32 +2189,35 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
+ */
+ switch (cmd->sam_task_attr) {
+ case TCM_HEAD_TAG:
++ atomic_inc_mb(&dev->non_ordered);
+ pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n",
+ cmd->t_task_cdb[0]);
+ return false;
+ case TCM_ORDERED_TAG:
+- atomic_inc_mb(&dev->dev_ordered_sync);
++ atomic_inc_mb(&dev->delayed_cmd_count);
+
+ pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n",
+ cmd->t_task_cdb[0]);
+-
+- /*
+- * Execute an ORDERED command if no other older commands
+- * exist that need to be completed first.
+- */
+- if (!atomic_read(&dev->simple_cmds))
+- return false;
+ break;
+ default:
+ /*
+ * For SIMPLE and UNTAGGED Task Attribute commands
+ */
+- atomic_inc_mb(&dev->simple_cmds);
++ atomic_inc_mb(&dev->non_ordered);
++
++ if (atomic_read(&dev->delayed_cmd_count) == 0)
++ return false;
+ break;
+ }
+
+- if (atomic_read(&dev->dev_ordered_sync) == 0)
+- return false;
++ if (cmd->sam_task_attr != TCM_ORDERED_TAG) {
++ atomic_inc_mb(&dev->delayed_cmd_count);
++ /*
++ * We will account for this when we dequeue from the delayed
++ * list.
++ */
++ atomic_dec_mb(&dev->non_ordered);
++ }
+
+ spin_lock(&dev->delayed_cmd_lock);
+ list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list);
+@@ -2206,6 +2225,12 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
+
+	pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD list\n",
+ cmd->t_task_cdb[0], cmd->sam_task_attr);
++ /*
++ * We may have no non ordered cmds when this function started or we
++ * could have raced with the last simple/head cmd completing, so kick
++ * the delayed handler here.
++ */
++ schedule_work(&dev->delayed_cmd_work);
+ return true;
+ }
+
+@@ -2243,29 +2268,48 @@ EXPORT_SYMBOL(target_execute_cmd);
+ * Process all commands up to the last received ORDERED task attribute which
+ * requires another blocking boundary
+ */
+-static void target_restart_delayed_cmds(struct se_device *dev)
++void target_do_delayed_work(struct work_struct *work)
+ {
+- for (;;) {
++ struct se_device *dev = container_of(work, struct se_device,
++ delayed_cmd_work);
++
++ spin_lock(&dev->delayed_cmd_lock);
++ while (!dev->ordered_sync_in_progress) {
+ struct se_cmd *cmd;
+
+- spin_lock(&dev->delayed_cmd_lock);
+- if (list_empty(&dev->delayed_cmd_list)) {
+- spin_unlock(&dev->delayed_cmd_lock);
++ if (list_empty(&dev->delayed_cmd_list))
+ break;
+- }
+
+ cmd = list_entry(dev->delayed_cmd_list.next,
+ struct se_cmd, se_delayed_node);
++
++ if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
++ /*
++ * Check if we started with:
++ * [ordered] [simple] [ordered]
++ * and we are now at the last ordered so we have to wait
++ * for the simple cmd.
++ */
++ if (atomic_read(&dev->non_ordered) > 0)
++ break;
++
++ dev->ordered_sync_in_progress = true;
++ }
++
+ list_del(&cmd->se_delayed_node);
++ atomic_dec_mb(&dev->delayed_cmd_count);
+ spin_unlock(&dev->delayed_cmd_lock);
+
++ if (cmd->sam_task_attr != TCM_ORDERED_TAG)
++ atomic_inc_mb(&dev->non_ordered);
++
+ cmd->transport_state |= CMD_T_SENT;
+
+ __target_execute_cmd(cmd, true);
+
+- if (cmd->sam_task_attr == TCM_ORDERED_TAG)
+- break;
++ spin_lock(&dev->delayed_cmd_lock);
+ }
++ spin_unlock(&dev->delayed_cmd_lock);
+ }
+
+ /*
+@@ -2283,14 +2327,17 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
+ goto restart;
+
+ if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
+- atomic_dec_mb(&dev->simple_cmds);
++ atomic_dec_mb(&dev->non_ordered);
+ dev->dev_cur_ordered_id++;
+ } else if (cmd->sam_task_attr == TCM_HEAD_TAG) {
++ atomic_dec_mb(&dev->non_ordered);
+ dev->dev_cur_ordered_id++;
+ pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n",
+ dev->dev_cur_ordered_id);
+ } else if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
+- atomic_dec_mb(&dev->dev_ordered_sync);
++ spin_lock(&dev->delayed_cmd_lock);
++ dev->ordered_sync_in_progress = false;
++ spin_unlock(&dev->delayed_cmd_lock);
+
+ dev->dev_cur_ordered_id++;
+ pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
+@@ -2299,7 +2346,8 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
+ cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET;
+
+ restart:
+- target_restart_delayed_cmds(dev);
++ if (atomic_read(&dev->delayed_cmd_count) > 0)
++ schedule_work(&dev->delayed_cmd_work);
+ }
+
+ static void transport_complete_qf(struct se_cmd *cmd)
+diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
+index 9f552f48084cd..1e8e9dd3f482c 100644
+--- a/drivers/target/target_core_user.c
++++ b/drivers/target/target_core_user.c
+@@ -20,6 +20,7 @@
+ #include <linux/configfs.h>
+ #include <linux/mutex.h>
+ #include <linux/workqueue.h>
++#include <linux/pagemap.h>
+ #include <net/genetlink.h>
+ #include <scsi/scsi_common.h>
+ #include <scsi/scsi_proto.h>
+@@ -1660,17 +1661,37 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
+ static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first,
+ unsigned long last)
+ {
+- XA_STATE(xas, &udev->data_pages, first * udev->data_pages_per_blk);
+ struct page *page;
++ unsigned long dpi;
+ u32 pages_freed = 0;
+
+- xas_lock(&xas);
+- xas_for_each(&xas, page, (last + 1) * udev->data_pages_per_blk - 1) {
+- xas_store(&xas, NULL);
++ first = first * udev->data_pages_per_blk;
++ last = (last + 1) * udev->data_pages_per_blk - 1;
++ xa_for_each_range(&udev->data_pages, dpi, page, first, last) {
++ xa_erase(&udev->data_pages, dpi);
++ /*
++ * While reaching here there may be page faults occurring on
++ * the to-be-released pages. A race condition may occur if
++ * unmap_mapping_range() is called before page faults on these
++ * pages have completed; a valid but stale map is created.
++ *
++ * If another command subsequently runs and needs to extend
++ * dbi_thresh, it may reuse the slot corresponding to the
++ * previous page in data_bitmap. Though we will allocate a new
++ * page for the slot in data_area, no page fault will happen
++ * because we have a valid map. Therefore the command's data
++ * will be lost.
++ *
++ * We lock and unlock pages that are to be released to ensure
++ * all page faults have completed. This way
++ * unmap_mapping_range() can ensure stale maps are cleanly
++ * removed.
++ */
++ lock_page(page);
++ unlock_page(page);
+ __free_page(page);
+ pages_freed++;
+ }
+- xas_unlock(&xas);
+
+ atomic_sub(pages_freed, &global_page_count);
+
+@@ -1821,6 +1842,8 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
+ mutex_lock(&udev->cmdr_lock);
+ page = xa_load(&udev->data_pages, dpi);
+ if (likely(page)) {
++ get_page(page);
++ lock_page(page);
+ mutex_unlock(&udev->cmdr_lock);
+ return page;
+ }
+@@ -1862,6 +1885,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
+ struct page *page;
+ unsigned long offset;
+ void *addr;
++ vm_fault_t ret = 0;
+
+ int mi = tcmu_find_mem_index(vmf->vma);
+ if (mi < 0)
+@@ -1877,6 +1901,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
+ /* For the vmalloc()ed cmd area pages */
+ addr = (void *)(unsigned long)info->mem[mi].addr + offset;
+ page = vmalloc_to_page(addr);
++ get_page(page);
+ } else {
+ uint32_t dpi;
+
+@@ -1885,11 +1910,11 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
+ page = tcmu_try_get_data_page(udev, dpi);
+ if (!page)
+ return VM_FAULT_SIGBUS;
++ ret = VM_FAULT_LOCKED;
+ }
+
+- get_page(page);
+ vmf->page = page;
+- return 0;
++ return ret;
+ }
+
+ static const struct vm_operations_struct tcmu_vm_ops = {
+@@ -3152,12 +3177,22 @@ static void find_free_blocks(void)
+ udev->dbi_max = block;
+ }
+
++ /*
++ * Release the block pages.
++ *
++ * Also note that since tcmu_vma_fault() gets an extra page
++ * refcount, tcmu_blocks_release() won't free pages if pages
++ * are mapped. This means it is safe to call
++ * tcmu_blocks_release() before unmap_mapping_range() which
++ * drops the refcount of any pages it unmaps and thus releases
++ * them.
++ */
++ pages_freed = tcmu_blocks_release(udev, start, end - 1);
++
+ /* Here will truncate the data area from off */
+ off = udev->data_off + (loff_t)start * udev->data_blk_size;
+ unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
+
+- /* Release the block pages */
+- pages_freed = tcmu_blocks_release(udev, start, end - 1);
+ mutex_unlock(&udev->cmdr_lock);
+
+ total_pages_freed += pages_freed;
+diff --git a/drivers/tee/amdtee/amdtee_if.h b/drivers/tee/amdtee/amdtee_if.h
+index ff48c3e473750..e2014e21530ac 100644
+--- a/drivers/tee/amdtee/amdtee_if.h
++++ b/drivers/tee/amdtee/amdtee_if.h
+@@ -118,16 +118,18 @@ struct tee_cmd_unmap_shared_mem {
+
+ /**
+ * struct tee_cmd_load_ta - load Trusted Application (TA) binary into TEE
+- * @low_addr: [in] bits [31:0] of the physical address of the TA binary
+- * @hi_addr: [in] bits [63:32] of the physical address of the TA binary
+- * @size: [in] size of TA binary in bytes
+- * @ta_handle: [out] return handle of the loaded TA
++ * @low_addr: [in] bits [31:0] of the physical address of the TA binary
++ * @hi_addr: [in] bits [63:32] of the physical address of the TA binary
++ * @size: [in] size of TA binary in bytes
++ * @ta_handle: [out] return handle of the loaded TA
++ * @return_origin: [out] origin of return code after TEE processing
+ */
+ struct tee_cmd_load_ta {
+ u32 low_addr;
+ u32 hi_addr;
+ u32 size;
+ u32 ta_handle;
++ u32 return_origin;
+ };
+
+ /**
+diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c
+index 07f36ac834c88..63d428423e904 100644
+--- a/drivers/tee/amdtee/call.c
++++ b/drivers/tee/amdtee/call.c
+@@ -423,19 +423,23 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
+ if (ret) {
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+ arg->ret = TEEC_ERROR_COMMUNICATION;
+- } else if (arg->ret == TEEC_SUCCESS) {
+- ret = get_ta_refcount(load_cmd.ta_handle);
+- if (!ret) {
+- arg->ret_origin = TEEC_ORIGIN_COMMS;
+- arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+-
+- /* Unload the TA on error */
+- unload_cmd.ta_handle = load_cmd.ta_handle;
+- psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
+- (void *)&unload_cmd,
+- sizeof(unload_cmd), &ret);
+- } else {
+- set_session_id(load_cmd.ta_handle, 0, &arg->session);
++ } else {
++ arg->ret_origin = load_cmd.return_origin;
++
++ if (arg->ret == TEEC_SUCCESS) {
++ ret = get_ta_refcount(load_cmd.ta_handle);
++ if (!ret) {
++ arg->ret_origin = TEEC_ORIGIN_COMMS;
++ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
++
++ /* Unload the TA on error */
++ unload_cmd.ta_handle = load_cmd.ta_handle;
++ psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
++ (void *)&unload_cmd,
++ sizeof(unload_cmd), &ret);
++ } else {
++ set_session_id(load_cmd.ta_handle, 0, &arg->session);
++ }
+ }
+ }
+ mutex_unlock(&ta_refcount_mutex);
+diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
+index da6b88e80dc07..372d64756ed64 100644
+--- a/drivers/tee/amdtee/core.c
++++ b/drivers/tee/amdtee/core.c
+@@ -203,9 +203,8 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta,
+
+ *ta_size = roundup(fw->size, PAGE_SIZE);
+ *ta = (void *)__get_free_pages(GFP_KERNEL, get_order(*ta_size));
+- if (IS_ERR(*ta)) {
+- pr_err("%s: get_free_pages failed 0x%llx\n", __func__,
+- (u64)*ta);
++ if (!*ta) {
++ pr_err("%s: get_free_pages failed\n", __func__);
+ rc = -ENOMEM;
+ goto rel_fw;
+ }
+@@ -268,35 +267,34 @@ int amdtee_open_session(struct tee_context *ctx,
+ goto out;
+ }
+
++ /* Open session with loaded TA */
++ handle_open_session(arg, &session_info, param);
++ if (arg->ret != TEEC_SUCCESS) {
++ pr_err("open_session failed %d\n", arg->ret);
++ handle_unload_ta(ta_handle);
++ kref_put(&sess->refcount, destroy_session);
++ goto out;
++ }
++
+ /* Find an empty session index for the given TA */
+ spin_lock(&sess->lock);
+ i = find_first_zero_bit(sess->sess_mask, TEE_NUM_SESSIONS);
+- if (i < TEE_NUM_SESSIONS)
++ if (i < TEE_NUM_SESSIONS) {
++ sess->session_info[i] = session_info;
++ set_session_id(ta_handle, i, &arg->session);
+ set_bit(i, sess->sess_mask);
++ }
+ spin_unlock(&sess->lock);
+
+ if (i >= TEE_NUM_SESSIONS) {
+ pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS);
++ handle_close_session(ta_handle, session_info);
+ handle_unload_ta(ta_handle);
+ kref_put(&sess->refcount, destroy_session);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+- /* Open session with loaded TA */
+- handle_open_session(arg, &session_info, param);
+- if (arg->ret != TEEC_SUCCESS) {
+- pr_err("open_session failed %d\n", arg->ret);
+- spin_lock(&sess->lock);
+- clear_bit(i, sess->sess_mask);
+- spin_unlock(&sess->lock);
+- handle_unload_ta(ta_handle);
+- kref_put(&sess->refcount, destroy_session);
+- goto out;
+- }
+-
+- sess->session_info[i] = session_info;
+- set_session_id(ta_handle, i, &arg->session);
+ out:
+ free_pages((u64)ta, get_order(ta_size));
+ return rc;
+diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
+index 5363ebebfc357..50c0d839fe751 100644
+--- a/drivers/tee/optee/core.c
++++ b/drivers/tee/optee/core.c
+@@ -588,6 +588,7 @@ static int optee_remove(struct platform_device *pdev)
+ /* Unregister OP-TEE specific client devices on TEE bus */
+ optee_unregister_devices();
+
++ teedev_close_context(optee->ctx);
+ /*
+ * Ask OP-TEE to free all cached shared memory objects to decrease
+ * reference counters and also avoid wild pointers in secure world
+@@ -633,6 +634,7 @@ static int optee_probe(struct platform_device *pdev)
+ struct optee *optee = NULL;
+ void *memremaped_shm = NULL;
+ struct tee_device *teedev;
++ struct tee_context *ctx;
+ u32 sec_caps;
+ int rc;
+
+@@ -719,6 +721,12 @@ static int optee_probe(struct platform_device *pdev)
+ optee_supp_init(&optee->supp);
+ optee->memremaped_shm = memremaped_shm;
+ optee->pool = pool;
++ ctx = teedev_open(optee->teedev);
++ if (IS_ERR(ctx)) {
++ rc = PTR_ERR(ctx);
++ goto err;
++ }
++ optee->ctx = ctx;
+
+ /*
+ * Ensure that there are no pre-existing shm objects before enabling
+diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c
+index 128a2d2a50a16..a74d82e230e36 100644
+--- a/drivers/tee/optee/device.c
++++ b/drivers/tee/optee/device.c
+@@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid)
+ rc = device_register(&optee_device->dev);
+ if (rc) {
+ pr_err("device registration failed, err: %d\n", rc);
+- kfree(optee_device);
++ put_device(&optee_device->dev);
+ }
+
+ return rc;
+diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
+index f6bb4a763ba94..ea09533e30cde 100644
+--- a/drivers/tee/optee/optee_private.h
++++ b/drivers/tee/optee/optee_private.h
+@@ -70,6 +70,7 @@ struct optee_supp {
+ * struct optee - main service struct
+ * @supp_teedev: supplicant device
+ * @teedev: client device
++ * @ctx: driver internal TEE context
+ * @invoke_fn: function to issue smc or hvc
+ * @call_queue: queue of threads waiting to call @invoke_fn
+ * @wait_queue: queue of threads from secure world waiting for a
+@@ -87,6 +88,7 @@ struct optee {
+ struct tee_device *supp_teedev;
+ struct tee_device *teedev;
+ optee_invoke_fn *invoke_fn;
++ struct tee_context *ctx;
+ struct optee_call_queue call_queue;
+ struct optee_wait_queue wait_queue;
+ struct optee_supp supp;
+diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
+index efbaff7ad7e59..456833d820078 100644
+--- a/drivers/tee/optee/rpc.c
++++ b/drivers/tee/optee/rpc.c
+@@ -285,6 +285,7 @@ static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz)
+ }
+
+ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
++ struct optee *optee,
+ struct optee_msg_arg *arg,
+ struct optee_call_ctx *call_ctx)
+ {
+@@ -314,7 +315,8 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ shm = cmd_alloc_suppl(ctx, sz);
+ break;
+ case OPTEE_RPC_SHM_TYPE_KERNEL:
+- shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV);
++ shm = tee_shm_alloc(optee->ctx, sz,
++ TEE_SHM_MAPPED | TEE_SHM_PRIV);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+@@ -471,7 +473,7 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
+ break;
+ case OPTEE_RPC_CMD_SHM_ALLOC:
+ free_pages_list(call_ctx);
+- handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx);
++ handle_rpc_func_cmd_shm_alloc(ctx, optee, arg, call_ctx);
+ break;
+ case OPTEE_RPC_CMD_SHM_FREE:
+ handle_rpc_func_cmd_shm_free(ctx, arg);
+@@ -502,7 +504,7 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param,
+
+ switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+ case OPTEE_SMC_RPC_FUNC_ALLOC:
+- shm = tee_shm_alloc(ctx, param->a1,
++ shm = tee_shm_alloc(optee->ctx, param->a1,
+ TEE_SHM_MAPPED | TEE_SHM_PRIV);
+ if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
+ reg_pair_from_64(&param->a1, &param->a2, pa);
+diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c
+index d167039af519e..1aa843f2ecc7c 100644
+--- a/drivers/tee/optee/shm_pool.c
++++ b/drivers/tee/optee/shm_pool.c
+@@ -41,10 +41,8 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
+ goto err;
+ }
+
+- for (i = 0; i < nr_pages; i++) {
+- pages[i] = page;
+- page++;
+- }
++ for (i = 0; i < nr_pages; i++)
++ pages[i] = page + i;
+
+ shm->flags |= TEE_SHM_REGISTER;
+ rc = optee_shm_register(shm->ctx, shm, pages, nr_pages,
+diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
+index 2b37bc408fc3d..a44e5b53e7a91 100644
+--- a/drivers/tee/tee_core.c
++++ b/drivers/tee/tee_core.c
+@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(driver_lock);
+ static struct class *tee_class;
+ static dev_t tee_devt;
+
+-static struct tee_context *teedev_open(struct tee_device *teedev)
++struct tee_context *teedev_open(struct tee_device *teedev)
+ {
+ int rc;
+ struct tee_context *ctx;
+@@ -70,6 +70,7 @@ err:
+ return ERR_PTR(rc);
+
+ }
++EXPORT_SYMBOL_GPL(teedev_open);
+
+ void teedev_ctx_get(struct tee_context *ctx)
+ {
+@@ -96,11 +97,14 @@ void teedev_ctx_put(struct tee_context *ctx)
+ kref_put(&ctx->refcount, teedev_ctx_release);
+ }
+
+-static void teedev_close_context(struct tee_context *ctx)
++void teedev_close_context(struct tee_context *ctx)
+ {
+- tee_device_put(ctx->teedev);
++ struct tee_device *teedev = ctx->teedev;
++
+ teedev_ctx_put(ctx);
++ tee_device_put(teedev);
+ }
++EXPORT_SYMBOL_GPL(teedev_close_context);
+
+ static int tee_open(struct inode *inode, struct file *filp)
+ {
+@@ -330,6 +334,9 @@ tee_ioctl_shm_register(struct tee_context *ctx,
+ if (data.flags)
+ return -EINVAL;
+
++ if (!access_ok((void __user *)(unsigned long)data.addr, data.length))
++ return -EFAULT;
++
+ shm = tee_shm_register(ctx, data.addr, data.length,
+ TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED);
+ if (IS_ERR(shm))
+diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
+index 8a9384a64f3e2..6fb4400333fb4 100644
+--- a/drivers/tee/tee_shm.c
++++ b/drivers/tee/tee_shm.c
+@@ -1,14 +1,15 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+- * Copyright (c) 2015-2016, Linaro Limited
++ * Copyright (c) 2015-2017, 2019-2021 Linaro Limited
+ */
++#include <linux/anon_inodes.h>
+ #include <linux/device.h>
+-#include <linux/dma-buf.h>
+-#include <linux/fdtable.h>
+ #include <linux/idr.h>
++#include <linux/mm.h>
+ #include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/tee_drv.h>
++#include <linux/uaccess.h>
+ #include <linux/uio.h>
+ #include "tee_private.h"
+
+@@ -28,16 +29,8 @@ static void release_registered_pages(struct tee_shm *shm)
+ }
+ }
+
+-static void tee_shm_release(struct tee_shm *shm)
++static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm)
+ {
+- struct tee_device *teedev = shm->ctx->teedev;
+-
+- if (shm->flags & TEE_SHM_DMA_BUF) {
+- mutex_lock(&teedev->mutex);
+- idr_remove(&teedev->idr, shm->id);
+- mutex_unlock(&teedev->mutex);
+- }
+-
+ if (shm->flags & TEE_SHM_POOL) {
+ struct tee_shm_pool_mgr *poolm;
+
+@@ -64,45 +57,6 @@ static void tee_shm_release(struct tee_shm *shm)
+ tee_device_put(teedev);
+ }
+
+-static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
+- *attach, enum dma_data_direction dir)
+-{
+- return NULL;
+-}
+-
+-static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
+- struct sg_table *table,
+- enum dma_data_direction dir)
+-{
+-}
+-
+-static void tee_shm_op_release(struct dma_buf *dmabuf)
+-{
+- struct tee_shm *shm = dmabuf->priv;
+-
+- tee_shm_release(shm);
+-}
+-
+-static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+-{
+- struct tee_shm *shm = dmabuf->priv;
+- size_t size = vma->vm_end - vma->vm_start;
+-
+- /* Refuse sharing shared memory provided by application */
+- if (shm->flags & TEE_SHM_USER_MAPPED)
+- return -EINVAL;
+-
+- return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+- size, vma->vm_page_prot);
+-}
+-
+-static const struct dma_buf_ops tee_shm_dma_buf_ops = {
+- .map_dma_buf = tee_shm_op_map_dma_buf,
+- .unmap_dma_buf = tee_shm_op_unmap_dma_buf,
+- .release = tee_shm_op_release,
+- .mmap = tee_shm_op_mmap,
+-};
+-
+ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+ {
+ struct tee_device *teedev = ctx->teedev;
+@@ -137,6 +91,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+ goto err_dev_put;
+ }
+
++ refcount_set(&shm->refcount, 1);
+ shm->flags = flags | TEE_SHM_POOL;
+ shm->ctx = ctx;
+ if (flags & TEE_SHM_DMA_BUF)
+@@ -150,10 +105,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+ goto err_kfree;
+ }
+
+-
+ if (flags & TEE_SHM_DMA_BUF) {
+- DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+-
+ mutex_lock(&teedev->mutex);
+ shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+ mutex_unlock(&teedev->mutex);
+@@ -161,28 +113,11 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+ ret = ERR_PTR(shm->id);
+ goto err_pool_free;
+ }
+-
+- exp_info.ops = &tee_shm_dma_buf_ops;
+- exp_info.size = shm->size;
+- exp_info.flags = O_RDWR;
+- exp_info.priv = shm;
+-
+- shm->dmabuf = dma_buf_export(&exp_info);
+- if (IS_ERR(shm->dmabuf)) {
+- ret = ERR_CAST(shm->dmabuf);
+- goto err_rem;
+- }
+ }
+
+ teedev_ctx_get(ctx);
+
+ return shm;
+-err_rem:
+- if (flags & TEE_SHM_DMA_BUF) {
+- mutex_lock(&teedev->mutex);
+- idr_remove(&teedev->idr, shm->id);
+- mutex_unlock(&teedev->mutex);
+- }
+ err_pool_free:
+ poolm->ops->free(poolm, shm);
+ err_kfree:
+@@ -243,6 +178,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
+ goto err;
+ }
+
++ refcount_set(&shm->refcount, 1);
+ shm->flags = flags | TEE_SHM_REGISTER;
+ shm->ctx = ctx;
+ shm->id = -1;
+@@ -303,22 +239,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
+ goto err;
+ }
+
+- if (flags & TEE_SHM_DMA_BUF) {
+- DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+-
+- exp_info.ops = &tee_shm_dma_buf_ops;
+- exp_info.size = shm->size;
+- exp_info.flags = O_RDWR;
+- exp_info.priv = shm;
+-
+- shm->dmabuf = dma_buf_export(&exp_info);
+- if (IS_ERR(shm->dmabuf)) {
+- ret = ERR_CAST(shm->dmabuf);
+- teedev->desc->ops->shm_unregister(ctx, shm);
+- goto err;
+- }
+- }
+-
+ return shm;
+ err:
+ if (shm) {
+@@ -336,6 +256,35 @@ err:
+ }
+ EXPORT_SYMBOL_GPL(tee_shm_register);
+
++static int tee_shm_fop_release(struct inode *inode, struct file *filp)
++{
++ tee_shm_put(filp->private_data);
++ return 0;
++}
++
++static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma)
++{
++ struct tee_shm *shm = filp->private_data;
++ size_t size = vma->vm_end - vma->vm_start;
++
++ /* Refuse sharing shared memory provided by application */
++ if (shm->flags & TEE_SHM_USER_MAPPED)
++ return -EINVAL;
++
++ /* check for overflowing the buffer's size */
++ if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT)
++ return -EINVAL;
++
++ return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
++ size, vma->vm_page_prot);
++}
++
++static const struct file_operations tee_shm_fops = {
++ .owner = THIS_MODULE,
++ .release = tee_shm_fop_release,
++ .mmap = tee_shm_fop_mmap,
++};
++
+ /**
+ * tee_shm_get_fd() - Increase reference count and return file descriptor
+ * @shm: Shared memory handle
+@@ -348,10 +297,11 @@ int tee_shm_get_fd(struct tee_shm *shm)
+ if (!(shm->flags & TEE_SHM_DMA_BUF))
+ return -EINVAL;
+
+- get_dma_buf(shm->dmabuf);
+- fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
++ /* matched by tee_shm_put() in tee_shm_op_release() */
++ refcount_inc(&shm->refcount);
++ fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR);
+ if (fd < 0)
+- dma_buf_put(shm->dmabuf);
++ tee_shm_put(shm);
+ return fd;
+ }
+
+@@ -361,17 +311,7 @@ int tee_shm_get_fd(struct tee_shm *shm)
+ */
+ void tee_shm_free(struct tee_shm *shm)
+ {
+- /*
+- * dma_buf_put() decreases the dmabuf reference counter and will
+- * call tee_shm_release() when the last reference is gone.
+- *
+- * In the case of driver private memory we call tee_shm_release
+- * directly instead as it doesn't have a reference counter.
+- */
+- if (shm->flags & TEE_SHM_DMA_BUF)
+- dma_buf_put(shm->dmabuf);
+- else
+- tee_shm_release(shm);
++ tee_shm_put(shm);
+ }
+ EXPORT_SYMBOL_GPL(tee_shm_free);
+
+@@ -478,10 +418,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id)
+ teedev = ctx->teedev;
+ mutex_lock(&teedev->mutex);
+ shm = idr_find(&teedev->idr, id);
++ /*
++ * If the tee_shm was found in the IDR it must have a refcount
++ * larger than 0 due to the guarantee in tee_shm_put() below. So
++ * it's safe to use refcount_inc().
++ */
+ if (!shm || shm->ctx != ctx)
+ shm = ERR_PTR(-EINVAL);
+- else if (shm->flags & TEE_SHM_DMA_BUF)
+- get_dma_buf(shm->dmabuf);
++ else
++ refcount_inc(&shm->refcount);
+ mutex_unlock(&teedev->mutex);
+ return shm;
+ }
+@@ -493,7 +438,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
+ */
+ void tee_shm_put(struct tee_shm *shm)
+ {
+- if (shm->flags & TEE_SHM_DMA_BUF)
+- dma_buf_put(shm->dmabuf);
++ struct tee_device *teedev = shm->ctx->teedev;
++ bool do_release = false;
++
++ mutex_lock(&teedev->mutex);
++ if (refcount_dec_and_test(&shm->refcount)) {
++ /*
++ * refcount has reached 0, we must now remove it from the
++ * IDR before releasing the mutex. This will guarantee that
++ * the refcount_inc() in tee_shm_get_from_id() never starts
++ * from 0.
++ */
++ if (shm->flags & TEE_SHM_DMA_BUF)
++ idr_remove(&teedev->idr, shm->id);
++ do_release = true;
++ }
++ mutex_unlock(&teedev->mutex);
++
++ if (do_release)
++ tee_shm_release(teedev, shm);
+ }
+ EXPORT_SYMBOL_GPL(tee_shm_put);
+diff --git a/drivers/thermal/broadcom/bcm2711_thermal.c b/drivers/thermal/broadcom/bcm2711_thermal.c
+index 1ec57d9ecf539..e9bef5c3414b6 100644
+--- a/drivers/thermal/broadcom/bcm2711_thermal.c
++++ b/drivers/thermal/broadcom/bcm2711_thermal.c
+@@ -38,7 +38,6 @@ static int bcm2711_get_temp(void *data, int *temp)
+ int offset = thermal_zone_get_offset(priv->thermal);
+ u32 val;
+ int ret;
+- long t;
+
+ ret = regmap_read(priv->regmap, AVS_RO_TEMP_STATUS, &val);
+ if (ret)
+@@ -50,9 +49,7 @@ static int bcm2711_get_temp(void *data, int *temp)
+ val &= AVS_RO_TEMP_STATUS_DATA_MSK;
+
+ /* Convert a HW code to a temperature reading (millidegree celsius) */
+- t = slope * val + offset;
+-
+- *temp = t < 0 ? 0 : t;
++ *temp = slope * val + offset;
+
+ return 0;
+ }
+diff --git a/drivers/thermal/broadcom/sr-thermal.c b/drivers/thermal/broadcom/sr-thermal.c
+index 475ce29007713..85ab9edd580cc 100644
+--- a/drivers/thermal/broadcom/sr-thermal.c
++++ b/drivers/thermal/broadcom/sr-thermal.c
+@@ -60,6 +60,9 @@ static int sr_thermal_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -ENOENT;
++
+ sr_thermal->regs = (void __iomem *)devm_memremap(&pdev->dev, res->start,
+ resource_size(res),
+ MEMREMAP_WB);
+diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
+index 43b1ae8a77893..12a60415af955 100644
+--- a/drivers/thermal/cpufreq_cooling.c
++++ b/drivers/thermal/cpufreq_cooling.c
+@@ -525,17 +525,17 @@ __cpufreq_cooling_register(struct device_node *np,
+ struct thermal_cooling_device_ops *cooling_ops;
+ char *name;
+
++ if (IS_ERR_OR_NULL(policy)) {
++ pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
++ return ERR_PTR(-EINVAL);
++ }
++
+ dev = get_cpu_device(policy->cpu);
+ if (unlikely(!dev)) {
+ pr_warn("No cpu device for cpu %d\n", policy->cpu);
+ return ERR_PTR(-ENODEV);
+ }
+
+- if (IS_ERR_OR_NULL(policy)) {
+- pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
+- return ERR_PTR(-EINVAL);
+- }
+-
+ i = cpufreq_table_count_valid_entries(policy);
+ if (!i) {
+ pr_debug("%s: CPUFreq table not found or has no valid entries\n",
+diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
+index 4310cb342a9fb..d38a80adec733 100644
+--- a/drivers/thermal/devfreq_cooling.c
++++ b/drivers/thermal/devfreq_cooling.c
+@@ -358,21 +358,28 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+ struct thermal_cooling_device *cdev;
+ struct device *dev = df->dev.parent;
+ struct devfreq_cooling_device *dfc;
++ struct thermal_cooling_device_ops *ops;
+ char *name;
+ int err, num_opps;
+
+- dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
+- if (!dfc)
++ ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
++ if (!ops)
+ return ERR_PTR(-ENOMEM);
+
++ dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
++ if (!dfc) {
++ err = -ENOMEM;
++ goto free_ops;
++ }
++
+ dfc->devfreq = df;
+
+ dfc->em_pd = em_pd_get(dev);
+ if (dfc->em_pd) {
+- devfreq_cooling_ops.get_requested_power =
++ ops->get_requested_power =
+ devfreq_cooling_get_requested_power;
+- devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
+- devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
++ ops->state2power = devfreq_cooling_state2power;
++ ops->power2state = devfreq_cooling_power2state;
+
+ dfc->power_ops = dfc_power;
+
+@@ -407,8 +414,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+ if (!name)
+ goto remove_qos_req;
+
+- cdev = thermal_of_cooling_device_register(np, name, dfc,
+- &devfreq_cooling_ops);
++ cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
+ kfree(name);
+
+ if (IS_ERR(cdev)) {
+@@ -429,6 +435,8 @@ free_table:
+ kfree(dfc->freq_table);
+ free_dfc:
+ kfree(dfc);
++free_ops:
++ kfree(ops);
+
+ return ERR_PTR(err);
+ }
+@@ -510,11 +518,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
+ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
+ {
+ struct devfreq_cooling_device *dfc;
++ const struct thermal_cooling_device_ops *ops;
+ struct device *dev;
+
+ if (IS_ERR_OR_NULL(cdev))
+ return;
+
++ ops = cdev->ops;
+ dfc = cdev->devdata;
+ dev = dfc->devfreq->dev.parent;
+
+@@ -525,5 +535,6 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
+
+ kfree(dfc->freq_table);
+ kfree(dfc);
++ kfree(ops);
+ }
+ EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
+diff --git a/drivers/thermal/gov_fair_share.c b/drivers/thermal/gov_fair_share.c
+index 1e5abf4822bed..a4c30797b5343 100644
+--- a/drivers/thermal/gov_fair_share.c
++++ b/drivers/thermal/gov_fair_share.c
+@@ -25,10 +25,10 @@ static int get_trip_level(struct thermal_zone_device *tz)
+ int trip_temp;
+ enum thermal_trip_type trip_type;
+
+- if (tz->trips == 0 || !tz->ops->get_trip_temp)
++ if (tz->num_trips == 0 || !tz->ops->get_trip_temp)
+ return 0;
+
+- for (count = 0; count < tz->trips; count++) {
++ for (count = 0; count < tz->num_trips; count++) {
+ tz->ops->get_trip_temp(tz, count, &trip_temp);
+ if (tz->temperature < trip_temp)
+ break;
+@@ -49,11 +49,7 @@ static int get_trip_level(struct thermal_zone_device *tz)
+ static long get_target_state(struct thermal_zone_device *tz,
+ struct thermal_cooling_device *cdev, int percentage, int level)
+ {
+- unsigned long max_state;
+-
+- cdev->ops->get_max_state(cdev, &max_state);
+-
+- return (long)(percentage * level * max_state) / (100 * tz->trips);
++ return (long)(percentage * level * cdev->max_state) / (100 * tz->num_trips);
+ }
+
+ /**
+diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
+index 13e375751d229..1d50524709672 100644
+--- a/drivers/thermal/gov_power_allocator.c
++++ b/drivers/thermal/gov_power_allocator.c
+@@ -527,7 +527,7 @@ static void get_governor_trips(struct thermal_zone_device *tz,
+ last_active = INVALID_TRIP;
+ last_passive = INVALID_TRIP;
+
+- for (i = 0; i < tz->trips; i++) {
++ for (i = 0; i < tz->num_trips; i++) {
+ enum thermal_trip_type type;
+ int ret;
+
+@@ -668,7 +668,7 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
+
+ get_governor_trips(tz, params);
+
+- if (tz->trips > 0) {
++ if (tz->num_trips > 0) {
+ ret = tz->ops->get_trip_temp(tz,
+ params->trip_max_desired_temperature,
+ &control_temp);
+diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
+index 9a21ac0ceb112..29ff1e66dd6e9 100644
+--- a/drivers/thermal/hisi_thermal.c
++++ b/drivers/thermal/hisi_thermal.c
+@@ -435,10 +435,6 @@ static int hi3660_thermal_probe(struct hisi_thermal_data *data)
+ data->sensor[0].irq_name = "tsensor_a73";
+ data->sensor[0].data = data;
+
+- data->sensor[1].id = HI3660_LITTLE_SENSOR;
+- data->sensor[1].irq_name = "tsensor_a53";
+- data->sensor[1].data = data;
+-
+ return 0;
+ }
+
+diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
+index 7442e013738f8..c5cd873c6e016 100644
+--- a/drivers/thermal/imx8mm_thermal.c
++++ b/drivers/thermal/imx8mm_thermal.c
+@@ -21,6 +21,7 @@
+ #define TPS 0x4
+ #define TRITSR 0x20 /* TMU immediate temp */
+
++#define TER_ADC_PD BIT(30)
+ #define TER_EN BIT(31)
+ #define TRITSR_TEMP0_VAL_MASK 0xff
+ #define TRITSR_TEMP1_VAL_MASK 0xff0000
+@@ -64,8 +65,14 @@ static int imx8mm_tmu_get_temp(void *data, int *temp)
+ u32 val;
+
+ val = readl_relaxed(tmu->base + TRITSR) & TRITSR_TEMP0_VAL_MASK;
++
++ /*
++ * Do not validate against the V bit (bit 31) due to errata
++ * ERR051272: TMU: Bit 31 of registers TMU_TSCR/TMU_TRITSR/TMU_TRATSR invalid
++ */
++
+ *temp = val * 1000;
+- if (*temp < VER1_TEMP_LOW_LIMIT)
++ if (*temp < VER1_TEMP_LOW_LIMIT || *temp > VER2_TEMP_HIGH_LIMIT)
+ return -EAGAIN;
+
+ return 0;
+@@ -113,6 +120,8 @@ static void imx8mm_tmu_enable(struct imx8mm_tmu *tmu, bool enable)
+
+ val = readl_relaxed(tmu->base + TER);
+ val = enable ? (val | TER_EN) : (val & ~TER_EN);
++ if (tmu->socdata->version == TMU_VER2)
++ val = enable ? (val & ~TER_ADC_PD) : (val | TER_ADC_PD);
+ writel_relaxed(val, tmu->base + TER);
+ }
+
+diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c
+index 8d76dbfde6a9f..331a241eb0ef3 100644
+--- a/drivers/thermal/imx_sc_thermal.c
++++ b/drivers/thermal/imx_sc_thermal.c
+@@ -94,8 +94,8 @@ static int imx_sc_thermal_probe(struct platform_device *pdev)
+ sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL);
+ if (!sensor) {
+ of_node_put(child);
+- of_node_put(sensor_np);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto put_node;
+ }
+
+ ret = thermal_zone_of_get_sensor_id(child,
+@@ -124,7 +124,9 @@ static int imx_sc_thermal_probe(struct platform_device *pdev)
+ dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n");
+ }
+
++put_node:
+ of_node_put(sensor_np);
++ of_node_put(np);
+
+ return ret;
+ }
+diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
+index 2c7473d86a59b..16663373b6829 100644
+--- a/drivers/thermal/imx_thermal.c
++++ b/drivers/thermal/imx_thermal.c
+@@ -15,6 +15,7 @@
+ #include <linux/regmap.h>
+ #include <linux/thermal.h>
+ #include <linux/nvmem-consumer.h>
++#include <linux/pm_runtime.h>
+
+ #define REG_SET 0x4
+ #define REG_CLR 0x8
+@@ -194,6 +195,7 @@ static struct thermal_soc_data thermal_imx7d_data = {
+ };
+
+ struct imx_thermal_data {
++ struct device *dev;
+ struct cpufreq_policy *policy;
+ struct thermal_zone_device *tz;
+ struct thermal_cooling_device *cdev;
+@@ -252,44 +254,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
+ const struct thermal_soc_data *soc_data = data->socdata;
+ struct regmap *map = data->tempmon;
+ unsigned int n_meas;
+- bool wait, run_measurement;
+ u32 val;
++ int ret;
+
+- run_measurement = !data->irq_enabled;
+- if (!run_measurement) {
+- /* Check if a measurement is currently in progress */
+- regmap_read(map, soc_data->temp_data, &val);
+- wait = !(val & soc_data->temp_valid_mask);
+- } else {
+- /*
+- * Every time we measure the temperature, we will power on the
+- * temperature sensor, enable measurements, take a reading,
+- * disable measurements, power off the temperature sensor.
+- */
+- regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
+- soc_data->power_down_mask);
+- regmap_write(map, soc_data->sensor_ctrl + REG_SET,
+- soc_data->measure_temp_mask);
+-
+- wait = true;
+- }
+-
+- /*
+- * According to the temp sensor designers, it may require up to ~17us
+- * to complete a measurement.
+- */
+- if (wait)
+- usleep_range(20, 50);
++ ret = pm_runtime_resume_and_get(data->dev);
++ if (ret < 0)
++ return ret;
+
+ regmap_read(map, soc_data->temp_data, &val);
+
+- if (run_measurement) {
+- regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
+- soc_data->measure_temp_mask);
+- regmap_write(map, soc_data->sensor_ctrl + REG_SET,
+- soc_data->power_down_mask);
+- }
+-
+ if ((val & soc_data->temp_valid_mask) == 0) {
+ dev_dbg(&tz->device, "temp measurement never finished\n");
+ return -EAGAIN;
+@@ -328,6 +301,8 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
+ enable_irq(data->irq);
+ }
+
++ pm_runtime_put(data->dev);
++
+ return 0;
+ }
+
+@@ -335,24 +310,16 @@ static int imx_change_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode mode)
+ {
+ struct imx_thermal_data *data = tz->devdata;
+- struct regmap *map = data->tempmon;
+- const struct thermal_soc_data *soc_data = data->socdata;
+
+ if (mode == THERMAL_DEVICE_ENABLED) {
+- regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
+- soc_data->power_down_mask);
+- regmap_write(map, soc_data->sensor_ctrl + REG_SET,
+- soc_data->measure_temp_mask);
++ pm_runtime_get(data->dev);
+
+ if (!data->irq_enabled) {
+ data->irq_enabled = true;
+ enable_irq(data->irq);
+ }
+ } else {
+- regmap_write(map, soc_data->sensor_ctrl + REG_CLR,
+- soc_data->measure_temp_mask);
+- regmap_write(map, soc_data->sensor_ctrl + REG_SET,
+- soc_data->power_down_mask);
++ pm_runtime_put(data->dev);
+
+ if (data->irq_enabled) {
+ disable_irq(data->irq);
+@@ -393,6 +360,11 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
+ int temp)
+ {
+ struct imx_thermal_data *data = tz->devdata;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(data->dev);
++ if (ret < 0)
++ return ret;
+
+ /* do not allow changing critical threshold */
+ if (trip == IMX_TRIP_CRITICAL)
+@@ -406,6 +378,8 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
+
+ imx_set_alarm_temp(data, temp);
+
++ pm_runtime_put(data->dev);
++
+ return 0;
+ }
+
+@@ -681,6 +655,8 @@ static int imx_thermal_probe(struct platform_device *pdev)
+ if (!data)
+ return -ENOMEM;
+
++ data->dev = &pdev->dev;
++
+ map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon");
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+@@ -800,6 +776,16 @@ static int imx_thermal_probe(struct platform_device *pdev)
+ data->socdata->power_down_mask);
+ regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
+ data->socdata->measure_temp_mask);
++ /* After power up, we need a delay before first access can be done. */
++ usleep_range(20, 50);
++
++ /* the core was configured and enabled just before */
++ pm_runtime_set_active(&pdev->dev);
++ pm_runtime_enable(data->dev);
++
++ ret = pm_runtime_resume_and_get(data->dev);
++ if (ret < 0)
++ goto disable_runtime_pm;
+
+ data->irq_enabled = true;
+ ret = thermal_zone_device_enable(data->tz);
+@@ -814,10 +800,15 @@ static int imx_thermal_probe(struct platform_device *pdev)
+ goto thermal_zone_unregister;
+ }
+
++ pm_runtime_put(data->dev);
++
+ return 0;
+
+ thermal_zone_unregister:
+ thermal_zone_device_unregister(data->tz);
++disable_runtime_pm:
++ pm_runtime_put_noidle(data->dev);
++ pm_runtime_disable(data->dev);
+ clk_disable:
+ clk_disable_unprepare(data->thermal_clk);
+ legacy_cleanup:
+@@ -829,13 +820,9 @@ legacy_cleanup:
+ static int imx_thermal_remove(struct platform_device *pdev)
+ {
+ struct imx_thermal_data *data = platform_get_drvdata(pdev);
+- struct regmap *map = data->tempmon;
+
+- /* Disable measurements */
+- regmap_write(map, data->socdata->sensor_ctrl + REG_SET,
+- data->socdata->power_down_mask);
+- if (!IS_ERR(data->thermal_clk))
+- clk_disable_unprepare(data->thermal_clk);
++ pm_runtime_put_noidle(data->dev);
++ pm_runtime_disable(data->dev);
+
+ thermal_zone_device_unregister(data->tz);
+ imx_thermal_unregister_legacy_cooling(data);
+@@ -858,29 +845,79 @@ static int __maybe_unused imx_thermal_suspend(struct device *dev)
+ ret = thermal_zone_device_disable(data->tz);
+ if (ret)
+ return ret;
++
++ return pm_runtime_force_suspend(data->dev);
++}
++
++static int __maybe_unused imx_thermal_resume(struct device *dev)
++{
++ struct imx_thermal_data *data = dev_get_drvdata(dev);
++ int ret;
++
++ ret = pm_runtime_force_resume(data->dev);
++ if (ret)
++ return ret;
++ /* Enabled thermal sensor after resume */
++ return thermal_zone_device_enable(data->tz);
++}
++
++static int __maybe_unused imx_thermal_runtime_suspend(struct device *dev)
++{
++ struct imx_thermal_data *data = dev_get_drvdata(dev);
++ const struct thermal_soc_data *socdata = data->socdata;
++ struct regmap *map = data->tempmon;
++ int ret;
++
++ ret = regmap_write(map, socdata->sensor_ctrl + REG_CLR,
++ socdata->measure_temp_mask);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(map, socdata->sensor_ctrl + REG_SET,
++ socdata->power_down_mask);
++ if (ret)
++ return ret;
++
+ clk_disable_unprepare(data->thermal_clk);
+
+ return 0;
+ }
+
+-static int __maybe_unused imx_thermal_resume(struct device *dev)
++static int __maybe_unused imx_thermal_runtime_resume(struct device *dev)
+ {
+ struct imx_thermal_data *data = dev_get_drvdata(dev);
++ const struct thermal_soc_data *socdata = data->socdata;
++ struct regmap *map = data->tempmon;
+ int ret;
+
+ ret = clk_prepare_enable(data->thermal_clk);
+ if (ret)
+ return ret;
+- /* Enabled thermal sensor after resume */
+- ret = thermal_zone_device_enable(data->tz);
++
++ ret = regmap_write(map, socdata->sensor_ctrl + REG_CLR,
++ socdata->power_down_mask);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(map, socdata->sensor_ctrl + REG_SET,
++ socdata->measure_temp_mask);
+ if (ret)
+ return ret;
+
++ /*
++ * According to the temp sensor designers, it may require up to ~17us
++ * to complete a measurement.
++ */
++ usleep_range(20, 50);
++
+ return 0;
+ }
+
+-static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops,
+- imx_thermal_suspend, imx_thermal_resume);
++static const struct dev_pm_ops imx_thermal_pm_ops = {
++ SET_SYSTEM_SLEEP_PM_OPS(imx_thermal_suspend, imx_thermal_resume)
++ SET_RUNTIME_PM_OPS(imx_thermal_runtime_suspend,
++ imx_thermal_runtime_resume, NULL)
++};
+
+ static struct platform_driver imx_thermal = {
+ .driver = {
+diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
+index c83ea5d04a1da..e0d65e450c89e 100644
+--- a/drivers/thermal/intel/Kconfig
++++ b/drivers/thermal/intel/Kconfig
+@@ -64,7 +64,8 @@ endmenu
+
+ config INTEL_BXT_PMIC_THERMAL
+ tristate "Intel Broxton PMIC thermal driver"
+- depends on X86 && INTEL_SOC_PMIC_BXTWC && REGMAP
++ depends on X86 && INTEL_SOC_PMIC_BXTWC
++ select REGMAP
+ help
+ Select this driver for Intel Broxton PMIC with ADC channels monitoring
+ system temperature measurements and alerts.
+diff --git a/drivers/thermal/intel/int340x_thermal/Kconfig b/drivers/thermal/intel/int340x_thermal/Kconfig
+index 45c31f3d6054b..5d046de96a5d0 100644
+--- a/drivers/thermal/intel/int340x_thermal/Kconfig
++++ b/drivers/thermal/intel/int340x_thermal/Kconfig
+@@ -5,12 +5,12 @@
+
+ config INT340X_THERMAL
+ tristate "ACPI INT340X thermal drivers"
+- depends on X86 && ACPI && PCI
++ depends on X86_64 && ACPI && PCI
+ select THERMAL_GOV_USER_SPACE
+ select ACPI_THERMAL_REL
+ select ACPI_FAN
+ select INTEL_SOC_DTS_IOSF_CORE
+- select PROC_THERMAL_MMIO_RAPL if X86_64 && POWERCAP
++ select PROC_THERMAL_MMIO_RAPL if POWERCAP
+ help
+ Newer laptops and tablets that use ACPI may have thermal sensors and
+ other devices with thermal control capabilities outside the core
+diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+index 19926beeb3b71..6aa5fe9736138 100644
+--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
++++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+@@ -53,7 +53,7 @@ struct int3400_thermal_priv {
+ struct art *arts;
+ int trt_count;
+ struct trt *trts;
+- u8 uuid_bitmap;
++ u32 uuid_bitmap;
+ int rel_misc_dev_res;
+ int current_uuid_index;
+ char *data_vault;
+@@ -67,7 +67,7 @@ static int evaluate_odvp(struct int3400_thermal_priv *priv);
+ struct odvp_attr {
+ int odvp;
+ struct int3400_thermal_priv *priv;
+- struct kobj_attribute attr;
++ struct device_attribute attr;
+ };
+
+ static ssize_t data_vault_read(struct file *file, struct kobject *kobj,
+@@ -272,7 +272,7 @@ static int int3400_thermal_run_osc(acpi_handle handle,
+ return result;
+ }
+
+-static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
++static ssize_t odvp_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ struct odvp_attr *odvp_attr;
+@@ -405,6 +405,10 @@ static void int3400_notify(acpi_handle handle,
+ thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
+ thermal_prop[4] = NULL;
+ kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop);
++ kfree(thermal_prop[0]);
++ kfree(thermal_prop[1]);
++ kfree(thermal_prop[2]);
++ kfree(thermal_prop[3]);
+ }
+
+ static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
+@@ -465,6 +469,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
+ priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
+ obj->package.elements[0].buffer.length,
+ GFP_KERNEL);
++ if (!priv->data_vault) {
++ kfree(buffer.pointer);
++ return;
++ }
++
+ bin_attr_data_vault.private = priv->data_vault;
+ bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
+ kfree(buffer.pointer);
+diff --git a/drivers/thermal/intel/int340x_thermal/int3401_thermal.c b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
+index acebc8ba94e29..217786fba185c 100644
+--- a/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
++++ b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
+@@ -44,15 +44,21 @@ static int int3401_remove(struct platform_device *pdev)
+ }
+
+ #ifdef CONFIG_PM_SLEEP
++static int int3401_thermal_suspend(struct device *dev)
++{
++ return proc_thermal_suspend(dev);
++}
+ static int int3401_thermal_resume(struct device *dev)
+ {
+ return proc_thermal_resume(dev);
+ }
+ #else
++#define int3401_thermal_suspend NULL
+ #define int3401_thermal_resume NULL
+ #endif
+
+-static SIMPLE_DEV_PM_OPS(int3401_proc_thermal_pm, NULL, int3401_thermal_resume);
++static SIMPLE_DEV_PM_OPS(int3401_proc_thermal_pm, int3401_thermal_suspend,
++ int3401_thermal_resume);
+
+ static struct platform_driver int3401_driver = {
+ .probe = int3401_add,
+diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+index 62c0aa5d07837..0a4eaa307156d 100644
+--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
++++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+@@ -44,11 +44,13 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
+ int trip, int *temp)
+ {
+ struct int34x_thermal_zone *d = zone->devdata;
+- int i;
++ int i, ret = 0;
+
+ if (d->override_ops && d->override_ops->get_trip_temp)
+ return d->override_ops->get_trip_temp(zone, trip, temp);
+
++ mutex_lock(&d->trip_mutex);
++
+ if (trip < d->aux_trip_nr)
+ *temp = d->aux_trips[trip];
+ else if (trip == d->crt_trip_id)
+@@ -66,10 +68,12 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
+ }
+ }
+ if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT)
+- return -EINVAL;
++ ret = -EINVAL;
+ }
+
+- return 0;
++ mutex_unlock(&d->trip_mutex);
++
++ return ret;
+ }
+
+ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
+@@ -77,11 +81,13 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
+ enum thermal_trip_type *type)
+ {
+ struct int34x_thermal_zone *d = zone->devdata;
+- int i;
++ int i, ret = 0;
+
+ if (d->override_ops && d->override_ops->get_trip_type)
+ return d->override_ops->get_trip_type(zone, trip, type);
+
++ mutex_lock(&d->trip_mutex);
++
+ if (trip < d->aux_trip_nr)
+ *type = THERMAL_TRIP_PASSIVE;
+ else if (trip == d->crt_trip_id)
+@@ -99,10 +105,12 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
+ }
+ }
+ if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT)
+- return -EINVAL;
++ ret = -EINVAL;
+ }
+
+- return 0;
++ mutex_unlock(&d->trip_mutex);
++
++ return ret;
+ }
+
+ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone,
+@@ -180,6 +188,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
+ int trip_cnt = int34x_zone->aux_trip_nr;
+ int i;
+
++ mutex_lock(&int34x_zone->trip_mutex);
++
+ int34x_zone->crt_trip_id = -1;
+ if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT",
+ &int34x_zone->crt_temp))
+@@ -207,6 +217,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
+ int34x_zone->act_trips[i].valid = true;
+ }
+
++ mutex_unlock(&int34x_zone->trip_mutex);
++
+ return trip_cnt;
+ }
+ EXPORT_SYMBOL_GPL(int340x_thermal_read_trips);
+@@ -230,6 +242,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
+ if (!int34x_thermal_zone)
+ return ERR_PTR(-ENOMEM);
+
++ mutex_init(&int34x_thermal_zone->trip_mutex);
++
+ int34x_thermal_zone->adev = adev;
+ int34x_thermal_zone->override_ops = override_ops;
+
+@@ -281,6 +295,7 @@ err_thermal_zone:
+ acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
+ kfree(int34x_thermal_zone->aux_trips);
+ err_trip_alloc:
++ mutex_destroy(&int34x_thermal_zone->trip_mutex);
+ kfree(int34x_thermal_zone);
+ return ERR_PTR(ret);
+ }
+@@ -292,6 +307,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone
+ thermal_zone_device_unregister(int34x_thermal_zone->zone);
+ acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
+ kfree(int34x_thermal_zone->aux_trips);
++ mutex_destroy(&int34x_thermal_zone->trip_mutex);
+ kfree(int34x_thermal_zone);
+ }
+ EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
+diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
+index 3b4971df1b33b..8f9872afd0d3c 100644
+--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
++++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
+@@ -32,6 +32,7 @@ struct int34x_thermal_zone {
+ struct thermal_zone_device_ops *override_ops;
+ void *priv_data;
+ struct acpi_lpat_conversion_table *lpat_table;
++ struct mutex trip_mutex;
+ };
+
+ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *,
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+index fb64acfd5e07d..a8d98f1bd6c67 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+@@ -68,8 +68,7 @@ static const struct attribute_group power_limit_attribute_group = {
+ .name = "power_limits"
+ };
+
+-static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
+- struct device_attribute *attr, char *buf)
++static int tcc_get_offset(void)
+ {
+ u64 val;
+ int err;
+@@ -78,8 +77,20 @@ static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
+ if (err)
+ return err;
+
+- val = (val >> 24) & 0x3f;
+- return sprintf(buf, "%d\n", (int)val);
++ return (val >> 24) & 0x3f;
++}
++
++static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ int tcc;
++
++ tcc = tcc_get_offset();
++ if (tcc < 0)
++ return tcc;
++
++ return sprintf(buf, "%d\n", tcc);
+ }
+
+ static int tcc_offset_update(unsigned int tcc)
+@@ -107,8 +118,6 @@ static int tcc_offset_update(unsigned int tcc)
+ return 0;
+ }
+
+-static int tcc_offset_save = -1;
+-
+ static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+@@ -131,8 +140,6 @@ static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
+ if (err)
+ return err;
+
+- tcc_offset_save = tcc;
+-
+ return count;
+ }
+
+@@ -345,6 +352,18 @@ void proc_thermal_remove(struct proc_thermal_device *proc_priv)
+ }
+ EXPORT_SYMBOL_GPL(proc_thermal_remove);
+
++static int tcc_offset_save = -1;
++
++int proc_thermal_suspend(struct device *dev)
++{
++ tcc_offset_save = tcc_get_offset();
++ if (tcc_offset_save < 0)
++ dev_warn(dev, "failed to save offset (%d)\n", tcc_offset_save);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(proc_thermal_suspend);
++
+ int proc_thermal_resume(struct device *dev)
+ {
+ struct proc_thermal_device *proc_dev;
+@@ -352,6 +371,7 @@ int proc_thermal_resume(struct device *dev)
+ proc_dev = dev_get_drvdata(dev);
+ proc_thermal_read_ppcc(proc_dev);
+
++ /* Do not update if saving failed */
+ if (tcc_offset_save >= 0)
+ tcc_offset_update(tcc_offset_save);
+
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+index 5a1cfe4864f16..9b2a64ef55d02 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
+@@ -80,9 +80,11 @@ void proc_thermal_rfim_remove(struct pci_dev *pdev);
+ int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+ void proc_thermal_mbox_remove(struct pci_dev *pdev);
+
+-int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp);
++int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp);
++int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data);
+ int proc_thermal_add(struct device *dev, struct proc_thermal_device *priv);
+ void proc_thermal_remove(struct proc_thermal_device *proc_priv);
++int proc_thermal_suspend(struct device *dev);
+ int proc_thermal_resume(struct device *dev);
+ int proc_thermal_mmio_add(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv,
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+index 11dd2e825f4ff..b4bcd3fe9eb2f 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+@@ -314,6 +314,20 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev)
+ }
+
+ #ifdef CONFIG_PM_SLEEP
++static int proc_thermal_pci_suspend(struct device *dev)
++{
++ struct pci_dev *pdev = to_pci_dev(dev);
++ struct proc_thermal_device *proc_priv;
++ struct proc_thermal_pci *pci_info;
++
++ proc_priv = pci_get_drvdata(pdev);
++ pci_info = proc_priv->priv_data;
++
++ if (!pci_info->no_legacy)
++ return proc_thermal_suspend(dev);
++
++ return 0;
++}
+ static int proc_thermal_pci_resume(struct device *dev)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+@@ -335,10 +349,12 @@ static int proc_thermal_pci_resume(struct device *dev)
+ return 0;
+ }
+ #else
++#define proc_thermal_pci_suspend NULL
+ #define proc_thermal_pci_resume NULL
+ #endif
+
+-static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, NULL, proc_thermal_pci_resume);
++static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
++ proc_thermal_pci_resume);
+
+ static const struct pci_device_id proc_thermal_pci_ids[] = {
+ { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
+index f5fc1791b11ef..4571a1a53b841 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
+@@ -107,15 +107,21 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev)
+ }
+
+ #ifdef CONFIG_PM_SLEEP
++static int proc_thermal_pci_suspend(struct device *dev)
++{
++ return proc_thermal_suspend(dev);
++}
+ static int proc_thermal_pci_resume(struct device *dev)
+ {
+ return proc_thermal_resume(dev);
+ }
+ #else
++#define proc_thermal_pci_suspend NULL
+ #define proc_thermal_pci_resume NULL
+ #endif
+
+-static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, NULL, proc_thermal_pci_resume);
++static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
++ proc_thermal_pci_resume);
+
+ static const struct pci_device_id proc_thermal_pci_ids[] = {
+ { PCI_DEVICE_DATA(INTEL, BDW_THERMAL, 0) },
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
+index 59e93b04f0a9e..0b89a4340ff4e 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
+@@ -7,6 +7,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/pci.h>
++#include <linux/io-64-nonatomic-lo-hi.h>
+ #include "processor_thermal_device.h"
+
+ #define MBOX_CMD_WORKLOAD_TYPE_READ 0x0E
+@@ -23,19 +24,15 @@
+
+ static DEFINE_MUTEX(mbox_lock);
+
+-static int send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp)
++static int wait_for_mbox_ready(struct proc_thermal_device *proc_priv)
+ {
+- struct proc_thermal_device *proc_priv;
+ u32 retries, data;
+ int ret;
+
+- mutex_lock(&mbox_lock);
+- proc_priv = pci_get_drvdata(pdev);
+-
+ /* Poll for rb bit == 0 */
+ retries = MBOX_RETRY_COUNT;
+ do {
+- data = readl((void __iomem *) (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
++ data = readl(proc_priv->mmio_base + MBOX_OFFSET_INTERFACE);
+ if (data & BIT_ULL(MBOX_BUSY_BIT)) {
+ ret = -EBUSY;
+ continue;
+@@ -44,49 +41,78 @@ static int send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cm
+ break;
+ } while (--retries);
+
++ return ret;
++}
++
++static int send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
++{
++ struct proc_thermal_device *proc_priv;
++ u32 reg_data;
++ int ret;
++
++ proc_priv = pci_get_drvdata(pdev);
++
++ mutex_lock(&mbox_lock);
++
++ ret = wait_for_mbox_ready(proc_priv);
+ if (ret)
+ goto unlock_mbox;
+
+- if (cmd_id == MBOX_CMD_WORKLOAD_TYPE_WRITE)
+- writel(cmd_data, (void __iomem *) ((proc_priv->mmio_base + MBOX_OFFSET_DATA)));
+-
++ writel(data, (proc_priv->mmio_base + MBOX_OFFSET_DATA));
+ /* Write command register */
+- data = BIT_ULL(MBOX_BUSY_BIT) | cmd_id;
+- writel(data, (void __iomem *) ((proc_priv->mmio_base + MBOX_OFFSET_INTERFACE)));
++ reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
++ writel(reg_data, (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
+
+- /* Poll for rb bit == 0 */
+- retries = MBOX_RETRY_COUNT;
+- do {
+- data = readl((void __iomem *) (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
+- if (data & BIT_ULL(MBOX_BUSY_BIT)) {
+- ret = -EBUSY;
+- continue;
+- }
++ ret = wait_for_mbox_ready(proc_priv);
+
+- if (data) {
+- ret = -ENXIO;
+- goto unlock_mbox;
+- }
++unlock_mbox:
++ mutex_unlock(&mbox_lock);
++ return ret;
++}
+
+- if (cmd_id == MBOX_CMD_WORKLOAD_TYPE_READ) {
+- data = readl((void __iomem *) (proc_priv->mmio_base + MBOX_OFFSET_DATA));
+- *cmd_resp = data & 0xff;
+- }
++static int send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
++{
++ struct proc_thermal_device *proc_priv;
++ u32 reg_data;
++ int ret;
+
+- ret = 0;
+- break;
+- } while (--retries);
++ proc_priv = pci_get_drvdata(pdev);
++
++ mutex_lock(&mbox_lock);
++
++ ret = wait_for_mbox_ready(proc_priv);
++ if (ret)
++ goto unlock_mbox;
++
++ /* Write command register */
++ reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
++ writel(reg_data, (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
++
++ ret = wait_for_mbox_ready(proc_priv);
++ if (ret)
++ goto unlock_mbox;
++
++ if (id == MBOX_CMD_WORKLOAD_TYPE_READ)
++ *resp = readl(proc_priv->mmio_base + MBOX_OFFSET_DATA);
++ else
++ *resp = readq(proc_priv->mmio_base + MBOX_OFFSET_DATA);
+
+ unlock_mbox:
+ mutex_unlock(&mbox_lock);
+ return ret;
+ }
+
+-int processor_thermal_send_mbox_cmd(struct pci_dev *pdev, u16 cmd_id, u32 cmd_data, u32 *cmd_resp)
++int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
+ {
+- return send_mbox_cmd(pdev, cmd_id, cmd_data, cmd_resp);
++ return send_mbox_read_cmd(pdev, id, resp);
+ }
+-EXPORT_SYMBOL_GPL(processor_thermal_send_mbox_cmd);
++EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_read_cmd, INT340X_THERMAL);
++
++int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
++{
++ return send_mbox_write_cmd(pdev, id, data);
++}
++EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_write_cmd, INT340X_THERMAL);
+
+ /* List of workload types */
+ static const char * const workload_types[] = {
+@@ -99,7 +125,6 @@ static const char * const workload_types[] = {
+ NULL
+ };
+
+-
+ static ssize_t workload_available_types_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+@@ -141,7 +166,7 @@ static ssize_t workload_type_store(struct device *dev,
+
+ data |= ret;
+
+- ret = send_mbox_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_WRITE, data, NULL);
++ ret = send_mbox_write_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_WRITE, data);
+ if (ret)
+ return false;
+
+@@ -153,10 +178,10 @@ static ssize_t workload_type_show(struct device *dev,
+ char *buf)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+- u32 cmd_resp;
++ u64 cmd_resp;
+ int ret;
+
+- ret = send_mbox_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, 0, &cmd_resp);
++ ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return false;
+
+@@ -181,17 +206,15 @@ static const struct attribute_group workload_req_attribute_group = {
+ .name = "workload_request"
+ };
+
+-
+-
+ static bool workload_req_created;
+
+ int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+ {
+- u32 cmd_resp;
++ u64 cmd_resp;
+ int ret;
+
+ /* Check if there is a mailbox support, if fails return success */
+- ret = send_mbox_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, 0, &cmd_resp);
++ ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return 0;
+
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
+index 2b8a3235d518b..92ed1213fe379 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
+@@ -9,6 +9,8 @@
+ #include <linux/pci.h>
+ #include "processor_thermal_device.h"
+
++MODULE_IMPORT_NS(INT340X_THERMAL);
++
+ struct mmio_reg {
+ int read_only;
+ u32 offset;
+@@ -29,7 +31,7 @@ static const char * const fivr_strings[] = {
+ };
+
+ static const struct mmio_reg tgl_fivr_mmio_regs[] = {
+- { 0, 0x5A18, 3, 0x7, 12}, /* vco_ref_code_lo */
++ { 0, 0x5A18, 3, 0x7, 11}, /* vco_ref_code_lo */
+ { 0, 0x5A18, 8, 0xFF, 16}, /* vco_ref_code_hi */
+ { 0, 0x5A08, 8, 0xFF, 0}, /* spread_spectrum_pct */
+ { 0, 0x5A08, 1, 0x1, 8}, /* spread_spectrum_clk_enable */
+@@ -170,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = {
+ RFIM_SHOW(rfi_restriction_run_busy, 1)
+ RFIM_SHOW(rfi_restriction_err_code, 1)
+ RFIM_SHOW(rfi_restriction_data_rate, 1)
++RFIM_SHOW(rfi_restriction_data_rate_base, 1)
+ RFIM_SHOW(ddr_data_rate_point_0, 1)
+ RFIM_SHOW(ddr_data_rate_point_1, 1)
+ RFIM_SHOW(ddr_data_rate_point_2, 1)
+@@ -179,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1)
+ RFIM_STORE(rfi_restriction_run_busy, 1)
+ RFIM_STORE(rfi_restriction_err_code, 1)
+ RFIM_STORE(rfi_restriction_data_rate, 1)
++RFIM_STORE(rfi_restriction_data_rate_base, 1)
+ RFIM_STORE(rfi_disable, 1)
+
+ static DEVICE_ATTR_RW(rfi_restriction_run_busy);
+ static DEVICE_ATTR_RW(rfi_restriction_err_code);
+ static DEVICE_ATTR_RW(rfi_restriction_data_rate);
++static DEVICE_ATTR_RW(rfi_restriction_data_rate_base);
+ static DEVICE_ATTR_RO(ddr_data_rate_point_0);
+ static DEVICE_ATTR_RO(ddr_data_rate_point_1);
+ static DEVICE_ATTR_RO(ddr_data_rate_point_2);
+@@ -194,8 +199,7 @@ static ssize_t rfi_restriction_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+- u16 cmd_id = 0x0008;
+- u32 cmd_resp;
++ u16 id = 0x0008;
+ u32 input;
+ int ret;
+
+@@ -203,7 +207,7 @@ static ssize_t rfi_restriction_store(struct device *dev,
+ if (ret)
+ return ret;
+
+- ret = processor_thermal_send_mbox_cmd(to_pci_dev(dev), cmd_id, input, &cmd_resp);
++ ret = processor_thermal_send_mbox_write_cmd(to_pci_dev(dev), id, input);
+ if (ret)
+ return ret;
+
+@@ -214,30 +218,30 @@ static ssize_t rfi_restriction_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+- u16 cmd_id = 0x0007;
+- u32 cmd_resp;
++ u16 id = 0x0007;
++ u64 resp;
+ int ret;
+
+- ret = processor_thermal_send_mbox_cmd(to_pci_dev(dev), cmd_id, 0, &cmd_resp);
++ ret = processor_thermal_send_mbox_read_cmd(to_pci_dev(dev), id, &resp);
+ if (ret)
+ return ret;
+
+- return sprintf(buf, "%u\n", cmd_resp);
++ return sprintf(buf, "%llu\n", resp);
+ }
+
+ static ssize_t ddr_data_rate_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+ {
+- u16 cmd_id = 0x0107;
+- u32 cmd_resp;
++ u16 id = 0x0107;
++ u64 resp;
+ int ret;
+
+- ret = processor_thermal_send_mbox_cmd(to_pci_dev(dev), cmd_id, 0, &cmd_resp);
++ ret = processor_thermal_send_mbox_read_cmd(to_pci_dev(dev), id, &resp);
+ if (ret)
+ return ret;
+
+- return sprintf(buf, "%u\n", cmd_resp);
++ return sprintf(buf, "%llu\n", resp);
+ }
+
+ static DEVICE_ATTR_RW(rfi_restriction);
+@@ -247,6 +251,7 @@ static struct attribute *dvfs_attrs[] = {
+ &dev_attr_rfi_restriction_run_busy.attr,
+ &dev_attr_rfi_restriction_err_code.attr,
+ &dev_attr_rfi_restriction_data_rate.attr,
++ &dev_attr_rfi_restriction_data_rate_base.attr,
+ &dev_attr_ddr_data_rate_point_0.attr,
+ &dev_attr_ddr_data_rate_point_1.attr,
+ &dev_attr_ddr_data_rate_point_2.attr,
+diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c
+index 527c91f5960be..768c66046a599 100644
+--- a/drivers/thermal/intel/intel_pch_thermal.c
++++ b/drivers/thermal/intel/intel_pch_thermal.c
+@@ -29,6 +29,7 @@
+ #define PCH_THERMAL_DID_CNL_LP 0x02F9 /* CNL-LP PCH */
+ #define PCH_THERMAL_DID_CML_H 0X06F9 /* CML-H PCH */
+ #define PCH_THERMAL_DID_LWB 0xA1B1 /* Lewisburg PCH */
++#define PCH_THERMAL_DID_WBG 0x8D24 /* Wellsburg PCH */
+
+ /* Wildcat Point-LP PCH Thermal registers */
+ #define WPT_TEMP 0x0000 /* Temperature */
+@@ -345,6 +346,7 @@ enum board_ids {
+ board_cnl,
+ board_cml,
+ board_lwb,
++ board_wbg,
+ };
+
+ static const struct board_info {
+@@ -375,6 +377,10 @@ static const struct board_info {
+ .name = "pch_lewisburg",
+ .ops = &pch_dev_ops_wpt,
+ },
++ [board_wbg] = {
++ .name = "pch_wellsburg",
++ .ops = &pch_dev_ops_wpt,
++ },
+ };
+
+ static int intel_pch_thermal_probe(struct pci_dev *pdev,
+@@ -490,6 +496,8 @@ static const struct pci_device_id intel_pch_thermal_id[] = {
+ .driver_data = board_cml, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB),
+ .driver_data = board_lwb, },
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG),
++ .driver_data = board_wbg, },
+ { 0, },
+ };
+ MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
+diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
+index a5b58ea89cc6d..08ea6cdb25b88 100644
+--- a/drivers/thermal/intel/intel_powerclamp.c
++++ b/drivers/thermal/intel/intel_powerclamp.c
+@@ -57,6 +57,7 @@
+
+ static unsigned int target_mwait;
+ static struct dentry *debug_dir;
++static bool poll_pkg_cstate_enable;
+
+ /* user selected target */
+ static unsigned int set_target_ratio;
+@@ -262,6 +263,9 @@ static unsigned int get_compensation(int ratio)
+ {
+ unsigned int comp = 0;
+
++ if (!poll_pkg_cstate_enable)
++ return 0;
++
+ /* we only use compensation if all adjacent ones are good */
+ if (ratio == 1 &&
+ cal_data[ratio].confidence >= CONFIDENCE_OK &&
+@@ -531,12 +535,11 @@ static int start_power_clamp(void)
+ cpus_read_lock();
+
+ /* prefer BSP */
+- control_cpu = 0;
+- if (!cpu_online(control_cpu))
+- control_cpu = smp_processor_id();
++ control_cpu = cpumask_first(cpu_online_mask);
+
+ clamping = true;
+- schedule_delayed_work(&poll_pkg_cstate_work, 0);
++ if (poll_pkg_cstate_enable)
++ schedule_delayed_work(&poll_pkg_cstate_work, 0);
+
+ /* start one kthread worker per online cpu */
+ for_each_online_cpu(cpu) {
+@@ -605,11 +608,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
+ static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+ {
+- if (true == clamping)
+- *state = pkg_cstate_ratio_cur;
+- else
++ if (clamping) {
++ if (poll_pkg_cstate_enable)
++ *state = pkg_cstate_ratio_cur;
++ else
++ *state = set_target_ratio;
++ } else {
+ /* to save power, do not poll idle ratio while not clamping */
+ *state = -1; /* indicates invalid state */
++ }
+
+ return 0;
+ }
+@@ -734,6 +741,9 @@ static int __init powerclamp_init(void)
+ goto exit_unregister;
+ }
+
++ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
++ poll_pkg_cstate_enable = true;
++
+ cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
+ &powerclamp_cooling_ops);
+ if (IS_ERR(cooling_dev)) {
+diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c
+index 3eafc6b0e6c30..b43fbd5eaa6b4 100644
+--- a/drivers/thermal/intel/intel_quark_dts_thermal.c
++++ b/drivers/thermal/intel/intel_quark_dts_thermal.c
+@@ -415,22 +415,14 @@ MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids);
+
+ static int __init intel_quark_thermal_init(void)
+ {
+- int err = 0;
+-
+ if (!x86_match_cpu(qrk_thermal_ids) || !iosf_mbi_available())
+ return -ENODEV;
+
+ soc_dts = alloc_soc_dts();
+- if (IS_ERR(soc_dts)) {
+- err = PTR_ERR(soc_dts);
+- goto err_free;
+- }
++ if (IS_ERR(soc_dts))
++ return PTR_ERR(soc_dts);
+
+ return 0;
+-
+-err_free:
+- free_soc_dts(soc_dts);
+- return err;
+ }
+
+ static void __exit intel_quark_thermal_exit(void)
+diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
+index 342b0bb5a56d9..8651ff1abe754 100644
+--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
++++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
+@@ -405,7 +405,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
+ {
+ struct intel_soc_dts_sensors *sensors;
+ bool notification;
+- u32 tj_max;
++ int tj_max;
+ int ret;
+ int i;
+
+diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
+index 7d942f71e5328..bfd889422dd32 100644
+--- a/drivers/thermal/qcom/Kconfig
++++ b/drivers/thermal/qcom/Kconfig
+@@ -34,7 +34,7 @@ config QCOM_SPMI_TEMP_ALARM
+
+ config QCOM_LMH
+ tristate "Qualcomm Limits Management Hardware"
+- depends on ARCH_QCOM
++ depends on ARCH_QCOM && QCOM_SCM
+ help
+ This enables initialization of Qualcomm limits management
+ hardware(LMh). LMh allows for hardware-enforced mitigation for cpus based on
+diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
+index eafa7526eb8b4..cc94d8b005d49 100644
+--- a/drivers/thermal/qcom/lmh.c
++++ b/drivers/thermal/qcom/lmh.c
+@@ -43,7 +43,7 @@ static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
+ if (irq)
+ generic_handle_irq(irq);
+
+- return 0;
++ return IRQ_HANDLED;
+ }
+
+ static void lmh_enable_interrupt(struct irq_data *d)
+diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
+index 7419e196dbb06..1037de19873a5 100644
+--- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
++++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
+@@ -251,7 +251,8 @@ static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
+ disable_s2_shutdown = true;
+ else
+ dev_warn(chip->dev,
+- "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n");
++ "No ADC is configured and critical temperature %d mC is above the maximum stage 2 threshold of %ld mC! Configuring stage 2 shutdown at %ld mC.\n",
++ temp, stage2_threshold_max, stage2_threshold_max);
+ }
+
+ skip:
+diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c
+index f136cb3502384..8d036727b99fe 100644
+--- a/drivers/thermal/qcom/tsens-v0_1.c
++++ b/drivers/thermal/qcom/tsens-v0_1.c
+@@ -285,7 +285,7 @@ static int calibrate_8939(struct tsens_priv *priv)
+ u32 p1[10], p2[10];
+ int mode = 0;
+ u32 *qfprom_cdata;
+- u32 cdata[6];
++ u32 cdata[4];
+
+ qfprom_cdata = (u32 *)qfprom_read(priv->dev, "calib");
+ if (IS_ERR(qfprom_cdata))
+@@ -296,8 +296,6 @@ static int calibrate_8939(struct tsens_priv *priv)
+ cdata[1] = qfprom_cdata[13];
+ cdata[2] = qfprom_cdata[0];
+ cdata[3] = qfprom_cdata[1];
+- cdata[4] = qfprom_cdata[22];
+- cdata[5] = qfprom_cdata[21];
+
+ mode = (cdata[0] & MSM8939_CAL_SEL_MASK) >> MSM8939_CAL_SEL_SHIFT;
+ dev_dbg(priv->dev, "calibration mode is %d\n", mode);
+@@ -314,8 +312,6 @@ static int calibrate_8939(struct tsens_priv *priv)
+ p2[6] = (cdata[2] & MSM8939_S6_P2_MASK) >> MSM8939_S6_P2_SHIFT;
+ p2[7] = (cdata[3] & MSM8939_S7_P2_MASK) >> MSM8939_S7_P2_SHIFT;
+ p2[8] = (cdata[3] & MSM8939_S8_P2_MASK) >> MSM8939_S8_P2_SHIFT;
+- p2[9] = (cdata[4] & MSM8939_S9_P2_MASK_0_4) >> MSM8939_S9_P2_SHIFT_0_4;
+- p2[9] |= ((cdata[5] & MSM8939_S9_P2_MASK_5) >> MSM8939_S9_P2_SHIFT_5) << 5;
+ for (i = 0; i < priv->num_sensors; i++)
+ p2[i] = (base1 + p2[i]) << 2;
+ fallthrough;
+@@ -331,7 +327,6 @@ static int calibrate_8939(struct tsens_priv *priv)
+ p1[6] = (cdata[2] & MSM8939_S6_P1_MASK) >> MSM8939_S6_P1_SHIFT;
+ p1[7] = (cdata[3] & MSM8939_S7_P1_MASK) >> MSM8939_S7_P1_SHIFT;
+ p1[8] = (cdata[3] & MSM8939_S8_P1_MASK) >> MSM8939_S8_P1_SHIFT;
+- p1[9] = (cdata[4] & MSM8939_S9_P1_MASK) >> MSM8939_S9_P1_SHIFT;
+ for (i = 0; i < priv->num_sensors; i++)
+ p1[i] = ((base0) + p1[i]) << 2;
+ break;
+@@ -534,6 +529,21 @@ static int calibrate_9607(struct tsens_priv *priv)
+ return 0;
+ }
+
++static int __init init_8939(struct tsens_priv *priv) {
++ priv->sensor[0].slope = 2911;
++ priv->sensor[1].slope = 2789;
++ priv->sensor[2].slope = 2906;
++ priv->sensor[3].slope = 2763;
++ priv->sensor[4].slope = 2922;
++ priv->sensor[5].slope = 2867;
++ priv->sensor[6].slope = 2833;
++ priv->sensor[7].slope = 2838;
++ priv->sensor[8].slope = 2840;
++ /* priv->sensor[9].slope = 2852; */
++
++ return init_common(priv);
++}
++
+ /* v0.1: 8916, 8939, 8974, 9607 */
+
+ static struct tsens_features tsens_v0_1_feat = {
+@@ -596,15 +606,15 @@ struct tsens_plat_data data_8916 = {
+ };
+
+ static const struct tsens_ops ops_8939 = {
+- .init = init_common,
++ .init = init_8939,
+ .calibrate = calibrate_8939,
+ .get_temp = get_temp_common,
+ };
+
+ struct tsens_plat_data data_8939 = {
+- .num_sensors = 10,
++ .num_sensors = 9,
+ .ops = &ops_8939,
+- .hw_ids = (unsigned int []){ 0, 1, 2, 4, 5, 6, 7, 8, 9, 10 },
++ .hw_ids = (unsigned int []){ 0, 1, 2, 3, 5, 6, 7, 8, 9, /* 10 */ },
+
+ .feat = &tsens_v0_1_feat,
+ .fields = tsens_v0_1_regfields,
+diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c
+index 573e261ccca74..faa4576fa028f 100644
+--- a/drivers/thermal/qcom/tsens-v1.c
++++ b/drivers/thermal/qcom/tsens-v1.c
+@@ -78,11 +78,6 @@
+
+ #define MSM8976_CAL_SEL_MASK 0x3
+
+-#define MSM8976_CAL_DEGC_PT1 30
+-#define MSM8976_CAL_DEGC_PT2 120
+-#define MSM8976_SLOPE_FACTOR 1000
+-#define MSM8976_SLOPE_DEFAULT 3200
+-
+ /* eeprom layout data for qcs404/405 (v1) */
+ #define BASE0_MASK 0x000007f8
+ #define BASE1_MASK 0x0007f800
+@@ -142,30 +137,6 @@
+ #define CAL_SEL_MASK 7
+ #define CAL_SEL_SHIFT 0
+
+-static void compute_intercept_slope_8976(struct tsens_priv *priv,
+- u32 *p1, u32 *p2, u32 mode)
+-{
+- int i;
+-
+- priv->sensor[0].slope = 3313;
+- priv->sensor[1].slope = 3275;
+- priv->sensor[2].slope = 3320;
+- priv->sensor[3].slope = 3246;
+- priv->sensor[4].slope = 3279;
+- priv->sensor[5].slope = 3257;
+- priv->sensor[6].slope = 3234;
+- priv->sensor[7].slope = 3269;
+- priv->sensor[8].slope = 3255;
+- priv->sensor[9].slope = 3239;
+- priv->sensor[10].slope = 3286;
+-
+- for (i = 0; i < priv->num_sensors; i++) {
+- priv->sensor[i].offset = (p1[i] * MSM8976_SLOPE_FACTOR) -
+- (MSM8976_CAL_DEGC_PT1 *
+- priv->sensor[i].slope);
+- }
+-}
+-
+ static int calibrate_v1(struct tsens_priv *priv)
+ {
+ u32 base0 = 0, base1 = 0;
+@@ -291,7 +262,7 @@ static int calibrate_8976(struct tsens_priv *priv)
+ break;
+ }
+
+- compute_intercept_slope_8976(priv, p1, p2, mode);
++ compute_intercept_slope(priv, p1, p2, mode);
+ kfree(qfprom_cdata);
+
+ return 0;
+@@ -362,6 +333,22 @@ static const struct reg_field tsens_v1_regfields[MAX_REGFIELDS] = {
+ [TRDY] = REG_FIELD(TM_TRDY_OFF, 0, 0),
+ };
+
++static int __init init_8956(struct tsens_priv *priv) {
++ priv->sensor[0].slope = 3313;
++ priv->sensor[1].slope = 3275;
++ priv->sensor[2].slope = 3320;
++ priv->sensor[3].slope = 3246;
++ priv->sensor[4].slope = 3279;
++ priv->sensor[5].slope = 3257;
++ priv->sensor[6].slope = 3234;
++ priv->sensor[7].slope = 3269;
++ priv->sensor[8].slope = 3255;
++ priv->sensor[9].slope = 3239;
++ priv->sensor[10].slope = 3286;
++
++ return init_common(priv);
++}
++
+ static const struct tsens_ops ops_generic_v1 = {
+ .init = init_common,
+ .calibrate = calibrate_v1,
+@@ -374,13 +361,25 @@ struct tsens_plat_data data_tsens_v1 = {
+ .fields = tsens_v1_regfields,
+ };
+
++static const struct tsens_ops ops_8956 = {
++ .init = init_8956,
++ .calibrate = calibrate_8976,
++ .get_temp = get_temp_tsens_valid,
++};
++
++struct tsens_plat_data data_8956 = {
++ .num_sensors = 11,
++ .ops = &ops_8956,
++ .feat = &tsens_v1_feat,
++ .fields = tsens_v1_regfields,
++};
++
+ static const struct tsens_ops ops_8976 = {
+ .init = init_common,
+ .calibrate = calibrate_8976,
+ .get_temp = get_temp_tsens_valid,
+ };
+
+-/* Valid for both MSM8956 and MSM8976. */
+ struct tsens_plat_data data_8976 = {
+ .num_sensors = 11,
+ .ops = &ops_8976,
+diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
+index b1162e566a707..926cd8b41132c 100644
+--- a/drivers/thermal/qcom/tsens.c
++++ b/drivers/thermal/qcom/tsens.c
+@@ -603,22 +603,21 @@ int get_temp_tsens_valid(const struct tsens_sensor *s, int *temp)
+ int ret;
+
+ /* VER_0 doesn't have VALID bit */
+- if (tsens_version(priv) >= VER_0_1) {
+- ret = regmap_field_read(priv->rf[valid_idx], &valid);
+- if (ret)
+- return ret;
+- while (!valid) {
+- /* Valid bit is 0 for 6 AHB clock cycles.
+- * At 19.2MHz, 1 AHB clock is ~60ns.
+- * We should enter this loop very, very rarely.
+- */
+- ndelay(400);
+- ret = regmap_field_read(priv->rf[valid_idx], &valid);
+- if (ret)
+- return ret;
+- }
+- }
++ if (tsens_version(priv) == VER_0)
++ goto get_temp;
++
++ /* Valid bit is 0 for 6 AHB clock cycles.
++ * At 19.2MHz, 1 AHB clock is ~60ns.
++ * We should enter this loop very, very rarely.
++ * Wait 1 us since it's the min of poll_timeout macro.
++ * Old value was 400 ns.
++ */
++ ret = regmap_field_read_poll_timeout(priv->rf[valid_idx], valid,
++ valid, 1, 20 * USEC_PER_MSEC);
++ if (ret)
++ return ret;
+
++get_temp:
+ /* Valid bit is set, OK to read the temperature */
+ *temp = tsens_hw_to_mC(s, temp_idx);
+
+@@ -979,6 +978,12 @@ static const struct of_device_id tsens_table[] = {
+ }, {
+ .compatible = "qcom,msm8939-tsens",
+ .data = &data_8939,
++ }, {
++ .compatible = "qcom,msm8956-tsens",
++ .data = &data_8956,
++ }, {
++ .compatible = "qcom,msm8960-tsens",
++ .data = &data_8960,
+ }, {
+ .compatible = "qcom,msm8974-tsens",
+ .data = &data_8974,
+diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
+index 1471a2c00f158..c2e5aee159276 100644
+--- a/drivers/thermal/qcom/tsens.h
++++ b/drivers/thermal/qcom/tsens.h
+@@ -590,7 +590,7 @@ extern struct tsens_plat_data data_8960;
+ extern struct tsens_plat_data data_8916, data_8939, data_8974, data_9607;
+
+ /* TSENS v1 targets */
+-extern struct tsens_plat_data data_tsens_v1, data_8976;
++extern struct tsens_plat_data data_tsens_v1, data_8976, data_8956;
+
+ /* TSENS v2 targets */
+ extern struct tsens_plat_data data_8996, data_tsens_v2;
+diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
+index d9cd23cbb6717..cd464db064142 100644
+--- a/drivers/thermal/sun8i_thermal.c
++++ b/drivers/thermal/sun8i_thermal.c
+@@ -319,6 +319,11 @@ out:
+ return ret;
+ }
+
++static void sun8i_ths_reset_control_assert(void *data)
++{
++ reset_control_assert(data);
++}
++
+ static int sun8i_ths_resource_init(struct ths_device *tmdev)
+ {
+ struct device *dev = tmdev->dev;
+@@ -339,47 +344,35 @@ static int sun8i_ths_resource_init(struct ths_device *tmdev)
+ if (IS_ERR(tmdev->reset))
+ return PTR_ERR(tmdev->reset);
+
+- tmdev->bus_clk = devm_clk_get(&pdev->dev, "bus");
++ ret = reset_control_deassert(tmdev->reset);
++ if (ret)
++ return ret;
++
++ ret = devm_add_action_or_reset(dev, sun8i_ths_reset_control_assert,
++ tmdev->reset);
++ if (ret)
++ return ret;
++
++ tmdev->bus_clk = devm_clk_get_enabled(&pdev->dev, "bus");
+ if (IS_ERR(tmdev->bus_clk))
+ return PTR_ERR(tmdev->bus_clk);
+ }
+
+ if (tmdev->chip->has_mod_clk) {
+- tmdev->mod_clk = devm_clk_get(&pdev->dev, "mod");
++ tmdev->mod_clk = devm_clk_get_enabled(&pdev->dev, "mod");
+ if (IS_ERR(tmdev->mod_clk))
+ return PTR_ERR(tmdev->mod_clk);
+ }
+
+- ret = reset_control_deassert(tmdev->reset);
+- if (ret)
+- return ret;
+-
+- ret = clk_prepare_enable(tmdev->bus_clk);
+- if (ret)
+- goto assert_reset;
+-
+ ret = clk_set_rate(tmdev->mod_clk, 24000000);
+ if (ret)
+- goto bus_disable;
+-
+- ret = clk_prepare_enable(tmdev->mod_clk);
+- if (ret)
+- goto bus_disable;
++ return ret;
+
+ ret = sun8i_ths_calibrate(tmdev);
+ if (ret)
+- goto mod_disable;
++ return ret;
+
+ return 0;
+-
+-mod_disable:
+- clk_disable_unprepare(tmdev->mod_clk);
+-bus_disable:
+- clk_disable_unprepare(tmdev->bus_clk);
+-assert_reset:
+- reset_control_assert(tmdev->reset);
+-
+- return ret;
+ }
+
+ static int sun8i_h3_thermal_init(struct ths_device *tmdev)
+@@ -530,17 +523,6 @@ static int sun8i_ths_probe(struct platform_device *pdev)
+ return 0;
+ }
+
+-static int sun8i_ths_remove(struct platform_device *pdev)
+-{
+- struct ths_device *tmdev = platform_get_drvdata(pdev);
+-
+- clk_disable_unprepare(tmdev->mod_clk);
+- clk_disable_unprepare(tmdev->bus_clk);
+- reset_control_assert(tmdev->reset);
+-
+- return 0;
+-}
+-
+ static const struct ths_thermal_chip sun8i_a83t_ths = {
+ .sensor_num = 3,
+ .scale = 705,
+@@ -642,7 +624,6 @@ MODULE_DEVICE_TABLE(of, of_ths_match);
+
+ static struct platform_driver ths_driver = {
+ .probe = sun8i_ths_probe,
+- .remove = sun8i_ths_remove,
+ .driver = {
+ .name = "sun8i-thermal",
+ .of_match_table = of_ths_match,
+diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c
+index 9b6b693cbcf85..05886684f4295 100644
+--- a/drivers/thermal/tegra/tegra30-tsensor.c
++++ b/drivers/thermal/tegra/tegra30-tsensor.c
+@@ -316,7 +316,7 @@ static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd,
+ *hot_trip = 85000;
+ *crit_trip = 90000;
+
+- for (i = 0; i < tzd->trips; i++) {
++ for (i = 0; i < tzd->num_trips; i++) {
+ enum thermal_trip_type type;
+ int trip_temp;
+
+diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
+index 51374f4e1ccaf..052e8e8fbb21e 100644
+--- a/drivers/thermal/thermal_core.c
++++ b/drivers/thermal/thermal_core.c
+@@ -419,6 +419,8 @@ static void thermal_zone_device_init(struct thermal_zone_device *tz)
+ {
+ struct thermal_instance *pos;
+ tz->temperature = THERMAL_TEMP_INVALID;
++ tz->prev_low_trip = -INT_MAX;
++ tz->prev_high_trip = INT_MAX;
+ list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+ pos->initialized = false;
+ }
+@@ -501,7 +503,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
+
+ tz->notify_event = event;
+
+- for (count = 0; count < tz->trips; count++)
++ for (count = 0; count < tz->num_trips; count++)
+ handle_thermal_trip(tz, count);
+ }
+ EXPORT_SYMBOL_GPL(thermal_zone_device_update);
+@@ -623,10 +625,9 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+ struct thermal_instance *pos;
+ struct thermal_zone_device *pos1;
+ struct thermal_cooling_device *pos2;
+- unsigned long max_state;
+- int result, ret;
++ int result;
+
+- if (trip >= tz->trips || trip < 0)
++ if (trip >= tz->num_trips || trip < 0)
+ return -EINVAL;
+
+ list_for_each_entry(pos1, &thermal_tz_list, node) {
+@@ -641,15 +642,11 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+ if (tz != pos1 || cdev != pos2)
+ return -EINVAL;
+
+- ret = cdev->ops->get_max_state(cdev, &max_state);
+- if (ret)
+- return ret;
+-
+ /* lower default 0, upper default max_state */
+ lower = lower == THERMAL_NO_LIMIT ? 0 : lower;
+- upper = upper == THERMAL_NO_LIMIT ? max_state : upper;
++ upper = upper == THERMAL_NO_LIMIT ? cdev->max_state : upper;
+
+- if (lower > upper || upper > max_state)
++ if (lower > upper || upper > cdev->max_state)
+ return -EINVAL;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+@@ -807,7 +804,7 @@ static void __bind(struct thermal_zone_device *tz, int mask,
+ {
+ int i, ret;
+
+- for (i = 0; i < tz->trips; i++) {
++ for (i = 0; i < tz->num_trips; i++) {
+ if (mask & (1 << i)) {
+ unsigned long upper, lower;
+
+@@ -887,7 +884,7 @@ __thermal_cooling_device_register(struct device_node *np,
+ {
+ struct thermal_cooling_device *cdev;
+ struct thermal_zone_device *pos = NULL;
+- int ret;
++ int id, ret;
+
+ if (!ops || !ops->get_max_state || !ops->get_cur_state ||
+ !ops->set_cur_state)
+@@ -901,6 +898,7 @@ __thermal_cooling_device_register(struct device_node *np,
+ if (ret < 0)
+ goto out_kfree_cdev;
+ cdev->id = ret;
++ id = ret;
+
+ cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
+ if (!cdev->type) {
+@@ -915,8 +913,22 @@ __thermal_cooling_device_register(struct device_node *np,
+ cdev->updated = false;
+ cdev->device.class = &thermal_class;
+ cdev->devdata = devdata;
++
++ ret = cdev->ops->get_max_state(cdev, &cdev->max_state);
++ if (ret) {
++ kfree(cdev->type);
++ goto out_ida_remove;
++ }
++
+ thermal_cooling_device_setup_sysfs(cdev);
+- dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
++
++ ret = dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
++ if (ret) {
++ kfree(cdev->type);
++ thermal_cooling_device_destroy_sysfs(cdev);
++ goto out_ida_remove;
++ }
++
+ ret = device_register(&cdev->device);
+ if (ret)
+ goto out_kfree_type;
+@@ -939,10 +951,14 @@ __thermal_cooling_device_register(struct device_node *np,
+ return cdev;
+
+ out_kfree_type:
++ thermal_cooling_device_destroy_sysfs(cdev);
+ kfree(cdev->type);
+ put_device(&cdev->device);
++
++ /* thermal_release() takes care of the rest */
++ cdev = NULL;
+ out_ida_remove:
+- ida_simple_remove(&thermal_cdev_ida, cdev->id);
++ ida_simple_remove(&thermal_cdev_ida, id);
+ out_kfree_cdev:
+ kfree(cdev);
+ return ERR_PTR(ret);
+@@ -1047,7 +1063,7 @@ static void __unbind(struct thermal_zone_device *tz, int mask,
+ {
+ int i;
+
+- for (i = 0; i < tz->trips; i++)
++ for (i = 0; i < tz->num_trips; i++)
+ if (mask & (1 << i))
+ thermal_zone_unbind_cooling_device(tz, i, cdev);
+ }
+@@ -1152,7 +1168,7 @@ exit:
+ /**
+ * thermal_zone_device_register() - register a new thermal zone device
+ * @type: the thermal zone device type
+- * @trips: the number of trip points the thermal zone support
++ * @num_trips: the number of trip points the thermal zone support
+ * @mask: a bit string indicating the writeablility of trip points
+ * @devdata: private device data
+ * @ops: standard thermal zone device callbacks
+@@ -1174,7 +1190,7 @@ exit:
+ * IS_ERR*() helpers.
+ */
+ struct thermal_zone_device *
+-thermal_zone_device_register(const char *type, int trips, int mask,
++thermal_zone_device_register(const char *type, int num_trips, int mask,
+ void *devdata, struct thermal_zone_device_ops *ops,
+ struct thermal_zone_params *tzp, int passive_delay,
+ int polling_delay)
+@@ -1188,27 +1204,27 @@ thermal_zone_device_register(const char *type, int trips, int mask,
+ struct thermal_governor *governor;
+
+ if (!type || strlen(type) == 0) {
+- pr_err("Error: No thermal zone type defined\n");
++ pr_err("No thermal zone type defined\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (type && strlen(type) >= THERMAL_NAME_LENGTH) {
+- pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n",
++ pr_err("Thermal zone name (%s) too long, should be under %d chars\n",
+ type, THERMAL_NAME_LENGTH);
+ return ERR_PTR(-EINVAL);
+ }
+
+- if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) {
+- pr_err("Error: Incorrect number of thermal trips\n");
++ if (num_trips > THERMAL_MAX_TRIPS || num_trips < 0 || mask >> num_trips) {
++ pr_err("Incorrect number of thermal trips\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!ops) {
+- pr_err("Error: Thermal zone device ops not defined\n");
++ pr_err("Thermal zone device ops not defined\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+- if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
++ if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
+ return ERR_PTR(-EINVAL);
+
+ tz = kzalloc(sizeof(*tz), GFP_KERNEL);
+@@ -1234,7 +1250,7 @@ thermal_zone_device_register(const char *type, int trips, int mask,
+ tz->tzp = tzp;
+ tz->device.class = &thermal_class;
+ tz->devdata = devdata;
+- tz->trips = trips;
++ tz->num_trips = num_trips;
+
+ thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
+ thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
+@@ -1248,12 +1264,16 @@ thermal_zone_device_register(const char *type, int trips, int mask,
+ /* A new thermal zone needs to be updated anyway. */
+ atomic_set(&tz->need_update, 1);
+
+- dev_set_name(&tz->device, "thermal_zone%d", tz->id);
++ result = dev_set_name(&tz->device, "thermal_zone%d", tz->id);
++ if (result) {
++ thermal_zone_destroy_device_groups(tz);
++ goto remove_id;
++ }
+ result = device_register(&tz->device);
+ if (result)
+ goto release_device;
+
+- for (count = 0; count < trips; count++) {
++ for (count = 0; count < num_trips; count++) {
+ if (tz->ops->get_trip_type(tz, count, &trip_type) ||
+ tz->ops->get_trip_temp(tz, count, &trip_temp) ||
+ !trip_temp)
+diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
+index 3edd047e144f0..ee7027bdcafa8 100644
+--- a/drivers/thermal/thermal_helpers.c
++++ b/drivers/thermal/thermal_helpers.c
+@@ -90,7 +90,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
+ ret = tz->ops->get_temp(tz, temp);
+
+ if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
+- for (count = 0; count < tz->trips; count++) {
++ for (count = 0; count < tz->num_trips; count++) {
+ ret = tz->ops->get_trip_type(tz, count, &type);
+ if (!ret && type == THERMAL_TRIP_CRITICAL) {
+ ret = tz->ops->get_trip_temp(tz, count,
+@@ -138,7 +138,7 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz)
+ if (!tz->ops->set_trips || !tz->ops->get_trip_hyst)
+ goto exit;
+
+- for (i = 0; i < tz->trips; i++) {
++ for (i = 0; i < tz->num_trips; i++) {
+ int trip_low;
+
+ tz->ops->get_trip_temp(tz, i, &trip_temp);
+diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
+index 1234dbe958951..c70d407c2c714 100644
+--- a/drivers/thermal/thermal_netlink.c
++++ b/drivers/thermal/thermal_netlink.c
+@@ -415,14 +415,15 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p)
+
+ mutex_lock(&tz->lock);
+
+- for (i = 0; i < tz->trips; i++) {
++ for (i = 0; i < tz->num_trips; i++) {
+
+ enum thermal_trip_type type;
+- int temp, hyst;
++ int temp, hyst = 0;
+
+ tz->ops->get_trip_type(tz, i, &type);
+ tz->ops->get_trip_temp(tz, i, &temp);
+- tz->ops->get_trip_hyst(tz, i, &hyst);
++ if (tz->ops->get_trip_hyst)
++ tz->ops->get_trip_hyst(tz, i, &hyst);
+
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, i) ||
+ nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, type) ||
+diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
+index 6379f26a335f6..9233f7e744544 100644
+--- a/drivers/thermal/thermal_of.c
++++ b/drivers/thermal/thermal_of.c
+@@ -89,7 +89,7 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz,
+ {
+ struct __thermal_zone *data = tz->devdata;
+
+- if (!data->ops->get_temp)
++ if (!data->ops || !data->ops->get_temp)
+ return -EINVAL;
+
+ return data->ops->get_temp(data->sensor_data, temp);
+@@ -186,6 +186,9 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz,
+ {
+ struct __thermal_zone *data = tz->devdata;
+
++ if (!data->ops || !data->ops->set_emul_temp)
++ return -EINVAL;
++
+ return data->ops->set_emul_temp(data->sensor_data, temp);
+ }
+
+@@ -194,7 +197,7 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
+ {
+ struct __thermal_zone *data = tz->devdata;
+
+- if (!data->ops->get_trend)
++ if (!data->ops || !data->ops->get_trend)
+ return -EINVAL;
+
+ return data->ops->get_trend(data->sensor_data, trip, trend);
+@@ -301,7 +304,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip,
+ if (trip >= data->ntrips || trip < 0)
+ return -EDOM;
+
+- if (data->ops->set_trip_temp) {
++ if (data->ops && data->ops->set_trip_temp) {
+ int ret;
+
+ ret = data->ops->set_trip_temp(data->sensor_data, trip, temp);
+diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
+index 1c4aac8464a70..de7cdec3db909 100644
+--- a/drivers/thermal/thermal_sysfs.c
++++ b/drivers/thermal/thermal_sysfs.c
+@@ -416,15 +416,15 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ int indx;
+
+ /* This function works only for zones with at least one trip */
+- if (tz->trips <= 0)
++ if (tz->num_trips <= 0)
+ return -EINVAL;
+
+- tz->trip_type_attrs = kcalloc(tz->trips, sizeof(*tz->trip_type_attrs),
++ tz->trip_type_attrs = kcalloc(tz->num_trips, sizeof(*tz->trip_type_attrs),
+ GFP_KERNEL);
+ if (!tz->trip_type_attrs)
+ return -ENOMEM;
+
+- tz->trip_temp_attrs = kcalloc(tz->trips, sizeof(*tz->trip_temp_attrs),
++ tz->trip_temp_attrs = kcalloc(tz->num_trips, sizeof(*tz->trip_temp_attrs),
+ GFP_KERNEL);
+ if (!tz->trip_temp_attrs) {
+ kfree(tz->trip_type_attrs);
+@@ -432,7 +432,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ }
+
+ if (tz->ops->get_trip_hyst) {
+- tz->trip_hyst_attrs = kcalloc(tz->trips,
++ tz->trip_hyst_attrs = kcalloc(tz->num_trips,
+ sizeof(*tz->trip_hyst_attrs),
+ GFP_KERNEL);
+ if (!tz->trip_hyst_attrs) {
+@@ -442,7 +442,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ }
+ }
+
+- attrs = kcalloc(tz->trips * 3 + 1, sizeof(*attrs), GFP_KERNEL);
++ attrs = kcalloc(tz->num_trips * 3 + 1, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs) {
+ kfree(tz->trip_type_attrs);
+ kfree(tz->trip_temp_attrs);
+@@ -451,7 +451,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ return -ENOMEM;
+ }
+
+- for (indx = 0; indx < tz->trips; indx++) {
++ for (indx = 0; indx < tz->num_trips; indx++) {
+ /* create trip type attribute */
+ snprintf(tz->trip_type_attrs[indx].name, THERMAL_NAME_LENGTH,
+ "trip_point_%d_type", indx);
+@@ -478,7 +478,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ tz->trip_temp_attrs[indx].attr.store =
+ trip_point_temp_store;
+ }
+- attrs[indx + tz->trips] = &tz->trip_temp_attrs[indx].attr.attr;
++ attrs[indx + tz->num_trips] = &tz->trip_temp_attrs[indx].attr.attr;
+
+ /* create Optional trip hyst attribute */
+ if (!tz->ops->get_trip_hyst)
+@@ -496,10 +496,10 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
+ tz->trip_hyst_attrs[indx].attr.store =
+ trip_point_hyst_store;
+ }
+- attrs[indx + tz->trips * 2] =
++ attrs[indx + tz->num_trips * 2] =
+ &tz->trip_hyst_attrs[indx].attr.attr;
+ }
+- attrs[tz->trips * 3] = NULL;
++ attrs[tz->num_trips * 3] = NULL;
+
+ tz->trips_attribute_group.attrs = attrs;
+
+@@ -540,7 +540,7 @@ int thermal_zone_create_device_groups(struct thermal_zone_device *tz,
+ for (i = 0; i < size - 2; i++)
+ groups[i] = thermal_zone_attribute_groups[i];
+
+- if (tz->trips) {
++ if (tz->num_trips) {
+ result = create_trip_attrs(tz, mask);
+ if (result) {
+ kfree(groups);
+@@ -561,7 +561,7 @@ void thermal_zone_destroy_device_groups(struct thermal_zone_device *tz)
+ if (!tz)
+ return;
+
+- if (tz->trips)
++ if (tz->num_trips)
+ destroy_trip_attrs(tz);
+
+ kfree(tz->device.groups);
+@@ -580,13 +580,8 @@ static ssize_t max_state_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+ struct thermal_cooling_device *cdev = to_cooling_device(dev);
+- unsigned long state;
+- int ret;
+
+- ret = cdev->ops->get_max_state(cdev, &state);
+- if (ret)
+- return ret;
+- return sprintf(buf, "%ld\n", state);
++ return sprintf(buf, "%ld\n", cdev->max_state);
+ }
+
+ static ssize_t cur_state_show(struct device *dev, struct device_attribute *attr,
+@@ -616,6 +611,10 @@ cur_state_store(struct device *dev, struct device_attribute *attr,
+ if ((long)state < 0)
+ return -EINVAL;
+
++ /* Requested state should be less than max_state + 1 */
++ if (state > cdev->max_state)
++ return -EINVAL;
++
+ mutex_lock(&cdev->lock);
+
+ result = cdev->ops->set_cur_state(cdev, state);
+@@ -813,12 +812,13 @@ static const struct attribute_group cooling_device_stats_attr_group = {
+
+ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+ {
++ const struct attribute_group *stats_attr_group = NULL;
+ struct cooling_dev_stats *stats;
+ unsigned long states;
+ int var;
+
+ if (cdev->ops->get_max_state(cdev, &states))
+- return;
++ goto out;
+
+ states++; /* Total number of states is highest state + 1 */
+
+@@ -828,7 +828,7 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+
+ stats = kzalloc(var, GFP_KERNEL);
+ if (!stats)
+- return;
++ goto out;
+
+ stats->time_in_state = (ktime_t *)(stats + 1);
+ stats->trans_table = (unsigned int *)(stats->time_in_state + states);
+@@ -838,9 +838,12 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+
+ spin_lock_init(&stats->lock);
+
++ stats_attr_group = &cooling_device_stats_attr_group;
++
++out:
+ /* Fill the empty slot left in cooling_device_attr_groups */
+ var = ARRAY_SIZE(cooling_device_attr_groups) - 2;
+- cooling_device_attr_groups[var] = &cooling_device_stats_attr_group;
++ cooling_device_attr_groups[var] = stats_attr_group;
+ }
+
+ static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
+diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c
+index b67e72d5644b3..7c9597a339295 100644
+--- a/drivers/thunderbolt/acpi.c
++++ b/drivers/thunderbolt/acpi.c
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/acpi.h>
++#include <linux/pm_runtime.h>
+
+ #include "tb.h"
+
+@@ -74,8 +75,18 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data,
+ pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM))) {
+ const struct device_link *link;
+
++ /*
++ * Make them both active first to make sure the NHI does
++ * not runtime suspend before the consumer. The
++ * pm_runtime_put() below then allows the consumer to
++ * runtime suspend again (which then allows NHI runtime
++ * suspend too now that the device link is established).
++ */
++ pm_runtime_get_sync(&pdev->dev);
++
+ link = device_link_add(&pdev->dev, &nhi->pdev->dev,
+ DL_FLAG_AUTOREMOVE_SUPPLIER |
++ DL_FLAG_RPM_ACTIVE |
+ DL_FLAG_PM_RUNTIME);
+ if (link) {
+ dev_dbg(&nhi->pdev->dev, "created link from %s\n",
+@@ -84,6 +95,8 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data,
+ dev_warn(&nhi->pdev->dev, "device link creation from %s failed\n",
+ dev_name(&pdev->dev));
+ }
++
++ pm_runtime_put(&pdev->dev);
+ }
+
+ out_put:
+diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c
+index 0fb5e04191e24..409ee1551a7cf 100644
+--- a/drivers/thunderbolt/ctl.c
++++ b/drivers/thunderbolt/ctl.c
+@@ -408,7 +408,7 @@ static void tb_ctl_rx_submit(struct ctl_pkg *pkg)
+
+ static int tb_async_error(const struct ctl_pkg *pkg)
+ {
+- const struct cfg_error_pkg *error = (const struct cfg_error_pkg *)pkg;
++ const struct cfg_error_pkg *error = pkg->buffer;
+
+ if (pkg->frame.eof != TB_CFG_PKG_ERROR)
+ return false;
+diff --git a/drivers/thunderbolt/dma_test.c b/drivers/thunderbolt/dma_test.c
+index 3bedecb236e0d..14bb6dec6c4b0 100644
+--- a/drivers/thunderbolt/dma_test.c
++++ b/drivers/thunderbolt/dma_test.c
+@@ -192,9 +192,9 @@ static int dma_test_start_rings(struct dma_test *dt)
+ }
+
+ ret = tb_xdomain_enable_paths(dt->xd, dt->tx_hopid,
+- dt->tx_ring ? dt->tx_ring->hop : 0,
++ dt->tx_ring ? dt->tx_ring->hop : -1,
+ dt->rx_hopid,
+- dt->rx_ring ? dt->rx_ring->hop : 0);
++ dt->rx_ring ? dt->rx_ring->hop : -1);
+ if (ret) {
+ dma_test_free_rings(dt);
+ return ret;
+@@ -218,9 +218,9 @@ static void dma_test_stop_rings(struct dma_test *dt)
+ tb_ring_stop(dt->tx_ring);
+
+ ret = tb_xdomain_disable_paths(dt->xd, dt->tx_hopid,
+- dt->tx_ring ? dt->tx_ring->hop : 0,
++ dt->tx_ring ? dt->tx_ring->hop : -1,
+ dt->rx_hopid,
+- dt->rx_ring ? dt->rx_ring->hop : 0);
++ dt->rx_ring ? dt->rx_ring->hop : -1);
+ if (ret)
+ dev_warn(&dt->svc->dev, "failed to disable DMA paths\n");
+
+diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
+index 6255f1ef95994..69eead8a6015c 100644
+--- a/drivers/thunderbolt/icm.c
++++ b/drivers/thunderbolt/icm.c
+@@ -2522,6 +2522,7 @@ struct tb *icm_probe(struct tb_nhi *nhi)
+ tb->cm_ops = &icm_icl_ops;
+ break;
+
++ case PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_2C_NHI:
+ case PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_4C_NHI:
+ icm->is_supported = icm_tgl_is_supported;
+ icm->get_mode = icm_ar_get_mode;
+diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
+index c73da0532be4f..7341376140eb7 100644
+--- a/drivers/thunderbolt/nhi.c
++++ b/drivers/thunderbolt/nhi.c
+@@ -25,7 +25,11 @@
+ #define RING_TYPE(ring) ((ring)->is_tx ? "TX ring" : "RX ring")
+
+ #define RING_FIRST_USABLE_HOPID 1
+-
++/*
++ * Used with QUIRK_E2E to specify an unused HopID the Rx credits are
++ * transferred.
++ */
++#define RING_E2E_RESERVED_HOPID RING_FIRST_USABLE_HOPID
+ /*
+ * Minimal number of vectors when we use MSI-X. Two for control channel
+ * Rx/Tx and the rest four are for cross domain DMA paths.
+@@ -35,9 +39,11 @@
+
+ #define NHI_MAILBOX_TIMEOUT 500 /* ms */
+
++/* Host interface quirks */
+ #define QUIRK_AUTO_CLEAR_INT BIT(0)
++#define QUIRK_E2E BIT(1)
+
+-static int ring_interrupt_index(struct tb_ring *ring)
++static int ring_interrupt_index(const struct tb_ring *ring)
+ {
+ int bit = ring->hop;
+ if (!ring->is_tx)
+@@ -45,6 +51,26 @@ static int ring_interrupt_index(struct tb_ring *ring)
+ return bit;
+ }
+
++static void nhi_mask_interrupt(struct tb_nhi *nhi, int mask, int ring)
++{
++ if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) {
++ u32 val;
++
++ val = ioread32(nhi->iobase + REG_RING_INTERRUPT_BASE + ring);
++ iowrite32(val & ~mask, nhi->iobase + REG_RING_INTERRUPT_BASE + ring);
++ } else {
++ iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring);
++ }
++}
++
++static void nhi_clear_interrupt(struct tb_nhi *nhi, int ring)
++{
++ if (nhi->quirks & QUIRK_AUTO_CLEAR_INT)
++ ioread32(nhi->iobase + REG_RING_NOTIFY_BASE + ring);
++ else
++ iowrite32(~0, nhi->iobase + REG_RING_INT_CLEAR + ring);
++}
++
+ /*
+ * ring_interrupt_active() - activate/deactivate interrupts for a single ring
+ *
+@@ -52,15 +78,16 @@ static int ring_interrupt_index(struct tb_ring *ring)
+ */
+ static void ring_interrupt_active(struct tb_ring *ring, bool active)
+ {
+- int reg = REG_RING_INTERRUPT_BASE +
+- ring_interrupt_index(ring) / 32 * 4;
+- int bit = ring_interrupt_index(ring) & 31;
+- int mask = 1 << bit;
++ int index = ring_interrupt_index(ring) / 32 * 4;
++ int reg = REG_RING_INTERRUPT_BASE + index;
++ int interrupt_bit = ring_interrupt_index(ring) & 31;
++ int mask = 1 << interrupt_bit;
+ u32 old, new;
+
+ if (ring->irq > 0) {
+ u32 step, shift, ivr, misc;
+ void __iomem *ivr_base;
++ int auto_clear_bit;
+ int index;
+
+ if (ring->is_tx)
+@@ -68,18 +95,25 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active)
+ else
+ index = ring->hop + ring->nhi->hop_count;
+
+- if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) {
+- /*
+- * Ask the hardware to clear interrupt status
+- * bits automatically since we already know
+- * which interrupt was triggered.
+- */
+- misc = ioread32(ring->nhi->iobase + REG_DMA_MISC);
+- if (!(misc & REG_DMA_MISC_INT_AUTO_CLEAR)) {
+- misc |= REG_DMA_MISC_INT_AUTO_CLEAR;
+- iowrite32(misc, ring->nhi->iobase + REG_DMA_MISC);
+- }
+- }
++ /*
++ * Intel routers support a bit that isn't part of
++ * the USB4 spec to ask the hardware to clear
++ * interrupt status bits automatically since
++ * we already know which interrupt was triggered.
++ *
++ * Other routers explicitly disable auto-clear
++ * to prevent conditions that may occur where two
++ * MSIX interrupts are simultaneously active and
++ * reading the register clears both of them.
++ */
++ misc = ioread32(ring->nhi->iobase + REG_DMA_MISC);
++ if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT)
++ auto_clear_bit = REG_DMA_MISC_INT_AUTO_CLEAR;
++ else
++ auto_clear_bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR;
++ if (!(misc & auto_clear_bit))
++ iowrite32(misc | auto_clear_bit,
++ ring->nhi->iobase + REG_DMA_MISC);
+
+ ivr_base = ring->nhi->iobase + REG_INT_VEC_ALLOC_BASE;
+ step = index / REG_INT_VEC_ALLOC_REGS * REG_INT_VEC_ALLOC_BITS;
+@@ -99,14 +133,18 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active)
+
+ dev_dbg(&ring->nhi->pdev->dev,
+ "%s interrupt at register %#x bit %d (%#x -> %#x)\n",
+- active ? "enabling" : "disabling", reg, bit, old, new);
++ active ? "enabling" : "disabling", reg, interrupt_bit, old, new);
+
+ if (new == old)
+ dev_WARN(&ring->nhi->pdev->dev,
+ "interrupt for %s %d is already %s\n",
+ RING_TYPE(ring), ring->hop,
+ active ? "enabled" : "disabled");
+- iowrite32(new, ring->nhi->iobase + reg);
++
++ if (active)
++ iowrite32(new, ring->nhi->iobase + reg);
++ else
++ nhi_mask_interrupt(ring->nhi, mask, index);
+ }
+
+ /*
+@@ -119,11 +157,11 @@ static void nhi_disable_interrupts(struct tb_nhi *nhi)
+ int i = 0;
+ /* disable interrupts */
+ for (i = 0; i < RING_INTERRUPT_REG_COUNT(nhi); i++)
+- iowrite32(0, nhi->iobase + REG_RING_INTERRUPT_BASE + 4 * i);
++ nhi_mask_interrupt(nhi, ~0, 4 * i);
+
+ /* clear interrupt status bits */
+ for (i = 0; i < RING_NOTIFY_REG_COUNT(nhi); i++)
+- ioread32(nhi->iobase + REG_RING_NOTIFY_BASE + 4 * i);
++ nhi_clear_interrupt(nhi, 4 * i);
+ }
+
+ /* ring helper methods */
+@@ -384,14 +422,17 @@ EXPORT_SYMBOL_GPL(tb_ring_poll_complete);
+
+ static void ring_clear_msix(const struct tb_ring *ring)
+ {
++ int bit;
++
+ if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT)
+ return;
+
++ bit = ring_interrupt_index(ring) & 31;
+ if (ring->is_tx)
+- ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE);
++ iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR);
+ else
+- ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE +
+- 4 * (ring->nhi->hop_count / 32));
++ iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR +
++ 4 * (ring->nhi->hop_count / 32));
+ }
+
+ static irqreturn_t ring_msix(int irq, void *data)
+@@ -455,8 +496,18 @@ static void ring_release_msix(struct tb_ring *ring)
+
+ static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring)
+ {
++ unsigned int start_hop = RING_FIRST_USABLE_HOPID;
+ int ret = 0;
+
++ if (nhi->quirks & QUIRK_E2E) {
++ start_hop = RING_FIRST_USABLE_HOPID + 1;
++ if (ring->flags & RING_FLAG_E2E && !ring->is_tx) {
++ dev_dbg(&nhi->pdev->dev, "quirking E2E TX HopID %u -> %u\n",
++ ring->e2e_tx_hop, RING_E2E_RESERVED_HOPID);
++ ring->e2e_tx_hop = RING_E2E_RESERVED_HOPID;
++ }
++ }
++
+ spin_lock_irq(&nhi->lock);
+
+ if (ring->hop < 0) {
+@@ -466,7 +517,7 @@ static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring)
+ * Automatically allocate HopID from the non-reserved
+ * range 1 .. hop_count - 1.
+ */
+- for (i = RING_FIRST_USABLE_HOPID; i < nhi->hop_count; i++) {
++ for (i = start_hop; i < nhi->hop_count; i++) {
+ if (ring->is_tx) {
+ if (!nhi->tx_rings[i]) {
+ ring->hop = i;
+@@ -481,6 +532,11 @@ static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring)
+ }
+ }
+
++ if (ring->hop > 0 && ring->hop < start_hop) {
++ dev_warn(&nhi->pdev->dev, "invalid hop: %d\n", ring->hop);
++ ret = -EINVAL;
++ goto err_unlock;
++ }
+ if (ring->hop < 0 || ring->hop >= nhi->hop_count) {
+ dev_warn(&nhi->pdev->dev, "invalid hop: %d\n", ring->hop);
+ ret = -EINVAL;
+@@ -1094,12 +1150,26 @@ static void nhi_shutdown(struct tb_nhi *nhi)
+
+ static void nhi_check_quirks(struct tb_nhi *nhi)
+ {
+- /*
+- * Intel hardware supports auto clear of the interrupt status
+- * reqister right after interrupt is being issued.
+- */
+- if (nhi->pdev->vendor == PCI_VENDOR_ID_INTEL)
++ if (nhi->pdev->vendor == PCI_VENDOR_ID_INTEL) {
++ /*
++ * Intel hardware supports auto clear of the interrupt
++ * status register right after interrupt is being
++ * issued.
++ */
+ nhi->quirks |= QUIRK_AUTO_CLEAR_INT;
++
++ switch (nhi->pdev->device) {
++ case PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI:
++ case PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI:
++ /*
++ * Falcon Ridge controller needs the end-to-end
++ * flow control workaround to avoid losing Rx
++ * packets when RING_FLAG_E2E is set.
++ */
++ nhi->quirks |= QUIRK_E2E;
++ break;
++ }
++ }
+ }
+
+ static int nhi_init_msi(struct tb_nhi *nhi)
+diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
+index 69083aab2736c..5091677b3f4ba 100644
+--- a/drivers/thunderbolt/nhi.h
++++ b/drivers/thunderbolt/nhi.h
+@@ -55,6 +55,7 @@ extern const struct tb_nhi_ops icl_nhi_ops;
+ * need for the PCI quirk anymore as we will use ICM also on Apple
+ * hardware.
+ */
++#define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_2C_NHI 0x1134
+ #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_4C_NHI 0x1137
+ #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_NHI 0x157d
+ #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_BRIDGE 0x157e
+diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
+index 0d4970dcef842..6ba2958154770 100644
+--- a/drivers/thunderbolt/nhi_regs.h
++++ b/drivers/thunderbolt/nhi_regs.h
+@@ -77,12 +77,13 @@ struct ring_desc {
+
+ /*
+ * three bitfields: tx, rx, rx overflow
+- * Every bitfield contains one bit for every hop (REG_HOP_COUNT). Registers are
+- * cleared on read. New interrupts are fired only after ALL registers have been
++ * Every bitfield contains one bit for every hop (REG_HOP_COUNT).
++ * New interrupts are fired only after ALL registers have been
+ * read (even those containing only disabled rings).
+ */
+ #define REG_RING_NOTIFY_BASE 0x37800
+ #define RING_NOTIFY_REG_COUNT(nhi) ((31 + 3 * nhi->hop_count) / 32)
++#define REG_RING_INT_CLEAR 0x37808
+
+ /*
+ * two bitfields: rx, tx
+@@ -92,6 +93,8 @@ struct ring_desc {
+ #define REG_RING_INTERRUPT_BASE 0x38200
+ #define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32)
+
++#define REG_RING_INTERRUPT_MASK_CLEAR_BASE 0x38208
++
+ #define REG_INT_THROTTLING_RATE 0x38c00
+
+ /* Interrupt Vector Allocation */
+@@ -105,6 +108,7 @@ struct ring_desc {
+
+ #define REG_DMA_MISC 0x39864
+ #define REG_DMA_MISC_INT_AUTO_CLEAR BIT(2)
++#define REG_DMA_MISC_DISABLE_AUTO_CLEAR BIT(17)
+
+ #define REG_INMAIL_DATA 0x39900
+
+diff --git a/drivers/thunderbolt/path.c b/drivers/thunderbolt/path.c
+index 564e2f42cebd9..299712accfe9b 100644
+--- a/drivers/thunderbolt/path.c
++++ b/drivers/thunderbolt/path.c
+@@ -85,11 +85,12 @@ static int tb_path_find_src_hopid(struct tb_port *src,
+ * @dst_hopid: HopID to the @dst (%-1 if don't care)
+ * @last: Last port is filled here if not %NULL
+ * @name: Name of the path
++ * @alloc_hopid: Allocate HopIDs for the ports
+ *
+ * Follows a path starting from @src and @src_hopid to the last output
+- * port of the path. Allocates HopIDs for the visited ports. Call
+- * tb_path_free() to release the path and allocated HopIDs when the path
+- * is not needed anymore.
++ * port of the path. Allocates HopIDs for the visited ports (if
++ * @alloc_hopid is true). Call tb_path_free() to release the path and
++ * allocated HopIDs when the path is not needed anymore.
+ *
+ * Note function discovers also incomplete paths so caller should check
+ * that the @dst port is the expected one. If it is not, the path can be
+@@ -99,7 +100,8 @@ static int tb_path_find_src_hopid(struct tb_port *src,
+ */
+ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid,
+ struct tb_port *dst, int dst_hopid,
+- struct tb_port **last, const char *name)
++ struct tb_port **last, const char *name,
++ bool alloc_hopid)
+ {
+ struct tb_port *out_port;
+ struct tb_regs_hop hop;
+@@ -156,6 +158,7 @@ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid,
+ path->tb = src->sw->tb;
+ path->path_length = num_hops;
+ path->activated = true;
++ path->alloc_hopid = alloc_hopid;
+
+ path->hops = kcalloc(num_hops, sizeof(*path->hops), GFP_KERNEL);
+ if (!path->hops) {
+@@ -177,13 +180,14 @@ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid,
+ goto err;
+ }
+
+- if (tb_port_alloc_in_hopid(p, h, h) < 0)
++ if (alloc_hopid && tb_port_alloc_in_hopid(p, h, h) < 0)
+ goto err;
+
+ out_port = &sw->ports[hop.out_port];
+ next_hop = hop.next_hop;
+
+- if (tb_port_alloc_out_hopid(out_port, next_hop, next_hop) < 0) {
++ if (alloc_hopid &&
++ tb_port_alloc_out_hopid(out_port, next_hop, next_hop) < 0) {
+ tb_port_release_in_hopid(p, h);
+ goto err;
+ }
+@@ -263,6 +267,8 @@ struct tb_path *tb_path_alloc(struct tb *tb, struct tb_port *src, int src_hopid,
+ return NULL;
+ }
+
++ path->alloc_hopid = true;
++
+ in_hopid = src_hopid;
+ out_port = NULL;
+
+@@ -345,17 +351,19 @@ err:
+ */
+ void tb_path_free(struct tb_path *path)
+ {
+- int i;
+-
+- for (i = 0; i < path->path_length; i++) {
+- const struct tb_path_hop *hop = &path->hops[i];
+-
+- if (hop->in_port)
+- tb_port_release_in_hopid(hop->in_port,
+- hop->in_hop_index);
+- if (hop->out_port)
+- tb_port_release_out_hopid(hop->out_port,
+- hop->next_hop_index);
++ if (path->alloc_hopid) {
++ int i;
++
++ for (i = 0; i < path->path_length; i++) {
++ const struct tb_path_hop *hop = &path->hops[i];
++
++ if (hop->in_port)
++ tb_port_release_in_hopid(hop->in_port,
++ hop->in_hop_index);
++ if (hop->out_port)
++ tb_port_release_out_hopid(hop->out_port,
++ hop->next_hop_index);
++ }
+ }
+
+ kfree(path->hops);
+diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c
+index 722694052f4a8..1b7ab0bbd1328 100644
+--- a/drivers/thunderbolt/retimer.c
++++ b/drivers/thunderbolt/retimer.c
+@@ -208,6 +208,37 @@ static ssize_t nvm_authenticate_show(struct device *dev,
+ return ret;
+ }
+
++static void tb_retimer_nvm_authenticate_status(struct tb_port *port, u32 *status)
++{
++ int i;
++
++ tb_port_dbg(port, "reading NVM authentication status of retimers\n");
++
++ /*
++ * Before doing anything else, read the authentication status.
++ * If the retimer has it set, store it for the new retimer
++ * device instance.
++ */
++ for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++)
++ usb4_port_retimer_nvm_authenticate_status(port, i, &status[i]);
++}
++
++static void tb_retimer_set_inbound_sbtx(struct tb_port *port)
++{
++ int i;
++
++ for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++)
++ usb4_port_retimer_set_inbound_sbtx(port, i);
++}
++
++static void tb_retimer_unset_inbound_sbtx(struct tb_port *port)
++{
++ int i;
++
++ for (i = TB_MAX_RETIMER_INDEX; i >= 1; i--)
++ usb4_port_retimer_unset_inbound_sbtx(port, i);
++}
++
+ static ssize_t nvm_authenticate_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+ {
+@@ -234,6 +265,7 @@ static ssize_t nvm_authenticate_store(struct device *dev,
+ rt->auth_status = 0;
+
+ if (val) {
++ tb_retimer_set_inbound_sbtx(rt->port);
+ if (val == AUTHENTICATE_ONLY) {
+ ret = tb_retimer_nvm_authenticate(rt, true);
+ } else {
+@@ -253,6 +285,7 @@ static ssize_t nvm_authenticate_store(struct device *dev,
+ }
+
+ exit_unlock:
++ tb_retimer_unset_inbound_sbtx(rt->port);
+ mutex_unlock(&rt->tb->lock);
+ exit_rpm:
+ pm_runtime_mark_last_busy(&rt->dev);
+@@ -463,19 +496,16 @@ int tb_retimer_scan(struct tb_port *port, bool add)
+ return ret;
+
+ /*
+- * Enable sideband channel for each retimer. We can do this
+- * regardless whether there is device connected or not.
++ * Immediately after sending enumerate retimers read the
++ * authentication status of each retimer.
+ */
+- for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++)
+- usb4_port_retimer_set_inbound_sbtx(port, i);
++ tb_retimer_nvm_authenticate_status(port, status);
+
+ /*
+- * Before doing anything else, read the authentication status.
+- * If the retimer has it set, store it for the new retimer
+- * device instance.
++ * Enable sideband channel for each retimer. We can do this
++ * regardless whether there is device connected or not.
+ */
+- for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++)
+- usb4_port_retimer_nvm_authenticate_status(port, i, &status[i]);
++ tb_retimer_set_inbound_sbtx(port);
+
+ for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++) {
+ /*
+@@ -490,6 +520,8 @@ int tb_retimer_scan(struct tb_port *port, bool add)
+ break;
+ }
+
++ tb_retimer_unset_inbound_sbtx(port);
++
+ if (!last_idx)
+ return 0;
+
+diff --git a/drivers/thunderbolt/sb_regs.h b/drivers/thunderbolt/sb_regs.h
+index bda889ff3bda5..a8a35b04035be 100644
+--- a/drivers/thunderbolt/sb_regs.h
++++ b/drivers/thunderbolt/sb_regs.h
+@@ -20,6 +20,7 @@ enum usb4_sb_opcode {
+ USB4_SB_OPCODE_ROUTER_OFFLINE = 0x4e45534c, /* "LSEN" */
+ USB4_SB_OPCODE_ENUMERATE_RETIMERS = 0x4d554e45, /* "ENUM" */
+ USB4_SB_OPCODE_SET_INBOUND_SBTX = 0x5055534c, /* "LSUP" */
++ USB4_SB_OPCODE_UNSET_INBOUND_SBTX = 0x50555355, /* "USUP" */
+ USB4_SB_OPCODE_QUERY_LAST_RETIMER = 0x5453414c, /* "LAST" */
+ USB4_SB_OPCODE_GET_NVM_SECTOR_SIZE = 0x53534e47, /* "GNSS" */
+ USB4_SB_OPCODE_NVM_SET_OFFSET = 0x53504f42, /* "BOPS" */
+diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
+index 3014146081c19..8cc9e8c55e402 100644
+--- a/drivers/thunderbolt/switch.c
++++ b/drivers/thunderbolt/switch.c
+@@ -2281,6 +2281,7 @@ int tb_switch_configure(struct tb_switch *sw)
+ * additional capabilities.
+ */
+ sw->config.cmuv = USB4_VERSION_1_0;
++ sw->config.plug_events_delay = 0xa;
+
+ /* Enumerate the switch */
+ ret = tb_sw_write(sw, (u32 *)&sw->config + 1, TB_CFG_SWITCH,
+@@ -2689,6 +2690,26 @@ static void tb_switch_credits_init(struct tb_switch *sw)
+ tb_sw_info(sw, "failed to determine preferred buffer allocation, using defaults\n");
+ }
+
++static int tb_switch_port_hotplug_enable(struct tb_switch *sw)
++{
++ struct tb_port *port;
++
++ if (tb_switch_is_icm(sw))
++ return 0;
++
++ tb_switch_for_each_port(sw, port) {
++ int res;
++
++ if (!port->cap_usb4)
++ continue;
++
++ res = usb4_port_hotplug_enable(port);
++ if (res)
++ return res;
++ }
++ return 0;
++}
++
+ /**
+ * tb_switch_add() - Add a switch to the domain
+ * @sw: Switch to add
+@@ -2729,8 +2750,6 @@ int tb_switch_add(struct tb_switch *sw)
+ }
+ tb_sw_dbg(sw, "uid: %#llx\n", sw->uid);
+
+- tb_check_quirks(sw);
+-
+ ret = tb_switch_set_uuid(sw);
+ if (ret) {
+ dev_err(&sw->dev, "failed to set UUID\n");
+@@ -2749,6 +2768,8 @@ int tb_switch_add(struct tb_switch *sw)
+ }
+ }
+
++ tb_check_quirks(sw);
++
+ tb_switch_default_link_ports(sw);
+
+ ret = tb_switch_update_link_attributes(sw);
+@@ -2760,6 +2781,10 @@ int tb_switch_add(struct tb_switch *sw)
+ return ret;
+ }
+
++ ret = tb_switch_port_hotplug_enable(sw);
++ if (ret)
++ return ret;
++
+ ret = device_add(&sw->dev);
+ if (ret) {
+ dev_err(&sw->dev, "failed to add device: %d\n", ret);
+diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
+index 2897a77d44c34..0c3e1d14cddca 100644
+--- a/drivers/thunderbolt/tb.c
++++ b/drivers/thunderbolt/tb.c
+@@ -105,10 +105,37 @@ static void tb_remove_dp_resources(struct tb_switch *sw)
+ }
+ }
+
+-static void tb_discover_tunnels(struct tb_switch *sw)
++static void tb_discover_dp_resource(struct tb *tb, struct tb_port *port)
+ {
+- struct tb *tb = sw->tb;
+ struct tb_cm *tcm = tb_priv(tb);
++ struct tb_port *p;
++
++ list_for_each_entry(p, &tcm->dp_resources, list) {
++ if (p == port)
++ return;
++ }
++
++ tb_port_dbg(port, "DP %s resource available discovered\n",
++ tb_port_is_dpin(port) ? "IN" : "OUT");
++ list_add_tail(&port->list, &tcm->dp_resources);
++}
++
++static void tb_discover_dp_resources(struct tb *tb)
++{
++ struct tb_cm *tcm = tb_priv(tb);
++ struct tb_tunnel *tunnel;
++
++ list_for_each_entry(tunnel, &tcm->tunnel_list, list) {
++ if (tb_tunnel_is_dp(tunnel))
++ tb_discover_dp_resource(tb, tunnel->dst_port);
++ }
++}
++
++static void tb_switch_discover_tunnels(struct tb_switch *sw,
++ struct list_head *list,
++ bool alloc_hopids)
++{
++ struct tb *tb = sw->tb;
+ struct tb_port *port;
+
+ tb_switch_for_each_port(sw, port) {
+@@ -116,24 +143,41 @@ static void tb_discover_tunnels(struct tb_switch *sw)
+
+ switch (port->config.type) {
+ case TB_TYPE_DP_HDMI_IN:
+- tunnel = tb_tunnel_discover_dp(tb, port);
++ tunnel = tb_tunnel_discover_dp(tb, port, alloc_hopids);
+ break;
+
+ case TB_TYPE_PCIE_DOWN:
+- tunnel = tb_tunnel_discover_pci(tb, port);
++ tunnel = tb_tunnel_discover_pci(tb, port, alloc_hopids);
+ break;
+
+ case TB_TYPE_USB3_DOWN:
+- tunnel = tb_tunnel_discover_usb3(tb, port);
++ tunnel = tb_tunnel_discover_usb3(tb, port, alloc_hopids);
+ break;
+
+ default:
+ break;
+ }
+
+- if (!tunnel)
+- continue;
++ if (tunnel)
++ list_add_tail(&tunnel->list, list);
++ }
+
++ tb_switch_for_each_port(sw, port) {
++ if (tb_port_has_remote(port)) {
++ tb_switch_discover_tunnels(port->remote->sw, list,
++ alloc_hopids);
++ }
++ }
++}
++
++static void tb_discover_tunnels(struct tb *tb)
++{
++ struct tb_cm *tcm = tb_priv(tb);
++ struct tb_tunnel *tunnel;
++
++ tb_switch_discover_tunnels(tb->root_switch, &tcm->tunnel_list, true);
++
++ list_for_each_entry(tunnel, &tcm->tunnel_list, list) {
+ if (tb_tunnel_is_pci(tunnel)) {
+ struct tb_switch *parent = tunnel->dst_port->sw;
+
+@@ -146,13 +190,6 @@ static void tb_discover_tunnels(struct tb_switch *sw)
+ pm_runtime_get_sync(&tunnel->src_port->sw->dev);
+ pm_runtime_get_sync(&tunnel->dst_port->sw->dev);
+ }
+-
+- list_add_tail(&tunnel->list, &tcm->tunnel_list);
+- }
+-
+- tb_switch_for_each_port(sw, port) {
+- if (tb_port_has_remote(port))
+- tb_discover_tunnels(port->remote->sw);
+ }
+ }
+
+@@ -851,7 +888,7 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in)
+
+ static void tb_tunnel_dp(struct tb *tb)
+ {
+- int available_up, available_down, ret;
++ int available_up, available_down, ret, link_nr;
+ struct tb_cm *tcm = tb_priv(tb);
+ struct tb_port *port, *in, *out;
+ struct tb_tunnel *tunnel;
+@@ -896,6 +933,20 @@ static void tb_tunnel_dp(struct tb *tb)
+ return;
+ }
+
++ /*
++ * This is only applicable to links that are not bonded (so
++ * when Thunderbolt 1 hardware is involved somewhere in the
++ * topology). For these try to share the DP bandwidth between
++ * the two lanes.
++ */
++ link_nr = 1;
++ list_for_each_entry(tunnel, &tcm->tunnel_list, list) {
++ if (tb_tunnel_is_dp(tunnel)) {
++ link_nr = 0;
++ break;
++ }
++ }
++
+ /*
+ * DP stream needs the domain to be active so runtime resume
+ * both ends of the tunnel.
+@@ -927,7 +978,8 @@ static void tb_tunnel_dp(struct tb *tb)
+ tb_dbg(tb, "available bandwidth for new DP tunnel %u/%u Mb/s\n",
+ available_up, available_down);
+
+- tunnel = tb_tunnel_alloc_dp(tb, in, out, available_up, available_down);
++ tunnel = tb_tunnel_alloc_dp(tb, in, out, link_nr, available_up,
++ available_down);
+ if (!tunnel) {
+ tb_port_dbg(out, "could not allocate DP tunnel\n");
+ goto err_reclaim;
+@@ -1369,7 +1421,9 @@ static int tb_start(struct tb *tb)
+ /* Full scan to discover devices added before the driver was loaded. */
+ tb_scan_switch(tb->root_switch);
+ /* Find out tunnels created by the boot firmware */
+- tb_discover_tunnels(tb->root_switch);
++ tb_discover_tunnels(tb);
++ /* Add DP resources from the DP tunnels created by the boot firmware */
++ tb_discover_dp_resources(tb);
+ /*
+ * If the boot firmware did not create USB 3.x tunnels create them
+ * now for the whole topology.
+@@ -1429,6 +1483,8 @@ static int tb_resume_noirq(struct tb *tb)
+ {
+ struct tb_cm *tcm = tb_priv(tb);
+ struct tb_tunnel *tunnel, *n;
++ unsigned int usb3_delay = 0;
++ LIST_HEAD(tunnels);
+
+ tb_dbg(tb, "resuming...\n");
+
+@@ -1439,8 +1495,31 @@ static int tb_resume_noirq(struct tb *tb)
+ tb_free_invalid_tunnels(tb);
+ tb_free_unplugged_children(tb->root_switch);
+ tb_restore_children(tb->root_switch);
+- list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list)
++
++ /*
++ * If we get here from suspend to disk the boot firmware or the
++ * restore kernel might have created tunnels of its own. Since
++ * we cannot be sure they are usable for us we find and tear
++ * them down.
++ */
++ tb_switch_discover_tunnels(tb->root_switch, &tunnels, false);
++ list_for_each_entry_safe_reverse(tunnel, n, &tunnels, list) {
++ if (tb_tunnel_is_usb3(tunnel))
++ usb3_delay = 500;
++ tb_tunnel_deactivate(tunnel);
++ tb_tunnel_free(tunnel);
++ }
++
++ /* Re-create our tunnels now */
++ list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) {
++ /* USB3 requires delay before it can be re-activated */
++ if (tb_tunnel_is_usb3(tunnel)) {
++ msleep(usb3_delay);
++ /* Only need to do it once */
++ usb3_delay = 0;
++ }
+ tb_tunnel_restart(tunnel);
++ }
+ if (!list_empty(&tcm->tunnel_list)) {
+ /*
+ * the pcie links need some time to get going.
+diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
+index 725104c83e3d5..db0d3d37772fb 100644
+--- a/drivers/thunderbolt/tb.h
++++ b/drivers/thunderbolt/tb.h
+@@ -354,6 +354,7 @@ enum tb_path_port {
+ * when deactivating this path
+ * @hops: Path hops
+ * @path_length: How many hops the path uses
++ * @alloc_hopid: Does this path consume port HopID
+ *
+ * A path consists of a number of hops (see &struct tb_path_hop). To
+ * establish a PCIe tunnel two paths have to be created between the two
+@@ -374,6 +375,7 @@ struct tb_path {
+ bool clear_fc;
+ struct tb_path_hop *hops;
+ int path_length;
++ bool alloc_hopid;
+ };
+
+ /* HopIDs 0-7 are reserved by the Thunderbolt protocol */
+@@ -957,7 +959,8 @@ int tb_dp_port_enable(struct tb_port *port, bool enable);
+
+ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid,
+ struct tb_port *dst, int dst_hopid,
+- struct tb_port **last, const char *name);
++ struct tb_port **last, const char *name,
++ bool alloc_hopid);
+ struct tb_path *tb_path_alloc(struct tb *tb, struct tb_port *src, int src_hopid,
+ struct tb_port *dst, int dst_hopid, int link_nr,
+ const char *name);
+@@ -1067,6 +1070,7 @@ int usb4_switch_add_ports(struct tb_switch *sw);
+ void usb4_switch_remove_ports(struct tb_switch *sw);
+
+ int usb4_port_unlock(struct tb_port *port);
++int usb4_port_hotplug_enable(struct tb_port *port);
+ int usb4_port_configure(struct tb_port *port);
+ void usb4_port_unconfigure(struct tb_port *port);
+ int usb4_port_configure_xdomain(struct tb_port *port);
+@@ -1076,6 +1080,7 @@ int usb4_port_router_online(struct tb_port *port);
+ int usb4_port_enumerate_retimers(struct tb_port *port);
+
+ int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index);
++int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index);
+ int usb4_port_retimer_read(struct tb_port *port, u8 index, u8 reg, void *buf,
+ u8 size);
+ int usb4_port_retimer_write(struct tb_port *port, u8 index, u8 reg,
+diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
+index 484f25be28490..67f21e6c18135 100644
+--- a/drivers/thunderbolt/tb_regs.h
++++ b/drivers/thunderbolt/tb_regs.h
+@@ -301,6 +301,7 @@ struct tb_regs_port_header {
+ #define ADP_CS_5 0x05
+ #define ADP_CS_5_LCA_MASK GENMASK(28, 22)
+ #define ADP_CS_5_LCA_SHIFT 22
++#define ADP_CS_5_DHP BIT(31)
+
+ /* TMU adapter registers */
+ #define TMU_ADP_CS_3 0x03
+diff --git a/drivers/thunderbolt/test.c b/drivers/thunderbolt/test.c
+index 1f69bab236ee9..66b6e665e96f0 100644
+--- a/drivers/thunderbolt/test.c
++++ b/drivers/thunderbolt/test.c
+@@ -1348,7 +1348,7 @@ static void tb_test_tunnel_dp(struct kunit *test)
+ in = &host->ports[5];
+ out = &dev->ports[13];
+
+- tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, tunnel != NULL);
+ KUNIT_EXPECT_EQ(test, tunnel->type, TB_TUNNEL_DP);
+ KUNIT_EXPECT_PTR_EQ(test, tunnel->src_port, in);
+@@ -1394,7 +1394,7 @@ static void tb_test_tunnel_dp_chain(struct kunit *test)
+ in = &host->ports[5];
+ out = &dev4->ports[14];
+
+- tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, tunnel != NULL);
+ KUNIT_EXPECT_EQ(test, tunnel->type, TB_TUNNEL_DP);
+ KUNIT_EXPECT_PTR_EQ(test, tunnel->src_port, in);
+@@ -1444,7 +1444,7 @@ static void tb_test_tunnel_dp_tree(struct kunit *test)
+ in = &dev2->ports[13];
+ out = &dev5->ports[13];
+
+- tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, tunnel != NULL);
+ KUNIT_EXPECT_EQ(test, tunnel->type, TB_TUNNEL_DP);
+ KUNIT_EXPECT_PTR_EQ(test, tunnel->src_port, in);
+@@ -1509,7 +1509,7 @@ static void tb_test_tunnel_dp_max_length(struct kunit *test)
+ in = &dev6->ports[13];
+ out = &dev12->ports[13];
+
+- tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, tunnel != NULL);
+ KUNIT_EXPECT_EQ(test, tunnel->type, TB_TUNNEL_DP);
+ KUNIT_EXPECT_PTR_EQ(test, tunnel->src_port, in);
+@@ -1627,7 +1627,7 @@ static void tb_test_tunnel_port_on_path(struct kunit *test)
+ in = &dev2->ports[13];
+ out = &dev5->ports[13];
+
+- dp_tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ dp_tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, dp_tunnel != NULL);
+
+ KUNIT_EXPECT_TRUE(test, tb_tunnel_port_on_path(dp_tunnel, in));
+@@ -2009,7 +2009,7 @@ static void tb_test_credit_alloc_dp(struct kunit *test)
+ in = &host->ports[5];
+ out = &dev->ports[14];
+
+- tunnel = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ tunnel = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, tunnel != NULL);
+ KUNIT_ASSERT_EQ(test, tunnel->npaths, (size_t)3);
+
+@@ -2245,7 +2245,7 @@ static struct tb_tunnel *TB_TEST_DP_TUNNEL1(struct kunit *test,
+
+ in = &host->ports[5];
+ out = &dev->ports[13];
+- dp_tunnel1 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ dp_tunnel1 = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, dp_tunnel1 != NULL);
+ KUNIT_ASSERT_EQ(test, dp_tunnel1->npaths, (size_t)3);
+
+@@ -2282,7 +2282,7 @@ static struct tb_tunnel *TB_TEST_DP_TUNNEL2(struct kunit *test,
+
+ in = &host->ports[6];
+ out = &dev->ports[14];
+- dp_tunnel2 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
++ dp_tunnel2 = tb_tunnel_alloc_dp(NULL, in, out, 1, 0, 0);
+ KUNIT_ASSERT_TRUE(test, dp_tunnel2 != NULL);
+ KUNIT_ASSERT_EQ(test, dp_tunnel2->npaths, (size_t)3);
+
+diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
+index bb5cc480fc9a3..42cc4ef02e86e 100644
+--- a/drivers/thunderbolt/tunnel.c
++++ b/drivers/thunderbolt/tunnel.c
+@@ -207,12 +207,14 @@ static int tb_pci_init_path(struct tb_path *path)
+ * tb_tunnel_discover_pci() - Discover existing PCIe tunnels
+ * @tb: Pointer to the domain structure
+ * @down: PCIe downstream adapter
++ * @alloc_hopid: Allocate HopIDs from visited ports
+ *
+ * If @down adapter is active, follows the tunnel to the PCIe upstream
+ * adapter and back. Returns the discovered tunnel or %NULL if there was
+ * no tunnel.
+ */
+-struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down)
++struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down,
++ bool alloc_hopid)
+ {
+ struct tb_tunnel *tunnel;
+ struct tb_path *path;
+@@ -233,7 +235,7 @@ struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down)
+ * case.
+ */
+ path = tb_path_discover(down, TB_PCI_HOPID, NULL, -1,
+- &tunnel->dst_port, "PCIe Up");
++ &tunnel->dst_port, "PCIe Up", alloc_hopid);
+ if (!path) {
+ /* Just disable the downstream port */
+ tb_pci_port_enable(down, false);
+@@ -244,7 +246,7 @@ struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down)
+ goto err_free;
+
+ path = tb_path_discover(tunnel->dst_port, -1, down, TB_PCI_HOPID, NULL,
+- "PCIe Down");
++ "PCIe Down", alloc_hopid);
+ if (!path)
+ goto err_deactivate;
+ tunnel->paths[TB_PCI_PATH_DOWN] = path;
+@@ -761,6 +763,7 @@ static int tb_dp_init_video_path(struct tb_path *path)
+ * tb_tunnel_discover_dp() - Discover existing Display Port tunnels
+ * @tb: Pointer to the domain structure
+ * @in: DP in adapter
++ * @alloc_hopid: Allocate HopIDs from visited ports
+ *
+ * If @in adapter is active, follows the tunnel to the DP out adapter
+ * and back. Returns the discovered tunnel or %NULL if there was no
+@@ -768,7 +771,8 @@ static int tb_dp_init_video_path(struct tb_path *path)
+ *
+ * Return: DP tunnel or %NULL if no tunnel found.
+ */
+-struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in)
++struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in,
++ bool alloc_hopid)
+ {
+ struct tb_tunnel *tunnel;
+ struct tb_port *port;
+@@ -787,7 +791,7 @@ struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in)
+ tunnel->src_port = in;
+
+ path = tb_path_discover(in, TB_DP_VIDEO_HOPID, NULL, -1,
+- &tunnel->dst_port, "Video");
++ &tunnel->dst_port, "Video", alloc_hopid);
+ if (!path) {
+ /* Just disable the DP IN port */
+ tb_dp_port_enable(in, false);
+@@ -797,14 +801,15 @@ struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in)
+ if (tb_dp_init_video_path(tunnel->paths[TB_DP_VIDEO_PATH_OUT]))
+ goto err_free;
+
+- path = tb_path_discover(in, TB_DP_AUX_TX_HOPID, NULL, -1, NULL, "AUX TX");
++ path = tb_path_discover(in, TB_DP_AUX_TX_HOPID, NULL, -1, NULL, "AUX TX",
++ alloc_hopid);
+ if (!path)
+ goto err_deactivate;
+ tunnel->paths[TB_DP_AUX_PATH_OUT] = path;
+ tb_dp_init_aux_path(tunnel->paths[TB_DP_AUX_PATH_OUT]);
+
+ path = tb_path_discover(tunnel->dst_port, -1, in, TB_DP_AUX_RX_HOPID,
+- &port, "AUX RX");
++ &port, "AUX RX", alloc_hopid);
+ if (!path)
+ goto err_deactivate;
+ tunnel->paths[TB_DP_AUX_PATH_IN] = path;
+@@ -843,6 +848,7 @@ err_free:
+ * @tb: Pointer to the domain structure
+ * @in: DP in adapter port
+ * @out: DP out adapter port
++ * @link_nr: Preferred lane adapter when the link is not bonded
+ * @max_up: Maximum available upstream bandwidth for the DP tunnel (%0
+ * if not limited)
+ * @max_down: Maximum available downstream bandwidth for the DP tunnel
+@@ -854,8 +860,8 @@ err_free:
+ * Return: Returns a tb_tunnel on success or NULL on failure.
+ */
+ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in,
+- struct tb_port *out, int max_up,
+- int max_down)
++ struct tb_port *out, int link_nr,
++ int max_up, int max_down)
+ {
+ struct tb_tunnel *tunnel;
+ struct tb_path **paths;
+@@ -879,21 +885,21 @@ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in,
+ paths = tunnel->paths;
+
+ path = tb_path_alloc(tb, in, TB_DP_VIDEO_HOPID, out, TB_DP_VIDEO_HOPID,
+- 1, "Video");
++ link_nr, "Video");
+ if (!path)
+ goto err_free;
+ tb_dp_init_video_path(path);
+ paths[TB_DP_VIDEO_PATH_OUT] = path;
+
+ path = tb_path_alloc(tb, in, TB_DP_AUX_TX_HOPID, out,
+- TB_DP_AUX_TX_HOPID, 1, "AUX TX");
++ TB_DP_AUX_TX_HOPID, link_nr, "AUX TX");
+ if (!path)
+ goto err_free;
+ tb_dp_init_aux_path(path);
+ paths[TB_DP_AUX_PATH_OUT] = path;
+
+ path = tb_path_alloc(tb, out, TB_DP_AUX_RX_HOPID, in,
+- TB_DP_AUX_RX_HOPID, 1, "AUX RX");
++ TB_DP_AUX_RX_HOPID, link_nr, "AUX RX");
+ if (!path)
+ goto err_free;
+ tb_dp_init_aux_path(path);
+@@ -1256,7 +1262,7 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel,
+ return;
+ } else if (!ret) {
+ /* Use maximum link rate if the link valid is not set */
+- ret = usb4_usb3_port_max_link_rate(tunnel->src_port);
++ ret = tb_usb3_max_link_rate(tunnel->dst_port, tunnel->src_port);
+ if (ret < 0) {
+ tb_tunnel_warn(tunnel, "failed to read maximum link rate\n");
+ return;
+@@ -1343,12 +1349,14 @@ static void tb_usb3_init_path(struct tb_path *path)
+ * tb_tunnel_discover_usb3() - Discover existing USB3 tunnels
+ * @tb: Pointer to the domain structure
+ * @down: USB3 downstream adapter
++ * @alloc_hopid: Allocate HopIDs from visited ports
+ *
+ * If @down adapter is active, follows the tunnel to the USB3 upstream
+ * adapter and back. Returns the discovered tunnel or %NULL if there was
+ * no tunnel.
+ */
+-struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down)
++struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down,
++ bool alloc_hopid)
+ {
+ struct tb_tunnel *tunnel;
+ struct tb_path *path;
+@@ -1369,7 +1377,7 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down)
+ * case.
+ */
+ path = tb_path_discover(down, TB_USB3_HOPID, NULL, -1,
+- &tunnel->dst_port, "USB3 Down");
++ &tunnel->dst_port, "USB3 Down", alloc_hopid);
+ if (!path) {
+ /* Just disable the downstream port */
+ tb_usb3_port_enable(down, false);
+@@ -1379,7 +1387,7 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down)
+ tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]);
+
+ path = tb_path_discover(tunnel->dst_port, -1, down, TB_USB3_HOPID, NULL,
+- "USB3 Up");
++ "USB3 Up", alloc_hopid);
+ if (!path)
+ goto err_deactivate;
+ tunnel->paths[TB_USB3_PATH_UP] = path;
+diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h
+index eea14e24f7e0a..bb4d1f1d6d0b0 100644
+--- a/drivers/thunderbolt/tunnel.h
++++ b/drivers/thunderbolt/tunnel.h
+@@ -64,20 +64,23 @@ struct tb_tunnel {
+ int allocated_down;
+ };
+
+-struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down);
++struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down,
++ bool alloc_hopid);
+ struct tb_tunnel *tb_tunnel_alloc_pci(struct tb *tb, struct tb_port *up,
+ struct tb_port *down);
+-struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in);
++struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in,
++ bool alloc_hopid);
+ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in,
+- struct tb_port *out, int max_up,
+- int max_down);
++ struct tb_port *out, int link_nr,
++ int max_up, int max_down);
+ struct tb_tunnel *tb_tunnel_alloc_dma(struct tb *tb, struct tb_port *nhi,
+ struct tb_port *dst, int transmit_path,
+ int transmit_ring, int receive_path,
+ int receive_ring);
+ bool tb_tunnel_match_dma(const struct tb_tunnel *tunnel, int transmit_path,
+ int transmit_ring, int receive_path, int receive_ring);
+-struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down);
++struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down,
++ bool alloc_hopid);
+ struct tb_tunnel *tb_tunnel_alloc_usb3(struct tb *tb, struct tb_port *up,
+ struct tb_port *down, int max_up,
+ int max_down);
+diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
+index ceddbe7e9f93f..36547afa18966 100644
+--- a/drivers/thunderbolt/usb4.c
++++ b/drivers/thunderbolt/usb4.c
+@@ -1068,6 +1068,26 @@ int usb4_port_unlock(struct tb_port *port)
+ return tb_port_write(port, &val, TB_CFG_PORT, ADP_CS_4, 1);
+ }
+
++/**
++ * usb4_port_hotplug_enable() - Enables hotplug for a port
++ * @port: USB4 port to operate on
++ *
++ * Enables hot plug events on a given port. This is only intended
++ * to be used on lane, DP-IN, and DP-OUT adapters.
++ */
++int usb4_port_hotplug_enable(struct tb_port *port)
++{
++ int ret;
++ u32 val;
++
++ ret = tb_port_read(port, &val, TB_CFG_PORT, ADP_CS_5, 1);
++ if (ret)
++ return ret;
++
++ val &= ~ADP_CS_5_DHP;
++ return tb_port_write(port, &val, TB_CFG_PORT, ADP_CS_5, 1);
++}
++
+ static int usb4_port_set_configured(struct tb_port *port, bool configured)
+ {
+ int ret;
+@@ -1421,6 +1441,20 @@ int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index)
+ 500);
+ }
+
++/**
++ * usb4_port_retimer_unset_inbound_sbtx() - Disable sideband channel transactions
++ * @port: USB4 port
++ * @index: Retimer index
++ *
++ * Disables sideband channel transations on SBTX. The reverse of
++ * usb4_port_retimer_set_inbound_sbtx().
++ */
++int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index)
++{
++ return usb4_port_retimer_op(port, index,
++ USB4_SB_OPCODE_UNSET_INBOUND_SBTX, 500);
++}
++
+ /**
+ * usb4_port_retimer_read() - Read from retimer sideband registers
+ * @port: USB4 port
+@@ -1910,18 +1944,30 @@ static int usb4_usb3_port_write_allocated_bandwidth(struct tb_port *port,
+ int downstream_bw)
+ {
+ u32 val, ubw, dbw, scale;
+- int ret;
++ int ret, max_bw;
+
+- /* Read the used scale, hardware default is 0 */
+- ret = tb_port_read(port, &scale, TB_CFG_PORT,
+- port->cap_adap + ADP_USB3_CS_3, 1);
++ /* Figure out suitable scale */
++ scale = 0;
++ max_bw = max(upstream_bw, downstream_bw);
++ while (scale < 64) {
++ if (mbps_to_usb3_bw(max_bw, scale) < 4096)
++ break;
++ scale++;
++ }
++
++ if (WARN_ON(scale >= 64))
++ return -EINVAL;
++
++ ret = tb_port_write(port, &scale, TB_CFG_PORT,
++ port->cap_adap + ADP_USB3_CS_3, 1);
+ if (ret)
+ return ret;
+
+- scale &= ADP_USB3_CS_3_SCALE_MASK;
+ ubw = mbps_to_usb3_bw(upstream_bw, scale);
+ dbw = mbps_to_usb3_bw(downstream_bw, scale);
+
++ tb_port_dbg(port, "scaled bandwidth %u/%u, scale %u\n", ubw, dbw, scale);
++
+ ret = tb_port_read(port, &val, TB_CFG_PORT,
+ port->cap_adap + ADP_USB3_CS_2, 1);
+ if (ret)
+diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
+index d24af649a8bb5..7ca3cd8eb5742 100644
+--- a/drivers/tty/goldfish.c
++++ b/drivers/tty/goldfish.c
+@@ -61,13 +61,13 @@ static void do_rw_io(struct goldfish_tty *qtty,
+ spin_lock_irqsave(&qtty->lock, irq_flags);
+ gf_write_ptr((void *)address, base + GOLDFISH_TTY_REG_DATA_PTR,
+ base + GOLDFISH_TTY_REG_DATA_PTR_HIGH);
+- __raw_writel(count, base + GOLDFISH_TTY_REG_DATA_LEN);
++ gf_iowrite32(count, base + GOLDFISH_TTY_REG_DATA_LEN);
+
+ if (is_write)
+- __raw_writel(GOLDFISH_TTY_CMD_WRITE_BUFFER,
++ gf_iowrite32(GOLDFISH_TTY_CMD_WRITE_BUFFER,
+ base + GOLDFISH_TTY_REG_CMD);
+ else
+- __raw_writel(GOLDFISH_TTY_CMD_READ_BUFFER,
++ gf_iowrite32(GOLDFISH_TTY_CMD_READ_BUFFER,
+ base + GOLDFISH_TTY_REG_CMD);
+
+ spin_unlock_irqrestore(&qtty->lock, irq_flags);
+@@ -142,7 +142,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id)
+ unsigned char *buf;
+ u32 count;
+
+- count = __raw_readl(base + GOLDFISH_TTY_REG_BYTES_READY);
++ count = gf_ioread32(base + GOLDFISH_TTY_REG_BYTES_READY);
+ if (count == 0)
+ return IRQ_NONE;
+
+@@ -151,7 +151,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id)
+ address = (unsigned long)(void *)buf;
+ goldfish_tty_rw(qtty, address, count, 0);
+
+- tty_schedule_flip(&qtty->port);
++ tty_flip_buffer_push(&qtty->port);
+ return IRQ_HANDLED;
+ }
+
+@@ -159,7 +159,7 @@ static int goldfish_tty_activate(struct tty_port *port, struct tty_struct *tty)
+ {
+ struct goldfish_tty *qtty = container_of(port, struct goldfish_tty,
+ port);
+- __raw_writel(GOLDFISH_TTY_CMD_INT_ENABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
++ gf_iowrite32(GOLDFISH_TTY_CMD_INT_ENABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
+ return 0;
+ }
+
+@@ -167,7 +167,7 @@ static void goldfish_tty_shutdown(struct tty_port *port)
+ {
+ struct goldfish_tty *qtty = container_of(port, struct goldfish_tty,
+ port);
+- __raw_writel(GOLDFISH_TTY_CMD_INT_DISABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
++ gf_iowrite32(GOLDFISH_TTY_CMD_INT_DISABLE, qtty->base + GOLDFISH_TTY_REG_CMD);
+ }
+
+ static int goldfish_tty_open(struct tty_struct *tty, struct file *filp)
+@@ -202,7 +202,7 @@ static unsigned int goldfish_tty_chars_in_buffer(struct tty_struct *tty)
+ {
+ struct goldfish_tty *qtty = &goldfish_ttys[tty->index];
+ void __iomem *base = qtty->base;
+- return __raw_readl(base + GOLDFISH_TTY_REG_BYTES_READY);
++ return gf_ioread32(base + GOLDFISH_TTY_REG_BYTES_READY);
+ }
+
+ static void goldfish_tty_console_write(struct console *co, const char *b,
+@@ -357,7 +357,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
+ * on Ranchu emulator (qemu2) returns 1 here and
+ * driver will use physical addresses.
+ */
+- qtty->version = __raw_readl(base + GOLDFISH_TTY_REG_VERSION);
++ qtty->version = gf_ioread32(base + GOLDFISH_TTY_REG_VERSION);
+
+ /*
+ * Goldfish TTY device on Ranchu emulator (qemu2)
+@@ -376,7 +376,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
+ }
+ }
+
+- __raw_writel(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_REG_CMD);
++ gf_iowrite32(GOLDFISH_TTY_CMD_INT_DISABLE, base + GOLDFISH_TTY_REG_CMD);
+
+ ret = request_irq(irq, goldfish_tty_interrupt, IRQF_SHARED,
+ "goldfish_tty", qtty);
+@@ -407,6 +407,7 @@ static int goldfish_tty_probe(struct platform_device *pdev)
+ err_tty_register_device_failed:
+ free_irq(irq, qtty);
+ err_dec_line_count:
++ tty_port_destroy(&qtty->port);
+ goldfish_tty_current_line_count--;
+ if (goldfish_tty_current_line_count == 0)
+ goldfish_tty_delete_driver();
+@@ -427,7 +428,8 @@ static int goldfish_tty_remove(struct platform_device *pdev)
+ tty_unregister_device(goldfish_tty_driver, qtty->console.index);
+ iounmap(qtty->base);
+ qtty->base = NULL;
+- free_irq(qtty->irq, pdev);
++ free_irq(qtty->irq, qtty);
++ tty_port_destroy(&qtty->port);
+ goldfish_tty_current_line_count--;
+ if (goldfish_tty_current_line_count == 0)
+ goldfish_tty_delete_driver();
+@@ -438,7 +440,7 @@ static int goldfish_tty_remove(struct platform_device *pdev)
+ #ifdef CONFIG_GOLDFISH_TTY_EARLY_CONSOLE
+ static void gf_early_console_putchar(struct uart_port *port, int ch)
+ {
+- __raw_writel(ch, port->membase);
++ gf_iowrite32(ch, port->membase);
+ }
+
+ static void gf_early_write(struct console *con, const char *s, unsigned int n)
+diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c
+index 82a76cac94deb..32366caca6623 100644
+--- a/drivers/tty/hvc/hvc_iucv.c
++++ b/drivers/tty/hvc/hvc_iucv.c
+@@ -1417,7 +1417,9 @@ out_error:
+ */
+ static int __init hvc_iucv_config(char *val)
+ {
+- return kstrtoul(val, 10, &hvc_iucv_devices);
++ if (kstrtoul(val, 10, &hvc_iucv_devices))
++ pr_warn("hvc_iucv= invalid parameter value '%s'\n", val);
++ return 1;
+ }
+
+
+diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
+index f0bf01ea069ae..f2f066ce8d9ef 100644
+--- a/drivers/tty/hvc/hvc_xen.c
++++ b/drivers/tty/hvc/hvc_xen.c
+@@ -37,10 +37,13 @@ struct xencons_info {
+ struct xenbus_device *xbdev;
+ struct xencons_interface *intf;
+ unsigned int evtchn;
++ XENCONS_RING_IDX out_cons;
++ unsigned int out_cons_same;
+ struct hvc_struct *hvc;
+ int irq;
+ int vtermno;
+ grant_ref_t gntref;
++ spinlock_t ring_lock;
+ };
+
+ static LIST_HEAD(xenconsoles);
+@@ -50,17 +53,22 @@ static DEFINE_SPINLOCK(xencons_lock);
+
+ static struct xencons_info *vtermno_to_xencons(int vtermno)
+ {
+- struct xencons_info *entry, *n, *ret = NULL;
++ struct xencons_info *entry, *ret = NULL;
++ unsigned long flags;
+
+- if (list_empty(&xenconsoles))
+- return NULL;
++ spin_lock_irqsave(&xencons_lock, flags);
++ if (list_empty(&xenconsoles)) {
++ spin_unlock_irqrestore(&xencons_lock, flags);
++ return NULL;
++ }
+
+- list_for_each_entry_safe(entry, n, &xenconsoles, list) {
++ list_for_each_entry(entry, &xenconsoles, list) {
+ if (entry->vtermno == vtermno) {
+ ret = entry;
+ break;
+ }
+ }
++ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return ret;
+ }
+@@ -82,12 +90,15 @@ static int __write_console(struct xencons_info *xencons,
+ XENCONS_RING_IDX cons, prod;
+ struct xencons_interface *intf = xencons->intf;
+ int sent = 0;
++ unsigned long flags;
+
++ spin_lock_irqsave(&xencons->ring_lock, flags);
+ cons = intf->out_cons;
+ prod = intf->out_prod;
+ mb(); /* update queue values before going on */
+
+ if ((prod - cons) > sizeof(intf->out)) {
++ spin_unlock_irqrestore(&xencons->ring_lock, flags);
+ pr_err_once("xencons: Illegal ring page indices");
+ return -EINVAL;
+ }
+@@ -97,6 +108,7 @@ static int __write_console(struct xencons_info *xencons,
+
+ wmb(); /* write ring before updating pointer */
+ intf->out_prod = prod;
++ spin_unlock_irqrestore(&xencons->ring_lock, flags);
+
+ if (sent)
+ notify_daemon(xencons);
+@@ -138,15 +150,20 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
+ XENCONS_RING_IDX cons, prod;
+ int recv = 0;
+ struct xencons_info *xencons = vtermno_to_xencons(vtermno);
++ unsigned int eoiflag = 0;
++ unsigned long flags;
++
+ if (xencons == NULL)
+ return -EINVAL;
+ intf = xencons->intf;
+
++ spin_lock_irqsave(&xencons->ring_lock, flags);
+ cons = intf->in_cons;
+ prod = intf->in_prod;
+ mb(); /* get pointers before reading ring */
+
+ if ((prod - cons) > sizeof(intf->in)) {
++ spin_unlock_irqrestore(&xencons->ring_lock, flags);
+ pr_err_once("xencons: Illegal ring page indices");
+ return -EINVAL;
+ }
+@@ -157,7 +174,30 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
+ mb(); /* read ring before consuming */
+ intf->in_cons = cons;
+
+- notify_daemon(xencons);
++ /*
++ * When to mark interrupt having been spurious:
++ * - there was no new data to be read, and
++ * - the backend did not consume some output bytes, and
++ * - the previous round with no read data didn't see consumed bytes
++ * (we might have a race with an interrupt being in flight while
++ * updating xencons->out_cons, so account for that by allowing one
++ * round without any visible reason)
++ */
++ if (intf->out_cons != xencons->out_cons) {
++ xencons->out_cons = intf->out_cons;
++ xencons->out_cons_same = 0;
++ }
++ if (!recv && xencons->out_cons_same++ > 1) {
++ eoiflag = XEN_EOI_FLAG_SPURIOUS;
++ }
++ spin_unlock_irqrestore(&xencons->ring_lock, flags);
++
++ if (recv) {
++ notify_daemon(xencons);
++ }
++
++ xen_irq_lateeoi(xencons->irq, eoiflag);
++
+ return recv;
+ }
+
+@@ -199,7 +239,7 @@ static int xen_hvm_console_init(void)
+ {
+ int r;
+ uint64_t v = 0;
+- unsigned long gfn;
++ unsigned long gfn, flags;
+ struct xencons_info *info;
+
+ if (!xen_hvm_domain())
+@@ -210,6 +250,7 @@ static int xen_hvm_console_init(void)
+ info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
++ spin_lock_init(&info->ring_lock);
+ } else if (info->intf != NULL) {
+ /* already configured */
+ return 0;
+@@ -234,9 +275,9 @@ static int xen_hvm_console_init(void)
+ goto err;
+ info->vtermno = HVC_COOKIE;
+
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ list_add_tail(&info->list, &xenconsoles);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
+ err:
+@@ -246,6 +287,7 @@ err:
+
+ static int xencons_info_pv_init(struct xencons_info *info, int vtermno)
+ {
++ spin_lock_init(&info->ring_lock);
+ info->evtchn = xen_start_info->console.domU.evtchn;
+ /* GFN == MFN for PV guest */
+ info->intf = gfn_to_virt(xen_start_info->console.domU.mfn);
+@@ -259,6 +301,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno)
+ static int xen_pv_console_init(void)
+ {
+ struct xencons_info *info;
++ unsigned long flags;
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+@@ -275,9 +318,9 @@ static int xen_pv_console_init(void)
+ /* already configured */
+ return 0;
+ }
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ xencons_info_pv_init(info, HVC_COOKIE);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
+ }
+@@ -285,6 +328,7 @@ static int xen_pv_console_init(void)
+ static int xen_initial_domain_console_init(void)
+ {
+ struct xencons_info *info;
++ unsigned long flags;
+
+ if (!xen_initial_domain())
+ return -ENODEV;
+@@ -294,14 +338,15 @@ static int xen_initial_domain_console_init(void)
+ info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
++ spin_lock_init(&info->ring_lock);
+ }
+
+ info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false);
+ info->vtermno = HVC_COOKIE;
+
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ list_add_tail(&info->list, &xenconsoles);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
+ }
+@@ -356,10 +401,12 @@ static void xencons_free(struct xencons_info *info)
+
+ static int xen_console_remove(struct xencons_info *info)
+ {
++ unsigned long flags;
++
+ xencons_disconnect_backend(info);
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ list_del(&info->list);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+ if (info->xbdev != NULL)
+ xencons_free(info);
+ else {
+@@ -386,7 +433,7 @@ static int xencons_connect_backend(struct xenbus_device *dev,
+ if (ret)
+ return ret;
+ info->evtchn = evtchn;
+- irq = bind_evtchn_to_irq(evtchn);
++ irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
+ if (irq < 0)
+ return irq;
+ info->irq = irq;
+@@ -440,6 +487,7 @@ static int xencons_probe(struct xenbus_device *dev,
+ {
+ int ret, devid;
+ struct xencons_info *info;
++ unsigned long flags;
+
+ devid = dev->nodename[strlen(dev->nodename) - 1] - '0';
+ if (devid == 0)
+@@ -448,6 +496,7 @@ static int xencons_probe(struct xenbus_device *dev,
+ info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
++ spin_lock_init(&info->ring_lock);
+ dev_set_drvdata(&dev->dev, info);
+ info->xbdev = dev;
+ info->vtermno = xenbus_devid_to_vtermno(devid);
+@@ -458,9 +507,9 @@ static int xencons_probe(struct xenbus_device *dev,
+ ret = xencons_connect_backend(dev, info);
+ if (ret < 0)
+ goto error;
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ list_add_tail(&info->list, &xenconsoles);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
+
+@@ -550,7 +599,7 @@ static int __init xen_hvc_init(void)
+ return r;
+
+ info = vtermno_to_xencons(HVC_COOKIE);
+- info->irq = bind_evtchn_to_irq(info->evtchn);
++ info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn);
+ }
+ if (info->irq < 0)
+ info->irq = 0; /* NO_IRQ */
+@@ -559,10 +608,12 @@ static int __init xen_hvc_init(void)
+
+ info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256);
+ if (IS_ERR(info->hvc)) {
++ unsigned long flags;
++
+ r = PTR_ERR(info->hvc);
+- spin_lock(&xencons_lock);
++ spin_lock_irqsave(&xencons_lock, flags);
+ list_del(&info->list);
+- spin_unlock(&xencons_lock);
++ spin_unlock_irqrestore(&xencons_lock, flags);
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, NULL);
+ kfree(info);
+diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
+index bf17e90858b8c..a29ec5a938396 100644
+--- a/drivers/tty/moxa.c
++++ b/drivers/tty/moxa.c
+@@ -1383,7 +1383,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle,
+ if (inited && !tty_throttled(tty) &&
+ MoxaPortRxQueue(p) > 0) { /* RX */
+ MoxaPortReadData(p);
+- tty_schedule_flip(&p->port);
++ tty_flip_buffer_push(&p->port);
+ }
+ } else {
+ clear_bit(EMPTYWAIT, &p->statusflags);
+@@ -1408,7 +1408,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle,
+
+ if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */
+ tty_insert_flip_char(&p->port, 0, TTY_BREAK);
+- tty_schedule_flip(&p->port);
++ tty_flip_buffer_push(&p->port);
+ }
+
+ if (intr & IntrLine)
+diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
+index 1216f3985e18e..3b3e169c1f699 100644
+--- a/drivers/tty/mxser.c
++++ b/drivers/tty/mxser.c
+@@ -261,7 +261,6 @@ struct mxser_port {
+ unsigned int xmit_head;
+ unsigned int xmit_tail;
+ unsigned int xmit_cnt;
+- int closing;
+
+ spinlock_t slock;
+ };
+@@ -712,6 +711,7 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty)
+ struct mxser_port *info = container_of(port, struct mxser_port, port);
+ unsigned long page;
+ unsigned long flags;
++ int ret;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+@@ -721,9 +721,9 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty)
+
+ if (!info->type) {
+ set_bit(TTY_IO_ERROR, &tty->flags);
+- free_page(page);
+ spin_unlock_irqrestore(&info->slock, flags);
+- return 0;
++ ret = 0;
++ goto err_free_xmit;
+ }
+ info->port.xmit_buf = (unsigned char *) page;
+
+@@ -749,8 +749,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty)
+ if (capable(CAP_SYS_ADMIN)) {
+ set_bit(TTY_IO_ERROR, &tty->flags);
+ return 0;
+- } else
+- return -ENODEV;
++ }
++
++ ret = -ENODEV;
++ goto err_free_xmit;
+ }
+
+ /*
+@@ -795,6 +797,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty)
+ spin_unlock_irqrestore(&info->slock, flags);
+
+ return 0;
++err_free_xmit:
++ free_page(page);
++ info->port.xmit_buf = NULL;
++ return ret;
+ }
+
+ /*
+@@ -923,7 +929,6 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
+ return;
+ if (tty_port_close_start(port, tty, filp) == 0)
+ return;
+- info->closing = 1;
+ mutex_lock(&port->mutex);
+ mxser_close_port(port);
+ mxser_flush_buffer(tty);
+@@ -932,7 +937,6 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
+ mxser_shutdown_port(port);
+ tty_port_set_initialized(port, 0);
+ mutex_unlock(&port->mutex);
+- info->closing = 0;
+ /* Right now the tty_port set is done outside of the close_end helper
+ as we don't yet have everyone using refcounts */
+ tty_port_close_end(port, tty);
+@@ -1693,7 +1697,7 @@ static bool mxser_port_isr(struct mxser_port *port)
+
+ iir &= MOXA_MUST_IIR_MASK;
+ tty = tty_port_tty_get(&port->port);
+- if (!tty || port->closing || !tty_port_initialized(&port->port)) {
++ if (!tty) {
+ status = inb(port->ioaddr + UART_LSR);
+ outb(MOXA_MUST_FCR_GDA_MODE_ENABLE | UART_FCR_ENABLE_FIFO |
+ UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT,
+diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
+index 1d92d2a848894..67889c0144142 100644
+--- a/drivers/tty/n_gsm.c
++++ b/drivers/tty/n_gsm.c
+@@ -73,6 +73,8 @@ module_param(debug, int, 0600);
+ */
+ #define MAX_MRU 1500
+ #define MAX_MTU 1500
++/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */
++#define PROT_OVERHEAD 7
+ #define GSM_NET_TX_TIMEOUT (HZ*10)
+
+ /*
+@@ -135,6 +137,7 @@ struct gsm_dlci {
+ int retries;
+ /* Uplink tty if active */
+ struct tty_port port; /* The tty bound to this DLCI if there is one */
++#define TX_SIZE 4096 /* Must be power of 2. */
+ struct kfifo fifo; /* Queue fifo for the DLCI */
+ int adaption; /* Adaption layer in use */
+ int prev_adaption;
+@@ -219,7 +222,6 @@ struct gsm_mux {
+ int encoding;
+ u8 control;
+ u8 fcs;
+- u8 received_fcs;
+ u8 *txframe; /* TX framing buffer */
+
+ /* Method for the receiver side */
+@@ -231,7 +233,9 @@ struct gsm_mux {
+ int initiator; /* Did we initiate connection */
+ bool dead; /* Has the mux been shut down */
+ struct gsm_dlci *dlci[NUM_DLCI];
++ int old_c_iflag; /* termios c_iflag value before attach */
+ bool constipated; /* Asked by remote to shut up */
++ bool has_devices; /* Devices were registered */
+
+ spinlock_t tx_lock;
+ unsigned int tx_bytes; /* TX data outstanding */
+@@ -240,6 +244,7 @@ struct gsm_mux {
+ struct list_head tx_list; /* Pending data packets */
+
+ /* Control messages */
++ struct timer_list kick_timer; /* Kick TX queuing on timeout */
+ struct timer_list t2_timer; /* Retransmit timer for commands */
+ int cretries; /* Command retry counter */
+ struct gsm_control *pending_cmd;/* Our current pending command */
+@@ -271,6 +276,10 @@ static DEFINE_SPINLOCK(gsm_mux_lock);
+
+ static struct tty_driver *gsm_tty_driver;
+
++/* Save dlci open address */
++static int addr_open[256] = { 0 };
++/* Save dlci open count */
++static int addr_cnt;
+ /*
+ * This section of the driver logic implements the GSM encodings
+ * both the basic and the 'advanced'. Reliable transport is not
+@@ -318,6 +327,7 @@ static struct tty_driver *gsm_tty_driver;
+ #define GSM1_ESCAPE_BITS 0x20
+ #define XON 0x11
+ #define XOFF 0x13
++#define ISO_IEC_646_MASK 0x7F
+
+ static const struct tty_port_operations gsm_port_ops;
+
+@@ -364,6 +374,7 @@ static const u8 gsm_fcs8[256] = {
+ #define GOOD_FCS 0xCF
+
+ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len);
++static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk);
+
+ /**
+ * gsm_fcs_add - update FCS
+@@ -414,6 +425,27 @@ static int gsm_read_ea(unsigned int *val, u8 c)
+ return c & EA;
+ }
+
++/**
++ * gsm_read_ea_val - read a value until EA
++ * @val: variable holding value
++ * @data: buffer of data
++ * @dlen: length of data
++ *
++ * Processes an EA value. Updates the passed variable and
++ * returns the processed data length.
++ */
++static unsigned int gsm_read_ea_val(unsigned int *val, const u8 *data, int dlen)
++{
++ unsigned int len = 0;
++
++ for (; dlen > 0; dlen--) {
++ len++;
++ if (gsm_read_ea(val, *data++))
++ break;
++ }
++ return len;
++}
++
+ /**
+ * gsm_encode_modem - encode modem data bits
+ * @dlci: DLCI to encode from
+@@ -434,16 +466,97 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci)
+ modembits |= MDM_RTR;
+ if (dlci->modem_tx & TIOCM_RI)
+ modembits |= MDM_IC;
+- if (dlci->modem_tx & TIOCM_CD)
++ if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator)
+ modembits |= MDM_DV;
+ return modembits;
+ }
+
++static void gsm_hex_dump_bytes(const char *fname, const u8 *data,
++ unsigned long len)
++{
++ char *prefix;
++
++ if (!fname) {
++ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, data, len,
++ true);
++ return;
++ }
++
++ prefix = kasprintf(GFP_ATOMIC, "%s: ", fname);
++ if (!prefix)
++ return;
++ print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len,
++ true);
++ kfree(prefix);
++}
++
++/**
++ * gsm_register_devices - register all tty devices for a given mux index
++ *
++ * @driver: the tty driver that describes the tty devices
++ * @index: the mux number is used to calculate the minor numbers of the
++ * ttys for this mux and may differ from the position in the
++ * mux array.
++ */
++static int gsm_register_devices(struct tty_driver *driver, unsigned int index)
++{
++ struct device *dev;
++ int i;
++ unsigned int base;
++
++ if (!driver || index >= MAX_MUX)
++ return -EINVAL;
++
++ base = index * NUM_DLCI; /* first minor for this index */
++ for (i = 1; i < NUM_DLCI; i++) {
++ /* Don't register device 0 - this is the control channel
++ * and not a usable tty interface
++ */
++ dev = tty_register_device(gsm_tty_driver, base + i, NULL);
++ if (IS_ERR(dev)) {
++ if (debug & 8)
++ pr_info("%s failed to register device minor %u",
++ __func__, base + i);
++ for (i--; i >= 1; i--)
++ tty_unregister_device(gsm_tty_driver, base + i);
++ return PTR_ERR(dev);
++ }
++ }
++
++ return 0;
++}
++
++/**
++ * gsm_unregister_devices - unregister all tty devices for a given mux index
++ *
++ * @driver: the tty driver that describes the tty devices
++ * @index: the mux number is used to calculate the minor numbers of the
++ * ttys for this mux and may differ from the position in the
++ * mux array.
++ */
++static void gsm_unregister_devices(struct tty_driver *driver,
++ unsigned int index)
++{
++ int i;
++ unsigned int base;
++
++ if (!driver || index >= MAX_MUX)
++ return;
++
++ base = index * NUM_DLCI; /* first minor for this index */
++ for (i = 1; i < NUM_DLCI; i++) {
++ /* Don't unregister device 0 - this is the control
++ * channel and not a usable tty interface
++ */
++ tty_unregister_device(gsm_tty_driver, base + i);
++ }
++}
++
+ /**
+ * gsm_print_packet - display a frame for debug
+ * @hdr: header to print before decode
+ * @addr: address EA from the frame
+- * @cr: C/R bit from the frame
++ * @cr: C/R bit seen as initiator
+ * @control: control including PF bit
+ * @data: following data bytes
+ * @dlen: length of data
+@@ -503,7 +616,7 @@ static void gsm_print_packet(const char *hdr, int addr, int cr,
+ else
+ pr_cont("(F)");
+
+- print_hex_dump_bytes("", DUMP_PREFIX_NONE, data, dlen);
++ gsm_hex_dump_bytes(NULL, data, dlen);
+ }
+
+
+@@ -527,7 +640,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
+ int olen = 0;
+ while (len--) {
+ if (*input == GSM1_SOF || *input == GSM1_ESCAPE
+- || *input == XON || *input == XOFF) {
++ || (*input & ISO_IEC_646_MASK) == XON
++ || (*input & ISO_IEC_646_MASK) == XOFF) {
+ *output++ = GSM1_ESCAPE;
+ *output++ = *input++ ^ GSM1_ESCAPE_BITS;
+ olen++;
+@@ -542,7 +656,7 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
+ * gsm_send - send a control frame
+ * @gsm: our GSM mux
+ * @addr: address for control frame
+- * @cr: command/response bit
++ * @cr: command/response bit seen as initiator
+ * @control: control byte including PF bit
+ *
+ * Format up and transmit a control frame. These do not go via the
+@@ -557,11 +671,15 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
+ int len;
+ u8 cbuf[10];
+ u8 ibuf[3];
++ int ocr;
++
++ /* toggle C/R coding if not initiator */
++ ocr = cr ^ (gsm->initiator ? 0 : 1);
+
+ switch (gsm->encoding) {
+ case 0:
+ cbuf[0] = GSM0_SOF;
+- cbuf[1] = (addr << 2) | (cr << 1) | EA;
++ cbuf[1] = (addr << 2) | (ocr << 1) | EA;
+ cbuf[2] = control;
+ cbuf[3] = EA; /* Length of data = 0 */
+ cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3);
+@@ -571,7 +689,7 @@ static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
+ case 1:
+ case 2:
+ /* Control frame + packing (but not frame stuffing) in mode 1 */
+- ibuf[0] = (addr << 2) | (cr << 1) | EA;
++ ibuf[0] = (addr << 2) | (ocr << 1) | EA;
+ ibuf[1] = control;
+ ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2);
+ /* Stuffing may double the size worst case */
+@@ -649,6 +767,37 @@ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len,
+ return m;
+ }
+
++/**
++ * gsm_is_flow_ctrl_msg - checks if flow control message
++ * @msg: message to check
++ *
++ * Returns true if the given message is a flow control command of the
++ * control channel. False is returned in any other case.
++ */
++static bool gsm_is_flow_ctrl_msg(struct gsm_msg *msg)
++{
++ unsigned int cmd;
++
++ if (msg->addr > 0)
++ return false;
++
++ switch (msg->ctrl & ~PF) {
++ case UI:
++ case UIH:
++ cmd = 0;
++ if (gsm_read_ea_val(&cmd, msg->data + 2, msg->len - 2) < 1)
++ break;
++ switch (cmd & ~PF) {
++ case CMD_FCOFF:
++ case CMD_FCON:
++ return true;
++ }
++ break;
++ }
++
++ return false;
++}
++
+ /**
+ * gsm_data_kick - poke the queue
+ * @gsm: GSM Mux
+@@ -668,7 +817,7 @@ static void gsm_data_kick(struct gsm_mux *gsm, struct gsm_dlci *dlci)
+ int len;
+
+ list_for_each_entry_safe(msg, nmsg, &gsm->tx_list, list) {
+- if (gsm->constipated && msg->addr)
++ if (gsm->constipated && !gsm_is_flow_ctrl_msg(msg))
+ continue;
+ if (gsm->encoding != 0) {
+ gsm->txframe[0] = GSM1_SOF;
+@@ -684,10 +833,8 @@ static void gsm_data_kick(struct gsm_mux *gsm, struct gsm_dlci *dlci)
+ }
+
+ if (debug & 4)
+- print_hex_dump_bytes("gsm_data_kick: ",
+- DUMP_PREFIX_OFFSET,
+- gsm->txframe, len);
+- if (gsmld_output(gsm, gsm->txframe, len) < 0)
++ gsm_hex_dump_bytes(__func__, gsm->txframe, len);
++ if (gsmld_output(gsm, gsm->txframe, len) <= 0)
+ break;
+ /* FIXME: Can eliminate one SOF in many more cases */
+ gsm->tx_bytes -= msg->len;
+@@ -756,6 +903,7 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg)
+ list_add_tail(&msg->list, &gsm->tx_list);
+ gsm->tx_bytes += msg->len;
+ gsm_data_kick(gsm, dlci);
++ mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100);
+ }
+
+ /**
+@@ -792,41 +940,48 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
+ {
+ struct gsm_msg *msg;
+ u8 *dp;
+- int len, total_size, size;
+- int h = dlci->adaption - 1;
++ int h, len, size;
+
+- total_size = 0;
+- while (1) {
+- len = kfifo_len(&dlci->fifo);
+- if (len == 0)
+- return total_size;
+-
+- /* MTU/MRU count only the data bits */
+- if (len > gsm->mtu)
+- len = gsm->mtu;
+-
+- size = len + h;
+-
+- msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+- /* FIXME: need a timer or something to kick this so it can't
+- get stuck with no work outstanding and no buffer free */
+- if (msg == NULL)
+- return -ENOMEM;
+- dp = msg->data;
+- switch (dlci->adaption) {
+- case 1: /* Unstructured */
+- break;
+- case 2: /* Unstructed with modem bits.
+- Always one byte as we never send inline break data */
+- *dp++ = gsm_encode_modem(dlci);
+- break;
+- }
+- WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len);
+- __gsm_data_queue(dlci, msg);
+- total_size += size;
++ /* for modem bits without break data */
++ h = ((dlci->adaption == 1) ? 0 : 1);
++
++ len = kfifo_len(&dlci->fifo);
++ if (len == 0)
++ return 0;
++
++ /* MTU/MRU count only the data bits but watch adaption mode */
++ if ((len + h) > gsm->mtu)
++ len = gsm->mtu - h;
++
++ size = len + h;
++
++ msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
++ if (!msg)
++ return -ENOMEM;
++ dp = msg->data;
++ switch (dlci->adaption) {
++ case 1: /* Unstructured */
++ break;
++ case 2: /* Unstructured with modem bits.
++ * Always one byte as we never send inline break data
++ */
++ *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
++ break;
++ default:
++ pr_err("%s: unsupported adaption %d\n", __func__,
++ dlci->adaption);
++ break;
+ }
++
++ WARN_ON(len != kfifo_out_locked(&dlci->fifo, dp, len,
++ &dlci->lock));
++
++ /* Notify upper layer about available send space. */
++ tty_port_tty_wakeup(&dlci->port);
++
++ __gsm_data_queue(dlci, msg);
+ /* Bytes of data we used up */
+- return total_size;
++ return size;
+ }
+
+ /**
+@@ -877,9 +1032,6 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
+
+ size = len + overhead;
+ msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+-
+- /* FIXME: need a timer or something to kick this so it can't
+- get stuck with no work outstanding and no buffer free */
+ if (msg == NULL) {
+ skb_queue_tail(&dlci->skb_list, dlci->skb);
+ dlci->skb = NULL;
+@@ -902,6 +1054,66 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
+ return size;
+ }
+
++/**
++ * gsm_dlci_modem_output - try and push modem status out of a DLCI
++ * @gsm: mux
++ * @dlci: the DLCI to pull modem status from
++ * @brk: break signal
++ *
++ * Push an empty frame in to the transmit queue to update the modem status
++ * bits and to transmit an optional break.
++ *
++ * Caller must hold the tx_lock of the mux.
++ */
++
++static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci,
++ u8 brk)
++{
++ u8 *dp = NULL;
++ struct gsm_msg *msg;
++ int size = 0;
++
++ /* for modem bits without break data */
++ switch (dlci->adaption) {
++ case 1: /* Unstructured */
++ break;
++ case 2: /* Unstructured with modem bits. */
++ size++;
++ if (brk > 0)
++ size++;
++ break;
++ default:
++ pr_err("%s: unsupported adaption %d\n", __func__,
++ dlci->adaption);
++ return -EINVAL;
++ }
++
++ msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
++ if (!msg) {
++ pr_err("%s: gsm_data_alloc error", __func__);
++ return -ENOMEM;
++ }
++ dp = msg->data;
++ switch (dlci->adaption) {
++ case 1: /* Unstructured */
++ break;
++ case 2: /* Unstructured with modem bits. */
++ if (brk == 0) {
++ *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
++ } else {
++ *dp++ = gsm_encode_modem(dlci) << 1;
++ *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */
++ }
++ break;
++ default:
++ /* Handled above */
++ break;
++ }
++
++ __gsm_data_queue(dlci, msg);
++ return size;
++}
++
+ /**
+ * gsm_dlci_data_sweep - look for data to send
+ * @gsm: the GSM mux
+@@ -915,9 +1127,9 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
+ * renegotiate DLCI priorities with optional stuff. Needs optimising.
+ */
+
+-static void gsm_dlci_data_sweep(struct gsm_mux *gsm)
++static int gsm_dlci_data_sweep(struct gsm_mux *gsm)
+ {
+- int len;
++ int len, ret = 0;
+ /* Priority ordering: We should do priority with RR of the groups */
+ int i = 1;
+
+@@ -940,7 +1152,11 @@ static void gsm_dlci_data_sweep(struct gsm_mux *gsm)
+ /* DLCI empty - try the next */
+ if (len == 0)
+ i++;
++ else
++ ret++;
+ }
++
++ return ret;
+ }
+
+ /**
+@@ -1007,25 +1223,25 @@ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data,
+ * @tty: virtual tty bound to the DLCI
+ * @dlci: DLCI to affect
+ * @modem: modem bits (full EA)
+- * @clen: command length
++ * @slen: number of signal octets
+ *
+ * Used when a modem control message or line state inline in adaption
+ * layer 2 is processed. Sort out the local modem state and throttles
+ */
+
+ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci,
+- u32 modem, int clen)
++ u32 modem, int slen)
+ {
+ int mlines = 0;
+ u8 brk = 0;
+ int fc;
+
+- /* The modem status command can either contain one octet (v.24 signals)
+- or two octets (v.24 signals + break signals). The length field will
+- either be 2 or 3 respectively. This is specified in section
+- 5.4.6.3.7 of the 27.010 mux spec. */
++ /* The modem status command can either contain one octet (V.24 signals)
++ * or two octets (V.24 signals + break signals). This is specified in
++ * section 5.4.6.3.7 of the 07.10 mux spec.
++ */
+
+- if (clen == 2)
++ if (slen == 1)
+ modem = modem & 0x7f;
+ else {
+ brk = modem & 0x7f;
+@@ -1079,9 +1295,9 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
+ {
+ unsigned int addr = 0;
+ unsigned int modem = 0;
+- unsigned int brk = 0;
+ struct gsm_dlci *dlci;
+ int len = clen;
++ int slen;
+ const u8 *dp = data;
+ struct tty_struct *tty;
+
+@@ -1101,23 +1317,15 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
+ return;
+ dlci = gsm->dlci[addr];
+
++ slen = len;
+ while (gsm_read_ea(&modem, *dp++) == 0) {
+ len--;
+ if (len == 0)
+ return;
+ }
+ len--;
+- if (len > 0) {
+- while (gsm_read_ea(&brk, *dp++) == 0) {
+- len--;
+- if (len == 0)
+- return;
+- }
+- modem <<= 7;
+- modem |= (brk & 0x7f);
+- }
+ tty = tty_port_tty_get(&dlci->port);
+- gsm_process_modem(tty, dlci, modem, clen);
++ gsm_process_modem(tty, dlci, modem, slen - len);
+ if (tty) {
+ tty_wakeup(tty);
+ tty_kref_put(tty);
+@@ -1177,6 +1385,7 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen)
+ }
+
+ static void gsm_dlci_begin_close(struct gsm_dlci *dlci);
++static void gsm_dlci_close(struct gsm_dlci *dlci);
+
+ /**
+ * gsm_control_message - DLCI 0 control processing
+@@ -1195,15 +1404,28 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
+ {
+ u8 buf[1];
+ unsigned long flags;
++ struct gsm_dlci *dlci;
++ int i;
++ int address;
+
+ switch (command) {
+ case CMD_CLD: {
+- struct gsm_dlci *dlci = gsm->dlci[0];
++ if (addr_cnt > 0) {
++ for (i = 0; i < addr_cnt; i++) {
++ address = addr_open[i];
++ dlci = gsm->dlci[address];
++ gsm_dlci_close(dlci);
++ addr_open[i] = 0;
++ }
++ }
+ /* Modem wishes to close down */
++ dlci = gsm->dlci[0];
+ if (dlci) {
+ dlci->dead = true;
+ gsm->dead = true;
+- gsm_dlci_begin_close(dlci);
++ gsm_dlci_close(dlci);
++ addr_cnt = 0;
++ gsm_response(gsm, 0, UA|PF);
+ }
+ }
+ break;
+@@ -1296,11 +1518,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
+
+ static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl)
+ {
+- struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype);
++ struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype);
+ if (msg == NULL)
+ return;
+- msg->data[0] = (ctrl->cmd << 1) | 2 | EA; /* command */
+- memcpy(msg->data + 1, ctrl->data, ctrl->len);
++ msg->data[0] = (ctrl->cmd << 1) | CR | EA; /* command */
++ msg->data[1] = (ctrl->len << 1) | EA;
++ memcpy(msg->data + 2, ctrl->data, ctrl->len);
+ gsm_data_queue(gsm->dlci[0], msg);
+ }
+
+@@ -1323,8 +1546,7 @@ static void gsm_control_retransmit(struct timer_list *t)
+ spin_lock_irqsave(&gsm->control_lock, flags);
+ ctrl = gsm->pending_cmd;
+ if (ctrl) {
+- gsm->cretries--;
+- if (gsm->cretries == 0) {
++ if (gsm->cretries == 0 || !gsm->dlci[0] || gsm->dlci[0]->dead) {
+ gsm->pending_cmd = NULL;
+ ctrl->error = -ETIMEDOUT;
+ ctrl->done = 1;
+@@ -1332,6 +1554,7 @@ static void gsm_control_retransmit(struct timer_list *t)
+ wake_up(&gsm->event);
+ return;
+ }
++ gsm->cretries--;
+ gsm_control_transmit(gsm, ctrl);
+ mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
+ }
+@@ -1354,7 +1577,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm,
+ unsigned int command, u8 *data, int clen)
+ {
+ struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control),
+- GFP_KERNEL);
++ GFP_ATOMIC);
+ unsigned long flags;
+ if (ctrl == NULL)
+ return NULL;
+@@ -1372,7 +1595,7 @@ retry:
+
+ /* If DLCI0 is in ADM mode skip retries, it won't respond */
+ if (gsm->dlci[0]->mode == DLCI_MODE_ADM)
+- gsm->cretries = 1;
++ gsm->cretries = 0;
+ else
+ gsm->cretries = gsm->n2;
+
+@@ -1420,13 +1643,22 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control)
+
+ static void gsm_dlci_close(struct gsm_dlci *dlci)
+ {
++ unsigned long flags;
++
+ del_timer(&dlci->t1);
+ if (debug & 8)
+ pr_debug("DLCI %d goes closed.\n", dlci->addr);
+ dlci->state = DLCI_CLOSED;
++ /* Prevent us from sending data before the link is up again */
++ dlci->constipated = true;
+ if (dlci->addr != 0) {
+ tty_port_tty_hangup(&dlci->port, false);
++ spin_lock_irqsave(&dlci->lock, flags);
+ kfifo_reset(&dlci->fifo);
++ spin_unlock_irqrestore(&dlci->lock, flags);
++ /* Ensure that gsmtty_open() can return. */
++ tty_port_set_initialized(&dlci->port, 0);
++ wake_up_interruptible(&dlci->port.open_wait);
+ } else
+ dlci->gsm->dead = true;
+ wake_up(&dlci->gsm->event);
+@@ -1448,8 +1680,12 @@ static void gsm_dlci_open(struct gsm_dlci *dlci)
+ del_timer(&dlci->t1);
+ /* This will let a tty open continue */
+ dlci->state = DLCI_OPEN;
++ dlci->constipated = false;
+ if (debug & 8)
+ pr_debug("DLCI %d goes open.\n", dlci->addr);
++ /* Send current modem state */
++ if (dlci->addr)
++ gsm_modem_update(dlci, 0);
+ wake_up(&dlci->gsm->event);
+ }
+
+@@ -1475,8 +1711,8 @@ static void gsm_dlci_t1(struct timer_list *t)
+
+ switch (dlci->state) {
+ case DLCI_OPENING:
+- dlci->retries--;
+ if (dlci->retries) {
++ dlci->retries--;
+ gsm_command(dlci->gsm, dlci->addr, SABM|PF);
+ mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+ } else if (!dlci->addr && gsm->control == (DM | PF)) {
+@@ -1486,13 +1722,13 @@ static void gsm_dlci_t1(struct timer_list *t)
+ dlci->mode = DLCI_MODE_ADM;
+ gsm_dlci_open(dlci);
+ } else {
+- gsm_dlci_close(dlci);
++ gsm_dlci_begin_close(dlci); /* prevent half open link */
+ }
+
+ break;
+ case DLCI_CLOSING:
+- dlci->retries--;
+ if (dlci->retries) {
++ dlci->retries--;
+ gsm_command(dlci->gsm, dlci->addr, DISC|PF);
+ mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+ } else
+@@ -1525,6 +1761,25 @@ static void gsm_dlci_begin_open(struct gsm_dlci *dlci)
+ mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+ }
+
++/**
++ * gsm_dlci_set_opening - change state to opening
++ * @dlci: DLCI to open
++ *
++ * Change internal state to wait for DLCI open from initiator side.
++ * We set off timers and responses upon reception of an SABM.
++ */
++static void gsm_dlci_set_opening(struct gsm_dlci *dlci)
++{
++ switch (dlci->state) {
++ case DLCI_CLOSED:
++ case DLCI_CLOSING:
++ dlci->state = DLCI_OPENING;
++ break;
++ default:
++ break;
++ }
++}
++
+ /**
+ * gsm_dlci_begin_close - start channel open procedure
+ * @dlci: DLCI to open
+@@ -1565,6 +1820,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
+ struct tty_struct *tty;
+ unsigned int modem = 0;
+ int len = clen;
++ int slen = 0;
+
+ if (debug & 16)
+ pr_debug("%d bytes for tty\n", len);
+@@ -1577,12 +1833,16 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
+ case 2: /* Asynchronous serial with line state in each frame */
+ while (gsm_read_ea(&modem, *data++) == 0) {
+ len--;
++ slen++;
+ if (len == 0)
+ return;
+ }
++ len--;
++ slen++;
+ tty = tty_port_tty_get(port);
+ if (tty) {
+- gsm_process_modem(tty, dlci, modem, clen);
++ gsm_process_modem(tty, dlci, modem, slen);
++ tty_wakeup(tty);
+ tty_kref_put(tty);
+ }
+ fallthrough;
+@@ -1629,6 +1889,30 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len)
+ }
+ }
+
++/**
++ * gsm_kick_timer - transmit if possible
++ * @t: timer contained in our gsm object
++ *
++ * Transmit data from DLCIs if the queue is empty. We can't rely on
++ * a tty wakeup except when we filled the pipe so we need to fire off
++ * new data ourselves in other cases.
++ */
++static void gsm_kick_timer(struct timer_list *t)
++{
++ struct gsm_mux *gsm = from_timer(gsm, t, kick_timer);
++ unsigned long flags;
++ int sent = 0;
++
++ spin_lock_irqsave(&gsm->tx_lock, flags);
++ /* If we have nothing running then we need to fire up */
++ if (gsm->tx_bytes < TX_THRESH_LO)
++ sent = gsm_dlci_data_sweep(gsm);
++ spin_unlock_irqrestore(&gsm->tx_lock, flags);
++
++ if (sent && debug & 4)
++ pr_info("%s TX queue stalled\n", __func__);
++}
++
+ /*
+ * Allocate/Free DLCI channels
+ */
+@@ -1650,7 +1934,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
+ return NULL;
+ spin_lock_init(&dlci->lock);
+ mutex_init(&dlci->mutex);
+- if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) {
++ if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) {
+ kfree(dlci);
+ return NULL;
+ }
+@@ -1663,10 +1947,13 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
+ dlci->addr = addr;
+ dlci->adaption = gsm->adaption;
+ dlci->state = DLCI_CLOSED;
+- if (addr)
++ if (addr) {
+ dlci->data = gsm_dlci_data;
+- else
++ /* Prevent us from sending data before the link is up */
++ dlci->constipated = true;
++ } else {
+ dlci->data = gsm_dlci_command;
++ }
+ gsm->dlci[addr] = dlci;
+ return dlci;
+ }
+@@ -1720,7 +2007,12 @@ static void gsm_dlci_release(struct gsm_dlci *dlci)
+ gsm_destroy_network(dlci);
+ mutex_unlock(&dlci->mutex);
+
+- tty_hangup(tty);
++ /* We cannot use tty_hangup() because in tty_kref_put() the tty
++ * driver assumes that the hangup queue is free and reuses it to
++ * queue release_one_tty() -> NULL pointer panic in
++ * process_one_work().
++ */
++ tty_vhangup(tty);
+
+ tty_port_tty_set(&dlci->port, NULL);
+ tty_kref_put(tty);
+@@ -1748,18 +2040,8 @@ static void gsm_queue(struct gsm_mux *gsm)
+ struct gsm_dlci *dlci;
+ u8 cr;
+ int address;
+- /* We have to sneak a look at the packet body to do the FCS.
+- A somewhat layering violation in the spec */
++ int i, j, k, address_tmp;
+
+- if ((gsm->control & ~PF) == UI)
+- gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len);
+- if (gsm->encoding == 0) {
+- /* WARNING: gsm->received_fcs is used for
+- gsm->encoding = 0 only.
+- In this case it contain the last piece of data
+- required to generate final CRC */
+- gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs);
+- }
+ if (gsm->fcs != GOOD_FCS) {
+ gsm->bad_fcs++;
+ if (debug & 4)
+@@ -1771,10 +2053,10 @@ static void gsm_queue(struct gsm_mux *gsm)
+ goto invalid;
+
+ cr = gsm->address & 1; /* C/R bit */
++ cr ^= gsm->initiator ? 0 : 1; /* Flip so 1 always means command */
+
+ gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len);
+
+- cr ^= 1 - gsm->initiator; /* Flip so 1 always means command */
+ dlci = gsm->dlci[address];
+
+ switch (gsm->control) {
+@@ -1786,24 +2068,53 @@ static void gsm_queue(struct gsm_mux *gsm)
+ if (dlci == NULL)
+ return;
+ if (dlci->dead)
+- gsm_response(gsm, address, DM);
++ gsm_response(gsm, address, DM|PF);
+ else {
+- gsm_response(gsm, address, UA);
++ gsm_response(gsm, address, UA|PF);
+ gsm_dlci_open(dlci);
++ /* Save dlci open address */
++ if (address) {
++ addr_open[addr_cnt] = address;
++ addr_cnt++;
++ }
+ }
+ break;
+ case DISC|PF:
+ if (cr == 0)
+ goto invalid;
+ if (dlci == NULL || dlci->state == DLCI_CLOSED) {
+- gsm_response(gsm, address, DM);
++ gsm_response(gsm, address, DM|PF);
+ return;
+ }
+ /* Real close complete */
+- gsm_response(gsm, address, UA);
+- gsm_dlci_close(dlci);
++ if (!address) {
++ if (addr_cnt > 0) {
++ for (i = 0; i < addr_cnt; i++) {
++ address = addr_open[i];
++ dlci = gsm->dlci[address];
++ gsm_dlci_close(dlci);
++ addr_open[i] = 0;
++ }
++ }
++ dlci = gsm->dlci[0];
++ gsm_dlci_close(dlci);
++ addr_cnt = 0;
++ gsm_response(gsm, 0, UA|PF);
++ } else {
++ gsm_response(gsm, address, UA|PF);
++ gsm_dlci_close(dlci);
++ /* clear dlci address */
++ for (j = 0; j < addr_cnt; j++) {
++ address_tmp = addr_open[j];
++ if (address_tmp == address) {
++ for (k = j; k < addr_cnt; k++)
++ addr_open[k] = addr_open[k+1];
++ addr_cnt--;
++ break;
++ }
++ }
++ }
+ break;
+- case UA:
+ case UA|PF:
+ if (cr == 0 || dlci == NULL)
+ break;
+@@ -1837,7 +2148,7 @@ static void gsm_queue(struct gsm_mux *gsm)
+ goto invalid;
+ #endif
+ if (dlci == NULL || dlci->state != DLCI_OPEN) {
+- gsm_command(gsm, address, DM|PF);
++ gsm_response(gsm, address, DM|PF);
+ return;
+ }
+ dlci->data(dlci, gsm->buf, gsm->len);
+@@ -1917,19 +2228,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
+ break;
+ case GSM_DATA: /* Data */
+ gsm->buf[gsm->count++] = c;
+- if (gsm->count == gsm->len)
++ if (gsm->count == gsm->len) {
++ /* Calculate final FCS for UI frames over all data */
++ if ((gsm->control & ~PF) != UIH) {
++ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
++ gsm->count);
++ }
+ gsm->state = GSM_FCS;
++ }
+ break;
+ case GSM_FCS: /* FCS follows the packet */
+- gsm->received_fcs = c;
+- gsm_queue(gsm);
++ gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+ gsm->state = GSM_SSOF;
+ break;
+ case GSM_SSOF:
+- if (c == GSM0_SOF) {
+- gsm->state = GSM_SEARCH;
+- break;
+- }
++ gsm->state = GSM_SEARCH;
++ if (c == GSM0_SOF)
++ gsm_queue(gsm);
++ else
++ gsm->bad_size++;
+ break;
+ default:
+ pr_debug("%s: unhandled state: %d\n", __func__, gsm->state);
+@@ -1947,12 +2264,35 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
+
+ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
+ {
++ /* handle XON/XOFF */
++ if ((c & ISO_IEC_646_MASK) == XON) {
++ gsm->constipated = true;
++ return;
++ } else if ((c & ISO_IEC_646_MASK) == XOFF) {
++ gsm->constipated = false;
++ /* Kick the link in case it is idling */
++ gsm_data_kick(gsm, NULL);
++ return;
++ }
+ if (c == GSM1_SOF) {
+- /* EOF is only valid in frame if we have got to the data state
+- and received at least one byte (the FCS) */
+- if (gsm->state == GSM_DATA && gsm->count) {
+- /* Extract the FCS */
++ /* EOF is only valid in frame if we have got to the data state */
++ if (gsm->state == GSM_DATA) {
++ if (gsm->count < 1) {
++ /* Missing FSC */
++ gsm->malformed++;
++ gsm->state = GSM_START;
++ return;
++ }
++ /* Remove the FCS from data */
+ gsm->count--;
++ if ((gsm->control & ~PF) != UIH) {
++ /* Calculate final FCS for UI frames over all
++ * data but FCS
++ */
++ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
++ gsm->count);
++ }
++ /* Add the FCS itself to test against GOOD_FCS */
+ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]);
+ gsm->len = gsm->count;
+ gsm_queue(gsm);
+@@ -1961,7 +2301,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
+ }
+ /* Any partial frame was a runt so go back to start */
+ if (gsm->state != GSM_START) {
+- gsm->malformed++;
++ if (gsm->state != GSM_SEARCH)
++ gsm->malformed++;
+ gsm->state = GSM_START;
+ }
+ /* A SOF in GSM_START means we are still reading idling or
+@@ -2033,74 +2374,51 @@ static void gsm_error(struct gsm_mux *gsm,
+ gsm->io_error++;
+ }
+
+-static int gsm_disconnect(struct gsm_mux *gsm)
+-{
+- struct gsm_dlci *dlci = gsm->dlci[0];
+- struct gsm_control *gc;
+-
+- if (!dlci)
+- return 0;
+-
+- /* In theory disconnecting DLCI 0 is sufficient but for some
+- modems this is apparently not the case. */
+- gc = gsm_control_send(gsm, CMD_CLD, NULL, 0);
+- if (gc)
+- gsm_control_wait(gsm, gc);
+-
+- del_timer_sync(&gsm->t2_timer);
+- /* Now we are sure T2 has stopped */
+-
+- gsm_dlci_begin_close(dlci);
+- wait_event_interruptible(gsm->event,
+- dlci->state == DLCI_CLOSED);
+-
+- if (signal_pending(current))
+- return -EINTR;
+-
+- return 0;
+-}
+-
+ /**
+ * gsm_cleanup_mux - generic GSM protocol cleanup
+ * @gsm: our mux
++ * @disc: disconnect link?
+ *
+ * Clean up the bits of the mux which are the same for all framing
+ * protocols. Remove the mux from the mux table, stop all the timers
+ * and then shut down each device hanging up the channels as we go.
+ */
+
+-static void gsm_cleanup_mux(struct gsm_mux *gsm)
++static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
+ {
+ int i;
+- struct gsm_dlci *dlci = gsm->dlci[0];
++ struct gsm_dlci *dlci;
+ struct gsm_msg *txq, *ntxq;
+
+ gsm->dead = true;
++ mutex_lock(&gsm->mutex);
+
+- spin_lock(&gsm_mux_lock);
+- for (i = 0; i < MAX_MUX; i++) {
+- if (gsm_mux[i] == gsm) {
+- gsm_mux[i] = NULL;
+- break;
++ dlci = gsm->dlci[0];
++ if (dlci) {
++ if (disc && dlci->state != DLCI_CLOSED) {
++ gsm_dlci_begin_close(dlci);
++ wait_event(gsm->event, dlci->state == DLCI_CLOSED);
+ }
++ dlci->dead = true;
+ }
+- spin_unlock(&gsm_mux_lock);
+- /* open failed before registering => nothing to do */
+- if (i == MAX_MUX)
+- return;
+
++ /* Finish outstanding timers, making sure they are done */
++ del_timer_sync(&gsm->kick_timer);
+ del_timer_sync(&gsm->t2_timer);
+- /* Now we are sure T2 has stopped */
+- if (dlci)
+- dlci->dead = true;
+
+- /* Free up any link layer users */
+- mutex_lock(&gsm->mutex);
+- for (i = 0; i < NUM_DLCI; i++)
+- if (gsm->dlci[i])
++ /* Free up any link layer users and finally the control channel */
++ if (gsm->has_devices) {
++ gsm_unregister_devices(gsm_tty_driver, gsm->num);
++ gsm->has_devices = false;
++ }
++ for (i = NUM_DLCI - 1; i >= 0; i--)
++ if (gsm->dlci[i]) {
+ gsm_dlci_release(gsm->dlci[i]);
++ gsm->dlci[i] = NULL;
++ }
+ mutex_unlock(&gsm->mutex);
+ /* Now wipe the queues */
++ tty_ldisc_flush(gsm->tty);
+ list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list)
+ kfree(txq);
+ INIT_LIST_HEAD(&gsm->tx_list);
+@@ -2118,8 +2436,13 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm)
+ static int gsm_activate_mux(struct gsm_mux *gsm)
+ {
+ struct gsm_dlci *dlci;
+- int i = 0;
++ int ret;
+
++ dlci = gsm_dlci_alloc(gsm, 0);
++ if (dlci == NULL)
++ return -ENOMEM;
++
++ timer_setup(&gsm->kick_timer, gsm_kick_timer, 0);
+ timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0);
+ init_waitqueue_head(&gsm->event);
+ spin_lock_init(&gsm->control_lock);
+@@ -2130,21 +2453,11 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
+ else
+ gsm->receive = gsm1_receive;
+
+- spin_lock(&gsm_mux_lock);
+- for (i = 0; i < MAX_MUX; i++) {
+- if (gsm_mux[i] == NULL) {
+- gsm->num = i;
+- gsm_mux[i] = gsm;
+- break;
+- }
+- }
+- spin_unlock(&gsm_mux_lock);
+- if (i == MAX_MUX)
+- return -EBUSY;
++ ret = gsm_register_devices(gsm_tty_driver, gsm->num);
++ if (ret)
++ return ret;
+
+- dlci = gsm_dlci_alloc(gsm, 0);
+- if (dlci == NULL)
+- return -ENOMEM;
++ gsm->has_devices = true;
+ gsm->dead = false; /* Tty opens are now permissible */
+ return 0;
+ }
+@@ -2157,6 +2470,15 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
+ */
+ static void gsm_free_mux(struct gsm_mux *gsm)
+ {
++ int i;
++
++ for (i = 0; i < MAX_MUX; i++) {
++ if (gsm == gsm_mux[i]) {
++ gsm_mux[i] = NULL;
++ break;
++ }
++ }
++ mutex_destroy(&gsm->mutex);
+ kfree(gsm->txframe);
+ kfree(gsm->buf);
+ kfree(gsm);
+@@ -2176,12 +2498,20 @@ static void gsm_free_muxr(struct kref *ref)
+
+ static inline void mux_get(struct gsm_mux *gsm)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&gsm_mux_lock, flags);
+ kref_get(&gsm->ref);
++ spin_unlock_irqrestore(&gsm_mux_lock, flags);
+ }
+
+ static inline void mux_put(struct gsm_mux *gsm)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&gsm_mux_lock, flags);
+ kref_put(&gsm->ref, gsm_free_muxr);
++ spin_unlock_irqrestore(&gsm_mux_lock, flags);
+ }
+
+ static inline unsigned int mux_num_to_base(struct gsm_mux *gsm)
+@@ -2202,6 +2532,7 @@ static inline unsigned int mux_line_to_num(unsigned int line)
+
+ static struct gsm_mux *gsm_alloc_mux(void)
+ {
++ int i;
+ struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL);
+ if (gsm == NULL)
+ return NULL;
+@@ -2210,7 +2541,7 @@ static struct gsm_mux *gsm_alloc_mux(void)
+ kfree(gsm);
+ return NULL;
+ }
+- gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL);
++ gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL);
+ if (gsm->txframe == NULL) {
+ kfree(gsm->buf);
+ kfree(gsm);
+@@ -2231,6 +2562,26 @@ static struct gsm_mux *gsm_alloc_mux(void)
+ gsm->mtu = 64;
+ gsm->dead = true; /* Avoid early tty opens */
+
++ /* Store the instance to the mux array or abort if no space is
++ * available.
++ */
++ spin_lock(&gsm_mux_lock);
++ for (i = 0; i < MAX_MUX; i++) {
++ if (!gsm_mux[i]) {
++ gsm_mux[i] = gsm;
++ gsm->num = i;
++ break;
++ }
++ }
++ spin_unlock(&gsm_mux_lock);
++ if (i == MAX_MUX) {
++ mutex_destroy(&gsm->mutex);
++ kfree(gsm->txframe);
++ kfree(gsm->buf);
++ kfree(gsm);
++ return NULL;
++ }
++
+ return gsm;
+ }
+
+@@ -2257,6 +2608,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
+
+ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
+ {
++ int ret = 0;
+ int need_close = 0;
+ int need_restart = 0;
+
+@@ -2266,7 +2618,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
+ /* Check the MRU/MTU range looks sane */
+ if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8)
+ return -EINVAL;
+- if (c->n2 < 3)
++ if (c->n2 > 255)
+ return -EINVAL;
+ if (c->encapsulation > 1) /* Basic, advanced, no I */
+ return -EINVAL;
+@@ -2297,19 +2649,11 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
+
+ /*
+ * Close down what is needed, restart and initiate the new
+- * configuration
++ * configuration. On the first time there is no DLCI[0]
++ * and closing or cleaning up is not necessary.
+ */
+-
+- if (need_close || need_restart) {
+- int ret;
+-
+- ret = gsm_disconnect(gsm);
+-
+- if (ret)
+- return ret;
+- }
+- if (need_restart)
+- gsm_cleanup_mux(gsm);
++ if (need_close || need_restart)
++ gsm_cleanup_mux(gsm, true);
+
+ gsm->initiator = c->initiator;
+ gsm->mru = c->mru;
+@@ -2332,10 +2676,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
+ * FIXME: We need to separate activation/deactivation from adding
+ * and removing from the mux array
+ */
+- if (need_restart)
+- gsm_activate_mux(gsm);
+- if (gsm->initiator && need_close)
+- gsm_dlci_begin_open(gsm->dlci[0]);
++ if (gsm->dead) {
++ ret = gsm_activate_mux(gsm);
++ if (ret)
++ return ret;
++ if (gsm->initiator)
++ gsm_dlci_begin_open(gsm->dlci[0]);
++ }
+ return 0;
+ }
+
+@@ -2356,10 +2703,8 @@ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len)
+ return -ENOSPC;
+ }
+ if (debug & 4)
+- print_hex_dump_bytes("gsmld_output: ", DUMP_PREFIX_OFFSET,
+- data, len);
+- gsm->tty->ops->write(gsm->tty, data, len);
+- return len;
++ gsm_hex_dump_bytes(__func__, data, len);
++ return gsm->tty->ops->write(gsm->tty, data, len);
+ }
+
+ /**
+@@ -2372,36 +2717,14 @@ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len)
+ * will need moving to an ioctl path.
+ */
+
+-static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
++static void gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
+ {
+- unsigned int base;
+- int ret, i;
+-
+ gsm->tty = tty_kref_get(tty);
+- ret = gsm_activate_mux(gsm);
+- if (ret != 0)
+- tty_kref_put(gsm->tty);
+- else {
+- /* Don't register device 0 - this is the control channel and not
+- a usable tty interface */
+- base = mux_num_to_base(gsm); /* Base for this MUX */
+- for (i = 1; i < NUM_DLCI; i++) {
+- struct device *dev;
+-
+- dev = tty_register_device(gsm_tty_driver,
+- base + i, NULL);
+- if (IS_ERR(dev)) {
+- for (i--; i >= 1; i--)
+- tty_unregister_device(gsm_tty_driver,
+- base + i);
+- return PTR_ERR(dev);
+- }
+- }
+- }
+- return ret;
++ /* Turn off tty XON/XOFF handling to handle it explicitly. */
++ gsm->old_c_iflag = tty->termios.c_iflag;
++ tty->termios.c_iflag &= (IXON | IXOFF);
+ }
+
+-
+ /**
+ * gsmld_detach_gsm - stop doing 0710 mux
+ * @tty: tty attached to the mux
+@@ -2412,13 +2735,9 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
+
+ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
+ {
+- unsigned int base = mux_num_to_base(gsm); /* Base for this MUX */
+- int i;
+-
+ WARN_ON(tty != gsm->tty);
+- for (i = 1; i < NUM_DLCI; i++)
+- tty_unregister_device(gsm_tty_driver, base + i);
+- gsm_cleanup_mux(gsm);
++ /* Restore tty XON/XOFF handling. */
++ gsm->tty->termios.c_iflag = gsm->old_c_iflag;
+ tty_kref_put(gsm->tty);
+ gsm->tty = NULL;
+ }
+@@ -2430,15 +2749,15 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp,
+ char flags = TTY_NORMAL;
+
+ if (debug & 4)
+- print_hex_dump_bytes("gsmld_receive: ", DUMP_PREFIX_OFFSET,
+- cp, count);
++ gsm_hex_dump_bytes(__func__, cp, count);
+
+ for (; count; count--, cp++) {
+ if (fp)
+ flags = *fp++;
+ switch (flags) {
+ case TTY_NORMAL:
+- gsm->receive(gsm, *cp);
++ if (gsm->receive)
++ gsm->receive(gsm, *cp);
+ break;
+ case TTY_OVERRUN:
+ case TTY_BREAK:
+@@ -2483,6 +2802,12 @@ static void gsmld_close(struct tty_struct *tty)
+ {
+ struct gsm_mux *gsm = tty->disc_data;
+
++ /* The ldisc locks and closes the port before calling our close. This
++ * means we have no way to do a proper disconnect. We will not bother
++ * to do one.
++ */
++ gsm_cleanup_mux(gsm, false);
++
+ gsmld_detach_gsm(tty, gsm);
+
+ gsmld_flush_buffer(tty);
+@@ -2503,7 +2828,6 @@ static void gsmld_close(struct tty_struct *tty)
+ static int gsmld_open(struct tty_struct *tty)
+ {
+ struct gsm_mux *gsm;
+- int ret;
+
+ if (tty->ops->write == NULL)
+ return -EINVAL;
+@@ -2519,12 +2843,12 @@ static int gsmld_open(struct tty_struct *tty)
+ /* Attach the initial passive connection */
+ gsm->encoding = 1;
+
+- ret = gsmld_attach_gsm(tty, gsm);
+- if (ret != 0) {
+- gsm_cleanup_mux(gsm);
+- mux_put(gsm);
+- }
+- return ret;
++ gsmld_attach_gsm(tty, gsm);
++
++ timer_setup(&gsm->kick_timer, gsm_kick_timer, 0);
++ timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0);
++
++ return 0;
+ }
+
+ /**
+@@ -2592,11 +2916,24 @@ static ssize_t gsmld_read(struct tty_struct *tty, struct file *file,
+ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file,
+ const unsigned char *buf, size_t nr)
+ {
+- int space = tty_write_room(tty);
++ struct gsm_mux *gsm = tty->disc_data;
++ unsigned long flags;
++ int space;
++ int ret;
++
++ if (!gsm)
++ return -ENODEV;
++
++ ret = -ENOBUFS;
++ spin_lock_irqsave(&gsm->tx_lock, flags);
++ space = tty_write_room(tty);
+ if (space >= nr)
+- return tty->ops->write(tty, buf, nr);
+- set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+- return -ENOBUFS;
++ ret = tty->ops->write(tty, buf, nr);
++ else
++ set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
++ spin_unlock_irqrestore(&gsm->tx_lock, flags);
++
++ return ret;
+ }
+
+ /**
+@@ -2621,12 +2958,15 @@ static __poll_t gsmld_poll(struct tty_struct *tty, struct file *file,
+
+ poll_wait(file, &tty->read_wait, wait);
+ poll_wait(file, &tty->write_wait, wait);
++
++ if (gsm->dead)
++ mask |= EPOLLHUP;
+ if (tty_hung_up_p(file))
+ mask |= EPOLLHUP;
++ if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
++ mask |= EPOLLHUP;
+ if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0)
+ mask |= EPOLLOUT | EPOLLWRNORM;
+- if (gsm->dead)
+- mask |= EPOLLHUP;
+ return mask;
+ }
+
+@@ -2876,28 +3216,78 @@ static struct tty_ldisc_ops tty_ldisc_packet = {
+ * Virtual tty side
+ */
+
+-#define TX_SIZE 512
++/**
++ * gsm_modem_upd_via_data - send modem bits via convergence layer
++ * @dlci: channel
++ * @brk: break signal
++ *
++ * Send an empty frame to signal mobile state changes and to transmit the
++ * break signal for adaption 2.
++ */
+
+-static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk)
++static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk)
+ {
+- u8 modembits[5];
++ struct gsm_mux *gsm = dlci->gsm;
++ unsigned long flags;
++
++ if (dlci->state != DLCI_OPEN || dlci->adaption != 2)
++ return;
++
++ spin_lock_irqsave(&gsm->tx_lock, flags);
++ gsm_dlci_modem_output(gsm, dlci, brk);
++ spin_unlock_irqrestore(&gsm->tx_lock, flags);
++}
++
++/**
++ * gsm_modem_upd_via_msc - send modem bits via control frame
++ * @dlci: channel
++ * @brk: break signal
++ */
++
++static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk)
++{
++ u8 modembits[3];
+ struct gsm_control *ctrl;
+ int len = 2;
+
+- if (brk)
+- len++;
++ if (dlci->gsm->encoding != 0)
++ return 0;
+
+- modembits[0] = len << 1 | EA; /* Data bytes */
+- modembits[1] = dlci->addr << 2 | 3; /* DLCI, EA, 1 */
+- modembits[2] = gsm_encode_modem(dlci) << 1 | EA;
+- if (brk)
+- modembits[3] = brk << 4 | 2 | EA; /* Valid, EA */
+- ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1);
++ modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */
++ if (!brk) {
++ modembits[1] = (gsm_encode_modem(dlci) << 1) | EA;
++ } else {
++ modembits[1] = gsm_encode_modem(dlci) << 1;
++ modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */
++ len++;
++ }
++ ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len);
+ if (ctrl == NULL)
+ return -ENOMEM;
+ return gsm_control_wait(dlci->gsm, ctrl);
+ }
+
++/**
++ * gsm_modem_update - send modem status line state
++ * @dlci: channel
++ * @brk: break signal
++ */
++
++static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk)
++{
++ if (dlci->adaption == 2) {
++ /* Send convergence layer type 2 empty data frame. */
++ gsm_modem_upd_via_data(dlci, brk);
++ return 0;
++ } else if (dlci->gsm->encoding == 0) {
++ /* Send as MSC control message. */
++ return gsm_modem_upd_via_msc(dlci, brk);
++ }
++
++ /* Modem status lines are not supported. */
++ return -EPROTONOSUPPORT;
++}
++
+ static int gsm_carrier_raised(struct tty_port *port)
+ {
+ struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
+@@ -2930,7 +3320,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff)
+ modem_tx &= ~(TIOCM_DTR | TIOCM_RTS);
+ if (modem_tx != dlci->modem_tx) {
+ dlci->modem_tx = modem_tx;
+- gsmtty_modem_update(dlci, 0);
++ gsm_modem_update(dlci, 0);
+ }
+ }
+
+@@ -3000,6 +3390,7 @@ static int gsmtty_open(struct tty_struct *tty, struct file *filp)
+ {
+ struct gsm_dlci *dlci = tty->driver_data;
+ struct tty_port *port = &dlci->port;
++ struct gsm_mux *gsm = dlci->gsm;
+
+ port->count++;
+ tty_port_tty_set(port, tty);
+@@ -3009,7 +3400,10 @@ static int gsmtty_open(struct tty_struct *tty, struct file *filp)
+ a DM straight back. This is ok as that will have caused a hangup */
+ tty_port_set_initialized(port, 1);
+ /* Start sending off SABM messages */
+- gsm_dlci_begin_open(dlci);
++ if (gsm->initiator)
++ gsm_dlci_begin_open(dlci);
++ else
++ gsm_dlci_set_opening(dlci);
+ /* And wait for virtual carrier */
+ return tty_port_block_til_ready(port, tty, filp);
+ }
+@@ -3063,7 +3457,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty)
+ struct gsm_dlci *dlci = tty->driver_data;
+ if (dlci->state == DLCI_CLOSED)
+ return 0;
+- return TX_SIZE - kfifo_len(&dlci->fifo);
++ return kfifo_avail(&dlci->fifo);
+ }
+
+ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
+@@ -3077,13 +3471,17 @@ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
+ static void gsmtty_flush_buffer(struct tty_struct *tty)
+ {
+ struct gsm_dlci *dlci = tty->driver_data;
++ unsigned long flags;
++
+ if (dlci->state == DLCI_CLOSED)
+ return;
+ /* Caution needed: If we implement reliable transport classes
+ then the data being transmitted can't simply be junked once
+ it has first hit the stack. Until then we can just blow it
+ away */
++ spin_lock_irqsave(&dlci->lock, flags);
+ kfifo_reset(&dlci->fifo);
++ spin_unlock_irqrestore(&dlci->lock, flags);
+ /* Need to unhook this DLCI from the transmit queue logic */
+ }
+
+@@ -3115,7 +3513,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty,
+
+ if (modem_tx != dlci->modem_tx) {
+ dlci->modem_tx = modem_tx;
+- return gsmtty_modem_update(dlci, 0);
++ return gsm_modem_update(dlci, 0);
+ }
+ return 0;
+ }
+@@ -3173,10 +3571,10 @@ static void gsmtty_throttle(struct tty_struct *tty)
+ if (dlci->state == DLCI_CLOSED)
+ return;
+ if (C_CRTSCTS(tty))
+- dlci->modem_tx &= ~TIOCM_DTR;
++ dlci->modem_tx &= ~TIOCM_RTS;
+ dlci->throttled = true;
+- /* Send an MSC with DTR cleared */
+- gsmtty_modem_update(dlci, 0);
++ /* Send an MSC with RTS cleared */
++ gsm_modem_update(dlci, 0);
+ }
+
+ static void gsmtty_unthrottle(struct tty_struct *tty)
+@@ -3185,10 +3583,10 @@ static void gsmtty_unthrottle(struct tty_struct *tty)
+ if (dlci->state == DLCI_CLOSED)
+ return;
+ if (C_CRTSCTS(tty))
+- dlci->modem_tx |= TIOCM_DTR;
++ dlci->modem_tx |= TIOCM_RTS;
+ dlci->throttled = false;
+- /* Send an MSC with DTR set */
+- gsmtty_modem_update(dlci, 0);
++ /* Send an MSC with RTS set */
++ gsm_modem_update(dlci, 0);
+ }
+
+ static int gsmtty_break_ctl(struct tty_struct *tty, int state)
+@@ -3206,7 +3604,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state)
+ if (encode > 0x0F)
+ encode = 0x0F; /* Best effort */
+ }
+- return gsmtty_modem_update(dlci, encode);
++ return gsm_modem_update(dlci, encode);
+ }
+
+ static void gsmtty_cleanup(struct tty_struct *tty)
+diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
+index 580a37b3fe1b9..166f651fde33a 100644
+--- a/drivers/tty/n_hdlc.c
++++ b/drivers/tty/n_hdlc.c
+@@ -140,6 +140,8 @@ struct n_hdlc {
+ struct n_hdlc_buf_list rx_buf_list;
+ struct n_hdlc_buf_list tx_free_buf_list;
+ struct n_hdlc_buf_list rx_free_buf_list;
++ struct work_struct write_work;
++ struct tty_struct *tty_for_write_work;
+ };
+
+ /*
+@@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
+ /* Local functions */
+
+ static struct n_hdlc *n_hdlc_alloc(void);
++static void n_hdlc_tty_write_work(struct work_struct *work);
+
+ /* max frame size for memory allocations */
+ static int maxframe = 4096;
+@@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty)
+ wake_up_interruptible(&tty->read_wait);
+ wake_up_interruptible(&tty->write_wait);
+
++ cancel_work_sync(&n_hdlc->write_work);
++
+ n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list);
+ n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list);
+ n_hdlc_free_buf_list(&n_hdlc->rx_buf_list);
+@@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty)
+ return -ENFILE;
+ }
+
++ INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work);
++ n_hdlc->tty_for_write_work = tty;
+ tty->disc_data = n_hdlc;
+ tty->receive_room = 65536;
+
+@@ -334,6 +341,20 @@ check_again:
+ goto check_again;
+ } /* end of n_hdlc_send_frames() */
+
++/**
++ * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup
++ * @work: pointer to work_struct
++ *
++ * Called when low level device driver can accept more send data.
++ */
++static void n_hdlc_tty_write_work(struct work_struct *work)
++{
++ struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work);
++ struct tty_struct *tty = n_hdlc->tty_for_write_work;
++
++ n_hdlc_send_frames(n_hdlc, tty);
++} /* end of n_hdlc_tty_write_work() */
++
+ /**
+ * n_hdlc_tty_wakeup - Callback for transmit wakeup
+ * @tty: pointer to associated tty instance data
+@@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty)
+ {
+ struct n_hdlc *n_hdlc = tty->disc_data;
+
+- n_hdlc_send_frames(n_hdlc, tty);
++ schedule_work(&n_hdlc->write_work);
+ } /* end of n_hdlc_tty_wakeup() */
+
+ /**
+diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
+index 0ec93f1a61f5d..6259249b11670 100644
+--- a/drivers/tty/n_tty.c
++++ b/drivers/tty/n_tty.c
+@@ -202,8 +202,8 @@ static void n_tty_kick_worker(struct tty_struct *tty)
+ struct n_tty_data *ldata = tty->disc_data;
+
+ /* Did the input worker stop? Restart it */
+- if (unlikely(ldata->no_room)) {
+- ldata->no_room = 0;
++ if (unlikely(READ_ONCE(ldata->no_room))) {
++ WRITE_ONCE(ldata->no_room, 0);
+
+ WARN_RATELIMIT(tty->port->itty == NULL,
+ "scheduling with invalid itty\n");
+@@ -1369,7 +1369,7 @@ handle_newline:
+ put_tty_queue(c, ldata);
+ smp_store_release(&ldata->canon_head, ldata->read_head);
+ kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+- wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
++ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM);
+ return;
+ }
+ }
+@@ -1589,7 +1589,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp,
+
+ if (read_cnt(ldata)) {
+ kill_fasync(&tty->fasync, SIGIO, POLL_IN);
+- wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);
++ wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM);
+ }
+ }
+
+@@ -1661,7 +1661,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp,
+ if (overflow && room < 0)
+ ldata->read_head--;
+ room = overflow;
+- ldata->no_room = flow && !room;
++ WRITE_ONCE(ldata->no_room, flow && !room);
+ } else
+ overflow = 0;
+
+@@ -1692,6 +1692,17 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp,
+ } else
+ n_tty_check_throttle(tty);
+
++ if (unlikely(ldata->no_room)) {
++ /*
++ * Barrier here is to ensure to read the latest read_tail in
++ * chars_in_buffer() and to make sure that read_tail is not loaded
++ * before ldata->no_room is set.
++ */
++ smp_mb();
++ if (!chars_in_buffer(tty))
++ n_tty_kick_worker(tty);
++ }
++
+ up_read(&tty->termios_rwsem);
+
+ return rcvd;
+@@ -1963,7 +1974,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
+ return false;
+
+ canon_head = smp_load_acquire(&ldata->canon_head);
+- n = min(*nr + 1, canon_head - ldata->read_tail);
++ n = min(*nr, canon_head - ldata->read_tail);
+
+ tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1);
+ size = min_t(size_t, tail + n, N_TTY_BUF_SIZE);
+@@ -1985,10 +1996,8 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
+ n += N_TTY_BUF_SIZE;
+ c = n + found;
+
+- if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) {
+- c = min(*nr, c);
++ if (!found || read_buf(ldata, eol) != __DISABLED_CHAR)
+ n = c;
+- }
+
+ n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n",
+ __func__, eol, found, n, c, tail, more);
+@@ -2014,6 +2023,35 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty,
+ return ldata->read_tail != canon_head;
+ }
+
++/*
++ * If we finished a read at the exact location of an
++ * EOF (special EOL character that's a __DISABLED_CHAR)
++ * in the stream, silently eat the EOF.
++ */
++static void canon_skip_eof(struct tty_struct *tty)
++{
++ struct n_tty_data *ldata = tty->disc_data;
++ size_t tail, canon_head;
++
++ canon_head = smp_load_acquire(&ldata->canon_head);
++ tail = ldata->read_tail;
++
++ // No data?
++ if (tail == canon_head)
++ return;
++
++ // See if the tail position is EOF in the circular buffer
++ tail &= (N_TTY_BUF_SIZE - 1);
++ if (!test_bit(tail, ldata->read_flags))
++ return;
++ if (read_buf(ldata, tail) != __DISABLED_CHAR)
++ return;
++
++ // Clear the EOL bit, skip the EOF char.
++ clear_bit(tail, ldata->read_flags);
++ smp_store_release(&ldata->read_tail, ldata->read_tail + 1);
++}
++
+ /**
+ * job_control - check job control
+ * @tty: tty
+@@ -2073,7 +2111,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ ssize_t retval = 0;
+ long timeout;
+ bool packet;
+- size_t tail;
++ size_t old_tail;
+
+ /*
+ * Is this a continuation of a read started earler?
+@@ -2083,7 +2121,14 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ */
+ if (*cookie) {
+ if (ldata->icanon && !L_EXTPROC(tty)) {
+- if (canon_copy_from_read_buf(tty, &kb, &nr))
++ /*
++ * If we have filled the user buffer, see
++ * if we should skip an EOF character before
++ * releasing the lock and returning done.
++ */
++ if (!nr)
++ canon_skip_eof(tty);
++ else if (canon_copy_from_read_buf(tty, &kb, &nr))
+ return kb - kbuf;
+ } else {
+ if (copy_from_read_buf(tty, &kb, &nr))
+@@ -2129,7 +2174,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ }
+
+ packet = tty->ctrl.packet;
+- tail = ldata->read_tail;
++ old_tail = ldata->read_tail;
+
+ add_wait_queue(&tty->read_wait, &wait);
+ while (nr) {
+@@ -2218,8 +2263,14 @@ more_to_be_read:
+ if (time)
+ timeout = time;
+ }
+- if (tail != ldata->read_tail)
++ if (old_tail != ldata->read_tail) {
++ /*
++ * Make sure no_room is not read in n_tty_kick_worker()
++ * before setting ldata->read_tail in copy_from_read_buf().
++ */
++ smp_mb();
+ n_tty_kick_worker(tty);
++ }
+ up_read(&tty->termios_rwsem);
+
+ remove_wait_queue(&tty->read_wait, &wait);
+diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
+index 74bfabe5b4538..752dab3356d72 100644
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty)
+ static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
+ {
+ struct tty_struct *to = tty->link;
+- unsigned long flags;
+
+- if (tty->flow.stopped)
++ if (tty->flow.stopped || !c)
+ return 0;
+
+- if (c > 0) {
+- spin_lock_irqsave(&to->port->lock, flags);
+- /* Stuff the data into the input queue of the other end */
+- c = tty_insert_flip_string(to->port, buf, c);
+- spin_unlock_irqrestore(&to->port->lock, flags);
+- /* And shovel */
+- if (c)
+- tty_flip_buffer_push(to->port);
+- }
+- return c;
++ return tty_insert_flip_string_and_push_buffer(to->port, buf, c);
+ }
+
+ /**
+diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
+index 6473361525d1f..d9500a25e03ab 100644
+--- a/drivers/tty/serial/8250/8250.h
++++ b/drivers/tty/serial/8250/8250.h
+@@ -17,6 +17,8 @@
+ struct uart_8250_dma {
+ int (*tx_dma)(struct uart_8250_port *p);
+ int (*rx_dma)(struct uart_8250_port *p);
++ void (*prepare_tx_dma)(struct uart_8250_port *p);
++ void (*prepare_rx_dma)(struct uart_8250_port *p);
+
+ /* Filter function */
+ dma_filter_fn fn;
+@@ -88,7 +90,6 @@ struct serial8250_config {
+ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */
+ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */
+ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */
+-#define UART_BUG_PARITY BIT(4) /* UART mishandles parity if FIFO enabled */
+ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */
+
+
+@@ -120,6 +121,28 @@ static inline void serial_out(struct uart_8250_port *up, int offset, int value)
+ up->port.serial_out(&up->port, offset, value);
+ }
+
++/*
++ * For the 16C950
++ */
++static void serial_icr_write(struct uart_8250_port *up, int offset, int value)
++{
++ serial_out(up, UART_SCR, offset);
++ serial_out(up, UART_ICR, value);
++}
++
++static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up,
++ int offset)
++{
++ unsigned int value;
++
++ serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD);
++ serial_out(up, UART_SCR, offset);
++ value = serial_in(up, UART_ICR);
++ serial_icr_write(up, UART_ACR, up->acr);
++
++ return value;
++}
++
+ void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p);
+
+ static inline int serial_dl_read(struct uart_8250_port *up)
+@@ -309,6 +332,29 @@ extern int serial8250_rx_dma(struct uart_8250_port *);
+ extern void serial8250_rx_dma_flush(struct uart_8250_port *);
+ extern int serial8250_request_dma(struct uart_8250_port *);
+ extern void serial8250_release_dma(struct uart_8250_port *);
++
++static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p)
++{
++ struct uart_8250_dma *dma = p->dma;
++
++ if (dma->prepare_tx_dma)
++ dma->prepare_tx_dma(p);
++}
++
++static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p)
++{
++ struct uart_8250_dma *dma = p->dma;
++
++ if (dma->prepare_rx_dma)
++ dma->prepare_rx_dma(p);
++}
++
++static inline bool serial8250_tx_dma_running(struct uart_8250_port *p)
++{
++ struct uart_8250_dma *dma = p->dma;
++
++ return dma && dma->tx_running;
++}
+ #else
+ static inline int serial8250_tx_dma(struct uart_8250_port *p)
+ {
+@@ -324,6 +370,11 @@ static inline int serial8250_request_dma(struct uart_8250_port *p)
+ return -1;
+ }
+ static inline void serial8250_release_dma(struct uart_8250_port *p) { }
++
++static inline bool serial8250_tx_dma_running(struct uart_8250_port *p)
++{
++ return false;
++}
+ #endif
+
+ static inline int ns16550a_goto_highspeed(struct uart_8250_port *up)
+diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
+index 2350fb3bb5e4c..179bb1375636b 100644
+--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
+@@ -429,6 +429,8 @@ static int aspeed_vuart_probe(struct platform_device *pdev)
+ timer_setup(&vuart->unthrottle_timer, aspeed_vuart_unthrottle_exp, 0);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -EINVAL;
+
+ memset(&port, 0, sizeof(port));
+ port.port.private_data = vuart;
+@@ -487,7 +489,7 @@ static int aspeed_vuart_probe(struct platform_device *pdev)
+ port.port.irq = irq_of_parse_and_map(np, 0);
+ port.port.handle_irq = aspeed_vuart_handle_irq;
+ port.port.iotype = UPIO_MEM;
+- port.port.type = PORT_16550A;
++ port.port.type = PORT_ASPEED_VUART;
+ port.port.uartclk = clk;
+ port.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP
+ | UPF_FIXED_PORT | UPF_FIXED_TYPE | UPF_NO_THRE_TEST;
+diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
+index 7f656fac503fe..f95047160b4d2 100644
+--- a/drivers/tty/serial/8250/8250_bcm7271.c
++++ b/drivers/tty/serial/8250/8250_bcm7271.c
+@@ -237,6 +237,7 @@ struct brcmuart_priv {
+ u32 rx_err;
+ u32 rx_timeout;
+ u32 rx_abort;
++ u32 saved_mctrl;
+ };
+
+ static struct dentry *brcmuart_debugfs_root;
+@@ -1015,16 +1016,18 @@ static int brcmuart_probe(struct platform_device *pdev)
+ of_property_read_u32(np, "clock-frequency", &clk_rate);
+
+ /* See if a Baud clock has been specified */
+- baud_mux_clk = of_clk_get_by_name(np, "sw_baud");
++ baud_mux_clk = devm_clk_get(dev, "sw_baud");
+ if (IS_ERR(baud_mux_clk)) {
+- if (PTR_ERR(baud_mux_clk) == -EPROBE_DEFER)
+- return -EPROBE_DEFER;
++ if (PTR_ERR(baud_mux_clk) == -EPROBE_DEFER) {
++ ret = -EPROBE_DEFER;
++ goto release_dma;
++ }
+ dev_dbg(dev, "BAUD MUX clock not specified\n");
+ } else {
+ dev_dbg(dev, "BAUD MUX clock found\n");
+ ret = clk_prepare_enable(baud_mux_clk);
+ if (ret)
+- return ret;
++ goto release_dma;
+ priv->baud_mux_clk = baud_mux_clk;
+ init_real_clk_rates(dev, priv);
+ clk_rate = priv->default_mux_rate;
+@@ -1032,7 +1035,8 @@ static int brcmuart_probe(struct platform_device *pdev)
+
+ if (clk_rate == 0) {
+ dev_err(dev, "clock-frequency or clk not defined\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto err_clk_disable;
+ }
+
+ dev_dbg(dev, "DMA is %senabled\n", priv->dma_enabled ? "" : "not ");
+@@ -1075,14 +1079,18 @@ static int brcmuart_probe(struct platform_device *pdev)
+ priv->rx_bufs = dma_alloc_coherent(dev,
+ priv->rx_size,
+ &priv->rx_addr, GFP_KERNEL);
+- if (!priv->rx_bufs)
++ if (!priv->rx_bufs) {
++ ret = -EINVAL;
+ goto err;
++ }
+ priv->tx_size = UART_XMIT_SIZE;
+ priv->tx_buf = dma_alloc_coherent(dev,
+ priv->tx_size,
+ &priv->tx_addr, GFP_KERNEL);
+- if (!priv->tx_buf)
++ if (!priv->tx_buf) {
++ ret = -EINVAL;
+ goto err;
++ }
+ }
+
+ ret = serial8250_register_8250_port(&up);
+@@ -1096,6 +1104,7 @@ static int brcmuart_probe(struct platform_device *pdev)
+ if (priv->dma_enabled) {
+ dma_irq = platform_get_irq_byname(pdev, "dma");
+ if (dma_irq < 0) {
++ ret = dma_irq;
+ dev_err(dev, "no IRQ resource info\n");
+ goto err1;
+ }
+@@ -1114,8 +1123,12 @@ err1:
+ serial8250_unregister_port(priv->line);
+ err:
+ brcmuart_free_bufs(dev, priv);
+- brcmuart_arbitration(priv, 0);
+- return -ENODEV;
++err_clk_disable:
++ clk_disable_unprepare(baud_mux_clk);
++release_dma:
++ if (priv->dma_enabled)
++ brcmuart_arbitration(priv, 0);
++ return ret;
+ }
+
+ static int brcmuart_remove(struct platform_device *pdev)
+@@ -1126,13 +1139,27 @@ static int brcmuart_remove(struct platform_device *pdev)
+ hrtimer_cancel(&priv->hrt);
+ serial8250_unregister_port(priv->line);
+ brcmuart_free_bufs(&pdev->dev, priv);
+- brcmuart_arbitration(priv, 0);
++ clk_disable_unprepare(priv->baud_mux_clk);
++ if (priv->dma_enabled)
++ brcmuart_arbitration(priv, 0);
+ return 0;
+ }
+
+ static int __maybe_unused brcmuart_suspend(struct device *dev)
+ {
+ struct brcmuart_priv *priv = dev_get_drvdata(dev);
++ struct uart_8250_port *up = serial8250_get_port(priv->line);
++ struct uart_port *port = &up->port;
++ unsigned long flags;
++
++ /*
++ * This will prevent resume from enabling RTS before the
++ * baud rate has been restored.
++ */
++ spin_lock_irqsave(&port->lock, flags);
++ priv->saved_mctrl = port->mctrl;
++ port->mctrl &= ~TIOCM_RTS;
++ spin_unlock_irqrestore(&port->lock, flags);
+
+ serial8250_suspend_port(priv->line);
+ clk_disable_unprepare(priv->baud_mux_clk);
+@@ -1143,6 +1170,9 @@ static int __maybe_unused brcmuart_suspend(struct device *dev)
+ static int __maybe_unused brcmuart_resume(struct device *dev)
+ {
+ struct brcmuart_priv *priv = dev_get_drvdata(dev);
++ struct uart_8250_port *up = serial8250_get_port(priv->line);
++ struct uart_port *port = &up->port;
++ unsigned long flags;
+ int ret;
+
+ ret = clk_prepare_enable(priv->baud_mux_clk);
+@@ -1165,6 +1195,15 @@ static int __maybe_unused brcmuart_resume(struct device *dev)
+ start_rx_dma(serial8250_get_port(priv->line));
+ }
+ serial8250_resume_port(priv->line);
++
++ if (priv->saved_mctrl & TIOCM_RTS) {
++ /* Restore RTS */
++ spin_lock_irqsave(&port->lock, flags);
++ port->mctrl |= TIOCM_RTS;
++ port->ops->set_mctrl(port, port->mctrl);
++ spin_unlock_irqrestore(&port->lock, flags);
++ }
++
+ return 0;
+ }
+
+@@ -1184,9 +1223,17 @@ static struct platform_driver brcmuart_platform_driver = {
+
+ static int __init brcmuart_init(void)
+ {
++ int ret;
++
+ brcmuart_debugfs_root = debugfs_create_dir(
+ brcmuart_platform_driver.driver.name, NULL);
+- return platform_driver_register(&brcmuart_platform_driver);
++ ret = platform_driver_register(&brcmuart_platform_driver);
++ if (ret) {
++ debugfs_remove_recursive(brcmuart_debugfs_root);
++ return ret;
++ }
++
++ return 0;
+ }
+ module_init(brcmuart_init);
+
+diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
+index 1ce193daea7f1..1890f342f090a 100644
+--- a/drivers/tty/serial/8250/8250_core.c
++++ b/drivers/tty/serial/8250/8250_core.c
+@@ -23,6 +23,7 @@
+ #include <linux/sysrq.h>
+ #include <linux/delay.h>
+ #include <linux/platform_device.h>
++#include <linux/pm_runtime.h>
+ #include <linux/tty.h>
+ #include <linux/ratelimit.h>
+ #include <linux/tty_flip.h>
+@@ -299,10 +300,9 @@ static void serial8250_backup_timeout(struct timer_list *t)
+ jiffies + uart_poll_timeout(&up->port) + HZ / 5);
+ }
+
+-static int univ8250_setup_irq(struct uart_8250_port *up)
++static void univ8250_setup_timer(struct uart_8250_port *up)
+ {
+ struct uart_port *port = &up->port;
+- int retval = 0;
+
+ /*
+ * The above check will only give an accurate result the first time
+@@ -323,10 +323,16 @@ static int univ8250_setup_irq(struct uart_8250_port *up)
+ */
+ if (!port->irq)
+ mod_timer(&up->timer, jiffies + uart_poll_timeout(port));
+- else
+- retval = serial_link_irq_chain(up);
++}
+
+- return retval;
++static int univ8250_setup_irq(struct uart_8250_port *up)
++{
++ struct uart_port *port = &up->port;
++
++ if (port->irq)
++ return serial_link_irq_chain(up);
++
++ return 0;
+ }
+
+ static void univ8250_release_irq(struct uart_8250_port *up)
+@@ -382,6 +388,7 @@ static struct uart_ops univ8250_port_ops;
+ static const struct uart_8250_ops univ8250_driver_ops = {
+ .setup_irq = univ8250_setup_irq,
+ .release_irq = univ8250_release_irq,
++ .setup_timer = univ8250_setup_timer,
+ };
+
+ static struct uart_8250_port serial8250_ports[UART_NR];
+@@ -561,6 +568,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
+
+ up->port.dev = dev;
+
++ if (uart_console_enabled(&up->port))
++ pm_runtime_get_sync(up->port.dev);
++
+ serial8250_apply_quirks(up);
+ uart_add_one_port(drv, &up->port);
+ }
+@@ -1006,6 +1016,7 @@ int serial8250_register_8250_port(const struct uart_8250_port *up)
+ uart->port.throttle = up->port.throttle;
+ uart->port.unthrottle = up->port.unthrottle;
+ uart->port.rs485_config = up->port.rs485_config;
++ uart->port.rs485_supported = up->port.rs485_supported;
+ uart->port.rs485 = up->port.rs485;
+ uart->rs485_start_tx = up->rs485_start_tx;
+ uart->rs485_stop_tx = up->rs485_stop_tx;
+@@ -1145,6 +1156,7 @@ void serial8250_unregister_port(int line)
+ uart->port.type = PORT_UNKNOWN;
+ uart->port.dev = &serial8250_isa_devs->dev;
+ uart->capabilities = 0;
++ serial8250_init_port(uart);
+ serial8250_apply_quirks(uart);
+ uart_add_one_port(&serial8250_reg, &uart->port);
+ } else {
+diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
+index 890fa7ddaa7f3..ec3cd723256fb 100644
+--- a/drivers/tty/serial/8250/8250_dma.c
++++ b/drivers/tty/serial/8250/8250_dma.c
+@@ -46,28 +46,57 @@ static void __dma_rx_complete(void *param)
+ struct uart_8250_dma *dma = p->dma;
+ struct tty_port *tty_port = &p->port.state->port;
+ struct dma_tx_state state;
++ enum dma_status dma_status;
+ int count;
+
+- dma->rx_running = 0;
+- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
++ /*
++ * New DMA Rx can be started during the completion handler before it
++ * could acquire port's lock and it might still be ongoing. Don't to
++ * anything in such case.
++ */
++ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
++ if (dma_status == DMA_IN_PROGRESS)
++ return;
+
+ count = dma->rx_size - state.residue;
+
+ tty_insert_flip_string(tty_port, dma->rx_buf, count);
+ p->port.icount.rx += count;
++ dma->rx_running = 0;
+
+ tty_flip_buffer_push(tty_port);
+ }
+
++static void dma_rx_complete(void *param)
++{
++ struct uart_8250_port *p = param;
++ struct uart_8250_dma *dma = p->dma;
++ unsigned long flags;
++
++ spin_lock_irqsave(&p->port.lock, flags);
++ if (dma->rx_running)
++ __dma_rx_complete(p);
++ spin_unlock_irqrestore(&p->port.lock, flags);
++}
++
+ int serial8250_tx_dma(struct uart_8250_port *p)
+ {
+ struct uart_8250_dma *dma = p->dma;
+ struct circ_buf *xmit = &p->port.state->xmit;
+ struct dma_async_tx_descriptor *desc;
++ struct uart_port *up = &p->port;
+ int ret;
+
+- if (dma->tx_running)
++ if (dma->tx_running) {
++ if (up->x_char) {
++ dmaengine_pause(dma->txchan);
++ uart_xchar_out(up, UART_TX);
++ dmaengine_resume(dma->txchan);
++ }
+ return 0;
++ } else if (up->x_char) {
++ uart_xchar_out(up, UART_TX);
++ }
+
+ if (uart_tx_stopped(&p->port) || uart_circ_empty(xmit)) {
+ /* We have been called from __dma_tx_complete() */
+@@ -77,6 +106,8 @@ int serial8250_tx_dma(struct uart_8250_port *p)
+
+ dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+
++ serial8250_do_prepare_tx_dma(p);
++
+ desc = dmaengine_prep_slave_single(dma->txchan,
+ dma->tx_addr + xmit->tail,
+ dma->tx_size, DMA_MEM_TO_DEV,
+@@ -114,6 +145,8 @@ int serial8250_rx_dma(struct uart_8250_port *p)
+ if (dma->rx_running)
+ return 0;
+
++ serial8250_do_prepare_rx_dma(p);
++
+ desc = dmaengine_prep_slave_single(dma->rxchan, dma->rx_addr,
+ dma->rx_size, DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+@@ -121,7 +154,7 @@ int serial8250_rx_dma(struct uart_8250_port *p)
+ return -EBUSY;
+
+ dma->rx_running = 1;
+- desc->callback = __dma_rx_complete;
++ desc->callback = dma_rx_complete;
+ desc->callback_param = p;
+
+ dma->rx_cookie = dmaengine_submit(desc);
+diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
+index a3a0154da567d..ace221afeb039 100644
+--- a/drivers/tty/serial/8250/8250_dw.c
++++ b/drivers/tty/serial/8250/8250_dw.c
+@@ -124,12 +124,15 @@ static void dw8250_check_lcr(struct uart_port *p, int value)
+ /* Returns once the transmitter is empty or we run out of retries */
+ static void dw8250_tx_wait_empty(struct uart_port *p)
+ {
++ struct uart_8250_port *up = up_to_u8250p(p);
+ unsigned int tries = 20000;
+ unsigned int delay_threshold = tries - 1000;
+ unsigned int lsr;
+
+ while (tries--) {
+ lsr = readb (p->membase + (UART_LSR << p->regshift));
++ up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++
+ if (lsr & UART_LSR_TEMT)
+ break;
+
+@@ -726,7 +729,7 @@ static struct platform_driver dw8250_platform_driver = {
+ .name = "dw-apb-uart",
+ .pm = &dw8250_pm_ops,
+ .of_match_table = dw8250_of_match,
+- .acpi_match_table = ACPI_PTR(dw8250_acpi_match),
++ .acpi_match_table = dw8250_acpi_match,
+ },
+ .probe = dw8250_probe,
+ .remove = dw8250_remove,
+diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c
+index 6d6a78eead3ef..1cf229cca5928 100644
+--- a/drivers/tty/serial/8250/8250_dwlib.c
++++ b/drivers/tty/serial/8250/8250_dwlib.c
+@@ -80,7 +80,7 @@ static void dw8250_set_divisor(struct uart_port *p, unsigned int baud,
+ void dw8250_setup_port(struct uart_port *p)
+ {
+ struct uart_8250_port *up = up_to_u8250p(p);
+- u32 reg;
++ u32 reg, old_dlf;
+
+ /*
+ * If the Component Version Register returns zero, we know that
+@@ -93,9 +93,11 @@ void dw8250_setup_port(struct uart_port *p)
+ dev_dbg(p->dev, "Designware UART version %c.%c%c\n",
+ (reg >> 24) & 0xff, (reg >> 16) & 0xff, (reg >> 8) & 0xff);
+
++ /* Preserve value written by firmware or bootloader */
++ old_dlf = dw8250_readl_ext(p, DW_UART_DLF);
+ dw8250_writel_ext(p, DW_UART_DLF, ~0U);
+ reg = dw8250_readl_ext(p, DW_UART_DLF);
+- dw8250_writel_ext(p, DW_UART_DLF, 0);
++ dw8250_writel_ext(p, DW_UART_DLF, old_dlf);
+
+ if (reg) {
+ struct dw8250_port_data *d = p->private_data;
+diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c
+index f8e99995eee91..d94c3811a8f7a 100644
+--- a/drivers/tty/serial/8250/8250_em.c
++++ b/drivers/tty/serial/8250/8250_em.c
+@@ -106,8 +106,8 @@ static int serial8250_em_probe(struct platform_device *pdev)
+ memset(&up, 0, sizeof(up));
+ up.port.mapbase = regs->start;
+ up.port.irq = irq;
+- up.port.type = PORT_UNKNOWN;
+- up.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_IOREMAP;
++ up.port.type = PORT_16750;
++ up.port.flags = UPF_FIXED_PORT | UPF_IOREMAP | UPF_FIXED_TYPE;
+ up.port.dev = &pdev->dev;
+ up.port.private_data = priv;
+
+diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
+index d502240bbcf23..0b1976ceb01f7 100644
+--- a/drivers/tty/serial/8250/8250_exar.c
++++ b/drivers/tty/serial/8250/8250_exar.c
+@@ -40,9 +40,19 @@
+ #define PCI_DEVICE_ID_COMMTECH_4224PCIE 0x0020
+ #define PCI_DEVICE_ID_COMMTECH_4228PCIE 0x0021
+ #define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022
++
+ #define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358
+ #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358
+
++#define PCI_SUBDEVICE_ID_USR_2980 0x0128
++#define PCI_SUBDEVICE_ID_USR_2981 0x0129
++
++#define PCI_DEVICE_ID_SEALEVEL_710xC 0x1001
++#define PCI_DEVICE_ID_SEALEVEL_720xC 0x1002
++#define PCI_DEVICE_ID_SEALEVEL_740xC 0x1004
++#define PCI_DEVICE_ID_SEALEVEL_780xC 0x1008
++#define PCI_DEVICE_ID_SEALEVEL_716xC 0x1010
++
+ #define UART_EXAR_INT0 0x80
+ #define UART_EXAR_8XMODE 0x88 /* 8X sampling rate select */
+ #define UART_EXAR_SLEEP 0x8b /* Sleep mode */
+@@ -623,7 +633,14 @@ exar_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
+
+ maxnr = pci_resource_len(pcidev, bar) >> (board->reg_shift + 3);
+
+- nr_ports = board->num_ports ? board->num_ports : pcidev->device & 0x0f;
++ if (pcidev->vendor == PCI_VENDOR_ID_ACCESSIO)
++ nr_ports = BIT(((pcidev->device & 0x38) >> 3) - 1);
++ else if (board->num_ports)
++ nr_ports = board->num_ports;
++ else if (pcidev->vendor == PCI_VENDOR_ID_SEALEVEL)
++ nr_ports = pcidev->device & 0xff;
++ else
++ nr_ports = pcidev->device & 0x0f;
+
+ priv = devm_kzalloc(&pcidev->dev, struct_size(priv, line, nr_ports), GFP_KERNEL);
+ if (!priv)
+@@ -722,22 +739,6 @@ static int __maybe_unused exar_resume(struct device *dev)
+
+ static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume);
+
+-static const struct exar8250_board acces_com_2x = {
+- .num_ports = 2,
+- .setup = pci_xr17c154_setup,
+-};
+-
+-static const struct exar8250_board acces_com_4x = {
+- .num_ports = 4,
+- .setup = pci_xr17c154_setup,
+-};
+-
+-static const struct exar8250_board acces_com_8x = {
+- .num_ports = 8,
+- .setup = pci_xr17c154_setup,
+-};
+-
+-
+ static const struct exar8250_board pbn_fastcom335_2 = {
+ .num_ports = 2,
+ .setup = pci_fastcom335_setup,
+@@ -821,14 +822,23 @@ static const struct exar8250_board pbn_exar_XR17V8358 = {
+ (kernel_ulong_t)&bd \
+ }
+
++#define USR_DEVICE(devid, sdevid, bd) { \
++ PCI_DEVICE_SUB( \
++ PCI_VENDOR_ID_USR, \
++ PCI_DEVICE_ID_EXAR_##devid, \
++ PCI_VENDOR_ID_EXAR, \
++ PCI_SUBDEVICE_ID_USR_##sdevid), 0, 0, \
++ (kernel_ulong_t)&bd \
++ }
++
+ static const struct pci_device_id exar_pci_tbl[] = {
+- EXAR_DEVICE(ACCESSIO, COM_2S, acces_com_2x),
+- EXAR_DEVICE(ACCESSIO, COM_4S, acces_com_4x),
+- EXAR_DEVICE(ACCESSIO, COM_8S, acces_com_8x),
+- EXAR_DEVICE(ACCESSIO, COM232_8, acces_com_8x),
+- EXAR_DEVICE(ACCESSIO, COM_2SM, acces_com_2x),
+- EXAR_DEVICE(ACCESSIO, COM_4SM, acces_com_4x),
+- EXAR_DEVICE(ACCESSIO, COM_8SM, acces_com_8x),
++ EXAR_DEVICE(ACCESSIO, COM_2S, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM_4S, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM_8S, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM232_8, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM_2SM, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM_4SM, pbn_exar_XR17C15x),
++ EXAR_DEVICE(ACCESSIO, COM_8SM, pbn_exar_XR17C15x),
+
+ CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect),
+ CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect),
+@@ -845,6 +855,10 @@ static const struct pci_device_id exar_pci_tbl[] = {
+
+ IBM_DEVICE(XR17C152, SATURN_SERIAL_ONE_PORT, pbn_exar_ibm_saturn),
+
++ /* USRobotics USR298x-OEM PCI Modems */
++ USR_DEVICE(XR17C152, 2980, pbn_exar_XR17C15x),
++ USR_DEVICE(XR17C152, 2981, pbn_exar_XR17C15x),
++
+ /* Exar Corp. XR17C15[248] Dual/Quad/Octal UART */
+ EXAR_DEVICE(EXAR, XR17C152, pbn_exar_XR17C15x),
+ EXAR_DEVICE(EXAR, XR17C154, pbn_exar_XR17C15x),
+@@ -864,6 +878,12 @@ static const struct pci_device_id exar_pci_tbl[] = {
+ EXAR_DEVICE(COMMTECH, 4224PCI335, pbn_fastcom335_4),
+ EXAR_DEVICE(COMMTECH, 2324PCI335, pbn_fastcom335_4),
+ EXAR_DEVICE(COMMTECH, 2328PCI335, pbn_fastcom335_8),
++
++ EXAR_DEVICE(SEALEVEL, 710xC, pbn_exar_XR17V35x),
++ EXAR_DEVICE(SEALEVEL, 720xC, pbn_exar_XR17V35x),
++ EXAR_DEVICE(SEALEVEL, 740xC, pbn_exar_XR17V35x),
++ EXAR_DEVICE(SEALEVEL, 780xC, pbn_exar_XR17V35x),
++ EXAR_DEVICE(SEALEVEL, 716xC, pbn_exar_XR17V35x),
+ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, exar_pci_tbl);
+diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c
+index 31c9e83ea3cb2..dba5950b8d0e2 100644
+--- a/drivers/tty/serial/8250/8250_fintek.c
++++ b/drivers/tty/serial/8250/8250_fintek.c
+@@ -200,12 +200,12 @@ static int fintek_8250_rs485_config(struct uart_port *port,
+ if (!pdata)
+ return -EINVAL;
+
+- /* Hardware do not support same RTS level on send and receive */
+- if (!(rs485->flags & SER_RS485_RTS_ON_SEND) ==
+- !(rs485->flags & SER_RS485_RTS_AFTER_SEND))
+- return -EINVAL;
+
+ if (rs485->flags & SER_RS485_ENABLED) {
++ /* Hardware do not support same RTS level on send and receive */
++ if (!(rs485->flags & SER_RS485_RTS_ON_SEND) ==
++ !(rs485->flags & SER_RS485_RTS_AFTER_SEND))
++ return -EINVAL;
+ memset(rs485->padding, 0, sizeof(rs485->padding));
+ config |= RS485_URA;
+ } else {
+@@ -290,25 +290,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata)
+ }
+ }
+
+-static void fintek_8250_goto_highspeed(struct uart_8250_port *uart,
+- struct fintek_8250 *pdata)
+-{
+- sio_write_reg(pdata, LDN, pdata->index);
+-
+- switch (pdata->pid) {
+- case CHIP_ID_F81966:
+- case CHIP_ID_F81866: /* set uart clock for high speed serial mode */
+- sio_write_mask_reg(pdata, F81866_UART_CLK,
+- F81866_UART_CLK_MASK,
+- F81866_UART_CLK_14_769MHZ);
+-
+- uart->port.uartclk = 921600 * 16;
+- break;
+- default: /* leave clock speed untouched */
+- break;
+- }
+-}
+-
+ static void fintek_8250_set_termios(struct uart_port *port,
+ struct ktermios *termios,
+ struct ktermios *old)
+@@ -430,7 +411,6 @@ static int probe_setup_port(struct fintek_8250 *pdata,
+
+ fintek_8250_set_irq_mode(pdata, level_mode);
+ fintek_8250_set_max_fifo(pdata);
+- fintek_8250_goto_highspeed(uart, pdata);
+
+ fintek_8250_exit_key(addr[i]);
+
+diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
+index fc65a2293ce9e..6a22f3a970f35 100644
+--- a/drivers/tty/serial/8250/8250_fsl.c
++++ b/drivers/tty/serial/8250/8250_fsl.c
+@@ -38,7 +38,7 @@ int fsl8250_handle_irq(struct uart_port *port)
+
+ iir = port->serial_in(port, UART_IIR);
+ if (iir & UART_IIR_NO_INT) {
+- spin_unlock(&up->port.lock);
++ spin_unlock_irqrestore(&up->port.lock, flags);
+ return 0;
+ }
+
+@@ -46,7 +46,7 @@ int fsl8250_handle_irq(struct uart_port *port)
+ if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) {
+ up->lsr_saved_flags &= ~UART_LSR_BI;
+ port->serial_in(port, UART_RX);
+- spin_unlock(&up->port.lock);
++ spin_unlock_irqrestore(&up->port.lock, flags);
+ return 1;
+ }
+
+@@ -81,7 +81,7 @@ int fsl8250_handle_irq(struct uart_port *port)
+ if ((lsr & UART_LSR_THRE) && (up->ier & UART_IER_THRI))
+ serial8250_tx_chars(up);
+
+- up->lsr_saved_flags = orig_lsr;
++ up->lsr_saved_flags |= orig_lsr & UART_LSR_BI;
+
+ uart_unlock_and_check_sysrq_irqrestore(&up->port, flags);
+
+diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c
+index 673cda3d011d0..948d0a1c6ae8e 100644
+--- a/drivers/tty/serial/8250/8250_gsc.c
++++ b/drivers/tty/serial/8250/8250_gsc.c
+@@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev)
+ unsigned long address;
+ int err;
+
+-#ifdef CONFIG_64BIT
++#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC)
+ if (!dev->irq && (dev->id.sversion == 0xad))
+ dev->irq = iosapic_serial_irq(dev);
+ #endif
+diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
+index 848d81e3838c2..87d70e81273c0 100644
+--- a/drivers/tty/serial/8250/8250_lpss.c
++++ b/drivers/tty/serial/8250/8250_lpss.c
+@@ -121,8 +121,7 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+ {
+ struct dw_dma_slave *param = &lpss->dma_param;
+ struct pci_dev *pdev = to_pci_dev(port->dev);
+- unsigned int dma_devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+- struct pci_dev *dma_dev = pci_get_slot(pdev->bus, dma_devfn);
++ struct pci_dev *dma_dev;
+
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_BYT_UART1:
+@@ -141,6 +140,8 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+ return -EINVAL;
+ }
+
++ dma_dev = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
++
+ param->dma_dev = &dma_dev->dev;
+ param->m_master = 0;
+ param->p_master = 1;
+@@ -156,6 +157,14 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+ return 0;
+ }
+
++static void byt_serial_exit(struct lpss8250 *lpss)
++{
++ struct dw_dma_slave *param = &lpss->dma_param;
++
++ /* Paired with pci_get_slot() in the byt_serial_setup() above */
++ put_device(param->dma_dev);
++}
++
+ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+ {
+ struct uart_8250_dma *dma = &lpss->data.dma;
+@@ -168,9 +177,19 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+ * matching with the registered General Purpose DMA controllers.
+ */
+ up->dma = dma;
++
++ lpss->dma_maxburst = 16;
++
+ return 0;
+ }
+
++static void ehl_serial_exit(struct lpss8250 *lpss)
++{
++ struct uart_8250_port *up = serial8250_get_port(lpss->data.line);
++
++ up->dma = NULL;
++}
++
+ #ifdef CONFIG_SERIAL_8250_DMA
+ static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
+ .nr_channels = 2,
+@@ -262,8 +281,13 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port
+ struct dw_dma_slave *rx_param, *tx_param;
+ struct device *dev = port->port.dev;
+
+- if (!lpss->dma_param.dma_dev)
++ if (!lpss->dma_param.dma_dev) {
++ dma = port->dma;
++ if (dma)
++ goto out_configuration_only;
++
+ return 0;
++ }
+
+ rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL);
+ if (!rx_param)
+@@ -274,16 +298,18 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port
+ return -ENOMEM;
+
+ *rx_param = lpss->dma_param;
+- dma->rxconf.src_maxburst = lpss->dma_maxburst;
+-
+ *tx_param = lpss->dma_param;
+- dma->txconf.dst_maxburst = lpss->dma_maxburst;
+
+ dma->fn = lpss8250_dma_filter;
+ dma->rx_param = rx_param;
+ dma->tx_param = tx_param;
+
+ port->dma = dma;
++
++out_configuration_only:
++ dma->rxconf.src_maxburst = lpss->dma_maxburst;
++ dma->txconf.dst_maxburst = lpss->dma_maxburst;
++
+ return 0;
+ }
+
+@@ -345,8 +371,7 @@ static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ return 0;
+
+ err_exit:
+- if (lpss->board->exit)
+- lpss->board->exit(lpss);
++ lpss->board->exit(lpss);
+ pci_free_irq_vectors(pdev);
+ return ret;
+ }
+@@ -357,8 +382,7 @@ static void lpss8250_remove(struct pci_dev *pdev)
+
+ serial8250_unregister_port(lpss->data.line);
+
+- if (lpss->board->exit)
+- lpss->board->exit(lpss);
++ lpss->board->exit(lpss);
+ pci_free_irq_vectors(pdev);
+ }
+
+@@ -366,12 +390,14 @@ static const struct lpss8250_board byt_board = {
+ .freq = 100000000,
+ .base_baud = 2764800,
+ .setup = byt_serial_setup,
++ .exit = byt_serial_exit,
+ };
+
+ static const struct lpss8250_board ehl_board = {
+ .freq = 200000000,
+ .base_baud = 12500000,
+ .setup = ehl_serial_setup,
++ .exit = ehl_serial_exit,
+ };
+
+ static const struct lpss8250_board qrk_board = {
+diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c
+index efa0515139f8e..e6c1791609ddf 100644
+--- a/drivers/tty/serial/8250/8250_mid.c
++++ b/drivers/tty/serial/8250/8250_mid.c
+@@ -73,6 +73,11 @@ static int pnw_setup(struct mid8250 *mid, struct uart_port *p)
+ return 0;
+ }
+
++static void pnw_exit(struct mid8250 *mid)
++{
++ pci_dev_put(mid->dma_dev);
++}
++
+ static int tng_handle_irq(struct uart_port *p)
+ {
+ struct mid8250 *mid = p->private_data;
+@@ -124,6 +129,11 @@ static int tng_setup(struct mid8250 *mid, struct uart_port *p)
+ return 0;
+ }
+
++static void tng_exit(struct mid8250 *mid)
++{
++ pci_dev_put(mid->dma_dev);
++}
++
+ static int dnv_handle_irq(struct uart_port *p)
+ {
+ struct mid8250 *mid = p->private_data;
+@@ -330,9 +340,9 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+
+ pci_set_drvdata(pdev, mid);
+ return 0;
++
+ err:
+- if (mid->board->exit)
+- mid->board->exit(mid);
++ mid->board->exit(mid);
+ return ret;
+ }
+
+@@ -342,8 +352,7 @@ static void mid8250_remove(struct pci_dev *pdev)
+
+ serial8250_unregister_port(mid->line);
+
+- if (mid->board->exit)
+- mid->board->exit(mid);
++ mid->board->exit(mid);
+ }
+
+ static const struct mid8250_board pnw_board = {
+@@ -351,6 +360,7 @@ static const struct mid8250_board pnw_board = {
+ .freq = 50000000,
+ .base_baud = 115200,
+ .setup = pnw_setup,
++ .exit = pnw_exit,
+ };
+
+ static const struct mid8250_board tng_board = {
+@@ -358,6 +368,7 @@ static const struct mid8250_board tng_board = {
+ .freq = 38400000,
+ .base_baud = 1843200,
+ .setup = tng_setup,
++ .exit = tng_exit,
+ };
+
+ static const struct mid8250_board dnv_board = {
+diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
+index fb65dc601b237..de48a58460f47 100644
+--- a/drivers/tty/serial/8250/8250_mtk.c
++++ b/drivers/tty/serial/8250/8250_mtk.c
+@@ -37,6 +37,7 @@
+ #define MTK_UART_IER_RTSI 0x40 /* Enable RTS Modem status interrupt */
+ #define MTK_UART_IER_CTSI 0x80 /* Enable CTS Modem status interrupt */
+
++#define MTK_UART_EFR 38 /* I/O: Extended Features Register */
+ #define MTK_UART_EFR_EN 0x10 /* Enable enhancement feature */
+ #define MTK_UART_EFR_RTS 0x40 /* Enable hardware rx flow control */
+ #define MTK_UART_EFR_CTS 0x80 /* Enable hardware tx flow control */
+@@ -53,6 +54,9 @@
+ #define MTK_UART_TX_TRIGGER 1
+ #define MTK_UART_RX_TRIGGER MTK_UART_RX_SIZE
+
++#define MTK_UART_XON1 40 /* I/O: Xon character 1 */
++#define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */
++
+ #ifdef CONFIG_SERIAL_8250_DMA
+ enum dma_rx_status {
+ DMA_RX_START = 0,
+@@ -169,7 +173,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up)
+ MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX);
+
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+- serial_out(up, UART_EFR, UART_EFR_ECB);
++ serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
+ serial_out(up, UART_LCR, lcr);
+
+ if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0)
+@@ -232,7 +236,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
+ int lcr = serial_in(up, UART_LCR);
+
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+- serial_out(up, UART_EFR, UART_EFR_ECB);
++ serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
+ serial_out(up, UART_LCR, lcr);
+ lcr = serial_in(up, UART_LCR);
+
+@@ -241,7 +245,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
+ serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR);
+ serial_out(up, MTK_UART_ESCAPE_EN, 0x00);
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+- serial_out(up, UART_EFR, serial_in(up, UART_EFR) &
++ serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) &
+ (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)));
+ serial_out(up, UART_LCR, lcr);
+ mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI |
+@@ -255,8 +259,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+
+ /*enable hw flow control*/
+- serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC |
+- (serial_in(up, UART_EFR) &
++ serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC |
++ (serial_in(up, MTK_UART_EFR) &
+ (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
+
+ serial_out(up, UART_LCR, lcr);
+@@ -270,12 +274,12 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+
+ /*enable sw flow control */
+- serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
+- (serial_in(up, UART_EFR) &
++ serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
++ (serial_in(up, MTK_UART_EFR) &
+ (~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
+
+- serial_out(up, UART_XON1, START_CHAR(port->state->port.tty));
+- serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty));
++ serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty));
++ serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty));
+ serial_out(up, UART_LCR, lcr);
+ mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI);
+ mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI);
+diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
+index bce28729dd7bd..be8626234627e 100644
+--- a/drivers/tty/serial/8250/8250_of.c
++++ b/drivers/tty/serial/8250/8250_of.c
+@@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
+ port->mapsize = resource_size(&resource);
+
+ /* Check for shifted address mapping */
+- if (of_property_read_u32(np, "reg-offset", &prop) == 0)
++ if (of_property_read_u32(np, "reg-offset", &prop) == 0) {
++ if (prop >= port->mapsize) {
++ dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n",
++ prop, &port->mapsize);
++ ret = -EINVAL;
++ goto err_unprepare;
++ }
++
+ port->mapbase += prop;
++ port->mapsize -= prop;
++ }
+
+ port->iotype = UPIO_MEM;
+ if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
+diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
+index 73e5f1dbd075d..a6b374c026a87 100644
+--- a/drivers/tty/serial/8250/8250_omap.c
++++ b/drivers/tty/serial/8250/8250_omap.c
+@@ -157,7 +157,11 @@ static u32 uart_read(struct uart_8250_port *up, u32 reg)
+ return readl(up->port.membase + (reg << up->port.regshift));
+ }
+
+-static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
++/*
++ * Called on runtime PM resume path from omap8250_restore_regs(), and
++ * omap8250_set_mctrl().
++ */
++static void __omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ {
+ struct uart_8250_port *up = up_to_u8250p(port);
+ struct omap8250_priv *priv = up->port.private_data;
+@@ -181,6 +185,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ }
+ }
+
++static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
++{
++ int err;
++
++ err = pm_runtime_resume_and_get(port->dev);
++ if (err)
++ return;
++
++ __omap8250_set_mctrl(port, mctrl);
++
++ pm_runtime_mark_last_busy(port->dev);
++ pm_runtime_put_autosuspend(port->dev);
++}
++
+ /*
+ * Work Around for Errata i202 (2430, 3430, 3630, 4430 and 4460)
+ * The access to uart register after MDR1 Access
+@@ -193,27 +211,10 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ static void omap_8250_mdr1_errataset(struct uart_8250_port *up,
+ struct omap8250_priv *priv)
+ {
+- u8 timeout = 255;
+-
+ serial_out(up, UART_OMAP_MDR1, priv->mdr1);
+ udelay(2);
+ serial_out(up, UART_FCR, up->fcr | UART_FCR_CLEAR_XMIT |
+ UART_FCR_CLEAR_RCVR);
+- /*
+- * Wait for FIFO to empty: when empty, RX_FIFO_E bit is 0 and
+- * TX_FIFO_E bit is 1.
+- */
+- while (UART_LSR_THRE != (serial_in(up, UART_LSR) &
+- (UART_LSR_THRE | UART_LSR_DR))) {
+- timeout--;
+- if (!timeout) {
+- /* Should *never* happen. we warn and carry on */
+- dev_crit(up->port.dev, "Errata i202: timedout %x\n",
+- serial_in(up, UART_LSR));
+- break;
+- }
+- udelay(1);
+- }
+ }
+
+ static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud,
+@@ -292,6 +293,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
+ {
+ struct omap8250_priv *priv = up->port.private_data;
+ struct uart_8250_dma *dma = up->dma;
++ u8 mcr = serial8250_in_MCR(up);
+
+ if (dma && dma->tx_running) {
+ /*
+@@ -308,7 +310,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
+ serial_out(up, UART_EFR, UART_EFR_ECB);
+
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
+- serial8250_out_MCR(up, UART_MCR_TCRTLR);
++ serial8250_out_MCR(up, mcr | UART_MCR_TCRTLR);
+ serial_out(up, UART_FCR, up->fcr);
+
+ omap8250_update_scr(up, priv);
+@@ -324,7 +326,8 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
+ serial_out(up, UART_LCR, 0);
+
+ /* drop TCR + TLR access, we setup XON/XOFF later */
+- serial8250_out_MCR(up, up->mcr);
++ serial8250_out_MCR(up, mcr);
++
+ serial_out(up, UART_IER, up->ier);
+
+ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+@@ -341,7 +344,10 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
+
+ omap8250_update_mdr1(up, priv);
+
+- up->port.ops->set_mctrl(&up->port, up->port.mctrl);
++ __omap8250_set_mctrl(&up->port, up->port.mctrl);
++
++ if (up->port.rs485.flags & SER_RS485_ENABLED)
++ serial8250_em485_stop_tx(up);
+ }
+
+ /*
+@@ -647,6 +653,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
+ if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) {
+ unsigned long delay;
+
++ /* Synchronize UART_IER access against the console. */
++ spin_lock(&port->lock);
+ up->ier = port->serial_in(port, UART_IER);
+ if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
+ port->ops->stop_rx(port);
+@@ -656,6 +664,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
+ */
+ cancel_delayed_work(&up->overrun_backoff);
+ }
++ spin_unlock(&port->lock);
+
+ delay = msecs_to_jiffies(up->overrun_backoff_time_ms);
+ schedule_delayed_work(&up->overrun_backoff, delay);
+@@ -680,7 +689,6 @@ static int omap_8250_startup(struct uart_port *port)
+
+ pm_runtime_get_sync(port->dev);
+
+- up->mcr = 0;
+ serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
+
+ serial_out(up, UART_LCR, UART_LCR_WLEN8);
+@@ -1464,16 +1472,24 @@ static int omap8250_probe(struct platform_device *pdev)
+ err:
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
++ flush_work(&priv->qos_work);
+ pm_runtime_disable(&pdev->dev);
++ cpu_latency_qos_remove_request(&priv->pm_qos_request);
+ return ret;
+ }
+
+ static int omap8250_remove(struct platform_device *pdev)
+ {
+ struct omap8250_priv *priv = platform_get_drvdata(pdev);
++ int err;
++
++ err = pm_runtime_resume_and_get(&pdev->dev);
++ if (err)
++ return err;
+
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
++ flush_work(&priv->qos_work);
+ pm_runtime_disable(&pdev->dev);
+ serial8250_unregister_port(priv->line);
+ cpu_latency_qos_remove_request(&priv->pm_qos_request);
+@@ -1505,25 +1521,35 @@ static int omap8250_suspend(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
+ struct uart_8250_port *up = serial8250_get_port(priv->line);
++ int err;
+
+ serial8250_suspend_port(priv->line);
+
+- pm_runtime_get_sync(dev);
++ err = pm_runtime_resume_and_get(dev);
++ if (err)
++ return err;
+ if (!device_may_wakeup(dev))
+ priv->wer = 0;
+ serial_out(up, UART_OMAP_WER, priv->wer);
+- pm_runtime_mark_last_busy(dev);
+- pm_runtime_put_autosuspend(dev);
+-
++ err = pm_runtime_force_suspend(dev);
+ flush_work(&priv->qos_work);
+- return 0;
++
++ return err;
+ }
+
+ static int omap8250_resume(struct device *dev)
+ {
+ struct omap8250_priv *priv = dev_get_drvdata(dev);
++ int err;
+
++ err = pm_runtime_force_resume(dev);
++ if (err)
++ return err;
+ serial8250_resume_port(priv->line);
++ /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */
++ pm_runtime_mark_last_busy(dev);
++ pm_runtime_put_autosuspend(dev);
++
+ return 0;
+ }
+ #else
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index 726912b16a559..5f0daa0d1dd0e 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -11,6 +11,7 @@
+ #include <linux/pci.h>
+ #include <linux/string.h>
+ #include <linux/kernel.h>
++#include <linux/math.h>
+ #include <linux/slab.h>
+ #include <linux/delay.h>
+ #include <linux/tty.h>
+@@ -75,13 +76,12 @@ static int pci_default_setup(struct serial_private*,
+
+ static void moan_device(const char *str, struct pci_dev *dev)
+ {
+- dev_err(&dev->dev,
+- "%s: %s\n"
++ pci_err(dev, "%s\n"
+ "Please send the output of lspci -vv, this\n"
+ "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n"
+ "manufacturer and name of serial board or\n"
+ "modem board to <linux-serial@vger.kernel.org>.\n",
+- pci_name(dev), str, dev->vendor, dev->device,
++ str, dev->vendor, dev->device,
+ dev->subsystem_vendor, dev->subsystem_device);
+ }
+
+@@ -238,7 +238,7 @@ static int pci_inteli960ni_init(struct pci_dev *dev)
+ /* is firmware started? */
+ pci_read_config_dword(dev, 0x44, &oldval);
+ if (oldval == 0x00001000L) { /* RESET value */
+- dev_dbg(&dev->dev, "Local i960 firmware missing\n");
++ pci_dbg(dev, "Local i960 firmware missing\n");
+ return -ENODEV;
+ }
+ return 0;
+@@ -588,9 +588,8 @@ static int pci_timedia_probe(struct pci_dev *dev)
+ * (0,2,3,5,6: serial only -- 7,8,9: serial + parallel)
+ */
+ if ((dev->subsystem_device & 0x00f0) >= 0x70) {
+- dev_info(&dev->dev,
+- "ignoring Timedia subdevice %04x for parport_serial\n",
+- dev->subsystem_device);
++ pci_info(dev, "ignoring Timedia subdevice %04x for parport_serial\n",
++ dev->subsystem_device);
+ return -ENODEV;
+ }
+
+@@ -827,8 +826,7 @@ static int pci_netmos_9900_numports(struct pci_dev *dev)
+ if (sub_serports > 0)
+ return sub_serports;
+
+- dev_err(&dev->dev,
+- "NetMos/Mostech serial driver ignoring port on ambiguous config.\n");
++ pci_err(dev, "NetMos/Mostech serial driver ignoring port on ambiguous config.\n");
+ return 0;
+ }
+
+@@ -897,18 +895,16 @@ static int pci_netmos_init(struct pci_dev *dev)
+ /* enable IO_Space bit */
+ #define ITE_887x_POSIO_ENABLE (1 << 31)
+
++/* inta_addr are the configuration addresses of the ITE */
++static const short inta_addr[] = { 0x2a0, 0x2c0, 0x220, 0x240, 0x1e0, 0x200, 0x280 };
+ static int pci_ite887x_init(struct pci_dev *dev)
+ {
+- /* inta_addr are the configuration addresses of the ITE */
+- static const short inta_addr[] = { 0x2a0, 0x2c0, 0x220, 0x240, 0x1e0,
+- 0x200, 0x280, 0 };
+ int ret, i, type;
+ struct resource *iobase = NULL;
+ u32 miscr, uartbar, ioport;
+
+ /* search for the base-ioport */
+- i = 0;
+- while (inta_addr[i] && iobase == NULL) {
++ for (i = 0; i < ARRAY_SIZE(inta_addr); i++) {
+ iobase = request_region(inta_addr[i], ITE_887x_IOSIZE,
+ "ite887x");
+ if (iobase != NULL) {
+@@ -925,13 +921,11 @@ static int pci_ite887x_init(struct pci_dev *dev)
+ break;
+ }
+ release_region(iobase->start, ITE_887x_IOSIZE);
+- iobase = NULL;
+ }
+- i++;
+ }
+
+- if (!inta_addr[i]) {
+- dev_err(&dev->dev, "ite887x: could not find iobase\n");
++ if (i == ARRAY_SIZE(inta_addr)) {
++ pci_err(dev, "could not find iobase\n");
+ return -ENODEV;
+ }
+
+@@ -1001,43 +995,29 @@ static void pci_ite887x_exit(struct pci_dev *dev)
+ }
+
+ /*
+- * EndRun Technologies.
+- * Determine the number of ports available on the device.
++ * Oxford Semiconductor Inc.
++ * Check if an OxSemi device is part of the Tornado range of devices.
+ */
+ #define PCI_VENDOR_ID_ENDRUN 0x7401
+ #define PCI_DEVICE_ID_ENDRUN_1588 0xe100
+
+-static int pci_endrun_init(struct pci_dev *dev)
++static bool pci_oxsemi_tornado_p(struct pci_dev *dev)
+ {
+- u8 __iomem *p;
+- unsigned long deviceID;
+- unsigned int number_uarts = 0;
++ /* OxSemi Tornado devices are all 0xCxxx */
++ if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
++ (dev->device & 0xf000) != 0xc000)
++ return false;
+
+- /* EndRun device is all 0xexxx */
++ /* EndRun devices are all 0xExxx */
+ if (dev->vendor == PCI_VENDOR_ID_ENDRUN &&
+- (dev->device & 0xf000) != 0xe000)
+- return 0;
+-
+- p = pci_iomap(dev, 0, 5);
+- if (p == NULL)
+- return -ENOMEM;
++ (dev->device & 0xf000) != 0xe000)
++ return false;
+
+- deviceID = ioread32(p);
+- /* EndRun device */
+- if (deviceID == 0x07000200) {
+- number_uarts = ioread8(p + 4);
+- dev_dbg(&dev->dev,
+- "%d ports detected on EndRun PCI Express device\n",
+- number_uarts);
+- }
+- pci_iounmap(dev, p);
+- return number_uarts;
++ return true;
+ }
+
+ /*
+- * Oxford Semiconductor Inc.
+- * Check that device is part of the Tornado range of devices, then determine
+- * the number of ports available on the device.
++ * Determine the number of ports available on a Tornado device.
+ */
+ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+ {
+@@ -1045,9 +1025,7 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+ unsigned long deviceID;
+ unsigned int number_uarts = 0;
+
+- /* OxSemi Tornado devices are all 0xCxxx */
+- if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
+- (dev->device & 0xF000) != 0xC000)
++ if (!pci_oxsemi_tornado_p(dev))
+ return 0;
+
+ p = pci_iomap(dev, 0, 5);
+@@ -1058,20 +1036,220 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+ /* Tornado device */
+ if (deviceID == 0x07000200) {
+ number_uarts = ioread8(p + 4);
+- dev_dbg(&dev->dev,
+- "%d ports detected on Oxford PCI Express device\n",
+- number_uarts);
++ pci_dbg(dev, "%d ports detected on %s PCI Express device\n",
++ number_uarts,
++ dev->vendor == PCI_VENDOR_ID_ENDRUN ?
++ "EndRun" : "Oxford");
+ }
+ pci_iounmap(dev, p);
+ return number_uarts;
+ }
+
+-static int pci_asix_setup(struct serial_private *priv,
+- const struct pciserial_board *board,
+- struct uart_8250_port *port, int idx)
++/* Tornado-specific constants for the TCR and CPR registers; see below. */
++#define OXSEMI_TORNADO_TCR_MASK 0xf
++#define OXSEMI_TORNADO_CPR_MASK 0x1ff
++#define OXSEMI_TORNADO_CPR_MIN 0x008
++#define OXSEMI_TORNADO_CPR_DEF 0x10f
++
++/*
++ * Determine the oversampling rate, the clock prescaler, and the clock
++ * divisor for the requested baud rate. The clock rate is 62.5 MHz,
++ * which is four times the baud base, and the prescaler increments in
++ * steps of 1/8. Therefore to make calculations on integers we need
++ * to use a scaled clock rate, which is the baud base multiplied by 32
++ * (or our assumed UART clock rate multiplied by 2).
++ *
++ * The allowed oversampling rates are from 4 up to 16 inclusive (values
++ * from 0 to 3 inclusive map to 16). Likewise the clock prescaler allows
++ * values between 1.000 and 63.875 inclusive (operation for values from
++ * 0.000 to 0.875 has not been specified). The clock divisor is the usual
++ * unsigned 16-bit integer.
++ *
++ * For the most accurate baud rate we use a table of predetermined
++ * oversampling rates and clock prescalers that records all possible
++ * products of the two parameters in the range from 4 up to 255 inclusive,
++ * and additionally 335 for the 1500000bps rate, with the prescaler scaled
++ * by 8. The table is sorted by the decreasing value of the oversampling
++ * rate and ties are resolved by sorting by the decreasing value of the
++ * product. This way preference is given to higher oversampling rates.
++ *
++ * We iterate over the table and choose the product of an oversampling
++ * rate and a clock prescaler that gives the lowest integer division
++ * result deviation, or if an exact integer divider is found we stop
++ * looking for it right away. We do some fixup if the resulting clock
++ * divisor required would be out of its unsigned 16-bit integer range.
++ *
++ * Finally we abuse the supposed fractional part returned to encode the
++ * 4-bit value of the oversampling rate and the 9-bit value of the clock
++ * prescaler which will end up in the TCR and CPR/CPR2 registers.
++ */
++static unsigned int pci_oxsemi_tornado_get_divisor(struct uart_port *port,
++ unsigned int baud,
++ unsigned int *frac)
+ {
+- port->bugs |= UART_BUG_PARITY;
+- return pci_default_setup(priv, board, port, idx);
++ static u8 p[][2] = {
++ { 16, 14, }, { 16, 13, }, { 16, 12, }, { 16, 11, },
++ { 16, 10, }, { 16, 9, }, { 16, 8, }, { 15, 17, },
++ { 15, 16, }, { 15, 15, }, { 15, 14, }, { 15, 13, },
++ { 15, 12, }, { 15, 11, }, { 15, 10, }, { 15, 9, },
++ { 15, 8, }, { 14, 18, }, { 14, 17, }, { 14, 14, },
++ { 14, 13, }, { 14, 12, }, { 14, 11, }, { 14, 10, },
++ { 14, 9, }, { 14, 8, }, { 13, 19, }, { 13, 18, },
++ { 13, 17, }, { 13, 13, }, { 13, 12, }, { 13, 11, },
++ { 13, 10, }, { 13, 9, }, { 13, 8, }, { 12, 19, },
++ { 12, 18, }, { 12, 17, }, { 12, 11, }, { 12, 9, },
++ { 12, 8, }, { 11, 23, }, { 11, 22, }, { 11, 21, },
++ { 11, 20, }, { 11, 19, }, { 11, 18, }, { 11, 17, },
++ { 11, 11, }, { 11, 10, }, { 11, 9, }, { 11, 8, },
++ { 10, 25, }, { 10, 23, }, { 10, 20, }, { 10, 19, },
++ { 10, 17, }, { 10, 10, }, { 10, 9, }, { 10, 8, },
++ { 9, 27, }, { 9, 23, }, { 9, 21, }, { 9, 19, },
++ { 9, 18, }, { 9, 17, }, { 9, 9, }, { 9, 8, },
++ { 8, 31, }, { 8, 29, }, { 8, 23, }, { 8, 19, },
++ { 8, 17, }, { 8, 8, }, { 7, 35, }, { 7, 31, },
++ { 7, 29, }, { 7, 25, }, { 7, 23, }, { 7, 21, },
++ { 7, 19, }, { 7, 17, }, { 7, 15, }, { 7, 14, },
++ { 7, 13, }, { 7, 12, }, { 7, 11, }, { 7, 10, },
++ { 7, 9, }, { 7, 8, }, { 6, 41, }, { 6, 37, },
++ { 6, 31, }, { 6, 29, }, { 6, 23, }, { 6, 19, },
++ { 6, 17, }, { 6, 13, }, { 6, 11, }, { 6, 10, },
++ { 6, 9, }, { 6, 8, }, { 5, 67, }, { 5, 47, },
++ { 5, 43, }, { 5, 41, }, { 5, 37, }, { 5, 31, },
++ { 5, 29, }, { 5, 25, }, { 5, 23, }, { 5, 19, },
++ { 5, 17, }, { 5, 15, }, { 5, 13, }, { 5, 11, },
++ { 5, 10, }, { 5, 9, }, { 5, 8, }, { 4, 61, },
++ { 4, 59, }, { 4, 53, }, { 4, 47, }, { 4, 43, },
++ { 4, 41, }, { 4, 37, }, { 4, 31, }, { 4, 29, },
++ { 4, 23, }, { 4, 19, }, { 4, 17, }, { 4, 13, },
++ { 4, 9, }, { 4, 8, },
++ };
++ /* Scale the quotient for comparison to get the fractional part. */
++ const unsigned int quot_scale = 65536;
++ unsigned int sclk = port->uartclk * 2;
++ unsigned int sdiv = DIV_ROUND_CLOSEST(sclk, baud);
++ unsigned int best_squot;
++ unsigned int squot;
++ unsigned int quot;
++ u16 cpr;
++ u8 tcr;
++ int i;
++
++ /* Old custom speed handling. */
++ if (baud == 38400 && (port->flags & UPF_SPD_MASK) == UPF_SPD_CUST) {
++ unsigned int cust_div = port->custom_divisor;
++
++ quot = cust_div & UART_DIV_MAX;
++ tcr = (cust_div >> 16) & OXSEMI_TORNADO_TCR_MASK;
++ cpr = (cust_div >> 20) & OXSEMI_TORNADO_CPR_MASK;
++ if (cpr < OXSEMI_TORNADO_CPR_MIN)
++ cpr = OXSEMI_TORNADO_CPR_DEF;
++ } else {
++ best_squot = quot_scale;
++ for (i = 0; i < ARRAY_SIZE(p); i++) {
++ unsigned int spre;
++ unsigned int srem;
++ u8 cp;
++ u8 tc;
++
++ tc = p[i][0];
++ cp = p[i][1];
++ spre = tc * cp;
++
++ srem = sdiv % spre;
++ if (srem > spre / 2)
++ srem = spre - srem;
++ squot = DIV_ROUND_CLOSEST(srem * quot_scale, spre);
++
++ if (srem == 0) {
++ tcr = tc;
++ cpr = cp;
++ quot = sdiv / spre;
++ break;
++ } else if (squot < best_squot) {
++ best_squot = squot;
++ tcr = tc;
++ cpr = cp;
++ quot = DIV_ROUND_CLOSEST(sdiv, spre);
++ }
++ }
++ while (tcr <= (OXSEMI_TORNADO_TCR_MASK + 1) >> 1 &&
++ quot % 2 == 0) {
++ quot >>= 1;
++ tcr <<= 1;
++ }
++ while (quot > UART_DIV_MAX) {
++ if (tcr <= (OXSEMI_TORNADO_TCR_MASK + 1) >> 1) {
++ quot >>= 1;
++ tcr <<= 1;
++ } else if (cpr <= OXSEMI_TORNADO_CPR_MASK >> 1) {
++ quot >>= 1;
++ cpr <<= 1;
++ } else {
++ quot = quot * cpr / OXSEMI_TORNADO_CPR_MASK;
++ cpr = OXSEMI_TORNADO_CPR_MASK;
++ }
++ }
++ }
++
++ *frac = (cpr << 8) | (tcr & OXSEMI_TORNADO_TCR_MASK);
++ return quot;
++}
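As a worked illustration of the divisor search described in the comment above (a standalone sketch, not part of the patch): with the 62.5 MHz Tornado clock the scaled clock is 500 MHz, so for 115200 bps the rounded scaled divisor is 4340. In the table as listed, the first product that divides 4340 exactly is oversampling rate 14 with a scaled prescaler of 10 (i.e. 10/8 = 1.25), giving a 16-bit divisor of 31 and an actual rate of roughly 115207 bps. The small program below only redoes that arithmetic; DIV_ROUND_CLOSEST is re-defined locally and main()/printf() exist purely for the demonstration.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, divisor) (((x) + (divisor) / 2) / (divisor))

int main(void)
{
	const unsigned int uartclk = 250000000;	/* 16 x 15625000 baud base */
	const unsigned int baud = 115200;
	unsigned int sclk = uartclk * 2;	/* scaled clock rate: 500 MHz */
	unsigned int sdiv = DIV_ROUND_CLOSEST(sclk, baud);
	unsigned int tcr = 14;			/* oversampling rate */
	unsigned int cpr = 10;			/* clock prescaler scaled by 8 */
	unsigned int quot;

	if (sdiv % (tcr * cpr) == 0) {		/* exact hit, as in the driver's loop */
		quot = sdiv / (tcr * cpr);
		printf("sdiv=%u quot=%u actual=%u bps\n",
		       sdiv, quot, sclk / (tcr * cpr * quot));
	}
	return 0;
}

Running it prints sdiv=4340 quot=31 actual=115207 bps, matching the deviation one would expect from rounding the scaled divisor.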
++
++/*
++ * Set the oversampling rate in the transmitter clock cycle register (TCR),
++ * the clock prescaler in the clock prescaler register (CPR and CPR2), and
++ * the clock divisor in the divisor latch (DLL and DLM). Note that for
++ * backwards compatibility any write to CPR clears CPR2 and therefore CPR
++ * has to be written first, followed by CPR2, which occupies the location
++ * of CKS used with earlier UART designs.
++ */
++static void pci_oxsemi_tornado_set_divisor(struct uart_port *port,
++ unsigned int baud,
++ unsigned int quot,
++ unsigned int quot_frac)
++{
++ struct uart_8250_port *up = up_to_u8250p(port);
++ u8 cpr2 = quot_frac >> 16;
++ u8 cpr = quot_frac >> 8;
++ u8 tcr = quot_frac;
++
++ serial_icr_write(up, UART_TCR, tcr);
++ serial_icr_write(up, UART_CPR, cpr);
++ serial_icr_write(up, UART_CKS, cpr2);
++ serial8250_do_set_divisor(port, baud, quot, 0);
++}
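To make the packing shared by the two helpers above concrete (a hypothetical standalone demo, not part of the patch): the get_divisor() hook returns the 4-bit oversampling rate and the 9-bit scaled prescaler in the supposed fractional value, and set_divisor() unpacks it so that the ninth prescaler bit lands in CPR2. That split is why CPR has to be written before CPR2 here, since any write to CPR clears CPR2.

#include <stdio.h>

int main(void)
{
	/* 9-bit scaled prescaler 0x10a (33.25), oversampling rate 0xe (14) */
	unsigned int frac = (0x10a << 8) | 0xe;
	unsigned char cpr2 = frac >> 16;	/* ninth prescaler bit -> CPR2 */
	unsigned char cpr  = frac >> 8;		/* low eight bits      -> CPR  */
	unsigned char tcr  = frac;		/* oversampling rate   -> TCR  */

	printf("frac=0x%05x CPR2=%u CPR=0x%02x TCR=0x%x\n", frac, cpr2, cpr, tcr);
	return 0;
}

This prints frac=0x10a0e CPR2=1 CPR=0x0a TCR=0xe, i.e. the prescaler value that does not fit in CPR alone spills its top bit into CPR2.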
++
++/*
++ * For Tornado devices we force MCR[7] set for the Divide-by-M N/8 baud rate
++ * generator prescaler (CPR and CPR2). Otherwise no prescaler would be used.
++ */
++static void pci_oxsemi_tornado_set_mctrl(struct uart_port *port,
++ unsigned int mctrl)
++{
++ struct uart_8250_port *up = up_to_u8250p(port);
++
++ up->mcr |= UART_MCR_CLKSEL;
++ serial8250_do_set_mctrl(port, mctrl);
++}
++
++/*
++ * We require EFR features for clock programming, so set UPF_FULL_PROBE
++ * for full probing regardless of CONFIG_SERIAL_8250_16550A_VARIANTS setting.
++ */
++static int pci_oxsemi_tornado_setup(struct serial_private *priv,
++ const struct pciserial_board *board,
++ struct uart_8250_port *up, int idx)
++{
++ struct pci_dev *dev = priv->dev;
++
++ if (pci_oxsemi_tornado_p(dev)) {
++ up->port.flags |= UPF_FULL_PROBE;
++ up->port.get_divisor = pci_oxsemi_tornado_get_divisor;
++ up->port.set_divisor = pci_oxsemi_tornado_set_divisor;
++ up->port.set_mctrl = pci_oxsemi_tornado_set_mctrl;
++ }
++
++ return pci_default_setup(priv, board, up, idx);
+ }
+
+ /* Quatech devices have their own extra interface features */
+@@ -1120,15 +1298,15 @@ static struct quatech_feature quatech_cards[] = {
+ { 0, }
+ };
+
+-static int pci_quatech_amcc(u16 devid)
++static int pci_quatech_amcc(struct pci_dev *dev)
+ {
+ struct quatech_feature *qf = &quatech_cards[0];
+ while (qf->devid) {
+- if (qf->devid == devid)
++ if (qf->devid == dev->device)
+ return qf->amcc;
+ qf++;
+ }
+- pr_err("quatech: unknown port type '0x%04X'.\n", devid);
++ pci_err(dev, "unknown port type '0x%04X'.\n", dev->device);
+ return 0;
+ };
+
+@@ -1291,7 +1469,7 @@ static int pci_quatech_rs422(struct uart_8250_port *port)
+
+ static int pci_quatech_init(struct pci_dev *dev)
+ {
+- if (pci_quatech_amcc(dev->device)) {
++ if (pci_quatech_amcc(dev)) {
+ unsigned long base = pci_resource_start(dev, 0);
+ if (base) {
+ u32 tmp;
+@@ -1315,7 +1493,7 @@ static int pci_quatech_setup(struct serial_private *priv,
+ port->port.uartclk = pci_quatech_clock(port);
+ /* For now just warn about RS422 */
+ if (pci_quatech_rs422(port))
+- pr_warn("quatech: software control of RS422 features not currently supported.\n");
++ pci_warn(priv->dev, "software control of RS422 features not currently supported.\n");
+ return pci_default_setup(priv, board, port, idx);
+ }
+
+@@ -1349,29 +1527,33 @@ pericom_do_set_divisor(struct uart_port *port, unsigned int baud,
+ {
+ int scr;
+ int lcr;
+- int actual_baud;
+- int tolerance;
+
+- for (scr = 5 ; scr <= 15 ; scr++) {
+- actual_baud = 921600 * 16 / scr;
+- tolerance = actual_baud / 50;
++ for (scr = 16; scr > 4; scr--) {
++ unsigned int maxrate = port->uartclk / scr;
++ unsigned int divisor = max(maxrate / baud, 1U);
++ int delta = maxrate / divisor - baud;
+
+- if ((baud < actual_baud + tolerance) &&
+- (baud > actual_baud - tolerance)) {
++ if (baud > maxrate + baud / 50)
++ continue;
+
++ if (delta > baud / 50)
++ divisor++;
++
++ if (divisor > 0xffff)
++ continue;
++
++ /* Update delta due to possible divisor change */
++ delta = maxrate / divisor - baud;
++ if (abs(delta) < baud / 50) {
+ lcr = serial_port_in(port, UART_LCR);
+ serial_port_out(port, UART_LCR, lcr | 0x80);
+-
+- serial_port_out(port, UART_DLL, 1);
+- serial_port_out(port, UART_DLM, 0);
++ serial_port_out(port, UART_DLL, divisor & 0xff);
++ serial_port_out(port, UART_DLM, divisor >> 8 & 0xff);
+ serial_port_out(port, 2, 16 - scr);
+ serial_port_out(port, UART_LCR, lcr);
+ return;
+- } else if (baud > actual_baud) {
+- break;
+ }
+ }
+- serial8250_do_set_divisor(port, baud, quot, quot_frac);
+ }
+ static int pci_pericom_setup(struct serial_private *priv,
+ const struct pciserial_board *board,
+@@ -1525,7 +1707,7 @@ static int pci_fintek_setup(struct serial_private *priv,
+ /* Get the io address from configuration space */
+ pci_read_config_word(pdev, config_base + 4, &iobase);
+
+- dev_dbg(&pdev->dev, "%s: idx=%d iobase=0x%x", __func__, idx, iobase);
++ pci_dbg(pdev, "idx=%d iobase=0x%x", idx, iobase);
+
+ port->port.iotype = UPIO_PORT;
+ port->port.iobase = iobase;
+@@ -1549,7 +1731,6 @@ static int pci_fintek_init(struct pci_dev *dev)
+ resource_size_t bar_data[3];
+ u8 config_base;
+ struct serial_private *priv = pci_get_drvdata(dev);
+- struct uart_8250_port *port;
+
+ if (!(pci_resource_flags(dev, 5) & IORESOURCE_IO) ||
+ !(pci_resource_flags(dev, 4) & IORESOURCE_IO) ||
+@@ -1596,13 +1777,7 @@ static int pci_fintek_init(struct pci_dev *dev)
+
+ pci_write_config_byte(dev, config_base + 0x06, dev->irq);
+
+- if (priv) {
+- /* re-apply RS232/485 mode when
+- * pciserial_resume_ports()
+- */
+- port = serial8250_get_port(priv->line[i]);
+- pci_fintek_rs485_config(&port->port, NULL);
+- } else {
++ if (!priv) {
+ /* First init without port data
+ * force init to RS232 Mode
+ */
+@@ -1689,7 +1864,7 @@ static int skip_tx_en_setup(struct serial_private *priv,
+ struct uart_8250_port *port, int idx)
+ {
+ port->port.quirks |= UPQ_NO_TXEN_TEST;
+- dev_dbg(&priv->dev->dev,
++ pci_dbg(priv->dev,
+ "serial8250: skipping TxEn test for device [%04x:%04x] subsystem [%04x:%04x]\n",
+ priv->dev->vendor, priv->dev->device,
+ priv->dev->subsystem_vendor, priv->dev->subsystem_device);
+@@ -1864,6 +2039,8 @@ pci_moxa_setup(struct serial_private *priv,
+ #define PCI_SUBDEVICE_ID_SIIG_DUAL_30 0x2530
+ #define PCI_VENDOR_ID_ADVANTECH 0x13fe
+ #define PCI_DEVICE_ID_INTEL_CE4100_UART 0x2e66
++#define PCI_DEVICE_ID_ADVANTECH_PCI1600 0x1600
++#define PCI_DEVICE_ID_ADVANTECH_PCI1600_1611 0x1611
+ #define PCI_DEVICE_ID_ADVANTECH_PCI3620 0x3620
+ #define PCI_DEVICE_ID_ADVANTECH_PCI3618 0x3618
+ #define PCI_DEVICE_ID_ADVANTECH_PCIf618 0xf618
+@@ -1897,7 +2074,6 @@ pci_moxa_setup(struct serial_private *priv,
+ #define PCI_DEVICE_ID_WCH_CH355_4S 0x7173
+ #define PCI_VENDOR_ID_AGESTAR 0x5372
+ #define PCI_DEVICE_ID_AGESTAR_9375 0x6872
+-#define PCI_VENDOR_ID_ASIX 0x9710
+ #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a
+ #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e
+
+@@ -2317,12 +2493,19 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .setup = pci_pericom_setup_four_at_eight,
+ },
+ {
+- .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
++ .vendor = PCI_VENDOR_ID_ACCESIO,
+ .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .setup = pci_pericom_setup_four_at_eight,
+ },
++ {
++ .vendor = PCI_VENDOR_ID_ACCESIO,
++ .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ .setup = pci_pericom_setup_four_at_eight,
++ },
+ {
+ .vendor = PCI_VENDOR_ID_ACCESIO,
+ .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4,
+@@ -2506,7 +2689,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .device = PCI_ANY_ID,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+- .init = pci_endrun_init,
++ .init = pci_oxsemi_tornado_init,
+ .setup = pci_default_setup,
+ },
+ /*
+@@ -2518,7 +2701,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .init = pci_oxsemi_tornado_init,
+- .setup = pci_default_setup,
++ .setup = pci_oxsemi_tornado_setup,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MAINPINE,
+@@ -2526,7 +2709,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .init = pci_oxsemi_tornado_init,
+- .setup = pci_default_setup,
++ .setup = pci_oxsemi_tornado_setup,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_DIGI,
+@@ -2534,7 +2717,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .subvendor = PCI_SUBVENDOR_ID_IBM,
+ .subdevice = PCI_ANY_ID,
+ .init = pci_oxsemi_tornado_init,
+- .setup = pci_default_setup,
++ .setup = pci_oxsemi_tornado_setup,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_INTEL,
+@@ -2700,16 +2883,6 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+ .exit = pci_wch_ch38x_exit,
+ .setup = pci_wch_ch38x_setup,
+ },
+- /*
+- * ASIX devices with FIFO bug
+- */
+- {
+- .vendor = PCI_VENDOR_ID_ASIX,
+- .device = PCI_ANY_ID,
+- .subvendor = PCI_ANY_ID,
+- .subdevice = PCI_ANY_ID,
+- .setup = pci_asix_setup,
+- },
+ /*
+ * Broadcom TruManage (NetXtreme)
+ */
+@@ -2851,7 +3024,7 @@ enum pci_board_num_t {
+ pbn_b0_2_1843200,
+ pbn_b0_4_1843200,
+
+- pbn_b0_1_3906250,
++ pbn_b0_1_15625000,
+
+ pbn_b0_bt_1_115200,
+ pbn_b0_bt_2_115200,
+@@ -2929,12 +3102,11 @@ enum pci_board_num_t {
+ pbn_panacom2,
+ pbn_panacom4,
+ pbn_plx_romulus,
+- pbn_endrun_2_4000000,
+ pbn_oxsemi,
+- pbn_oxsemi_1_3906250,
+- pbn_oxsemi_2_3906250,
+- pbn_oxsemi_4_3906250,
+- pbn_oxsemi_8_3906250,
++ pbn_oxsemi_1_15625000,
++ pbn_oxsemi_2_15625000,
++ pbn_oxsemi_4_15625000,
++ pbn_oxsemi_8_15625000,
+ pbn_intel_i960,
+ pbn_sgi_ioc3,
+ pbn_computone_4,
+@@ -3081,10 +3253,10 @@ static struct pciserial_board pci_boards[] = {
+ .uart_offset = 8,
+ },
+
+- [pbn_b0_1_3906250] = {
++ [pbn_b0_1_15625000] = {
+ .flags = FL_BASE0,
+ .num_ports = 1,
+- .base_baud = 3906250,
++ .base_baud = 15625000,
+ .uart_offset = 8,
+ },
+
+@@ -3455,20 +3627,6 @@ static struct pciserial_board pci_boards[] = {
+ .first_offset = 0x03,
+ },
+
+- /*
+- * EndRun Technologies
+- * Uses the size of PCI Base region 0 to
+- * signal now many ports are available
+- * 2 port 952 Uart support
+- */
+- [pbn_endrun_2_4000000] = {
+- .flags = FL_BASE0,
+- .num_ports = 2,
+- .base_baud = 4000000,
+- .uart_offset = 0x200,
+- .first_offset = 0x1000,
+- },
+-
+ /*
+ * This board uses the size of PCI Base region 0 to
+ * signal how many ports are available
+@@ -3479,31 +3637,31 @@ static struct pciserial_board pci_boards[] = {
+ .base_baud = 115200,
+ .uart_offset = 8,
+ },
+- [pbn_oxsemi_1_3906250] = {
++ [pbn_oxsemi_1_15625000] = {
+ .flags = FL_BASE0,
+ .num_ports = 1,
+- .base_baud = 3906250,
++ .base_baud = 15625000,
+ .uart_offset = 0x200,
+ .first_offset = 0x1000,
+ },
+- [pbn_oxsemi_2_3906250] = {
++ [pbn_oxsemi_2_15625000] = {
+ .flags = FL_BASE0,
+ .num_ports = 2,
+- .base_baud = 3906250,
++ .base_baud = 15625000,
+ .uart_offset = 0x200,
+ .first_offset = 0x1000,
+ },
+- [pbn_oxsemi_4_3906250] = {
++ [pbn_oxsemi_4_15625000] = {
+ .flags = FL_BASE0,
+ .num_ports = 4,
+- .base_baud = 3906250,
++ .base_baud = 15625000,
+ .uart_offset = 0x200,
+ .first_offset = 0x1000,
+ },
+- [pbn_oxsemi_8_3906250] = {
++ [pbn_oxsemi_8_15625000] = {
+ .flags = FL_BASE0,
+ .num_ports = 8,
+- .base_baud = 3906250,
++ .base_baud = 15625000,
+ .uart_offset = 0x200,
+ .first_offset = 0x1000,
+ },
+@@ -4000,12 +4158,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
+ uart.port.irq = 0;
+ } else {
+ if (pci_match_id(pci_use_msi, dev)) {
+- dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
++ pci_dbg(dev, "Using MSI(-X) interrupts\n");
+ pci_set_master(dev);
+ uart.port.flags &= ~UPF_SHARE_IRQ;
+ rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
+ } else {
+- dev_dbg(&dev->dev, "Using legacy interrupts\n");
++ pci_dbg(dev, "Using legacy interrupts\n");
+ rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
+ }
+ if (rc < 0) {
+@@ -4023,12 +4181,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
+ if (quirk->setup(priv, board, &uart, i))
+ break;
+
+- dev_dbg(&dev->dev, "Setup PCI port: port %lx, irq %d, type %d\n",
++ pci_dbg(dev, "Setup PCI port: port %lx, irq %d, type %d\n",
+ uart.port.iobase, uart.port.irq, uart.port.iotype);
+
+ priv->line[i] = serial8250_register_8250_port(&uart);
+ if (priv->line[i] < 0) {
+- dev_err(&dev->dev,
++ pci_err(dev,
+ "Couldn't register serial port %lx, irq %d, type %d, error %d\n",
+ uart.port.iobase, uart.port.irq,
+ uart.port.iotype, priv->line[i]);
+@@ -4124,8 +4282,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
+ }
+
+ if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
+- dev_err(&dev->dev, "invalid driver_data: %ld\n",
+- ent->driver_data);
++ pci_err(dev, "invalid driver_data: %ld\n", ent->driver_data);
+ return -EINVAL;
+ }
+
+@@ -4208,7 +4365,7 @@ static int pciserial_resume_one(struct device *dev)
+ err = pci_enable_device(pdev);
+ /* FIXME: We cannot simply error out here */
+ if (err)
+- dev_err(dev, "Unable to re-enable ports, trying to continue.\n");
++ pci_err(pdev, "Unable to re-enable ports, trying to continue.\n");
+ pciserial_resume_ports(priv);
+ }
+ return 0;
+@@ -4219,6 +4376,9 @@ static SIMPLE_DEV_PM_OPS(pciserial_pm_ops, pciserial_suspend_one,
+ pciserial_resume_one);
+
+ static const struct pci_device_id serial_pci_tbl[] = {
++ { PCI_VENDOR_ID_ADVANTECH, PCI_DEVICE_ID_ADVANTECH_PCI1600,
++ PCI_DEVICE_ID_ADVANTECH_PCI1600_1611, PCI_ANY_ID, 0, 0,
++ pbn_b0_4_921600 },
+ /* Advantech use PCI_DEVICE_ID_ADVANTECH_PCI3620 (0x3620) as 'PCI_SUBVENDOR_ID' */
+ { PCI_VENDOR_ID_ADVANTECH, PCI_DEVICE_ID_ADVANTECH_PCI3620,
+ PCI_DEVICE_ID_ADVANTECH_PCI3620, 0x0001, 0, 0,
+@@ -4401,13 +4561,6 @@ static const struct pci_device_id serial_pci_tbl[] = {
+ { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_ROMULUS,
+ 0x10b5, 0x106a, 0, 0,
+ pbn_plx_romulus },
+- /*
+- * EndRun Technologies. PCI express device range.
+- * EndRun PTP/1588 has 2 Native UARTs.
+- */
+- { PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
+- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_endrun_2_4000000 },
+ /*
+ * Quatech cards. These actually have configurable clocks but for
+ * now we just use the default.
+@@ -4517,158 +4670,165 @@ static const struct pci_device_id serial_pci_tbl[] = {
+ */
+ { PCI_VENDOR_ID_OXSEMI, 0xc101, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc105, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc11b, /* OXPCIe952 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc11f, /* OXPCIe952 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc120, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc124, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc138, /* OXPCIe952 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc13d, /* OXPCIe952 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc140, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc141, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc144, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc145, /* OXPCIe952 1 Legacy UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_b0_1_3906250 },
++ pbn_b0_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc158, /* OXPCIe952 2 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_2_3906250 },
++ pbn_oxsemi_2_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc15d, /* OXPCIe952 2 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_2_3906250 },
++ pbn_oxsemi_2_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc208, /* OXPCIe954 4 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_4_3906250 },
++ pbn_oxsemi_4_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc20d, /* OXPCIe954 4 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_4_3906250 },
++ pbn_oxsemi_4_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc308, /* OXPCIe958 8 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_8_3906250 },
++ pbn_oxsemi_8_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc30d, /* OXPCIe958 8 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_8_3906250 },
++ pbn_oxsemi_8_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc40b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc40f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc41b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc41f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc42b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc42f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc43b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc43f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc44b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc44f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc45b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc45f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc46b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc46f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc47b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc47f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc48b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc48f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc49b, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc49f, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4ab, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4af, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4bb, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4bf, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4cb, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_OXSEMI, 0xc4cf, /* OXPCIe200 1 Native UART */
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ /*
+ * Mainpine Inc. IQ Express "Rev3" utilizing OxSemi Tornado
+ */
+ { PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 1 Port V.34 Super-G3 Fax */
+ PCI_VENDOR_ID_MAINPINE, 0x4001, 0, 0,
+- pbn_oxsemi_1_3906250 },
++ pbn_oxsemi_1_15625000 },
+ { PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 2 Port V.34 Super-G3 Fax */
+ PCI_VENDOR_ID_MAINPINE, 0x4002, 0, 0,
+- pbn_oxsemi_2_3906250 },
++ pbn_oxsemi_2_15625000 },
+ { PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 4 Port V.34 Super-G3 Fax */
+ PCI_VENDOR_ID_MAINPINE, 0x4004, 0, 0,
+- pbn_oxsemi_4_3906250 },
++ pbn_oxsemi_4_15625000 },
+ { PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 8 Port V.34 Super-G3 Fax */
+ PCI_VENDOR_ID_MAINPINE, 0x4008, 0, 0,
+- pbn_oxsemi_8_3906250 },
++ pbn_oxsemi_8_15625000 },
+
+ /*
+ * Digi/IBM PCIe 2-port Async EIA-232 Adapter utilizing OxSemi Tornado
+ */
+ { PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_2_OX_IBM,
+ PCI_SUBVENDOR_ID_IBM, PCI_ANY_ID, 0, 0,
+- pbn_oxsemi_2_3906250 },
++ pbn_oxsemi_2_15625000 },
++ /*
++ * EndRun Technologies. PCI express device range.
++ * EndRun PTP/1588 has 2 Native UARTs utilizing OxSemi 952.
++ */
++ { PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_oxsemi_2_15625000 },
+
+ /*
+ * SBS Technologies, Inc. P-Octal and PMC-OCTPRO cards,
+@@ -5192,8 +5352,30 @@ static const struct pci_device_id serial_pci_tbl[] = {
+ { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */
+ pbn_b2_4_115200 },
++ /* Brainboxes Devices */
++ /*
++ * Brainboxes UC-101
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0BA1,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ /*
++ * Brainboxes UC-235/246
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0AA1,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_1_115200 },
++ /*
++ * Brainboxes UC-257
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0861,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
+ /*
+- * BrainBoxes UC-260
++ * Brainboxes UC-260/271/701/756
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x0D21,
+ PCI_ANY_ID, PCI_ANY_ID,
+@@ -5201,8 +5383,191 @@ static const struct pci_device_id serial_pci_tbl[] = {
+ pbn_b2_4_115200 },
+ { PCI_VENDOR_ID_INTASHIELD, 0x0E34,
+ PCI_ANY_ID, PCI_ANY_ID,
+- PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
++ PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
++ pbn_b2_4_115200 },
++ /*
++ * Brainboxes UC-268
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0841,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_4_115200 },
++ /*
++ * Brainboxes UC-275/279
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0881,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_8_115200 },
++ /*
++ * Brainboxes UC-302
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x08E1,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ /*
++ * Brainboxes UC-310
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x08C1,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ /*
++ * Brainboxes UC-313
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x08A3,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ /*
++ * Brainboxes UC-320/324
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0A61,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_1_115200 },
++ /*
++ * Brainboxes UC-346
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0B02,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
+ pbn_b2_4_115200 },
++ /*
++ * Brainboxes UC-357
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0A81,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x0A83,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_2_115200 },
++ /*
++ * Brainboxes UC-368
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0C41,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_4_115200 },
++ /*
++ * Brainboxes UC-420/431
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x0921,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b2_4_115200 },
++ /*
++ * Brainboxes PX-101
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4005,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_2_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x4019,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_2_15625000 },
++ /*
++ * Brainboxes PX-235/246
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4004,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_1_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x4016,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_1_15625000 },
++ /*
++ * Brainboxes PX-203/PX-257
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4006,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_2_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x4015,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_4_15625000 },
++ /*
++ * Brainboxes PX-260/PX-701
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x400A,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_4_15625000 },
++ /*
++ * Brainboxes PX-310
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x400E,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_2_15625000 },
++ /*
++ * Brainboxes PX-313
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x400C,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_2_15625000 },
++ /*
++ * Brainboxes PX-320/324/PX-376/PX-387
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x400B,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_1_15625000 },
++ /*
++ * Brainboxes PX-335/346
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x400F,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_4_15625000 },
++ /*
++ * Brainboxes PX-368
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4010,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_4_15625000 },
++ /*
++ * Brainboxes PX-420
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4000,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_4_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x4011,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_4_15625000 },
++ /*
++ * Brainboxes PX-803
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4009,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_1_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x401E,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_1_15625000 },
++ /*
++ * Brainboxes PX-846
++ */
++ { PCI_VENDOR_ID_INTASHIELD, 0x4008,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_b0_1_115200 },
++ { PCI_VENDOR_ID_INTASHIELD, 0x4017,
++ PCI_ANY_ID, PCI_ANY_ID,
++ 0, 0,
++ pbn_oxsemi_1_15625000 },
++
+ /*
+ * Perle PCI-RAS cards
+ */
+diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
+index 66374704747ec..74e477016f255 100644
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -15,6 +15,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/ioport.h>
+ #include <linux/init.h>
++#include <linux/irq.h>
+ #include <linux/console.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/sysrq.h>
+@@ -307,6 +308,14 @@ static const struct serial8250_config uart_config[] = {
+ .rxtrig_bytes = {1, 32, 64, 112},
+ .flags = UART_CAP_FIFO | UART_CAP_SLEEP,
+ },
++ [PORT_ASPEED_VUART] = {
++ .name = "ASPEED VUART",
++ .fifo_size = 16,
++ .tx_loadsz = 16,
++ .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
++ .rxtrig_bytes = {1, 4, 8, 14},
++ .flags = UART_CAP_FIFO,
++ },
+ };
+
+ /* Uart divisor latch read */
+@@ -529,27 +538,6 @@ serial_port_out_sync(struct uart_port *p, int offset, int value)
+ }
+ }
+
+-/*
+- * For the 16C950
+- */
+-static void serial_icr_write(struct uart_8250_port *up, int offset, int value)
+-{
+- serial_out(up, UART_SCR, offset);
+- serial_out(up, UART_ICR, value);
+-}
+-
+-static unsigned int serial_icr_read(struct uart_8250_port *up, int offset)
+-{
+- unsigned int value;
+-
+- serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD);
+- serial_out(up, UART_SCR, offset);
+- value = serial_in(up, UART_ICR);
+- serial_icr_write(up, UART_ACR, up->acr);
+-
+- return value;
+-}
+-
+ /*
+ * FIFO support.
+ */
+@@ -613,7 +601,7 @@ EXPORT_SYMBOL_GPL(serial8250_rpm_put);
+ static int serial8250_em485_init(struct uart_8250_port *p)
+ {
+ if (p->em485)
+- return 0;
++ goto deassert_rts;
+
+ p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC);
+ if (!p->em485)
+@@ -629,7 +617,9 @@ static int serial8250_em485_init(struct uart_8250_port *p)
+ p->em485->active_timer = NULL;
+ p->em485->tx_stopped = true;
+
+- p->rs485_stop_tx(p);
++deassert_rts:
++ if (p->em485->tx_stopped)
++ p->rs485_stop_tx(p);
+
+ return 0;
+ }
+@@ -1042,7 +1032,8 @@ static void autoconfig_16550a(struct uart_8250_port *up)
+ up->port.type = PORT_16550A;
+ up->capabilities |= UART_CAP_FIFO;
+
+- if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS))
++ if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) &&
++ !(up->port.flags & UPF_FULL_PROBE))
+ return;
+
+ /*
+@@ -1527,6 +1518,8 @@ static inline void __stop_tx(struct uart_8250_port *p)
+
+ if (em485) {
+ unsigned char lsr = serial_in(p, UART_LSR);
++ p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++
+ /*
+ * To provide required timing and allow FIFO transfer,
+ * __stop_tx_rs485() must be called only when both FIFO and
+@@ -1615,6 +1608,18 @@ static inline void start_tx_rs485(struct uart_port *port)
+ struct uart_8250_port *up = up_to_u8250p(port);
+ struct uart_8250_em485 *em485 = up->em485;
+
++ /*
++ * While serial8250_em485_handle_stop_tx() is a noop if
++ * em485->active_timer != &em485->stop_tx_timer, it might happen that
++ * the timer is still armed and triggers only after the current bunch of
++ * chars is sent and em485->active_timer == &em485->stop_tx_timer again.
++ * So cancel the timer. There is still a theoretical race condition if
++ * the timer is already running and only comes around to check for
++ * em485->active_timer when &em485->stop_tx_timer is armed again.
++ */
++ if (em485->active_timer == &em485->stop_tx_timer)
++ hrtimer_try_to_cancel(&em485->stop_tx_timer);
++
+ em485->active_timer = NULL;
+
+ if (em485->tx_stopped) {
+@@ -1799,9 +1804,7 @@ void serial8250_tx_chars(struct uart_8250_port *up)
+ int count;
+
+ if (port->x_char) {
+- serial_out(up, UART_TX, port->x_char);
+- port->icount.tx++;
+- port->x_char = 0;
++ uart_xchar_out(port, UART_TX);
+ return;
+ }
+ if (uart_tx_stopped(port)) {
+@@ -1883,10 +1886,13 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status);
+ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir)
+ {
+ switch (iir & 0x3f) {
+- case UART_IIR_RX_TIMEOUT:
+- serial8250_rx_dma_flush(up);
++ case UART_IIR_RDI:
++ if (!up->dma->rx_running)
++ break;
+ fallthrough;
+ case UART_IIR_RLSI:
++ case UART_IIR_RX_TIMEOUT:
++ serial8250_rx_dma_flush(up);
+ return true;
+ }
+ return up->dma->rx_dma(up);
+@@ -1899,6 +1905,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
+ {
+ unsigned char status;
+ struct uart_8250_port *up = up_to_u8250p(port);
++ struct tty_port *tport = &port->state->port;
+ bool skip_rx = false;
+ unsigned long flags;
+
+@@ -1923,6 +1930,8 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
+ skip_rx = true;
+
+ if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) {
++ if (irqd_is_wakeup_set(irq_get_irq_data(port->irq)))
++ pm_wakeup_event(tport->tty->dev, 0);
+ if (!up->dma || handle_rx_dma(up, iir))
+ status = serial8250_rx_chars(up, status);
+ }
+@@ -1979,19 +1988,25 @@ static int serial8250_tx_threshold_handle_irq(struct uart_port *port)
+ static unsigned int serial8250_tx_empty(struct uart_port *port)
+ {
+ struct uart_8250_port *up = up_to_u8250p(port);
++ unsigned int result = 0;
+ unsigned long flags;
+ unsigned int lsr;
+
+ serial8250_rpm_get(up);
+
+ spin_lock_irqsave(&port->lock, flags);
+- lsr = serial_port_in(port, UART_LSR);
+- up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++ if (!serial8250_tx_dma_running(up)) {
++ lsr = serial_port_in(port, UART_LSR);
++ up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++
++ if ((lsr & BOTH_EMPTY) == BOTH_EMPTY)
++ result = TIOCSER_TEMT;
++ }
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ serial8250_rpm_put(up);
+
+- return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0;
++ return result;
+ }
+
+ unsigned int serial8250_do_get_mctrl(struct uart_port *port)
+@@ -2024,13 +2039,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ struct uart_8250_port *up = up_to_u8250p(port);
+ unsigned char mcr;
+
+- if (port->rs485.flags & SER_RS485_ENABLED) {
+- if (serial8250_in_MCR(up) & UART_MCR_RTS)
+- mctrl |= TIOCM_RTS;
+- else
+- mctrl &= ~TIOCM_RTS;
+- }
+-
+ mcr = serial8250_TIOCM_to_MCR(mctrl);
+
+ mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr;
+@@ -2041,6 +2049,9 @@ EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl);
+
+ static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ {
++ if (port->rs485.flags & SER_RS485_ENABLED)
++ return;
++
+ if (port->set_mctrl)
+ port->set_mctrl(port, mctrl);
+ else
+@@ -2287,6 +2298,10 @@ int serial8250_do_startup(struct uart_port *port)
+ if (port->irq && (up->port.flags & UPF_SHARE_IRQ))
+ up->port.irqflags |= IRQF_SHARED;
+
++ retval = up->ops->setup_irq(up);
++ if (retval)
++ goto out;
++
+ if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
+ unsigned char iir1;
+
+@@ -2329,9 +2344,7 @@ int serial8250_do_startup(struct uart_port *port)
+ }
+ }
+
+- retval = up->ops->setup_irq(up);
+- if (retval)
+- goto out;
++ up->ops->setup_timer(up);
+
+ /*
+ * Now, initialize the UART
+@@ -2607,11 +2620,8 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
+
+ if (c_cflag & CSTOPB)
+ cval |= UART_LCR_STOP;
+- if (c_cflag & PARENB) {
++ if (c_cflag & PARENB)
+ cval |= UART_LCR_PARITY;
+- if (up->bugs & UART_BUG_PARITY)
+- up->fifo_bug = true;
+- }
+ if (!(c_cflag & PARODD))
+ cval |= UART_LCR_EPAR;
+ #ifdef CMSPAR
+@@ -2696,21 +2706,32 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port,
+ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk)
+ {
+ struct uart_8250_port *up = up_to_u8250p(port);
++ struct tty_port *tport = &port->state->port;
+ unsigned int baud, quot, frac = 0;
+ struct ktermios *termios;
++ struct tty_struct *tty;
+ unsigned long flags;
+
+- mutex_lock(&port->state->port.mutex);
++ tty = tty_port_tty_get(tport);
++ if (!tty) {
++ mutex_lock(&tport->mutex);
++ port->uartclk = uartclk;
++ mutex_unlock(&tport->mutex);
++ return;
++ }
++
++ down_write(&tty->termios_rwsem);
++ mutex_lock(&tport->mutex);
+
+ if (port->uartclk == uartclk)
+ goto out_lock;
+
+ port->uartclk = uartclk;
+
+- if (!tty_port_initialized(&port->state->port))
++ if (!tty_port_initialized(tport))
+ goto out_lock;
+
+- termios = &port->state->port.tty->termios;
++ termios = &tty->termios;
+
+ baud = serial8250_get_baud_rate(port, termios, NULL);
+ quot = serial8250_get_divisor(port, baud, &frac);
+@@ -2727,7 +2748,9 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk)
+ serial8250_rpm_put(up);
+
+ out_lock:
+- mutex_unlock(&port->state->port.mutex);
++ mutex_unlock(&tport->mutex);
++ up_write(&tty->termios_rwsem);
++ tty_kref_put(tty);
+ }
+ EXPORT_SYMBOL_GPL(serial8250_update_uartclk);
+
+@@ -2761,8 +2784,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
+ up->lcr = cval; /* Save computed LCR */
+
+ if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) {
+- /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */
+- if ((baud < 2400 && !up->dma) || up->fifo_bug) {
++ if (baud < 2400 && !up->dma) {
+ up->fcr &= ~UART_FCR_TRIGGER_MASK;
+ up->fcr |= UART_FCR_TRIGGER_1;
+ }
+@@ -2956,8 +2978,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up)
+ case UPIO_MEM32BE:
+ case UPIO_MEM16:
+ case UPIO_MEM:
+- if (!port->mapbase)
++ if (!port->mapbase) {
++ ret = -EINVAL;
+ break;
++ }
+
+ if (!request_mem_region(port->mapbase, size, "serial")) {
+ ret = -EBUSY;
+@@ -3096,8 +3120,7 @@ static int do_set_rxtrig(struct tty_port *port, unsigned char bytes)
+ struct uart_8250_port *up = up_to_u8250p(uport);
+ int rxtrig;
+
+- if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 ||
+- up->fifo_bug)
++ if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1)
+ return -EINVAL;
+
+ rxtrig = bytes_to_fcr_rxtrig(up, bytes);
+@@ -3181,9 +3204,6 @@ static void serial8250_config_port(struct uart_port *port, int flags)
+ if (flags & UART_CONFIG_TYPE)
+ autoconfig(up);
+
+- if (port->rs485.flags & SER_RS485_ENABLED)
+- port->rs485_config(port, &port->rs485);
+-
+ /* if access method is AU, it is a 16550 with a quirk */
+ if (port->type == PORT_16550A && port->iotype == UPIO_AU)
+ up->bugs |= UART_BUG_NOMSR;
+@@ -3254,6 +3274,7 @@ void serial8250_init_port(struct uart_8250_port *up)
+ struct uart_port *port = &up->port;
+
+ spin_lock_init(&port->lock);
++ port->pm = NULL;
+ port->ops = &serial8250_pops;
+ port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+
+@@ -3308,15 +3329,20 @@ static void serial8250_console_restore(struct uart_8250_port *up)
+ unsigned int baud, quot, frac = 0;
+
+ termios.c_cflag = port->cons->cflag;
+- if (port->state->port.tty && termios.c_cflag == 0)
++ termios.c_ispeed = port->cons->ispeed;
++ termios.c_ospeed = port->cons->ospeed;
++ if (port->state->port.tty && termios.c_cflag == 0) {
+ termios.c_cflag = port->state->port.tty->termios.c_cflag;
++ termios.c_ispeed = port->state->port.tty->termios.c_ispeed;
++ termios.c_ospeed = port->state->port.tty->termios.c_ospeed;
++ }
+
+ baud = serial8250_get_baud_rate(port, &termios, NULL);
+ quot = serial8250_get_divisor(port, baud, &frac);
+
+ serial8250_set_divisor(port, baud, quot, frac);
+ serial_port_out(port, UART_LCR, up->lcr);
+- serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
++ serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
+ }
+
+ /*
+diff --git a/drivers/tty/serial/8250/8250_tegra.c b/drivers/tty/serial/8250/8250_tegra.c
+index e13ae18b0713e..2e29c9f3cd429 100644
+--- a/drivers/tty/serial/8250/8250_tegra.c
++++ b/drivers/tty/serial/8250/8250_tegra.c
+@@ -112,13 +112,15 @@ static int tegra_uart_probe(struct platform_device *pdev)
+
+ ret = serial8250_register_8250_port(&port8250);
+ if (ret < 0)
+- goto err_clkdisable;
++ goto err_ctrl_assert;
+
+ platform_set_drvdata(pdev, uart);
+ uart->line = ret;
+
+ return 0;
+
++err_ctrl_assert:
++ reset_control_assert(uart->rst);
+ err_clkdisable:
+ clk_disable_unprepare(uart->clk);
+
+diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
+index 39fc96dc2531c..6ccadfa0caf06 100644
+--- a/drivers/tty/serial/8250/Kconfig
++++ b/drivers/tty/serial/8250/Kconfig
+@@ -118,7 +118,7 @@ config SERIAL_8250_CONSOLE
+
+ config SERIAL_8250_GSC
+ tristate
+- depends on SERIAL_8250 && GSC
++ depends on SERIAL_8250 && PARISC
+ default SERIAL_8250
+
+ config SERIAL_8250_DMA
+@@ -253,7 +253,9 @@ config SERIAL_8250_ASPEED_VUART
+ tristate "Aspeed Virtual UART"
+ depends on SERIAL_8250
+ depends on OF
+- depends on REGMAP && MFD_SYSCON
++ depends on MFD_SYSCON
++ depends on ARCH_ASPEED || COMPILE_TEST
++ select REGMAP
+ help
+ If you want to use the virtual UART (VUART) device on Aspeed
+ BMC platforms, enable this option. This enables the 16550A-
+diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
+index 7c5f4e966b594..91799c420e250 100644
+--- a/drivers/tty/serial/altera_uart.c
++++ b/drivers/tty/serial/altera_uart.c
+@@ -199,9 +199,8 @@ static void altera_uart_set_termios(struct uart_port *port,
+ */
+ }
+
+-static void altera_uart_rx_chars(struct altera_uart *pp)
++static void altera_uart_rx_chars(struct uart_port *port)
+ {
+- struct uart_port *port = &pp->port;
+ unsigned char ch, flag;
+ unsigned short status;
+
+@@ -246,9 +245,8 @@ static void altera_uart_rx_chars(struct altera_uart *pp)
+ tty_flip_buffer_push(&port->state->port);
+ }
+
+-static void altera_uart_tx_chars(struct altera_uart *pp)
++static void altera_uart_tx_chars(struct uart_port *port)
+ {
+- struct uart_port *port = &pp->port;
+ struct circ_buf *xmit = &port->state->xmit;
+
+ if (port->x_char) {
+@@ -272,26 +270,25 @@ static void altera_uart_tx_chars(struct altera_uart *pp)
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+
+- if (xmit->head == xmit->tail) {
+- pp->imr &= ~ALTERA_UART_CONTROL_TRDY_MSK;
+- altera_uart_update_ctrl_reg(pp);
+- }
++ if (uart_circ_empty(xmit))
++ altera_uart_stop_tx(port);
+ }
+
+ static irqreturn_t altera_uart_interrupt(int irq, void *data)
+ {
+ struct uart_port *port = data;
+ struct altera_uart *pp = container_of(port, struct altera_uart, port);
++ unsigned long flags;
+ unsigned int isr;
+
+ isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr;
+
+- spin_lock(&port->lock);
++ spin_lock_irqsave(&port->lock, flags);
+ if (isr & ALTERA_UART_STATUS_RRDY_MSK)
+- altera_uart_rx_chars(pp);
++ altera_uart_rx_chars(port);
+ if (isr & ALTERA_UART_STATUS_TRDY_MSK)
+- altera_uart_tx_chars(pp);
+- spin_unlock(&port->lock);
++ altera_uart_tx_chars(port);
++ spin_unlock_irqrestore(&port->lock, flags);
+
+ return IRQ_RETVAL(isr);
+ }
+diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c
+index e744b953ca346..47654073123d6 100644
+--- a/drivers/tty/serial/amba-pl010.c
++++ b/drivers/tty/serial/amba-pl010.c
+@@ -446,14 +446,11 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios,
+ if ((termios->c_cflag & CREAD) == 0)
+ uap->port.ignore_status_mask |= UART_DUMMY_RSR_RX;
+
+- /* first, disable everything */
+ old_cr = readb(uap->port.membase + UART010_CR) & ~UART010_CR_MSIE;
+
+ if (UART_ENABLE_MS(port, termios->c_cflag))
+ old_cr |= UART010_CR_MSIE;
+
+- writel(0, uap->port.membase + UART010_CR);
+-
+ /* Set baud rate */
+ quot -= 1;
+ writel((quot & 0xf00) >> 8, uap->port.membase + UART010_LCRM);
+diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
+index d361cd84ff8cf..b91fe25a64a18 100644
+--- a/drivers/tty/serial/amba-pl011.c
++++ b/drivers/tty/serial/amba-pl011.c
+@@ -1050,6 +1050,9 @@ static void pl011_dma_rx_callback(void *data)
+ */
+ static inline void pl011_dma_rx_stop(struct uart_amba_port *uap)
+ {
++ if (!uap->using_rx_dma)
++ return;
++
+ /* FIXME. Just disable the DMA enable */
+ uap->dmacr &= ~UART011_RXDMAE;
+ pl011_write(uap->dmacr, uap, REG_DMACR);
+@@ -1288,13 +1291,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap)
+
+ static void pl011_rs485_tx_stop(struct uart_amba_port *uap)
+ {
++ /*
++ * To be on the safe side only time out after twice as many iterations
++ * as fifo size.
++ */
++ const int MAX_TX_DRAIN_ITERS = uap->port.fifosize * 2;
+ struct uart_port *port = &uap->port;
+ int i = 0;
+ u32 cr;
+
+ /* Wait until hardware tx queue is empty */
+ while (!pl011_tx_empty(port)) {
+- if (i == port->fifosize) {
++ if (i > MAX_TX_DRAIN_ITERS) {
+ dev_warn(port->dev,
+ "timeout while draining hardware tx queue\n");
+ break;
+@@ -1367,6 +1375,15 @@ static void pl011_stop_rx(struct uart_port *port)
+ pl011_dma_rx_stop(uap);
+ }
+
++static void pl011_throttle_rx(struct uart_port *port)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&port->lock, flags);
++ pl011_stop_rx(port);
++ spin_unlock_irqrestore(&port->lock, flags);
++}
++
+ static void pl011_enable_ms(struct uart_port *port)
+ {
+ struct uart_amba_port *uap =
+@@ -1455,6 +1472,10 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
+ struct circ_buf *xmit = &uap->port.state->xmit;
+ int count = uap->fifosize >> 1;
+
++ if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
++ !uap->rs485_tx_started)
++ pl011_rs485_tx_start(uap);
++
+ if (uap->port.x_char) {
+ if (!pl011_tx_char(uap, uap->port.x_char, from_irq))
+ return true;
+@@ -1466,10 +1487,6 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
+ return false;
+ }
+
+- if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
+- !uap->rs485_tx_started)
+- pl011_rs485_tx_start(uap);
+-
+ /* If we are using DMA mode, try to send some characters. */
+ if (pl011_dma_tx_irq(uap))
+ return true;
+@@ -1615,9 +1632,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ container_of(port, struct uart_amba_port, port);
+ unsigned int cr;
+
+- if (port->rs485.flags & SER_RS485_ENABLED)
+- mctrl &= ~TIOCM_RTS;
+-
+ cr = pl011_read(uap, REG_CR);
+
+ #define TIOCMBIT(tiocmbit, uartbit) \
+@@ -1791,9 +1805,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap)
+ */
+ static void pl011_enable_interrupts(struct uart_amba_port *uap)
+ {
++ unsigned long flags;
+ unsigned int i;
+
+- spin_lock_irq(&uap->port.lock);
++ spin_lock_irqsave(&uap->port.lock, flags);
+
+ /* Clear out any spuriously appearing RX interrupts */
+ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR);
+@@ -1815,7 +1830,23 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap)
+ if (!pl011_dma_rx_running(uap))
+ uap->im |= UART011_RXIM;
+ pl011_write(uap->im, uap, REG_IMSC);
+- spin_unlock_irq(&uap->port.lock);
++ spin_unlock_irqrestore(&uap->port.lock, flags);
++}
++
++static void pl011_unthrottle_rx(struct uart_port *port)
++{
++ struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port);
++ unsigned long flags;
++
++ spin_lock_irqsave(&uap->port.lock, flags);
++
++ uap->im = UART011_RTIM;
++ if (!pl011_dma_rx_running(uap))
++ uap->im |= UART011_RXIM;
++
++ pl011_write(uap->im, uap, REG_IMSC);
++
++ spin_unlock_irqrestore(&uap->port.lock, flags);
+ }
+
+ static int pl011_startup(struct uart_port *port)
+@@ -1841,14 +1872,8 @@ static int pl011_startup(struct uart_port *port)
+ cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR);
+ cr |= UART01x_CR_UARTEN | UART011_CR_RXE;
+
+- if (port->rs485.flags & SER_RS485_ENABLED) {
+- if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+- cr &= ~UART011_CR_RTS;
+- else
+- cr |= UART011_CR_RTS;
+- } else {
++ if (!(port->rs485.flags & SER_RS485_ENABLED))
+ cr |= UART011_CR_TXE;
+- }
+
+ pl011_write(cr, uap, REG_CR);
+
+@@ -2095,7 +2120,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
+ * with the given baud rate. We use this as the poll interval when we
+ * wait for the tx queue to empty.
+ */
+- uap->rs485_tx_drain_interval = (bits * 1000 * 1000) / baud;
++ uap->rs485_tx_drain_interval = DIV_ROUND_UP(bits * 1000 * 1000, baud);
+
+ pl011_setup_status_masks(port, termios);
+
+@@ -2105,9 +2130,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
+ if (port->rs485.flags & SER_RS485_ENABLED)
+ termios->c_cflag &= ~CRTSCTS;
+
+- /* first, disable everything */
+ old_cr = pl011_read(uap, REG_CR);
+- pl011_write(0, uap, REG_CR);
+
+ if (termios->c_cflag & CRTSCTS) {
+ if (old_cr & UART011_CR_RTS)
+@@ -2183,32 +2206,13 @@ static const char *pl011_type(struct uart_port *port)
+ return uap->port.type == PORT_AMBA ? uap->type : NULL;
+ }
+
+-/*
+- * Release the memory region(s) being used by 'port'
+- */
+-static void pl011_release_port(struct uart_port *port)
+-{
+- release_mem_region(port->mapbase, SZ_4K);
+-}
+-
+-/*
+- * Request the memory region(s) being used by 'port'
+- */
+-static int pl011_request_port(struct uart_port *port)
+-{
+- return request_mem_region(port->mapbase, SZ_4K, "uart-pl011")
+- != NULL ? 0 : -EBUSY;
+-}
+-
+ /*
+ * Configure/autoconfigure the port.
+ */
+ static void pl011_config_port(struct uart_port *port, int flags)
+ {
+- if (flags & UART_CONFIG_TYPE) {
++ if (flags & UART_CONFIG_TYPE)
+ port->type = PORT_AMBA;
+- pl011_request_port(port);
+- }
+ }
+
+ /*
+@@ -2223,6 +2227,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser)
+ ret = -EINVAL;
+ if (ser->baud_base < 9600)
+ ret = -EINVAL;
++ if (port->mapbase != (unsigned long) ser->iomem_base)
++ ret = -EINVAL;
+ return ret;
+ }
+
+@@ -2268,6 +2274,8 @@ static const struct uart_ops amba_pl011_pops = {
+ .stop_tx = pl011_stop_tx,
+ .start_tx = pl011_start_tx,
+ .stop_rx = pl011_stop_rx,
++ .throttle = pl011_throttle_rx,
++ .unthrottle = pl011_unthrottle_rx,
+ .enable_ms = pl011_enable_ms,
+ .break_ctl = pl011_break_ctl,
+ .startup = pl011_startup,
+@@ -2275,8 +2283,6 @@ static const struct uart_ops amba_pl011_pops = {
+ .flush_buffer = pl011_dma_flush_buffer,
+ .set_termios = pl011_set_termios,
+ .type = pl011_type,
+- .release_port = pl011_release_port,
+- .request_port = pl011_request_port,
+ .config_port = pl011_config_port,
+ .verify_port = pl011_verify_port,
+ #ifdef CONFIG_CONSOLE_POLL
+@@ -2306,8 +2312,6 @@ static const struct uart_ops sbsa_uart_pops = {
+ .shutdown = sbsa_uart_shutdown,
+ .set_termios = sbsa_uart_set_termios,
+ .type = pl011_type,
+- .release_port = pl011_release_port,
+- .request_port = pl011_request_port,
+ .config_port = pl011_config_port,
+ .verify_port = pl011_verify_port,
+ #ifdef CONFIG_CONSOLE_POLL
+@@ -2947,6 +2951,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match);
+
+ static const struct acpi_device_id __maybe_unused sbsa_uart_acpi_match[] = {
+ { "ARMH0011", 0 },
++ { "ARMHB000", 0 },
+ {},
+ };
+ MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match);
+diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
+index 4379ca4842ae7..0f2677695b521 100644
+--- a/drivers/tty/serial/ar933x_uart.c
++++ b/drivers/tty/serial/ar933x_uart.c
+@@ -591,6 +591,11 @@ static int ar933x_config_rs485(struct uart_port *port,
+ dev_err(port->dev, "RS485 needs rts-gpio\n");
+ return 1;
+ }
++
++ if (rs485conf->flags & SER_RS485_ENABLED)
++ gpiod_set_value(up->rts_gpiod,
++ !!(rs485conf->flags & SER_RS485_RTS_AFTER_SEND));
++
+ port->rs485 = *rs485conf;
+ return 0;
+ }
+diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
+index 596217d10d5c7..4d0e992f78445 100644
+--- a/drivers/tty/serial/arc_uart.c
++++ b/drivers/tty/serial/arc_uart.c
+@@ -607,10 +607,11 @@ static int arc_serial_probe(struct platform_device *pdev)
+ }
+ uart->baud = val;
+
+- port->membase = of_iomap(np, 0);
+- if (!port->membase)
++ port->membase = devm_platform_ioremap_resource(pdev, 0);
++ if (IS_ERR(port->membase)) {
+ /* No point of dev_err since UART itself is hosed here */
+- return -ENXIO;
++ return PTR_ERR(port->membase);
++ }
+
+ port->irq = irq_of_parse_and_map(np, 0);
+
+diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
+index 249ea35088d27..98a3f36b40b92 100644
+--- a/drivers/tty/serial/atmel_serial.c
++++ b/drivers/tty/serial/atmel_serial.c
+@@ -295,20 +295,16 @@ static int atmel_config_rs485(struct uart_port *port,
+
+ mode = atmel_uart_readl(port, ATMEL_US_MR);
+
+- /* Resetting serial mode to RS232 (0x0) */
+- mode &= ~ATMEL_US_USMODE;
+-
+- port->rs485 = *rs485conf;
+-
+ if (rs485conf->flags & SER_RS485_ENABLED) {
+ dev_dbg(port->dev, "Setting UART to RS485\n");
+- if (port->rs485.flags & SER_RS485_RX_DURING_TX)
++ if (rs485conf->flags & SER_RS485_RX_DURING_TX)
+ atmel_port->tx_done_mask = ATMEL_US_TXRDY;
+ else
+ atmel_port->tx_done_mask = ATMEL_US_TXEMPTY;
+
+ atmel_uart_writel(port, ATMEL_US_TTGR,
+ rs485conf->delay_rts_after_send);
++ mode &= ~ATMEL_US_USMODE;
+ mode |= ATMEL_US_USMODE_RS485;
+ } else {
+ dev_dbg(port->dev, "Setting UART to RS232\n");
+@@ -877,11 +873,11 @@ static void atmel_complete_tx_dma(void *arg)
+
+ port->icount.tx += atmel_port->tx_len;
+
+- spin_lock_irq(&atmel_port->lock_tx);
++ spin_lock(&atmel_port->lock_tx);
+ async_tx_ack(atmel_port->desc_tx);
+ atmel_port->cookie_tx = -EINVAL;
+ atmel_port->desc_tx = NULL;
+- spin_unlock_irq(&atmel_port->lock_tx);
++ spin_unlock(&atmel_port->lock_tx);
+
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+@@ -1004,6 +1000,13 @@ static void atmel_tx_dma(struct uart_port *port)
+ desc->callback = atmel_complete_tx_dma;
+ desc->callback_param = atmel_port;
+ atmel_port->cookie_tx = dmaengine_submit(desc);
++ if (dma_submit_error(atmel_port->cookie_tx)) {
++ dev_err(port->dev, "dma_submit_error %d\n",
++ atmel_port->cookie_tx);
++ return;
++ }
++
++ dma_async_issue_pending(chan);
+ }
+
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+@@ -1258,6 +1261,13 @@ static int atmel_prepare_rx_dma(struct uart_port *port)
+ desc->callback_param = port;
+ atmel_port->desc_rx = desc;
+ atmel_port->cookie_rx = dmaengine_submit(desc);
++ if (dma_submit_error(atmel_port->cookie_rx)) {
++ dev_err(port->dev, "dma_submit_error %d\n",
++ atmel_port->cookie_rx);
++ goto chan_err;
++ }
++
++ dma_async_issue_pending(atmel_port->chan_rx);
+
+ return 0;
+
+@@ -2605,13 +2615,7 @@ static void __init atmel_console_get_options(struct uart_port *port, int *baud,
+ else if (mr == ATMEL_US_PAR_ODD)
+ *parity = 'o';
+
+- /*
+- * The serial core only rounds down when matching this to a
+- * supported baud rate. Make sure we don't end up slightly
+- * lower than one of those, as it would make us fall through
+- * to a much lower baud rate than we really want.
+- */
+- *baud = port->uartclk / (16 * (quot - 1));
++ *baud = port->uartclk / (16 * quot);
+ }
+
+ static int __init atmel_console_setup(struct console *co, char *options)
+diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+index c719aa2b18328..db07d6a5d764d 100644
+--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
++++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+@@ -1090,6 +1090,7 @@ static void cpm_put_poll_char(struct uart_port *port,
+ cpm_uart_early_write(pinfo, ch, 1, false);
+ }
+
++#ifdef CONFIG_SERIAL_CPM_CONSOLE
+ static struct uart_port *udbg_port;
+
+ static void udbg_cpm_putc(char c)
+@@ -1114,6 +1115,7 @@ static int udbg_cpm_getc(void)
+ cpu_relax();
+ return c;
+ }
++#endif /* CONFIG_SERIAL_CPM_CONSOLE */
+
+ #endif /* CONFIG_CONSOLE_POLL */
+
+@@ -1245,7 +1247,7 @@ static int cpm_uart_init_port(struct device_node *np,
+ }
+
+ #ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+-#ifdef CONFIG_CONSOLE_POLL
++#if defined(CONFIG_CONSOLE_POLL) && defined(CONFIG_SERIAL_CPM_CONSOLE)
+ if (!udbg_port)
+ #endif
+ udbg_putc = NULL;
+diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c
+index 13ac36e2da4f0..5fea9bf86e85e 100644
+--- a/drivers/tty/serial/digicolor-usart.c
++++ b/drivers/tty/serial/digicolor-usart.c
+@@ -309,6 +309,8 @@ static void digicolor_uart_set_termios(struct uart_port *port,
+ case CS8:
+ default:
+ config |= UA_CONFIG_CHAR_LEN;
++ termios->c_cflag &= ~CSIZE;
++ termios->c_cflag |= CS8;
+ break;
+ }
+
+@@ -471,11 +473,10 @@ static int digicolor_uart_probe(struct platform_device *pdev)
+ if (IS_ERR(uart_clk))
+ return PTR_ERR(uart_clk);
+
+- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- dp->port.mapbase = res->start;
+- dp->port.membase = devm_ioremap_resource(&pdev->dev, res);
++ dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(dp->port.membase))
+ return PTR_ERR(dp->port.membase);
++ dp->port.mapbase = res->start;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
+index b1e7190ae4836..e0d576b88d7d5 100644
+--- a/drivers/tty/serial/fsl_lpuart.c
++++ b/drivers/tty/serial/fsl_lpuart.c
+@@ -12,6 +12,7 @@
+ #include <linux/dmaengine.h>
+ #include <linux/dmapool.h>
+ #include <linux/io.h>
++#include <linux/iopoll.h>
+ #include <linux/irq.h>
+ #include <linux/module.h>
+ #include <linux/of.h>
+@@ -239,14 +240,13 @@
+ /* IMX lpuart has four extra unused regs located at the beginning */
+ #define IMX_REG_OFF 0x10
+
+-static DEFINE_IDA(fsl_lpuart_ida);
+-
+ enum lpuart_type {
+ VF610_LPUART,
+ LS1021A_LPUART,
+ LS1028A_LPUART,
+ IMX7ULP_LPUART,
+ IMX8QXP_LPUART,
++ IMXRT1050_LPUART,
+ };
+
+ struct lpuart_port {
+@@ -257,6 +257,7 @@ struct lpuart_port {
+ unsigned int txfifo_size;
+ unsigned int rxfifo_size;
+
++ u8 rx_watermark;
+ bool lpuart_dma_tx_use;
+ bool lpuart_dma_rx_use;
+ struct dma_chan *dma_tx_chan;
+@@ -275,40 +276,51 @@ struct lpuart_port {
+ int rx_dma_rng_buf_len;
+ unsigned int dma_tx_nents;
+ wait_queue_head_t dma_wait;
+- bool id_allocated;
+ };
+
+ struct lpuart_soc_data {
+ enum lpuart_type devtype;
+ char iotype;
+ u8 reg_off;
++ u8 rx_watermark;
+ };
+
+ static const struct lpuart_soc_data vf_data = {
+ .devtype = VF610_LPUART,
+ .iotype = UPIO_MEM,
++ .rx_watermark = 1,
+ };
+
+ static const struct lpuart_soc_data ls1021a_data = {
+ .devtype = LS1021A_LPUART,
+ .iotype = UPIO_MEM32BE,
++ .rx_watermark = 1,
+ };
+
+ static const struct lpuart_soc_data ls1028a_data = {
+ .devtype = LS1028A_LPUART,
+ .iotype = UPIO_MEM32,
++ .rx_watermark = 0,
+ };
+
+ static struct lpuart_soc_data imx7ulp_data = {
+ .devtype = IMX7ULP_LPUART,
+ .iotype = UPIO_MEM32,
+ .reg_off = IMX_REG_OFF,
++ .rx_watermark = 1,
+ };
+
+ static struct lpuart_soc_data imx8qxp_data = {
+ .devtype = IMX8QXP_LPUART,
+ .iotype = UPIO_MEM32,
+ .reg_off = IMX_REG_OFF,
++ .rx_watermark = 1,
++};
++static struct lpuart_soc_data imxrt1050_data = {
++ .devtype = IMXRT1050_LPUART,
++ .iotype = UPIO_MEM32,
++ .reg_off = IMX_REG_OFF,
++ .rx_watermark = 1,
+ };
+
+ static const struct of_device_id lpuart_dt_ids[] = {
+@@ -317,6 +329,7 @@ static const struct of_device_id lpuart_dt_ids[] = {
+ { .compatible = "fsl,ls1028a-lpuart", .data = &ls1028a_data, },
+ { .compatible = "fsl,imx7ulp-lpuart", .data = &imx7ulp_data, },
+ { .compatible = "fsl,imx8qxp-lpuart", .data = &imx8qxp_data, },
++ { .compatible = "fsl,imxrt1050-lpuart", .data = &imxrt1050_data},
+ { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, lpuart_dt_ids);
+@@ -398,33 +411,6 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport)
+ #define lpuart_enable_clks(x) __lpuart_enable_clks(x, true)
+ #define lpuart_disable_clks(x) __lpuart_enable_clks(x, false)
+
+-static int lpuart_global_reset(struct lpuart_port *sport)
+-{
+- struct uart_port *port = &sport->port;
+- void __iomem *global_addr;
+- int ret;
+-
+- if (uart_console(port))
+- return 0;
+-
+- ret = clk_prepare_enable(sport->ipg_clk);
+- if (ret) {
+- dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret);
+- return ret;
+- }
+-
+- if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) {
+- global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF;
+- writel(UART_GLOBAL_RST, global_addr);
+- usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US);
+- writel(0, global_addr);
+- usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US);
+- }
+-
+- clk_disable_unprepare(sport->ipg_clk);
+- return 0;
+-}
+-
+ static void lpuart_stop_tx(struct uart_port *port)
+ {
+ unsigned char temp;
+@@ -602,7 +588,7 @@ static void lpuart_flush_buffer(struct uart_port *port)
+ sport->dma_tx_nents, DMA_TO_DEVICE);
+ sport->dma_tx_in_progress = false;
+ }
+- dmaengine_terminate_all(chan);
++ dmaengine_terminate_async(chan);
+ }
+
+ if (lpuart_is_32(sport)) {
+@@ -858,11 +844,17 @@ static unsigned int lpuart32_tx_empty(struct uart_port *port)
+ struct lpuart_port, port);
+ unsigned long stat = lpuart32_read(port, UARTSTAT);
+ unsigned long sfifo = lpuart32_read(port, UARTFIFO);
++ unsigned long ctrl = lpuart32_read(port, UARTCTRL);
+
+ if (sport->dma_tx_in_progress)
+ return 0;
+
+- if (stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT)
++ /*
++ * LPUART Transmission Complete Flag may never be set while queuing a break
++ * character, so avoid checking for transmission complete when UARTCTRL_SBK
++ * is asserted.
++ */
++ if ((stat & UARTSTAT_TC && sfifo & UARTFIFO_TXEMPT) || ctrl & UARTCTRL_SBK)
+ return TIOCSER_TEMT;
+
+ return 0;
+@@ -985,12 +977,12 @@ static void lpuart32_rxint(struct lpuart_port *sport)
+
+ if (sr & (UARTSTAT_PE | UARTSTAT_OR | UARTSTAT_FE)) {
+ if (sr & UARTSTAT_PE) {
++ sport->port.icount.parity++;
++ } else if (sr & UARTSTAT_FE) {
+ if (is_break)
+ sport->port.icount.brk++;
+ else
+- sport->port.icount.parity++;
+- } else if (sr & UARTSTAT_FE) {
+- sport->port.icount.frame++;
++ sport->port.icount.frame++;
+ }
+
+ if (sr & UARTSTAT_OR)
+@@ -1005,12 +997,12 @@ static void lpuart32_rxint(struct lpuart_port *sport)
+ sr &= sport->port.read_status_mask;
+
+ if (sr & UARTSTAT_PE) {
++ flg = TTY_PARITY;
++ } else if (sr & UARTSTAT_FE) {
+ if (is_break)
+ flg = TTY_BREAK;
+ else
+- flg = TTY_PARITY;
+- } else if (sr & UARTSTAT_FE) {
+- flg = TTY_FRAME;
++ flg = TTY_FRAME;
+ }
+
+ if (sr & UARTSTAT_OR)
+@@ -1115,8 +1107,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
+ unsigned long sr = lpuart32_read(&sport->port, UARTSTAT);
+
+ if (sr & (UARTSTAT_PE | UARTSTAT_FE)) {
+- /* Read DR to clear the error flags */
+- lpuart32_read(&sport->port, UARTDATA);
++ /* Clear the error flags */
++ lpuart32_write(&sport->port, sr, UARTSTAT);
+
+ if (sr & UARTSTAT_PE)
+ sport->port.icount.parity++;
+@@ -1273,7 +1265,7 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
+ * 10ms at any baud rate.
+ */
+ sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud / bits / 1000) * 2;
+- sport->rx_dma_rng_buf_len = (1 << (fls(sport->rx_dma_rng_buf_len) - 1));
++ sport->rx_dma_rng_buf_len = (1 << fls(sport->rx_dma_rng_buf_len));
+ if (sport->rx_dma_rng_buf_len < 16)
+ sport->rx_dma_rng_buf_len = 16;
+
+@@ -1336,7 +1328,8 @@ static void lpuart_dma_rx_free(struct uart_port *port)
+ struct lpuart_port, port);
+ struct dma_chan *chan = sport->dma_rx_chan;
+
+- dmaengine_terminate_all(chan);
++ dmaengine_terminate_sync(chan);
++ del_timer_sync(&sport->lpuart_timer);
+ dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+ kfree(sport->rx_ring.buf);
+ sport->rx_ring.tail = 0;
+@@ -1384,9 +1377,9 @@ static int lpuart_config_rs485(struct uart_port *port,
+ * Note: UART is assumed to be active high.
+ */
+ if (rs485->flags & SER_RS485_RTS_ON_SEND)
+- modem &= ~UARTMODEM_TXRTSPOL;
+- else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
+ modem |= UARTMODEM_TXRTSPOL;
++ else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
++ modem &= ~UARTMODEM_TXRTSPOL;
+ }
+
+ /* Store the new configuration */
+@@ -1435,9 +1428,9 @@ static int lpuart32_config_rs485(struct uart_port *port,
+ * Note: UART is assumed to be active high.
+ */
+ if (rs485->flags & SER_RS485_RTS_ON_SEND)
+- modem &= ~UARTMODEM_TXRTSPOL;
+- else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
+ modem |= UARTMODEM_TXRTSPOL;
++ else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
++ modem &= ~UARTMODEM_TXRTSPOL;
+ }
+
+ /* Store the new configuration */
+@@ -1515,12 +1508,34 @@ static void lpuart32_break_ctl(struct uart_port *port, int break_state)
+ {
+ unsigned long temp;
+
+- temp = lpuart32_read(port, UARTCTRL) & ~UARTCTRL_SBK;
+-
+- if (break_state != 0)
+- temp |= UARTCTRL_SBK;
++ temp = lpuart32_read(port, UARTCTRL);
+
+- lpuart32_write(port, temp, UARTCTRL);
++ /*
++	 * The LPUART IP has two known bugs. One is that CTS has higher priority
++	 * than the break signal, so a break sent through UARTCTRL_SBK may be
++	 * impacted by the CTS input if HW flow control is enabled. This affects
++	 * all platforms supported by this driver.
++	 * The other is that the i.MX8QM LPUART may send an additional break
++	 * character after SBK is cleared.
++	 * To avoid both bugs, use the Transmit Data Inversion function to send
++	 * the break signal instead of UARTCTRL_SBK.
++ */
++ if (break_state != 0) {
++ /*
++ * Disable the transmitter to prevent any data from being sent out
++ * during break, then invert the TX line to send break.
++ */
++ temp &= ~UARTCTRL_TE;
++ lpuart32_write(port, temp, UARTCTRL);
++ temp |= UARTCTRL_TXINV;
++ lpuart32_write(port, temp, UARTCTRL);
++ } else {
++ /* Disable the TXINV to turn off break and re-enable transmitter. */
++ temp &= ~UARTCTRL_TXINV;
++ lpuart32_write(port, temp, UARTCTRL);
++ temp |= UARTCTRL_TE;
++ lpuart32_write(port, temp, UARTCTRL);
++ }
+ }
+
+ static void lpuart_setup_watermark(struct lpuart_port *sport)
+@@ -1549,7 +1564,7 @@ static void lpuart_setup_watermark(struct lpuart_port *sport)
+ }
+
+ writeb(0, sport->port.membase + UARTTWFIFO);
+- writeb(1, sport->port.membase + UARTRWFIFO);
++ writeb(sport->rx_watermark, sport->port.membase + UARTRWFIFO);
+
+ /* Restore cr2 */
+ writeb(cr2_saved, sport->port.membase + UARTCR2);
+@@ -1584,7 +1599,8 @@ static void lpuart32_setup_watermark(struct lpuart_port *sport)
+ lpuart32_write(&sport->port, val, UARTFIFO);
+
+ /* set the watermark */
+- val = (0x1 << UARTWATER_RXWATER_OFF) | (0x0 << UARTWATER_TXWATER_OFF);
++ val = (sport->rx_watermark << UARTWATER_RXWATER_OFF) |
++ (0x0 << UARTWATER_TXWATER_OFF);
+ lpuart32_write(&sport->port, val, UARTWATER);
+
+ /* Restore cr2 */
+@@ -1729,12 +1745,6 @@ static void lpuart32_configure(struct lpuart_port *sport)
+ {
+ unsigned long temp;
+
+- if (sport->lpuart_dma_rx_use) {
+- /* RXWATER must be 0 */
+- temp = lpuart32_read(&sport->port, UARTWATER);
+- temp &= ~(UARTWATER_WATER_MASK << UARTWATER_RXWATER_OFF);
+- lpuart32_write(&sport->port, temp, UARTWATER);
+- }
+ temp = lpuart32_read(&sport->port, UARTCTRL);
+ if (!sport->lpuart_dma_rx_use)
+ temp |= UARTCTRL_RIE;
+@@ -1788,16 +1798,17 @@ static int lpuart32_startup(struct uart_port *port)
+ static void lpuart_dma_shutdown(struct lpuart_port *sport)
+ {
+ if (sport->lpuart_dma_rx_use) {
+- del_timer_sync(&sport->lpuart_timer);
+ lpuart_dma_rx_free(&sport->port);
++ sport->lpuart_dma_rx_use = false;
+ }
+
+ if (sport->lpuart_dma_tx_use) {
+ if (wait_event_interruptible(sport->dma_wait,
+ !sport->dma_tx_in_progress) != false) {
+ sport->dma_tx_in_progress = false;
+- dmaengine_terminate_all(sport->dma_tx_chan);
++ dmaengine_terminate_sync(sport->dma_tx_chan);
+ }
++ sport->lpuart_dma_tx_use = false;
+ }
+
+ if (sport->dma_tx_chan)
+@@ -1834,6 +1845,15 @@ static void lpuart32_shutdown(struct uart_port *port)
+
+ spin_lock_irqsave(&port->lock, flags);
+
++ /* clear status */
++ temp = lpuart32_read(&sport->port, UARTSTAT);
++ lpuart32_write(&sport->port, temp, UARTSTAT);
++
++ /* disable Rx/Tx DMA */
++ temp = lpuart32_read(port, UARTBAUD);
++ temp &= ~(UARTBAUD_TDMAE | UARTBAUD_RDMAE);
++ lpuart32_write(port, temp, UARTBAUD);
++
+ /* disable Rx/Tx and interrupts */
+ temp = lpuart32_read(port, UARTCTRL);
+ temp &= ~(UARTCTRL_TE | UARTCTRL_RE |
+@@ -1937,10 +1957,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
+ * Since timer function acqures sport->port.lock, need to stop before
+ * acquring same lock because otherwise del_timer_sync() can deadlock.
+ */
+- if (old && sport->lpuart_dma_rx_use) {
+- del_timer_sync(&sport->lpuart_timer);
++ if (old && sport->lpuart_dma_rx_use)
+ lpuart_dma_rx_free(&sport->port);
+- }
+
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+@@ -2175,10 +2193,8 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
+ * Since timer function acqures sport->port.lock, need to stop before
+ * acquring same lock because otherwise del_timer_sync() can deadlock.
+ */
+- if (old && sport->lpuart_dma_rx_use) {
+- del_timer_sync(&sport->lpuart_timer);
++ if (old && sport->lpuart_dma_rx_use)
+ lpuart_dma_rx_free(&sport->port);
+- }
+
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+@@ -2205,8 +2221,15 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
+ /* update the per-port timeout */
+ uart_update_timeout(port, termios->c_cflag, baud);
+
+- /* wait transmit engin complete */
+- lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
++ /*
++ * LPUART Transmission Complete Flag may never be set while queuing a break
++ * character, so skip waiting for transmission complete when UARTCTRL_SBK is
++ * asserted.
++ */
++ if (!(old_ctrl & UARTCTRL_SBK)) {
++ lpuart32_write(&sport->port, 0, UARTMODIR);
++ lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
++ }
+
+ /* disable transmit and receive */
+ lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE),
+@@ -2625,6 +2648,9 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup);
+ OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup);
+ OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup);
+ OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup);
++OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8ulp-lpuart", lpuart32_imx_early_console_setup);
++OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8qxp-lpuart", lpuart32_imx_early_console_setup);
++OF_EARLYCON_DECLARE(lpuart32, "fsl,imxrt1050-lpuart", lpuart32_imx_early_console_setup);
+ EARLYCON_DECLARE(lpuart, lpuart_early_console_setup);
+ EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup);
+
+@@ -2643,12 +2669,66 @@ static struct uart_driver lpuart_reg = {
+ .cons = LPUART_CONSOLE,
+ };
+
++static const struct serial_rs485 lpuart_rs485_supported = {
++ .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RTS_AFTER_SEND,
++ /* delay_rts_* and RX_DURING_TX are not supported */
++};
++
++static int lpuart_global_reset(struct lpuart_port *sport)
++{
++ struct uart_port *port = &sport->port;
++ void __iomem *global_addr;
++ unsigned long ctrl, bd;
++ unsigned int val = 0;
++ int ret;
++
++ ret = clk_prepare_enable(sport->ipg_clk);
++ if (ret) {
++ dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret);
++ return ret;
++ }
++
++ if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) {
++ /*
++ * If the transmitter is used by earlycon, wait for transmit engine to
++ * complete and then reset.
++ */
++ ctrl = lpuart32_read(port, UARTCTRL);
++ if (ctrl & UARTCTRL_TE) {
++ bd = lpuart32_read(&sport->port, UARTBAUD);
++ if (read_poll_timeout(lpuart32_tx_empty, val, val, 1, 100000, false,
++ port)) {
++ dev_warn(sport->port.dev,
++ "timeout waiting for transmit engine to complete\n");
++ clk_disable_unprepare(sport->ipg_clk);
++ return 0;
++ }
++ }
++
++ global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF;
++ writel(UART_GLOBAL_RST, global_addr);
++ usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US);
++ writel(0, global_addr);
++ usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US);
++
++ /* Recover the transmitter for earlycon. */
++ if (ctrl & UARTCTRL_TE) {
++ lpuart32_write(port, bd, UARTBAUD);
++ lpuart32_write(port, ctrl, UARTCTRL);
++ }
++ }
++
++ clk_disable_unprepare(sport->ipg_clk);
++ return 0;
++}
++
+ static int lpuart_probe(struct platform_device *pdev)
+ {
+ const struct lpuart_soc_data *sdata = of_device_get_match_data(&pdev->dev);
+ struct device_node *np = pdev->dev.of_node;
+ struct lpuart_port *sport;
+ struct resource *res;
++ irq_handler_t handler;
+ int ret;
+
+ sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
+@@ -2665,6 +2745,7 @@ static int lpuart_probe(struct platform_device *pdev)
+ sport->port.dev = &pdev->dev;
+ sport->port.type = PORT_LPUART;
+ sport->devtype = sdata->devtype;
++ sport->rx_watermark = sdata->rx_watermark;
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ return ret;
+@@ -2681,6 +2762,7 @@ static int lpuart_probe(struct platform_device *pdev)
+ sport->port.rs485_config = lpuart32_config_rs485;
+ else
+ sport->port.rs485_config = lpuart_config_rs485;
++ sport->port.rs485_supported = &lpuart_rs485_supported;
+
+ sport->ipg_clk = devm_clk_get(&pdev->dev, "ipg");
+ if (IS_ERR(sport->ipg_clk)) {
+@@ -2701,23 +2783,18 @@ static int lpuart_probe(struct platform_device *pdev)
+
+ ret = of_alias_get_id(np, "serial");
+ if (ret < 0) {
+- ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL);
+- if (ret < 0) {
+- dev_err(&pdev->dev, "port line is full, add device failed\n");
+- return ret;
+- }
+- sport->id_allocated = true;
++ dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret);
++ return ret;
+ }
+ if (ret >= ARRAY_SIZE(lpuart_ports)) {
+ dev_err(&pdev->dev, "serial%d out of range\n", ret);
+- ret = -EINVAL;
+- goto failed_out_of_range;
++ return -EINVAL;
+ }
+ sport->port.line = ret;
+
+ ret = lpuart_enable_clks(sport);
+ if (ret)
+- goto failed_clock_enable;
++ return ret;
+ sport->port.uartclk = lpuart_get_baud_clk_rate(sport);
+
+ lpuart_ports[sport->port.line] = sport;
+@@ -2726,21 +2803,12 @@ static int lpuart_probe(struct platform_device *pdev)
+
+ if (lpuart_is_32(sport)) {
+ lpuart_reg.cons = LPUART32_CONSOLE;
+- ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0,
+- DRIVER_NAME, sport);
++ handler = lpuart32_int;
+ } else {
+ lpuart_reg.cons = LPUART_CONSOLE;
+- ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0,
+- DRIVER_NAME, sport);
++ handler = lpuart_int;
+ }
+
+- if (ret)
+- goto failed_irq_request;
+-
+- ret = uart_add_one_port(&lpuart_reg, &sport->port);
+- if (ret)
+- goto failed_attach_port;
+-
+ ret = lpuart_global_reset(sport);
+ if (ret)
+ goto failed_reset;
+@@ -2756,20 +2824,23 @@ static int lpuart_probe(struct platform_device *pdev)
+ sport->port.rs485.delay_rts_after_send)
+ dev_err(&pdev->dev, "driver doesn't support RTS delays\n");
+
+- sport->port.rs485_config(&sport->port, &sport->port.rs485);
++ ret = uart_add_one_port(&lpuart_reg, &sport->port);
++ if (ret)
++ goto failed_attach_port;
++
++ ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0,
++ DRIVER_NAME, sport);
++ if (ret)
++ goto failed_irq_request;
+
+ return 0;
+
+-failed_get_rs485:
+-failed_reset:
++failed_irq_request:
+ uart_remove_one_port(&lpuart_reg, &sport->port);
+ failed_attach_port:
+-failed_irq_request:
++failed_get_rs485:
++failed_reset:
+ lpuart_disable_clks(sport);
+-failed_clock_enable:
+-failed_out_of_range:
+- if (sport->id_allocated)
+- ida_simple_remove(&fsl_lpuart_ida, sport->port.line);
+ return ret;
+ }
+
+@@ -2779,9 +2850,6 @@ static int lpuart_remove(struct platform_device *pdev)
+
+ uart_remove_one_port(&lpuart_reg, &sport->port);
+
+- if (sport->id_allocated)
+- ida_simple_remove(&fsl_lpuart_ida, sport->port.line);
+-
+ lpuart_disable_clks(sport);
+
+ if (sport->dma_tx_chan)
+@@ -2821,11 +2889,10 @@ static int __maybe_unused lpuart_suspend(struct device *dev)
+ * EDMA driver during suspend will forcefully release any
+ * non-idle DMA channels. If port wakeup is enabled or if port
+ * is console port or 'no_console_suspend' is set the Rx DMA
+- * cannot resume as as expected, hence gracefully release the
++ * cannot resume as expected, hence gracefully release the
+ * Rx DMA path before suspend and start Rx DMA path on resume.
+ */
+ if (irq_wake) {
+- del_timer_sync(&sport->lpuart_timer);
+ lpuart_dma_rx_free(&sport->port);
+ }
+
+@@ -2911,7 +2978,6 @@ static int __init lpuart_serial_init(void)
+
+ static void __exit lpuart_serial_exit(void)
+ {
+- ida_destroy(&fsl_lpuart_ida);
+ platform_driver_unregister(&lpuart_driver);
+ uart_unregister_driver(&lpuart_reg);
+ }
+diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c
+index 03a2fe9f4c9a9..02b375ba2f078 100644
+--- a/drivers/tty/serial/icom.c
++++ b/drivers/tty/serial/icom.c
+@@ -1501,7 +1501,7 @@ static int icom_probe(struct pci_dev *dev,
+ retval = pci_read_config_dword(dev, PCI_COMMAND, &command_reg);
+ if (retval) {
+ dev_err(&dev->dev, "PCI Config read FAILED\n");
+- return retval;
++ goto probe_exit0;
+ }
+
+ pci_write_config_dword(dev, PCI_COMMAND,
+diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
+index 8b121cd869e94..3b2beb98eb1e9 100644
+--- a/drivers/tty/serial/imx.c
++++ b/drivers/tty/serial/imx.c
+@@ -380,8 +380,7 @@ static void imx_uart_rts_active(struct imx_port *sport, u32 *ucr2)
+ {
+ *ucr2 &= ~(UCR2_CTSC | UCR2_CTS);
+
+- sport->port.mctrl |= TIOCM_RTS;
+- mctrl_gpio_set(sport->gpios, sport->port.mctrl);
++ mctrl_gpio_set(sport->gpios, sport->port.mctrl | TIOCM_RTS);
+ }
+
+ /* called with port.lock taken and irqs caller dependent */
+@@ -390,8 +389,7 @@ static void imx_uart_rts_inactive(struct imx_port *sport, u32 *ucr2)
+ *ucr2 &= ~UCR2_CTSC;
+ *ucr2 |= UCR2_CTS;
+
+- sport->port.mctrl &= ~TIOCM_RTS;
+- mctrl_gpio_set(sport->gpios, sport->port.mctrl);
++ mctrl_gpio_set(sport->gpios, sport->port.mctrl & ~TIOCM_RTS);
+ }
+
+ static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec)
+@@ -399,6 +397,16 @@ static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec)
+ hrtimer_start(hrt, ms_to_ktime(msec), HRTIMER_MODE_REL);
+ }
+
++static void imx_uart_disable_loopback_rs485(struct imx_port *sport)
++{
++ unsigned int uts;
++
++ /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
++ uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
++ uts &= ~UTS_LOOP;
++ imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
++}
++
+ /* called with port.lock taken and irqs off */
+ static void imx_uart_start_rx(struct uart_port *port)
+ {
+@@ -420,6 +428,7 @@ static void imx_uart_start_rx(struct uart_port *port)
+ /* Write UCR2 first as it includes RXEN */
+ imx_uart_writel(sport, ucr2, UCR2);
+ imx_uart_writel(sport, ucr1, UCR1);
++ imx_uart_disable_loopback_rs485(sport);
+ }
+
+ /* called with port.lock taken and irqs off */
+@@ -486,20 +495,34 @@ static void imx_uart_stop_tx(struct uart_port *port)
+ static void imx_uart_stop_rx(struct uart_port *port)
+ {
+ struct imx_port *sport = (struct imx_port *)port;
+- u32 ucr1, ucr2;
++ u32 ucr1, ucr2, ucr4, uts;
+
+ ucr1 = imx_uart_readl(sport, UCR1);
+ ucr2 = imx_uart_readl(sport, UCR2);
++ ucr4 = imx_uart_readl(sport, UCR4);
+
+ if (sport->dma_is_enabled) {
+ ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN);
+ } else {
+ ucr1 &= ~UCR1_RRDYEN;
+ ucr2 &= ~UCR2_ATEN;
++ ucr4 &= ~UCR4_OREN;
+ }
+ imx_uart_writel(sport, ucr1, UCR1);
++ imx_uart_writel(sport, ucr4, UCR4);
++
++ /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
++ if (port->rs485.flags & SER_RS485_ENABLED &&
++ port->rs485.flags & SER_RS485_RTS_ON_SEND &&
++ sport->have_rtscts && !sport->have_rtsgpio) {
++ uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
++ uts |= UTS_LOOP;
++ imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
++ ucr2 |= UCR2_RXEN;
++ } else {
++ ucr2 &= ~UCR2_RXEN;
++ }
+
+- ucr2 &= ~UCR2_RXEN;
+ imx_uart_writel(sport, ucr2, UCR2);
+ }
+
+@@ -1435,7 +1458,7 @@ static int imx_uart_startup(struct uart_port *port)
+ imx_uart_writel(sport, ucr1, UCR1);
+
+ ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR);
+- if (!sport->dma_is_enabled)
++ if (!dma_is_inited)
+ ucr4 |= UCR4_OREN;
+ if (sport->inverted_rx)
+ ucr4 |= UCR4_INVR;
+@@ -1487,6 +1510,8 @@ static int imx_uart_startup(struct uart_port *port)
+ imx_uart_writel(sport, ucr2, UCR2);
+ }
+
++ imx_uart_disable_loopback_rs485(sport);
++
+ spin_unlock_irqrestore(&sport->port.lock, flags);
+
+ return 0;
+@@ -1496,7 +1521,7 @@ static void imx_uart_shutdown(struct uart_port *port)
+ {
+ struct imx_port *sport = (struct imx_port *)port;
+ unsigned long flags;
+- u32 ucr1, ucr2, ucr4;
++ u32 ucr1, ucr2, ucr4, uts;
+
+ if (sport->dma_is_enabled) {
+ dmaengine_terminate_sync(sport->dma_chan_tx);
+@@ -1540,11 +1565,22 @@ static void imx_uart_shutdown(struct uart_port *port)
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+ ucr1 = imx_uart_readl(sport, UCR1);
+- ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | UCR1_RXDMAEN | UCR1_ATDMAEN);
++ ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_RXDMAEN | UCR1_ATDMAEN);
++ /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
++ if (port->rs485.flags & SER_RS485_ENABLED &&
++ port->rs485.flags & SER_RS485_RTS_ON_SEND &&
++ sport->have_rtscts && !sport->have_rtsgpio) {
++ uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
++ uts |= UTS_LOOP;
++ imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
++ ucr1 |= UCR1_UARTEN;
++ } else {
++ ucr1 &= ~UCR1_UARTEN;
++ }
+ imx_uart_writel(sport, ucr1, UCR1);
+
+ ucr4 = imx_uart_readl(sport, UCR4);
+- ucr4 &= ~(UCR4_OREN | UCR4_TCEN);
++ ucr4 &= ~UCR4_TCEN;
+ imx_uart_writel(sport, ucr4, UCR4);
+
+ spin_unlock_irqrestore(&sport->port.lock, flags);
+@@ -2017,7 +2053,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count)
+ * If the port was already initialised (eg, by a boot loader),
+ * try to determine the current setup.
+ */
+-static void __init
++static void
+ imx_uart_console_get_options(struct imx_port *sport, int *baud,
+ int *parity, int *bits)
+ {
+@@ -2076,7 +2112,7 @@ imx_uart_console_get_options(struct imx_port *sport, int *baud,
+ }
+ }
+
+-static int __init
++static int
+ imx_uart_console_setup(struct console *co, char *options)
+ {
+ struct imx_port *sport;
+@@ -2188,7 +2224,7 @@ static int imx_uart_probe(struct platform_device *pdev)
+ void __iomem *base;
+ u32 dma_buf_conf[2];
+ int ret = 0;
+- u32 ucr1;
++ u32 ucr1, ucr2, uts;
+ struct resource *res;
+ int txirq, rxirq, rtsirq;
+
+@@ -2315,13 +2351,41 @@ static int imx_uart_probe(struct platform_device *pdev)
+ dev_err(&pdev->dev,
+ "low-active RTS not possible when receiver is off, enabling receiver\n");
+
+- imx_uart_rs485_config(&sport->port, &sport->port.rs485);
+-
+ /* Disable interrupts before requesting them */
+ ucr1 = imx_uart_readl(sport, UCR1);
+ ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN);
+ imx_uart_writel(sport, ucr1, UCR1);
+
++ /* Disable Ageing Timer interrupt */
++ ucr2 = imx_uart_readl(sport, UCR2);
++ ucr2 &= ~UCR2_ATEN;
++ imx_uart_writel(sport, ucr2, UCR2);
++
++	 * If RS485 is enabled without GPIO RTS control, the UART IP is
++	 * used to control the CTS signal. Keep both the UART and the
++	 * receiver enabled, otherwise the UART IP always pulls the CTS
++	 * signal HIGH no matter how the UCR2 CTSC and CTS bits are set.
++	 * To prevent any data from being fed into the RX FIFO, enable
++	 * loopback mode in the UTS register, which disconnects the RX
++	 * path from the external RXD pin and connects it to the
++	 * transceiver, which is disabled, so no data can reach the RX FIFO.
++ * no data can be fed to the RX FIFO that way.
++ */
++ if (sport->port.rs485.flags & SER_RS485_ENABLED &&
++ sport->have_rtscts && !sport->have_rtsgpio) {
++ uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
++ uts |= UTS_LOOP;
++ imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
++
++ ucr1 = imx_uart_readl(sport, UCR1);
++ ucr1 |= UCR1_UARTEN;
++ imx_uart_writel(sport, ucr1, UCR1);
++
++ ucr2 = imx_uart_readl(sport, UCR2);
++ ucr2 |= UCR2_RXEN;
++ imx_uart_writel(sport, ucr2, UCR2);
++ }
++
+ if (!imx_uart_is_imx1(sport) && sport->dte_mode) {
+ /*
+ * The DCEDTE bit changes the direction of DSR, DCD, DTR and RI
+@@ -2564,6 +2628,7 @@ static const struct dev_pm_ops imx_uart_pm_ops = {
+ .suspend_noirq = imx_uart_suspend_noirq,
+ .resume_noirq = imx_uart_resume_noirq,
+ .freeze_noirq = imx_uart_suspend_noirq,
++ .thaw_noirq = imx_uart_resume_noirq,
+ .restore_noirq = imx_uart_resume_noirq,
+ .suspend = imx_uart_suspend,
+ .resume = imx_uart_resume,
+diff --git a/drivers/tty/serial/jsm/jsm_driver.c b/drivers/tty/serial/jsm/jsm_driver.c
+index 0ea799bf8dbb1..417a5b6bffc34 100644
+--- a/drivers/tty/serial/jsm/jsm_driver.c
++++ b/drivers/tty/serial/jsm/jsm_driver.c
+@@ -211,7 +211,8 @@ static int jsm_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ break;
+ default:
+- return -ENXIO;
++ rc = -ENXIO;
++ goto out_kfree_brd;
+ }
+
+ rc = request_irq(brd->irq, brd->bd_ops->intr, IRQF_SHARED, "JSM", brd);
+diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
+index 49d0c7f2b29b8..79b7db8580e05 100644
+--- a/drivers/tty/serial/kgdboc.c
++++ b/drivers/tty/serial/kgdboc.c
+@@ -403,16 +403,16 @@ static int kgdboc_option_setup(char *opt)
+ {
+ if (!opt) {
+ pr_err("config string not provided\n");
+- return -EINVAL;
++ return 1;
+ }
+
+ if (strlen(opt) >= MAX_CONFIG_LEN) {
+ pr_err("config string too long\n");
+- return -ENOSPC;
++ return 1;
+ }
+ strcpy(config, opt);
+
+- return 0;
++ return 1;
+ }
+
+ __setup("kgdboc=", kgdboc_option_setup);
+diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
+index 497b334bc8452..903dc53f43836 100644
+--- a/drivers/tty/serial/lantiq.c
++++ b/drivers/tty/serial/lantiq.c
+@@ -274,6 +274,7 @@ lqasc_err_int(int irq, void *_port)
+ struct ltq_uart_port *ltq_port = to_ltq_uart_port(port);
+
+ spin_lock_irqsave(&ltq_port->lock, flags);
++ __raw_writel(ASC_IRNCR_EIR, port->membase + LTQ_ASC_IRNCR);
+ /* clear any pending interrupts */
+ asc_update_bits(0, ASCWHBSTATE_CLRPE | ASCWHBSTATE_CLRFE |
+ ASCWHBSTATE_CLRROE, port->membase + LTQ_ASC_WHBSTATE);
+diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c
+index dbc0559a91575..7f74bf7bdcff8 100644
+--- a/drivers/tty/serial/liteuart.c
++++ b/drivers/tty/serial/liteuart.c
+@@ -270,8 +270,10 @@ static int liteuart_probe(struct platform_device *pdev)
+
+ /* get membase */
+ port->membase = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+- if (IS_ERR(port->membase))
+- return PTR_ERR(port->membase);
++ if (IS_ERR(port->membase)) {
++ ret = PTR_ERR(port->membase);
++ goto err_erase_id;
++ }
+
+ /* values not from device tree */
+ port->dev = &pdev->dev;
+@@ -285,7 +287,18 @@ static int liteuart_probe(struct platform_device *pdev)
+ port->line = dev_id;
+ spin_lock_init(&port->lock);
+
+- return uart_add_one_port(&liteuart_driver, &uart->port);
++ platform_set_drvdata(pdev, port);
++
++ ret = uart_add_one_port(&liteuart_driver, &uart->port);
++ if (ret)
++ goto err_erase_id;
++
++ return 0;
++
++err_erase_id:
++ xa_erase(&liteuart_array, uart->id);
++
++ return ret;
+ }
+
+ static int liteuart_remove(struct platform_device *pdev)
+@@ -293,6 +306,7 @@ static int liteuart_remove(struct platform_device *pdev)
+ struct uart_port *port = platform_get_drvdata(pdev);
+ struct liteuart_port *uart = to_liteuart_port(port);
+
++ uart_remove_one_port(&liteuart_driver, port);
+ xa_erase(&liteuart_array, uart->id);
+
+ return 0;
+@@ -422,4 +436,4 @@ module_exit(liteuart_exit);
+ MODULE_AUTHOR("Antmicro <www.antmicro.com>");
+ MODULE_DESCRIPTION("LiteUART serial driver");
+ MODULE_LICENSE("GPL v2");
+-MODULE_ALIAS("platform: liteuart");
++MODULE_ALIAS("platform:liteuart");
+diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
+index b199d78599611..07c4161eb4cc2 100644
+--- a/drivers/tty/serial/lpc32xx_hs.c
++++ b/drivers/tty/serial/lpc32xx_hs.c
+@@ -341,7 +341,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
+ LPC32XX_HSUART_IIR(port->membase));
+ port->icount.overrun++;
+ tty_insert_flip_char(tport, 0, TTY_OVERRUN);
+- tty_schedule_flip(tport);
++ tty_flip_buffer_push(tport);
+ }
+
+ /* Data received? */
+diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c
+index efee3935917fc..62e6c1af13445 100644
+--- a/drivers/tty/serial/meson_uart.c
++++ b/drivers/tty/serial/meson_uart.c
+@@ -253,6 +253,14 @@ static const char *meson_uart_type(struct uart_port *port)
+ return (port->type == PORT_MESON) ? "meson_uart" : NULL;
+ }
+
++/*
++ * This function is called only from probe() using a temporary io mapping
++ * in order to perform a reset before setting up the device. Since the
++ * temporarily mapped region was successfully requested, there can be no
++ * console on this port at this time. Hence it is not necessary for this
++ * function to acquire the port->lock. (Since there is no console on this
++ * port at this time, the port->lock is not initialized yet.)
++ */
+ static void meson_uart_reset(struct uart_port *port)
+ {
+ u32 val;
+@@ -267,9 +275,12 @@ static void meson_uart_reset(struct uart_port *port)
+
+ static int meson_uart_startup(struct uart_port *port)
+ {
++ unsigned long flags;
+ u32 val;
+ int ret = 0;
+
++ spin_lock_irqsave(&port->lock, flags);
++
+ val = readl(port->membase + AML_UART_CONTROL);
+ val |= AML_UART_CLEAR_ERR;
+ writel(val, port->membase + AML_UART_CONTROL);
+@@ -285,6 +296,8 @@ static int meson_uart_startup(struct uart_port *port)
+ val = (AML_UART_RECV_IRQ(1) | AML_UART_XMIT_IRQ(port->fifosize / 2));
+ writel(val, port->membase + AML_UART_MISC);
+
++ spin_unlock_irqrestore(&port->lock, flags);
++
+ ret = request_irq(port->irq, meson_uart_interrupt, 0,
+ port->name, port);
+
+diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
+index fcef7a961430b..03ff63438e772 100644
+--- a/drivers/tty/serial/msm_serial.c
++++ b/drivers/tty/serial/msm_serial.c
+@@ -598,6 +598,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port)
+ u32 val;
+ int ret;
+
++ if (IS_ENABLED(CONFIG_CONSOLE_POLL))
++ return;
++
+ if (!dma->chan)
+ return;
+
+@@ -1585,6 +1588,7 @@ static inline struct uart_port *msm_get_port_from_line(unsigned int line)
+ static void __msm_console_write(struct uart_port *port, const char *s,
+ unsigned int count, bool is_uartdm)
+ {
++ unsigned long flags;
+ int i;
+ int num_newlines = 0;
+ bool replaced = false;
+@@ -1602,6 +1606,8 @@ static void __msm_console_write(struct uart_port *port, const char *s,
+ num_newlines++;
+ count += num_newlines;
+
++ local_irq_save(flags);
++
+ if (port->sysrq)
+ locked = 0;
+ else if (oops_in_progress)
+@@ -1647,6 +1653,8 @@ static void __msm_console_write(struct uart_port *port, const char *s,
+
+ if (locked)
+ spin_unlock(&port->lock);
++
++ local_irq_restore(flags);
+ }
+
+ static void msm_console_write(struct console *co, const char *s,
+diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
+index ab226da75f7ba..1074a0fdf7a1c 100644
+--- a/drivers/tty/serial/mvebu-uart.c
++++ b/drivers/tty/serial/mvebu-uart.c
+@@ -237,6 +237,7 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
+ struct tty_port *tport = &port->state->port;
+ unsigned char ch = 0;
+ char flag = 0;
++ int ret;
+
+ do {
+ if (status & STAT_RX_RDY(port)) {
+@@ -249,6 +250,16 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
+ port->icount.parity++;
+ }
+
++ /*
++ * For UART2, error bits are not cleared on buffer read.
++ * This causes interrupt loop and system hang.
++ */
++ if (IS_EXTENDED(port) && (status & STAT_BRK_ERR)) {
++ ret = readl(port->membase + UART_STAT);
++ ret |= STAT_BRK_ERR;
++ writel(ret, port->membase + UART_STAT);
++ }
++
+ if (status & STAT_BRK_DET) {
+ port->icount.brk++;
+ status &= ~(STAT_FRM_ERR | STAT_PAR_ERR);
+@@ -442,13 +453,13 @@ static void mvebu_uart_shutdown(struct uart_port *port)
+ }
+ }
+
+-static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
++static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
+ {
+ unsigned int d_divisor, m_divisor;
+ u32 brdv, osamp;
+
+ if (!port->uartclk)
+- return -EOPNOTSUPP;
++ return 0;
+
+ /*
+ * The baudrate is derived from the UART clock thanks to two divisors:
+@@ -472,7 +483,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
+ osamp &= ~OSAMP_DIVISORS_MASK;
+ writel(osamp, port->membase + UART_OSAMP);
+
+- return 0;
++ return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor);
+ }
+
+ static void mvebu_uart_set_termios(struct uart_port *port,
+@@ -509,15 +520,11 @@ static void mvebu_uart_set_termios(struct uart_port *port,
+ max_baud = 230400;
+
+ baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud);
+- if (mvebu_uart_baud_rate_set(port, baud)) {
+- /* No clock available, baudrate cannot be changed */
+- if (old)
+- baud = uart_get_baud_rate(port, old, NULL,
+- min_baud, max_baud);
+- } else {
+- tty_termios_encode_baud_rate(termios, baud, baud);
+- uart_update_timeout(port, termios->c_cflag, baud);
+- }
++ baud = mvebu_uart_baud_rate_set(port, baud);
++
++ /* In case baudrate cannot be changed, report previous old value */
++ if (baud == 0 && old)
++ baud = tty_termios_baud_rate(old);
+
+ /* Only the following flag changes are supported */
+ if (old) {
+@@ -528,6 +535,11 @@ static void mvebu_uart_set_termios(struct uart_port *port,
+ termios->c_cflag |= CS8;
+ }
+
++ if (baud != 0) {
++ tty_termios_encode_baud_rate(termios, baud, baud);
++ uart_update_timeout(port, termios->c_cflag, baud);
++ }
++
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c
+index 91f1eb0058d7e..9a6611cfc18e9 100644
+--- a/drivers/tty/serial/owl-uart.c
++++ b/drivers/tty/serial/owl-uart.c
+@@ -731,6 +731,7 @@ static int owl_uart_probe(struct platform_device *pdev)
+ owl_port->port.uartclk = clk_get_rate(owl_port->clk);
+ if (owl_port->port.uartclk == 0) {
+ dev_err(&pdev->dev, "clock rate is zero\n");
++ clk_disable_unprepare(owl_port->clk);
+ return -EINVAL;
+ }
+ owl_port->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_LOW_LATENCY;
+diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
+index f0351e6f0ef6d..e783a4225bf04 100644
+--- a/drivers/tty/serial/pch_uart.c
++++ b/drivers/tty/serial/pch_uart.c
+@@ -624,22 +624,6 @@ static int push_rx(struct eg20t_port *priv, const unsigned char *buf,
+ return 0;
+ }
+
+-static int pop_tx_x(struct eg20t_port *priv, unsigned char *buf)
+-{
+- int ret = 0;
+- struct uart_port *port = &priv->port;
+-
+- if (port->x_char) {
+- dev_dbg(priv->port.dev, "%s:X character send %02x (%lu)\n",
+- __func__, port->x_char, jiffies);
+- buf[0] = port->x_char;
+- port->x_char = 0;
+- ret = 1;
+- }
+-
+- return ret;
+-}
+-
+ static int dma_push_rx(struct eg20t_port *priv, int size)
+ {
+ int room;
+@@ -723,6 +707,7 @@ static void pch_request_dma(struct uart_port *port)
+ if (!chan) {
+ dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Tx)\n",
+ __func__);
++ pci_dev_put(dma_dev);
+ return;
+ }
+ priv->chan_tx = chan;
+@@ -739,6 +724,7 @@ static void pch_request_dma(struct uart_port *port)
+ __func__);
+ dma_release_channel(priv->chan_tx);
+ priv->chan_tx = NULL;
++ pci_dev_put(dma_dev);
+ return;
+ }
+
+@@ -746,6 +732,8 @@ static void pch_request_dma(struct uart_port *port)
+ priv->rx_buf_virt = dma_alloc_coherent(port->dev, port->fifosize,
+ &priv->rx_buf_dma, GFP_KERNEL);
+ priv->chan_rx = chan;
++
++ pci_dev_put(dma_dev);
+ }
+
+ static void pch_dma_rx_complete(void *arg)
+@@ -777,7 +765,7 @@ static void pch_dma_tx_complete(void *arg)
+ }
+ xmit->tail &= UART_XMIT_SIZE - 1;
+ async_tx_ack(priv->desc_tx);
+- dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE);
++ dma_unmap_sg(port->dev, priv->sg_tx_p, priv->orig_nent, DMA_TO_DEVICE);
+ priv->tx_dma_use = 0;
+ priv->nent = 0;
+ priv->orig_nent = 0;
+@@ -889,9 +877,10 @@ static unsigned int handle_tx(struct eg20t_port *priv)
+
+ fifo_size = max(priv->fifo_size, 1);
+ tx_empty = 1;
+- if (pop_tx_x(priv, xmit->buf)) {
+- pch_uart_hal_write(priv, xmit->buf, 1);
++ if (port->x_char) {
++ pch_uart_hal_write(priv, &port->x_char, 1);
+ port->icount.tx++;
++ port->x_char = 0;
+ tx_empty = 0;
+ fifo_size--;
+ }
+@@ -946,9 +935,11 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv)
+ }
+
+ fifo_size = max(priv->fifo_size, 1);
+- if (pop_tx_x(priv, xmit->buf)) {
+- pch_uart_hal_write(priv, xmit->buf, 1);
++
++ if (port->x_char) {
++ pch_uart_hal_write(priv, &port->x_char, 1);
+ port->icount.tx++;
++ port->x_char = 0;
+ fifo_size--;
+ }
+
+@@ -1830,7 +1821,7 @@ static void pch_uart_exit_port(struct eg20t_port *priv)
+ char name[32];
+
+ snprintf(name, sizeof(name), "uart%d_regs", priv->port.line);
+- debugfs_remove(debugfs_lookup(name, NULL));
++ debugfs_lookup_and_remove(name, NULL);
+ uart_remove_one_port(&pch_uart_driver, &priv->port);
+ free_page((unsigned long)priv->rxbuf.buf);
+ }
+diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
+index aedc38893e6cf..45010c77fe3a6 100644
+--- a/drivers/tty/serial/qcom_geni_serial.c
++++ b/drivers/tty/serial/qcom_geni_serial.c
+@@ -125,6 +125,7 @@ struct qcom_geni_serial_port {
+ u32 tx_fifo_width;
+ u32 rx_fifo_depth;
+ bool setup;
++ unsigned long clk_rate;
+ int (*handle_rx)(struct uart_port *uport, u32 bytes, bool drop);
+ unsigned int baud;
+ void *rx_fifo;
+@@ -866,9 +867,10 @@ out_unlock:
+ return IRQ_HANDLED;
+ }
+
+-static void get_tx_fifo_size(struct qcom_geni_serial_port *port)
++static int setup_fifos(struct qcom_geni_serial_port *port)
+ {
+ struct uart_port *uport;
++ u32 old_rx_fifo_depth = port->rx_fifo_depth;
+
+ uport = &port->uport;
+ port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se);
+@@ -876,6 +878,16 @@ static void get_tx_fifo_size(struct qcom_geni_serial_port *port)
+ port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se);
+ uport->fifosize =
+ (port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE;
++
++ if (port->rx_fifo && (old_rx_fifo_depth != port->rx_fifo_depth) && port->rx_fifo_depth) {
++ port->rx_fifo = devm_krealloc(uport->dev, port->rx_fifo,
++ port->rx_fifo_depth * sizeof(u32),
++ GFP_KERNEL);
++ if (!port->rx_fifo)
++ return -ENOMEM;
++ }
++
++ return 0;
+ }
+
+
+@@ -890,6 +902,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
+ u32 rxstale = DEFAULT_BITS_PER_CHAR * STALE_TIMEOUT;
+ u32 proto;
+ u32 pin_swap;
++ int ret;
+
+ proto = geni_se_read_proto(&port->se);
+ if (proto != GENI_SE_UART) {
+@@ -899,7 +912,9 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
+
+ qcom_geni_serial_stop_rx(uport);
+
+- get_tx_fifo_size(port);
++ ret = setup_fifos(port);
++ if (ret)
++ return ret;
+
+ writel(rxstale, uport->membase + SE_UART_RX_STALE_CNT);
+
+@@ -1008,6 +1023,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
+ goto out_restart_rx;
+
+ uport->uartclk = clk_rate;
++ port->clk_rate = clk_rate;
+ dev_pm_opp_set_rate(uport->dev, clk_rate);
+ ser_clk_cfg = SER_CLK_EN;
+ ser_clk_cfg |= clk_div << CLK_DIV_SHFT;
+@@ -1277,10 +1293,13 @@ static void qcom_geni_serial_pm(struct uart_port *uport,
+
+ if (new_state == UART_PM_STATE_ON && old_state == UART_PM_STATE_OFF) {
+ geni_icc_enable(&port->se);
++ if (port->clk_rate)
++ dev_pm_opp_set_rate(uport->dev, port->clk_rate);
+ geni_se_resources_on(&port->se);
+ } else if (new_state == UART_PM_STATE_OFF &&
+ old_state == UART_PM_STATE_ON) {
+ geni_se_resources_off(&port->se);
++ dev_pm_opp_set_rate(uport->dev, 0);
+ geni_icc_disable(&port->se);
+ }
+ }
+@@ -1429,25 +1448,17 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, port);
+ port->handle_rx = console ? handle_rx_console : handle_rx_uart;
+
+- ret = uart_add_one_port(drv, uport);
+- if (ret)
+- return ret;
+-
+ irq_set_status_flags(uport->irq, IRQ_NOAUTOEN);
+ ret = devm_request_irq(uport->dev, uport->irq, qcom_geni_serial_isr,
+ IRQF_TRIGGER_HIGH, port->name, uport);
+ if (ret) {
+ dev_err(uport->dev, "Failed to get IRQ ret %d\n", ret);
+- uart_remove_one_port(drv, uport);
+ return ret;
+ }
+
+- /*
+- * Set pm_runtime status as ACTIVE so that wakeup_irq gets
+- * enabled/disabled from dev_pm_arm_wake_irq during system
+- * suspend/resume respectively.
+- */
+- pm_runtime_set_active(&pdev->dev);
++ ret = uart_add_one_port(drv, uport);
++ if (ret)
++ return ret;
+
+ if (port->wakeup_irq > 0) {
+ device_init_wakeup(&pdev->dev, true);
+diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c
+index d550d8fa2fabf..a8fe1c3ebcd98 100644
+--- a/drivers/tty/serial/rda-uart.c
++++ b/drivers/tty/serial/rda-uart.c
+@@ -262,6 +262,8 @@ static void rda_uart_set_termios(struct uart_port *port,
+ fallthrough;
+ case CS7:
+ ctrl &= ~RDA_UART_DBITS_8;
++ termios->c_cflag &= ~CSIZE;
++ termios->c_cflag |= CS7;
+ break;
+ default:
+ ctrl |= RDA_UART_DBITS_8;
+diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c
+index 697b6a002a16e..4ddcc985621a8 100644
+--- a/drivers/tty/serial/sa1100.c
++++ b/drivers/tty/serial/sa1100.c
+@@ -446,6 +446,8 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios,
+ baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
+ quot = uart_get_divisor(port, baud);
+
++ del_timer_sync(&sport->timer);
++
+ spin_lock_irqsave(&sport->port.lock, flags);
+
+ sport->port.read_status_mask &= UTSR0_TO_SM(UTSR0_TFS);
+@@ -476,8 +478,6 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios,
+ UTSR1_TO_SM(UTSR1_ROR);
+ }
+
+- del_timer_sync(&sport->timer);
+-
+ /*
+ * Update the per-port timeout.
+ */
+diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
+index e2f49863e9c2d..09b2b40b63c4d 100644
+--- a/drivers/tty/serial/samsung_tty.c
++++ b/drivers/tty/serial/samsung_tty.c
+@@ -378,8 +378,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport)
+ /* Enable tx dma mode */
+ ucon = rd_regl(port, S3C2410_UCON);
+ ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
+- ucon |= (dma_get_cache_alignment() >= 16) ?
+- S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
++ ucon |= S3C64XX_UCON_TXBURST_1;
+ ucon |= S3C64XX_UCON_TXMODE_DMA;
+ wr_regl(port, S3C2410_UCON, ucon);
+
+@@ -675,7 +674,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport)
+ S3C64XX_UCON_DMASUS_EN |
+ S3C64XX_UCON_TIMEOUT_EN |
+ S3C64XX_UCON_RXMODE_MASK);
+- ucon |= S3C64XX_UCON_RXBURST_16 |
++ ucon |= S3C64XX_UCON_RXBURST_1 |
+ 0xf << S3C64XX_UCON_TIMEOUT_SHIFT |
+ S3C64XX_UCON_EMPTYINT_EN |
+ S3C64XX_UCON_TIMEOUT_EN |
+@@ -922,11 +921,8 @@ static void s3c24xx_serial_tx_chars(struct s3c24xx_uart_port *ourport)
+ return;
+ }
+
+- if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) {
+- spin_unlock(&port->lock);
++ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+- spin_lock(&port->lock);
+- }
+
+ if (uart_circ_empty(xmit))
+ s3c24xx_serial_stop_tx(port);
+@@ -1482,8 +1478,12 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport,
+ continue;
+
+ rate = clk_get_rate(clk);
+- if (!rate)
++ if (!rate) {
++ dev_err(ourport->port.dev,
++ "Failed to get clock rate for %s.\n", clkname);
++ clk_put(clk);
+ continue;
++ }
+
+ if (ourport->info->has_divslot) {
+ unsigned long div = rate / req_baud;
+@@ -1509,10 +1509,18 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport,
+ calc_deviation = -calc_deviation;
+
+ if (calc_deviation < deviation) {
++ /*
++ * If we find a better clk, release the previous one, if
++ * any.
++ */
++ if (!IS_ERR(*best_clk))
++ clk_put(*best_clk);
+ *best_clk = clk;
+ best_quot = quot;
+ *clk_num = cnt;
+ deviation = calc_deviation;
++ } else {
++ clk_put(clk);
+ }
+ }
+
+diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
+index acbb615dd28fd..e8f8a94ad606f 100644
+--- a/drivers/tty/serial/sc16is7xx.c
++++ b/drivers/tty/serial/sc16is7xx.c
+@@ -734,12 +734,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id)
+ static void sc16is7xx_tx_proc(struct kthread_work *ws)
+ {
+ struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port);
++ struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
+
+ if ((port->rs485.flags & SER_RS485_ENABLED) &&
+ (port->rs485.delay_rts_before_send > 0))
+ msleep(port->rs485.delay_rts_before_send);
+
++ mutex_lock(&s->efr_lock);
+ sc16is7xx_handle_tx(port);
++ mutex_unlock(&s->efr_lock);
+ }
+
+ static void sc16is7xx_reconf_rs485(struct uart_port *port)
+@@ -1167,9 +1170,18 @@ static int sc16is7xx_gpio_direction_output(struct gpio_chip *chip,
+ state |= BIT(offset);
+ else
+ state &= ~BIT(offset);
+- sc16is7xx_port_write(port, SC16IS7XX_IOSTATE_REG, state);
++
++ /*
++ * If we write IOSTATE first, and then IODIR, the output value is not
++ * transferred to the corresponding I/O pin.
++ * The datasheet states that each register bit will be transferred to
++ * the corresponding I/O pin programmed as output when writing to
++ * IOSTATE. Therefore, configure direction first with IODIR, and then
++ * set value after with IOSTATE.
++ */
+ sc16is7xx_port_update(port, SC16IS7XX_IODIR_REG, BIT(offset),
+ BIT(offset));
++ sc16is7xx_port_write(port, SC16IS7XX_IOSTATE_REG, state);
+
+ return 0;
+ }
+@@ -1242,25 +1254,6 @@ static int sc16is7xx_probe(struct device *dev,
+ }
+ sched_set_fifo(s->kworker_task);
+
+-#ifdef CONFIG_GPIOLIB
+- if (devtype->nr_gpio) {
+- /* Setup GPIO cotroller */
+- s->gpio.owner = THIS_MODULE;
+- s->gpio.parent = dev;
+- s->gpio.label = dev_name(dev);
+- s->gpio.direction_input = sc16is7xx_gpio_direction_input;
+- s->gpio.get = sc16is7xx_gpio_get;
+- s->gpio.direction_output = sc16is7xx_gpio_direction_output;
+- s->gpio.set = sc16is7xx_gpio_set;
+- s->gpio.base = -1;
+- s->gpio.ngpio = devtype->nr_gpio;
+- s->gpio.can_sleep = 1;
+- ret = gpiochip_add_data(&s->gpio, s);
+- if (ret)
+- goto out_thread;
+- }
+-#endif
+-
+ /* reset device, purging any pending irq / data */
+ regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT,
+ SC16IS7XX_IOCONTROL_SRESET_BIT);
+@@ -1274,6 +1267,12 @@ static int sc16is7xx_probe(struct device *dev,
+ s->p[i].port.fifosize = SC16IS7XX_FIFO_SIZE;
+ s->p[i].port.flags = UPF_FIXED_TYPE | UPF_LOW_LATENCY;
+ s->p[i].port.iobase = i;
++ /*
++ * Use all ones as membase to make sure uart_configure_port() in
++ * serial_core.c does not abort for SPI/I2C devices where the
++ * membase address is not applicable.
++ */
++ s->p[i].port.membase = (void __iomem *)~0;
+ s->p[i].port.iotype = UPIO_PORT;
+ s->p[i].port.uartclk = freq;
+ s->p[i].port.rs485_config = sc16is7xx_config_rs485;
+@@ -1326,6 +1325,25 @@ static int sc16is7xx_probe(struct device *dev,
+ s->p[u].irda_mode = true;
+ }
+
++#ifdef CONFIG_GPIOLIB
++ if (devtype->nr_gpio) {
++		/* Setup GPIO controller */
++ s->gpio.owner = THIS_MODULE;
++ s->gpio.parent = dev;
++ s->gpio.label = dev_name(dev);
++ s->gpio.direction_input = sc16is7xx_gpio_direction_input;
++ s->gpio.get = sc16is7xx_gpio_get;
++ s->gpio.direction_output = sc16is7xx_gpio_direction_output;
++ s->gpio.set = sc16is7xx_gpio_set;
++ s->gpio.base = -1;
++ s->gpio.ngpio = devtype->nr_gpio;
++ s->gpio.can_sleep = 1;
++ ret = gpiochip_add_data(&s->gpio, s);
++ if (ret)
++ goto out_thread;
++ }
++#endif
++
+ /*
+ * Setup interrupt. We first try to acquire the IRQ line as level IRQ.
+ * If that succeeds, we can allow sharing the interrupt as well.
+@@ -1345,18 +1363,19 @@ static int sc16is7xx_probe(struct device *dev,
+ if (!ret)
+ return 0;
+
+-out_ports:
+- for (i--; i >= 0; i--) {
+- uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
+- clear_bit(s->p[i].port.line, &sc16is7xx_lines);
+- }
+-
+ #ifdef CONFIG_GPIOLIB
+ if (devtype->nr_gpio)
+ gpiochip_remove(&s->gpio);
+
+ out_thread:
+ #endif
++
++out_ports:
++ for (i--; i >= 0; i--) {
++ uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port);
++ clear_bit(s->p[i].port.line, &sc16is7xx_lines);
++ }
++
+ kthread_stop(s->kworker_task);
+
+ out_clk:
+diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
+index 45e2e4109acd0..52d5c72227e79 100644
+--- a/drivers/tty/serial/serial-tegra.c
++++ b/drivers/tty/serial/serial-tegra.c
+@@ -525,7 +525,7 @@ static void tegra_uart_tx_dma_complete(void *args)
+ count = tup->tx_bytes_requested - state.residue;
+ async_tx_ack(tup->tx_dma_desc);
+ spin_lock_irqsave(&tup->uport.lock, flags);
+- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
++ uart_xmit_advance(&tup->uport, count);
+ tup->tx_in_progress = 0;
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(&tup->uport);
+@@ -613,18 +613,18 @@ static unsigned int tegra_uart_tx_empty(struct uart_port *u)
+ static void tegra_uart_stop_tx(struct uart_port *u)
+ {
+ struct tegra_uart_port *tup = to_tegra_uport(u);
+- struct circ_buf *xmit = &tup->uport.state->xmit;
+ struct dma_tx_state state;
+ unsigned int count;
+
+ if (tup->tx_in_progress != TEGRA_UART_TX_DMA)
+ return;
+
+- dmaengine_terminate_all(tup->tx_dma_chan);
++ dmaengine_pause(tup->tx_dma_chan);
+ dmaengine_tx_status(tup->tx_dma_chan, tup->tx_cookie, &state);
++ dmaengine_terminate_all(tup->tx_dma_chan);
+ count = tup->tx_bytes_requested - state.residue;
+ async_tx_ack(tup->tx_dma_desc);
+- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
++ uart_xmit_advance(&tup->uport, count);
+ tup->tx_in_progress = 0;
+ }
+
+@@ -764,8 +764,9 @@ static void tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup)
+ return;
+ }
+
+- dmaengine_terminate_all(tup->rx_dma_chan);
++ dmaengine_pause(tup->rx_dma_chan);
+ dmaengine_tx_status(tup->rx_dma_chan, tup->rx_cookie, &state);
++ dmaengine_terminate_all(tup->rx_dma_chan);
+
+ tegra_uart_rx_buffer_push(tup, state.residue);
+ tup->rx_dma_active = false;
+@@ -998,7 +999,11 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup)
+ tup->ier_shadow = 0;
+ tup->current_baud = 0;
+
+- clk_prepare_enable(tup->uart_clk);
++ ret = clk_prepare_enable(tup->uart_clk);
++ if (ret) {
++ dev_err(tup->uport.dev, "could not enable clk\n");
++ return ret;
++ }
+
+ /* Reset the UART controller to clear all previous status.*/
+ reset_control_assert(tup->rst);
+@@ -1046,6 +1051,7 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup)
+ if (tup->cdata->fifo_mode_enable_status) {
+ ret = tegra_uart_wait_fifo_mode_enabled(tup);
+ if (ret < 0) {
++ clk_disable_unprepare(tup->uart_clk);
+ dev_err(tup->uport.dev,
+ "Failed to enable FIFO mode: %d\n", ret);
+ return ret;
+@@ -1067,6 +1073,7 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup)
+ */
+ ret = tegra_set_baudrate(tup, TEGRA_UART_DEFAULT_BAUD);
+ if (ret < 0) {
++ clk_disable_unprepare(tup->uart_clk);
+ dev_err(tup->uport.dev, "Failed to set baud rate\n");
+ return ret;
+ }
+@@ -1226,10 +1233,13 @@ static int tegra_uart_startup(struct uart_port *u)
+ dev_name(u->dev), tup);
+ if (ret < 0) {
+ dev_err(u->dev, "Failed to register ISR for IRQ %d\n", u->irq);
+- goto fail_hw_init;
++ goto fail_request_irq;
+ }
+ return 0;
+
++fail_request_irq:
++ /* tup->uart_clk is already enabled in tegra_uart_hw_init */
++ clk_disable_unprepare(tup->uart_clk);
+ fail_hw_init:
+ if (!tup->use_rx_pio)
+ tegra_uart_dma_channel_free(tup, true);
+@@ -1506,7 +1516,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = {
+ .fifo_mode_enable_status = false,
+ .uart_max_port = 5,
+ .max_dma_burst_bytes = 4,
+- .error_tolerance_low_range = 0,
++ .error_tolerance_low_range = -4,
+ .error_tolerance_high_range = 4,
+ };
+
+@@ -1517,7 +1527,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = {
+ .fifo_mode_enable_status = false,
+ .uart_max_port = 5,
+ .max_dma_burst_bytes = 4,
+- .error_tolerance_low_range = 0,
++ .error_tolerance_low_range = -4,
+ .error_tolerance_high_range = 4,
+ };
+
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
+index 0e2e35ab64c79..45b721abaa2f5 100644
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -42,6 +42,11 @@ static struct lock_class_key port_lock_key;
+
+ #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
+
++/*
++ * Max time with active RTS before/after data is sent.
++ */
++#define RS485_MAX_RTS_DELAY 100 /* msecs */
++
+ static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
+ struct ktermios *old_termios);
+ static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
+@@ -147,7 +152,7 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
+ spin_lock_irqsave(&port->lock, flags);
+ old = port->mctrl;
+ port->mctrl = (old & ~clear) | set;
+- if (old != port->mctrl)
++ if (old != port->mctrl && !(port->rs485.flags & SER_RS485_ENABLED))
+ port->ops->set_mctrl(port, port->mctrl);
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+@@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
+
+ static void uart_port_dtr_rts(struct uart_port *uport, int raise)
+ {
+- int rs485_on = uport->rs485_config &&
+- (uport->rs485.flags & SER_RS485_ENABLED);
+- int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND);
+-
+- if (raise) {
+- if (rs485_on && !RTS_after_send) {
+- uart_set_mctrl(uport, TIOCM_DTR);
+- uart_clear_mctrl(uport, TIOCM_RTS);
+- } else {
+- uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
+- }
+- } else {
+- unsigned int clear = TIOCM_DTR;
+-
+- clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0;
+- uart_clear_mctrl(uport, clear);
+- }
++ if (raise)
++ uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
++ else
++ uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS);
+ }
+
+ /*
+@@ -222,7 +214,11 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
+ if (retval == 0) {
+ if (uart_console(uport) && uport->cons->cflag) {
+ tty->termios.c_cflag = uport->cons->cflag;
++ tty->termios.c_ispeed = uport->cons->ispeed;
++ tty->termios.c_ospeed = uport->cons->ospeed;
+ uport->cons->cflag = 0;
++ uport->cons->ispeed = 0;
++ uport->cons->ospeed = 0;
+ }
+ /*
+ * Initialise the hardware port settings.
+@@ -290,8 +286,11 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state)
+ /*
+ * Turn off DTR and RTS early.
+ */
+- if (uport && uart_console(uport) && tty)
++ if (uport && uart_console(uport) && tty) {
+ uport->cons->cflag = tty->termios.c_cflag;
++ uport->cons->ispeed = tty->termios.c_ispeed;
++ uport->cons->ospeed = tty->termios.c_ospeed;
++ }
+
+ if (!tty || C_HUPCL(tty))
+ uart_port_dtr_rts(uport, 0);
+@@ -645,6 +644,20 @@ static void uart_flush_buffer(struct tty_struct *tty)
+ tty_port_tty_wakeup(&state->port);
+ }
+
++/*
++ * This function performs a low-level write of a high-priority XON/XOFF
++ * character and accounts for it.
++ *
++ * Requires uart_port to implement .serial_out().
++ */
++void uart_xchar_out(struct uart_port *uport, int offset)
++{
++ serial_port_out(uport, offset, uport->x_char);
++ uport->icount.tx++;
++ uport->x_char = 0;
++}
++EXPORT_SYMBOL_GPL(uart_xchar_out);
++
+ /*
+ * This function is used to send a high-priority XON/XOFF character to
+ * the device
+@@ -1286,8 +1299,41 @@ static int uart_set_rs485_config(struct uart_port *port,
+ if (copy_from_user(&rs485, rs485_user, sizeof(*rs485_user)))
+ return -EFAULT;
+
++ /* pick sane settings if the user hasn't */
++ if (!(rs485.flags & SER_RS485_RTS_ON_SEND) ==
++ !(rs485.flags & SER_RS485_RTS_AFTER_SEND)) {
++ dev_warn_ratelimited(port->dev,
++ "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n",
++ port->name, port->line);
++ rs485.flags |= SER_RS485_RTS_ON_SEND;
++ rs485.flags &= ~SER_RS485_RTS_AFTER_SEND;
++ }
++
++ if (rs485.delay_rts_before_send > RS485_MAX_RTS_DELAY) {
++ rs485.delay_rts_before_send = RS485_MAX_RTS_DELAY;
++ dev_warn_ratelimited(port->dev,
++ "%s (%d): RTS delay before sending clamped to %u ms\n",
++ port->name, port->line, rs485.delay_rts_before_send);
++ }
++
++ if (rs485.delay_rts_after_send > RS485_MAX_RTS_DELAY) {
++ rs485.delay_rts_after_send = RS485_MAX_RTS_DELAY;
++ dev_warn_ratelimited(port->dev,
++ "%s (%d): RTS delay after sending clamped to %u ms\n",
++ port->name, port->line, rs485.delay_rts_after_send);
++ }
++ /* Return clean padding area to userspace */
++ memset(rs485.padding, 0, sizeof(rs485.padding));
++
+ spin_lock_irqsave(&port->lock, flags);
+ ret = port->rs485_config(port, &rs485);
++ if (!ret) {
++ port->rs485 = rs485;
++
++ /* Reset RTS and other mctrl lines when disabling RS485 */
++ if (!(rs485.flags & SER_RS485_ENABLED))
++ port->ops->set_mctrl(port, port->mctrl);
++ }
+ spin_unlock_irqrestore(&port->lock, flags);
+ if (ret)
+ return ret;
+@@ -1542,6 +1588,7 @@ static void uart_tty_port_shutdown(struct tty_port *port)
+ {
+ struct uart_state *state = container_of(port, struct uart_state, port);
+ struct uart_port *uport = uart_port_check(state);
++ char *buf;
+
+ /*
+ * At this point, we stop accepting input. To do this, we
+@@ -1563,8 +1610,18 @@ static void uart_tty_port_shutdown(struct tty_port *port)
+ */
+ tty_port_set_suspended(port, 0);
+
+- uart_change_pm(state, UART_PM_STATE_OFF);
++ /*
++ * Free the transmit buffer.
++ */
++ spin_lock_irq(&uport->lock);
++ buf = state->xmit.buf;
++ state->xmit.buf = NULL;
++ spin_unlock_irq(&uport->lock);
+
++ if (buf)
++ free_page((unsigned long)buf);
++
++ uart_change_pm(state, UART_PM_STATE_OFF);
+ }
+
+ static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
+@@ -1888,11 +1945,6 @@ static int uart_proc_show(struct seq_file *m, void *v)
+ }
+ #endif
+
+-static inline bool uart_console_enabled(struct uart_port *port)
+-{
+- return uart_console(port) && (port->cons->flags & CON_ENABLED);
+-}
+-
+ static void uart_port_spin_lock_init(struct uart_port *port)
+ {
+ spin_lock_init(&port->lock);
+@@ -2094,8 +2146,11 @@ uart_set_options(struct uart_port *port, struct console *co,
+ * Allow the setting of the UART parameters with a NULL console
+ * too:
+ */
+- if (co)
++ if (co) {
+ co->cflag = termios.c_cflag;
++ co->ispeed = termios.c_ispeed;
++ co->ospeed = termios.c_ospeed;
++ }
+
+ return 0;
+ }
+@@ -2170,7 +2225,8 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
+
+ spin_lock_irq(&uport->lock);
+ ops->stop_tx(uport);
+- ops->set_mctrl(uport, 0);
++ if (!(uport->rs485.flags & SER_RS485_ENABLED))
++ ops->set_mctrl(uport, 0);
+ ops->stop_rx(uport);
+ spin_unlock_irq(&uport->lock);
+
+@@ -2229,6 +2285,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
+ */
+ memset(&termios, 0, sizeof(struct ktermios));
+ termios.c_cflag = uport->cons->cflag;
++ termios.c_ispeed = uport->cons->ispeed;
++ termios.c_ospeed = uport->cons->ospeed;
+
+ /*
+ * If that's unset, use the tty termios setting.
+@@ -2249,7 +2307,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
+
+ uart_change_pm(state, UART_PM_STATE_ON);
+ spin_lock_irq(&uport->lock);
+- ops->set_mctrl(uport, 0);
++ if (!(uport->rs485.flags & SER_RS485_ENABLED))
++ ops->set_mctrl(uport, 0);
+ spin_unlock_irq(&uport->lock);
+ if (console_suspend_enabled || !uart_console(uport)) {
+ /* Protected by port mutex for now */
+@@ -2260,7 +2319,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
+ if (tty)
+ uart_change_speed(tty, state, NULL);
+ spin_lock_irq(&uport->lock);
+- ops->set_mctrl(uport, uport->mctrl);
++ if (!(uport->rs485.flags & SER_RS485_ENABLED))
++ ops->set_mctrl(uport, uport->mctrl);
++ else
++ uport->rs485_config(uport, &uport->rs485);
+ ops->start_tx(uport);
+ spin_unlock_irq(&uport->lock);
+ tty_port_set_initialized(port, 1);
+@@ -2365,7 +2427,11 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
+ * We probably don't need a spinlock around this, but
+ */
+ spin_lock_irqsave(&port->lock, flags);
+- port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR);
++ port->mctrl &= TIOCM_DTR;
++ if (!(port->rs485.flags & SER_RS485_ENABLED))
++ port->ops->set_mctrl(port, port->mctrl);
++ else
++ port->rs485_config(port, &port->rs485);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ /*
+diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c
+index aaca4fe38486a..1f8362d5e3b97 100644
+--- a/drivers/tty/serial/serial_txx9.c
++++ b/drivers/tty/serial/serial_txx9.c
+@@ -644,6 +644,8 @@ serial_txx9_set_termios(struct uart_port *port, struct ktermios *termios,
+ case CS6: /* not supported */
+ case CS8:
+ cval |= TXX9_SILCR_UMODE_8BIT;
++ termios->c_cflag &= ~CSIZE;
++ termios->c_cflag |= CS8;
+ break;
+ }
+
+diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
+index 89ee43061d3ae..25318176091b2 100644
+--- a/drivers/tty/serial/sh-sci.c
++++ b/drivers/tty/serial/sh-sci.c
+@@ -31,6 +31,7 @@
+ #include <linux/ioport.h>
+ #include <linux/ktime.h>
+ #include <linux/major.h>
++#include <linux/minmax.h>
+ #include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/of.h>
+@@ -2390,8 +2391,12 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
+ int best_clk = -1;
+ unsigned long flags;
+
+- if ((termios->c_cflag & CSIZE) == CS7)
++ if ((termios->c_cflag & CSIZE) == CS7) {
+ smr_val |= SCSMR_CHR;
++ } else {
++ termios->c_cflag &= ~CSIZE;
++ termios->c_cflag |= CS8;
++ }
+ if (termios->c_cflag & PARENB)
+ smr_val |= SCSMR_PE;
+ if (termios->c_cflag & PARODD)
+@@ -2891,6 +2896,13 @@ static int sci_init_single(struct platform_device *dev,
+ sci_port->irqs[i] = platform_get_irq(dev, i);
+ }
+
++ /*
++ * The fourth interrupt on SCI port is transmit end interrupt, so
++ * shuffle the interrupts.
++ */
++ if (p->type == PORT_SCI)
++ swap(sci_port->irqs[SCIx_BRI_IRQ], sci_port->irqs[SCIx_TEI_IRQ]);
++
+ /* The SCI generates several interrupts. They can be muxed together or
+ * connected to different interrupt lines. In the muxed case only one
+ * interrupt resource is specified as there is only one interrupt ID.
+@@ -2956,7 +2968,7 @@ static int sci_init_single(struct platform_device *dev,
+ port->flags = UPF_FIXED_PORT | UPF_BOOT_AUTOCONF | p->flags;
+ port->fifosize = sci_port->params->fifosize;
+
+- if (port->type == PORT_SCI) {
++ if (port->type == PORT_SCI && !dev->dev.of_node) {
+ if (sci_port->reg_size >= 0x20)
+ port->regshift = 2;
+ else
+diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c
+index 0ac0371f943b6..69a32d94ec9df 100644
+--- a/drivers/tty/serial/sifive.c
++++ b/drivers/tty/serial/sifive.c
+@@ -666,12 +666,16 @@ static void sifive_serial_set_termios(struct uart_port *port,
+ int rate;
+ char nstop;
+
+- if ((termios->c_cflag & CSIZE) != CS8)
++ if ((termios->c_cflag & CSIZE) != CS8) {
+ dev_err_once(ssp->port.dev, "only 8-bit words supported\n");
++ termios->c_cflag &= ~CSIZE;
++ termios->c_cflag |= CS8;
++ }
+ if (termios->c_iflag & (INPCK | PARMRK))
+ dev_err_once(ssp->port.dev, "parity checking not supported\n");
+ if (termios->c_iflag & BRKINT)
+ dev_err_once(ssp->port.dev, "BREAK detection not supported\n");
++ termios->c_iflag &= ~(INPCK|PARMRK|BRKINT);
+
+ /* Set number of stop bits */
+ nstop = (termios->c_cflag & CSTOPB) ? 2 : 1;
+@@ -839,7 +843,7 @@ static void sifive_serial_console_write(struct console *co, const char *s,
+ local_irq_restore(flags);
+ }
+
+-static int __init sifive_serial_console_setup(struct console *co, char *options)
++static int sifive_serial_console_setup(struct console *co, char *options)
+ {
+ struct sifive_serial_port *ssp;
+ int baud = SIFIVE_DEFAULT_BAUD_RATE;
+@@ -998,7 +1002,7 @@ static int sifive_serial_probe(struct platform_device *pdev)
+ /* Set up clock divider */
+ ssp->clkin_rate = clk_get_rate(ssp->clk);
+ ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE;
+- ssp->port.uartclk = ssp->baud_rate * 16;
++ ssp->port.uartclk = ssp->clkin_rate;
+ __ssp_update_div(ssp);
+
+ platform_set_drvdata(pdev, ssp);
+diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
+index 9a7ae6384edfa..a1952e4f1fcbb 100644
+--- a/drivers/tty/serial/sprd_serial.c
++++ b/drivers/tty/serial/sprd_serial.c
+@@ -367,7 +367,7 @@ static void sprd_rx_free_buf(struct sprd_uart_port *sp)
+ if (sp->rx_dma.virt)
+ dma_free_coherent(sp->port.dev, SPRD_UART_RX_SIZE,
+ sp->rx_dma.virt, sp->rx_dma.phys_addr);
+-
++ sp->rx_dma.virt = NULL;
+ }
+
+ static int sprd_rx_dma_config(struct uart_port *port, u32 burst)
+@@ -1133,7 +1133,7 @@ static bool sprd_uart_is_console(struct uart_port *uport)
+ static int sprd_clk_init(struct uart_port *uport)
+ {
+ struct clk *clk_uart, *clk_parent;
+- struct sprd_uart_port *u = sprd_port[uport->line];
++ struct sprd_uart_port *u = container_of(uport, struct sprd_uart_port, port);
+
+ clk_uart = devm_clk_get(uport->dev, "uart");
+ if (IS_ERR(clk_uart)) {
+@@ -1176,22 +1176,22 @@ static int sprd_probe(struct platform_device *pdev)
+ {
+ struct resource *res;
+ struct uart_port *up;
++ struct sprd_uart_port *sport;
+ int irq;
+ int index;
+ int ret;
+
+ index = of_alias_get_id(pdev->dev.of_node, "serial");
+- if (index < 0 || index >= ARRAY_SIZE(sprd_port)) {
++ if (index < 0 || index >= UART_NR_MAX) {
+ dev_err(&pdev->dev, "got a wrong serial alias id %d\n", index);
+ return -EINVAL;
+ }
+
+- sprd_port[index] = devm_kzalloc(&pdev->dev, sizeof(*sprd_port[index]),
+- GFP_KERNEL);
+- if (!sprd_port[index])
++ sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
++ if (!sport)
+ return -ENOMEM;
+
+- up = &sprd_port[index]->port;
++ up = &sport->port;
+ up->dev = &pdev->dev;
+ up->line = index;
+ up->type = PORT_SPRD;
+@@ -1222,7 +1222,7 @@ static int sprd_probe(struct platform_device *pdev)
+ * Allocate one dma buffer to prepare for receive transfer, in case
+ * memory allocation failure at runtime.
+ */
+- ret = sprd_rx_alloc_buf(sprd_port[index]);
++ ret = sprd_rx_alloc_buf(sport);
+ if (ret)
+ return ret;
+
+@@ -1230,17 +1230,27 @@ static int sprd_probe(struct platform_device *pdev)
+ ret = uart_register_driver(&sprd_uart_driver);
+ if (ret < 0) {
+ pr_err("Failed to register SPRD-UART driver\n");
+- return ret;
++ goto free_rx_buf;
+ }
+ }
++
+ sprd_ports_num++;
++ sprd_port[index] = sport;
+
+ ret = uart_add_one_port(&sprd_uart_driver, up);
+ if (ret)
+- sprd_remove(pdev);
++ goto clean_port;
+
+ platform_set_drvdata(pdev, up);
+
++ return 0;
++
++clean_port:
++ sprd_port[index] = NULL;
++ if (--sprd_ports_num == 0)
++ uart_unregister_driver(&sprd_uart_driver);
++free_rx_buf:
++ sprd_rx_free_buf(sport);
+ return ret;
+ }
+
+diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
+index 87e480cc8206d..5a45633aaea8d 100644
+--- a/drivers/tty/serial/st-asc.c
++++ b/drivers/tty/serial/st-asc.c
+@@ -535,10 +535,14 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
+ /* set character length */
+ if ((cflag & CSIZE) == CS7) {
+ ctrl_val |= ASC_CTL_MODE_7BIT_PAR;
++ cflag |= PARENB;
+ } else {
+ ctrl_val |= (cflag & PARENB) ? ASC_CTL_MODE_8BIT_PAR :
+ ASC_CTL_MODE_8BIT;
++ cflag &= ~CSIZE;
++ cflag |= CS8;
+ }
++ termios->c_cflag = cflag;
+
+ /* set stop bit */
+ ctrl_val |= (cflag & CSTOPB) ? ASC_CTL_STOP_2BIT : ASC_CTL_STOP_1BIT;
+diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
+index 8f032e77b954a..3b7d4481edbea 100644
+--- a/drivers/tty/serial/stm32-usart.c
++++ b/drivers/tty/serial/stm32-usart.c
+@@ -61,6 +61,53 @@ static void stm32_usart_clr_bits(struct uart_port *port, u32 reg, u32 bits)
+ writel_relaxed(val, port->membase + reg);
+ }
+
++static unsigned int stm32_usart_tx_empty(struct uart_port *port)
++{
++ struct stm32_port *stm32_port = to_stm32_port(port);
++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
++
++ if (readl_relaxed(port->membase + ofs->isr) & USART_SR_TC)
++ return TIOCSER_TEMT;
++
++ return 0;
++}
++
++static void stm32_usart_rs485_rts_enable(struct uart_port *port)
++{
++ struct stm32_port *stm32_port = to_stm32_port(port);
++ struct serial_rs485 *rs485conf = &port->rs485;
++
++ if (stm32_port->hw_flow_control ||
++ !(rs485conf->flags & SER_RS485_ENABLED))
++ return;
++
++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) {
++ mctrl_gpio_set(stm32_port->gpios,
++ stm32_port->port.mctrl | TIOCM_RTS);
++ } else {
++ mctrl_gpio_set(stm32_port->gpios,
++ stm32_port->port.mctrl & ~TIOCM_RTS);
++ }
++}
++
++static void stm32_usart_rs485_rts_disable(struct uart_port *port)
++{
++ struct stm32_port *stm32_port = to_stm32_port(port);
++ struct serial_rs485 *rs485conf = &port->rs485;
++
++ if (stm32_port->hw_flow_control ||
++ !(rs485conf->flags & SER_RS485_ENABLED))
++ return;
++
++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) {
++ mctrl_gpio_set(stm32_port->gpios,
++ stm32_port->port.mctrl & ~TIOCM_RTS);
++ } else {
++ mctrl_gpio_set(stm32_port->gpios,
++ stm32_port->port.mctrl | TIOCM_RTS);
++ }
++}
++
+ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE,
+ u32 delay_DDE, u32 baud)
+ {
+@@ -71,6 +118,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE,
+ *cr3 |= USART_CR3_DEM;
+ over8 = *cr1 & USART_CR1_OVER8;
+
++ *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK);
++
+ if (over8)
+ rs485_deat_dedt = delay_ADE * baud * 8;
+ else
+@@ -147,6 +196,12 @@ static int stm32_usart_config_rs485(struct uart_port *port,
+
+ stm32_usart_set_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit));
+
++ /* Adjust RTS polarity in case it's driven in software */
++ if (stm32_usart_tx_empty(port))
++ stm32_usart_rs485_rts_disable(port);
++ else
++ stm32_usart_rs485_rts_enable(port);
++
+ return 0;
+ }
+
+@@ -209,19 +264,22 @@ static unsigned long stm32_usart_get_char(struct uart_port *port, u32 *sr,
+ return c;
+ }
+
+-static void stm32_usart_receive_chars(struct uart_port *port, bool threaded)
++static void stm32_usart_receive_chars(struct uart_port *port, bool irqflag)
+ {
+ struct tty_port *tport = &port->state->port;
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+- unsigned long c;
++ unsigned long c, flags;
+ u32 sr;
+ char flag;
+
+- spin_lock(&port->lock);
++ if (irqflag)
++ spin_lock_irqsave(&port->lock, flags);
++ else
++ spin_lock(&port->lock);
+
+ while (stm32_usart_pending_rx(port, &sr, &stm32_port->last_res,
+- threaded)) {
++ irqflag)) {
+ sr |= USART_SR_DUMMY_RX;
+ flag = TTY_NORMAL;
+
+@@ -275,7 +333,10 @@ static void stm32_usart_receive_chars(struct uart_port *port, bool threaded)
+ uart_insert_char(port, sr, USART_SR_ORE, c, flag);
+ }
+
+- uart_unlock_and_check_sysrq(port);
++ if (irqflag)
++ uart_unlock_and_check_sysrq_irqrestore(port, irqflag);
++ else
++ uart_unlock_and_check_sysrq(port);
+
+ tty_flip_buffer_push(tport);
+ }
+@@ -312,6 +373,14 @@ static void stm32_usart_tx_interrupt_enable(struct uart_port *port)
+ stm32_usart_set_bits(port, ofs->cr1, USART_CR1_TXEIE);
+ }
+
++static void stm32_usart_tc_interrupt_enable(struct uart_port *port)
++{
++ struct stm32_port *stm32_port = to_stm32_port(port);
++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
++
++ stm32_usart_set_bits(port, ofs->cr1, USART_CR1_TCIE);
++}
++
+ static void stm32_usart_tx_interrupt_disable(struct uart_port *port)
+ {
+ struct stm32_port *stm32_port = to_stm32_port(port);
+@@ -323,6 +392,14 @@ static void stm32_usart_tx_interrupt_disable(struct uart_port *port)
+ stm32_usart_clr_bits(port, ofs->cr1, USART_CR1_TXEIE);
+ }
+
++static void stm32_usart_tc_interrupt_disable(struct uart_port *port)
++{
++ struct stm32_port *stm32_port = to_stm32_port(port);
++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
++
++ stm32_usart_clr_bits(port, ofs->cr1, USART_CR1_TCIE);
++}
++
+ static void stm32_usart_transmit_chars_pio(struct uart_port *port)
+ {
+ struct stm32_port *stm32_port = to_stm32_port(port);
+@@ -421,10 +498,30 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct circ_buf *xmit = &port->state->xmit;
++ u32 isr;
++ int ret;
++
++ if (!stm32_port->hw_flow_control &&
++ port->rs485.flags & SER_RS485_ENABLED &&
++ (port->x_char ||
++ !(uart_circ_empty(xmit) || uart_tx_stopped(port)))) {
++ stm32_usart_tc_interrupt_disable(port);
++ stm32_usart_rs485_rts_enable(port);
++ }
+
+ if (port->x_char) {
+ if (stm32_port->tx_dma_busy)
+ stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
++
++ /* Check that TDR is empty before filling FIFO */
++ ret =
++ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
++ isr,
++ (isr & USART_SR_TXE),
++ 10, 1000);
++ if (ret)
++ dev_warn(port->dev, "1 character may be erased\n");
++
+ writel_relaxed(port->x_char, port->membase + ofs->tdr);
+ port->x_char = 0;
+ port->icount.tx++;
+@@ -451,8 +548,14 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
+ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+ uart_write_wakeup(port);
+
+- if (uart_circ_empty(xmit))
++ if (uart_circ_empty(xmit)) {
+ stm32_usart_tx_interrupt_disable(port);
++ if (!stm32_port->hw_flow_control &&
++ port->rs485.flags & SER_RS485_ENABLED) {
++ stm32_port->txdone = true;
++ stm32_usart_tc_interrupt_enable(port);
++ }
++ }
+ }
+
+ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
+@@ -465,6 +568,13 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
+
+ sr = readl_relaxed(port->membase + ofs->isr);
+
++ if (!stm32_port->hw_flow_control &&
++ port->rs485.flags & SER_RS485_ENABLED &&
++ (sr & USART_SR_TC)) {
++ stm32_usart_tc_interrupt_disable(port);
++ stm32_usart_rs485_rts_disable(port);
++ }
++
+ if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG)
+ writel_relaxed(USART_ICR_RTOCF,
+ port->membase + ofs->icr);
+@@ -496,25 +606,13 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
+ static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr)
+ {
+ struct uart_port *port = ptr;
+- struct stm32_port *stm32_port = to_stm32_port(port);
+
+- if (stm32_port->rx_ch)
+- stm32_usart_receive_chars(port, true);
++ /* Receiver timeout irq for DMA RX */
++ stm32_usart_receive_chars(port, false);
+
+ return IRQ_HANDLED;
+ }
+
+-static unsigned int stm32_usart_tx_empty(struct uart_port *port)
+-{
+- struct stm32_port *stm32_port = to_stm32_port(port);
+- const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+-
+- if (readl_relaxed(port->membase + ofs->isr) & USART_SR_TC)
+- return TIOCSER_TEMT;
+-
+- return 0;
+-}
+-
+ static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+ {
+ struct stm32_port *stm32_port = to_stm32_port(port);
+@@ -552,42 +650,23 @@ static void stm32_usart_disable_ms(struct uart_port *port)
+ /* Transmit stop */
+ static void stm32_usart_stop_tx(struct uart_port *port)
+ {
+- struct stm32_port *stm32_port = to_stm32_port(port);
+- struct serial_rs485 *rs485conf = &port->rs485;
+-
+ stm32_usart_tx_interrupt_disable(port);
+
+- if (rs485conf->flags & SER_RS485_ENABLED) {
+- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) {
+- mctrl_gpio_set(stm32_port->gpios,
+- stm32_port->port.mctrl & ~TIOCM_RTS);
+- } else {
+- mctrl_gpio_set(stm32_port->gpios,
+- stm32_port->port.mctrl | TIOCM_RTS);
+- }
+- }
++ stm32_usart_rs485_rts_disable(port);
+ }
+
+ /* There are probably characters waiting to be transmitted. */
+ static void stm32_usart_start_tx(struct uart_port *port)
+ {
+- struct stm32_port *stm32_port = to_stm32_port(port);
+- struct serial_rs485 *rs485conf = &port->rs485;
+ struct circ_buf *xmit = &port->state->xmit;
+
+- if (uart_circ_empty(xmit))
++ if (uart_circ_empty(xmit) && !port->x_char) {
++ stm32_usart_rs485_rts_disable(port);
+ return;
+-
+- if (rs485conf->flags & SER_RS485_ENABLED) {
+- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) {
+- mctrl_gpio_set(stm32_port->gpios,
+- stm32_port->port.mctrl | TIOCM_RTS);
+- } else {
+- mctrl_gpio_set(stm32_port->gpios,
+- stm32_port->port.mctrl & ~TIOCM_RTS);
+- }
+ }
+
++ stm32_usart_rs485_rts_enable(port);
++
+ stm32_usart_transmit_chars(port);
+ }
+
+@@ -691,6 +770,11 @@ static void stm32_usart_shutdown(struct uart_port *port)
+ u32 val, isr;
+ int ret;
+
++ if (stm32_port->tx_dma_busy) {
++ dmaengine_terminate_async(stm32_port->tx_ch);
++ stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
++ }
++
+ /* Disable modem control interrupts */
+ stm32_usart_disable_ms(port);
+
+@@ -790,13 +874,22 @@ static void stm32_usart_set_termios(struct uart_port *port,
+ * CS8 or (CS7 + parity), 8 bits word aka [M1:M0] = 0b00
+ * M0 and M1 already cleared by cr1 initialization.
+ */
+- if (bits == 9)
++ if (bits == 9) {
+ cr1 |= USART_CR1_M0;
+- else if ((bits == 7) && cfg->has_7bits_data)
++ } else if ((bits == 7) && cfg->has_7bits_data) {
+ cr1 |= USART_CR1_M1;
+- else if (bits != 8)
++ } else if (bits != 8) {
+ dev_dbg(port->dev, "Unsupported data bits config: %u bits\n"
+ , bits);
++ cflag &= ~CSIZE;
++ cflag |= CS8;
++ termios->c_cflag = cflag;
++ bits = 8;
++ if (cflag & PARENB) {
++ bits++;
++ cr1 |= USART_CR1_M0;
++ }
++ }
+
+ if (ofs->rtor != UNDEF_REG && (stm32_port->rx_ch ||
+ (stm32_port->fifoen &&
+@@ -1276,22 +1369,10 @@ static int stm32_usart_serial_probe(struct platform_device *pdev)
+ if (!stm32port->info)
+ return -EINVAL;
+
+- ret = stm32_usart_init_port(stm32port, pdev);
+- if (ret)
+- return ret;
+-
+- if (stm32port->wakeup_src) {
+- device_set_wakeup_capable(&pdev->dev, true);
+- ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq);
+- if (ret)
+- goto err_deinit_port;
+- }
+-
+ stm32port->rx_ch = dma_request_chan(&pdev->dev, "rx");
+- if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) {
+- ret = -EPROBE_DEFER;
+- goto err_wakeirq;
+- }
++ if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER)
++ return -EPROBE_DEFER;
++
+ /* Fall back in interrupt mode for any non-deferral error */
+ if (IS_ERR(stm32port->rx_ch))
+ stm32port->rx_ch = NULL;
+@@ -1305,6 +1386,17 @@ static int stm32_usart_serial_probe(struct platform_device *pdev)
+ if (IS_ERR(stm32port->tx_ch))
+ stm32port->tx_ch = NULL;
+
++ ret = stm32_usart_init_port(stm32port, pdev);
++ if (ret)
++ goto err_dma_tx;
++
++ if (stm32port->wakeup_src) {
++ device_set_wakeup_capable(&pdev->dev, true);
++ ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq);
++ if (ret)
++ goto err_deinit_port;
++ }
++
+ if (stm32port->rx_ch && stm32_usart_of_dma_rx_probe(stm32port, pdev)) {
+ /* Fall back in interrupt mode */
+ dma_release_channel(stm32port->rx_ch);
+@@ -1341,19 +1433,11 @@ err_port:
+ pm_runtime_set_suspended(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+
+- if (stm32port->tx_ch) {
++ if (stm32port->tx_ch)
+ stm32_usart_of_dma_tx_remove(stm32port, pdev);
+- dma_release_channel(stm32port->tx_ch);
+- }
+-
+ if (stm32port->rx_ch)
+ stm32_usart_of_dma_rx_remove(stm32port, pdev);
+
+-err_dma_rx:
+- if (stm32port->rx_ch)
+- dma_release_channel(stm32port->rx_ch);
+-
+-err_wakeirq:
+ if (stm32port->wakeup_src)
+ dev_pm_clear_wake_irq(&pdev->dev);
+
+@@ -1363,6 +1447,14 @@ err_deinit_port:
+
+ stm32_usart_deinit_port(stm32port);
+
++err_dma_tx:
++ if (stm32port->tx_ch)
++ dma_release_channel(stm32port->tx_ch);
++
++err_dma_rx:
++ if (stm32port->rx_ch)
++ dma_release_channel(stm32port->rx_ch);
++
+ return ret;
+ }
+
+@@ -1385,7 +1477,6 @@ static int stm32_usart_serial_remove(struct platform_device *pdev)
+ stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAR);
+
+ if (stm32_port->tx_ch) {
+- dmaengine_terminate_async(stm32_port->tx_ch);
+ stm32_usart_of_dma_tx_remove(stm32_port, pdev);
+ dma_release_channel(stm32_port->tx_ch);
+ }
+diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h
+index 07ac291328cda..ad6335155de2d 100644
+--- a/drivers/tty/serial/stm32-usart.h
++++ b/drivers/tty/serial/stm32-usart.h
+@@ -267,6 +267,7 @@ struct stm32_port {
+ bool hw_flow_control;
+ bool swap; /* swap RX & TX pins */
+ bool fifoen;
++ bool txdone;
+ int rxftcfg; /* RX FIFO threshold CFG */
+ int txftcfg; /* TX FIFO threshold CFG */
+ bool wakeup_src;
+diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
+index 92e5726340090..ac7cb80e4d6bd 100644
+--- a/drivers/tty/serial/sunsab.c
++++ b/drivers/tty/serial/sunsab.c
+@@ -1137,7 +1137,13 @@ static int __init sunsab_init(void)
+ }
+ }
+
+- return platform_driver_register(&sab_driver);
++ err = platform_driver_register(&sab_driver);
++ if (err) {
++ kfree(sunsab_ports);
++ sunsab_ports = NULL;
++ }
++
++ return err;
+ }
+
+ static void __exit sunsab_exit(void)
+diff --git a/drivers/tty/serial/tegra-tcu.c b/drivers/tty/serial/tegra-tcu.c
+index 4877c54c613d1..889b701ba7c62 100644
+--- a/drivers/tty/serial/tegra-tcu.c
++++ b/drivers/tty/serial/tegra-tcu.c
+@@ -101,7 +101,7 @@ static void tegra_tcu_uart_start_tx(struct uart_port *port)
+ break;
+
+ tegra_tcu_write(tcu, &xmit->buf[xmit->tail], count);
+- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
++ uart_xmit_advance(port, count);
+ }
+
+ uart_write_wakeup(port);
+diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
+index dfc1ba4e15724..36871cebd6a0f 100644
+--- a/drivers/tty/serial/uartlite.c
++++ b/drivers/tty/serial/uartlite.c
+@@ -612,7 +612,7 @@ static struct uart_driver ulite_uart_driver = {
+ *
+ * Returns: 0 on success, <0 otherwise
+ */
+-static int ulite_assign(struct device *dev, int id, u32 base, int irq,
++static int ulite_assign(struct device *dev, int id, phys_addr_t base, int irq,
+ struct uartlite_data *pdata)
+ {
+ struct uart_port *port;
+diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
+index 6000853973c10..3cc9ef08455c2 100644
+--- a/drivers/tty/serial/ucc_uart.c
++++ b/drivers/tty/serial/ucc_uart.c
+@@ -1137,6 +1137,8 @@ static unsigned int soc_info(unsigned int *rev_h, unsigned int *rev_l)
+ /* No compatible property, so try the name. */
+ soc_string = np->name;
+
++ of_node_put(np);
++
+ /* Extract the SOC number from the "PowerPC," string */
+ if ((sscanf(soc_string, "PowerPC,%u", &soc) != 1) || !soc)
+ return 0;
+diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
+index 962e522ccc45c..815e3e26ee206 100644
+--- a/drivers/tty/serial/xilinx_uartps.c
++++ b/drivers/tty/serial/xilinx_uartps.c
+@@ -375,6 +375,8 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id)
+ isrstatus &= ~CDNS_UART_IXR_TXEMPTY;
+ }
+
++ isrstatus &= port->read_status_mask;
++ isrstatus &= ~port->ignore_status_mask;
+ /*
+ * Skip RX processing if RX is disabled as RXEMPTY will never be set
+ * as read bytes will not be removed from the FIFO.
+@@ -601,9 +603,10 @@ static void cdns_uart_start_tx(struct uart_port *port)
+ if (uart_circ_empty(&port->state->xmit))
+ return;
+
++ writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR);
++
+ cdns_uart_handle_tx(port);
+
+- writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR);
+ /* Enable the TX Empty interrupt */
+ writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IER);
+ }
+diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
+index 25c558e65ece0..9bc2a92652772 100644
+--- a/drivers/tty/synclink_gt.c
++++ b/drivers/tty/synclink_gt.c
+@@ -1746,6 +1746,8 @@ static int hdlcdev_init(struct slgt_info *info)
+ */
+ static void hdlcdev_exit(struct slgt_info *info)
+ {
++ if (!info->netdev)
++ return;
+ unregister_hdlc_device(info->netdev);
+ free_netdev(info->netdev);
+ info->netdev = NULL;
+diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
+index c911196ac8935..4ffed77f80018 100644
+--- a/drivers/tty/sysrq.c
++++ b/drivers/tty/sysrq.c
+@@ -232,8 +232,10 @@ static void showacpu(void *dummy)
+ unsigned long flags;
+
+ /* Idle CPUs have no interesting backtrace. */
+- if (idle_cpu(smp_processor_id()))
++ if (idle_cpu(smp_processor_id())) {
++ pr_info("CPU%d: backtrace skipped as idling\n", smp_processor_id());
+ return;
++ }
+
+ raw_spin_lock_irqsave(&show_lock, flags);
+ pr_info("CPU%d:\n", smp_processor_id());
+@@ -260,10 +262,13 @@ static void sysrq_handle_showallcpus(int key)
+
+ if (in_hardirq())
+ regs = get_irq_regs();
+- if (regs) {
+- pr_info("CPU%d:\n", smp_processor_id());
++
++ pr_info("CPU%d:\n", smp_processor_id());
++ if (regs)
+ show_regs(regs);
+- }
++ else
++ show_stack(NULL, NULL, KERN_INFO);
++
+ schedule_work(&sysrq_showallcpus);
+ }
+ }
+@@ -296,7 +301,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {
+ static void sysrq_handle_showstate(int key)
+ {
+ show_state();
+- show_workqueue_state();
++ show_all_workqueues();
+ }
+ static const struct sysrq_key_op sysrq_showstate_op = {
+ .handler = sysrq_handle_showstate,
+diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h
+index b710c5ef89ab2..72b88aafd5361 100644
+--- a/drivers/tty/tty.h
++++ b/drivers/tty/tty.h
+@@ -62,6 +62,8 @@ int __tty_check_change(struct tty_struct *tty, int sig);
+ int tty_check_change(struct tty_struct *tty);
+ void __stop_tty(struct tty_struct *tty);
+ void __start_tty(struct tty_struct *tty);
++void tty_write_unlock(struct tty_struct *tty);
++int tty_write_lock(struct tty_struct *tty, int ndelay);
+ void tty_vhangup_session(struct tty_struct *tty);
+ void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty);
+ int tty_signal_session_leader(struct tty_struct *tty, int exit_session);
+@@ -111,4 +113,7 @@ static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
+
+ ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *);
+
++int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
++ const unsigned char *chars, size_t cnt);
++
+ #endif
+diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
+index 635d0af229b72..f3143ae4bf7fe 100644
+--- a/drivers/tty/tty_buffer.c
++++ b/drivers/tty/tty_buffer.c
+@@ -174,7 +174,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size)
+ */
+ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit)
+ return NULL;
+- p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
++ p = kmalloc(sizeof(struct tty_buffer) + 2 * size,
++ GFP_ATOMIC | __GFP_NOWARN);
+ if (p == NULL)
+ return NULL;
+
+@@ -401,27 +402,6 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag)
+ }
+ EXPORT_SYMBOL(__tty_insert_flip_char);
+
+-/**
+- * tty_schedule_flip - push characters to ldisc
+- * @port: tty port to push from
+- *
+- * Takes any pending buffers and transfers their ownership to the
+- * ldisc side of the queue. It then schedules those characters for
+- * processing by the line discipline.
+- */
+-
+-void tty_schedule_flip(struct tty_port *port)
+-{
+- struct tty_bufhead *buf = &port->buf;
+-
+- /* paired w/ acquire in flush_to_ldisc(); ensures
+- * flush_to_ldisc() sees buffer data.
+- */
+- smp_store_release(&buf->tail->commit, buf->tail->used);
+- queue_work(system_unbound_wq, &buf->work);
+-}
+-EXPORT_SYMBOL(tty_schedule_flip);
+-
+ /**
+ * tty_prepare_flip_string - make room for characters
+ * @port: tty port
+@@ -544,12 +524,24 @@ static void flush_to_ldisc(struct work_struct *work)
+ if (!count)
+ break;
+ head->read += count;
++
++ if (need_resched())
++ cond_resched();
+ }
+
+ mutex_unlock(&buf->lock);
+
+ }
+
++static inline void tty_flip_buffer_commit(struct tty_buffer *tail)
++{
++ /*
++ * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees
++ * buffer data.
++ */
++ smp_store_release(&tail->commit, tail->used);
++}
++
+ /**
+ * tty_flip_buffer_push - terminal
+ * @port: tty port to push
+@@ -563,10 +555,44 @@ static void flush_to_ldisc(struct work_struct *work)
+
+ void tty_flip_buffer_push(struct tty_port *port)
+ {
+- tty_schedule_flip(port);
++ struct tty_bufhead *buf = &port->buf;
++
++ tty_flip_buffer_commit(buf->tail);
++ queue_work(system_unbound_wq, &buf->work);
+ }
+ EXPORT_SYMBOL(tty_flip_buffer_push);
+
++/**
++ * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and
++ * push
++ * @port: tty port
++ * @chars: characters
++ * @size: size
++ *
++ * The function combines tty_insert_flip_string() and tty_flip_buffer_push()
++ * with the exception of properly holding the @port->lock.
++ *
++ * To be used only internally (by pty currently).
++ *
++ * Returns: the number added.
++ */
++int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
++ const unsigned char *chars, size_t size)
++{
++ struct tty_bufhead *buf = &port->buf;
++ unsigned long flags;
++
++ spin_lock_irqsave(&port->lock, flags);
++ size = tty_insert_flip_string(port, chars, size);
++ if (size)
++ tty_flip_buffer_commit(buf->tail);
++ spin_unlock_irqrestore(&port->lock, flags);
++
++ queue_work(system_unbound_wq, &buf->work);
++
++ return size;
++}
++
+ /**
+ * tty_buffer_init - prepare a tty buffer structure
+ * @port: tty port to initialise
+diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
+index 6616d4a0d41de..3d540dff42ef4 100644
+--- a/drivers/tty/tty_io.c
++++ b/drivers/tty/tty_io.c
+@@ -950,13 +950,13 @@ static ssize_t tty_read(struct kiocb *iocb, struct iov_iter *to)
+ return i;
+ }
+
+-static void tty_write_unlock(struct tty_struct *tty)
++void tty_write_unlock(struct tty_struct *tty)
+ {
+ mutex_unlock(&tty->atomic_write_lock);
+ wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT);
+ }
+
+-static int tty_write_lock(struct tty_struct *tty, int ndelay)
++int tty_write_lock(struct tty_struct *tty, int ndelay)
+ {
+ if (!mutex_trylock(&tty->atomic_write_lock)) {
+ if (ndelay)
+@@ -1244,14 +1244,16 @@ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
+ {
+ struct tty_struct *tty;
+
+- if (driver->ops->lookup)
++ if (driver->ops->lookup) {
+ if (!file)
+ tty = ERR_PTR(-EIO);
+ else
+ tty = driver->ops->lookup(driver, file, idx);
+- else
++ } else {
++ if (idx >= driver->num)
++ return ERR_PTR(-EINVAL);
+ tty = driver->ttys[idx];
+-
++ }
+ if (!IS_ERR(tty))
+ tty_kref_get(tty);
+ return tty;
+diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
+index 507a25d692bb1..1736130f9c394 100644
+--- a/drivers/tty/tty_ioctl.c
++++ b/drivers/tty/tty_ioctl.c
+@@ -421,21 +421,42 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt)
+ tmp_termios.c_ispeed = tty_termios_input_baud_rate(&tmp_termios);
+ tmp_termios.c_ospeed = tty_termios_baud_rate(&tmp_termios);
+
+- ld = tty_ldisc_ref(tty);
++ if (opt & (TERMIOS_FLUSH|TERMIOS_WAIT)) {
++retry_write_wait:
++ retval = wait_event_interruptible(tty->write_wait, !tty_chars_in_buffer(tty));
++ if (retval < 0)
++ return retval;
+
+- if (ld != NULL) {
+- if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer)
+- ld->ops->flush_buffer(tty);
+- tty_ldisc_deref(ld);
+- }
++ if (tty_write_lock(tty, 0) < 0)
++ goto retry_write_wait;
+
+- if (opt & TERMIOS_WAIT) {
+- tty_wait_until_sent(tty, 0);
+- if (signal_pending(current))
+- return -ERESTARTSYS;
+- }
++ /* Racing writer? */
++ if (tty_chars_in_buffer(tty)) {
++ tty_write_unlock(tty);
++ goto retry_write_wait;
++ }
+
+- tty_set_termios(tty, &tmp_termios);
++ ld = tty_ldisc_ref(tty);
++ if (ld != NULL) {
++ if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer)
++ ld->ops->flush_buffer(tty);
++ tty_ldisc_deref(ld);
++ }
++
++ if ((opt & TERMIOS_WAIT) && tty->ops->wait_until_sent) {
++ tty->ops->wait_until_sent(tty, 0);
++ if (signal_pending(current)) {
++ tty_write_unlock(tty);
++ return -ERESTARTSYS;
++ }
++ }
++
++ tty_set_termios(tty, &tmp_termios);
++
++ tty_write_unlock(tty);
++ } else {
++ tty_set_termios(tty, &tmp_termios);
++ }
+
+ /* FIXME: Arguably if tmp_termios == tty->termios AND the
+ actual requested termios was not tmp_termios then we may
+diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
+index c7fbbcdcc3461..3700cd057f273 100644
+--- a/drivers/tty/vt/keyboard.c
++++ b/drivers/tty/vt/keyboard.c
+@@ -324,13 +324,13 @@ int kbd_rate(struct kbd_repeat *rpt)
+ static void put_queue(struct vc_data *vc, int ch)
+ {
+ tty_insert_flip_char(&vc->port, ch, 0);
+- tty_schedule_flip(&vc->port);
++ tty_flip_buffer_push(&vc->port);
+ }
+
+ static void puts_queue(struct vc_data *vc, const char *cp)
+ {
+ tty_insert_flip_string(&vc->port, cp, strlen(cp));
+- tty_schedule_flip(&vc->port);
++ tty_flip_buffer_push(&vc->port);
+ }
+
+ static void applkey(struct vc_data *vc, int key, char mode)
+@@ -584,7 +584,7 @@ static void fn_inc_console(struct vc_data *vc)
+ static void fn_send_intr(struct vc_data *vc)
+ {
+ tty_insert_flip_char(&vc->port, 0, TTY_BREAK);
+- tty_schedule_flip(&vc->port);
++ tty_flip_buffer_push(&vc->port);
+ }
+
+ static void fn_scroll_forw(struct vc_data *vc)
+diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
+index 1850bacdb5b0e..01c96537fa36b 100644
+--- a/drivers/tty/vt/vc_screen.c
++++ b/drivers/tty/vt/vc_screen.c
+@@ -386,10 +386,6 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+
+ uni_mode = use_unicode(inode);
+ attr = use_attributes(inode);
+- ret = -ENXIO;
+- vc = vcs_vc(inode, &viewed);
+- if (!vc)
+- goto unlock_out;
+
+ ret = -EINVAL;
+ if (pos < 0)
+@@ -407,16 +403,20 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+ unsigned int this_round, skip = 0;
+ int size;
+
++ vc = vcs_vc(inode, &viewed);
++ if (!vc) {
++ ret = -ENXIO;
++ break;
++ }
++
+ /* Check whether we are above size each round,
+ * as copy_to_user at the end of this loop
+ * could sleep.
+ */
+ size = vcs_size(vc, attr, uni_mode);
+ if (size < 0) {
+- if (read)
+- break;
+ ret = size;
+- goto unlock_out;
++ break;
+ }
+ if (pos >= size)
+ break;
+@@ -656,10 +656,17 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+ }
+ }
+
+- /* The vcs_size might have changed while we slept to grab
+- * the user buffer, so recheck.
++ /* The vc might have been freed or vcs_size might have changed
++ * while we slept to grab the user buffer, so recheck.
+ * Return data written up to now on failure.
+ */
++ vc = vcs_vc(inode, &viewed);
++ if (!vc) {
++ if (written)
++ break;
++ ret = -ENXIO;
++ goto unlock_out;
++ }
+ size = vcs_size(vc, attr, false);
+ if (size < 0) {
+ if (written)
+diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
+index 7359c3e80d63e..b8f5bc19416d9 100644
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -344,7 +344,7 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
+ /* allocate everything in one go */
+ memsize = cols * rows * sizeof(char32_t);
+ memsize += rows * sizeof(char32_t *);
+- p = vmalloc(memsize);
++ p = vzalloc(memsize);
+ if (!p)
+ return NULL;
+
+@@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr)
+ unsigned short *p = (unsigned short *) vc->vc_pos;
+
+ vc_uniscr_delete(vc, nr);
+- scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
++ scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
+ scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char,
+ nr * 2);
+ vc->vc_need_wrap = 0;
+@@ -1833,7 +1833,7 @@ static void csi_m(struct vc_data *vc)
+ static void respond_string(const char *p, size_t len, struct tty_port *port)
+ {
+ tty_insert_flip_string(port, p, len);
+- tty_schedule_flip(port);
++ tty_flip_buffer_push(port);
+ }
+
+ static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
+@@ -4662,9 +4662,11 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
+ console_lock();
+ if (vc->vc_mode != KD_TEXT)
+ rc = -EINVAL;
+- else if (vc->vc_sw->con_font_set)
++ else if (vc->vc_sw->con_font_set) {
++ if (vc_is_sel(vc))
++ clear_selection();
+ rc = vc->vc_sw->con_font_set(vc, &font, op->flags);
+- else
++ } else
+ rc = -ENOSYS;
+ console_unlock();
+ kfree(font.data);
+@@ -4691,9 +4693,11 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op)
+ console_unlock();
+ return -EINVAL;
+ }
+- if (vc->vc_sw->con_font_default)
++ if (vc->vc_sw->con_font_default) {
++ if (vc_is_sel(vc))
++ clear_selection();
+ rc = vc->vc_sw->con_font_default(vc, &font, s);
+- else
++ } else
+ rc = -ENOSYS;
+ console_unlock();
+ if (!rc) {
+diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
+index 3639bb6dc372e..58013698635f0 100644
+--- a/drivers/tty/vt/vt_ioctl.c
++++ b/drivers/tty/vt/vt_ioctl.c
+@@ -599,8 +599,8 @@ static int vt_setactivate(struct vt_setactivate __user *sa)
+ if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES)
+ return -ENXIO;
+
+- vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES + 1);
+ vsa.console--;
++ vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES);
+ console_lock();
+ ret = vc_allocate(vsa.console);
+ if (ret) {
+@@ -845,6 +845,7 @@ int vt_ioctl(struct tty_struct *tty,
+ return -ENXIO;
+
+ arg--;
++ arg = array_index_nospec(arg, MAX_NR_CONSOLES);
+ console_lock();
+ ret = vc_allocate(arg);
+ console_unlock();
+diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
+index 6b5cfa5b06733..28be820b546e9 100644
+--- a/drivers/uio/uio_dmem_genirq.c
++++ b/drivers/uio/uio_dmem_genirq.c
+@@ -110,8 +110,10 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info)
+ * remember the state so we can allow user space to enable it later.
+ */
+
++ spin_lock(&priv->lock);
+ if (!test_and_set_bit(0, &priv->flags))
+ disable_irq_nosync(irq);
++ spin_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+ }
+@@ -125,20 +127,19 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
+ * in the interrupt controller, but keep track of the
+ * state to prevent per-irq depth damage.
+ *
+- * Serialize this operation to support multiple tasks.
++ * Serialize this operation to support multiple tasks and concurrency
++ * with irq handler on SMP systems.
+ */
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (irq_on) {
+ if (test_and_clear_bit(0, &priv->flags))
+ enable_irq(dev_info->irq);
+- spin_unlock_irqrestore(&priv->lock, flags);
+ } else {
+- if (!test_and_set_bit(0, &priv->flags)) {
+- spin_unlock_irqrestore(&priv->lock, flags);
+- disable_irq(dev_info->irq);
+- }
++ if (!test_and_set_bit(0, &priv->flags))
++ disable_irq_nosync(dev_info->irq);
+ }
++ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return 0;
+ }
+diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
+index 1f3b4a1422126..69a44bd7e5d02 100644
+--- a/drivers/usb/cdns3/cdns3-gadget.c
++++ b/drivers/usb/cdns3/cdns3-gadget.c
+@@ -220,7 +220,7 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
+
+ if (!priv_ep->trb_pool) {
+ priv_ep->trb_pool = dma_pool_alloc(priv_dev->eps_dma_pool,
+- GFP_DMA32 | GFP_ATOMIC,
++ GFP_ATOMIC,
+ &priv_ep->trb_pool_dma);
+
+ if (!priv_ep->trb_pool)
+@@ -337,19 +337,6 @@ static void cdns3_ep_inc_deq(struct cdns3_endpoint *priv_ep)
+ cdns3_ep_inc_trb(&priv_ep->dequeue, &priv_ep->ccs, priv_ep->num_trbs);
+ }
+
+-static void cdns3_move_deq_to_next_trb(struct cdns3_request *priv_req)
+-{
+- struct cdns3_endpoint *priv_ep = priv_req->priv_ep;
+- int current_trb = priv_req->start_trb;
+-
+- while (current_trb != priv_req->end_trb) {
+- cdns3_ep_inc_deq(priv_ep);
+- current_trb = priv_ep->dequeue;
+- }
+-
+- cdns3_ep_inc_deq(priv_ep);
+-}
+-
+ /**
+ * cdns3_allow_enable_l1 - enable/disable permits to transition to L1.
+ * @priv_dev: Extended gadget object
+@@ -638,9 +625,9 @@ static void cdns3_wa2_remove_old_request(struct cdns3_endpoint *priv_ep)
+ trace_cdns3_wa2(priv_ep, "removes eldest request");
+
+ kfree(priv_req->request.buf);
++ list_del_init(&priv_req->list);
+ cdns3_gadget_ep_free_request(&priv_ep->endpoint,
+ &priv_req->request);
+- list_del_init(&priv_req->list);
+ --priv_ep->wa2_counter;
+
+ if (!chain)
+@@ -1517,10 +1504,11 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev,
+
+ trb = priv_ep->trb_pool + priv_ep->dequeue;
+
+- /* Request was dequeued and TRB was changed to TRB_LINK. */
+- if (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) {
++ /* The TRB was changed as link TRB, and the request was handled at ep_dequeue */
++ while (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) {
+ trace_cdns3_complete_trb(priv_ep, trb);
+- cdns3_move_deq_to_next_trb(priv_req);
++ cdns3_ep_inc_deq(priv_ep);
++ trb = priv_ep->trb_pool + priv_ep->dequeue;
+ }
+
+ if (!request->stream_id) {
+@@ -1542,7 +1530,8 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev,
+ TRB_LEN(le32_to_cpu(trb->length));
+
+ if (priv_req->num_of_trb > 1 &&
+- le32_to_cpu(trb->control) & TRB_SMM)
++ le32_to_cpu(trb->control) & TRB_SMM &&
++ le32_to_cpu(trb->control) & TRB_CHAIN)
+ transfer_end = true;
+
+ cdns3_ep_inc_deq(priv_ep);
+@@ -1702,6 +1691,7 @@ static int cdns3_check_ep_interrupt_proceed(struct cdns3_endpoint *priv_ep)
+ ep_cfg &= ~EP_CFG_ENABLE;
+ writel(ep_cfg, &priv_dev->regs->ep_cfg);
+ priv_ep->flags &= ~EP_QUIRK_ISO_OUT_EN;
++ priv_ep->flags |= EP_UPDATE_EP_TRBADDR;
+ }
+ cdns3_transfer_completed(priv_dev, priv_ep);
+ } else if (!(priv_ep->flags & EP_STALLED) &&
+@@ -2050,7 +2040,7 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
+ u8 mult = 0;
+ int ret;
+
+- buffering = CDNS3_EP_BUF_SIZE - 1;
++ buffering = priv_dev->ep_buf_size - 1;
+
+ cdns3_configure_dmult(priv_dev, priv_ep);
+
+@@ -2069,7 +2059,7 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
+ break;
+ default:
+ ep_cfg = EP_CFG_EPTYPE(USB_ENDPOINT_XFER_ISOC);
+- mult = CDNS3_EP_ISO_HS_MULT - 1;
++ mult = priv_dev->ep_iso_burst - 1;
+ buffering = mult + 1;
+ }
+
+@@ -2085,14 +2075,14 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
+ mult = 0;
+ max_packet_size = 1024;
+ if (priv_ep->type == USB_ENDPOINT_XFER_ISOC) {
+- maxburst = CDNS3_EP_ISO_SS_BURST - 1;
++ maxburst = priv_dev->ep_iso_burst - 1;
+ buffering = (mult + 1) *
+ (maxburst + 1);
+
+ if (priv_ep->interval > 1)
+ buffering++;
+ } else {
+- maxburst = CDNS3_EP_BUF_SIZE - 1;
++ maxburst = priv_dev->ep_buf_size - 1;
+ }
+ break;
+ default:
+@@ -2107,6 +2097,23 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
+ else
+ priv_ep->trb_burst_size = 16;
+
++ /*
++ * In versions preceding DEV_VER_V2, for example, iMX8QM, there exist bugs
++ * in the DMA. These bugs occur when the trb_burst_size exceeds 16 and the
++ * address is not aligned to 128 Bytes (which is a product of the 64-bit AXI
++ * and AXI maximum burst length of 16 or 0xF+1, dma_axi_ctrl0[3:0]). This
++ * results in data corruption when it crosses the 4K border. The corruption
++ * specifically occurs from the position (4K - (address & 0x7F)) to 4K.
++ *
++ * So force trb_burst_size to 16 on such platforms.
++ */
++ if (priv_dev->dev_ver < DEV_VER_V2)
++ priv_ep->trb_burst_size = 16;
++
++ mult = min_t(u8, mult, EP_CFG_MULT_MAX);
++ buffering = min_t(u8, buffering, EP_CFG_BUFFERING_MAX);
++ maxburst = min_t(u8, maxburst, EP_CFG_MAXBURST_MAX);
++
+ /* onchip buffer is only allocated before configuration */
+ if (!priv_dev->hw_configured_flag) {
+ ret = cdns3_ep_onchip_buffer_reserve(priv_dev, buffering + 1,
+@@ -2292,11 +2299,16 @@ static int cdns3_gadget_ep_enable(struct usb_ep *ep,
+ int ret = 0;
+ int val;
+
++ if (!ep) {
++ pr_debug("usbss: ep not configured?\n");
++ return -EINVAL;
++ }
++
+ priv_ep = ep_to_cdns3_ep(ep);
+ priv_dev = priv_ep->cdns3_dev;
+ comp_desc = priv_ep->endpoint.comp_desc;
+
+- if (!ep || !desc || desc->bDescriptorType != USB_DT_ENDPOINT) {
++ if (!desc || desc->bDescriptorType != USB_DT_ENDPOINT) {
+ dev_dbg(priv_dev->dev, "usbss: invalid parameters\n");
+ return -EINVAL;
+ }
+@@ -2608,17 +2620,20 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
+ struct usb_request *request)
+ {
+ struct cdns3_endpoint *priv_ep = ep_to_cdns3_ep(ep);
+- struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
++ struct cdns3_device *priv_dev;
+ struct usb_request *req, *req_temp;
+ struct cdns3_request *priv_req;
+ struct cdns3_trb *link_trb;
+ u8 req_on_hw_ring = 0;
+ unsigned long flags;
+ int ret = 0;
++ int val;
+
+ if (!ep || !request || !ep->desc)
+ return -EINVAL;
+
++ priv_dev = priv_ep->cdns3_dev;
++
+ spin_lock_irqsave(&priv_dev->lock, flags);
+
+ priv_req = to_cdns3_request(request);
+@@ -2648,6 +2663,13 @@ found:
+
+ /* Update ring only if removed request is on pending_req_list list */
+ if (req_on_hw_ring && link_trb) {
++ /* Stop DMA */
++ writel(EP_CMD_DFLUSH, &priv_dev->regs->ep_cmd);
++
++ /* wait for DFLUSH cleared */
++ readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val,
++ !(val & EP_CMD_DFLUSH), 1, 1000);
++
+ link_trb->buffer = cpu_to_le32(TRB_BUFFER(priv_ep->trb_pool_dma +
+ ((priv_req->end_trb + 1) * TRB_SIZE)));
+ link_trb->control = cpu_to_le32((le32_to_cpu(link_trb->control) & TRB_CYCLE) |
+@@ -2659,6 +2681,10 @@ found:
+
+ cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
+
++ req = cdns3_next_request(&priv_ep->pending_req_list);
++ if (req)
++ cdns3_rearm_transfer(priv_ep, 1);
++
+ not_found:
+ spin_unlock_irqrestore(&priv_dev->lock, flags);
+ return ret;
+@@ -2696,6 +2722,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
+ struct usb_request *request;
+ struct cdns3_request *priv_req;
+ struct cdns3_trb *trb = NULL;
++ struct cdns3_trb trb_tmp;
+ int ret;
+ int val;
+
+@@ -2705,8 +2732,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
+ if (request) {
+ priv_req = to_cdns3_request(request);
+ trb = priv_req->trb;
+- if (trb)
++ if (trb) {
++ trb_tmp = *trb;
+ trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
++ }
+ }
+
+ writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
+@@ -2721,7 +2750,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
+
+ if (request) {
+ if (trb)
+- trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
++ *trb = trb_tmp;
+
+ cdns3_rearm_transfer(priv_ep, 1);
+ }
+@@ -2970,6 +2999,42 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget)
+ return 0;
+ }
+
++/**
++ * cdns3_gadget_check_config - ensure cdns3 can support the USB configuration
++ * @gadget: pointer to the USB gadget
++ *
++ * Used to record the maximum number of endpoints being used in a USB composite
++ * device (across all configurations). This is to be used in the calculation
++ * of the TXFIFO sizes when resizing internal memory for individual endpoints.
++ * It will help ensure that the resizing logic reserves enough space for at
++ * least one max packet.
++ */
++static int cdns3_gadget_check_config(struct usb_gadget *gadget)
++{
++ struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget);
++ struct cdns3_endpoint *priv_ep;
++ struct usb_ep *ep;
++ int n_in = 0;
++ int total;
++
++ list_for_each_entry(ep, &gadget->ep_list, ep_list) {
++ priv_ep = ep_to_cdns3_ep(ep);
++ if ((priv_ep->flags & EP_CLAIMED) && (ep->address & USB_DIR_IN))
++ n_in++;
++ }
++
++ /* 2KB are reserved for EP0, 1KB for out*/
++ total = 2 + n_in + 1;
++
++ if (total > priv_dev->onchip_buffers)
++ return -ENOMEM;
++
++ priv_dev->ep_buf_size = priv_dev->ep_iso_burst =
++ (priv_dev->onchip_buffers - 2) / (n_in + 1);
++
++ return 0;
++}
++
+ static const struct usb_gadget_ops cdns3_gadget_ops = {
+ .get_frame = cdns3_gadget_get_frame,
+ .wakeup = cdns3_gadget_wakeup,
+@@ -2978,6 +3043,7 @@ static const struct usb_gadget_ops cdns3_gadget_ops = {
+ .udc_start = cdns3_gadget_udc_start,
+ .udc_stop = cdns3_gadget_udc_stop,
+ .match_ep = cdns3_gadget_match_ep,
++ .check_config = cdns3_gadget_check_config,
+ };
+
+ static void cdns3_free_all_eps(struct cdns3_device *priv_dev)
+diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h
+index c5660f2c4293f..fbe4a8e3aa897 100644
+--- a/drivers/usb/cdns3/cdns3-gadget.h
++++ b/drivers/usb/cdns3/cdns3-gadget.h
+@@ -562,15 +562,18 @@ struct cdns3_usb_regs {
+ /* Max burst size (used only in SS mode). */
+ #define EP_CFG_MAXBURST_MASK GENMASK(11, 8)
+ #define EP_CFG_MAXBURST(p) (((p) << 8) & EP_CFG_MAXBURST_MASK)
++#define EP_CFG_MAXBURST_MAX 15
+ /* ISO max burst. */
+ #define EP_CFG_MULT_MASK GENMASK(15, 14)
+ #define EP_CFG_MULT(p) (((p) << 14) & EP_CFG_MULT_MASK)
++#define EP_CFG_MULT_MAX 2
+ /* ISO max burst. */
+ #define EP_CFG_MAXPKTSIZE_MASK GENMASK(26, 16)
+ #define EP_CFG_MAXPKTSIZE(p) (((p) << 16) & EP_CFG_MAXPKTSIZE_MASK)
+ /* Max number of buffered packets. */
+ #define EP_CFG_BUFFERING_MASK GENMASK(31, 27)
+ #define EP_CFG_BUFFERING(p) (((p) << 27) & EP_CFG_BUFFERING_MASK)
++#define EP_CFG_BUFFERING_MAX 15
+
+ /* EP_CMD - bitmasks */
+ /* Endpoint reset. */
+@@ -1094,9 +1097,6 @@ struct cdns3_trb {
+ #define CDNS3_ENDPOINTS_MAX_COUNT 32
+ #define CDNS3_EP_ZLP_BUF_SIZE 1024
+
+-#define CDNS3_EP_BUF_SIZE 4 /* KB */
+-#define CDNS3_EP_ISO_HS_MULT 3
+-#define CDNS3_EP_ISO_SS_BURST 3
+ #define CDNS3_MAX_NUM_DESCMISS_BUF 32
+ #define CDNS3_DESCMIS_BUF_SIZE 2048 /* Bytes */
+ #define CDNS3_WA2_NUM_BUFFERS 128
+@@ -1333,6 +1333,9 @@ struct cdns3_device {
+ /*in KB */
+ u16 onchip_buffers;
+ u16 onchip_used_size;
++
++ u16 ep_buf_size;
++ u16 ep_iso_burst;
+ };
+
+ void cdns3_set_register_bit(void __iomem *ptr, u32 mask);
+diff --git a/drivers/usb/cdns3/cdns3-pci-wrap.c b/drivers/usb/cdns3/cdns3-pci-wrap.c
+index deeea618ba33b..1f6320d98a76b 100644
+--- a/drivers/usb/cdns3/cdns3-pci-wrap.c
++++ b/drivers/usb/cdns3/cdns3-pci-wrap.c
+@@ -60,6 +60,11 @@ static struct pci_dev *cdns3_get_second_fun(struct pci_dev *pdev)
+ return NULL;
+ }
+
++ if (func->devfn != PCI_DEV_FN_HOST_DEVICE &&
++ func->devfn != PCI_DEV_FN_OTG) {
++ return NULL;
++ }
++
+ return func;
+ }
+
+diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h
+index a8776df2d4e0c..f0ca865cce2a0 100644
+--- a/drivers/usb/cdns3/cdnsp-debug.h
++++ b/drivers/usb/cdns3/cdnsp-debug.h
+@@ -182,208 +182,211 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0,
+ int ep_id = TRB_TO_EP_INDEX(field3) - 1;
+ int type = TRB_FIELD_TO_TYPE(field3);
+ unsigned int ep_num;
+- int ret = 0;
++ int ret;
+ u32 temp;
+
+ ep_num = DIV_ROUND_UP(ep_id, 2);
+
+ switch (type) {
+ case TRB_LINK:
+- ret += snprintf(str, size,
+- "LINK %08x%08x intr %ld type '%s' flags %c:%c:%c:%c",
+- field1, field0, GET_INTR_TARGET(field2),
+- cdnsp_trb_type_string(type),
+- field3 & TRB_IOC ? 'I' : 'i',
+- field3 & TRB_CHAIN ? 'C' : 'c',
+- field3 & TRB_TC ? 'T' : 't',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "LINK %08x%08x intr %ld type '%s' flags %c:%c:%c:%c",
++ field1, field0, GET_INTR_TARGET(field2),
++ cdnsp_trb_type_string(type),
++ field3 & TRB_IOC ? 'I' : 'i',
++ field3 & TRB_CHAIN ? 'C' : 'c',
++ field3 & TRB_TC ? 'T' : 't',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_TRANSFER:
+ case TRB_COMPLETION:
+ case TRB_PORT_STATUS:
+ case TRB_HC_EVENT:
+- ret += snprintf(str, size,
+- "ep%d%s(%d) type '%s' TRB %08x%08x status '%s'"
+- " len %ld slot %ld flags %c:%c",
+- ep_num, ep_id % 2 ? "out" : "in",
+- TRB_TO_EP_INDEX(field3),
+- cdnsp_trb_type_string(type), field1, field0,
+- cdnsp_trb_comp_code_string(GET_COMP_CODE(field2)),
+- EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3),
+- field3 & EVENT_DATA ? 'E' : 'e',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "ep%d%s(%d) type '%s' TRB %08x%08x status '%s'"
++ " len %ld slot %ld flags %c:%c",
++ ep_num, ep_id % 2 ? "out" : "in",
++ TRB_TO_EP_INDEX(field3),
++ cdnsp_trb_type_string(type), field1, field0,
++ cdnsp_trb_comp_code_string(GET_COMP_CODE(field2)),
++ EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3),
++ field3 & EVENT_DATA ? 'E' : 'e',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_MFINDEX_WRAP:
+- ret += snprintf(str, size, "%s: flags %c",
+- cdnsp_trb_type_string(type),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size, "%s: flags %c",
++ cdnsp_trb_type_string(type),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_SETUP:
+- ret += snprintf(str, size,
+- "type '%s' bRequestType %02x bRequest %02x "
+- "wValue %02x%02x wIndex %02x%02x wLength %d "
+- "length %ld TD size %ld intr %ld Setup ID %ld "
+- "flags %c:%c:%c",
+- cdnsp_trb_type_string(type),
+- field0 & 0xff,
+- (field0 & 0xff00) >> 8,
+- (field0 & 0xff000000) >> 24,
+- (field0 & 0xff0000) >> 16,
+- (field1 & 0xff00) >> 8,
+- field1 & 0xff,
+- (field1 & 0xff000000) >> 16 |
+- (field1 & 0xff0000) >> 16,
+- TRB_LEN(field2), GET_TD_SIZE(field2),
+- GET_INTR_TARGET(field2),
+- TRB_SETUPID_TO_TYPE(field3),
+- field3 & TRB_IDT ? 'D' : 'd',
+- field3 & TRB_IOC ? 'I' : 'i',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "type '%s' bRequestType %02x bRequest %02x "
++ "wValue %02x%02x wIndex %02x%02x wLength %d "
++ "length %ld TD size %ld intr %ld Setup ID %ld "
++ "flags %c:%c:%c",
++ cdnsp_trb_type_string(type),
++ field0 & 0xff,
++ (field0 & 0xff00) >> 8,
++ (field0 & 0xff000000) >> 24,
++ (field0 & 0xff0000) >> 16,
++ (field1 & 0xff00) >> 8,
++ field1 & 0xff,
++ (field1 & 0xff000000) >> 16 |
++ (field1 & 0xff0000) >> 16,
++ TRB_LEN(field2), GET_TD_SIZE(field2),
++ GET_INTR_TARGET(field2),
++ TRB_SETUPID_TO_TYPE(field3),
++ field3 & TRB_IDT ? 'D' : 'd',
++ field3 & TRB_IOC ? 'I' : 'i',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_DATA:
+- ret += snprintf(str, size,
+- "type '%s' Buffer %08x%08x length %ld TD size %ld "
+- "intr %ld flags %c:%c:%c:%c:%c:%c:%c",
+- cdnsp_trb_type_string(type),
+- field1, field0, TRB_LEN(field2),
+- GET_TD_SIZE(field2),
+- GET_INTR_TARGET(field2),
+- field3 & TRB_IDT ? 'D' : 'i',
+- field3 & TRB_IOC ? 'I' : 'i',
+- field3 & TRB_CHAIN ? 'C' : 'c',
+- field3 & TRB_NO_SNOOP ? 'S' : 's',
+- field3 & TRB_ISP ? 'I' : 'i',
+- field3 & TRB_ENT ? 'E' : 'e',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "type '%s' Buffer %08x%08x length %ld TD size %ld "
++ "intr %ld flags %c:%c:%c:%c:%c:%c:%c",
++ cdnsp_trb_type_string(type),
++ field1, field0, TRB_LEN(field2),
++ GET_TD_SIZE(field2),
++ GET_INTR_TARGET(field2),
++ field3 & TRB_IDT ? 'D' : 'i',
++ field3 & TRB_IOC ? 'I' : 'i',
++ field3 & TRB_CHAIN ? 'C' : 'c',
++ field3 & TRB_NO_SNOOP ? 'S' : 's',
++ field3 & TRB_ISP ? 'I' : 'i',
++ field3 & TRB_ENT ? 'E' : 'e',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_STATUS:
+- ret += snprintf(str, size,
+- "Buffer %08x%08x length %ld TD size %ld intr"
+- "%ld type '%s' flags %c:%c:%c:%c",
+- field1, field0, TRB_LEN(field2),
+- GET_TD_SIZE(field2),
+- GET_INTR_TARGET(field2),
+- cdnsp_trb_type_string(type),
+- field3 & TRB_IOC ? 'I' : 'i',
+- field3 & TRB_CHAIN ? 'C' : 'c',
+- field3 & TRB_ENT ? 'E' : 'e',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "Buffer %08x%08x length %ld TD size %ld intr"
++ "%ld type '%s' flags %c:%c:%c:%c",
++ field1, field0, TRB_LEN(field2),
++ GET_TD_SIZE(field2),
++ GET_INTR_TARGET(field2),
++ cdnsp_trb_type_string(type),
++ field3 & TRB_IOC ? 'I' : 'i',
++ field3 & TRB_CHAIN ? 'C' : 'c',
++ field3 & TRB_ENT ? 'E' : 'e',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_NORMAL:
+ case TRB_ISOC:
+ case TRB_EVENT_DATA:
+ case TRB_TR_NOOP:
+- ret += snprintf(str, size,
+- "type '%s' Buffer %08x%08x length %ld "
+- "TD size %ld intr %ld "
+- "flags %c:%c:%c:%c:%c:%c:%c:%c:%c",
+- cdnsp_trb_type_string(type),
+- field1, field0, TRB_LEN(field2),
+- GET_TD_SIZE(field2),
+- GET_INTR_TARGET(field2),
+- field3 & TRB_BEI ? 'B' : 'b',
+- field3 & TRB_IDT ? 'T' : 't',
+- field3 & TRB_IOC ? 'I' : 'i',
+- field3 & TRB_CHAIN ? 'C' : 'c',
+- field3 & TRB_NO_SNOOP ? 'S' : 's',
+- field3 & TRB_ISP ? 'I' : 'i',
+- field3 & TRB_ENT ? 'E' : 'e',
+- field3 & TRB_CYCLE ? 'C' : 'c',
+- !(field3 & TRB_EVENT_INVALIDATE) ? 'V' : 'v');
++ ret = snprintf(str, size,
++ "type '%s' Buffer %08x%08x length %ld "
++ "TD size %ld intr %ld "
++ "flags %c:%c:%c:%c:%c:%c:%c:%c:%c",
++ cdnsp_trb_type_string(type),
++ field1, field0, TRB_LEN(field2),
++ GET_TD_SIZE(field2),
++ GET_INTR_TARGET(field2),
++ field3 & TRB_BEI ? 'B' : 'b',
++ field3 & TRB_IDT ? 'T' : 't',
++ field3 & TRB_IOC ? 'I' : 'i',
++ field3 & TRB_CHAIN ? 'C' : 'c',
++ field3 & TRB_NO_SNOOP ? 'S' : 's',
++ field3 & TRB_ISP ? 'I' : 'i',
++ field3 & TRB_ENT ? 'E' : 'e',
++ field3 & TRB_CYCLE ? 'C' : 'c',
++ !(field3 & TRB_EVENT_INVALIDATE) ? 'V' : 'v');
+ break;
+ case TRB_CMD_NOOP:
+ case TRB_ENABLE_SLOT:
+- ret += snprintf(str, size, "%s: flags %c",
+- cdnsp_trb_type_string(type),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size, "%s: flags %c",
++ cdnsp_trb_type_string(type),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_DISABLE_SLOT:
+- ret += snprintf(str, size, "%s: slot %ld flags %c",
+- cdnsp_trb_type_string(type),
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size, "%s: slot %ld flags %c",
++ cdnsp_trb_type_string(type),
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_ADDR_DEV:
+- ret += snprintf(str, size,
+- "%s: ctx %08x%08x slot %ld flags %c:%c",
+- cdnsp_trb_type_string(type), field1, field0,
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_BSR ? 'B' : 'b',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ctx %08x%08x slot %ld flags %c:%c",
++ cdnsp_trb_type_string(type), field1, field0,
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_BSR ? 'B' : 'b',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_CONFIG_EP:
+- ret += snprintf(str, size,
+- "%s: ctx %08x%08x slot %ld flags %c:%c",
+- cdnsp_trb_type_string(type), field1, field0,
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_DC ? 'D' : 'd',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ctx %08x%08x slot %ld flags %c:%c",
++ cdnsp_trb_type_string(type), field1, field0,
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_DC ? 'D' : 'd',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_EVAL_CONTEXT:
+- ret += snprintf(str, size,
+- "%s: ctx %08x%08x slot %ld flags %c",
+- cdnsp_trb_type_string(type), field1, field0,
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ctx %08x%08x slot %ld flags %c",
++ cdnsp_trb_type_string(type), field1, field0,
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_RESET_EP:
+ case TRB_HALT_ENDPOINT:
+ case TRB_FLUSH_ENDPOINT:
+- ret += snprintf(str, size,
+- "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c",
+- cdnsp_trb_type_string(type),
+- ep_num, ep_id % 2 ? "out" : "in",
+- TRB_TO_EP_INDEX(field3), field1, field0,
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c",
++ cdnsp_trb_type_string(type),
++ ep_num, ep_id % 2 ? "out" : "in",
++ TRB_TO_EP_INDEX(field3), field1, field0,
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_STOP_RING:
+- ret += snprintf(str, size,
+- "%s: ep%d%s(%d) slot %ld sp %d flags %c",
+- cdnsp_trb_type_string(type),
+- ep_num, ep_id % 2 ? "out" : "in",
+- TRB_TO_EP_INDEX(field3),
+- TRB_TO_SLOT_ID(field3),
+- TRB_TO_SUSPEND_PORT(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ep%d%s(%d) slot %ld sp %d flags %c",
++ cdnsp_trb_type_string(type),
++ ep_num, ep_id % 2 ? "out" : "in",
++ TRB_TO_EP_INDEX(field3),
++ TRB_TO_SLOT_ID(field3),
++ TRB_TO_SUSPEND_PORT(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_SET_DEQ:
+- ret += snprintf(str, size,
+- "%s: ep%d%s(%d) deq %08x%08x stream %ld slot %ld flags %c",
+- cdnsp_trb_type_string(type),
+- ep_num, ep_id % 2 ? "out" : "in",
+- TRB_TO_EP_INDEX(field3), field1, field0,
+- TRB_TO_STREAM_ID(field2),
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size,
++ "%s: ep%d%s(%d) deq %08x%08x stream %ld slot %ld flags %c",
++ cdnsp_trb_type_string(type),
++ ep_num, ep_id % 2 ? "out" : "in",
++ TRB_TO_EP_INDEX(field3), field1, field0,
++ TRB_TO_STREAM_ID(field2),
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_RESET_DEV:
+- ret += snprintf(str, size, "%s: slot %ld flags %c",
+- cdnsp_trb_type_string(type),
+- TRB_TO_SLOT_ID(field3),
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ ret = snprintf(str, size, "%s: slot %ld flags %c",
++ cdnsp_trb_type_string(type),
++ TRB_TO_SLOT_ID(field3),
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_ENDPOINT_NRDY:
+- temp = TRB_TO_HOST_STREAM(field2);
+-
+- ret += snprintf(str, size,
+- "%s: ep%d%s(%d) H_SID %x%s%s D_SID %lx flags %c:%c",
+- cdnsp_trb_type_string(type),
+- ep_num, ep_id % 2 ? "out" : "in",
+- TRB_TO_EP_INDEX(field3), temp,
+- temp == STREAM_PRIME_ACK ? "(PRIME)" : "",
+- temp == STREAM_REJECTED ? "(REJECTED)" : "",
+- TRB_TO_DEV_STREAM(field0),
+- field3 & TRB_STAT ? 'S' : 's',
+- field3 & TRB_CYCLE ? 'C' : 'c');
++ temp = TRB_TO_HOST_STREAM(field2);
++
++ ret = snprintf(str, size,
++ "%s: ep%d%s(%d) H_SID %x%s%s D_SID %lx flags %c:%c",
++ cdnsp_trb_type_string(type),
++ ep_num, ep_id % 2 ? "out" : "in",
++ TRB_TO_EP_INDEX(field3), temp,
++ temp == STREAM_PRIME_ACK ? "(PRIME)" : "",
++ temp == STREAM_REJECTED ? "(REJECTED)" : "",
++ TRB_TO_DEV_STREAM(field0),
++ field3 & TRB_STAT ? 'S' : 's',
++ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ default:
+- ret += snprintf(str, size,
+- "type '%s' -> raw %08x %08x %08x %08x",
+- cdnsp_trb_type_string(type),
+- field0, field1, field2, field3);
++ ret = snprintf(str, size,
++ "type '%s' -> raw %08x %08x %08x %08x",
++ cdnsp_trb_type_string(type),
++ field0, field1, field2, field3);
+ }
+
++ if (ret >= size)
++ pr_info("CDNSP: buffer overflowed.\n");
++
+ return str;
+ }
+
+diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c
+index 9b8325f824992..f317d3c847810 100644
+--- a/drivers/usb/cdns3/cdnsp-ep0.c
++++ b/drivers/usb/cdns3/cdnsp-ep0.c
+@@ -403,20 +403,6 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev,
+ case USB_REQ_SET_ISOCH_DELAY:
+ ret = cdnsp_ep0_set_isoch_delay(pdev, ctrl);
+ break;
+- case USB_REQ_SET_INTERFACE:
+- /*
+- * Add request into pending list to block sending status stage
+- * by libcomposite.
+- */
+- list_add_tail(&pdev->ep0_preq.list,
+- &pdev->ep0_preq.pep->pending_list);
+-
+- ret = cdnsp_ep0_delegate_req(pdev, ctrl);
+- if (ret == -EBUSY)
+- ret = 0;
+-
+- list_del(&pdev->ep0_preq.list);
+- break;
+ default:
+ ret = cdnsp_ep0_delegate_req(pdev, ctrl);
+ break;
+@@ -428,7 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev,
+ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
+ {
+ struct usb_ctrlrequest *ctrl = &pdev->setup;
+- int ret = 0;
++ int ret = -EINVAL;
+ u16 len;
+
+ trace_cdnsp_ctrl_req(ctrl);
+@@ -438,7 +424,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
+
+ if (pdev->gadget.state == USB_STATE_NOTATTACHED) {
+ dev_err(pdev->dev, "ERR: Setup detected in unattached state\n");
+- ret = -EINVAL;
+ goto out;
+ }
+
+@@ -474,9 +459,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
+ else
+ ret = cdnsp_ep0_delegate_req(pdev, ctrl);
+
+- if (!len)
+- pdev->ep0_stage = CDNSP_STATUS_STAGE;
+-
+ if (ret == USB_GADGET_DELAYED_STATUS) {
+ trace_cdnsp_ep0_status_stage("delayed");
+ return;
+@@ -484,6 +466,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev)
+ out:
+ if (ret < 0)
+ cdnsp_ep0_stall(pdev);
+- else if (pdev->ep0_stage == CDNSP_STATUS_STAGE)
++ else if (!len && pdev->ep0_stage != CDNSP_STATUS_STAGE)
+ cdnsp_status_stage(pdev);
+ }
+diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c
+index 27df0c6978978..068ccbd144b24 100644
+--- a/drivers/usb/cdns3/cdnsp-gadget.c
++++ b/drivers/usb/cdns3/cdnsp-gadget.c
+@@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev,
+
+ trace_cdnsp_ep_halt(value ? "Set" : "Clear");
+
+- if (value) {
+- ret = cdnsp_cmd_stop_ep(pdev, pep);
+- if (ret)
+- return ret;
++ ret = cdnsp_cmd_stop_ep(pdev, pep);
++ if (ret)
++ return ret;
+
++ if (value) {
+ if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) {
+ cdnsp_queue_halt_endpoint(pdev, pep->idx);
+ cdnsp_ring_cmd_db(pdev);
+@@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev,
+
+ pep->ep_state |= EP_HALTED;
+ } else {
+- /*
+- * In device mode driver can call reset endpoint command
+- * from any endpoint state.
+- */
+ cdnsp_queue_reset_ep(pdev, pep->idx);
+ cdnsp_ring_cmd_db(pdev);
+ ret = cdnsp_wait_for_cmd_compl(pdev);
+@@ -1541,15 +1537,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on)
+ {
+ struct cdnsp_device *pdev = gadget_to_cdnsp(gadget);
+ struct cdns *cdns = dev_get_drvdata(pdev->dev);
++ unsigned long flags;
+
+ trace_cdnsp_pullup(is_on);
+
++ /*
++ * Disable event handling while the controller is being
++ * enabled/disabled.
++ */
++ disable_irq(cdns->dev_irq);
++ spin_lock_irqsave(&pdev->lock, flags);
++
+ if (!is_on) {
+ cdnsp_reset_device(pdev);
+ cdns_clear_vbus(cdns);
+ } else {
+ cdns_set_vbus(cdns);
+ }
++
++ spin_unlock_irqrestore(&pdev->lock, flags);
++ enable_irq(cdns->dev_irq);
++
+ return 0;
+ }
+
+diff --git a/drivers/usb/cdns3/cdnsp-mem.c b/drivers/usb/cdns3/cdnsp-mem.c
+index ad9aee3f1e398..97866bfb2da9d 100644
+--- a/drivers/usb/cdns3/cdnsp-mem.c
++++ b/drivers/usb/cdns3/cdnsp-mem.c
+@@ -987,6 +987,9 @@ int cdnsp_endpoint_init(struct cdnsp_device *pdev,
+
+ /* Set up the endpoint ring. */
+ pep->ring = cdnsp_ring_alloc(pdev, 2, ring_type, max_packet, mem_flags);
++ if (!pep->ring)
++ return -ENOMEM;
++
+ pep->skip = false;
+
+ /* Fill the endpoint context */
+diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c
+index fe8a114c586cc..29f433c5a6f3f 100644
+--- a/drivers/usb/cdns3/cdnsp-pci.c
++++ b/drivers/usb/cdns3/cdnsp-pci.c
+@@ -29,30 +29,23 @@
+ #define PLAT_DRIVER_NAME "cdns-usbssp"
+
+ #define CDNS_VENDOR_ID 0x17cd
+-#define CDNS_DEVICE_ID 0x0100
++#define CDNS_DEVICE_ID 0x0200
++#define CDNS_DRD_ID 0x0100
+ #define CDNS_DRD_IF (PCI_CLASS_SERIAL_USB << 8 | 0x80)
+
+ static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev)
+ {
+- struct pci_dev *func;
+-
+ /*
+ * Gets the second function.
+- * It's little tricky, but this platform has two function.
+- * The fist keeps resources for Host/Device while the second
+- * keeps resources for DRD/OTG.
++ * Platform has two functions. The first keeps resources for
++ * Host/Device while the second keeps resources for DRD/OTG.
+ */
+- func = pci_get_device(pdev->vendor, pdev->device, NULL);
+- if (!func)
+- return NULL;
++ if (pdev->device == CDNS_DEVICE_ID)
++ return pci_get_device(pdev->vendor, CDNS_DRD_ID, NULL);
++ else if (pdev->device == CDNS_DRD_ID)
++ return pci_get_device(pdev->vendor, CDNS_DEVICE_ID, NULL);
+
+- if (func->devfn == pdev->devfn) {
+- func = pci_get_device(pdev->vendor, pdev->device, func);
+- if (!func)
+- return NULL;
+- }
+-
+- return func;
++ return NULL;
+ }
+
+ static int cdnsp_pci_probe(struct pci_dev *pdev,
+@@ -232,6 +225,8 @@ static const struct pci_device_id cdnsp_pci_ids[] = {
+ PCI_CLASS_SERIAL_USB_DEVICE, PCI_ANY_ID },
+ { PCI_VENDOR_ID_CDNS, CDNS_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+ CDNS_DRD_IF, PCI_ANY_ID },
++ { PCI_VENDOR_ID_CDNS, CDNS_DRD_ID, PCI_ANY_ID, PCI_ANY_ID,
++ CDNS_DRD_IF, PCI_ANY_ID },
+ { 0, }
+ };
+
+diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
+index 1b1438457fb04..b23e543b3a3d5 100644
+--- a/drivers/usb/cdns3/cdnsp-ring.c
++++ b/drivers/usb/cdns3/cdnsp-ring.c
+@@ -1029,6 +1029,8 @@ static void cdnsp_process_ctrl_td(struct cdnsp_device *pdev,
+ return;
+ }
+
++ *status = 0;
++
+ cdnsp_finish_td(pdev, td, event, pep, status);
+ }
+
+@@ -1523,7 +1525,14 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
+ spin_lock_irqsave(&pdev->lock, flags);
+
+ if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) {
+- cdnsp_died(pdev);
++ /*
++ * While removing or stopping the driver there may still be a deferred,
++ * unhandled interrupt which should not be treated as an error.
++ * The driver should simply ignore it.
++ */
++ if (pdev->gadget_driver)
++ cdnsp_died(pdev);
++
+ spin_unlock_irqrestore(&pdev->lock, flags);
+ return IRQ_HANDLED;
+ }
+@@ -1754,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev,
+ int trb_buff_len,
+ unsigned int td_total_len,
+ struct cdnsp_request *preq,
+- bool more_trbs_coming)
++ bool more_trbs_coming,
++ bool zlp)
+ {
+ u32 maxp, total_packet_count;
+
++ /* Before a ZLP the driver needs to set TD_SIZE = 1. */
++ if (zlp)
++ return 1;
++
+ /* One TRB with a zero-length data packet. */
+ if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) ||
+ trb_buff_len == td_total_len)
+@@ -1932,13 +1946,16 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+ }
+
+ if (enqd_len + trb_buff_len >= full_len) {
+- if (need_zero_pkt)
+- zero_len_trb = !zero_len_trb;
+-
+- field &= ~TRB_CHAIN;
+- field |= TRB_IOC;
+- more_trbs_coming = false;
+- preq->td.last_trb = ring->enqueue;
++ if (need_zero_pkt && !zero_len_trb) {
++ zero_len_trb = true;
++ } else {
++ zero_len_trb = false;
++ field &= ~TRB_CHAIN;
++ field |= TRB_IOC;
++ more_trbs_coming = false;
++ need_zero_pkt = false;
++ preq->td.last_trb = ring->enqueue;
++ }
+ }
+
+ /* Only set interrupt on short packet for OUT endpoints. */
+@@ -1948,12 +1965,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+ /* Set the TRB length, TD size, and interrupter fields. */
+ remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len,
+ full_len, preq,
+- more_trbs_coming);
++ more_trbs_coming,
++ zero_len_trb);
+
+ length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) |
+ TRB_INTR_TARGET(0);
+
+- cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb,
++ cdnsp_queue_trb(pdev, ring, more_trbs_coming,
+ lower_32_bits(send_addr),
+ upper_32_bits(send_addr),
+ length_field,
+@@ -1988,10 +2006,11 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+
+ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+ {
+- u32 field, length_field, remainder;
++ u32 field, length_field, zlp = 0;
+ struct cdnsp_ep *pep = preq->pep;
+ struct cdnsp_ring *ep_ring;
+ int num_trbs;
++ u32 maxp;
+ int ret;
+
+ ep_ring = cdnsp_request_to_transfer_ring(pdev, preq);
+@@ -2001,26 +2020,33 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+ /* 1 TRB for data, 1 for status */
+ num_trbs = (pdev->three_stage_setup) ? 2 : 1;
+
++ maxp = usb_endpoint_maxp(pep->endpoint.desc);
++
++ if (preq->request.zero && preq->request.length &&
++ (preq->request.length % maxp == 0)) {
++ num_trbs++;
++ zlp = 1;
++ }
++
+ ret = cdnsp_prepare_transfer(pdev, preq, num_trbs);
+ if (ret)
+ return ret;
+
+ /* If there's data, queue data TRBs */
+- if (pdev->ep0_expect_in)
+- field = TRB_TYPE(TRB_DATA) | TRB_IOC;
+- else
+- field = TRB_ISP | TRB_TYPE(TRB_DATA) | TRB_IOC;
+-
+ if (preq->request.length > 0) {
+- remainder = cdnsp_td_remainder(pdev, 0, preq->request.length,
+- preq->request.length, preq, 1);
++ field = TRB_TYPE(TRB_DATA);
+
+- length_field = TRB_LEN(preq->request.length) |
+- TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0);
++ if (zlp)
++ field |= TRB_CHAIN;
++ else
++ field |= TRB_IOC | (pdev->ep0_expect_in ? 0 : TRB_ISP);
+
+ if (pdev->ep0_expect_in)
+ field |= TRB_DIR_IN;
+
++ length_field = TRB_LEN(preq->request.length) |
++ TRB_TD_SIZE(zlp) | TRB_INTR_TARGET(0);
++
+ cdnsp_queue_trb(pdev, ep_ring, true,
+ lower_32_bits(preq->request.dma),
+ upper_32_bits(preq->request.dma), length_field,
+@@ -2028,6 +2054,20 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
+ TRB_SETUPID(pdev->setup_id) |
+ pdev->setup_speed);
+
++ if (zlp) {
++ field = TRB_TYPE(TRB_NORMAL) | TRB_IOC;
++
++ if (!pdev->ep0_expect_in)
++ field = TRB_ISP;
++
++ cdnsp_queue_trb(pdev, ep_ring, true,
++ lower_32_bits(preq->request.dma),
++ upper_32_bits(preq->request.dma), 0,
++ field | ep_ring->cycle_state |
++ TRB_SETUPID(pdev->setup_id) |
++ pdev->setup_speed);
++ }
++
+ pdev->ep0_stage = CDNSP_DATA_STAGE;
+ }
+
+@@ -2064,7 +2104,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep)
+ u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx);
+ int ret = 0;
+
+- if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) {
++ if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED ||
++ ep_state == EP_STATE_HALTED) {
+ trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx);
+ goto ep_stopped;
+ }
+@@ -2213,7 +2254,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev,
+ /* Set the TRB length, TD size, & interrupter fields. */
+ remainder = cdnsp_td_remainder(pdev, running_total,
+ trb_buff_len, td_len, preq,
+- more_trbs_coming);
++ more_trbs_coming, 0);
+
+ length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0);
+
+diff --git a/drivers/usb/cdns3/cdnsp-trace.h b/drivers/usb/cdns3/cdnsp-trace.h
+index 6a2571c6aa9ed..5983dfb996537 100644
+--- a/drivers/usb/cdns3/cdnsp-trace.h
++++ b/drivers/usb/cdns3/cdnsp-trace.h
+@@ -57,9 +57,9 @@ DECLARE_EVENT_CLASS(cdnsp_log_ep,
+ __entry->first_prime_det = pep->stream_info.first_prime_det;
+ __entry->drbls_count = pep->stream_info.drbls_count;
+ ),
+- TP_printk("%s: SID: %08x ep state: %x stream: enabled: %d num %d "
++ TP_printk("%s: SID: %08x, ep state: %x, stream: enabled: %d num %d "
+ "tds %d, first prime: %d drbls %d",
+- __get_str(name), __entry->state, __entry->stream_id,
++ __get_str(name), __entry->stream_id, __entry->state,
+ __entry->enabled, __entry->num_streams, __entry->td_count,
+ __entry->first_prime_det, __entry->drbls_count)
+ );
+diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c
+index 55c73b1d87047..d00ff98dffabf 100644
+--- a/drivers/usb/cdns3/drd.c
++++ b/drivers/usb/cdns3/drd.c
+@@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns)
+ /* Indicate the cdns3 core was power lost before */
+ bool cdns_power_is_lost(struct cdns *cdns)
+ {
+- if (cdns->version == CDNS3_CONTROLLER_V1) {
+- if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
++ if (cdns->version == CDNS3_CONTROLLER_V0) {
++ if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
+ return true;
+ } else {
+- if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0)))
++ if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0)))
+ return true;
+ }
+ return false;
+diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
+index 84dadfa726aa6..3e85b5d3cf7a1 100644
+--- a/drivers/usb/cdns3/host.c
++++ b/drivers/usb/cdns3/host.c
+@@ -23,11 +23,37 @@
+ #define CFG_RXDET_P3_EN BIT(15)
+ #define LPM_2_STB_SWITCH_EN BIT(25)
+
+-static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd);
++static void xhci_cdns3_plat_start(struct usb_hcd *hcd)
++{
++ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
++ u32 value;
++
++ /* set usbcmd.EU3S */
++ value = readl(&xhci->op_regs->command);
++ value |= CMD_PM_INDEX;
++ writel(value, &xhci->op_regs->command);
++
++ if (hcd->regs) {
++ value = readl(hcd->regs + XECP_AUX_CTRL_REG1);
++ value |= CFG_RXDET_P3_EN;
++ writel(value, hcd->regs + XECP_AUX_CTRL_REG1);
++
++ value = readl(hcd->regs + XECP_PORT_CAP_REG);
++ value |= LPM_2_STB_SWITCH_EN;
++ writel(value, hcd->regs + XECP_PORT_CAP_REG);
++ }
++}
++
++static int xhci_cdns3_resume_quirk(struct usb_hcd *hcd)
++{
++ xhci_cdns3_plat_start(hcd);
++ return 0;
++}
+
+ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
+ .quirks = XHCI_SKIP_PHY_INIT | XHCI_AVOID_BEI,
+- .suspend_quirk = xhci_cdns3_suspend_quirk,
++ .plat_start = xhci_cdns3_plat_start,
++ .resume_quirk = xhci_cdns3_resume_quirk,
+ };
+
+ static int __cdns_host_init(struct cdns *cdns)
+@@ -89,32 +115,6 @@ err1:
+ return ret;
+ }
+
+-static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd)
+-{
+- struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+- u32 value;
+-
+- if (pm_runtime_status_suspended(hcd->self.controller))
+- return 0;
+-
+- /* set usbcmd.EU3S */
+- value = readl(&xhci->op_regs->command);
+- value |= CMD_PM_INDEX;
+- writel(value, &xhci->op_regs->command);
+-
+- if (hcd->regs) {
+- value = readl(hcd->regs + XECP_AUX_CTRL_REG1);
+- value |= CFG_RXDET_P3_EN;
+- writel(value, hcd->regs + XECP_AUX_CTRL_REG1);
+-
+- value = readl(hcd->regs + XECP_PORT_CAP_REG);
+- value |= LPM_2_STB_SWITCH_EN;
+- writel(value, hcd->regs + XECP_PORT_CAP_REG);
+- }
+-
+- return 0;
+-}
+-
+ static void cdns_host_exit(struct cdns *cdns)
+ {
+ kfree(cdns->xhci_plat_data);
+diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h
+index 99440baa6458a..50e37846f0378 100644
+--- a/drivers/usb/chipidea/ci.h
++++ b/drivers/usb/chipidea/ci.h
+@@ -203,6 +203,7 @@ struct hw_bank {
+ * @in_lpm: if the core in low power mode
+ * @wakeup_int: if wakeup interrupt occur
+ * @rev: The revision number for controller
++ * @mutex: protect code from concurrent running when doing role switch
+ */
+ struct ci_hdrc {
+ struct device *dev;
+@@ -255,6 +256,7 @@ struct ci_hdrc {
+ bool in_lpm;
+ bool wakeup_int;
+ enum ci_revision rev;
++ struct mutex mutex;
+ };
+
+ static inline struct ci_role_driver *ci_role(struct ci_hdrc *ci)
+diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
+index f1d100671ee6a..d8efa90479e23 100644
+--- a/drivers/usb/chipidea/ci_hdrc_imx.c
++++ b/drivers/usb/chipidea/ci_hdrc_imx.c
+@@ -70,6 +70,10 @@ static const struct ci_hdrc_imx_platform_flag imx7ulp_usb_data = {
+ CI_HDRC_PMQOS,
+ };
+
++static const struct ci_hdrc_imx_platform_flag imx8ulp_usb_data = {
++ .flags = CI_HDRC_SUPPORTS_RUNTIME_PM,
++};
++
+ static const struct of_device_id ci_hdrc_imx_dt_ids[] = {
+ { .compatible = "fsl,imx23-usb", .data = &imx23_usb_data},
+ { .compatible = "fsl,imx28-usb", .data = &imx28_usb_data},
+@@ -80,6 +84,7 @@ static const struct of_device_id ci_hdrc_imx_dt_ids[] = {
+ { .compatible = "fsl,imx6ul-usb", .data = &imx6ul_usb_data},
+ { .compatible = "fsl,imx7d-usb", .data = &imx7d_usb_data},
+ { .compatible = "fsl,imx7ulp-usb", .data = &imx7ulp_usb_data},
++ { .compatible = "fsl,imx8ulp-usb", .data = &imx8ulp_usb_data},
+ { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, ci_hdrc_imx_dt_ids);
+@@ -170,10 +175,12 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev)
+ if (of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI)
+ data->ulpi = 1;
+
+- of_property_read_u32(np, "samsung,picophy-pre-emp-curr-control",
+- &data->emp_curr_control);
+- of_property_read_u32(np, "samsung,picophy-dc-vol-level-adjust",
+- &data->dc_vol_level_adjust);
++ if (of_property_read_u32(np, "samsung,picophy-pre-emp-curr-control",
++ &data->emp_curr_control))
++ data->emp_curr_control = -1;
++ if (of_property_read_u32(np, "samsung,picophy-dc-vol-level-adjust",
++ &data->dc_vol_level_adjust))
++ data->dc_vol_level_adjust = -1;
+
+ return data;
+ }
+@@ -420,15 +427,15 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
+ data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0);
+ if (IS_ERR(data->phy)) {
+ ret = PTR_ERR(data->phy);
+- if (ret == -ENODEV) {
+- data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
+- if (IS_ERR(data->phy)) {
+- ret = PTR_ERR(data->phy);
+- if (ret == -ENODEV)
+- data->phy = NULL;
+- else
+- goto err_clk;
+- }
++ if (ret != -ENODEV)
++ goto err_clk;
++ data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
++ if (IS_ERR(data->phy)) {
++ ret = PTR_ERR(data->phy);
++ if (ret == -ENODEV)
++ data->phy = NULL;
++ else
++ goto err_clk;
+ }
+ }
+
+diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
+index 2b18f5088ae4a..0e8f4aa031f81 100644
+--- a/drivers/usb/chipidea/core.c
++++ b/drivers/usb/chipidea/core.c
+@@ -514,7 +514,7 @@ int hw_device_reset(struct ci_hdrc *ci)
+ return 0;
+ }
+
+-static irqreturn_t ci_irq(int irq, void *data)
++static irqreturn_t ci_irq_handler(int irq, void *data)
+ {
+ struct ci_hdrc *ci = data;
+ irqreturn_t ret = IRQ_NONE;
+@@ -567,6 +567,15 @@ static irqreturn_t ci_irq(int irq, void *data)
+ return ret;
+ }
+
++static void ci_irq(struct ci_hdrc *ci)
++{
++ unsigned long flags;
++
++ local_irq_save(flags);
++ ci_irq_handler(ci->irq, ci);
++ local_irq_restore(flags);
++}
++
+ static int ci_cable_notifier(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+ {
+@@ -576,7 +585,7 @@ static int ci_cable_notifier(struct notifier_block *nb, unsigned long event,
+ cbl->connected = event;
+ cbl->changed = true;
+
+- ci_irq(ci->irq, ci);
++ ci_irq(ci);
+ return NOTIFY_DONE;
+ }
+
+@@ -617,7 +626,7 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw,
+ if (cable) {
+ cable->changed = true;
+ cable->connected = false;
+- ci_irq(ci->irq, ci);
++ ci_irq(ci);
+ spin_unlock_irqrestore(&ci->lock, flags);
+ if (ci->wq && role != USB_ROLE_NONE)
+ flush_workqueue(ci->wq);
+@@ -635,7 +644,7 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw,
+ if (cable) {
+ cable->changed = true;
+ cable->connected = true;
+- ci_irq(ci->irq, ci);
++ ci_irq(ci);
+ }
+ spin_unlock_irqrestore(&ci->lock, flags);
+ pm_runtime_put_sync(ci->dev);
+@@ -965,9 +974,16 @@ static ssize_t role_store(struct device *dev,
+ strlen(ci->roles[role]->name)))
+ break;
+
+- if (role == CI_ROLE_END || role == ci->role)
++ if (role == CI_ROLE_END)
+ return -EINVAL;
+
++ mutex_lock(&ci->mutex);
++
++ if (role == ci->role) {
++ mutex_unlock(&ci->mutex);
++ return n;
++ }
++
+ pm_runtime_get_sync(dev);
+ disable_irq(ci->irq);
+ ci_role_stop(ci);
+@@ -976,6 +992,7 @@ static ssize_t role_store(struct device *dev,
+ ci_handle_vbus_change(ci);
+ enable_irq(ci->irq);
+ pm_runtime_put_sync(dev);
++ mutex_unlock(&ci->mutex);
+
+ return (ret == 0) ? n : ret;
+ }
+@@ -1011,6 +1028,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ spin_lock_init(&ci->lock);
++ mutex_init(&ci->mutex);
+ ci->dev = dev;
+ ci->platdata = dev_get_platdata(dev);
+ ci->imx28_write_fix = !!(ci->platdata->flags &
+@@ -1080,7 +1098,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
+ ret = ci_usb_phy_init(ci);
+ if (ret) {
+ dev_err(dev, "unable to init phy: %d\n", ret);
+- return ret;
++ goto ulpi_exit;
+ }
+
+ ci->hw_bank.phys = res->start;
+@@ -1174,7 +1192,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
+ }
+ }
+
+- ret = devm_request_irq(dev, ci->irq, ci_irq, IRQF_SHARED,
++ ret = devm_request_irq(dev, ci->irq, ci_irq_handler, IRQF_SHARED,
+ ci->platdata->name, ci);
+ if (ret)
+ goto stop;
+@@ -1295,11 +1313,11 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci)
+
+ if (!IS_ERR(cable_id->edev) && ci->is_otg &&
+ (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS))
+- ci_irq(ci->irq, ci);
++ ci_irq(ci);
+
+ if (!IS_ERR(cable_vbus->edev) && ci->is_otg &&
+ (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS))
+- ci_irq(ci->irq, ci);
++ ci_irq(ci);
+ }
+
+ static int ci_controller_resume(struct device *dev)
+diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
+index faf6b078b6c44..bbc610e5bd69c 100644
+--- a/drivers/usb/chipidea/debug.c
++++ b/drivers/usb/chipidea/debug.c
+@@ -364,5 +364,5 @@ void dbg_create_files(struct ci_hdrc *ci)
+ */
+ void dbg_remove_files(struct ci_hdrc *ci)
+ {
+- debugfs_remove(debugfs_lookup(dev_name(ci->dev), usb_debug_root));
++ debugfs_lookup_and_remove(dev_name(ci->dev), usb_debug_root);
+ }
+diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
+index 8dd59282827b0..2d9d694eb0bd4 100644
+--- a/drivers/usb/chipidea/otg.c
++++ b/drivers/usb/chipidea/otg.c
+@@ -167,8 +167,10 @@ static int hw_wait_vbus_lower_bsv(struct ci_hdrc *ci)
+
+ static void ci_handle_id_switch(struct ci_hdrc *ci)
+ {
+- enum ci_role role = ci_otg_role(ci);
++ enum ci_role role;
+
++ mutex_lock(&ci->mutex);
++ role = ci_otg_role(ci);
+ if (role != ci->role) {
+ dev_dbg(ci->dev, "switching from %s to %s\n",
+ ci_role(ci)->name, ci->roles[role]->name);
+@@ -198,6 +200,7 @@ static void ci_handle_id_switch(struct ci_hdrc *ci)
+ if (role == CI_ROLE_GADGET)
+ ci_handle_vbus_change(ci);
+ }
++ mutex_unlock(&ci->mutex);
+ }
+ /**
+ * ci_otg_work - perform otg (vbus/id) event handle
+diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c
+index 6ed4b00dba961..7a2a9559693fb 100644
+--- a/drivers/usb/chipidea/otg_fsm.c
++++ b/drivers/usb/chipidea/otg_fsm.c
+@@ -256,8 +256,10 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum otg_fsm_timer t)
+ ci->enabled_otg_timer_bits &= ~(1 << t);
+ if (ci->next_otg_timer == t) {
+ if (ci->enabled_otg_timer_bits == 0) {
++ spin_unlock_irqrestore(&ci->lock, flags);
+ /* No enabled timers after delete it */
+ hrtimer_cancel(&ci->otg_fsm_hrtimer);
++ spin_lock_irqsave(&ci->lock, flags);
+ ci->next_otg_timer = NUM_OTG_FSM_TIMERS;
+ } else {
+ /* Find the next timer */
+diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
+index 8834ca6137219..aacc37736db6e 100644
+--- a/drivers/usb/chipidea/udc.c
++++ b/drivers/usb/chipidea/udc.c
+@@ -1040,6 +1040,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req)
+ struct ci_hdrc *ci = req->context;
+ unsigned long flags;
+
++ if (req->status < 0)
++ return;
++
+ if (ci->setaddr) {
+ hw_usb_set_address(ci, ci->address);
+ ci->setaddr = false;
+diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
+index bac0f5458cab9..a2cb4f48c84c6 100644
+--- a/drivers/usb/chipidea/usbmisc_imx.c
++++ b/drivers/usb/chipidea/usbmisc_imx.c
+@@ -135,7 +135,7 @@
+ #define TXVREFTUNE0_MASK (0xf << 20)
+
+ #define MX6_USB_OTG_WAKEUP_BITS (MX6_BM_WAKEUP_ENABLE | MX6_BM_VBUS_WAKEUP | \
+- MX6_BM_ID_WAKEUP)
++ MX6_BM_ID_WAKEUP | MX6SX_BM_DPDM_WAKEUP_EN)
+
+ struct usbmisc_ops {
+ /* It's called once when probe a usb device */
+@@ -657,13 +657,15 @@ static int usbmisc_imx7d_init(struct imx_usbmisc_data *data)
+ usbmisc->base + MX7D_USBNC_USB_CTRL2);
+ /* PHY tuning for signal quality */
+ reg = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG1);
+- if (data->emp_curr_control && data->emp_curr_control <=
++ if (data->emp_curr_control >= 0 &&
++ data->emp_curr_control <=
+ (TXPREEMPAMPTUNE0_MASK >> TXPREEMPAMPTUNE0_BIT)) {
+ reg &= ~TXPREEMPAMPTUNE0_MASK;
+ reg |= (data->emp_curr_control << TXPREEMPAMPTUNE0_BIT);
+ }
+
+- if (data->dc_vol_level_adjust && data->dc_vol_level_adjust <=
++ if (data->dc_vol_level_adjust >= 0 &&
++ data->dc_vol_level_adjust <=
+ (TXVREFTUNE0_MASK >> TXVREFTUNE0_BIT)) {
+ reg &= ~TXVREFTUNE0_MASK;
+ reg |= (data->dc_vol_level_adjust << TXVREFTUNE0_BIT);
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 7b2e2420ecaea..adc154b691d05 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1814,6 +1814,9 @@ static const struct usb_device_id acm_ids[] = {
+ { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */
+ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */
+ },
++ { USB_DEVICE(0x0c26, 0x0020), /* Icom ICF3400 Serie */
++ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
++ },
+ { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */
+ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+ },
+diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
+index fdf79bcf7eb09..0d99ba64ea528 100644
+--- a/drivers/usb/class/cdc-wdm.c
++++ b/drivers/usb/class/cdc-wdm.c
+@@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file)
+ poison_urbs(desc);
+ spin_lock_irq(&desc->iuspin);
+ desc->resp_count = 0;
++ clear_bit(WDM_RESPONDING, &desc->flags);
+ spin_unlock_irq(&desc->iuspin);
+ desc->manage_power(desc->intf, 0);
+ unpoison_urbs(desc);
+diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
+index 73f419adce610..311007b1d9046 100644
+--- a/drivers/usb/class/usbtmc.c
++++ b/drivers/usb/class/usbtmc.c
+@@ -1919,6 +1919,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
+ struct usbtmc_ctrlrequest request;
+ u8 *buffer = NULL;
+ int rv;
++ unsigned int is_in, pipe;
+ unsigned long res;
+
+ res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest));
+@@ -1927,13 +1928,17 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
+
+ if (request.req.wLength > USBTMC_BUFSIZE)
+ return -EMSGSIZE;
++ if (request.req.wLength == 0) /* Length-0 requests are never IN */
++ request.req.bRequestType &= ~USB_DIR_IN;
++
++ is_in = request.req.bRequestType & USB_DIR_IN;
+
+ if (request.req.wLength) {
+ buffer = kmalloc(request.req.wLength, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+- if ((request.req.bRequestType & USB_DIR_IN) == 0) {
++ if (!is_in) {
+ /* Send control data to device */
+ res = copy_from_user(buffer, request.data,
+ request.req.wLength);
+@@ -1944,8 +1949,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
+ }
+ }
+
++ if (is_in)
++ pipe = usb_rcvctrlpipe(data->usb_dev, 0);
++ else
++ pipe = usb_sndctrlpipe(data->usb_dev, 0);
+ rv = usb_control_msg(data->usb_dev,
+- usb_rcvctrlpipe(data->usb_dev, 0),
++ pipe,
+ request.req.bRequest,
+ request.req.bRequestType,
+ request.req.wValue,
+@@ -1957,7 +1966,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
+ goto exit;
+ }
+
+- if (rv && (request.req.bRequestType & USB_DIR_IN)) {
++ if (rv && is_in) {
+ /* Read control data from device */
+ res = copy_to_user(request.data, buffer, rv);
+ if (res)
+diff --git a/drivers/usb/common/debug.c b/drivers/usb/common/debug.c
+index a76a086b9c548..f0c0e8db70388 100644
+--- a/drivers/usb/common/debug.c
++++ b/drivers/usb/common/debug.c
+@@ -207,30 +207,28 @@ static void usb_decode_set_isoch_delay(__u8 wValue, char *str, size_t size)
+ snprintf(str, size, "Set Isochronous Delay(Delay = %d ns)", wValue);
+ }
+
+-/**
+- * usb_decode_ctrl - Returns human readable representation of control request.
+- * @str: buffer to return a human-readable representation of control request.
+- * This buffer should have about 200 bytes.
+- * @size: size of str buffer.
+- * @bRequestType: matches the USB bmRequestType field
+- * @bRequest: matches the USB bRequest field
+- * @wValue: matches the USB wValue field (CPU byte order)
+- * @wIndex: matches the USB wIndex field (CPU byte order)
+- * @wLength: matches the USB wLength field (CPU byte order)
+- *
+- * Function returns decoded, formatted and human-readable description of
+- * control request packet.
+- *
+- * The usage scenario for this is for tracepoints, so function as a return
+- * use the same value as in parameters. This approach allows to use this
+- * function in TP_printk
+- *
+- * Important: wValue, wIndex, wLength parameters before invoking this function
+- * should be processed by le16_to_cpu macro.
+- */
+-const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType,
+- __u8 bRequest, __u16 wValue, __u16 wIndex,
+- __u16 wLength)
++static void usb_decode_ctrl_generic(char *str, size_t size, __u8 bRequestType,
++ __u8 bRequest, __u16 wValue, __u16 wIndex,
++ __u16 wLength)
++{
++ u8 recip = bRequestType & USB_RECIP_MASK;
++ u8 type = bRequestType & USB_TYPE_MASK;
++
++ snprintf(str, size,
++ "Type=%s Recipient=%s Dir=%s bRequest=%u wValue=%u wIndex=%u wLength=%u",
++ (type == USB_TYPE_STANDARD) ? "Standard" :
++ (type == USB_TYPE_VENDOR) ? "Vendor" :
++ (type == USB_TYPE_CLASS) ? "Class" : "Unknown",
++ (recip == USB_RECIP_DEVICE) ? "Device" :
++ (recip == USB_RECIP_INTERFACE) ? "Interface" :
++ (recip == USB_RECIP_ENDPOINT) ? "Endpoint" : "Unknown",
++ (bRequestType & USB_DIR_IN) ? "IN" : "OUT",
++ bRequest, wValue, wIndex, wLength);
++}
++
++static void usb_decode_ctrl_standard(char *str, size_t size, __u8 bRequestType,
++ __u8 bRequest, __u16 wValue, __u16 wIndex,
++ __u16 wLength)
+ {
+ switch (bRequest) {
+ case USB_REQ_GET_STATUS:
+@@ -271,14 +269,48 @@ const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType,
+ usb_decode_set_isoch_delay(wValue, str, size);
+ break;
+ default:
+- snprintf(str, size, "%02x %02x %02x %02x %02x %02x %02x %02x",
+- bRequestType, bRequest,
+- (u8)(cpu_to_le16(wValue) & 0xff),
+- (u8)(cpu_to_le16(wValue) >> 8),
+- (u8)(cpu_to_le16(wIndex) & 0xff),
+- (u8)(cpu_to_le16(wIndex) >> 8),
+- (u8)(cpu_to_le16(wLength) & 0xff),
+- (u8)(cpu_to_le16(wLength) >> 8));
++ usb_decode_ctrl_generic(str, size, bRequestType, bRequest,
++ wValue, wIndex, wLength);
++ break;
++ }
++}
++
++/**
++ * usb_decode_ctrl - Returns human readable representation of control request.
++ * @str: buffer to return a human-readable representation of control request.
++ * This buffer should have about 200 bytes.
++ * @size: size of str buffer.
++ * @bRequestType: matches the USB bmRequestType field
++ * @bRequest: matches the USB bRequest field
++ * @wValue: matches the USB wValue field (CPU byte order)
++ * @wIndex: matches the USB wIndex field (CPU byte order)
++ * @wLength: matches the USB wLength field (CPU byte order)
++ *
++ * Function returns decoded, formatted and human-readable description of
++ * control request packet.
++ *
++ * The usage scenario for this is for tracepoints, so function as a return
++ * use the same value as in parameters. This approach allows to use this
++ * function in TP_printk
++ *
++ * Important: wValue, wIndex, wLength parameters before invoking this function
++ * should be processed by le16_to_cpu macro.
++ */
++const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType,
++ __u8 bRequest, __u16 wValue, __u16 wIndex,
++ __u16 wLength)
++{
++ switch (bRequestType & USB_TYPE_MASK) {
++ case USB_TYPE_STANDARD:
++ usb_decode_ctrl_standard(str, size, bRequestType, bRequest,
++ wValue, wIndex, wLength);
++ break;
++ case USB_TYPE_VENDOR:
++ case USB_TYPE_CLASS:
++ default:
++ usb_decode_ctrl_generic(str, size, bRequestType, bRequest,
++ wValue, wIndex, wLength);
++ break;
+ }
+
+ return str;
+diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
+index 4169cf40a03b5..5509d3847af4b 100644
+--- a/drivers/usb/common/ulpi.c
++++ b/drivers/usb/common/ulpi.c
+@@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver)
+ struct ulpi *ulpi = to_ulpi_dev(dev);
+ const struct ulpi_device_id *id;
+
+- /* Some ULPI devices don't have a vendor id so rely on OF match */
+- if (ulpi->id.vendor == 0)
++ /*
++ * Some ULPI devices don't have a vendor id
++ * or provide an id_table so rely on OF match.
++ */
++ if (ulpi->id.vendor == 0 || !drv->id_table)
+ return of_driver_match_device(dev, driver);
+
+ for (id = drv->id_table; id->vendor; id++)
+@@ -127,6 +130,7 @@ static const struct attribute_group *ulpi_dev_attr_groups[] = {
+
+ static void ulpi_dev_release(struct device *dev)
+ {
++ of_node_put(dev->of_node);
+ kfree(to_ulpi_dev(dev));
+ }
+
+@@ -244,12 +248,16 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi)
+ return ret;
+
+ ret = ulpi_read_id(ulpi);
+- if (ret)
++ if (ret) {
++ of_node_put(ulpi->dev.of_node);
+ return ret;
++ }
+
+ ret = device_register(&ulpi->dev);
+- if (ret)
++ if (ret) {
++ put_device(&ulpi->dev);
+ return ret;
++ }
+
+ dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n",
+ ulpi->id.vendor, ulpi->id.product);
+@@ -296,7 +304,6 @@ EXPORT_SYMBOL_GPL(ulpi_register_interface);
+ */
+ void ulpi_unregister_interface(struct ulpi *ulpi)
+ {
+- of_node_put(ulpi->dev.of_node);
+ device_unregister(&ulpi->dev);
+ }
+ EXPORT_SYMBOL_GPL(ulpi_unregister_interface);
+diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c
+index 0158148cb0546..521c95935d4c3 100644
+--- a/drivers/usb/common/usb-conn-gpio.c
++++ b/drivers/usb/common/usb-conn-gpio.c
+@@ -42,6 +42,7 @@ struct usb_conn_info {
+
+ struct power_supply_desc desc;
+ struct power_supply *charger;
++ bool initial_detection;
+ };
+
+ /*
+@@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work)
+ dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n",
+ usb_role_string(info->last_role), usb_role_string(role), id, vbus);
+
+- if (info->last_role == role) {
++ if (!info->initial_detection && info->last_role == role) {
+ dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role));
+ return;
+ }
+
++ info->initial_detection = false;
++
+ if (info->last_role == USB_ROLE_HOST && info->vbus)
+ regulator_disable(info->vbus);
+
+@@ -273,6 +276,7 @@ static int usb_conn_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, info);
+
+ /* Perform initial detection */
++ info->initial_detection = true;
+ usb_conn_queue_dwork(info, 0);
+
+ return 0;
+diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
+index fbb087b728dc9..268ccbec88f95 100644
+--- a/drivers/usb/core/buffer.c
++++ b/drivers/usb/core/buffer.c
+@@ -172,3 +172,44 @@ void hcd_buffer_free(
+ }
+ dma_free_coherent(hcd->self.sysdev, size, addr, dma);
+ }
++
++void *hcd_buffer_alloc_pages(struct usb_hcd *hcd,
++ size_t size, gfp_t mem_flags, dma_addr_t *dma)
++{
++ if (size == 0)
++ return NULL;
++
++ if (hcd->localmem_pool)
++ return gen_pool_dma_alloc_align(hcd->localmem_pool,
++ size, dma, PAGE_SIZE);
++
++ /* some USB hosts just use PIO */
++ if (!hcd_uses_dma(hcd)) {
++ *dma = DMA_MAPPING_ERROR;
++ return (void *)__get_free_pages(mem_flags,
++ get_order(size));
++ }
++
++ return dma_alloc_coherent(hcd->self.sysdev,
++ size, dma, mem_flags);
++}
++
++void hcd_buffer_free_pages(struct usb_hcd *hcd,
++ size_t size, void *addr, dma_addr_t dma)
++{
++ if (!addr)
++ return;
++
++ if (hcd->localmem_pool) {
++ gen_pool_free(hcd->localmem_pool,
++ (unsigned long)addr, size);
++ return;
++ }
++
++ if (!hcd_uses_dma(hcd)) {
++ free_pages((unsigned long)addr, get_order(size));
++ return;
++ }
++
++ dma_free_coherent(hcd->self.sysdev, size, addr, dma);
++}
+diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
+index b199eb65f3780..00e28456e4cc2 100644
+--- a/drivers/usb/core/config.c
++++ b/drivers/usb/core/config.c
+@@ -406,7 +406,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
+ * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0
+ * (see the end of section 5.6.3), so don't warn about them.
+ */
+- maxp = usb_endpoint_maxp(&endpoint->desc);
++ maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize);
+ if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) {
+ dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n",
+ cfgno, inum, asnum, d->bEndpointAddress);
+@@ -422,9 +422,9 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
+ maxpacket_maxes = full_speed_maxpacket_maxes;
+ break;
+ case USB_SPEED_HIGH:
+- /* Bits 12..11 are allowed only for HS periodic endpoints */
++ /* Multiple-transactions bits are allowed only for HS periodic endpoints */
+ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
+- i = maxp & (BIT(12) | BIT(11));
++ i = maxp & USB_EP_MAXP_MULT_MASK;
+ maxp &= ~i;
+ }
+ fallthrough;
+diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
+index 9618ba622a2d0..5cd0a724b425e 100644
+--- a/drivers/usb/core/devio.c
++++ b/drivers/usb/core/devio.c
+@@ -32,6 +32,7 @@
+ #include <linux/usb.h>
+ #include <linux/usbdevice_fs.h>
+ #include <linux/usb/hcd.h> /* for usbcore internals */
++#include <linux/usb/quirks.h>
+ #include <linux/cdev.h>
+ #include <linux/notifier.h>
+ #include <linux/security.h>
+@@ -173,6 +174,7 @@ static int connected(struct usb_dev_state *ps)
+ static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count)
+ {
+ struct usb_dev_state *ps = usbm->ps;
++ struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ps->lock, flags);
+@@ -181,8 +183,8 @@ static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count)
+ list_del(&usbm->memlist);
+ spin_unlock_irqrestore(&ps->lock, flags);
+
+- usb_free_coherent(ps->dev, usbm->size, usbm->mem,
+- usbm->dma_handle);
++ hcd_buffer_free_pages(hcd, usbm->size,
++ usbm->mem, usbm->dma_handle);
+ usbfs_decrease_memory_usage(
+ usbm->size + sizeof(struct usb_memory));
+ kfree(usbm);
+@@ -221,7 +223,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
+ size_t size = vma->vm_end - vma->vm_start;
+ void *mem;
+ unsigned long flags;
+- dma_addr_t dma_handle;
++ dma_addr_t dma_handle = DMA_MAPPING_ERROR;
+ int ret;
+
+ ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory));
+@@ -234,8 +236,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
+ goto error_decrease_mem;
+ }
+
+- mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
+- &dma_handle);
++ mem = hcd_buffer_alloc_pages(hcd,
++ size, GFP_USER | __GFP_NOWARN, &dma_handle);
+ if (!mem) {
+ ret = -ENOMEM;
+ goto error_free_usbm;
+@@ -251,7 +253,14 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
+ usbm->vma_use_count = 1;
+ INIT_LIST_HEAD(&usbm->memlist);
+
+- if (hcd->localmem_pool || !hcd_uses_dma(hcd)) {
++ /*
++ * In DMA-unavailable cases, hcd_buffer_alloc_pages allocates
++ * normal pages and assigns DMA_MAPPING_ERROR to dma_handle. Check
++ * whether we are in such cases, and then use remap_pfn_range (or
++ * dma_mmap_coherent) to map normal (or DMA) pages into the user
++ * space, respectively.
++ */
++ if (dma_handle == DMA_MAPPING_ERROR) {
+ if (remap_pfn_range(vma, vma->vm_start,
+ virt_to_phys(usbm->mem) >> PAGE_SHIFT,
+ size, vma->vm_page_prot) < 0) {
+@@ -726,6 +735,7 @@ static int driver_resume(struct usb_interface *intf)
+ return 0;
+ }
+
++#ifdef CONFIG_PM
+ /* The following routines apply to the entire device, not interfaces */
+ void usbfs_notify_suspend(struct usb_device *udev)
+ {
+@@ -744,6 +754,7 @@ void usbfs_notify_resume(struct usb_device *udev)
+ }
+ mutex_unlock(&usbfs_mutex);
+ }
++#endif
+
+ struct usb_driver usbfs_driver = {
+ .name = "usbfs",
+@@ -1102,14 +1113,55 @@ static int usbdev_release(struct inode *inode, struct file *file)
+ return 0;
+ }
+
++static void usbfs_blocking_completion(struct urb *urb)
++{
++ complete((struct completion *) urb->context);
++}
++
++/*
++ * Much like usb_start_wait_urb, but returns status separately from
++ * actual_length and uses a killable wait.
++ */
++static int usbfs_start_wait_urb(struct urb *urb, int timeout,
++ unsigned int *actlen)
++{
++ DECLARE_COMPLETION_ONSTACK(ctx);
++ unsigned long expire;
++ int rc;
++
++ urb->context = &ctx;
++ urb->complete = usbfs_blocking_completion;
++ *actlen = 0;
++ rc = usb_submit_urb(urb, GFP_KERNEL);
++ if (unlikely(rc))
++ return rc;
++
++ expire = (timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT);
++ rc = wait_for_completion_killable_timeout(&ctx, expire);
++ if (rc <= 0) {
++ usb_kill_urb(urb);
++ *actlen = urb->actual_length;
++ if (urb->status != -ENOENT)
++ ; /* Completed before it was killed */
++ else if (rc < 0)
++ return -EINTR;
++ else
++ return -ETIMEDOUT;
++ }
++ *actlen = urb->actual_length;
++ return urb->status;
++}
++
+ static int do_proc_control(struct usb_dev_state *ps,
+ struct usbdevfs_ctrltransfer *ctrl)
+ {
+ struct usb_device *dev = ps->dev;
+ unsigned int tmo;
+ unsigned char *tbuf;
+- unsigned wLength;
++ unsigned int wLength, actlen;
+ int i, pipe, ret;
++ struct urb *urb = NULL;
++ struct usb_ctrlrequest *dr = NULL;
+
+ ret = check_ctrlrecip(ps, ctrl->bRequestType, ctrl->bRequest,
+ ctrl->wIndex);
+@@ -1122,51 +1174,71 @@ static int do_proc_control(struct usb_dev_state *ps,
+ sizeof(struct usb_ctrlrequest));
+ if (ret)
+ return ret;
++
++ ret = -ENOMEM;
+ tbuf = (unsigned char *)__get_free_page(GFP_KERNEL);
+- if (!tbuf) {
+- ret = -ENOMEM;
++ if (!tbuf)
+ goto done;
+- }
++ urb = usb_alloc_urb(0, GFP_NOIO);
++ if (!urb)
++ goto done;
++ dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_NOIO);
++ if (!dr)
++ goto done;
++
++ dr->bRequestType = ctrl->bRequestType;
++ dr->bRequest = ctrl->bRequest;
++ dr->wValue = cpu_to_le16(ctrl->wValue);
++ dr->wIndex = cpu_to_le16(ctrl->wIndex);
++ dr->wLength = cpu_to_le16(ctrl->wLength);
++
+ tmo = ctrl->timeout;
+ snoop(&dev->dev, "control urb: bRequestType=%02x "
+ "bRequest=%02x wValue=%04x "
+ "wIndex=%04x wLength=%04x\n",
+ ctrl->bRequestType, ctrl->bRequest, ctrl->wValue,
+ ctrl->wIndex, ctrl->wLength);
+- if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) {
++
++ if ((ctrl->bRequestType & USB_DIR_IN) && wLength) {
+ pipe = usb_rcvctrlpipe(dev, 0);
+- snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0);
++ usb_fill_control_urb(urb, dev, pipe, (unsigned char *) dr, tbuf,
++ wLength, NULL, NULL);
++ snoop_urb(dev, NULL, pipe, wLength, tmo, SUBMIT, NULL, 0);
+
+ usb_unlock_device(dev);
+- i = usb_control_msg(dev, pipe, ctrl->bRequest,
+- ctrl->bRequestType, ctrl->wValue, ctrl->wIndex,
+- tbuf, ctrl->wLength, tmo);
++ i = usbfs_start_wait_urb(urb, tmo, &actlen);
++
++ /* Linger a bit, prior to the next control message. */
++ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
++ msleep(200);
+ usb_lock_device(dev);
+- snoop_urb(dev, NULL, pipe, max(i, 0), min(i, 0), COMPLETE,
+- tbuf, max(i, 0));
+- if ((i > 0) && ctrl->wLength) {
+- if (copy_to_user(ctrl->data, tbuf, i)) {
++ snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen);
++ if (!i && actlen) {
++ if (copy_to_user(ctrl->data, tbuf, actlen)) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ } else {
+- if (ctrl->wLength) {
+- if (copy_from_user(tbuf, ctrl->data, ctrl->wLength)) {
++ if (wLength) {
++ if (copy_from_user(tbuf, ctrl->data, wLength)) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ pipe = usb_sndctrlpipe(dev, 0);
+- snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT,
+- tbuf, ctrl->wLength);
++ usb_fill_control_urb(urb, dev, pipe, (unsigned char *) dr, tbuf,
++ wLength, NULL, NULL);
++ snoop_urb(dev, NULL, pipe, wLength, tmo, SUBMIT, tbuf, wLength);
+
+ usb_unlock_device(dev);
+- i = usb_control_msg(dev, pipe, ctrl->bRequest,
+- ctrl->bRequestType, ctrl->wValue, ctrl->wIndex,
+- tbuf, ctrl->wLength, tmo);
++ i = usbfs_start_wait_urb(urb, tmo, &actlen);
++
++ /* Linger a bit, prior to the next control message. */
++ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
++ msleep(200);
+ usb_lock_device(dev);
+- snoop_urb(dev, NULL, pipe, max(i, 0), min(i, 0), COMPLETE, NULL, 0);
++ snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0);
+ }
+ if (i < 0 && i != -EPIPE) {
+ dev_printk(KERN_DEBUG, &dev->dev, "usbfs: USBDEVFS_CONTROL "
+@@ -1174,8 +1246,11 @@ static int do_proc_control(struct usb_dev_state *ps,
+ current->comm, ctrl->bRequestType, ctrl->bRequest,
+ ctrl->wLength, i);
+ }
+- ret = i;
++ ret = (i < 0 ? i : actlen);
++
+ done:
++ kfree(dr);
++ usb_free_urb(urb);
+ free_page((unsigned long) tbuf);
+ usbfs_decrease_memory_usage(PAGE_SIZE + sizeof(struct urb) +
+ sizeof(struct usb_ctrlrequest));
+@@ -1195,10 +1270,11 @@ static int do_proc_bulk(struct usb_dev_state *ps,
+ struct usbdevfs_bulktransfer *bulk)
+ {
+ struct usb_device *dev = ps->dev;
+- unsigned int tmo, len1, pipe;
+- int len2;
++ unsigned int tmo, len1, len2, pipe;
+ unsigned char *tbuf;
+ int i, ret;
++ struct urb *urb = NULL;
++ struct usb_host_endpoint *ep;
+
+ ret = findintfep(ps->dev, bulk->ep);
+ if (ret < 0)
+@@ -1206,14 +1282,17 @@ static int do_proc_bulk(struct usb_dev_state *ps,
+ ret = checkintf(ps, ret);
+ if (ret)
+ return ret;
++
++ len1 = bulk->len;
++ if (len1 < 0 || len1 >= (INT_MAX - sizeof(struct urb)))
++ return -EINVAL;
++
+ if (bulk->ep & USB_DIR_IN)
+ pipe = usb_rcvbulkpipe(dev, bulk->ep & 0x7f);
+ else
+ pipe = usb_sndbulkpipe(dev, bulk->ep & 0x7f);
+- if (!usb_maxpacket(dev, pipe, !(bulk->ep & USB_DIR_IN)))
+- return -EINVAL;
+- len1 = bulk->len;
+- if (len1 >= (INT_MAX - sizeof(struct urb)))
++ ep = usb_pipe_endpoint(dev, pipe);
++ if (!ep || !usb_endpoint_maxp(&ep->desc))
+ return -EINVAL;
+ ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb));
+ if (ret)
+@@ -1223,17 +1302,29 @@ static int do_proc_bulk(struct usb_dev_state *ps,
+ * len1 can be almost arbitrarily large. Don't WARN if it's
+ * too big, just fail the request.
+ */
++ ret = -ENOMEM;
+ tbuf = kmalloc(len1, GFP_KERNEL | __GFP_NOWARN);
+- if (!tbuf) {
+- ret = -ENOMEM;
++ if (!tbuf)
++ goto done;
++ urb = usb_alloc_urb(0, GFP_KERNEL);
++ if (!urb)
+ goto done;
++
++ if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
++ USB_ENDPOINT_XFER_INT) {
++ pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30);
++ usb_fill_int_urb(urb, dev, pipe, tbuf, len1,
++ NULL, NULL, ep->desc.bInterval);
++ } else {
++ usb_fill_bulk_urb(urb, dev, pipe, tbuf, len1, NULL, NULL);
+ }
++
+ tmo = bulk->timeout;
+ if (bulk->ep & 0x80) {
+ snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT, NULL, 0);
+
+ usb_unlock_device(dev);
+- i = usb_bulk_msg(dev, pipe, tbuf, len1, &len2, tmo);
++ i = usbfs_start_wait_urb(urb, tmo, &len2);
+ usb_lock_device(dev);
+ snoop_urb(dev, NULL, pipe, len2, i, COMPLETE, tbuf, len2);
+
+@@ -1253,12 +1344,13 @@ static int do_proc_bulk(struct usb_dev_state *ps,
+ snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT, tbuf, len1);
+
+ usb_unlock_device(dev);
+- i = usb_bulk_msg(dev, pipe, tbuf, len1, &len2, tmo);
++ i = usbfs_start_wait_urb(urb, tmo, &len2);
+ usb_lock_device(dev);
+ snoop_urb(dev, NULL, pipe, len2, i, COMPLETE, NULL, 0);
+ }
+ ret = (i < 0 ? i : len2);
+ done:
++ usb_free_urb(urb);
+ kfree(tbuf);
+ usbfs_decrease_memory_usage(len1 + sizeof(struct urb));
+ return ret;
+diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
+index d630cccd2e6ea..5af810cd8a58f 100644
+--- a/drivers/usb/core/hcd-pci.c
++++ b/drivers/usb/core/hcd-pci.c
+@@ -616,10 +616,10 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = {
+ .suspend_noirq = hcd_pci_suspend_noirq,
+ .resume_noirq = hcd_pci_resume_noirq,
+ .resume = hcd_pci_resume,
+- .freeze = check_root_hub_suspended,
++ .freeze = hcd_pci_suspend,
+ .freeze_noirq = check_root_hub_suspended,
+ .thaw_noirq = NULL,
+- .thaw = NULL,
++ .thaw = hcd_pci_resume,
+ .poweroff = hcd_pci_suspend,
+ .poweroff_noirq = hcd_pci_suspend_noirq,
+ .restore_noirq = hcd_pci_resume_noirq,
+diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
+index 7ee6e4cc0d89e..8cbabc39f818c 100644
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -753,6 +753,7 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd)
+ {
+ struct urb *urb;
+ int length;
++ int status;
+ unsigned long flags;
+ char buffer[6]; /* Any root hubs with > 31 ports? */
+
+@@ -770,11 +771,17 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd)
+ if (urb) {
+ clear_bit(HCD_FLAG_POLL_PENDING, &hcd->flags);
+ hcd->status_urb = NULL;
++ if (urb->transfer_buffer_length >= length) {
++ status = 0;
++ } else {
++ status = -EOVERFLOW;
++ length = urb->transfer_buffer_length;
++ }
+ urb->actual_length = length;
+ memcpy(urb->transfer_buffer, buffer, length);
+
+ usb_hcd_unlink_urb_from_ep(hcd, urb);
+- usb_hcd_giveback_urb(hcd, urb, 0);
++ usb_hcd_giveback_urb(hcd, urb, status);
+ } else {
+ length = 0;
+ set_bit(HCD_FLAG_POLL_PENDING, &hcd->flags);
+@@ -976,6 +983,7 @@ static int register_root_hub(struct usb_hcd *hcd)
+ {
+ struct device *parent_dev = hcd->self.controller;
+ struct usb_device *usb_dev = hcd->self.root_hub;
++ struct usb_device_descriptor *descr;
+ const int devnum = 1;
+ int retval;
+
+@@ -987,13 +995,16 @@ static int register_root_hub(struct usb_hcd *hcd)
+ mutex_lock(&usb_bus_idr_lock);
+
+ usb_dev->ep0.desc.wMaxPacketSize = cpu_to_le16(64);
+- retval = usb_get_device_descriptor(usb_dev, USB_DT_DEVICE_SIZE);
+- if (retval != sizeof usb_dev->descriptor) {
++ descr = usb_get_device_descriptor(usb_dev);
++ if (IS_ERR(descr)) {
++ retval = PTR_ERR(descr);
+ mutex_unlock(&usb_bus_idr_lock);
+ dev_dbg (parent_dev, "can't read %s device descriptor %d\n",
+ dev_name(&usb_dev->dev), retval);
+- return (retval < 0) ? retval : -EMSGSIZE;
++ return retval;
+ }
++ usb_dev->descriptor = *descr;
++ kfree(descr);
+
+ if (le16_to_cpu(usb_dev->descriptor.bcdUSB) >= 0x0201) {
+ retval = usb_get_bos_descriptor(usb_dev);
+@@ -1556,6 +1567,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
+ urb->hcpriv = NULL;
+ INIT_LIST_HEAD(&urb->urb_list);
+ atomic_dec(&urb->use_count);
++ /*
++ * Order the write of urb->use_count above before the read
++ * of urb->reject below. Pairs with the memory barriers in
++ * usb_kill_urb() and usb_poison_urb().
++ */
++ smp_mb__after_atomic();
++
+ atomic_dec(&urb->dev->urbnum);
+ if (atomic_read(&urb->reject))
+ wake_up(&usb_kill_urb_queue);
+@@ -1658,6 +1676,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
+
+ usb_anchor_resume_wakeups(anchor);
+ atomic_dec(&urb->use_count);
++ /*
++ * Order the write of urb->use_count above before the read
++ * of urb->reject below. Pairs with the memory barriers in
++ * usb_kill_urb() and usb_poison_urb().
++ */
++ smp_mb__after_atomic();
++
+ if (unlikely(atomic_read(&urb->reject)))
+ wake_up(&usb_kill_urb_queue);
+ usb_put_urb(urb);
+@@ -1670,7 +1695,6 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+
+ spin_lock_irq(&bh->lock);
+ bh->running = true;
+- restart:
+ list_replace_init(&bh->head, &local_list);
+ spin_unlock_irq(&bh->lock);
+
+@@ -1684,10 +1708,17 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+ bh->completing_ep = NULL;
+ }
+
+- /* check if there are new URBs to giveback */
++ /*
++ * giveback new URBs next time to prevent this function
++ * from not exiting for a long time.
++ */
+ spin_lock_irq(&bh->lock);
+- if (!list_empty(&bh->head))
+- goto restart;
++ if (!list_empty(&bh->head)) {
++ if (bh->high_prio)
++ tasklet_hi_schedule(&bh->bh);
++ else
++ tasklet_schedule(&bh->bh);
++ }
+ bh->running = false;
+ spin_unlock_irq(&bh->lock);
+ }
+@@ -1716,7 +1747,7 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+ {
+ struct giveback_urb_bh *bh;
+- bool running, high_prio_bh;
++ bool running;
+
+ /* pass status to tasklet via unlinked */
+ if (likely(!urb->unlinked))
+@@ -1727,13 +1758,10 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+ return;
+ }
+
+- if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe)) {
++ if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe))
+ bh = &hcd->high_prio_bh;
+- high_prio_bh = true;
+- } else {
++ else
+ bh = &hcd->low_prio_bh;
+- high_prio_bh = false;
+- }
+
+ spin_lock(&bh->lock);
+ list_add_tail(&urb->urb_list, &bh->head);
+@@ -1742,7 +1770,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+
+ if (running)
+ ;
+- else if (high_prio_bh)
++ else if (bh->high_prio)
+ tasklet_hi_schedule(&bh->bh);
+ else
+ tasklet_schedule(&bh->bh);
+@@ -2938,6 +2966,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
+
+ /* initialize tasklets */
+ init_giveback_urb_bh(&hcd->high_prio_bh);
++ hcd->high_prio_bh.high_prio = true;
+ init_giveback_urb_bh(&hcd->low_prio_bh);
+
+ /* enable irqs just before we start the controller,
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
+index 86658a81d2844..4eb453d7e6f83 100644
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -43,6 +43,9 @@
+ #define USB_PRODUCT_USB5534B 0x5534
+ #define USB_VENDOR_CYPRESS 0x04b4
+ #define USB_PRODUCT_CY7C65632 0x6570
++#define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451
++#define USB_PRODUCT_TUSB8041_USB3 0x8140
++#define USB_PRODUCT_TUSB8041_USB2 0x8142
+ #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01
+ #define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02
+
+@@ -1110,7 +1113,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+ } else {
+ hub_power_on(hub, true);
+ }
+- }
++ /* Give some time on remote wakeup to let links to transit to U0 */
++ } else if (hub_is_superspeed(hub->hdev))
++ msleep(20);
++
+ init2:
+
+ /*
+@@ -1225,7 +1231,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+ */
+ if (portchange || (hub_is_superspeed(hub->hdev) &&
+ port_resumed))
+- set_bit(port1, hub->change_bits);
++ set_bit(port1, hub->event_bits);
+
+ } else if (udev->persist_enabled) {
+ #ifdef CONFIG_PM
+@@ -2376,9 +2382,8 @@ static int usb_enumerate_device_otg(struct usb_device *udev)
+ * usb_enumerate_device - Read device configs/intfs/otg (usbcore-internal)
+ * @udev: newly addressed device (in ADDRESS state)
+ *
+- * This is only called by usb_new_device() and usb_authorize_device()
+- * and FIXME -- all comments that apply to them apply here wrt to
+- * environment.
++ * This is only called by usb_new_device() -- all comments that apply there
++ * apply here wrt to environment.
+ *
+ * If the device is WUSB and not authorized, we don't attempt to read
+ * the string descriptors, as they will be errored out by the device
+@@ -2644,12 +2649,17 @@ int usb_authorize_device(struct usb_device *usb_dev)
+ }
+
+ if (usb_dev->wusb) {
+- result = usb_get_device_descriptor(usb_dev, sizeof(usb_dev->descriptor));
+- if (result < 0) {
++ struct usb_device_descriptor *descr;
++
++ descr = usb_get_device_descriptor(usb_dev);
++ if (IS_ERR(descr)) {
++ result = PTR_ERR(descr);
+ dev_err(&usb_dev->dev, "can't re-read device descriptor for "
+ "authorization: %d\n", result);
+ goto error_device_descriptor;
+ }
++ usb_dev->descriptor = *descr;
++ kfree(descr);
+ }
+
+ usb_dev->authorized = 1;
+@@ -4661,6 +4671,67 @@ static int hub_enable_device(struct usb_device *udev)
+ return hcd->driver->enable_device(hcd, udev);
+ }
+
++/*
++ * Get the bMaxPacketSize0 value during initialization by reading the
++ * device's device descriptor. Since we don't already know this value,
++ * the transfer is unsafe and it ignores I/O errors, only testing for
++ * reasonable received values.
++ *
++ * For "old scheme" initialization, size will be 8 so we read just the
++ * start of the device descriptor, which should work okay regardless of
++ * the actual bMaxPacketSize0 value. For "new scheme" initialization,
++ * size will be 64 (and buf will point to a sufficiently large buffer),
++ * which might not be kosher according to the USB spec but it's what
++ * Windows does and what many devices expect.
++ *
++ * Returns: bMaxPacketSize0 or a negative error code.
++ */
++static int get_bMaxPacketSize0(struct usb_device *udev,
++ struct usb_device_descriptor *buf, int size, bool first_time)
++{
++ int i, rc;
++
++ /*
++ * Retry on all errors; some devices are flakey.
++ * 255 is for WUSB devices, we actually need to use
++ * 512 (WUSB1.0[4.8.1]).
++ */
++ for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) {
++ /* Start with invalid values in case the transfer fails */
++ buf->bDescriptorType = buf->bMaxPacketSize0 = 0;
++ rc = usb_control_msg(udev, usb_rcvaddr0pipe(),
++ USB_REQ_GET_DESCRIPTOR, USB_DIR_IN,
++ USB_DT_DEVICE << 8, 0,
++ buf, size,
++ initial_descriptor_timeout);
++ switch (buf->bMaxPacketSize0) {
++ case 8: case 16: case 32: case 64: case 9:
++ if (buf->bDescriptorType == USB_DT_DEVICE) {
++ rc = buf->bMaxPacketSize0;
++ break;
++ }
++ fallthrough;
++ default:
++ if (rc >= 0)
++ rc = -EPROTO;
++ break;
++ }
++
++ /*
++ * Some devices time out if they are powered on
++ * when already connected. They need a second
++ * reset, so return early. But only on the first
++ * attempt, lest we get into a time-out/reset loop.
++ */
++ if (rc > 0 || (rc == -ETIMEDOUT && first_time &&
++ udev->speed > USB_SPEED_FULL))
++ break;
++ }
++ return rc;
++}
++
++#define GET_DESCRIPTOR_BUFSIZE 64
++
+ /* Reset device, (re)assign address, get device descriptor.
+ * Device connection must be stable, no more debouncing needed.
+ * Returns device in USB_STATE_ADDRESS, except on error.
+@@ -4670,10 +4741,17 @@ static int hub_enable_device(struct usb_device *udev)
+ * the port lock. For a newly detected device that is not accessible
+ * through any global pointers, it's not necessary to lock the device,
+ * but it is still necessary to lock the port.
++ *
++ * For a newly detected device, @dev_descr must be NULL. The device
++ * descriptor retrieved from the device will then be stored in
++ * @udev->descriptor. For an already existing device, @dev_descr
++ * must be non-NULL. The device descriptor will be stored there,
++ * not in @udev->descriptor, because descriptors for registered
++ * devices are meant to be immutable.
+ */
+ static int
+ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+- int retry_counter)
++ int retry_counter, struct usb_device_descriptor *dev_descr)
+ {
+ struct usb_device *hdev = hub->hdev;
+ struct usb_hcd *hcd = bus_to_hcd(hdev->bus);
+@@ -4685,6 +4763,13 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ int devnum = udev->devnum;
+ const char *driver_name;
+ bool do_new_scheme;
++ const bool initial = !dev_descr;
++ int maxp0;
++ struct usb_device_descriptor *buf, *descr;
++
++ buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO);
++ if (!buf)
++ return -ENOMEM;
+
+ /* root hub ports have a slightly longer reset period
+ * (from USB 2.0 spec, section 7.1.7.5)
+@@ -4700,8 +4785,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ if (oldspeed == USB_SPEED_LOW)
+ delay = HUB_LONG_RESET_TIME;
+
+- mutex_lock(hcd->address0_mutex);
+-
+ /* Reset the device; full speed may morph to high speed */
+ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
+ retval = hub_port_reset(hub, port1, udev, delay, false);
+@@ -4719,32 +4802,34 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ }
+ oldspeed = udev->speed;
+
+- /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ...
+- * it's fixed size except for full speed devices.
+- * For Wireless USB devices, ep0 max packet is always 512 (tho
+- * reported as 0xff in the device descriptor). WUSB1.0[4.8.1].
+- */
+- switch (udev->speed) {
+- case USB_SPEED_SUPER_PLUS:
+- case USB_SPEED_SUPER:
+- case USB_SPEED_WIRELESS: /* fixed at 512 */
+- udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512);
+- break;
+- case USB_SPEED_HIGH: /* fixed at 64 */
+- udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64);
+- break;
+- case USB_SPEED_FULL: /* 8, 16, 32, or 64 */
+- /* to determine the ep0 maxpacket size, try to read
+- * the device descriptor to get bMaxPacketSize0 and
+- * then correct our initial guess.
++ if (initial) {
++ /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ...
++ * it's fixed size except for full speed devices.
++ * For Wireless USB devices, ep0 max packet is always 512 (tho
++ * reported as 0xff in the device descriptor). WUSB1.0[4.8.1].
+ */
+- udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64);
+- break;
+- case USB_SPEED_LOW: /* fixed at 8 */
+- udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8);
+- break;
+- default:
+- goto fail;
++ switch (udev->speed) {
++ case USB_SPEED_SUPER_PLUS:
++ case USB_SPEED_SUPER:
++ case USB_SPEED_WIRELESS: /* fixed at 512 */
++ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512);
++ break;
++ case USB_SPEED_HIGH: /* fixed at 64 */
++ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64);
++ break;
++ case USB_SPEED_FULL: /* 8, 16, 32, or 64 */
++ /* to determine the ep0 maxpacket size, try to read
++ * the device descriptor to get bMaxPacketSize0 and
++ * then correct our initial guess.
++ */
++ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64);
++ break;
++ case USB_SPEED_LOW: /* fixed at 8 */
++ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8);
++ break;
++ default:
++ goto fail;
++ }
+ }
+
+ if (udev->speed == USB_SPEED_WIRELESS)
+@@ -4767,22 +4852,24 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ if (udev->speed < USB_SPEED_SUPER)
+ dev_info(&udev->dev,
+ "%s %s USB device number %d using %s\n",
+- (udev->config) ? "reset" : "new", speed,
++ (initial ? "new" : "reset"), speed,
+ devnum, driver_name);
+
+- /* Set up TT records, if needed */
+- if (hdev->tt) {
+- udev->tt = hdev->tt;
+- udev->ttport = hdev->ttport;
+- } else if (udev->speed != USB_SPEED_HIGH
+- && hdev->speed == USB_SPEED_HIGH) {
+- if (!hub->tt.hub) {
+- dev_err(&udev->dev, "parent hub has no TT\n");
+- retval = -EINVAL;
+- goto fail;
++ if (initial) {
++ /* Set up TT records, if needed */
++ if (hdev->tt) {
++ udev->tt = hdev->tt;
++ udev->ttport = hdev->ttport;
++ } else if (udev->speed != USB_SPEED_HIGH
++ && hdev->speed == USB_SPEED_HIGH) {
++ if (!hub->tt.hub) {
++ dev_err(&udev->dev, "parent hub has no TT\n");
++ retval = -EINVAL;
++ goto fail;
++ }
++ udev->tt = &hub->tt;
++ udev->ttport = port1;
+ }
+- udev->tt = &hub->tt;
+- udev->ttport = port1;
+ }
+
+ /* Why interleave GET_DESCRIPTOR and SET_ADDRESS this way?
+@@ -4801,9 +4888,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+
+ for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) {
+ if (do_new_scheme) {
+- struct usb_device_descriptor *buf;
+- int r = 0;
+-
+ retval = hub_enable_device(udev);
+ if (retval < 0) {
+ dev_err(&udev->dev,
+@@ -4812,52 +4896,14 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ goto fail;
+ }
+
+-#define GET_DESCRIPTOR_BUFSIZE 64
+- buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO);
+- if (!buf) {
+- retval = -ENOMEM;
+- continue;
+- }
+-
+- /* Retry on all errors; some devices are flakey.
+- * 255 is for WUSB devices, we actually need to use
+- * 512 (WUSB1.0[4.8.1]).
+- */
+- for (operations = 0; operations < GET_MAXPACKET0_TRIES;
+- ++operations) {
+- buf->bMaxPacketSize0 = 0;
+- r = usb_control_msg(udev, usb_rcvaddr0pipe(),
+- USB_REQ_GET_DESCRIPTOR, USB_DIR_IN,
+- USB_DT_DEVICE << 8, 0,
+- buf, GET_DESCRIPTOR_BUFSIZE,
+- initial_descriptor_timeout);
+- switch (buf->bMaxPacketSize0) {
+- case 8: case 16: case 32: case 64: case 255:
+- if (buf->bDescriptorType ==
+- USB_DT_DEVICE) {
+- r = 0;
+- break;
+- }
+- fallthrough;
+- default:
+- if (r == 0)
+- r = -EPROTO;
+- break;
+- }
+- /*
+- * Some devices time out if they are powered on
+- * when already connected. They need a second
+- * reset. But only on the first attempt,
+- * lest we get into a time out/reset loop
+- */
+- if (r == 0 || (r == -ETIMEDOUT &&
+- retries == 0 &&
+- udev->speed > USB_SPEED_FULL))
+- break;
++ maxp0 = get_bMaxPacketSize0(udev, buf,
++ GET_DESCRIPTOR_BUFSIZE, retries == 0);
++ if (maxp0 > 0 && !initial &&
++ maxp0 != udev->descriptor.bMaxPacketSize0) {
++ dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n");
++ retval = -ENODEV;
++ goto fail;
+ }
+- udev->descriptor.bMaxPacketSize0 =
+- buf->bMaxPacketSize0;
+- kfree(buf);
+
+ retval = hub_port_reset(hub, port1, udev, delay, false);
+ if (retval < 0) /* error or disconnect */
+@@ -4868,14 +4914,13 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ retval = -ENODEV;
+ goto fail;
+ }
+- if (r) {
+- if (r != -ENODEV)
++ if (maxp0 < 0) {
++ if (maxp0 != -ENODEV)
+ dev_err(&udev->dev, "device descriptor read/64, error %d\n",
+- r);
+- retval = -EMSGSIZE;
++ maxp0);
++ retval = maxp0;
+ continue;
+ }
+-#undef GET_DESCRIPTOR_BUFSIZE
+ }
+
+ /*
+@@ -4921,18 +4966,22 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ break;
+ }
+
+- retval = usb_get_device_descriptor(udev, 8);
+- if (retval < 8) {
++ /* !do_new_scheme || wusb */
++ maxp0 = get_bMaxPacketSize0(udev, buf, 8, retries == 0);
++ if (maxp0 < 0) {
++ retval = maxp0;
+ if (retval != -ENODEV)
+ dev_err(&udev->dev,
+ "device descriptor read/8, error %d\n",
+ retval);
+- if (retval >= 0)
+- retval = -EMSGSIZE;
+ } else {
+ u32 delay;
+
+- retval = 0;
++ if (!initial && maxp0 != udev->descriptor.bMaxPacketSize0) {
++ dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n");
++ retval = -ENODEV;
++ goto fail;
++ }
+
+ delay = udev->parent->hub_delay;
+ udev->hub_delay = min_t(u32, delay,
+@@ -4951,48 +5000,61 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ goto fail;
+
+ /*
+- * Some superspeed devices have finished the link training process
+- * and attached to a superspeed hub port, but the device descriptor
+- * got from those devices show they aren't superspeed devices. Warm
+- * reset the port attached by the devices can fix them.
++ * Check the ep0 maxpacket guess and correct it if necessary.
++ * maxp0 is the value stored in the device descriptor;
++ * i is the value it encodes (logarithmic for SuperSpeed or greater).
+ */
+- if ((udev->speed >= USB_SPEED_SUPER) &&
+- (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) {
+- dev_err(&udev->dev, "got a wrong device descriptor, "
+- "warm reset device\n");
+- hub_port_reset(hub, port1, udev,
+- HUB_BH_RESET_TIME, true);
+- retval = -EINVAL;
+- goto fail;
+- }
+-
+- if (udev->descriptor.bMaxPacketSize0 == 0xff ||
+- udev->speed >= USB_SPEED_SUPER)
+- i = 512;
+- else
+- i = udev->descriptor.bMaxPacketSize0;
+- if (usb_endpoint_maxp(&udev->ep0.desc) != i) {
+- if (udev->speed == USB_SPEED_LOW ||
+- !(i == 8 || i == 16 || i == 32 || i == 64)) {
+- dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", i);
+- retval = -EMSGSIZE;
+- goto fail;
+- }
++ i = maxp0;
++ if (udev->speed >= USB_SPEED_SUPER) {
++ if (maxp0 <= 16)
++ i = 1 << maxp0;
++ else
++ i = 0; /* Invalid */
++ }
++ if (usb_endpoint_maxp(&udev->ep0.desc) == i) {
++ ; /* Initial ep0 maxpacket guess is right */
++ } else if ((udev->speed == USB_SPEED_FULL ||
++ udev->speed == USB_SPEED_HIGH) &&
++ (i == 8 || i == 16 || i == 32 || i == 64)) {
++ /* Initial guess is wrong; use the descriptor's value */
+ if (udev->speed == USB_SPEED_FULL)
+ dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i);
+ else
+ dev_warn(&udev->dev, "Using ep0 maxpacket: %d\n", i);
+ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(i);
+ usb_ep0_reinit(udev);
++ } else {
++ /* Initial guess is wrong and descriptor's value is invalid */
++ dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", maxp0);
++ retval = -EMSGSIZE;
++ goto fail;
+ }
+
+- retval = usb_get_device_descriptor(udev, USB_DT_DEVICE_SIZE);
+- if (retval < (signed)sizeof(udev->descriptor)) {
++ descr = usb_get_device_descriptor(udev);
++ if (IS_ERR(descr)) {
++ retval = PTR_ERR(descr);
+ if (retval != -ENODEV)
+ dev_err(&udev->dev, "device descriptor read/all, error %d\n",
+ retval);
+- if (retval >= 0)
+- retval = -ENOMSG;
++ goto fail;
++ }
++ if (initial)
++ udev->descriptor = *descr;
++ else
++ *dev_descr = *descr;
++ kfree(descr);
++
++ /*
++ * Some superspeed devices have finished the link training process
++ * and attached to a superspeed hub port, but the device descriptor
++ * got from those devices show they aren't superspeed devices. Warm
++ * reset the port attached by the devices can fix them.
++ */
++ if ((udev->speed >= USB_SPEED_SUPER) &&
++ (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) {
++ dev_err(&udev->dev, "got a wrong device descriptor, warm reset device\n");
++ hub_port_reset(hub, port1, udev, HUB_BH_RESET_TIME, true);
++ retval = -EINVAL;
+ goto fail;
+ }
+
+@@ -5016,7 +5078,7 @@ fail:
+ hub_port_disable(hub, port1, 0);
+ update_devnum(udev, devnum); /* for disconnect processing */
+ }
+- mutex_unlock(hcd->address0_mutex);
++ kfree(buf);
+ return retval;
+ }
+
+@@ -5097,7 +5159,7 @@ hub_power_remaining(struct usb_hub *hub)
+
+
+ static int descriptors_changed(struct usb_device *udev,
+- struct usb_device_descriptor *old_device_descriptor,
++ struct usb_device_descriptor *new_device_descriptor,
+ struct usb_host_bos *old_bos)
+ {
+ int changed = 0;
+@@ -5108,8 +5170,8 @@ static int descriptors_changed(struct usb_device *udev,
+ int length;
+ char *buf;
+
+- if (memcmp(&udev->descriptor, old_device_descriptor,
+- sizeof(*old_device_descriptor)) != 0)
++ if (memcmp(&udev->descriptor, new_device_descriptor,
++ sizeof(*new_device_descriptor)) != 0)
+ return 1;
+
+ if ((old_bos && !udev->bos) || (!old_bos && udev->bos))
+@@ -5191,6 +5253,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
+ struct usb_port *port_dev = hub->ports[port1 - 1];
+ struct usb_device *udev = port_dev->child;
+ static int unreliable_port = -1;
++ bool retry_locked;
+
+ /* Disconnect any existing devices under this port */
+ if (udev) {
+@@ -5246,8 +5309,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
+ unit_load = 100;
+
+ status = 0;
+- for (i = 0; i < PORT_INIT_TRIES; i++) {
+
++ for (i = 0; i < PORT_INIT_TRIES; i++) {
++ usb_lock_port(port_dev);
++ mutex_lock(hcd->address0_mutex);
++ retry_locked = true;
+ /* reallocate for each attempt, since references
+ * to the previous one can escape in various ways
+ */
+@@ -5255,6 +5321,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
+ if (!udev) {
+ dev_err(&port_dev->dev,
+ "couldn't allocate usb_device\n");
++ mutex_unlock(hcd->address0_mutex);
++ usb_unlock_port(port_dev);
+ goto done;
+ }
+
+@@ -5276,12 +5344,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
+ }
+
+ /* reset (non-USB 3.0 devices) and get descriptor */
+- usb_lock_port(port_dev);
+- status = hub_port_init(hub, udev, port1, i);
+- usb_unlock_port(port_dev);
++ status = hub_port_init(hub, udev, port1, i, NULL);
+ if (status < 0)
+ goto loop;
+
++ mutex_unlock(hcd->address0_mutex);
++ usb_unlock_port(port_dev);
++ retry_locked = false;
++
+ if (udev->quirks & USB_QUIRK_DELAY_INIT)
+ msleep(2000);
+
+@@ -5374,6 +5444,10 @@ loop:
+ usb_ep0_reinit(udev);
+ release_devnum(udev);
+ hub_free_dev(udev);
++ if (retry_locked) {
++ mutex_unlock(hcd->address0_mutex);
++ usb_unlock_port(port_dev);
++ }
+ usb_put_dev(udev);
+ if ((status == -ENOTCONN) || (status == -ENOTSUPP))
+ break;
+@@ -5417,9 +5491,8 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
+ {
+ struct usb_port *port_dev = hub->ports[port1 - 1];
+ struct usb_device *udev = port_dev->child;
+- struct usb_device_descriptor descriptor;
++ struct usb_device_descriptor *descr;
+ int status = -ENODEV;
+- int retval;
+
+ dev_dbg(&port_dev->dev, "status %04x, change %04x, %s\n", portstatus,
+ portchange, portspeed(hub, portstatus));
+@@ -5446,23 +5519,20 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
+ * changed device descriptors before resuscitating the
+ * device.
+ */
+- descriptor = udev->descriptor;
+- retval = usb_get_device_descriptor(udev,
+- sizeof(udev->descriptor));
+- if (retval < 0) {
++ descr = usb_get_device_descriptor(udev);
++ if (IS_ERR(descr)) {
+ dev_dbg(&udev->dev,
+- "can't read device descriptor %d\n",
+- retval);
++ "can't read device descriptor %ld\n",
++ PTR_ERR(descr));
+ } else {
+- if (descriptors_changed(udev, &descriptor,
++ if (descriptors_changed(udev, descr,
+ udev->bos)) {
+ dev_dbg(&udev->dev,
+ "device descriptor has changed\n");
+- /* for disconnect() calls */
+- udev->descriptor = descriptor;
+ } else {
+ status = 0; /* Nothing to do */
+ }
++ kfree(descr);
+ }
+ #ifdef CONFIG_PM
+ } else if (udev->state == USB_STATE_SUSPENDED &&
+@@ -5779,6 +5849,16 @@ static const struct usb_device_id hub_id_table[] = {
+ .idVendor = USB_VENDOR_GENESYS_LOGIC,
+ .bInterfaceClass = USB_CLASS_HUB,
+ .driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND},
++ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
++ | USB_DEVICE_ID_MATCH_PRODUCT,
++ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS,
++ .idProduct = USB_PRODUCT_TUSB8041_USB2,
++ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
++ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
++ | USB_DEVICE_ID_MATCH_PRODUCT,
++ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS,
++ .idProduct = USB_PRODUCT_TUSB8041_USB3,
++ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
+ { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS,
+ .bDeviceClass = USB_CLASS_HUB},
+ { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS,
+@@ -5880,7 +5960,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
+ struct usb_device *parent_hdev = udev->parent;
+ struct usb_hub *parent_hub;
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+- struct usb_device_descriptor descriptor = udev->descriptor;
++ struct usb_device_descriptor descriptor;
+ struct usb_host_bos *bos;
+ int i, j, ret = 0;
+ int port1 = udev->portnum;
+@@ -5915,15 +5995,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
+ bos = udev->bos;
+ udev->bos = NULL;
+
++ mutex_lock(hcd->address0_mutex);
++
+ for (i = 0; i < PORT_INIT_TRIES; ++i) {
+
+ /* ep0 maxpacket size may change; let the HCD know about it.
+ * Other endpoints will be handled by re-enumeration. */
+ usb_ep0_reinit(udev);
+- ret = hub_port_init(parent_hub, udev, port1, i);
++ ret = hub_port_init(parent_hub, udev, port1, i, &descriptor);
+ if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV)
+ break;
+ }
++ mutex_unlock(hcd->address0_mutex);
+
+ if (ret < 0)
+ goto re_enumerate;
+@@ -5931,7 +6014,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
+ /* Device might have changed firmware (DFU or similar) */
+ if (descriptors_changed(udev, &descriptor, bos)) {
+ dev_info(&udev->dev, "device firmware changed\n");
+- udev->descriptor = descriptor; /* for disconnect() calls */
+ goto re_enumerate;
+ }
+
+@@ -6028,6 +6110,11 @@ re_enumerate_no_bos:
+ * the reset is over (using their post_reset method).
+ *
+ * Return: The same as for usb_reset_and_verify_device().
++ * However, if a reset is already in progress (for instance, if a
++ * driver doesn't have pre_reset() or post_reset() callbacks, and while
++ * being unbound or re-bound during the ongoing reset its disconnect()
++ * or probe() routine tries to perform a second, nested reset), the
++ * routine returns -EINPROGRESS.
+ *
+ * Note:
+ * The caller must own the device lock. For example, it's safe to use
+@@ -6061,6 +6148,10 @@ int usb_reset_device(struct usb_device *udev)
+ return -EISDIR;
+ }
+
++ if (udev->reset_in_progress)
++ return -EINPROGRESS;
++ udev->reset_in_progress = 1;
++
+ port_dev = hub->ports[udev->portnum - 1];
+
+ /*
+@@ -6125,6 +6216,7 @@ int usb_reset_device(struct usb_device *udev)
+
+ usb_autosuspend_device(udev);
+ memalloc_noio_restore(noio_flag);
++ udev->reset_in_progress = 0;
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(usb_reset_device);
+diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
+index 4d59d927ae3e3..1673e5d089263 100644
+--- a/drivers/usb/core/message.c
++++ b/drivers/usb/core/message.c
+@@ -1039,40 +1039,35 @@ char *usb_cache_string(struct usb_device *udev, int index)
+ }
+
+ /*
+- * usb_get_device_descriptor - (re)reads the device descriptor (usbcore)
+- * @dev: the device whose device descriptor is being updated
+- * @size: how much of the descriptor to read
++ * usb_get_device_descriptor - read the device descriptor
++ * @udev: the device whose device descriptor should be read
+ *
+ * Context: task context, might sleep.
+ *
+- * Updates the copy of the device descriptor stored in the device structure,
+- * which dedicates space for this purpose.
+- *
+ * Not exported, only for use by the core. If drivers really want to read
+ * the device descriptor directly, they can call usb_get_descriptor() with
+ * type = USB_DT_DEVICE and index = 0.
+ *
+- * This call is synchronous, and may not be used in an interrupt context.
+- *
+- * Return: The number of bytes received on success, or else the status code
+- * returned by the underlying usb_control_msg() call.
++ * Returns: a pointer to a dynamically allocated usb_device_descriptor
++ * structure (which the caller must deallocate), or an ERR_PTR value.
+ */
+-int usb_get_device_descriptor(struct usb_device *dev, unsigned int size)
++struct usb_device_descriptor *usb_get_device_descriptor(struct usb_device *udev)
+ {
+ struct usb_device_descriptor *desc;
+ int ret;
+
+- if (size > sizeof(*desc))
+- return -EINVAL;
+ desc = kmalloc(sizeof(*desc), GFP_NOIO);
+ if (!desc)
+- return -ENOMEM;
++ return ERR_PTR(-ENOMEM);
++
++ ret = usb_get_descriptor(udev, USB_DT_DEVICE, 0, desc, sizeof(*desc));
++ if (ret == sizeof(*desc))
++ return desc;
+
+- ret = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc, size);
+ if (ret >= 0)
+- memcpy(&dev->descriptor, desc, size);
++ ret = -EMSGSIZE;
+ kfree(desc);
+- return ret;
++ return ERR_PTR(ret);
+ }
+
+ /*
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index 8239fe7129dd7..15e9bd180a1d2 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -362,6 +362,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM },
+ { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM },
+
++ /* Realforce 87U Keyboard */
++ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM },
++
+ /* M-Systems Flash Disk Pioneers */
+ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
+
+@@ -388,6 +391,15 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* Kingston DataTraveler 3.0 */
+ { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM },
+
++ /* NVIDIA Jetson devices in Force Recovery mode */
++ { USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME },
++
+ /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */
+ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF },
+
+@@ -404,6 +416,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+ USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
++ /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/
++ { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
++
+ /* Realtek hub in Dell WD19 (Type-C) */
+ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM },
+
+@@ -421,6 +436,10 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* novation SoundControl XL */
+ { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
+
++ /* Focusrite Scarlett Solo USB */
++ { USB_DEVICE(0x1235, 0x8211), .driver_info =
++ USB_QUIRK_DISCONNECT_SUSPEND },
++
+ /* Huawei 4G LTE module */
+ { USB_DEVICE(0x12d1, 0x15bb), .driver_info =
+ USB_QUIRK_DISCONNECT_SUSPEND },
+@@ -434,6 +453,16 @@ static const struct usb_device_id usb_quirk_list[] = {
+ { USB_DEVICE(0x1532, 0x0116), .driver_info =
+ USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+
++ /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */
++ { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME },
++ { USB_DEVICE(0x17ef, 0x1019), .driver_info = USB_QUIRK_RESET_RESUME },
++
++ /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */
++ { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM },
++
++ /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */
++ { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM },
++
+ /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */
+ { USB_DEVICE(0x17ef, 0xa012), .driver_info =
+ USB_QUIRK_DISCONNECT_SUSPEND },
+@@ -501,6 +530,15 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* DJI CineSSD */
+ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
+
++ /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */
++ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM },
++
++ /* DELL USB GEN2 */
++ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME },
++
++ /* VCOM device */
++ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
++
+ /* INTEL VALUE SSD */
+ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+
+diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
+index fa2e49d432ff6..60ee0469d86e7 100644
+--- a/drivers/usb/core/sysfs.c
++++ b/drivers/usb/core/sysfs.c
+@@ -868,11 +868,7 @@ read_descriptors(struct file *filp, struct kobject *kobj,
+ size_t srclen, n;
+ int cfgno;
+ void *src;
+- int retval;
+
+- retval = usb_lock_device_interruptible(udev);
+- if (retval < 0)
+- return -EINTR;
+ /* The binary attribute begins with the device descriptor.
+ * Following that are the raw descriptor entries for all the
+ * configurations (config plus subsidiary descriptors).
+@@ -897,7 +893,6 @@ read_descriptors(struct file *filp, struct kobject *kobj,
+ off -= srclen;
+ }
+ }
+- usb_unlock_device(udev);
+ return count - nleft;
+ }
+
+diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
+index 30727729a44cc..33d62d7e3929f 100644
+--- a/drivers/usb/core/urb.c
++++ b/drivers/usb/core/urb.c
+@@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb)
+ if (!(urb && urb->dev && urb->ep))
+ return;
+ atomic_inc(&urb->reject);
++ /*
++ * Order the write of urb->reject above before the read
++ * of urb->use_count below. Pairs with the barriers in
++ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
++ */
++ smp_mb__after_atomic();
+
+ usb_hcd_unlink_urb(urb, -ENOENT);
+ wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
+@@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb)
+ if (!urb)
+ return;
+ atomic_inc(&urb->reject);
++ /*
++ * Order the write of urb->reject above before the read
++ * of urb->use_count below. Pairs with the barriers in
++ * __usb_hcd_giveback_urb() and usb_hcd_submit_urb().
++ */
++ smp_mb__after_atomic();
+
+ if (!urb->dev || !urb->ep)
+ return;
+diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
+index 50b2fc7fcc0e3..8751276ef5789 100644
+--- a/drivers/usb/core/usb-acpi.c
++++ b/drivers/usb/core/usb-acpi.c
+@@ -37,6 +37,71 @@ bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
+ }
+ EXPORT_SYMBOL_GPL(usb_acpi_power_manageable);
+
++#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899"
++#define USB_DSM_DISABLE_U1_U2_FOR_PORT 5
++
++/**
++ * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port.
++ * @hdev: USB device belonging to the usb hub
++ * @index: zero based port index
++ *
++ * Some USB3 ports may not support USB3 link power management U1/U2 states
++ * due to different retimer setup. ACPI provides _DSM method which returns 0x01
++ * if U1 and U2 states should be disabled. Evaluate _DSM with:
++ * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899
++ * Arg1: Revision ID = 0
++ * Arg2: Function Index = 5
++ * Arg3: (empty)
++ *
++ * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0
++ */
++
++int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
++{
++ union acpi_object *obj;
++ acpi_handle port_handle;
++ int port1 = index + 1;
++ guid_t guid;
++ int ret;
++
++ ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid);
++ if (ret)
++ return ret;
++
++ port_handle = usb_get_hub_port_acpi_handle(hdev, port1);
++ if (!port_handle) {
++ dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1);
++ return -ENODEV;
++ }
++
++ if (!acpi_check_dsm(port_handle, &guid, 0,
++ BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) {
++ dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n",
++ port1, USB_DSM_DISABLE_U1_U2_FOR_PORT);
++ return -ENODEV;
++ }
++
++ obj = acpi_evaluate_dsm(port_handle, &guid, 0,
++ USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL);
++
++ if (!obj)
++ return -ENODEV;
++
++ if (obj->type != ACPI_TYPE_INTEGER) {
++ dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1);
++ ACPI_FREE(obj);
++ return -EINVAL;
++ }
++
++ if (obj->integer.value == 0x01)
++ ret = 1;
++
++ ACPI_FREE(obj);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable);
++
+ /**
+ * usb_acpi_set_power_state - control usb port's power via acpi power
+ * resource
+diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
+index 62368c4ed37af..ec8e003f59415 100644
+--- a/drivers/usb/core/usb.c
++++ b/drivers/usb/core/usb.c
+@@ -206,6 +206,82 @@ int usb_find_common_endpoints_reverse(struct usb_host_interface *alt,
+ }
+ EXPORT_SYMBOL_GPL(usb_find_common_endpoints_reverse);
+
++/**
++ * usb_find_endpoint() - Given an endpoint address, search for the endpoint's
++ * usb_host_endpoint structure in an interface's current altsetting.
++ * @intf: the interface whose current altsetting should be searched
++ * @ep_addr: the endpoint address (number and direction) to find
++ *
++ * Search the altsetting's list of endpoints for one with the specified address.
++ *
++ * Return: Pointer to the usb_host_endpoint if found, %NULL otherwise.
++ */
++static const struct usb_host_endpoint *usb_find_endpoint(
++ const struct usb_interface *intf, unsigned int ep_addr)
++{
++ int n;
++ const struct usb_host_endpoint *ep;
++
++ n = intf->cur_altsetting->desc.bNumEndpoints;
++ ep = intf->cur_altsetting->endpoint;
++ for (; n > 0; (--n, ++ep)) {
++ if (ep->desc.bEndpointAddress == ep_addr)
++ return ep;
++ }
++ return NULL;
++}
++
++/**
++ * usb_check_bulk_endpoints - Check whether an interface's current altsetting
++ * contains a set of bulk endpoints with the given addresses.
++ * @intf: the interface whose current altsetting should be searched
++ * @ep_addrs: 0-terminated array of the endpoint addresses (number and
++ * direction) to look for
++ *
++ * Search for endpoints with the specified addresses and check their types.
++ *
++ * Return: %true if all the endpoints are found and are bulk, %false otherwise.
++ */
++bool usb_check_bulk_endpoints(
++ const struct usb_interface *intf, const u8 *ep_addrs)
++{
++ const struct usb_host_endpoint *ep;
++
++ for (; *ep_addrs; ++ep_addrs) {
++ ep = usb_find_endpoint(intf, *ep_addrs);
++ if (!ep || !usb_endpoint_xfer_bulk(&ep->desc))
++ return false;
++ }
++ return true;
++}
++EXPORT_SYMBOL_GPL(usb_check_bulk_endpoints);
++
++/**
++ * usb_check_int_endpoints - Check whether an interface's current altsetting
++ * contains a set of interrupt endpoints with the given addresses.
++ * @intf: the interface whose current altsetting should be searched
++ * @ep_addrs: 0-terminated array of the endpoint addresses (number and
++ * direction) to look for
++ *
++ * Search for endpoints with the specified addresses and check their types.
++ *
++ * Return: %true if all the endpoints are found and are interrupt,
++ * %false otherwise.
++ */
++bool usb_check_int_endpoints(
++ const struct usb_interface *intf, const u8 *ep_addrs)
++{
++ const struct usb_host_endpoint *ep;
++
++ for (; *ep_addrs; ++ep_addrs) {
++ ep = usb_find_endpoint(intf, *ep_addrs);
++ if (!ep || !usb_endpoint_xfer_int(&ep->desc))
++ return false;
++ }
++ return true;
++}
++EXPORT_SYMBOL_GPL(usb_check_int_endpoints);
++
+ /**
+ * usb_find_alt_setting() - Given a configuration, find the alternate setting
+ * for the given interface.
+@@ -1036,7 +1112,7 @@ static void usb_debugfs_init(void)
+
+ static void usb_debugfs_cleanup(void)
+ {
+- debugfs_remove(debugfs_lookup("devices", usb_debug_root));
++ debugfs_lookup_and_remove("devices", usb_debug_root);
+ }
+
+ /*
+diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
+index 82538daac8b89..3bb2e1db42b5d 100644
+--- a/drivers/usb/core/usb.h
++++ b/drivers/usb/core/usb.h
+@@ -42,8 +42,8 @@ extern bool usb_endpoint_is_ignored(struct usb_device *udev,
+ struct usb_endpoint_descriptor *epd);
+ extern int usb_remove_device(struct usb_device *udev);
+
+-extern int usb_get_device_descriptor(struct usb_device *dev,
+- unsigned int size);
++extern struct usb_device_descriptor *usb_get_device_descriptor(
++ struct usb_device *udev);
+ extern int usb_set_isoch_delay(struct usb_device *dev);
+ extern int usb_get_bos_descriptor(struct usb_device *dev);
+ extern void usb_release_bos_descriptor(struct usb_device *dev);
+diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
+index cb9059a8444b5..71e62b3081dbb 100644
+--- a/drivers/usb/dwc2/core.h
++++ b/drivers/usb/dwc2/core.h
+@@ -1417,6 +1417,7 @@ void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg);
+ void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2);
+ int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode);
+ #define dwc2_is_device_connected(hsotg) (hsotg->connected)
++#define dwc2_is_device_enabled(hsotg) (hsotg->enabled)
+ int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg);
+ int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup);
+ int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg);
+@@ -1453,6 +1454,7 @@ static inline int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg,
+ int testmode)
+ { return 0; }
+ #define dwc2_is_device_connected(hsotg) (0)
++#define dwc2_is_device_enabled(hsotg) (0)
+ static inline int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg)
+ { return 0; }
+ static inline int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg,
+diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c
+index 2d4176f5788eb..36f2c38416e5e 100644
+--- a/drivers/usb/dwc2/drd.c
++++ b/drivers/usb/dwc2/drd.c
+@@ -7,6 +7,7 @@
+ * Author(s): Amelie Delaunay <amelie.delaunay@st.com>
+ */
+
++#include <linux/clk.h>
+ #include <linux/iopoll.h>
+ #include <linux/platform_device.h>
+ #include <linux/usb/role.h>
+@@ -25,9 +26,9 @@ static void dwc2_ovr_init(struct dwc2_hsotg *hsotg)
+ gotgctl &= ~(GOTGCTL_BVALOVAL | GOTGCTL_AVALOVAL | GOTGCTL_VBVALOVAL);
+ dwc2_writel(hsotg, gotgctl, GOTGCTL);
+
+- dwc2_force_mode(hsotg, false);
+-
+ spin_unlock_irqrestore(&hsotg->lock, flags);
++
++ dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST));
+ }
+
+ static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid)
+@@ -39,6 +40,7 @@ static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid)
+ (!valid && !(gotgctl & GOTGCTL_ASESVLD)))
+ return -EALREADY;
+
++ gotgctl &= ~GOTGCTL_BVALOVAL;
+ if (valid)
+ gotgctl |= GOTGCTL_AVALOVAL | GOTGCTL_VBVALOVAL;
+ else
+@@ -57,6 +59,7 @@ static int dwc2_ovr_bvalid(struct dwc2_hsotg *hsotg, bool valid)
+ (!valid && !(gotgctl & GOTGCTL_BSESVLD)))
+ return -EALREADY;
+
++ gotgctl &= ~GOTGCTL_AVALOVAL;
+ if (valid)
+ gotgctl |= GOTGCTL_BVALOVAL | GOTGCTL_VBVALOVAL;
+ else
+@@ -86,14 +89,30 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role)
+ }
+ #endif
+
++ /*
++ * In case of USB_DR_MODE_PERIPHERAL, clock is disabled at the end of
++ * the probe and enabled on udc_start.
++ * If role-switch set is called before the udc_start, we need to enable
++ * the clock to read/write GOTGCTL and GUSBCFG registers to override
++ * mode and sessions. It is the case if cable is plugged at boot.
++ */
++ if (!hsotg->ll_hw_enabled && hsotg->clk) {
++ int ret = clk_prepare_enable(hsotg->clk);
++
++ if (ret)
++ return ret;
++ }
++
+ spin_lock_irqsave(&hsotg->lock, flags);
+
+ if (role == USB_ROLE_HOST) {
+ already = dwc2_ovr_avalid(hsotg, true);
+ } else if (role == USB_ROLE_DEVICE) {
+ already = dwc2_ovr_bvalid(hsotg, true);
+- /* This clear DCTL.SFTDISCON bit */
+- dwc2_hsotg_core_connect(hsotg);
++ if (dwc2_is_device_enabled(hsotg)) {
++ /* This clear DCTL.SFTDISCON bit */
++ dwc2_hsotg_core_connect(hsotg);
++ }
+ } else {
+ if (dwc2_is_device_mode(hsotg)) {
+ if (!dwc2_ovr_bvalid(hsotg, false))
+@@ -110,6 +129,9 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role)
+ /* This will raise a Connector ID Status Change Interrupt */
+ dwc2_force_mode(hsotg, role == USB_ROLE_HOST);
+
++ if (!hsotg->ll_hw_enabled && hsotg->clk)
++ clk_disable_unprepare(hsotg->clk);
++
+ dev_dbg(hsotg->dev, "%s-session valid\n",
+ role == USB_ROLE_NONE ? "No" :
+ role == USB_ROLE_HOST ? "A" : "B");
+diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
+index 11d85a6e0b0dc..519bb82b00e80 100644
+--- a/drivers/usb/dwc2/gadget.c
++++ b/drivers/usb/dwc2/gadget.c
+@@ -1198,6 +1198,8 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg,
+ }
+ ctrl |= DXEPCTL_CNAK;
+ } else {
++ hs_req->req.frame_number = hs_ep->target_frame;
++ hs_req->req.actual = 0;
+ dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
+ return;
+ }
+@@ -2857,9 +2859,12 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep)
+
+ do {
+ hs_req = get_ep_head(hs_ep);
+- if (hs_req)
++ if (hs_req) {
++ hs_req->req.frame_number = hs_ep->target_frame;
++ hs_req->req.actual = 0;
+ dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req,
+ -ENODATA);
++ }
+ dwc2_gadget_incr_frame_num(hs_ep);
+ /* Update current frame number value. */
+ hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg);
+@@ -2912,8 +2917,11 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep)
+
+ while (dwc2_gadget_target_frame_elapsed(ep)) {
+ hs_req = get_ep_head(ep);
+- if (hs_req)
++ if (hs_req) {
++ hs_req->req.frame_number = ep->target_frame;
++ hs_req->req.actual = 0;
+ dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA);
++ }
+
+ dwc2_gadget_incr_frame_num(ep);
+ /* Update current frame number value. */
+@@ -3002,8 +3010,11 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep)
+
+ while (dwc2_gadget_target_frame_elapsed(hs_ep)) {
+ hs_req = get_ep_head(hs_ep);
+- if (hs_req)
++ if (hs_req) {
++ hs_req->req.frame_number = hs_ep->target_frame;
++ hs_req->req.actual = 0;
+ dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
++ }
+
+ dwc2_gadget_incr_frame_num(hs_ep);
+ /* Update current frame number value. */
+@@ -3583,7 +3594,8 @@ void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg)
+ void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg)
+ {
+ /* remove the soft-disconnect and let's go */
+- dwc2_clear_bit(hsotg, DCTL, DCTL_SFTDISCON);
++ if (!hsotg->role_sw || (dwc2_readl(hsotg, GOTGCTL) & GOTGCTL_BSESVLD))
++ dwc2_clear_bit(hsotg, DCTL, DCTL_SFTDISCON);
+ }
+
+ /**
+@@ -4533,7 +4545,6 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget,
+
+ WARN_ON(hsotg->driver);
+
+- driver->driver.bus = NULL;
+ hsotg->driver = driver;
+ hsotg->gadget.dev.of_node = hsotg->dev->of_node;
+ hsotg->gadget.speed = USB_SPEED_UNKNOWN;
+@@ -4963,7 +4974,18 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg)
+ hsotg->params.g_np_tx_fifo_size);
+ dev_dbg(dev, "RXFIFO size: %d\n", hsotg->params.g_rx_fifo_size);
+
+- hsotg->gadget.max_speed = USB_SPEED_HIGH;
++ switch (hsotg->params.speed) {
++ case DWC2_SPEED_PARAM_LOW:
++ hsotg->gadget.max_speed = USB_SPEED_LOW;
++ break;
++ case DWC2_SPEED_PARAM_FULL:
++ hsotg->gadget.max_speed = USB_SPEED_FULL;
++ break;
++ default:
++ hsotg->gadget.max_speed = USB_SPEED_HIGH;
++ break;
++ }
++
+ hsotg->gadget.ops = &dwc2_hsotg_gadget_ops;
+ hsotg->gadget.name = dev_name(dev);
+ hsotg->remote_wakeup_allowed = 0;
+@@ -5074,7 +5096,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
+ hsotg->gadget.speed = USB_SPEED_UNKNOWN;
+ spin_unlock_irqrestore(&hsotg->lock, flags);
+
+- for (ep = 0; ep < hsotg->num_of_eps; ep++) {
++ for (ep = 1; ep < hsotg->num_of_eps; ep++) {
+ if (hsotg->eps_in[ep])
+ dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
+ if (hsotg->eps_out[ep])
+diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
+index a215ec9e172e6..82322696b903b 100644
+--- a/drivers/usb/dwc2/hcd.c
++++ b/drivers/usb/dwc2/hcd.c
+@@ -4403,11 +4403,12 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
+ * If not hibernation nor partial power down are supported,
+ * clock gating is used to save power.
+ */
+- if (!hsotg->params.no_clock_gating)
++ if (!hsotg->params.no_clock_gating) {
+ dwc2_host_enter_clock_gating(hsotg);
+
+- /* After entering suspend, hardware is not accessible */
+- clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
++ /* After entering suspend, hardware is not accessible */
++ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
++ }
+ break;
+ default:
+ goto skip_power_saving;
+@@ -5193,7 +5194,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ retval = -EINVAL;
+- goto error1;
++ goto error2;
+ }
+ hcd->rsrc_start = res->start;
+ hcd->rsrc_len = resource_size(res);
+diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
+index 89a788326c562..24beff610cf2c 100644
+--- a/drivers/usb/dwc2/hcd_queue.c
++++ b/drivers/usb/dwc2/hcd_queue.c
+@@ -59,7 +59,7 @@
+ #define DWC2_UNRESERVE_DELAY (msecs_to_jiffies(5))
+
+ /* If we get a NAK, wait this long before retrying */
+-#define DWC2_RETRY_WAIT_DELAY (1 * 1E6L)
++#define DWC2_RETRY_WAIT_DELAY (1 * NSEC_PER_MSEC)
+
+ /**
+ * dwc2_periodic_channel_available() - Checks that a channel is available for a
+diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
+index c8f18f3ba9e35..6496bfbd34ed9 100644
+--- a/drivers/usb/dwc2/platform.c
++++ b/drivers/usb/dwc2/platform.c
+@@ -121,13 +121,6 @@ static int dwc2_get_dr_mode(struct dwc2_hsotg *hsotg)
+ return 0;
+ }
+
+-static void __dwc2_disable_regulators(void *data)
+-{
+- struct dwc2_hsotg *hsotg = data;
+-
+- regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies);
+-}
+-
+ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg)
+ {
+ struct platform_device *pdev = to_platform_device(hsotg->dev);
+@@ -138,11 +131,6 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg)
+ if (ret)
+ return ret;
+
+- ret = devm_add_action_or_reset(&pdev->dev,
+- __dwc2_disable_regulators, hsotg);
+- if (ret)
+- return ret;
+-
+ if (hsotg->clk) {
+ ret = clk_prepare_enable(hsotg->clk);
+ if (ret)
+@@ -154,9 +142,9 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg)
+ } else if (hsotg->plat && hsotg->plat->phy_init) {
+ ret = hsotg->plat->phy_init(pdev, hsotg->plat->phy_type);
+ } else {
+- ret = phy_power_on(hsotg->phy);
++ ret = phy_init(hsotg->phy);
+ if (ret == 0)
+- ret = phy_init(hsotg->phy);
++ ret = phy_power_on(hsotg->phy);
+ }
+
+ return ret;
+@@ -188,9 +176,9 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg)
+ } else if (hsotg->plat && hsotg->plat->phy_exit) {
+ ret = hsotg->plat->phy_exit(pdev, hsotg->plat->phy_type);
+ } else {
+- ret = phy_exit(hsotg->phy);
++ ret = phy_power_off(hsotg->phy);
+ if (ret == 0)
+- ret = phy_power_off(hsotg->phy);
++ ret = phy_exit(hsotg->phy);
+ }
+ if (ret)
+ return ret;
+@@ -198,7 +186,7 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg)
+ if (hsotg->clk)
+ clk_disable_unprepare(hsotg->clk);
+
+- return 0;
++ return regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies);
+ }
+
+ /**
+@@ -217,6 +205,11 @@ int dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg)
+ return ret;
+ }
+
++static void dwc2_reset_control_assert(void *data)
++{
++ reset_control_assert(data);
++}
++
+ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
+ {
+ int i, ret;
+@@ -229,6 +222,10 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
+ }
+
+ reset_control_deassert(hsotg->reset);
++ ret = devm_add_action_or_reset(hsotg->dev, dwc2_reset_control_assert,
++ hsotg->reset);
++ if (ret)
++ return ret;
+
+ hsotg->reset_ecc = devm_reset_control_get_optional(hsotg->dev, "dwc2-ecc");
+ if (IS_ERR(hsotg->reset_ecc)) {
+@@ -238,6 +235,10 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
+ }
+
+ reset_control_deassert(hsotg->reset_ecc);
++ ret = devm_add_action_or_reset(hsotg->dev, dwc2_reset_control_assert,
++ hsotg->reset_ecc);
++ if (ret)
++ return ret;
+
+ /*
+ * Attempt to find a generic PHY, then look for an old style
+@@ -364,10 +365,7 @@ static int dwc2_driver_remove(struct platform_device *dev)
+ if (hsotg->ll_hw_enabled)
+ dwc2_lowlevel_hw_disable(hsotg);
+
+- reset_control_assert(hsotg->reset);
+- reset_control_assert(hsotg->reset_ecc);
+-
+- return ret;
++ return 0;
+ }
+
+ /**
+@@ -575,6 +573,9 @@ static int dwc2_driver_probe(struct platform_device *dev)
+ ggpio |= GGPIO_STM32_OTG_GCCFG_IDEN;
+ ggpio |= GGPIO_STM32_OTG_GCCFG_VBDEN;
+ dwc2_writel(hsotg, ggpio, GGPIO);
++
++ /* ID/VBUS detection startup time */
++ usleep_range(5000, 7000);
+ }
+
+ retval = dwc2_drd_init(hsotg);
+@@ -655,7 +656,7 @@ error_init:
+ if (hsotg->params.activate_stm_id_vb_detection)
+ regulator_disable(hsotg->usb33d);
+ error:
+- if (hsotg->dr_mode != USB_DR_MODE_PERIPHERAL)
++ if (hsotg->ll_hw_enabled)
+ dwc2_lowlevel_hw_disable(hsotg);
+ return retval;
+ }
+diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
+index 0104a80b185e1..6377b9cf81a59 100644
+--- a/drivers/usb/dwc3/core.c
++++ b/drivers/usb/dwc3/core.c
+@@ -114,29 +114,31 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
+ dwc->current_dr_role = mode;
+ }
+
+-static int dwc3_core_soft_reset(struct dwc3 *dwc);
+-
+ static void __dwc3_set_mode(struct work_struct *work)
+ {
+ struct dwc3 *dwc = work_to_dwc(work);
+ unsigned long flags;
+ int ret;
+ u32 reg;
++ u32 desired_dr_role;
+
+ mutex_lock(&dwc->mutex);
++ spin_lock_irqsave(&dwc->lock, flags);
++ desired_dr_role = dwc->desired_dr_role;
++ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ pm_runtime_get_sync(dwc->dev);
+
+ if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_OTG)
+ dwc3_otg_update(dwc, 0);
+
+- if (!dwc->desired_dr_role)
++ if (!desired_dr_role)
+ goto out;
+
+- if (dwc->desired_dr_role == dwc->current_dr_role)
++ if (desired_dr_role == dwc->current_dr_role)
+ goto out;
+
+- if (dwc->desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev)
++ if (desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev)
+ goto out;
+
+ switch (dwc->current_dr_role) {
+@@ -158,8 +160,13 @@ static void __dwc3_set_mode(struct work_struct *work)
+ break;
+ }
+
+- /* For DRD host or device mode only */
+- if (dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG) {
++ /*
++ * When current_dr_role is not set, there's no role switching.
++ * Only perform GCTL.CoreSoftReset when there's DRD role switching.
++ */
++ if (dwc->current_dr_role && ((DWC3_IP_IS(DWC3) ||
++ DWC3_VER_IS_PRIOR(DWC31, 190A)) &&
++ desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) {
+ reg = dwc3_readl(dwc->regs, DWC3_GCTL);
+ reg |= DWC3_GCTL_CORESOFTRESET;
+ dwc3_writel(dwc->regs, DWC3_GCTL, reg);
+@@ -179,11 +186,11 @@ static void __dwc3_set_mode(struct work_struct *work)
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- dwc3_set_prtcap(dwc, dwc->desired_dr_role);
++ dwc3_set_prtcap(dwc, desired_dr_role);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+- switch (dwc->desired_dr_role) {
++ switch (desired_dr_role) {
+ case DWC3_GCTL_PRTCAP_HOST:
+ ret = dwc3_host_init(dwc);
+ if (ret) {
+@@ -260,7 +267,7 @@ u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type)
+ * dwc3_core_soft_reset - Issues core soft reset and PHY reset
+ * @dwc: pointer to our context structure
+ */
+-static int dwc3_core_soft_reset(struct dwc3 *dwc)
++int dwc3_core_soft_reset(struct dwc3 *dwc)
+ {
+ u32 reg;
+ int retries = 1000;
+@@ -268,14 +275,15 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
+ /*
+ * We're resetting only the device side because, if we're in host mode,
+ * XHCI driver will reset the host block. If dwc3 was configured for
+- * host-only mode, then we can return early.
++ * host-only mode or current role is host, then we can return early.
+ */
+- if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
++ if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
+ return 0;
+
+ reg = dwc3_readl(dwc->regs, DWC3_DCTL);
+ reg |= DWC3_DCTL_CSFTRST;
+- dwc3_writel(dwc->regs, DWC3_DCTL, reg);
++ reg &= ~DWC3_DCTL_RUN_STOP;
++ dwc3_gadget_dctl_write_safe(dwc, reg);
+
+ /*
+ * For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit
+@@ -725,15 +733,16 @@ static void dwc3_core_exit(struct dwc3 *dwc)
+ {
+ dwc3_event_buffers_cleanup(dwc);
+
++ usb_phy_set_suspend(dwc->usb2_phy, 1);
++ usb_phy_set_suspend(dwc->usb3_phy, 1);
++ phy_power_off(dwc->usb2_generic_phy);
++ phy_power_off(dwc->usb3_generic_phy);
++
+ usb_phy_shutdown(dwc->usb2_phy);
+ usb_phy_shutdown(dwc->usb3_phy);
+ phy_exit(dwc->usb2_generic_phy);
+ phy_exit(dwc->usb3_generic_phy);
+
+- usb_phy_set_suspend(dwc->usb2_phy, 1);
+- usb_phy_set_suspend(dwc->usb3_phy, 1);
+- phy_power_off(dwc->usb2_generic_phy);
+- phy_power_off(dwc->usb3_generic_phy);
+ clk_bulk_disable_unprepare(dwc->num_clks, dwc->clks);
+ reset_control_assert(dwc->reset);
+ }
+@@ -954,8 +963,13 @@ static int dwc3_core_init(struct dwc3 *dwc)
+
+ if (!dwc->ulpi_ready) {
+ ret = dwc3_core_ulpi_init(dwc);
+- if (ret)
++ if (ret) {
++ if (ret == -ETIMEDOUT) {
++ dwc3_core_soft_reset(dwc);
++ ret = -EPROBE_DEFER;
++ }
+ goto err0;
++ }
+ dwc->ulpi_ready = true;
+ }
+
+@@ -1036,6 +1050,21 @@ static int dwc3_core_init(struct dwc3 *dwc)
+ dwc3_writel(dwc->regs, DWC3_GUCTL2, reg);
+ }
+
++ /*
++ * When configured in HOST mode, after issuing U3/L2 exit controller
++ * fails to send proper CRC checksum in CRC5 field. Because of this
++ * behaviour Transaction Error is generated, resulting in reset and
++ * re-enumeration of usb device attached. All the termsel, xcvrsel,
++ * opmode becomes 0 during end of resume. Enabling bit 10 of GUCTL1
++ * will correct this problem. This option is to support certain
++ * legacy ULPI PHYs.
++ */
++ if (dwc->resume_hs_terminations) {
++ reg = dwc3_readl(dwc->regs, DWC3_GUCTL1);
++ reg |= DWC3_GUCTL1_RESUME_OPMODE_HS_HOST;
++ dwc3_writel(dwc->regs, DWC3_GUCTL1, reg);
++ }
++
+ if (!DWC3_VER_IS_PRIOR(DWC3, 250A)) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUCTL1);
+
+@@ -1064,22 +1093,6 @@ static int dwc3_core_init(struct dwc3 *dwc)
+ dwc3_writel(dwc->regs, DWC3_GUCTL1, reg);
+ }
+
+- if (dwc->dr_mode == USB_DR_MODE_HOST ||
+- dwc->dr_mode == USB_DR_MODE_OTG) {
+- reg = dwc3_readl(dwc->regs, DWC3_GUCTL);
+-
+- /*
+- * Enable Auto retry Feature to make the controller operating in
+- * Host mode on seeing transaction errors(CRC errors or internal
+- * overrun scenerios) on IN transfers to reply to the device
+- * with a non-terminating retry ACK (i.e, an ACK transcation
+- * packet with Retry=1 & Nump != 0)
+- */
+- reg |= DWC3_GUCTL_HSTINAUTORETRY;
+-
+- dwc3_writel(dwc->regs, DWC3_GUCTL, reg);
+- }
+-
+ /*
+ * Must config both number of packets and max burst settings to enable
+ * RX and/or TX threshold.
+@@ -1268,10 +1281,10 @@ static void dwc3_get_properties(struct dwc3 *dwc)
+ u8 lpm_nyet_threshold;
+ u8 tx_de_emphasis;
+ u8 hird_threshold;
+- u8 rx_thr_num_pkt_prd;
+- u8 rx_max_burst_prd;
+- u8 tx_thr_num_pkt_prd;
+- u8 tx_max_burst_prd;
++ u8 rx_thr_num_pkt_prd = 0;
++ u8 rx_max_burst_prd = 0;
++ u8 tx_thr_num_pkt_prd = 0;
++ u8 tx_max_burst_prd = 0;
+ u8 tx_fifo_resize_max_num;
+ const char *usb_psy_name;
+ int ret;
+@@ -1378,6 +1391,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
+ "snps,dis-del-phy-power-chg-quirk");
+ dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev,
+ "snps,dis-tx-ipgap-linecheck-quirk");
++ dwc->resume_hs_terminations = device_property_read_bool(dev,
++ "snps,resume-hs-terminations");
+ dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev,
+ "snps,parkmode-disable-ss-quirk");
+
+@@ -1565,10 +1580,6 @@ static int dwc3_probe(struct platform_device *pdev)
+
+ dwc3_get_properties(dwc);
+
+- ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
+- if (ret)
+- return ret;
+-
+ dwc->reset = devm_reset_control_array_get_optional_shared(dev);
+ if (IS_ERR(dwc->reset))
+ return PTR_ERR(dwc->reset);
+@@ -1605,16 +1616,21 @@ static int dwc3_probe(struct platform_device *pdev)
+ platform_set_drvdata(pdev, dwc);
+ dwc3_cache_hwparams(dwc);
+
++ if (!dwc->sysdev_is_parent &&
++ DWC3_GHWPARAMS0_AWIDTH(dwc->hwparams.hwparams0) == 64) {
++ ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
++ if (ret)
++ goto disable_clks;
++ }
++
+ spin_lock_init(&dwc->lock);
+ mutex_init(&dwc->mutex);
+
++ pm_runtime_get_noresume(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, DWC3_DEFAULT_AUTOSUSPEND_DELAY);
+ pm_runtime_enable(dev);
+- ret = pm_runtime_get_sync(dev);
+- if (ret < 0)
+- goto err1;
+
+ pm_runtime_forbid(dev);
+
+@@ -1654,16 +1670,16 @@ err5:
+ dwc3_debugfs_exit(dwc);
+ dwc3_event_buffers_cleanup(dwc);
+
+- usb_phy_shutdown(dwc->usb2_phy);
+- usb_phy_shutdown(dwc->usb3_phy);
+- phy_exit(dwc->usb2_generic_phy);
+- phy_exit(dwc->usb3_generic_phy);
+-
+ usb_phy_set_suspend(dwc->usb2_phy, 1);
+ usb_phy_set_suspend(dwc->usb3_phy, 1);
+ phy_power_off(dwc->usb2_generic_phy);
+ phy_power_off(dwc->usb3_generic_phy);
+
++ usb_phy_shutdown(dwc->usb2_phy);
++ usb_phy_shutdown(dwc->usb3_phy);
++ phy_exit(dwc->usb2_generic_phy);
++ phy_exit(dwc->usb3_generic_phy);
++
+ dwc3_ulpi_exit(dwc);
+
+ err4:
+@@ -1673,12 +1689,10 @@ err3:
+ dwc3_free_event_buffers(dwc);
+
+ err2:
+- pm_runtime_allow(&pdev->dev);
+-
+-err1:
+- pm_runtime_put_sync(&pdev->dev);
+- pm_runtime_disable(&pdev->dev);
+-
++ pm_runtime_allow(dev);
++ pm_runtime_disable(dev);
++ pm_runtime_set_suspended(dev);
++ pm_runtime_put_noidle(dev);
+ disable_clks:
+ clk_bulk_disable_unprepare(dwc->num_clks, dwc->clks);
+ assert_reset:
+@@ -1702,8 +1716,14 @@ static int dwc3_remove(struct platform_device *pdev)
+ dwc3_core_exit(dwc);
+ dwc3_ulpi_exit(dwc);
+
++ pm_runtime_allow(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
++ /*
++ * HACK: Clear the driver data, which is currently accessed by parent
++ * glue drivers, before allowing the parent to suspend.
++ */
++ platform_set_drvdata(pdev, NULL);
+ pm_runtime_set_suspended(&pdev->dev);
+
+ dwc3_free_event_buffers(dwc);
+@@ -1751,9 +1771,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
+ case DWC3_GCTL_PRTCAP_DEVICE:
+ if (pm_runtime_suspended(dwc->dev))
+ break;
+- spin_lock_irqsave(&dwc->lock, flags);
+ dwc3_gadget_suspend(dwc);
+- spin_unlock_irqrestore(&dwc->lock, flags);
+ synchronize_irq(dwc->irq_gadget);
+ dwc3_core_exit(dwc);
+ break;
+@@ -1814,9 +1832,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
+ return ret;
+
+ dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
+- spin_lock_irqsave(&dwc->lock, flags);
+ dwc3_gadget_resume(dwc);
+- spin_unlock_irqrestore(&dwc->lock, flags);
+ break;
+ case DWC3_GCTL_PRTCAP_HOST:
+ if (!PMSG_IS_AUTO(msg)) {
+diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
+index 5612bfdf37da9..3dcb5b744f7ce 100644
+--- a/drivers/usb/dwc3/core.h
++++ b/drivers/usb/dwc3/core.h
+@@ -143,7 +143,7 @@
+ #define DWC3_GHWPARAMS8 0xc600
+ #define DWC3_GUCTL3 0xc60c
+ #define DWC3_GFLADJ 0xc630
+-#define DWC3_GHWPARAMS9 0xc680
++#define DWC3_GHWPARAMS9 0xc6e0
+
+ /* Device Registers */
+ #define DWC3_DCFG 0xc700
+@@ -252,14 +252,12 @@
+ #define DWC3_GCTL_GBLHIBERNATIONEN BIT(1)
+ #define DWC3_GCTL_DSBLCLKGTNG BIT(0)
+
+-/* Global User Control Register */
+-#define DWC3_GUCTL_HSTINAUTORETRY BIT(14)
+-
+ /* Global User Control 1 Register */
+ #define DWC3_GUCTL1_DEV_DECOUPLE_L1L2_EVT BIT(31)
+ #define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS BIT(28)
+ #define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24)
+ #define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17)
++#define DWC3_GUCTL1_RESUME_OPMODE_HS_HOST BIT(10)
+
+ /* Global Status Register */
+ #define DWC3_GSTS_OTG_IP BIT(10)
+@@ -723,6 +721,7 @@ struct dwc3_ep {
+ #define DWC3_EP_FORCE_RESTART_STREAM BIT(9)
+ #define DWC3_EP_FIRST_STREAM_PRIMED BIT(10)
+ #define DWC3_EP_PENDING_CLEAR_STALL BIT(11)
++#define DWC3_EP_TXFIFO_RESIZED BIT(12)
+
+ /* This last one is specific to EP0 */
+ #define DWC3_EP0_DIR_IN BIT(31)
+@@ -1027,6 +1026,7 @@ struct dwc3_scratchpad_array {
+ * @tx_fifo_resize_max_num: max number of fifos allocated during txfifo resize
+ * @hsphy_interface: "utmi" or "ulpi"
+ * @connected: true when we're connected to a host, false otherwise
++ * @softconnect: true when gadget connect is called, false when disconnect runs
+ * @delayed_status: true when gadget driver asks for delayed status
+ * @ep0_bounced: true when we used bounce buffer
+ * @ep0_expect_in: true when we expect a DATA IN transfer
+@@ -1070,6 +1070,8 @@ struct dwc3_scratchpad_array {
+ * change quirk.
+ * @dis_tx_ipgap_linecheck_quirk: set if we disable u2mac linestate
+ * check during HS transmit.
++ * @resume_hs_terminations: Set if we enable quirk for fixing improper CRC
++ * generation after resume from suspend.
+ * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed
+ * instances in park mode.
+ * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk
+@@ -1080,6 +1082,7 @@ struct dwc3_scratchpad_array {
+ * 3 - Reserved
+ * @dis_metastability_quirk: set to disable metastability quirk.
+ * @dis_split_quirk: set to disable split boundary.
++ * @suspended: set to track suspend event due to U3/L2.
+ * @imod_interval: set the interrupt moderation interval in 250ns
+ * increments or 0 to disable.
+ * @max_cfg_eps: current max number of IN eps used across all USB configs.
+@@ -1087,6 +1090,7 @@ struct dwc3_scratchpad_array {
+ * address.
+ * @num_ep_resized: carries the current number endpoints which have had its tx
+ * fifo resized.
++ * @debug_root: root debugfs directory for this device to put its files in.
+ */
+ struct dwc3 {
+ struct work_struct drd_work;
+@@ -1246,6 +1250,7 @@ struct dwc3 {
+ const char *hsphy_interface;
+
+ unsigned connected:1;
++ unsigned softconnect:1;
+ unsigned delayed_status:1;
+ unsigned ep0_bounced:1;
+ unsigned ep0_expect_in:1;
+@@ -1281,6 +1286,7 @@ struct dwc3 {
+ unsigned dis_u2_freeclk_exists_quirk:1;
+ unsigned dis_del_phy_power_chg_quirk:1;
+ unsigned dis_tx_ipgap_linecheck_quirk:1;
++ unsigned resume_hs_terminations:1;
+ unsigned parkmode_disable_ss_quirk:1;
+
+ unsigned tx_de_emphasis_quirk:1;
+@@ -1290,12 +1296,14 @@ struct dwc3 {
+
+ unsigned dis_split_quirk:1;
+ unsigned async_callbacks:1;
++ unsigned suspended:1;
+
+ u16 imod_interval;
+
+ int max_cfg_eps;
+ int last_fifo_depth;
+ int num_ep_resized;
++ struct dentry *debug_root;
+ };
+
+ #define INCRX_BURST_MODE 0
+@@ -1507,6 +1515,8 @@ bool dwc3_has_imod(struct dwc3 *dwc);
+ int dwc3_event_buffers_setup(struct dwc3 *dwc);
+ void dwc3_event_buffers_cleanup(struct dwc3 *dwc);
+
++int dwc3_core_soft_reset(struct dwc3 *dwc);
++
+ #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
+ int dwc3_host_init(struct dwc3 *dwc);
+ void dwc3_host_exit(struct dwc3 *dwc);
+diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
+index d223c54115f4a..01d0366bf93ae 100644
+--- a/drivers/usb/dwc3/debug.h
++++ b/drivers/usb/dwc3/debug.h
+@@ -414,11 +414,14 @@ static inline const char *dwc3_gadget_generic_cmd_status_string(int status)
+
+ #ifdef CONFIG_DEBUG_FS
+ extern void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep);
++extern void dwc3_debugfs_remove_endpoint_dir(struct dwc3_ep *dep);
+ extern void dwc3_debugfs_init(struct dwc3 *d);
+ extern void dwc3_debugfs_exit(struct dwc3 *d);
+ #else
+ static inline void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep)
+ { }
++static inline void dwc3_debugfs_remove_endpoint_dir(struct dwc3_ep *dep)
++{ }
+ static inline void dwc3_debugfs_init(struct dwc3 *d)
+ { }
+ static inline void dwc3_debugfs_exit(struct dwc3 *d)
+diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c
+index f2b7675c7f621..f0ffd2e5c6429 100644
+--- a/drivers/usb/dwc3/debugfs.c
++++ b/drivers/usb/dwc3/debugfs.c
+@@ -327,6 +327,11 @@ static int dwc3_lsp_show(struct seq_file *s, void *unused)
+ unsigned int current_mode;
+ unsigned long flags;
+ u32 reg;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = dwc3_readl(dwc->regs, DWC3_GSTS);
+@@ -345,6 +350,8 @@ static int dwc3_lsp_show(struct seq_file *s, void *unused)
+ }
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -390,6 +397,11 @@ static int dwc3_mode_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = s->private;
+ unsigned long flags;
+ u32 reg;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = dwc3_readl(dwc->regs, DWC3_GCTL);
+@@ -409,6 +421,8 @@ static int dwc3_mode_show(struct seq_file *s, void *unused)
+ seq_printf(s, "UNKNOWN %08x\n", DWC3_GCTL_PRTCAP(reg));
+ }
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -458,6 +472,11 @@ static int dwc3_testmode_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = s->private;
+ unsigned long flags;
+ u32 reg;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = dwc3_readl(dwc->regs, DWC3_DCTL);
+@@ -488,6 +507,8 @@ static int dwc3_testmode_show(struct seq_file *s, void *unused)
+ seq_printf(s, "UNKNOWN %d\n", reg);
+ }
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -504,6 +525,7 @@ static ssize_t dwc3_testmode_write(struct file *file,
+ unsigned long flags;
+ u32 testmode = 0;
+ char buf[32];
++ int ret;
+
+ if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+ return -EFAULT;
+@@ -521,10 +543,16 @@ static ssize_t dwc3_testmode_write(struct file *file,
+ else
+ testmode = 0;
+
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
++
+ spin_lock_irqsave(&dwc->lock, flags);
+ dwc3_gadget_set_test_mode(dwc, testmode);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return count;
+ }
+
+@@ -543,12 +571,18 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused)
+ enum dwc3_link_state state;
+ u32 reg;
+ u8 speed;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = dwc3_readl(dwc->regs, DWC3_GSTS);
+ if (DWC3_GSTS_CURMOD(reg) != DWC3_GSTS_CURMOD_DEVICE) {
+ seq_puts(s, "Not available\n");
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ pm_runtime_put_sync(dwc->dev);
+ return 0;
+ }
+
+@@ -561,6 +595,8 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused)
+ dwc3_gadget_hs_link_string(state));
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -579,6 +615,7 @@ static ssize_t dwc3_link_state_write(struct file *file,
+ char buf[32];
+ u32 reg;
+ u8 speed;
++ int ret;
+
+ if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+ return -EFAULT;
+@@ -598,10 +635,15 @@ static ssize_t dwc3_link_state_write(struct file *file,
+ else
+ return -EINVAL;
+
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
++
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = dwc3_readl(dwc->regs, DWC3_GSTS);
+ if (DWC3_GSTS_CURMOD(reg) != DWC3_GSTS_CURMOD_DEVICE) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ pm_runtime_put_sync(dwc->dev);
+ return -EINVAL;
+ }
+
+@@ -611,12 +653,15 @@ static ssize_t dwc3_link_state_write(struct file *file,
+ if (speed < DWC3_DSTS_SUPERSPEED &&
+ state != DWC3_LINK_STATE_RECOV) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ pm_runtime_put_sync(dwc->dev);
+ return -EINVAL;
+ }
+
+ dwc3_gadget_set_link_state(dwc, state);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return count;
+ }
+
+@@ -640,6 +685,11 @@ static int dwc3_tx_fifo_size_show(struct seq_file *s, void *unused)
+ unsigned long flags;
+ u32 mdwidth;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_TXFIFO);
+@@ -652,6 +702,8 @@ static int dwc3_tx_fifo_size_show(struct seq_file *s, void *unused)
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -662,6 +714,11 @@ static int dwc3_rx_fifo_size_show(struct seq_file *s, void *unused)
+ unsigned long flags;
+ u32 mdwidth;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_RXFIFO);
+@@ -674,6 +731,8 @@ static int dwc3_rx_fifo_size_show(struct seq_file *s, void *unused)
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -683,12 +742,19 @@ static int dwc3_tx_request_queue_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_TXREQQ);
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -698,12 +764,19 @@ static int dwc3_rx_request_queue_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_RXREQQ);
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -713,12 +786,19 @@ static int dwc3_rx_info_queue_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_RXINFOQ);
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -728,12 +808,19 @@ static int dwc3_descriptor_fetch_queue_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_DESCFETCHQ);
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -743,12 +830,19 @@ static int dwc3_event_queue_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ u32 val;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ val = dwc3_core_fifo_space(dep, DWC3_EVENTQ);
+ seq_printf(s, "%u\n", val);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -793,6 +887,11 @@ static int dwc3_trb_ring_show(struct seq_file *s, void *unused)
+ struct dwc3 *dwc = dep->dwc;
+ unsigned long flags;
+ int i;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dep->number <= 1) {
+@@ -822,6 +921,8 @@ static int dwc3_trb_ring_show(struct seq_file *s, void *unused)
+ out:
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -834,6 +935,11 @@ static int dwc3_ep_info_register_show(struct seq_file *s, void *unused)
+ u32 lower_32_bits;
+ u32 upper_32_bits;
+ u32 reg;
++ int ret;
++
++ ret = pm_runtime_resume_and_get(dwc->dev);
++ if (ret < 0)
++ return ret;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ reg = DWC3_GDBGLSPMUX_EPSELECT(dep->number);
+@@ -846,6 +952,8 @@ static int dwc3_ep_info_register_show(struct seq_file *s, void *unused)
+ seq_printf(s, "0x%016llx\n", ep_info);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
++ pm_runtime_put_sync(dwc->dev);
++
+ return 0;
+ }
+
+@@ -873,27 +981,23 @@ static const struct dwc3_ep_file_map dwc3_ep_file_map[] = {
+ { "GDBGEPINFO", &dwc3_ep_info_register_fops, },
+ };
+
+-static void dwc3_debugfs_create_endpoint_files(struct dwc3_ep *dep,
+- struct dentry *parent)
++void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep)
+ {
++ struct dentry *dir;
+ int i;
+
++ dir = debugfs_create_dir(dep->name, dep->dwc->debug_root);
+ for (i = 0; i < ARRAY_SIZE(dwc3_ep_file_map); i++) {
+ const struct file_operations *fops = dwc3_ep_file_map[i].fops;
+ const char *name = dwc3_ep_file_map[i].name;
+
+- debugfs_create_file(name, 0444, parent, dep, fops);
++ debugfs_create_file(name, 0444, dir, dep, fops);
+ }
+ }
+
+-void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep)
++void dwc3_debugfs_remove_endpoint_dir(struct dwc3_ep *dep)
+ {
+- struct dentry *dir;
+- struct dentry *root;
+-
+- root = debugfs_lookup(dev_name(dep->dwc->dev), usb_debug_root);
+- dir = debugfs_create_dir(dep->name, root);
+- dwc3_debugfs_create_endpoint_files(dep, dir);
++ debugfs_lookup_and_remove(dep->name, dep->dwc->debug_root);
+ }
+
+ void dwc3_debugfs_init(struct dwc3 *dwc)
+@@ -909,8 +1013,10 @@ void dwc3_debugfs_init(struct dwc3 *dwc)
+ dwc->regset->regs = dwc3_regs;
+ dwc->regset->nregs = ARRAY_SIZE(dwc3_regs);
+ dwc->regset->base = dwc->regs - DWC3_GLOBALS_REGS_START;
++ dwc->regset->dev = dwc->dev;
+
+ root = debugfs_create_dir(dev_name(dwc->dev), usb_debug_root);
++ dwc->debug_root = root;
+ debugfs_create_regset32("regdump", 0444, root, dwc->regset);
+ debugfs_create_file("lsp_dump", 0644, root, dwc, &dwc3_lsp_fops);
+
+@@ -929,6 +1035,6 @@ void dwc3_debugfs_init(struct dwc3 *dwc)
+
+ void dwc3_debugfs_exit(struct dwc3 *dwc)
+ {
+- debugfs_remove(debugfs_lookup(dev_name(dwc->dev), usb_debug_root));
++ debugfs_lookup_and_remove(dev_name(dwc->dev), usb_debug_root);
+ kfree(dwc->regset);
+ }
+diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c
+index d7f76835137fa..81ff21bd405a8 100644
+--- a/drivers/usb/dwc3/drd.c
++++ b/drivers/usb/dwc3/drd.c
+@@ -454,13 +454,8 @@ static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc)
+ * This device property is for kernel internal use only and
+ * is expected to be set by the glue code.
+ */
+- if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) {
+- edev = extcon_get_extcon_dev(name);
+- if (!edev)
+- return ERR_PTR(-EPROBE_DEFER);
+-
+- return edev;
+- }
++ if (device_property_read_string(dev, "linux,extcon-name", &name) == 0)
++ return extcon_get_extcon_dev(name);
+
+ /*
+ * Try to get an extcon device from the USB PHY controller's "port"
+@@ -571,16 +566,15 @@ int dwc3_drd_init(struct dwc3 *dwc)
+ {
+ int ret, irq;
+
++ if (ROLE_SWITCH &&
++ device_property_read_bool(dwc->dev, "usb-role-switch"))
++ return dwc3_setup_role_switch(dwc);
++
+ dwc->edev = dwc3_get_extcon(dwc);
+ if (IS_ERR(dwc->edev))
+ return PTR_ERR(dwc->edev);
+
+- if (ROLE_SWITCH &&
+- device_property_read_bool(dwc->dev, "usb-role-switch")) {
+- ret = dwc3_setup_role_switch(dwc);
+- if (ret < 0)
+- return ret;
+- } else if (dwc->edev) {
++ if (dwc->edev) {
+ dwc->edev_nb.notifier_call = dwc3_drd_notifier;
+ ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST,
+ &dwc->edev_nb);
+diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
+index 0ecf20eeceee9..4be6a873bd071 100644
+--- a/drivers/usb/dwc3/dwc3-exynos.c
++++ b/drivers/usb/dwc3/dwc3-exynos.c
+@@ -37,15 +37,6 @@ struct dwc3_exynos {
+ struct regulator *vdd10;
+ };
+
+-static int dwc3_exynos_remove_child(struct device *dev, void *unused)
+-{
+- struct platform_device *pdev = to_platform_device(dev);
+-
+- platform_device_unregister(pdev);
+-
+- return 0;
+-}
+-
+ static int dwc3_exynos_probe(struct platform_device *pdev)
+ {
+ struct dwc3_exynos *exynos;
+@@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev)
+ struct dwc3_exynos *exynos = platform_get_drvdata(pdev);
+ int i;
+
+- device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child);
++ of_platform_depopulate(&pdev->dev);
+
+ for (i = exynos->num_clks - 1; i >= 0; i--)
+ clk_disable_unprepare(exynos->clks[i]);
+diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c
+index d0f9b7c296b0d..d3b4dc00007f0 100644
+--- a/drivers/usb/dwc3/dwc3-meson-g12a.c
++++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
+@@ -755,16 +755,16 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
+
+ ret = dwc3_meson_g12a_get_phys(priv);
+ if (ret)
+- goto err_disable_clks;
++ goto err_rearm;
+
+ ret = priv->drvdata->setup_regmaps(priv, base);
+ if (ret)
+- goto err_disable_clks;
++ goto err_rearm;
+
+ if (priv->vbus) {
+ ret = regulator_enable(priv->vbus);
+ if (ret)
+- goto err_disable_clks;
++ goto err_rearm;
+ }
+
+ /* Get dr_mode */
+@@ -805,7 +805,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
+
+ ret = dwc3_meson_g12a_otg_init(pdev, priv);
+ if (ret)
+- goto err_phys_power;
++ goto err_plat_depopulate;
+
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+@@ -813,6 +813,9 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
+
+ return 0;
+
++err_plat_depopulate:
++ of_platform_depopulate(dev);
++
+ err_phys_power:
+ for (i = 0 ; i < PHY_COUNT ; ++i)
+ phy_power_off(priv->phys[i]);
+@@ -825,6 +828,9 @@ err_disable_regulator:
+ if (priv->vbus)
+ regulator_disable(priv->vbus);
+
++err_rearm:
++ reset_control_rearm(priv->reset);
++
+ err_disable_clks:
+ clk_bulk_disable_unprepare(priv->drvdata->num_clks,
+ priv->drvdata->clks);
+@@ -852,6 +858,8 @@ static int dwc3_meson_g12a_remove(struct platform_device *pdev)
+ pm_runtime_put_noidle(dev);
+ pm_runtime_set_suspended(dev);
+
++ reset_control_rearm(priv->reset);
++
+ clk_bulk_disable_unprepare(priv->drvdata->num_clks,
+ priv->drvdata->clks);
+
+@@ -892,7 +900,7 @@ static int __maybe_unused dwc3_meson_g12a_suspend(struct device *dev)
+ phy_exit(priv->phys[i]);
+ }
+
+- reset_control_assert(priv->reset);
++ reset_control_rearm(priv->reset);
+
+ return 0;
+ }
+@@ -902,7 +910,9 @@ static int __maybe_unused dwc3_meson_g12a_resume(struct device *dev)
+ struct dwc3_meson_g12a *priv = dev_get_drvdata(dev);
+ int i, ret;
+
+- reset_control_deassert(priv->reset);
++ ret = reset_control_reset(priv->reset);
++ if (ret)
++ return ret;
+
+ ret = priv->drvdata->usb_init(priv);
+ if (ret)
+@@ -928,6 +938,12 @@ static int __maybe_unused dwc3_meson_g12a_resume(struct device *dev)
+ return ret;
+ }
+
++ if (priv->drvdata->usb_post_init) {
++ ret = priv->drvdata->usb_post_init(priv);
++ if (ret)
++ return ret;
++ }
++
+ return 0;
+ }
+
+diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
+index e196673f5c647..efaf0db595f46 100644
+--- a/drivers/usb/dwc3/dwc3-omap.c
++++ b/drivers/usb/dwc3/dwc3-omap.c
+@@ -242,7 +242,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
+ break;
+
+ case OMAP_DWC3_ID_FLOAT:
+- if (omap->vbus_reg)
++ if (omap->vbus_reg && regulator_is_enabled(omap->vbus_reg))
+ regulator_disable(omap->vbus_reg);
+ val = dwc3_omap_read_utmi_ctrl(omap);
+ val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG;
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 7ff8fc8f79a9b..1872de3ce98bd 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -43,6 +43,12 @@
+ #define PCI_DEVICE_ID_INTEL_ADLP 0x51ee
+ #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee
+ #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
++#define PCI_DEVICE_ID_INTEL_RPL 0xa70e
++#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
++#define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1
++#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
++#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
++#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
+ #define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+ #define PCI_DEVICE_ID_AMD_MR 0x163a
+
+@@ -85,8 +91,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = {
+ static struct gpiod_lookup_table platform_bytcr_gpios = {
+ .dev_id = "0000:00:16.0",
+ .table = {
+- GPIO_LOOKUP("INT33FC:00", 54, "reset", GPIO_ACTIVE_HIGH),
+- GPIO_LOOKUP("INT33FC:02", 14, "cs", GPIO_ACTIVE_HIGH),
++ GPIO_LOOKUP("INT33FC:00", 54, "cs", GPIO_ACTIVE_HIGH),
++ GPIO_LOOKUP("INT33FC:02", 14, "reset", GPIO_ACTIVE_HIGH),
+ {}
+ },
+ };
+@@ -119,6 +125,13 @@ static const struct property_entry dwc3_pci_intel_properties[] = {
+ {}
+ };
+
++static const struct property_entry dwc3_pci_intel_byt_properties[] = {
++ PROPERTY_ENTRY_STRING("dr_mode", "peripheral"),
++ PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
++ PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
++ {}
++};
++
+ static const struct property_entry dwc3_pci_mrfld_properties[] = {
+ PROPERTY_ENTRY_STRING("dr_mode", "otg"),
+ PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"),
+@@ -161,6 +174,10 @@ static const struct software_node dwc3_pci_intel_swnode = {
+ .properties = dwc3_pci_intel_properties,
+ };
+
++static const struct software_node dwc3_pci_intel_byt_swnode = {
++ .properties = dwc3_pci_intel_byt_properties,
++};
++
+ static const struct software_node dwc3_pci_intel_mrfld_swnode = {
+ .properties = dwc3_pci_mrfld_properties,
+ };
+@@ -173,7 +190,8 @@ static const struct software_node dwc3_pci_amd_mr_swnode = {
+ .properties = dwc3_pci_mr_properties,
+ };
+
+-static int dwc3_pci_quirks(struct dwc3_pci *dwc)
++static int dwc3_pci_quirks(struct dwc3_pci *dwc,
++ const struct software_node *swnode)
+ {
+ struct pci_dev *pdev = dwc->pci;
+
+@@ -201,10 +219,12 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
+
+ /*
+ * A lot of BYT devices lack ACPI resource entries for
+- * the GPIOs, add a fallback mapping to the reference
++ * the GPIOs. If the ACPI entry for the GPIO controller
++ * is present add a fallback mapping to the reference
+ * design GPIOs which all boards seem to use.
+ */
+- gpiod_add_lookup_table(&platform_bytcr_gpios);
++ if (acpi_dev_present("INT33FC", NULL, -1))
++ gpiod_add_lookup_table(&platform_bytcr_gpios);
+
+ /*
+ * These GPIOs will turn on the USB2 PHY. Note that we have to
+@@ -230,7 +250,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
+ }
+ }
+
+- return 0;
++ return device_add_software_node(&dwc->dwc3->dev, swnode);
+ }
+
+ #ifdef CONFIG_PM
+@@ -241,7 +261,7 @@ static void dwc3_pci_resume_work(struct work_struct *work)
+ int ret;
+
+ ret = pm_runtime_get_sync(&dwc3->dev);
+- if (ret) {
++ if (ret < 0) {
+ pm_runtime_put_sync_autosuspend(&dwc3->dev);
+ return;
+ }
+@@ -295,11 +315,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ dwc->dwc3->dev.parent = dev;
+ ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev));
+
+- ret = device_add_software_node(&dwc->dwc3->dev, (void *)id->driver_data);
+- if (ret < 0)
+- goto err;
+-
+- ret = dwc3_pci_quirks(dwc);
++ ret = dwc3_pci_quirks(dwc, (void *)id->driver_data);
+ if (ret)
+ goto err;
+
+@@ -344,7 +360,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT),
+- (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++ (kernel_ulong_t) &dwc3_pci_intel_byt_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
+ (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, },
+@@ -409,6 +425,24 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPL),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLM),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLS),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
+index 9abbd01028c5f..0180350a2c95c 100644
+--- a/drivers/usb/dwc3/dwc3-qcom.c
++++ b/drivers/usb/dwc3/dwc3-qcom.c
+@@ -258,7 +258,8 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
+ if (IS_ERR(qcom->icc_path_apps)) {
+ dev_err(dev, "failed to get apps-usb path: %ld\n",
+ PTR_ERR(qcom->icc_path_apps));
+- return PTR_ERR(qcom->icc_path_apps);
++ ret = PTR_ERR(qcom->icc_path_apps);
++ goto put_path_ddr;
+ }
+
+ if (usb_get_maximum_speed(&qcom->dwc3->dev) >= USB_SPEED_SUPER ||
+@@ -271,17 +272,23 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
+
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth for usb-ddr path: %d\n", ret);
+- return ret;
++ goto put_path_apps;
+ }
+
+ ret = icc_set_bw(qcom->icc_path_apps,
+ APPS_USB_AVG_BW, APPS_USB_PEAK_BW);
+ if (ret) {
+ dev_err(dev, "failed to set bandwidth for apps-usb path: %d\n", ret);
+- return ret;
++ goto put_path_apps;
+ }
+
+ return 0;
++
++put_path_apps:
++ icc_put(qcom->icc_path_apps);
++put_path_ddr:
++ icc_put(qcom->icc_path_ddr);
++ return ret;
+ }
+
+ /**
+@@ -296,6 +303,23 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom)
+ icc_put(qcom->icc_path_apps);
+ }
+
++/* Only usable in contexts where the role can not change. */
++static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom)
++{
++ struct dwc3 *dwc;
++
++ /*
++ * FIXME: Fix this layering violation.
++ */
++ dwc = platform_get_drvdata(qcom->dwc3);
++
++ /* Core driver may not have probed yet. */
++ if (!dwc)
++ return false;
++
++ return dwc->xhci;
++}
++
+ static void dwc3_qcom_disable_interrupts(struct dwc3_qcom *qcom)
+ {
+ if (qcom->hs_phy_irq) {
+@@ -411,7 +435,11 @@ static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data)
+ if (qcom->pm_suspended)
+ return IRQ_HANDLED;
+
+- if (dwc->xhci)
++ /*
++ * This is safe as role switching is done from a freezable workqueue
++ * and the wakeup interrupts are disabled as part of resume.
++ */
++ if (dwc3_qcom_is_host(qcom))
+ pm_runtime_resume(&dwc->xhci->dev);
+
+ return IRQ_HANDLED;
+@@ -443,9 +471,9 @@ static int dwc3_qcom_get_irq(struct platform_device *pdev,
+ int ret;
+
+ if (np)
+- ret = platform_get_irq_byname(pdev_irq, name);
++ ret = platform_get_irq_byname_optional(pdev_irq, name);
+ else
+- ret = platform_get_irq(pdev_irq, num);
++ ret = platform_get_irq_optional(pdev_irq, num);
+
+ return ret;
+ }
+@@ -649,7 +677,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev)
+ struct dwc3_qcom *qcom = platform_get_drvdata(pdev);
+ struct device_node *np = pdev->dev.of_node, *dwc3_np;
+ struct device *dev = &pdev->dev;
+- struct property *prop;
+ int ret;
+
+ dwc3_np = of_get_compatible_child(np, "snps,dwc3");
+@@ -658,20 +685,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev)
+ return -ENODEV;
+ }
+
+- prop = devm_kzalloc(dev, sizeof(*prop), GFP_KERNEL);
+- if (!prop) {
+- ret = -ENOMEM;
+- dev_err(dev, "unable to allocate memory for property\n");
+- goto node_put;
+- }
+-
+- prop->name = "tx-fifo-resize";
+- ret = of_add_property(dwc3_np, prop);
+- if (ret) {
+- dev_err(dev, "unable to add property\n");
+- goto node_put;
+- }
+-
+ ret = of_platform_populate(np, NULL, NULL, dev);
+ if (ret) {
+ dev_err(dev, "failed to register dwc3 core - %d\n", ret);
+@@ -723,6 +736,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
+ struct device *dev = &pdev->dev;
+ struct dwc3_qcom *qcom;
+ struct resource *res, *parent_res = NULL;
++ struct resource local_res;
+ int ret, i;
+ bool ignore_pipe_clk;
+
+@@ -773,9 +787,8 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
+ if (np) {
+ parent_res = res;
+ } else {
+- parent_res = kmemdup(res, sizeof(struct resource), GFP_KERNEL);
+- if (!parent_res)
+- return -ENOMEM;
++ memcpy(&local_res, res, sizeof(struct resource));
++ parent_res = &local_res;
+
+ parent_res->start = res->start +
+ qcom->acpi_pdata->qscratch_base_offset;
+@@ -784,9 +797,13 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
+
+ if (qcom->acpi_pdata->is_urs) {
+ qcom->urs_usb = dwc3_qcom_create_urs_usb_platdev(dev);
+- if (!qcom->urs_usb) {
++ if (IS_ERR_OR_NULL(qcom->urs_usb)) {
+ dev_err(dev, "failed to create URS USB platdev\n");
+- return -ENODEV;
++ if (!qcom->urs_usb)
++ ret = -ENODEV;
++ else
++ ret = PTR_ERR(qcom->urs_usb);
++ goto clk_disable;
+ }
+ }
+ }
+@@ -829,7 +846,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
+ qcom->mode = usb_get_dr_mode(&qcom->dwc3->dev);
+
+ /* enable vbus override for device mode */
+- if (qcom->mode == USB_DR_MODE_PERIPHERAL)
++ if (qcom->mode != USB_DR_MODE_HOST)
+ dwc3_qcom_vbus_override_enable(qcom, true);
+
+ /* register extcon to override sw_vbus on Vbus change later */
+@@ -866,11 +883,15 @@ reset_assert:
+ static int dwc3_qcom_remove(struct platform_device *pdev)
+ {
+ struct dwc3_qcom *qcom = platform_get_drvdata(pdev);
++ struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ int i;
+
+ device_remove_software_node(&qcom->dwc3->dev);
+- of_platform_depopulate(dev);
++ if (np)
++ of_platform_depopulate(&pdev->dev);
++ else
++ platform_device_put(pdev);
+
+ for (i = qcom->num_clocks - 1; i >= 0; i--) {
+ clk_disable_unprepare(qcom->clks[i]);
+diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c
+index 9cc3ad701a295..a6f3a9b38789e 100644
+--- a/drivers/usb/dwc3/dwc3-xilinx.c
++++ b/drivers/usb/dwc3/dwc3-xilinx.c
+@@ -99,17 +99,29 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
+ struct device *dev = priv_data->dev;
+ struct reset_control *crst, *hibrst, *apbrst;
+ struct phy *usb3_phy;
+- int ret;
++ int ret = 0;
+ u32 reg;
+
+- usb3_phy = devm_phy_get(dev, "usb3-phy");
+- if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) {
+- ret = -EPROBE_DEFER;
++ usb3_phy = devm_phy_optional_get(dev, "usb3-phy");
++ if (IS_ERR(usb3_phy)) {
++ ret = PTR_ERR(usb3_phy);
++ dev_err_probe(dev, ret,
++ "failed to get USB3 PHY\n");
+ goto err;
+- } else if (IS_ERR(usb3_phy)) {
+- usb3_phy = NULL;
+ }
+
++ /*
++ * The following core resets are not required unless a USB3 PHY
++ * is used, and the subsequent register settings are not required
++ * unless a core reset is performed (they should be set properly
++ * by the first-stage boot loader, but may be reverted by a core
++ * reset). They may also break the configuration if USB3 is actually
++ * in use but the usb3-phy entry is missing from the device tree.
++ * Therefore, skip these operations in this case.
++ */
++ if (!usb3_phy)
++ goto skip_usb3_phy;
++
+ crst = devm_reset_control_get_exclusive(dev, "usb_crst");
+ if (IS_ERR(crst)) {
+ ret = PTR_ERR(crst);
+@@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data)
+ goto err;
+ }
+
++skip_usb3_phy:
+ /*
+ * This routes the USB DMA traffic to go through FPD path instead
+ * of reaching DDR directly. This traffic routing is needed to
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 4519d06c9ca2b..8ada601901cfa 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -180,6 +180,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
+ list_del(&req->list);
+ req->remaining = 0;
+ req->needs_extra_trb = false;
++ req->num_trbs = 0;
+
+ if (req->request.status == -EINPROGRESS)
+ req->request.status = status;
+@@ -291,7 +292,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
+ *
+ * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2
+ */
+- if (dwc->gadget->speed <= USB_SPEED_HIGH) {
++ if (dwc->gadget->speed <= USB_SPEED_HIGH ||
++ DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+@@ -310,13 +312,24 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
+ if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) {
+ int link_state;
+
++ /*
++ * Initiate remote wakeup if the link state is in U3 when
++ * operating in SS/SSP or L1/L2 when operating in HS/FS. If the
++ * link state is in U1/U2, no remote wakeup is needed. The Start
++ * Transfer command will initiate the link recovery.
++ */
+ link_state = dwc3_gadget_get_link_state(dwc);
+- if (link_state == DWC3_LINK_STATE_U1 ||
+- link_state == DWC3_LINK_STATE_U2 ||
+- link_state == DWC3_LINK_STATE_U3) {
++ switch (link_state) {
++ case DWC3_LINK_STATE_U2:
++ if (dwc->gadget->speed >= USB_SPEED_SUPER)
++ break;
++
++ fallthrough;
++ case DWC3_LINK_STATE_U3:
+ ret = __dwc3_gadget_wakeup(dwc);
+ dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n",
+ ret);
++ break;
+ }
+ }
+
+@@ -702,6 +715,7 @@ void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
+ DWC31_GTXFIFOSIZ_TXFRAMNUM;
+
+ dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size);
++ dep->flags &= ~DWC3_EP_TXFIFO_RESIZED;
+ }
+ dwc->num_ep_resized = 0;
+ }
+@@ -747,6 +761,10 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
+ if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
+ return 0;
+
++ /* bail if already resized */
++ if (dep->flags & DWC3_EP_TXFIFO_RESIZED)
++ return 0;
++
+ ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
+
+ if ((dep->endpoint.maxburst > 1 &&
+@@ -807,6 +825,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
+ }
+
+ dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size);
++ dep->flags |= DWC3_EP_TXFIFO_RESIZED;
+ dwc->num_ep_resized++;
+
+ return 0;
+@@ -934,7 +953,7 @@ out:
+ return 0;
+ }
+
+-static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
++static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status)
+ {
+ struct dwc3_request *req;
+
+@@ -944,19 +963,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
+ while (!list_empty(&dep->started_list)) {
+ req = next_request(&dep->started_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+
+ while (!list_empty(&dep->pending_list)) {
+ req = next_request(&dep->pending_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+
+ while (!list_empty(&dep->cancelled_list)) {
+ req = next_request(&dep->cancelled_list);
+
+- dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
++ dwc3_gadget_giveback(dep, req, status);
+ }
+ }
+
+@@ -985,18 +1004,18 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
+ reg &= ~DWC3_DALEPENA_EP(dep->number);
+ dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
+
++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN);
++
++ dep->stream_capable = false;
++ dep->type = 0;
++ dep->flags &= DWC3_EP_TXFIFO_RESIZED;
++
+ /* Clear out the ep descriptors for non-ep0 */
+ if (dep->number > 1) {
+ dep->endpoint.comp_desc = NULL;
+ dep->endpoint.desc = NULL;
+ }
+
+- dwc3_remove_requests(dwc, dep);
+-
+- dep->stream_capable = false;
+- dep->type = 0;
+- dep->flags = 0;
+-
+ return 0;
+ }
+
+@@ -1152,17 +1171,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
+ return trbs_left;
+ }
+
+-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+- dma_addr_t dma, unsigned int length, unsigned int chain,
+- unsigned int node, unsigned int stream_id,
+- unsigned int short_not_ok, unsigned int no_interrupt,
+- unsigned int is_last, bool must_interrupt)
++/**
++ * dwc3_prepare_one_trb - setup one TRB from one request
++ * @dep: endpoint for which this request is prepared
++ * @req: dwc3_request pointer
++ * @trb_length: buffer size of the TRB
++ * @chain: should this TRB be chained to the next?
++ * @node: only for isochronous endpoints. First TRB needs different type.
++ * @use_bounce_buffer: set to use bounce buffer
++ * @must_interrupt: set to interrupt on TRB completion
++ */
++static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
++ struct dwc3_request *req, unsigned int trb_length,
++ unsigned int chain, unsigned int node, bool use_bounce_buffer,
++ bool must_interrupt)
+ {
++ struct dwc3_trb *trb;
++ dma_addr_t dma;
++ unsigned int stream_id = req->request.stream_id;
++ unsigned int short_not_ok = req->request.short_not_ok;
++ unsigned int no_interrupt = req->request.no_interrupt;
++ unsigned int is_last = req->request.is_last;
+ struct dwc3 *dwc = dep->dwc;
+ struct usb_gadget *gadget = dwc->gadget;
+ enum usb_device_speed speed = gadget->speed;
+
+- trb->size = DWC3_TRB_SIZE_LENGTH(length);
++ if (use_bounce_buffer)
++ dma = dep->dwc->bounce_addr;
++ else if (req->request.num_sgs > 0)
++ dma = sg_dma_address(req->start_sg);
++ else
++ dma = req->request.dma;
++
++ trb = &dep->trb_pool[dep->trb_enqueue];
++
++ if (!req->trb) {
++ dwc3_gadget_move_started_request(req);
++ req->trb = trb;
++ req->trb_dma = dwc3_trb_dma_offset(dep, trb);
++ }
++
++ req->num_trbs++;
++
++ trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
+ trb->bpl = lower_32_bits(dma);
+ trb->bph = upper_32_bits(dma);
+
+@@ -1202,10 +1253,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+ unsigned int mult = 2;
+ unsigned int maxp = usb_endpoint_maxp(ep->desc);
+
+- if (length <= (2 * maxp))
++ if (req->request.length <= (2 * maxp))
+ mult--;
+
+- if (length <= maxp)
++ if (req->request.length <= maxp)
+ mult--;
+
+ trb->size |= DWC3_TRB_SIZE_PCM1(mult);
+@@ -1214,8 +1265,8 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+ trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS;
+ }
+
+- /* always enable Interrupt on Missed ISOC */
+- trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI;
++ if (!no_interrupt && !chain)
++ trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI;
+ break;
+
+ case USB_ENDPOINT_XFER_BULK:
+@@ -1254,6 +1305,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+ if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable)
+ trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id);
+
++ /*
++ * As per data book section 4.2.3.2, TRB Control Bit Rules:
++ *
++ * The controller autonomously checks the HWO field of a TRB to determine if the
++ * entire TRB is valid. Therefore, software must ensure that the rest of the TRB
++ * is valid before setting the HWO field to '1'. In most systems, this means that
++ * software must update the fourth DWORD of a TRB last.
++ *
++ * However there is a possibility of CPU re-ordering here which can cause
++ * controller to observe the HWO bit set prematurely.
++ * Add a write memory barrier to prevent CPU re-ordering.
++ */
++ wmb();
+ trb->ctrl |= DWC3_TRB_CTRL_HWO;
+
+ dwc3_ep_inc_enq(dep);
+@@ -1261,50 +1325,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+ trace_dwc3_prepare_trb(dep, trb);
+ }
+
+-/**
+- * dwc3_prepare_one_trb - setup one TRB from one request
+- * @dep: endpoint for which this request is prepared
+- * @req: dwc3_request pointer
+- * @trb_length: buffer size of the TRB
+- * @chain: should this TRB be chained to the next?
+- * @node: only for isochronous endpoints. First TRB needs different type.
+- * @use_bounce_buffer: set to use bounce buffer
+- * @must_interrupt: set to interrupt on TRB completion
+- */
+-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+- struct dwc3_request *req, unsigned int trb_length,
+- unsigned int chain, unsigned int node, bool use_bounce_buffer,
+- bool must_interrupt)
+-{
+- struct dwc3_trb *trb;
+- dma_addr_t dma;
+- unsigned int stream_id = req->request.stream_id;
+- unsigned int short_not_ok = req->request.short_not_ok;
+- unsigned int no_interrupt = req->request.no_interrupt;
+- unsigned int is_last = req->request.is_last;
+-
+- if (use_bounce_buffer)
+- dma = dep->dwc->bounce_addr;
+- else if (req->request.num_sgs > 0)
+- dma = sg_dma_address(req->start_sg);
+- else
+- dma = req->request.dma;
+-
+- trb = &dep->trb_pool[dep->trb_enqueue];
+-
+- if (!req->trb) {
+- dwc3_gadget_move_started_request(req);
+- req->trb = trb;
+- req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+- }
+-
+- req->num_trbs++;
+-
+- __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
+- stream_id, short_not_ok, no_interrupt, is_last,
+- must_interrupt);
+-}
+-
+ static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
+ {
+ unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
+@@ -1620,6 +1640,44 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc)
+ return DWC3_DSTS_SOFFN(reg);
+ }
+
++/**
++ * __dwc3_stop_active_transfer - stop the current active transfer
++ * @dep: isoc endpoint
++ * @force: set forcerm bit in the command
++ * @interrupt: command complete interrupt after End Transfer command
++ *
++ * When setting force, the ForceRM bit will be set. In that case
++ * the controller won't update the TRB progress on command
++ * completion. It also won't clear the HWO bit in the TRB.
++ * The command will also not complete immediately in that case.
++ */
++static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt)
++{
++ struct dwc3 *dwc = dep->dwc;
++ struct dwc3_gadget_ep_cmd_params params;
++ u32 cmd;
++ int ret;
++
++ cmd = DWC3_DEPCMD_ENDTRANSFER;
++ cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0;
++ cmd |= interrupt ? DWC3_DEPCMD_CMDIOC : 0;
++ cmd |= DWC3_DEPCMD_PARAM(dep->resource_index);
++ memset(&params, 0, sizeof(params));
++ ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
++ WARN_ON_ONCE(ret);
++ dep->resource_index = 0;
++
++ if (!interrupt) {
++ if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A))
++ mdelay(1);
++ dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
++ } else if (!ret) {
++ dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
++ }
++
++ return ret;
++}
++
+ /**
+ * dwc3_gadget_start_isoc_quirk - workaround invalid frame number
+ * @dep: isoc endpoint
+@@ -1789,21 +1847,8 @@ static int __dwc3_gadget_start_isoc(struct dwc3_ep *dep)
+ * status, issue END_TRANSFER command and retry on the next XferNotReady
+ * event.
+ */
+- if (ret == -EAGAIN) {
+- struct dwc3_gadget_ep_cmd_params params;
+- u32 cmd;
+-
+- cmd = DWC3_DEPCMD_ENDTRANSFER |
+- DWC3_DEPCMD_CMDIOC |
+- DWC3_DEPCMD_PARAM(dep->resource_index);
+-
+- dep->resource_index = 0;
+- memset(&params, 0, sizeof(params));
+-
+- ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
+- if (!ret)
+- dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
+- }
++ if (ret == -EAGAIN)
++ ret = __dwc3_stop_active_transfer(dep, false, true);
+
+ return ret;
+ }
+@@ -1925,10 +1970,10 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r
+ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
+ {
+ struct dwc3_request *req;
+- struct dwc3_request *tmp;
+ struct dwc3 *dwc = dep->dwc;
+
+- list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) {
++ while (!list_empty(&dep->cancelled_list)) {
++ req = next_request(&dep->cancelled_list);
+ dwc3_gadget_ep_skip_trbs(dep, req);
+ switch (req->status) {
+ case DWC3_REQUEST_STATUS_DISCONNECTED:
+@@ -1945,6 +1990,12 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
+ dwc3_gadget_giveback(dep, req, -ECONNRESET);
+ break;
+ }
++ /*
++ * The endpoint is disabled; let dwc3_remove_requests()
++ * handle the cleanup.
++ */
++ if (!dep->endpoint.desc)
++ break;
+ }
+ }
+
+@@ -2264,7 +2315,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc)
+ if (!dep)
+ continue;
+
+- dwc3_remove_requests(dwc, dep);
++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN);
+ }
+ }
+
+@@ -2361,7 +2412,7 @@ static void __dwc3_gadget_set_speed(struct dwc3 *dwc)
+ dwc3_writel(dwc->regs, DWC3_DCFG, reg);
+ }
+
+-static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
++static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
+ {
+ u32 reg;
+ u32 timeout = 500;
+@@ -2380,17 +2431,11 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
+ reg &= ~DWC3_DCTL_KEEP_CONNECT;
+ reg |= DWC3_DCTL_RUN_STOP;
+
+- if (dwc->has_hibernation)
+- reg |= DWC3_DCTL_KEEP_CONNECT;
+-
+ __dwc3_gadget_set_speed(dwc);
+ dwc->pullups_connected = true;
+ } else {
+ reg &= ~DWC3_DCTL_RUN_STOP;
+
+- if (dwc->has_hibernation && !suspend)
+- reg &= ~DWC3_DCTL_KEEP_CONNECT;
+-
+ dwc->pullups_connected = false;
+ }
+
+@@ -2411,14 +2456,57 @@ static void dwc3_gadget_disable_irq(struct dwc3 *dwc);
+ static void __dwc3_gadget_stop(struct dwc3 *dwc);
+ static int __dwc3_gadget_start(struct dwc3 *dwc);
+
++static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&dwc->lock, flags);
++ dwc->connected = false;
++
++ /*
++ * In the Synopsys DesignWare Cores USB3 Databook Rev. 3.30a
++ * Section 4.1.8 Table 4-7, it states that for a device-initiated
++ * disconnect, the SW needs to ensure that it sends "a DEPENDXFER
++ * command for any active transfers" before clearing the RunStop
++ * bit.
++ */
++ dwc3_stop_active_transfers(dwc);
++ __dwc3_gadget_stop(dwc);
++ spin_unlock_irqrestore(&dwc->lock, flags);
++
++ /*
++ * Note: if the GEVNTCOUNT indicates events in the event buffer, the
++ * driver needs to acknowledge them before the controller can halt.
++ * Simply let the interrupt handler acknowledge and handle the
++ * remaining events generated by the controller while polling for
++ * DSTS.DEVCTLHLT.
++ */
++ return dwc3_gadget_run_stop(dwc, false);
++}
++
++static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
++{
++ /*
++ * In the Synopsys DWC_usb31 1.90a programming guide section
++ * 4.1.9, it specifies that a reconnect after a
++ * device-initiated disconnect requires a core soft reset
++ * (DCTL.CSftRst) before enabling the run/stop bit.
++ */
++ dwc3_core_soft_reset(dwc);
++
++ dwc3_event_buffers_setup(dwc);
++ __dwc3_gadget_start(dwc);
++ return dwc3_gadget_run_stop(dwc, true);
++}
++
+ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
+ {
+ struct dwc3 *dwc = gadget_to_dwc(g);
+- unsigned long flags;
+ int ret;
+
+ is_on = !!is_on;
+
++ dwc->softconnect = is_on;
+ /*
+ * Per databook, when we want to stop the gadget, if a control transfer
+ * is still in process, complete it and get the core into setup phase.
+@@ -2451,52 +2539,22 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
+ ret = pm_runtime_get_sync(dwc->dev);
+ if (!ret || ret < 0) {
+ pm_runtime_put(dwc->dev);
+- return 0;
++ if (ret < 0)
++ pm_runtime_set_suspended(dwc->dev);
++ return ret;
+ }
+
+- /*
+- * Synchronize and disable any further event handling while controller
+- * is being enabled/disabled.
+- */
+- disable_irq(dwc->irq_gadget);
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+-
+- if (!is_on) {
+- u32 count;
+-
+- dwc->connected = false;
+- /*
+- * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a
+- * Section 4.1.8 Table 4-7, it states that for a device-initiated
+- * disconnect, the SW needs to ensure that it sends "a DEPENDXFER
+- * command for any active transfers" before clearing the RunStop
+- * bit.
+- */
+- dwc3_stop_active_transfers(dwc);
+- __dwc3_gadget_stop(dwc);
+-
+- /*
+- * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a
+- * Section 1.3.4, it mentions that for the DEVCTRLHLT bit, the
+- * "software needs to acknowledge the events that are generated
+- * (by writing to GEVNTCOUNTn) while it is waiting for this bit
+- * to be set to '1'."
+- */
+- count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
+- count &= DWC3_GEVNTCOUNT_MASK;
+- if (count > 0) {
+- dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
+- dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
+- dwc->ev_buf->length;
+- }
+- } else {
+- __dwc3_gadget_start(dwc);
++ if (dwc->pullups_connected == is_on) {
++ pm_runtime_put(dwc->dev);
++ return 0;
+ }
+
+- ret = dwc3_gadget_run_stop(dwc, is_on, false);
+- spin_unlock_irqrestore(&dwc->lock, flags);
+- enable_irq(dwc->irq_gadget);
++ synchronize_irq(dwc->irq_gadget);
++
++ if (!is_on)
++ ret = dwc3_gadget_soft_disconnect(dwc);
++ else
++ ret = dwc3_gadget_soft_connect(dwc);
+
+ pm_runtime_put(dwc->dev);
+
+@@ -3051,9 +3109,7 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
+ list_del(&dep->endpoint.ep_list);
+ }
+
+- debugfs_remove_recursive(debugfs_lookup(dep->name,
+- debugfs_lookup(dev_name(dep->dwc->dev),
+- usb_debug_root)));
++ dwc3_debugfs_remove_endpoint_dir(dep);
+ kfree(dep);
+ }
+ }
+@@ -3117,6 +3173,10 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
+ if (event->status & DEPEVT_STATUS_SHORT && !chain)
+ return 1;
+
++ if ((trb->ctrl & DWC3_TRB_CTRL_ISP_IMI) &&
++ DWC3_TRB_SIZE_TRBSTS(trb->size) == DWC3_TRBSTS_MISSED_ISOC)
++ return 1;
++
+ if ((trb->ctrl & DWC3_TRB_CTRL_IOC) ||
+ (trb->ctrl & DWC3_TRB_CTRL_LST))
+ return 1;
+@@ -3169,6 +3229,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
+ const struct dwc3_event_depevt *event,
+ struct dwc3_request *req, int status)
+ {
++ int request_status;
+ int ret;
+
+ if (req->request.num_mapped_sgs)
+@@ -3189,7 +3250,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
+ req->needs_extra_trb = false;
+ }
+
+- dwc3_gadget_giveback(dep, req, status);
++ /*
++ * The event status only reflects the status of the TRB with IOC set.
++ * For the requests that don't set interrupt on completion, the driver
++ * needs to check and return the status of the completed TRBs associated
++ * with the request. Use the status of the last TRB of the request.
++ */
++ if (req->request.no_interrupt) {
++ struct dwc3_trb *trb;
++
++ trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue);
++ switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) {
++ case DWC3_TRBSTS_MISSED_ISOC:
++ /* Isoc endpoint only */
++ request_status = -EXDEV;
++ break;
++ case DWC3_TRB_STS_XFER_IN_PROG:
++ /* Applicable when End Transfer with ForceRM=0 */
++ case DWC3_TRBSTS_SETUP_PENDING:
++ /* Control endpoint only */
++ case DWC3_TRBSTS_OK:
++ default:
++ request_status = 0;
++ break;
++ }
++ } else {
++ request_status = status;
++ }
++
++ dwc3_gadget_giveback(dep, req, request_status);
+
+ out:
+ return ret;
+@@ -3199,15 +3288,21 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep,
+ const struct dwc3_event_depevt *event, int status)
+ {
+ struct dwc3_request *req;
+- struct dwc3_request *tmp;
+
+- list_for_each_entry_safe(req, tmp, &dep->started_list, list) {
++ while (!list_empty(&dep->started_list)) {
+ int ret;
+
++ req = next_request(&dep->started_list);
+ ret = dwc3_gadget_ep_cleanup_completed_request(dep, event,
+ req, status);
+ if (ret)
+ break;
++ /*
++		 * The endpoint is disabled; let dwc3_remove_requests()
++ * handle the cleanup.
++ */
++ if (!dep->endpoint.desc)
++ break;
+ }
+ }
+
+@@ -3251,6 +3346,9 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep,
+ if (dep->flags & DWC3_EP_END_TRANSFER_PENDING)
+ goto out;
+
++ if (!dep->endpoint.desc)
++ return no_started_trb;
++
+ if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
+ list_empty(&dep->started_list) &&
+ (list_empty(&dep->pending_list) || status == -EXDEV))
+@@ -3293,6 +3391,9 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep,
+ {
+ int status = 0;
+
++ if (!dep->endpoint.desc)
++ return;
++
+ if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
+ dwc3_gadget_endpoint_frame_from_event(dep, event);
+
+@@ -3346,6 +3447,14 @@ static void dwc3_gadget_endpoint_command_complete(struct dwc3_ep *dep,
+ if (cmd != DWC3_DEPCMD_ENDTRANSFER)
+ return;
+
++ /*
++ * The END_TRANSFER command will cause the controller to generate a
++ * NoStream Event, and it's not due to the host DP NoStream rejection.
++ * Ignore the next NoStream event.
++ */
++ if (dep->stream_capable)
++ dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
++
+ dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
+ dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+ dwc3_gadget_ep_cleanup_cancelled_requests(dep);
+@@ -3420,7 +3529,7 @@ static void dwc3_gadget_endpoint_stream_event(struct dwc3_ep *dep,
+ * streams are updated, and the device controller will not be
+ * triggered to generate ERDY to move the next stream data. To
+ * workaround this and maintain compatibility with various
+- * hosts, force to reinitate the stream until the host is ready
++ * hosts, force to reinitiate the stream until the host is ready
+ * instead of waiting for the host to prime the endpoint.
+ */
+ if (DWC3_VER_IS_WITHIN(DWC32, 100A, ANY)) {
+@@ -3524,10 +3633,6 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
+ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
+ bool interrupt)
+ {
+- struct dwc3_gadget_ep_cmd_params params;
+- u32 cmd;
+- int ret;
+-
+ if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) ||
+ (dep->flags & DWC3_EP_END_TRANSFER_PENDING))
+ return;
+@@ -3556,30 +3661,14 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
+ * enabled, the EndTransfer command will have completed upon
+ * returning from this function.
+ *
+- * This mode is NOT available on the DWC_usb31 IP.
++ * This mode is NOT available on the DWC_usb31 IP. In this
++ * case, if the IOC bit is not set, then delay by 1ms
++	 * after issuing the EndTransfer command. This allows the
++	 * controller to handle the command completely before
++	 * dwc3_remove_requests() attempts to unmap USB request buffers.
+ */
+
+- cmd = DWC3_DEPCMD_ENDTRANSFER;
+- cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0;
+- cmd |= interrupt ? DWC3_DEPCMD_CMDIOC : 0;
+- cmd |= DWC3_DEPCMD_PARAM(dep->resource_index);
+- memset(&params, 0, sizeof(params));
+- ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
+- WARN_ON_ONCE(ret);
+- dep->resource_index = 0;
+-
+- /*
+- * The END_TRANSFER command will cause the controller to generate a
+- * NoStream Event, and it's not due to the host DP NoStream rejection.
+- * Ignore the next NoStream event.
+- */
+- if (dep->stream_capable)
+- dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
+-
+- if (!interrupt)
+- dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+- else
+- dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
++ __dwc3_stop_active_transfer(dep, force, interrupt);
+ }
+
+ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
+@@ -3608,6 +3697,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
+ {
+ int reg;
+
++ dwc->suspended = false;
++
+ dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
+
+ reg = dwc3_readl(dwc->regs, DWC3_DCTL);
+@@ -3628,6 +3719,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
+ {
+ u32 reg;
+
++ dwc->suspended = false;
++
+ /*
+ * Ideally, dwc3_reset_gadget() would trigger the function
+ * drivers to stop any active transfers through ep disable.
+@@ -3833,6 +3926,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
+
+ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc)
+ {
++ dwc->suspended = false;
++
+ /*
+ * TODO take core out of low power mode when that's
+ * implemented.
+@@ -3948,36 +4043,14 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
+ {
+ enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
+
+- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
++ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
++ dwc->suspended = true;
+ dwc3_suspend_gadget(dwc);
++ }
+
+ dwc->link_state = next;
+ }
+
+-static void dwc3_gadget_hibernation_interrupt(struct dwc3 *dwc,
+- unsigned int evtinfo)
+-{
+- unsigned int is_ss = evtinfo & BIT(4);
+-
+- /*
+- * WORKAROUND: DWC3 revison 2.20a with hibernation support
+- * have a known issue which can cause USB CV TD.9.23 to fail
+- * randomly.
+- *
+- * Because of this issue, core could generate bogus hibernation
+- * events which SW needs to ignore.
+- *
+- * Refers to:
+- *
+- * STAR#9000546576: Device Mode Hibernation: Issue in USB 2.0
+- * Device Fallback from SuperSpeed
+- */
+- if (is_ss ^ (dwc->speed == USB_SPEED_SUPER))
+- return;
+-
+- /* enter hibernation here */
+-}
+-
+ static void dwc3_gadget_interrupt(struct dwc3 *dwc,
+ const struct dwc3_event_devt *event)
+ {
+@@ -3995,26 +4068,15 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc,
+ dwc3_gadget_wakeup_interrupt(dwc);
+ break;
+ case DWC3_DEVICE_EVENT_HIBER_REQ:
+- if (dev_WARN_ONCE(dwc->dev, !dwc->has_hibernation,
+- "unexpected hibernation event\n"))
+- break;
+-
+- dwc3_gadget_hibernation_interrupt(dwc, event->event_info);
++ dev_WARN_ONCE(dwc->dev, true, "unexpected hibernation event\n");
+ break;
+ case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE:
+ dwc3_gadget_linksts_change_interrupt(dwc, event->event_info);
+ break;
+ case DWC3_DEVICE_EVENT_SUSPEND:
+ /* It changed to be suspend event for version 2.30a and above */
+- if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) {
+- /*
+- * Ignore suspend event until the gadget enters into
+- * USB_STATE_CONFIGURED state.
+- */
+- if (dwc->gadget->state >= USB_STATE_CONFIGURED)
+- dwc3_gadget_suspend_interrupt(dwc,
+- event->event_info);
+- }
++ if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
++ dwc3_gadget_suspend_interrupt(dwc, event->event_info);
+ break;
+ case DWC3_DEVICE_EVENT_SOF:
+ case DWC3_DEVICE_EVENT_ERRATIC_ERROR:
+@@ -4072,7 +4134,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt)
+ }
+
+ evt->count = 0;
+- evt->flags &= ~DWC3_EVENT_PENDING;
+ ret = IRQ_HANDLED;
+
+ /* Unmask interrupt */
+@@ -4085,6 +4146,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt)
+ dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval);
+ }
+
++ /* Keep the clearing of DWC3_EVENT_PENDING at the end */
++ evt->flags &= ~DWC3_EVENT_PENDING;
++
+ return ret;
+ }
+
+@@ -4095,9 +4159,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt)
+ unsigned long flags;
+ irqreturn_t ret = IRQ_NONE;
+
++ local_bh_disable();
+ spin_lock_irqsave(&dwc->lock, flags);
+ ret = dwc3_process_event_buf(evt);
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ local_bh_enable();
+
+ return ret;
+ }
+@@ -4110,9 +4176,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
+ u32 reg;
+
+ if (pm_runtime_suspended(dwc->dev)) {
++ dwc->pending_events = true;
++ /*
++ * Trigger runtime resume. The get() function will be balanced
++		 * after processing the pending events in
++		 * dwc3_gadget_process_pending_events().
++ */
+ pm_runtime_get(dwc->dev);
+ disable_irq_nosync(dwc->irq_gadget);
+- dwc->pending_events = true;
+ return IRQ_HANDLED;
+ }
+
+@@ -4338,44 +4409,48 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
+
+ int dwc3_gadget_suspend(struct dwc3 *dwc)
+ {
++ unsigned long flags;
++ int ret;
++
+ if (!dwc->gadget_driver)
+ return 0;
+
+- dwc3_gadget_run_stop(dwc, false, false);
++ ret = dwc3_gadget_soft_disconnect(dwc);
++ if (ret)
++ goto err;
++
++ spin_lock_irqsave(&dwc->lock, flags);
+ dwc3_disconnect_gadget(dwc);
+- __dwc3_gadget_stop(dwc);
++ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
++
++err:
++ /*
++ * Attempt to reset the controller's state. Likely no
++ * communication can be established until the host
++ * performs a port reset.
++ */
++ if (dwc->softconnect)
++ dwc3_gadget_soft_connect(dwc);
++
++ return ret;
+ }
+
+ int dwc3_gadget_resume(struct dwc3 *dwc)
+ {
+- int ret;
+-
+- if (!dwc->gadget_driver)
++ if (!dwc->gadget_driver || !dwc->softconnect)
+ return 0;
+
+- ret = __dwc3_gadget_start(dwc);
+- if (ret < 0)
+- goto err0;
+-
+- ret = dwc3_gadget_run_stop(dwc, true, false);
+- if (ret < 0)
+- goto err1;
+-
+- return 0;
+-
+-err1:
+- __dwc3_gadget_stop(dwc);
+-
+-err0:
+- return ret;
++ return dwc3_gadget_soft_connect(dwc);
+ }
+
+ void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
+ {
+ if (dwc->pending_events) {
+ dwc3_interrupt(dwc->irq_gadget, dwc->ev_buf);
++ dwc3_thread_interrupt(dwc->irq_gadget, dwc->ev_buf);
++ pm_runtime_put(dwc->dev);
+ dwc->pending_events = false;
+ enable_irq(dwc->irq_gadget);
+ }
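
The reworked pullup path in the gadget.c hunks above reduces to a fixed sequence: take a runtime-PM reference, return early (and drop the reference) when the controller is already in the requested state, otherwise soft-connect or soft-disconnect, and always balance the reference at the end. The standalone C sketch below only mirrors that sequencing; the fake_dev structure, the pm_get()/pm_put() counters and the stub connect functions are invented for the example and are not the driver's API.

/* Illustrative sketch of the pullup sequencing: a usage counter plus an
 * early return when the hardware is already in the requested state.
 * Everything here is made up for the example. */
#include <stdbool.h>
#include <stdio.h>

struct fake_dev {
	int pm_usage;		/* runtime-PM style usage counter */
	bool pullups_connected;	/* current controller state */
};

static void pm_get(struct fake_dev *d) { d->pm_usage++; }
static void pm_put(struct fake_dev *d) { d->pm_usage--; }

static int soft_connect(struct fake_dev *d)    { d->pullups_connected = true;  return 0; }
static int soft_disconnect(struct fake_dev *d) { d->pullups_connected = false; return 0; }

static int pullup(struct fake_dev *d, bool is_on)
{
	int ret;

	pm_get(d);			/* keep the device awake while we poke it */

	if (d->pullups_connected == is_on) {
		pm_put(d);		/* nothing to do: balance and leave */
		return 0;
	}

	ret = is_on ? soft_connect(d) : soft_disconnect(d);

	pm_put(d);			/* always balance the reference */
	return ret;
}

int main(void)
{
	struct fake_dev d = { 0 };

	pullup(&d, true);
	pullup(&d, true);		/* second call is a no-op */
	pullup(&d, false);
	printf("connected=%d pm_usage=%d\n", d.pullups_connected, d.pm_usage);
	return 0;
}

Running it prints connected=0 pm_usage=0: every get is balanced by a put regardless of which branch was taken, which is the property the rewritten dwc3_gadget_pullup() is after.
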
+diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
+index f29a264635aa1..2078e9d702923 100644
+--- a/drivers/usb/dwc3/host.c
++++ b/drivers/usb/dwc3/host.c
+@@ -130,4 +130,5 @@ err:
+ void dwc3_host_exit(struct dwc3 *dwc)
+ {
+ platform_device_unregister(dwc->xhci);
++ dwc->xhci = NULL;
+ }
+diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
+index be4ecbabdd586..b0c4071f0b167 100644
+--- a/drivers/usb/early/xhci-dbc.c
++++ b/drivers/usb/early/xhci-dbc.c
+@@ -14,7 +14,6 @@
+ #include <linux/pci_ids.h>
+ #include <linux/memblock.h>
+ #include <linux/io.h>
+-#include <linux/iopoll.h>
+ #include <asm/pci-direct.h>
+ #include <asm/fixmap.h>
+ #include <linux/bcd.h>
+@@ -136,9 +135,17 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay)
+ {
+ u32 result;
+
+- return readl_poll_timeout_atomic(ptr, result,
+- ((result & mask) == done),
+- delay, wait);
++ /* Can not use readl_poll_timeout_atomic() for early boot things */
++ do {
++ result = readl(ptr);
++ result &= mask;
++ if (result == done)
++ return 0;
++ udelay(delay);
++ wait -= delay;
++ } while (wait > 0);
++
++ return -ETIMEDOUT;
+ }
+
+ static void __init xdbc_bios_handoff(void)
+@@ -864,7 +871,8 @@ retry:
+
+ static void early_xdbc_write(struct console *con, const char *str, u32 n)
+ {
+- static char buf[XDBC_MAX_PACKET];
++ /* static variables are zeroed, so buf is always NULL terminated */
++ static char buf[XDBC_MAX_PACKET + 1];
+ int chunk, ret;
+ int use_cr = 0;
+
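
The handshake() rewrite above open-codes a poll-with-timeout because readl_poll_timeout_atomic() cannot be used that early in boot. The sketch below shows the same loop shape against an ordinary variable in user space; usleep() stands in for udelay(), and the register, mask and timing values are made up, so treat it only as an illustration of the pattern, not the early-DbC code itself.

/* Poll a value until (value & mask) == done, giving up after `wait_us`
 * microseconds. usleep() stands in for the kernel's udelay(). */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int handshake(volatile unsigned int *ptr, unsigned int mask,
		     unsigned int done, int wait_us, int delay_us)
{
	unsigned int result;

	do {
		result = *ptr;
		result &= mask;
		if (result == done)
			return 0;
		usleep(delay_us);
		wait_us -= delay_us;
	} while (wait_us > 0);

	return -ETIMEDOUT;
}

int main(void)
{
	volatile unsigned int reg = 0x0;

	/* Nothing ever sets the bit here, so this demonstrates the timeout path. */
	int ret = handshake(&reg, 0x1, 0x1, 1000, 100);

	printf("handshake returned %d (expected %d)\n", ret, -ETIMEDOUT);
	return 0;
}
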
+diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
+index 504c1cbc255d1..edce0a1bdddf5 100644
+--- a/drivers/usb/gadget/composite.c
++++ b/drivers/usb/gadget/composite.c
+@@ -498,6 +498,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
+ return min(val, 900U) / 8;
+ }
+
++void check_remote_wakeup_config(struct usb_gadget *g,
++ struct usb_configuration *c)
++{
++ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) {
++ /* Reset the rw bit if gadget is not capable of it */
++ if (!g->wakeup_capable && g->ops->set_remote_wakeup) {
++ WARN(c->cdev, "Clearing wakeup bit for config c.%d\n",
++ c->bConfigurationValue);
++ c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP;
++ }
++ }
++}
++
+ static int config_buf(struct usb_configuration *config,
+ enum usb_device_speed speed, void *buf, u8 type)
+ {
+@@ -945,6 +958,11 @@ static int set_config(struct usb_composite_dev *cdev,
+ power = min(power, 500U);
+ else
+ power = min(power, 900U);
++
++ if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes)
++ usb_gadget_set_remote_wakeup(gadget, 1);
++ else
++ usb_gadget_set_remote_wakeup(gadget, 0);
+ done:
+ if (power <= USB_SELF_POWER_VBUS_MAX_DRAW)
+ usb_gadget_set_selfpowered(gadget);
+@@ -1015,6 +1033,10 @@ int usb_add_config(struct usb_composite_dev *cdev,
+ goto done;
+
+ status = bind(config);
++
++ if (status == 0)
++ status = usb_gadget_check_config(cdev->gadget);
++
+ if (status < 0) {
+ while (!list_empty(&config->functions)) {
+ struct usb_function *f;
+@@ -1679,6 +1701,18 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
+ struct usb_function *f = NULL;
+ u8 endp;
+
++ if (w_length > USB_COMP_EP0_BUFSIZ) {
++ if (ctrl->bRequestType & USB_DIR_IN) {
++ /* Cast away the const, we are going to overwrite on purpose. */
++ __le16 *temp = (__le16 *)&ctrl->wLength;
++
++ *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ);
++ w_length = USB_COMP_EP0_BUFSIZ;
++ } else {
++ goto done;
++ }
++ }
++
+ /* partial re-init of the response message; the function or the
+ * gadget might need to intercept e.g. a control-OUT completion
+ * when we delegate to it.
+@@ -1963,6 +1997,9 @@ unknown:
+ if (w_index != 0x5 || (w_value >> 8))
+ break;
+ interface = w_value & 0xFF;
++ if (interface >= MAX_CONFIG_INTERFACES ||
++ !os_desc_cfg->interface[interface])
++ break;
+ buf[6] = w_index;
+ count = count_ext_prop(os_desc_cfg,
+ interface);
+@@ -2209,7 +2246,7 @@ int composite_dev_prepare(struct usb_composite_driver *composite,
+ if (!cdev->req)
+ return -ENOMEM;
+
+- cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL);
++ cdev->req->buf = kzalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL);
+ if (!cdev->req->buf)
+ goto fail;
+
+diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
+index 477e72a1d11e7..528b9ec1d9e85 100644
+--- a/drivers/usb/gadget/configfs.c
++++ b/drivers/usb/gadget/configfs.c
+@@ -416,10 +416,9 @@ static int config_usb_cfg_link(
+ struct usb_composite_dev *cdev = cfg->c.cdev;
+ struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+
+- struct config_group *group = to_config_group(usb_func_ci);
+- struct usb_function_instance *fi = container_of(group,
+- struct usb_function_instance, group);
+- struct usb_function_instance *a_fi;
++ struct usb_function_instance *fi =
++ to_usb_function_instance(usb_func_ci);
++ struct usb_function_instance *a_fi = NULL, *iter;
+ struct usb_function *f;
+ int ret;
+
+@@ -429,11 +428,19 @@ static int config_usb_cfg_link(
+ * from another gadget or a random directory.
+ * Also a function instance can only be linked once.
+ */
+- list_for_each_entry(a_fi, &gi->available_func, cfs_list) {
+- if (a_fi == fi)
+- break;
++
++ if (gi->composite.gadget_driver.udc_name) {
++ ret = -EINVAL;
++ goto out;
+ }
+- if (a_fi != fi) {
++
++ list_for_each_entry(iter, &gi->available_func, cfs_list) {
++ if (iter != fi)
++ continue;
++ a_fi = iter;
++ break;
++ }
++ if (!a_fi) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -467,9 +474,8 @@ static void config_usb_cfg_unlink(
+ struct usb_composite_dev *cdev = cfg->c.cdev;
+ struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+
+- struct config_group *group = to_config_group(usb_func_ci);
+- struct usb_function_instance *fi = container_of(group,
+- struct usb_function_instance, group);
++ struct usb_function_instance *fi =
++ to_usb_function_instance(usb_func_ci);
+ struct usb_function *f;
+
+ /*
+@@ -890,18 +896,18 @@ static int os_desc_link(struct config_item *os_desc_ci,
+ struct gadget_info *gi = container_of(to_config_group(os_desc_ci),
+ struct gadget_info, os_desc_group);
+ struct usb_composite_dev *cdev = &gi->cdev;
+- struct config_usb_cfg *c_target =
+- container_of(to_config_group(usb_cfg_ci),
+- struct config_usb_cfg, group);
+- struct usb_configuration *c;
++ struct config_usb_cfg *c_target = to_config_usb_cfg(usb_cfg_ci);
++ struct usb_configuration *c = NULL, *iter;
+ int ret;
+
+ mutex_lock(&gi->lock);
+- list_for_each_entry(c, &cdev->configs, list) {
+- if (c == &c_target->c)
+- break;
++ list_for_each_entry(iter, &cdev->configs, list) {
++ if (iter != &c_target->c)
++ continue;
++ c = iter;
++ break;
+ }
+- if (c != &c_target->c) {
++ if (!c) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -1378,6 +1384,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
+ if (gadget_is_otg(gadget))
+ c->descriptors = otg_desc;
+
++ /* Properly configure the bmAttributes wakeup bit */
++ check_remote_wakeup_config(gadget, c);
++
+ cfg = container_of(c, struct config_usb_cfg, c);
+ if (!list_empty(&cfg->string_list)) {
+ i = 0;
+@@ -1447,6 +1456,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
+ usb_ep_autoconfig_reset(cdev->gadget);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ cdev->gadget = NULL;
++ cdev->deactivations = 0;
++ gadget->deactivated = false;
+ set_gadget_data(gadget, NULL);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ }
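
The configfs link fixes above replace the "inspect the loop cursor after the loop" idiom with a separate iterator and a NULL-initialized found pointer, since a list_for_each_entry() cursor does not point at a valid entry when the loop finishes without a match. The same lookup pattern, written against a plain singly linked list with invented types, looks roughly like this:

/* Find a node in a list with an explicit `found` pointer instead of
 * relying on the loop cursor after the loop ends. */
#include <stddef.h>
#include <stdio.h>

struct node {
	int id;
	struct node *next;
};

static struct node *find_node(struct node *head, const struct node *wanted)
{
	struct node *found = NULL;

	for (struct node *iter = head; iter; iter = iter->next) {
		if (iter != wanted)
			continue;
		found = iter;	/* only assign when we really matched */
		break;
	}
	return found;		/* NULL means "not in this list" */
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node stray = { 9, NULL };

	printf("b in list:     %s\n", find_node(&a, &b)     ? "yes" : "no");
	printf("stray in list: %s\n", find_node(&a, &stray) ? "yes" : "no");
	return 0;
}
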
+diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
+index 8260f38025b72..73ad9c3acc336 100644
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -122,8 +122,6 @@ struct ffs_ep {
+ struct usb_endpoint_descriptor *descs[3];
+
+ u8 num;
+-
+- int status; /* P: epfile->mutex */
+ };
+
+ struct ffs_epfile {
+@@ -227,6 +225,9 @@ struct ffs_io_data {
+ bool use_sg;
+
+ struct ffs_data *ffs;
++
++ int status;
++ struct completion done;
+ };
+
+ struct ffs_desc_helper {
+@@ -278,6 +279,11 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len)
+ struct usb_request *req = ffs->ep0req;
+ int ret;
+
++ if (!req) {
++ spin_unlock_irq(&ffs->ev.waitq.lock);
++ return -EINVAL;
++ }
++
+ req->zero = len < le16_to_cpu(ffs->ev.setup.wLength);
+
+ spin_unlock_irq(&ffs->ev.waitq.lock);
+@@ -614,7 +620,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file)
+ file->private_data = ffs;
+ ffs_data_opened(ffs);
+
+- return 0;
++ return stream_open(inode, file);
+ }
+
+ static int ffs_ep0_release(struct inode *inode, struct file *file)
+@@ -707,12 +713,15 @@ static const struct file_operations ffs_ep0_operations = {
+
+ static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
+ {
++ struct ffs_io_data *io_data = req->context;
++
+ ENTER();
+- if (req->context) {
+- struct ffs_ep *ep = _ep->driver_data;
+- ep->status = req->status ? req->status : req->actual;
+- complete(req->context);
+- }
++ if (req->status)
++ io_data->status = req->status;
++ else
++ io_data->status = req->actual;
++
++ complete(&io_data->done);
+ }
+
+ static ssize_t ffs_copy_to_iter(void *data, int data_len, struct iov_iter *iter)
+@@ -1050,7 +1059,6 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
+ WARN(1, "%s: data_len == -EINVAL\n", __func__);
+ ret = -EINVAL;
+ } else if (!io_data->aio) {
+- DECLARE_COMPLETION_ONSTACK(done);
+ bool interrupted = false;
+
+ req = ep->req;
+@@ -1066,7 +1074,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
+
+ io_data->buf = data;
+
+- req->context = &done;
++ init_completion(&io_data->done);
++ req->context = io_data;
+ req->complete = ffs_epfile_io_complete;
+
+ ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
+@@ -1075,7 +1084,12 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
+
+ spin_unlock_irq(&epfile->ffs->eps_lock);
+
+- if (wait_for_completion_interruptible(&done)) {
++ if (wait_for_completion_interruptible(&io_data->done)) {
++ spin_lock_irq(&epfile->ffs->eps_lock);
++ if (epfile->ep != ep) {
++ ret = -ESHUTDOWN;
++ goto error_lock;
++ }
+ /*
+ * To avoid race condition with ffs_epfile_io_complete,
+ * dequeue the request first then check
+@@ -1083,17 +1097,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
+ * condition with req->complete callback.
+ */
+ usb_ep_dequeue(ep->ep, req);
+- wait_for_completion(&done);
+- interrupted = ep->status < 0;
++ spin_unlock_irq(&epfile->ffs->eps_lock);
++ wait_for_completion(&io_data->done);
++ interrupted = io_data->status < 0;
+ }
+
+ if (interrupted)
+ ret = -EINTR;
+- else if (io_data->read && ep->status > 0)
+- ret = __ffs_epfile_read_data(epfile, data, ep->status,
++ else if (io_data->read && io_data->status > 0)
++ ret = __ffs_epfile_read_data(epfile, data, io_data->status,
+ &io_data->data);
+ else
+- ret = ep->status;
++ ret = io_data->status;
+ goto error_mutex;
+ } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
+ ret = -ENOMEM;
+@@ -1154,7 +1169,7 @@ ffs_epfile_open(struct inode *inode, struct file *file)
+ file->private_data = epfile;
+ ffs_data_opened(epfile->ffs);
+
+- return 0;
++ return stream_open(inode, file);
+ }
+
+ static int ffs_aio_cancel(struct kiocb *kiocb)
+@@ -1711,16 +1726,24 @@ static void ffs_data_put(struct ffs_data *ffs)
+
+ static void ffs_data_closed(struct ffs_data *ffs)
+ {
++ struct ffs_epfile *epfiles;
++ unsigned long flags;
++
+ ENTER();
+
+ if (atomic_dec_and_test(&ffs->opened)) {
+ if (ffs->no_disconnect) {
+ ffs->state = FFS_DEACTIVATED;
+- if (ffs->epfiles) {
+- ffs_epfiles_destroy(ffs->epfiles,
+- ffs->eps_count);
+- ffs->epfiles = NULL;
+- }
++ spin_lock_irqsave(&ffs->eps_lock, flags);
++ epfiles = ffs->epfiles;
++ ffs->epfiles = NULL;
++ spin_unlock_irqrestore(&ffs->eps_lock,
++ flags);
++
++ if (epfiles)
++ ffs_epfiles_destroy(epfiles,
++ ffs->eps_count);
++
+ if (ffs->setup_state == FFS_SETUP_PENDING)
+ __ffs_ep0_stall(ffs);
+ } else {
+@@ -1767,17 +1790,34 @@ static struct ffs_data *ffs_data_new(const char *dev_name)
+
+ static void ffs_data_clear(struct ffs_data *ffs)
+ {
++ struct ffs_epfile *epfiles;
++ unsigned long flags;
++
+ ENTER();
+
+ ffs_closed(ffs);
+
+ BUG_ON(ffs->gadget);
+
+- if (ffs->epfiles)
+- ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
++ spin_lock_irqsave(&ffs->eps_lock, flags);
++ epfiles = ffs->epfiles;
++ ffs->epfiles = NULL;
++ spin_unlock_irqrestore(&ffs->eps_lock, flags);
+
+- if (ffs->ffs_eventfd)
++ /*
++	 * A race is possible between ffs_func_eps_disable and
++	 * ffs_epfile_release, so keeping a local copy of epfiles
++	 * saves us from a use-after-free.
++ */
++ if (epfiles) {
++ ffs_epfiles_destroy(epfiles, ffs->eps_count);
++ ffs->epfiles = NULL;
++ }
++
++ if (ffs->ffs_eventfd) {
+ eventfd_ctx_put(ffs->ffs_eventfd);
++ ffs->ffs_eventfd = NULL;
++ }
+
+ kfree(ffs->raw_descs_data);
+ kfree(ffs->raw_strings);
+@@ -1790,7 +1830,6 @@ static void ffs_data_reset(struct ffs_data *ffs)
+
+ ffs_data_clear(ffs);
+
+- ffs->epfiles = NULL;
+ ffs->raw_descs_data = NULL;
+ ffs->raw_descs = NULL;
+ ffs->raw_strings = NULL;
+@@ -1858,10 +1897,14 @@ static void functionfs_unbind(struct ffs_data *ffs)
+ ENTER();
+
+ if (!WARN_ON(!ffs->gadget)) {
++ /* dequeue before freeing ep0req */
++ usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req);
++ mutex_lock(&ffs->mutex);
+ usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req);
+ ffs->ep0req = NULL;
+ ffs->gadget = NULL;
+ clear_bit(FFS_FL_BOUND, &ffs->flags);
++ mutex_unlock(&ffs->mutex);
+ ffs_data_put(ffs);
+ }
+ }
+@@ -1919,12 +1962,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count)
+
+ static void ffs_func_eps_disable(struct ffs_function *func)
+ {
+- struct ffs_ep *ep = func->eps;
+- struct ffs_epfile *epfile = func->ffs->epfiles;
+- unsigned count = func->ffs->eps_count;
++ struct ffs_ep *ep;
++ struct ffs_epfile *epfile;
++ unsigned short count;
+ unsigned long flags;
+
+ spin_lock_irqsave(&func->ffs->eps_lock, flags);
++ count = func->ffs->eps_count;
++ epfile = func->ffs->epfiles;
++ ep = func->eps;
+ while (count--) {
+ /* pending requests get nuked */
+ if (ep->ep)
+@@ -1942,14 +1988,18 @@ static void ffs_func_eps_disable(struct ffs_function *func)
+
+ static int ffs_func_eps_enable(struct ffs_function *func)
+ {
+- struct ffs_data *ffs = func->ffs;
+- struct ffs_ep *ep = func->eps;
+- struct ffs_epfile *epfile = ffs->epfiles;
+- unsigned count = ffs->eps_count;
++ struct ffs_data *ffs;
++ struct ffs_ep *ep;
++ struct ffs_epfile *epfile;
++ unsigned short count;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&func->ffs->eps_lock, flags);
++ ffs = func->ffs;
++ ep = func->eps;
++ epfile = ffs->epfiles;
++ count = ffs->eps_count;
+ while(count--) {
+ ep->ep->driver_data = ep;
+
+@@ -3570,6 +3620,7 @@ static void ffs_func_unbind(struct usb_configuration *c,
+ /* Drain any pending AIO completions */
+ drain_workqueue(ffs->io_completion_wq);
+
++ ffs_event_add(ffs, FUNCTIONFS_UNBIND);
+ if (!--opts->refcnt)
+ functionfs_unbind(ffs);
+
+@@ -3594,7 +3645,6 @@ static void ffs_func_unbind(struct usb_configuration *c,
+ func->function.ssp_descriptors = NULL;
+ func->interfaces_nums = NULL;
+
+- ffs_event_add(ffs, FUNCTIONFS_UNBIND);
+ }
+
+ static struct usb_function *ffs_alloc(struct usb_function_instance *fi)
+diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
+index ca0a7d9eaa34e..6be6009f911e1 100644
+--- a/drivers/usb/gadget/function/f_hid.c
++++ b/drivers/usb/gadget/function/f_hid.c
+@@ -71,7 +71,7 @@ struct f_hidg {
+ wait_queue_head_t write_queue;
+ struct usb_request *req;
+
+- int minor;
++ struct device dev;
+ struct cdev cdev;
+ struct usb_function func;
+
+@@ -84,6 +84,14 @@ static inline struct f_hidg *func_to_hidg(struct usb_function *f)
+ return container_of(f, struct f_hidg, func);
+ }
+
++static void hidg_release(struct device *dev)
++{
++ struct f_hidg *hidg = container_of(dev, struct f_hidg, dev);
++
++ kfree(hidg->set_report_buf);
++ kfree(hidg);
++}
++
+ /*-------------------------------------------------------------------------*/
+ /* Static descriptors */
+
+@@ -904,9 +912,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
+ struct usb_ep *ep;
+ struct f_hidg *hidg = func_to_hidg(f);
+ struct usb_string *us;
+- struct device *device;
+ int status;
+- dev_t dev;
+
+ /* maybe allocate device-global string IDs, and patch descriptors */
+ us = usb_gstrings_attach(c->cdev, ct_func_strings,
+@@ -999,21 +1005,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
+
+ /* create char device */
+ cdev_init(&hidg->cdev, &f_hidg_fops);
+- dev = MKDEV(major, hidg->minor);
+- status = cdev_add(&hidg->cdev, dev, 1);
++ status = cdev_device_add(&hidg->cdev, &hidg->dev);
+ if (status)
+ goto fail_free_descs;
+
+- device = device_create(hidg_class, NULL, dev, NULL,
+- "%s%d", "hidg", hidg->minor);
+- if (IS_ERR(device)) {
+- status = PTR_ERR(device);
+- goto del;
+- }
+-
+ return 0;
+-del:
+- cdev_del(&hidg->cdev);
+ fail_free_descs:
+ usb_free_all_descriptors(f);
+ fail:
+@@ -1244,9 +1240,7 @@ static void hidg_free(struct usb_function *f)
+
+ hidg = func_to_hidg(f);
+ opts = container_of(f->fi, struct f_hid_opts, func_inst);
+- kfree(hidg->report_desc);
+- kfree(hidg->set_report_buf);
+- kfree(hidg);
++ put_device(&hidg->dev);
+ mutex_lock(&opts->lock);
+ --opts->refcnt;
+ mutex_unlock(&opts->lock);
+@@ -1256,8 +1250,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f)
+ {
+ struct f_hidg *hidg = func_to_hidg(f);
+
+- device_destroy(hidg_class, MKDEV(major, hidg->minor));
+- cdev_del(&hidg->cdev);
++ cdev_device_del(&hidg->cdev, &hidg->dev);
+
+ usb_free_all_descriptors(f);
+ }
+@@ -1266,6 +1259,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
+ {
+ struct f_hidg *hidg;
+ struct f_hid_opts *opts;
++ int ret;
+
+ /* allocate and initialize one new instance */
+ hidg = kzalloc(sizeof(*hidg), GFP_KERNEL);
+@@ -1277,17 +1271,28 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
+ mutex_lock(&opts->lock);
+ ++opts->refcnt;
+
+- hidg->minor = opts->minor;
++ device_initialize(&hidg->dev);
++ hidg->dev.release = hidg_release;
++ hidg->dev.class = hidg_class;
++ hidg->dev.devt = MKDEV(major, opts->minor);
++ ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor);
++ if (ret) {
++ --opts->refcnt;
++ mutex_unlock(&opts->lock);
++ return ERR_PTR(ret);
++ }
++
+ hidg->bInterfaceSubClass = opts->subclass;
+ hidg->bInterfaceProtocol = opts->protocol;
+ hidg->report_length = opts->report_length;
+ hidg->report_desc_length = opts->report_desc_length;
+ if (opts->report_desc) {
+- hidg->report_desc = kmemdup(opts->report_desc,
+- opts->report_desc_length,
+- GFP_KERNEL);
++ hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc,
++ opts->report_desc_length,
++ GFP_KERNEL);
+ if (!hidg->report_desc) {
+- kfree(hidg);
++ put_device(&hidg->dev);
++ --opts->refcnt;
+ mutex_unlock(&opts->lock);
+ return ERR_PTR(-ENOMEM);
+ }
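
The f_hid rework above hangs the function state off a struct device with a release() callback, so hidg_release() frees the memory only when the last reference is dropped (the put_device() in hidg_free()) rather than through an unconditional kfree() at unbind time. Stripped of the driver model, the underlying idea is plain reference counting with an owner-supplied destructor; the refobj type and helpers below are invented for illustration only.

/* Minimal reference-counted object with a release callback. */
#include <stdio.h>
#include <stdlib.h>

struct refobj {
	int refcount;
	void (*release)(struct refobj *obj);
	char *payload;			/* freed by the release callback */
};

static void refobj_get(struct refobj *obj) { obj->refcount++; }

static void refobj_put(struct refobj *obj)
{
	if (--obj->refcount == 0)
		obj->release(obj);	/* last user frees everything */
}

static void my_release(struct refobj *obj)
{
	printf("releasing object\n");
	free(obj->payload);
	free(obj);
}

int main(void)
{
	struct refobj *obj = calloc(1, sizeof(*obj));

	obj->refcount = 1;		/* the creator holds one reference */
	obj->release = my_release;
	obj->payload = malloc(16);

	refobj_get(obj);		/* e.g. an open character device */
	refobj_put(obj);		/* unbind drops its reference ... */
	refobj_put(obj);		/* ... and the last close frees it */
	return 0;
}
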
+diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
+index 6ad669dde41c8..5bd76c6d38e74 100644
+--- a/drivers/usb/gadget/function/f_mass_storage.c
++++ b/drivers/usb/gadget/function/f_mass_storage.c
+@@ -919,7 +919,7 @@ static void invalidate_sub(struct fsg_lun *curlun)
+ {
+ struct file *filp = curlun->filp;
+ struct inode *inode = file_inode(filp);
+- unsigned long rc;
++ unsigned long __maybe_unused rc;
+
+ rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
+ VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc);
+diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
+index dc8f078f918c5..e0c1832342838 100644
+--- a/drivers/usb/gadget/function/f_ncm.c
++++ b/drivers/usb/gadget/function/f_ncm.c
+@@ -83,7 +83,9 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f)
+ /* peak (theoretical) bulk transfer rate in bits-per-second */
+ static inline unsigned ncm_bitrate(struct usb_gadget *g)
+ {
+- if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS)
++ if (!g)
++ return 0;
++ else if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS)
+ return 4250000000U;
+ else if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER)
+ return 3750000000U;
+diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
+index abec5c58f5251..a881c69b1f2bf 100644
+--- a/drivers/usb/gadget/function/f_printer.c
++++ b/drivers/usb/gadget/function/f_printer.c
+@@ -89,7 +89,7 @@ struct printer_dev {
+ u8 printer_cdev_open;
+ wait_queue_head_t wait;
+ unsigned q_len;
+- char *pnp_string; /* We don't own memory! */
++ char **pnp_string; /* We don't own memory! */
+ struct usb_function function;
+ };
+
+@@ -1000,16 +1000,16 @@ static int printer_func_setup(struct usb_function *f,
+ if ((wIndex>>8) != dev->interface)
+ break;
+
+- if (!dev->pnp_string) {
++ if (!*dev->pnp_string) {
+ value = 0;
+ break;
+ }
+- value = strlen(dev->pnp_string);
++ value = strlen(*dev->pnp_string);
+ buf[0] = (value >> 8) & 0xFF;
+ buf[1] = value & 0xFF;
+- memcpy(buf + 2, dev->pnp_string, value);
++ memcpy(buf + 2, *dev->pnp_string, value);
+ DBG(dev, "1284 PNP String: %x %s\n", value,
+- dev->pnp_string);
++ *dev->pnp_string);
+ break;
+
+ case GET_PORT_STATUS: /* Get Port Status */
+@@ -1475,7 +1475,7 @@ static struct usb_function *gprinter_alloc(struct usb_function_instance *fi)
+ kref_init(&dev->kref);
+ ++opts->refcnt;
+ dev->minor = opts->minor;
+- dev->pnp_string = opts->pnp_string;
++ dev->pnp_string = &opts->pnp_string;
+ dev->q_len = opts->q_len;
+ mutex_unlock(&opts->lock);
+
+diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
+index 1abf08e5164af..6803cd60cc6dc 100644
+--- a/drivers/usb/gadget/function/f_sourcesink.c
++++ b/drivers/usb/gadget/function/f_sourcesink.c
+@@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
+
+ if (is_iso) {
+ switch (speed) {
++ case USB_SPEED_SUPER_PLUS:
+ case USB_SPEED_SUPER:
+ size = ss->isoc_maxpacket *
+ (ss->isoc_mult + 1) *
+diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
+index ef55b8bb5870a..850394ed8eb14 100644
+--- a/drivers/usb/gadget/function/f_uac2.c
++++ b/drivers/usb/gadget/function/f_uac2.c
+@@ -202,7 +202,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = {
+
+ .bDescriptorSubtype = UAC_INPUT_TERMINAL,
+ /* .bTerminalID = DYNAMIC */
+- .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED),
++ .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE),
+ .bAssocTerminal = 0,
+ /* .bCSourceID = DYNAMIC */
+ .iChannelNames = 0,
+@@ -230,7 +230,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = {
+
+ .bDescriptorSubtype = UAC_OUTPUT_TERMINAL,
+ /* .bTerminalID = DYNAMIC */
+- .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED),
++ .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER),
+ .bAssocTerminal = 0,
+ /* .bSourceID = DYNAMIC */
+ /* .bCSourceID = DYNAMIC */
+@@ -281,6 +281,12 @@ static struct usb_endpoint_descriptor ss_ep_int_desc = {
+ .bInterval = 4,
+ };
+
++static struct usb_ss_ep_comp_descriptor ss_ep_int_desc_comp = {
++ .bLength = sizeof(ss_ep_int_desc_comp),
++ .bDescriptorType = USB_DT_SS_ENDPOINT_COMP,
++ .wBytesPerInterval = cpu_to_le16(6),
++};
++
+ /* Audio Streaming OUT Interface - Alt0 */
+ static struct usb_interface_descriptor std_as_out_if0_desc = {
+ .bLength = sizeof std_as_out_if0_desc,
+@@ -594,7 +600,8 @@ static struct usb_descriptor_header *ss_audio_desc[] = {
+ (struct usb_descriptor_header *)&in_feature_unit_desc,
+ (struct usb_descriptor_header *)&io_out_ot_desc,
+
+- (struct usb_descriptor_header *)&ss_ep_int_desc,
++ (struct usb_descriptor_header *)&ss_ep_int_desc,
++ (struct usb_descriptor_header *)&ss_ep_int_desc_comp,
+
+ (struct usb_descriptor_header *)&std_as_out_if0_desc,
+ (struct usb_descriptor_header *)&std_as_out_if1_desc,
+@@ -721,6 +728,7 @@ static void setup_headers(struct f_uac2_opts *opts,
+ struct usb_ss_ep_comp_descriptor *epout_desc_comp = NULL;
+ struct usb_ss_ep_comp_descriptor *epin_desc_comp = NULL;
+ struct usb_ss_ep_comp_descriptor *epin_fback_desc_comp = NULL;
++ struct usb_ss_ep_comp_descriptor *ep_int_desc_comp = NULL;
+ struct usb_endpoint_descriptor *epout_desc;
+ struct usb_endpoint_descriptor *epin_desc;
+ struct usb_endpoint_descriptor *epin_fback_desc;
+@@ -748,6 +756,7 @@ static void setup_headers(struct f_uac2_opts *opts,
+ epin_fback_desc = &ss_epin_fback_desc;
+ epin_fback_desc_comp = &ss_epin_fback_desc_comp;
+ ep_int_desc = &ss_ep_int_desc;
++ ep_int_desc_comp = &ss_ep_int_desc_comp;
+ }
+
+ i = 0;
+@@ -760,15 +769,15 @@ static void setup_headers(struct f_uac2_opts *opts,
+ headers[i++] = USBDHDR(&out_clk_src_desc);
+ headers[i++] = USBDHDR(&usb_out_it_desc);
+
+- if (FUOUT_EN(opts))
+- headers[i++] = USBDHDR(out_feature_unit_desc);
+- }
++ if (FUOUT_EN(opts))
++ headers[i++] = USBDHDR(out_feature_unit_desc);
++ }
+
+ if (EPIN_EN(opts)) {
+ headers[i++] = USBDHDR(&io_in_it_desc);
+
+- if (FUIN_EN(opts))
+- headers[i++] = USBDHDR(in_feature_unit_desc);
++ if (FUIN_EN(opts))
++ headers[i++] = USBDHDR(in_feature_unit_desc);
+
+ headers[i++] = USBDHDR(&usb_in_ot_desc);
+ }
+@@ -776,10 +785,13 @@ static void setup_headers(struct f_uac2_opts *opts,
+ if (EPOUT_EN(opts))
+ headers[i++] = USBDHDR(&io_out_ot_desc);
+
+- if (FUOUT_EN(opts) || FUIN_EN(opts))
+- headers[i++] = USBDHDR(ep_int_desc);
++ if (FUOUT_EN(opts) || FUIN_EN(opts)) {
++ headers[i++] = USBDHDR(ep_int_desc);
++ if (ep_int_desc_comp)
++ headers[i++] = USBDHDR(ep_int_desc_comp);
++ }
+
+- if (EPOUT_EN(opts)) {
++ if (EPOUT_EN(opts)) {
+ headers[i++] = USBDHDR(&std_as_out_if0_desc);
+ headers[i++] = USBDHDR(&std_as_out_if1_desc);
+ headers[i++] = USBDHDR(&as_out_hdr_desc);
+@@ -1057,6 +1069,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
+ }
+ std_as_out_if0_desc.bInterfaceNumber = ret;
+ std_as_out_if1_desc.bInterfaceNumber = ret;
++ std_as_out_if1_desc.bNumEndpoints = 1;
+ uac2->as_out_intf = ret;
+ uac2->as_out_alt = 0;
+
+diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
+index 9d87c0fb8f92e..5df1b68e5eacc 100644
+--- a/drivers/usb/gadget/function/f_uvc.c
++++ b/drivers/usb/gadget/function/f_uvc.c
+@@ -213,8 +213,9 @@ uvc_function_ep0_complete(struct usb_ep *ep, struct usb_request *req)
+
+ memset(&v4l2_event, 0, sizeof(v4l2_event));
+ v4l2_event.type = UVC_EVENT_DATA;
+- uvc_event->data.length = req->actual;
+- memcpy(&uvc_event->data.data, req->buf, req->actual);
++ uvc_event->data.length = min_t(unsigned int, req->actual,
++ sizeof(uvc_event->data.data));
++ memcpy(&uvc_event->data.data, req->buf, uvc_event->data.length);
+ v4l2_event_queue(&uvc->vdev, &v4l2_event);
+ }
+ }
+@@ -884,17 +885,42 @@ static void uvc_free(struct usb_function *f)
+ kfree(uvc);
+ }
+
+-static void uvc_unbind(struct usb_configuration *c, struct usb_function *f)
++static void uvc_function_unbind(struct usb_configuration *c,
++ struct usb_function *f)
+ {
+ struct usb_composite_dev *cdev = c->cdev;
+ struct uvc_device *uvc = to_uvc(f);
++ long wait_ret = 1;
+
+- uvcg_info(f, "%s\n", __func__);
++ uvcg_info(f, "%s()\n", __func__);
++
++ /* If we know we're connected via v4l2, then there should be a cleanup
++ * of the device from userspace either via UVC_EVENT_DISCONNECT or
++	 * through the video device removal uevent. Allow some time for the
++ * application to close out before things get deleted.
++ */
++ if (uvc->func_connected) {
++ uvcg_dbg(f, "waiting for clean disconnect\n");
++ wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
++ uvc->func_connected == false, msecs_to_jiffies(500));
++ uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret);
++ }
+
+ device_remove_file(&uvc->vdev.dev, &dev_attr_function_name);
+ video_unregister_device(&uvc->vdev);
+ v4l2_device_unregister(&uvc->v4l2_dev);
+
++ if (uvc->func_connected) {
++ /* Wait for the release to occur to ensure there are no longer any
++ * pending operations that may cause panics when resources are cleaned
++ * up.
++ */
++ uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__);
++ wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
++ uvc->func_connected == false, msecs_to_jiffies(1000));
++ uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret);
++ }
++
+ usb_ep_free_request(cdev->gadget->ep0, uvc->control_req);
+ kfree(uvc->control_buf);
+
+@@ -913,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi)
+
+ mutex_init(&uvc->video.mutex);
+ uvc->state = UVC_STATE_DISCONNECTED;
++ init_waitqueue_head(&uvc->func_connected_queue);
+ opts = fi_to_f_uvc_opts(fi);
+
+ mutex_lock(&opts->lock);
+@@ -943,7 +970,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi)
+ /* Register the function. */
+ uvc->func.name = "uvc";
+ uvc->func.bind = uvc_function_bind;
+- uvc->func.unbind = uvc_unbind;
++ uvc->func.unbind = uvc_function_unbind;
+ uvc->func.get_alt = uvc_function_get_alt;
+ uvc->func.set_alt = uvc_function_set_alt;
+ uvc->func.disable = uvc_function_disable;
+diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
+index 64de9f1b874c5..4150de96b937a 100644
+--- a/drivers/usb/gadget/function/rndis.c
++++ b/drivers/usb/gadget/function/rndis.c
+@@ -637,14 +637,18 @@ static int rndis_set_response(struct rndis_params *params,
+ rndis_set_cmplt_type *resp;
+ rndis_resp_t *r;
+
++ BufLength = le32_to_cpu(buf->InformationBufferLength);
++ BufOffset = le32_to_cpu(buf->InformationBufferOffset);
++ if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
++ (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
++ (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
++ return -EINVAL;
++
+ r = rndis_add_response(params, sizeof(rndis_set_cmplt_type));
+ if (!r)
+ return -ENOMEM;
+ resp = (rndis_set_cmplt_type *)r->buf;
+
+- BufLength = le32_to_cpu(buf->InformationBufferLength);
+- BufOffset = le32_to_cpu(buf->InformationBufferOffset);
+-
+ #ifdef VERBOSE_DEBUG
+ pr_debug("%s: Length: %d\n", __func__, BufLength);
+ pr_debug("%s: Offset: %d\n", __func__, BufOffset);
+@@ -919,6 +923,7 @@ struct rndis_params *rndis_register(void (*resp_avail)(void *v), void *v)
+ params->resp_avail = resp_avail;
+ params->v = v;
+ INIT_LIST_HEAD(&params->resp_queue);
++ spin_lock_init(&params->resp_lock);
+ pr_debug("%s: configNr = %d\n", __func__, i);
+
+ return params;
+@@ -1012,12 +1017,14 @@ void rndis_free_response(struct rndis_params *params, u8 *buf)
+ {
+ rndis_resp_t *r, *n;
+
++ spin_lock(&params->resp_lock);
+ list_for_each_entry_safe(r, n, &params->resp_queue, list) {
+ if (r->buf == buf) {
+ list_del(&r->list);
+ kfree(r);
+ }
+ }
++ spin_unlock(&params->resp_lock);
+ }
+ EXPORT_SYMBOL_GPL(rndis_free_response);
+
+@@ -1027,14 +1034,17 @@ u8 *rndis_get_next_response(struct rndis_params *params, u32 *length)
+
+ if (!length) return NULL;
+
++ spin_lock(&params->resp_lock);
+ list_for_each_entry_safe(r, n, &params->resp_queue, list) {
+ if (!r->send) {
+ r->send = 1;
+ *length = r->length;
++ spin_unlock(&params->resp_lock);
+ return r->buf;
+ }
+ }
+
++ spin_unlock(&params->resp_lock);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(rndis_get_next_response);
+@@ -1051,7 +1061,9 @@ static rndis_resp_t *rndis_add_response(struct rndis_params *params, u32 length)
+ r->length = length;
+ r->send = 0;
+
++ spin_lock(&params->resp_lock);
+ list_add_tail(&r->list, &params->resp_queue);
++ spin_unlock(&params->resp_lock);
+ return r;
+ }
+
+diff --git a/drivers/usb/gadget/function/rndis.h b/drivers/usb/gadget/function/rndis.h
+index f6167f7fea82b..6206b8b7490f6 100644
+--- a/drivers/usb/gadget/function/rndis.h
++++ b/drivers/usb/gadget/function/rndis.h
+@@ -174,6 +174,7 @@ typedef struct rndis_params {
+ void (*resp_avail)(void *v);
+ void *v;
+ struct list_head resp_queue;
++ spinlock_t resp_lock;
+ } rndis_params;
+
+ /* RNDIS Message parser and other useless functions */
+diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
+index b859a158a4140..e122050eebaf1 100644
+--- a/drivers/usb/gadget/function/storage_common.c
++++ b/drivers/usb/gadget/function/storage_common.c
+@@ -294,8 +294,10 @@ EXPORT_SYMBOL_GPL(fsg_lun_fsync_sub);
+ void store_cdrom_address(u8 *dest, int msf, u32 addr)
+ {
+ if (msf) {
+- /* Convert to Minutes-Seconds-Frames */
+- addr >>= 2; /* Convert to 2048-byte frames */
++ /*
++ * Convert to Minutes-Seconds-Frames.
++ * Sector size is already set to 2048 bytes.
++ */
+ addr += 2*75; /* Lead-in occupies 2 seconds */
+ dest[3] = addr % 75; /* Frames */
+ addr /= 75;
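
The store_cdrom_address() fix above removes a stray addr >>= 2: the logical block address is already counted in 2048-byte sectors, so the extra shift reported MSF positions four times too small. The conversion itself (75 frames per second, 60 seconds per minute, plus the 2-second lead-in) is simple enough to check standalone, as in this small sketch:

/* Convert a CD logical block address (2048-byte sectors) to
 * Minutes-Seconds-Frames, including the 2-second lead-in offset. */
#include <stdio.h>

struct msf { unsigned int m, s, f; };

static struct msf lba_to_msf(unsigned int addr)
{
	struct msf out;

	addr += 2 * 75;		/* lead-in occupies 2 seconds (75 frames/s) */
	out.f = addr % 75;	/* frames */
	addr /= 75;
	out.s = addr % 60;	/* seconds */
	addr /= 60;
	out.m = addr;		/* minutes */
	return out;
}

int main(void)
{
	struct msf v = lba_to_msf(16);	/* a typical first-session LBA */

	printf("LBA 16 -> %02u:%02u:%02u\n", v.m, v.s, v.f);
	return 0;
}
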
+diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c
+index ad16163b5ff80..200eb788a74b3 100644
+--- a/drivers/usb/gadget/function/u_audio.c
++++ b/drivers/usb/gadget/function/u_audio.c
+@@ -1097,7 +1097,7 @@ int g_audio_setup(struct g_audio *g_audio, const char *pcm_name,
+ }
+
+ kctl->id.device = pcm->device;
+- kctl->id.subdevice = i;
++ kctl->id.subdevice = 0;
+
+ err = snd_ctl_add(card, kctl);
+ if (err < 0)
+@@ -1120,7 +1120,7 @@ int g_audio_setup(struct g_audio *g_audio, const char *pcm_name,
+ }
+
+ kctl->id.device = pcm->device;
+- kctl->id.subdevice = i;
++ kctl->id.subdevice = 0;
+
+
+ kctl->tlv.c = u_audio_volume_tlv;
+@@ -1174,7 +1174,7 @@ void g_audio_cleanup(struct g_audio *g_audio)
+ uac = g_audio->uac;
+ card = uac->card;
+ if (card)
+- snd_card_free(card);
++ snd_card_free_when_closed(card);
+
+ kfree(uac->p_prm.reqs);
+ kfree(uac->c_prm.reqs);
+diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
+index 85a3f6d4b5af3..116dbc2ae04dd 100644
+--- a/drivers/usb/gadget/function/u_ether.c
++++ b/drivers/usb/gadget/function/u_ether.c
+@@ -17,6 +17,8 @@
+ #include <linux/etherdevice.h>
+ #include <linux/ethtool.h>
+ #include <linux/if_vlan.h>
++#include <linux/etherdevice.h>
++#include <linux/string_helpers.h>
+
+ #include "u_ether.h"
+
+@@ -773,9 +775,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
+ dev->qmult = qmult;
+ snprintf(net->name, sizeof(net->name), "%s%%d", netname);
+
+- if (get_ether_addr(dev_addr, net->dev_addr))
++ if (get_ether_addr(dev_addr, net->dev_addr)) {
++ net->addr_assign_type = NET_ADDR_RANDOM;
+ dev_warn(&g->dev,
+ "using random %s ethernet address\n", "self");
++ } else {
++ net->addr_assign_type = NET_ADDR_SET;
++ }
+ if (get_ether_addr(host_addr, dev->host_mac))
+ dev_warn(&g->dev,
+ "using random %s ethernet address\n", "host");
+@@ -832,6 +838,9 @@ struct net_device *gether_setup_name_default(const char *netname)
+ INIT_LIST_HEAD(&dev->tx_reqs);
+ INIT_LIST_HEAD(&dev->rx_reqs);
+
++ /* by default we always have a random MAC address */
++ net->addr_assign_type = NET_ADDR_RANDOM;
++
+ skb_queue_head_init(&dev->rx_frames);
+
+ /* network device setup */
+@@ -861,19 +870,22 @@ int gether_register_netdev(struct net_device *net)
+ {
+ struct eth_dev *dev;
+ struct usb_gadget *g;
+- struct sockaddr sa;
+ int status;
+
+ if (!net->dev.parent)
+ return -EINVAL;
+ dev = netdev_priv(net);
+ g = dev->gadget;
++
++ eth_hw_addr_set(net, dev->dev_mac);
++
+ status = register_netdev(net);
+ if (status < 0) {
+ dev_dbg(&g->dev, "register_netdev failed, %d\n", status);
+ return status;
+ } else {
+ INFO(dev, "HOST MAC %pM\n", dev->host_mac);
++ INFO(dev, "MAC %pM\n", dev->dev_mac);
+
+ /* two kinds of host-initiated state changes:
+ * - iff DATA transfer is active, carrier is "on"
+@@ -881,15 +893,6 @@ int gether_register_netdev(struct net_device *net)
+ */
+ netif_carrier_off(net);
+ }
+- sa.sa_family = net->type;
+- memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN);
+- rtnl_lock();
+- status = dev_set_mac_address(net, &sa, NULL);
+- rtnl_unlock();
+- if (status)
+- pr_warn("cannot set self ethernet address: %d\n", status);
+- else
+- INFO(dev, "MAC %pM\n", dev->dev_mac);
+
+ return status;
+ }
+@@ -914,6 +917,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr)
+ if (get_ether_addr(dev_addr, new_addr))
+ return -EINVAL;
+ memcpy(dev->dev_mac, new_addr, ETH_ALEN);
++ net->addr_assign_type = NET_ADDR_SET;
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(gether_set_dev_addr);
+@@ -973,6 +977,8 @@ int gether_get_host_addr_cdc(struct net_device *net, char *host_addr, int len)
+ dev = netdev_priv(net);
+ snprintf(host_addr, len, "%pm", dev->host_mac);
+
++ string_upper(host_addr, host_addr);
++
+ return strlen(host_addr);
+ }
+ EXPORT_SYMBOL_GPL(gether_get_host_addr_cdc);
+diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
+index 6f68cbeeee7c0..f975dc03a1904 100644
+--- a/drivers/usb/gadget/function/u_serial.c
++++ b/drivers/usb/gadget/function/u_serial.c
+@@ -81,6 +81,9 @@
+ #define WRITE_BUF_SIZE 8192 /* TX only */
+ #define GS_CONSOLE_BUF_SIZE 8192
+
++/* Prevents race conditions while accessing gser->ioport */
++static DEFINE_SPINLOCK(serial_port_lock);
++
+ /* console info */
+ struct gs_console {
+ struct console console;
+@@ -912,8 +915,11 @@ static void __gs_console_push(struct gs_console *cons)
+ }
+
+ req->length = size;
++
++ spin_unlock_irq(&cons->lock);
+ if (usb_ep_queue(ep, req, GFP_ATOMIC))
+ req->length = 0;
++ spin_lock_irq(&cons->lock);
+ }
+
+ static void gs_console_work(struct work_struct *work)
+@@ -1374,8 +1380,10 @@ void gserial_disconnect(struct gserial *gser)
+ if (!port)
+ return;
+
++ spin_lock_irqsave(&serial_port_lock, flags);
++
+ /* tell the TTY glue not to do I/O here any more */
+- spin_lock_irqsave(&port->port_lock, flags);
++ spin_lock(&port->port_lock);
+
+ gs_console_disconnect(port);
+
+@@ -1390,7 +1398,8 @@ void gserial_disconnect(struct gserial *gser)
+ tty_hangup(port->port.tty);
+ }
+ port->suspended = false;
+- spin_unlock_irqrestore(&port->port_lock, flags);
++ spin_unlock(&port->port_lock);
++ spin_unlock_irqrestore(&serial_port_lock, flags);
+
+ /* disable endpoints, aborting down any active I/O */
+ usb_ep_disable(gser->out);
+@@ -1413,10 +1422,19 @@ EXPORT_SYMBOL_GPL(gserial_disconnect);
+
+ void gserial_suspend(struct gserial *gser)
+ {
+- struct gs_port *port = gser->ioport;
++ struct gs_port *port;
+ unsigned long flags;
+
+- spin_lock_irqsave(&port->port_lock, flags);
++ spin_lock_irqsave(&serial_port_lock, flags);
++ port = gser->ioport;
++
++ if (!port) {
++ spin_unlock_irqrestore(&serial_port_lock, flags);
++ return;
++ }
++
++ spin_lock(&port->port_lock);
++ spin_unlock(&serial_port_lock);
+ port->suspended = true;
+ spin_unlock_irqrestore(&port->port_lock, flags);
+ }
+@@ -1424,10 +1442,19 @@ EXPORT_SYMBOL_GPL(gserial_suspend);
+
+ void gserial_resume(struct gserial *gser)
+ {
+- struct gs_port *port = gser->ioport;
++ struct gs_port *port;
+ unsigned long flags;
+
+- spin_lock_irqsave(&port->port_lock, flags);
++ spin_lock_irqsave(&serial_port_lock, flags);
++ port = gser->ioport;
++
++ if (!port) {
++ spin_unlock_irqrestore(&serial_port_lock, flags);
++ return;
++ }
++
++ spin_lock(&port->port_lock);
++ spin_unlock(&serial_port_lock);
+ port->suspended = false;
+ if (!port->start_delayed) {
+ spin_unlock_irqrestore(&port->port_lock, flags);
+diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
+index 255a61bd6a6a8..d1a4ef74742b7 100644
+--- a/drivers/usb/gadget/function/uvc.h
++++ b/drivers/usb/gadget/function/uvc.h
+@@ -14,6 +14,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/usb/composite.h>
+ #include <linux/videodev2.h>
++#include <linux/wait.h>
+
+ #include <media/v4l2-device.h>
+ #include <media/v4l2-dev.h>
+@@ -68,6 +69,8 @@ extern unsigned int uvc_gadget_trace_param;
+ #define UVC_MAX_REQUEST_SIZE 64
+ #define UVC_MAX_EVENTS 4
+
++#define UVCG_REQUEST_HEADER_LEN 2
++
+ /* ------------------------------------------------------------------------
+ * Structures
+ */
+@@ -76,7 +79,8 @@ struct uvc_request {
+ u8 *req_buffer;
+ struct uvc_video *video;
+ struct sg_table sgt;
+- u8 header[2];
++ u8 header[UVCG_REQUEST_HEADER_LEN];
++ struct uvc_buffer *last_buf;
+ };
+
+ struct uvc_video {
+@@ -126,6 +130,8 @@ struct uvc_device {
+ enum uvc_state state;
+ struct usb_function func;
+ struct uvc_video video;
++ bool func_connected;
++ wait_queue_head_t func_connected_queue;
+
+ /* Descriptors */
+ struct {
+@@ -156,6 +162,7 @@ static inline struct uvc_device *to_uvc(struct usb_function *f)
+ struct uvc_file_handle {
+ struct v4l2_fh vfh;
+ struct uvc_video *device;
++ bool is_uvc_app_handle;
+ };
+
+ #define to_uvc_file_handle(handle) \
+diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
+index 77d64031aa9c2..b553dca9246e2 100644
+--- a/drivers/usb/gadget/function/uvc_configfs.c
++++ b/drivers/usb/gadget/function/uvc_configfs.c
+@@ -505,11 +505,68 @@ UVC_ATTR_RO(uvcg_default_output_, cname, aname)
+ UVCG_DEFAULT_OUTPUT_ATTR(b_terminal_id, bTerminalID, 8);
+ UVCG_DEFAULT_OUTPUT_ATTR(w_terminal_type, wTerminalType, 16);
+ UVCG_DEFAULT_OUTPUT_ATTR(b_assoc_terminal, bAssocTerminal, 8);
+-UVCG_DEFAULT_OUTPUT_ATTR(b_source_id, bSourceID, 8);
+ UVCG_DEFAULT_OUTPUT_ATTR(i_terminal, iTerminal, 8);
+
+ #undef UVCG_DEFAULT_OUTPUT_ATTR
+
++static ssize_t uvcg_default_output_b_source_id_show(struct config_item *item,
++ char *page)
++{
++ struct config_group *group = to_config_group(item);
++ struct f_uvc_opts *opts;
++ struct config_item *opts_item;
++ struct mutex *su_mutex = &group->cg_subsys->su_mutex;
++ struct uvc_output_terminal_descriptor *cd;
++ int result;
++
++ mutex_lock(su_mutex); /* for navigating configfs hierarchy */
++
++ opts_item = group->cg_item.ci_parent->ci_parent->
++ ci_parent->ci_parent;
++ opts = to_f_uvc_opts(opts_item);
++ cd = &opts->uvc_output_terminal;
++
++ mutex_lock(&opts->lock);
++ result = sprintf(page, "%u\n", le8_to_cpu(cd->bSourceID));
++ mutex_unlock(&opts->lock);
++
++ mutex_unlock(su_mutex);
++
++ return result;
++}
++
++static ssize_t uvcg_default_output_b_source_id_store(struct config_item *item,
++ const char *page, size_t len)
++{
++ struct config_group *group = to_config_group(item);
++ struct f_uvc_opts *opts;
++ struct config_item *opts_item;
++ struct mutex *su_mutex = &group->cg_subsys->su_mutex;
++ struct uvc_output_terminal_descriptor *cd;
++ int result;
++ u8 num;
++
++ result = kstrtou8(page, 0, &num);
++ if (result)
++ return result;
++
++ mutex_lock(su_mutex); /* for navigating configfs hierarchy */
++
++ opts_item = group->cg_item.ci_parent->ci_parent->
++ ci_parent->ci_parent;
++ opts = to_f_uvc_opts(opts_item);
++ cd = &opts->uvc_output_terminal;
++
++ mutex_lock(&opts->lock);
++ cd->bSourceID = num;
++ mutex_unlock(&opts->lock);
++
++ mutex_unlock(su_mutex);
++
++ return len;
++}
++UVC_ATTR(uvcg_default_output_, b_source_id, bSourceID);
++
+ static struct configfs_attribute *uvcg_default_output_attrs[] = {
+ &uvcg_default_output_attr_b_terminal_id,
+ &uvcg_default_output_attr_w_terminal_type,
+diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
+index 7d00ad7c154c2..0cc8422afe4e2 100644
+--- a/drivers/usb/gadget/function/uvc_queue.c
++++ b/drivers/usb/gadget/function/uvc_queue.c
+@@ -44,7 +44,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
+ {
+ struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
+ struct uvc_video *video = container_of(queue, struct uvc_video, queue);
+- struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
++ unsigned int req_size;
++ unsigned int nreq;
+
+ if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
+ *nbuffers = UVC_MAX_VIDEO_BUFFERS;
+@@ -53,10 +54,16 @@ static int uvc_queue_setup(struct vb2_queue *vq,
+
+ sizes[0] = video->imagesize;
+
+- if (cdev->gadget->speed < USB_SPEED_SUPER)
+- video->uvc_num_requests = 4;
+- else
+- video->uvc_num_requests = 64;
++ req_size = video->ep->maxpacket
++ * max_t(unsigned int, video->ep->maxburst, 1)
++ * (video->ep->mult);
++
++ /* We divide by two, to increase the chance to run
++ * into fewer requests for smaller framesizes.
++ */
++ nreq = DIV_ROUND_UP(DIV_ROUND_UP(sizes[0], 2), req_size);
++ nreq = clamp(nreq, 4U, 64U);
++ video->uvc_num_requests = nreq;
+
+ return 0;
+ }
+@@ -264,6 +271,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
+ buf->state = UVC_BUF_STATE_ERROR;
+ vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR);
+ }
++ queue->buf_used = 0;
++
+ /* This must be protected by the irqlock spinlock to avoid race
+ * conditions between uvc_queue_buffer and the disconnection event that
+ * could result in an interruptible wait in uvc_dequeue_buffer. Do not
+@@ -304,6 +313,7 @@ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable)
+
+ queue->sequence = 0;
+ queue->buf_used = 0;
++ queue->flags &= ~UVC_QUEUE_DROP_INCOMPLETE;
+ } else {
+ ret = vb2_streamoff(&queue->queue, queue->queue.type);
+ if (ret < 0)
+@@ -326,33 +336,23 @@ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable)
+ }
+
+ /* called with &queue_irqlock held.. */
+-struct uvc_buffer *uvcg_queue_next_buffer(struct uvc_video_queue *queue,
++void uvcg_complete_buffer(struct uvc_video_queue *queue,
+ struct uvc_buffer *buf)
+ {
+- struct uvc_buffer *nextbuf;
+-
+- if ((queue->flags & UVC_QUEUE_DROP_INCOMPLETE) &&
+- buf->length != buf->bytesused) {
+- buf->state = UVC_BUF_STATE_QUEUED;
++ if (queue->flags & UVC_QUEUE_DROP_INCOMPLETE) {
++ queue->flags &= ~UVC_QUEUE_DROP_INCOMPLETE;
++ buf->state = UVC_BUF_STATE_ERROR;
+ vb2_set_plane_payload(&buf->buf.vb2_buf, 0, 0);
+- return buf;
++ vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR);
++ return;
+ }
+
+- list_del(&buf->queue);
+- if (!list_empty(&queue->irqqueue))
+- nextbuf = list_first_entry(&queue->irqqueue, struct uvc_buffer,
+- queue);
+- else
+- nextbuf = NULL;
+-
+ buf->buf.field = V4L2_FIELD_NONE;
+ buf->buf.sequence = queue->sequence++;
+ buf->buf.vb2_buf.timestamp = ktime_get_ns();
+
+ vb2_set_plane_payload(&buf->buf.vb2_buf, 0, buf->bytesused);
+ vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_DONE);
+-
+- return nextbuf;
+ }
+
+ struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue)
+diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
+index 05360a0767f61..b668927b5d2c4 100644
+--- a/drivers/usb/gadget/function/uvc_queue.h
++++ b/drivers/usb/gadget/function/uvc_queue.h
+@@ -93,7 +93,7 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect);
+
+ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable);
+
+-struct uvc_buffer *uvcg_queue_next_buffer(struct uvc_video_queue *queue,
++void uvcg_complete_buffer(struct uvc_video_queue *queue,
+ struct uvc_buffer *buf);
+
+ struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue);
+diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
+index 4ca89eab61590..65abd55ce2348 100644
+--- a/drivers/usb/gadget/function/uvc_v4l2.c
++++ b/drivers/usb/gadget/function/uvc_v4l2.c
+@@ -227,17 +227,56 @@ static int
+ uvc_v4l2_subscribe_event(struct v4l2_fh *fh,
+ const struct v4l2_event_subscription *sub)
+ {
++ struct uvc_device *uvc = video_get_drvdata(fh->vdev);
++ struct uvc_file_handle *handle = to_uvc_file_handle(fh);
++ int ret;
++
+ if (sub->type < UVC_EVENT_FIRST || sub->type > UVC_EVENT_LAST)
+ return -EINVAL;
+
+- return v4l2_event_subscribe(fh, sub, 2, NULL);
++ if (sub->type == UVC_EVENT_SETUP && uvc->func_connected)
++ return -EBUSY;
++
++ ret = v4l2_event_subscribe(fh, sub, 2, NULL);
++ if (ret < 0)
++ return ret;
++
++ if (sub->type == UVC_EVENT_SETUP) {
++ uvc->func_connected = true;
++ handle->is_uvc_app_handle = true;
++ uvc_function_connect(uvc);
++ }
++
++ return 0;
++}
++
++static void uvc_v4l2_disable(struct uvc_device *uvc)
++{
++ uvc_function_disconnect(uvc);
++ uvcg_video_enable(&uvc->video, 0);
++ uvcg_free_buffers(&uvc->video.queue);
++ uvc->func_connected = false;
++ wake_up_interruptible(&uvc->func_connected_queue);
+ }
+
+ static int
+ uvc_v4l2_unsubscribe_event(struct v4l2_fh *fh,
+ const struct v4l2_event_subscription *sub)
+ {
+- return v4l2_event_unsubscribe(fh, sub);
++ struct uvc_device *uvc = video_get_drvdata(fh->vdev);
++ struct uvc_file_handle *handle = to_uvc_file_handle(fh);
++ int ret;
++
++ ret = v4l2_event_unsubscribe(fh, sub);
++ if (ret < 0)
++ return ret;
++
++ if (sub->type == UVC_EVENT_SETUP && handle->is_uvc_app_handle) {
++ uvc_v4l2_disable(uvc);
++ handle->is_uvc_app_handle = false;
++ }
++
++ return 0;
+ }
+
+ static long
+@@ -292,7 +331,6 @@ uvc_v4l2_open(struct file *file)
+ handle->device = &uvc->video;
+ file->private_data = &handle->vfh;
+
+- uvc_function_connect(uvc);
+ return 0;
+ }
+
+@@ -304,11 +342,9 @@ uvc_v4l2_release(struct file *file)
+ struct uvc_file_handle *handle = to_uvc_file_handle(file->private_data);
+ struct uvc_video *video = handle->device;
+
+- uvc_function_disconnect(uvc);
+-
+ mutex_lock(&video->mutex);
+- uvcg_video_enable(video, 0);
+- uvcg_free_buffers(&video->queue);
++ if (handle->is_uvc_app_handle)
++ uvc_v4l2_disable(uvc);
+ mutex_unlock(&video->mutex);
+
+ file->private_data = NULL;
+diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
+index b4a763e5f70e1..0de7f11d14256 100644
+--- a/drivers/usb/gadget/function/uvc_video.c
++++ b/drivers/usb/gadget/function/uvc_video.c
+@@ -33,7 +33,7 @@ uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
+ if (buf->bytesused - video->queue.buf_used <= len - UVCG_REQUEST_HEADER_LEN)
+ data[1] |= UVC_STREAM_EOF;
+
+- return 2;
++ return UVCG_REQUEST_HEADER_LEN;
+ }
+
+ static int
+@@ -59,6 +59,7 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
+ struct uvc_buffer *buf)
+ {
+ void *mem = req->buf;
++ struct uvc_request *ureq = req->context;
+ int len = video->req_size;
+ int ret;
+
+@@ -83,13 +84,15 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
+ if (buf->bytesused == video->queue.buf_used) {
+ video->queue.buf_used = 0;
+ buf->state = UVC_BUF_STATE_DONE;
+- uvcg_queue_next_buffer(&video->queue, buf);
++ list_del(&buf->queue);
+ video->fid ^= UVC_STREAM_FID;
++ ureq->last_buf = buf;
+
+ video->payload_size = 0;
+ }
+
+ if (video->payload_size == video->max_payload_size ||
++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE ||
+ buf->bytesused == video->queue.buf_used)
+ video->payload_size = 0;
+ }
+@@ -104,31 +107,31 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
+ unsigned int len = video->req_size;
+ unsigned int sg_left, part = 0;
+ unsigned int i;
+- int ret;
++ int header_len;
+
+ sg = ureq->sgt.sgl;
+ sg_init_table(sg, ureq->sgt.nents);
+
+ /* Init the header. */
+- ret = uvc_video_encode_header(video, buf, ureq->header,
++ header_len = uvc_video_encode_header(video, buf, ureq->header,
+ video->req_size);
+- sg_set_buf(sg, ureq->header, UVCG_REQUEST_HEADER_LEN);
+- len -= ret;
++ sg_set_buf(sg, ureq->header, header_len);
++ len -= header_len;
+
+ if (pending <= len)
+ len = pending;
+
+ req->length = (len == pending) ?
+- len + UVCG_REQUEST_HEADER_LEN : video->req_size;
++ len + header_len : video->req_size;
+
+ /* Init the pending sgs with payload */
+ sg = sg_next(sg);
+
+ for_each_sg(sg, iter, ureq->sgt.nents - 1, i) {
+- if (!len || !buf->sg)
++ if (!len || !buf->sg || !buf->sg->length)
+ break;
+
+- sg_left = sg_dma_len(buf->sg) - buf->offset;
++ sg_left = buf->sg->length - buf->offset;
+ part = min_t(unsigned int, len, sg_left);
+
+ sg_set_page(iter, sg_page(buf->sg), part, buf->offset);
+@@ -148,14 +151,16 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
+ req->num_sgs = i + 1;
+
+ req->length -= len;
+- video->queue.buf_used += req->length - UVCG_REQUEST_HEADER_LEN;
++ video->queue.buf_used += req->length - header_len;
+
+- if (buf->bytesused == video->queue.buf_used || !buf->sg) {
++ if (buf->bytesused == video->queue.buf_used || !buf->sg ||
++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+ video->queue.buf_used = 0;
+ buf->state = UVC_BUF_STATE_DONE;
+ buf->offset = 0;
+- uvcg_queue_next_buffer(&video->queue, buf);
++ list_del(&buf->queue);
+ video->fid ^= UVC_STREAM_FID;
++ ureq->last_buf = buf;
+ }
+ }
+
+@@ -164,6 +169,7 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
+ struct uvc_buffer *buf)
+ {
+ void *mem = req->buf;
++ struct uvc_request *ureq = req->context;
+ int len = video->req_size;
+ int ret;
+
+@@ -178,11 +184,13 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
+
+ req->length = video->req_size - len;
+
+- if (buf->bytesused == video->queue.buf_used) {
++ if (buf->bytesused == video->queue.buf_used ||
++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+ video->queue.buf_used = 0;
+ buf->state = UVC_BUF_STATE_DONE;
+- uvcg_queue_next_buffer(&video->queue, buf);
++ list_del(&buf->queue);
+ video->fid ^= UVC_STREAM_FID;
++ ureq->last_buf = buf;
+ }
+ }
+
+@@ -219,18 +227,28 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
+ case 0:
+ break;
+
++ case -EXDEV:
++ uvcg_dbg(&video->uvc->func, "VS request missed xfer.\n");
++ queue->flags |= UVC_QUEUE_DROP_INCOMPLETE;
++ break;
++
+ case -ESHUTDOWN: /* disconnect from host. */
+ uvcg_dbg(&video->uvc->func, "VS request cancelled.\n");
+ uvcg_queue_cancel(queue, 1);
+ break;
+
+ default:
+- uvcg_info(&video->uvc->func,
++ uvcg_warn(&video->uvc->func,
+ "VS request completed with status %d.\n",
+ req->status);
+ uvcg_queue_cancel(queue, 0);
+ }
+
++ if (ureq->last_buf) {
++ uvcg_complete_buffer(&video->queue, ureq->last_buf);
++ ureq->last_buf = NULL;
++ }
++
+ spin_lock_irqsave(&video->req_lock, flags);
+ list_add_tail(&req->list, &video->req_free);
+ spin_unlock_irqrestore(&video->req_lock, flags);
+@@ -298,12 +316,13 @@ uvc_video_alloc_requests(struct uvc_video *video)
+ video->ureq[i].req->complete = uvc_video_complete;
+ video->ureq[i].req->context = &video->ureq[i];
+ video->ureq[i].video = video;
++ video->ureq[i].last_buf = NULL;
+
+ list_add_tail(&video->ureq[i].req->list, &video->req_free);
+ /* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
+ sg_alloc_table(&video->ureq[i].sgt,
+- DIV_ROUND_UP(req_size - 2, PAGE_SIZE) + 2,
+- GFP_KERNEL);
++ DIV_ROUND_UP(req_size - UVCG_REQUEST_HEADER_LEN,
++ PAGE_SIZE) + 2, GFP_KERNEL);
+ }
+
+ video->req_size = req_size;
+diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
+index 9bf19475f6f9a..03adeefa343b7 100644
+--- a/drivers/usb/gadget/function/uvc_video.h
++++ b/drivers/usb/gadget/function/uvc_video.h
+@@ -12,8 +12,6 @@
+ #ifndef __UVC_VIDEO_H__
+ #define __UVC_VIDEO_H__
+
+-#define UVCG_REQUEST_HEADER_LEN 2
+-
+ struct uvc_video;
+
+ int uvcg_video_enable(struct uvc_video *video, int enable);
+diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c
+index e1d566c9918ae..6bcbad3825802 100644
+--- a/drivers/usb/gadget/legacy/dbgp.c
++++ b/drivers/usb/gadget/legacy/dbgp.c
+@@ -137,7 +137,7 @@ static int dbgp_enable_ep_req(struct usb_ep *ep)
+ goto fail_1;
+ }
+
+- req->buf = kmalloc(DBGP_REQ_LEN, GFP_KERNEL);
++ req->buf = kzalloc(DBGP_REQ_LEN, GFP_KERNEL);
+ if (!req->buf) {
+ err = -ENOMEM;
+ stp = 2;
+@@ -345,6 +345,19 @@ static int dbgp_setup(struct usb_gadget *gadget,
+ void *data = NULL;
+ u16 len = 0;
+
++ if (length > DBGP_REQ_LEN) {
++ if (ctrl->bRequestType & USB_DIR_IN) {
++ /* Cast away the const, we are going to overwrite on purpose. */
++ __le16 *temp = (__le16 *)&ctrl->wLength;
++
++ *temp = cpu_to_le16(DBGP_REQ_LEN);
++ length = DBGP_REQ_LEN;
++ } else {
++ return err;
++ }
++ }
++
++
+ if (request == USB_REQ_GET_DESCRIPTOR) {
+ switch (value>>8) {
+ case USB_DT_DEVICE:
+diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c
+index 5b27d289443fe..3912cc805f3af 100644
+--- a/drivers/usb/gadget/legacy/hid.c
++++ b/drivers/usb/gadget/legacy/hid.c
+@@ -99,8 +99,10 @@ static int do_config(struct usb_configuration *c)
+
+ list_for_each_entry(e, &hidg_func_list, node) {
+ e->f = usb_get_function(e->fi);
+- if (IS_ERR(e->f))
++ if (IS_ERR(e->f)) {
++ status = PTR_ERR(e->f);
+ goto put;
++ }
+ status = usb_add_function(c, e->f);
+ if (status < 0) {
+ usb_put_function(e->f);
+diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
+index 539220d7f5b62..ed28aaa82e251 100644
+--- a/drivers/usb/gadget/legacy/inode.c
++++ b/drivers/usb/gadget/legacy/inode.c
+@@ -110,6 +110,8 @@ enum ep0_state {
+ /* enough for the whole queue: most events invalidate others */
+ #define N_EVENT 5
+
++#define RBUF_SIZE 256
++
+ struct dev_data {
+ spinlock_t lock;
+ refcount_t count;
+@@ -144,7 +146,7 @@ struct dev_data {
+ struct dentry *dentry;
+
+ /* except this scratch i/o buffer for ep0 */
+- u8 rbuf [256];
++ u8 rbuf[RBUF_SIZE];
+ };
+
+ static inline void get_dev (struct dev_data *data)
+@@ -227,6 +229,7 @@ static void put_ep (struct ep_data *data)
+ */
+
+ static const char *CHIP;
++static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */
+
+ /*----------------------------------------------------------------------*/
+
+@@ -360,6 +363,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
+ spin_unlock_irq (&epdata->dev->lock);
+
+ DBG (epdata->dev, "endpoint gone\n");
++ wait_for_completion(&done);
+ epdata->status = -ENODEV;
+ }
+ }
+@@ -1334,6 +1338,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+
++ if (w_length > RBUF_SIZE) {
++ if (ctrl->bRequestType & USB_DIR_IN) {
++ /* Cast away the const, we are going to overwrite on purpose. */
++ __le16 *temp = (__le16 *)&ctrl->wLength;
++
++ *temp = cpu_to_le16(RBUF_SIZE);
++ w_length = RBUF_SIZE;
++ } else {
++ return value;
++ }
++ }
++
+ spin_lock (&dev->lock);
+ dev->setup_abort = 0;
+ if (dev->state == STATE_DEV_UNCONNECTED) {
+@@ -1815,8 +1831,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
+ spin_lock_irq (&dev->lock);
+ value = -EINVAL;
+ if (dev->buf) {
++ spin_unlock_irq(&dev->lock);
+ kfree(kbuf);
+- goto fail;
++ return value;
+ }
+ dev->buf = kbuf;
+
+@@ -1863,8 +1880,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
+
+ value = usb_gadget_probe_driver(&gadgetfs_driver);
+ if (value != 0) {
+- kfree (dev->buf);
+- dev->buf = NULL;
++ spin_lock_irq(&dev->lock);
++ goto fail;
+ } else {
+ /* at this point "good" hardware has for the first time
+ * let the USB the host see us. alternatively, if users
+@@ -1881,6 +1898,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
+ return value;
+
+ fail:
++ dev->config = NULL;
++ dev->hs_config = NULL;
++ dev->dev = NULL;
+ spin_unlock_irq (&dev->lock);
+ pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev);
+ kfree (dev->buf);
+@@ -1994,13 +2014,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc)
+ {
+ struct inode *inode;
+ struct dev_data *dev;
++ int rc;
+
+- if (the_device)
+- return -ESRCH;
++ mutex_lock(&sb_mutex);
++
++ if (the_device) {
++ rc = -ESRCH;
++ goto Done;
++ }
+
+ CHIP = usb_get_gadget_udc_name();
+- if (!CHIP)
+- return -ENODEV;
++ if (!CHIP) {
++ rc = -ENODEV;
++ goto Done;
++ }
+
+ /* superblock */
+ sb->s_blocksize = PAGE_SIZE;
+@@ -2037,13 +2064,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc)
+ * from binding to a controller.
+ */
+ the_device = dev;
+- return 0;
++ rc = 0;
++ goto Done;
+
+-Enomem:
++ Enomem:
+ kfree(CHIP);
+ CHIP = NULL;
++ rc = -ENOMEM;
+
+- return -ENOMEM;
++ Done:
++ mutex_unlock(&sb_mutex);
++ return rc;
+ }
+
+ /* "mount -t gadgetfs path /dev/gadget" ends up here */
+@@ -2065,6 +2096,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc)
+ static void
+ gadgetfs_kill_sb (struct super_block *sb)
+ {
++ mutex_lock(&sb_mutex);
+ kill_litter_super (sb);
+ if (the_device) {
+ put_dev (the_device);
+@@ -2072,6 +2104,7 @@ gadgetfs_kill_sb (struct super_block *sb)
+ }
+ kfree(CHIP);
+ CHIP = NULL;
++ mutex_unlock(&sb_mutex);
+ }
+
+ /*----------------------------------------------------------------------*/
+diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
+index c5a2c734234a5..9d13f22743986 100644
+--- a/drivers/usb/gadget/legacy/raw_gadget.c
++++ b/drivers/usb/gadget/legacy/raw_gadget.c
+@@ -11,6 +11,7 @@
+ #include <linux/ctype.h>
+ #include <linux/debugfs.h>
+ #include <linux/delay.h>
++#include <linux/idr.h>
+ #include <linux/kref.h>
+ #include <linux/miscdevice.h>
+ #include <linux/module.h>
+@@ -36,6 +37,9 @@ MODULE_LICENSE("GPL");
+
+ /*----------------------------------------------------------------------*/
+
++static DEFINE_IDA(driver_id_numbers);
++#define DRIVER_DRIVER_NAME_LENGTH_MAX 32
++
+ #define RAW_EVENT_QUEUE_SIZE 16
+
+ struct raw_event_queue {
+@@ -145,6 +149,7 @@ enum dev_state {
+ STATE_DEV_INVALID = 0,
+ STATE_DEV_OPENED,
+ STATE_DEV_INITIALIZED,
++ STATE_DEV_REGISTERING,
+ STATE_DEV_RUNNING,
+ STATE_DEV_CLOSED,
+ STATE_DEV_FAILED
+@@ -160,6 +165,9 @@ struct raw_dev {
+ /* Reference to misc device: */
+ struct device *dev;
+
++ /* Make driver names unique */
++ int driver_id_number;
++
+ /* Protected by lock: */
+ enum dev_state state;
+ bool gadget_registered;
+@@ -188,6 +196,7 @@ static struct raw_dev *dev_new(void)
+ spin_lock_init(&dev->lock);
+ init_completion(&dev->ep0_done);
+ raw_event_queue_init(&dev->queue);
++ dev->driver_id_number = -1;
+ return dev;
+ }
+
+@@ -198,6 +207,9 @@ static void dev_free(struct kref *kref)
+
+ kfree(dev->udc_name);
+ kfree(dev->driver.udc_name);
++ kfree(dev->driver.driver.name);
++ if (dev->driver_id_number >= 0)
++ ida_free(&driver_id_numbers, dev->driver_id_number);
+ if (dev->req) {
+ if (dev->ep0_urb_queued)
+ usb_ep_dequeue(dev->gadget->ep0, dev->req);
+@@ -298,13 +310,15 @@ static int gadget_bind(struct usb_gadget *gadget,
+ dev->eps_num = i;
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+- /* Matches kref_put() in gadget_unbind(). */
+- kref_get(&dev->count);
+-
+ ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL);
+- if (ret < 0)
++ if (ret < 0) {
+ dev_err(&gadget->dev, "failed to queue event\n");
++ set_gadget_data(gadget, NULL);
++ return ret;
++ }
+
++ /* Matches kref_put() in gadget_unbind(). */
++ kref_get(&dev->count);
+ return ret;
+ }
+
+@@ -418,9 +432,11 @@ out_put:
+ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
+ {
+ int ret = 0;
++ int driver_id_number;
+ struct usb_raw_init arg;
+ char *udc_driver_name;
+ char *udc_device_name;
++ char *driver_driver_name;
+ unsigned long flags;
+
+ if (copy_from_user(&arg, (void __user *)value, sizeof(arg)))
+@@ -439,36 +455,43 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
+ return -EINVAL;
+ }
+
++ driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL);
++ if (driver_id_number < 0)
++ return driver_id_number;
++
++ driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL);
++ if (!driver_driver_name) {
++ ret = -ENOMEM;
++ goto out_free_driver_id_number;
++ }
++ snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX,
++ DRIVER_NAME ".%d", driver_id_number);
++
+ udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
+- if (!udc_driver_name)
+- return -ENOMEM;
++ if (!udc_driver_name) {
++ ret = -ENOMEM;
++ goto out_free_driver_driver_name;
++ }
+ ret = strscpy(udc_driver_name, &arg.driver_name[0],
+ UDC_NAME_LENGTH_MAX);
+- if (ret < 0) {
+- kfree(udc_driver_name);
+- return ret;
+- }
++ if (ret < 0)
++ goto out_free_udc_driver_name;
+ ret = 0;
+
+ udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
+ if (!udc_device_name) {
+- kfree(udc_driver_name);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto out_free_udc_driver_name;
+ }
+ ret = strscpy(udc_device_name, &arg.device_name[0],
+ UDC_NAME_LENGTH_MAX);
+- if (ret < 0) {
+- kfree(udc_driver_name);
+- kfree(udc_device_name);
+- return ret;
+- }
++ if (ret < 0)
++ goto out_free_udc_device_name;
+ ret = 0;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ if (dev->state != STATE_DEV_OPENED) {
+ dev_dbg(dev->dev, "fail, device is not opened\n");
+- kfree(udc_driver_name);
+- kfree(udc_device_name);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+@@ -483,14 +506,25 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
+ dev->driver.suspend = gadget_suspend;
+ dev->driver.resume = gadget_resume;
+ dev->driver.reset = gadget_reset;
+- dev->driver.driver.name = DRIVER_NAME;
++ dev->driver.driver.name = driver_driver_name;
+ dev->driver.udc_name = udc_device_name;
+ dev->driver.match_existing_only = 1;
++ dev->driver_id_number = driver_id_number;
+
+ dev->state = STATE_DEV_INITIALIZED;
++ spin_unlock_irqrestore(&dev->lock, flags);
++ return ret;
+
+ out_unlock:
+ spin_unlock_irqrestore(&dev->lock, flags);
++out_free_udc_device_name:
++ kfree(udc_device_name);
++out_free_udc_driver_name:
++ kfree(udc_driver_name);
++out_free_driver_driver_name:
++ kfree(driver_driver_name);
++out_free_driver_id_number:
++ ida_free(&driver_id_numbers, driver_id_number);
+ return ret;
+ }
+
+@@ -508,6 +542,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value)
+ ret = -EINVAL;
+ goto out_unlock;
+ }
++ dev->state = STATE_DEV_REGISTERING;
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ ret = usb_gadget_probe_driver(&dev->driver);
+@@ -1004,7 +1039,7 @@ static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io,
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+- if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) {
++ if (in != usb_endpoint_dir_in(ep->ep->desc)) {
+ dev_dbg(&dev->gadget->dev, "fail, wrong direction\n");
+ ret = -EINVAL;
+ goto out_unlock;
+diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c
+index 94e22867da1d0..e9b5846b2322c 100644
+--- a/drivers/usb/gadget/legacy/webcam.c
++++ b/drivers/usb/gadget/legacy/webcam.c
+@@ -293,6 +293,7 @@ static const struct uvc_descriptor_header * const uvc_fs_streaming_cls[] = {
+ (const struct uvc_descriptor_header *) &uvc_format_yuv,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
++ (const struct uvc_descriptor_header *) &uvc_color_matching,
+ (const struct uvc_descriptor_header *) &uvc_format_mjpg,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
+@@ -305,6 +306,7 @@ static const struct uvc_descriptor_header * const uvc_hs_streaming_cls[] = {
+ (const struct uvc_descriptor_header *) &uvc_format_yuv,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
++ (const struct uvc_descriptor_header *) &uvc_color_matching,
+ (const struct uvc_descriptor_header *) &uvc_format_mjpg,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
+@@ -317,6 +319,7 @@ static const struct uvc_descriptor_header * const uvc_ss_streaming_cls[] = {
+ (const struct uvc_descriptor_header *) &uvc_format_yuv,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p,
++ (const struct uvc_descriptor_header *) &uvc_color_matching,
+ (const struct uvc_descriptor_header *) &uvc_format_mjpg,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p,
+ (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p,
+diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
+index 8c614bb86c665..2cdd37be165a4 100644
+--- a/drivers/usb/gadget/udc/Kconfig
++++ b/drivers/usb/gadget/udc/Kconfig
+@@ -311,7 +311,7 @@ source "drivers/usb/gadget/udc/bdc/Kconfig"
+
+ config USB_AMD5536UDC
+ tristate "AMD5536 UDC"
+- depends on USB_PCI
++ depends on USB_PCI && HAS_DMA
+ select USB_SNP_CORE
+ help
+ The AMD5536 UDC is part of the AMD Geode CS5536, an x86 southbridge.
+@@ -330,6 +330,7 @@ config USB_AMD5536UDC
+ config USB_FSL_QE
+ tristate "Freescale QE/CPM USB Device Controller"
+ depends on FSL_SOC && (QUICC_ENGINE || CPM)
++ depends on !64BIT || BROKEN
+ help
+ Some of Freescale PowerPC processors have a Full Speed
+ QE/CPM2 USB controller, which support device mode with 4
+diff --git a/drivers/usb/gadget/udc/amd5536udc_pci.c b/drivers/usb/gadget/udc/amd5536udc_pci.c
+index c80f9bd51b750..a36913ae31f9e 100644
+--- a/drivers/usb/gadget/udc/amd5536udc_pci.c
++++ b/drivers/usb/gadget/udc/amd5536udc_pci.c
+@@ -170,6 +170,9 @@ static int udc_pci_probe(
+ retval = -ENODEV;
+ goto err_probe;
+ }
++
++ udc = dev;
++
+ return 0;
+
+ err_probe:
+diff --git a/drivers/usb/gadget/udc/aspeed-vhub/hub.c b/drivers/usb/gadget/udc/aspeed-vhub/hub.c
+index b9960fdd8a515..16a12d2d492e2 100644
+--- a/drivers/usb/gadget/udc/aspeed-vhub/hub.c
++++ b/drivers/usb/gadget/udc/aspeed-vhub/hub.c
+@@ -1028,8 +1028,10 @@ static int ast_vhub_init_desc(struct ast_vhub *vhub)
+ /* Initialize vhub String Descriptors. */
+ INIT_LIST_HEAD(&vhub->vhub_str_desc);
+ desc_np = of_get_child_by_name(vhub_np, "vhub-strings");
+- if (desc_np)
++ if (desc_np) {
+ ret = ast_vhub_of_parse_str_desc(vhub, desc_np);
++ of_node_put(desc_np);
++ }
+ else
+ ret = ast_vhub_str_alloc_add(vhub, &ast_vhub_strings);
+
+diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c
+index a9f07c59fc377..5c7dff6bc638f 100644
+--- a/drivers/usb/gadget/udc/bcm63xx_udc.c
++++ b/drivers/usb/gadget/udc/bcm63xx_udc.c
+@@ -2259,7 +2259,7 @@ static void bcm63xx_udc_init_debugfs(struct bcm63xx_udc *udc)
+ */
+ static void bcm63xx_udc_cleanup_debugfs(struct bcm63xx_udc *udc)
+ {
+- debugfs_remove(debugfs_lookup(udc->gadget.name, usb_debug_root));
++ debugfs_lookup_and_remove(udc->gadget.name, usb_debug_root);
+ }
+
+ /***********************************************************************
+diff --git a/drivers/usb/gadget/udc/bdc/bdc_udc.c b/drivers/usb/gadget/udc/bdc/bdc_udc.c
+index 5ac0ef88334eb..53ffaf4e2e376 100644
+--- a/drivers/usb/gadget/udc/bdc/bdc_udc.c
++++ b/drivers/usb/gadget/udc/bdc/bdc_udc.c
+@@ -151,6 +151,7 @@ static void bdc_uspc_disconnected(struct bdc *bdc, bool reinit)
+ bdc->delayed_status = false;
+ bdc->reinit = reinit;
+ bdc->test_mode = false;
++ usb_gadget_set_state(&bdc->gadget, USB_STATE_NOTATTACHED);
+ }
+
+ /* TNotify wkaeup timer */
+diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
+index 14fdf918ecfeb..6c05a3a9b542f 100644
+--- a/drivers/usb/gadget/udc/core.c
++++ b/drivers/usb/gadget/udc/core.c
+@@ -508,6 +508,33 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(usb_gadget_wakeup);
+
++/**
++ * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature.
++ * @gadget:the device being configured for remote wakeup
++ * @set:value to be configured.
++ *
++ * set to one to enable remote wakeup feature and zero to disable it.
++ *
++ * returns zero on success, else negative errno.
++ */
++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
++{
++ int ret = 0;
++
++ if (!gadget->ops->set_remote_wakeup) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
++
++ ret = gadget->ops->set_remote_wakeup(gadget, set);
++
++out:
++ trace_usb_gadget_set_remote_wakeup(gadget, ret);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup);
++
+ /**
+ * usb_gadget_set_selfpowered - sets the device selfpowered feature.
+ * @gadget:the device being declared as self-powered
+@@ -1434,7 +1461,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
+ usb_gadget_udc_stop(udc);
+
+ udc->driver = NULL;
+- udc->dev.driver = NULL;
+ udc->gadget->dev.driver = NULL;
+ }
+
+@@ -1496,7 +1522,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
+ driver->function);
+
+ udc->driver = driver;
+- udc->dev.driver = &driver->driver;
+ udc->gadget->dev.driver = &driver->driver;
+
+ usb_gadget_udc_set_speed(udc, driver->max_speed);
+@@ -1519,7 +1544,6 @@ err1:
+ dev_err(&udc->dev, "failed to start %s: %d\n",
+ udc->driver->function, ret);
+ udc->driver = NULL;
+- udc->dev.driver = NULL;
+ udc->gadget->dev.driver = NULL;
+ return ret;
+ }
+diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c
+index fdca28e72a3b4..6f7ade156437a 100644
+--- a/drivers/usb/gadget/udc/fotg210-udc.c
++++ b/drivers/usb/gadget/udc/fotg210-udc.c
+@@ -629,10 +629,10 @@ static void fotg210_request_error(struct fotg210_udc *fotg210)
+ static void fotg210_set_address(struct fotg210_udc *fotg210,
+ struct usb_ctrlrequest *ctrl)
+ {
+- if (ctrl->wValue >= 0x0100) {
++ if (le16_to_cpu(ctrl->wValue) >= 0x0100) {
+ fotg210_request_error(fotg210);
+ } else {
+- fotg210_set_dev_addr(fotg210, ctrl->wValue);
++ fotg210_set_dev_addr(fotg210, le16_to_cpu(ctrl->wValue));
+ fotg210_set_cxdone(fotg210);
+ }
+ }
+@@ -706,6 +706,20 @@ static int fotg210_is_epnstall(struct fotg210_ep *ep)
+ return value & INOUTEPMPSR_STL_EP ? 1 : 0;
+ }
+
++/* For EP0 requests triggered by this driver (currently GET_STATUS response) */
++static void fotg210_ep0_complete(struct usb_ep *_ep, struct usb_request *req)
++{
++ struct fotg210_ep *ep;
++ struct fotg210_udc *fotg210;
++
++ ep = container_of(_ep, struct fotg210_ep, ep);
++ fotg210 = ep->fotg210;
++
++ if (req->status || req->actual != req->length) {
++ dev_warn(&fotg210->gadget.dev, "EP0 request failed: %d\n", req->status);
++ }
++}
++
+ static void fotg210_get_status(struct fotg210_udc *fotg210,
+ struct usb_ctrlrequest *ctrl)
+ {
+@@ -713,17 +727,17 @@ static void fotg210_get_status(struct fotg210_udc *fotg210,
+
+ switch (ctrl->bRequestType & USB_RECIP_MASK) {
+ case USB_RECIP_DEVICE:
+- fotg210->ep0_data = 1 << USB_DEVICE_SELF_POWERED;
++ fotg210->ep0_data = cpu_to_le16(1 << USB_DEVICE_SELF_POWERED);
+ break;
+ case USB_RECIP_INTERFACE:
+- fotg210->ep0_data = 0;
++ fotg210->ep0_data = cpu_to_le16(0);
+ break;
+ case USB_RECIP_ENDPOINT:
+ epnum = ctrl->wIndex & USB_ENDPOINT_NUMBER_MASK;
+ if (epnum)
+ fotg210->ep0_data =
+- fotg210_is_epnstall(fotg210->ep[epnum])
+- << USB_ENDPOINT_HALT;
++ cpu_to_le16(fotg210_is_epnstall(fotg210->ep[epnum])
++ << USB_ENDPOINT_HALT);
+ else
+ fotg210_request_error(fotg210);
+ break;
+@@ -1172,6 +1186,8 @@ static int fotg210_udc_probe(struct platform_device *pdev)
+ if (fotg210->ep0_req == NULL)
+ goto err_map;
+
++ fotg210->ep0_req->complete = fotg210_ep0_complete;
++
+ fotg210_init(fotg210);
+
+ fotg210_disable_unplug(fotg210);
+diff --git a/drivers/usb/gadget/udc/fusb300_udc.c b/drivers/usb/gadget/udc/fusb300_udc.c
+index 9af8b415f303b..5e9e8e56e2d09 100644
+--- a/drivers/usb/gadget/udc/fusb300_udc.c
++++ b/drivers/usb/gadget/udc/fusb300_udc.c
+@@ -1347,6 +1347,7 @@ static int fusb300_remove(struct platform_device *pdev)
+ usb_del_gadget_udc(&fusb300->gadget);
+ iounmap(fusb300->reg);
+ free_irq(platform_get_irq(pdev, 0), fusb300);
++ free_irq(platform_get_irq(pdev, 1), fusb300);
+
+ fusb300_free_request(&fusb300->ep[0]->ep, fusb300->ep0_req);
+ for (i = 0; i < FUSB300_MAX_NUM_EP; i++)
+@@ -1432,7 +1433,7 @@ static int fusb300_probe(struct platform_device *pdev)
+ IRQF_SHARED, udc_name, fusb300);
+ if (ret < 0) {
+ pr_err("request_irq1 error (%d)\n", ret);
+- goto clean_up;
++ goto err_request_irq1;
+ }
+
+ INIT_LIST_HEAD(&fusb300->gadget.ep_list);
+@@ -1471,7 +1472,7 @@ static int fusb300_probe(struct platform_device *pdev)
+ GFP_KERNEL);
+ if (fusb300->ep0_req == NULL) {
+ ret = -ENOMEM;
+- goto clean_up3;
++ goto err_alloc_request;
+ }
+
+ init_controller(fusb300);
+@@ -1486,7 +1487,10 @@ static int fusb300_probe(struct platform_device *pdev)
+ err_add_udc:
+ fusb300_free_request(&fusb300->ep[0]->ep, fusb300->ep0_req);
+
+-clean_up3:
++err_alloc_request:
++ free_irq(ires1->start, fusb300);
++
++err_request_irq1:
+ free_irq(ires->start, fusb300);
+
+ clean_up:
+diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
+index 4b35739d36951..d1febde6f2c4a 100644
+--- a/drivers/usb/gadget/udc/gr_udc.c
++++ b/drivers/usb/gadget/udc/gr_udc.c
+@@ -215,7 +215,7 @@ static void gr_dfs_create(struct gr_udc *dev)
+
+ static void gr_dfs_delete(struct gr_udc *dev)
+ {
+- debugfs_remove(debugfs_lookup(dev_name(dev->dev), usb_debug_root));
++ debugfs_lookup_and_remove(dev_name(dev->dev), usb_debug_root);
+ }
+
+ #else /* !CONFIG_USB_GADGET_DEBUG_FS */
+diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c
+index a25d01c895641..ec0d3d74d66e2 100644
+--- a/drivers/usb/gadget/udc/lpc32xx_udc.c
++++ b/drivers/usb/gadget/udc/lpc32xx_udc.c
+@@ -532,7 +532,7 @@ static void create_debug_file(struct lpc32xx_udc *udc)
+
+ static void remove_debug_file(struct lpc32xx_udc *udc)
+ {
+- debugfs_remove(debugfs_lookup(debug_filename, NULL));
++ debugfs_lookup_and_remove(debug_filename, NULL);
+ }
+
+ #else
+@@ -3014,6 +3014,7 @@ static int lpc32xx_udc_probe(struct platform_device *pdev)
+ }
+
+ udc->isp1301_i2c_client = isp1301_get_client(isp1301_node);
++ of_node_put(isp1301_node);
+ if (!udc->isp1301_i2c_client) {
+ return -EPROBE_DEFER;
+ }
+diff --git a/drivers/usb/gadget/udc/pxa25x_udc.c b/drivers/usb/gadget/udc/pxa25x_udc.c
+index a09ec1d826b21..e4d2ab5768ba2 100644
+--- a/drivers/usb/gadget/udc/pxa25x_udc.c
++++ b/drivers/usb/gadget/udc/pxa25x_udc.c
+@@ -1341,7 +1341,7 @@ DEFINE_SHOW_ATTRIBUTE(udc_debug);
+ debugfs_create_file(dev->gadget.name, \
+ S_IRUGO, NULL, dev, &udc_debug_fops); \
+ } while (0)
+-#define remove_debug_files(dev) debugfs_remove(debugfs_lookup(dev->gadget.name, NULL))
++#define remove_debug_files(dev) debugfs_lookup_and_remove(dev->gadget.name, NULL)
+
+ #else /* !CONFIG_USB_GADGET_DEBUG_FILES */
+
+diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
+index f4b7a2a3e7114..282b114f382f8 100644
+--- a/drivers/usb/gadget/udc/pxa27x_udc.c
++++ b/drivers/usb/gadget/udc/pxa27x_udc.c
+@@ -215,7 +215,7 @@ static void pxa_init_debugfs(struct pxa_udc *udc)
+
+ static void pxa_cleanup_debugfs(struct pxa_udc *udc)
+ {
+- debugfs_remove(debugfs_lookup(udc->gadget.name, usb_debug_root));
++ debugfs_lookup_and_remove(udc->gadget.name, usb_debug_root);
+ }
+
+ #else
+diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
+index 57d417a7c3e0a..a10f41c4a3f2f 100644
+--- a/drivers/usb/gadget/udc/renesas_usb3.c
++++ b/drivers/usb/gadget/udc/renesas_usb3.c
+@@ -2378,6 +2378,8 @@ static void handle_ext_role_switch_states(struct device *dev,
+ switch (role) {
+ case USB_ROLE_NONE:
+ usb3->connection_state = USB_ROLE_NONE;
++ if (cur_role == USB_ROLE_HOST)
++ device_release_driver(host);
+ if (usb3->driver)
+ usb3_disconnect(usb3);
+ usb3_vbus_out(usb3, false);
+@@ -2566,6 +2568,7 @@ static int renesas_usb3_remove(struct platform_device *pdev)
+ debugfs_remove_recursive(usb3->dentry);
+ device_remove_file(&pdev->dev, &dev_attr_role);
+
++ cancel_work_sync(&usb3->role_work);
+ usb_role_switch_unregister(usb3->role_sw);
+
+ usb_del_gadget_udc(&usb3->gadget);
+diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
+index 43f1b0d461c1e..52996bf2cc705 100644
+--- a/drivers/usb/gadget/udc/tegra-xudc.c
++++ b/drivers/usb/gadget/udc/tegra-xudc.c
+@@ -32,9 +32,6 @@
+ #include <linux/workqueue.h>
+
+ /* XUSB_DEV registers */
+-#define SPARAM 0x000
+-#define SPARAM_ERSTMAX_MASK GENMASK(20, 16)
+-#define SPARAM_ERSTMAX(x) (((x) << 16) & SPARAM_ERSTMAX_MASK)
+ #define DB 0x004
+ #define DB_TARGET_MASK GENMASK(15, 8)
+ #define DB_TARGET(x) (((x) << 8) & DB_TARGET_MASK)
+@@ -275,8 +272,10 @@ BUILD_EP_CONTEXT_RW(deq_hi, deq_hi, 0, 0xffffffff)
+ BUILD_EP_CONTEXT_RW(avg_trb_len, tx_info, 0, 0xffff)
+ BUILD_EP_CONTEXT_RW(max_esit_payload, tx_info, 16, 0xffff)
+ BUILD_EP_CONTEXT_RW(edtla, rsvd[0], 0, 0xffffff)
+-BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 24, 0xff)
++BUILD_EP_CONTEXT_RW(rsvd, rsvd[0], 24, 0x1)
+ BUILD_EP_CONTEXT_RW(partial_td, rsvd[0], 25, 0x1)
++BUILD_EP_CONTEXT_RW(splitxstate, rsvd[0], 26, 0x1)
++BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 27, 0x1f)
+ BUILD_EP_CONTEXT_RW(cerrcnt, rsvd[1], 18, 0x3)
+ BUILD_EP_CONTEXT_RW(data_offset, rsvd[2], 0, 0x1ffff)
+ BUILD_EP_CONTEXT_RW(numtrbs, rsvd[2], 22, 0x1f)
+@@ -1557,6 +1556,9 @@ static int __tegra_xudc_ep_set_halt(struct tegra_xudc_ep *ep, bool halt)
+ ep_reload(xudc, ep->index);
+
+ ep_ctx_write_state(ep->context, EP_STATE_RUNNING);
++ ep_ctx_write_rsvd(ep->context, 0);
++ ep_ctx_write_partial_td(ep->context, 0);
++ ep_ctx_write_splitxstate(ep->context, 0);
+ ep_ctx_write_seq_num(ep->context, 0);
+
+ ep_reload(xudc, ep->index);
+@@ -2152,7 +2154,7 @@ static int tegra_xudc_gadget_vbus_draw(struct usb_gadget *gadget,
+
+ dev_dbg(xudc->dev, "%s: %u mA\n", __func__, m_a);
+
+- if (xudc->curr_usbphy->chg_type == SDP_TYPE)
++ if (xudc->curr_usbphy && xudc->curr_usbphy->chg_type == SDP_TYPE)
+ ret = usb_phy_set_power(xudc->curr_usbphy, m_a);
+
+ return ret;
+@@ -2812,7 +2814,10 @@ static void tegra_xudc_reset(struct tegra_xudc *xudc)
+ xudc->setup_seq_num = 0;
+ xudc->queued_setup_packet = false;
+
+- ep_ctx_write_seq_num(ep0->context, xudc->setup_seq_num);
++ ep_ctx_write_rsvd(ep0->context, 0);
++ ep_ctx_write_partial_td(ep0->context, 0);
++ ep_ctx_write_splitxstate(ep0->context, 0);
++ ep_ctx_write_seq_num(ep0->context, 0);
+
+ deq_ptr = trb_virt_to_phys(ep0, &ep0->transfer_ring[ep0->deq_ptr]);
+
+@@ -3295,11 +3300,6 @@ static void tegra_xudc_init_event_ring(struct tegra_xudc *xudc)
+ unsigned int i;
+ u32 val;
+
+- val = xudc_readl(xudc, SPARAM);
+- val &= ~(SPARAM_ERSTMAX_MASK);
+- val |= SPARAM_ERSTMAX(XUDC_NR_EVENT_RINGS);
+- xudc_writel(xudc, val, SPARAM);
+-
+ for (i = 0; i < ARRAY_SIZE(xudc->event_ring); i++) {
+ memset(xudc->event_ring[i], 0, XUDC_EVENT_RING_SIZE *
+ sizeof(*xudc->event_ring[i]));
+diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h
+index 98584f6b6c662..428819311afbf 100644
+--- a/drivers/usb/gadget/udc/trace.h
++++ b/drivers/usb/gadget/udc/trace.h
+@@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup,
+ TP_ARGS(g, ret)
+ );
+
++DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup,
++ TP_PROTO(struct usb_gadget *g, int ret),
++ TP_ARGS(g, ret)
++);
++
+ DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered,
+ TP_PROTO(struct usb_gadget *g, int ret),
+ TP_ARGS(g, ret)
+diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
+index fb4ffedd6f0dd..9cf43731bcd18 100644
+--- a/drivers/usb/gadget/udc/udc-xilinx.c
++++ b/drivers/usb/gadget/udc/udc-xilinx.c
+@@ -1612,6 +1612,8 @@ static void xudc_getstatus(struct xusb_udc *udc)
+ break;
+ case USB_RECIP_ENDPOINT:
+ epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK;
++ if (epnum >= XUSB_MAX_ENDPOINTS)
++ goto stall;
+ target_ep = &udc->ep[epnum];
+ epcfgreg = udc->read_fn(udc->addr + target_ep->offset);
+ halt = epcfgreg & XUSB_EP_CFG_STALL_MASK;
+@@ -1679,6 +1681,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc)
+ case USB_RECIP_ENDPOINT:
+ if (!udc->setup.wValue) {
+ endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK;
++ if (endpoint >= XUSB_MAX_ENDPOINTS) {
++ xudc_ep0_stall(udc);
++ return;
++ }
+ target_ep = &udc->ep[endpoint];
+ outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK;
+ outinbit = outinbit >> 7;
+diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c
+index 2df52f75f6b3c..7558cc4d90cc6 100644
+--- a/drivers/usb/host/bcma-hcd.c
++++ b/drivers/usb/host/bcma-hcd.c
+@@ -285,7 +285,7 @@ static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val)
+ {
+ struct bcma_hcd_device *usb_dev = bcma_get_drvdata(dev);
+
+- if (IS_ERR_OR_NULL(usb_dev->gpio_desc))
++ if (!usb_dev->gpio_desc)
+ return;
+
+ gpiod_set_value(usb_dev->gpio_desc, val);
+@@ -406,9 +406,11 @@ static int bcma_hcd_probe(struct bcma_device *core)
+ return -ENOMEM;
+ usb_dev->core = core;
+
+- if (core->dev.of_node)
+- usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc",
+- GPIOD_OUT_HIGH);
++ usb_dev->gpio_desc = devm_gpiod_get_optional(&core->dev, "vcc",
++ GPIOD_OUT_HIGH);
++ if (IS_ERR(usb_dev->gpio_desc))
++ return dev_err_probe(&core->dev, PTR_ERR(usb_dev->gpio_desc),
++ "error obtaining VCC GPIO");
+
+ switch (core->id.id) {
+ case BCMA_CORE_USB20_HOST:
+diff --git a/drivers/usb/host/ehci-brcm.c b/drivers/usb/host/ehci-brcm.c
+index d3626bfa966b4..6a0f64c9e5e88 100644
+--- a/drivers/usb/host/ehci-brcm.c
++++ b/drivers/usb/host/ehci-brcm.c
+@@ -62,8 +62,12 @@ static int ehci_brcm_hub_control(
+ u32 __iomem *status_reg;
+ unsigned long flags;
+ int retval, irq_disabled = 0;
++ u32 temp;
+
+- status_reg = &ehci->regs->port_status[(wIndex & 0xff) - 1];
++ temp = (wIndex & 0xff) - 1;
++ if (temp >= HCS_N_PORTS_MAX) /* Avoid index-out-of-bounds warning */
++ temp = 0;
++ status_reg = &ehci->regs->port_status[temp];
+
+ /*
+ * RESUME is cleared when GetPortStatus() is called 20ms after start
+diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
+index 385be30baad36..e38dfbd0d9ddd 100644
+--- a/drivers/usb/host/ehci-fsl.c
++++ b/drivers/usb/host/ehci-fsl.c
+@@ -29,7 +29,7 @@
+ #include "ehci-fsl.h"
+
+ #define DRIVER_DESC "Freescale EHCI Host controller driver"
+-#define DRV_NAME "ehci-fsl"
++#define DRV_NAME "fsl-ehci"
+
+ static struct hc_driver __read_mostly fsl_ehci_hc_driver;
+
+diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
+index 1776c05d0a486..1440803216297 100644
+--- a/drivers/usb/host/ehci-hcd.c
++++ b/drivers/usb/host/ehci-hcd.c
+@@ -635,7 +635,16 @@ static int ehci_run (struct usb_hcd *hcd)
+ /* Wait until HC become operational */
+ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */
+ msleep(5);
+- rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, 0, 100 * 1000);
++
++ /* For Aspeed, STS_HALT also depends on ASS/PSS status.
++ * Check CMD_RUN instead.
++ */
++ if (ehci->is_aspeed)
++ rc = ehci_handshake(ehci, &ehci->regs->command, CMD_RUN,
++ 1, 100 * 1000);
++ else
++ rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT,
++ 0, 100 * 1000);
+
+ up_write(&ehci_cf_port_reset_rwsem);
+
+diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
+index e87cf3a00fa4b..638f03b897394 100644
+--- a/drivers/usb/host/ehci-pci.c
++++ b/drivers/usb/host/ehci-pci.c
+@@ -21,6 +21,9 @@ static const char hcd_name[] = "ehci-pci";
+ /* defined here to avoid adding to pci_ids.h for single instance use */
+ #define PCI_DEVICE_ID_INTEL_CE4100_USB 0x2e70
+
++#define PCI_VENDOR_ID_ASPEED 0x1a03
++#define PCI_DEVICE_ID_ASPEED_EHCI 0x2603
++
+ /*-------------------------------------------------------------------------*/
+ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC 0x0939
+ static inline bool is_intel_quark_x1000(struct pci_dev *pdev)
+@@ -222,6 +225,12 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
+ ehci->has_synopsys_hc_bug = 1;
+ }
+ break;
++ case PCI_VENDOR_ID_ASPEED:
++ if (pdev->device == PCI_DEVICE_ID_ASPEED_EHCI) {
++ ehci_info(ehci, "applying Aspeed HC workaround\n");
++ ehci->is_aspeed = 1;
++ }
++ break;
+ }
+
+ /* optional debug port, normally in the first BAR */
+diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
+index c70f2d0b4aaf0..c3dc906274d93 100644
+--- a/drivers/usb/host/ehci-platform.c
++++ b/drivers/usb/host/ehci-platform.c
+@@ -297,6 +297,12 @@ static int ehci_platform_probe(struct platform_device *dev)
+ "has-transaction-translator"))
+ hcd->has_tt = 1;
+
++ if (of_device_is_compatible(dev->dev.of_node,
++ "aspeed,ast2500-ehci") ||
++ of_device_is_compatible(dev->dev.of_node,
++ "aspeed,ast2600-ehci"))
++ ehci->is_aspeed = 1;
++
+ if (soc_device_match(quirk_poll_match))
+ priv->quirk_poll = true;
+
+diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c
+index 6bbaee74f7e7d..28a19693c19fe 100644
+--- a/drivers/usb/host/ehci-ppc-of.c
++++ b/drivers/usb/host/ehci-ppc-of.c
+@@ -148,6 +148,7 @@ static int ehci_hcd_ppc_of_probe(struct platform_device *op)
+ } else {
+ ehci->has_amcc_usb23 = 1;
+ }
++ of_node_put(np);
+ }
+
+ if (of_get_property(dn, "big-endian", NULL)) {
+diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
+index 80bb823aa9fe8..fdd073cc053b8 100644
+--- a/drivers/usb/host/ehci.h
++++ b/drivers/usb/host/ehci.h
+@@ -219,6 +219,7 @@ struct ehci_hcd { /* one per controller */
+ unsigned need_oc_pp_cycle:1; /* MPC834X port power */
+ unsigned imx28_write_fix:1; /* For Freescale i.MX28 */
+ unsigned spurious_oc:1;
++ unsigned is_aspeed:1;
+
+ /* required for usb32 quirk */
+ #define OHCI_CTRL_HCFS (3 << 6)
+diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
+index 4b02ace09f3dc..d9a3fd8af7a01 100644
+--- a/drivers/usb/host/fotg210-hcd.c
++++ b/drivers/usb/host/fotg210-hcd.c
+@@ -862,7 +862,7 @@ static inline void remove_debug_files(struct fotg210_hcd *fotg210)
+ {
+ struct usb_bus *bus = &fotg210_to_hcd(fotg210)->self;
+
+- debugfs_remove(debugfs_lookup(bus->bus_name, fotg210_debug_root));
++ debugfs_lookup_and_remove(bus->bus_name, fotg210_debug_root);
+ }
+
+ /* handshake - spin reading hc until handshake completes or fails
+diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
+index 8835f6bd528e1..9c3e12f2f25d8 100644
+--- a/drivers/usb/host/isp116x-hcd.c
++++ b/drivers/usb/host/isp116x-hcd.c
+@@ -1206,7 +1206,7 @@ static void create_debug_file(struct isp116x *isp116x)
+
+ static void remove_debug_file(struct isp116x *isp116x)
+ {
+- debugfs_remove(debugfs_lookup(hcd_name, usb_debug_root));
++ debugfs_lookup_and_remove(hcd_name, usb_debug_root);
+ }
+
+ #else
+@@ -1541,10 +1541,12 @@ static int isp116x_remove(struct platform_device *pdev)
+
+ iounmap(isp116x->data_reg);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+- release_mem_region(res->start, 2);
++ if (res)
++ release_mem_region(res->start, 2);
+ iounmap(isp116x->addr_reg);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- release_mem_region(res->start, 2);
++ if (res)
++ release_mem_region(res->start, 2);
+
+ usb_put_hcd(hcd);
+ return 0;
+diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
+index d8610ce8f2ecd..bc68669dfc50c 100644
+--- a/drivers/usb/host/isp1362-hcd.c
++++ b/drivers/usb/host/isp1362-hcd.c
+@@ -2170,7 +2170,7 @@ static void create_debug_file(struct isp1362_hcd *isp1362_hcd)
+
+ static void remove_debug_file(struct isp1362_hcd *isp1362_hcd)
+ {
+- debugfs_remove(debugfs_lookup("isp1362", usb_debug_root));
++ debugfs_lookup_and_remove("isp1362", usb_debug_root);
+ }
+
+ /*-------------------------------------------------------------------------*/
+diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
+index 59cc1bc7f12f5..994dc562b2db0 100644
+--- a/drivers/usb/host/max3421-hcd.c
++++ b/drivers/usb/host/max3421-hcd.c
+@@ -125,8 +125,6 @@ struct max3421_hcd {
+
+ struct task_struct *spi_thread;
+
+- struct max3421_hcd *next;
+-
+ enum max3421_rh_state rh_state;
+ /* lower 16 bits contain port status, upper 16 bits the change mask: */
+ u32 port_status;
+@@ -174,8 +172,6 @@ struct max3421_ep {
+ u8 retransmit; /* packet needs retransmission */
+ };
+
+-static struct max3421_hcd *max3421_hcd_list;
+-
+ #define MAX3421_FIFO_SIZE 64
+
+ #define MAX3421_SPI_DIR_RD 0 /* read register from MAX3421 */
+@@ -1440,7 +1436,7 @@ max3421_spi_thread(void *dev_id)
+ * use spi_wr_buf().
+ */
+ for (i = 0; i < ARRAY_SIZE(max3421_hcd->iopins); ++i) {
+- u8 val = spi_rd8(hcd, MAX3421_REG_IOPINS1);
++ u8 val = spi_rd8(hcd, MAX3421_REG_IOPINS1 + i);
+
+ val = ((val & 0xf0) |
+ (max3421_hcd->iopins[i] & 0x0f));
+@@ -1882,9 +1878,8 @@ max3421_probe(struct spi_device *spi)
+ }
+ set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+ max3421_hcd = hcd_to_max3421(hcd);
+- max3421_hcd->next = max3421_hcd_list;
+- max3421_hcd_list = max3421_hcd;
+ INIT_LIST_HEAD(&max3421_hcd->ep_list);
++ spi_set_drvdata(spi, max3421_hcd);
+
+ max3421_hcd->tx = kmalloc(sizeof(*max3421_hcd->tx), GFP_KERNEL);
+ if (!max3421_hcd->tx)
+@@ -1934,28 +1929,18 @@ error:
+ static int
+ max3421_remove(struct spi_device *spi)
+ {
+- struct max3421_hcd *max3421_hcd = NULL, **prev;
+- struct usb_hcd *hcd = NULL;
++ struct max3421_hcd *max3421_hcd;
++ struct usb_hcd *hcd;
+ unsigned long flags;
+
+- for (prev = &max3421_hcd_list; *prev; prev = &(*prev)->next) {
+- max3421_hcd = *prev;
+- hcd = max3421_to_hcd(max3421_hcd);
+- if (hcd->self.controller == &spi->dev)
+- break;
+- }
+- if (!max3421_hcd) {
+- dev_err(&spi->dev, "no MAX3421 HCD found for SPI device %p\n",
+- spi);
+- return -ENODEV;
+- }
++ max3421_hcd = spi_get_drvdata(spi);
++ hcd = max3421_to_hcd(max3421_hcd);
+
+ usb_remove_hcd(hcd);
+
+ spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+ kthread_stop(max3421_hcd->spi_thread);
+- *prev = max3421_hcd->next;
+
+ spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+
+diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
+index a24aea3d2759e..e72f2e456f4a8 100644
+--- a/drivers/usb/host/ohci-at91.c
++++ b/drivers/usb/host/ohci-at91.c
+@@ -652,7 +652,13 @@ ohci_hcd_at91_drv_resume(struct device *dev)
+ else
+ at91_start_clock(ohci_at91);
+
+- ohci_resume(hcd, false);
++ /*
++ * According to the comment in ohci_hcd_at91_drv_suspend()
++ * we need to do a reset if the 48Mhz clock was stopped,
++ * that is, if ohci_at91->wakeup is clear. Tell ohci_resume()
++ * to reset in this case by setting its "hibernated" flag.
++ */
++ ohci_resume(hcd, !ohci_at91->wakeup);
+
+ return 0;
+ }
+diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c
+index 85878e8ad3311..106a6bcefb087 100644
+--- a/drivers/usb/host/ohci-nxp.c
++++ b/drivers/usb/host/ohci-nxp.c
+@@ -164,6 +164,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev)
+ }
+
+ isp1301_i2c_client = isp1301_get_client(isp1301_node);
++ of_node_put(isp1301_node);
+ if (!isp1301_i2c_client)
+ return -EPROBE_DEFER;
+
+diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c
+index 45f7cceb6df31..98e46725999e9 100644
+--- a/drivers/usb/host/ohci-ppc-of.c
++++ b/drivers/usb/host/ohci-ppc-of.c
+@@ -169,6 +169,7 @@ static int ohci_hcd_ppc_of_probe(struct platform_device *op)
+ release_mem_region(res.start, 0x4);
+ } else
+ pr_debug("%s: cannot get ehci offset from fdt\n", __FILE__);
++ of_node_put(np);
+ }
+
+ irq_dispose_mapping(irq);
+diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c
+index 08ec2ab0d95a5..3f3d62dc06746 100644
+--- a/drivers/usb/host/ohci-tmio.c
++++ b/drivers/usb/host/ohci-tmio.c
+@@ -199,7 +199,7 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev)
+ if (usb_disabled())
+ return -ENODEV;
+
+- if (!cell)
++ if (!cell || !regs || !config || !sram)
+ return -EINVAL;
+
+ if (irq < 0)
+diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
+index 4300326b3730d..6be6c5878d08b 100644
+--- a/drivers/usb/host/oxu210hp-hcd.c
++++ b/drivers/usb/host/oxu210hp-hcd.c
+@@ -3909,8 +3909,10 @@ static int oxu_bus_suspend(struct usb_hcd *hcd)
+ }
+ }
+
++ spin_unlock_irq(&oxu->lock);
+ /* turn off now-idle HC */
+ del_timer_sync(&oxu->watchdog);
++ spin_lock_irq(&oxu->lock);
+ ehci_halt(oxu);
+ hcd->state = HC_STATE_SUSPENDED;
+
+diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
+index 85623731a5162..825ff67273102 100644
+--- a/drivers/usb/host/sl811-hcd.c
++++ b/drivers/usb/host/sl811-hcd.c
+@@ -1501,7 +1501,7 @@ static void create_debug_file(struct sl811 *sl811)
+
+ static void remove_debug_file(struct sl811 *sl811)
+ {
+- debugfs_remove(debugfs_lookup("sl811h", usb_debug_root));
++ debugfs_lookup_and_remove("sl811h", usb_debug_root);
+ }
+
+ /*-------------------------------------------------------------------------*/
+diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
+index d90b869f5f409..d138f62ce84d7 100644
+--- a/drivers/usb/host/uhci-hcd.c
++++ b/drivers/usb/host/uhci-hcd.c
+@@ -536,8 +536,8 @@ static void release_uhci(struct uhci_hcd *uhci)
+ uhci->is_initialized = 0;
+ spin_unlock_irq(&uhci->lock);
+
+- debugfs_remove(debugfs_lookup(uhci_to_hcd(uhci)->self.bus_name,
+- uhci_debugfs_root));
++ debugfs_lookup_and_remove(uhci_to_hcd(uhci)->self.bus_name,
++ uhci_debugfs_root);
+
+ for (i = 0; i < UHCI_NUM_SKELQH; i++)
+ uhci_free_qh(uhci, uhci->skelqh[i]);
+@@ -700,7 +700,7 @@ err_alloc_frame_cpu:
+ uhci->frame, uhci->frame_dma_handle);
+
+ err_alloc_frame:
+- debugfs_remove(debugfs_lookup(hcd->self.bus_name, uhci_debugfs_root));
++ debugfs_lookup_and_remove(hcd->self.bus_name, uhci_debugfs_root);
+
+ return retval;
+ }
+diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c
+index 9b88745d247f5..3316533b8bc29 100644
+--- a/drivers/usb/host/uhci-pci.c
++++ b/drivers/usb/host/uhci-pci.c
+@@ -119,11 +119,13 @@ static int uhci_pci_init(struct usb_hcd *hcd)
+
+ uhci->rh_numports = uhci_count_ports(hcd);
+
+- /* Intel controllers report the OverCurrent bit active on.
+- * VIA controllers report it active off, so we'll adjust the
+- * bit value. (It's not standardized in the UHCI spec.)
++ /*
++ * Intel controllers report the OverCurrent bit active on. VIA
++ * and ZHAOXIN controllers report it active off, so we'll adjust
++ * the bit value. (It's not standardized in the UHCI spec.)
+ */
+- if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_VIA)
++ if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_VIA ||
++ to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_ZHAOXIN)
+ uhci->oc_low = 1;
+
+ /* HP's server management chip requires a longer port reset delay. */
+diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c
+index 70dbd95c3f063..be9e9db7cad10 100644
+--- a/drivers/usb/host/uhci-platform.c
++++ b/drivers/usb/host/uhci-platform.c
+@@ -113,7 +113,8 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev)
+ num_ports);
+ }
+ if (of_device_is_compatible(np, "aspeed,ast2400-uhci") ||
+- of_device_is_compatible(np, "aspeed,ast2500-uhci")) {
++ of_device_is_compatible(np, "aspeed,ast2500-uhci") ||
++ of_device_is_compatible(np, "aspeed,ast2600-uhci")) {
+ uhci->is_aspeed = 1;
+ dev_info(&pdev->dev,
+ "Enabled Aspeed implementation workarounds\n");
+diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
+index ccb0156fcebeb..f297f1f8edc7b 100644
+--- a/drivers/usb/host/xhci-dbgcap.c
++++ b/drivers/usb/host/xhci-dbgcap.c
+@@ -914,59 +914,6 @@ static void xhci_dbc_handle_events(struct work_struct *work)
+ mod_delayed_work(system_wq, &dbc->event_work, 1);
+ }
+
+-static void xhci_do_dbc_exit(struct xhci_hcd *xhci)
+-{
+- unsigned long flags;
+-
+- spin_lock_irqsave(&xhci->lock, flags);
+- kfree(xhci->dbc);
+- xhci->dbc = NULL;
+- spin_unlock_irqrestore(&xhci->lock, flags);
+-}
+-
+-static int xhci_do_dbc_init(struct xhci_hcd *xhci)
+-{
+- u32 reg;
+- struct xhci_dbc *dbc;
+- unsigned long flags;
+- void __iomem *base;
+- int dbc_cap_offs;
+-
+- base = &xhci->cap_regs->hc_capbase;
+- dbc_cap_offs = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_DEBUG);
+- if (!dbc_cap_offs)
+- return -ENODEV;
+-
+- dbc = kzalloc(sizeof(*dbc), GFP_KERNEL);
+- if (!dbc)
+- return -ENOMEM;
+-
+- dbc->regs = base + dbc_cap_offs;
+-
+- /* We will avoid using DbC in xhci driver if it's in use. */
+- reg = readl(&dbc->regs->control);
+- if (reg & DBC_CTRL_DBC_ENABLE) {
+- kfree(dbc);
+- return -EBUSY;
+- }
+-
+- spin_lock_irqsave(&xhci->lock, flags);
+- if (xhci->dbc) {
+- spin_unlock_irqrestore(&xhci->lock, flags);
+- kfree(dbc);
+- return -EBUSY;
+- }
+- xhci->dbc = dbc;
+- spin_unlock_irqrestore(&xhci->lock, flags);
+-
+- dbc->xhci = xhci;
+- dbc->dev = xhci_to_hcd(xhci)->self.sysdev;
+- INIT_DELAYED_WORK(&dbc->event_work, xhci_dbc_handle_events);
+- spin_lock_init(&dbc->lock);
+-
+- return 0;
+-}
+-
+ static ssize_t dbc_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+@@ -1026,44 +973,86 @@ static ssize_t dbc_store(struct device *dev,
+
+ static DEVICE_ATTR_RW(dbc);
+
+-int xhci_dbc_init(struct xhci_hcd *xhci)
++struct xhci_dbc *
++xhci_alloc_dbc(struct device *dev, void __iomem *base, const struct dbc_driver *driver)
+ {
++ struct xhci_dbc *dbc;
+ int ret;
+- struct device *dev = xhci_to_hcd(xhci)->self.controller;
+
+- ret = xhci_do_dbc_init(xhci);
+- if (ret)
+- goto init_err3;
++ dbc = kzalloc(sizeof(*dbc), GFP_KERNEL);
++ if (!dbc)
++ return NULL;
+
+- ret = xhci_dbc_tty_probe(xhci);
+- if (ret)
+- goto init_err2;
++ dbc->regs = base;
++ dbc->dev = dev;
++ dbc->driver = driver;
++
++ if (readl(&dbc->regs->control) & DBC_CTRL_DBC_ENABLE)
++ goto err;
++
++ INIT_DELAYED_WORK(&dbc->event_work, xhci_dbc_handle_events);
++ spin_lock_init(&dbc->lock);
+
+ ret = device_create_file(dev, &dev_attr_dbc);
+ if (ret)
+- goto init_err1;
++ goto err;
+
+- return 0;
++ return dbc;
++err:
++ kfree(dbc);
++ return NULL;
++}
++
++/* undo what xhci_alloc_dbc() did */
++void xhci_dbc_remove(struct xhci_dbc *dbc)
++{
++ if (!dbc)
++ return;
++ /* stop hw, stop wq and call dbc->ops->stop() */
++ xhci_dbc_stop(dbc);
++
++ /* remove sysfs files */
++ device_remove_file(dbc->dev, &dev_attr_dbc);
++
++ kfree(dbc);
++}
++
++
++int xhci_create_dbc_dev(struct xhci_hcd *xhci)
++{
++ struct device *dev;
++ void __iomem *base;
++ int ret;
++ int dbc_cap_offs;
++
++ /* gather all the parameters needed to set up a dbc device */
++ dev = xhci_to_hcd(xhci)->self.controller;
++ base = &xhci->cap_regs->hc_capbase;
++
++ dbc_cap_offs = xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_DEBUG);
++ if (!dbc_cap_offs)
++ return -ENODEV;
++
++ /* already allocated and in use */
++ if (xhci->dbc)
++ return -EBUSY;
++
++ ret = xhci_dbc_tty_probe(dev, base + dbc_cap_offs, xhci);
+
+-init_err1:
+- xhci_dbc_tty_remove(xhci->dbc);
+-init_err2:
+- xhci_do_dbc_exit(xhci);
+-init_err3:
+ return ret;
+ }
+
+-void xhci_dbc_exit(struct xhci_hcd *xhci)
++void xhci_remove_dbc_dev(struct xhci_hcd *xhci)
+ {
+- struct device *dev = xhci_to_hcd(xhci)->self.controller;
++ unsigned long flags;
+
+ if (!xhci->dbc)
+ return;
+
+- device_remove_file(dev, &dev_attr_dbc);
+ xhci_dbc_tty_remove(xhci->dbc);
+- xhci_dbc_stop(xhci->dbc);
+- xhci_do_dbc_exit(xhci);
++ spin_lock_irqsave(&xhci->lock, flags);
++ xhci->dbc = NULL;
++ spin_unlock_irqrestore(&xhci->lock, flags);
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h
+index c70b78d504eb5..8b5b363a07194 100644
+--- a/drivers/usb/host/xhci-dbgcap.h
++++ b/drivers/usb/host/xhci-dbgcap.h
+@@ -194,10 +194,13 @@ static inline struct dbc_ep *get_out_ep(struct xhci_dbc *dbc)
+ }
+
+ #ifdef CONFIG_USB_XHCI_DBGCAP
+-int xhci_dbc_init(struct xhci_hcd *xhci);
+-void xhci_dbc_exit(struct xhci_hcd *xhci);
+-int xhci_dbc_tty_probe(struct xhci_hcd *xhci);
++int xhci_create_dbc_dev(struct xhci_hcd *xhci);
++void xhci_remove_dbc_dev(struct xhci_hcd *xhci);
++int xhci_dbc_tty_probe(struct device *dev, void __iomem *res, struct xhci_hcd *xhci);
+ void xhci_dbc_tty_remove(struct xhci_dbc *dbc);
++struct xhci_dbc *xhci_alloc_dbc(struct device *dev, void __iomem *res,
++ const struct dbc_driver *driver);
++void xhci_dbc_remove(struct xhci_dbc *dbc);
+ struct dbc_request *dbc_alloc_request(struct xhci_dbc *dbc,
+ unsigned int direction,
+ gfp_t flags);
+@@ -208,12 +211,12 @@ int xhci_dbc_suspend(struct xhci_hcd *xhci);
+ int xhci_dbc_resume(struct xhci_hcd *xhci);
+ #endif /* CONFIG_PM */
+ #else
+-static inline int xhci_dbc_init(struct xhci_hcd *xhci)
++static inline int xhci_create_dbc_dev(struct xhci_hcd *xhci)
+ {
+ return 0;
+ }
+
+-static inline void xhci_dbc_exit(struct xhci_hcd *xhci)
++static inline void xhci_remove_dbc_dev(struct xhci_hcd *xhci)
+ {
+ }
+
+diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
+index eb46e642e87aa..18bcc96853ae1 100644
+--- a/drivers/usb/host/xhci-dbgtty.c
++++ b/drivers/usb/host/xhci-dbgtty.c
+@@ -468,9 +468,9 @@ static const struct dbc_driver dbc_driver = {
+ .disconnect = xhci_dbc_tty_unregister_device,
+ };
+
+-int xhci_dbc_tty_probe(struct xhci_hcd *xhci)
++int xhci_dbc_tty_probe(struct device *dev, void __iomem *base, struct xhci_hcd *xhci)
+ {
+- struct xhci_dbc *dbc = xhci->dbc;
++ struct xhci_dbc *dbc;
+ struct dbc_port *port;
+ int status;
+
+@@ -485,13 +485,22 @@ int xhci_dbc_tty_probe(struct xhci_hcd *xhci)
+ goto out;
+ }
+
+- dbc->driver = &dbc_driver;
+- dbc->priv = port;
++ dbc_tty_driver->driver_state = port;
++
++ dbc = xhci_alloc_dbc(dev, base, &dbc_driver);
++ if (!dbc) {
++ status = -ENOMEM;
++ goto out2;
++ }
+
++ dbc->priv = port;
+
+- dbc_tty_driver->driver_state = port;
++ /* get rid of xhci once this is a real driver binding to a device */
++ xhci->dbc = dbc;
+
+ return 0;
++out2:
++ kfree(port);
+ out:
+ /* dbc_tty_exit will be called by module_exit() in the future */
+ dbc_tty_exit();
+@@ -506,8 +515,7 @@ void xhci_dbc_tty_remove(struct xhci_dbc *dbc)
+ {
+ struct dbc_port *port = dbc_to_port(dbc);
+
+- dbc->driver = NULL;
+- dbc->priv = NULL;
++ xhci_dbc_remove(dbc);
+ kfree(port);
+
+ /* dbc_tty_exit will be called by module_exit() in the future */
+diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
+index dc832ddf7033f..bd40caeeb21c6 100644
+--- a/drivers/usb/host/xhci-debugfs.c
++++ b/drivers/usb/host/xhci-debugfs.c
+@@ -133,6 +133,7 @@ static void xhci_debugfs_regset(struct xhci_hcd *xhci, u32 base,
+ regset->regs = regs;
+ regset->nregs = nregs;
+ regset->base = hcd->regs + base;
++ regset->dev = hcd->self.controller;
+
+ debugfs_create_regset32((const char *)rgs->name, 0444, parent, regset);
+ }
+diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
+index a3f875eea7519..b9754784161d7 100644
+--- a/drivers/usb/host/xhci-hub.c
++++ b/drivers/usb/host/xhci-hub.c
+@@ -257,7 +257,6 @@ static void xhci_common_hub_descriptor(struct xhci_hcd *xhci,
+ {
+ u16 temp;
+
+- desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.9 says 20ms max */
+ desc->bHubContrCurrent = 0;
+
+ desc->bNbrPorts = ports;
+@@ -292,6 +291,7 @@ static void xhci_usb2_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci,
+ desc->bDescriptorType = USB_DT_HUB;
+ temp = 1 + (ports / 8);
+ desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * temp;
++ desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.8 says 20ms */
+
+ /* The Device Removable bits are reported on a byte granularity.
+ * If the port doesn't exist within that byte, the bit is set to 0.
+@@ -344,6 +344,7 @@ static void xhci_usb3_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci,
+ xhci_common_hub_descriptor(xhci, desc, ports);
+ desc->bDescriptorType = USB_DT_SS_HUB;
+ desc->bDescLength = USB_DT_SS_HUB_SIZE;
++ desc->bPwrOn2PwrGood = 50; /* usb 3.1 may fail if less than 100ms */
+
+ /* header decode latency should be zero for roothubs,
+ * see section 4.23.5.2.
+@@ -716,6 +717,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci,
+ continue;
+
+ retval = xhci_disable_slot(xhci, i);
++ xhci_free_virt_device(xhci, i);
+ if (retval)
+ xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n",
+ i, retval);
+@@ -760,7 +762,7 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci)
+ }
+ pm_runtime_allow(xhci_to_hcd(xhci)->self.controller);
+ xhci->test_mode = 0;
+- return xhci_reset(xhci);
++ return xhci_reset(xhci, XHCI_RESET_SHORT_USEC);
+ }
+
+ void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port,
+@@ -1086,6 +1088,9 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status,
+ if (link_state == XDEV_U2)
+ *status |= USB_PORT_STAT_L1;
+ if (link_state == XDEV_U0) {
++ if (bus_state->resume_done[portnum])
++ usb_hcd_end_port_resume(&port->rhub->hcd->self,
++ portnum);
+ bus_state->resume_done[portnum] = 0;
+ clear_bit(portnum, &bus_state->resuming_ports);
+ if (bus_state->suspended_ports & (1 << portnum)) {
+@@ -1429,7 +1434,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
+ }
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex],
+- msecs_to_jiffies(100)))
++ msecs_to_jiffies(500)))
+ xhci_dbg(xhci, "missing U0 port change event for port %d-%d\n",
+ hcd->self.busnum, wIndex + 1);
+ spin_lock_irqsave(&xhci->lock, flags);
+@@ -1642,6 +1647,17 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
+
+ status = bus_state->resuming_ports;
+
++ /*
++ * SS devices are only visible to the roothub after link training completes.
++ * Keep polling roothubs for a grace period after xHC start.
++ */
++ if (xhci->run_graceperiod) {
++ if (time_before(jiffies, xhci->run_graceperiod))
++ status = 1;
++ else
++ xhci->run_graceperiod = 0;
++ }
++
+ mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC;
+
+ /* For each port, did anything change? If so, set that bit in buf. */
+diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
+index 0e312066c5c63..6444aef33cf08 100644
+--- a/drivers/usb/host/xhci-mem.c
++++ b/drivers/usb/host/xhci-mem.c
+@@ -642,7 +642,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
+ num_stream_ctxs, &stream_info->ctx_array_dma,
+ mem_flags);
+ if (!stream_info->stream_ctx_array)
+- goto cleanup_ctx;
++ goto cleanup_ring_array;
+ memset(stream_info->stream_ctx_array, 0,
+ sizeof(struct xhci_stream_ctx)*num_stream_ctxs);
+
+@@ -703,6 +703,11 @@ cleanup_rings:
+ }
+ xhci_free_command(xhci, stream_info->free_streams_command);
+ cleanup_ctx:
++ xhci_free_stream_ctx(xhci,
++ stream_info->num_stream_ctxs,
++ stream_info->stream_ctx_array,
++ stream_info->ctx_array_dma);
++cleanup_ring_array:
+ kfree(stream_info->stream_rings);
+ cleanup_info:
+ kfree(stream_info);
+@@ -893,15 +898,19 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
+ if (dev->eps[i].stream_info)
+ xhci_free_stream_info(xhci,
+ dev->eps[i].stream_info);
+- /* Endpoints on the TT/root port lists should have been removed
+- * when usb_disable_device() was called for the device.
+- * We can't drop them anyway, because the udev might have gone
+- * away by this point, and we can't tell what speed it was.
++ /*
++ * Endpoints are normally deleted from the bandwidth list when
++ * endpoints are dropped, before device is freed.
++ * If host is dying or being removed then endpoints aren't
++ * dropped cleanly, so delete the endpoint from list here.
++ * Only applicable for hosts with software bandwidth checking.
+ */
+- if (!list_empty(&dev->eps[i].bw_endpoint_list))
+- xhci_warn(xhci, "Slot %u endpoint %u "
+- "not removed from BW list!\n",
+- slot_id, i);
++
++ if (!list_empty(&dev->eps[i].bw_endpoint_list)) {
++ list_del_init(&dev->eps[i].bw_endpoint_list);
++ xhci_dbg(xhci, "Slot %u endpoint %u not removed from BW list!\n",
++ slot_id, i);
++ }
+ }
+ /* If this is a hub, free the TT(s) from the TT list */
+ xhci_free_tt_info(xhci, dev, slot_id);
+@@ -2119,7 +2128,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ {
+ u32 temp, port_offset, port_count;
+ int i;
+- u8 major_revision, minor_revision;
++ u8 major_revision, minor_revision, tmp_minor_revision;
+ struct xhci_hub *rhub;
+ struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
+ struct xhci_port_cap *port_cap;
+@@ -2139,6 +2148,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ */
+ if (minor_revision > 0x00 && minor_revision < 0x10)
+ minor_revision <<= 4;
++ /*
++ * Some Zhaoxin xHCI controllers follow the USB 3.1 spec
++ * but only support Gen1.
++ */
++ if (xhci->quirks & XHCI_ZHAOXIN_HOST) {
++ tmp_minor_revision = minor_revision;
++ minor_revision = 0;
++ }
++
+ } else if (major_revision <= 0x02) {
+ rhub = &xhci->usb2_rhub;
+ } else {
+@@ -2148,10 +2166,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ /* Ignoring port protocol we can't understand. FIXME */
+ return;
+ }
+- rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp);
+-
+- if (rhub->min_rev < minor_revision)
+- rhub->min_rev = minor_revision;
+
+ /* Port offset and count in the third dword, see section 7.2 */
+ temp = readl(addr + 2);
+@@ -2170,8 +2184,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ if (xhci->num_port_caps > max_caps)
+ return;
+
+- port_cap->maj_rev = major_revision;
+- port_cap->min_rev = minor_revision;
+ port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp);
+
+ if (port_cap->psi_count) {
+@@ -2192,6 +2204,11 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1])))
+ port_cap->psi_uid_count++;
+
++ if (xhci->quirks & XHCI_ZHAOXIN_HOST &&
++ major_revision == 0x03 &&
++ XHCI_EXT_PORT_PSIV(port_cap->psi[i]) >= 5)
++ minor_revision = tmp_minor_revision;
++
+ xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n",
+ XHCI_EXT_PORT_PSIV(port_cap->psi[i]),
+ XHCI_EXT_PORT_PSIE(port_cap->psi[i]),
+@@ -2201,6 +2218,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
+ XHCI_EXT_PORT_PSIM(port_cap->psi[i]));
+ }
+ }
++
++ rhub->maj_rev = major_revision;
++
++ if (rhub->min_rev < minor_revision)
++ rhub->min_rev = minor_revision;
++
++ port_cap->maj_rev = major_revision;
++ port_cap->min_rev = minor_revision;
++
+ /* cache usb2 port capabilities */
+ if (major_revision < 0x03 && xhci->num_ext_caps < max_caps)
+ xhci->ext_caps[xhci->num_ext_caps++] = temp;
+@@ -2445,8 +2471,12 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
+ * and our use of dma addresses in the trb_address_map radix tree needs
+ * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need.
+ */
+- xhci->segment_pool = dma_pool_create("xHCI ring segments", dev,
+- TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size);
++ if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH)
++ xhci->segment_pool = dma_pool_create("xHCI ring segments", dev,
++ TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2);
++ else
++ xhci->segment_pool = dma_pool_create("xHCI ring segments", dev,
++ TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size);
+
+ /* See Table 46 and Note on Figure 55 */
+ xhci->device_pool = dma_pool_create("xHCI input/output contexts", dev,
+@@ -2583,7 +2613,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
+
+ fail:
+ xhci_halt(xhci);
+- xhci_reset(xhci);
++ xhci_reset(xhci, XHCI_RESET_SHORT_USEC);
+ xhci_mem_cleanup(xhci);
+ return -ENOMEM;
+ }
+diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
+index 134f4789bd897..9d8094afcc8bc 100644
+--- a/drivers/usb/host/xhci-mtk-sch.c
++++ b/drivers/usb/host/xhci-mtk-sch.c
+@@ -465,7 +465,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
+ */
+ for (j = 0; j < sch_ep->num_budget_microframes; j++) {
+ k = XHCI_MTK_BW_INDEX(base + j);
+- tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j];
++ tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe;
+ if (tmp > FS_PAYLOAD_MAX)
+ return -ESCH_BW_OVERFLOW;
+ }
+@@ -476,7 +476,6 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
+
+ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
+ {
+- u32 extra_cs_count;
+ u32 start_ss, last_ss;
+ u32 start_cs, last_cs;
+
+@@ -512,18 +511,12 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
+ if (last_cs > 7)
+ return -ESCH_CS_OVERFLOW;
+
+- if (sch_ep->ep_type == ISOC_IN_EP)
+- extra_cs_count = (last_cs == 7) ? 1 : 2;
+- else /* ep_type : INTR IN / INTR OUT */
+- extra_cs_count = 1;
+-
+- cs_count += extra_cs_count;
+ if (cs_count > 7)
+ cs_count = 7; /* HW limit */
+
+ sch_ep->cs_count = cs_count;
+- /* one for ss, the other for idle */
+- sch_ep->num_budget_microframes = cs_count + 2;
++ /* ss, idle are ignored */
++ sch_ep->num_budget_microframes = cs_count;
+
+ /*
+ * if interval=1, maxp >752, num_budget_microframes is larger
+@@ -539,19 +532,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
+ static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
+ {
+ struct mu3h_sch_tt *tt = sch_ep->sch_tt;
++ int bw_updated;
+ u32 base;
+- int i, j, k;
++ int i, j;
++
++ bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
+
+ for (i = 0; i < sch_ep->num_esit; i++) {
+ base = sch_ep->offset + i * sch_ep->esit;
+
+- for (j = 0; j < sch_ep->num_budget_microframes; j++) {
+- k = XHCI_MTK_BW_INDEX(base + j);
+- if (used)
+- tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j];
+- else
+- tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j];
+- }
++ for (j = 0; j < sch_ep->num_budget_microframes; j++)
++ tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
+ }
+
+ if (used)
+@@ -781,7 +772,7 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
+
+ ret = xhci_check_bandwidth(hcd, udev);
+ if (!ret)
+- INIT_LIST_HEAD(&mtk->bw_ep_chk_list);
++ list_del_init(&mtk->bw_ep_chk_list);
+
+ return ret;
+ }
+@@ -824,8 +815,8 @@ int xhci_mtk_drop_ep(struct usb_hcd *hcd, struct usb_device *udev,
+ if (ret)
+ return ret;
+
+- if (ep->hcpriv)
+- drop_ep_quirk(hcd, udev, ep);
++ /* needn't check @ep->hcpriv, xhci_endpoint_disable() sets it to NULL */
++ drop_ep_quirk(hcd, udev, ep);
+
+ return 0;
+ }
+diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
+index c53f6f276d5c6..b55ddc1156cce 100644
+--- a/drivers/usb/host/xhci-mtk.c
++++ b/drivers/usb/host/xhci-mtk.c
+@@ -570,6 +570,7 @@ static int xhci_mtk_probe(struct platform_device *pdev)
+ }
+
+ device_init_wakeup(dev, true);
++ dma_set_max_seg_size(dev, UINT_MAX);
+
+ xhci = hcd_to_xhci(hcd);
+ xhci->main_hcd = hcd;
+@@ -619,7 +620,6 @@ static int xhci_mtk_probe(struct platform_device *pdev)
+
+ dealloc_usb3_hcd:
+ usb_remove_hcd(xhci->shared_hcd);
+- xhci->shared_hcd = NULL;
+
+ dealloc_usb2_hcd:
+ usb_remove_hcd(hcd);
+diff --git a/drivers/usb/host/xhci-mvebu.c b/drivers/usb/host/xhci-mvebu.c
+index 8ca1a235d1645..eabccf25796b2 100644
+--- a/drivers/usb/host/xhci-mvebu.c
++++ b/drivers/usb/host/xhci-mvebu.c
+@@ -33,7 +33,7 @@ static void xhci_mvebu_mbus_config(void __iomem *base,
+
+ /* Program each DRAM CS in a separate window */
+ for (win = 0; win < dram->num_cs; win++) {
+- const struct mbus_dram_window *cs = dram->cs + win;
++ const struct mbus_dram_window *cs = &dram->cs[win];
+
+ writel(((cs->size - 1) & 0xffff0000) | (cs->mbus_attr << 8) |
+ (dram->mbus_dram_target_id << 4) | 1,
+diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
+index 2484a9d38ce2b..29a442b621182 100644
+--- a/drivers/usb/host/xhci-pci.c
++++ b/drivers/usb/host/xhci-pci.c
+@@ -13,6 +13,7 @@
+ #include <linux/module.h>
+ #include <linux/acpi.h>
+ #include <linux/reset.h>
++#include <linux/suspend.h>
+
+ #include "xhci.h"
+ #include "xhci-trace.h"
+@@ -58,13 +59,15 @@
+ #define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af
+ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13
+ #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138
+-#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e
++#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed
++#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI 0x54ed
+
+ #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
+ #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
+ #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
+ #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
+ #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
++
+ #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042
+ #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
+ #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242
+@@ -76,9 +79,12 @@ static const char hcd_name[] = "xhci_hcd";
+ static struct hc_driver __read_mostly xhci_pci_hc_driver;
+
+ static int xhci_pci_setup(struct usb_hcd *hcd);
++static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
++ struct usb_tt *tt, gfp_t mem_flags);
+
+ static const struct xhci_driver_overrides xhci_pci_overrides __initconst = {
+ .reset = xhci_pci_setup,
++ .update_hub_device = xhci_pci_update_hub_device,
+ };
+
+ /* called after powerup, by probe or system-pm "wakeup" */
+@@ -114,7 +120,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ /* Look for vendor-specific quirks */
+ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+ (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK ||
+- pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 ||
+ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) {
+ if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK &&
+ pdev->revision == 0x0) {
+@@ -149,6 +154,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
+ xhci->quirks |= XHCI_BROKEN_STREAMS;
+
++ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
++ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100)
++ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
++
+ if (pdev->vendor == PCI_VENDOR_ID_NEC)
+ xhci->quirks |= XHCI_NEC_HOST;
+
+@@ -187,7 +196,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+
+ if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+ pdev->device == PCI_DEVICE_ID_AMD_RENOIR_XHCI)
+- xhci->quirks |= XHCI_BROKEN_D3COLD;
++ xhci->quirks |= XHCI_BROKEN_D3COLD_S2I;
+
+ if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
+ xhci->quirks |= XHCI_LPM_SUPPORT;
+@@ -242,6 +251,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI))
+ xhci->quirks |= XHCI_MISSING_CAS;
+
++ if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
++ (pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI ||
++ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI))
++ xhci->quirks |= XHCI_RESET_TO_DEFAULT;
++
+ if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+ (pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI ||
+@@ -253,8 +267,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI ||
+- pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
+- pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI))
++ pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI))
+ xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
+
+ if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
+@@ -281,14 +294,18 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ pdev->device == 0x3432)
+ xhci->quirks |= XHCI_BROKEN_STREAMS;
+
+- if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) {
++ if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483)
+ xhci->quirks |= XHCI_LPM_SUPPORT;
+- xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS;
+- }
+
+ if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+- pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI)
++ pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) {
++ /*
++ * try to tame the ASMedia 1042 controller which reports 0.96
++ * but appears to behave more like 1.0
++ */
++ xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
+ xhci->quirks |= XHCI_BROKEN_STREAMS;
++ }
+ if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+ pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) {
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+@@ -317,6 +334,22 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4))
+ xhci->quirks |= XHCI_NO_SOFT_RETRY;
+
++ if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) {
++ xhci->quirks |= XHCI_ZHAOXIN_HOST;
++
++ if (pdev->device == 0x9202) {
++ xhci->quirks |= XHCI_RESET_ON_RESUME;
++ xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH;
++ }
++
++ if (pdev->device == 0x9203)
++ xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH;
++ }
++
++ /* xHC spec requires PCI devices to support D3hot and D3cold */
++ if (xhci->hci_version >= 0x120)
++ xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
++
+ if (xhci->quirks & XHCI_RESET_ON_RESUME)
+ xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
+ "QUIRK: Resetting on resume");
+@@ -334,8 +367,38 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
+ NULL);
+ ACPI_FREE(obj);
+ }
++
++static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev)
++{
++ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
++ struct xhci_hub *rhub = &xhci->usb3_rhub;
++ int ret;
++ int i;
++
++ /* This is not the usb3 roothub we are looking for */
++ if (hcd != rhub->hcd)
++ return;
++
++ if (hdev->maxchild > rhub->num_ports) {
++ dev_err(&hdev->dev, "USB3 roothub port number mismatch\n");
++ return;
++ }
++
++ for (i = 0; i < hdev->maxchild; i++) {
++ ret = usb_acpi_port_lpm_incapable(hdev, i);
++
++ dev_dbg(&hdev->dev, "port-%d disable U1/U2 _DSM: %d\n", i + 1, ret);
++
++ if (ret >= 0) {
++ rhub->ports[i]->lpm_incapable = ret;
++ continue;
++ }
++ }
++}
++
+ #else
+ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
++static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) { }
+ #endif /* CONFIG_ACPI */
+
+ /* called during probe() after chip reset completes */
+@@ -368,6 +431,16 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
+ return xhci_pci_reinit(xhci, pdev);
+ }
+
++static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
++ struct usb_tt *tt, gfp_t mem_flags)
++{
++ /* Check if acpi claims some USB3 roothub ports are lpm incapable */
++ if (!hdev->parent)
++ xhci_find_lpm_incapable_ports(hcd, hdev);
++
++ return xhci_update_hub_device(hcd, hdev, tt, mem_flags);
++}
++
+ /*
+ * We need to register our own PCI probe function (instead of the USB core's
+ * function) in order to create a second roothub under xHCI.
+@@ -437,6 +510,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
+ pm_runtime_allow(&dev->dev);
+
++ dma_set_max_seg_size(&dev->dev, UINT_MAX);
++
+ return 0;
+
+ put_usb3_hcd:
+@@ -546,8 +621,15 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
+ * Systems with the TI redriver that loses port status change events
+ * need to have the registers polled during D3, so avoid D3cold.
+ */
+- if (xhci->quirks & (XHCI_COMP_MODE_QUIRK | XHCI_BROKEN_D3COLD))
++ if (xhci->quirks & XHCI_COMP_MODE_QUIRK)
++ pci_d3cold_disable(pdev);
++
++#ifdef CONFIG_SUSPEND
++ /* d3cold is broken, but only when s2idle is used */
++ if (pm_suspend_target_state == PM_SUSPEND_TO_IDLE &&
++ xhci->quirks & (XHCI_BROKEN_D3COLD_S2I))
+ pci_d3cold_disable(pdev);
++#endif
+
+ if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
+ xhci_pme_quirk(hcd);
+diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
+index c1edcc9b13cec..972a44b2a7f12 100644
+--- a/drivers/usb/host/xhci-plat.c
++++ b/drivers/usb/host/xhci-plat.c
+@@ -134,7 +134,7 @@ static const struct xhci_plat_priv xhci_plat_renesas_rcar_gen3 = {
+ };
+
+ static const struct xhci_plat_priv xhci_plat_brcm = {
+- .quirks = XHCI_RESET_ON_RESUME,
++ .quirks = XHCI_RESET_ON_RESUME | XHCI_SUSPEND_RESUME_CLKS,
+ };
+
+ static const struct of_device_id usb_xhci_of_match[] = {
+@@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ int ret;
+
++ if (pm_runtime_suspended(dev))
++ pm_runtime_resume(dev);
++
+ ret = xhci_priv_suspend_quirk(hcd);
+ if (ret)
+ return ret;
+@@ -444,7 +447,16 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
+ * xhci_suspend() needs `do_wakeup` to know whether host is allowed
+ * to do wakeup during suspend.
+ */
+- return xhci_suspend(xhci, device_may_wakeup(dev));
++ ret = xhci_suspend(xhci, device_may_wakeup(dev));
++ if (ret)
++ return ret;
++
++ if (!device_may_wakeup(dev) && (xhci->quirks & XHCI_SUSPEND_RESUME_CLKS)) {
++ clk_disable_unprepare(xhci->clk);
++ clk_disable_unprepare(xhci->reg_clk);
++ }
++
++ return 0;
+ }
+
+ static int __maybe_unused xhci_plat_resume(struct device *dev)
+@@ -453,6 +465,11 @@ static int __maybe_unused xhci_plat_resume(struct device *dev)
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ int ret;
+
++ if (!device_may_wakeup(dev) && (xhci->quirks & XHCI_SUSPEND_RESUME_CLKS)) {
++ clk_prepare_enable(xhci->clk);
++ clk_prepare_enable(xhci->reg_clk);
++ }
++
+ ret = xhci_priv_resume_quirk(hcd);
+ if (ret)
+ return ret;
+diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c
+index 9888ba7d85b6a..cfafa1c50adea 100644
+--- a/drivers/usb/host/xhci-rcar.c
++++ b/drivers/usb/host/xhci-rcar.c
+@@ -75,7 +75,6 @@ MODULE_FIRMWARE(XHCI_RCAR_FIRMWARE_NAME_V3);
+
+ /* For soc_device_attribute */
+ #define RCAR_XHCI_FIRMWARE_V2 BIT(0) /* FIRMWARE V2 */
+-#define RCAR_XHCI_FIRMWARE_V3 BIT(1) /* FIRMWARE V3 */
+
+ static const struct soc_device_attribute rcar_quirks_match[] = {
+ {
+@@ -147,8 +146,6 @@ static int xhci_rcar_download_firmware(struct usb_hcd *hcd)
+
+ if (quirks & RCAR_XHCI_FIRMWARE_V2)
+ firmware_name = XHCI_RCAR_FIRMWARE_NAME_V2;
+- else if (quirks & RCAR_XHCI_FIRMWARE_V3)
+- firmware_name = XHCI_RCAR_FIRMWARE_NAME_V3;
+ else
+ firmware_name = priv->firmware_name;
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 311597bba80e2..15e44045230e1 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -276,6 +276,26 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
+ trace_xhci_inc_enq(ring);
+ }
+
++static int xhci_num_trbs_to(struct xhci_segment *start_seg, union xhci_trb *start,
++ struct xhci_segment *end_seg, union xhci_trb *end,
++ unsigned int num_segs)
++{
++ union xhci_trb *last_on_seg;
++ int num = 0;
++ int i = 0;
++
++ do {
++ if (start_seg == end_seg && end >= start)
++ return num + (end - start);
++ last_on_seg = &start_seg->trbs[TRBS_PER_SEGMENT - 1];
++ num += last_on_seg - start;
++ start_seg = start_seg->next;
++ start = start_seg->trbs;
++ } while (i++ <= num_segs);
++
++ return -EINVAL;
++}
++
+ /*
+ * Check to see if there's room to enqueue num_trbs on the ring and make sure
+ * enqueue pointer will not advance into dequeue segment. See rules above.
+@@ -366,7 +386,9 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
+ /* Must be called with xhci->lock held, releases and aquires lock back */
+ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
+ {
+- u32 temp_32;
++ struct xhci_segment *new_seg = xhci->cmd_ring->deq_seg;
++ union xhci_trb *new_deq = xhci->cmd_ring->dequeue;
++ u64 crcr;
+ int ret;
+
+ xhci_dbg(xhci, "Abort command ring\n");
+@@ -375,13 +397,18 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
+
+ /*
+ * The control bits like command stop, abort are located in lower
+- * dword of the command ring control register. Limit the write
+- * to the lower dword to avoid corrupting the command ring pointer
+- * in case if the command ring is stopped by the time upper dword
+- * is written.
++ * dword of the command ring control register.
++ * Some controllers require all 64 bits to be written to abort the ring.
++ * Make sure the upper dword is valid, pointing to the next command,
++ * avoiding corrupting the command ring pointer in case the command ring
++ * is stopped by the time the upper dword is written.
+ */
+- temp_32 = readl(&xhci->op_regs->cmd_ring);
+- writel(temp_32 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring);
++ next_trb(xhci, NULL, &new_seg, &new_deq);
++ if (trb_is_link(new_deq))
++ next_trb(xhci, NULL, &new_seg, &new_deq);
++
++ crcr = xhci_trb_virt_to_dma(new_seg, new_deq);
++ xhci_write_64(xhci, crcr | CMD_RING_ABORT, &xhci->op_regs->cmd_ring);
+
+ /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the
+ * completion of the Command Abort operation. If CRR is not negated in 5
+@@ -565,11 +592,8 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
+ struct xhci_ring *ep_ring;
+ struct xhci_command *cmd;
+ struct xhci_segment *new_seg;
+- struct xhci_segment *halted_seg = NULL;
+ union xhci_trb *new_deq;
+ int new_cycle;
+- union xhci_trb *halted_trb;
+- int index = 0;
+ dma_addr_t addr;
+ u64 hw_dequeue;
+ bool cycle_found = false;
+@@ -607,27 +631,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
+ hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id);
+ new_seg = ep_ring->deq_seg;
+ new_deq = ep_ring->dequeue;
+-
+- /*
+- * Quirk: xHC write-back of the DCS field in the hardware dequeue
+- * pointer is wrong - use the cycle state of the TRB pointed to by
+- * the dequeue pointer.
+- */
+- if (xhci->quirks & XHCI_EP_CTX_BROKEN_DCS &&
+- !(ep->ep_state & EP_HAS_STREAMS))
+- halted_seg = trb_in_td(xhci, td->start_seg,
+- td->first_trb, td->last_trb,
+- hw_dequeue & ~0xf, false);
+- if (halted_seg) {
+- index = ((dma_addr_t)(hw_dequeue & ~0xf) - halted_seg->dma) /
+- sizeof(*halted_trb);
+- halted_trb = &halted_seg->trbs[index];
+- new_cycle = halted_trb->generic.field[3] & 0x1;
+- xhci_dbg(xhci, "Endpoint DCS = %d TRB index = %d cycle = %d\n",
+- (u8)(hw_dequeue & 0x1), index, new_cycle);
+- } else {
+- new_cycle = hw_dequeue & 0x1;
+- }
++ new_cycle = hw_dequeue & 0x1;
+
+ /*
+ * We want to find the pointer, segment and cycle state of the new trb
+@@ -1171,7 +1175,10 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci,
+ struct xhci_virt_ep *ep;
+ struct xhci_ring *ring;
+
+- ep = &xhci->devs[slot_id]->eps[ep_index];
++ ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
++ if (!ep)
++ return;
++
+ if ((ep->ep_state & EP_HAS_STREAMS) ||
+ (ep->ep_state & EP_GETTING_NO_STREAMS)) {
+ int stream_id;
+@@ -1518,7 +1525,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id)
+ if (xhci->quirks & XHCI_EP_LIMIT_QUIRK)
+ /* Delete default control endpoint resources */
+ xhci_free_device_endpoint_resources(xhci, virt_dev, true);
+- xhci_free_virt_device(xhci, slot_id);
+ }
+
+ static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id,
+@@ -2198,6 +2204,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ u32 trb_comp_code)
+ {
+ struct xhci_ep_ctx *ep_ctx;
++ int trbs_freed;
+
+ ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
+
+@@ -2269,9 +2276,15 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ }
+
+ /* Update ring dequeue pointer */
++ trbs_freed = xhci_num_trbs_to(ep_ring->deq_seg, ep_ring->dequeue,
++ td->last_trb_seg, td->last_trb,
++ ep_ring->num_segs);
++ if (trbs_freed < 0)
++ xhci_dbg(xhci, "Failed to count freed trbs at TD finish\n");
++ else
++ ep_ring->num_trbs_free += trbs_freed;
+ ep_ring->dequeue = td->last_trb;
+ ep_ring->deq_seg = td->last_trb_seg;
+- ep_ring->num_trbs_free += td->num_trbs - 1;
+ inc_deq(xhci, ep_ring);
+
+ return xhci_td_cleanup(xhci, td, ep_ring, td->status);
+@@ -2518,7 +2531,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+
+ switch (trb_comp_code) {
+ case COMP_SUCCESS:
+- ep_ring->err_count = 0;
++ ep->err_count = 0;
+ /* handle success with untransferred data as short packet */
+ if (ep_trb != td->last_trb || remaining) {
+ xhci_warn(xhci, "WARN Successful completion on short TX\n");
+@@ -2544,7 +2557,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
+ break;
+ case COMP_USB_TRANSACTION_ERROR:
+ if (xhci->quirks & XHCI_NO_SOFT_RETRY ||
+- (ep_ring->err_count++ > MAX_SOFT_RETRY) ||
++ (ep->err_count++ > MAX_SOFT_RETRY) ||
+ le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
+ break;
+
+@@ -2625,8 +2638,14 @@ static int handle_tx_event(struct xhci_hcd *xhci,
+ case COMP_USB_TRANSACTION_ERROR:
+ case COMP_INVALID_STREAM_TYPE_ERROR:
+ case COMP_INVALID_STREAM_ID_ERROR:
+- xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+- EP_SOFT_RESET);
++ xhci_dbg(xhci, "Stream transaction error ep %u no id\n",
++ ep_index);
++ if (ep->err_count++ > MAX_SOFT_RETRY)
++ xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
++ EP_HARD_RESET);
++ else
++ xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
++ EP_SOFT_RESET);
+ goto cleanup;
+ case COMP_RING_UNDERRUN:
+ case COMP_RING_OVERRUN:
+@@ -3135,6 +3154,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
+ if (event_loop++ < TRBS_PER_SEGMENT / 2)
+ continue;
+ xhci_update_erst_dequeue(xhci, event_ring_deq);
++ event_ring_deq = xhci->event_ring->dequeue;
+
+ /* ring is half-full, force isoc trbs to interrupt more often */
+ if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN)
+diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
+index 1bf494b649bd2..51eabc5e87701 100644
+--- a/drivers/usb/host/xhci-tegra.c
++++ b/drivers/usb/host/xhci-tegra.c
+@@ -1034,13 +1034,13 @@ static int tegra_xusb_unpowergate_partitions(struct tegra_xusb *tegra)
+ int rc;
+
+ if (tegra->use_genpd) {
+- rc = pm_runtime_get_sync(tegra->genpd_dev_ss);
++ rc = pm_runtime_resume_and_get(tegra->genpd_dev_ss);
+ if (rc < 0) {
+ dev_err(dev, "failed to enable XUSB SS partition\n");
+ return rc;
+ }
+
+- rc = pm_runtime_get_sync(tegra->genpd_dev_host);
++ rc = pm_runtime_resume_and_get(tegra->genpd_dev_host);
+ if (rc < 0) {
+ dev_err(dev, "failed to enable XUSB Host partition\n");
+ pm_runtime_put_sync(tegra->genpd_dev_ss);
+@@ -1225,6 +1225,9 @@ static void tegra_xhci_id_work(struct work_struct *work)
+
+ mutex_unlock(&tegra->lock);
+
++ tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl,
++ tegra->otg_usb2_port);
++
+ if (tegra->host_mode) {
+ /* switch to host mode */
+ if (tegra->otg_usb3_port >= 0) {
+@@ -1339,9 +1342,6 @@ static int tegra_xhci_id_notify(struct notifier_block *nb,
+ }
+
+ tegra->otg_usb2_port = tegra_xusb_get_usb2_port(tegra, usbphy);
+- tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(
+- tegra->padctl,
+- tegra->otg_usb2_port);
+
+ tegra->host_mode = (usbphy->last_event == USB_EVENT_ID) ? true : false;
+
+@@ -1400,6 +1400,7 @@ static void tegra_xusb_deinit_usb_phy(struct tegra_xusb *tegra)
+
+ static int tegra_xusb_probe(struct platform_device *pdev)
+ {
++ struct of_phandle_args args;
+ struct tegra_xusb *tegra;
+ struct device_node *np;
+ struct resource *regs;
+@@ -1454,10 +1455,17 @@ static int tegra_xusb_probe(struct platform_device *pdev)
+ goto put_padctl;
+ }
+
+- tegra->padctl_irq = of_irq_get(np, 0);
+- if (tegra->padctl_irq <= 0) {
+- err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
+- goto put_padctl;
++ /* Older device-trees don't have the padctl interrupt */
++ err = of_irq_parse_one(np, 0, &args);
++ if (!err) {
++ tegra->padctl_irq = of_irq_get(np, 0);
++ if (tegra->padctl_irq <= 0) {
++ err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
++ goto put_padctl;
++ }
++ } else {
++ dev_dbg(&pdev->dev,
++ "%pOF is missing an interrupt, disabling PM support\n", np);
+ }
+
+ tegra->host_clk = devm_clk_get(&pdev->dev, "xusb_host");
+@@ -1696,11 +1704,15 @@ static int tegra_xusb_probe(struct platform_device *pdev)
+ goto remove_usb3;
+ }
+
+- err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq, NULL, tegra_xusb_padctl_irq,
+- IRQF_ONESHOT, dev_name(&pdev->dev), tegra);
+- if (err < 0) {
+- dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
+- goto remove_usb3;
++ if (tegra->padctl_irq) {
++ err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq,
++ NULL, tegra_xusb_padctl_irq,
++ IRQF_ONESHOT, dev_name(&pdev->dev),
++ tegra);
++ if (err < 0) {
++ dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
++ goto remove_usb3;
++ }
+ }
+
+ err = tegra_xusb_enable_firmware_messages(tegra);
+@@ -1718,13 +1730,16 @@ static int tegra_xusb_probe(struct platform_device *pdev)
+ /* Enable wake for both USB 2.0 and USB 3.0 roothubs */
+ device_init_wakeup(&tegra->hcd->self.root_hub->dev, true);
+ device_init_wakeup(&xhci->shared_hcd->self.root_hub->dev, true);
+- device_init_wakeup(tegra->dev, true);
+
+ pm_runtime_use_autosuspend(tegra->dev);
+ pm_runtime_set_autosuspend_delay(tegra->dev, 2000);
+ pm_runtime_mark_last_busy(tegra->dev);
+ pm_runtime_set_active(tegra->dev);
+- pm_runtime_enable(tegra->dev);
++
++ if (tegra->padctl_irq) {
++ device_init_wakeup(tegra->dev, true);
++ pm_runtime_enable(tegra->dev);
++ }
+
+ return 0;
+
+@@ -1772,7 +1787,9 @@ static int tegra_xusb_remove(struct platform_device *pdev)
+ dma_free_coherent(&pdev->dev, tegra->fw.size, tegra->fw.virt,
+ tegra->fw.phys);
+
+- pm_runtime_disable(&pdev->dev);
++ if (tegra->padctl_irq)
++ pm_runtime_disable(&pdev->dev);
++
+ pm_runtime_put(&pdev->dev);
+
+ tegra_xusb_powergate_partitions(tegra);
+diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
+index 541fe4dcc43a2..1fd2f6a850ebc 100644
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -9,6 +9,7 @@
+ */
+
+ #include <linux/pci.h>
++#include <linux/iommu.h>
+ #include <linux/iopoll.h>
+ #include <linux/irq.h>
+ #include <linux/log2.h>
+@@ -65,7 +66,7 @@ static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring)
+ * handshake done). There are two failure modes: "usec" have passed (major
+ * hardware flakeout), or the register reads as all-ones (hardware removed).
+ */
+-int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec)
++int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us)
+ {
+ u32 result;
+ int ret;
+@@ -73,7 +74,7 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec)
+ ret = readl_poll_timeout_atomic(ptr, result,
+ (result & mask) == done ||
+ result == U32_MAX,
+- 1, usec);
++ 1, timeout_us);
+ if (result == U32_MAX) /* card removed */
+ return -ENODEV;
+
+@@ -148,9 +149,11 @@ int xhci_start(struct xhci_hcd *xhci)
+ xhci_err(xhci, "Host took too long to start, "
+ "waited %u microseconds.\n",
+ XHCI_MAX_HALT_USEC);
+- if (!ret)
++ if (!ret) {
+ /* clear state flags. Including dying, halted or removing */
+ xhci->xhc_state = 0;
++ xhci->run_graceperiod = jiffies + msecs_to_jiffies(500);
++ }
+
+ return ret;
+ }
+@@ -162,7 +165,7 @@ int xhci_start(struct xhci_hcd *xhci)
+ * Transactions will be terminated immediately, and operational registers
+ * will be set to their defaults.
+ */
+-int xhci_reset(struct xhci_hcd *xhci)
++int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us)
+ {
+ u32 command;
+ u32 state;
+@@ -195,8 +198,7 @@ int xhci_reset(struct xhci_hcd *xhci)
+ if (xhci->quirks & XHCI_INTEL_HOST)
+ udelay(1000);
+
+- ret = xhci_handshake(&xhci->op_regs->command,
+- CMD_RESET, 0, 10 * 1000 * 1000);
++ ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us);
+ if (ret)
+ return ret;
+
+@@ -209,8 +211,7 @@ int xhci_reset(struct xhci_hcd *xhci)
+ * xHCI cannot write to any doorbells or operational registers other
+ * than status until the "Controller Not Ready" flag is cleared.
+ */
+- ret = xhci_handshake(&xhci->op_regs->status,
+- STS_CNR, 0, 10 * 1000 * 1000);
++ ret = xhci_handshake(&xhci->op_regs->status, STS_CNR, 0, timeout_us);
+
+ xhci->usb2_rhub.bus_state.port_c_suspend = 0;
+ xhci->usb2_rhub.bus_state.suspended_ports = 0;
+@@ -225,6 +226,7 @@ int xhci_reset(struct xhci_hcd *xhci)
+ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
+ {
+ struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
++ struct iommu_domain *domain;
+ int err, i;
+ u64 val;
+ u32 intrs;
+@@ -243,7 +245,9 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
+ * an iommu. Doing anything when there is no iommu is definitely
+ * unsafe...
+ */
+- if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !device_iommu_mapped(dev))
++ domain = iommu_get_domain_for_dev(dev);
++ if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !domain ||
++ domain->type == IOMMU_DOMAIN_IDENTITY)
+ return;
+
+ xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n");
+@@ -692,11 +696,12 @@ int xhci_run(struct usb_hcd *hcd)
+ if (ret)
+ xhci_free_command(xhci, command);
+ }
+- set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags);
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Finished xhci_run for USB2 roothub");
+
+- xhci_dbc_init(xhci);
++ set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags);
++
++ xhci_create_dbc_dev(xhci);
+
+ xhci_debugfs_init(xhci);
+
+@@ -726,13 +731,13 @@ static void xhci_stop(struct usb_hcd *hcd)
+ return;
+ }
+
+- xhci_dbc_exit(xhci);
++ xhci_remove_dbc_dev(xhci);
+
+ spin_lock_irq(&xhci->lock);
+ xhci->xhc_state |= XHCI_STATE_HALTED;
+ xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+ xhci_halt(xhci);
+- xhci_reset(xhci);
++ xhci_reset(xhci, XHCI_RESET_SHORT_USEC);
+ spin_unlock_irq(&xhci->lock);
+
+ xhci_cleanup_msix(xhci);
+@@ -781,11 +786,28 @@ void xhci_shutdown(struct usb_hcd *hcd)
+ if (xhci->quirks & XHCI_SPURIOUS_REBOOT)
+ usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev));
+
++ /* Don't poll the roothubs after shutdown. */
++ xhci_dbg(xhci, "%s: stopping usb%d port polling.\n",
++ __func__, hcd->self.busnum);
++ clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
++ del_timer_sync(&hcd->rh_timer);
++
++ if (xhci->shared_hcd) {
++ clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
++ del_timer_sync(&xhci->shared_hcd->rh_timer);
++ }
++
+ spin_lock_irq(&xhci->lock);
+ xhci_halt(xhci);
+- /* Workaround for spurious wakeups at shutdown with HSW */
+- if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
+- xhci_reset(xhci);
++
++ /*
++ * Workaround for spurious wakeups at shutdown with HSW, and for boot
++ * firmware delay in ADL-P PCH if ports are left in U3 at shutdown
++ */
++ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP ||
++ xhci->quirks & XHCI_RESET_TO_DEFAULT)
++ xhci_reset(xhci, XHCI_RESET_SHORT_USEC);
++
+ spin_unlock_irq(&xhci->lock);
+
+ xhci_cleanup_msix(xhci);
+@@ -1092,6 +1114,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ int retval = 0;
+ bool comp_timer_running = false;
+ bool pending_portevent = false;
++ bool reinit_xhc = false;
+
+ if (!hcd->state)
+ return 0;
+@@ -1108,10 +1131,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags);
+
+ spin_lock_irq(&xhci->lock);
+- if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend)
+- hibernated = true;
+
+- if (!hibernated) {
++ if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend)
++ reinit_xhc = true;
++
++ if (!reinit_xhc) {
+ /*
+ * Some controllers might lose power during suspend, so wait
+ * for controller not ready bit to clear, just as in xHC init.
+@@ -1144,12 +1168,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ spin_unlock_irq(&xhci->lock);
+ return -ETIMEDOUT;
+ }
+- temp = readl(&xhci->op_regs->status);
+ }
+
+- /* If restore operation fails, re-initialize the HC during resume */
+- if ((temp & STS_SRE) || hibernated) {
++ temp = readl(&xhci->op_regs->status);
++
++ /* re-initialize the HC on Restore Error, or Host Controller Error */
++ if (temp & (STS_SRE | STS_HCE)) {
++ reinit_xhc = true;
++ if (!xhci->broken_suspend)
++ xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp);
++ }
+
++ if (reinit_xhc) {
+ if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) &&
+ !(xhci_all_ports_seen_u0(xhci))) {
+ del_timer_sync(&xhci->comp_mode_recovery_timer);
+@@ -1164,7 +1194,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
+ xhci_dbg(xhci, "Stop HCD\n");
+ xhci_halt(xhci);
+ xhci_zero_64b_regs(xhci);
+- retval = xhci_reset(xhci);
++ retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC);
+ spin_unlock_irq(&xhci->lock);
+ if (retval)
+ return retval;
+@@ -1605,9 +1635,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
+ struct urb_priv *urb_priv;
+ int num_tds;
+
+- if (!urb || xhci_check_args(hcd, urb->dev, urb->ep,
+- true, true, __func__) <= 0)
++ if (!urb)
+ return -EINVAL;
++ ret = xhci_check_args(hcd, urb->dev, urb->ep,
++ true, true, __func__);
++ if (ret <= 0)
++ return ret ? ret : -EINVAL;
+
+ slot_id = urb->dev->slot_id;
+ ep_index = xhci_get_endpoint_index(&urb->ep->desc);
+@@ -3324,7 +3357,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
+ return -EINVAL;
+ ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__);
+ if (ret <= 0)
+- return -EINVAL;
++ return ret ? ret : -EINVAL;
+ if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) {
+ xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion"
+ " descriptor for ep 0x%x does not support streams\n",
+@@ -3933,9 +3966,9 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ struct xhci_virt_device *virt_dev;
+ struct xhci_slot_ctx *slot_ctx;
++ unsigned long flags;
+ int i, ret;
+
+-#ifndef CONFIG_USB_DEFAULT_PERSIST
+ /*
+ * We called pm_runtime_get_noresume when the device was attached.
+ * Decrement the counter here to allow controller to runtime suspend
+@@ -3943,7 +3976,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
+ */
+ if (xhci->quirks & XHCI_RESET_ON_RESUME)
+ pm_runtime_put_noidle(hcd->self.controller);
+-#endif
+
+ ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__);
+ /* If the host is halted due to driver unload, we still need to free the
+@@ -3962,9 +3994,12 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
+ del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
+ }
+ virt_dev->udev = NULL;
+- ret = xhci_disable_slot(xhci, udev->slot_id);
+- if (ret)
+- xhci_free_virt_device(xhci, udev->slot_id);
++ xhci_disable_slot(xhci, udev->slot_id);
++
++ spin_lock_irqsave(&xhci->lock, flags);
++ xhci_free_virt_device(xhci, udev->slot_id);
++ spin_unlock_irqrestore(&xhci->lock, flags);
++
+ }
+
+ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
+@@ -3974,7 +4009,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
+ u32 state;
+ int ret = 0;
+
+- command = xhci_alloc_command(xhci, false, GFP_KERNEL);
++ command = xhci_alloc_command(xhci, true, GFP_KERNEL);
+ if (!command)
+ return -ENOMEM;
+
+@@ -3999,6 +4034,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
+ }
+ xhci_ring_cmd_db(xhci);
+ spin_unlock_irqrestore(&xhci->lock, flags);
++
++ wait_for_completion(command->completion);
++
++ if (command->status != COMP_SUCCESS)
++ xhci_warn(xhci, "Unsuccessful disable slot %u command, status %d\n",
++ slot_id, command->status);
++
++ xhci_free_command(xhci, command);
++
+ return ret;
+ }
+
+@@ -4095,23 +4139,20 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
+
+ xhci_debugfs_create_slot(xhci, slot_id);
+
+-#ifndef CONFIG_USB_DEFAULT_PERSIST
+ /*
+ * If resetting upon resume, we can't put the controller into runtime
+ * suspend if there is a device attached.
+ */
+ if (xhci->quirks & XHCI_RESET_ON_RESUME)
+ pm_runtime_get_noresume(hcd->self.controller);
+-#endif
+
+ /* Is this a LS or FS device under a HS hub? */
+ /* Hub or peripheral? */
+ return 1;
+
+ disable_slot:
+- ret = xhci_disable_slot(xhci, udev->slot_id);
+- if (ret)
+- xhci_free_virt_device(xhci, udev->slot_id);
++ xhci_disable_slot(xhci, udev->slot_id);
++ xhci_free_virt_device(xhci, udev->slot_id);
+
+ return 0;
+ }
+@@ -4241,6 +4282,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
+
+ mutex_unlock(&xhci->mutex);
+ ret = xhci_disable_slot(xhci, udev->slot_id);
++ xhci_free_virt_device(xhci, udev->slot_id);
+ if (!ret)
+ xhci_alloc_dev(hcd, udev);
+ kfree(command->completion);
+@@ -5013,6 +5055,7 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd,
+ struct usb_device *udev, enum usb3_link_state state)
+ {
+ struct xhci_hcd *xhci;
++ struct xhci_port *port;
+ u16 hub_encoded_timeout;
+ int mel;
+ int ret;
+@@ -5026,6 +5069,13 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd,
+ !xhci->devs[udev->slot_id])
+ return USB3_LPM_DISABLED;
+
++ /* If connected to root port then check port can handle lpm */
++ if (udev->parent && !udev->parent->parent) {
++ port = xhci->usb3_rhub.ports[udev->portnum - 1];
++ if (port->lpm_incapable)
++ return USB3_LPM_DISABLED;
++ }
++
+ hub_encoded_timeout = xhci_calculate_lpm_timeout(hcd, udev, state);
+ mel = calculate_max_exit_latency(udev, state, hub_encoded_timeout);
+ if (mel < 0) {
+@@ -5085,7 +5135,7 @@ static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd,
+ /* Once a hub descriptor is fetched for a device, we need to update the xHC's
+ * internal data structures for the device.
+ */
+-static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
++int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+ struct usb_tt *tt, gfp_t mem_flags)
+ {
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+@@ -5185,6 +5235,7 @@ static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
+ xhci_free_command(xhci, config_cmd);
+ return ret;
+ }
++EXPORT_SYMBOL_GPL(xhci_update_hub_device);
+
+ static int xhci_get_frame(struct usb_hcd *hcd)
+ {
+@@ -5305,7 +5356,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
+
+ xhci_dbg(xhci, "Resetting HCD\n");
+ /* Reset the internal HC memory state and registers. */
+- retval = xhci_reset(xhci);
++ retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC);
+ if (retval)
+ return retval;
+ xhci_dbg(xhci, "Reset complete\n");
+@@ -5462,6 +5513,8 @@ void xhci_init_driver(struct hc_driver *drv,
+ drv->check_bandwidth = over->check_bandwidth;
+ if (over->reset_bandwidth)
+ drv->reset_bandwidth = over->reset_bandwidth;
++ if (over->update_hub_device)
++ drv->update_hub_device = over->update_hub_device;
+ }
+ }
+ EXPORT_SYMBOL_GPL(xhci_init_driver);
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 5a75fe5631238..64278cd77f988 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -229,6 +229,9 @@ struct xhci_op_regs {
+ #define CMD_ETE (1 << 14)
+ /* bits 15:31 are reserved (and should be preserved on writes). */
+
++#define XHCI_RESET_LONG_USEC (10 * 1000 * 1000)
++#define XHCI_RESET_SHORT_USEC (250 * 1000)
++
+ /* IMAN - Interrupt Management Register */
+ #define IMAN_IE (1 << 1)
+ #define IMAN_IP (1 << 0)
+@@ -930,6 +933,7 @@ struct xhci_virt_ep {
+ * have to restore the device state to the previous state
+ */
+ struct xhci_ring *new_ring;
++ unsigned int err_count;
+ unsigned int ep_state;
+ #define SET_DEQ_PENDING (1 << 0)
+ #define EP_HALTED (1 << 1) /* For stall handling */
+@@ -1626,7 +1630,6 @@ struct xhci_ring {
+ * if we own the TRB (if we are the consumer). See section 4.9.1.
+ */
+ u32 cycle_state;
+- unsigned int err_count;
+ unsigned int stream_id;
+ unsigned int num_segs;
+ unsigned int num_trbs_free;
+@@ -1734,6 +1737,7 @@ struct xhci_port {
+ int hcd_portnum;
+ struct xhci_hub *rhub;
+ struct xhci_port_cap *port_cap;
++ unsigned int lpm_incapable:1;
+ };
+
+ struct xhci_hub {
+@@ -1827,7 +1831,7 @@ struct xhci_hcd {
+
+ /* Host controller watchdog timer structures */
+ unsigned int xhc_state;
+-
++ unsigned long run_graceperiod;
+ u32 command;
+ struct s3_save s3;
+ /* Host controller is dying - not responding to commands. "I'm not dead yet!"
+@@ -1898,8 +1902,12 @@ struct xhci_hcd {
+ #define XHCI_DISABLE_SPARSE BIT_ULL(38)
+ #define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39)
+ #define XHCI_NO_SOFT_RETRY BIT_ULL(40)
+-#define XHCI_BROKEN_D3COLD BIT_ULL(41)
++#define XHCI_BROKEN_D3COLD_S2I BIT_ULL(41)
+ #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42)
++#define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43)
++#define XHCI_RESET_TO_DEFAULT BIT_ULL(44)
++#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45)
++#define XHCI_ZHAOXIN_HOST BIT_ULL(46)
+
+ unsigned int num_active_eps;
+ unsigned int limit_active_eps;
+@@ -1943,6 +1951,8 @@ struct xhci_driver_overrides {
+ struct usb_host_endpoint *ep);
+ int (*check_bandwidth)(struct usb_hcd *, struct usb_device *);
+ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
++ int (*update_hub_device)(struct usb_hcd *hcd, struct usb_device *hdev,
++ struct usb_tt *tt, gfp_t mem_flags);
+ };
+
+ #define XHCI_CFC_DELAY 10
+@@ -2083,11 +2093,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci,
+
+ /* xHCI host controller glue */
+ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *);
+-int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec);
++int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us);
+ void xhci_quiesce(struct xhci_hcd *xhci);
+ int xhci_halt(struct xhci_hcd *xhci);
+ int xhci_start(struct xhci_hcd *xhci);
+-int xhci_reset(struct xhci_hcd *xhci);
++int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us);
+ int xhci_run(struct usb_hcd *hcd);
+ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks);
+ void xhci_shutdown(struct usb_hcd *hcd);
+@@ -2099,6 +2109,8 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev,
+ struct usb_host_endpoint *ep);
+ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev);
+ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev);
++int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
++ struct usb_tt *tt, gfp_t mem_flags);
+ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id);
+ int xhci_ext_cap_init(struct xhci_hcd *xhci);
+
+@@ -2390,7 +2402,7 @@ static inline const char *xhci_decode_trb(char *str, size_t size,
+ field3 & TRB_CYCLE ? 'C' : 'c');
+ break;
+ case TRB_STOP_RING:
+- sprintf(str,
++ snprintf(str, size,
+ "%s: slot %d sp %d ep %d flags %c",
+ xhci_trb_type_string(type),
+ TRB_TO_SLOT_ID(field3),
+@@ -2467,6 +2479,8 @@ static inline const char *xhci_decode_ctrl_ctx(char *str,
+ unsigned int bit;
+ int ret = 0;
+
++ str[0] = '\0';
++
+ if (drop) {
+ ret = sprintf(str, "Drop:");
+ for_each_set_bit(bit, &drop, 32)
+@@ -2624,8 +2638,11 @@ static inline const char *xhci_decode_usbsts(char *str, u32 usbsts)
+ {
+ int ret = 0;
+
++ ret = sprintf(str, " 0x%08x", usbsts);
++
+ if (usbsts == ~(u32)0)
+- return " 0xffffffff";
++ return str;
++
+ if (usbsts & STS_HALT)
+ ret += sprintf(str + ret, " HCHalted");
+ if (usbsts & STS_FATAL)
+diff --git a/drivers/usb/isp1760/isp1760-core.c b/drivers/usb/isp1760/isp1760-core.c
+index d1d9a7d5da175..af88f4fe00d27 100644
+--- a/drivers/usb/isp1760/isp1760-core.c
++++ b/drivers/usb/isp1760/isp1760-core.c
+@@ -251,6 +251,8 @@ static const struct reg_field isp1760_hc_reg_fields[] = {
+ [HW_DM_PULLDOWN] = REG_FIELD(ISP176x_HC_OTG_CTRL, 2, 2),
+ [HW_DP_PULLDOWN] = REG_FIELD(ISP176x_HC_OTG_CTRL, 1, 1),
+ [HW_DP_PULLUP] = REG_FIELD(ISP176x_HC_OTG_CTRL, 0, 0),
++ /* Make sure the array is sized properly during compilation */
++ [HC_FIELD_MAX] = {},
+ };
+
+ static const struct reg_field isp1763_hc_reg_fields[] = {
+@@ -321,6 +323,8 @@ static const struct reg_field isp1763_hc_reg_fields[] = {
+ [HW_DM_PULLDOWN_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 2, 2),
+ [HW_DP_PULLDOWN_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 1, 1),
+ [HW_DP_PULLUP_CLEAR] = REG_FIELD(ISP1763_HC_OTG_CTRL_CLEAR, 0, 0),
++ /* Make sure the array is sized properly during compilation */
++ [HC_FIELD_MAX] = {},
+ };
+
+ static const struct regmap_range isp1763_hc_volatile_ranges[] = {
+@@ -405,6 +409,8 @@ static const struct reg_field isp1761_dc_reg_fields[] = {
+ [DC_CHIP_ID_HIGH] = REG_FIELD(ISP176x_DC_CHIPID, 16, 31),
+ [DC_CHIP_ID_LOW] = REG_FIELD(ISP176x_DC_CHIPID, 0, 15),
+ [DC_SCRATCH] = REG_FIELD(ISP176x_DC_SCRATCH, 0, 15),
++ /* Make sure the array is sized properly during compilation */
++ [DC_FIELD_MAX] = {},
+ };
+
+ static const struct regmap_range isp1763_dc_volatile_ranges[] = {
+@@ -458,6 +464,8 @@ static const struct reg_field isp1763_dc_reg_fields[] = {
+ [DC_CHIP_ID_HIGH] = REG_FIELD(ISP1763_DC_CHIPID_HIGH, 0, 15),
+ [DC_CHIP_ID_LOW] = REG_FIELD(ISP1763_DC_CHIPID_LOW, 0, 15),
+ [DC_SCRATCH] = REG_FIELD(ISP1763_DC_SCRATCH, 0, 15),
++ /* Make sure the array is sized properly during compilation */
++ [DC_FIELD_MAX] = {},
+ };
+
+ static const struct regmap_config isp1763_dc_regmap_conf = {
+diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
+index e5a8fcdbb78e7..6c38c62d29b26 100644
+--- a/drivers/usb/misc/ftdi-elan.c
++++ b/drivers/usb/misc/ftdi-elan.c
+@@ -202,6 +202,7 @@ static void ftdi_elan_delete(struct kref *kref)
+ mutex_unlock(&ftdi_module_lock);
+ kfree(ftdi->bulk_in_buffer);
+ ftdi->bulk_in_buffer = NULL;
++ kfree(ftdi);
+ }
+
+ static void ftdi_elan_put_kref(struct usb_ftdi *ftdi)
+diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
+index e9437a176518a..ea39243efee39 100644
+--- a/drivers/usb/misc/idmouse.c
++++ b/drivers/usb/misc/idmouse.c
+@@ -177,10 +177,6 @@ static int idmouse_create_image(struct usb_idmouse *dev)
+ bytes_read += bulk_read;
+ }
+
+- /* reset the device */
+-reset:
+- ftip_command(dev, FTIP_RELEASE, 0, 0);
+-
+ /* check for valid image */
+ /* right border should be black (0x00) */
+ for (bytes_read = sizeof(HEADER)-1 + WIDTH-1; bytes_read < IMGSIZE; bytes_read += WIDTH)
+@@ -192,6 +188,10 @@ reset:
+ if (dev->bulk_in_buffer[bytes_read] != 0xFF)
+ return -EAGAIN;
+
++ /* reset the device */
++reset:
++ ftip_command(dev, FTIP_RELEASE, 0, 0);
++
+ /* should be IMGSIZE == 65040 */
+ dev_dbg(&dev->interface->dev, "read %d bytes fingerprint data\n",
+ bytes_read);
+diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
+index efbd317f2f252..b421f13260875 100644
+--- a/drivers/usb/misc/iowarrior.c
++++ b/drivers/usb/misc/iowarrior.c
+@@ -99,10 +99,6 @@ struct iowarrior {
+ /* globals */
+ /*--------------*/
+
+-/*
+- * USB spec identifies 5 second timeouts.
+- */
+-#define GET_TIMEOUT 5
+ #define USB_REQ_GET_REPORT 0x01
+ //#if 0
+ static int usb_get_report(struct usb_device *dev,
+@@ -114,7 +110,7 @@ static int usb_get_report(struct usb_device *dev,
+ USB_DIR_IN | USB_TYPE_CLASS |
+ USB_RECIP_INTERFACE, (type << 8) + id,
+ inter->desc.bInterfaceNumber, buf, size,
+- GET_TIMEOUT*HZ);
++ USB_CTRL_GET_TIMEOUT);
+ }
+ //#endif
+
+@@ -129,7 +125,7 @@ static int usb_set_report(struct usb_interface *intf, unsigned char type,
+ USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+ (type << 8) + id,
+ intf->cur_altsetting->desc.bInterfaceNumber, buf,
+- size, HZ);
++ size, 1000);
+ }
+
+ /*---------------------*/
+@@ -818,7 +814,7 @@ static int iowarrior_probe(struct usb_interface *interface,
+ break;
+
+ case USB_DEVICE_ID_CODEMERCS_IOW100:
+- dev->report_size = 13;
++ dev->report_size = 12;
+ break;
+ }
+ }
+diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
+index f08de33d9ff38..8ed803c4a251d 100644
+--- a/drivers/usb/misc/sisusbvga/sisusb.c
++++ b/drivers/usb/misc/sisusbvga/sisusb.c
+@@ -3014,6 +3014,20 @@ static int sisusb_probe(struct usb_interface *intf,
+ struct usb_device *dev = interface_to_usbdev(intf);
+ struct sisusb_usb_data *sisusb;
+ int retval = 0, i;
++ static const u8 ep_addresses[] = {
++ SISUSB_EP_GFX_IN | USB_DIR_IN,
++ SISUSB_EP_GFX_OUT | USB_DIR_OUT,
++ SISUSB_EP_GFX_BULK_OUT | USB_DIR_OUT,
++ SISUSB_EP_GFX_LBULK_OUT | USB_DIR_OUT,
++ SISUSB_EP_BRIDGE_IN | USB_DIR_IN,
++ SISUSB_EP_BRIDGE_OUT | USB_DIR_OUT,
++ 0};
++
++ /* Are the expected endpoints present? */
++ if (!usb_check_bulk_endpoints(intf, ep_addresses)) {
++ dev_err(&intf->dev, "Invalid USB2VGA device\n");
++ return -EINVAL;
++ }
+
+ dev_info(&dev->dev, "USB2VGA dongle found at address %d\n",
+ dev->devnum);
+diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
+index 748139d262633..0be8efcda15d5 100644
+--- a/drivers/usb/misc/uss720.c
++++ b/drivers/usb/misc/uss720.c
+@@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref)
+
+ dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n");
+ usb_put_dev(priv->usbdev);
++ priv->usbdev = NULL;
+ kfree(priv);
+ }
+
+@@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf,
+ parport_announce_port(pp);
+
+ usb_set_intfdata(intf, pp);
+- usb_put_dev(usbdev);
+ return 0;
+
+ probe_abort:
+@@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf)
+ usb_set_intfdata(intf, NULL);
+ if (pp) {
+ priv = pp->private_data;
+- priv->usbdev = NULL;
+ priv->pp = NULL;
+ dev_dbg(&intf->dev, "parport_remove_port\n");
+ parport_remove_port(pp);
+diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
+index f48a23adbc35d..094e812e9e692 100644
+--- a/drivers/usb/mon/mon_bin.c
++++ b/drivers/usb/mon/mon_bin.c
+@@ -1268,6 +1268,11 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma)
+ {
+ /* don't do anything here: "fault" will set up page table entries */
+ vma->vm_ops = &mon_bin_vm_ops;
++
++ if (vma->vm_flags & VM_WRITE)
++ return -EPERM;
++
++ vma->vm_flags &= ~VM_MAYWRITE;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_private_data = filp->private_data;
+ mon_bin_vma_open(vma);
+diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c
+index c4a2c37abf628..3ea5145a842b1 100644
+--- a/drivers/usb/mtu3/mtu3_core.c
++++ b/drivers/usb/mtu3/mtu3_core.c
+@@ -971,8 +971,6 @@ int ssusb_gadget_init(struct ssusb_mtk *ssusb)
+ goto irq_err;
+ }
+
+- device_init_wakeup(dev, true);
+-
+ /* power down device IP for power saving by default */
+ mtu3_stop(mtu);
+
+diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c
+index a6b04831b20bf..9b8aded3d95e9 100644
+--- a/drivers/usb/mtu3/mtu3_dr.c
++++ b/drivers/usb/mtu3/mtu3_dr.c
+@@ -21,10 +21,8 @@ static inline struct ssusb_mtk *otg_sx_to_ssusb(struct otg_switch_mtk *otg_sx)
+
+ static void toggle_opstate(struct ssusb_mtk *ssusb)
+ {
+- if (!ssusb->otg_switch.is_u3_drd) {
+- mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
+- mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
+- }
++ mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
++ mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
+ }
+
+ /* only port0 supports dual-role mode */
+diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
+index a9a65b4bbfede..9977600616d7e 100644
+--- a/drivers/usb/mtu3/mtu3_gadget.c
++++ b/drivers/usb/mtu3/mtu3_gadget.c
+@@ -77,7 +77,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep)
+ if (usb_endpoint_xfer_int(desc) ||
+ usb_endpoint_xfer_isoc(desc)) {
+ interval = desc->bInterval;
+- interval = clamp_val(interval, 1, 16) - 1;
++ interval = clamp_val(interval, 1, 16);
+ if (usb_endpoint_xfer_isoc(desc) && comp_desc)
+ mult = comp_desc->bmAttributes;
+ }
+@@ -89,9 +89,16 @@ static int mtu3_ep_enable(struct mtu3_ep *mep)
+ if (usb_endpoint_xfer_isoc(desc) ||
+ usb_endpoint_xfer_int(desc)) {
+ interval = desc->bInterval;
+- interval = clamp_val(interval, 1, 16) - 1;
++ interval = clamp_val(interval, 1, 16);
+ mult = usb_endpoint_maxp_mult(desc) - 1;
+ }
++ break;
++ case USB_SPEED_FULL:
++ if (usb_endpoint_xfer_isoc(desc))
++ interval = clamp_val(desc->bInterval, 1, 16);
++ else if (usb_endpoint_xfer_int(desc))
++ interval = clamp_val(desc->bInterval, 1, 255);
++
+ break;
+ default:
+ break; /*others are ignored */
+@@ -235,6 +242,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
+ mreq->request.dma = DMA_ADDR_INVALID;
+ mreq->epnum = mep->epnum;
+ mreq->mep = mep;
++ INIT_LIST_HEAD(&mreq->list);
+ trace_mtu3_alloc_request(mreq);
+
+ return &mreq->request;
+diff --git a/drivers/usb/mtu3/mtu3_plat.c b/drivers/usb/mtu3/mtu3_plat.c
+index f13531022f4a3..4c4dcbf17518f 100644
+--- a/drivers/usb/mtu3/mtu3_plat.c
++++ b/drivers/usb/mtu3/mtu3_plat.c
+@@ -332,6 +332,8 @@ static int mtu3_probe(struct platform_device *pdev)
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
+
++ device_init_wakeup(dev, true);
++
+ ret = ssusb_rscs_init(ssusb);
+ if (ret)
+ goto comm_init_err;
+diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c
+index 3f414f91b5899..e65586147965d 100644
+--- a/drivers/usb/mtu3/mtu3_qmu.c
++++ b/drivers/usb/mtu3/mtu3_qmu.c
+@@ -210,6 +210,7 @@ static struct qmu_gpd *advance_enq_gpd(struct mtu3_gpd_ring *ring)
+ return ring->enqueue;
+ }
+
++/* @dequeue may be NULL if ring is unallocated or freed */
+ static struct qmu_gpd *advance_deq_gpd(struct mtu3_gpd_ring *ring)
+ {
+ if (ring->dequeue < ring->end)
+@@ -273,6 +274,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
+ gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP);
+ }
+
++ /* prevent reorder, make sure GPD's HWO is set last */
++ mb();
+ gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
+
+ mreq->gpd = gpd;
+@@ -306,6 +309,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
+ gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma));
+ ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma));
+ gpd->dw3_info = cpu_to_le32(ext_addr);
++ /* prevent reorder, make sure GPD's HWO is set last */
++ mb();
+ gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
+
+ mreq->gpd = gpd;
+@@ -445,7 +450,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum)
+ return;
+ }
+ mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY);
+-
++ /* prevent reorder, make sure GPD's HWO is set last */
++ mb();
+ /* by pass the current GDP */
+ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO);
+
+@@ -479,7 +485,7 @@ static void qmu_done_tx(struct mtu3 *mtu, u8 epnum)
+ dev_dbg(mtu->dev, "%s EP%d, last=%p, current=%p, enq=%p\n",
+ __func__, epnum, gpd, gpd_current, ring->enqueue);
+
+- while (gpd != gpd_current && !GET_GPD_HWO(gpd)) {
++ while (gpd && gpd != gpd_current && !GET_GPD_HWO(gpd)) {
+
+ mreq = next_request(mep);
+
+@@ -518,7 +524,7 @@ static void qmu_done_rx(struct mtu3 *mtu, u8 epnum)
+ dev_dbg(mtu->dev, "%s EP%d, last=%p, current=%p, enq=%p\n",
+ __func__, epnum, gpd, gpd_current, ring->enqueue);
+
+- while (gpd != gpd_current && !GET_GPD_HWO(gpd)) {
++ while (gpd && gpd != gpd_current && !GET_GPD_HWO(gpd)) {
+
+ mreq = next_request(mep);
+
+diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
+index 8de143807c1ae..70693cae83efb 100644
+--- a/drivers/usb/musb/Kconfig
++++ b/drivers/usb/musb/Kconfig
+@@ -86,7 +86,7 @@ config USB_MUSB_TUSB6010
+ tristate "TUSB6010"
+ depends on HAS_IOMEM
+ depends on ARCH_OMAP2PLUS || COMPILE_TEST
+- depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules
++ depends on NOP_USB_XCEIV!=m || USB_MUSB_HDRC=m
+
+ config USB_MUSB_OMAP2PLUS
+ tristate "OMAP2430 and onwards"
+@@ -120,7 +120,7 @@ config USB_MUSB_MEDIATEK
+ tristate "MediaTek platforms"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on NOP_USB_XCEIV
+- depends on GENERIC_PHY
++ select GENERIC_PHY
+ select USB_ROLE_SWITCH
+
+ comment "MUSB DMA mode"
+diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c
+index 6b92d037d8fc8..4f52b92c45974 100644
+--- a/drivers/usb/musb/mediatek.c
++++ b/drivers/usb/musb/mediatek.c
+@@ -346,7 +346,8 @@ static int mtk_musb_init(struct musb *musb)
+ err_phy_power_on:
+ phy_exit(glue->phy);
+ err_phy_init:
+- mtk_otg_switch_exit(glue);
++ if (musb->port_mode == MUSB_OTG)
++ mtk_otg_switch_exit(glue);
+ return ret;
+ }
+
+diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
+index 98c0f4c1bffd9..22c3df49ba8af 100644
+--- a/drivers/usb/musb/musb_gadget.c
++++ b/drivers/usb/musb/musb_gadget.c
+@@ -760,6 +760,9 @@ static void rxstate(struct musb *musb, struct musb_request *req)
+ musb_writew(epio, MUSB_RXCSR, csr);
+
+ buffer_aint_mapped:
++ fifo_count = min_t(unsigned int,
++ request->length - request->actual,
++ (unsigned int)fifo_count);
+ musb_read_fifo(musb_ep->hw_ep, fifo_count, (u8 *)
+ (request->buf + request->actual));
+ request->actual += fifo_count;
+@@ -1247,9 +1250,11 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
+ status = musb_queue_resume_work(musb,
+ musb_ep_restart_resume_work,
+ request);
+- if (status < 0)
++ if (status < 0) {
+ dev_err(musb->controller, "%s resume work: %i\n",
+ __func__, status);
++ list_del(&request->list);
++ }
+ }
+
+ unlock:
+@@ -1623,8 +1628,6 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
+ {
+ struct musb *musb = gadget_to_musb(gadget);
+
+- if (!musb->xceiv->set_power)
+- return -EOPNOTSUPP;
+ return usb_phy_set_power(musb->xceiv, mA);
+ }
+
+diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
+index f086960fe2b50..bd1de5c4c4340 100644
+--- a/drivers/usb/musb/omap2430.c
++++ b/drivers/usb/musb/omap2430.c
+@@ -363,6 +363,7 @@ static int omap2430_probe(struct platform_device *pdev)
+ control_node = of_parse_phandle(np, "ctrl-module", 0);
+ if (control_node) {
+ control_pdev = of_find_device_by_node(control_node);
++ of_node_put(control_node);
+ if (!control_pdev) {
+ dev_err(&pdev->dev, "Failed to get control device\n");
+ ret = -EINVAL;
+diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
+index c968ecda42aa8..7ed4cc348d993 100644
+--- a/drivers/usb/musb/tusb6010.c
++++ b/drivers/usb/musb/tusb6010.c
+@@ -1104,6 +1104,11 @@ static int tusb_musb_init(struct musb *musb)
+
+ /* dma address for async dma */
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!mem) {
++ pr_debug("no async dma resource?\n");
++ ret = -ENODEV;
++ goto done;
++ }
+ musb->async = mem->start;
+
+ /* dma address for sync dma */
+diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
+index 661a229c105dd..34b9f81401871 100644
+--- a/drivers/usb/phy/phy-generic.c
++++ b/drivers/usb/phy/phy-generic.c
+@@ -268,6 +268,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
+ return -EPROBE_DEFER;
+ }
+
++ nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
++ if (PTR_ERR(nop->vbus_draw) == -ENODEV)
++ nop->vbus_draw = NULL;
++ if (IS_ERR(nop->vbus_draw))
++ return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
++ "could not get vbus regulator\n");
++
+ nop->dev = dev;
+ nop->phy.dev = nop->dev;
+ nop->phy.label = "nop-xceiv";
+diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
+index 8a262c5a0408f..7a7eb8af60448 100644
+--- a/drivers/usb/phy/phy-mxs-usb.c
++++ b/drivers/usb/phy/phy-mxs-usb.c
+@@ -388,14 +388,8 @@ static void __mxs_phy_disconnect_line(struct mxs_phy *mxs_phy, bool disconnect)
+
+ static bool mxs_phy_is_otg_host(struct mxs_phy *mxs_phy)
+ {
+- void __iomem *base = mxs_phy->phy.io_priv;
+- u32 phyctrl = readl(base + HW_USBPHY_CTRL);
+-
+- if (IS_ENABLED(CONFIG_USB_OTG) &&
+- !(phyctrl & BM_USBPHY_CTRL_OTG_ID_VALUE))
+- return true;
+-
+- return false;
++ return IS_ENABLED(CONFIG_USB_OTG) &&
++ mxs_phy->phy.last_event == USB_EVENT_ID;
+ }
+
+ static void mxs_phy_disconnect_line(struct mxs_phy *mxs_phy, bool on)
+diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c
+index ee0863c6553ed..6e6ef8c0bc7ed 100644
+--- a/drivers/usb/phy/phy-omap-otg.c
++++ b/drivers/usb/phy/phy-omap-otg.c
+@@ -95,8 +95,8 @@ static int omap_otg_probe(struct platform_device *pdev)
+ return -ENODEV;
+
+ extcon = extcon_get_extcon_dev(config->extcon);
+- if (!extcon)
+- return -EPROBE_DEFER;
++ if (IS_ERR(extcon))
++ return PTR_ERR(extcon);
+
+ otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL);
+ if (!otg_dev)
+diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
+index a3e043e3e4aae..d0672b6712985 100644
+--- a/drivers/usb/phy/phy-tahvo.c
++++ b/drivers/usb/phy/phy-tahvo.c
+@@ -395,7 +395,7 @@ static int tahvo_usb_probe(struct platform_device *pdev)
+
+ tu->irq = ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+- return ret;
++ goto err_remove_phy;
+ ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt,
+ IRQF_ONESHOT,
+ "tahvo-vbus", tu);
+diff --git a/drivers/usb/renesas_usbhs/rza.c b/drivers/usb/renesas_usbhs/rza.c
+index 24de64edb674b..2d77edefb4b30 100644
+--- a/drivers/usb/renesas_usbhs/rza.c
++++ b/drivers/usb/renesas_usbhs/rza.c
+@@ -23,6 +23,10 @@ static int usbhs_rza1_hardware_init(struct platform_device *pdev)
+ extal_clk = of_find_node_by_name(NULL, "extal");
+ of_property_read_u32(usb_x1_clk, "clock-frequency", &freq_usb);
+ of_property_read_u32(extal_clk, "clock-frequency", &freq_extal);
++
++ of_node_put(usb_x1_clk);
++ of_node_put(extal_clk);
++
+ if (freq_usb == 0) {
+ if (freq_extal == 12000000) {
+ /* Select 12MHz XTAL */
+diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
+index dfaed7eee94fc..32e6d19f7011a 100644
+--- a/drivers/usb/roles/class.c
++++ b/drivers/usb/roles/class.c
+@@ -106,10 +106,13 @@ usb_role_switch_is_parent(struct fwnode_handle *fwnode)
+ struct fwnode_handle *parent = fwnode_get_parent(fwnode);
+ struct device *dev;
+
+- if (!parent || !fwnode_property_present(parent, "usb-role-switch"))
++ if (!fwnode_property_present(parent, "usb-role-switch")) {
++ fwnode_handle_put(parent);
+ return NULL;
++ }
+
+ dev = class_find_device_by_fwnode(role_class, parent);
++ fwnode_handle_put(parent);
+ return dev ? to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER);
+ }
+
+diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
+index de5c012570603..ef8d1c73c7545 100644
+--- a/drivers/usb/serial/Kconfig
++++ b/drivers/usb/serial/Kconfig
+@@ -66,6 +66,7 @@ config USB_SERIAL_SIMPLE
+ - Libtransistor USB console
+ - a number of Motorola phones
+ - Motorola Tetra devices
++ - Nokia mobile phones
+ - Novatel Wireless GPS receivers
+ - Siemens USB/MPI adapter.
+ - ViVOtech ViVOpay USB device.
+diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
+index 2db917eab7995..752daa952abd6 100644
+--- a/drivers/usb/serial/ch341.c
++++ b/drivers/usb/serial/ch341.c
+@@ -81,10 +81,10 @@
+ #define CH341_QUIRK_SIMULATE_BREAK BIT(1)
+
+ static const struct usb_device_id id_table[] = {
+- { USB_DEVICE(0x1a86, 0x5512) },
+ { USB_DEVICE(0x1a86, 0x5523) },
+ { USB_DEVICE(0x1a86, 0x7522) },
+ { USB_DEVICE(0x1a86, 0x7523) },
++ { USB_DEVICE(0x2184, 0x0057) },
+ { USB_DEVICE(0x4348, 0x5523) },
+ { USB_DEVICE(0x9986, 0x7523) },
+ { },
+@@ -97,7 +97,10 @@ struct ch341_private {
+ u8 mcr;
+ u8 msr;
+ u8 lcr;
++
+ unsigned long quirks;
++ u8 version;
++
+ unsigned long break_end;
+ };
+
+@@ -256,8 +259,12 @@ static int ch341_set_baudrate_lcr(struct usb_device *dev,
+ /*
+ * CH341A buffers data until a full endpoint-size packet (32 bytes)
+ * has been received unless bit 7 is set.
++ *
++ * At least one device with version 0x27 appears to have this bit
++ * inverted.
+ */
+- val |= BIT(7);
++ if (priv->version > 0x27)
++ val |= BIT(7);
+
+ r = ch341_control_out(dev, CH341_REQ_WRITE_REG,
+ CH341_REG_DIVISOR << 8 | CH341_REG_PRESCALER,
+@@ -271,6 +278,9 @@ static int ch341_set_baudrate_lcr(struct usb_device *dev,
+ * (stop bits, parity and word length). Version 0x30 and above use
+ * CH341_REG_LCR only and CH341_REG_LCR2 is always set to zero.
+ */
++ if (priv->version < 0x30)
++ return 0;
++
+ r = ch341_control_out(dev, CH341_REQ_WRITE_REG,
+ CH341_REG_LCR2 << 8 | CH341_REG_LCR, lcr);
+ if (r)
+@@ -323,7 +333,9 @@ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv)
+ r = ch341_control_in(dev, CH341_REQ_READ_VERSION, 0, 0, buffer, size);
+ if (r < 0)
+ goto out;
+- dev_dbg(&dev->dev, "Chip version: 0x%02x\n", buffer[0]);
++
++ priv->version = buffer[0];
++ dev_dbg(&dev->dev, "Chip version: 0x%02x\n", priv->version);
+
+ r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, 0, 0);
+ if (r < 0)
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index 189279869a8b0..b3f128bd47189 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -51,6 +51,7 @@ static void cp210x_enable_event_mode(struct usb_serial_port *port);
+ static void cp210x_disable_event_mode(struct usb_serial_port *port);
+
+ static const struct usb_device_id id_table[] = {
++ { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */
+ { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */
+ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
+ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
+@@ -59,6 +60,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */
+ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
+ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
++ { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */
+ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */
+ { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */
+ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */
+@@ -68,6 +70,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
+ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
+ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */
++ { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */
+ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
+ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
+ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
+@@ -117,6 +120,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
+ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
+ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
++ { USB_DEVICE(0x10C4, 0x82AA) }, /* Silicon Labs IFS-USB-DATACABLE used with Quint UPS */
+ { USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */
+ { USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */
+ { USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */
+@@ -128,6 +132,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */
+ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */
+ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */
++ { USB_DEVICE(0x10C4, 0x8414) }, /* Decagon USB Cable Adapter */
+ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */
+ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
+ { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
+@@ -192,6 +197,10 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
+ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */
+ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */
++ { USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */
++ { USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */
++ { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */
++ { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */
+ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
+ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
+ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+@@ -1682,6 +1691,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial)
+
+ /* 2 banks of GPIO - One for the pins taken from each serial port */
+ if (intf_num == 0) {
++ priv->gc.ngpio = 2;
++
+ if (mode.eci == CP210X_PIN_MODE_MODEM) {
+ /* mark all GPIOs of this interface as reserved */
+ priv->gpio_altfunc = 0xff;
+@@ -1692,8 +1703,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial)
+ priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) &
+ CP210X_ECI_GPIO_MODE_MASK) >>
+ CP210X_ECI_GPIO_MODE_OFFSET);
+- priv->gc.ngpio = 2;
+ } else if (intf_num == 1) {
++ priv->gc.ngpio = 3;
++
+ if (mode.sci == CP210X_PIN_MODE_MODEM) {
+ /* mark all GPIOs of this interface as reserved */
+ priv->gpio_altfunc = 0xff;
+@@ -1704,7 +1716,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial)
+ priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) &
+ CP210X_SCI_GPIO_MODE_MASK) >>
+ CP210X_SCI_GPIO_MODE_OFFSET);
+- priv->gc.ngpio = 3;
+ } else {
+ return -ENODEV;
+ }
+diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
+index a7a7af8d05bff..e04bdb3082657 100644
+--- a/drivers/usb/serial/f81232.c
++++ b/drivers/usb/serial/f81232.c
+@@ -130,9 +130,6 @@ static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ,
+
+ static int calc_baud_divisor(speed_t baudrate, speed_t clockrate)
+ {
+- if (!baudrate)
+- return 0;
+-
+ return DIV_ROUND_CLOSEST(clockrate, baudrate);
+ }
+
+@@ -519,9 +516,14 @@ static void f81232_set_baudrate(struct tty_struct *tty,
+ speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE };
+
+ for (i = 0; i < ARRAY_SIZE(baud_list); ++i) {
+- idx = f81232_find_clk(baud_list[i]);
++ baudrate = baud_list[i];
++ if (baudrate == 0) {
++ tty_encode_baud_rate(tty, 0, 0);
++ return;
++ }
++
++ idx = f81232_find_clk(baudrate);
+ if (idx >= 0) {
+- baudrate = baud_list[i];
+ tty_encode_baud_rate(tty, baudrate, baudrate);
+ break;
+ }
+diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c
+index c0bca52ef92aa..556d4e0dda873 100644
+--- a/drivers/usb/serial/f81534.c
++++ b/drivers/usb/serial/f81534.c
+@@ -536,9 +536,6 @@ static int f81534_submit_writer(struct usb_serial_port *port, gfp_t mem_flags)
+
+ static u32 f81534_calc_baud_divisor(u32 baudrate, u32 clockrate)
+ {
+- if (!baudrate)
+- return 0;
+-
+ /* Round to nearest divisor */
+ return DIV_ROUND_CLOSEST(clockrate, baudrate);
+ }
+@@ -568,9 +565,14 @@ static int f81534_set_port_config(struct usb_serial_port *port,
+ u32 baud_list[] = {baudrate, old_baudrate, F81534_DEFAULT_BAUD_RATE};
+
+ for (i = 0; i < ARRAY_SIZE(baud_list); ++i) {
+- idx = f81534_find_clk(baud_list[i]);
++ baudrate = baud_list[i];
++ if (baudrate == 0) {
++ tty_encode_baud_rate(tty, 0, 0);
++ return 0;
++ }
++
++ idx = f81534_find_clk(baudrate);
+ if (idx >= 0) {
+- baudrate = baud_list[i];
+ tty_encode_baud_rate(tty, baudrate, baudrate);
+ break;
+ }
+diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
+index 99d19828dae6d..49448cdbe9985 100644
+--- a/drivers/usb/serial/ftdi_sio.c
++++ b/drivers/usb/serial/ftdi_sio.c
+@@ -969,6 +969,7 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) },
++ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) },
+@@ -977,12 +978,14 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) },
++ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) },
++ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) },
+ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) },
+@@ -1020,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) },
+ { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) },
+ { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) },
++ /* Belimo Automation devices */
++ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) },
++ { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) },
+ /* ICP DAS I-756xU devices */
+ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
+ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
+@@ -1039,6 +1045,8 @@ static const struct usb_device_id id_table_combined[] = {
+ /* IDS GmbH devices */
+ { USB_DEVICE(IDS_VID, IDS_SI31A_PID) },
+ { USB_DEVICE(IDS_VID, IDS_CM31A_PID) },
++ /* Omron devices */
++ { USB_DEVICE(OMRON_VID, OMRON_CS1W_CIF31_PID) },
+ /* U-Blox devices */
+ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) },
+ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) },
+@@ -1311,8 +1319,7 @@ static u32 get_ftdi_divisor(struct tty_struct *tty,
+ case 38400: div_value = ftdi_sio_b38400; break;
+ case 57600: div_value = ftdi_sio_b57600; break;
+ case 115200: div_value = ftdi_sio_b115200; break;
+- } /* baud */
+- if (div_value == 0) {
++ default:
+ dev_dbg(dev, "%s - Baudrate (%d) requested is not supported\n",
+ __func__, baud);
+ div_value = ftdi_sio_b9600;
+diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
+index 755858ca20bac..31c8ccabbbb78 100644
+--- a/drivers/usb/serial/ftdi_sio_ids.h
++++ b/drivers/usb/serial/ftdi_sio_ids.h
+@@ -661,6 +661,12 @@
+ #define INFINEON_TRIBOARD_TC1798_PID 0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */
+ #define INFINEON_TRIBOARD_TC2X7_PID 0x0043 /* DAS JTAG TriBoard TC2X7 V1.0 */
+
++/*
++ * Omron corporation (https://www.omron.com)
++ */
++ #define OMRON_VID 0x0590
++ #define OMRON_CS1W_CIF31_PID 0x00b2
++
+ /*
+ * Acton Research Corp.
+ */
+@@ -1506,6 +1512,9 @@
+ #define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */
+ #define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */
+ #define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */
++#define BRAINBOXES_US_159_PID 0x1021 /* US-159 1xRS232 */
++#define BRAINBOXES_US_235_PID 0x1017 /* US-235 1xRS232 */
++#define BRAINBOXES_US_320_PID 0x1019 /* US-320 1xRS422/485 */
+ #define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */
+ #define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */
+ #define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */
+@@ -1565,6 +1574,12 @@
+ #define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */
+ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */
+
++/*
++ * Belimo Automation
++ */
++#define BELIMO_ZTH_PID 0x8050
++#define BELIMO_ZIP_PID 0xC811
++
+ /*
+ * Unjo AB
+ */
+diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
+index a7b3c15957ba9..feba2a8d1233a 100644
+--- a/drivers/usb/serial/io_ti.c
++++ b/drivers/usb/serial/io_ti.c
+@@ -166,6 +166,7 @@ static const struct usb_device_id edgeport_2port_id_table[] = {
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
++ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
+ { }
+ };
+
+@@ -204,6 +205,7 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
++ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
+ { }
+ };
+
+diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h
+index 52cbc353051fe..9a6f742ad3abd 100644
+--- a/drivers/usb/serial/io_usbvend.h
++++ b/drivers/usb/serial/io_usbvend.h
+@@ -212,6 +212,7 @@
+ //
+ // Definitions for other product IDs
+ #define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device
++#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4)
+
+
+ #define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \
+diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
+index 87b89c99d5177..1cfcd805f2868 100644
+--- a/drivers/usb/serial/keyspan.c
++++ b/drivers/usb/serial/keyspan.c
+@@ -2890,22 +2890,22 @@ static int keyspan_port_probe(struct usb_serial_port *port)
+ for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) {
+ p_priv->in_buffer[i] = kzalloc(IN_BUFLEN, GFP_KERNEL);
+ if (!p_priv->in_buffer[i])
+- goto err_in_buffer;
++ goto err_free_in_buffer;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) {
+ p_priv->out_buffer[i] = kzalloc(OUT_BUFLEN, GFP_KERNEL);
+ if (!p_priv->out_buffer[i])
+- goto err_out_buffer;
++ goto err_free_out_buffer;
+ }
+
+ p_priv->inack_buffer = kzalloc(INACK_BUFLEN, GFP_KERNEL);
+ if (!p_priv->inack_buffer)
+- goto err_inack_buffer;
++ goto err_free_out_buffer;
+
+ p_priv->outcont_buffer = kzalloc(OUTCONT_BUFLEN, GFP_KERNEL);
+ if (!p_priv->outcont_buffer)
+- goto err_outcont_buffer;
++ goto err_free_inack_buffer;
+
+ p_priv->device_details = d_details;
+
+@@ -2951,15 +2951,14 @@ static int keyspan_port_probe(struct usb_serial_port *port)
+
+ return 0;
+
+-err_outcont_buffer:
++err_free_inack_buffer:
+ kfree(p_priv->inack_buffer);
+-err_inack_buffer:
++err_free_out_buffer:
+ for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i)
+ kfree(p_priv->out_buffer[i]);
+-err_out_buffer:
++err_free_in_buffer:
+ for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i)
+ kfree(p_priv->in_buffer[i]);
+-err_in_buffer:
+ kfree(p_priv);
+
+ return -ENOMEM;
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
+index a484ff5e4ebf8..f13930b4534c1 100644
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -162,6 +162,8 @@ static void option_instat_callback(struct urb *urb);
+ #define NOVATELWIRELESS_PRODUCT_G2 0xA010
+ #define NOVATELWIRELESS_PRODUCT_MC551 0xB001
+
++#define UBLOX_VENDOR_ID 0x1546
++
+ /* AMOI PRODUCTS */
+ #define AMOI_VENDOR_ID 0x1614
+ #define AMOI_PRODUCT_H01 0x0800
+@@ -198,6 +200,8 @@ static void option_instat_callback(struct urb *urb);
+
+ #define DELL_PRODUCT_5821E 0x81d7
+ #define DELL_PRODUCT_5821E_ESIM 0x81e0
++#define DELL_PRODUCT_5829E_ESIM 0x81e4
++#define DELL_PRODUCT_5829E 0x81e6
+
+ #define KYOCERA_VENDOR_ID 0x0c88
+ #define KYOCERA_PRODUCT_KPC650 0x17da
+@@ -238,22 +242,39 @@ static void option_instat_callback(struct urb *urb);
+ #define QUECTEL_PRODUCT_UC15 0x9090
+ /* These u-blox products use Qualcomm's vendor ID */
+ #define UBLOX_PRODUCT_R410M 0x90b2
+-#define UBLOX_PRODUCT_R6XX 0x90fa
+ /* These Yuga products use Qualcomm's vendor ID */
+ #define YUGA_PRODUCT_CLM920_NC5 0x9625
+
+ #define QUECTEL_VENDOR_ID 0x2c7c
+ /* These Quectel products use Quectel's vendor ID */
+ #define QUECTEL_PRODUCT_EC21 0x0121
++#define QUECTEL_PRODUCT_EM061K_LTA 0x0123
++#define QUECTEL_PRODUCT_EM061K_LMS 0x0124
+ #define QUECTEL_PRODUCT_EC25 0x0125
++#define QUECTEL_PRODUCT_EM060K_128 0x0128
+ #define QUECTEL_PRODUCT_EG91 0x0191
+ #define QUECTEL_PRODUCT_EG95 0x0195
+ #define QUECTEL_PRODUCT_BG96 0x0296
+ #define QUECTEL_PRODUCT_EP06 0x0306
++#define QUECTEL_PRODUCT_EM05G 0x030a
++#define QUECTEL_PRODUCT_EM060K 0x030b
++#define QUECTEL_PRODUCT_EM05G_CS 0x030c
++#define QUECTEL_PRODUCT_EM05GV2 0x030e
++#define QUECTEL_PRODUCT_EM05CN_SG 0x0310
++#define QUECTEL_PRODUCT_EM05G_SG 0x0311
++#define QUECTEL_PRODUCT_EM05CN 0x0312
++#define QUECTEL_PRODUCT_EM05G_GR 0x0313
++#define QUECTEL_PRODUCT_EM05G_RS 0x0314
+ #define QUECTEL_PRODUCT_EM12 0x0512
+ #define QUECTEL_PRODUCT_RM500Q 0x0800
++#define QUECTEL_PRODUCT_RM520N 0x0801
++#define QUECTEL_PRODUCT_EC200U 0x0901
+ #define QUECTEL_PRODUCT_EC200S_CN 0x6002
++#define QUECTEL_PRODUCT_EC200A 0x6005
++#define QUECTEL_PRODUCT_EM061K_LWW 0x6008
++#define QUECTEL_PRODUCT_EM061K_LCN 0x6009
+ #define QUECTEL_PRODUCT_EC200T 0x6026
++#define QUECTEL_PRODUCT_RM500K 0x7001
+
+ #define CMOTECH_VENDOR_ID 0x16d8
+ #define CMOTECH_PRODUCT_6001 0x6001
+@@ -388,6 +409,8 @@ static void option_instat_callback(struct urb *urb);
+ #define LONGCHEER_VENDOR_ID 0x1c9e
+
+ /* 4G Systems products */
++/* This one was sold as the VW and Skoda "Carstick LTE" */
++#define FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE 0x7605
+ /* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick *
+ * It seems to contain a Qualcomm QSC6240/6290 chipset */
+ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603
+@@ -430,6 +453,12 @@ static void option_instat_callback(struct urb *urb);
+ #define CINTERION_PRODUCT_CLS8 0x00b0
+ #define CINTERION_PRODUCT_MV31_MBIM 0x00b3
+ #define CINTERION_PRODUCT_MV31_RMNET 0x00b7
++#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8
++#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9
++#define CINTERION_PRODUCT_MV32_WA 0x00f1
++#define CINTERION_PRODUCT_MV32_WB 0x00f2
++#define CINTERION_PRODUCT_MV32_WA_RMNET 0x00f3
++#define CINTERION_PRODUCT_MV32_WB_RMNET 0x00f4
+
+ /* Olivetti products */
+ #define OLIVETTI_VENDOR_ID 0x0b3c
+@@ -565,6 +594,18 @@ static void option_instat_callback(struct urb *urb);
+ #define WETELECOM_PRODUCT_6802 0x6802
+ #define WETELECOM_PRODUCT_WMD300 0x6803
+
++/* OPPO products */
++#define OPPO_VENDOR_ID 0x22d9
++#define OPPO_PRODUCT_R11 0x276c
++
++/* Sierra Wireless products */
++#define SIERRA_VENDOR_ID 0x1199
++#define SIERRA_PRODUCT_EM9191 0x90d3
++
++/* UNISOC (Spreadtrum) products */
++#define UNISOC_VENDOR_ID 0x1782
++/* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */
++#define TOZED_PRODUCT_LT70C 0x4055
+
+ /* Device flags */
+
+@@ -1063,6 +1104,10 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
+ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM),
+ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
++ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E),
++ .driver_info = RSVD(0) | RSVD(6) },
++ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM),
++ .driver_info = RSVD(0) | RSVD(6) },
+ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
+ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
+ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
+@@ -1104,8 +1149,20 @@ static const struct usb_device_id option_ids[] = {
+ /* u-blox products using Qualcomm vendor ID */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
+ .driver_info = RSVD(1) | RSVD(3) },
+- { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX),
++ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */
++ .driver_info = RSVD(4) },
++ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa),
+ .driver_info = RSVD(3) },
++ /* u-blox products */
++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1311) }, /* u-blox LARA-R6 01B */
++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1312), /* u-blox LARA-R6 01B (RMNET) */
++ .driver_info = RSVD(4) },
++ { USB_DEVICE_INTERFACE_CLASS(UBLOX_VENDOR_ID, 0x1313, 0xff) }, /* u-blox LARA-R6 01B (ECM) */
++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */
++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */
++ .driver_info = RSVD(4) },
++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), /* u-blox LARA-L6 (ECM) */
++ .driver_info = RSVD(4) },
+ /* Quectel products using Quectel vendor ID */
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff),
+ .driver_info = NUMEP2 },
+@@ -1119,22 +1176,68 @@ static const struct usb_device_id option_ids[] = {
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff),
+ .driver_info = NUMEP2 },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0203, 0xff), /* BG95-M3 */
++ .driver_info = ZLP },
+ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+ .driver_info = RSVD(4) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
+ .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05GV2, 0xff),
++ .driver_info = RSVD(4) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff),
++ .driver_info = RSVD(6) | ZLP },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0x00, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff),
+ .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) },
++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */
++ .driver_info = RSVD(3) | ZLP },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10),
+ .driver_info = ZLP },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */
++ .driver_info = ZLP },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200A, 0xff, 0, 0) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) },
+
+ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
+ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
+@@ -1211,6 +1314,10 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */
+ .driver_info = NCTRL(2) | RSVD(3) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */
++ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */
++ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */
+@@ -1219,6 +1326,24 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */
+ .driver_info = NCTRL(0) | RSVD(1) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */
++ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */
++ .driver_info = NCTRL(0) | RSVD(1) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */
++ .driver_info = NCTRL(2) | RSVD(3) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */
++ .driver_info = NCTRL(0) | RSVD(1) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */
++ .driver_info = RSVD(0) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */
++ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */
++ .driver_info = NCTRL(0) | RSVD(1) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */
++ .driver_info = NCTRL(2) | RSVD(3) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */
++ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+@@ -1253,6 +1378,7 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */
+ .driver_info = NCTRL(2) | RSVD(3) },
++ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */
+ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260),
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE(TELIT_VENDOR_ID, 0x1261),
+@@ -1265,8 +1391,16 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = NCTRL(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */
+ .driver_info = NCTRL(2) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */
++ .driver_info = NCTRL(2) },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */
++ .driver_info = NCTRL(2) },
+ { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */
+ .driver_info = NCTRL(0) | ZLP },
++ { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */
++ .driver_info = NCTRL(0) | ZLP },
++ { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */
++ .driver_info = NCTRL(0) | ZLP },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
+ .driver_info = RSVD(1) },
+@@ -1639,6 +1773,8 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */
++ .driver_info = RSVD(5) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) },
+@@ -1886,6 +2022,8 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = RSVD(2) },
+ { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) },
+ { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) },
++ { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE),
++ .driver_info = RSVD(0) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
+ .driver_info = NCTRL(0) | NCTRL(1) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100),
+@@ -1945,6 +2083,18 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = RSVD(3)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
+ .driver_info = RSVD(0)},
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff),
++ .driver_info = RSVD(3)},
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff),
++ .driver_info = RSVD(0)},
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
++ .driver_info = RSVD(3)},
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA_RMNET, 0xff),
++ .driver_info = RSVD(0) },
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
++ .driver_info = RSVD(3)},
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB_RMNET, 0xff),
++ .driver_info = RSVD(0) },
+ { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100),
+ .driver_info = RSVD(4) },
+ { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120),
+@@ -2085,19 +2235,35 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
+ { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0db, 0xff), /* Foxconn T99W265 MBIM */
+ .driver_info = RSVD(3) },
++ { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0ee, 0xff), /* Foxconn T99W368 MBIM */
++ .driver_info = RSVD(3) },
++ { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0f0, 0xff), /* Foxconn T99W373 MBIM */
++ .driver_info = RSVD(3) },
+ { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */
+ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
++ { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */
++ { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */
+ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */
+ .driver_info = RSVD(6) },
++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */
++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */
++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */
++ .driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */
+ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */
+ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */
+ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */
++ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
++ { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) },
++ { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) },
+ { } /* Terminating entry */
+ };
+ MODULE_DEVICE_TABLE(usb, option_ids);
+diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
+index f45ca7ddf78ea..40b1ab3d284dc 100644
+--- a/drivers/usb/serial/pl2303.c
++++ b/drivers/usb/serial/pl2303.c
+@@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) },
++ { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) },
+ { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
+@@ -116,6 +117,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) },
+ { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) },
+ { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) },
++ { USB_DEVICE(IBM_VENDOR_ID, IBM_PRODUCT_ID) },
+ { } /* Terminating entry */
+ };
+
+@@ -419,6 +421,9 @@ static int pl2303_detect_type(struct usb_serial *serial)
+ bcdUSB = le16_to_cpu(desc->bcdUSB);
+
+ switch (bcdUSB) {
++ case 0x101:
++ /* USB 1.0.1? Let's assume they meant 1.1... */
++ fallthrough;
+ case 0x110:
+ switch (bcdDevice) {
+ case 0x300:
+@@ -431,20 +436,27 @@ static int pl2303_detect_type(struct usb_serial *serial)
+ break;
+ case 0x200:
+ switch (bcdDevice) {
+- case 0x100:
++ case 0x100: /* GC */
++ case 0x105:
++ return TYPE_HXN;
++ case 0x300: /* GT / TA */
++ if (pl2303_supports_hx_status(serial))
++ return TYPE_TA;
++ fallthrough;
+ case 0x305:
++ case 0x400: /* GL */
+ case 0x405:
+- /*
+- * Assume it's an HXN-type if the device doesn't
+- * support the old read request value.
+- */
+- if (!pl2303_supports_hx_status(serial))
+- return TYPE_HXN;
+- break;
+- case 0x300:
+- return TYPE_TA;
+- case 0x500:
+- return TYPE_TB;
++ return TYPE_HXN;
++ case 0x500: /* GE / TB */
++ if (pl2303_supports_hx_status(serial))
++ return TYPE_TB;
++ fallthrough;
++ case 0x505:
++ case 0x600: /* GS */
++ case 0x605:
++ case 0x700: /* GR */
++ case 0x705:
++ return TYPE_HXN;
+ }
+ break;
+ }
+diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
+index 6097ee8fccb25..732f9b13ad5d5 100644
+--- a/drivers/usb/serial/pl2303.h
++++ b/drivers/usb/serial/pl2303.h
+@@ -35,6 +35,9 @@
+ #define ATEN_PRODUCT_UC232B 0x2022
+ #define ATEN_PRODUCT_ID2 0x2118
+
++#define IBM_VENDOR_ID 0x04b3
++#define IBM_PRODUCT_ID 0x4016
++
+ #define IODATA_VENDOR_ID 0x04bb
+ #define IODATA_PRODUCT_ID 0x0a03
+ #define IODATA_PRODUCT_ID_RSAQ5 0x0a0e
+@@ -132,6 +135,7 @@
+ #define HP_TD620_PRODUCT_ID 0x0956
+ #define HP_LD960_PRODUCT_ID 0x0b39
+ #define HP_LD381_PRODUCT_ID 0x0f7f
++#define HP_LM930_PRODUCT_ID 0x0f9b
+ #define HP_LCM220_PRODUCT_ID 0x3139
+ #define HP_LCM960_PRODUCT_ID 0x3239
+ #define HP_LD220_PRODUCT_ID 0x3524
+diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
+index c18bf8164bc2e..b1e844bf31f81 100644
+--- a/drivers/usb/serial/qcserial.c
++++ b/drivers/usb/serial/qcserial.c
+@@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = {
+ {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */
+ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */
+ {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */
++ {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */
++ {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */
+ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
+ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
+ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
+@@ -175,6 +177,7 @@ static const struct usb_device_id id_table[] = {
+ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
+ {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */
+ {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */
++ {DEVICE_SWI(0x413c, 0x81c2)}, /* Dell Wireless 5811e */
+ {DEVICE_SWI(0x413c, 0x81cb)}, /* Dell Wireless 5816e QDL */
+ {DEVICE_SWI(0x413c, 0x81cc)}, /* Dell Wireless 5816e */
+ {DEVICE_SWI(0x413c, 0x81cf)}, /* Dell Wireless 5819 */
+diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
+index 9d56138133a97..ef6a2891f290c 100644
+--- a/drivers/usb/serial/sierra.c
++++ b/drivers/usb/serial/sierra.c
+@@ -737,7 +737,8 @@ static void sierra_close(struct usb_serial_port *port)
+
+ /*
+ * Need to take susp_lock to make sure port is not already being
+- * resumed, but no need to hold it due to initialized
++ * resumed, but no need to hold it due to the tty-port initialized
++ * flag.
+ */
+ spin_lock_irq(&intfdata->susp_lock);
+ if (--intfdata->open_ports == 0)
+diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
+index bd23a7cb1be2b..24b8772a345e2 100644
+--- a/drivers/usb/serial/usb-serial-simple.c
++++ b/drivers/usb/serial/usb-serial-simple.c
+@@ -38,16 +38,6 @@ static struct usb_serial_driver vendor##_device = { \
+ { USB_DEVICE(0x0a21, 0x8001) } /* MMT-7305WW */
+ DEVICE(carelink, CARELINK_IDS);
+
+-/* ZIO Motherboard USB driver */
+-#define ZIO_IDS() \
+- { USB_DEVICE(0x1CBE, 0x0103) }
+-DEVICE(zio, ZIO_IDS);
+-
+-/* Funsoft Serial USB driver */
+-#define FUNSOFT_IDS() \
+- { USB_DEVICE(0x1404, 0xcddc) }
+-DEVICE(funsoft, FUNSOFT_IDS);
+-
+ /* Infineon Flashloader driver */
+ #define FLASHLOADER_IDS() \
+ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
+@@ -55,6 +45,11 @@ DEVICE(funsoft, FUNSOFT_IDS);
+ { USB_DEVICE(0x8087, 0x0801) }
+ DEVICE(flashloader, FLASHLOADER_IDS);
+
++/* Funsoft Serial USB driver */
++#define FUNSOFT_IDS() \
++ { USB_DEVICE(0x1404, 0xcddc) }
++DEVICE(funsoft, FUNSOFT_IDS);
++
+ /* Google Serial USB SubClass */
+ #define GOOGLE_IDS() \
+ { USB_VENDOR_AND_INTERFACE_INFO(0x18d1, \
+@@ -63,16 +58,21 @@ DEVICE(flashloader, FLASHLOADER_IDS);
+ 0x01) }
+ DEVICE(google, GOOGLE_IDS);
+
++/* HP4x (48/49) Generic Serial driver */
++#define HP4X_IDS() \
++ { USB_DEVICE(0x03f0, 0x0121) }
++DEVICE(hp4x, HP4X_IDS);
++
++/* KAUFMANN RKS+CAN VCP */
++#define KAUFMANN_IDS() \
++ { USB_DEVICE(0x16d0, 0x0870) }
++DEVICE(kaufmann, KAUFMANN_IDS);
++
+ /* Libtransistor USB console */
+ #define LIBTRANSISTOR_IDS() \
+ { USB_DEVICE(0x1209, 0x8b00) }
+ DEVICE(libtransistor, LIBTRANSISTOR_IDS);
+
+-/* ViVOpay USB Serial Driver */
+-#define VIVOPAY_IDS() \
+- { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */
+-DEVICE(vivopay, VIVOPAY_IDS);
+-
+ /* Motorola USB Phone driver */
+ #define MOTO_IDS() \
+ { USB_DEVICE(0x05c6, 0x3197) }, /* unknown Motorola phone */ \
+@@ -91,15 +91,20 @@ DEVICE(moto_modem, MOTO_IDS);
+ { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */
+ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS);
+
++/* Nokia mobile phone driver */
++#define NOKIA_IDS() \
++ { USB_DEVICE(0x0421, 0x069a) } /* Nokia 130 (RM-1035) */
++DEVICE(nokia, NOKIA_IDS);
++
+ /* Novatel Wireless GPS driver */
+ #define NOVATEL_IDS() \
+ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */
+ DEVICE_N(novatel_gps, NOVATEL_IDS, 3);
+
+-/* HP4x (48/49) Generic Serial driver */
+-#define HP4X_IDS() \
+- { USB_DEVICE(0x03f0, 0x0121) }
+-DEVICE(hp4x, HP4X_IDS);
++/* Siemens USB/MPI adapter */
++#define SIEMENS_IDS() \
++ { USB_DEVICE(0x908, 0x0004) }
++DEVICE(siemens_mpi, SIEMENS_IDS);
+
+ /* Suunto ANT+ USB Driver */
+ #define SUUNTO_IDS() \
+@@ -107,43 +112,52 @@ DEVICE(hp4x, HP4X_IDS);
+ { USB_DEVICE(0x0fcf, 0x1009) } /* Dynastream ANT USB-m Stick */
+ DEVICE(suunto, SUUNTO_IDS);
+
+-/* Siemens USB/MPI adapter */
+-#define SIEMENS_IDS() \
+- { USB_DEVICE(0x908, 0x0004) }
+-DEVICE(siemens_mpi, SIEMENS_IDS);
++/* ViVOpay USB Serial Driver */
++#define VIVOPAY_IDS() \
++ { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */
++DEVICE(vivopay, VIVOPAY_IDS);
++
++/* ZIO Motherboard USB driver */
++#define ZIO_IDS() \
++ { USB_DEVICE(0x1CBE, 0x0103) }
++DEVICE(zio, ZIO_IDS);
+
+ /* All of the above structures mushed into two lists */
+ static struct usb_serial_driver * const serial_drivers[] = {
+ &carelink_device,
+- &zio_device,
+- &funsoft_device,
+ &flashloader_device,
++ &funsoft_device,
+ &google_device,
++ &hp4x_device,
++ &kaufmann_device,
+ &libtransistor_device,
+- &vivopay_device,
+ &moto_modem_device,
+ &motorola_tetra_device,
++ &nokia_device,
+ &novatel_gps_device,
+- &hp4x_device,
+- &suunto_device,
+ &siemens_mpi_device,
++ &suunto_device,
++ &vivopay_device,
++ &zio_device,
+ NULL
+ };
+
+ static const struct usb_device_id id_table[] = {
+ CARELINK_IDS(),
+- ZIO_IDS(),
+- FUNSOFT_IDS(),
+ FLASHLOADER_IDS(),
++ FUNSOFT_IDS(),
+ GOOGLE_IDS(),
++ HP4X_IDS(),
++ KAUFMANN_IDS(),
+ LIBTRANSISTOR_IDS(),
+- VIVOPAY_IDS(),
+ MOTO_IDS(),
+ MOTOROLA_TETRA_IDS(),
++ NOKIA_IDS(),
+ NOVATEL_IDS(),
+- HP4X_IDS(),
+- SUUNTO_IDS(),
+ SIEMENS_IDS(),
++ SUUNTO_IDS(),
++ VIVOPAY_IDS(),
++ ZIO_IDS(),
+ { },
+ };
+ MODULE_DEVICE_TABLE(usb, id_table);
+diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
+index 090a78c948f28..255fb9583c0a2 100644
+--- a/drivers/usb/serial/usb-serial.c
++++ b/drivers/usb/serial/usb-serial.c
+@@ -292,7 +292,7 @@ static int serial_open(struct tty_struct *tty, struct file *filp)
+ *
+ * Shut down a USB serial port. Serialized against activate by the
+ * tport mutex and kept to matching open/close pairs
+- * of calls by the initialized flag.
++ * of calls by the tty-port initialized flag.
+ *
+ * Not called if tty is console.
+ */
+diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
+index cb01283d4d159..f21f25a8cf6fe 100644
+--- a/drivers/usb/serial/usb_wwan.c
++++ b/drivers/usb/serial/usb_wwan.c
+@@ -389,7 +389,8 @@ void usb_wwan_close(struct usb_serial_port *port)
+
+ /*
+ * Need to take susp_lock to make sure port is not already being
+- * resumed, but no need to hold it due to initialized
++ * resumed, but no need to hold it due to the tty-port initialized
++ * flag.
+ */
+ spin_lock_irq(&intfdata->susp_lock);
+ if (--intfdata->open_ports == 0)
+diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
+index da65d14c9ed5e..06aad0d727ddc 100644
+--- a/drivers/usb/serial/whiteheat.c
++++ b/drivers/usb/serial/whiteheat.c
+@@ -584,9 +584,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command,
+ switch (command) {
+ case WHITEHEAT_GET_DTR_RTS:
+ info = usb_get_serial_port_data(port);
+- memcpy(&info->mcr, command_info->result_buffer,
+- sizeof(struct whiteheat_dr_info));
+- break;
++ info->mcr = command_info->result_buffer[0];
++ break;
+ }
+ }
+ exit:
+diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
+index 20b857e97e60c..dcc4778d1ae99 100644
+--- a/drivers/usb/storage/alauda.c
++++ b/drivers/usb/storage/alauda.c
+@@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data)
+ rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
+ command, 0xc0, 0, 1, data, 2);
+
+- usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
++ if (rc == USB_STOR_XFER_GOOD)
++ usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
+
+ return rc;
+ }
+@@ -438,6 +439,8 @@ static int alauda_init_media(struct us_data *us)
+ + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift);
+ MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
+ MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
++ if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL)
++ return USB_STOR_TRANSPORT_ERROR;
+
+ if (alauda_reset_media(us) != USB_STOR_XFER_GOOD)
+ return USB_STOR_TRANSPORT_ERROR;
+@@ -452,9 +455,14 @@ static int alauda_init_media(struct us_data *us)
+ static int alauda_check_media(struct us_data *us)
+ {
+ struct alauda_info *info = (struct alauda_info *) us->extra;
+- unsigned char status[2];
++ unsigned char *status = us->iobuf;
++ int rc;
+
+- alauda_get_media_status(us, status);
++ rc = alauda_get_media_status(us, status);
++ if (rc != USB_STOR_XFER_GOOD) {
++ status[0] = 0xF0; /* Pretend there's no media */
++ status[1] = 0;
++ }
+
+ /* Check for no media or door open */
+ if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
+diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c
+index 5f7d678502be4..97c66c0d91f4d 100644
+--- a/drivers/usb/storage/ene_ub6250.c
++++ b/drivers/usb/storage/ene_ub6250.c
+@@ -237,36 +237,33 @@ static struct us_unusual_dev ene_ub6250_unusual_dev_list[] = {
+ #define memstick_logaddr(logadr1, logadr0) ((((u16)(logadr1)) << 8) | (logadr0))
+
+
+-struct SD_STATUS {
+- u8 Insert:1;
+- u8 Ready:1;
+- u8 MediaChange:1;
+- u8 IsMMC:1;
+- u8 HiCapacity:1;
+- u8 HiSpeed:1;
+- u8 WtP:1;
+- u8 Reserved:1;
+-};
+-
+-struct MS_STATUS {
+- u8 Insert:1;
+- u8 Ready:1;
+- u8 MediaChange:1;
+- u8 IsMSPro:1;
+- u8 IsMSPHG:1;
+- u8 Reserved1:1;
+- u8 WtP:1;
+- u8 Reserved2:1;
+-};
+-
+-struct SM_STATUS {
+- u8 Insert:1;
+- u8 Ready:1;
+- u8 MediaChange:1;
+- u8 Reserved:3;
+- u8 WtP:1;
+- u8 IsMS:1;
+-};
++/* SD_STATUS bits */
++#define SD_Insert BIT(0)
++#define SD_Ready BIT(1)
++#define SD_MediaChange BIT(2)
++#define SD_IsMMC BIT(3)
++#define SD_HiCapacity BIT(4)
++#define SD_HiSpeed BIT(5)
++#define SD_WtP BIT(6)
++ /* Bit 7 reserved */
++
++/* MS_STATUS bits */
++#define MS_Insert BIT(0)
++#define MS_Ready BIT(1)
++#define MS_MediaChange BIT(2)
++#define MS_IsMSPro BIT(3)
++#define MS_IsMSPHG BIT(4)
++ /* Bit 5 reserved */
++#define MS_WtP BIT(6)
++ /* Bit 7 reserved */
++
++/* SM_STATUS bits */
++#define SM_Insert BIT(0)
++#define SM_Ready BIT(1)
++#define SM_MediaChange BIT(2)
++ /* Bits 3-5 reserved */
++#define SM_WtP BIT(6)
++#define SM_IsMS BIT(7)
+
+ struct ms_bootblock_cis {
+ u8 bCistplDEVICE[6]; /* 0 */
+@@ -437,9 +434,9 @@ struct ene_ub6250_info {
+ u8 *bbuf;
+
+ /* for 6250 code */
+- struct SD_STATUS SD_Status;
+- struct MS_STATUS MS_Status;
+- struct SM_STATUS SM_Status;
++ u8 SD_Status;
++ u8 MS_Status;
++ u8 SM_Status;
+
+ /* ----- SD Control Data ---------------- */
+ /*SD_REGISTER SD_Regs; */
+@@ -602,7 +599,7 @@ static int sd_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb)
+ {
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+
+- if (info->SD_Status.Insert && info->SD_Status.Ready)
++ if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready))
+ return USB_STOR_TRANSPORT_GOOD;
+ else {
+ ene_sd_init(us);
+@@ -622,7 +619,7 @@ static int sd_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb)
+ 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00,
+ 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };
+
+- if (info->SD_Status.WtP)
++ if (info->SD_Status & SD_WtP)
+ usb_stor_set_xfer_buf(mediaWP, 12, srb);
+ else
+ usb_stor_set_xfer_buf(mediaNoWP, 12, srb);
+@@ -641,9 +638,9 @@ static int sd_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb)
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+
+ usb_stor_dbg(us, "sd_scsi_read_capacity\n");
+- if (info->SD_Status.HiCapacity) {
++ if (info->SD_Status & SD_HiCapacity) {
+ bl_len = 0x200;
+- if (info->SD_Status.IsMMC)
++ if (info->SD_Status & SD_IsMMC)
+ bl_num = info->HC_C_SIZE-1;
+ else
+ bl_num = (info->HC_C_SIZE + 1) * 1024 - 1;
+@@ -693,7 +690,7 @@ static int sd_scsi_read(struct us_data *us, struct scsi_cmnd *srb)
+ return USB_STOR_TRANSPORT_ERROR;
+ }
+
+- if (info->SD_Status.HiCapacity)
++ if (info->SD_Status & SD_HiCapacity)
+ bnByte = bn;
+
+ /* set up the command wrapper */
+@@ -733,7 +730,7 @@ static int sd_scsi_write(struct us_data *us, struct scsi_cmnd *srb)
+ return USB_STOR_TRANSPORT_ERROR;
+ }
+
+- if (info->SD_Status.HiCapacity)
++ if (info->SD_Status & SD_HiCapacity)
+ bnByte = bn;
+
+ /* set up the command wrapper */
+@@ -942,7 +939,7 @@ static int ms_lib_process_bootblock(struct us_data *us, u16 PhyBlock, u8 *PageDa
+ struct ms_lib_type_extdat ExtraData;
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+
+- PageBuffer = kmalloc(MS_BYTES_PER_PAGE, GFP_KERNEL);
++ PageBuffer = kzalloc(MS_BYTES_PER_PAGE * 2, GFP_KERNEL);
+ if (PageBuffer == NULL)
+ return (u32)-1;
+
+@@ -1456,7 +1453,7 @@ static int ms_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb)
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
+
+ /* pr_info("MS_SCSI_Test_Unit_Ready\n"); */
+- if (info->MS_Status.Insert && info->MS_Status.Ready) {
++ if ((info->MS_Status & MS_Insert) && (info->MS_Status & MS_Ready)) {
+ return USB_STOR_TRANSPORT_GOOD;
+ } else {
+ ene_ms_init(us);
+@@ -1476,7 +1473,7 @@ static int ms_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb)
+ 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00,
+ 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };
+
+- if (info->MS_Status.WtP)
++ if (info->MS_Status & MS_WtP)
+ usb_stor_set_xfer_buf(mediaWP, 12, srb);
+ else
+ usb_stor_set_xfer_buf(mediaNoWP, 12, srb);
+@@ -1495,7 +1492,7 @@ static int ms_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb)
+
+ usb_stor_dbg(us, "ms_scsi_read_capacity\n");
+ bl_len = 0x200;
+- if (info->MS_Status.IsMSPro)
++ if (info->MS_Status & MS_IsMSPro)
+ bl_num = info->MSP_TotalBlock - 1;
+ else
+ bl_num = info->MS_Lib.NumberOfLogBlock * info->MS_Lib.blockSize * 2 - 1;
+@@ -1650,7 +1647,7 @@ static int ms_scsi_read(struct us_data *us, struct scsi_cmnd *srb)
+ if (bn > info->bl_num)
+ return USB_STOR_TRANSPORT_ERROR;
+
+- if (info->MS_Status.IsMSPro) {
++ if (info->MS_Status & MS_IsMSPro) {
+ result = ene_load_bincode(us, MSP_RW_PATTERN);
+ if (result != USB_STOR_XFER_GOOD) {
+ usb_stor_dbg(us, "Load MPS RW pattern Fail !!\n");
+@@ -1751,7 +1748,7 @@ static int ms_scsi_write(struct us_data *us, struct scsi_cmnd *srb)
+ if (bn > info->bl_num)
+ return USB_STOR_TRANSPORT_ERROR;
+
+- if (info->MS_Status.IsMSPro) {
++ if (info->MS_Status & MS_IsMSPro) {
+ result = ene_load_bincode(us, MSP_RW_PATTERN);
+ if (result != USB_STOR_XFER_GOOD) {
+ pr_info("Load MSP RW pattern Fail !!\n");
+@@ -1859,12 +1856,12 @@ static int ene_get_card_status(struct us_data *us, u8 *buf)
+
+ tmpreg = (u16) reg4b;
+ reg4b = *(u32 *)(&buf[0x14]);
+- if (info->SD_Status.HiCapacity && !info->SD_Status.IsMMC)
++ if ((info->SD_Status & SD_HiCapacity) && !(info->SD_Status & SD_IsMMC))
+ info->HC_C_SIZE = (reg4b >> 8) & 0x3fffff;
+
+ info->SD_C_SIZE = ((tmpreg & 0x03) << 10) | (u16)(reg4b >> 22);
+ info->SD_C_SIZE_MULT = (u8)(reg4b >> 7) & 0x07;
+- if (info->SD_Status.HiCapacity && info->SD_Status.IsMMC)
++ if ((info->SD_Status & SD_HiCapacity) && (info->SD_Status & SD_IsMMC))
+ info->HC_C_SIZE = *(u32 *)(&buf[0x100]);
+
+ if (info->SD_READ_BL_LEN > SD_BLOCK_LEN) {
+@@ -2076,6 +2073,7 @@ static int ene_ms_init(struct us_data *us)
+ u16 MSP_BlockSize, MSP_UserAreaBlocks;
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
+ u8 *bbuf = info->bbuf;
++ unsigned int s;
+
+ printk(KERN_INFO "transport --- ENE_MSInit\n");
+
+@@ -2100,15 +2098,16 @@ static int ene_ms_init(struct us_data *us)
+ return USB_STOR_TRANSPORT_ERROR;
+ }
+ /* the same part to test ENE */
+- info->MS_Status = *(struct MS_STATUS *) bbuf;
+-
+- if (info->MS_Status.Insert && info->MS_Status.Ready) {
+- printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert);
+- printk(KERN_INFO "Ready = %x\n", info->MS_Status.Ready);
+- printk(KERN_INFO "IsMSPro = %x\n", info->MS_Status.IsMSPro);
+- printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG);
+- printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP);
+- if (info->MS_Status.IsMSPro) {
++ info->MS_Status = bbuf[0];
++
++ s = info->MS_Status;
++ if ((s & MS_Insert) && (s & MS_Ready)) {
++ printk(KERN_INFO "Insert = %x\n", !!(s & MS_Insert));
++ printk(KERN_INFO "Ready = %x\n", !!(s & MS_Ready));
++ printk(KERN_INFO "IsMSPro = %x\n", !!(s & MS_IsMSPro));
++ printk(KERN_INFO "IsMSPHG = %x\n", !!(s & MS_IsMSPHG));
++ printk(KERN_INFO "WtP= %x\n", !!(s & MS_WtP));
++ if (s & MS_IsMSPro) {
+ MSP_BlockSize = (bbuf[6] << 8) | bbuf[7];
+ MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11];
+ info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks;
+@@ -2169,17 +2168,17 @@ static int ene_sd_init(struct us_data *us)
+ return USB_STOR_TRANSPORT_ERROR;
+ }
+
+- info->SD_Status = *(struct SD_STATUS *) bbuf;
+- if (info->SD_Status.Insert && info->SD_Status.Ready) {
+- struct SD_STATUS *s = &info->SD_Status;
++ info->SD_Status = bbuf[0];
++ if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) {
++ unsigned int s = info->SD_Status;
+
+ ene_get_card_status(us, bbuf);
+- usb_stor_dbg(us, "Insert = %x\n", s->Insert);
+- usb_stor_dbg(us, "Ready = %x\n", s->Ready);
+- usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC);
+- usb_stor_dbg(us, "HiCapacity = %x\n", s->HiCapacity);
+- usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed);
+- usb_stor_dbg(us, "WtP = %x\n", s->WtP);
++ usb_stor_dbg(us, "Insert = %x\n", !!(s & SD_Insert));
++ usb_stor_dbg(us, "Ready = %x\n", !!(s & SD_Ready));
++ usb_stor_dbg(us, "IsMMC = %x\n", !!(s & SD_IsMMC));
++ usb_stor_dbg(us, "HiCapacity = %x\n", !!(s & SD_HiCapacity));
++ usb_stor_dbg(us, "HiSpeed = %x\n", !!(s & SD_HiSpeed));
++ usb_stor_dbg(us, "WtP = %x\n", !!(s & SD_WtP));
+ } else {
+ usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]);
+ return USB_STOR_TRANSPORT_ERROR;
+@@ -2201,14 +2200,14 @@ static int ene_init(struct us_data *us)
+
+ misc_reg03 = bbuf[0];
+ if (misc_reg03 & 0x01) {
+- if (!info->SD_Status.Ready) {
++ if (!(info->SD_Status & SD_Ready)) {
+ result = ene_sd_init(us);
+ if (result != USB_STOR_XFER_GOOD)
+ return USB_STOR_TRANSPORT_ERROR;
+ }
+ }
+ if (misc_reg03 & 0x02) {
+- if (!info->MS_Status.Ready) {
++ if (!(info->MS_Status & MS_Ready)) {
+ result = ene_ms_init(us);
+ if (result != USB_STOR_XFER_GOOD)
+ return USB_STOR_TRANSPORT_ERROR;
+@@ -2307,14 +2306,14 @@ static int ene_transport(struct scsi_cmnd *srb, struct us_data *us)
+
+ /*US_DEBUG(usb_stor_show_command(us, srb)); */
+ scsi_set_resid(srb, 0);
+- if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready)))
++ if (unlikely(!(info->SD_Status & SD_Ready) || (info->MS_Status & MS_Ready)))
+ result = ene_init(us);
+ if (result == USB_STOR_XFER_GOOD) {
+ result = USB_STOR_TRANSPORT_ERROR;
+- if (info->SD_Status.Ready)
++ if (info->SD_Status & SD_Ready)
+ result = sd_scsi_irp(us, srb);
+
+- if (info->MS_Status.Ready)
++ if (info->MS_Status & MS_Ready)
+ result = ms_scsi_irp(us, srb);
+ }
+ return result;
+@@ -2378,7 +2377,6 @@ static int ene_ub6250_probe(struct usb_interface *intf,
+
+ static int ene_ub6250_resume(struct usb_interface *iface)
+ {
+- u8 tmp = 0;
+ struct us_data *us = usb_get_intfdata(iface);
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
+
+@@ -2390,17 +2388,16 @@ static int ene_ub6250_resume(struct usb_interface *iface)
+ mutex_unlock(&us->dev_mutex);
+
+ info->Power_IsResum = true;
+- /*info->SD_Status.Ready = 0; */
+- info->SD_Status = *(struct SD_STATUS *)&tmp;
+- info->MS_Status = *(struct MS_STATUS *)&tmp;
+- info->SM_Status = *(struct SM_STATUS *)&tmp;
++ /* info->SD_Status &= ~SD_Ready; */
++ info->SD_Status = 0;
++ info->MS_Status = 0;
++ info->SM_Status = 0;
+
+ return 0;
+ }
+
+ static int ene_ub6250_reset_resume(struct usb_interface *iface)
+ {
+- u8 tmp = 0;
+ struct us_data *us = usb_get_intfdata(iface);
+ struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
+
+@@ -2412,10 +2409,10 @@ static int ene_ub6250_reset_resume(struct usb_interface *iface)
+ * the device
+ */
+ info->Power_IsResum = true;
+- /*info->SD_Status.Ready = 0; */
+- info->SD_Status = *(struct SD_STATUS *)&tmp;
+- info->MS_Status = *(struct MS_STATUS *)&tmp;
+- info->SM_Status = *(struct SM_STATUS *)&tmp;
++ /* info->SD_Status &= ~SD_Ready; */
++ info->SD_Status = 0;
++ info->MS_Status = 0;
++ info->SM_Status = 0;
+
+ return 0;
+ }
+diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c
+index 05cec81dcd3f2..38ddfedef629c 100644
+--- a/drivers/usb/storage/karma.c
++++ b/drivers/usb/storage/karma.c
+@@ -174,24 +174,25 @@ static void rio_karma_destructor(void *extra)
+
+ static int rio_karma_init(struct us_data *us)
+ {
+- int ret = 0;
+ struct karma_data *data = kzalloc(sizeof(struct karma_data), GFP_NOIO);
+
+ if (!data)
+- goto out;
++ return -ENOMEM;
+
+ data->recv = kmalloc(RIO_RECV_LEN, GFP_NOIO);
+ if (!data->recv) {
+ kfree(data);
+- goto out;
++ return -ENOMEM;
+ }
+
+ us->extra = data;
+ us->extra_destructor = rio_karma_destructor;
+- ret = rio_karma_send_command(RIO_ENTER_STORAGE, us);
+- data->in_storage = (ret == 0);
+-out:
+- return ret;
++ if (rio_karma_send_command(RIO_ENTER_STORAGE, us))
++ return -EIO;
++
++ data->in_storage = 1;
++
++ return 0;
+ }
+
+ static struct scsi_host_template karma_host_template;
+diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c
+index 3789698d9d3c6..0c423916d7bfa 100644
+--- a/drivers/usb/storage/realtek_cr.c
++++ b/drivers/usb/storage/realtek_cr.c
+@@ -365,7 +365,7 @@ static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len)
+
+ buf = kmalloc(len, GFP_NOIO);
+ if (buf == NULL)
+- return USB_STOR_TRANSPORT_ERROR;
++ return -ENOMEM;
+
+ usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len);
+
+diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
+index e5a971b83e3f5..b8e1109f0e0d4 100644
+--- a/drivers/usb/storage/scsiglue.c
++++ b/drivers/usb/storage/scsiglue.c
+@@ -407,22 +407,25 @@ static DEF_SCSI_QCMD(queuecommand)
+ ***********************************************************************/
+
+ /* Command timeout and abort */
+-static int command_abort(struct scsi_cmnd *srb)
++static int command_abort_matching(struct us_data *us, struct scsi_cmnd *srb_match)
+ {
+- struct us_data *us = host_to_us(srb->device->host);
+-
+- usb_stor_dbg(us, "%s called\n", __func__);
+-
+ /*
+ * us->srb together with the TIMED_OUT, RESETTING, and ABORTING
+ * bits are protected by the host lock.
+ */
+ scsi_lock(us_to_host(us));
+
+- /* Is this command still active? */
+- if (us->srb != srb) {
++	/* Is there any active pending command to abort? */
++ if (!us->srb) {
+ scsi_unlock(us_to_host(us));
+ usb_stor_dbg(us, "-- nothing to abort\n");
++ return SUCCESS;
++ }
++
++ /* Does the command match the passed srb if any ? */
++ if (srb_match && us->srb != srb_match) {
++ scsi_unlock(us_to_host(us));
++ usb_stor_dbg(us, "-- pending command mismatch\n");
+ return FAILED;
+ }
+
+@@ -445,6 +448,14 @@ static int command_abort(struct scsi_cmnd *srb)
+ return SUCCESS;
+ }
+
++static int command_abort(struct scsi_cmnd *srb)
++{
++ struct us_data *us = host_to_us(srb->device->host);
++
++ usb_stor_dbg(us, "%s called\n", __func__);
++ return command_abort_matching(us, srb);
++}
++
+ /*
+ * This invokes the transport reset mechanism to reset the state of the
+ * device
+@@ -456,6 +467,9 @@ static int device_reset(struct scsi_cmnd *srb)
+
+ usb_stor_dbg(us, "%s called\n", __func__);
+
++ /* abort any pending command before reset */
++ command_abort_matching(us, NULL);
++
+ /* lock the device pointers and do the reset */
+ mutex_lock(&(us->dev_mutex));
+ result = us->transport_reset(us);
+diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h
+index 3f720faa6f97c..d73282c0ec501 100644
+--- a/drivers/usb/storage/uas-detect.h
++++ b/drivers/usb/storage/uas-detect.h
+@@ -116,6 +116,19 @@ static int uas_use_uas_driver(struct usb_interface *intf,
+ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
+ flags |= US_FL_NO_ATA_1X;
+
++ /*
++	 * RTL9210-based enclosures from HIKSEMI, MD202 reportedly have issues
++ * with UAS. This isn't distinguishable with just idVendor and
++ * idProduct, use manufacturer and product too.
++ *
++ * Reported-by: Hongling Zeng <zenghongling@kylinos.cn>
++ */
++ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bda &&
++ le16_to_cpu(udev->descriptor.idProduct) == 0x9210 &&
++ (udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI")) &&
++ (udev->product && !strcmp(udev->product, "MD202")))
++ flags |= US_FL_IGNORE_UAS;
++
+ usb_stor_adjust_quirks(udev, &flags);
+
+ if (flags & US_FL_IGNORE_UAS) {
+diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
+index c6b3fcf901805..20dcbccb290b3 100644
+--- a/drivers/usb/storage/unusual_devs.h
++++ b/drivers/usb/storage/unusual_devs.h
+@@ -406,6 +406,16 @@ UNUSUAL_DEV( 0x04b8, 0x0602, 0x0110, 0x0110,
+ "785EPX Storage",
+ USB_SC_SCSI, USB_PR_BULK, NULL, US_FL_SINGLE_LUN),
+
++/*
++ * Reported by James Buren <braewoods+lkml@braewoods.net>
++ * Virtual ISOs cannot be remounted if ejected while the device is locked
++ * Disable locking to mimic Windows behavior that bypasses the issue
++ */
++UNUSUAL_DEV( 0x04c5, 0x2028, 0x0001, 0x0001,
++ "iODD",
++ "2531/2541",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE),
++
+ /*
+ * Not sure who reported this originally but
+ * Pavel Machek <pavel@ucw.cz> reported that the extra US_FL_SINGLE_LUN
+@@ -1265,12 +1275,6 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999,
+ USB_SC_RBC, USB_PR_BULK, NULL,
+ 0 ),
+
+-UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100,
+- "Samsung",
+- "Flash Drive FIT",
+- USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+- US_FL_MAX_SECTORS_64),
+-
+ /* aeb */
+ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff,
+ "Feiya",
+@@ -2284,6 +2288,13 @@ UNUSUAL_DEV( 0x1e74, 0x4621, 0x0000, 0x0000,
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BULK_IGNORE_TAG | US_FL_MAX_SECTORS_64 ),
+
++/* Reported by Witold Lipieta <witold.lipieta@thaumatec.com> */
++UNUSUAL_DEV( 0x1fc9, 0x0117, 0x0100, 0x0100,
++ "NXP Semiconductors",
++ "PN7462AU",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_IGNORE_RESIDUE ),
++
+ /* Supplied with some Castlewood ORB removable drives */
+ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999,
+ "Double-H Technology",
+@@ -2291,6 +2302,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999,
+ USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init,
+ US_FL_SCM_MULT_TARG ),
+
++/*
++ * Reported by DocMAX <mail@vacharakis.de>
++ * and Thomas Weißschuh <linux@weissschuh.net>
++ */
++UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999,
++ "VIA Labs, Inc.",
++ "VL817 SATA Bridge",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_IGNORE_UAS),
++
+ UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001,
+ "ST",
+ "2A",
+diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
+index 4051c8cd0cd8a..1f8c9b16a0fb8 100644
+--- a/drivers/usb/storage/unusual_uas.h
++++ b/drivers/usb/storage/unusual_uas.h
+@@ -52,6 +52,13 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999,
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME),
+
++/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */
++UNUSUAL_DEV(0x090c, 0x2000, 0x0000, 0x9999,
++ "Hiksemi",
++ "External HDD",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_IGNORE_UAS),
++
+ /*
+ * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
+ * commands in UAS mode. Observed with the 1.28 firmware; are there others?
+@@ -62,6 +69,13 @@ UNUSUAL_DEV(0x0984, 0x0301, 0x0128, 0x0128,
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
++/* Reported-by: Tom Hu <huxiaoying@kylinos.cn> */
++UNUSUAL_DEV(0x0b05, 0x1932, 0x0000, 0x9999,
++ "ASUS",
++ "External HDD",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_IGNORE_UAS),
++
+ /* Reported-by: David Webb <djw@noc.ac.uk> */
+ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999,
+ "Seagate",
+@@ -97,6 +111,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA),
+
++/* Reported by: Yaroslav Furman <yaro330@gmail.com> */
++UNUSUAL_DEV(0x152d, 0x0583, 0x0000, 0x9999,
++ "JMicron",
++ "JMS583Gen 2",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_NO_REPORT_OPCODES),
++
+ /* Reported-by: Thinh Nguyen <thinhn@synopsys.com> */
+ UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999,
+ "PNY",
+@@ -111,6 +132,13 @@ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999,
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_NO_ATA_1X),
+
++/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */
++UNUSUAL_DEV(0x17ef, 0x3899, 0x0000, 0x9999,
++ "Thinkplus",
++ "External HDD",
++ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
++ US_FL_IGNORE_UAS),
++
+ /* Reported-by: Hans de Goede <hdegoede@redhat.com> */
+ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
+ "VIA",
+diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
+index a0418f23b4aae..ab480f38523aa 100644
+--- a/drivers/usb/typec/Kconfig
++++ b/drivers/usb/typec/Kconfig
+@@ -65,9 +65,9 @@ config TYPEC_HD3SS3220
+
+ config TYPEC_STUSB160X
+ tristate "STMicroelectronics STUSB160x Type-C controller driver"
+- depends on I2C
+- depends on REGMAP_I2C
+ depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH
++ depends on I2C
++ select REGMAP_I2C
+ help
+ Say Y or M here if your system has STMicroelectronics STUSB160x
+ Type-C port controller.
+diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
+index b7f094435b00a..8f0c6da27dd1b 100644
+--- a/drivers/usb/typec/altmodes/displayport.c
++++ b/drivers/usb/typec/altmodes/displayport.c
+@@ -88,8 +88,8 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
+ case DP_STATUS_CON_UFP_D:
+ case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */
+ conf |= DP_CONF_UFP_U_AS_UFP_D;
+- pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) &
+- DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo);
++ pin_assign = DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo) &
++ DP_CAP_PIN_ASSIGN_DFP_D(dp->port->vdo);
+ break;
+ default:
+ break;
+@@ -101,8 +101,12 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
+ if (dp->data.status & DP_STATUS_PREFER_MULTI_FUNC &&
+ pin_assign & DP_PIN_ASSIGN_MULTI_FUNC_MASK)
+ pin_assign &= DP_PIN_ASSIGN_MULTI_FUNC_MASK;
+- else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK)
++ else if (pin_assign & DP_PIN_ASSIGN_DP_ONLY_MASK) {
+ pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK;
++ /* Default to pin assign C if available */
++ if (pin_assign & BIT(DP_PIN_ASSIGN_C))
++ pin_assign = BIT(DP_PIN_ASSIGN_C);
++ }
+
+ if (!pin_assign)
+ return -EINVAL;
+@@ -418,6 +422,18 @@ static const char * const pin_assignments[] = {
+ [DP_PIN_ASSIGN_F] = "F",
+ };
+
++/*
++ * Helper function to extract a peripheral's currently supported
++ * Pin Assignments from its DisplayPort alternate mode state.
++ */
++static u8 get_current_pin_assignments(struct dp_altmode *dp)
++{
++ if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
++ return DP_CAP_PIN_ASSIGN_DFP_D(dp->alt->vdo);
++ else
++ return DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo);
++}
++
+ static ssize_t
+ pin_assignment_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+@@ -444,10 +460,7 @@ pin_assignment_store(struct device *dev, struct device_attribute *attr,
+ goto out_unlock;
+ }
+
+- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+- else
+- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
++ assignments = get_current_pin_assignments(dp);
+
+ if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) {
+ ret = -EINVAL;
+@@ -484,10 +497,7 @@ static ssize_t pin_assignment_show(struct device *dev,
+
+ cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
+
+- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+- else
+- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
++ assignments = get_current_pin_assignments(dp);
+
+ for (i = 0; assignments; assignments >>= 1, i++) {
+ if (assignments & 1) {
+@@ -502,6 +512,10 @@ static ssize_t pin_assignment_show(struct device *dev,
+
+ mutex_unlock(&dp->lock);
+
++ /* get_current_pin_assignments can return 0 when no matching pin assignments are found */
++ if (len == 0)
++ len++;
++
+ buf[len - 1] = '\n';
+ return len;
+ }
+@@ -527,10 +541,10 @@ int dp_altmode_probe(struct typec_altmode *alt)
+ /* FIXME: Port can only be DFP_U. */
+
+ /* Make sure we have compatiple pin configurations */
+- if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) &
+- DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) &&
+- !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) &
+- DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo)))
++ if (!(DP_CAP_PIN_ASSIGN_DFP_D(port->vdo) &
++ DP_CAP_PIN_ASSIGN_UFP_D(alt->vdo)) &&
++ !(DP_CAP_PIN_ASSIGN_UFP_D(port->vdo) &
++ DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo)))
+ return -ENODEV;
+
+ ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
+diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
+index 78e0e78954f2d..407d2a84633a8 100644
+--- a/drivers/usb/typec/bus.c
++++ b/drivers/usb/typec/bus.c
+@@ -134,7 +134,7 @@ int typec_altmode_exit(struct typec_altmode *adev)
+ if (!adev || !adev->active)
+ return 0;
+
+- if (!pdev->ops || !pdev->ops->enter)
++ if (!pdev->ops || !pdev->ops->exit)
+ return -EOPNOTSUPP;
+
+ /* Moving to USB Safe State */
+@@ -154,12 +154,20 @@ EXPORT_SYMBOL_GPL(typec_altmode_exit);
+ *
+ * Notifies the partner of @adev about Attention command.
+ */
+-void typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
++int typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
+ {
+- struct typec_altmode *pdev = &to_altmode(adev)->partner->adev;
++ struct altmode *partner = to_altmode(adev)->partner;
++ struct typec_altmode *pdev;
++
++ if (!partner)
++ return -ENODEV;
++
++ pdev = &partner->adev;
+
+ if (pdev->ops && pdev->ops->attention)
+ pdev->ops->attention(pdev, vdo);
++
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(typec_altmode_attention);
+
+diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
+index aeef453aa6585..339752fef65e0 100644
+--- a/drivers/usb/typec/class.c
++++ b/drivers/usb/typec/class.c
+@@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port,
+ partner->usb_pd = 1;
+ sysfs_notify(&partner_dev->kobj, NULL,
+ "supports_usb_power_delivery");
++ kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE);
+ }
+ put_device(partner_dev);
+ }
+@@ -1894,6 +1895,49 @@ void *typec_get_drvdata(struct typec_port *port)
+ }
+ EXPORT_SYMBOL_GPL(typec_get_drvdata);
+
++int typec_get_fw_cap(struct typec_capability *cap,
++ struct fwnode_handle *fwnode)
++{
++ const char *cap_str;
++ int ret;
++
++ cap->fwnode = fwnode;
++
++ ret = fwnode_property_read_string(fwnode, "power-role", &cap_str);
++ if (ret < 0)
++ return ret;
++
++ ret = typec_find_port_power_role(cap_str);
++ if (ret < 0)
++ return ret;
++ cap->type = ret;
++
++ /* USB data support is optional */
++ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
++ if (ret == 0) {
++ ret = typec_find_port_data_role(cap_str);
++ if (ret < 0)
++ return ret;
++ cap->data = ret;
++ }
++
++ /* Get the preferred power role for a DRP */
++ if (cap->type == TYPEC_PORT_DRP) {
++ cap->prefer_role = TYPEC_NO_PREFERRED_ROLE;
++
++ ret = fwnode_property_read_string(fwnode, "try-power-role", &cap_str);
++ if (ret == 0) {
++ ret = typec_find_power_role(cap_str);
++ if (ret < 0)
++ return ret;
++ cap->prefer_role = ret;
++ }
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(typec_get_fw_cap);
++
+ /**
+ * typec_port_register_altmode - Register USB Type-C Port Alternate Mode
+ * @port: USB Type-C Port that supports the alternate mode
+diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
+index c8340de0ed495..d2aaf294b6493 100644
+--- a/drivers/usb/typec/mux.c
++++ b/drivers/usb/typec/mux.c
+@@ -131,8 +131,11 @@ typec_switch_register(struct device *parent,
+ sw->dev.class = &typec_mux_class;
+ sw->dev.type = &typec_switch_dev_type;
+ sw->dev.driver_data = desc->drvdata;
+- dev_set_name(&sw->dev, "%s-switch",
+- desc->name ? desc->name : dev_name(parent));
++ ret = dev_set_name(&sw->dev, "%s-switch", desc->name ? desc->name : dev_name(parent));
++ if (ret) {
++ put_device(&sw->dev);
++ return ERR_PTR(ret);
++ }
+
+ ret = device_add(&sw->dev);
+ if (ret) {
+@@ -338,8 +341,11 @@ typec_mux_register(struct device *parent, const struct typec_mux_desc *desc)
+ mux->dev.class = &typec_mux_class;
+ mux->dev.type = &typec_mux_dev_type;
+ mux->dev.driver_data = desc->drvdata;
+- dev_set_name(&mux->dev, "%s-mux",
+- desc->name ? desc->name : dev_name(parent));
++ ret = dev_set_name(&mux->dev, "%s-mux", desc->name ? desc->name : dev_name(parent));
++ if (ret) {
++ put_device(&mux->dev);
++ return ERR_PTR(ret);
++ }
+
+ ret = device_add(&mux->dev);
+ if (ret) {
+diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
+index 2cdd22130834e..a7313c2d9f0fe 100644
+--- a/drivers/usb/typec/mux/intel_pmc_mux.c
++++ b/drivers/usb/typec/mux/intel_pmc_mux.c
+@@ -352,13 +352,24 @@ pmc_usb_mux_usb4(struct pmc_usb_port *port, struct typec_mux_state *state)
+ return pmc_usb_command(port, (void *)&req, sizeof(req));
+ }
+
+-static int pmc_usb_mux_safe_state(struct pmc_usb_port *port)
++static int pmc_usb_mux_safe_state(struct pmc_usb_port *port,
++ struct typec_mux_state *state)
+ {
+ u8 msg;
+
+ if (IOM_PORT_ACTIVITY_IS(port->iom_status, SAFE_MODE))
+ return 0;
+
++ if ((IOM_PORT_ACTIVITY_IS(port->iom_status, DP) ||
++ IOM_PORT_ACTIVITY_IS(port->iom_status, DP_MFD)) &&
++ state->alt && state->alt->svid == USB_TYPEC_DP_SID)
++ return 0;
++
++ if ((IOM_PORT_ACTIVITY_IS(port->iom_status, TBT) ||
++ IOM_PORT_ACTIVITY_IS(port->iom_status, ALT_MODE_TBT_USB)) &&
++ state->alt && state->alt->svid == USB_TYPEC_TBT_SID)
++ return 0;
++
+ msg = PMC_USB_SAFE_MODE;
+ msg |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT;
+
+@@ -426,7 +437,7 @@ pmc_usb_mux_set(struct typec_mux *mux, struct typec_mux_state *state)
+ return 0;
+
+ if (state->mode == TYPEC_STATE_SAFE)
+- return pmc_usb_mux_safe_state(port);
++ return pmc_usb_mux_safe_state(port, state);
+ if (state->mode == TYPEC_STATE_USB)
+ return pmc_usb_connect(port, port->role);
+
+@@ -552,13 +563,6 @@ err_unregister_switch:
+ return ret;
+ }
+
+-static int is_memory(struct acpi_resource *res, void *data)
+-{
+- struct resource r;
+-
+- return !acpi_dev_resource_memory(res, &r);
+-}
+-
+ /* IOM ACPI IDs and IOM_PORT_STATUS_OFFSET */
+ static const struct acpi_device_id iom_acpi_ids[] = {
+ /* TigerLake */
+@@ -566,6 +570,9 @@ static const struct acpi_device_id iom_acpi_ids[] = {
+
+ /* AlderLake */
+ { "INTC1079", 0x160, },
++
++ /* Meteor Lake */
++ { "INTC107A", 0x160, },
+ {}
+ };
+
+@@ -589,9 +596,11 @@ static int pmc_usb_probe_iom(struct pmc_usb *pmc)
+ return -ENODEV;
+
+ INIT_LIST_HEAD(&resource_list);
+- ret = acpi_dev_get_resources(adev, &resource_list, is_memory, NULL);
+- if (ret < 0)
++ ret = acpi_dev_get_memory_resources(adev, &resource_list);
++ if (ret < 0) {
++ acpi_dev_put(adev);
+ return ret;
++ }
+
+ rentry = list_first_entry_or_null(&resource_list, struct resource_entry, node);
+ if (rentry)
+diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
+index 557f392fe24da..073fd2ea5e0bb 100644
+--- a/drivers/usb/typec/tcpm/Kconfig
++++ b/drivers/usb/typec/tcpm/Kconfig
+@@ -56,7 +56,6 @@ config TYPEC_WCOVE
+ tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver"
+ depends on ACPI
+ depends on MFD_INTEL_PMC_BXT
+- depends on INTEL_SOC_PMIC
+ depends on BXT_WC_PMIC_OPREGION
+ help
+ This driver adds support for USB Type-C on Intel Broxton platforms
+diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
+index 7a2a17866a823..96c55eaf3f808 100644
+--- a/drivers/usb/typec/tcpm/fusb302.c
++++ b/drivers/usb/typec/tcpm/fusb302.c
+@@ -669,25 +669,27 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc)
+ ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
+ FUSB_REG_MASK_BC_LVL |
+ FUSB_REG_MASK_COMP_CHNG,
+- FUSB_REG_MASK_COMP_CHNG);
++ FUSB_REG_MASK_BC_LVL);
+ if (ret < 0) {
+ fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
+ ret);
+ goto done;
+ }
+ chip->intr_comp_chng = true;
++ chip->intr_bc_lvl = false;
+ break;
+ case TYPEC_CC_RD:
+ ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
+ FUSB_REG_MASK_BC_LVL |
+ FUSB_REG_MASK_COMP_CHNG,
+- FUSB_REG_MASK_BC_LVL);
++ FUSB_REG_MASK_COMP_CHNG);
+ if (ret < 0) {
+ fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
+ ret);
+ goto done;
+ }
+ chip->intr_bc_lvl = true;
++ chip->intr_comp_chng = false;
+ break;
+ default:
+ break;
+@@ -1706,8 +1708,8 @@ static int fusb302_probe(struct i2c_client *client,
+ */
+ if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) {
+ chip->extcon = extcon_get_extcon_dev(name);
+- if (!chip->extcon)
+- return -EPROBE_DEFER;
++ if (IS_ERR(chip->extcon))
++ return PTR_ERR(chip->extcon);
+ }
+
+ chip->vbus = devm_regulator_get(chip->dev, "vbus");
+diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
+index c15eec9cc460a..a7b0134d382b8 100644
+--- a/drivers/usb/typec/tcpm/tcpci.c
++++ b/drivers/usb/typec/tcpm/tcpci.c
+@@ -13,11 +13,10 @@
+ #include <linux/property.h>
+ #include <linux/regmap.h>
+ #include <linux/usb/pd.h>
++#include <linux/usb/tcpci.h>
+ #include <linux/usb/tcpm.h>
+ #include <linux/usb/typec.h>
+
+-#include "tcpci.h"
+-
+ #define PD_RETRY_COUNT_DEFAULT 3
+ #define PD_RETRY_COUNT_3_0_OR_HIGHER 2
+ #define AUTO_DISCHARGE_DEFAULT_THRESHOLD_MV 3500
+@@ -75,9 +74,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val)
+ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
+ {
+ struct tcpci *tcpci = tcpc_to_tcpci(tcpc);
++ bool vconn_pres;
++ enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1;
+ unsigned int reg;
+ int ret;
+
++ ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, &reg);
++ if (ret < 0)
++ return ret;
++
++ vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES);
++ if (vconn_pres) {
++ ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, &reg);
++ if (ret < 0)
++ return ret;
++
++ if (reg & TCPC_TCPC_CTRL_ORIENTATION)
++ polarity = TYPEC_POLARITY_CC2;
++ }
++
+ switch (cc) {
+ case TYPEC_CC_RA:
+ reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) |
+@@ -112,6 +127,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
+ break;
+ }
+
++ if (vconn_pres) {
++ if (polarity == TYPEC_POLARITY_CC2) {
++ reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT);
++ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT);
++ } else {
++ reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT);
++ reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT);
++ }
++ }
++
+ ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg);
+ if (ret < 0)
+ return ret;
+@@ -590,6 +615,10 @@ static int tcpci_init(struct tcpc_dev *tcpc)
+ if (time_after(jiffies, timeout))
+ return -ETIMEDOUT;
+
++ ret = tcpci_write16(tcpci, TCPC_FAULT_STATUS, TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT);
++ if (ret < 0)
++ return ret;
++
+ /* Handle vendor init */
+ if (tcpci->data->init) {
+ ret = tcpci->data->init(tcpci, tcpci->data);
+@@ -791,8 +820,10 @@ struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data)
+ return ERR_PTR(err);
+
+ tcpci->port = tcpm_register_port(tcpci->dev, &tcpci->tcpc);
+- if (IS_ERR(tcpci->port))
++ if (IS_ERR(tcpci->port)) {
++ fwnode_handle_put(tcpci->tcpc.fwnode);
+ return ERR_CAST(tcpci->port);
++ }
+
+ return tcpci;
+ }
+@@ -801,6 +832,7 @@ EXPORT_SYMBOL_GPL(tcpci_register_port);
+ void tcpci_unregister_port(struct tcpci *tcpci)
+ {
+ tcpm_unregister_port(tcpci->port);
++ fwnode_handle_put(tcpci->tcpc.fwnode);
+ }
+ EXPORT_SYMBOL_GPL(tcpci_unregister_port);
+
+@@ -851,7 +883,7 @@ static int tcpci_remove(struct i2c_client *client)
+ /* Disable chip interrupts before unregistering port */
+ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0);
+ if (err < 0)
+- return err;
++ dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err));
+
+ tcpci_unregister_port(chip->tcpci);
+
+diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h
+deleted file mode 100644
+index 2be7a77d400ef..0000000000000
+--- a/drivers/usb/typec/tcpm/tcpci.h
++++ /dev/null
+@@ -1,208 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0+ */
+-/*
+- * Copyright 2015-2017 Google, Inc
+- *
+- * USB Type-C Port Controller Interface.
+- */
+-
+-#ifndef __LINUX_USB_TCPCI_H
+-#define __LINUX_USB_TCPCI_H
+-
+-#include <linux/usb/typec.h>
+-
+-#define TCPC_VENDOR_ID 0x0
+-#define TCPC_PRODUCT_ID 0x2
+-#define TCPC_BCD_DEV 0x4
+-#define TCPC_TC_REV 0x6
+-#define TCPC_PD_REV 0x8
+-#define TCPC_PD_INT_REV 0xa
+-
+-#define TCPC_ALERT 0x10
+-#define TCPC_ALERT_EXTND BIT(14)
+-#define TCPC_ALERT_EXTENDED_STATUS BIT(13)
+-#define TCPC_ALERT_VBUS_DISCNCT BIT(11)
+-#define TCPC_ALERT_RX_BUF_OVF BIT(10)
+-#define TCPC_ALERT_FAULT BIT(9)
+-#define TCPC_ALERT_V_ALARM_LO BIT(8)
+-#define TCPC_ALERT_V_ALARM_HI BIT(7)
+-#define TCPC_ALERT_TX_SUCCESS BIT(6)
+-#define TCPC_ALERT_TX_DISCARDED BIT(5)
+-#define TCPC_ALERT_TX_FAILED BIT(4)
+-#define TCPC_ALERT_RX_HARD_RST BIT(3)
+-#define TCPC_ALERT_RX_STATUS BIT(2)
+-#define TCPC_ALERT_POWER_STATUS BIT(1)
+-#define TCPC_ALERT_CC_STATUS BIT(0)
+-
+-#define TCPC_ALERT_MASK 0x12
+-#define TCPC_POWER_STATUS_MASK 0x14
+-#define TCPC_FAULT_STATUS_MASK 0x15
+-
+-#define TCPC_EXTENDED_STATUS_MASK 0x16
+-#define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0)
+-
+-#define TCPC_ALERT_EXTENDED_MASK 0x17
+-#define TCPC_SINK_FAST_ROLE_SWAP BIT(0)
+-
+-#define TCPC_CONFIG_STD_OUTPUT 0x18
+-
+-#define TCPC_TCPC_CTRL 0x19
+-#define TCPC_TCPC_CTRL_ORIENTATION BIT(0)
+-#define PLUG_ORNT_CC1 0
+-#define PLUG_ORNT_CC2 1
+-#define TCPC_TCPC_CTRL_BIST_TM BIT(1)
+-#define TCPC_TCPC_CTRL_EN_LK4CONN_ALRT BIT(6)
+-
+-#define TCPC_EXTENDED_STATUS 0x20
+-#define TCPC_EXTENDED_STATUS_VSAFE0V BIT(0)
+-
+-#define TCPC_ROLE_CTRL 0x1a
+-#define TCPC_ROLE_CTRL_DRP BIT(6)
+-#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4
+-#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3
+-#define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0
+-#define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1
+-#define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2
+-#define TCPC_ROLE_CTRL_CC2_SHIFT 2
+-#define TCPC_ROLE_CTRL_CC2_MASK 0x3
+-#define TCPC_ROLE_CTRL_CC1_SHIFT 0
+-#define TCPC_ROLE_CTRL_CC1_MASK 0x3
+-#define TCPC_ROLE_CTRL_CC_RA 0x0
+-#define TCPC_ROLE_CTRL_CC_RP 0x1
+-#define TCPC_ROLE_CTRL_CC_RD 0x2
+-#define TCPC_ROLE_CTRL_CC_OPEN 0x3
+-
+-#define TCPC_FAULT_CTRL 0x1b
+-
+-#define TCPC_POWER_CTRL 0x1c
+-#define TCPC_POWER_CTRL_VCONN_ENABLE BIT(0)
+-#define TCPC_POWER_CTRL_BLEED_DISCHARGE BIT(3)
+-#define TCPC_POWER_CTRL_AUTO_DISCHARGE BIT(4)
+-#define TCPC_DIS_VOLT_ALRM BIT(5)
+-#define TCPC_POWER_CTRL_VBUS_VOLT_MON BIT(6)
+-#define TCPC_FAST_ROLE_SWAP_EN BIT(7)
+-
+-#define TCPC_CC_STATUS 0x1d
+-#define TCPC_CC_STATUS_TOGGLING BIT(5)
+-#define TCPC_CC_STATUS_TERM BIT(4)
+-#define TCPC_CC_STATUS_TERM_RP 0
+-#define TCPC_CC_STATUS_TERM_RD 1
+-#define TCPC_CC_STATE_SRC_OPEN 0
+-#define TCPC_CC_STATUS_CC2_SHIFT 2
+-#define TCPC_CC_STATUS_CC2_MASK 0x3
+-#define TCPC_CC_STATUS_CC1_SHIFT 0
+-#define TCPC_CC_STATUS_CC1_MASK 0x3
+-
+-#define TCPC_POWER_STATUS 0x1e
+-#define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7)
+-#define TCPC_POWER_STATUS_UNINIT BIT(6)
+-#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4)
+-#define TCPC_POWER_STATUS_VBUS_DET BIT(3)
+-#define TCPC_POWER_STATUS_VBUS_PRES BIT(2)
+-#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
+-
+-#define TCPC_FAULT_STATUS 0x1f
+-
+-#define TCPC_ALERT_EXTENDED 0x21
+-
+-#define TCPC_COMMAND 0x23
+-#define TCPC_CMD_WAKE_I2C 0x11
+-#define TCPC_CMD_DISABLE_VBUS_DETECT 0x22
+-#define TCPC_CMD_ENABLE_VBUS_DETECT 0x33
+-#define TCPC_CMD_DISABLE_SINK_VBUS 0x44
+-#define TCPC_CMD_SINK_VBUS 0x55
+-#define TCPC_CMD_DISABLE_SRC_VBUS 0x66
+-#define TCPC_CMD_SRC_VBUS_DEFAULT 0x77
+-#define TCPC_CMD_SRC_VBUS_HIGH 0x88
+-#define TCPC_CMD_LOOK4CONNECTION 0x99
+-#define TCPC_CMD_RXONEMORE 0xAA
+-#define TCPC_CMD_I2C_IDLE 0xFF
+-
+-#define TCPC_DEV_CAP_1 0x24
+-#define TCPC_DEV_CAP_2 0x26
+-#define TCPC_STD_INPUT_CAP 0x28
+-#define TCPC_STD_OUTPUT_CAP 0x29
+-
+-#define TCPC_MSG_HDR_INFO 0x2e
+-#define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3)
+-#define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0)
+-#define TCPC_MSG_HDR_INFO_REV_SHIFT 1
+-#define TCPC_MSG_HDR_INFO_REV_MASK 0x3
+-
+-#define TCPC_RX_DETECT 0x2f
+-#define TCPC_RX_DETECT_HARD_RESET BIT(5)
+-#define TCPC_RX_DETECT_SOP BIT(0)
+-#define TCPC_RX_DETECT_SOP1 BIT(1)
+-#define TCPC_RX_DETECT_SOP2 BIT(2)
+-#define TCPC_RX_DETECT_DBG1 BIT(3)
+-#define TCPC_RX_DETECT_DBG2 BIT(4)
+-
+-#define TCPC_RX_BYTE_CNT 0x30
+-#define TCPC_RX_BUF_FRAME_TYPE 0x31
+-#define TCPC_RX_BUF_FRAME_TYPE_SOP 0
+-#define TCPC_RX_HDR 0x32
+-#define TCPC_RX_DATA 0x34 /* through 0x4f */
+-
+-#define TCPC_TRANSMIT 0x50
+-#define TCPC_TRANSMIT_RETRY_SHIFT 4
+-#define TCPC_TRANSMIT_RETRY_MASK 0x3
+-#define TCPC_TRANSMIT_TYPE_SHIFT 0
+-#define TCPC_TRANSMIT_TYPE_MASK 0x7
+-
+-#define TCPC_TX_BYTE_CNT 0x51
+-#define TCPC_TX_HDR 0x52
+-#define TCPC_TX_DATA 0x54 /* through 0x6f */
+-
+-#define TCPC_VBUS_VOLTAGE 0x70
+-#define TCPC_VBUS_VOLTAGE_MASK 0x3ff
+-#define TCPC_VBUS_VOLTAGE_LSB_MV 25
+-#define TCPC_VBUS_SINK_DISCONNECT_THRESH 0x72
+-#define TCPC_VBUS_SINK_DISCONNECT_THRESH_LSB_MV 25
+-#define TCPC_VBUS_SINK_DISCONNECT_THRESH_MAX 0x3ff
+-#define TCPC_VBUS_STOP_DISCHARGE_THRESH 0x74
+-#define TCPC_VBUS_VOLTAGE_ALARM_HI_CFG 0x76
+-#define TCPC_VBUS_VOLTAGE_ALARM_LO_CFG 0x78
+-
+-/* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible I2C_WRITE_BYTE_COUNT */
+-#define TCPC_TRANSMIT_BUFFER_MAX_LEN 31
+-
+-struct tcpci;
+-
+-/*
+- * @TX_BUF_BYTE_x_hidden:
+- * optional; Set when TX_BUF_BYTE_x can only be accessed through I2C_WRITE_BYTE_COUNT.
+- * @frs_sourcing_vbus:
+- * Optional; Callback to perform chip specific operations when FRS
+- * is sourcing vbus.
+- * @auto_discharge_disconnect:
+- * Optional; Enables TCPC to autonously discharge vbus on disconnect.
+- * @vbus_vsafe0v:
+- * optional; Set when TCPC can detect whether vbus is at VSAFE0V.
+- * @set_partner_usb_comm_capable:
+- * Optional; The USB Communications Capable bit indicates if port
+- * partner is capable of communication over the USB data lines
+- * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit.
+- */
+-struct tcpci_data {
+- struct regmap *regmap;
+- unsigned char TX_BUF_BYTE_x_hidden:1;
+- unsigned char auto_discharge_disconnect:1;
+- unsigned char vbus_vsafe0v:1;
+-
+- int (*init)(struct tcpci *tcpci, struct tcpci_data *data);
+- int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data,
+- bool enable);
+- int (*start_drp_toggling)(struct tcpci *tcpci, struct tcpci_data *data,
+- enum typec_cc_status cc);
+- int (*set_vbus)(struct tcpci *tcpci, struct tcpci_data *data, bool source, bool sink);
+- void (*frs_sourcing_vbus)(struct tcpci *tcpci, struct tcpci_data *data);
+- void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data,
+- bool capable);
+-};
+-
+-struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data);
+-void tcpci_unregister_port(struct tcpci *tcpci);
+-irqreturn_t tcpci_irq(struct tcpci *tcpci);
+-
+-struct tcpm_port;
+-struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci);
+-#endif /* __LINUX_USB_TCPCI_H */
+diff --git a/drivers/usb/typec/tcpm/tcpci_maxim.c b/drivers/usb/typec/tcpm/tcpci_maxim.c
+index df2505570f07d..4b6705f3d7b78 100644
+--- a/drivers/usb/typec/tcpm/tcpci_maxim.c
++++ b/drivers/usb/typec/tcpm/tcpci_maxim.c
+@@ -11,11 +11,10 @@
+ #include <linux/module.h>
+ #include <linux/regmap.h>
+ #include <linux/usb/pd.h>
++#include <linux/usb/tcpci.h>
+ #include <linux/usb/tcpm.h>
+ #include <linux/usb/typec.h>
+
+-#include "tcpci.h"
+-
+ #define PD_ACTIVITY_TIMEOUT_MS 10000
+
+ #define TCPC_VENDOR_ALERT 0x80
+diff --git a/drivers/usb/typec/tcpm/tcpci_mt6360.c b/drivers/usb/typec/tcpm/tcpci_mt6360.c
+index f1bd9e09bc87f..1b7c31278ebbe 100644
+--- a/drivers/usb/typec/tcpm/tcpci_mt6360.c
++++ b/drivers/usb/typec/tcpm/tcpci_mt6360.c
+@@ -11,10 +11,12 @@
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+ #include <linux/regmap.h>
++#include <linux/usb/tcpci.h>
+ #include <linux/usb/tcpm.h>
+
+-#include "tcpci.h"
+-
++#define MT6360_REG_PHYCTRL1 0x80
++#define MT6360_REG_PHYCTRL3 0x82
++#define MT6360_REG_PHYCTRL7 0x86
+ #define MT6360_REG_VCONNCTRL1 0x8C
+ #define MT6360_REG_MODECTRL2 0x8F
+ #define MT6360_REG_SWRESET 0xA0
+@@ -22,6 +24,8 @@
+ #define MT6360_REG_DRPCTRL1 0xA2
+ #define MT6360_REG_DRPCTRL2 0xA3
+ #define MT6360_REG_I2CTORST 0xBF
++#define MT6360_REG_PHYCTRL11 0xCA
++#define MT6360_REG_RXCTRL1 0xCE
+ #define MT6360_REG_RXCTRL2 0xCF
+ #define MT6360_REG_CTDCTRL2 0xEC
+
+@@ -106,6 +110,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata)
+ if (ret)
+ return ret;
+
++ /* BMC PHY */
++ ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(regmap, MT6360_REG_PHYCTRL3, 0x82);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36);
++ if (ret)
++ return ret;
++
++ ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60);
++ if (ret)
++ return ret;
++
++ ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8);
++ if (ret)
++ return ret;
++
+ /* Set shipping mode off, AUTOIDLE on */
+ return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A);
+ }
+diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
+index b56a0880a0441..3291ca4948da7 100644
+--- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c
++++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
+@@ -10,9 +10,9 @@
+ #include <linux/i2c.h>
+ #include <linux/interrupt.h>
+ #include <linux/gpio/consumer.h>
++#include <linux/usb/tcpci.h>
+ #include <linux/usb/tcpm.h>
+ #include <linux/regmap.h>
+-#include "tcpci.h"
+
+ #define RT1711H_VID 0x29CF
+ #define RT1711H_PID 0x1711
+diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
+index 7f2f3ff1b3911..7833518c60388 100644
+--- a/drivers/usb/typec/tcpm/tcpm.c
++++ b/drivers/usb/typec/tcpm/tcpm.c
+@@ -324,6 +324,7 @@ struct tcpm_port {
+
+ bool attached;
+ bool connected;
++ bool registered;
+ bool pd_supported;
+ enum typec_port_type port_type;
+
+@@ -1427,10 +1428,18 @@ static int tcpm_ams_start(struct tcpm_port *port, enum tcpm_ams ams)
+ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header,
+ const u32 *data, int cnt)
+ {
++ u32 vdo_hdr = port->vdo_data[0];
++
+ WARN_ON(!mutex_is_locked(&port->lock));
+
+- /* Make sure we are not still processing a previous VDM packet */
+- WARN_ON(port->vdm_state > VDM_STATE_DONE);
++	/* If we are sending discover_identity, handle the received message first */
++ if (PD_VDO_SVDM(vdo_hdr) && PD_VDO_CMD(vdo_hdr) == CMD_DISCOVER_IDENT) {
++ port->send_discover = true;
++ mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS);
++ } else {
++ /* Make sure we are not still processing a previous VDM packet */
++ WARN_ON(port->vdm_state > VDM_STATE_DONE);
++ }
+
+ port->vdo_count = cnt + 1;
+ port->vdo_data[0] = header;
+@@ -1497,7 +1506,21 @@ static bool svdm_consume_svids(struct tcpm_port *port, const u32 *p, int cnt)
+ pmdata->svids[pmdata->nsvids++] = svid;
+ tcpm_log(port, "SVID %d: 0x%x", pmdata->nsvids, svid);
+ }
+- return true;
++
++ /*
++ * PD3.0 Spec 6.4.4.3.2: The SVIDs are returned 2 per VDO (see Table
++ * 6-43), and a maximum of 6 VDOs can be returned per response (see Figure
++ * 6-19). If the Responder supports 12 or more SVIDs then the Discover
++ * SVIDs Command Shall be executed multiple times until a Discover
++ * SVIDs VDO is returned ending either with a SVID value of 0x0000 in
++ * the last part of the last VDO or with a VDO containing two SVIDs
++ * with values of 0x0000.
++ *
++ * However, some odd docks support fewer than 12 SVIDs but without
++ * 0x0000 in the last VDO, so we need to break the Discover SVIDs
++ * request and return false here.
++ */
++ return cnt == 7;
+ abort:
+ tcpm_log(port, "SVID_DISCOVERY_MAX(%d) too low!", SVID_DISCOVERY_MAX);
+ return false;
+@@ -1840,7 +1863,8 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port,
+ }
+ break;
+ case ADEV_ATTENTION:
+- typec_altmode_attention(adev, p[1]);
++ if (typec_altmode_attention(adev, p[1]))
++ tcpm_log(port, "typec_altmode_attention no port partner altmode");
+ break;
+ }
+ }
+@@ -1933,11 +1957,13 @@ static void vdm_run_state_machine(struct tcpm_port *port)
+ switch (PD_VDO_CMD(vdo_hdr)) {
+ case CMD_DISCOVER_IDENT:
+ res = tcpm_ams_start(port, DISCOVER_IDENTITY);
+- if (res == 0)
++ if (res == 0) {
+ port->send_discover = false;
+- else if (res == -EAGAIN)
++ } else if (res == -EAGAIN) {
++ port->vdo_data[0] = 0;
+ mod_send_discover_delayed_work(port,
+ SEND_DISCOVER_RETRY_MS);
++ }
+ break;
+ case CMD_DISCOVER_SVID:
+ res = tcpm_ams_start(port, DISCOVER_SVIDS);
+@@ -2020,6 +2046,7 @@ static void vdm_run_state_machine(struct tcpm_port *port)
+ unsigned long timeout;
+
+ port->vdm_retries = 0;
++ port->vdo_data[0] = 0;
+ port->vdm_state = VDM_STATE_BUSY;
+ timeout = vdm_ready_timeout(vdo_hdr);
+ mod_vdm_delayed_work(port, timeout);
+@@ -2663,6 +2690,13 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
+ port->sink_cap_done = true;
+ tcpm_set_state(port, ready_state(port), 0);
+ break;
++ /*
++ * Some port partners do not support GET_STATUS; avoid soft resetting the link to
++ * prevent redundant power re-negotiation
++ */
++ case GET_STATUS_SEND:
++ tcpm_set_state(port, ready_state(port), 0);
++ break;
+ case SRC_READY:
+ case SNK_READY:
+ if (port->vdm_state > VDM_STATE_READY) {
+@@ -3829,6 +3863,29 @@ static enum typec_cc_status tcpm_pwr_opmode_to_rp(enum typec_pwr_opmode opmode)
+ }
+ }
+
++static void tcpm_set_initial_svdm_version(struct tcpm_port *port)
++{
++ switch (port->negotiated_rev) {
++ case PD_REV30:
++ break;
++ /*
++ * 6.4.4.2.3 Structured VDM Version
++ * 2.0 states "At this time, there is only one version (1.0) defined.
++ * This field Shall be set to zero to indicate Version 1.0."
++ * 3.0 states "This field Shall be set to 01b to indicate Version 2.0."
++ * To ensure that we follow the Power Delivery revision we are currently
++ * operating on, downgrade the SVDM version to the highest one supported
++ * by the Power Delivery revision.
++ */
++ case PD_REV20:
++ typec_partner_set_svdm_version(port->partner, SVDM_VER_1_0);
++ break;
++ default:
++ typec_partner_set_svdm_version(port->partner, SVDM_VER_1_0);
++ break;
++ }
++}
++
+ static void run_state_machine(struct tcpm_port *port)
+ {
+ int ret;
+@@ -4053,10 +4110,12 @@ static void run_state_machine(struct tcpm_port *port)
+ * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+ * port->explicit_contract to decide whether to send the command.
+ */
+- if (port->explicit_contract)
++ if (port->explicit_contract) {
++ tcpm_set_initial_svdm_version(port);
+ mod_send_discover_delayed_work(port, 0);
+- else
++ } else {
+ port->send_discover = false;
++ }
+
+ /*
+ * 6.3.5
+@@ -4110,11 +4169,7 @@ static void run_state_machine(struct tcpm_port *port)
+ tcpm_try_src(port) ? SRC_TRY
+ : SNK_ATTACHED,
+ 0);
+- else
+- /* Wait for VBUS, but not forever */
+- tcpm_set_state(port, PORT_RESET, PD_T_PS_SOURCE_ON);
+ break;
+-
+ case SRC_TRY:
+ port->try_src_count++;
+ tcpm_set_cc(port, tcpm_rp_cc(port));
+@@ -4343,10 +4398,12 @@ static void run_state_machine(struct tcpm_port *port)
+ * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+ * port->explicit_contract.
+ */
+- if (port->explicit_contract)
++ if (port->explicit_contract) {
++ tcpm_set_initial_svdm_version(port);
+ mod_send_discover_delayed_work(port, 0);
+- else
++ } else {
+ port->send_discover = false;
++ }
+
+ power_supply_changed(port->psy);
+ break;
+@@ -4530,14 +4587,13 @@ static void run_state_machine(struct tcpm_port *port)
+ tcpm_set_state(port, ready_state(port), 0);
+ break;
+ case DR_SWAP_CHANGE_DR:
+- if (port->data_role == TYPEC_HOST) {
+- tcpm_unregister_altmodes(port);
++ tcpm_unregister_altmodes(port);
++ if (port->data_role == TYPEC_HOST)
+ tcpm_set_roles(port, true, port->pwr_role,
+ TYPEC_DEVICE);
+- } else {
++ else
+ tcpm_set_roles(port, true, port->pwr_role,
+ TYPEC_HOST);
+- }
+ tcpm_ams_finish(port);
+ tcpm_set_state(port, ready_state(port), 0);
+ break;
+@@ -5159,7 +5215,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
+ case SNK_TRYWAIT_DEBOUNCE:
+ break;
+ case SNK_ATTACH_WAIT:
+- tcpm_set_state(port, SNK_UNATTACHED, 0);
++ case SNK_DEBOUNCED:
++ /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */
+ break;
+
+ case SNK_NEGOTIATE_CAPABILITIES:
+@@ -5224,6 +5281,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
+ /* Do nothing, vbus drop expected */
+ break;
+
++ case SNK_HARD_RESET_WAIT_VBUS:
++ /* Do nothing, its OK to receive vbus off events */
++ break;
++
+ default:
+ if (port->pwr_role == TYPEC_SINK && port->attached)
+ tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port));
+@@ -5266,6 +5327,13 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
+ case PR_SWAP_SNK_SRC_SOURCE_ON:
+ /* Do nothing, vsafe0v is expected during transition */
+ break;
++ case SNK_ATTACH_WAIT:
++ case SNK_DEBOUNCED:
++ /* Do nothing, still waiting for VSAFE5V to connect */
++ break;
++ case SNK_HARD_RESET_WAIT_VBUS:
++ /* Do nothing, it's OK to receive vbus off events */
++ break;
+ default:
+ if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
+ tcpm_set_state(port, SNK_UNATTACHED, 0);
+@@ -5926,7 +5994,6 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
+ struct fwnode_handle *fwnode)
+ {
+ const char *opmode_str;
+- const char *cap_str;
+ int ret;
+ u32 mw, frs_current;
+
+@@ -5942,23 +6009,10 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
+ */
+ fw_devlink_purge_absent_suppliers(fwnode);
+
+- /* USB data support is optional */
+- ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
+- if (ret == 0) {
+- ret = typec_find_port_data_role(cap_str);
+- if (ret < 0)
+- return ret;
+- port->typec_caps.data = ret;
+- }
+-
+- ret = fwnode_property_read_string(fwnode, "power-role", &cap_str);
++ ret = typec_get_fw_cap(&port->typec_caps, fwnode);
+ if (ret < 0)
+ return ret;
+
+- ret = typec_find_port_power_role(cap_str);
+- if (ret < 0)
+- return ret;
+- port->typec_caps.type = ret;
+ port->port_type = port->typec_caps.type;
+ port->pd_supported = !fwnode_property_read_bool(fwnode, "pd-disable");
+
+@@ -5995,14 +6049,6 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
+ if (port->port_type == TYPEC_PORT_SRC)
+ return 0;
+
+- /* Get the preferred power role for DRP */
+- ret = fwnode_property_read_string(fwnode, "try-power-role", &cap_str);
+- if (ret < 0)
+- return ret;
+-
+- port->typec_caps.prefer_role = typec_find_power_role(cap_str);
+- if (port->typec_caps.prefer_role < 0)
+- return -EINVAL;
+ sink:
+ port->self_powered = fwnode_property_read_bool(fwnode, "self-powered");
+
+@@ -6211,6 +6257,13 @@ static int tcpm_psy_set_prop(struct power_supply *psy,
+ struct tcpm_port *port = power_supply_get_drvdata(psy);
+ int ret;
+
++ /*
++ * All the properties below are related to USB PD. The check needs to be
++ * property specific when a non-pd related property is added.
++ */
++ if (!port->pd_supported)
++ return -EOPNOTSUPP;
++
+ switch (psp) {
+ case POWER_SUPPLY_PROP_ONLINE:
+ ret = tcpm_psy_set_online(port, val);
+@@ -6295,7 +6348,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer)
+ {
+ struct tcpm_port *port = container_of(timer, struct tcpm_port, state_machine_timer);
+
+- kthread_queue_work(port->wq, &port->state_machine);
++ if (port->registered)
++ kthread_queue_work(port->wq, &port->state_machine);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -6303,7 +6357,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time
+ {
+ struct tcpm_port *port = container_of(timer, struct tcpm_port, vdm_state_machine_timer);
+
+- kthread_queue_work(port->wq, &port->vdm_state_machine);
++ if (port->registered)
++ kthread_queue_work(port->wq, &port->vdm_state_machine);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -6311,7 +6366,8 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer)
+ {
+ struct tcpm_port *port = container_of(timer, struct tcpm_port, enable_frs_timer);
+
+- kthread_queue_work(port->wq, &port->enable_frs);
++ if (port->registered)
++ kthread_queue_work(port->wq, &port->enable_frs);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -6319,7 +6375,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer)
+ {
+ struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer);
+
+- kthread_queue_work(port->wq, &port->send_discover_work);
++ if (port->registered)
++ kthread_queue_work(port->wq, &port->send_discover_work);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -6407,6 +6464,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
+ typec_port_register_altmodes(port->typec_port,
+ &tcpm_altmode_ops, port,
+ port->port_altmode, ALTMODE_DISCOVERY_MAX);
++ port->registered = true;
+
+ mutex_lock(&port->lock);
+ tcpm_init(port);
+@@ -6428,6 +6486,9 @@ void tcpm_unregister_port(struct tcpm_port *port)
+ {
+ int i;
+
++ port->registered = false;
++ kthread_destroy_worker(port->wq);
++
+ hrtimer_cancel(&port->send_discover_timer);
+ hrtimer_cancel(&port->enable_frs_timer);
+ hrtimer_cancel(&port->vdm_state_machine_timer);
+@@ -6439,7 +6500,6 @@ void tcpm_unregister_port(struct tcpm_port *port)
+ typec_unregister_port(port->typec_port);
+ usb_role_switch_put(port->role_sw);
+ tcpm_debugfs_exit(port);
+- kthread_destroy_worker(port->wq);
+ }
+ EXPORT_SYMBOL_GPL(tcpm_unregister_port);
+
+diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
+index ea4cc0a6e40cc..2f32c3fceef87 100644
+--- a/drivers/usb/typec/tipd/core.c
++++ b/drivers/usb/typec/tipd/core.c
+@@ -117,7 +117,7 @@ tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len)
+ u8 data[TPS_MAX_LEN + 1];
+ int ret;
+
+- if (WARN_ON(len + 1 > sizeof(data)))
++ if (len + 1 > sizeof(data))
+ return -EINVAL;
+
+ if (!tps->i2c_protocol)
+@@ -246,6 +246,10 @@ static int tps6598x_connect(struct tps6598x *tps, u32 status)
+ typec_set_pwr_opmode(tps->port, mode);
+ typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status));
+ typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status));
++ if (TPS_STATUS_TO_UPSIDE_DOWN(status))
++ typec_set_orientation(tps->port, TYPEC_ORIENTATION_REVERSE);
++ else
++ typec_set_orientation(tps->port, TYPEC_ORIENTATION_NORMAL);
+ tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), true);
+
+ tps->partner = typec_register_partner(tps->port, &desc);
+@@ -268,6 +272,7 @@ static void tps6598x_disconnect(struct tps6598x *tps, u32 status)
+ typec_set_pwr_opmode(tps->port, TYPEC_PWR_MODE_USB);
+ typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status));
+ typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status));
++ typec_set_orientation(tps->port, TYPEC_ORIENTATION_NONE);
+ tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), false);
+
+ power_supply_changed(tps->psy);
+@@ -618,12 +623,12 @@ static int tps6598x_probe(struct i2c_client *client)
+
+ ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
+ if (ret < 0)
+- return ret;
++ goto err_clear_mask;
+ trace_tps6598x_status(status);
+
+ ret = tps6598x_read32(tps, TPS_REG_SYSTEM_CONF, &conf);
+ if (ret < 0)
+- return ret;
++ goto err_clear_mask;
+
+ /*
+ * This fwnode has a "compatible" property, but is never populated as a
+@@ -679,14 +684,13 @@ static int tps6598x_probe(struct i2c_client *client)
+
+ ret = devm_tps6598_psy_register(tps);
+ if (ret)
+- return ret;
++ goto err_role_put;
+
+ tps->port = typec_register_port(&client->dev, &typec_cap);
+ if (IS_ERR(tps->port)) {
+ ret = PTR_ERR(tps->port);
+ goto err_role_put;
+ }
+- fwnode_handle_put(fwnode);
+
+ if (status & TPS_STATUS_PLUG_PRESENT) {
+ ret = tps6598x_connect(tps, status);
+@@ -705,6 +709,7 @@ static int tps6598x_probe(struct i2c_client *client)
+ }
+
+ i2c_set_clientdata(client, tps);
++ fwnode_handle_put(fwnode);
+
+ return 0;
+
+@@ -712,7 +717,8 @@ err_role_put:
+ usb_role_switch_put(tps->role_sw);
+ err_fwnode_put:
+ fwnode_handle_put(fwnode);
+-
++err_clear_mask:
++ tps6598x_write64(tps, TPS_REG_INT_MASK1, 0);
+ return ret;
+ }
+
+diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h
+index 003a577be2164..1f59b9fa3fad2 100644
+--- a/drivers/usb/typec/tipd/tps6598x.h
++++ b/drivers/usb/typec/tipd/tps6598x.h
+@@ -17,6 +17,7 @@
+ /* TPS_REG_STATUS bits */
+ #define TPS_STATUS_PLUG_PRESENT BIT(0)
+ #define TPS_STATUS_PLUG_UPSIDE_DOWN BIT(4)
++#define TPS_STATUS_TO_UPSIDE_DOWN(s) (!!((s) & TPS_STATUS_PLUG_UPSIDE_DOWN))
+ #define TPS_STATUS_PORTROLE BIT(5)
+ #define TPS_STATUS_TO_TYPEC_PORTROLE(s) (!!((s) & TPS_STATUS_PORTROLE))
+ #define TPS_STATUS_DATAROLE BIT(6)
+diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
+index 5ef5bd0e87cf2..dca6803a75bdd 100644
+--- a/drivers/usb/typec/ucsi/ucsi.c
++++ b/drivers/usb/typec/ucsi/ucsi.c
+@@ -76,6 +76,10 @@ static int ucsi_read_error(struct ucsi *ucsi)
+ if (ret)
+ return ret;
+
++ ret = ucsi_acknowledge_command(ucsi);
++ if (ret)
++ return ret;
++
+ switch (error) {
+ case UCSI_ERROR_INCOMPATIBLE_PARTNER:
+ return -EOPNOTSUPP;
+@@ -511,8 +515,6 @@ static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner,
+ num_pdos * sizeof(u32));
+ if (ret < 0)
+ dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret);
+- if (ret == 0 && offset == 0)
+- dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n");
+
+ return ret;
+ }
+@@ -955,6 +957,8 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
+ role == TYPEC_HOST))
+ goto out_unlock;
+
++ reinit_completion(&con->complete);
++
+ command = UCSI_SET_UOR | UCSI_CONNECTOR_NUMBER(con->num);
+ command |= UCSI_SET_UOR_ROLE(role);
+ command |= UCSI_SET_UOR_ACCEPT_ROLE_SWAPS;
+@@ -962,14 +966,18 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
+ if (ret < 0)
+ goto out_unlock;
+
++ mutex_unlock(&con->lock);
++
+ if (!wait_for_completion_timeout(&con->complete,
+- msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
+- ret = -ETIMEDOUT;
++ msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
++ return -ETIMEDOUT;
++
++ return 0;
+
+ out_unlock:
+ mutex_unlock(&con->lock);
+
+- return ret < 0 ? ret : 0;
++ return ret;
+ }
+
+ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
+@@ -991,6 +999,8 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
+ if (cur_role == role)
+ goto out_unlock;
+
++ reinit_completion(&con->complete);
++
+ command = UCSI_SET_PDR | UCSI_CONNECTOR_NUMBER(con->num);
+ command |= UCSI_SET_PDR_ROLE(role);
+ command |= UCSI_SET_PDR_ACCEPT_ROLE_SWAPS;
+@@ -998,11 +1008,13 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
+ if (ret < 0)
+ goto out_unlock;
+
++ mutex_unlock(&con->lock);
++
+ if (!wait_for_completion_timeout(&con->complete,
+- msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) {
+- ret = -ETIMEDOUT;
+- goto out_unlock;
+- }
++ msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
++ return -ETIMEDOUT;
++
++ mutex_lock(&con->lock);
+
+ /* Something has gone wrong while swapping the role */
+ if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) !=
+@@ -1190,7 +1202,7 @@ out_unlock:
+ static int ucsi_init(struct ucsi *ucsi)
+ {
+ struct ucsi_connector *con;
+- u64 command;
++ u64 command, ntfy;
+ int ret;
+ int i;
+
+@@ -1202,8 +1214,8 @@ static int ucsi_init(struct ucsi *ucsi)
+ }
+
+ /* Enable basic notifications */
+- ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
+- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
++ ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
++ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
+ ret = ucsi_send_command(ucsi, command, NULL, 0);
+ if (ret < 0)
+ goto err_reset;
+@@ -1235,12 +1247,13 @@ static int ucsi_init(struct ucsi *ucsi)
+ }
+
+ /* Enable all notifications */
+- ucsi->ntfy = UCSI_ENABLE_NTFY_ALL;
+- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
++ ntfy = UCSI_ENABLE_NTFY_ALL;
++ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
+ ret = ucsi_send_command(ucsi, command, NULL, 0);
+ if (ret < 0)
+ goto err_unregister;
+
++ ucsi->ntfy = ntfy;
+ return 0;
+
+ err_unregister:
+diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
+index bff96d64dddff..6db7c8ddd51cd 100644
+--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
++++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
+@@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc)
+ if (status < 0)
+ return status;
+
+- if (!data)
++ if (!(data & DEV_INT))
+ return 0;
+
+ status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
+index d8d3892e5a69a..3c6d452e3bf40 100644
+--- a/drivers/usb/usbip/stub_dev.c
++++ b/drivers/usb/usbip/stub_dev.c
+@@ -393,7 +393,6 @@ static int stub_probe(struct usb_device *udev)
+
+ err_port:
+ dev_set_drvdata(&udev->dev, NULL);
+- usb_put_dev(udev);
+
+ /* we already have busid_priv, just lock busid_lock */
+ spin_lock(&busid_priv->busid_lock);
+@@ -408,6 +407,7 @@ call_put_busid_priv:
+ put_busid_priv(busid_priv);
+
+ sdev_free:
++ usb_put_dev(udev);
+ stub_device_free(sdev);
+
+ return rc;
+diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
+index 325c22008e536..5dd41e8215e0f 100644
+--- a/drivers/usb/usbip/stub_rx.c
++++ b/drivers/usb/usbip/stub_rx.c
+@@ -138,7 +138,9 @@ static int tweak_set_configuration_cmd(struct urb *urb)
+ req = (struct usb_ctrlrequest *) urb->setup_packet;
+ config = le16_to_cpu(req->wValue);
+
++ usb_lock_device(sdev->udev);
+ err = usb_set_configuration(sdev->udev, config);
++ usb_unlock_device(sdev->udev);
+ if (err && err != -ENODEV)
+ dev_err(&sdev->udev->dev, "can't set config #%d, error %d\n",
+ config, err);
+diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
+index 2808f1ba9f7b8..5091ff9d6c93f 100644
+--- a/drivers/vdpa/ifcvf/ifcvf_base.c
++++ b/drivers/vdpa/ifcvf/ifcvf_base.c
+@@ -143,8 +143,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
+ IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr);
+ break;
+ case VIRTIO_PCI_CAP_DEVICE_CFG:
+- hw->net_cfg = get_cap_addr(hw, &cap);
+- IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg);
++ hw->dev_cfg = get_cap_addr(hw, &cap);
++ IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
+ break;
+ }
+
+@@ -153,7 +153,7 @@ next:
+ }
+
+ if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+- hw->isr == NULL || hw->net_cfg == NULL) {
++ hw->isr == NULL || hw->dev_cfg == NULL) {
+ IFCVF_ERR(pdev, "Incomplete PCI capabilities\n");
+ return -EIO;
+ }
+@@ -174,7 +174,7 @@ next:
+ IFCVF_DBG(pdev,
+ "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n",
+ hw->common_cfg, hw->notify_base, hw->isr,
+- hw->net_cfg, hw->notify_off_multiplier);
++ hw->dev_cfg, hw->notify_off_multiplier);
+
+ return 0;
+ }
+@@ -242,33 +242,54 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
+ return 0;
+ }
+
+-void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
++u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
++{
++ struct ifcvf_adapter *adapter;
++ u32 config_size;
++
++ adapter = vf_to_adapter(hw);
++ switch (hw->dev_type) {
++ case VIRTIO_ID_NET:
++ config_size = sizeof(struct virtio_net_config);
++ break;
++ case VIRTIO_ID_BLOCK:
++ config_size = sizeof(struct virtio_blk_config);
++ break;
++ default:
++ config_size = 0;
++ IFCVF_ERR(adapter->pdev, "VIRTIO ID %u not supported\n", hw->dev_type);
++ }
++
++ return config_size;
++}
++
++void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
+ void *dst, int length)
+ {
+ u8 old_gen, new_gen, *p;
+ int i;
+
+- WARN_ON(offset + length > sizeof(struct virtio_net_config));
++ WARN_ON(offset + length > hw->config_size);
+ do {
+ old_gen = ifc_ioread8(&hw->common_cfg->config_generation);
+ p = dst;
+ for (i = 0; i < length; i++)
+- *p++ = ifc_ioread8(hw->net_cfg + offset + i);
++ *p++ = ifc_ioread8(hw->dev_cfg + offset + i);
+
+ new_gen = ifc_ioread8(&hw->common_cfg->config_generation);
+ } while (old_gen != new_gen);
+ }
+
+-void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
++void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
+ const void *src, int length)
+ {
+ const u8 *p;
+ int i;
+
+ p = src;
+- WARN_ON(offset + length > sizeof(struct virtio_net_config));
++ WARN_ON(offset + length > hw->config_size);
+ for (i = 0; i < length; i++)
+- ifc_iowrite8(*p++, hw->net_cfg + offset + i);
++ ifc_iowrite8(*p++, hw->dev_cfg + offset + i);
+ }
+
+ static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
+@@ -306,7 +327,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid)
+ u32 q_pair_id;
+
+ ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
+- q_pair_id = qid / hw->nr_vring;
++ q_pair_id = qid / 2;
+ avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
+ last_avail_idx = ifc_ioread16(avail_idx_addr);
+
+@@ -320,7 +341,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num)
+ u32 q_pair_id;
+
+ ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
+- q_pair_id = qid / hw->nr_vring;
++ q_pair_id = qid / 2;
+ avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
+ hw->vring[qid].last_avail_idx = num;
+ ifc_iowrite16(num, avail_idx_addr);
+diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
+index 09918af3ecf82..c486873f370a8 100644
+--- a/drivers/vdpa/ifcvf/ifcvf_base.h
++++ b/drivers/vdpa/ifcvf/ifcvf_base.h
+@@ -71,12 +71,14 @@ struct ifcvf_hw {
+ u64 hw_features;
+ u32 dev_type;
+ struct virtio_pci_common_cfg __iomem *common_cfg;
+- void __iomem *net_cfg;
++ void __iomem *dev_cfg;
+ struct vring_info vring[IFCVF_MAX_QUEUES];
+ void __iomem * const *base;
+ char config_msix_name[256];
+ struct vdpa_callback config_cb;
+ unsigned int config_irq;
++ /* virtio-net or virtio-blk device config size */
++ u32 config_size;
+ };
+
+ struct ifcvf_adapter {
+@@ -105,9 +107,9 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
+ int ifcvf_start_hw(struct ifcvf_hw *hw);
+ void ifcvf_stop_hw(struct ifcvf_hw *hw);
+ void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
+-void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
++void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset,
+ void *dst, int length);
+-void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset,
++void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
+ const void *src, int length);
+ u8 ifcvf_get_status(struct ifcvf_hw *hw);
+ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
+@@ -120,4 +122,5 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
+ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
+ struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
+ int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
++u32 ifcvf_get_config_size(struct ifcvf_hw *hw);
+ #endif /* _IFCVF_H_ */
+diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
+index dcd648e1f7e7e..4fe8aa13ac687 100644
+--- a/drivers/vdpa/ifcvf/ifcvf_main.c
++++ b/drivers/vdpa/ifcvf/ifcvf_main.c
+@@ -366,24 +366,9 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
+
+ static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
+ {
+- struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+- struct pci_dev *pdev = adapter->pdev;
+- size_t size;
+
+- switch (vf->dev_type) {
+- case VIRTIO_ID_NET:
+- size = sizeof(struct virtio_net_config);
+- break;
+- case VIRTIO_ID_BLOCK:
+- size = sizeof(struct virtio_blk_config);
+- break;
+- default:
+- size = 0;
+- IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
+- }
+-
+- return size;
++ return vf->config_size;
+ }
+
+ static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
+@@ -392,8 +377,7 @@ static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
+ {
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+- WARN_ON(offset + len > sizeof(struct virtio_net_config));
+- ifcvf_read_net_config(vf, offset, buf, len);
++ ifcvf_read_dev_config(vf, offset, buf, len);
+ }
+
+ static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
+@@ -402,8 +386,7 @@ static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev,
+ {
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+- WARN_ON(offset + len > sizeof(struct virtio_net_config));
+- ifcvf_write_net_config(vf, offset, buf, len);
++ ifcvf_write_dev_config(vf, offset, buf, len);
+ }
+
+ static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
+@@ -522,7 +505,6 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+ }
+
+ ifcvf_mgmt_dev->adapter = adapter;
+- pci_set_drvdata(pdev, ifcvf_mgmt_dev);
+
+ vf = &adapter->vf;
+ vf->dev_type = get_dev_type(pdev);
+@@ -541,6 +523,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+ vf->vring[i].irq = -EINVAL;
+
+ vf->hw_features = ifcvf_get_hw_features(vf);
++ vf->config_size = ifcvf_get_config_size(vf);
+
+ adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+ ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
+@@ -636,6 +619,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ goto err;
+ }
+
++ pci_set_drvdata(pdev, ifcvf_mgmt_dev);
++
+ return 0;
+
+ err:
+diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+index 01a848adf5903..81dc3d88d3ddd 100644
+--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
++++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+@@ -63,7 +63,7 @@ struct mlx5_control_vq {
+ unsigned short head;
+ };
+
+-struct mlx5_ctrl_wq_ent {
++struct mlx5_vdpa_wq_ent {
+ struct work_struct work;
+ struct mlx5_vdpa_dev *mvdev;
+ };
+diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+index bd56de7484dcb..e748c00789f04 100644
+--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
+@@ -161,6 +161,9 @@ struct mlx5_vdpa_net {
+ bool setup;
+ u16 mtu;
+ u32 cur_num_vqs;
++ struct notifier_block nb;
++ struct vdpa_callback config_cb;
++ struct mlx5_vdpa_wq_ent cvq_ent;
+ };
+
+ static void free_resources(struct mlx5_vdpa_net *ndev);
+@@ -873,8 +876,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
+ MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
+ MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
+ MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
+- if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
+- MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
+
+ err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
+ if (err)
+@@ -1512,9 +1513,11 @@ static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
+ return 0;
+
+ clean_added:
+- for (--i; i >= cur_qps; --i)
++ for (--i; i >= 2 * cur_qps; --i)
+ teardown_vq(ndev, &ndev->vqs[i]);
+
++ ndev->cur_num_vqs = 2 * cur_qps;
++
+ return err;
+ }
+
+@@ -1529,11 +1532,27 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+
+ switch (cmd) {
+ case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
++ /* This mq feature check aligns with pre-existing userspace
++ * implementation.
++ *
++ * Without it, an untrusted driver could fake a multiqueue config
++ * request down to a non-mq device, which may cause the kernel to
++ * panic due to uninitialized resources for extra vqs. Even with
++ * a well-behaved guest driver, it is not expected to allow
++ * changing the number of vqs on a non-mq device.
++ */
++ if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
++ break;
++
+ read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
+ if (read != sizeof(mq))
+ break;
+
+ newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
++ if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
++ newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
++ break;
++
+ if (ndev->cur_num_vqs == 2 * newqps) {
+ status = VIRTIO_NET_OK;
+ break;
+@@ -1557,22 +1576,22 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
+ {
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+ struct virtio_net_ctrl_hdr ctrl;
+- struct mlx5_ctrl_wq_ent *wqent;
++ struct mlx5_vdpa_wq_ent *wqent;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_control_vq *cvq;
+ struct mlx5_vdpa_net *ndev;
+ size_t read, write;
+ int err;
+
+- wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
++ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+ mvdev = wqent->mvdev;
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ cvq = &mvdev->cvq;
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+- goto out;
++ return;
+
+ if (!cvq->ready)
+- goto out;
++ return;
+
+ while (true) {
+ err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
+@@ -1606,9 +1625,10 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
+
+ if (vringh_need_notify_iotlb(&cvq->vring))
+ vringh_notify(&cvq->vring);
++
++ queue_work(mvdev->wq, &wqent->work);
++ break;
+ }
+-out:
+- kfree(wqent);
+ }
+
+ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
+@@ -1616,22 +1636,15 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_vdpa_virtqueue *mvq;
+- struct mlx5_ctrl_wq_ent *wqent;
+
+ if (!is_index_valid(mvdev, idx))
+ return;
+
+ if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
+- if (!mvdev->cvq.ready)
+- return;
+-
+- wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+- if (!wqent)
++ if (!mvdev->wq || !mvdev->cvq.ready)
+ return;
+
+- wqent->mvdev = mvdev;
+- INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
+- queue_work(mvdev->wq, &wqent->work);
++ queue_work(mvdev->wq, &ndev->cvq_ent.work);
+ return;
+ }
+
+@@ -1685,6 +1698,8 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+
+ ndev->event_cbs[idx] = *cb;
++ if (is_ctrl_vq_idx(mvdev, idx))
++ mvdev->cvq.event_cb = *cb;
+ }
+
+ static void mlx5_cvq_notify(struct vringh *vring)
+@@ -1852,23 +1867,37 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
++ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
+
+ print_features(mvdev, ndev->mvdev.mlx_features, false);
+ return ndev->mvdev.mlx_features;
+ }
+
+-static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
++static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
+ {
++ /* Minimum features to expect */
+ if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
+ return -EOPNOTSUPP;
+
++ /* Double-check the feature combination sent down by the driver.
++ * Fail invalid features due to absence of the feature they depend on.
++ *
++ * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
++ * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
++ * By failing the invalid features sent down by untrusted drivers,
++ * we're assured that the assumptions behind is_index_valid() and
++ * is_ctrl_vq_idx() will not be compromised.
++ */
++ if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
++ BIT_ULL(VIRTIO_NET_F_MQ))
++ return -EINVAL;
++
+ return 0;
+ }
+
+ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
+ {
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+- struct mlx5_control_vq *cvq = &mvdev->cvq;
+ int err;
+ int i;
+
+@@ -1878,16 +1907,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
+ goto err_vq;
+ }
+
+- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+- err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+- MLX5_CVQ_MAX_ENT, false,
+- (struct vring_desc *)(uintptr_t)cvq->desc_addr,
+- (struct vring_avail *)(uintptr_t)cvq->driver_addr,
+- (struct vring_used *)(uintptr_t)cvq->device_addr);
+- if (err)
+- goto err_vq;
+- }
+-
+ return 0;
+
+ err_vq:
+@@ -1937,7 +1956,7 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
+
+ print_features(mvdev, features, true);
+
+- err = verify_min_features(mvdev, features);
++ err = verify_driver_features(mvdev, features);
+ if (err)
+ return err;
+
+@@ -1950,8 +1969,10 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
+
+ static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
+ {
+- /* not implemented */
+- mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
++ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
++ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
++
++ ndev->config_cb = *cb;
+ }
+
+ #define MLX5_VDPA_MAX_VQ_ENTRIES 256
+@@ -2152,6 +2173,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
+ ndev->mvdev.cvq.ready = false;
+ }
+
++static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
++{
++ struct mlx5_control_vq *cvq = &mvdev->cvq;
++ int err = 0;
++
++ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
++ err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
++ MLX5_CVQ_MAX_ENT, false,
++ (struct vring_desc *)(uintptr_t)cvq->desc_addr,
++ (struct vring_avail *)(uintptr_t)cvq->driver_addr,
++ (struct vring_used *)(uintptr_t)cvq->device_addr);
++
++ return err;
++}
++
+ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
+ {
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+@@ -2162,6 +2198,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
+
+ if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
++ err = setup_cvq_vring(mvdev);
++ if (err) {
++ mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
++ goto err_setup;
++ }
+ err = setup_driver(mvdev);
+ if (err) {
+ mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
+@@ -2192,7 +2233,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+ clear_vqs_ready(ndev);
+ mlx5_vdpa_destroy_mr(&ndev->mvdev);
+ ndev->mvdev.status = 0;
+- ndev->mvdev.mlx_features = 0;
+ memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
+ ndev->mvdev.actual_features = 0;
+ ++mvdev->generation;
+@@ -2404,6 +2444,82 @@ struct mlx5_vdpa_mgmtdev {
+ struct mlx5_vdpa_net *ndev;
+ };
+
++static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
++{
++ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
++ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
++ int err;
++
++ MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
++ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
++ MLX5_SET(query_vport_state_in, in, vport_number, vport);
++ if (vport)
++ MLX5_SET(query_vport_state_in, in, other_vport, 1);
++
++ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
++ if (err)
++ return 0;
++
++ return MLX5_GET(query_vport_state_out, out, state);
++}
++
++static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
++{
++ if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
++ VPORT_STATE_UP)
++ return true;
++
++ return false;
++}
++
++static void update_carrier(struct work_struct *work)
++{
++ struct mlx5_vdpa_wq_ent *wqent;
++ struct mlx5_vdpa_dev *mvdev;
++ struct mlx5_vdpa_net *ndev;
++
++ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
++ mvdev = wqent->mvdev;
++ ndev = to_mlx5_vdpa_ndev(mvdev);
++ if (get_link_state(mvdev))
++ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
++ else
++ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
++
++ if (ndev->config_cb.callback)
++ ndev->config_cb.callback(ndev->config_cb.private);
++
++ kfree(wqent);
++}
++
++static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
++{
++ struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
++ struct mlx5_eqe *eqe = param;
++ int ret = NOTIFY_DONE;
++ struct mlx5_vdpa_wq_ent *wqent;
++
++ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
++ switch (eqe->sub_type) {
++ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
++ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
++ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
++ if (!wqent)
++ return NOTIFY_DONE;
++
++ wqent->mvdev = &ndev->mvdev;
++ INIT_WORK(&wqent->work, update_carrier);
++ queue_work(ndev->mvdev.wq, &wqent->work);
++ ret = NOTIFY_OK;
++ break;
++ default:
++ return NOTIFY_DONE;
++ }
++ return ret;
++ }
++ return ret;
++}
++
+ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
+ {
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+@@ -2448,6 +2564,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
+ if (err)
+ goto err_mtu;
+
++ if (get_link_state(mvdev))
++ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
++ else
++ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
++
+ if (!is_zero_ether_addr(config->mac)) {
+ pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
+ err = mlx5_mpfs_add_mac(pfmdev, config->mac);
+@@ -2473,12 +2594,16 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
+ if (err)
+ goto err_mr;
+
+- mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
++ ndev->cvq_ent.mvdev = mvdev;
++ INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
++ mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
+ if (!mvdev->wq) {
+ err = -ENOMEM;
+ goto err_res2;
+ }
+
++ ndev->nb.notifier_call = event_handler;
++ mlx5_notifier_register(mdev, &ndev->nb);
+ ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
+ mvdev->vdev.mdev = &mgtdev->mgtdev;
+ err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
+@@ -2509,8 +2634,13 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
+ {
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
++ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
++ struct workqueue_struct *wq;
+
+- destroy_workqueue(mvdev->wq);
++ mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
++ wq = mvdev->wq;
++ mvdev->wq = NULL;
++ destroy_workqueue(wq);
+ _vdpa_unregister_device(dev);
+ mgtdev->ndev = NULL;
+ }
+diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
+index 1dc121a07a934..86571498c1c23 100644
+--- a/drivers/vdpa/vdpa.c
++++ b/drivers/vdpa/vdpa.c
+@@ -353,7 +353,8 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m
+ goto msg_err;
+
+ while (mdev->id_table[i].device) {
+- supported_classes |= BIT(mdev->id_table[i].device);
++ if (mdev->id_table[i].device <= 63)
++ supported_classes |= BIT_ULL(mdev->id_table[i].device);
+ i++;
+ }
+
+@@ -557,14 +558,19 @@ static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
+ goto mdev_err;
+ }
+ err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack);
+- if (!err)
+- err = genlmsg_reply(msg, info);
++ if (err)
++ goto mdev_err;
++
++ err = genlmsg_reply(msg, info);
++ put_device(dev);
++ mutex_unlock(&vdpa_dev_mutex);
++ return err;
++
+ mdev_err:
+ put_device(dev);
+ err:
+ mutex_unlock(&vdpa_dev_mutex);
+- if (err)
+- nlmsg_free(msg);
++ nlmsg_free(msg);
+ return err;
+ }
+
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+index 5f484fff8dbec..3ccefa58e405c 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
+@@ -65,15 +65,27 @@ static void vdpasim_vq_notify(struct vringh *vring)
+ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
+ {
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
++ uint16_t last_avail_idx = vq->vring.last_avail_idx;
+
+- vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
+- VDPASIM_QUEUE_MAX, false,
++ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+ (struct vring_desc *)(uintptr_t)vq->desc_addr,
+ (struct vring_avail *)
+ (uintptr_t)vq->driver_addr,
+ (struct vring_used *)
+ (uintptr_t)vq->device_addr);
+
++ vq->vring.last_avail_idx = last_avail_idx;
++
++ /*
++ * Since vdpa_sim does not support receiving inflight descriptors as a
++ * destination of a migration, let's set both avail_idx and used_idx
++ * the same at vq start. This is how vhost-user works in a
++ * VHOST_SET_VRING_BASE call.
++ *
++ * Although the simple fix is to set last_used_idx at
++ * vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready.
++ */
++ vq->vring.last_used_idx = last_avail_idx;
+ vq->vring.notify = vdpasim_vq_notify;
+ }
+
+@@ -353,11 +365,14 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
+ {
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
++ bool old_ready;
+
+ spin_lock(&vdpasim->lock);
++ old_ready = vq->ready;
+ vq->ready = ready;
+- if (vq->ready)
++ if (vq->ready && !old_ready) {
+ vdpasim_queue_ready(vdpasim, idx);
++ }
+ spin_unlock(&vdpasim->lock);
+ }
+
+@@ -591,8 +606,11 @@ static void vdpasim_free(struct vdpa_device *vdpa)
+ vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
+ }
+
+- put_iova_domain(&vdpasim->iova);
+- iova_cache_put();
++ if (vdpa_get_dma_dev(vdpa)) {
++ put_iova_domain(&vdpasim->iova);
++ iova_cache_put();
++ }
++
+ kvfree(vdpasim->buffer);
+ if (vdpasim->iommu)
+ vhost_iotlb_free(vdpasim->iommu);
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+index a790903f243e8..22b812c32bee8 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+@@ -308,8 +308,10 @@ static int __init vdpasim_blk_init(void)
+ int ret;
+
+ ret = device_register(&vdpasim_blk_mgmtdev);
+- if (ret)
++ if (ret) {
++ put_device(&vdpasim_blk_mgmtdev);
+ return ret;
++ }
+
+ ret = vdpa_mgmtdev_register(&mgmt_dev);
+ if (ret)
+diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+index a1ab6163f7d13..f1c420c5e26eb 100644
+--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+@@ -194,8 +194,10 @@ static int __init vdpasim_net_init(void)
+ }
+
+ ret = device_register(&vdpasim_net_mgmtdev);
+- if (ret)
++ if (ret) {
++ put_device(&vdpasim_net_mgmtdev);
+ return ret;
++ }
+
+ ret = vdpa_mgmtdev_register(&mgmt_dev);
+ if (ret)
+diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
+index 1daae26088609..0678c25141973 100644
+--- a/drivers/vdpa/vdpa_user/iova_domain.c
++++ b/drivers/vdpa/vdpa_user/iova_domain.c
+@@ -302,7 +302,7 @@ vduse_domain_alloc_iova(struct iova_domain *iovad,
+ iova_len = roundup_pow_of_two(iova_len);
+ iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
+
+- return iova_pfn << shift;
++ return (dma_addr_t)iova_pfn << shift;
+ }
+
+ static void vduse_domain_free_iova(struct iova_domain *iovad,
+diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
+index 841667a896dd0..564864f039d20 100644
+--- a/drivers/vdpa/vdpa_user/vduse_dev.c
++++ b/drivers/vdpa/vdpa_user/vduse_dev.c
+@@ -655,9 +655,15 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
+ {
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+- if (len > dev->config_size - offset)
++ /* Initialize the buffer in case of partial copy. */
++ memset(buf, 0, len);
++
++ if (offset > dev->config_size)
+ return;
+
++ if (len > dev->config_size - offset)
++ len = dev->config_size - offset;
++
+ memcpy(buf, dev->config + offset, len);
+ }
+
+@@ -873,10 +879,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
+ {
+ struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
+
+- spin_lock_irq(&dev->irq_lock);
++ spin_lock_bh(&dev->irq_lock);
+ if (dev->config_cb.callback)
+ dev->config_cb.callback(dev->config_cb.private);
+- spin_unlock_irq(&dev->irq_lock);
++ spin_unlock_bh(&dev->irq_lock);
+ }
+
+ static void vduse_vq_irq_inject(struct work_struct *work)
+@@ -884,10 +890,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
+ struct vduse_virtqueue *vq = container_of(work,
+ struct vduse_virtqueue, inject);
+
+- spin_lock_irq(&vq->irq_lock);
++ spin_lock_bh(&vq->irq_lock);
+ if (vq->ready && vq->cb.callback)
+ vq->cb.callback(vq->cb.private);
+- spin_unlock_irq(&vq->irq_lock);
++ spin_unlock_bh(&vq->irq_lock);
+ }
+
+ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
+@@ -975,7 +981,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
+ break;
+
+ ret = -EINVAL;
+- if (config.length == 0 ||
++ if (config.offset > dev->config_size ||
++ config.length == 0 ||
+ config.length > dev->config_size - config.offset)
+ break;
+
+@@ -1244,6 +1251,12 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
+ if (config->config_size > PAGE_SIZE)
+ return false;
+
++ if (config->vq_num > 0xffff)
++ return false;
++
++ if (!config->name[0])
++ return false;
++
+ if (!device_is_allowed(config->device_id))
+ return false;
+
+@@ -1334,9 +1347,9 @@ static int vduse_create_dev(struct vduse_dev_config *config,
+
+ dev->minor = ret;
+ dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
+- dev->dev = device_create(vduse_class, NULL,
+- MKDEV(MAJOR(vduse_major), dev->minor),
+- dev, "%s", config->name);
++ dev->dev = device_create_with_groups(vduse_class, NULL,
++ MKDEV(MAJOR(vduse_major), dev->minor),
++ dev, vduse_dev_groups, "%s", config->name);
+ if (IS_ERR(dev->dev)) {
+ ret = PTR_ERR(dev->dev);
+ goto err_dev;
+@@ -1464,16 +1477,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode)
+ return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
+ }
+
+-static void vduse_mgmtdev_release(struct device *dev)
+-{
+-}
+-
+-static struct device vduse_mgmtdev = {
+- .init_name = "vduse",
+- .release = vduse_mgmtdev_release,
++struct vduse_mgmt_dev {
++ struct vdpa_mgmt_dev mgmt_dev;
++ struct device dev;
+ };
+
+-static struct vdpa_mgmt_dev mgmt_dev;
++static struct vduse_mgmt_dev *vduse_mgmt;
+
+ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
+ {
+@@ -1498,7 +1507,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
+ }
+ set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
+ vdev->vdpa.dma_dev = &vdev->vdpa.dev;
+- vdev->vdpa.mdev = &mgmt_dev;
++ vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
+
+ return 0;
+ }
+@@ -1543,34 +1552,52 @@ static struct virtio_device_id id_table[] = {
+ { 0 },
+ };
+
+-static struct vdpa_mgmt_dev mgmt_dev = {
+- .device = &vduse_mgmtdev,
+- .id_table = id_table,
+- .ops = &vdpa_dev_mgmtdev_ops,
+-};
++static void vduse_mgmtdev_release(struct device *dev)
++{
++ struct vduse_mgmt_dev *mgmt_dev;
++
++ mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
++ kfree(mgmt_dev);
++}
+
+ static int vduse_mgmtdev_init(void)
+ {
+ int ret;
+
+- ret = device_register(&vduse_mgmtdev);
+- if (ret)
++ vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
++ if (!vduse_mgmt)
++ return -ENOMEM;
++
++ ret = dev_set_name(&vduse_mgmt->dev, "vduse");
++ if (ret) {
++ kfree(vduse_mgmt);
+ return ret;
++ }
++
++ vduse_mgmt->dev.release = vduse_mgmtdev_release;
+
+- ret = vdpa_mgmtdev_register(&mgmt_dev);
++ ret = device_register(&vduse_mgmt->dev);
+ if (ret)
+- goto err;
++ goto dev_reg_err;
+
+- return 0;
+-err:
+- device_unregister(&vduse_mgmtdev);
++ vduse_mgmt->mgmt_dev.id_table = id_table;
++ vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
++ vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
++ ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
++ if (ret)
++ device_unregister(&vduse_mgmt->dev);
++
++ return ret;
++
++dev_reg_err:
++ put_device(&vduse_mgmt->dev);
+ return ret;
+ }
+
+ static void vduse_mgmtdev_exit(void)
+ {
+- vdpa_mgmtdev_unregister(&mgmt_dev);
+- device_unregister(&vduse_mgmtdev);
++ vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
++ device_unregister(&vduse_mgmt->dev);
+ }
+
+ static int vduse_init(void)
+@@ -1583,7 +1610,6 @@ static int vduse_init(void)
+ return PTR_ERR(vduse_class);
+
+ vduse_class->devnode = vduse_devnode;
+- vduse_class->dev_groups = vduse_dev_groups;
+
+ ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
+ if (ret)
+diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
+index 5bcd00246d2e8..dead832b4571f 100644
+--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
++++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
+@@ -513,8 +513,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
+ {
+ struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
+
+- vdpa_unregister_device(&vp_vdpa->vdpa);
+ vp_modern_remove(&vp_vdpa->mdev);
++ vdpa_unregister_device(&vp_vdpa->vdpa);
+ }
+
+ static struct pci_driver vp_vdpa_driver = {
+diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
+index a03b5a99c2dac..f3916e6b16b9d 100644
+--- a/drivers/vfio/pci/vfio_pci_core.c
++++ b/drivers/vfio/pci/vfio_pci_core.c
+@@ -36,6 +36,10 @@ static bool nointxmask;
+ static bool disable_vga;
+ static bool disable_idle_d3;
+
++/* List of PF's that vfio_pci_core_sriov_configure() has been called on */
++static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
++static LIST_HEAD(vfio_pci_sriov_pfs);
++
+ static inline bool vfio_vga_disabled(void)
+ {
+ #ifdef CONFIG_VFIO_PCI_VGA
+@@ -228,6 +232,19 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat
+ if (!ret) {
+ /* D3 might be unsupported via quirk, skip unless in D3 */
+ if (needs_save && pdev->current_state >= PCI_D3hot) {
++ /*
++ * The current PCI state will be saved locally in
++ * 'pm_save' during the D3hot transition. When the
++ * device state is changed to D0 again with the current
++ * function, then pci_store_saved_state() will restore
++ * the state and will free the memory pointed to by
++ * 'pm_save'. There are a few cases where the PCI power
++ * state can be changed to D0 without the involvement
++ * of the driver. For these cases, free the earlier
++ * allocated memory first before overwriting 'pm_save'
++ * to prevent the memory leak.
++ */
++ kfree(vdev->pm_save);
+ vdev->pm_save = pci_store_saved_state(pdev);
+ } else if (needs_restore) {
+ pci_load_and_free_saved_state(pdev, &vdev->pm_save);
+@@ -322,6 +339,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
+ /* For needs_reset */
+ lockdep_assert_held(&vdev->vdev.dev_set->lock);
+
++ /*
++ * This function can be invoked while the power state is non-D0.
++ * This function calls __pci_reset_function_locked() which internally
++ * can use pci_pm_reset() for the function reset. pci_pm_reset() will
++ * fail if the power state is non-D0. Also, for the devices which
++ * have NoSoftRst-, the reset function can cause the PCI config space
++ * reset without restoring the original state (saved locally in
++ * 'vdev->pm_save').
++ */
++ vfio_pci_set_power_state(vdev, PCI_D0);
++
+ /* Stop the device from further DMA */
+ pci_clear_master(pdev);
+
+@@ -410,47 +438,17 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
+
+-static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev)
+-{
+- struct pci_dev *physfn = pci_physfn(vdev->pdev);
+- struct vfio_device *pf_dev;
+-
+- if (!vdev->pdev->is_virtfn)
+- return NULL;
+-
+- pf_dev = vfio_device_get_from_dev(&physfn->dev);
+- if (!pf_dev)
+- return NULL;
+-
+- if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
+- vfio_device_put(pf_dev);
+- return NULL;
+- }
+-
+- return container_of(pf_dev, struct vfio_pci_core_device, vdev);
+-}
+-
+-static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val)
+-{
+- struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
+-
+- if (!pf_vdev)
+- return;
+-
+- mutex_lock(&pf_vdev->vf_token->lock);
+- pf_vdev->vf_token->users += val;
+- WARN_ON(pf_vdev->vf_token->users < 0);
+- mutex_unlock(&pf_vdev->vf_token->lock);
+-
+- vfio_device_put(&pf_vdev->vdev);
+-}
+-
+ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
+ {
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+- vfio_pci_vf_token_user_add(vdev, -1);
++ if (vdev->sriov_pf_core_dev) {
++ mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
++ WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users);
++ vdev->sriov_pf_core_dev->vf_token->users--;
++ mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
++ }
+ vfio_spapr_pci_eeh_release(vdev->pdev);
+ vfio_pci_core_disable(vdev);
+
+@@ -471,7 +469,12 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
+ {
+ vfio_pci_probe_mmaps(vdev);
+ vfio_spapr_pci_eeh_open(vdev->pdev);
+- vfio_pci_vf_token_user_add(vdev, 1);
++
++ if (vdev->sriov_pf_core_dev) {
++ mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
++ vdev->sriov_pf_core_dev->vf_token->users++;
++ mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
++ }
+ }
+ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
+
+@@ -921,6 +924,19 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+ return -EINVAL;
+
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
++
++ /*
++ * This function can be invoked while the power state is non-D0.
++ * If pci_try_reset_function() has been called while the power
++ * state is non-D0, then pci_try_reset_function() will
++ * internally set the power state to D0 without vfio driver
++ * involvement. For the devices which have NoSoftRst-, the
++ * reset function can cause the PCI config space reset without
++ * restoring the original state (saved locally in
++ * 'vdev->pm_save').
++ */
++ vfio_pci_set_power_state(vdev, PCI_D0);
++
+ ret = pci_try_reset_function(vdev->pdev);
+ up_write(&vdev->memory_lock);
+
+@@ -1566,11 +1582,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
+ *
+ * If the VF token is provided but unused, an error is generated.
+ */
+- if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
+- return 0; /* No VF token provided or required */
+-
+ if (vdev->pdev->is_virtfn) {
+- struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
++ struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev;
+ bool match;
+
+ if (!pf_vdev) {
+@@ -1583,7 +1596,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
+ }
+
+ if (!vf_token) {
+- vfio_device_put(&pf_vdev->vdev);
+ pci_info_ratelimited(vdev->pdev,
+ "VF token required to access device\n");
+ return -EACCES;
+@@ -1593,8 +1605,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
+ match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
+ mutex_unlock(&pf_vdev->vf_token->lock);
+
+- vfio_device_put(&pf_vdev->vdev);
+-
+ if (!match) {
+ pci_info_ratelimited(vdev->pdev,
+ "Incorrect VF token provided for device\n");
+@@ -1715,8 +1725,30 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
+ static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
+ {
+ struct pci_dev *pdev = vdev->pdev;
++ struct vfio_pci_core_device *cur;
++ struct pci_dev *physfn;
+ int ret;
+
++ if (pdev->is_virtfn) {
++ /*
++ * If this VF was created by our vfio_pci_core_sriov_configure()
++ * then we can find the PF vfio_pci_core_device now, and due to
++ * the locking in pci_disable_sriov() it cannot change until
++ * this VF device driver is removed.
++ */
++ physfn = pci_physfn(vdev->pdev);
++ mutex_lock(&vfio_pci_sriov_pfs_mutex);
++ list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) {
++ if (cur->pdev == physfn) {
++ vdev->sriov_pf_core_dev = cur;
++ break;
++ }
++ }
++ mutex_unlock(&vfio_pci_sriov_pfs_mutex);
++ return 0;
++ }
++
++ /* Not a SRIOV PF */
+ if (!pdev->is_physfn)
+ return 0;
+
+@@ -1788,6 +1820,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
+ INIT_LIST_HEAD(&vdev->ioeventfds_list);
+ mutex_init(&vdev->vma_lock);
+ INIT_LIST_HEAD(&vdev->vma_list);
++ INIT_LIST_HEAD(&vdev->sriov_pfs_item);
+ init_rwsem(&vdev->memory_lock);
+ }
+ EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
+@@ -1886,7 +1919,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
+ {
+ struct pci_dev *pdev = vdev->pdev;
+
+- pci_disable_sriov(pdev);
++ vfio_pci_core_sriov_configure(pdev, 0);
+
+ vfio_unregister_group_dev(&vdev->vdev);
+
+@@ -1926,21 +1959,49 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+
+ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
+ {
++ struct vfio_pci_core_device *vdev;
+ struct vfio_device *device;
+ int ret = 0;
+
++ device_lock_assert(&pdev->dev);
++
+ device = vfio_device_get_from_dev(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
+- if (nr_virtfn == 0)
+- pci_disable_sriov(pdev);
+- else
++ vdev = container_of(device, struct vfio_pci_core_device, vdev);
++
++ if (nr_virtfn) {
++ mutex_lock(&vfio_pci_sriov_pfs_mutex);
++ /*
++ * The thread that adds the vdev to the list is the only thread
++ * that gets to call pci_enable_sriov() and we will only allow
++ * it to be called once without going through
++ * pci_disable_sriov()
++ */
++ if (!list_empty(&vdev->sriov_pfs_item)) {
++ ret = -EINVAL;
++ goto out_unlock;
++ }
++ list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
++ mutex_unlock(&vfio_pci_sriov_pfs_mutex);
+ ret = pci_enable_sriov(pdev, nr_virtfn);
++ if (ret)
++ goto out_del;
++ ret = nr_virtfn;
++ goto out_put;
++ }
+
+- vfio_device_put(device);
++ pci_disable_sriov(pdev);
+
+- return ret < 0 ? ret : nr_virtfn;
++out_del:
++ mutex_lock(&vfio_pci_sriov_pfs_mutex);
++ list_del_init(&vdev->sriov_pfs_item);
++out_unlock:
++ mutex_unlock(&vfio_pci_sriov_pfs_mutex);
++out_put:
++ vfio_device_put(device);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
+
+@@ -2064,6 +2125,18 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
+ }
+ cur_mem = NULL;
+
++ /*
++ * The pci_reset_bus() will reset all the devices in the bus.
++ * The power state can be non-D0 for some of the devices in the bus.
++ * For these devices, the pci_reset_bus() will internally set
++ * the power state to D0 without vfio driver involvement.
++ * For the devices which have NoSoftRst-, the reset function can
++ * cause the PCI config space reset without restoring the original
++ * state (saved locally in 'vdev->pm_save').
++ */
++ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
++ vfio_pci_set_power_state(cur, PCI_D0);
++
+ ret = pci_reset_bus(pdev);
+
+ err_undo:
+@@ -2117,6 +2190,18 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
+ if (!pdev)
+ return false;
+
++ /*
++ * The pci_reset_bus() will reset all the devices in the bus.
++ * The power state can be non-D0 for some of the devices in the bus.
++ * For these devices, the pci_reset_bus() will internally set
++ * the power state to D0 without vfio driver involvement.
++ * For the devices which have NoSoftRst-, the reset function can
++ * cause the PCI config space reset without restoring the original
++ * state (saved locally in 'vdev->pm_save').
++ */
++ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
++ vfio_pci_set_power_state(cur, PCI_D0);
++
+ ret = pci_reset_bus(pdev);
+ if (ret)
+ return false;
+diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
+index 57d3b2cbbd8e5..82ac1569deb05 100644
+--- a/drivers/vfio/pci/vfio_pci_rdwr.c
++++ b/drivers/vfio/pci/vfio_pci_rdwr.c
+@@ -288,6 +288,7 @@ out:
+ return done;
+ }
+
++#ifdef CONFIG_VFIO_PCI_VGA
+ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
+ {
+@@ -355,6 +356,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+
+ return done;
+ }
++#endif
+
+ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
+ bool test_mem)
+diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
+index 6af7ce7d619c2..701bd99a87198 100644
+--- a/drivers/vfio/platform/vfio_platform_common.c
++++ b/drivers/vfio/platform/vfio_platform_common.c
+@@ -72,12 +72,11 @@ static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
+ const char **extra_dbg)
+ {
+ #ifdef CONFIG_ACPI
+- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct device *dev = vdev->device;
+ acpi_handle handle = ACPI_HANDLE(dev);
+ acpi_status acpi_ret;
+
+- acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer);
++ acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, NULL);
+ if (ACPI_FAILURE(acpi_ret)) {
+ if (extra_dbg)
+ *extra_dbg = acpi_format_exception(acpi_ret);
+diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
+index 3c034fe14ccb0..818e47fc08968 100644
+--- a/drivers/vfio/vfio.c
++++ b/drivers/vfio/vfio.c
+@@ -1850,6 +1850,7 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
+ buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
+ if (!buf) {
+ kfree(caps->buf);
++ caps->buf = NULL;
+ caps->size = 0;
+ return ERR_PTR(-ENOMEM);
+ }
+diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
+index 0e9217687f5c3..66bbb125d7615 100644
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -100,6 +100,8 @@ struct vfio_dma {
+ struct task_struct *task;
+ struct rb_root pfn_list; /* Ex-user pinned pfn list */
+ unsigned long *bitmap;
++ struct mm_struct *mm;
++ size_t locked_vm;
+ };
+
+ struct vfio_batch {
+@@ -416,6 +418,19 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
+ return ret;
+ }
+
++static int mm_lock_acct(struct task_struct *task, struct mm_struct *mm,
++ bool lock_cap, long npage)
++{
++ int ret = mmap_write_lock_killable(mm);
++
++ if (ret)
++ return ret;
++
++ ret = __account_locked_vm(mm, abs(npage), npage > 0, task, lock_cap);
++ mmap_write_unlock(mm);
++ return ret;
++}
++
+ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
+ {
+ struct mm_struct *mm;
+@@ -424,16 +439,13 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
+ if (!npage)
+ return 0;
+
+- mm = async ? get_task_mm(dma->task) : dma->task->mm;
+- if (!mm)
++ mm = dma->mm;
++ if (async && !mmget_not_zero(mm))
+ return -ESRCH; /* process exited */
+
+- ret = mmap_write_lock_killable(mm);
+- if (!ret) {
+- ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
+- dma->lock_cap);
+- mmap_write_unlock(mm);
+- }
++ ret = mm_lock_acct(dma->task, mm, dma->lock_cap, npage);
++ if (!ret)
++ dma->locked_vm += npage;
+
+ if (async)
+ mmput(mm);
+@@ -561,6 +573,18 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+ ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
+ pages, NULL, NULL);
+ if (ret > 0) {
++ int i;
++
++ /*
++ * The zero page is always resident, we don't need to pin it
++ * and it falls into our invalid/reserved test so we don't
++ * unpin in put_pfn(). Unpin all zero pages in the batch here.
++ */
++ for (i = 0 ; i < ret; i++) {
++ if (unlikely(is_zero_pfn(page_to_pfn(pages[i]))))
++ unpin_user_page(pages[i]);
++ }
++
+ *pfn = page_to_pfn(pages[0]);
+ goto done;
+ }
+@@ -786,8 +810,8 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
+ struct mm_struct *mm;
+ int ret;
+
+- mm = get_task_mm(dma->task);
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -ENODEV;
+
+ ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+@@ -797,7 +821,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
+ ret = 0;
+
+ if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+- ret = vfio_lock_acct(dma, 1, true);
++ ret = vfio_lock_acct(dma, 1, false);
+ if (ret) {
+ put_pfn(*pfn_base, dma->prot);
+ if (ret == -ENOMEM)
+@@ -1167,6 +1191,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
+ vfio_unmap_unpin(iommu, dma, true);
+ vfio_unlink_dma(iommu, dma);
+ put_task_struct(dma->task);
++ mmdrop(dma->mm);
+ vfio_dma_bitmap_free(dma);
+ if (dma->vaddr_invalid) {
+ iommu->vaddr_invalid_count--;
+@@ -1551,6 +1576,38 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
+ return list_empty(iova);
+ }
+
++static int vfio_change_dma_owner(struct vfio_dma *dma)
++{
++ struct task_struct *task = current->group_leader;
++ struct mm_struct *mm = current->mm;
++ long npage = dma->locked_vm;
++ bool lock_cap;
++ int ret;
++
++ if (mm == dma->mm)
++ return 0;
++
++ lock_cap = capable(CAP_IPC_LOCK);
++ ret = mm_lock_acct(task, mm, lock_cap, npage);
++ if (ret)
++ return ret;
++
++ if (mmget_not_zero(dma->mm)) {
++ mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage);
++ mmput(dma->mm);
++ }
++
++ if (dma->task != task) {
++ put_task_struct(dma->task);
++ dma->task = get_task_struct(task);
++ }
++ mmdrop(dma->mm);
++ dma->mm = mm;
++ mmgrab(dma->mm);
++ dma->lock_cap = lock_cap;
++ return 0;
++}
++
+ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+ struct vfio_iommu_type1_dma_map *map)
+ {
+@@ -1600,6 +1657,9 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+ dma->size != size) {
+ ret = -EINVAL;
+ } else {
++ ret = vfio_change_dma_owner(dma);
++ if (ret)
++ goto out_unlock;
+ dma->vaddr = vaddr;
+ dma->vaddr_invalid = false;
+ iommu->vaddr_invalid_count--;
+@@ -1637,29 +1697,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
+ * against the locked memory limit and we need to be able to do both
+ * outside of this call path as pinning can be asynchronous via the
+ * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
+- * task_struct and VM locked pages requires an mm_struct, however
+- * holding an indefinite mm reference is not recommended, therefore we
+- * only hold a reference to a task. We could hold a reference to
+- * current, however QEMU uses this call path through vCPU threads,
+- * which can be killed resulting in a NULL mm and failure in the unmap
+- * path when called via a different thread. Avoid this problem by
+- * using the group_leader as threads within the same group require
+- * both CLONE_THREAD and CLONE_VM and will therefore use the same
+- * mm_struct.
+- *
+- * Previously we also used the task for testing CAP_IPC_LOCK at the
+- * time of pinning and accounting, however has_capability() makes use
+- * of real_cred, a copy-on-write field, so we can't guarantee that it
+- * matches group_leader, or in fact that it might not change by the
+- * time it's evaluated. If a process were to call MAP_DMA with
+- * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+- * possibly see different results for an iommu_mapped vfio_dma vs
+- * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
+- * time of calling MAP_DMA.
++ * task_struct. Save the group_leader so that all DMA tracking uses
++ * the same task, to make debugging easier. VM locked pages requires
++ * an mm_struct, so grab the mm in case the task dies.
+ */
+ get_task_struct(current->group_leader);
+ dma->task = current->group_leader;
+ dma->lock_cap = capable(CAP_IPC_LOCK);
++ dma->mm = current->mm;
++ mmgrab(dma->mm);
+
+ dma->pfn_list = RB_ROOT;
+
+@@ -2820,7 +2866,7 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
+ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
+ struct vfio_info_cap *caps)
+ {
+- struct vfio_iommu_type1_info_cap_migration cap_mig;
++ struct vfio_iommu_type1_info_cap_migration cap_mig = {};
+
+ cap_mig.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION;
+ cap_mig.header.version = 1;
+@@ -3156,9 +3202,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
+ !(dma->prot & IOMMU_READ))
+ return -EPERM;
+
+- mm = get_task_mm(dma->task);
+-
+- if (!mm)
++ mm = dma->mm;
++ if (!mmget_not_zero(mm))
+ return -EPERM;
+
+ if (kthread)
+diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
+index 670d56c879e50..5829cf2d0552d 100644
+--- a/drivers/vhost/iotlb.c
++++ b/drivers/vhost/iotlb.c
+@@ -57,6 +57,21 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
+ if (last < start)
+ return -EFAULT;
+
++ /* If the range being mapped is [0, ULONG_MAX], split it into two entries
++ * otherwise its size would overflow u64.
++ */
++ if (start == 0 && last == ULONG_MAX) {
++ u64 mid = last / 2;
++ int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr,
++ perm, opaque);
++
++ if (err)
++ return err;
++
++ addr += mid + 1;
++ start = mid + 1;
++ }
++
+ if (iotlb->limit &&
+ iotlb->nmaps == iotlb->limit &&
+ iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) {
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index 28ef323882fb2..00f10d3402590 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -473,6 +473,7 @@ static void vhost_tx_batch(struct vhost_net *net,
+ goto signal_used;
+
+ msghdr->msg_control = &ctl;
++ msghdr->msg_controllen = sizeof(ctl);
+ err = sock->ops->sendmsg(sock, msghdr, 0);
+ if (unlikely(err < 0)) {
+ vq_err(&nvq->vq, "Fail to batch sending packets\n");
+@@ -932,13 +933,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
+
+ err = sock->ops->sendmsg(sock, &msg, len);
+ if (unlikely(err < 0)) {
++ bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
++
+ if (zcopy_used) {
+ if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
+ vhost_net_ubuf_put(ubufs);
+- nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+- % UIO_MAXIOV;
++ if (retry)
++ nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
++ % UIO_MAXIOV;
++ else
++ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+ }
+- if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
++ if (retry) {
+ vhost_discard_vq_desc(vq, 1);
+ vhost_net_enable_vq(net, vq);
+ break;
+@@ -1449,13 +1455,9 @@ err:
+ return ERR_PTR(r);
+ }
+
+-static struct ptr_ring *get_tap_ptr_ring(int fd)
++static struct ptr_ring *get_tap_ptr_ring(struct file *file)
+ {
+ struct ptr_ring *ring;
+- struct file *file = fget(fd);
+-
+- if (!file)
+- return NULL;
+ ring = tun_get_tx_ring(file);
+ if (!IS_ERR(ring))
+ goto out;
+@@ -1464,7 +1466,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
+ goto out;
+ ring = NULL;
+ out:
+- fput(file);
+ return ring;
+ }
+
+@@ -1521,6 +1522,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
+ nvq = &n->vqs[index];
+ mutex_lock(&vq->mutex);
+
++ if (fd == -1)
++ vhost_clear_msg(&n->dev);
++
+ /* Verify that ring has been setup correctly. */
+ if (!vhost_vq_access_ok(vq)) {
+ r = -EFAULT;
+@@ -1551,8 +1555,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
+ r = vhost_net_enable_vq(n, vq);
+ if (r)
+ goto err_used;
+- if (index == VHOST_NET_VQ_RX)
+- nvq->rx_ring = get_tap_ptr_ring(fd);
++ if (index == VHOST_NET_VQ_RX) {
++ if (sock)
++ nvq->rx_ring = get_tap_ptr_ring(sock->file);
++ else
++ nvq->rx_ring = NULL;
++ }
+
+ oldubufs = nvq->ubufs;
+ nvq->ubufs = ubufs;
+diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
+index 39039e0461175..9ca8b92d92ae4 100644
+--- a/drivers/vhost/vdpa.c
++++ b/drivers/vhost/vdpa.c
+@@ -97,8 +97,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
+ return;
+
+ irq = ops->get_vq_irq(vdpa, qid);
++ if (irq < 0)
++ return;
++
+ irq_bypass_unregister_producer(&vq->call_ctx.producer);
+- if (!vq->call_ctx.ctx || irq < 0)
++ if (!vq->call_ctx.ctx)
+ return;
+
+ vq->call_ctx.producer.token = vq->call_ctx.ctx;
+@@ -197,7 +200,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
+ struct vdpa_device *vdpa = v->vdpa;
+ long size = vdpa->config->get_config_size(vdpa);
+
+- if (c->len == 0)
++ if (c->len == 0 || c->off > size)
+ return -EINVAL;
+
+ if (c->len > size - c->off)
+@@ -389,7 +392,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
+ if (r)
+ return r;
+
+- vq->last_avail_idx = vq_state.split.avail_index;
++ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
++ vq->last_avail_idx = vq_state.packed.last_avail_idx |
++ (vq_state.packed.last_avail_counter << 15);
++ vq->last_used_idx = vq_state.packed.last_used_idx |
++ (vq_state.packed.last_used_counter << 15);
++ } else {
++ vq->last_avail_idx = vq_state.split.avail_index;
++ }
+ break;
+ }
+
+@@ -407,9 +417,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
+ break;
+
+ case VHOST_SET_VRING_BASE:
+- vq_state.split.avail_index = vq->last_avail_idx;
+- if (ops->set_vq_state(vdpa, idx, &vq_state))
+- r = -EINVAL;
++ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
++ vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
++ vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
++ vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
++ vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
++ } else {
++ vq_state.split.avail_index = vq->last_avail_idx;
++ }
++ r = ops->set_vq_state(vdpa, idx, &vq_state);
+ break;
+
+ case VHOST_SET_VRING_CALL:
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index 59edb5a1ffe28..047fa2faef566 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -669,7 +669,7 @@ void vhost_dev_stop(struct vhost_dev *dev)
+ }
+ EXPORT_SYMBOL_GPL(vhost_dev_stop);
+
+-static void vhost_clear_msg(struct vhost_dev *dev)
++void vhost_clear_msg(struct vhost_dev *dev)
+ {
+ struct vhost_msg_node *node, *n;
+
+@@ -687,6 +687,7 @@ static void vhost_clear_msg(struct vhost_dev *dev)
+
+ spin_unlock(&dev->iotlb_lock);
+ }
++EXPORT_SYMBOL_GPL(vhost_clear_msg);
+
+ void vhost_dev_cleanup(struct vhost_dev *dev)
+ {
+@@ -1170,6 +1171,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
+ goto done;
+ }
+
++ if ((msg.type == VHOST_IOTLB_UPDATE ||
++ msg.type == VHOST_IOTLB_INVALIDATE) &&
++ msg.size == 0) {
++ ret = -EINVAL;
++ goto done;
++ }
++
+ if (dev->msg_handler)
+ ret = dev->msg_handler(dev, &msg);
+ else
+@@ -1620,17 +1628,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
+ r = -EFAULT;
+ break;
+ }
+- if (s.num > 0xffff) {
+- r = -EINVAL;
+- break;
++ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
++ vq->last_avail_idx = s.num & 0xffff;
++ vq->last_used_idx = (s.num >> 16) & 0xffff;
++ } else {
++ if (s.num > 0xffff) {
++ r = -EINVAL;
++ break;
++ }
++ vq->last_avail_idx = s.num;
+ }
+- vq->last_avail_idx = s.num;
+ /* Forget the cached index value. */
+ vq->avail_idx = vq->last_avail_idx;
+ break;
+ case VHOST_GET_VRING_BASE:
+ s.index = idx;
+- s.num = vq->last_avail_idx;
++ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
++ s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
++ else
++ s.num = vq->last_avail_idx;
+ if (copy_to_user(argp, &s, sizeof s))
+ r = -EFAULT;
+ break;
+@@ -2041,7 +2057,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+ struct vhost_dev *dev = vq->dev;
+ struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
+ struct iovec *_iov;
+- u64 s = 0;
++ u64 s = 0, last = addr + len - 1;
+ int ret = 0;
+
+ while ((u64)len > s) {
+@@ -2051,7 +2067,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+ break;
+ }
+
+- map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
++ map = vhost_iotlb_itree_first(umem, addr, last);
+ if (map == NULL || map->start > addr) {
+ if (umem != dev->iotlb) {
+ ret = -EFAULT;
+diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
+index 638bb640d6b4b..6378f020e6d53 100644
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -86,13 +86,17 @@ struct vhost_virtqueue {
+ /* The routine to call when the Guest pings us, or timeout. */
+ vhost_work_fn_t handle_kick;
+
+- /* Last available index we saw. */
++ /* Last available index we saw.
++ * Values are limited to 0x7fff, and the high bit is used as
++ * a wrap counter when using VIRTIO_F_RING_PACKED. */
+ u16 last_avail_idx;
+
+ /* Caches available index value from user. */
+ u16 avail_idx;
+
+- /* Last index we used. */
++ /* Last index we used.
++ * Values are limited to 0x7fff, and the high bit is used as
++ * a wrap counter when using VIRTIO_F_RING_PACKED. */
+ u16 last_used_idx;
+
+ /* Used flags */
+@@ -182,6 +186,7 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
+ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
+ bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
+ bool vhost_log_access_ok(struct vhost_dev *);
++void vhost_clear_msg(struct vhost_dev *dev);
+
+ int vhost_get_vq_desc(struct vhost_virtqueue *,
+ struct iovec iov[], unsigned int iov_count,
+diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
+index 14e2043d76852..786876af0a73a 100644
+--- a/drivers/vhost/vringh.c
++++ b/drivers/vhost/vringh.c
+@@ -292,7 +292,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
+ int (*copy)(const struct vringh *vrh,
+ void *dst, const void *src, size_t len))
+ {
+- int err, count = 0, up_next, desc_max;
++ int err, count = 0, indirect_count = 0, up_next, desc_max;
+ struct vring_desc desc, *descs;
+ struct vringh_range range = { -1ULL, 0 }, slowrange;
+ bool slow = false;
+@@ -349,7 +349,12 @@ __vringh_iov(struct vringh *vrh, u16 i,
+ continue;
+ }
+
+- if (count++ == vrh->vring.num) {
++ if (up_next == -1)
++ count++;
++ else
++ indirect_count++;
++
++ if (count > vrh->vring.num || indirect_count > desc_max) {
+ vringh_bad("Descriptor loop in %p", descs);
+ err = -ELOOP;
+ goto fail;
+@@ -411,6 +416,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
+ i = return_from_indirect(vrh, &up_next,
+ &descs, &desc_max);
+ slow = false;
++ indirect_count = 0;
+ } else
+ break;
+ }
+@@ -1095,7 +1101,7 @@ static int iotlb_translate(const struct vringh *vrh,
+ struct vhost_iotlb_map *map;
+ struct vhost_iotlb *iotlb = vrh->iotlb;
+ int ret = 0;
+- u64 s = 0;
++ u64 s = 0, last = addr + len - 1;
+
+ spin_lock(vrh->iotlb_lock);
+
+@@ -1107,8 +1113,7 @@ static int iotlb_translate(const struct vringh *vrh,
+ break;
+ }
+
+- map = vhost_iotlb_itree_first(iotlb, addr,
+- addr + len - 1);
++ map = vhost_iotlb_itree_first(iotlb, addr, last);
+ if (!map || map->start > addr) {
+ ret = -EINVAL;
+ break;
+diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
+index 938aefbc75ecc..74ac0c28fe43a 100644
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -393,7 +393,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+ return NULL;
+ }
+
+- pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
++ pkt->buf = kvmalloc(pkt->len, GFP_KERNEL);
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+@@ -554,7 +554,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
+ virtio_transport_free_pkt(pkt);
+
+ len += sizeof(pkt->hdr);
+- vhost_add_used(vq, head, len);
++ vhost_add_used(vq, head, 0);
+ total_len += len;
+ added = true;
+ } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
+@@ -633,16 +633,18 @@ err:
+ return ret;
+ }
+
+-static int vhost_vsock_stop(struct vhost_vsock *vsock)
++static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
+ {
+ size_t i;
+- int ret;
++ int ret = 0;
+
+ mutex_lock(&vsock->dev.mutex);
+
+- ret = vhost_dev_check_owner(&vsock->dev);
+- if (ret)
+- goto err;
++ if (check_owner) {
++ ret = vhost_dev_check_owner(&vsock->dev);
++ if (ret)
++ goto err;
++ }
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+@@ -755,9 +757,15 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
+
+ /* Iterating over all connections for all CIDs to find orphans is
+ * inefficient. Room for improvement here. */
+- vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
++ vsock_for_each_connected_socket(&vhost_transport.transport,
++ vhost_vsock_reset_orphans);
+
+- vhost_vsock_stop(vsock);
++ /* Don't check the owner, because we are in the release path, so we
++ * need to stop the vsock device in any case.
++ * vhost_vsock_stop() can not fail in this case, so we don't need to
++ * check the return code.
++ */
++ vhost_vsock_stop(vsock, false);
+ vhost_vsock_flush(vsock);
+ vhost_dev_stop(&vsock->dev);
+
+@@ -872,7 +880,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
+ if (start)
+ return vhost_vsock_start(vsock);
+ else
+- return vhost_vsock_stop(vsock);
++ return vhost_vsock_stop(vsock, true);
+ case VHOST_GET_FEATURES:
+ features = VHOST_VSOCK_FEATURES;
+ if (copy_to_user(argp, &features, sizeof(features)))
+@@ -960,7 +968,14 @@ static int __init vhost_vsock_init(void)
+ VSOCK_TRANSPORT_F_H2G);
+ if (ret < 0)
+ return ret;
+- return misc_register(&vhost_vsock_misc);
++
++ ret = misc_register(&vhost_vsock_misc);
++ if (ret) {
++ vsock_core_unregister(&vhost_transport.transport);
++ return ret;
++ }
++
++ return 0;
+ };
+
+ static void __exit vhost_vsock_exit(void)
+diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
+index 537fe1b376ad7..fc990e576340b 100644
+--- a/drivers/video/backlight/backlight.c
++++ b/drivers/video/backlight/backlight.c
+@@ -688,12 +688,6 @@ static struct backlight_device *of_find_backlight(struct device *dev)
+ of_node_put(np);
+ if (!bd)
+ return ERR_PTR(-EPROBE_DEFER);
+- /*
+- * Note: gpio_backlight uses brightness as
+- * power state during probe
+- */
+- if (!bd->props.brightness)
+- bd->props.brightness = bd->props.max_brightness;
+ }
+ }
+
+diff --git a/drivers/video/backlight/bd6107.c b/drivers/video/backlight/bd6107.c
+index 515184fbe33a9..5c67ef8bd60ca 100644
+--- a/drivers/video/backlight/bd6107.c
++++ b/drivers/video/backlight/bd6107.c
+@@ -104,7 +104,7 @@ static int bd6107_backlight_check_fb(struct backlight_device *backlight,
+ {
+ struct bd6107 *bd = bl_get_data(backlight);
+
+- return bd->pdata->fbdev == NULL || bd->pdata->fbdev == info->dev;
++ return bd->pdata->fbdev == NULL || bd->pdata->fbdev == info->device;
+ }
+
+ static const struct backlight_ops bd6107_backlight_ops = {
+diff --git a/drivers/video/backlight/gpio_backlight.c b/drivers/video/backlight/gpio_backlight.c
+index 6f78d928f054a..30ec5b6845335 100644
+--- a/drivers/video/backlight/gpio_backlight.c
++++ b/drivers/video/backlight/gpio_backlight.c
+@@ -35,7 +35,7 @@ static int gpio_backlight_check_fb(struct backlight_device *bl,
+ {
+ struct gpio_backlight *gbl = bl_get_data(bl);
+
+- return gbl->fbdev == NULL || gbl->fbdev == info->dev;
++ return gbl->fbdev == NULL || gbl->fbdev == info->device;
+ }
+
+ static const struct backlight_ops gpio_backlight_ops = {
+@@ -87,8 +87,7 @@ static int gpio_backlight_probe(struct platform_device *pdev)
+ /* Not booted with device tree or no phandle link to the node */
+ bl->props.power = def_value ? FB_BLANK_UNBLANK
+ : FB_BLANK_POWERDOWN;
+- else if (gpiod_get_direction(gbl->gpiod) == 0 &&
+- gpiod_get_value_cansleep(gbl->gpiod) == 0)
++ else if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
+ bl->props.power = FB_BLANK_POWERDOWN;
+ else
+ bl->props.power = FB_BLANK_UNBLANK;
+diff --git a/drivers/video/backlight/lv5207lp.c b/drivers/video/backlight/lv5207lp.c
+index 1842ae9a55f8b..720ada475ce53 100644
+--- a/drivers/video/backlight/lv5207lp.c
++++ b/drivers/video/backlight/lv5207lp.c
+@@ -67,7 +67,7 @@ static int lv5207lp_backlight_check_fb(struct backlight_device *backlight,
+ {
+ struct lv5207lp *lv = bl_get_data(backlight);
+
+- return lv->pdata->fbdev == NULL || lv->pdata->fbdev == info->dev;
++ return lv->pdata->fbdev == NULL || lv->pdata->fbdev == info->device;
+ }
+
+ static const struct backlight_ops lv5207lp_backlight_ops = {
+diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c
+index d094299c2a485..f12c76d6e61de 100644
+--- a/drivers/video/backlight/qcom-wled.c
++++ b/drivers/video/backlight/qcom-wled.c
+@@ -231,14 +231,14 @@ struct wled {
+ static int wled3_set_brightness(struct wled *wled, u16 brightness)
+ {
+ int rc, i;
+- u8 v[2];
++ __le16 v;
+
+- v[0] = brightness & 0xff;
+- v[1] = (brightness >> 8) & 0xf;
++ v = cpu_to_le16(brightness & WLED3_SINK_REG_BRIGHT_MAX);
+
+ for (i = 0; i < wled->cfg.num_strings; ++i) {
+ rc = regmap_bulk_write(wled->regmap, wled->ctrl_addr +
+- WLED3_SINK_REG_BRIGHT(i), v, 2);
++ WLED3_SINK_REG_BRIGHT(wled->cfg.enabled_strings[i]),
++ &v, sizeof(v));
+ if (rc < 0)
+ return rc;
+ }
+@@ -250,18 +250,18 @@ static int wled4_set_brightness(struct wled *wled, u16 brightness)
+ {
+ int rc, i;
+ u16 low_limit = wled->max_brightness * 4 / 1000;
+- u8 v[2];
++ __le16 v;
+
+ /* WLED4's lower limit of operation is 0.4% */
+ if (brightness > 0 && brightness < low_limit)
+ brightness = low_limit;
+
+- v[0] = brightness & 0xff;
+- v[1] = (brightness >> 8) & 0xf;
++ v = cpu_to_le16(brightness & WLED3_SINK_REG_BRIGHT_MAX);
+
+ for (i = 0; i < wled->cfg.num_strings; ++i) {
+ rc = regmap_bulk_write(wled->regmap, wled->sink_addr +
+- WLED4_SINK_REG_BRIGHT(i), v, 2);
++ WLED4_SINK_REG_BRIGHT(wled->cfg.enabled_strings[i]),
++ &v, sizeof(v));
+ if (rc < 0)
+ return rc;
+ }
+@@ -273,21 +273,20 @@ static int wled5_set_brightness(struct wled *wled, u16 brightness)
+ {
+ int rc, offset;
+ u16 low_limit = wled->max_brightness * 1 / 1000;
+- u8 v[2];
++ __le16 v;
+
+ /* WLED5's lower limit is 0.1% */
+ if (brightness < low_limit)
+ brightness = low_limit;
+
+- v[0] = brightness & 0xff;
+- v[1] = (brightness >> 8) & 0x7f;
++ v = cpu_to_le16(brightness & WLED5_SINK_REG_BRIGHT_MAX_15B);
+
+ offset = (wled->cfg.mod_sel == MOD_A) ?
+ WLED5_SINK_REG_MOD_A_BRIGHTNESS_LSB :
+ WLED5_SINK_REG_MOD_B_BRIGHTNESS_LSB;
+
+ rc = regmap_bulk_write(wled->regmap, wled->sink_addr + offset,
+- v, 2);
++ &v, sizeof(v));
+ return rc;
+ }
+
+@@ -572,7 +571,7 @@ unlock_mutex:
+
+ static void wled_auto_string_detection(struct wled *wled)
+ {
+- int rc = 0, i, delay_time_us;
++ int rc = 0, i, j, delay_time_us;
+ u32 sink_config = 0;
+ u8 sink_test = 0, sink_valid = 0, val;
+ bool fault_set;
+@@ -619,14 +618,15 @@ static void wled_auto_string_detection(struct wled *wled)
+
+ /* Iterate through the strings one by one */
+ for (i = 0; i < wled->cfg.num_strings; i++) {
+- sink_test = BIT((WLED4_SINK_REG_CURR_SINK_SHFT + i));
++ j = wled->cfg.enabled_strings[i];
++ sink_test = BIT((WLED4_SINK_REG_CURR_SINK_SHFT + j));
+
+ /* Enable feedback control */
+ rc = regmap_write(wled->regmap, wled->ctrl_addr +
+- WLED3_CTRL_REG_FEEDBACK_CONTROL, i + 1);
++ WLED3_CTRL_REG_FEEDBACK_CONTROL, j + 1);
+ if (rc < 0) {
+ dev_err(wled->dev, "Failed to enable feedback for SINK %d rc = %d\n",
+- i + 1, rc);
++ j + 1, rc);
+ goto failed_detect;
+ }
+
+@@ -635,7 +635,7 @@ static void wled_auto_string_detection(struct wled *wled)
+ WLED4_SINK_REG_CURR_SINK, sink_test);
+ if (rc < 0) {
+ dev_err(wled->dev, "Failed to configure SINK %d rc=%d\n",
+- i + 1, rc);
++ j + 1, rc);
+ goto failed_detect;
+ }
+
+@@ -662,7 +662,7 @@ static void wled_auto_string_detection(struct wled *wled)
+
+ if (fault_set)
+ dev_dbg(wled->dev, "WLED OVP fault detected with SINK %d\n",
+- i + 1);
++ j + 1);
+ else
+ sink_valid |= sink_test;
+
+@@ -702,15 +702,16 @@ static void wled_auto_string_detection(struct wled *wled)
+ /* Enable valid sinks */
+ if (wled->version == 4) {
+ for (i = 0; i < wled->cfg.num_strings; i++) {
++ j = wled->cfg.enabled_strings[i];
+ if (sink_config &
+- BIT(WLED4_SINK_REG_CURR_SINK_SHFT + i))
++ BIT(WLED4_SINK_REG_CURR_SINK_SHFT + j))
+ val = WLED4_SINK_REG_STR_MOD_MASK;
+ else
+ /* Disable modulator_en for unused sink */
+ val = 0;
+
+ rc = regmap_write(wled->regmap, wled->sink_addr +
+- WLED4_SINK_REG_STR_MOD_EN(i), val);
++ WLED4_SINK_REG_STR_MOD_EN(j), val);
+ if (rc < 0) {
+ dev_err(wled->dev, "Failed to configure MODULATOR_EN rc=%d\n",
+ rc);
+@@ -1256,21 +1257,6 @@ static const struct wled_var_cfg wled5_ovp_cfg = {
+ .size = 16,
+ };
+
+-static u32 wled3_num_strings_values_fn(u32 idx)
+-{
+- return idx + 1;
+-}
+-
+-static const struct wled_var_cfg wled3_num_strings_cfg = {
+- .fn = wled3_num_strings_values_fn,
+- .size = 3,
+-};
+-
+-static const struct wled_var_cfg wled4_num_strings_cfg = {
+- .fn = wled3_num_strings_values_fn,
+- .size = 4,
+-};
+-
+ static u32 wled3_switch_freq_values_fn(u32 idx)
+ {
+ return 19200 / (2 * (1 + idx));
+@@ -1344,11 +1330,6 @@ static int wled_configure(struct wled *wled)
+ .val_ptr = &cfg->switch_freq,
+ .cfg = &wled3_switch_freq_cfg,
+ },
+- {
+- .name = "qcom,num-strings",
+- .val_ptr = &cfg->num_strings,
+- .cfg = &wled3_num_strings_cfg,
+- },
+ };
+
+ const struct wled_u32_opts wled4_opts[] = {
+@@ -1372,11 +1353,6 @@ static int wled_configure(struct wled *wled)
+ .val_ptr = &cfg->switch_freq,
+ .cfg = &wled3_switch_freq_cfg,
+ },
+- {
+- .name = "qcom,num-strings",
+- .val_ptr = &cfg->num_strings,
+- .cfg = &wled4_num_strings_cfg,
+- },
+ };
+
+ const struct wled_u32_opts wled5_opts[] = {
+@@ -1400,11 +1376,6 @@ static int wled_configure(struct wled *wled)
+ .val_ptr = &cfg->switch_freq,
+ .cfg = &wled3_switch_freq_cfg,
+ },
+- {
+- .name = "qcom,num-strings",
+- .val_ptr = &cfg->num_strings,
+- .cfg = &wled4_num_strings_cfg,
+- },
+ {
+ .name = "qcom,modulator-sel",
+ .val_ptr = &cfg->mod_sel,
+@@ -1523,16 +1494,57 @@ static int wled_configure(struct wled *wled)
+ *bool_opts[i].val_ptr = true;
+ }
+
+- cfg->num_strings = cfg->num_strings + 1;
+-
+ string_len = of_property_count_elems_of_size(dev->of_node,
+ "qcom,enabled-strings",
+ sizeof(u32));
+- if (string_len > 0)
+- of_property_read_u32_array(dev->of_node,
++ if (string_len > 0) {
++ if (string_len > wled->max_string_count) {
++ dev_err(dev, "Cannot have more than %d strings\n",
++ wled->max_string_count);
++ return -EINVAL;
++ }
++
++ rc = of_property_read_u32_array(dev->of_node,
+ "qcom,enabled-strings",
+ wled->cfg.enabled_strings,
+- sizeof(u32));
++ string_len);
++ if (rc) {
++ dev_err(dev, "Failed to read %d elements from qcom,enabled-strings: %d\n",
++ string_len, rc);
++ return rc;
++ }
++
++ for (i = 0; i < string_len; ++i) {
++ if (wled->cfg.enabled_strings[i] >= wled->max_string_count) {
++ dev_err(dev,
++ "qcom,enabled-strings index %d at %d is out of bounds\n",
++ wled->cfg.enabled_strings[i], i);
++ return -EINVAL;
++ }
++ }
++
++ cfg->num_strings = string_len;
++ }
++
++ rc = of_property_read_u32(dev->of_node, "qcom,num-strings", &val);
++ if (!rc) {
++ if (val < 1 || val > wled->max_string_count) {
++ dev_err(dev, "qcom,num-strings must be between 1 and %d\n",
++ wled->max_string_count);
++ return -EINVAL;
++ }
++
++ if (string_len > 0) {
++ dev_warn(dev, "Only one of qcom,num-strings or qcom,enabled-strings"
++ " should be set\n");
++ if (val > string_len) {
++ dev_err(dev, "qcom,num-strings exceeds qcom,enabled-strings\n");
++ return -EINVAL;
++ }
++ }
++
++ cfg->num_strings = val;
++ }
+
+ return 0;
+ }
+diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
+index 840d9813b0bc6..fcc46380e7c91 100644
+--- a/drivers/video/console/Kconfig
++++ b/drivers/video/console/Kconfig
+@@ -78,6 +78,26 @@ config FRAMEBUFFER_CONSOLE
+ help
+ Low-level framebuffer-based console driver.
+
++config FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
++ bool "Enable legacy fbcon hardware acceleration code"
++ depends on FRAMEBUFFER_CONSOLE
++ default y if PARISC
++ default n
++ help
++ This option enables the fbcon (framebuffer text-based) hardware
++ acceleration for graphics drivers which were written for the fbdev
++ graphics interface.
++
++ On modern machines, on mainstream machines (like x86-64) or when
++ using a modern Linux distribution those fbdev drivers usually aren't used.
++ So enabling this option wouldn't have any effect, which is why you want
++ to disable this option on such newer machines.
++
++ If you compile this kernel for older machines which still require the
++ fbdev drivers, you may want to say Y.
++
++ If unsure, select n.
++
+ config FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
+ bool "Map the console to the primary display device"
+ depends on FRAMEBUFFER_CONSOLE
+diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
+index 1b451165311c9..f304163e87e99 100644
+--- a/drivers/video/console/sticon.c
++++ b/drivers/video/console/sticon.c
+@@ -46,6 +46,7 @@
+ #include <linux/slab.h>
+ #include <linux/font.h>
+ #include <linux/crc32.h>
++#include <linux/fb.h>
+
+ #include <asm/io.h>
+
+@@ -332,13 +333,13 @@ static u8 sticon_build_attr(struct vc_data *conp, u8 color,
+ bool blink, bool underline, bool reverse,
+ bool italic)
+ {
+- u8 attr = ((color & 0x70) >> 1) | ((color & 7));
++ u8 fg = color & 7;
++ u8 bg = (color & 0x70) >> 4;
+
+- if (reverse) {
+- color = ((color >> 3) & 0x7) | ((color & 0x7) << 3);
+- }
+-
+- return attr;
++ if (reverse)
++ return (fg << 3) | bg;
++ else
++ return (bg << 3) | fg;
+ }
+
+ static void sticon_invert_region(struct vc_data *conp, u16 *p, int count)
+@@ -392,7 +393,9 @@ static int __init sticonsole_init(void)
+ for (i = 0; i < MAX_NR_CONSOLES; i++)
+ font_data[i] = STI_DEF_FONT;
+
+- pr_info("sticon: Initializing STI text console.\n");
++ pr_info("sticon: Initializing STI text console on %s at [%s]\n",
++ sticon_sti->sti_data->inq_outptr.dev_name,
++ sticon_sti->pa_path);
+ console_lock();
+ err = do_take_over_console(&sti_con, 0, MAX_NR_CONSOLES - 1,
+ PAGE0->mem_cons.cl_class != CL_DUPLEX);
+diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
+index f869b723494f1..19fd3389946d9 100644
+--- a/drivers/video/console/sticore.c
++++ b/drivers/video/console/sticore.c
+@@ -30,10 +30,11 @@
+ #include <asm/pdc.h>
+ #include <asm/cacheflush.h>
+ #include <asm/grfioctl.h>
++#include <asm/fb.h>
+
+ #include "../fbdev/sticore.h"
+
+-#define STI_DRIVERVERSION "Version 0.9b"
++#define STI_DRIVERVERSION "Version 0.9c"
+
+ static struct sti_struct *default_sti __read_mostly;
+
+@@ -502,7 +503,7 @@ sti_select_fbfont(struct sti_cooked_rom *cooked_rom, const char *fbfont_name)
+ if (!fbfont)
+ return NULL;
+
+- pr_info("STI selected %ux%u framebuffer font %s for sticon\n",
++ pr_info(" using %ux%u framebuffer font %s\n",
+ fbfont->width, fbfont->height, fbfont->name);
+
+ bpc = ((fbfont->width+7)/8) * fbfont->height;
+@@ -946,6 +947,7 @@ out_err:
+
+ static void sticore_check_for_default_sti(struct sti_struct *sti, char *path)
+ {
++ pr_info(" located at [%s]\n", sti->pa_path);
+ if (strcmp (path, default_sti_path) == 0)
+ default_sti = sti;
+ }
+@@ -957,7 +959,6 @@ static void sticore_check_for_default_sti(struct sti_struct *sti, char *path)
+ */
+ static int __init sticore_pa_init(struct parisc_device *dev)
+ {
+- char pa_path[21];
+ struct sti_struct *sti = NULL;
+ int hpa = dev->hpa.start;
+
+@@ -970,8 +971,8 @@ static int __init sticore_pa_init(struct parisc_device *dev)
+ if (!sti)
+ return 1;
+
+- print_pa_hwpath(dev, pa_path);
+- sticore_check_for_default_sti(sti, pa_path);
++ print_pa_hwpath(dev, sti->pa_path);
++ sticore_check_for_default_sti(sti, sti->pa_path);
+ return 0;
+ }
+
+@@ -1007,9 +1008,8 @@ static int sticore_pci_init(struct pci_dev *pd, const struct pci_device_id *ent)
+
+ sti = sti_try_rom_generic(rom_base, fb_base, pd);
+ if (sti) {
+- char pa_path[30];
+- print_pci_hwpath(pd, pa_path);
+- sticore_check_for_default_sti(sti, pa_path);
++ print_pci_hwpath(pd, sti->pa_path);
++ sticore_check_for_default_sti(sti, sti->pa_path);
+ }
+
+ if (!sti) {
+@@ -1127,6 +1127,24 @@ int sti_call(const struct sti_struct *sti, unsigned long func,
+ return ret;
+ }
+
++#if defined(CONFIG_FB_STI)
++/* check if given fb_info is the primary device */
++int fb_is_primary_device(struct fb_info *info)
++{
++ struct sti_struct *sti;
++
++ sti = sti_get_rom(0);
++
++ /* if no built-in graphics card found, allow any fb driver as default */
++ if (!sti)
++ return true;
++
++ /* return true if it's the default built-in framebuffer driver */
++ return (sti->info == info);
++}
++EXPORT_SYMBOL(fb_is_primary_device);
++#endif
++
+ MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer");
+ MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines");
+ MODULE_LICENSE("GPL v2");
+diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
+index ef9c57ce09066..9a49ea6b5112f 100644
+--- a/drivers/video/console/vgacon.c
++++ b/drivers/video/console/vgacon.c
+@@ -366,11 +366,17 @@ static void vgacon_init(struct vc_data *c, int init)
+ struct uni_pagedir *p;
+
+ /*
+- * We cannot be loaded as a module, therefore init is always 1,
+- * but vgacon_init can be called more than once, and init will
+- * not be 1.
++ * We cannot be loaded as a module, therefore init will be 1
++ * if we are the default console, however if we are a fallback
++ * console, for example if fbcon has failed registration, then
++ * init will be 0, so we need to make sure our boot parameters
++ * have been copied to the console structure for vgacon_resize
++ * ultimately called by vc_resize. Any subsequent calls to
++ * vgacon_init init will have init set to 0 too.
+ */
+ c->vc_can_do_color = vga_can_do_color;
++ c->vc_scan_lines = vga_scan_lines;
++ c->vc_font.height = c->vc_cell_height = vga_video_font_height;
+
+ /* set dimensions manually if init != 0 since vc_resize() will fail */
+ if (init) {
+@@ -379,8 +385,6 @@ static void vgacon_init(struct vc_data *c, int init)
+ } else
+ vc_resize(c, vga_video_num_columns, vga_video_num_lines);
+
+- c->vc_scan_lines = vga_scan_lines;
+- c->vc_font.height = c->vc_cell_height = vga_video_font_height;
+ c->vc_complement_mask = 0x7700;
+ if (vga_512_chars)
+ c->vc_hi_font_mask = 0x0800;
+diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
+index 6ed5e608dd041..26531aa194282 100644
+--- a/drivers/video/fbdev/Kconfig
++++ b/drivers/video/fbdev/Kconfig
+@@ -606,6 +606,7 @@ config FB_TGA
+ config FB_UVESA
+ tristate "Userspace VESA VGA graphics support"
+ depends on FB && CONNECTOR
++ depends on !UML
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+@@ -2218,7 +2219,6 @@ config FB_SSD1307
+ select FB_SYS_COPYAREA
+ select FB_SYS_IMAGEBLIT
+ select FB_DEFERRED_IO
+- select PWM
+ select FB_BACKLIGHT
+ help
+ This driver implements support for the Solomon SSD1307
+diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
+index 9ec969e136bfd..f65c96d1394d3 100644
+--- a/drivers/video/fbdev/amba-clcd.c
++++ b/drivers/video/fbdev/amba-clcd.c
+@@ -698,16 +698,18 @@ static int clcdfb_of_init_display(struct clcd_fb *fb)
+ return -ENODEV;
+
+ panel = of_graph_get_remote_port_parent(endpoint);
+- if (!panel)
+- return -ENODEV;
++ if (!panel) {
++ err = -ENODEV;
++ goto out_endpoint_put;
++ }
+
+ err = clcdfb_of_get_backlight(&fb->dev->dev, fb->panel);
+ if (err)
+- return err;
++ goto out_panel_put;
+
+ err = clcdfb_of_get_mode(&fb->dev->dev, panel, fb->panel);
+ if (err)
+- return err;
++ goto out_panel_put;
+
+ err = of_property_read_u32(fb->dev->dev.of_node, "max-memory-bandwidth",
+ &max_bandwidth);
+@@ -736,11 +738,21 @@ static int clcdfb_of_init_display(struct clcd_fb *fb)
+
+ if (of_property_read_u32_array(endpoint,
+ "arm,pl11x,tft-r0g0b0-pads",
+- tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) != 0)
+- return -ENOENT;
++ tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) != 0) {
++ err = -ENOENT;
++ goto out_panel_put;
++ }
++
++ of_node_put(panel);
++ of_node_put(endpoint);
+
+ return clcdfb_of_init_tft_panel(fb, tft_r0b0g0[0],
+ tft_r0b0g0[1], tft_r0b0g0[2]);
++out_panel_put:
++ of_node_put(panel);
++out_endpoint_put:
++ of_node_put(endpoint);
++ return err;
+ }
+
+ static int clcdfb_of_vram_setup(struct clcd_fb *fb)
+@@ -758,12 +770,15 @@ static int clcdfb_of_vram_setup(struct clcd_fb *fb)
+ return -ENODEV;
+
+ fb->fb.screen_base = of_iomap(memory, 0);
+- if (!fb->fb.screen_base)
++ if (!fb->fb.screen_base) {
++ of_node_put(memory);
+ return -ENOMEM;
++ }
+
+ fb->fb.fix.smem_start = of_translate_address(memory,
+ of_get_address(memory, 0, &size, NULL));
+ fb->fb.fix.smem_len = size;
++ of_node_put(memory);
+
+ return 0;
+ }
+diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
+index 45e64016db328..024d0ee4f04f9 100644
+--- a/drivers/video/fbdev/arcfb.c
++++ b/drivers/video/fbdev/arcfb.c
+@@ -523,7 +523,7 @@ static int arcfb_probe(struct platform_device *dev)
+
+ info = framebuffer_alloc(sizeof(struct arcfb_par), &dev->dev);
+ if (!info)
+- goto err;
++ goto err_fb_alloc;
+
+ info->screen_base = (char __iomem *)videomemory;
+ info->fbops = &arcfb_ops;
+@@ -535,7 +535,7 @@ static int arcfb_probe(struct platform_device *dev)
+
+ if (!dio_addr || !cio_addr || !c2io_addr) {
+ printk(KERN_WARNING "no IO addresses supplied\n");
+- goto err1;
++ goto err_addr;
+ }
+ par->dio_addr = dio_addr;
+ par->cio_addr = cio_addr;
+@@ -551,12 +551,12 @@ static int arcfb_probe(struct platform_device *dev)
+ printk(KERN_INFO
+ "arcfb: Failed req IRQ %d\n", par->irq);
+ retval = -EBUSY;
+- goto err1;
++ goto err_addr;
+ }
+ }
+ retval = register_framebuffer(info);
+ if (retval < 0)
+- goto err1;
++ goto err_register_fb;
+ platform_set_drvdata(dev, info);
+ fb_info(info, "Arc frame buffer device, using %dK of video memory\n",
+ videomemorysize >> 10);
+@@ -580,9 +580,12 @@ static int arcfb_probe(struct platform_device *dev)
+ }
+
+ return 0;
+-err1:
++
++err_register_fb:
++ free_irq(par->irq, info);
++err_addr:
+ framebuffer_release(info);
+-err:
++err_fb_alloc:
+ vfree(videomemory);
+ return retval;
+ }
+diff --git a/drivers/video/fbdev/arkfb.c b/drivers/video/fbdev/arkfb.c
+index edf169d0816e6..8d092b1064706 100644
+--- a/drivers/video/fbdev/arkfb.c
++++ b/drivers/video/fbdev/arkfb.c
+@@ -778,7 +778,12 @@ static int arkfb_set_par(struct fb_info *info)
+ return -EINVAL;
+ }
+
+- ark_set_pixclock(info, (hdiv * info->var.pixclock) / hmul);
++ value = (hdiv * info->var.pixclock) / hmul;
++ if (!value) {
++ fb_dbg(info, "invalid pixclock\n");
++ value = 1;
++ }
++ ark_set_pixclock(info, value);
+ svga_set_timings(par->state.vgabase, &ark_timing_regs, &(info->var), hmul, hdiv,
+ (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1,
+ (info->var.vmode & FB_VMODE_INTERLACED) ? 2 : 1,
+@@ -789,6 +794,8 @@ static int arkfb_set_par(struct fb_info *info)
+ value = ((value * hmul / hdiv) / 8) - 5;
+ vga_wcrt(par->state.vgabase, 0x42, (value + 1) / 2);
+
++ if (screen_size > info->screen_size)
++ screen_size = info->screen_size;
+ memset_io(info->screen_base, 0x00, screen_size);
+ /* Device and screen back on */
+ svga_wcrt_mask(par->state.vgabase, 0x17, 0x80, 0x80);
+diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c
+index e3812a8ff55a4..29e650ecfceb1 100644
+--- a/drivers/video/fbdev/atafb.c
++++ b/drivers/video/fbdev/atafb.c
+@@ -1683,9 +1683,9 @@ static int falcon_setcolreg(unsigned int regno, unsigned int red,
+ ((blue & 0xfc00) >> 8));
+ if (regno < 16) {
+ shifter_tt.color_reg[regno] =
+- (((red & 0xe000) >> 13) | ((red & 0x1000) >> 12) << 8) |
+- (((green & 0xe000) >> 13) | ((green & 0x1000) >> 12) << 4) |
+- ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12);
++ ((((red & 0xe000) >> 13) | ((red & 0x1000) >> 12)) << 8) |
++ ((((green & 0xe000) >> 13) | ((green & 0x1000) >> 12)) << 4) |
++ ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12);
+ ((u32 *)info->pseudo_palette)[regno] = ((red & 0xf800) |
+ ((green & 0xfc00) >> 5) |
+ ((blue & 0xf800) >> 11));
+@@ -1971,9 +1971,9 @@ static int stste_setcolreg(unsigned int regno, unsigned int red,
+ green >>= 12;
+ if (ATARIHW_PRESENT(EXTD_SHIFTER))
+ shifter_tt.color_reg[regno] =
+- (((red & 0xe) >> 1) | ((red & 1) << 3) << 8) |
+- (((green & 0xe) >> 1) | ((green & 1) << 3) << 4) |
+- ((blue & 0xe) >> 1) | ((blue & 1) << 3);
++ ((((red & 0xe) >> 1) | ((red & 1) << 3)) << 8) |
++ ((((green & 0xe) >> 1) | ((green & 1) << 3)) << 4) |
++ ((blue & 0xe) >> 1) | ((blue & 1) << 3);
+ else
+ shifter_tt.color_reg[regno] =
+ ((red & 0xe) << 7) |
+diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
+index 355b6120dc4f0..1fc8de4ecbebf 100644
+--- a/drivers/video/fbdev/atmel_lcdfb.c
++++ b/drivers/video/fbdev/atmel_lcdfb.c
+@@ -1062,15 +1062,16 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev)
+
+ INIT_LIST_HEAD(&info->modelist);
+
+- if (pdev->dev.of_node) {
+- ret = atmel_lcdfb_of_init(sinfo);
+- if (ret)
+- goto free_info;
+- } else {
++ if (!pdev->dev.of_node) {
+ dev_err(dev, "cannot get default configuration\n");
+ goto free_info;
+ }
+
++ ret = atmel_lcdfb_of_init(sinfo);
++ if (ret)
++ goto free_info;
++
++ ret = -ENODEV;
+ if (!sinfo->config)
+ goto free_info;
+
+diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
+index c00e01a173685..80f54111baec1 100644
+--- a/drivers/video/fbdev/au1200fb.c
++++ b/drivers/video/fbdev/au1200fb.c
+@@ -1040,6 +1040,9 @@ static int au1200fb_fb_check_var(struct fb_var_screeninfo *var,
+ u32 pixclock;
+ int screen_size, plane;
+
++ if (!var->pixclock)
++ return -EINVAL;
++
+ plane = fbdev->plane;
+
+ /* Make sure that the mode respect all LCD controller and
+@@ -1729,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
+
+ /* Now hook interrupt too */
+ irq = platform_get_irq(dev, 0);
++ if (irq < 0)
++ return irq;
++
+ ret = request_irq(irq, au1200fb_handle_irq,
+ IRQF_SHARED, "lcd", (void *)dev);
+ if (ret) {
+diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c
+index 998067b701fa0..2b00a9d554fc0 100644
+--- a/drivers/video/fbdev/chipsfb.c
++++ b/drivers/video/fbdev/chipsfb.c
+@@ -331,7 +331,7 @@ static const struct fb_var_screeninfo chipsfb_var = {
+
+ static void init_chips(struct fb_info *p, unsigned long addr)
+ {
+- memset(p->screen_base, 0, 0x100000);
++ fb_memset(p->screen_base, 0, 0x100000);
+
+ p->fix = chipsfb_fix;
+ p->fix.smem_start = addr;
+@@ -430,6 +430,7 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
+ err_release_fb:
+ framebuffer_release(p);
+ err_disable:
++ pci_disable_device(dp);
+ err_out:
+ return rc;
+ }
+diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c
+index 93802abbbc72a..3d47c347b8970 100644
+--- a/drivers/video/fbdev/cirrusfb.c
++++ b/drivers/video/fbdev/cirrusfb.c
+@@ -469,7 +469,7 @@ static int cirrusfb_check_mclk(struct fb_info *info, long freq)
+ return 0;
+ }
+
+-static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var,
++static int cirrusfb_check_pixclock(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+ {
+ long freq;
+@@ -478,9 +478,7 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var,
+ unsigned maxclockidx = var->bits_per_pixel >> 3;
+
+ /* convert from ps to kHz */
+- freq = PICOS2KHZ(var->pixclock);
+-
+- dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq);
++ freq = PICOS2KHZ(var->pixclock ? : 1);
+
+ maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx];
+ cinfo->multiplexing = 0;
+@@ -488,11 +486,13 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var,
+ /* If the frequency is greater than we can support, we might be able
+ * to use multiplexing for the video mode */
+ if (freq > maxclock) {
+- dev_err(info->device,
+- "Frequency greater than maxclock (%ld kHz)\n",
+- maxclock);
+- return -EINVAL;
++ var->pixclock = KHZ2PICOS(maxclock);
++
++ while ((freq = PICOS2KHZ(var->pixclock)) > maxclock)
++ var->pixclock++;
+ }
++ dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq);
++
+ /*
+ * Additional constraint: 8bpp uses DAC clock doubling to allow maximum
+ * pixel clock
+diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c
+index 509311471d515..bd59e7b11ed53 100644
+--- a/drivers/video/fbdev/controlfb.c
++++ b/drivers/video/fbdev/controlfb.c
+@@ -67,7 +67,9 @@
+ #define out_8(addr, val) (void)(val)
+ #define in_le32(addr) 0
+ #define out_le32(addr, val) (void)(val)
++#ifndef pgprot_cached_wthru
+ #define pgprot_cached_wthru(prot) (prot)
++#endif
+ #else
+ static void invalid_vram_cache(void __force *addr)
+ {
+diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c
+index f98e8f298bc19..8587c9da06700 100644
+--- a/drivers/video/fbdev/core/bitblit.c
++++ b/drivers/video/fbdev/core/bitblit.c
+@@ -247,6 +247,9 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+
+ cursor.set = 0;
+
++ if (!vc->vc_font.data)
++ return;
++
+ c = scr_readw((u16 *) vc->vc_pos);
+ attribute = get_attribute(info, c);
+ src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height));
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index 22bb3892f6bd1..b6712655ec1f0 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -115,8 +115,8 @@ static int logo_lines;
+ enums. */
+ static int logo_shown = FBCON_LOGO_CANSHOW;
+ /* console mappings */
+-static int first_fb_vc;
+-static int last_fb_vc = MAX_NR_CONSOLES - 1;
++static unsigned int first_fb_vc;
++static unsigned int last_fb_vc = MAX_NR_CONSOLES - 1;
+ static int fbcon_is_default = 1;
+ static int primary_device = -1;
+ static int fbcon_has_console_bind;
+@@ -464,10 +464,12 @@ static int __init fb_console_setup(char *this_opt)
+ options += 3;
+ if (*options)
+ first_fb_vc = simple_strtoul(options, &options, 10) - 1;
+- if (first_fb_vc < 0)
++ if (first_fb_vc >= MAX_NR_CONSOLES)
+ first_fb_vc = 0;
+ if (*options++ == '-')
+ last_fb_vc = simple_strtoul(options, &options, 10) - 1;
++ if (last_fb_vc < first_fb_vc || last_fb_vc >= MAX_NR_CONSOLES)
++ last_fb_vc = MAX_NR_CONSOLES - 1;
+ fbcon_is_default = 0;
+ continue;
+ }
+@@ -599,7 +601,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
+ if (scr_readw(r) != vc->vc_video_erase_char)
+ break;
+ if (r != q && new_rows >= rows + logo_lines) {
+- save = kmalloc(array3_size(logo_lines, new_cols, 2),
++ save = kzalloc(array3_size(logo_lines, new_cols, 2),
+ GFP_KERNEL);
+ if (save) {
+ int i = cols < new_cols ? cols : new_cols;
+@@ -987,7 +989,7 @@ static const char *fbcon_startup(void)
+ set_blitting_type(vc, info);
+
+ /* Setup default font */
+- if (!p->fontdata && !vc->vc_font.data) {
++ if (!p->fontdata) {
+ if (!fontname[0] || !(font = find_font(fontname)))
+ font = get_default_font(info->var.xres,
+ info->var.yres,
+@@ -997,8 +999,6 @@ static const char *fbcon_startup(void)
+ vc->vc_font.height = font->height;
+ vc->vc_font.data = (void *)(p->fontdata = font->data);
+ vc->vc_font.charcount = font->charcount;
+- } else {
+- p->fontdata = vc->vc_font.data;
+ }
+
+ cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres);
+@@ -1135,13 +1135,13 @@ static void fbcon_init(struct vc_data *vc, int init)
+
+ ops->graphics = 0;
+
+- /*
+- * No more hw acceleration for fbcon.
+- *
+- * FIXME: Garbage collect all the now dead code after sufficient time
+- * has passed.
+- */
+- p->scrollmode = SCROLL_REDRAW;
++#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
++ if ((info->flags & FBINFO_HWACCEL_COPYAREA) &&
++ !(info->flags & FBINFO_HWACCEL_DISABLED))
++ p->scrollmode = SCROLL_MOVE;
++ else /* default to something safe */
++ p->scrollmode = SCROLL_REDRAW;
++#endif
+
+ /*
+ * ++guenther: console.c:vc_allocate() relies on initializing
+@@ -1165,9 +1165,9 @@ static void fbcon_init(struct vc_data *vc, int init)
+ ops->p = &fb_display[fg_console];
+ }
+
+-static void fbcon_free_font(struct fbcon_display *p, bool freefont)
++static void fbcon_free_font(struct fbcon_display *p)
+ {
+- if (freefont && p->userfont && p->fontdata && (--REFCOUNT(p->fontdata) == 0))
++ if (p->userfont && p->fontdata && (--REFCOUNT(p->fontdata) == 0))
+ kfree(p->fontdata - FONT_EXTRA_WORDS * sizeof(int));
+ p->fontdata = NULL;
+ p->userfont = 0;
+@@ -1181,8 +1181,8 @@ static void fbcon_deinit(struct vc_data *vc)
+ struct fb_info *info;
+ struct fbcon_ops *ops;
+ int idx;
+- bool free_font = true;
+
++ fbcon_free_font(p);
+ idx = con2fb_map[vc->vc_num];
+
+ if (idx == -1)
+@@ -1193,8 +1193,6 @@ static void fbcon_deinit(struct vc_data *vc)
+ if (!info)
+ goto finished;
+
+- if (info->flags & FBINFO_MISC_FIRMWARE)
+- free_font = false;
+ ops = info->fbcon_par;
+
+ if (!ops)
+@@ -1206,9 +1204,8 @@ static void fbcon_deinit(struct vc_data *vc)
+ ops->flags &= ~FBCON_FLAGS_INIT;
+ finished:
+
+- fbcon_free_font(p, free_font);
+- if (free_font)
+- vc->vc_font.data = NULL;
++ fbcon_free_font(p);
++ vc->vc_font.data = NULL;
+
+ if (vc->vc_hi_font_mask && vc->vc_screenbuf)
+ set_vc_hi_font(vc, false);
+@@ -1704,9 +1701,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+ case SM_UP:
+ if (count > vc->vc_rows) /* Maximum realistic size */
+ count = vc->vc_rows;
+- if (logo_shown >= 0)
+- goto redraw_up;
+- switch (p->scrollmode) {
++ switch (fb_scrollmode(p)) {
+ case SCROLL_MOVE:
+ fbcon_redraw_blit(vc, info, p, t, b - t - count,
+ count);
+@@ -1794,9 +1789,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+ case SM_DOWN:
+ if (count > vc->vc_rows) /* Maximum realistic size */
+ count = vc->vc_rows;
+- if (logo_shown >= 0)
+- goto redraw_down;
+- switch (p->scrollmode) {
++ switch (fb_scrollmode(p)) {
+ case SCROLL_MOVE:
+ fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
+ -count);
+@@ -1947,6 +1940,48 @@ static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy,
+ height, width);
+ }
+
++static void updatescrollmode_accel(struct fbcon_display *p,
++ struct fb_info *info,
++ struct vc_data *vc)
++{
++#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
++ struct fbcon_ops *ops = info->fbcon_par;
++ int cap = info->flags;
++ u16 t = 0;
++ int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep,
++ info->fix.xpanstep);
++ int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t);
++ int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
++ int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual,
++ info->var.xres_virtual);
++ int good_pan = (cap & FBINFO_HWACCEL_YPAN) &&
++ divides(ypan, vc->vc_font.height) && vyres > yres;
++ int good_wrap = (cap & FBINFO_HWACCEL_YWRAP) &&
++ divides(ywrap, vc->vc_font.height) &&
++ divides(vc->vc_font.height, vyres) &&
++ divides(vc->vc_font.height, yres);
++ int reading_fast = cap & FBINFO_READS_FAST;
++ int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) &&
++ !(cap & FBINFO_HWACCEL_DISABLED);
++ int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) &&
++ !(cap & FBINFO_HWACCEL_DISABLED);
++
++ if (good_wrap || good_pan) {
++ if (reading_fast || fast_copyarea)
++ p->scrollmode = good_wrap ?
++ SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE;
++ else
++ p->scrollmode = good_wrap ? SCROLL_REDRAW :
++ SCROLL_PAN_REDRAW;
++ } else {
++ if (reading_fast || (fast_copyarea && !fast_imageblit))
++ p->scrollmode = SCROLL_MOVE;
++ else
++ p->scrollmode = SCROLL_REDRAW;
++ }
++#endif
++}
++
+ static void updatescrollmode(struct fbcon_display *p,
+ struct fb_info *info,
+ struct vc_data *vc)
+@@ -1962,6 +1997,9 @@ static void updatescrollmode(struct fbcon_display *p,
+ p->vrows -= (yres - (fh * vc->vc_rows)) / fh;
+ if ((yres % fh) && (vyres % fh < yres % fh))
+ p->vrows--;
++
++ /* update scrollmode in case hardware acceleration is used */
++ updatescrollmode_accel(p, info, vc);
+ }
+
+ #define PITCH(w) (((w) + 7) >> 3)
+@@ -2119,7 +2157,7 @@ static int fbcon_switch(struct vc_data *vc)
+
+ updatescrollmode(p, info, vc);
+
+- switch (p->scrollmode) {
++ switch (fb_scrollmode(p)) {
+ case SCROLL_WRAP_MOVE:
+ scrollback_phys_max = p->vrows - vc->vc_rows;
+ break;
+@@ -2370,15 +2408,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *p = &fb_display[vc->vc_num];
+- int resize;
++ int resize, ret, old_userfont, old_width, old_height, old_charcount;
+ char *old_data = NULL;
+
+ resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
+ if (p->userfont)
+ old_data = vc->vc_font.data;
+ vc->vc_font.data = (void *)(p->fontdata = data);
++ old_userfont = p->userfont;
+ if ((p->userfont = userfont))
+ REFCOUNT(data)++;
++
++ old_width = vc->vc_font.width;
++ old_height = vc->vc_font.height;
++ old_charcount = vc->vc_font.charcount;
++
+ vc->vc_font.width = w;
+ vc->vc_font.height = h;
+ vc->vc_font.charcount = charcount;
+@@ -2394,7 +2438,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ cols /= w;
+ rows /= h;
+- vc_resize(vc, cols, rows);
++ ret = vc_resize(vc, cols, rows);
++ if (ret)
++ goto err_out;
+ } else if (con_is_visible(vc)
+ && vc->vc_mode == KD_TEXT) {
+ fbcon_clear_margins(vc, 0);
+@@ -2404,6 +2450,22 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
+ if (old_data && (--REFCOUNT(old_data) == 0))
+ kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
+ return 0;
++
++err_out:
++ p->fontdata = old_data;
++ vc->vc_font.data = (void *)old_data;
++
++ if (userfont) {
++ p->userfont = old_userfont;
++ if (--REFCOUNT(data) == 0)
++ kfree(data - FONT_EXTRA_WORDS * sizeof(int));
++ }
++
++ vc->vc_font.width = old_width;
++ vc->vc_font.height = old_height;
++ vc->vc_font.charcount = old_charcount;
++
++ return ret;
+ }
+
+ /*
+@@ -2435,9 +2497,17 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font,
+ if (charcount != 256 && charcount != 512)
+ return -EINVAL;
+
++ /* font bigger than screen resolution ? */
++ if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) ||
++ h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres))
++ return -EINVAL;
++
++ if (font->width > 32 || font->height > 32)
++ return -EINVAL;
++
+ /* Make sure drawing engine can handle the font */
+- if (!(info->pixmap.blit_x & (1 << (font->width - 1))) ||
+- !(info->pixmap.blit_y & (1 << (font->height - 1))))
++ if (!(info->pixmap.blit_x & BIT(font->width - 1)) ||
++ !(info->pixmap.blit_y & BIT(font->height - 1)))
+ return -EINVAL;
+
+ /* Make sure driver can handle the font length */
+@@ -2697,6 +2767,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all)
+ }
+ EXPORT_SYMBOL(fbcon_update_vcs);
+
++/* let fbcon check if it supports a new screen resolution */
++int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var)
++{
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct vc_data *vc;
++ unsigned int i;
++
++ WARN_CONSOLE_UNLOCKED();
++
++ if (!ops)
++ return 0;
++
++ /* prevent setting a screen size which is smaller than font size */
++ for (i = first_fb_vc; i <= last_fb_vc; i++) {
++ vc = vc_cons[i].d;
++ if (!vc || vc->vc_mode != KD_TEXT ||
++ registered_fb[con2fb_map[i]] != info)
++ continue;
++
++ if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) ||
++ vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres))
++ return -EINVAL;
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(fbcon_modechange_possible);
++
+ int fbcon_mode_deleted(struct fb_info *info,
+ struct fb_videomode *mode)
+ {
+@@ -3220,6 +3318,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work)
+
+ console_lock();
+
++ deferred_takeover = false;
++ logo_shown = FBCON_LOGO_DONTSHOW;
++
+ for_each_registered_fb(i)
+ fbcon_fb_registered(registered_fb[i]);
+
+@@ -3237,8 +3338,6 @@ static int fbcon_output_notifier(struct notifier_block *nb,
+ pr_info("fbcon: Taking over console\n");
+
+ dummycon_unregister_output_notifier(&fbcon_output_nb);
+- deferred_takeover = false;
+- logo_shown = FBCON_LOGO_DONTSHOW;
+
+ /* We may get called in atomic context */
+ schedule_work(&fbcon_deferred_takeover_work);
+diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
+index 9315b360c8981..0f16cbc99e6a4 100644
+--- a/drivers/video/fbdev/core/fbcon.h
++++ b/drivers/video/fbdev/core/fbcon.h
+@@ -29,7 +29,9 @@ struct fbcon_display {
+ /* Filled in by the low-level console driver */
+ const u_char *fontdata;
+ int userfont; /* != 0 if fontdata kmalloc()ed */
+- u_short scrollmode; /* Scroll Method */
++#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
++ u_short scrollmode; /* Scroll Method, use fb_scrollmode() */
++#endif
+ u_short inverse; /* != 0 text black on white as default */
+ short yscroll; /* Hardware scrolling */
+ int vrows; /* number of virtual rows */
+@@ -208,6 +210,17 @@ static inline int attr_col_ec(int shift, struct vc_data *vc,
+ #define SCROLL_REDRAW 0x004
+ #define SCROLL_PAN_REDRAW 0x005
+
++static inline u_short fb_scrollmode(struct fbcon_display *fb)
++{
++#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
++ return fb->scrollmode;
++#else
++ /* hardcoded to SCROLL_REDRAW if acceleration was disabled. */
++ return SCROLL_REDRAW;
++#endif
++}
++
++
+ #ifdef CONFIG_FB_TILEBLITTING
+ extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
+ #endif
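As an aside (illustration only, not part of the patch): the call sites changed below switch from p->scrollmode to fb_scrollmode(p) because the field itself is now compiled out unless CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is set, so a direct access would no longer build in the common configuration. A minimal standalone sketch of that accessor pattern; the struct, macro and function names here are invented for illustration:

/* sketch.c -- accessor pattern for an optionally compiled-out field */
#include <stdio.h>

#define SCROLL_REDRAW 0x004

struct display_sketch {
#ifdef LEGACY_ACCEL	/* stands in for CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION */
	unsigned short scrollmode;
#endif
	int vrows;
};

static unsigned short sketch_scrollmode(const struct display_sketch *p)
{
#ifdef LEGACY_ACCEL
	return p->scrollmode;
#else
	(void)p;
	return SCROLL_REDRAW;	/* acceleration compiled out: always redraw */
#endif
}

int main(void)
{
	struct display_sketch d = { .vrows = 25 };

	/* callers switch on the helper, never on the raw field */
	printf("scrollmode=%#x vrows=%d\n", (unsigned)sketch_scrollmode(&d), d.vrows);
	return 0;
}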
+diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c
+index 9cd2c4b05c328..2789ace796342 100644
+--- a/drivers/video/fbdev/core/fbcon_ccw.c
++++ b/drivers/video/fbdev/core/fbcon_ccw.c
+@@ -65,7 +65,7 @@ static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
+
+ area.sx = sy * vc->vc_font.height;
+ area.sy = vyres - ((sx + width) * vc->vc_font.width);
+@@ -83,7 +83,7 @@ static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dx = sy * vc->vc_font.height;
+@@ -140,7 +140,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info,
+ u32 cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -229,7 +229,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -387,7 +387,7 @@ static int ccw_update_start(struct fb_info *info)
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ u32 yoffset;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
+ int err;
+
+ yoffset = (vyres - info->var.yres) - ops->var.xoffset;
+diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c
+index 88d89fad3f05e..86a254c1b2b7b 100644
+--- a/drivers/video/fbdev/core/fbcon_cw.c
++++ b/drivers/video/fbdev/core/fbcon_cw.c
+@@ -50,7 +50,7 @@ static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ area.sx = vxres - ((sy + height) * vc->vc_font.height);
+ area.sy = sx * vc->vc_font.width;
+@@ -68,7 +68,7 @@ static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dx = vxres - ((sy + height) * vc->vc_font.height);
+@@ -125,7 +125,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info,
+ u32 cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -212,7 +212,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -369,7 +369,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ static int cw_update_start(struct fb_info *info)
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vxres = GETVXRES(ops->p, info);
+ u32 xoffset;
+ int err;
+
+diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h
+index e233444cda664..01cbe303b8a29 100644
+--- a/drivers/video/fbdev/core/fbcon_rotate.h
++++ b/drivers/video/fbdev/core/fbcon_rotate.h
+@@ -12,11 +12,11 @@
+ #define _FBCON_ROTATE_H
+
+ #define GETVYRES(s,i) ({ \
+- (s == SCROLL_REDRAW || s == SCROLL_MOVE) ? \
++ (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE) ? \
+ (i)->var.yres : (i)->var.yres_virtual; })
+
+ #define GETVXRES(s,i) ({ \
+- (s == SCROLL_REDRAW || s == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
++ (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
+ (i)->var.xres : (i)->var.xres_virtual; })
+
+
+diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c
+index 8d5e66b1bdfbb..23bc045769d08 100644
+--- a/drivers/video/fbdev/core/fbcon_ud.c
++++ b/drivers/video/fbdev/core/fbcon_ud.c
+@@ -50,8 +50,8 @@ static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_copyarea area;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ area.sy = vyres - ((sy + height) * vc->vc_font.height);
+ area.sx = vxres - ((sx + width) * vc->vc_font.width);
+@@ -69,8 +69,8 @@ static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fb_fillrect region;
+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ region.color = attr_bgcol_ec(bgshift,vc,info);
+ region.dy = vyres - ((sy + height) * vc->vc_font.height);
+@@ -162,8 +162,8 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info,
+ u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
+ u32 attribute = get_attribute(info, scr_readw(s));
+ u8 *dst, *buf = NULL;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -259,8 +259,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
+ int err = 1, dx, dy;
+ char *src;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
++ u32 vxres = GETVXRES(ops->p, info);
+
+ if (!ops->fontbuffer)
+ return;
+@@ -410,8 +410,8 @@ static int ud_update_start(struct fb_info *info)
+ {
+ struct fbcon_ops *ops = info->fbcon_par;
+ int xoffset, yoffset;
+- u32 vyres = GETVYRES(ops->p->scrollmode, info);
+- u32 vxres = GETVXRES(ops->p->scrollmode, info);
++ u32 vyres = GETVYRES(ops->p, info);
++ u32 vxres = GETVXRES(ops->p, info);
+ int err;
+
+ xoffset = vxres - info->var.xres - ops->var.xoffset;
+diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c
+index 55d2bd0ce5c02..64843464c6613 100644
+--- a/drivers/video/fbdev/core/fbcvt.c
++++ b/drivers/video/fbdev/core/fbcvt.c
+@@ -214,9 +214,11 @@ static u32 fb_cvt_aspect_ratio(struct fb_cvt_data *cvt)
+ static void fb_cvt_print_name(struct fb_cvt_data *cvt)
+ {
+ u32 pixcount, pixcount_mod;
+- int cnt = 255, offset = 0, read = 0;
+- u8 *buf = kzalloc(256, GFP_KERNEL);
++ int size = 256;
++ int off = 0;
++ u8 *buf;
+
++ buf = kzalloc(size, GFP_KERNEL);
+ if (!buf)
+ return;
+
+@@ -224,43 +226,30 @@ static void fb_cvt_print_name(struct fb_cvt_data *cvt)
+ pixcount_mod = (cvt->xres * (cvt->yres/cvt->interlace)) % 1000000;
+ pixcount_mod /= 1000;
+
+- read = snprintf(buf+offset, cnt, "fbcvt: %dx%d@%d: CVT Name - ",
+- cvt->xres, cvt->yres, cvt->refresh);
+- offset += read;
+- cnt -= read;
++ off += scnprintf(buf + off, size - off, "fbcvt: %dx%d@%d: CVT Name - ",
++ cvt->xres, cvt->yres, cvt->refresh);
+
+- if (cvt->status)
+- snprintf(buf+offset, cnt, "Not a CVT standard - %d.%03d Mega "
+- "Pixel Image\n", pixcount, pixcount_mod);
+- else {
+- if (pixcount) {
+- read = snprintf(buf+offset, cnt, "%d", pixcount);
+- cnt -= read;
+- offset += read;
+- }
++ if (cvt->status) {
++ off += scnprintf(buf + off, size - off,
++ "Not a CVT standard - %d.%03d Mega Pixel Image\n",
++ pixcount, pixcount_mod);
++ } else {
++ if (pixcount)
++ off += scnprintf(buf + off, size - off, "%d", pixcount);
+
+- read = snprintf(buf+offset, cnt, ".%03dM", pixcount_mod);
+- cnt -= read;
+- offset += read;
++ off += scnprintf(buf + off, size - off, ".%03dM", pixcount_mod);
+
+ if (cvt->aspect_ratio == 0)
+- read = snprintf(buf+offset, cnt, "3");
++ off += scnprintf(buf + off, size - off, "3");
+ else if (cvt->aspect_ratio == 3)
+- read = snprintf(buf+offset, cnt, "4");
++ off += scnprintf(buf + off, size - off, "4");
+ else if (cvt->aspect_ratio == 1 || cvt->aspect_ratio == 4)
+- read = snprintf(buf+offset, cnt, "9");
++ off += scnprintf(buf + off, size - off, "9");
+ else if (cvt->aspect_ratio == 2)
+- read = snprintf(buf+offset, cnt, "A");
+- else
+- read = 0;
+- cnt -= read;
+- offset += read;
+-
+- if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) {
+- read = snprintf(buf+offset, cnt, "-R");
+- cnt -= read;
+- offset += read;
+- }
++ off += scnprintf(buf + off, size - off, "A");
++
++ if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK)
++ off += scnprintf(buf + off, size - off, "-R");
+ }
+
+ printk(KERN_INFO "%s\n", buf);
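As an aside (illustration only, not part of the patch): the rewrite above works because scnprintf(), unlike snprintf(), returns the number of characters actually stored (capped at size - 1), so the off += scnprintf(buf + off, size - off, ...) accumulation can never step past the end of the buffer even when output is truncated. A self-contained userspace sketch of the same pattern, using a clamped vsnprintf() wrapper as a stand-in for the kernel helper:

/* scnprintf_sketch.c -- safe printf accumulation into a fixed buffer */
#include <stdarg.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's scnprintf(): returns the number of
 * characters written into buf, never the would-have-been length. */
static int scnprintf_sketch(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int n;

	if (!size)
		return 0;
	va_start(args, fmt);
	n = vsnprintf(buf, size, fmt, args);
	va_end(args);
	if (n < 0)
		return 0;
	return n >= (int)size ? (int)size - 1 : n;
}

int main(void)
{
	char buf[32];
	int off = 0;

	/* 'size - off' shrinks as 'off' grows; 'off' can never overshoot. */
	off += scnprintf_sketch(buf + off, sizeof(buf) - off,
				"fbcvt: %dx%d@%d: ", 1024, 768, 60);
	off += scnprintf_sketch(buf + off, sizeof(buf) - off,
				"CVT Name - %d.%03dM", 0, 786);
	/* the second write is silently truncated instead of overflowing */
	printf("%s (len=%d)\n", buf, off);
	return 0;
}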
+diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
+index 7420d2c16e47e..1b288a613a6e5 100644
+--- a/drivers/video/fbdev/core/fbmem.c
++++ b/drivers/video/fbdev/core/fbmem.c
+@@ -19,12 +19,14 @@
+ #include <linux/kernel.h>
+ #include <linux/major.h>
+ #include <linux/slab.h>
++#include <linux/sysfb.h>
+ #include <linux/mm.h>
+ #include <linux/mman.h>
+ #include <linux/vt.h>
+ #include <linux/init.h>
+ #include <linux/linux_logo.h>
+ #include <linux/proc_fs.h>
++#include <linux/platform_device.h>
+ #include <linux/seq_file.h>
+ #include <linux/console.h>
+ #include <linux/kmod.h>
+@@ -513,7 +515,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate,
+
+ while (n && (n * (logo->width + 8) - 8 > xres))
+ --n;
+- image.dx = (xres - n * (logo->width + 8) - 8) / 2;
++ image.dx = (xres - (n * (logo->width + 8) - 8)) / 2;
+ image.dy = y ?: (yres - logo->height) / 2;
+ } else {
+ image.dx = 0;
+@@ -1019,6 +1021,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
+ if (ret)
+ return ret;
+
++ /* verify that virtual resolution >= physical resolution */
++ if (var->xres_virtual < var->xres ||
++ var->yres_virtual < var->yres) {
++ pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n",
++ info->fix.id,
++ var->xres_virtual, var->yres_virtual,
++ var->xres, var->yres);
++ return -EINVAL;
++ }
++
+ if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW)
+ return 0;
+
+@@ -1107,9 +1119,13 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
+ case FBIOPUT_VSCREENINFO:
+ if (copy_from_user(&var, argp, sizeof(var)))
+ return -EFAULT;
++ /* only for kernel-internal use */
++ var.activate &= ~FB_ACTIVATE_KD_TEXT;
+ console_lock();
+ lock_fb_info(info);
+- ret = fb_set_var(info, &var);
++ ret = fbcon_modechange_possible(info, &var);
++ if (!ret)
++ ret = fb_set_var(info, &var);
+ if (!ret)
+ fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL);
+ unlock_fb_info(info);
+@@ -1557,18 +1573,43 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
+ /* check all firmware fbs and kick off if the base addr overlaps */
+ for_each_registered_fb(i) {
+ struct apertures_struct *gen_aper;
++ struct device *device;
+
+ if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
+ continue;
+
+ gen_aper = registered_fb[i]->apertures;
++ device = registered_fb[i]->device;
+ if (fb_do_apertures_overlap(gen_aper, a) ||
+ (primary && gen_aper && gen_aper->count &&
+ gen_aper->ranges[0].base == VGA_FB_PHYS)) {
+
+ printk(KERN_INFO "fb%d: switching to %s from %s\n",
+ i, name, registered_fb[i]->fix.id);
+- do_unregister_framebuffer(registered_fb[i]);
++
++			 * If we kick out a firmware driver, we also want to remove
++ * If we kick-out a firmware driver, we also want to remove
++ * the underlying platform device, such as simple-framebuffer,
++ * VESA, EFI, etc. A native driver will then be able to
++ * allocate the memory range.
++ *
++ * If it's not a platform device, at least print a warning. A
++ * fix would add code to remove the device from the system.
++ */
++ if (!device) {
++ /* TODO: Represent each OF framebuffer as its own
++ * device in the device hierarchy. For now, offb
++ * doesn't have such a device, so unregister the
++ * framebuffer as before without warning.
++ */
++ do_unregister_framebuffer(registered_fb[i]);
++ } else if (dev_is_platform(device)) {
++ registered_fb[i]->forced_out = true;
++ platform_device_unregister(to_platform_device(device));
++ } else {
++ pr_warn("fb%d: cannot remove device\n", i);
++ do_unregister_framebuffer(registered_fb[i]);
++ }
+ }
+ }
+ }
+@@ -1748,6 +1789,17 @@ int remove_conflicting_framebuffers(struct apertures_struct *a,
+ do_free = true;
+ }
+
++	/*
++	 * If a driver asked to unregister a platform device registered by
++	 * sysfb, then it can be assumed that this is a driver for a display
++	 * that is set up by the system firmware and has a generic driver.
++	 *
++	 * Drivers for devices that don't have a generic driver will never
++	 * ask for this, so let's assume that a real driver for the display
++	 * was already probed and prevent sysfb from registering devices later.
++	 */
++ sysfb_disable();
++
+ mutex_lock(&registration_lock);
+ do_remove_conflicting_framebuffers(a, name, primary);
+ mutex_unlock(&registration_lock);
+@@ -1759,6 +1811,53 @@ int remove_conflicting_framebuffers(struct apertures_struct *a,
+ }
+ EXPORT_SYMBOL(remove_conflicting_framebuffers);
+
++/**
++ * is_firmware_framebuffer - detect if firmware-configured framebuffer matches
++ * @a: memory range, users of which are to be checked
++ *
++ * This function checks framebuffer devices (initialized by firmware/bootloader)
++ * which use the memory range described by @a. If @a matches, the function returns
++ * true, otherwise false.
++ */
++bool is_firmware_framebuffer(struct apertures_struct *a)
++{
++ bool do_free = false;
++ bool found = false;
++ int i;
++
++ if (!a) {
++ a = alloc_apertures(1);
++ if (!a)
++ return false;
++
++ a->ranges[0].base = 0;
++ a->ranges[0].size = ~0;
++ do_free = true;
++ }
++
++ mutex_lock(&registration_lock);
++	/* check all firmware fbs and return true if the base addr overlaps */
++ for_each_registered_fb(i) {
++ struct apertures_struct *gen_aper;
++
++ if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
++ continue;
++
++ gen_aper = registered_fb[i]->apertures;
++ if (fb_do_apertures_overlap(gen_aper, a)) {
++ found = true;
++ break;
++ }
++ }
++ mutex_unlock(&registration_lock);
++
++ if (do_free)
++ kfree(a);
++
++ return found;
++}
++EXPORT_SYMBOL(is_firmware_framebuffer);
++
+ /**
+ * remove_conflicting_pci_framebuffers - remove firmware-configured framebuffers for PCI devices
+ * @pdev: PCI device
+@@ -1848,9 +1947,13 @@ EXPORT_SYMBOL(register_framebuffer);
+ void
+ unregister_framebuffer(struct fb_info *fb_info)
+ {
+- mutex_lock(&registration_lock);
++ bool forced_out = fb_info->forced_out;
++
++ if (!forced_out)
++ mutex_lock(&registration_lock);
+ do_unregister_framebuffer(fb_info);
+- mutex_unlock(&registration_lock);
++ if (!forced_out)
++ mutex_unlock(&registration_lock);
+ }
+ EXPORT_SYMBOL(unregister_framebuffer);
+
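As an aside (illustration only, not part of the patch): is_firmware_framebuffer(), added and exported above, lets a native display driver ask whether a firmware framebuffer (efifb, simplefb, offb, ...) is currently sitting on a given memory range. A hedged sketch of a possible caller; the helper name and the BAR choice are hypothetical, and only alloc_apertures() and is_firmware_framebuffer() are taken from the fbdev core (its declaration is assumed to live in <linux/fb.h>):

/* hypothetical probe-time check: did firmware leave a framebuffer on BAR 0? */
#include <linux/fb.h>
#include <linux/pci.h>
#include <linux/slab.h>

static bool sketch_fw_owns_bar0(struct pci_dev *pdev)
{
	struct apertures_struct *ap;
	bool owned;

	ap = alloc_apertures(1);
	if (!ap)
		return false;

	ap->ranges[0].base = pci_resource_start(pdev, 0);
	ap->ranges[0].size = pci_resource_len(pdev, 0);

	owned = is_firmware_framebuffer(ap);

	kfree(ap);
	return owned;
}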
+diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c
+index 65dae05fff8e6..09ee27e7fc25f 100644
+--- a/drivers/video/fbdev/core/fbsysfs.c
++++ b/drivers/video/fbdev/core/fbsysfs.c
+@@ -80,6 +80,14 @@ void framebuffer_release(struct fb_info *info)
+ {
+ if (!info)
+ return;
++
++ if (WARN_ON(refcount_read(&info->count)))
++ return;
++
++#if IS_ENABLED(CONFIG_FB_BACKLIGHT)
++ mutex_destroy(&info->bl_curve_mutex);
++#endif
++
+ kfree(info->apertures);
+ kfree(info);
+ }
+diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c
+index 6473e0dfe1464..e78ec7f728463 100644
+--- a/drivers/video/fbdev/core/modedb.c
++++ b/drivers/video/fbdev/core/modedb.c
+@@ -257,6 +257,11 @@ static const struct fb_videomode modedb[] = {
+ { NULL, 72, 480, 300, 33386, 40, 24, 11, 19, 80, 3, 0,
+ FB_VMODE_DOUBLE },
+
++ /* 1920x1080 @ 60 Hz, 67.3 kHz hsync */
++ { NULL, 60, 1920, 1080, 6734, 148, 88, 36, 4, 44, 5, 0,
++ FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
++ FB_VMODE_NONINTERLACED },
++
+ /* 1920x1200 @ 60 Hz, 74.5 Khz hsync */
+ { NULL, 60, 1920, 1200, 5177, 128, 336, 1, 38, 208, 3,
+ FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c
+index a4d05b1b17d7d..665ef7a0a2495 100644
+--- a/drivers/video/fbdev/core/sysimgblt.c
++++ b/drivers/video/fbdev/core/sysimgblt.c
+@@ -188,23 +188,29 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
+ {
+ u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
+ u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
+- u32 bit_mask, end_mask, eorx, shift;
+- const char *s = image->data, *src;
++ u32 bit_mask, eorx, shift;
++ const u8 *s = image->data, *src;
+ u32 *dst;
+- const u32 *tab = NULL;
++ const u32 *tab;
++ size_t tablen;
++ u32 colortab[16];
+ int i, j, k;
+
+ switch (bpp) {
+ case 8:
+ tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
++ tablen = 16;
+ break;
+ case 16:
+ tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
++ tablen = 4;
+ break;
+ case 32:
+- default:
+ tab = cfb_tab32;
++ tablen = 2;
+ break;
++ default:
++ return;
+ }
+
+ for (i = ppw-1; i--; ) {
+@@ -218,20 +224,62 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
+ eorx = fgx ^ bgx;
+ k = image->width/ppw;
+
++ for (i = 0; i < tablen; ++i)
++ colortab[i] = (tab[i] & eorx) ^ bgx;
++
+ for (i = image->height; i--; ) {
+ dst = dst1;
+ shift = 8;
+ src = s;
+
+- for (j = k; j--; ) {
++ /*
++ * Manually unroll the per-line copying loop for better
++ * performance. This works until we processed the last
++		 * performance. This works until we have processed the last
++ */
++ switch (ppw) {
++ case 4: /* 8 bpp */
++ for (j = k; j >= 2; j -= 2, ++src) {
++ *dst++ = colortab[(*src >> 4) & bit_mask];
++ *dst++ = colortab[(*src >> 0) & bit_mask];
++ }
++ break;
++ case 2: /* 16 bpp */
++ for (j = k; j >= 4; j -= 4, ++src) {
++ *dst++ = colortab[(*src >> 6) & bit_mask];
++ *dst++ = colortab[(*src >> 4) & bit_mask];
++ *dst++ = colortab[(*src >> 2) & bit_mask];
++ *dst++ = colortab[(*src >> 0) & bit_mask];
++ }
++ break;
++ case 1: /* 32 bpp */
++ for (j = k; j >= 8; j -= 8, ++src) {
++ *dst++ = colortab[(*src >> 7) & bit_mask];
++ *dst++ = colortab[(*src >> 6) & bit_mask];
++ *dst++ = colortab[(*src >> 5) & bit_mask];
++ *dst++ = colortab[(*src >> 4) & bit_mask];
++ *dst++ = colortab[(*src >> 3) & bit_mask];
++ *dst++ = colortab[(*src >> 2) & bit_mask];
++ *dst++ = colortab[(*src >> 1) & bit_mask];
++ *dst++ = colortab[(*src >> 0) & bit_mask];
++ }
++ break;
++ }
++
++ /*
++ * For image widths that are not a multiple of 8, there
++ * are trailing pixels left on the current line. Print
++ * them as well.
++ */
++ for (; j--; ) {
+ shift -= ppw;
+- end_mask = tab[(*src >> shift) & bit_mask];
+- *dst++ = (end_mask & eorx) ^ bgx;
++ *dst++ = colortab[(*src >> shift) & bit_mask];
+ if (!shift) {
+ shift = 8;
+- src++;
++ ++src;
+ }
+ }
++
+ dst1 += p->fix.line_length;
+ s += spitch;
+ }
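As an aside (illustration only, not part of the patch): the fast_imageblit() rework above hoists the per-group computation (tab[bits] & eorx) ^ bgx out of the pixel loop by expanding it once per image into colortab[] (16 entries at 8 bpp, 4 at 16 bpp, 2 at 32 bpp) and then unrolling the per-source-byte loop. The lookup-table idea in isolation, as a runnable sketch with made-up colours and an arbitrary bit-to-byte expansion table standing in for cfb_tab8_le/be:

/* colortab_sketch.c -- precomputed colour lookup for 8 bpp image blitting */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 8 bpp packs 4 pixels per 32-bit word, so each group of 4 source
	 * bits selects one of 16 possible output words -- small enough to
	 * expand up front. The table values here are illustrative only. */
	const uint32_t tab8[16] = {
		0x00000000, 0xff000000, 0x00ff0000, 0xffff0000,
		0x0000ff00, 0xff00ff00, 0x00ffff00, 0xffffff00,
		0x000000ff, 0xff0000ff, 0x00ff00ff, 0xffff00ff,
		0x0000ffff, 0xff00ffff, 0x00ffffff, 0xffffffff,
	};
	uint32_t fgx = 0x0f0f0f0f, bgx = 0x01010101;	/* made-up colours */
	uint32_t eorx = fgx ^ bgx, colortab[16];
	uint8_t src = 0xA5;				/* one source byte */
	int i;

	/* hoisted out of the per-pixel loop: done once per blit */
	for (i = 0; i < 16; i++)
		colortab[i] = (tab8[i] & eorx) ^ bgx;

	/* the inner loop is now two table lookups per source byte */
	printf("%08x %08x\n", colortab[(src >> 4) & 0xf], colortab[src & 0xf]);
	return 0;
}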
+diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
+index 8ea8f079cde26..b3d5f884c5445 100644
+--- a/drivers/video/fbdev/efifb.c
++++ b/drivers/video/fbdev/efifb.c
+@@ -47,6 +47,8 @@ static bool use_bgrt = true;
+ static bool request_mem_succeeded = false;
+ static u64 mem_flags = EFI_MEMORY_WC | EFI_MEMORY_UC;
+
++static struct pci_dev *efifb_pci_dev; /* dev with BAR covering the efifb */
++
+ static struct fb_var_screeninfo efifb_defined = {
+ .activate = FB_ACTIVATE_NOW,
+ .height = -1,
+@@ -241,18 +243,28 @@ error:
+ static inline void efifb_show_boot_graphics(struct fb_info *info) {}
+ #endif
+
++/*
++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
++ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
++ */
+ static void efifb_destroy(struct fb_info *info)
+ {
++ if (efifb_pci_dev)
++ pm_runtime_put(&efifb_pci_dev->dev);
++
+ if (info->screen_base) {
+ if (mem_flags & (EFI_MEMORY_UC | EFI_MEMORY_WC))
+ iounmap(info->screen_base);
+ else
+ memunmap(info->screen_base);
+ }
++
+ if (request_mem_succeeded)
+ release_mem_region(info->apertures->ranges[0].base,
+ info->apertures->ranges[0].size);
+ fb_dealloc_cmap(&info->cmap);
++
++ framebuffer_release(info);
+ }
+
+ static const struct fb_ops efifb_ops = {
+@@ -333,7 +345,6 @@ ATTRIBUTE_GROUPS(efifb);
+
+ static bool pci_dev_disabled; /* FB base matches BAR of a disabled device */
+
+-static struct pci_dev *efifb_pci_dev; /* dev with BAR covering the efifb */
+ static struct resource *bar_resource;
+ static u64 bar_offset;
+
+@@ -347,6 +358,17 @@ static int efifb_probe(struct platform_device *dev)
+ char *option = NULL;
+ efi_memory_desc_t md;
+
++ /*
++ * Generic drivers must not be registered if a framebuffer exists.
++ * If a native driver was probed, the display hardware was already
++ * taken and attempting to use the system framebuffer is dangerous.
++ */
++ if (num_registered_fb > 0) {
++ dev_err(&dev->dev,
++ "efifb: a framebuffer is already registered\n");
++ return -EINVAL;
++ }
++
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
+ return -ENODEV;
+
+@@ -569,17 +591,22 @@ static int efifb_probe(struct platform_device *dev)
+ pr_err("efifb: cannot allocate colormap\n");
+ goto err_groups;
+ }
++
++ if (efifb_pci_dev)
++ WARN_ON(pm_runtime_get_sync(&efifb_pci_dev->dev) < 0);
++
+ err = register_framebuffer(info);
+ if (err < 0) {
+ pr_err("efifb: cannot register framebuffer\n");
+- goto err_fb_dealoc;
++ goto err_put_rpm_ref;
+ }
+ fb_info(info, "%s frame buffer device\n", info->fix.id);
+- if (efifb_pci_dev)
+- pm_runtime_get_sync(&efifb_pci_dev->dev);
+ return 0;
+
+-err_fb_dealoc:
++err_put_rpm_ref:
++ if (efifb_pci_dev)
++ pm_runtime_put(&efifb_pci_dev->dev);
++
+ fb_dealloc_cmap(&info->cmap);
+ err_groups:
+ sysfs_remove_groups(&dev->dev.kobj, efifb_groups);
+@@ -600,11 +627,9 @@ static int efifb_remove(struct platform_device *pdev)
+ {
+ struct fb_info *info = platform_get_drvdata(pdev);
+
++ /* efifb_destroy takes care of info cleanup */
+ unregister_framebuffer(info);
+ sysfs_remove_groups(&pdev->dev.kobj, efifb_groups);
+- framebuffer_release(info);
+- if (efifb_pci_dev)
+- pm_runtime_put(&efifb_pci_dev->dev);
+
+ return 0;
+ }
+diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
+index 2398b3d48fedf..8b2bc4adc50f7 100644
+--- a/drivers/video/fbdev/ep93xx-fb.c
++++ b/drivers/video/fbdev/ep93xx-fb.c
+@@ -474,7 +474,6 @@ static int ep93xxfb_probe(struct platform_device *pdev)
+ if (!info)
+ return -ENOMEM;
+
+- info->dev = &pdev->dev;
+ platform_set_drvdata(pdev, info);
+ fbi = info->par;
+ fbi->mach_info = mach_info;
+@@ -552,12 +551,14 @@ static int ep93xxfb_probe(struct platform_device *pdev)
+
+ err = register_framebuffer(info);
+ if (err)
+- goto failed_check;
++ goto failed_framebuffer;
+
+ dev_info(info->dev, "registered. Mode = %dx%d-%d\n",
+ info->var.xres, info->var.yres, info->var.bits_per_pixel);
+ return 0;
+
++failed_framebuffer:
++ clk_disable_unprepare(fbi->clk);
+ failed_check:
+ if (fbi->mach_info->teardown)
+ fbi->mach_info->teardown(pdev);
+diff --git a/drivers/video/fbdev/geode/Kconfig b/drivers/video/fbdev/geode/Kconfig
+index ac9c860592aaf..85bc14b6faf64 100644
+--- a/drivers/video/fbdev/geode/Kconfig
++++ b/drivers/video/fbdev/geode/Kconfig
+@@ -5,6 +5,7 @@
+ config FB_GEODE
+ bool "AMD Geode family framebuffer support"
+ depends on FB && PCI && (X86_32 || (X86 && COMPILE_TEST))
++ depends on !UML
+ help
+ Say 'Y' here to allow you to select framebuffer drivers for
+ the AMD Geode family of processors.
+diff --git a/drivers/video/fbdev/geode/lxfb_core.c b/drivers/video/fbdev/geode/lxfb_core.c
+index 66c81262d18f8..6c6b6efb49f69 100644
+--- a/drivers/video/fbdev/geode/lxfb_core.c
++++ b/drivers/video/fbdev/geode/lxfb_core.c
+@@ -234,6 +234,9 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
+
+ static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ {
++ if (!var->pixclock)
++ return -EINVAL;
++
+ if (var->xres > 1920 || var->yres > 1440)
+ return -EINVAL;
+
+diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
+index 23999df527393..de865e197c8d9 100644
+--- a/drivers/video/fbdev/hyperv_fb.c
++++ b/drivers/video/fbdev/hyperv_fb.c
+@@ -287,8 +287,6 @@ struct hvfb_par {
+
+ static uint screen_width = HVFB_WIDTH;
+ static uint screen_height = HVFB_HEIGHT;
+-static uint screen_width_max = HVFB_WIDTH;
+-static uint screen_height_max = HVFB_HEIGHT;
+ static uint screen_depth;
+ static uint screen_fb_size;
+ static uint dio_fb_size; /* FB size for deferred IO */
+@@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
+ int ret = 0;
+ unsigned long t;
+ u8 index;
+- int i;
+
+ memset(msg, 0, sizeof(struct synthvid_msg));
+ msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST;
+@@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev)
+ goto out;
+ }
+
+- for (i = 0; i < msg->resolution_resp.resolution_count; i++) {
+- screen_width_max = max_t(unsigned int, screen_width_max,
+- msg->resolution_resp.supported_resolution[i].width);
+- screen_height_max = max_t(unsigned int, screen_height_max,
+- msg->resolution_resp.supported_resolution[i].height);
+- }
+-
+ screen_width =
+ msg->resolution_resp.supported_resolution[index].width;
+ screen_height =
+@@ -809,12 +799,18 @@ static void hvfb_ondemand_refresh_throttle(struct hvfb_par *par,
+ static int hvfb_on_panic(struct notifier_block *nb,
+ unsigned long e, void *p)
+ {
++ struct hv_device *hdev;
+ struct hvfb_par *par;
+ struct fb_info *info;
+
+ par = container_of(nb, struct hvfb_par, hvfb_panic_nb);
+- par->synchronous_fb = true;
+ info = par->info;
++ hdev = device_to_hv_device(info->device);
++
++ if (hv_ringbuffer_spinlock_busy(hdev->channel))
++ return NOTIFY_DONE;
++
++ par->synchronous_fb = true;
+ if (par->need_docopy)
+ hvfb_docopy(par, 0, dio_fb_size);
+ synthvid_update(info, 0, 0, INT_MAX, INT_MAX);
+@@ -941,7 +937,7 @@ static void hvfb_get_option(struct fb_info *info)
+
+ if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN ||
+ (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) &&
+- (x > screen_width_max || y > screen_height_max)) ||
++ (x * y * screen_depth / 8 > screen_fb_size)) ||
+ (par->synthvid_version == SYNTHVID_VERSION_WIN8 &&
+ x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) ||
+ (par->synthvid_version == SYNTHVID_VERSION_WIN7 &&
+@@ -1019,7 +1015,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+ struct pci_dev *pdev = NULL;
+ void __iomem *fb_virt;
+ int gen2vm = efi_enabled(EFI_BOOT);
+- resource_size_t pot_start, pot_end;
+ phys_addr_t paddr;
+ int ret;
+
+@@ -1070,23 +1065,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+ dio_fb_size =
+ screen_width * screen_height * screen_depth / 8;
+
+- if (gen2vm) {
+- pot_start = 0;
+- pot_end = -1;
+- } else {
+- if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+- pci_resource_len(pdev, 0) < screen_fb_size) {
+- pr_err("Resource not available or (0x%lx < 0x%lx)\n",
+- (unsigned long) pci_resource_len(pdev, 0),
+- (unsigned long) screen_fb_size);
+- goto err1;
+- }
+-
+- pot_end = pci_resource_end(pdev, 0);
+- pot_start = pot_end - screen_fb_size + 1;
+- }
+-
+- ret = vmbus_allocate_mmio(&par->mem, hdev, pot_start, pot_end,
++ ret = vmbus_allocate_mmio(&par->mem, hdev, 0, -1,
+ screen_fb_size, 0x100000, true);
+ if (ret != 0) {
+ pr_err("Unable to allocate framebuffer memory\n");
+@@ -1194,8 +1173,8 @@ static int hvfb_probe(struct hv_device *hdev,
+ }
+
+ hvfb_get_option(info);
+- pr_info("Screen resolution: %dx%d, Color depth: %d\n",
+- screen_width, screen_height, screen_depth);
++ pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n",
++ screen_width, screen_height, screen_depth, screen_fb_size);
+
+ ret = hvfb_getmem(hdev, info);
+ if (ret) {
+diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c
+index 52cce0db8bd34..8fb4e01e1943f 100644
+--- a/drivers/video/fbdev/i740fb.c
++++ b/drivers/video/fbdev/i740fb.c
+@@ -400,7 +400,7 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var,
+ u32 xres, right, hslen, left, xtotal;
+ u32 yres, lower, vslen, upper, ytotal;
+ u32 vxres, xoffset, vyres, yoffset;
+- u32 bpp, base, dacspeed24, mem;
++ u32 bpp, base, dacspeed24, mem, freq;
+ u8 r7;
+ int i;
+
+@@ -643,7 +643,12 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var,
+ par->atc[VGA_ATC_OVERSCAN] = 0;
+
+ /* Calculate VCLK that most closely matches the requested dot clock */
+- i740_calc_vclk((((u32)1e9) / var->pixclock) * (u32)(1e3), par);
++ freq = (((u32)1e9) / var->pixclock) * (u32)(1e3);
++ if (freq < I740_RFREQ_FIX) {
++ fb_dbg(info, "invalid pixclock\n");
++ freq = I740_RFREQ_FIX;
++ }
++ i740_calc_vclk(freq, par);
+
+ /* Since we program the clocks ourselves, always use VCLK2. */
+ par->misc |= 0x0C;
+@@ -657,6 +662,9 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var,
+
+ static int i740fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ {
++ if (!var->pixclock)
++ return -EINVAL;
++
+ switch (var->bits_per_pixel) {
+ case 8:
+ var->red.offset = var->green.offset = var->blue.offset = 0;
+diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
+index 16f272a508112..1b2fb8ed76237 100644
+--- a/drivers/video/fbdev/imsttfb.c
++++ b/drivers/video/fbdev/imsttfb.c
+@@ -1346,7 +1346,7 @@ static const struct fb_ops imsttfb_ops = {
+ .fb_ioctl = imsttfb_ioctl,
+ };
+
+-static void init_imstt(struct fb_info *info)
++static int init_imstt(struct fb_info *info)
+ {
+ struct imstt_par *par = info->par;
+ __u32 i, tmp, *ip, *end;
+@@ -1419,7 +1419,7 @@ static void init_imstt(struct fb_info *info)
+ || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) {
+ printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel);
+ framebuffer_release(info);
+- return;
++ return -ENODEV;
+ }
+
+ sprintf(info->fix.id, "IMS TT (%s)", par->ramdac == IBM ? "IBM" : "TVP");
+@@ -1455,12 +1455,13 @@ static void init_imstt(struct fb_info *info)
+
+ if (register_framebuffer(info) < 0) {
+ framebuffer_release(info);
+- return;
++ return -ENODEV;
+ }
+
+ tmp = (read_reg_le32(par->dc_regs, SSTATUS) & 0x0f00) >> 8;
+ fb_info(info, "%s frame buffer; %uMB vram; chip version %u\n",
+ info->fix.id, info->fix.smem_len >> 20, tmp);
++ return 0;
+ }
+
+ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+@@ -1523,10 +1524,10 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ if (!par->cmap_regs)
+ goto error;
+ info->pseudo_palette = par->palette;
+- init_imstt(info);
+-
+- pci_set_drvdata(pdev, info);
+- return 0;
++ ret = init_imstt(info);
++ if (!ret)
++ pci_set_drvdata(pdev, info);
++ return ret;
+
+ error:
+ if (par->dc_regs)
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index ad598257ab386..cd376a9bfe1b7 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -602,10 +602,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
+ if (var->hsync_len < 1 || var->hsync_len > 64)
+ printk(KERN_ERR "%s: invalid hsync_len %d\n",
+ info->fix.id, var->hsync_len);
+- if (var->left_margin > 255)
++ if (var->left_margin < 3 || var->left_margin > 255)
+ printk(KERN_ERR "%s: invalid left_margin %d\n",
+ info->fix.id, var->left_margin);
+- if (var->right_margin > 255)
++ if (var->right_margin < 1 || var->right_margin > 255)
+ printk(KERN_ERR "%s: invalid right_margin %d\n",
+ info->fix.id, var->right_margin);
+ if (var->yres < 1 || var->yres > ymax_mask)
+diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
+index a9579964eaba8..8a703adfa9360 100644
+--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
++++ b/drivers/video/fbdev/intelfb/intelfbdrv.c
+@@ -1214,6 +1214,9 @@ static int intelfb_check_var(struct fb_var_screeninfo *var,
+
+ dinfo = GET_DINFO(info);
+
++ if (!var->pixclock)
++ return -EINVAL;
++
+ /* update the pitch */
+ if (intelfbhw_validate_mode(dinfo, var) != 0)
+ return -EINVAL;
+diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c
+index 5c82611e93d99..e7348d657e183 100644
+--- a/drivers/video/fbdev/matrox/matroxfb_base.c
++++ b/drivers/video/fbdev/matrox/matroxfb_base.c
+@@ -1377,8 +1377,8 @@ static struct video_board vbG200 = {
+ .lowlevel = &matrox_G100
+ };
+ static struct video_board vbG200eW = {
+- .maxvram = 0x800000,
+- .maxdisplayable = 0x800000,
++ .maxvram = 0x1000000,
++ .maxdisplayable = 0x0800000,
+ .accelID = FB_ACCEL_MATROX_MGAG200,
+ .lowlevel = &matrox_G100
+ };
+diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
+index 061a105afb865..27c3ee5df8def 100644
+--- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
++++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
+@@ -518,7 +518,9 @@ static int mmphw_probe(struct platform_device *pdev)
+ ret = -ENOENT;
+ goto failed;
+ }
+- clk_prepare_enable(ctrl->clk);
++ ret = clk_prepare_enable(ctrl->clk);
++ if (ret)
++ goto failed;
+
+ /* init global regs */
+ ctrl_set_default(ctrl);
+diff --git a/drivers/video/fbdev/nvidia/nv_i2c.c b/drivers/video/fbdev/nvidia/nv_i2c.c
+index d7994a1732459..0b48965a6420c 100644
+--- a/drivers/video/fbdev/nvidia/nv_i2c.c
++++ b/drivers/video/fbdev/nvidia/nv_i2c.c
+@@ -86,7 +86,7 @@ static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name,
+ {
+ int rc;
+
+- strcpy(chan->adapter.name, name);
++ strscpy(chan->adapter.name, name, sizeof(chan->adapter.name));
+ chan->adapter.owner = THIS_MODULE;
+ chan->adapter.class = i2c_class;
+ chan->adapter.algo_data = &chan->algo;
+diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c
+index a372a183c1f01..f9c388a8c10e3 100644
+--- a/drivers/video/fbdev/nvidia/nvidia.c
++++ b/drivers/video/fbdev/nvidia/nvidia.c
+@@ -763,6 +763,8 @@ static int nvidiafb_check_var(struct fb_var_screeninfo *var,
+ int pitch, err = 0;
+
+ NVTRACE_ENTER();
++ if (!var->pixclock)
++ return -EINVAL;
+
+ var->transp.offset = 0;
+ var->transp.length = 0;
+diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c
+index a75ae0c9b14c7..d1cd8785d011d 100644
+--- a/drivers/video/fbdev/omap/lcd_mipid.c
++++ b/drivers/video/fbdev/omap/lcd_mipid.c
+@@ -563,11 +563,15 @@ static int mipid_spi_probe(struct spi_device *spi)
+
+ r = mipid_detect(md);
+ if (r < 0)
+- return r;
++ goto free_md;
+
+ omapfb_register_panel(&md->panel);
+
+ return 0;
++
++free_md:
++ kfree(md);
++ return r;
+ }
+
+ static int mipid_spi_remove(struct spi_device *spi)
+diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
+index 2fa436475b406..c8ad3ef42bd31 100644
+--- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
++++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c
+@@ -246,6 +246,7 @@ static int dvic_probe_of(struct platform_device *pdev)
+ adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0);
+ if (adapter_node) {
+ adapter = of_get_i2c_adapter_by_node(adapter_node);
++ of_node_put(adapter_node);
+ if (adapter == NULL) {
+ dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n");
+ omap_dss_put_device(ddata->in);
+diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+index 4b0793abdd84b..a2c7c5cb15234 100644
+--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
++++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+@@ -409,7 +409,7 @@ static ssize_t dsicm_num_errors_show(struct device *dev,
+ if (r)
+ return r;
+
+- return snprintf(buf, PAGE_SIZE, "%d\n", errors);
++ return sysfs_emit(buf, "%d\n", errors);
+ }
+
+ static ssize_t dsicm_hw_revision_show(struct device *dev,
+@@ -439,7 +439,7 @@ static ssize_t dsicm_hw_revision_show(struct device *dev,
+ if (r)
+ return r;
+
+- return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x\n", id1, id2, id3);
++ return sysfs_emit(buf, "%02x.%02x.%02x\n", id1, id2, id3);
+ }
+
+ static ssize_t dsicm_store_ulps(struct device *dev,
+@@ -487,7 +487,7 @@ static ssize_t dsicm_show_ulps(struct device *dev,
+ t = ddata->ulps_enabled;
+ mutex_unlock(&ddata->lock);
+
+- return snprintf(buf, PAGE_SIZE, "%u\n", t);
++ return sysfs_emit(buf, "%u\n", t);
+ }
+
+ static ssize_t dsicm_store_ulps_timeout(struct device *dev,
+@@ -532,7 +532,7 @@ static ssize_t dsicm_show_ulps_timeout(struct device *dev,
+ t = ddata->ulps_timeout;
+ mutex_unlock(&ddata->lock);
+
+- return snprintf(buf, PAGE_SIZE, "%u\n", t);
++ return sysfs_emit(buf, "%u\n", t);
+ }
+
+ static DEVICE_ATTR(num_dsi_errors, S_IRUGO, dsicm_num_errors_show, NULL);
+diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+index 8d8b5ff7d43c8..3696eb09b69b4 100644
+--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
++++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+@@ -476,7 +476,7 @@ static ssize_t show_cabc_available_modes(struct device *dev,
+ int i;
+
+ if (!ddata->has_cabc)
+- return snprintf(buf, PAGE_SIZE, "%s\n", cabc_modes[0]);
++ return sysfs_emit(buf, "%s\n", cabc_modes[0]);
+
+ for (i = 0, len = 0;
+ len < PAGE_SIZE && i < ARRAY_SIZE(cabc_modes); i++)
+diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+index afac1d9445aa2..57b7d1f490962 100644
+--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
++++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+@@ -169,7 +169,7 @@ static ssize_t tpo_td043_vmirror_show(struct device *dev,
+ {
+ struct panel_drv_data *ddata = dev_get_drvdata(dev);
+
+- return snprintf(buf, PAGE_SIZE, "%d\n", ddata->vmirror);
++ return sysfs_emit(buf, "%d\n", ddata->vmirror);
+ }
+
+ static ssize_t tpo_td043_vmirror_store(struct device *dev,
+@@ -199,7 +199,7 @@ static ssize_t tpo_td043_mode_show(struct device *dev,
+ {
+ struct panel_drv_data *ddata = dev_get_drvdata(dev);
+
+- return snprintf(buf, PAGE_SIZE, "%d\n", ddata->mode);
++ return sysfs_emit(buf, "%d\n", ddata->mode);
+ }
+
+ static ssize_t tpo_td043_mode_store(struct device *dev,
+diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
+index d43b081d592f0..db84a662e8de3 100644
+--- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
++++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c
+@@ -1538,22 +1538,28 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
+ {
+ struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
+ unsigned long flags;
+- struct dsi_irq_stats stats;
++ struct dsi_irq_stats *stats;
++
++ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
++ if (!stats) {
++ seq_printf(s, "out of memory\n");
++ return;
++ }
+
+ spin_lock_irqsave(&dsi->irq_stats_lock, flags);
+
+- stats = dsi->irq_stats;
++ *stats = dsi->irq_stats;
+ memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats));
+ dsi->irq_stats.last_reset = jiffies;
+
+ spin_unlock_irqrestore(&dsi->irq_stats_lock, flags);
+
+ seq_printf(s, "period %u ms\n",
+- jiffies_to_msecs(jiffies - stats.last_reset));
++ jiffies_to_msecs(jiffies - stats->last_reset));
+
+- seq_printf(s, "irqs %d\n", stats.irq_count);
++ seq_printf(s, "irqs %d\n", stats->irq_count);
+ #define PIS(x) \
+- seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1])
++ seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1])
+
+ seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1);
+ PIS(VC0);
+@@ -1577,10 +1583,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
+
+ #define PIS(x) \
+ seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \
+- stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
+- stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
++ stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \
++ stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]);
+
+ seq_printf(s, "-- VC interrupts --\n");
+ PIS(CS);
+@@ -1596,7 +1602,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
+
+ #define PIS(x) \
+ seq_printf(s, "%-20s %10d\n", #x, \
+- stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
++ stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]);
+
+ seq_printf(s, "-- CIO interrupts --\n");
+ PIS(ERRSYNCESC1);
+@@ -1620,6 +1626,8 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev,
+ PIS(ULPSACTIVENOT_ALL0);
+ PIS(ULPSACTIVENOT_ALL1);
+ #undef PIS
++
++ kfree(stats);
+ }
+
+ static void dsi1_dump_irqs(struct seq_file *s)
+diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c
+index c68725eebee3b..e8690f7aea050 100644
+--- a/drivers/video/fbdev/pm2fb.c
++++ b/drivers/video/fbdev/pm2fb.c
+@@ -617,6 +617,11 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ return -EINVAL;
+ }
+
++ if (!var->pixclock) {
++ DPRINTK("pixclock is zero\n");
++ return -EINVAL;
++ }
++
+ if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) {
+ DPRINTK("pixclock too high (%ldKHz)\n",
+ PICOS2KHZ(var->pixclock));
+@@ -1525,8 +1530,10 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ }
+
+ info = framebuffer_alloc(sizeof(struct pm2fb_par), &pdev->dev);
+- if (!info)
+- return -ENOMEM;
++ if (!info) {
++ err = -ENOMEM;
++ goto err_exit_disable;
++ }
+ default_par = info->par;
+
+ switch (pdev->device) {
+@@ -1707,6 +1714,8 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ release_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len);
+ err_exit_neither:
+ framebuffer_release(info);
++ err_exit_disable:
++ pci_disable_device(pdev);
+ return retval;
+ }
+
+@@ -1733,6 +1742,7 @@ static void pm2fb_remove(struct pci_dev *pdev)
+ fb_dealloc_cmap(&info->cmap);
+ kfree(info->pixmap.addr);
+ framebuffer_release(info);
++ pci_disable_device(pdev);
+ }
+
+ static const struct pci_device_id pm2fb_id_table[] = {
+diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c
+index 4279e13a3b58d..9e9888e40c573 100644
+--- a/drivers/video/fbdev/pxa3xx-gcu.c
++++ b/drivers/video/fbdev/pxa3xx-gcu.c
+@@ -381,7 +381,7 @@ pxa3xx_gcu_write(struct file *file, const char *buff,
+ struct pxa3xx_gcu_batch *buffer;
+ struct pxa3xx_gcu_priv *priv = to_pxa3xx_gcu_priv(file);
+
+- int words = count / 4;
++ size_t words = count / 4;
+
+ /* Does not need to be atomic. There's a lock in user space,
+ * but anyhow, this is just for statistics. */
+@@ -650,6 +650,7 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev)
+ for (i = 0; i < 8; i++) {
+ ret = pxa3xx_gcu_add_buffer(dev, priv);
+ if (ret) {
++ pxa3xx_gcu_free_buffers(dev, priv);
+ dev_err(dev, "failed to allocate DMA memory\n");
+ goto err_disable_clk;
+ }
+@@ -666,15 +667,15 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev)
+ SHARED_SIZE, irq);
+ return 0;
+
+-err_free_dma:
+- dma_free_coherent(dev, SHARED_SIZE,
+- priv->shared, priv->shared_phys);
++err_disable_clk:
++ clk_disable_unprepare(priv->clk);
+
+ err_misc_deregister:
+ misc_deregister(&priv->misc_dev);
+
+-err_disable_clk:
+- clk_disable_unprepare(priv->clk);
++err_free_dma:
++ dma_free_coherent(dev, SHARED_SIZE,
++ priv->shared, priv->shared_phys);
+
+ return ret;
+ }
+@@ -687,6 +688,7 @@ static int pxa3xx_gcu_remove(struct platform_device *pdev)
+ pxa3xx_gcu_wait_idle(priv);
+ misc_deregister(&priv->misc_dev);
+ dma_free_coherent(dev, SHARED_SIZE, priv->shared, priv->shared_phys);
++ clk_disable_unprepare(priv->clk);
+ pxa3xx_gcu_free_buffers(dev, priv);
+
+ return 0;
+diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c
+index 5c74253e7b2c0..a936455a3df2a 100644
+--- a/drivers/video/fbdev/s3fb.c
++++ b/drivers/video/fbdev/s3fb.c
+@@ -902,6 +902,8 @@ static int s3fb_set_par(struct fb_info *info)
+ value = clamp((htotal + hsstart + 1) / 2 + 2, hsstart + 4, htotal + 1);
+ svga_wcrt_multi(par->state.vgabase, s3_dtpc_regs, value);
+
++ if (screen_size > info->screen_size)
++ screen_size = info->screen_size;
+ memset_io(info->screen_base, 0x00, screen_size);
+ /* Device and screen back on */
+ svga_wcrt_mask(par->state.vgabase, 0x17, 0x80, 0x80);
+diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
+index 62f0ded706815..a2e3a46900252 100644
+--- a/drivers/video/fbdev/simplefb.c
++++ b/drivers/video/fbdev/simplefb.c
+@@ -70,12 +70,18 @@ struct simplefb_par;
+ static void simplefb_clocks_destroy(struct simplefb_par *par);
+ static void simplefb_regulators_destroy(struct simplefb_par *par);
+
++/*
++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
++ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
++ */
+ static void simplefb_destroy(struct fb_info *info)
+ {
+ simplefb_regulators_destroy(info->par);
+ simplefb_clocks_destroy(info->par);
+ if (info->screen_base)
+ iounmap(info->screen_base);
++
++ framebuffer_release(info);
+ }
+
+ static const struct fb_ops simplefb_ops = {
+@@ -407,6 +413,17 @@ static int simplefb_probe(struct platform_device *pdev)
+ struct simplefb_par *par;
+ struct resource *mem;
+
++ /*
++ * Generic drivers must not be registered if a framebuffer exists.
++ * If a native driver was probed, the display hardware was already
++ * taken and attempting to use the system framebuffer is dangerous.
++ */
++ if (num_registered_fb > 0) {
++ dev_err(&pdev->dev,
++ "simplefb: a framebuffer is already registered\n");
++ return -EINVAL;
++ }
++
+ if (fb_get_options("simplefb", NULL))
+ return -ENODEV;
+
+@@ -509,8 +526,8 @@ static int simplefb_remove(struct platform_device *pdev)
+ {
+ struct fb_info *info = platform_get_drvdata(pdev);
+
++ /* simplefb_destroy takes care of info cleanup */
+ unregister_framebuffer(info);
+- framebuffer_release(info);
+
+ return 0;
+ }
+diff --git a/drivers/video/fbdev/sis/init.c b/drivers/video/fbdev/sis/init.c
+index b568c646a76c2..2ba91d62af92e 100644
+--- a/drivers/video/fbdev/sis/init.c
++++ b/drivers/video/fbdev/sis/init.c
+@@ -355,12 +355,12 @@ SiS_GetModeID(int VGAEngine, unsigned int VBFlags, int HDisplay, int VDisplay,
+ }
+ break;
+ case 400:
+- if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 800) && (LCDwidth >= 600))) {
++ if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 800) && (LCDheight >= 600))) {
+ if(VDisplay == 300) ModeIndex = ModeIndex_400x300[Depth];
+ }
+ break;
+ case 512:
+- if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 1024) && (LCDwidth >= 768))) {
++ if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 1024) && (LCDheight >= 768))) {
+ if(VDisplay == 384) ModeIndex = ModeIndex_512x384[Depth];
+ }
+ break;
+diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c
+index 0dbc6bf8268ac..092a1caa1208e 100644
+--- a/drivers/video/fbdev/sm712fb.c
++++ b/drivers/video/fbdev/sm712fb.c
+@@ -1047,7 +1047,7 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf,
+ if (count + p > total_size)
+ count = total_size - p;
+
+- buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL);
++ buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+@@ -1059,25 +1059,14 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf,
+ while (count) {
+ c = (count > PAGE_SIZE) ? PAGE_SIZE : count;
+ dst = buffer;
+- for (i = c >> 2; i--;) {
+- *dst = fb_readl(src++);
+- *dst = big_swap(*dst);
++ for (i = (c + 3) >> 2; i--;) {
++ u32 val;
++
++ val = fb_readl(src);
++ *dst = big_swap(val);
++ src++;
+ dst++;
+ }
+- if (c & 3) {
+- u8 *dst8 = (u8 *)dst;
+- u8 __iomem *src8 = (u8 __iomem *)src;
+-
+- for (i = c & 3; i--;) {
+- if (i & 1) {
+- *dst8++ = fb_readb(++src8);
+- } else {
+- *dst8++ = fb_readb(--src8);
+- src8 += 2;
+- }
+- }
+- src = (u32 __iomem *)src8;
+- }
+
+ if (copy_to_user(buf, buffer, c)) {
+ err = -EFAULT;
+@@ -1130,7 +1119,7 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf,
+ count = total_size - p;
+ }
+
+- buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL);
++ buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+@@ -1148,24 +1137,11 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf,
+ break;
+ }
+
+- for (i = c >> 2; i--;) {
+- fb_writel(big_swap(*src), dst++);
++ for (i = (c + 3) >> 2; i--;) {
++ fb_writel(big_swap(*src), dst);
++ dst++;
+ src++;
+ }
+- if (c & 3) {
+- u8 *src8 = (u8 *)src;
+- u8 __iomem *dst8 = (u8 __iomem *)dst;
+-
+- for (i = c & 3; i--;) {
+- if (i & 1) {
+- fb_writeb(*src8++, ++dst8);
+- } else {
+- fb_writeb(*src8++, --dst8);
+- dst8 += 2;
+- }
+- }
+- dst = (u32 __iomem *)dst8;
+- }
+
+ *ppos += c;
+ buf += c;
+diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
+index bfac3ee4a6422..b3295cd7fd4f9 100644
+--- a/drivers/video/fbdev/smscufx.c
++++ b/drivers/video/fbdev/smscufx.c
+@@ -97,7 +97,6 @@ struct ufx_data {
+ struct kref kref;
+ int fb_count;
+ bool virtualized; /* true when physical usb device not present */
+- struct delayed_work free_framebuffer_work;
+ atomic_t usb_active; /* 0 = update virtual buffer, but no usb traffic */
+ atomic_t lost_pixels; /* 1 = a render op failed. Need screen refresh */
+ u8 *edid; /* null until we read edid from hw or get from sysfs */
+@@ -137,6 +136,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len);
+ static int ufx_alloc_urb_list(struct ufx_data *dev, int count, size_t size);
+ static void ufx_free_urb_list(struct ufx_data *dev);
+
++static DEFINE_MUTEX(disconnect_mutex);
++
+ /* reads a control register */
+ static int ufx_reg_read(struct ufx_data *dev, u32 index, u32 *data)
+ {
+@@ -1070,9 +1071,13 @@ static int ufx_ops_open(struct fb_info *info, int user)
+ if (user == 0 && !console)
+ return -EBUSY;
+
++ mutex_lock(&disconnect_mutex);
++
+ /* If the USB device is gone, we don't accept new opens */
+- if (dev->virtualized)
++ if (dev->virtualized) {
++ mutex_unlock(&disconnect_mutex);
+ return -ENODEV;
++ }
+
+ dev->fb_count++;
+
+@@ -1096,6 +1101,8 @@ static int ufx_ops_open(struct fb_info *info, int user)
+ pr_debug("open /dev/fb%d user=%d fb_info=%p count=%d",
+ info->node, user, info, dev->fb_count);
+
++ mutex_unlock(&disconnect_mutex);
++
+ return 0;
+ }
+
+@@ -1108,15 +1115,24 @@ static void ufx_free(struct kref *kref)
+ {
+ struct ufx_data *dev = container_of(kref, struct ufx_data, kref);
+
+- /* this function will wait for all in-flight urbs to complete */
+- if (dev->urbs.count > 0)
+- ufx_free_urb_list(dev);
++ kfree(dev);
++}
+
+- pr_debug("freeing ufx_data %p", dev);
++static void ufx_ops_destory(struct fb_info *info)
++{
++ struct ufx_data *dev = info->par;
++ int node = info->node;
+
+- kfree(dev);
++ /* Assume info structure is freed after this point */
++ framebuffer_release(info);
++
++ pr_debug("fb_info for /dev/fb%d has been freed", node);
++
++ /* release reference taken by kref_init in probe() */
++ kref_put(&dev->kref, ufx_free);
+ }
+
++
+ static void ufx_release_urb_work(struct work_struct *work)
+ {
+ struct urb_node *unode = container_of(work, struct urb_node,
+@@ -1125,14 +1141,9 @@ static void ufx_release_urb_work(struct work_struct *work)
+ up(&unode->dev->urbs.limit_sem);
+ }
+
+-static void ufx_free_framebuffer_work(struct work_struct *work)
++static void ufx_free_framebuffer(struct ufx_data *dev)
+ {
+- struct ufx_data *dev = container_of(work, struct ufx_data,
+- free_framebuffer_work.work);
+ struct fb_info *info = dev->info;
+- int node = info->node;
+-
+- unregister_framebuffer(info);
+
+ if (info->cmap.len != 0)
+ fb_dealloc_cmap(&info->cmap);
+@@ -1144,11 +1155,6 @@ static void ufx_free_framebuffer_work(struct work_struct *work)
+
+ dev->info = NULL;
+
+- /* Assume info structure is freed after this point */
+- framebuffer_release(info);
+-
+- pr_debug("fb_info for /dev/fb%d has been freed", node);
+-
+ /* ref taken in probe() as part of registering framebfufer */
+ kref_put(&dev->kref, ufx_free);
+ }
+@@ -1160,11 +1166,13 @@ static int ufx_ops_release(struct fb_info *info, int user)
+ {
+ struct ufx_data *dev = info->par;
+
++ mutex_lock(&disconnect_mutex);
++
+ dev->fb_count--;
+
+ /* We can't free fb_info here - fbmem will touch it when we return */
+ if (dev->virtualized && (dev->fb_count == 0))
+- schedule_delayed_work(&dev->free_framebuffer_work, HZ);
++ ufx_free_framebuffer(dev);
+
+ if ((dev->fb_count == 0) && (info->fbdefio)) {
+ fb_deferred_io_cleanup(info);
+@@ -1177,6 +1185,8 @@ static int ufx_ops_release(struct fb_info *info, int user)
+
+ kref_put(&dev->kref, ufx_free);
+
++ mutex_unlock(&disconnect_mutex);
++
+ return 0;
+ }
+
+@@ -1283,6 +1293,7 @@ static const struct fb_ops ufx_ops = {
+ .fb_blank = ufx_ops_blank,
+ .fb_check_var = ufx_ops_check_var,
+ .fb_set_par = ufx_ops_set_par,
++ .fb_destroy = ufx_ops_destory,
+ };
+
+ /* Assumes &info->lock held by caller
+@@ -1610,7 +1621,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
+ struct usb_device *usbdev;
+ struct ufx_data *dev;
+ struct fb_info *info;
+- int retval;
++ int retval = -ENOMEM;
+ u32 id_rev, fpga_rev;
+
+ /* usb initialization */
+@@ -1642,20 +1653,23 @@ static int ufx_usb_probe(struct usb_interface *interface,
+
+ if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
+ dev_err(dev->gdev, "ufx_alloc_urb_list failed\n");
+- goto e_nomem;
++ goto put_ref;
+ }
+
+ /* We don't register a new USB class. Our client interface is fbdev */
+
+ /* allocates framebuffer driver structure, not framebuffer memory */
+ info = framebuffer_alloc(0, &usbdev->dev);
+- if (!info)
+- goto e_nomem;
++ if (!info) {
++ dev_err(dev->gdev, "framebuffer_alloc failed\n");
++ goto free_urb_list;
++ }
+
+ dev->info = info;
+ info->par = dev;
+ info->pseudo_palette = dev->pseudo_palette;
+ info->fbops = &ufx_ops;
++ INIT_LIST_HEAD(&info->modelist);
+
+ retval = fb_alloc_cmap(&info->cmap, 256, 0);
+ if (retval < 0) {
+@@ -1663,11 +1677,6 @@ static int ufx_usb_probe(struct usb_interface *interface,
+ goto destroy_modedb;
+ }
+
+- INIT_DELAYED_WORK(&dev->free_framebuffer_work,
+- ufx_free_framebuffer_work);
+-
+- INIT_LIST_HEAD(&info->modelist);
+-
+ retval = ufx_reg_read(dev, 0x3000, &id_rev);
+ check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval);
+ dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev);
+@@ -1697,22 +1706,34 @@ static int ufx_usb_probe(struct usb_interface *interface,
+ check_warn_goto_error(retval, "unable to find common mode for display and adapter");
+
+ retval = ufx_reg_set_bits(dev, 0x4000, 0x00000001);
+- check_warn_goto_error(retval, "error %d enabling graphics engine", retval);
++ if (retval < 0) {
++ dev_err(dev->gdev, "error %d enabling graphics engine", retval);
++ goto setup_modes;
++ }
+
+ /* ready to begin using device */
+ atomic_set(&dev->usb_active, 1);
+
+ dev_dbg(dev->gdev, "checking var");
+ retval = ufx_ops_check_var(&info->var, info);
+- check_warn_goto_error(retval, "error %d ufx_ops_check_var", retval);
++ if (retval < 0) {
++ dev_err(dev->gdev, "error %d ufx_ops_check_var", retval);
++ goto reset_active;
++ }
+
+ dev_dbg(dev->gdev, "setting par");
+ retval = ufx_ops_set_par(info);
+- check_warn_goto_error(retval, "error %d ufx_ops_set_par", retval);
++ if (retval < 0) {
++ dev_err(dev->gdev, "error %d ufx_ops_set_par", retval);
++ goto reset_active;
++ }
+
+ dev_dbg(dev->gdev, "registering framebuffer");
+ retval = register_framebuffer(info);
+- check_warn_goto_error(retval, "error %d register_framebuffer", retval);
++ if (retval < 0) {
++ dev_err(dev->gdev, "error %d register_framebuffer", retval);
++ goto reset_active;
++ }
+
+ dev_info(dev->gdev, "SMSC UDX USB device /dev/fb%d attached. %dx%d resolution."
+ " Using %dK framebuffer memory\n", info->node,
+@@ -1720,28 +1741,34 @@ static int ufx_usb_probe(struct usb_interface *interface,
+
+ return 0;
+
+-error:
+- fb_dealloc_cmap(&info->cmap);
+-destroy_modedb:
++reset_active:
++ atomic_set(&dev->usb_active, 0);
++setup_modes:
+ fb_destroy_modedb(info->monspecs.modedb);
+ vfree(info->screen_base);
+ fb_destroy_modelist(&info->modelist);
++error:
++ fb_dealloc_cmap(&info->cmap);
++destroy_modedb:
+ framebuffer_release(info);
++free_urb_list:
++ if (dev->urbs.count > 0)
++ ufx_free_urb_list(dev);
+ put_ref:
+ kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
+ kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
+ return retval;
+-
+-e_nomem:
+- retval = -ENOMEM;
+- goto put_ref;
+ }
+
+ static void ufx_usb_disconnect(struct usb_interface *interface)
+ {
+ struct ufx_data *dev;
++ struct fb_info *info;
++
++ mutex_lock(&disconnect_mutex);
+
+ dev = usb_get_intfdata(interface);
++ info = dev->info;
+
+ pr_debug("USB disconnect starting\n");
+
+@@ -1755,12 +1782,17 @@ static void ufx_usb_disconnect(struct usb_interface *interface)
+
+ /* if clients still have us open, will be freed on last close */
+ if (dev->fb_count == 0)
+- schedule_delayed_work(&dev->free_framebuffer_work, 0);
++ ufx_free_framebuffer(dev);
+
+- /* release reference taken by kref_init in probe() */
+- kref_put(&dev->kref, ufx_free);
++ /* this function will wait for all in-flight urbs to complete */
++ if (dev->urbs.count > 0)
++ ufx_free_urb_list(dev);
++
++ pr_debug("freeing ufx_data %p", dev);
++
++ unregister_framebuffer(info);
+
+- /* consider ufx_data freed */
++ mutex_unlock(&disconnect_mutex);
+ }
+
+ static struct usb_driver ufx_driver = {
+diff --git a/drivers/video/fbdev/sticore.h b/drivers/video/fbdev/sticore.h
+index c338f7848ae2b..0ebdd28a0b813 100644
+--- a/drivers/video/fbdev/sticore.h
++++ b/drivers/video/fbdev/sticore.h
+@@ -370,6 +370,9 @@ struct sti_struct {
+
+ /* pointer to all internal data */
+ struct sti_all_data *sti_data;
++
++ /* pa_path of this device */
++ char pa_path[24];
+ };
+
+
+diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
+index 265865610edc6..63f51783352dc 100644
+--- a/drivers/video/fbdev/stifb.c
++++ b/drivers/video/fbdev/stifb.c
+@@ -921,6 +921,28 @@ SETUP_HCRX(struct stifb_info *fb)
+
+ /* ------------------- driver specific functions --------------------------- */
+
++static int
++stifb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
++{
++ struct stifb_info *fb = container_of(info, struct stifb_info, info);
++
++ if (var->xres != fb->info.var.xres ||
++ var->yres != fb->info.var.yres ||
++ var->bits_per_pixel != fb->info.var.bits_per_pixel)
++ return -EINVAL;
++
++ var->xres_virtual = var->xres;
++ var->yres_virtual = var->yres;
++ var->xoffset = 0;
++ var->yoffset = 0;
++ var->grayscale = fb->info.var.grayscale;
++ var->red.length = fb->info.var.red.length;
++ var->green.length = fb->info.var.green.length;
++ var->blue.length = fb->info.var.blue.length;
++
++ return 0;
++}
++
+ static int
+ stifb_setcolreg(u_int regno, u_int red, u_int green,
+ u_int blue, u_int transp, struct fb_info *info)
+@@ -1041,6 +1063,48 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+ SETUP_FB(fb);
+ }
+
++#define ARTIST_VRAM_SIZE 0x000804
++#define ARTIST_VRAM_SRC 0x000808
++#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL 0x000a04
++#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE 0x000b00
++#define ARTIST_SRC_BM_ACCESS 0x018008
++#define ARTIST_FGCOLOR 0x018010
++#define ARTIST_BGCOLOR 0x018014
++#define ARTIST_BITMAP_OP 0x01801c
++
++static void
++stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
++{
++ struct stifb_info *fb = container_of(info, struct stifb_info, info);
++
++ if (rect->rop != ROP_COPY ||
++ (fb->id == S9000_ID_HCRX && fb->info.var.bits_per_pixel == 32))
++ return cfb_fillrect(info, rect);
++
++ SETUP_HW(fb);
++
++ if (fb->info.var.bits_per_pixel == 32) {
++ WRITE_WORD(0xBBA0A000, fb, REG_10);
++
++ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
++ } else {
++ WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
++
++ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
++ }
++
++ WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP);
++ WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS);
++ NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000);
++ NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color);
++ WRITE_WORD(0, fb, ARTIST_BGCOLOR);
++
++ NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy));
++ SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height));
++
++ SETUP_FB(fb);
++}
++
+ static void __init
+ stifb_init_display(struct stifb_info *fb)
+ {
+@@ -1103,9 +1167,10 @@ stifb_init_display(struct stifb_info *fb)
+
+ static const struct fb_ops stifb_ops = {
+ .owner = THIS_MODULE,
++ .fb_check_var = stifb_check_var,
+ .fb_setcolreg = stifb_setcolreg,
+ .fb_blank = stifb_blank,
+- .fb_fillrect = cfb_fillrect,
++ .fb_fillrect = stifb_fillrect,
+ .fb_copyarea = stifb_copyarea,
+ .fb_imageblit = cfb_imageblit,
+ };
+@@ -1122,6 +1187,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+ struct stifb_info *fb;
+ struct fb_info *info;
+ unsigned long sti_rom_address;
++ char modestr[32];
+ char *dev_name;
+ int bpp, xres, yres;
+
+@@ -1257,7 +1323,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+
+ /* limit fbsize to max visible screen size */
+ if (fix->smem_len > yres*fix->line_length)
+- fix->smem_len = yres*fix->line_length;
++ fix->smem_len = ALIGN(yres*fix->line_length, 4*1024*1024);
+
+ fix->accel = FB_ACCEL_NONE;
+
+@@ -1297,9 +1363,12 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+ goto out_err0;
+ }
+ info->screen_size = fix->smem_len;
+- info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
++ info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
+ info->pseudo_palette = &fb->pseudo_palette;
+
++ scnprintf(modestr, sizeof(modestr), "%dx%d-%d", xres, yres, bpp);
++ fb_find_mode(&info->var, info, modestr, NULL, 0, NULL, bpp);
++
+ /* This has to be done !!! */
+ if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0))
+ goto out_err1;
+@@ -1317,11 +1386,11 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+ goto out_err3;
+ }
+
++ /* save for primary gfx device detection & unregister_framebuffer() */
++ sti->info = info;
+ if (register_framebuffer(&fb->info) < 0)
+ goto out_err4;
+
+- sti->info = info; /* save for unregister_framebuffer() */
+-
+ fb_info(&fb->info, "%s %dx%d-%d frame buffer device, %s, id: %04x, mmio: 0x%04lx\n",
+ fix->id,
+ var->xres,
+@@ -1344,6 +1413,7 @@ out_err1:
+ iounmap(info->screen_base);
+ out_err0:
+ kfree(fb);
++ sti->info = NULL;
+ return -ENXIO;
+ }
+
+diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c
+index ae0cf55406369..b9b00a1ffe222 100644
+--- a/drivers/video/fbdev/tgafb.c
++++ b/drivers/video/fbdev/tgafb.c
+@@ -166,6 +166,9 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ {
+ struct tga_par *par = (struct tga_par *)info->par;
+
++ if (!var->pixclock)
++ return -EINVAL;
++
+ if (par->tga_type == TGA_TYPE_8PLANE) {
+ if (var->bits_per_pixel != 8)
+ return -EINVAL;
+diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
+index b9cdd02c10009..0de7b867714a7 100644
+--- a/drivers/video/fbdev/udlfb.c
++++ b/drivers/video/fbdev/udlfb.c
+@@ -27,6 +27,8 @@
+ #include <video/udlfb.h>
+ #include "edid.h"
+
++#define OUT_EP_NUM 1 /* The endpoint number we will use */
++
+ static const struct fb_fix_screeninfo dlfb_fix = {
+ .id = "udlfb",
+ .type = FB_TYPE_PACKED_PIXELS,
+@@ -1426,7 +1428,7 @@ static ssize_t metrics_bytes_rendered_show(struct device *fbdev,
+ struct device_attribute *a, char *buf) {
+ struct fb_info *fb_info = dev_get_drvdata(fbdev);
+ struct dlfb_data *dlfb = fb_info->par;
+- return snprintf(buf, PAGE_SIZE, "%u\n",
++ return sysfs_emit(buf, "%u\n",
+ atomic_read(&dlfb->bytes_rendered));
+ }
+
+@@ -1434,7 +1436,7 @@ static ssize_t metrics_bytes_identical_show(struct device *fbdev,
+ struct device_attribute *a, char *buf) {
+ struct fb_info *fb_info = dev_get_drvdata(fbdev);
+ struct dlfb_data *dlfb = fb_info->par;
+- return snprintf(buf, PAGE_SIZE, "%u\n",
++ return sysfs_emit(buf, "%u\n",
+ atomic_read(&dlfb->bytes_identical));
+ }
+
+@@ -1442,7 +1444,7 @@ static ssize_t metrics_bytes_sent_show(struct device *fbdev,
+ struct device_attribute *a, char *buf) {
+ struct fb_info *fb_info = dev_get_drvdata(fbdev);
+ struct dlfb_data *dlfb = fb_info->par;
+- return snprintf(buf, PAGE_SIZE, "%u\n",
++ return sysfs_emit(buf, "%u\n",
+ atomic_read(&dlfb->bytes_sent));
+ }
+
+@@ -1450,7 +1452,7 @@ static ssize_t metrics_cpu_kcycles_used_show(struct device *fbdev,
+ struct device_attribute *a, char *buf) {
+ struct fb_info *fb_info = dev_get_drvdata(fbdev);
+ struct dlfb_data *dlfb = fb_info->par;
+- return snprintf(buf, PAGE_SIZE, "%u\n",
++ return sysfs_emit(buf, "%u\n",
+ atomic_read(&dlfb->cpu_kcycles_used));
+ }
+
+@@ -1649,8 +1651,9 @@ static int dlfb_usb_probe(struct usb_interface *intf,
+ const struct device_attribute *attr;
+ struct dlfb_data *dlfb;
+ struct fb_info *info;
+- int retval = -ENOMEM;
++ int retval;
+ struct usb_device *usbdev = interface_to_usbdev(intf);
++ static u8 out_ep[] = {OUT_EP_NUM + USB_DIR_OUT, 0};
+
+ /* usb initialization */
+ dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL);
+@@ -1664,6 +1667,12 @@ static int dlfb_usb_probe(struct usb_interface *intf,
+ dlfb->udev = usb_get_dev(usbdev);
+ usb_set_intfdata(intf, dlfb);
+
++ if (!usb_check_bulk_endpoints(intf, out_ep)) {
++ dev_err(&intf->dev, "Invalid DisplayLink device!\n");
++ retval = -EINVAL;
++ goto error;
++ }
++
+ dev_dbg(&intf->dev, "console enable=%d\n", console);
+ dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio);
+ dev_dbg(&intf->dev, "shadow enable=%d\n", shadow);
+@@ -1673,6 +1682,7 @@ static int dlfb_usb_probe(struct usb_interface *intf,
+ if (!dlfb_parse_vendor_descriptor(dlfb, intf)) {
+ dev_err(&intf->dev,
+ "firmware not recognized, incompatible device?\n");
++ retval = -ENODEV;
+ goto error;
+ }
+
+@@ -1686,8 +1696,10 @@ static int dlfb_usb_probe(struct usb_interface *intf,
+
+ /* allocates framebuffer driver structure, not framebuffer memory */
+ info = framebuffer_alloc(0, &dlfb->udev->dev);
+- if (!info)
++ if (!info) {
++ retval = -ENOMEM;
+ goto error;
++ }
+
+ dlfb->info = info;
+ info->par = dlfb;
+@@ -1916,7 +1928,8 @@ retry:
+ }
+
+ /* urb->transfer_buffer_length set to actual before submit */
+- usb_fill_bulk_urb(urb, dlfb->udev, usb_sndbulkpipe(dlfb->udev, 1),
++ usb_fill_bulk_urb(urb, dlfb->udev,
++ usb_sndbulkpipe(dlfb->udev, OUT_EP_NUM),
+ buf, size, dlfb_urb_completion, unode);
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
+index 4df6772802d78..1f3b7e013568c 100644
+--- a/drivers/video/fbdev/uvesafb.c
++++ b/drivers/video/fbdev/uvesafb.c
+@@ -1758,6 +1758,7 @@ static int uvesafb_probe(struct platform_device *dev)
+ out_unmap:
+ iounmap(info->screen_base);
+ out_mem:
++ arch_phys_wc_del(par->mtrr_handle);
+ release_mem_region(info->fix.smem_start, info->fix.smem_len);
+ out_reg:
+ release_region(0x3c0, 32);
+diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
+index ff61605b8764f..a543643ce014d 100644
+--- a/drivers/video/fbdev/vermilion/vermilion.c
++++ b/drivers/video/fbdev/vermilion/vermilion.c
+@@ -277,8 +277,10 @@ static int vmlfb_get_gpu(struct vml_par *par)
+
+ mutex_unlock(&vml_mutex);
+
+- if (pci_enable_device(par->gpu) < 0)
++ if (pci_enable_device(par->gpu) < 0) {
++ pci_dev_put(par->gpu);
+ return -ENODEV;
++ }
+
+ return 0;
+ }
+diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c
+index df6de5a9dd4cd..929d4775cb4bc 100644
+--- a/drivers/video/fbdev/vesafb.c
++++ b/drivers/video/fbdev/vesafb.c
+@@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
+ return err;
+ }
+
++/*
++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
++ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
++ */
+ static void vesafb_destroy(struct fb_info *info)
+ {
+ struct vesafb_par *par = info->par;
+@@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info)
+ if (info->screen_base)
+ iounmap(info->screen_base);
+ release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
++
++ framebuffer_release(info);
+ }
+
+ static struct fb_ops vesafb_ops = {
+@@ -484,10 +490,11 @@ static int vesafb_remove(struct platform_device *pdev)
+ {
+ struct fb_info *info = platform_get_drvdata(pdev);
+
+- unregister_framebuffer(info);
+ if (((struct vesafb_par *)(info->par))->region)
+ release_region(0x3c0, 32);
+- framebuffer_release(info);
++
++ /* vesafb_destroy takes care of info cleanup */
++ unregister_framebuffer(info);
+
+ return 0;
+ }
+diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c
+index e2757ff1c23d2..96e312a3eac75 100644
+--- a/drivers/video/fbdev/vga16fb.c
++++ b/drivers/video/fbdev/vga16fb.c
+@@ -184,6 +184,25 @@ static inline void setindex(int index)
+ vga_io_w(VGA_GFX_I, index);
+ }
+
++/* Check if the video mode is supported by the driver */
++static inline int check_mode_supported(void)
++{
++ /* non-x86 architectures treat orig_video_isVGA as a boolean flag */
++#if defined(CONFIG_X86)
++ /* only EGA and VGA in 16 color graphic mode are supported */
++ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EGAC &&
++ screen_info.orig_video_isVGA != VIDEO_TYPE_VGAC)
++ return -ENODEV;
++
++ if (screen_info.orig_video_mode != 0x0D && /* 320x200/4 (EGA) */
++ screen_info.orig_video_mode != 0x0E && /* 640x200/4 (EGA) */
++ screen_info.orig_video_mode != 0x10 && /* 640x350/4 (EGA) */
++ screen_info.orig_video_mode != 0x12) /* 640x480/4 (VGA) */
++ return -ENODEV;
++#endif
++ return 0;
++}
++
+ static void vga16fb_pan_var(struct fb_info *info,
+ struct fb_var_screeninfo *var)
+ {
+@@ -1422,6 +1441,11 @@ static int __init vga16fb_init(void)
+
+ vga16fb_setup(option);
+ #endif
++
++ ret = check_mode_supported();
++ if (ret)
++ return ret;
++
+ ret = platform_driver_register(&vga16fb_driver);
+
+ if (!ret) {
+diff --git a/drivers/video/fbdev/via/via-core.c b/drivers/video/fbdev/via/via-core.c
+index 89d75079b7307..0363b478fa3ef 100644
+--- a/drivers/video/fbdev/via/via-core.c
++++ b/drivers/video/fbdev/via/via-core.c
+@@ -725,7 +725,14 @@ static int __init via_core_init(void)
+ return ret;
+ viafb_i2c_init();
+ viafb_gpio_init();
+- return pci_register_driver(&via_driver);
++ ret = pci_register_driver(&via_driver);
++ if (ret) {
++ viafb_gpio_exit();
++ viafb_i2c_exit();
++ return ret;
++ }
++
++ return 0;
+ }
+
+ static void __exit via_core_exit(void)
+diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c
+index 7a959e5ba90b8..c274ec5e965ca 100644
+--- a/drivers/video/fbdev/vt8623fb.c
++++ b/drivers/video/fbdev/vt8623fb.c
+@@ -504,6 +504,8 @@ static int vt8623fb_set_par(struct fb_info *info)
+ (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, 1,
+ 1, info->node);
+
++ if (screen_size > info->screen_size)
++ screen_size = info->screen_size;
+ memset_io(info->screen_base, 0x00, screen_size);
+
+ /* Device and screen back on */
+diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c
+index d96ab28f8ce4a..4e641a780726e 100644
+--- a/drivers/video/fbdev/w100fb.c
++++ b/drivers/video/fbdev/w100fb.c
+@@ -770,12 +770,18 @@ out:
+ fb_dealloc_cmap(&info->cmap);
+ kfree(info->pseudo_palette);
+ }
+- if (remapped_fbuf != NULL)
++ if (remapped_fbuf != NULL) {
+ iounmap(remapped_fbuf);
+- if (remapped_regs != NULL)
++ remapped_fbuf = NULL;
++ }
++ if (remapped_regs != NULL) {
+ iounmap(remapped_regs);
+- if (remapped_base != NULL)
++ remapped_regs = NULL;
++ }
++ if (remapped_base != NULL) {
+ iounmap(remapped_base);
++ remapped_base = NULL;
++ }
+ if (info)
+ framebuffer_release(info);
+ return err;
+@@ -795,8 +801,11 @@ static int w100fb_remove(struct platform_device *pdev)
+ fb_dealloc_cmap(&info->cmap);
+
+ iounmap(remapped_base);
++ remapped_base = NULL;
+ iounmap(remapped_regs);
++ remapped_regs = NULL;
+ iounmap(remapped_fbuf);
++ remapped_fbuf = NULL;
+
+ framebuffer_release(info);
+
+diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c
+index 130e12b8652a6..af889cee66805 100644
+--- a/drivers/virt/acrn/hsm.c
++++ b/drivers/virt/acrn/hsm.c
+@@ -134,8 +134,10 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
+ if (IS_ERR(vm_param))
+ return PTR_ERR(vm_param);
+
+- if ((vm_param->reserved0 | vm_param->reserved1) != 0)
++ if ((vm_param->reserved0 | vm_param->reserved1) != 0) {
++ kfree(vm_param);
+ return -EINVAL;
++ }
+
+ vm = acrn_vm_create(vm, vm_param);
+ if (!vm) {
+@@ -180,21 +182,29 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
+ return PTR_ERR(cpu_regs);
+
+ for (i = 0; i < ARRAY_SIZE(cpu_regs->reserved); i++)
+- if (cpu_regs->reserved[i])
++ if (cpu_regs->reserved[i]) {
++ kfree(cpu_regs);
+ return -EINVAL;
++ }
+
+ for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_32); i++)
+- if (cpu_regs->vcpu_regs.reserved_32[i])
++ if (cpu_regs->vcpu_regs.reserved_32[i]) {
++ kfree(cpu_regs);
+ return -EINVAL;
++ }
+
+ for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_64); i++)
+- if (cpu_regs->vcpu_regs.reserved_64[i])
++ if (cpu_regs->vcpu_regs.reserved_64[i]) {
++ kfree(cpu_regs);
+ return -EINVAL;
++ }
+
+ for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.gdt.reserved); i++)
+ if (cpu_regs->vcpu_regs.gdt.reserved[i] |
+- cpu_regs->vcpu_regs.idt.reserved[i])
++ cpu_regs->vcpu_regs.idt.reserved[i]) {
++ kfree(cpu_regs);
+ return -EINVAL;
++ }
+
+ ret = hcall_set_vcpu_regs(vm->vmid, virt_to_phys(cpu_regs));
+ if (ret < 0)
+diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c
+index c4f2e15c8a2ba..3b1b1e7a844b4 100644
+--- a/drivers/virt/acrn/mm.c
++++ b/drivers/virt/acrn/mm.c
+@@ -162,10 +162,34 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+ void *remap_vaddr;
+ int ret, pinned;
+ u64 user_vm_pa;
++ unsigned long pfn;
++ struct vm_area_struct *vma;
+
+ if (!vm || !memmap)
+ return -EINVAL;
+
++ mmap_read_lock(current->mm);
++ vma = vma_lookup(current->mm, memmap->vma_base);
++ if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) {
++ if ((memmap->vma_base + memmap->len) > vma->vm_end) {
++ mmap_read_unlock(current->mm);
++ return -EINVAL;
++ }
++
++ ret = follow_pfn(vma, memmap->vma_base, &pfn);
++ mmap_read_unlock(current->mm);
++ if (ret < 0) {
++ dev_dbg(acrn_dev.this_device,
++ "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base);
++ return ret;
++ }
++
++ return acrn_mm_region_add(vm, memmap->user_vm_pa,
++ PFN_PHYS(pfn), memmap->len,
++ ACRN_MEM_TYPE_WB, memmap->attr);
++ }
++ mmap_read_unlock(current->mm);
++
+ /* Get the page number of the map region */
+ nr_pages = memmap->len >> PAGE_SHIFT;
+ pages = vzalloc(nr_pages * sizeof(struct page *));
+diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c
+index e21e1e86ad15f..fe7a8e4034097 100644
+--- a/drivers/virt/nitro_enclaves/ne_misc_dev.c
++++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c
+@@ -886,8 +886,9 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
+ goto put_pages;
+ }
+
+- gup_rc = get_user_pages(mem_region.userspace_addr + memory_size, 1, FOLL_GET,
+- ne_mem_region->pages + i, NULL);
++ gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1,
++ ne_mem_region->pages + i, FOLL_GET);
++
+ if (gup_rc < 0) {
+ rc = gup_rc;
+
+diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c
+index 73eb34849eaba..4ccfd30c2a304 100644
+--- a/drivers/virt/vboxguest/vboxguest_linux.c
++++ b/drivers/virt/vboxguest/vboxguest_linux.c
+@@ -356,8 +356,8 @@ static int vbg_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ goto err_vbg_core_exit;
+ }
+
+- ret = devm_request_irq(dev, pci->irq, vbg_core_isr, IRQF_SHARED,
+- DEVICE_NAME, gdev);
++ ret = request_irq(pci->irq, vbg_core_isr, IRQF_SHARED, DEVICE_NAME,
++ gdev);
+ if (ret) {
+ vbg_err("vboxguest: Error requesting irq: %d\n", ret);
+ goto err_vbg_core_exit;
+@@ -367,7 +367,7 @@ static int vbg_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ if (ret) {
+ vbg_err("vboxguest: Error misc_register %s failed: %d\n",
+ DEVICE_NAME, ret);
+- goto err_vbg_core_exit;
++ goto err_free_irq;
+ }
+
+ ret = misc_register(&gdev->misc_device_user);
+@@ -403,6 +403,8 @@ err_unregister_misc_device_user:
+ misc_deregister(&gdev->misc_device_user);
+ err_unregister_misc_device:
+ misc_deregister(&gdev->misc_device);
++err_free_irq:
++ free_irq(pci->irq, gdev);
+ err_vbg_core_exit:
+ vbg_core_exit(gdev);
+ err_disable_pcidev:
+@@ -419,6 +421,7 @@ static void vbg_pci_remove(struct pci_dev *pci)
+ vbg_gdev = NULL;
+ mutex_unlock(&vbg_gdev_mutex);
+
++ free_irq(pci->irq, gdev);
+ device_remove_file(gdev->dev, &dev_attr_host_features);
+ device_remove_file(gdev->dev, &dev_attr_host_version);
+ misc_deregister(&gdev->misc_device_user);
+diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
+index 236081afe9a2a..c2b733ef95b0d 100644
+--- a/drivers/virtio/virtio.c
++++ b/drivers/virtio/virtio.c
+@@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
+ }
+ EXPORT_SYMBOL_GPL(virtio_add_status);
+
+-int virtio_finalize_features(struct virtio_device *dev)
++/* Do some validation, then set FEATURES_OK */
++static int virtio_features_ok(struct virtio_device *dev)
+ {
+- int ret = dev->config->finalize_features(dev);
+ unsigned status;
++ int ret;
+
+ might_sleep();
+- if (ret)
+- return ret;
+
+ ret = arch_has_restricted_virtio_memory_access();
+ if (ret) {
+@@ -202,7 +201,6 @@ int virtio_finalize_features(struct virtio_device *dev)
+ }
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(virtio_finalize_features);
+
+ static int virtio_dev_probe(struct device *_d)
+ {
+@@ -239,17 +237,6 @@ static int virtio_dev_probe(struct device *_d)
+ driver_features_legacy = driver_features;
+ }
+
+- /*
+- * Some devices detect legacy solely via F_VERSION_1. Write
+- * F_VERSION_1 to force LE config space accesses before FEATURES_OK for
+- * these when needed.
+- */
+- if (drv->validate && !virtio_legacy_is_little_endian()
+- && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
+- dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
+- dev->config->finalize_features(dev);
+- }
+-
+ if (device_features & (1ULL << VIRTIO_F_VERSION_1))
+ dev->features = driver_features & device_features;
+ else
+@@ -260,13 +247,26 @@ static int virtio_dev_probe(struct device *_d)
+ if (device_features & (1ULL << i))
+ __virtio_set_bit(dev, i);
+
++ err = dev->config->finalize_features(dev);
++ if (err)
++ goto err;
++
+ if (drv->validate) {
++ u64 features = dev->features;
++
+ err = drv->validate(dev);
+ if (err)
+ goto err;
++
++ /* Did validation change any features? Then write them again. */
++ if (features != dev->features) {
++ err = dev->config->finalize_features(dev);
++ if (err)
++ goto err;
++ }
+ }
+
+- err = virtio_finalize_features(dev);
++ err = virtio_features_ok(dev);
+ if (err)
+ goto err;
+
+@@ -490,7 +490,11 @@ int virtio_device_restore(struct virtio_device *dev)
+ /* We have a driver! */
+ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+- ret = virtio_finalize_features(dev);
++ ret = dev->config->finalize_features(dev);
++ if (ret)
++ goto err;
++
++ ret = virtio_features_ok(dev);
+ if (ret)
+ goto err;
+
+diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
+index bef8ad6bf4661..4624a2c3d0553 100644
+--- a/drivers/virtio/virtio_mem.c
++++ b/drivers/virtio/virtio_mem.c
+@@ -577,7 +577,7 @@ static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
+ return -ENOMEM;
+
+ mutex_lock(&vm->hotplug_mutex);
+- if (new_bitmap)
++ if (vm->sbm.sb_states)
+ memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);
+
+ old_bitmap = vm->sbm.sb_states;
+diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
+index 56128b9c46eba..f4d43d60d710f 100644
+--- a/drivers/virtio/virtio_mmio.c
++++ b/drivers/virtio/virtio_mmio.c
+@@ -62,6 +62,7 @@
+ #include <linux/list.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
++#include <linux/pm.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+ #include <linux/virtio.h>
+@@ -543,15 +544,36 @@ static const struct virtio_config_ops virtio_mmio_config_ops = {
+ .get_shm_region = vm_get_shm_region,
+ };
+
++#ifdef CONFIG_PM_SLEEP
++static int virtio_mmio_freeze(struct device *dev)
++{
++ struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev);
++
++ return virtio_device_freeze(&vm_dev->vdev);
++}
++
++static int virtio_mmio_restore(struct device *dev)
++{
++ struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev);
++
++ if (vm_dev->version == 1)
++ writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
++
++ return virtio_device_restore(&vm_dev->vdev);
++}
++
++static const struct dev_pm_ops virtio_mmio_pm_ops = {
++ SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore)
++};
++#endif
+
+ static void virtio_mmio_release_dev(struct device *_d)
+ {
+ struct virtio_device *vdev =
+ container_of(_d, struct virtio_device, dev);
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+- struct platform_device *pdev = vm_dev->pdev;
+
+- devm_kfree(&pdev->dev, vm_dev);
++ kfree(vm_dev);
+ }
+
+ /* Platform device */
+@@ -562,7 +584,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
+ unsigned long magic;
+ int rc;
+
+- vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
++ vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL);
+ if (!vm_dev)
+ return -ENOMEM;
+
+@@ -688,6 +710,7 @@ static int vm_cmdline_set(const char *device,
+ if (!vm_cmdline_parent_registered) {
+ err = device_register(&vm_cmdline_parent);
+ if (err) {
++ put_device(&vm_cmdline_parent);
+ pr_err("Failed to register parent device!\n");
+ return err;
+ }
+@@ -785,6 +808,9 @@ static struct platform_driver virtio_mmio_driver = {
+ .name = "virtio-mmio",
+ .of_match_table = virtio_mmio_match,
+ .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match),
++#ifdef CONFIG_PM_SLEEP
++ .pm = &virtio_mmio_pm_ops,
++#endif
+ },
+ };
+
+diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
+index b35bb2d57f62c..1e890ef176873 100644
+--- a/drivers/virtio/virtio_pci_common.c
++++ b/drivers/virtio/virtio_pci_common.c
+@@ -254,8 +254,7 @@ void vp_del_vqs(struct virtio_device *vdev)
+
+ if (vp_dev->msix_affinity_masks) {
+ for (i = 0; i < vp_dev->msix_vectors; i++)
+- if (vp_dev->msix_affinity_masks[i])
+- free_cpumask_var(vp_dev->msix_affinity_masks[i]);
++ free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+ }
+
+ if (vp_dev->msix_enabled) {
+diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
+index 30654d3a0b41e..a274261f36d63 100644
+--- a/drivers/virtio/virtio_pci_modern.c
++++ b/drivers/virtio/virtio_pci_modern.c
+@@ -196,7 +196,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+ int err;
+
+ if (index >= vp_modern_get_num_queues(mdev))
+- return ERR_PTR(-ENOENT);
++ return ERR_PTR(-EINVAL);
+
+ /* Check if queue is either not available or already active. */
+ num = vp_modern_get_queue_size(mdev, index);
+diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
+index e11ed748e6613..9ab66e44738ee 100644
+--- a/drivers/virtio/virtio_pci_modern_dev.c
++++ b/drivers/virtio/virtio_pci_modern_dev.c
+@@ -340,6 +340,7 @@ err_map_notify:
+ err_map_isr:
+ pci_iounmap(pci_dev, mdev->common);
+ err_map_common:
++ pci_release_selected_regions(pci_dev, mdev->modern_bars);
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(vp_modern_probe);
+diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
+index 3035bb6f54585..067b68168f93e 100644
+--- a/drivers/virtio/virtio_ring.c
++++ b/drivers/virtio/virtio_ring.c
+@@ -268,7 +268,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev)
+ size_t max_segment_size = SIZE_MAX;
+
+ if (vring_use_dma_api(vdev))
+- max_segment_size = dma_max_mapping_size(&vdev->dev);
++ max_segment_size = dma_max_mapping_size(vdev->dev.parent);
+
+ return max_segment_size;
+ }
+@@ -809,6 +809,14 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+
+ if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+ vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
++
++ /*
++ * If device triggered an event already it won't trigger one again:
++ * no need to disable.
++ */
++ if (vq->event_triggered)
++ return;
++
+ if (vq->event)
+ /* TODO: this is a hack. Figure out a cleaner value to write. */
+ vring_used_event(&vq->split.vring) = 0x0;
+@@ -1065,6 +1073,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
+
+ head = vq->packed.next_avail_idx;
+ desc = alloc_indirect_packed(total_sg, gfp);
++ if (!desc)
++ return -ENOMEM;
+
+ if (unlikely(vq->vq.num_free < 1)) {
+ pr_debug("Can't add buf len 1 - avail = 0\n");
+@@ -1176,6 +1186,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
+ unsigned int i, n, c, descs_used, err_idx;
+ __le16 head_flags, flags;
+ u16 head, id, prev, curr, avail_used_flags;
++ int err;
+
+ START_USE(vq);
+
+@@ -1191,9 +1202,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
+
+ BUG_ON(total_sg == 0);
+
+- if (virtqueue_use_indirect(_vq, total_sg))
+- return virtqueue_add_indirect_packed(vq, sgs, total_sg,
+- out_sgs, in_sgs, data, gfp);
++ if (virtqueue_use_indirect(_vq, total_sg)) {
++ err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
++ in_sgs, data, gfp);
++ if (err != -ENOMEM) {
++ END_USE(vq);
++ return err;
++ }
++
++ /* fall back on direct */
++ }
+
+ head = vq->packed.next_avail_idx;
+ avail_used_flags = vq->packed.avail_used_flags;
+@@ -1253,7 +1271,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
+ }
+ }
+
+- if (i < head)
++ if (i <= head)
+ vq->packed.avail_wrap_counter ^= 1;
+
+ /* We're using some buffers from the free list. */
+@@ -1490,6 +1508,14 @@ static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+
+ if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
+ vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
++
++ /*
++ * If device triggered an event already it won't trigger one again:
++ * no need to disable.
++ */
++ if (vq->event_triggered)
++ return;
++
+ vq->packed.vring.driver->flags =
+ cpu_to_le16(vq->packed.event_flags_shadow);
+ }
+@@ -2009,12 +2035,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
+ {
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+- /* If device triggered an event already it won't trigger one again:
+- * no need to disable.
+- */
+- if (vq->event_triggered)
+- return;
+-
+ if (vq->packed_ring)
+ virtqueue_disable_cb_packed(_vq);
+ else
+diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
+index 6a1bc284f297c..eae78366eb028 100644
+--- a/drivers/vme/bridges/vme_fake.c
++++ b/drivers/vme/bridges/vme_fake.c
+@@ -1073,6 +1073,8 @@ static int __init fake_init(void)
+
+ /* We need a fake parent device */
+ vme_root = __root_device_register("vme", THIS_MODULE);
++ if (IS_ERR(vme_root))
++ return PTR_ERR(vme_root);
+
+ /* If we want to support more than one bridge at some point, we need to
+ * dynamically allocate this so we get one per device.
+diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c
+index be9051b02f24c..5b4c766d15e69 100644
+--- a/drivers/vme/bridges/vme_tsi148.c
++++ b/drivers/vme/bridges/vme_tsi148.c
+@@ -1765,6 +1765,7 @@ static int tsi148_dma_list_add(struct vme_dma_list *list,
+ return 0;
+
+ err_dma:
++ list_del(&entry->list);
+ err_dest:
+ err_source:
+ err_align:
+diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c
+index e4f336111edc6..6cef6e2edb892 100644
+--- a/drivers/w1/slaves/w1_ds28e04.c
++++ b/drivers/w1/slaves/w1_ds28e04.c
+@@ -32,7 +32,7 @@ static int w1_strong_pullup = 1;
+ module_param_named(strong_pullup, w1_strong_pullup, int, 0);
+
+ /* enable/disable CRC checking on DS28E04-100 memory accesses */
+-static char w1_enable_crccheck = 1;
++static bool w1_enable_crccheck = true;
+
+ #define W1_EEPROM_SIZE 512
+ #define W1_PAGE_COUNT 16
+@@ -339,32 +339,18 @@ static BIN_ATTR_RW(pio, 1);
+ static ssize_t crccheck_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+ {
+- if (put_user(w1_enable_crccheck + 0x30, buf))
+- return -EFAULT;
+-
+- return sizeof(w1_enable_crccheck);
++ return sysfs_emit(buf, "%d\n", w1_enable_crccheck);
+ }
+
+ static ssize_t crccheck_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+ {
+- char val;
+-
+- if (count != 1 || !buf)
+- return -EINVAL;
++ int err = kstrtobool(buf, &w1_enable_crccheck);
+
+- if (get_user(val, buf))
+- return -EFAULT;
++ if (err)
++ return err;
+
+- /* convert to decimal */
+- val = val - 0x30;
+- if (val != 0 && val != 1)
+- return -EINVAL;
+-
+- /* set the new value */
+- w1_enable_crccheck = val;
+-
+- return sizeof(w1_enable_crccheck);
++ return count;
+ }
+
+ static DEVICE_ATTR_RW(crccheck);
+diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
+index ca70c5f032060..67d1cfbbb5f7f 100644
+--- a/drivers/w1/slaves/w1_therm.c
++++ b/drivers/w1/slaves/w1_therm.c
+@@ -1093,29 +1093,26 @@ static int convert_t(struct w1_slave *sl, struct therm_info *info)
+
+ w1_write_8(dev_master, W1_CONVERT_TEMP);
+
+- if (strong_pullup) { /*some device need pullup */
++ if (SLAVE_FEATURES(sl) & W1_THERM_POLL_COMPLETION) {
++ ret = w1_poll_completion(dev_master, W1_POLL_CONVERT_TEMP);
++ if (ret) {
++ dev_dbg(&sl->dev, "%s: Timeout\n", __func__);
++ goto mt_unlock;
++ }
++ mutex_unlock(&dev_master->bus_mutex);
++ } else if (!strong_pullup) { /*no device need pullup */
+ sleep_rem = msleep_interruptible(t_conv);
+ if (sleep_rem != 0) {
+ ret = -EINTR;
+ goto mt_unlock;
+ }
+ mutex_unlock(&dev_master->bus_mutex);
+- } else { /*no device need pullup */
+- if (SLAVE_FEATURES(sl) & W1_THERM_POLL_COMPLETION) {
+- ret = w1_poll_completion(dev_master, W1_POLL_CONVERT_TEMP);
+- if (ret) {
+- dev_dbg(&sl->dev, "%s: Timeout\n", __func__);
+- goto mt_unlock;
+- }
+- mutex_unlock(&dev_master->bus_mutex);
+- } else {
+- /* Fixed delay */
+- mutex_unlock(&dev_master->bus_mutex);
+- sleep_rem = msleep_interruptible(t_conv);
+- if (sleep_rem != 0) {
+- ret = -EINTR;
+- goto dec_refcnt;
+- }
++ } else { /*some device need pullup */
++ mutex_unlock(&dev_master->bus_mutex);
++ sleep_rem = msleep_interruptible(t_conv);
++ if (sleep_rem != 0) {
++ ret = -EINTR;
++ goto dec_refcnt;
+ }
+ }
+ ret = read_scratchpad(sl, info);
+@@ -2090,16 +2087,20 @@ static ssize_t w1_seq_show(struct device *device,
+ if (sl->reg_num.id == reg_num->id)
+ seq = i;
+
++ if (w1_reset_bus(sl->master))
++ goto error;
++
++ /* Put the device into chain DONE state */
++ w1_write_8(sl->master, W1_MATCH_ROM);
++ w1_write_block(sl->master, (u8 *)&rn, 8);
+ w1_write_8(sl->master, W1_42_CHAIN);
+ w1_write_8(sl->master, W1_42_CHAIN_DONE);
+ w1_write_8(sl->master, W1_42_CHAIN_DONE_INV);
+- w1_read_block(sl->master, &ack, sizeof(ack));
+
+ /* check for acknowledgment */
+ ack = w1_read_8(sl->master);
+ if (ack != W1_42_SUCCESS_CONFIRM_BYTE)
+ goto error;
+-
+ }
+
+ /* Exit from CHAIN state */
+diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
+index f2ae2e563dc54..2eee26b7fc4a3 100644
+--- a/drivers/w1/w1.c
++++ b/drivers/w1/w1.c
+@@ -1166,6 +1166,8 @@ int w1_process(void *data)
+ /* remainder if it woke up early */
+ unsigned long jremain = 0;
+
++ atomic_inc(&dev->refcnt);
++
+ for (;;) {
+
+ if (!jremain && dev->search_count) {
+@@ -1193,8 +1195,10 @@ int w1_process(void *data)
+ */
+ mutex_unlock(&dev->list_mutex);
+
+- if (kthread_should_stop())
++ if (kthread_should_stop()) {
++ __set_current_state(TASK_RUNNING);
+ break;
++ }
+
+ /* Only sleep when the search is active. */
+ if (dev->search_count) {
+@@ -1259,10 +1263,10 @@ err_out_exit_init:
+
+ static void __exit w1_fini(void)
+ {
+- struct w1_master *dev;
++ struct w1_master *dev, *n;
+
+ /* Set netlink removal messages and some cleanup */
+- list_for_each_entry(dev, &w1_masters, w1_master_entry)
++ list_for_each_entry_safe(dev, n, &w1_masters, w1_master_entry)
+ __w1_remove_master_device(dev);
+
+ w1_fini_netlink();
+diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
+index b3e1792d9c49f..3a71c5eb2f837 100644
+--- a/drivers/w1/w1_int.c
++++ b/drivers/w1/w1_int.c
+@@ -51,10 +51,9 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
+ dev->search_count = w1_search_count;
+ dev->enable_pullup = w1_enable_pullup;
+
+- /* 1 for w1_process to decrement
+- * 1 for __w1_remove_master_device to decrement
++ /* For __w1_remove_master_device to decrement
+ */
+- atomic_set(&dev->refcnt, 2);
++ atomic_set(&dev->refcnt, 1);
+
+ INIT_LIST_HEAD(&dev->slist);
+ INIT_LIST_HEAD(&dev->async_list);
+diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
+index bf59faeb3de1b..d937f957f8df8 100644
+--- a/drivers/watchdog/Kconfig
++++ b/drivers/watchdog/Kconfig
+@@ -1679,7 +1679,7 @@ config SIBYTE_WDOG
+
+ config AR7_WDT
+ tristate "TI AR7 Watchdog Timer"
+- depends on AR7 || (MIPS && COMPILE_TEST)
++ depends on AR7 || (MIPS && 32BIT && COMPILE_TEST)
+ help
+ Hardware driver for the TI AR7 Watchdog Timer.
+
+diff --git a/drivers/watchdog/armada_37xx_wdt.c b/drivers/watchdog/armada_37xx_wdt.c
+index 1635f421ef2c3..854b1cc723cb6 100644
+--- a/drivers/watchdog/armada_37xx_wdt.c
++++ b/drivers/watchdog/armada_37xx_wdt.c
+@@ -274,6 +274,8 @@ static int armada_37xx_wdt_probe(struct platform_device *pdev)
+ if (!res)
+ return -ENODEV;
+ dev->reg = devm_ioremap(&pdev->dev, res->start, resource_size(res));
++ if (!dev->reg)
++ return -ENOMEM;
+
+ /* init clock */
+ dev->clk = devm_clk_get(&pdev->dev, NULL);
+diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
+index 292b5a1ca8318..fed7be2464420 100644
+--- a/drivers/watchdog/at91sam9_wdt.c
++++ b/drivers/watchdog/at91sam9_wdt.c
+@@ -206,10 +206,9 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt)
+ "min heartbeat and max heartbeat might be too close for the system to handle it correctly\n");
+
+ if ((tmp & AT91_WDT_WDFIEN) && wdt->irq) {
+- err = request_irq(wdt->irq, wdt_interrupt,
+- IRQF_SHARED | IRQF_IRQPOLL |
+- IRQF_NO_SUSPEND,
+- pdev->name, wdt);
++ err = devm_request_irq(dev, wdt->irq, wdt_interrupt,
++ IRQF_SHARED | IRQF_IRQPOLL | IRQF_NO_SUSPEND,
++ pdev->name, wdt);
+ if (err)
+ return err;
+ }
+diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c
+index 4cb10877017c7..6ca5d9515d85c 100644
+--- a/drivers/watchdog/diag288_wdt.c
++++ b/drivers/watchdog/diag288_wdt.c
+@@ -86,7 +86,7 @@ static int __diag288(unsigned int func, unsigned int timeout,
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (err) : "d"(__func), "d"(__timeout),
+- "d"(__action), "d"(__len) : "1", "cc");
++ "d"(__action), "d"(__len) : "1", "cc", "memory");
+ return err;
+ }
+
+@@ -268,12 +268,21 @@ static int __init diag288_init(void)
+ char ebc_begin[] = {
+ 194, 197, 199, 201, 213
+ };
++ char *ebc_cmd;
+
+ watchdog_set_nowayout(&wdt_dev, nowayout_info);
+
+ if (MACHINE_IS_VM) {
+- if (__diag288_vm(WDT_FUNC_INIT, 15,
+- ebc_begin, sizeof(ebc_begin)) != 0) {
++ ebc_cmd = kmalloc(sizeof(ebc_begin), GFP_KERNEL);
++ if (!ebc_cmd) {
++ pr_err("The watchdog cannot be initialized\n");
++ return -ENOMEM;
++ }
++ memcpy(ebc_cmd, ebc_begin, sizeof(ebc_begin));
++ ret = __diag288_vm(WDT_FUNC_INIT, 15,
++ ebc_cmd, sizeof(ebc_begin));
++ kfree(ebc_cmd);
++ if (ret != 0) {
+ pr_err("The watchdog cannot be initialized\n");
+ return -EINVAL;
+ }
+diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
+index cd578843277e5..498c1c403fc92 100644
+--- a/drivers/watchdog/dw_wdt.c
++++ b/drivers/watchdog/dw_wdt.c
+@@ -637,7 +637,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
+
+ ret = dw_wdt_init_timeouts(dw_wdt, dev);
+ if (ret)
+- goto out_disable_clk;
++ goto out_assert_rst;
+
+ wdd = &dw_wdt->wdd;
+ wdd->ops = &dw_wdt_ops;
+@@ -668,12 +668,15 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
+
+ ret = watchdog_register_device(wdd);
+ if (ret)
+- goto out_disable_pclk;
++ goto out_assert_rst;
+
+ dw_wdt_dbgfs_init(dw_wdt);
+
+ return 0;
+
++out_assert_rst:
++ reset_control_assert(dw_wdt->rst);
++
+ out_disable_pclk:
+ clk_disable_unprepare(dw_wdt->pclk);
+
+diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
+index f60beec1bbaea..f7d82d2619133 100644
+--- a/drivers/watchdog/f71808e_wdt.c
++++ b/drivers/watchdog/f71808e_wdt.c
+@@ -228,15 +228,17 @@ static int watchdog_set_timeout(int timeout)
+
+ mutex_lock(&watchdog.lock);
+
+- watchdog.timeout = timeout;
+ if (timeout > 0xff) {
+ watchdog.timer_val = DIV_ROUND_UP(timeout, 60);
+ watchdog.minutes_mode = true;
++ timeout = watchdog.timer_val * 60;
+ } else {
+ watchdog.timer_val = timeout;
+ watchdog.minutes_mode = false;
+ }
+
++ watchdog.timeout = timeout;
++
+ mutex_unlock(&watchdog.lock);
+
+ return 0;
+diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
+index 9b2173f765c8c..fb7fae750181b 100644
+--- a/drivers/watchdog/intel-mid_wdt.c
++++ b/drivers/watchdog/intel-mid_wdt.c
+@@ -203,3 +203,4 @@ module_platform_driver(mid_wdt_driver);
+ MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
+ MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS("platform:intel_mid_wdt");
+diff --git a/drivers/watchdog/menz69_wdt.c b/drivers/watchdog/menz69_wdt.c
+index 8973f98bc6a56..bca0938f3429f 100644
+--- a/drivers/watchdog/menz69_wdt.c
++++ b/drivers/watchdog/menz69_wdt.c
+@@ -98,14 +98,6 @@ static const struct watchdog_ops men_z069_ops = {
+ .set_timeout = men_z069_wdt_set_timeout,
+ };
+
+-static struct watchdog_device men_z069_wdt = {
+- .info = &men_z069_info,
+- .ops = &men_z069_ops,
+- .timeout = MEN_Z069_DEFAULT_TIMEOUT,
+- .min_timeout = 1,
+- .max_timeout = MEN_Z069_WDT_COUNTER_MAX / MEN_Z069_TIMER_FREQ,
+-};
+-
+ static int men_z069_probe(struct mcb_device *dev,
+ const struct mcb_device_id *id)
+ {
+@@ -125,15 +117,19 @@ static int men_z069_probe(struct mcb_device *dev,
+ goto release_mem;
+
+ drv->mem = mem;
++ drv->wdt.info = &men_z069_info;
++ drv->wdt.ops = &men_z069_ops;
++ drv->wdt.timeout = MEN_Z069_DEFAULT_TIMEOUT;
++ drv->wdt.min_timeout = 1;
++ drv->wdt.max_timeout = MEN_Z069_WDT_COUNTER_MAX / MEN_Z069_TIMER_FREQ;
+
+- drv->wdt = men_z069_wdt;
+ watchdog_init_timeout(&drv->wdt, 0, &dev->dev);
+ watchdog_set_nowayout(&drv->wdt, nowayout);
+ watchdog_set_drvdata(&drv->wdt, drv);
+ drv->wdt.parent = &dev->dev;
+ mcb_set_drvdata(dev, drv);
+
+- return watchdog_register_device(&men_z069_wdt);
++ return watchdog_register_device(&drv->wdt);
+
+ release_mem:
+ mcb_release_mem(mem);
+diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
+index 1bdaf17c1d38d..8202f0a6b0935 100644
+--- a/drivers/watchdog/pcwd_usb.c
++++ b/drivers/watchdog/pcwd_usb.c
+@@ -325,7 +325,8 @@ static int usb_pcwd_set_heartbeat(struct usb_pcwd_private *usb_pcwd, int t)
+ static int usb_pcwd_get_temperature(struct usb_pcwd_private *usb_pcwd,
+ int *temperature)
+ {
+- unsigned char msb, lsb;
++ unsigned char msb = 0x00;
++ unsigned char lsb = 0x00;
+
+ usb_pcwd_send_command(usb_pcwd, CMD_READ_TEMP, &msb, &lsb);
+
+@@ -341,7 +342,8 @@ static int usb_pcwd_get_temperature(struct usb_pcwd_private *usb_pcwd,
+ static int usb_pcwd_get_timeleft(struct usb_pcwd_private *usb_pcwd,
+ int *time_left)
+ {
+- unsigned char msb, lsb;
++ unsigned char msb = 0x00;
++ unsigned char lsb = 0x00;
+
+ /* Read the time that's left before rebooting */
+ /* Note: if the board is not yet armed then we will read 0xFFFF */
+diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c
+index 359302f71f7ef..46c2a4bd9ebe9 100644
+--- a/drivers/watchdog/rti_wdt.c
++++ b/drivers/watchdog/rti_wdt.c
+@@ -227,8 +227,9 @@ static int rti_wdt_probe(struct platform_device *pdev)
+
+ pm_runtime_enable(dev);
+ ret = pm_runtime_get_sync(dev);
+- if (ret) {
++ if (ret < 0) {
+ pm_runtime_put_noidle(dev);
++ pm_runtime_disable(&pdev->dev);
+ return dev_err_probe(dev, ret, "runtime pm failed\n");
+ }
+
+diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c
+index 9791c74aebd48..63862803421f1 100644
+--- a/drivers/watchdog/sbsa_gwdt.c
++++ b/drivers/watchdog/sbsa_gwdt.c
+@@ -150,6 +150,7 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd,
+ struct sbsa_gwdt *gwdt = watchdog_get_drvdata(wdd);
+
+ wdd->timeout = timeout;
++ timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000);
+
+ if (action)
+ sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt);
+diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c
+index a730ecbf78cd5..0141858188c56 100644
+--- a/drivers/watchdog/sp5100_tco.c
++++ b/drivers/watchdog/sp5100_tco.c
+@@ -48,7 +48,7 @@
+ /* internal variables */
+
+ enum tco_reg_layout {
+- sp5100, sb800, efch
++ sp5100, sb800, efch, efch_mmio
+ };
+
+ struct sp5100_tco {
+@@ -86,6 +86,10 @@ static enum tco_reg_layout tco_reg_layout(struct pci_dev *dev)
+ dev->revision < 0x40) {
+ return sp5100;
+ } else if (dev->vendor == PCI_VENDOR_ID_AMD &&
++ sp5100_tco_pci->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS &&
++ sp5100_tco_pci->revision >= AMD_ZEN_SMBUS_PCI_REV) {
++ return efch_mmio;
++ } else if ((dev->vendor == PCI_VENDOR_ID_AMD || dev->vendor == PCI_VENDOR_ID_HYGON) &&
+ ((dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS &&
+ dev->revision >= 0x41) ||
+ (dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS &&
+@@ -104,6 +108,10 @@ static int tco_timer_start(struct watchdog_device *wdd)
+ val |= SP5100_WDT_START_STOP_BIT;
+ writel(val, SP5100_WDT_CONTROL(tco->tcobase));
+
++ /* This must be a distinct write. */
++ val |= SP5100_WDT_TRIGGER_BIT;
++ writel(val, SP5100_WDT_CONTROL(tco->tcobase));
++
+ return 0;
+ }
+
+@@ -201,6 +209,8 @@ static void tco_timer_enable(struct sp5100_tco *tco)
+ ~EFCH_PM_WATCHDOG_DISABLE,
+ EFCH_PM_DECODEEN_SECOND_RES);
+ break;
++ default:
++ break;
+ }
+ }
+
+@@ -215,14 +225,196 @@ static u32 sp5100_tco_read_pm_reg32(u8 index)
+ return val;
+ }
+
++static u32 sp5100_tco_request_region(struct device *dev,
++ u32 mmio_addr,
++ const char *dev_name)
++{
++ if (!devm_request_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE,
++ dev_name)) {
++ dev_dbg(dev, "MMIO address 0x%08x already in use\n", mmio_addr);
++ return 0;
++ }
++
++ return mmio_addr;
++}
++
++static u32 sp5100_tco_prepare_base(struct sp5100_tco *tco,
++ u32 mmio_addr,
++ u32 alt_mmio_addr,
++ const char *dev_name)
++{
++ struct device *dev = tco->wdd.parent;
++
++ dev_dbg(dev, "Got 0x%08x from SBResource_MMIO register\n", mmio_addr);
++
++ if (!mmio_addr && !alt_mmio_addr)
++ return -ENODEV;
++
++ /* Check for MMIO address and alternate MMIO address conflicts */
++ if (mmio_addr)
++ mmio_addr = sp5100_tco_request_region(dev, mmio_addr, dev_name);
++
++ if (!mmio_addr && alt_mmio_addr)
++ mmio_addr = sp5100_tco_request_region(dev, alt_mmio_addr, dev_name);
++
++ if (!mmio_addr) {
++ dev_err(dev, "Failed to reserve MMIO or alternate MMIO region\n");
++ return -EBUSY;
++ }
++
++ tco->tcobase = devm_ioremap(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE);
++ if (!tco->tcobase) {
++ dev_err(dev, "MMIO address 0x%08x failed mapping\n", mmio_addr);
++ devm_release_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE);
++ return -ENOMEM;
++ }
++
++ dev_info(dev, "Using 0x%08x for watchdog MMIO address\n", mmio_addr);
++
++ return 0;
++}
++
++static int sp5100_tco_timer_init(struct sp5100_tco *tco)
++{
++ struct watchdog_device *wdd = &tco->wdd;
++ struct device *dev = wdd->parent;
++ u32 val;
++
++ val = readl(SP5100_WDT_CONTROL(tco->tcobase));
++ if (val & SP5100_WDT_DISABLED) {
++ dev_err(dev, "Watchdog hardware is disabled\n");
++ return -ENODEV;
++ }
++
++ /*
++ * Save WatchDogFired status, because WatchDogFired flag is
++ * cleared here.
++ */
++ if (val & SP5100_WDT_FIRED)
++ wdd->bootstatus = WDIOF_CARDRESET;
++
++ /* Set watchdog action to reset the system */
++ val &= ~SP5100_WDT_ACTION_RESET;
++ writel(val, SP5100_WDT_CONTROL(tco->tcobase));
++
++ /* Set a reasonable heartbeat before we stop the timer */
++ tco_timer_set_timeout(wdd, wdd->timeout);
++
++ /*
++ * Stop the TCO before we change anything so we don't race with
++ * a zeroed timer.
++ */
++ tco_timer_stop(wdd);
++
++ return 0;
++}
++
++static u8 efch_read_pm_reg8(void __iomem *addr, u8 index)
++{
++ return readb(addr + index);
++}
++
++static void efch_update_pm_reg8(void __iomem *addr, u8 index, u8 reset, u8 set)
++{
++ u8 val;
++
++ val = readb(addr + index);
++ val &= reset;
++ val |= set;
++ writeb(val, addr + index);
++}
++
++static void tco_timer_enable_mmio(void __iomem *addr)
++{
++ efch_update_pm_reg8(addr, EFCH_PM_DECODEEN3,
++ ~EFCH_PM_WATCHDOG_DISABLE,
++ EFCH_PM_DECODEEN_SECOND_RES);
++}
++
++static int sp5100_tco_setupdevice_mmio(struct device *dev,
++ struct watchdog_device *wdd)
++{
++ struct sp5100_tco *tco = watchdog_get_drvdata(wdd);
++ const char *dev_name = SB800_DEVNAME;
++ u32 mmio_addr = 0, alt_mmio_addr = 0;
++ struct resource *res;
++ void __iomem *addr;
++ int ret;
++ u32 val;
++
++ res = request_mem_region_muxed(EFCH_PM_ACPI_MMIO_PM_ADDR,
++ EFCH_PM_ACPI_MMIO_PM_SIZE,
++ "sp5100_tco");
++
++ if (!res) {
++ dev_err(dev,
++ "Memory region 0x%08x already in use\n",
++ EFCH_PM_ACPI_MMIO_PM_ADDR);
++ return -EBUSY;
++ }
++
++ addr = ioremap(EFCH_PM_ACPI_MMIO_PM_ADDR, EFCH_PM_ACPI_MMIO_PM_SIZE);
++ if (!addr) {
++ dev_err(dev, "Address mapping failed\n");
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ /*
++ * EFCH_PM_DECODEEN_WDT_TMREN is dual purpose. This bitfield
++ * enables sp5100_tco register MMIO space decoding. The bitfield
++ * also starts the timer operation. Enable if not already enabled.
++ */
++ val = efch_read_pm_reg8(addr, EFCH_PM_DECODEEN);
++ if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) {
++ efch_update_pm_reg8(addr, EFCH_PM_DECODEEN, 0xff,
++ EFCH_PM_DECODEEN_WDT_TMREN);
++ }
++
++ /* Error if the timer could not be enabled */
++ val = efch_read_pm_reg8(addr, EFCH_PM_DECODEEN);
++ if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) {
++ dev_err(dev, "Failed to enable the timer\n");
++ ret = -EFAULT;
++ goto out;
++ }
++
++ mmio_addr = EFCH_PM_WDT_ADDR;
++
++ /* Determine alternate MMIO base address */
++ val = efch_read_pm_reg8(addr, EFCH_PM_ISACONTROL);
++ if (val & EFCH_PM_ISACONTROL_MMIOEN)
++ alt_mmio_addr = EFCH_PM_ACPI_MMIO_ADDR +
++ EFCH_PM_ACPI_MMIO_WDT_OFFSET;
++
++ ret = sp5100_tco_prepare_base(tco, mmio_addr, alt_mmio_addr, dev_name);
++ if (!ret) {
++ tco_timer_enable_mmio(addr);
++ ret = sp5100_tco_timer_init(tco);
++ }
++
++out:
++ if (addr)
++ iounmap(addr);
++
++ release_resource(res);
++ kfree(res);
++
++ return ret;
++}
++
+ static int sp5100_tco_setupdevice(struct device *dev,
+ struct watchdog_device *wdd)
+ {
+ struct sp5100_tco *tco = watchdog_get_drvdata(wdd);
+ const char *dev_name;
+ u32 mmio_addr = 0, val;
++ u32 alt_mmio_addr = 0;
+ int ret;
+
++ if (tco->tco_reg_layout == efch_mmio)
++ return sp5100_tco_setupdevice_mmio(dev, wdd);
++
+ /* Request the IO ports used by this driver */
+ if (!request_muxed_region(SP5100_IO_PM_INDEX_REG,
+ SP5100_PM_IOPORTS_SIZE, "sp5100_tco")) {
+@@ -239,138 +431,55 @@ static int sp5100_tco_setupdevice(struct device *dev,
+ dev_name = SP5100_DEVNAME;
+ mmio_addr = sp5100_tco_read_pm_reg32(SP5100_PM_WATCHDOG_BASE) &
+ 0xfffffff8;
++
++ /*
++ * Secondly, find the watchdog timer MMIO address
++ * from SBResource_MMIO register.
++ */
++
++ /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */
++ pci_read_config_dword(sp5100_tco_pci,
++ SP5100_SB_RESOURCE_MMIO_BASE,
++ &val);
++
++ /* Verify MMIO is enabled and using bar0 */
++ if ((val & SB800_ACPI_MMIO_MASK) == SB800_ACPI_MMIO_DECODE_EN)
++ alt_mmio_addr = (val & ~0xfff) + SB800_PM_WDT_MMIO_OFFSET;
+ break;
+ case sb800:
+ dev_name = SB800_DEVNAME;
+ mmio_addr = sp5100_tco_read_pm_reg32(SB800_PM_WATCHDOG_BASE) &
+ 0xfffffff8;
++
++ /* Read SBResource_MMIO from AcpiMmioEn(PM_Reg: 24h) */
++ val = sp5100_tco_read_pm_reg32(SB800_PM_ACPI_MMIO_EN);
++
++ /* Verify MMIO is enabled and using bar0 */
++ if ((val & SB800_ACPI_MMIO_MASK) == SB800_ACPI_MMIO_DECODE_EN)
++ alt_mmio_addr = (val & ~0xfff) + SB800_PM_WDT_MMIO_OFFSET;
+ break;
+ case efch:
+ dev_name = SB800_DEVNAME;
+- /*
+- * On Family 17h devices, the EFCH_PM_DECODEEN_WDT_TMREN bit of
+- * EFCH_PM_DECODEEN not only enables the EFCH_PM_WDT_ADDR memory
+- * region, it also enables the watchdog itself.
+- */
+- if (boot_cpu_data.x86 == 0x17) {
+- val = sp5100_tco_read_pm_reg8(EFCH_PM_DECODEEN);
+- if (!(val & EFCH_PM_DECODEEN_WDT_TMREN)) {
+- sp5100_tco_update_pm_reg8(EFCH_PM_DECODEEN, 0xff,
+- EFCH_PM_DECODEEN_WDT_TMREN);
+- }
+- }
+ val = sp5100_tco_read_pm_reg8(EFCH_PM_DECODEEN);
+ if (val & EFCH_PM_DECODEEN_WDT_TMREN)
+ mmio_addr = EFCH_PM_WDT_ADDR;
++
++ val = sp5100_tco_read_pm_reg8(EFCH_PM_ISACONTROL);
++ if (val & EFCH_PM_ISACONTROL_MMIOEN)
++ alt_mmio_addr = EFCH_PM_ACPI_MMIO_ADDR +
++ EFCH_PM_ACPI_MMIO_WDT_OFFSET;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+- /* Check MMIO address conflict */
+- if (!mmio_addr ||
+- !devm_request_mem_region(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE,
+- dev_name)) {
+- if (mmio_addr)
+- dev_dbg(dev, "MMIO address 0x%08x already in use\n",
+- mmio_addr);
+- switch (tco->tco_reg_layout) {
+- case sp5100:
+- /*
+- * Secondly, Find the watchdog timer MMIO address
+- * from SBResource_MMIO register.
+- */
+- /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */
+- pci_read_config_dword(sp5100_tco_pci,
+- SP5100_SB_RESOURCE_MMIO_BASE,
+- &mmio_addr);
+- if ((mmio_addr & (SB800_ACPI_MMIO_DECODE_EN |
+- SB800_ACPI_MMIO_SEL)) !=
+- SB800_ACPI_MMIO_DECODE_EN) {
+- ret = -ENODEV;
+- goto unreg_region;
+- }
+- mmio_addr &= ~0xFFF;
+- mmio_addr += SB800_PM_WDT_MMIO_OFFSET;
+- break;
+- case sb800:
+- /* Read SBResource_MMIO from AcpiMmioEn(PM_Reg: 24h) */
+- mmio_addr =
+- sp5100_tco_read_pm_reg32(SB800_PM_ACPI_MMIO_EN);
+- if ((mmio_addr & (SB800_ACPI_MMIO_DECODE_EN |
+- SB800_ACPI_MMIO_SEL)) !=
+- SB800_ACPI_MMIO_DECODE_EN) {
+- ret = -ENODEV;
+- goto unreg_region;
+- }
+- mmio_addr &= ~0xFFF;
+- mmio_addr += SB800_PM_WDT_MMIO_OFFSET;
+- break;
+- case efch:
+- val = sp5100_tco_read_pm_reg8(EFCH_PM_ISACONTROL);
+- if (!(val & EFCH_PM_ISACONTROL_MMIOEN)) {
+- ret = -ENODEV;
+- goto unreg_region;
+- }
+- mmio_addr = EFCH_PM_ACPI_MMIO_ADDR +
+- EFCH_PM_ACPI_MMIO_WDT_OFFSET;
+- break;
+- }
+- dev_dbg(dev, "Got 0x%08x from SBResource_MMIO register\n",
+- mmio_addr);
+- if (!devm_request_mem_region(dev, mmio_addr,
+- SP5100_WDT_MEM_MAP_SIZE,
+- dev_name)) {
+- dev_dbg(dev, "MMIO address 0x%08x already in use\n",
+- mmio_addr);
+- ret = -EBUSY;
+- goto unreg_region;
+- }
+- }
+-
+- tco->tcobase = devm_ioremap(dev, mmio_addr, SP5100_WDT_MEM_MAP_SIZE);
+- if (!tco->tcobase) {
+- dev_err(dev, "failed to get tcobase address\n");
+- ret = -ENOMEM;
+- goto unreg_region;
+- }
+-
+- dev_info(dev, "Using 0x%08x for watchdog MMIO address\n", mmio_addr);
+-
+- /* Setup the watchdog timer */
+- tco_timer_enable(tco);
+-
+- val = readl(SP5100_WDT_CONTROL(tco->tcobase));
+- if (val & SP5100_WDT_DISABLED) {
+- dev_err(dev, "Watchdog hardware is disabled\n");
+- ret = -ENODEV;
+- goto unreg_region;
++ ret = sp5100_tco_prepare_base(tco, mmio_addr, alt_mmio_addr, dev_name);
++ if (!ret) {
++ /* Setup the watchdog timer */
++ tco_timer_enable(tco);
++ ret = sp5100_tco_timer_init(tco);
+ }
+
+- /*
+- * Save WatchDogFired status, because WatchDogFired flag is
+- * cleared here.
+- */
+- if (val & SP5100_WDT_FIRED)
+- wdd->bootstatus = WDIOF_CARDRESET;
+- /* Set watchdog action to reset the system */
+- val &= ~SP5100_WDT_ACTION_RESET;
+- writel(val, SP5100_WDT_CONTROL(tco->tcobase));
+-
+- /* Set a reasonable heartbeat before we stop the timer */
+- tco_timer_set_timeout(wdd, wdd->timeout);
+-
+- /*
+- * Stop the TCO before we change anything so we don't race with
+- * a zeroed timer.
+- */
+- tco_timer_stop(wdd);
+-
+- release_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE);
+-
+- return 0;
+-
+-unreg_region:
+ release_region(SP5100_IO_PM_INDEX_REG, SP5100_PM_IOPORTS_SIZE);
+ return ret;
+ }
+@@ -452,6 +561,8 @@ static const struct pci_device_id sp5100_tco_pci_tbl[] = {
+ PCI_ANY_ID, },
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, PCI_ANY_ID,
+ PCI_ANY_ID, },
++ { PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, PCI_ANY_ID,
++ PCI_ANY_ID, },
+ { 0, }, /* End of list */
+ };
+ MODULE_DEVICE_TABLE(pci, sp5100_tco_pci_tbl);
+diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h
+index adf015aa4126f..6a0986d2c94b7 100644
+--- a/drivers/watchdog/sp5100_tco.h
++++ b/drivers/watchdog/sp5100_tco.h
+@@ -58,6 +58,7 @@
+ #define SB800_PM_WATCHDOG_SECOND_RES GENMASK(1, 0)
+ #define SB800_ACPI_MMIO_DECODE_EN BIT(0)
+ #define SB800_ACPI_MMIO_SEL BIT(1)
++#define SB800_ACPI_MMIO_MASK GENMASK(1, 0)
+
+ #define SB800_PM_WDT_MMIO_OFFSET 0xB00
+
+@@ -82,4 +83,10 @@
+ #define EFCH_PM_ISACONTROL_MMIOEN BIT(1)
+
+ #define EFCH_PM_ACPI_MMIO_ADDR 0xfed80000
++#define EFCH_PM_ACPI_MMIO_PM_OFFSET 0x00000300
+ #define EFCH_PM_ACPI_MMIO_WDT_OFFSET 0x00000b00
++
++#define EFCH_PM_ACPI_MMIO_PM_ADDR (EFCH_PM_ACPI_MMIO_ADDR + \
++ EFCH_PM_ACPI_MMIO_PM_OFFSET)
++#define EFCH_PM_ACPI_MMIO_PM_SIZE 8
++#define AMD_ZEN_SMBUS_PCI_REV 0x51
+diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c
+index c137ad2bd5c31..0ea554c7cda57 100644
+--- a/drivers/watchdog/ts4800_wdt.c
++++ b/drivers/watchdog/ts4800_wdt.c
+@@ -125,13 +125,16 @@ static int ts4800_wdt_probe(struct platform_device *pdev)
+ ret = of_property_read_u32_index(np, "syscon", 1, &reg);
+ if (ret < 0) {
+ dev_err(dev, "no offset in syscon\n");
++ of_node_put(syscon_np);
+ return ret;
+ }
+
+ /* allocate memory for watchdog struct */
+ wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
+- if (!wdt)
++ if (!wdt) {
++ of_node_put(syscon_np);
+ return -ENOMEM;
++ }
+
+ /* set regmap and offset to know where to write */
+ wdt->feed_offset = reg;
+diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
+index 3a3d8b5c7ad59..5eec84fa65170 100644
+--- a/drivers/watchdog/watchdog_dev.c
++++ b/drivers/watchdog/watchdog_dev.c
+@@ -1044,8 +1044,8 @@ static int watchdog_cdev_register(struct watchdog_device *wdd)
+ if (wdd->id == 0) {
+ misc_deregister(&watchdog_miscdev);
+ old_wd_data = NULL;
+- put_device(&wd_data->dev);
+ }
++ put_device(&wd_data->dev);
+ return err;
+ }
+
+diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
+index 195c8c004b69d..4fac8148a8e62 100644
+--- a/drivers/watchdog/wdat_wdt.c
++++ b/drivers/watchdog/wdat_wdt.c
+@@ -462,6 +462,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)
+ return ret;
+
+ watchdog_set_nowayout(&wdat->wdd, nowayout);
++ watchdog_stop_on_reboot(&wdat->wdd);
+ return devm_watchdog_register_device(dev, &wdat->wdd);
+ }
+
+diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
+index 3a50f097ed3ed..8db96b5e72536 100644
+--- a/drivers/xen/balloon.c
++++ b/drivers/xen/balloon.c
+@@ -58,6 +58,7 @@
+ #include <linux/percpu-defs.h>
+ #include <linux/slab.h>
+ #include <linux/sysctl.h>
++#include <linux/moduleparam.h>
+
+ #include <asm/page.h>
+ #include <asm/tlb.h>
+@@ -73,6 +74,12 @@
+ #include <xen/page.h>
+ #include <xen/mem-reservation.h>
+
++#undef MODULE_PARAM_PREFIX
++#define MODULE_PARAM_PREFIX "xen."
++
++static uint __read_mostly balloon_boot_timeout = 180;
++module_param(balloon_boot_timeout, uint, 0444);
++
+ static int xen_hotplug_unpopulated;
+
+ #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+@@ -125,12 +132,12 @@ static struct ctl_table xen_root[] = {
+ * BP_ECANCELED: error, balloon operation canceled.
+ */
+
+-enum bp_state {
++static enum bp_state {
+ BP_DONE,
+ BP_WAIT,
+ BP_EAGAIN,
+ BP_ECANCELED
+-};
++} balloon_state = BP_DONE;
+
+ /* Main waiting point for xen-balloon thread. */
+ static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
+@@ -199,18 +206,15 @@ static struct page *balloon_next_page(struct page *page)
+ return list_entry(next, struct page, lru);
+ }
+
+-static enum bp_state update_schedule(enum bp_state state)
++static void update_schedule(void)
+ {
+- if (state == BP_WAIT)
+- return BP_WAIT;
+-
+- if (state == BP_ECANCELED)
+- return BP_ECANCELED;
++ if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED)
++ return;
+
+- if (state == BP_DONE) {
++ if (balloon_state == BP_DONE) {
+ balloon_stats.schedule_delay = 1;
+ balloon_stats.retry_count = 1;
+- return BP_DONE;
++ return;
+ }
+
+ ++balloon_stats.retry_count;
+@@ -219,7 +223,8 @@ static enum bp_state update_schedule(enum bp_state state)
+ balloon_stats.retry_count > balloon_stats.max_retry_count) {
+ balloon_stats.schedule_delay = 1;
+ balloon_stats.retry_count = 1;
+- return BP_ECANCELED;
++ balloon_state = BP_ECANCELED;
++ return;
+ }
+
+ balloon_stats.schedule_delay <<= 1;
+@@ -227,7 +232,7 @@ static enum bp_state update_schedule(enum bp_state state)
+ if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
+ balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
+
+- return BP_EAGAIN;
++ balloon_state = BP_EAGAIN;
+ }
+
+ #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+@@ -494,9 +499,9 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
+ * Stop waiting if either state is BP_DONE and ballooning action is
+ * needed, or if the credit has changed while state is not BP_DONE.
+ */
+-static bool balloon_thread_cond(enum bp_state state, long credit)
++static bool balloon_thread_cond(long credit)
+ {
+- if (state == BP_DONE)
++ if (balloon_state == BP_DONE)
+ credit = 0;
+
+ return current_credit() != credit || kthread_should_stop();
+@@ -510,13 +515,12 @@ static bool balloon_thread_cond(enum bp_state state, long credit)
+ */
+ static int balloon_thread(void *unused)
+ {
+- enum bp_state state = BP_DONE;
+ long credit;
+ unsigned long timeout;
+
+ set_freezable();
+ for (;;) {
+- switch (state) {
++ switch (balloon_state) {
+ case BP_DONE:
+ case BP_ECANCELED:
+ timeout = 3600 * HZ;
+@@ -532,7 +536,7 @@ static int balloon_thread(void *unused)
+ credit = current_credit();
+
+ wait_event_freezable_timeout(balloon_thread_wq,
+- balloon_thread_cond(state, credit), timeout);
++ balloon_thread_cond(credit), timeout);
+
+ if (kthread_should_stop())
+ return 0;
+@@ -543,22 +547,23 @@ static int balloon_thread(void *unused)
+
+ if (credit > 0) {
+ if (balloon_is_inflated())
+- state = increase_reservation(credit);
++ balloon_state = increase_reservation(credit);
+ else
+- state = reserve_additional_memory();
++ balloon_state = reserve_additional_memory();
+ }
+
+ if (credit < 0) {
+ long n_pages;
+
+ n_pages = min(-credit, si_mem_available());
+- state = decrease_reservation(n_pages, GFP_BALLOON);
+- if (state == BP_DONE && n_pages != -credit &&
++ balloon_state = decrease_reservation(n_pages,
++ GFP_BALLOON);
++ if (balloon_state == BP_DONE && n_pages != -credit &&
+ n_pages < totalreserve_pages)
+- state = BP_EAGAIN;
++ balloon_state = BP_EAGAIN;
+ }
+
+- state = update_schedule(state);
++ update_schedule();
+
+ mutex_unlock(&balloon_mutex);
+
+@@ -765,3 +770,38 @@ static int __init balloon_init(void)
+ return 0;
+ }
+ subsys_initcall(balloon_init);
++
++static int __init balloon_wait_finish(void)
++{
++ long credit, last_credit = 0;
++ unsigned long last_changed = 0;
++
++ if (!xen_domain())
++ return -ENODEV;
++
++ /* PV guests don't need to wait. */
++ if (xen_pv_domain() || !current_credit())
++ return 0;
++
++ pr_notice("Waiting for initial ballooning down having finished.\n");
++
++ while ((credit = current_credit()) < 0) {
++ if (credit != last_credit) {
++ last_changed = jiffies;
++ last_credit = credit;
++ }
++ if (balloon_state == BP_ECANCELED) {
++ pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n",
++ -credit);
++ if (jiffies - last_changed >= HZ * balloon_boot_timeout)
++ panic("Initial ballooning failed!\n");
++ }
++
++ schedule_timeout_interruptible(HZ / 10);
++ }
++
++ pr_notice("Initial ballooning down finished.\n");
++
++ return 0;
++}
++late_initcall_sync(balloon_wait_finish);
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index a78704ae36186..5e8321f43cbdd 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -528,9 +528,10 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ BUG_ON(irq == -1);
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+- cpumask_copy(irq_get_effective_affinity_mask(irq),
+- cpumask_of(cpu));
++ struct irq_data *data = irq_get_irq_data(irq);
++
++ irq_data_update_affinity(data, cpumask_of(cpu));
++ irq_data_update_effective_affinity(data, cpumask_of(cpu));
+ }
+
+ xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+@@ -1251,6 +1252,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
+ }
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+
++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
++{
++ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
++}
++EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
++
+ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ {
+ struct evtchn_bind_ipi bind_ipi;
+diff --git a/drivers/xen/features.c b/drivers/xen/features.c
+index 7b591443833c9..87f1828d40d5e 100644
+--- a/drivers/xen/features.c
++++ b/drivers/xen/features.c
+@@ -42,7 +42,7 @@ void xen_setup_features(void)
+ if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+ break;
+ for (j = 0; j < 32; j++)
+- xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
++ xen_features[i * 32 + j] = !!(fi.submap & 1U << j);
+ }
+
+ if (xen_pv_domain()) {
+diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
+index 3fa40c723e8e9..edb0acd0b8323 100644
+--- a/drivers/xen/gntalloc.c
++++ b/drivers/xen/gntalloc.c
+@@ -169,20 +169,14 @@ undo:
+ __del_gref(gref);
+ }
+
+- /* It's possible for the target domain to map the just-allocated grant
+- * references by blindly guessing their IDs; if this is done, then
+- * __del_gref will leave them in the queue_gref list. They need to be
+- * added to the global list so that we can free them when they are no
+- * longer referenced.
+- */
+- if (unlikely(!list_empty(&queue_gref)))
+- list_splice_tail(&queue_gref, &gref_list);
+ mutex_unlock(&gref_mutex);
+ return rc;
+ }
+
+ static void __del_gref(struct gntalloc_gref *gref)
+ {
++ unsigned long addr;
++
+ if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ uint8_t *tmp = kmap(gref->page);
+ tmp[gref->notify.pgoff] = 0;
+@@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref)
+ gref->notify.flags = 0;
+
+ if (gref->gref_id) {
+- if (gnttab_query_foreign_access(gref->gref_id))
+- return;
+-
+- if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+- return;
+-
+- gnttab_free_grant_reference(gref->gref_id);
++ if (gref->page) {
++ addr = (unsigned long)page_to_virt(gref->page);
++ gnttab_end_foreign_access(gref->gref_id, 0, addr);
++ } else
++ gnttab_free_grant_reference(gref->gref_id);
+ }
+
+ gref_size--;
+ list_del(&gref->next_gref);
+
+- if (gref->page)
+- __free_page(gref->page);
+-
+ kfree(gref);
+ }
+
+diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
+index 20d7d059dadb5..9c286b2a19001 100644
+--- a/drivers/xen/gntdev-common.h
++++ b/drivers/xen/gntdev-common.h
+@@ -16,6 +16,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/types.h>
+ #include <xen/interface/event_channel.h>
++#include <xen/grant_table.h>
+
+ struct gntdev_dmabuf_priv;
+
+@@ -43,9 +44,10 @@ struct gntdev_unmap_notify {
+ };
+
+ struct gntdev_grant_map {
++ atomic_t in_use;
+ struct mmu_interval_notifier notifier;
++ bool notifier_init;
+ struct list_head next;
+- struct vm_area_struct *vma;
+ int index;
+ int count;
+ int flags;
+@@ -56,6 +58,7 @@ struct gntdev_grant_map {
+ struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
+ struct gnttab_unmap_grant_ref *kunmap_ops;
++ bool *being_removed;
+ struct page **pages;
+ unsigned long pages_vm_start;
+
+@@ -73,6 +76,11 @@ struct gntdev_grant_map {
+ /* Needed to avoid allocation in gnttab_dma_free_pages(). */
+ xen_pfn_t *frames;
+ #endif
++
++ /* Number of live grants */
++ atomic_t live_grants;
++ /* Needed to avoid allocation in __unmap_grant_pages */
++ struct gntab_unmap_queue_data unmap_data;
+ };
+
+ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index fec1b65371665..4d9a3050de6a3 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -35,6 +35,7 @@
+ #include <linux/slab.h>
+ #include <linux/highmem.h>
+ #include <linux/refcount.h>
++#include <linux/workqueue.h>
+
+ #include <xen/xen.h>
+ #include <xen/grant_table.h>
+@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
+ MODULE_PARM_DESC(limit,
+ "Maximum number of grants that may be mapped by one mapping request");
+
++/* True in PV mode, false otherwise */
+ static int use_ptemod;
+
+-static int unmap_grant_pages(struct gntdev_grant_map *map,
+- int offset, int pages);
++static void unmap_grant_pages(struct gntdev_grant_map *map,
++ int offset, int pages);
+
+ static struct miscdevice gntdev_miscdev;
+
+@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
+ kvfree(map->unmap_ops);
+ kvfree(map->kmap_ops);
+ kvfree(map->kunmap_ops);
++ kvfree(map->being_removed);
+ kfree(map);
+ }
+
+@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
+ add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
+ GFP_KERNEL);
+ add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
++ add->being_removed =
++ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
+ if (NULL == add->grants ||
+ NULL == add->map_ops ||
+ NULL == add->unmap_ops ||
+- NULL == add->pages)
++ NULL == add->pages ||
++ NULL == add->being_removed)
+ goto err;
+ if (use_ptemod) {
+ add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
+@@ -250,13 +256,43 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
++ if (map->pages && !use_ptemod) {
++ /*
++ * Increment the reference count. This ensures that the
++ * subsequent call to unmap_grant_pages() will not wind up
++ * re-entering itself. It *can* wind up calling
++ * gntdev_put_map() recursively, but such calls will be with a
++ * reference count greater than 1, so they will return before
++ * this code is reached. The recursion depth is thus limited to
++ * 1. Do NOT use refcount_inc() here, as it will detect that
++ * the reference count is zero and WARN().
++ */
++ refcount_set(&map->users, 1);
++
++ /*
++ * Unmap the grants. This may or may not be asynchronous, so it
++ * is possible that the reference count is 1 on return, but it
++ * could also be greater than 1.
++ */
++ unmap_grant_pages(map, 0, map->count);
++
++ /* Check if the memory now needs to be freed */
++ if (!refcount_dec_and_test(&map->users))
++ return;
++
++ /*
++ * All pages have been returned to the hypervisor, so free the
++ * map.
++ */
++ }
++
++ if (use_ptemod && map->notifier_init)
++ mmu_interval_notifier_remove(&map->notifier);
++
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+ evtchn_put(map->notify.event);
+ }
+-
+- if (map->pages && !use_ptemod)
+- unmap_grant_pages(map, 0, map->count);
+ gntdev_free_map(map);
+ }
+
+@@ -265,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
+ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ struct gntdev_grant_map *map = data;
+- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
++ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
+ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+ (1 << _GNTMAP_guest_avail0);
+ u64 pte_maddr;
+@@ -283,6 +319,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+
+ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
+ {
++ size_t alloced = 0;
+ int i, err = 0;
+
+ if (!use_ptemod) {
+@@ -331,97 +368,130 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
+ map->count);
+
+ for (i = 0; i < map->count; i++) {
+- if (map->map_ops[i].status == GNTST_okay)
++ if (map->map_ops[i].status == GNTST_okay) {
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+- else if (!err)
++ alloced++;
++ } else if (!err)
+ err = -EINVAL;
+
+ if (map->flags & GNTMAP_device_map)
+ map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
+
+ if (use_ptemod) {
+- if (map->kmap_ops[i].status == GNTST_okay)
++ if (map->kmap_ops[i].status == GNTST_okay) {
++ alloced++;
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+- else if (!err)
++ } else if (!err)
+ err = -EINVAL;
+ }
+ }
++ atomic_add(alloced, &map->live_grants);
+ return err;
+ }
+
+-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+- int pages)
++static void __unmap_grant_pages_done(int result,
++ struct gntab_unmap_queue_data *data)
+ {
+- int i, err = 0;
+- struct gntab_unmap_queue_data unmap_data;
++ unsigned int i;
++ struct gntdev_grant_map *map = data->data;
++ unsigned int offset = data->unmap_ops - map->unmap_ops;
++ int successful_unmaps = 0;
++ int live_grants;
++
++ for (i = 0; i < data->count; i++) {
++ if (map->unmap_ops[offset + i].status == GNTST_okay &&
++ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
++ successful_unmaps++;
++
++ WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
++ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
++ pr_debug("unmap handle=%d st=%d\n",
++ map->unmap_ops[offset+i].handle,
++ map->unmap_ops[offset+i].status);
++ map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
++ if (use_ptemod) {
++ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
++ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
++ successful_unmaps++;
+
++ WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
++ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
++ pr_debug("kunmap handle=%u st=%d\n",
++ map->kunmap_ops[offset+i].handle,
++ map->kunmap_ops[offset+i].status);
++ map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
++ }
++ }
++
++ /*
++ * Decrease the live-grant counter. This must happen after the loop to
++ * prevent premature reuse of the grants by gnttab_mmap().
++ */
++ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
++ if (WARN_ON(live_grants < 0))
++ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
++ __func__, live_grants, successful_unmaps);
++
++ /* Release reference taken by __unmap_grant_pages */
++ gntdev_put_map(NULL, map);
++}
++
++static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
++ int pages)
++{
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
++
+ if (pgno >= offset && pgno < offset + pages) {
+ /* No need for kmap, pages are in lowmem */
+ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
++
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+- unmap_data.unmap_ops = map->unmap_ops + offset;
+- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+- unmap_data.pages = map->pages + offset;
+- unmap_data.count = pages;
+-
+- err = gnttab_unmap_refs_sync(&unmap_data);
+- if (err)
+- return err;
++ map->unmap_data.unmap_ops = map->unmap_ops + offset;
++ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
++ map->unmap_data.pages = map->pages + offset;
++ map->unmap_data.count = pages;
++ map->unmap_data.done = __unmap_grant_pages_done;
++ map->unmap_data.data = map;
++ refcount_inc(&map->users); /* to keep map alive during async call below */
+
+- for (i = 0; i < pages; i++) {
+- if (map->unmap_ops[offset+i].status)
+- err = -EINVAL;
+- pr_debug("unmap handle=%d st=%d\n",
+- map->unmap_ops[offset+i].handle,
+- map->unmap_ops[offset+i].status);
+- map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+- if (use_ptemod) {
+- if (map->kunmap_ops[offset+i].status)
+- err = -EINVAL;
+- pr_debug("kunmap handle=%u st=%d\n",
+- map->kunmap_ops[offset+i].handle,
+- map->kunmap_ops[offset+i].status);
+- map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+- }
+- }
+- return err;
++ gnttab_unmap_refs_async(&map->unmap_data);
+ }
+
+-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+- int pages)
++static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
++ int pages)
+ {
+- int range, err = 0;
++ int range;
++
++ if (atomic_read(&map->live_grants) == 0)
++ return; /* Nothing to do */
+
+ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+
+ /* It is possible the requested range will have a "hole" where we
+ * already unmapped some of the grants. Only unmap valid ranges.
+ */
+- while (pages && !err) {
+- while (pages &&
+- map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
++ while (pages) {
++ while (pages && map->being_removed[offset]) {
+ offset++;
+ pages--;
+ }
+ range = 0;
+ while (range < pages) {
+- if (map->unmap_ops[offset + range].handle ==
+- INVALID_GRANT_HANDLE)
++ if (map->being_removed[offset + range])
+ break;
++ map->being_removed[offset + range] = true;
+ range++;
+ }
+- err = __unmap_grant_pages(map, offset, range);
++ if (range)
++ __unmap_grant_pages(map, offset, range);
+ offset += range;
+ pages -= range;
+ }
+-
+- return err;
+ }
+
+ /* ------------------------------------------------------------------ */
+@@ -441,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
+ struct gntdev_priv *priv = file->private_data;
+
+ pr_debug("gntdev_vma_close %p\n", vma);
+- if (use_ptemod) {
+- WARN_ON(map->vma != vma);
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
++
+ vma->vm_private_data = NULL;
+ gntdev_put_map(priv, map);
+ }
+@@ -473,31 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
+ unsigned long mstart, mend;
+- int err;
++ unsigned long map_start, map_end;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
++ map_start = map->pages_vm_start;
++ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
++
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+- if (map->vma->vm_start >= range->end ||
+- map->vma->vm_end <= range->start)
++ if (map_start >= range->end || map_end <= range->start)
+ return true;
+
+- mstart = max(range->start, map->vma->vm_start);
+- mend = min(range->end, map->vma->vm_end);
++ mstart = max(range->start, map_start);
++ mend = min(range->end, map_end);
+ pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
+- map->index, map->count,
+- map->vma->vm_start, map->vma->vm_end,
+- range->start, range->end, mstart, mend);
+- err = unmap_grant_pages(map,
+- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
+- (mend - mstart) >> PAGE_SHIFT);
+- WARN_ON(err);
++ map->index, map->count, map_start, map_end,
++ range->start, range->end, mstart, mend);
++ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
++ (mend - mstart) >> PAGE_SHIFT);
+
+ return true;
+ }
+@@ -977,14 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ return -EINVAL;
+
+ pr_debug("map %d+%d at %lx (pgoff %lx)\n",
+- index, count, vma->vm_start, vma->vm_pgoff);
++ index, count, vma->vm_start, vma->vm_pgoff);
+
+ mutex_lock(&priv->lock);
+ map = gntdev_find_map_index(priv, index, count);
+ if (!map)
+ goto unlock_out;
+- if (use_ptemod && map->vma)
++ if (!atomic_add_unless(&map->in_use, 1, 1))
+ goto unlock_out;
++
+ refcount_inc(&map->users);
+
+ vma->vm_ops = &gntdev_vmops;
+@@ -1005,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ map->flags |= GNTMAP_readonly;
+ }
+
++ map->pages_vm_start = vma->vm_start;
++
+ if (use_ptemod) {
+- map->vma = vma;
+ err = mmu_interval_notifier_insert_locked(
+ &map->notifier, vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
+- if (err) {
+- map->vma = NULL;
++ if (err)
+ goto out_unlock_put;
+- }
++
++ map->notifier_init = true;
+ }
+ mutex_unlock(&priv->lock);
+
+@@ -1030,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ */
+ mmu_interval_read_begin(&map->notifier);
+
+- map->pages_vm_start = vma->vm_start;
+ err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ find_grant_ptes, map);
+@@ -1059,13 +1125,8 @@ unlock_out:
+ out_unlock_put:
+ mutex_unlock(&priv->lock);
+ out_put_map:
+- if (use_ptemod) {
++ if (use_ptemod)
+ unmap_grant_pages(map, 0, map->count);
+- if (map->vma) {
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
+- }
+ gntdev_put_map(priv, map);
+ return err;
+ }
+diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
+index 3729bea0c9895..0a2d24d6ac6f7 100644
+--- a/drivers/xen/grant-table.c
++++ b/drivers/xen/grant-table.c
+@@ -134,12 +134,9 @@ struct gnttab_ops {
+ */
+ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
+ /*
+- * Query the status of a grant entry. Ref parameter is reference of
+- * queried grant entry, return value is the status of queried entry.
+- * Detailed status(writing/reading) can be gotten from the return value
+- * by bit operations.
++ * Read the frame number related to a given grant reference.
+ */
+- int (*query_foreign_access)(grant_ref_t ref);
++ unsigned long (*read_frame)(grant_ref_t ref);
+ };
+
+ struct unmap_refs_callback_data {
+@@ -284,22 +281,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+ }
+ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
+
+-static int gnttab_query_foreign_access_v1(grant_ref_t ref)
+-{
+- return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
+-}
+-
+-static int gnttab_query_foreign_access_v2(grant_ref_t ref)
+-{
+- return grstatus[ref] & (GTF_reading|GTF_writing);
+-}
+-
+-int gnttab_query_foreign_access(grant_ref_t ref)
+-{
+- return gnttab_interface->query_foreign_access(ref);
+-}
+-EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
+-
+ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
+ {
+ u16 flags, nflags;
+@@ -353,6 +334,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+ }
+ EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+
++static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
++{
++ return gnttab_shared.v1[ref].frame;
++}
++
++static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
++{
++ return gnttab_shared.v2[ref].full_page.frame;
++}
++
+ struct deferred_entry {
+ struct list_head list;
+ grant_ref_t ref;
+@@ -382,12 +373,9 @@ static void gnttab_handle_deferred(struct timer_list *unused)
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) {
+ put_free_entry(entry->ref);
+- if (entry->page) {
+- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
+- entry->ref, page_to_pfn(entry->page));
+- put_page(entry->page);
+- } else
+- pr_info("freeing g.e. %#x\n", entry->ref);
++ pr_debug("freeing g.e. %#x (pfn %#lx)\n",
++ entry->ref, page_to_pfn(entry->page));
++ put_page(entry->page);
+ kfree(entry);
+ entry = NULL;
+ } else {
+@@ -412,9 +400,18 @@ static void gnttab_handle_deferred(struct timer_list *unused)
+ static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
+ struct page *page)
+ {
+- struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
++ struct deferred_entry *entry;
++ gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+ const char *what = KERN_WARNING "leaking";
+
++ entry = kmalloc(sizeof(*entry), gfp);
++ if (!page) {
++ unsigned long gfn = gnttab_interface->read_frame(ref);
++
++ page = pfn_to_page(gfn_to_pfn(gfn));
++ get_page(page);
++ }
++
+ if (entry) {
+ unsigned long flags;
+
+@@ -435,11 +432,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly,
+ what, ref, page ? page_to_pfn(page) : -1);
+ }
+
++int gnttab_try_end_foreign_access(grant_ref_t ref)
++{
++ int ret = _gnttab_end_foreign_access_ref(ref, 0);
++
++ if (ret)
++ put_free_entry(ref);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);
++
+ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
+ unsigned long page)
+ {
+- if (gnttab_end_foreign_access_ref(ref, readonly)) {
+- put_free_entry(ref);
++ if (gnttab_try_end_foreign_access(ref)) {
+ if (page != 0)
+ put_page(virt_to_page(page));
+ } else
+@@ -974,6 +981,9 @@ int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
+ size_t size;
+ int i, ret;
+
++ if (args->nr_pages < 0 || args->nr_pages > (INT_MAX >> PAGE_SHIFT))
++ return -ENOMEM;
++
+ size = args->nr_pages << PAGE_SHIFT;
+ if (args->coherent)
+ args->vaddr = dma_alloc_coherent(args->dev, size,
+@@ -1417,7 +1427,7 @@ static const struct gnttab_ops gnttab_v1_ops = {
+ .update_entry = gnttab_update_entry_v1,
+ .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
+ .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1,
+- .query_foreign_access = gnttab_query_foreign_access_v1,
++ .read_frame = gnttab_read_frame_v1,
+ };
+
+ static const struct gnttab_ops gnttab_v2_ops = {
+@@ -1429,7 +1439,7 @@ static const struct gnttab_ops gnttab_v2_ops = {
+ .update_entry = gnttab_update_entry_v2,
+ .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
+ .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
+- .query_foreign_access = gnttab_query_foreign_access_v2,
++ .read_frame = gnttab_read_frame_v2,
+ };
+
+ static bool gnttab_need_v2(void)
+diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
+index 47aa3a1ccaf57..b3e3d1bb37f3e 100644
+--- a/drivers/xen/pcpu.c
++++ b/drivers/xen/pcpu.c
+@@ -58,6 +58,7 @@ struct pcpu {
+ struct list_head list;
+ struct device dev;
+ uint32_t cpu_id;
++ uint32_t acpi_id;
+ uint32_t flags;
+ };
+
+@@ -228,7 +229,7 @@ static int register_pcpu(struct pcpu *pcpu)
+
+ err = device_register(dev);
+ if (err) {
+- pcpu_release(dev);
++ put_device(dev);
+ return err;
+ }
+
+@@ -249,6 +250,7 @@ static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info)
+
+ INIT_LIST_HEAD(&pcpu->list);
+ pcpu->cpu_id = info->xen_cpuid;
++ pcpu->acpi_id = info->acpi_id;
+ pcpu->flags = info->flags;
+
+ /* Need hold on xen_pcpu_lock before pcpu list manipulations */
+@@ -381,3 +383,21 @@ err1:
+ return ret;
+ }
+ arch_initcall(xen_pcpu_init);
++
++#ifdef CONFIG_ACPI
++bool __init xen_processor_present(uint32_t acpi_id)
++{
++ const struct pcpu *pcpu;
++ bool online = false;
++
++ mutex_lock(&xen_pcpu_lock);
++ list_for_each_entry(pcpu, &xen_pcpus, list)
++ if (pcpu->acpi_id == acpi_id) {
++ online = pcpu->flags & XEN_PCPU_FLAGS_ONLINE;
++ break;
++ }
++ mutex_unlock(&xen_pcpu_lock);
++
++ return online;
++}
++#endif
+diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
+index 18f0ed8b1f93b..6ebd819338ecb 100644
+--- a/drivers/xen/platform-pci.c
++++ b/drivers/xen/platform-pci.c
+@@ -144,7 +144,7 @@ static int platform_pci_probe(struct pci_dev *pdev,
+ if (ret) {
+ dev_warn(&pdev->dev, "Unable to set the evtchn callback "
+ "err=%d\n", ret);
+- goto out;
++ goto irq_out;
+ }
+ }
+
+@@ -152,13 +152,16 @@ static int platform_pci_probe(struct pci_dev *pdev,
+ grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+ ret = gnttab_setup_auto_xlat_frames(grant_frames);
+ if (ret)
+- goto out;
++ goto irq_out;
+ ret = gnttab_init();
+ if (ret)
+ goto grant_out;
+ return 0;
+ grant_out:
+ gnttab_free_auto_xlat_frames();
++irq_out:
++ if (!xen_have_vector_callback)
++ free_irq(pdev->irq, pdev);
+ out:
+ pci_release_region(pdev, 0);
+ mem_out:
+diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
+index 3369734108af2..719c5d1dda274 100644
+--- a/drivers/xen/privcmd.c
++++ b/drivers/xen/privcmd.c
+@@ -581,27 +581,30 @@ static int lock_pages(
+ struct privcmd_dm_op_buf kbufs[], unsigned int num,
+ struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
+ {
+- unsigned int i;
++ unsigned int i, off = 0;
+
+- for (i = 0; i < num; i++) {
++ for (i = 0; i < num; ) {
+ unsigned int requested;
+ int page_count;
+
+ requested = DIV_ROUND_UP(
+ offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+- PAGE_SIZE);
++ PAGE_SIZE) - off;
+ if (requested > nr_pages)
+ return -ENOSPC;
+
+ page_count = pin_user_pages_fast(
+- (unsigned long) kbufs[i].uptr,
++ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
+ requested, FOLL_WRITE, pages);
+- if (page_count < 0)
+- return page_count;
++ if (page_count <= 0)
++ return page_count ? : -EFAULT;
+
+ *pinned += page_count;
+ nr_pages -= page_count;
+ pages += page_count;
++
++ off = (requested == page_count) ? 0 : off + page_count;
++ i += !off;
+ }
+
+ return 0;
+@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
+ }
+
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+- if (rc < 0) {
+- nr_pages = pinned;
++ if (rc < 0)
+ goto out;
+- }
+
+ for (i = 0; i < kdata.num; i++) {
+ set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
+ xen_preemptible_hcall_end();
+
+ out:
+- unlock_pages(pages, nr_pages);
++ unlock_pages(pages, pinned);
+ kfree(xbufs);
+ kfree(pages);
+ kfree(kbufs);
+@@ -759,7 +760,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
+ goto out;
+ }
+
+- pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL);
++ pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL | __GFP_NOWARN);
+ if (!pfns) {
+ rc = -ENOMEM;
+ goto out;
+diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
+index b47fd8435061a..a1e8b6eab69d4 100644
+--- a/drivers/xen/pvcalls-back.c
++++ b/drivers/xen/pvcalls-back.c
+@@ -129,13 +129,13 @@ static bool pvcalls_conn_back_read(void *opaque)
+ if (masked_prod < masked_cons) {
+ vec[0].iov_base = data->in + masked_prod;
+ vec[0].iov_len = wanted;
+- iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
++ iov_iter_kvec(&msg.msg_iter, READ, vec, 1, wanted);
+ } else {
+ vec[0].iov_base = data->in + masked_prod;
+ vec[0].iov_len = array_size - masked_prod;
+ vec[1].iov_base = data->in;
+ vec[1].iov_len = wanted - vec[0].iov_len;
+- iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
++ iov_iter_kvec(&msg.msg_iter, READ, vec, 2, wanted);
+ }
+
+ atomic_set(&map->read, 0);
+@@ -188,13 +188,13 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map)
+ if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
+ vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
+ vec[0].iov_len = size;
+- iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
++ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, size);
+ } else {
+ vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
+ vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
+ vec[1].iov_base = data->out;
+ vec[1].iov_len = size - vec[0].iov_len;
+- iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
++ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, size);
+ }
+
+ atomic_set(&map->write, 0);
+@@ -321,8 +321,10 @@ static struct sock_mapping *pvcalls_new_active_socket(
+ void *page;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+- if (map == NULL)
++ if (map == NULL) {
++ sock_release(sock);
+ return NULL;
++ }
+
+ map->fedata = fedata;
+ map->sock = sock;
+@@ -414,10 +416,8 @@ static int pvcalls_back_connect(struct xenbus_device *dev,
+ req->u.connect.ref,
+ req->u.connect.evtchn,
+ sock);
+- if (!map) {
++ if (!map)
+ ret = -EFAULT;
+- sock_release(sock);
+- }
+
+ out:
+ rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
+@@ -558,7 +558,6 @@ static void __pvcalls_back_accept(struct work_struct *work)
+ sock);
+ if (!map) {
+ ret = -EFAULT;
+- sock_release(sock);
+ goto out_error;
+ }
+
+diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
+index 7984645b59563..bbe337dc296e3 100644
+--- a/drivers/xen/pvcalls-front.c
++++ b/drivers/xen/pvcalls-front.c
+@@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map)
+ if (!map->active.ring)
+ return;
+
+- free_pages((unsigned long)map->active.data.in,
+- map->active.ring->ring_order);
++ free_pages_exact(map->active.data.in,
++ PAGE_SIZE << map->active.ring->ring_order);
+ free_page((unsigned long)map->active.ring);
+ }
+
+@@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map)
+ goto out;
+
+ map->active.ring->ring_order = PVCALLS_RING_ORDER;
+- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+- PVCALLS_RING_ORDER);
++ bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER,
++ GFP_KERNEL | __GFP_ZERO);
+ if (!bytes)
+ goto out;
+
+diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
+index e56a5faac395c..cbdff89799807 100644
+--- a/drivers/xen/swiotlb-xen.c
++++ b/drivers/xen/swiotlb-xen.c
+@@ -380,7 +380,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
+ */
+ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+
+- map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
++ map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
+ if (map == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
+
+diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
+index 22f13abbe9130..097316a741268 100644
+--- a/drivers/xen/xen-pciback/conf_space_capability.c
++++ b/drivers/xen/xen-pciback/conf_space_capability.c
+@@ -160,7 +160,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+ }
+
+ out:
+- return ERR_PTR(err);
++ return err ? ERR_PTR(err) : NULL;
+ }
+
+ static const struct config_field caplist_pm[] = {
+@@ -190,13 +190,16 @@ static const struct config_field caplist_pm[] = {
+ };
+
+ static struct msi_msix_field_config {
+- u16 enable_bit; /* bit for enabling MSI/MSI-X */
+- unsigned int int_type; /* interrupt type for exclusiveness check */
++ u16 enable_bit; /* bit for enabling MSI/MSI-X */
++ u16 allowed_bits; /* bits allowed to be changed */
++ unsigned int int_type; /* interrupt type for exclusiveness check */
+ } msi_field_config = {
+ .enable_bit = PCI_MSI_FLAGS_ENABLE,
++ .allowed_bits = PCI_MSI_FLAGS_ENABLE,
+ .int_type = INTERRUPT_TYPE_MSI,
+ }, msix_field_config = {
+ .enable_bit = PCI_MSIX_FLAGS_ENABLE,
++ .allowed_bits = PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL,
+ .int_type = INTERRUPT_TYPE_MSIX,
+ };
+
+@@ -229,7 +232,7 @@ static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value,
+ return 0;
+
+ if (!dev_data->allow_interrupt_control ||
+- (new_value ^ old_value) & ~field_config->enable_bit)
++ (new_value ^ old_value) & ~field_config->allowed_bits)
+ return PCIBIOS_SET_FAILED;
+
+ if (new_value & field_config->enable_bit) {
+diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
+index e8bed1cb76ba2..df68906812315 100644
+--- a/drivers/xen/xenbus/xenbus_client.c
++++ b/drivers/xen/xenbus/xenbus_client.c
+@@ -379,7 +379,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+ unsigned int nr_pages, grant_ref_t *grefs)
+ {
+ int err;
+- int i, j;
++ unsigned int i;
++ grant_ref_t gref_head;
++
++ err = gnttab_alloc_grant_references(nr_pages, &gref_head);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "granting access to ring page");
++ return err;
++ }
+
+ for (i = 0; i < nr_pages; i++) {
+ unsigned long gfn;
+@@ -389,23 +396,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
+ else
+ gfn = virt_to_gfn(vaddr);
+
+- err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0);
+- if (err < 0) {
+- xenbus_dev_fatal(dev, err,
+- "granting access to ring page");
+- goto fail;
+- }
+- grefs[i] = err;
++ grefs[i] = gnttab_claim_grant_reference(&gref_head);
++ gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
++ gfn, 0);
+
+ vaddr = vaddr + XEN_PAGE_SIZE;
+ }
+
+ return 0;
+-
+-fail:
+- for (j = 0; j < i; j++)
+- gnttab_end_foreign_access_ref(grefs[j], 0);
+- return err;
+ }
+ EXPORT_SYMBOL_GPL(xenbus_grant_ring);
+
+diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
+index 597af455a522b..0792fda49a15f 100644
+--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
++++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
+@@ -128,7 +128,7 @@ static ssize_t xenbus_file_read(struct file *filp,
+ {
+ struct xenbus_file_priv *u = filp->private_data;
+ struct read_buffer *rb;
+- unsigned i;
++ ssize_t i;
+ int ret;
+
+ mutex_lock(&u->reply_mutex);
+@@ -148,7 +148,7 @@ again:
+ rb = list_entry(u->read_buffers.next, struct read_buffer, list);
+ i = 0;
+ while (i < len) {
+- unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
++ size_t sz = min_t(size_t, len - i, rb->len - rb->cons);
+
+ ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
+
+diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
+index bd003ca8acbe9..fe360c33ce717 100644
+--- a/drivers/xen/xenbus/xenbus_probe.c
++++ b/drivers/xen/xenbus/xenbus_probe.c
+@@ -909,7 +909,7 @@ static struct notifier_block xenbus_resume_nb = {
+
+ static int __init xenbus_init(void)
+ {
+- int err = 0;
++ int err;
+ uint64_t v = 0;
+ xen_store_domain_type = XS_UNKNOWN;
+
+@@ -949,6 +949,29 @@ static int __init xenbus_init(void)
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto out_error;
++ /*
++ * Uninitialized hvm_params are zero and return no error.
++ * Although it is theoretically possible to have
++ * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
++ * not zero when valid. If zero, it means that Xenstore hasn't
++ * been properly initialized. Instead of attempting to map a
++ * wrong guest physical address return error.
++ *
++ * Also recognize all bits set as an invalid value.
++ */
++ if (!v || !~v) {
++ err = -ENOENT;
++ goto out_error;
++ }
++ /* Avoid truncation on 32-bit. */
++#if BITS_PER_LONG == 32
++ if (v > ULONG_MAX) {
++ pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
++ __func__, v);
++ err = -EINVAL;
++ goto out_error;
++ }
++#endif
+ xen_store_gfn = (unsigned long)v;
+ xen_store_interface =
+ xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
+@@ -983,8 +1006,10 @@ static int __init xenbus_init(void)
+ */
+ proc_create_mount_point("xen");
+ #endif
++ return 0;
+
+ out_error:
++ xen_store_domain_type = XS_UNKNOWN;
+ return err;
+ }
+
+diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
+index 34742c6e189e3..f17c4c03db30c 100644
+--- a/drivers/xen/xlate_mmu.c
++++ b/drivers/xen/xlate_mmu.c
+@@ -261,7 +261,6 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
+
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages);
+
+ struct remap_pfn {
+ struct mm_struct *mm;
+diff --git a/fs/9p/acl.c b/fs/9p/acl.c
+index c381499f54160..da22415ed036c 100644
+--- a/fs/9p/acl.c
++++ b/fs/9p/acl.c
+@@ -123,6 +123,7 @@ static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
+ char *name;
+ size_t size;
+ void *buffer;
++
+ if (!acl)
+ return 0;
+
+diff --git a/fs/9p/acl.h b/fs/9p/acl.h
+index d43c8949e807b..bc87b36f529e8 100644
+--- a/fs/9p/acl.h
++++ b/fs/9p/acl.h
+@@ -15,14 +15,15 @@
+ #define FS_9P_ACL_H
+
+ #ifdef CONFIG_9P_FS_POSIX_ACL
+-extern int v9fs_get_acl(struct inode *, struct p9_fid *);
+-extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu);
+-extern int v9fs_acl_chmod(struct inode *, struct p9_fid *);
+-extern int v9fs_set_create_acl(struct inode *, struct p9_fid *,
+- struct posix_acl *, struct posix_acl *);
+-extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
+- struct posix_acl **dpacl, struct posix_acl **pacl);
+-extern void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl);
++int v9fs_get_acl(struct inode *inode, struct p9_fid *fid);
++struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type,
++ bool rcu);
++int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid);
++int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid,
++ struct posix_acl *dacl, struct posix_acl *acl);
++int v9fs_acl_mode(struct inode *dir, umode_t *modep,
++ struct posix_acl **dpacl, struct posix_acl **pacl);
++void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl);
+ #else
+ #define v9fs_iop_get_acl NULL
+ static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+diff --git a/fs/9p/cache.c b/fs/9p/cache.c
+index 1769a44f48192..41da71320482c 100644
+--- a/fs/9p/cache.c
++++ b/fs/9p/cache.c
+@@ -19,8 +19,8 @@
+ #define CACHETAG_LEN 11
+
+ struct fscache_netfs v9fs_cache_netfs = {
+- .name = "9p",
+- .version = 0,
++ .name = "9p",
++ .version = 0,
+ };
+
+ /*
+diff --git a/fs/9p/fid.c b/fs/9p/fid.c
+index b8863dd0de5cc..c702a336837dd 100644
+--- a/fs/9p/fid.c
++++ b/fs/9p/fid.c
+@@ -96,12 +96,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
+ dentry, dentry, from_kuid(&init_user_ns, uid),
+ any);
+ ret = NULL;
+-
+- if (d_inode(dentry))
+- ret = v9fs_fid_find_inode(d_inode(dentry), uid);
+-
+ /* we'll recheck under lock if there's anything to look in */
+- if (!ret && dentry->d_fsdata) {
++ if (dentry->d_fsdata) {
+ struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
+ spin_lock(&dentry->d_lock);
+ hlist_for_each_entry(fid, h, dlist) {
+@@ -112,6 +108,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
+ }
+ }
+ spin_unlock(&dentry->d_lock);
++ } else {
++ if (dentry->d_inode)
++ ret = v9fs_fid_find_inode(dentry->d_inode, uid);
+ }
+
+ return ret;
+@@ -152,7 +151,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ const unsigned char **wnames, *uname;
+ int i, n, l, clone, access;
+ struct v9fs_session_info *v9ses;
+- struct p9_fid *fid, *old_fid = NULL;
++ struct p9_fid *fid, *old_fid;
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+ access = v9ses->flags & V9FS_ACCESS_MASK;
+@@ -194,13 +193,12 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ if (IS_ERR(fid))
+ return fid;
+
++ refcount_inc(&fid->count);
+ v9fs_fid_add(dentry->d_sb->s_root, fid);
+ }
+ /* If we are root ourself just return that */
+- if (dentry->d_sb->s_root == dentry) {
+- refcount_inc(&fid->count);
++ if (dentry->d_sb->s_root == dentry)
+ return fid;
+- }
+ /*
+ * Do a multipath walk with attached root.
+ * When walking parent we need to make sure we
+@@ -212,6 +210,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ fid = ERR_PTR(n);
+ goto err_out;
+ }
++ old_fid = fid;
+ clone = 1;
+ i = 0;
+ while (i < n) {
+@@ -221,19 +220,15 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ * walk to ensure none of the patch component change
+ */
+ fid = p9_client_walk(fid, l, &wnames[i], clone);
++ /* non-cloning walk will return the same fid */
++ if (fid != old_fid) {
++ p9_client_clunk(old_fid);
++ old_fid = fid;
++ }
+ if (IS_ERR(fid)) {
+- if (old_fid) {
+- /*
+- * If we fail, clunk fid which are mapping
+- * to path component and not the last component
+- * of the path.
+- */
+- p9_client_clunk(old_fid);
+- }
+ kfree(wnames);
+ goto err_out;
+ }
+- old_fid = fid;
+ i += l;
+ clone = 0;
+ }
+diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
+index 2e0fa7c932db0..141067379f5e4 100644
+--- a/fs/9p/v9fs.c
++++ b/fs/9p/v9fs.c
+@@ -190,8 +190,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token, r;
++
+ if (!*p)
+ continue;
++
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_debug:
+@@ -659,6 +661,7 @@ static void v9fs_destroy_inode_cache(void)
+ static int v9fs_cache_register(void)
+ {
+ int ret;
++
+ ret = v9fs_init_inode_cache();
+ if (ret < 0)
+ return ret;
+@@ -686,6 +689,7 @@ static void v9fs_cache_unregister(void)
+ static int __init init_v9fs(void)
+ {
+ int err;
++
+ pr_info("Installing v9fs 9p2000 file system support\n");
+ /* TODO: Setup list of registered trasnport modules */
+
+diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
+index d44ade76966a0..bc417da7e9c1d 100644
+--- a/fs/9p/v9fs_vfs.h
++++ b/fs/9p/v9fs_vfs.h
+@@ -44,9 +44,10 @@ extern struct kmem_cache *v9fs_inode_cache;
+
+ struct inode *v9fs_alloc_inode(struct super_block *sb);
+ void v9fs_free_inode(struct inode *inode);
+-struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
++struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode,
++ dev_t rdev);
+ int v9fs_init_inode(struct v9fs_session_info *v9ses,
+- struct inode *inode, umode_t mode, dev_t);
++ struct inode *inode, umode_t mode, dev_t rdev);
+ void v9fs_evict_inode(struct inode *inode);
+ ino_t v9fs_qid2ino(struct p9_qid *qid);
+ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
+@@ -59,8 +60,8 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
+ int v9fs_uflags2omode(int uflags, int extended);
+
+ void v9fs_blank_wstat(struct p9_wstat *wstat);
+-int v9fs_vfs_setattr_dotl(struct user_namespace *, struct dentry *,
+- struct iattr *);
++int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
++ struct dentry *dentry, struct iattr *iattr);
+ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+ int datasync);
+ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
+@@ -68,9 +69,9 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
+ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
+ {
+ struct v9fs_inode *v9inode;
++
+ v9inode = V9FS_I(inode);
+ v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
+- return;
+ }
+
+ int v9fs_open_to_dotl_flags(int flags);
+diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
+index 1c4f1b39cc950..606d33ef35c66 100644
+--- a/fs/9p/vfs_addr.c
++++ b/fs/9p/vfs_addr.c
+@@ -242,11 +242,13 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ loff_t pos = iocb->ki_pos;
+ ssize_t n;
+ int err = 0;
++
+ if (iov_iter_rw(iter) == WRITE) {
+ n = p9_client_write(file->private_data, pos, iter, &err);
+ if (n) {
+ struct inode *inode = file_inode(file);
+ loff_t i_size = i_size_read(inode);
++
+ if (pos + n > i_size)
+ inode_add_bytes(inode, pos + n - i_size);
+ }
+@@ -257,7 +259,7 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ }
+
+ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
+- loff_t pos, unsigned len, unsigned flags,
++ loff_t pos, unsigned int len, unsigned int flags,
+ struct page **pagep, void **fsdata)
+ {
+ int retval = 0;
+@@ -293,7 +295,7 @@ out:
+ }
+
+ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
+- loff_t pos, unsigned len, unsigned copied,
++ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+ {
+ loff_t last_pos = pos + copied;
+diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
+index 4b4292123b3d1..c2736af97884f 100644
+--- a/fs/9p/vfs_dentry.c
++++ b/fs/9p/vfs_dentry.c
+@@ -52,6 +52,7 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
+ static void v9fs_dentry_release(struct dentry *dentry)
+ {
+ struct hlist_node *p, *n;
++
+ p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
+ dentry, dentry);
+ hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
+@@ -76,6 +77,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+ if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
+ int retval;
+ struct v9fs_session_info *v9ses;
++
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
+index 246235ebdb70a..7437b185fa8eb 100644
+--- a/fs/9p/vfs_file.c
++++ b/fs/9p/vfs_file.c
+@@ -408,6 +408,7 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ struct inode *inode = file_inode(file);
+ loff_t i_size;
+ unsigned long pg_start, pg_end;
++
+ pg_start = origin >> PAGE_SHIFT;
+ pg_end = (origin + retval - 1) >> PAGE_SHIFT;
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
+index 08f48b70a7414..0d9b7d453a877 100644
+--- a/fs/9p/vfs_inode.c
++++ b/fs/9p/vfs_inode.c
+@@ -49,6 +49,7 @@ static const struct inode_operations v9fs_symlink_inode_operations;
+ static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode)
+ {
+ int res;
++
+ res = mode & 0777;
+ if (S_ISDIR(mode))
+ res |= P9_DMDIR;
+@@ -223,6 +224,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
+ struct inode *v9fs_alloc_inode(struct super_block *sb)
+ {
+ struct v9fs_inode *v9inode;
++
+ v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL);
+ if (!v9inode)
+ return NULL;
+@@ -251,7 +253,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ {
+ int err = 0;
+
+- inode_init_owner(&init_user_ns,inode, NULL, mode);
++ inode_init_owner(&init_user_ns, inode, NULL, mode);
+ inode->i_blocks = 0;
+ inode->i_rdev = rdev;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+@@ -440,7 +442,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+- int (*test)(struct inode *, void *);
++ int (*test)(struct inode *inode, void *data);
+
+ if (new)
+ test = v9fs_test_new_inode;
+@@ -499,8 +501,10 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ static int v9fs_at_to_dotl_flags(int flags)
+ {
+ int rflags = 0;
++
+ if (flags & AT_REMOVEDIR)
+ rflags |= P9_DOTL_AT_REMOVEDIR;
++
+ return rflags;
+ }
+
+@@ -797,7 +801,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+
+ static int
+ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
+- struct file *file, unsigned flags, umode_t mode)
++ struct file *file, unsigned int flags, umode_t mode)
+ {
+ int err;
+ u32 perm;
+@@ -1084,7 +1088,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
+ fid = v9fs_fid_lookup(dentry);
+ use_dentry = 1;
+ }
+- if(IS_ERR(fid))
++ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ v9fs_blank_wstat(&wstat);
+@@ -1228,15 +1232,15 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry,
+ return ERR_PTR(-ECHILD);
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+- fid = v9fs_fid_lookup(dentry);
++ if (!v9fs_proto_dotu(v9ses))
++ return ERR_PTR(-EBADF);
++
+ p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
++ fid = v9fs_fid_lookup(dentry);
+
+ if (IS_ERR(fid))
+ return ERR_CAST(fid);
+
+- if (!v9fs_proto_dotu(v9ses))
+- return ERR_PTR(-EBADF);
+-
+ st = p9_client_stat(fid);
+ p9_client_clunk(fid);
+ if (IS_ERR(st))
+@@ -1364,7 +1368,7 @@ v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1];
+ u32 perm;
+
+- p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
++ p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n",
+ dir->i_ino, dentry, mode,
+ MAJOR(rdev), MINOR(rdev));
+
+diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
+index 01b9e1281a297..0f73aa26ddf4b 100644
+--- a/fs/9p/vfs_inode_dotl.c
++++ b/fs/9p/vfs_inode_dotl.c
+@@ -107,7 +107,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+- int (*test)(struct inode *, void *);
++ int (*test)(struct inode *inode, void *data);
+
+ if (new)
+ test = v9fs_test_new_inode_dotl;
+@@ -230,7 +230,7 @@ v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+
+ static int
+ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+- struct file *file, unsigned flags, umode_t omode)
++ struct file *file, unsigned int flags, umode_t omode)
+ {
+ int err = 0;
+ kgid_t gid;
+@@ -261,7 +261,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+ v9ses = v9fs_inode2v9ses(dir);
+
+ name = dentry->d_name.name;
+- p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
++ p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n",
+ name, flags, omode);
+
+ dfid = v9fs_parent_fid(dentry);
+@@ -276,6 +276,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
++ p9_client_clunk(dfid);
+ goto out;
+ }
+
+@@ -287,6 +288,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+ if (err) {
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ err);
++ p9_client_clunk(dfid);
+ goto error;
+ }
+ err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+@@ -294,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+ if (err < 0) {
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ err);
++ p9_client_clunk(dfid);
+ goto error;
+ }
+ v9fs_invalidate_inode_attr(dir);
+@@ -553,7 +556,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+ {
+ int retval, use_dentry = 0;
+ struct p9_fid *fid = NULL;
+- struct p9_iattr_dotl p9attr;
++ struct p9_iattr_dotl p9attr = {
++ .uid = INVALID_UID,
++ .gid = INVALID_GID,
++ };
+ struct inode *inode = d_inode(dentry);
+
+ p9_debug(P9_DEBUG_VFS, "\n");
+@@ -563,14 +569,22 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+ return retval;
+
+ p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
+- p9attr.mode = iattr->ia_mode;
+- p9attr.uid = iattr->ia_uid;
+- p9attr.gid = iattr->ia_gid;
+- p9attr.size = iattr->ia_size;
+- p9attr.atime_sec = iattr->ia_atime.tv_sec;
+- p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+- p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+- p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
++ if (iattr->ia_valid & ATTR_MODE)
++ p9attr.mode = iattr->ia_mode;
++ if (iattr->ia_valid & ATTR_UID)
++ p9attr.uid = iattr->ia_uid;
++ if (iattr->ia_valid & ATTR_GID)
++ p9attr.gid = iattr->ia_gid;
++ if (iattr->ia_valid & ATTR_SIZE)
++ p9attr.size = iattr->ia_size;
++ if (iattr->ia_valid & ATTR_ATIME_SET) {
++ p9attr.atime_sec = iattr->ia_atime.tv_sec;
++ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
++ }
++ if (iattr->ia_valid & ATTR_MTIME_SET) {
++ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
++ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
++ }
+
+ if (iattr->ia_valid & ATTR_FILE) {
+ fid = iattr->ia_file->private_data;
+@@ -807,6 +821,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
++
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+@@ -843,7 +858,7 @@ v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+ struct p9_qid qid;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
+
+- p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
++ p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n",
+ dir->i_ino, dentry, omode,
+ MAJOR(rdev), MINOR(rdev));
+
+diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
+index 5fce6e30bc5ae..7449f7fd47d22 100644
+--- a/fs/9p/vfs_super.c
++++ b/fs/9p/vfs_super.c
+@@ -113,7 +113,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+ struct inode *inode = NULL;
+ struct dentry *root = NULL;
+ struct v9fs_session_info *v9ses = NULL;
+- umode_t mode = S_IRWXUGO | S_ISVTX;
++ umode_t mode = 0777 | S_ISVTX;
+ struct p9_fid *fid;
+ int retval = 0;
+
+@@ -157,6 +157,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+ sb->s_root = root;
+ if (v9fs_proto_dotl(v9ses)) {
+ struct p9_stat_dotl *st = NULL;
++
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+@@ -167,6 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+ kfree(st);
+ } else {
+ struct p9_wstat *st = NULL;
++
+ st = p9_client_stat(fid);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+@@ -275,12 +277,13 @@ done:
+ static int v9fs_drop_inode(struct inode *inode)
+ {
+ struct v9fs_session_info *v9ses;
++
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+- * the inode because we want the inode attribute
++ * inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
+index c63c3bea5de55..9b28842c63633 100644
+--- a/fs/9p/xattr.h
++++ b/fs/9p/xattr.h
+@@ -22,13 +22,14 @@ extern const struct xattr_handler *v9fs_xattr_handlers[];
+ extern const struct xattr_handler v9fs_xattr_acl_access_handler;
+ extern const struct xattr_handler v9fs_xattr_acl_default_handler;
+
+-extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
+- void *, size_t);
+-extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
+- void *, size_t);
+-extern int v9fs_fid_xattr_set(struct p9_fid *, const char *,
+- const void *, size_t, int);
+-extern int v9fs_xattr_set(struct dentry *, const char *,
+- const void *, size_t, int);
+-extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
++ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
++ void *buffer, size_t buffer_size);
++ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
++ void *buffer, size_t buffer_size);
++int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
++ const void *value, size_t value_len, int flags);
++int v9fs_xattr_set(struct dentry *dentry, const char *name,
++ const void *value, size_t value_len, int flags);
++ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer,
++ size_t buffer_size);
+ #endif /* FS_9P_XATTR_H */
+diff --git a/fs/Makefile b/fs/Makefile
+index 84c5e4cdfee5a..d504be65a210a 100644
+--- a/fs/Makefile
++++ b/fs/Makefile
+@@ -32,8 +32,6 @@ obj-$(CONFIG_TIMERFD) += timerfd.o
+ obj-$(CONFIG_EVENTFD) += eventfd.o
+ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
+ obj-$(CONFIG_AIO) += aio.o
+-obj-$(CONFIG_IO_URING) += io_uring.o
+-obj-$(CONFIG_IO_WQ) += io-wq.o
+ obj-$(CONFIG_FS_DAX) += dax.o
+ obj-$(CONFIG_FS_ENCRYPTION) += crypto/
+ obj-$(CONFIG_FS_VERITY) += verity/
+diff --git a/fs/affs/file.c b/fs/affs/file.c
+index 75ebd2b576ca4..25d480ea797bd 100644
+--- a/fs/affs/file.c
++++ b/fs/affs/file.c
+@@ -881,7 +881,7 @@ affs_truncate(struct inode *inode)
+ if (inode->i_size > AFFS_I(inode)->mmu_private) {
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ loff_t isize = inode->i_size;
+ int res;
+
+diff --git a/fs/afs/dir.c b/fs/afs/dir.c
+index 4579bbda46346..cec18f9f8bd7a 100644
+--- a/fs/afs/dir.c
++++ b/fs/afs/dir.c
+@@ -486,8 +486,11 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+ }
+
+ /* skip if starts before the current position */
+- if (offset < curr)
++ if (offset < curr) {
++ if (next > curr)
++ ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ continue;
++ }
+
+ /* found the next entry */
+ if (!dir_emit(ctx, dire->u.name, nlen,
+@@ -1391,6 +1394,7 @@ static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ op->dentry = dentry;
+ op->create.mode = S_IFDIR | mode;
+ op->create.reason = afs_edit_dir_for_mkdir;
++ op->mtime = current_time(dir);
+ op->ops = &afs_mkdir_operation;
+ return afs_do_sync_operation(op);
+ }
+@@ -1694,6 +1698,7 @@ static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ op->dentry = dentry;
+ op->create.mode = S_IFREG | mode;
+ op->create.reason = afs_edit_dir_for_create;
++ op->mtime = current_time(dir);
+ op->ops = &afs_create_operation;
+ return afs_do_sync_operation(op);
+
+@@ -1829,6 +1834,7 @@ static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ op->ops = &afs_symlink_operation;
+ op->create.reason = afs_edit_dir_for_symlink;
+ op->create.symlink = content;
++ op->mtime = current_time(dir);
+ return afs_do_sync_operation(op);
+
+ error:
+diff --git a/fs/afs/file.c b/fs/afs/file.c
+index e6c447ae91f38..b165377179c3c 100644
+--- a/fs/afs/file.c
++++ b/fs/afs/file.c
+@@ -502,8 +502,9 @@ static void afs_add_open_mmap(struct afs_vnode *vnode)
+ if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
+ down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+
+- list_add_tail(&vnode->cb_mmap_link,
+- &vnode->volume->cell->fs_open_mmaps);
++ if (list_empty(&vnode->cb_mmap_link))
++ list_add_tail(&vnode->cb_mmap_link,
++ &vnode->volume->cell->fs_open_mmaps);
+
+ up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+ }
+diff --git a/fs/afs/flock.c b/fs/afs/flock.c
+index c4210a3964d8b..bbcc5afd15760 100644
+--- a/fs/afs/flock.c
++++ b/fs/afs/flock.c
+@@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call)
+ if (call->error == 0) {
+ spin_lock(&vnode->lock);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
+- vnode->locked_at = call->reply_time;
++ vnode->locked_at = call->issue_time;
+ afs_schedule_lock_extension(vnode);
+ spin_unlock(&vnode->lock);
+ }
+diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
+index c0031a3ab42f5..daaf3810cc925 100644
+--- a/fs/afs/fs_probe.c
++++ b/fs/afs/fs_probe.c
+@@ -167,8 +167,8 @@ responded:
+ clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+ }
+
+- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
+- rtt_us < server->probe.rtt) {
++ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
++ if (rtt_us < server->probe.rtt) {
+ server->probe.rtt = rtt_us;
+ server->rtt = rtt_us;
+ alist->preferred = index;
+@@ -366,12 +366,15 @@ void afs_fs_probe_dispatcher(struct work_struct *work)
+ unsigned long nowj, timer_at, poll_at;
+ bool first_pass = true, set_timer = false;
+
+- if (!net->live)
++ if (!net->live) {
++ afs_dec_servers_outstanding(net);
+ return;
++ }
+
+ _enter("");
+
+ if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
++ afs_dec_servers_outstanding(net);
+ _leave(" [none]");
+ return;
+ }
+diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
+index 4943413d9c5f7..7d37f63ef0f09 100644
+--- a/fs/afs/fsclient.c
++++ b/fs/afs/fsclient.c
+@@ -131,7 +131,7 @@ bad:
+
+ static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+ {
+- return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
++ return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry;
+ }
+
+ static void xdr_decode_AFSCallBack(const __be32 **_bp,
+diff --git a/fs/afs/inode.c b/fs/afs/inode.c
+index 8fcffea2daf50..91b1f8cabd58f 100644
+--- a/fs/afs/inode.c
++++ b/fs/afs/inode.c
+@@ -219,6 +219,7 @@ static void afs_apply_status(struct afs_operation *op,
+ set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+ }
+ change_size = true;
++ data_changed = true;
+ } else if (vnode->status.type == AFS_FTYPE_DIR) {
+ /* Expected directory change is handled elsewhere so
+ * that we can locally edit the directory and save on a
+@@ -728,10 +729,23 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ {
+ struct inode *inode = d_inode(path->dentry);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+- int seq = 0;
++ struct key *key;
++ int ret, seq = 0;
+
+ _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
+
++ if (vnode->volume &&
++ !(query_flags & AT_STATX_DONT_SYNC) &&
++ !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
++ key = afs_request_key(vnode->volume->cell);
++ if (IS_ERR(key))
++ return PTR_ERR(key);
++ ret = afs_validate(vnode, key);
++ key_put(key);
++ if (ret < 0)
++ return ret;
++ }
++
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ generic_fillattr(&init_user_ns, inode, stat);
+diff --git a/fs/afs/internal.h b/fs/afs/internal.h
+index 0ad97a8fc0d49..567e61b553f56 100644
+--- a/fs/afs/internal.h
++++ b/fs/afs/internal.h
+@@ -138,7 +138,6 @@ struct afs_call {
+ bool need_attention; /* T if RxRPC poked us */
+ bool async; /* T if asynchronous */
+ bool upgrade; /* T to request service upgrade */
+- bool have_reply_time; /* T if have got reply_time */
+ bool intr; /* T if interruptible */
+ bool unmarshalling_error; /* T if an unmarshalling error occurred */
+ u16 service_id; /* Actual service ID (after upgrade) */
+@@ -152,7 +151,7 @@ struct afs_call {
+ } __attribute__((packed));
+ __be64 tmp64;
+ };
+- ktime_t reply_time; /* Time of first reply packet */
++ ktime_t issue_time; /* Time of issue of operation */
+ };
+
+ struct afs_call_type {
+diff --git a/fs/afs/misc.c b/fs/afs/misc.c
+index 1d1a8debe4723..805328ca54284 100644
+--- a/fs/afs/misc.c
++++ b/fs/afs/misc.c
+@@ -69,6 +69,7 @@ int afs_abort_to_error(u32 abort_code)
+ /* Unified AFS error table */
+ case UAEPERM: return -EPERM;
+ case UAENOENT: return -ENOENT;
++ case UAEAGAIN: return -EAGAIN;
+ case UAEACCES: return -EACCES;
+ case UAEBUSY: return -EBUSY;
+ case UAEEXIST: return -EEXIST;
+@@ -163,8 +164,11 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+ return;
+
+ case -ECONNABORTED:
++ error = afs_abort_to_error(abort_code);
++ fallthrough;
++ case -ENETRESET: /* Responded, but we seem to have changed address */
+ e->responded = true;
+- e->error = afs_abort_to_error(abort_code);
++ e->error = error;
+ return;
+ }
+ }
+diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
+index 79e1a5f6701be..a840c3588ebbb 100644
+--- a/fs/afs/rotate.c
++++ b/fs/afs/rotate.c
+@@ -292,6 +292,10 @@ bool afs_select_fileserver(struct afs_operation *op)
+ op->error = error;
+ goto iterate_address;
+
++ case -ENETRESET:
++ pr_warn("kAFS: Peer reset %s (op=%x)\n",
++ op->type ? op->type->name : "???", op->debug_id);
++ fallthrough;
+ case -ECONNRESET:
+ _debug("call reset");
+ op->error = error;
+diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
+index 23a1a92d64bb5..e3de7fea36435 100644
+--- a/fs/afs/rxrpc.c
++++ b/fs/afs/rxrpc.c
+@@ -347,6 +347,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
+ if (call->max_lifespan)
+ rxrpc_kernel_set_max_life(call->net->socket, rxcall,
+ call->max_lifespan);
++ call->issue_time = ktime_get_real();
+
+ /* send the request */
+ iov[0].iov_base = call->request;
+@@ -497,12 +498,6 @@ static void afs_deliver_to_call(struct afs_call *call)
+ return;
+ }
+
+- if (!call->have_reply_time &&
+- rxrpc_kernel_get_reply_time(call->net->socket,
+- call->rxcall,
+- &call->reply_time))
+- call->have_reply_time = true;
+-
+ ret = call->type->deliver(call);
+ state = READ_ONCE(call->state);
+ if (ret == 0 && call->unmarshalling_error)
+@@ -537,6 +532,8 @@ static void afs_deliver_to_call(struct afs_call *call)
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
++ case -ENOMEM:
++ case -EFAULT:
+ abort_code = RXGEN_CC_UNMARSHAL;
+ if (state != AFS_CALL_CL_AWAIT_REPLY)
+ abort_code = RXGEN_SS_UNMARSHAL;
+@@ -544,7 +541,7 @@ static void afs_deliver_to_call(struct afs_call *call)
+ abort_code, ret, "KUM");
+ goto local_abort;
+ default:
+- abort_code = RX_USER_ABORT;
++ abort_code = RX_CALL_DEAD;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KER");
+ goto local_abort;
+@@ -836,7 +833,7 @@ void afs_send_empty_reply(struct afs_call *call)
+ case -ENOMEM:
+ _debug("oom");
+ rxrpc_kernel_abort_call(net->socket, call->rxcall,
+- RX_USER_ABORT, -ENOMEM, "KOO");
++ RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ fallthrough;
+ default:
+ _leave(" [error]");
+@@ -878,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
+ if (n == -ENOMEM) {
+ _debug("oom");
+ rxrpc_kernel_abort_call(net->socket, call->rxcall,
+- RX_USER_ABORT, -ENOMEM, "KOO");
++ RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ }
+ _leave(" [error]");
+ }
+diff --git a/fs/afs/super.c b/fs/afs/super.c
+index d110def8aa8eb..34c68724c98be 100644
+--- a/fs/afs/super.c
++++ b/fs/afs/super.c
+@@ -667,6 +667,7 @@ static void afs_i_init_once(void *_vnode)
+ INIT_LIST_HEAD(&vnode->pending_locks);
+ INIT_LIST_HEAD(&vnode->granted_locks);
+ INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
++ INIT_LIST_HEAD(&vnode->cb_mmap_link);
+ seqlock_init(&vnode->cb_lock);
+ }
+
+diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
+index d1c7068b4346f..58452b86e6727 100644
+--- a/fs/afs/vl_probe.c
++++ b/fs/afs/vl_probe.c
+@@ -115,8 +115,8 @@ responded:
+ }
+ }
+
+- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
+- rtt_us < server->probe.rtt) {
++ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
++ if (rtt_us < server->probe.rtt) {
+ server->probe.rtt = rtt_us;
+ server->rtt = rtt_us;
+ alist->preferred = index;
+diff --git a/fs/afs/write.c b/fs/afs/write.c
+index f24370f5c7744..a75c4742062aa 100644
+--- a/fs/afs/write.c
++++ b/fs/afs/write.c
+@@ -626,6 +626,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
++ case -ENETRESET:
+ afs_redirty_pages(wbc, mapping, start, len);
+ mapping_set_error(mapping, ret);
+ break;
+diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
+index 2b35cba8ad62b..88ea20e79ae27 100644
+--- a/fs/afs/yfsclient.c
++++ b/fs/afs/yfsclient.c
+@@ -239,8 +239,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp,
+ struct afs_callback *cb = &scb->callback;
+ ktime_t cb_expiry;
+
+- cb_expiry = call->reply_time;
+- cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
++ cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100);
+ cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
+ scb->have_cb = true;
+ *_bp += xdr_size(x);
+diff --git a/fs/aio.c b/fs/aio.c
+index 51b08ab01dffc..e88fd9b58f3f1 100644
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -181,8 +181,9 @@ struct poll_iocb {
+ struct file *file;
+ struct wait_queue_head *head;
+ __poll_t events;
+- bool done;
+ bool cancelled;
++ bool work_scheduled;
++ bool work_need_resched;
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+@@ -333,6 +334,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
+ spin_lock(&mm->ioctx_lock);
+ rcu_read_lock();
+ table = rcu_dereference(mm->ioctx_table);
++ if (!table)
++ goto out_unlock;
++
+ for (i = 0; i < table->nr; i++) {
+ struct kioctx *ctx;
+
+@@ -346,6 +350,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
+ }
+ }
+
++out_unlock:
+ rcu_read_unlock();
+ spin_unlock(&mm->ioctx_lock);
+ return res;
+@@ -1620,6 +1625,51 @@ static void aio_poll_put_work(struct work_struct *work)
+ iocb_put(iocb);
+ }
+
++/*
++ * Safely lock the waitqueue which the request is on, synchronizing with the
++ * case where the ->poll() provider decides to free its waitqueue early.
++ *
++ * Returns true on success, meaning that req->head->lock was locked, req->wait
++ * is on req->head, and an RCU read lock was taken. Returns false if the
++ * request was already removed from its waitqueue (which might no longer exist).
++ */
++static bool poll_iocb_lock_wq(struct poll_iocb *req)
++{
++ wait_queue_head_t *head;
++
++ /*
++ * While we hold the waitqueue lock and the waitqueue is nonempty,
++ * wake_up_pollfree() will wait for us. However, taking the waitqueue
++ * lock in the first place can race with the waitqueue being freed.
++ *
++ * We solve this as eventpoll does: by taking advantage of the fact that
++ * all users of wake_up_pollfree() will RCU-delay the actual free. If
++ * we enter rcu_read_lock() and see that the pointer to the queue is
++ * non-NULL, we can then lock it without the memory being freed out from
++ * under us, then check whether the request is still on the queue.
++ *
++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
++ * case the caller deletes the entry from the queue, leaving it empty.
++ * In that case, only RCU prevents the queue memory from being freed.
++ */
++ rcu_read_lock();
++ head = smp_load_acquire(&req->head);
++ if (head) {
++ spin_lock(&head->lock);
++ if (!list_empty(&req->wait.entry))
++ return true;
++ spin_unlock(&head->lock);
++ }
++ rcu_read_unlock();
++ return false;
++}
++
++static void poll_iocb_unlock_wq(struct poll_iocb *req)
++{
++ spin_unlock(&req->head->lock);
++ rcu_read_unlock();
++}
++
+ static void aio_poll_complete_work(struct work_struct *work)
+ {
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+@@ -1639,14 +1689,27 @@ static void aio_poll_complete_work(struct work_struct *work)
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
+- if (!mask && !READ_ONCE(req->cancelled)) {
+- add_wait_queue(req->head, &req->wait);
+- spin_unlock_irq(&ctx->ctx_lock);
+- return;
+- }
++ if (poll_iocb_lock_wq(req)) {
++ if (!mask && !READ_ONCE(req->cancelled)) {
++ /*
++ * The request isn't actually ready to be completed yet.
++ * Reschedule completion if another wakeup came in.
++ */
++ if (req->work_need_resched) {
++ schedule_work(&req->work);
++ req->work_need_resched = false;
++ } else {
++ req->work_scheduled = false;
++ }
++ poll_iocb_unlock_wq(req);
++ spin_unlock_irq(&ctx->ctx_lock);
++ return;
++ }
++ list_del_init(&req->wait.entry);
++ poll_iocb_unlock_wq(req);
++ } /* else, POLLFREE has freed the waitqueue, so we must complete */
+ list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ iocb_put(iocb);
+@@ -1658,13 +1721,14 @@ static int aio_poll_cancel(struct kiocb *iocb)
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+
+- spin_lock(&req->head->lock);
+- WRITE_ONCE(req->cancelled, true);
+- if (!list_empty(&req->wait.entry)) {
+- list_del_init(&req->wait.entry);
+- schedule_work(&aiocb->poll.work);
+- }
+- spin_unlock(&req->head->lock);
++ if (poll_iocb_lock_wq(req)) {
++ WRITE_ONCE(req->cancelled, true);
++ if (!req->work_scheduled) {
++ schedule_work(&aiocb->poll.work);
++ req->work_scheduled = true;
++ }
++ poll_iocb_unlock_wq(req);
++ } /* else, the request was force-cancelled by POLLFREE already */
+
+ return 0;
+ }
+@@ -1681,21 +1745,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ if (mask && !(mask & req->events))
+ return 0;
+
+- list_del_init(&req->wait.entry);
+-
+- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
++ /*
++ * Complete the request inline if possible. This requires that three
++ * conditions be met:
++ * 1. An event mask must have been passed. If a plain wakeup was done
++ * instead, then mask == 0 and we have to call vfs_poll() to get
++ * the events, so inline completion isn't possible.
++ * 2. The completion work must not have already been scheduled.
++ * 3. ctx_lock must not be busy. We have to use trylock because we
++ * already hold the waitqueue lock, so this inverts the normal
++ * locking order. Use irqsave/irqrestore because not all
++ * filesystems (e.g. fuse) call this function with IRQs disabled,
++ * yet IRQs have to be disabled before ctx_lock is obtained.
++ */
++ if (mask && !req->work_scheduled &&
++ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ struct kioctx *ctx = iocb->ki_ctx;
+
+- /*
+- * Try to complete the iocb inline if we can. Use
+- * irqsave/irqrestore because not all filesystems (e.g. fuse)
+- * call this function with IRQs disabled and because IRQs
+- * have to be disabled before ctx_lock is obtained.
+- */
++ list_del_init(&req->wait.entry);
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+- if (iocb->ki_eventfd && eventfd_signal_allowed()) {
++ if (iocb->ki_eventfd && !eventfd_signal_allowed()) {
+ iocb = NULL;
+ INIT_WORK(&req->work, aio_poll_put_work);
+ schedule_work(&req->work);
+@@ -1704,7 +1774,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ if (iocb)
+ iocb_put(iocb);
+ } else {
+- schedule_work(&req->work);
++ /*
++ * Schedule the completion work if needed. If it was already
++ * scheduled, record that another wakeup came in.
++ *
++ * Don't remove the request from the waitqueue here, as it might
++ * not actually be complete yet (we won't know until vfs_poll()
++ * is called), and we must not miss any wakeups. POLLFREE is an
++ * exception to this; see below.
++ */
++ if (req->work_scheduled) {
++ req->work_need_resched = true;
++ } else {
++ schedule_work(&req->work);
++ req->work_scheduled = true;
++ }
++
++ /*
++ * If the waitqueue is being freed early but we can't complete
++ * the request inline, we have to tear down the request as best
++ * we can. That means immediately removing the request from its
++ * waitqueue and preventing all further accesses to the
++ * waitqueue via the request. We also need to schedule the
++ * completion work (done above). Also mark the request as
++ * cancelled, to potentially skip an unneeded call to ->poll().
++ */
++ if (mask & POLLFREE) {
++ WRITE_ONCE(req->cancelled, true);
++ list_del_init(&req->wait.entry);
++
++ /*
++ * Careful: this *must* be the last step, since as soon
++ * as req->head is NULL'ed out, the request can be
++ * completed and freed, since aio_poll_complete_work()
++ * will no longer need to take the waitqueue lock.
++ */
++ smp_store_release(&req->head, NULL);
++ }
+ }
+ return 1;
+ }
+@@ -1712,6 +1818,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ struct aio_poll_table {
+ struct poll_table_struct pt;
+ struct aio_kiocb *iocb;
++ bool queued;
+ int error;
+ };
+
+@@ -1722,11 +1829,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+ /* multiple wait queues per file are not supported */
+- if (unlikely(pt->iocb->poll.head)) {
++ if (unlikely(pt->queued)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
++ pt->queued = true;
+ pt->error = 0;
+ pt->iocb->poll.head = head;
+ add_wait_queue(head, &pt->iocb->poll.wait);
+@@ -1751,12 +1859,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+
+ req->head = NULL;
+- req->done = false;
+ req->cancelled = false;
++ req->work_scheduled = false;
++ req->work_need_resched = false;
+
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+ apt.iocb = aiocb;
++ apt.queued = false;
+ apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+ /* initialized the list so that we can do list_empty checks */
+@@ -1765,23 +1875,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
+
+ mask = vfs_poll(req->file, &apt.pt) & req->events;
+ spin_lock_irq(&ctx->ctx_lock);
+- if (likely(req->head)) {
+- spin_lock(&req->head->lock);
+- if (unlikely(list_empty(&req->wait.entry))) {
+- if (apt.error)
++ if (likely(apt.queued)) {
++ bool on_queue = poll_iocb_lock_wq(req);
++
++ if (!on_queue || req->work_scheduled) {
++ /*
++ * aio_poll_wake() already either scheduled the async
++ * completion work, or completed the request inline.
++ */
++ if (apt.error) /* unsupported case: multiple queues */
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
++ /* Steal to complete synchronously. */
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
++ /* Cancel if possible (may be too late though). */
+ WRITE_ONCE(req->cancelled, true);
+- } else if (!req->done) { /* actually waiting for an event */
++ } else if (on_queue) {
++ /*
++ * Actually waiting for an event, so add the request to
++ * active_reqs so that it can be cancelled if needed.
++ */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+- spin_unlock(&req->head->lock);
++ if (on_queue)
++ poll_iocb_unlock_wq(req);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
+diff --git a/fs/attr.c b/fs/attr.c
+index 473d21b3a86de..28e953e86960f 100644
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -18,6 +18,71 @@
+ #include <linux/evm.h>
+ #include <linux/ima.h>
+
++#include "internal.h"
++
++/**
++ * setattr_should_drop_sgid - determine whether the setgid bit needs to be
++ * removed
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ *
++ * This function determines whether the setgid bit needs to be removed.
++ * We retain backwards compatibility and require setgid bit to be removed
++ * unconditionally if S_IXGRP is set. Otherwise we have the exact same
++ * requirements as setattr_prepare() and setattr_copy().
++ *
++ * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise.
++ */
++int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
++ const struct inode *inode)
++{
++ umode_t mode = inode->i_mode;
++
++ if (!(mode & S_ISGID))
++ return 0;
++ if (mode & S_IXGRP)
++ return ATTR_KILL_SGID;
++ if (!in_group_or_capable(mnt_userns, inode,
++ i_gid_into_mnt(mnt_userns, inode)))
++ return ATTR_KILL_SGID;
++ return 0;
++}
++EXPORT_SYMBOL(setattr_should_drop_sgid);
++
++/**
++ * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to
++ * be dropped
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ *
++ * This function determines whether the set{g,u}id bits need to be removed.
++ * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the
++ * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both
++ * set{g,u}id bits need to be removed the corresponding mask of both flags is
++ * returned.
++ *
++ * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits
++ * to remove, 0 otherwise.
++ */
++int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
++ struct inode *inode)
++{
++ umode_t mode = inode->i_mode;
++ int kill = 0;
++
++ /* suid always must be killed */
++ if (unlikely(mode & S_ISUID))
++ kill = ATTR_KILL_SUID;
++
++ kill |= setattr_should_drop_sgid(mnt_userns, inode);
++
++ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
++ return kill;
++
++ return 0;
++}
++EXPORT_SYMBOL(setattr_should_drop_suidgid);
++
+ /**
+ * chown_ok - verify permissions to chown inode
+ * @mnt_userns: user namespace of the mount @inode was found from
+@@ -35,7 +100,7 @@ static bool chown_ok(struct user_namespace *mnt_userns,
+ kuid_t uid)
+ {
+ kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
+- if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, kuid))
++ if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, inode->i_uid))
+ return true;
+ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ return true;
+@@ -61,9 +126,15 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
+ const struct inode *inode, kgid_t gid)
+ {
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
+- (in_group_p(gid) || gid_eq(gid, kgid)))
+- return true;
++ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
++ kgid_t mapped_gid;
++
++ if (gid_eq(gid, inode->i_gid))
++ return true;
++ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
++ if (in_group_p(mapped_gid))
++ return true;
++ }
+ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
+ return true;
+ if (gid_eq(kgid, INVALID_GID) &&
+@@ -123,12 +194,19 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
+
+ /* Make sure a caller can chmod. */
+ if (ia_valid & ATTR_MODE) {
++ kgid_t mapped_gid;
++
+ if (!inode_owner_or_capable(mnt_userns, inode))
+ return -EPERM;
++
++ if (ia_valid & ATTR_GID)
++ mapped_gid = mapped_kgid_fs(mnt_userns,
++ i_user_ns(inode), attr->ia_gid);
++ else
++ mapped_gid = i_gid_into_mnt(mnt_userns, inode);
++
+ /* Also check the setgid bit! */
+- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
+- i_gid_into_mnt(mnt_userns, inode)) &&
+- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ if (!in_group_or_capable(mnt_userns, inode, mapped_gid))
+ attr->ia_mode &= ~S_ISGID;
+ }
+
+@@ -170,6 +248,8 @@ EXPORT_SYMBOL(setattr_prepare);
+ */
+ int inode_newsize_ok(const struct inode *inode, loff_t offset)
+ {
++ if (offset < 0)
++ return -EINVAL;
+ if (inode->i_size < offset) {
+ unsigned long limit;
+
+@@ -241,8 +321,7 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+ kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+- if (!in_group_p(kgid) &&
+- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ if (!in_group_or_capable(mnt_userns, inode, kgid))
+ mode &= ~S_ISGID;
+ inode->i_mode = mode;
+ }
+@@ -367,7 +446,7 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+ }
+ }
+ if (ia_valid & ATTR_KILL_SGID) {
+- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
++ if (mode & S_ISGID) {
+ if (!(ia_valid & ATTR_MODE)) {
+ ia_valid = attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = inode->i_mode;
+diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
+index a813b70f594e6..30379c33ad20c 100644
+--- a/fs/binfmt_elf.c
++++ b/fs/binfmt_elf.c
+@@ -170,8 +170,8 @@ static int padzero(unsigned long elf_bss)
+
+ static int
+ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
+- unsigned long load_addr, unsigned long interp_load_addr,
+- unsigned long e_entry)
++ unsigned long interp_load_addr,
++ unsigned long e_entry, unsigned long phdr_addr)
+ {
+ struct mm_struct *mm = current->mm;
+ unsigned long p = bprm->p;
+@@ -257,7 +257,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
+ NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
+ NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
+ NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
+- NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
++ NEW_AUX_ENT(AT_PHDR, phdr_addr);
+ NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
+ NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
+ NEW_AUX_ENT(AT_BASE, interp_load_addr);
+@@ -823,7 +823,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
+ static int load_elf_binary(struct linux_binprm *bprm)
+ {
+ struct file *interpreter = NULL; /* to shut gcc up */
+- unsigned long load_addr = 0, load_bias = 0;
++ unsigned long load_addr, load_bias = 0, phdr_addr = 0;
+ int load_addr_set = 0;
+ unsigned long error;
+ struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
+@@ -910,7 +910,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
+ interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
+ if (!interp_elf_ex) {
+ retval = -ENOMEM;
+- goto out_free_ph;
++ goto out_free_file;
+ }
+
+ /* Get the exec headers */
+@@ -1156,6 +1156,17 @@ out_free_interp:
+ reloc_func_desc = load_bias;
+ }
+ }
++
++ /*
++ * Figure out which segment in the file contains the Program
++ * Header table, and map to the associated memory address.
++ */
++ if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
++ elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
++ phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
++ elf_ppnt->p_vaddr;
++ }
++
+ k = elf_ppnt->p_vaddr;
+ if ((elf_ppnt->p_flags & PF_X) && k < start_code)
+ start_code = k;
+@@ -1191,6 +1202,7 @@ out_free_interp:
+ }
+
+ e_entry = elf_ex->e_entry + load_bias;
++ phdr_addr += load_bias;
+ elf_bss += load_bias;
+ elf_brk += load_bias;
+ start_code += load_bias;
+@@ -1254,8 +1266,8 @@ out_free_interp:
+ goto out;
+ #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
+
+- retval = create_elf_tables(bprm, elf_ex,
+- load_addr, interp_load_addr, e_entry);
++ retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
++ e_entry, phdr_addr);
+ if (retval < 0)
+ goto out;
+
+@@ -1319,6 +1331,7 @@ out:
+ out_free_dentry:
+ kfree(interp_elf_ex);
+ kfree(interp_elf_phdata);
++out_free_file:
+ allow_write_access(interpreter);
+ if (interpreter)
+ fput(interpreter);
+@@ -1606,17 +1619,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
+ * long file_ofs
+ * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
+ */
+-static int fill_files_note(struct memelfnote *note)
++static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
+ {
+- struct mm_struct *mm = current->mm;
+- struct vm_area_struct *vma;
+ unsigned count, size, names_ofs, remaining, n;
+ user_long_t *data;
+ user_long_t *start_end_ofs;
+ char *name_base, *name_curpos;
++ int i;
+
+ /* *Estimated* file count and total data size needed */
+- count = mm->map_count;
++ count = cprm->vma_count;
+ if (count > UINT_MAX / 64)
+ return -EINVAL;
+ size = count * 64;
+@@ -1638,11 +1650,12 @@ static int fill_files_note(struct memelfnote *note)
+ name_base = name_curpos = ((char *)data) + names_ofs;
+ remaining = size - names_ofs;
+ count = 0;
+- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct core_vma_metadata *m = &cprm->vma_meta[i];
+ struct file *file;
+ const char *filename;
+
+- file = vma->vm_file;
++ file = m->file;
+ if (!file)
+ continue;
+ filename = file_path(file, name_curpos, remaining);
+@@ -1662,9 +1675,9 @@ static int fill_files_note(struct memelfnote *note)
+ memmove(name_curpos, filename, n);
+ name_curpos += n;
+
+- *start_end_ofs++ = vma->vm_start;
+- *start_end_ofs++ = vma->vm_end;
+- *start_end_ofs++ = vma->vm_pgoff;
++ *start_end_ofs++ = m->start;
++ *start_end_ofs++ = m->end;
++ *start_end_ofs++ = m->pgoff;
+ count++;
+ }
+
+@@ -1675,7 +1688,7 @@ static int fill_files_note(struct memelfnote *note)
+ * Count usually is less than mm->map_count,
+ * we need to move filenames down.
+ */
+- n = mm->map_count - count;
++ n = cprm->vma_count - count;
+ if (n != 0) {
+ unsigned shift_bytes = n * 3 * sizeof(data[0]);
+ memmove(name_base - shift_bytes, name_base,
+@@ -1787,7 +1800,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
+
+ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+- const kernel_siginfo_t *siginfo, struct pt_regs *regs)
++ struct coredump_params *cprm)
+ {
+ struct task_struct *dump_task = current;
+ const struct user_regset_view *view = task_user_regset_view(dump_task);
+@@ -1859,7 +1872,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ * Now fill in each thread's information.
+ */
+ for (t = info->thread; t != NULL; t = t->next)
+- if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
++ if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size))
+ return 0;
+
+ /*
+@@ -1868,13 +1881,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
+ info->size += notesize(&info->psinfo);
+
+- fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
++ fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
+ info->size += notesize(&info->signote);
+
+ fill_auxv_note(&info->auxv, current->mm);
+ info->size += notesize(&info->auxv);
+
+- if (fill_files_note(&info->files) == 0)
++ if (fill_files_note(&info->files, cprm) == 0)
+ info->size += notesize(&info->files);
+
+ return 1;
+@@ -2016,7 +2029,7 @@ static int elf_note_info_init(struct elf_note_info *info)
+
+ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ struct elf_note_info *info,
+- const kernel_siginfo_t *siginfo, struct pt_regs *regs)
++ struct coredump_params *cprm)
+ {
+ struct core_thread *ct;
+ struct elf_thread_status *ets;
+@@ -2037,13 +2050,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ list_for_each_entry(ets, &info->thread_list, list) {
+ int sz;
+
+- sz = elf_dump_thread_status(siginfo->si_signo, ets);
++ sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
+ info->thread_status_size += sz;
+ }
+ /* now collect the dump for the current */
+ memset(info->prstatus, 0, sizeof(*info->prstatus));
+- fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
+- elf_core_copy_regs(&info->prstatus->pr_reg, regs);
++ fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
++ elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
+
+ /* Set up header */
+ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
+@@ -2059,18 +2072,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
+ fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
+ sizeof(*info->psinfo), info->psinfo);
+
+- fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
++ fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
+ fill_auxv_note(info->notes + 3, current->mm);
+ info->numnote = 4;
+
+- if (fill_files_note(info->notes + info->numnote) == 0) {
++ if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
+ info->notes_files = info->notes + info->numnote;
+ info->numnote++;
+ }
+
+ /* Try to dump the FPU. */
+- info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
+- info->fpu);
++ info->prstatus->pr_fpvalid =
++ elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
+ if (info->prstatus->pr_fpvalid)
+ fill_note(info->notes + info->numnote++,
+ "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
+@@ -2156,8 +2169,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
+ static int elf_core_dump(struct coredump_params *cprm)
+ {
+ int has_dumped = 0;
+- int vma_count, segs, i;
+- size_t vma_data_size;
++ int segs, i;
+ struct elfhdr elf;
+ loff_t offset = 0, dataoff;
+ struct elf_note_info info = { };
+@@ -2165,16 +2177,12 @@ static int elf_core_dump(struct coredump_params *cprm)
+ struct elf_shdr *shdr4extnum = NULL;
+ Elf_Half e_phnum;
+ elf_addr_t e_shoff;
+- struct core_vma_metadata *vma_meta;
+-
+- if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
+- return 0;
+
+ /*
+ * The number of segs are recored into ELF header as 16bit value.
+ * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
+ */
+- segs = vma_count + elf_core_extra_phdrs();
++ segs = cprm->vma_count + elf_core_extra_phdrs();
+
+ /* for notes section */
+ segs++;
+@@ -2188,7 +2196,7 @@ static int elf_core_dump(struct coredump_params *cprm)
+ * Collect all the non-memory information about the process for the
+ * notes. This also sets up the file header.
+ */
+- if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
++ if (!fill_note_info(&elf, e_phnum, &info, cprm))
+ goto end_coredump;
+
+ has_dumped = 1;
+@@ -2213,7 +2221,7 @@ static int elf_core_dump(struct coredump_params *cprm)
+
+ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
+
+- offset += vma_data_size;
++ offset += cprm->vma_data_size;
+ offset += elf_core_extra_data_size();
+ e_shoff = offset;
+
+@@ -2233,8 +2241,8 @@ static int elf_core_dump(struct coredump_params *cprm)
+ goto end_coredump;
+
+ /* Write program headers for segments dump */
+- for (i = 0; i < vma_count; i++) {
+- struct core_vma_metadata *meta = vma_meta + i;
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct core_vma_metadata *meta = cprm->vma_meta + i;
+ struct elf_phdr phdr;
+
+ phdr.p_type = PT_LOAD;
+@@ -2271,8 +2279,8 @@ static int elf_core_dump(struct coredump_params *cprm)
+ /* Align to page */
+ dump_skip_to(cprm, dataoff);
+
+- for (i = 0; i < vma_count; i++) {
+- struct core_vma_metadata *meta = vma_meta + i;
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct core_vma_metadata *meta = cprm->vma_meta + i;
+
+ if (!dump_user_range(cprm, meta->start, meta->dump_size))
+ goto end_coredump;
+@@ -2289,7 +2297,6 @@ static int elf_core_dump(struct coredump_params *cprm)
+ end_coredump:
+ free_note_info(&info);
+ kfree(shdr4extnum);
+- kvfree(vma_meta);
+ kfree(phdr4note);
+ return has_dumped;
+ }
+diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
+index 6d8fd6030cbb5..c316931fc99c5 100644
+--- a/fs/binfmt_elf_fdpic.c
++++ b/fs/binfmt_elf_fdpic.c
+@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
+ current->mm->start_stack = current->mm->start_brk + stack_size;
+ #endif
+
+- if (create_elf_fdpic_tables(bprm, current->mm,
+- &exec_params, &interp_params) < 0)
++ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
++ &interp_params);
++ if (retval < 0)
+ goto error;
+
+ kdebug("- start_code %lx", current->mm->start_code);
+@@ -1465,7 +1466,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
+ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ {
+ int has_dumped = 0;
+- int vma_count, segs;
++ int segs;
+ int i;
+ struct elfhdr *elf = NULL;
+ loff_t offset = 0, dataoff;
+@@ -1480,8 +1481,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ elf_addr_t e_shoff;
+ struct core_thread *ct;
+ struct elf_thread_status *tmp;
+- struct core_vma_metadata *vma_meta = NULL;
+- size_t vma_data_size;
+
+ /* alloc memory for large data structures: too large to be on stack */
+ elf = kmalloc(sizeof(*elf), GFP_KERNEL);
+@@ -1491,9 +1490,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ if (!psinfo)
+ goto end_coredump;
+
+- if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
+- goto end_coredump;
+-
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
+@@ -1513,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ tmp->next = thread_list;
+ thread_list = tmp;
+
+- segs = vma_count + elf_core_extra_phdrs();
++ segs = cprm->vma_count + elf_core_extra_phdrs();
+
+ /* for notes section */
+ segs++;
+@@ -1558,7 +1554,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ /* Page-align dumped data */
+ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
+
+- offset += vma_data_size;
++ offset += cprm->vma_data_size;
+ offset += elf_core_extra_data_size();
+ e_shoff = offset;
+
+@@ -1578,8 +1574,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+ goto end_coredump;
+
+ /* write program headers for segments dump */
+- for (i = 0; i < vma_count; i++) {
+- struct core_vma_metadata *meta = vma_meta + i;
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct core_vma_metadata *meta = cprm->vma_meta + i;
+ struct elf_phdr phdr;
+ size_t sz;
+
+@@ -1628,7 +1624,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
+
+ dump_skip_to(cprm, dataoff);
+
+- if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
++ if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count))
+ goto end_coredump;
+
+ if (!elf_core_write_extra_data(cprm))
+@@ -1652,7 +1648,6 @@ end_coredump:
+ thread_list = thread_list->next;
+ kfree(tmp);
+ }
+- kvfree(vma_meta);
+ kfree(phdr4note);
+ kfree(elf);
+ kfree(psinfo);
+diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
+index 5d776f80ee50c..7ca3e0db06ffa 100644
+--- a/fs/binfmt_flat.c
++++ b/fs/binfmt_flat.c
+@@ -433,6 +433,30 @@ static void old_reloc(unsigned long rl)
+
+ /****************************************************************************/
+
++static inline u32 __user *skip_got_header(u32 __user *rp)
++{
++ if (IS_ENABLED(CONFIG_RISCV)) {
++ /*
++ * RISC-V has a 16 byte GOT PLT header for elf64-riscv
++ * and 8 byte GOT PLT header for elf32-riscv.
++ * Skip the whole GOT PLT header, since it is reserved
++ * for the dynamic linker (ld.so).
++ */
++ u32 rp_val0, rp_val1;
++
++ if (get_user(rp_val0, rp))
++ return rp;
++ if (get_user(rp_val1, rp + 1))
++ return rp;
++
++ if (rp_val0 == 0xffffffff && rp_val1 == 0xffffffff)
++ rp += 4;
++ else if (rp_val0 == 0xffffffff)
++ rp += 2;
++ }
++ return rp;
++}
++
+ static int load_flat_file(struct linux_binprm *bprm,
+ struct lib_info *libinfo, int id, unsigned long *extra_stack)
+ {
+@@ -782,7 +806,8 @@ static int load_flat_file(struct linux_binprm *bprm,
+ * image.
+ */
+ if (flags & FLAT_FLAG_GOTPIC) {
+- for (rp = (u32 __user *)datapos; ; rp++) {
++ rp = skip_got_header((u32 __user *) datapos);
++ for (; ; rp++) {
+ u32 addr, rp_val;
+ if (get_user(rp_val, rp))
+ return -EFAULT;
+diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
+index e1eae7ea823ae..bb202ad369d53 100644
+--- a/fs/binfmt_misc.c
++++ b/fs/binfmt_misc.c
+@@ -44,10 +44,10 @@ static LIST_HEAD(entries);
+ static int enabled = 1;
+
+ enum {Enabled, Magic};
+-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
+-#define MISC_FMT_OPEN_BINARY (1 << 30)
+-#define MISC_FMT_CREDENTIALS (1 << 29)
+-#define MISC_FMT_OPEN_FILE (1 << 28)
++#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
++#define MISC_FMT_OPEN_BINARY (1UL << 30)
++#define MISC_FMT_CREDENTIALS (1UL << 29)
++#define MISC_FMT_OPEN_FILE (1UL << 28)
+
+ typedef struct {
+ struct list_head list;
+diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
+index 309516e6a9682..43c89952b7d25 100644
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -234,6 +234,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
+ ordered_list);
+ if (!test_bit(WORK_DONE_BIT, &work->flags))
+ break;
++ /*
++ * Orders all subsequent loads after reading WORK_DONE_BIT,
++ * paired with the smp_mb__before_atomic in btrfs_work_helper
++ * this guarantees that the ordered function will see all
++ * updates from ordinary work function.
++ */
++ smp_rmb();
+
+ /*
+ * we are going to call the ordered done function, but
+@@ -317,6 +324,13 @@ static void btrfs_work_helper(struct work_struct *normal_work)
+ thresh_exec_hook(wq);
+ work->func(work);
+ if (need_order) {
++ /*
++ * Ensures all memory accesses done in the work function are
++ * ordered before setting the WORK_DONE_BIT. Ensuring the thread
++ * which is going to executed the ordered work sees them.
++ * Pairs with the smp_rmb in run_ordered_work.
++ */
++ smp_mb__before_atomic();
+ set_bit(WORK_DONE_BIT, &work->flags);
+ run_ordered_work(wq, work);
+ } else {
+diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
+index f735b8798ba12..cd9202867d98a 100644
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -138,6 +138,7 @@ struct share_check {
+ u64 root_objectid;
+ u64 inum;
+ int share_count;
++ bool have_delayed_delete_refs;
+ };
+
+ static inline int extent_is_shared(struct share_check *sc)
+@@ -288,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
+ struct prelim_ref *ref, *next_ref;
+
+ rbtree_postorder_for_each_entry_safe(ref, next_ref,
+- &preftree->root.rb_root, rbnode)
++ &preftree->root.rb_root, rbnode) {
++ free_inode_elem_list(ref->inode_list);
+ free_pref(ref);
++ }
+
+ preftree->root = RB_ROOT_CACHED;
+ preftree->count = 0;
+@@ -430,6 +433,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ u64 wanted_disk_byte = ref->wanted_disk_byte;
+ u64 count = 0;
+ u64 data_offset;
++ u8 type;
+
+ if (level != 0) {
+ eb = path->nodes[level];
+@@ -484,6 +488,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ continue;
+ }
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
++ type = btrfs_file_extent_type(eb, fi);
++ if (type == BTRFS_FILE_EXTENT_INLINE)
++ goto next;
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ data_offset = btrfs_file_extent_offset(eb, fi);
+
+@@ -647,6 +654,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
+ return (struct extent_inode_elem *)(uintptr_t)node->aux;
+ }
+
++static void free_leaf_list(struct ulist *ulist)
++{
++ struct ulist_node *node;
++ struct ulist_iterator uiter;
++
++ ULIST_ITER_INIT(&uiter);
++ while ((node = ulist_next(ulist, &uiter)))
++ free_inode_elem_list(unode_aux_to_inode_list(node));
++
++ ulist_free(ulist);
++}
++
+ /*
+ * We maintain three separate rbtrees: one for direct refs, one for
+ * indirect refs which have a key, and one for indirect refs which do not
+@@ -761,7 +780,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ cond_resched();
+ }
+ out:
+- ulist_free(parents);
++ /*
++ * We may have inode lists attached to refs in the parents ulist, so we
++ * must free them before freeing the ulist and its refs.
++ */
++ free_leaf_list(parents);
+ return ret;
+ }
+
+@@ -818,16 +841,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ struct preftrees *preftrees, struct share_check *sc)
+ {
+ struct btrfs_delayed_ref_node *node;
+- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+ struct btrfs_key key;
+- struct btrfs_key tmp_op_key;
+ struct rb_node *n;
+ int count;
+ int ret = 0;
+
+- if (extent_op && extent_op->update_key)
+- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
+-
+ spin_lock(&head->lock);
+ for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
+ node = rb_entry(n, struct btrfs_delayed_ref_node,
+@@ -853,10 +871,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ case BTRFS_TREE_BLOCK_REF_KEY: {
+ /* NORMAL INDIRECT METADATA backref */
+ struct btrfs_delayed_tree_ref *ref;
++ struct btrfs_key *key_ptr = NULL;
++
++ if (head->extent_op && head->extent_op->update_key) {
++ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
++ key_ptr = &key;
++ }
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = add_indirect_ref(fs_info, preftrees, ref->root,
+- &tmp_op_key, ref->level + 1,
++ key_ptr, ref->level + 1,
+ node->bytenr, count, sc,
+ GFP_ATOMIC);
+ break;
+@@ -882,13 +906,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ key.offset = ref->offset;
+
+ /*
+- * Found a inum that doesn't match our known inum, we
+- * know it's shared.
++ * If we have a share check context and a reference for
++ * another inode, we can't exit immediately. This is
++ * because even if this is a BTRFS_ADD_DELAYED_REF
++ * reference we may find next a BTRFS_DROP_DELAYED_REF
++ * which cancels out this ADD reference.
++ *
++ * If this is a DROP reference and there was no previous
++ * ADD reference, then we need to signal that when we
++ * process references from the extent tree (through
++ * add_inline_refs() and add_keyed_refs()), we should
++ * not exit early if we find a reference for another
++ * inode, because one of the delayed DROP references
++ * may cancel that reference in the extent tree.
+ */
+- if (sc && sc->inum && ref->objectid != sc->inum) {
+- ret = BACKREF_FOUND_SHARED;
+- goto out;
+- }
++ if (sc && count < 0)
++ sc->have_delayed_delete_refs = true;
+
+ ret = add_indirect_ref(fs_info, preftrees, ref->root,
+ &key, 0, node->bytenr, count, sc,
+@@ -918,7 +951,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
+ }
+ if (!ret)
+ ret = extent_is_shared(sc);
+-out:
++
+ spin_unlock(&head->lock);
+ return ret;
+ }
+@@ -1021,7 +1054,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+- if (sc && sc->inum && key.objectid != sc->inum) {
++ if (sc && sc->inum && key.objectid != sc->inum &&
++ !sc->have_delayed_delete_refs) {
+ ret = BACKREF_FOUND_SHARED;
+ break;
+ }
+@@ -1031,6 +1065,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
+ ret = add_indirect_ref(fs_info, preftrees, root,
+ &key, 0, bytenr, count,
+ sc, GFP_NOFS);
++
+ break;
+ }
+ default:
+@@ -1120,7 +1155,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+- if (sc && sc->inum && key.objectid != sc->inum) {
++ if (sc && sc->inum && key.objectid != sc->inum &&
++ !sc->have_delayed_delete_refs) {
+ ret = BACKREF_FOUND_SHARED;
+ break;
+ }
+@@ -1214,7 +1250,12 @@ again:
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+- BUG_ON(ret == 0);
++ if (ret == 0) {
++ /* This shouldn't happen, indicates a bug or fs corruption. */
++ ASSERT(ret != 0);
++ ret = -EUCLEAN;
++ goto out;
++ }
+
+ #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (trans && likely(trans->type != __TRANS_DUMMY) &&
+@@ -1352,6 +1393,12 @@ again:
+ if (ret < 0)
+ goto out;
+ ref->inode_list = eie;
++ /*
++ * We transferred the list ownership to the ref,
++ * so set to NULL to avoid a double free in case
++ * an error happens after this.
++ */
++ eie = NULL;
+ }
+ ret = ulist_add_merge_ptr(refs, ref->parent,
+ ref->inode_list,
+@@ -1360,15 +1407,31 @@ again:
+ goto out;
+ if (!ret && extent_item_pos) {
+ /*
+- * we've recorded that parent, so we must extend
+- * its inode list here
++ * We've recorded that parent, so we must extend
++ * its inode list here.
++ *
++ * However if there was corruption we may not
++ * have found an eie, return an error in this
++ * case.
+ */
+- BUG_ON(!eie);
++ ASSERT(eie);
++ if (!eie) {
++ ret = -EUCLEAN;
++ goto out;
++ }
+ while (eie->next)
+ eie = eie->next;
+ eie->next = ref->inode_list;
+ }
+ eie = NULL;
++ /*
++ * We have transferred the inode list ownership from
++ * this ref to the ref we added to the 'refs' ulist.
++ * So set this ref's inode list to NULL to avoid
++ * use-after-free when our caller uses it or double
++ * frees in case an error happens before we return.
++ */
++ ref->inode_list = NULL;
+ }
+ cond_resched();
+ }
+@@ -1385,24 +1448,6 @@ out:
+ return ret;
+ }
+
+-static void free_leaf_list(struct ulist *blocks)
+-{
+- struct ulist_node *node = NULL;
+- struct extent_inode_elem *eie;
+- struct ulist_iterator uiter;
+-
+- ULIST_ITER_INIT(&uiter);
+- while ((node = ulist_next(blocks, &uiter))) {
+- if (!node->aux)
+- continue;
+- eie = unode_aux_to_inode_list(node);
+- free_inode_elem_list(eie);
+- node->aux = 0;
+- }
+-
+- ulist_free(blocks);
+-}
+-
+ /*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. key_list_head will point to a list of corresponding keys (caller must
+@@ -1534,6 +1579,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ .root_objectid = root->root_key.objectid,
+ .inum = inum,
+ .share_count = 0,
++ .have_delayed_delete_refs = false,
+ };
+
+ ulist_init(roots);
+@@ -1568,6 +1614,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+ break;
+ bytenr = node->val;
+ shared.share_count = 0;
++ shared.have_delayed_delete_refs = false;
+ cond_resched();
+ }
+
+@@ -2017,10 +2064,29 @@ out:
+ return ret;
+ }
+
++static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
++{
++ struct btrfs_data_container *inodes = ctx;
++ const size_t c = 3 * sizeof(u64);
++
++ if (inodes->bytes_left >= c) {
++ inodes->bytes_left -= c;
++ inodes->val[inodes->elem_cnt] = inum;
++ inodes->val[inodes->elem_cnt + 1] = offset;
++ inodes->val[inodes->elem_cnt + 2] = root;
++ inodes->elem_cnt += 3;
++ } else {
++ inodes->bytes_missing += c - inodes->bytes_left;
++ inodes->bytes_left = 0;
++ inodes->elem_missed += 3;
++ }
++
++ return 0;
++}
++
+ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+- iterate_extent_inodes_t *iterate, void *ctx,
+- bool ignore_offset)
++ void *ctx, bool ignore_offset)
+ {
+ int ret;
+ u64 extent_item_pos;
+@@ -2038,7 +2104,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+ extent_item_pos = logical - found_key.objectid;
+ ret = iterate_extent_inodes(fs_info, found_key.objectid,
+ extent_item_pos, search_commit_root,
+- iterate, ctx, ignore_offset);
++ build_ino_list, ctx, ignore_offset);
+
+ return ret;
+ }
+diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
+index ba454032dbe22..2759de7d324c8 100644
+--- a/fs/btrfs/backref.h
++++ b/fs/btrfs/backref.h
+@@ -35,8 +35,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
+ bool ignore_offset);
+
+ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
+- struct btrfs_path *path,
+- iterate_extent_inodes_t *iterate, void *ctx,
++ struct btrfs_path *path, void *ctx,
+ bool ignore_offset);
+
+ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index a3b830b8410a8..4ca6828586af5 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -78,14 +78,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
+ }
+ allowed &= flags;
+
+- if (allowed & BTRFS_BLOCK_GROUP_RAID6)
++ /* Select the highest-redundancy RAID level. */
++ if (allowed & BTRFS_BLOCK_GROUP_RAID1C4)
++ allowed = BTRFS_BLOCK_GROUP_RAID1C4;
++ else if (allowed & BTRFS_BLOCK_GROUP_RAID6)
+ allowed = BTRFS_BLOCK_GROUP_RAID6;
++ else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3)
++ allowed = BTRFS_BLOCK_GROUP_RAID1C3;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
+ allowed = BTRFS_BLOCK_GROUP_RAID5;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
+ allowed = BTRFS_BLOCK_GROUP_RAID10;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
+ allowed = BTRFS_BLOCK_GROUP_RAID1;
++ else if (allowed & BTRFS_BLOCK_GROUP_DUP)
++ allowed = BTRFS_BLOCK_GROUP_DUP;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
+ allowed = BTRFS_BLOCK_GROUP_RAID0;
+
+@@ -123,7 +130,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
+ {
+ if (refcount_dec_and_test(&cache->refs)) {
+ WARN_ON(cache->pinned > 0);
+- WARN_ON(cache->reserved > 0);
++ /*
++ * If there was a failure to cleanup a log tree, very likely due
++ * to an IO failure on a writeback attempt of one or more of its
++ * extent buffers, we could not do proper (and cheap) unaccounting
++ * of their reserved space, so don't warn on reserved > 0 in that
++ * case.
++ */
++ if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
++ !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
++ WARN_ON(cache->reserved > 0);
+
+ /*
+ * A block_group shouldn't be on the discard_list anymore.
+@@ -409,39 +425,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
+ btrfs_put_caching_control(caching_ctl);
+ }
+
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
++ struct btrfs_caching_control *caching_ctl)
++{
++ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
++ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
++}
++
++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+ {
+ struct btrfs_caching_control *caching_ctl;
+- int ret = 0;
++ int ret;
+
+ caching_ctl = btrfs_get_caching_control(cache);
+ if (!caching_ctl)
+ return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
+-
+- wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+- if (cache->cached == BTRFS_CACHE_ERROR)
+- ret = -EIO;
++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+ btrfs_put_caching_control(caching_ctl);
+ return ret;
+ }
+
+-static bool space_cache_v1_done(struct btrfs_block_group *cache)
+-{
+- bool ret;
+-
+- spin_lock(&cache->lock);
+- ret = cache->cached != BTRFS_CACHE_FAST;
+- spin_unlock(&cache->lock);
+-
+- return ret;
+-}
+-
+-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+- struct btrfs_caching_control *caching_ctl)
+-{
+- wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+-}
+-
+ #ifdef CONFIG_BTRFS_DEBUG
+ static void fragment_free_space(struct btrfs_block_group *block_group)
+ {
+@@ -718,9 +721,8 @@ done:
+ btrfs_put_block_group(block_group);
+ }
+
+-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
+ {
+- DEFINE_WAIT(wait);
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl = NULL;
+ int ret = 0;
+@@ -753,10 +755,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
+ }
+ WARN_ON(cache->caching_ctl);
+ cache->caching_ctl = caching_ctl;
+- if (btrfs_test_opt(fs_info, SPACE_CACHE))
+- cache->cached = BTRFS_CACHE_FAST;
+- else
+- cache->cached = BTRFS_CACHE_STARTED;
++ cache->cached = BTRFS_CACHE_STARTED;
+ cache->has_caching_ctl = 1;
+ spin_unlock(&cache->lock);
+
+@@ -769,8 +768,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
+
+ btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
+ out:
+- if (load_cache_only && caching_ctl)
+- btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
++ if (wait && caching_ctl)
++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
+ if (caching_ctl)
+ btrfs_put_caching_control(caching_ctl);
+
+@@ -902,6 +901,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+ spin_unlock(&cluster->refill_lock);
+
+ btrfs_clear_treelog_bg(block_group);
++ btrfs_clear_data_reloc_bg(block_group);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+@@ -1475,11 +1475,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
+ {
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+
++ trace_btrfs_add_unused_block_group(bg);
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+- trace_btrfs_add_unused_block_group(bg);
+ list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
++ } else {
++ /* Pull out the block group from the reclaim_bgs list. */
++ list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ }
+@@ -1490,13 +1493,16 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
+ container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
+ struct btrfs_block_group *bg;
+ struct btrfs_space_info *space_info;
+- LIST_HEAD(again_list);
+
+ if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
+ return;
+
+- if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
++ sb_start_write(fs_info->sb);
++
++ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
++ sb_end_write(fs_info->sb);
+ return;
++ }
+
+ /*
+ * Long running balances can keep us blocked here for eternity, so
+@@ -1504,6 +1510,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
+ */
+ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
+ btrfs_exclop_finish(fs_info);
++ sb_end_write(fs_info->sb);
+ return;
+ }
+
+@@ -1537,8 +1544,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
+ }
+ spin_unlock(&bg->lock);
+
+- /* Get out fast, in case we're unmounting the filesystem */
+- if (btrfs_fs_closing(fs_info)) {
++ /*
++ * Get out fast, in case we're read-only or unmounting the
++ * filesystem. It is OK to drop block groups from the list even
++ * for the read-only case. As we did sb_start_write(),
++ * "mount -o remount,ro" won't happen and read-only filesystem
++ * means it is forced read-only due to a fatal error. So, it
++ * never gets back to read-write to let us reclaim again.
++ */
++ if (btrfs_need_cleaner_sleep(fs_info)) {
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+@@ -1557,25 +1571,41 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
+
+ btrfs_info(fs_info,
+ "reclaiming chunk %llu with %llu%% used %llu%% unusable",
+- bg->start, div_u64(bg->used * 100, bg->length),
++ bg->start,
++ div64_u64(bg->used * 100, bg->length),
+ div64_u64(zone_unusable * 100, bg->length));
+ trace_btrfs_reclaim_block_group(bg);
+ ret = btrfs_relocate_chunk(fs_info, bg->start);
+- if (ret && ret != -EAGAIN)
++ if (ret) {
++ btrfs_dec_block_group_ro(bg);
+ btrfs_err(fs_info, "error relocating chunk %llu",
+ bg->start);
++ }
+
+ next:
++ if (ret)
++ btrfs_mark_bg_to_reclaim(bg);
++ btrfs_put_block_group(bg);
++
++ mutex_unlock(&fs_info->reclaim_bgs_lock);
++ /*
++ * Reclaiming all the block groups in the list can take really
++ * long. Prioritize cleaning up unused block groups.
++ */
++ btrfs_delete_unused_bgs(fs_info);
++ /*
++ * If we are interrupted by a balance, we can just bail out. The
++ * cleaner thread restart again if necessary.
++ */
++ if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
++ goto end;
+ spin_lock(&fs_info->unused_bgs_lock);
+- if (ret == -EAGAIN && list_empty(&bg->bg_list))
+- list_add_tail(&bg->bg_list, &again_list);
+- else
+- btrfs_put_block_group(bg);
+ }
+- list_splice_tail(&again_list, &fs_info->reclaim_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
++end:
+ btrfs_exclop_finish(fs_info);
++ sb_end_write(fs_info->sb);
+ }
+
+ void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
+@@ -1828,6 +1858,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
+
+ /* Shouldn't have super stripes in sequential zones */
+ if (zoned && nr) {
++ kfree(logical);
+ btrfs_err(fs_info,
+ "zoned: block group %llu must not contain super block",
+ cache->start);
+@@ -2143,7 +2174,16 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
+ int need_clear = 0;
+ u64 cache_gen;
+
+- if (!info->extent_root)
++ /*
++ * Either no extent root (with ibadroots rescue option) or we have
++ * unsupported RO options. The fs can never be mounted read-write, so no
++ * need to waste time searching block group items.
++ *
++ * This also allows new extent tree related changes to be RO compat,
++ * no need for a full incompat flag.
++ */
++ if (!info->extent_root || (btrfs_super_compat_ro_flags(info->super_copy) &
++ ~BTRFS_FEATURE_COMPAT_RO_SUPP))
+ return fill_dummy_bgs(info);
+
+ key.objectid = 0;
+@@ -2510,6 +2550,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+ int ret;
+ bool dirty_bg_running;
+
++ /*
++ * This can only happen when we are doing read-only scrub on read-only
++ * mount.
++ * In that case we should not start a new transaction on read-only fs.
++ * Thus here we skip all chunk allocations.
++ */
++ if (sb_rdonly(fs_info->sb)) {
++ mutex_lock(&fs_info->ro_block_group_mutex);
++ ret = inc_block_group_ro(cache, 0);
++ mutex_unlock(&fs_info->ro_block_group_mutex);
++ return ret;
++ }
++
+ do {
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+@@ -2557,10 +2610,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+ }
+
+ ret = inc_block_group_ro(cache, 0);
+- if (!do_chunk_alloc || ret == -ETXTBSY)
+- goto unlock_out;
+ if (!ret)
+ goto out;
++ if (ret == -ETXTBSY)
++ goto unlock_out;
++
++ /*
++ * Skip chunk allocation if the bg is SYSTEM, this is to avoid system
++ * chunk allocation storm to exhaust the system chunk array. Otherwise
++ * we still want to try our best to mark the block group read-only.
++ */
++ if (!do_chunk_alloc && ret == -ENOSPC &&
++ (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
++ goto unlock_out;
++
+ alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
+ ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ if (ret < 0)
+@@ -2858,7 +2921,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
+ struct btrfs_path *path = NULL;
+ LIST_HEAD(dirty);
+ struct list_head *io = &cur_trans->io_bgs;
+- int num_started = 0;
+ int loops = 0;
+
+ spin_lock(&cur_trans->dirty_bgs_lock);
+@@ -2924,7 +2986,6 @@ again:
+ cache->io_ctl.inode = NULL;
+ ret = btrfs_write_out_cache(trans, cache, path);
+ if (ret == 0 && cache->io_ctl.inode) {
+- num_started++;
+ should_put = 0;
+
+ /*
+@@ -3025,7 +3086,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
+ int should_put;
+ struct btrfs_path *path;
+ struct list_head *io = &cur_trans->io_bgs;
+- int num_started = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+@@ -3083,7 +3143,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
+ cache->io_ctl.inode = NULL;
+ ret = btrfs_write_out_cache(trans, cache, path);
+ if (ret == 0 && cache->io_ctl.inode) {
+- num_started++;
+ should_put = 0;
+ list_add_tail(&cache->io_list, io);
+ } else {
+@@ -3178,7 +3237,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ * space back to the block group, otherwise we will leak space.
+ */
+ if (!alloc && !btrfs_block_group_done(cache))
+- btrfs_cache_block_group(cache, 1);
++ btrfs_cache_block_group(cache, true);
+
+ byte_in_group = bytenr - cache->start;
+ WARN_ON(byte_in_group > cache->length);
+@@ -3380,31 +3439,12 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+ */
+ check_system_chunk(trans, flags);
+
+- bg = btrfs_alloc_chunk(trans, flags);
++ bg = btrfs_create_chunk(trans, flags);
+ if (IS_ERR(bg)) {
+ ret = PTR_ERR(bg);
+ goto out;
+ }
+
+- /*
+- * If this is a system chunk allocation then stop right here and do not
+- * add the chunk item to the chunk btree. This is to prevent a deadlock
+- * because this system chunk allocation can be triggered while COWing
+- * some extent buffer of the chunk btree and while holding a lock on a
+- * parent extent buffer, in which case attempting to insert the chunk
+- * item (or update the device item) would result in a deadlock on that
+- * parent extent buffer. In this case defer the chunk btree updates to
+- * the second phase of chunk allocation and keep our reservation until
+- * the second phase completes.
+- *
+- * This is a rare case and can only be triggered by the very few cases
+- * we have where we need to touch the chunk btree outside chunk allocation
+- * and chunk removal. These cases are basically adding a device, removing
+- * a device or resizing a device.
+- */
+- if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+- return 0;
+-
+ ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
+ /*
+ * Normally we are not expected to fail with -ENOSPC here, since we have
+@@ -3441,7 +3481,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+ const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
+ struct btrfs_block_group *sys_bg;
+
+- sys_bg = btrfs_alloc_chunk(trans, sys_flags);
++ sys_bg = btrfs_create_chunk(trans, sys_flags);
+ if (IS_ERR(sys_bg)) {
+ ret = PTR_ERR(sys_bg);
+ btrfs_abort_transaction(trans, ret);
+@@ -3537,14 +3577,14 @@ out:
+ * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of
+ * the system chunk array due to concurrent allocations") provides more details.
+ *
+- * For allocation of system chunks, we defer the updates and insertions into the
+- * chunk btree to phase 2. This is to prevent deadlocks on extent buffers because
+- * if the chunk allocation is triggered while COWing an extent buffer of the
+- * chunk btree, we are holding a lock on the parent of that extent buffer and
+- * doing the chunk btree updates and insertions can require locking that parent.
+- * This is for the very few and rare cases where we update the chunk btree that
+- * are not chunk allocation or chunk removal: adding a device, removing a device
+- * or resizing a device.
++ * Allocation of system chunks does not happen through this function. A task that
++ * needs to update the chunk btree (the only btree that uses system chunks), must
++ * preallocate chunk space by calling either check_system_chunk() or
++ * btrfs_reserve_chunk_metadata() - the former is used when allocating a data or
++ * metadata chunk or when removing a chunk, while the latter is used before doing
++ * a modification to the chunk btree - use cases for the latter are adding,
++ * removing and resizing a device as well as relocation of a system chunk.
++ * See the comment below for more details.
+ *
+ * The reservation of system space, done through check_system_chunk(), as well
+ * as all the updates and insertions into the chunk btree must be done while
+@@ -3581,11 +3621,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ if (trans->allocating_chunk)
+ return -ENOSPC;
+ /*
+- * If we are removing a chunk, don't re-enter or we would deadlock.
+- * System space reservation and system chunk allocation is done by the
+- * chunk remove operation (btrfs_remove_chunk()).
++ * Allocation of system chunks can not happen through this path, as we
++ * could end up in a deadlock if we are allocating a data or metadata
++ * chunk and there is another task modifying the chunk btree.
++ *
++ * This is because while we are holding the chunk mutex, we will attempt
++ * to add the new chunk item to the chunk btree or update an existing
++ * device item in the chunk btree, while the other task that is modifying
++ * the chunk btree is attempting to COW an extent buffer while holding a
++ * lock on it and on its parent - if the COW operation triggers a system
++ * chunk allocation, then we can deadlock because we are holding the
++ * chunk mutex and we may need to access that extent buffer or its parent
++ * in order to add the chunk item or update a device item.
++ *
++ * Tasks that want to modify the chunk tree should reserve system space
++ * before updating the chunk btree, by calling either
++ * btrfs_reserve_chunk_metadata() or check_system_chunk().
++ * It's possible that after a task reserves the space, it still ends up
++ * here - this happens in the cases described above at do_chunk_alloc().
++ * The task will have to either retry or fail.
+ */
+- if (trans->removing_chunk)
++ if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ return -ENOSPC;
+
+ space_info = btrfs_find_space_info(fs_info, flags);
+@@ -3615,6 +3671,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ * attempt.
+ */
+ wait_for_alloc = true;
++ force = CHUNK_ALLOC_NO_FORCE;
+ spin_unlock(&space_info->lock);
+ mutex_lock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
+@@ -3684,17 +3741,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
+ return num_dev;
+ }
+
+-/*
+- * Reserve space in the system space for allocating or removing a chunk
+- */
+-void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
++static void reserve_chunk_space(struct btrfs_trans_handle *trans,
++ u64 bytes,
++ u64 type)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_space_info *info;
+ u64 left;
+- u64 thresh;
+ int ret = 0;
+- u64 num_devs;
+
+ /*
+ * Needed because we can end up allocating a system chunk and for an
+@@ -3707,19 +3761,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+ left = info->total_bytes - btrfs_space_info_used(info, true);
+ spin_unlock(&info->lock);
+
+- num_devs = get_profile_num_devs(fs_info, type);
+-
+- /* num_devs device items to update and 1 chunk item to add or remove */
+- thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
+- btrfs_calc_insert_metadata_size(fs_info, 1);
+-
+- if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
++ if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+ btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
+- left, thresh, type);
++ left, bytes, type);
+ btrfs_dump_space_info(fs_info, info, 0, 0);
+ }
+
+- if (left < thresh) {
++ if (left < bytes) {
+ u64 flags = btrfs_system_alloc_profile(fs_info);
+ struct btrfs_block_group *bg;
+
+@@ -3728,21 +3776,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+ * needing it, as we might not need to COW all nodes/leafs from
+ * the paths we visit in the chunk tree (they were already COWed
+ * or created in the current transaction for example).
+- *
+- * Also, if our caller is allocating a system chunk, do not
+- * attempt to insert the chunk item in the chunk btree, as we
+- * could deadlock on an extent buffer since our caller may be
+- * COWing an extent buffer from the chunk btree.
+ */
+- bg = btrfs_alloc_chunk(trans, flags);
++ bg = btrfs_create_chunk(trans, flags);
+ if (IS_ERR(bg)) {
+ ret = PTR_ERR(bg);
+- } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
++ } else {
+ /*
+ * If we fail to add the chunk item here, we end up
+ * trying again at phase 2 of chunk allocation, at
+ * btrfs_create_pending_block_groups(). So ignore
+- * any error here.
++ * any error here. An ENOSPC here could happen, due to
++ * the cases described at do_chunk_alloc() - the system
++ * block group we just created was just turned into RO
++ * mode by a scrub for example, or a running discard
++ * temporarily removed its free space entries, etc.
+ */
+ btrfs_chunk_alloc_add_chunk_item(trans, bg);
+ }
+@@ -3751,12 +3798,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+ if (!ret) {
+ ret = btrfs_block_rsv_add(fs_info->chunk_root,
+ &fs_info->chunk_block_rsv,
+- thresh, BTRFS_RESERVE_NO_FLUSH);
++ bytes, BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+- trans->chunk_bytes_reserved += thresh;
++ trans->chunk_bytes_reserved += bytes;
+ }
+ }
+
++/*
++ * Reserve space in the system space for allocating or removing a chunk.
++ * The caller must be holding fs_info->chunk_mutex.
++ */
++void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
++{
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ const u64 num_devs = get_profile_num_devs(fs_info, type);
++ u64 bytes;
++
++ /* num_devs device items to update and 1 chunk item to add or remove. */
++ bytes = btrfs_calc_metadata_size(fs_info, num_devs) +
++ btrfs_calc_insert_metadata_size(fs_info, 1);
++
++ reserve_chunk_space(trans, bytes, type);
++}
++
++/*
++ * Reserve space in the system space, if needed, for doing a modification to the
++ * chunk btree.
++ *
++ * @trans: A transaction handle.
++ * @is_item_insertion: Indicate if the modification is for inserting a new item
++ * in the chunk btree or if it's for the deletion or update
++ * of an existing item.
++ *
++ * This is used in a context where we need to update the chunk btree outside
++ * block group allocation and removal, to avoid a deadlock with a concurrent
++ * task that is allocating a metadata or data block group and therefore needs to
++ * update the chunk btree while holding the chunk mutex. After the update to the
++ * chunk btree is done, btrfs_trans_release_chunk_metadata() should be called.
++ *
++ */
++void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
++ bool is_item_insertion)
++{
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ u64 bytes;
++
++ if (is_item_insertion)
++ bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
++ else
++ bytes = btrfs_calc_metadata_size(fs_info, 1);
++
++ mutex_lock(&fs_info->chunk_mutex);
++ reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM);
++ mutex_unlock(&fs_info->chunk_mutex);
++}
++
+ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+ {
+ struct btrfs_block_group *block_group;
+@@ -3879,9 +3975,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
+ * important and indicates a real bug if this happens.
+ */
+ if (WARN_ON(space_info->bytes_pinned > 0 ||
+- space_info->bytes_reserved > 0 ||
+ space_info->bytes_may_use > 0))
+ btrfs_dump_space_info(info, space_info, 0, 0);
++
++ /*
++ * If there was a failure to cleanup a log tree, very likely due
++ * to an IO failure on a writeback attempt of one or more of its
++ * extent buffers, we could not do proper (and cheap) unaccounting
++ * of their reserved space, so don't warn on bytes_reserved > 0 in
++ * that case.
++ */
++ if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
++ !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
++ if (WARN_ON(space_info->bytes_reserved > 0))
++ btrfs_dump_space_info(info, space_info, 0, 0);
++ }
++
+ WARN_ON(space_info->reclaim_size > 0);
+ list_del(&space_info->list);
+ btrfs_sysfs_remove_space_info(space_info);
+diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
+index c72a71efcb187..a15868d607a92 100644
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -98,6 +98,7 @@ struct btrfs_block_group {
+ unsigned int to_copy:1;
+ unsigned int relocating_repair:1;
+ unsigned int chunk_item_inserted:1;
++ unsigned int zoned_data_reloc_ongoing:1;
+
+ int disk_cache_state;
+
+@@ -250,9 +251,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
+ void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
+ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
+ u64 num_bytes);
+-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
+-int btrfs_cache_block_group(struct btrfs_block_group *cache,
+- int load_cache_only);
++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
+ void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
+ struct btrfs_caching_control *btrfs_get_caching_control(
+ struct btrfs_block_group *cache);
+@@ -289,6 +288,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force);
+ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
+ void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
++void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
++ bool is_item_insertion);
+ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
+ void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
+ int btrfs_free_block_groups(struct btrfs_fs_info *info);
+diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
+index 04a6226e03888..aff09ffddb32a 100644
+--- a/fs/btrfs/block-rsv.c
++++ b/fs/btrfs/block-rsv.c
+@@ -121,7 +121,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
+ } else {
+ num_bytes = 0;
+ }
+- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
++ if (qgroup_to_release_ret &&
++ block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+ block_rsv->qgroup_rsv_size;
+ block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
+index 76ee1452c57ba..37ceea85b871c 100644
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -13,6 +13,13 @@
+ #include "ordered-data.h"
+ #include "delayed-inode.h"
+
++/*
++ * Since we search a directory based on f_pos (struct dir_context::pos) we have
++ * to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
++ * everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
++ */
++#define BTRFS_DIR_START_INDEX 2
++
+ /*
+ * ordered_data_close is set by truncate when a file that used
+ * to have good data has been truncated to zero. When it is set
+@@ -164,8 +171,9 @@ struct btrfs_inode {
+ u64 disk_i_size;
+
+ /*
+- * if this is a directory then index_cnt is the counter for the index
+- * number for new files that are created
++ * If this is a directory then index_cnt is the counter for the index
++ * number for new files that are created. For an empty directory, this
++ * must be initialized to BTRFS_DIR_START_INDEX.
+ */
+ u64 index_cnt;
+
+diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
+index 86816088927f1..81b11124b67a8 100644
+--- a/fs/btrfs/check-integrity.c
++++ b/fs/btrfs/check-integrity.c
+@@ -1455,7 +1455,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
+ struct btrfs_fs_info *fs_info = state->fs_info;
+ int ret;
+ u64 length;
+- struct btrfs_bio *multi = NULL;
++ struct btrfs_io_context *multi = NULL;
+ struct btrfs_device *device;
+
+ length = len;
+diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
+index 0913ee50e6c34..701fbd1b56766 100644
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -550,7 +550,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
+ u64 isize = i_size_read(inode);
+ int ret;
+ struct page *page;
+- unsigned long nr_pages = 0;
+ struct extent_map *em;
+ struct address_space *mapping = inode->i_mapping;
+ struct extent_map_tree *em_tree;
+@@ -646,7 +645,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
+ PAGE_SIZE, 0);
+
+ if (ret == PAGE_SIZE) {
+- nr_pages++;
+ put_page(page);
+ } else {
+ unlock_extent(tree, last_offset, end);
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index 84627cbd5b5b5..a648dff2becec 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -457,13 +457,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+ parent_start = buf->start;
+
+- atomic_inc(&cow->refs);
+ ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
+- BUG_ON(ret < 0);
++ if (ret < 0) {
++ btrfs_tree_unlock(cow);
++ free_extent_buffer(cow);
++ btrfs_abort_transaction(trans, ret);
++ return ret;
++ }
++ atomic_inc(&cow->refs);
+ rcu_assign_pointer(root->node, cow);
+
+- btrfs_free_tree_block(trans, root, buf, parent_start,
+- last_ref);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
++ parent_start, last_ref);
+ free_extent_buffer(buf);
+ add_root_to_dirty_list(root);
+ } else {
+@@ -484,8 +489,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ return ret;
+ }
+ }
+- btrfs_free_tree_block(trans, root, buf, parent_start,
+- last_ref);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
++ parent_start, last_ref);
+ }
+ if (unlock_orig)
+ btrfs_tree_unlock(buf);
+@@ -912,7 +917,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ }
+
+ ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
+- BUG_ON(ret < 0);
++ if (ret < 0) {
++ btrfs_tree_unlock(child);
++ free_extent_buffer(child);
++ btrfs_abort_transaction(trans, ret);
++ goto enospc;
++ }
+ rcu_assign_pointer(root->node, child);
+
+ add_root_to_dirty_list(root);
+@@ -926,7 +936,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ free_extent_buffer(mid);
+
+ root_sub_used(root, mid->len);
+- btrfs_free_tree_block(trans, root, mid, 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
+ /* once for the root ptr */
+ free_extent_buffer_stale(mid);
+ return 0;
+@@ -985,7 +995,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ btrfs_tree_unlock(right);
+ del_ptr(root, path, level + 1, pslot + 1);
+ root_sub_used(root, right->len);
+- btrfs_free_tree_block(trans, root, right, 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), right,
++ 0, 1);
+ free_extent_buffer_stale(right);
+ right = NULL;
+ } else {
+@@ -993,7 +1004,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ btrfs_node_key(right, &right_key, 0);
+ ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
+ BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
+- BUG_ON(ret < 0);
++ if (ret < 0) {
++ btrfs_abort_transaction(trans, ret);
++ goto enospc;
++ }
+ btrfs_set_node_key(parent, &right_key, pslot + 1);
+ btrfs_mark_buffer_dirty(parent);
+ }
+@@ -1030,7 +1044,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ btrfs_tree_unlock(mid);
+ del_ptr(root, path, level + 1, pslot);
+ root_sub_used(root, mid->len);
+- btrfs_free_tree_block(trans, root, mid, 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
+ free_extent_buffer_stale(mid);
+ mid = NULL;
+ } else {
+@@ -1039,7 +1053,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ btrfs_node_key(mid, &mid_key, 0);
+ ret = btrfs_tree_mod_log_insert_key(parent, pslot,
+ BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
+- BUG_ON(ret < 0);
++ if (ret < 0) {
++ btrfs_abort_transaction(trans, ret);
++ goto enospc;
++ }
+ btrfs_set_node_key(parent, &mid_key, pslot);
+ btrfs_mark_buffer_dirty(parent);
+ }
+@@ -1566,35 +1583,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+ struct btrfs_path *p,
+ int write_lock_level)
+ {
+- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *b;
+- int root_lock;
++ int root_lock = 0;
+ int level = 0;
+
+- /* We try very hard to do read locks on the root */
+- root_lock = BTRFS_READ_LOCK;
+-
+ if (p->search_commit_root) {
+- /*
+- * The commit roots are read only so we always do read locks,
+- * and we always must hold the commit_root_sem when doing
+- * searches on them, the only exception is send where we don't
+- * want to block transaction commits for a long time, so
+- * we need to clone the commit root in order to avoid races
+- * with transaction commits that create a snapshot of one of
+- * the roots used by a send operation.
+- */
+- if (p->need_commit_sem) {
+- down_read(&fs_info->commit_root_sem);
+- b = btrfs_clone_extent_buffer(root->commit_root);
+- up_read(&fs_info->commit_root_sem);
+- if (!b)
+- return ERR_PTR(-ENOMEM);
+-
+- } else {
+- b = root->commit_root;
+- atomic_inc(&b->refs);
+- }
++ b = root->commit_root;
++ atomic_inc(&b->refs);
+ level = btrfs_header_level(b);
+ /*
+ * Ensure that all callers have set skip_locking when
+@@ -1611,6 +1606,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+ goto out;
+ }
+
++ /* We try very hard to do read locks on the root */
++ root_lock = BTRFS_READ_LOCK;
++
+ /*
+ * If the level is set to maximum, we can skip trying to get the read
+ * lock.
+@@ -1637,6 +1635,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+ level = btrfs_header_level(b);
+
+ out:
++ /*
++ * The root may have failed to write out at some point, and thus is no
++ * longer valid, return an error in this case.
++ */
++ if (!extent_buffer_uptodate(b)) {
++ if (root_lock)
++ btrfs_tree_unlock_rw(b, root_lock);
++ free_extent_buffer(b);
++ return ERR_PTR(-EIO);
++ }
++
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = root_lock;
+@@ -1646,6 +1655,42 @@ out:
+ return b;
+ }
+
++/*
++ * Replace the extent buffer at the lowest level of the path with a cloned
++ * version. The purpose is to be able to use it safely, after releasing the
++ * commit root semaphore, even if relocation is happening in parallel, the
++ * transaction used for relocation is committed and the extent buffer is
++ * reallocated in the next transaction.
++ *
++ * This is used in a context where the caller does not prevent transaction
++ * commits from happening, either by holding a transaction handle or holding
++ * some lock, while it's doing searches through a commit root.
++ * At the moment it's only used for send operations.
++ */
++static int finish_need_commit_sem_search(struct btrfs_path *path)
++{
++ const int i = path->lowest_level;
++ const int slot = path->slots[i];
++ struct extent_buffer *lowest = path->nodes[i];
++ struct extent_buffer *clone;
++
++ ASSERT(path->need_commit_sem);
++
++ if (!lowest)
++ return 0;
++
++ lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
++
++ clone = btrfs_clone_extent_buffer(lowest);
++ if (!clone)
++ return -ENOMEM;
++
++ btrfs_release_path(path);
++ path->nodes[i] = clone;
++ path->slots[i] = slot;
++
++ return 0;
++}
+
+ /*
+ * btrfs_search_slot - look for a key in a tree and perform necessary
+@@ -1682,6 +1727,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow)
+ {
++ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *b;
+ int slot;
+ int ret;
+@@ -1723,6 +1769,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+
+ min_write_lock_level = write_lock_level;
+
++ if (p->need_commit_sem) {
++ ASSERT(p->search_commit_root);
++ down_read(&fs_info->commit_root_sem);
++ }
++
+ again:
+ prev_cmp = -1;
+ b = btrfs_search_slot_get_root(root, p, write_lock_level);
+@@ -1903,6 +1954,9 @@ cow_done:
+
+ if (!p->skip_locking) {
+ level = btrfs_header_level(b);
++
++ btrfs_maybe_reset_lockdep_class(root, b);
++
+ if (level <= write_lock_level) {
+ btrfs_tree_lock(b);
+ p->locks[level] = BTRFS_WRITE_LOCK;
+@@ -1917,6 +1971,16 @@ cow_done:
+ done:
+ if (ret < 0 && !p->skip_release_on_error)
+ btrfs_release_path(p);
++
++ if (p->need_commit_sem) {
++ int ret2;
++
++ ret2 = finish_need_commit_sem_search(p);
++ up_read(&fs_info->commit_root_sem);
++ if (ret2)
++ ret = ret2;
++ }
++
+ return ret;
+ }
+ ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
+@@ -2578,6 +2642,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
+
+ ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
+ if (ret) {
++ btrfs_tree_unlock(split);
++ free_extent_buffer(split);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+@@ -2859,6 +2925,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+
+ if (check_sibling_keys(left, right)) {
+ ret = -EUCLEAN;
++ btrfs_abort_transaction(trans, ret);
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+@@ -3102,6 +3169,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+
+ if (check_sibling_keys(left, right)) {
+ ret = -EUCLEAN;
++ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ return __push_leaf_left(path, min_data_size,
+@@ -4015,7 +4083,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ root_sub_used(root, leaf->len);
+
+ atomic_inc(&leaf->refs);
+- btrfs_free_tree_block(trans, root, leaf, 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
+ free_extent_buffer_stale(leaf);
+ }
+ /*
+@@ -4141,10 +4209,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+ {
+ struct btrfs_key key;
++ struct btrfs_key orig_key;
+ struct btrfs_disk_key found_key;
+ int ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
++ orig_key = key;
+
+ if (key.offset > 0) {
+ key.offset--;
+@@ -4161,8 +4231,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+- if (ret < 0)
++ if (ret <= 0)
+ return ret;
++
++ /*
++ * Previous key not found. Even if we were at slot 0 of the leaf we had
++ * before releasing the path and calling btrfs_search_slot(), we now may
++ * be in a slot pointing to the same original key - this can happen if
++ * after we released the path, one or more items were moved from a
++ * sibling leaf into the front of the leaf we had due to an insertion
++ * (see push_leaf_right()).
++ * If we hit this case and our slot is > 0, just decrement the slot
++ * so that the caller does not process the same key again, which may or
++ * may not break the caller, depending on its logic.
++ */
++ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
++ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
++ ret = comp_keys(&found_key, &orig_key);
++ if (ret == 0) {
++ if (path->slots[0] > 0) {
++ path->slots[0]--;
++ return 0;
++ }
++ /*
++ * At slot 0, same key as before, it means orig_key is
++ * the lowest, leftmost, key in the tree. We're done.
++ */
++ return 1;
++ }
++ }
++
+ btrfs_item_key(path->nodes[0], &found_key, 0);
+ ret = comp_keys(&found_key, &key);
+ /*
+@@ -4385,7 +4483,9 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+ int level;
+ struct extent_buffer *c;
+ struct extent_buffer *next;
++ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_key key;
++ bool need_commit_sem = false;
+ u32 nritems;
+ int ret;
+ int i;
+@@ -4402,14 +4502,20 @@ again:
+
+ path->keep_locks = 1;
+
+- if (time_seq)
++ if (time_seq) {
+ ret = btrfs_search_old_slot(root, &key, path, time_seq);
+- else
++ } else {
++ if (path->need_commit_sem) {
++ path->need_commit_sem = 0;
++ need_commit_sem = true;
++ down_read(&fs_info->commit_root_sem);
++ }
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++ }
+ path->keep_locks = 0;
+
+ if (ret < 0)
+- return ret;
++ goto done;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+@@ -4532,6 +4638,15 @@ again:
+ ret = 0;
+ done:
+ unlock_up(path, 0, 1, 0, NULL);
++ if (need_commit_sem) {
++ int ret2;
++
++ path->need_commit_sem = 1;
++ ret2 = finish_need_commit_sem_search(path);
++ up_read(&fs_info->commit_root_sem);
++ if (ret2)
++ ret = ret2;
++ }
+
+ return ret;
+ }
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index c0cebcf745cef..02d3ee6c7d9b0 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -105,14 +105,6 @@ struct btrfs_ref;
+ #define BTRFS_STAT_CURR 0
+ #define BTRFS_STAT_PREV 1
+
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+- return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+ BUG_ON(num_stripes == 0);
+@@ -142,6 +134,9 @@ enum {
+ BTRFS_FS_STATE_DEV_REPLACING,
+ /* The btrfs_fs_info created for self-tests */
+ BTRFS_FS_STATE_DUMMY_FS_INFO,
++
++ /* Indicates there was an error cleaning up a log tree. */
++ BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+ };
+
+ #define BTRFS_BACKREF_REV_MAX 256
+@@ -459,7 +454,6 @@ struct btrfs_free_cluster {
+ enum btrfs_caching_type {
+ BTRFS_CACHE_NO,
+ BTRFS_CACHE_STARTED,
+- BTRFS_CACHE_FAST,
+ BTRFS_CACHE_FINISHED,
+ BTRFS_CACHE_ERROR,
+ };
+@@ -568,7 +562,6 @@ enum {
+ /*
+ * Indicate that relocation of a chunk has started, it's set per chunk
+ * and is toggled between chunks.
+- * Set, tested and cleared while holding fs_info::send_reloc_lock.
+ */
+ BTRFS_FS_RELOC_RUNNING,
+
+@@ -593,6 +586,9 @@ enum {
+ /* Indicate whether there are any tree modification log users */
+ BTRFS_FS_TREE_MOD_LOG_USERS,
+
++ /* Indicate we have half completed snapshot deletions pending. */
++ BTRFS_FS_UNFINISHED_DROPS,
++
+ #if BITS_PER_LONG == 32
+ /* Indicate if we have error/warn message printed on 32bit systems */
+ BTRFS_FS_32BIT_ERROR,
+@@ -665,6 +661,12 @@ struct btrfs_fs_info {
+
+ u64 generation;
+ u64 last_trans_committed;
++ /*
++ * Generation of the last transaction used for block group relocation
++ * since the filesystem was last mounted (or 0 if none happened yet).
++ * Must be written and read while holding btrfs_fs_info::commit_root_sem.
++ */
++ u64 last_reloc_trans;
+ u64 avg_delayed_ref_runtime;
+
+ /*
+@@ -988,19 +990,18 @@ struct btrfs_fs_info {
+ u32 csums_per_leaf;
+ u32 stripesize;
+
++ /*
++ * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
++ * filesystem, on zoned it depends on the device constraints.
++ */
++ u64 max_extent_size;
++
+ /* Block groups and devices containing active swapfiles. */
+ spinlock_t swapfile_pins_lock;
+ struct rb_root swapfile_pins;
+
+ struct crypto_shash *csum_shash;
+
+- spinlock_t send_reloc_lock;
+- /*
+- * Number of send operations in progress.
+- * Updated while holding fs_info::send_reloc_lock.
+- */
+- int send_in_progress;
+-
+ /* Type of exclusive operation running, protected by super_lock */
+ enum btrfs_exclusive_operation exclusive_operation;
+
+@@ -1013,10 +1014,20 @@ struct btrfs_fs_info {
+ u64 zoned;
+ };
+
++ /* Max size to emit ZONE_APPEND write command */
++ u64 max_zone_append_size;
+ struct mutex zoned_meta_io_lock;
+ spinlock_t treelog_bg_lock;
+ u64 treelog_bg;
+
++ /*
++ * Start of the dedicated data relocation block group, protected by
++ * relocation_bg_lock.
++ */
++ spinlock_t relocation_bg_lock;
++ u64 data_reloc_bg;
++ struct mutex zoned_data_reloc_io_lock;
++
+ #ifdef CONFIG_BTRFS_FS_REF_VERIFY
+ spinlock_t ref_verify_lock;
+ struct rb_root block_tree;
+@@ -1091,8 +1102,17 @@ enum {
+ BTRFS_ROOT_HAS_LOG_TREE,
+ /* Qgroup flushing is in progress */
+ BTRFS_ROOT_QGROUP_FLUSHING,
++ /* This root has a drop operation that was started previously. */
++ BTRFS_ROOT_UNFINISHED_DROP,
++ /* This reloc root needs to have its buffers lockdep class reset. */
++ BTRFS_ROOT_RESET_LOCKDEP_CLASS,
+ };
+
++static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
++{
++ clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
++}
++
+ /*
+ * Record swapped tree blocks of a subvolume tree for delayed subtree trace
+ * code. For detail check comment in fs/btrfs/qgroup.c.
+@@ -2238,6 +2258,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root)
+ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+ }
+
++static inline u64 btrfs_root_id(const struct btrfs_root *root)
++{
++ return root->root_key.objectid;
++}
++
+ /* struct btrfs_root_backup */
+ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+ tree_root, 64);
+@@ -2700,7 +2725,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
+ u64 empty_size,
+ enum btrfs_lock_nesting nest);
+ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
++ u64 root_id,
+ struct extent_buffer *buf,
+ u64 parent, int last_ref);
+ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+@@ -3142,7 +3167,6 @@ void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
+ int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
+ const char *name, int name_len);
+ int btrfs_add_link(struct btrfs_trans_handle *trans,
+@@ -3563,6 +3587,10 @@ do { \
+ (errno), fmt, ##args); \
+ } while (0)
+
++#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
++ (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
++ &(fs_info)->fs_state)))
++
+ __printf(5, 6)
+ __cold
+ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
+@@ -3842,6 +3870,24 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
+ return fs_info->zoned != 0;
+ }
+
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++ if (!fs_info)
++ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++ return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
++static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
++{
++ return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
++}
++
+ /*
+ * We use page status Private2 to indicate there is an ordered extent with
+ * unfinished IO.
+diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
+index 2059d1504149a..b934429c24350 100644
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -143,10 +143,13 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
+
+ /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
+ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
+- if (ret < 0)
++ if (ret < 0) {
+ btrfs_free_reserved_data_space_noquota(fs_info, len);
+- else
++ extent_changeset_free(*reserved);
++ *reserved = NULL;
++ } else {
+ ret = 0;
++ }
+ return ret;
+ }
+
+@@ -270,7 +273,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+ u64 num_bytes, u64 *meta_reserve,
+ u64 *qgroup_reserve)
+ {
+- u64 nr_extents = count_max_extents(num_bytes);
++ u64 nr_extents = count_max_extents(fs_info, num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+ u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+
+@@ -344,7 +347,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+ * needs to free the reservation we just made.
+ */
+ spin_lock(&inode->lock);
+- nr_extents = count_max_extents(num_bytes);
++ nr_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, nr_extents);
+ inode->csum_bytes += num_bytes;
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -407,7 +410,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+ unsigned num_extents;
+
+ spin_lock(&inode->lock);
+- num_extents = count_max_extents(num_bytes);
++ num_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, -num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+@@ -452,8 +455,11 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
+ if (ret < 0)
+ return ret;
+ ret = btrfs_delalloc_reserve_metadata(inode, len);
+- if (ret < 0)
++ if (ret < 0) {
+ btrfs_free_reserved_data_space(inode, *reserved, start, len);
++ extent_changeset_free(*reserved);
++ *reserved = NULL;
++ }
+ return ret;
+ }
+
+diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
+index e22fba272e4fd..31266ba1d4300 100644
+--- a/fs/btrfs/delayed-ref.h
++++ b/fs/btrfs/delayed-ref.h
+@@ -271,7 +271,7 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
+ }
+
+ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
+- int level, u64 root)
++ int level, u64 root, u64 mod_root, bool skip_qgroup)
+ {
+ /* If @real_root not set, use @root as fallback */
+ if (!generic_ref->real_root)
+@@ -282,7 +282,8 @@ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
+ }
+
+ static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
+- u64 ref_root, u64 ino, u64 offset)
++ u64 ref_root, u64 ino, u64 offset, u64 mod_root,
++ bool skip_qgroup)
+ {
+ /* If @real_root not set, use @root as fallback */
+ if (!generic_ref->real_root)
+diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
+index d029be40ea6f0..03d8a2d49bf41 100644
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data);
+
+ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
+ {
++ struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID };
+ struct btrfs_key key;
+ struct btrfs_root *dev_root = fs_info->dev_root;
+ struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+@@ -100,8 +101,7 @@ no_valid_dev_replace_entry_found:
+ * We don't have a replace item or it's corrupted. If there is
+ * a replace target, fail the mount.
+ */
+- if (btrfs_find_device(fs_info->fs_devices,
+- BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
++ if (btrfs_find_device(fs_info->fs_devices, &args)) {
+ btrfs_err(fs_info,
+ "found replace target device without a valid replace item");
+ ret = -EUCLEAN;
+@@ -163,10 +163,9 @@ no_valid_dev_replace_entry_found:
+ * We don't have an active replace item but if there is a
+ * replace target, fail the mount.
+ */
+- if (btrfs_find_device(fs_info->fs_devices,
+- BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
++ if (btrfs_find_device(fs_info->fs_devices, &args)) {
+ btrfs_err(fs_info,
+- "replace devid present without an active replace item");
++"replace without active item, run 'device scan --forget' on the target device");
+ ret = -EUCLEAN;
+ } else {
+ dev_replace->srcdev = NULL;
+@@ -175,11 +174,10 @@ no_valid_dev_replace_entry_found:
+ break;
+ case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
+ case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
+- dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
+- src_devid, NULL, NULL);
+- dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
+- BTRFS_DEV_REPLACE_DEVID,
+- NULL, NULL);
++ dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args);
++ args.devid = src_devid;
++ dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args);
++
+ /*
+ * allow 'btrfs dev replace_cancel' if src/tgt device is
+ * missing
+@@ -325,7 +323,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+ set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+ device->fs_devices = fs_info->fs_devices;
+
+- ret = btrfs_get_dev_zone_info(device);
++ ret = btrfs_get_dev_zone_info(device, false);
+ if (ret)
+ goto error;
+
+@@ -1153,8 +1151,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
+ up_write(&dev_replace->rwsem);
+
+ /* Scrub for replace must not be running in suspended state */
+- ret = btrfs_scrub_cancel(fs_info);
+- ASSERT(ret != -ENOTCONN);
++ btrfs_scrub_cancel(fs_info);
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
+index e1b7bd927d691..bd9dde374e5d8 100644
+--- a/fs/btrfs/discard.c
++++ b/fs/btrfs/discard.c
+@@ -77,6 +77,7 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+ {
++ lockdep_assert_held(&discard_ctl->lock);
+ if (!btrfs_run_discard_work(discard_ctl))
+ return;
+
+@@ -88,6 +89,8 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ BTRFS_DISCARD_DELAY);
+ block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
+ }
++ if (list_empty(&block_group->discard_list))
++ btrfs_get_block_group(block_group);
+
+ list_move_tail(&block_group->discard_list,
+ get_discard_list(discard_ctl, block_group));
+@@ -107,8 +110,12 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+ {
++ bool queued;
++
+ spin_lock(&discard_ctl->lock);
+
++ queued = !list_empty(&block_group->discard_list);
++
+ if (!btrfs_run_discard_work(discard_ctl)) {
+ spin_unlock(&discard_ctl->lock);
+ return;
+@@ -120,6 +127,8 @@ static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
+ block_group->discard_eligible_time = (ktime_get_ns() +
+ BTRFS_DISCARD_UNUSED_DELAY);
+ block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
++ if (!queued)
++ btrfs_get_block_group(block_group);
+ list_add_tail(&block_group->discard_list,
+ &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
+
+@@ -130,6 +139,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+ {
+ bool running = false;
++ bool queued = false;
+
+ spin_lock(&discard_ctl->lock);
+
+@@ -139,7 +149,16 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ }
+
+ block_group->discard_eligible_time = 0;
++ queued = !list_empty(&block_group->discard_list);
+ list_del_init(&block_group->discard_list);
++ /*
++ * If the block group is currently running in the discard workfn, we
++ * don't want to deref it, since it's still being used by the workfn.
++ * The workfn will notice this case and deref the block group when it is
++ * finished.
++ */
++ if (queued && !running)
++ btrfs_put_block_group(block_group);
+
+ spin_unlock(&discard_ctl->lock);
+
+@@ -212,10 +231,12 @@ again:
+ if (block_group && now >= block_group->discard_eligible_time) {
+ if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
+ block_group->used != 0) {
+- if (btrfs_is_block_group_data_only(block_group))
++ if (btrfs_is_block_group_data_only(block_group)) {
+ __add_to_discard_list(discard_ctl, block_group);
+- else
++ } else {
+ list_del_init(&block_group->discard_list);
++ btrfs_put_block_group(block_group);
++ }
+ goto again;
+ }
+ if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
+@@ -502,6 +523,15 @@ static void btrfs_discard_workfn(struct work_struct *work)
+ spin_lock(&discard_ctl->lock);
+ discard_ctl->prev_discard = trimmed;
+ discard_ctl->prev_discard_time = now;
++ /*
++ * If the block group was removed from the discard list while it was
++ * running in this workfn, then we didn't deref it, since this function
++ * still owned that reference. But we set the discard_ctl->block_group
++ * back to NULL, so we can use that condition to know that now we need
++ * to deref the block_group.
++ */
++ if (discard_ctl->block_group == NULL)
++ btrfs_put_block_group(block_group);
+ discard_ctl->block_group = NULL;
+ __btrfs_discard_schedule_work(discard_ctl, now, false);
+ spin_unlock(&discard_ctl->lock);
+@@ -638,8 +668,12 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
+ list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
+ bg_list) {
+ list_del_init(&block_group->bg_list);
+- btrfs_put_block_group(block_group);
+ btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
++ /*
++ * This put is for the get done by btrfs_mark_bg_unused.
++ * Queueing discard incremented it for discard's reference.
++ */
++ btrfs_put_block_group(block_group);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ }
+@@ -669,6 +703,7 @@ static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
+ if (block_group->used == 0)
+ btrfs_mark_bg_unused(block_group);
+ spin_lock(&discard_ctl->lock);
++ btrfs_put_block_group(block_group);
+ }
+ }
+ spin_unlock(&discard_ctl->lock);
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 355ea88d5c5f7..6e0fdfd98f234 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -121,88 +121,6 @@ struct async_submit_bio {
+ blk_status_t status;
+ };
+
+-/*
+- * Lockdep class keys for extent_buffer->lock's in this root. For a given
+- * eb, the lockdep key is determined by the btrfs_root it belongs to and
+- * the level the eb occupies in the tree.
+- *
+- * Different roots are used for different purposes and may nest inside each
+- * other and they require separate keysets. As lockdep keys should be
+- * static, assign keysets according to the purpose of the root as indicated
+- * by btrfs_root->root_key.objectid. This ensures that all special purpose
+- * roots have separate keysets.
+- *
+- * Lock-nesting across peer nodes is always done with the immediate parent
+- * node locked thus preventing deadlock. As lockdep doesn't know this, use
+- * subclass to avoid triggering lockdep warning in such cases.
+- *
+- * The key is set by the readpage_end_io_hook after the buffer has passed
+- * csum validation but before the pages are unlocked. It is also set by
+- * btrfs_init_new_buffer on freshly allocated blocks.
+- *
+- * We also add a check to make sure the highest level of the tree is the
+- * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
+- * needs update as well.
+- */
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# if BTRFS_MAX_LEVEL != 8
+-# error
+-# endif
+-
+-#define DEFINE_LEVEL(stem, level) \
+- .names[level] = "btrfs-" stem "-0" #level,
+-
+-#define DEFINE_NAME(stem) \
+- DEFINE_LEVEL(stem, 0) \
+- DEFINE_LEVEL(stem, 1) \
+- DEFINE_LEVEL(stem, 2) \
+- DEFINE_LEVEL(stem, 3) \
+- DEFINE_LEVEL(stem, 4) \
+- DEFINE_LEVEL(stem, 5) \
+- DEFINE_LEVEL(stem, 6) \
+- DEFINE_LEVEL(stem, 7)
+-
+-static struct btrfs_lockdep_keyset {
+- u64 id; /* root objectid */
+- /* Longest entry: btrfs-free-space-00 */
+- char names[BTRFS_MAX_LEVEL][20];
+- struct lock_class_key keys[BTRFS_MAX_LEVEL];
+-} btrfs_lockdep_keysets[] = {
+- { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
+- { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
+- { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
+- { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
+- { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
+- { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
+- { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
+- { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
+- { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
+- { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
+- { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
+- { .id = 0, DEFINE_NAME("tree") },
+-};
+-
+-#undef DEFINE_LEVEL
+-#undef DEFINE_NAME
+-
+-void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+- int level)
+-{
+- struct btrfs_lockdep_keyset *ks;
+-
+- BUG_ON(level >= ARRAY_SIZE(ks->keys));
+-
+- /* find the matching keyset, id 0 is the default entry */
+- for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+- if (ks->id == objectid)
+- break;
+-
+- lockdep_set_class_and_name(&eb->lock,
+- &ks->keys[level], ks->names[level]);
+-}
+-
+-#endif
+-
+ /*
+ * Compute the csum of a btree block and store the result to provided buffer.
+ */
+@@ -221,7 +139,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
+ crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
+ first_page_part - BTRFS_CSUM_SIZE);
+
+- for (i = 1; i < num_pages; i++) {
++ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
+ kaddr = page_address(buf->pages[i]);
+ crypto_shash_update(shash, kaddr, PAGE_SIZE);
+ }
+@@ -284,11 +202,9 @@ static bool btrfs_supported_super_csum(u16 csum_type)
+ * Return 0 if the superblock checksum type matches the checksum value of that
+ * algorithm. Pass the raw disk superblock data.
+ */
+-static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
+- char *raw_disk_sb)
++int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
++ const struct btrfs_super_block *disk_sb)
+ {
+- struct btrfs_super_block *disk_sb =
+- (struct btrfs_super_block *)raw_disk_sb;
+ char result[BTRFS_CSUM_SIZE];
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+
+@@ -299,7 +215,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
+ * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
+ * filled with zeros and is included in the checksum.
+ */
+- crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
++ crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
+
+ if (memcmp(disk_sb->csum, result, fs_info->csum_size))
+@@ -441,17 +357,38 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
+ else
+ ret = btrfs_check_leaf_full(eb);
+
+- if (ret < 0) {
+- btrfs_print_tree(eb, 0);
++ if (ret < 0)
++ goto error;
++
++ /*
++	 * Also check the generation: any eb reaching here must be newer than
++	 * the last committed generation, or something seriously wrong happened.
++ */
++ if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
++ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+- "block=%llu write time tree block corruption detected",
+- eb->start);
+- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+- return ret;
++ "block=%llu bad generation, have %llu expect > %llu",
++ eb->start, btrfs_header_generation(eb),
++ fs_info->last_trans_committed);
++ goto error;
+ }
+ write_extent_buffer(eb, result, 0, fs_info->csum_size);
+
+ return 0;
++
++error:
++ btrfs_print_tree(eb, 0);
++ btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
++ eb->start);
++ /*
++ * Be noisy if this is an extent buffer from a log tree. We don't abort
++ * a transaction in case there's a bad log tree extent buffer, we just
++	 * a transaction in case there's a bad log tree extent buffer; we just
++	 * fall back to a transaction commit. Still, we want to know when there is
++ */
++ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
++ btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
++ return ret;
+ }
+
+ /* Checksum all dirty extent buffers in one bio_vec */
+@@ -1500,7 +1437,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
+ goto fail;
+
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
+- root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
++ !btrfs_is_data_reloc_root(root) &&
++ is_fstree(root->root_key.objectid)) {
+ set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
+ btrfs_check_and_init_root_item(&root->root_item);
+ }
+@@ -1724,13 +1662,22 @@ again:
+
+ ret = btrfs_insert_fs_root(fs_info, root);
+ if (ret) {
+- btrfs_put_root(root);
+- if (ret == -EEXIST)
++ if (ret == -EEXIST) {
++ btrfs_put_root(root);
+ goto again;
++ }
+ goto fail;
+ }
+ return root;
+ fail:
++ /*
++	 * If our caller provided us an anonymous device, then it's the caller's
++	 * responsibility to free it in case we fail. So we have to set our
++ * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
++ * and once again by our caller.
++ */
++ if (anon_dev)
++ root->anon_dev = 0;
+ btrfs_put_root(root);
+ return ERR_PTR(ret);
+ }
+@@ -2372,6 +2319,23 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
+
+ fs_info->csum_shash = csum_shash;
+
++ /*
++ * Check if the checksum implementation is a fast accelerated one.
++ * As-is this is a bit of a hack and should be replaced once the csum
++ * implementations provide that information themselves.
++ */
++ switch (csum_type) {
++ case BTRFS_CSUM_TYPE_CRC32:
++ if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
++ set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
++ break;
++ default:
++ break;
++ }
++
++ btrfs_info(fs_info, "using %s (%s) checksum algorithm",
++ btrfs_super_csum_name(csum_type),
++ crypto_shash_driver_name(csum_shash));
+ return 0;
+ }
+
+@@ -2463,7 +2427,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
+ fs_info->dev_root = root;
+ }
+ /* Initialize fs_info for all devices in any case */
+- btrfs_init_devices_late(fs_info);
++ ret = btrfs_init_devices_late(fs_info);
++ if (ret)
++ goto out;
+
+ /* If IGNOREDATACSUMS is set don't bother reading the csum root. */
+ if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
+@@ -2548,8 +2514,8 @@ out:
+ * 1, 2 2nd and 3rd backup copy
+ * -1 skip bytenr check
+ */
+-static int validate_super(struct btrfs_fs_info *fs_info,
+- struct btrfs_super_block *sb, int mirror_num)
++int btrfs_validate_super(struct btrfs_fs_info *fs_info,
++ struct btrfs_super_block *sb, int mirror_num)
+ {
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
+@@ -2632,11 +2598,10 @@ static int validate_super(struct btrfs_fs_info *fs_info,
+ ret = -EINVAL;
+ }
+
+- if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+- BTRFS_FSID_SIZE)) {
++ if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
+- fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
++ sb->fsid, fs_info->fs_devices->fsid);
+ ret = -EINVAL;
+ }
+
+@@ -2732,7 +2697,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
+ */
+ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
+ {
+- return validate_super(fs_info, fs_info->super_copy, 0);
++ return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
+ }
+
+ /*
+@@ -2746,7 +2711,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
+ {
+ int ret;
+
+- ret = validate_super(fs_info, sb, -1);
++ ret = btrfs_validate_super(fs_info, sb, -1);
+ if (ret < 0)
+ goto out;
+ if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
+@@ -2851,6 +2816,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
+ /* All successful */
+ fs_info->generation = generation;
+ fs_info->last_trans_committed = generation;
++ fs_info->last_reloc_trans = 0;
+
+ /* Always begin writing backup roots after the one being used */
+ if (backup_index < 0) {
+@@ -2883,12 +2849,14 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+ spin_lock_init(&fs_info->buffer_lock);
+ spin_lock_init(&fs_info->unused_bgs_lock);
+ spin_lock_init(&fs_info->treelog_bg_lock);
++ spin_lock_init(&fs_info->relocation_bg_lock);
+ rwlock_init(&fs_info->tree_mod_log_lock);
+ mutex_init(&fs_info->unused_bg_unpin_mutex);
+ mutex_init(&fs_info->reclaim_bgs_lock);
+ mutex_init(&fs_info->reloc_mutex);
+ mutex_init(&fs_info->delalloc_root_mutex);
+ mutex_init(&fs_info->zoned_meta_io_lock);
++ mutex_init(&fs_info->zoned_data_reloc_io_lock);
+ seqlock_init(&fs_info->profiles_lock);
+
+ INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+@@ -2980,12 +2948,11 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+ fs_info->sectorsize_bits = ilog2(4096);
+ fs_info->stripesize = 4096;
+
++ fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
++
+ spin_lock_init(&fs_info->swapfile_pins_lock);
+ fs_info->swapfile_pins = RB_ROOT;
+
+- spin_lock_init(&fs_info->send_reloc_lock);
+- fs_info->send_in_progress = 0;
+-
+ fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
+ INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
+ }
+@@ -3228,12 +3195,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
+ btrfs_init_btree_inode(fs_info);
+
+- invalidate_bdev(fs_devices->latest_bdev);
++ invalidate_bdev(fs_devices->latest_dev->bdev);
+
+ /*
+ * Read super block and check the signature bytes only
+ */
+- disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
++ disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
+ if (IS_ERR(disk_super)) {
+ err = PTR_ERR(disk_super);
+ goto fail_alloc;
+@@ -3265,7 +3232,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ * We want to check superblock checksum, the type is stored inside.
+ * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
+ */
+- if (btrfs_check_super_csum(fs_info, (u8 *)disk_super)) {
++ if (btrfs_check_super_csum(fs_info, disk_super)) {
+ btrfs_err(fs_info, "superblock checksum mismatch");
+ err = -EINVAL;
+ btrfs_release_disk_super(disk_super);
+@@ -3314,16 +3281,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ */
+ fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+
+- /*
+- * Flag our filesystem as having big metadata blocks if they are bigger
+- * than the page size.
+- */
+- if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+- if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+- btrfs_info(fs_info,
+- "flagging fs with big metadata feature");
+- features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+- }
+
+ /* Set up fs_info before parsing mount options */
+ nodesize = btrfs_super_nodesize(disk_super);
+@@ -3348,7 +3305,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ ~BTRFS_FEATURE_INCOMPAT_SUPP;
+ if (features) {
+ btrfs_err(fs_info,
+- "cannot mount because of unsupported optional features (%llx)",
++ "cannot mount because of unsupported optional features (0x%llx)",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+@@ -3364,6 +3321,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+ btrfs_info(fs_info, "has skinny extents");
+
++ /*
++ * Flag our filesystem as having big metadata blocks if they are bigger
++ * than the page size.
++ */
++ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
++ if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
++ btrfs_info(fs_info,
++ "flagging fs with big metadata feature");
++ features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
++ }
++
+ /*
+ * mixed block groups end up with duplicate but slightly offset
+ * extent buffers for the same range. It leads to corruptions
+@@ -3386,13 +3354,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ ~BTRFS_FEATURE_COMPAT_RO_SUPP;
+ if (!sb_rdonly(sb) && features) {
+ btrfs_err(fs_info,
+- "cannot mount read-write because of unsupported optional features (%llx)",
++ "cannot mount read-write because of unsupported optional features (0x%llx)",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
+ if (sectorsize != PAGE_SIZE) {
++ /*
++ * V1 space cache has some hardcoded PAGE_SIZE usage, and is
++ * going to be deprecated.
++ *
++ * Force to use v2 cache for subpage case.
++ */
++ btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
++ btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
++ "forcing free space tree for sector size %u with page size %lu",
++ sectorsize, PAGE_SIZE);
++
+ btrfs_warn(fs_info,
+ "read-write for sector size %u with page size %lu is experimental",
+ sectorsize, PAGE_SIZE);
+@@ -3465,7 +3444,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ * below in btrfs_init_dev_replace().
+ */
+ btrfs_free_extra_devids(fs_devices);
+- if (!fs_devices->latest_bdev) {
++ if (!fs_devices->latest_dev->bdev) {
+ btrfs_err(fs_info, "failed to read devices");
+ goto fail_tree_roots;
+ }
+@@ -3523,6 +3502,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
+ goto fail_block_groups;
+ }
++ /*
++	 * We have unsupported RO compat features; although mounted read-only,
++	 * we should not cause any metadata writes, including log replay.
++	 * Otherwise we could screw up whatever the new feature requires.
++ */
++ if (unlikely(features && btrfs_super_log_root(disk_super) &&
++ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
++ btrfs_err(fs_info,
++"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
++ features);
++ err = -EINVAL;
++ goto fail_alloc;
++ }
++
+
+ ret = btrfs_check_zoned_mode(fs_info);
+ if (ret) {
+@@ -3556,7 +3549,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+ goto fail_sysfs;
+ }
+
+- if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
++ btrfs_free_zone_cache(fs_info);
++
++ if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
++ !btrfs_check_rw_degradable(fs_info, NULL)) {
+ btrfs_warn(fs_info,
+ "writable mount is not allowed due to too many missing devices");
+ goto fail_sysfs;
+@@ -3647,6 +3643,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+
+ set_bit(BTRFS_FS_OPEN, &fs_info->flags);
+
++ /* Kick the cleaner thread so it'll start deleting snapshots. */
++ if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
++ wake_up_process(fs_info->cleaner_kthread);
++
+ clear_oneshot:
+ btrfs_clear_oneshot_options(fs_info);
+ return 0;
+@@ -3725,7 +3725,7 @@ static void btrfs_end_super_write(struct bio *bio)
+ }
+
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+- int copy_num)
++ int copy_num, bool drop_cache)
+ {
+ struct btrfs_super_block *super;
+ struct page *page;
+@@ -3743,6 +3743,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
+ return ERR_PTR(-EINVAL);
+
++ if (drop_cache) {
++ /* This should only be called with the primary sb. */
++ ASSERT(copy_num == 0);
++
++ /*
++ * Drop the page of the primary superblock, so later read will
++ * always read from the device.
++ */
++ invalidate_inode_pages2_range(mapping,
++ bytenr >> PAGE_SHIFT,
++ (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
++ }
++
+ page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+@@ -3774,7 +3787,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
+ * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+ */
+ for (i = 0; i < 1; i++) {
+- super = btrfs_read_dev_one_super(bdev, i);
++ super = btrfs_read_dev_one_super(bdev, i, false);
+ if (IS_ERR(super))
+ continue;
+
+@@ -3968,11 +3981,23 @@ static void btrfs_end_empty_barrier(struct bio *bio)
+ */
+ static void write_dev_flush(struct btrfs_device *device)
+ {
+- struct request_queue *q = bdev_get_queue(device->bdev);
+ struct bio *bio = device->flush_bio;
+
++#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
++ /*
++ * When a disk has write caching disabled, we skip submission of a bio
++ * with flush and sync requests before writing the superblock, since
++ * it's not needed. However when the integrity checker is enabled, this
++	 * results in reports that there are metadata blocks referred to by a
++ * superblock that were not properly flushed. So don't skip the bio
++ * submission only when the integrity checker is enabled for the sake
++ * of simplicity, since this is a debug tool and not meant for use in
++ * non-debug builds.
++ */
++ struct request_queue *q = bdev_get_queue(device->bdev);
+ if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ return;
++#endif
+
+ bio_reset(bio);
+ bio->bi_end_io = btrfs_end_empty_barrier;
+@@ -4308,6 +4333,28 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
+ int ret;
+
+ set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
++
++ /*
++ * If we had UNFINISHED_DROPS we could still be processing them, so
++ * clear that bit and wake up relocation so it can stop.
++ * We must do this before stopping the block group reclaim task, because
++ * at btrfs_relocate_block_group() we wait for this bit, and after the
++ * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
++ * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
++ * return 1.
++ */
++ btrfs_wake_unfinished_drop(fs_info);
++
++ /*
++ * We may have the reclaim task running and relocating a data block group,
++ * in which case it may create delayed iputs. So stop it before we park
++	 * the cleaner kthread, otherwise we can get new delayed iputs after
++	 * parking the cleaner, and that can make the async reclaim task hang
++	 * if it's waiting for delayed iputs to complete, since the cleaner is
++	 * parked and cannot run delayed iputs - this will make us hang when
++ * trying to stop the async reclaim task.
++ */
++ cancel_work_sync(&fs_info->reclaim_bgs_work);
+ /*
+ * We don't want the cleaner to start new transactions, add more delayed
+ * iputs, etc. while we're closing. We can't use kthread_stop() yet
+@@ -4338,12 +4385,35 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
+ /* clear out the rbtree of defraggable inodes */
+ btrfs_cleanup_defrag_inodes(fs_info);
+
++ /*
++ * After we parked the cleaner kthread, ordered extents may have
++ * completed and created new delayed iputs. If one of the async reclaim
++ * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
++ * can hang forever trying to stop it, because if a delayed iput is
++ * added after it ran btrfs_run_delayed_iputs() and before it called
++ * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
++ * no one else to run iputs.
++ *
++ * So wait for all ongoing ordered extents to complete and then run
++ * delayed iputs. This works because once we reach this point no one
++ * can either create new ordered extents nor create delayed iputs
++ * through some other means.
++ *
++ * Also note that btrfs_wait_ordered_roots() is not safe here, because
++ * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
++ * but the delayed iput for the respective inode is made only when doing
++ * the final btrfs_put_ordered_extent() (which must happen at
++ * btrfs_finish_ordered_io() when we are unmounting).
++ */
++ btrfs_flush_workqueue(fs_info->endio_write_workers);
++ /* Ordered extents for free space inodes. */
++ btrfs_flush_workqueue(fs_info->endio_freespace_worker);
++ btrfs_run_delayed_iputs(fs_info);
++
+ cancel_work_sync(&fs_info->async_reclaim_work);
+ cancel_work_sync(&fs_info->async_data_reclaim_work);
+ cancel_work_sync(&fs_info->preempt_reclaim_work);
+
+- cancel_work_sync(&fs_info->reclaim_bgs_work);
+-
+ /* Cancel or finish ongoing discard work */
+ btrfs_discard_cleanup(fs_info);
+
+@@ -4726,7 +4796,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
+ */
+ inode = igrab(&btrfs_inode->vfs_inode);
+ if (inode) {
++ unsigned int nofs_flag;
++
++ nofs_flag = memalloc_nofs_save();
+ invalidate_inode_pages2(inode->i_mapping);
++ memalloc_nofs_restore(nofs_flag);
+ iput(inode);
+ }
+ spin_lock(&root->delalloc_lock);
+@@ -4831,7 +4905,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
+
+ inode = cache->io_ctl.inode;
+ if (inode) {
++ unsigned int nofs_flag;
++
++ nofs_flag = memalloc_nofs_save();
+ invalidate_inode_pages2(inode->i_mapping);
++ memalloc_nofs_restore(nofs_flag);
++
+ BTRFS_I(inode)->generation = 0;
+ cache->io_ctl.inode = NULL;
+ iput(inode);
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index 0e7e9526b6a83..718787dfdb8ea 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -52,14 +52,18 @@ struct extent_buffer *btrfs_find_create_tree_block(
+ void btrfs_clean_tree_block(struct extent_buffer *buf);
+ void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
+ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
++int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
++ const struct btrfs_super_block *disk_sb);
+ int __cold open_ctree(struct super_block *sb,
+ struct btrfs_fs_devices *fs_devices,
+ char *options);
+ void __cold close_ctree(struct btrfs_fs_info *fs_info);
++int btrfs_validate_super(struct btrfs_fs_info *fs_info,
++ struct btrfs_super_block *sb, int mirror_num);
+ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
+ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
+ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
+- int copy_num);
++ int copy_num, bool drop_cache);
+ int btrfs_commit_super(struct btrfs_fs_info *fs_info);
+ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+ struct btrfs_key *key);
+@@ -140,14 +144,4 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root);
+ int __init btrfs_end_io_wq_init(void);
+ void __cold btrfs_end_io_wq_exit(void);
+
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-void btrfs_set_buffer_lockdep_class(u64 objectid,
+- struct extent_buffer *eb, int level);
+-#else
+-static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+- struct extent_buffer *eb, int level)
+-{
+-}
+-#endif
+-
+ #endif
+diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
+index 1d4c2397d0d62..fab7eb76e53b2 100644
+--- a/fs/btrfs/export.c
++++ b/fs/btrfs/export.c
+@@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ }
+
+ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+- u64 root_objectid, u32 generation,
++ u64 root_objectid, u64 generation,
+ int check_generation)
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
+index f32f4113c976a..5afb7ca428289 100644
+--- a/fs/btrfs/export.h
++++ b/fs/btrfs/export.h
+@@ -19,7 +19,7 @@ struct btrfs_fid {
+ } __attribute__ ((packed));
+
+ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+- u64 root_objectid, u32 generation,
++ u64 root_objectid, u64 generation,
+ int check_generation);
+ struct dentry *btrfs_get_parent(struct dentry *child);
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 0ab456cb4bf80..597cc2607481c 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
+ return ret;
+ }
+
+-static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
++static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
+ {
+ struct btrfs_device *dev = stripe->dev;
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+@@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 discarded_bytes = 0;
+ u64 end = bytenr + num_bytes;
+ u64 cur = bytenr;
+- struct btrfs_bio *bbio = NULL;
+-
++ struct btrfs_io_context *bioc = NULL;
+
+ /*
+- * Avoid races with device replace and make sure our bbio has devices
++ * Avoid races with device replace and make sure our bioc has devices
+ * associated to its stripes that don't go away while we are discarding.
+ */
+ btrfs_bio_counter_inc_blocked(fs_info);
+ while (cur < end) {
+- struct btrfs_bio_stripe *stripe;
++ struct btrfs_io_stripe *stripe;
+ int i;
+
+ num_bytes = end - cur;
+ /* Tell the block device(s) that the sectors can be discarded */
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
+- &num_bytes, &bbio, 0);
++ &num_bytes, &bioc, 0);
+ /*
+ * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
+ * -EOPNOTSUPP. For any such error, @num_bytes is not updated,
+@@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
+ if (ret < 0)
+ goto out;
+
+- stripe = bbio->stripes;
+- for (i = 0; i < bbio->num_stripes; i++, stripe++) {
++ stripe = bioc->stripes;
++ for (i = 0; i < bioc->num_stripes; i++, stripe++) {
+ u64 bytes;
+ struct btrfs_device *device = stripe->dev;
+
+@@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
+ * And since there are two loops, explicitly
+ * go to out to avoid confusion.
+ */
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ goto out;
+ }
+
+@@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
+ */
+ ret = 0;
+ }
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ cur += num_bytes;
+ }
+ out:
+@@ -1718,6 +1717,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+ BUG();
+ if (ret && insert_reserved)
+ btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
++ if (ret < 0)
++ btrfs_err(trans->fs_info,
++"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
++ node->bytenr, node->num_bytes, node->type,
++ node->action, node->ref_mod, ret);
+ return ret;
+ }
+
+@@ -1956,8 +1960,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
+ if (ret) {
+ unselect_delayed_ref_head(delayed_refs, locked_ref);
+ btrfs_put_delayed_ref(ref);
+- btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
+- ret);
+ return ret;
+ }
+
+@@ -2376,7 +2378,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
+
+ out:
+ btrfs_free_path(path);
+- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
++ if (btrfs_is_data_reloc_root(root))
+ WARN_ON(ret > 0);
+ return ret;
+ }
+@@ -2440,7 +2442,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
+- key.offset);
++ key.offset, root->root_key.objectid,
++ for_reloc);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+@@ -2454,7 +2457,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
+ btrfs_init_generic_ref(&generic_ref, action, bytenr,
+ num_bytes, parent);
+ generic_ref.real_root = root->root_key.objectid;
+- btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
++ btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
++ root->root_key.objectid, for_reloc);
+ generic_ref.skip_qgroup = for_reloc;
+ if (inc)
+ ret = btrfs_inc_extent_ref(trans, &generic_ref);
+@@ -2571,17 +2575,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
+ return -EINVAL;
+
+ /*
+- * pull in the free space cache (if any) so that our pin
+- * removes the free space from the cache. We have load_only set
+- * to one because the slow code to read in the free extents does check
+- * the pinned extents.
++ * Fully cache the free space first so that our pin removes the free space
++ * from the cache.
+ */
+- btrfs_cache_block_group(cache, 1);
+- /*
+- * Make sure we wait until the cache is completely built in case it is
+- * missing or is invalid and therefore needs to be rebuilt.
+- */
+- ret = btrfs_wait_block_group_cache_done(cache);
++ ret = btrfs_cache_block_group(cache, true);
+ if (ret)
+ goto out;
+
+@@ -2604,12 +2601,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
+ if (!block_group)
+ return -EINVAL;
+
+- btrfs_cache_block_group(block_group, 1);
+- /*
+- * Make sure we wait until the cache is completely built in case it is
+- * missing or is invalid and therefore needs to be rebuilt.
+- */
+- ret = btrfs_wait_block_group_cache_done(block_group);
++ ret = btrfs_cache_block_group(block_group, true);
+ if (ret)
+ goto out;
+
+@@ -3278,20 +3270,20 @@ out_delayed_unlock:
+ }
+
+ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
++ u64 root_id,
+ struct extent_buffer *buf,
+ u64 parent, int last_ref)
+ {
+- struct btrfs_fs_info *fs_info = root->fs_info;
++ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_ref generic_ref = { 0 };
+ int ret;
+
+ btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
+ buf->start, buf->len, parent);
+ btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
+- root->root_key.objectid);
++ root_id, 0, false);
+
+- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
++ if (root_id != BTRFS_TREE_LOG_OBJECTID) {
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
+ BUG_ON(ret); /* -ENOMEM */
+@@ -3301,7 +3293,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *cache;
+ bool must_pin = false;
+
+- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
++ if (root_id != BTRFS_TREE_LOG_OBJECTID) {
+ ret = check_ref_cleanup(trans, buf->start);
+ if (!ret) {
+ btrfs_redirty_list_add(trans->transaction, buf);
+@@ -3318,21 +3310,22 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ }
+
+ /*
+- * If this is a leaf and there are tree mod log users, we may
+- * have recorded mod log operations that point to this leaf.
+- * So we must make sure no one reuses this leaf's extent before
+- * mod log operations are applied to a node, otherwise after
+- * rewinding a node using the mod log operations we get an
+- * inconsistent btree, as the leaf's extent may now be used as
+- * a node or leaf for another different btree.
++ * If there are tree mod log users we may have recorded mod log
++ * operations for this node. If we re-allocate this node we
++ * could replay operations on this node that happened when it
++ * existed in a completely different root. For example if it
++ * was part of root A, then was reallocated to root B, and we
++ * are doing a btrfs_old_search_slot(root b), we could replay
++ * operations that happened when the block was part of root A,
++ * giving us an inconsistent view of the btree.
++ *
+ * We are safe from races here because at this point no other
+ * node or root points to this extent buffer, so if after this
+- * check a new tree mod log user joins, it will not be able to
+- * find a node pointing to this leaf and record operations that
+- * point to this leaf.
++ * check a new tree mod log user joins we will not have an
++ * existing log of operations on this node that we have to
++ * contend with.
+ */
+- if (btrfs_header_level(buf) == 0 &&
+- test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
++ if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
+ must_pin = true;
+
+ if (must_pin || btrfs_is_zoned(fs_info)) {
+@@ -3495,6 +3488,9 @@ struct find_free_extent_ctl {
+ /* Allocation is called for tree-log */
+ bool for_treelog;
+
++ /* Allocation is called for data relocation */
++ bool for_data_reloc;
++
+ /* RAID index, converted from flags */
+ int index;
+
+@@ -3756,6 +3752,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ u64 avail;
+ u64 bytenr = block_group->start;
+ u64 log_bytenr;
++ u64 data_reloc_bytenr;
+ int ret = 0;
+ bool skip;
+
+@@ -3773,15 +3770,33 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ if (skip)
+ return 1;
+
++ /*
++ * Do not allow non-relocation blocks in the dedicated relocation block
++ * group, and vice versa.
++ */
++ spin_lock(&fs_info->relocation_bg_lock);
++ data_reloc_bytenr = fs_info->data_reloc_bg;
++ if (data_reloc_bytenr &&
++ ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
++ (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
++ skip = true;
++ spin_unlock(&fs_info->relocation_bg_lock);
++ if (skip)
++ return 1;
++
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+ spin_lock(&fs_info->treelog_bg_lock);
++ spin_lock(&fs_info->relocation_bg_lock);
+
+ ASSERT(!ffe_ctl->for_treelog ||
+ block_group->start == fs_info->treelog_bg ||
+ fs_info->treelog_bg == 0);
++ ASSERT(!ffe_ctl->for_data_reloc ||
++ block_group->start == fs_info->data_reloc_bg ||
++ fs_info->data_reloc_bg == 0);
+
+- if (block_group->ro) {
++ if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
+ ret = 1;
+ goto out;
+ }
+@@ -3796,6 +3811,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ goto out;
+ }
+
++ /*
++ * Do not allow currently used block group to be the data relocation
++	 * Do not allow a currently used block group to be the dedicated data
++	 * relocation block group.
++ if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
++ (block_group->used || block_group->reserved)) {
++ ret = 1;
++ goto out;
++ }
++
+ avail = block_group->length - block_group->alloc_offset;
+ if (avail < num_bytes) {
+ if (ffe_ctl->max_extent_size < avail) {
+@@ -3813,6 +3838,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
+ fs_info->treelog_bg = block_group->start;
+
++ if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
++ fs_info->data_reloc_bg = block_group->start;
++
+ ffe_ctl->found_offset = start + block_group->alloc_offset;
+ block_group->alloc_offset += num_bytes;
+ spin_lock(&ctl->tree_lock);
+@@ -3829,6 +3857,25 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ out:
+ if (ret && ffe_ctl->for_treelog)
+ fs_info->treelog_bg = 0;
++ if (ret && ffe_ctl->for_data_reloc &&
++ fs_info->data_reloc_bg == block_group->start) {
++ /*
++ * Do not allow further allocations from this block group.
++ * Compared to increasing the ->ro, setting the
++ * ->zoned_data_reloc_ongoing flag still allows nocow
++ * writers to come in. See btrfs_inc_nocow_writers().
++ *
++		 * We need to disable allocations here to avoid allocating a
++		 * regular (non-relocation data) extent. With a mix of relocation
++ * extents and regular extents, we can dispatch WRITE commands
++ * (for relocation extents) and ZONE APPEND commands (for
++ * regular extents) at the same time to the same zone, which
++		 * can easily break the write pointer.
++ */
++ block_group->zoned_data_reloc_ongoing = 1;
++ fs_info->data_reloc_bg = 0;
++ }
++ spin_unlock(&fs_info->relocation_bg_lock);
+ spin_unlock(&fs_info->treelog_bg_lock);
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+@@ -4085,6 +4132,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+ ffe_ctl->hint_byte = fs_info->treelog_bg;
+ spin_unlock(&fs_info->treelog_bg_lock);
+ }
++ if (ffe_ctl->for_data_reloc) {
++ spin_lock(&fs_info->relocation_bg_lock);
++ if (fs_info->data_reloc_bg)
++ ffe_ctl->hint_byte = fs_info->data_reloc_bg;
++ spin_unlock(&fs_info->relocation_bg_lock);
++ }
+ return 0;
+ default:
+ BUG();
+@@ -4129,6 +4182,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
+ struct btrfs_space_info *space_info;
+ bool full_search = false;
+ bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
++ bool for_data_reloc = (btrfs_is_data_reloc_root(root) &&
++ flags & BTRFS_BLOCK_GROUP_DATA);
+
+ WARN_ON(num_bytes < fs_info->sectorsize);
+
+@@ -4143,6 +4198,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
+ ffe_ctl.found_offset = 0;
+ ffe_ctl.hint_byte = hint_byte_orig;
+ ffe_ctl.for_treelog = for_treelog;
++ ffe_ctl.for_data_reloc = for_data_reloc;
+ ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
+
+ /* For clustered allocation */
+@@ -4220,6 +4276,8 @@ search:
+ if (unlikely(block_group->ro)) {
+ if (for_treelog)
+ btrfs_clear_treelog_bg(block_group);
++ if (ffe_ctl.for_data_reloc)
++ btrfs_clear_data_reloc_bg(block_group);
+ continue;
+ }
+
+@@ -4258,7 +4316,7 @@ have_block_group:
+ ffe_ctl.cached = btrfs_block_group_done(block_group);
+ if (unlikely(!ffe_ctl.cached)) {
+ ffe_ctl.have_caching_bg = true;
+- ret = btrfs_cache_block_group(block_group, 0);
++ ret = btrfs_cache_block_group(block_group, false);
+
+ /*
+ * If we get ENOMEM here or something else we want to
+@@ -4276,8 +4334,11 @@ have_block_group:
+ ret = 0;
+ }
+
+- if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
++ if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
++ if (!cache_block_group_error)
++ cache_block_group_error = -EIO;
+ goto loop;
++ }
+
+ bg_ret = NULL;
+ ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
+@@ -4408,6 +4469,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
+ u64 flags;
+ int ret;
+ bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
++ bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
+
+ flags = get_alloc_profile_by_root(root, is_data);
+ again:
+@@ -4431,8 +4493,8 @@ again:
+
+ sinfo = btrfs_find_space_info(fs_info, flags);
+ btrfs_err(fs_info,
+- "allocation failed flags %llu, wanted %llu tree-log %d",
+- flags, num_bytes, for_treelog);
++ "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
++ flags, num_bytes, for_treelog, for_data_reloc);
+ if (sinfo)
+ btrfs_dump_space_info(fs_info, sinfo,
+ num_bytes, 1);
+@@ -4655,7 +4717,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins->objectid, ins->offset, 0);
+- btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
++ btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
++ offset, 0, false);
+ btrfs_ref_tree_mod(root->fs_info, &generic_ref);
+
+ return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
+@@ -4713,6 +4776,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *buf;
++ u64 lockdep_owner = owner;
+
+ buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
+ if (IS_ERR(buf))
+@@ -4731,12 +4795,30 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ return ERR_PTR(-EUCLEAN);
+ }
+
++ /*
++ * The reloc trees are just snapshots, so we need them to appear to be
++ * just like any other fs tree WRT lockdep.
++ *
++ * The exception however is in replace_path() in relocation, where we
++ * hold the lock on the original fs root and then search for the reloc
++ * root. At that point we need to make sure any reloc root buffers are
++ * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
++ * lockdep happy.
++ */
++ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
++ !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
++ lockdep_owner = BTRFS_FS_TREE_OBJECTID;
++
++ /* btrfs_clean_tree_block() accesses generation field. */
++ btrfs_set_header_generation(buf, trans->transid);
++
+ /*
+ * This needs to stay, because we could allocate a freed block from an
+ * old tree into a new tree, so we need to make sure this new block is
+ * set to the appropriate level and owner.
+ */
+- btrfs_set_buffer_lockdep_class(owner, buf, level);
++ btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
++
+ __btrfs_tree_lock(buf, nest);
+ btrfs_clean_tree_block(buf);
+ clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
+@@ -4848,7 +4930,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
+ btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
+ ins.objectid, ins.offset, parent);
+ generic_ref.real_root = root->root_key.objectid;
+- btrfs_init_tree_ref(&generic_ref, level, root_objectid);
++ btrfs_init_tree_ref(&generic_ref, level, root_objectid,
++ root->root_key.objectid, false);
+ btrfs_ref_tree_mod(fs_info, &generic_ref);
+ ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
+ if (ret)
+@@ -5265,7 +5348,8 @@ skip:
+
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ fs_info->nodesize, parent);
+- btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
++ btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
++ 0, false);
+ ret = btrfs_free_extent(trans, &ref);
+ if (ret)
+ goto out_unlock;
+@@ -5386,7 +5470,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+ goto owner_mismatch;
+ }
+
+- btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
++ wc->refs[level] == 1);
+ out:
+ wc->refs[level] = 0;
+ wc->flags[level] = 0;
+@@ -5491,6 +5576,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
+ int ret;
+ int level;
+ bool root_dropped = false;
++ bool unfinished_drop = false;
+
+ btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
+
+@@ -5533,6 +5619,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
+ * already dropped.
+ */
+ set_bit(BTRFS_ROOT_DELETING, &root->state);
++ unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
++
+ if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+ level = btrfs_header_level(root->node);
+ path->nodes[level] = btrfs_lock_root_node(root);
+@@ -5707,6 +5795,13 @@ out_free:
+ kfree(wc);
+ btrfs_free_path(path);
+ out:
++ /*
++ * We were an unfinished drop root, check to see if there are any
++ * pending, and if not clear and wake up any waiters.
++ */
++ if (!err && unfinished_drop)
++ btrfs_maybe_wake_unfinished_drop(fs_info);
++
+ /*
+ * So if we need to stop dropping the snapshot for whatever reason we
+ * need to make sure to add it back to the dead root list so that we
+@@ -5985,13 +6080,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
+
+ if (end - start >= range->minlen) {
+ if (!btrfs_block_group_done(cache)) {
+- ret = btrfs_cache_block_group(cache, 0);
+- if (ret) {
+- bg_failed++;
+- bg_ret = ret;
+- continue;
+- }
+- ret = btrfs_wait_block_group_cache_done(cache);
++ ret = btrfs_cache_block_group(cache, true);
+ if (ret) {
+ bg_failed++;
+ bg_ret = ret;
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index aaddd72253481..f9f6dfbc86bcd 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1985,8 +1985,10 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
+ struct page *locked_page, u64 *start,
+ u64 *end)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+- u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
++ /* The sanity tests may not set a valid fs_info. */
++ u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+ u64 delalloc_start;
+ u64 delalloc_end;
+ bool found;
+@@ -2290,7 +2292,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ struct btrfs_device *dev;
+ u64 map_length = 0;
+ u64 sector;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ int ret;
+
+ ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
+@@ -2304,7 +2306,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ map_length = length;
+
+ /*
+- * Avoid races with device replace and make sure our bbio has devices
++ * Avoid races with device replace and make sure our bioc has devices
+ * associated to its stripes that don't go away while we are doing the
+ * read repair operation.
+ */
+@@ -2317,28 +2319,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+ * stripe's dev and sector.
+ */
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
+- &map_length, &bbio, 0);
++ &map_length, &bioc, 0);
+ if (ret) {
+ btrfs_bio_counter_dec(fs_info);
+ bio_put(bio);
+ return -EIO;
+ }
+- ASSERT(bbio->mirror_num == 1);
++ ASSERT(bioc->mirror_num == 1);
+ } else {
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
+- &map_length, &bbio, mirror_num);
++ &map_length, &bioc, mirror_num);
+ if (ret) {
+ btrfs_bio_counter_dec(fs_info);
+ bio_put(bio);
+ return -EIO;
+ }
+- BUG_ON(mirror_num != bbio->mirror_num);
++ BUG_ON(mirror_num != bioc->mirror_num);
+ }
+
+- sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
++ sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
+ bio->bi_iter.bi_sector = sector;
+- dev = bbio->stripes[bbio->mirror_num - 1].dev;
+- btrfs_put_bbio(bbio);
++ dev = bioc->stripes[bioc->mirror_num - 1].dev;
++ btrfs_put_bioc(bioc);
+ if (!dev || !dev->bdev ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
+ btrfs_bio_counter_dec(fs_info);
+@@ -3327,7 +3329,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
+ if (wbc) {
+ struct block_device *bdev;
+
+- bdev = fs_info->fs_devices->latest_bdev;
++ bdev = fs_info->fs_devices->latest_dev->bdev;
+ bio_set_dev(bio, bdev);
+ wbc_init_bio(wbc, bio);
+ }
+@@ -3561,7 +3563,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
+ u64 cur_end;
+ struct extent_map *em;
+ int ret = 0;
+- int nr = 0;
+ size_t pg_offset = 0;
+ size_t iosize;
+ size_t blocksize = inode->i_sb->s_blocksize;
+@@ -3727,11 +3728,13 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
+ end_bio_extent_readpage, 0,
+ this_bio_flag,
+ force_bio_submit);
+- if (!ret) {
+- nr++;
+- } else {
+- unlock_extent(tree, cur, cur + iosize - 1);
+- end_page_read(page, false, cur, iosize);
++ if (ret) {
++ /*
++ * We have to unlock the remaining range, or the page
++ * will never be unlocked.
++ */
++ unlock_extent(tree, cur, end);
++ end_page_read(page, false, cur, end + 1 - cur);
+ goto out;
+ }
+ cur = cur + iosize;
+@@ -3777,10 +3780,11 @@ static void update_nr_written(struct writeback_control *wbc,
+ */
+ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
+ struct page *page, struct writeback_control *wbc,
+- u64 delalloc_start, unsigned long *nr_written)
++ unsigned long *nr_written)
+ {
+- u64 page_end = delalloc_start + PAGE_SIZE - 1;
++ u64 page_end = page_offset(page) + PAGE_SIZE - 1;
+ bool found;
++ u64 delalloc_start = page_offset(page);
+ u64 delalloc_to_write = 0;
+ u64 delalloc_end = 0;
+ int ret;
+@@ -3905,10 +3909,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+ u64 extent_offset;
+ u64 block_start;
+ struct extent_map *em;
++ int saved_ret = 0;
+ int ret = 0;
+ int nr = 0;
+ u32 opf = REQ_OP_WRITE;
+ const unsigned int write_flags = wbc_to_write_flags(wbc);
++ bool has_error = false;
+ bool compressed;
+
+ ret = btrfs_writepage_cow_fixup(page);
+@@ -3959,6 +3965,9 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+ if (IS_ERR_OR_NULL(em)) {
+ btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
+ ret = PTR_ERR_OR_ZERO(em);
++ has_error = true;
++ if (!saved_ret)
++ saved_ret = ret;
+ break;
+ }
+
+@@ -4022,6 +4031,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+ end_bio_extent_writepage,
+ 0, 0, false);
+ if (ret) {
++ has_error = true;
++ if (!saved_ret)
++ saved_ret = ret;
++
+ btrfs_page_set_error(fs_info, page, cur, iosize);
+ if (PageWriteback(page))
+ btrfs_page_clear_writeback(fs_info, page, cur,
+@@ -4035,8 +4048,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
+ * If we finish without problem, we should not only clear page dirty,
+ * but also empty subpage dirty bits
+ */
+- if (!ret)
++ if (!has_error)
+ btrfs_page_assert_not_dirty(fs_info, page);
++ else
++ ret = saved_ret;
+ *nr_ret = nr;
+ return ret;
+ }
+@@ -4054,8 +4069,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+ struct extent_page_data *epd)
+ {
+ struct inode *inode = page->mapping->host;
+- u64 start = page_offset(page);
+- u64 page_end = start + PAGE_SIZE - 1;
++ const u64 page_start = page_offset(page);
++ const u64 page_end = page_start + PAGE_SIZE - 1;
+ int ret;
+ int nr = 0;
+ size_t pg_offset;
+@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+ }
+
+ if (!epd->extent_locked) {
+- ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
+- &nr_written);
++ ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
+ if (ret == 1)
+ return 0;
+ if (ret)
+@@ -4141,7 +4155,7 @@ done:
+ * capable of that.
+ */
+ if (PageError(page))
+- end_extent_writepage(page, ret, start, page_end);
++ end_extent_writepage(page, ret, page_start, page_end);
+ unlock_page(page);
+ ASSERT(ret <= 0);
+ return ret;
+@@ -4284,6 +4298,12 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
+ if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
+ return;
+
++ /*
++ * A read may stumble upon this buffer later, make sure that it gets an
++ * error and knows there was an error.
++ */
++ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
++
+ /*
+ * If we error out, we should add back the dirty_metadata_bytes
+ * to make it consistent.
+@@ -4824,11 +4844,12 @@ retry:
+ }
+
+ /*
+- * the filesystem may choose to bump up nr_to_write.
++ * The filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+- * at any time
++ * at any time.
+ */
+- nr_to_write_done = wbc->nr_to_write <= 0;
++ nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
++ wbc->nr_to_write <= 0);
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+@@ -5120,6 +5141,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+ int extent_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+ {
++ struct inode *inode = mapping->host;
+ int ret = 0;
+ struct extent_page_data epd = {
+ .bio_ctrl = { 0 },
+@@ -5127,13 +5149,20 @@ int extent_writepages(struct address_space *mapping,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+ };
+
++ /*
++ * Allow only a single thread to do the reloc work in zoned mode to
++ * protect the write pointer updates.
++ */
++ btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
+ ret = extent_write_cache_pages(mapping, wbc, &epd);
+ ASSERT(ret <= 0);
+ if (ret < 0) {
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ return ret;
+ }
+
+@@ -6081,6 +6110,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *exists = NULL;
+ struct page *p;
+ struct address_space *mapping = fs_info->btree_inode->i_mapping;
++ u64 lockdep_owner = owner_root;
+ int uptodate = 1;
+ int ret;
+
+@@ -6115,7 +6145,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+ eb = __alloc_extent_buffer(fs_info, start, len);
+ if (!eb)
+ return ERR_PTR(-ENOMEM);
+- btrfs_set_buffer_lockdep_class(owner_root, eb, level);
++
++ /*
++ * The reloc trees are just snapshots, so we need them to appear to be
++ * just like any other fs tree WRT lockdep.
++ */
++ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID)
++ lockdep_owner = BTRFS_FS_TREE_OBJECTID;
++
++ btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
+
+ num_pages = num_extent_pages(eb);
+ for (i = 0; i < num_pages; i++, index++) {
+@@ -6530,6 +6568,14 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+ return 0;
+
++ /*
++ * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
++ * operation, which could potentially still be in flight. In this case
++ * we simply want to return an error.
++ */
++ if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
++ return -EIO;
++
+ if (eb->fs_info->sectorsize < PAGE_SIZE)
+ return read_extent_buffer_subpage(eb, wait, mirror_num);
+
+@@ -6776,14 +6822,24 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
+ {
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+
++ /*
++ * If we are using the commit root we could potentially clear a page
++ * Uptodate while we're using the extent buffer that we've previously
++ * looked up. We don't want to complain in this case, as the page was
++ * valid before, we just didn't write it out. Instead we want to catch
++ * the case where we didn't actually read the block properly, which
++ * would have !PageUptodate && !PageError, as we clear PageError before
++ * reading.
++ */
+ if (fs_info->sectorsize < PAGE_SIZE) {
+- bool uptodate;
++ bool uptodate, error;
+
+ uptodate = btrfs_subpage_test_uptodate(fs_info, page,
+ eb->start, eb->len);
+- WARN_ON(!uptodate);
++ error = btrfs_subpage_test_error(fs_info, page, eb->start, eb->len);
++ WARN_ON(!uptodate && !error);
+ } else {
+- WARN_ON(!PageUptodate(page));
++ WARN_ON(!PageUptodate(page) && !PageError(page));
+ }
+ }
+
+diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
+index 53abdc280451b..f7ab6ba8238e1 100644
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -117,7 +117,7 @@ struct btrfs_bio_ctrl {
+ */
+ struct extent_changeset {
+ /* How many bytes are set/cleared in this operation */
+- unsigned int bytes_changed;
++ u64 bytes_changed;
+
+ /* Changed ranges */
+ struct ulist range_changed;
+diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
+index 4a8e02f7b6c7a..5a36add213053 100644
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+- struct btrfs_bio_stripe *stripe = &map->stripes[i];
++ struct btrfs_io_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ set_extent_bits_nowait(&device->alloc_state, stripe->physical,
+@@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+- struct btrfs_bio_stripe *stripe = &map->stripes[i];
++ struct btrfs_io_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ __clear_extent_bit(&device->alloc_state, stripe->physical,
+diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
+index 0b9401a5afd33..4c210b2ac6994 100644
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -47,13 +47,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
+ u64 start, end, i_size;
+ int ret;
+
++ spin_lock(&inode->lock);
+ i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
+ if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
+ inode->disk_i_size = i_size;
+- return;
++ goto out_unlock;
+ }
+
+- spin_lock(&inode->lock);
+ ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
+ &end, EXTENT_DIRTY);
+ if (!ret && start == 0)
+@@ -61,6 +61,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
+ else
+ i_size = 0;
+ inode->disk_i_size = i_size;
++out_unlock:
+ spin_unlock(&inode->lock);
+ }
+
+@@ -303,7 +304,7 @@ found:
+ read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
+ ret * csum_size);
+ out:
+- if (ret == -ENOENT)
++ if (ret == -ENOENT || ret == -EFBIG)
+ ret = 0;
+ return ret;
+ }
+@@ -699,7 +700,9 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+ bytes_left), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+- BUG_ON(!sums); /* -ENOMEM */
++ if (!sums)
++ return BLK_STS_RESOURCE;
++
+ sums->len = bytes_left;
+ ordered = btrfs_lookup_ordered_extent(inode,
+ offset);
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index a1762363f61fa..eae622ef4c6d5 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -869,9 +869,13 @@ next_slot:
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
+ new_key.objectid,
+- args->start - extent_offset);
++ args->start - extent_offset,
++ 0, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+- BUG_ON(ret); /* -ENOMEM */
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
++ break;
++ }
+ }
+ key.offset = args->start;
+ }
+@@ -955,9 +959,13 @@ delete_extent_item:
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
+ key.objectid,
+- key.offset - extent_offset);
++ key.offset - extent_offset, 0,
++ false);
+ ret = btrfs_free_extent(trans, &ref);
+- BUG_ON(ret); /* -ENOMEM */
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
++ break;
++ }
+ args->bytes_found += extent_end - key.offset;
+ }
+
+@@ -1232,7 +1240,7 @@ again:
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
+ num_bytes, 0);
+ btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
+- orig_offset);
++ orig_offset, 0, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -1257,7 +1265,8 @@ again:
+ other_end = 0;
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
+ num_bytes, 0);
+- btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
++ btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset,
++ 0, false);
+ if (extent_mergeable(leaf, path->slots[0] + 1,
+ ino, bytenr, orig_offset,
+ &other_start, &other_end)) {
+@@ -1709,7 +1718,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
+ * Fault pages before locking them in prepare_pages
+ * to avoid recursive lock
+ */
+- if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
++ if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
+ ret = -EFAULT;
+ break;
+ }
+@@ -1909,10 +1918,11 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
+ loff_t pos;
+ ssize_t written = 0;
+ ssize_t written_buffered;
++ size_t prev_left = 0;
+ loff_t endbyte;
+ ssize_t err;
+ unsigned int ilock_flags = 0;
+- struct iomap_dio *dio = NULL;
++ struct iomap_dio *dio;
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ ilock_flags |= BTRFS_ILOCK_TRY;
+@@ -1955,23 +1965,73 @@ relock:
+ goto buffered;
+ }
+
++ /*
++ * The iov_iter can be mapped to the same file range we are writing to.
++ * If that's the case, then we will deadlock in the iomap code, because
++ * it first calls our callback btrfs_dio_iomap_begin(), which will create
++ * an ordered extent, and after that it will fault in the pages that the
++ * iov_iter refers to. During the fault in we end up in the readahead
++ * pages code (starting at btrfs_readahead()), which will lock the range,
++ * find that ordered extent and then wait for it to complete (at
++ * btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since
++ * obviously the ordered extent can never complete as we didn't submit
++ * yet the respective bio(s). This always happens when the buffer is
++ * memory mapped to the same file range, since the iomap DIO code always
++ * invalidates pages in the target file range (after starting and waiting
++ * for any writeback).
++ *
++ * So here we disable page faults in the iov_iter and then retry if we
++ * got -EFAULT, faulting in the pages before the retry.
++ */
++ from->nofault = true;
+ dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+- 0);
++ IOMAP_DIO_PARTIAL, written);
++ from->nofault = false;
+
++ /*
++ * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
++ * iocb, and that needs to lock the inode. So unlock it before calling
++ * iomap_dio_complete() to avoid a deadlock.
++ */
+ btrfs_inode_unlock(inode, ilock_flags);
+
+- if (IS_ERR_OR_NULL(dio)) {
++ if (IS_ERR_OR_NULL(dio))
+ err = PTR_ERR_OR_ZERO(dio);
+- if (err < 0 && err != -ENOTBLK)
+- goto out;
+- } else {
+- written = iomap_dio_complete(dio);
++ else
++ err = iomap_dio_complete(dio);
++
++ /* No increment (+=) because iomap returns a cumulative value. */
++ if (err > 0)
++ written = err;
++
++ if (iov_iter_count(from) > 0 && (err == -EFAULT || err > 0)) {
++ const size_t left = iov_iter_count(from);
++ /*
++ * We have more data left to write. Try to fault in as many as
++ * possible of the remainder pages and retry. We do this without
++ * releasing and locking again the inode, to prevent races with
++ * truncate.
++ *
++ * Also, in case the iov refers to pages in the file range of the
++ * file we want to write to (due to a mmap), we could enter an
++ * infinite loop if we retry after faulting the pages in, since
++ * iomap will invalidate any pages in the range early on, before
++ * it tries to fault in the pages of the iov. So we keep track of
++ * how much was left of iov in the previous EFAULT and fallback
++ * to buffered IO in case we haven't made any progress.
++ */
++ if (left == prev_left) {
++ err = -ENOTBLK;
++ } else {
++ fault_in_iov_iter_readable(from, left);
++ prev_left = left;
++ goto relock;
++ }
+ }
+
+- if (written < 0 || !iov_iter_count(from)) {
+- err = written;
++ /* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */
++ if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from))
+ goto out;
+- }
+
+ buffered:
+ pos = iocb->ki_pos;
+@@ -1996,7 +2056,7 @@ buffered:
+ invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
+ out:
+- return written ? written : err;
++ return err < 0 ? err : written;
+ }
+
+ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
+@@ -2279,25 +2339,62 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+ */
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
+
+- if (ret != BTRFS_NO_LOG_SYNC) {
++ if (ret == BTRFS_NO_LOG_SYNC) {
++ ret = btrfs_end_transaction(trans);
++ goto out;
++ }
++
++ /* We successfully logged the inode, attempt to sync the log. */
++ if (!ret) {
++ ret = btrfs_sync_log(trans, root, &ctx);
+ if (!ret) {
+- ret = btrfs_sync_log(trans, root, &ctx);
+- if (!ret) {
+- ret = btrfs_end_transaction(trans);
+- goto out;
+- }
+- }
+- if (!full_sync) {
+- ret = btrfs_wait_ordered_range(inode, start, len);
+- if (ret) {
+- btrfs_end_transaction(trans);
+- goto out;
+- }
++ ret = btrfs_end_transaction(trans);
++ goto out;
+ }
+- ret = btrfs_commit_transaction(trans);
+- } else {
++ }
++
++ /*
++ * At this point we need to commit the transaction because we had
++ * btrfs_need_log_full_commit() or some other error.
++ *
++ * If we didn't do a full sync we have to stop the trans handle, wait on
++ * the ordered extents, start it again and commit the transaction. If
++ * we attempt to wait on the ordered extents here we could deadlock with
++ * something like fallocate() that is holding the extent lock trying to
++ * start a transaction while some other thread is trying to commit the
++ * transaction while we (fsync) are currently holding the transaction
++ * open.
++ */
++ if (!full_sync) {
+ ret = btrfs_end_transaction(trans);
++ if (ret)
++ goto out;
++ ret = btrfs_wait_ordered_range(inode, start, len);
++ if (ret)
++ goto out;
++
++ /*
++ * This is safe to use here because we're only interested in
++ * making sure the transaction that had the ordered extents is
++ * committed. We aren't waiting on anything past this point,
++ * we're purely getting the transaction and committing it.
++ */
++ trans = btrfs_attach_transaction_barrier(root);
++ if (IS_ERR(trans)) {
++ ret = PTR_ERR(trans);
++
++ /*
++ * We committed the transaction and there's no currently
++ * running transaction, this means everything we care
++ * about made it to disk and we are done.
++ */
++ if (ret == -ENOENT)
++ ret = 0;
++ goto out;
++ }
+ }
++
++ ret = btrfs_commit_transaction(trans);
+ out:
+ ASSERT(list_empty(&ctx.list));
+ err = file_check_and_advance_wb_err(file);
+@@ -2620,7 +2717,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
+ extent_info->disk_len, 0);
+ ref_offset = extent_info->file_offset - extent_info->data_offset;
+ btrfs_init_data_ref(&ref, root->root_key.objectid,
+- btrfs_ino(inode), ref_offset);
++ btrfs_ino(inode), ref_offset, 0, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ }
+
+@@ -2878,8 +2975,9 @@ out:
+ return ret;
+ }
+
+-static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
++static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
+ {
++ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_state *cached_state = NULL;
+@@ -2911,6 +3009,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+ goto out_only_mutex;
+ }
+
++ ret = file_modified(file);
++ if (ret)
++ goto out_only_mutex;
++
+ lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
+ lockend = round_down(offset + len,
+ btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
+@@ -3351,7 +3453,7 @@ static long btrfs_fallocate(struct file *file, int mode,
+ return -EOPNOTSUPP;
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+- return btrfs_punch_hole(inode, offset, len);
++ return btrfs_punch_hole(file, offset, len);
+
+ /*
+ * Only trigger disk allocation, don't trigger qgroup reserve
+@@ -3373,6 +3475,10 @@ static long btrfs_fallocate(struct file *file, int mode,
+ goto out;
+ }
+
++ ret = file_modified(file);
++ if (ret)
++ goto out;
++
+ /*
+ * TODO: Move these two operations after we have checked
+ * accurate reserved space, or fallocate can still fail but
+@@ -3650,6 +3756,8 @@ static int check_direct_read(struct btrfs_fs_info *fs_info,
+ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+ {
+ struct inode *inode = file_inode(iocb->ki_filp);
++ size_t prev_left = 0;
++ ssize_t read = 0;
+ ssize_t ret;
+
+ if (fsverity_active(inode))
+@@ -3659,9 +3767,57 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+ return 0;
+
+ btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
+- ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0);
++again:
++ /*
++ * This is similar to what we do for direct IO writes, see the comment
++ * at btrfs_direct_write(), but we also disable page faults in addition
++ * to disabling them only at the iov_iter level. This is because when
++ * reading from a hole or prealloc extent, iomap calls iov_iter_zero(),
++ * which can still trigger page fault ins despite having set ->nofault
++ * to true of our 'to' iov_iter.
++ *
++ * The difference to direct IO writes is that we deadlock when trying
++ * to lock the extent range in the inode's tree during he page reads
++ * triggered by the fault in (while for writes it is due to waiting for
++ * our own ordered extent). This is because for direct IO reads,
++ * btrfs_dio_iomap_begin() returns with the extent range locked, which
++ * is only unlocked in the endio callback (end_bio_extent_readpage()).
++ */
++ pagefault_disable();
++ to->nofault = true;
++ ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
++ IOMAP_DIO_PARTIAL, read);
++ to->nofault = false;
++ pagefault_enable();
++
++ /* No increment (+=) because iomap returns a cumulative value. */
++ if (ret > 0)
++ read = ret;
++
++ if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) {
++ const size_t left = iov_iter_count(to);
++
++ if (left == prev_left) {
++ /*
++ * We didn't make any progress since the last attempt,
++ * fallback to a buffered read for the remainder of the
++ * range. This is just to avoid any possibility of looping
++ * for too long.
++ */
++ ret = read;
++ } else {
++ /*
++ * We made some progress since the last retry or this is
++ * the first time we are retrying. Fault in as many pages
++ * as possible and retry.
++ */
++ fault_in_iov_iter_writeable(to, left);
++ prev_left = left;
++ goto again;
++ }
++ }
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
+- return ret;
++ return ret < 0 ? ret : read;
+ }
+
+ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
+index da0eee7c9e5f3..9161bc4f40649 100644
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -672,6 +672,12 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
+
+ max_bitmaps = max_t(u64, max_bitmaps, 1);
+
++ if (ctl->total_bitmaps > max_bitmaps)
++ btrfs_err(block_group->fs_info,
++"invalid free space control: bg start=%llu len=%llu total_bitmaps=%u unit=%u max_bitmaps=%llu bytes_per_bg=%llu",
++ block_group->start, block_group->length,
++ ctl->total_bitmaps, ctl->unit, max_bitmaps,
++ bytes_per_bg);
+ ASSERT(ctl->total_bitmaps <= max_bitmaps);
+
+ /*
+@@ -819,15 +825,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
+ }
+ spin_lock(&ctl->tree_lock);
+ ret = link_free_space(ctl, e);
+- ctl->total_bitmaps++;
+- recalculate_thresholds(ctl);
+- spin_unlock(&ctl->tree_lock);
+ if (ret) {
++ spin_unlock(&ctl->tree_lock);
+ btrfs_err(fs_info,
+ "Duplicate entries in free space cache, dumping");
+ kmem_cache_free(btrfs_free_space_cachep, e);
+ goto free_cache;
+ }
++ ctl->total_bitmaps++;
++ recalculate_thresholds(ctl);
++ spin_unlock(&ctl->tree_lock);
+ list_add_tail(&e->list, &bitmaps);
+ }
+
+diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
+index a33bca94d133e..3abec44c62559 100644
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+ btrfs_tree_lock(free_space_root->node);
+ btrfs_clean_tree_block(free_space_root->node);
+ btrfs_tree_unlock(free_space_root->node);
+- btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
+- 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
++ free_space_root->node, 0, 1);
+
+ btrfs_put_root(free_space_root);
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 7c096ab9bb5eb..95af29634e55e 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -60,8 +60,6 @@ struct btrfs_iget_args {
+ };
+
+ struct btrfs_dio_data {
+- u64 reserve;
+- loff_t length;
+ ssize_t submitted;
+ struct extent_changeset *data_reserved;
+ };
+@@ -1055,6 +1053,28 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
+ * *page_started is set to one if we unlock locked_page and do everything
+ * required to start IO on it. It may be clean and already done with
+ * IO when we return.
++ *
++ * When unlock == 1, we unlock the pages in successfully allocated regions.
++ * When unlock == 0, we leave them locked for writing them out.
++ *
++ * However, we unlock all the pages except @locked_page in case of failure.
++ *
++ * In summary, page locking state will be as follow:
++ *
++ * - page_started == 1 (return value)
++ * - All the pages are unlocked. IO is started.
++ * - Note that this can happen only on success
++ * - unlock == 1
++ * - All the pages except @locked_page are unlocked in any case
++ * - unlock == 0
++ * - On success, all the pages are locked for writing out them
++ * - On failure, all the pages except @locked_page are unlocked
++ *
++ * When a failure happens in the second or later iteration of the
++ * while-loop, the ordered extents created in previous iterations are kept
++ * intact. So, the caller must clean them up by calling
++ * btrfs_cleanup_ordered_extents(). See btrfs_run_delalloc_range() for
++ * example.
+ */
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+ struct page *locked_page,
+@@ -1064,6 +1084,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u64 alloc_hint = 0;
++ u64 orig_start = start;
+ u64 num_bytes;
+ unsigned long ram_size;
+ u64 cur_alloc_size = 0;
+@@ -1077,7 +1098,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+ int ret = 0;
+
+ if (btrfs_is_free_space_inode(inode)) {
+- WARN_ON_ONCE(1);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+@@ -1151,7 +1171,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
++ if (btrfs_is_data_reloc_root(root))
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+@@ -1187,8 +1207,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+ if (ret)
+ goto out_drop_extent_cache;
+
+- if (root->root_key.objectid ==
+- BTRFS_DATA_RELOC_TREE_OBJECTID) {
++ if (btrfs_is_data_reloc_root(root)) {
+ ret = btrfs_reloc_clone_csums(inode, start,
+ cur_alloc_size);
+ /*
+@@ -1249,18 +1268,44 @@ out_reserve:
+ btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ out_unlock:
++ /*
++ * Now, we have three regions to clean up:
++ *
++ * |-------(1)----|---(2)---|-------------(3)----------|
++ * `- orig_start `- start `- start + cur_alloc_size `- end
++ *
++ * We process each region below.
++ */
++
+ clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
+ page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
++
++ /*
++ * For the range (1). We have already instantiated the ordered extents
++ * for this region. They are cleaned up by
++ * btrfs_cleanup_ordered_extents() in e.g,
++ * btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are
++ * already cleared in the above loop. And, EXTENT_DELALLOC_NEW |
++ * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
++ * function.
++ *
++ * However, in case of unlock == 0, we still need to unlock the pages
++ * (except @locked_page) to ensure all the pages are unlocked.
++ */
++ if (!unlock && orig_start < start)
++ extent_clear_unlock_delalloc(inode, orig_start, start - 1,
++ locked_page, 0, page_ops);
++
+ /*
+- * If we reserved an extent for our delalloc range (or a subrange) and
+- * failed to create the respective ordered extent, then it means that
+- * when we reserved the extent we decremented the extent's size from
+- * the data space_info's bytes_may_use counter and incremented the
+- * space_info's bytes_reserved counter by the same amount. We must make
+- * sure extent_clear_unlock_delalloc() does not try to decrement again
+- * the data space_info's bytes_may_use counter, therefore we do not pass
+- * it the flag EXTENT_CLEAR_DATA_RESV.
++ * For the range (2). If we reserved an extent for our delalloc range
++ * (or a subrange) and failed to create the respective ordered extent,
++ * then it means that when we reserved the extent we decremented the
++ * extent's size from the data space_info's bytes_may_use counter and
++ * incremented the space_info's bytes_reserved counter by the same
++ * amount. We must make sure extent_clear_unlock_delalloc() does not try
++ * to decrement again the data space_info's bytes_may_use counter,
++ * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
+ */
+ if (extent_reserved) {
+ extent_clear_unlock_delalloc(inode, start,
+@@ -1272,6 +1317,13 @@ out_unlock:
+ if (start >= end)
+ goto out;
+ }
++
++ /*
++ * For the range (3). We never touched the region. In addition to the
++ * clear_bits above, we add EXTENT_CLEAR_DATA_RESV to release the data
++ * space_info's bytes_may_use counter, reserved in
++ * btrfs_check_data_free_space().
++ */
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ clear_bits | EXTENT_CLEAR_DATA_RESV,
+ page_ops);
+@@ -1504,8 +1556,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
+ int *page_started, unsigned long *nr_written)
+ {
+ const bool is_space_ino = btrfs_is_free_space_inode(inode);
+- const bool is_reloc_ino = (inode->root->root_key.objectid ==
+- BTRFS_DATA_RELOC_TREE_OBJECTID);
++ const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
+ const u64 range_bytes = end + 1 - start;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ u64 range_start = start;
+@@ -1867,8 +1918,7 @@ out_check:
+ btrfs_dec_nocow_writers(fs_info, disk_bytenr);
+ nocow = false;
+
+- if (root->root_key.objectid ==
+- BTRFS_DATA_RELOC_TREE_OBJECTID)
++ if (btrfs_is_data_reloc_root(root))
+ /*
+ * Error handled later, as we must prevent
+ * extent_clear_unlock_delalloc() in error handler
+@@ -1948,7 +1998,15 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+ const bool zoned = btrfs_is_zoned(inode->root->fs_info);
+
+ if (should_nocow(inode, start, end)) {
+- ASSERT(!zoned);
++ /*
++ * Normally on a zoned device we're only doing COW writes, but
++ * in case of relocation on a zoned filesystem we have taken
++ * precaution, that we're only writing sequentially. It's safe
++ * to use run_delalloc_nocow() here, like for regular
++ * preallocated inodes.
++ */
++ ASSERT(!zoned ||
++ (zoned && btrfs_is_data_reloc_root(inode->root)));
+ ret = run_delalloc_nocow(inode, locked_page, start, end,
+ page_started, nr_written);
+ } else if (!inode_can_compress(inode) ||
+@@ -1974,6 +2032,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+ void btrfs_split_delalloc_extent(struct inode *inode,
+ struct extent_state *orig, u64 split)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 size;
+
+ /* not delalloc, ignore it */
+@@ -1981,7 +2040,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+ return;
+
+ size = orig->end - orig->start + 1;
+- if (size > BTRFS_MAX_EXTENT_SIZE) {
++ if (size > fs_info->max_extent_size) {
+ u32 num_extents;
+ u64 new_size;
+
+@@ -1990,10 +2049,10 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+ * applies here, just in reverse.
+ */
+ new_size = orig->end - split + 1;
+- num_extents = count_max_extents(new_size);
++ num_extents = count_max_extents(fs_info, new_size);
+ new_size = split - orig->start;
+- num_extents += count_max_extents(new_size);
+- if (count_max_extents(size) >= num_extents)
++ num_extents += count_max_extents(fs_info, new_size);
++ if (count_max_extents(fs_info, size) >= num_extents)
+ return;
+ }
+
+@@ -2010,6 +2069,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ struct extent_state *other)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 new_size, old_size;
+ u32 num_extents;
+
+@@ -2023,7 +2083,7 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ new_size = other->end - new->start + 1;
+
+ /* we're not bigger than the max, unreserve the space and go */
+- if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
++ if (new_size <= fs_info->max_extent_size) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
+ spin_unlock(&BTRFS_I(inode)->lock);
+@@ -2049,10 +2109,10 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ * this case.
+ */
+ old_size = other->end - other->start + 1;
+- num_extents = count_max_extents(old_size);
++ num_extents = count_max_extents(fs_info, old_size);
+ old_size = new->end - new->start + 1;
+- num_extents += count_max_extents(old_size);
+- if (count_max_extents(new_size) >= num_extents)
++ num_extents += count_max_extents(fs_info, old_size);
++ if (count_max_extents(fs_info, new_size) >= num_extents)
+ return;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2131,7 +2191,7 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
+ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+ bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2173,7 +2233,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
+ struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+ struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+
+ if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+ spin_lock(&inode->lock);
+@@ -2207,7 +2267,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
+ if (btrfs_is_testing(fs_info))
+ return;
+
+- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
++ if (!btrfs_is_data_reloc_root(root) &&
+ do_list && !(state->state & EXTENT_NORESERVE) &&
+ (*bits & EXTENT_CLEAR_DATA_RESV))
+ btrfs_free_reserved_data_space_noquota(fs_info, len);
+@@ -2532,7 +2592,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
+ goto mapit;
+ } else if (async && !skip_sum) {
+ /* csum items have already been cloned */
+- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
++ if (btrfs_is_data_reloc_root(root))
+ goto mapit;
+ /* we're doing a write, do the async checksumming */
+ ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
+@@ -3067,6 +3127,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ logical_len);
++ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
+ } else {
+ BUG_ON(root == fs_info->tree_root);
+ ret = insert_ordered_extent_file_extent(trans, ordered_extent);
+@@ -3164,6 +3226,13 @@ out:
+ btrfs_free_reserved_extent(fs_info,
+ ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes, 1);
++ /*
++ * Actually free the qgroup rsv which was released when
++ * the ordered extent was created.
++ */
++ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
++ ordered_extent->qgroup_rsv,
++ BTRFS_QGROUP_RSV_DATA);
+ }
+ }
+
+@@ -3304,7 +3373,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
+ u64 file_offset = pg_off + page_offset(page);
+ int ret;
+
+- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
++ if (btrfs_is_data_reloc_root(root) &&
+ test_range_bit(io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM, 1, NULL)) {
+@@ -4005,7 +4074,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+ * without delay
+ */
+ if (!btrfs_is_free_space_inode(inode)
+- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
++ && !btrfs_is_data_reloc_root(root)
+ && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
+ btrfs_update_root_times(trans, root);
+
+@@ -4035,11 +4104,11 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ * also drops the back refs in the inode to the directory
+ */
+ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
+ const char *name, int name_len)
+ {
++ struct btrfs_root *root = dir->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_path *path;
+ int ret = 0;
+@@ -4139,15 +4208,14 @@ out:
+ }
+
+ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
+ const char *name, int name_len)
+ {
+ int ret;
+- ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
++ ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len);
+ if (!ret) {
+ drop_nlink(&inode->vfs_inode);
+- ret = btrfs_update_inode(trans, root, inode);
++ ret = btrfs_update_inode(trans, inode->root, inode);
+ }
+ return ret;
+ }
+@@ -4176,7 +4244,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
+
+ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+ {
+- struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_trans_handle *trans;
+ struct inode *inode = d_inode(dentry);
+ int ret;
+@@ -4188,7 +4255,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+ btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+ 0);
+
+- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
++ ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
+ if (ret)
+@@ -4202,7 +4269,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+
+ out:
+ btrfs_end_transaction(trans);
+- btrfs_btree_balance_dirty(root->fs_info);
++ btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
+ return ret;
+ }
+
+@@ -4447,6 +4514,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
+ dest->root_key.objectid);
+ return -EPERM;
+ }
++ if (atomic_read(&dest->nr_swapfiles)) {
++ spin_unlock(&dest->root_item_lock);
++ btrfs_warn(fs_info,
++ "attempt to delete subvolume %llu with active swapfile",
++ root->root_key.objectid);
++ return -EPERM;
++ }
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+@@ -4553,7 +4627,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+ struct inode *inode = d_inode(dentry);
+ int err = 0;
+- struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_trans_handle *trans;
+ u64 last_unlink_trans;
+
+@@ -4578,7 +4651,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+ last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
+ /* now the directory is empty */
+- err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
++ err = btrfs_unlink_inode(trans, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
+ if (!err) {
+@@ -4599,7 +4672,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+ }
+ out:
+ btrfs_end_transaction(trans);
+- btrfs_btree_balance_dirty(root->fs_info);
++ btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
+
+ return err;
+ }
+@@ -4910,7 +4983,8 @@ delete:
+ extent_start, extent_num_bytes, 0);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+- ino, extent_offset);
++ ino, extent_offset,
++ root->root_key.objectid, false);
+ ret = btrfs_free_extent(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -6326,14 +6400,8 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+ goto out;
+ ret = 0;
+
+- /*
+- * MAGIC NUMBER EXPLANATION:
+- * since we search a directory based on f_pos we have to start at 2
+- * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
+- * else has to start at 2
+- */
+ if (path->slots[0] == 0) {
+- inode->index_cnt = 2;
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+@@ -6344,7 +6412,7 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+
+ if (found_key.objectid != btrfs_ino(inode) ||
+ found_key.type != BTRFS_DIR_INDEX_KEY) {
+- inode->index_cnt = 2;
++ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+@@ -6888,7 +6956,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+ goto fail;
+ }
+ d_instantiate(dentry, inode);
+- btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
++ btrfs_log_new_name(trans, old_dentry, NULL, parent);
+ }
+
+ fail:
+@@ -7758,6 +7826,11 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct extent_map *em = *map;
++ int type;
++ u64 block_start, orig_start, orig_block_len, ram_bytes;
++ bool can_nocow = false;
++ bool space_reserved = false;
++ u64 prev_len;
+ int ret = 0;
+
+ /*
+@@ -7772,9 +7845,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+ ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
+ em->block_start != EXTENT_MAP_HOLE)) {
+- int type;
+- u64 block_start, orig_start, orig_block_len, ram_bytes;
+-
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ type = BTRFS_ORDERED_PREALLOC;
+ else
+@@ -7784,53 +7854,91 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
+
+ if (can_nocow_extent(inode, start, &len, &orig_start,
+ &orig_block_len, &ram_bytes, false) == 1 &&
+- btrfs_inc_nocow_writers(fs_info, block_start)) {
+- struct extent_map *em2;
++ btrfs_inc_nocow_writers(fs_info, block_start))
++ can_nocow = true;
++ }
++
++ prev_len = len;
++ if (can_nocow) {
++ struct extent_map *em2;
+
+- em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
+- orig_start, block_start,
+- len, orig_block_len,
+- ram_bytes, type);
++ /* We can NOCOW, so only need to reserve metadata space. */
++ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
++ if (ret < 0) {
++ /* Our caller expects us to free the input extent map. */
++ free_extent_map(em);
++ *map = NULL;
+ btrfs_dec_nocow_writers(fs_info, block_start);
+- if (type == BTRFS_ORDERED_PREALLOC) {
+- free_extent_map(em);
+- *map = em = em2;
+- }
++ goto out;
++ }
++ space_reserved = true;
+
+- if (em2 && IS_ERR(em2)) {
+- ret = PTR_ERR(em2);
+- goto out;
+- }
+- /*
+- * For inode marked NODATACOW or extent marked PREALLOC,
+- * use the existing or preallocated extent, so does not
+- * need to adjust btrfs_space_info's bytes_may_use.
+- */
+- btrfs_free_reserved_data_space_noquota(fs_info, len);
+- goto skip_cow;
++ em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
++ orig_start, block_start,
++ len, orig_block_len,
++ ram_bytes, type);
++ btrfs_dec_nocow_writers(fs_info, block_start);
++ if (type == BTRFS_ORDERED_PREALLOC) {
++ free_extent_map(em);
++ *map = em = em2;
+ }
+- }
+
+- /* this will cow the extent */
+- free_extent_map(em);
+- *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
+- if (IS_ERR(em)) {
+- ret = PTR_ERR(em);
+- goto out;
++ if (IS_ERR(em2)) {
++ ret = PTR_ERR(em2);
++ goto out;
++ }
++ } else {
++ /* Our caller expects us to free the input extent map. */
++ free_extent_map(em);
++ *map = NULL;
++
++ /* We have to COW, so need to reserve metadata and data space. */
++ ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
++ &dio_data->data_reserved,
++ start, len);
++ if (ret < 0)
++ goto out;
++ space_reserved = true;
++
++ em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
++ if (IS_ERR(em)) {
++ ret = PTR_ERR(em);
++ goto out;
++ }
++ *map = em;
++ len = min(len, em->len - (start - em->start));
++ if (len < prev_len)
++ btrfs_delalloc_release_space(BTRFS_I(inode),
++ dio_data->data_reserved,
++ start + len, prev_len - len,
++ true);
+ }
+
+- len = min(len, em->len - (start - em->start));
++ /*
++ * We have created our ordered extent, so we can now release our reservation
++ * for an outstanding extent.
++ */
++ btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);
+
+-skip_cow:
+ /*
+ * Need to update the i_size under the extent lock so buffered
+ * readers will get the updated i_size when we unlock.
+ */
+ if (start + len > i_size_read(inode))
+ i_size_write(inode, start + len);
+-
+- dio_data->reserve -= len;
+ out:
++ if (ret && space_reserved) {
++ btrfs_delalloc_release_extents(BTRFS_I(inode), len);
++ if (can_nocow) {
++ btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
++ } else {
++ btrfs_delalloc_release_space(BTRFS_I(inode),
++ dio_data->data_reserved,
++ start, len, true);
++ extent_changeset_free(dio_data->data_reserved);
++ dio_data->data_reserved = NULL;
++ }
++ }
+ return ret;
+ }
+
+@@ -7872,18 +7980,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
+ if (!dio_data)
+ return -ENOMEM;
+
+- dio_data->length = length;
+- if (write) {
+- dio_data->reserve = round_up(length, fs_info->sectorsize);
+- ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
+- &dio_data->data_reserved,
+- start, dio_data->reserve);
+- if (ret) {
+- extent_changeset_free(dio_data->data_reserved);
+- kfree(dio_data);
+- return ret;
+- }
+- }
+ iomap->private = dio_data;
+
+
+@@ -7919,11 +8015,51 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
+ em->block_start == EXTENT_MAP_INLINE) {
+ free_extent_map(em);
+- ret = -ENOTBLK;
++ /*
++ * If we are in a NOWAIT context, return -EAGAIN in order to
++ * fallback to buffered IO. This is not only because we can
++ * block with buffered IO (no support for NOWAIT semantics at
++ * the moment) but also to avoid returning short reads to user
++ * space - this happens if we were able to read some data from
++ * previous non-compressed extents and then when we fallback to
++ * buffered IO, at btrfs_file_read_iter() by calling
++ * filemap_read(), we fail to fault in pages for the read buffer,
++ * in which case filemap_read() returns a short read (the number
++ * of bytes previously read is > 0, so it does not return -EFAULT).
++ */
++ ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
+ goto unlock_err;
+ }
+
+ len = min(len, em->len - (start - em->start));
++
++ /*
++ * If we have a NOWAIT request and the range contains multiple extents
++ * (or a mix of extents and holes), then we return -EAGAIN to make the
++ * caller fallback to a context where it can do a blocking (without
++ * NOWAIT) request. This way we avoid doing partial IO and returning
++ * success to the caller, which is not optimal for writes and for reads
++ * it can result in unexpected behaviour for an application.
++ *
++ * When doing a read, because we use IOMAP_DIO_PARTIAL when calling
++ * iomap_dio_rw(), we can end up returning less data then what the caller
++ * asked for, resulting in an unexpected, and incorrect, short read.
++ * That is, the caller asked to read N bytes and we return less than that,
++ * which is wrong unless we are crossing EOF. This happens if we get a
++ * page fault error when trying to fault in pages for the buffer that is
++ * associated to the struct iov_iter passed to iomap_dio_rw(), and we
++ * have previously submitted bios for other extents in the range, in
++ * which case iomap_dio_rw() may return us EIOCBQUEUED if not all of
++ * those bios have completed by the time we get the page fault error,
++ * which we return back to our caller - we should only return EIOCBQUEUED
++ * after we have submitted bios for all the extents in the range.
++ */
++ if ((flags & IOMAP_NOWAIT) && len < length) {
++ free_extent_map(em);
++ ret = -EAGAIN;
++ goto unlock_err;
++ }
++
+ if (write) {
+ ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
+ start, len);
+@@ -7962,7 +8098,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
+ iomap->type = IOMAP_MAPPED;
+ }
+ iomap->offset = start;
+- iomap->bdev = fs_info->fs_devices->latest_bdev;
++ iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
+ iomap->length = len;
+
+ if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
+@@ -7976,14 +8112,8 @@ unlock_err:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state);
+ err:
+- if (dio_data) {
+- btrfs_delalloc_release_space(BTRFS_I(inode),
+- dio_data->data_reserved, start,
+- dio_data->reserve, true);
+- btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve);
+- extent_changeset_free(dio_data->data_reserved);
+- kfree(dio_data);
+- }
++ kfree(dio_data);
++
+ return ret;
+ }
+
+@@ -8013,14 +8143,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ret = -ENOTBLK;
+ }
+
+- if (write) {
+- if (dio_data->reserve)
+- btrfs_delalloc_release_space(BTRFS_I(inode),
+- dio_data->data_reserved, pos,
+- dio_data->reserve, true);
+- btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length);
++ if (write)
+ extent_changeset_free(dio_data->data_reserved);
+- }
+ out:
+ kfree(dio_data);
+ iomap->private = NULL;
+@@ -9451,7 +9575,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
+ } else { /* src is an inode */
+- ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
++ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+ BTRFS_I(old_dentry->d_inode),
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+@@ -9467,7 +9591,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
+ } else { /* dest is an inode */
+- ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
++ ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+ BTRFS_I(new_dentry->d_inode),
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+@@ -9501,13 +9625,13 @@ static int btrfs_rename_exchange(struct inode *old_dir,
+ BTRFS_I(new_inode)->dir_index = new_idx;
+
+ if (root_log_pinned) {
+- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
++ btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
+ new_dentry->d_parent);
+ btrfs_end_log_trans(root);
+ root_log_pinned = false;
+ }
+ if (dest_log_pinned) {
+- btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
++ btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
+ old_dentry->d_parent);
+ btrfs_end_log_trans(dest);
+ dest_log_pinned = false;
+@@ -9742,7 +9866,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+ */
+ btrfs_pin_log_trans(root);
+ log_pinned = true;
+- ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
++ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
+ BTRFS_I(d_inode(old_dentry)),
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+@@ -9762,7 +9886,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+ ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
+ BUG_ON(new_inode->i_nlink == 0);
+ } else {
+- ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
++ ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
+ BTRFS_I(d_inode(new_dentry)),
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+@@ -9788,7 +9912,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
+ BTRFS_I(old_inode)->dir_index = index;
+
+ if (log_pinned) {
+- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
++ btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
+ new_dentry->d_parent);
+ btrfs_end_log_trans(root);
+ log_pinned = false;
+@@ -10581,9 +10705,19 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
+ struct btrfs_swap_info *bsi)
+ {
+ unsigned long nr_pages;
++ unsigned long max_pages;
+ u64 first_ppage, first_ppage_reported, next_ppage;
+ int ret;
+
++ /*
++ * Our swapfile may have had its size extended after the swap header was
++ * written. In that case activating the swapfile should not go beyond
++ * the max size set in the swap header.
++ */
++ if (bsi->nr_pages >= sis->max)
++ return 0;
++
++ max_pages = sis->max - bsi->nr_pages;
+ first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
+ next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
+ PAGE_SIZE) >> PAGE_SHIFT;
+@@ -10591,6 +10725,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
+ if (first_ppage >= next_ppage)
+ return 0;
+ nr_pages = next_ppage - first_ppage;
++ nr_pages = min(nr_pages, max_pages);
+
+ first_ppage_reported = first_ppage;
+ if (bsi->start == 0)
+@@ -10691,8 +10826,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ * set. We use this counter to prevent snapshots. We must increment it
+ * before walking the extents because we don't want a concurrent
+ * snapshot to run after we've already checked the extents.
++ *
++ * It is possible that subvolume is marked for deletion but still not
++ * removed yet. To prevent this race, we check the root status before
++ * activating the swapfile.
+ */
++ spin_lock(&root->root_item_lock);
++ if (btrfs_root_dead(root)) {
++ spin_unlock(&root->root_item_lock);
++
++ btrfs_exclop_finish(fs_info);
++ btrfs_warn(fs_info,
++ "cannot activate swapfile because subvolume %llu is being deleted",
++ root->root_key.objectid);
++ return -EPERM;
++ }
+ atomic_inc(&root->nr_swapfiles);
++ spin_unlock(&root->root_item_lock);
+
+ isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
+
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index cc61813213d83..c9b3d99171b26 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -615,11 +615,13 @@ static noinline int create_subvol(struct user_namespace *mnt_userns,
+ * Since we don't abort the transaction in this case, free the
+ * tree block so that we don't leak space and leave the
+ * filesystem in an inconsistent state (an extent item in the
+- * extent tree without backreferences). Also no need to have
+- * the tree block locked since it is not in any tree at this
+- * point, so no other task can find it and use it.
++ * extent tree with a backreference for a root that does not
++ * exists).
+ */
+- btrfs_free_tree_block(trans, root, leaf, 0, 1);
++ btrfs_tree_lock(leaf);
++ btrfs_clean_tree_block(leaf);
++ btrfs_tree_unlock(leaf);
++ btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
+ free_extent_buffer(leaf);
+ goto fail;
+ }
+@@ -775,10 +777,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
+ goto fail;
+ }
+
+- spin_lock(&fs_info->trans_lock);
+- list_add(&pending_snapshot->list,
+- &trans->transaction->pending_snapshots);
+- spin_unlock(&fs_info->trans_lock);
++ trans->pending_snapshot = pending_snapshot;
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+@@ -1658,6 +1657,7 @@ static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
+ static noinline int btrfs_ioctl_resize(struct file *file,
+ void __user *arg)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 new_size;
+@@ -1713,7 +1713,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
+ btrfs_info(fs_info, "resizing devid %llu", devid);
+ }
+
+- device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
++ args.devid = devid;
++ device = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!device) {
+ btrfs_info(fs_info, "resizer unable to find device %llu",
+ devid);
+@@ -2261,9 +2262,8 @@ static noinline int search_ioctl(struct inode *inode,
+ key.offset = sk->min_offset;
+
+ while (1) {
+- ret = fault_in_pages_writeable(ubuf + sk_offset,
+- *buf_size - sk_offset);
+- if (ret)
++ ret = -EFAULT;
++ if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
+ break;
+
+ ret = btrfs_search_forward(root, &key, path, sk->min_transid);
+@@ -2788,6 +2788,8 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+ }
+ }
+
++ btrfs_free_path(path);
++ path = NULL;
+ if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
+ ret = -EFAULT;
+
+@@ -2880,6 +2882,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+ }
+
+ out:
++ btrfs_free_path(path);
++
+ if (!ret || ret == -EOVERFLOW) {
+ rootrefs->num_items = found;
+ /* update min_treeid for next search */
+@@ -2891,7 +2895,6 @@ out:
+ }
+
+ kfree(rootrefs);
+- btrfs_free_path(path);
+
+ return ret;
+ }
+@@ -3098,10 +3101,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+ btrfs_inode_lock(inode, 0);
+ err = btrfs_delete_subvolume(dir, dentry);
+ btrfs_inode_unlock(inode, 0);
+- if (!err) {
+- fsnotify_rmdir(dir, dentry);
+- d_delete(dentry);
+- }
++ if (!err)
++ d_delete_notify(dir, dentry);
+
+ out_dput:
+ dput(dentry);
+@@ -3220,6 +3221,7 @@ out:
+
+ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_ioctl_vol_args_v2 *vol_args;
+@@ -3231,35 +3233,37 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+- ret = mnt_want_write_file(file);
+- if (ret)
+- return ret;
+-
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+- if (IS_ERR(vol_args)) {
+- ret = PTR_ERR(vol_args);
+- goto err_drop;
+- }
++ if (IS_ERR(vol_args))
++ return PTR_ERR(vol_args);
+
+ if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
++
+ vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+- if (!(vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) &&
+- strcmp("cancel", vol_args->name) == 0)
++ if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
++ args.devid = vol_args->devid;
++ } else if (!strcmp("cancel", vol_args->name)) {
+ cancel = true;
++ } else {
++ ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name);
++ if (ret)
++ goto out;
++ }
++
++ ret = mnt_want_write_file(file);
++ if (ret)
++ goto out;
+
+ ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
+ cancel);
+ if (ret)
+- goto out;
+- /* Exclusive operation is now claimed */
++ goto err_drop;
+
+- if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
+- ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode);
+- else
+- ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
++ /* Exclusive operation is now claimed */
++ ret = btrfs_rm_device(fs_info, &args, &bdev, &mode);
+
+ btrfs_exclop_finish(fs_info);
+
+@@ -3271,54 +3275,62 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
+ btrfs_info(fs_info, "device deleted: %s",
+ vol_args->name);
+ }
+-out:
+- kfree(vol_args);
+ err_drop:
+ mnt_drop_write_file(file);
+ if (bdev)
+ blkdev_put(bdev, mode);
++out:
++ btrfs_put_dev_args_from_path(&args);
++ kfree(vol_args);
+ return ret;
+ }
+
+ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_ioctl_vol_args *vol_args;
+ struct block_device *bdev = NULL;
+ fmode_t mode;
+ int ret;
+- bool cancel;
++ bool cancel = false;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+- ret = mnt_want_write_file(file);
+- if (ret)
+- return ret;
+-
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+- if (IS_ERR(vol_args)) {
+- ret = PTR_ERR(vol_args);
+- goto out_drop_write;
+- }
++ if (IS_ERR(vol_args))
++ return PTR_ERR(vol_args);
++
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+- cancel = (strcmp("cancel", vol_args->name) == 0);
++ if (!strcmp("cancel", vol_args->name)) {
++ cancel = true;
++ } else {
++ ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name);
++ if (ret)
++ goto out;
++ }
++
++ ret = mnt_want_write_file(file);
++ if (ret)
++ goto out;
+
+ ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
+ cancel);
+ if (ret == 0) {
+- ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
++ ret = btrfs_rm_device(fs_info, &args, &bdev, &mode);
+ if (!ret)
+ btrfs_info(fs_info, "disk deleted %s", vol_args->name);
+ btrfs_exclop_finish(fs_info);
+ }
+
+- kfree(vol_args);
+-out_drop_write:
+ mnt_drop_write_file(file);
+ if (bdev)
+ blkdev_put(bdev, mode);
++out:
++ btrfs_put_dev_args_from_path(&args);
++ kfree(vol_args);
+ return ret;
+ }
+
+@@ -3379,22 +3391,21 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
+ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
+ void __user *arg)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_ioctl_dev_info_args *di_args;
+ struct btrfs_device *dev;
+ int ret = 0;
+- char *s_uuid = NULL;
+
+ di_args = memdup_user(arg, sizeof(*di_args));
+ if (IS_ERR(di_args))
+ return PTR_ERR(di_args);
+
++ args.devid = di_args->devid;
+ if (!btrfs_is_empty_uuid(di_args->uuid))
+- s_uuid = di_args->uuid;
++ args.uuid = di_args->uuid;
+
+ rcu_read_lock();
+- dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
+- NULL);
+-
++ dev = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+@@ -3404,13 +3415,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
+ di_args->bytes_used = btrfs_device_get_bytes_used(dev);
+ di_args->total_bytes = btrfs_device_get_total_bytes(dev);
+ memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
+- if (dev->name) {
+- strncpy(di_args->path, rcu_str_deref(dev->name),
+- sizeof(di_args->path) - 1);
+- di_args->path[sizeof(di_args->path) - 1] = 0;
+- } else {
++ if (dev->name)
++ strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path));
++ else
+ di_args->path[0] = '\0';
+- }
+
+ out:
+ rcu_read_unlock();
+@@ -3707,6 +3715,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
+
++ if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
++
+ if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
+ ret = mnt_want_write_file(file);
+ if (ret)
+@@ -3883,6 +3896,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
+ ipath->fspath->val[i] = rel_ptr;
+ }
+
++ btrfs_free_path(path);
++ path = NULL;
+ ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
+ ipath->fspath, size);
+ if (ret) {
+@@ -3898,26 +3913,6 @@ out:
+ return ret;
+ }
+
+-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
+-{
+- struct btrfs_data_container *inodes = ctx;
+- const size_t c = 3 * sizeof(u64);
+-
+- if (inodes->bytes_left >= c) {
+- inodes->bytes_left -= c;
+- inodes->val[inodes->elem_cnt] = inum;
+- inodes->val[inodes->elem_cnt + 1] = offset;
+- inodes->val[inodes->elem_cnt + 2] = root;
+- inodes->elem_cnt += 3;
+- } else {
+- inodes->bytes_missing += c - inodes->bytes_left;
+- inodes->bytes_left = 0;
+- inodes->elem_missed += 3;
+- }
+-
+- return 0;
+-}
+-
+ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
+ void __user *arg, int version)
+ {
+@@ -3953,21 +3948,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
+ size = min_t(u32, loi->size, SZ_16M);
+ }
+
+- path = btrfs_alloc_path();
+- if (!path) {
+- ret = -ENOMEM;
+- goto out;
+- }
+-
+ inodes = init_data_container(size);
+ if (IS_ERR(inodes)) {
+ ret = PTR_ERR(inodes);
+- inodes = NULL;
+- goto out;
++ goto out_loi;
+ }
+
++ path = btrfs_alloc_path();
++ if (!path) {
++ ret = -ENOMEM;
++ goto out;
++ }
+ ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
+- build_ino_list, inodes, ignore_offset);
++ inodes, ignore_offset);
++ btrfs_free_path(path);
+ if (ret == -EINVAL)
+ ret = -ENOENT;
+ if (ret < 0)
+@@ -3979,7 +3973,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
+ ret = -EFAULT;
+
+ out:
+- btrfs_free_path(path);
+ kvfree(inodes);
+ out_loi:
+ kfree(loi);
+@@ -4279,7 +4272,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
+ }
+
+ /* update qgroup status and info */
++ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ err = btrfs_run_qgroups(trans);
++ mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (err < 0)
+ btrfs_handle_fs_error(fs_info, err,
+ "failed to update qgroup status and info");
+diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
+index 313d9d685adb7..9063072b399bd 100644
+--- a/fs/btrfs/locking.c
++++ b/fs/btrfs/locking.c
+@@ -13,6 +13,93 @@
+ #include "extent_io.h"
+ #include "locking.h"
+
++/*
++ * Lockdep class keys for extent_buffer->lock's in this root. For a given
++ * eb, the lockdep key is determined by the btrfs_root it belongs to and
++ * the level the eb occupies in the tree.
++ *
++ * Different roots are used for different purposes and may nest inside each
++ * other and they require separate keysets. As lockdep keys should be
++ * static, assign keysets according to the purpose of the root as indicated
++ * by btrfs_root->root_key.objectid. This ensures that all special purpose
++ * roots have separate keysets.
++ *
++ * Lock-nesting across peer nodes is always done with the immediate parent
++ * node locked thus preventing deadlock. As lockdep doesn't know this, use
++ * subclass to avoid triggering lockdep warning in such cases.
++ *
++ * The key is set by the readpage_end_io_hook after the buffer has passed
++ * csum validation but before the pages are unlocked. It is also set by
++ * btrfs_init_new_buffer on freshly allocated blocks.
++ *
++ * We also add a check to make sure the highest level of the tree is the
++ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
++ * needs update as well.
++ */
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++#if BTRFS_MAX_LEVEL != 8
++#error
++#endif
++
++#define DEFINE_LEVEL(stem, level) \
++ .names[level] = "btrfs-" stem "-0" #level,
++
++#define DEFINE_NAME(stem) \
++ DEFINE_LEVEL(stem, 0) \
++ DEFINE_LEVEL(stem, 1) \
++ DEFINE_LEVEL(stem, 2) \
++ DEFINE_LEVEL(stem, 3) \
++ DEFINE_LEVEL(stem, 4) \
++ DEFINE_LEVEL(stem, 5) \
++ DEFINE_LEVEL(stem, 6) \
++ DEFINE_LEVEL(stem, 7)
++
++static struct btrfs_lockdep_keyset {
++ u64 id; /* root objectid */
++ /* Longest entry: btrfs-free-space-00 */
++ char names[BTRFS_MAX_LEVEL][20];
++ struct lock_class_key keys[BTRFS_MAX_LEVEL];
++} btrfs_lockdep_keysets[] = {
++ { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
++ { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
++ { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
++ { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
++ { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
++ { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
++ { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
++ { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
++ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
++ { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
++ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
++ { .id = 0, DEFINE_NAME("tree") },
++};
++
++#undef DEFINE_LEVEL
++#undef DEFINE_NAME
++
++void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
++{
++ struct btrfs_lockdep_keyset *ks;
++
++ BUG_ON(level >= ARRAY_SIZE(ks->keys));
++
++ /* Find the matching keyset, id 0 is the default entry */
++ for (ks = btrfs_lockdep_keysets; ks->id; ks++)
++ if (ks->id == objectid)
++ break;
++
++ lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
++}
++
++void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
++{
++ if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
++ btrfs_set_buffer_lockdep_class(root->root_key.objectid,
++ eb, btrfs_header_level(eb));
++}
++
++#endif
++
+ /*
+ * Extent buffer locking
+ * =====================
+@@ -45,7 +132,6 @@ void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting ne
+ start_ns = ktime_get_ns();
+
+ down_read_nested(&eb->lock, nest);
+- eb->lock_owner = current->pid;
+ trace_btrfs_tree_read_lock(eb, start_ns);
+ }
+
+@@ -62,7 +148,6 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
+ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
+ {
+ if (down_read_trylock(&eb->lock)) {
+- eb->lock_owner = current->pid;
+ trace_btrfs_try_tree_read_lock(eb);
+ return 1;
+ }
+@@ -90,7 +175,6 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
+ void btrfs_tree_read_unlock(struct extent_buffer *eb)
+ {
+ trace_btrfs_tree_read_unlock(eb);
+- eb->lock_owner = 0;
+ up_read(&eb->lock);
+ }
+
+@@ -167,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+
+ while (1) {
+ eb = btrfs_root_node(root);
++
++ btrfs_maybe_reset_lockdep_class(root, eb);
+ btrfs_tree_lock(eb);
+ if (eb == root->node)
+ break;
+@@ -188,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+
+ while (1) {
+ eb = btrfs_root_node(root);
++
++ btrfs_maybe_reset_lockdep_class(root, eb);
+ btrfs_tree_read_lock(eb);
+ if (eb == root->node)
+ break;
+diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
+index a2e1f1f5c6e34..26a2f962c268e 100644
+--- a/fs/btrfs/locking.h
++++ b/fs/btrfs/locking.h
+@@ -130,4 +130,18 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
+ void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
+ void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level);
++void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb);
++#else
++static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
++ struct extent_buffer *eb, int level)
++{
++}
++static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root,
++ struct extent_buffer *eb)
++{
++}
++#endif
++
+ #endif
+diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
+index 3dbe6eb5fda75..fcd7eb496478c 100644
+--- a/fs/btrfs/lzo.c
++++ b/fs/btrfs/lzo.c
+@@ -357,11 +357,23 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
+ ASSERT(cur_in / sectorsize ==
+ (cur_in + LZO_LEN - 1) / sectorsize);
+ cur_page = cb->compressed_pages[cur_in / PAGE_SIZE];
+- kaddr = kmap(cur_page);
+ ASSERT(cur_page);
++ kaddr = kmap(cur_page);
+ seg_len = read_compress_length(kaddr + offset_in_page(cur_in));
++ kunmap(cur_page);
+ cur_in += LZO_LEN;
+
++ if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
++ /*
++ * seg_len shouldn't be larger than we have allocated
++ * for workspace->cbuf
++ */
++ btrfs_err(fs_info, "unexpectedly large lzo segment len %u",
++ seg_len);
++ ret = -EIO;
++ goto out;
++ }
++
+ /* Copy the compressed segment payload into workspace */
+ copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
+
+diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
+index aae1027bd76a1..0757f133e302a 100644
+--- a/fs/btrfs/print-tree.c
++++ b/fs/btrfs/print-tree.c
+@@ -147,10 +147,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
+ pr_cont("shared data backref parent %llu count %u\n",
+ offset, btrfs_shared_data_ref_count(eb, sref));
+ /*
+- * offset is supposed to be a tree block which
+- * must be aligned to nodesize.
++ * Offset is supposed to be a tree block which must be
++ * aligned to sectorsize.
+ */
+- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
++ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
+ pr_info(
+ "\t\t\t(parent %llu not aligned to sectorsize %u)\n",
+ offset, eb->fs_info->sectorsize);
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index db680f5be745a..d46a070275ff5 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -940,6 +940,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
+ int ret = 0;
+ int slot;
+
++ /*
++ * We need to have subvol_sem write locked, to prevent races between
++ * concurrent tasks trying to enable quotas, because we will unlock
++ * and relock qgroup_ioctl_lock before setting fs_info->quota_root
++ * and before setting BTRFS_FS_QUOTA_ENABLED.
++ */
++ lockdep_assert_held_write(&fs_info->subvol_sem);
++
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (fs_info->quota_root)
+ goto out;
+@@ -1117,8 +1125,19 @@ out_add_root:
+ goto out_free_path;
+ }
+
++ mutex_unlock(&fs_info->qgroup_ioctl_lock);
++ /*
++ * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
++ * a deadlock with tasks concurrently doing other qgroup operations, such as
++ * adding/removing qgroups or adding/deleting qgroup relations for example,
++ * because all qgroup operations first start or join a transaction and then
++ * lock the qgroup_ioctl_lock mutex.
++ * We are safe from a concurrent task trying to enable quotas, by calling
++ * this function, since we are serialized by fs_info->subvol_sem.
++ */
+ ret = btrfs_commit_transaction(trans);
+ trans = NULL;
++ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (ret)
+ goto out_free_path;
+
+@@ -1138,6 +1157,21 @@ out_add_root:
+ fs_info->qgroup_rescan_running = true;
+ btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ &fs_info->qgroup_rescan_work);
++ } else {
++ /*
++ * We have set both BTRFS_FS_QUOTA_ENABLED and
++ * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
++ * -EINPROGRESS. That can happen because someone started the
++ * rescan worker by calling quota rescan ioctl before we
++ * attempted to initialize the rescan worker. Failure due to
++ * quotas disabled in the meanwhile is not possible, because
++ * we are holding a write lock on fs_info->subvol_sem, which
++ * is also acquired when disabling quotas.
++ * Ignore such error, and any other error would need to undo
++ * everything we did in the transaction we just committed.
++ */
++ ASSERT(ret == -EINPROGRESS);
++ ret = 0;
+ }
+
+ out_free_path:
+@@ -1166,11 +1200,44 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
+ struct btrfs_trans_handle *trans = NULL;
+ int ret = 0;
+
++ /*
++ * We need to have subvol_sem write locked to prevent races with
++ * snapshot creation.
++ */
++ lockdep_assert_held_write(&fs_info->subvol_sem);
++
++ /*
++ * Lock the cleaner mutex to prevent races with concurrent relocation,
++ * because relocation may be building backrefs for blocks of the quota
++ * root while we are deleting the root. This is like dropping fs roots
++ * of deleted snapshots/subvolumes, we need the same protection.
++ *
++ * This also prevents races between concurrent tasks trying to disable
++ * quotas, because we will unlock and relock qgroup_ioctl_lock across
++ * BTRFS_FS_QUOTA_ENABLED changes.
++ */
++ mutex_lock(&fs_info->cleaner_mutex);
++
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (!fs_info->quota_root)
+ goto out;
++
++ /*
++ * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to
++ * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs
++ * to lock that mutex while holding a transaction handle and the rescan
++ * worker needs to commit a transaction.
++ */
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
++ /*
++ * Request qgroup rescan worker to complete and wait for it. This wait
++ * must be done before transaction start for quota disable since it may
++ * deadlock with transaction by the qgroup rescan worker.
++ */
++ clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
++ btrfs_qgroup_wait_for_completion(fs_info, false);
++
+ /*
+ * 1 For the root item
+ *
+@@ -1186,14 +1253,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
++ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ goto out;
+ }
+
+ if (!fs_info->quota_root)
+ goto out;
+
+- clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+- btrfs_qgroup_wait_for_completion(fs_info, false);
+ spin_lock(&fs_info->qgroup_lock);
+ quota_root = fs_info->quota_root;
+ fs_info->quota_root = NULL;
+@@ -1214,12 +1280,15 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
+ goto out;
+ }
+
++ spin_lock(&fs_info->trans_lock);
+ list_del(&quota_root->dirty_list);
++ spin_unlock(&fs_info->trans_lock);
+
+ btrfs_tree_lock(quota_root->node);
+ btrfs_clean_tree_block(quota_root->node);
+ btrfs_tree_unlock(quota_root->node);
+- btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
++ btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
++ quota_root->node, 0, 1);
+
+ btrfs_put_root(quota_root);
+
+@@ -1229,6 +1298,7 @@ out:
+ btrfs_end_transaction(trans);
+ else if (trans)
+ ret = btrfs_end_transaction(trans);
++ mutex_unlock(&fs_info->cleaner_mutex);
+
+ return ret;
+ }
+@@ -2696,13 +2766,22 @@ cleanup:
+ }
+
+ /*
+- * called from commit_transaction. Writes all changed qgroups to disk.
++ * Writes all changed qgroups to disk.
++ * Called by the transaction commit path and the qgroup assign ioctl.
+ */
+ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ int ret = 0;
+
++ /*
++ * In case we are called from the qgroup assign ioctl, assert that we
++ * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
++ * disable operation (ioctl) and access a freed quota root.
++ */
++ if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
++ lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
++
+ if (!fs_info->quota_root)
+ return ret;
+
+@@ -2847,14 +2926,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+ dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
+ dstgroup->rsv_excl = inherit->lim.rsv_excl;
+
+- ret = update_qgroup_limit_item(trans, dstgroup);
+- if (ret) {
+- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+- btrfs_info(fs_info,
+- "unable to update quota limit for %llu",
+- dstgroup->qgroupid);
+- goto unlock;
+- }
++ qgroup_dirty(fs_info, dstgroup);
+ }
+
+ if (srcid) {
+@@ -3224,7 +3296,8 @@ out:
+ static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
+ {
+ return btrfs_fs_closing(fs_info) ||
+- test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
++ test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
++ !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ }
+
+ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+@@ -3236,6 +3309,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+ int err = -ENOMEM;
+ int ret = 0;
+ bool stopped = false;
++ bool did_leaf_rescans = false;
+
+ path = btrfs_alloc_path();
+ if (!path)
+@@ -3254,11 +3328,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+ err = PTR_ERR(trans);
+ break;
+ }
+- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+- err = -EINTR;
+- } else {
+- err = qgroup_rescan_leaf(trans, path);
+- }
++
++ err = qgroup_rescan_leaf(trans, path);
++ did_leaf_rescans = true;
++
+ if (err > 0)
+ btrfs_commit_transaction(trans);
+ else
+@@ -3272,22 +3345,29 @@ out:
+ if (err > 0 &&
+ fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+- } else if (err < 0) {
++ } else if (err < 0 || stopped) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ /*
+- * only update status, since the previous part has already updated the
+- * qgroup info.
++ * Only update status, since the previous part has already updated the
++ * qgroup info, and only if we did any actual work. This also prevents
++ * race with a concurrent quota disable, which has already set
++ * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
++ * btrfs_quota_disable().
+ */
+- trans = btrfs_start_transaction(fs_info->quota_root, 1);
+- if (IS_ERR(trans)) {
+- err = PTR_ERR(trans);
++ if (did_leaf_rescans) {
++ trans = btrfs_start_transaction(fs_info->quota_root, 1);
++ if (IS_ERR(trans)) {
++ err = PTR_ERR(trans);
++ trans = NULL;
++ btrfs_err(fs_info,
++ "fail to start transaction for status update: %d",
++ err);
++ }
++ } else {
+ trans = NULL;
+- btrfs_err(fs_info,
+- "fail to start transaction for status update: %d",
+- err);
+ }
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+@@ -3360,6 +3440,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
+ btrfs_warn(fs_info,
+ "qgroup rescan init failed, qgroup is not enabled");
+ ret = -EINVAL;
++ } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
++ /* Quota disable is in progress */
++ ret = -EBUSY;
+ }
+
+ if (ret) {
+@@ -4271,4 +4354,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
+ ulist_free(entry->old_roots);
+ kfree(entry);
+ }
++ *root = RB_ROOT;
+ }
+diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
+index d8d268ca8aa76..5b27c289139ac 100644
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -61,7 +61,7 @@ enum btrfs_rbio_ops {
+
+ struct btrfs_raid_bio {
+ struct btrfs_fs_info *fs_info;
+- struct btrfs_bio *bbio;
++ struct btrfs_io_context *bioc;
+
+ /* while we're doing rmw on a stripe
+ * we put it into a hash table so we can
+@@ -271,7 +271,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
+ */
+ static int rbio_bucket(struct btrfs_raid_bio *rbio)
+ {
+- u64 num = rbio->bbio->raid_map[0];
++ u64 num = rbio->bioc->raid_map[0];
+
+ /*
+ * we shift down quite a bit. We're using byte
+@@ -324,6 +324,9 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
+ {
+ bio_list_merge(&dest->bio_list, &victim->bio_list);
+ dest->bio_list_bytes += victim->bio_list_bytes;
++ /* Also inherit the bitmaps from @victim. */
++ bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
++ dest->stripe_npages);
+ dest->generic_bio_cnt += victim->generic_bio_cnt;
+ bio_list_init(&victim->bio_list);
+ }
+@@ -559,8 +562,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
+ test_bit(RBIO_CACHE_BIT, &cur->flags))
+ return 0;
+
+- if (last->bbio->raid_map[0] !=
+- cur->bbio->raid_map[0])
++ if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
+ return 0;
+
+ /* we can't merge with different operations */
+@@ -673,7 +675,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
+
+ spin_lock_irqsave(&h->lock, flags);
+ list_for_each_entry(cur, &h->hash_list, hash_list) {
+- if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
++ if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
+ continue;
+
+ spin_lock(&cur->bio_list_lock);
+@@ -838,7 +840,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
+ }
+ }
+
+- btrfs_put_bbio(rbio->bbio);
++ btrfs_put_bioc(rbio->bioc);
+ kfree(rbio);
+ }
+
+@@ -866,6 +868,12 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
+
+ if (rbio->generic_bio_cnt)
+ btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
++ /*
++ * Clear the data bitmap, as the rbio may be cached for later usage.
++ * Do this before unlock_stripe() so there will be no new bio
++ * for this bio.
++ */
++ bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
+
+ /*
+ * At this moment, rbio->bio_list is empty, however since rbio does not
+@@ -906,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
+
+ /* OK, we have read all the stripes we need to. */
+ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+- 0 : rbio->bbio->max_errors;
++ 0 : rbio->bioc->max_errors;
+ if (atomic_read(&rbio->error) > max_errors)
+ err = BLK_STS_IOERR;
+
+@@ -961,12 +969,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
+ * this does not allocate any pages for rbio->pages.
+ */
+ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
+- struct btrfs_bio *bbio,
++ struct btrfs_io_context *bioc,
+ u64 stripe_len)
+ {
+ struct btrfs_raid_bio *rbio;
+ int nr_data = 0;
+- int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
++ int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
+ int num_pages = rbio_nr_pages(stripe_len, real_stripes);
+ int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
+ void *p;
+@@ -987,7 +995,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
+ spin_lock_init(&rbio->bio_list_lock);
+ INIT_LIST_HEAD(&rbio->stripe_cache);
+ INIT_LIST_HEAD(&rbio->hash_list);
+- rbio->bbio = bbio;
++ rbio->bioc = bioc;
+ rbio->fs_info = fs_info;
+ rbio->stripe_len = stripe_len;
+ rbio->nr_pages = num_pages;
+@@ -1015,9 +1023,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
+ CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
+ #undef CONSUME_ALLOC
+
+- if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
++ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
+ nr_data = real_stripes - 1;
+- else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
++ else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
+ nr_data = real_stripes - 2;
+ else
+ BUG();
+@@ -1077,10 +1085,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
+ struct bio *last = bio_list->tail;
+ int ret;
+ struct bio *bio;
+- struct btrfs_bio_stripe *stripe;
++ struct btrfs_io_stripe *stripe;
+ u64 disk_start;
+
+- stripe = &rbio->bbio->stripes[stripe_nr];
++ stripe = &rbio->bioc->stripes[stripe_nr];
+ disk_start = stripe->physical + (page_index << PAGE_SHIFT);
+
+ /* if the device is missing, just fail this stripe */
+@@ -1155,7 +1163,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
+ int i = 0;
+
+ start = bio->bi_iter.bi_sector << 9;
+- stripe_offset = start - rbio->bbio->raid_map[0];
++ stripe_offset = start - rbio->bioc->raid_map[0];
+ page_index = stripe_offset >> PAGE_SHIFT;
+
+ if (bio_flagged(bio, BIO_CLONED))
+@@ -1179,7 +1187,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
+ */
+ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
+ {
+- struct btrfs_bio *bbio = rbio->bbio;
++ struct btrfs_io_context *bioc = rbio->bioc;
+ void **pointers = rbio->finish_pointers;
+ int nr_data = rbio->nr_data;
+ int stripe;
+@@ -1198,6 +1206,9 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
+ else
+ BUG();
+
++ /* We should have at least one data sector. */
++ ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
++
+ /* at this point we either have a full stripe,
+ * or we've read the full stripe from the drive.
+ * recalculate the parity and write the new results.
+@@ -1269,6 +1280,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
+ for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+ struct page *page;
++
++ /* This vertical stripe has no data, skip it. */
++ if (!test_bit(pagenr, rbio->dbitmap))
++ continue;
++
+ if (stripe < rbio->nr_data) {
+ page = page_in_rbio(rbio, stripe, pagenr, 1);
+ if (!page)
+@@ -1284,15 +1300,20 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
+ }
+ }
+
+- if (likely(!bbio->num_tgtdevs))
++ if (likely(!bioc->num_tgtdevs))
+ goto write_data;
+
+ for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+- if (!bbio->tgtdev_map[stripe])
++ if (!bioc->tgtdev_map[stripe])
+ continue;
+
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+ struct page *page;
++
++ /* This vertical stripe has no data, skip it. */
++ if (!test_bit(pagenr, rbio->dbitmap))
++ continue;
++
+ if (stripe < rbio->nr_data) {
+ page = page_in_rbio(rbio, stripe, pagenr, 1);
+ if (!page)
+@@ -1302,7 +1323,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
+ }
+
+ ret = rbio_add_io_page(rbio, &bio_list, page,
+- rbio->bbio->tgtdev_map[stripe],
++ rbio->bioc->tgtdev_map[stripe],
+ pagenr, rbio->stripe_len);
+ if (ret)
+ goto cleanup;
+@@ -1339,12 +1360,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
+ {
+ u64 physical = bio->bi_iter.bi_sector;
+ int i;
+- struct btrfs_bio_stripe *stripe;
++ struct btrfs_io_stripe *stripe;
+
+ physical <<= 9;
+
+- for (i = 0; i < rbio->bbio->num_stripes; i++) {
+- stripe = &rbio->bbio->stripes[i];
++ for (i = 0; i < rbio->bioc->num_stripes; i++) {
++ stripe = &rbio->bioc->stripes[i];
+ if (in_range(physical, stripe->physical, rbio->stripe_len) &&
+ stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
+ return i;
+@@ -1365,7 +1386,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
+ int i;
+
+ for (i = 0; i < rbio->nr_data; i++) {
+- u64 stripe_start = rbio->bbio->raid_map[i];
++ u64 stripe_start = rbio->bioc->raid_map[i];
+
+ if (in_range(logical, stripe_start, rbio->stripe_len))
+ return i;
+@@ -1456,7 +1477,7 @@ static void raid_rmw_end_io(struct bio *bio)
+ if (!atomic_dec_and_test(&rbio->stripes_pending))
+ return;
+
+- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
++ if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
+ goto cleanup;
+
+ /*
+@@ -1538,8 +1559,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
+ }
+
+ /*
+- * the bbio may be freed once we submit the last bio. Make sure
+- * not to touch it after that
++ * The bioc may be freed once we submit the last bio. Make sure not to
++ * touch it after that.
+ */
+ atomic_set(&rbio->stripes_pending, bios_to_read);
+ while ((bio = bio_list_pop(&bio_list))) {
+@@ -1716,25 +1737,51 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
+ run_plug(plug);
+ }
+
++/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
++static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
++{
++ const struct btrfs_fs_info *fs_info = rbio->fs_info;
++ const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
++ const u64 full_stripe_start = rbio->bioc->raid_map[0];
++ const u32 orig_len = orig_bio->bi_iter.bi_size;
++ const u32 sectorsize = fs_info->sectorsize;
++ u64 cur_logical;
++
++ ASSERT(orig_logical >= full_stripe_start &&
++ orig_logical + orig_len <= full_stripe_start +
++ rbio->nr_data * rbio->stripe_len);
++
++ bio_list_add(&rbio->bio_list, orig_bio);
++ rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
++
++ /* Update the dbitmap. */
++ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
++ cur_logical += sectorsize) {
++ int bit = ((u32)(cur_logical - full_stripe_start) >>
++ fs_info->sectorsize_bits) % rbio->stripe_npages;
++
++ set_bit(bit, rbio->dbitmap);
++ }
++}
++
+ /*
+ * our main entry point for writes from the rest of the FS.
+ */
+ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len)
++ struct btrfs_io_context *bioc, u64 stripe_len)
+ {
+ struct btrfs_raid_bio *rbio;
+ struct btrfs_plug_cb *plug = NULL;
+ struct blk_plug_cb *cb;
+ int ret;
+
+- rbio = alloc_rbio(fs_info, bbio, stripe_len);
++ rbio = alloc_rbio(fs_info, bioc, stripe_len);
+ if (IS_ERR(rbio)) {
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ return PTR_ERR(rbio);
+ }
+- bio_list_add(&rbio->bio_list, bio);
+- rbio->bio_list_bytes = bio->bi_iter.bi_size;
+ rbio->operation = BTRFS_RBIO_WRITE;
++ rbio_add_bio(rbio, bio);
+
+ btrfs_bio_counter_inc_noblocked(fs_info);
+ rbio->generic_bio_cnt = 1;
+@@ -1842,7 +1889,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
+ }
+
+ /* all raid6 handling here */
+- if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
++ if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
+ /*
+ * single failure, rebuild from parity raid5
+ * style
+@@ -1874,8 +1921,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
+ * here due to a crc mismatch and we can't give them the
+ * data they want
+ */
+- if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
+- if (rbio->bbio->raid_map[faila] ==
++ if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
++ if (rbio->bioc->raid_map[faila] ==
+ RAID5_P_STRIPE) {
+ err = BLK_STS_IOERR;
+ goto cleanup;
+@@ -1887,7 +1934,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
+ goto pstripe;
+ }
+
+- if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
++ if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
+ raid6_datap_recov(rbio->real_stripes,
+ PAGE_SIZE, faila, pointers);
+ } else {
+@@ -2006,7 +2053,7 @@ static void raid_recover_end_io(struct bio *bio)
+ if (!atomic_dec_and_test(&rbio->stripes_pending))
+ return;
+
+- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
++ if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
+ rbio_orig_end_io(rbio, BLK_STS_IOERR);
+ else
+ __raid_recover_end_io(rbio);
+@@ -2038,9 +2085,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
+ atomic_set(&rbio->error, 0);
+
+ /*
+- * read everything that hasn't failed. Thanks to the
+- * stripe cache, it is possible that some or all of these
+- * pages are going to be uptodate.
++ * Read everything that hasn't failed. However this time we will
++ * not trust any cached sector.
++ * We may read out some stale data, but the higher layer is not reading
++ * that stale part.
++ *
++ * So here we always re-read everything in recovery path.
+ */
+ for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+ if (rbio->faila == stripe || rbio->failb == stripe) {
+@@ -2049,16 +2099,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
+ }
+
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
+- struct page *p;
+-
+- /*
+- * the rmw code may have already read this
+- * page in
+- */
+- p = rbio_stripe_page(rbio, stripe, pagenr);
+- if (PageUptodate(p))
+- continue;
+-
+ ret = rbio_add_io_page(rbio, &bio_list,
+ rbio_stripe_page(rbio, stripe, pagenr),
+ stripe, pagenr, rbio->stripe_len);
+@@ -2074,7 +2114,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
+ * were up to date, or we might have no bios to read because
+ * the devices were gone.
+ */
+- if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
++ if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
+ __raid_recover_end_io(rbio);
+ return 0;
+ } else {
+@@ -2083,8 +2123,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
+ }
+
+ /*
+- * the bbio may be freed once we submit the last bio. Make sure
+- * not to touch it after that
++ * The bioc may be freed once we submit the last bio. Make sure not to
++ * touch it after that.
+ */
+ atomic_set(&rbio->stripes_pending, bios_to_read);
+ while ((bio = bio_list_pop(&bio_list))) {
+@@ -2117,36 +2157,35 @@ cleanup:
+ * of the drive.
+ */
+ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len,
++ struct btrfs_io_context *bioc, u64 stripe_len,
+ int mirror_num, int generic_io)
+ {
+ struct btrfs_raid_bio *rbio;
+ int ret;
+
+ if (generic_io) {
+- ASSERT(bbio->mirror_num == mirror_num);
++ ASSERT(bioc->mirror_num == mirror_num);
+ btrfs_io_bio(bio)->mirror_num = mirror_num;
+ }
+
+- rbio = alloc_rbio(fs_info, bbio, stripe_len);
++ rbio = alloc_rbio(fs_info, bioc, stripe_len);
+ if (IS_ERR(rbio)) {
+ if (generic_io)
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ return PTR_ERR(rbio);
+ }
+
+ rbio->operation = BTRFS_RBIO_READ_REBUILD;
+- bio_list_add(&rbio->bio_list, bio);
+- rbio->bio_list_bytes = bio->bi_iter.bi_size;
++ rbio_add_bio(rbio, bio);
+
+ rbio->faila = find_logical_bio_stripe(rbio, bio);
+ if (rbio->faila == -1) {
+ btrfs_warn(fs_info,
+- "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
++"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
+ __func__, bio->bi_iter.bi_sector << 9,
+- (u64)bio->bi_iter.bi_size, bbio->map_type);
++ (u64)bio->bi_iter.bi_size, bioc->map_type);
+ if (generic_io)
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ kfree(rbio);
+ return -EIO;
+ }
+@@ -2155,7 +2194,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
+ btrfs_bio_counter_inc_noblocked(fs_info);
+ rbio->generic_bio_cnt = 1;
+ } else {
+- btrfs_get_bbio(bbio);
++ btrfs_get_bioc(bioc);
+ }
+
+ /*
+@@ -2214,7 +2253,7 @@ static void read_rebuild_work(struct btrfs_work *work)
+ /*
+ * The following code is used to scrub/replace the parity stripe
+ *
+- * Caller must have already increased bio_counter for getting @bbio.
++ * Caller must have already increased bio_counter for getting @bioc.
+ *
+ * Note: We need make sure all the pages that add into the scrub/replace
+ * raid bio are correct and not be changed during the scrub/replace. That
+@@ -2223,14 +2262,14 @@ static void read_rebuild_work(struct btrfs_work *work)
+
+ struct btrfs_raid_bio *
+ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len,
++ struct btrfs_io_context *bioc, u64 stripe_len,
+ struct btrfs_device *scrub_dev,
+ unsigned long *dbitmap, int stripe_nsectors)
+ {
+ struct btrfs_raid_bio *rbio;
+ int i;
+
+- rbio = alloc_rbio(fs_info, bbio, stripe_len);
++ rbio = alloc_rbio(fs_info, bioc, stripe_len);
+ if (IS_ERR(rbio))
+ return NULL;
+ bio_list_add(&rbio->bio_list, bio);
+@@ -2242,12 +2281,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
+
+ /*
+- * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
++ * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
+ * to the end position, so this search can start from the first parity
+ * stripe.
+ */
+ for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
+- if (bbio->stripes[i].dev == scrub_dev) {
++ if (bioc->stripes[i].dev == scrub_dev) {
+ rbio->scrubp = i;
+ break;
+ }
+@@ -2260,7 +2299,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
+
+ /*
+- * We have already increased bio_counter when getting bbio, record it
++ * We have already increased bio_counter when getting bioc, record it
+ * so we can free it at rbio_orig_end_io().
+ */
+ rbio->generic_bio_cnt = 1;
+@@ -2275,10 +2314,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
+ int stripe_offset;
+ int index;
+
+- ASSERT(logical >= rbio->bbio->raid_map[0]);
+- ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
++ ASSERT(logical >= rbio->bioc->raid_map[0]);
++ ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] +
+ rbio->stripe_len * rbio->nr_data);
+- stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
++ stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
+ index = stripe_offset >> PAGE_SHIFT;
+ rbio->bio_pages[index] = page;
+ }
+@@ -2312,7 +2351,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
+ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+ int need_check)
+ {
+- struct btrfs_bio *bbio = rbio->bbio;
++ struct btrfs_io_context *bioc = rbio->bioc;
+ void **pointers = rbio->finish_pointers;
+ unsigned long *pbitmap = rbio->finish_pbitmap;
+ int nr_data = rbio->nr_data;
+@@ -2335,7 +2374,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+ else
+ BUG();
+
+- if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
++ if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
+ is_replace = 1;
+ bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
+ }
+@@ -2435,7 +2474,7 @@ writeback:
+
+ page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+ ret = rbio_add_io_page(rbio, &bio_list, page,
+- bbio->tgtdev_map[rbio->scrubp],
++ bioc->tgtdev_map[rbio->scrubp],
+ pagenr, rbio->stripe_len);
+ if (ret)
+ goto cleanup;
+@@ -2483,7 +2522,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
+ */
+ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
+ {
+- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
++ if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
+ goto cleanup;
+
+ if (rbio->faila >= 0 || rbio->failb >= 0) {
+@@ -2504,7 +2543,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
+ * the data, so the capability of the repair is declined.
+ * (In the case of RAID5, we can not repair anything)
+ */
+- if (dfail > rbio->bbio->max_errors - 1)
++ if (dfail > rbio->bioc->max_errors - 1)
+ goto cleanup;
+
+ /*
+@@ -2625,8 +2664,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
+ }
+
+ /*
+- * the bbio may be freed once we submit the last bio. Make sure
+- * not to touch it after that
++ * The bioc may be freed once we submit the last bio. Make sure not to
++ * touch it after that.
+ */
+ atomic_set(&rbio->stripes_pending, bios_to_read);
+ while ((bio = bio_list_pop(&bio_list))) {
+@@ -2671,11 +2710,11 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
+
+ struct btrfs_raid_bio *
+ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 length)
++ struct btrfs_io_context *bioc, u64 length)
+ {
+ struct btrfs_raid_bio *rbio;
+
+- rbio = alloc_rbio(fs_info, bbio, length);
++ rbio = alloc_rbio(fs_info, bioc, length);
+ if (IS_ERR(rbio))
+ return NULL;
+
+@@ -2689,13 +2728,15 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+
+ rbio->faila = find_logical_bio_stripe(rbio, bio);
+ if (rbio->faila == -1) {
+- BUG();
+- kfree(rbio);
++ btrfs_warn_rl(fs_info,
++ "can not determine the failed stripe number for full stripe %llu",
++ bioc->raid_map[0]);
++ __free_raid_bio(rbio);
+ return NULL;
+ }
+
+ /*
+- * When we get bbio, we have already increased bio_counter, record it
++ * When we get bioc, we have already increased bio_counter, record it
+ * so we can free it at rbio_orig_end_io()
+ */
+ rbio->generic_bio_cnt = 1;
+diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
+index 2503485db859b..838d3a5e07ef4 100644
+--- a/fs/btrfs/raid56.h
++++ b/fs/btrfs/raid56.h
+@@ -31,24 +31,24 @@ struct btrfs_raid_bio;
+ struct btrfs_device;
+
+ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len,
++ struct btrfs_io_context *bioc, u64 stripe_len,
+ int mirror_num, int generic_io);
+ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len);
++ struct btrfs_io_context *bioc, u64 stripe_len);
+
+ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
+ u64 logical);
+
+ struct btrfs_raid_bio *
+ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 stripe_len,
++ struct btrfs_io_context *bioc, u64 stripe_len,
+ struct btrfs_device *scrub_dev,
+ unsigned long *dbitmap, int stripe_nsectors);
+ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
+
+ struct btrfs_raid_bio *
+ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
+- struct btrfs_bio *bbio, u64 length);
++ struct btrfs_io_context *bioc, u64 length);
+ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
+
+ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
+diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
+index 5c1a617eb25de..5c2b66d155ef7 100644
+--- a/fs/btrfs/rcu-string.h
++++ b/fs/btrfs/rcu-string.h
+@@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
+ (len * sizeof(char)), mask);
+ if (!ret)
+ return ret;
+- strncpy(ret->str, src, len);
++ /* Warn if the source got unexpectedly truncated. */
++ if (WARN_ON(strscpy(ret->str, src, len) < 0)) {
++ kfree(ret);
++ return NULL;
++ }
+ return ret;
+ }
+
+diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
+index 06713a8fe26b4..eb96fdc3be25f 100644
+--- a/fs/btrfs/reada.c
++++ b/fs/btrfs/reada.c
+@@ -227,7 +227,7 @@ start_machine:
+ }
+
+ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
+- struct btrfs_bio *bbio)
++ struct btrfs_io_context *bioc)
+ {
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+ int ret;
+@@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
+ kref_init(&zone->refcnt);
+ zone->elems = 0;
+ zone->device = dev; /* our device always sits at index 0 */
+- for (i = 0; i < bbio->num_stripes; ++i) {
++ for (i = 0; i < bioc->num_stripes; ++i) {
+ /* bounds have already been checked */
+- zone->devs[i] = bbio->stripes[i].dev;
++ zone->devs[i] = bioc->stripes[i].dev;
+ }
+- zone->ndevs = bbio->num_stripes;
++ zone->ndevs = bioc->num_stripes;
+
+ spin_lock(&fs_info->reada_lock);
+ ret = radix_tree_insert(&dev->reada_zones,
+@@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
+ int ret;
+ struct reada_extent *re = NULL;
+ struct reada_extent *re_exist = NULL;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_device *dev;
+ struct btrfs_device *prev_dev;
+ u64 length;
+@@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
+ */
+ length = fs_info->nodesize;
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
+- &length, &bbio, 0);
+- if (ret || !bbio || length < fs_info->nodesize)
++ &length, &bioc, 0);
++ if (ret || !bioc || length < fs_info->nodesize)
+ goto error;
+
+- if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
++ if (bioc->num_stripes > BTRFS_MAX_MIRRORS) {
+ btrfs_err(fs_info,
+ "readahead: more than %d copies not supported",
+ BTRFS_MAX_MIRRORS);
+ goto error;
+ }
+
+- real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
++ real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
+ for (nzones = 0; nzones < real_stripes; ++nzones) {
+ struct reada_zone *zone;
+
+- dev = bbio->stripes[nzones].dev;
++ dev = bioc->stripes[nzones].dev;
+
+ /* cannot read ahead on missing device. */
+ if (!dev->bdev)
+ continue;
+
+- zone = reada_find_zone(dev, logical, bbio);
++ zone = reada_find_zone(dev, logical, bioc);
+ if (!zone)
+ continue;
+
+@@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
+ if (!have_zone)
+ goto error;
+
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ return re;
+
+ error:
+@@ -488,7 +488,7 @@ error:
+ kref_put(&zone->refcnt, reada_zone_release);
+ spin_unlock(&fs_info->reada_lock);
+ }
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ kfree(re);
+ return re_exist;
+ }
+diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
+index 9b0814318e726..fa60af00ebca2 100644
+--- a/fs/btrfs/reflink.c
++++ b/fs/btrfs/reflink.c
+@@ -505,8 +505,11 @@ process_slot:
+ */
+ ASSERT(key.offset == 0);
+ ASSERT(datal <= fs_info->sectorsize);
+- if (key.offset != 0 || datal > fs_info->sectorsize)
+- return -EUCLEAN;
++ if (WARN_ON(key.offset != 0) ||
++ WARN_ON(datal > fs_info->sectorsize)) {
++ ret = -EUCLEAN;
++ goto out;
++ }
+
+ ret = clone_copy_inline_extent(inode, path, &new_key,
+ drop_start, datal, size,
+@@ -649,7 +652,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
+ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+ struct inode *dst, u64 dst_loff)
+ {
+- int ret;
++ int ret = 0;
+ u64 i, tail_len, chunk_count;
+ struct btrfs_root *root_dst = BTRFS_I(dst)->root;
+
+diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
+index 914d403b4415d..dd8d47958a814 100644
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1147,7 +1147,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
+ num_bytes, parent);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+- key.objectid, key.offset);
++ key.objectid, key.offset,
++ root->root_key.objectid, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -1158,7 +1159,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
+ num_bytes, parent);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
+- key.objectid, key.offset);
++ key.objectid, key.offset,
++ root->root_key.objectid, false);
+ ret = btrfs_free_extent(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -1324,7 +1326,9 @@ again:
+ btrfs_release_path(path);
+
+ path->lowest_level = level;
++ set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
+ ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
++ clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
+ path->lowest_level = 0;
+ if (ret) {
+ if (ret > 0)
+@@ -1368,7 +1372,8 @@ again:
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
+ blocksize, path->nodes[level]->start);
+ ref.skip_qgroup = true;
+- btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
++ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
++ 0, true);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -1377,7 +1382,8 @@ again:
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
+ blocksize, 0);
+ ref.skip_qgroup = true;
+- btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
++ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
++ true);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+@@ -1386,7 +1392,8 @@ again:
+
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
+ blocksize, path->nodes[level]->start);
+- btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
++ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
++ 0, true);
+ ref.skip_qgroup = true;
+ ret = btrfs_free_extent(trans, &ref);
+ if (ret) {
+@@ -1396,7 +1403,8 @@ again:
+
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
+ blocksize, 0);
+- btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
++ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
++ 0, true);
+ ref.skip_qgroup = true;
+ ret = btrfs_free_extent(trans, &ref);
+ if (ret) {
+@@ -1897,7 +1905,39 @@ again:
+ err = PTR_ERR(root);
+ break;
+ }
+- ASSERT(root->reloc_root == reloc_root);
++
++ if (unlikely(root->reloc_root != reloc_root)) {
++ if (root->reloc_root) {
++ btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
++ root->root_key.objectid,
++ root->reloc_root->root_key.objectid,
++ root->reloc_root->root_key.type,
++ root->reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &root->reloc_root->root_item),
++ reloc_root->root_key.objectid,
++ reloc_root->root_key.type,
++ reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &reloc_root->root_item));
++ } else {
++ btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
++ root->root_key.objectid,
++ reloc_root->root_key.objectid,
++ reloc_root->root_key.type,
++ reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &reloc_root->root_item));
++ }
++ list_add(&reloc_root->root_list, &reloc_roots);
++ btrfs_put_root(root);
++ btrfs_abort_transaction(trans, -EUCLEAN);
++ if (!err)
++ err = -EUCLEAN;
++ break;
++ }
+
+ /*
+ * set reference count to 1, so btrfs_recover_relocation
+@@ -1970,7 +2010,7 @@ again:
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ false);
+ if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+- if (IS_ERR(root)) {
++ if (WARN_ON(IS_ERR(root))) {
+ /*
+ * For recovery we read the fs roots on mount,
+ * and if we didn't find the root then we marked
+@@ -1979,17 +2019,14 @@ again:
+ * memory. However there's no reason we can't
+ * handle the error properly here just in case.
+ */
+- ASSERT(0);
+ ret = PTR_ERR(root);
+ goto out;
+ }
+- if (root->reloc_root != reloc_root) {
++ if (WARN_ON(root->reloc_root != reloc_root)) {
+ /*
+- * This is actually impossible without something
+- * going really wrong (like weird race condition
+- * or cosmic rays).
++ * This can happen if on-disk metadata has some
++ * corruption, e.g. bad reloc tree key offset.
+ */
+- ASSERT(0);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -2475,7 +2512,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
+ upper->eb->start);
+ ref.real_root = root->root_key.objectid;
+ btrfs_init_tree_ref(&ref, node->level,
+- btrfs_header_owner(upper->eb));
++ btrfs_header_owner(upper->eb),
++ root->root_key.objectid, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (!ret)
+ ret = btrfs_drop_subtree(trans, root, eb,
+@@ -2691,8 +2729,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
+ list_add_tail(&node->list, &rc->backref_cache.changed);
+ } else {
+ path->lowest_level = node->level;
++ if (root == root->fs_info->chunk_root)
++ btrfs_reserve_chunk_metadata(trans, false);
+ ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+ btrfs_release_path(path);
++ if (root == root->fs_info->chunk_root)
++ btrfs_trans_release_chunk_metadata(trans);
+ if (ret > 0)
+ ret = 0;
+ }
+@@ -2852,31 +2894,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
+ if (ret)
+ return ret;
+
+- /*
+- * On a zoned filesystem, we cannot preallocate the file region.
+- * Instead, we dirty and fiemap_write the region.
+- */
+- if (btrfs_is_zoned(inode->root->fs_info)) {
+- struct btrfs_root *root = inode->root;
+- struct btrfs_trans_handle *trans;
+-
+- end = cluster->end - offset + 1;
+- trans = btrfs_start_transaction(root, 1);
+- if (IS_ERR(trans))
+- return PTR_ERR(trans);
+-
+- inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+- i_size_write(&inode->vfs_inode, end);
+- ret = btrfs_update_inode(trans, root, inode);
+- if (ret) {
+- btrfs_abort_transaction(trans, ret);
+- btrfs_end_transaction(trans);
+- return ret;
+- }
+-
+- return btrfs_end_transaction(trans);
+- }
+-
+ btrfs_inode_lock(&inode->vfs_inode, 0);
+ for (nr = 0; nr < cluster->nr; nr++) {
+ start = cluster->boundary[nr] - offset;
+@@ -3084,7 +3101,6 @@ release_page:
+ static int relocate_file_extent_cluster(struct inode *inode,
+ struct file_extent_cluster *cluster)
+ {
+- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 offset = BTRFS_I(inode)->index_cnt;
+ unsigned long index;
+ unsigned long last_index;
+@@ -3114,8 +3130,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
+ for (index = (cluster->start - offset) >> PAGE_SHIFT;
+ index <= last_index && !ret; index++)
+ ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
+- if (btrfs_is_zoned(fs_info) && !ret)
+- ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret == 0)
+ WARN_ON(cluster_nr != cluster->nr);
+ out:
+@@ -3593,7 +3607,12 @@ int prepare_to_relocate(struct reloc_control *rc)
+ */
+ return PTR_ERR(trans);
+ }
+- return btrfs_commit_transaction(trans);
++
++ ret = btrfs_commit_transaction(trans);
++ if (ret)
++ unset_reloc_control(rc);
++
++ return ret;
+ }
+
+ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
+@@ -3770,12 +3789,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path;
+ struct btrfs_inode_item *item;
+ struct extent_buffer *leaf;
+- u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
+ int ret;
+
+- if (btrfs_is_zoned(trans->fs_info))
+- flags &= ~BTRFS_INODE_PREALLOC;
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+@@ -3790,7 +3805,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
+ btrfs_set_inode_generation(leaf, item, 1);
+ btrfs_set_inode_size(leaf, item, 0);
+ btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
+- btrfs_set_inode_flags(leaf, item, flags);
++ btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
++ BTRFS_INODE_PREALLOC);
+ btrfs_mark_buffer_dirty(leaf);
+ out:
+ btrfs_free_path(path);
+@@ -3885,25 +3901,14 @@ out:
+ * 0 success
+ * -EINPROGRESS operation is already in progress, that's probably a bug
+ * -ECANCELED cancellation request was set before the operation started
+- * -EAGAIN can not start because there are ongoing send operations
+ */
+ static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
+ {
+- spin_lock(&fs_info->send_reloc_lock);
+- if (fs_info->send_in_progress) {
+- btrfs_warn_rl(fs_info,
+-"cannot run relocation while send operations are in progress (%d in progress)",
+- fs_info->send_in_progress);
+- spin_unlock(&fs_info->send_reloc_lock);
+- return -EAGAIN;
+- }
+ if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+ /* This should not happen */
+- spin_unlock(&fs_info->send_reloc_lock);
+ btrfs_err(fs_info, "reloc already running, cannot start");
+ return -EINPROGRESS;
+ }
+- spin_unlock(&fs_info->send_reloc_lock);
+
+ if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
+ btrfs_info(fs_info, "chunk relocation canceled on start");
+@@ -3925,9 +3930,7 @@ static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
+ /* Requested after start, clear bit first so any waiters can continue */
+ if (atomic_read(&fs_info->reloc_cancel_req) > 0)
+ btrfs_info(fs_info, "chunk relocation canceled during operation");
+- spin_lock(&fs_info->send_reloc_lock);
+ clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
+- spin_unlock(&fs_info->send_reloc_lock);
+ atomic_set(&fs_info->reloc_cancel_req, 0);
+ }
+
+@@ -3998,6 +4001,19 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
+ int rw = 0;
+ int err = 0;
+
++ /*
++ * This only gets set if we had a half-deleted snapshot on mount. We
++ * cannot allow relocation to start while we're still trying to clean up
++ * these pending deletions.
++ */
++ ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
++ if (ret)
++ return ret;
++
++ /* We may have been woken up by close_ctree, so bail if we're closing. */
++ if (btrfs_fs_closing(fs_info))
++ return -EINTR;
++
+ bg = btrfs_lookup_block_group(fs_info, group_start);
+ if (!bg)
+ return -ENOENT;
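
The hunk above makes relocation wait for any half-finished snapshot drops and then re-check whether the filesystem is shutting down before touching the block group. A rough user-space model of that ordering follows; the identifiers (unfinished_drops, fs_closing, start_relocation) are invented stand-ins for the kernel flag bits, not btrfs APIs.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for BTRFS_FS_UNFINISHED_DROPS and btrfs_fs_closing(). */
static atomic_bool unfinished_drops = false;
static atomic_bool fs_closing = false;
static atomic_bool interrupted = false;

/* Returns 0 on success, a negative value if interrupted or shutting down. */
static int start_relocation(void)
{
    /* Wait until pending snapshot drops are finished, like wait_on_bit(). */
    while (atomic_load(&unfinished_drops)) {
        if (atomic_load(&interrupted))
            return -1;          /* a signal arrived while waiting */
    }
    /* We may have been woken by shutdown, so re-check before proceeding. */
    if (atomic_load(&fs_closing))
        return -2;              /* corresponds to -EINTR in the patch */
    return 0;                   /* safe to relocate the block group */
}

int main(void)
{
    printf("relocation start: %d\n", start_relocation());
    atomic_store(&fs_closing, true);
    printf("after shutdown flag: %d\n", start_relocation());
    return 0;
}
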
+@@ -4386,8 +4402,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ if (!rc)
+ return 0;
+
+- BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
+- root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
++ BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
+
+ level = btrfs_header_level(buf);
+ if (btrfs_header_generation(buf) <=
+diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
+index 702dc5441f039..9328d87d96888 100644
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -280,6 +280,21 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
+
+ WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
+ if (btrfs_root_refs(&root->root_item) == 0) {
++ struct btrfs_key drop_key;
++
++ btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
++ /*
++ * If we have a non-zero drop_progress then we know we
++ * made it partly through deleting this snapshot, and
++ * thus we need to make sure we block any balance from
++ * happening until this snapshot is completely dropped.
++ */
++ if (drop_key.objectid != 0 || drop_key.type != 0 ||
++ drop_key.offset != 0) {
++ set_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
++ set_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
++ }
++
+ set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
+ btrfs_add_dead_root(root);
+ }
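
The drop_progress check above treats the all-zero key as "deletion never started" and anything else as an interrupted drop that must block balance until the snapshot is fully gone. A minimal sketch of that predicate, using a simplified stand-in for struct btrfs_key (names invented for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct btrfs_key. */
struct key {
    uint64_t objectid;
    uint8_t  type;
    uint64_t offset;
};

/* True if a previous snapshot drop recorded partial progress. */
static bool drop_was_interrupted(const struct key *drop_progress)
{
    return drop_progress->objectid != 0 ||
           drop_progress->type != 0 ||
           drop_progress->offset != 0;
}

int main(void)
{
    struct key fresh = { 0, 0, 0 };
    struct key partial = { 257, 108, 4096 };

    printf("fresh: %d, partial: %d\n",
           drop_was_interrupted(&fresh), drop_was_interrupted(&partial));
    return 0;
}
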
+@@ -336,8 +351,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ key.offset = ref_id;
+ again:
+ ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+- BUG_ON(ret < 0);
+- if (ret == 0) {
++ if (ret < 0) {
++ err = ret;
++ goto out;
++ } else if (ret == 0) {
+ leaf = path->nodes[0];
+ ref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 088641ba7a8e6..0d1715ebdef9c 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -57,7 +57,7 @@ struct scrub_ctx;
+
+ struct scrub_recover {
+ refcount_t refs;
+- struct btrfs_bio *bbio;
++ struct btrfs_io_context *bioc;
+ u64 map_length;
+ };
+
+@@ -73,8 +73,8 @@ struct scrub_page {
+ u64 physical_for_dev_replace;
+ atomic_t refs;
+ u8 mirror_num;
+- int have_csum:1;
+- int io_error:1;
++ unsigned int have_csum:1;
++ unsigned int io_error:1;
+ u8 csum[BTRFS_CSUM_SIZE];
+
+ struct scrub_recover *recover;
+@@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
+ static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
+ {
+ return spage->recover &&
+- (spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
++ (spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+ }
+
+ static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
+@@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
+ {
+ if (refcount_dec_and_test(&recover->refs)) {
+ btrfs_bio_counter_dec(fs_info);
+- btrfs_put_bbio(recover->bbio);
++ btrfs_put_bioc(recover->bioc);
+ kfree(recover);
+ }
+ }
+@@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
+ sblock_other = sblocks_for_recheck + mirror_index;
+ } else {
+ struct scrub_recover *r = sblock_bad->pagev[0]->recover;
+- int max_allowed = r->bbio->num_stripes -
+- r->bbio->num_tgtdevs;
++ int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;
+
+ if (mirror_index >= max_allowed)
+ break;
+@@ -1218,14 +1217,14 @@ out:
+ return 0;
+ }
+
+-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
++static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
+ {
+- if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
++ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
+ return 2;
+- else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
++ else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
+ return 3;
+ else
+- return (int)bbio->num_stripes;
++ return (int)bioc->num_stripes;
+ }
+
+ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
+@@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
+ u64 flags = original_sblock->pagev[0]->flags;
+ u64 have_csum = original_sblock->pagev[0]->have_csum;
+ struct scrub_recover *recover;
+- struct btrfs_bio *bbio;
++ struct btrfs_io_context *bioc;
+ u64 sublen;
+ u64 mapped_length;
+ u64 stripe_offset;
+@@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
+ while (length > 0) {
+ sublen = min_t(u64, length, fs_info->sectorsize);
+ mapped_length = sublen;
+- bbio = NULL;
++ bioc = NULL;
+
+ /*
+ * With a length of sectorsize, each returned stripe represents
+@@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
+ */
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+- logical, &mapped_length, &bbio);
+- if (ret || !bbio || mapped_length < sublen) {
+- btrfs_put_bbio(bbio);
++ logical, &mapped_length, &bioc);
++ if (ret || !bioc || mapped_length < sublen) {
++ btrfs_put_bioc(bioc);
+ btrfs_bio_counter_dec(fs_info);
+ return -EIO;
+ }
+
+ recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
+ if (!recover) {
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ btrfs_bio_counter_dec(fs_info);
+ return -ENOMEM;
+ }
+
+ refcount_set(&recover->refs, 1);
+- recover->bbio = bbio;
++ recover->bioc = bioc;
+ recover->map_length = mapped_length;
+
+ BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
+
+- nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
++ nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
+
+ for (mirror_index = 0; mirror_index < nmirrors;
+ mirror_index++) {
+@@ -1348,17 +1347,17 @@ leave_nomem:
+ sctx->fs_info->csum_size);
+
+ scrub_stripe_index_and_offset(logical,
+- bbio->map_type,
+- bbio->raid_map,
++ bioc->map_type,
++ bioc->raid_map,
+ mapped_length,
+- bbio->num_stripes -
+- bbio->num_tgtdevs,
++ bioc->num_stripes -
++ bioc->num_tgtdevs,
+ mirror_index,
+ &stripe_index,
+ &stripe_offset);
+- spage->physical = bbio->stripes[stripe_index].physical +
++ spage->physical = bioc->stripes[stripe_index].physical +
+ stripe_offset;
+- spage->dev = bbio->stripes[stripe_index].dev;
++ spage->dev = bioc->stripes[stripe_index].dev;
+
+ BUG_ON(page_index >= original_sblock->page_count);
+ spage->physical_for_dev_replace =
+@@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
+ bio->bi_end_io = scrub_bio_wait_endio;
+
+ mirror_num = spage->sblock->pagev[0]->mirror_num;
+- ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
++ ret = raid56_parity_recover(fs_info, bio, spage->recover->bioc,
+ spage->recover->map_length,
+ mirror_num, 0);
+ if (ret)
+@@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u64 length = sblock->page_count * PAGE_SIZE;
+ u64 logical = sblock->pagev[0]->logical;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ struct bio *bio;
+ struct btrfs_raid_bio *rbio;
+ int ret;
+@@ -2211,19 +2210,19 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
+- &length, &bbio);
+- if (ret || !bbio || !bbio->raid_map)
+- goto bbio_out;
++ &length, &bioc);
++ if (ret || !bioc || !bioc->raid_map)
++ goto bioc_out;
+
+ if (WARN_ON(!sctx->is_dev_replace ||
+- !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
++ !(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
+ /*
+ * We shouldn't be scrubbing a missing device. Even for dev
+ * replace, we should only get here for RAID 5/6. We either
+ * managed to mount something with no mirrors remaining or
+ * there's a bug in scrub_remap_extent()/btrfs_map_block().
+ */
+- goto bbio_out;
++ goto bioc_out;
+ }
+
+ bio = btrfs_io_bio_alloc(0);
+@@ -2231,7 +2230,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+ bio->bi_private = sblock;
+ bio->bi_end_io = scrub_missing_raid56_end_io;
+
+- rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
++ rbio = raid56_alloc_missing_rbio(fs_info, bio, bioc, length);
+ if (!rbio)
+ goto rbio_out;
+
+@@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+
+ rbio_out:
+ bio_put(bio);
+-bbio_out:
++bioc_out:
+ btrfs_bio_counter_dec(fs_info);
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.malloc_errors++;
+ spin_unlock(&sctx->stat_lock);
+@@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct bio *bio;
+ struct btrfs_raid_bio *rbio;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ u64 length;
+ int ret;
+
+@@ -2838,16 +2837,16 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
+- &length, &bbio);
+- if (ret || !bbio || !bbio->raid_map)
+- goto bbio_out;
++ &length, &bioc);
++ if (ret || !bioc || !bioc->raid_map)
++ goto bioc_out;
+
+ bio = btrfs_io_bio_alloc(0);
+ bio->bi_iter.bi_sector = sparity->logic_start >> 9;
+ bio->bi_private = sparity;
+ bio->bi_end_io = scrub_parity_bio_endio;
+
+- rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
++ rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bioc,
+ length, sparity->scrub_dev,
+ sparity->dbitmap,
+ sparity->nsectors);
+@@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+
+ rbio_out:
+ bio_put(bio);
+-bbio_out:
++bioc_out:
+ btrfs_bio_counter_dec(fs_info);
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+ sparity->nsectors);
+ spin_lock(&sctx->stat_lock);
+@@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
+ struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *csum_root = fs_info->csum_root;
+ struct btrfs_extent_item *extent;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ u64 flags;
+ int ret;
+ int slot;
+@@ -3044,22 +3043,22 @@ again:
+ extent_len);
+
+ mapped_length = extent_len;
+- bbio = NULL;
++ bioc = NULL;
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
+- extent_logical, &mapped_length, &bbio,
++ extent_logical, &mapped_length, &bioc,
+ 0);
+ if (!ret) {
+- if (!bbio || mapped_length < extent_len)
++ if (!bioc || mapped_length < extent_len)
+ ret = -EIO;
+ }
+ if (ret) {
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ goto out;
+ }
+- extent_physical = bbio->stripes[0].physical;
+- extent_mirror_num = bbio->mirror_num;
+- extent_dev = bbio->stripes[0].dev;
+- btrfs_put_bbio(bbio);
++ extent_physical = bioc->stripes[0].physical;
++ extent_mirror_num = bioc->mirror_num;
++ extent_dev = bioc->stripes[0].dev;
++ btrfs_put_bioc(bioc);
+
+ ret = btrfs_lookup_csums_range(csum_root,
+ extent_logical,
+@@ -3813,13 +3812,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
+
+ if (ret == 0) {
+ ro_set = 1;
+- } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
++ } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
++ !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
+ /*
+ * btrfs_inc_block_group_ro return -ENOSPC when it
+ * failed in creating new chunk for metadata.
+ * It is not a problem for scrub, because
+ * metadata are always cowed, and our scrub paused
+ * commit_transactions.
++ *
++ * For RAID56 chunks, we have to mark them read-only
++ * for scrub, as later we would use our own cache
++ * out of RAID56 realm.
++ * Thus we want the RAID56 bg to be marked RO to
++			 * prevent RMW from screwing up our cache.
+ */
+ ro_set = 0;
+ } else if (ret == -ETXTBSY) {
+@@ -4068,10 +4074,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+ u64 end, struct btrfs_scrub_progress *progress,
+ int readonly, int is_dev_replace)
+ {
++ struct btrfs_dev_lookup_args args = { .devid = devid };
+ struct scrub_ctx *sctx;
+ int ret;
+ struct btrfs_device *dev;
+ unsigned int nofs_flag;
++ bool need_commit = false;
+
+ if (btrfs_fs_closing(fs_info))
+ return -EAGAIN;
+@@ -4115,7 +4123,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+ goto out_free_ctx;
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+- dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
++ dev = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
+ !is_dev_replace)) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+@@ -4177,6 +4185,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+ */
+ nofs_flag = memalloc_nofs_save();
+ if (!is_dev_replace) {
++ u64 old_super_errors;
++
++ spin_lock(&sctx->stat_lock);
++ old_super_errors = sctx->stat.super_errors;
++ spin_unlock(&sctx->stat_lock);
++
+ btrfs_info(fs_info, "scrub: started on devid %llu", devid);
+ /*
+ * by holding device list mutex, we can
+@@ -4185,6 +4199,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ ret = scrub_supers(sctx, dev);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
++
++ spin_lock(&sctx->stat_lock);
++ /*
++	 * Super block errors found, but we cannot commit a transaction
++	 * in the current context, since btrfs_commit_transaction() needs
++	 * to pause the currently running scrub (held by ourselves).
++ */
++ if (sctx->stat.super_errors > old_super_errors && !sctx->readonly)
++ need_commit = true;
++ spin_unlock(&sctx->stat_lock);
+ }
+
+ if (!ret)
+@@ -4211,6 +4235,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+ scrub_workers_put(fs_info);
+ scrub_put_ctx(sctx);
+
++ /*
++ * We found some super block errors before, now try to force a
++ * transaction commit, as scrub has finished.
++ */
++ if (need_commit) {
++ struct btrfs_trans_handle *trans;
++
++ trans = btrfs_start_transaction(fs_info->tree_root, 0);
++ if (IS_ERR(trans)) {
++ ret = PTR_ERR(trans);
++ btrfs_err(fs_info,
++ "scrub: failed to start transaction to fix super block errors: %d", ret);
++ return ret;
++ }
++ ret = btrfs_commit_transaction(trans);
++ if (ret < 0)
++ btrfs_err(fs_info,
++ "scrub: failed to commit transaction to fix super block errors: %d", ret);
++ }
+ return ret;
+ out:
+ scrub_workers_put(fs_info);
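
The super-block handling added here follows a snapshot-and-compare pattern: record the error counter before scrubbing the supers, compare afterwards, and defer the transaction commit until scrub itself has finished (since the commit would have to pause the running scrub). A toy illustration of that pattern, with invented names rather than kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the scrub context is modelled as plain data. */
struct scrub_stats {
    uint64_t super_errors;
};

/* Scrub the super blocks; here we just pretend one error was found. */
static void scrub_supers(struct scrub_stats *stat)
{
    stat->super_errors++;
}

int main(void)
{
    struct scrub_stats stat = { .super_errors = 0 };
    bool need_commit = false;

    uint64_t old_super_errors = stat.super_errors;  /* snapshot before */
    scrub_supers(&stat);
    if (stat.super_errors > old_super_errors)       /* compare after */
        need_commit = true;                         /* commit once scrub ends */

    printf("need_commit = %d\n", need_commit);
    return 0;
}
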
+@@ -4288,11 +4331,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
+ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
+ struct btrfs_scrub_progress *progress)
+ {
++ struct btrfs_dev_lookup_args args = { .devid = devid };
+ struct btrfs_device *dev;
+ struct scrub_ctx *sctx = NULL;
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+- dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
++ dev = btrfs_find_device(fs_info->fs_devices, &args);
+ if (dev)
+ sctx = dev->scrub_ctx;
+ if (sctx)
+@@ -4309,20 +4353,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
+ int *extent_mirror_num)
+ {
+ u64 mapped_length;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ int ret;
+
+ mapped_length = extent_len;
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
+- &mapped_length, &bbio, 0);
+- if (ret || !bbio || mapped_length < extent_len ||
+- !bbio->stripes[0].dev->bdev) {
+- btrfs_put_bbio(bbio);
++ &mapped_length, &bioc, 0);
++ if (ret || !bioc || mapped_length < extent_len ||
++ !bioc->stripes[0].dev->bdev) {
++ btrfs_put_bioc(bioc);
+ return;
+ }
+
+- *extent_physical = bbio->stripes[0].physical;
+- *extent_mirror_num = bbio->mirror_num;
+- *extent_dev = bbio->stripes[0].dev;
+- btrfs_put_bbio(bbio);
++ *extent_physical = bioc->stripes[0].physical;
++ *extent_mirror_num = bioc->mirror_num;
++ *extent_dev = bioc->stripes[0].dev;
++ btrfs_put_bioc(bioc);
+ }
+diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
+index 72f9b865e8479..692ae2e2f8cc5 100644
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -24,6 +24,7 @@
+ #include "transaction.h"
+ #include "compression.h"
+ #include "xattr.h"
++#include "print-tree.h"
+
+ /*
+ * Maximum number of references an extent can have in order for us to attempt to
+@@ -95,6 +96,15 @@ struct send_ctx {
+ struct btrfs_path *right_path;
+ struct btrfs_key *cmp_key;
+
++ /*
++ * Keep track of the generation of the last transaction that was used
++ * for relocating a block group. This is periodically checked in order
++ * to detect if a relocation happened since the last check, so that we
++ * don't operate on stale extent buffers for nodes (level >= 1) or on
++ * stale disk_bytenr values of file extent items.
++ */
++ u64 last_reloc_trans;
++
+ /*
+ * infos of the currently processed inode. In case of deleted inodes,
+ * these are the values from the deleted inode.
+@@ -1415,6 +1425,26 @@ static int find_extent_clone(struct send_ctx *sctx,
+ if (ret < 0)
+ goto out;
+
++ down_read(&fs_info->commit_root_sem);
++ if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
++ /*
++ * A transaction commit for a transaction in which block group
++ * relocation was done just happened.
++ * The disk_bytenr of the file extent item we processed is
++ * possibly stale, referring to the extent's location before
++ * relocation. So act as if we haven't found any clone sources
++ * and fallback to write commands, which will read the correct
++ * data from the new extent location. Otherwise we will fail
++ * below because we haven't found our own back reference or we
++ * could be getting incorrect sources in case the old extent
++ * was already reallocated after the relocation.
++ */
++ up_read(&fs_info->commit_root_sem);
++ ret = -ENOENT;
++ goto out;
++ }
++ up_read(&fs_info->commit_root_sem);
++
+ if (!backref_ctx.found_itself) {
+ /* found a bug in backref code? */
+ ret = -EIO;
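
The new check compares the relocation generation sampled when send started against the filesystem-wide value, read under the commit root semaphore; if relocation ran in between, the cached disk_bytenr may be stale and send falls back to plain write commands. A small pthread-based sketch of the same decision, with made-up names standing in for fs_info->last_reloc_trans and sctx->last_reloc_trans:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Models fs_info->last_reloc_trans protected by commit_root_sem (read side). */
static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
static uint64_t global_last_reloc_trans = 0;

/* True if the cached disk_bytenr may be stale, so cloning must be skipped
 * in favour of write commands. */
static bool must_fall_back_to_writes(uint64_t ctx_last_reloc_trans)
{
    bool stale;

    pthread_rwlock_rdlock(&commit_root_sem);
    stale = global_last_reloc_trans > ctx_last_reloc_trans;
    pthread_rwlock_unlock(&commit_root_sem);
    return stale;
}

int main(void)
{
    uint64_t ctx = 10;                 /* generation sampled when send started */

    global_last_reloc_trans = 10;
    printf("no relocation: %d\n", must_fall_back_to_writes(ctx));
    global_last_reloc_trans = 11;      /* a block group was relocated since */
    printf("after relocation: %d\n", must_fall_back_to_writes(ctx));
    return 0;
}
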
+@@ -4978,6 +5008,10 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
++ btrfs_err(fs_info,
++ "send: IO error at offset %llu for inode %llu root %llu",
++ page_offset(page), sctx->cur_ino,
++ sctx->send_root->root_key.objectid);
+ put_page(page);
+ ret = -EIO;
+ break;
+@@ -5364,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx,
+ u64 ext_len;
+ u64 clone_len;
+ u64 clone_data_offset;
++ bool crossed_src_i_size = false;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(clone_root->root, path);
+@@ -5420,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx,
+ if (key.offset >= clone_src_i_size)
+ break;
+
+- if (key.offset + ext_len > clone_src_i_size)
++ if (key.offset + ext_len > clone_src_i_size) {
+ ext_len = clone_src_i_size - key.offset;
++ crossed_src_i_size = true;
++ }
+
+ clone_data_offset = btrfs_file_extent_offset(leaf, ei);
+ if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
+@@ -5481,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx,
+ ret = send_clone(sctx, offset, clone_len,
+ clone_root);
+ }
++ } else if (crossed_src_i_size && clone_len < len) {
++ /*
++ * If we are at i_size of the clone source inode and we
++ * can not clone from it, terminate the loop. This is
++ * to avoid sending two write operations, one with a
++ * length matching clone_len and the final one after
++ * this loop with a length of len - clone_len.
++ *
++ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
++ * was passed to the send ioctl), this helps avoid
++ * sending an encoded write for an offset that is not
++ * sector size aligned, in case the i_size of the source
++ * inode is not sector size aligned. That will make the
++ * receiver fallback to decompression of the data and
++		 * receiver fall back to decompression of the data and
++		 * writing it using regular buffered IO; therefore, while
++		 * not incorrect, it's not optimal due to decompression and
++ */
++ break;
+ } else {
+ ret = send_extent_data(sctx, offset, clone_len);
+ }
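
The crossed_src_i_size handling clamps the clone length at the source inode's i_size and, if bytes remain, sends them as a single write instead of attempting another, unaligned clone. The stand-alone arithmetic sketch below shows the decision; the helper name and values are hypothetical, only the clamping logic mirrors the hunk above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Decide whether the remainder past the clone source's i_size should be sent
 * as one write instead of attempting an unaligned clone. */
static bool stop_cloning_at_src_i_size(uint64_t key_offset, uint64_t ext_len,
                                       uint64_t clone_src_i_size,
                                       uint64_t clone_len, uint64_t len)
{
    bool crossed_src_i_size = false;

    if (key_offset + ext_len > clone_src_i_size) {
        ext_len = clone_src_i_size - key_offset;   /* clamp at i_size */
        crossed_src_i_size = true;
    }
    /* If we clamped and still have bytes left, one write covers the rest. */
    return crossed_src_i_size && clone_len < len;
}

int main(void)
{
    /* Source i_size 10000 is not sector aligned: cloning stops at the clamp. */
    printf("%d\n", stop_cloning_at_src_i_size(8192, 4096, 10000, 1808, 8192));
    return 0;
}
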
+@@ -6592,6 +6648,50 @@ static int changed_cb(struct btrfs_path *left_path,
+ {
+ int ret = 0;
+
++ /*
++ * We can not hold the commit root semaphore here. This is because in
++ * the case of sending and receiving to the same filesystem, using a
++ * pipe, could result in a deadlock:
++ *
++ * 1) The task running send blocks on the pipe because it's full;
++ *
++ * 2) The task running receive, which is the only consumer of the pipe,
++ * is waiting for a transaction commit (for example due to a space
++ * reservation when doing a write or triggering a transaction commit
++ * when creating a subvolume);
++ *
++ * 3) The transaction is waiting to write lock the commit root semaphore,
++ * but can not acquire it since it's being held at 1).
++ *
++ * Down this call chain we write to the pipe through kernel_write().
++ * The same type of problem can also happen when sending to a file that
++ * is stored in the same filesystem - when reserving space for a write
++ * into the file, we can trigger a transaction commit.
++ *
++ * Our caller has supplied us with clones of leaves from the send and
++ * parent roots, so we're safe here from a concurrent relocation and
++ * further reallocation of metadata extents while we are here. Below we
++ * also assert that the leaves are clones.
++ */
++ lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
++
++ /*
++ * We always have a send root, so left_path is never NULL. We will not
++ * have a leaf when we have reached the end of the send root but have
++ * not yet reached the end of the parent root.
++ */
++ if (left_path->nodes[0])
++ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
++ &left_path->nodes[0]->bflags));
++ /*
++ * When doing a full send we don't have a parent root, so right_path is
++ * NULL. When doing an incremental send, we may have reached the end of
++ * the parent root already, so we don't have a leaf at right_path.
++ */
++ if (right_path && right_path->nodes[0])
++ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
++ &right_path->nodes[0]->bflags));
++
+ if (result == BTRFS_COMPARE_TREE_SAME) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY) {
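
The comment above boils down to a lock-ordering rule: never perform a blocking write while holding the commit root semaphore; clone what you need under the lock, drop it, and only then write. A self-contained sketch of that pattern, using a pipe and a pthread rwlock as stand-ins (all names here are invented for the example):

#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Models the rule described above: copy under the lock, write with no locks
 * held, so a committer waiting for the write side cannot deadlock against a
 * slow consumer of the pipe. */
static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
static char shared_leaf[64] = "leaf data protected by the lock";

static void emit_change(int pipe_wr_fd)
{
    char clone[64];

    pthread_rwlock_rdlock(&commit_root_sem);
    memcpy(clone, shared_leaf, sizeof(clone));   /* cheap, non-blocking copy */
    pthread_rwlock_unlock(&commit_root_sem);

    /* The potentially blocking call happens with no locks held. */
    write(pipe_wr_fd, clone, strlen(clone));
}

int main(void)
{
    int fds[2];
    char buf[64] = { 0 };

    pipe(fds);
    emit_change(fds[1]);
    read(fds[0], buf, sizeof(buf) - 1);
    printf("received: %s\n", buf);
    return 0;
}
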
+@@ -6638,14 +6738,46 @@ out:
+ return ret;
+ }
+
++static int search_key_again(const struct send_ctx *sctx,
++ struct btrfs_root *root,
++ struct btrfs_path *path,
++ const struct btrfs_key *key)
++{
++ int ret;
++
++ if (!path->need_commit_sem)
++ lockdep_assert_held_read(&root->fs_info->commit_root_sem);
++
++ /*
++ * Roots used for send operations are readonly and no one can add,
++ * update or remove keys from them, so we should be able to find our
++ * key again. The only exception is deduplication, which can operate on
++ * readonly roots and add, update or remove keys to/from them - but at
++ * the moment we don't allow it to run in parallel with send.
++ */
++ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
++ ASSERT(ret <= 0);
++ if (ret > 0) {
++ btrfs_print_tree(path->nodes[path->lowest_level], false);
++ btrfs_err(root->fs_info,
++"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
++ key->objectid, key->type, key->offset,
++ (root == sctx->parent_root ? "parent" : "send"),
++ root->root_key.objectid, path->lowest_level,
++ path->slots[path->lowest_level]);
++ return -EUCLEAN;
++ }
++
++ return ret;
++}
++
+ static int full_send_tree(struct send_ctx *sctx)
+ {
+ int ret;
+ struct btrfs_root *send_root = sctx->send_root;
+ struct btrfs_key key;
++ struct btrfs_fs_info *fs_info = send_root->fs_info;
+ struct btrfs_path *path;
+- struct extent_buffer *eb;
+- int slot;
+
+ path = alloc_path_for_send();
+ if (!path)
+@@ -6656,6 +6788,10 @@ static int full_send_tree(struct send_ctx *sctx)
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
++ down_read(&fs_info->commit_root_sem);
++ sctx->last_reloc_trans = fs_info->last_reloc_trans;
++ up_read(&fs_info->commit_root_sem);
++
+ ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
+ if (ret < 0)
+ goto out;
+@@ -6663,15 +6799,35 @@ static int full_send_tree(struct send_ctx *sctx)
+ goto out_finish;
+
+ while (1) {
+- eb = path->nodes[0];
+- slot = path->slots[0];
+- btrfs_item_key_to_cpu(eb, &key, slot);
++ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ ret = changed_cb(path, NULL, &key,
+ BTRFS_COMPARE_TREE_NEW, sctx);
+ if (ret < 0)
+ goto out;
+
++ down_read(&fs_info->commit_root_sem);
++ if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
++ sctx->last_reloc_trans = fs_info->last_reloc_trans;
++ up_read(&fs_info->commit_root_sem);
++ /*
++ * A transaction used for relocating a block group was
++ * committed or is about to finish its commit. Release
++ * our path (leaf) and restart the search, so that we
++ * avoid operating on any file extent items that are
++ * stale, with a disk_bytenr that reflects a pre
++ * relocation value. This way we avoid as much as
++ * possible to fallback to regular writes when checking
++ * if we can clone file ranges.
++ */
++ btrfs_release_path(path);
++ ret = search_key_again(sctx, send_root, path, &key);
++ if (ret < 0)
++ goto out;
++ } else {
++ up_read(&fs_info->commit_root_sem);
++ }
++
+ ret = btrfs_next_item(send_root, path);
+ if (ret < 0)
+ goto out;
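
The restart logic re-locates the current key whenever a relocation transaction committed since the last check, instead of trusting the cached leaf. The loop below models that over a plain sorted array; every identifier is invented for the example and the array search stands in for btrfs_search_slot().

#include <stdint.h>
#include <stdio.h>

static uint64_t last_reloc_trans = 0;   /* filesystem-wide counter */

static int find_slot(const uint64_t *keys, int nr, uint64_t key)
{
    for (int i = 0; i < nr; i++)
        if (keys[i] == key)
            return i;
    return -1;                          /* would be -EUCLEAN in the patch */
}

int main(void)
{
    uint64_t keys[] = { 256, 257, 300, 4096 };
    uint64_t my_reloc_trans = last_reloc_trans;
    int slot = 0;

    while (slot >= 0 && slot < 4) {
        printf("processing key %llu\n", (unsigned long long)keys[slot]);
        if (slot == 1)
            last_reloc_trans++;         /* pretend a relocation committed */
        if (last_reloc_trans > my_reloc_trans) {
            my_reloc_trans = last_reloc_trans;
            slot = find_slot(keys, 4, keys[slot]);  /* release + re-search */
            if (slot < 0)
                break;
        }
        slot++;                          /* btrfs_next_item() equivalent */
    }
    return 0;
}
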
+@@ -6689,6 +6845,20 @@ out:
+ return ret;
+ }
+
++static int replace_node_with_clone(struct btrfs_path *path, int level)
++{
++ struct extent_buffer *clone;
++
++ clone = btrfs_clone_extent_buffer(path->nodes[level]);
++ if (!clone)
++ return -ENOMEM;
++
++ free_extent_buffer(path->nodes[level]);
++ path->nodes[level] = clone;
++
++ return 0;
++}
++
+ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
+ {
+ struct extent_buffer *eb;
+@@ -6698,6 +6868,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
+ u64 reada_max;
+ u64 reada_done = 0;
+
++ lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
++
+ BUG_ON(*level == 0);
+ eb = btrfs_read_node_slot(parent, slot);
+ if (IS_ERR(eb))
+@@ -6721,6 +6893,10 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
+ path->nodes[*level - 1] = eb;
+ path->slots[*level - 1] = 0;
+ (*level)--;
++
++ if (*level == 0)
++ return replace_node_with_clone(path, 0);
++
+ return 0;
+ }
+
+@@ -6734,8 +6910,10 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,
+ path->slots[*level]++;
+
+ while (path->slots[*level] >= nritems) {
+- if (*level == root_level)
++ if (*level == root_level) {
++ path->slots[*level] = nritems - 1;
+ return -1;
++ }
+
+ /* move upnext */
+ path->slots[*level] = 0;
+@@ -6767,14 +6945,20 @@ static int tree_advance(struct btrfs_path *path,
+ } else {
+ ret = tree_move_down(path, level, reada_min_gen);
+ }
+- if (ret >= 0) {
+- if (*level == 0)
+- btrfs_item_key_to_cpu(path->nodes[*level], key,
+- path->slots[*level]);
+- else
+- btrfs_node_key_to_cpu(path->nodes[*level], key,
+- path->slots[*level]);
+- }
++
++ /*
++	 * Even if we have reached the end of a tree (ret is -1), update the key
++ * anyway, so that in case we need to restart due to a block group
++ * relocation, we can assert that the last key of the root node still
++ * exists in the tree.
++ */
++ if (*level == 0)
++ btrfs_item_key_to_cpu(path->nodes[*level], key,
++ path->slots[*level]);
++ else
++ btrfs_node_key_to_cpu(path->nodes[*level], key,
++ path->slots[*level]);
++
+ return ret;
+ }
+
+@@ -6803,6 +6987,97 @@ static int tree_compare_item(struct btrfs_path *left_path,
+ return 0;
+ }
+
++/*
++ * A transaction used for relocating a block group was committed or is about to
++ * finish its commit. Release our paths and restart the search, so that we are
++ * not using stale extent buffers:
++ *
++ * 1) For levels > 0, we are only holding references of extent buffers, without
++ * any locks on them, which does not prevent them from having been relocated
++ * and reallocated after the last time we released the commit root semaphore.
++ *    The exception is the root nodes, for which we always have a clone; see
++ * the comment at btrfs_compare_trees();
++ *
++ * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
++ * we are safe from the concurrent relocation and reallocation. However they
++ * can have file extent items with a pre relocation disk_bytenr value, so we
++ *    restart the search from the current commit roots and clone the new leaves so
++ *    that we get the post relocation disk_bytenr values. Not doing so could
++ * make us clone the wrong data in case there are new extents using the old
++ * disk_bytenr that happen to be shared.
++ */
++static int restart_after_relocation(struct btrfs_path *left_path,
++ struct btrfs_path *right_path,
++ const struct btrfs_key *left_key,
++ const struct btrfs_key *right_key,
++ int left_level,
++ int right_level,
++ const struct send_ctx *sctx)
++{
++ int root_level;
++ int ret;
++
++ lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
++
++ btrfs_release_path(left_path);
++ btrfs_release_path(right_path);
++
++ /*
++ * Since keys can not be added or removed to/from our roots because they
++ * are readonly and we do not allow deduplication to run in parallel
++ * (which can add, remove or change keys), the layout of the trees should
++ * not change.
++ */
++ left_path->lowest_level = left_level;
++ ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
++ if (ret < 0)
++ return ret;
++
++ right_path->lowest_level = right_level;
++ ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
++ if (ret < 0)
++ return ret;
++
++ /*
++ * If the lowest level nodes are leaves, clone them so that they can be
++ * safely used by changed_cb() while not under the protection of the
++ * commit root semaphore, even if relocation and reallocation happens in
++ * parallel.
++ */
++ if (left_level == 0) {
++ ret = replace_node_with_clone(left_path, 0);
++ if (ret < 0)
++ return ret;
++ }
++
++ if (right_level == 0) {
++ ret = replace_node_with_clone(right_path, 0);
++ if (ret < 0)
++ return ret;
++ }
++
++ /*
++ * Now clone the root nodes (unless they happen to be the leaves we have
++ * already cloned). This is to protect against concurrent snapshotting of
++ * the send and parent roots (see the comment at btrfs_compare_trees()).
++ */
++ root_level = btrfs_header_level(sctx->send_root->commit_root);
++ if (root_level > 0) {
++ ret = replace_node_with_clone(left_path, root_level);
++ if (ret < 0)
++ return ret;
++ }
++
++ root_level = btrfs_header_level(sctx->parent_root->commit_root);
++ if (root_level > 0) {
++ ret = replace_node_with_clone(right_path, root_level);
++ if (ret < 0)
++ return ret;
++ }
++
++ return 0;
++}
++
+ /*
+ * This function compares two trees and calls the provided callback for
+ * every changed/new/deleted item it finds.
+@@ -6831,10 +7106,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ int right_root_level;
+ int left_level;
+ int right_level;
+- int left_end_reached;
+- int right_end_reached;
+- int advance_left;
+- int advance_right;
++ int left_end_reached = 0;
++ int right_end_reached = 0;
++ int advance_left = 0;
++ int advance_right = 0;
+ u64 left_blockptr;
+ u64 right_blockptr;
+ u64 left_gen;
+@@ -6902,12 +7177,18 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ down_read(&fs_info->commit_root_sem);
+ left_level = btrfs_header_level(left_root->commit_root);
+ left_root_level = left_level;
++ /*
++ * We clone the root node of the send and parent roots to prevent races
++ * with snapshot creation of these roots. Snapshot creation COWs the
++ * root node of a tree, so after the transaction is committed the old
++ * extent can be reallocated while this send operation is still ongoing.
++ * So we clone them, under the commit root semaphore, to be race free.
++ */
+ left_path->nodes[left_level] =
+ btrfs_clone_extent_buffer(left_root->commit_root);
+ if (!left_path->nodes[left_level]) {
+- up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+- goto out;
++ goto out_unlock;
+ }
+
+ right_level = btrfs_header_level(right_root->commit_root);
+@@ -6915,9 +7196,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ right_path->nodes[right_level] =
+ btrfs_clone_extent_buffer(right_root->commit_root);
+ if (!right_path->nodes[right_level]) {
+- up_read(&fs_info->commit_root_sem);
+ ret = -ENOMEM;
+- goto out;
++ goto out_unlock;
+ }
+ /*
+ * Our right root is the parent root, while the left root is the "send"
+@@ -6927,7 +7207,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ * will need to read them at some point.
+ */
+ reada_min_gen = btrfs_header_generation(right_root->commit_root);
+- up_read(&fs_info->commit_root_sem);
+
+ if (left_level == 0)
+ btrfs_item_key_to_cpu(left_path->nodes[left_level],
+@@ -6942,11 +7221,26 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ btrfs_node_key_to_cpu(right_path->nodes[right_level],
+ &right_key, right_path->slots[right_level]);
+
+- left_end_reached = right_end_reached = 0;
+- advance_left = advance_right = 0;
++ sctx->last_reloc_trans = fs_info->last_reloc_trans;
+
+ while (1) {
+- cond_resched();
++ if (need_resched() ||
++ rwsem_is_contended(&fs_info->commit_root_sem)) {
++ up_read(&fs_info->commit_root_sem);
++ cond_resched();
++ down_read(&fs_info->commit_root_sem);
++ }
++
++ if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
++ ret = restart_after_relocation(left_path, right_path,
++ &left_key, &right_key,
++ left_level, right_level,
++ sctx);
++ if (ret < 0)
++ goto out_unlock;
++ sctx->last_reloc_trans = fs_info->last_reloc_trans;
++ }
++
+ if (advance_left && !left_end_reached) {
+ ret = tree_advance(left_path, &left_level,
+ left_root_level,
+@@ -6955,7 +7249,7 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ if (ret == -1)
+ left_end_reached = ADVANCE;
+ else if (ret < 0)
+- goto out;
++ goto out_unlock;
+ advance_left = 0;
+ }
+ if (advance_right && !right_end_reached) {
+@@ -6966,54 +7260,55 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ if (ret == -1)
+ right_end_reached = ADVANCE;
+ else if (ret < 0)
+- goto out;
++ goto out_unlock;
+ advance_right = 0;
+ }
+
+ if (left_end_reached && right_end_reached) {
+ ret = 0;
+- goto out;
++ goto out_unlock;
+ } else if (left_end_reached) {
+ if (right_level == 0) {
++ up_read(&fs_info->commit_root_sem);
+ ret = changed_cb(left_path, right_path,
+ &right_key,
+ BTRFS_COMPARE_TREE_DELETED,
+ sctx);
+ if (ret < 0)
+ goto out;
++ down_read(&fs_info->commit_root_sem);
+ }
+ advance_right = ADVANCE;
+ continue;
+ } else if (right_end_reached) {
+ if (left_level == 0) {
++ up_read(&fs_info->commit_root_sem);
+ ret = changed_cb(left_path, right_path,
+ &left_key,
+ BTRFS_COMPARE_TREE_NEW,
+ sctx);
+ if (ret < 0)
+ goto out;
++ down_read(&fs_info->commit_root_sem);
+ }
+ advance_left = ADVANCE;
+ continue;
+ }
+
+ if (left_level == 0 && right_level == 0) {
++ up_read(&fs_info->commit_root_sem);
+ cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
+ if (cmp < 0) {
+ ret = changed_cb(left_path, right_path,
+ &left_key,
+ BTRFS_COMPARE_TREE_NEW,
+ sctx);
+- if (ret < 0)
+- goto out;
+ advance_left = ADVANCE;
+ } else if (cmp > 0) {
+ ret = changed_cb(left_path, right_path,
+ &right_key,
+ BTRFS_COMPARE_TREE_DELETED,
+ sctx);
+- if (ret < 0)
+- goto out;
+ advance_right = ADVANCE;
+ } else {
+ enum btrfs_compare_tree_result result;
+@@ -7027,11 +7322,13 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ result = BTRFS_COMPARE_TREE_SAME;
+ ret = changed_cb(left_path, right_path,
+ &left_key, result, sctx);
+- if (ret < 0)
+- goto out;
+ advance_left = ADVANCE;
+ advance_right = ADVANCE;
+ }
++
++ if (ret < 0)
++ goto out;
++ down_read(&fs_info->commit_root_sem);
+ } else if (left_level == right_level) {
+ cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
+ if (cmp < 0) {
+@@ -7071,6 +7368,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
+ }
+ }
+
++out_unlock:
++ up_read(&fs_info->commit_root_sem);
+ out:
+ btrfs_free_path(left_path);
+ btrfs_free_path(right_path);
+@@ -7250,10 +7549,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+ /*
+ * Check that we don't overflow at later allocations, we request
+ * clone_sources_count + 1 items, and compare to unsigned long inside
+- * access_ok.
++ * access_ok. Also set an upper limit for allocation size so this can't
++ * easily exhaust memory. Max number of clone sources is about 200K.
+ */
+- if (arg->clone_sources_count >
+- ULONG_MAX / sizeof(struct clone_root) - 1) {
++ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -7409,21 +7708,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+ if (ret)
+ goto out;
+
+- spin_lock(&fs_info->send_reloc_lock);
+- if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+- spin_unlock(&fs_info->send_reloc_lock);
+- btrfs_warn_rl(fs_info,
+- "cannot run send because a relocation operation is in progress");
+- ret = -EAGAIN;
+- goto out;
+- }
+- fs_info->send_in_progress++;
+- spin_unlock(&fs_info->send_reloc_lock);
+-
+ ret = send_subvol(sctx);
+- spin_lock(&fs_info->send_reloc_lock);
+- fs_info->send_in_progress--;
+- spin_unlock(&fs_info->send_reloc_lock);
+ if (ret < 0)
+ goto out;
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index aa5be0b24987a..5ed66a794e577 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -1054,7 +1054,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
+ trans_rsv->reserved;
+ if (block_rsv_size < space_info->bytes_may_use)
+ delalloc_size = space_info->bytes_may_use - block_rsv_size;
+- spin_unlock(&space_info->lock);
+
+ /*
+ * We don't want to include the global_rsv in our calculation,
+@@ -1085,6 +1084,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
+ flush = FLUSH_DELAYED_REFS_NR;
+ }
+
++ spin_unlock(&space_info->lock);
++
+ /*
+ * We don't want to reclaim everything, just a portion, so scale
+ * down the to_reclaim by 1/4. If it takes us down to 0,
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 537d90bf5d844..ea23b83fc96be 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -574,6 +574,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ int saved_compress_level;
+ bool saved_compress_force;
+ int no_compress = 0;
++ const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
+
+ if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
+ btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+@@ -712,6 +713,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ compress_force = false;
+ no_compress++;
+ } else {
++ btrfs_err(info, "unrecognized compression value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -770,8 +773,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ case Opt_thread_pool:
+ ret = match_int(&args[0], &intarg);
+ if (ret) {
++ btrfs_err(info, "unrecognized thread_pool value %s",
++ args[0].from);
+ goto out;
+ } else if (intarg == 0) {
++ btrfs_err(info, "invalid value 0 for thread_pool");
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -832,8 +838,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ break;
+ case Opt_ratio:
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info, "unrecognized metadata_ratio value %s",
++ args[0].from);
+ goto out;
++ }
+ info->metadata_ratio = intarg;
+ btrfs_info(info, "metadata ratio %u",
+ info->metadata_ratio);
+@@ -850,6 +859,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ btrfs_set_and_info(info, DISCARD_ASYNC,
+ "turning on async discard");
+ } else {
++ btrfs_err(info, "unrecognized discard mode value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -874,6 +885,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ btrfs_set_and_info(info, FREE_SPACE_TREE,
+ "enabling free space tree");
+ } else {
++ btrfs_err(info, "unrecognized space_cache value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -943,8 +956,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ break;
+ case Opt_check_integrity_print_mask:
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info,
++ "unrecognized check_integrity_print_mask value %s",
++ args[0].from);
+ goto out;
++ }
+ info->check_integrity_print_mask = intarg;
+ btrfs_info(info, "check_integrity_print_mask 0x%x",
+ info->check_integrity_print_mask);
+@@ -959,13 +976,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ goto out;
+ #endif
+ case Opt_fatal_errors:
+- if (strcmp(args[0].from, "panic") == 0)
++ if (strcmp(args[0].from, "panic") == 0) {
+ btrfs_set_opt(info->mount_opt,
+ PANIC_ON_FATAL_ERROR);
+- else if (strcmp(args[0].from, "bug") == 0)
++ } else if (strcmp(args[0].from, "bug") == 0) {
+ btrfs_clear_opt(info->mount_opt,
+ PANIC_ON_FATAL_ERROR);
+- else {
++ } else {
++ btrfs_err(info, "unrecognized fatal_errors value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -973,8 +992,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ case Opt_commit_interval:
+ intarg = 0;
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info, "unrecognized commit_interval value %s",
++ args[0].from);
++ ret = -EINVAL;
+ goto out;
++ }
+ if (intarg == 0) {
+ btrfs_info(info,
+ "using default commit interval %us",
+@@ -988,8 +1011,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
+ break;
+ case Opt_rescue:
+ ret = parse_rescue_options(info, args[0].from);
+- if (ret < 0)
++ if (ret < 0) {
++ btrfs_err(info, "unrecognized rescue value %s",
++ args[0].from);
+ goto out;
++ }
+ break;
+ #ifdef CONFIG_BTRFS_DEBUG
+ case Opt_fragment_all:
+@@ -1040,10 +1066,12 @@ out:
+ }
+ if (!ret)
+ ret = btrfs_check_mountopts_zoned(info);
+- if (!ret && btrfs_test_opt(info, SPACE_CACHE))
+- btrfs_info(info, "disk space caching is enabled");
+- if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
+- btrfs_info(info, "using free space tree");
++ if (!ret && !remounting) {
++ if (btrfs_test_opt(info, SPACE_CACHE))
++ btrfs_info(info, "disk space caching is enabled");
++ if (btrfs_test_opt(info, FREE_SPACE_TREE))
++ btrfs_info(info, "using free space tree");
++ }
+ return ret;
+ }
+
+@@ -1705,7 +1733,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
+ goto error_close_devices;
+ }
+
+- bdev = fs_devices->latest_bdev;
++ bdev = fs_devices->latest_dev->bdev;
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
+ fs_info);
+ if (IS_ERR(s)) {
+@@ -1721,8 +1749,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
+ } else {
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+ btrfs_sb(s)->bdev_holder = fs_type;
+- if (!strstr(crc32c_impl(), "generic"))
+- set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
+ error = btrfs_fill_super(s, fs_devices, data);
+ }
+ if (!error)
+@@ -1917,6 +1943,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+ if (ret)
+ goto restore;
+
++ /* V1 cache is not supported for subpage mount. */
++ if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
++ btrfs_warn(fs_info,
++ "v1 space cache is not supported for page size %lu with sectorsize %u",
++ PAGE_SIZE, fs_info->sectorsize);
++ ret = -EINVAL;
++ goto restore;
++ }
+ btrfs_remount_begin(fs_info, old_opts, *flags);
+ btrfs_resize_thread_pool(fs_info,
+ fs_info->thread_pool_size, old_thread_pool_size);
+@@ -2012,6 +2046,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+ ret = -EINVAL;
+ goto restore;
+ }
++ if (btrfs_super_compat_ro_flags(fs_info->super_copy) &
++ ~BTRFS_FEATURE_COMPAT_RO_SUPP) {
++ btrfs_err(fs_info,
++ "can not remount read-write due to unsupported optional flags 0x%llx",
++ btrfs_super_compat_ro_flags(fs_info->super_copy) &
++ ~BTRFS_FEATURE_COMPAT_RO_SUPP);
++ ret = -EINVAL;
++ goto restore;
++ }
+ if (fs_info->fs_devices->rw_devices == 0) {
+ ret = -EACCES;
+ goto restore;
+@@ -2452,41 +2495,103 @@ static int btrfs_freeze(struct super_block *sb)
+ return btrfs_commit_transaction(trans);
+ }
+
++static int check_dev_super(struct btrfs_device *dev)
++{
++ struct btrfs_fs_info *fs_info = dev->fs_info;
++ struct btrfs_super_block *sb;
++ u16 csum_type;
++ int ret = 0;
++
++ /* This should be called with fs still frozen. */
++ ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
++
++ /* Missing dev, no need to check. */
++ if (!dev->bdev)
++ return 0;
++
++ /* Only need to check the primary super block. */
++ sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
++ if (IS_ERR(sb))
++ return PTR_ERR(sb);
++
++ /* Verify the checksum. */
++ csum_type = btrfs_super_csum_type(sb);
++ if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
++ btrfs_err(fs_info, "csum type changed, has %u expect %u",
++ csum_type, btrfs_super_csum_type(fs_info->super_copy));
++ ret = -EUCLEAN;
++ goto out;
++ }
++
++ if (btrfs_check_super_csum(fs_info, sb)) {
++ btrfs_err(fs_info, "csum for on-disk super block no longer matches");
++ ret = -EUCLEAN;
++ goto out;
++ }
++
++ /* Btrfs_validate_super() includes fsid check against super->fsid. */
++ ret = btrfs_validate_super(fs_info, sb, 0);
++ if (ret < 0)
++ goto out;
++
++ if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
++ btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
++ btrfs_super_generation(sb),
++ fs_info->last_trans_committed);
++ ret = -EUCLEAN;
++ goto out;
++ }
++out:
++ btrfs_release_disk_super(sb);
++ return ret;
++}
++
+ static int btrfs_unfreeze(struct super_block *sb)
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(sb);
++ struct btrfs_device *device;
++ int ret = 0;
+
++ /*
++	 * Make sure the fs was not changed by accident (e.g. hibernated and then
++	 * modified by another OS).
++ * If we found anything wrong, we mark the fs error immediately.
++ *
++ * And since the fs is frozen, no one can modify the fs yet, thus
++ * we don't need to hold device_list_mutex.
++ */
++ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
++ ret = check_dev_super(device);
++ if (ret < 0) {
++ btrfs_handle_fs_error(fs_info, ret,
++ "super block on devid %llu got modified unexpectedly",
++ device->devid);
++ break;
++ }
++ }
+ clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
++
++ /*
++	 * We still return 0, to allow the VFS layer to unfreeze the fs even if
++	 * the above checks failed. Since the fs is either fine or read-only, we're
++ * safe to continue, without causing further damage.
++ */
+ return 0;
+ }
+
+ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
+- struct btrfs_device *dev, *first_dev = NULL;
+
+ /*
+- * Lightweight locking of the devices. We should not need
+- * device_list_mutex here as we only read the device data and the list
+- * is protected by RCU. Even if a device is deleted during the list
+- * traversals, we'll get valid data, the freeing callback will wait at
+- * least until the rcu_read_unlock.
++	 * There should always be a valid pointer in latest_dev; it may be stale
++	 * for a short moment in case it's being deleted, but it is still valid
++	 * until the end of the RCU grace period.
+ */
+ rcu_read_lock();
+- list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
+- if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
+- continue;
+- if (!dev->name)
+- continue;
+- if (!first_dev || dev->devid < first_dev->devid)
+- first_dev = dev;
+- }
+-
+- if (first_dev)
+- seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
+- else
+- WARN_ON(1);
++ seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
+ rcu_read_unlock();
++
+ return 0;
+ }
+
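
check_dev_super() re-reads the primary super block at thaw time and verifies its checksum, fsid and generation against the in-memory copy taken before the freeze. A toy model of those three checks follows; the struct layout and the additive checksum are placeholders for the real super block format and crc32c/xxhash, chosen only to keep the sketch self-contained.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct super_copy {
    uint8_t  fsid[16];
    uint64_t generation;
    uint32_t csum;
};

static uint32_t toy_csum(const struct super_copy *sb)
{
    const uint8_t *p = (const uint8_t *)sb;
    uint32_t sum = 0;

    /* Sum everything except the csum field itself. */
    for (size_t i = 0; i < offsetof(struct super_copy, csum); i++)
        sum += p[i];
    return sum;
}

static bool super_unmodified(const struct super_copy *cached,
                             const struct super_copy *on_disk)
{
    if (toy_csum(on_disk) != on_disk->csum)
        return false;                          /* checksum no longer matches */
    if (memcmp(cached->fsid, on_disk->fsid, 16) != 0)
        return false;                          /* different filesystem */
    return cached->generation == on_disk->generation;  /* transid check */
}

int main(void)
{
    struct super_copy cached = { .fsid = {1}, .generation = 42 };
    struct super_copy on_disk = cached;

    on_disk.csum = toy_csum(&on_disk);
    printf("clean thaw: %d\n", super_unmodified(&cached, &on_disk));
    on_disk.generation = 99;                   /* another OS wrote to the fs */
    on_disk.csum = toy_csum(&on_disk);
    printf("modified:   %d\n", super_unmodified(&cached, &on_disk));
    return 0;
}
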
+diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
+index 25a6f587852be..1c40e51513210 100644
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -2035,8 +2035,11 @@ int __init btrfs_init_sysfs(void)
+
+ #ifdef CONFIG_BTRFS_DEBUG
+ ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
+- if (ret)
+- goto out2;
++ if (ret) {
++ sysfs_unmerge_group(&btrfs_kset->kobj,
++ &btrfs_static_feature_attr_group);
++ goto out_remove_group;
++ }
+ #endif
+
+ return 0;
+diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
+index 3a4099a2bf051..3df9904972546 100644
+--- a/fs/btrfs/tests/btrfs-tests.c
++++ b/fs/btrfs/tests/btrfs-tests.c
+@@ -199,7 +199,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
+
+ void btrfs_free_dummy_root(struct btrfs_root *root)
+ {
+- if (!root)
++ if (IS_ERR_OR_NULL(root))
+ return;
+ /* Will be freed by btrfs_free_fs_roots */
+ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
+diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
+index 19ba7d5b7d8ff..08c1abd6bb0c8 100644
+--- a/fs/btrfs/tests/qgroup-tests.c
++++ b/fs/btrfs/tests/qgroup-tests.c
+@@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
+ */
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+ if (ret) {
+- ulist_free(old_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
+- if (ret)
++ if (ret) {
++ ulist_free(old_roots);
+ return ret;
++ }
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+ if (ret) {
+ ulist_free(old_roots);
+- ulist_free(new_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+@@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
+ return ret;
+ }
+
++ /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */
++ old_roots = NULL;
++ new_roots = NULL;
++
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
+ test_err("qgroup counts didn't match expected values");
+ return -EINVAL;
+ }
+- old_roots = NULL;
+- new_roots = NULL;
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+ if (ret) {
+- ulist_free(old_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+
+ ret = remove_extent_item(root, nodesize, nodesize);
+- if (ret)
++ if (ret) {
++ ulist_free(old_roots);
+ return -EINVAL;
++ }
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+ if (ret) {
+ ulist_free(old_roots);
+- ulist_free(new_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+@@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root,
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+ if (ret) {
+- ulist_free(old_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
+- if (ret)
++ if (ret) {
++ ulist_free(old_roots);
+ return ret;
++ }
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+ if (ret) {
+ ulist_free(old_roots);
+- ulist_free(new_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+@@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root,
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+ if (ret) {
+- ulist_free(old_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+
+ ret = add_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
+- if (ret)
++ if (ret) {
++ ulist_free(old_roots);
+ return ret;
++ }
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+ if (ret) {
+ ulist_free(old_roots);
+- ulist_free(new_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+@@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root,
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
+ if (ret) {
+- ulist_free(old_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+
+ ret = remove_extent_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
+- if (ret)
++ if (ret) {
++ ulist_free(old_roots);
+ return ret;
++ }
+
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
+ if (ret) {
+ ulist_free(old_roots);
+- ulist_free(new_roots);
+ test_err("couldn't find old roots: %d", ret);
+ return ret;
+ }
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 14b9fdc8aaa9a..99cdd1d6a4bf8 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -163,6 +163,10 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
+ struct btrfs_caching_control *caching_ctl, *next;
+
+ down_write(&fs_info->commit_root_sem);
++
++ if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
++ fs_info->last_reloc_trans = trans->transid;
++
+ list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
+ dirty_list) {
+ list_del_init(&root->dirty_list);
+@@ -307,10 +311,11 @@ loop:
+ spin_unlock(&fs_info->trans_lock);
+
+ /*
+- * If we are ATTACH, we just want to catch the current transaction,
+- * and commit it. If there is no transaction, just return ENOENT.
++ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
++ * current transaction, and commit it. If there is no transaction, just
++ * return ENOENT.
+ */
+- if (type == TRANS_ATTACH)
++ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
+ return -ENOENT;
+
+ /*
+@@ -836,8 +841,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
+
+ trans = start_transaction(root, 0, TRANS_ATTACH,
+ BTRFS_RESERVE_NO_FLUSH, true);
+- if (trans == ERR_PTR(-ENOENT))
+- btrfs_wait_for_commit(root->fs_info, 0);
++ if (trans == ERR_PTR(-ENOENT)) {
++ int ret;
++
++ ret = btrfs_wait_for_commit(root->fs_info, 0);
++ if (ret)
++ return ERR_PTR(ret);
++ }
+
+ return trans;
+ }
+@@ -846,7 +856,37 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
+ static noinline void wait_for_commit(struct btrfs_transaction *commit,
+ const enum btrfs_trans_state min_state)
+ {
+- wait_event(commit->commit_wait, commit->state >= min_state);
++ struct btrfs_fs_info *fs_info = commit->fs_info;
++ u64 transid = commit->transid;
++ bool put = false;
++
++ while (1) {
++ wait_event(commit->commit_wait, commit->state >= min_state);
++ if (put)
++ btrfs_put_transaction(commit);
++
++ if (min_state < TRANS_STATE_COMPLETED)
++ break;
++
++ /*
++ * A transaction isn't really completed until all of the
++ * previous transactions are completed, but with fsync we can
++ * end up with SUPER_COMMITTED transactions before a COMPLETED
++ * transaction. Wait for those.
++ */
++
++ spin_lock(&fs_info->trans_lock);
++ commit = list_first_entry_or_null(&fs_info->trans_list,
++ struct btrfs_transaction,
++ list);
++ if (!commit || commit->transid > transid) {
++ spin_unlock(&fs_info->trans_lock);
++ break;
++ }
++ refcount_inc(&commit->use_count);
++ put = true;
++ spin_unlock(&fs_info->trans_lock);
++ }
+ }
+
+ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
+@@ -902,6 +942,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
+ }
+
+ wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
++ ret = cur_trans->aborted;
+ btrfs_put_transaction(cur_trans);
+ out:
+ return ret;
+@@ -1310,6 +1351,32 @@ again:
+ return 0;
+ }
+
++/*
++ * If we had a pending drop we need to see if there are any others left in our
++ * dead roots list, and if not clear our bit and wake any waiters.
++ */
++void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
++{
++ /*
++	 * We put the drop-in-progress roots at the front of the list, so if the
++ * first entry doesn't have UNFINISHED_DROP set we can wake everybody
++ * up.
++ */
++ spin_lock(&fs_info->trans_lock);
++ if (!list_empty(&fs_info->dead_roots)) {
++ struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
++ struct btrfs_root,
++ root_list);
++ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
++ spin_unlock(&fs_info->trans_lock);
++ return;
++ }
++ }
++ spin_unlock(&fs_info->trans_lock);
++
++ btrfs_wake_unfinished_drop(fs_info);
++}
++
+ /*
+ * dead roots are old snapshots that need to be deleted. This allocates
+ * a dirty root struct and adds it into the list of dead roots that need to
+@@ -1322,7 +1389,12 @@ void btrfs_add_dead_root(struct btrfs_root *root)
+ spin_lock(&fs_info->trans_lock);
+ if (list_empty(&root->root_list)) {
+ btrfs_grab_root(root);
+- list_add_tail(&root->root_list, &fs_info->dead_roots);
++
++ /* We want to process the partially complete drops first. */
++ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
++ list_add(&root->root_list, &fs_info->dead_roots);
++ else
++ list_add_tail(&root->root_list, &fs_info->dead_roots);
+ }
+ spin_unlock(&fs_info->trans_lock);
+ }
+@@ -2014,16 +2086,24 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+ {
+ /*
+- * We use writeback_inodes_sb here because if we used
++ * We use try_to_writeback_inodes_sb() here because if we used
+ * btrfs_start_delalloc_roots we would deadlock with fs freeze.
+	 * Currently we are holding the fs freeze lock, if we do an async flush
+ * we'll do btrfs_join_transaction() and deadlock because we need to
+ * wait for the fs freeze lock. Using the direct flushing we benefit
+ * from already being in a transaction and our join_transaction doesn't
+ * have to re-take the fs freeze lock.
++ *
++ * Note that try_to_writeback_inodes_sb() will only trigger writeback
++ * if it can read lock sb->s_umount. It will always be able to lock it,
++ * except when the filesystem is being unmounted or being frozen, but in
++ * those cases sync_filesystem() is called, which results in calling
++ * writeback_inodes_sb() while holding a write lock on sb->s_umount.
++ * Note that we don't call writeback_inodes_sb() directly, because it
++ * will emit a warning if sb->s_umount is not locked.
+ */
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+- writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
++ try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ return 0;
+ }
+
+@@ -2033,6 +2113,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ }
+
++/*
++ * Add a pending snapshot associated with the given transaction handle to the
++ * respective transaction. This must be called after the transaction commit started
++ * and while holding fs_info->trans_lock.
++ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
++ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
++ * returns an error.
++ */
++static void add_pending_snapshot(struct btrfs_trans_handle *trans)
++{
++ struct btrfs_transaction *cur_trans = trans->transaction;
++
++ if (!trans->pending_snapshot)
++ return;
++
++ lockdep_assert_held(&trans->fs_info->trans_lock);
++ ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
++
++ list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
++}
++
+ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+@@ -2106,6 +2207,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
+ if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
+ enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
+
++ add_pending_snapshot(trans);
++
+ spin_unlock(&fs_info->trans_lock);
+ refcount_inc(&cur_trans->use_count);
+
+@@ -2196,6 +2299,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
+ * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
+ */
+ spin_lock(&fs_info->trans_lock);
++ add_pending_snapshot(trans);
+ cur_trans->state = TRANS_STATE_COMMIT_DOING;
+ spin_unlock(&fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
+index ba45065f94511..0ded32bbd001e 100644
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
+ struct btrfs_transaction *transaction;
+ struct btrfs_block_rsv *block_rsv;
+ struct btrfs_block_rsv *orig_rsv;
++ /* Set by a task that wants to create a snapshot. */
++ struct btrfs_pending_snapshot *pending_snapshot;
+ refcount_t use_count;
+ unsigned int type;
+ /*
+@@ -215,6 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
+
+ void btrfs_add_dead_root(struct btrfs_root *root);
+ int btrfs_defrag_root(struct btrfs_root *root);
++void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
+ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
+ int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
+ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
+diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
+index 7733e8ac0a698..bd71c7369794a 100644
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -442,6 +442,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
+ btrfs_item_key_to_cpu(leaf, &item_key, slot);
+ is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
+
++ /*
++ * Bad rootid for reloc trees.
++ *
++ * Reloc trees are only for subvolume trees, other trees only need
++ * to be COWed to be relocated.
++ */
++ if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
++ !is_fstree(key->offset))) {
++ generic_err(leaf, slot,
++ "invalid reloc tree for root %lld, root id is not a subvolume tree",
++ key->offset);
++ return -EUCLEAN;
++ }
++
+ /* No such tree id */
+ if (unlikely(key->objectid == 0)) {
+ if (is_root_item)
+@@ -965,6 +979,7 @@ static int check_dev_item(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+ {
+ struct btrfs_dev_item *ditem;
++ const u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+ if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
+ dev_item_err(leaf, slot,
+@@ -972,6 +987,13 @@ static int check_dev_item(struct extent_buffer *leaf,
+ key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
+ return -EUCLEAN;
+ }
++
++ if (unlikely(item_size != sizeof(*ditem))) {
++ dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
++ item_size, sizeof(*ditem));
++ return -EUCLEAN;
++ }
++
+ ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
+ if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
+ dev_item_err(leaf, slot,
+@@ -1007,6 +1029,7 @@ static int check_inode_item(struct extent_buffer *leaf,
+ struct btrfs_inode_item *iitem;
+ u64 super_gen = btrfs_super_generation(fs_info->super_copy);
+ u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
++ const u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u32 mode;
+ int ret;
+ u32 flags;
+@@ -1016,6 +1039,12 @@ static int check_inode_item(struct extent_buffer *leaf,
+ if (unlikely(ret < 0))
+ return ret;
+
++ if (unlikely(item_size != sizeof(*iitem))) {
++ generic_err(leaf, slot, "invalid item size: has %u expect %zu",
++ item_size, sizeof(*iitem));
++ return -EUCLEAN;
++ }
++
+ iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
+
+ /* Here we use super block generation + 1 to handle log tree */
+@@ -1201,7 +1230,8 @@ static void extent_err(const struct extent_buffer *eb, int slot,
+ }
+
+ static int check_extent_item(struct extent_buffer *leaf,
+- struct btrfs_key *key, int slot)
++ struct btrfs_key *key, int slot,
++ struct btrfs_key *prev_key)
+ {
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_extent_item *ei;
+@@ -1421,6 +1451,26 @@ static int check_extent_item(struct extent_buffer *leaf,
+ total_refs, inline_refs);
+ return -EUCLEAN;
+ }
++
++ if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
++ (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
++ u64 prev_end = prev_key->objectid;
++
++ if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
++ prev_end += fs_info->nodesize;
++ else
++ prev_end += prev_key->offset;
++
++ if (unlikely(prev_end > key->objectid)) {
++ extent_err(leaf, slot,
++ "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
++ prev_key->objectid, prev_key->type,
++ prev_key->offset, key->objectid, key->type,
++ key->offset);
++ return -EUCLEAN;
++ }
++ }
++
+ return 0;
+ }
+
+@@ -1589,7 +1639,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
+ break;
+ case BTRFS_EXTENT_ITEM_KEY:
+ case BTRFS_METADATA_ITEM_KEY:
+- ret = check_extent_item(leaf, key, slot);
++ ret = check_extent_item(leaf, key, slot, prev_key);
+ break;
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ case BTRFS_SHARED_DATA_REF_KEY:
+diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
+index 7c45d960b53c6..259a3b5f93032 100644
+--- a/fs/btrfs/tree-defrag.c
++++ b/fs/btrfs/tree-defrag.c
+@@ -39,8 +39,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+ goto out;
+
+ path = btrfs_alloc_path();
+- if (!path)
+- return -ENOMEM;
++ if (!path) {
++ ret = -ENOMEM;
++ goto out;
++ }
+
+ level = btrfs_header_level(root->node);
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index b415c5ec03ea0..7c0c6fc0c536b 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -761,7 +761,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
+ ins.objectid, ins.offset, 0);
+ btrfs_init_data_ref(&ref,
+ root->root_key.objectid,
+- key->objectid, offset);
++ key->objectid, offset, 0, false);
+ ret = btrfs_inc_extent_ref(trans, &ref);
+ if (ret)
+ goto out;
+@@ -884,6 +884,26 @@ out:
+ return ret;
+ }
+
++static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
++ struct btrfs_inode *dir,
++ struct btrfs_inode *inode,
++ const char *name,
++ int name_len)
++{
++ int ret;
++
++ ret = btrfs_unlink_inode(trans, dir, inode, name, name_len);
++ if (ret)
++ return ret;
++ /*
++ * Whenever we need to check if a name exists or not, we check the
++ * fs/subvolume tree. So after an unlink we must run delayed items, so
++ * that future checks for a name during log replay see that the name
++	 * does not exist anymore.
++ */
++ return btrfs_run_delayed_items(trans);
++}
++
+ /*
+ * when cleaning up conflicts between the directory names in the
+ * subvolume, directory names in the log and directory names in the
+@@ -926,12 +946,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+ if (ret)
+ goto out;
+
+- ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
++ ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name,
+ name_len);
+- if (ret)
+- goto out;
+- else
+- ret = btrfs_run_delayed_items(trans);
+ out:
+ kfree(name);
+ iput(inode);
+@@ -1091,12 +1107,9 @@ again:
+ inc_nlink(&inode->vfs_inode);
+ btrfs_release_path(path);
+
+- ret = btrfs_unlink_inode(trans, root, dir, inode,
++ ret = unlink_inode_for_log_replay(trans, dir, inode,
+ victim_name, victim_name_len);
+ kfree(victim_name);
+- if (ret)
+- return ret;
+- ret = btrfs_run_delayed_items(trans);
+ if (ret)
+ return ret;
+ *search_done = 1;
+@@ -1119,7 +1132,9 @@ again:
+ extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
+ inode_objectid, parent_objectid, 0,
+ 0);
+- if (!IS_ERR_OR_NULL(extref)) {
++ if (IS_ERR(extref)) {
++ return PTR_ERR(extref);
++ } else if (extref) {
+ u32 item_size;
+ u32 cur_offset = 0;
+ unsigned long base;
+@@ -1153,6 +1168,7 @@ again:
+ parent_objectid, victim_name,
+ victim_name_len);
+ if (ret < 0) {
++ kfree(victim_name);
+ return ret;
+ } else if (!ret) {
+ ret = -ENOENT;
+@@ -1162,14 +1178,11 @@ again:
+ inc_nlink(&inode->vfs_inode);
+ btrfs_release_path(path);
+
+- ret = btrfs_unlink_inode(trans, root,
++ ret = unlink_inode_for_log_replay(trans,
+ BTRFS_I(victim_parent),
+ inode,
+ victim_name,
+ victim_name_len);
+- if (!ret)
+- ret = btrfs_run_delayed_items(
+- trans);
+ }
+ iput(victim_parent);
+ kfree(victim_name);
+@@ -1324,7 +1337,7 @@ again:
+ kfree(name);
+ goto out;
+ }
+- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
+ inode, name, namelen);
+ kfree(name);
+ iput(dir);
+@@ -1422,8 +1435,8 @@ static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ ret = -ENOENT;
+ goto out;
+ }
+- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(other_inode),
+- name, namelen);
++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(other_inode),
++ name, namelen);
+ if (ret)
+ goto out;
+ /*
+@@ -1431,11 +1444,7 @@ static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ * on the inode will not free it. We will fixup the link count later.
+ */
+ if (other_inode->i_nlink == 0)
+- inc_nlink(other_inode);
+-
+- ret = btrfs_run_delayed_items(trans);
+- if (ret)
+- goto out;
++ set_nlink(other_inode, 1);
+ add_link:
+ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
+ name, namelen, 0, ref_index);
+@@ -1568,7 +1577,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+- ret = btrfs_unlink_inode(trans, root,
++ ret = unlink_inode_for_log_replay(trans,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+@@ -1578,7 +1587,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+- inc_nlink(inode);
++ set_nlink(inode, 1);
+ }
+ if (ret < 0)
+ goto out;
+@@ -2176,7 +2185,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
+ */
+ static noinline int find_dir_range(struct btrfs_root *root,
+ struct btrfs_path *path,
+- u64 dirid, int key_type,
++ u64 dirid,
+ u64 *start_ret, u64 *end_ret)
+ {
+ struct btrfs_key key;
+@@ -2189,7 +2198,7 @@ static noinline int find_dir_range(struct btrfs_root *root,
+ return 1;
+
+ key.objectid = dirid;
+- key.type = key_type;
++ key.type = BTRFS_DIR_LOG_INDEX_KEY;
+ key.offset = *start_ret;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+@@ -2203,7 +2212,7 @@ static noinline int find_dir_range(struct btrfs_root *root,
+ if (ret != 0)
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+- if (key.type != key_type || key.objectid != dirid) {
++ if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) {
+ ret = 1;
+ goto next;
+ }
+@@ -2230,7 +2239,7 @@ next:
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+- if (key.type != key_type || key.objectid != dirid) {
++ if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) {
+ ret = 1;
+ goto out;
+ }
+@@ -2261,95 +2270,75 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+ int ret;
+ struct extent_buffer *eb;
+ int slot;
+- u32 item_size;
+ struct btrfs_dir_item *di;
+- struct btrfs_dir_item *log_di;
+ int name_len;
+- unsigned long ptr;
+- unsigned long ptr_end;
+ char *name;
+- struct inode *inode;
++ struct inode *inode = NULL;
+ struct btrfs_key location;
+
+-again:
++ /*
++	 * Currently we only log dir index keys. Even if we replay a log created
++ * by an older kernel that logged both dir index and dir item keys, all
++ * we need to do is process the dir index keys, we (and our caller) can
++ * safely ignore dir item keys (key type BTRFS_DIR_ITEM_KEY).
++ */
++ ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY);
++
+ eb = path->nodes[0];
+ slot = path->slots[0];
+- item_size = btrfs_item_size_nr(eb, slot);
+- ptr = btrfs_item_ptr_offset(eb, slot);
+- ptr_end = ptr + item_size;
+- while (ptr < ptr_end) {
+- di = (struct btrfs_dir_item *)ptr;
+- name_len = btrfs_dir_name_len(eb, di);
+- name = kmalloc(name_len, GFP_NOFS);
+- if (!name) {
+- ret = -ENOMEM;
+- goto out;
+- }
+- read_extent_buffer(eb, name, (unsigned long)(di + 1),
+- name_len);
+- log_di = NULL;
+- if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
+- log_di = btrfs_lookup_dir_item(trans, log, log_path,
+- dir_key->objectid,
+- name, name_len, 0);
+- } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
+- log_di = btrfs_lookup_dir_index_item(trans, log,
+- log_path,
+- dir_key->objectid,
+- dir_key->offset,
+- name, name_len, 0);
+- }
+- if (!log_di) {
+- btrfs_dir_item_key_to_cpu(eb, di, &location);
+- btrfs_release_path(path);
+- btrfs_release_path(log_path);
+- inode = read_one_inode(root, location.objectid);
+- if (!inode) {
+- kfree(name);
+- return -EIO;
+- }
++ di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
++ name_len = btrfs_dir_name_len(eb, di);
++ name = kmalloc(name_len, GFP_NOFS);
++ if (!name) {
++ ret = -ENOMEM;
++ goto out;
++ }
+
+- ret = link_to_fixup_dir(trans, root,
+- path, location.objectid);
+- if (ret) {
+- kfree(name);
+- iput(inode);
+- goto out;
+- }
++ read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len);
+
+- inc_nlink(inode);
+- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+- BTRFS_I(inode), name, name_len);
+- if (!ret)
+- ret = btrfs_run_delayed_items(trans);
+- kfree(name);
+- iput(inode);
+- if (ret)
+- goto out;
++ if (log) {
++ struct btrfs_dir_item *log_di;
+
+- /* there might still be more names under this key
+- * check and repeat if required
+- */
+- ret = btrfs_search_slot(NULL, root, dir_key, path,
+- 0, 0);
+- if (ret == 0)
+- goto again;
++ log_di = btrfs_lookup_dir_index_item(trans, log, log_path,
++ dir_key->objectid,
++ dir_key->offset,
++ name, name_len, 0);
++ if (IS_ERR(log_di)) {
++ ret = PTR_ERR(log_di);
++ goto out;
++ } else if (log_di) {
++ /* The dentry exists in the log, we have nothing to do. */
+ ret = 0;
+ goto out;
+- } else if (IS_ERR(log_di)) {
+- kfree(name);
+- return PTR_ERR(log_di);
+ }
+- btrfs_release_path(log_path);
+- kfree(name);
++ }
+
+- ptr = (unsigned long)(di + 1);
+- ptr += name_len;
++ btrfs_dir_item_key_to_cpu(eb, di, &location);
++ btrfs_release_path(path);
++ btrfs_release_path(log_path);
++ inode = read_one_inode(root, location.objectid);
++ if (!inode) {
++ ret = -EIO;
++ goto out;
+ }
+- ret = 0;
++
++ ret = link_to_fixup_dir(trans, root, path, location.objectid);
++ if (ret)
++ goto out;
++
++ inc_nlink(inode);
++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode),
++ name, name_len);
++ /*
++ * Unlike dir item keys, dir index keys can only have one name (entry) in
++ * them, as there are no key collisions since each key has a unique offset
++ * (an index number), so we're done.
++ */
+ out:
+ btrfs_release_path(path);
+ btrfs_release_path(log_path);
++ kfree(name);
++ iput(inode);
+ return ret;
+ }
+
+@@ -2469,7 +2458,6 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
+ {
+ u64 range_start;
+ u64 range_end;
+- int key_type = BTRFS_DIR_LOG_ITEM_KEY;
+ int ret = 0;
+ struct btrfs_key dir_key;
+ struct btrfs_key found_key;
+@@ -2477,7 +2465,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
+ struct inode *dir;
+
+ dir_key.objectid = dirid;
+- dir_key.type = BTRFS_DIR_ITEM_KEY;
++ dir_key.type = BTRFS_DIR_INDEX_KEY;
+ log_path = btrfs_alloc_path();
+ if (!log_path)
+ return -ENOMEM;
+@@ -2491,16 +2479,18 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
+ btrfs_free_path(log_path);
+ return 0;
+ }
+-again:
++
+ range_start = 0;
+ range_end = 0;
+ while (1) {
+ if (del_all)
+ range_end = (u64)-1;
+ else {
+- ret = find_dir_range(log, path, dirid, key_type,
++ ret = find_dir_range(log, path, dirid,
+ &range_start, &range_end);
+- if (ret != 0)
++ if (ret < 0)
++ goto out;
++ else if (ret > 0)
+ break;
+ }
+
+@@ -2523,8 +2513,10 @@ again:
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+ path->slots[0]);
+ if (found_key.objectid != dirid ||
+- found_key.type != dir_key.type)
+- goto next_type;
++ found_key.type != dir_key.type) {
++ ret = 0;
++ goto out;
++ }
+
+ if (found_key.offset > range_end)
+ break;
+@@ -2543,15 +2535,7 @@ again:
+ break;
+ range_start = range_end + 1;
+ }
+-
+-next_type:
+ ret = 0;
+- if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
+- key_type = BTRFS_DIR_LOG_INDEX_KEY;
+- dir_key.type = BTRFS_DIR_INDEX_KEY;
+- btrfs_release_path(path);
+- goto again;
+- }
+ out:
+ btrfs_release_path(path);
+ btrfs_free_path(log_path);
+@@ -2877,6 +2861,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
+ path->nodes[*level]->len);
+ if (ret)
+ return ret;
++ btrfs_redirty_list_add(trans->transaction,
++ next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
+@@ -2957,6 +2943,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
+ next->start, next->len);
+ if (ret)
+ goto out;
++ btrfs_redirty_list_add(trans->transaction, next);
+ } else {
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+ clear_extent_buffer_dirty(next);
+@@ -3154,7 +3141,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ ret = 0;
+ if (ret) {
+ blk_finish_plug(&plug);
+- btrfs_abort_transaction(trans, ret);
+ btrfs_set_log_full_commit(trans);
+ mutex_unlock(&root->log_mutex);
+ goto out;
+@@ -3192,6 +3178,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
+ if (ret) {
+ mutex_unlock(&fs_info->tree_root->log_mutex);
++ blk_finish_plug(&plug);
+ goto out;
+ }
+ }
+@@ -3285,7 +3272,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ goto out_wake_log_root;
+ } else if (ret) {
+ btrfs_set_log_full_commit(trans);
+- btrfs_abort_transaction(trans, ret);
+ mutex_unlock(&log_root_tree->log_mutex);
+ goto out_wake_log_root;
+ }
+@@ -3399,6 +3385,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
+ if (log->node) {
+ ret = walk_log_tree(trans, log, &wc);
+ if (ret) {
++ /*
++			 * We weren't able to traverse the entire log tree; the
++ * typical scenario is getting an -EIO when reading an
++ * extent buffer of the tree, due to a previous writeback
++ * failure of it.
++ */
++ set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
++ &log->fs_info->fs_state);
++
++ /*
++ * Some extent buffers of the log tree may still be dirty
++ * and not yet written back to storage, because we may
++ * have updates to a log tree without syncing a log tree,
++ * such as during rename and link operations. So flush
++ * them out and wait for their writeback to complete, so
++ * that we properly cleanup their state and pages.
++ */
++ btrfs_write_marked_extents(log->fs_info,
++ &log->dirty_log_pages,
++ EXTENT_DIRTY | EXTENT_NEW);
++ btrfs_wait_tree_log_extents(log,
++ EXTENT_DIRTY | EXTENT_NEW);
++
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+@@ -3410,8 +3419,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
+ EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
+ extent_io_tree_release(&log->log_csum_range);
+
+- if (trans && log->node)
+- btrfs_redirty_list_add(trans->transaction, log->node);
+ btrfs_put_root(log);
+ }
+
+@@ -4419,7 +4426,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
+
+ /*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+- * lose them after doing a fast fsync and replaying the log. We scan the
++ * lose them after doing a full/fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+@@ -5204,6 +5211,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
+ struct btrfs_log_ctx *ctx,
+ bool *need_log_inode_item)
+ {
++ const u64 i_size = i_size_read(&inode->vfs_inode);
+ struct btrfs_root *root = inode->root;
+ int ins_start_slot = 0;
+ int ins_nr = 0;
+@@ -5224,13 +5232,21 @@ again:
+ if (min_key->type > max_key->type)
+ break;
+
+- if (min_key->type == BTRFS_INODE_ITEM_KEY)
++ if (min_key->type == BTRFS_INODE_ITEM_KEY) {
+ *need_log_inode_item = false;
+-
+- if ((min_key->type == BTRFS_INODE_REF_KEY ||
+- min_key->type == BTRFS_INODE_EXTREF_KEY) &&
+- inode->generation == trans->transid &&
+- !recursive_logging) {
++ } else if (min_key->type == BTRFS_EXTENT_DATA_KEY &&
++ min_key->offset >= i_size) {
++ /*
++ * Extents at and beyond eof are logged with
++ * btrfs_log_prealloc_extents().
++ * Only regular files have BTRFS_EXTENT_DATA_KEY keys,
++ * and no keys greater than that, so bail out.
++ */
++ break;
++ } else if ((min_key->type == BTRFS_INODE_REF_KEY ||
++ min_key->type == BTRFS_INODE_EXTREF_KEY) &&
++ inode->generation == trans->transid &&
++ !recursive_logging) {
+ u64 other_ino = 0;
+ u64 other_parent = 0;
+
+@@ -5261,10 +5277,8 @@ again:
+ btrfs_release_path(path);
+ goto next_key;
+ }
+- }
+-
+- /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+- if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
++ } else if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
++ /* Skip xattrs, logged later with btrfs_log_all_xattrs() */
+ if (ins_nr == 0)
+ goto next_slot;
+ ret = copy_items(trans, inode, dst_path, path,
+@@ -5317,9 +5331,21 @@ next_key:
+ break;
+ }
+ }
+- if (ins_nr)
++ if (ins_nr) {
+ ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
+ ins_nr, inode_only, logged_isize);
++ if (ret)
++ return ret;
++ }
++
++ if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) {
++ /*
++ * Release the path because otherwise we might attempt to double
++ * lock the same leaf with btrfs_log_prealloc_extents() below.
++ */
++ btrfs_release_path(path);
++ ret = btrfs_log_prealloc_extents(trans, inode, dst_path);
++ }
+
+ return ret;
+ }
+@@ -5418,6 +5444,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+ mutex_lock(&inode->log_mutex);
+ }
+
++ /*
++ * For symlinks, we must always log their content, which is stored in an
++ * inline extent, otherwise we could end up with an empty symlink after
++ * log replay, which is invalid on linux (symlink(2) returns -ENOENT if
++ * one attempts to create an empty symlink).
++	 * We don't need to worry about flushing delalloc, because we create
++ * the inline extent when the symlink is created (we never have delalloc
++ * for symlinks).
++ */
++ if (S_ISLNK(inode->vfs_inode.i_mode))
++ inode_only = LOG_INODE_ALL;
++
+ /*
+ * This is for cases where logging a directory could result in losing a
+	 * file after replaying the log. For example, if we move a file from a
+@@ -5788,7 +5826,7 @@ process_leaf:
+ }
+
+ ctx->log_new_dentries = false;
+- if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
++ if (type == BTRFS_FT_DIR)
+ log_mode = LOG_INODE_ALL;
+ ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
+ log_mode, ctx);
+@@ -6549,14 +6587,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ mutex_unlock(&dir->log_mutex);
+ }
+
+-/*
+- * Call this after adding a new name for a file and it will properly
+- * update the log to reflect the new name.
++/**
++ * Update the log after adding a new name for an inode.
++ *
++ * @trans: Transaction handle.
++ * @old_dentry: The dentry associated with the old name and the old
++ * parent directory.
++ * @old_dir: The inode of the previous parent directory for the case
++ * of a rename. For a link operation, it must be NULL.
++ * @parent: The dentry associated with the directory under which the
++ * new name is located.
++ *
++ * Call this after adding a new name for an inode, as a result of a link or
++ * rename operation, and it will properly update the log to reflect the new name.
+ */
+ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+- struct btrfs_inode *inode, struct btrfs_inode *old_dir,
++ struct dentry *old_dentry, struct btrfs_inode *old_dir,
+ struct dentry *parent)
+ {
++ struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
+ struct btrfs_log_ctx ctx;
+
+ /*
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index 731bd9c029f55..7ffcac8a89905 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -84,7 +84,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
+ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *dir);
+ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+- struct btrfs_inode *inode, struct btrfs_inode *old_dir,
++ struct dentry *old_dentry, struct btrfs_inode *old_dir,
+ struct dentry *parent);
+
+ #endif
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 2ec3b8ac8fa35..0e9236a745b81 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -14,6 +14,7 @@
+ #include <linux/semaphore.h>
+ #include <linux/uuid.h>
+ #include <linux/list_sort.h>
++#include <linux/namei.h>
+ #include "misc.h"
+ #include "ctree.h"
+ #include "extent_map.h"
+@@ -250,7 +251,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret,
++ struct btrfs_io_context **bioc_ret,
+ int mirror_num, int need_raid_map);
+
+ /*
+@@ -408,6 +409,7 @@ void btrfs_free_device(struct btrfs_device *device)
+ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
+ {
+ struct btrfs_device *device;
++
+ WARN_ON(fs_devices->opened);
+ while (!list_empty(&fs_devices->devices)) {
+ device = list_entry(fs_devices->devices.next,
+@@ -529,15 +531,48 @@ error:
+ return ret;
+ }
+
+-static bool device_path_matched(const char *path, struct btrfs_device *device)
++/*
++ * Check if the device in the path matches the device in the given struct btrfs_device.
++ *
++ * Returns:
++ * true If it is the same device.
++ * false If it is not the same device or on error.
++ */
++static bool device_matched(const struct btrfs_device *device, const char *path)
+ {
+- int found;
++ char *device_name;
++ dev_t dev_old;
++ dev_t dev_new;
++ int ret;
++
++ /*
++ * If we are looking for a device with the matching dev_t, then skip
++	 * devices without a name (a missing device).
++ */
++ if (!device->name)
++ return false;
++
++ device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
++ if (!device_name)
++ return false;
+
+ rcu_read_lock();
+- found = strcmp(rcu_str_deref(device->name), path);
++ scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name));
+ rcu_read_unlock();
+
+- return found == 0;
++ ret = lookup_bdev(device_name, &dev_old);
++ kfree(device_name);
++ if (ret)
++ return false;
++
++ ret = lookup_bdev(path, &dev_new);
++ if (ret)
++ return false;
++
++ if (dev_old == dev_new)
++ return true;
++
++ return false;
+ }
+
+ /*
+@@ -570,9 +605,7 @@ static int btrfs_free_stale_devices(const char *path,
+ &fs_devices->devices, dev_list) {
+ if (skip_device && skip_device == device)
+ continue;
+- if (path && !device->name)
+- continue;
+- if (path && !device_path_matched(path, device))
++ if (path && !device_matched(device, path))
+ continue;
+ if (fs_devices->opened) {
+ /* for an already deleted device return 0 */
+@@ -812,9 +845,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
+
+ device = NULL;
+ } else {
++ struct btrfs_dev_lookup_args args = {
++ .devid = devid,
++ .uuid = disk_super->dev_item.uuid,
++ };
++
+ mutex_lock(&fs_devices->device_list_mutex);
+- device = btrfs_find_device(fs_devices, devid,
+- disk_super->dev_item.uuid, NULL);
++ device = btrfs_find_device(fs_devices, &args);
+
+ /*
+ * If this disk has been pulled into an fs devices created by
+@@ -919,6 +956,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
+ /*
+ * We are going to replace the device path for a given devid,
+ * make sure it's the same device if the device is mounted
++ *
++ * NOTE: the device->fs_info may not be reliable here so pass
++ * in a NULL to message helpers instead. This avoids a possible
++ * use-after-free when the fs_info and fs_info->sb are already
++ * torn down.
+ */
+ if (device->bdev) {
+ int error;
+@@ -932,12 +974,6 @@ static noinline struct btrfs_device *device_list_add(const char *path,
+
+ if (device->bdev->bd_dev != path_dev) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+- /*
+- * device->fs_info may not be reliable here, so
+- * pass in a NULL instead. This avoids a
+- * possible use-after-free when the fs_info and
+- * fs_info->sb are already torn down.
+- */
+ btrfs_warn_in_rcu(NULL,
+ "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
+ path, devid, found_transid,
+@@ -945,7 +981,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
+ task_pid_nr(current));
+ return ERR_PTR(-EEXIST);
+ }
+- btrfs_info_in_rcu(device->fs_info,
++ btrfs_info_in_rcu(NULL,
+ "devid %llu device path %s changed to %s scanned by %s (%d)",
+ devid, rcu_str_deref(device->name),
+ path, current->comm,
+@@ -1091,7 +1127,7 @@ void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
+ list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
+ __btrfs_free_extra_devids(seed_dev, &latest_dev);
+
+- fs_devices->latest_bdev = latest_dev->bdev;
++ fs_devices->latest_dev = latest_dev;
+
+ mutex_unlock(&uuid_mutex);
+ }
+@@ -1122,8 +1158,10 @@ static void btrfs_close_one_device(struct btrfs_device *device)
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
+- if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
++ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
++ clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
+ fs_devices->missing_devices--;
++ }
+
+ btrfs_close_bdev(device);
+ if (device->bdev) {
+@@ -1184,9 +1222,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+
+ mutex_lock(&uuid_mutex);
+ close_fs_devices(fs_devices);
+- if (!fs_devices->opened)
++ if (!fs_devices->opened) {
+ list_splice_init(&fs_devices->seed_list, &list);
+
++ /*
++ * If the struct btrfs_fs_devices is not assembled with any
++ * other device, it can be re-initialized during the next mount
++		 * without needing the device-scan step. Therefore, it can be
++ * fully freed.
++ */
++ if (fs_devices->num_devices == 1) {
++ list_del(&fs_devices->fs_list);
++ free_fs_devices(fs_devices);
++ }
++ }
++
++
+ list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
+ close_fs_devices(fs_devices);
+ list_del(&fs_devices->seed_list);
+@@ -1222,7 +1273,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
+ return -EINVAL;
+
+ fs_devices->opened = 1;
+- fs_devices->latest_bdev = latest_dev->bdev;
++ fs_devices->latest_dev = latest_dev;
+ fs_devices->total_rw_bytes = 0;
+ fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
+ fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+@@ -1355,16 +1406,27 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+ * So, we need to add a special mount option to scan for
+ * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+ */
+- flags |= FMODE_EXCL;
+
++ /*
++	 * Avoid using flags |= FMODE_EXCL here, as systemd-udev may
++ * initiate the device scan which may race with the user's mount
++ * or mkfs command, resulting in failure.
++ * Since the device scan is solely for reading purposes, there is
++ * no need for FMODE_EXCL. Additionally, the devices are read again
++ * during the mount process. It is ok to get some inconsistent
++ * values temporarily, as the device paths of the fsid are the only
++ * required information for assembling the volume.
++ */
+ bdev = blkdev_get_by_path(path, flags, holder);
+ if (IS_ERR(bdev))
+ return ERR_CAST(bdev);
+
+ bytenr_orig = btrfs_sb_offset(0);
+ ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
+- if (ret)
+- return ERR_PTR(ret);
++ if (ret) {
++ device = ERR_PTR(ret);
++ goto error_bdev_put;
++ }
+
+ disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
+ if (IS_ERR(disk_super)) {
+@@ -1607,7 +1669,7 @@ again:
+ if (ret < 0)
+ goto out;
+
+- while (1) {
++ while (search_start < search_end) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+@@ -1630,6 +1692,9 @@ again:
+ if (key.type != BTRFS_DEV_EXTENT_KEY)
+ goto next;
+
++ if (key.offset > search_end)
++ break;
++
+ if (key.offset > search_start) {
+ hole_size = key.offset - search_start;
+ dev_extent_hole_check(device, &search_start, &hole_size,
+@@ -1690,6 +1755,7 @@ next:
+ else
+ ret = 0;
+
++ ASSERT(max_hole_start + max_hole_size <= search_end);
+ out:
+ btrfs_free_path(path);
+ *start = max_hole_start;
+@@ -1843,8 +1909,10 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
+ key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = device->devid;
+
++ btrfs_reserve_chunk_metadata(trans, true);
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
+ &key, sizeof(*dev_item));
++ btrfs_trans_release_chunk_metadata(trans);
+ if (ret)
+ goto out;
+
+@@ -1882,60 +1950,52 @@ out:
+ /*
+ * Function to update ctime/mtime for a given device path.
+ * Mainly used for ctime/mtime based probe like libblkid.
++ *
++ * We don't care about errors here; this is just to be kind to userspace.
+ */
+-static void update_dev_time(struct block_device *bdev)
++static void update_dev_time(const char *device_path)
+ {
+- struct inode *inode = bdev->bd_inode;
++ struct path path;
+ struct timespec64 now;
++ int ret;
+
+- /* Shouldn't happen but just in case. */
+- if (!inode)
++ ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
++ if (ret)
+ return;
+
+- now = current_time(inode);
+- generic_update_time(inode, &now, S_MTIME | S_CTIME);
++ now = current_time(d_inode(path.dentry));
++ inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME);
++ path_put(&path);
+ }
+
+-static int btrfs_rm_dev_item(struct btrfs_device *device)
++static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
++ struct btrfs_device *device)
+ {
+ struct btrfs_root *root = device->fs_info->chunk_root;
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+- struct btrfs_trans_handle *trans;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+- trans = btrfs_start_transaction(root, 0);
+- if (IS_ERR(trans)) {
+- btrfs_free_path(path);
+- return PTR_ERR(trans);
+- }
+ key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = device->devid;
+
++ btrfs_reserve_chunk_metadata(trans, false);
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
++ btrfs_trans_release_chunk_metadata(trans);
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
+- btrfs_abort_transaction(trans, ret);
+- btrfs_end_transaction(trans);
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, root, path);
+- if (ret) {
+- btrfs_abort_transaction(trans, ret);
+- btrfs_end_transaction(trans);
+- }
+-
+ out:
+ btrfs_free_path(path);
+- if (!ret)
+- ret = btrfs_commit_transaction(trans);
+ return ret;
+ }
+
+@@ -1986,7 +2046,7 @@ static struct btrfs_device * btrfs_find_next_active_device(
+ }
+
+ /*
+- * Helper function to check if the given device is part of s_bdev / latest_bdev
++ * Helper function to check if the given device is part of s_bdev / latest_dev
+ * and replace it with the provided or the next active device, in the context
+ * where this function is called, there should always be another device (or
+ * this_dev) which is active.
+@@ -2005,8 +2065,8 @@ void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
+ (fs_info->sb->s_bdev == device->bdev))
+ fs_info->sb->s_bdev = next_device->bdev;
+
+- if (fs_info->fs_devices->latest_bdev == device->bdev)
+- fs_info->fs_devices->latest_bdev = next_device->bdev;
++ if (fs_info->fs_devices->latest_dev->bdev == device->bdev)
++ fs_info->fs_devices->latest_dev = next_device;
+ }
+
+ /*
+@@ -2041,7 +2101,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ struct page *page;
+ int ret;
+
+- disk_super = btrfs_read_dev_one_super(bdev, copy_num);
++ disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
+ if (IS_ERR(disk_super))
+ continue;
+
+@@ -2069,55 +2129,53 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
+ /* Update ctime/mtime for device path for libblkid */
+- update_dev_time(bdev);
++ update_dev_time(device_path);
+ }
+
+-int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
+- u64 devid, struct block_device **bdev, fmode_t *mode)
++int btrfs_rm_device(struct btrfs_fs_info *fs_info,
++ struct btrfs_dev_lookup_args *args,
++ struct block_device **bdev, fmode_t *mode)
+ {
++ struct btrfs_trans_handle *trans;
+ struct btrfs_device *device;
+ struct btrfs_fs_devices *cur_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u64 num_devices;
+ int ret = 0;
+
+- mutex_lock(&uuid_mutex);
+-
++ /*
++ * The device list in fs_devices is accessed without locks (neither
++ * uuid_mutex nor device_list_mutex) as it won't change on a mounted
++ * filesystem and another device rm cannot run.
++ */
+ num_devices = btrfs_num_devices(fs_info);
+
+ ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
+ if (ret)
+- goto out;
+-
+- device = btrfs_find_device_by_devspec(fs_info, devid, device_path);
++ return ret;
+
+- if (IS_ERR(device)) {
+- if (PTR_ERR(device) == -ENOENT &&
+- device_path && strcmp(device_path, "missing") == 0)
++ device = btrfs_find_device(fs_info->fs_devices, args);
++ if (!device) {
++ if (args->missing)
+ ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
+ else
+- ret = PTR_ERR(device);
+- goto out;
++ ret = -ENOENT;
++ return ret;
+ }
+
+ if (btrfs_pinned_by_swapfile(fs_info, device)) {
+ btrfs_warn_in_rcu(fs_info,
+ "cannot remove device %s (devid %llu) due to active swapfile",
+ rcu_str_deref(device->name), device->devid);
+- ret = -ETXTBSY;
+- goto out;
++ return -ETXTBSY;
+ }
+
+- if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
+- ret = BTRFS_ERROR_DEV_TGT_REPLACE;
+- goto out;
+- }
++ if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
++ return BTRFS_ERROR_DEV_TGT_REPLACE;
+
+ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+- fs_info->fs_devices->rw_devices == 1) {
+- ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
+- goto out;
+- }
++ fs_info->fs_devices->rw_devices == 1)
++ return BTRFS_ERROR_DEV_ONLY_WRITABLE;
+
+ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
+ mutex_lock(&fs_info->chunk_mutex);
+@@ -2126,22 +2184,28 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
+ mutex_unlock(&fs_info->chunk_mutex);
+ }
+
+- mutex_unlock(&uuid_mutex);
+ ret = btrfs_shrink_device(device, 0);
+ if (!ret)
+ btrfs_reada_remove_dev(device);
+- mutex_lock(&uuid_mutex);
+ if (ret)
+ goto error_undo;
+
+- /*
+- * TODO: the superblock still includes this device in its num_devices
+- * counter although write_all_supers() is not locked out. This
+- * could give a filesystem state which requires a degraded mount.
+- */
+- ret = btrfs_rm_dev_item(device);
+- if (ret)
++ trans = btrfs_start_transaction(fs_info->chunk_root, 0);
++ if (IS_ERR(trans)) {
++ ret = PTR_ERR(trans);
+ goto error_undo;
++ }
++
++ ret = btrfs_rm_dev_item(trans, device);
++ if (ret) {
++ /* Any error in dev item removal is critical */
++ btrfs_crit(fs_info,
++ "failed to remove device item for devid %llu: %d",
++ device->devid, ret);
++ btrfs_abort_transaction(trans, ret);
++ btrfs_end_transaction(trans);
++ return ret;
++ }
+
+ clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ btrfs_scrub_cancel_dev(device);
+@@ -2216,8 +2280,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
+ free_fs_devices(cur_devices);
+ }
+
+-out:
+- mutex_unlock(&uuid_mutex);
++ ret = btrfs_commit_transaction(trans);
++
+ return ret;
+
+ error_undo:
+@@ -2229,7 +2293,7 @@ error_undo:
+ device->fs_devices->rw_devices++;
+ mutex_unlock(&fs_info->chunk_mutex);
+ }
+- goto out;
++ return ret;
+ }
+
+ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
+@@ -2305,13 +2369,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
+
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+- /*
+- * The update_dev_time() with in btrfs_scratch_superblocks()
+- * may lead to a call to btrfs_show_devname() which will try
+- * to hold device_list_mutex. And here this device
+- * is already out of device list, so we don't have to hold
+- * the device_list_mutex lock.
+- */
+ btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
+ tgtdev->name->str);
+
+@@ -2320,69 +2377,101 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
+ btrfs_free_device(tgtdev);
+ }
+
+-static struct btrfs_device *btrfs_find_device_by_path(
+- struct btrfs_fs_info *fs_info, const char *device_path)
++/**
++ * Populate args from device at path
++ *
++ * @fs_info: the filesystem
++ * @args: the args to populate
++ * @path: the path to the device
++ *
++ * This will read the super block of the device at @path and populate @args with
++ * the devid, fsid, and uuid. This is meant to be used for ioctls that need to
++ * lookup a device to operate on, but need to do it before we take any locks.
++ * This properly handles the special case of "missing" that a user may pass in,
++ * and does some basic sanity checks. The caller must make sure that @path is
++ * properly NUL terminated before calling in, and must call
++ * btrfs_put_dev_args_from_path() in order to free up the temporary fsid and
++ * uuid buffers.
++ *
++ * Return: 0 for success, -errno for failure
++ */
++int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
++ struct btrfs_dev_lookup_args *args,
++ const char *path)
+ {
+- int ret = 0;
+ struct btrfs_super_block *disk_super;
+- u64 devid;
+- u8 *dev_uuid;
+ struct block_device *bdev;
+- struct btrfs_device *device;
++ int ret;
+
+- ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
+- fs_info->bdev_holder, 0, &bdev, &disk_super);
+- if (ret)
+- return ERR_PTR(ret);
++ if (!path || !path[0])
++ return -EINVAL;
++ if (!strcmp(path, "missing")) {
++ args->missing = true;
++ return 0;
++ }
+
+- devid = btrfs_stack_device_id(&disk_super->dev_item);
+- dev_uuid = disk_super->dev_item.uuid;
++ args->uuid = kzalloc(BTRFS_UUID_SIZE, GFP_KERNEL);
++ args->fsid = kzalloc(BTRFS_FSID_SIZE, GFP_KERNEL);
++ if (!args->uuid || !args->fsid) {
++ btrfs_put_dev_args_from_path(args);
++ return -ENOMEM;
++ }
++
++ ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
++ &bdev, &disk_super);
++ if (ret) {
++ btrfs_put_dev_args_from_path(args);
++ return ret;
++ }
++
++ args->devid = btrfs_stack_device_id(&disk_super->dev_item);
++ memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
+ if (btrfs_fs_incompat(fs_info, METADATA_UUID))
+- device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
+- disk_super->metadata_uuid);
++ memcpy(args->fsid, disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+ else
+- device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
+- disk_super->fsid);
+-
++ memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+ btrfs_release_disk_super(disk_super);
+- if (!device)
+- device = ERR_PTR(-ENOENT);
+ blkdev_put(bdev, FMODE_READ);
+- return device;
++ return 0;
+ }
+
+ /*
+- * Lookup a device given by device id, or the path if the id is 0.
++ * Only use this jointly with btrfs_get_dev_args_from_path() because we will
++ * allocate our ->uuid and ->fsid pointers; everybody else uses local variables
++ * that don't need to be freed.
+ */
++void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args)
++{
++ kfree(args->uuid);
++ kfree(args->fsid);
++ args->uuid = NULL;
++ args->fsid = NULL;
++}
++
+ struct btrfs_device *btrfs_find_device_by_devspec(
+ struct btrfs_fs_info *fs_info, u64 devid,
+ const char *device_path)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_device *device;
++ int ret;
+
+ if (devid) {
+- device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
+- NULL);
++ args.devid = devid;
++ device = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!device)
+ return ERR_PTR(-ENOENT);
+ return device;
+ }
+
+- if (!device_path || !device_path[0])
+- return ERR_PTR(-EINVAL);
+-
+- if (strcmp(device_path, "missing") == 0) {
+- /* Find first missing device */
+- list_for_each_entry(device, &fs_info->fs_devices->devices,
+- dev_list) {
+- if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+- &device->dev_state) && !device->bdev)
+- return device;
+- }
++ ret = btrfs_get_dev_args_from_path(fs_info, &args, device_path);
++ if (ret)
++ return ERR_PTR(ret);
++ device = btrfs_find_device(fs_info->fs_devices, &args);
++ btrfs_put_dev_args_from_path(&args);
++ if (!device)
+ return ERR_PTR(-ENOENT);
+- }
+-
+- return btrfs_find_device_by_path(fs_info, device_path);
++ return device;
+ }
+
+ /*
+@@ -2459,6 +2548,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
+ */
+ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root = fs_info->chunk_root;
+ struct btrfs_path *path;
+@@ -2468,7 +2558,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
+ struct btrfs_key key;
+ u8 fs_uuid[BTRFS_FSID_SIZE];
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+- u64 devid;
+ int ret;
+
+ path = btrfs_alloc_path();
+@@ -2480,7 +2569,9 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
+ key.type = BTRFS_DEV_ITEM_KEY;
+
+ while (1) {
++ btrfs_reserve_chunk_metadata(trans, false);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
++ btrfs_trans_release_chunk_metadata(trans);
+ if (ret < 0)
+ goto error;
+
+@@ -2505,13 +2596,14 @@ next_slot:
+
+ dev_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dev_item);
+- devid = btrfs_device_id(leaf, dev_item);
++ args.devid = btrfs_device_id(leaf, dev_item);
+ read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
+ BTRFS_UUID_SIZE);
+ read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
+ BTRFS_FSID_SIZE);
+- device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
+- fs_uuid);
++ args.uuid = dev_uuid;
++ args.fsid = fs_uuid;
++ device = btrfs_find_device(fs_info->fs_devices, &args);
+ BUG_ON(!device); /* Logic error */
+
+ if (device->fs_devices->seeding) {
+@@ -2594,7 +2686,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
+ device->fs_info = fs_info;
+ device->bdev = bdev;
+
+- ret = btrfs_get_dev_zone_info(device);
++ ret = btrfs_get_dev_zone_info(device, false);
+ if (ret)
+ goto error_free_device;
+
+@@ -2627,6 +2719,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
+ btrfs_abort_transaction(trans, ret);
+ goto error_trans;
+ }
++ btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
++ device);
+ }
+
+ device->fs_devices = fs_devices;
+@@ -2733,7 +2827,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
+ btrfs_forget_devices(device_path);
+
+ /* Update ctime/mtime for blkid or udev */
+- update_dev_time(bdev);
++ update_dev_time(device_path);
+
+ return ret;
+
+@@ -2826,6 +2920,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
+ struct btrfs_super_block *super_copy = fs_info->super_copy;
+ u64 old_total;
+ u64 diff;
++ int ret;
+
+ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+ return -EACCES;
+@@ -2854,7 +2949,11 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
+ &trans->transaction->dev_update_list);
+ mutex_unlock(&fs_info->chunk_mutex);
+
+- return btrfs_update_device(trans, device);
++ btrfs_reserve_chunk_metadata(trans, false);
++ ret = btrfs_update_device(trans, device);
++ btrfs_trans_release_chunk_metadata(trans);
++
++ return ret;
+ }
+
+ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
+@@ -3096,7 +3195,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
+ const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
+ struct btrfs_block_group *sys_bg;
+
+- sys_bg = btrfs_alloc_chunk(trans, sys_flags);
++ sys_bg = btrfs_create_chunk(trans, sys_flags);
+ if (IS_ERR(sys_bg)) {
+ ret = PTR_ERR(sys_bg);
+ btrfs_abort_transaction(trans, ret);
+@@ -4354,10 +4453,12 @@ static int balance_kthread(void *data)
+ struct btrfs_fs_info *fs_info = data;
+ int ret = 0;
+
++ sb_start_write(fs_info->sb);
+ mutex_lock(&fs_info->balance_mutex);
+ if (fs_info->balance_ctl)
+ ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
+ mutex_unlock(&fs_info->balance_mutex);
++ sb_end_write(fs_info->sb);
+
+ return ret;
+ }
+@@ -4535,8 +4636,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
+ }
+ }
+
+- BUG_ON(fs_info->balance_ctl ||
+- test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
++ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ atomic_dec(&fs_info->balance_cancel_req);
+ mutex_unlock(&fs_info->balance_mutex);
+ return 0;
+@@ -4889,8 +4989,10 @@ again:
+ round_down(old_total - diff, fs_info->sectorsize));
+ mutex_unlock(&fs_info->chunk_mutex);
+
++ btrfs_reserve_chunk_metadata(trans, false);
+ /* Now btrfs_update_device() will change the on-disk size. */
+ ret = btrfs_update_device(trans, device);
++ btrfs_trans_release_chunk_metadata(trans);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+@@ -4973,7 +5075,7 @@ static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
+ }
+
+ /*
+- * Structure used internally for __btrfs_alloc_chunk() function.
++ * Structure used internally for btrfs_create_chunk() function.
+ * Wraps needed parameters.
+ */
+ struct alloc_chunk_ctl {
+@@ -5377,7 +5479,7 @@ error_del_extent:
+ return block_group;
+ }
+
+-struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
++struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
+ u64 type)
+ {
+ struct btrfs_fs_info *info = trans->fs_info;
+@@ -5578,12 +5680,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
+ */
+
+ alloc_profile = btrfs_metadata_alloc_profile(fs_info);
+- meta_bg = btrfs_alloc_chunk(trans, alloc_profile);
++ meta_bg = btrfs_create_chunk(trans, alloc_profile);
+ if (IS_ERR(meta_bg))
+ return PTR_ERR(meta_bg);
+
+ alloc_profile = btrfs_system_alloc_profile(fs_info);
+- sys_bg = btrfs_alloc_chunk(trans, alloc_profile);
++ sys_bg = btrfs_create_chunk(trans, alloc_profile);
+ if (IS_ERR(sys_bg))
+ return PTR_ERR(sys_bg);
+
+@@ -5795,7 +5897,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
+ }
+
+ /* Bubble-sort the stripe set to put the parity/syndrome stripes last */
+-static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
++static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
+ {
+ int i;
+ int again = 1;
+@@ -5804,52 +5906,53 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
+ again = 0;
+ for (i = 0; i < num_stripes - 1; i++) {
+ /* Swap if parity is on a smaller index */
+- if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
+- swap(bbio->stripes[i], bbio->stripes[i + 1]);
+- swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
++ if (bioc->raid_map[i] > bioc->raid_map[i + 1]) {
++ swap(bioc->stripes[i], bioc->stripes[i + 1]);
++ swap(bioc->raid_map[i], bioc->raid_map[i + 1]);
+ again = 1;
+ }
+ }
+ }
+ }
+
+-static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
++static struct btrfs_io_context *alloc_btrfs_io_context(int total_stripes,
++ int real_stripes)
+ {
+- struct btrfs_bio *bbio = kzalloc(
+- /* the size of the btrfs_bio */
+- sizeof(struct btrfs_bio) +
+- /* plus the variable array for the stripes */
+- sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+- /* plus the variable array for the tgt dev */
++ struct btrfs_io_context *bioc = kzalloc(
++ /* The size of btrfs_io_context */
++ sizeof(struct btrfs_io_context) +
++ /* Plus the variable array for the stripes */
++ sizeof(struct btrfs_io_stripe) * (total_stripes) +
++ /* Plus the variable array for the tgt dev */
+ sizeof(int) * (real_stripes) +
+ /*
+- * plus the raid_map, which includes both the tgt dev
+- * and the stripes
++ * Plus the raid_map, which includes both the tgt dev
++ * and the stripes.
+ */
+ sizeof(u64) * (total_stripes),
+ GFP_NOFS|__GFP_NOFAIL);
+
+- atomic_set(&bbio->error, 0);
+- refcount_set(&bbio->refs, 1);
++ atomic_set(&bioc->error, 0);
++ refcount_set(&bioc->refs, 1);
+
+- bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
+- bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
++ bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes);
++ bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
+
+- return bbio;
++ return bioc;
+ }
+
+-void btrfs_get_bbio(struct btrfs_bio *bbio)
++void btrfs_get_bioc(struct btrfs_io_context *bioc)
+ {
+- WARN_ON(!refcount_read(&bbio->refs));
+- refcount_inc(&bbio->refs);
++ WARN_ON(!refcount_read(&bioc->refs));
++ refcount_inc(&bioc->refs);
+ }
+
+-void btrfs_put_bbio(struct btrfs_bio *bbio)
++void btrfs_put_bioc(struct btrfs_io_context *bioc)
+ {
+- if (!bbio)
++ if (!bioc)
+ return;
+- if (refcount_dec_and_test(&bbio->refs))
+- kfree(bbio);
++ if (refcount_dec_and_test(&bioc->refs))
++ kfree(bioc);
+ }
+
+ /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
+@@ -5859,11 +5962,11 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
+ */
+ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 *length_ret,
+- struct btrfs_bio **bbio_ret)
++ struct btrfs_io_context **bioc_ret)
+ {
+ struct extent_map *em;
+ struct map_lookup *map;
+- struct btrfs_bio *bbio;
++ struct btrfs_io_context *bioc;
+ u64 length = *length_ret;
+ u64 offset;
+ u64 stripe_nr;
+@@ -5882,8 +5985,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ int ret = 0;
+ int i;
+
+- /* discard always return a bbio */
+- ASSERT(bbio_ret);
++ /* Discard always returns a bioc. */
++ ASSERT(bioc_ret);
+
+ em = btrfs_get_chunk_map(fs_info, logical, length);
+ if (IS_ERR(em))
+@@ -5946,26 +6049,25 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ &stripe_index);
+ }
+
+- bbio = alloc_btrfs_bio(num_stripes, 0);
+- if (!bbio) {
++ bioc = alloc_btrfs_io_context(num_stripes, 0);
++ if (!bioc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_stripes; i++) {
+- bbio->stripes[i].physical =
++ bioc->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset + stripe_nr * map->stripe_len;
+- bbio->stripes[i].dev = map->stripes[stripe_index].dev;
++ bioc->stripes[i].dev = map->stripes[stripe_index].dev;
+
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+- bbio->stripes[i].length = stripes_per_dev *
++ bioc->stripes[i].length = stripes_per_dev *
+ map->stripe_len;
+
+ if (i / sub_stripes < remaining_stripes)
+- bbio->stripes[i].length +=
+- map->stripe_len;
++ bioc->stripes[i].length += map->stripe_len;
+
+ /*
+ * Special for the first stripe and
+@@ -5976,19 +6078,17 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ * off end_off
+ */
+ if (i < sub_stripes)
+- bbio->stripes[i].length -=
+- stripe_offset;
++ bioc->stripes[i].length -= stripe_offset;
+
+ if (stripe_index >= last_stripe &&
+ stripe_index <= (last_stripe +
+ sub_stripes - 1))
+- bbio->stripes[i].length -=
+- stripe_end_offset;
++ bioc->stripes[i].length -= stripe_end_offset;
+
+ if (i == sub_stripes - 1)
+ stripe_offset = 0;
+ } else {
+- bbio->stripes[i].length = length;
++ bioc->stripes[i].length = length;
+ }
+
+ stripe_index++;
+@@ -5998,9 +6098,9 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ }
+ }
+
+- *bbio_ret = bbio;
+- bbio->map_type = map->type;
+- bbio->num_stripes = num_stripes;
++ *bioc_ret = bioc;
++ bioc->map_type = map->type;
++ bioc->num_stripes = num_stripes;
+ out:
+ free_extent_map(em);
+ return ret;
+@@ -6024,7 +6124,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+ u64 srcdev_devid, int *mirror_num,
+ u64 *physical)
+ {
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ int num_stripes;
+ int index_srcdev = 0;
+ int found = 0;
+@@ -6033,20 +6133,20 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+ int ret = 0;
+
+ ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+- logical, &length, &bbio, 0, 0);
++ logical, &length, &bioc, 0, 0);
+ if (ret) {
+- ASSERT(bbio == NULL);
++ ASSERT(bioc == NULL);
+ return ret;
+ }
+
+- num_stripes = bbio->num_stripes;
++ num_stripes = bioc->num_stripes;
+ if (*mirror_num > num_stripes) {
+ /*
+ * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
+ * that means that the requested area is not left of the left
+ * cursor
+ */
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ return -EIO;
+ }
+
+@@ -6056,7 +6156,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+ * pointer to the one of the target drive.
+ */
+ for (i = 0; i < num_stripes; i++) {
+- if (bbio->stripes[i].dev->devid != srcdev_devid)
++ if (bioc->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+@@ -6064,15 +6164,15 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+ * mirror with the lowest physical address
+ */
+ if (found &&
+- physical_of_found <= bbio->stripes[i].physical)
++ physical_of_found <= bioc->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+- physical_of_found = bbio->stripes[i].physical;
++ physical_of_found = bioc->stripes[i].physical;
+ }
+
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+
+ ASSERT(found);
+ if (!found)
+@@ -6103,12 +6203,12 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
+ }
+
+ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+- struct btrfs_bio **bbio_ret,
++ struct btrfs_io_context **bioc_ret,
+ struct btrfs_dev_replace *dev_replace,
+ u64 logical,
+ int *num_stripes_ret, int *max_errors_ret)
+ {
+- struct btrfs_bio *bbio = *bbio_ret;
++ struct btrfs_io_context *bioc = *bioc_ret;
+ u64 srcdev_devid = dev_replace->srcdev->devid;
+ int tgtdev_indexes = 0;
+ int num_stripes = *num_stripes_ret;
+@@ -6138,17 +6238,17 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+ */
+ index_where_to_add = num_stripes;
+ for (i = 0; i < num_stripes; i++) {
+- if (bbio->stripes[i].dev->devid == srcdev_devid) {
++ if (bioc->stripes[i].dev->devid == srcdev_devid) {
+ /* write to new disk, too */
+- struct btrfs_bio_stripe *new =
+- bbio->stripes + index_where_to_add;
+- struct btrfs_bio_stripe *old =
+- bbio->stripes + i;
++ struct btrfs_io_stripe *new =
++ bioc->stripes + index_where_to_add;
++ struct btrfs_io_stripe *old =
++ bioc->stripes + i;
+
+ new->physical = old->physical;
+ new->length = old->length;
+ new->dev = dev_replace->tgtdev;
+- bbio->tgtdev_map[i] = index_where_to_add;
++ bioc->tgtdev_map[i] = index_where_to_add;
+ index_where_to_add++;
+ max_errors++;
+ tgtdev_indexes++;
+@@ -6168,30 +6268,29 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+ * full copy of the source drive.
+ */
+ for (i = 0; i < num_stripes; i++) {
+- if (bbio->stripes[i].dev->devid == srcdev_devid) {
++ if (bioc->stripes[i].dev->devid == srcdev_devid) {
+ /*
+ * In case of DUP, in order to keep it simple,
+ * only add the mirror with the lowest physical
+ * address
+ */
+ if (found &&
+- physical_of_found <=
+- bbio->stripes[i].physical)
++ physical_of_found <= bioc->stripes[i].physical)
+ continue;
+ index_srcdev = i;
+ found = 1;
+- physical_of_found = bbio->stripes[i].physical;
++ physical_of_found = bioc->stripes[i].physical;
+ }
+ }
+ if (found) {
+- struct btrfs_bio_stripe *tgtdev_stripe =
+- bbio->stripes + num_stripes;
++ struct btrfs_io_stripe *tgtdev_stripe =
++ bioc->stripes + num_stripes;
+
+ tgtdev_stripe->physical = physical_of_found;
+ tgtdev_stripe->length =
+- bbio->stripes[index_srcdev].length;
++ bioc->stripes[index_srcdev].length;
+ tgtdev_stripe->dev = dev_replace->tgtdev;
+- bbio->tgtdev_map[index_srcdev] = num_stripes;
++ bioc->tgtdev_map[index_srcdev] = num_stripes;
+
+ tgtdev_indexes++;
+ num_stripes++;
+@@ -6200,8 +6299,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+
+ *num_stripes_ret = num_stripes;
+ *max_errors_ret = max_errors;
+- bbio->num_tgtdevs = tgtdev_indexes;
+- *bbio_ret = bbio;
++ bioc->num_tgtdevs = tgtdev_indexes;
++ *bioc_ret = bioc;
+ }
+
+ static bool need_full_stripe(enum btrfs_map_op op)
+@@ -6304,7 +6403,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
+ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret,
++ struct btrfs_io_context **bioc_ret,
+ int mirror_num, int need_raid_map)
+ {
+ struct extent_map *em;
+@@ -6319,7 +6418,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ int num_stripes;
+ int max_errors = 0;
+ int tgtdev_indexes = 0;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+ int dev_replace_is_ongoing = 0;
+ int num_alloc_stripes;
+@@ -6328,7 +6427,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ u64 raid56_full_stripe_start = (u64)-1;
+ struct btrfs_io_geometry geom;
+
+- ASSERT(bbio_ret);
++ ASSERT(bioc_ret);
+ ASSERT(op != BTRFS_MAP_DISCARD);
+
+ em = btrfs_get_chunk_map(fs_info, logical, *length);
+@@ -6472,20 +6571,20 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ tgtdev_indexes = num_stripes;
+ }
+
+- bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
+- if (!bbio) {
++ bioc = alloc_btrfs_io_context(num_alloc_stripes, tgtdev_indexes);
++ if (!bioc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_stripes; i++) {
+- bbio->stripes[i].physical = map->stripes[stripe_index].physical +
++ bioc->stripes[i].physical = map->stripes[stripe_index].physical +
+ stripe_offset + stripe_nr * map->stripe_len;
+- bbio->stripes[i].dev = map->stripes[stripe_index].dev;
++ bioc->stripes[i].dev = map->stripes[stripe_index].dev;
+ stripe_index++;
+ }
+
+- /* build raid_map */
++ /* Build raid_map */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
+ (need_full_stripe(op) || mirror_num > 1)) {
+ u64 tmp;
+@@ -6497,15 +6596,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ /* Fill in the logical address of each stripe */
+ tmp = stripe_nr * data_stripes;
+ for (i = 0; i < data_stripes; i++)
+- bbio->raid_map[(i+rot) % num_stripes] =
++ bioc->raid_map[(i + rot) % num_stripes] =
+ em->start + (tmp + i) * map->stripe_len;
+
+- bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
++ bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+- bbio->raid_map[(i+rot+1) % num_stripes] =
++ bioc->raid_map[(i + rot + 1) % num_stripes] =
+ RAID6_Q_STRIPE;
+
+- sort_parity_stripes(bbio, num_stripes);
++ sort_parity_stripes(bioc, num_stripes);
+ }
+
+ if (need_full_stripe(op))
+@@ -6513,15 +6612,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+
+ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+ need_full_stripe(op)) {
+- handle_ops_on_dev_replace(op, &bbio, dev_replace, logical,
++ handle_ops_on_dev_replace(op, &bioc, dev_replace, logical,
+ &num_stripes, &max_errors);
+ }
+
+- *bbio_ret = bbio;
+- bbio->map_type = map->type;
+- bbio->num_stripes = num_stripes;
+- bbio->max_errors = max_errors;
+- bbio->mirror_num = mirror_num;
++ *bioc_ret = bioc;
++ bioc->map_type = map->type;
++ bioc->num_stripes = num_stripes;
++ bioc->max_errors = max_errors;
++ bioc->mirror_num = mirror_num;
+
+ /*
+ * this is the case that REQ_READ && dev_replace_is_ongoing &&
+@@ -6530,9 +6629,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ */
+ if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
+ WARN_ON(num_stripes > 1);
+- bbio->stripes[0].dev = dev_replace->tgtdev;
+- bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
+- bbio->mirror_num = map->num_stripes + 1;
++ bioc->stripes[0].dev = dev_replace->tgtdev;
++ bioc->stripes[0].physical = physical_to_patch_in_first_stripe;
++ bioc->mirror_num = map->num_stripes + 1;
+ }
+ out:
+ if (dev_replace_is_ongoing) {
+@@ -6546,40 +6645,40 @@ out:
+
+ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret, int mirror_num)
++ struct btrfs_io_context **bioc_ret, int mirror_num)
+ {
+ if (op == BTRFS_MAP_DISCARD)
+ return __btrfs_map_block_for_discard(fs_info, logical,
+- length, bbio_ret);
++ length, bioc_ret);
+
+- return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
++ return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
+ mirror_num, 0);
+ }
+
+ /* For Scrub/replace */
+ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret)
++ struct btrfs_io_context **bioc_ret)
+ {
+- return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
++ return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
+ }
+
+-static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
++static inline void btrfs_end_bioc(struct btrfs_io_context *bioc, struct bio *bio)
+ {
+- bio->bi_private = bbio->private;
+- bio->bi_end_io = bbio->end_io;
++ bio->bi_private = bioc->private;
++ bio->bi_end_io = bioc->end_io;
+ bio_endio(bio);
+
+- btrfs_put_bbio(bbio);
++ btrfs_put_bioc(bioc);
+ }
+
+ static void btrfs_end_bio(struct bio *bio)
+ {
+- struct btrfs_bio *bbio = bio->bi_private;
++ struct btrfs_io_context *bioc = bio->bi_private;
+ int is_orig_bio = 0;
+
+ if (bio->bi_status) {
+- atomic_inc(&bbio->error);
++ atomic_inc(&bioc->error);
+ if (bio->bi_status == BLK_STS_IOERR ||
+ bio->bi_status == BLK_STS_TARGET) {
+ struct btrfs_device *dev = btrfs_io_bio(bio)->device;
+@@ -6597,22 +6696,22 @@ static void btrfs_end_bio(struct bio *bio)
+ }
+ }
+
+- if (bio == bbio->orig_bio)
++ if (bio == bioc->orig_bio)
+ is_orig_bio = 1;
+
+- btrfs_bio_counter_dec(bbio->fs_info);
++ btrfs_bio_counter_dec(bioc->fs_info);
+
+- if (atomic_dec_and_test(&bbio->stripes_pending)) {
++ if (atomic_dec_and_test(&bioc->stripes_pending)) {
+ if (!is_orig_bio) {
+ bio_put(bio);
+- bio = bbio->orig_bio;
++ bio = bioc->orig_bio;
+ }
+
+- btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
++ btrfs_io_bio(bio)->mirror_num = bioc->mirror_num;
+ /* only send an error to the higher layers if it is
+ * beyond the tolerance of the btrfs bio
+ */
+- if (atomic_read(&bbio->error) > bbio->max_errors) {
++ if (atomic_read(&bioc->error) > bioc->max_errors) {
+ bio->bi_status = BLK_STS_IOERR;
+ } else {
+ /*
+@@ -6622,18 +6721,18 @@ static void btrfs_end_bio(struct bio *bio)
+ bio->bi_status = BLK_STS_OK;
+ }
+
+- btrfs_end_bbio(bbio, bio);
++ btrfs_end_bioc(bioc, bio);
+ } else if (!is_orig_bio) {
+ bio_put(bio);
+ }
+ }
+
+-static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
++static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
+ u64 physical, struct btrfs_device *dev)
+ {
+- struct btrfs_fs_info *fs_info = bbio->fs_info;
++ struct btrfs_fs_info *fs_info = bioc->fs_info;
+
+- bio->bi_private = bbio;
++ bio->bi_private = bioc;
+ btrfs_io_bio(bio)->device = dev;
+ bio->bi_end_io = btrfs_end_bio;
+ bio->bi_iter.bi_sector = physical >> 9;
+@@ -6663,20 +6762,20 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
+ btrfsic_submit_bio(bio);
+ }
+
+-static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
++static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical)
+ {
+- atomic_inc(&bbio->error);
+- if (atomic_dec_and_test(&bbio->stripes_pending)) {
++ atomic_inc(&bioc->error);
++ if (atomic_dec_and_test(&bioc->stripes_pending)) {
+ /* Should be the original bio. */
+- WARN_ON(bio != bbio->orig_bio);
++ WARN_ON(bio != bioc->orig_bio);
+
+- btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
++ btrfs_io_bio(bio)->mirror_num = bioc->mirror_num;
+ bio->bi_iter.bi_sector = logical >> 9;
+- if (atomic_read(&bbio->error) > bbio->max_errors)
++ if (atomic_read(&bioc->error) > bioc->max_errors)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_status = BLK_STS_OK;
+- btrfs_end_bbio(bbio, bio);
++ btrfs_end_bioc(bioc, bio);
+ }
+ }
+
+@@ -6691,35 +6790,35 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ int ret;
+ int dev_nr;
+ int total_devs;
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+
+ length = bio->bi_iter.bi_size;
+ map_length = length;
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
+- &map_length, &bbio, mirror_num, 1);
++ &map_length, &bioc, mirror_num, 1);
+ if (ret) {
+ btrfs_bio_counter_dec(fs_info);
+ return errno_to_blk_status(ret);
+ }
+
+- total_devs = bbio->num_stripes;
+- bbio->orig_bio = first_bio;
+- bbio->private = first_bio->bi_private;
+- bbio->end_io = first_bio->bi_end_io;
+- bbio->fs_info = fs_info;
+- atomic_set(&bbio->stripes_pending, bbio->num_stripes);
++ total_devs = bioc->num_stripes;
++ bioc->orig_bio = first_bio;
++ bioc->private = first_bio->bi_private;
++ bioc->end_io = first_bio->bi_end_io;
++ bioc->fs_info = fs_info;
++ atomic_set(&bioc->stripes_pending, bioc->num_stripes);
+
+- if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
++ if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+ ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
+ /* In this case, map_length has been set to the length of
+ a single stripe; not the whole write */
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+- ret = raid56_parity_write(fs_info, bio, bbio,
++ ret = raid56_parity_write(fs_info, bio, bioc,
+ map_length);
+ } else {
+- ret = raid56_parity_recover(fs_info, bio, bbio,
++ ret = raid56_parity_recover(fs_info, bio, bioc,
+ map_length, mirror_num, 1);
+ }
+
+@@ -6735,12 +6834,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ }
+
+ for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
+- dev = bbio->stripes[dev_nr].dev;
++ dev = bioc->stripes[dev_nr].dev;
+ if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
+ &dev->dev_state) ||
+ (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+- bbio_error(bbio, first_bio, logical);
++ bioc_error(bioc, first_bio, logical);
+ continue;
+ }
+
+@@ -6749,12 +6848,39 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ else
+ bio = first_bio;
+
+- submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
++ submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev);
+ }
+ btrfs_bio_counter_dec(fs_info);
+ return BLK_STS_OK;
+ }
+
++static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
++ const struct btrfs_fs_devices *fs_devices)
++{
++ if (args->fsid == NULL)
++ return true;
++ if (memcmp(fs_devices->metadata_uuid, args->fsid, BTRFS_FSID_SIZE) == 0)
++ return true;
++ return false;
++}
++
++static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
++ const struct btrfs_device *device)
++{
++ if (args->missing) {
++ if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
++ !device->bdev)
++ return true;
++ return false;
++ }
++
++ if (device->devid != args->devid)
++ return false;
++ if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0)
++ return false;
++ return true;
++}
++
+ /*
+ * Find a device specified by @devid or @uuid in the list of @fs_devices, or
+ * return NULL.
+@@ -6762,31 +6888,25 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ * If devid and uuid are both specified, the match must be exact, otherwise
+ * only devid is used.
+ */
+-struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
+- u64 devid, u8 *uuid, u8 *fsid)
++struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
++ const struct btrfs_dev_lookup_args *args)
+ {
+ struct btrfs_device *device;
+ struct btrfs_fs_devices *seed_devs;
+
+- if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
++ if (dev_args_match_fs_devices(args, fs_devices)) {
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+- if (device->devid == devid &&
+- (!uuid || memcmp(device->uuid, uuid,
+- BTRFS_UUID_SIZE) == 0))
++ if (dev_args_match_device(args, device))
+ return device;
+ }
+ }
+
+ list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
+- if (!fsid ||
+- !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
+- list_for_each_entry(device, &seed_devs->devices,
+- dev_list) {
+- if (device->devid == devid &&
+- (!uuid || memcmp(device->uuid, uuid,
+- BTRFS_UUID_SIZE) == 0))
+- return device;
+- }
++ if (!dev_args_match_fs_devices(args, seed_devs))
++ continue;
++ list_for_each_entry(device, &seed_devs->devices, dev_list) {
++ if (dev_args_match_device(args, device))
++ return device;
+ }
+ }
+
+@@ -6949,9 +7069,31 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
+ }
+ #endif
+
++static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
++ u64 devid, u8 *uuid)
++{
++ struct btrfs_device *dev;
++
++ if (!btrfs_test_opt(fs_info, DEGRADED)) {
++ btrfs_report_missing_device(fs_info, devid, uuid, true);
++ return ERR_PTR(-ENOENT);
++ }
++
++ dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
++ if (IS_ERR(dev)) {
++ btrfs_err(fs_info, "failed to init missing device %llu: %ld",
++ devid, PTR_ERR(dev));
++ return dev;
++ }
++ btrfs_report_missing_device(fs_info, devid, uuid, false);
++
++ return dev;
++}
++
+ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct extent_map_tree *map_tree = &fs_info->mapping_tree;
+ struct map_lookup *map;
+@@ -7029,33 +7171,24 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
+ map->stripes[i].physical =
+ btrfs_stripe_offset_nr(leaf, chunk, i);
+ devid = btrfs_stripe_devid_nr(leaf, chunk, i);
++ args.devid = devid;
+ read_extent_buffer(leaf, uuid, (unsigned long)
+ btrfs_stripe_dev_uuid_nr(chunk, i),
+ BTRFS_UUID_SIZE);
+- map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
+- devid, uuid, NULL);
+- if (!map->stripes[i].dev &&
+- !btrfs_test_opt(fs_info, DEGRADED)) {
+- free_extent_map(em);
+- btrfs_report_missing_device(fs_info, devid, uuid, true);
+- return -ENOENT;
+- }
++ args.uuid = uuid;
++ map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!map->stripes[i].dev) {
+- map->stripes[i].dev =
+- add_missing_dev(fs_info->fs_devices, devid,
+- uuid);
++ map->stripes[i].dev = handle_missing_device(fs_info,
++ devid, uuid);
+ if (IS_ERR(map->stripes[i].dev)) {
++ ret = PTR_ERR(map->stripes[i].dev);
+ free_extent_map(em);
+- btrfs_err(fs_info,
+- "failed to init missing dev %llu: %ld",
+- devid, PTR_ERR(map->stripes[i].dev));
+- return PTR_ERR(map->stripes[i].dev);
++ return ret;
+ }
+- btrfs_report_missing_device(fs_info, devid, uuid, false);
+ }
++
+ set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+ &(map->stripes[i].dev->dev_state));
+-
+ }
+
+ write_lock(&map_tree->lock);
+@@ -7151,6 +7284,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
+ static int read_one_dev(struct extent_buffer *leaf,
+ struct btrfs_dev_item *dev_item)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *device;
+@@ -7159,11 +7293,13 @@ static int read_one_dev(struct extent_buffer *leaf,
+ u8 fs_uuid[BTRFS_FSID_SIZE];
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+
+- devid = btrfs_device_id(leaf, dev_item);
++ devid = args.devid = btrfs_device_id(leaf, dev_item);
+ read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
+ BTRFS_UUID_SIZE);
+ read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
+ BTRFS_FSID_SIZE);
++ args.uuid = dev_uuid;
++ args.fsid = fs_uuid;
+
+ if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
+ fs_devices = open_seed_devices(fs_info, fs_uuid);
+@@ -7171,8 +7307,7 @@ static int read_one_dev(struct extent_buffer *leaf,
+ return PTR_ERR(fs_devices);
+ }
+
+- device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
+- fs_uuid);
++ device = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!device) {
+ if (!btrfs_test_opt(fs_info, DEGRADED)) {
+ btrfs_report_missing_device(fs_info, devid,
+@@ -7481,6 +7616,19 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+ */
+ fs_info->fs_devices->total_rw_bytes = 0;
+
++ /*
++ * Lockdep complains about possible circular locking dependency between
++ * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
++ * used for freeze protection of a fs (struct super_block.s_writers),
++ * which we take when starting a transaction, and extent buffers of the
++ * chunk tree if we call read_one_dev() while holding a lock on an
++ * extent buffer of the chunk tree. Since we are mounting the filesystem
++ * and at this point there can't be any concurrent task modifying the
++ * chunk tree, to keep it simple, just skip locking on the chunk tree.
++ */
++ ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
++ path->skip_locking = 1;
++
+ /*
+ * Read all device items, and then all the chunk items. All
+ * device items are found before any chunk item (their object id
+@@ -7506,10 +7654,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+ goto error;
+ break;
+ }
+- /*
+- * The nodes on level 1 are not locked but we don't need to do
+- * that during mount time as nothing else can access the tree
+- */
+ node = path->nodes[1];
+ if (node) {
+ if (last_ra_node != node->start) {
+@@ -7537,7 +7681,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+ * requirement for chunk allocation, see the comment on
+ * top of btrfs_chunk_alloc() for details.
+ */
+- ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
+ chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+ ret = read_one_chunk(&found_key, leaf, chunk);
+ if (ret)
+@@ -7551,12 +7694,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+ * do another round of validation checks.
+ */
+ if (total_dev != fs_info->fs_devices->total_devices) {
+- btrfs_err(fs_info,
+- "super_num_devices %llu mismatch with num_devices %llu found here",
++ btrfs_warn(fs_info,
++"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit",
+ btrfs_super_num_devices(fs_info->super_copy),
+ total_dev);
+- ret = -EINVAL;
+- goto error;
++ fs_info->fs_devices->total_devices = total_dev;
++ btrfs_set_super_num_devices(fs_info->super_copy, total_dev);
+ }
+ if (btrfs_super_total_bytes(fs_info->super_copy) <
+ fs_info->fs_devices->total_rw_bytes) {
+@@ -7575,10 +7718,11 @@ error:
+ return ret;
+ }
+
+-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
++int btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+ {
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
+ struct btrfs_device *device;
++ int ret = 0;
+
+ fs_devices->fs_info = fs_info;
+
+@@ -7587,12 +7731,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+ device->fs_info = fs_info;
+
+ list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
+- list_for_each_entry(device, &seed_devs->devices, dev_list)
++ list_for_each_entry(device, &seed_devs->devices, dev_list) {
+ device->fs_info = fs_info;
++ ret = btrfs_get_dev_zone_info(device, false);
++ if (ret)
++ break;
++ }
+
+ seed_devs->fs_info = fs_info;
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
++
++ return ret;
+ }
+
+ static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+@@ -7841,12 +7991,14 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
+ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
+ struct btrfs_ioctl_get_dev_stats *stats)
+ {
++ BTRFS_DEV_LOOKUP_ARGS(args);
+ struct btrfs_device *dev;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ int i;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+- dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
++ args.devid = stats->devid;
++ dev = btrfs_find_device(fs_info->fs_devices, &args);
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ if (!dev) {
+@@ -7922,6 +8074,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+ u64 chunk_offset, u64 devid,
+ u64 physical_offset, u64 physical_len)
+ {
++ struct btrfs_dev_lookup_args args = { .devid = devid };
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct map_lookup *map;
+@@ -7977,7 +8130,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+ }
+
+ /* Make sure no dev extent is beyond device boundary */
+- dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
++ dev = btrfs_find_device(fs_info->fs_devices, &args);
+ if (!dev) {
+ btrfs_err(fs_info, "failed to find devid %llu", devid);
+ ret = -EUCLEAN;
+@@ -8173,10 +8326,12 @@ static int relocating_repair_kthread(void *data)
+ target = cache->start;
+ btrfs_put_block_group(cache);
+
++ sb_start_write(fs_info->sb);
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
+ btrfs_info(fs_info,
+ "zoned: skip relocating block group %llu to repair: EBUSY",
+ target);
++ sb_end_write(fs_info->sb);
+ return -EBUSY;
+ }
+
+@@ -8204,6 +8359,7 @@ out:
+ btrfs_put_block_group(cache);
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
+ btrfs_exclop_finish(fs_info);
++ sb_end_write(fs_info->sb);
+
+ return ret;
+ }
+diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
+index 2183361db614d..b49fa784e5ba3 100644
+--- a/fs/btrfs/volumes.h
++++ b/fs/btrfs/volumes.h
+@@ -246,7 +246,11 @@ struct btrfs_fs_devices {
+ /* Highest generation number of seen devices */
+ u64 latest_generation;
+
+- struct block_device *latest_bdev;
++ /*
++ * The mount device or a device with highest generation after removal
++ * or replace.
++ */
++ struct btrfs_device *latest_dev;
+
+ /* all of the devices in the FS, protected by a mutex
+ * so we can safely walk it to write out the supers without
+@@ -302,11 +306,11 @@ struct btrfs_fs_devices {
+ /*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+- * Really, what we need is a btrfs_bio structure that has this info
++ * Really, what we need is a btrfs_io_context structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet. We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios. We'll cram as much of
+- * struct btrfs_bio as we can into this over time.
++ * struct btrfs_io_context as we can into this over time.
+ */
+ struct btrfs_io_bio {
+ unsigned int mirror_num;
+@@ -335,13 +339,29 @@ static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio)
+ }
+ }
+
+-struct btrfs_bio_stripe {
++struct btrfs_io_stripe {
+ struct btrfs_device *dev;
+ u64 physical;
+ u64 length; /* only used for discard mappings */
+ };
+
+-struct btrfs_bio {
++/*
++ * Context for IO submission for a device stripe.
++ *
++ * - Track the unfinished mirrors for mirror-based profiles
++ * Mirror-based profiles are SINGLE/DUP/RAID1/RAID10.
++ *
++ * - Contain the logical -> physical mapping info
++ * Used by submit_stripe_bio() for mapping logical bio
++ * into physical device address.
++ *
++ * - Contain device replace info
++ * Used by handle_ops_on_dev_replace() to copy logical bios
++ * into the new device.
++ *
++ * - Contain RAID56 full stripe logical bytenrs
++ */
++struct btrfs_io_context {
+ refcount_t refs;
+ atomic_t stripes_pending;
+ struct btrfs_fs_info *fs_info;
+@@ -361,7 +381,7 @@ struct btrfs_bio {
+ * so raid_map[0] is the start of our full stripe
+ */
+ u64 *raid_map;
+- struct btrfs_bio_stripe stripes[];
++ struct btrfs_io_stripe stripes[];
+ };
+
+ struct btrfs_device_info {
+@@ -396,11 +416,11 @@ struct map_lookup {
+ int num_stripes;
+ int sub_stripes;
+ int verified_stripes; /* For mount time dev extent verification */
+- struct btrfs_bio_stripe stripes[];
++ struct btrfs_io_stripe stripes[];
+ };
+
+ #define map_lookup_size(n) (sizeof(struct map_lookup) + \
+- (sizeof(struct btrfs_bio_stripe) * (n)))
++ (sizeof(struct btrfs_io_stripe) * (n)))
+
+ struct btrfs_balance_args;
+ struct btrfs_balance_progress;
+@@ -414,6 +434,22 @@ struct btrfs_balance_control {
+ struct btrfs_balance_progress stat;
+ };
+
++/*
++ * Search for a given device by the set parameters
++ */
++struct btrfs_dev_lookup_args {
++ u64 devid;
++ u8 *uuid;
++ u8 *fsid;
++ bool missing;
++};
++
++/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
++#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
++
++#define BTRFS_DEV_LOOKUP_ARGS(name) \
++ struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT
++
+ enum btrfs_map_op {
+ BTRFS_MAP_READ,
+ BTRFS_MAP_WRITE,
+@@ -437,20 +473,20 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
+ }
+ }
+
+-void btrfs_get_bbio(struct btrfs_bio *bbio);
+-void btrfs_put_bbio(struct btrfs_bio *bbio);
++void btrfs_get_bioc(struct btrfs_io_context *bioc);
++void btrfs_put_bioc(struct btrfs_io_context *bioc);
+ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret, int mirror_num);
++ struct btrfs_io_context **bioc_ret, int mirror_num);
+ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length,
+- struct btrfs_bio **bbio_ret);
++ struct btrfs_io_context **bioc_ret);
+ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
+ enum btrfs_map_op op, u64 logical,
+ struct btrfs_io_geometry *io_geom);
+ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
+ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
+-struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
++struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
+ u64 type);
+ void btrfs_mapping_tree_free(struct extent_map_tree *tree);
+ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+@@ -467,19 +503,23 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
+ u64 devid,
+ const char *devpath);
++int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
++ struct btrfs_dev_lookup_args *args,
++ const char *path);
+ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+ const u64 *devid,
+ const u8 *uuid);
++void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
+ void btrfs_free_device(struct btrfs_device *device);
+ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
+- const char *device_path, u64 devid,
++ struct btrfs_dev_lookup_args *args,
+ struct block_device **bdev, fmode_t *mode);
+ void __exit btrfs_cleanup_fs_uuids(void);
+ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
+ int btrfs_grow_device(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 new_size);
+-struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
+- u64 devid, u8 *uuid, u8 *fsid);
++struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
++ const struct btrfs_dev_lookup_args *args);
+ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
+ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
+ int btrfs_balance(struct btrfs_fs_info *fs_info,
+@@ -499,7 +539,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
+ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
+ struct btrfs_ioctl_get_dev_stats *stats);
+-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
++int btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
+ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
+ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
+ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
+diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
+index 8a4514283a4b8..43fe2c2a955e2 100644
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -264,7 +264,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
+ inode_inc_iversion(inode);
+ inode->i_ctime = current_time(inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+- BUG_ON(ret);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+ out:
+ if (start_trans)
+ btrfs_end_transaction(trans);
+@@ -390,6 +391,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ const char *name, const void *buffer,
+ size_t size, int flags)
+ {
++ if (btrfs_root_readonly(BTRFS_I(inode)->root))
++ return -EROFS;
++
+ name = xattr_full_name(handler, name);
+ return btrfs_setxattr_trans(inode, name, buffer, size, flags);
+ }
+@@ -418,7 +422,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ inode_inc_iversion(inode);
+ inode->i_ctime = current_time(inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+- BUG_ON(ret);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+ }
+
+ btrfs_end_transaction(trans);
+diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
+index 767a0c6c9694b..12e674f10baf6 100644
+--- a/fs/btrfs/zlib.c
++++ b/fs/btrfs/zlib.c
+@@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
+
+ workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
+ zlib_inflate_workspacesize());
+- workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
++ workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL);
+ workspace->level = level;
+ workspace->buf = NULL;
+ /*
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 47af1ab3bf120..8c858f31bdbc0 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -4,6 +4,7 @@
+ #include <linux/slab.h>
+ #include <linux/blkdev.h>
+ #include <linux/sched/mm.h>
++#include <linux/vmalloc.h>
+ #include "ctree.h"
+ #include "volumes.h"
+ #include "zoned.h"
+@@ -113,7 +114,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
+ super[i] = page_address(page[i]);
+ }
+
+- if (super[0]->generation > super[1]->generation)
++ if (btrfs_super_generation(super[0]) >
++ btrfs_super_generation(super[1]))
+ sector = zones[1].start;
+ else
+ sector = zones[0].start;
+@@ -195,6 +197,8 @@ static int emulate_report_zones(struct btrfs_device *device, u64 pos,
+ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+ struct blk_zone *zones, unsigned int *nr_zones)
+ {
++ struct btrfs_zoned_device_info *zinfo = device->zone_info;
++ u32 zno;
+ int ret;
+
+ if (!*nr_zones)
+@@ -206,6 +210,34 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+ return 0;
+ }
+
++ /* Check cache */
++ if (zinfo->zone_cache) {
++ unsigned int i;
++
++ ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
++ zno = pos >> zinfo->zone_size_shift;
++ /*
++ * We cannot report zones beyond the zone end. So, it is OK to
++ * cap *nr_zones at the end.
++ */
++ *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno);
++
++ for (i = 0; i < *nr_zones; i++) {
++ struct blk_zone *zone_info;
++
++ zone_info = &zinfo->zone_cache[zno + i];
++ if (!zone_info->len)
++ break;
++ }
++
++ if (i == *nr_zones) {
++ /* Cache hit on all the zones */
++ memcpy(zones, zinfo->zone_cache + zno,
++ sizeof(*zinfo->zone_cache) * *nr_zones);
++ return 0;
++ }
++ }
++
+ ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
+ copy_zone_info_cb, zones);
+ if (ret < 0) {
+@@ -219,6 +251,11 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+ if (!ret)
+ return -EIO;
+
++ /* Populate cache */
++ if (zinfo->zone_cache)
++ memcpy(zinfo->zone_cache + zno, zones,
++ sizeof(*zinfo->zone_cache) * *nr_zones);
++
+ return 0;
+ }
+
+@@ -282,7 +319,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
+ if (!device->bdev)
+ continue;
+
+- ret = btrfs_get_dev_zone_info(device);
++ ret = btrfs_get_dev_zone_info(device, true);
+ if (ret)
+ break;
+ }
+@@ -291,7 +328,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
+ return ret;
+ }
+
+-int btrfs_get_dev_zone_info(struct btrfs_device *device)
++int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
+ {
+ struct btrfs_fs_info *fs_info = device->fs_info;
+ struct btrfs_zoned_device_info *zone_info = NULL;
+@@ -318,6 +355,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
+ if (!zone_info)
+ return -ENOMEM;
+
++ device->zone_info = zone_info;
++
+ if (!bdev_is_zoned(bdev)) {
+ if (!fs_info->zone_size) {
+ ret = calculate_emulated_zone_size(fs_info);
+@@ -348,6 +387,25 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
+ nr_sectors = bdev_nr_sectors(bdev);
+ zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+ zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
++ /*
++ * We also limit max_zone_append_size by max_segments *
++ * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
++ * since btrfs adds the pages one by one to a bio, and btrfs cannot
++ * increase the metadata reservation even if it increases the number of
++ * extents, it is safe to stick with the limit.
++ *
++ * With the zoned emulation, we can have a non-zoned device in zoned
++ * mode. In this case, we don't have a valid max zone append size. So,
++ * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
++ */
++ if (bdev_is_zoned(bdev)) {
++ zone_info->max_zone_append_size = min_t(u64,
++ (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
++ } else {
++ zone_info->max_zone_append_size =
++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
++ }
+ if (!IS_ALIGNED(nr_sectors, zone_sectors))
+ zone_info->nr_zones++;
+
+@@ -363,12 +421,29 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
+ goto out;
+ }
+
+- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
++ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
+ if (!zones) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
++ /*
++ * Enable zone cache only for a zoned device. On a non-zoned device, we
++ * fill the zone info with emulated CONVENTIONAL zones, so no need to
++ * use the cache.
++ */
++ if (populate_cache && bdev_is_zoned(device->bdev)) {
++ zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
++ zone_info->nr_zones);
++ if (!zone_info->zone_cache) {
++ btrfs_err_in_rcu(device->fs_info,
++ "zoned: failed to allocate zone cache for %s",
++ rcu_str_deref(device->name));
++ ret = -ENOMEM;
++ goto out;
++ }
++ }
++
+ /* Get zones type */
+ while (sector < nr_sectors) {
+ nr_zones = BTRFS_REPORT_NR_ZONES;
+@@ -442,9 +517,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
+ }
+
+
+- kfree(zones);
+-
+- device->zone_info = zone_info;
++ kvfree(zones);
+
+ switch (bdev_zoned_model(bdev)) {
+ case BLK_ZONED_HM:
+@@ -476,12 +549,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
+ return 0;
+
+ out:
+- kfree(zones);
++ kvfree(zones);
+ out_free_zone_info:
+- bitmap_free(zone_info->empty_zones);
+- bitmap_free(zone_info->seq_zones);
+- kfree(zone_info);
+- device->zone_info = NULL;
++ btrfs_destroy_dev_zone_info(device);
+
+ return ret;
+ }
+@@ -495,6 +565,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
+
+ bitmap_free(zone_info->seq_zones);
+ bitmap_free(zone_info->empty_zones);
++ vfree(zone_info->zone_cache);
+ kfree(zone_info);
+ device->zone_info = NULL;
+ }
+@@ -519,6 +590,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+ u64 zoned_devices = 0;
+ u64 nr_devices = 0;
+ u64 zone_size = 0;
++ u64 max_zone_append_size = 0;
+ const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
+ int ret = 0;
+
+@@ -554,6 +626,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+ ret = -EINVAL;
+ goto out;
+ }
++ if (!max_zone_append_size ||
++ (zone_info->max_zone_append_size &&
++ zone_info->max_zone_append_size < max_zone_append_size))
++ max_zone_append_size =
++ zone_info->max_zone_append_size;
+ }
+ nr_devices++;
+ }
+@@ -585,7 +662,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+
+ /*
+ * stripe_size is always aligned to BTRFS_STRIPE_LEN in
+- * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
++ * btrfs_create_chunk(). Since we want stripe_len == zone_size,
+ * check the alignment here.
+ */
+ if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
+@@ -603,7 +680,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+ }
+
+ fs_info->zone_size = zone_size;
++ fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
++ fs_info->sectorsize);
+ fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
++ if (fs_info->max_zone_append_size < fs_info->max_extent_size)
++ fs_info->max_extent_size = fs_info->max_zone_append_size;
+
+ /*
+ * Check mount options here, because we might change fs_info->zoned
+@@ -933,12 +1014,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
+ return -ERANGE;
+
+ /* All the zones are conventional */
+- if (find_next_bit(zinfo->seq_zones, begin, end) == end)
++ if (find_next_bit(zinfo->seq_zones, end, begin) == end)
+ return 0;
+
+ /* All the zones are sequential and empty */
+- if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
+- find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
++ if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
++ find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
+ return 0;
+
+ for (pos = start; pos < start + size; pos += zinfo->zone_size) {
+@@ -1266,11 +1347,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+ !list_empty(&eb->release_list))
+ return;
+
++ memzero_extent_buffer(eb, 0, eb->len);
++ set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
+ set_extent_buffer_dirty(eb);
+ set_extent_bits_nowait(&trans->dirty_pages, eb->start,
+ eb->start + eb->len - 1, EXTENT_DIRTY);
+- memzero_extent_buffer(eb, 0, eb->len);
+- set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
+
+ spin_lock(&trans->releasing_ebs_lock);
+ list_add_tail(&eb->release_list, &trans->releasing_ebs);
+@@ -1304,6 +1385,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
+ if (!is_data_inode(&inode->vfs_inode))
+ return false;
+
++ /*
++ * Using REQ_OP_ZONE_APPEND for relocation can break assumptions on the
++ * extent layout the relocation code has.
++ * Furthermore, we have set aside our own block group from which only the
++ * relocation "process" can allocate and make sure only one process at a
++ * time can add pages to an extent that gets relocated, so it's safe to
++ * use regular REQ_OP_WRITE for this special case.
++ */
++ if (btrfs_is_data_reloc_root(inode->root))
++ return false;
++
+ cache = btrfs_lookup_block_group(fs_info, start);
+ ASSERT(cache);
+ if (!cache)
+@@ -1440,27 +1532,29 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len
+ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
+ struct blk_zone *zone)
+ {
+- struct btrfs_bio *bbio = NULL;
++ struct btrfs_io_context *bioc = NULL;
+ u64 mapped_length = PAGE_SIZE;
+ unsigned int nofs_flag;
+ int nmirrors;
+ int i, ret;
+
+ ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
+- &mapped_length, &bbio);
+- if (ret || !bbio || mapped_length < PAGE_SIZE) {
+- btrfs_put_bbio(bbio);
+- return -EIO;
++ &mapped_length, &bioc);
++ if (ret || !bioc || mapped_length < PAGE_SIZE) {
++ ret = -EIO;
++ goto out_put_bioc;
+ }
+
+- if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+- return -EINVAL;
++ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
++ ret = -EINVAL;
++ goto out_put_bioc;
++ }
+
+ nofs_flag = memalloc_nofs_save();
+- nmirrors = (int)bbio->num_stripes;
++ nmirrors = (int)bioc->num_stripes;
+ for (i = 0; i < nmirrors; i++) {
+- u64 physical = bbio->stripes[i].physical;
+- struct btrfs_device *dev = bbio->stripes[i].dev;
++ u64 physical = bioc->stripes[i].physical;
++ struct btrfs_device *dev = bioc->stripes[i].dev;
+
+ /* Missing device */
+ if (!dev->bdev)
+@@ -1473,7 +1567,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
+ break;
+ }
+ memalloc_nofs_restore(nofs_flag);
+-
++out_put_bioc:
++ btrfs_put_bioc(bioc);
+ return ret;
+ }
+
+@@ -1530,3 +1625,58 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+
+ return device;
+ }
++
++void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
++{
++ struct btrfs_fs_info *fs_info = bg->fs_info;
++
++ spin_lock(&fs_info->relocation_bg_lock);
++ if (fs_info->data_reloc_bg == bg->start)
++ fs_info->data_reloc_bg = 0;
++ spin_unlock(&fs_info->relocation_bg_lock);
++}
++
++void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
++{
++ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
++ struct btrfs_device *device;
++
++ if (!btrfs_is_zoned(fs_info))
++ return;
++
++ mutex_lock(&fs_devices->device_list_mutex);
++ list_for_each_entry(device, &fs_devices->devices, dev_list) {
++ if (device->zone_info) {
++ vfree(device->zone_info->zone_cache);
++ device->zone_info->zone_cache = NULL;
++ }
++ }
++ mutex_unlock(&fs_devices->device_list_mutex);
++}
++
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length)
++{
++ struct btrfs_block_group *block_group;
++
++ if (!btrfs_is_zoned(fs_info))
++ return;
++
++ block_group = btrfs_lookup_block_group(fs_info, logical);
++ /* It should be called on a previous data relocation block group. */
++ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
++
++ spin_lock(&block_group->lock);
++ if (!block_group->zoned_data_reloc_ongoing)
++ goto out;
++
++ /* All relocation extents are written. */
++ if (block_group->start + block_group->alloc_offset == logical + length) {
++ /* Now, release this block group for further allocations. */
++ block_group->zoned_data_reloc_ongoing = 0;
++ }
++
++out:
++ spin_unlock(&block_group->lock);
++ btrfs_put_block_group(block_group);
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 4b299705bb12b..1ef493fcd504e 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -8,6 +8,7 @@
+ #include "volumes.h"
+ #include "disk-io.h"
+ #include "block-group.h"
++#include "btrfs_inode.h"
+
+ /*
+ * Block groups with more than this value (percents) of unusable space will be
+@@ -22,9 +23,11 @@ struct btrfs_zoned_device_info {
+ */
+ u64 zone_size;
+ u8 zone_size_shift;
++ u64 max_zone_append_size;
+ u32 nr_zones;
+ unsigned long *seq_zones;
+ unsigned long *empty_zones;
++ struct blk_zone *zone_cache;
+ struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
+ };
+
+@@ -32,7 +35,7 @@ struct btrfs_zoned_device_info {
+ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ struct blk_zone *zone);
+ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
+-int btrfs_get_dev_zone_info(struct btrfs_device *device);
++int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
+ void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
+ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
+@@ -66,6 +69,10 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
+ u64 physical_start, u64 physical_pos);
+ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length);
++void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
++void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ struct blk_zone *zone)
+@@ -78,7 +85,8 @@ static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_i
+ return 0;
+ }
+
+-static inline int btrfs_get_dev_zone_info(struct btrfs_device *device)
++static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
++ bool populate_cache)
+ {
+ return 0;
+ }
+@@ -199,6 +207,12 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
++static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
++
++static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
++
++static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length) { }
+ #endif
+
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+@@ -317,4 +331,20 @@ static inline void btrfs_clear_treelog_bg(struct btrfs_block_group *bg)
+ spin_unlock(&fs_info->treelog_bg_lock);
+ }
+
++static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
++{
++ struct btrfs_root *root = inode->root;
++
++ if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
++ mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
++}
++
++static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
++{
++ struct btrfs_root *root = inode->root;
++
++ if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
++ mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
++}
++
+ #endif
+diff --git a/fs/buffer.c b/fs/buffer.c
+index c615387aedcae..1960e2d43ae2a 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -1235,16 +1235,18 @@ static void bh_lru_install(struct buffer_head *bh)
+ int i;
+
+ check_irqs_on();
++ bh_lru_lock();
++
+ /*
+ * the refcount of buffer_head in bh_lru prevents dropping the
+ * attached page(i.e., try_to_free_buffers) so it could cause
+ * failing page migration.
+ * Skip putting upcoming bh into bh_lru until migration is done.
+ */
+- if (lru_cache_disabled())
++ if (lru_cache_disabled()) {
++ bh_lru_unlock();
+ return;
+-
+- bh_lru_lock();
++ }
+
+ b = this_cpu_ptr(&bh_lrus);
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+@@ -2350,7 +2352,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
+ {
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ int err;
+
+ err = inode_newsize_ok(inode, size);
+@@ -2376,7 +2378,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
+ struct inode *inode = mapping->host;
+ unsigned int blocksize = i_blocksize(inode);
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ pgoff_t index, curidx;
+ loff_t curpos;
+ unsigned zerofrom, offset, len;
+diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
+index d463d89f5db8c..146291be62637 100644
+--- a/fs/cachefiles/bind.c
++++ b/fs/cachefiles/bind.c
+@@ -117,7 +117,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
+ root = path.dentry;
+
+ ret = -EINVAL;
+- if (mnt_user_ns(path.mnt) != &init_user_ns) {
++ if (is_idmapped_mnt(path.mnt)) {
+ pr_warn("File cache on idmapped mounts not supported");
+ goto error_unsupported;
+ }
+diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
+index 99b80b5c7a931..b218a26291b8e 100644
+--- a/fs/ceph/addr.c
++++ b/fs/ceph/addr.c
+@@ -179,7 +179,7 @@ static int ceph_releasepage(struct page *page, gfp_t gfp)
+
+ static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
+ {
+- struct inode *inode = rreq->mapping->host;
++ struct inode *inode = rreq->inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_file_layout *lo = &ci->i_layout;
+ u32 blockoff;
+@@ -196,7 +196,7 @@ static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
+
+ static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq)
+ {
+- struct inode *inode = subreq->rreq->mapping->host;
++ struct inode *inode = subreq->rreq->inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 objno, objoff;
+@@ -242,7 +242,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
+ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
+ {
+ struct netfs_read_request *rreq = subreq->rreq;
+- struct inode *inode = rreq->mapping->host;
++ struct inode *inode = rreq->inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
+index 8f537f1d9d1d3..ca92f2d2f3c76 100644
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -1624,6 +1624,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_mds_session *session = NULL;
++ bool need_put = false;
+ int mds;
+
+ dout("ceph_flush_snaps %p\n", inode);
+@@ -1668,8 +1669,13 @@ out:
+ ceph_put_mds_session(session);
+ /* we flushed them all; remove this inode from the queue */
+ spin_lock(&mdsc->snap_flush_lock);
++ if (!list_empty(&ci->i_snap_flush_item))
++ need_put = true;
+ list_del_init(&ci->i_snap_flush_item);
+ spin_unlock(&mdsc->snap_flush_lock);
++
++ if (need_put)
++ iput(inode);
+ }
+
+ /*
+@@ -2240,33 +2246,29 @@ static int unsafe_request_wait(struct inode *inode)
+ * to wait the journal logs to be flushed by the MDSes periodically.
+ */
+ if (req1 || req2) {
+- struct ceph_mds_session **sessions = NULL;
+- struct ceph_mds_session *s;
+ struct ceph_mds_request *req;
+- unsigned int max;
++ struct ceph_mds_session **sessions;
++ struct ceph_mds_session *s;
++ unsigned int max_sessions;
+ int i;
+
+- /*
+- * The mdsc->max_sessions is unlikely to be changed
+- * mostly, here we will retry it by reallocating the
+- * sessions arrary memory to get rid of the mdsc->mutex
+- * lock.
+- */
+-retry:
+- max = mdsc->max_sessions;
+- sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
+- if (!sessions)
+- return -ENOMEM;
++ mutex_lock(&mdsc->mutex);
++ max_sessions = mdsc->max_sessions;
++
++ sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
++ if (!sessions) {
++ mutex_unlock(&mdsc->mutex);
++ err = -ENOMEM;
++ goto out;
++ }
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (req1) {
+ list_for_each_entry(req, &ci->i_unsafe_dirops,
+ r_unsafe_dir_item) {
+ s = req->r_session;
+- if (unlikely(s->s_mds >= max)) {
+- spin_unlock(&ci->i_unsafe_lock);
+- goto retry;
+- }
++ if (!s)
++ continue;
+ if (!sessions[s->s_mds]) {
+ s = ceph_get_mds_session(s);
+ sessions[s->s_mds] = s;
+@@ -2277,10 +2279,8 @@ retry:
+ list_for_each_entry(req, &ci->i_unsafe_iops,
+ r_unsafe_target_item) {
+ s = req->r_session;
+- if (unlikely(s->s_mds >= max)) {
+- spin_unlock(&ci->i_unsafe_lock);
+- goto retry;
+- }
++ if (!s)
++ continue;
+ if (!sessions[s->s_mds]) {
+ s = ceph_get_mds_session(s);
+ sessions[s->s_mds] = s;
+@@ -2292,14 +2292,15 @@ retry:
+ /* the auth MDS */
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_auth_cap) {
+- s = ci->i_auth_cap->session;
+- if (!sessions[s->s_mds])
+- sessions[s->s_mds] = ceph_get_mds_session(s);
++ s = ci->i_auth_cap->session;
++ if (!sessions[s->s_mds])
++ sessions[s->s_mds] = ceph_get_mds_session(s);
+ }
+ spin_unlock(&ci->i_ceph_lock);
++ mutex_unlock(&mdsc->mutex);
+
+ /* send flush mdlog request to MDSes */
+- for (i = 0; i < max; i++) {
++ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s) {
+ send_flush_mdlog(s);
+@@ -2316,15 +2317,19 @@ retry:
+ ceph_timeout_jiffies(req1->r_timeout));
+ if (ret)
+ err = -EIO;
+- ceph_mdsc_put_request(req1);
+ }
+ if (req2) {
+ ret = !wait_for_completion_timeout(&req2->r_safe_completion,
+ ceph_timeout_jiffies(req2->r_timeout));
+ if (ret)
+ err = -EIO;
+- ceph_mdsc_put_request(req2);
+ }
++
++out:
++ if (req1)
++ ceph_mdsc_put_request(req1);
++ if (req2)
++ ceph_mdsc_put_request(req2);
+ return err;
+ }
+
+@@ -2873,7 +2878,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
+
+ while (true) {
+ flags &= CEPH_FILE_MODE_MASK;
+- if (atomic_read(&fi->num_locks))
++ if (vfs_inode_has_locks(inode))
+ flags |= CHECK_FILELOCK;
+ _got = 0;
+ ret = try_get_cap_refs(inode, need, want, endoff,
+@@ -3512,6 +3517,15 @@ static void handle_cap_grant(struct inode *inode,
+ }
+ BUG_ON(cap->issued & ~cap->implemented);
+
++ /* don't let check_caps skip sending a response to MDS for revoke msgs */
++ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
++ cap->mds_wanted = 0;
++ if (cap == ci->i_auth_cap)
++ check_caps = 1; /* check auth cap only */
++ else
++ check_caps = 2; /* check all caps */
++ }
++
+ if (extra_info->inline_version > 0 &&
+ extra_info->inline_version >= ci->i_inline_version) {
+ ci->i_inline_version = extra_info->inline_version;
+@@ -3520,24 +3534,23 @@ static void handle_cap_grant(struct inode *inode,
+ fill_inline = true;
+ }
+
+- if (ci->i_auth_cap == cap &&
+- le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
+- if (newcaps & ~extra_info->issued)
+- wake = true;
++ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
++ if (ci->i_auth_cap == cap) {
++ if (newcaps & ~extra_info->issued)
++ wake = true;
+
+- if (ci->i_requested_max_size > max_size ||
+- !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
+- /* re-request max_size if necessary */
+- ci->i_requested_max_size = 0;
+- wake = true;
+- }
++ if (ci->i_requested_max_size > max_size ||
++ !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
++ /* re-request max_size if necessary */
++ ci->i_requested_max_size = 0;
++ wake = true;
++ }
+
+- ceph_kick_flushing_inode_caps(session, ci);
+- spin_unlock(&ci->i_ceph_lock);
++ ceph_kick_flushing_inode_caps(session, ci);
++ }
+ up_read(&session->s_mdsc->snap_rwsem);
+- } else {
+- spin_unlock(&ci->i_ceph_lock);
+ }
++ spin_unlock(&ci->i_ceph_lock);
+
+ if (fill_inline)
+ ceph_fill_inline_data(inode, NULL, extra_info->inline_data,
+@@ -4349,7 +4362,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
+ {
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
+ int bits = (fmode << 1) | 1;
+- bool is_opened = false;
++ bool already_opened = false;
+ int i;
+
+ if (count == 1)
+@@ -4357,19 +4370,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
+
+ spin_lock(&ci->i_ceph_lock);
+ for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
+- if (bits & (1 << i))
+- ci->i_nr_by_mode[i] += count;
+-
+ /*
+- * If any of the mode ref is larger than 1,
++ * If any of the mode ref is larger than 0,
+ * that means it has been already opened by
+ * others. Just skip checking the PIN ref.
+ */
+- if (i && ci->i_nr_by_mode[i] > 1)
+- is_opened = true;
++ if (i && ci->i_nr_by_mode[i])
++ already_opened = true;
++
++ if (bits & (1 << i))
++ ci->i_nr_by_mode[i] += count;
+ }
+
+- if (!is_opened)
++ if (!already_opened)
+ percpu_counter_inc(&mdsc->metric.opened_inodes);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
+index 133dbd9338e73..d91fa53e12b33 100644
+--- a/fs/ceph/dir.c
++++ b/fs/ceph/dir.c
+@@ -478,8 +478,11 @@ more:
+ 2 : (fpos_off(rde->offset) + 1);
+ err = note_last_dentry(dfi, rde->name, rde->name_len,
+ next_offset);
+- if (err)
++ if (err) {
++ ceph_mdsc_put_request(dfi->last_readdir);
++ dfi->last_readdir = NULL;
+ return err;
++ }
+ } else if (req->r_reply_info.dir_end) {
+ dfi->next_offset = 2;
+ /* keep last name */
+@@ -520,6 +523,12 @@ more:
+ if (!dir_emit(ctx, rde->name, rde->name_len,
+ ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
+ le32_to_cpu(rde->inode.in->mode) >> 12)) {
++ /*
++ * NOTE: Here no need to put the 'dfi->last_readdir',
++ * because when dir_emit stops us it's most likely
++ * doesn't have enough memory, etc. So for next readdir
++ * it will continue.
++ */
+ dout("filldir stopping us...\n");
+ return 0;
+ }
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index e61018d9764ee..cb87714fe8861 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -577,6 +577,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct inode *inode;
+ struct timespec64 now;
++ struct ceph_string *pool_ns;
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
+ struct ceph_vino vino = { .ino = req->r_deleg_ino,
+ .snap = CEPH_NOSNAP };
+@@ -591,9 +592,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+ iinfo.change_attr = 1;
+ ceph_encode_timespec64(&iinfo.btime, &now);
+
+- iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
+- iinfo.xattr_data = xattr_buf;
+- memset(iinfo.xattr_data, 0, iinfo.xattr_len);
++ if (req->r_pagelist) {
++ iinfo.xattr_len = req->r_pagelist->length;
++ iinfo.xattr_data = req->r_pagelist->mapped_tail;
++ } else {
++ /* fake it */
++ iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
++ iinfo.xattr_data = xattr_buf;
++ memset(iinfo.xattr_data, 0, iinfo.xattr_len);
++ }
+
+ in.ino = cpu_to_le64(vino.ino);
+ in.snapid = cpu_to_le64(CEPH_NOSNAP);
+@@ -603,17 +610,35 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+ in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino);
+ in.cap.flags = CEPH_CAP_FLAG_AUTH;
+ in.ctime = in.mtime = in.atime = iinfo.btime;
+- in.mode = cpu_to_le32((u32)mode);
+ in.truncate_seq = cpu_to_le32(1);
+ in.truncate_size = cpu_to_le64(-1ULL);
+ in.xattr_version = cpu_to_le64(1);
+ in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid()));
+- in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ?
+- dir->i_gid : current_fsgid()));
++ if (dir->i_mode & S_ISGID) {
++ in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid));
++
++ /* Directories always inherit the setgid bit. */
++ if (S_ISDIR(mode))
++ mode |= S_ISGID;
++ else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
++ !in_group_p(dir->i_gid) &&
++ !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID))
++ mode &= ~S_ISGID;
++ } else {
++ in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
++ }
++ in.mode = cpu_to_le32((u32)mode);
++
+ in.nlink = cpu_to_le32(1);
+ in.max_size = cpu_to_le64(lo->stripe_unit);
+
+ ceph_file_layout_to_legacy(lo, &in.layout);
++ /* lo is private, so pool_ns can't change */
++ pool_ns = rcu_dereference_raw(lo->pool_ns);
++ if (pool_ns) {
++ iinfo.pool_ns_len = pool_ns->len;
++ iinfo.pool_ns_data = pool_ns->str;
++ }
+
+ down_read(&mdsc->snap_rwsem);
+ ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
+@@ -678,6 +703,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ if (dentry->d_name.len > NAME_MAX)
+ return -ENAMETOOLONG;
+
++ /*
++ * Do not truncate the file, since atomic_open is called before the
++ * permission check. The caller will do the truncation afterward.
++ */
++ flags &= ~O_TRUNC;
++
+ if (flags & O_CREAT) {
+ if (ceph_quota_is_max_files_exceeded(dir))
+ return -EDQUOT;
+@@ -687,6 +718,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ err = ceph_security_init_secctx(dentry, mode, &as_ctx);
+ if (err < 0)
+ goto out_ctx;
++ /* Async create can't handle more than a page of xattrs */
++ if (as_ctx.pagelist &&
++ !list_is_singular(&as_ctx.pagelist->head))
++ try_async = false;
+ } else if (!d_in_lookup(dentry)) {
+ /* If it's not being looked up, it's negative */
+ return -ENOENT;
+@@ -732,16 +767,16 @@ retry:
+ restore_deleg_ino(dir, req->r_deleg_ino);
+ ceph_mdsc_put_request(req);
+ try_async = false;
++ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
+ goto retry;
+ }
++ ceph_put_string(rcu_dereference_raw(lo.pool_ns));
+ goto out_req;
+ }
+ }
+
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+- err = ceph_mdsc_do_request(mdsc,
+- (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
+- req);
++ err = ceph_mdsc_do_request(mdsc, (flags & O_CREAT) ? dir : NULL, req);
+ if (err == -ENOENT) {
+ dentry = ceph_handle_snapdir(req, dentry);
+ if (IS_ERR(dentry)) {
+@@ -2049,6 +2084,9 @@ static long ceph_fallocate(struct file *file, int mode,
+ loff_t endoff = 0;
+ loff_t size;
+
++ dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__,
++ inode, ceph_vinop(inode), mode, offset, length);
++
+ if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+@@ -2089,6 +2127,10 @@ static long ceph_fallocate(struct file *file, int mode,
+ if (ret < 0)
+ goto unlock;
+
++ ret = file_modified(file);
++ if (ret)
++ goto put_caps;
++
+ filemap_invalidate_lock(inode->i_mapping);
+ ceph_zero_pagecache_range(inode, offset, length);
+ ret = ceph_zero_objects(inode, offset, length);
+@@ -2104,6 +2146,7 @@ static long ceph_fallocate(struct file *file, int mode,
+ }
+ filemap_invalidate_unlock(inode->i_mapping);
+
++put_caps:
+ ceph_put_cap_refs(ci, got);
+ unlock:
+ inode_unlock(inode);
+diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
+index 1c7574105478f..42e449d3f18b8 100644
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -87,13 +87,13 @@ struct inode *ceph_get_snapdir(struct inode *parent)
+ if (!S_ISDIR(parent->i_mode)) {
+ pr_warn_once("bad snapdir parent type (mode=0%o)\n",
+ parent->i_mode);
+- return ERR_PTR(-ENOTDIR);
++ goto err;
+ }
+
+ if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) {
+ pr_warn_once("bad snapdir inode type (mode=0%o)\n",
+ inode->i_mode);
+- return ERR_PTR(-ENOTDIR);
++ goto err;
+ }
+
+ inode->i_mode = parent->i_mode;
+@@ -113,6 +113,12 @@ struct inode *ceph_get_snapdir(struct inode *parent)
+ }
+
+ return inode;
++err:
++ if ((inode->i_state & I_NEW))
++ discard_new_inode(inode);
++ else
++ iput(inode);
++ return ERR_PTR(-ENOTDIR);
+ }
+
+ const struct inode_operations ceph_file_iops = {
+diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
+index bdeb271f47d95..3e3b8be76b21e 100644
+--- a/fs/ceph/locks.c
++++ b/fs/ceph/locks.c
+@@ -32,18 +32,14 @@ void __init ceph_flock_init(void)
+
+ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+ {
+- struct ceph_file_info *fi = dst->fl_file->private_data;
+ struct inode *inode = file_inode(dst->fl_file);
+ atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+- atomic_inc(&fi->num_locks);
+ }
+
+ static void ceph_fl_release_lock(struct file_lock *fl)
+ {
+- struct ceph_file_info *fi = fl->fl_file->private_data;
+ struct inode *inode = file_inode(fl->fl_file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+- atomic_dec(&fi->num_locks);
+ if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+ /* clear error when all locks are released */
+ spin_lock(&ci->i_ceph_lock);
+diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
+index d64413adc0fd2..a0b6ae02a70b8 100644
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -1196,14 +1196,17 @@ static int encode_supported_features(void **p, void *end)
+ if (count > 0) {
+ size_t i;
+ size_t size = FEATURE_BYTES(count);
++ unsigned long bit;
+
+ if (WARN_ON_ONCE(*p + 4 + size > end))
+ return -ERANGE;
+
+ ceph_encode_32(p, size);
+ memset(*p, 0, size);
+- for (i = 0; i < count; i++)
+- ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8);
++ for (i = 0; i < count; i++) {
++ bit = feature_bits[i];
++ ((unsigned char *)(*p))[bit / 8] |= BIT(bit % 8);
++ }
+ *p += size;
+ } else {
+ if (WARN_ON_ONCE(*p + 4 > end))
+@@ -3540,6 +3543,12 @@ static void handle_session(struct ceph_mds_session *session,
+ break;
+
+ case CEPH_SESSION_FLUSHMSG:
++ /* flush cap releases */
++ spin_lock(&session->s_cap_lock);
++ if (session->s_num_cap_releases)
++ ceph_flush_cap_releases(mdsc, session);
++ spin_unlock(&session->s_cap_lock);
++
+ send_flushmsg_ack(mdsc, session, seq);
+ break;
+
+@@ -3772,7 +3781,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
+ struct ceph_pagelist *pagelist = recon_state->pagelist;
+ struct dentry *dentry;
+ char *path;
+- int pathlen, err;
++ int pathlen = 0, err;
+ u64 pathbase;
+ u64 snap_follows;
+
+@@ -3792,7 +3801,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
+ }
+ } else {
+ path = NULL;
+- pathlen = 0;
+ pathbase = 0;
+ }
+
+@@ -4599,7 +4607,7 @@ static void delayed_work(struct work_struct *work)
+
+ dout("mdsc delayed_work\n");
+
+- if (mdsc->stopping)
++ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
+ return;
+
+ mutex_lock(&mdsc->mutex);
+@@ -4778,7 +4786,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
+ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
+ {
+ dout("pre_umount\n");
+- mdsc->stopping = 1;
++ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
+
+ ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
+ ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
+@@ -4795,15 +4803,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
+ }
+
+ /*
+- * wait for all write mds requests to flush.
++ * flush the mdlog and wait for all write mds requests to flush.
+ */
+-static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
++static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc,
++ u64 want_tid)
+ {
+ struct ceph_mds_request *req = NULL, *nextreq;
++ struct ceph_mds_session *last_session = NULL;
+ struct rb_node *n;
+
+ mutex_lock(&mdsc->mutex);
+- dout("wait_unsafe_requests want %lld\n", want_tid);
++ dout("%s want %lld\n", __func__, want_tid);
+ restart:
+ req = __get_oldest_req(mdsc);
+ while (req && req->r_tid <= want_tid) {
+@@ -4815,14 +4825,32 @@ restart:
+ nextreq = NULL;
+ if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
+ (req->r_op & CEPH_MDS_OP_WRITE)) {
++ struct ceph_mds_session *s = req->r_session;
++
++ if (!s) {
++ req = nextreq;
++ continue;
++ }
++
+ /* write op */
+ ceph_mdsc_get_request(req);
+ if (nextreq)
+ ceph_mdsc_get_request(nextreq);
++ s = ceph_get_mds_session(s);
+ mutex_unlock(&mdsc->mutex);
+- dout("wait_unsafe_requests wait on %llu (want %llu)\n",
++
++ /* send flush mdlog request to MDS */
++ if (last_session != s) {
++ send_flush_mdlog(s);
++ ceph_put_mds_session(last_session);
++ last_session = s;
++ } else {
++ ceph_put_mds_session(s);
++ }
++ dout("%s wait on %llu (want %llu)\n", __func__,
+ req->r_tid, want_tid);
+ wait_for_completion(&req->r_safe_completion);
++
+ mutex_lock(&mdsc->mutex);
+ ceph_mdsc_put_request(req);
+ if (!nextreq)
+@@ -4837,7 +4865,8 @@ restart:
+ req = nextreq;
+ }
+ mutex_unlock(&mdsc->mutex);
+- dout("wait_unsafe_requests done\n");
++ ceph_put_mds_session(last_session);
++ dout("%s done\n", __func__);
+ }
+
+ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
+@@ -4866,7 +4895,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
+ dout("sync want tid %lld flush_seq %lld\n",
+ want_tid, want_flush);
+
+- wait_unsafe_requests(mdsc, want_tid);
++ flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid);
+ wait_caps_flush(mdsc, want_flush);
+ }
+
+diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
+index 97c7f7bfa55f3..cd943842f0a3c 100644
+--- a/fs/ceph/mds_client.h
++++ b/fs/ceph/mds_client.h
+@@ -33,10 +33,6 @@ enum ceph_feature_type {
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+ };
+
+-/*
+- * This will always have the highest feature bit value
+- * as the last element of the array.
+- */
+ #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ CEPHFS_FEATURE_MIMIC, \
+@@ -45,8 +41,6 @@ enum ceph_feature_type {
+ CEPHFS_FEATURE_MULTI_RECONNECT, \
+ CEPHFS_FEATURE_DELEG_INO, \
+ CEPHFS_FEATURE_METRIC_COLLECT, \
+- \
+- CEPHFS_FEATURE_MAX, \
+ }
+ #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+
+@@ -376,6 +370,11 @@ struct cap_wait {
+ int want;
+ };
+
++enum {
++ CEPH_MDSC_STOPPING_BEGIN = 1,
++ CEPH_MDSC_STOPPING_FLUSHED = 2,
++};
++
+ /*
+ * mds client state
+ */
+diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
+index 61d67cbcb3671..30387733765d5 100644
+--- a/fs/ceph/mdsmap.c
++++ b/fs/ceph/mdsmap.c
+@@ -263,10 +263,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
+ goto nomem;
+ for (j = 0; j < num_export_targets; j++) {
+ target = ceph_decode_32(&pexport_targets);
+- if (target >= m->possible_max_rank) {
+- err = -EIO;
+- goto corrupt;
+- }
+ info->export_targets[j] = target;
+ }
+ } else {
+diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
+index 04d5df29bbbfb..d21ff43543322 100644
+--- a/fs/ceph/metric.c
++++ b/fs/ceph/metric.c
+@@ -202,7 +202,7 @@ static void metric_delayed_work(struct work_struct *work)
+ struct ceph_mds_client *mdsc =
+ container_of(m, struct ceph_mds_client, metric);
+
+- if (mdsc->stopping)
++ if (mdsc->stopping || disable_send_metrics)
+ return;
+
+ if (!m->session || !check_session_state(m->session)) {
+diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
+index b41e6724c5910..bfa7e146f5262 100644
+--- a/fs/ceph/snap.c
++++ b/fs/ceph/snap.c
+@@ -657,8 +657,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
+ capsnap->size);
+
+ spin_lock(&mdsc->snap_flush_lock);
+- if (list_empty(&ci->i_snap_flush_item))
++ if (list_empty(&ci->i_snap_flush_item)) {
++ ihold(inode);
+ list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
++ }
+ spin_unlock(&mdsc->snap_flush_lock);
+ return 1; /* caller may want to ceph_flush_snaps */
+ }
+@@ -705,9 +707,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
+ struct ceph_mds_snap_realm *ri; /* encoded */
+ __le64 *snaps; /* encoded */
+ __le64 *prior_parent_snaps; /* encoded */
+- struct ceph_snap_realm *realm = NULL;
++ struct ceph_snap_realm *realm;
+ struct ceph_snap_realm *first_realm = NULL;
+- int invalidate = 0;
++ struct ceph_snap_realm *realm_to_rebuild = NULL;
++ int rebuild_snapcs;
+ int err = -ENOMEM;
+ LIST_HEAD(dirty_realms);
+
+@@ -715,6 +718,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
+
+ dout("update_snap_trace deletion=%d\n", deletion);
+ more:
++ realm = NULL;
++ rebuild_snapcs = 0;
+ ceph_decode_need(&p, e, sizeof(*ri), bad);
+ ri = p;
+ p += sizeof(*ri);
+@@ -738,7 +743,7 @@ more:
+ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
+ if (err < 0)
+ goto fail;
+- invalidate += err;
++ rebuild_snapcs += err;
+
+ if (le64_to_cpu(ri->seq) > realm->seq) {
+ dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+@@ -763,22 +768,30 @@ more:
+ if (realm->seq > mdsc->last_snap_seq)
+ mdsc->last_snap_seq = realm->seq;
+
+- invalidate = 1;
++ rebuild_snapcs = 1;
+ } else if (!realm->cached_context) {
+ dout("update_snap_trace %llx %p seq %lld new\n",
+ realm->ino, realm, realm->seq);
+- invalidate = 1;
++ rebuild_snapcs = 1;
+ } else {
+ dout("update_snap_trace %llx %p seq %lld unchanged\n",
+ realm->ino, realm, realm->seq);
+ }
+
+- dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
+- realm, invalidate, p, e);
++ dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino,
++ realm, rebuild_snapcs, p, e);
++
++ /*
++ * this will always track the uppest parent realm from which
++ * we need to rebuild the snapshot contexts _downward_ in
++ * hierarchy.
++ */
++ if (rebuild_snapcs)
++ realm_to_rebuild = realm;
+
+- /* invalidate when we reach the _end_ (root) of the trace */
+- if (invalidate && p >= e)
+- rebuild_snap_realms(realm, &dirty_realms);
++ /* rebuild_snapcs when we reach the _end_ (root) of the trace */
++ if (realm_to_rebuild && p >= e)
++ rebuild_snap_realms(realm_to_rebuild, &dirty_realms);
+
+ if (!first_realm)
+ first_realm = realm;
+@@ -1017,6 +1030,19 @@ skip_inode:
+ continue;
+ adjust_snap_realm_parent(mdsc, child, realm->ino);
+ }
++ } else {
++ /*
++ * In the non-split case both 'num_split_inos' and
++ * 'num_split_realms' should be 0, making this a no-op.
++ * However the MDS happens to populate 'split_realms' list
++ * in one of the UPDATE op cases by mistake.
++ *
++ * Skip both lists just in case to ensure that 'p' is
++ * positioned at the start of realm info, as expected by
++ * ceph_update_snap_trace().
++ */
++ p += sizeof(u64) * num_split_inos;
++ p += sizeof(u64) * num_split_realms;
+ }
+
+ /*
+diff --git a/fs/ceph/super.c b/fs/ceph/super.c
+index fd8742bae8471..1723ec21cd470 100644
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -52,8 +52,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+ struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
+ struct ceph_mon_client *monc = &fsc->client->monc;
+ struct ceph_statfs st;
+- u64 fsid;
+- int err;
++ int i, err;
+ u64 data_pool;
+
+ if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
+@@ -99,12 +98,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+ buf->f_namelen = NAME_MAX;
+
+ /* Must convert the fsid, for consistent values across arches */
++ buf->f_fsid.val[0] = 0;
+ mutex_lock(&monc->mutex);
+- fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
+- le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
++ for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
++ buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
+ mutex_unlock(&monc->mutex);
+
+- buf->f_fsid = u64_to_fsid(fsid);
++ /* fold the fs_cluster_id into the upper bits */
++ buf->f_fsid.val[1] = monc->fs_cluster_id;
+
+ return 0;
+ }
+@@ -1226,6 +1227,16 @@ static void ceph_kill_sb(struct super_block *s)
+ ceph_mdsc_pre_umount(fsc->mdsc);
+ flush_fs_workqueues(fsc);
+
++ /*
++ * Though the kill_anon_super() will finally trigger the
++ * sync_filesystem() anyway, we still need to do it here
++ * and then bump the stage of shutdown to stop the work
++ * queue as earlier as possible.
++ */
++ sync_filesystem(s);
++
++ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
++
+ kill_anon_super(s);
+
+ fsc->client->extra_mon_dispatch = NULL;
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 14f951cd5b61b..8c9021d0f8374 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -773,7 +773,6 @@ struct ceph_file_info {
+ struct list_head rw_contexts;
+
+ u32 filp_gen;
+- atomic_t num_locks;
+ };
+
+ struct ceph_dir_file_info {
+diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
+index 159a1ffa4f4b8..db288b4aee6d2 100644
+--- a/fs/ceph/xattr.c
++++ b/fs/ceph/xattr.c
+@@ -366,6 +366,14 @@ static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
+ }
+ #define XATTR_RSTAT_FIELD(_type, _name) \
+ XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
++#define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \
++ { \
++ .name = CEPH_XATTR_NAME(_type, _name), \
++ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
++ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
++ .exists_cb = NULL, \
++ .flags = VXATTR_FLAG_RSTAT, \
++ }
+ #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
+ { \
+ .name = CEPH_XATTR_NAME2(_type, _name, _field), \
+@@ -404,7 +412,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
+ XATTR_RSTAT_FIELD(dir, rsubdirs),
+ XATTR_RSTAT_FIELD(dir, rsnaps),
+ XATTR_RSTAT_FIELD(dir, rbytes),
+- XATTR_RSTAT_FIELD(dir, rctime),
++ XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime),
+ {
+ .name = "ceph.dir.pin",
+ .name_size = sizeof("ceph.dir.pin"),
+diff --git a/fs/char_dev.c b/fs/char_dev.c
+index ba0ded7842a77..3f667292608c0 100644
+--- a/fs/char_dev.c
++++ b/fs/char_dev.c
+@@ -547,7 +547,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev)
+ }
+
+ rc = device_add(dev);
+- if (rc)
++ if (rc && dev->devt)
+ cdev_del(cdev);
+
+ return rc;
+diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
+index de2c12bcfa4bc..e7501533c2ec9 100644
+--- a/fs/cifs/cifs_debug.c
++++ b/fs/cifs/cifs_debug.c
+@@ -173,7 +173,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+
+ seq_puts(m, "# Version:1\n");
+ seq_puts(m, "# Format:\n");
+- seq_puts(m, "# <tree id> <persistent fid> <flags> <count> <pid> <uid>");
++ seq_puts(m, "# <tree id> <ses id> <persistent fid> <flags> <count> <pid> <uid>");
+ #ifdef CONFIG_CIFS_DEBUG2
+ seq_printf(m, " <filename> <mid>\n");
+ #else
+@@ -190,8 +190,9 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+ cfile = list_entry(tmp2, struct cifsFileInfo,
+ tlist);
+ seq_printf(m,
+- "0x%x 0x%llx 0x%x %d %d %d %pd",
++ "0x%x 0x%llx 0x%llx 0x%x %d %d %d %pd",
+ tcon->tid,
++ ses->Suid,
+ cfile->fid.persistent_fid,
+ cfile->f_flags,
+ cfile->count,
+@@ -358,6 +359,8 @@ skip_rdma:
+ seq_printf(m, " signed");
+ if (server->posix_ext_supported)
+ seq_printf(m, " posix");
++ if (server->nosharesock)
++ seq_printf(m, " nosharesock");
+
+ if (server->rdma)
+ seq_printf(m, "\nRDMA ");
+@@ -412,12 +415,14 @@ skip_rdma:
+ from_kuid(&init_user_ns, ses->linux_uid),
+ from_kuid(&init_user_ns, ses->cred_uid));
+
++ spin_lock(&ses->chan_lock);
+ if (ses->chan_count > 1) {
+ seq_printf(m, "\n\n\tExtra Channels: %zu ",
+ ses->chan_count-1);
+ for (j = 1; j < ses->chan_count; j++)
+ cifs_dump_channel(m, j, &ses->chans[j]);
+ }
++ spin_unlock(&ses->chan_lock);
+
+ seq_puts(m, "\n\n\tShares: ");
+ j = 0;
+diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
+index 007427ba75e5f..020e71fe1454e 100644
+--- a/fs/cifs/cifs_dfs_ref.c
++++ b/fs/cifs/cifs_dfs_ref.c
+@@ -258,65 +258,23 @@ compose_mount_options_err:
+ goto compose_mount_options_out;
+ }
+
+-/**
+- * cifs_dfs_do_mount - mounts specified path using DFS full path
+- *
+- * Always pass down @fullpath to smb3_do_mount() so we can use the root server
+- * to perform failover in case we failed to connect to the first target in the
+- * referral.
+- *
+- * @mntpt: directory entry for the path we are trying to automount
+- * @cifs_sb: parent/root superblock
+- * @fullpath: full path in UNC format
+- */
+-static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt,
+- struct cifs_sb_info *cifs_sb,
+- const char *fullpath)
+-{
+- struct vfsmount *mnt;
+- char *mountdata;
+- char *devname;
+-
+- devname = kstrdup(fullpath, GFP_KERNEL);
+- if (!devname)
+- return ERR_PTR(-ENOMEM);
+-
+- convert_delimiter(devname, '/');
+-
+- /* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */
+-
+- /* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */
+-
+- /* strip first '\' from fullpath */
+- mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+- fullpath + 1, NULL, NULL);
+- if (IS_ERR(mountdata)) {
+- kfree(devname);
+- return (struct vfsmount *)mountdata;
+- }
+-
+- mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
+- kfree(mountdata);
+- kfree(devname);
+- return mnt;
+-}
+-
+ /*
+ * Create a vfsmount that we can automount
+ */
+-static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
++static struct vfsmount *cifs_dfs_do_automount(struct path *path)
+ {
+- struct cifs_sb_info *cifs_sb;
+- struct cifs_ses *ses;
+- struct cifs_tcon *tcon;
+- void *page;
+- char *full_path, *root_path;
+- unsigned int xid;
+ int rc;
++ struct dentry *mntpt = path->dentry;
++ struct fs_context *fc;
++ struct cifs_sb_info *cifs_sb;
++ void *page = NULL;
++ struct smb3_fs_context *ctx, *cur_ctx;
++ struct smb3_fs_context tmp;
++ char *full_path;
+ struct vfsmount *mnt;
+
+- cifs_dbg(FYI, "in %s\n", __func__);
+- BUG_ON(IS_ROOT(mntpt));
++ if (IS_ROOT(mntpt))
++ return ERR_PTR(-ESTALE);
+
+ /*
+ * The MSDFS spec states that paths in DFS referral requests and
+@@ -324,81 +282,48 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
+ * the double backslashes usually used in the UNC. This function
+ * gives us the latter, so we must adjust the result.
+ */
+- mnt = ERR_PTR(-ENOMEM);
+-
+ cifs_sb = CIFS_SB(mntpt->d_sb);
+- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
+- mnt = ERR_PTR(-EREMOTE);
+- goto cdda_exit;
+- }
++ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
++ return ERR_PTR(-EREMOTE);
++
++ cur_ctx = cifs_sb->ctx;
++
++ fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, mntpt);
++ if (IS_ERR(fc))
++ return ERR_CAST(fc);
++
++ ctx = smb3_fc2context(fc);
+
+ page = alloc_dentry_path();
+ /* always use tree name prefix */
+ full_path = build_path_from_dentry_optional_prefix(mntpt, page, true);
+ if (IS_ERR(full_path)) {
+ mnt = ERR_CAST(full_path);
+- goto free_full_path;
++ goto out;
+ }
+
+- convert_delimiter(full_path, '\\');
+-
++ convert_delimiter(full_path, '/');
+ cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
+
+- if (!cifs_sb_master_tlink(cifs_sb)) {
+- cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__);
+- goto free_full_path;
+- }
+-
+- tcon = cifs_sb_master_tcon(cifs_sb);
+- if (!tcon) {
+- cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__);
+- goto free_full_path;
+- }
+-
+- root_path = kstrdup(tcon->treeName, GFP_KERNEL);
+- if (!root_path) {
+- mnt = ERR_PTR(-ENOMEM);
+- goto free_full_path;
+- }
+- cifs_dbg(FYI, "%s: root path: %s\n", __func__, root_path);
+-
+- ses = tcon->ses;
+- xid = get_xid();
+-
+- /*
+- * If DFS root has been expired, then unconditionally fetch it again to
+- * refresh DFS referral cache.
+- */
+- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+- root_path + 1, NULL, NULL);
+- if (!rc) {
+- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
+- cifs_remap(cifs_sb), full_path + 1,
+- NULL, NULL);
+- }
+-
+- free_xid(xid);
++ tmp = *cur_ctx;
++ tmp.source = full_path;
++ tmp.UNC = tmp.prepath = NULL;
+
++ rc = smb3_fs_context_dup(ctx, &tmp);
+ if (rc) {
+ mnt = ERR_PTR(rc);
+- goto free_root_path;
++ goto out;
+ }
+- /*
+- * OK - we were able to get and cache a referral for @full_path.
+- *
+- * Now, pass it down to cifs_mount() and it will retry every available
+- * node server in case of failures - no need to do it here.
+- */
+- mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path);
+- cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__,
+- full_path + 1, mnt);
+
+-free_root_path:
+- kfree(root_path);
+-free_full_path:
++ rc = smb3_parse_devname(full_path, ctx);
++ if (!rc)
++ mnt = fc_mount(fc);
++ else
++ mnt = ERR_PTR(rc);
++
++out:
++ put_fs_context(fc);
+ free_dentry_path(page);
+-cdda_exit:
+- cifs_dbg(FYI, "leaving %s\n" , __func__);
+ return mnt;
+ }
+
+@@ -409,9 +334,9 @@ struct vfsmount *cifs_dfs_d_automount(struct path *path)
+ {
+ struct vfsmount *newmnt;
+
+- cifs_dbg(FYI, "in %s\n", __func__);
++ cifs_dbg(FYI, "%s: %pd\n", __func__, path->dentry);
+
+- newmnt = cifs_dfs_do_automount(path->dentry);
++ newmnt = cifs_dfs_do_automount(path);
+ if (IS_ERR(newmnt)) {
+ cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__);
+ return newmnt;
+diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
+index f97407520ea15..013a4bd65280c 100644
+--- a/fs/cifs/cifs_fs_sb.h
++++ b/fs/cifs/cifs_fs_sb.h
+@@ -61,11 +61,6 @@ struct cifs_sb_info {
+ /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
+ char *prepath;
+
+- /*
+- * Canonical DFS path initially provided by the mount call. We might connect to something
+- * different via DFS but we want to keep it to do failover properly.
+- */
+- char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
+ /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
+ uuid_t dfs_mount_id;
+ /*
+diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
+index ee3aab3dd4ac6..bf861fef2f0c3 100644
+--- a/fs/cifs/cifsacl.c
++++ b/fs/cifs/cifsacl.c
+@@ -949,6 +949,9 @@ static void populate_new_aces(char *nacl_base,
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += setup_special_mode_ACE(pnntace, nmode);
+ num_aces++;
++ pnntace = (struct cifs_ace *) (nacl_base + nsize);
++ nsize += setup_authusers_ACE(pnntace);
++ num_aces++;
+ goto set_size;
+ }
+
+@@ -1297,7 +1300,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
+
+ if (uid_valid(uid)) { /* chown */
+ uid_t id;
+- nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
++ nowner_sid_ptr = kzalloc(sizeof(struct cifs_sid),
+ GFP_KERNEL);
+ if (!nowner_sid_ptr) {
+ rc = -ENOMEM;
+@@ -1326,7 +1329,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
+ }
+ if (gid_valid(gid)) { /* chgrp */
+ gid_t id;
+- ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
++ ngroup_sid_ptr = kzalloc(sizeof(struct cifs_sid),
+ GFP_KERNEL);
+ if (!ngroup_sid_ptr) {
+ rc = -ENOMEM;
+@@ -1613,7 +1616,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
+ nsecdesclen = secdesclen;
+ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
+ if (mode_from_sid)
+- nsecdesclen += sizeof(struct cifs_ace);
++ nsecdesclen += 2 * sizeof(struct cifs_ace);
+ else /* cifsacl */
+ nsecdesclen += 5 * sizeof(struct cifs_ace);
+ } else { /* chown */
+diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
+index 9fa930dfd78d6..b5ae209539ff1 100644
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb)
+ if (rc)
+ goto out_no_root;
+ /* tune readahead according to rsize if readahead size not set on mount */
++ if (cifs_sb->ctx->rsize == 0)
++ cifs_sb->ctx->rsize =
++ tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx);
+ if (cifs_sb->ctx->rasize)
+ sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
+ else
+@@ -254,26 +257,33 @@ static void cifs_kill_sb(struct super_block *sb)
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon;
+ struct cached_fid *cfid;
++ struct rb_root *root = &cifs_sb->tlink_tree;
++ struct rb_node *node;
++ struct tcon_link *tlink;
+
+ /*
+ * We ned to release all dentries for the cached directories
+ * before we kill the sb.
+ */
+ if (cifs_sb->root) {
++ for (node = rb_first(root); node; node = rb_next(node)) {
++ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
++ tcon = tlink_tcon(tlink);
++ if (IS_ERR(tcon))
++ continue;
++ cfid = &tcon->crfid;
++ mutex_lock(&cfid->fid_mutex);
++ if (cfid->dentry) {
++ dput(cfid->dentry);
++ cfid->dentry = NULL;
++ }
++ mutex_unlock(&cfid->fid_mutex);
++ }
++
++ /* finally release root dentry */
+ dput(cifs_sb->root);
+ cifs_sb->root = NULL;
+ }
+- tcon = cifs_sb_master_tcon(cifs_sb);
+- if (tcon) {
+- cfid = &tcon->crfid;
+- mutex_lock(&cfid->fid_mutex);
+- if (cfid->dentry) {
+-
+- dput(cfid->dentry);
+- cfid->dentry = NULL;
+- }
+- mutex_unlock(&cfid->fid_mutex);
+- }
+
+ kill_anon_super(sb);
+ cifs_umount(cifs_sb);
+@@ -646,9 +656,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
+ seq_printf(s, ",echo_interval=%lu",
+ tcon->ses->server->echo_interval / HZ);
+
+- /* Only display max_credits if it was overridden on mount */
++ /* Only display the following if overridden on mount */
+ if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE)
+ seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits);
++ if (tcon->ses->server->tcp_nodelay)
++ seq_puts(s, ",tcpnodelay");
++ if (tcon->ses->server->noautotune)
++ seq_puts(s, ",noautotune");
++ if (tcon->ses->server->noblocksnd)
++ seq_puts(s, ",noblocksend");
+
+ if (tcon->snapshot_time)
+ seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
+@@ -666,6 +682,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
+ seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ);
+ seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ);
+ }
++ seq_printf(s, ",closetimeo=%lu", cifs_sb->ctx->closetimeo / HZ);
+
+ if (tcon->ses->chan_max > 1)
+ seq_printf(s, ",multichannel,max_channels=%zu",
+@@ -698,6 +715,7 @@ static void cifs_umount_begin(struct super_block *sb)
+ tcon->tidStatus = CifsExiting;
+ spin_unlock(&cifs_tcp_ses_lock);
+
++ cifs_close_all_deferred_files(tcon);
+ /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
+ /* cancel_notify_requests(tcon); */
+ if (tcon->ses && tcon->ses->server) {
+@@ -713,6 +731,20 @@ static void cifs_umount_begin(struct super_block *sb)
+ return;
+ }
+
++static int cifs_freeze(struct super_block *sb)
++{
++ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
++ struct cifs_tcon *tcon;
++
++ if (cifs_sb == NULL)
++ return 0;
++
++ tcon = cifs_sb_master_tcon(cifs_sb);
++
++ cifs_close_all_deferred_files(tcon);
++ return 0;
++}
++
+ #ifdef CONFIG_CIFS_STATS2
+ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
+ {
+@@ -744,6 +776,7 @@ static const struct super_operations cifs_super_ops = {
+ as opens */
+ .show_options = cifs_show_options,
+ .umount_begin = cifs_umount_begin,
++ .freeze_fs = cifs_freeze,
+ #ifdef CONFIG_CIFS_STATS2
+ .show_stats = cifs_show_stats,
+ #endif
+@@ -815,11 +848,11 @@ struct dentry *
+ cifs_smb3_do_mount(struct file_system_type *fs_type,
+ int flags, struct smb3_fs_context *old_ctx)
+ {
+- int rc;
+- struct super_block *sb;
+- struct cifs_sb_info *cifs_sb = NULL;
+ struct cifs_mnt_data mnt_data;
++ struct cifs_sb_info *cifs_sb;
++ struct super_block *sb;
+ struct dentry *root;
++ int rc;
+
+ /*
+ * Prints in Kernel / CIFS log the attempted mount operation
+@@ -830,11 +863,9 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
+ else
+ cifs_info("Attempting to mount %s\n", old_ctx->UNC);
+
+- cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
+- if (cifs_sb == NULL) {
+- root = ERR_PTR(-ENOMEM);
+- goto out;
+- }
++ cifs_sb = kzalloc(sizeof(*cifs_sb), GFP_KERNEL);
++ if (!cifs_sb)
++ return ERR_PTR(-ENOMEM);
+
+ cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
+ if (!cifs_sb->ctx) {
+@@ -877,10 +908,8 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
+
+ sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
+ if (IS_ERR(sb)) {
+- root = ERR_CAST(sb);
+ cifs_umount(cifs_sb);
+- cifs_sb = NULL;
+- goto out;
++ return ERR_CAST(sb);
+ }
+
+ if (sb->s_root) {
+@@ -909,12 +938,11 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
+
+ out_super:
+ deactivate_locked_super(sb);
++ return root;
+ out:
+- if (cifs_sb) {
+- kfree(cifs_sb->prepath);
+- smb3_cleanup_fs_context(cifs_sb->ctx);
+- kfree(cifs_sb);
+- }
++ kfree(cifs_sb->prepath);
++ smb3_cleanup_fs_context(cifs_sb->ctx);
++ kfree(cifs_sb);
+ return root;
+ }
+
+@@ -925,7 +953,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+ ssize_t rc;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+- if (iocb->ki_filp->f_flags & O_DIRECT)
++ if (iocb->ki_flags & IOCB_DIRECT)
+ return cifs_user_readv(iocb, iter);
+
+ rc = cifs_revalidate_mapping(inode);
+@@ -1061,7 +1089,7 @@ struct file_system_type cifs_fs_type = {
+ };
+ MODULE_ALIAS_FS("cifs");
+
+-static struct file_system_type smb3_fs_type = {
++struct file_system_type smb3_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "smb3",
+ .init_fs_context = smb3_init_fs_context,
+@@ -1250,8 +1278,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
+ ssize_t rc;
+ struct cifsFileInfo *cfile = dst_file->private_data;
+
+- if (cfile->swapfile)
+- return -EOPNOTSUPP;
++ if (cfile->swapfile) {
++ rc = -EOPNOTSUPP;
++ free_xid(xid);
++ return rc;
++ }
+
+ rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
+ len, flags);
+diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
+index b50da1901ebd2..f7c91a3eb9a46 100644
+--- a/fs/cifs/cifsfs.h
++++ b/fs/cifs/cifsfs.h
+@@ -38,7 +38,7 @@ static inline unsigned long cifs_get_time(struct dentry *dentry)
+ return (unsigned long) dentry->d_fsdata;
+ }
+
+-extern struct file_system_type cifs_fs_type;
++extern struct file_system_type cifs_fs_type, smb3_fs_type;
+ extern const struct address_space_operations cifs_addr_ops;
+ extern const struct address_space_operations cifs_addr_ops_smallbuf;
+
+@@ -118,7 +118,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops;
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
+ #else
+-#define cifs_dfs_d_automount NULL
++static inline struct vfsmount *cifs_dfs_d_automount(struct path *path)
++{
++ return ERR_PTR(-EREMOTE);
++}
+ #endif
+
+ /* Functions related to symlinks */
+diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
+index e916470468ea9..2ee67a27020d9 100644
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -13,6 +13,8 @@
+ #include <linux/in6.h>
+ #include <linux/inet.h>
+ #include <linux/slab.h>
++#include <linux/scatterlist.h>
++#include <linux/mm.h>
+ #include <linux/mempool.h>
+ #include <linux/workqueue.h>
+ #include "cifs_fs_sb.h"
+@@ -21,6 +23,7 @@
+ #include <linux/scatterlist.h>
+ #include <uapi/linux/cifs/cifs_mount.h>
+ #include "smb2pdu.h"
++#include "smb2glob.h"
+
+ #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
+
+@@ -74,7 +77,8 @@
+ #define SMB_ECHO_INTERVAL_MAX 600
+ #define SMB_ECHO_INTERVAL_DEFAULT 60
+
+-/* dns resolution interval in seconds */
++/* dns resolution intervals in seconds */
++#define SMB_DNS_RESOLVE_INTERVAL_MIN 120
+ #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
+
+ /* maximum number of PDUs in one compound */
+@@ -390,8 +394,8 @@ struct smb_version_operations {
+ /* check for STATUS_NETWORK_SESSION_EXPIRED */
+ bool (*is_session_expired)(char *);
+ /* send oplock break response */
+- int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
+- struct cifsInodeInfo *);
++ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid,
++ __u16 net_fid, struct cifsInodeInfo *cifs_inode);
+ /* query remote filesystem */
+ int (*queryfs)(const unsigned int, struct cifs_tcon *,
+ struct cifs_sb_info *, struct kstatfs *);
+@@ -591,6 +595,7 @@ struct TCP_Server_Info {
+ struct list_head pending_mid_q;
+ bool noblocksnd; /* use blocking sendmsg */
+ bool noautotune; /* do not autotune send buf sizes */
++ bool nosharesock;
+ bool tcp_nodelay;
+ unsigned int credits; /* send no more requests at once */
+ unsigned int max_credits; /* can override large 32000 default at mnt */
+@@ -691,6 +696,19 @@ struct TCP_Server_Info {
+ #endif
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+ bool is_dfs_conn; /* if a dfs connection */
++ struct mutex refpath_lock; /* protects leaf_fullpath */
++ /*
++ * Canonical DFS full paths that were used to chase referrals in mount and reconnect.
++ *
++ * origin_fullpath: first or original referral path
++ * leaf_fullpath: last referral path (might be changed due to nested links in reconnect)
++ *
++ * current_fullpath: pointer to either origin_fullpath or leaf_fullpath
++ * NOTE: cannot be accessed outside cifs_reconnect() and smb2_reconnect()
++ *
++ * format: \\HOST\SHARE\[OPTIONAL PATH]
++ */
++ char *origin_fullpath, *leaf_fullpath, *current_fullpath;
+ #endif
+ };
+
+@@ -932,16 +950,21 @@ struct cifs_ses {
+ * iface_lock should be taken when accessing any of these fields
+ */
+ spinlock_t iface_lock;
++ /* ========= begin: protected by iface_lock ======== */
+ struct cifs_server_iface *iface_list;
+ size_t iface_count;
+ unsigned long iface_last_update; /* jiffies */
++ /* ========= end: protected by iface_lock ======== */
+
++ spinlock_t chan_lock;
++ /* ========= begin: protected by chan_lock ======== */
+ #define CIFS_MAX_CHANNELS 16
+ struct cifs_chan chans[CIFS_MAX_CHANNELS];
+ struct cifs_chan *binding_chan;
+ size_t chan_count;
+ size_t chan_max;
+ atomic_t chan_seq; /* round robin state */
++ /* ========= end: protected by chan_lock ======== */
+ };
+
+ /*
+@@ -1090,7 +1113,6 @@ struct cifs_tcon {
+ struct cached_fid crfid; /* Cached root fid */
+ /* BB add field for back pointer to sb struct(s)? */
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+- char *dfs_path; /* canonical DFS path */
+ struct list_head ulist; /* cache update list */
+ #endif
+ };
+@@ -1883,11 +1905,13 @@ extern mempool_t *cifs_mid_poolp;
+
+ /* Operations for different SMB versions */
+ #define SMB1_VERSION_STRING "1.0"
++#define SMB20_VERSION_STRING "2.0"
++#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ extern struct smb_version_operations smb1_operations;
+ extern struct smb_version_values smb1_values;
+-#define SMB20_VERSION_STRING "2.0"
+ extern struct smb_version_operations smb20_operations;
+ extern struct smb_version_values smb20_values;
++#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+ #define SMB21_VERSION_STRING "2.1"
+ extern struct smb_version_operations smb21_operations;
+ extern struct smb_version_values smb21_values;
+@@ -1941,4 +1965,80 @@ static inline bool is_tcon_dfs(struct cifs_tcon *tcon)
+ tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT);
+ }
+
++static inline bool cifs_is_referral_server(struct cifs_tcon *tcon,
++ const struct dfs_info3_param *ref)
++{
++ /*
++ * Check if all targets are capable of handling DFS referrals as per
++ * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
++ */
++ return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER));
++}
++
++static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
++ int num_rqst,
++ const u8 *sig)
++{
++ unsigned int len, skip;
++ unsigned int nents = 0;
++ unsigned long addr;
++ int i, j;
++
++ /* Assumes the first rqst has a transform header as the first iov.
++ * I.e.
++ * rqst[0].rq_iov[0] is transform header
++ * rqst[0].rq_iov[1+] data to be encrypted/decrypted
++ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
++ */
++ for (i = 0; i < num_rqst; i++) {
++ /*
++ * The first rqst has a transform header where the
++ * first 20 bytes are not part of the encrypted blob.
++ */
++ for (j = 0; j < rqst[i].rq_nvec; j++) {
++ struct kvec *iov = &rqst[i].rq_iov[j];
++
++ skip = (i == 0) && (j == 0) ? 20 : 0;
++ addr = (unsigned long)iov->iov_base + skip;
++ if (unlikely(is_vmalloc_addr((void *)addr))) {
++ len = iov->iov_len - skip;
++ nents += DIV_ROUND_UP(offset_in_page(addr) + len,
++ PAGE_SIZE);
++ } else {
++ nents++;
++ }
++ }
++ nents += rqst[i].rq_npages;
++ }
++ nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
++ return nents;
++}
++
++/* We can not use the normal sg_set_buf() as we will sometimes pass a
++ * stack object as buf.
++ */
++static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
++ const void *buf,
++ unsigned int buflen)
++{
++ unsigned long addr = (unsigned long)buf;
++ unsigned int off = offset_in_page(addr);
++
++ addr &= PAGE_MASK;
++ if (unlikely(is_vmalloc_addr((void *)addr))) {
++ do {
++ unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);
++
++ sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off);
++
++ off = 0;
++ addr += PAGE_SIZE;
++ buflen -= len;
++ } while (buflen);
++ } else {
++ sg_set_page(sg++, virt_to_page(addr), buflen, off);
++ }
++ return sg;
++}
++
+ #endif /* _CIFS_GLOB_H */
+diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
+index d0f85b666662d..50844d51da5d9 100644
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -590,8 +590,8 @@ int cifs_alloc_hash(const char *name, struct crypto_shash **shash,
+ struct sdesc **sdesc);
+ void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc);
+
+-extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
+- unsigned int *len, unsigned int *offset);
++void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
++ unsigned int *len, unsigned int *offset);
+ struct cifs_chan *
+ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
+ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
+@@ -607,7 +607,7 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
+
+ struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server);
+ void cifs_put_tcp_super(struct super_block *sb);
+-int update_super_prepath(struct cifs_tcon *tcon, char *prefix);
++int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
+ char *extract_hostname(const char *unc);
+ char *extract_sharename(const char *unc);
+
+@@ -634,4 +634,7 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
+ return options;
+ }
+
++struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
++void cifs_put_tcon_super(struct super_block *sb);
++
+ #endif /* _CIFSPROTO_H */
+diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
+index 243d17696f061..6ca08e473a7e0 100644
+--- a/fs/cifs/cifssmb.c
++++ b/fs/cifs/cifssmb.c
+@@ -4751,8 +4751,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
+ return -ENODEV;
+
+ getDFSRetry:
+- rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
+- (void **) &pSMBr);
++ /*
++ * Use smb_init_no_reconnect() instead of smb_init() as
++ * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus
++ * causing an infinite recursion.
++ */
++ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc,
++ (void **)&pSMB, (void **)&pSMBr);
+ if (rc)
+ return rc;
+
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index c3b94c1e45913..a521c705b0d7a 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -61,6 +61,20 @@ extern bool disable_legacy_dialects;
+ /* Drop the connection to not overload the server */
+ #define NUM_STATUS_IO_TIMEOUT 5
+
++struct mount_ctx {
++ struct cifs_sb_info *cifs_sb;
++ struct smb3_fs_context *fs_ctx;
++ unsigned int xid;
++ struct TCP_Server_Info *server;
++ struct cifs_ses *ses;
++ struct cifs_tcon *tcon;
++#ifdef CONFIG_CIFS_DFS_UPCALL
++ struct cifs_ses *root_ses;
++ uuid_t mount_id;
++ char *origin_fullpath, *leaf_fullpath;
++#endif
++};
++
+ static int ip_connect(struct TCP_Server_Info *server);
+ static int generic_ip_connect(struct TCP_Server_Info *server);
+ static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
+@@ -115,7 +129,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
+ * To make sure we don't use the cached entry, retry 1s
+ * after expiry.
+ */
+- ttl = (expiry - now + 1);
++ ttl = max_t(unsigned long, expiry - now, SMB_DNS_RESOLVE_INTERVAL_MIN) + 1;
+ }
+ rc = !rc ? -1 : 0;
+
+@@ -148,131 +162,29 @@ static void cifs_resolve_server(struct work_struct *work)
+ mutex_unlock(&server->srv_mutex);
+ }
+
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+-/* These functions must be called with server->srv_mutex held */
+-static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
+- struct cifs_sb_info *cifs_sb,
+- struct dfs_cache_tgt_list *tgt_list,
+- struct dfs_cache_tgt_iterator **tgt_it)
+-{
+- const char *name;
+- int rc;
+-
+- if (!cifs_sb || !cifs_sb->origin_fullpath)
+- return;
+-
+- if (!*tgt_it) {
+- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+- } else {
+- *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
+- if (!*tgt_it)
+- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+- }
+-
+- cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath);
+-
+- name = dfs_cache_get_tgt_name(*tgt_it);
+-
+- kfree(server->hostname);
+-
+- server->hostname = extract_hostname(name);
+- if (IS_ERR(server->hostname)) {
+- cifs_dbg(FYI,
+- "%s: failed to extract hostname from target: %ld\n",
+- __func__, PTR_ERR(server->hostname));
+- return;
+- }
+-
+- rc = reconn_set_ipaddr_from_hostname(server);
+- if (rc) {
+- cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+- __func__, rc);
+- }
+-}
+-
+-static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb,
+- struct dfs_cache_tgt_list *tl)
+-{
+- if (!cifs_sb->origin_fullpath)
+- return -EOPNOTSUPP;
+- return dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl);
+-}
+-#endif
+-
+-/*
+- * cifs tcp session reconnection
++/**
++ * Mark all sessions and tcons for reconnect.
+ *
+- * mark tcp session as reconnecting so temporarily locked
+- * mark all smb sessions as reconnecting for tcp session
+- * reconnect tcp session
+- * wake up waiters on reconnection? - (not needed currently)
++ * @server needs to be previously set to CifsNeedReconnect.
+ */
+-int
+-cifs_reconnect(struct TCP_Server_Info *server)
++static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server)
+ {
+- int rc = 0;
+ struct list_head *tmp, *tmp2;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+ struct mid_q_entry *mid_entry;
+ struct list_head retry_list;
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- struct super_block *sb = NULL;
+- struct cifs_sb_info *cifs_sb = NULL;
+- struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
+- struct dfs_cache_tgt_iterator *tgt_it = NULL;
+-#endif
+
+- spin_lock(&GlobalMid_Lock);
+- server->nr_targets = 1;
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- spin_unlock(&GlobalMid_Lock);
+- sb = cifs_get_tcp_super(server);
+- if (IS_ERR(sb)) {
+- rc = PTR_ERR(sb);
+- cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n",
+- __func__, rc);
+- sb = NULL;
+- } else {
+- cifs_sb = CIFS_SB(sb);
+- rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list);
+- if (rc) {
+- cifs_sb = NULL;
+- if (rc != -EOPNOTSUPP) {
+- cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n",
+- __func__);
+- }
+- } else {
+- server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list);
+- }
+- }
+- cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__,
+- server->nr_targets);
+- spin_lock(&GlobalMid_Lock);
+-#endif
+- if (server->tcpStatus == CifsExiting) {
+- /* the demux thread will exit normally
+- next time through the loop */
+- spin_unlock(&GlobalMid_Lock);
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- dfs_cache_free_tgts(&tgt_list);
+- cifs_put_tcp_super(sb);
+-#endif
+- wake_up(&server->response_q);
+- return rc;
+- } else
+- server->tcpStatus = CifsNeedReconnect;
+- spin_unlock(&GlobalMid_Lock);
+ server->maxBuf = 0;
+ server->max_read = 0;
+
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
+-
+- /* before reconnecting the tcp session, mark the smb session (uid)
+- and the tid bad so they are not used until reconnected */
+- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n",
+- __func__);
++ /*
++ * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
++ * are not used until reconnected.
++ */
++ cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+@@ -290,11 +202,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
+ cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
+ mutex_lock(&server->srv_mutex);
+ if (server->ssocket) {
+- cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n",
+- server->ssocket->state, server->ssocket->flags);
++ cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state,
++ server->ssocket->flags);
+ kernel_sock_shutdown(server->ssocket, SHUT_WR);
+- cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n",
+- server->ssocket->state, server->ssocket->flags);
++ cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", server->ssocket->state,
++ server->ssocket->flags);
+ sock_release(server->ssocket);
+ server->ssocket = NULL;
+ }
+@@ -333,38 +245,48 @@ cifs_reconnect(struct TCP_Server_Info *server)
+ smbd_destroy(server);
+ mutex_unlock(&server->srv_mutex);
+ }
++}
++
++static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
++{
++ spin_lock(&GlobalMid_Lock);
++ server->nr_targets = num_targets;
++ if (server->tcpStatus == CifsExiting) {
++ /* the demux thread will exit normally next time through the loop */
++ spin_unlock(&GlobalMid_Lock);
++ wake_up(&server->response_q);
++ return false;
++ }
++ server->tcpStatus = CifsNeedReconnect;
++ spin_unlock(&GlobalMid_Lock);
++ return true;
++}
++
++/*
++ * cifs tcp session reconnection
++ *
++ * mark tcp session as reconnecting so temporarily locked
++ * mark all smb sessions as reconnecting for tcp session
++ * reconnect tcp session
++ * wake up waiters on reconnection? - (not needed currently)
++ */
++static int __cifs_reconnect(struct TCP_Server_Info *server)
++{
++ int rc = 0;
++
++ if (!cifs_tcp_ses_needs_reconnect(server, 1))
++ return 0;
++
++ cifs_mark_tcp_ses_conns_for_reconnect(server);
+
+ do {
+ try_to_freeze();
+-
+ mutex_lock(&server->srv_mutex);
+
+-
+ if (!cifs_swn_set_server_dstaddr(server)) {
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- if (cifs_sb && cifs_sb->origin_fullpath)
+- /*
+- * Set up next DFS target server (if any) for reconnect. If DFS
+- * feature is disabled, then we will retry last server we
+- * connected to before.
+- */
+- reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+- else {
+-#endif
+- /*
+- * Resolve the hostname again to make sure that IP address is up-to-date.
+- */
++ /* resolve the hostname again to make sure that IP address is up-to-date */
+ rc = reconn_set_ipaddr_from_hostname(server);
+- if (rc) {
+- cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+- __func__, rc);
+- }
+-
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- }
+-#endif
+-
+-
++ cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
+ }
+
+ if (cifs_rdma_enabled(server))
+@@ -372,8 +294,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
+ else
+ rc = generic_ip_connect(server);
+ if (rc) {
+- cifs_dbg(FYI, "reconnect error %d\n", rc);
+ mutex_unlock(&server->srv_mutex);
++ cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
+ msleep(3000);
+ } else {
+ atomic_inc(&tcpSesReconnectCount);
+@@ -387,19 +309,128 @@ cifs_reconnect(struct TCP_Server_Info *server)
+ }
+ } while (server->tcpStatus == CifsNeedReconnect);
+
++ if (server->tcpStatus == CifsNeedNegotiate)
++ mod_delayed_work(cifsiod_wq, &server->echo, 0);
++
++ wake_up(&server->response_q);
++ return rc;
++}
++
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+- if (tgt_it) {
+- rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1,
+- tgt_it);
+- if (rc) {
+- cifs_server_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n",
+- __func__, rc);
++static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const char *target)
++{
++ int rc;
++ char *hostname;
++
++ if (!cifs_swn_set_server_dstaddr(server)) {
++ if (server->hostname != target) {
++ hostname = extract_hostname(target);
++ if (!IS_ERR(hostname)) {
++ kfree(server->hostname);
++ server->hostname = hostname;
++ } else {
++ cifs_dbg(FYI, "%s: couldn't extract hostname or address from dfs target: %ld\n",
++ __func__, PTR_ERR(hostname));
++ cifs_dbg(FYI, "%s: default to last target server: %s\n", __func__,
++ server->hostname);
++ }
++ }
++ /* resolve the hostname again to make sure that IP address is up-to-date. */
++ rc = reconn_set_ipaddr_from_hostname(server);
++ cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
++ }
++ /* Reconnect the socket */
++ if (cifs_rdma_enabled(server))
++ rc = smbd_reconnect(server);
++ else
++ rc = generic_ip_connect(server);
++
++ return rc;
++}
++
++static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_cache_tgt_list *tl,
++ struct dfs_cache_tgt_iterator **target_hint)
++{
++ int rc;
++ struct dfs_cache_tgt_iterator *tit;
++
++ *target_hint = NULL;
++
++ /* If dfs target list is empty, then reconnect to last server */
++ tit = dfs_cache_get_tgt_iterator(tl);
++ if (!tit)
++ return __reconnect_target_unlocked(server, server->hostname);
++
++ /* Otherwise, try every dfs target in @tl */
++ for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
++ rc = __reconnect_target_unlocked(server, dfs_cache_get_tgt_name(tit));
++ if (!rc) {
++ *target_hint = tit;
++ break;
+ }
+- dfs_cache_free_tgts(&tgt_list);
+ }
++ return rc;
++}
+
+- cifs_put_tcp_super(sb);
+-#endif
++static int reconnect_dfs_server(struct TCP_Server_Info *server)
++{
++ int rc = 0;
++ const char *refpath = server->current_fullpath + 1;
++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
++ struct dfs_cache_tgt_iterator *target_hint = NULL;
++ int num_targets = 0;
++
++ /*
++ * Determine the number of dfs targets the referral path in @cifs_sb resolves to.
++ *
++ * smb2_reconnect() needs to know how long it should wait based upon the number of dfs
++ * targets (server->nr_targets). It's also possible that the cached referral was cleared
++ * through /proc/fs/cifs/dfscache or the target list is empty due to server settings after
++ * refreshing the referral, so, in this case, default it to 1.
++ */
++ if (!dfs_cache_noreq_find(refpath, NULL, &tl))
++ num_targets = dfs_cache_get_nr_tgts(&tl);
++ if (!num_targets)
++ num_targets = 1;
++
++ if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
++ return 0;
++
++ cifs_mark_tcp_ses_conns_for_reconnect(server);
++
++ do {
++ try_to_freeze();
++ mutex_lock(&server->srv_mutex);
++
++ rc = reconnect_target_unlocked(server, &tl, &target_hint);
++ if (rc) {
++ /* Failed to reconnect socket */
++ mutex_unlock(&server->srv_mutex);
++ cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
++ msleep(3000);
++ continue;
++ }
++ /*
++ * Socket was created. Update tcp session status to CifsNeedNegotiate so that a
++ * process waiting for reconnect will know it needs to re-establish session and tcon
++ * through the reconnected target server.
++ */
++ atomic_inc(&tcpSesReconnectCount);
++ set_credits(server, 1);
++ spin_lock(&GlobalMid_Lock);
++ if (server->tcpStatus != CifsExiting)
++ server->tcpStatus = CifsNeedNegotiate;
++ spin_unlock(&GlobalMid_Lock);
++ cifs_swn_reset_server_dstaddr(server);
++ mutex_unlock(&server->srv_mutex);
++ } while (server->tcpStatus == CifsNeedReconnect);
++
++ if (target_hint)
++ dfs_cache_noreq_update_tgthint(refpath, target_hint);
++
++ dfs_cache_free_tgts(&tl);
++
++ /* Need to set up echo worker again once connection has been established */
+ if (server->tcpStatus == CifsNeedNegotiate)
+ mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
+@@ -407,6 +438,25 @@ cifs_reconnect(struct TCP_Server_Info *server)
+ return rc;
+ }
+
++int cifs_reconnect(struct TCP_Server_Info *server)
++{
++ /* If tcp session is not an dfs connection, then reconnect to last target server */
++ spin_lock(&cifs_tcp_ses_lock);
++ if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
++ spin_unlock(&cifs_tcp_ses_lock);
++ return __cifs_reconnect(server);
++ }
++ spin_unlock(&cifs_tcp_ses_lock);
++
++ return reconnect_dfs_server(server);
++}
++#else
++int cifs_reconnect(struct TCP_Server_Info *server)
++{
++ return __cifs_reconnect(server);
++}
++#endif
++
+ static void
+ cifs_echo_request(struct work_struct *work)
+ {
+@@ -519,9 +569,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
+ int length = 0;
+ int total_read;
+
+- smb_msg->msg_control = NULL;
+- smb_msg->msg_controllen = 0;
+-
+ for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
+ try_to_freeze();
+
+@@ -572,7 +619,7 @@ int
+ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
+ unsigned int to_read)
+ {
+- struct msghdr smb_msg;
++ struct msghdr smb_msg = {};
+ struct kvec iov = {.iov_base = buf, .iov_len = to_read};
+ iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
+
+@@ -582,15 +629,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
+ ssize_t
+ cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
+ {
+- struct msghdr smb_msg;
++ struct msghdr smb_msg = {};
+
+ /*
+ * iov_iter_discard already sets smb_msg.type and count and iov_offset
+ * and cifs_readv_from_socket sets msg_control and msg_controllen
+ * so little to initialize in struct msghdr
+ */
+- smb_msg.msg_name = NULL;
+- smb_msg.msg_namelen = 0;
+ iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+
+ return cifs_readv_from_socket(server, &smb_msg);
+@@ -600,7 +645,7 @@ int
+ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
+ unsigned int page_offset, unsigned int to_read)
+ {
+- struct msghdr smb_msg;
++ struct msghdr smb_msg = {};
+ struct bio_vec bv = {
+ .bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
+ iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
+@@ -794,7 +839,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
+ */
+ }
+
+- kfree(server->hostname);
++#ifdef CONFIG_CIFS_DFS_UPCALL
++ kfree(server->origin_fullpath);
++ kfree(server->leaf_fullpath);
++#endif
+ kfree(server);
+
+ length = atomic_dec_return(&tcpSesAllocCount);
+@@ -1221,6 +1269,10 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *
+ if (ctx->nosharesock)
+ return 0;
+
++ /* this server does not share socket */
++ if (server->nosharesock)
++ return 0;
++
+ /* If multidialect negotiation see if existing sessions match one */
+ if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
+ if (server->vals->protocol_id < SMB30_PROT_ID)
+@@ -1235,6 +1287,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *
+ if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
+ return 0;
+
++ if (strcasecmp(server->hostname, ctx->server_hostname))
++ return 0;
++
+ if (!match_address(server, addr,
+ (struct sockaddr *)&ctx->srcaddr))
+ return 0;
+@@ -1336,6 +1391,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
+ kfree(server->session_key.response);
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
++ kfree(server->hostname);
++ server->hostname = NULL;
+
+ task = xchg(&server->tsk, NULL);
+ if (task)
+@@ -1361,14 +1418,18 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
+ goto out_err;
+ }
+
++ tcp_ses->hostname = kstrdup(ctx->server_hostname, GFP_KERNEL);
++ if (!tcp_ses->hostname) {
++ rc = -ENOMEM;
++ goto out_err;
++ }
++
++ if (ctx->nosharesock)
++ tcp_ses->nosharesock = true;
++
+ tcp_ses->ops = ctx->ops;
+ tcp_ses->vals = ctx->vals;
+ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
+- tcp_ses->hostname = extract_hostname(ctx->UNC);
+- if (IS_ERR(tcp_ses->hostname)) {
+- rc = PTR_ERR(tcp_ses->hostname);
+- goto out_err_crypto_release;
+- }
+
+ tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
+ tcp_ses->noblockcnt = ctx->rootfs;
+@@ -1399,6 +1460,9 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
+ INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server);
+ INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
+ mutex_init(&tcp_ses->reconnect_mutex);
++#ifdef CONFIG_CIFS_DFS_UPCALL
++ mutex_init(&tcp_ses->refpath_lock);
++#endif
+ memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
+ sizeof(tcp_ses->srcaddr));
+ memcpy(&tcp_ses->dstaddr, &ctx->dstaddr,
+@@ -1497,8 +1561,7 @@ out_err_crypto_release:
+
+ out_err:
+ if (tcp_ses) {
+- if (!IS_ERR(tcp_ses->hostname))
+- kfree(tcp_ses->hostname);
++ kfree(tcp_ses->hostname);
+ if (tcp_ses->ssocket)
+ sock_release(tcp_ses->ssocket);
+ kfree(tcp_ses);
+@@ -1516,8 +1579,12 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
+ * If an existing session is limited to less channels than
+ * requested, it should not be reused
+ */
+- if (ses->chan_max < ctx->max_channels)
++ spin_lock(&ses->chan_lock);
++ if (ses->chan_max < ctx->max_channels) {
++ spin_unlock(&ses->chan_lock);
+ return 0;
++ }
++ spin_unlock(&ses->chan_lock);
+
+ switch (ses->sectype) {
+ case Kerberos:
+@@ -1652,6 +1719,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
+ void cifs_put_smb_ses(struct cifs_ses *ses)
+ {
+ unsigned int rc, xid;
++ unsigned int chan_count;
+ struct TCP_Server_Info *server = ses->server;
+ cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
+
+@@ -1693,12 +1761,24 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
+ list_del_init(&ses->smb_ses_list);
+ spin_unlock(&cifs_tcp_ses_lock);
+
++ spin_lock(&ses->chan_lock);
++ chan_count = ses->chan_count;
++ spin_unlock(&ses->chan_lock);
++
+ /* close any extra channels */
+- if (ses->chan_count > 1) {
++ if (chan_count > 1) {
+ int i;
+
+- for (i = 1; i < ses->chan_count; i++)
++ for (i = 1; i < chan_count; i++) {
++ /*
++ * note: for now, we're okay accessing ses->chans
++ * without chan_lock. But when chans can go away, we'll
++ * need to introduce ref counting to make sure that chan
++ * is not freed from under us.
++ */
+ cifs_put_tcp_session(ses->chans[i].server, 0);
++ ses->chans[i].server = NULL;
++ }
+ }
+
+ sesInfoFree(ses);
+@@ -1868,7 +1948,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)),
+ struct cifs_ses *
+ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
+ {
+- int rc = -ENOMEM;
++ int rc = 0;
+ unsigned int xid;
+ struct cifs_ses *ses;
+ struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
+@@ -1910,6 +1990,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
+ return ses;
+ }
+
++ rc = -ENOMEM;
++
+ cifs_dbg(FYI, "Existing smb sess not found\n");
+ ses = sesInfoAlloc();
+ if (ses == NULL)
+@@ -1949,9 +2031,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
+ mutex_lock(&ses->session_mutex);
+
+ /* add server as first channel */
++ spin_lock(&ses->chan_lock);
+ ses->chans[0].server = server;
+ ses->chan_count = 1;
+ ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
++ spin_unlock(&ses->chan_lock);
+
+ rc = cifs_negotiate_protocol(xid, ses);
+ if (!rc)
+@@ -2352,6 +2436,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
+ return 0;
+ if (old->ctx->acdirmax != new->ctx->acdirmax)
+ return 0;
++ if (old->ctx->closetimeo != new->ctx->closetimeo)
++ return 0;
+
+ return 1;
+ }
+@@ -2388,6 +2474,13 @@ cifs_match_super(struct super_block *sb, void *data)
+
+ spin_lock(&cifs_tcp_ses_lock);
+ cifs_sb = CIFS_SB(sb);
++
++ /* We do not want to use a superblock that has been shutdown */
++ if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) {
++ spin_unlock(&cifs_tcp_ses_lock);
++ return 0;
++ }
++
+ tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
+ if (tlink == NULL) {
+ /* can not match superblock if tlink were ever null */
+@@ -2845,73 +2938,64 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
+ }
+
+ /* Release all succeed connections */
+-static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
+- unsigned int xid,
+- struct TCP_Server_Info *server,
+- struct cifs_ses *ses, struct cifs_tcon *tcon)
++static inline void mount_put_conns(struct mount_ctx *mnt_ctx)
+ {
+ int rc = 0;
+
+- if (tcon)
+- cifs_put_tcon(tcon);
+- else if (ses)
+- cifs_put_smb_ses(ses);
+- else if (server)
+- cifs_put_tcp_session(server, 0);
+- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+- free_xid(xid);
++ if (mnt_ctx->tcon)
++ cifs_put_tcon(mnt_ctx->tcon);
++ else if (mnt_ctx->ses)
++ cifs_put_smb_ses(mnt_ctx->ses);
++ else if (mnt_ctx->server)
++ cifs_put_tcp_session(mnt_ctx->server, 0);
++ mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
++ free_xid(mnt_ctx->xid);
+ }
+
+ /* Get connections for tcp, ses and tcon */
+-static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
+- unsigned int *xid,
+- struct TCP_Server_Info **nserver,
+- struct cifs_ses **nses, struct cifs_tcon **ntcon)
++static int mount_get_conns(struct mount_ctx *mnt_ctx)
+ {
+ int rc = 0;
+- struct TCP_Server_Info *server;
+- struct cifs_ses *ses;
+- struct cifs_tcon *tcon;
+-
+- *nserver = NULL;
+- *nses = NULL;
+- *ntcon = NULL;
++ struct TCP_Server_Info *server = NULL;
++ struct cifs_ses *ses = NULL;
++ struct cifs_tcon *tcon = NULL;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ unsigned int xid;
+
+- *xid = get_xid();
++ xid = get_xid();
+
+ /* get a reference to a tcp session */
+ server = cifs_get_tcp_session(ctx);
+ if (IS_ERR(server)) {
+ rc = PTR_ERR(server);
+- return rc;
++ server = NULL;
++ goto out;
+ }
+
+- *nserver = server;
+-
+ /* get a reference to a SMB session */
+ ses = cifs_get_smb_ses(server, ctx);
+ if (IS_ERR(ses)) {
+ rc = PTR_ERR(ses);
+- return rc;
++ ses = NULL;
++ goto out;
+ }
+
+- *nses = ses;
+-
+ if ((ctx->persistent == true) && (!(ses->server->capabilities &
+ SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
+ cifs_server_dbg(VFS, "persistent handles not supported by server\n");
+- return -EOPNOTSUPP;
++ rc = -EOPNOTSUPP;
++ goto out;
+ }
+
+ /* search for existing tcon to this server share */
+ tcon = cifs_get_tcon(ses, ctx);
+ if (IS_ERR(tcon)) {
+ rc = PTR_ERR(tcon);
+- return rc;
++ tcon = NULL;
++ goto out;
+ }
+
+- *ntcon = tcon;
+-
+ /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+ if (tcon->posix_extensions)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+@@ -2922,17 +3006,19 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
+ * reset of caps checks mount to see if unix extensions disabled
+ * for just this mount.
+ */
+- reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx);
++ reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx);
+ if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
+ (le64_to_cpu(tcon->fsUnixInfo.Capability) &
+- CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
+- return -EACCES;
++ CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
++ rc = -EACCES;
++ goto out;
++ }
+ } else
+ tcon->unix_ext = 0; /* server does not support them */
+
+ /* do not care if a following call succeed - informational */
+ if (!tcon->pipe && server->ops->qfs_tcon) {
+- server->ops->qfs_tcon(*xid, tcon, cifs_sb);
++ server->ops->qfs_tcon(xid, tcon, cifs_sb);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
+ if (tcon->fsDevInfo.DeviceCharacteristics &
+ cpu_to_le32(FILE_READ_ONLY_DEVICE))
+@@ -2956,7 +3042,13 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
+ (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
+ cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
+
+- return 0;
++out:
++ mnt_ctx->server = server;
++ mnt_ctx->ses = ses;
++ mnt_ctx->tcon = tcon;
++ mnt_ctx->xid = xid;
++
++ return rc;
+ }
+
+ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+@@ -2986,18 +3078,17 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ }
+
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+-static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
+- unsigned int *xid, struct TCP_Server_Info **nserver,
+- struct cifs_ses **nses, struct cifs_tcon **ntcon)
++/* Get unique dfs connections */
++static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx)
+ {
+ int rc;
+
+- ctx->nosharesock = true;
+- rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon);
+- if (*nserver) {
++ mnt_ctx->fs_ctx->nosharesock = true;
++ rc = mount_get_conns(mnt_ctx);
++ if (mnt_ctx->server) {
+ cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__);
+ spin_lock(&cifs_tcp_ses_lock);
+- (*nserver)->is_dfs_conn = true;
++ mnt_ctx->server->is_dfs_conn = true;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+ return rc;
+@@ -3039,190 +3130,38 @@ build_unc_path_to_root(const struct smb3_fs_context *ctx,
+ }
+
+ /*
+- * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
++ * expand_dfs_referral - Update cifs_sb from dfs referral path
+ *
+- * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated
+- * to a string containing updated options for the submount. Otherwise it
+- * will be left untouched.
+- *
+- * Returns the rc from get_dfs_path to the caller, which can be used to
+- * determine whether there were referrals.
++ * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the
++ * submount. Otherwise it will be left untouched.
+ */
+-static int
+-expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
+- struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
+- char *ref_path)
++static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path,
++ struct dfs_info3_param *referral)
+ {
+ int rc;
+- struct dfs_info3_param referral = {0};
+- char *full_path = NULL, *mdata = NULL;
+-
+- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+- return -EREMOTE;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
++ char *fake_devname = NULL, *mdata = NULL;
++
++ mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral,
++ &fake_devname);
++ if (IS_ERR(mdata)) {
++ rc = PTR_ERR(mdata);
++ mdata = NULL;
++ } else {
++ /*
++ * We can not clear out the whole structure since we no longer have an explicit
++ * function to parse a mount-string. Instead we need to clear out the individual
++ * fields that are no longer valid.
++ */
++ kfree(ctx->prepath);
++ ctx->prepath = NULL;
++ rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
++ }
++ kfree(fake_devname);
++ kfree(cifs_sb->ctx->mount_options);
++ cifs_sb->ctx->mount_options = mdata;
+
+- full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+- if (IS_ERR(full_path))
+- return PTR_ERR(full_path);
+-
+- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+- ref_path, &referral, NULL);
+- if (!rc) {
+- char *fake_devname = NULL;
+-
+- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+- full_path + 1, &referral,
+- &fake_devname);
+- free_dfs_info_param(&referral);
+-
+- if (IS_ERR(mdata)) {
+- rc = PTR_ERR(mdata);
+- mdata = NULL;
+- } else {
+- /*
+- * We can not clear out the whole structure since we
+- * no longer have an explicit function to parse
+- * a mount-string. Instead we need to clear out the
+- * individual fields that are no longer valid.
+- */
+- kfree(ctx->prepath);
+- ctx->prepath = NULL;
+- rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
+- }
+- kfree(fake_devname);
+- kfree(cifs_sb->ctx->mount_options);
+- cifs_sb->ctx->mount_options = mdata;
+- }
+- kfree(full_path);
+- return rc;
+-}
+-
+-static int get_next_dfs_tgt(struct dfs_cache_tgt_list *tgt_list,
+- struct dfs_cache_tgt_iterator **tgt_it)
+-{
+- if (!*tgt_it)
+- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
+- else
+- *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
+- return !*tgt_it ? -EHOSTDOWN : 0;
+-}
+-
+-static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
+- struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx)
+-{
+- const char *tgt = dfs_cache_get_tgt_name(tgt_it);
+- int len = strlen(tgt) + 2;
+- char *new_unc;
+-
+- new_unc = kmalloc(len, GFP_KERNEL);
+- if (!new_unc)
+- return -ENOMEM;
+- scnprintf(new_unc, len, "\\%s", tgt);
+-
+- kfree(ctx->UNC);
+- ctx->UNC = new_unc;
+-
+- if (fake_ctx->prepath) {
+- kfree(ctx->prepath);
+- ctx->prepath = fake_ctx->prepath;
+- fake_ctx->prepath = NULL;
+- }
+- memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr));
+-
+- return 0;
+-}
+-
+-static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
+- struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
+- unsigned int *xid, struct TCP_Server_Info **server,
+- struct cifs_ses **ses, struct cifs_tcon **tcon)
+-{
+- int rc;
+- char *npath = NULL;
+- struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
+- struct dfs_cache_tgt_iterator *tgt_it = NULL;
+- struct smb3_fs_context tmp_ctx = {NULL};
+-
+- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+- return -EOPNOTSUPP;
+-
+- npath = dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+- if (IS_ERR(npath))
+- return PTR_ERR(npath);
+-
+- cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, npath, full_path);
+-
+- rc = dfs_cache_noreq_find(npath, NULL, &tgt_list);
+- if (rc)
+- goto out;
+- /*
+- * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
+- * test connection against new DFS targets.
+- */
+- rc = smb3_fs_context_dup(&tmp_ctx, ctx);
+- if (rc)
+- goto out;
+-
+- for (;;) {
+- struct dfs_info3_param ref = {0};
+- char *fake_devname = NULL, *mdata = NULL;
+-
+- /* Get next DFS target server - if any */
+- rc = get_next_dfs_tgt(&tgt_list, &tgt_it);
+- if (rc)
+- break;
+-
+- rc = dfs_cache_get_tgt_referral(npath, tgt_it, &ref);
+- if (rc)
+- break;
+-
+- cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+- tmp_ctx.prepath);
+-
+- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
+- &fake_devname);
+- free_dfs_info_param(&ref);
+-
+- if (IS_ERR(mdata)) {
+- rc = PTR_ERR(mdata);
+- mdata = NULL;
+- } else
+- rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
+-
+- kfree(mdata);
+- kfree(fake_devname);
+-
+- if (rc)
+- break;
+-
+- cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+- tmp_ctx.prepath);
+-
+- mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+- rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+- if (!rc || (*server && *ses)) {
+- /*
+- * We were able to connect to new target server. Update current context with
+- * new target server.
+- */
+- rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
+- break;
+- }
+- }
+- if (!rc) {
+- cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+- tmp_ctx.prepath);
+- /*
+- * Update DFS target hint in DFS referral cache with the target server we
+- * successfully reconnected to.
+- */
+- rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
+- cifs_remap(cifs_sb), path, tgt_it);
+- }
+-
+-out:
+- kfree(npath);
+- smb3_cleanup_fs_context_contents(&tmp_ctx);
+- dfs_cache_free_tgts(&tgt_list);
+ return rc;
+ }
+ #endif
+@@ -3329,12 +3268,14 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
+ * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
+ * otherwise 0.
+ */
+-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+- const unsigned int xid,
+- struct TCP_Server_Info *server,
+- struct cifs_tcon *tcon)
++static int is_path_remote(struct mount_ctx *mnt_ctx)
+ {
+ int rc;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct TCP_Server_Info *server = mnt_ctx->server;
++ unsigned int xid = mnt_ctx->xid;
++ struct cifs_tcon *tcon = mnt_ctx->tcon;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+ char *full_path;
+
+ if (!server->ops->is_path_accessible)
+@@ -3372,280 +3313,298 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *
+ }
+
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+-static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, struct cifs_ses *ses,
+- struct cifs_ses **root_ses)
++static void set_root_ses(struct mount_ctx *mnt_ctx)
+ {
+- if (ses) {
++ if (mnt_ctx->ses) {
+ spin_lock(&cifs_tcp_ses_lock);
+- ses->ses_count++;
++ mnt_ctx->ses->ses_count++;
+ spin_unlock(&cifs_tcp_ses_lock);
+- dfs_cache_add_refsrv_session(mount_id, ses);
++ dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses);
+ }
+- *root_ses = ses;
++ mnt_ctx->root_ses = mnt_ctx->ses;
+ }
+
+-/* Set up next dfs prefix path in @dfs_path */
+-static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+- const unsigned int xid, struct TCP_Server_Info *server,
+- struct cifs_tcon *tcon, char **dfs_path)
++static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl)
+ {
+- char *path, *npath;
+- int added_treename = is_tcon_dfs(tcon);
+ int rc;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+
+- path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
+- if (!path)
+- return -ENOMEM;
++ *isdfs = true;
+
+- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+- if (rc == -EREMOTE) {
+- struct smb3_fs_context v = {NULL};
+- /* if @path contains a tree name, skip it in the prefix path */
+- if (added_treename) {
+- rc = smb3_parse_devname(path, &v);
+- if (rc)
+- goto out;
+- npath = build_unc_path_to_root(&v, cifs_sb, true);
+- smb3_cleanup_fs_context_contents(&v);
+- } else {
+- v.UNC = ctx->UNC;
+- v.prepath = path + 1;
+- npath = build_unc_path_to_root(&v, cifs_sb, true);
+- }
++ rc = mount_get_conns(mnt_ctx);
++ /*
++ * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
++ * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
++ *
++ * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
++ * to respond with PATH_NOT_COVERED to requests that include the prefix.
++ */
++ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
++ dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
++ ctx->UNC + 1, NULL, root_tl)) {
++ if (rc)
++ return rc;
++ /* Check if it is fully accessible and then mount it */
++ rc = is_path_remote(mnt_ctx);
++ if (!rc)
++ *isdfs = false;
++ else if (rc != -EREMOTE)
++ return rc;
++ }
++ return 0;
++}
+
+- if (IS_ERR(npath)) {
+- rc = PTR_ERR(npath);
+- goto out;
+- }
++static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path,
++ const char *ref_path, struct dfs_cache_tgt_iterator *tit)
++{
++ int rc;
++ struct dfs_info3_param ref = {};
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ char *oldmnt = cifs_sb->ctx->mount_options;
++
++ rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref);
++ if (rc)
++ goto out;
++
++ rc = expand_dfs_referral(mnt_ctx, full_path, &ref);
++ if (rc)
++ goto out;
+
+- kfree(*dfs_path);
+- *dfs_path = npath;
+- rc = -EREMOTE;
++ /* Connect to new target only if we were redirected (e.g. mount options changed) */
++ if (oldmnt != cifs_sb->ctx->mount_options) {
++ mount_put_conns(mnt_ctx);
++ rc = mount_get_dfs_conns(mnt_ctx);
++ }
++ if (!rc) {
++ if (cifs_is_referral_server(mnt_ctx->tcon, &ref))
++ set_root_ses(mnt_ctx);
++ rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
++ cifs_remap(cifs_sb), ref_path, tit);
+ }
+
+ out:
+- kfree(path);
++ free_dfs_info_param(&ref);
+ return rc;
+ }
+
+-/* Check if resolved targets can handle any DFS referrals */
+-static int is_referral_server(const char *ref_path, struct cifs_sb_info *cifs_sb,
+- struct cifs_tcon *tcon, bool *ref_server)
++static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl)
+ {
+ int rc;
+- struct dfs_info3_param ref = {0};
++ char *full_path;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
++ struct dfs_cache_tgt_iterator *tit;
+
+- cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
++ /* Put initial connections as they might be shared with other mounts. We need unique dfs
++ * connections per mount to properly failover, so mount_get_dfs_conns() must be used from
++ * now on.
++ */
++ mount_put_conns(mnt_ctx);
++ mount_get_dfs_conns(mnt_ctx);
++ set_root_ses(mnt_ctx);
+
+- if (is_tcon_dfs(tcon)) {
+- *ref_server = true;
+- } else {
+- char *npath;
++ full_path = build_unc_path_to_root(ctx, cifs_sb, true);
++ if (IS_ERR(full_path))
++ return PTR_ERR(full_path);
+
+- npath = dfs_cache_canonical_path(ref_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+- if (IS_ERR(npath))
+- return PTR_ERR(npath);
++ mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls,
++ cifs_remap(cifs_sb));
++ if (IS_ERR(mnt_ctx->origin_fullpath)) {
++ rc = PTR_ERR(mnt_ctx->origin_fullpath);
++ mnt_ctx->origin_fullpath = NULL;
++ goto out;
++ }
+
+- rc = dfs_cache_noreq_find(npath, &ref, NULL);
+- kfree(npath);
+- if (rc) {
+- cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
+- return rc;
++ /* Try all dfs root targets */
++ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl);
++ tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) {
++ rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit);
++ if (!rc) {
++ mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL);
++ if (!mnt_ctx->leaf_fullpath)
++ rc = -ENOMEM;
++ break;
+ }
+- cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
+- /*
+- * Check if all targets are capable of handling DFS referrals as per
+- * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+- */
+- *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
+- free_dfs_info_param(&ref);
+ }
+- return 0;
++
++out:
++ kfree(full_path);
++ return rc;
+ }
+
+-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
++static int __follow_dfs_link(struct mount_ctx *mnt_ctx)
+ {
+- int rc = 0;
+- unsigned int xid;
+- struct TCP_Server_Info *server = NULL;
+- struct cifs_ses *ses = NULL, *root_ses = NULL;
+- struct cifs_tcon *tcon = NULL;
+- int count = 0;
+- uuid_t mount_id = {0};
+- char *ref_path = NULL, *full_path = NULL;
+- char *oldmnt = NULL;
+- bool ref_server = false;
++ int rc;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
++ char *full_path;
++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
++ struct dfs_cache_tgt_iterator *tit;
+
+- rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+- /*
+- * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
+- * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
+- *
+- * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
+- * to respond with PATH_NOT_COVERED to requests that include the prefix.
+- */
+- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+- dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
+- NULL)) {
+- if (rc)
+- goto error;
+- /* Check if it is fully accessible and then mount it */
+- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+- if (!rc)
+- goto out;
+- if (rc != -EREMOTE)
+- goto error;
++ full_path = build_unc_path_to_root(ctx, cifs_sb, true);
++ if (IS_ERR(full_path))
++ return PTR_ERR(full_path);
++
++ kfree(mnt_ctx->leaf_fullpath);
++ mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
++ cifs_remap(cifs_sb));
++ if (IS_ERR(mnt_ctx->leaf_fullpath)) {
++ rc = PTR_ERR(mnt_ctx->leaf_fullpath);
++ mnt_ctx->leaf_fullpath = NULL;
++ goto out;
+ }
+
+- mount_put_conns(cifs_sb, xid, server, ses, tcon);
+- /*
+- * Ignore error check here because we may failover to other targets from cached a
+- * referral.
+- */
+- (void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
++ /* Get referral from dfs link */
++ rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
++ cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl);
++ if (rc)
++ goto out;
+
+- /* Get path of DFS root */
+- ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
+- if (IS_ERR(ref_path)) {
+- rc = PTR_ERR(ref_path);
+- ref_path = NULL;
+- goto error;
++ /* Try all dfs link targets */
++ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl);
++ tit; tit = dfs_cache_get_next_tgt(&tl, tit)) {
++ rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit);
++ if (!rc) {
++ rc = is_path_remote(mnt_ctx);
++ break;
++ }
++ }
++
++out:
++ kfree(full_path);
++ dfs_cache_free_tgts(&tl);
++ return rc;
++}
++
++static int follow_dfs_link(struct mount_ctx *mnt_ctx)
++{
++ int rc;
++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
++ char *full_path;
++ int num_links = 0;
++
++ full_path = build_unc_path_to_root(ctx, cifs_sb, true);
++ if (IS_ERR(full_path))
++ return PTR_ERR(full_path);
++
++ kfree(mnt_ctx->origin_fullpath);
++ mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
++ cifs_remap(cifs_sb));
++ kfree(full_path);
++
++ if (IS_ERR(mnt_ctx->origin_fullpath)) {
++ rc = PTR_ERR(mnt_ctx->origin_fullpath);
++ mnt_ctx->origin_fullpath = NULL;
++ return rc;
+ }
+
+- uuid_gen(&mount_id);
+- set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
+ do {
+- /* Save full path of last DFS path we used to resolve final target server */
+- kfree(full_path);
+- full_path = build_unc_path_to_root(ctx, cifs_sb, !!count);
+- if (IS_ERR(full_path)) {
+- rc = PTR_ERR(full_path);
+- full_path = NULL;
++ rc = __follow_dfs_link(mnt_ctx);
++ if (!rc || rc != -EREMOTE)
+ break;
+- }
+- /* Chase referral */
+- oldmnt = cifs_sb->ctx->mount_options;
+- rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1);
+- if (rc)
+- break;
+- /* Connect to new DFS target only if we were redirected */
+- if (oldmnt != cifs_sb->ctx->mount_options) {
+- mount_put_conns(cifs_sb, xid, server, ses, tcon);
+- rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+- }
+- if (rc && !server && !ses) {
+- /* Failed to connect. Try to connect to other targets in the referral. */
+- rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid,
+- &server, &ses, &tcon);
+- }
+- if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
+- break;
+- if (!tcon)
+- continue;
++ } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
+
+- /* Make sure that requests go through new root servers */
+- rc = is_referral_server(ref_path + 1, cifs_sb, tcon, &ref_server);
+- if (rc)
+- break;
+- if (ref_server)
+- set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
++ return rc;
++}
+
+- /* Get next dfs path and then continue chasing them if -EREMOTE */
+- rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
+- /* Prevent recursion on broken link referrals */
+- if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
+- rc = -ELOOP;
+- } while (rc == -EREMOTE);
++/* Set up DFS referral paths for failover */
++static void setup_server_referral_paths(struct mount_ctx *mnt_ctx)
++{
++ struct TCP_Server_Info *server = mnt_ctx->server;
++
++ server->origin_fullpath = mnt_ctx->origin_fullpath;
++ server->leaf_fullpath = mnt_ctx->leaf_fullpath;
++ server->current_fullpath = mnt_ctx->leaf_fullpath;
++ mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL;
++}
+
+- if (rc || !tcon || !ses)
++int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
++{
++ int rc;
++ struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
++ bool isdfs;
++
++ rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl);
++ if (rc)
+ goto error;
++ if (!isdfs)
++ goto out;
+
+- kfree(ref_path);
+- /*
+- * Store DFS full path in both superblock and tree connect structures.
+- *
+- * For DFS root mounts, the prefix path (cifs_sb->prepath) is preserved during reconnect so
+- * only the root path is set in cifs_sb->origin_fullpath and tcon->dfs_path. And for DFS
+- * links, the prefix path is included in both and may be changed during reconnect. See
+- * cifs_tree_connect().
+- */
+- ref_path = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+- kfree(full_path);
+- full_path = NULL;
++ uuid_gen(&mnt_ctx.mount_id);
++ rc = connect_dfs_root(&mnt_ctx, &tl);
++ dfs_cache_free_tgts(&tl);
+
+- if (IS_ERR(ref_path)) {
+- rc = PTR_ERR(ref_path);
+- ref_path = NULL;
++ if (rc)
+ goto error;
+- }
+- cifs_sb->origin_fullpath = ref_path;
+
+- ref_path = kstrdup(cifs_sb->origin_fullpath, GFP_KERNEL);
+- if (!ref_path) {
+- rc = -ENOMEM;
++ rc = is_path_remote(&mnt_ctx);
++ if (rc == -EREMOTE)
++ rc = follow_dfs_link(&mnt_ctx);
++ if (rc)
+ goto error;
+- }
+- spin_lock(&cifs_tcp_ses_lock);
+- tcon->dfs_path = ref_path;
+- ref_path = NULL;
+- spin_unlock(&cifs_tcp_ses_lock);
+
++ setup_server_referral_paths(&mnt_ctx);
+ /*
+- * After reconnecting to a different server, unique ids won't
+- * match anymore, so we disable serverino. This prevents
+- * dentry revalidation to think the dentry are stale (ESTALE).
++ * After reconnecting to a different server, unique ids won't match anymore, so we disable
++ * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
+ */
+ cifs_autodisable_serverino(cifs_sb);
+ /*
+- * Force the use of prefix path to support failover on DFS paths that
+- * resolve to targets that have different prefix paths.
++ * Force the use of prefix path to support failover on DFS paths that resolve to targets
++ * that have different prefix paths.
+ */
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ kfree(cifs_sb->prepath);
+ cifs_sb->prepath = ctx->prepath;
+ ctx->prepath = NULL;
+- uuid_copy(&cifs_sb->dfs_mount_id, &mount_id);
++ uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
+
+ out:
+- free_xid(xid);
+- cifs_try_adding_channels(cifs_sb, ses);
+- return mount_setup_tlink(cifs_sb, ses, tcon);
++ cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
++ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
++ if (rc)
++ goto error;
++
++ free_xid(mnt_ctx.xid);
++ return rc;
+
+ error:
+- kfree(ref_path);
+- kfree(full_path);
+- kfree(cifs_sb->origin_fullpath);
+- dfs_cache_put_refsrv_sessions(&mount_id);
+- mount_put_conns(cifs_sb, xid, server, ses, tcon);
++ dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
++ kfree(mnt_ctx.origin_fullpath);
++ kfree(mnt_ctx.leaf_fullpath);
++ mount_put_conns(&mnt_ctx);
+ return rc;
+ }
+ #else
+ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
+ {
+ int rc = 0;
+- unsigned int xid;
+- struct cifs_ses *ses;
+- struct cifs_tcon *tcon;
+- struct TCP_Server_Info *server;
++ struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
+
+- rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
++ rc = mount_get_conns(&mnt_ctx);
+ if (rc)
+ goto error;
+
+- if (tcon) {
+- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
++ if (mnt_ctx.tcon) {
++ rc = is_path_remote(&mnt_ctx);
+ if (rc == -EREMOTE)
+ rc = -EOPNOTSUPP;
+ if (rc)
+ goto error;
+ }
+
+- free_xid(xid);
++ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
++ if (rc)
++ goto error;
+
+- return mount_setup_tlink(cifs_sb, ses, tcon);
++ free_xid(mnt_ctx.xid);
++ return rc;
+
+ error:
+- mount_put_conns(cifs_sb, xid, server, ses, tcon);
++ mount_put_conns(&mnt_ctx);
+ return rc;
+ }
+ #endif
+@@ -3687,12 +3646,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
+ pSMB->AndXCommand = 0xFF;
+ pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
+ bcc_ptr = &pSMB->Password[0];
+- if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) {
+- pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
+- *bcc_ptr = 0; /* password is null byte */
+- bcc_ptr++; /* skip password */
+- /* already aligned so no need to do it below */
+- }
++
++ pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
++ *bcc_ptr = 0; /* password is null byte */
++ bcc_ptr++; /* skip password */
++ /* already aligned so no need to do it below */
+
+ if (ses->server->sign)
+ smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+@@ -3814,7 +3772,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
+ kfree(cifs_sb->prepath);
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+ dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
+- kfree(cifs_sb->origin_fullpath);
+ #endif
+ call_rcu(&cifs_sb->rcu, delayed_free);
+ }
+@@ -4141,104 +4098,249 @@ cifs_prune_tlinks(struct work_struct *work)
+ }
+
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+-int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
++static void mark_tcon_tcp_ses_for_reconnect(struct cifs_tcon *tcon)
++{
++ int i;
++
++ for (i = 0; i < tcon->ses->chan_count; i++) {
++ spin_lock(&GlobalMid_Lock);
++ if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
++ tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
++ spin_unlock(&GlobalMid_Lock);
++ }
++}
++
++/* Update dfs referral path of superblock */
++static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb,
++ const char *target)
++{
++ int rc = 0;
++ size_t len = strlen(target);
++ char *refpath, *npath;
++
++ if (unlikely(len < 2 || *target != '\\'))
++ return -EINVAL;
++
++ if (target[1] == '\\') {
++ len += 1;
++ refpath = kmalloc(len, GFP_KERNEL);
++ if (!refpath)
++ return -ENOMEM;
++
++ scnprintf(refpath, len, "%s", target);
++ } else {
++ len += sizeof("\\");
++ refpath = kmalloc(len, GFP_KERNEL);
++ if (!refpath)
++ return -ENOMEM;
++
++ scnprintf(refpath, len, "\\%s", target);
++ }
++
++ npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb));
++ kfree(refpath);
++
++ if (IS_ERR(npath)) {
++ rc = PTR_ERR(npath);
++ } else {
++ mutex_lock(&server->refpath_lock);
++ kfree(server->leaf_fullpath);
++ server->leaf_fullpath = npath;
++ mutex_unlock(&server->refpath_lock);
++ server->current_fullpath = server->leaf_fullpath;
++ }
++ return rc;
++}
++
++static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host,
++ size_t tcp_host_len, char *share, bool *target_match)
++{
++ int rc = 0;
++ const char *dfs_host;
++ size_t dfs_host_len;
++
++ *target_match = true;
++ extract_unc_hostname(share, &dfs_host, &dfs_host_len);
++
++ /* Check if hostnames or addresses match */
++ if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
++ cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
++ dfs_host, (int)tcp_host_len, tcp_host);
++ rc = match_target_ip(server, dfs_host, dfs_host_len, target_match);
++ if (rc)
++ cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
++ }
++ return rc;
++}
++
++int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
++ struct cifs_sb_info *cifs_sb, char *tree,
++ struct dfs_cache_tgt_list *tl, struct dfs_info3_param *ref)
+ {
+ int rc;
+ struct TCP_Server_Info *server = tcon->ses->server;
+ const struct smb_version_operations *ops = server->ops;
+- struct dfs_cache_tgt_list tl;
+- struct dfs_cache_tgt_iterator *it = NULL;
+- char *tree;
++ struct cifs_tcon *ipc = tcon->ses->tcon_ipc;
++ bool islink;
++ char *share = NULL, *prefix = NULL;
+ const char *tcp_host;
+ size_t tcp_host_len;
+- const char *dfs_host;
+- size_t dfs_host_len;
+- char *share = NULL, *prefix = NULL;
+- struct dfs_info3_param ref = {0};
+- bool isroot;
++ struct dfs_cache_tgt_iterator *tit;
++ bool target_match;
+
+- tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+- if (!tree)
+- return -ENOMEM;
++ extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
+
+- /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
+- if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) {
+- if (tcon->ipc) {
+- scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+- } else {
+- rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+- }
++ islink = ref->server_type == DFS_TYPE_LINK;
++ free_dfs_info_param(ref);
++
++ tit = dfs_cache_get_tgt_iterator(tl);
++ if (!tit) {
++ rc = -ENOENT;
+ goto out;
+ }
+
+- isroot = ref.server_type == DFS_TYPE_ROOT;
+- free_dfs_info_param(&ref);
+-
+- extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
+-
+- for (it = dfs_cache_get_tgt_iterator(&tl); it; it = dfs_cache_get_next_tgt(&tl, it)) {
+- bool target_match;
++ /* Try to tree connect to all dfs targets */
++ for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
++ const char *target = dfs_cache_get_tgt_name(tit);
++ struct dfs_cache_tgt_list ntl = DFS_CACHE_TGT_LIST_INIT(ntl);
+
+ kfree(share);
+ kfree(prefix);
+- share = NULL;
+- prefix = NULL;
+
+- rc = dfs_cache_get_tgt_share(tcon->dfs_path + 1, it, &share, &prefix);
++ /* Check if share matches with tcp ses */
++ rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix);
+ if (rc) {
+- cifs_dbg(VFS, "%s: failed to parse target share %d\n",
+- __func__, rc);
+- continue;
++ cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc);
++ break;
+ }
+
+- extract_unc_hostname(share, &dfs_host, &dfs_host_len);
+-
+- if (dfs_host_len != tcp_host_len
+- || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+- cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
+- dfs_host, (int)tcp_host_len, tcp_host);
++ rc = target_share_matches_server(server, tcp_host, tcp_host_len, share,
++ &target_match);
++ if (rc)
++ break;
++ if (!target_match) {
++ rc = -EHOSTUNREACH;
++ continue;
++ }
+
+- rc = match_target_ip(server, dfs_host, dfs_host_len, &target_match);
+- if (rc) {
+- cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
++ if (ipc->need_reconnect) {
++ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
++ rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls);
++ if (rc)
+ break;
+- }
++ }
+
+- if (!target_match) {
+- cifs_dbg(FYI, "%s: skipping target\n", __func__);
++ scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
++ if (!islink) {
++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
++ break;
++ }
++ /*
++ * If no dfs referrals were returned from link target, then just do a TREE_CONNECT
++ * to it. Otherwise, cache the dfs referral and then mark current tcp ses for
++ * reconnect so either the demultiplex thread or the echo worker will reconnect to
++ * newly resolved target.
++ */
++ if (dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target,
++ ref, &ntl)) {
++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
++ if (rc)
+ continue;
+- }
++ rc = dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit);
++ if (!rc)
++ rc = cifs_update_super_prepath(cifs_sb, prefix);
++ break;
+ }
++ /* Target is another dfs share */
++ rc = update_server_fullpath(server, cifs_sb, target);
++ dfs_cache_free_tgts(tl);
+
+- if (tcon->ipc) {
+- scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", share);
+- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
++ if (!rc) {
++ rc = -EREMOTE;
++ list_replace_init(&ntl.tl_list, &tl->tl_list);
+ } else {
+- scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
+- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+- /* Only handle prefix paths of DFS link targets */
+- if (!rc && !isroot) {
+- rc = update_super_prepath(tcon, prefix);
+- break;
+- }
++ dfs_cache_free_tgts(&ntl);
++ free_dfs_info_param(ref);
+ }
+- if (rc == -EREMOTE)
+- break;
++ break;
+ }
+
++out:
+ kfree(share);
+ kfree(prefix);
+
+- if (!rc) {
+- if (it)
+- rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, it);
+- else
+- rc = -ENOENT;
++ return rc;
++}
++
++int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
++ struct cifs_sb_info *cifs_sb, char *tree,
++ struct dfs_cache_tgt_list *tl, struct dfs_info3_param *ref)
++{
++ int rc;
++ int num_links = 0;
++ struct TCP_Server_Info *server = tcon->ses->server;
++
++ do {
++ rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, tl, ref);
++ if (!rc || rc != -EREMOTE)
++ break;
++ } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
++ /*
++ * If we couldn't tree connect to any targets from last referral path, then retry from
++ * original referral path.
++ */
++ if (rc && server->current_fullpath != server->origin_fullpath) {
++ server->current_fullpath = server->origin_fullpath;
++ mark_tcon_tcp_ses_for_reconnect(tcon);
+ }
+- dfs_cache_free_tgts(&tl);
++
++ dfs_cache_free_tgts(tl);
++ return rc;
++}
++
++int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
++{
++ int rc;
++ struct TCP_Server_Info *server = tcon->ses->server;
++ const struct smb_version_operations *ops = server->ops;
++ struct super_block *sb = NULL;
++ struct cifs_sb_info *cifs_sb;
++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
++ char *tree;
++ struct dfs_info3_param ref = {0};
++
++ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
++ if (!tree)
++ return -ENOMEM;
++
++ if (tcon->ipc) {
++ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
++ goto out;
++ }
++
++ sb = cifs_get_tcp_super(server);
++ if (IS_ERR(sb)) {
++ rc = PTR_ERR(sb);
++ cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc);
++ goto out;
++ }
++
++ cifs_sb = CIFS_SB(sb);
++
++ /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
++ if (!server->current_fullpath ||
++ dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) {
++ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, cifs_sb->local_nls);
++ goto out;
++ }
++
++ rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, &tl, &ref);
++
+ out:
+ kfree(tree);
++ cifs_put_tcp_super(sb);
++
+ return rc;
+ }
+ #else
+diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
+index 2837455928441..1864bdadf3ddd 100644
+--- a/fs/cifs/dfs_cache.c
++++ b/fs/cifs/dfs_cache.c
+@@ -792,26 +792,27 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const
+ */
+ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path)
+ {
+- int rc;
+- struct cache_entry *ce;
+ struct dfs_info3_param *refs = NULL;
++ struct cache_entry *ce;
+ int numrefs = 0;
+- bool newent = false;
++ int rc;
+
+ cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
+
+- down_write(&htable_rw_lock);
++ down_read(&htable_rw_lock);
+
+ ce = lookup_cache_entry(path);
+- if (!IS_ERR(ce)) {
+- if (!cache_entry_expired(ce)) {
+- dump_ce(ce);
+- up_write(&htable_rw_lock);
+- return 0;
+- }
+- } else {
+- newent = true;
++ if (!IS_ERR(ce) && !cache_entry_expired(ce)) {
++ up_read(&htable_rw_lock);
++ return 0;
+ }
++ /*
++ * Unlock shared access as we don't want to hold any locks while getting
++ * a new referral. The @ses used for performing the I/O could be
++ * reconnecting and it acquires @htable_rw_lock to look up the dfs cache
++ * in order to failover -- if necessary.
++ */
++ up_read(&htable_rw_lock);
+
+ /*
+ * Either the entry was not found, or it is expired.
+@@ -819,19 +820,22 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, cons
+ */
+ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+ if (rc)
+- goto out_unlock;
++ goto out;
+
+ dump_refs(refs, numrefs);
+
+- if (!newent) {
+- rc = update_cache_entry_locked(ce, refs, numrefs);
+- goto out_unlock;
++ down_write(&htable_rw_lock);
++ /* Re-check as another task might have added or refreshed it already */
++ ce = lookup_cache_entry(path);
++ if (!IS_ERR(ce)) {
++ if (cache_entry_expired(ce))
++ rc = update_cache_entry_locked(ce, refs, numrefs);
++ } else {
++ rc = add_cache_entry_locked(refs, numrefs);
+ }
+
+- rc = add_cache_entry_locked(refs, numrefs);
+-
+-out_unlock:
+ up_write(&htable_rw_lock);
++out:
+ free_dfs_info_array(refs, numrefs);
+ return rc;
+ }
+@@ -1046,10 +1050,10 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
+ const struct nls_table *cp, int remap, const char *path,
+ const struct dfs_cache_tgt_iterator *it)
+ {
+- int rc;
+- const char *npath;
+- struct cache_entry *ce;
+ struct cache_dfs_tgt *t;
++ struct cache_entry *ce;
++ const char *npath;
++ int rc = 0;
+
+ npath = dfs_cache_canonical_path(path, cp, remap);
+ if (IS_ERR(npath))
+@@ -1364,9 +1368,9 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
+ }
+
+ /* Refresh dfs referral of tcon and mark it for reconnect if needed */
+-static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
++static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon,
++ bool force_refresh)
+ {
+- const char *path = tcon->dfs_path + 1;
+ struct cifs_ses *ses;
+ struct cache_entry *ce;
+ struct dfs_info3_param *refs = NULL;
+@@ -1422,6 +1426,20 @@ out:
+ return rc;
+ }
+
++static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
++{
++ struct TCP_Server_Info *server = tcon->ses->server;
++
++ mutex_lock(&server->refpath_lock);
++ if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
++ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
++ mutex_unlock(&server->refpath_lock);
++
++ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
++
++ return 0;
++}
++
+ /**
+ * dfs_cache_remount_fs - remount a DFS share
+ *
+@@ -1435,6 +1453,7 @@ out:
+ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+ {
+ struct cifs_tcon *tcon;
++ struct TCP_Server_Info *server;
+ struct mount_group *mg;
+ struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+ int rc;
+@@ -1443,13 +1462,15 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+ return -EINVAL;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+- if (!tcon->dfs_path) {
+- cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
++ server = tcon->ses->server;
++
++ if (!server->origin_fullpath) {
++ cifs_dbg(FYI, "%s: not a dfs mount\n", __func__);
+ return 0;
+ }
+
+ if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
+- cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
++ cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__);
+ return -EINVAL;
+ }
+
+@@ -1457,7 +1478,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+ mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
+ if (IS_ERR(mg)) {
+ mutex_unlock(&mount_group_list_lock);
+- cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
++ cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__);
+ return PTR_ERR(mg);
+ }
+ kref_get(&mg->refcount);
+@@ -1498,9 +1519,12 @@ static void refresh_mounts(struct cifs_ses **sessions)
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
++ if (!server->is_dfs_conn)
++ continue;
++
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+- if (tcon->dfs_path) {
++ if (!tcon->ipc && !tcon->need_reconnect) {
+ tcon->tc_count++;
+ list_add_tail(&tcon->ulist, &tcons);
+ }
+@@ -1510,8 +1534,16 @@ static void refresh_mounts(struct cifs_ses **sessions)
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
++ struct TCP_Server_Info *server = tcon->ses->server;
++
+ list_del_init(&tcon->ulist);
+- refresh_tcon(sessions, tcon, false);
++
++ mutex_lock(&server->refpath_lock);
++ if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
++ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
++ mutex_unlock(&server->refpath_lock);
++
++ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
+ cifs_put_tcon(tcon);
+ }
+ }
+diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
+index 6e8e7cc26ae24..83c929dd6ed59 100644
+--- a/fs/cifs/dir.c
++++ b/fs/cifs/dir.c
+@@ -538,8 +538,10 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
+ cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
+ inode, direntry, direntry);
+
+- if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+- return -EIO;
++ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) {
++ rc = -EIO;
++ goto out_free_xid;
++ }
+
+ tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+ rc = PTR_ERR(tlink);
+diff --git a/fs/cifs/file.c b/fs/cifs/file.c
+index 13f3182cf7969..9e8a69f9421e6 100644
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -880,8 +880,8 @@ int cifs_close(struct inode *inode, struct file *file)
+ cfile = file->private_data;
+ file->private_data = NULL;
+ dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+- if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+- cinode->lease_granted &&
++ if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
++ && cinode->lease_granted &&
+ !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
+ dclose) {
+ if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
+@@ -897,12 +897,12 @@ int cifs_close(struct inode *inode, struct file *file)
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq,
+- &cfile->deferred, cifs_sb->ctx->acregmax))
++ &cfile->deferred, cifs_sb->ctx->closetimeo))
+ cifsFileInfo_get(cfile);
+ } else {
+ /* Deferred close for files */
+ queue_delayed_work(deferredclose_wq,
+- &cfile->deferred, cifs_sb->ctx->acregmax);
++ &cfile->deferred, cifs_sb->ctx->closetimeo);
+ cfile->deferred_close_scheduled = true;
+ spin_unlock(&cinode->deferred_lock);
+ return 0;
+@@ -1806,11 +1806,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
+ struct cifsFileInfo *cfile;
+ __u32 type;
+
+- rc = -EACCES;
+ xid = get_xid();
+
+- if (!(fl->fl_flags & FL_FLOCK))
+- return -ENOLCK;
++ if (!(fl->fl_flags & FL_FLOCK)) {
++ rc = -ENOLCK;
++ free_xid(xid);
++ return rc;
++ }
+
+ cfile = (struct cifsFileInfo *)file->private_data;
+ tcon = tlink_tcon(cfile->tlink);
+@@ -1829,8 +1831,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
+ * if no lock or unlock then nothing to do since we do not
+ * know what it is
+ */
++ rc = -EOPNOTSUPP;
+ free_xid(xid);
+- return -EOPNOTSUPP;
++ return rc;
+ }
+
+ rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
+@@ -2692,12 +2695,23 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
+ tcon = tlink_tcon(smbfile->tlink);
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+ server = tcon->ses->server;
+- if (server->ops->flush)
+- rc = server->ops->flush(xid, tcon, &smbfile->fid);
+- else
++ if (server->ops->flush == NULL) {
+ rc = -ENOSYS;
++ goto strict_fsync_exit;
++ }
++
++ if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
++ smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
++ if (smbfile) {
++ rc = server->ops->flush(xid, tcon, &smbfile->fid);
++ cifsFileInfo_put(smbfile);
++ } else
++ cifs_dbg(FYI, "ignore fsync for file not open for write\n");
++ } else
++ rc = server->ops->flush(xid, tcon, &smbfile->fid);
+ }
+
++strict_fsync_exit:
+ free_xid(xid);
+ return rc;
+ }
+@@ -2709,6 +2723,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ struct cifs_tcon *tcon;
+ struct TCP_Server_Info *server;
+ struct cifsFileInfo *smbfile = file->private_data;
++ struct inode *inode = file_inode(file);
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+
+ rc = file_write_and_wait_range(file, start, end);
+@@ -2725,12 +2740,23 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ tcon = tlink_tcon(smbfile->tlink);
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+ server = tcon->ses->server;
+- if (server->ops->flush)
+- rc = server->ops->flush(xid, tcon, &smbfile->fid);
+- else
++ if (server->ops->flush == NULL) {
+ rc = -ENOSYS;
++ goto fsync_exit;
++ }
++
++ if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
++ smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
++ if (smbfile) {
++ rc = server->ops->flush(xid, tcon, &smbfile->fid);
++ cifsFileInfo_put(smbfile);
++ } else
++ cifs_dbg(FYI, "ignore fsync for file not open for write\n");
++ } else
++ rc = server->ops->flush(xid, tcon, &smbfile->fid);
+ }
+
++fsync_exit:
+ free_xid(xid);
+ return rc;
+ }
+@@ -3295,6 +3321,9 @@ static ssize_t __cifs_writev(
+
+ ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
+ {
++ struct file *file = iocb->ki_filp;
++
++ cifs_revalidate_mapping(file->f_inode);
+ return __cifs_writev(iocb, from, true);
+ }
+
+@@ -3584,7 +3613,7 @@ uncached_fill_pages(struct TCP_Server_Info *server,
+ rdata->got_bytes += result;
+ }
+
+- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
++ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
+ rdata->got_bytes : result;
+ }
+
+@@ -3711,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
+ break;
+ }
+
++ if (cifs_sb->ctx->rsize == 0)
++ cifs_sb->ctx->rsize =
++ server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
++ cifs_sb->ctx);
++
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
+ &rsize, credits);
+ if (rc)
+@@ -3984,6 +4018,15 @@ static ssize_t __cifs_readv(
+ len = ctx->len;
+ }
+
++ if (direct) {
++ rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
++ offset, offset + len - 1);
++ if (rc) {
++ kref_put(&ctx->refcount, cifs_aio_ctx_release);
++ return -EAGAIN;
++ }
++ }
++
+ /* grab a lock here due to read response handlers can access ctx */
+ mutex_lock(&ctx->aio_mutex);
+
+@@ -4345,7 +4388,7 @@ readpages_fill_pages(struct TCP_Server_Info *server,
+ rdata->got_bytes += result;
+ }
+
+- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
++ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
+ rdata->got_bytes : result;
+ }
+
+@@ -4489,6 +4532,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
+ break;
+ }
+
++ if (cifs_sb->ctx->rsize == 0)
++ cifs_sb->ctx->rsize =
++ server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
++ cifs_sb->ctx);
++
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
+ &rsize, credits);
+ if (rc)
+@@ -4623,9 +4671,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
+
+ io_error:
+ kunmap(page);
+- unlock_page(page);
+
+ read_complete:
++ unlock_page(page);
+ return rc;
+ }
+
+@@ -4817,17 +4865,25 @@ void cifs_oplock_break(struct work_struct *work)
+ struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
+ oplock_break);
+ struct inode *inode = d_inode(cfile->dentry);
++ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+- struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+- struct TCP_Server_Info *server = tcon->ses->server;
++ struct cifs_tcon *tcon;
++ struct TCP_Server_Info *server;
++ struct tcon_link *tlink;
+ int rc = 0;
+- bool purge_cache = false;
+- bool is_deferred = false;
+- struct cifs_deferred_close *dclose;
++ bool purge_cache = false, oplock_break_cancelled;
++ __u64 persistent_fid, volatile_fid;
++ __u16 net_fid;
+
+ wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
+ TASK_UNINTERRUPTIBLE);
+
++ tlink = cifs_sb_tlink(cifs_sb);
++ if (IS_ERR(tlink))
++ goto out;
++ tcon = tlink_tcon(tlink);
++ server = tcon->ses->server;
++
+ server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
+ cfile->oplock_epoch, &purge_cache);
+
+@@ -4864,30 +4920,32 @@ oplock_break_ack:
+ * file handles but cached, then schedule deferred close immediately.
+ * So, new open will not use cached handle.
+ */
+- spin_lock(&CIFS_I(inode)->deferred_lock);
+- is_deferred = cifs_is_deferred_close(cfile, &dclose);
+- spin_unlock(&CIFS_I(inode)->deferred_lock);
+- if (is_deferred &&
+- cfile->deferred_close_scheduled &&
+- delayed_work_pending(&cfile->deferred)) {
+- if (cancel_delayed_work(&cfile->deferred)) {
+- _cifsFileInfo_put(cfile, false, false);
+- goto oplock_break_done;
+- }
+- }
++
++ if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
++ cifs_close_deferred_file(cinode);
++
++ persistent_fid = cfile->fid.persistent_fid;
++ volatile_fid = cfile->fid.volatile_fid;
++ net_fid = cfile->fid.netfid;
++ oplock_break_cancelled = cfile->oplock_break_cancelled;
++
++ _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
+ /*
+- * releasing stale oplock after recent reconnect of smb session using
+- * a now incorrect file handle is not a data integrity issue but do
+- * not bother sending an oplock release if session to server still is
+- * disconnected since oplock already released by the server
++ * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
++ * an acknowledgment to be sent when the file has already been closed.
+ */
+- if (!cfile->oplock_break_cancelled) {
+- rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
+- cinode);
++ spin_lock(&cinode->open_file_lock);
++ /* check list empty since can race with kill_sb calling tree disconnect */
++ if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
++ spin_unlock(&cinode->open_file_lock);
++ rc = server->ops->oplock_response(tcon, persistent_fid,
++ volatile_fid, net_fid, cinode);
+ cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+- }
+-oplock_break_done:
+- _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
++ } else
++ spin_unlock(&cinode->open_file_lock);
++
++ cifs_put_tlink(tlink);
++out:
+ cifs_done_oplock_break(cinode);
+ }
+
+diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
+index 3109def8e1998..6347e759b5ccf 100644
+--- a/fs/cifs/fs_context.c
++++ b/fs/cifs/fs_context.c
+@@ -143,10 +143,11 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
+ fsparam_u32("actimeo", Opt_actimeo),
+ fsparam_u32("acdirmax", Opt_acdirmax),
+ fsparam_u32("acregmax", Opt_acregmax),
++ fsparam_u32("closetimeo", Opt_closetimeo),
+ fsparam_u32("echo_interval", Opt_echo_interval),
+ fsparam_u32("max_credits", Opt_max_credits),
+ fsparam_u32("handletimeout", Opt_handletimeout),
+- fsparam_u32("snapshot", Opt_snapshot),
++ fsparam_u64("snapshot", Opt_snapshot),
+ fsparam_u32("max_channels", Opt_max_channels),
+
+ /* Mount options which take string value */
+@@ -307,6 +308,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
+ new_ctx->nodename = NULL;
+ new_ctx->username = NULL;
+ new_ctx->password = NULL;
++ new_ctx->server_hostname = NULL;
+ new_ctx->domainname = NULL;
+ new_ctx->UNC = NULL;
+ new_ctx->source = NULL;
+@@ -318,6 +320,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
+ DUP_CTX_STR(mount_options);
+ DUP_CTX_STR(username);
+ DUP_CTX_STR(password);
++ DUP_CTX_STR(server_hostname);
+ DUP_CTX_STR(UNC);
+ DUP_CTX_STR(source);
+ DUP_CTX_STR(domainname);
+@@ -429,6 +432,43 @@ out:
+ return rc;
+ }
+
++/*
++ * Remove duplicate path delimiters. Windows is supposed to do that
++ * but there are some bugs that prevent rename from working if there are
++ * multiple delimiters.
++ *
++ * Returns a sanitized duplicate of @path. @gfp indicates the GFP_* flags
++ * for kstrdup.
++ * The caller is responsible for freeing the original.
++ */
++#define IS_DELIM(c) ((c) == '/' || (c) == '\\')
++char *cifs_sanitize_prepath(char *prepath, gfp_t gfp)
++{
++ char *cursor1 = prepath, *cursor2 = prepath;
++
++ /* skip all prepended delimiters */
++ while (IS_DELIM(*cursor1))
++ cursor1++;
++
++ /* copy the first letter */
++ *cursor2 = *cursor1;
++
++ /* copy the remainder... */
++ while (*(cursor1++)) {
++ /* ... skipping all duplicated delimiters */
++ if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2))
++ continue;
++ *(++cursor2) = *cursor1;
++ }
++
++ /* if the last character is a delimiter, skip it */
++ if (IS_DELIM(*(cursor2 - 1)))
++ cursor2--;
++
++ *(cursor2) = '\0';
++ return kstrdup(prepath, gfp);
++}
++
+ /*
+ * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath
+ * fields with the result. Returns 0 on success and an error otherwise
+@@ -456,6 +496,12 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
+ if (!pos)
+ return -EINVAL;
+
++ /* record the server hostname */
++ kfree(ctx->server_hostname);
++ ctx->server_hostname = kstrndup(devname + 2, pos - devname - 2, GFP_KERNEL);
++ if (!ctx->server_hostname)
++ return -ENOMEM;
++
+ /* skip past delimiter */
+ ++pos;
+
+@@ -482,7 +528,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
+ if (!*pos)
+ return 0;
+
+- ctx->prepath = kstrdup(pos, GFP_KERNEL);
++ ctx->prepath = cifs_sanitize_prepath(pos, GFP_KERNEL);
+ if (!ctx->prepath)
+ return -ENOMEM;
+
+@@ -838,6 +884,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
+ ctx->sfu_remap = false; /* disable SFU mapping */
+ }
+ break;
++ case Opt_mapchars:
++ if (result.negated)
++ ctx->sfu_remap = false;
++ else {
++ ctx->sfu_remap = true;
++ ctx->remap = false; /* disable SFM (mapposix) mapping */
++ }
++ break;
+ case Opt_user_xattr:
+ if (result.negated)
+ ctx->no_xattr = 1;
+@@ -1014,11 +1068,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
+ }
+ ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
+ break;
++ case Opt_closetimeo:
++ ctx->closetimeo = HZ * result.uint_32;
++ if (ctx->closetimeo > SMB3_MAX_DCLOSETIMEO) {
++ cifs_errorf(fc, "closetimeo too large\n");
++ goto cifs_parse_mount_err;
++ }
++ break;
+ case Opt_echo_interval:
+ ctx->echo_interval = result.uint_32;
+ break;
+ case Opt_snapshot:
+- ctx->snapshot_time = result.uint_32;
++ ctx->snapshot_time = result.uint_64;
+ break;
+ case Opt_max_credits:
+ if (result.uint_32 < 20 || result.uint_32 > 60000) {
+@@ -1452,6 +1513,7 @@ int smb3_init_fs_context(struct fs_context *fc)
+
+ ctx->acregmax = CIFS_DEF_ACTIMEO;
+ ctx->acdirmax = CIFS_DEF_ACTIMEO;
++ ctx->closetimeo = SMB3_DEF_DCLOSETIMEO;
+
+ /* Most clients set timeout to 0, allows server to use its default */
+ ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
+@@ -1496,6 +1558,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
+ ctx->username = NULL;
+ kfree_sensitive(ctx->password);
+ ctx->password = NULL;
++ kfree(ctx->server_hostname);
++ ctx->server_hostname = NULL;
+ kfree(ctx->UNC);
+ ctx->UNC = NULL;
+ kfree(ctx->source);
+diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
+index a42ba71d7a81f..3cf8d6235162d 100644
+--- a/fs/cifs/fs_context.h
++++ b/fs/cifs/fs_context.h
+@@ -123,6 +123,7 @@ enum cifs_param {
+ Opt_actimeo,
+ Opt_acdirmax,
+ Opt_acregmax,
++ Opt_closetimeo,
+ Opt_echo_interval,
+ Opt_max_credits,
+ Opt_snapshot,
+@@ -166,6 +167,7 @@ struct smb3_fs_context {
+ char *password;
+ char *domainname;
+ char *source;
++ char *server_hostname;
+ char *UNC;
+ char *nodename;
+ char *iocharset; /* local code page for mapping to and from Unicode */
+@@ -242,6 +244,8 @@ struct smb3_fs_context {
+ /* attribute cache timemout for files and directories in jiffies */
+ unsigned long acregmax;
+ unsigned long acdirmax;
++ /* timeout for deferred close of files in jiffies */
++ unsigned long closetimeo;
+ struct smb_version_operations *ops;
+ struct smb_version_values *vals;
+ char *prepath;
+@@ -274,4 +278,12 @@ static inline struct smb3_fs_context *smb3_fc2context(const struct fs_context *f
+ extern int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx);
+ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
+
++/*
++ * max deferred close timeout (jiffies) - 2^30
++ */
++#define SMB3_MAX_DCLOSETIMEO (1 << 30)
++#define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec is enough to help, e.g. open/write/close/open/read */
++
++extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
++
+ #endif
+diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
+index 0359b604bdbc0..71883ba9e5677 100644
+--- a/fs/cifs/ioctl.c
++++ b/fs/cifs/ioctl.c
+@@ -342,7 +342,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
+ rc = put_user(ExtAttrBits &
+ FS_FL_USER_VISIBLE,
+ (int __user *)arg);
+- if (rc != EOPNOTSUPP)
++ if (rc != -EOPNOTSUPP)
+ break;
+ }
+ #endif /* CONFIG_CIFS_POSIX */
+@@ -371,7 +371,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
+ * pSMBFile->fid.netfid,
+ * extAttrBits,
+ * &ExtAttrMask);
+- * if (rc != EOPNOTSUPP)
++ * if (rc != -EOPNOTSUPP)
+ * break;
+ */
+
+diff --git a/fs/cifs/link.c b/fs/cifs/link.c
+index 852e54ee82c28..4308b27ba3464 100644
+--- a/fs/cifs/link.c
++++ b/fs/cifs/link.c
+@@ -85,6 +85,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
+ if (rc != 1)
+ return -EINVAL;
+
++ if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
++ return -EINVAL;
++
+ rc = symlink_hash(link_len, link_str, md5_hash);
+ if (rc) {
+ cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
+@@ -456,6 +459,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
+ oparms.disposition = FILE_CREATE;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
++ oparms.mode = 0644;
+
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
+ NULL, NULL);
+diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
+index bb1185fff8cc4..5e4dab5dfb7a3 100644
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -75,6 +75,7 @@ sesInfoAlloc(void)
+ INIT_LIST_HEAD(&ret_buf->tcon_list);
+ mutex_init(&ret_buf->session_mutex);
+ spin_lock_init(&ret_buf->iface_lock);
++ spin_lock_init(&ret_buf->chan_lock);
+ }
+ return ret_buf;
+ }
+@@ -138,9 +139,6 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
+ kfree(buf_to_free->nativeFileSystem);
+ kfree_sensitive(buf_to_free->password);
+ kfree(buf_to_free->crfid.fid);
+-#ifdef CONFIG_CIFS_DFS_UPCALL
+- kfree(buf_to_free->dfs_path);
+-#endif
+ kfree(buf_to_free);
+ }
+
+@@ -735,6 +733,10 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+ list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
+ if (delayed_work_pending(&cfile->deferred)) {
+ if (cancel_delayed_work(&cfile->deferred)) {
++ spin_lock(&cifs_inode->deferred_lock);
++ cifs_del_deferred_close(cfile);
++ spin_unlock(&cifs_inode->deferred_lock);
++
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ break;
+@@ -746,7 +748,7 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+ spin_unlock(&cifs_inode->open_file_lock);
+
+ list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
+- _cifsFileInfo_put(tmp_list->cfile, true, false);
++ _cifsFileInfo_put(tmp_list->cfile, false, false);
+ list_del(&tmp_list->list);
+ kfree(tmp_list);
+ }
+@@ -766,6 +768,10 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
+ cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+ if (delayed_work_pending(&cfile->deferred)) {
+ if (cancel_delayed_work(&cfile->deferred)) {
++ spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
++ cifs_del_deferred_close(cfile);
++ spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
++
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ break;
+@@ -801,6 +807,10 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
+ if (strstr(full_path, path)) {
+ if (delayed_work_pending(&cfile->deferred)) {
+ if (cancel_delayed_work(&cfile->deferred)) {
++ spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
++ cifs_del_deferred_close(cfile);
++ spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
++
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ break;
+@@ -1130,8 +1140,8 @@ cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc)
+ * @len: Where to store the length for this page:
+ * @offset: Where to store the offset for this page
+ */
+-void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
+- unsigned int *len, unsigned int *offset)
++void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
++ unsigned int *len, unsigned int *offset)
+ {
+ *len = rqst->rq_pagesz;
+ *offset = (page == 0) ? rqst->rq_offset : 0;
+@@ -1211,18 +1221,23 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void
+ .data = data,
+ .sb = NULL,
+ };
++ struct file_system_type **fs_type = (struct file_system_type *[]) {
++ &cifs_fs_type, &smb3_fs_type, NULL,
++ };
+
+- iterate_supers_type(&cifs_fs_type, f, &sd);
+-
+- if (!sd.sb)
+- return ERR_PTR(-EINVAL);
+- /*
+- * Grab an active reference in order to prevent automounts (DFS links)
+- * of expiring and then freeing up our cifs superblock pointer while
+- * we're doing failover.
+- */
+- cifs_sb_active(sd.sb);
+- return sd.sb;
++ for (; *fs_type; fs_type++) {
++ iterate_supers_type(*fs_type, f, &sd);
++ if (sd.sb) {
++ /*
++ * Grab an active reference in order to prevent automounts (DFS links)
++ * from expiring and then freeing up our cifs superblock pointer while
++ * we're doing failover.
++ */
++ cifs_sb_active(sd.sb);
++ return sd.sb;
++ }
++ }
++ return ERR_PTR(-EINVAL);
+ }
+
+ static void __cifs_put_super(struct super_block *sb)
+@@ -1287,69 +1302,20 @@ out:
+ return rc;
+ }
+
+-static void tcon_super_cb(struct super_block *sb, void *arg)
++int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
+ {
+- struct super_cb_data *sd = arg;
+- struct cifs_tcon *tcon = sd->data;
+- struct cifs_sb_info *cifs_sb;
+-
+- if (sd->sb)
+- return;
+-
+- cifs_sb = CIFS_SB(sb);
+- if (tcon->dfs_path && cifs_sb->origin_fullpath &&
+- !strcasecmp(tcon->dfs_path, cifs_sb->origin_fullpath))
+- sd->sb = sb;
+-}
+-
+-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
+-{
+- return __cifs_get_super(tcon_super_cb, tcon);
+-}
+-
+-static inline void cifs_put_tcon_super(struct super_block *sb)
+-{
+- __cifs_put_super(sb);
+-}
+-#else
+-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
+-{
+- return ERR_PTR(-EOPNOTSUPP);
+-}
+-
+-static inline void cifs_put_tcon_super(struct super_block *sb)
+-{
+-}
+-#endif
+-
+-int update_super_prepath(struct cifs_tcon *tcon, char *prefix)
+-{
+- struct super_block *sb;
+- struct cifs_sb_info *cifs_sb;
+- int rc = 0;
+-
+- sb = cifs_get_tcon_super(tcon);
+- if (IS_ERR(sb))
+- return PTR_ERR(sb);
+-
+- cifs_sb = CIFS_SB(sb);
+-
+ kfree(cifs_sb->prepath);
+
+ if (prefix && *prefix) {
+- cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC);
+- if (!cifs_sb->prepath) {
+- rc = -ENOMEM;
+- goto out;
+- }
++ cifs_sb->prepath = cifs_sanitize_prepath(prefix, GFP_ATOMIC);
++ if (!cifs_sb->prepath)
++ return -ENOMEM;
+
+ convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb));
+ } else
+ cifs_sb->prepath = NULL;
+
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+-
+-out:
+- cifs_put_tcon_super(sb);
+- return rc;
++ return 0;
+ }
++#endif
+diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
+index 23e02db7923f6..0fbd0f78f361b 100644
+--- a/fs/cifs/sess.c
++++ b/fs/cifs/sess.c
+@@ -54,32 +54,43 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
+ {
+ int i;
+
++ spin_lock(&ses->chan_lock);
+ for (i = 0; i < ses->chan_count; i++) {
+- if (is_server_using_iface(ses->chans[i].server, iface))
++ if (is_server_using_iface(ses->chans[i].server, iface)) {
++ spin_unlock(&ses->chan_lock);
+ return true;
++ }
+ }
++ spin_unlock(&ses->chan_lock);
+ return false;
+ }
+
+ /* returns number of channels added */
+ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
+ {
+- int old_chan_count = ses->chan_count;
+- int left = ses->chan_max - ses->chan_count;
++ int old_chan_count, new_chan_count;
++ int left;
+ int i = 0;
+ int rc = 0;
+ int tries = 0;
+ struct cifs_server_iface *ifaces = NULL;
+ size_t iface_count;
+
++ spin_lock(&ses->chan_lock);
++
++ new_chan_count = old_chan_count = ses->chan_count;
++ left = ses->chan_max - ses->chan_count;
++
+ if (left <= 0) {
+ cifs_dbg(FYI,
+ "ses already at max_channels (%zu), nothing to open\n",
+ ses->chan_max);
++ spin_unlock(&ses->chan_lock);
+ return 0;
+ }
+
+ if (ses->server->dialect < SMB30_PROT_ID) {
++ spin_unlock(&ses->chan_lock);
+ cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
+ return 0;
+ }
+@@ -87,8 +98,10 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
+ if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
+ ses->chan_max = 1;
++ spin_unlock(&ses->chan_lock);
+ return 0;
+ }
++ spin_unlock(&ses->chan_lock);
+
+ /*
+ * Make a copy of the iface list at the time and use that
+@@ -142,10 +155,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
+ cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
+ i);
+ left--;
++ new_chan_count++;
+ }
+
+ kfree(ifaces);
+- return ses->chan_count - old_chan_count;
++ return new_chan_count - old_chan_count;
+ }
+
+ /*
+@@ -157,10 +171,14 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server)
+ {
+ int i;
+
++ spin_lock(&ses->chan_lock);
+ for (i = 0; i < ses->chan_count; i++) {
+- if (ses->chans[i].server == server)
++ if (ses->chans[i].server == server) {
++ spin_unlock(&ses->chan_lock);
+ return &ses->chans[i];
++ }
+ }
++ spin_unlock(&ses->chan_lock);
+ return NULL;
+ }
+
+@@ -168,6 +186,7 @@ static int
+ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ struct cifs_server_iface *iface)
+ {
++ struct TCP_Server_Info *chan_server;
+ struct cifs_chan *chan;
+ struct smb3_fs_context ctx = {NULL};
+ static const char unc_fmt[] = "\\%s\\foo";
+@@ -240,15 +259,20 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ SMB2_CLIENT_GUID_SIZE);
+ ctx.use_client_guid = true;
+
+- mutex_lock(&ses->session_mutex);
++ chan_server = cifs_get_tcp_session(&ctx);
+
++ mutex_lock(&ses->session_mutex);
++ spin_lock(&ses->chan_lock);
+ chan = ses->binding_chan = &ses->chans[ses->chan_count];
+- chan->server = cifs_get_tcp_session(&ctx);
++ chan->server = chan_server;
+ if (IS_ERR(chan->server)) {
+ rc = PTR_ERR(chan->server);
+ chan->server = NULL;
++ spin_unlock(&ses->chan_lock);
+ goto out;
+ }
++ spin_unlock(&ses->chan_lock);
++
+ spin_lock(&cifs_tcp_ses_lock);
+ chan->server->is_channel = true;
+ spin_unlock(&cifs_tcp_ses_lock);
+@@ -283,8 +307,11 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+ * ses to the new server.
+ */
+
++ spin_lock(&ses->chan_lock);
+ ses->chan_count++;
+ atomic_set(&ses->chan_seq, 0);
++ spin_unlock(&ses->chan_lock);
++
+ out:
+ ses->binding = false;
+ ses->binding_chan = NULL;
+@@ -293,6 +320,7 @@ out:
+ if (rc && chan->server)
+ cifs_put_tcp_session(chan->server, 0);
+
++ free_xid(xid);
+ return rc;
+ }
+
+diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
+index 3b83839fc2c27..41e468d06b5b0 100644
+--- a/fs/cifs/smb1ops.c
++++ b/fs/cifs/smb1ops.c
+@@ -864,12 +864,11 @@ cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
+ }
+
+ static int
+-cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
+- struct cifsInodeInfo *cinode)
++cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
++ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
+ {
+- return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
+- LOCKING_ANDX_OPLOCK_RELEASE, false,
+- CIFS_CACHE_READ(cinode) ? 1 : 0);
++ return CIFSSMBLock(0, tcon, net_fid, current->tgid, 0, 0, 0, 0,
++ LOCKING_ANDX_OPLOCK_RELEASE, false, CIFS_CACHE_READ(cinode) ? 1 : 0);
+ }
+
+ static int
+diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
+index 8297703492eea..20e9d1bcd96bb 100644
+--- a/fs/cifs/smb2inode.c
++++ b/fs/cifs/smb2inode.c
+@@ -223,15 +223,32 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ size[0] = 8; /* sizeof __le64 */
+ data[0] = ptr;
+
+- rc = SMB2_set_info_init(tcon, server,
+- &rqst[num_rqst], COMPOUND_FID,
+- COMPOUND_FID, current->tgid,
+- FILE_END_OF_FILE_INFORMATION,
+- SMB2_O_INFO_FILE, 0, data, size);
++ if (cfile) {
++ rc = SMB2_set_info_init(tcon, server,
++ &rqst[num_rqst],
++ cfile->fid.persistent_fid,
++ cfile->fid.volatile_fid,
++ current->tgid,
++ FILE_END_OF_FILE_INFORMATION,
++ SMB2_O_INFO_FILE, 0,
++ data, size);
++ } else {
++ rc = SMB2_set_info_init(tcon, server,
++ &rqst[num_rqst],
++ COMPOUND_FID,
++ COMPOUND_FID,
++ current->tgid,
++ FILE_END_OF_FILE_INFORMATION,
++ SMB2_O_INFO_FILE, 0,
++ data, size);
++ if (!rc) {
++ smb2_set_next_command(tcon, &rqst[num_rqst]);
++ smb2_set_related(&rqst[num_rqst]);
++ }
++ }
+ if (rc)
+ goto finished;
+- smb2_set_next_command(tcon, &rqst[num_rqst]);
+- smb2_set_related(&rqst[num_rqst++]);
++ num_rqst++;
+ trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path);
+ break;
+ case SMB2_OP_SET_INFO:
+@@ -358,8 +375,6 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ num_rqst++;
+
+ if (cfile) {
+- cifsFileInfo_put(cfile);
+- cfile = NULL;
+ rc = compound_send_recv(xid, ses, server,
+ flags, num_rqst - 2,
+ &rqst[1], &resp_buftype[1],
+diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
+index bda606dc72b1f..560c4ababfe1a 100644
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -653,7 +653,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
+ if (rc == -EOPNOTSUPP) {
+ cifs_dbg(FYI,
+ "server does not support query network interfaces\n");
+- goto out;
++ ret_data_len = 0;
+ } else if (rc != 0) {
+ cifs_tcon_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
+ goto out;
+@@ -745,8 +745,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ struct cached_fid **cfid)
+ {
+- struct cifs_ses *ses = tcon->ses;
+- struct TCP_Server_Info *server = ses->server;
++ struct cifs_ses *ses;
++ struct TCP_Server_Info *server;
+ struct cifs_open_parms oparms;
+ struct smb2_create_rsp *o_rsp = NULL;
+ struct smb2_query_info_rsp *qi_rsp = NULL;
+@@ -764,6 +764,9 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
+ if (tcon->nohandlecache)
+ return -ENOTSUPP;
+
++ ses = tcon->ses;
++ server = ses->server;
++
+ if (cifs_sb->root == NULL)
+ return -ENOENT;
+
+@@ -963,12 +966,13 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid fid;
+ struct cached_fid *cfid = NULL;
+
+- oparms.tcon = tcon;
+- oparms.desired_access = FILE_READ_ATTRIBUTES;
+- oparms.disposition = FILE_OPEN;
+- oparms.create_options = cifs_create_options(cifs_sb, 0);
+- oparms.fid = &fid;
+- oparms.reconnect = false;
++ oparms = (struct cifs_open_parms) {
++ .tcon = tcon,
++ .desired_access = FILE_READ_ATTRIBUTES,
++ .disposition = FILE_OPEN,
++ .create_options = cifs_create_options(cifs_sb, 0),
++ .fid = &fid,
++ };
+
+ rc = open_cached_dir(xid, tcon, "", cifs_sb, &cfid);
+ if (rc == 0)
+@@ -1102,9 +1106,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
+ size_t name_len, value_len, user_name_len;
+
+ while (src_size > 0) {
+- name = &src->ea_data[0];
+ name_len = (size_t)src->ea_name_length;
+- value = &src->ea_data[src->ea_name_length + 1];
+ value_len = (size_t)le16_to_cpu(src->ea_value_length);
+
+ if (name_len == 0)
+@@ -1116,6 +1118,9 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
+ goto out;
+ }
+
++ name = &src->ea_data[0];
++ value = &src->ea_data[src->ea_name_length + 1];
++
+ if (ea_name) {
+ if (ea_name_len == name_len &&
+ memcmp(ea_name, name, name_len) == 0) {
+@@ -1357,6 +1362,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
+ COMPOUND_FID, current->tgid,
+ FILE_FULL_EA_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
++ if (rc)
++ goto sea_exit;
+ smb2_set_next_command(tcon, &rqst[1]);
+ smb2_set_related(&rqst[1]);
+
+@@ -1367,6 +1374,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
+ rqst[2].rq_nvec = 1;
+ rc = SMB2_close_init(tcon, server,
+ &rqst[2], COMPOUND_FID, COMPOUND_FID, false);
++ if (rc)
++ goto sea_exit;
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, server,
+@@ -1631,6 +1640,7 @@ smb2_ioctl_query_info(const unsigned int xid,
+ unsigned int size[2];
+ void *data[2];
+ int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR;
++ void (*free_req1_func)(struct smb_rqst *r);
+
+ vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
+ if (vars == NULL)
+@@ -1640,27 +1650,29 @@ smb2_ioctl_query_info(const unsigned int xid,
+
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+
+- if (copy_from_user(&qi, arg, sizeof(struct smb_query_info)))
+- goto e_fault;
+-
++ if (copy_from_user(&qi, arg, sizeof(struct smb_query_info))) {
++ rc = -EFAULT;
++ goto free_vars;
++ }
+ if (qi.output_buffer_length > 1024) {
+- kfree(vars);
+- return -EINVAL;
++ rc = -EINVAL;
++ goto free_vars;
+ }
+
+ if (!ses || !server) {
+- kfree(vars);
+- return -EIO;
++ rc = -EIO;
++ goto free_vars;
+ }
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+- buffer = memdup_user(arg + sizeof(struct smb_query_info),
+- qi.output_buffer_length);
+- if (IS_ERR(buffer)) {
+- kfree(vars);
+- return PTR_ERR(buffer);
++ if (qi.output_buffer_length) {
++ buffer = memdup_user(arg + sizeof(struct smb_query_info), qi.output_buffer_length);
++ if (IS_ERR(buffer)) {
++ rc = PTR_ERR(buffer);
++ goto free_vars;
++ }
+ }
+
+ /* Open */
+@@ -1698,45 +1710,45 @@ smb2_ioctl_query_info(const unsigned int xid,
+ rc = SMB2_open_init(tcon, server,
+ &rqst[0], &oplock, &oparms, path);
+ if (rc)
+- goto iqinf_exit;
++ goto free_output_buffer;
+ smb2_set_next_command(tcon, &rqst[0]);
+
+ /* Query */
+ if (qi.flags & PASSTHRU_FSCTL) {
+ /* Can eventually relax perm check since server enforces too */
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_SYS_ADMIN)) {
+ rc = -EPERM;
+- else {
+- rqst[1].rq_iov = &vars->io_iov[0];
+- rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
+-
+- rc = SMB2_ioctl_init(tcon, server,
+- &rqst[1],
+- COMPOUND_FID, COMPOUND_FID,
+- qi.info_type, true, buffer,
+- qi.output_buffer_length,
+- CIFSMaxBufSize -
+- MAX_SMB2_CREATE_RESPONSE_SIZE -
+- MAX_SMB2_CLOSE_RESPONSE_SIZE);
++ goto free_open_req;
+ }
++ rqst[1].rq_iov = &vars->io_iov[0];
++ rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
++
++ rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID,
++ qi.info_type, true, buffer, qi.output_buffer_length,
++ CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
++ MAX_SMB2_CLOSE_RESPONSE_SIZE);
++ free_req1_func = SMB2_ioctl_free;
+ } else if (qi.flags == PASSTHRU_SET_INFO) {
+ /* Can eventually relax perm check since server enforces too */
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_SYS_ADMIN)) {
+ rc = -EPERM;
+- else {
+- rqst[1].rq_iov = &vars->si_iov[0];
+- rqst[1].rq_nvec = 1;
+-
+- size[0] = 8;
+- data[0] = buffer;
+-
+- rc = SMB2_set_info_init(tcon, server,
+- &rqst[1],
+- COMPOUND_FID, COMPOUND_FID,
+- current->tgid,
+- FILE_END_OF_FILE_INFORMATION,
+- SMB2_O_INFO_FILE, 0, data, size);
++ goto free_open_req;
+ }
++ if (qi.output_buffer_length < 8) {
++ rc = -EINVAL;
++ goto free_open_req;
++ }
++ rqst[1].rq_iov = &vars->si_iov[0];
++ rqst[1].rq_nvec = 1;
++
++ /* MS-FSCC 2.4.13 FileEndOfFileInformation */
++ size[0] = 8;
++ data[0] = buffer;
++
++ rc = SMB2_set_info_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID,
++ current->tgid, FILE_END_OF_FILE_INFORMATION,
++ SMB2_O_INFO_FILE, 0, data, size);
++ free_req1_func = SMB2_set_info_free;
+ } else if (qi.flags == PASSTHRU_QUERY_INFO) {
+ rqst[1].rq_iov = &vars->qi_iov[0];
+ rqst[1].rq_nvec = 1;
+@@ -1747,6 +1759,7 @@ smb2_ioctl_query_info(const unsigned int xid,
+ qi.info_type, qi.additional_information,
+ qi.input_buffer_length,
+ qi.output_buffer_length, buffer);
++ free_req1_func = SMB2_query_info_free;
+ } else { /* unknown flags */
+ cifs_tcon_dbg(VFS, "Invalid passthru query flags: 0x%x\n",
+ qi.flags);
+@@ -1754,7 +1767,7 @@ smb2_ioctl_query_info(const unsigned int xid,
+ }
+
+ if (rc)
+- goto iqinf_exit;
++ goto free_open_req;
+ smb2_set_next_command(tcon, &rqst[1]);
+ smb2_set_related(&rqst[1]);
+
+@@ -1765,14 +1778,14 @@ smb2_ioctl_query_info(const unsigned int xid,
+ rc = SMB2_close_init(tcon, server,
+ &rqst[2], COMPOUND_FID, COMPOUND_FID, false);
+ if (rc)
+- goto iqinf_exit;
++ goto free_req_1;
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, server,
+ flags, 3, rqst,
+ resp_buftype, rsp_iov);
+ if (rc)
+- goto iqinf_exit;
++ goto out;
+
+ /* No need to bump num_remote_opens since handle immediately closed */
+ if (qi.flags & PASSTHRU_FSCTL) {
+@@ -1782,18 +1795,22 @@ smb2_ioctl_query_info(const unsigned int xid,
+ qi.input_buffer_length = le32_to_cpu(io_rsp->OutputCount);
+ if (qi.input_buffer_length > 0 &&
+ le32_to_cpu(io_rsp->OutputOffset) + qi.input_buffer_length
+- > rsp_iov[1].iov_len)
+- goto e_fault;
++ > rsp_iov[1].iov_len) {
++ rc = -EFAULT;
++ goto out;
++ }
+
+ if (copy_to_user(&pqi->input_buffer_length,
+ &qi.input_buffer_length,
+- sizeof(qi.input_buffer_length)))
+- goto e_fault;
++ sizeof(qi.input_buffer_length))) {
++ rc = -EFAULT;
++ goto out;
++ }
+
+ if (copy_to_user((void __user *)pqi + sizeof(struct smb_query_info),
+ (const void *)io_rsp + le32_to_cpu(io_rsp->OutputOffset),
+ qi.input_buffer_length))
+- goto e_fault;
++ rc = -EFAULT;
+ } else {
+ pqi = (struct smb_query_info __user *)arg;
+ qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+@@ -1801,28 +1818,30 @@ smb2_ioctl_query_info(const unsigned int xid,
+ qi.input_buffer_length = le32_to_cpu(qi_rsp->OutputBufferLength);
+ if (copy_to_user(&pqi->input_buffer_length,
+ &qi.input_buffer_length,
+- sizeof(qi.input_buffer_length)))
+- goto e_fault;
++ sizeof(qi.input_buffer_length))) {
++ rc = -EFAULT;
++ goto out;
++ }
+
+ if (copy_to_user(pqi + 1, qi_rsp->Buffer,
+ qi.input_buffer_length))
+- goto e_fault;
++ rc = -EFAULT;
+ }
+
+- iqinf_exit:
+- cifs_small_buf_release(rqst[0].rq_iov[0].iov_base);
+- cifs_small_buf_release(rqst[1].rq_iov[0].iov_base);
+- cifs_small_buf_release(rqst[2].rq_iov[0].iov_base);
++out:
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+- kfree(vars);
++ SMB2_close_free(&rqst[2]);
++free_req_1:
++ free_req1_func(&rqst[1]);
++free_open_req:
++ SMB2_open_free(&rqst[0]);
++free_output_buffer:
+ kfree(buffer);
++free_vars:
++ kfree(vars);
+ return rc;
+-
+-e_fault:
+- rc = -EFAULT;
+- goto iqinf_exit;
+ }
+
+ static ssize_t
+@@ -1839,9 +1858,17 @@ smb2_copychunk_range(const unsigned int xid,
+ int chunks_copied = 0;
+ bool chunk_sizes_updated = false;
+ ssize_t bytes_written, total_bytes_written = 0;
++ struct inode *inode;
+
+ pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
+
++ /*
++ * We need to flush all unwritten data before we can send the
++ * copychunk ioctl to the server.
++ */
++ inode = d_inode(trgtfile->dentry);
++ filemap_write_and_wait(inode->i_mapping);
++
+ if (pcchunk == NULL)
+ return -ENOMEM;
+
+@@ -1866,7 +1893,7 @@ smb2_copychunk_range(const unsigned int xid,
+ pcchunk->SourceOffset = cpu_to_le64(src_off);
+ pcchunk->TargetOffset = cpu_to_le64(dest_off);
+ pcchunk->Length =
+- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
++ cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk));
+
+ /* Request server copy to target from src identified by key */
+ kfree(retbuf);
+@@ -2543,15 +2570,14 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+ }
+
+ static int
+-smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
+- struct cifsInodeInfo *cinode)
++smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
++ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
+ {
+ if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
+ return SMB2_lease_break(0, tcon, cinode->lease_key,
+ smb2_get_lease_state(cinode));
+
+- return SMB2_oplock_break(0, tcon, fid->persistent_fid,
+- fid->volatile_fid,
++ return SMB2_oplock_break(0, tcon, persistent_fid, volatile_fid,
+ CIFS_CACHE_READ(cinode) ? 1 : 0);
+ }
+
+@@ -2843,6 +2869,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
+ struct fsctl_get_dfs_referral_req *dfs_req = NULL;
+ struct get_dfs_referral_rsp *dfs_rsp = NULL;
+ u32 dfs_req_size = 0, dfs_rsp_size = 0;
++ int retry_count = 0;
+
+ cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name);
+
+@@ -2894,11 +2921,14 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
+ true /* is_fsctl */,
+ (char *)dfs_req, dfs_req_size, CIFSMaxBufSize,
+ (char **)&dfs_rsp, &dfs_rsp_size);
+- } while (rc == -EAGAIN);
++ if (!is_retryable_error(rc))
++ break;
++ usleep_range(512, 2048);
++ } while (++retry_count < 5);
+
+ if (rc) {
+- if ((rc != -ENOENT) && (rc != -EOPNOTSUPP))
+- cifs_tcon_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc);
++ if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
++ cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
+ goto out;
+ }
+
+@@ -3577,7 +3607,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
+ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len)
+ {
+- struct inode *inode;
++ struct inode *inode = file_inode(file);
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+ long rc;
+@@ -3586,14 +3616,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+
+ xid = get_xid();
+
+- inode = d_inode(cfile->dentry);
+-
++ inode_lock(inode);
+ /* Need to make file sparse, if not already, before freeing range. */
+ /* Consider adding equivalent for compressed since it could also work */
+ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+ rc = -EOPNOTSUPP;
+- free_xid(xid);
+- return rc;
++ goto out;
+ }
+
+ filemap_invalidate_lock(inode->i_mapping);
+@@ -3613,8 +3641,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+ true /* is_fctl */, (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information),
+ CIFSMaxBufSize, NULL, NULL);
+- free_xid(xid);
+ filemap_invalidate_unlock(inode->i_mapping);
++out:
++ inode_unlock(inode);
++ free_xid(xid);
+ return rc;
+ }
+
+@@ -3773,7 +3803,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
+ if (rc)
+ goto out;
+
+- if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0)
++ if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)
+ smb2_set_sparse(xid, tcon, cfile, inode, false);
+
+ eof = cpu_to_le64(off + len);
+@@ -4250,11 +4280,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+ }
+ }
+
++#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ static bool
+ smb2_is_read_op(__u32 oplock)
+ {
+ return oplock == SMB2_OPLOCK_LEVEL_II;
+ }
++#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+
+ static bool
+ smb21_is_read_op(__u32 oplock)
+@@ -4384,69 +4416,82 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
+ memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
+ }
+
+-/* We can not use the normal sg_set_buf() as we will sometimes pass a
+- * stack object as buf.
+- */
+-static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
+- unsigned int buflen)
++static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst,
++ int num_rqst, const u8 *sig, u8 **iv,
++ struct aead_request **req, struct scatterlist **sgl,
++ unsigned int *num_sgs)
+ {
+- void *addr;
+- /*
+- * VMAP_STACK (at least) puts stack into the vmalloc address space
+- */
+- if (is_vmalloc_addr(buf))
+- addr = vmalloc_to_page(buf);
+- else
+- addr = virt_to_page(buf);
+- sg_set_page(sg, addr, buflen, offset_in_page(buf));
++ unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
++ unsigned int iv_size = crypto_aead_ivsize(tfm);
++ unsigned int len;
++ u8 *p;
++
++ *num_sgs = cifs_get_num_sgs(rqst, num_rqst, sig);
++
++ len = iv_size;
++ len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1);
++ len = ALIGN(len, crypto_tfm_ctx_alignment());
++ len += req_size;
++ len = ALIGN(len, __alignof__(struct scatterlist));
++ len += *num_sgs * sizeof(**sgl);
++
++ p = kmalloc(len, GFP_ATOMIC);
++ if (!p)
++ return NULL;
++
++ *iv = (u8 *)PTR_ALIGN(p, crypto_aead_alignmask(tfm) + 1);
++ *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size,
++ crypto_tfm_ctx_alignment());
++ *sgl = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size,
++ __alignof__(struct scatterlist));
++ return p;
+ }
+
+-/* Assumes the first rqst has a transform header as the first iov.
+- * I.e.
+- * rqst[0].rq_iov[0] is transform header
+- * rqst[0].rq_iov[1+] data to be encrypted/decrypted
+- * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
+- */
+-static struct scatterlist *
+-init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
++static void *smb2_get_aead_req(struct crypto_aead *tfm, const struct smb_rqst *rqst,
++ int num_rqst, const u8 *sig, u8 **iv,
++ struct aead_request **req, struct scatterlist **sgl)
+ {
+- unsigned int sg_len;
++ unsigned int off, len, skip;
+ struct scatterlist *sg;
+- unsigned int i;
+- unsigned int j;
+- unsigned int idx = 0;
+- int skip;
+-
+- sg_len = 1;
+- for (i = 0; i < num_rqst; i++)
+- sg_len += rqst[i].rq_nvec + rqst[i].rq_npages;
++ unsigned int num_sgs;
++ unsigned long addr;
++ int i, j;
++ void *p;
+
+- sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL);
+- if (!sg)
++ p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, sgl, &num_sgs);
++ if (!p)
+ return NULL;
+
+- sg_init_table(sg, sg_len);
++ sg_init_table(*sgl, num_sgs);
++ sg = *sgl;
++
++ /* Assumes the first rqst has a transform header as the first iov.
++ * I.e.
++ * rqst[0].rq_iov[0] is transform header
++ * rqst[0].rq_iov[1+] data to be encrypted/decrypted
++ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
++ */
+ for (i = 0; i < num_rqst; i++) {
++ /*
++ * The first rqst has a transform header where the
++ * first 20 bytes are not part of the encrypted blob.
++ */
+ for (j = 0; j < rqst[i].rq_nvec; j++) {
+- /*
+- * The first rqst has a transform header where the
+- * first 20 bytes are not part of the encrypted blob
+- */
+- skip = (i == 0) && (j == 0) ? 20 : 0;
+- smb2_sg_set_buf(&sg[idx++],
+- rqst[i].rq_iov[j].iov_base + skip,
+- rqst[i].rq_iov[j].iov_len - skip);
+- }
++ struct kvec *iov = &rqst[i].rq_iov[j];
+
++ skip = (i == 0) && (j == 0) ? 20 : 0;
++ addr = (unsigned long)iov->iov_base + skip;
++ len = iov->iov_len - skip;
++ sg = cifs_sg_set_buf(sg, (void *)addr, len);
++ }
+ for (j = 0; j < rqst[i].rq_npages; j++) {
+- unsigned int len, offset;
+-
+- rqst_page_get_length(&rqst[i], j, &len, &offset);
+- sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset);
++ rqst_page_get_length(&rqst[i], j, &len, &off);
++ sg_set_page(sg++, rqst[i].rq_pages[j], len, off);
+ }
+ }
+- smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE);
+- return sg;
++ cifs_sg_set_buf(sg, sig, SMB2_SIGNATURE_SIZE);
++
++ return p;
+ }
+
+ static int
+@@ -4490,11 +4535,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
+ u8 sign[SMB2_SIGNATURE_SIZE] = {};
+ u8 key[SMB3_ENC_DEC_KEY_SIZE];
+ struct aead_request *req;
+- char *iv;
+- unsigned int iv_len;
++ u8 *iv;
+ DECLARE_CRYPTO_WAIT(wait);
+ struct crypto_aead *tfm;
+ unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
++ void *creq;
+
+ rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key);
+ if (rc) {
+@@ -4529,32 +4574,15 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
+ return rc;
+ }
+
+- req = aead_request_alloc(tfm, GFP_KERNEL);
+- if (!req) {
+- cifs_server_dbg(VFS, "%s: Failed to alloc aead request\n", __func__);
++ creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg);
++ if (unlikely(!creq))
+ return -ENOMEM;
+- }
+
+ if (!enc) {
+ memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
+ crypt_len += SMB2_SIGNATURE_SIZE;
+ }
+
+- sg = init_sg(num_rqst, rqst, sign);
+- if (!sg) {
+- cifs_server_dbg(VFS, "%s: Failed to init sg\n", __func__);
+- rc = -ENOMEM;
+- goto free_req;
+- }
+-
+- iv_len = crypto_aead_ivsize(tfm);
+- iv = kzalloc(iv_len, GFP_KERNEL);
+- if (!iv) {
+- cifs_server_dbg(VFS, "%s: Failed to alloc iv\n", __func__);
+- rc = -ENOMEM;
+- goto free_sg;
+- }
+-
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
+ memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
+@@ -4563,6 +4591,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
+ memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
+ }
+
++ aead_request_set_tfm(req, tfm);
+ aead_request_set_crypt(req, sg, sg, crypt_len, iv);
+ aead_request_set_ad(req, assoc_data_len);
+
+@@ -4575,11 +4604,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
+ if (!rc && enc)
+ memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
+
+- kfree(iv);
+-free_sg:
+- kfree(sg);
+-free_req:
+- kfree(req);
++ kfree_sensitive(creq);
+ return rc;
+ }
+
+@@ -5350,7 +5375,7 @@ out:
+ return rc;
+ }
+
+-
++#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ struct smb_version_operations smb20_operations = {
+ .compare_fids = smb2_compare_fids,
+ .setup_request = smb2_setup_request,
+@@ -5449,6 +5474,7 @@ struct smb_version_operations smb20_operations = {
+ .is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
+ };
++#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+
+ struct smb_version_operations smb21_operations = {
+ .compare_fids = smb2_compare_fids,
+@@ -5780,6 +5806,7 @@ struct smb_version_operations smb311_operations = {
+ .is_network_name_deleted = smb2_is_network_name_deleted,
+ };
+
++#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ struct smb_version_values smb20_values = {
+ .version_string = SMB20_VERSION_STRING,
+ .protocol_id = SMB20_PROT_ID,
+@@ -5800,6 +5827,7 @@ struct smb_version_values smb20_values = {
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease),
+ };
++#endif /* ALLOW_INSECURE_LEGACY */
+
+ struct smb_version_values smb21_values = {
+ .version_string = SMB21_VERSION_STRING,
+diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
+index 7829c590eeac6..f51fea2e808d1 100644
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -156,7 +156,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ if (tcon == NULL)
+ return 0;
+
+- if (smb2_command == SMB2_TREE_CONNECT)
++ /*
++ * Need to also skip SMB2_IOCTL because it is used for checking nested dfs links in
++ * cifs_tree_connect().
++ */
++ if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
+ return 0;
+
+ if (tcon->tidStatus == CifsExiting) {
+@@ -268,6 +272,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ ses->binding_chan = NULL;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto failed;
++ } else if (rc) {
++ mutex_unlock(&ses->session_mutex);
++ goto out;
+ }
+ }
+ /*
+@@ -930,16 +937,17 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+ } else if (rc != 0)
+ goto neg_exit;
+
++ rc = -EIO;
+ if (strcmp(server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_server_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+- return -EIO;
++ goto neg_exit;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ cifs_server_dbg(VFS,
+ "SMB2.1 dialect returned but not requested\n");
+- return -EIO;
++ goto neg_exit;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
+ /* ops set to 3.0 by default for default so update */
+ server->ops = &smb311_operations;
+@@ -950,7 +958,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_server_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+- return -EIO;
++ goto neg_exit;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ /* ops set to 3.0 by default for default so update */
+ server->ops = &smb21_operations;
+@@ -964,7 +972,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+ /* if requested single dialect ensure returned dialect matched */
+ cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n",
+ le16_to_cpu(rsp->DialectRevision));
+- return -EIO;
++ goto neg_exit;
+ }
+
+ cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
+@@ -982,9 +990,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+ else {
+ cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n",
+ le16_to_cpu(rsp->DialectRevision));
+- rc = -EIO;
+ goto neg_exit;
+ }
++
++ rc = 0;
+ server->dialect = le16_to_cpu(rsp->DialectRevision);
+
+ /*
+@@ -1132,9 +1141,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
+ pneg_inbuf->Dialects[0] =
+ cpu_to_le16(server->vals->protocol_id);
+ pneg_inbuf->DialectCount = cpu_to_le16(1);
+- /* structure is big enough for 3 dialects, sending only 1 */
++ /* structure is big enough for 4 dialects, sending only 1 */
+ inbuflen = sizeof(*pneg_inbuf) -
+- sizeof(pneg_inbuf->Dialects[0]) * 2;
++ sizeof(pneg_inbuf->Dialects[0]) * 3;
+ }
+
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+@@ -2349,7 +2358,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
+ unsigned int acelen, acl_size, ace_count;
+ unsigned int owner_offset = 0;
+ unsigned int group_offset = 0;
+- struct smb3_acl acl;
++ struct smb3_acl acl = {};
+
+ *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8);
+
+@@ -2422,6 +2431,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
+ acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
+ acl.AclSize = cpu_to_le16(acl_size);
+ acl.AceCount = cpu_to_le16(ace_count);
++ /* acl.Sbz1 and Sbz2 MBZ so are not set here, but initialized above */
+ memcpy(aclptr, &acl, sizeof(struct smb3_acl));
+
+ buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
+@@ -3989,12 +3999,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
+ (struct smb2_sync_hdr *)rdata->iov[0].iov_base;
+ struct cifs_credits credits = { .value = 0, .instance = 0 };
+ struct smb_rqst rqst = { .rq_iov = &rdata->iov[1],
+- .rq_nvec = 1,
+- .rq_pages = rdata->pages,
+- .rq_offset = rdata->page_offset,
+- .rq_npages = rdata->nr_pages,
+- .rq_pagesz = rdata->pagesz,
+- .rq_tailsz = rdata->tailsz };
++ .rq_nvec = 1, };
++
++ if (rdata->got_bytes) {
++ rqst.rq_pages = rdata->pages;
++ rqst.rq_offset = rdata->page_offset;
++ rqst.rq_npages = rdata->nr_pages;
++ rqst.rq_pagesz = rdata->pagesz;
++ rqst.rq_tailsz = rdata->tailsz;
++ }
+
+ WARN_ONCE(rdata->server != mid->server,
+ "rdata server %p != mid server %p",
+diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
+index f59b956f9d250..390cc5e8c7467 100644
+--- a/fs/cifs/smb2transport.c
++++ b/fs/cifs/smb2transport.c
+@@ -221,9 +221,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ struct smb_rqst drqst;
+
+ ses = smb2_find_smb_ses(server, shdr->SessionId);
+- if (!ses) {
++ if (unlikely(!ses)) {
+ cifs_server_dbg(VFS, "%s: Could not find session\n", __func__);
+- return 0;
++ return -ENOENT;
+ }
+
+ memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
+@@ -542,8 +542,10 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ u8 key[SMB3_SIGN_KEY_SIZE];
+
+ rc = smb2_get_sign_key(shdr->SessionId, server, key);
+- if (rc)
+- return 0;
++ if (unlikely(rc)) {
++ cifs_server_dbg(VFS, "%s: Could not get signing key\n", __func__);
++ return rc;
++ }
+
+ if (allocate_crypto) {
+ rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc);
+diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
+index 31ef64eb7fbb9..a9a5d27b8d38b 100644
+--- a/fs/cifs/smbdirect.c
++++ b/fs/cifs/smbdirect.c
+@@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
+ destroy_workqueue(info->workqueue);
+ log_rdma_event(INFO, "rdma session destroyed\n");
+ kfree(info);
++ server->smbd_conn = NULL;
+ }
+
+ /*
+@@ -1701,6 +1702,7 @@ static struct smbd_connection *_smbd_get_connection(
+
+ allocate_mr_failed:
+ /* At this point, need to a full transport shutdown */
++ server->smbd_conn = info;
+ smbd_destroy(server);
+ return NULL;
+
+@@ -2249,6 +2251,7 @@ static int allocate_mr_list(struct smbd_connection *info)
+ atomic_set(&info->mr_ready_count, 0);
+ atomic_set(&info->mr_used_count, 0);
+ init_waitqueue_head(&info->wait_for_mr_cleanup);
++ INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
+ /* Allocate more MRs (2x) than hardware responder_resources */
+ for (i = 0; i < info->responder_resources * 2; i++) {
+ smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
+@@ -2276,13 +2279,13 @@ static int allocate_mr_list(struct smbd_connection *info)
+ list_add_tail(&smbdirect_mr->list, &info->mr_list);
+ atomic_inc(&info->mr_ready_count);
+ }
+- INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
+ return 0;
+
+ out:
+ kfree(smbdirect_mr);
+
+ list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
++ list_del(&smbdirect_mr->list);
+ ib_dereg_mr(smbdirect_mr->mr);
+ kfree(smbdirect_mr->sgl);
+ kfree(smbdirect_mr);
+diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
+index b7379329b741c..49b7edbe34975 100644
+--- a/fs/cifs/transport.c
++++ b/fs/cifs/transport.c
+@@ -196,10 +196,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
+
+ *sent = 0;
+
+- smb_msg->msg_name = (struct sockaddr *) &server->dstaddr;
+- smb_msg->msg_namelen = sizeof(struct sockaddr);
+- smb_msg->msg_control = NULL;
+- smb_msg->msg_controllen = 0;
+ if (server->noblocksnd)
+ smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
+ else
+@@ -303,7 +299,7 @@ static int
+ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst)
+ {
+- int rc = 0;
++ int rc;
+ struct kvec *iov;
+ int n_vec;
+ unsigned int send_length = 0;
+@@ -311,9 +307,10 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ sigset_t mask, oldmask;
+ size_t total_len = 0, sent, size;
+ struct socket *ssocket = server->ssocket;
+- struct msghdr smb_msg;
++ struct msghdr smb_msg = {};
+ __be32 rfc1002_marker;
+
++ cifs_in_send_inc(server);
+ if (cifs_rdma_enabled(server)) {
+ /* return -EAGAIN when connecting or reconnecting */
+ rc = -EAGAIN;
+@@ -322,14 +319,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ goto smbd_done;
+ }
+
++ rc = -EAGAIN;
+ if (ssocket == NULL)
+- return -EAGAIN;
++ goto out;
+
++ rc = -ERESTARTSYS;
+ if (fatal_signal_pending(current)) {
+ cifs_dbg(FYI, "signal pending before send request\n");
+- return -ERESTARTSYS;
++ goto out;
+ }
+
++ rc = 0;
+ /* cork the socket */
+ tcp_sock_set_cork(ssocket->sk, true);
+
+@@ -442,7 +442,8 @@ smbd_done:
+ rc);
+ else if (rc > 0)
+ rc = 0;
+-
++out:
++ cifs_in_send_dec(server);
+ return rc;
+ }
+
+@@ -859,9 +860,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
+ * I/O response may come back and free the mid entry on another thread.
+ */
+ cifs_save_when_sent(mid);
+- cifs_in_send_inc(server);
+ rc = smb_send_rqst(server, 1, rqst, flags);
+- cifs_in_send_dec(server);
+
+ if (rc < 0) {
+ revert_current_mid(server, mid->credits);
+@@ -1044,14 +1043,17 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
+ if (!ses)
+ return NULL;
+
++ spin_lock(&ses->chan_lock);
+ if (!ses->binding) {
+ /* round robin */
+ if (ses->chan_count > 1) {
+ index = (uint)atomic_inc_return(&ses->chan_seq);
+ index %= ses->chan_count;
+ }
++ spin_unlock(&ses->chan_lock);
+ return ses->chans[index].server;
+ } else {
++ spin_unlock(&ses->chan_lock);
+ return cifs_ses_server(ses);
+ }
+ }
+@@ -1150,9 +1152,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ else
+ midQ[i]->callback = cifs_compound_last_callback;
+ }
+- cifs_in_send_inc(server);
+ rc = smb_send_rqst(server, num_rqst, rqst, flags);
+- cifs_in_send_dec(server);
+
+ for (i = 0; i < num_rqst; i++)
+ cifs_save_when_sent(midQ[i]);
+@@ -1389,9 +1389,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+
+- cifs_in_send_inc(server);
+ rc = smb_send(server, in_buf, len);
+- cifs_in_send_dec(server);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+@@ -1528,9 +1526,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
+ }
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+- cifs_in_send_inc(server);
+ rc = smb_send(server, in_buf, len);
+- cifs_in_send_dec(server);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
+index 7d8b72d67c803..9d486fbbfbbde 100644
+--- a/fs/cifs/xattr.c
++++ b/fs/cifs/xattr.c
+@@ -175,11 +175,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
+ switch (handler->flags) {
+ case XATTR_CIFS_NTSD_FULL:
+ aclflags = (CIFS_ACL_OWNER |
++ CIFS_ACL_GROUP |
+ CIFS_ACL_DACL |
+ CIFS_ACL_SACL);
+ break;
+ case XATTR_CIFS_NTSD:
+ aclflags = (CIFS_ACL_OWNER |
++ CIFS_ACL_GROUP |
+ CIFS_ACL_DACL);
+ break;
+ case XATTR_CIFS_ACL:
+diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
+index eb3b1898da462..610484c90260b 100644
+--- a/fs/coda/upcall.c
++++ b/fs/coda/upcall.c
+@@ -790,7 +790,7 @@ static int coda_upcall(struct venus_comm *vcp,
+ sig_req = kmalloc(sizeof(struct upc_req), GFP_KERNEL);
+ if (!sig_req) goto exit;
+
+- sig_inputArgs = kvzalloc(sizeof(struct coda_in_hdr), GFP_KERNEL);
++ sig_inputArgs = kvzalloc(sizeof(*sig_inputArgs), GFP_KERNEL);
+ if (!sig_inputArgs) {
+ kfree(sig_req);
+ goto exit;
+diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
+index 1466b5d01cbb9..ec6519e1ca3bf 100644
+--- a/fs/configfs/dir.c
++++ b/fs/configfs/dir.c
+@@ -34,6 +34,14 @@
+ */
+ DEFINE_SPINLOCK(configfs_dirent_lock);
+
++/*
++ * All of link_obj/unlink_obj/link_group/unlink_group require that
++ * subsys->su_mutex is held.
++ * But parent configfs_subsystem is NULL when config_item is root.
++ * Use this mutex when config_item is root.
++ */
++static DEFINE_MUTEX(configfs_subsystem_mutex);
++
+ static void configfs_d_iput(struct dentry * dentry,
+ struct inode * inode)
+ {
+@@ -308,6 +316,7 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry,
+ return 0;
+
+ out_remove:
++ configfs_put(dentry->d_fsdata);
+ configfs_remove_dirent(dentry);
+ return PTR_ERR(inode);
+ }
+@@ -374,6 +383,7 @@ int configfs_create_link(struct configfs_dirent *target, struct dentry *parent,
+ return 0;
+
+ out_remove:
++ configfs_put(dentry->d_fsdata);
+ configfs_remove_dirent(dentry);
+ return PTR_ERR(inode);
+ }
+@@ -1780,8 +1790,8 @@ void configfs_unregister_group(struct config_group *group)
+ configfs_detach_group(&group->cg_item);
+ d_inode(dentry)->i_flags |= S_DEAD;
+ dont_mount(dentry);
++ d_drop(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
+- d_delete(dentry);
+ inode_unlock(d_inode(parent));
+
+ dput(dentry);
+@@ -1859,7 +1869,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
+ group->cg_item.ci_name = group->cg_item.ci_namebuf;
+
+ sd = root->d_fsdata;
++ mutex_lock(&configfs_subsystem_mutex);
+ link_group(to_config_group(sd->s_element), group);
++ mutex_unlock(&configfs_subsystem_mutex);
+
+ inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
+
+@@ -1884,7 +1896,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
+ inode_unlock(d_inode(root));
+
+ if (err) {
++ mutex_lock(&configfs_subsystem_mutex);
+ unlink_group(group);
++ mutex_unlock(&configfs_subsystem_mutex);
+ configfs_release_fs();
+ }
+ put_fragment(frag);
+@@ -1922,16 +1936,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
+ configfs_detach_group(&group->cg_item);
+ d_inode(dentry)->i_flags |= S_DEAD;
+ dont_mount(dentry);
+- fsnotify_rmdir(d_inode(root), dentry);
+ inode_unlock(d_inode(dentry));
+
+- d_delete(dentry);
++ d_drop(dentry);
++ fsnotify_rmdir(d_inode(root), dentry);
+
+ inode_unlock(d_inode(root));
+
+ dput(dentry);
+
++ mutex_lock(&configfs_subsystem_mutex);
+ unlink_group(group);
++ mutex_unlock(&configfs_subsystem_mutex);
+ configfs_release_fs();
+ }
+
+diff --git a/fs/coredump.c b/fs/coredump.c
+index 3224dee44d30e..26eb5a095832f 100644
+--- a/fs/coredump.c
++++ b/fs/coredump.c
+@@ -41,6 +41,7 @@
+ #include <linux/fs.h>
+ #include <linux/path.h>
+ #include <linux/timekeeping.h>
++#include <linux/elf.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -52,6 +53,9 @@
+
+ #include <trace/events/sched.h>
+
++static bool dump_vma_snapshot(struct coredump_params *cprm);
++static void free_vma_snapshot(struct coredump_params *cprm);
++
+ int core_uses_pid;
+ unsigned int core_pipe_limit;
+ char core_pattern[CORENAME_MAX_SIZE] = "core";
+@@ -600,6 +604,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
+ * by any locks.
+ */
+ .mm_flags = mm->flags,
++ .vma_meta = NULL,
+ };
+
+ audit_core_dumps(siginfo->si_signo);
+@@ -814,6 +819,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
+ pr_info("Core dump to |%s disabled\n", cn.corename);
+ goto close_fail;
+ }
++ if (!dump_vma_snapshot(&cprm))
++ goto close_fail;
++
+ file_start_write(cprm.file);
+ core_dumped = binfmt->core_dump(&cprm);
+ /*
+@@ -827,6 +835,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
+ dump_emit(&cprm, "", 1);
+ }
+ file_end_write(cprm.file);
++ free_vma_snapshot(&cprm);
+ }
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(cprm.file);
+@@ -992,6 +1001,8 @@ static bool always_dump_vma(struct vm_area_struct *vma)
+ return false;
+ }
+
++#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
++
+ /*
+ * Decide how much of @vma's contents should be included in a core dump.
+ */
+@@ -1051,9 +1062,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
+ * dump the first page to aid in determining what was mapped here.
+ */
+ if (FILTER(ELF_HEADERS) &&
+- vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
+- (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
+- return PAGE_SIZE;
++ vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
++ if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
++ return PAGE_SIZE;
++
++ /*
++ * ELF libraries aren't always executable.
++ * We'll want to check whether the mapping starts with the ELF
++ * magic, but not now - we're holding the mmap lock,
++ * so copy_from_user() doesn't work here.
++ * Use a placeholder instead, and fix it up later in
++ * dump_vma_snapshot().
++ */
++ return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
++ }
+
+ #undef FILTER
+
+@@ -1090,18 +1112,29 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+ return gate_vma;
+ }
+
++static void free_vma_snapshot(struct coredump_params *cprm)
++{
++ if (cprm->vma_meta) {
++ int i;
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct file *file = cprm->vma_meta[i].file;
++ if (file)
++ fput(file);
++ }
++ kvfree(cprm->vma_meta);
++ cprm->vma_meta = NULL;
++ }
++}
++
+ /*
+ * Under the mmap_lock, take a snapshot of relevant information about the task's
+ * VMAs.
+ */
+-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+- struct core_vma_metadata **vma_meta,
+- size_t *vma_data_size_ptr)
++static bool dump_vma_snapshot(struct coredump_params *cprm)
+ {
+ struct vm_area_struct *vma, *gate_vma;
+ struct mm_struct *mm = current->mm;
+ int i;
+- size_t vma_data_size = 0;
+
+ /*
+ * Once the stack expansion code is fixed to not change VMA bounds
+@@ -1109,36 +1142,51 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+ * mmap_lock in read mode.
+ */
+ if (mmap_write_lock_killable(mm))
+- return -EINTR;
++ return false;
+
++ cprm->vma_data_size = 0;
+ gate_vma = get_gate_vma(mm);
+- *vma_count = mm->map_count + (gate_vma ? 1 : 0);
++ cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);
+
+- *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
+- if (!*vma_meta) {
++ cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
++ if (!cprm->vma_meta) {
+ mmap_write_unlock(mm);
+- return -ENOMEM;
++ return false;
+ }
+
+ for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+ vma = next_vma(vma, gate_vma), i++) {
+- struct core_vma_metadata *m = (*vma_meta) + i;
++ struct core_vma_metadata *m = cprm->vma_meta + i;
+
+ m->start = vma->vm_start;
+ m->end = vma->vm_end;
+ m->flags = vma->vm_flags;
+ m->dump_size = vma_dump_size(vma, cprm->mm_flags);
++ m->pgoff = vma->vm_pgoff;
+
+- vma_data_size += m->dump_size;
++ m->file = vma->vm_file;
++ if (m->file)
++ get_file(m->file);
+ }
+
+ mmap_write_unlock(mm);
+
+- if (WARN_ON(i != *vma_count)) {
+- kvfree(*vma_meta);
+- return -EFAULT;
++ for (i = 0; i < cprm->vma_count; i++) {
++ struct core_vma_metadata *m = cprm->vma_meta + i;
++
++ if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
++ char elfmag[SELFMAG];
++
++ if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
++ memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
++ m->dump_size = 0;
++ } else {
++ m->dump_size = PAGE_SIZE;
++ }
++ }
++
++ cprm->vma_data_size += m->dump_size;
+ }
+
+- *vma_data_size_ptr = vma_data_size;
+- return 0;
++ return true;
+ }
+diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
+index 3fa965eb3336d..373c434b375c0 100644
+--- a/fs/crypto/fscrypt_private.h
++++ b/fs/crypto/fscrypt_private.h
+@@ -220,7 +220,7 @@ struct fscrypt_info {
+ * will be NULL if the master key was found in a process-subscribed
+ * keyring rather than in the filesystem-level keyring.
+ */
+- struct key *ci_master_key;
++ struct fscrypt_master_key *ci_master_key;
+
+ /*
+ * Link in list of inodes that were unlocked with the master key.
+@@ -430,6 +430,40 @@ struct fscrypt_master_key_secret {
+ */
+ struct fscrypt_master_key {
+
++ /*
++ * Back-pointer to the super_block of the filesystem to which this
++ * master key has been added. Only valid if ->mk_active_refs > 0.
++ */
++ struct super_block *mk_sb;
++
++ /*
++ * Link in ->mk_sb->s_master_keys->key_hashtable.
++ * Only valid if ->mk_active_refs > 0.
++ */
++ struct hlist_node mk_node;
++
++ /* Semaphore that protects ->mk_secret and ->mk_users */
++ struct rw_semaphore mk_sem;
++
++ /*
++ * Active and structural reference counts. An active ref guarantees
++ * that the struct continues to exist, continues to be in the keyring
++ * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g.
++ * ->mk_direct_keys) that have been prepared continue to exist.
++ * A structural ref only guarantees that the struct continues to exist.
++ *
++ * There is one active ref associated with ->mk_secret being present,
++ * and one active ref for each inode in ->mk_decrypted_inodes.
++ *
++ * There is one structural ref associated with the active refcount being
++ * nonzero. Finding a key in the keyring also takes a structural ref,
++ * which is then held temporarily while the key is operated on.
++ */
++ refcount_t mk_active_refs;
++ refcount_t mk_struct_refs;
++
++ struct rcu_head mk_rcu_head;
++
+ /*
+ * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is
+ * executed, this is wiped and no new inodes can be unlocked with this
+@@ -438,7 +472,10 @@ struct fscrypt_master_key {
+ * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
+ * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
+ *
+- * Locking: protected by this master key's key->sem.
++ * While ->mk_secret is present, one ref in ->mk_active_refs is held.
++ *
++ * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs
++ * associated with this field is protected by ->mk_sem as well.
+ */
+ struct fscrypt_master_key_secret mk_secret;
+
+@@ -459,22 +496,12 @@ struct fscrypt_master_key {
+ *
+ * This is NULL for v1 policy keys; those can only be added by root.
+ *
+- * Locking: in addition to this keyring's own semaphore, this is
+- * protected by this master key's key->sem, so we can do atomic
+- * search+insert. It can also be searched without taking any locks, but
+- * in that case the returned key may have already been removed.
++ * Locking: protected by ->mk_sem. (We don't just rely on the keyrings
++ * subsystem semaphore ->mk_users->sem, as we need support for atomic
++ * search+insert along with proper synchronization with ->mk_secret.)
+ */
+ struct key *mk_users;
+
+- /*
+- * Length of ->mk_decrypted_inodes, plus one if mk_secret is present.
+- * Once this goes to 0, the master key is removed from ->s_master_keys.
+- * The 'struct fscrypt_master_key' will continue to live as long as the
+- * 'struct key' whose payload it is, but we won't let this reference
+- * count rise again.
+- */
+- refcount_t mk_refcount;
+-
+ /*
+ * List of inodes that were unlocked using this key. This allows the
+ * inodes to be evicted efficiently if the key is removed.
+@@ -500,10 +527,10 @@ static inline bool
+ is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
+ {
+ /*
+- * The READ_ONCE() is only necessary for fscrypt_drop_inode() and
+- * fscrypt_key_describe(). These run in atomic context, so they can't
+- * take the key semaphore and thus 'secret' can change concurrently
+- * which would be a data race. But they only need to know whether the
++ * The READ_ONCE() is only necessary for fscrypt_drop_inode().
++ * fscrypt_drop_inode() runs in atomic context, so it can't take the key
++ * semaphore and thus 'secret' can change concurrently which would be a
++	 * data race.  But fscrypt_drop_inode() only needs to know whether the
+ * secret *was* present at the time of check, so READ_ONCE() suffices.
+ */
+ return READ_ONCE(secret->size) != 0;
+@@ -532,7 +559,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
+ return 0;
+ }
+
+-struct key *
++void fscrypt_put_master_key(struct fscrypt_master_key *mk);
++
++void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk);
++
++struct fscrypt_master_key *
+ fscrypt_find_master_key(struct super_block *sb,
+ const struct fscrypt_key_specifier *mk_spec);
+
+@@ -549,8 +580,9 @@ int __init fscrypt_init_keyring(void);
+ struct fscrypt_mode {
+ const char *friendly_name;
+ const char *cipher_str;
+- int keysize;
+- int ivsize;
++ int keysize; /* key size in bytes */
++ int security_strength; /* security strength in bytes */
++ int ivsize; /* IV size in bytes */
+ int logged_impl_name;
+ enum blk_crypto_mode_num blk_crypto_mode;
+ };
+diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c
+index e0ec210555053..7607d18b35fc0 100644
+--- a/fs/crypto/hkdf.c
++++ b/fs/crypto/hkdf.c
+@@ -16,9 +16,14 @@
+
+ /*
+ * HKDF supports any unkeyed cryptographic hash algorithm, but fscrypt uses
+- * SHA-512 because it is reasonably secure and efficient; and since it produces
+- * a 64-byte digest, deriving an AES-256-XTS key preserves all 64 bytes of
+- * entropy from the master key and requires only one iteration of HKDF-Expand.
++ * SHA-512 because it is well-established, secure, and reasonably efficient.
++ *
++ * HKDF-SHA256 was also considered, as its 256-bit security strength would be
++ * sufficient here. A 512-bit security strength is "nice to have", though.
++ * Also, on 64-bit CPUs, SHA-512 is usually just as fast as SHA-256. In the
++ * common case of deriving an AES-256-XTS key (512 bits), that can result in
++ * HKDF-SHA512 being much faster than HKDF-SHA256, as the longer digest size of
++ * SHA-512 causes HKDF-Expand to only need to do one iteration rather than two.
+ */
+ #define HKDF_HMAC_ALG "hmac(sha512)"
+ #define HKDF_HASHLEN SHA512_DIGEST_SIZE
+diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
+index af74599ae1cf0..be5c650e49573 100644
+--- a/fs/crypto/hooks.c
++++ b/fs/crypto/hooks.c
+@@ -5,8 +5,6 @@
+ * Encryption hooks for higher-level filesystem operations.
+ */
+
+-#include <linux/key.h>
+-
+ #include "fscrypt_private.h"
+
+ /**
+@@ -142,7 +140,6 @@ int fscrypt_prepare_setflags(struct inode *inode,
+ unsigned int oldflags, unsigned int flags)
+ {
+ struct fscrypt_info *ci;
+- struct key *key;
+ struct fscrypt_master_key *mk;
+ int err;
+
+@@ -158,14 +155,13 @@ int fscrypt_prepare_setflags(struct inode *inode,
+ ci = inode->i_crypt_info;
+ if (ci->ci_policy.version != FSCRYPT_POLICY_V2)
+ return -EINVAL;
+- key = ci->ci_master_key;
+- mk = key->payload.data[0];
+- down_read(&key->sem);
++ mk = ci->ci_master_key;
++ down_read(&mk->mk_sem);
+ if (is_master_key_secret_present(&mk->mk_secret))
+ err = fscrypt_derive_dirhash_key(ci, mk);
+ else
+ err = -ENOKEY;
+- up_read(&key->sem);
++ up_read(&mk->mk_sem);
+ return err;
+ }
+ return 0;
+diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
+index 0b3ffbb4faf4a..02f8bf8bd54da 100644
+--- a/fs/crypto/keyring.c
++++ b/fs/crypto/keyring.c
+@@ -18,6 +18,7 @@
+ * information about these ioctls.
+ */
+
++#include <asm/unaligned.h>
+ #include <crypto/skcipher.h>
+ #include <linux/key-type.h>
+ #include <linux/random.h>
+@@ -25,6 +26,18 @@
+
+ #include "fscrypt_private.h"
+
++/* The master encryption keys for a filesystem (->s_master_keys) */
++struct fscrypt_keyring {
++ /*
++ * Lock that protects ->key_hashtable. It does *not* protect the
++ * fscrypt_master_key structs themselves.
++ */
++ spinlock_t lock;
++
++ /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */
++ struct hlist_head key_hashtable[128];
++};
++
+ static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret)
+ {
+ fscrypt_destroy_hkdf(&secret->hkdf);
+@@ -38,20 +51,70 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst,
+ memzero_explicit(src, sizeof(*src));
+ }
+
+-static void free_master_key(struct fscrypt_master_key *mk)
++static void fscrypt_free_master_key(struct rcu_head *head)
++{
++ struct fscrypt_master_key *mk =
++ container_of(head, struct fscrypt_master_key, mk_rcu_head);
++ /*
++ * The master key secret and any embedded subkeys should have already
++ * been wiped when the last active reference to the fscrypt_master_key
++ * struct was dropped; doing it here would be unnecessarily late.
++ * Nevertheless, use kfree_sensitive() in case anything was missed.
++ */
++ kfree_sensitive(mk);
++}
++
++void fscrypt_put_master_key(struct fscrypt_master_key *mk)
++{
++ if (!refcount_dec_and_test(&mk->mk_struct_refs))
++ return;
++ /*
++ * No structural references left, so free ->mk_users, and also free the
++ * fscrypt_master_key struct itself after an RCU grace period ensures
++ * that concurrent keyring lookups can no longer find it.
++ */
++ WARN_ON(refcount_read(&mk->mk_active_refs) != 0);
++ key_put(mk->mk_users);
++ mk->mk_users = NULL;
++ call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key);
++}
++
++void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk)
+ {
++ struct super_block *sb = mk->mk_sb;
++ struct fscrypt_keyring *keyring = sb->s_master_keys;
+ size_t i;
+
+- wipe_master_key_secret(&mk->mk_secret);
++ if (!refcount_dec_and_test(&mk->mk_active_refs))
++ return;
++ /*
++ * No active references left, so complete the full removal of this
++ * fscrypt_master_key struct by removing it from the keyring and
++ * destroying any subkeys embedded in it.
++ */
++
++ spin_lock(&keyring->lock);
++ hlist_del_rcu(&mk->mk_node);
++ spin_unlock(&keyring->lock);
++
++ /*
++ * ->mk_active_refs == 0 implies that ->mk_secret is not present and
++ * that ->mk_decrypted_inodes is empty.
++ */
++ WARN_ON(is_master_key_secret_present(&mk->mk_secret));
++ WARN_ON(!list_empty(&mk->mk_decrypted_inodes));
+
+ for (i = 0; i <= FSCRYPT_MODE_MAX; i++) {
+ fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]);
+ fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]);
+ fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]);
+ }
++ memzero_explicit(&mk->mk_ino_hash_key,
++ sizeof(mk->mk_ino_hash_key));
++ mk->mk_ino_hash_key_initialized = false;
+
+- key_put(mk->mk_users);
+- kfree_sensitive(mk);
++ /* Drop the structural ref associated with the active refs. */
++ fscrypt_put_master_key(mk);
+ }
+
+ static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
+@@ -61,44 +124,6 @@ static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
+ return master_key_spec_len(spec) != 0;
+ }
+
+-static int fscrypt_key_instantiate(struct key *key,
+- struct key_preparsed_payload *prep)
+-{
+- key->payload.data[0] = (struct fscrypt_master_key *)prep->data;
+- return 0;
+-}
+-
+-static void fscrypt_key_destroy(struct key *key)
+-{
+- free_master_key(key->payload.data[0]);
+-}
+-
+-static void fscrypt_key_describe(const struct key *key, struct seq_file *m)
+-{
+- seq_puts(m, key->description);
+-
+- if (key_is_positive(key)) {
+- const struct fscrypt_master_key *mk = key->payload.data[0];
+-
+- if (!is_master_key_secret_present(&mk->mk_secret))
+- seq_puts(m, ": secret removed");
+- }
+-}
+-
+-/*
+- * Type of key in ->s_master_keys. Each key of this type represents a master
+- * key which has been added to the filesystem. Its payload is a
+- * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents
+- * users from adding keys of this type via the keyrings syscalls rather than via
+- * the intended method of FS_IOC_ADD_ENCRYPTION_KEY.
+- */
+-static struct key_type key_type_fscrypt = {
+- .name = "._fscrypt",
+- .instantiate = fscrypt_key_instantiate,
+- .destroy = fscrypt_key_destroy,
+- .describe = fscrypt_key_describe,
+-};
+-
+ static int fscrypt_user_key_instantiate(struct key *key,
+ struct key_preparsed_payload *prep)
+ {
+@@ -131,32 +156,6 @@ static struct key_type key_type_fscrypt_user = {
+ .describe = fscrypt_user_key_describe,
+ };
+
+-/* Search ->s_master_keys or ->mk_users */
+-static struct key *search_fscrypt_keyring(struct key *keyring,
+- struct key_type *type,
+- const char *description)
+-{
+- /*
+- * We need to mark the keyring reference as "possessed" so that we
+- * acquire permission to search it, via the KEY_POS_SEARCH permission.
+- */
+- key_ref_t keyref = make_key_ref(keyring, true /* possessed */);
+-
+- keyref = keyring_search(keyref, type, description, false);
+- if (IS_ERR(keyref)) {
+- if (PTR_ERR(keyref) == -EAGAIN || /* not found */
+- PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */
+- keyref = ERR_PTR(-ENOKEY);
+- return ERR_CAST(keyref);
+- }
+- return key_ref_to_ptr(keyref);
+-}
+-
+-#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \
+- (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id))
+-
+-#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
+-
+ #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \
+ (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \
+ CONST_STRLEN("-users") + 1)
+@@ -164,21 +163,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring,
+ #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \
+ (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1)
+
+-static void format_fs_keyring_description(
+- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
+- const struct super_block *sb)
+-{
+- sprintf(description, "fscrypt-%s", sb->s_id);
+-}
+-
+-static void format_mk_description(
+- char description[FSCRYPT_MK_DESCRIPTION_SIZE],
+- const struct fscrypt_key_specifier *mk_spec)
+-{
+- sprintf(description, "%*phN",
+- master_key_spec_len(mk_spec), (u8 *)&mk_spec->u);
+-}
+-
+ static void format_mk_users_keyring_description(
+ char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE],
+ const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
+@@ -199,20 +183,15 @@ static void format_mk_user_description(
+ /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */
+ static int allocate_filesystem_keyring(struct super_block *sb)
+ {
+- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE];
+- struct key *keyring;
++ struct fscrypt_keyring *keyring;
+
+ if (sb->s_master_keys)
+ return 0;
+
+- format_fs_keyring_description(description, sb);
+- keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+- current_cred(), KEY_POS_SEARCH |
+- KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW,
+- KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+- if (IS_ERR(keyring))
+- return PTR_ERR(keyring);
+-
++ keyring = kzalloc(sizeof(*keyring), GFP_KERNEL);
++ if (!keyring)
++ return -ENOMEM;
++ spin_lock_init(&keyring->lock);
+ /*
+ * Pairs with the smp_load_acquire() in fscrypt_find_master_key().
+ * I.e., here we publish ->s_master_keys with a RELEASE barrier so that
+@@ -222,21 +201,80 @@ static int allocate_filesystem_keyring(struct super_block *sb)
+ return 0;
+ }
+
+-void fscrypt_sb_free(struct super_block *sb)
++/*
++ * Release all encryption keys that have been added to the filesystem, along
++ * with the keyring that contains them.
++ *
++ * This is called at unmount time. The filesystem's underlying block device(s)
++ * are still available at this time; this is important because after user file
++ * accesses have been allowed, this function may need to evict keys from the
++ * keyslots of an inline crypto engine, which requires the block device(s).
++ *
++ * This is also called when the super_block is being freed. This is needed to
++ * avoid a memory leak if mounting fails after the "test_dummy_encryption"
++ * option was processed, as in that case the unmount-time call isn't made.
++ */
++void fscrypt_destroy_keyring(struct super_block *sb)
+ {
+- key_put(sb->s_master_keys);
++ struct fscrypt_keyring *keyring = sb->s_master_keys;
++ size_t i;
++
++ if (!keyring)
++ return;
++
++ for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) {
++ struct hlist_head *bucket = &keyring->key_hashtable[i];
++ struct fscrypt_master_key *mk;
++ struct hlist_node *tmp;
++
++ hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) {
++ /*
++ * Since all inodes were already evicted, every key
++ * remaining in the keyring should have an empty inode
++ * list, and should only still be in the keyring due to
++ * the single active ref associated with ->mk_secret.
++ * There should be no structural refs beyond the one
++ * associated with the active ref.
++ */
++ WARN_ON(refcount_read(&mk->mk_active_refs) != 1);
++ WARN_ON(refcount_read(&mk->mk_struct_refs) != 1);
++ WARN_ON(!is_master_key_secret_present(&mk->mk_secret));
++ wipe_master_key_secret(&mk->mk_secret);
++ fscrypt_put_master_key_activeref(mk);
++ }
++ }
++ kfree_sensitive(keyring);
+ sb->s_master_keys = NULL;
+ }
+
++static struct hlist_head *
++fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring,
++ const struct fscrypt_key_specifier *mk_spec)
++{
++ /*
++ * Since key specifiers should be "random" values, it is sufficient to
++ * use a trivial hash function that just takes the first several bits of
++ * the key specifier.
++ */
++ unsigned long i = get_unaligned((unsigned long *)&mk_spec->u);
++
++ return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)];
++}
++
+ /*
+- * Find the specified master key in ->s_master_keys.
+- * Returns ERR_PTR(-ENOKEY) if not found.
++ * Find the specified master key struct in ->s_master_keys and take a structural
++ * ref to it. The structural ref guarantees that the key struct continues to
++ * exist, but it does *not* guarantee that ->s_master_keys continues to contain
++ * the key struct. The structural ref needs to be dropped by
++ * fscrypt_put_master_key(). Returns NULL if the key struct is not found.
+ */
+-struct key *fscrypt_find_master_key(struct super_block *sb,
+- const struct fscrypt_key_specifier *mk_spec)
++struct fscrypt_master_key *
++fscrypt_find_master_key(struct super_block *sb,
++ const struct fscrypt_key_specifier *mk_spec)
+ {
+- struct key *keyring;
+- char description[FSCRYPT_MK_DESCRIPTION_SIZE];
++ struct fscrypt_keyring *keyring;
++ struct hlist_head *bucket;
++ struct fscrypt_master_key *mk;
+
+ /*
+ * Pairs with the smp_store_release() in allocate_filesystem_keyring().
+@@ -246,10 +284,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb,
+ */
+ keyring = smp_load_acquire(&sb->s_master_keys);
+ if (keyring == NULL)
+- return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */
+-
+- format_mk_description(description, mk_spec);
+- return search_fscrypt_keyring(keyring, &key_type_fscrypt, description);
++ return NULL; /* No keyring yet, so no keys yet. */
++
++ bucket = fscrypt_mk_hash_bucket(keyring, mk_spec);
++ rcu_read_lock();
++ switch (mk_spec->type) {
++ case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
++ hlist_for_each_entry_rcu(mk, bucket, mk_node) {
++ if (mk->mk_spec.type ==
++ FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
++ memcmp(mk->mk_spec.u.descriptor,
++ mk_spec->u.descriptor,
++ FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
++ refcount_inc_not_zero(&mk->mk_struct_refs))
++ goto out;
++ }
++ break;
++ case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
++ hlist_for_each_entry_rcu(mk, bucket, mk_node) {
++ if (mk->mk_spec.type ==
++ FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER &&
++ memcmp(mk->mk_spec.u.identifier,
++ mk_spec->u.identifier,
++ FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 &&
++ refcount_inc_not_zero(&mk->mk_struct_refs))
++ goto out;
++ }
++ break;
++ }
++ mk = NULL;
++out:
++ rcu_read_unlock();
++ return mk;
+ }
+
+ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
+@@ -277,17 +343,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
+ static struct key *find_master_key_user(struct fscrypt_master_key *mk)
+ {
+ char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
++ key_ref_t keyref;
+
+ format_mk_user_description(description, mk->mk_spec.u.identifier);
+- return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user,
+- description);
++
++ /*
++ * We need to mark the keyring reference as "possessed" so that we
++ * acquire permission to search it, via the KEY_POS_SEARCH permission.
++ */
++ keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/),
++ &key_type_fscrypt_user, description, false);
++ if (IS_ERR(keyref)) {
++ if (PTR_ERR(keyref) == -EAGAIN || /* not found */
++ PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */
++ keyref = ERR_PTR(-ENOKEY);
++ return ERR_CAST(keyref);
++ }
++ return key_ref_to_ptr(keyref);
+ }
+
+ /*
+ * Give the current user a "key" in ->mk_users. This charges the user's quota
+ * and marks the master key as added by the current user, so that it cannot be
+- * removed by another user with the key. Either the master key's key->sem must
+- * be held for write, or the master key must be still undergoing initialization.
++ * removed by another user with the key. Either ->mk_sem must be held for
++ * write, or the master key must be still undergoing initialization.
+ */
+ static int add_master_key_user(struct fscrypt_master_key *mk)
+ {
+@@ -309,7 +388,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk)
+
+ /*
+ * Remove the current user's "key" from ->mk_users.
+- * The master key's key->sem must be held for write.
++ * ->mk_sem must be held for write.
+ *
+ * Returns 0 if removed, -ENOKEY if not found, or another -errno code.
+ */
+@@ -327,63 +406,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk)
+ }
+
+ /*
+- * Allocate a new fscrypt_master_key which contains the given secret, set it as
+- * the payload of a new 'struct key' of type fscrypt, and link the 'struct key'
+- * into the given keyring. Synchronized by fscrypt_add_key_mutex.
++ * Allocate a new fscrypt_master_key, transfer the given secret over to it, and
++ * insert it into sb->s_master_keys.
+ */
+-static int add_new_master_key(struct fscrypt_master_key_secret *secret,
+- const struct fscrypt_key_specifier *mk_spec,
+- struct key *keyring)
++static int add_new_master_key(struct super_block *sb,
++ struct fscrypt_master_key_secret *secret,
++ const struct fscrypt_key_specifier *mk_spec)
+ {
++ struct fscrypt_keyring *keyring = sb->s_master_keys;
+ struct fscrypt_master_key *mk;
+- char description[FSCRYPT_MK_DESCRIPTION_SIZE];
+- struct key *key;
+ int err;
+
+ mk = kzalloc(sizeof(*mk), GFP_KERNEL);
+ if (!mk)
+ return -ENOMEM;
+
++ mk->mk_sb = sb;
++ init_rwsem(&mk->mk_sem);
++ refcount_set(&mk->mk_struct_refs, 1);
+ mk->mk_spec = *mk_spec;
+
+- move_master_key_secret(&mk->mk_secret, secret);
+-
+- refcount_set(&mk->mk_refcount, 1); /* secret is present */
+ INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
+ spin_lock_init(&mk->mk_decrypted_inodes_lock);
+
+ if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
+ err = allocate_master_key_users_keyring(mk);
+ if (err)
+- goto out_free_mk;
++ goto out_put;
+ err = add_master_key_user(mk);
+ if (err)
+- goto out_free_mk;
++ goto out_put;
+ }
+
+- /*
+- * Note that we don't charge this key to anyone's quota, since when
+- * ->mk_users is in use those keys are charged instead, and otherwise
+- * (when ->mk_users isn't in use) only root can add these keys.
+- */
+- format_mk_description(description, mk_spec);
+- key = key_alloc(&key_type_fscrypt, description,
+- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+- KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW,
+- KEY_ALLOC_NOT_IN_QUOTA, NULL);
+- if (IS_ERR(key)) {
+- err = PTR_ERR(key);
+- goto out_free_mk;
+- }
+- err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL);
+- key_put(key);
+- if (err)
+- goto out_free_mk;
++ move_master_key_secret(&mk->mk_secret, secret);
++ refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */
+
++ spin_lock(&keyring->lock);
++ hlist_add_head_rcu(&mk->mk_node,
++ fscrypt_mk_hash_bucket(keyring, mk_spec));
++ spin_unlock(&keyring->lock);
+ return 0;
+
+-out_free_mk:
+- free_master_key(mk);
++out_put:
++ fscrypt_put_master_key(mk);
+ return err;
+ }
+
+@@ -392,42 +457,34 @@ out_free_mk:
+ static int add_existing_master_key(struct fscrypt_master_key *mk,
+ struct fscrypt_master_key_secret *secret)
+ {
+- struct key *mk_user;
+- bool rekey;
+ int err;
+
+ /*
+ * If the current user is already in ->mk_users, then there's nothing to
+- * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.)
++ * do. Otherwise, we need to add the user to ->mk_users. (Neither is
++ * applicable for v1 policy keys, which have NULL ->mk_users.)
+ */
+ if (mk->mk_users) {
+- mk_user = find_master_key_user(mk);
++ struct key *mk_user = find_master_key_user(mk);
++
+ if (mk_user != ERR_PTR(-ENOKEY)) {
+ if (IS_ERR(mk_user))
+ return PTR_ERR(mk_user);
+ key_put(mk_user);
+ return 0;
+ }
+- }
+-
+- /* If we'll be re-adding ->mk_secret, try to take the reference. */
+- rekey = !is_master_key_secret_present(&mk->mk_secret);
+- if (rekey && !refcount_inc_not_zero(&mk->mk_refcount))
+- return KEY_DEAD;
+-
+- /* Add the current user to ->mk_users, if applicable. */
+- if (mk->mk_users) {
+ err = add_master_key_user(mk);
+- if (err) {
+- if (rekey && refcount_dec_and_test(&mk->mk_refcount))
+- return KEY_DEAD;
++ if (err)
+ return err;
+- }
+ }
+
+ /* Re-add the secret if needed. */
+- if (rekey)
++ if (!is_master_key_secret_present(&mk->mk_secret)) {
++ if (!refcount_inc_not_zero(&mk->mk_active_refs))
++ return KEY_DEAD;
+ move_master_key_secret(&mk->mk_secret, secret);
++ }
++
+ return 0;
+ }
+
+@@ -436,38 +493,36 @@ static int do_add_master_key(struct super_block *sb,
+ const struct fscrypt_key_specifier *mk_spec)
+ {
+ static DEFINE_MUTEX(fscrypt_add_key_mutex);
+- struct key *key;
++ struct fscrypt_master_key *mk;
+ int err;
+
+ mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */
+-retry:
+- key = fscrypt_find_master_key(sb, mk_spec);
+- if (IS_ERR(key)) {
+- err = PTR_ERR(key);
+- if (err != -ENOKEY)
+- goto out_unlock;
++
++ mk = fscrypt_find_master_key(sb, mk_spec);
++ if (!mk) {
+ /* Didn't find the key in ->s_master_keys. Add it. */
+ err = allocate_filesystem_keyring(sb);
+- if (err)
+- goto out_unlock;
+- err = add_new_master_key(secret, mk_spec, sb->s_master_keys);
++ if (!err)
++ err = add_new_master_key(sb, secret, mk_spec);
+ } else {
+ /*
+ * Found the key in ->s_master_keys. Re-add the secret if
+ * needed, and add the user to ->mk_users if needed.
+ */
+- down_write(&key->sem);
+- err = add_existing_master_key(key->payload.data[0], secret);
+- up_write(&key->sem);
++ down_write(&mk->mk_sem);
++ err = add_existing_master_key(mk, secret);
++ up_write(&mk->mk_sem);
+ if (err == KEY_DEAD) {
+- /* Key being removed or needs to be removed */
+- key_invalidate(key);
+- key_put(key);
+- goto retry;
++ /*
++ * We found a key struct, but it's already been fully
++ * removed. Ignore the old struct and add a new one.
++ * fscrypt_add_key_mutex means we don't need to worry
++ * about concurrent adds.
++ */
++ err = add_new_master_key(sb, secret, mk_spec);
+ }
+- key_put(key);
++ fscrypt_put_master_key(mk);
+ }
+-out_unlock:
+ mutex_unlock(&fscrypt_add_key_mutex);
+ return err;
+ }
+@@ -731,19 +786,19 @@ int fscrypt_verify_key_added(struct super_block *sb,
+ const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
+ {
+ struct fscrypt_key_specifier mk_spec;
+- struct key *key, *mk_user;
+ struct fscrypt_master_key *mk;
++ struct key *mk_user;
+ int err;
+
+ mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
+ memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE);
+
+- key = fscrypt_find_master_key(sb, &mk_spec);
+- if (IS_ERR(key)) {
+- err = PTR_ERR(key);
++ mk = fscrypt_find_master_key(sb, &mk_spec);
++ if (!mk) {
++ err = -ENOKEY;
+ goto out;
+ }
+- mk = key->payload.data[0];
++ down_read(&mk->mk_sem);
+ mk_user = find_master_key_user(mk);
+ if (IS_ERR(mk_user)) {
+ err = PTR_ERR(mk_user);
+@@ -751,7 +806,8 @@ int fscrypt_verify_key_added(struct super_block *sb,
+ key_put(mk_user);
+ err = 0;
+ }
+- key_put(key);
++ up_read(&mk->mk_sem);
++ fscrypt_put_master_key(mk);
+ out:
+ if (err == -ENOKEY && capable(CAP_FOWNER))
+ err = 0;
+@@ -913,11 +969,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
+ struct super_block *sb = file_inode(filp)->i_sb;
+ struct fscrypt_remove_key_arg __user *uarg = _uarg;
+ struct fscrypt_remove_key_arg arg;
+- struct key *key;
+ struct fscrypt_master_key *mk;
+ u32 status_flags = 0;
+ int err;
+- bool dead;
++ bool inodes_remain;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+@@ -937,12 +992,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
+ return -EACCES;
+
+ /* Find the key being removed. */
+- key = fscrypt_find_master_key(sb, &arg.key_spec);
+- if (IS_ERR(key))
+- return PTR_ERR(key);
+- mk = key->payload.data[0];
+-
+- down_write(&key->sem);
++ mk = fscrypt_find_master_key(sb, &arg.key_spec);
++ if (!mk)
++ return -ENOKEY;
++ down_write(&mk->mk_sem);
+
+ /* If relevant, remove current user's (or all users) claim to the key */
+ if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) {
+@@ -951,7 +1004,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
+ else
+ err = remove_master_key_user(mk);
+ if (err) {
+- up_write(&key->sem);
++ up_write(&mk->mk_sem);
+ goto out_put_key;
+ }
+ if (mk->mk_users->keys.nr_leaves_on_tree != 0) {
+@@ -963,26 +1016,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
+ status_flags |=
+ FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS;
+ err = 0;
+- up_write(&key->sem);
++ up_write(&mk->mk_sem);
+ goto out_put_key;
+ }
+ }
+
+ /* No user claims remaining. Go ahead and wipe the secret. */
+- dead = false;
++ err = -ENOKEY;
+ if (is_master_key_secret_present(&mk->mk_secret)) {
+ wipe_master_key_secret(&mk->mk_secret);
+- dead = refcount_dec_and_test(&mk->mk_refcount);
+- }
+- up_write(&key->sem);
+- if (dead) {
+- /*
+- * No inodes reference the key, and we wiped the secret, so the
+- * key object is free to be removed from the keyring.
+- */
+- key_invalidate(key);
++ fscrypt_put_master_key_activeref(mk);
+ err = 0;
+- } else {
++ }
++ inodes_remain = refcount_read(&mk->mk_active_refs) > 0;
++ up_write(&mk->mk_sem);
++
++ if (inodes_remain) {
+ /* Some inodes still reference this key; try to evict them. */
+ err = try_to_lock_encrypted_files(sb, mk);
+ if (err == -EBUSY) {
+@@ -998,7 +1047,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
+ * has been fully removed including all files locked.
+ */
+ out_put_key:
+- key_put(key);
++ fscrypt_put_master_key(mk);
+ if (err == 0)
+ err = put_user(status_flags, &uarg->removal_status_flags);
+ return err;
+@@ -1045,7 +1094,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
+ {
+ struct super_block *sb = file_inode(filp)->i_sb;
+ struct fscrypt_get_key_status_arg arg;
+- struct key *key;
+ struct fscrypt_master_key *mk;
+ int err;
+
+@@ -1062,19 +1110,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
+ arg.user_count = 0;
+ memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved));
+
+- key = fscrypt_find_master_key(sb, &arg.key_spec);
+- if (IS_ERR(key)) {
+- if (key != ERR_PTR(-ENOKEY))
+- return PTR_ERR(key);
++ mk = fscrypt_find_master_key(sb, &arg.key_spec);
++ if (!mk) {
+ arg.status = FSCRYPT_KEY_STATUS_ABSENT;
+ err = 0;
+ goto out;
+ }
+- mk = key->payload.data[0];
+- down_read(&key->sem);
++ down_read(&mk->mk_sem);
+
+ if (!is_master_key_secret_present(&mk->mk_secret)) {
+- arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED;
++ arg.status = refcount_read(&mk->mk_active_refs) > 0 ?
++ FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED :
++ FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */;
+ err = 0;
+ goto out_release_key;
+ }
+@@ -1096,8 +1143,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
+ }
+ err = 0;
+ out_release_key:
+- up_read(&key->sem);
+- key_put(key);
++ up_read(&mk->mk_sem);
++ fscrypt_put_master_key(mk);
+ out:
+ if (!err && copy_to_user(uarg, &arg, sizeof(arg)))
+ err = -EFAULT;
+@@ -1109,13 +1156,9 @@ int __init fscrypt_init_keyring(void)
+ {
+ int err;
+
+- err = register_key_type(&key_type_fscrypt);
+- if (err)
+- return err;
+-
+ err = register_key_type(&key_type_fscrypt_user);
+ if (err)
+- goto err_unregister_fscrypt;
++ return err;
+
+ err = register_key_type(&key_type_fscrypt_provisioning);
+ if (err)
+@@ -1125,7 +1168,5 @@ int __init fscrypt_init_keyring(void)
+
+ err_unregister_fscrypt_user:
+ unregister_key_type(&key_type_fscrypt_user);
+-err_unregister_fscrypt:
+- unregister_key_type(&key_type_fscrypt);
+ return err;
+ }
+diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
+index bca9c6658a7c5..c3fbd594cc79e 100644
+--- a/fs/crypto/keysetup.c
++++ b/fs/crypto/keysetup.c
+@@ -9,7 +9,6 @@
+ */
+
+ #include <crypto/skcipher.h>
+-#include <linux/key.h>
+ #include <linux/random.h>
+
+ #include "fscrypt_private.h"
+@@ -19,6 +18,7 @@ struct fscrypt_mode fscrypt_modes[] = {
+ .friendly_name = "AES-256-XTS",
+ .cipher_str = "xts(aes)",
+ .keysize = 64,
++ .security_strength = 32,
+ .ivsize = 16,
+ .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS,
+ },
+@@ -26,12 +26,14 @@ struct fscrypt_mode fscrypt_modes[] = {
+ .friendly_name = "AES-256-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 32,
++ .security_strength = 32,
+ .ivsize = 16,
+ },
+ [FSCRYPT_MODE_AES_128_CBC] = {
+ .friendly_name = "AES-128-CBC-ESSIV",
+ .cipher_str = "essiv(cbc(aes),sha256)",
+ .keysize = 16,
++ .security_strength = 16,
+ .ivsize = 16,
+ .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
+ },
+@@ -39,12 +41,14 @@ struct fscrypt_mode fscrypt_modes[] = {
+ .friendly_name = "AES-128-CTS-CBC",
+ .cipher_str = "cts(cbc(aes))",
+ .keysize = 16,
++ .security_strength = 16,
+ .ivsize = 16,
+ },
+ [FSCRYPT_MODE_ADIANTUM] = {
+ .friendly_name = "Adiantum",
+ .cipher_str = "adiantum(xchacha12,aes)",
+ .keysize = 32,
++ .security_strength = 32,
+ .ivsize = 32,
+ .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
+ },
+@@ -146,6 +150,7 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key)
+ {
+ crypto_free_skcipher(prep_key->tfm);
+ fscrypt_destroy_inline_crypt_key(prep_key);
++ memzero_explicit(prep_key, sizeof(*prep_key));
+ }
+
+ /* Given a per-file encryption key, set up the file's crypto transform object */
+@@ -357,23 +362,60 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
+ return 0;
+ }
+
++/*
++ * Check whether the size of the given master key (@mk) is appropriate for the
++ * encryption settings which a particular file will use (@ci).
++ *
++ * If the file uses a v1 encryption policy, then the master key must be at least
++ * as long as the derived key, as this is a requirement of the v1 KDF.
++ *
++ * Otherwise, the KDF can accept any size key, so we enforce a slightly looser
++ * requirement: we require that the size of the master key be at least the
++ * maximum security strength of any algorithm whose key will be derived from it
++ * (but in practice we only need to consider @ci->ci_mode, since any other
++ * possible subkeys such as DIRHASH and INODE_HASH will never increase the
++ * required key size over @ci->ci_mode). This allows AES-256-XTS keys to be
++ * derived from a 256-bit master key, which is cryptographically sufficient,
++ * rather than requiring a 512-bit master key which is unnecessarily long. (We
++ * still allow 512-bit master keys if the user chooses to use them, though.)
++ */
++static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk,
++ const struct fscrypt_info *ci)
++{
++ unsigned int min_keysize;
++
++ if (ci->ci_policy.version == FSCRYPT_POLICY_V1)
++ min_keysize = ci->ci_mode->keysize;
++ else
++ min_keysize = ci->ci_mode->security_strength;
++
++ if (mk->mk_secret.size < min_keysize) {
++ fscrypt_warn(NULL,
++ "key with %s %*phN is too short (got %u bytes, need %u+ bytes)",
++ master_key_spec_type(&mk->mk_spec),
++ master_key_spec_len(&mk->mk_spec),
++ (u8 *)&mk->mk_spec.u,
++ mk->mk_secret.size, min_keysize);
++ return false;
++ }
++ return true;
++}
++
+ /*
+ * Find the master key, then set up the inode's actual encryption key.
+ *
+- * If the master key is found in the filesystem-level keyring, then the
+- * corresponding 'struct key' is returned in *master_key_ret with its semaphore
+- * read-locked. This is needed to ensure that only one task links the
+- * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create
+- * an fscrypt_info for the same inode), and to synchronize the master key being
+- * removed with a new inode starting to use it.
++ * If the master key is found in the filesystem-level keyring, then it is
++ * returned in *mk_ret with its semaphore read-locked. This is needed to ensure
++ * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as
++ * multiple tasks may race to create an fscrypt_info for the same inode), and to
++ * synchronize the master key being removed with a new inode starting to use it.
+ */
+ static int setup_file_encryption_key(struct fscrypt_info *ci,
+ bool need_dirhash_key,
+- struct key **master_key_ret)
++ struct fscrypt_master_key **mk_ret)
+ {
+- struct key *key;
+- struct fscrypt_master_key *mk = NULL;
+ struct fscrypt_key_specifier mk_spec;
++ struct fscrypt_master_key *mk;
+ int err;
+
+ err = fscrypt_select_encryption_impl(ci);
+@@ -398,11 +440,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
+ return -EINVAL;
+ }
+
+- key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
+- if (IS_ERR(key)) {
+- if (key != ERR_PTR(-ENOKEY) ||
+- ci->ci_policy.version != FSCRYPT_POLICY_V1)
+- return PTR_ERR(key);
++ mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
++ if (!mk) {
++ if (ci->ci_policy.version != FSCRYPT_POLICY_V1)
++ return -ENOKEY;
+
+ /*
+ * As a legacy fallback for v1 policies, search for the key in
+@@ -412,9 +453,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
+ */
+ return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
+ }
+-
+- mk = key->payload.data[0];
+- down_read(&key->sem);
++ down_read(&mk->mk_sem);
+
+ /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
+ if (!is_master_key_secret_present(&mk->mk_secret)) {
+@@ -422,18 +461,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
+ goto out_release_key;
+ }
+
+- /*
+- * Require that the master key be at least as long as the derived key.
+- * Otherwise, the derived key cannot possibly contain as much entropy as
+- * that required by the encryption mode it will be used for. For v1
+- * policies it's also required for the KDF to work at all.
+- */
+- if (mk->mk_secret.size < ci->ci_mode->keysize) {
+- fscrypt_warn(NULL,
+- "key with %s %*phN is too short (got %u bytes, need %u+ bytes)",
+- master_key_spec_type(&mk_spec),
+- master_key_spec_len(&mk_spec), (u8 *)&mk_spec.u,
+- mk->mk_secret.size, ci->ci_mode->keysize);
++ if (!fscrypt_valid_master_key_size(mk, ci)) {
+ err = -ENOKEY;
+ goto out_release_key;
+ }
+@@ -453,18 +481,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
+ if (err)
+ goto out_release_key;
+
+- *master_key_ret = key;
++ *mk_ret = mk;
+ return 0;
+
+ out_release_key:
+- up_read(&key->sem);
+- key_put(key);
++ up_read(&mk->mk_sem);
++ fscrypt_put_master_key(mk);
+ return err;
+ }
+
+ static void put_crypt_info(struct fscrypt_info *ci)
+ {
+- struct key *key;
++ struct fscrypt_master_key *mk;
+
+ if (!ci)
+ return;
+@@ -474,24 +502,18 @@ static void put_crypt_info(struct fscrypt_info *ci)
+ else if (ci->ci_owns_key)
+ fscrypt_destroy_prepared_key(&ci->ci_enc_key);
+
+- key = ci->ci_master_key;
+- if (key) {
+- struct fscrypt_master_key *mk = key->payload.data[0];
+-
++ mk = ci->ci_master_key;
++ if (mk) {
+ /*
+ * Remove this inode from the list of inodes that were unlocked
+- * with the master key.
+- *
+- * In addition, if we're removing the last inode from a key that
+- * already had its secret removed, invalidate the key so that it
+- * gets removed from ->s_master_keys.
++ * with the master key. In addition, if we're removing the last
++ * inode from a master key struct that already had its secret
++ * removed, then complete the full removal of the struct.
+ */
+ spin_lock(&mk->mk_decrypted_inodes_lock);
+ list_del(&ci->ci_master_key_link);
+ spin_unlock(&mk->mk_decrypted_inodes_lock);
+- if (refcount_dec_and_test(&mk->mk_refcount))
+- key_invalidate(key);
+- key_put(key);
++ fscrypt_put_master_key_activeref(mk);
+ }
+ memzero_explicit(ci, sizeof(*ci));
+ kmem_cache_free(fscrypt_info_cachep, ci);
+@@ -505,7 +527,7 @@ fscrypt_setup_encryption_info(struct inode *inode,
+ {
+ struct fscrypt_info *crypt_info;
+ struct fscrypt_mode *mode;
+- struct key *master_key = NULL;
++ struct fscrypt_master_key *mk = NULL;
+ int res;
+
+ res = fscrypt_initialize(inode->i_sb->s_cop->flags);
+@@ -528,8 +550,7 @@ fscrypt_setup_encryption_info(struct inode *inode,
+ WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
+ crypt_info->ci_mode = mode;
+
+- res = setup_file_encryption_key(crypt_info, need_dirhash_key,
+- &master_key);
++ res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk);
+ if (res)
+ goto out;
+
+@@ -544,12 +565,9 @@ fscrypt_setup_encryption_info(struct inode *inode,
+ * We won the race and set ->i_crypt_info to our crypt_info.
+ * Now link it into the master key's inode list.
+ */
+- if (master_key) {
+- struct fscrypt_master_key *mk =
+- master_key->payload.data[0];
+-
+- refcount_inc(&mk->mk_refcount);
+- crypt_info->ci_master_key = key_get(master_key);
++ if (mk) {
++ crypt_info->ci_master_key = mk;
++ refcount_inc(&mk->mk_active_refs);
+ spin_lock(&mk->mk_decrypted_inodes_lock);
+ list_add(&crypt_info->ci_master_key_link,
+ &mk->mk_decrypted_inodes);
+@@ -559,9 +577,9 @@ fscrypt_setup_encryption_info(struct inode *inode,
+ }
+ res = 0;
+ out:
+- if (master_key) {
+- up_read(&master_key->sem);
+- key_put(master_key);
++ if (mk) {
++ up_read(&mk->mk_sem);
++ fscrypt_put_master_key(mk);
+ }
+ put_crypt_info(crypt_info);
+ return res;
+@@ -726,7 +744,6 @@ EXPORT_SYMBOL(fscrypt_free_inode);
+ int fscrypt_drop_inode(struct inode *inode)
+ {
+ const struct fscrypt_info *ci = fscrypt_get_info(inode);
+- const struct fscrypt_master_key *mk;
+
+ /*
+ * If ci is NULL, then the inode doesn't have an encryption key set up
+@@ -736,7 +753,6 @@ int fscrypt_drop_inode(struct inode *inode)
+ */
+ if (!ci || !ci->ci_master_key)
+ return 0;
+- mk = ci->ci_master_key->payload.data[0];
+
+ /*
+ * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes
+@@ -755,6 +771,6 @@ int fscrypt_drop_inode(struct inode *inode)
+ * then the thread removing the key will either evict the inode itself
+ * or will correctly detect that it wasn't evicted due to the race.
+ */
+- return !is_master_key_secret_present(&mk->mk_secret);
++ return !is_master_key_secret_present(&ci->ci_master_key->mk_secret);
+ }
+ EXPORT_SYMBOL_GPL(fscrypt_drop_inode);
+diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
+index ed3d623724cdd..cad34dbe8e298 100644
+--- a/fs/crypto/policy.c
++++ b/fs/crypto/policy.c
+@@ -692,12 +692,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data)
+ * delayed key setup that requires the inode number.
+ */
+ if (ci->ci_policy.version == FSCRYPT_POLICY_V2 &&
+- (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) {
+- const struct fscrypt_master_key *mk =
+- ci->ci_master_key->payload.data[0];
+-
+- fscrypt_hash_inode_number(ci, mk);
+- }
++ (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))
++ fscrypt_hash_inode_number(ci, ci->ci_master_key);
+
+ return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data);
+ }
+diff --git a/fs/dax.c b/fs/dax.c
+index 4e3e5a283a916..4ab1c493c73f1 100644
+--- a/fs/dax.c
++++ b/fs/dax.c
+@@ -846,7 +846,8 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
+ if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+ goto unlock_pmd;
+
+- flush_cache_page(vma, address, pfn);
++ flush_cache_range(vma, address,
++ address + HPAGE_PMD_SIZE);
+ pmd = pmdp_invalidate(vma, address, pmdp);
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_mkclean(pmd);
+@@ -1278,6 +1279,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t done = 0;
+ int ret;
+
++ if (!iomi.len)
++ return 0;
++
+ if (iov_iter_rw(iter) == WRITE) {
+ lockdep_assert_held_write(&iomi.inode->i_rwsem);
+ iomi.flags |= IOMAP_WRITE;
+diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
+index 7d162b0efbf03..38930d9b0bb73 100644
+--- a/fs/debugfs/file.c
++++ b/fs/debugfs/file.c
+@@ -147,7 +147,7 @@ static int debugfs_locked_down(struct inode *inode,
+ struct file *filp,
+ const struct file_operations *real_fops)
+ {
+- if ((inode->i_mode & 07777) == 0444 &&
++ if ((inode->i_mode & 07777 & ~0444) == 0 &&
+ !(filp->f_mode & FMODE_WRITE) &&
+ !real_fops->unlocked_ioctl &&
+ !real_fops->compat_ioctl &&
+@@ -378,8 +378,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+ }
+ EXPORT_SYMBOL_GPL(debugfs_attr_read);
+
+-ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+- size_t len, loff_t *ppos)
++static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos, bool is_signed)
+ {
+ struct dentry *dentry = F_DENTRY(file);
+ ssize_t ret;
+@@ -387,12 +387,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ ret = debugfs_file_get(dentry);
+ if (unlikely(ret))
+ return ret;
+- ret = simple_attr_write(file, buf, len, ppos);
++ if (is_signed)
++ ret = simple_attr_write_signed(file, buf, len, ppos);
++ else
++ ret = simple_attr_write(file, buf, len, ppos);
+ debugfs_file_put(dentry);
+ return ret;
+ }
++
++ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos)
++{
++ return debugfs_attr_write_xsigned(file, buf, len, ppos, false);
++}
+ EXPORT_SYMBOL_GPL(debugfs_attr_write);
+
++ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos)
++{
++ return debugfs_attr_write_xsigned(file, buf, len, ppos, true);
++}
++EXPORT_SYMBOL_GPL(debugfs_attr_write_signed);
++
+ static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode,
+ struct dentry *parent, void *value,
+ const struct file_operations *fops,
+@@ -738,11 +754,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
+ *val = atomic_read((atomic_t *)data);
+ return 0;
+ }
+-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get,
+ debugfs_atomic_t_set, "%lld\n");
+-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
+ "%lld\n");
+-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
+ "%lld\n");
+
+ /**
+diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
+index 2f117c57160dc..26f9cd3282918 100644
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -734,6 +734,28 @@ void debugfs_remove(struct dentry *dentry)
+ }
+ EXPORT_SYMBOL_GPL(debugfs_remove);
+
++/**
++ * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it
++ * @name: a pointer to a string containing the name of the item to look up.
++ * @parent: a pointer to the parent dentry of the item.
++ *
++ * This is the equivalent of doing something like
++ * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting
++ * handled for the directory being looked up.
++ */
++void debugfs_lookup_and_remove(const char *name, struct dentry *parent)
++{
++ struct dentry *dentry;
++
++ dentry = debugfs_lookup(name, parent);
++ if (!dentry)
++ return;
++
++ debugfs_remove(dentry);
++ dput(dentry);
++}
++EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove);
++
+ /**
+ * debugfs_rename - rename a file/directory in the debugfs filesystem
+ * @old_dir: a pointer to the parent dentry for the renamed object. This
+diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
+index 42e5a766d33c7..4f25015aa5342 100644
+--- a/fs/devpts/inode.c
++++ b/fs/devpts/inode.c
+@@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry)
+
+ dentry->d_fsdata = NULL;
+ drop_nlink(dentry->d_inode);
+- fsnotify_unlink(d_inode(dentry->d_parent), dentry);
+ d_drop(dentry);
++ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
+ dput(dentry); /* d_alloc_name() in devpts_pty_new() */
+ }
+
+diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
+index 283c7b94eddad..ca06069e95c8c 100644
+--- a/fs/dlm/ast.c
++++ b/fs/dlm/ast.c
+@@ -198,13 +198,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+ if (!prev_seq) {
+ kref_get(&lkb->lkb_ref);
+
++ mutex_lock(&ls->ls_cb_mutex);
+ if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
+- mutex_lock(&ls->ls_cb_mutex);
+ list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
+- mutex_unlock(&ls->ls_cb_mutex);
+ } else {
+ queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
+ }
++ mutex_unlock(&ls->ls_cb_mutex);
+ }
+ out:
+ mutex_unlock(&lkb->lkb_cb_mutex);
+@@ -284,7 +284,9 @@ void dlm_callback_stop(struct dlm_ls *ls)
+
+ void dlm_callback_suspend(struct dlm_ls *ls)
+ {
++ mutex_lock(&ls->ls_cb_mutex);
+ set_bit(LSFL_CB_DELAY, &ls->ls_flags);
++ mutex_unlock(&ls->ls_cb_mutex);
+
+ if (ls->ls_callback_wq)
+ flush_workqueue(ls->ls_callback_wq);
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index c502c065d0075..b9829b873bf2e 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -1551,6 +1551,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
+ lkb->lkb_wait_type = 0;
+ lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
+ lkb->lkb_wait_count--;
++ unhold_lkb(lkb);
+ goto out_del;
+ }
+
+@@ -1577,6 +1578,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
+ log_error(ls, "remwait error %x reply %d wait_type %d overlap",
+ lkb->lkb_id, mstype, lkb->lkb_wait_type);
+ lkb->lkb_wait_count--;
++ unhold_lkb(lkb);
+ lkb->lkb_wait_type = 0;
+ }
+
+@@ -1854,7 +1856,7 @@ static void del_timeout(struct dlm_lkb *lkb)
+ void dlm_scan_timeout(struct dlm_ls *ls)
+ {
+ struct dlm_rsb *r;
+- struct dlm_lkb *lkb;
++ struct dlm_lkb *lkb = NULL, *iter;
+ int do_cancel, do_warn;
+ s64 wait_us;
+
+@@ -1865,27 +1867,28 @@ void dlm_scan_timeout(struct dlm_ls *ls)
+ do_cancel = 0;
+ do_warn = 0;
+ mutex_lock(&ls->ls_timeout_mutex);
+- list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
++ list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) {
+
+ wait_us = ktime_to_us(ktime_sub(ktime_get(),
+- lkb->lkb_timestamp));
++ iter->lkb_timestamp));
+
+- if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+- wait_us >= (lkb->lkb_timeout_cs * 10000))
++ if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) &&
++ wait_us >= (iter->lkb_timeout_cs * 10000))
+ do_cancel = 1;
+
+- if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
++ if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ wait_us >= dlm_config.ci_timewarn_cs * 10000)
+ do_warn = 1;
+
+ if (!do_cancel && !do_warn)
+ continue;
+- hold_lkb(lkb);
++ hold_lkb(iter);
++ lkb = iter;
+ break;
+ }
+ mutex_unlock(&ls->ls_timeout_mutex);
+
+- if (!do_cancel && !do_warn)
++ if (!lkb)
+ break;
+
+ r = lkb->lkb_resource;
+@@ -2886,24 +2889,24 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
+ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
+ struct dlm_args *args)
+ {
+- int rv = -EINVAL;
++ int rv = -EBUSY;
+
+ if (args->flags & DLM_LKF_CONVERT) {
+- if (lkb->lkb_flags & DLM_IFL_MSTCPY)
++ if (lkb->lkb_status != DLM_LKSTS_GRANTED)
+ goto out;
+
+- if (args->flags & DLM_LKF_QUECVT &&
+- !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
++ if (lkb->lkb_wait_type)
+ goto out;
+
+- rv = -EBUSY;
+- if (lkb->lkb_status != DLM_LKSTS_GRANTED)
++ if (is_overlap(lkb))
+ goto out;
+
+- if (lkb->lkb_wait_type)
++ rv = -EINVAL;
++ if (lkb->lkb_flags & DLM_IFL_MSTCPY)
+ goto out;
+
+- if (is_overlap(lkb))
++ if (args->flags & DLM_LKF_QUECVT &&
++ !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
+ goto out;
+ }
+
+@@ -3630,7 +3633,7 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
+ case DLM_MSG_REQUEST_REPLY:
+ case DLM_MSG_CONVERT_REPLY:
+ case DLM_MSG_GRANT:
+- if (!lkb->lkb_lvbptr)
++ if (!lkb->lkb_lvbptr || !(lkb->lkb_exflags & DLM_LKF_VALBLK))
+ break;
+ memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
+ break;
+@@ -3973,6 +3976,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
+ int from = ms->m_header.h_nodeid;
+ int error = 0;
+
++	/* currently mixing of user/kernel locks is not supported */
++ if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) {
++ log_error(lkb->lkb_resource->res_ls,
++ "got user dlm message for a kernel lock");
++ error = -EINVAL;
++ goto out;
++ }
++
+ switch (ms->m_type) {
+ case DLM_MSG_CONVERT:
+ case DLM_MSG_UNLOCK:
+@@ -4001,6 +4012,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
+ error = -EINVAL;
+ }
+
++out:
+ if (error)
+ log_error(lkb->lkb_resource->res_ls,
+ "ignore invalid message %d from %d %x %x %x %d",
+@@ -4054,13 +4066,14 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
+ rv = _create_message(ls, sizeof(struct dlm_message) + len,
+ dir_nodeid, DLM_MSG_REMOVE, &ms, &mh);
+ if (rv)
+- return;
++ goto out;
+
+ memcpy(ms->m_extra, name, len);
+ ms->m_hash = hash;
+
+ send_message(mh, ms);
+
++out:
+ spin_lock(&ls->ls_remove_spin);
+ ls->ls_remove_len = 0;
+ memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
+@@ -5227,21 +5240,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
+
+ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
+ {
+- struct dlm_lkb *lkb;
+- int found = 0;
++ struct dlm_lkb *lkb = NULL, *iter;
+
+ mutex_lock(&ls->ls_waiters_mutex);
+- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+- if (lkb->lkb_flags & DLM_IFL_RESEND) {
+- hold_lkb(lkb);
+- found = 1;
++ list_for_each_entry(iter, &ls->ls_waiters, lkb_wait_reply) {
++ if (iter->lkb_flags & DLM_IFL_RESEND) {
++ hold_lkb(iter);
++ lkb = iter;
+ break;
+ }
+ }
+ mutex_unlock(&ls->ls_waiters_mutex);
+
+- if (!found)
+- lkb = NULL;
+ return lkb;
+ }
+
+@@ -5301,11 +5311,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
+ lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
+ lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
+ lkb->lkb_wait_type = 0;
+- lkb->lkb_wait_count = 0;
++			/* drop all wait_count references; we still
++			 * hold a reference for this iteration.
++ */
++ while (lkb->lkb_wait_count) {
++ lkb->lkb_wait_count--;
++ unhold_lkb(lkb);
++ }
+ mutex_lock(&ls->ls_waiters_mutex);
+ list_del_init(&lkb->lkb_wait_reply);
+ mutex_unlock(&ls->ls_waiters_mutex);
+- unhold_lkb(lkb); /* for waiters list */
+
+ if (oc || ou) {
+ /* do an unlock or cancel instead of resending */
+@@ -5895,37 +5910,36 @@ int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, void *name, unsigned int namelen,
+ unsigned long timeout_cs, uint32_t *lkid)
+ {
+- struct dlm_lkb *lkb;
++ struct dlm_lkb *lkb = NULL, *iter;
+ struct dlm_user_args *ua;
+ int found_other_mode = 0;
+- int found = 0;
+ int rv = 0;
+
+ mutex_lock(&ls->ls_orphans_mutex);
+- list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
+- if (lkb->lkb_resource->res_length != namelen)
++ list_for_each_entry(iter, &ls->ls_orphans, lkb_ownqueue) {
++ if (iter->lkb_resource->res_length != namelen)
+ continue;
+- if (memcmp(lkb->lkb_resource->res_name, name, namelen))
++ if (memcmp(iter->lkb_resource->res_name, name, namelen))
+ continue;
+- if (lkb->lkb_grmode != mode) {
++ if (iter->lkb_grmode != mode) {
+ found_other_mode = 1;
+ continue;
+ }
+
+- found = 1;
+- list_del_init(&lkb->lkb_ownqueue);
+- lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
+- *lkid = lkb->lkb_id;
++ lkb = iter;
++ list_del_init(&iter->lkb_ownqueue);
++ iter->lkb_flags &= ~DLM_IFL_ORPHAN;
++ *lkid = iter->lkb_id;
+ break;
+ }
+ mutex_unlock(&ls->ls_orphans_mutex);
+
+- if (!found && found_other_mode) {
++ if (!lkb && found_other_mode) {
+ rv = -EAGAIN;
+ goto out;
+ }
+
+- if (!found) {
++ if (!lkb) {
+ rv = -ENOENT;
+ goto out;
+ }
+diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
+index 10eddfa6c3d7b..fa086a81a8476 100644
+--- a/fs/dlm/lockspace.c
++++ b/fs/dlm/lockspace.c
+@@ -17,7 +17,6 @@
+ #include "recoverd.h"
+ #include "dir.h"
+ #include "midcomms.h"
+-#include "lowcomms.h"
+ #include "config.h"
+ #include "memory.h"
+ #include "lock.h"
+@@ -384,23 +383,23 @@ static int threads_start(void)
+ {
+ int error;
+
+- error = dlm_scand_start();
++ /* Thread for sending/receiving messages for all lockspace's */
++ error = dlm_midcomms_start();
+ if (error) {
+- log_print("cannot start dlm_scand thread %d", error);
++ log_print("cannot start dlm midcomms %d", error);
+ goto fail;
+ }
+
+- /* Thread for sending/receiving messages for all lockspace's */
+- error = dlm_midcomms_start();
++ error = dlm_scand_start();
+ if (error) {
+- log_print("cannot start dlm lowcomms %d", error);
+- goto scand_fail;
++ log_print("cannot start dlm_scand thread %d", error);
++ goto midcomms_fail;
+ }
+
+ return 0;
+
+- scand_fail:
+- dlm_scand_stop();
++ midcomms_fail:
++ dlm_midcomms_stop();
+ fail:
+ return error;
+ }
+@@ -705,7 +704,7 @@ int dlm_new_lockspace(const char *name, const char *cluster,
+ if (!ls_count) {
+ dlm_scand_stop();
+ dlm_midcomms_shutdown();
+- dlm_lowcomms_stop();
++ dlm_midcomms_stop();
+ }
+ out:
+ mutex_unlock(&ls_lock);
+@@ -889,7 +888,7 @@ int dlm_release_lockspace(void *lockspace, int force)
+ if (!error)
+ ls_count--;
+ if (!ls_count)
+- dlm_lowcomms_stop();
++ dlm_midcomms_stop();
+ mutex_unlock(&ls_lock);
+
+ return error;
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 8f715c620e1f8..1eb95ba7e7772 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -592,8 +592,8 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
+ static void lowcomms_error_report(struct sock *sk)
+ {
+ struct connection *con;
+- struct sockaddr_storage saddr;
+ void (*orig_report)(struct sock *) = NULL;
++ struct inet_sock *inet;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sock2con(sk);
+@@ -601,33 +601,33 @@ static void lowcomms_error_report(struct sock *sk)
+ goto out;
+
+ orig_report = listen_sock.sk_error_report;
+- if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
+- printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+- "sending to node %d, port %d, "
+- "sk_err=%d/%d\n", dlm_our_nodeid(),
+- con->nodeid, dlm_config.ci_tcp_port,
+- sk->sk_err, sk->sk_err_soft);
+- } else if (saddr.ss_family == AF_INET) {
+- struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+
++ inet = inet_sk(sk);
++ switch (sk->sk_family) {
++ case AF_INET:
+ printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+- "sending to node %d at %pI4, port %d, "
++ "sending to node %d at %pI4, dport %d, "
+ "sk_err=%d/%d\n", dlm_our_nodeid(),
+- con->nodeid, &sin4->sin_addr.s_addr,
+- dlm_config.ci_tcp_port, sk->sk_err,
++ con->nodeid, &inet->inet_daddr,
++ ntohs(inet->inet_dport), sk->sk_err,
+ sk->sk_err_soft);
+- } else {
+- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
+-
++ break;
++#if IS_ENABLED(CONFIG_IPV6)
++ case AF_INET6:
+ printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+- "sending to node %d at %u.%u.%u.%u, "
+- "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
+- con->nodeid, sin6->sin6_addr.s6_addr32[0],
+- sin6->sin6_addr.s6_addr32[1],
+- sin6->sin6_addr.s6_addr32[2],
+- sin6->sin6_addr.s6_addr32[3],
+- dlm_config.ci_tcp_port, sk->sk_err,
++ "sending to node %d at %pI6c, "
++ "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
++ con->nodeid, &sk->sk_v6_daddr,
++ ntohs(inet->inet_dport), sk->sk_err,
+ sk->sk_err_soft);
++ break;
++#endif
++ default:
++ printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
++ "invalid socket family %d set, "
++ "sk_err=%d/%d\n", dlm_our_nodeid(),
++ sk->sk_family, sk->sk_err, sk->sk_err_soft);
++ goto out;
+ }
+
+ /* below sendcon only handling */
+@@ -1319,6 +1319,8 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+ return NULL;
+ }
+
++ /* for dlm_lowcomms_commit_msg() */
++ kref_get(&msg->ref);
+ /* we assume if successful commit must called */
+ msg->idx = idx;
+ return msg;
+@@ -1353,6 +1355,8 @@ void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
+ {
+ _dlm_lowcomms_commit_msg(msg);
+ srcu_read_unlock(&connections_srcu, msg->idx);
++ /* because dlm_lowcomms_new_msg() */
++ kref_put(&msg->ref, dlm_msg_release);
+ }
+
+ void dlm_lowcomms_put_msg(struct dlm_msg *msg)
+@@ -1516,7 +1520,11 @@ static void process_recv_sockets(struct work_struct *work)
+
+ static void process_listen_recv_socket(struct work_struct *work)
+ {
+- accept_from_sock(&listen_con);
++ int ret;
++
++ do {
++ ret = accept_from_sock(&listen_con);
++ } while (!ret);
+ }
+
+ static void dlm_connect(struct connection *con)
+@@ -1776,7 +1784,7 @@ static int dlm_listen_for_all(void)
+ SOCK_STREAM, dlm_proto_ops->proto, &sock);
+ if (result < 0) {
+ log_print("Can't create comms socket, check SCTP is loaded");
+- goto out;
++ return result;
+ }
+
+ sock_set_mark(sock->sk, dlm_config.ci_mark);
+@@ -1793,7 +1801,7 @@ static int dlm_listen_for_all(void)
+ result = sock->ops->listen(sock, 5);
+ if (result < 0) {
+ dlm_close_sock(&listen_con.sock);
+- goto out;
++ return result;
+ }
+
+ return 0;
+@@ -1951,10 +1959,6 @@ static const struct dlm_proto_ops dlm_sctp_ops = {
+ int dlm_lowcomms_start(void)
+ {
+ int error = -EINVAL;
+- int i;
+-
+- for (i = 0; i < CONN_HASH_SIZE; i++)
+- INIT_HLIST_HEAD(&connection_hash[i]);
+
+ init_local();
+ if (!dlm_local_count) {
+@@ -1963,8 +1967,6 @@ int dlm_lowcomms_start(void)
+ goto fail;
+ }
+
+- INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
+-
+ error = work_start();
+ if (error)
+ goto fail_local;
+@@ -1996,7 +1998,6 @@ fail_listen:
+ dlm_proto_ops = NULL;
+ fail_proto_ops:
+ dlm_allow_conn = 0;
+- dlm_close_sock(&listen_con.sock);
+ work_stop();
+ fail_local:
+ deinit_local();
+@@ -2004,6 +2005,16 @@ fail:
+ return error;
+ }
+
++void dlm_lowcomms_init(void)
++{
++ int i;
++
++ for (i = 0; i < CONN_HASH_SIZE; i++)
++ INIT_HLIST_HEAD(&connection_hash[i]);
++
++ INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
++}
++
+ void dlm_lowcomms_exit(void)
+ {
+ struct dlm_node_addr *na, *safe;
+diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
+index 4ccae07cf0058..26433632d1717 100644
+--- a/fs/dlm/lowcomms.h
++++ b/fs/dlm/lowcomms.h
+@@ -35,6 +35,7 @@ extern int dlm_allow_conn;
+ int dlm_lowcomms_start(void);
+ void dlm_lowcomms_shutdown(void);
+ void dlm_lowcomms_stop(void);
++void dlm_lowcomms_init(void);
+ void dlm_lowcomms_exit(void);
+ int dlm_lowcomms_close(int nodeid);
+ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+diff --git a/fs/dlm/main.c b/fs/dlm/main.c
+index afc66a1346d3d..974f7ebb3fe63 100644
+--- a/fs/dlm/main.c
++++ b/fs/dlm/main.c
+@@ -17,7 +17,7 @@
+ #include "user.h"
+ #include "memory.h"
+ #include "config.h"
+-#include "lowcomms.h"
++#include "midcomms.h"
+
+ static int __init init_dlm(void)
+ {
+@@ -27,6 +27,8 @@ static int __init init_dlm(void)
+ if (error)
+ goto out;
+
++ dlm_midcomms_init();
++
+ error = dlm_lockspace_init();
+ if (error)
+ goto out_mem;
+@@ -63,6 +65,7 @@ static int __init init_dlm(void)
+ out_lockspace:
+ dlm_lockspace_exit();
+ out_mem:
++ dlm_midcomms_exit();
+ dlm_memory_exit();
+ out:
+ return error;
+@@ -76,7 +79,7 @@ static void __exit exit_dlm(void)
+ dlm_config_exit();
+ dlm_memory_exit();
+ dlm_lockspace_exit();
+- dlm_lowcomms_exit();
++ dlm_midcomms_exit();
+ dlm_unregister_debugfs();
+ }
+
+diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
+index 7ae39ec8d9b0a..84a7a39fc12e6 100644
+--- a/fs/dlm/midcomms.c
++++ b/fs/dlm/midcomms.c
+@@ -366,7 +366,7 @@ static int dlm_send_ack(int nodeid, uint32_t seq)
+ struct dlm_msg *msg;
+ char *ppc;
+
+- msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_NOFS, &ppc,
++ msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_ATOMIC, &ppc,
+ NULL, NULL);
+ if (!msg)
+ return -ENOMEM;
+@@ -394,7 +394,7 @@ static int dlm_send_fin(struct midcomms_node *node,
+ struct dlm_mhandle *mh;
+ char *ppc;
+
+- mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_NOFS, &ppc);
++ mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_ATOMIC, &ppc);
+ if (!mh)
+ return -ENOMEM;
+
+@@ -478,15 +478,14 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
+
+ switch (p->header.h_cmd) {
+ case DLM_FIN:
+- /* send ack before fin */
+- dlm_send_ack(node->nodeid, node->seq_next);
+-
+ spin_lock(&node->state_lock);
+ pr_debug("receive fin msg from node %d with state %s\n",
+ node->nodeid, dlm_state_str(node->state));
+
+ switch (node->state) {
+ case DLM_ESTABLISHED:
++ dlm_send_ack(node->nodeid, node->seq_next);
++
+ node->state = DLM_CLOSE_WAIT;
+ pr_debug("switch node %d to state %s\n",
+ node->nodeid, dlm_state_str(node->state));
+@@ -498,16 +497,19 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
+ node->state = DLM_LAST_ACK;
+ pr_debug("switch node %d to state %s case 1\n",
+ node->nodeid, dlm_state_str(node->state));
+- spin_unlock(&node->state_lock);
+- goto send_fin;
++ set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
++ dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+ }
+ break;
+ case DLM_FIN_WAIT1:
++ dlm_send_ack(node->nodeid, node->seq_next);
+ node->state = DLM_CLOSING;
++ set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+ pr_debug("switch node %d to state %s\n",
+ node->nodeid, dlm_state_str(node->state));
+ break;
+ case DLM_FIN_WAIT2:
++ dlm_send_ack(node->nodeid, node->seq_next);
+ midcomms_node_reset(node);
+ pr_debug("switch node %d to state %s\n",
+ node->nodeid, dlm_state_str(node->state));
+@@ -524,8 +526,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
+ return;
+ }
+ spin_unlock(&node->state_lock);
+-
+- set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+ break;
+ default:
+ WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
+@@ -543,12 +543,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
+ log_print_ratelimited("ignore dlm msg because seq mismatch, seq: %u, expected: %u, nodeid: %d",
+ seq, node->seq_next, node->nodeid);
+ }
+-
+- return;
+-
+-send_fin:
+- set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+- dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+ }
+
+ static struct midcomms_node *
+@@ -1148,13 +1142,28 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
+ }
+
+ int dlm_midcomms_start(void)
++{
++ return dlm_lowcomms_start();
++}
++
++void dlm_midcomms_stop(void)
++{
++ dlm_lowcomms_stop();
++}
++
++void dlm_midcomms_init(void)
+ {
+ int i;
+
+ for (i = 0; i < CONN_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&node_hash[i]);
+
+- return dlm_lowcomms_start();
++ dlm_lowcomms_init();
++}
++
++void dlm_midcomms_exit(void)
++{
++ dlm_lowcomms_exit();
+ }
+
+ static void dlm_act_fin_ack_rcv(struct midcomms_node *node)
+@@ -1269,11 +1278,11 @@ void dlm_midcomms_remove_member(int nodeid)
+ case DLM_CLOSE_WAIT:
+ /* passive shutdown DLM_LAST_ACK case 2 */
+ node->state = DLM_LAST_ACK;
+- spin_unlock(&node->state_lock);
+-
+ pr_debug("switch node %d to state %s case 2\n",
+ node->nodeid, dlm_state_str(node->state));
+- goto send_fin;
++ set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
++ dlm_send_fin(node, dlm_pas_fin_ack_rcv);
++ break;
+ case DLM_LAST_ACK:
+ /* probably receive fin caught it, do nothing */
+ break;
+@@ -1289,12 +1298,6 @@ void dlm_midcomms_remove_member(int nodeid)
+ spin_unlock(&node->state_lock);
+
+ srcu_read_unlock(&nodes_srcu, idx);
+- return;
+-
+-send_fin:
+- set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+- dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+- srcu_read_unlock(&nodes_srcu, idx);
+ }
+
+ static void midcomms_node_release(struct rcu_head *rcu)
+@@ -1325,6 +1328,7 @@ static void midcomms_shutdown(struct midcomms_node *node)
+ node->state = DLM_FIN_WAIT1;
+ pr_debug("switch node %d to state %s case 2\n",
+ node->nodeid, dlm_state_str(node->state));
++ dlm_send_fin(node, dlm_act_fin_ack_rcv);
+ break;
+ case DLM_CLOSED:
+ /* we have what we want */
+@@ -1338,12 +1342,8 @@ static void midcomms_shutdown(struct midcomms_node *node)
+ }
+ spin_unlock(&node->state_lock);
+
+- if (node->state == DLM_FIN_WAIT1) {
+- dlm_send_fin(node, dlm_act_fin_ack_rcv);
+-
+- if (DLM_DEBUG_FENCE_TERMINATION)
+- msleep(5000);
+- }
++ if (DLM_DEBUG_FENCE_TERMINATION)
++ msleep(5000);
+
+ /* wait for other side dlm + fin */
+ ret = wait_event_timeout(node->shutdown_wait,
+diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
+index 579abc6929be2..1a36b7834dfc5 100644
+--- a/fs/dlm/midcomms.h
++++ b/fs/dlm/midcomms.h
+@@ -20,6 +20,9 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
+ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh);
+ int dlm_midcomms_close(int nodeid);
+ int dlm_midcomms_start(void);
++void dlm_midcomms_stop(void);
++void dlm_midcomms_init(void);
++void dlm_midcomms_exit(void);
+ void dlm_midcomms_shutdown(void);
+ void dlm_midcomms_add_member(int nodeid);
+ void dlm_midcomms_remove_member(int nodeid);
+diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
+index c38b2b8ffd1d3..5f2e2fa2ba090 100644
+--- a/fs/dlm/plock.c
++++ b/fs/dlm/plock.c
+@@ -19,20 +19,20 @@ static struct list_head recv_list;
+ static wait_queue_head_t send_wq;
+ static wait_queue_head_t recv_wq;
+
+-struct plock_op {
+- struct list_head list;
+- int done;
+- struct dlm_plock_info info;
+-};
+-
+-struct plock_xop {
+- struct plock_op xop;
+- int (*callback)(struct file_lock *fl, int result);
++struct plock_async_data {
+ void *fl;
+ void *file;
+ struct file_lock flc;
++ int (*callback)(struct file_lock *fl, int result);
+ };
+
++struct plock_op {
++ struct list_head list;
++ int done;
++ struct dlm_plock_info info;
++ /* if set indicates async handling */
++ struct plock_async_data *data;
++};
+
+ static inline void set_version(struct dlm_plock_info *info)
+ {
+@@ -58,6 +58,12 @@ static int check_version(struct dlm_plock_info *info)
+ return 0;
+ }
+
++static void dlm_release_plock_op(struct plock_op *op)
++{
++ kfree(op->data);
++ kfree(op);
++}
++
+ static void send_op(struct plock_op *op)
+ {
+ set_version(&op->info);
+@@ -74,8 +80,7 @@ static void send_op(struct plock_op *op)
+ abandoned waiter. So, we have to insert the unlock-close when the
+ lock call is interrupted. */
+
+-static void do_unlock_close(struct dlm_ls *ls, u64 number,
+- struct file *file, struct file_lock *fl)
++static void do_unlock_close(const struct dlm_plock_info *info)
+ {
+ struct plock_op *op;
+
+@@ -84,15 +89,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
+ return;
+
+ op->info.optype = DLM_PLOCK_OP_UNLOCK;
+- op->info.pid = fl->fl_pid;
+- op->info.fsid = ls->ls_global_id;
+- op->info.number = number;
++ op->info.pid = info->pid;
++ op->info.fsid = info->fsid;
++ op->info.number = info->number;
+ op->info.start = 0;
+ op->info.end = OFFSET_MAX;
+- if (fl->fl_lmops && fl->fl_lmops->lm_grant)
+- op->info.owner = (__u64) fl->fl_pid;
+- else
+- op->info.owner = (__u64)(long) fl->fl_owner;
++ op->info.owner = info->owner;
+
+ op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ send_op(op);
+@@ -101,22 +103,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
+ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ int cmd, struct file_lock *fl)
+ {
++ struct plock_async_data *op_data;
+ struct dlm_ls *ls;
+ struct plock_op *op;
+- struct plock_xop *xop;
+ int rv;
+
+ ls = dlm_find_lockspace_local(lockspace);
+ if (!ls)
+ return -EINVAL;
+
+- xop = kzalloc(sizeof(*xop), GFP_NOFS);
+- if (!xop) {
++ op = kzalloc(sizeof(*op), GFP_NOFS);
++ if (!op) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+- op = &xop->xop;
+ op->info.optype = DLM_PLOCK_OP_LOCK;
+ op->info.pid = fl->fl_pid;
+ op->info.ex = (fl->fl_type == F_WRLCK);
+@@ -125,36 +126,45 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ op->info.number = number;
+ op->info.start = fl->fl_start;
+ op->info.end = fl->fl_end;
++ /* async handling */
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
++ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
++ if (!op_data) {
++ dlm_release_plock_op(op);
++ rv = -ENOMEM;
++ goto out;
++ }
++
+ /* fl_owner is lockd which doesn't distinguish
+ processes on the nfs client */
+ op->info.owner = (__u64) fl->fl_pid;
+- xop->callback = fl->fl_lmops->lm_grant;
+- locks_init_lock(&xop->flc);
+- locks_copy_lock(&xop->flc, fl);
+- xop->fl = fl;
+- xop->file = file;
++ op_data->callback = fl->fl_lmops->lm_grant;
++ locks_init_lock(&op_data->flc);
++ locks_copy_lock(&op_data->flc, fl);
++ op_data->fl = fl;
++ op_data->file = file;
++
++ op->data = op_data;
++
++ send_op(op);
++ rv = FILE_LOCK_DEFERRED;
++ goto out;
+ } else {
+ op->info.owner = (__u64)(long) fl->fl_owner;
+- xop->callback = NULL;
+ }
+
+ send_op(op);
+
+- if (xop->callback == NULL) {
+- rv = wait_event_interruptible(recv_wq, (op->done != 0));
+- if (rv == -ERESTARTSYS) {
+- log_debug(ls, "dlm_posix_lock: wait killed %llx",
+- (unsigned long long)number);
+- spin_lock(&ops_lock);
+- list_del(&op->list);
+- spin_unlock(&ops_lock);
+- kfree(xop);
+- do_unlock_close(ls, number, file, fl);
+- goto out;
+- }
+- } else {
+- rv = FILE_LOCK_DEFERRED;
++ rv = wait_event_killable(recv_wq, (op->done != 0));
++ if (rv == -ERESTARTSYS) {
++ spin_lock(&ops_lock);
++ list_del(&op->list);
++ spin_unlock(&ops_lock);
++ log_debug(ls, "%s: wait interrupted %x %llx pid %d",
++ __func__, ls->ls_global_id,
++ (unsigned long long)number, op->info.pid);
++ dlm_release_plock_op(op);
++ do_unlock_close(&op->info);
+ goto out;
+ }
+
+@@ -174,7 +184,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ (unsigned long long)number);
+ }
+
+- kfree(xop);
++ dlm_release_plock_op(op);
+ out:
+ dlm_put_lockspace(ls);
+ return rv;
+@@ -184,11 +194,11 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock);
+ /* Returns failure iff a successful lock operation should be canceled */
+ static int dlm_plock_callback(struct plock_op *op)
+ {
++ struct plock_async_data *op_data = op->data;
+ struct file *file;
+ struct file_lock *fl;
+ struct file_lock *flc;
+ int (*notify)(struct file_lock *fl, int result) = NULL;
+- struct plock_xop *xop = (struct plock_xop *)op;
+ int rv = 0;
+
+ spin_lock(&ops_lock);
+@@ -200,10 +210,10 @@ static int dlm_plock_callback(struct plock_op *op)
+ spin_unlock(&ops_lock);
+
+ /* check if the following 2 are still valid or make a copy */
+- file = xop->file;
+- flc = &xop->flc;
+- fl = xop->fl;
+- notify = xop->callback;
++ file = op_data->file;
++ flc = &op_data->flc;
++ fl = op_data->fl;
++ notify = op_data->callback;
+
+ if (op->info.rv) {
+ notify(fl, op->info.rv);
+@@ -234,7 +244,7 @@ static int dlm_plock_callback(struct plock_op *op)
+ }
+
+ out:
+- kfree(xop);
++ dlm_release_plock_op(op);
+ return rv;
+ }
+
+@@ -304,7 +314,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ rv = 0;
+
+ out_free:
+- kfree(op);
++ dlm_release_plock_op(op);
+ out:
+ dlm_put_lockspace(ls);
+ fl->fl_flags = fl_flags;
+@@ -364,13 +374,15 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ locks_init_lock(fl);
+ fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
+ fl->fl_flags = FL_POSIX;
+- fl->fl_pid = -op->info.pid;
++ fl->fl_pid = op->info.pid;
++ if (op->info.nodeid != dlm_our_nodeid())
++ fl->fl_pid = -fl->fl_pid;
+ fl->fl_start = op->info.start;
+ fl->fl_end = op->info.end;
+ rv = 0;
+ }
+
+- kfree(op);
++ dlm_release_plock_op(op);
+ out:
+ dlm_put_lockspace(ls);
+ return rv;
+@@ -393,7 +405,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ list_del(&op->list);
+ else
+- list_move(&op->list, &recv_list);
++ list_move_tail(&op->list, &recv_list);
+ memcpy(&info, &op->info, sizeof(info));
+ }
+ spin_unlock(&ops_lock);
+@@ -406,7 +418,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+ (the process did not make an unlock call). */
+
+ if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+- kfree(op);
++ dlm_release_plock_op(op);
+
+ if (copy_to_user(u, &info, sizeof(info)))
+ return -EFAULT;
+@@ -418,9 +430,9 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+ loff_t *ppos)
+ {
++ struct plock_op *op = NULL, *iter;
+ struct dlm_plock_info info;
+- struct plock_op *op;
+- int found = 0, do_callback = 0;
++ int do_callback = 0;
+
+ if (count != sizeof(info))
+ return -EINVAL;
+@@ -431,32 +443,63 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+ if (check_version(&info))
+ return -EINVAL;
+
++ /*
++ * The results for waiting ops (SETLKW) can be returned in any
++ * order, so match all fields to find the op. The results for
++ * non-waiting ops are returned in the order that they were sent
++ * to userspace, so match the result with the first non-waiting op.
++ */
+ spin_lock(&ops_lock);
+- list_for_each_entry(op, &recv_list, list) {
+- if (op->info.fsid == info.fsid &&
+- op->info.number == info.number &&
+- op->info.owner == info.owner) {
+- struct plock_xop *xop = (struct plock_xop *)op;
+- list_del_init(&op->list);
+- memcpy(&op->info, &info, sizeof(info));
+- if (xop->callback)
+- do_callback = 1;
+- else
+- op->done = 1;
+- found = 1;
+- break;
++ if (info.wait) {
++ list_for_each_entry(iter, &recv_list, list) {
++ if (iter->info.fsid == info.fsid &&
++ iter->info.number == info.number &&
++ iter->info.owner == info.owner &&
++ iter->info.pid == info.pid &&
++ iter->info.start == info.start &&
++ iter->info.end == info.end &&
++ iter->info.ex == info.ex &&
++ iter->info.wait) {
++ op = iter;
++ break;
++ }
+ }
++ } else {
++ list_for_each_entry(iter, &recv_list, list) {
++ if (!iter->info.wait &&
++ iter->info.fsid == info.fsid) {
++ op = iter;
++ break;
++ }
++ }
++ }
++
++ if (op) {
++ /* Sanity check that op and info match. */
++ if (info.wait)
++ WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
++ else
++ WARN_ON(op->info.number != info.number ||
++ op->info.owner != info.owner ||
++ op->info.optype != info.optype);
++
++ list_del_init(&op->list);
++ memcpy(&op->info, &info, sizeof(info));
++ if (op->data)
++ do_callback = 1;
++ else
++ op->done = 1;
+ }
+ spin_unlock(&ops_lock);
+
+- if (found) {
++ if (op) {
+ if (do_callback)
+ dlm_plock_callback(op);
+ else
+ wake_up(&recv_wq);
+ } else
+- log_print("dev_write no op %x %llx", info.fsid,
+- (unsigned long long)info.number);
++ log_print("%s: no op %x %llx", __func__,
++ info.fsid, (unsigned long long)info.number);
+ return count;
+ }
+
+diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
+index 8928e99dfd47d..df18f38a02734 100644
+--- a/fs/dlm/recover.c
++++ b/fs/dlm/recover.c
+@@ -732,10 +732,9 @@ void dlm_recovered_lock(struct dlm_rsb *r)
+
+ static void recover_lvb(struct dlm_rsb *r)
+ {
+- struct dlm_lkb *lkb, *high_lkb = NULL;
++ struct dlm_lkb *big_lkb = NULL, *iter, *high_lkb = NULL;
+ uint32_t high_seq = 0;
+ int lock_lvb_exists = 0;
+- int big_lock_exists = 0;
+ int lvblen = r->res_ls->ls_lvblen;
+
+ if (!rsb_flag(r, RSB_NEW_MASTER2) &&
+@@ -751,37 +750,37 @@ static void recover_lvb(struct dlm_rsb *r)
+ /* we are the new master, so figure out if VALNOTVALID should
+ be set, and set the rsb lvb from the best lkb available. */
+
+- list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
++ list_for_each_entry(iter, &r->res_grantqueue, lkb_statequeue) {
++ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
+ continue;
+
+ lock_lvb_exists = 1;
+
+- if (lkb->lkb_grmode > DLM_LOCK_CR) {
+- big_lock_exists = 1;
++ if (iter->lkb_grmode > DLM_LOCK_CR) {
++ big_lkb = iter;
+ goto setflag;
+ }
+
+- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
+- high_lkb = lkb;
+- high_seq = lkb->lkb_lvbseq;
++ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
++ high_lkb = iter;
++ high_seq = iter->lkb_lvbseq;
+ }
+ }
+
+- list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
++ list_for_each_entry(iter, &r->res_convertqueue, lkb_statequeue) {
++ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
+ continue;
+
+ lock_lvb_exists = 1;
+
+- if (lkb->lkb_grmode > DLM_LOCK_CR) {
+- big_lock_exists = 1;
++ if (iter->lkb_grmode > DLM_LOCK_CR) {
++ big_lkb = iter;
+ goto setflag;
+ }
+
+- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
+- high_lkb = lkb;
+- high_seq = lkb->lkb_lvbseq;
++ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
++ high_lkb = iter;
++ high_seq = iter->lkb_lvbseq;
+ }
+ }
+
+@@ -790,7 +789,7 @@ static void recover_lvb(struct dlm_rsb *r)
+ goto out;
+
+ /* lvb is invalidated if only NL/CR locks remain */
+- if (!big_lock_exists)
++ if (!big_lkb)
+ rsb_set_flag(r, RSB_VALNOTVALID);
+
+ if (!r->res_lvbptr) {
+@@ -799,9 +798,9 @@ static void recover_lvb(struct dlm_rsb *r)
+ goto out;
+ }
+
+- if (big_lock_exists) {
+- r->res_lvbseq = lkb->lkb_lvbseq;
+- memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
++ if (big_lkb) {
++ r->res_lvbseq = big_lkb->lkb_lvbseq;
++ memcpy(r->res_lvbptr, big_lkb->lkb_lvbptr, lvblen);
+ } else if (high_lkb) {
+ r->res_lvbseq = high_lkb->lkb_lvbseq;
+ memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
+diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
+index d66bbd2df191e..2dd23a82e0de5 100644
+--- a/fs/ecryptfs/main.c
++++ b/fs/ecryptfs/main.c
+@@ -537,7 +537,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
+ goto out_free;
+ }
+
+- if (mnt_user_ns(path.mnt) != &init_user_ns) {
++ if (is_idmapped_mnt(path.mnt)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n");
+ goto out_free;
+diff --git a/fs/erofs/data.c b/fs/erofs/data.c
+index 9db8297156527..16a41d0db55a3 100644
+--- a/fs/erofs/data.c
++++ b/fs/erofs/data.c
+@@ -287,7 +287,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+
+ if (!err)
+ return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
+- NULL, 0);
++ NULL, 0, 0);
+ if (err < 0)
+ return err;
+ }
+diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
+index a5bc4b1b7813e..8193c14bb1115 100644
+--- a/fs/erofs/decompressor.c
++++ b/fs/erofs/decompressor.c
+@@ -93,14 +93,18 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
+
+ if (page) {
+ __clear_bit(j, bounced);
+- if (kaddr) {
+- if (kaddr + PAGE_SIZE == page_address(page))
++ if (!PageHighMem(page)) {
++ if (!i) {
++ kaddr = page_address(page);
++ continue;
++ }
++ if (kaddr &&
++ kaddr + PAGE_SIZE == page_address(page)) {
+ kaddr += PAGE_SIZE;
+- else
+- kaddr = NULL;
+- } else if (!i) {
+- kaddr = page_address(page);
++ continue;
++ }
+ }
++ kaddr = NULL;
+ continue;
+ }
+ kaddr = NULL;
+@@ -233,7 +237,6 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, rq->inputsize, inputmargin, rq->outputsize);
+
+- WARN_ON(1);
+ print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+ 16, 1, src + inputmargin, rq->inputsize, true);
+ print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
+index a552399e211d8..638bb70d0d65b 100644
+--- a/fs/erofs/inode.c
++++ b/fs/erofs/inode.c
+@@ -192,8 +192,9 @@ static struct page *erofs_read_inode(struct inode *inode,
+ inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+
+ inode->i_flags &= ~S_DAX;
+- if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
+- vi->datalayout == EROFS_INODE_FLAT_PLAIN)
++ if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
++ (vi->datalayout == EROFS_INODE_FLAT_PLAIN ||
++ vi->datalayout == EROFS_INODE_CHUNK_BASED))
+ inode->i_flags |= S_DAX;
+ if (!nblks)
+ /* measure inode.i_blocks as generic filesystems */
+@@ -222,7 +223,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
+
+ /* if it cannot be handled with fast symlink scheme */
+ if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
+- inode->i_size >= PAGE_SIZE) {
++ inode->i_size >= PAGE_SIZE || inode->i_size < 0) {
+ inode->i_op = &erofs_symlink_iops;
+ return 0;
+ }
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index 9524e155b38fa..323e46d800e9e 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -47,7 +47,7 @@ typedef u64 erofs_off_t;
+ /* data type for filesystem-wide blocks number */
+ typedef u32 erofs_blk_t;
+
+-struct erofs_fs_context {
++struct erofs_mount_opts {
+ #ifdef CONFIG_EROFS_FS_ZIP
+ /* current strategy of how to use managed cache */
+ unsigned char cache_strategy;
+@@ -60,6 +60,10 @@ struct erofs_fs_context {
+ unsigned int mount_opt;
+ };
+
++struct erofs_fs_context {
++ struct erofs_mount_opts opt;
++};
++
+ /* all filesystem-wide lz4 configurations */
+ struct erofs_sb_lz4_info {
+ /* # of pages needed for EROFS lz4 rolling decompression */
+@@ -69,6 +73,8 @@ struct erofs_sb_lz4_info {
+ };
+
+ struct erofs_sb_info {
++ struct erofs_mount_opts opt; /* options */
++
+ #ifdef CONFIG_EROFS_FS_ZIP
+ /* list for all registered superblocks, mainly for shrinker */
+ struct list_head list;
+@@ -108,8 +114,6 @@ struct erofs_sb_info {
+ u8 volume_name[16]; /* volume name */
+ u32 feature_compat;
+ u32 feature_incompat;
+-
+- struct erofs_fs_context ctx; /* options */
+ };
+
+ #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
+@@ -121,9 +125,9 @@ struct erofs_sb_info {
+ #define EROFS_MOUNT_DAX_ALWAYS 0x00000040
+ #define EROFS_MOUNT_DAX_NEVER 0x00000080
+
+-#define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
+-#define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option)
+-#define test_opt(ctx, option) ((ctx)->mount_opt & EROFS_MOUNT_##option)
++#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
++#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
++#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option)
+
+ enum {
+ EROFS_ZIP_CACHE_DISABLED,
+@@ -143,7 +147,6 @@ struct erofs_workgroup {
+ atomic_t refcount;
+ };
+
+-#if defined(CONFIG_SMP)
+ static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
+ int val)
+ {
+@@ -172,34 +175,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
+ return atomic_cond_read_relaxed(&grp->refcount,
+ VAL != EROFS_LOCKED_MAGIC);
+ }
+-#else
+-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
+- int val)
+-{
+- preempt_disable();
+- /* no need to spin on UP platforms, let's just disable preemption. */
+- if (val != atomic_read(&grp->refcount)) {
+- preempt_enable();
+- return false;
+- }
+- return true;
+-}
+-
+-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
+- int orig_val)
+-{
+- preempt_enable();
+-}
+-
+-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
+-{
+- int v = atomic_read(&grp->refcount);
+-
+- /* workgroup is never freezed on uniprocessor systems */
+- DBG_BUGON(v == EROFS_LOCKED_MAGIC);
+- return v;
+-}
+-#endif /* !CONFIG_SMP */
+ #endif /* !CONFIG_EROFS_FS_ZIP */
+
+ /* we strictly follow PAGE_SIZE and no buffer head yet */
+@@ -255,7 +230,7 @@ struct erofs_inode {
+
+ unsigned char datalayout;
+ unsigned char inode_isize;
+- unsigned short xattr_isize;
++ unsigned int xattr_isize;
+
+ unsigned int xattr_shared_count;
+ unsigned int *xattr_shared_xattrs;
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index 11b88559f8bfa..25f6b8b37f287 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -340,15 +340,15 @@ out:
+ static void erofs_default_options(struct erofs_fs_context *ctx)
+ {
+ #ifdef CONFIG_EROFS_FS_ZIP
+- ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+- ctx->max_sync_decompress_pages = 3;
+- ctx->readahead_sync_decompress = false;
++ ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
++ ctx->opt.max_sync_decompress_pages = 3;
++ ctx->opt.readahead_sync_decompress = false;
+ #endif
+ #ifdef CONFIG_EROFS_FS_XATTR
+- set_opt(ctx, XATTR_USER);
++ set_opt(&ctx->opt, XATTR_USER);
+ #endif
+ #ifdef CONFIG_EROFS_FS_POSIX_ACL
+- set_opt(ctx, POSIX_ACL);
++ set_opt(&ctx->opt, POSIX_ACL);
+ #endif
+ }
+
+@@ -392,12 +392,12 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
+ switch (mode) {
+ case EROFS_MOUNT_DAX_ALWAYS:
+ warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+- set_opt(ctx, DAX_ALWAYS);
+- clear_opt(ctx, DAX_NEVER);
++ set_opt(&ctx->opt, DAX_ALWAYS);
++ clear_opt(&ctx->opt, DAX_NEVER);
+ return true;
+ case EROFS_MOUNT_DAX_NEVER:
+- set_opt(ctx, DAX_NEVER);
+- clear_opt(ctx, DAX_ALWAYS);
++ set_opt(&ctx->opt, DAX_NEVER);
++ clear_opt(&ctx->opt, DAX_ALWAYS);
+ return true;
+ default:
+ DBG_BUGON(1);
+@@ -424,9 +424,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
+ case Opt_user_xattr:
+ #ifdef CONFIG_EROFS_FS_XATTR
+ if (result.boolean)
+- set_opt(ctx, XATTR_USER);
++ set_opt(&ctx->opt, XATTR_USER);
+ else
+- clear_opt(ctx, XATTR_USER);
++ clear_opt(&ctx->opt, XATTR_USER);
+ #else
+ errorfc(fc, "{,no}user_xattr options not supported");
+ #endif
+@@ -434,16 +434,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
+ case Opt_acl:
+ #ifdef CONFIG_EROFS_FS_POSIX_ACL
+ if (result.boolean)
+- set_opt(ctx, POSIX_ACL);
++ set_opt(&ctx->opt, POSIX_ACL);
+ else
+- clear_opt(ctx, POSIX_ACL);
++ clear_opt(&ctx->opt, POSIX_ACL);
+ #else
+ errorfc(fc, "{,no}acl options not supported");
+ #endif
+ break;
+ case Opt_cache_strategy:
+ #ifdef CONFIG_EROFS_FS_ZIP
+- ctx->cache_strategy = result.uint_32;
++ ctx->opt.cache_strategy = result.uint_32;
+ #else
+ errorfc(fc, "compression not supported, cache_strategy ignored");
+ #endif
+@@ -540,15 +540,16 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+ return -ENOMEM;
+
+ sb->s_fs_info = sbi;
++ sbi->opt = ctx->opt;
+ sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+ err = erofs_read_superblock(sb);
+ if (err)
+ return err;
+
+- if (test_opt(ctx, DAX_ALWAYS) &&
++ if (test_opt(&sbi->opt, DAX_ALWAYS) &&
+ !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
+ errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+- clear_opt(ctx, DAX_ALWAYS);
++ clear_opt(&sbi->opt, DAX_ALWAYS);
+ }
+ sb->s_flags |= SB_RDONLY | SB_NOATIME;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+@@ -557,13 +558,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+ sb->s_op = &erofs_sops;
+ sb->s_xattr = erofs_xattr_handlers;
+
+- if (test_opt(ctx, POSIX_ACL))
++ if (test_opt(&sbi->opt, POSIX_ACL))
+ sb->s_flags |= SB_POSIXACL;
+ else
+ sb->s_flags &= ~SB_POSIXACL;
+
+- sbi->ctx = *ctx;
+-
+ #ifdef CONFIG_EROFS_FS_ZIP
+ xa_init(&sbi->managed_pslots);
+ #endif
+@@ -607,12 +606,12 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
+
+ DBG_BUGON(!sb_rdonly(sb));
+
+- if (test_opt(ctx, POSIX_ACL))
++ if (test_opt(&ctx->opt, POSIX_ACL))
+ fc->sb_flags |= SB_POSIXACL;
+ else
+ fc->sb_flags &= ~SB_POSIXACL;
+
+- sbi->ctx = *ctx;
++ sbi->opt = ctx->opt;
+
+ fc->sb_flags |= SB_RDONLY;
+ return 0;
+@@ -640,7 +639,6 @@ static int erofs_init_fs_context(struct fs_context *fc)
+ erofs_default_options(fc->fs_private);
+
+ fc->ops = &erofs_context_ops;
+-
+ return 0;
+ }
+
+@@ -763,31 +761,31 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+ {
+ struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
+- struct erofs_fs_context *ctx = &sbi->ctx;
++ struct erofs_mount_opts *opt = &sbi->opt;
+
+ #ifdef CONFIG_EROFS_FS_XATTR
+- if (test_opt(ctx, XATTR_USER))
++ if (test_opt(opt, XATTR_USER))
+ seq_puts(seq, ",user_xattr");
+ else
+ seq_puts(seq, ",nouser_xattr");
+ #endif
+ #ifdef CONFIG_EROFS_FS_POSIX_ACL
+- if (test_opt(ctx, POSIX_ACL))
++ if (test_opt(opt, POSIX_ACL))
+ seq_puts(seq, ",acl");
+ else
+ seq_puts(seq, ",noacl");
+ #endif
+ #ifdef CONFIG_EROFS_FS_ZIP
+- if (ctx->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
++ if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
+ seq_puts(seq, ",cache_strategy=disabled");
+- else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
++ else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
+ seq_puts(seq, ",cache_strategy=readahead");
+- else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
++ else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
+ seq_puts(seq, ",cache_strategy=readaround");
+ #endif
+- if (test_opt(ctx, DAX_ALWAYS))
++ if (test_opt(opt, DAX_ALWAYS))
+ seq_puts(seq, ",dax=always");
+- if (test_opt(ctx, DAX_NEVER))
++ if (test_opt(opt, DAX_NEVER))
+ seq_puts(seq, ",dax=never");
+ return 0;
+ }
+diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
+index bd86067a63f7f..3ca703cd5b24a 100644
+--- a/fs/erofs/utils.c
++++ b/fs/erofs/utils.c
+@@ -141,7 +141,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+ * however in order to avoid some race conditions, add a
+ * DBG_BUGON to observe this in advance.
+ */
+- DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
++ DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
+
+ /* last refcount should be connected with its managed pslot. */
+ erofs_workgroup_unfreeze(grp, 0);
+@@ -156,15 +156,19 @@ static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+ unsigned int freed = 0;
+ unsigned long index;
+
++ xa_lock(&sbi->managed_pslots);
+ xa_for_each(&sbi->managed_pslots, index, grp) {
+ /* try to shrink each valid workgroup */
+ if (!erofs_try_to_release_workgroup(sbi, grp))
+ continue;
++ xa_unlock(&sbi->managed_pslots);
+
+ ++freed;
+ if (!--nr_shrink)
+- break;
++ return freed;
++ xa_lock(&sbi->managed_pslots);
+ }
++ xa_unlock(&sbi->managed_pslots);
+ return freed;
+ }
+
+diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
+index 778f2c52295d1..01c581e93c5f8 100644
+--- a/fs/erofs/xattr.c
++++ b/fs/erofs/xattr.c
+@@ -429,7 +429,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
+
+ static bool erofs_xattr_user_list(struct dentry *dentry)
+ {
+- return test_opt(&EROFS_SB(dentry->d_sb)->ctx, XATTR_USER);
++ return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
+ }
+
+ static bool erofs_xattr_trusted_list(struct dentry *dentry)
+@@ -476,7 +476,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
+
+ switch (handler->flags) {
+ case EROFS_XATTR_INDEX_USER:
+- if (!test_opt(&sbi->ctx, XATTR_USER))
++ if (!test_opt(&sbi->opt, XATTR_USER))
+ return -EOPNOTSUPP;
+ break;
+ case EROFS_XATTR_INDEX_TRUSTED:
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index 11c7a1aaebade..f6536b224586d 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -373,8 +373,8 @@ static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
+
+ /* callers must be with collection lock held */
+ static int z_erofs_attach_page(struct z_erofs_collector *clt,
+- struct page *page,
+- enum z_erofs_page_type type)
++ struct page *page, enum z_erofs_page_type type,
++ bool pvec_safereuse)
+ {
+ int ret;
+
+@@ -384,9 +384,9 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
+ z_erofs_try_inplace_io(clt, page))
+ return 0;
+
+- ret = z_erofs_pagevec_enqueue(&clt->vector, page, type);
++ ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
++ pvec_safereuse);
+ clt->cl->vcnt += (unsigned int)ret;
+-
+ return ret ? 0 : -EAGAIN;
+ }
+
+@@ -695,7 +695,7 @@ restart_now:
+ goto err_out;
+
+ /* preload all compressed pages (maybe downgrade role if necessary) */
+- if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
++ if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
+ cache_strategy = TRYALLOC;
+ else
+ cache_strategy = DONTALLOC;
+@@ -713,9 +713,11 @@ hitted:
+ tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
+ clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+
+- cur = end - min_t(unsigned int, offset + end - map->m_la, end);
++ cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
+ if (!(map->m_flags & EROFS_MAP_MAPPED)) {
+ zero_user_segment(page, cur, end);
++ ++spiltted;
++ tight = false;
+ goto next_part;
+ }
+
+@@ -729,7 +731,8 @@ hitted:
+ tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+
+ retry:
+- err = z_erofs_attach_page(clt, page, page_type);
++ err = z_erofs_attach_page(clt, page, page_type,
++ clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+ /* should allocate an additional short-lived page for pagevec */
+ if (err == -EAGAIN) {
+ struct page *const newpage =
+@@ -737,7 +740,7 @@ retry:
+
+ set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
+ err = z_erofs_attach_page(clt, newpage,
+- Z_EROFS_PAGE_TYPE_EXCLUSIVE);
++ Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
+ if (!err)
+ goto retry;
+ }
+@@ -796,7 +799,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+ /* Use workqueue and sync decompression for atomic contexts only */
+ if (in_atomic() || irqs_disabled()) {
+ queue_work(z_erofs_workqueue, &io->u.work);
+- sbi->ctx.readahead_sync_decompress = true;
++ sbi->opt.readahead_sync_decompress = true;
+ return;
+ }
+ z_erofs_decompressqueue_work(&io->u.work);
+@@ -1411,8 +1414,8 @@ static void z_erofs_readahead(struct readahead_control *rac)
+ struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+
+ unsigned int nr_pages = readahead_count(rac);
+- bool sync = (sbi->ctx.readahead_sync_decompress &&
+- nr_pages <= sbi->ctx.max_sync_decompress_pages);
++ bool sync = (sbi->opt.readahead_sync_decompress &&
++ nr_pages <= sbi->opt.max_sync_decompress_pages);
+ struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+ struct page *page, *head = NULL;
+ LIST_HEAD(pagepool);
+diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
+index 7a6df35fdc915..15cac6baf6d26 100644
+--- a/fs/erofs/zmap.c
++++ b/fs/erofs/zmap.c
+@@ -191,6 +191,10 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ m->clusterofs = le16_to_cpu(di->di_clusterofs);
++ if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
++ DBG_BUGON(1);
++ return -EFSCORRUPTED;
++ }
+ m->pblk = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ default:
+@@ -249,7 +253,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
+ u8 *in, type;
+ bool big_pcluster;
+
+- if (1 << amortizedshift == 4)
++ if (1 << amortizedshift == 4 && lclusterbits <= 14)
+ vcnt = 2;
+ else if (1 << amortizedshift == 2 && lclusterbits == 12)
+ vcnt = 16;
+@@ -347,7 +351,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ {
+ struct inode *const inode = m->inode;
+ struct erofs_inode *const vi = EROFS_I(inode);
+- const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
+ vi->inode_isize + vi->xattr_isize, 8) +
+ sizeof(struct z_erofs_map_header);
+@@ -357,9 +360,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ erofs_off_t pos;
+ int err;
+
+- if (lclusterbits != 12)
+- return -EOPNOTSUPP;
+-
+ if (lcn >= totalidx)
+ return -EINVAL;
+
+@@ -700,12 +700,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ /*
+- * No strict rule how to describe extents for post EOF, yet
+- * we need do like below. Otherwise, iomap itself will get
++ * No strict rule on how to describe extents for post EOF, yet
++ * we need to do it as below. Otherwise, iomap itself will get
+ * into an endless loop on post EOF.
++ *
++ * Calculate the effective offset by subtracting the extent start
++ * (map.m_la) from the requested offset, and adding it to the length.
++ * (NB: offset >= map.m_la always)
+ */
+ if (iomap->offset >= inode->i_size)
+- iomap->length = length + map.m_la - offset;
++ iomap->length = length + offset - map.m_la;
+ }
+ iomap->flags = 0;
+ return 0;
+diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
+index dfd7fe0503bb1..b05464f4a8083 100644
+--- a/fs/erofs/zpvec.h
++++ b/fs/erofs/zpvec.h
+@@ -106,11 +106,18 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
+
+ static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
+ struct page *page,
+- enum z_erofs_page_type type)
++ enum z_erofs_page_type type,
++ bool pvec_safereuse)
+ {
+- if (!ctor->next && type)
+- if (ctor->index + 1 == ctor->nr)
++ if (!ctor->next) {
++ /* some pages cannot be reused as pvec safely without I/O */
++ if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
++ type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
++
++ if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
++ ctor->index + 1 == ctor->nr)
+ return false;
++ }
+
+ if (ctor->index >= ctor->nr)
+ z_erofs_pagevec_ctor_pagedown(ctor, false);
+diff --git a/fs/eventfd.c b/fs/eventfd.c
+index 3627dd7d25db8..4a60ea932e3d9 100644
+--- a/fs/eventfd.c
++++ b/fs/eventfd.c
+@@ -43,21 +43,7 @@ struct eventfd_ctx {
+ int id;
+ };
+
+-/**
+- * eventfd_signal - Adds @n to the eventfd counter.
+- * @ctx: [in] Pointer to the eventfd context.
+- * @n: [in] Value of the counter to be added to the eventfd internal counter.
+- * The value cannot be negative.
+- *
+- * This function is supposed to be called by the kernel in paths that do not
+- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+- * value, and we signal this as overflow condition by returning a EPOLLERR
+- * to poll(2).
+- *
+- * Returns the amount by which the counter was incremented. This will be less
+- * than @n if the counter has overflowed.
+- */
+-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
+ {
+ unsigned long flags;
+
+@@ -69,21 +55,40 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+ * it returns false, the eventfd_signal() call should be deferred to a
+ * safe context.
+ */
+- if (WARN_ON_ONCE(current->in_eventfd_signal))
++ if (WARN_ON_ONCE(current->in_eventfd))
+ return 0;
+
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+- current->in_eventfd_signal = 1;
++ current->in_eventfd = 1;
+ if (ULLONG_MAX - ctx->count < n)
+ n = ULLONG_MAX - ctx->count;
+ ctx->count += n;
+ if (waitqueue_active(&ctx->wqh))
+- wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+- current->in_eventfd_signal = 0;
++ wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
++ current->in_eventfd = 0;
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+ return n;
+ }
++
++/**
++ * eventfd_signal - Adds @n to the eventfd counter.
++ * @ctx: [in] Pointer to the eventfd context.
++ * @n: [in] Value of the counter to be added to the eventfd internal counter.
++ * The value cannot be negative.
++ *
++ * This function is supposed to be called by the kernel in paths that do not
++ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
++ * value, and we signal this as an overflow condition by returning an EPOLLERR
++ * to poll(2).
++ *
++ * Returns the amount by which the counter was incremented. This will be less
++ * than @n if the counter has overflowed.
++ */
++__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++{
++ return eventfd_signal_mask(ctx, n, 0);
++}
+ EXPORT_SYMBOL_GPL(eventfd_signal);
+
+ static void eventfd_free_ctx(struct eventfd_ctx *ctx)
+@@ -184,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+ {
+ lockdep_assert_held(&ctx->wqh.lock);
+
+- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
++ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
+ ctx->count -= *cnt;
+ }
+ EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
+@@ -253,8 +258,10 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
+ __set_current_state(TASK_RUNNING);
+ }
+ eventfd_ctx_do_read(ctx, &ucnt);
++ current->in_eventfd = 1;
+ if (waitqueue_active(&ctx->wqh))
+ wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
++ current->in_eventfd = 0;
+ spin_unlock_irq(&ctx->wqh.lock);
+ if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
+ return -EFAULT;
+@@ -301,8 +308,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
+ }
+ if (likely(res > 0)) {
+ ctx->count += ucnt;
++ current->in_eventfd = 1;
+ if (waitqueue_active(&ctx->wqh))
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
++ current->in_eventfd = 0;
+ }
+ spin_unlock_irq(&ctx->wqh.lock);
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 06f4c5ae1451e..1c254094c4c36 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -484,7 +484,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+ */
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++ unsigned pollflags)
+ {
+ struct eventpoll *ep_src;
+ unsigned long flags;
+@@ -515,16 +516,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
+ }
+ spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
+ ep->nests = nests + 1;
+- wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
++ wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
+ ep->nests = 0;
+ spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
+ }
+
+ #else
+
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++ unsigned pollflags)
+ {
+- wake_up_poll(&ep->poll_wait, EPOLLIN);
++ wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
+ }
+
+ #endif
+@@ -735,7 +737,7 @@ static void ep_free(struct eventpoll *ep)
+
+ /* We need to release all tasks waiting for these file */
+ if (waitqueue_active(&ep->poll_wait))
+- ep_poll_safewake(ep, NULL);
++ ep_poll_safewake(ep, NULL, 0);
+
+ /*
+ * We need to lock this because we could be hit by
+@@ -1201,7 +1203,7 @@ out_unlock:
+
+ /* We have to call this outside the lock */
+ if (pwake)
+- ep_poll_safewake(ep, epi);
++ ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
+
+ if (!(epi->event.events & EPOLLEXCLUSIVE))
+ ewake = 1;
+@@ -1546,7 +1548,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+
+ /* We have to call this outside the lock */
+ if (pwake)
+- ep_poll_safewake(ep, NULL);
++ ep_poll_safewake(ep, NULL, 0);
+
+ return 0;
+ }
+@@ -1622,7 +1624,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
+
+ /* We have to call this outside the lock */
+ if (pwake)
+- ep_poll_safewake(ep, NULL);
++ ep_poll_safewake(ep, NULL, 0);
+
+ return 0;
+ }
+@@ -1740,6 +1742,25 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
+ return to;
+ }
+
++/*
++ * autoremove_wake_function, but remove even on failure to wake up, because we
++ * know that default_wake_function/ttwu will only fail if the thread is already
++ * woken, and in that case the ep_poll loop will remove the entry anyway, not
++ * try to reuse it.
++ */
++static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
++ unsigned int mode, int sync, void *key)
++{
++ int ret = default_wake_function(wq_entry, mode, sync, key);
++
++ /*
++ * Pairs with list_empty_careful in ep_poll, and ensures future loop
++ * iterations see the cause of this wakeup.
++ */
++ list_del_init_careful(&wq_entry->entry);
++ return ret;
++}
++
+ /**
+ * ep_poll - Retrieves ready events, and delivers them to the caller-supplied
+ * event buffer.
+@@ -1821,8 +1842,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
+ * normal wakeup path no need to call __remove_wait_queue()
+ * explicitly, thus ep->lock is not taken, which halts the
+ * event delivery.
++ *
++ * In fact, we now use an even more aggressive function that
++ * unconditionally removes, because we don't reuse the wait
++ * entry between loop iterations. This lets us also avoid the
++ * performance issue if a process is killed, causing all of its
++ * threads to wake up without being removed normally.
+ */
+ init_wait(&wait);
++ wait.func = ep_autoremove_wake_function;
+
+ write_lock_irq(&ep->lock);
+ /*
+diff --git a/fs/exec.c b/fs/exec.c
+index a098c133d8d74..881390b44cfdc 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -494,8 +494,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
+ * the stack. They aren't stored until much later when we can't
+ * signal to the parent that the child has run out of stack space.
+ * Instead, calculate it here so it's possible to fail gracefully.
++ *
++ * In the case of argc = 0, make sure there is space for adding an
++ * empty string (which will bump argc to 1), to ensure confused
++ * userspace programs don't start processing from argv[1], thinking
++ * argc can never be 0, to keep them from walking envp by accident.
++ * See do_execveat_common().
+ */
+- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
++ ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
+ if (limit <= ptr_size)
+ return -E2BIG;
+ limit -= ptr_size;
+@@ -1192,11 +1198,11 @@ static int unshare_sighand(struct task_struct *me)
+ return -ENOMEM;
+
+ refcount_set(&newsighand->count, 1);
+- memcpy(newsighand->action, oldsighand->action,
+- sizeof(newsighand->action));
+
+ write_lock_irq(&tasklist_lock);
+ spin_lock(&oldsighand->siglock);
++ memcpy(newsighand->action, oldsighand->action,
++ sizeof(newsighand->action));
+ rcu_assign_pointer(me->sighand, newsighand);
+ spin_unlock(&oldsighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+@@ -1292,7 +1298,10 @@ int begin_new_exec(struct linux_binprm * bprm)
+ bprm->mm = NULL;
+
+ #ifdef CONFIG_POSIX_TIMERS
+- exit_itimers(me->signal);
++ spin_lock_irq(&me->sighand->siglock);
++ posix_cpu_timers_exit(me);
++ spin_unlock_irq(&me->sighand->siglock);
++ exit_itimers(me);
+ flush_itimer_signals();
+ #endif
+
+@@ -1852,7 +1861,7 @@ out:
+ * SIGSEGV.
+ */
+ if (bprm->point_of_no_return && !fatal_signal_pending(current))
+- force_sigsegv(SIGSEGV);
++ force_fatal_sig(SIGSEGV);
+
+ out_unmark:
+ current->fs->in_exec = 0;
+@@ -1895,6 +1904,9 @@ static int do_execveat_common(int fd, struct filename *filename,
+ }
+
+ retval = count(argv, MAX_ARG_STRINGS);
++ if (retval == 0)
++ pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
++ current->comm, bprm->filename);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+@@ -1921,6 +1933,19 @@ static int do_execveat_common(int fd, struct filename *filename,
+ if (retval < 0)
+ goto out_free;
+
++ /*
++ * When argv is empty, add an empty string ("") as argv[0] to
++ * ensure confused userspace programs that start processing
++ * from argv[1] won't end up walking envp. See also
++ * bprm_stack_limits().
++ */
++ if (bprm->argc == 0) {
++ retval = copy_string_kernel("", bprm);
++ if (retval < 0)
++ goto out_free;
++ bprm->argc = 1;
++ }
++
+ retval = bprm_execve(bprm, fd, filename, flags);
+ out_free:
+ free_bprm(bprm);
+@@ -1949,6 +1974,8 @@ int kernel_execve(const char *kernel_filename,
+ }
+
+ retval = count_strings_kernel(argv);
++ if (WARN_ON_ONCE(retval == 0))
++ retval = -EINVAL;
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
+index cc5cffc4a7691..1dce6b4e90885 100644
+--- a/fs/exfat/balloc.c
++++ b/fs/exfat/balloc.c
+@@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
+ }
+ sbi->map_sectors = ((need_map_size - 1) >>
+ (sb->s_blocksize_bits)) + 1;
+- sbi->vol_amap = kmalloc_array(sbi->map_sectors,
++ sbi->vol_amap = kvmalloc_array(sbi->map_sectors,
+ sizeof(struct buffer_head *), GFP_KERNEL);
+ if (!sbi->vol_amap)
+ return -ENOMEM;
+@@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
+ while (j < i)
+ brelse(sbi->vol_amap[j++]);
+
+- kfree(sbi->vol_amap);
++ kvfree(sbi->vol_amap);
+ sbi->vol_amap = NULL;
+ return -EIO;
+ }
+@@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi)
+ for (i = 0; i < sbi->map_sectors; i++)
+ __brelse(sbi->vol_amap[i]);
+
+- kfree(sbi->vol_amap);
++ kvfree(sbi->vol_amap);
+ }
+
+ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
+@@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+
+- WARN_ON(clu < EXFAT_FIRST_CLUSTER);
++ if (!is_valid_cluster(sbi, clu))
++ return -EINVAL;
++
+ ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
+ i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
+ b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+@@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_mount_options *opts = &sbi->options;
+
+- WARN_ON(clu < EXFAT_FIRST_CLUSTER);
++ if (!is_valid_cluster(sbi, clu))
++ return;
++
+ ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
+ i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
+ b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
+index cb1c0d8c17141..f6dd4fc8eaf45 100644
+--- a/fs/exfat/dir.c
++++ b/fs/exfat/dir.c
+@@ -34,6 +34,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
+ {
+ int i;
+ struct exfat_entry_set_cache *es;
++ unsigned int uni_len = 0, len;
+
+ es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES);
+ if (!es)
+@@ -52,7 +53,10 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
+ if (exfat_get_entry_type(ep) != TYPE_EXTEND)
+ break;
+
+- exfat_extract_uni_name(ep, uniname);
++ len = exfat_extract_uni_name(ep, uniname);
++ uni_len += len;
++ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
++ break;
+ uniname += EXFAT_FILE_NAME_LEN;
+ }
+
+@@ -103,7 +107,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
+ clu.dir = ei->hint_bmap.clu;
+ }
+
+- while (clu_offset > 0) {
++ while (clu_offset > 0 && clu.dir != EXFAT_EOF_CLUSTER) {
+ if (exfat_get_next_cluster(sb, &(clu.dir)))
+ return -EIO;
+
+@@ -211,7 +215,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
+ exfat_init_namebuf(nb);
+ }
+
+-/* skip iterating emit_dots when dir is empty */
++/*
++ * Before calling dir_emit*(), sbi->s_lock should be released
++ * because a page fault can occur in dir_emit*().
++ */
+ #define ITER_POS_FILLED_DOTS (2)
+ static int exfat_iterate(struct file *filp, struct dir_context *ctx)
+ {
+@@ -226,35 +233,33 @@ static int exfat_iterate(struct file *filp, struct dir_context *ctx)
+ int err = 0, fake_offset = 0;
+
+ exfat_init_namebuf(nb);
+- mutex_lock(&EXFAT_SB(sb)->s_lock);
+
+ cpos = ctx->pos;
+ if (!dir_emit_dots(filp, ctx))
+- goto unlock;
++ goto out;
+
+ if (ctx->pos == ITER_POS_FILLED_DOTS) {
+ cpos = 0;
+ fake_offset = 1;
+ }
+
+- if (cpos & (DENTRY_SIZE - 1)) {
+- err = -ENOENT;
+- goto unlock;
+- }
++ cpos = round_up(cpos, DENTRY_SIZE);
+
+ /* name buffer should be allocated before use */
+ err = exfat_alloc_namebuf(nb);
+ if (err)
+- goto unlock;
++ goto out;
+ get_new:
++ mutex_lock(&EXFAT_SB(sb)->s_lock);
++
+ if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
+ goto end_of_dir;
+
+ err = exfat_readdir(inode, &cpos, &de);
+ if (err) {
+ /*
+- * At least we tried to read a sector. Move cpos to next sector
+- * position (should be aligned).
++ * At least we tried to read a sector.
++ * Move cpos to next sector position (should be aligned).
+ */
+ if (err == -EIO) {
+ cpos += 1 << (sb->s_blocksize_bits);
+@@ -277,16 +282,10 @@ get_new:
+ inum = iunique(sb, EXFAT_ROOT_INO);
+ }
+
+- /*
+- * Before calling dir_emit(), sb_lock should be released.
+- * Because page fault can occur in dir_emit() when the size
+- * of buffer given from user is larger than one page size.
+- */
+ mutex_unlock(&EXFAT_SB(sb)->s_lock);
+ if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
+ (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
+- goto out_unlocked;
+- mutex_lock(&EXFAT_SB(sb)->s_lock);
++ goto out;
+ ctx->pos = cpos;
+ goto get_new;
+
+@@ -294,9 +293,8 @@ end_of_dir:
+ if (!cpos && fake_offset)
+ cpos = ITER_POS_FILLED_DOTS;
+ ctx->pos = cpos;
+-unlock:
+ mutex_unlock(&EXFAT_SB(sb)->s_lock);
+-out_unlocked:
++out:
+ /*
+ * To improve performance, free namebuf after unlock sb_lock.
+ * If namebuf is not allocated, this function do nothing
+@@ -1038,7 +1036,8 @@ rewind:
+ if (entry_type == TYPE_EXTEND) {
+ unsigned short entry_uniname[16], unichar;
+
+- if (step != DIRENT_STEP_NAME) {
++ if (step != DIRENT_STEP_NAME ||
++ name_len >= MAX_NAME_LENGTH) {
+ step = DIRENT_STEP_FILE;
+ continue;
+ }
+diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
+index 1d6da61157c93..db538709dafa0 100644
+--- a/fs/exfat/exfat_fs.h
++++ b/fs/exfat/exfat_fs.h
+@@ -42,7 +42,7 @@ enum {
+ #define ES_2_ENTRIES 2
+ #define ES_ALL_ENTRIES 0
+
+-#define DIR_DELETED 0xFFFF0321
++#define DIR_DELETED 0xFFFFFFF7
+
+ /* type values */
+ #define TYPE_UNUSED 0x0000
+@@ -381,6 +381,14 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi,
+ EXFAT_RESERVED_CLUSTERS;
+ }
+
++static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
++ unsigned int clus)
++{
++ if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus)
++ return false;
++ return true;
++}
++
+ /* super.c */
+ int exfat_set_volume_dirty(struct super_block *sb);
+ int exfat_clear_volume_dirty(struct super_block *sb);
+diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
+index e949e563443c9..421c273531049 100644
+--- a/fs/exfat/fatent.c
++++ b/fs/exfat/fatent.c
+@@ -81,14 +81,6 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc,
+ return 0;
+ }
+
+-static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
+- unsigned int clus)
+-{
+- if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus)
+- return false;
+- return true;
+-}
+-
+ int exfat_ent_get(struct super_block *sb, unsigned int loc,
+ unsigned int *content)
+ {
+diff --git a/fs/exfat/file.c b/fs/exfat/file.c
+index 6af0191b648f1..c40082ae3bd1a 100644
+--- a/fs/exfat/file.c
++++ b/fs/exfat/file.c
+@@ -110,8 +110,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
+ exfat_set_volume_dirty(sb);
+
+ num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
+- num_clusters_phys =
+- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi);
++ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+
+ exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
+
+@@ -228,12 +227,13 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
++ struct exfat_inode_info *ei = EXFAT_I(inode);
+ unsigned int blocksize = i_blocksize(inode);
+ loff_t aligned_size;
+ int err;
+
+ mutex_lock(&sbi->s_lock);
+- if (EXFAT_I(inode)->start_clu == 0) {
++ if (ei->start_clu == 0) {
+ /*
+ * Empty start_clu != ~0 (not allocated)
+ */
+@@ -251,8 +251,7 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ else
+ mark_inode_dirty(inode);
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+- ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
+ write_size:
+ aligned_size = i_size_read(inode);
+ if (aligned_size & (blocksize - 1)) {
+@@ -260,11 +259,11 @@ write_size:
+ aligned_size++;
+ }
+
+- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+- EXFAT_I(inode)->i_size_ondisk = aligned_size;
++ if (ei->i_size_ondisk > i_size_read(inode))
++ ei->i_size_ondisk = aligned_size;
+
+- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode))
+- EXFAT_I(inode)->i_size_aligned = aligned_size;
++ if (ei->i_size_aligned > i_size_read(inode))
++ ei->i_size_aligned = aligned_size;
+ mutex_unlock(&sbi->s_lock);
+ }
+
+diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
+index ca37d43443612..23d8c364edff9 100644
+--- a/fs/exfat/inode.c
++++ b/fs/exfat/inode.c
+@@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+ unsigned int local_clu_offset = clu_offset;
+ unsigned int num_to_be_allocated = 0, num_clusters = 0;
+
+- if (EXFAT_I(inode)->i_size_ondisk > 0)
++ if (ei->i_size_ondisk > 0)
+ num_clusters =
+- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk,
+- sbi);
++ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+
+ if (clu_offset >= num_clusters)
+ num_to_be_allocated = clu_offset - num_clusters + 1;
+@@ -244,8 +243,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+ return err;
+ } /* end of if != DIR_DELETED */
+
+- inode->i_blocks +=
+- num_to_be_allocated << sbi->sect_per_clus_bits;
++ inode->i_blocks += EXFAT_CLU_TO_B(num_to_be_allocated, sbi) >> 9;
+
+ /*
+ * Move *clu pointer along FAT chains (hole care) because the
+@@ -416,10 +414,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
+
+ err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+
+- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
++ if (ei->i_size_aligned < i_size_read(inode)) {
+ exfat_fs_error(inode->i_sb,
+ "invalid size(size(%llu) > aligned(%llu)\n",
+- i_size_read(inode), EXFAT_I(inode)->i_size_aligned);
++ i_size_read(inode), ei->i_size_aligned);
+ return -EIO;
+ }
+
+@@ -603,8 +601,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
+
+ exfat_save_attr(inode, info->attr);
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+- ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
+ inode->i_mtime = info->mtime;
+ inode->i_ctime = info->mtime;
+ ei->i_crtime = info->crtime;
+diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
+index 24b41103d1cc0..b22d6c984f8c7 100644
+--- a/fs/exfat/namei.c
++++ b/fs/exfat/namei.c
+@@ -395,10 +395,10 @@ static int exfat_find_empty_entry(struct inode *inode,
+
+ /* directory inode should be updated in here */
+ i_size_write(inode, size);
+- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size;
+- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size;
+- EXFAT_I(inode)->flags = p_dir->flags;
+- inode->i_blocks += 1 << sbi->sect_per_clus_bits;
++ ei->i_size_ondisk += sbi->cluster_size;
++ ei->i_size_aligned += sbi->cluster_size;
++ ei->flags = p_dir->flags;
++ inode->i_blocks += sbi->cluster_size >> 9;
+ }
+
+ return dentry;
+@@ -1069,6 +1069,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
+
+ exfat_remove_entries(inode, p_dir, oldentry, 0,
+ num_old_entries);
++ ei->dir = *p_dir;
+ ei->entry = newentry;
+ } else {
+ if (exfat_get_entry_type(epold) == TYPE_FILE) {
+@@ -1159,28 +1160,6 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
+ return 0;
+ }
+
+-static void exfat_update_parent_info(struct exfat_inode_info *ei,
+- struct inode *parent_inode)
+-{
+- struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb);
+- struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode);
+- loff_t parent_isize = i_size_read(parent_inode);
+-
+- /*
+- * the problem that struct exfat_inode_info caches wrong parent info.
+- *
+- * because of flag-mismatch of ei->dir,
+- * there is abnormal traversing cluster chain.
+- */
+- if (unlikely(parent_ei->flags != ei->dir.flags ||
+- parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) ||
+- parent_ei->start_clu != ei->dir.dir)) {
+- exfat_chain_set(&ei->dir, parent_ei->start_clu,
+- EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi),
+- parent_ei->flags);
+- }
+-}
+-
+ /* rename or move a old file into a new file */
+ static int __exfat_rename(struct inode *old_parent_inode,
+ struct exfat_inode_info *ei, struct inode *new_parent_inode,
+@@ -1211,9 +1190,9 @@ static int __exfat_rename(struct inode *old_parent_inode,
+ return -ENOENT;
+ }
+
+- exfat_update_parent_info(ei, old_parent_inode);
+-
+- exfat_chain_dup(&olddir, &ei->dir);
++ exfat_chain_set(&olddir, EXFAT_I(old_parent_inode)->start_clu,
++ EXFAT_B_TO_CLU_ROUND_UP(i_size_read(old_parent_inode), sbi),
++ EXFAT_I(old_parent_inode)->flags);
+ dentry = ei->entry;
+
+ ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh, NULL);
+@@ -1233,8 +1212,6 @@ static int __exfat_rename(struct inode *old_parent_inode,
+ goto out;
+ }
+
+- exfat_update_parent_info(new_ei, new_parent_inode);
+-
+ p_dir = &(new_ei->dir);
+ new_entry = new_ei->entry;
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL);
+diff --git a/fs/exfat/super.c b/fs/exfat/super.c
+index 5539ffc20d164..822976236f446 100644
+--- a/fs/exfat/super.c
++++ b/fs/exfat/super.c
+@@ -364,11 +364,10 @@ static int exfat_read_root(struct inode *inode)
+ inode->i_op = &exfat_dir_inode_operations;
+ inode->i_fop = &exfat_dir_operations;
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
+- & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
+- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+- EXFAT_I(inode)->i_size_aligned = i_size_read(inode);
+- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode);
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
++ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
++ ei->i_size_aligned = i_size_read(inode);
++ ei->i_size_ondisk = i_size_read(inode);
+
+ exfat_save_attr(inode, ATTR_SUBDIR);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
+index 0106eba46d5af..3ef80d000e13d 100644
+--- a/fs/exportfs/expfs.c
++++ b/fs/exportfs/expfs.c
+@@ -145,7 +145,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
+ if (err)
+ goto out_err;
+ dprintk("%s: found name: %s\n", __func__, nbuf);
+- tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf));
++ tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
+ if (IS_ERR(tmp)) {
+ dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+ err = PTR_ERR(tmp);
+@@ -525,7 +525,8 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
+ }
+
+ inode_lock(target_dir->d_inode);
+- nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
++ nresult = lookup_one(mnt_user_ns(mnt), nbuf,
++ target_dir, strlen(nbuf));
+ if (!IS_ERR(nresult)) {
+ if (unlikely(nresult->d_inode != result->d_inode)) {
+ dput(nresult);
+diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
+index 3be9dd6412b78..5207ce805a399 100644
+--- a/fs/ext2/ext2.h
++++ b/fs/ext2/ext2.h
+@@ -70,10 +70,7 @@ struct mb_cache;
+ * second extended-fs super-block data in memory
+ */
+ struct ext2_sb_info {
+- unsigned long s_frag_size; /* Size of a fragment in bytes */
+- unsigned long s_frags_per_block;/* Number of fragments per block */
+ unsigned long s_inodes_per_block;/* Number of inodes per block */
+- unsigned long s_frags_per_group;/* Number of fragments in a group */
+ unsigned long s_blocks_per_group;/* Number of blocks in a group */
+ unsigned long s_inodes_per_group;/* Number of inodes in a group */
+ unsigned long s_itb_per_group; /* Number of inode table blocks per group */
+@@ -179,6 +176,7 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
+ #define EXT2_MIN_BLOCK_SIZE 1024
+ #define EXT2_MAX_BLOCK_SIZE 4096
+ #define EXT2_MIN_BLOCK_LOG_SIZE 10
++#define EXT2_MAX_BLOCK_LOG_SIZE 16
+ #define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize)
+ #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
+ #define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+@@ -186,15 +184,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
+ #define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size)
+ #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino)
+
+-/*
+- * Macro-instructions used to manage fragments
+- */
+-#define EXT2_MIN_FRAG_SIZE 1024
+-#define EXT2_MAX_FRAG_SIZE 4096
+-#define EXT2_MIN_FRAG_LOG_SIZE 10
+-#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size)
+-#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block)
+-
+ /*
+ * Structure of a blocks group descriptor
+ */
+diff --git a/fs/ext2/super.c b/fs/ext2/super.c
+index d8d580b609baa..81798b7cbde2d 100644
+--- a/fs/ext2/super.c
++++ b/fs/ext2/super.c
+@@ -163,7 +163,7 @@ static void ext2_put_super (struct super_block * sb)
+ db_count = sbi->s_gdb_count;
+ for (i = 0; i < db_count; i++)
+ brelse(sbi->s_group_desc[i]);
+- kfree(sbi->s_group_desc);
++ kvfree(sbi->s_group_desc);
+ kfree(sbi->s_debts);
+ percpu_counter_destroy(&sbi->s_freeblocks_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+@@ -670,10 +670,9 @@ static int ext2_setup_super (struct super_block * sb,
+ es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
+ le16_add_cpu(&es->s_mnt_count, 1);
+ if (test_opt (sb, DEBUG))
+- ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
++ ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, "
+ "bpg=%lu, ipg=%lu, mo=%04lx]",
+ EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
+- sbi->s_frag_size,
+ sbi->s_groups_count,
+ EXT2_BLOCKS_PER_GROUP(sb),
+ EXT2_INODES_PER_GROUP(sb),
+@@ -753,8 +752,12 @@ static loff_t ext2_max_size(int bits)
+ res += 1LL << (bits-2);
+ res += 1LL << (2*(bits-2));
+ res += 1LL << (3*(bits-2));
++ /* Compute how many metadata blocks are needed */
++ meta_blocks = 1;
++ meta_blocks += 1 + ppb;
++ meta_blocks += 1 + ppb + ppb * ppb;
+ /* Does block tree limit file size? */
+- if (res < upper_limit)
++ if (res + meta_blocks <= upper_limit)
+ goto check_lfs;
+
+ res = upper_limit;
+@@ -943,6 +946,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount;
+ }
+
++ if (le32_to_cpu(es->s_log_block_size) >
++ (EXT2_MAX_BLOCK_LOG_SIZE - BLOCK_SIZE_BITS)) {
++ ext2_msg(sb, KERN_ERR,
++ "Invalid log block size: %u",
++ le32_to_cpu(es->s_log_block_size));
++ goto failed_mount;
++ }
+ blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+ if (test_opt(sb, DAX)) {
+@@ -1001,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+ }
+ }
+
+- sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
+- le32_to_cpu(es->s_log_frag_size);
+- if (sbi->s_frag_size == 0)
+- goto cantfind_ext2;
+- sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
+-
+ sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
+ sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+
+ sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
+@@ -1034,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount;
+ }
+
+- if (sb->s_blocksize != sbi->s_frag_size) {
++ if (es->s_log_frag_size != es->s_log_block_size) {
+ ext2_msg(sb, KERN_ERR,
+- "error: fragsize %lu != blocksize %lu"
+- "(not supported yet)",
+- sbi->s_frag_size, sb->s_blocksize);
++ "error: fragsize log %u != blocksize log %u",
++ le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits);
+ goto failed_mount;
+ }
+
+@@ -1048,15 +1050,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+ sbi->s_blocks_per_group);
+ goto failed_mount;
+ }
+- if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
+- ext2_msg(sb, KERN_ERR,
+- "error: #fragments per group too big: %lu",
+- sbi->s_frags_per_group);
+- goto failed_mount;
+- }
+- if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
++ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
++ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ ext2_msg(sb, KERN_ERR,
+- "error: #inodes per group too big: %lu",
++ "error: invalid #inodes per group: %lu",
+ sbi->s_inodes_per_group);
+ goto failed_mount;
+ }
+@@ -1066,9 +1063,16 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) - 1)
+ / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
++ if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
++ le32_to_cpu(es->s_inodes_count)) {
++ ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
++ le32_to_cpu(es->s_inodes_count),
++ (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
++ goto failed_mount;
++ }
+ db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
+ EXT2_DESC_PER_BLOCK(sb);
+- sbi->s_group_desc = kmalloc_array(db_count,
++ sbi->s_group_desc = kvmalloc_array(db_count,
+ sizeof(struct buffer_head *),
+ GFP_KERNEL);
+ if (sbi->s_group_desc == NULL) {
+@@ -1194,7 +1198,7 @@ failed_mount2:
+ for (i = 0; i < db_count; i++)
+ brelse(sbi->s_group_desc[i]);
+ failed_mount_group_desc:
+- kfree(sbi->s_group_desc);
++ kvfree(sbi->s_group_desc);
+ kfree(sbi->s_debts);
+ failed_mount:
+ brelse(bh);
+diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
+index 0613dfcbfd4aa..5a35768d6149a 100644
+--- a/fs/ext4/acl.c
++++ b/fs/ext4/acl.c
+@@ -246,7 +246,6 @@ retry:
+ handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+- ext4_fc_start_update(inode);
+
+ if ((type == ACL_TYPE_ACCESS) && acl) {
+ error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
+@@ -264,7 +263,6 @@ retry:
+ }
+ out_stop:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_update(inode);
+ if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+ return error;
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index a0fb0c4bdc7cd..c23ac149601e5 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -303,6 +303,36 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
+ return desc;
+ }
+
++static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
++ ext4_group_t block_group,
++ struct buffer_head *bh)
++{
++ ext4_grpblk_t next_zero_bit;
++ unsigned long bitmap_size = sb->s_blocksize * 8;
++ unsigned int offset = num_clusters_in_group(sb, block_group);
++
++ if (bitmap_size <= offset)
++ return 0;
++
++ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
++
++ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
++}
++
++struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
++ ext4_group_t group)
++{
++ struct ext4_group_info **grp_info;
++ long indexv, indexh;
++
++ if (unlikely(group >= EXT4_SB(sb)->s_groups_count))
++ return NULL;
++ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
++ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
++ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
++ return grp_info[indexh];
++}
++
+ /*
+ * Return the block number which was discovered to be invalid, or 0 if
+ * the block bitmap is valid.
+@@ -377,7 +407,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
+
+ if (buffer_verified(bh))
+ return 0;
+- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
++ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ return -EFSCORRUPTED;
+
+ ext4_lock_group(sb, block_group);
+@@ -401,6 +431,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
++ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
++ if (unlikely(blk != 0)) {
++ ext4_unlock_group(sb, block_group);
++ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
++ block_group, blk);
++ ext4_mark_group_bitmap_corrupted(sb, block_group,
++ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
++ return -EFSCORRUPTED;
++ }
+ set_buffer_verified(bh);
+ verified:
+ ext4_unlock_group(sb, block_group);
+@@ -665,7 +704,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
+ * it's possible we've just missed a transaction commit here,
+ * so ignore the returned status
+ */
+- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
++ ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+ (void) jbd2_journal_force_commit_nested(sbi->s_journal);
+ return 1;
+ }
+@@ -870,11 +909,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
+ }
+
+ /*
+- * This function returns the number of file system metadata clusters at
++ * This function returns the number of file system metadata blocks at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+- ext4_group_t block_group)
++unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned num;
+@@ -892,8 +931,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+ } else { /* For META_BG_BLOCK_GROUPS */
+ num += ext4_bg_num_gdb(sb, block_group);
+ }
+- return EXT4_NUM_B2C(sbi, num);
++ return num;
++}
++
++static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
++ ext4_group_t block_group)
++{
++ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
+ }
++
+ /**
+ * ext4_inode_to_goal_block - return a hint for block allocation
+ * @inode: inode for block allocation
+diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
+index 4666b55b736ec..6fe3c941b5651 100644
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb)
+ struct ext4_system_blocks *system_blks;
+ struct ext4_group_desc *gdp;
+ ext4_group_t i;
+- int flex_size = ext4_flex_bg_size(sbi);
+ int ret;
+
+ system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
+@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb)
+ return -ENOMEM;
+
+ for (i=0; i < ngroups; i++) {
++ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
++
+ cond_resched();
+- if (ext4_bg_has_super(sb, i) &&
+- ((i < 5) || ((i % flex_size) == 0))) {
++ if (meta_blks != 0) {
+ ret = add_system_zone(system_blks,
+ ext4_group_first_block_no(sb, i),
+- ext4_bg_num_gdb(sb, i) + 1, 0);
++ meta_blks, 0);
+ if (ret)
+ goto err;
+ }
+@@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb)
+ call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
+ }
+
+-/*
+- * Returns 1 if the passed-in block region (start_blk,
+- * start_blk+count) is valid; 0 if some part of the block region
+- * overlaps with some other filesystem metadata blocks.
+- */
+-int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+- unsigned int count)
++int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
++ ext4_fsblk_t start_blk, unsigned int count)
+ {
+- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
++ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_system_blocks *system_blks;
+ struct ext4_system_zone *entry;
+ struct rb_node *n;
+@@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+ else if (start_blk >= (entry->start_blk + entry->count))
+ n = n->rb_right;
+ else {
+- ret = (entry->ino == inode->i_ino);
++ ret = 0;
++ if (inode)
++ ret = (entry->ino == inode->i_ino);
+ break;
+ }
+ }
+@@ -338,6 +335,17 @@ out_rcu:
+ return ret;
+ }
+
++/*
++ * Returns 1 if the passed-in block region (start_blk,
++ * start_blk+count) is valid; 0 if some part of the block region
++ * overlaps with some other filesystem metadata blocks.
++ */
++int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
++ unsigned int count)
++{
++ return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count);
++}
++
+ int ext4_check_blockref(const char *function, unsigned int line,
+ struct inode *inode, __le32 *p, unsigned int max)
+ {
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 3825195539d74..976cb4b3ff660 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -559,7 +559,7 @@ enum {
+ *
+ * It's not paranoia if the Murphy's Law really *is* out to get you. :-)
+ */
+-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
++#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
+ #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
+
+ static inline void ext4_check_flag_values(void)
+@@ -995,11 +995,13 @@ do { \
+ * where the second inode has larger inode number
+ * than the first
+ * I_DATA_SEM_QUOTA - Used for quota inodes only
++ * I_DATA_SEM_EA - Used for ea_inodes only
+ */
+ enum {
+ I_DATA_SEM_NORMAL = 0,
+ I_DATA_SEM_OTHER,
+ I_DATA_SEM_QUOTA,
++ I_DATA_SEM_EA
+ };
+
+
+@@ -1435,12 +1437,6 @@ struct ext4_super_block {
+
+ #ifdef __KERNEL__
+
+-#ifdef CONFIG_FS_ENCRYPTION
+-#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
+-#else
+-#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
+-#endif
+-
+ /* Number of quota types we support */
+ #define EXT4_MAXQUOTAS 3
+
+@@ -1724,9 +1720,9 @@ struct ext4_sb_info {
+ */
+ struct work_struct s_error_work;
+
+- /* Ext4 fast commit stuff */
++ /* Ext4 fast commit sub transaction ID */
+ atomic_t s_fc_subtid;
+- atomic_t s_fc_ineligible_updates;
++
+ /*
+ * After commit starts, the main queue gets locked, and the further
+ * updates get added in the staging queue.
+@@ -1746,7 +1742,7 @@ struct ext4_sb_info {
+ spinlock_t s_fc_lock;
+ struct buffer_head *s_fc_bh;
+ struct ext4_fc_stats s_fc_stats;
+- u64 s_fc_avg_commit_time;
++ tid_t s_fc_ineligible_tid;
+ #ifdef CONFIG_EXT4_DEBUG
+ int s_fc_debug_max_replay;
+ #endif
+@@ -1792,10 +1788,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+ enum {
+ EXT4_MF_MNTDIR_SAMPLED,
+ EXT4_MF_FS_ABORTED, /* Fatal error detected */
+- EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
+- EXT4_MF_FC_COMMITTING /* File system underoing a fast
+- * commit.
+- */
++ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
+ };
+
+ static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+@@ -2270,6 +2263,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+ * Structure of a directory entry
+ */
+ #define EXT4_NAME_LEN 255
++/*
++ * Base length of the ext4 directory entry excluding the name length
++ */
++#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN)
+
+ struct ext4_dir_entry {
+ __le32 inode; /* Inode number */
+@@ -2704,6 +2701,8 @@ extern void ext4_check_blocks_bitmap(struct super_block *);
+ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
+ ext4_group_t block_group,
+ struct buffer_head ** bh);
++extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
++ ext4_group_t group);
+ extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+
+ extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+@@ -2924,9 +2923,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+ void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
+-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+-void ext4_fc_start_ineligible(struct super_block *sb, int reason);
+-void ext4_fc_stop_ineligible(struct super_block *sb);
++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
+ void ext4_fc_start_update(struct inode *inode);
+ void ext4_fc_stop_update(struct inode *inode);
+ void ext4_fc_del(struct inode *inode);
+@@ -2934,6 +2931,10 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
+ void ext4_fc_replay_cleanup(struct super_block *sb);
+ int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
+ int __init ext4_fc_init_dentry_cache(void);
++void ext4_fc_destroy_dentry_cache(void);
++int ext4_fc_record_regions(struct super_block *sb, int ino,
++ ext4_lblk_t lblk, ext4_fsblk_t pblk,
++ int len, int replay);
+
+ /* mballoc.c */
+ extern const struct seq_operations ext4_mb_seq_groups_ops;
+@@ -2999,7 +3000,9 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode,
+ typedef enum {
+ EXT4_IGET_NORMAL = 0,
+ EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
+- EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
++ EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
++ EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
++ EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
+ } ext4_iget_flags;
+
+ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+@@ -3028,7 +3031,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
+ extern int ext4_can_truncate(struct inode *inode);
+ extern int ext4_truncate(struct inode *);
+ extern int ext4_break_layouts(struct inode *);
+-extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
++extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
+ extern void ext4_set_inode_flags(struct inode *, bool init);
+ extern int ext4_alloc_da_blocks(struct inode *inode);
+ extern void ext4_set_aops(struct inode *inode);
+@@ -3117,6 +3120,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
+ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ ext4_group_t block_group,
+ unsigned int flags);
++extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group);
+
+ extern __printf(7, 8)
+ void __ext4_error(struct super_block *, const char *, unsigned int, bool,
+@@ -3348,19 +3353,6 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
+ raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
+ }
+
+-static inline
+-struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+- ext4_group_t group)
+-{
+- struct ext4_group_info **grp_info;
+- long indexv, indexh;
+- BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
+- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+- return grp_info[indexh];
+-}
+-
+ /*
+ * Reading s_groups_count requires using smp_rmb() afterwards. See
+ * the locking protocol documented in the comments of ext4_group_add()
+@@ -3649,8 +3641,8 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize);
+ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
+-extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
+- struct inode *inode);
++extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
++ struct inode *inode, struct dentry *dentry);
+ extern int __ext4_link(struct inode *dir, struct inode *inode,
+ struct dentry *dentry);
+
+@@ -3700,6 +3692,9 @@ extern int ext4_inode_block_valid(struct inode *inode,
+ unsigned int count);
+ extern int ext4_check_blockref(const char *, unsigned int,
+ struct inode *, __le32 *, unsigned int);
++extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
++ ext4_fsblk_t start_blk, unsigned int count);
++
+
+ /* extents.c */
+ struct ext4_ext_path;
+diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
+index 6def7339056db..8e1fb18f465ea 100644
+--- a/fs/ext4/ext4_jbd2.c
++++ b/fs/ext4/ext4_jbd2.c
+@@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
+ {
+ if (!ext4_handle_valid(handle))
+ return 0;
++ if (is_handle_aborted(handle))
++ return -EROFS;
+ if (jbd2_handle_buffer_credits(handle) >= check_cred &&
+ handle->h_revoke_credits >= revoke_cred)
+ return 0;
+@@ -265,8 +267,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
+ trace_ext4_forget(inode, is_metadata, blocknr);
+ BUFFER_TRACE(bh, "enter");
+
+- jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+- "data mode %x\n",
++ ext4_debug("forgetting bh %p: is_metadata=%d, mode %o, data mode %x\n",
+ bh, is_metadata, inode->i_mode,
+ test_opt(inode->i_sb, DATA_FLAGS));
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 0e02571f2f828..13497bd4e14bb 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -136,15 +136,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
+ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+ {
++ int err = 0;
++
+ if (path->p_bh) {
+ /* path points to block */
+ BUFFER_TRACE(path->p_bh, "get_write_access");
+- return ext4_journal_get_write_access(handle, inode->i_sb,
+- path->p_bh, EXT4_JTR_NONE);
++ err = ext4_journal_get_write_access(handle, inode->i_sb,
++ path->p_bh, EXT4_JTR_NONE);
++ /*
++ * The extent buffer's verified bit will be set again in
++ * __ext4_ext_dirty(). We could leave an inconsistent
++ * buffer if the extent updating procedure breaks off due
++ * to some error, so force it to be checked again.
++ */
++ if (!err)
++ clear_buffer_verified(path->p_bh);
+ }
+ /* path points to leaf/index in inode body */
+ /* we use in-core data, no need to protect them */
+- return 0;
++ return err;
+ }
+
+ /*
+@@ -165,6 +175,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line,
+ /* path points to block */
+ err = __ext4_handle_dirty_metadata(where, line, handle,
+ inode, path->p_bh);
++ /* Extents updating done, re-set verified flag */
++ if (!err)
++ set_buffer_verified(path->p_bh);
+ } else {
+ /* path points to leaf/index in inode body */
+ err = ext4_mark_inode_dirty(handle, inode);
+@@ -354,9 +367,13 @@ static int ext4_valid_extent_idx(struct inode *inode,
+
+ static int ext4_valid_extent_entries(struct inode *inode,
+ struct ext4_extent_header *eh,
+- ext4_fsblk_t *pblk, int depth)
++ ext4_lblk_t lblk, ext4_fsblk_t *pblk,
++ int depth)
+ {
+ unsigned short entries;
++ ext4_lblk_t lblock = 0;
++ ext4_lblk_t cur = 0;
++
+ if (eh->eh_entries == 0)
+ return 1;
+
+@@ -365,31 +382,51 @@ static int ext4_valid_extent_entries(struct inode *inode,
+ if (depth == 0) {
+ /* leaf entries */
+ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+- ext4_lblk_t lblock = 0;
+- ext4_lblk_t prev = 0;
+- int len = 0;
++
++ /*
++ * The logical block in the first entry should be equal to
++ * the number in the index block.
++ */
++ if (depth != ext_depth(inode) &&
++ lblk != le32_to_cpu(ext->ee_block))
++ return 0;
+ while (entries) {
+ if (!ext4_valid_extent(inode, ext))
+ return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+- len = ext4_ext_get_actual_len(ext);
+- if ((lblock <= prev) && prev) {
++ if (lblock < cur) {
+ *pblk = ext4_ext_pblock(ext);
+ return 0;
+ }
++ cur = lblock + ext4_ext_get_actual_len(ext);
+ ext++;
+ entries--;
+- prev = lblock + len - 1;
+ }
+ } else {
+ struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
++
++ /*
++ * The logical block in the first entry should be equal to
++ * the number in the parent index block.
++ */
++ if (depth != ext_depth(inode) &&
++ lblk != le32_to_cpu(ext_idx->ei_block))
++ return 0;
+ while (entries) {
+ if (!ext4_valid_extent_idx(inode, ext_idx))
+ return 0;
++
++ /* Check for overlapping index extents */
++ lblock = le32_to_cpu(ext_idx->ei_block);
++ if (lblock < cur) {
++ *pblk = ext4_idx_pblock(ext_idx);
++ return 0;
++ }
+ ext_idx++;
+ entries--;
++ cur = lblock + 1;
+ }
+ }
+ return 1;
+@@ -397,7 +434,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
+
+ static int __ext4_ext_check(const char *function, unsigned int line,
+ struct inode *inode, struct ext4_extent_header *eh,
+- int depth, ext4_fsblk_t pblk)
++ int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
+ {
+ const char *error_msg;
+ int max = 0, err = -EFSCORRUPTED;
+@@ -423,7 +460,11 @@ static int __ext4_ext_check(const char *function, unsigned int line,
+ error_msg = "invalid eh_entries";
+ goto corrupted;
+ }
+- if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) {
++ if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
++ error_msg = "eh_entries is 0 but eh_depth is > 0";
++ goto corrupted;
++ }
++ if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
+ error_msg = "invalid extent entries";
+ goto corrupted;
+ }
+@@ -453,7 +494,7 @@ corrupted:
+ }
+
+ #define ext4_ext_check(inode, eh, depth, pblk) \
+- __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
++ __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
+
+ int ext4_ext_check_inode(struct inode *inode)
+ {
+@@ -486,16 +527,18 @@ static void ext4_cache_extents(struct inode *inode,
+
+ static struct buffer_head *
+ __read_extent_tree_block(const char *function, unsigned int line,
+- struct inode *inode, ext4_fsblk_t pblk, int depth,
+- int flags)
++ struct inode *inode, struct ext4_extent_idx *idx,
++ int depth, int flags)
+ {
+ struct buffer_head *bh;
+ int err;
+ gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
++ ext4_fsblk_t pblk;
+
+ if (flags & EXT4_EX_NOFAIL)
+ gfp_flags |= __GFP_NOFAIL;
+
++ pblk = ext4_idx_pblock(idx);
+ bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
+ if (unlikely(!bh))
+ return ERR_PTR(-ENOMEM);
+@@ -508,8 +551,8 @@ __read_extent_tree_block(const char *function, unsigned int line,
+ }
+ if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
+ return bh;
+- err = __ext4_ext_check(function, line, inode,
+- ext_block_hdr(bh), depth, pblk);
++ err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
++ depth, pblk, le32_to_cpu(idx->ei_block));
+ if (err)
+ goto errout;
+ set_buffer_verified(bh);
+@@ -527,8 +570,8 @@ errout:
+
+ }
+
+-#define read_extent_tree_block(inode, pblk, depth, flags) \
+- __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
++#define read_extent_tree_block(inode, idx, depth, flags) \
++ __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \
+ (depth), (flags))
+
+ /*
+@@ -578,8 +621,7 @@ int ext4_ext_precache(struct inode *inode)
+ i--;
+ continue;
+ }
+- bh = read_extent_tree_block(inode,
+- ext4_idx_pblock(path[i].p_idx++),
++ bh = read_extent_tree_block(inode, path[i].p_idx++,
+ depth - i - 1,
+ EXT4_EX_FORCE_CACHE);
+ if (IS_ERR(bh)) {
+@@ -884,8 +926,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+ path[ppos].p_depth = i;
+ path[ppos].p_ext = NULL;
+
+- bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
+- flags);
++ bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
+ goto err;
+@@ -1494,7 +1535,6 @@ static int ext4_ext_search_right(struct inode *inode,
+ struct ext4_extent_header *eh;
+ struct ext4_extent_idx *ix;
+ struct ext4_extent *ex;
+- ext4_fsblk_t block;
+ int depth; /* Note, NOT eh_depth; depth from top of tree */
+ int ee_len;
+
+@@ -1561,20 +1601,17 @@ got_index:
+ * follow it and find the closest allocated
+ * block to the right */
+ ix++;
+- block = ext4_idx_pblock(ix);
+ while (++depth < path->p_depth) {
+ /* subtract from p_depth to get proper eh_depth */
+- bh = read_extent_tree_block(inode, block,
+- path->p_depth - depth, 0);
++ bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ eh = ext_block_hdr(bh);
+ ix = EXT_FIRST_INDEX(eh);
+- block = ext4_idx_pblock(ix);
+ put_bh(bh);
+ }
+
+- bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
++ bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ eh = ext_block_hdr(bh);
+@@ -2953,9 +2990,9 @@ again:
+ ext_debug(inode, "move to level %d (block %llu)\n",
+ i + 1, ext4_idx_pblock(path[i].p_idx));
+ memset(path + i + 1, 0, sizeof(*path));
+- bh = read_extent_tree_block(inode,
+- ext4_idx_pblock(path[i].p_idx), depth - i - 1,
+- EXT4_EX_NOCACHE);
++ bh = read_extent_tree_block(inode, path[i].p_idx,
++ depth - i - 1,
++ EXT4_EX_NOCACHE);
+ if (IS_ERR(bh)) {
+ /* should we reset i_size? */
+ err = PTR_ERR(bh);
+@@ -4471,9 +4508,9 @@ retry:
+ return ret > 0 ? ret2 : ret;
+ }
+
+-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
++static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
+
+-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
++static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
+
+ static long ext4_zero_range(struct file *file, loff_t offset,
+ loff_t len, int mode)
+@@ -4545,6 +4582,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ inode_dio_wait(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out_mutex;
++
+ /* Preallocate the range including the unaligned edges */
+ if (partial_begin || partial_end) {
+ ret = ext4_alloc_file_blocks(file,
+@@ -4612,8 +4653,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ ret = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(ret))
+ goto out_handle;
+- ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
+- (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
+ /* Zero out partial block at the edges of the range */
+ ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+ if (ret >= 0)
+@@ -4662,24 +4701,24 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+ FALLOC_FL_INSERT_RANGE))
+ return -EOPNOTSUPP;
+
+- ext4_fc_start_update(inode);
++ inode_lock(inode);
++ ret = ext4_convert_inline_data(inode);
++ inode_unlock(inode);
++ if (ret)
++ goto exit;
+
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+- ret = ext4_punch_hole(inode, offset, len);
++ ret = ext4_punch_hole(file, offset, len);
+ goto exit;
+ }
+
+- ret = ext4_convert_inline_data(inode);
+- if (ret)
+- goto exit;
+-
+ if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+- ret = ext4_collapse_range(inode, offset, len);
++ ret = ext4_collapse_range(file, offset, len);
+ goto exit;
+ }
+
+ if (mode & FALLOC_FL_INSERT_RANGE) {
+- ret = ext4_insert_range(inode, offset, len);
++ ret = ext4_insert_range(file, offset, len);
+ goto exit;
+ }
+
+@@ -4715,6 +4754,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ inode_dio_wait(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out;
++
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
+ if (ret)
+ goto out;
+@@ -4727,7 +4770,6 @@ out:
+ inode_unlock(inode);
+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
+ exit:
+- ext4_fc_stop_update(inode);
+ return ret;
+ }
+
+@@ -4977,36 +5019,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
+ }
+
+-/*
+- * ext4_access_path:
+- * Function to access the path buffer for marking it dirty.
+- * It also checks if there are sufficient credits left in the journal handle
+- * to update path.
+- */
+-static int
+-ext4_access_path(handle_t *handle, struct inode *inode,
+- struct ext4_ext_path *path)
+-{
+- int credits, err;
+-
+- if (!ext4_handle_valid(handle))
+- return 0;
+-
+- /*
+- * Check if need to extend journal credits
+- * 3 for leaf, sb, and inode plus 2 (bmap and group
+- * descriptor) for each block group; assume two block
+- * groups
+- */
+- credits = ext4_writepage_trans_blocks(inode);
+- err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
+- if (err < 0)
+- return err;
+-
+- err = ext4_ext_get_access(handle, inode, path);
+- return err;
+-}
+-
+ /*
+ * ext4_ext_shift_path_extents:
+ * Shift the extents of a path structure lying between path[depth].p_ext
+@@ -5021,6 +5033,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+ int depth, err = 0;
+ struct ext4_extent *ex_start, *ex_last;
+ bool update = false;
++ int credits, restart_credits;
+ depth = path->p_depth;
+
+ while (depth >= 0) {
+@@ -5030,13 +5043,26 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+ return -EFSCORRUPTED;
+
+ ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
++ /* leaf + sb + inode */
++ credits = 3;
++ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
++ update = true;
++ /* extent tree + sb + inode */
++ credits = depth + 2;
++ }
+
+- err = ext4_access_path(handle, inode, path + depth);
+- if (err)
++ restart_credits = ext4_writepage_trans_blocks(inode);
++ err = ext4_datasem_ensure_credits(handle, inode, credits,
++ restart_credits, 0);
++ if (err) {
++ if (err > 0)
++ err = -EAGAIN;
+ goto out;
++ }
+
+- if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
+- update = true;
++ err = ext4_ext_get_access(handle, inode, path + depth);
++ if (err)
++ goto out;
+
+ while (ex_start <= ex_last) {
+ if (SHIFT == SHIFT_LEFT) {
+@@ -5067,7 +5093,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+ }
+
+ /* Update index too */
+- err = ext4_access_path(handle, inode, path + depth);
++ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+@@ -5106,6 +5132,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+ int ret = 0, depth;
+ struct ext4_extent *extent;
+ ext4_lblk_t stop, *iterator, ex_start, ex_end;
++ ext4_lblk_t tmp = EXT_MAX_BLOCKS;
+
+ /* Let path point to the last extent */
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+@@ -5159,11 +5186,16 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+ * till we reach stop. In case of right shift, iterator points to stop
+ * and it is decreased till we reach start.
+ */
++again:
++ ret = 0;
+ if (SHIFT == SHIFT_LEFT)
+ iterator = &start;
+ else
+ iterator = &stop;
+
++ if (tmp != EXT_MAX_BLOCKS)
++ *iterator = tmp;
++
+ /*
+ * Its safe to start updating extents. Start and stop are unsigned, so
+ * in case of right shift if extent with 0 block is reached, iterator
+@@ -5192,24 +5224,35 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+ }
+ }
+
++ tmp = *iterator;
+ if (SHIFT == SHIFT_LEFT) {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ *iterator = le32_to_cpu(extent->ee_block) +
+ ext4_ext_get_actual_len(extent);
+ } else {
+ extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
+- if (le32_to_cpu(extent->ee_block) > 0)
++ if (le32_to_cpu(extent->ee_block) > start)
+ *iterator = le32_to_cpu(extent->ee_block) - 1;
+- else
+- /* Beginning is reached, end of the loop */
++ else if (le32_to_cpu(extent->ee_block) == start)
+ iterator = NULL;
+- /* Update path extent in case we need to stop */
+- while (le32_to_cpu(extent->ee_block) < start)
++ else {
++ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
++ while (le32_to_cpu(extent->ee_block) >= start)
++ extent--;
++
++ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
++ break;
++
+ extent++;
++ iterator = NULL;
++ }
+ path[depth].p_ext = extent;
+ }
+ ret = ext4_ext_shift_path_extents(path, shift, inode,
+ handle, SHIFT);
++ /* iterator can be NULL which means we should break */
++ if (ret == -EAGAIN)
++ goto again;
+ if (ret)
+ break;
+ }
+@@ -5224,8 +5267,9 @@ out:
+ * This implements the fallocate's collapse range functionality for ext4
+ * Returns: 0 and non-zero on error.
+ */
+-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
++static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
+ {
++ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
+ ext4_lblk_t punch_start, punch_stop;
+@@ -5277,6 +5321,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ /* Wait for existing dio to complete */
+ inode_dio_wait(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out_mutex;
++
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+@@ -5316,7 +5364,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_preallocations(inode, 0);
+@@ -5355,7 +5403,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+
+ out_stop:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ out_mmap:
+ filemap_invalidate_unlock(mapping);
+ out_mutex:
+@@ -5371,8 +5418,9 @@ out_mutex:
+ * by len bytes.
+ * Returns 0 on success, error otherwise.
+ */
+-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
++static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
+ {
++ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
+ handle_t *handle;
+@@ -5429,6 +5477,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ /* Wait for existing dio to complete */
+ inode_dio_wait(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out_mutex;
++
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+@@ -5457,7 +5509,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
+
+ /* Expand file to avoid data loss if there is error while shifting */
+ inode->i_size += len;
+@@ -5532,7 +5584,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+
+ out_stop:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ out_mmap:
+ filemap_invalidate_unlock(mapping);
+ out_mutex:
+@@ -5756,6 +5807,15 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
+ struct ext4_extent *extent;
+ ext4_lblk_t first_lblk, first_lclu, last_lclu;
+
++ /*
++ * if data can be stored inline, the logical cluster isn't
++ * mapped - no physical clusters have been allocated, and the
++ * file has no extents
++ */
++ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
++ ext4_has_inline_data(inode))
++ return 0;
++
+ /* search for the extent closest to the first block in the cluster */
+ path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
+ if (IS_ERR(path)) {
+@@ -6072,11 +6132,15 @@ int ext4_ext_clear_bb(struct inode *inode)
+
+ ext4_mb_mark_bb(inode->i_sb,
+ path[j].p_block, 1, 0);
++ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
++ 0, path[j].p_block, 1, 1);
+ }
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+ ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
++ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
++ map.m_lblk, map.m_pblk, map.m_len, 1);
+ }
+ cur = cur + map.m_len;
+ }
+diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
+index 9a3a8996aacf7..fee54ab42bbaa 100644
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -269,14 +269,12 @@ static void __es_find_extent_range(struct inode *inode,
+
+ /* see if the extent has been cached */
+ es->es_lblk = es->es_len = es->es_pblk = 0;
+- if (tree->cache_es) {
+- es1 = tree->cache_es;
+- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
+- es_debug("%u cached by [%u/%u) %llu %x\n",
+- lblk, es1->es_lblk, es1->es_len,
+- ext4_es_pblock(es1), ext4_es_status(es1));
+- goto out;
+- }
++ es1 = READ_ONCE(tree->cache_es);
++ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
++ es_debug("%u cached by [%u/%u) %llu %x\n",
++ lblk, es1->es_lblk, es1->es_len,
++ ext4_es_pblock(es1), ext4_es_status(es1));
++ goto out;
+ }
+
+ es1 = __es_tree_search(&tree->root, lblk);
+@@ -295,7 +293,7 @@ out:
+ }
+
+ if (es1 && matching_fn(es1)) {
+- tree->cache_es = es1;
++ WRITE_ONCE(tree->cache_es, es1);
+ es->es_lblk = es1->es_lblk;
+ es->es_len = es1->es_len;
+ es->es_pblk = es1->es_pblk;
+@@ -934,14 +932,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+
+ /* find extent in cache firstly */
+ es->es_lblk = es->es_len = es->es_pblk = 0;
+- if (tree->cache_es) {
+- es1 = tree->cache_es;
+- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
+- es_debug("%u cached by [%u/%u)\n",
+- lblk, es1->es_lblk, es1->es_len);
+- found = 1;
+- goto out;
+- }
++ es1 = READ_ONCE(tree->cache_es);
++ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
++ es_debug("%u cached by [%u/%u)\n",
++ lblk, es1->es_lblk, es1->es_len);
++ found = 1;
++ goto out;
+ }
+
+ node = tree->root.rb_node;
+@@ -1372,7 +1368,7 @@ retry:
+ if (count_reserved)
+ count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
+ &orig_es, &rc);
+- goto out;
++ goto out_get_reserved;
+ }
+
+ if (len1 > 0) {
+@@ -1414,6 +1410,7 @@ retry:
+ }
+ }
+
++out_get_reserved:
+ if (count_reserved)
+ *reserved = get_rsvd(inode, end, es, &rc);
+ out:
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 8ea5a81e65548..2660c34c770e3 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -65,21 +65,11 @@
+ *
+ * Fast Commit Ineligibility
+ * -------------------------
+- * Not all operations are supported by fast commits today (e.g extended
+- * attributes). Fast commit ineligibility is marked by calling one of the
+- * two following functions:
+- *
+- * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
+- * back to full commit. This is useful in case of transient errors.
+ *
+- * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
+- * the fast commits happening between ext4_fc_start_ineligible() and
+- * ext4_fc_stop_ineligible() and one fast commit after the call to
+- * ext4_fc_stop_ineligible() to fall back to full commits. It is important to
+- * make one more fast commit to fall back to full commit after stop call so
+- * that it guaranteed that the fast commit ineligible operation contained
+- * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
+- * followed by at least 1 full commit.
++ * Not all operations are supported by fast commits today (e.g. extended
++ * attributes). Fast commit ineligibility is marked by calling
++ * ext4_fc_mark_ineligible(): this makes the next fast commit operation fall
++ * back to a full commit.
+ *
+ * Atomicity of commits
+ * --------------------
+@@ -312,60 +302,36 @@ restart:
+ }
+
+ /*
+- * Mark file system as fast commit ineligible. This means that next commit
+- * operation would result in a full jbd2 commit.
++ * Mark the file system as fast commit ineligible, and record the latest
++ * ineligible transaction tid. This means that, until the recorded
++ * transaction, commit operations will result in a full jbd2 commit.
+ */
+-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
++ tid_t tid;
+
+ if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+ (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ if (handle && !IS_ERR(handle))
++ tid = handle->h_transaction->t_tid;
++ else {
++ read_lock(&sbi->s_journal->j_state_lock);
++ tid = sbi->s_journal->j_running_transaction ?
++ sbi->s_journal->j_running_transaction->t_tid : 0;
++ read_unlock(&sbi->s_journal->j_state_lock);
++ }
++ spin_lock(&sbi->s_fc_lock);
++ if (sbi->s_fc_ineligible_tid < tid)
++ sbi->s_fc_ineligible_tid = tid;
++ spin_unlock(&sbi->s_fc_lock);
+ WARN_ON(reason >= EXT4_FC_REASON_MAX);
+ sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
+ }
+
+-/*
+- * Start a fast commit ineligible update. Any commits that happen while
+- * such an operation is in progress fall back to full commits.
+- */
+-void ext4_fc_start_ineligible(struct super_block *sb, int reason)
+-{
+- struct ext4_sb_info *sbi = EXT4_SB(sb);
+-
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+- return;
+-
+- WARN_ON(reason >= EXT4_FC_REASON_MAX);
+- sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
+- atomic_inc(&sbi->s_fc_ineligible_updates);
+-}
+-
+-/*
+- * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
+- * to ensure that after stopping the ineligible update, at least one full
+- * commit takes place.
+- */
+-void ext4_fc_stop_ineligible(struct super_block *sb)
+-{
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+- return;
+-
+- ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+- atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
+-}
+-
+-static inline int ext4_fc_is_ineligible(struct super_block *sb)
+-{
+- return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
+-}
+-
+ /*
+ * Generic fast commit tracking function. If this is the first time this we are
+ * called after a full commit, we initialize fast commit fields and then call
+@@ -391,7 +357,7 @@ static int ext4_fc_track_template(
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return -EOPNOTSUPP;
+
+- if (ext4_fc_is_ineligible(inode->i_sb))
++ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return -EINVAL;
+
+ tid = handle->h_transaction->t_tid;
+@@ -411,7 +377,8 @@ static int ext4_fc_track_template(
+ spin_lock(&sbi->s_fc_lock);
+ if (list_empty(&EXT4_I(inode)->i_fc_list))
+ list_add_tail(&EXT4_I(inode)->i_fc_list,
+- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
++ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
+ &sbi->s_fc_q[FC_Q_STAGING] :
+ &sbi->s_fc_q[FC_Q_MAIN]);
+ spin_unlock(&sbi->s_fc_lock);
+@@ -432,25 +399,34 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
+ struct __track_dentry_update_args *dentry_update =
+ (struct __track_dentry_update_args *)arg;
+ struct dentry *dentry = dentry_update->dentry;
+- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
++ struct inode *dir = dentry->d_parent->d_inode;
++ struct super_block *sb = inode->i_sb;
++ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ mutex_unlock(&ei->i_fc_lock);
++
++ if (IS_ENCRYPTED(dir)) {
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME,
++ NULL);
++ mutex_lock(&ei->i_fc_lock);
++ return -EOPNOTSUPP;
++ }
++
+ node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
+ if (!node) {
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL);
+ mutex_lock(&ei->i_fc_lock);
+ return -ENOMEM;
+ }
+
+ node->fcd_op = dentry_update->op;
+- node->fcd_parent = dentry->d_parent->d_inode->i_ino;
++ node->fcd_parent = dir->i_ino;
+ node->fcd_ino = inode->i_ino;
+ if (dentry->d_name.len > DNAME_INLINE_LEN) {
+ node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
+ if (!node->fcd_name.name) {
+ kmem_cache_free(ext4_fc_dentry_cachep, node);
+- ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_NOMEM);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL);
+ mutex_lock(&ei->i_fc_lock);
+ return -ENOMEM;
+ }
+@@ -464,7 +440,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
+ node->fcd_name.len = dentry->d_name.len;
+
+ spin_lock(&sbi->s_fc_lock);
+- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
++ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
+ list_add_tail(&node->fcd_list,
+ &sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ else
+@@ -552,7 +529,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
+
+ if (ext4_should_journal_data(inode)) {
+ ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_INODE_JOURNAL_DATA);
++ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
+ return;
+ }
+
+@@ -627,6 +604,15 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
+
+ /* Ext4 commit path routines */
+
++/* memcpy to fc reserved space and update CRC */
++static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
++ int len, u32 *crc)
++{
++ if (crc)
++ *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
++ return memcpy(dst, src, len);
++}
++
+ /* memzero and update CRC */
+ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
+ u32 *crc)
+@@ -652,62 +638,59 @@ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
+ */
+ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
+ {
+- struct ext4_fc_tl *tl;
++ struct ext4_fc_tl tl;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh;
+ int bsize = sbi->s_journal->j_blocksize;
+ int ret, off = sbi->s_fc_bytes % bsize;
+- int pad_len;
++ int remaining;
++ u8 *dst;
+
+ /*
+- * After allocating len, we should have space at least for a 0 byte
+- * padding.
++ * If 'len' is too long to fit in any block alongside a PAD tlv, then we
++ * cannot fulfill the request.
+ */
+- if (len + sizeof(struct ext4_fc_tl) > bsize)
++ if (len > bsize - EXT4_FC_TAG_BASE_LEN)
+ return NULL;
+
+- if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
+- /*
+- * Only allocate from current buffer if we have enough space for
+- * this request AND we have space to add a zero byte padding.
+- */
+- if (!sbi->s_fc_bh) {
+- ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
+- if (ret)
+- return NULL;
+- sbi->s_fc_bh = bh;
+- }
++ if (!sbi->s_fc_bh) {
++ ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
++ if (ret)
++ return NULL;
++ sbi->s_fc_bh = bh;
++ }
++ dst = sbi->s_fc_bh->b_data + off;
++
++ /*
++ * Allocate the bytes in the current block if we can do so while still
++ * leaving enough space for a PAD tlv.
++ */
++ remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
++ if (len <= remaining) {
+ sbi->s_fc_bytes += len;
+- return sbi->s_fc_bh->b_data + off;
++ return dst;
+ }
+- /* Need to add PAD tag */
+- tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
+- tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
+- pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
+- tl->fc_len = cpu_to_le16(pad_len);
+- if (crc)
+- *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
+- if (pad_len > 0)
+- ext4_fc_memzero(sb, tl + 1, pad_len, crc);
++
++ /*
++ * Else, terminate the current block with a PAD tlv, then allocate a new
++ * block and allocate the bytes at the start of that new block.
++ */
++
++ tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
++ tl.fc_len = cpu_to_le16(remaining);
++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
++ ext4_fc_memzero(sb, dst + EXT4_FC_TAG_BASE_LEN, remaining, crc);
++
+ ext4_fc_submit_bh(sb, false);
+
+ ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
+ if (ret)
+ return NULL;
+ sbi->s_fc_bh = bh;
+- sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
++ sbi->s_fc_bytes += bsize - off + len;
+ return sbi->s_fc_bh->b_data;
+ }
+
+-/* memcpy to fc reserved space and update CRC */
+-static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
+- int len, u32 *crc)
+-{
+- if (crc)
+- *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
+- return memcpy(dst, src, len);
+-}
+-
+ /*
+ * Complete a fast commit by writing tail tag.
+ *
+@@ -728,23 +711,25 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
+ * ext4_fc_reserve_space takes care of allocating an extra block if
+ * there's no enough space on this block for accommodating this tail.
+ */
+- dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
++ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc);
+ if (!dst)
+ return -ENOSPC;
+
+ off = sbi->s_fc_bytes % bsize;
+
+ tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
+- tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
++ tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail));
+ sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
+
+- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
+- dst += sizeof(tl);
++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc);
++ dst += EXT4_FC_TAG_BASE_LEN;
+ tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
+ ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
+ dst += sizeof(tail.fc_tid);
+ tail.fc_crc = cpu_to_le32(crc);
+ ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
++ dst += sizeof(tail.fc_crc);
++ memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */
+
+ ext4_fc_submit_bh(sb, true);
+
+@@ -761,15 +746,15 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
+ struct ext4_fc_tl tl;
+ u8 *dst;
+
+- dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
++ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
+ if (!dst)
+ return false;
+
+ tl.fc_tag = cpu_to_le16(tag);
+ tl.fc_len = cpu_to_le16(len);
+
+- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
+- ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
++ ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc);
+
+ return true;
+ }
+@@ -781,8 +766,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
+ struct ext4_fc_dentry_info fcd;
+ struct ext4_fc_tl tl;
+ int dlen = fc_dentry->fcd_name.len;
+- u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
+- crc);
++ u8 *dst = ext4_fc_reserve_space(sb,
++ EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
+
+ if (!dst)
+ return false;
+@@ -791,8 +776,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
+ fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
+ tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
+ tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
+- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
+- dst += sizeof(tl);
++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc);
++ dst += EXT4_FC_TAG_BASE_LEN;
+ ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
+ dst += sizeof(fcd);
+ ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
+@@ -826,22 +811,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
+ tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
+ tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
+
++ ret = -ECANCELED;
+ dst = ext4_fc_reserve_space(inode->i_sb,
+- sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
++ EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
+ if (!dst)
+- return -ECANCELED;
++ goto err;
+
+- if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
+- return -ECANCELED;
+- dst += sizeof(tl);
++ if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc))
++ goto err;
++ dst += EXT4_FC_TAG_BASE_LEN;
+ if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
+- return -ECANCELED;
++ goto err;
+ dst += sizeof(fc_inode);
+ if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
+ inode_len, crc))
+- return -ECANCELED;
+-
+- return 0;
++ goto err;
++ ret = 0;
++err:
++ brelse(iloc.bh);
++ return ret;
+ }
+
+ /*
+@@ -869,8 +857,8 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
+ mutex_unlock(&ei->i_fc_lock);
+
+ cur_lblk_off = old_blk_size;
+- jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
+- __func__, cur_lblk_off, new_blk_size, inode->i_ino);
++ ext4_debug("will try writing %d to %d for inode %ld\n",
++ cur_lblk_off, new_blk_size, inode->i_ino);
+
+ while (cur_lblk_off <= new_blk_size) {
+ map.m_lblk = cur_lblk_off;
+@@ -928,7 +916,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
+ int ret = 0;
+
+ spin_lock(&sbi->s_fc_lock);
+- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
+ while (atomic_read(&ei->i_fc_updates)) {
+@@ -1121,6 +1108,32 @@ out:
+ return ret;
+ }
+
++static void ext4_fc_update_stats(struct super_block *sb, int status,
++ u64 commit_time, int nblks)
++{
++ struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
++
++ ext4_debug("Fast commit ended with status = %d", status);
++ if (status == EXT4_FC_STATUS_OK) {
++ stats->fc_num_commits++;
++ stats->fc_numblks += nblks;
++ if (likely(stats->s_fc_avg_commit_time))
++ stats->s_fc_avg_commit_time =
++ (commit_time +
++ stats->s_fc_avg_commit_time * 3) / 4;
++ else
++ stats->s_fc_avg_commit_time = commit_time;
++ } else if (status == EXT4_FC_STATUS_FAILED ||
++ status == EXT4_FC_STATUS_INELIGIBLE) {
++ if (status == EXT4_FC_STATUS_FAILED)
++ stats->fc_failed_commits++;
++ stats->fc_ineligible_commits++;
++ } else {
++ stats->fc_skipped_commits++;
++ }
++ trace_ext4_fc_commit_stop(sb, nblks, status);
++}
++
+ /*
+ * The main commit entry point. Performs a fast commit for transaction
+ * commit_tid if needed. If it's not possible to perform a fast commit
+@@ -1133,18 +1146,15 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int nblks = 0, ret, bsize = journal->j_blocksize;
+ int subtid = atomic_read(&sbi->s_fc_subtid);
+- int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
++ int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
+ ktime_t start_time, commit_time;
+
+ trace_ext4_fc_commit_start(sb);
+
+ start_time = ktime_get();
+
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (ext4_fc_is_ineligible(sb))) {
+- reason = EXT4_FC_REASON_INELIGIBLE;
+- goto out;
+- }
++ if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
++ return jbd2_complete_transaction(journal, commit_tid);
+
+ restart_fc:
+ ret = jbd2_fc_begin_commit(journal, commit_tid);
+@@ -1153,74 +1163,59 @@ restart_fc:
+ if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
+ commit_tid > journal->j_commit_sequence)
+ goto restart_fc;
+- reason = EXT4_FC_REASON_ALREADY_COMMITTED;
+- goto out;
++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
++ return 0;
+ } else if (ret) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_START_FAILED;
+- goto out;
++ /*
++ * Commit couldn't start. Just update stats and perform a
++ * full commit.
++ */
++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
++ return jbd2_complete_transaction(journal, commit_tid);
++ }
++
++ /*
++ * After establishing journal barrier via jbd2_fc_begin_commit(), check
++ * if we are fast commit ineligible.
++ */
++ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
++ status = EXT4_FC_STATUS_INELIGIBLE;
++ goto fallback;
+ }
+
+ fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
+ ret = ext4_fc_perform_commit(journal);
+ if (ret < 0) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_FAILED;
+- goto out;
++ status = EXT4_FC_STATUS_FAILED;
++ goto fallback;
+ }
+ nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
+ ret = jbd2_fc_wait_bufs(journal, nblks);
+ if (ret < 0) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_FAILED;
+- goto out;
++ status = EXT4_FC_STATUS_FAILED;
++ goto fallback;
+ }
+ atomic_inc(&sbi->s_fc_subtid);
+- jbd2_fc_end_commit(journal);
+-out:
+- /* Has any ineligible update happened since we started? */
+- if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_INELIGIBLE;
+- }
+-
+- spin_lock(&sbi->s_fc_lock);
+- if (reason != EXT4_FC_REASON_OK &&
+- reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
+- sbi->s_fc_stats.fc_ineligible_commits++;
+- } else {
+- sbi->s_fc_stats.fc_num_commits++;
+- sbi->s_fc_stats.fc_numblks += nblks;
+- }
+- spin_unlock(&sbi->s_fc_lock);
+- nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
+- trace_ext4_fc_commit_stop(sb, nblks, reason);
+- commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
++ ret = jbd2_fc_end_commit(journal);
+ /*
+- * weight the commit time higher than the average time so we don't
+- * react too strongly to vast changes in the commit time
++ * weight the commit time higher than the average time so we
++ * don't react too strongly to vast changes in the commit time
+ */
+- if (likely(sbi->s_fc_avg_commit_time))
+- sbi->s_fc_avg_commit_time = (commit_time +
+- sbi->s_fc_avg_commit_time * 3) / 4;
+- else
+- sbi->s_fc_avg_commit_time = commit_time;
+- jbd_debug(1,
+- "Fast commit ended with blks = %d, reason = %d, subtid - %d",
+- nblks, reason, subtid);
+- if (reason == EXT4_FC_REASON_FC_FAILED)
+- return jbd2_fc_end_commit_fallback(journal);
+- if (reason == EXT4_FC_REASON_FC_START_FAILED ||
+- reason == EXT4_FC_REASON_INELIGIBLE)
+- return jbd2_complete_transaction(journal, commit_tid);
+- return 0;
++ commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
++ ext4_fc_update_stats(sb, status, commit_time, nblks);
++ return ret;
++
++fallback:
++ ret = jbd2_fc_end_commit_fallback(journal);
++ ext4_fc_update_stats(sb, status, 0, 0);
++ return ret;
+ }
+
+ /*
+ * Fast commit cleanup routine. This is called after every fast commit and
+ * full commit. full is true if we are called after a full commit.
+ */
+-static void ext4_fc_cleanup(journal_t *journal, int full)
++static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
+ {
+ struct super_block *sb = journal->j_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -1238,7 +1233,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
+ list_del_init(&iter->i_fc_list);
+ ext4_clear_inode_state(&iter->vfs_inode,
+ EXT4_STATE_FC_COMMITTING);
+- ext4_fc_reset_inode(&iter->vfs_inode);
++ if (iter->i_sync_tid <= tid)
++ ext4_fc_reset_inode(&iter->vfs_inode);
+ /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
+ smp_mb();
+ #if (BITS_PER_LONG < 64)
+@@ -1267,8 +1263,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
+ list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
+ &sbi->s_fc_q[FC_Q_MAIN]);
+
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ if (tid >= sbi->s_fc_ineligible_tid) {
++ sbi->s_fc_ineligible_tid = 0;
++ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ }
+
+ if (full)
+ sbi->s_fc_bytes = 0;
+@@ -1284,8 +1282,14 @@ struct dentry_info_args {
+ char *dname;
+ };
+
++/* Same as struct ext4_fc_tl, but uses native endianness fields */
++struct ext4_fc_tl_mem {
++ u16 fc_tag;
++ u16 fc_len;
++};
++
+ static inline void tl_to_darg(struct dentry_info_args *darg,
+- struct ext4_fc_tl *tl, u8 *val)
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct ext4_fc_dentry_info fcd;
+
+@@ -1294,13 +1298,21 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
+ darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
+ darg->ino = le32_to_cpu(fcd.fc_ino);
+ darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
+- darg->dname_len = le16_to_cpu(tl->fc_len) -
+- sizeof(struct ext4_fc_dentry_info);
++ darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
++}
++
++static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val)
++{
++ struct ext4_fc_tl tl_disk;
++
++ memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN);
++ tl->fc_len = le16_to_cpu(tl_disk.fc_len);
++ tl->fc_tag = le16_to_cpu(tl_disk.fc_tag);
+ }
+
+ /* Unlink replay function */
+-static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
+- u8 *val)
++static int ext4_fc_replay_unlink(struct super_block *sb,
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct inode *inode, *old_parent;
+ struct qstr entry;
+@@ -1317,19 +1329,19 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
+ inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
+
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode %d not found", darg.ino);
++ ext4_debug("Inode %d not found", darg.ino);
+ return 0;
+ }
+
+ old_parent = ext4_iget(sb, darg.parent_ino,
+ EXT4_IGET_NORMAL);
+ if (IS_ERR(old_parent)) {
+- jbd_debug(1, "Dir with inode %d not found", darg.parent_ino);
++ ext4_debug("Dir with inode %d not found", darg.parent_ino);
+ iput(inode);
+ return 0;
+ }
+
+- ret = __ext4_unlink(NULL, old_parent, &entry, inode);
++ ret = __ext4_unlink(old_parent, &entry, inode, NULL);
+ /* -ENOENT ok coz it might not exist anymore. */
+ if (ret == -ENOENT)
+ ret = 0;
+@@ -1349,21 +1361,21 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
+
+ dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(dir)) {
+- jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
++ ext4_debug("Dir with inode %d not found.", darg->parent_ino);
+ dir = NULL;
+ goto out;
+ }
+
+ dentry_dir = d_obtain_alias(dir);
+ if (IS_ERR(dentry_dir)) {
+- jbd_debug(1, "Failed to obtain dentry");
++ ext4_debug("Failed to obtain dentry");
+ dentry_dir = NULL;
+ goto out;
+ }
+
+ dentry_inode = d_alloc(dentry_dir, &qstr_dname);
+ if (!dentry_inode) {
+- jbd_debug(1, "Inode dentry not created.");
++ ext4_debug("Inode dentry not created.");
+ ret = -ENOMEM;
+ goto out;
+ }
+@@ -1376,7 +1388,7 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
+ * could complete.
+ */
+ if (ret && ret != -EEXIST) {
+- jbd_debug(1, "Failed to link\n");
++ ext4_debug("Failed to link\n");
+ goto out;
+ }
+
+@@ -1397,8 +1409,8 @@ out:
+ }
+
+ /* Link replay function */
+-static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
+- u8 *val)
++static int ext4_fc_replay_link(struct super_block *sb,
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct inode *inode;
+ struct dentry_info_args darg;
+@@ -1410,7 +1422,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
+
+ inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode not found.");
++ ext4_debug("Inode not found.");
+ return 0;
+ }
+
+@@ -1433,14 +1445,17 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
+ if (state->fc_modified_inodes[i] == ino)
+ return 0;
+ if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
++ int *fc_modified_inodes;
++
++ fc_modified_inodes = krealloc(state->fc_modified_inodes,
++ sizeof(int) * (state->fc_modified_inodes_size +
++ EXT4_FC_REPLAY_REALLOC_INCREMENT),
++ GFP_KERNEL);
++ if (!fc_modified_inodes)
++ return -ENOMEM;
++ state->fc_modified_inodes = fc_modified_inodes;
+ state->fc_modified_inodes_size +=
+ EXT4_FC_REPLAY_REALLOC_INCREMENT;
+- state->fc_modified_inodes = krealloc(
+- state->fc_modified_inodes, sizeof(int) *
+- state->fc_modified_inodes_size,
+- GFP_KERNEL);
+- if (!state->fc_modified_inodes)
+- return -ENOMEM;
+ }
+ state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
+ return 0;
+@@ -1449,15 +1464,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
+ /*
+ * Inode replay function
+ */
+-static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
+- u8 *val)
++static int ext4_fc_replay_inode(struct super_block *sb,
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct ext4_fc_inode fc_inode;
+ struct ext4_inode *raw_inode;
+ struct ext4_inode *raw_fc_inode;
+ struct inode *inode = NULL;
+ struct ext4_iloc iloc;
+- int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
++ int inode_len, ino, ret, tag = tl->fc_tag;
+ struct ext4_extent_header *eh;
+
+ memcpy(&fc_inode, val, sizeof(fc_inode));
+@@ -1472,7 +1487,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
+ }
+ inode = NULL;
+
+- ext4_fc_record_modified_inode(sb, ino);
++ ret = ext4_fc_record_modified_inode(sb, ino);
++ if (ret)
++ goto out;
+
+ raw_fc_inode = (struct ext4_inode *)
+ (val + offsetof(struct ext4_fc_inode, fc_raw_inode));
+@@ -1480,7 +1497,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
+ if (ret)
+ goto out;
+
+- inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
++ inode_len = tl->fc_len - sizeof(struct ext4_fc_inode);
+ raw_inode = ext4_raw_inode(&iloc);
+
+ memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
+@@ -1515,7 +1532,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
+ /* Given that we just wrote the inode on disk, this SHOULD succeed. */
+ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode not found.");
++ ext4_debug("Inode not found.");
+ return -EFSCORRUPTED;
+ }
+
+@@ -1548,8 +1565,8 @@ out:
+ * inode for which we are trying to create a dentry here, should already have
+ * been replayed before we start here.
+ */
+-static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
+- u8 *val)
++static int ext4_fc_replay_create(struct super_block *sb,
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ int ret = 0;
+ struct inode *inode = NULL;
+@@ -1568,7 +1585,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
+
+ inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "inode %d not found.", darg.ino);
++ ext4_debug("inode %d not found.", darg.ino);
+ inode = NULL;
+ ret = -EINVAL;
+ goto out;
+@@ -1581,7 +1598,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
+ */
+ dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(dir)) {
+- jbd_debug(1, "Dir %d not found.", darg.ino);
++ ext4_debug("Dir %d not found.", darg.ino);
+ goto out;
+ }
+ ret = ext4_init_new_dir(NULL, dir, inode);
+@@ -1603,26 +1620,36 @@ out:
+ }
+
+ /*
+- * Record physical disk regions which are in use as per fast commit area. Our
+- * simple replay phase allocator excludes these regions from allocation.
++ * Record physical disk regions which are in use as per fast commit area,
++ * and used by inodes during the replay phase. Our simple replay phase
++ * allocator excludes these regions from allocation.
+ */
+-static int ext4_fc_record_regions(struct super_block *sb, int ino,
+- ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
++int ext4_fc_record_regions(struct super_block *sb, int ino,
++ ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
+ {
+ struct ext4_fc_replay_state *state;
+ struct ext4_fc_alloc_region *region;
+
+ state = &EXT4_SB(sb)->s_fc_replay_state;
++ /*
++	 * During the replay phase, fc_regions_valid may not be the same as
++	 * fc_regions_used; update it when making new additions.
++ */
++ if (replay && state->fc_regions_used != state->fc_regions_valid)
++ state->fc_regions_used = state->fc_regions_valid;
+ if (state->fc_regions_used == state->fc_regions_size) {
++ struct ext4_fc_alloc_region *fc_regions;
++
++ fc_regions = krealloc(state->fc_regions,
++ sizeof(struct ext4_fc_alloc_region) *
++ (state->fc_regions_size +
++ EXT4_FC_REPLAY_REALLOC_INCREMENT),
++ GFP_KERNEL);
++ if (!fc_regions)
++ return -ENOMEM;
+ state->fc_regions_size +=
+ EXT4_FC_REPLAY_REALLOC_INCREMENT;
+- state->fc_regions = krealloc(
+- state->fc_regions,
+- state->fc_regions_size *
+- sizeof(struct ext4_fc_alloc_region),
+- GFP_KERNEL);
+- if (!state->fc_regions)
+- return -ENOMEM;
++ state->fc_regions = fc_regions;
+ }
+ region = &state->fc_regions[state->fc_regions_used++];
+ region->ino = ino;
+@@ -1630,12 +1657,15 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
+ region->pblk = pblk;
+ region->len = len;
+
++ if (replay)
++ state->fc_regions_valid++;
++
+ return 0;
+ }
+
+ /* Replay add range tag */
+ static int ext4_fc_replay_add_range(struct super_block *sb,
+- struct ext4_fc_tl *tl, u8 *val)
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct ext4_fc_add_range fc_add_ex;
+ struct ext4_extent newex, *ex;
+@@ -1656,11 +1686,13 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+
+ inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode not found.");
++ ext4_debug("Inode not found.");
+ return 0;
+ }
+
+ ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
++ if (ret)
++ goto out;
+
+ start = le32_to_cpu(ex->ee_block);
+ start_pblk = ext4_ext_pblock(ex);
+@@ -1668,7 +1700,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+
+ cur = start;
+ remaining = len;
+- jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
++ ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
+ start, start_pblk, len, ext4_ext_is_unwritten(ex),
+ inode->i_ino);
+
+@@ -1678,18 +1710,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+ map.m_pblk = 0;
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+
+- if (ret < 0) {
+- iput(inode);
+- return 0;
+- }
++ if (ret < 0)
++ goto out;
+
+ if (ret == 0) {
+ /* Range is not mapped */
+ path = ext4_find_extent(inode, cur, NULL, 0);
+- if (IS_ERR(path)) {
+- iput(inode);
+- return 0;
+- }
++ if (IS_ERR(path))
++ goto out;
+ memset(&newex, 0, sizeof(newex));
+ newex.ee_block = cpu_to_le32(cur);
+ ext4_ext_store_pblock(
+@@ -1703,10 +1731,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+ up_write((&EXT4_I(inode)->i_data_sem));
+ ext4_ext_drop_refs(path);
+ kfree(path);
+- if (ret) {
+- iput(inode);
+- return 0;
+- }
++ if (ret)
++ goto out;
+ goto next;
+ }
+
+@@ -1719,10 +1745,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+ ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
+ ext4_ext_is_unwritten(ex),
+ start_pblk + cur - start);
+- if (ret) {
+- iput(inode);
+- return 0;
+- }
++ if (ret)
++ goto out;
+ /*
+ * Mark the old blocks as free since they aren't used
+ * anymore. We maintain an array of all the modified
+@@ -1737,15 +1761,13 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
+ }
+
+ /* Range is mapped and needs a state change */
+- jbd_debug(1, "Converting from %ld to %d %lld",
++ ext4_debug("Converting from %ld to %d %lld",
+ map.m_flags & EXT4_MAP_UNWRITTEN,
+ ext4_ext_is_unwritten(ex), map.m_pblk);
+ ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
+ ext4_ext_is_unwritten(ex), map.m_pblk);
+- if (ret) {
+- iput(inode);
+- return 0;
+- }
++ if (ret)
++ goto out;
+ /*
+ * We may have split the extent tree while toggling the state.
+ * Try to shrink the extent tree now.
+@@ -1757,14 +1779,15 @@ next:
+ }
+ ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
+ sb->s_blocksize_bits);
++out:
+ iput(inode);
+ return 0;
+ }
+
+ /* Replay DEL_RANGE tag */
+ static int
+-ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
+- u8 *val)
++ext4_fc_replay_del_range(struct super_block *sb,
++ struct ext4_fc_tl_mem *tl, u8 *val)
+ {
+ struct inode *inode;
+ struct ext4_fc_del_range lrange;
+@@ -1781,13 +1804,15 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
+
+ inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
++ ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino));
+ return 0;
+ }
+
+ ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
++ if (ret)
++ goto out;
+
+- jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
++ ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n",
+ inode->i_ino, le32_to_cpu(lrange.fc_lblk),
+ le32_to_cpu(lrange.fc_len));
+ while (remaining > 0) {
+@@ -1795,10 +1820,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
+ map.m_len = remaining;
+
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+- if (ret < 0) {
+- iput(inode);
+- return 0;
+- }
++ if (ret < 0)
++ goto out;
+ if (ret > 0) {
+ remaining -= ret;
+ cur += ret;
+@@ -1809,16 +1832,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
+ }
+ }
+
+- ret = ext4_punch_hole(inode,
+- le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits,
+- le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits);
++ down_write(&EXT4_I(inode)->i_data_sem);
++ ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
++ le32_to_cpu(lrange.fc_lblk) +
++ le32_to_cpu(lrange.fc_len) - 1);
++ up_write(&EXT4_I(inode)->i_data_sem);
+ if (ret)
+- jbd_debug(1, "ext4_punch_hole returned %d", ret);
++ goto out;
+ ext4_ext_replay_shrink_inode(inode,
+ i_size_read(inode) >> sb->s_blocksize_bits);
+ ext4_mark_inode_dirty(NULL, inode);
++out:
+ iput(inode);
+-
+ return 0;
+ }
+
+@@ -1836,7 +1861,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
+ inode = ext4_iget(sb, state->fc_modified_inodes[i],
+ EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+- jbd_debug(1, "Inode %d not found.",
++ ext4_debug("Inode %d not found.",
+ state->fc_modified_inodes[i]);
+ continue;
+ }
+@@ -1902,6 +1927,33 @@ void ext4_fc_replay_cleanup(struct super_block *sb)
+ kfree(sbi->s_fc_replay_state.fc_modified_inodes);
+ }
+
++static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi,
++ int tag, int len)
++{
++ switch (tag) {
++ case EXT4_FC_TAG_ADD_RANGE:
++ return len == sizeof(struct ext4_fc_add_range);
++ case EXT4_FC_TAG_DEL_RANGE:
++ return len == sizeof(struct ext4_fc_del_range);
++ case EXT4_FC_TAG_CREAT:
++ case EXT4_FC_TAG_LINK:
++ case EXT4_FC_TAG_UNLINK:
++ len -= sizeof(struct ext4_fc_dentry_info);
++ return len >= 1 && len <= EXT4_NAME_LEN;
++ case EXT4_FC_TAG_INODE:
++ len -= sizeof(struct ext4_fc_inode);
++ return len >= EXT4_GOOD_OLD_INODE_SIZE &&
++ len <= sbi->s_inode_size;
++ case EXT4_FC_TAG_PAD:
++ return true; /* padding can have any length */
++ case EXT4_FC_TAG_TAIL:
++ return len >= sizeof(struct ext4_fc_tail);
++ case EXT4_FC_TAG_HEAD:
++ return len == sizeof(struct ext4_fc_head);
++ }
++ return false;
++}
++
+ /*
+ * Recovery Scan phase handler
+ *
+@@ -1928,7 +1980,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
+ struct ext4_fc_replay_state *state;
+ int ret = JBD2_FC_REPLAY_CONTINUE;
+ struct ext4_fc_add_range ext;
+- struct ext4_fc_tl tl;
++ struct ext4_fc_tl_mem tl;
+ struct ext4_fc_tail tail;
+ __u8 *start, *end, *cur, *val;
+ struct ext4_fc_head head;
+@@ -1937,7 +1989,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
+ state = &sbi->s_fc_replay_state;
+
+ start = (u8 *)bh->b_data;
+- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
++ end = start + journal->j_blocksize;
+
+ if (state->fc_replay_expected_off == 0) {
+ state->fc_cur_tag = 0;
+@@ -1958,19 +2010,26 @@ static int ext4_fc_replay_scan(journal_t *journal,
+ }
+
+ state->fc_replay_expected_off++;
+- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
+- memcpy(&tl, cur, sizeof(tl));
+- val = cur + sizeof(tl);
+- jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
+- tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
+- switch (le16_to_cpu(tl.fc_tag)) {
++ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
++ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
++ ext4_fc_get_tl(&tl, cur);
++ val = cur + EXT4_FC_TAG_BASE_LEN;
++ if (tl.fc_len > end - val ||
++ !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) {
++ ret = state->fc_replay_num_tags ?
++ JBD2_FC_REPLAY_STOP : -ECANCELED;
++ goto out_err;
++ }
++ ext4_debug("Scan phase, tag:%s, blk %lld\n",
++ tag2str(tl.fc_tag), bh->b_blocknr);
++ switch (tl.fc_tag) {
+ case EXT4_FC_TAG_ADD_RANGE:
+ memcpy(&ext, val, sizeof(ext));
+ ex = (struct ext4_extent *)&ext.fc_ex;
+ ret = ext4_fc_record_regions(sb,
+ le32_to_cpu(ext.fc_ino),
+ le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
+- ext4_ext_get_actual_len(ex));
++ ext4_ext_get_actual_len(ex), 0);
+ if (ret < 0)
+ break;
+ ret = JBD2_FC_REPLAY_CONTINUE;
+@@ -1983,13 +2042,13 @@ static int ext4_fc_replay_scan(journal_t *journal,
+ case EXT4_FC_TAG_PAD:
+ state->fc_cur_tag++;
+ state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+- sizeof(tl) + le16_to_cpu(tl.fc_len));
++ EXT4_FC_TAG_BASE_LEN + tl.fc_len);
+ break;
+ case EXT4_FC_TAG_TAIL:
+ state->fc_cur_tag++;
+ memcpy(&tail, val, sizeof(tail));
+ state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+- sizeof(tl) +
++ EXT4_FC_TAG_BASE_LEN +
+ offsetof(struct ext4_fc_tail,
+ fc_crc));
+ if (le32_to_cpu(tail.fc_tid) == expected_tid &&
+@@ -2016,7 +2075,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
+ }
+ state->fc_cur_tag++;
+ state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+- sizeof(tl) + le16_to_cpu(tl.fc_len));
++ EXT4_FC_TAG_BASE_LEN + tl.fc_len);
+ break;
+ default:
+ ret = state->fc_replay_num_tags ?
+@@ -2040,7 +2099,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
+ {
+ struct super_block *sb = journal->j_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+- struct ext4_fc_tl tl;
++ struct ext4_fc_tl_mem tl;
+ __u8 *start, *end, *cur, *val;
+ int ret = JBD2_FC_REPLAY_CONTINUE;
+ struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
+@@ -2056,7 +2115,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
+ sbi->s_mount_state |= EXT4_FC_REPLAY;
+ }
+ if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
+- jbd_debug(1, "Replay stops\n");
++ ext4_debug("Replay stops\n");
+ ext4_fc_set_bitmaps_and_counters(sb);
+ return 0;
+ }
+@@ -2069,21 +2128,22 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
+ #endif
+
+ start = (u8 *)bh->b_data;
+- end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
++ end = start + journal->j_blocksize;
+
+- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
+- memcpy(&tl, cur, sizeof(tl));
+- val = cur + sizeof(tl);
++ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN;
++ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) {
++ ext4_fc_get_tl(&tl, cur);
++ val = cur + EXT4_FC_TAG_BASE_LEN;
+
+ if (state->fc_replay_num_tags == 0) {
+ ret = JBD2_FC_REPLAY_STOP;
+ ext4_fc_set_bitmaps_and_counters(sb);
+ break;
+ }
+- jbd_debug(3, "Replay phase, tag:%s\n",
+- tag2str(le16_to_cpu(tl.fc_tag)));
++
++ ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag));
+ state->fc_replay_num_tags--;
+- switch (le16_to_cpu(tl.fc_tag)) {
++ switch (tl.fc_tag) {
+ case EXT4_FC_TAG_LINK:
+ ret = ext4_fc_replay_link(sb, &tl, val);
+ break;
+@@ -2104,19 +2164,18 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
+ break;
+ case EXT4_FC_TAG_PAD:
+ trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
+- le16_to_cpu(tl.fc_len), 0);
++ tl.fc_len, 0);
+ break;
+ case EXT4_FC_TAG_TAIL:
+- trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
+- le16_to_cpu(tl.fc_len), 0);
++ trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL,
++ 0, tl.fc_len, 0);
+ memcpy(&tail, val, sizeof(tail));
+ WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
+ break;
+ case EXT4_FC_TAG_HEAD:
+ break;
+ default:
+- trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
+- le16_to_cpu(tl.fc_len), 0);
++ trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0);
+ ret = -ECANCELED;
+ break;
+ }
+@@ -2140,17 +2199,17 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
+ journal->j_fc_cleanup_callback = ext4_fc_cleanup;
+ }
+
+-static const char *fc_ineligible_reasons[] = {
+- "Extended attributes changed",
+- "Cross rename",
+- "Journal flag changed",
+- "Insufficient memory",
+- "Swap boot",
+- "Resize",
+- "Dir renamed",
+- "Falloc range op",
+- "Data journalling",
+- "FC Commit Failed"
++static const char * const fc_ineligible_reasons[] = {
++ [EXT4_FC_REASON_XATTR] = "Extended attributes changed",
++ [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename",
++ [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed",
++ [EXT4_FC_REASON_NOMEM] = "Insufficient memory",
++ [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot",
++ [EXT4_FC_REASON_RESIZE] = "Resize",
++ [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed",
++ [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op",
++ [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling",
++ [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename",
+ };
+
+ int ext4_fc_info_show(struct seq_file *seq, void *v)
+@@ -2166,7 +2225,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v)
+ "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
+ stats->fc_num_commits, stats->fc_ineligible_commits,
+ stats->fc_numblks,
+- div_u64(sbi->s_fc_avg_commit_time, 1000));
++ div_u64(stats->s_fc_avg_commit_time, 1000));
+ seq_puts(seq, "Ineligible reasons:\n");
+ for (i = 0; i < EXT4_FC_REASON_MAX; i++)
+ seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
+@@ -2185,3 +2244,8 @@ int __init ext4_fc_init_dentry_cache(void)
+
+ return 0;
+ }
++
++void ext4_fc_destroy_dentry_cache(void)
++{
++ kmem_cache_destroy(ext4_fc_dentry_cachep);
++}
+diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
+index 937c381b4c85e..2cbd317eda26b 100644
+--- a/fs/ext4/fast_commit.h
++++ b/fs/ext4/fast_commit.h
+@@ -58,7 +58,7 @@ struct ext4_fc_dentry_info {
+ __u8 fc_dname[0];
+ };
+
+-/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
++/* Value structure for EXT4_FC_TAG_INODE. */
+ struct ext4_fc_inode {
+ __le32 fc_ino;
+ __u8 fc_raw_inode[0];
+@@ -70,22 +70,23 @@ struct ext4_fc_tail {
+ __le32 fc_crc;
+ };
+
++/* Tag base length */
++#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
++
++/*
++ * Fast commit status codes
++ */
++enum {
++ EXT4_FC_STATUS_OK = 0,
++ EXT4_FC_STATUS_INELIGIBLE,
++ EXT4_FC_STATUS_SKIPPED,
++ EXT4_FC_STATUS_FAILED,
++};
++
+ /*
+- * Fast commit reason codes
++ * Fast commit ineligibility reasons:
+ */
+ enum {
+- /*
+- * Commit status codes:
+- */
+- EXT4_FC_REASON_OK = 0,
+- EXT4_FC_REASON_INELIGIBLE,
+- EXT4_FC_REASON_ALREADY_COMMITTED,
+- EXT4_FC_REASON_FC_START_FAILED,
+- EXT4_FC_REASON_FC_FAILED,
+-
+- /*
+- * Fast commit ineligiblity reasons:
+- */
+ EXT4_FC_REASON_XATTR = 0,
+ EXT4_FC_REASON_CROSS_RENAME,
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
+@@ -95,7 +96,7 @@ enum {
+ EXT4_FC_REASON_RENAME_DIR,
+ EXT4_FC_REASON_FALLOC_RANGE,
+ EXT4_FC_REASON_INODE_JOURNAL_DATA,
+- EXT4_FC_COMMIT_FAILED,
++ EXT4_FC_REASON_ENCRYPTED_FILENAME,
+ EXT4_FC_REASON_MAX
+ };
+
+@@ -117,7 +118,10 @@ struct ext4_fc_stats {
+ unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
+ unsigned long fc_num_commits;
+ unsigned long fc_ineligible_commits;
++ unsigned long fc_failed_commits;
++ unsigned long fc_skipped_commits;
+ unsigned long fc_numblks;
++ u64 s_fc_avg_commit_time;
+ };
+
+ #define EXT4_FC_REPLAY_REALLOC_INCREMENT 4
+diff --git a/fs/ext4/file.c b/fs/ext4/file.c
+index ac0e11bbb4450..4704fe627c4e2 100644
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -74,7 +74,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ return generic_file_read_iter(iocb, to);
+ }
+
+- ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0);
++ ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0);
+ inode_unlock_shared(inode);
+
+ file_accessed(iocb->ki_filp);
+@@ -259,7 +259,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+- ext4_fc_start_update(inode);
+ inode_lock(inode);
+ ret = ext4_write_checks(iocb, from);
+ if (ret <= 0)
+@@ -271,7 +270,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+
+ out:
+ inode_unlock(inode);
+- ext4_fc_stop_update(inode);
+ if (likely(ret > 0)) {
+ iocb->ki_pos += ret;
+ ret = generic_write_sync(iocb, ret);
+@@ -528,6 +526,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ ret = -EAGAIN;
+ goto out;
+ }
++ /*
++ * Make sure inline data cannot be created anymore since we are going
++ * to allocate blocks for DIO. We know the inode does not have any
++ * inline data now because ext4_dio_supported() checked for that.
++ */
++ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
+ offset = iocb->ki_pos;
+ count = ret;
+@@ -552,9 +556,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ goto out;
+ }
+
+- ext4_fc_start_update(inode);
+ ret = ext4_orphan_add(handle, inode);
+- ext4_fc_stop_update(inode);
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+@@ -566,7 +568,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ if (ilock_shared)
+ iomap_ops = &ext4_iomap_overwrite_ops;
+ ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
+- (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0);
++ (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
++ 0);
+ if (ret == -ENOTBLK)
+ ret = 0;
+
+diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
+index 4493ef0c715e9..cdf9bfe10137f 100644
+--- a/fs/ext4/fsmap.c
++++ b/fs/ext4/fsmap.c
+@@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
+ keys[0].fmr_physical = bofs;
+ if (keys[1].fmr_physical >= eofs)
+ keys[1].fmr_physical = eofs - 1;
++ if (keys[1].fmr_physical < keys[0].fmr_physical)
++ return 0;
+ start_fsb = keys[0].fmr_physical;
+ end_fsb = keys[1].fmr_physical;
+
+diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
+index f34f4176c1e7c..3b03e6b061db1 100644
+--- a/fs/ext4/hash.c
++++ b/fs/ext4/hash.c
+@@ -277,7 +277,11 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
+ }
+ default:
+ hinfo->hash = 0;
+- return -1;
++ hinfo->minor_hash = 0;
++ ext4_warning(dir->i_sb,
++ "invalid/unsupported hash tree version %u",
++ hinfo->hash_version);
++ return -EINVAL;
+ }
+ hash = hash & ~1;
+ if (hash == (EXT4_HTREE_EOF_32BIT << 1))
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index f73e5eb43eae1..745d781da8915 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -91,7 +91,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
+
+ if (buffer_verified(bh))
+ return 0;
+- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ return -EFSCORRUPTED;
+
+ ext4_lock_group(sb, block_group);
+@@ -293,7 +293,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
+ }
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ grp = ext4_get_group_info(sb, block_group);
+- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
++ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
+ fatal = -EFSCORRUPTED;
+ goto error_return;
+ }
+@@ -510,7 +510,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
+ goto fallback;
+ }
+
+- max_dirs = ndirs / ngroups + inodes_per_group / 16;
++ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16;
+ min_inodes = avefreei - inodes_per_group*flex_size / 4;
+ if (min_inodes < 1)
+ min_inodes = 1;
+@@ -1048,7 +1048,7 @@ got_group:
+ * Skip groups with already-known suspicious inode
+ * tables
+ */
+- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ goto next_group;
+ }
+
+@@ -1186,6 +1186,10 @@ got:
+
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
+ grp = ext4_get_group_info(sb, group);
++ if (!grp) {
++ err = -EFSCORRUPTED;
++ goto out;
++ }
+ down_read(&grp->alloc_sem); /*
+ * protect vs itable
+ * lazyinit
+@@ -1529,7 +1533,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+ }
+
+ gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+- if (!gdp)
++ if (!gdp || !grp)
+ goto out;
+
+ /*
+diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
+index 89efa78ed4b21..d795ccef04171 100644
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
+ struct super_block *sb = inode->i_sb;
+ Indirect *p = chain;
+ struct buffer_head *bh;
++ unsigned int key;
+ int ret = -EIO;
+
+ *err = 0;
+@@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
+ if (!p->key)
+ goto no_block;
+ while (--depth) {
+- bh = sb_getblk(sb, le32_to_cpu(p->key));
++ key = le32_to_cpu(p->key);
++ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
++ /* the block was out of range */
++ ret = -EFSCORRUPTED;
++ goto failure;
++ }
++ bh = sb_getblk(sb, key);
+ if (unlikely(!bh)) {
+ ret = -ENOMEM;
+ goto failure;
+@@ -460,7 +467,7 @@ static int ext4_splice_branch(handle_t *handle,
+ * the new i_size. But that is not done here - it is done in
+ * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
+ */
+- jbd_debug(5, "splicing indirect only\n");
++ ext4_debug("splicing indirect only\n");
+ BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
+ if (err)
+@@ -472,7 +479,7 @@ static int ext4_splice_branch(handle_t *handle,
+ err = ext4_mark_inode_dirty(handle, ar->inode);
+ if (unlikely(err))
+ goto err_out;
+- jbd_debug(5, "splicing direct\n");
++ ext4_debug("splicing direct\n");
+ }
+ return err;
+
+@@ -644,6 +651,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ count = ar.len;
++
++ /*
++ * Update reserved blocks/metadata blocks after successful block
++ * allocation which had been deferred till now.
++ */
++ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
++ ext4_da_update_reserve_space(inode, count, 1);
++
+ got_it:
+ map->m_flags |= EXT4_MAP_MAPPED;
+ map->m_pblk = le32_to_cpu(chain[depth-1].key);
+diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
+index 39a1ab129fdc9..6fe665de1b203 100644
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -33,8 +33,12 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_xattr_entry *entry;
+ struct ext4_inode *raw_inode;
++ void *end;
+ int free, min_offs;
+
++ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
++ return 0;
++
+ min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
+ EXT4_GOOD_OLD_INODE_SIZE -
+ EXT4_I(inode)->i_extra_isize -
+@@ -53,14 +57,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
+ raw_inode = ext4_raw_inode(iloc);
+ header = IHDR(inode, raw_inode);
+ entry = IFIRST(header);
++ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+
+ /* Compute min_offs. */
+- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
++ while (!IS_LAST_ENTRY(entry)) {
++ void *next = EXT4_XATTR_NEXT(entry);
++
++ if (next >= end) {
++ EXT4_ERROR_INODE(inode,
++ "corrupt xattr in inline inode");
++ return 0;
++ }
+ if (!entry->e_value_inum && entry->e_value_size) {
+ size_t offs = le16_to_cpu(entry->e_value_offs);
+ if (offs < min_offs)
+ min_offs = offs;
+ }
++ entry = next;
+ }
+ free = min_offs -
+ ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
+@@ -155,7 +168,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
+ (void *)ext4_raw_inode(&is.iloc));
+ EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
+ le32_to_cpu(is.s.here->e_value_size);
+- ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ }
+ out:
+ brelse(is.iloc.bh);
+@@ -348,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
+
+ error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
+ value, len);
+- if (error == -ENODATA)
++ if (error < 0)
+ goto out;
+
+ BUFFER_TRACE(is.iloc.bh, "get_write_access");
+@@ -1133,7 +1145,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
+ struct ext4_iloc *iloc,
+ void *buf, int inline_size)
+ {
+- ext4_create_inline_data(handle, inode, inline_size);
++ int ret;
++
++ ret = ext4_create_inline_data(handle, inode, inline_size);
++ if (ret) {
++ ext4_msg(inode->i_sb, KERN_EMERG,
++ "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
++ inode->i_ino, ret);
++ return;
++ }
+ ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ }
+@@ -1175,6 +1195,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
+ ext4_initialize_dirent_tail(dir_block,
+ inode->i_sb->s_blocksize);
+ set_buffer_uptodate(dir_block);
++ unlock_buffer(dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
+ if (err)
+ return err;
+@@ -1249,6 +1270,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
+ if (!S_ISDIR(inode->i_mode)) {
+ memcpy(data_bh->b_data, buf, inline_size);
+ set_buffer_uptodate(data_bh);
++ unlock_buffer(data_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode, data_bh);
+ } else {
+@@ -1256,7 +1278,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
+ buf, inline_size);
+ }
+
+- unlock_buffer(data_bh);
+ out_restore:
+ if (error)
+ ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
+@@ -1780,19 +1801,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
+ void *inline_pos;
+ unsigned int offset;
+ struct ext4_dir_entry_2 *de;
+- bool ret = true;
++ bool ret = false;
+
+ err = ext4_get_inode_loc(dir, &iloc);
+ if (err) {
+ EXT4_ERROR_INODE_ERR(dir, -err,
+ "error %d getting inode %lu block",
+ err, dir->i_ino);
+- return true;
++ return false;
+ }
+
+ down_read(&EXT4_I(dir)->xattr_sem);
+ if (!ext4_has_inline_data(dir)) {
+ *has_inline_data = 0;
++ ret = true;
+ goto out;
+ }
+
+@@ -1801,7 +1823,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
+ ext4_warning(dir->i_sb,
+ "bad inline directory (dir #%lu) - no `..'",
+ dir->i_ino);
+- ret = true;
+ goto out;
+ }
+
+@@ -1820,16 +1841,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
+ dir->i_ino, le32_to_cpu(de->inode),
+ le16_to_cpu(de->rec_len), de->name_len,
+ inline_size);
+- ret = true;
+ goto out;
+ }
+ if (le32_to_cpu(de->inode)) {
+- ret = false;
+ goto out;
+ }
+ offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
+ }
+
++ ret = true;
+ out:
+ up_read(&EXT4_I(dir)->xattr_sem);
+ brelse(iloc.bh);
+@@ -2004,6 +2024,18 @@ int ext4_convert_inline_data(struct inode *inode)
+ if (!ext4_has_inline_data(inode)) {
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return 0;
++ } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
++ /*
++ * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is
++ * cleared. This means we are in the middle of moving of
++ * inline data to delay allocated block. Just force writeout
++ * here to finish conversion.
++ */
++ error = filemap_flush(inode->i_mapping);
++ if (error)
++ return error;
++ if (!ext4_has_inline_data(inode))
++ return 0;
+ }
+
+ needed_blocks = ext4_writepage_trans_blocks(inode);
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 0f06305167d5a..64a783f221052 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -179,6 +179,8 @@ void ext4_evict_inode(struct inode *inode)
+
+ trace_ext4_evict_inode(inode);
+
++ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
++ ext4_evict_ea_inode(inode);
+ if (inode->i_nlink) {
+ /*
+ * When journalling data dirty buffers are tracked only in the
+@@ -223,13 +225,13 @@ void ext4_evict_inode(struct inode *inode)
+
+ /*
+ * For inodes with journalled data, transaction commit could have
+- * dirtied the inode. Flush worker is ignoring it because of I_FREEING
+- * flag but we still need to remove the inode from the writeback lists.
++ * dirtied the inode. And for inodes with dioread_nolock, unwritten
++ * extents converting worker could merge extents and also have dirtied
++ * the inode. Flush worker is ignoring it because of I_FREEING flag but
++ * we still need to remove the inode from the writeback lists.
+ */
+- if (!list_empty_careful(&inode->i_io_list)) {
+- WARN_ON_ONCE(!ext4_should_journal_data(inode));
++ if (!list_empty_careful(&inode->i_io_list))
+ inode_io_list_del(inode);
+- }
+
+ /*
+ * Protect us against freezing - iput() caller didn't have to have any
+@@ -336,8 +338,14 @@ stop_handle:
+ ext4_xattr_inode_array_free(ea_inode_array);
+ return;
+ no_delete:
++ /*
++ * Check out some where else accidentally dirty the evicting inode,
++ * which may probably cause inode use-after-free issues later.
++ */
++ WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list));
++
+ if (!list_empty(&EXT4_I(inode)->i_fc_list))
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
+ ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
+ }
+
+@@ -646,16 +654,6 @@ found:
+ */
+ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
+ }
+-
+- /*
+- * Update reserved blocks/metadata blocks after successful
+- * block allocation which had been deferred till now. We don't
+- * support fallocate for non extent files. So we can update
+- * reserve space here.
+- */
+- if ((retval > 0) &&
+- (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
+- ext4_da_update_reserve_space(inode, retval, 1);
+ }
+
+ if (retval > 0) {
+@@ -741,10 +739,11 @@ out_sem:
+ if (ret)
+ return ret;
+ }
+- ext4_fc_track_range(handle, inode, map->m_lblk,
+- map->m_lblk + map->m_len - 1);
+ }
+-
++ if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
++ map->m_flags & EXT4_MAP_MAPPED))
++ ext4_fc_track_range(handle, inode, map->m_lblk,
++ map->m_lblk + map->m_len - 1);
+ if (retval < 0)
+ ext_debug(inode, "failed with err %d\n", retval);
+ return retval;
+@@ -1174,6 +1173,13 @@ retry_grab:
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ return -ENOMEM;
++ /*
++ * The same as page allocation, we prealloc buffer heads before
++ * starting the handle.
++ */
++ if (!page_has_buffers(page))
++ create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
++
+ unlock_page(page);
+
+ retry_journal:
+@@ -1288,7 +1294,8 @@ static int ext4_write_end(struct file *file,
+
+ trace_ext4_write_end(inode, pos, len, copied);
+
+- if (ext4_has_inline_data(inode))
++ if (ext4_has_inline_data(inode) &&
++ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
+ return ext4_write_inline_data_end(inode, pos, len, copied, page);
+
+ copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+@@ -1559,7 +1566,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+ ext4_lblk_t start, last;
+ start = index << (PAGE_SHIFT - inode->i_blkbits);
+ last = end << (PAGE_SHIFT - inode->i_blkbits);
++
++ /*
++ * avoid racing with extent status tree scans made by
++ * ext4_insert_delayed_block()
++ */
++ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_es_remove_extent(inode, start, last - start + 1);
++ up_write(&EXT4_I(inode)->i_data_sem);
+ }
+
+ pagevec_init(&pvec);
+@@ -1711,16 +1725,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ }
+
+ /*
+- * the buffer head associated with a delayed and not unwritten
+- * block found in the extent status cache must contain an
+- * invalid block number and have its BH_New and BH_Delay bits
+- * set, reflecting the state assigned when the block was
+- * initially delayed allocated
++ * A delayed extent could have been allocated by fallocate,
++ * so we need to check for that.
+ */
+- if (ext4_es_is_delonly(&es)) {
+- BUG_ON(bh->b_blocknr != invalid_block);
+- BUG_ON(!buffer_new(bh));
+- BUG_ON(!buffer_delay(bh));
++ if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
++ map_bh(bh, inode->i_sb, invalid_block);
++ set_buffer_new(bh);
++ set_buffer_delay(bh);
+ return 0;
+ }
+
+@@ -1847,30 +1858,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+ return 0;
+ }
+
+-static int bget_one(handle_t *handle, struct inode *inode,
+- struct buffer_head *bh)
+-{
+- get_bh(bh);
+- return 0;
+-}
+-
+-static int bput_one(handle_t *handle, struct inode *inode,
+- struct buffer_head *bh)
+-{
+- put_bh(bh);
+- return 0;
+-}
+-
+ static int __ext4_journalled_writepage(struct page *page,
+ unsigned int len)
+ {
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+- struct buffer_head *page_bufs = NULL;
+ handle_t *handle = NULL;
+ int ret = 0, err = 0;
+ int inline_data = ext4_has_inline_data(inode);
+ struct buffer_head *inode_bh = NULL;
++ loff_t size;
+
+ ClearPageChecked(page);
+
+@@ -1880,14 +1877,6 @@ static int __ext4_journalled_writepage(struct page *page,
+ inode_bh = ext4_journalled_write_inline_data(inode, len, page);
+ if (inode_bh == NULL)
+ goto out;
+- } else {
+- page_bufs = page_buffers(page);
+- if (!page_bufs) {
+- BUG();
+- goto out;
+- }
+- ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
+- NULL, bget_one);
+ }
+ /*
+ * We need to release the page lock before we start the
+@@ -1908,7 +1897,8 @@ static int __ext4_journalled_writepage(struct page *page,
+
+ lock_page(page);
+ put_page(page);
+- if (page->mapping != mapping) {
++ size = i_size_read(inode);
++ if (page->mapping != mapping || page_offset(page) > size) {
+ /* The page got truncated from under us */
+ ext4_journal_stop(handle);
+ ret = 0;
+@@ -1918,6 +1908,13 @@ static int __ext4_journalled_writepage(struct page *page,
+ if (inline_data) {
+ ret = ext4_mark_inode_dirty(handle, inode);
+ } else {
++ struct buffer_head *page_bufs = page_buffers(page);
++
++ if (page->index == size >> PAGE_SHIFT)
++ len = size & ~PAGE_MASK;
++ else
++ len = PAGE_SIZE;
++
+ ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
+ NULL, do_journal_get_write_access);
+
+@@ -1938,9 +1935,6 @@ static int __ext4_journalled_writepage(struct page *page,
+ out:
+ unlock_page(page);
+ out_no_pagelock:
+- if (!inline_data && page_bufs)
+- ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len,
+- NULL, bput_one);
+ brelse(inode_bh);
+ return ret;
+ }
+@@ -2011,6 +2005,15 @@ static int ext4_writepage(struct page *page,
+ else
+ len = PAGE_SIZE;
+
++ /* Should never happen but for bugs in other kernel subsystems */
++ if (!page_has_buffers(page)) {
++ ext4_warning_inode(inode,
++ "page %lu does not have buffers attached", page->index);
++ ClearPageDirty(page);
++ unlock_page(page);
++ return 0;
++ }
++
+ page_bufs = page_buffers(page);
+ /*
+ * We cannot do block allocation or other extent handling in this
+@@ -2614,6 +2617,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
+ wait_on_page_writeback(page);
+ BUG_ON(PageWriteback(page));
+
++ /*
++ * Should never happen but for buggy code in
++ * other subsystems that call
++ * set_page_dirty() without properly warning
++ * the file system first. See [1] for more
++ * information.
++ *
++ * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
++ */
++ if (!page_has_buffers(page)) {
++ ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index);
++ ClearPageDirty(page);
++ unlock_page(page);
++ continue;
++ }
++
+ if (mpd->map.m_len == 0)
+ mpd->first_page = page->index;
+ mpd->next_page = page->index + 1;
+@@ -3036,6 +3055,9 @@ static int ext4_da_write_end(struct file *file,
+ ext4_has_inline_data(inode))
+ return ext4_write_inline_data_end(inode, pos, len, copied, page);
+
++ if (unlikely(copied < len) && !PageUptodate(page))
++ copied = 0;
++
+ start = pos & (PAGE_SIZE - 1);
+ end = start + copied - 1;
+
+@@ -3124,13 +3146,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+ {
+ struct inode *inode = mapping->host;
+ journal_t *journal;
++ sector_t ret = 0;
+ int err;
+
++ inode_lock_shared(inode);
+ /*
+ * We can get here for an inline file via the FIBMAP ioctl
+ */
+ if (ext4_has_inline_data(inode))
+- return 0;
++ goto out;
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+ test_opt(inode->i_sb, DELALLOC)) {
+@@ -3169,10 +3193,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+ jbd2_journal_unlock_updates(journal);
+
+ if (err)
+- return 0;
++ goto out;
+ }
+
+- return iomap_bmap(mapping, block, &ext4_iomap_ops);
++ ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
++
++out:
++ inode_unlock_shared(inode);
++ return ret;
+ }
+
+ static int ext4_readpage(struct file *file, struct page *page)
+@@ -3440,7 +3468,7 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
+ */
+ flags &= ~IOMAP_WRITE;
+ ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
+- WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
++ WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
+ return ret;
+ }
+
+@@ -3933,27 +3961,20 @@ int ext4_break_layouts(struct inode *inode)
+ * Returns: 0 on success or negative on failure
+ */
+
+-int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
++int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
+ {
++ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
+ ext4_lblk_t first_block, stop_block;
+ struct address_space *mapping = inode->i_mapping;
+- loff_t first_block_offset, last_block_offset;
++ loff_t first_block_offset, last_block_offset, max_length;
++ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ handle_t *handle;
+ unsigned int credits;
+ int ret = 0, ret2 = 0;
+
+ trace_ext4_punch_hole(inode, offset, length, 0);
+
+- ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+- if (ext4_has_inline_data(inode)) {
+- filemap_invalidate_lock(mapping);
+- ret = ext4_convert_inline_data(inode);
+- filemap_invalidate_unlock(mapping);
+- if (ret)
+- return ret;
+- }
+-
+ /*
+ * Write out all dirty pages to avoid race conditions
+ * Then release them.
+@@ -3981,6 +4002,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ offset;
+ }
+
++ /*
++ * For punch hole the length + offset needs to be within one block
++ * before last range. Adjust the length if it goes beyond that limit.
++ */
++ max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
++ if (offset + length > max_length)
++ length = max_length - offset;
++
+ if (offset & (sb->s_blocksize - 1) ||
+ (offset + length) & (sb->s_blocksize - 1)) {
+ /*
+@@ -3996,6 +4025,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ inode_dio_wait(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out_mutex;
++
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+@@ -4165,7 +4198,8 @@ int ext4_truncate(struct inode *inode)
+
+ /* If we zero-out tail of the page, we have to create jinode for jbd2 */
+ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
+- if (ext4_inode_attach_jinode(inode) < 0)
++ err = ext4_inode_attach_jinode(inode);
++ if (err)
+ goto out_trace;
+ }
+
+@@ -4266,9 +4300,17 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
+ inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ inode_offset = ((ino - 1) %
+ EXT4_INODES_PER_GROUP(sb));
+- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
+ iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+
++ block = ext4_inode_table(sb, gdp);
++ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
++ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
++ ext4_error(sb, "Invalid inode table block %llu in "
++ "block_group %u", block, iloc->block_group);
++ return -EFSCORRUPTED;
++ }
++ block += (inode_offset / inodes_per_block);
++
+ bh = sb_getblk(sb, block);
+ if (unlikely(!bh))
+ return -ENOMEM;
+@@ -4374,7 +4416,7 @@ has_buffer:
+ static int __ext4_get_inode_loc_noinmem(struct inode *inode,
+ struct ext4_iloc *iloc)
+ {
+- ext4_fsblk_t err_blk;
++ ext4_fsblk_t err_blk = 0;
+ int ret;
+
+ ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0,
+@@ -4389,7 +4431,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode,
+
+ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
+ {
+- ext4_fsblk_t err_blk;
++ ext4_fsblk_t err_blk = 0;
+ int ret;
+
+ /* We have all inode data except xattrs in memory here. */
+@@ -4498,11 +4540,15 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
+ __le32 *magic = (void *)raw_inode +
+ EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+
+- if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+- EXT4_INODE_SIZE(inode->i_sb) &&
++ if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
+ *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
++ int err;
++
+ ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+- return ext4_find_inline_data_nolock(inode);
++ err = ext4_find_inline_data_nolock(inode);
++ if (!err && ext4_has_inline_data(inode))
++ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
++ return err;
+ } else
+ EXT4_I(inode)->i_inline_off = 0;
+ return 0;
+@@ -4536,6 +4582,24 @@ static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
+ return inode_peek_iversion(inode);
+ }
+
++static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
++
++{
++ if (flags & EXT4_IGET_EA_INODE) {
++ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
++ return "missing EA_INODE flag";
++ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
++ EXT4_I(inode)->i_file_acl)
++ return "ea_inode with extended attributes";
++ } else {
++ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
++ return "unexpected EA_INODE flag";
++ }
++ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
++ return "unexpected bad inode w/o EXT4_IGET_BAD";
++ return NULL;
++}
++
+ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line)
+@@ -4545,6 +4609,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ struct ext4_inode_info *ei;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ struct inode *inode;
++ const char *err_str;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+ long ret;
+ loff_t size;
+@@ -4572,8 +4637,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+- if (!(inode->i_state & I_NEW))
++ if (!(inode->i_state & I_NEW)) {
++ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
++ ext4_error_inode(inode, function, line, 0, err_str);
++ iput(inode);
++ return ERR_PTR(-EFSCORRUPTED);
++ }
+ return inode;
++ }
+
+ ei = EXT4_I(inode);
+ iloc.bh = NULL;
+@@ -4583,13 +4654,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ goto bad_inode;
+ raw_inode = ext4_raw_inode(&iloc);
+
+- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
+- ext4_error_inode(inode, function, line, 0,
+- "iget: root inode unallocated");
+- ret = -EFSCORRUPTED;
+- goto bad_inode;
+- }
+-
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+@@ -4662,11 +4726,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ * NeilBrown 1999oct15
+ */
+ if (inode->i_nlink == 0) {
+- if ((inode->i_mode == 0 ||
++ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
+ !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
+ ino != EXT4_BOOT_LOADER_INO) {
+- /* this inode is deleted */
+- ret = -ESTALE;
++ /* this inode is deleted or unallocated */
++ if (flags & EXT4_IGET_SPECIAL) {
++ ext4_error_inode(inode, function, line, 0,
++ "iget: special inode unallocated");
++ ret = -EFSCORRUPTED;
++ } else
++ ret = -ESTALE;
+ goto bad_inode;
+ }
+ /* The only unlinked inodes we let through here have
+@@ -4844,8 +4913,13 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+ ext4_error_inode(inode, function, line, 0,
+ "casefold flag without casefold feature");
+- brelse(iloc.bh);
++ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
++ ext4_error_inode(inode, function, line, 0, err_str);
++ ret = -EFSCORRUPTED;
++ goto bad_inode;
++ }
+
++ brelse(iloc.bh);
+ unlock_new_inode(inode);
+ return inode;
+
+@@ -5166,7 +5240,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
+
+ if (EXT4_SB(inode->i_sb)->s_journal) {
+ if (ext4_journal_current_handle()) {
+- jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
++ ext4_debug("called recursively, non-PF_MEMALLOC!\n");
+ dump_stack();
+ return -EIO;
+ }
+@@ -5309,7 +5383,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ if (error)
+ return error;
+ }
+- ext4_fc_start_update(inode);
++
+ if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
+ (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
+ handle_t *handle;
+@@ -5333,7 +5407,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+
+ if (error) {
+ ext4_journal_stop(handle);
+- ext4_fc_stop_update(inode);
+ return error;
+ }
+ /* Update corresponding info in inode so that everything is in
+@@ -5345,7 +5418,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ error = ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ if (unlikely(error)) {
+- ext4_fc_stop_update(inode);
+ return error;
+ }
+ }
+@@ -5353,18 +5425,17 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ if (attr->ia_valid & ATTR_SIZE) {
+ handle_t *handle;
+ loff_t oldsize = inode->i_size;
++ loff_t old_disksize;
+ int shrink = (attr->ia_size < inode->i_size);
+
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (attr->ia_size > sbi->s_bitmap_maxbytes) {
+- ext4_fc_stop_update(inode);
+ return -EFBIG;
+ }
+ }
+ if (!S_ISREG(inode->i_mode)) {
+- ext4_fc_stop_update(inode);
+ return -EINVAL;
+ }
+
+@@ -5416,8 +5487,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ ext4_fc_track_range(handle, inode,
+ (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
+ inode->i_sb->s_blocksize_bits,
+- (oldsize > 0 ? oldsize - 1 : 0) >>
+- inode->i_sb->s_blocksize_bits);
++ EXT_MAX_BLOCKS - 1);
+ else
+ ext4_fc_track_range(
+ handle, inode,
+@@ -5427,6 +5497,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ inode->i_sb->s_blocksize_bits);
+
+ down_write(&EXT4_I(inode)->i_data_sem);
++ old_disksize = EXT4_I(inode)->i_disksize;
+ EXT4_I(inode)->i_disksize = attr->ia_size;
+ rc = ext4_mark_inode_dirty(handle, inode);
+ if (!error)
+@@ -5438,6 +5509,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ */
+ if (!error)
+ i_size_write(inode, attr->ia_size);
++ else
++ EXT4_I(inode)->i_disksize = old_disksize;
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext4_journal_stop(handle);
+ if (error)
+@@ -5488,7 +5561,6 @@ err_out:
+ ext4_std_error(inode->i_sb, error);
+ if (!error)
+ error = rc;
+- ext4_fc_stop_update(inode);
+ return error;
+ }
+
+@@ -5673,7 +5745,12 @@ int ext4_mark_iloc_dirty(handle_t *handle,
+ }
+ ext4_fc_track_inode(handle, inode);
+
+- if (IS_I_VERSION(inode))
++ /*
++ * ea_inodes are using i_version for storing reference count, don't
++ * mess with it
++ */
++ if (IS_I_VERSION(inode) &&
++ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ inode_inc_iversion(inode);
+
+ /* the do_update_inode consumes one bh->b_count */
+@@ -5751,6 +5828,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
+ return 0;
+ }
+
++ /*
++ * We may need to allocate external xattr block so we need quotas
++ * initialized. Here we can be called with various locks held so we
++ * cannot afford to initialize quotas ourselves. So just bail.
++ */
++ if (dquot_initialize_needed(inode))
++ return -EAGAIN;
++
+ /* try to expand with EAs present */
+ error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
+ raw_inode, handle);
+@@ -5989,7 +6074,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
+ return PTR_ERR(handle);
+
+ ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
++ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
+ err = ext4_mark_inode_dirty(handle, inode);
+ ext4_handle_sync(handle);
+ ext4_journal_stop(handle);
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index 606dee9e08a32..18002b0a908ce 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -124,7 +124,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
+ blkcnt_t blocks;
+ unsigned short bytes;
+
+- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
++ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO,
++ EXT4_IGET_SPECIAL | EXT4_IGET_BAD);
+ if (IS_ERR(inode_bl))
+ return PTR_ERR(inode_bl);
+ ei_bl = EXT4_I(inode_bl);
+@@ -169,12 +170,12 @@ static long swap_inode_boot_loader(struct super_block *sb,
+ err = -EINVAL;
+ goto err_out;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
+
+ /* Protect extent tree against block allocations via delalloc */
+ ext4_double_down_write_data_sem(inode, inode_bl);
+
+- if (inode_bl->i_nlink == 0) {
++ if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) {
+ /* this inode has never been used as a BOOT_LOADER */
+ set_nlink(inode_bl, 1);
+ i_uid_write(inode_bl, 0);
+@@ -183,6 +184,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
+ ei_bl->i_flags = 0;
+ inode_set_iversion(inode_bl, 1);
+ i_size_write(inode_bl, 0);
++ EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
+ inode_bl->i_mode = S_IFREG;
+ if (ext4_has_feature_extents(sb)) {
+ ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
+@@ -252,7 +254,6 @@ revert:
+
+ err_out1:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ ext4_double_up_write_data_sem(inode, inode_bl);
+
+ err_out:
+@@ -492,6 +493,10 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
+ if (ext4_is_quota_file(inode))
+ return err;
+
++ err = dquot_initialize(inode);
++ if (err)
++ return err;
++
+ err = ext4_get_inode_loc(inode, &iloc);
+ if (err)
+ return err;
+@@ -507,10 +512,6 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
+ brelse(iloc.bh);
+ }
+
+- err = dquot_initialize(inode);
+- if (err)
+- return err;
+-
+ handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+ EXT4_QUOTA_INIT_BLOCKS(sb) +
+ EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+@@ -558,6 +559,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u32 flags;
++ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -576,7 +578,9 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
+
+ switch (flags) {
+ case EXT4_GOING_FLAGS_DEFAULT:
+- freeze_bdev(sb->s_bdev);
++ ret = freeze_bdev(sb->s_bdev);
++ if (ret)
++ return ret;
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ thaw_bdev(sb->s_bdev);
+ break;
+@@ -743,7 +747,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
+ u32 flags = fa->flags;
+ int err = -EOPNOTSUPP;
+
+- ext4_fc_start_update(inode);
+ if (flags & ~EXT4_FL_USER_VISIBLE)
+ goto out;
+
+@@ -764,7 +767,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
+ goto out;
+ err = ext4_ioctl_setproject(inode, fa->fsx_projid);
+ out:
+- ext4_fc_stop_update(inode);
+ return err;
+ }
+
+@@ -1076,7 +1078,7 @@ mext_out:
+
+ err = ext4_resize_fs(sb, n_blocks_count);
+ if (EXT4_SB(sb)->s_journal) {
+- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+@@ -1117,8 +1119,6 @@ resizefs_out:
+ sizeof(range)))
+ return -EFAULT;
+
+- range.minlen = max((unsigned int)range.minlen,
+- q->limits.discard_granularity);
+ ret = ext4_trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+@@ -1273,13 +1273,7 @@ resizefs_out:
+
+ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ {
+- long ret;
+-
+- ext4_fc_start_update(file_inode(filp));
+- ret = __ext4_ioctl(filp, cmd, arg);
+- ext4_fc_stop_update(file_inode(filp));
+-
+- return ret;
++ return __ext4_ioctl(filp, cmd, arg);
+ }
+
+ #ifdef CONFIG_COMPAT
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 72bfac2d6dce9..7e7153c673c0d 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -745,6 +745,8 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+ MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
+
+ grp = ext4_get_group_info(sb, e4b->bd_group);
++ if (!grp)
++ return NULL;
+ list_for_each(cur, &grp->bb_prealloc_list) {
+ ext4_group_t groupnr;
+ struct ext4_prealloc_space *pa;
+@@ -1000,7 +1002,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
+ return 0;
+ if (ac->ac_criteria >= 2)
+ return 0;
+- if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
++ if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
+ return 0;
+ return 1;
+ }
+@@ -1009,8 +1011,9 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
+ * Return next linear group for allocation. If linear traversal should not be
+ * performed, this function just returns the same group
+ */
+-static int
+-next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
++static ext4_group_t
++next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
++ ext4_group_t ngroups)
+ {
+ if (!should_optimize_scan(ac))
+ goto inc_and_return;
+@@ -1052,8 +1055,10 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
+ {
+ *new_cr = ac->ac_criteria;
+
+- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
++ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
++ *group = next_linear_group(ac, *group, ngroups);
+ return;
++ }
+
+ if (*new_cr == 0) {
+ ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
+@@ -1078,23 +1083,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int i;
+
+- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
++ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
++ if (grp->bb_counters[i] > 0)
++ break;
++ /* No need to move between order lists? */
++ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
++ i == grp->bb_largest_free_order) {
++ grp->bb_largest_free_order = i;
++ return;
++ }
++
++ if (grp->bb_largest_free_order >= 0) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[
+ grp->bb_largest_free_order]);
+ list_del_init(&grp->bb_largest_free_order_node);
+ write_unlock(&sbi->s_mb_largest_free_orders_locks[
+ grp->bb_largest_free_order]);
+ }
+- grp->bb_largest_free_order = -1; /* uninit */
+-
+- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
+- if (grp->bb_counters[i] > 0) {
+- grp->bb_largest_free_order = i;
+- break;
+- }
+- }
+- if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
+- grp->bb_largest_free_order >= 0 && grp->bb_free) {
++ grp->bb_largest_free_order = i;
++ if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[
+ grp->bb_largest_free_order]);
+ list_add_tail(&grp->bb_largest_free_order_node,
+@@ -1106,9 +1113,9 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+
+ static noinline_for_stack
+ void ext4_mb_generate_buddy(struct super_block *sb,
+- void *buddy, void *bitmap, ext4_group_t group)
++ void *buddy, void *bitmap, ext4_group_t group,
++ struct ext4_group_info *grp)
+ {
+- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+ ext4_grpblk_t i = 0;
+@@ -1229,6 +1236,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ break;
+
+ grinfo = ext4_get_group_info(sb, group);
++ if (!grinfo)
++ continue;
+ /*
+ * If page is uptodate then we came here after online resize
+ * which added some new uninitialized group info structs, so
+@@ -1294,6 +1303,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ group, page->index, i * blocksize);
+ trace_ext4_mb_buddy_bitmap_load(sb, group);
+ grinfo = ext4_get_group_info(sb, group);
++ if (!grinfo) {
++ err = -EFSCORRUPTED;
++ goto out;
++ }
+ grinfo->bb_fragments = 0;
+ memset(grinfo->bb_counters, 0,
+ sizeof(*grinfo->bb_counters) *
+@@ -1304,7 +1317,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ ext4_lock_group(sb, group);
+ /* init the buddy */
+ memset(data, 0xff, blocksize);
+- ext4_mb_generate_buddy(sb, data, incore, group);
++ ext4_mb_generate_buddy(sb, data, incore, group, grinfo);
+ ext4_unlock_group(sb, group);
+ incore = NULL;
+ } else {
+@@ -1418,6 +1431,9 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
+ might_sleep();
+ mb_debug(sb, "init group %u\n", group);
+ this_grp = ext4_get_group_info(sb, group);
++ if (!this_grp)
++ return -EFSCORRUPTED;
++
+ /*
+ * This ensures that we don't reinit the buddy cache
+ * page which map to the group from which we are already
+@@ -1492,6 +1508,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+
+ blocks_per_page = PAGE_SIZE / sb->s_blocksize;
+ grp = ext4_get_group_info(sb, group);
++ if (!grp)
++ return -EFSCORRUPTED;
+
+ e4b->bd_blkbits = sb->s_blocksize_bits;
+ e4b->bd_info = grp;
+@@ -2202,7 +2220,9 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
+ struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
+ struct ext4_free_extent ex;
+
+- if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
++ if (!grp)
++ return -EFSCORRUPTED;
++ if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY)))
+ return 0;
+ if (grp->bb_free == 0)
+ return 0;
+@@ -2426,7 +2446,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
+
+ BUG_ON(cr < 0 || cr >= 4);
+
+- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
++ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ return false;
+
+ free = grp->bb_free;
+@@ -2495,6 +2515,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
+ ext4_grpblk_t free;
+ int ret = 0;
+
++ if (!grp)
++ return -EFSCORRUPTED;
+ if (sbi->s_mb_stats)
+ atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
+ if (should_lock) {
+@@ -2575,7 +2597,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
+ * prefetch once, so we avoid getblk() call, which can
+ * be expensive.
+ */
+- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
++ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
+ EXT4_MB_GRP_NEED_INIT(grp) &&
+ ext4_free_group_clusters(sb, gdp) > 0 &&
+ !(ext4_has_group_desc_csum(sb) &&
+@@ -2619,7 +2641,7 @@ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+ group--;
+ grp = ext4_get_group_info(sb, group);
+
+- if (EXT4_MB_GRP_NEED_INIT(grp) &&
++ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) &&
+ ext4_free_group_clusters(sb, gdp) > 0 &&
+ !(ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
+@@ -2633,7 +2655,7 @@ static noinline_for_stack int
+ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+ {
+ ext4_group_t prefetch_grp = 0, ngroups, group, i;
+- int cr = -1;
++ int cr = -1, new_cr;
+ int err = 0, first_err = 0;
+ unsigned int nr = 0, prefetch_ios = 0;
+ struct ext4_sb_info *sbi;
+@@ -2708,13 +2730,11 @@ repeat:
+ ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
+ prefetch_grp = group;
+
+- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
+- i++) {
+- int ret = 0, new_cr;
++ for (i = 0, new_cr = cr; i < ngroups; i++,
++ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
++ int ret = 0;
+
+ cond_resched();
+-
+- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
+ if (new_cr != cr) {
+ cr = new_cr;
+ goto repeat;
+@@ -2881,6 +2901,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ sizeof(struct ext4_group_info);
+
+ grinfo = ext4_get_group_info(sb, group);
++ if (!grinfo)
++ return 0;
+ /* Load the group info in memory only if not already loaded. */
+ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
+ err = ext4_mb_load_buddy(sb, group, &e4b);
+@@ -2891,7 +2913,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ buddy_loaded = 1;
+ }
+
+- memcpy(&sg, ext4_get_group_info(sb, group), i);
++ memcpy(&sg, grinfo, i);
+
+ if (buddy_loaded)
+ ext4_mb_unload_buddy(&e4b);
+@@ -3263,8 +3285,12 @@ static int ext4_mb_init_backend(struct super_block *sb)
+
+ err_freebuddy:
+ cachep = get_groupinfo_cache(sb->s_blocksize_bits);
+- while (i-- > 0)
+- kmem_cache_free(cachep, ext4_get_group_info(sb, i));
++ while (i-- > 0) {
++ struct ext4_group_info *grp = ext4_get_group_info(sb, i);
++
++ if (grp)
++ kmem_cache_free(cachep, grp);
++ }
+ i = sbi->s_group_info_size;
+ rcu_read_lock();
+ group_info = rcu_dereference(sbi->s_group_info);
+@@ -3560,6 +3586,8 @@ int ext4_mb_release(struct super_block *sb)
+ for (i = 0; i < ngroups; i++) {
+ cond_resched();
+ grinfo = ext4_get_group_info(sb, i);
++ if (!grinfo)
++ continue;
+ mb_group_bb_bitmap_free(grinfo);
+ ext4_lock_group(sb, i);
+ count = ext4_mb_cleanup_pa(grinfo);
+@@ -3899,69 +3927,95 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_group_t group;
+ ext4_grpblk_t blkoff;
+- int i, clen, err;
++ int i, err;
+ int already;
++ unsigned int clen, clen_changed, thisgrp_len;
+
+- clen = EXT4_B2C(sbi, len);
++ while (len > 0) {
++ ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
+
+- ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
+- bitmap_bh = ext4_read_block_bitmap(sb, group);
+- if (IS_ERR(bitmap_bh)) {
+- err = PTR_ERR(bitmap_bh);
+- bitmap_bh = NULL;
+- goto out_err;
+- }
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary.
++ * In case of flex_bg, (block, len) may span across more than
++ * one group. In that case we need to get the corresponding
++ * group metadata to work with, which is why we loop over the
++ * range one group at a time.
++ */
++ thisgrp_len = min_t(unsigned int, (unsigned int)len,
++ EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
++ clen = EXT4_NUM_B2C(sbi, thisgrp_len);
+
+- err = -EIO;
+- gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+- if (!gdp)
+- goto out_err;
++ bitmap_bh = ext4_read_block_bitmap(sb, group);
++ if (IS_ERR(bitmap_bh)) {
++ err = PTR_ERR(bitmap_bh);
++ bitmap_bh = NULL;
++ break;
++ }
+
+- ext4_lock_group(sb, group);
+- already = 0;
+- for (i = 0; i < clen; i++)
+- if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
+- already++;
++ err = -EIO;
++ gdp = ext4_get_group_desc(sb, group, &gdp_bh);
++ if (!gdp)
++ break;
+
+- if (state)
+- ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
+- else
+- mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
+- if (ext4_has_group_desc_csum(sb) &&
+- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+- ext4_free_group_clusters_set(sb, gdp,
+- ext4_free_clusters_after_init(sb,
+- group, gdp));
+- }
+- if (state)
+- clen = ext4_free_group_clusters(sb, gdp) - clen + already;
+- else
+- clen = ext4_free_group_clusters(sb, gdp) + clen - already;
++ ext4_lock_group(sb, group);
++ already = 0;
++ for (i = 0; i < clen; i++)
++ if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
++ !state)
++ already++;
++
++ clen_changed = clen - already;
++ if (state)
++ ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
++ else
++ mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
++ if (ext4_has_group_desc_csum(sb) &&
++ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
++ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
++ ext4_free_group_clusters_set(sb, gdp,
++ ext4_free_clusters_after_init(sb, group, gdp));
++ }
++ if (state)
++ clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
++ else
++ clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
+
+- ext4_free_group_clusters_set(sb, gdp, clen);
+- ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
+- ext4_group_desc_csum_set(sb, group, gdp);
++ ext4_free_group_clusters_set(sb, gdp, clen);
++ ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
++ ext4_group_desc_csum_set(sb, group, gdp);
+
+- ext4_unlock_group(sb, group);
++ ext4_unlock_group(sb, group);
+
+- if (sbi->s_log_groups_per_flex) {
+- ext4_group_t flex_group = ext4_flex_group(sbi, group);
++ if (sbi->s_log_groups_per_flex) {
++ ext4_group_t flex_group = ext4_flex_group(sbi, group);
++ struct flex_groups *fg = sbi_array_rcu_deref(sbi,
++ s_flex_groups, flex_group);
+
+- atomic64_sub(len,
+- &sbi_array_rcu_deref(sbi, s_flex_groups,
+- flex_group)->free_clusters);
++ if (state)
++ atomic64_sub(clen_changed, &fg->free_clusters);
++ else
++ atomic64_add(clen_changed, &fg->free_clusters);
++
++ }
++
++ err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
++ if (err)
++ break;
++ sync_dirty_buffer(bitmap_bh);
++ err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
++ sync_dirty_buffer(gdp_bh);
++ if (err)
++ break;
++
++ block += thisgrp_len;
++ len -= thisgrp_len;
++ brelse(bitmap_bh);
++ BUG_ON(len < 0);
+ }
+
+- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+ if (err)
+- goto out_err;
+- sync_dirty_buffer(bitmap_bh);
+- err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+- sync_dirty_buffer(gdp_bh);
+-
+-out_err:
+- brelse(bitmap_bh);
++ brelse(bitmap_bh);
+ }
+
+ /*
+@@ -3992,6 +4046,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ struct ext4_allocation_request *ar)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
++ struct ext4_super_block *es = sbi->s_es;
+ int bsbits, max;
+ ext4_lblk_t end;
+ loff_t size, start_off;
+@@ -4073,6 +4128,15 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ size = size >> bsbits;
+ start = start_off >> bsbits;
+
++ /*
++ * For tiny groups (smaller than 8MB) the chosen allocation
++ * alignment may be larger than group size. Make sure the
++ * alignment does not move allocation to a different group which
++ * makes mballoc fail assertions later.
++ */
++ start = max(start, rounddown(ac->ac_o_ex.fe_logical,
++ (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
++
+ /* don't cover already allocated blocks in selected range */
+ if (ar->pleft && start <= ar->lleft) {
+ size -= ar->lleft + 1 - start;
+@@ -4163,18 +4227,21 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
+
+ /* define goal start in order to merge */
+- if (ar->pright && (ar->lright == (start + size))) {
++ if (ar->pright && (ar->lright == (start + size)) &&
++ ar->pright >= size &&
++ ar->pright - size >= le32_to_cpu(es->s_first_data_block)) {
+ /* merge to the right */
+ ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
+- &ac->ac_f_ex.fe_group,
+- &ac->ac_f_ex.fe_start);
++ &ac->ac_g_ex.fe_group,
++ &ac->ac_g_ex.fe_start);
+ ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
+ }
+- if (ar->pleft && (ar->lleft + 1 == start)) {
++ if (ar->pleft && (ar->lleft + 1 == start) &&
++ ar->pleft + 1 < ext4_blocks_count(es)) {
+ /* merge to the left */
+ ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
+- &ac->ac_f_ex.fe_group,
+- &ac->ac_f_ex.fe_start);
++ &ac->ac_g_ex.fe_group,
++ &ac->ac_g_ex.fe_start);
+ ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
+ }
+
+@@ -4267,6 +4334,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+ BUG_ON(start < pa->pa_pstart);
+ BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
+ BUG_ON(pa->pa_free < len);
++ BUG_ON(ac->ac_b_ex.fe_len <= 0);
+ pa->pa_free -= len;
+
+ mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
+@@ -4429,6 +4497,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+ struct ext4_free_data *entry;
+
+ grp = ext4_get_group_info(sb, group);
++ if (!grp)
++ return;
+ n = rb_first(&(grp->bb_free_root));
+
+ while (n) {
+@@ -4456,6 +4526,9 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ int preallocated = 0;
+ int len;
+
++ if (!grp)
++ return;
++
+ /* all form of preallocation discards first load group,
+ * so the only competing code is preallocation use.
+ * we don't need any locking here
+@@ -4591,10 +4664,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+ pa = ac->ac_pa;
+
+ if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
+- int winl;
+- int wins;
+- int win;
+- int offs;
++ int new_bex_start;
++ int new_bex_end;
+
+ /* we can't allocate as much as normalizer wants.
+ * so, found space must get proper lstart
+@@ -4602,26 +4673,40 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
+ BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
+
+- /* we're limited by original request in that
+- * logical block must be covered any way
+- * winl is window we can move our chunk within */
+- winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
++ /*
++ * Use the below logic for adjusting best extent as it keeps
++ * fragmentation in check while ensuring logical range of best
++ * extent doesn't overflow out of goal extent:
++ *
++ * 1. Check if best ex can be kept at end of goal and still
++ * cover original start
++ * 2. Else, check if best ex can be kept at start of goal and
++ * still cover original start
++ * 3. Else, keep the best ex at start of original request.
++ */
++ new_bex_end = ac->ac_g_ex.fe_logical +
++ EXT4_C2B(sbi, ac->ac_g_ex.fe_len);
++ new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
++ if (ac->ac_o_ex.fe_logical >= new_bex_start)
++ goto adjust_bex;
+
+- /* also, we should cover whole original request */
+- wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
++ new_bex_start = ac->ac_g_ex.fe_logical;
++ new_bex_end =
++ new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
++ if (ac->ac_o_ex.fe_logical < new_bex_end)
++ goto adjust_bex;
+
+- /* the smallest one defines real window */
+- win = min(winl, wins);
++ new_bex_start = ac->ac_o_ex.fe_logical;
++ new_bex_end =
++ new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+
+- offs = ac->ac_o_ex.fe_logical %
+- EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+- if (offs && offs < win)
+- win = offs;
++adjust_bex:
++ ac->ac_b_ex.fe_logical = new_bex_start;
+
+- ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
+- EXT4_NUM_B2C(sbi, win);
+ BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
+ BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
++ BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical +
++ EXT4_C2B(sbi, ac->ac_g_ex.fe_len)));
+ }
+
+ /* preallocation can change ac_b_ex, thus we store actually
+@@ -4647,6 +4732,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+
+ ei = EXT4_I(ac->ac_inode);
+ grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
++ if (!grp)
++ return;
+
+ pa->pa_obj_lock = &ei->i_prealloc_lock;
+ pa->pa_inode = ac->ac_inode;
+@@ -4700,6 +4787,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+ atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
+
+ grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
++ if (!grp)
++ return;
+ lg = ac->ac_lg;
+ BUG_ON(lg == NULL);
+
+@@ -4795,7 +4884,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+ trace_ext4_mb_release_group_pa(sb, pa);
+ BUG_ON(pa->pa_deleted == 0);
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
++ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
++ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
++ e4b->bd_group, group, pa->pa_pstart);
++ return 0;
++ }
+ mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
+ atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
+@@ -4814,7 +4907,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+ */
+ static noinline_for_stack int
+ ext4_mb_discard_group_preallocations(struct super_block *sb,
+- ext4_group_t group, int needed)
++ ext4_group_t group, int *busy)
+ {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct buffer_head *bitmap_bh = NULL;
+@@ -4822,9 +4915,10 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
+ struct list_head list;
+ struct ext4_buddy e4b;
+ int err;
+- int busy = 0;
+- int free, free_total = 0;
++ int free = 0;
+
++ if (!grp)
++ return 0;
+ mb_debug(sb, "discard preallocation for group %u\n", group);
+ if (list_empty(&grp->bb_prealloc_list))
+ goto out_dbg;
+@@ -4846,19 +4940,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
+ goto out_dbg;
+ }
+
+- if (needed == 0)
+- needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
+-
+ INIT_LIST_HEAD(&list);
+-repeat:
+- free = 0;
+ ext4_lock_group(sb, group);
+ list_for_each_entry_safe(pa, tmp,
+ &grp->bb_prealloc_list, pa_group_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ spin_unlock(&pa->pa_lock);
+- busy = 1;
++ *busy = 1;
+ continue;
+ }
+ if (pa->pa_deleted) {
+@@ -4898,22 +4987,13 @@ repeat:
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+
+- free_total += free;
+-
+- /* if we still need more blocks and some PAs were used, try again */
+- if (free_total < needed && busy) {
+- ext4_unlock_group(sb, group);
+- cond_resched();
+- busy = 0;
+- goto repeat;
+- }
+ ext4_unlock_group(sb, group);
+ ext4_mb_unload_buddy(&e4b);
+ put_bh(bitmap_bh);
+ out_dbg:
+ mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
+- free_total, group, grp->bb_free);
+- return free_total;
++ free, group, grp->bb_free);
++ return free;
+ }
+
+ /*
+@@ -5076,6 +5156,9 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
+ struct ext4_prealloc_space *pa;
+ ext4_grpblk_t start;
+ struct list_head *cur;
++
++ if (!grp)
++ continue;
+ ext4_lock_group(sb, i);
+ list_for_each(cur, &grp->bb_prealloc_list) {
+ pa = list_entry(cur, struct ext4_prealloc_space,
+@@ -5147,6 +5230,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+ int bsbits = ac->ac_sb->s_blocksize_bits;
+ loff_t size, isize;
++ bool inode_pa_eligible, group_pa_eligible;
+
+ if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
+ return;
+@@ -5154,25 +5238,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
+ if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
+ return;
+
++ group_pa_eligible = sbi->s_mb_group_prealloc > 0;
++ inode_pa_eligible = true;
+ size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
+ isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
+ >> bsbits;
+
++ /* No point in using inode preallocation for closed files */
+ if ((size == isize) && !ext4_fs_is_busy(sbi) &&
+- !inode_is_open_for_write(ac->ac_inode)) {
+- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
+- return;
+- }
+-
+- if (sbi->s_mb_group_prealloc <= 0) {
+- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+- return;
+- }
++ !inode_is_open_for_write(ac->ac_inode))
++ inode_pa_eligible = false;
+
+- /* don't use group allocation for large files */
+ size = max(size, isize);
+- if (size > sbi->s_mb_stream_request) {
+- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
++ /* Don't use group allocation for large files */
++ if (size > sbi->s_mb_stream_request)
++ group_pa_eligible = false;
++
++ if (!group_pa_eligible) {
++ if (inode_pa_eligible)
++ ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
++ else
++ ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
+ return;
+ }
+
+@@ -5455,13 +5541,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
+ {
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ int ret;
+- int freed = 0;
++ int freed = 0, busy = 0;
++ int retry = 0;
+
+ trace_ext4_mb_discard_preallocations(sb, needed);
++
++ if (needed == 0)
++ needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
++ repeat:
+ for (i = 0; i < ngroups && needed > 0; i++) {
+- ret = ext4_mb_discard_group_preallocations(sb, i, needed);
++ ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
+ freed += ret;
+ needed -= ret;
++ cond_resched();
++ }
++
++ if (needed > 0 && busy && ++retry < 3) {
++ busy = 0;
++ goto repeat;
+ }
+
+ return freed;
+@@ -5508,6 +5605,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
+ ext4_fsblk_t block = 0;
+ unsigned int inquota = 0;
+ unsigned int reserv_clstrs = 0;
++ int retries = 0;
+ u64 seq;
+
+ might_sleep();
+@@ -5610,7 +5708,8 @@ repeat:
+ ar->len = ac->ac_b_ex.fe_len;
+ }
+ } else {
+- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
++ if (++retries < 3 &&
++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+ goto repeat;
+ /*
+ * If block allocation fails then the pa allocated above
+@@ -5757,7 +5856,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
+ struct super_block *sb = ar->inode->i_sb;
+ ext4_group_t group;
+ ext4_grpblk_t blkoff;
+- int i = sb->s_blocksize;
++ ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
++ ext4_grpblk_t i = 0;
+ ext4_fsblk_t goal, block;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+@@ -5779,19 +5879,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
+ ext4_get_group_no_and_offset(sb,
+ max(ext4_group_first_block_no(sb, group), goal),
+ NULL, &blkoff);
+- i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
++ while (1) {
++ i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
+ blkoff);
++ if (i >= max)
++ break;
++ if (ext4_fc_replay_check_excluded(sb,
++ ext4_group_first_block_no(sb, group) + i)) {
++ blkoff = i + 1;
++ } else
++ break;
++ }
+ brelse(bitmap_bh);
+- if (i >= sb->s_blocksize)
+- continue;
+- if (ext4_fc_replay_check_excluded(sb,
+- ext4_group_first_block_no(sb, group) + i))
+- continue;
+- break;
++ if (i < max)
++ break;
+ }
+
+- if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
++ if (group >= ext4_get_groups_count(sb) || i >= max) {
++ *errp = -ENOSPC;
+ return 0;
++ }
+
+ block = ext4_group_first_block_no(sb, group) + i;
+ ext4_mb_mark_bb(sb, block, 1, 1);
+@@ -5842,7 +5949,8 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
+ }
+
+ /**
+- * ext4_free_blocks() -- Free given blocks and update quota
++ * ext4_mb_clear_bb() -- helper function for freeing blocks.
++ * Used by ext4_free_blocks()
+ * @handle: handle for this transaction
+ * @inode: inode
+ * @bh: optional buffer of the block to be freed
+@@ -5850,13 +5958,14 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
+ * @count: number of blocks to be freed
+ * @flags: flags used by ext4_free_blocks
+ */
+-void ext4_free_blocks(handle_t *handle, struct inode *inode,
+- struct buffer_head *bh, ext4_fsblk_t block,
+- unsigned long count, int flags)
++static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
++ ext4_fsblk_t block, unsigned long count,
++ int flags)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct super_block *sb = inode->i_sb;
+ struct ext4_group_desc *gdp;
++ struct ext4_group_info *grp;
+ unsigned int overflow;
+ ext4_grpblk_t bit;
+ struct buffer_head *gd_bh;
+@@ -5869,86 +5978,21 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
+
+ sbi = EXT4_SB(sb);
+
+- if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+- ext4_free_blocks_simple(inode, block, count);
+- return;
+- }
+-
+- might_sleep();
+- if (bh) {
+- if (block)
+- BUG_ON(block != bh->b_blocknr);
+- else
+- block = bh->b_blocknr;
+- }
+-
+ if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+ !ext4_inode_block_valid(inode, block, count)) {
+- ext4_error(sb, "Freeing blocks not in datazone - "
+- "block = %llu, count = %lu", block, count);
++ ext4_error(sb, "Freeing blocks in system zone - "
++ "Block = %llu, count = %lu", block, count);
++ /* err = 0. ext4_std_error should be a no op */
+ goto error_return;
+ }
+-
+- ext4_debug("freeing block %llu\n", block);
+- trace_ext4_free_blocks(inode, block, count, flags);
+-
+- if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+- BUG_ON(count > 1);
+-
+- ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
+- inode, bh, block);
+- }
+-
+- /*
+- * If the extent to be freed does not begin on a cluster
+- * boundary, we need to deal with partial clusters at the
+- * beginning and end of the extent. Normally we will free
+- * blocks at the beginning or the end unless we are explicitly
+- * requested to avoid doing so.
+- */
+- overflow = EXT4_PBLK_COFF(sbi, block);
+- if (overflow) {
+- if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+- overflow = sbi->s_cluster_ratio - overflow;
+- block += overflow;
+- if (count > overflow)
+- count -= overflow;
+- else
+- return;
+- } else {
+- block -= overflow;
+- count += overflow;
+- }
+- }
+- overflow = EXT4_LBLK_COFF(sbi, count);
+- if (overflow) {
+- if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+- if (count > overflow)
+- count -= overflow;
+- else
+- return;
+- } else
+- count += sbi->s_cluster_ratio - overflow;
+- }
+-
+- if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+- int i;
+- int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
+-
+- for (i = 0; i < count; i++) {
+- cond_resched();
+- if (is_metadata)
+- bh = sb_find_get_block(inode->i_sb, block + i);
+- ext4_forget(handle, is_metadata, inode, bh, block + i);
+- }
+- }
++ flags |= EXT4_FREE_BLOCKS_VALIDATED;
+
+ do_more:
+ overflow = 0;
+ ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
+
+- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
+- ext4_get_group_info(sb, block_group))))
++ grp = ext4_get_group_info(sb, block_group);
++ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ return;
+
+ /*
+@@ -5959,6 +6003,8 @@ do_more:
+ overflow = EXT4_C2B(sbi, bit) + count -
+ EXT4_BLOCKS_PER_GROUP(sb);
+ count -= overflow;
++ /* The range changed so it's no longer validated */
++ flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
+ }
+ count_clusters = EXT4_NUM_B2C(sbi, count);
+ bitmap_bh = ext4_read_block_bitmap(sb, block_group);
+@@ -5973,13 +6019,8 @@ do_more:
+ goto error_return;
+ }
+
+- if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
+- in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
+- in_range(block, ext4_inode_table(sb, gdp),
+- sbi->s_itb_per_group) ||
+- in_range(block + count - 1, ext4_inode_table(sb, gdp),
+- sbi->s_itb_per_group)) {
+-
++ if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
++ !ext4_inode_block_valid(inode, block, count)) {
+ ext4_error(sb, "Freeing blocks in system zone - "
+ "Block = %llu, count = %lu", block, count);
+ /* err = 0. ext4_std_error should be a no op */
+@@ -6046,11 +6087,11 @@ do_more:
+ * them with group lock_held
+ */
+ if (test_opt(sb, DISCARD)) {
+- err = ext4_issue_discard(sb, block_group, bit, count,
+- NULL);
++ err = ext4_issue_discard(sb, block_group, bit,
++ count_clusters, NULL);
+ if (err && err != -EOPNOTSUPP)
+ ext4_msg(sb, KERN_WARNING, "discard request in"
+- " group:%d block:%d count:%lu failed"
++ " group:%u block:%d count:%lu failed"
+ " with %d", block_group, bit, count,
+ err);
+ } else
+@@ -6102,6 +6143,8 @@ do_more:
+ block += count;
+ count = overflow;
+ put_bh(bitmap_bh);
++ /* The range changed so it's no longer validated */
++ flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
+ goto do_more;
+ }
+ error_return:
+@@ -6110,6 +6153,109 @@ error_return:
+ return;
+ }
+
++/**
++ * ext4_free_blocks() -- Free given blocks and update quota
++ * @handle: handle for this transaction
++ * @inode: inode
++ * @bh: optional buffer of the block to be freed
++ * @block: starting physical block to be freed
++ * @count: number of blocks to be freed
++ * @flags: flags used by ext4_free_blocks
++ */
++void ext4_free_blocks(handle_t *handle, struct inode *inode,
++ struct buffer_head *bh, ext4_fsblk_t block,
++ unsigned long count, int flags)
++{
++ struct super_block *sb = inode->i_sb;
++ unsigned int overflow;
++ struct ext4_sb_info *sbi;
++
++ sbi = EXT4_SB(sb);
++
++ if (bh) {
++ if (block)
++ BUG_ON(block != bh->b_blocknr);
++ else
++ block = bh->b_blocknr;
++ }
++
++ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
++ ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count));
++ return;
++ }
++
++ might_sleep();
++
++ if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
++ !ext4_inode_block_valid(inode, block, count)) {
++ ext4_error(sb, "Freeing blocks not in datazone - "
++ "block = %llu, count = %lu", block, count);
++ return;
++ }
++ flags |= EXT4_FREE_BLOCKS_VALIDATED;
++
++ ext4_debug("freeing block %llu\n", block);
++ trace_ext4_free_blocks(inode, block, count, flags);
++
++ if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
++ BUG_ON(count > 1);
++
++ ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
++ inode, bh, block);
++ }
++
++ /*
++ * If the extent to be freed does not begin on a cluster
++ * boundary, we need to deal with partial clusters at the
++ * beginning and end of the extent. Normally we will free
++ * blocks at the beginning or the end unless we are explicitly
++ * requested to avoid doing so.
++ */
++ overflow = EXT4_PBLK_COFF(sbi, block);
++ if (overflow) {
++ if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
++ overflow = sbi->s_cluster_ratio - overflow;
++ block += overflow;
++ if (count > overflow)
++ count -= overflow;
++ else
++ return;
++ } else {
++ block -= overflow;
++ count += overflow;
++ }
++ /* The range changed so it's no longer validated */
++ flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
++ }
++ overflow = EXT4_LBLK_COFF(sbi, count);
++ if (overflow) {
++ if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
++ if (count > overflow)
++ count -= overflow;
++ else
++ return;
++ } else
++ count += sbi->s_cluster_ratio - overflow;
++ /* The range changed so it's no longer validated */
++ flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
++ }
++
++ if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
++ int i;
++ int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
++
++ for (i = 0; i < count; i++) {
++ cond_resched();
++ if (is_metadata)
++ bh = sb_find_get_block(inode->i_sb, block + i);
++ ext4_forget(handle, is_metadata, inode, bh, block + i);
++ }
++ }
++
++ ext4_mb_clear_bb(handle, inode, block, count, flags);
++ return;
++}
++
+ /**
+ * ext4_group_add_blocks() -- Add given blocks to an existing group
+ * @handle: handle to this transaction
+@@ -6166,11 +6312,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
+ goto error_return;
+ }
+
+- if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
+- in_range(ext4_inode_bitmap(sb, desc), block, count) ||
+- in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
+- in_range(block + count - 1, ext4_inode_table(sb, desc),
+- sbi->s_itb_per_group)) {
++ if (!ext4_sb_block_valid(sb, NULL, block, count)) {
+ ext4_error(sb, "Adding blocks in system zones - "
+ "Block = %llu, count = %lu",
+ block, count);
+@@ -6348,6 +6490,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
++ * @set_trimmed: set the trimmed flag if at least one block is trimmed
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+@@ -6357,7 +6500,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ static ext4_grpblk_t
+ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ ext4_grpblk_t start, ext4_grpblk_t max,
+- ext4_grpblk_t minblocks)
++ ext4_grpblk_t minblocks, bool set_trimmed)
+ {
+ struct ext4_buddy e4b;
+ int ret;
+@@ -6376,7 +6519,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+ minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
+ ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+- if (ret >= 0)
++ if (ret >= 0 && set_trimmed)
+ EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+ } else {
+ ret = 0;
+@@ -6405,6 +6548,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ */
+ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ {
++ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ struct ext4_group_info *grp;
+ ext4_group_t group, first_group, last_group;
+ ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
+@@ -6412,6 +6556,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ ext4_fsblk_t first_data_blk =
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
++ bool whole_group, eof = false;
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+@@ -6423,8 +6568,17 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ start >= max_blks ||
+ range->len < sb->s_blocksize)
+ return -EINVAL;
+- if (end >= max_blks)
++ /* No point to try to trim less than discard granularity */
++ if (range->minlen < q->limits.discard_granularity) {
++ minlen = EXT4_NUM_B2C(EXT4_SB(sb),
++ q->limits.discard_granularity >> sb->s_blocksize_bits);
++ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
++ goto out;
++ }
++ if (end >= max_blks - 1) {
+ end = max_blks - 1;
++ eof = true;
++ }
+ if (end <= first_data_blk)
+ goto out;
+ if (start < first_data_blk)
+@@ -6438,9 +6592,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+
+ /* end now represents the last cluster to discard in this group */
+ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
++ whole_group = true;
+
+ for (group = first_group; group <= last_group; group++) {
+ grp = ext4_get_group_info(sb, group);
++ if (!grp)
++ continue;
+ /* We only do this if the grp has never been initialized */
+ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+ ret = ext4_mb_init_group(sb, group, GFP_NOFS);
+@@ -6454,12 +6611,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ * change it for the last group, note that last_cluster is
+ * already computed earlier by ext4_get_group_no_and_offset()
+ */
+- if (group == last_group)
++ if (group == last_group) {
+ end = last_cluster;
+-
++ whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1;
++ }
+ if (grp->bb_free >= minlen) {
+ cnt = ext4_trim_all_free(sb, group, first_cluster,
+- end, minlen);
++ end, minlen, whole_group);
+ if (cnt < 0) {
+ ret = cnt;
+ break;
+diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
+index 7e0b4f81c6c06..b0ea646454ac8 100644
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -417,7 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
+ struct inode *tmp_inode = NULL;
+ struct migrate_struct lb;
+ unsigned long max_entries;
+- __u32 goal;
++ __u32 goal, tmp_csum_seed;
+ uid_t owner[2];
+
+ /*
+@@ -425,7 +425,8 @@ int ext4_ext_migrate(struct inode *inode)
+ * already is extent-based, error out.
+ */
+ if (!ext4_has_feature_extents(inode->i_sb) ||
+- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
++ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
++ ext4_has_inline_data(inode))
+ return -EINVAL;
+
+ if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
+@@ -437,12 +438,12 @@ int ext4_ext_migrate(struct inode *inode)
+ percpu_down_write(&sbi->s_writepages_rwsem);
+
+ /*
+- * Worst case we can touch the allocation bitmaps, a bgd
+- * block, and a block to link in the orphan list. We do need
+- * need to worry about credits for modifying the quota inode.
++ * Worst case we can touch the allocation bitmaps and a block
++ * group descriptor block. We do need need to worry about
++ * credits for modifying the quota inode.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
+- 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
++ 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
+
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+@@ -459,6 +460,14 @@ int ext4_ext_migrate(struct inode *inode)
+ ext4_journal_stop(handle);
+ goto out_unlock;
+ }
++ /*
++ * Use the correct seed for checksum (i.e. the seed from 'inode'). This
++ * is so that the metadata blocks will have the correct checksum after
++ * the migration.
++ */
++ ei = EXT4_I(inode);
++ tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed;
++ EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
+ i_size_write(tmp_inode, i_size_read(inode));
+ /*
+ * Set the i_nlink to zero so it will be deleted later
+@@ -467,7 +476,6 @@ int ext4_ext_migrate(struct inode *inode)
+ clear_nlink(tmp_inode);
+
+ ext4_ext_tree_init(handle, tmp_inode);
+- ext4_orphan_add(handle, tmp_inode);
+ ext4_journal_stop(handle);
+
+ /*
+@@ -492,17 +500,10 @@ int ext4_ext_migrate(struct inode *inode)
+
+ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
+ if (IS_ERR(handle)) {
+- /*
+- * It is impossible to update on-disk structures without
+- * a handle, so just rollback in-core changes and live other
+- * work to orphan_list_cleanup()
+- */
+- ext4_orphan_del(NULL, tmp_inode);
+ retval = PTR_ERR(handle);
+ goto out_tmp_inode;
+ }
+
+- ei = EXT4_I(inode);
+ i_data = ei->i_data;
+ memset(&lb, 0, sizeof(lb));
+
+@@ -576,6 +577,7 @@ err_out:
+ * the inode is not visible to user space.
+ */
+ tmp_inode->i_blocks = 0;
++ EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed;
+
+ /* Reset the extent details */
+ ext4_ext_tree_init(handle, tmp_inode);
+diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
+index cebea4270817e..3e8bce19ad16d 100644
+--- a/fs/ext4/mmp.c
++++ b/fs/ext4/mmp.c
+@@ -39,28 +39,36 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
+ * Write the MMP block using REQ_SYNC to try to get the block on-disk
+ * faster.
+ */
+-static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
++static int write_mmp_block_thawed(struct super_block *sb,
++ struct buffer_head *bh)
+ {
+ struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
+
+- /*
+- * We protect against freezing so that we don't create dirty buffers
+- * on frozen filesystem.
+- */
+- sb_start_write(sb);
+ ext4_mmp_csum_set(sb, mmp);
+ lock_buffer(bh);
+ bh->b_end_io = end_buffer_write_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
+ wait_on_buffer(bh);
+- sb_end_write(sb);
+ if (unlikely(!buffer_uptodate(bh)))
+ return -EIO;
+-
+ return 0;
+ }
+
++static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
++{
++ int err;
++
++ /*
++ * We protect against freezing so that we don't create dirty buffers
++ * on frozen filesystem.
++ */
++ sb_start_write(sb);
++ err = write_mmp_block_thawed(sb, bh);
++ sb_end_write(sb);
++ return err;
++}
++
+ /*
+ * Read the MMP block. It _must_ be read from disk and hence we clear the
+ * uptodate flag on the buffer.
+@@ -290,6 +298,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
+ if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
+ mmp_block >= ext4_blocks_count(es)) {
+ ext4_warning(sb, "Invalid MMP block in superblock");
++ retval = -EINVAL;
+ goto failed;
+ }
+
+@@ -315,6 +324,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
+
+ if (seq == EXT4_MMP_SEQ_FSCK) {
+ dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
++ retval = -EBUSY;
+ goto failed;
+ }
+
+@@ -328,6 +338,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
+
+ if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
+ ext4_warning(sb, "MMP startup interrupted, failing mount\n");
++ retval = -ETIMEDOUT;
+ goto failed;
+ }
+
+@@ -338,6 +349,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
+ if (seq != le32_to_cpu(mmp->mmp_seq)) {
+ dump_mmp_msg(sb, mmp,
+ "Device is already active on another node.");
++ retval = -EBUSY;
+ goto failed;
+ }
+
+@@ -348,7 +360,11 @@ skip:
+ seq = mmp_new_seq();
+ mmp->mmp_seq = cpu_to_le32(seq);
+
+- retval = write_mmp_block(sb, bh);
++ /*
++ * On mount / remount we are protected against fs freezing (by s_umount
++ * semaphore) and grabbing freeze protection upsets lockdep
++ */
++ retval = write_mmp_block_thawed(sb, bh);
+ if (retval)
+ goto failed;
+
+@@ -357,6 +373,7 @@ skip:
+ */
+ if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
+ ext4_warning(sb, "MMP startup interrupted, failing mount");
++ retval = -ETIMEDOUT;
+ goto failed;
+ }
+
+@@ -367,6 +384,7 @@ skip:
+ if (seq != le32_to_cpu(mmp->mmp_seq)) {
+ dump_mmp_msg(sb, mmp,
+ "Device is already active on another node.");
++ retval = -EBUSY;
+ goto failed;
+ }
+
+@@ -383,6 +401,7 @@ skip:
+ EXT4_SB(sb)->s_mmp_tsk = NULL;
+ ext4_warning(sb, "Unable to create kmmpd thread for %s.",
+ sb->s_id);
++ retval = -ENOMEM;
+ goto failed;
+ }
+
+@@ -390,5 +409,5 @@ skip:
+
+ failed:
+ brelse(bh);
+- return 1;
++ return retval;
+ }
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index da7698341d7d3..d44fe5b1a7255 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
+ struct inode *inode,
+ ext4_lblk_t *block)
+ {
++ struct ext4_map_blocks map;
+ struct buffer_head *bh;
+ int err;
+
+@@ -63,21 +64,41 @@ static struct buffer_head *ext4_append(handle_t *handle,
+ return ERR_PTR(-ENOSPC);
+
+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
++ map.m_lblk = *block;
++ map.m_len = 1;
++
++ /*
++ * We're appending new directory block. Make sure the block is not
++ * allocated yet, otherwise we will end up corrupting the
++ * directory.
++ */
++ err = ext4_map_blocks(NULL, inode, &map, 0);
++ if (err < 0)
++ return ERR_PTR(err);
++ if (err) {
++ EXT4_ERROR_INODE(inode, "Logical block already allocated");
++ return ERR_PTR(-EFSCORRUPTED);
++ }
+
+ bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
+ if (IS_ERR(bh))
+ return bh;
+ inode->i_size += inode->i_sb->s_blocksize;
+ EXT4_I(inode)->i_disksize = inode->i_size;
++ err = ext4_mark_inode_dirty(handle, inode);
++ if (err)
++ goto out;
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ EXT4_JTR_NONE);
+- if (err) {
+- brelse(bh);
+- ext4_std_error(inode->i_sb, err);
+- return ERR_PTR(err);
+- }
++ if (err)
++ goto out;
+ return bh;
++
++out:
++ brelse(bh);
++ ext4_std_error(inode->i_sb, err);
++ return ERR_PTR(err);
+ }
+
+ static int ext4_dx_csum_verify(struct inode *inode,
+@@ -110,6 +131,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
+ struct ext4_dir_entry *dirent;
+ int is_dx_block = 0;
+
++ if (block >= inode->i_size >> inode->i_blkbits) {
++ ext4_error_inode(inode, func, line, block,
++ "Attempting to read directory block (%u) that is past i_size (%llu)",
++ block, inode->i_size);
++ return ERR_PTR(-EFSCORRUPTED);
++ }
++
+ if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
+ bh = ERR_PTR(-EIO);
+ else
+@@ -277,9 +305,9 @@ static struct dx_frame *dx_probe(struct ext4_filename *fname,
+ struct dx_hash_info *hinfo,
+ struct dx_frame *frame);
+ static void dx_release(struct dx_frame *frames);
+-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
+- unsigned blocksize, struct dx_hash_info *hinfo,
+- struct dx_map_entry map[]);
++static int dx_make_map(struct inode *dir, struct buffer_head *bh,
++ struct dx_hash_info *hinfo,
++ struct dx_map_entry *map_tail);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+ static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
+ char *to, struct dx_map_entry *offsets,
+@@ -646,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
+ len = de->name_len;
+ if (!IS_ENCRYPTED(dir)) {
+ /* Directory is not encrypted */
+- ext4fs_dirhash(dir, de->name,
++ (void) ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
+ printk("%*.s:(U)%x.%u ", len,
+ name, h.hash,
+@@ -681,8 +709,9 @@ static struct stats dx_show_leaf(struct inode *dir,
+ if (IS_CASEFOLDED(dir))
+ h.hash = EXT4_DIRENT_HASH(de);
+ else
+- ext4fs_dirhash(dir, de->name,
+- de->name_len, &h);
++ (void) ext4fs_dirhash(dir,
++ de->name,
++ de->name_len, &h);
+ printk("%*.s:(E)%x.%u ", len, name,
+ h.hash, (unsigned) ((char *) de
+ - base));
+@@ -692,7 +721,8 @@ static struct stats dx_show_leaf(struct inode *dir,
+ #else
+ int len = de->name_len;
+ char *name = de->name;
+- ext4fs_dirhash(dir, de->name, de->name_len, &h);
++ (void) ext4fs_dirhash(dir, de->name,
++ de->name_len, &h);
+ printk("%*.s:%x.%u ", len, name, h.hash,
+ (unsigned) ((char *) de - base));
+ #endif
+@@ -777,12 +807,14 @@ static struct dx_frame *
+ dx_probe(struct ext4_filename *fname, struct inode *dir,
+ struct dx_hash_info *hinfo, struct dx_frame *frame_in)
+ {
+- unsigned count, indirect;
++ unsigned count, indirect, level, i;
+ struct dx_entry *at, *entries, *p, *q, *m;
+ struct dx_root *root;
+ struct dx_frame *frame = frame_in;
+ struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
+ u32 hash;
++ ext4_lblk_t block;
++ ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
+
+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
+ frame->bh = ext4_read_dirblock(dir, 0, INDEX);
+@@ -819,8 +851,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+ hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+ /* hash is already computed for encrypted casefolded directory */
+ if (fname && fname_name(fname) &&
+- !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
+- ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
++ !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) {
++ int ret = ext4fs_dirhash(dir, fname_name(fname),
++ fname_len(fname), hinfo);
++ if (ret < 0) {
++ ret_err = ERR_PTR(ret);
++ goto fail;
++ }
++ }
+ hash = hinfo->hash;
+
+ if (root->info.unused_flags & 1) {
+@@ -854,6 +892,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+ }
+
+ dxtrace(printk("Look up %x", hash));
++ level = 0;
++ blocks[0] = 0;
+ while (1) {
+ count = dx_get_count(entries);
+ if (!count || count > dx_get_limit(entries)) {
+@@ -882,15 +922,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+ dx_get_block(at)));
+ frame->entries = entries;
+ frame->at = at;
+- if (!indirect--)
++
++ block = dx_get_block(at);
++ for (i = 0; i <= level; i++) {
++ if (blocks[i] == block) {
++ ext4_warning_inode(dir,
++ "dx entry: tree cycle block %u points back to block %u",
++ blocks[level], block);
++ goto fail;
++ }
++ }
++ if (++level > indirect)
+ return frame;
++ blocks[level] = block;
+ frame++;
+- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
++ frame->bh = ext4_read_dirblock(dir, block, INDEX);
+ if (IS_ERR(frame->bh)) {
+ ret_err = (struct dx_frame *) frame->bh;
+ frame->bh = NULL;
+ goto fail;
+ }
++
+ entries = ((struct dx_node *) frame->bh->b_data)->entries;
+
+ if (dx_get_limit(entries) != dx_node_limit(dir)) {
+@@ -1067,7 +1119,12 @@ static int htree_dirblock_to_tree(struct file *dir_file,
+ hinfo->minor_hash = 0;
+ }
+ } else {
+- ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
++ err = ext4fs_dirhash(dir, de->name,
++ de->name_len, hinfo);
++ if (err < 0) {
++ count = err;
++ goto errout;
++ }
+ }
+ if ((hinfo->hash < start_hash) ||
+ ((hinfo->hash == start_hash) &&
+@@ -1249,20 +1306,32 @@ static inline int search_dirblock(struct buffer_head *bh,
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
+-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
+- unsigned blocksize, struct dx_hash_info *hinfo,
++static int dx_make_map(struct inode *dir, struct buffer_head *bh,
++ struct dx_hash_info *hinfo,
+ struct dx_map_entry *map_tail)
+ {
+ int count = 0;
+- char *base = (char *) de;
++ struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
++ unsigned int buflen = bh->b_size;
++ char *base = bh->b_data;
+ struct dx_hash_info h = *hinfo;
+
+- while ((char *) de < base + blocksize) {
++ if (ext4_has_metadata_csum(dir->i_sb))
++ buflen -= sizeof(struct ext4_dir_entry_tail);
++
++ while ((char *) de < base + buflen) {
++ if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
++ ((char *)de) - base))
++ return -EFSCORRUPTED;
+ if (de->name_len && de->inode) {
+ if (ext4_hash_in_dirent(dir))
+ h.hash = EXT4_DIRENT_HASH(de);
+- else
+- ext4fs_dirhash(dir, de->name, de->name_len, &h);
++ else {
++ int err = ext4fs_dirhash(dir, de->name,
++ de->name_len, &h);
++ if (err < 0)
++ return err;
++ }
+ map_tail--;
+ map_tail->hash = h.hash;
+ map_tail->offs = ((char *) de - base)>>2;
+@@ -1270,8 +1339,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
+ count++;
+ cond_resched();
+ }
+- /* XXX: do we need to check rec_len == 0 case? -Chris */
+- de = ext4_next_entry(de, blocksize);
++ de = ext4_next_entry(de, dir->i_sb->s_blocksize);
+ }
+ return count;
+ }
+@@ -1401,10 +1469,9 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ hinfo->hash_version = DX_HASH_SIPHASH;
+ hinfo->seed = NULL;
+ if (cf_name->name)
+- ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
++ return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+ else
+- ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
+- return 0;
++ return ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
+ }
+ #endif
+
+@@ -1466,10 +1533,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
+
+ de = (struct ext4_dir_entry_2 *)search_buf;
+ dlimit = search_buf + buf_size;
+- while ((char *) de < dlimit) {
++ while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
+ /* this code is executed quadratically often */
+ /* do minimal checking `by hand' */
+- if ((char *) de + de->name_len <= dlimit &&
++ if (de->name + de->name_len <= dlimit &&
+ ext4_match(dir, fname, de)) {
+ /* found a match - just to be sure, do
+ * a full check */
+@@ -1544,11 +1611,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
+ int has_inline_data = 1;
+ ret = ext4_find_inline_entry(dir, fname, res_dir,
+ &has_inline_data);
+- if (has_inline_data) {
+- if (inlined)
+- *inlined = 1;
++ if (inlined)
++ *inlined = has_inline_data;
++ if (has_inline_data)
+ goto cleanup_and_exit;
+- }
+ }
+
+ if ((namelen <= 2) && (name[0] == '.') &&
+@@ -1906,7 +1972,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+ struct dx_hash_info *hinfo)
+ {
+ unsigned blocksize = dir->i_sb->s_blocksize;
+- unsigned count, continued;
++ unsigned continued;
++ int count;
+ struct buffer_head *bh2;
+ ext4_lblk_t newblock;
+ u32 hash2;
+@@ -1943,8 +2010,11 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+
+ /* create map in the end of data2 block */
+ map = (struct dx_map_entry *) (data2 + blocksize);
+- count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
+- blocksize, hinfo, map);
++ count = dx_make_map(dir, *bh, hinfo, map);
++ if (count < 0) {
++ err = count;
++ goto journal_error;
++ }
+ map -= count;
+ dx_sort_map(map, count);
+ /* Ensure that neither split block is over half full */
+@@ -2204,8 +2274,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+ memset(de, 0, len); /* wipe old data */
+ de = (struct ext4_dir_entry_2 *) data2;
+ top = data2 + len;
+- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
++ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
++ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
++ (data2 + (blocksize - csum_size) -
++ (char *) de))) {
++ brelse(bh2);
++ brelse(bh);
++ return -EFSCORRUPTED;
++ }
+ de = de2;
++ }
+ de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
+ (char *) de, blocksize);
+
+@@ -2236,10 +2314,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+ fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+
+ /* casefolded encrypted hashes are computed on fname setup */
+- if (!ext4_hash_in_dirent(dir))
+- ext4fs_dirhash(dir, fname_name(fname),
+- fname_len(fname), &fname->hinfo);
+-
++ if (!ext4_hash_in_dirent(dir)) {
++ int err = ext4fs_dirhash(dir, fname_name(fname),
++ fname_len(fname), &fname->hinfo);
++ if (err < 0) {
++ brelse(bh2);
++ brelse(bh);
++ return err;
++ }
++ }
+ memset(frames, 0, sizeof(frames));
+ frame = frames;
+ frame->entries = entries;
+@@ -2716,6 +2799,7 @@ static int ext4_add_nondir(handle_t *handle,
+ return err;
+ }
+ drop_nlink(inode);
++ ext4_mark_inode_dirty(handle, inode);
+ ext4_orphan_add(handle, inode);
+ unlock_new_inode(inode);
+ return err;
+@@ -2997,14 +3081,14 @@ bool ext4_empty_dir(struct inode *inode)
+ if (inode->i_size < ext4_dir_rec_len(1, NULL) +
+ ext4_dir_rec_len(2, NULL)) {
+ EXT4_ERROR_INODE(inode, "invalid size");
+- return true;
++ return false;
+ }
+ /* The first directory block must not be a hole,
+ * so treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
+ if (IS_ERR(bh))
+- return true;
++ return false;
+
+ de = (struct ext4_dir_entry_2 *) bh->b_data;
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
+@@ -3012,7 +3096,7 @@ bool ext4_empty_dir(struct inode *inode)
+ le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
+ ext4_warning_inode(inode, "directory missing '.'");
+ brelse(bh);
+- return true;
++ return false;
+ }
+ offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
+ de = ext4_next_entry(de, sb->s_blocksize);
+@@ -3021,7 +3105,7 @@ bool ext4_empty_dir(struct inode *inode)
+ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+ ext4_warning_inode(inode, "directory missing '..'");
+ brelse(bh);
+- return true;
++ return false;
+ }
+ offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
+ while (offset < inode->i_size) {
+@@ -3035,16 +3119,13 @@ bool ext4_empty_dir(struct inode *inode)
+ continue;
+ }
+ if (IS_ERR(bh))
+- return true;
++ return false;
+ }
+ de = (struct ext4_dir_entry_2 *) (bh->b_data +
+ (offset & (sb->s_blocksize - 1)));
+ if (ext4_check_dir_entry(inode, NULL, de, bh,
+- bh->b_data, bh->b_size, offset)) {
+- offset = (offset | (sb->s_blocksize - 1)) + 1;
+- continue;
+- }
+- if (le32_to_cpu(de->inode)) {
++ bh->b_data, bh->b_size, offset) ||
++ le32_to_cpu(de->inode)) {
+ brelse(bh);
+ return false;
+ }
+@@ -3144,14 +3225,20 @@ end_rmdir:
+ return retval;
+ }
+
+-int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
+- struct inode *inode)
++int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
++ struct inode *inode,
++ struct dentry *dentry /* NULL during fast_commit recovery */)
+ {
+ int retval = -ENOENT;
+ struct buffer_head *bh;
+ struct ext4_dir_entry_2 *de;
++ handle_t *handle;
+ int skip_remove_dentry = 0;
+
++ /*
++ * Keep this outside the transaction; it may have to set up the
++ * directory's encryption key, which isn't GFP_NOFS-safe.
++ */
+ bh = ext4_find_entry(dir, d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+@@ -3168,7 +3255,14 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
+ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ skip_remove_dentry = 1;
+ else
+- goto out;
++ goto out_bh;
++ }
++
++ handle = ext4_journal_start(dir, EXT4_HT_DIR,
++ EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
++ if (IS_ERR(handle)) {
++ retval = PTR_ERR(handle);
++ goto out_bh;
+ }
+
+ if (IS_DIRSYNC(dir))
+@@ -3177,12 +3271,12 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
+ if (!skip_remove_dentry) {
+ retval = ext4_delete_entry(handle, dir, de, bh);
+ if (retval)
+- goto out;
++ goto out_handle;
+ dir->i_ctime = dir->i_mtime = current_time(dir);
+ ext4_update_dx_flag(dir);
+ retval = ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+- goto out;
++ goto out_handle;
+ } else {
+ retval = 0;
+ }
+@@ -3195,15 +3289,17 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name
+ ext4_orphan_add(handle, inode);
+ inode->i_ctime = current_time(inode);
+ retval = ext4_mark_inode_dirty(handle, inode);
+-
+-out:
++ if (dentry && !retval)
++ ext4_fc_track_unlink(handle, dentry);
++out_handle:
++ ext4_journal_stop(handle);
++out_bh:
+ brelse(bh);
+ return retval;
+ }
+
+ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+ {
+- handle_t *handle;
+ int retval;
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+@@ -3221,16 +3317,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+ if (retval)
+ goto out_trace;
+
+- handle = ext4_journal_start(dir, EXT4_HT_DIR,
+- EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+- if (IS_ERR(handle)) {
+- retval = PTR_ERR(handle);
+- goto out_trace;
+- }
+-
+- retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
+- if (!retval)
+- ext4_fc_track_unlink(handle, dentry);
++ retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
+ #ifdef CONFIG_UNICODE
+ /* VFS negative dentries are incompatible with Encoding and
+ * Case-insensitiveness. Eventually we'll want avoid
+@@ -3241,8 +3328,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
+ if (IS_CASEFOLDED(dir))
+ d_invalidate(dentry);
+ #endif
+- if (handle)
+- ext4_journal_stop(handle);
+
+ out_trace:
+ trace_ext4_unlink_exit(dentry, retval);
+@@ -3371,6 +3456,7 @@ err_drop_inode:
+ if (handle)
+ ext4_journal_stop(handle);
+ clear_nlink(inode);
++ ext4_mark_inode_dirty(handle, inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ out_free_encrypted_link:
+@@ -3455,6 +3541,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
+ struct buffer_head *bh;
+
+ if (!ext4_has_inline_data(inode)) {
++ struct ext4_dir_entry_2 *de;
++ unsigned int offset;
++
+ /* The first directory block must not be a hole, so
+ * treat it as DIRENT_HTREE
+ */
+@@ -3463,9 +3552,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
+ *retval = PTR_ERR(bh);
+ return NULL;
+ }
+- *parent_de = ext4_next_entry(
+- (struct ext4_dir_entry_2 *)bh->b_data,
+- inode->i_sb->s_blocksize);
++
++ de = (struct ext4_dir_entry_2 *) bh->b_data;
++ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
++ bh->b_size, 0) ||
++ le32_to_cpu(de->inode) != inode->i_ino ||
++ strcmp(".", de->name)) {
++ EXT4_ERROR_INODE(inode, "directory missing '.'");
++ brelse(bh);
++ *retval = -EFSCORRUPTED;
++ return NULL;
++ }
++ offset = ext4_rec_len_from_disk(de->rec_len,
++ inode->i_sb->s_blocksize);
++ de = ext4_next_entry(de, inode->i_sb->s_blocksize);
++ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
++ bh->b_size, offset) ||
++ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
++ EXT4_ERROR_INODE(inode, "directory missing '..'");
++ brelse(bh);
++ *retval = -EFSCORRUPTED;
++ return NULL;
++ }
++ *parent_de = de;
++
+ return bh;
+ }
+
+@@ -3572,7 +3682,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ * so the old->de may no longer valid and need to find it again
+ * before reset old inode info.
+ */
+- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
++ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
++ &old.inlined);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+@@ -3722,6 +3833,9 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ return -EXDEV;
+
+ retval = dquot_initialize(old.dir);
++ if (retval)
++ return retval;
++ retval = dquot_initialize(old.inode);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(new.dir);
+@@ -3736,9 +3850,11 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ return retval;
+ }
+
+- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
++ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
++ &old.inlined);
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
++
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -3889,7 +4005,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ * dirents in directories.
+ */
+ ext4_fc_mark_ineligible(old.inode->i_sb,
+- EXT4_FC_REASON_RENAME_DIR);
++ EXT4_FC_REASON_RENAME_DIR, handle);
+ } else {
+ if (new.inode)
+ ext4_fc_track_unlink(handle, new.dentry);
+@@ -3914,6 +4030,7 @@ end_rename:
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
+ drop_nlink(whiteout);
++ ext4_mark_inode_dirty(handle, whiteout);
+ ext4_orphan_add(handle, whiteout);
+ }
+ unlock_new_inode(whiteout);
+@@ -3926,6 +4043,7 @@ release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+ brelse(new.bh);
++
+ return retval;
+ }
+
+@@ -4049,7 +4167,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ if (unlikely(retval))
+ goto end_rename;
+ ext4_fc_mark_ineligible(new.inode->i_sb,
+- EXT4_FC_REASON_CROSS_RENAME);
++ EXT4_FC_REASON_CROSS_RENAME, handle);
+ if (old.dir_bh) {
+ retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
+ if (retval)
+diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
+index 53adc8f570a3f..c26c404ac58bf 100644
+--- a/fs/ext4/orphan.c
++++ b/fs/ext4/orphan.c
+@@ -181,8 +181,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
+ } else
+ brelse(iloc.bh);
+
+- jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
+- jbd_debug(4, "orphan inode %lu will point to %d\n",
++ ext4_debug("superblock will point to %lu\n", inode->i_ino);
++ ext4_debug("orphan inode %lu will point to %d\n",
+ inode->i_ino, NEXT_ORPHAN(inode));
+ out:
+ ext4_std_error(sb, err);
+@@ -251,7 +251,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
+ }
+
+ mutex_lock(&sbi->s_orphan_lock);
+- jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
++ ext4_debug("remove inode %lu from orphan list\n", inode->i_ino);
+
+ prev = ei->i_orphan.prev;
+ list_del_init(&ei->i_orphan);
+@@ -267,7 +267,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
+
+ ino_next = NEXT_ORPHAN(inode);
+ if (prev == &sbi->s_orphan) {
+- jbd_debug(4, "superblock will point to %u\n", ino_next);
++ ext4_debug("superblock will point to %u\n", ino_next);
+ BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode->i_sb,
+ sbi->s_sbh, EXT4_JTR_NONE);
+@@ -286,7 +286,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
+ struct inode *i_prev =
+ &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
+
+- jbd_debug(4, "orphan inode %lu will point to %u\n",
++ ext4_debug("orphan inode %lu will point to %u\n",
+ i_prev->i_ino, ino_next);
+ err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
+ if (err) {
+@@ -332,8 +332,8 @@ static void ext4_process_orphan(struct inode *inode,
+ ext4_msg(sb, KERN_DEBUG,
+ "%s: truncating inode %lu to %lld bytes",
+ __func__, inode->i_ino, inode->i_size);
+- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
+- inode->i_ino, inode->i_size);
++ ext4_debug("truncating inode %lu to %lld bytes\n",
++ inode->i_ino, inode->i_size);
+ inode_lock(inode);
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ret = ext4_truncate(inode);
+@@ -353,8 +353,8 @@ static void ext4_process_orphan(struct inode *inode,
+ ext4_msg(sb, KERN_DEBUG,
+ "%s: deleting unreferenced inode %lu",
+ __func__, inode->i_ino);
+- jbd_debug(2, "deleting unreferenced inode %lu\n",
+- inode->i_ino);
++ ext4_debug("deleting unreferenced inode %lu\n",
++ inode->i_ino);
+ (*nr_orphans)++;
+ }
+ iput(inode); /* The delete magic happens here! */
+@@ -391,7 +391,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+
+ if (!es->s_last_orphan && !oi->of_blocks) {
+- jbd_debug(4, "no orphan inodes to clean up\n");
++ ext4_debug("no orphan inodes to clean up\n");
+ return;
+ }
+
+@@ -412,10 +412,10 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
+ /* don't clear list on RO mount w/ errors */
+ if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
+ ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
+- "clearing orphan list.\n");
++ "clearing orphan list.");
+ es->s_last_orphan = 0;
+ }
+- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++ ext4_debug("Skipping orphan recovery on fs with errors.\n");
+ return;
+ }
+
+@@ -459,7 +459,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
+ * so, skip the rest.
+ */
+ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++ ext4_debug("Skipping orphan recovery on fs with errors.\n");
+ es->s_last_orphan = 0;
+ break;
+ }
+diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
+index f038d578d8d8f..03e224401b239 100644
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -134,8 +134,10 @@ static void ext4_finish_bio(struct bio *bio)
+ continue;
+ }
+ clear_buffer_async_write(bh);
+- if (bio->bi_status)
++ if (bio->bi_status) {
++ set_buffer_write_io_error(bh);
+ buffer_io_error(bh);
++ }
+ } while ((bh = bh->b_this_page) != head);
+ spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
+ if (!under_io) {
+@@ -411,7 +413,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
+
+ static void io_submit_add_bh(struct ext4_io_submit *io,
+ struct inode *inode,
+- struct page *page,
++ struct page *pagecache_page,
++ struct page *bounce_page,
+ struct buffer_head *bh)
+ {
+ int ret;
+@@ -425,10 +428,11 @@ submit_and_retry:
+ io_submit_init_bio(io, bh);
+ io->io_bio->bi_write_hint = inode->i_write_hint;
+ }
+- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
++ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
++ bh->b_size, bh_offset(bh));
+ if (ret != bh->b_size)
+ goto submit_and_retry;
+- wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
++ wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
+ io->io_next_block++;
+ }
+
+@@ -546,8 +550,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
+ do {
+ if (!buffer_async_write(bh))
+ continue;
+- io_submit_add_bh(io, inode,
+- bounce_page ? bounce_page : page, bh);
++ io_submit_add_bh(io, inode, page, bounce_page, bh);
+ nr_submitted++;
+ clear_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
+index b63cb88ccdaed..589ed99856f33 100644
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -52,6 +52,16 @@ int ext4_resize_begin(struct super_block *sb)
+ if (!capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
++ /*
++ * If the reserved GDT blocks is non-zero, the resize_inode feature
++ * should always be set.
++ */
++ if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
++ !ext4_has_feature_resize_inode(sb)) {
++ ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
++ return -EFSCORRUPTED;
++ }
++
+ /*
+ * If we are not using the primary superblock/GDT copy don't resize,
+ * because the user tools have no way of handling this. Probably a
+@@ -1435,8 +1445,6 @@ static void ext4_update_super(struct super_block *sb,
+ * active. */
+ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
+ reserved_blocks);
+- ext4_superblock_csum_set(sb);
+- unlock_buffer(sbi->s_sbh);
+
+ /* Update the free space counts */
+ percpu_counter_add(&sbi->s_freeclusters_counter,
+@@ -1462,7 +1470,10 @@ static void ext4_update_super(struct super_block *sb,
+ * Update the fs overhead information
+ */
+ ext4_calculate_overhead(sb);
++ es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+
++ ext4_superblock_csum_set(sb);
++ unlock_buffer(sbi->s_sbh);
+ if (test_opt(sb, DEBUG))
+ printk(KERN_DEBUG "EXT4-fs: added group %u:"
+ "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
+@@ -1546,8 +1557,8 @@ exit_journal:
+ int meta_bg = ext4_has_feature_meta_bg(sb);
+ sector_t old_gdb = 0;
+
+- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+- sizeof(struct ext4_super_block), 0);
++ update_backups(sb, ext4_group_first_block_no(sb, 0),
++ (char *)es, sizeof(struct ext4_super_block), 0);
+ for (; gdb_num <= gdb_num_end; gdb_num++) {
+ struct buffer_head *gdb_bh;
+
+@@ -1758,7 +1769,7 @@ errout:
+ if (test_opt(sb, DEBUG))
+ printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
+ "blocks\n", ext4_blocks_count(es));
+- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
++ update_backups(sb, ext4_group_first_block_no(sb, 0),
+ (char *)es, sizeof(struct ext4_super_block), 0);
+ }
+ return err;
+@@ -1966,6 +1977,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
+ }
+ brelse(bh);
+
++ /*
++ * For bigalloc, trim the requested size to the nearest cluster
++ * boundary to avoid creating an unusable filesystem. We do this
++ * silently, instead of returning an error, to avoid breaking
++ * callers that blindly resize the filesystem to the full size of
++ * the underlying block device.
++ */
++ if (ext4_has_feature_bigalloc(sb))
++ n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1);
++
+ retry:
+ o_blocks_count = ext4_blocks_count(es);
+
+@@ -2067,7 +2088,7 @@ retry:
+ goto out;
+ }
+
+- if (ext4_blocks_count(es) == n_blocks_count)
++ if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0)
+ goto out;
+
+ err = ext4_alloc_flex_bg_array(sb, n_group + 1);
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 88d5d274a8684..65716a17059d0 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -186,19 +186,12 @@ int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io)
+
+ int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
+ {
+- if (trylock_buffer(bh)) {
+- if (wait)
+- return ext4_read_bh(bh, op_flags, NULL);
++ lock_buffer(bh);
++ if (!wait) {
+ ext4_read_bh_nowait(bh, op_flags, NULL);
+ return 0;
+ }
+- if (wait) {
+- wait_on_buffer(bh);
+- if (buffer_uptodate(bh))
+- return 0;
+- return -EIO;
+- }
+- return 0;
++ return ext4_read_bh(bh, op_flags, NULL);
+ }
+
+ /*
+@@ -245,7 +238,8 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
+ struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
+
+ if (likely(bh)) {
+- ext4_read_bh_lock(bh, REQ_RAHEAD, false);
++ if (trylock_buffer(bh))
++ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
+ brelse(bh);
+ }
+ }
+@@ -1023,6 +1017,8 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
+
++ if (!grp || !gdp)
++ return;
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+@@ -1101,6 +1097,12 @@ static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
+ struct block_device *bdev;
+ bdev = sbi->s_journal_bdev;
+ if (bdev) {
++ /*
++ * Invalidate the journal device's buffers. We don't want them
++ * floating about in memory - the physical journal device may
++ * hotswapped, and it breaks the `ro-after' testing code.
++ */
++ invalidate_bdev(bdev);
+ ext4_blkdev_put(bdev);
+ sbi->s_journal_bdev = NULL;
+ }
+@@ -1167,20 +1169,25 @@ static void ext4_put_super(struct super_block *sb)
+ int aborted = 0;
+ int i, err;
+
+- ext4_unregister_li_request(sb);
+- ext4_quota_off_umount(sb);
+-
+- flush_work(&sbi->s_error_work);
+- destroy_workqueue(sbi->rsv_conversion_wq);
+- ext4_release_orphan_info(sb);
+-
+ /*
+ * Unregister sysfs before destroying jbd2 journal.
+ * Since we could still access attr_journal_task attribute via sysfs
+ * path which could have sbi->s_journal->j_task as NULL
++ * Unregister sysfs before flush sbi->s_error_work.
++ * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
++ * read metadata verify failed then will queue error work.
++ * flush_stashed_error_work will call start_this_handle may trigger
++ * BUG_ON.
+ */
+ ext4_unregister_sysfs(sb);
+
++ ext4_unregister_li_request(sb);
++ ext4_quota_off_umount(sb);
++
++ flush_work(&sbi->s_error_work);
++ destroy_workqueue(sbi->rsv_conversion_wq);
++ ext4_release_orphan_info(sb);
++
+ if (sbi->s_journal) {
+ aborted = is_journal_aborted(sbi->s_journal);
+ err = jbd2_journal_destroy(sbi->s_journal);
+@@ -1238,13 +1245,7 @@ static void ext4_put_super(struct super_block *sb)
+ sync_blockdev(sb->s_bdev);
+ invalidate_bdev(sb->s_bdev);
+ if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
+- /*
+- * Invalidate the journal device's buffers. We don't want them
+- * floating about in memory - the physical journal device may
+- * hotswapped, and it breaks the `ro-after' testing code.
+- */
+ sync_blockdev(sbi->s_journal_bdev);
+- invalidate_bdev(sbi->s_journal_bdev);
+ ext4_blkdev_remove(sbi);
+ }
+
+@@ -1289,6 +1290,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
+ return NULL;
+
+ inode_set_iversion(&ei->vfs_inode, 1);
++ ei->i_flags = 0;
+ spin_lock_init(&ei->i_raw_lock);
+ INIT_LIST_HEAD(&ei->i_prealloc_list);
+ atomic_set(&ei->i_prealloc_active, 0);
+@@ -1929,6 +1931,7 @@ static const struct mount_opts {
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
+ {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
++ {Opt_commit, 0, MOPT_NO_EXT2},
+ {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
+@@ -2053,6 +2056,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb,
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
++ if (!ext4_has_feature_encrypt(sb)) {
++ ext4_msg(sb, KERN_WARNING,
++ "test_dummy_encryption requires encrypt feature");
++ return -1;
++ }
++
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+@@ -2080,11 +2089,13 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb,
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
++ return 1;
+ #else
+ ext4_msg(sb, KERN_WARNING,
+- "Test dummy encryption mount option ignored");
++ "test_dummy_encryption option not supported");
++ return -1;
++
+ #endif
+- return 1;
+ }
+
+ struct ext4_parsed_options {
+@@ -2827,11 +2838,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
+ crc = crc16(crc, (__u8 *)gdp, offset);
+ offset += sizeof(gdp->bg_checksum); /* skip checksum */
+ /* for checksum of struct ext4_group_desc do the rest...*/
+- if (ext4_has_feature_64bit(sb) &&
+- offset < le16_to_cpu(sbi->s_es->s_desc_size))
++ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
+ crc = crc16(crc, (__u8 *)gdp + offset,
+- le16_to_cpu(sbi->s_es->s_desc_size) -
+- offset);
++ sbi->s_desc_size - offset);
+
+ out:
+ return cpu_to_le16(crc);
+@@ -3263,9 +3272,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
+ struct super_block *sb = elr->lr_super;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t group = elr->lr_next_group;
+- unsigned long timeout = 0;
+ unsigned int prefetch_ios = 0;
+ int ret = 0;
++ u64 start_time;
+
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+ elr->lr_next_group = ext4_mb_prefetch(sb, group,
+@@ -3302,14 +3311,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
+ ret = 1;
+
+ if (!ret) {
+- timeout = jiffies;
++ start_time = ktime_get_real_ns();
+ ret = ext4_init_inode_table(sb, group,
+ elr->lr_timeout ? 0 : 1);
+ trace_ext4_lazy_itable_init(sb, group);
+ if (elr->lr_timeout == 0) {
+- timeout = (jiffies - timeout) *
+- EXT4_SB(elr->lr_super)->s_li_wait_mult;
+- elr->lr_timeout = timeout;
++ elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
++ EXT4_SB(elr->lr_super)->s_li_wait_mult);
+ }
+ elr->lr_next_sched = jiffies + elr->lr_timeout;
+ elr->lr_next_group = group + 1;
+@@ -3364,6 +3372,7 @@ static int ext4_lazyinit_thread(void *arg)
+ unsigned long next_wakeup, cur;
+
+ BUG_ON(NULL == eli);
++ set_freezable();
+
+ cont_thread:
+ while (true) {
+@@ -3579,9 +3588,9 @@ int ext4_register_li_request(struct super_block *sb,
+ goto out;
+ }
+
+- if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
+- (first_not_zeroed == ngroups || sb_rdonly(sb) ||
+- !test_opt(sb, INIT_INODE_TABLE)))
++ if (sb_rdonly(sb) ||
++ (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
++ (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
+ goto out;
+
+ elr = ext4_li_request_new(sb, first_not_zeroed);
+@@ -3698,9 +3707,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
+ ext4_fsblk_t first_block, last_block, b;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ int s, j, count = 0;
++ int has_super = ext4_bg_has_super(sb, grp);
+
+ if (!ext4_has_feature_bigalloc(sb))
+- return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
++ return (has_super + ext4_bg_num_gdb(sb, grp) +
++ (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
+ sbi->s_itb_per_group + 2);
+
+ first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
+@@ -4384,7 +4395,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ sbi->s_inodes_per_block;
+ sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
+ sbi->s_sbh = bh;
+- sbi->s_mount_state = le16_to_cpu(es->s_state);
++ sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
+ sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
+ sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
+@@ -4614,14 +4625,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+
+ /* Initialize fast commit stuff */
+ atomic_set(&sbi->s_fc_subtid, 0);
+- atomic_set(&sbi->s_fc_ineligible_updates, 0);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
++ sbi->s_fc_ineligible_tid = 0;
+ spin_lock_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+ sbi->s_fc_replay_state.fc_regions = NULL;
+@@ -4638,9 +4648,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ ext4_has_feature_orphan_present(sb) ||
+ ext4_has_feature_journal_needs_recovery(sb));
+
+- if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
+- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
++ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
++ err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
++ if (err)
+ goto failed_mount3a;
++ }
+
+ /*
+ * The first inode we look at is the journal inode. Don't try
+@@ -4654,30 +4666,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ ext4_has_feature_journal_needs_recovery(sb)) {
+ ext4_msg(sb, KERN_ERR, "required journal recovery "
+ "suppressed and not mounted read-only");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ } else {
+ /* Nojournal mode, all journal mount options are illegal */
+- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+- ext4_msg(sb, KERN_ERR, "can't mount with "
+- "journal_checksum, fs mounted w/o journal");
+- goto failed_mount_wq;
+- }
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit, fs mounted w/o journal");
+- goto failed_mount_wq;
++ goto failed_mount3a;
++ }
++
++ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
++ ext4_msg(sb, KERN_ERR, "can't mount with "
++ "journal_checksum, fs mounted w/o journal");
++ goto failed_mount3a;
+ }
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "commit=%lu, fs mounted w/o journal",
+ sbi->s_commit_interval / HZ);
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ if (EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "data=, fs mounted w/o journal");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
+ clear_opt(sb, JOURNAL_CHECKSUM);
+@@ -4778,19 +4791,22 @@ no_journal:
+ goto failed_mount_wq;
+ }
+
+- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
+- !ext4_has_feature_encrypt(sb)) {
+- ext4_set_feature_encrypt(sb);
+- ext4_commit_super(sb);
+- }
+-
+ /*
+ * Get the # of file system overhead blocks from the
+ * superblock if present.
+ */
+- if (es->s_overhead_clusters)
+- sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+- else {
++ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
++ /* ignore the precalculated value if it is ridiculous */
++ if (sbi->s_overhead > ext4_blocks_count(es))
++ sbi->s_overhead = 0;
++ /*
++ * If the bigalloc feature is not enabled recalculating the
++ * overhead doesn't take long, so we might as well just redo
++ * it to make sure we are using the correct value.
++ */
++ if (!ext4_has_feature_bigalloc(sb))
++ sbi->s_overhead = 0;
++ if (sbi->s_overhead == 0) {
+ err = ext4_calculate_overhead(sb);
+ if (err)
+ goto failed_mount_wq;
+@@ -4892,14 +4908,6 @@ no_journal:
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
+ GFP_KERNEL);
+ }
+- /*
+- * Update the checksum after updating free space/inode
+- * counters. Otherwise the superblock can have an incorrect
+- * checksum in the buffer cache until it is written out and
+- * e2fsprogs programs trying to open a file system immediately
+- * after it is mounted can fail.
+- */
+- ext4_superblock_csum_set(sb);
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb), GFP_KERNEL);
+@@ -4957,11 +4965,19 @@ no_journal:
+ EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
+ ext4_orphan_cleanup(sb, es);
+ EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
++ /*
++ * Update the checksum after updating free space/inode counters and
++ * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
++ * checksum in the buffer cache until it is written out and
++ * e2fsprogs programs trying to open a file system immediately
++ * after it is mounted can fail.
++ */
++ ext4_superblock_csum_set(sb);
+ if (needs_recovery) {
+ ext4_msg(sb, KERN_INFO, "recovery complete");
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+- goto failed_mount9;
++ goto failed_mount10;
+ }
+ if (EXT4_SB(sb)->s_journal) {
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+@@ -5007,7 +5023,9 @@ cantfind_ext4:
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ goto failed_mount;
+
+-failed_mount9:
++failed_mount10:
++ ext4_quota_off_umount(sb);
++failed_mount9: __maybe_unused
+ ext4_release_orphan_info(sb);
+ failed_mount8:
+ ext4_unregister_sysfs(sb);
+@@ -5084,6 +5102,7 @@ failed_mount:
+ brelse(bh);
+ ext4_blkdev_remove(sbi);
+ out_fail:
++ invalidate_bdev(sb->s_bdev);
+ sb->s_fs_info = NULL;
+ kfree(sbi->s_blockgroup_lock);
+ out_free_base:
+@@ -5141,9 +5160,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ return NULL;
+ }
+
+- jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
++ ext4_debug("Journal inode found at %p: %lld bytes\n",
+ journal_inode, journal_inode->i_size);
+- if (!S_ISREG(journal_inode->i_mode)) {
++ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
+ ext4_msg(sb, KERN_ERR, "invalid journal inode");
+ iput(journal_inode);
+ return NULL;
+@@ -5371,8 +5390,11 @@ static int ext4_load_journal(struct super_block *sb,
+ if (!really_read_only && journal_devnum &&
+ journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+ es->s_journal_dev = cpu_to_le32(journal_devnum);
+-
+- /* Make sure we flush the recovery flag to disk. */
++ ext4_commit_super(sb);
++ }
++ if (!really_read_only && journal_inum &&
++ journal_inum != le32_to_cpu(es->s_journal_inum)) {
++ es->s_journal_inum = cpu_to_le32(journal_inum);
+ ext4_commit_super(sb);
+ }
+
+@@ -5727,10 +5749,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned long old_sb_flags, vfs_flags;
+ struct ext4_mount_options old_opts;
+- int enable_quota = 0;
+ ext4_group_t g;
+ int err = 0;
+ #ifdef CONFIG_QUOTA
++ int enable_quota = 0;
+ int i, j;
+ char *to_free[EXT4_MAXQUOTAS];
+ #endif
+@@ -5922,35 +5944,26 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ if (err)
+ goto restore_opts;
+ }
+- sbi->s_mount_state = le16_to_cpu(es->s_state);
++ sbi->s_mount_state = (le16_to_cpu(es->s_state) &
++ ~EXT4_FC_REPLAY);
+
+ err = ext4_setup_super(sb, es, 0);
+ if (err)
+ goto restore_opts;
+
+ sb->s_flags &= ~SB_RDONLY;
+- if (ext4_has_feature_mmp(sb))
+- if (ext4_multi_mount_protect(sb,
+- le64_to_cpu(es->s_mmp_block))) {
+- err = -EROFS;
++ if (ext4_has_feature_mmp(sb)) {
++ err = ext4_multi_mount_protect(sb,
++ le64_to_cpu(es->s_mmp_block));
++ if (err)
+ goto restore_opts;
+- }
++ }
++#ifdef CONFIG_QUOTA
+ enable_quota = 1;
++#endif
+ }
+ }
+
+- /*
+- * Reinitialize lazy itable initialization thread based on
+- * current settings
+- */
+- if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
+- ext4_unregister_li_request(sb);
+- else {
+- ext4_group_t first_not_zeroed;
+- first_not_zeroed = ext4_has_uninit_itable(sb);
+- ext4_register_li_request(sb, first_not_zeroed);
+- }
+-
+ /*
+ * Handle creation of system zone data early because it can fail.
+ * Releasing of existing data is done when we are sure remount will
+@@ -5969,9 +5982,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ }
+
+ #ifdef CONFIG_QUOTA
+- /* Release old quota file names */
+- for (i = 0; i < EXT4_MAXQUOTAS; i++)
+- kfree(old_opts.s_qf_names[i]);
+ if (enable_quota) {
+ if (sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
+@@ -5981,10 +5991,25 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ goto restore_opts;
+ }
+ }
++ /* Release old quota file names */
++ for (i = 0; i < EXT4_MAXQUOTAS; i++)
++ kfree(old_opts.s_qf_names[i]);
+ #endif
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
+
++ /*
++ * Reinitialize lazy itable initialization thread based on
++ * current settings
++ */
++ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
++ ext4_unregister_li_request(sb);
++ else {
++ ext4_group_t first_not_zeroed;
++ first_not_zeroed = ext4_has_uninit_itable(sb);
++ ext4_register_li_request(sb, first_not_zeroed);
++ }
++
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
+ ext4_stop_mmpd(sbi);
+
+@@ -6001,6 +6026,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ return 0;
+
+ restore_opts:
++ /*
++ * If there was a failing r/w to ro transition, we may need to
++ * re-enable quota
++ */
++ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
++ sb_any_quota_suspended(sb))
++ dquot_resume(sb, -1);
+ sb->s_flags = old_sb_flags;
+ sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
+@@ -6189,7 +6221,7 @@ static int ext4_write_info(struct super_block *sb, int type)
+ handle_t *handle;
+
+ /* Data block + inode block */
+- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
++ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ret = dquot_commit_info(sb, type);
+@@ -6267,10 +6299,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
+
+ lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
+ err = dquot_quota_on(sb, type, format_id, path);
+- if (err) {
+- lockdep_set_quota_inode(path->dentry->d_inode,
+- I_DATA_SEM_NORMAL);
+- } else {
++ if (!err) {
+ struct inode *inode = d_inode(path->dentry);
+ handle_t *handle;
+
+@@ -6290,10 +6319,29 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
+ ext4_journal_stop(handle);
+ unlock_inode:
+ inode_unlock(inode);
++ if (err)
++ dquot_quota_off(sb, type);
+ }
++ if (err)
++ lockdep_set_quota_inode(path->dentry->d_inode,
++ I_DATA_SEM_NORMAL);
+ return err;
+ }
+
++static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
++{
++ switch (type) {
++ case USRQUOTA:
++ return qf_inum == EXT4_USR_QUOTA_INO;
++ case GRPQUOTA:
++ return qf_inum == EXT4_GRP_QUOTA_INO;
++ case PRJQUOTA:
++ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
++ default:
++ BUG();
++ }
++}
++
+ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags)
+ {
+@@ -6310,9 +6358,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ if (!qf_inums[type])
+ return -EPERM;
+
++ if (!ext4_check_quota_inum(type, qf_inums[type])) {
++ ext4_error(sb, "Bad quota inum: %lu, type: %d",
++ qf_inums[type], type);
++ return -EUCLEAN;
++ }
++
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
+ if (IS_ERR(qf_inode)) {
+- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
++ ext4_error(sb, "Bad quota inode: %lu, type: %d",
++ qf_inums[type], type);
+ return PTR_ERR(qf_inode);
+ }
+
+@@ -6351,10 +6406,22 @@ int ext4_enable_quotas(struct super_block *sb)
+ if (err) {
+ ext4_warning(sb,
+ "Failed to enable quota tracking "
+- "(type=%d, err=%d). Please run "
+- "e2fsck to fix.", type, err);
+- for (type--; type >= 0; type--)
++ "(type=%d, err=%d, ino=%lu). "
++ "Please run e2fsck to fix.", type,
++ err, qf_inums[type]);
++ for (type--; type >= 0; type--) {
++ struct inode *inode;
++
++ inode = sb_dqopt(sb)->files[type];
++ if (inode)
++ inode = igrab(inode);
+ dquot_quota_off(sb, type);
++ if (inode) {
++ lockdep_set_quota_inode(inode,
++ I_DATA_SEM_NORMAL);
++ iput(inode);
++ }
++ }
+
+ return err;
+ }
+@@ -6458,7 +6525,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
+ struct buffer_head *bh;
+ handle_t *handle = journal_current_handle();
+
+- if (EXT4_SB(sb)->s_journal && !handle) {
++ if (!handle) {
+ ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+ " cancelled because transaction is not started",
+ (unsigned long long)off, (unsigned long long)len);
+@@ -6641,6 +6708,7 @@ static int __init ext4_init_fs(void)
+ out:
+ unregister_as_ext2();
+ unregister_as_ext3();
++ ext4_fc_destroy_dentry_cache();
+ out05:
+ destroy_inodecache();
+ out1:
+@@ -6667,6 +6735,7 @@ static void __exit ext4_exit_fs(void)
+ unregister_as_ext2();
+ unregister_as_ext3();
+ unregister_filesystem(&ext4_fs_type);
++ ext4_fc_destroy_dentry_cache();
+ destroy_inodecache();
+ ext4_exit_mballoc();
+ ext4_exit_sysfs();
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index 2314f74465924..aa07b78ba9104 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -489,6 +489,11 @@ static void ext4_sb_release(struct kobject *kobj)
+ complete(&sbi->s_kobj_unregister);
+ }
+
++static void ext4_feat_release(struct kobject *kobj)
++{
++ kfree(kobj);
++}
++
+ static const struct sysfs_ops ext4_attr_ops = {
+ .show = ext4_attr_show,
+ .store = ext4_attr_store,
+@@ -503,7 +508,7 @@ static struct kobj_type ext4_sb_ktype = {
+ static struct kobj_type ext4_feat_ktype = {
+ .default_groups = ext4_feat_groups,
+ .sysfs_ops = &ext4_attr_ops,
+- .release = (void (*)(struct kobject *))kfree,
++ .release = ext4_feat_release,
+ };
+
+ void ext4_notify_error_sysfs(struct ext4_sb_info *sbi)
+diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
+index eacbd489e3bf1..5ece4d3c62109 100644
+--- a/fs/ext4/verity.c
++++ b/fs/ext4/verity.c
+@@ -76,7 +76,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+@@ -364,13 +364,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index,
+ unsigned long num_ra_pages)
+ {
+- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+ struct page *page;
+
+ index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
+ if (!page || !PageUptodate(page)) {
++ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
++
+ if (page)
+ put_page(page);
+ else if (num_ra_pages > 1)
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 1e0fc1ed845bf..f79705af3acad 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -123,7 +123,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+ #ifdef CONFIG_LOCKDEP
+ void ext4_xattr_inode_set_class(struct inode *ea_inode)
+ {
++ struct ext4_inode_info *ei = EXT4_I(ea_inode);
++
+ lockdep_set_subclass(&ea_inode->i_rwsem, 1);
++ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
++ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
+ }
+ #endif
+
+@@ -386,7 +390,18 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ struct inode *inode;
+ int err;
+
+- inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
++ /*
++ * We have to check for this corruption early as otherwise
++ * iget_locked() could wait indefinitely for the state of our
++ * parent inode.
++ */
++ if (parent->i_ino == ea_ino) {
++ ext4_error(parent->i_sb,
++ "Parent and EA inode have the same ino %lu", ea_ino);
++ return -EFSCORRUPTED;
++ }
++
++ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ext4_error(parent->i_sb,
+@@ -394,23 +409,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ err);
+ return err;
+ }
+-
+- if (is_bad_inode(inode)) {
+- ext4_error(parent->i_sb,
+- "error while reading EA inode %lu is_bad_inode",
+- ea_ino);
+- err = -EIO;
+- goto error;
+- }
+-
+- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+- ext4_error(parent->i_sb,
+- "EA inode %lu does not have EXT4_EA_INODE_FL flag",
+- ea_ino);
+- err = -EINVAL;
+- goto error;
+- }
+-
+ ext4_xattr_inode_set_class(inode);
+
+ /*
+@@ -431,9 +429,21 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+
+ *ea_inode = inode;
+ return 0;
+-error:
+- iput(inode);
+- return err;
++}
++
++/* Remove entry from mbcache when EA inode is getting evicted */
++void ext4_evict_ea_inode(struct inode *inode)
++{
++ struct mb_cache_entry *oe;
++
++ if (!EA_INODE_CACHE(inode))
++ return;
++ /* Wait for entry to get unused so that we can remove it */
++ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
++ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
++ mb_cache_entry_wait_unused(oe);
++ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
++ }
+ }
+
+ static int
+@@ -976,10 +986,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+ int ref_change)
+ {
+- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+ struct ext4_iloc iloc;
+ s64 ref_count;
+- u32 hash;
+ int ret;
+
+ inode_lock(ea_inode);
+@@ -1002,14 +1010,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+
+ set_nlink(ea_inode, 1);
+ ext4_orphan_del(handle, ea_inode);
+-
+- if (ea_inode_cache) {
+- hash = ext4_xattr_inode_get_hash(ea_inode);
+- mb_cache_entry_create(ea_inode_cache,
+- GFP_NOFS, hash,
+- ea_inode->i_ino,
+- true /* reusable */);
+- }
+ }
+ } else {
+ WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+@@ -1022,12 +1022,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+
+ clear_nlink(ea_inode);
+ ext4_orphan_add(handle, ea_inode);
+-
+- if (ea_inode_cache) {
+- hash = ext4_xattr_inode_get_hash(ea_inode);
+- mb_cache_entry_delete(ea_inode_cache, hash,
+- ea_inode->i_ino);
+- }
+ }
+ }
+
+@@ -1237,6 +1231,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ if (error)
+ goto out;
+
++retry_ref:
+ lock_buffer(bh);
+ hash = le32_to_cpu(BHDR(bh)->h_hash);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+@@ -1246,9 +1241,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+- if (ea_block_cache)
+- mb_cache_entry_delete(ea_block_cache, hash,
+- bh->b_blocknr);
++ if (ea_block_cache) {
++ struct mb_cache_entry *oe;
++
++ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
++ bh->b_blocknr);
++ if (oe) {
++ unlock_buffer(bh);
++ mb_cache_entry_wait_unused(oe);
++ mb_cache_entry_put(ea_block_cache, oe);
++ goto retry_ref;
++ }
++ }
+ get_bh(bh);
+ unlock_buffer(bh);
+
+@@ -1272,7 +1276,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ ce = mb_cache_entry_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+- ce->e_reusable = 1;
++ set_bit(MBE_REUSABLE_B, &ce->e_flags);
+ mb_cache_entry_put(ea_block_cache, ce);
+ }
+ }
+@@ -1413,6 +1417,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
+ uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
+ int err;
+
++ if (inode->i_sb->s_root == NULL) {
++ ext4_warning(inode->i_sb,
++ "refuse to create EA inode when umounting");
++ WARN_ON(1);
++ return ERR_PTR(-EINVAL);
++ }
++
+ /*
+ * Let the next inode be the goal, so we try and allocate the EA inode
+ * in the same group, or nearby one.
+@@ -1432,6 +1443,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
+ if (!err)
+ err = ext4_inode_attach_jinode(ea_inode);
+ if (err) {
++ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
++ ext4_warning_inode(ea_inode,
++ "cleanup dec ref error %d", err);
+ iput(ea_inode);
+ return ERR_PTR(err);
+ }
+@@ -1477,11 +1491,11 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+
+ while (ce) {
+ ea_inode = ext4_iget(inode->i_sb, ce->e_value,
+- EXT4_IGET_NORMAL);
+- if (!IS_ERR(ea_inode) &&
+- !is_bad_inode(ea_inode) &&
+- (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
+- i_size_read(ea_inode) == value_len &&
++ EXT4_IGET_EA_INODE);
++ if (IS_ERR(ea_inode))
++ goto next_entry;
++ ext4_xattr_inode_set_class(ea_inode);
++ if (i_size_read(ea_inode) == value_len &&
+ !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
+ !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
+ value_len) &&
+@@ -1491,9 +1505,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+ kvfree(ea_data);
+ return ea_inode;
+ }
+-
+- if (!IS_ERR(ea_inode))
+- iput(ea_inode);
++ iput(ea_inode);
++ next_entry:
+ ce = mb_cache_entry_find_next(ea_inode_cache, ce);
+ }
+ kvfree(ea_data);
+@@ -1719,6 +1732,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ memmove(here, (void *)here + size,
+ (void *)last - (void *)here + sizeof(__u32));
+ memset(last, 0, size);
++
++ /*
++ * Update i_inline_off - moved ibody region might contain
++ * system.data attribute. Handling a failure here won't
++ * cause other complications for setting an xattr.
++ */
++ if (!is_block && ext4_has_inline_data(inode)) {
++ ret = ext4_find_inline_data_nolock(inode);
++ if (ret) {
++ ext4_warning_inode(inode,
++ "unable to update i_inline_off");
++ goto out;
++ }
++ }
+ } else if (s->not_found) {
+ /* Insert new name. */
+ size_t size = EXT4_XATTR_LEN(name_len);
+@@ -1858,6 +1885,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ #define header(x) ((struct ext4_xattr_header *)(x))
+
+ if (s->base) {
++ int offset = (char *)s->here - bs->bh->b_data;
++
+ BUFFER_TRACE(bs->bh, "get_write_access");
+ error = ext4_journal_get_write_access(handle, sb, bs->bh,
+ EXT4_JTR_NONE);
+@@ -1873,9 +1902,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+- if (ea_block_cache)
+- mb_cache_entry_delete(ea_block_cache, hash,
+- bs->bh->b_blocknr);
++ if (ea_block_cache) {
++ struct mb_cache_entry *oe;
++
++ oe = mb_cache_entry_delete_or_get(ea_block_cache,
++ hash, bs->bh->b_blocknr);
++ if (oe) {
++ /*
++ * Xattr block is getting reused. Leave
++ * it alone.
++ */
++ mb_cache_entry_put(ea_block_cache, oe);
++ goto clone_block;
++ }
++ }
+ ea_bdebug(bs->bh, "modifying in-place");
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ true /* is_block */);
+@@ -1890,50 +1930,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ if (error)
+ goto cleanup;
+ goto inserted;
+- } else {
+- int offset = (char *)s->here - bs->bh->b_data;
++ }
++clone_block:
++ unlock_buffer(bs->bh);
++ ea_bdebug(bs->bh, "cloning");
++ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
++ error = -ENOMEM;
++ if (s->base == NULL)
++ goto cleanup;
++ s->first = ENTRY(header(s->base)+1);
++ header(s->base)->h_refcount = cpu_to_le32(1);
++ s->here = ENTRY(s->base + offset);
++ s->end = s->base + bs->bh->b_size;
+
+- unlock_buffer(bs->bh);
+- ea_bdebug(bs->bh, "cloning");
+- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
+- error = -ENOMEM;
+- if (s->base == NULL)
++ /*
++ * If existing entry points to an xattr inode, we need
++ * to prevent ext4_xattr_set_entry() from decrementing
++ * ref count on it because the reference belongs to the
++ * original block. In this case, make the entry look
++ * like it has an empty value.
++ */
++ if (!s->not_found && s->here->e_value_inum) {
++ ea_ino = le32_to_cpu(s->here->e_value_inum);
++ error = ext4_xattr_inode_iget(inode, ea_ino,
++ le32_to_cpu(s->here->e_hash),
++ &tmp_inode);
++ if (error)
+ goto cleanup;
+- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+- s->first = ENTRY(header(s->base)+1);
+- header(s->base)->h_refcount = cpu_to_le32(1);
+- s->here = ENTRY(s->base + offset);
+- s->end = s->base + bs->bh->b_size;
+
+- /*
+- * If existing entry points to an xattr inode, we need
+- * to prevent ext4_xattr_set_entry() from decrementing
+- * ref count on it because the reference belongs to the
+- * original block. In this case, make the entry look
+- * like it has an empty value.
+- */
+- if (!s->not_found && s->here->e_value_inum) {
+- ea_ino = le32_to_cpu(s->here->e_value_inum);
+- error = ext4_xattr_inode_iget(inode, ea_ino,
+- le32_to_cpu(s->here->e_hash),
+- &tmp_inode);
+- if (error)
+- goto cleanup;
+-
+- if (!ext4_test_inode_state(tmp_inode,
+- EXT4_STATE_LUSTRE_EA_INODE)) {
+- /*
+- * Defer quota free call for previous
+- * inode until success is guaranteed.
+- */
+- old_ea_inode_quota = le32_to_cpu(
+- s->here->e_value_size);
+- }
+- iput(tmp_inode);
+-
+- s->here->e_value_inum = 0;
+- s->here->e_value_size = 0;
++ if (!ext4_test_inode_state(tmp_inode,
++ EXT4_STATE_LUSTRE_EA_INODE)) {
++ /*
++ * Defer quota free call for previous
++ * inode until success is guaranteed.
++ */
++ old_ea_inode_quota = le32_to_cpu(
++ s->here->e_value_size);
+ }
++ iput(tmp_inode);
++
++ s->here->e_value_inum = 0;
++ s->here->e_value_size = 0;
+ }
+ } else {
+ /* Allocate a buffer where we construct the new block. */
+@@ -1983,8 +2020,9 @@ inserted:
+ else {
+ u32 ref;
+
++#ifdef EXT4_XATTR_DEBUG
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+-
++#endif
+ /* The old block is released after updating
+ the inode. */
+ error = dquot_alloc_block(inode,
+@@ -2000,18 +2038,13 @@ inserted:
+ lock_buffer(new_bh);
+ /*
+ * We have to be careful about races with
+- * freeing, rehashing or adding references to
+- * xattr block. Once we hold buffer lock xattr
+- * block's state is stable so we can check
+- * whether the block got freed / rehashed or
+- * not. Since we unhash mbcache entry under
+- * buffer lock when freeing / rehashing xattr
+- * block, checking whether entry is still
+- * hashed is reliable. Same rules hold for
+- * e_reusable handling.
++ * adding references to xattr block. Once we
++ * hold buffer lock xattr block's state is
++ * stable so we can check the additional
++ * reference fits.
+ */
+- if (hlist_bl_unhashed(&ce->e_hash_list) ||
+- !ce->e_reusable) {
++ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
++ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+@@ -2026,10 +2059,9 @@ inserted:
+ new_bh = NULL;
+ goto inserted;
+ }
+- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+- ce->e_reusable = 0;
++ if (ref == EXT4_XATTR_REFCOUNT_MAX)
++ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
+ ea_bdebug(new_bh, "reusing; refcount now=%d",
+ ref);
+ ext4_xattr_block_csum_set(inode, new_bh);
+@@ -2053,23 +2085,16 @@ inserted:
+ /* We need to allocate a new block */
+ ext4_fsblk_t goal, block;
+
++#ifdef EXT4_XATTR_DEBUG
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+-
++#endif
+ goal = ext4_group_first_block_no(sb,
+ EXT4_I(inode)->i_block_group);
+-
+- /* non-extent files can't have physical blocks past 2^32 */
+- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
+-
+ block = ext4_new_meta_blocks(handle, inode, goal, 0,
+ NULL, &error);
+ if (error)
+ goto cleanup;
+
+- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
+-
+ ea_idebug(inode, "creating block %llu",
+ (unsigned long long)block);
+
+@@ -2176,8 +2201,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+ struct ext4_inode *raw_inode;
+ int error;
+
+- if (EXT4_I(inode)->i_extra_isize == 0)
++ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+ return 0;
++
+ raw_inode = ext4_raw_inode(&is->iloc);
+ header = IHDR(inode, raw_inode);
+ is->s.base = is->s.first = IFIRST(header);
+@@ -2205,8 +2231,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_search *s = &is->s;
+ int error;
+
+- if (EXT4_I(inode)->i_extra_isize == 0)
++ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+ return -ENOSPC;
++
+ error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+ if (error)
+ return error;
+@@ -2408,7 +2435,7 @@ retry_inode:
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
+
+ cleanup:
+ brelse(is.iloc.bh);
+@@ -2486,7 +2513,7 @@ retry:
+ if (error == 0)
+ error = error2;
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
+
+ return error;
+ }
+@@ -2535,13 +2562,13 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
+ .in_inode = !!entry->e_value_inum,
+ };
+ struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
++ int needs_kvfree = 0;
+ int error;
+
+ is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
+ bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
+- buffer = kmalloc(value_size, GFP_NOFS);
+ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
+- if (!is || !bs || !buffer || !b_entry_name) {
++ if (!is || !bs || !b_entry_name) {
+ error = -ENOMEM;
+ goto out;
+ }
+@@ -2553,12 +2580,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
+
+ /* Save the entry name and the entry value */
+ if (entry->e_value_inum) {
++ buffer = kvmalloc(value_size, GFP_NOFS);
++ if (!buffer) {
++ error = -ENOMEM;
++ goto out;
++ }
++ needs_kvfree = 1;
+ error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
+ if (error)
+ goto out;
+ } else {
+ size_t value_offs = le16_to_cpu(entry->e_value_offs);
+- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
++ buffer = (void *)IFIRST(header) + value_offs;
+ }
+
+ memcpy(b_entry_name, entry->e_name, entry->e_name_len);
+@@ -2573,25 +2606,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
+ if (error)
+ goto out;
+
+- /* Remove the chosen entry from the inode */
+- error = ext4_xattr_ibody_set(handle, inode, &i, is);
+- if (error)
+- goto out;
+-
+ i.value = buffer;
+ i.value_len = value_size;
+ error = ext4_xattr_block_find(inode, &i, bs);
+ if (error)
+ goto out;
+
+- /* Add entry which was removed from the inode into the block */
++ /* Move ea entry from the inode into the block */
+ error = ext4_xattr_block_set(handle, inode, &i, bs);
+ if (error)
+ goto out;
+- error = 0;
++
++ /* Remove the chosen entry from the inode */
++ i.value = NULL;
++ i.value_len = 0;
++ error = ext4_xattr_ibody_set(handle, inode, &i, is);
++
+ out:
+ kfree(b_entry_name);
+- kfree(buffer);
++ if (needs_kvfree && buffer)
++ kvfree(buffer);
+ if (is)
+ brelse(is->iloc.bh);
+ if (bs)
+@@ -2766,6 +2800,9 @@ shift:
+ (void *)header, total_ino);
+ EXT4_I(inode)->i_extra_isize = new_extra_isize;
+
++ if (ext4_has_inline_data(inode))
++ error = ext4_find_inline_data_nolock(inode);
++
+ cleanup:
+ if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
+ ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
+@@ -2920,7 +2957,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ error);
+ goto cleanup;
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
+ }
+ error = 0;
+ cleanup:
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index 77efb9a627ad2..e5e36bd11f055 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -95,6 +95,19 @@ struct ext4_xattr_entry {
+
+ #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
+
++/*
++ * If we want to add an xattr to the inode, we should make sure that
++ * i_extra_isize is not 0 and that the inode size is not less than
++ * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad.
++ * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data
++ * |--------------------------|------------|------|---------|---|-------|
++ */
++#define EXT4_INODE_HAS_XATTR_SPACE(inode) \
++ ((EXT4_I(inode)->i_extra_isize != 0) && \
++ (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \
++ sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \
++ EXT4_INODE_SIZE((inode)->i_sb)))
++
+ struct ext4_xattr_info {
+ const char *name;
+ const void *value;
+@@ -178,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+
+ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+ struct ext4_inode *raw_inode, handle_t *handle);
++extern void ext4_evict_ea_inode(struct inode *inode);
+
+ extern const struct xattr_handler *ext4_xattr_handlers[];
+
+diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
+index 83e9bc0f91ffd..d6ae6de35af20 100644
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -139,7 +139,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ unsigned int segno, offset;
+ bool exist;
+
+- if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
++ if (type == DATA_GENERIC)
+ return true;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+@@ -147,11 +147,18 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ se = get_seg_entry(sbi, segno);
+
+ exist = f2fs_test_bit(offset, se->cur_valid_map);
++ if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
++ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
++ blkaddr, exist);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ return exist;
++ }
++
+ if (!exist && type == DATA_GENERIC_ENHANCE) {
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+- WARN_ON(1);
++ dump_stack();
+ }
+ return exist;
+ }
+@@ -184,12 +191,13 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ case DATA_GENERIC:
+ case DATA_GENERIC_ENHANCE:
+ case DATA_GENERIC_ENHANCE_READ:
++ case DATA_GENERIC_ENHANCE_UPDATE:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ f2fs_warn(sbi, "access invalid blkaddr:%u",
+ blkaddr);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+- WARN_ON(1);
++ dump_stack();
+ return false;
+ } else {
+ return __is_bitmap_valid(sbi, blkaddr, type);
+@@ -304,8 +312,15 @@ static int __f2fs_write_meta_page(struct page *page,
+
+ trace_f2fs_writepage(page, META);
+
+- if (unlikely(f2fs_cp_error(sbi)))
++ if (unlikely(f2fs_cp_error(sbi))) {
++ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
++ ClearPageUptodate(page);
++ dec_page_count(sbi, F2FS_DIRTY_META);
++ unlock_page(page);
++ return 0;
++ }
+ goto redirty_out;
++ }
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
+@@ -653,7 +668,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+ return PTR_ERR(inode);
+ }
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err) {
+ iput(inode);
+ goto err_out;
+@@ -664,7 +679,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+ /* truncate all the data during iput */
+ iput(inode);
+
+- err = f2fs_get_node_info(sbi, ino, &ni);
++ err = f2fs_get_node_info(sbi, ino, &ni, false);
+ if (err)
+ goto err_out;
+
+@@ -867,6 +882,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
+ struct f2fs_checkpoint *cp_block = NULL;
+ unsigned long long cur_version = 0, pre_version = 0;
++ unsigned int cp_blocks;
+ int err;
+
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+@@ -874,15 +890,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ if (err)
+ return NULL;
+
+- if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+- sbi->blocks_per_seg) {
++ cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
++
++ if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
+ f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
+ pre_version = *version;
+
+- cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
++ cp_addr += cp_blocks - 1;
+ err = get_checkpoint_version(sbi, cp_addr, &cp_block,
+ &cp_page_2, version);
+ if (err)
+@@ -1052,7 +1069,8 @@ void f2fs_remove_dirty_inode(struct inode *inode)
+ spin_unlock(&sbi->inode_lock[type]);
+ }
+
+-int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
++int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type,
++ bool from_cp)
+ {
+ struct list_head *head;
+ struct inode *inode;
+@@ -1087,11 +1105,15 @@ retry:
+ if (inode) {
+ unsigned long cur_ino = inode->i_ino;
+
+- F2FS_I(inode)->cp_task = current;
++ if (from_cp)
++ F2FS_I(inode)->cp_task = current;
++ F2FS_I(inode)->wb_task = current;
+
+ filemap_fdatawrite(inode->i_mapping);
+
+- F2FS_I(inode)->cp_task = NULL;
++ F2FS_I(inode)->wb_task = NULL;
++ if (from_cp)
++ F2FS_I(inode)->cp_task = NULL;
+
+ iput(inode);
+ /* We need to give cpu to another writers. */
+@@ -1162,7 +1184,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+ if (!is_journalled_quota(sbi))
+ return false;
+
+- down_write(&sbi->quota_sem);
++ if (!down_write_trylock(&sbi->quota_sem))
++ return true;
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+@@ -1219,7 +1242,7 @@ retry_flush_dents:
+ /* write all the dirty dentry pages */
+ if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
+ f2fs_unlock_all(sbi);
+- err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
++ err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true);
+ if (err)
+ return err;
+ cond_resched();
+@@ -1282,7 +1305,8 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
+ if (!get_pages(sbi, type))
+ break;
+
+- if (unlikely(f2fs_cp_error(sbi)))
++ if (unlikely(f2fs_cp_error(sbi) &&
++ !is_sbi_flag_set(sbi, SBI_IS_CLOSE)))
+ break;
+
+ if (type == F2FS_DIRTY_META)
+@@ -1304,8 +1328,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+ unsigned long flags;
+
+ if (cpc->reason & CP_UMOUNT) {
+- if (le32_to_cpu(ckpt->cp_pack_total_block_count) >
+- sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
++ if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
++ NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
+ clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+ f2fs_notice(sbi, "Disable nat_bits due to no space");
+ } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
+@@ -1889,15 +1913,27 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
+ void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi)
+ {
+ struct ckpt_req_control *cprc = &sbi->cprc_info;
++ struct task_struct *ckpt_task;
+
+- if (cprc->f2fs_issue_ckpt) {
+- struct task_struct *ckpt_task = cprc->f2fs_issue_ckpt;
++ if (!cprc->f2fs_issue_ckpt)
++ return;
+
+- cprc->f2fs_issue_ckpt = NULL;
+- kthread_stop(ckpt_task);
++ ckpt_task = cprc->f2fs_issue_ckpt;
++ cprc->f2fs_issue_ckpt = NULL;
++ kthread_stop(ckpt_task);
+
+- flush_remained_ckpt_reqs(sbi, NULL);
+- }
++ f2fs_flush_ckpt_thread(sbi);
++}
++
++void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi)
++{
++ struct ckpt_req_control *cprc = &sbi->cprc_info;
++
++ flush_remained_ckpt_reqs(sbi, NULL);
++
++ /* Let's wait for the previous dispatched checkpoint. */
++ while (atomic_read(&cprc->queued_ckpt))
++ io_schedule_timeout(DEFAULT_IO_TIMEOUT);
+ }
+
+ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi)
+diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
+index c1bf9ad4c2207..455fac164fda0 100644
+--- a/fs/f2fs/compress.c
++++ b/fs/f2fs/compress.c
+@@ -312,10 +312,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
+ }
+
+ if (ret != PAGE_SIZE << dic->log_cluster_size) {
+- printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, "
++ printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, "
+ "expected:%lu\n", KERN_ERR,
+- F2FS_I_SB(dic->inode)->sb->s_id,
+- dic->rlen,
++ F2FS_I_SB(dic->inode)->sb->s_id, ret,
+ PAGE_SIZE << dic->log_cluster_size);
+ return -EIO;
+ }
+@@ -1276,7 +1275,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
+
+ psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT;
+
+- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
++ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
+ if (err)
+ goto out_put_dnode;
+
+@@ -1448,25 +1447,44 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
+ enum iostat_type io_type)
+ {
+ struct address_space *mapping = cc->inode->i_mapping;
+- int _submitted, compr_blocks, ret;
+- int i = -1, err = 0;
++ int _submitted, compr_blocks, ret, i;
+
+ compr_blocks = f2fs_compressed_blocks(cc);
+- if (compr_blocks < 0) {
+- err = compr_blocks;
+- goto out_err;
++
++ for (i = 0; i < cc->cluster_size; i++) {
++ if (!cc->rpages[i])
++ continue;
++
++ redirty_page_for_writepage(wbc, cc->rpages[i]);
++ unlock_page(cc->rpages[i]);
+ }
+
++ if (compr_blocks < 0)
++ return compr_blocks;
++
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+ retry_write:
++ lock_page(cc->rpages[i]);
++
+ if (cc->rpages[i]->mapping != mapping) {
++continue_unlock:
+ unlock_page(cc->rpages[i]);
+ continue;
+ }
+
+- BUG_ON(!PageLocked(cc->rpages[i]));
++ if (!PageDirty(cc->rpages[i]))
++ goto continue_unlock;
++
++ if (PageWriteback(cc->rpages[i])) {
++ if (wbc->sync_mode == WB_SYNC_NONE)
++ goto continue_unlock;
++ f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true);
++ }
++
++ if (!clear_page_dirty_for_io(cc->rpages[i]))
++ goto continue_unlock;
+
+ ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+ NULL, NULL, wbc, io_type,
+@@ -1481,26 +1499,15 @@ retry_write:
+ * avoid deadlock caused by cluster update race
+ * from foreground operation.
+ */
+- if (IS_NOQUOTA(cc->inode)) {
+- err = 0;
+- goto out_err;
+- }
++ if (IS_NOQUOTA(cc->inode))
++ return 0;
+ ret = 0;
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC,
+ DEFAULT_IO_TIMEOUT);
+- lock_page(cc->rpages[i]);
+-
+- if (!PageDirty(cc->rpages[i])) {
+- unlock_page(cc->rpages[i]);
+- continue;
+- }
+-
+- clear_page_dirty_for_io(cc->rpages[i]);
+ goto retry_write;
+ }
+- err = ret;
+- goto out_err;
++ return ret;
+ }
+
+ *submitted += _submitted;
+@@ -1509,14 +1516,6 @@ retry_write:
+ f2fs_balance_fs(F2FS_M_SB(mapping), true);
+
+ return 0;
+-out_err:
+- for (++i; i < cc->cluster_size; i++) {
+- if (!cc->rpages[i])
+- continue;
+- redirty_page_for_writepage(wbc, cc->rpages[i]);
+- unlock_page(cc->rpages[i]);
+- }
+- return err;
+ }
+
+ int f2fs_write_multi_pages(struct compress_ctx *cc,
+@@ -1530,6 +1529,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
+ if (cluster_may_compress(cc)) {
+ err = f2fs_compress_pages(cc);
+ if (err == -EAGAIN) {
++ add_compr_block_stat(cc->inode, cc->cluster_size);
+ goto write;
+ } else if (err) {
+ f2fs_put_rpages_wbc(cc, wbc, true, 1);
+diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
+index f4fd6c246c9a9..3956852ad1de0 100644
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -677,7 +677,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
+ }
+
+ if (fio->io_wbc && !is_read_io(fio->op))
+- wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
++ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+
+ __attach_io_flag(fio);
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
+@@ -808,6 +808,8 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
+ bool found = false;
+ struct bio *target = bio ? *bio : NULL;
+
++ f2fs_bug_on(sbi, !target && !page);
++
+ for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
+ struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
+ struct list_head *head = &io->bio_list;
+@@ -887,7 +889,7 @@ alloc_new:
+ }
+
+ if (fio->io_wbc)
+- wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
++ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+
+@@ -961,7 +963,7 @@ alloc_new:
+ }
+
+ if (fio->io_wbc)
+- wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
++ wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+
+ io->last_block_in_bio = fio->new_blkaddr;
+
+@@ -1354,7 +1356,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
+ if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
+ return -EPERM;
+
+- err = f2fs_get_node_info(sbi, dn->nid, &ni);
++ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
+ if (err)
+ return err;
+
+@@ -1796,7 +1798,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
+ if (!page)
+ return -ENOMEM;
+
+- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
++ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+@@ -1828,7 +1830,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
+ if (!page)
+ return -ENOMEM;
+
+- err = f2fs_get_node_info(sbi, xnid, &ni);
++ err = f2fs_get_node_info(sbi, xnid, &ni, false);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+@@ -2564,6 +2566,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+ {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
++ /* The below cases were checked when setting it. */
++ if (f2fs_is_pinned_file(inode))
++ return false;
++ if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
++ return true;
+ if (f2fs_lfs_mode(sbi))
+ return true;
+ if (S_ISDIR(inode->i_mode))
+@@ -2572,8 +2579,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+ return true;
+ if (f2fs_is_atomic_file(inode))
+ return true;
+- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+- return true;
+
+ /* swap file is migrating in aligned write mode */
+ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
+@@ -2685,7 +2690,7 @@ got_it:
+ fio->need_lock = LOCK_REQ;
+ }
+
+- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
++ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
+ if (err)
+ goto out_writepage;
+
+@@ -2759,7 +2764,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
+ * don't drop any dirty dentry pages for keeping lastest
+ * directory structure.
+ */
+- if (S_ISDIR(inode->i_mode))
++ if (S_ISDIR(inode->i_mode) &&
++ !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
+ goto redirty_out;
+ goto out;
+ }
+@@ -2859,12 +2865,13 @@ out:
+ }
+ unlock_page(page);
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
+- !F2FS_I(inode)->cp_task && allow_balance)
++ !F2FS_I(inode)->wb_task && allow_balance)
+ f2fs_balance_fs(sbi, need_balance_fs);
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_merged_write(sbi, DATA);
+- f2fs_submit_merged_ipu_write(sbi, bio, NULL);
++ if (bio && *bio)
++ f2fs_submit_merged_ipu_write(sbi, bio, NULL);
+ submitted = NULL;
+ }
+
+@@ -3157,7 +3164,7 @@ static inline bool __should_serialize_io(struct inode *inode,
+ struct writeback_control *wbc)
+ {
+ /* to avoid deadlock in path of data flush */
+- if (F2FS_I(inode)->cp_task)
++ if (F2FS_I(inode)->wb_task)
+ return false;
+
+ if (!S_ISREG(inode->i_mode))
+@@ -3211,8 +3218,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
+ /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req[DATA]);
+- else if (atomic_read(&sbi->wb_sync_req[DATA]))
++ else if (atomic_read(&sbi->wb_sync_req[DATA])) {
++ /* to avoid potential deadlock */
++ if (current->plug)
++ blk_finish_plug(current->plug);
+ goto skip_write;
++ }
+
+ if (__should_serialize_io(inode, wbc)) {
+ mutex_lock(&sbi->writepages);
+@@ -3403,7 +3414,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
+
+ *fsdata = NULL;
+
+- if (len == PAGE_SIZE)
++ if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
+ goto repeat;
+
+ ret = f2fs_prepare_compress_overwrite(inode, pagep,
+diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
+index 8c50518475a99..b449c7a372a4b 100644
+--- a/fs/f2fs/debug.c
++++ b/fs/f2fs/debug.c
+@@ -21,7 +21,7 @@
+ #include "gc.h"
+
+ static LIST_HEAD(f2fs_stat_list);
+-static DEFINE_MUTEX(f2fs_stat_mutex);
++static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
+ #ifdef CONFIG_DEBUG_FS
+ static struct dentry *f2fs_debugfs_root;
+ #endif
+@@ -338,14 +338,16 @@ static char *s_flag[] = {
+ [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
+ [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
+ [SBI_IS_RESIZEFS] = " resizefs",
++ [SBI_IS_FREEZING] = " freezefs",
+ };
+
+ static int stat_show(struct seq_file *s, void *v)
+ {
+ struct f2fs_stat_info *si;
+ int i = 0, j = 0;
++ unsigned long flags;
+
+- mutex_lock(&f2fs_stat_mutex);
++ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ list_for_each_entry(si, &f2fs_stat_list, stat_list) {
+ update_general_status(si->sbi);
+
+@@ -573,7 +575,7 @@ static int stat_show(struct seq_file *s, void *v)
+ seq_printf(s, " - paged : %llu KB\n",
+ si->page_mem >> 10);
+ }
+- mutex_unlock(&f2fs_stat_mutex);
++ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ return 0;
+ }
+
+@@ -584,6 +586,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
+ {
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ struct f2fs_stat_info *si;
++ unsigned long flags;
+ int i;
+
+ si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
+@@ -619,9 +622,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
+ atomic_set(&sbi->max_aw_cnt, 0);
+ atomic_set(&sbi->max_vw_cnt, 0);
+
+- mutex_lock(&f2fs_stat_mutex);
++ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ list_add_tail(&si->stat_list, &f2fs_stat_list);
+- mutex_unlock(&f2fs_stat_mutex);
++ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+
+ return 0;
+ }
+@@ -629,10 +632,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
+ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
+ {
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
++ unsigned long flags;
+
+- mutex_lock(&f2fs_stat_mutex);
++ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ list_del(&si->stat_list);
+- mutex_unlock(&f2fs_stat_mutex);
++ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+
+ kfree(si);
+ }
+diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
+index 1820e9c106f7d..5c78350158df1 100644
+--- a/fs/f2fs/dir.c
++++ b/fs/f2fs/dir.c
+@@ -82,7 +82,8 @@ int f2fs_init_casefolded_name(const struct inode *dir,
+ #ifdef CONFIG_UNICODE
+ struct super_block *sb = dir->i_sb;
+
+- if (IS_CASEFOLDED(dir)) {
++ if (IS_CASEFOLDED(dir) &&
++ !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
+ fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+ GFP_NOFS, false, F2FS_SB(sb));
+ if (!fname->cf_name.name)
+diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
+index 866e72b29bd5a..6a9ab5c11939f 100644
+--- a/fs/f2fs/extent_cache.c
++++ b/fs/f2fs/extent_cache.c
+@@ -415,7 +415,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+ struct extent_node *en;
+ bool ret = false;
+
+- f2fs_bug_on(sbi, !et);
++ if (!et)
++ return false;
+
+ trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+
+@@ -804,9 +805,8 @@ void f2fs_drop_extent_tree(struct inode *inode)
+ if (!f2fs_may_extent_tree(inode))
+ return;
+
+- set_inode_flag(inode, FI_NO_EXTENT);
+-
+ write_lock(&et->lock);
++ set_inode_flag(inode, FI_NO_EXTENT);
+ __free_extent_tree(sbi, et);
+ if (et->largest.len) {
+ et->largest.len = 0;
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index b339ae89c1ad1..835ef98643bd4 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -55,6 +55,7 @@ enum {
+ FAULT_DISCARD,
+ FAULT_WRITE_IO,
+ FAULT_SLAB_ALLOC,
++ FAULT_DQUOT_INIT,
+ FAULT_MAX,
+ };
+
+@@ -248,6 +249,10 @@ enum {
+ * condition of read on truncated area
+ * by extent_cache
+ */
++ DATA_GENERIC_ENHANCE_UPDATE, /*
++ * strong check on range and segment
++ * bitmap for update case
++ */
+ META_GENERIC,
+ };
+
+@@ -487,11 +492,11 @@ struct f2fs_filename {
+ #ifdef CONFIG_UNICODE
+ /*
+ * For casefolded directories: the casefolded name, but it's left NULL
+- * if the original name is not valid Unicode, if the directory is both
+- * casefolded and encrypted and its encryption key is unavailable, or if
+- * the filesystem is doing an internal operation where usr_fname is also
+- * NULL. In all these cases we fall back to treating the name as an
+- * opaque byte sequence.
++ * if the original name is not valid Unicode, if the original name is
++ * "." or "..", if the directory is both casefolded and encrypted and
++ * its encryption key is unavailable, or if the filesystem is doing an
++ * internal operation where usr_fname is also NULL. In all these cases
++ * we fall back to treating the name as an opaque byte sequence.
+ */
+ struct fscrypt_str cf_name;
+ #endif
+@@ -744,6 +749,7 @@ struct f2fs_inode_info {
+ unsigned int clevel; /* maximum level of given file name */
+ struct task_struct *task; /* lookup and create consistency */
+ struct task_struct *cp_task; /* separate cp/wb IO stats*/
++ struct task_struct *wb_task; /* indicate inode is in context of writeback */
+ nid_t i_xattr_nid; /* node id that contains xattrs */
+ loff_t last_disk_size; /* lastly written file size */
+ spinlock_t i_size_lock; /* protect last_disk_size */
+@@ -1012,6 +1018,7 @@ struct f2fs_sm_info {
+ unsigned int segment_count; /* total # of segments */
+ unsigned int main_segments; /* # of segments in main area */
+ unsigned int reserved_segments; /* # of reserved segments */
++ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
+ unsigned int ovp_segments; /* # of overprovision segments */
+
+ /* a threshold to reclaim prefree segments */
+@@ -1077,8 +1084,8 @@ enum count_type {
+ */
+ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
+ enum page_type {
+- DATA,
+- NODE,
++ DATA = 0,
++ NODE = 1, /* should not change this */
+ META,
+ NR_PAGE_TYPE,
+ META_FLUSH,
+@@ -1204,7 +1211,6 @@ struct f2fs_dev_info {
+ #ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int nr_blkz; /* Total number of zones */
+ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
+- block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
+ #endif
+ };
+
+@@ -1252,6 +1258,7 @@ enum {
+ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
+ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
+ SBI_IS_RESIZEFS, /* resizefs is in process */
++ SBI_IS_FREEZING, /* freezefs is in process */
+ };
+
+ enum {
+@@ -1624,6 +1631,7 @@ struct f2fs_sb_info {
+ unsigned int meta_ino_num; /* meta inode number*/
+ unsigned int log_blocks_per_seg; /* log2 blocks per segment */
+ unsigned int blocks_per_seg; /* blocks per segment */
++ unsigned int unusable_blocks_per_sec; /* unusable blocks per section */
+ unsigned int segs_per_sec; /* segments per section */
+ unsigned int secs_per_zone; /* sections per zone */
+ unsigned int total_sections; /* total section count */
+@@ -2184,6 +2192,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
+
+ if (!__allow_reserved_blocks(sbi, inode, true))
+ avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
++
++ if (F2FS_IO_ALIGNED(sbi))
++ avail_user_block_count -= sbi->blocks_per_seg *
++ SM_I(sbi)->additional_reserved_segments;
++
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+@@ -2430,6 +2443,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
+
+ if (!__allow_reserved_blocks(sbi, inode, false))
+ valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
++
++ if (F2FS_IO_ALIGNED(sbi))
++ valid_block_count += sbi->blocks_per_seg *
++ SM_I(sbi)->additional_reserved_segments;
++
+ user_block_count = sbi->user_block_count;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ user_block_count -= sbi->unusable_block_count;
+@@ -2474,11 +2492,17 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
+ {
+ spin_lock(&sbi->stat_lock);
+
+- f2fs_bug_on(sbi, !sbi->total_valid_block_count);
+- f2fs_bug_on(sbi, !sbi->total_valid_node_count);
++ if (unlikely(!sbi->total_valid_block_count ||
++ !sbi->total_valid_node_count)) {
++ f2fs_warn(sbi, "dec_valid_node_count: inconsistent block counts, total_valid_block:%u, total_valid_node:%u",
++ sbi->total_valid_block_count,
++ sbi->total_valid_node_count);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ } else {
++ sbi->total_valid_block_count--;
++ sbi->total_valid_node_count--;
++ }
+
+- sbi->total_valid_node_count--;
+- sbi->total_valid_block_count--;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks++;
+@@ -3363,6 +3387,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
+ */
+ int f2fs_inode_dirtied(struct inode *inode, bool sync);
+ void f2fs_inode_synced(struct inode *inode);
++int f2fs_dquot_initialize(struct inode *inode);
+ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+ int f2fs_quota_sync(struct super_block *sb, int type);
+ loff_t max_file_blocks(struct inode *inode);
+@@ -3391,7 +3416,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
+ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
+ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+- struct node_info *ni);
++ struct node_info *ni, bool checkpoint_context);
+ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
+ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
+ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
+@@ -3520,6 +3545,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ * checkpoint.c
+ */
+ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
++void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi);
+ struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
+@@ -3548,7 +3574,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi);
+ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
+ void f2fs_update_dirty_page(struct inode *inode, struct page *page);
+ void f2fs_remove_dirty_inode(struct inode *inode);
+-int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
++int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type,
++ bool from_cp);
+ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
+ u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi);
+ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+@@ -3633,7 +3660,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
+ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
+ unsigned int segno);
+ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+-int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
++int f2fs_resize_fs(struct file *filp, __u64 block_count);
+ int __init f2fs_create_garbage_collection_cache(void);
+ void f2fs_destroy_garbage_collection_cache(void);
+
+@@ -3896,6 +3923,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
+ * inline.c
+ */
+ bool f2fs_may_inline_data(struct inode *inode);
++bool f2fs_sanity_check_inline_data(struct inode *inode);
+ bool f2fs_may_inline_dentry(struct inode *inode);
+ void f2fs_do_read_inline_data(struct page *page, struct page *ipage);
+ void f2fs_truncate_inline_inode(struct inode *inode,
+@@ -4152,8 +4180,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
+
+ if (!f2fs_compressed_file(inode))
+ return true;
+- if (S_ISREG(inode->i_mode) &&
+- (get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks)))
++ if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
+ return false;
+
+ fi->i_flags &= ~F2FS_COMPR_FL;
+@@ -4257,6 +4284,11 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
+ return false;
+ }
+
++static inline bool f2fs_dev_is_readonly(struct f2fs_sb_info *sbi)
++{
++ return f2fs_sb_has_readonly(sbi) || f2fs_hw_is_readonly(sbi);
++}
++
+ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
+ {
+ return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
+diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
+index 9c8ef33bd8d32..e1131af0396b9 100644
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -786,7 +786,7 @@ int f2fs_truncate(struct inode *inode)
+ return -EIO;
+ }
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+
+@@ -916,7 +916,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ return err;
+
+ if (is_quota_modification(inode, attr)) {
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+ }
+@@ -1232,7 +1232,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
+ if (ret)
+ return ret;
+
+- ret = f2fs_get_node_info(sbi, dn.nid, &ni);
++ ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+@@ -1436,11 +1436,19 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
+ ret = -ENOSPC;
+ break;
+ }
+- if (dn->data_blkaddr != NEW_ADDR) {
+- f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
+- dn->data_blkaddr = NEW_ADDR;
+- f2fs_set_data_blkaddr(dn);
++
++ if (dn->data_blkaddr == NEW_ADDR)
++ continue;
++
++ if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
++ DATA_GENERIC_ENHANCE)) {
++ ret = -EFSCORRUPTED;
++ break;
+ }
++
++ f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
++ dn->data_blkaddr = NEW_ADDR;
++ f2fs_set_data_blkaddr(dn);
+ }
+
+ f2fs_update_extent_cache_range(dn, start, 0, index - start);
+@@ -1760,6 +1768,10 @@ static long f2fs_fallocate(struct file *file, int mode,
+
+ inode_lock(inode);
+
++ ret = file_modified(file);
++ if (ret)
++ goto out;
++
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (offset >= inode->i_size)
+ goto out;
+@@ -1857,10 +1869,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
+ if (masked_flags & F2FS_COMPR_FL) {
+ if (!f2fs_disable_compressed_file(inode))
+ return -EINVAL;
+- }
+- if (iflags & F2FS_NOCOMP_FL)
+- return -EINVAL;
+- if (iflags & F2FS_COMPR_FL) {
++ } else {
+ if (!f2fs_may_compress(inode))
+ return -EINVAL;
+ if (S_ISREG(inode->i_mode) && inode->i_size)
+@@ -1869,10 +1878,6 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
+ set_compress_context(inode);
+ }
+ }
+- if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) {
+- if (masked_flags & F2FS_COMPR_FL)
+- return -EINVAL;
+- }
+
+ fi->i_flags = iflags | (fi->i_flags & ~mask);
+ f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
+@@ -2002,7 +2007,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
+
+ inode_lock(inode);
+
+- f2fs_disable_compressed_file(inode);
++ if (!f2fs_disable_compressed_file(inode)) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+@@ -2658,6 +2666,7 @@ do_map:
+ }
+
+ set_page_dirty(page);
++ set_page_private_gcing(page);
+ f2fs_put_page(page, 1);
+
+ idx++;
+@@ -2968,15 +2977,16 @@ int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+ struct dquot *transfer_to[MAXQUOTAS] = {};
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct super_block *sb = sbi->sb;
+- int err = 0;
++ int err;
+
+ transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+- if (!IS_ERR(transfer_to[PRJQUOTA])) {
+- err = __dquot_transfer(inode, transfer_to);
+- if (err)
+- set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+- dqput(transfer_to[PRJQUOTA]);
+- }
++ if (IS_ERR(transfer_to[PRJQUOTA]))
++ return PTR_ERR(transfer_to[PRJQUOTA]);
++
++ err = __dquot_transfer(inode, transfer_to);
++ if (err)
++ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
++ dqput(transfer_to[PRJQUOTA]);
+ return err;
+ }
+
+@@ -3020,7 +3030,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
+ }
+ f2fs_put_page(ipage, 1);
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+
+@@ -3143,17 +3153,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
+
+ inode_lock(inode);
+
+- if (f2fs_should_update_outplace(inode, NULL)) {
+- ret = -EINVAL;
+- goto out;
+- }
+-
+ if (!pin) {
+ clear_inode_flag(inode, FI_PIN_FILE);
+ f2fs_i_gc_failures_write(inode, 0);
+ goto done;
+ }
+
++ if (f2fs_should_update_outplace(inode, NULL)) {
++ ret = -EINVAL;
++ goto out;
++ }
++
+ if (f2fs_pin_file_control(inode, false)) {
+ ret = -EAGAIN;
+ goto out;
+@@ -3241,7 +3251,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
+ sizeof(block_count)))
+ return -EFAULT;
+
+- return f2fs_resize_fs(sbi, block_count);
++ return f2fs_resize_fs(filp, block_count);
+ }
+
+ static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
+@@ -3919,7 +3929,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
+ goto out;
+ }
+
+- if (inode->i_size != 0) {
++ if (F2FS_HAS_BLOCKS(inode)) {
+ ret = -EFBIG;
+ goto out;
+ }
+@@ -4002,8 +4012,8 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+ goto out;
+ }
+
+- if (f2fs_is_mmap_file(inode)) {
+- ret = -EBUSY;
++ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
++ ret = -EINVAL;
+ goto out;
+ }
+
+@@ -4074,8 +4084,8 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+ goto out;
+ }
+
+- if (f2fs_is_mmap_file(inode)) {
+- ret = -EBUSY;
++ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
++ ret = -EINVAL;
+ goto out;
+ }
+
+@@ -4276,7 +4286,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ size_t target_size = 0;
+ int err;
+
+- if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
++ if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
+ set_inode_flag(inode, FI_NO_PREALLOC);
+
+ if ((iocb->ki_flags & IOCB_NOWAIT)) {
+diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
+index 77391e3b7d68f..7010440cb64c8 100644
+--- a/fs/f2fs/gc.c
++++ b/fs/f2fs/gc.c
+@@ -944,7 +944,7 @@ next_step:
+ continue;
+ }
+
+- if (f2fs_get_node_info(sbi, nid, &ni)) {
++ if (f2fs_get_node_info(sbi, nid, &ni, false)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+@@ -1002,7 +1002,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ {
+ struct page *node_page;
+ nid_t nid;
+- unsigned int ofs_in_node;
++ unsigned int ofs_in_node, max_addrs, base;
+ block_t source_blkaddr;
+
+ nid = le32_to_cpu(sum->nid);
+@@ -1012,7 +1012,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ if (IS_ERR(node_page))
+ return false;
+
+- if (f2fs_get_node_info(sbi, nid, dni)) {
++ if (f2fs_get_node_info(sbi, nid, dni, false)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
+@@ -1023,6 +1023,26 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
+
++ if (f2fs_check_nid_range(sbi, dni->ino)) {
++ f2fs_put_page(node_page, 1);
++ return false;
++ }
++
++ if (IS_INODE(node_page)) {
++ base = offset_in_addr(F2FS_INODE(node_page));
++ max_addrs = DEF_ADDRS_PER_INODE;
++ } else {
++ base = 0;
++ max_addrs = DEF_ADDRS_PER_BLOCK;
++ }
++
++ if (base + ofs_in_node >= max_addrs) {
++ f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u",
++ base, ofs_in_node, max_addrs, dni->ino, dni->nid);
++ f2fs_put_page(node_page, 1);
++ return false;
++ }
++
+ *nofs = ofs_of_node(node_page);
+ source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
+ f2fs_put_page(node_page, 1);
+@@ -1036,7 +1056,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
+ f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
+ blkaddr, source_blkaddr, segno);
+- f2fs_bug_on(sbi, 1);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
+ }
+ #endif
+@@ -1203,7 +1223,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
+
+ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
+- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
++ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
+ if (err)
+ goto put_out;
+
+@@ -1454,7 +1474,8 @@ next_step:
+
+ if (phase == 3) {
+ inode = f2fs_iget(sb, dni.ino);
+- if (IS_ERR(inode) || is_bad_inode(inode))
++ if (IS_ERR(inode) || is_bad_inode(inode) ||
++ special_file(inode->i_mode))
+ continue;
+
+ if (!down_write_trylock(
+@@ -1659,8 +1680,9 @@ freed:
+ get_valid_blocks(sbi, segno, false) == 0)
+ seg_freed++;
+
+- if (__is_large_section(sbi) && segno + 1 < end_segno)
+- sbi->next_victim_seg[gc_type] = segno + 1;
++ if (__is_large_section(sbi))
++ sbi->next_victim_seg[gc_type] =
++ (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO;
+ skip:
+ f2fs_put_page(sum_page, 0);
+ }
+@@ -1970,8 +1992,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
+ }
+ }
+
+-int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
++int f2fs_resize_fs(struct file *filp, __u64 block_count)
+ {
++ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ __u64 old_block_count, shrunk_blocks;
+ struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
+ unsigned int secs;
+@@ -2009,12 +2032,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+ return -EINVAL;
+ }
+
++ err = mnt_want_write_file(filp);
++ if (err)
++ return err;
++
+ shrunk_blocks = old_block_count - block_count;
+ secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+
+ /* stop other GC */
+- if (!down_write_trylock(&sbi->gc_lock))
+- return -EAGAIN;
++ if (!down_write_trylock(&sbi->gc_lock)) {
++ err = -EAGAIN;
++ goto out_drop_write;
++ }
+
+ /* stop CP to protect MAIN_SEC in free_segment_range */
+ f2fs_lock_op(sbi);
+@@ -2034,12 +2063,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+ out_unlock:
+ f2fs_unlock_op(sbi);
+ up_write(&sbi->gc_lock);
++out_drop_write:
++ mnt_drop_write_file(filp);
+ if (err)
+ return err;
+
+- set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+-
+ freeze_super(sbi->sb);
++
++ if (f2fs_readonly(sbi->sb)) {
++ thaw_super(sbi->sb);
++ return -EROFS;
++ }
++
+ down_write(&sbi->gc_lock);
+ down_write(&sbi->cp_global_sem);
+
+@@ -2054,6 +2089,7 @@ out_unlock:
+ if (err)
+ goto out_err;
+
++ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ err = free_segment_range(sbi, secs, false);
+ if (err)
+ goto recover_out;
+@@ -2077,6 +2113,7 @@ out_unlock:
+ f2fs_commit_super(sbi, false);
+ }
+ recover_out:
++ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
+@@ -2089,6 +2126,5 @@ out_err:
+ up_write(&sbi->cp_global_sem);
+ up_write(&sbi->gc_lock);
+ thaw_super(sbi->sb);
+- clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ return err;
+ }
+diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
+index e3beac546c63a..2788ceeaf5c22 100644
+--- a/fs/f2fs/hash.c
++++ b/fs/f2fs/hash.c
+@@ -91,7 +91,7 @@ static u32 TEA_hash_name(const u8 *p, size_t len)
+ /*
+ * Compute @fname->hash. For all directories, @fname->disk_name must be set.
+ * For casefolded directories, @fname->usr_fname must be set, and also
+- * @fname->cf_name if the filename is valid Unicode.
++ * @fname->cf_name if the filename is valid Unicode and is not "." or "..".
+ */
+ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
+ {
+@@ -110,10 +110,11 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
+ /*
+ * If the casefolded name is provided, hash it instead of the
+ * on-disk name. If the casefolded name is *not* provided, that
+- * should only be because the name wasn't valid Unicode, so fall
+- * back to treating the name as an opaque byte sequence. Note
+- * that to handle encrypted directories, the fallback must use
+- * usr_fname (plaintext) rather than disk_name (ciphertext).
++ * should only be because the name wasn't valid Unicode or was
++ * "." or "..", so fall back to treating the name as an opaque
++ * byte sequence. Note that to handle encrypted directories,
++ * the fallback must use usr_fname (plaintext) rather than
++ * disk_name (ciphertext).
+ */
+ WARN_ON_ONCE(!fname->usr_fname->name);
+ if (fname->cf_name.name) {
+diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
+index 56a20d5c15dad..e4fc169a07f55 100644
+--- a/fs/f2fs/inline.c
++++ b/fs/f2fs/inline.c
+@@ -14,21 +14,40 @@
+ #include "node.h"
+ #include <trace/events/f2fs.h>
+
+-bool f2fs_may_inline_data(struct inode *inode)
++static bool support_inline_data(struct inode *inode)
+ {
+ if (f2fs_is_atomic_file(inode))
+ return false;
+-
+ if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
+ return false;
+-
+ if (i_size_read(inode) > MAX_INLINE_DATA(inode))
+ return false;
++ return true;
++}
+
+- if (f2fs_post_read_required(inode))
++bool f2fs_may_inline_data(struct inode *inode)
++{
++ if (!support_inline_data(inode))
+ return false;
+
+- return true;
++ return !f2fs_post_read_required(inode);
++}
++
++bool f2fs_sanity_check_inline_data(struct inode *inode)
++{
++ if (!f2fs_has_inline_data(inode))
++ return false;
++
++ if (!support_inline_data(inode))
++ return true;
++
++ /*
++ * used by sanity_check_inode(), when disk layout fields has not
++ * been synchronized to inmem fields.
++ */
++ return (S_ISREG(inode->i_mode) &&
++ (file_is_encrypt(inode) || file_is_verity(inode) ||
++ (F2FS_I(inode)->i_flags & F2FS_COMPR_FL)));
+ }
+
+ bool f2fs_may_inline_dentry(struct inode *inode)
+@@ -45,7 +64,6 @@ bool f2fs_may_inline_dentry(struct inode *inode)
+ void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
+ {
+ struct inode *inode = page->mapping->host;
+- void *src_addr, *dst_addr;
+
+ if (PageUptodate(page))
+ return;
+@@ -55,11 +73,8 @@ void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
+ zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE);
+
+ /* Copy the whole inline data block */
+- src_addr = inline_data_addr(inode, ipage);
+- dst_addr = kmap_atomic(page);
+- memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
+- flush_dcache_page(page);
+- kunmap_atomic(dst_addr);
++ memcpy_to_page(page, 0, inline_data_addr(inode, ipage),
++ MAX_INLINE_DATA(inode));
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ }
+@@ -131,7 +146,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
+ if (err)
+ return err;
+
+- err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
++ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false);
+ if (err) {
+ f2fs_truncate_data_blocks_range(dn, 1);
+ f2fs_put_dnode(dn);
+@@ -192,7 +207,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
+ f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
+ return 0;
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+
+@@ -227,7 +242,6 @@ out:
+
+ int f2fs_write_inline_data(struct inode *inode, struct page *page)
+ {
+- void *src_addr, *dst_addr;
+ struct dnode_of_data dn;
+ int err;
+
+@@ -244,10 +258,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
+ f2fs_bug_on(F2FS_I_SB(inode), page->index);
+
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
+- src_addr = kmap_atomic(page);
+- dst_addr = inline_data_addr(inode, dn.inode_page);
+- memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
+- kunmap_atomic(src_addr);
++ memcpy_from_page(inline_data_addr(inode, dn.inode_page),
++ page, 0, MAX_INLINE_DATA(inode));
+ set_page_dirty(dn.inode_page);
+
+ f2fs_clear_page_cache_dirty_tag(page);
+@@ -408,18 +420,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
+
+ dentry_blk = page_address(page);
+
++ /*
++ * Start by zeroing the full block, to ensure that all unused space is
++ * zeroed and no uninitialized memory is leaked to disk.
++ */
++ memset(dentry_blk, 0, F2FS_BLKSIZE);
++
+ make_dentry_ptr_inline(dir, &src, inline_dentry);
+ make_dentry_ptr_block(dir, &dst, dentry_blk);
+
+ /* copy data from inline dentry block to new dentry block */
+ memcpy(dst.bitmap, src.bitmap, src.nr_bitmap);
+- memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap);
+- /*
+- * we do not need to zero out remainder part of dentry and filename
+- * field, since we have used bitmap for marking the usage status of
+- * them, besides, we can also ignore copying/zeroing reserved space
+- * of dentry block, because them haven't been used so far.
+- */
+ memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
+ memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);
+
+@@ -786,7 +797,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
+ ilen = start + len;
+ ilen -= start;
+
+- err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
++ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false);
+ if (err)
+ goto out;
+
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 9141147b5bb00..94e21136d5790 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -276,8 +276,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+ }
+ }
+
+- if (f2fs_has_inline_data(inode) &&
+- (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
++ if (f2fs_sanity_check_inline_data(inode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+@@ -516,6 +515,11 @@ make_now:
+ } else if (ino == F2FS_COMPRESS_INO(sbi)) {
+ #ifdef CONFIG_F2FS_FS_COMPRESSION
+ inode->i_mapping->a_ops = &f2fs_compress_aops;
++ /*
++ * generic_error_remove_page only truncates pages of regular
++ * inode
++ */
++ inode->i_mode |= S_IFREG;
+ #endif
+ mapping_set_gfp_mask(inode->i_mapping,
+ GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
+@@ -527,7 +531,7 @@ make_now:
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+- inode_nohighmem(inode);
++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ } else if (S_ISLNK(inode->i_mode)) {
+ if (file_is_encrypt(inode))
+ inode->i_op = &f2fs_encrypted_symlink_inode_operations;
+@@ -677,17 +681,19 @@ void f2fs_update_inode_page(struct inode *inode)
+ {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *node_page;
++ int count = 0;
+ retry:
+ node_page = f2fs_get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(node_page)) {
+ int err = PTR_ERR(node_page);
+
+- if (err == -ENOMEM) {
+- cond_resched();
++ /* The node block was truncated. */
++ if (err == -ENOENT)
++ return;
++
++ if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+- } else if (err != -ENOENT) {
+- f2fs_stop_checkpoint(sbi, false);
+- }
++ f2fs_stop_checkpoint(sbi, false);
+ return;
+ }
+ f2fs_update_inode(inode, node_page);
+@@ -754,7 +760,7 @@ void f2fs_evict_inode(struct inode *inode)
+ if (inode->i_nlink || is_bad_inode(inode))
+ goto no_delete;
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err) {
+ err = 0;
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+@@ -764,7 +770,8 @@ void f2fs_evict_inode(struct inode *inode)
+ f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
+
+- sb_start_intwrite(inode->i_sb);
++ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
++ sb_start_intwrite(inode->i_sb);
+ set_inode_flag(inode, FI_NO_ALLOC);
+ i_size_write(inode, 0);
+ retry:
+@@ -780,8 +787,22 @@ retry:
+ f2fs_lock_op(sbi);
+ err = f2fs_remove_inode_page(inode);
+ f2fs_unlock_op(sbi);
+- if (err == -ENOENT)
++ if (err == -ENOENT) {
+ err = 0;
++
++ /*
++ * in fuzzed image, another node may has the same
++ * block address as inode's, if it was truncated
++ * previously, truncation of inode node will fail.
++ */
++ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
++ f2fs_warn(F2FS_I_SB(inode),
++ "f2fs_evict_inode: inconsistent node id, ino:%lu",
++ inode->i_ino);
++ f2fs_inode_synced(inode);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ }
++ }
+ }
+
+ /* give more chances, if ENOMEM case */
+@@ -795,7 +816,8 @@ retry:
+ if (dquot_initialize_needed(inode))
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ }
+- sb_end_intwrite(inode->i_sb);
++ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
++ sb_end_intwrite(inode->i_sb);
+ no_delete:
+ dquot_drop(inode);
+
+@@ -868,9 +890,10 @@ void f2fs_handle_failed_inode(struct inode *inode)
+ * so we can prevent losing this orphan when encoutering checkpoint
+ * and following suddenly power-off.
+ */
+- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
++ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ set_inode_flag(inode, FI_FREE_NID);
+ f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
+diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
+index cdcf54ae0db8f..9e0160a02bf4a 100644
+--- a/fs/f2fs/iostat.c
++++ b/fs/f2fs/iostat.c
+@@ -194,8 +194,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
+ return;
+
+ ts_diff = jiffies - iostat_ctx->submit_ts;
+- if (iotype >= META_FLUSH)
++ if (iotype == META_FLUSH) {
+ iotype = META;
++ } else if (iotype >= NR_PAGE_TYPE) {
++ f2fs_warn(sbi, "%s: %d over NR_PAGE_TYPE", __func__, iotype);
++ return;
++ }
+
+ if (rw == 0) {
+ idx = READ_IO;
+diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
+index 9c528e583c9d5..7a86a8dcf4f1c 100644
+--- a/fs/f2fs/namei.c
++++ b/fs/f2fs/namei.c
+@@ -74,7 +74,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+ if (err)
+ goto fail_drop;
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ goto fail_drop;
+
+@@ -91,8 +91,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+ if (test_opt(sbi, INLINE_XATTR))
+ set_inode_flag(inode, FI_INLINE_XATTR);
+
+- if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
+- set_inode_flag(inode, FI_INLINE_DATA);
+ if (f2fs_may_inline_dentry(inode))
+ set_inode_flag(inode, FI_INLINE_DENTRY);
+
+@@ -109,10 +107,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+
+ f2fs_init_extent_tree(inode, NULL);
+
+- stat_inc_inline_xattr(inode);
+- stat_inc_inline_inode(inode);
+- stat_inc_inline_dir(inode);
+-
+ F2FS_I(inode)->i_flags =
+ f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
+
+@@ -129,6 +123,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+ set_compress_context(inode);
+ }
+
++ /* Should enable inline_data after compression set */
++ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
++ set_inode_flag(inode, FI_INLINE_DATA);
++
++ stat_inc_inline_xattr(inode);
++ stat_inc_inline_inode(inode);
++ stat_inc_inline_dir(inode);
++
+ f2fs_set_inode_flags(inode);
+
+ trace_f2fs_new_inode(inode, 0);
+@@ -327,6 +329,9 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
+ if (!is_extension_exist(name, ext[i], false))
+ continue;
+
++ /* Do not use inline_data with compression */
++ stat_dec_inline_inode(inode);
++ clear_inode_flag(inode, FI_INLINE_DATA);
+ set_compress_context(inode);
+ return;
+ }
+@@ -345,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -404,7 +409,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
+ F2FS_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -460,7 +465,14 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
+ return 0;
+ }
+
+- err = dquot_initialize(dir);
++ if (!S_ISDIR(dir->i_mode)) {
++ f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)",
++ dir->i_ino, dir->i_mode, pino);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ return -ENOTDIR;
++ }
++
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -598,10 +610,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
+ goto fail;
+ }
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ goto fail;
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ goto fail;
+
+@@ -675,7 +687,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ if (err)
+ return err;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -746,7 +758,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -757,7 +769,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+- inode_nohighmem(inode);
++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+ set_inode_flag(inode, FI_INC_LINK);
+ f2fs_lock_op(sbi);
+@@ -803,7 +815,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -841,7 +853,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
+ struct inode *inode;
+ int err;
+
+- err = dquot_initialize(dir);
++ err = f2fs_dquot_initialize(dir);
+ if (err)
+ return err;
+
+@@ -965,16 +977,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ return err;
+ }
+
+- err = dquot_initialize(old_dir);
++ err = f2fs_dquot_initialize(old_dir);
+ if (err)
+ goto out;
+
+- err = dquot_initialize(new_dir);
++ err = f2fs_dquot_initialize(new_dir);
+ if (err)
+ goto out;
+
+ if (new_inode) {
+- err = dquot_initialize(new_inode);
++ err = f2fs_dquot_initialize(new_inode);
+ if (err)
+ goto out;
+ }
+@@ -1138,11 +1150,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ F2FS_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
+- err = dquot_initialize(old_dir);
++ err = f2fs_dquot_initialize(old_dir);
+ if (err)
+ goto out;
+
+- err = dquot_initialize(new_dir);
++ err = f2fs_dquot_initialize(new_dir);
+ if (err)
+ goto out;
+
+diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
+index e863136081b47..195658263f0a4 100644
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -430,6 +430,10 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *new, *e;
+
++ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
++ if (rwsem_is_locked(&sbi->cp_global_sem))
++ return;
++
+ new = __alloc_nat_entry(sbi, nid, false);
+ if (!new)
+ return;
+@@ -539,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+ }
+
+ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+- struct node_info *ni)
++ struct node_info *ni, bool checkpoint_context)
+ {
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+@@ -572,9 +576,10 @@ retry:
+ * nat_tree_lock. Therefore, we should retry, if we failed to grab here
+ * while not bothering checkpoint.
+ */
+- if (!rwsem_is_locked(&sbi->cp_global_sem)) {
++ if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
+ down_read(&curseg->journal_rwsem);
+- } else if (!down_read_trylock(&curseg->journal_rwsem)) {
++ } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
++ !down_read_trylock(&curseg->journal_rwsem)) {
+ up_read(&nm_i->nat_tree_lock);
+ goto retry;
+ }
+@@ -887,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn)
+ int err;
+ pgoff_t index;
+
+- err = f2fs_get_node_info(sbi, dn->nid, &ni);
++ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
+ if (err)
+ return err;
+
+@@ -937,8 +942,10 @@ static int truncate_dnode(struct dnode_of_data *dn)
+ dn->ofs_in_node = 0;
+ f2fs_truncate_data_blocks(dn);
+ err = truncate_node(dn);
+- if (err)
++ if (err) {
++ f2fs_put_page(page, 1);
+ return err;
++ }
+
+ return 1;
+ }
+@@ -1286,12 +1293,16 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
+ goto fail;
+
+ #ifdef CONFIG_F2FS_CHECK_FS
+- err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
++ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
+- f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
++ if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
++ err = -EFSCORRUPTED;
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ goto fail;
++ }
+ #endif
+ new_ni.nid = dn->nid;
+ new_ni.ino = dn->inode->i_ino;
+@@ -1348,13 +1359,12 @@ static int read_node_page(struct page *page, int op_flags)
+ return LOCKED_PAGE;
+ }
+
+- err = f2fs_get_node_info(sbi, page->index, &ni);
++ err = f2fs_get_node_info(sbi, page->index, &ni, false);
+ if (err)
+ return err;
+
+ /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
+- if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) ||
+- is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
++ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
+ ClearPageUptodate(page);
+ return -ENOENT;
+ }
+@@ -1443,6 +1453,7 @@ page_hit:
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ out_err:
+ ClearPageUptodate(page);
+@@ -1599,7 +1610,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
+ nid = nid_of_node(page);
+ f2fs_bug_on(sbi, page->index != nid);
+
+- if (f2fs_get_node_info(sbi, nid, &ni))
++ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
+ goto redirty_out;
+
+ if (wbc->for_reclaim) {
+@@ -2105,8 +2116,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req[NODE]);
+- else if (atomic_read(&sbi->wb_sync_req[NODE]))
++ else if (atomic_read(&sbi->wb_sync_req[NODE])) {
++ /* to avoid potential deadlock */
++ if (current->plug)
++ blk_finish_plug(current->plug);
+ goto skip_write;
++ }
+
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
+@@ -2700,7 +2715,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
+ goto recover_xnid;
+
+ /* 1: invalidate the previous xattr nid */
+- err = f2fs_get_node_info(sbi, prev_xnid, &ni);
++ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
+ if (err)
+ return err;
+
+@@ -2740,7 +2755,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+ struct page *ipage;
+ int err;
+
+- err = f2fs_get_node_info(sbi, ino, &old_ni);
++ err = f2fs_get_node_info(sbi, ino, &old_ni, false);
+ if (err)
+ return err;
+
+diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
+index 04655511d7f51..ba7eeb3c27384 100644
+--- a/fs/f2fs/recovery.c
++++ b/fs/f2fs/recovery.c
+@@ -81,7 +81,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ goto err_out;
+
+@@ -203,7 +203,7 @@ retry:
+ goto out_put;
+ }
+
+- err = dquot_initialize(einode);
++ err = f2fs_dquot_initialize(einode);
+ if (err) {
+ iput(einode);
+ goto out_put;
+@@ -452,7 +452,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+ struct dnode_of_data tdn = *dn;
+ nid_t ino, nid;
+ struct inode *inode;
+- unsigned int offset;
++ unsigned int offset, ofs_in_node, max_addrs;
+ block_t bidx;
+ int i;
+
+@@ -479,15 +479,24 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+ got_it:
+ /* Use the locked dnode page and inode */
+ nid = le32_to_cpu(sum.nid);
++ ofs_in_node = le16_to_cpu(sum.ofs_in_node);
++
++ max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode);
++ if (ofs_in_node >= max_addrs) {
++ f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
++ ofs_in_node, dn->inode->i_ino, nid, max_addrs);
++ return -EFSCORRUPTED;
++ }
++
+ if (dn->inode->i_ino == nid) {
+ tdn.nid = nid;
+ if (!dn->inode_page_locked)
+ lock_page(dn->inode_page);
+ tdn.node_page = dn->inode_page;
+- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
++ tdn.ofs_in_node = ofs_in_node;
+ goto truncate_out;
+ } else if (dn->nid == nid) {
+- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
++ tdn.ofs_in_node = ofs_in_node;
+ goto truncate_out;
+ }
+
+@@ -508,7 +517,7 @@ got_it:
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+- ret = dquot_initialize(inode);
++ ret = f2fs_dquot_initialize(inode);
+ if (ret) {
+ iput(inode);
+ return ret;
+@@ -595,7 +604,7 @@ retry_dn:
+
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
+
+- err = f2fs_get_node_info(sbi, dn.nid, &ni);
++ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
+ if (err)
+ goto err;
+
+@@ -677,6 +686,14 @@ retry_prev:
+ goto err;
+ }
+
++ if (f2fs_is_valid_blkaddr(sbi, dest,
++ DATA_GENERIC_ENHANCE_UPDATE)) {
++ f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
++ dest, inode->i_ino, dn.ofs_in_node);
++ err = -EFSCORRUPTED;
++ goto err;
++ }
++
+ /* write dummy data page */
+ f2fs_replace_block(sbi, &dn, src, dest,
+ ni.version, false, false);
+diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
+index a135d22474154..79ad696cddec0 100644
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -253,7 +253,7 @@ retry:
+ goto next;
+ }
+
+- err = f2fs_get_node_info(sbi, dn.nid, &ni);
++ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+@@ -356,16 +356,19 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct list_head *head = &fi->inmem_pages;
+ struct inmem_pages *cur = NULL;
++ struct inmem_pages *tmp;
+
+ f2fs_bug_on(sbi, !page_private_atomic(page));
+
+ mutex_lock(&fi->inmem_lock);
+- list_for_each_entry(cur, head, list) {
+- if (cur->page == page)
++ list_for_each_entry(tmp, head, list) {
++ if (tmp->page == page) {
++ cur = tmp;
+ break;
++ }
+ }
+
+- f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
++ f2fs_bug_on(sbi, !cur);
+ list_del(&cur->list);
+ mutex_unlock(&fi->inmem_lock);
+
+@@ -561,7 +564,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
+ goto do_sync;
+
+ /* checkpoint is the only way to shrink partial cached entries */
+- if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
++ if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
+ f2fs_available_free_memory(sbi, INO_ENTRIES))
+ return;
+
+@@ -572,7 +575,7 @@ do_sync:
+ mutex_lock(&sbi->flush_lock);
+
+ blk_start_plug(&plug);
+- f2fs_sync_dirty_inodes(sbi, FILE_INODE);
++ f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
+ blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
+@@ -1548,7 +1551,7 @@ retry:
+ if (i + 1 < dpolicy->granularity)
+ break;
+
+- if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
++ if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
+ pend_list = &dcc->pend_list[i];
+@@ -2126,8 +2129,10 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
+
+ dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+ "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+- if (IS_ERR(dcc->f2fs_issue_discard))
++ if (IS_ERR(dcc->f2fs_issue_discard)) {
+ err = PTR_ERR(dcc->f2fs_issue_discard);
++ dcc->f2fs_issue_discard = NULL;
++ }
+
+ return err;
+ }
+@@ -4508,7 +4513,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
+ unsigned int i, start, end;
+ unsigned int readed, start_blk = 0;
+ int err = 0;
+- block_t total_node_blocks = 0;
++ block_t sit_valid_blocks[2] = {0, 0};
+
+ do {
+ readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
+@@ -4533,8 +4538,14 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
+ if (err)
+ return err;
+ seg_info_from_raw_sit(se, &sit);
+- if (IS_NODESEG(se->type))
+- total_node_blocks += se->valid_blocks;
++
++ if (se->type >= NR_PERSISTENT_LOG) {
++ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
++ se->type, start);
++ return -EFSCORRUPTED;
++ }
++
++ sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
+
+ if (f2fs_block_unit_discard(sbi)) {
+ /* build discard map only one time */
+@@ -4574,15 +4585,22 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
+ sit = sit_in_journal(journal, i);
+
+ old_valid_blocks = se->valid_blocks;
+- if (IS_NODESEG(se->type))
+- total_node_blocks -= old_valid_blocks;
++
++ sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
+
+ err = check_block_count(sbi, start, &sit);
+ if (err)
+ break;
+ seg_info_from_raw_sit(se, &sit);
+- if (IS_NODESEG(se->type))
+- total_node_blocks += se->valid_blocks;
++
++ if (se->type >= NR_PERSISTENT_LOG) {
++ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
++ se->type, start);
++ err = -EFSCORRUPTED;
++ break;
++ }
++
++ sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
+
+ if (f2fs_block_unit_discard(sbi)) {
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+@@ -4604,13 +4622,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
+ }
+ up_read(&curseg->journal_rwsem);
+
+- if (!err && total_node_blocks != valid_node_count(sbi)) {
++ if (err)
++ return err;
++
++ if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+- total_node_blocks, valid_node_count(sbi));
+- err = -EFSCORRUPTED;
++ sit_valid_blocks[NODE], valid_node_count(sbi));
++ return -EFSCORRUPTED;
+ }
+
+- return err;
++ if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
++ valid_user_blocks(sbi)) {
++ f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
++ sit_valid_blocks[DATA], sit_valid_blocks[NODE],
++ valid_user_blocks(sbi));
++ return -EFSCORRUPTED;
++ }
++
++ return 0;
+ }
+
+ static void init_free_segmap(struct f2fs_sb_info *sbi)
+@@ -4747,6 +4776,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
+
+ sanity_check_seg_type(sbi, curseg->seg_type);
+
++ if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
++ f2fs_err(sbi,
++ "Current segment has invalid alloc_type:%d",
++ curseg->alloc_type);
++ return -EFSCORRUPTED;
++ }
++
+ if (f2fs_test_bit(blkofs, se->cur_valid_map))
+ goto out;
+
+@@ -5017,54 +5053,6 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
+ return 0;
+ }
+
+-static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
+- unsigned int dev_idx)
+-{
+- if (!bdev_is_zoned(FDEV(dev_idx).bdev))
+- return true;
+- return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
+-}
+-
+-/* Return the zone index in the given device */
+-static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
+- int dev_idx)
+-{
+- block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+-
+- return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
+- sbi->log_blocks_per_blkz;
+-}
+-
+-/*
+- * Return the usable segments in a section based on the zone's
+- * corresponding zone capacity. Zone is equal to a section.
+- */
+-static inline unsigned int f2fs_usable_zone_segs_in_sec(
+- struct f2fs_sb_info *sbi, unsigned int segno)
+-{
+- unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
+-
+- dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
+- zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
+-
+- /* Conventional zone's capacity is always equal to zone size */
+- if (is_conv_zone(sbi, zone_idx, dev_idx))
+- return sbi->segs_per_sec;
+-
+- /*
+- * If the zone_capacity_blocks array is NULL, then zone capacity
+- * is equal to the zone size for all zones
+- */
+- if (!FDEV(dev_idx).zone_capacity_blocks)
+- return sbi->segs_per_sec;
+-
+- /* Get the segment count beyond zone capacity block */
+- unusable_segs_in_sec = (sbi->blocks_per_blkz -
+- FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
+- sbi->log_blocks_per_seg;
+- return sbi->segs_per_sec - unusable_segs_in_sec;
+-}
+-
+ /*
+ * Return the number of usable blocks in a segment. The number of blocks
+ * returned is always equal to the number of blocks in a segment for
+@@ -5077,26 +5065,15 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+ {
+ block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
+- unsigned int zone_idx, dev_idx, secno;
+-
+- secno = GET_SEC_FROM_SEG(sbi, segno);
+- seg_start = START_BLOCK(sbi, segno);
+- dev_idx = f2fs_target_device_index(sbi, seg_start);
+- zone_idx = get_zone_idx(sbi, secno, dev_idx);
++ unsigned int secno;
+
+- /*
+- * Conventional zone's capacity is always equal to zone size,
+- * so, blocks per segment is unchanged.
+- */
+- if (is_conv_zone(sbi, zone_idx, dev_idx))
+- return sbi->blocks_per_seg;
+-
+- if (!FDEV(dev_idx).zone_capacity_blocks)
++ if (!sbi->unusable_blocks_per_sec)
+ return sbi->blocks_per_seg;
+
++ secno = GET_SEC_FROM_SEG(sbi, segno);
++ seg_start = START_BLOCK(sbi, segno);
+ sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+- sec_cap_blkaddr = sec_start_blkaddr +
+- FDEV(dev_idx).zone_capacity_blocks[zone_idx];
++ sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
+
+ /*
+ * If segment starts before zone capacity and spans beyond
+@@ -5128,11 +5105,6 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi
+ return 0;
+ }
+
+-static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
+- unsigned int segno)
+-{
+- return 0;
+-}
+ #endif
+ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+@@ -5147,7 +5119,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+ {
+ if (f2fs_sb_has_blkzoned(sbi))
+- return f2fs_usable_zone_segs_in_sec(sbi, segno);
++ return CAP_SEGS_PER_SEC(sbi);
+
+ return sbi->segs_per_sec;
+ }
+diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
+index 89fff258727d1..04f448ddf49ea 100644
+--- a/fs/f2fs/segment.h
++++ b/fs/f2fs/segment.h
+@@ -24,6 +24,7 @@
+
+ #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
+ #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
++#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
+
+ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+ unsigned short seg_type)
+@@ -100,6 +101,12 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+ GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+ #define BLKS_PER_SEC(sbi) \
+ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
++#define CAP_BLKS_PER_SEC(sbi) \
++ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
++ (sbi)->unusable_blocks_per_sec)
++#define CAP_SEGS_PER_SEC(sbi) \
++ ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
++ (sbi)->log_blocks_per_seg))
+ #define GET_SEC_FROM_SEG(sbi, segno) \
+ (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
+ #define GET_SEG_FROM_SEC(sbi, secno) \
+@@ -537,7 +544,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
+
+ static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
+ {
+- return SM_I(sbi)->reserved_segments;
++ return SM_I(sbi)->reserved_segments +
++ SM_I(sbi)->additional_reserved_segments;
+ }
+
+ static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
+@@ -570,11 +578,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
+ return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
+ }
+
+-static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
++static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
++ unsigned int node_blocks, unsigned int dent_blocks)
+ {
+- unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
+- get_pages(sbi, F2FS_DIRTY_DENTS);
+- unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
++
+ unsigned int segno, left_blocks;
+ int i;
+
+@@ -600,19 +607,28 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
+ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
+ int freed, int needed)
+ {
+- int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
+- int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+- int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
++ unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
++ get_pages(sbi, F2FS_DIRTY_DENTS) +
++ get_pages(sbi, F2FS_DIRTY_IMETA);
++ unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
++ unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
++ unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
++ unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
++ unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
++ unsigned int free, need_lower, need_upper;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ return false;
+
+- if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
+- has_curseg_enough_space(sbi))
++ free = free_sections(sbi) + freed;
++ need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
++ need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
++
++ if (free > need_upper)
+ return false;
+- return (free_sections(sbi) + freed) <=
+- (node_secs + 2 * dent_secs + imeta_secs +
+- reserved_sections(sbi) + needed);
++ else if (free <= need_lower)
++ return true;
++ return !has_curseg_enough_space(sbi, node_blocks, dent_blocks);
+ }
+
+ static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index 78ebc306ee2b5..5c0b2b300aa1b 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -58,6 +58,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
++ [FAULT_DQUOT_INIT] = "dquot initialize",
+ };
+
+ void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+@@ -305,10 +306,10 @@ static void f2fs_destroy_casefold_cache(void) { }
+
+ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
+ {
+- block_t limit = min((sbi->user_block_count << 1) / 1000,
++ block_t limit = min((sbi->user_block_count >> 3),
+ sbi->user_block_count - sbi->reserved_blocks);
+
+- /* limit is 0.2% */
++ /* limit is 12.5% */
+ if (test_opt(sbi, RESERVE_ROOT) &&
+ F2FS_OPTION(sbi).root_reserved_blocks > limit) {
+ F2FS_OPTION(sbi).root_reserved_blocks = limit;
+@@ -327,6 +328,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
+ F2FS_OPTION(sbi).s_resgid));
+ }
+
++static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
++{
++ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
++ unsigned int avg_vblocks;
++ unsigned int wanted_reserved_segments;
++ block_t avail_user_block_count;
++
++ if (!F2FS_IO_ALIGNED(sbi))
++ return 0;
++
++ /* average valid block count in section in worst case */
++ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
++
++ /*
++ * we need enough free space when migrating one section in worst case
++ */
++ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
++ reserved_segments(sbi);
++ wanted_reserved_segments -= reserved_segments(sbi);
++
++ avail_user_block_count = sbi->user_block_count -
++ sbi->current_reserved_blocks -
++ F2FS_OPTION(sbi).root_reserved_blocks;
++
++ if (wanted_reserved_segments * sbi->blocks_per_seg >
++ avail_user_block_count) {
++ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
++ wanted_reserved_segments,
++ avail_user_block_count >> sbi->log_blocks_per_seg);
++ return -ENOSPC;
++ }
++
++ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
++
++ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
++ wanted_reserved_segments);
++
++ return 0;
++}
++
+ static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
+ {
+ if (!F2FS_OPTION(sbi).unusable_cap_perc)
+@@ -1292,7 +1333,7 @@ default_check:
+ /* Not pass down write hints if the number of active logs is lesser
+ * than NR_CURSEG_PERSIST_TYPE.
+ */
+- if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
++ if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
+
+ if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
+@@ -1474,7 +1515,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
+ blkdev_put(FDEV(i).bdev, FMODE_EXCL);
+ #ifdef CONFIG_BLK_DEV_ZONED
+ kvfree(FDEV(i).blkz_seq);
+- kfree(FDEV(i).zone_capacity_blocks);
+ #endif
+ }
+ kvfree(sbi->devs);
+@@ -1620,14 +1660,17 @@ static int f2fs_freeze(struct super_block *sb)
+ if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
+ return -EINVAL;
+
+- /* ensure no checkpoint required */
+- if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
+- return -EINVAL;
++ /* Let's flush checkpoints and stop the thread. */
++ f2fs_flush_ckpt_thread(F2FS_SB(sb));
++
++ /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
++ set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
+ return 0;
+ }
+
+ static int f2fs_unfreeze(struct super_block *sb)
+ {
++ clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
+ return 0;
+ }
+
+@@ -2107,6 +2150,9 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+ up_write(&sbi->gc_lock);
+
+ f2fs_sync_fs(sbi->sb, 1);
++
++ /* Let's ensure there's no pending checkpoint anymore */
++ f2fs_flush_ckpt_thread(sbi);
+ }
+
+ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+@@ -2179,7 +2225,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+ if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
+ goto skip;
+
+- if (f2fs_sb_has_readonly(sbi) && !(*flags & SB_RDONLY)) {
++ if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) {
+ err = -EROFS;
+ goto restore_opts;
+ }
+@@ -2273,6 +2319,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+ f2fs_stop_ckpt_thread(sbi);
+ need_restart_ckpt = true;
+ } else {
++		/* Flush the previous checkpoint, if it exists. */
++ f2fs_flush_ckpt_thread(sbi);
++
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+@@ -2392,7 +2441,6 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
+ size_t toread;
+ loff_t i_size = i_size_read(inode);
+ struct page *page;
+- char *kaddr;
+
+ if (off > i_size)
+ return 0;
+@@ -2426,9 +2474,7 @@ repeat:
+ return -EIO;
+ }
+
+- kaddr = kmap_atomic(page);
+- memcpy(data, kaddr + offset, tocopy);
+- kunmap_atomic(kaddr);
++ memcpy_from_page(data, page, offset, tocopy);
+ f2fs_put_page(page, 1);
+
+ offset = 0;
+@@ -2450,7 +2496,6 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
+ size_t towrite = len;
+ struct page *page;
+ void *fsdata = NULL;
+- char *kaddr;
+ int err = 0;
+ int tocopy;
+
+@@ -2470,10 +2515,7 @@ retry:
+ break;
+ }
+
+- kaddr = kmap_atomic(page);
+- memcpy(kaddr + offset, data, tocopy);
+- kunmap_atomic(kaddr);
+- flush_dcache_page(page);
++ memcpy_to_page(page, offset, data, tocopy);
+
+ a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
+ page, fsdata);
+@@ -2491,6 +2533,16 @@ retry:
+ return len - towrite;
+ }
+
++int f2fs_dquot_initialize(struct inode *inode)
++{
++ if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) {
++ f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT);
++ return -ESRCH;
++ }
++
++ return dquot_initialize(inode);
++}
++
+ static struct dquot **f2fs_get_dquots(struct inode *inode)
+ {
+ return F2FS_I(inode)->i_dquot;
+@@ -2636,7 +2688,7 @@ int f2fs_quota_sync(struct super_block *sb, int type)
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int cnt;
+- int ret;
++ int ret = 0;
+
+ /*
+ * Now when everything is written we can discard the pagecache so
+@@ -2647,10 +2699,11 @@ int f2fs_quota_sync(struct super_block *sb, int type)
+ if (type != -1 && cnt != type)
+ continue;
+
+- if (!sb_has_quota_active(sb, type))
+- return 0;
++ if (!sb_has_quota_active(sb, cnt))
++ continue;
+
+- inode_lock(dqopt->files[cnt]);
++ if (!f2fs_sb_has_quota_ino(sbi))
++ inode_lock(dqopt->files[cnt]);
+
+ /*
+ * do_quotactl
+@@ -2669,7 +2722,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
+
+- inode_unlock(dqopt->files[cnt]);
++ if (!f2fs_sb_has_quota_ino(sbi))
++ inode_unlock(dqopt->files[cnt]);
+
+ if (ret)
+ break;
+@@ -2875,6 +2929,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
+ .get_nextdqblk = dquot_get_next_dqblk,
+ };
+ #else
++int f2fs_dquot_initialize(struct inode *inode)
++{
++ return 0;
++}
++
+ int f2fs_quota_sync(struct super_block *sb, int type)
+ {
+ return 0;
+@@ -3487,7 +3546,7 @@ skip_cross:
+ NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+ f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+ cp_payload, nat_bits_blocks);
+- return -EFSCORRUPTED;
++ return 1;
+ }
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+@@ -3571,24 +3630,29 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
+ #ifdef CONFIG_BLK_DEV_ZONED
+
+ struct f2fs_report_zones_args {
++ struct f2fs_sb_info *sbi;
+ struct f2fs_dev_info *dev;
+- bool zone_cap_mismatch;
+ };
+
+ static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+ {
+ struct f2fs_report_zones_args *rz_args = data;
++ block_t unusable_blocks = (zone->len - zone->capacity) >>
++ F2FS_LOG_SECTORS_PER_BLOCK;
+
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return 0;
+
+ set_bit(idx, rz_args->dev->blkz_seq);
+- rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
+- F2FS_LOG_SECTORS_PER_BLOCK;
+- if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
+- rz_args->zone_cap_mismatch = true;
+-
++ if (!rz_args->sbi->unusable_blocks_per_sec) {
++ rz_args->sbi->unusable_blocks_per_sec = unusable_blocks;
++ return 0;
++ }
++ if (rz_args->sbi->unusable_blocks_per_sec != unusable_blocks) {
++ f2fs_err(rz_args->sbi, "F2FS supports single zone capacity\n");
++ return -EINVAL;
++ }
+ return 0;
+ }
+
+@@ -3622,26 +3686,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
+ if (!FDEV(devi).blkz_seq)
+ return -ENOMEM;
+
+- /* Get block zones type and zone-capacity */
+- FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
+- FDEV(devi).nr_blkz * sizeof(block_t),
+- GFP_KERNEL);
+- if (!FDEV(devi).zone_capacity_blocks)
+- return -ENOMEM;
+-
++ rep_zone_arg.sbi = sbi;
+ rep_zone_arg.dev = &FDEV(devi);
+- rep_zone_arg.zone_cap_mismatch = false;
+
+ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
+ &rep_zone_arg);
+ if (ret < 0)
+ return ret;
+-
+- if (!rep_zone_arg.zone_cap_mismatch) {
+- kfree(FDEV(devi).zone_capacity_blocks);
+- FDEV(devi).zone_capacity_blocks = NULL;
+- }
+-
+ return 0;
+ }
+ #endif
+@@ -4148,6 +4199,10 @@ try_onemore:
+ goto free_nm;
+ }
+
++ err = adjust_reserved_segment(sbi);
++ if (err)
++ goto free_nm;
++
+ /* For write statistics */
+ sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
+
+@@ -4352,12 +4407,14 @@ free_node_inode:
+ free_stats:
+ f2fs_destroy_stats(sbi);
+ free_nm:
++ /* stop discard thread before destroying node manager */
++ f2fs_stop_discard_thread(sbi);
+ f2fs_destroy_node_manager(sbi);
+ free_sm:
+ f2fs_destroy_segment_manager(sbi);
+- f2fs_destroy_post_read_wq(sbi);
+ stop_ckpt_thread:
+ f2fs_stop_ckpt_thread(sbi);
++ f2fs_destroy_post_read_wq(sbi);
+ free_devices:
+ destroy_device_list(sbi);
+ kvfree(sbi->ckpt);
+diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
+index a32fe31c33b8e..63af1573ebcaa 100644
+--- a/fs/f2fs/sysfs.c
++++ b/fs/f2fs/sysfs.c
+@@ -415,7 +415,9 @@ out:
+ if (a->struct_type == RESERVED_BLOCKS) {
+ spin_lock(&sbi->stat_lock);
+ if (t > (unsigned long)(sbi->user_block_count -
+- F2FS_OPTION(sbi).root_reserved_blocks)) {
++ F2FS_OPTION(sbi).root_reserved_blocks -
++ sbi->blocks_per_seg *
++ SM_I(sbi)->additional_reserved_segments)) {
+ spin_unlock(&sbi->stat_lock);
+ return -EINVAL;
+ }
+@@ -471,7 +473,7 @@ out:
+ } else if (t == GC_IDLE_AT) {
+ if (!sbi->am.atgc_enabled)
+ return -EINVAL;
+- sbi->gc_mode = GC_AT;
++ sbi->gc_mode = GC_IDLE_AT;
+ } else {
+ sbi->gc_mode = GC_NORMAL;
+ }
+@@ -489,9 +491,9 @@ out:
+ if (!strcmp(a->attr.name, "iostat_period_ms")) {
+ if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
+ return -EINVAL;
+- spin_lock(&sbi->iostat_lock);
++ spin_lock_irq(&sbi->iostat_lock);
+ sbi->iostat_period_ms = (unsigned int)t;
+- spin_unlock(&sbi->iostat_lock);
++ spin_unlock_irq(&sbi->iostat_lock);
+ return count;
+ }
+ #endif
+diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
+index 03549b5ba204a..d5a50e73ec32b 100644
+--- a/fs/f2fs/verity.c
++++ b/fs/f2fs/verity.c
+@@ -47,16 +47,13 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count,
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+- void *addr;
+
+ page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+ NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+- addr = kmap_atomic(page);
+- memcpy(buf, addr + offset_in_page(pos), n);
+- kunmap_atomic(addr);
++ memcpy_from_page(buf, page, offset_in_page(pos), n);
+
+ put_page(page);
+
+@@ -81,8 +78,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+- void *fsdata;
+- void *addr;
++ void *fsdata = NULL;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+@@ -90,9 +86,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ if (res)
+ return res;
+
+- addr = kmap_atomic(page);
+- memcpy(addr + offset_in_page(pos), buf, n);
+- kunmap_atomic(addr);
++ memcpy_to_page(page, offset_in_page(pos), buf, n);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+@@ -136,7 +130,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
+ * here and not rely on ->open() doing it. This must be done before
+ * evicting the inline data.
+ */
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+
+@@ -261,13 +255,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index,
+ unsigned long num_ra_pages)
+ {
+- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
+ struct page *page;
+
+ index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
+ if (!page || !PageUptodate(page)) {
++ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
++
+ if (page)
+ put_page(page);
+ else if (num_ra_pages > 1)
+diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
+index 1d2d29dcd41ce..797ac505a075a 100644
+--- a/fs/f2fs/xattr.c
++++ b/fs/f2fs/xattr.c
+@@ -684,8 +684,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
+ }
+
+ last = here;
+- while (!IS_XATTR_LAST_ENTRY(last))
++ while (!IS_XATTR_LAST_ENTRY(last)) {
++ if ((void *)(last) + sizeof(__u32) > last_base_addr ||
++ (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
++ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
++ inode->i_ino, ENTRY_SIZE(last));
++ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
++ error = -EFSCORRUPTED;
++ goto exit;
++ }
+ last = XATTR_NEXT_ENTRY(last);
++ }
+
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
+
+@@ -773,7 +782,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
+ if (!f2fs_is_checkpoint_ready(sbi))
+ return -ENOSPC;
+
+- err = dquot_initialize(inode);
++ err = f2fs_dquot_initialize(inode);
+ if (err)
+ return err;
+
+diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
+index 978ac6751aeb7..1db348f8f887a 100644
+--- a/fs/fat/fatent.c
++++ b/fs/fat/fatent.c
+@@ -94,7 +94,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
+ err_brelse:
+ brelse(bhs[0]);
+ err:
+- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
++ fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
++ (llu)blocknr);
+ return -EIO;
+ }
+
+@@ -107,8 +108,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
+ fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
+ fatent->bhs[0] = sb_bread(sb, blocknr);
+ if (!fatent->bhs[0]) {
+- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
+- (llu)blocknr);
++ fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
++ (llu)blocknr);
+ return -EIO;
+ }
+ fatent->nr_bhs = 1;
+diff --git a/fs/file.c b/fs/file.c
+index 8627dacfc4246..69a51d37b66d9 100644
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -87,6 +87,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
+ copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
+ }
+
++/*
++ * Note how the fdtable bitmap allocations very much have to be a multiple of
++ * BITS_PER_LONG. This is not only because we walk those things in chunks of
++ * 'unsigned long' in some places, but simply because that is how the Linux
++ * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
++ * they are very much "bits in an array of unsigned long".
++ *
++ * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
++ * by that "1024/sizeof(ptr)" before, we already know there are sufficient
++ * clear low bits. Clang seems to realize that, gcc ends up being confused.
++ *
++ * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
++ * let's consider it documentation (and maybe a test-case for gcc to improve
++ * its code generation ;)
++ */
+ static struct fdtable * alloc_fdtable(unsigned int nr)
+ {
+ struct fdtable *fdt;
+@@ -102,6 +117,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
+ nr /= (1024 / sizeof(struct file *));
+ nr = roundup_pow_of_two(nr + 1);
+ nr *= (1024 / sizeof(struct file *));
++ nr = ALIGN(nr, BITS_PER_LONG);
+ /*
+ * Note that this can drive nr *below* what we had passed if sysctl_nr_open
+ * had been set lower between the check in expand_files() and here. Deal
+@@ -269,6 +285,19 @@ static unsigned int count_open_files(struct fdtable *fdt)
+ return i;
+ }
+
++/*
++ * Note that a sane fdtable size always has to be a multiple of
++ * BITS_PER_LONG, since we have bitmaps that are sized by this.
++ *
++ * 'max_fds' will normally already be properly aligned, but it
++ * turns out that in the close_range() -> __close_range() ->
++ * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
++ * up having a 'max_fds' value that isn't already aligned.
++ *
++ * Rather than make close_range() have to worry about this,
++ * just make that BITS_PER_LONG alignment be part of a sane
++ * fdtable size. Because that's really what it is.
++ */
+ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
+ {
+ unsigned int count;
+@@ -276,7 +305,7 @@ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
+ count = count_open_files(fdt);
+ if (max_fds < NR_OPEN_DEFAULT)
+ max_fds = NR_OPEN_DEFAULT;
+- return min(count, max_fds);
++ return ALIGN(min(count, max_fds), BITS_PER_LONG);
+ }
+
+ /*
+@@ -617,6 +646,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd)
+ file = ERR_PTR(-EINVAL);
+ goto out_unlock;
+ }
++ fd = array_index_nospec(fd, fdt->max_fds);
+ file = fdt->fd[fd];
+ if (!file) {
+ file = ERR_PTR(-EBADF);
+@@ -841,24 +871,68 @@ void do_close_on_exec(struct files_struct *files)
+ spin_unlock(&files->file_lock);
+ }
+
++static inline struct file *__fget_files_rcu(struct files_struct *files,
++ unsigned int fd, fmode_t mask, unsigned int refs)
++{
++ for (;;) {
++ struct file *file;
++ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
++ struct file __rcu **fdentry;
++
++ if (unlikely(fd >= fdt->max_fds))
++ return NULL;
++
++ fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
++ file = rcu_dereference_raw(*fdentry);
++ if (unlikely(!file))
++ return NULL;
++
++ if (unlikely(file->f_mode & mask))
++ return NULL;
++
++ /*
++ * Ok, we have a file pointer. However, because we do
++ * this all locklessly under RCU, we may be racing with
++ * that file being closed.
++ *
++ * Such a race can take two forms:
++ *
++ * (a) the file ref already went down to zero,
++ * and get_file_rcu_many() fails. Just try
++ * again:
++ */
++ if (unlikely(!get_file_rcu_many(file, refs)))
++ continue;
++
++ /*
++ * (b) the file table entry has changed under us.
++ * Note that we don't need to re-check the 'fdt->fd'
++ * pointer having changed, because it always goes
++ * hand-in-hand with 'fdt'.
++ *
++ * If so, we need to put our refs and try again.
++ */
++ if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
++ unlikely(rcu_dereference_raw(*fdentry) != file)) {
++ fput_many(file, refs);
++ continue;
++ }
++
++ /*
++ * Ok, we have a ref to the file, and checked that it
++ * still exists.
++ */
++ return file;
++ }
++}
++
+ static struct file *__fget_files(struct files_struct *files, unsigned int fd,
+ fmode_t mask, unsigned int refs)
+ {
+ struct file *file;
+
+ rcu_read_lock();
+-loop:
+- file = files_lookup_fd_rcu(files, fd);
+- if (file) {
+- /* File object ref couldn't be taken.
+- * dup2() atomicity guarantee is the reason
+- * we loop to catch the new file (or NULL pointer)
+- */
+- if (file->f_mode & mask)
+- file = NULL;
+- else if (!get_file_rcu_many(file, refs))
+- goto loop;
+- }
++ file = __fget_files_rcu(files, fd, mask, refs);
+ rcu_read_unlock();
+
+ return file;
+@@ -956,7 +1030,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
+ struct files_struct *files = current->files;
+ struct file *file;
+
+- if (atomic_read(&files->count) == 1) {
++ /*
++ * If another thread is concurrently calling close_fd() followed
++ * by put_files_struct(), we must not observe the old table
++ * entry combined with the new refcount - otherwise we could
++ * return a file that is concurrently being freed.
++ *
++ * atomic_read_acquire() pairs with atomic_dec_and_test() in
++ * put_files_struct().
++ */
++ if (atomic_read_acquire(&files->count) == 1) {
+ file = files_lookup_fd_raw(files, fd);
+ if (!file || unlikely(file->f_mode & mask))
+ return 0;
+@@ -979,16 +1062,30 @@ unsigned long __fdget_raw(unsigned int fd)
+ return __fget_light(fd, 0);
+ }
+
++/*
++ * Try to avoid f_pos locking. We only need it if the
++ * file is marked for FMODE_ATOMIC_POS, and it can be
++ * accessed multiple ways.
++ *
++ * Always do it for directories, because pidfd_getfd()
++ * can make a file accessible even if it otherwise would
++ * not be, and for directories this is a correctness
++ * issue, not a "POSIX requirement".
++ */
++static inline bool file_needs_f_pos_lock(struct file *file)
++{
++ return (file->f_mode & FMODE_ATOMIC_POS) &&
++ (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode));
++}
++
+ unsigned long __fdget_pos(unsigned int fd)
+ {
+ unsigned long v = __fdget(fd);
+ struct file *file = (struct file *)(v & ~3);
+
+- if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+- if (file_count(file) > 1) {
+- v |= FDPUT_POS_UNLOCK;
+- mutex_lock(&file->f_pos_lock);
+- }
++ if (file && file_needs_f_pos_lock(file)) {
++ v |= FDPUT_POS_UNLOCK;
++ mutex_lock(&file->f_pos_lock);
+ }
+ return v;
+ }
+diff --git a/fs/file_table.c b/fs/file_table.c
+index 45437f8e1003e..6f297f9782fc5 100644
+--- a/fs/file_table.c
++++ b/fs/file_table.c
+@@ -284,12 +284,7 @@ static void __fput(struct file *file)
+ }
+ fops_put(file->f_op);
+ put_pid(file->f_owner.pid);
+- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+- i_readcount_dec(inode);
+- if (mode & FMODE_WRITER) {
+- put_write_access(inode);
+- __mnt_drop_write(mnt);
+- }
++ put_file_access(file);
+ dput(dentry);
+ if (unlikely(mode & FMODE_NEED_UNMOUNT))
+ dissolve_on_fput(mnt);
+@@ -375,6 +370,7 @@ void __fput_sync(struct file *file)
+ }
+
+ EXPORT_SYMBOL(fput);
++EXPORT_SYMBOL(__fput_sync);
+
+ void __init files_init(void)
+ {
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 81ec192ce0673..c76537a6826a7 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode,
+ struct list_head *head)
+ {
+ assert_spin_locked(&wb->list_lock);
++ assert_spin_locked(&inode->i_lock);
+
+ list_move(&inode->i_io_list, head);
+
+@@ -133,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode,
+
+ static void wb_wakeup(struct bdi_writeback *wb)
+ {
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ static void finish_writeback_work(struct bdi_writeback *wb,
+@@ -163,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
+ if (work->done)
+ atomic_inc(&work->done->cnt);
+
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+@@ -171,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
+ } else
+ finish_writeback_work(wb, work);
+
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ /**
+@@ -824,7 +825,7 @@ void wbc_detach_inode(struct writeback_control *wbc)
+ * is okay. The main goal is avoiding keeping an inode on
+ * the wrong wb for an extended period of time.
+ */
+- if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
++ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
+ inode_switch_wbs(inode, max_id);
+ }
+
+@@ -1008,6 +1009,16 @@ restart:
+ continue;
+ }
+
++ /*
++ * If wb_tryget fails, the wb has been shut down, skip it.
++ *
++ * Pin @wb so that it stays on @bdi->wb_list. This allows
++ * continuing iteration from @wb after dropping and
++ * regrabbing rcu read lock.
++ */
++ if (!wb_tryget(wb))
++ continue;
++
+ /* alloc failed, execute synchronously using on-stack fallback */
+ work = &fallback_work;
+ *work = *base_work;
+@@ -1016,13 +1027,6 @@ restart:
+ work->done = &fallback_work_done;
+
+ wb_queue_work(wb, work);
+-
+- /*
+- * Pin @wb so that it stays on @bdi->wb_list. This allows
+- * continuing iteration from @wb after dropping and
+- * regrabbing rcu read lock.
+- */
+- wb_get(wb);
+ last_wb = wb;
+
+ rcu_read_unlock();
+@@ -1400,9 +1404,9 @@ static int move_expired_inodes(struct list_head *delaying_queue,
+ inode = wb_inode(delaying_queue->prev);
+ if (inode_dirtied_after(inode, dirtied_before))
+ break;
++ spin_lock(&inode->i_lock);
+ list_move(&inode->i_io_list, &tmp);
+ moved++;
+- spin_lock(&inode->i_lock);
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
+ if (sb_is_blkdev_sb(inode->i_sb))
+@@ -1418,7 +1422,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
+ goto out;
+ }
+
+- /* Move inodes from one superblock together */
++ /*
++ * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
++ * we don't take inode->i_lock here because it is just a pointless overhead.
++ * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
++ * fully under our control.
++ */
+ while (!list_empty(&tmp)) {
+ sb = wb_inode(tmp.prev)->i_sb;
+ list_for_each_prev_safe(pos, node, &tmp) {
+@@ -1733,12 +1742,29 @@ static int writeback_single_inode(struct inode *inode,
+ wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
+ /*
+- * If the inode is now fully clean, then it can be safely removed from
+- * its writeback list (if any). Otherwise the flusher threads are
+- * responsible for the writeback lists.
++ * If the inode is freeing, its i_io_list shouldn't be updated
++ * as it can be finally deleted at this moment.
+ */
+- if (!(inode->i_state & I_DIRTY_ALL))
+- inode_cgwb_move_to_attached(inode, wb);
++ if (!(inode->i_state & I_FREEING)) {
++ /*
++ * If the inode is now fully clean, then it can be safely
++ * removed from its writeback list (if any). Otherwise the
++ * flusher threads are responsible for the writeback lists.
++ */
++ if (!(inode->i_state & I_DIRTY_ALL))
++ inode_cgwb_move_to_attached(inode, wb);
++ else if (!(inode->i_state & I_SYNC_QUEUED)) {
++ if ((inode->i_state & I_DIRTY))
++ redirty_tail_locked(inode, wb);
++ else if (inode->i_state & I_DIRTY_TIME) {
++ inode->dirtied_when = jiffies;
++ inode_io_list_move_locked(inode,
++ wb,
++ &wb->b_dirty_time);
++ }
++ }
++ }
++
+ spin_unlock(&wb->list_lock);
+ inode_sync_complete(inode);
+ out:
+@@ -1802,11 +1828,12 @@ static long writeback_sb_inodes(struct super_block *sb,
+ };
+ unsigned long start_time = jiffies;
+ long write_chunk;
+- long wrote = 0; /* count both pages and inodes */
++ long total_wrote = 0; /* count both pages and inodes */
+
+ while (!list_empty(&wb->b_io)) {
+ struct inode *inode = wb_inode(wb->b_io.prev);
+ struct bdi_writeback *tmp_wb;
++ long wrote;
+
+ if (inode->i_sb != sb) {
+ if (work->sb) {
+@@ -1848,8 +1875,8 @@ static long writeback_sb_inodes(struct super_block *sb,
+ * We'll have another go at writing back this inode
+ * when we completed a full scan of b_io.
+ */
+- spin_unlock(&inode->i_lock);
+ requeue_io(inode, wb);
++ spin_unlock(&inode->i_lock);
+ trace_writeback_sb_inodes_requeue(inode);
+ continue;
+ }
+@@ -1882,7 +1909,9 @@ static long writeback_sb_inodes(struct super_block *sb,
+
+ wbc_detach_inode(&wbc);
+ work->nr_pages -= write_chunk - wbc.nr_to_write;
+- wrote += write_chunk - wbc.nr_to_write;
++ wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
++ wrote = wrote < 0 ? 0 : wrote;
++ total_wrote += wrote;
+
+ if (need_resched()) {
+ /*
+@@ -1904,7 +1933,7 @@ static long writeback_sb_inodes(struct super_block *sb,
+ tmp_wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_state & I_DIRTY_ALL))
+- wrote++;
++ total_wrote++;
+ requeue_inode(inode, tmp_wb, &wbc);
+ inode_sync_complete(inode);
+ spin_unlock(&inode->i_lock);
+@@ -1918,14 +1947,14 @@ static long writeback_sb_inodes(struct super_block *sb,
+ * bail out to wb_writeback() often enough to check
+ * background threshold and other termination conditions.
+ */
+- if (wrote) {
++ if (total_wrote) {
+ if (time_is_before_jiffies(start_time + HZ / 10UL))
+ break;
+ if (work->nr_pages <= 0)
+ break;
+ }
+ }
+- return wrote;
++ return total_wrote;
+ }
+
+ static long __writeback_inodes_wb(struct bdi_writeback *wb,
+@@ -2096,13 +2125,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
+ {
+ struct wb_writeback_work *work = NULL;
+
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (!list_empty(&wb->work_list)) {
+ work = list_entry(wb->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
+ }
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ return work;
+ }
+
+@@ -2382,10 +2411,25 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ {
+ struct super_block *sb = inode->i_sb;
+ int dirtytime = 0;
++ struct bdi_writeback *wb = NULL;
+
+ trace_writeback_mark_inode_dirty(inode, flags);
+
+ if (flags & I_DIRTY_INODE) {
++ /*
++ * Inode timestamp update will piggyback on this dirtying.
++ * We tell ->dirty_inode callback that timestamps need to
++ * be updated by setting I_DIRTY_TIME in flags.
++ */
++ if (inode->i_state & I_DIRTY_TIME) {
++ spin_lock(&inode->i_lock);
++ if (inode->i_state & I_DIRTY_TIME) {
++ inode->i_state &= ~I_DIRTY_TIME;
++ flags |= I_DIRTY_TIME;
++ }
++ spin_unlock(&inode->i_lock);
++ }
++
+ /*
+ * Notify the filesystem about the inode being dirtied, so that
+ * (if needed) it can update on-disk fields and journal the
+@@ -2395,7 +2439,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ trace_writeback_dirty_inode_start(inode, flags);
+ if (sb->s_op->dirty_inode)
+- sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
++ sb->s_op->dirty_inode(inode,
++ flags & (I_DIRTY_INODE | I_DIRTY_TIME));
+ trace_writeback_dirty_inode(inode, flags);
+
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+@@ -2416,23 +2461,28 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ smp_mb();
+
+- if (((inode->i_state & flags) == flags) ||
+- (dirtytime && (inode->i_state & I_DIRTY_INODE)))
++ if ((inode->i_state & flags) == flags)
+ return;
+
+ spin_lock(&inode->i_lock);
+- if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+- goto out_unlock_inode;
+ if ((inode->i_state & flags) != flags) {
+ const int was_dirty = inode->i_state & I_DIRTY;
+
+ inode_attach_wb(inode, NULL);
+
+- /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
+- if (flags & I_DIRTY_INODE)
+- inode->i_state &= ~I_DIRTY_TIME;
+ inode->i_state |= flags;
+
++ /*
++ * Grab inode's wb early because it requires dropping i_lock and we
++ * need to make sure following checks happen atomically with dirty
++ * list handling so that we don't move inodes under flush worker's
++ * hands.
++ */
++ if (!was_dirty) {
++ wb = locked_inode_to_wb_and_lock_list(inode);
++ spin_lock(&inode->i_lock);
++ }
++
+ /*
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+@@ -2440,7 +2490,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ * list, based upon its state.
+ */
+ if (inode->i_state & I_SYNC_QUEUED)
+- goto out_unlock_inode;
++ goto out_unlock;
+
+ /*
+ * Only add valid (hashed) inodes to the superblock's
+@@ -2448,22 +2498,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ */
+ if (!S_ISBLK(inode->i_mode)) {
+ if (inode_unhashed(inode))
+- goto out_unlock_inode;
++ goto out_unlock;
+ }
+ if (inode->i_state & I_FREEING)
+- goto out_unlock_inode;
++ goto out_unlock;
+
+ /*
+ * If the inode was already on b_dirty/b_io/b_more_io, don't
+ * reposition it (that would break b_dirty time-ordering).
+ */
+ if (!was_dirty) {
+- struct bdi_writeback *wb;
+ struct list_head *dirty_list;
+ bool wakeup_bdi = false;
+
+- wb = locked_inode_to_wb_and_lock_list(inode);
+-
+ inode->dirtied_when = jiffies;
+ if (dirtytime)
+ inode->dirtied_time_when = jiffies;
+@@ -2477,6 +2524,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ dirty_list);
+
+ spin_unlock(&wb->list_lock);
++ spin_unlock(&inode->i_lock);
+ trace_writeback_dirty_inode_enqueue(inode);
+
+ /*
+@@ -2491,7 +2539,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ return;
+ }
+ }
+-out_unlock_inode:
++out_unlock:
++ if (wb)
++ spin_unlock(&wb->list_lock);
+ spin_unlock(&inode->i_lock);
+ }
+ EXPORT_SYMBOL(__mark_inode_dirty);
+diff --git a/fs/fs_context.c b/fs/fs_context.c
+index b7e43a780a625..851214d1d013d 100644
+--- a/fs/fs_context.c
++++ b/fs/fs_context.c
+@@ -548,7 +548,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
+ param->key);
+ }
+
+- if (len > PAGE_SIZE - 2 - size)
++ if (size + len + 2 > PAGE_SIZE)
+ return invalf(fc, "VFS: Legacy: Cumulative options too large");
+ if (strchr(param->key, ',') ||
+ (param->type == fs_value_is_string &&
+@@ -561,7 +561,8 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
+ return -ENOMEM;
+ }
+
+- ctx->legacy_data[size++] = ',';
++ if (size)
++ ctx->legacy_data[size++] = ',';
+ len = strlen(param->key);
+ memcpy(ctx->legacy_data + size, param->key, len);
+ size += len;
+diff --git a/fs/fuse/control.c b/fs/fuse/control.c
+index 000d2e5627e99..79f01d09c78cb 100644
+--- a/fs/fuse/control.c
++++ b/fs/fuse/control.c
+@@ -275,7 +275,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
+ struct dentry *parent;
+ char name[32];
+
+- if (!fuse_control_sb)
++ if (!fuse_control_sb || fc->no_control)
+ return 0;
+
+ parent = fuse_control_sb->s_root;
+@@ -313,7 +313,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
+ {
+ int i;
+
+- if (!fuse_control_sb)
++ if (!fuse_control_sb || fc->no_control)
+ return;
+
+ for (i = fc->ctl_ndents - 1; i >= 0; i--) {
+diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
+index dde341a6388a1..d6b5339c56e2c 100644
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -852,6 +852,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+ if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+ lru_cache_add(newpage);
+
++ /*
++ * Release while we have extra ref on stolen page. Otherwise
++ * anon_pipe_buf_release() might think the page can be reused.
++ */
++ pipe_buf_release(cs->pipe, buf);
++
+ err = 0;
+ spin_lock(&cs->req->waitq.lock);
+ if (test_bit(FR_ABORTED, &cs->req->flags))
+@@ -935,7 +941,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+
+ while (count) {
+ if (cs->write && cs->pipebufs && page) {
+- return fuse_ref_page(cs, page, offset, count);
++ /*
++ * Can't control lifetime of pipe buffers, so always
++ * copy user pages.
++ */
++ if (cs->req->args->user_pages) {
++ err = fuse_copy_fill(cs);
++ if (err)
++ return err;
++ } else {
++ return fuse_ref_page(cs, page, offset, count);
++ }
+ } else if (!cs->len) {
+ if (cs->move_pages && page &&
+ offset == 0 && count == PAGE_SIZE) {
+@@ -2031,8 +2047,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
+
+ pipe_lock(pipe);
+ out_free:
+- for (idx = 0; idx < nbuf; idx++)
+- pipe_buf_release(pipe, &bufs[idx]);
++ for (idx = 0; idx < nbuf; idx++) {
++ struct pipe_buffer *buf = &bufs[idx];
++
++ if (buf->ops)
++ pipe_buf_release(pipe, buf);
++ }
+ pipe_unlock(pipe);
+
+ kvfree(bufs);
+diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
+index d9b977c0f38dc..4ea52906ae150 100644
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -205,7 +205,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+ if (inode && fuse_is_bad(inode))
+ goto invalid;
+ else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+- (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
++ (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
+ struct fuse_entry_out outarg;
+ FUSE_ARGS(args);
+ struct fuse_forget_link *forget;
+@@ -249,7 +249,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+ spin_unlock(&fi->lock);
+ }
+ kfree(forget);
+- if (ret == -ENOMEM)
++ if (ret == -ENOMEM || ret == -EINTR)
+ goto out;
+ if (ret || fuse_invalid_attr(&outarg.attr) ||
+ fuse_stale_inode(inode, outarg.generation, &outarg.attr))
+@@ -476,6 +476,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+ struct fuse_entry_out outentry;
+ struct fuse_inode *fi;
+ struct fuse_file *ff;
++ bool trunc = flags & O_TRUNC;
+
+ /* Userspace expects S_IFREG in create mode */
+ BUG_ON((mode & S_IFMT) != S_IFREG);
+@@ -500,7 +501,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+ inarg.mode = mode;
+ inarg.umask = current_umask();
+
+- if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
++ if (fm->fc->handle_killpriv_v2 && trunc &&
+ !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+ inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+ }
+@@ -549,6 +550,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+ } else {
+ file->private_data = ff;
+ fuse_finish_open(inode, file);
++ if (fm->fc->atomic_o_trunc && trunc)
++ truncate_pagecache(inode, 0);
++ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++ invalidate_inode_pages2(inode->i_mapping);
+ }
+ return err;
+
+@@ -738,11 +743,19 @@ static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ return create_new_entry(fm, &args, dir, entry, S_IFLNK);
+ }
+
++void fuse_flush_time_update(struct inode *inode)
++{
++ int err = sync_inode_metadata(inode, 1);
++
++ mapping_set_error(inode->i_mapping, err);
++}
++
+ void fuse_update_ctime(struct inode *inode)
+ {
+ if (!IS_NOCMTIME(inode)) {
+ inode->i_ctime = current_time(inode);
+ mark_inode_dirty_sync(inode);
++ fuse_flush_time_update(inode);
+ }
+ }
+
+@@ -1071,7 +1084,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
+ if (!parent)
+ return -ENOENT;
+
+- inode_lock(parent);
++ inode_lock_nested(parent, I_MUTEX_PARENT);
+ if (!S_ISDIR(parent->i_mode))
+ goto unlock;
+
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index 11404f8c21c75..2c4cac6104c91 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -210,12 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ i_size_write(inode, 0);
+ spin_unlock(&fi->lock);
+- truncate_pagecache(inode, 0);
+ fuse_invalidate_attr(inode);
+ if (fc->writeback_cache)
+ file_update_time(file);
+- } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
+- invalidate_inode_pages2(inode->i_mapping);
+ }
+
+ if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+@@ -240,30 +237,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+ if (err)
+ return err;
+
+- if (is_wb_truncate || dax_truncate) {
++ if (is_wb_truncate || dax_truncate)
+ inode_lock(inode);
+- fuse_set_nowrite(inode);
+- }
+
+ if (dax_truncate) {
+ filemap_invalidate_lock(inode->i_mapping);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+- goto out;
++ goto out_inode_unlock;
+ }
+
++ if (is_wb_truncate || dax_truncate)
++ fuse_set_nowrite(inode);
++
+ err = fuse_do_open(fm, get_node_id(inode), file, isdir);
+ if (!err)
+ fuse_finish_open(inode, file);
+
+-out:
++ if (is_wb_truncate || dax_truncate)
++ fuse_release_nowrite(inode);
++ if (!err) {
++ struct fuse_file *ff = file->private_data;
++
++ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
++ truncate_pagecache(inode, 0);
++ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++ invalidate_inode_pages2(inode->i_mapping);
++ }
+ if (dax_truncate)
+ filemap_invalidate_unlock(inode->i_mapping);
+-
+- if (is_wb_truncate | dax_truncate) {
+- fuse_release_nowrite(inode);
++out_inode_unlock:
++ if (is_wb_truncate || dax_truncate)
+ inode_unlock(inode);
+- }
+
+ return err;
+ }
+@@ -793,7 +798,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+- if (attr_ver == fi->attr_version && size < inode->i_size &&
++ if (attr_ver >= fi->attr_version && size < inode->i_size &&
+ !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ i_size_write(inode, size);
+@@ -1164,7 +1169,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
+
+ again:
+ err = -EFAULT;
+- if (iov_iter_fault_in_readable(ii, bytes))
++ if (fault_in_iov_iter_readable(ii, bytes))
+ break;
+
+ err = -ENOMEM;
+@@ -1295,7 +1300,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ return err;
+
+ if (fc->handle_killpriv_v2 &&
+- should_remove_suid(file_dentry(file))) {
++ setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ goto writethrough;
+ }
+
+@@ -1417,6 +1422,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
+ (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
+ }
+
++ ap->args.user_pages = true;
+ if (write)
+ ap->args.in_pages = true;
+ else
+@@ -1848,6 +1854,17 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
+ struct fuse_file *ff;
+ int err;
+
++ /*
++ * Inode is always written before the last reference is dropped and
++ * hence this should not be reached from reclaim.
++ *
++ * Writing back the inode from reclaim can deadlock if the request
++ * processing itself needs an allocation. Allocations triggering
++ * reclaim while serving a request can't be prevented, because it can
++ * involve any number of unrelated userspace processes.
++ */
++ WARN_ON(wbc->for_reclaim);
++
+ ff = __fuse_write_file_get(fi);
+ err = fuse_flush_times(inode, ff);
+ if (ff)
+@@ -2902,7 +2919,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+
+ static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
+ {
+- int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
++ int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
+
+ if (!err)
+ fuse_sync_writes(inode);
+@@ -2925,11 +2942,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+ .mode = mode
+ };
+ int err;
+- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
+- (mode & (FALLOC_FL_PUNCH_HOLE |
+- FALLOC_FL_ZERO_RANGE));
+-
+- bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
++ bool block_faults = FUSE_IS_DAX(inode) &&
++ (!(mode & FALLOC_FL_KEEP_SIZE) ||
++ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_ZERO_RANGE))
+@@ -2938,22 +2953,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+ if (fm->fc->no_fallocate)
+ return -EOPNOTSUPP;
+
+- if (lock_inode) {
+- inode_lock(inode);
+- if (block_faults) {
+- filemap_invalidate_lock(inode->i_mapping);
+- err = fuse_dax_break_layouts(inode, 0, 0);
+- if (err)
+- goto out;
+- }
++ inode_lock(inode);
++ if (block_faults) {
++ filemap_invalidate_lock(inode->i_mapping);
++ err = fuse_dax_break_layouts(inode, 0, 0);
++ if (err)
++ goto out;
++ }
+
+- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
+- loff_t endbyte = offset + length - 1;
++ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
++ loff_t endbyte = offset + length - 1;
+
+- err = fuse_writeback_range(inode, offset, endbyte);
+- if (err)
+- goto out;
+- }
++ err = fuse_writeback_range(inode, offset, endbyte);
++ if (err)
++ goto out;
+ }
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+@@ -2963,6 +2976,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+ goto out;
+ }
+
++ err = file_modified(file);
++ if (err)
++ goto out;
++
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+@@ -2999,8 +3016,9 @@ out:
+ if (block_faults)
+ filemap_invalidate_unlock(inode->i_mapping);
+
+- if (lock_inode)
+- inode_unlock(inode);
++ inode_unlock(inode);
++
++ fuse_flush_time_update(inode);
+
+ return err;
+ }
+@@ -3111,6 +3129,8 @@ out:
+ inode_unlock(inode_out);
+ file_accessed(file_in);
+
++ fuse_flush_time_update(inode_out);
++
+ return err;
+ }
+
+diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
+index f55f9f94b1a4f..c3a87586a15fa 100644
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -256,6 +256,7 @@ struct fuse_args {
+ bool nocreds:1;
+ bool in_pages:1;
+ bool out_pages:1;
++ bool user_pages:1;
+ bool out_argvar:1;
+ bool page_zeroing:1;
+ bool page_replace:1;
+@@ -1148,6 +1149,7 @@ int fuse_allow_current_process(struct fuse_conn *fc);
+
+ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
+
++void fuse_flush_time_update(struct inode *inode);
+ void fuse_update_ctime(struct inode *inode);
+
+ int fuse_update_attributes(struct inode *inode, struct file *file);
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 12d49a1914e84..50365143f50e8 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -118,6 +118,9 @@ static void fuse_evict_inode(struct inode *inode)
+ {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
++ /* Will write inode on close/munmap and in all other dirtiers */
++ WARN_ON(inode->i_state & I_DIRTY_INODE);
++
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+ if (inode->i_sb->s_flags & SB_ACTIVE) {
+@@ -178,6 +181,12 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ inode->i_uid = make_kuid(fc->user_ns, attr->uid);
+ inode->i_gid = make_kgid(fc->user_ns, attr->gid);
+ inode->i_blocks = attr->blocks;
++
++ /* Sanitize nsecs */
++ attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
++ attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
++ attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);
++
+ inode->i_atime.tv_sec = attr->atime;
+ inode->i_atime.tv_nsec = attr->atimensec;
+ /* mtime from server may be stale due to local buffered write */
+diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
+index 546ea3d58fb47..2f26b5c8e9550 100644
+--- a/fs/fuse/ioctl.c
++++ b/fs/fuse/ioctl.c
+@@ -9,6 +9,26 @@
+ #include <linux/compat.h>
+ #include <linux/fileattr.h>
+
++static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
++ struct fuse_ioctl_out *outarg)
++{
++ ssize_t ret;
++
++ args->out_args[0].size = sizeof(*outarg);
++ args->out_args[0].value = outarg;
++
++ ret = fuse_simple_request(fm, args);
++
++ /* Translate ENOSYS, which shouldn't be returned from fs */
++ if (ret == -ENOSYS)
++ ret = -ENOTTY;
++
++ if (ret >= 0 && outarg->result == -ENOSYS)
++ outarg->result = -ENOTTY;
++
++ return ret;
++}
++
+ /*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+@@ -253,13 +273,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ }
+
+ ap.args.out_numargs = 2;
+- ap.args.out_args[0].size = sizeof(outarg);
+- ap.args.out_args[0].value = &outarg;
+ ap.args.out_args[1].size = out_size;
+ ap.args.out_pages = true;
+ ap.args.out_argvar = true;
+
+- transferred = fuse_simple_request(fm, &ap.args);
++ transferred = fuse_send_ioctl(fm, &ap.args, &outarg);
+ err = transferred;
+ if (transferred < 0)
+ goto out;
+@@ -388,15 +406,16 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
+ args.in_args[1].size = inarg.in_size;
+ args.in_args[1].value = ptr;
+ args.out_numargs = 2;
+- args.out_args[0].size = sizeof(outarg);
+- args.out_args[0].value = &outarg;
+ args.out_args[1].size = inarg.out_size;
+ args.out_args[1].value = ptr;
+
+- err = fuse_simple_request(fm, &args);
+- if (!err && outarg.flags & FUSE_IOCTL_RETRY)
+- err = -EIO;
+-
++ err = fuse_send_ioctl(fm, &args, &outarg);
++ if (!err) {
++ if (outarg.result < 0)
++ err = outarg.result;
++ else if (outarg.flags & FUSE_IOCTL_RETRY)
++ err = -EIO;
++ }
+ return err;
+ }
+
+@@ -405,6 +424,12 @@ static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode)
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ bool isdir = S_ISDIR(inode->i_mode);
+
++ if (!fuse_allow_current_process(fm->fc))
++ return ERR_PTR(-EACCES);
++
++ if (fuse_is_bad(inode))
++ return ERR_PTR(-EIO);
++
+ if (!S_ISREG(inode->i_mode) && !isdir)
+ return ERR_PTR(-ENOTTY);
+
+diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
+index bc267832310c7..14e99ffa57af9 100644
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
+ goto unlock;
+
+ addr = kmap_atomic(page);
+- if (!offset)
++ if (!offset) {
+ clear_page(addr);
++ SetPageUptodate(page);
++ }
+ memcpy(addr + offset, dirent, reclen);
+ kunmap_atomic(addr);
+ fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
+@@ -241,8 +243,16 @@ retry:
+ dput(dentry);
+ dentry = alias;
+ }
+- if (IS_ERR(dentry))
++ if (IS_ERR(dentry)) {
++ if (!IS_ERR(inode)) {
++ struct fuse_inode *fi = get_fuse_inode(inode);
++
++ spin_lock(&fi->lock);
++ fi->nlookup--;
++ spin_unlock(&fi->lock);
++ }
+ return PTR_ERR(dentry);
++ }
+ }
+ if (fc->readdirplus_auto)
+ set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
+@@ -516,6 +526,12 @@ retry_locked:
+
+ page = find_get_page_flags(file->f_mapping, index,
+ FGP_ACCESSED | FGP_LOCK);
++ /* Page gone missing, then re-added to cache, but not initialized? */
++ if (page && !PageUptodate(page)) {
++ unlock_page(page);
++ put_page(page);
++ page = NULL;
++ }
+ spin_lock(&fi->rdc.lock);
+ if (!page) {
+ /*
+diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
+index 005e920f5d4a3..2b654c3b918a3 100644
+--- a/fs/gfs2/aops.c
++++ b/fs/gfs2/aops.c
+@@ -152,7 +152,6 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
+ {
+ struct inode *inode = page->mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+- struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ if (PageChecked(page)) {
+ ClearPageChecked(page);
+@@ -160,7 +159,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
+ create_empty_buffers(page, inode->i_sb->s_blocksize,
+ BIT(BH_Dirty)|BIT(BH_Uptodate));
+ }
+- gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
++ gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE);
+ }
+ return gfs2_write_jdata_page(page, wbc);
+ }
+@@ -208,13 +207,13 @@ static int gfs2_writepages(struct address_space *mapping,
+ int ret;
+
+ /*
+- * Even if we didn't write any pages here, we might still be holding
++ * Even if we didn't write enough pages here, we might still be holding
+ * dirty pages in the ail. We forcibly flush the ail because we don't
+ * want balance_dirty_pages() to loop indefinitely trying to write out
+ * pages held in the ail that it can't find.
+ */
+ ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
+- if (ret == 0)
++ if (ret == 0 && wbc->nr_to_write > 0)
+ set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+ return ret;
+ }
+@@ -452,8 +451,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+ return error;
+
+ kaddr = kmap_atomic(page);
+- if (dsize > gfs2_max_stuffed_size(ip))
+- dsize = gfs2_max_stuffed_size(ip);
+ memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
+ memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
+ kunmap_atomic(kaddr);
+diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
+index 5414c2c335809..0ec1eaf338338 100644
+--- a/fs/gfs2/bmap.c
++++ b/fs/gfs2/bmap.c
+@@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
+ void *kaddr = kmap(page);
+ u64 dsize = i_size_read(inode);
+
+- if (dsize > gfs2_max_stuffed_size(ip))
+- dsize = gfs2_max_stuffed_size(ip);
+-
+ memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
+ memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
+ kunmap(page);
+@@ -940,7 +937,7 @@ do_alloc:
+ else if (height == ip->i_height)
+ ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
+ else
+- iomap->length = size - pos;
++ iomap->length = size - iomap->offset;
+ } else if (flags & IOMAP_WRITE) {
+ u64 alloc_size;
+
+@@ -961,46 +958,6 @@ hole_found:
+ goto out;
+ }
+
+-static int gfs2_write_lock(struct inode *inode)
+-{
+- struct gfs2_inode *ip = GFS2_I(inode);
+- struct gfs2_sbd *sdp = GFS2_SB(inode);
+- int error;
+-
+- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+- error = gfs2_glock_nq(&ip->i_gh);
+- if (error)
+- goto out_uninit;
+- if (&ip->i_inode == sdp->sd_rindex) {
+- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+-
+- error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
+- GL_NOCACHE, &m_ip->i_gh);
+- if (error)
+- goto out_unlock;
+- }
+- return 0;
+-
+-out_unlock:
+- gfs2_glock_dq(&ip->i_gh);
+-out_uninit:
+- gfs2_holder_uninit(&ip->i_gh);
+- return error;
+-}
+-
+-static void gfs2_write_unlock(struct inode *inode)
+-{
+- struct gfs2_inode *ip = GFS2_I(inode);
+- struct gfs2_sbd *sdp = GFS2_SB(inode);
+-
+- if (&ip->i_inode == sdp->sd_rindex) {
+- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+-
+- gfs2_glock_dq_uninit(&m_ip->i_gh);
+- }
+- gfs2_glock_dq_uninit(&ip->i_gh);
+-}
+-
+ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
+ unsigned len)
+ {
+@@ -1118,11 +1075,6 @@ out_qunlock:
+ return ret;
+ }
+
+-static inline bool gfs2_iomap_need_write_lock(unsigned flags)
+-{
+- return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT);
+-}
+-
+ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap,
+ struct iomap *srcmap)
+@@ -1135,12 +1087,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
+ trace_gfs2_iomap_start(ip, pos, length, flags);
+- if (gfs2_iomap_need_write_lock(flags)) {
+- ret = gfs2_write_lock(inode);
+- if (ret)
+- goto out;
+- }
+-
+ ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ if (ret)
+ goto out_unlock;
+@@ -1168,10 +1114,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+ ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
+
+ out_unlock:
+- if (ret && gfs2_iomap_need_write_lock(flags))
+- gfs2_write_unlock(inode);
+ release_metapath(&mp);
+-out:
+ trace_gfs2_iomap_end(ip, iomap, ret);
+ return ret;
+ }
+@@ -1208,26 +1151,21 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+
+ if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+ /* Deallocate blocks that were just allocated. */
+- loff_t blockmask = i_blocksize(inode) - 1;
+- loff_t end = (pos + length) & ~blockmask;
++ loff_t hstart = round_up(pos + written, i_blocksize(inode));
++ loff_t hend = iomap->offset + iomap->length;
+
+- pos = (pos + written + blockmask) & ~blockmask;
+- if (pos < end) {
+- truncate_pagecache_range(inode, pos, end - 1);
+- punch_hole(ip, pos, end - pos);
++ if (hstart < hend) {
++ truncate_pagecache_range(inode, hstart, hend - 1);
++ punch_hole(ip, hstart, hend - hstart);
+ }
+ }
+
+ if (unlikely(!written))
+- goto out_unlock;
++ return 0;
+
+ if (iomap->flags & IOMAP_F_SIZE_CHANGED)
+ mark_inode_dirty(inode);
+ set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
+-
+-out_unlock:
+- if (gfs2_iomap_need_write_lock(flags))
+- gfs2_write_unlock(inode);
+ return 0;
+ }
+
+@@ -2204,7 +2142,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
+
+ ret = do_shrink(inode, newsize);
+ out:
+- gfs2_rs_delete(ip, NULL);
++ gfs2_rs_delete(ip);
+ gfs2_qa_put(ip);
+ return ret;
+ }
+diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
+index c559827cb6f91..e93185d804e0a 100644
+--- a/fs/gfs2/file.c
++++ b/fs/gfs2/file.c
+@@ -711,10 +711,11 @@ static int gfs2_release(struct inode *inode, struct file *file)
+ kfree(file->private_data);
+ file->private_data = NULL;
+
+- if (gfs2_rs_active(&ip->i_res))
+- gfs2_rs_delete(ip, &inode->i_writecount);
+- if (file->f_mode & FMODE_WRITE)
++ if (file->f_mode & FMODE_WRITE) {
++ if (gfs2_rs_active(&ip->i_res))
++ gfs2_rs_delete(ip);
+ gfs2_qa_put(ip);
++ }
+ return 0;
+ }
+
+@@ -776,27 +777,99 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
+ return ret ? ret : ret1;
+ }
+
++static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
++ size_t *prev_count,
++ size_t *window_size)
++{
++ char __user *p = i->iov[0].iov_base + i->iov_offset;
++ size_t count = iov_iter_count(i);
++ int pages = 1;
++
++ if (likely(!count))
++ return false;
++ if (ret <= 0 && ret != -EFAULT)
++ return false;
++ if (!iter_is_iovec(i))
++ return false;
++
++ if (*prev_count != count || !*window_size) {
++ int pages, nr_dirtied;
++
++ pages = min_t(int, BIO_MAX_VECS,
++ DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
++ nr_dirtied = max(current->nr_dirtied_pause -
++ current->nr_dirtied, 1);
++ pages = min(pages, nr_dirtied);
++ }
++
++ *prev_count = count;
++ *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
++ return true;
++}
++
+ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
+ struct gfs2_holder *gh)
+ {
+ struct file *file = iocb->ki_filp;
+ struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+- size_t count = iov_iter_count(to);
++ size_t prev_count = 0, window_size = 0;
++ size_t written = 0;
+ ssize_t ret;
+
+- if (!count)
++ /*
++ * In this function, we disable page faults when we're holding the
++ * inode glock while doing I/O. If a page fault occurs, we indicate
++ * that the inode glock may be dropped, fault in the pages manually,
++ * and retry.
++ *
++ * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
++ * physical as well as manual page faults, and we need to disable both
++ * kinds.
++ *
++ * For direct I/O, gfs2 takes the inode glock in deferred mode. This
++ * locking mode is compatible with other deferred holders, so multiple
++ * processes and nodes can do direct I/O to a file at the same time.
++ * There's no guarantee that reads or writes will be atomic. Any
++ * coordination among readers and writers needs to happen externally.
++ */
++
++ if (!iov_iter_count(to))
+ return 0; /* skip atime */
+
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
++retry:
+ ret = gfs2_glock_nq(gh);
+ if (ret)
+ goto out_uninit;
++retry_under_glock:
++ pagefault_disable();
++ to->nofault = true;
++ ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
++ IOMAP_DIO_PARTIAL, written);
++ to->nofault = false;
++ pagefault_enable();
++ if (ret > 0)
++ written = ret;
+
+- ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0);
+- gfs2_glock_dq(gh);
++ if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
++ size_t leftover;
++
++ gfs2_holder_allow_demote(gh);
++ leftover = fault_in_iov_iter_writeable(to, window_size);
++ gfs2_holder_disallow_demote(gh);
++ if (leftover != window_size) {
++ if (gfs2_holder_queued(gh))
++ goto retry_under_glock;
++ goto retry;
++ }
++ }
++ if (gfs2_holder_queued(gh))
++ gfs2_glock_dq(gh);
+ out_uninit:
+ gfs2_holder_uninit(gh);
+- return ret;
++ if (ret < 0)
++ return ret;
++ return written;
+ }
+
+ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
+@@ -805,10 +878,20 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+- size_t len = iov_iter_count(from);
+- loff_t offset = iocb->ki_pos;
++ size_t prev_count = 0, window_size = 0;
++ size_t read = 0;
+ ssize_t ret;
+
++ /*
++ * In this function, we disable page faults when we're holding the
++ * inode glock while doing I/O. If a page fault occurs, we indicate
++ * that the inode glock may be dropped, fault in the pages manually,
++ * and retry.
++ *
++ * For writes, iomap_dio_rw only triggers manual page faults, so we
++ * don't need to disable physical ones.
++ */
++
+ /*
+ * Deferred lock, even if its a write, since we do no allocation on
+ * this path. All we need to change is the atime, and this lock mode
+@@ -818,45 +901,78 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
+ * VFS does.
+ */
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
++retry:
+ ret = gfs2_glock_nq(gh);
+ if (ret)
+ goto out_uninit;
+-
++retry_under_glock:
+ /* Silently fall back to buffered I/O when writing beyond EOF */
+- if (offset + len > i_size_read(&ip->i_inode))
++ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
+ goto out;
+
+- ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0);
++ from->nofault = true;
++ ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
++ IOMAP_DIO_PARTIAL, read);
++ from->nofault = false;
++
+ if (ret == -ENOTBLK)
+ ret = 0;
++ if (ret > 0)
++ read = ret;
++
++ if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
++ size_t leftover;
++
++ gfs2_holder_allow_demote(gh);
++ leftover = fault_in_iov_iter_readable(from, window_size);
++ gfs2_holder_disallow_demote(gh);
++ if (leftover != window_size) {
++ if (gfs2_holder_queued(gh))
++ goto retry_under_glock;
++ goto retry;
++ }
++ }
+ out:
+- gfs2_glock_dq(gh);
++ if (gfs2_holder_queued(gh))
++ gfs2_glock_dq(gh);
+ out_uninit:
+ gfs2_holder_uninit(gh);
+- return ret;
++ if (ret < 0)
++ return ret;
++ return read;
+ }
+
+ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ {
+ struct gfs2_inode *ip;
+ struct gfs2_holder gh;
++ size_t prev_count = 0, window_size = 0;
+ size_t written = 0;
+ ssize_t ret;
+
++ /*
++ * In this function, we disable page faults when we're holding the
++ * inode glock while doing I/O. If a page fault occurs, we indicate
++ * that the inode glock may be dropped, fault in the pages manually,
++ * and retry.
++ */
++
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = gfs2_file_direct_read(iocb, to, &gh);
+ if (likely(ret != -ENOTBLK))
+ return ret;
+ iocb->ki_flags &= ~IOCB_DIRECT;
+ }
++ pagefault_disable();
+ iocb->ki_flags |= IOCB_NOIO;
+ ret = generic_file_read_iter(iocb, to);
+ iocb->ki_flags &= ~IOCB_NOIO;
++ pagefault_enable();
+ if (ret >= 0) {
+ if (!iov_iter_count(to))
+ return ret;
+ written = ret;
+- } else {
++ } else if (ret != -EFAULT) {
+ if (ret != -EAGAIN)
+ return ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+@@ -864,18 +980,116 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ }
+ ip = GFS2_I(iocb->ki_filp->f_mapping->host);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
++retry:
+ ret = gfs2_glock_nq(&gh);
+ if (ret)
+ goto out_uninit;
++retry_under_glock:
++ pagefault_disable();
+ ret = generic_file_read_iter(iocb, to);
++ pagefault_enable();
+ if (ret > 0)
+ written += ret;
+- gfs2_glock_dq(&gh);
++
++ if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
++ size_t leftover;
++
++ gfs2_holder_allow_demote(&gh);
++ leftover = fault_in_iov_iter_writeable(to, window_size);
++ gfs2_holder_disallow_demote(&gh);
++ if (leftover != window_size) {
++ if (gfs2_holder_queued(&gh))
++ goto retry_under_glock;
++ goto retry;
++ }
++ }
++ if (gfs2_holder_queued(&gh))
++ gfs2_glock_dq(&gh);
+ out_uninit:
+ gfs2_holder_uninit(&gh);
+ return written ? written : ret;
+ }
+
++static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
++ struct iov_iter *from,
++ struct gfs2_holder *gh)
++{
++ struct file *file = iocb->ki_filp;
++ struct inode *inode = file_inode(file);
++ struct gfs2_inode *ip = GFS2_I(inode);
++ struct gfs2_sbd *sdp = GFS2_SB(inode);
++ struct gfs2_holder *statfs_gh = NULL;
++ size_t prev_count = 0, window_size = 0;
++ size_t orig_count = iov_iter_count(from);
++ size_t read = 0;
++ ssize_t ret;
++
++ /*
++ * In this function, we disable page faults when we're holding the
++ * inode glock while doing I/O. If a page fault occurs, we indicate
++ * that the inode glock may be dropped, fault in the pages manually,
++ * and retry.
++ */
++
++ if (inode == sdp->sd_rindex) {
++ statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS);
++ if (!statfs_gh)
++ return -ENOMEM;
++ }
++
++ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
++retry:
++ ret = gfs2_glock_nq(gh);
++ if (ret)
++ goto out_uninit;
++retry_under_glock:
++ if (inode == sdp->sd_rindex) {
++ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
++
++ ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
++ GL_NOCACHE, statfs_gh);
++ if (ret)
++ goto out_unlock;
++ }
++
++ current->backing_dev_info = inode_to_bdi(inode);
++ pagefault_disable();
++ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
++ pagefault_enable();
++ current->backing_dev_info = NULL;
++ if (ret > 0) {
++ iocb->ki_pos += ret;
++ read += ret;
++ }
++
++ if (inode == sdp->sd_rindex)
++ gfs2_glock_dq_uninit(statfs_gh);
++
++ from->count = orig_count - read;
++ if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
++ size_t leftover;
++
++ gfs2_holder_allow_demote(gh);
++ leftover = fault_in_iov_iter_readable(from, window_size);
++ gfs2_holder_disallow_demote(gh);
++ if (leftover != window_size) {
++ from->count = min(from->count, window_size - leftover);
++ if (gfs2_holder_queued(gh))
++ goto retry_under_glock;
++ goto retry;
++ }
++ }
++out_unlock:
++ if (gfs2_holder_queued(gh))
++ gfs2_glock_dq(gh);
++out_uninit:
++ gfs2_holder_uninit(gh);
++ if (statfs_gh)
++ kfree(statfs_gh);
++ from->count = orig_count - read;
++ return read ? read : ret;
++}
++
+ /**
+ * gfs2_file_write_iter - Perform a write to a file
+ * @iocb: The io context
+@@ -927,9 +1141,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ goto out_unlock;
+
+ iocb->ki_flags |= IOCB_DSYNC;
+- current->backing_dev_info = inode_to_bdi(inode);
+- buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+- current->backing_dev_info = NULL;
++ buffered = gfs2_file_buffered_write(iocb, from, &gh);
+ if (unlikely(buffered <= 0)) {
+ if (!ret)
+ ret = buffered;
+@@ -943,7 +1155,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ * the direct I/O range as we don't know if the buffered pages
+ * made it to disk.
+ */
+- iocb->ki_pos += buffered;
+ ret2 = generic_write_sync(iocb, buffered);
+ invalidate_mapping_pages(mapping,
+ (iocb->ki_pos - buffered) >> PAGE_SHIFT,
+@@ -951,13 +1162,9 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ if (!ret || ret2 > 0)
+ ret += ret2;
+ } else {
+- current->backing_dev_info = inode_to_bdi(inode);
+- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+- current->backing_dev_info = NULL;
+- if (likely(ret > 0)) {
+- iocb->ki_pos += ret;
++ ret = gfs2_file_buffered_write(iocb, from, &gh);
++ if (likely(ret > 0))
+ ret = generic_write_sync(iocb, ret);
+- }
+ }
+
+ out_unlock:
+diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
+index e0eaa9cf9fb6f..e85ef6b14777d 100644
+--- a/fs/gfs2/glock.c
++++ b/fs/gfs2/glock.c
+@@ -58,6 +58,7 @@ struct gfs2_glock_iter {
+ typedef void (*glock_examiner) (struct gfs2_glock * gl);
+
+ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
++static void __gfs2_glock_dq(struct gfs2_holder *gh);
+
+ static struct dentry *gfs2_root;
+ static struct workqueue_struct *glock_workqueue;
+@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl)
+
+ if (gl->gl_state == LM_ST_UNLOCKED)
+ return 0;
++ /*
++ * Note that demote_ok is used for the lru process of disposing of
++ * glocks. For this purpose, we don't care if the glock's holders
++ * have the HIF_MAY_DEMOTE flag set or not. If someone is using
++ * them, don't demote.
++ */
+ if (!list_empty(&gl->gl_holders))
+ return 0;
+ if (glops->go_demote_ok)
+@@ -301,46 +308,59 @@ void gfs2_glock_put(struct gfs2_glock *gl)
+ }
+
+ /**
+- * may_grant - check if its ok to grant a new lock
++ * may_grant - check if it's ok to grant a new lock
+ * @gl: The glock
++ * @current_gh: One of the current holders of @gl
+ * @gh: The lock request which we wish to grant
+ *
+- * Returns: true if its ok to grant the lock
++ * With our current compatibility rules, if a glock has one or more active
++ * holders (HIF_HOLDER flag set), any of those holders can be passed in as
++ * @current_gh; they are all the same as far as compatibility with the new @gh
++ * goes.
++ *
++ * Returns true if it's ok to grant the lock.
+ */
+
+-static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
+-{
+- const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
++static inline bool may_grant(struct gfs2_glock *gl,
++ struct gfs2_holder *current_gh,
++ struct gfs2_holder *gh)
++{
++ if (current_gh) {
++ GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));
++
++ switch(current_gh->gh_state) {
++ case LM_ST_EXCLUSIVE:
++ /*
++ * Here we make a special exception to grant holders
++ * who agree to share the EX lock with other holders
++ * who also have the bit set. If the original holder
++ * has the LM_FLAG_NODE_SCOPE bit set, we grant more
++ * holders with the bit set.
++ */
++ return gh->gh_state == LM_ST_EXCLUSIVE &&
++ (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
++ (gh->gh_flags & LM_FLAG_NODE_SCOPE);
+
+- if (gh != gh_head) {
+- /**
+- * Here we make a special exception to grant holders who agree
+- * to share the EX lock with other holders who also have the
+- * bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit
+- * is set, we grant more holders with the bit set.
+- */
+- if (gh_head->gh_state == LM_ST_EXCLUSIVE &&
+- (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) &&
+- gh->gh_state == LM_ST_EXCLUSIVE &&
+- (gh->gh_flags & LM_FLAG_NODE_SCOPE))
+- return 1;
+- if ((gh->gh_state == LM_ST_EXCLUSIVE ||
+- gh_head->gh_state == LM_ST_EXCLUSIVE))
+- return 0;
++ case LM_ST_SHARED:
++ case LM_ST_DEFERRED:
++ return gh->gh_state == current_gh->gh_state;
++
++ default:
++ return false;
++ }
+ }
++
+ if (gl->gl_state == gh->gh_state)
+- return 1;
++ return true;
+ if (gh->gh_flags & GL_EXACT)
+- return 0;
++ return false;
+ if (gl->gl_state == LM_ST_EXCLUSIVE) {
+- if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
+- return 1;
+- if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
+- return 1;
++ return gh->gh_state == LM_ST_SHARED ||
++ gh->gh_state == LM_ST_DEFERRED;
+ }
+- if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
+- return 1;
+- return 0;
++ if (gh->gh_flags & LM_FLAG_ANY)
++ return gl->gl_state != LM_ST_UNLOCKED;
++ return false;
+ }
+
+ static void gfs2_holder_wake(struct gfs2_holder *gh)
+@@ -366,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
+ struct gfs2_holder *gh, *tmp;
+
+ list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+- if (test_bit(HIF_HOLDER, &gh->gh_iflags))
++ if (!test_bit(HIF_WAIT, &gh->gh_iflags))
+ continue;
+ if (ret & LM_OUT_ERROR)
+ gh->gh_error = -EIO;
+@@ -380,6 +400,78 @@ static void do_error(struct gfs2_glock *gl, const int ret)
+ }
+ }
+
++/**
++ * demote_incompat_holders - demote incompatible demoteable holders
++ * @gl: the glock we want to promote
++ * @new_gh: the new holder to be promoted
++ */
++static void demote_incompat_holders(struct gfs2_glock *gl,
++ struct gfs2_holder *new_gh)
++{
++ struct gfs2_holder *gh;
++
++ /*
++ * Demote incompatible holders before we make ourselves eligible.
++ * (This holder may or may not allow auto-demoting, but we don't want
++ * to demote the new holder before it's even granted.)
++ */
++ list_for_each_entry(gh, &gl->gl_holders, gh_list) {
++ /*
++ * Since holders are at the front of the list, we stop when we
++ * find the first non-holder.
++ */
++ if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
++ return;
++ if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
++ !may_grant(gl, new_gh, gh)) {
++ /*
++ * We should not recurse into do_promote because
++ * __gfs2_glock_dq only calls handle_callback,
++ * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
++ */
++ __gfs2_glock_dq(gh);
++ }
++ }
++}
++
++/**
++ * find_first_holder - find the first "holder" gh
++ * @gl: the glock
++ */
++
++static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
++{
++ struct gfs2_holder *gh;
++
++ if (!list_empty(&gl->gl_holders)) {
++ gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
++ gh_list);
++ if (test_bit(HIF_HOLDER, &gh->gh_iflags))
++ return gh;
++ }
++ return NULL;
++}
++
++/**
++ * find_first_strong_holder - find the first non-demoteable holder
++ * @gl: the glock
++ *
++ * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
++ */
++static inline struct gfs2_holder *
++find_first_strong_holder(struct gfs2_glock *gl)
++{
++ struct gfs2_holder *gh;
++
++ list_for_each_entry(gh, &gl->gl_holders, gh_list) {
++ if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
++ return NULL;
++ if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
++ return gh;
++ }
++ return NULL;
++}
++
+ /**
+ * do_promote - promote as many requests as possible on the current queue
+ * @gl: The glock
+@@ -393,14 +485,21 @@ __releases(&gl->gl_lockref.lock)
+ __acquires(&gl->gl_lockref.lock)
+ {
+ const struct gfs2_glock_operations *glops = gl->gl_ops;
+- struct gfs2_holder *gh, *tmp;
++ struct gfs2_holder *gh, *tmp, *first_gh;
++ bool incompat_holders_demoted = false;
+ int ret;
+
+ restart:
++ first_gh = find_first_strong_holder(gl);
+ list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+- if (test_bit(HIF_HOLDER, &gh->gh_iflags))
++ if (!test_bit(HIF_WAIT, &gh->gh_iflags))
+ continue;
+- if (may_grant(gl, gh)) {
++ if (may_grant(gl, first_gh, gh)) {
++ if (!incompat_holders_demoted) {
++ demote_incompat_holders(gl, first_gh);
++ incompat_holders_demoted = true;
++ first_gh = gh;
++ }
+ if (gh->gh_list.prev == &gl->gl_holders &&
+ glops->go_lock) {
+ spin_unlock(&gl->gl_lockref.lock);
+@@ -426,6 +525,11 @@ restart:
+ gfs2_holder_wake(gh);
+ continue;
+ }
++ /*
++ * If we get here, it means we may not grant this holder for
++ * some reason. If this holder is the head of the list, it
++ * means we have a blocked holder at the head, so return 1.
++ */
+ if (gh->gh_list.prev == &gl->gl_holders)
+ return 1;
+ do_error(gl, 0);
+@@ -722,23 +826,6 @@ out:
+ spin_lock(&gl->gl_lockref.lock);
+ }
+
+-/**
+- * find_first_holder - find the first "holder" gh
+- * @gl: the glock
+- */
+-
+-static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
+-{
+- struct gfs2_holder *gh;
+-
+- if (!list_empty(&gl->gl_holders)) {
+- gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
+- if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+- return gh;
+- }
+- return NULL;
+-}
+-
+ /**
+ * run_queue - do all outstanding tasks related to a glock
+ * @gl: The glock in question
+@@ -1354,15 +1441,20 @@ __acquires(&gl->gl_lockref.lock)
+ GLOCK_BUG_ON(gl, true);
+
+ if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
+- if (test_bit(GLF_LOCK, &gl->gl_flags))
+- try_futile = !may_grant(gl, gh);
++ if (test_bit(GLF_LOCK, &gl->gl_flags)) {
++ struct gfs2_holder *first_gh;
++
++ first_gh = find_first_strong_holder(gl);
++ try_futile = !may_grant(gl, first_gh, gh);
++ }
+ if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+ goto fail;
+ }
+
+ list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
+ if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
+- (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
++ (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
++ !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
+ goto trap_recursive;
+ if (try_futile &&
+ !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
+@@ -1458,51 +1550,83 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
+ return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
+ }
+
+-/**
+- * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
+- * @gh: the glock holder
+- *
+- */
++static inline bool needs_demote(struct gfs2_glock *gl)
++{
++ return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
++ test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
++}
+
+-void gfs2_glock_dq(struct gfs2_holder *gh)
++static void __gfs2_glock_dq(struct gfs2_holder *gh)
+ {
+ struct gfs2_glock *gl = gh->gh_gl;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ unsigned delay = 0;
+ int fast_path = 0;
+
+- spin_lock(&gl->gl_lockref.lock);
+ /*
+- * If we're in the process of file system withdraw, we cannot just
+- * dequeue any glocks until our journal is recovered, lest we
+- * introduce file system corruption. We need two exceptions to this
+- * rule: We need to allow unlocking of nondisk glocks and the glock
+- * for our own journal that needs recovery.
++ * This while loop is similar to function demote_incompat_holders:
++ * If the glock is due to be demoted (which may be from another node
++ * or even if this holder is GL_NOCACHE), the weak holders are
++ * demoted as well, allowing the glock to be demoted.
+ */
+- if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+- glock_blocked_by_withdraw(gl) &&
+- gh->gh_gl != sdp->sd_jinode_gl) {
+- sdp->sd_glock_dqs_held++;
+- spin_unlock(&gl->gl_lockref.lock);
+- might_sleep();
+- wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+- TASK_UNINTERRUPTIBLE);
+- spin_lock(&gl->gl_lockref.lock);
+- }
+- if (gh->gh_flags & GL_NOCACHE)
+- handle_callback(gl, LM_ST_UNLOCKED, 0, false);
++ while (gh) {
++ /*
++ * If we're in the process of file system withdraw, we cannot
++ * just dequeue any glocks until our journal is recovered, lest
++ * we introduce file system corruption. We need two exceptions
++ * to this rule: We need to allow unlocking of nondisk glocks
++ * and the glock for our own journal that needs recovery.
++ */
++ if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
++ glock_blocked_by_withdraw(gl) &&
++ gh->gh_gl != sdp->sd_jinode_gl) {
++ sdp->sd_glock_dqs_held++;
++ spin_unlock(&gl->gl_lockref.lock);
++ might_sleep();
++ wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
++ TASK_UNINTERRUPTIBLE);
++ spin_lock(&gl->gl_lockref.lock);
++ }
+
+- list_del_init(&gh->gh_list);
+- clear_bit(HIF_HOLDER, &gh->gh_iflags);
+- if (list_empty(&gl->gl_holders) &&
+- !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+- !test_bit(GLF_DEMOTE, &gl->gl_flags))
+- fast_path = 1;
++ /*
++ * This holder should not be cached, so mark it for demote.
++ * Note: this should be done before the check for needs_demote
++ * below.
++ */
++ if (gh->gh_flags & GL_NOCACHE)
++ handle_callback(gl, LM_ST_UNLOCKED, 0, false);
++
++ list_del_init(&gh->gh_list);
++ clear_bit(HIF_HOLDER, &gh->gh_iflags);
++ trace_gfs2_glock_queue(gh, 0);
++
++ /*
++ * If there hasn't been a demote request we are done.
++ * (Let the remaining holders, if any, keep holding it.)
++ */
++ if (!needs_demote(gl)) {
++ if (list_empty(&gl->gl_holders))
++ fast_path = 1;
++ break;
++ }
++ /*
++ * If we have another strong holder (we cannot auto-demote)
++ * we are done. It keeps holding it until it is done.
++ */
++ if (find_first_strong_holder(gl))
++ break;
++
++ /*
++ * If we have a weak holder at the head of the list, it
++ * (and all others like it) must be auto-demoted. If there
++ * are no more weak holders, we exit the while loop.
++ */
++ gh = find_first_holder(gl);
++ }
+
+ if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
+ gfs2_glock_add_to_lru(gl);
+
+- trace_gfs2_glock_queue(gh, 0);
+ if (unlikely(!fast_path)) {
+ gl->gl_lockref.count++;
+ if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+@@ -1511,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
+ delay = gl->gl_hold_time;
+ __gfs2_glock_queue_work(gl, delay);
+ }
++}
++
++/**
++ * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
++ * @gh: the glock holder
++ *
++ */
++void gfs2_glock_dq(struct gfs2_holder *gh)
++{
++ struct gfs2_glock *gl = gh->gh_gl;
++
++ spin_lock(&gl->gl_lockref.lock);
++ __gfs2_glock_dq(gh);
+ spin_unlock(&gl->gl_lockref.lock);
+ }
+
+@@ -1673,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
+
+ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
+ {
++ struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
+ unsigned long delay = 0;
+ unsigned long holdtime;
+ unsigned long now = jiffies;
+@@ -1687,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
+ if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+ delay = gl->gl_hold_time;
+ }
++ /*
++ * Note 1: We cannot call demote_incompat_holders from handle_callback
++ * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
++ * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
++ * Plus, we only want to demote the holders if the request comes from
++ * a remote cluster node because local holder conflicts are resolved
++ * elsewhere.
++ *
++ * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
++ * request that we set our state to UNLOCKED. Here we mock up a holder
++ * to make it look like someone wants the lock EX locally. Any SH
++ * and DF requests should be able to share the lock without demoting.
++ *
++ * Note 3: We only want to demote the demoteable holders when there
++ * are no more strong holders. The demoteable holders might as well
++ * keep the glock until the last strong holder is done with it.
++ */
++ if (!find_first_strong_holder(gl)) {
++ if (state == LM_ST_UNLOCKED)
++ mock_gh.gh_state = LM_ST_EXCLUSIVE;
++ demote_incompat_holders(gl, &mock_gh);
++ }
+ handle_callback(gl, state, delay, true);
+ __gfs2_glock_queue_work(gl, delay);
+ spin_unlock(&gl->gl_lockref.lock);
+@@ -1893,10 +2053,10 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+ do {
+ rhashtable_walk_start(&iter);
+
+- while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
+- if (gl->gl_name.ln_sbd == sdp &&
+- lockref_get_not_dead(&gl->gl_lockref))
++ while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) {
++ if (gl->gl_name.ln_sbd == sdp)
+ examiner(gl);
++ }
+
+ rhashtable_walk_stop(&iter);
+ } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
+@@ -1919,7 +2079,7 @@ bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
+
+ void gfs2_cancel_delete_work(struct gfs2_glock *gl)
+ {
+- if (cancel_delayed_work_sync(&gl->gl_delete)) {
++ if (cancel_delayed_work(&gl->gl_delete)) {
+ clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+ gfs2_glock_put(gl);
+ }
+@@ -1938,7 +2098,6 @@ static void flush_delete_work(struct gfs2_glock *gl)
+ &gl->gl_delete, 0);
+ }
+ }
+- gfs2_glock_queue_work(gl, 0);
+ }
+
+ void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
+@@ -1955,10 +2114,10 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
+
+ static void thaw_glock(struct gfs2_glock *gl)
+ {
+- if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
+- gfs2_glock_put(gl);
++ if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
++ return;
++ if (!lockref_get_not_dead(&gl->gl_lockref))
+ return;
+- }
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_queue_work(gl, 0);
+ }
+@@ -1974,9 +2133,12 @@ static void clear_glock(struct gfs2_glock *gl)
+ gfs2_glock_remove_from_lru(gl);
+
+ spin_lock(&gl->gl_lockref.lock);
+- if (gl->gl_state != LM_ST_UNLOCKED)
+- handle_callback(gl, LM_ST_UNLOCKED, 0, false);
+- __gfs2_glock_queue_work(gl, 0);
++ if (!__lockref_is_dead(&gl->gl_lockref)) {
++ gl->gl_lockref.count++;
++ if (gl->gl_state != LM_ST_UNLOCKED)
++ handle_callback(gl, LM_ST_UNLOCKED, 0, false);
++ __gfs2_glock_queue_work(gl, 0);
++ }
+ spin_unlock(&gl->gl_lockref.lock);
+ }
+
+@@ -2076,6 +2238,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
+ *p++ = 'H';
+ if (test_bit(HIF_WAIT, &iflags))
+ *p++ = 'W';
++ if (test_bit(HIF_MAY_DEMOTE, &iflags))
++ *p++ = 'D';
+ *p = 0;
+ return buf;
+ }
+diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
+index 31a8f2f649b52..9012487da4c69 100644
+--- a/fs/gfs2/glock.h
++++ b/fs/gfs2/glock.h
+@@ -150,6 +150,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
+ list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+ if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
+ break;
++ if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
++ continue;
+ if (gh->gh_owner_pid == pid)
+ goto out;
+ }
+@@ -325,6 +327,24 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
+ spin_unlock(&gl->gl_lockref.lock);
+ }
+
++static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
++{
++ struct gfs2_glock *gl = gh->gh_gl;
++
++ spin_lock(&gl->gl_lockref.lock);
++ set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
++ spin_unlock(&gl->gl_lockref.lock);
++}
++
++static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
++{
++ struct gfs2_glock *gl = gh->gh_gl;
++
++ spin_lock(&gl->gl_lockref.lock);
++ clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
++ spin_unlock(&gl->gl_lockref.lock);
++}
++
+ extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
+ extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
+
+diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
+index 79c621c7863d2..558932ad89d5d 100644
+--- a/fs/gfs2/glops.c
++++ b/fs/gfs2/glops.c
+@@ -394,42 +394,44 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
+
+ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+ {
++ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ const struct gfs2_dinode *str = buf;
+ struct timespec64 atime;
+ u16 height, depth;
+ umode_t mode = be32_to_cpu(str->di_mode);
+- bool is_new = ip->i_inode.i_state & I_NEW;
++ struct inode *inode = &ip->i_inode;
++ bool is_new = inode->i_state & I_NEW;
+
+ if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
+ goto corrupt;
+- if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode)))
++ if (unlikely(!is_new && inode_wrong_type(inode, mode)))
+ goto corrupt;
+ ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
+- ip->i_inode.i_mode = mode;
++ inode->i_mode = mode;
+ if (is_new) {
+- ip->i_inode.i_rdev = 0;
++ inode->i_rdev = 0;
+ switch (mode & S_IFMT) {
+ case S_IFBLK:
+ case S_IFCHR:
+- ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+- be32_to_cpu(str->di_minor));
++ inode->i_rdev = MKDEV(be32_to_cpu(str->di_major),
++ be32_to_cpu(str->di_minor));
+ break;
+ }
+ }
+
+- i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
+- i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
+- set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
+- i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
+- gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
++ i_uid_write(inode, be32_to_cpu(str->di_uid));
++ i_gid_write(inode, be32_to_cpu(str->di_gid));
++ set_nlink(inode, be32_to_cpu(str->di_nlink));
++ i_size_write(inode, be64_to_cpu(str->di_size));
++ gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks));
+ atime.tv_sec = be64_to_cpu(str->di_atime);
+ atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+- if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0)
+- ip->i_inode.i_atime = atime;
+- ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+- ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
+- ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+- ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
++ if (timespec64_compare(&inode->i_atime, &atime) < 0)
++ inode->i_atime = atime;
++ inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
++ inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
++ inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
++ inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+
+ ip->i_goal = be64_to_cpu(str->di_goal_meta);
+ ip->i_generation = be64_to_cpu(str->di_generation);
+@@ -437,9 +439,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+ ip->i_diskflags = be32_to_cpu(str->di_flags);
+ ip->i_eattr = be64_to_cpu(str->di_eattr);
+ /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
+- gfs2_set_inode_flags(&ip->i_inode);
++ gfs2_set_inode_flags(inode);
+ height = be16_to_cpu(str->di_height);
+- if (unlikely(height > GFS2_MAX_META_HEIGHT))
++ if (unlikely(height > sdp->sd_max_height))
+ goto corrupt;
+ ip->i_height = (u8)height;
+
+@@ -449,8 +451,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+ ip->i_depth = (u8)depth;
+ ip->i_entries = be32_to_cpu(str->di_entries);
+
+- if (S_ISREG(ip->i_inode.i_mode))
+- gfs2_set_aops(&ip->i_inode);
++ if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip))
++ goto corrupt;
++
++ if (S_ISREG(inode->i_mode))
++ gfs2_set_aops(inode);
+
+ return 0;
+ corrupt:
+diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
+index 0fe49770166ea..ca42d310fd4d6 100644
+--- a/fs/gfs2/incore.h
++++ b/fs/gfs2/incore.h
+@@ -252,6 +252,7 @@ struct gfs2_lkstats {
+
+ enum {
+ /* States */
++ HIF_MAY_DEMOTE = 1,
+ HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
+ HIF_WAIT = 10,
+ };
+@@ -386,9 +387,8 @@ struct gfs2_inode {
+ u64 i_generation;
+ u64 i_eattr;
+ unsigned long i_flags; /* GIF_... */
+- struct gfs2_glock *i_gl; /* Move into i_gh? */
++ struct gfs2_glock *i_gl;
+ struct gfs2_holder i_iopen_gh;
+- struct gfs2_holder i_gh; /* for prepare/commit_write only */
+ struct gfs2_qadata *i_qadata; /* quota allocation data */
+ struct gfs2_holder i_rgd_gh;
+ struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
+diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
+index 3130f85d2b3f4..97ee17843b4d0 100644
+--- a/fs/gfs2/inode.c
++++ b/fs/gfs2/inode.c
+@@ -811,7 +811,7 @@ fail_free_inode:
+ if (free_vfs_inode) /* else evict will do the put for us */
+ gfs2_glock_put(ip->i_gl);
+ }
+- gfs2_rs_delete(ip, NULL);
++ gfs2_rs_deltree(&ip->i_res);
+ gfs2_qa_put(ip);
+ fail_free_acls:
+ posix_acl_release(default_acl);
+diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
+index f0ee3ff6f9a87..9a96842aeab3d 100644
+--- a/fs/gfs2/log.c
++++ b/fs/gfs2/log.c
+@@ -1277,9 +1277,6 @@ static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+ {
+ unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+
+- if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
+- return 1;
+-
+ return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh2);
+ }
+@@ -1296,7 +1293,6 @@ int gfs2_logd(void *data)
+ {
+ struct gfs2_sbd *sdp = data;
+ unsigned long t = 1;
+- DEFINE_WAIT(wait);
+
+ while (!kthread_should_stop()) {
+
+@@ -1321,7 +1317,9 @@ int gfs2_logd(void *data)
+ GFS2_LFC_LOGD_JFLUSH_REQD);
+ }
+
+- if (gfs2_ail_flush_reqd(sdp)) {
++ if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
++ gfs2_ail_flush_reqd(sdp)) {
++ clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+ gfs2_ail1_start(sdp);
+ gfs2_ail1_wait(sdp);
+ gfs2_ail1_empty(sdp, 0);
+@@ -1333,17 +1331,12 @@ int gfs2_logd(void *data)
+
+ try_to_freeze();
+
+- do {
+- prepare_to_wait(&sdp->sd_logd_waitq, &wait,
+- TASK_INTERRUPTIBLE);
+- if (!gfs2_ail_flush_reqd(sdp) &&
+- !gfs2_jrnl_flush_reqd(sdp) &&
+- !kthread_should_stop())
+- t = schedule_timeout(t);
+- } while(t && !gfs2_ail_flush_reqd(sdp) &&
+- !gfs2_jrnl_flush_reqd(sdp) &&
+- !kthread_should_stop());
+- finish_wait(&sdp->sd_logd_waitq, &wait);
++ t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
++ test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
++ gfs2_ail_flush_reqd(sdp) ||
++ gfs2_jrnl_flush_reqd(sdp) ||
++ kthread_should_stop(),
++ t);
+ }
+
+ return 0;
+diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
+index 7f8410d8fdc1d..fb3b488370834 100644
+--- a/fs/gfs2/ops_fstype.c
++++ b/fs/gfs2/ops_fstype.c
+@@ -180,7 +180,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
+ pr_warn("Invalid block size\n");
+ return -EINVAL;
+ }
+-
++ if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) {
++ pr_warn("Invalid block size shift\n");
++ return -EINVAL;
++ }
+ return 0;
+ }
+
+@@ -385,8 +388,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
+ if (!table[0])
+ table = sdp->sd_vfs->s_id;
+
+- strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN);
+- strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN);
++ BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN);
++
++ strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN);
++ strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN);
+
+ table = sdp->sd_table_name;
+ while ((table = strchr(table, '/')))
+@@ -1443,13 +1448,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
+
+ switch (o) {
+ case Opt_lockproto:
+- strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
++ strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_locktable:
+- strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
++ strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_hostdata:
+- strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
++ strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_spectator:
+ args->ar_spectator = 1;
+diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
+index be0997e24d60b..dc77080a82bbf 100644
+--- a/fs/gfs2/quota.c
++++ b/fs/gfs2/quota.c
+@@ -531,34 +531,42 @@ static void qdsb_put(struct gfs2_quota_data *qd)
+ */
+ int gfs2_qa_get(struct gfs2_inode *ip)
+ {
+- int error = 0;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
++ struct inode *inode = &ip->i_inode;
+
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+ return 0;
+
+- down_write(&ip->i_rw_mutex);
++ spin_lock(&inode->i_lock);
+ if (ip->i_qadata == NULL) {
+- ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS);
+- if (!ip->i_qadata) {
+- error = -ENOMEM;
+- goto out;
+- }
++ struct gfs2_qadata *tmp;
++
++ spin_unlock(&inode->i_lock);
++ tmp = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS);
++ if (!tmp)
++ return -ENOMEM;
++
++ spin_lock(&inode->i_lock);
++ if (ip->i_qadata == NULL)
++ ip->i_qadata = tmp;
++ else
++ kmem_cache_free(gfs2_qadata_cachep, tmp);
+ }
+ ip->i_qadata->qa_ref++;
+-out:
+- up_write(&ip->i_rw_mutex);
+- return error;
++ spin_unlock(&inode->i_lock);
++ return 0;
+ }
+
+ void gfs2_qa_put(struct gfs2_inode *ip)
+ {
+- down_write(&ip->i_rw_mutex);
++ struct inode *inode = &ip->i_inode;
++
++ spin_lock(&inode->i_lock);
+ if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) {
+ kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata);
+ ip->i_qadata = NULL;
+ }
+- up_write(&ip->i_rw_mutex);
++ spin_unlock(&inode->i_lock);
+ }
+
+ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
+diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
+index c3b00ba92ed2e..6901cd85f1df7 100644
+--- a/fs/gfs2/rgrp.c
++++ b/fs/gfs2/rgrp.c
+@@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+ /**
+ * gfs2_rs_delete - delete a multi-block reservation
+ * @ip: The inode for this reservation
+- * @wcount: The inode's write count, or NULL
+ *
+ */
+-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
++void gfs2_rs_delete(struct gfs2_inode *ip)
+ {
++ struct inode *inode = &ip->i_inode;
++
+ down_write(&ip->i_rw_mutex);
+- if ((wcount == NULL) || (atomic_read(wcount) <= 1))
++ if (atomic_read(&inode->i_writecount) <= 1)
+ gfs2_rs_deltree(&ip->i_res);
+ up_write(&ip->i_rw_mutex);
+ }
+@@ -922,15 +923,15 @@ static int read_rindex_entry(struct gfs2_inode *ip)
+ spin_lock_init(&rgd->rd_rsspin);
+ mutex_init(&rgd->rd_mutex);
+
+- error = compute_bitstructs(rgd);
+- if (error)
+- goto fail;
+-
+ error = gfs2_glock_get(sdp, rgd->rd_addr,
+ &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+ if (error)
+ goto fail;
+
++ error = compute_bitstructs(rgd);
++ if (error)
++ goto fail_glock;
++
+ rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
+ rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
+ if (rgd->rd_data > sdp->sd_max_rg_data)
+@@ -944,6 +945,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
+ }
+
+ error = 0; /* someone else read in the rgrp; free it and ignore it */
++fail_glock:
+ gfs2_glock_put(rgd->rd_gl);
+
+ fail:
+@@ -1427,7 +1429,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
+
+ start = r.start >> bs_shift;
+ end = start + (r.len >> bs_shift);
+- minlen = max_t(u64, r.minlen,
++ minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
++ minlen = max_t(u64, minlen,
+ q->limits.discard_granularity) >> bs_shift;
+
+ if (end <= start || minlen > sdp->sd_max_rg_data)
+diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
+index a6855fd796e03..2f80f3bbf8767 100644
+--- a/fs/gfs2/rgrp.h
++++ b/fs/gfs2/rgrp.h
+@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
+ bool dinode, u64 *generation);
+
+ extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
++extern void gfs2_rs_delete(struct gfs2_inode *ip);
+ extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen, int meta);
+ extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
+index 6e00d15ef0a82..51b44da4a0d64 100644
+--- a/fs/gfs2/super.c
++++ b/fs/gfs2/super.c
+@@ -138,8 +138,10 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
+ return -EIO;
+
+ error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
+- if (error || gfs2_withdrawn(sdp))
++ if (error) {
++ gfs2_consist(sdp);
+ return error;
++ }
+
+ if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+ gfs2_consist(sdp);
+@@ -151,7 +153,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
+ gfs2_log_pointers_init(sdp, head.lh_blkno);
+
+ error = gfs2_quota_init(sdp);
+- if (!error && !gfs2_withdrawn(sdp))
++ if (!error && gfs2_withdrawn(sdp))
++ error = -EIO;
++ if (!error)
+ set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ return error;
+ }
+@@ -378,6 +382,7 @@ out:
+
+ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+ {
++ const struct inode *inode = &ip->i_inode;
+ struct gfs2_dinode *str = buf;
+
+ str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+@@ -385,15 +390,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+ str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+ str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+ str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+- str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+- str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
+- str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
+- str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+- str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+- str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
+- str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+- str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+- str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
++ str->di_mode = cpu_to_be32(inode->i_mode);
++ str->di_uid = cpu_to_be32(i_uid_read(inode));
++ str->di_gid = cpu_to_be32(i_gid_read(inode));
++ str->di_nlink = cpu_to_be32(inode->i_nlink);
++ str->di_size = cpu_to_be64(i_size_read(inode));
++ str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
++ str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
++ str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
++ str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+
+ str->di_goal_meta = cpu_to_be64(ip->i_goal);
+ str->di_goal_data = cpu_to_be64(ip->i_goal);
+@@ -401,16 +406,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+
+ str->di_flags = cpu_to_be32(ip->i_diskflags);
+ str->di_height = cpu_to_be16(ip->i_height);
+- str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
++ str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
+ !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
+ GFS2_FORMAT_DE : 0);
+ str->di_depth = cpu_to_be16(ip->i_depth);
+ str->di_entries = cpu_to_be32(ip->i_entries);
+
+ str->di_eattr = cpu_to_be64(ip->i_eattr);
+- str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+- str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+- str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
++ str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
++ str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
++ str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+ }
+
+ /**
+@@ -975,7 +980,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
+ {
+ struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
+ struct gfs2_args *args = &sdp->sd_args;
+- int val;
++ unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;
++
++ spin_lock(&sdp->sd_tune.gt_spin);
++ logd_secs = sdp->sd_tune.gt_logd_secs;
++ quota_quantum = sdp->sd_tune.gt_quota_quantum;
++ statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
++ statfs_slow = sdp->sd_tune.gt_statfs_slow;
++ spin_unlock(&sdp->sd_tune.gt_spin);
+
+ if (is_ancestor(root, sdp->sd_master_dir))
+ seq_puts(s, ",meta");
+@@ -1030,17 +1042,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
+ }
+ if (args->ar_discard)
+ seq_puts(s, ",discard");
+- val = sdp->sd_tune.gt_logd_secs;
+- if (val != 30)
+- seq_printf(s, ",commit=%d", val);
+- val = sdp->sd_tune.gt_statfs_quantum;
+- if (val != 30)
+- seq_printf(s, ",statfs_quantum=%d", val);
+- else if (sdp->sd_tune.gt_statfs_slow)
++ if (logd_secs != 30)
++ seq_printf(s, ",commit=%d", logd_secs);
++ if (statfs_quantum != 30)
++ seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
++ else if (statfs_slow)
+ seq_puts(s, ",statfs_quantum=0");
+- val = sdp->sd_tune.gt_quota_quantum;
+- if (val != 60)
+- seq_printf(s, ",quota_quantum=%d", val);
++ if (quota_quantum != 60)
++ seq_printf(s, ",quota_quantum=%d", quota_quantum);
+ if (args->ar_statfs_percent)
+ seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
+ if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
+@@ -1376,6 +1385,14 @@ static void gfs2_evict_inode(struct inode *inode)
+ if (inode->i_nlink || sb_rdonly(sb))
+ goto out;
+
++ /*
++ * In case of an incomplete mount, gfs2_evict_inode() may be called for
++ * system files without having an active journal to write to. In that
++ * case, skip the filesystem evict.
++ */
++ if (!sdp->sd_jdesc)
++ goto out;
++
+ gfs2_holder_mark_uninitialized(&gh);
+ ret = evict_should_delete(inode, &gh);
+ if (ret == SHOULD_DEFER_EVICTION)
+@@ -1398,17 +1415,10 @@ out:
+ truncate_inode_pages_final(&inode->i_data);
+ if (ip->i_qadata)
+ gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
+- gfs2_rs_delete(ip, NULL);
++ gfs2_rs_deltree(&ip->i_res);
+ gfs2_ordered_del_inode(ip);
+ clear_inode(inode);
+ gfs2_dir_hash_inval(ip);
+- if (ip->i_gl) {
+- glock_clear_object(ip->i_gl, ip);
+- wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+- gfs2_glock_add_to_lru(ip->i_gl);
+- gfs2_glock_put_eventually(ip->i_gl);
+- ip->i_gl = NULL;
+- }
+ if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
+ struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+
+@@ -1421,6 +1431,13 @@ out:
+ gfs2_holder_uninit(&ip->i_iopen_gh);
+ gfs2_glock_put_eventually(gl);
+ }
++ if (ip->i_gl) {
++ glock_clear_object(ip->i_gl, ip);
++ wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
++ gfs2_glock_add_to_lru(ip->i_gl);
++ gfs2_glock_put_eventually(ip->i_gl);
++ ip->i_gl = NULL;
++ }
+ }
+
+ static struct inode *gfs2_alloc_inode(struct super_block *sb)
+diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
+index c0a73a6ffb28b..397e02a566970 100644
+--- a/fs/hfs/bnode.c
++++ b/fs/hfs/bnode.c
+@@ -281,6 +281,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
+ tree->node_hash[hash] = node;
+ tree->node_hash_cnt++;
+ } else {
++ hfs_bnode_get(node2);
+ spin_unlock(&tree->hash_lock);
+ kfree(node);
+ wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags));
+diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
+index 4a95a92546a0d..7c9c6d0b38fd6 100644
+--- a/fs/hfs/inode.c
++++ b/fs/hfs/inode.c
+@@ -456,14 +456,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ /* panic? */
+ return -EIO;
+
++ res = -EIO;
++ if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
++ goto out;
+ fd.search_key->cat = HFS_I(main_inode)->cat_key;
+ if (hfs_brec_find(&fd))
+- /* panic? */
+ goto out;
+
+ if (S_ISDIR(main_inode->i_mode)) {
+ if (fd.entrylength < sizeof(struct hfs_cat_dir))
+- /* panic? */;
++ goto out;
+ hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+ sizeof(struct hfs_cat_dir));
+ if (rec.type != HFS_CDR_DIR ||
+@@ -476,6 +478,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
+ sizeof(struct hfs_cat_dir));
+ } else if (HFS_IS_RSRC(inode)) {
++ if (fd.entrylength < sizeof(struct hfs_cat_file))
++ goto out;
+ hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+ sizeof(struct hfs_cat_file));
+ hfs_inode_write_fork(inode, rec.file.RExtRec,
+@@ -484,7 +488,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ sizeof(struct hfs_cat_file));
+ } else {
+ if (fd.entrylength < sizeof(struct hfs_cat_file))
+- /* panic? */;
++ goto out;
+ hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
+ sizeof(struct hfs_cat_file));
+ if (rec.type != HFS_CDR_FIL ||
+@@ -501,9 +505,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
+ sizeof(struct hfs_cat_file));
+ }
++ res = 0;
+ out:
+ hfs_find_exit(&fd);
+- return 0;
++ return res;
+ }
+
+ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
+diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
+index 39f5e343bf4d4..fdb0edb8a607d 100644
+--- a/fs/hfs/trans.c
++++ b/fs/hfs/trans.c
+@@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
+ if (nls_io) {
+ wchar_t ch;
+
+- while (srclen > 0) {
++ while (srclen > 0 && dstlen > 0) {
+ size = nls_io->char2uni(src, srclen, &ch);
+ if (size < 0) {
+ ch = '?';
+diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
+index 1798949f269bb..ebc0d5c678d0c 100644
+--- a/fs/hfsplus/hfsplus_fs.h
++++ b/fs/hfsplus/hfsplus_fs.h
+@@ -198,6 +198,8 @@ struct hfsplus_sb_info {
+ #define HFSPLUS_SB_HFSX 3
+ #define HFSPLUS_SB_CASEFOLD 4
+ #define HFSPLUS_SB_NOBARRIER 5
++#define HFSPLUS_SB_UID 6
++#define HFSPLUS_SB_GID 7
+
+ static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
+ {
+diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
+index 6fef67c2a9f09..87bc222dc9062 100644
+--- a/fs/hfsplus/inode.c
++++ b/fs/hfsplus/inode.c
+@@ -190,11 +190,11 @@ static void hfsplus_get_perms(struct inode *inode,
+ mode = be16_to_cpu(perms->mode);
+
+ i_uid_write(inode, be32_to_cpu(perms->owner));
+- if (!i_uid_read(inode) && !mode)
++ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode))
+ inode->i_uid = sbi->uid;
+
+ i_gid_write(inode, be32_to_cpu(perms->group));
+- if (!i_gid_read(inode) && !mode)
++ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode))
+ inode->i_gid = sbi->gid;
+
+ if (dir) {
+@@ -509,8 +509,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
+ if (type == HFSPLUS_FOLDER) {
+ struct hfsplus_cat_folder *folder = &entry.folder;
+
+- if (fd->entrylength < sizeof(struct hfsplus_cat_folder))
+- /* panic? */;
++ if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) {
++ pr_err("bad catalog folder entry\n");
++ res = -EIO;
++ goto out;
++ }
+ hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
+ sizeof(struct hfsplus_cat_folder));
+ hfsplus_get_perms(inode, &folder->permissions, 1);
+@@ -530,8 +533,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
+ } else if (type == HFSPLUS_FILE) {
+ struct hfsplus_cat_file *file = &entry.file;
+
+- if (fd->entrylength < sizeof(struct hfsplus_cat_file))
+- /* panic? */;
++ if (fd->entrylength < sizeof(struct hfsplus_cat_file)) {
++ pr_err("bad catalog file entry\n");
++ res = -EIO;
++ goto out;
++ }
+ hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
+ sizeof(struct hfsplus_cat_file));
+
+@@ -562,6 +568,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
+ pr_err("bad catalog entry used to create inode\n");
+ res = -EIO;
+ }
++out:
+ return res;
+ }
+
+@@ -570,6 +577,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
+ struct inode *main_inode = inode;
+ struct hfs_find_data fd;
+ hfsplus_cat_entry entry;
++ int res = 0;
+
+ if (HFSPLUS_IS_RSRC(inode))
+ main_inode = HFSPLUS_I(inode)->rsrc_inode;
+@@ -588,8 +596,11 @@ int hfsplus_cat_write_inode(struct inode *inode)
+ if (S_ISDIR(main_inode->i_mode)) {
+ struct hfsplus_cat_folder *folder = &entry.folder;
+
+- if (fd.entrylength < sizeof(struct hfsplus_cat_folder))
+- /* panic? */;
++ if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) {
++ pr_err("bad catalog folder entry\n");
++ res = -EIO;
++ goto out;
++ }
+ hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
+ sizeof(struct hfsplus_cat_folder));
+ /* simple node checks? */
+@@ -614,8 +625,11 @@ int hfsplus_cat_write_inode(struct inode *inode)
+ } else {
+ struct hfsplus_cat_file *file = &entry.file;
+
+- if (fd.entrylength < sizeof(struct hfsplus_cat_file))
+- /* panic? */;
++ if (fd.entrylength < sizeof(struct hfsplus_cat_file)) {
++ pr_err("bad catalog file entry\n");
++ res = -EIO;
++ goto out;
++ }
+ hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
+ sizeof(struct hfsplus_cat_file));
+ hfsplus_inode_write_fork(inode, &file->data_fork);
+@@ -636,7 +650,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
+ set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags);
+ out:
+ hfs_find_exit(&fd);
+- return 0;
++ return res;
+ }
+
+ int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
+index 047e05c575601..c94a58762ad6d 100644
+--- a/fs/hfsplus/options.c
++++ b/fs/hfsplus/options.c
+@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
+ if (!uid_valid(sbi->uid)) {
+ pr_err("invalid uid specified\n");
+ return 0;
++ } else {
++ set_bit(HFSPLUS_SB_UID, &sbi->flags);
+ }
+ break;
+ case opt_gid:
+@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
+ if (!gid_valid(sbi->gid)) {
+ pr_err("invalid gid specified\n");
+ return 0;
++ } else {
++ set_bit(HFSPLUS_SB_GID, &sbi->flags);
+ }
+ break;
+ case opt_part:
+diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
+index b9e3db3f855f9..392edb60edd07 100644
+--- a/fs/hfsplus/super.c
++++ b/fs/hfsplus/super.c
+@@ -295,11 +295,11 @@ static void hfsplus_put_super(struct super_block *sb)
+ hfsplus_sync_fs(sb, 1);
+ }
+
++ iput(sbi->alloc_file);
++ iput(sbi->hidden_dir);
+ hfs_btree_close(sbi->attr_tree);
+ hfs_btree_close(sbi->cat_tree);
+ hfs_btree_close(sbi->ext_tree);
+- iput(sbi->alloc_file);
+- iput(sbi->hidden_dir);
+ kfree(sbi->s_vhdr_buf);
+ kfree(sbi->s_backup_vhdr_buf);
+ unload_nls(sbi->nls);
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index cdfb1ae78a3f8..352230a011e08 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+- info.high_limit = TASK_SIZE;
++ info.high_limit = arch_get_mmap_end(addr);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+@@ -222,7 +222,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+- info.high_limit = current->mm->mmap_base;
++ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+@@ -237,7 +237,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = current->mm->mmap_base;
+- info.high_limit = TASK_SIZE;
++ info.high_limit = arch_get_mmap_end(addr);
+ addr = vm_unmapped_area(&info);
+ }
+
+@@ -251,6 +251,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct hstate *h = hstate_file(file);
++ const unsigned long mmap_end = arch_get_mmap_end(addr);
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+@@ -266,7 +267,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+- if (TASK_SIZE - len >= addr &&
++ if (mmap_end - len >= addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+@@ -360,6 +361,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ } else {
+ unlock_page(page);
+
++ if (PageHWPoison(page)) {
++ put_page(page);
++ retval = -EIO;
++ break;
++ }
++
+ /*
+ * We have the page, copy it to user space buffer.
+ */
+@@ -409,10 +416,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
+ struct vm_area_struct *vma;
+
+ /*
+- * end == 0 indicates that the entire range after
+- * start should be unmapped.
++ * end == 0 indicates that the entire range after start should be
++ * unmapped. Note, end is exclusive, whereas the interval tree takes
++ * an inclusive "last".
+ */
+- vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
++ vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
+ unsigned long v_offset;
+ unsigned long v_end;
+
+@@ -982,13 +990,6 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
+ static int hugetlbfs_error_remove_page(struct address_space *mapping,
+ struct page *page)
+ {
+- struct inode *inode = mapping->host;
+- pgoff_t index = page->index;
+-
+- remove_huge_page(page);
+- if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
+- hugetlb_fix_reserve_counts(inode);
+-
+ return 0;
+ }
+
+@@ -1046,12 +1047,12 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ if (sbinfo->spool) {
+ long free_pages;
+
+- spin_lock(&sbinfo->spool->lock);
++ spin_lock_irq(&sbinfo->spool->lock);
+ buf->f_blocks = sbinfo->spool->max_hpages;
+ free_pages = sbinfo->spool->max_hpages
+ - sbinfo->spool->used_hpages;
+ buf->f_bavail = buf->f_bfree = free_pages;
+- spin_unlock(&sbinfo->spool->lock);
++ spin_unlock_irq(&sbinfo->spool->lock);
+ buf->f_files = sbinfo->max_inodes;
+ buf->f_ffree = sbinfo->free_inodes;
+ }
+@@ -1249,7 +1250,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
+
+ case Opt_size:
+ /* memparse() will accept a K/M/G without a digit */
+- if (!isdigit(param->string[0]))
++ if (!param->string || !isdigit(param->string[0]))
+ goto bad_val;
+ ctx->max_size_opt = memparse(param->string, &rest);
+ ctx->max_val_type = SIZE_STD;
+@@ -1259,7 +1260,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
+
+ case Opt_nr_inodes:
+ /* memparse() will accept a K/M/G without a digit */
+- if (!isdigit(param->string[0]))
++ if (!param->string || !isdigit(param->string[0]))
+ goto bad_val;
+ ctx->nr_inodes = memparse(param->string, &rest);
+ return 0;
+@@ -1275,7 +1276,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
+
+ case Opt_min_size:
+ /* memparse() will accept a K/M/G without a digit */
+- if (!isdigit(param->string[0]))
++ if (!param->string || !isdigit(param->string[0]))
+ goto bad_val;
+ ctx->min_size_opt = memparse(param->string, &rest);
+ ctx->min_val_type = SIZE_STD;
+diff --git a/fs/inode.c b/fs/inode.c
+index ed0cab8a32db1..7cb048a3b3bdb 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -27,7 +27,7 @@
+ * Inode locking rules:
+ *
+ * inode->i_lock protects:
+- * inode->i_state, inode->i_hash, __iget()
++ * inode->i_state, inode->i_hash, __iget(), inode->i_io_list
+ * Inode LRU list locks protect:
+ * inode->i_sb->s_inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_list_lock protects:
+@@ -167,8 +167,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
+ inode->i_wb_frn_history = 0;
+ #endif
+
+- if (security_inode_alloc(inode))
+- goto out;
+ spin_lock_init(&inode->i_lock);
+ lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+
+@@ -205,11 +203,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
+ inode->i_fsnotify_mask = 0;
+ #endif
+ inode->i_flctx = NULL;
++
++ if (unlikely(security_inode_alloc(inode)))
++ return -ENOMEM;
+ this_cpu_inc(nr_inodes);
+
+ return 0;
+-out:
+- return -ENOMEM;
+ }
+ EXPORT_SYMBOL(inode_init_always);
+
+@@ -1024,6 +1023,48 @@ void discard_new_inode(struct inode *inode)
+ }
+ EXPORT_SYMBOL(discard_new_inode);
+
++/**
++ * lock_two_inodes - lock two inodes (may be regular files but also dirs)
++ *
++ * Lock any non-NULL argument. The caller must make sure that if he is passing
++ * in two directories, one is not an ancestor of the other. Zero, one or two
++ * objects may be locked by this function.
++ *
++ * @inode1: first inode to lock
++ * @inode2: second inode to lock
++ * @subclass1: inode lock subclass for the first lock obtained
++ * @subclass2: inode lock subclass for the second lock obtained
++ */
++void lock_two_inodes(struct inode *inode1, struct inode *inode2,
++ unsigned subclass1, unsigned subclass2)
++{
++ if (!inode1 || !inode2) {
++ /*
++ * Make sure @subclass1 will be used for the acquired lock.
++ * This is not strictly necessary (no current caller cares) but
++ * let's keep things consistent.
++ */
++ if (!inode1)
++ swap(inode1, inode2);
++ goto lock;
++ }
++
++ /*
++ * If one object is directory and the other is not, we must make sure
++ * to lock directory first as the other object may be its child.
++ */
++ if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
++ if (inode1 > inode2)
++ swap(inode1, inode2);
++ } else if (!S_ISDIR(inode1->i_mode))
++ swap(inode1, inode2);
++lock:
++ if (inode1)
++ inode_lock_nested(inode1, subclass1);
++ if (inode2 && inode2 != inode1)
++ inode_lock_nested(inode2, subclass2);
++}
++
+ /**
+ * lock_two_nondirectories - take two i_mutexes on non-directory objects
+ *
+@@ -1782,12 +1823,13 @@ EXPORT_SYMBOL(generic_update_time);
+ * This does the actual work of updating an inodes time or version. Must have
+ * had called mnt_want_write() before calling this.
+ */
+-static int update_time(struct inode *inode, struct timespec64 *time, int flags)
++int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
+ {
+ if (inode->i_op->update_time)
+ return inode->i_op->update_time(inode, time, flags);
+ return generic_update_time(inode, time, flags);
+ }
++EXPORT_SYMBOL(inode_update_time);
+
+ /**
+ * atime_needs_update - update the access time
+@@ -1857,48 +1899,20 @@ void touch_atime(const struct path *path)
+ * of the fs read only, e.g. subvolumes in Btrfs.
+ */
+ now = current_time(inode);
+- update_time(inode, &now, S_ATIME);
++ inode_update_time(inode, &now, S_ATIME);
+ __mnt_drop_write(mnt);
+ skip_update:
+ sb_end_write(inode->i_sb);
+ }
+ EXPORT_SYMBOL(touch_atime);
+
+-/*
+- * The logic we want is
+- *
+- * if suid or (sgid and xgrp)
+- * remove privs
+- */
+-int should_remove_suid(struct dentry *dentry)
+-{
+- umode_t mode = d_inode(dentry)->i_mode;
+- int kill = 0;
+-
+- /* suid always must be killed */
+- if (unlikely(mode & S_ISUID))
+- kill = ATTR_KILL_SUID;
+-
+- /*
+- * sgid without any exec bits is just a mandatory locking mark; leave
+- * it alone. If some exec bits are set, it's a real sgid; kill it.
+- */
+- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+- kill |= ATTR_KILL_SGID;
+-
+- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
+- return kill;
+-
+- return 0;
+-}
+-EXPORT_SYMBOL(should_remove_suid);
+-
+ /*
+ * Return mask of changes for notify_change() that need to be done as a
+ * response to write or truncate. Return 0 if nothing has to be changed.
+ * Negative value on error (change should be denied).
+ */
+-int dentry_needs_remove_privs(struct dentry *dentry)
++int dentry_needs_remove_privs(struct user_namespace *mnt_userns,
++ struct dentry *dentry)
+ {
+ struct inode *inode = d_inode(dentry);
+ int mask = 0;
+@@ -1907,7 +1921,7 @@ int dentry_needs_remove_privs(struct dentry *dentry)
+ if (IS_NOSEC(inode))
+ return 0;
+
+- mask = should_remove_suid(dentry);
++ mask = setattr_should_drop_suidgid(mnt_userns, inode);
+ ret = security_inode_need_killpriv(dentry);
+ if (ret < 0)
+ return ret;
+@@ -1949,7 +1963,7 @@ int file_remove_privs(struct file *file)
+ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
+ return 0;
+
+- kill = dentry_needs_remove_privs(dentry);
++ kill = dentry_needs_remove_privs(file_mnt_user_ns(file), dentry);
+ if (kill < 0)
+ return kill;
+ if (kill)
+@@ -2002,7 +2016,7 @@ int file_update_time(struct file *file)
+ if (__mnt_want_write_file(file))
+ return 0;
+
+- ret = update_time(inode, &now, sync_it);
++ ret = inode_update_time(inode, &now, sync_it);
+ __mnt_drop_write_file(file);
+
+ return ret;
+@@ -2165,10 +2179,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+ /* Directories are special, and always inherit S_ISGID */
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+- !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
+- !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
+- mode &= ~S_ISGID;
+ } else
+ inode_fsgid_set(inode, mnt_userns);
+ inode->i_mode = mode;
+@@ -2324,3 +2334,53 @@ struct timespec64 current_time(struct inode *inode)
+ return timestamp_truncate(now, inode);
+ }
+ EXPORT_SYMBOL(current_time);
++
++/**
++ * in_group_or_capable - check whether caller is CAP_FSETID privileged
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ * @gid: the new/current gid of @inode
++ *
++ * Check whether @gid is in the caller's group list or if the caller is
++ * privileged with CAP_FSETID over @inode. This can be used to determine
++ * whether the setgid bit can be kept or must be dropped.
++ *
++ * Return: true if the caller is sufficiently privileged, false if not.
++ */
++bool in_group_or_capable(struct user_namespace *mnt_userns,
++ const struct inode *inode, kgid_t gid)
++{
++ if (in_group_p(gid))
++ return true;
++ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ return true;
++ return false;
++}
++
++/**
++ * mode_strip_sgid - handle the sgid bit for non-directories
++ * @mnt_userns: User namespace of the mount the inode was created from
++ * @dir: parent directory inode
++ * @mode: mode of the file to be created in @dir
++ *
++ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
++ * raised and @dir has the S_ISGID bit raised ensure that the caller is
++ * either in the group of the parent directory or they have CAP_FSETID
++ * in their user namespace and are privileged over the parent directory.
++ * In all other cases, strip the S_ISGID bit from @mode.
++ *
++ * Return: the new mode to use for the file
++ */
++umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
++ const struct inode *dir, umode_t mode)
++{
++ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
++ return mode;
++ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
++ return mode;
++ if (in_group_or_capable(mnt_userns, dir,
++ i_gid_into_mnt(mnt_userns, dir)))
++ return mode;
++ return mode & ~S_ISGID;
++}
++EXPORT_SYMBOL(mode_strip_sgid);
+diff --git a/fs/internal.h b/fs/internal.h
+index 3cd065c8a66b4..d241eaa0c58b2 100644
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -23,22 +23,11 @@ struct pipe_inode_info;
+ #ifdef CONFIG_BLOCK
+ extern void __init bdev_cache_init(void);
+
+-extern int __sync_blockdev(struct block_device *bdev, int wait);
+-void iterate_bdevs(void (*)(struct block_device *, void *), void *);
+ void emergency_thaw_bdev(struct super_block *sb);
+ #else
+ static inline void bdev_cache_init(void)
+ {
+ }
+-
+-static inline int __sync_blockdev(struct block_device *bdev, int wait)
+-{
+- return 0;
+-}
+-static inline void iterate_bdevs(void (*f)(struct block_device *, void *),
+- void *arg)
+-{
+-}
+ static inline int emergency_thaw_bdev(struct super_block *sb)
+ {
+ return 0;
+@@ -69,8 +58,6 @@ extern int finish_clean_context(struct fs_context *fc);
+ */
+ extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ struct path *path, struct path *root);
+-extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+- const char *, unsigned int, struct path *);
+ int do_rmdir(int dfd, struct filename *name);
+ int do_unlinkat(int dfd, struct filename *name);
+ int may_linkat(struct user_namespace *mnt_userns, struct path *link);
+@@ -111,6 +98,16 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
+ extern struct file *alloc_empty_file(int, const struct cred *);
+ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+
++static inline void put_file_access(struct file *file)
++{
++ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
++ i_readcount_dec(file->f_inode);
++ } else if (file->f_mode & FMODE_WRITER) {
++ put_write_access(file->f_inode);
++ __mnt_drop_write(file->f_path.mnt);
++ }
++}
++
+ /*
+ * super.c
+ */
+@@ -150,7 +147,11 @@ extern int vfs_open(const struct path *, struct file *);
+ */
+ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
+ extern void inode_add_lru(struct inode *inode);
+-extern int dentry_needs_remove_privs(struct dentry *dentry);
++int dentry_needs_remove_privs(struct user_namespace *, struct dentry *dentry);
++bool in_group_or_capable(struct user_namespace *mnt_userns,
++ const struct inode *inode, kgid_t gid);
++void lock_two_inodes(struct inode *inode1, struct inode *inode2,
++ unsigned subclass1, unsigned subclass2);
+
+ /*
+ * fs-writeback.c
+@@ -206,3 +207,31 @@ long splice_file_to_pipe(struct file *in,
+ struct pipe_inode_info *opipe,
+ loff_t *offset,
+ size_t len, unsigned int flags);
++
++/*
++ * fs/xattr.c:
++ */
++struct xattr_name {
++ char name[XATTR_NAME_MAX + 1];
++};
++
++struct xattr_ctx {
++ /* Value of attribute */
++ union {
++ const void __user *cvalue;
++ void __user *value;
++ };
++ void *kvalue;
++ size_t size;
++ /* Attribute name */
++ struct xattr_name *kname;
++ unsigned int flags;
++};
++
++int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
++int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
++ struct xattr_ctx *ctx);
++
++/*
++ * fs/attr.c
++ */
+diff --git a/fs/io-wq.c b/fs/io-wq.c
+deleted file mode 100644
+index 422a7ed6a9bdb..0000000000000
+--- a/fs/io-wq.c
++++ /dev/null
+@@ -1,1322 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Basic worker thread pool for io_uring
+- *
+- * Copyright (C) 2019 Jens Axboe
+- *
+- */
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <linux/errno.h>
+-#include <linux/sched/signal.h>
+-#include <linux/percpu.h>
+-#include <linux/slab.h>
+-#include <linux/rculist_nulls.h>
+-#include <linux/cpu.h>
+-#include <linux/tracehook.h>
+-#include <uapi/linux/io_uring.h>
+-
+-#include "io-wq.h"
+-
+-#define WORKER_IDLE_TIMEOUT (5 * HZ)
+-
+-enum {
+- IO_WORKER_F_UP = 1, /* up and active */
+- IO_WORKER_F_RUNNING = 2, /* account as running */
+- IO_WORKER_F_FREE = 4, /* worker on free list */
+- IO_WORKER_F_BOUND = 8, /* is doing bounded work */
+-};
+-
+-enum {
+- IO_WQ_BIT_EXIT = 0, /* wq exiting */
+-};
+-
+-enum {
+- IO_ACCT_STALLED_BIT = 0, /* stalled on hash */
+-};
+-
+-/*
+- * One for each thread in a wqe pool
+- */
+-struct io_worker {
+- refcount_t ref;
+- unsigned flags;
+- struct hlist_nulls_node nulls_node;
+- struct list_head all_list;
+- struct task_struct *task;
+- struct io_wqe *wqe;
+-
+- struct io_wq_work *cur_work;
+- spinlock_t lock;
+-
+- struct completion ref_done;
+-
+- unsigned long create_state;
+- struct callback_head create_work;
+- int create_index;
+-
+- union {
+- struct rcu_head rcu;
+- struct work_struct work;
+- };
+-};
+-
+-#if BITS_PER_LONG == 64
+-#define IO_WQ_HASH_ORDER 6
+-#else
+-#define IO_WQ_HASH_ORDER 5
+-#endif
+-
+-#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
+-
+-struct io_wqe_acct {
+- unsigned nr_workers;
+- unsigned max_workers;
+- int index;
+- atomic_t nr_running;
+- struct io_wq_work_list work_list;
+- unsigned long flags;
+-};
+-
+-enum {
+- IO_WQ_ACCT_BOUND,
+- IO_WQ_ACCT_UNBOUND,
+- IO_WQ_ACCT_NR,
+-};
+-
+-/*
+- * Per-node worker thread pool
+- */
+-struct io_wqe {
+- raw_spinlock_t lock;
+- struct io_wqe_acct acct[2];
+-
+- int node;
+-
+- struct hlist_nulls_head free_list;
+- struct list_head all_list;
+-
+- struct wait_queue_entry wait;
+-
+- struct io_wq *wq;
+- struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+-
+- cpumask_var_t cpu_mask;
+-};
+-
+-/*
+- * Per io_wq state
+- */
+-struct io_wq {
+- unsigned long state;
+-
+- free_work_fn *free_work;
+- io_wq_work_fn *do_work;
+-
+- struct io_wq_hash *hash;
+-
+- atomic_t worker_refs;
+- struct completion worker_done;
+-
+- struct hlist_node cpuhp_node;
+-
+- struct task_struct *task;
+-
+- struct io_wqe *wqes[];
+-};
+-
+-static enum cpuhp_state io_wq_online;
+-
+-struct io_cb_cancel_data {
+- work_cancel_fn *fn;
+- void *data;
+- int nr_running;
+- int nr_pending;
+- bool cancel_all;
+-};
+-
+-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+-static void io_wqe_dec_running(struct io_worker *worker);
+-static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+- struct io_wqe_acct *acct,
+- struct io_cb_cancel_data *match);
+-
+-static bool io_worker_get(struct io_worker *worker)
+-{
+- return refcount_inc_not_zero(&worker->ref);
+-}
+-
+-static void io_worker_release(struct io_worker *worker)
+-{
+- if (refcount_dec_and_test(&worker->ref))
+- complete(&worker->ref_done);
+-}
+-
+-static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound)
+-{
+- return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND];
+-}
+-
+-static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
+- struct io_wq_work *work)
+-{
+- return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND));
+-}
+-
+-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
+-{
+- return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND);
+-}
+-
+-static void io_worker_ref_put(struct io_wq *wq)
+-{
+- if (atomic_dec_and_test(&wq->worker_refs))
+- complete(&wq->worker_done);
+-}
+-
+-static void io_worker_exit(struct io_worker *worker)
+-{
+- struct io_wqe *wqe = worker->wqe;
+-
+- if (refcount_dec_and_test(&worker->ref))
+- complete(&worker->ref_done);
+- wait_for_completion(&worker->ref_done);
+-
+- raw_spin_lock(&wqe->lock);
+- if (worker->flags & IO_WORKER_F_FREE)
+- hlist_nulls_del_rcu(&worker->nulls_node);
+- list_del_rcu(&worker->all_list);
+- preempt_disable();
+- io_wqe_dec_running(worker);
+- worker->flags = 0;
+- current->flags &= ~PF_IO_WORKER;
+- preempt_enable();
+- raw_spin_unlock(&wqe->lock);
+-
+- kfree_rcu(worker, rcu);
+- io_worker_ref_put(wqe->wq);
+- do_exit(0);
+-}
+-
+-static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
+-{
+- if (!wq_list_empty(&acct->work_list) &&
+- !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+- return true;
+- return false;
+-}
+-
+-/*
+- * Check head of free list for an available worker. If one isn't available,
+- * caller must create one.
+- */
+-static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
+- struct io_wqe_acct *acct)
+- __must_hold(RCU)
+-{
+- struct hlist_nulls_node *n;
+- struct io_worker *worker;
+-
+- /*
+- * Iterate free_list and see if we can find an idle worker to
+- * activate. If a given worker is on the free_list but in the process
+- * of exiting, keep trying.
+- */
+- hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+- if (!io_worker_get(worker))
+- continue;
+- if (io_wqe_get_acct(worker) != acct) {
+- io_worker_release(worker);
+- continue;
+- }
+- if (wake_up_process(worker->task)) {
+- io_worker_release(worker);
+- return true;
+- }
+- io_worker_release(worker);
+- }
+-
+- return false;
+-}
+-
+-/*
+- * We need a worker. If we find a free one, we're good. If not, and we're
+- * below the max number of workers, create one.
+- */
+-static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+-{
+- /*
+- * Most likely an attempt to queue unbounded work on an io_wq that
+- * wasn't setup with any unbounded workers.
+- */
+- if (unlikely(!acct->max_workers))
+- pr_warn_once("io-wq is not configured for unbound workers");
+-
+- raw_spin_lock(&wqe->lock);
+- if (acct->nr_workers >= acct->max_workers) {
+- raw_spin_unlock(&wqe->lock);
+- return true;
+- }
+- acct->nr_workers++;
+- raw_spin_unlock(&wqe->lock);
+- atomic_inc(&acct->nr_running);
+- atomic_inc(&wqe->wq->worker_refs);
+- return create_io_worker(wqe->wq, wqe, acct->index);
+-}
+-
+-static void io_wqe_inc_running(struct io_worker *worker)
+-{
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+-
+- atomic_inc(&acct->nr_running);
+-}
+-
+-static void create_worker_cb(struct callback_head *cb)
+-{
+- struct io_worker *worker;
+- struct io_wq *wq;
+- struct io_wqe *wqe;
+- struct io_wqe_acct *acct;
+- bool do_create = false;
+-
+- worker = container_of(cb, struct io_worker, create_work);
+- wqe = worker->wqe;
+- wq = wqe->wq;
+- acct = &wqe->acct[worker->create_index];
+- raw_spin_lock(&wqe->lock);
+- if (acct->nr_workers < acct->max_workers) {
+- acct->nr_workers++;
+- do_create = true;
+- }
+- raw_spin_unlock(&wqe->lock);
+- if (do_create) {
+- create_io_worker(wq, wqe, worker->create_index);
+- } else {
+- atomic_dec(&acct->nr_running);
+- io_worker_ref_put(wq);
+- }
+- clear_bit_unlock(0, &worker->create_state);
+- io_worker_release(worker);
+-}
+-
+-static bool io_queue_worker_create(struct io_worker *worker,
+- struct io_wqe_acct *acct,
+- task_work_func_t func)
+-{
+- struct io_wqe *wqe = worker->wqe;
+- struct io_wq *wq = wqe->wq;
+-
+- /* raced with exit, just ignore create call */
+- if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
+- goto fail;
+- if (!io_worker_get(worker))
+- goto fail;
+- /*
+- * create_state manages ownership of create_work/index. We should
+- * only need one entry per worker, as the worker going to sleep
+- * will trigger the condition, and waking will clear it once it
+- * runs the task_work.
+- */
+- if (test_bit(0, &worker->create_state) ||
+- test_and_set_bit_lock(0, &worker->create_state))
+- goto fail_release;
+-
+- init_task_work(&worker->create_work, func);
+- worker->create_index = acct->index;
+- if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
+- return true;
+- clear_bit_unlock(0, &worker->create_state);
+-fail_release:
+- io_worker_release(worker);
+-fail:
+- atomic_dec(&acct->nr_running);
+- io_worker_ref_put(wq);
+- return false;
+-}
+-
+-static void io_wqe_dec_running(struct io_worker *worker)
+- __must_hold(wqe->lock)
+-{
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+- struct io_wqe *wqe = worker->wqe;
+-
+- if (!(worker->flags & IO_WORKER_F_UP))
+- return;
+-
+- if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
+- atomic_inc(&acct->nr_running);
+- atomic_inc(&wqe->wq->worker_refs);
+- io_queue_worker_create(worker, acct, create_worker_cb);
+- }
+-}
+-
+-/*
+- * Worker will start processing some work. Move it to the busy list, if
+- * it's currently on the freelist
+- */
+-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
+- struct io_wq_work *work)
+- __must_hold(wqe->lock)
+-{
+- if (worker->flags & IO_WORKER_F_FREE) {
+- worker->flags &= ~IO_WORKER_F_FREE;
+- hlist_nulls_del_init_rcu(&worker->nulls_node);
+- }
+-}
+-
+-/*
+- * No work, worker going to sleep. Move to freelist, and unuse mm if we
+- * have one attached. Dropping the mm may potentially sleep, so we drop
+- * the lock in that case and return success. Since the caller has to
+- * retry the loop in that case (we changed task state), we don't regrab
+- * the lock if we return success.
+- */
+-static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
+- __must_hold(wqe->lock)
+-{
+- if (!(worker->flags & IO_WORKER_F_FREE)) {
+- worker->flags |= IO_WORKER_F_FREE;
+- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+- }
+-}
+-
+-static inline unsigned int io_get_work_hash(struct io_wq_work *work)
+-{
+- return work->flags >> IO_WQ_HASH_SHIFT;
+-}
+-
+-static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+-{
+- struct io_wq *wq = wqe->wq;
+-
+- spin_lock_irq(&wq->hash->wait.lock);
+- if (list_empty(&wqe->wait.entry)) {
+- __add_wait_queue(&wq->hash->wait, &wqe->wait);
+- if (!test_bit(hash, &wq->hash->map)) {
+- __set_current_state(TASK_RUNNING);
+- list_del_init(&wqe->wait.entry);
+- }
+- }
+- spin_unlock_irq(&wq->hash->wait.lock);
+-}
+-
+-static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+- struct io_worker *worker)
+- __must_hold(wqe->lock)
+-{
+- struct io_wq_work_node *node, *prev;
+- struct io_wq_work *work, *tail;
+- unsigned int stall_hash = -1U;
+- struct io_wqe *wqe = worker->wqe;
+-
+- wq_list_for_each(node, prev, &acct->work_list) {
+- unsigned int hash;
+-
+- work = container_of(node, struct io_wq_work, list);
+-
+- /* not hashed, can run anytime */
+- if (!io_wq_is_hashed(work)) {
+- wq_list_del(&acct->work_list, node, prev);
+- return work;
+- }
+-
+- hash = io_get_work_hash(work);
+- /* all items with this hash lie in [work, tail] */
+- tail = wqe->hash_tail[hash];
+-
+- /* hashed, can run if not already running */
+- if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
+- wqe->hash_tail[hash] = NULL;
+- wq_list_cut(&acct->work_list, &tail->list, prev);
+- return work;
+- }
+- if (stall_hash == -1U)
+- stall_hash = hash;
+- /* fast forward to a next hash, for-each will fix up @prev */
+- node = &tail->list;
+- }
+-
+- if (stall_hash != -1U) {
+- /*
+- * Set this before dropping the lock to avoid racing with new
+- * work being added and clearing the stalled bit.
+- */
+- set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+- raw_spin_unlock(&wqe->lock);
+- io_wait_on_hash(wqe, stall_hash);
+- raw_spin_lock(&wqe->lock);
+- }
+-
+- return NULL;
+-}
+-
+-static bool io_flush_signals(void)
+-{
+- if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
+- __set_current_state(TASK_RUNNING);
+- tracehook_notify_signal();
+- return true;
+- }
+- return false;
+-}
+-
+-static void io_assign_current_work(struct io_worker *worker,
+- struct io_wq_work *work)
+-{
+- if (work) {
+- io_flush_signals();
+- cond_resched();
+- }
+-
+- spin_lock(&worker->lock);
+- worker->cur_work = work;
+- spin_unlock(&worker->lock);
+-}
+-
+-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
+-
+-static void io_worker_handle_work(struct io_worker *worker)
+- __releases(wqe->lock)
+-{
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+- struct io_wqe *wqe = worker->wqe;
+- struct io_wq *wq = wqe->wq;
+- bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
+-
+- do {
+- struct io_wq_work *work;
+-get_next:
+- /*
+- * If we got some work, mark us as busy. If we didn't, but
+- * the list isn't empty, it means we stalled on hashed work.
+- * Mark us stalled so we don't keep looking for work when we
+- * can't make progress, any work completion or insertion will
+- * clear the stalled flag.
+- */
+- work = io_get_next_work(acct, worker);
+- if (work)
+- __io_worker_busy(wqe, worker, work);
+-
+- raw_spin_unlock(&wqe->lock);
+- if (!work)
+- break;
+- io_assign_current_work(worker, work);
+- __set_current_state(TASK_RUNNING);
+-
+- /* handle a whole dependent link */
+- do {
+- struct io_wq_work *next_hashed, *linked;
+- unsigned int hash = io_get_work_hash(work);
+-
+- next_hashed = wq_next_work(work);
+-
+- if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
+- work->flags |= IO_WQ_WORK_CANCEL;
+- wq->do_work(work);
+- io_assign_current_work(worker, NULL);
+-
+- linked = wq->free_work(work);
+- work = next_hashed;
+- if (!work && linked && !io_wq_is_hashed(linked)) {
+- work = linked;
+- linked = NULL;
+- }
+- io_assign_current_work(worker, work);
+- if (linked)
+- io_wqe_enqueue(wqe, linked);
+-
+- if (hash != -1U && !next_hashed) {
+- clear_bit(hash, &wq->hash->map);
+- clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+- if (wq_has_sleeper(&wq->hash->wait))
+- wake_up(&wq->hash->wait);
+- raw_spin_lock(&wqe->lock);
+- /* skip unnecessary unlock-lock wqe->lock */
+- if (!work)
+- goto get_next;
+- raw_spin_unlock(&wqe->lock);
+- }
+- } while (work);
+-
+- raw_spin_lock(&wqe->lock);
+- } while (1);
+-}
+-
+-static int io_wqe_worker(void *data)
+-{
+- struct io_worker *worker = data;
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+- struct io_wqe *wqe = worker->wqe;
+- struct io_wq *wq = wqe->wq;
+- bool last_timeout = false;
+- char buf[TASK_COMM_LEN];
+-
+- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+-
+- snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
+- set_task_comm(current, buf);
+-
+- while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+- long ret;
+-
+- set_current_state(TASK_INTERRUPTIBLE);
+-loop:
+- raw_spin_lock(&wqe->lock);
+- if (io_acct_run_queue(acct)) {
+- io_worker_handle_work(worker);
+- goto loop;
+- }
+- /* timed out, exit unless we're the last worker */
+- if (last_timeout && acct->nr_workers > 1) {
+- acct->nr_workers--;
+- raw_spin_unlock(&wqe->lock);
+- __set_current_state(TASK_RUNNING);
+- break;
+- }
+- last_timeout = false;
+- __io_worker_idle(wqe, worker);
+- raw_spin_unlock(&wqe->lock);
+- if (io_flush_signals())
+- continue;
+- ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
+- if (signal_pending(current)) {
+- struct ksignal ksig;
+-
+- if (!get_signal(&ksig))
+- continue;
+- break;
+- }
+- last_timeout = !ret;
+- }
+-
+- if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+- raw_spin_lock(&wqe->lock);
+- io_worker_handle_work(worker);
+- }
+-
+- io_worker_exit(worker);
+- return 0;
+-}
+-
+-/*
+- * Called when a worker is scheduled in. Mark us as currently running.
+- */
+-void io_wq_worker_running(struct task_struct *tsk)
+-{
+- struct io_worker *worker = tsk->pf_io_worker;
+-
+- if (!worker)
+- return;
+- if (!(worker->flags & IO_WORKER_F_UP))
+- return;
+- if (worker->flags & IO_WORKER_F_RUNNING)
+- return;
+- worker->flags |= IO_WORKER_F_RUNNING;
+- io_wqe_inc_running(worker);
+-}
+-
+-/*
+- * Called when worker is going to sleep. If there are no workers currently
+- * running and we have work pending, wake up a free one or create a new one.
+- */
+-void io_wq_worker_sleeping(struct task_struct *tsk)
+-{
+- struct io_worker *worker = tsk->pf_io_worker;
+-
+- if (!worker)
+- return;
+- if (!(worker->flags & IO_WORKER_F_UP))
+- return;
+- if (!(worker->flags & IO_WORKER_F_RUNNING))
+- return;
+-
+- worker->flags &= ~IO_WORKER_F_RUNNING;
+-
+- raw_spin_lock(&worker->wqe->lock);
+- io_wqe_dec_running(worker);
+- raw_spin_unlock(&worker->wqe->lock);
+-}
+-
+-static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
+- struct task_struct *tsk)
+-{
+- tsk->pf_io_worker = worker;
+- worker->task = tsk;
+- set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
+- tsk->flags |= PF_NO_SETAFFINITY;
+-
+- raw_spin_lock(&wqe->lock);
+- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+- list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+- worker->flags |= IO_WORKER_F_FREE;
+- raw_spin_unlock(&wqe->lock);
+- wake_up_new_task(tsk);
+-}
+-
+-static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+-{
+- return true;
+-}
+-
+-static inline bool io_should_retry_thread(long err)
+-{
+- switch (err) {
+- case -EAGAIN:
+- case -ERESTARTSYS:
+- case -ERESTARTNOINTR:
+- case -ERESTARTNOHAND:
+- return true;
+- default:
+- return false;
+- }
+-}
+-
+-static void create_worker_cont(struct callback_head *cb)
+-{
+- struct io_worker *worker;
+- struct task_struct *tsk;
+- struct io_wqe *wqe;
+-
+- worker = container_of(cb, struct io_worker, create_work);
+- clear_bit_unlock(0, &worker->create_state);
+- wqe = worker->wqe;
+- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+- if (!IS_ERR(tsk)) {
+- io_init_new_worker(wqe, worker, tsk);
+- io_worker_release(worker);
+- return;
+- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+-
+- atomic_dec(&acct->nr_running);
+- raw_spin_lock(&wqe->lock);
+- acct->nr_workers--;
+- if (!acct->nr_workers) {
+- struct io_cb_cancel_data match = {
+- .fn = io_wq_work_match_all,
+- .cancel_all = true,
+- };
+-
+- while (io_acct_cancel_pending_work(wqe, acct, &match))
+- raw_spin_lock(&wqe->lock);
+- }
+- raw_spin_unlock(&wqe->lock);
+- io_worker_ref_put(wqe->wq);
+- kfree(worker);
+- return;
+- }
+-
+- /* re-create attempts grab a new worker ref, drop the existing one */
+- io_worker_release(worker);
+- schedule_work(&worker->work);
+-}
+-
+-static void io_workqueue_create(struct work_struct *work)
+-{
+- struct io_worker *worker = container_of(work, struct io_worker, work);
+- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+-
+- if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
+- clear_bit_unlock(0, &worker->create_state);
+- io_worker_release(worker);
+- kfree(worker);
+- }
+-}
+-
+-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+-{
+- struct io_wqe_acct *acct = &wqe->acct[index];
+- struct io_worker *worker;
+- struct task_struct *tsk;
+-
+- __set_current_state(TASK_RUNNING);
+-
+- worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+- if (!worker) {
+-fail:
+- atomic_dec(&acct->nr_running);
+- raw_spin_lock(&wqe->lock);
+- acct->nr_workers--;
+- raw_spin_unlock(&wqe->lock);
+- io_worker_ref_put(wq);
+- return false;
+- }
+-
+- refcount_set(&worker->ref, 1);
+- worker->wqe = wqe;
+- spin_lock_init(&worker->lock);
+- init_completion(&worker->ref_done);
+-
+- if (index == IO_WQ_ACCT_BOUND)
+- worker->flags |= IO_WORKER_F_BOUND;
+-
+- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+- if (!IS_ERR(tsk)) {
+- io_init_new_worker(wqe, worker, tsk);
+- } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+- kfree(worker);
+- goto fail;
+- } else {
+- INIT_WORK(&worker->work, io_workqueue_create);
+- schedule_work(&worker->work);
+- }
+-
+- return true;
+-}
+-
+-/*
+- * Iterate the passed in list and call the specific function for each
+- * worker that isn't exiting
+- */
+-static bool io_wq_for_each_worker(struct io_wqe *wqe,
+- bool (*func)(struct io_worker *, void *),
+- void *data)
+-{
+- struct io_worker *worker;
+- bool ret = false;
+-
+- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+- if (io_worker_get(worker)) {
+- /* no task if node is/was offline */
+- if (worker->task)
+- ret = func(worker, data);
+- io_worker_release(worker);
+- if (ret)
+- break;
+- }
+- }
+-
+- return ret;
+-}
+-
+-static bool io_wq_worker_wake(struct io_worker *worker, void *data)
+-{
+- set_notify_signal(worker->task);
+- wake_up_process(worker->task);
+- return false;
+-}
+-
+-static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
+-{
+- struct io_wq *wq = wqe->wq;
+-
+- do {
+- work->flags |= IO_WQ_WORK_CANCEL;
+- wq->do_work(work);
+- work = wq->free_work(work);
+- } while (work);
+-}
+-
+-static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+-{
+- struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+- unsigned int hash;
+- struct io_wq_work *tail;
+-
+- if (!io_wq_is_hashed(work)) {
+-append:
+- wq_list_add_tail(&work->list, &acct->work_list);
+- return;
+- }
+-
+- hash = io_get_work_hash(work);
+- tail = wqe->hash_tail[hash];
+- wqe->hash_tail[hash] = work;
+- if (!tail)
+- goto append;
+-
+- wq_list_add_after(&work->list, &tail->list, &acct->work_list);
+-}
+-
+-static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
+-{
+- return work == data;
+-}
+-
+-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+-{
+- struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+- unsigned work_flags = work->flags;
+- bool do_create;
+-
+- /*
+- * If io-wq is exiting for this task, or if the request has explicitly
+- * been marked as one that should not get executed, cancel it here.
+- */
+- if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+- (work->flags & IO_WQ_WORK_CANCEL)) {
+- io_run_cancel(work, wqe);
+- return;
+- }
+-
+- raw_spin_lock(&wqe->lock);
+- io_wqe_insert_work(wqe, work);
+- clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+-
+- rcu_read_lock();
+- do_create = !io_wqe_activate_free_worker(wqe, acct);
+- rcu_read_unlock();
+-
+- raw_spin_unlock(&wqe->lock);
+-
+- if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+- !atomic_read(&acct->nr_running))) {
+- bool did_create;
+-
+- did_create = io_wqe_create_worker(wqe, acct);
+- if (likely(did_create))
+- return;
+-
+- raw_spin_lock(&wqe->lock);
+- /* fatal condition, failed to create the first worker */
+- if (!acct->nr_workers) {
+- struct io_cb_cancel_data match = {
+- .fn = io_wq_work_match_item,
+- .data = work,
+- .cancel_all = false,
+- };
+-
+- if (io_acct_cancel_pending_work(wqe, acct, &match))
+- raw_spin_lock(&wqe->lock);
+- }
+- raw_spin_unlock(&wqe->lock);
+- }
+-}
+-
+-void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
+-{
+- struct io_wqe *wqe = wq->wqes[numa_node_id()];
+-
+- io_wqe_enqueue(wqe, work);
+-}
+-
+-/*
+- * Work items that hash to the same value will not be done in parallel.
+- * Used to limit concurrent writes, generally hashed by inode.
+- */
+-void io_wq_hash_work(struct io_wq_work *work, void *val)
+-{
+- unsigned int bit;
+-
+- bit = hash_ptr(val, IO_WQ_HASH_ORDER);
+- work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
+-}
+-
+-static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
+-{
+- struct io_cb_cancel_data *match = data;
+-
+- /*
+- * Hold the lock to avoid ->cur_work going out of scope, caller
+- * may dereference the passed in work.
+- */
+- spin_lock(&worker->lock);
+- if (worker->cur_work &&
+- match->fn(worker->cur_work, match->data)) {
+- set_notify_signal(worker->task);
+- match->nr_running++;
+- }
+- spin_unlock(&worker->lock);
+-
+- return match->nr_running && !match->cancel_all;
+-}
+-
+-static inline void io_wqe_remove_pending(struct io_wqe *wqe,
+- struct io_wq_work *work,
+- struct io_wq_work_node *prev)
+-{
+- struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+- unsigned int hash = io_get_work_hash(work);
+- struct io_wq_work *prev_work = NULL;
+-
+- if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) {
+- if (prev)
+- prev_work = container_of(prev, struct io_wq_work, list);
+- if (prev_work && io_get_work_hash(prev_work) == hash)
+- wqe->hash_tail[hash] = prev_work;
+- else
+- wqe->hash_tail[hash] = NULL;
+- }
+- wq_list_del(&acct->work_list, &work->list, prev);
+-}
+-
+-static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+- struct io_wqe_acct *acct,
+- struct io_cb_cancel_data *match)
+- __releases(wqe->lock)
+-{
+- struct io_wq_work_node *node, *prev;
+- struct io_wq_work *work;
+-
+- wq_list_for_each(node, prev, &acct->work_list) {
+- work = container_of(node, struct io_wq_work, list);
+- if (!match->fn(work, match->data))
+- continue;
+- io_wqe_remove_pending(wqe, work, prev);
+- raw_spin_unlock(&wqe->lock);
+- io_run_cancel(work, wqe);
+- match->nr_pending++;
+- /* not safe to continue after unlock */
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+- struct io_cb_cancel_data *match)
+-{
+- int i;
+-retry:
+- raw_spin_lock(&wqe->lock);
+- for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+- struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
+-
+- if (io_acct_cancel_pending_work(wqe, acct, match)) {
+- if (match->cancel_all)
+- goto retry;
+- return;
+- }
+- }
+- raw_spin_unlock(&wqe->lock);
+-}
+-
+-static void io_wqe_cancel_running_work(struct io_wqe *wqe,
+- struct io_cb_cancel_data *match)
+-{
+- rcu_read_lock();
+- io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
+- rcu_read_unlock();
+-}
+-
+-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+- void *data, bool cancel_all)
+-{
+- struct io_cb_cancel_data match = {
+- .fn = cancel,
+- .data = data,
+- .cancel_all = cancel_all,
+- };
+- int node;
+-
+- /*
+- * First check pending list, if we're lucky we can just remove it
+- * from there. CANCEL_OK means that the work is returned as-new,
+- * no completion will be posted for it.
+- */
+- for_each_node(node) {
+- struct io_wqe *wqe = wq->wqes[node];
+-
+- io_wqe_cancel_pending_work(wqe, &match);
+- if (match.nr_pending && !match.cancel_all)
+- return IO_WQ_CANCEL_OK;
+- }
+-
+- /*
+- * Now check if a free (going busy) or busy worker has the work
+- * currently running. If we find it there, we'll return CANCEL_RUNNING
+- * as an indication that we attempt to signal cancellation. The
+- * completion will run normally in this case.
+- */
+- for_each_node(node) {
+- struct io_wqe *wqe = wq->wqes[node];
+-
+- io_wqe_cancel_running_work(wqe, &match);
+- if (match.nr_running && !match.cancel_all)
+- return IO_WQ_CANCEL_RUNNING;
+- }
+-
+- if (match.nr_running)
+- return IO_WQ_CANCEL_RUNNING;
+- if (match.nr_pending)
+- return IO_WQ_CANCEL_OK;
+- return IO_WQ_CANCEL_NOTFOUND;
+-}
+-
+-static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+- int sync, void *key)
+-{
+- struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+- int i;
+-
+- list_del_init(&wait->entry);
+-
+- rcu_read_lock();
+- for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+- struct io_wqe_acct *acct = &wqe->acct[i];
+-
+- if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+- io_wqe_activate_free_worker(wqe, acct);
+- }
+- rcu_read_unlock();
+- return 1;
+-}
+-
+-struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
+-{
+- int ret, node, i;
+- struct io_wq *wq;
+-
+- if (WARN_ON_ONCE(!data->free_work || !data->do_work))
+- return ERR_PTR(-EINVAL);
+- if (WARN_ON_ONCE(!bounded))
+- return ERR_PTR(-EINVAL);
+-
+- wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL);
+- if (!wq)
+- return ERR_PTR(-ENOMEM);
+- ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+- if (ret)
+- goto err_wq;
+-
+- refcount_inc(&data->hash->refs);
+- wq->hash = data->hash;
+- wq->free_work = data->free_work;
+- wq->do_work = data->do_work;
+-
+- ret = -ENOMEM;
+- for_each_node(node) {
+- struct io_wqe *wqe;
+- int alloc_node = node;
+-
+- if (!node_online(alloc_node))
+- alloc_node = NUMA_NO_NODE;
+- wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
+- if (!wqe)
+- goto err;
+- if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+- goto err;
+- cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
+- wq->wqes[node] = wqe;
+- wqe->node = alloc_node;
+- wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
+- wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+- task_rlimit(current, RLIMIT_NPROC);
+- INIT_LIST_HEAD(&wqe->wait.entry);
+- wqe->wait.func = io_wqe_hash_wake;
+- for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+- struct io_wqe_acct *acct = &wqe->acct[i];
+-
+- acct->index = i;
+- atomic_set(&acct->nr_running, 0);
+- INIT_WQ_LIST(&acct->work_list);
+- }
+- wqe->wq = wq;
+- raw_spin_lock_init(&wqe->lock);
+- INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
+- INIT_LIST_HEAD(&wqe->all_list);
+- }
+-
+- wq->task = get_task_struct(data->task);
+- atomic_set(&wq->worker_refs, 1);
+- init_completion(&wq->worker_done);
+- return wq;
+-err:
+- io_wq_put_hash(data->hash);
+- cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+- for_each_node(node) {
+- if (!wq->wqes[node])
+- continue;
+- free_cpumask_var(wq->wqes[node]->cpu_mask);
+- kfree(wq->wqes[node]);
+- }
+-err_wq:
+- kfree(wq);
+- return ERR_PTR(ret);
+-}
+-
+-static bool io_task_work_match(struct callback_head *cb, void *data)
+-{
+- struct io_worker *worker;
+-
+- if (cb->func != create_worker_cb && cb->func != create_worker_cont)
+- return false;
+- worker = container_of(cb, struct io_worker, create_work);
+- return worker->wqe->wq == data;
+-}
+-
+-void io_wq_exit_start(struct io_wq *wq)
+-{
+- set_bit(IO_WQ_BIT_EXIT, &wq->state);
+-}
+-
+-static void io_wq_exit_workers(struct io_wq *wq)
+-{
+- struct callback_head *cb;
+- int node;
+-
+- if (!wq->task)
+- return;
+-
+- while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
+- struct io_worker *worker;
+- struct io_wqe_acct *acct;
+-
+- worker = container_of(cb, struct io_worker, create_work);
+- acct = io_wqe_get_acct(worker);
+- atomic_dec(&acct->nr_running);
+- raw_spin_lock(&worker->wqe->lock);
+- acct->nr_workers--;
+- raw_spin_unlock(&worker->wqe->lock);
+- io_worker_ref_put(wq);
+- clear_bit_unlock(0, &worker->create_state);
+- io_worker_release(worker);
+- }
+-
+- rcu_read_lock();
+- for_each_node(node) {
+- struct io_wqe *wqe = wq->wqes[node];
+-
+- io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
+- }
+- rcu_read_unlock();
+- io_worker_ref_put(wq);
+- wait_for_completion(&wq->worker_done);
+-
+- for_each_node(node) {
+- spin_lock_irq(&wq->hash->wait.lock);
+- list_del_init(&wq->wqes[node]->wait.entry);
+- spin_unlock_irq(&wq->hash->wait.lock);
+- }
+- put_task_struct(wq->task);
+- wq->task = NULL;
+-}
+-
+-static void io_wq_destroy(struct io_wq *wq)
+-{
+- int node;
+-
+- cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+-
+- for_each_node(node) {
+- struct io_wqe *wqe = wq->wqes[node];
+- struct io_cb_cancel_data match = {
+- .fn = io_wq_work_match_all,
+- .cancel_all = true,
+- };
+- io_wqe_cancel_pending_work(wqe, &match);
+- free_cpumask_var(wqe->cpu_mask);
+- kfree(wqe);
+- }
+- io_wq_put_hash(wq->hash);
+- kfree(wq);
+-}
+-
+-void io_wq_put_and_exit(struct io_wq *wq)
+-{
+- WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+-
+- io_wq_exit_workers(wq);
+- io_wq_destroy(wq);
+-}
+-
+-struct online_data {
+- unsigned int cpu;
+- bool online;
+-};
+-
+-static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+-{
+- struct online_data *od = data;
+-
+- if (od->online)
+- cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
+- else
+- cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
+- return false;
+-}
+-
+-static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
+-{
+- struct online_data od = {
+- .cpu = cpu,
+- .online = online
+- };
+- int i;
+-
+- rcu_read_lock();
+- for_each_node(i)
+- io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
+- rcu_read_unlock();
+- return 0;
+-}
+-
+-static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+-{
+- struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+-
+- return __io_wq_cpu_online(wq, cpu, true);
+-}
+-
+-static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
+-{
+- struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+-
+- return __io_wq_cpu_online(wq, cpu, false);
+-}
+-
+-int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+-{
+- int i;
+-
+- rcu_read_lock();
+- for_each_node(i) {
+- struct io_wqe *wqe = wq->wqes[i];
+-
+- if (mask)
+- cpumask_copy(wqe->cpu_mask, mask);
+- else
+- cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+- }
+- rcu_read_unlock();
+- return 0;
+-}
+-
+-/*
+- * Set max number of unbounded workers, returns old value. If new_count is 0,
+- * then just return the old value.
+- */
+-int io_wq_max_workers(struct io_wq *wq, int *new_count)
+-{
+- int i, node, prev = 0;
+-
+- BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND);
+- BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
+- BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
+-
+- for (i = 0; i < 2; i++) {
+- if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
+- new_count[i] = task_rlimit(current, RLIMIT_NPROC);
+- }
+-
+- rcu_read_lock();
+- for_each_node(node) {
+- struct io_wqe *wqe = wq->wqes[node];
+- struct io_wqe_acct *acct;
+-
+- raw_spin_lock(&wqe->lock);
+- for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+- acct = &wqe->acct[i];
+- prev = max_t(int, acct->max_workers, prev);
+- if (new_count[i])
+- acct->max_workers = new_count[i];
+- new_count[i] = prev;
+- }
+- raw_spin_unlock(&wqe->lock);
+- }
+- rcu_read_unlock();
+- return 0;
+-}
+-
+-static __init int io_wq_init(void)
+-{
+- int ret;
+-
+- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+- io_wq_cpu_online, io_wq_cpu_offline);
+- if (ret < 0)
+- return ret;
+- io_wq_online = ret;
+- return 0;
+-}
+-subsys_initcall(io_wq_init);
+diff --git a/fs/io-wq.h b/fs/io-wq.h
+deleted file mode 100644
+index bf5c4c5337605..0000000000000
+--- a/fs/io-wq.h
++++ /dev/null
+@@ -1,160 +0,0 @@
+-#ifndef INTERNAL_IO_WQ_H
+-#define INTERNAL_IO_WQ_H
+-
+-#include <linux/refcount.h>
+-
+-struct io_wq;
+-
+-enum {
+- IO_WQ_WORK_CANCEL = 1,
+- IO_WQ_WORK_HASHED = 2,
+- IO_WQ_WORK_UNBOUND = 4,
+- IO_WQ_WORK_CONCURRENT = 16,
+-
+- IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
+-};
+-
+-enum io_wq_cancel {
+- IO_WQ_CANCEL_OK, /* cancelled before started */
+- IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */
+- IO_WQ_CANCEL_NOTFOUND, /* work not found */
+-};
+-
+-struct io_wq_work_node {
+- struct io_wq_work_node *next;
+-};
+-
+-struct io_wq_work_list {
+- struct io_wq_work_node *first;
+- struct io_wq_work_node *last;
+-};
+-
+-static inline void wq_list_add_after(struct io_wq_work_node *node,
+- struct io_wq_work_node *pos,
+- struct io_wq_work_list *list)
+-{
+- struct io_wq_work_node *next = pos->next;
+-
+- pos->next = node;
+- node->next = next;
+- if (!next)
+- list->last = node;
+-}
+-
+-static inline void wq_list_add_tail(struct io_wq_work_node *node,
+- struct io_wq_work_list *list)
+-{
+- node->next = NULL;
+- if (!list->first) {
+- list->last = node;
+- WRITE_ONCE(list->first, node);
+- } else {
+- list->last->next = node;
+- list->last = node;
+- }
+-}
+-
+-static inline void wq_list_cut(struct io_wq_work_list *list,
+- struct io_wq_work_node *last,
+- struct io_wq_work_node *prev)
+-{
+- /* first in the list, if prev==NULL */
+- if (!prev)
+- WRITE_ONCE(list->first, last->next);
+- else
+- prev->next = last->next;
+-
+- if (last == list->last)
+- list->last = prev;
+- last->next = NULL;
+-}
+-
+-static inline void wq_list_del(struct io_wq_work_list *list,
+- struct io_wq_work_node *node,
+- struct io_wq_work_node *prev)
+-{
+- wq_list_cut(list, node, prev);
+-}
+-
+-#define wq_list_for_each(pos, prv, head) \
+- for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
+-
+-#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
+-#define INIT_WQ_LIST(list) do { \
+- (list)->first = NULL; \
+- (list)->last = NULL; \
+-} while (0)
+-
+-struct io_wq_work {
+- struct io_wq_work_node list;
+- unsigned flags;
+-};
+-
+-static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
+-{
+- if (!work->list.next)
+- return NULL;
+-
+- return container_of(work->list.next, struct io_wq_work, list);
+-}
+-
+-typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
+-typedef void (io_wq_work_fn)(struct io_wq_work *);
+-
+-struct io_wq_hash {
+- refcount_t refs;
+- unsigned long map;
+- struct wait_queue_head wait;
+-};
+-
+-static inline void io_wq_put_hash(struct io_wq_hash *hash)
+-{
+- if (refcount_dec_and_test(&hash->refs))
+- kfree(hash);
+-}
+-
+-struct io_wq_data {
+- struct io_wq_hash *hash;
+- struct task_struct *task;
+- io_wq_work_fn *do_work;
+- free_work_fn *free_work;
+-};
+-
+-struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
+-void io_wq_exit_start(struct io_wq *wq);
+-void io_wq_put_and_exit(struct io_wq *wq);
+-
+-void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
+-void io_wq_hash_work(struct io_wq_work *work, void *val);
+-
+-int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+-int io_wq_max_workers(struct io_wq *wq, int *new_count);
+-
+-static inline bool io_wq_is_hashed(struct io_wq_work *work)
+-{
+- return work->flags & IO_WQ_WORK_HASHED;
+-}
+-
+-typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
+-
+-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+- void *data, bool cancel_all);
+-
+-#if defined(CONFIG_IO_WQ)
+-extern void io_wq_worker_sleeping(struct task_struct *);
+-extern void io_wq_worker_running(struct task_struct *);
+-#else
+-static inline void io_wq_worker_sleeping(struct task_struct *tsk)
+-{
+-}
+-static inline void io_wq_worker_running(struct task_struct *tsk)
+-{
+-}
+-#endif
+-
+-static inline bool io_wq_current_is_worker(void)
+-{
+- return in_task() && (current->flags & PF_IO_WORKER) &&
+- current->pf_io_worker;
+-}
+-#endif
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+deleted file mode 100644
+index bc18af5e0a934..0000000000000
+--- a/fs/io_uring.c
++++ /dev/null
+@@ -1,11013 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * Shared application/kernel submission and completion ring pairs, for
+- * supporting fast/efficient IO.
+- *
+- * A note on the read/write ordering memory barriers that are matched between
+- * the application and kernel side.
+- *
+- * After the application reads the CQ ring tail, it must use an
+- * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
+- * before writing the tail (using smp_load_acquire to read the tail will
+- * do). It also needs a smp_mb() before updating CQ head (ordering the
+- * entry load(s) with the head store), pairing with an implicit barrier
+- * through a control-dependency in io_get_cqe (smp_store_release to
+- * store head will do). Failure to do so could lead to reading invalid
+- * CQ entries.
+- *
+- * Likewise, the application must use an appropriate smp_wmb() before
+- * writing the SQ tail (ordering SQ entry stores with the tail store),
+- * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
+- * to store the tail will do). And it needs a barrier ordering the SQ
+- * head load before writing new SQ entries (smp_load_acquire to read
+- * head will do).
+- *
+- * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
+- * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
+- * updating the SQ tail; a full memory barrier smp_mb() is needed
+- * between.
+- *
+- * Also see the examples in the liburing library:
+- *
+- * git://git.kernel.dk/liburing
+- *
+- * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
+- * from data shared between the kernel and application. This is done both
+- * for ordering purposes, but also to ensure that once a value is loaded from
+- * data that the application could potentially modify, it remains stable.
+- *
+- * Copyright (C) 2018-2019 Jens Axboe
+- * Copyright (c) 2018-2019 Christoph Hellwig
+- */
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <linux/errno.h>
+-#include <linux/syscalls.h>
+-#include <linux/compat.h>
+-#include <net/compat.h>
+-#include <linux/refcount.h>
+-#include <linux/uio.h>
+-#include <linux/bits.h>
+-
+-#include <linux/sched/signal.h>
+-#include <linux/fs.h>
+-#include <linux/file.h>
+-#include <linux/fdtable.h>
+-#include <linux/mm.h>
+-#include <linux/mman.h>
+-#include <linux/percpu.h>
+-#include <linux/slab.h>
+-#include <linux/blkdev.h>
+-#include <linux/bvec.h>
+-#include <linux/net.h>
+-#include <net/sock.h>
+-#include <net/af_unix.h>
+-#include <net/scm.h>
+-#include <linux/anon_inodes.h>
+-#include <linux/sched/mm.h>
+-#include <linux/uaccess.h>
+-#include <linux/nospec.h>
+-#include <linux/sizes.h>
+-#include <linux/hugetlb.h>
+-#include <linux/highmem.h>
+-#include <linux/namei.h>
+-#include <linux/fsnotify.h>
+-#include <linux/fadvise.h>
+-#include <linux/eventpoll.h>
+-#include <linux/splice.h>
+-#include <linux/task_work.h>
+-#include <linux/pagemap.h>
+-#include <linux/io_uring.h>
+-#include <linux/tracehook.h>
+-
+-#define CREATE_TRACE_POINTS
+-#include <trace/events/io_uring.h>
+-
+-#include <uapi/linux/io_uring.h>
+-
+-#include "internal.h"
+-#include "io-wq.h"
+-
+-#define IORING_MAX_ENTRIES 32768
+-#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
+-#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
+-
+-/* only define max */
+-#define IORING_MAX_FIXED_FILES (1U << 15)
+-#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
+- IORING_REGISTER_LAST + IORING_OP_LAST)
+-
+-#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
+-#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
+-#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
+-
+-#define IORING_MAX_REG_BUFFERS (1U << 14)
+-
+-#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
+- IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+- IOSQE_BUFFER_SELECT)
+-#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
+- REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS)
+-
+-#define IO_TCTX_REFS_CACHE_NR (1U << 10)
+-
+-struct io_uring {
+- u32 head ____cacheline_aligned_in_smp;
+- u32 tail ____cacheline_aligned_in_smp;
+-};
+-
+-/*
+- * This data is shared with the application through the mmap at offsets
+- * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING.
+- *
+- * The offsets to the member fields are published through struct
+- * io_sqring_offsets when calling io_uring_setup.
+- */
+-struct io_rings {
+- /*
+- * Head and tail offsets into the ring; the offsets need to be
+- * masked to get valid indices.
+- *
+- * The kernel controls head of the sq ring and the tail of the cq ring,
+- * and the application controls tail of the sq ring and the head of the
+- * cq ring.
+- */
+- struct io_uring sq, cq;
+- /*
+- * Bitmasks to apply to head and tail offsets (constant, equals
+- * ring_entries - 1)
+- */
+- u32 sq_ring_mask, cq_ring_mask;
+- /* Ring sizes (constant, power of 2) */
+- u32 sq_ring_entries, cq_ring_entries;
+- /*
+- * Number of invalid entries dropped by the kernel due to
+- * invalid index stored in array
+- *
+- * Written by the kernel, shouldn't be modified by the
+- * application (i.e. get number of "new events" by comparing to
+- * cached value).
+- *
+- * After a new SQ head value was read by the application this
+- * counter includes all submissions that were dropped reaching
+- * the new SQ head (and possibly more).
+- */
+- u32 sq_dropped;
+- /*
+- * Runtime SQ flags
+- *
+- * Written by the kernel, shouldn't be modified by the
+- * application.
+- *
+- * The application needs a full memory barrier before checking
+- * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
+- */
+- u32 sq_flags;
+- /*
+- * Runtime CQ flags
+- *
+- * Written by the application, shouldn't be modified by the
+- * kernel.
+- */
+- u32 cq_flags;
+- /*
+- * Number of completion events lost because the queue was full;
+- * this should be avoided by the application by making sure
+- * there are not more requests pending than there is space in
+- * the completion queue.
+- *
+- * Written by the kernel, shouldn't be modified by the
+- * application (i.e. get number of "new events" by comparing to
+- * cached value).
+- *
+- * As completion events come in out of order this counter is not
+- * ordered with any other data.
+- */
+- u32 cq_overflow;
+- /*
+- * Ring buffer of completion events.
+- *
+- * The kernel writes completion events fresh every time they are
+- * produced, so the application is allowed to modify pending
+- * entries.
+- */
+- struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
+-};
+-
+-enum io_uring_cmd_flags {
+- IO_URING_F_NONBLOCK = 1,
+- IO_URING_F_COMPLETE_DEFER = 2,
+-};
+-
+-struct io_mapped_ubuf {
+- u64 ubuf;
+- u64 ubuf_end;
+- unsigned int nr_bvecs;
+- unsigned long acct_pages;
+- struct bio_vec bvec[];
+-};
+-
+-struct io_ring_ctx;
+-
+-struct io_overflow_cqe {
+- struct io_uring_cqe cqe;
+- struct list_head list;
+-};
+-
+-struct io_fixed_file {
+- /* file * with additional FFS_* flags */
+- unsigned long file_ptr;
+-};
+-
+-struct io_rsrc_put {
+- struct list_head list;
+- u64 tag;
+- union {
+- void *rsrc;
+- struct file *file;
+- struct io_mapped_ubuf *buf;
+- };
+-};
+-
+-struct io_file_table {
+- struct io_fixed_file *files;
+-};
+-
+-struct io_rsrc_node {
+- struct percpu_ref refs;
+- struct list_head node;
+- struct list_head rsrc_list;
+- struct io_rsrc_data *rsrc_data;
+- struct llist_node llist;
+- bool done;
+-};
+-
+-typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
+-
+-struct io_rsrc_data {
+- struct io_ring_ctx *ctx;
+-
+- u64 **tags;
+- unsigned int nr;
+- rsrc_put_fn *do_put;
+- atomic_t refs;
+- struct completion done;
+- bool quiesce;
+-};
+-
+-struct io_buffer {
+- struct list_head list;
+- __u64 addr;
+- __u32 len;
+- __u16 bid;
+-};
+-
+-struct io_restriction {
+- DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
+- DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
+- u8 sqe_flags_allowed;
+- u8 sqe_flags_required;
+- bool registered;
+-};
+-
+-enum {
+- IO_SQ_THREAD_SHOULD_STOP = 0,
+- IO_SQ_THREAD_SHOULD_PARK,
+-};
+-
+-struct io_sq_data {
+- refcount_t refs;
+- atomic_t park_pending;
+- struct mutex lock;
+-
+- /* ctx's that are using this sqd */
+- struct list_head ctx_list;
+-
+- struct task_struct *thread;
+- struct wait_queue_head wait;
+-
+- unsigned sq_thread_idle;
+- int sq_cpu;
+- pid_t task_pid;
+- pid_t task_tgid;
+-
+- unsigned long state;
+- struct completion exited;
+-};
+-
+-#define IO_COMPL_BATCH 32
+-#define IO_REQ_CACHE_SIZE 32
+-#define IO_REQ_ALLOC_BATCH 8
+-
+-struct io_submit_link {
+- struct io_kiocb *head;
+- struct io_kiocb *last;
+-};
+-
+-struct io_submit_state {
+- struct blk_plug plug;
+- struct io_submit_link link;
+-
+- /*
+- * io_kiocb alloc cache
+- */
+- void *reqs[IO_REQ_CACHE_SIZE];
+- unsigned int free_reqs;
+-
+- bool plug_started;
+-
+- /*
+- * Batch completion logic
+- */
+- struct io_kiocb *compl_reqs[IO_COMPL_BATCH];
+- unsigned int compl_nr;
+- /* inline/task_work completion list, under ->uring_lock */
+- struct list_head free_list;
+-
+- unsigned int ios_left;
+-};
+-
+-struct io_ring_ctx {
+- /* const or read-mostly hot data */
+- struct {
+- struct percpu_ref refs;
+-
+- struct io_rings *rings;
+- unsigned int flags;
+- unsigned int compat: 1;
+- unsigned int drain_next: 1;
+- unsigned int eventfd_async: 1;
+- unsigned int restricted: 1;
+- unsigned int off_timeout_used: 1;
+- unsigned int drain_active: 1;
+- } ____cacheline_aligned_in_smp;
+-
+- /* submission data */
+- struct {
+- struct mutex uring_lock;
+-
+- /*
+- * Ring buffer of indices into array of io_uring_sqe, which is
+- * mmapped by the application using the IORING_OFF_SQES offset.
+- *
+- * This indirection could e.g. be used to assign fixed
+- * io_uring_sqe entries to operations and only submit them to
+- * the queue when needed.
+- *
+- * The kernel modifies neither the indices array nor the entries
+- * array.
+- */
+- u32 *sq_array;
+- struct io_uring_sqe *sq_sqes;
+- unsigned cached_sq_head;
+- unsigned sq_entries;
+- struct list_head defer_list;
+-
+- /*
+- * Fixed resources fast path, should be accessed only under
+- * uring_lock, and updated through io_uring_register(2)
+- */
+- struct io_rsrc_node *rsrc_node;
+- struct io_file_table file_table;
+- unsigned nr_user_files;
+- unsigned nr_user_bufs;
+- struct io_mapped_ubuf **user_bufs;
+-
+- struct io_submit_state submit_state;
+- struct list_head timeout_list;
+- struct list_head ltimeout_list;
+- struct list_head cq_overflow_list;
+- struct xarray io_buffers;
+- struct xarray personalities;
+- u32 pers_next;
+- unsigned sq_thread_idle;
+- } ____cacheline_aligned_in_smp;
+-
+- /* IRQ completion list, under ->completion_lock */
+- struct list_head locked_free_list;
+- unsigned int locked_free_nr;
+-
+- const struct cred *sq_creds; /* cred used for __io_sq_thread() */
+- struct io_sq_data *sq_data; /* if using sq thread polling */
+-
+- struct wait_queue_head sqo_sq_wait;
+- struct list_head sqd_list;
+-
+- unsigned long check_cq_overflow;
+-
+- struct {
+- unsigned cached_cq_tail;
+- unsigned cq_entries;
+- struct eventfd_ctx *cq_ev_fd;
+- struct wait_queue_head poll_wait;
+- struct wait_queue_head cq_wait;
+- unsigned cq_extra;
+- atomic_t cq_timeouts;
+- unsigned cq_last_tm_flush;
+- } ____cacheline_aligned_in_smp;
+-
+- struct {
+- spinlock_t completion_lock;
+-
+- spinlock_t timeout_lock;
+-
+- /*
+- * ->iopoll_list is protected by the ctx->uring_lock for
+- * io_uring instances that don't use IORING_SETUP_SQPOLL.
+- * For SQPOLL, only the single threaded io_sq_thread() will
+- * manipulate the list, hence no extra locking is needed there.
+- */
+- struct list_head iopoll_list;
+- struct hlist_head *cancel_hash;
+- unsigned cancel_hash_bits;
+- bool poll_multi_queue;
+- } ____cacheline_aligned_in_smp;
+-
+- struct io_restriction restrictions;
+-
+-	/* slow path rsrc auxiliary data, used by update/register */
+- struct {
+- struct io_rsrc_node *rsrc_backup_node;
+- struct io_mapped_ubuf *dummy_ubuf;
+- struct io_rsrc_data *file_data;
+- struct io_rsrc_data *buf_data;
+-
+- struct delayed_work rsrc_put_work;
+- struct llist_head rsrc_put_llist;
+- struct list_head rsrc_ref_list;
+- spinlock_t rsrc_ref_lock;
+- };
+-
+- /* Keep this last, we don't need it for the fast path */
+- struct {
+- #if defined(CONFIG_UNIX)
+- struct socket *ring_sock;
+- #endif
+- /* hashed buffered write serialization */
+- struct io_wq_hash *hash_map;
+-
+- /* Only used for accounting purposes */
+- struct user_struct *user;
+- struct mm_struct *mm_account;
+-
+- /* ctx exit and cancelation */
+- struct llist_head fallback_llist;
+- struct delayed_work fallback_work;
+- struct work_struct exit_work;
+- struct list_head tctx_list;
+- struct completion ref_comp;
+- u32 iowq_limits[2];
+- bool iowq_limits_set;
+- };
+-};
+-
+-struct io_uring_task {
+- /* submission side */
+- int cached_refs;
+- struct xarray xa;
+- struct wait_queue_head wait;
+- const struct io_ring_ctx *last;
+- struct io_wq *io_wq;
+- struct percpu_counter inflight;
+- atomic_t inflight_tracked;
+- atomic_t in_idle;
+-
+- spinlock_t task_lock;
+- struct io_wq_work_list task_list;
+- struct callback_head task_work;
+- bool task_running;
+-};
+-
+-/*
+- * First field must be the file pointer in all the
+- * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+- */
+-struct io_poll_iocb {
+- struct file *file;
+- struct wait_queue_head *head;
+- __poll_t events;
+- bool done;
+- bool canceled;
+- struct wait_queue_entry wait;
+-};
+-
+-struct io_poll_update {
+- struct file *file;
+- u64 old_user_data;
+- u64 new_user_data;
+- __poll_t events;
+- bool update_events;
+- bool update_user_data;
+-};
+-
+-struct io_close {
+- struct file *file;
+- int fd;
+- u32 file_slot;
+-};
+-
+-struct io_timeout_data {
+- struct io_kiocb *req;
+- struct hrtimer timer;
+- struct timespec64 ts;
+- enum hrtimer_mode mode;
+- u32 flags;
+-};
+-
+-struct io_accept {
+- struct file *file;
+- struct sockaddr __user *addr;
+- int __user *addr_len;
+- int flags;
+- u32 file_slot;
+- unsigned long nofile;
+-};
+-
+-struct io_sync {
+- struct file *file;
+- loff_t len;
+- loff_t off;
+- int flags;
+- int mode;
+-};
+-
+-struct io_cancel {
+- struct file *file;
+- u64 addr;
+-};
+-
+-struct io_timeout {
+- struct file *file;
+- u32 off;
+- u32 target_seq;
+- struct list_head list;
+- /* head of the link, used by linked timeouts only */
+- struct io_kiocb *head;
+- /* for linked completions */
+- struct io_kiocb *prev;
+-};
+-
+-struct io_timeout_rem {
+- struct file *file;
+- u64 addr;
+-
+- /* timeout update */
+- struct timespec64 ts;
+- u32 flags;
+- bool ltimeout;
+-};
+-
+-struct io_rw {
+- /* NOTE: kiocb has the file as the first member, so don't do it here */
+- struct kiocb kiocb;
+- u64 addr;
+- u64 len;
+-};
+-
+-struct io_connect {
+- struct file *file;
+- struct sockaddr __user *addr;
+- int addr_len;
+-};
+-
+-struct io_sr_msg {
+- struct file *file;
+- union {
+- struct compat_msghdr __user *umsg_compat;
+- struct user_msghdr __user *umsg;
+- void __user *buf;
+- };
+- int msg_flags;
+- int bgid;
+- size_t len;
+- struct io_buffer *kbuf;
+-};
+-
+-struct io_open {
+- struct file *file;
+- int dfd;
+- u32 file_slot;
+- struct filename *filename;
+- struct open_how how;
+- unsigned long nofile;
+-};
+-
+-struct io_rsrc_update {
+- struct file *file;
+- u64 arg;
+- u32 nr_args;
+- u32 offset;
+-};
+-
+-struct io_fadvise {
+- struct file *file;
+- u64 offset;
+- u32 len;
+- u32 advice;
+-};
+-
+-struct io_madvise {
+- struct file *file;
+- u64 addr;
+- u32 len;
+- u32 advice;
+-};
+-
+-struct io_epoll {
+- struct file *file;
+- int epfd;
+- int op;
+- int fd;
+- struct epoll_event event;
+-};
+-
+-struct io_splice {
+- struct file *file_out;
+- struct file *file_in;
+- loff_t off_out;
+- loff_t off_in;
+- u64 len;
+- unsigned int flags;
+-};
+-
+-struct io_provide_buf {
+- struct file *file;
+- __u64 addr;
+- __u32 len;
+- __u32 bgid;
+- __u16 nbufs;
+- __u16 bid;
+-};
+-
+-struct io_statx {
+- struct file *file;
+- int dfd;
+- unsigned int mask;
+- unsigned int flags;
+- const char __user *filename;
+- struct statx __user *buffer;
+-};
+-
+-struct io_shutdown {
+- struct file *file;
+- int how;
+-};
+-
+-struct io_rename {
+- struct file *file;
+- int old_dfd;
+- int new_dfd;
+- struct filename *oldpath;
+- struct filename *newpath;
+- int flags;
+-};
+-
+-struct io_unlink {
+- struct file *file;
+- int dfd;
+- int flags;
+- struct filename *filename;
+-};
+-
+-struct io_mkdir {
+- struct file *file;
+- int dfd;
+- umode_t mode;
+- struct filename *filename;
+-};
+-
+-struct io_symlink {
+- struct file *file;
+- int new_dfd;
+- struct filename *oldpath;
+- struct filename *newpath;
+-};
+-
+-struct io_hardlink {
+- struct file *file;
+- int old_dfd;
+- int new_dfd;
+- struct filename *oldpath;
+- struct filename *newpath;
+- int flags;
+-};
+-
+-struct io_completion {
+- struct file *file;
+- u32 cflags;
+-};
+-
+-struct io_async_connect {
+- struct sockaddr_storage address;
+-};
+-
+-struct io_async_msghdr {
+- struct iovec fast_iov[UIO_FASTIOV];
+- /* points to an allocated iov, if NULL we use fast_iov instead */
+- struct iovec *free_iov;
+- struct sockaddr __user *uaddr;
+- struct msghdr msg;
+- struct sockaddr_storage addr;
+-};
+-
+-struct io_async_rw {
+- struct iovec fast_iov[UIO_FASTIOV];
+- const struct iovec *free_iovec;
+- struct iov_iter iter;
+- struct iov_iter_state iter_state;
+- size_t bytes_done;
+- struct wait_page_queue wpq;
+-};
+-
+-enum {
+- REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
+- REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
+- REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
+- REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
+- REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
+- REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
+-
+- /* first byte is taken by user flags, shift it to not overlap */
+- REQ_F_FAIL_BIT = 8,
+- REQ_F_INFLIGHT_BIT,
+- REQ_F_CUR_POS_BIT,
+- REQ_F_NOWAIT_BIT,
+- REQ_F_LINK_TIMEOUT_BIT,
+- REQ_F_NEED_CLEANUP_BIT,
+- REQ_F_POLLED_BIT,
+- REQ_F_BUFFER_SELECTED_BIT,
+- REQ_F_COMPLETE_INLINE_BIT,
+- REQ_F_REISSUE_BIT,
+- REQ_F_CREDS_BIT,
+- REQ_F_REFCOUNT_BIT,
+- REQ_F_ARM_LTIMEOUT_BIT,
+- /* keep async read/write and isreg together and in order */
+- REQ_F_NOWAIT_READ_BIT,
+- REQ_F_NOWAIT_WRITE_BIT,
+- REQ_F_ISREG_BIT,
+-
+- /* not a real bit, just to check we're not overflowing the space */
+- __REQ_F_LAST_BIT,
+-};
+-
+-enum {
+- /* ctx owns file */
+- REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
+- /* drain existing IO first */
+- REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
+- /* linked sqes */
+- REQ_F_LINK = BIT(REQ_F_LINK_BIT),
+- /* doesn't sever on completion < 0 */
+- REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
+- /* IOSQE_ASYNC */
+- REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+- /* IOSQE_BUFFER_SELECT */
+- REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
+-
+- /* fail rest of links */
+- REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
+- /* on inflight list, should be cancelled and waited on exit reliably */
+- REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
+- /* read/write uses file position */
+- REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
+- /* must not punt to workers */
+- REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
+- /* has or had linked timeout */
+- REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
+- /* needs cleanup */
+- REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
+- /* already went through poll handler */
+- REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
+- /* buffer already selected */
+- REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+- /* completion is deferred through io_comp_state */
+- REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
+- /* caller should reissue async */
+- REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
+- /* supports async reads */
+- REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT),
+- /* supports async writes */
+- REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT),
+- /* regular file */
+- REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
+- /* has creds assigned */
+- REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
+- /* skip refcounting if not set */
+- REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
+- /* there is a linked timeout that has to be armed */
+- REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
+-};
+-
+-struct async_poll {
+- struct io_poll_iocb poll;
+- struct io_poll_iocb *double_poll;
+-};
+-
+-typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
+-
+-struct io_task_work {
+- union {
+- struct io_wq_work_node node;
+- struct llist_node fallback_node;
+- };
+- io_req_tw_func_t func;
+-};
+-
+-enum {
+- IORING_RSRC_FILE = 0,
+- IORING_RSRC_BUFFER = 1,
+-};
+-
+-/*
+- * NOTE! Each of the iocb union members has the file pointer
+- * as the first entry in their struct definition. So you can
+- * access the file pointer through any of the sub-structs,
+- * or directly as just 'ki_filp' in this struct.
+- */
+-struct io_kiocb {
+- union {
+- struct file *file;
+- struct io_rw rw;
+- struct io_poll_iocb poll;
+- struct io_poll_update poll_update;
+- struct io_accept accept;
+- struct io_sync sync;
+- struct io_cancel cancel;
+- struct io_timeout timeout;
+- struct io_timeout_rem timeout_rem;
+- struct io_connect connect;
+- struct io_sr_msg sr_msg;
+- struct io_open open;
+- struct io_close close;
+- struct io_rsrc_update rsrc_update;
+- struct io_fadvise fadvise;
+- struct io_madvise madvise;
+- struct io_epoll epoll;
+- struct io_splice splice;
+- struct io_provide_buf pbuf;
+- struct io_statx statx;
+- struct io_shutdown shutdown;
+- struct io_rename rename;
+- struct io_unlink unlink;
+- struct io_mkdir mkdir;
+- struct io_symlink symlink;
+- struct io_hardlink hardlink;
+- /* use only after cleaning per-op data, see io_clean_op() */
+- struct io_completion compl;
+- };
+-
+- /* opcode allocated if it needs to store data for async defer */
+- void *async_data;
+- u8 opcode;
+- /* polled IO has completed */
+- u8 iopoll_completed;
+-
+- u16 buf_index;
+- u32 result;
+-
+- struct io_ring_ctx *ctx;
+- unsigned int flags;
+- atomic_t refs;
+- struct task_struct *task;
+- u64 user_data;
+-
+- struct io_kiocb *link;
+- struct percpu_ref *fixed_rsrc_refs;
+-
+- /* used with ctx->iopoll_list with reads/writes */
+- struct list_head inflight_entry;
+- struct io_task_work io_task_work;
+- /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
+- struct hlist_node hash_node;
+- struct async_poll *apoll;
+- struct io_wq_work work;
+- const struct cred *creds;
+-
+- /* store used ubuf, so we can prevent reloading */
+- struct io_mapped_ubuf *imu;
+-};
+-
+-struct io_tctx_node {
+- struct list_head ctx_node;
+- struct task_struct *task;
+- struct io_ring_ctx *ctx;
+-};
+-
+-struct io_defer_entry {
+- struct list_head list;
+- struct io_kiocb *req;
+- u32 seq;
+-};
+-
+-struct io_op_def {
+- /* needs req->file assigned */
+- unsigned needs_file : 1;
+- /* hash wq insertion if file is a regular file */
+- unsigned hash_reg_file : 1;
+- /* unbound wq insertion if file is a non-regular file */
+- unsigned unbound_nonreg_file : 1;
+- /* opcode is not supported by this kernel */
+- unsigned not_supported : 1;
+- /* set if opcode supports polled "wait" */
+- unsigned pollin : 1;
+- unsigned pollout : 1;
+- /* op supports buffer selection */
+- unsigned buffer_select : 1;
+- /* do prep async if is going to be punted */
+- unsigned needs_async_setup : 1;
+- /* should block plug */
+- unsigned plug : 1;
+- /* size of async data needed, if any */
+- unsigned short async_size;
+-};
+-
+-static const struct io_op_def io_op_defs[] = {
+- [IORING_OP_NOP] = {},
+- [IORING_OP_READV] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- .buffer_select = 1,
+- .needs_async_setup = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_WRITEV] = {
+- .needs_file = 1,
+- .hash_reg_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- .needs_async_setup = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_FSYNC] = {
+- .needs_file = 1,
+- },
+- [IORING_OP_READ_FIXED] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_WRITE_FIXED] = {
+- .needs_file = 1,
+- .hash_reg_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_POLL_ADD] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- },
+- [IORING_OP_POLL_REMOVE] = {},
+- [IORING_OP_SYNC_FILE_RANGE] = {
+- .needs_file = 1,
+- },
+- [IORING_OP_SENDMSG] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- .needs_async_setup = 1,
+- .async_size = sizeof(struct io_async_msghdr),
+- },
+- [IORING_OP_RECVMSG] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- .buffer_select = 1,
+- .needs_async_setup = 1,
+- .async_size = sizeof(struct io_async_msghdr),
+- },
+- [IORING_OP_TIMEOUT] = {
+- .async_size = sizeof(struct io_timeout_data),
+- },
+- [IORING_OP_TIMEOUT_REMOVE] = {
+- /* used by timeout updates' prep() */
+- },
+- [IORING_OP_ACCEPT] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- },
+- [IORING_OP_ASYNC_CANCEL] = {},
+- [IORING_OP_LINK_TIMEOUT] = {
+- .async_size = sizeof(struct io_timeout_data),
+- },
+- [IORING_OP_CONNECT] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- .needs_async_setup = 1,
+- .async_size = sizeof(struct io_async_connect),
+- },
+- [IORING_OP_FALLOCATE] = {
+- .needs_file = 1,
+- },
+- [IORING_OP_OPENAT] = {},
+- [IORING_OP_CLOSE] = {},
+- [IORING_OP_FILES_UPDATE] = {},
+- [IORING_OP_STATX] = {},
+- [IORING_OP_READ] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- .buffer_select = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_WRITE] = {
+- .needs_file = 1,
+- .hash_reg_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- .plug = 1,
+- .async_size = sizeof(struct io_async_rw),
+- },
+- [IORING_OP_FADVISE] = {
+- .needs_file = 1,
+- },
+- [IORING_OP_MADVISE] = {},
+- [IORING_OP_SEND] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollout = 1,
+- },
+- [IORING_OP_RECV] = {
+- .needs_file = 1,
+- .unbound_nonreg_file = 1,
+- .pollin = 1,
+- .buffer_select = 1,
+- },
+- [IORING_OP_OPENAT2] = {
+- },
+- [IORING_OP_EPOLL_CTL] = {
+- .unbound_nonreg_file = 1,
+- },
+- [IORING_OP_SPLICE] = {
+- .needs_file = 1,
+- .hash_reg_file = 1,
+- .unbound_nonreg_file = 1,
+- },
+- [IORING_OP_PROVIDE_BUFFERS] = {},
+- [IORING_OP_REMOVE_BUFFERS] = {},
+- [IORING_OP_TEE] = {
+- .needs_file = 1,
+- .hash_reg_file = 1,
+- .unbound_nonreg_file = 1,
+- },
+- [IORING_OP_SHUTDOWN] = {
+- .needs_file = 1,
+- },
+- [IORING_OP_RENAMEAT] = {},
+- [IORING_OP_UNLINKAT] = {},
+- [IORING_OP_MKDIRAT] = {},
+- [IORING_OP_SYMLINKAT] = {},
+- [IORING_OP_LINKAT] = {},
+-};
+-
+-/* requests with any of those set should undergo io_disarm_next() */
+-#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+-
+-static bool io_disarm_next(struct io_kiocb *req);
+-static void io_uring_del_tctx_node(unsigned long index);
+-static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+- struct task_struct *task,
+- bool cancel_all);
+-static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
+-
+-static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+- long res, unsigned int cflags);
+-static void io_put_req(struct io_kiocb *req);
+-static void io_put_req_deferred(struct io_kiocb *req);
+-static void io_dismantle_req(struct io_kiocb *req);
+-static void io_queue_linked_timeout(struct io_kiocb *req);
+-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
+- struct io_uring_rsrc_update2 *up,
+- unsigned nr_args);
+-static void io_clean_op(struct io_kiocb *req);
+-static struct file *io_file_get(struct io_ring_ctx *ctx,
+- struct io_kiocb *req, int fd, bool fixed);
+-static void __io_queue_sqe(struct io_kiocb *req);
+-static void io_rsrc_put_work(struct work_struct *work);
+-
+-static void io_req_task_queue(struct io_kiocb *req);
+-static void io_submit_flush_completions(struct io_ring_ctx *ctx);
+-static int io_req_prep_async(struct io_kiocb *req);
+-
+-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+- unsigned int issue_flags, u32 slot_index);
+-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+-
+-static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
+-
+-static struct kmem_cache *req_cachep;
+-
+-static const struct file_operations io_uring_fops;
+-
+-struct sock *io_uring_get_socket(struct file *file)
+-{
+-#if defined(CONFIG_UNIX)
+- if (file->f_op == &io_uring_fops) {
+- struct io_ring_ctx *ctx = file->private_data;
+-
+- return ctx->ring_sock->sk;
+- }
+-#endif
+- return NULL;
+-}
+-EXPORT_SYMBOL(io_uring_get_socket);
+-
+-static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
+-{
+- if (!*locked) {
+- mutex_lock(&ctx->uring_lock);
+- *locked = true;
+- }
+-}
+-
+-#define io_for_each_link(pos, head) \
+- for (pos = (head); pos; pos = pos->link)
+-
+-/*
+- * Shamelessly stolen from the mm implementation of page reference checking,
+- * see commit f958d7b528b1 for details.
+- */
+-#define req_ref_zero_or_close_to_overflow(req) \
+- ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
+-
+-static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
+-{
+- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+- return atomic_inc_not_zero(&req->refs);
+-}
+-
+-static inline bool req_ref_put_and_test(struct io_kiocb *req)
+-{
+- if (likely(!(req->flags & REQ_F_REFCOUNT)))
+- return true;
+-
+- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+- return atomic_dec_and_test(&req->refs);
+-}
+-
+-static inline void req_ref_put(struct io_kiocb *req)
+-{
+- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+- WARN_ON_ONCE(req_ref_put_and_test(req));
+-}
+-
+-static inline void req_ref_get(struct io_kiocb *req)
+-{
+- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+- atomic_inc(&req->refs);
+-}
+-
+-static inline void __io_req_set_refcount(struct io_kiocb *req, int nr)
+-{
+- if (!(req->flags & REQ_F_REFCOUNT)) {
+- req->flags |= REQ_F_REFCOUNT;
+- atomic_set(&req->refs, nr);
+- }
+-}
+-
+-static inline void io_req_set_refcount(struct io_kiocb *req)
+-{
+- __io_req_set_refcount(req, 1);
+-}
+-
+-static inline void io_req_set_rsrc_node(struct io_kiocb *req)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (!req->fixed_rsrc_refs) {
+- req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
+- percpu_ref_get(req->fixed_rsrc_refs);
+- }
+-}
+-
+-static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+-{
+- bool got = percpu_ref_tryget(ref);
+-
+- /* already at zero, wait for ->release() */
+- if (!got)
+- wait_for_completion(compl);
+- percpu_ref_resurrect(ref);
+- if (got)
+- percpu_ref_put(ref);
+-}
+-
+-static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
+- bool cancel_all)
+-{
+- struct io_kiocb *req;
+-
+- if (task && head->task != task)
+- return false;
+- if (cancel_all)
+- return true;
+-
+- io_for_each_link(req, head) {
+- if (req->flags & REQ_F_INFLIGHT)
+- return true;
+- }
+- return false;
+-}
+-
+-static inline void req_set_fail(struct io_kiocb *req)
+-{
+- req->flags |= REQ_F_FAIL;
+-}
+-
+-static inline void req_fail_link_node(struct io_kiocb *req, int res)
+-{
+- req_set_fail(req);
+- req->result = res;
+-}
+-
+-static void io_ring_ctx_ref_free(struct percpu_ref *ref)
+-{
+- struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
+-
+- complete(&ctx->ref_comp);
+-}
+-
+-static inline bool io_is_timeout_noseq(struct io_kiocb *req)
+-{
+- return !req->timeout.off;
+-}
+-
+-static void io_fallback_req_func(struct work_struct *work)
+-{
+- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
+- fallback_work.work);
+- struct llist_node *node = llist_del_all(&ctx->fallback_llist);
+- struct io_kiocb *req, *tmp;
+- bool locked = false;
+-
+- percpu_ref_get(&ctx->refs);
+- llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
+- req->io_task_work.func(req, &locked);
+-
+- if (locked) {
+- if (ctx->submit_state.compl_nr)
+- io_submit_flush_completions(ctx);
+- mutex_unlock(&ctx->uring_lock);
+- }
+- percpu_ref_put(&ctx->refs);
+-
+-}
+-
+-static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
+-{
+- struct io_ring_ctx *ctx;
+- int hash_bits;
+-
+- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+- if (!ctx)
+- return NULL;
+-
+- /*
+-	 * Use 5 bits less than the max cq entries; that should give us around
+- * 32 entries per hash list if totally full and uniformly spread.
+- */
+- hash_bits = ilog2(p->cq_entries);
+- hash_bits -= 5;
+- if (hash_bits <= 0)
+- hash_bits = 1;
+- ctx->cancel_hash_bits = hash_bits;
+- ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
+- GFP_KERNEL);
+- if (!ctx->cancel_hash)
+- goto err;
+- __hash_init(ctx->cancel_hash, 1U << hash_bits);
+-
+- ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
+- if (!ctx->dummy_ubuf)
+- goto err;
+- /* set invalid range, so io_import_fixed() fails meeting it */
+- ctx->dummy_ubuf->ubuf = -1UL;
+-
+- if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
+- PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+- goto err;
+-
+- ctx->flags = p->flags;
+- init_waitqueue_head(&ctx->sqo_sq_wait);
+- INIT_LIST_HEAD(&ctx->sqd_list);
+- init_waitqueue_head(&ctx->poll_wait);
+- INIT_LIST_HEAD(&ctx->cq_overflow_list);
+- init_completion(&ctx->ref_comp);
+- xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
+- xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
+- mutex_init(&ctx->uring_lock);
+- init_waitqueue_head(&ctx->cq_wait);
+- spin_lock_init(&ctx->completion_lock);
+- spin_lock_init(&ctx->timeout_lock);
+- INIT_LIST_HEAD(&ctx->iopoll_list);
+- INIT_LIST_HEAD(&ctx->defer_list);
+- INIT_LIST_HEAD(&ctx->timeout_list);
+- INIT_LIST_HEAD(&ctx->ltimeout_list);
+- spin_lock_init(&ctx->rsrc_ref_lock);
+- INIT_LIST_HEAD(&ctx->rsrc_ref_list);
+- INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
+- init_llist_head(&ctx->rsrc_put_llist);
+- INIT_LIST_HEAD(&ctx->tctx_list);
+- INIT_LIST_HEAD(&ctx->submit_state.free_list);
+- INIT_LIST_HEAD(&ctx->locked_free_list);
+- INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
+- return ctx;
+-err:
+- kfree(ctx->dummy_ubuf);
+- kfree(ctx->cancel_hash);
+- kfree(ctx);
+- return NULL;
+-}
+-
+-static void io_account_cq_overflow(struct io_ring_ctx *ctx)
+-{
+- struct io_rings *r = ctx->rings;
+-
+- WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
+- ctx->cq_extra--;
+-}
+-
+-static bool req_need_defer(struct io_kiocb *req, u32 seq)
+-{
+- if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
+- }
+-
+- return false;
+-}
+-
+-#define FFS_ASYNC_READ 0x1UL
+-#define FFS_ASYNC_WRITE 0x2UL
+-#ifdef CONFIG_64BIT
+-#define FFS_ISREG 0x4UL
+-#else
+-#define FFS_ISREG 0x0UL
+-#endif
+-#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
+-
+-static inline bool io_req_ffs_set(struct io_kiocb *req)
+-{
+- return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE);
+-}
+-
+-static void io_req_track_inflight(struct io_kiocb *req)
+-{
+- if (!(req->flags & REQ_F_INFLIGHT)) {
+- req->flags |= REQ_F_INFLIGHT;
+- atomic_inc(&current->io_uring->inflight_tracked);
+- }
+-}
+-
+-static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
+-{
+- if (WARN_ON_ONCE(!req->link))
+- return NULL;
+-
+- req->flags &= ~REQ_F_ARM_LTIMEOUT;
+- req->flags |= REQ_F_LINK_TIMEOUT;
+-
+- /* linked timeouts should have two refs once prep'ed */
+- io_req_set_refcount(req);
+- __io_req_set_refcount(req->link, 2);
+- return req->link;
+-}
+-
+-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
+-{
+- if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
+- return NULL;
+- return __io_prep_linked_timeout(req);
+-}
+-
+-static void io_prep_async_work(struct io_kiocb *req)
+-{
+- const struct io_op_def *def = &io_op_defs[req->opcode];
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (!(req->flags & REQ_F_CREDS)) {
+- req->flags |= REQ_F_CREDS;
+- req->creds = get_current_cred();
+- }
+-
+- req->work.list.next = NULL;
+- req->work.flags = 0;
+- if (req->flags & REQ_F_FORCE_ASYNC)
+- req->work.flags |= IO_WQ_WORK_CONCURRENT;
+-
+- if (req->flags & REQ_F_ISREG) {
+- if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
+- io_wq_hash_work(&req->work, file_inode(req->file));
+- } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
+- if (def->unbound_nonreg_file)
+- req->work.flags |= IO_WQ_WORK_UNBOUND;
+- }
+-
+- switch (req->opcode) {
+- case IORING_OP_SPLICE:
+- case IORING_OP_TEE:
+- if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
+- req->work.flags |= IO_WQ_WORK_UNBOUND;
+- break;
+- }
+-}
+-
+-static void io_prep_async_link(struct io_kiocb *req)
+-{
+- struct io_kiocb *cur;
+-
+- if (req->flags & REQ_F_LINK_TIMEOUT) {
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- spin_lock(&ctx->completion_lock);
+- io_for_each_link(cur, req)
+- io_prep_async_work(cur);
+- spin_unlock(&ctx->completion_lock);
+- } else {
+- io_for_each_link(cur, req)
+- io_prep_async_work(cur);
+- }
+-}
+-
+-static void io_queue_async_work(struct io_kiocb *req, bool *locked)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_kiocb *link = io_prep_linked_timeout(req);
+- struct io_uring_task *tctx = req->task->io_uring;
+-
+- /* must not take the lock, NULL it as a precaution */
+- locked = NULL;
+-
+- BUG_ON(!tctx);
+- BUG_ON(!tctx->io_wq);
+-
+- /* init ->work of the whole link before punting */
+- io_prep_async_link(req);
+-
+- /*
+- * Not expected to happen, but if we do have a bug where this _can_
+- * happen, catch it here and ensure the request is marked as
+- * canceled. That will make io-wq go through the usual work cancel
+- * procedure rather than attempt to run this request (or create a new
+- * worker for it).
+- */
+- if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+- req->work.flags |= IO_WQ_WORK_CANCEL;
+-
+- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+- &req->work, req->flags);
+- io_wq_enqueue(tctx->io_wq, &req->work);
+- if (link)
+- io_queue_linked_timeout(link);
+-}
+-
+-static void io_kill_timeout(struct io_kiocb *req, int status)
+- __must_hold(&req->ctx->completion_lock)
+- __must_hold(&req->ctx->timeout_lock)
+-{
+- struct io_timeout_data *io = req->async_data;
+-
+- if (hrtimer_try_to_cancel(&io->timer) != -1) {
+- if (status)
+- req_set_fail(req);
+- atomic_set(&req->ctx->cq_timeouts,
+- atomic_read(&req->ctx->cq_timeouts) + 1);
+- list_del_init(&req->timeout.list);
+- io_cqring_fill_event(req->ctx, req->user_data, status, 0);
+- io_put_req_deferred(req);
+- }
+-}
+-
+-static void io_queue_deferred(struct io_ring_ctx *ctx)
+-{
+- while (!list_empty(&ctx->defer_list)) {
+- struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
+- struct io_defer_entry, list);
+-
+- if (req_need_defer(de->req, de->seq))
+- break;
+- list_del_init(&de->list);
+- io_req_task_queue(de->req);
+- kfree(de);
+- }
+-}
+-
+-static void io_flush_timeouts(struct io_ring_ctx *ctx)
+- __must_hold(&ctx->completion_lock)
+-{
+- u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+-
+- spin_lock_irq(&ctx->timeout_lock);
+- while (!list_empty(&ctx->timeout_list)) {
+- u32 events_needed, events_got;
+- struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
+- struct io_kiocb, timeout.list);
+-
+- if (io_is_timeout_noseq(req))
+- break;
+-
+- /*
+- * Since seq can easily wrap around over time, subtract
+- * the last seq at which timeouts were flushed before comparing.
+- * Assuming not more than 2^31-1 events have happened since,
+- * these subtractions won't have wrapped, so we can check if
+- * target is in [last_seq, current_seq] by comparing the two.
+- */
+- events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
+- events_got = seq - ctx->cq_last_tm_flush;
+- if (events_got < events_needed)
+- break;
+-
+- list_del_init(&req->timeout.list);
+- io_kill_timeout(req, 0);
+- }
+- ctx->cq_last_tm_flush = seq;
+- spin_unlock_irq(&ctx->timeout_lock);
+-}
+-
+-static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
+-{
+- if (ctx->off_timeout_used)
+- io_flush_timeouts(ctx);
+- if (ctx->drain_active)
+- io_queue_deferred(ctx);
+-}
+-
+-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+-{
+- if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+- __io_commit_cqring_flush(ctx);
+- /* order cqe stores with ring update */
+- smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+-}
+-
+-static inline bool io_sqring_full(struct io_ring_ctx *ctx)
+-{
+- struct io_rings *r = ctx->rings;
+-
+- return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries;
+-}
+-
+-static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
+-{
+- return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
+-}
+-
+-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+-{
+- struct io_rings *rings = ctx->rings;
+- unsigned tail, mask = ctx->cq_entries - 1;
+-
+- /*
+- * writes to the cq entry need to come after reading head; the
+- * control dependency is enough as we're using WRITE_ONCE to
+- * fill the cq entry
+- */
+- if (__io_cqring_events(ctx) == ctx->cq_entries)
+- return NULL;
+-
+- tail = ctx->cached_cq_tail++;
+- return &rings->cqes[tail & mask];
+-}
+-
+-static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
+-{
+- if (likely(!ctx->cq_ev_fd))
+- return false;
+- if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+- return false;
+- return !ctx->eventfd_async || io_wq_current_is_worker();
+-}
+-
+-/*
+- * This should only get called when at least one event has been posted.
+- * Some applications rely on the eventfd notification count only changing
+- * IFF a new CQE has been added to the CQ ring. There's no dependency on
+- * 1:1 relationship between how many times this function is called (and
+- * hence the eventfd count) and number of CQEs posted to the CQ ring.
+- */
+-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+-{
+- /*
+- * wake_up_all() may seem excessive, but io_wake_function() and
+- * io_should_wake() handle the termination of the loop and only
+- * wake as many waiters as we need to.
+- */
+- if (wq_has_sleeper(&ctx->cq_wait))
+- wake_up_all(&ctx->cq_wait);
+- if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
+- wake_up(&ctx->sq_data->wait);
+- if (io_should_trigger_evfd(ctx))
+- eventfd_signal(ctx->cq_ev_fd, 1);
+- if (waitqueue_active(&ctx->poll_wait))
+- wake_up_interruptible(&ctx->poll_wait);
+-}
+-
+-static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
+-{
+- /* see waitqueue_active() comment */
+- smp_mb();
+-
+- if (ctx->flags & IORING_SETUP_SQPOLL) {
+- if (waitqueue_active(&ctx->cq_wait))
+- wake_up_all(&ctx->cq_wait);
+- }
+- if (io_should_trigger_evfd(ctx))
+- eventfd_signal(ctx->cq_ev_fd, 1);
+- if (waitqueue_active(&ctx->poll_wait))
+- wake_up_interruptible(&ctx->poll_wait);
+-}
+-
+-/* Returns true if there are no backlogged entries after the flush */
+-static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+-{
+- bool all_flushed, posted;
+-
+- if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
+- return false;
+-
+- posted = false;
+- spin_lock(&ctx->completion_lock);
+- while (!list_empty(&ctx->cq_overflow_list)) {
+- struct io_uring_cqe *cqe = io_get_cqe(ctx);
+- struct io_overflow_cqe *ocqe;
+-
+- if (!cqe && !force)
+- break;
+- ocqe = list_first_entry(&ctx->cq_overflow_list,
+- struct io_overflow_cqe, list);
+- if (cqe)
+- memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+- else
+- io_account_cq_overflow(ctx);
+-
+- posted = true;
+- list_del(&ocqe->list);
+- kfree(ocqe);
+- }
+-
+- all_flushed = list_empty(&ctx->cq_overflow_list);
+- if (all_flushed) {
+- clear_bit(0, &ctx->check_cq_overflow);
+- WRITE_ONCE(ctx->rings->sq_flags,
+- ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
+- }
+-
+- if (posted)
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+- if (posted)
+- io_cqring_ev_posted(ctx);
+- return all_flushed;
+-}
+-
+-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
+-{
+- bool ret = true;
+-
+- if (test_bit(0, &ctx->check_cq_overflow)) {
+- /* iopoll syncs against uring_lock, not completion_lock */
+- if (ctx->flags & IORING_SETUP_IOPOLL)
+- mutex_lock(&ctx->uring_lock);
+- ret = __io_cqring_overflow_flush(ctx, false);
+- if (ctx->flags & IORING_SETUP_IOPOLL)
+- mutex_unlock(&ctx->uring_lock);
+- }
+-
+- return ret;
+-}
+-
+-/* must be called shortly after putting a request */
+-static inline void io_put_task(struct task_struct *task, int nr)
+-{
+- struct io_uring_task *tctx = task->io_uring;
+-
+- if (likely(task == current)) {
+- tctx->cached_refs += nr;
+- } else {
+- percpu_counter_sub(&tctx->inflight, nr);
+- if (unlikely(atomic_read(&tctx->in_idle)))
+- wake_up(&tctx->wait);
+- put_task_struct_many(task, nr);
+- }
+-}
+-
+-static void io_task_refs_refill(struct io_uring_task *tctx)
+-{
+- unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
+-
+- percpu_counter_add(&tctx->inflight, refill);
+- refcount_add(refill, &current->usage);
+- tctx->cached_refs += refill;
+-}
+-
+-static inline void io_get_task_refs(int nr)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+-
+- tctx->cached_refs -= nr;
+- if (unlikely(tctx->cached_refs < 0))
+- io_task_refs_refill(tctx);
+-}
+-
+-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
+- long res, unsigned int cflags)
+-{
+- struct io_overflow_cqe *ocqe;
+-
+- ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+- if (!ocqe) {
+- /*
+- * If we're in ring overflow flush mode, or in task cancel mode,
+- * or cannot allocate an overflow entry, then we need to drop it
+- * on the floor.
+- */
+- io_account_cq_overflow(ctx);
+- return false;
+- }
+- if (list_empty(&ctx->cq_overflow_list)) {
+- set_bit(0, &ctx->check_cq_overflow);
+- WRITE_ONCE(ctx->rings->sq_flags,
+- ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+-
+- }
+- ocqe->cqe.user_data = user_data;
+- ocqe->cqe.res = res;
+- ocqe->cqe.flags = cflags;
+- list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
+- return true;
+-}
+-
+-static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+- long res, unsigned int cflags)
+-{
+- struct io_uring_cqe *cqe;
+-
+- trace_io_uring_complete(ctx, user_data, res, cflags);
+-
+- /*
+- * If we can't get a cq entry, userspace overflowed the
+- * submission (by quite a lot). Increment the overflow count in
+- * the ring.
+- */
+- cqe = io_get_cqe(ctx);
+- if (likely(cqe)) {
+- WRITE_ONCE(cqe->user_data, user_data);
+- WRITE_ONCE(cqe->res, res);
+- WRITE_ONCE(cqe->flags, cflags);
+- return true;
+- }
+- return io_cqring_event_overflow(ctx, user_data, res, cflags);
+-}
+-
+-/* not as hot to bloat with inlining */
+-static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+- long res, unsigned int cflags)
+-{
+- return __io_cqring_fill_event(ctx, user_data, res, cflags);
+-}
+-
+-static void io_req_complete_post(struct io_kiocb *req, long res,
+- unsigned int cflags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- spin_lock(&ctx->completion_lock);
+- __io_cqring_fill_event(ctx, req->user_data, res, cflags);
+- /*
+- * If we're the last reference to this request, add to our locked
+- * free_list cache.
+- */
+- if (req_ref_put_and_test(req)) {
+- if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+- if (req->flags & IO_DISARM_MASK)
+- io_disarm_next(req);
+- if (req->link) {
+- io_req_task_queue(req->link);
+- req->link = NULL;
+- }
+- }
+- io_dismantle_req(req);
+- io_put_task(req->task, 1);
+- list_add(&req->inflight_entry, &ctx->locked_free_list);
+- ctx->locked_free_nr++;
+- } else {
+- if (!percpu_ref_tryget(&ctx->refs))
+- req = NULL;
+- }
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+-
+- if (req) {
+- io_cqring_ev_posted(ctx);
+- percpu_ref_put(&ctx->refs);
+- }
+-}
+-
+-static inline bool io_req_needs_clean(struct io_kiocb *req)
+-{
+- return req->flags & IO_REQ_CLEAN_FLAGS;
+-}
+-
+-static void io_req_complete_state(struct io_kiocb *req, long res,
+- unsigned int cflags)
+-{
+- if (io_req_needs_clean(req))
+- io_clean_op(req);
+- req->result = res;
+- req->compl.cflags = cflags;
+- req->flags |= REQ_F_COMPLETE_INLINE;
+-}
+-
+-static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
+- long res, unsigned cflags)
+-{
+- if (issue_flags & IO_URING_F_COMPLETE_DEFER)
+- io_req_complete_state(req, res, cflags);
+- else
+- io_req_complete_post(req, res, cflags);
+-}
+-
+-static inline void io_req_complete(struct io_kiocb *req, long res)
+-{
+- __io_req_complete(req, 0, res, 0);
+-}
+-
+-static void io_req_complete_failed(struct io_kiocb *req, long res)
+-{
+- req_set_fail(req);
+- io_req_complete_post(req, res, 0);
+-}
+-
+-static void io_req_complete_fail_submit(struct io_kiocb *req)
+-{
+- /*
+-	 * We don't submit; fail them all. For that, replace hardlinks with
+- * normal links. Extra REQ_F_LINK is tolerated.
+- */
+- req->flags &= ~REQ_F_HARDLINK;
+- req->flags |= REQ_F_LINK;
+- io_req_complete_failed(req, req->result);
+-}
+-
+-/*
+- * Don't initialise the fields below on every allocation, but do that in
+- * advance and keep them valid across allocations.
+- */
+-static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
+-{
+- req->ctx = ctx;
+- req->link = NULL;
+- req->async_data = NULL;
+- /* not necessary, but safer to zero */
+- req->result = 0;
+-}
+-
+-static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
+- struct io_submit_state *state)
+-{
+- spin_lock(&ctx->completion_lock);
+- list_splice_init(&ctx->locked_free_list, &state->free_list);
+- ctx->locked_free_nr = 0;
+- spin_unlock(&ctx->completion_lock);
+-}
+-
+-/* Returns true IFF there are requests in the cache */
+-static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
+-{
+- struct io_submit_state *state = &ctx->submit_state;
+- int nr;
+-
+- /*
+- * If we have more than a batch's worth of requests in our IRQ side
+- * locked cache, grab the lock and move them over to our submission
+- * side cache.
+- */
+- if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
+- io_flush_cached_locked_reqs(ctx, state);
+-
+- nr = state->free_reqs;
+- while (!list_empty(&state->free_list)) {
+- struct io_kiocb *req = list_first_entry(&state->free_list,
+- struct io_kiocb, inflight_entry);
+-
+- list_del(&req->inflight_entry);
+- state->reqs[nr++] = req;
+- if (nr == ARRAY_SIZE(state->reqs))
+- break;
+- }
+-
+- state->free_reqs = nr;
+- return nr != 0;
+-}
+-
+-/*
+- * A request might get retired back into the request caches even before opcode
+- * handlers and io_issue_sqe() are done with it, e.g. inline completion path.
+- * Because of that, io_alloc_req() should be called only under ->uring_lock
+- * and with extra caution to not get a request that is still worked on.
+- */
+-static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
+- __must_hold(&ctx->uring_lock)
+-{
+- struct io_submit_state *state = &ctx->submit_state;
+- gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+- int ret, i;
+-
+- BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH);
+-
+- if (likely(state->free_reqs || io_flush_cached_reqs(ctx)))
+- goto got_req;
+-
+- ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
+- state->reqs);
+-
+- /*
+- * Bulk alloc is all-or-nothing. If we fail to get a batch,
+- * retry single alloc to be on the safe side.
+- */
+- if (unlikely(ret <= 0)) {
+- state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+- if (!state->reqs[0])
+- return NULL;
+- ret = 1;
+- }
+-
+- for (i = 0; i < ret; i++)
+- io_preinit_req(state->reqs[i], ctx);
+- state->free_reqs = ret;
+-got_req:
+- state->free_reqs--;
+- return state->reqs[state->free_reqs];
+-}
+-
+-static inline void io_put_file(struct file *file)
+-{
+- if (file)
+- fput(file);
+-}
+-
+-static void io_dismantle_req(struct io_kiocb *req)
+-{
+- unsigned int flags = req->flags;
+-
+- if (io_req_needs_clean(req))
+- io_clean_op(req);
+- if (!(flags & REQ_F_FIXED_FILE))
+- io_put_file(req->file);
+- if (req->fixed_rsrc_refs)
+- percpu_ref_put(req->fixed_rsrc_refs);
+- if (req->async_data) {
+- kfree(req->async_data);
+- req->async_data = NULL;
+- }
+-}
+-
+-static void __io_free_req(struct io_kiocb *req)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- io_dismantle_req(req);
+- io_put_task(req->task, 1);
+-
+- spin_lock(&ctx->completion_lock);
+- list_add(&req->inflight_entry, &ctx->locked_free_list);
+- ctx->locked_free_nr++;
+- spin_unlock(&ctx->completion_lock);
+-
+- percpu_ref_put(&ctx->refs);
+-}
+-
+-static inline void io_remove_next_linked(struct io_kiocb *req)
+-{
+- struct io_kiocb *nxt = req->link;
+-
+- req->link = nxt->link;
+- nxt->link = NULL;
+-}
+-
+-static bool io_kill_linked_timeout(struct io_kiocb *req)
+- __must_hold(&req->ctx->completion_lock)
+- __must_hold(&req->ctx->timeout_lock)
+-{
+- struct io_kiocb *link = req->link;
+-
+- if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+- struct io_timeout_data *io = link->async_data;
+-
+- io_remove_next_linked(req);
+- link->timeout.head = NULL;
+- if (hrtimer_try_to_cancel(&io->timer) != -1) {
+- list_del(&link->timeout.list);
+- io_cqring_fill_event(link->ctx, link->user_data,
+- -ECANCELED, 0);
+- io_put_req_deferred(link);
+- return true;
+- }
+- }
+- return false;
+-}
+-
+-static void io_fail_links(struct io_kiocb *req)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- struct io_kiocb *nxt, *link = req->link;
+-
+- req->link = NULL;
+- while (link) {
+- long res = -ECANCELED;
+-
+- if (link->flags & REQ_F_FAIL)
+- res = link->result;
+-
+- nxt = link->link;
+- link->link = NULL;
+-
+- trace_io_uring_fail_link(req, link);
+- io_cqring_fill_event(link->ctx, link->user_data, res, 0);
+- io_put_req_deferred(link);
+- link = nxt;
+- }
+-}
+-
+-static bool io_disarm_next(struct io_kiocb *req)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- bool posted = false;
+-
+- if (req->flags & REQ_F_ARM_LTIMEOUT) {
+- struct io_kiocb *link = req->link;
+-
+- req->flags &= ~REQ_F_ARM_LTIMEOUT;
+- if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+- io_remove_next_linked(req);
+- io_cqring_fill_event(link->ctx, link->user_data,
+- -ECANCELED, 0);
+- io_put_req_deferred(link);
+- posted = true;
+- }
+- } else if (req->flags & REQ_F_LINK_TIMEOUT) {
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- spin_lock_irq(&ctx->timeout_lock);
+- posted = io_kill_linked_timeout(req);
+- spin_unlock_irq(&ctx->timeout_lock);
+- }
+- if (unlikely((req->flags & REQ_F_FAIL) &&
+- !(req->flags & REQ_F_HARDLINK))) {
+- posted |= (req->link != NULL);
+- io_fail_links(req);
+- }
+- return posted;
+-}
+-
+-static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
+-{
+- struct io_kiocb *nxt;
+-
+- /*
+- * If LINK is set, we have dependent requests in this chain. If we
+- * didn't fail this request, queue the first one up, moving any other
+- * dependencies to the next request. In case of failure, fail the rest
+- * of the chain.
+- */
+- if (req->flags & IO_DISARM_MASK) {
+- struct io_ring_ctx *ctx = req->ctx;
+- bool posted;
+-
+- spin_lock(&ctx->completion_lock);
+- posted = io_disarm_next(req);
+- if (posted)
+- io_commit_cqring(req->ctx);
+- spin_unlock(&ctx->completion_lock);
+- if (posted)
+- io_cqring_ev_posted(ctx);
+- }
+- nxt = req->link;
+- req->link = NULL;
+- return nxt;
+-}
+-
+-static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
+-{
+- if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK))))
+- return NULL;
+- return __io_req_find_next(req);
+-}
+-
+-static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
+-{
+- if (!ctx)
+- return;
+- if (*locked) {
+- if (ctx->submit_state.compl_nr)
+- io_submit_flush_completions(ctx);
+- mutex_unlock(&ctx->uring_lock);
+- *locked = false;
+- }
+- percpu_ref_put(&ctx->refs);
+-}
+-
+-static void tctx_task_work(struct callback_head *cb)
+-{
+- bool locked = false;
+- struct io_ring_ctx *ctx = NULL;
+- struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
+- task_work);
+-
+- while (1) {
+- struct io_wq_work_node *node;
+-
+- if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+- io_submit_flush_completions(ctx);
+-
+- spin_lock_irq(&tctx->task_lock);
+- node = tctx->task_list.first;
+- INIT_WQ_LIST(&tctx->task_list);
+- if (!node)
+- tctx->task_running = false;
+- spin_unlock_irq(&tctx->task_lock);
+- if (!node)
+- break;
+-
+- do {
+- struct io_wq_work_node *next = node->next;
+- struct io_kiocb *req = container_of(node, struct io_kiocb,
+- io_task_work.node);
+-
+- if (req->ctx != ctx) {
+- ctx_flush_and_put(ctx, &locked);
+- ctx = req->ctx;
+- /* if not contended, grab and improve batching */
+- locked = mutex_trylock(&ctx->uring_lock);
+- percpu_ref_get(&ctx->refs);
+- }
+- req->io_task_work.func(req, &locked);
+- node = next;
+- } while (node);
+-
+- cond_resched();
+- }
+-
+- ctx_flush_and_put(ctx, &locked);
+-}
+-
+-static void io_req_task_work_add(struct io_kiocb *req)
+-{
+- struct task_struct *tsk = req->task;
+- struct io_uring_task *tctx = tsk->io_uring;
+- enum task_work_notify_mode notify;
+- struct io_wq_work_node *node;
+- unsigned long flags;
+- bool running;
+-
+- WARN_ON_ONCE(!tctx);
+-
+- spin_lock_irqsave(&tctx->task_lock, flags);
+- wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+- running = tctx->task_running;
+- if (!running)
+- tctx->task_running = true;
+- spin_unlock_irqrestore(&tctx->task_lock, flags);
+-
+- /* task_work already pending, we're done */
+- if (running)
+- return;
+-
+- /*
+- * SQPOLL kernel thread doesn't need notification, just a wakeup. For
+- * all other cases, use TWA_SIGNAL unconditionally to ensure we're
+- * processing task_work. There's no reliable way to tell if TWA_RESUME
+- * will do the job.
+- */
+- notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
+- if (!task_work_add(tsk, &tctx->task_work, notify)) {
+- wake_up_process(tsk);
+- return;
+- }
+-
+- spin_lock_irqsave(&tctx->task_lock, flags);
+- tctx->task_running = false;
+- node = tctx->task_list.first;
+- INIT_WQ_LIST(&tctx->task_list);
+- spin_unlock_irqrestore(&tctx->task_lock, flags);
+-
+- while (node) {
+- req = container_of(node, struct io_kiocb, io_task_work.node);
+- node = node->next;
+- if (llist_add(&req->io_task_work.fallback_node,
+- &req->ctx->fallback_llist))
+- schedule_delayed_work(&req->ctx->fallback_work, 1);
+- }
+-}
+-
+-static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- /* not needed for normal modes, but SQPOLL depends on it */
+- io_tw_lock(ctx, locked);
+- io_req_complete_failed(req, req->result);
+-}
+-
+-static void io_req_task_submit(struct io_kiocb *req, bool *locked)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- io_tw_lock(ctx, locked);
+- /* req->task == current here, checking PF_EXITING is safe */
+- if (likely(!(req->task->flags & PF_EXITING)))
+- __io_queue_sqe(req);
+- else
+- io_req_complete_failed(req, -EFAULT);
+-}
+-
+-static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
+-{
+- req->result = ret;
+- req->io_task_work.func = io_req_task_cancel;
+- io_req_task_work_add(req);
+-}
+-
+-static void io_req_task_queue(struct io_kiocb *req)
+-{
+- req->io_task_work.func = io_req_task_submit;
+- io_req_task_work_add(req);
+-}
+-
+-static void io_req_task_queue_reissue(struct io_kiocb *req)
+-{
+- req->io_task_work.func = io_queue_async_work;
+- io_req_task_work_add(req);
+-}
+-
+-static inline void io_queue_next(struct io_kiocb *req)
+-{
+- struct io_kiocb *nxt = io_req_find_next(req);
+-
+- if (nxt)
+- io_req_task_queue(nxt);
+-}
+-
+-static void io_free_req(struct io_kiocb *req)
+-{
+- io_queue_next(req);
+- __io_free_req(req);
+-}
+-
+-static void io_free_req_work(struct io_kiocb *req, bool *locked)
+-{
+- io_free_req(req);
+-}
+-
+-struct req_batch {
+- struct task_struct *task;
+- int task_refs;
+- int ctx_refs;
+-};
+-
+-static inline void io_init_req_batch(struct req_batch *rb)
+-{
+- rb->task_refs = 0;
+- rb->ctx_refs = 0;
+- rb->task = NULL;
+-}
+-
+-static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
+- struct req_batch *rb)
+-{
+- if (rb->ctx_refs)
+- percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
+- if (rb->task)
+- io_put_task(rb->task, rb->task_refs);
+-}
+-
+-static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
+- struct io_submit_state *state)
+-{
+- io_queue_next(req);
+- io_dismantle_req(req);
+-
+- if (req->task != rb->task) {
+- if (rb->task)
+- io_put_task(rb->task, rb->task_refs);
+- rb->task = req->task;
+- rb->task_refs = 0;
+- }
+- rb->task_refs++;
+- rb->ctx_refs++;
+-
+- if (state->free_reqs != ARRAY_SIZE(state->reqs))
+- state->reqs[state->free_reqs++] = req;
+- else
+- list_add(&req->inflight_entry, &state->free_list);
+-}
+-
+-static void io_submit_flush_completions(struct io_ring_ctx *ctx)
+- __must_hold(&ctx->uring_lock)
+-{
+- struct io_submit_state *state = &ctx->submit_state;
+- int i, nr = state->compl_nr;
+- struct req_batch rb;
+-
+- spin_lock(&ctx->completion_lock);
+- for (i = 0; i < nr; i++) {
+- struct io_kiocb *req = state->compl_reqs[i];
+-
+- __io_cqring_fill_event(ctx, req->user_data, req->result,
+- req->compl.cflags);
+- }
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+- io_cqring_ev_posted(ctx);
+-
+- io_init_req_batch(&rb);
+- for (i = 0; i < nr; i++) {
+- struct io_kiocb *req = state->compl_reqs[i];
+-
+- if (req_ref_put_and_test(req))
+- io_req_free_batch(&rb, req, &ctx->submit_state);
+- }
+-
+- io_req_free_batch_finish(ctx, &rb);
+- state->compl_nr = 0;
+-}
+-
+-/*
+- * Drop reference to request, return next in chain (if there is one) if this
+- * was the last reference to this request.
+- */
+-static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
+-{
+- struct io_kiocb *nxt = NULL;
+-
+- if (req_ref_put_and_test(req)) {
+- nxt = io_req_find_next(req);
+- __io_free_req(req);
+- }
+- return nxt;
+-}
+-
+-static inline void io_put_req(struct io_kiocb *req)
+-{
+- if (req_ref_put_and_test(req))
+- io_free_req(req);
+-}
+-
+-static inline void io_put_req_deferred(struct io_kiocb *req)
+-{
+- if (req_ref_put_and_test(req)) {
+- req->io_task_work.func = io_free_req_work;
+- io_req_task_work_add(req);
+- }
+-}
+-
+-static unsigned io_cqring_events(struct io_ring_ctx *ctx)
+-{
+- /* See comment at the top of this file */
+- smp_rmb();
+- return __io_cqring_events(ctx);
+-}
+-
+-static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+-{
+- struct io_rings *rings = ctx->rings;
+-
+- /* make sure SQ entry isn't read before tail */
+- return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+-}
+-
+-static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
+-{
+- unsigned int cflags;
+-
+- cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
+- cflags |= IORING_CQE_F_BUFFER;
+- req->flags &= ~REQ_F_BUFFER_SELECTED;
+- kfree(kbuf);
+- return cflags;
+-}
+-
+-static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
+-{
+- struct io_buffer *kbuf;
+-
+- if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+- return 0;
+- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+- return io_put_kbuf(req, kbuf);
+-}
+-
+-static inline bool io_run_task_work(void)
+-{
+- if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+- __set_current_state(TASK_RUNNING);
+- tracehook_notify_signal();
+- return true;
+- }
+-
+- return false;
+-}
+-
+-/*
+- * Find and free completed poll iocbs
+- */
+-static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
+- struct list_head *done)
+-{
+- struct req_batch rb;
+- struct io_kiocb *req;
+-
+- /* order with ->result store in io_complete_rw_iopoll() */
+- smp_rmb();
+-
+- io_init_req_batch(&rb);
+- while (!list_empty(done)) {
+- req = list_first_entry(done, struct io_kiocb, inflight_entry);
+- list_del(&req->inflight_entry);
+-
+- __io_cqring_fill_event(ctx, req->user_data, req->result,
+- io_put_rw_kbuf(req));
+- (*nr_events)++;
+-
+- if (req_ref_put_and_test(req))
+- io_req_free_batch(&rb, req, &ctx->submit_state);
+- }
+-
+- io_commit_cqring(ctx);
+- io_cqring_ev_posted_iopoll(ctx);
+- io_req_free_batch_finish(ctx, &rb);
+-}
+-
+-static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
+- long min)
+-{
+- struct io_kiocb *req, *tmp;
+- LIST_HEAD(done);
+- bool spin;
+-
+- /*
+- * Only spin for completions if we don't have multiple devices hanging
+- * off our complete list, and we're under the requested amount.
+- */
+- spin = !ctx->poll_multi_queue && *nr_events < min;
+-
+- list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
+- struct kiocb *kiocb = &req->rw.kiocb;
+- int ret;
+-
+- /*
+- * Move completed and retryable entries to our local lists.
+- * If we find a request that requires polling, break out
+- * and complete those lists first, if we have entries there.
+- */
+- if (READ_ONCE(req->iopoll_completed)) {
+- list_move_tail(&req->inflight_entry, &done);
+- continue;
+- }
+- if (!list_empty(&done))
+- break;
+-
+- ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
+- if (unlikely(ret < 0))
+- return ret;
+- else if (ret)
+- spin = false;
+-
+- /* iopoll may have completed current req */
+- if (READ_ONCE(req->iopoll_completed))
+- list_move_tail(&req->inflight_entry, &done);
+- }
+-
+- if (!list_empty(&done))
+- io_iopoll_complete(ctx, nr_events, &done);
+-
+- return 0;
+-}
+-
+-/*
+- * We can't just wait for polled events to come to us, we have to actively
+- * find and complete them.
+- */
+-static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
+-{
+- if (!(ctx->flags & IORING_SETUP_IOPOLL))
+- return;
+-
+- mutex_lock(&ctx->uring_lock);
+- while (!list_empty(&ctx->iopoll_list)) {
+- unsigned int nr_events = 0;
+-
+- io_do_iopoll(ctx, &nr_events, 0);
+-
+- /* let it sleep and repeat later if can't complete a request */
+- if (nr_events == 0)
+- break;
+- /*
+- * Ensure we allow local-to-the-cpu processing to take place,
+- * in this case we need to ensure that we reap all events.
+- * Also let task_work, etc. to progress by releasing the mutex
+-		 * Also let task_work, etc. progress by releasing the mutex
+- if (need_resched()) {
+- mutex_unlock(&ctx->uring_lock);
+- cond_resched();
+- mutex_lock(&ctx->uring_lock);
+- }
+- }
+- mutex_unlock(&ctx->uring_lock);
+-}
+-
+-static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
+-{
+- unsigned int nr_events = 0;
+- int ret = 0;
+-
+- /*
+- * We disallow the app entering submit/complete with polling, but we
+- * still need to lock the ring to prevent racing with polled issue
+- * that got punted to a workqueue.
+- */
+- mutex_lock(&ctx->uring_lock);
+- /*
+- * Don't enter poll loop if we already have events pending.
+- * If we do, we can potentially be spinning for commands that
+- * already triggered a CQE (eg in error).
+- */
+- if (test_bit(0, &ctx->check_cq_overflow))
+- __io_cqring_overflow_flush(ctx, false);
+- if (io_cqring_events(ctx))
+- goto out;
+- do {
+- /*
+- * If a submit got punted to a workqueue, we can have the
+- * application entering polling for a command before it gets
+- * issued. That app will hold the uring_lock for the duration
+- * of the poll right here, so we need to take a breather every
+- * now and then to ensure that the issue has a chance to add
+- * the poll to the issued list. Otherwise we can spin here
+- * forever, while the workqueue is stuck trying to acquire the
+- * very same mutex.
+- */
+- if (list_empty(&ctx->iopoll_list)) {
+- u32 tail = ctx->cached_cq_tail;
+-
+- mutex_unlock(&ctx->uring_lock);
+- io_run_task_work();
+- mutex_lock(&ctx->uring_lock);
+-
+- /* some requests don't go through iopoll_list */
+- if (tail != ctx->cached_cq_tail ||
+- list_empty(&ctx->iopoll_list))
+- break;
+- }
+- ret = io_do_iopoll(ctx, &nr_events, min);
+- } while (!ret && nr_events < min && !need_resched());
+-out:
+- mutex_unlock(&ctx->uring_lock);
+- return ret;
+-}
+-
+-static void kiocb_end_write(struct io_kiocb *req)
+-{
+- /*
+- * Tell lockdep we inherited freeze protection from submission
+- * thread.
+- */
+- if (req->flags & REQ_F_ISREG) {
+- struct super_block *sb = file_inode(req->file)->i_sb;
+-
+- __sb_writers_acquired(sb, SB_FREEZE_WRITE);
+- sb_end_write(sb);
+- }
+-}
+-
+-#ifdef CONFIG_BLOCK
+-static bool io_resubmit_prep(struct io_kiocb *req)
+-{
+- struct io_async_rw *rw = req->async_data;
+-
+- if (!rw)
+- return !io_req_prep_async(req);
+- iov_iter_restore(&rw->iter, &rw->iter_state);
+- return true;
+-}
+-
+-static bool io_rw_should_reissue(struct io_kiocb *req)
+-{
+- umode_t mode = file_inode(req->file)->i_mode;
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (!S_ISBLK(mode) && !S_ISREG(mode))
+- return false;
+- if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
+- !(ctx->flags & IORING_SETUP_IOPOLL)))
+- return false;
+- /*
+- * If ref is dying, we might be running poll reap from the exit work.
+- * Don't attempt to reissue from that path, just let it fail with
+- * -EAGAIN.
+- */
+- if (percpu_ref_is_dying(&ctx->refs))
+- return false;
+- /*
+- * Play it safe and assume not safe to re-import and reissue if we're
+- * not in the original thread group (or in task context).
+- */
+- if (!same_thread_group(req->task, current) || !in_task())
+- return false;
+- return true;
+-}
+-#else
+-static bool io_resubmit_prep(struct io_kiocb *req)
+-{
+- return false;
+-}
+-static bool io_rw_should_reissue(struct io_kiocb *req)
+-{
+- return false;
+-}
+-#endif
+-
+-static bool __io_complete_rw_common(struct io_kiocb *req, long res)
+-{
+- if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+- kiocb_end_write(req);
+- if (res != req->result) {
+- if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
+- io_rw_should_reissue(req)) {
+- req->flags |= REQ_F_REISSUE;
+- return true;
+- }
+- req_set_fail(req);
+- req->result = res;
+- }
+- return false;
+-}
+-
+-static void io_req_task_complete(struct io_kiocb *req, bool *locked)
+-{
+- unsigned int cflags = io_put_rw_kbuf(req);
+- long res = req->result;
+-
+- if (*locked) {
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_submit_state *state = &ctx->submit_state;
+-
+- io_req_complete_state(req, res, cflags);
+- state->compl_reqs[state->compl_nr++] = req;
+- if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+- io_submit_flush_completions(ctx);
+- } else {
+- io_req_complete_post(req, res, cflags);
+- }
+-}
+-
+-static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+- unsigned int issue_flags)
+-{
+- if (__io_complete_rw_common(req, res))
+- return;
+- __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
+-}
+-
+-static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+-{
+- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+-
+- if (__io_complete_rw_common(req, res))
+- return;
+- req->result = res;
+- req->io_task_work.func = io_req_task_complete;
+- io_req_task_work_add(req);
+-}
+-
+-static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
+-{
+- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+-
+- if (kiocb->ki_flags & IOCB_WRITE)
+- kiocb_end_write(req);
+- if (unlikely(res != req->result)) {
+- if (res == -EAGAIN && io_rw_should_reissue(req)) {
+- req->flags |= REQ_F_REISSUE;
+- return;
+- }
+- }
+-
+- WRITE_ONCE(req->result, res);
+- /* order with io_iopoll_complete() checking ->result */
+- smp_wmb();
+- WRITE_ONCE(req->iopoll_completed, 1);
+-}
+-
+-/*
+- * After the iocb has been issued, it's safe to be found on the poll list.
+- * Adding the kiocb to the list AFTER submission ensures that we don't
+- * find it from an io_do_iopoll() thread before the issuer is done
+- * accessing the kiocb cookie.
+- */
+-static void io_iopoll_req_issued(struct io_kiocb *req)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- const bool in_async = io_wq_current_is_worker();
+-
+- /* workqueue context doesn't hold uring_lock, grab it now */
+- if (unlikely(in_async))
+- mutex_lock(&ctx->uring_lock);
+-
+- /*
+- * Track whether we have multiple files in our lists. This will impact
+- * how we do polling eventually, not spinning if we're on potentially
+- * different devices.
+- */
+- if (list_empty(&ctx->iopoll_list)) {
+- ctx->poll_multi_queue = false;
+- } else if (!ctx->poll_multi_queue) {
+- struct io_kiocb *list_req;
+- unsigned int queue_num0, queue_num1;
+-
+- list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
+- inflight_entry);
+-
+- if (list_req->file != req->file) {
+- ctx->poll_multi_queue = true;
+- } else {
+- queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie);
+- queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie);
+- if (queue_num0 != queue_num1)
+- ctx->poll_multi_queue = true;
+- }
+- }
+-
+- /*
+- * For fast devices, IO may have already completed. If it has, add
+- * it to the front so we find it first.
+- */
+- if (READ_ONCE(req->iopoll_completed))
+- list_add(&req->inflight_entry, &ctx->iopoll_list);
+- else
+- list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
+-
+- if (unlikely(in_async)) {
+- /*
+-		 * If IORING_SETUP_SQPOLL is enabled, sqes are either handled
+-		 * in sq thread task context or in io worker task context. If
+-		 * the current task context is sq thread, we don't need to check
+-		 * whether we should wake up the sq thread.
+- */
+- if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+- wq_has_sleeper(&ctx->sq_data->wait))
+- wake_up(&ctx->sq_data->wait);
+-
+- mutex_unlock(&ctx->uring_lock);
+- }
+-}
+-
+-static bool io_bdev_nowait(struct block_device *bdev)
+-{
+- return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
+-}
+-
+-/*
+- * If we tracked the file through the SCM inflight mechanism, we could support
+- * any file. For now, just ensure that anything potentially problematic is done
+- * inline.
+- */
+-static bool __io_file_supports_nowait(struct file *file, int rw)
+-{
+- umode_t mode = file_inode(file)->i_mode;
+-
+- if (S_ISBLK(mode)) {
+- if (IS_ENABLED(CONFIG_BLOCK) &&
+- io_bdev_nowait(I_BDEV(file->f_mapping->host)))
+- return true;
+- return false;
+- }
+- if (S_ISSOCK(mode))
+- return true;
+- if (S_ISREG(mode)) {
+- if (IS_ENABLED(CONFIG_BLOCK) &&
+- io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
+- file->f_op != &io_uring_fops)
+- return true;
+- return false;
+- }
+-
+- /* any ->read/write should understand O_NONBLOCK */
+- if (file->f_flags & O_NONBLOCK)
+- return true;
+-
+- if (!(file->f_mode & FMODE_NOWAIT))
+- return false;
+-
+- if (rw == READ)
+- return file->f_op->read_iter != NULL;
+-
+- return file->f_op->write_iter != NULL;
+-}
+-
+-static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
+-{
+- if (rw == READ && (req->flags & REQ_F_NOWAIT_READ))
+- return true;
+- else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE))
+- return true;
+-
+- return __io_file_supports_nowait(req->file, rw);
+-}
+-
+-static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+- int rw)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct kiocb *kiocb = &req->rw.kiocb;
+- struct file *file = req->file;
+- unsigned ioprio;
+- int ret;
+-
+- if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode))
+- req->flags |= REQ_F_ISREG;
+-
+- kiocb->ki_pos = READ_ONCE(sqe->off);
+- if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) {
+- req->flags |= REQ_F_CUR_POS;
+- kiocb->ki_pos = file->f_pos;
+- }
+- kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+- kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+- if (unlikely(ret))
+- return ret;
+-
+- /*
+- * If the file is marked O_NONBLOCK, still allow retry for it if it
+- * supports async. Otherwise it's impossible to use O_NONBLOCK files
+-	 * reliably. If not, or if IOCB_NOWAIT is set, don't retry.
+- */
+- if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
+- req->flags |= REQ_F_NOWAIT;
+-
+- ioprio = READ_ONCE(sqe->ioprio);
+- if (ioprio) {
+- ret = ioprio_check_cap(ioprio);
+- if (ret)
+- return ret;
+-
+- kiocb->ki_ioprio = ioprio;
+- } else
+- kiocb->ki_ioprio = get_current_ioprio();
+-
+- if (ctx->flags & IORING_SETUP_IOPOLL) {
+- if (!(kiocb->ki_flags & IOCB_DIRECT) ||
+- !kiocb->ki_filp->f_op->iopoll)
+- return -EOPNOTSUPP;
+-
+- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
+- kiocb->ki_complete = io_complete_rw_iopoll;
+- req->iopoll_completed = 0;
+- } else {
+- if (kiocb->ki_flags & IOCB_HIPRI)
+- return -EINVAL;
+- kiocb->ki_complete = io_complete_rw;
+- }
+-
+- if (req->opcode == IORING_OP_READ_FIXED ||
+- req->opcode == IORING_OP_WRITE_FIXED) {
+- req->imu = NULL;
+- io_req_set_rsrc_node(req);
+- }
+-
+- req->rw.addr = READ_ONCE(sqe->addr);
+- req->rw.len = READ_ONCE(sqe->len);
+- req->buf_index = READ_ONCE(sqe->buf_index);
+- return 0;
+-}
+-
+-static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+-{
+- switch (ret) {
+- case -EIOCBQUEUED:
+- break;
+- case -ERESTARTSYS:
+- case -ERESTARTNOINTR:
+- case -ERESTARTNOHAND:
+- case -ERESTART_RESTARTBLOCK:
+- /*
+- * We can't just restart the syscall, since previously
+- * submitted sqes may already be in progress. Just fail this
+- * IO with EINTR.
+- */
+- ret = -EINTR;
+- fallthrough;
+- default:
+- kiocb->ki_complete(kiocb, ret, 0);
+- }
+-}
+-
+-static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+- unsigned int issue_flags)
+-{
+- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+- struct io_async_rw *io = req->async_data;
+-
+- /* add previously done IO, if any */
+- if (io && io->bytes_done > 0) {
+- if (ret < 0)
+- ret = io->bytes_done;
+- else
+- ret += io->bytes_done;
+- }
+-
+- if (req->flags & REQ_F_CUR_POS)
+- req->file->f_pos = kiocb->ki_pos;
+- if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
+- __io_complete_rw(req, ret, 0, issue_flags);
+- else
+- io_rw_done(kiocb, ret);
+-
+- if (req->flags & REQ_F_REISSUE) {
+- req->flags &= ~REQ_F_REISSUE;
+- if (io_resubmit_prep(req)) {
+- io_req_task_queue_reissue(req);
+- } else {
+- unsigned int cflags = io_put_rw_kbuf(req);
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- req_set_fail(req);
+- if (!(issue_flags & IO_URING_F_NONBLOCK)) {
+- mutex_lock(&ctx->uring_lock);
+- __io_req_complete(req, issue_flags, ret, cflags);
+- mutex_unlock(&ctx->uring_lock);
+- } else {
+- __io_req_complete(req, issue_flags, ret, cflags);
+- }
+- }
+- }
+-}
+-
+-static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
+- struct io_mapped_ubuf *imu)
+-{
+- size_t len = req->rw.len;
+- u64 buf_end, buf_addr = req->rw.addr;
+- size_t offset;
+-
+- if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
+- return -EFAULT;
+- /* not inside the mapped region */
+- if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end))
+- return -EFAULT;
+-
+- /*
+- * May not be a start of buffer, set size appropriately
+- * and advance us to the beginning.
+- */
+- offset = buf_addr - imu->ubuf;
+- iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
+-
+- if (offset) {
+- /*
+- * Don't use iov_iter_advance() here, as it's really slow for
+- * using the latter parts of a big fixed buffer - it iterates
+- * over each segment manually. We can cheat a bit here, because
+- * we know that:
+- *
+- * 1) it's a BVEC iter, we set it up
+- * 2) all bvecs are PAGE_SIZE in size, except potentially the
+- * first and last bvec
+- *
+- * So just find our index, and adjust the iterator afterwards.
+- * If the offset is within the first bvec (or the whole first
+-		 * bvec), just use iov_iter_advance(). This makes it easier
+- * since we can just skip the first segment, which may not
+- * be PAGE_SIZE aligned.
+- */
+- const struct bio_vec *bvec = imu->bvec;
+-
+- if (offset <= bvec->bv_len) {
+- iov_iter_advance(iter, offset);
+- } else {
+- unsigned long seg_skip;
+-
+- /* skip first vec */
+- offset -= bvec->bv_len;
+- seg_skip = 1 + (offset >> PAGE_SHIFT);
+-
+- iter->bvec = bvec + seg_skip;
+- iter->nr_segs -= seg_skip;
+- iter->count -= bvec->bv_len + offset;
+- iter->iov_offset = offset & ~PAGE_MASK;
+- }
+- }
+-
+- return 0;
+-}
+-
+-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_mapped_ubuf *imu = req->imu;
+- u16 index, buf_index = req->buf_index;
+-
+- if (likely(!imu)) {
+- if (unlikely(buf_index >= ctx->nr_user_bufs))
+- return -EFAULT;
+- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
+- imu = READ_ONCE(ctx->user_bufs[index]);
+- req->imu = imu;
+- }
+- return __io_import_fixed(req, rw, iter, imu);
+-}
+-
+-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
+-{
+- if (needs_lock)
+- mutex_unlock(&ctx->uring_lock);
+-}
+-
+-static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+-{
+- /*
+- * "Normal" inline submissions always hold the uring_lock, since we
+- * grab it from the system call. Same is true for the SQPOLL offload.
+- * The only exception is when we've detached the request and issue it
+- * from an async worker thread, grab the lock for that case.
+- */
+- if (needs_lock)
+- mutex_lock(&ctx->uring_lock);
+-}
+-
+-static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
+- int bgid, struct io_buffer *kbuf,
+- bool needs_lock)
+-{
+- struct io_buffer *head;
+-
+- if (req->flags & REQ_F_BUFFER_SELECTED)
+- return kbuf;
+-
+- io_ring_submit_lock(req->ctx, needs_lock);
+-
+- lockdep_assert_held(&req->ctx->uring_lock);
+-
+- head = xa_load(&req->ctx->io_buffers, bgid);
+- if (head) {
+- if (!list_empty(&head->list)) {
+- kbuf = list_last_entry(&head->list, struct io_buffer,
+- list);
+- list_del(&kbuf->list);
+- } else {
+- kbuf = head;
+- xa_erase(&req->ctx->io_buffers, bgid);
+- }
+- if (*len > kbuf->len)
+- *len = kbuf->len;
+- } else {
+- kbuf = ERR_PTR(-ENOBUFS);
+- }
+-
+- io_ring_submit_unlock(req->ctx, needs_lock);
+-
+- return kbuf;
+-}
+-
+-static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
+- bool needs_lock)
+-{
+- struct io_buffer *kbuf;
+- u16 bgid;
+-
+- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+- bgid = req->buf_index;
+- kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
+- if (IS_ERR(kbuf))
+- return kbuf;
+- req->rw.addr = (u64) (unsigned long) kbuf;
+- req->flags |= REQ_F_BUFFER_SELECTED;
+- return u64_to_user_ptr(kbuf->addr);
+-}
+-
+-#ifdef CONFIG_COMPAT
+-static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
+- bool needs_lock)
+-{
+- struct compat_iovec __user *uiov;
+- compat_ssize_t clen;
+- void __user *buf;
+- ssize_t len;
+-
+- uiov = u64_to_user_ptr(req->rw.addr);
+- if (!access_ok(uiov, sizeof(*uiov)))
+- return -EFAULT;
+- if (__get_user(clen, &uiov->iov_len))
+- return -EFAULT;
+- if (clen < 0)
+- return -EINVAL;
+-
+- len = clen;
+- buf = io_rw_buffer_select(req, &len, needs_lock);
+- if (IS_ERR(buf))
+- return PTR_ERR(buf);
+- iov[0].iov_base = buf;
+- iov[0].iov_len = (compat_size_t) len;
+- return 0;
+-}
+-#endif
+-
+-static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+- bool needs_lock)
+-{
+- struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
+- void __user *buf;
+- ssize_t len;
+-
+- if (copy_from_user(iov, uiov, sizeof(*uiov)))
+- return -EFAULT;
+-
+- len = iov[0].iov_len;
+- if (len < 0)
+- return -EINVAL;
+- buf = io_rw_buffer_select(req, &len, needs_lock);
+- if (IS_ERR(buf))
+- return PTR_ERR(buf);
+- iov[0].iov_base = buf;
+- iov[0].iov_len = len;
+- return 0;
+-}
+-
+-static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
+- bool needs_lock)
+-{
+- if (req->flags & REQ_F_BUFFER_SELECTED) {
+- struct io_buffer *kbuf;
+-
+- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+- iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+- iov[0].iov_len = kbuf->len;
+- return 0;
+- }
+- if (req->rw.len != 1)
+- return -EINVAL;
+-
+-#ifdef CONFIG_COMPAT
+- if (req->ctx->compat)
+- return io_compat_import(req, iov, needs_lock);
+-#endif
+-
+- return __io_iov_buffer_select(req, iov, needs_lock);
+-}
+-
+-static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec,
+- struct iov_iter *iter, bool needs_lock)
+-{
+- void __user *buf = u64_to_user_ptr(req->rw.addr);
+- size_t sqe_len = req->rw.len;
+- u8 opcode = req->opcode;
+- ssize_t ret;
+-
+- if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
+- *iovec = NULL;
+- return io_import_fixed(req, rw, iter);
+- }
+-
+- /* buffer index only valid with fixed read/write, or buffer select */
+- if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
+- return -EINVAL;
+-
+- if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
+- if (IS_ERR(buf))
+- return PTR_ERR(buf);
+- req->rw.len = sqe_len;
+- }
+-
+- ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
+- *iovec = NULL;
+- return ret;
+- }
+-
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- ret = io_iov_buffer_select(req, *iovec, needs_lock);
+- if (!ret)
+- iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len);
+- *iovec = NULL;
+- return ret;
+- }
+-
+- return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
+- req->ctx->compat);
+-}
+-
+-static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
+-{
+- return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
+-}
+-
+-/*
+- * For files that don't have ->read_iter() and ->write_iter(), handle them
+- * by looping over ->read() or ->write() manually.
+- */
+-static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+-{
+- struct kiocb *kiocb = &req->rw.kiocb;
+- struct file *file = req->file;
+- ssize_t ret = 0;
+-
+- /*
+- * Don't support polled IO through this interface, and we can't
+- * support non-blocking either. For the latter, this just causes
+- * the kiocb to be handled from an async context.
+- */
+- if (kiocb->ki_flags & IOCB_HIPRI)
+- return -EOPNOTSUPP;
+- if (kiocb->ki_flags & IOCB_NOWAIT)
+- return -EAGAIN;
+-
+- while (iov_iter_count(iter)) {
+- struct iovec iovec;
+- ssize_t nr;
+-
+- if (!iov_iter_is_bvec(iter)) {
+- iovec = iov_iter_iovec(iter);
+- } else {
+- iovec.iov_base = u64_to_user_ptr(req->rw.addr);
+- iovec.iov_len = req->rw.len;
+- }
+-
+- if (rw == READ) {
+- nr = file->f_op->read(file, iovec.iov_base,
+- iovec.iov_len, io_kiocb_ppos(kiocb));
+- } else {
+- nr = file->f_op->write(file, iovec.iov_base,
+- iovec.iov_len, io_kiocb_ppos(kiocb));
+- }
+-
+- if (nr < 0) {
+- if (!ret)
+- ret = nr;
+- break;
+- }
+- if (!iov_iter_is_bvec(iter)) {
+- iov_iter_advance(iter, nr);
+- } else {
+- req->rw.len -= nr;
+- req->rw.addr += nr;
+- }
+- ret += nr;
+- if (nr != iovec.iov_len)
+- break;
+- }
+-
+- return ret;
+-}
+-
+-static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
+- const struct iovec *fast_iov, struct iov_iter *iter)
+-{
+- struct io_async_rw *rw = req->async_data;
+-
+- memcpy(&rw->iter, iter, sizeof(*iter));
+- rw->free_iovec = iovec;
+- rw->bytes_done = 0;
+- /* can only be fixed buffers, no need to do anything */
+- if (iov_iter_is_bvec(iter))
+- return;
+- if (!iovec) {
+- unsigned iov_off = 0;
+-
+- rw->iter.iov = rw->fast_iov;
+- if (iter->iov != fast_iov) {
+- iov_off = iter->iov - fast_iov;
+- rw->iter.iov += iov_off;
+- }
+- if (rw->fast_iov != fast_iov)
+- memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
+- sizeof(struct iovec) * iter->nr_segs);
+- } else {
+- req->flags |= REQ_F_NEED_CLEANUP;
+- }
+-}
+-
+-static inline int io_alloc_async_data(struct io_kiocb *req)
+-{
+- WARN_ON_ONCE(!io_op_defs[req->opcode].async_size);
+- req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL);
+- return req->async_data == NULL;
+-}
+-
+-static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
+- const struct iovec *fast_iov,
+- struct iov_iter *iter, bool force)
+-{
+- if (!force && !io_op_defs[req->opcode].needs_async_setup)
+- return 0;
+- if (!req->async_data) {
+- struct io_async_rw *iorw;
+-
+- if (io_alloc_async_data(req)) {
+- kfree(iovec);
+- return -ENOMEM;
+- }
+-
+- io_req_map_rw(req, iovec, fast_iov, iter);
+- iorw = req->async_data;
+- /* we've copied and mapped the iter, ensure state is saved */
+- iov_iter_save_state(&iorw->iter, &iorw->iter_state);
+- }
+- return 0;
+-}
+-
+-static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
+-{
+- struct io_async_rw *iorw = req->async_data;
+- struct iovec *iov = iorw->fast_iov;
+- int ret;
+-
+- ret = io_import_iovec(rw, req, &iov, &iorw->iter, false);
+- if (unlikely(ret < 0))
+- return ret;
+-
+- iorw->bytes_done = 0;
+- iorw->free_iovec = iov;
+- if (iov)
+- req->flags |= REQ_F_NEED_CLEANUP;
+- iov_iter_save_state(&iorw->iter, &iorw->iter_state);
+- return 0;
+-}
+-
+-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(!(req->file->f_mode & FMODE_READ)))
+- return -EBADF;
+- return io_prep_rw(req, sqe, READ);
+-}
+-
+-/*
+- * This is our waitqueue callback handler, registered through lock_page_async()
+- * when we initially tried to do the IO with the iocb armed our waitqueue.
+- * This gets called when the page is unlocked, and we generally expect that to
+- * happen when the page IO is completed and the page is now uptodate. This will
+- * queue a task_work based retry of the operation, attempting to copy the data
+- * again. If the latter fails because the page was NOT uptodate, then we will
+- * do a thread based blocking retry of the operation. That's the unexpected
+- * slow path.
+- */
+-static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
+- int sync, void *arg)
+-{
+- struct wait_page_queue *wpq;
+- struct io_kiocb *req = wait->private;
+- struct wait_page_key *key = arg;
+-
+- wpq = container_of(wait, struct wait_page_queue, wait);
+-
+- if (!wake_page_match(wpq, key))
+- return 0;
+-
+- req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
+- list_del_init(&wait->entry);
+- io_req_task_queue(req);
+- return 1;
+-}
+-
+-/*
+- * This controls whether a given IO request should be armed for async page
+- * based retry. If we return false here, the request is handed to the async
+- * worker threads for retry. If we're doing buffered reads on a regular file,
+- * we prepare a private wait_page_queue entry and retry the operation. This
+- * will either succeed because the page is now uptodate and unlocked, or it
+- * will register a callback when the page is unlocked at IO completion. Through
+- * that callback, io_uring uses task_work to setup a retry of the operation.
+- * That retry will attempt the buffered read again. The retry will generally
+- * succeed, or in rare cases where it fails, we then fall back to using the
+- * async worker threads for a blocking retry.
+- */
+-static bool io_rw_should_retry(struct io_kiocb *req)
+-{
+- struct io_async_rw *rw = req->async_data;
+- struct wait_page_queue *wait = &rw->wpq;
+- struct kiocb *kiocb = &req->rw.kiocb;
+-
+- /* never retry for NOWAIT, we just complete with -EAGAIN */
+- if (req->flags & REQ_F_NOWAIT)
+- return false;
+-
+- /* Only for buffered IO */
+- if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
+- return false;
+-
+- /*
+- * just use poll if we can, and don't attempt if the fs doesn't
+- * support callback based unlocks
+- */
+- if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+- return false;
+-
+- wait->wait.func = io_async_buf_func;
+- wait->wait.private = req;
+- wait->wait.flags = 0;
+- INIT_LIST_HEAD(&wait->wait.entry);
+- kiocb->ki_flags |= IOCB_WAITQ;
+- kiocb->ki_flags &= ~IOCB_NOWAIT;
+- kiocb->ki_waitq = wait;
+- return true;
+-}
+-
+-static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
+-{
+- if (req->file->f_op->read_iter)
+- return call_read_iter(req->file, &req->rw.kiocb, iter);
+- else if (req->file->f_op->read)
+- return loop_rw_iter(READ, req, iter);
+- else
+- return -EINVAL;
+-}
+-
+-static bool need_read_all(struct io_kiocb *req)
+-{
+- return req->flags & REQ_F_ISREG ||
+- S_ISBLK(file_inode(req->file)->i_mode);
+-}
+-
+-static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+- struct kiocb *kiocb = &req->rw.kiocb;
+- struct iov_iter __iter, *iter = &__iter;
+- struct io_async_rw *rw = req->async_data;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+- struct iov_iter_state __state, *state;
+- ssize_t ret, ret2;
+-
+- if (rw) {
+- iter = &rw->iter;
+- state = &rw->iter_state;
+- /*
+- * We come here from an earlier attempt, restore our state to
+- * match in case it doesn't. It's cheap enough that we don't
+- * need to make this conditional.
+- */
+- iov_iter_restore(iter, state);
+- iovec = NULL;
+- } else {
+- ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
+- if (ret < 0)
+- return ret;
+- state = &__state;
+- iov_iter_save_state(iter, state);
+- }
+- req->result = iov_iter_count(iter);
+-
+- /* Ensure we clear previously set non-block flag */
+- if (!force_nonblock)
+- kiocb->ki_flags &= ~IOCB_NOWAIT;
+- else
+- kiocb->ki_flags |= IOCB_NOWAIT;
+-
+- /* If the file doesn't support async, just async punt */
+- if (force_nonblock && !io_file_supports_nowait(req, READ)) {
+- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
+- return ret ?: -EAGAIN;
+- }
+-
+- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+- if (unlikely(ret)) {
+- kfree(iovec);
+- return ret;
+- }
+-
+- ret = io_iter_do_read(req, iter);
+-
+- if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
+- req->flags &= ~REQ_F_REISSUE;
+- /* IOPOLL retry should happen for io-wq threads */
+- if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
+- goto done;
+- /* no retry on NONBLOCK nor RWF_NOWAIT */
+- if (req->flags & REQ_F_NOWAIT)
+- goto done;
+- ret = 0;
+- } else if (ret == -EIOCBQUEUED) {
+- goto out_free;
+- } else if (ret <= 0 || ret == req->result || !force_nonblock ||
+- (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
+- /* read all, failed, already did sync or don't want to retry */
+- goto done;
+- }
+-
+- /*
+- * Don't depend on the iter state matching what was consumed, or being
+- * untouched in case of error. Restore it and we'll advance it
+- * manually if we need to.
+- */
+- iov_iter_restore(iter, state);
+-
+- ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
+- if (ret2)
+- return ret2;
+-
+- iovec = NULL;
+- rw = req->async_data;
+- /*
+- * Now use our persistent iterator and state, if we aren't already.
+- * We've restored and mapped the iter to match.
+- */
+- if (iter != &rw->iter) {
+- iter = &rw->iter;
+- state = &rw->iter_state;
+- }
+-
+- do {
+- /*
+- * We end up here because of a partial read, either from
+- * above or inside this loop. Advance the iter by the bytes
+- * that were consumed.
+- */
+- iov_iter_advance(iter, ret);
+- if (!iov_iter_count(iter))
+- break;
+- rw->bytes_done += ret;
+- iov_iter_save_state(iter, state);
+-
+- /* if we can retry, do so with the callbacks armed */
+- if (!io_rw_should_retry(req)) {
+- kiocb->ki_flags &= ~IOCB_WAITQ;
+- return -EAGAIN;
+- }
+-
+- /*
+- * Now retry read with the IOCB_WAITQ parts set in the iocb. If
+- * we get -EIOCBQUEUED, then we'll get a notification when the
+- * desired page gets unlocked. We can also get a partial read
+- * here, and if we do, then just retry at the new offset.
+- */
+- ret = io_iter_do_read(req, iter);
+- if (ret == -EIOCBQUEUED)
+- return 0;
+- /* we got some bytes, but not all. retry. */
+- kiocb->ki_flags &= ~IOCB_WAITQ;
+- iov_iter_restore(iter, state);
+- } while (ret > 0);
+-done:
+- kiocb_done(kiocb, ret, issue_flags);
+-out_free:
+-	/* it's faster to check here than delegate to kfree */
+- if (iovec)
+- kfree(iovec);
+- return 0;
+-}
+-
+-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
+- return -EBADF;
+- return io_prep_rw(req, sqe, WRITE);
+-}
+-
+-static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+- struct kiocb *kiocb = &req->rw.kiocb;
+- struct iov_iter __iter, *iter = &__iter;
+- struct io_async_rw *rw = req->async_data;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+- struct iov_iter_state __state, *state;
+- ssize_t ret, ret2;
+-
+- if (rw) {
+- iter = &rw->iter;
+- state = &rw->iter_state;
+- iov_iter_restore(iter, state);
+- iovec = NULL;
+- } else {
+- ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
+- if (ret < 0)
+- return ret;
+- state = &__state;
+- iov_iter_save_state(iter, state);
+- }
+- req->result = iov_iter_count(iter);
+-
+- /* Ensure we clear previously set non-block flag */
+- if (!force_nonblock)
+- kiocb->ki_flags &= ~IOCB_NOWAIT;
+- else
+- kiocb->ki_flags |= IOCB_NOWAIT;
+-
+- /* If the file doesn't support async, just async punt */
+- if (force_nonblock && !io_file_supports_nowait(req, WRITE))
+- goto copy_iov;
+-
+- /* file path doesn't support NOWAIT for non-direct_IO */
+- if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
+- (req->flags & REQ_F_ISREG))
+- goto copy_iov;
+-
+- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+- if (unlikely(ret))
+- goto out_free;
+-
+- /*
+- * Open-code file_start_write here to grab freeze protection,
+- * which will be released by another thread in
+- * io_complete_rw(). Fool lockdep by telling it the lock got
+- * released so that it doesn't complain about the held lock when
+- * we return to userspace.
+- */
+- if (req->flags & REQ_F_ISREG) {
+- sb_start_write(file_inode(req->file)->i_sb);
+- __sb_writers_release(file_inode(req->file)->i_sb,
+- SB_FREEZE_WRITE);
+- }
+- kiocb->ki_flags |= IOCB_WRITE;
+-
+- if (req->file->f_op->write_iter)
+- ret2 = call_write_iter(req->file, kiocb, iter);
+- else if (req->file->f_op->write)
+- ret2 = loop_rw_iter(WRITE, req, iter);
+- else
+- ret2 = -EINVAL;
+-
+- if (req->flags & REQ_F_REISSUE) {
+- req->flags &= ~REQ_F_REISSUE;
+- ret2 = -EAGAIN;
+- }
+-
+- /*
+- * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
+- * retry them without IOCB_NOWAIT.
+- */
+- if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
+- ret2 = -EAGAIN;
+- /* no retry on NONBLOCK nor RWF_NOWAIT */
+- if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
+- goto done;
+- if (!force_nonblock || ret2 != -EAGAIN) {
+- /* IOPOLL retry should happen for io-wq threads */
+- if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
+- goto copy_iov;
+-done:
+- kiocb_done(kiocb, ret2, issue_flags);
+- } else {
+-copy_iov:
+- iov_iter_restore(iter, state);
+- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
+- return ret ?: -EAGAIN;
+- }
+-out_free:
+- /* it's reportedly faster than delegating the null check to kfree() */
+- if (iovec)
+- kfree(iovec);
+- return ret;
+-}
+-
+-static int io_renameat_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_rename *ren = &req->rename;
+- const char __user *oldf, *newf;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- ren->old_dfd = READ_ONCE(sqe->fd);
+- oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+- ren->new_dfd = READ_ONCE(sqe->len);
+- ren->flags = READ_ONCE(sqe->rename_flags);
+-
+- ren->oldpath = getname(oldf);
+- if (IS_ERR(ren->oldpath))
+- return PTR_ERR(ren->oldpath);
+-
+- ren->newpath = getname(newf);
+- if (IS_ERR(ren->newpath)) {
+- putname(ren->oldpath);
+- return PTR_ERR(ren->newpath);
+- }
+-
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_rename *ren = &req->rename;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd,
+- ren->newpath, ren->flags);
+-
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_unlinkat_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_unlink *un = &req->unlink;
+- const char __user *fname;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- un->dfd = READ_ONCE(sqe->fd);
+-
+- un->flags = READ_ONCE(sqe->unlink_flags);
+- if (un->flags & ~AT_REMOVEDIR)
+- return -EINVAL;
+-
+- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- un->filename = getname(fname);
+- if (IS_ERR(un->filename))
+- return PTR_ERR(un->filename);
+-
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_unlink *un = &req->unlink;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- if (un->flags & AT_REMOVEDIR)
+- ret = do_rmdir(un->dfd, un->filename);
+- else
+- ret = do_unlinkat(un->dfd, un->filename);
+-
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_mkdirat_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_mkdir *mkd = &req->mkdir;
+- const char __user *fname;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- mkd->dfd = READ_ONCE(sqe->fd);
+- mkd->mode = READ_ONCE(sqe->len);
+-
+- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- mkd->filename = getname(fname);
+- if (IS_ERR(mkd->filename))
+- return PTR_ERR(mkd->filename);
+-
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_mkdirat(struct io_kiocb *req, int issue_flags)
+-{
+- struct io_mkdir *mkd = &req->mkdir;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
+-
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_symlinkat_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_symlink *sl = &req->symlink;
+- const char __user *oldpath, *newpath;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- sl->new_dfd = READ_ONCE(sqe->fd);
+- oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+-
+- sl->oldpath = getname(oldpath);
+- if (IS_ERR(sl->oldpath))
+- return PTR_ERR(sl->oldpath);
+-
+- sl->newpath = getname(newpath);
+- if (IS_ERR(sl->newpath)) {
+- putname(sl->oldpath);
+- return PTR_ERR(sl->newpath);
+- }
+-
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_symlinkat(struct io_kiocb *req, int issue_flags)
+-{
+- struct io_symlink *sl = &req->symlink;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
+-
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_linkat_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_hardlink *lnk = &req->hardlink;
+- const char __user *oldf, *newf;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- lnk->old_dfd = READ_ONCE(sqe->fd);
+- lnk->new_dfd = READ_ONCE(sqe->len);
+- oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+- lnk->flags = READ_ONCE(sqe->hardlink_flags);
+-
+- lnk->oldpath = getname(oldf);
+- if (IS_ERR(lnk->oldpath))
+- return PTR_ERR(lnk->oldpath);
+-
+- lnk->newpath = getname(newf);
+- if (IS_ERR(lnk->newpath)) {
+- putname(lnk->oldpath);
+- return PTR_ERR(lnk->newpath);
+- }
+-
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_linkat(struct io_kiocb *req, int issue_flags)
+-{
+- struct io_hardlink *lnk = &req->hardlink;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
+- lnk->newpath, lnk->flags);
+-
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_shutdown_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+-#if defined(CONFIG_NET)
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+- sqe->buf_index || sqe->splice_fd_in))
+- return -EINVAL;
+-
+- req->shutdown.how = READ_ONCE(sqe->len);
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-#if defined(CONFIG_NET)
+- struct socket *sock;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- sock = sock_from_file(req->file);
+- if (unlikely(!sock))
+- return -ENOTSOCK;
+-
+- ret = __sys_shutdown_sock(sock, req->shutdown.how);
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int __io_splice_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_splice *sp = &req->splice;
+- unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- sp->file_in = NULL;
+- sp->len = READ_ONCE(sqe->len);
+- sp->flags = READ_ONCE(sqe->splice_flags);
+-
+- if (unlikely(sp->flags & ~valid_flags))
+- return -EINVAL;
+-
+- sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
+- (sp->flags & SPLICE_F_FD_IN_FIXED));
+- if (!sp->file_in)
+- return -EBADF;
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_tee_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off))
+- return -EINVAL;
+- return __io_splice_prep(req, sqe);
+-}
+-
+-static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_splice *sp = &req->splice;
+- struct file *in = sp->file_in;
+- struct file *out = sp->file_out;
+- unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+- long ret = 0;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+- if (sp->len)
+- ret = do_tee(in, out, sp->len, flags);
+-
+- if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
+- io_put_file(in);
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+-
+- if (ret != sp->len)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_splice *sp = &req->splice;
+-
+- sp->off_in = READ_ONCE(sqe->splice_off_in);
+- sp->off_out = READ_ONCE(sqe->off);
+- return __io_splice_prep(req, sqe);
+-}
+-
+-static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_splice *sp = &req->splice;
+- struct file *in = sp->file_in;
+- struct file *out = sp->file_out;
+- unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
+- loff_t *poff_in, *poff_out;
+- long ret = 0;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
+- poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
+-
+- if (sp->len)
+- ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
+-
+- if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
+- io_put_file(in);
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+-
+- if (ret != sp->len)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-/*
+- * IORING_OP_NOP just posts a completion event, nothing else.
+- */
+-static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- __io_req_complete(req, issue_flags, 0, 0);
+- return 0;
+-}
+-
+-static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (!req->file)
+- return -EBADF;
+-
+- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+- sqe->splice_fd_in))
+- return -EINVAL;
+-
+- req->sync.flags = READ_ONCE(sqe->fsync_flags);
+- if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
+- return -EINVAL;
+-
+- req->sync.off = READ_ONCE(sqe->off);
+- req->sync.len = READ_ONCE(sqe->len);
+- return 0;
+-}
+-
+-static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- loff_t end = req->sync.off + req->sync.len;
+- int ret;
+-
+- /* fsync always requires a blocking context */
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = vfs_fsync_range(req->file, req->sync.off,
+- end > 0 ? end : LLONG_MAX,
+- req->sync.flags & IORING_FSYNC_DATASYNC);
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_fallocate_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- req->sync.off = READ_ONCE(sqe->off);
+- req->sync.len = READ_ONCE(sqe->addr);
+- req->sync.mode = READ_ONCE(sqe->len);
+- return 0;
+-}
+-
+-static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- int ret;
+-
+-	/* fallocate always requires a blocking context */
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+- ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
+- req->sync.len);
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- const char __user *fname;
+- int ret;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(sqe->ioprio || sqe->buf_index))
+- return -EINVAL;
+- if (unlikely(req->flags & REQ_F_FIXED_FILE))
+- return -EBADF;
+-
+- /* open.how should be already initialised */
+- if (!(req->open.how.flags & O_PATH) && force_o_largefile())
+- req->open.how.flags |= O_LARGEFILE;
+-
+- req->open.dfd = READ_ONCE(sqe->fd);
+- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- req->open.filename = getname(fname);
+- if (IS_ERR(req->open.filename)) {
+- ret = PTR_ERR(req->open.filename);
+- req->open.filename = NULL;
+- return ret;
+- }
+-
+- req->open.file_slot = READ_ONCE(sqe->file_index);
+- if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC))
+- return -EINVAL;
+-
+- req->open.nofile = rlimit(RLIMIT_NOFILE);
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return 0;
+-}
+-
+-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- u64 mode = READ_ONCE(sqe->len);
+- u64 flags = READ_ONCE(sqe->open_flags);
+-
+- req->open.how = build_open_how(flags, mode);
+- return __io_openat_prep(req, sqe);
+-}
+-
+-static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct open_how __user *how;
+- size_t len;
+- int ret;
+-
+- how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+- len = READ_ONCE(sqe->len);
+- if (len < OPEN_HOW_SIZE_VER0)
+- return -EINVAL;
+-
+- ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how,
+- len);
+- if (ret)
+- return ret;
+-
+- return __io_openat_prep(req, sqe);
+-}
+-
+-static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct open_flags op;
+- struct file *file;
+- bool resolve_nonblock, nonblock_set;
+- bool fixed = !!req->open.file_slot;
+- int ret;
+-
+- ret = build_open_flags(&req->open.how, &op);
+- if (ret)
+- goto err;
+- nonblock_set = op.open_flag & O_NONBLOCK;
+- resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED;
+- if (issue_flags & IO_URING_F_NONBLOCK) {
+- /*
+- * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
+-		 * it'll always return -EAGAIN
+- */
+- if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+- return -EAGAIN;
+- op.lookup_flags |= LOOKUP_CACHED;
+- op.open_flag |= O_NONBLOCK;
+- }
+-
+- if (!fixed) {
+- ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
+- if (ret < 0)
+- goto err;
+- }
+-
+- file = do_filp_open(req->open.dfd, req->open.filename, &op);
+- if (IS_ERR(file)) {
+- /*
+- * We could hang on to this 'fd' on retrying, but seems like
+- * marginal gain for something that is now known to be a slower
+- * path. So just put it, and we'll get a new one when we retry.
+- */
+- if (!fixed)
+- put_unused_fd(ret);
+-
+- ret = PTR_ERR(file);
+- /* only retry if RESOLVE_CACHED wasn't already set by application */
+- if (ret == -EAGAIN &&
+- (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)))
+- return -EAGAIN;
+- goto err;
+- }
+-
+- if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
+- file->f_flags &= ~O_NONBLOCK;
+- fsnotify_open(file);
+-
+- if (!fixed)
+- fd_install(ret, file);
+- else
+- ret = io_install_fixed_file(req, file, issue_flags,
+- req->open.file_slot - 1);
+-err:
+- putname(req->open.filename);
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < 0)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- return io_openat2(req, issue_flags);
+-}
+-
+-static int io_remove_buffers_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_provide_buf *p = &req->pbuf;
+- u64 tmp;
+-
+- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+-
+- tmp = READ_ONCE(sqe->fd);
+- if (!tmp || tmp > USHRT_MAX)
+- return -EINVAL;
+-
+- memset(p, 0, sizeof(*p));
+- p->nbufs = tmp;
+- p->bgid = READ_ONCE(sqe->buf_group);
+- return 0;
+-}
+-
+-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
+- int bgid, unsigned nbufs)
+-{
+- unsigned i = 0;
+-
+- /* shouldn't happen */
+- if (!nbufs)
+- return 0;
+-
+- /* the head kbuf is the list itself */
+- while (!list_empty(&buf->list)) {
+- struct io_buffer *nxt;
+-
+- nxt = list_first_entry(&buf->list, struct io_buffer, list);
+- list_del(&nxt->list);
+- kfree(nxt);
+- if (++i == nbufs)
+- return i;
+- }
+- i++;
+- kfree(buf);
+- xa_erase(&ctx->io_buffers, bgid);
+-
+- return i;
+-}
+-
+-static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_provide_buf *p = &req->pbuf;
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_buffer *head;
+- int ret = 0;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- io_ring_submit_lock(ctx, !force_nonblock);
+-
+- lockdep_assert_held(&ctx->uring_lock);
+-
+- ret = -ENOENT;
+- head = xa_load(&ctx->io_buffers, p->bgid);
+- if (head)
+- ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+- if (ret < 0)
+- req_set_fail(req);
+-
+- /* complete before unlock, IOPOLL may need the lock */
+- __io_req_complete(req, issue_flags, ret, 0);
+- io_ring_submit_unlock(ctx, !force_nonblock);
+- return 0;
+-}
+-
+-static int io_provide_buffers_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- unsigned long size, tmp_check;
+- struct io_provide_buf *p = &req->pbuf;
+- u64 tmp;
+-
+- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+- return -EINVAL;
+-
+- tmp = READ_ONCE(sqe->fd);
+- if (!tmp || tmp > USHRT_MAX)
+- return -E2BIG;
+- p->nbufs = tmp;
+- p->addr = READ_ONCE(sqe->addr);
+- p->len = READ_ONCE(sqe->len);
+-
+- if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
+- &size))
+- return -EOVERFLOW;
+- if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
+- return -EOVERFLOW;
+-
+- size = (unsigned long)p->len * p->nbufs;
+- if (!access_ok(u64_to_user_ptr(p->addr), size))
+- return -EFAULT;
+-
+- p->bgid = READ_ONCE(sqe->buf_group);
+- tmp = READ_ONCE(sqe->off);
+- if (tmp > USHRT_MAX)
+- return -E2BIG;
+- p->bid = tmp;
+- return 0;
+-}
+-
+-static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+-{
+- struct io_buffer *buf;
+- u64 addr = pbuf->addr;
+- int i, bid = pbuf->bid;
+-
+- for (i = 0; i < pbuf->nbufs; i++) {
+- buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+- if (!buf)
+- break;
+-
+- buf->addr = addr;
+- buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
+- buf->bid = bid;
+- addr += pbuf->len;
+- bid++;
+- if (!*head) {
+- INIT_LIST_HEAD(&buf->list);
+- *head = buf;
+- } else {
+- list_add_tail(&buf->list, &(*head)->list);
+- }
+- }
+-
+- return i ? i : -ENOMEM;
+-}
+-
+-static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_provide_buf *p = &req->pbuf;
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_buffer *head, *list;
+- int ret = 0;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- io_ring_submit_lock(ctx, !force_nonblock);
+-
+- lockdep_assert_held(&ctx->uring_lock);
+-
+- list = head = xa_load(&ctx->io_buffers, p->bgid);
+-
+- ret = io_add_buffers(p, &head);
+- if (ret >= 0 && !list) {
+- ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
+- if (ret < 0)
+- __io_remove_buffers(ctx, head, p->bgid, -1U);
+- }
+- if (ret < 0)
+- req_set_fail(req);
+- /* complete before unlock, IOPOLL may need the lock */
+- __io_req_complete(req, issue_flags, ret, 0);
+- io_ring_submit_unlock(ctx, !force_nonblock);
+- return 0;
+-}
+-
+-static int io_epoll_ctl_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+-#if defined(CONFIG_EPOLL)
+- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- req->epoll.epfd = READ_ONCE(sqe->fd);
+- req->epoll.op = READ_ONCE(sqe->len);
+- req->epoll.fd = READ_ONCE(sqe->off);
+-
+- if (ep_op_has_event(req->epoll.op)) {
+- struct epoll_event __user *ev;
+-
+- ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+- return -EFAULT;
+- }
+-
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-#if defined(CONFIG_EPOLL)
+- struct io_epoll *ie = &req->epoll;
+- int ret;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+- if (force_nonblock && ret == -EAGAIN)
+- return -EAGAIN;
+-
+- if (ret < 0)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+-#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- req->madvise.addr = READ_ONCE(sqe->addr);
+- req->madvise.len = READ_ONCE(sqe->len);
+- req->madvise.advice = READ_ONCE(sqe->fadvise_advice);
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+- struct io_madvise *ma = &req->madvise;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-#else
+- return -EOPNOTSUPP;
+-#endif
+-}
+-
+-static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
+- return -EINVAL;
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- req->fadvise.offset = READ_ONCE(sqe->off);
+- req->fadvise.len = READ_ONCE(sqe->len);
+- req->fadvise.advice = READ_ONCE(sqe->fadvise_advice);
+- return 0;
+-}
+-
+-static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_fadvise *fa = &req->fadvise;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK) {
+- switch (fa->advice) {
+- case POSIX_FADV_NORMAL:
+- case POSIX_FADV_RANDOM:
+- case POSIX_FADV_SEQUENTIAL:
+- break;
+- default:
+- return -EAGAIN;
+- }
+- }
+-
+- ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
+- if (ret < 0)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+- return -EINVAL;
+- if (req->flags & REQ_F_FIXED_FILE)
+- return -EBADF;
+-
+- req->statx.dfd = READ_ONCE(sqe->fd);
+- req->statx.mask = READ_ONCE(sqe->len);
+- req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+- req->statx.flags = READ_ONCE(sqe->statx_flags);
+-
+- return 0;
+-}
+-
+-static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_statx *ctx = &req->statx;
+- int ret;
+-
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
+- ctx->buffer);
+-
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
+- sqe->rw_flags || sqe->buf_index)
+- return -EINVAL;
+- if (req->flags & REQ_F_FIXED_FILE)
+- return -EBADF;
+-
+- req->close.fd = READ_ONCE(sqe->fd);
+- req->close.file_slot = READ_ONCE(sqe->file_index);
+- if (req->close.file_slot && req->close.fd)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int io_close(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct files_struct *files = current->files;
+- struct io_close *close = &req->close;
+- struct fdtable *fdt;
+- struct file *file = NULL;
+- int ret = -EBADF;
+-
+- if (req->close.file_slot) {
+- ret = io_close_fixed(req, issue_flags);
+- goto err;
+- }
+-
+- spin_lock(&files->file_lock);
+- fdt = files_fdtable(files);
+- if (close->fd >= fdt->max_fds) {
+- spin_unlock(&files->file_lock);
+- goto err;
+- }
+- file = fdt->fd[close->fd];
+- if (!file || file->f_op == &io_uring_fops) {
+- spin_unlock(&files->file_lock);
+- file = NULL;
+- goto err;
+- }
+-
+- /* if the file has a flush method, be safe and punt to async */
+- if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) {
+- spin_unlock(&files->file_lock);
+- return -EAGAIN;
+- }
+-
+- ret = __close_fd_get_file(close->fd, &file);
+- spin_unlock(&files->file_lock);
+- if (ret < 0) {
+- if (ret == -ENOENT)
+- ret = -EBADF;
+- goto err;
+- }
+-
+- /* No ->flush() or already async, safely close from here */
+- ret = filp_close(file, current->files);
+-err:
+- if (ret < 0)
+- req_set_fail(req);
+- if (file)
+- fput(file);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+- sqe->splice_fd_in))
+- return -EINVAL;
+-
+- req->sync.off = READ_ONCE(sqe->off);
+- req->sync.len = READ_ONCE(sqe->len);
+- req->sync.flags = READ_ONCE(sqe->sync_range_flags);
+- return 0;
+-}
+-
+-static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- int ret;
+-
+- /* sync_file_range always requires a blocking context */
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- return -EAGAIN;
+-
+- ret = sync_file_range(req->file, req->sync.off, req->sync.len,
+- req->sync.flags);
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+-}
+-
+-#if defined(CONFIG_NET)
+-static int io_setup_async_msg(struct io_kiocb *req,
+- struct io_async_msghdr *kmsg)
+-{
+- struct io_async_msghdr *async_msg = req->async_data;
+-
+- if (async_msg)
+- return -EAGAIN;
+- if (io_alloc_async_data(req)) {
+- kfree(kmsg->free_iov);
+- return -ENOMEM;
+- }
+- async_msg = req->async_data;
+- req->flags |= REQ_F_NEED_CLEANUP;
+- memcpy(async_msg, kmsg, sizeof(*kmsg));
+- async_msg->msg.msg_name = &async_msg->addr;
+-	/* if we're using fast_iov, set it to the new one */
+- if (!async_msg->free_iov)
+- async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+-
+- return -EAGAIN;
+-}
+-
+-static int io_sendmsg_copy_hdr(struct io_kiocb *req,
+- struct io_async_msghdr *iomsg)
+-{
+- iomsg->msg.msg_name = &iomsg->addr;
+- iomsg->free_iov = iomsg->fast_iov;
+- return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg,
+- req->sr_msg.msg_flags, &iomsg->free_iov);
+-}
+-
+-static int io_sendmsg_prep_async(struct io_kiocb *req)
+-{
+- int ret;
+-
+- ret = io_sendmsg_copy_hdr(req, req->async_data);
+- if (!ret)
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return ret;
+-}
+-
+-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- sr->len = READ_ONCE(sqe->len);
+- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+- if (sr->msg_flags & MSG_DONTWAIT)
+- req->flags |= REQ_F_NOWAIT;
+-
+-#ifdef CONFIG_COMPAT
+- if (req->ctx->compat)
+- sr->msg_flags |= MSG_CMSG_COMPAT;
+-#endif
+- return 0;
+-}
+-
+-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_async_msghdr iomsg, *kmsg;
+- struct socket *sock;
+- unsigned flags;
+- int min_ret = 0;
+- int ret;
+-
+- sock = sock_from_file(req->file);
+- if (unlikely(!sock))
+- return -ENOTSOCK;
+-
+- kmsg = req->async_data;
+- if (!kmsg) {
+- ret = io_sendmsg_copy_hdr(req, &iomsg);
+- if (ret)
+- return ret;
+- kmsg = &iomsg;
+- }
+-
+- flags = req->sr_msg.msg_flags;
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- flags |= MSG_DONTWAIT;
+- if (flags & MSG_WAITALL)
+- min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+-
+- ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+- return io_setup_async_msg(req, kmsg);
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+-
+- /* fast path, check for non-NULL to avoid function call */
+- if (kmsg->free_iov)
+- kfree(kmsg->free_iov);
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < min_ret)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+- struct msghdr msg;
+- struct iovec iov;
+- struct socket *sock;
+- unsigned flags;
+- int min_ret = 0;
+- int ret;
+-
+- sock = sock_from_file(req->file);
+- if (unlikely(!sock))
+- return -ENOTSOCK;
+-
+- ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
+- if (unlikely(ret))
+- return ret;
+-
+- msg.msg_name = NULL;
+- msg.msg_control = NULL;
+- msg.msg_controllen = 0;
+- msg.msg_namelen = 0;
+-
+- flags = req->sr_msg.msg_flags;
+- if (issue_flags & IO_URING_F_NONBLOCK)
+- flags |= MSG_DONTWAIT;
+- if (flags & MSG_WAITALL)
+- min_ret = iov_iter_count(&msg.msg_iter);
+-
+- msg.msg_flags = flags;
+- ret = sock_sendmsg(sock, &msg);
+- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+- return -EAGAIN;
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+-
+- if (ret < min_ret)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
+- struct io_async_msghdr *iomsg)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+- struct iovec __user *uiov;
+- size_t iov_len;
+- int ret;
+-
+- ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
+- &iomsg->uaddr, &uiov, &iov_len);
+- if (ret)
+- return ret;
+-
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- if (iov_len > 1)
+- return -EINVAL;
+- if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
+- return -EFAULT;
+- sr->len = iomsg->fast_iov[0].iov_len;
+- iomsg->free_iov = NULL;
+- } else {
+- iomsg->free_iov = iomsg->fast_iov;
+- ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+- &iomsg->free_iov, &iomsg->msg.msg_iter,
+- false);
+- if (ret > 0)
+- ret = 0;
+- }
+-
+- return ret;
+-}
+-
+-#ifdef CONFIG_COMPAT
+-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
+- struct io_async_msghdr *iomsg)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+- struct compat_iovec __user *uiov;
+- compat_uptr_t ptr;
+- compat_size_t len;
+- int ret;
+-
+- ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
+- &ptr, &len);
+- if (ret)
+- return ret;
+-
+- uiov = compat_ptr(ptr);
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- compat_ssize_t clen;
+-
+- if (len > 1)
+- return -EINVAL;
+- if (!access_ok(uiov, sizeof(*uiov)))
+- return -EFAULT;
+- if (__get_user(clen, &uiov->iov_len))
+- return -EFAULT;
+- if (clen < 0)
+- return -EINVAL;
+- sr->len = clen;
+- iomsg->free_iov = NULL;
+- } else {
+- iomsg->free_iov = iomsg->fast_iov;
+- ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+- UIO_FASTIOV, &iomsg->free_iov,
+- &iomsg->msg.msg_iter, true);
+- if (ret < 0)
+- return ret;
+- }
+-
+- return 0;
+-}
+-#endif
+-
+-static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+- struct io_async_msghdr *iomsg)
+-{
+- iomsg->msg.msg_name = &iomsg->addr;
+-
+-#ifdef CONFIG_COMPAT
+- if (req->ctx->compat)
+- return __io_compat_recvmsg_copy_hdr(req, iomsg);
+-#endif
+-
+- return __io_recvmsg_copy_hdr(req, iomsg);
+-}
+-
+-static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
+- bool needs_lock)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+- struct io_buffer *kbuf;
+-
+- kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
+- if (IS_ERR(kbuf))
+- return kbuf;
+-
+- sr->kbuf = kbuf;
+- req->flags |= REQ_F_BUFFER_SELECTED;
+- return kbuf;
+-}
+-
+-static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
+-{
+- return io_put_kbuf(req, req->sr_msg.kbuf);
+-}
+-
+-static int io_recvmsg_prep_async(struct io_kiocb *req)
+-{
+- int ret;
+-
+- ret = io_recvmsg_copy_hdr(req, req->async_data);
+- if (!ret)
+- req->flags |= REQ_F_NEED_CLEANUP;
+- return ret;
+-}
+-
+-static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_sr_msg *sr = &req->sr_msg;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+-
+- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- sr->len = READ_ONCE(sqe->len);
+- sr->bgid = READ_ONCE(sqe->buf_group);
+- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+- if (sr->msg_flags & MSG_DONTWAIT)
+- req->flags |= REQ_F_NOWAIT;
+-
+-#ifdef CONFIG_COMPAT
+- if (req->ctx->compat)
+- sr->msg_flags |= MSG_CMSG_COMPAT;
+-#endif
+- return 0;
+-}
+-
+-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_async_msghdr iomsg, *kmsg;
+- struct socket *sock;
+- struct io_buffer *kbuf;
+- unsigned flags;
+- int min_ret = 0;
+- int ret, cflags = 0;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- sock = sock_from_file(req->file);
+- if (unlikely(!sock))
+- return -ENOTSOCK;
+-
+- kmsg = req->async_data;
+- if (!kmsg) {
+- ret = io_recvmsg_copy_hdr(req, &iomsg);
+- if (ret)
+- return ret;
+- kmsg = &iomsg;
+- }
+-
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- kbuf = io_recv_buffer_select(req, !force_nonblock);
+- if (IS_ERR(kbuf))
+- return PTR_ERR(kbuf);
+- kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+- kmsg->fast_iov[0].iov_len = req->sr_msg.len;
+- iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov,
+- 1, req->sr_msg.len);
+- }
+-
+- flags = req->sr_msg.msg_flags;
+- if (force_nonblock)
+- flags |= MSG_DONTWAIT;
+- if (flags & MSG_WAITALL)
+- min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+-
+- ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
+- kmsg->uaddr, flags);
+- if (force_nonblock && ret == -EAGAIN)
+- return io_setup_async_msg(req, kmsg);
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+-
+- if (req->flags & REQ_F_BUFFER_SELECTED)
+- cflags = io_put_recv_kbuf(req);
+- /* fast path, check for non-NULL to avoid function call */
+- if (kmsg->free_iov)
+- kfree(kmsg->free_iov);
+- req->flags &= ~REQ_F_NEED_CLEANUP;
+- if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, cflags);
+- return 0;
+-}
+-
+-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_buffer *kbuf;
+- struct io_sr_msg *sr = &req->sr_msg;
+- struct msghdr msg;
+- void __user *buf = sr->buf;
+- struct socket *sock;
+- struct iovec iov;
+- unsigned flags;
+- int min_ret = 0;
+- int ret, cflags = 0;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- sock = sock_from_file(req->file);
+- if (unlikely(!sock))
+- return -ENOTSOCK;
+-
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- kbuf = io_recv_buffer_select(req, !force_nonblock);
+- if (IS_ERR(kbuf))
+- return PTR_ERR(kbuf);
+- buf = u64_to_user_ptr(kbuf->addr);
+- }
+-
+- ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+- if (unlikely(ret))
+- goto out_free;
+-
+- msg.msg_name = NULL;
+- msg.msg_control = NULL;
+- msg.msg_controllen = 0;
+- msg.msg_namelen = 0;
+- msg.msg_iocb = NULL;
+- msg.msg_flags = 0;
+-
+- flags = req->sr_msg.msg_flags;
+- if (force_nonblock)
+- flags |= MSG_DONTWAIT;
+- if (flags & MSG_WAITALL)
+- min_ret = iov_iter_count(&msg.msg_iter);
+-
+- ret = sock_recvmsg(sock, &msg, flags);
+- if (force_nonblock && ret == -EAGAIN)
+- return -EAGAIN;
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+-out_free:
+- if (req->flags & REQ_F_BUFFER_SELECTED)
+- cflags = io_put_recv_kbuf(req);
+- if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, cflags);
+- return 0;
+-}
+-
+-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_accept *accept = &req->accept;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->len || sqe->buf_index)
+- return -EINVAL;
+-
+- accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+- accept->flags = READ_ONCE(sqe->accept_flags);
+- accept->nofile = rlimit(RLIMIT_NOFILE);
+-
+- accept->file_slot = READ_ONCE(sqe->file_index);
+- if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
+- (accept->flags & SOCK_CLOEXEC)))
+- return -EINVAL;
+- if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+- return -EINVAL;
+- if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+- accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+- return 0;
+-}
+-
+-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_accept *accept = &req->accept;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+- unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
+- bool fixed = !!accept->file_slot;
+- struct file *file;
+- int ret, fd;
+-
+- if (req->file->f_flags & O_NONBLOCK)
+- req->flags |= REQ_F_NOWAIT;
+-
+- if (!fixed) {
+- fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+- if (unlikely(fd < 0))
+- return fd;
+- }
+- file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+- accept->flags);
+- if (IS_ERR(file)) {
+- if (!fixed)
+- put_unused_fd(fd);
+- ret = PTR_ERR(file);
+- if (ret == -EAGAIN && force_nonblock)
+- return -EAGAIN;
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+- req_set_fail(req);
+- } else if (!fixed) {
+- fd_install(fd, file);
+- ret = fd;
+- } else {
+- ret = io_install_fixed_file(req, file, issue_flags,
+- accept->file_slot - 1);
+- }
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_connect_prep_async(struct io_kiocb *req)
+-{
+- struct io_async_connect *io = req->async_data;
+- struct io_connect *conn = &req->connect;
+-
+- return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+-}
+-
+-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_connect *conn = &req->connect;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+-
+- conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+- conn->addr_len = READ_ONCE(sqe->addr2);
+- return 0;
+-}
+-
+-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_async_connect __io, *io;
+- unsigned file_flags;
+- int ret;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-
+- if (req->async_data) {
+- io = req->async_data;
+- } else {
+- ret = move_addr_to_kernel(req->connect.addr,
+- req->connect.addr_len,
+- &__io.address);
+- if (ret)
+- goto out;
+- io = &__io;
+- }
+-
+- file_flags = force_nonblock ? O_NONBLOCK : 0;
+-
+- ret = __sys_connect_file(req->file, &io->address,
+- req->connect.addr_len, file_flags);
+- if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
+- if (req->async_data)
+- return -EAGAIN;
+- if (io_alloc_async_data(req)) {
+- ret = -ENOMEM;
+- goto out;
+- }
+- memcpy(req->async_data, &__io, sizeof(__io));
+- return -EAGAIN;
+- }
+- if (ret == -ERESTARTSYS)
+- ret = -EINTR;
+-out:
+- if (ret < 0)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-#else /* !CONFIG_NET */
+-#define IO_NETOP_FN(op) \
+-static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \
+-{ \
+- return -EOPNOTSUPP; \
+-}
+-
+-#define IO_NETOP_PREP(op) \
+-IO_NETOP_FN(op) \
+-static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
+-{ \
+- return -EOPNOTSUPP; \
+-} \
+-
+-#define IO_NETOP_PREP_ASYNC(op) \
+-IO_NETOP_PREP(op) \
+-static int io_##op##_prep_async(struct io_kiocb *req) \
+-{ \
+- return -EOPNOTSUPP; \
+-}
+-
+-IO_NETOP_PREP_ASYNC(sendmsg);
+-IO_NETOP_PREP_ASYNC(recvmsg);
+-IO_NETOP_PREP_ASYNC(connect);
+-IO_NETOP_PREP(accept);
+-IO_NETOP_FN(send);
+-IO_NETOP_FN(recv);
+-#endif /* CONFIG_NET */
+-
+-struct io_poll_table {
+- struct poll_table_struct pt;
+- struct io_kiocb *req;
+- int nr_entries;
+- int error;
+-};
+-
+-static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+- __poll_t mask, io_req_tw_func_t func)
+-{
+- /* for instances that support it check for an event match first: */
+- if (mask && !(mask & poll->events))
+- return 0;
+-
+- trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
+-
+- list_del_init(&poll->wait.entry);
+-
+- req->result = mask;
+- req->io_task_work.func = func;
+-
+- /*
+- * If this fails, then the task is exiting. When a task exits, the
+- * work gets canceled, so just cancel this request as well instead
+- * of executing it. We can't safely execute it anyway, as we may not
+-	 * have the state needed for it.
+- */
+- io_req_task_work_add(req);
+- return 1;
+-}
+-
+-static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
+- __acquires(&req->ctx->completion_lock)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- /* req->task == current here, checking PF_EXITING is safe */
+- if (unlikely(req->task->flags & PF_EXITING))
+- WRITE_ONCE(poll->canceled, true);
+-
+- if (!req->result && !READ_ONCE(poll->canceled)) {
+- struct poll_table_struct pt = { ._key = poll->events };
+-
+- req->result = vfs_poll(req->file, &pt) & poll->events;
+- }
+-
+- spin_lock(&ctx->completion_lock);
+- if (!req->result && !READ_ONCE(poll->canceled)) {
+- add_wait_queue(poll->head, &poll->wait);
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
+-{
+- /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
+- if (req->opcode == IORING_OP_POLL_ADD)
+- return req->async_data;
+- return req->apoll->double_poll;
+-}
+-
+-static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
+-{
+- if (req->opcode == IORING_OP_POLL_ADD)
+- return &req->poll;
+- return &req->apoll->poll;
+-}
+-
+-static void io_poll_remove_double(struct io_kiocb *req)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- struct io_poll_iocb *poll = io_poll_get_double(req);
+-
+- lockdep_assert_held(&req->ctx->completion_lock);
+-
+- if (poll && poll->head) {
+- struct wait_queue_head *head = poll->head;
+-
+- spin_lock_irq(&head->lock);
+- list_del_init(&poll->wait.entry);
+- if (poll->wait.private)
+- req_ref_put(req);
+- poll->head = NULL;
+- spin_unlock_irq(&head->lock);
+- }
+-}
+-
+-static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- unsigned flags = IORING_CQE_F_MORE;
+- int error;
+-
+- if (READ_ONCE(req->poll.canceled)) {
+- error = -ECANCELED;
+- req->poll.events |= EPOLLONESHOT;
+- } else {
+- error = mangle_poll(mask);
+- }
+- if (req->poll.events & EPOLLONESHOT)
+- flags = 0;
+- if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
+- req->poll.events |= EPOLLONESHOT;
+- flags = 0;
+- }
+- if (flags & IORING_CQE_F_MORE)
+- ctx->cq_extra++;
+-
+- return !(flags & IORING_CQE_F_MORE);
+-}
+-
+-static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- bool done;
+-
+- done = __io_poll_complete(req, mask);
+- io_commit_cqring(req->ctx);
+- return done;
+-}
+-
+-static void io_poll_task_func(struct io_kiocb *req, bool *locked)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_kiocb *nxt;
+-
+- if (io_poll_rewait(req, &req->poll)) {
+- spin_unlock(&ctx->completion_lock);
+- } else {
+- bool done;
+-
+- if (req->poll.done) {
+- spin_unlock(&ctx->completion_lock);
+- return;
+- }
+- done = __io_poll_complete(req, req->result);
+- if (done) {
+- io_poll_remove_double(req);
+- hash_del(&req->hash_node);
+- req->poll.done = true;
+- } else {
+- req->result = 0;
+- add_wait_queue(req->poll.head, &req->poll.wait);
+- }
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+- io_cqring_ev_posted(ctx);
+-
+- if (done) {
+- nxt = io_put_req_find_next(req);
+- if (nxt)
+- io_req_task_submit(nxt, locked);
+- }
+- }
+-}
+-
+-static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+- int sync, void *key)
+-{
+- struct io_kiocb *req = wait->private;
+- struct io_poll_iocb *poll = io_poll_get_single(req);
+- __poll_t mask = key_to_poll(key);
+- unsigned long flags;
+-
+- /* for instances that support it check for an event match first: */
+- if (mask && !(mask & poll->events))
+- return 0;
+- if (!(poll->events & EPOLLONESHOT))
+- return poll->wait.func(&poll->wait, mode, sync, key);
+-
+- list_del_init(&wait->entry);
+-
+- if (poll->head) {
+- bool done;
+-
+- spin_lock_irqsave(&poll->head->lock, flags);
+- done = list_empty(&poll->wait.entry);
+- if (!done)
+- list_del_init(&poll->wait.entry);
+- /* make sure double remove sees this as being gone */
+- wait->private = NULL;
+- spin_unlock_irqrestore(&poll->head->lock, flags);
+- if (!done) {
+- /* use wait func handler, so it matches the rq type */
+- poll->wait.func(&poll->wait, mode, sync, key);
+- }
+- }
+- req_ref_put(req);
+- return 1;
+-}
+-
+-static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
+- wait_queue_func_t wake_func)
+-{
+- poll->head = NULL;
+- poll->done = false;
+- poll->canceled = false;
+-#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
+- /* mask in events that we always want/need */
+- poll->events = events | IO_POLL_UNMASK;
+- INIT_LIST_HEAD(&poll->wait.entry);
+- init_waitqueue_func_entry(&poll->wait, wake_func);
+-}
+-
+-static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+- struct wait_queue_head *head,
+- struct io_poll_iocb **poll_ptr)
+-{
+- struct io_kiocb *req = pt->req;
+-
+- /*
+- * The file being polled uses multiple waitqueues for poll handling
+- * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+- * if this happens.
+- */
+- if (unlikely(pt->nr_entries)) {
+- struct io_poll_iocb *poll_one = poll;
+-
+- /* double add on the same waitqueue head, ignore */
+- if (poll_one->head == head)
+- return;
+- /* already have a 2nd entry, fail a third attempt */
+- if (*poll_ptr) {
+- if ((*poll_ptr)->head == head)
+- return;
+- pt->error = -EINVAL;
+- return;
+- }
+- /*
+- * Can't handle multishot for double wait for now, turn it
+- * into one-shot mode.
+- */
+- if (!(poll_one->events & EPOLLONESHOT))
+- poll_one->events |= EPOLLONESHOT;
+- poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+- if (!poll) {
+- pt->error = -ENOMEM;
+- return;
+- }
+- io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
+- req_ref_get(req);
+- poll->wait.private = req;
+- *poll_ptr = poll;
+- }
+-
+- pt->nr_entries++;
+- poll->head = head;
+-
+- if (poll->events & EPOLLEXCLUSIVE)
+- add_wait_queue_exclusive(head, &poll->wait);
+- else
+- add_wait_queue(head, &poll->wait);
+-}
+-
+-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
+- struct poll_table_struct *p)
+-{
+- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+- struct async_poll *apoll = pt->req->apoll;
+-
+- __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
+-}
+-
+-static void io_async_task_func(struct io_kiocb *req, bool *locked)
+-{
+- struct async_poll *apoll = req->apoll;
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
+-
+- if (io_poll_rewait(req, &apoll->poll)) {
+- spin_unlock(&ctx->completion_lock);
+- return;
+- }
+-
+- hash_del(&req->hash_node);
+- io_poll_remove_double(req);
+- apoll->poll.done = true;
+- spin_unlock(&ctx->completion_lock);
+-
+- if (!READ_ONCE(apoll->poll.canceled))
+- io_req_task_submit(req, locked);
+- else
+- io_req_complete_failed(req, -ECANCELED);
+-}
+-
+-static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+- void *key)
+-{
+- struct io_kiocb *req = wait->private;
+- struct io_poll_iocb *poll = &req->apoll->poll;
+-
+- trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
+- key_to_poll(key));
+-
+- return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
+-}
+-
+-static void io_poll_req_insert(struct io_kiocb *req)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct hlist_head *list;
+-
+- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+- hlist_add_head(&req->hash_node, list);
+-}
+-
+-static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
+- struct io_poll_iocb *poll,
+- struct io_poll_table *ipt, __poll_t mask,
+- wait_queue_func_t wake_func)
+- __acquires(&ctx->completion_lock)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- bool cancel = false;
+-
+- INIT_HLIST_NODE(&req->hash_node);
+- io_init_poll_iocb(poll, mask, wake_func);
+- poll->file = req->file;
+- poll->wait.private = req;
+-
+- ipt->pt._key = mask;
+- ipt->req = req;
+- ipt->error = 0;
+- ipt->nr_entries = 0;
+-
+- mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+- if (unlikely(!ipt->nr_entries) && !ipt->error)
+- ipt->error = -EINVAL;
+-
+- spin_lock(&ctx->completion_lock);
+- if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+- io_poll_remove_double(req);
+- if (likely(poll->head)) {
+- spin_lock_irq(&poll->head->lock);
+- if (unlikely(list_empty(&poll->wait.entry))) {
+- if (ipt->error)
+- cancel = true;
+- ipt->error = 0;
+- mask = 0;
+- }
+- if ((mask && (poll->events & EPOLLONESHOT)) || ipt->error)
+- list_del_init(&poll->wait.entry);
+- else if (cancel)
+- WRITE_ONCE(poll->canceled, true);
+- else if (!poll->done) /* actually waiting for an event */
+- io_poll_req_insert(req);
+- spin_unlock_irq(&poll->head->lock);
+- }
+-
+- return mask;
+-}
+-
+-enum {
+- IO_APOLL_OK,
+- IO_APOLL_ABORTED,
+- IO_APOLL_READY
+-};
+-
+-static int io_arm_poll_handler(struct io_kiocb *req)
+-{
+- const struct io_op_def *def = &io_op_defs[req->opcode];
+- struct io_ring_ctx *ctx = req->ctx;
+- struct async_poll *apoll;
+- struct io_poll_table ipt;
+- __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI;
+- int rw;
+-
+- if (!req->file || !file_can_poll(req->file))
+- return IO_APOLL_ABORTED;
+- if (req->flags & REQ_F_POLLED)
+- return IO_APOLL_ABORTED;
+- if (!def->pollin && !def->pollout)
+- return IO_APOLL_ABORTED;
+-
+- if (def->pollin) {
+- rw = READ;
+- mask |= POLLIN | POLLRDNORM;
+-
+- /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
+- if ((req->opcode == IORING_OP_RECVMSG) &&
+- (req->sr_msg.msg_flags & MSG_ERRQUEUE))
+- mask &= ~POLLIN;
+- } else {
+- rw = WRITE;
+- mask |= POLLOUT | POLLWRNORM;
+- }
+-
+- /* if we can't nonblock try, then no point in arming a poll handler */
+- if (!io_file_supports_nowait(req, rw))
+- return IO_APOLL_ABORTED;
+-
+- apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+- if (unlikely(!apoll))
+- return IO_APOLL_ABORTED;
+- apoll->double_poll = NULL;
+- req->apoll = apoll;
+- req->flags |= REQ_F_POLLED;
+- ipt.pt._qproc = io_async_queue_proc;
+- io_req_set_refcount(req);
+-
+- ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
+- io_async_wake);
+- spin_unlock(&ctx->completion_lock);
+- if (ret || ipt.error)
+- return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
+-
+- trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
+- mask, apoll->poll.events);
+- return IO_APOLL_OK;
+-}
+-
+-static bool __io_poll_remove_one(struct io_kiocb *req,
+- struct io_poll_iocb *poll, bool do_cancel)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- bool do_complete = false;
+-
+- if (!poll->head)
+- return false;
+- spin_lock_irq(&poll->head->lock);
+- if (do_cancel)
+- WRITE_ONCE(poll->canceled, true);
+- if (!list_empty(&poll->wait.entry)) {
+- list_del_init(&poll->wait.entry);
+- do_complete = true;
+- }
+- spin_unlock_irq(&poll->head->lock);
+- hash_del(&req->hash_node);
+- return do_complete;
+-}
+-
+-static bool io_poll_remove_one(struct io_kiocb *req)
+- __must_hold(&req->ctx->completion_lock)
+-{
+- bool do_complete;
+-
+- io_poll_remove_double(req);
+- do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
+-
+- if (do_complete) {
+- io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
+- io_commit_cqring(req->ctx);
+- req_set_fail(req);
+- io_put_req_deferred(req);
+- }
+- return do_complete;
+-}
+-
+-/*
+- * Returns true if we found and killed one or more poll requests
+- */
+-static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
+- bool cancel_all)
+-{
+- struct hlist_node *tmp;
+- struct io_kiocb *req;
+- int posted = 0, i;
+-
+- spin_lock(&ctx->completion_lock);
+- for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+- struct hlist_head *list;
+-
+- list = &ctx->cancel_hash[i];
+- hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+- if (io_match_task(req, tsk, cancel_all))
+- posted += io_poll_remove_one(req);
+- }
+- }
+- spin_unlock(&ctx->completion_lock);
+-
+- if (posted)
+- io_cqring_ev_posted(ctx);
+-
+- return posted != 0;
+-}
+-
+-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
+- bool poll_only)
+- __must_hold(&ctx->completion_lock)
+-{
+- struct hlist_head *list;
+- struct io_kiocb *req;
+-
+- list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
+- hlist_for_each_entry(req, list, hash_node) {
+- if (sqe_addr != req->user_data)
+- continue;
+- if (poll_only && req->opcode != IORING_OP_POLL_ADD)
+- continue;
+- return req;
+- }
+- return NULL;
+-}
+-
+-static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
+- bool poll_only)
+- __must_hold(&ctx->completion_lock)
+-{
+- struct io_kiocb *req;
+-
+- req = io_poll_find(ctx, sqe_addr, poll_only);
+- if (!req)
+- return -ENOENT;
+- if (io_poll_remove_one(req))
+- return 0;
+-
+- return -EALREADY;
+-}
+-
+-static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
+- unsigned int flags)
+-{
+- u32 events;
+-
+- events = READ_ONCE(sqe->poll32_events);
+-#ifdef __BIG_ENDIAN
+- events = swahw32(events);
+-#endif
+- if (!(flags & IORING_POLL_ADD_MULTI))
+- events |= EPOLLONESHOT;
+- return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
+-}
+-
+-static int io_poll_update_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_poll_update *upd = &req->poll_update;
+- u32 flags;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+- return -EINVAL;
+- flags = READ_ONCE(sqe->len);
+- if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
+- IORING_POLL_ADD_MULTI))
+- return -EINVAL;
+- /* meaningless without update */
+- if (flags == IORING_POLL_ADD_MULTI)
+- return -EINVAL;
+-
+- upd->old_user_data = READ_ONCE(sqe->addr);
+- upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+- upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+-
+- upd->new_user_data = READ_ONCE(sqe->off);
+- if (!upd->update_user_data && upd->new_user_data)
+- return -EINVAL;
+- if (upd->update_events)
+- upd->events = io_poll_parse_events(sqe, flags);
+- else if (sqe->poll32_events)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+- void *key)
+-{
+- struct io_kiocb *req = wait->private;
+- struct io_poll_iocb *poll = &req->poll;
+-
+- return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
+-}
+-
+-static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+- struct poll_table_struct *p)
+-{
+- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+-
+- __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data);
+-}
+-
+-static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- struct io_poll_iocb *poll = &req->poll;
+- u32 flags;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+- return -EINVAL;
+- flags = READ_ONCE(sqe->len);
+- if (flags & ~IORING_POLL_ADD_MULTI)
+- return -EINVAL;
+-
+- io_req_set_refcount(req);
+- poll->events = io_poll_parse_events(sqe, flags);
+- return 0;
+-}
+-
+-static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_poll_iocb *poll = &req->poll;
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_poll_table ipt;
+- __poll_t mask;
+- bool done;
+-
+- ipt.pt._qproc = io_poll_queue_proc;
+-
+- mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
+- io_poll_wake);
+-
+- if (mask) { /* no async, we'd stolen it */
+- ipt.error = 0;
+- done = io_poll_complete(req, mask);
+- }
+- spin_unlock(&ctx->completion_lock);
+-
+- if (mask) {
+- io_cqring_ev_posted(ctx);
+- if (done)
+- io_put_req(req);
+- }
+- return ipt.error;
+-}
+-
+-static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_kiocb *preq;
+- bool completing;
+- int ret;
+-
+- spin_lock(&ctx->completion_lock);
+- preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+- if (!preq) {
+- ret = -ENOENT;
+- goto err;
+- }
+-
+- if (!req->poll_update.update_events && !req->poll_update.update_user_data) {
+- completing = true;
+- ret = io_poll_remove_one(preq) ? 0 : -EALREADY;
+- goto err;
+- }
+-
+- /*
+- * Don't allow racy completion with singleshot, as we cannot safely
+- * update those. For multishot, if we're racing with completion, just
+- * let completion re-add it.
+- */
+- completing = !__io_poll_remove_one(preq, &preq->poll, false);
+- if (completing && (preq->poll.events & EPOLLONESHOT)) {
+- ret = -EALREADY;
+- goto err;
+- }
+- /* we now have a detached poll request. reissue. */
+- ret = 0;
+-err:
+- if (ret < 0) {
+- spin_unlock(&ctx->completion_lock);
+- req_set_fail(req);
+- io_req_complete(req, ret);
+- return 0;
+- }
+- /* only mask one event flags, keep behavior flags */
+- if (req->poll_update.update_events) {
+- preq->poll.events &= ~0xffff;
+- preq->poll.events |= req->poll_update.events & 0xffff;
+- preq->poll.events |= IO_POLL_UNMASK;
+- }
+- if (req->poll_update.update_user_data)
+- preq->user_data = req->poll_update.new_user_data;
+- spin_unlock(&ctx->completion_lock);
+-
+- /* complete update request, we're done with it */
+- io_req_complete(req, ret);
+-
+- if (!completing) {
+- ret = io_poll_add(preq, issue_flags);
+- if (ret < 0) {
+- req_set_fail(preq);
+- io_req_complete(preq, ret);
+- }
+- }
+- return 0;
+-}
+-
+-static void io_req_task_timeout(struct io_kiocb *req, bool *locked)
+-{
+- req_set_fail(req);
+- io_req_complete_post(req, -ETIME, 0);
+-}
+-
+-static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+-{
+- struct io_timeout_data *data = container_of(timer,
+- struct io_timeout_data, timer);
+- struct io_kiocb *req = data->req;
+- struct io_ring_ctx *ctx = req->ctx;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&ctx->timeout_lock, flags);
+- list_del_init(&req->timeout.list);
+- atomic_set(&req->ctx->cq_timeouts,
+- atomic_read(&req->ctx->cq_timeouts) + 1);
+- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+-
+- req->io_task_work.func = io_req_task_timeout;
+- io_req_task_work_add(req);
+- return HRTIMER_NORESTART;
+-}
+-
+-static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
+- __u64 user_data)
+- __must_hold(&ctx->timeout_lock)
+-{
+- struct io_timeout_data *io;
+- struct io_kiocb *req;
+- bool found = false;
+-
+- list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
+- found = user_data == req->user_data;
+- if (found)
+- break;
+- }
+- if (!found)
+- return ERR_PTR(-ENOENT);
+-
+- io = req->async_data;
+- if (hrtimer_try_to_cancel(&io->timer) == -1)
+- return ERR_PTR(-EALREADY);
+- list_del_init(&req->timeout.list);
+- return req;
+-}
+-
+-static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+- __must_hold(&ctx->completion_lock)
+- __must_hold(&ctx->timeout_lock)
+-{
+- struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+-
+- if (IS_ERR(req))
+- return PTR_ERR(req);
+-
+- req_set_fail(req);
+- io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
+- io_put_req_deferred(req);
+- return 0;
+-}
+-
+-static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
+-{
+- switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
+- case IORING_TIMEOUT_BOOTTIME:
+- return CLOCK_BOOTTIME;
+- case IORING_TIMEOUT_REALTIME:
+- return CLOCK_REALTIME;
+- default:
+- /* can't happen, vetted at prep time */
+- WARN_ON_ONCE(1);
+- fallthrough;
+- case 0:
+- return CLOCK_MONOTONIC;
+- }
+-}
+-
+-static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+- struct timespec64 *ts, enum hrtimer_mode mode)
+- __must_hold(&ctx->timeout_lock)
+-{
+- struct io_timeout_data *io;
+- struct io_kiocb *req;
+- bool found = false;
+-
+- list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
+- found = user_data == req->user_data;
+- if (found)
+- break;
+- }
+- if (!found)
+- return -ENOENT;
+-
+- io = req->async_data;
+- if (hrtimer_try_to_cancel(&io->timer) == -1)
+- return -EALREADY;
+- hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
+- io->timer.function = io_link_timeout_fn;
+- hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
+- return 0;
+-}
+-
+-static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+- struct timespec64 *ts, enum hrtimer_mode mode)
+- __must_hold(&ctx->timeout_lock)
+-{
+- struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+- struct io_timeout_data *data;
+-
+- if (IS_ERR(req))
+- return PTR_ERR(req);
+-
+- req->timeout.off = 0; /* noseq */
+- data = req->async_data;
+- list_add_tail(&req->timeout.list, &ctx->timeout_list);
+- hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
+- data->timer.function = io_timeout_fn;
+- hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
+- return 0;
+-}
+-
+-static int io_timeout_remove_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- struct io_timeout_rem *tr = &req->timeout_rem;
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+- return -EINVAL;
+-
+- tr->ltimeout = false;
+- tr->addr = READ_ONCE(sqe->addr);
+- tr->flags = READ_ONCE(sqe->timeout_flags);
+- if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
+- if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+- return -EINVAL;
+- if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
+- tr->ltimeout = true;
+- if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
+- return -EINVAL;
+- if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
+- return -EFAULT;
+- } else if (tr->flags) {
+- /* timeout removal doesn't support flags */
+- return -EINVAL;
+- }
+-
+- return 0;
+-}
+-
+-static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
+-{
+- return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
+- : HRTIMER_MODE_REL;
+-}
+-
+-/*
+- * Remove or update an existing timeout command
+- */
+-static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_timeout_rem *tr = &req->timeout_rem;
+- struct io_ring_ctx *ctx = req->ctx;
+- int ret;
+-
+- if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+- spin_lock(&ctx->completion_lock);
+- spin_lock_irq(&ctx->timeout_lock);
+- ret = io_timeout_cancel(ctx, tr->addr);
+- spin_unlock_irq(&ctx->timeout_lock);
+- spin_unlock(&ctx->completion_lock);
+- } else {
+- enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
+-
+- spin_lock_irq(&ctx->timeout_lock);
+- if (tr->ltimeout)
+- ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
+- else
+- ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
+- spin_unlock_irq(&ctx->timeout_lock);
+- }
+-
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete_post(req, ret, 0);
+- return 0;
+-}
+-
+-static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+- bool is_timeout_link)
+-{
+- struct io_timeout_data *data;
+- unsigned flags;
+- u32 off = READ_ONCE(sqe->off);
+-
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+- if (off && is_timeout_link)
+- return -EINVAL;
+- flags = READ_ONCE(sqe->timeout_flags);
+- if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK))
+- return -EINVAL;
+- /* more than one clock specified is invalid, obviously */
+- if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+- return -EINVAL;
+-
+- INIT_LIST_HEAD(&req->timeout.list);
+- req->timeout.off = off;
+- if (unlikely(off && !req->ctx->off_timeout_used))
+- req->ctx->off_timeout_used = true;
+-
+- if (!req->async_data && io_alloc_async_data(req))
+- return -ENOMEM;
+-
+- data = req->async_data;
+- data->req = req;
+- data->flags = flags;
+-
+- if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
+- return -EFAULT;
+-
+- data->mode = io_translate_timeout_mode(flags);
+- hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
+-
+- if (is_timeout_link) {
+- struct io_submit_link *link = &req->ctx->submit_state.link;
+-
+- if (!link->head)
+- return -EINVAL;
+- if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
+- return -EINVAL;
+- req->timeout.head = link->last;
+- link->last->flags |= REQ_F_ARM_LTIMEOUT;
+- }
+- return 0;
+-}
+-
+-static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_timeout_data *data = req->async_data;
+- struct list_head *entry;
+- u32 tail, off = req->timeout.off;
+-
+- spin_lock_irq(&ctx->timeout_lock);
+-
+- /*
+-	 * sqe->off holds how many events need to occur for this
+- * timeout event to be satisfied. If it isn't set, then this is
+- * a pure timeout request, sequence isn't used.
+- */
+- if (io_is_timeout_noseq(req)) {
+- entry = ctx->timeout_list.prev;
+- goto add;
+- }
+-
+- tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+- req->timeout.target_seq = tail + off;
+-
+- /* Update the last seq here in case io_flush_timeouts() hasn't.
+- * This is safe because ->completion_lock is held, and submissions
+- * and completions are never mixed in the same ->completion_lock section.
+- */
+- ctx->cq_last_tm_flush = tail;
+-
+- /*
+- * Insertion sort, ensuring the first entry in the list is always
+- * the one we need first.
+- */
+- list_for_each_prev(entry, &ctx->timeout_list) {
+- struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
+- timeout.list);
+-
+- if (io_is_timeout_noseq(nxt))
+- continue;
+- /* nxt.seq is behind @tail, otherwise would've been completed */
+- if (off >= nxt->timeout.target_seq - tail)
+- break;
+- }
+-add:
+- list_add(&req->timeout.list, entry);
+- data->timer.function = io_timeout_fn;
+- hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
+- spin_unlock_irq(&ctx->timeout_lock);
+- return 0;
+-}
+-
+-struct io_cancel_data {
+- struct io_ring_ctx *ctx;
+- u64 user_data;
+-};
+-
+-static bool io_cancel_cb(struct io_wq_work *work, void *data)
+-{
+- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+- struct io_cancel_data *cd = data;
+-
+- return req->ctx == cd->ctx && req->user_data == cd->user_data;
+-}
+-
+-static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
+- struct io_ring_ctx *ctx)
+-{
+- struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
+- enum io_wq_cancel cancel_ret;
+- int ret = 0;
+-
+- if (!tctx || !tctx->io_wq)
+- return -ENOENT;
+-
+- cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
+- switch (cancel_ret) {
+- case IO_WQ_CANCEL_OK:
+- ret = 0;
+- break;
+- case IO_WQ_CANCEL_RUNNING:
+- ret = -EALREADY;
+- break;
+- case IO_WQ_CANCEL_NOTFOUND:
+- ret = -ENOENT;
+- break;
+- }
+-
+- return ret;
+-}
+-
+-static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- int ret;
+-
+- WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
+-
+- ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+- if (ret != -ENOENT)
+- return ret;
+-
+- spin_lock(&ctx->completion_lock);
+- spin_lock_irq(&ctx->timeout_lock);
+- ret = io_timeout_cancel(ctx, sqe_addr);
+- spin_unlock_irq(&ctx->timeout_lock);
+- if (ret != -ENOENT)
+- goto out;
+- ret = io_poll_cancel(ctx, sqe_addr, false);
+-out:
+- spin_unlock(&ctx->completion_lock);
+- return ret;
+-}
+-
+-static int io_async_cancel_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+- return -EINVAL;
+- if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
+- sqe->splice_fd_in)
+- return -EINVAL;
+-
+- req->cancel.addr = READ_ONCE(sqe->addr);
+- return 0;
+-}
+-
+-static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- u64 sqe_addr = req->cancel.addr;
+- struct io_tctx_node *node;
+- int ret;
+-
+- ret = io_try_cancel_userdata(req, sqe_addr);
+- if (ret != -ENOENT)
+- goto done;
+-
+- /* slow path, try all io-wq's */
+- io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+- ret = -ENOENT;
+- list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+- struct io_uring_task *tctx = node->task->io_uring;
+-
+- ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
+- if (ret != -ENOENT)
+- break;
+- }
+- io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-done:
+- if (ret < 0)
+- req_set_fail(req);
+- io_req_complete_post(req, ret, 0);
+- return 0;
+-}
+-
+-static int io_rsrc_update_prep(struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+-{
+- if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+- return -EINVAL;
+- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+- return -EINVAL;
+-
+- req->rsrc_update.offset = READ_ONCE(sqe->off);
+- req->rsrc_update.nr_args = READ_ONCE(sqe->len);
+- if (!req->rsrc_update.nr_args)
+- return -EINVAL;
+- req->rsrc_update.arg = READ_ONCE(sqe->addr);
+- return 0;
+-}
+-
+-static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_uring_rsrc_update2 up;
+- int ret;
+-
+- up.offset = req->rsrc_update.offset;
+- up.data = req->rsrc_update.arg;
+- up.nr = 0;
+- up.tags = 0;
+- up.resv = 0;
+-
+- io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+- ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
+- &up, req->rsrc_update.nr_args);
+- io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-
+- if (ret < 0)
+- req_set_fail(req);
+- __io_req_complete(req, issue_flags, ret, 0);
+- return 0;
+-}
+-
+-static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+- switch (req->opcode) {
+- case IORING_OP_NOP:
+- return 0;
+- case IORING_OP_READV:
+- case IORING_OP_READ_FIXED:
+- case IORING_OP_READ:
+- return io_read_prep(req, sqe);
+- case IORING_OP_WRITEV:
+- case IORING_OP_WRITE_FIXED:
+- case IORING_OP_WRITE:
+- return io_write_prep(req, sqe);
+- case IORING_OP_POLL_ADD:
+- return io_poll_add_prep(req, sqe);
+- case IORING_OP_POLL_REMOVE:
+- return io_poll_update_prep(req, sqe);
+- case IORING_OP_FSYNC:
+- return io_fsync_prep(req, sqe);
+- case IORING_OP_SYNC_FILE_RANGE:
+- return io_sfr_prep(req, sqe);
+- case IORING_OP_SENDMSG:
+- case IORING_OP_SEND:
+- return io_sendmsg_prep(req, sqe);
+- case IORING_OP_RECVMSG:
+- case IORING_OP_RECV:
+- return io_recvmsg_prep(req, sqe);
+- case IORING_OP_CONNECT:
+- return io_connect_prep(req, sqe);
+- case IORING_OP_TIMEOUT:
+- return io_timeout_prep(req, sqe, false);
+- case IORING_OP_TIMEOUT_REMOVE:
+- return io_timeout_remove_prep(req, sqe);
+- case IORING_OP_ASYNC_CANCEL:
+- return io_async_cancel_prep(req, sqe);
+- case IORING_OP_LINK_TIMEOUT:
+- return io_timeout_prep(req, sqe, true);
+- case IORING_OP_ACCEPT:
+- return io_accept_prep(req, sqe);
+- case IORING_OP_FALLOCATE:
+- return io_fallocate_prep(req, sqe);
+- case IORING_OP_OPENAT:
+- return io_openat_prep(req, sqe);
+- case IORING_OP_CLOSE:
+- return io_close_prep(req, sqe);
+- case IORING_OP_FILES_UPDATE:
+- return io_rsrc_update_prep(req, sqe);
+- case IORING_OP_STATX:
+- return io_statx_prep(req, sqe);
+- case IORING_OP_FADVISE:
+- return io_fadvise_prep(req, sqe);
+- case IORING_OP_MADVISE:
+- return io_madvise_prep(req, sqe);
+- case IORING_OP_OPENAT2:
+- return io_openat2_prep(req, sqe);
+- case IORING_OP_EPOLL_CTL:
+- return io_epoll_ctl_prep(req, sqe);
+- case IORING_OP_SPLICE:
+- return io_splice_prep(req, sqe);
+- case IORING_OP_PROVIDE_BUFFERS:
+- return io_provide_buffers_prep(req, sqe);
+- case IORING_OP_REMOVE_BUFFERS:
+- return io_remove_buffers_prep(req, sqe);
+- case IORING_OP_TEE:
+- return io_tee_prep(req, sqe);
+- case IORING_OP_SHUTDOWN:
+- return io_shutdown_prep(req, sqe);
+- case IORING_OP_RENAMEAT:
+- return io_renameat_prep(req, sqe);
+- case IORING_OP_UNLINKAT:
+- return io_unlinkat_prep(req, sqe);
+- case IORING_OP_MKDIRAT:
+- return io_mkdirat_prep(req, sqe);
+- case IORING_OP_SYMLINKAT:
+- return io_symlinkat_prep(req, sqe);
+- case IORING_OP_LINKAT:
+- return io_linkat_prep(req, sqe);
+- }
+-
+- printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
+- req->opcode);
+- return -EINVAL;
+-}
+-
+-static int io_req_prep_async(struct io_kiocb *req)
+-{
+- if (!io_op_defs[req->opcode].needs_async_setup)
+- return 0;
+- if (WARN_ON_ONCE(req->async_data))
+- return -EFAULT;
+- if (io_alloc_async_data(req))
+- return -EAGAIN;
+-
+- switch (req->opcode) {
+- case IORING_OP_READV:
+- return io_rw_prep_async(req, READ);
+- case IORING_OP_WRITEV:
+- return io_rw_prep_async(req, WRITE);
+- case IORING_OP_SENDMSG:
+- return io_sendmsg_prep_async(req);
+- case IORING_OP_RECVMSG:
+- return io_recvmsg_prep_async(req);
+- case IORING_OP_CONNECT:
+- return io_connect_prep_async(req);
+- }
+- printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
+- req->opcode);
+- return -EFAULT;
+-}
+-
+-static u32 io_get_sequence(struct io_kiocb *req)
+-{
+- u32 seq = req->ctx->cached_sq_head;
+-
+- /* need original cached_sq_head, but it was increased for each req */
+- io_for_each_link(req, req)
+- seq--;
+- return seq;
+-}
+-
+-static bool io_drain_req(struct io_kiocb *req)
+-{
+- struct io_kiocb *pos;
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_defer_entry *de;
+- int ret;
+- u32 seq;
+-
+- if (req->flags & REQ_F_FAIL) {
+- io_req_complete_fail_submit(req);
+- return true;
+- }
+-
+- /*
+- * If we need to drain a request in the middle of a link, drain the
+- * head request and the next request/link after the current link.
+- * Considering sequential execution of links, IOSQE_IO_DRAIN will be
+- * maintained for every request of our link.
+- */
+- if (ctx->drain_next) {
+- req->flags |= REQ_F_IO_DRAIN;
+- ctx->drain_next = false;
+- }
+- /* not interested in head, start from the first linked */
+- io_for_each_link(pos, req->link) {
+- if (pos->flags & REQ_F_IO_DRAIN) {
+- ctx->drain_next = true;
+- req->flags |= REQ_F_IO_DRAIN;
+- break;
+- }
+- }
+-
+- /* Still need defer if there is pending req in defer list. */
+- if (likely(list_empty_careful(&ctx->defer_list) &&
+- !(req->flags & REQ_F_IO_DRAIN))) {
+- ctx->drain_active = false;
+- return false;
+- }
+-
+- seq = io_get_sequence(req);
+- /* Still a chance to pass the sequence check */
+- if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
+- return false;
+-
+- ret = io_req_prep_async(req);
+- if (ret)
+- goto fail;
+- io_prep_async_link(req);
+- de = kmalloc(sizeof(*de), GFP_KERNEL);
+- if (!de) {
+- ret = -ENOMEM;
+-fail:
+- io_req_complete_failed(req, ret);
+- return true;
+- }
+-
+- spin_lock(&ctx->completion_lock);
+- if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
+- spin_unlock(&ctx->completion_lock);
+- kfree(de);
+- io_queue_async_work(req, NULL);
+- return true;
+- }
+-
+- trace_io_uring_defer(ctx, req, req->user_data);
+- de->req = req;
+- de->seq = seq;
+- list_add_tail(&de->list, &ctx->defer_list);
+- spin_unlock(&ctx->completion_lock);
+- return true;
+-}
+-
+-static void io_clean_op(struct io_kiocb *req)
+-{
+- if (req->flags & REQ_F_BUFFER_SELECTED) {
+- switch (req->opcode) {
+- case IORING_OP_READV:
+- case IORING_OP_READ_FIXED:
+- case IORING_OP_READ:
+- kfree((void *)(unsigned long)req->rw.addr);
+- break;
+- case IORING_OP_RECVMSG:
+- case IORING_OP_RECV:
+- kfree(req->sr_msg.kbuf);
+- break;
+- }
+- }
+-
+- if (req->flags & REQ_F_NEED_CLEANUP) {
+- switch (req->opcode) {
+- case IORING_OP_READV:
+- case IORING_OP_READ_FIXED:
+- case IORING_OP_READ:
+- case IORING_OP_WRITEV:
+- case IORING_OP_WRITE_FIXED:
+- case IORING_OP_WRITE: {
+- struct io_async_rw *io = req->async_data;
+-
+- kfree(io->free_iovec);
+- break;
+- }
+- case IORING_OP_RECVMSG:
+- case IORING_OP_SENDMSG: {
+- struct io_async_msghdr *io = req->async_data;
+-
+- kfree(io->free_iov);
+- break;
+- }
+- case IORING_OP_SPLICE:
+- case IORING_OP_TEE:
+- if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED))
+- io_put_file(req->splice.file_in);
+- break;
+- case IORING_OP_OPENAT:
+- case IORING_OP_OPENAT2:
+- if (req->open.filename)
+- putname(req->open.filename);
+- break;
+- case IORING_OP_RENAMEAT:
+- putname(req->rename.oldpath);
+- putname(req->rename.newpath);
+- break;
+- case IORING_OP_UNLINKAT:
+- putname(req->unlink.filename);
+- break;
+- case IORING_OP_MKDIRAT:
+- putname(req->mkdir.filename);
+- break;
+- case IORING_OP_SYMLINKAT:
+- putname(req->symlink.oldpath);
+- putname(req->symlink.newpath);
+- break;
+- case IORING_OP_LINKAT:
+- putname(req->hardlink.oldpath);
+- putname(req->hardlink.newpath);
+- break;
+- }
+- }
+- if ((req->flags & REQ_F_POLLED) && req->apoll) {
+- kfree(req->apoll->double_poll);
+- kfree(req->apoll);
+- req->apoll = NULL;
+- }
+- if (req->flags & REQ_F_INFLIGHT) {
+- struct io_uring_task *tctx = req->task->io_uring;
+-
+- atomic_dec(&tctx->inflight_tracked);
+- }
+- if (req->flags & REQ_F_CREDS)
+- put_cred(req->creds);
+-
+- req->flags &= ~IO_REQ_CLEAN_FLAGS;
+-}
+-
+-static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- const struct cred *creds = NULL;
+- int ret;
+-
+- if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
+- creds = override_creds(req->creds);
+-
+- switch (req->opcode) {
+- case IORING_OP_NOP:
+- ret = io_nop(req, issue_flags);
+- break;
+- case IORING_OP_READV:
+- case IORING_OP_READ_FIXED:
+- case IORING_OP_READ:
+- ret = io_read(req, issue_flags);
+- break;
+- case IORING_OP_WRITEV:
+- case IORING_OP_WRITE_FIXED:
+- case IORING_OP_WRITE:
+- ret = io_write(req, issue_flags);
+- break;
+- case IORING_OP_FSYNC:
+- ret = io_fsync(req, issue_flags);
+- break;
+- case IORING_OP_POLL_ADD:
+- ret = io_poll_add(req, issue_flags);
+- break;
+- case IORING_OP_POLL_REMOVE:
+- ret = io_poll_update(req, issue_flags);
+- break;
+- case IORING_OP_SYNC_FILE_RANGE:
+- ret = io_sync_file_range(req, issue_flags);
+- break;
+- case IORING_OP_SENDMSG:
+- ret = io_sendmsg(req, issue_flags);
+- break;
+- case IORING_OP_SEND:
+- ret = io_send(req, issue_flags);
+- break;
+- case IORING_OP_RECVMSG:
+- ret = io_recvmsg(req, issue_flags);
+- break;
+- case IORING_OP_RECV:
+- ret = io_recv(req, issue_flags);
+- break;
+- case IORING_OP_TIMEOUT:
+- ret = io_timeout(req, issue_flags);
+- break;
+- case IORING_OP_TIMEOUT_REMOVE:
+- ret = io_timeout_remove(req, issue_flags);
+- break;
+- case IORING_OP_ACCEPT:
+- ret = io_accept(req, issue_flags);
+- break;
+- case IORING_OP_CONNECT:
+- ret = io_connect(req, issue_flags);
+- break;
+- case IORING_OP_ASYNC_CANCEL:
+- ret = io_async_cancel(req, issue_flags);
+- break;
+- case IORING_OP_FALLOCATE:
+- ret = io_fallocate(req, issue_flags);
+- break;
+- case IORING_OP_OPENAT:
+- ret = io_openat(req, issue_flags);
+- break;
+- case IORING_OP_CLOSE:
+- ret = io_close(req, issue_flags);
+- break;
+- case IORING_OP_FILES_UPDATE:
+- ret = io_files_update(req, issue_flags);
+- break;
+- case IORING_OP_STATX:
+- ret = io_statx(req, issue_flags);
+- break;
+- case IORING_OP_FADVISE:
+- ret = io_fadvise(req, issue_flags);
+- break;
+- case IORING_OP_MADVISE:
+- ret = io_madvise(req, issue_flags);
+- break;
+- case IORING_OP_OPENAT2:
+- ret = io_openat2(req, issue_flags);
+- break;
+- case IORING_OP_EPOLL_CTL:
+- ret = io_epoll_ctl(req, issue_flags);
+- break;
+- case IORING_OP_SPLICE:
+- ret = io_splice(req, issue_flags);
+- break;
+- case IORING_OP_PROVIDE_BUFFERS:
+- ret = io_provide_buffers(req, issue_flags);
+- break;
+- case IORING_OP_REMOVE_BUFFERS:
+- ret = io_remove_buffers(req, issue_flags);
+- break;
+- case IORING_OP_TEE:
+- ret = io_tee(req, issue_flags);
+- break;
+- case IORING_OP_SHUTDOWN:
+- ret = io_shutdown(req, issue_flags);
+- break;
+- case IORING_OP_RENAMEAT:
+- ret = io_renameat(req, issue_flags);
+- break;
+- case IORING_OP_UNLINKAT:
+- ret = io_unlinkat(req, issue_flags);
+- break;
+- case IORING_OP_MKDIRAT:
+- ret = io_mkdirat(req, issue_flags);
+- break;
+- case IORING_OP_SYMLINKAT:
+- ret = io_symlinkat(req, issue_flags);
+- break;
+- case IORING_OP_LINKAT:
+- ret = io_linkat(req, issue_flags);
+- break;
+- default:
+- ret = -EINVAL;
+- break;
+- }
+-
+- if (creds)
+- revert_creds(creds);
+- if (ret)
+- return ret;
+- /* If the op doesn't have a file, we're not polling for it */
+- if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file)
+- io_iopoll_req_issued(req);
+-
+- return 0;
+-}
+-
+-static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
+-{
+- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+-
+- req = io_put_req_find_next(req);
+- return req ? &req->work : NULL;
+-}
+-
+-static void io_wq_submit_work(struct io_wq_work *work)
+-{
+- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+- struct io_kiocb *timeout;
+- int ret = 0;
+-
+- /* one will be dropped by ->io_free_work() after returning to io-wq */
+- if (!(req->flags & REQ_F_REFCOUNT))
+- __io_req_set_refcount(req, 2);
+- else
+- req_ref_get(req);
+-
+- timeout = io_prep_linked_timeout(req);
+- if (timeout)
+- io_queue_linked_timeout(timeout);
+-
+- /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
+- if (work->flags & IO_WQ_WORK_CANCEL)
+- ret = -ECANCELED;
+-
+- if (!ret) {
+- do {
+- ret = io_issue_sqe(req, 0);
+- /*
+- * We can get EAGAIN for polled IO even though we're
+- * forcing a sync submission from here, since we can't
+- * wait for request slots on the block side.
+- */
+- if (ret != -EAGAIN)
+- break;
+- cond_resched();
+- } while (1);
+- }
+-
+- /* avoid locking problems by failing it from a clean context */
+- if (ret)
+- io_req_task_queue_fail(req, ret);
+-}
+-
+-static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
+- unsigned i)
+-{
+- return &table->files[i];
+-}
+-
+-static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
+- int index)
+-{
+- struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index);
+-
+- return (struct file *) (slot->file_ptr & FFS_MASK);
+-}
+-
+-static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file)
+-{
+- unsigned long file_ptr = (unsigned long) file;
+-
+- if (__io_file_supports_nowait(file, READ))
+- file_ptr |= FFS_ASYNC_READ;
+- if (__io_file_supports_nowait(file, WRITE))
+- file_ptr |= FFS_ASYNC_WRITE;
+- if (S_ISREG(file_inode(file)->i_mode))
+- file_ptr |= FFS_ISREG;
+- file_slot->file_ptr = file_ptr;
+-}
+-
+-static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
+- struct io_kiocb *req, int fd)
+-{
+- struct file *file;
+- unsigned long file_ptr;
+-
+- if (unlikely((unsigned int)fd >= ctx->nr_user_files))
+- return NULL;
+- fd = array_index_nospec(fd, ctx->nr_user_files);
+- file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
+- file = (struct file *) (file_ptr & FFS_MASK);
+- file_ptr &= ~FFS_MASK;
+- /* mask in overlapping REQ_F and FFS bits */
+- req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
+- io_req_set_rsrc_node(req);
+- return file;
+-}
+-
+-static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
+- struct io_kiocb *req, int fd)
+-{
+- struct file *file = fget(fd);
+-
+- trace_io_uring_file_get(ctx, fd);
+-
+- /* we don't allow fixed io_uring files */
+- if (file && unlikely(file->f_op == &io_uring_fops))
+- io_req_track_inflight(req);
+- return file;
+-}
+-
+-static inline struct file *io_file_get(struct io_ring_ctx *ctx,
+- struct io_kiocb *req, int fd, bool fixed)
+-{
+- if (fixed)
+- return io_file_get_fixed(ctx, req, fd);
+- else
+- return io_file_get_normal(ctx, req, fd);
+-}
+-
+-static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+-{
+- struct io_kiocb *prev = req->timeout.prev;
+- int ret;
+-
+- if (prev) {
+- ret = io_try_cancel_userdata(req, prev->user_data);
+- io_req_complete_post(req, ret ?: -ETIME, 0);
+- io_put_req(prev);
+- } else {
+- io_req_complete_post(req, -ETIME, 0);
+- }
+-}
+-
+-static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
+-{
+- struct io_timeout_data *data = container_of(timer,
+- struct io_timeout_data, timer);
+- struct io_kiocb *prev, *req = data->req;
+- struct io_ring_ctx *ctx = req->ctx;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&ctx->timeout_lock, flags);
+- prev = req->timeout.head;
+- req->timeout.head = NULL;
+-
+- /*
+- * We don't expect the list to be empty, that will only happen if we
+- * race with the completion of the linked work.
+- */
+- if (prev) {
+- io_remove_next_linked(prev);
+- if (!req_ref_inc_not_zero(prev))
+- prev = NULL;
+- }
+- list_del(&req->timeout.list);
+- req->timeout.prev = prev;
+- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+-
+- req->io_task_work.func = io_req_task_link_timeout;
+- io_req_task_work_add(req);
+- return HRTIMER_NORESTART;
+-}
+-
+-static void io_queue_linked_timeout(struct io_kiocb *req)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- spin_lock_irq(&ctx->timeout_lock);
+- /*
+- * If the back reference is NULL, then our linked request finished
+- * before we got a chance to setup the timer
+- */
+- if (req->timeout.head) {
+- struct io_timeout_data *data = req->async_data;
+-
+- data->timer.function = io_link_timeout_fn;
+- hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
+- data->mode);
+- list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
+- }
+- spin_unlock_irq(&ctx->timeout_lock);
+- /* drop submission reference */
+- io_put_req(req);
+-}
+-
+-static void __io_queue_sqe(struct io_kiocb *req)
+- __must_hold(&req->ctx->uring_lock)
+-{
+- struct io_kiocb *linked_timeout;
+- int ret;
+-
+-issue_sqe:
+- ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+-
+- /*
+- * We async punt it if the file wasn't marked NOWAIT, or if the file
+- * doesn't support non-blocking read/write attempts
+- */
+- if (likely(!ret)) {
+- if (req->flags & REQ_F_COMPLETE_INLINE) {
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_submit_state *state = &ctx->submit_state;
+-
+- state->compl_reqs[state->compl_nr++] = req;
+- if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+- io_submit_flush_completions(ctx);
+- return;
+- }
+-
+- linked_timeout = io_prep_linked_timeout(req);
+- if (linked_timeout)
+- io_queue_linked_timeout(linked_timeout);
+- } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+- linked_timeout = io_prep_linked_timeout(req);
+-
+- switch (io_arm_poll_handler(req)) {
+- case IO_APOLL_READY:
+- if (linked_timeout)
+- io_queue_linked_timeout(linked_timeout);
+- goto issue_sqe;
+- case IO_APOLL_ABORTED:
+- /*
+- * Queued up for async execution, worker will release
+- * submit reference when the iocb is actually submitted.
+- */
+- io_queue_async_work(req, NULL);
+- break;
+- }
+-
+- if (linked_timeout)
+- io_queue_linked_timeout(linked_timeout);
+- } else {
+- io_req_complete_failed(req, ret);
+- }
+-}
+-
+-static inline void io_queue_sqe(struct io_kiocb *req)
+- __must_hold(&req->ctx->uring_lock)
+-{
+- if (unlikely(req->ctx->drain_active) && io_drain_req(req))
+- return;
+-
+- if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
+- __io_queue_sqe(req);
+- } else if (req->flags & REQ_F_FAIL) {
+- io_req_complete_fail_submit(req);
+- } else {
+- int ret = io_req_prep_async(req);
+-
+- if (unlikely(ret))
+- io_req_complete_failed(req, ret);
+- else
+- io_queue_async_work(req, NULL);
+- }
+-}
+-
+-/*
+- * Check SQE restrictions (opcode and flags).
+- *
+- * Returns 'true' if SQE is allowed, 'false' otherwise.
+- */
+-static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+- struct io_kiocb *req,
+- unsigned int sqe_flags)
+-{
+- if (likely(!ctx->restricted))
+- return true;
+-
+- if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+- return false;
+-
+- if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+- ctx->restrictions.sqe_flags_required)
+- return false;
+-
+- if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+- ctx->restrictions.sqe_flags_required))
+- return false;
+-
+- return true;
+-}
+-
+-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+- __must_hold(&ctx->uring_lock)
+-{
+- struct io_submit_state *state;
+- unsigned int sqe_flags;
+- int personality, ret = 0;
+-
+- /* req is partially pre-initialised, see io_preinit_req() */
+- req->opcode = READ_ONCE(sqe->opcode);
+- /* same numerical values with corresponding REQ_F_*, safe to copy */
+- req->flags = sqe_flags = READ_ONCE(sqe->flags);
+- req->user_data = READ_ONCE(sqe->user_data);
+- req->file = NULL;
+- req->fixed_rsrc_refs = NULL;
+- req->task = current;
+-
+- /* enforce forwards compatibility on users */
+- if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
+- return -EINVAL;
+- if (unlikely(req->opcode >= IORING_OP_LAST))
+- return -EINVAL;
+- if (!io_check_restriction(ctx, req, sqe_flags))
+- return -EACCES;
+-
+- if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+- !io_op_defs[req->opcode].buffer_select)
+- return -EOPNOTSUPP;
+- if (unlikely(sqe_flags & IOSQE_IO_DRAIN))
+- ctx->drain_active = true;
+-
+- personality = READ_ONCE(sqe->personality);
+- if (personality) {
+- req->creds = xa_load(&ctx->personalities, personality);
+- if (!req->creds)
+- return -EINVAL;
+- get_cred(req->creds);
+- req->flags |= REQ_F_CREDS;
+- }
+- state = &ctx->submit_state;
+-
+- /*
+- * Plug now if we have more than 1 IO left after this, and the target
+- * is potentially a read/write to block based storage.
+- */
+- if (!state->plug_started && state->ios_left > 1 &&
+- io_op_defs[req->opcode].plug) {
+- blk_start_plug(&state->plug);
+- state->plug_started = true;
+- }
+-
+- if (io_op_defs[req->opcode].needs_file) {
+- req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+- (sqe_flags & IOSQE_FIXED_FILE));
+- if (unlikely(!req->file))
+- ret = -EBADF;
+- }
+-
+- state->ios_left--;
+- return ret;
+-}
+-
+-static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+- const struct io_uring_sqe *sqe)
+- __must_hold(&ctx->uring_lock)
+-{
+- struct io_submit_link *link = &ctx->submit_state.link;
+- int ret;
+-
+- ret = io_init_req(ctx, req, sqe);
+- if (unlikely(ret)) {
+-fail_req:
+- /* fail even hard links since we don't submit */
+- if (link->head) {
+- /*
+- * we can judge a link req is failed or cancelled by if
+- * REQ_F_FAIL is set, but the head is an exception since
+- * it may be set REQ_F_FAIL because of other req's failure
+- * so let's leverage req->result to distinguish if a head
+- * is set REQ_F_FAIL because of its failure or other req's
+- * failure so that we can set the correct ret code for it.
+- * init result here to avoid affecting the normal path.
+- */
+- if (!(link->head->flags & REQ_F_FAIL))
+- req_fail_link_node(link->head, -ECANCELED);
+- } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+- /*
+- * the current req is a normal req, we should return
+- * error and thus break the submittion loop.
+- */
+- io_req_complete_failed(req, ret);
+- return ret;
+- }
+- req_fail_link_node(req, ret);
+- } else {
+- ret = io_req_prep(req, sqe);
+- if (unlikely(ret))
+- goto fail_req;
+- }
+-
+- /* don't need @sqe from now on */
+- trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
+- req->flags, true,
+- ctx->flags & IORING_SETUP_SQPOLL);
+-
+- /*
+- * If we already have a head request, queue this one for async
+- * submittal once the head completes. If we don't have a head but
+- * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
+- * submitted sync once the chain is complete. If none of those
+- * conditions are true (normal request), then just queue it.
+- */
+- if (link->head) {
+- struct io_kiocb *head = link->head;
+-
+- if (!(req->flags & REQ_F_FAIL)) {
+- ret = io_req_prep_async(req);
+- if (unlikely(ret)) {
+- req_fail_link_node(req, ret);
+- if (!(head->flags & REQ_F_FAIL))
+- req_fail_link_node(head, -ECANCELED);
+- }
+- }
+- trace_io_uring_link(ctx, req, head);
+- link->last->link = req;
+- link->last = req;
+-
+- /* last request of a link, enqueue the link */
+- if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+- link->head = NULL;
+- io_queue_sqe(head);
+- }
+- } else {
+- if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+- link->head = req;
+- link->last = req;
+- } else {
+- io_queue_sqe(req);
+- }
+- }
+-
+- return 0;
+-}
+-
+-/*
+- * Batched submission is done, ensure local IO is flushed out.
+- */
+-static void io_submit_state_end(struct io_submit_state *state,
+- struct io_ring_ctx *ctx)
+-{
+- if (state->link.head)
+- io_queue_sqe(state->link.head);
+- if (state->compl_nr)
+- io_submit_flush_completions(ctx);
+- if (state->plug_started)
+- blk_finish_plug(&state->plug);
+-}
+-
+-/*
+- * Start submission side cache.
+- */
+-static void io_submit_state_start(struct io_submit_state *state,
+- unsigned int max_ios)
+-{
+- state->plug_started = false;
+- state->ios_left = max_ios;
+- /* set only head, no need to init link_last in advance */
+- state->link.head = NULL;
+-}
+-
+-static void io_commit_sqring(struct io_ring_ctx *ctx)
+-{
+- struct io_rings *rings = ctx->rings;
+-
+- /*
+- * Ensure any loads from the SQEs are done at this point,
+- * since once we write the new head, the application could
+- * write new data to them.
+- */
+- smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+-}
+-
+-/*
+- * Fetch an sqe, if one is available. Note this returns a pointer to memory
+- * that is mapped by userspace. This means that care needs to be taken to
+- * ensure that reads are stable, as we cannot rely on userspace always
+- * being a good citizen. If members of the sqe are validated and then later
+- * used, it's important that those reads are done through READ_ONCE() to
+- * prevent a re-load down the line.
+- */
+-static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
+-{
+- unsigned head, mask = ctx->sq_entries - 1;
+- unsigned sq_idx = ctx->cached_sq_head++ & mask;
+-
+- /*
+- * The cached sq head (or cq tail) serves two purposes:
+- *
+- * 1) allows us to batch the cost of updating the user visible
+- * head updates.
+- * 2) allows the kernel side to track the head on its own, even
+- * though the application is the one updating it.
+- */
+- head = READ_ONCE(ctx->sq_array[sq_idx]);
+- if (likely(head < ctx->sq_entries))
+- return &ctx->sq_sqes[head];
+-
+- /* drop invalid entries */
+- ctx->cq_extra--;
+- WRITE_ONCE(ctx->rings->sq_dropped,
+- READ_ONCE(ctx->rings->sq_dropped) + 1);
+- return NULL;
+-}
+-
+-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+- __must_hold(&ctx->uring_lock)
+-{
+- int submitted = 0;
+-
+- /* make sure SQ entry isn't read before tail */
+- nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
+- if (!percpu_ref_tryget_many(&ctx->refs, nr))
+- return -EAGAIN;
+- io_get_task_refs(nr);
+-
+- io_submit_state_start(&ctx->submit_state, nr);
+- while (submitted < nr) {
+- const struct io_uring_sqe *sqe;
+- struct io_kiocb *req;
+-
+- req = io_alloc_req(ctx);
+- if (unlikely(!req)) {
+- if (!submitted)
+- submitted = -EAGAIN;
+- break;
+- }
+- sqe = io_get_sqe(ctx);
+- if (unlikely(!sqe)) {
+- list_add(&req->inflight_entry, &ctx->submit_state.free_list);
+- break;
+- }
+- /* will complete beyond this point, count as submitted */
+- submitted++;
+- if (io_submit_sqe(ctx, req, sqe))
+- break;
+- }
+-
+- if (unlikely(submitted != nr)) {
+- int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
+- int unused = nr - ref_used;
+-
+- current->io_uring->cached_refs += unused;
+- percpu_ref_put_many(&ctx->refs, unused);
+- }
+-
+- io_submit_state_end(&ctx->submit_state, ctx);
+- /* Commit SQ ring head once we've consumed and submitted all SQEs */
+- io_commit_sqring(ctx);
+-
+- return submitted;
+-}
+-
+-static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
+-{
+- return READ_ONCE(sqd->state);
+-}
+-
+-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
+-{
+- /* Tell userspace we may need a wakeup call */
+- spin_lock(&ctx->completion_lock);
+- WRITE_ONCE(ctx->rings->sq_flags,
+- ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
+- spin_unlock(&ctx->completion_lock);
+-}
+-
+-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
+-{
+- spin_lock(&ctx->completion_lock);
+- WRITE_ONCE(ctx->rings->sq_flags,
+- ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
+- spin_unlock(&ctx->completion_lock);
+-}
+-
+-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+-{
+- unsigned int to_submit;
+- int ret = 0;
+-
+- to_submit = io_sqring_entries(ctx);
+- /* if we're handling multiple rings, cap submit size for fairness */
+- if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
+- to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
+-
+- if (!list_empty(&ctx->iopoll_list) || to_submit) {
+- unsigned nr_events = 0;
+- const struct cred *creds = NULL;
+-
+- if (ctx->sq_creds != current_cred())
+- creds = override_creds(ctx->sq_creds);
+-
+- mutex_lock(&ctx->uring_lock);
+- if (!list_empty(&ctx->iopoll_list))
+- io_do_iopoll(ctx, &nr_events, 0);
+-
+- /*
+- * Don't submit if refs are dying, good for io_uring_register(),
+- * but also it is relied upon by io_ring_exit_work()
+- */
+- if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
+- !(ctx->flags & IORING_SETUP_R_DISABLED))
+- ret = io_submit_sqes(ctx, to_submit);
+- mutex_unlock(&ctx->uring_lock);
+-
+- if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
+- wake_up(&ctx->sqo_sq_wait);
+- if (creds)
+- revert_creds(creds);
+- }
+-
+- return ret;
+-}
+-
+-static void io_sqd_update_thread_idle(struct io_sq_data *sqd)
+-{
+- struct io_ring_ctx *ctx;
+- unsigned sq_thread_idle = 0;
+-
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+- sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle);
+- sqd->sq_thread_idle = sq_thread_idle;
+-}
+-
+-static bool io_sqd_handle_event(struct io_sq_data *sqd)
+-{
+- bool did_sig = false;
+- struct ksignal ksig;
+-
+- if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+- signal_pending(current)) {
+- mutex_unlock(&sqd->lock);
+- if (signal_pending(current))
+- did_sig = get_signal(&ksig);
+- cond_resched();
+- mutex_lock(&sqd->lock);
+- }
+- return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+-}
+-
+-static int io_sq_thread(void *data)
+-{
+- struct io_sq_data *sqd = data;
+- struct io_ring_ctx *ctx;
+- unsigned long timeout = 0;
+- char buf[TASK_COMM_LEN];
+- DEFINE_WAIT(wait);
+-
+- snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
+- set_task_comm(current, buf);
+-
+- if (sqd->sq_cpu != -1)
+- set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
+- else
+- set_cpus_allowed_ptr(current, cpu_online_mask);
+- current->flags |= PF_NO_SETAFFINITY;
+-
+- mutex_lock(&sqd->lock);
+- while (1) {
+- bool cap_entries, sqt_spin = false;
+-
+- if (io_sqd_events_pending(sqd) || signal_pending(current)) {
+- if (io_sqd_handle_event(sqd))
+- break;
+- timeout = jiffies + sqd->sq_thread_idle;
+- }
+-
+- cap_entries = !list_is_singular(&sqd->ctx_list);
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+- int ret = __io_sq_thread(ctx, cap_entries);
+-
+- if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
+- sqt_spin = true;
+- }
+- if (io_run_task_work())
+- sqt_spin = true;
+-
+- if (sqt_spin || !time_after(jiffies, timeout)) {
+- cond_resched();
+- if (sqt_spin)
+- timeout = jiffies + sqd->sq_thread_idle;
+- continue;
+- }
+-
+- prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
+- if (!io_sqd_events_pending(sqd) && !current->task_works) {
+- bool needs_sched = true;
+-
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+- io_ring_set_wakeup_flag(ctx);
+-
+- if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+- !list_empty_careful(&ctx->iopoll_list)) {
+- needs_sched = false;
+- break;
+- }
+- if (io_sqring_entries(ctx)) {
+- needs_sched = false;
+- break;
+- }
+- }
+-
+- if (needs_sched) {
+- mutex_unlock(&sqd->lock);
+- schedule();
+- mutex_lock(&sqd->lock);
+- }
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+- io_ring_clear_wakeup_flag(ctx);
+- }
+-
+- finish_wait(&sqd->wait, &wait);
+- timeout = jiffies + sqd->sq_thread_idle;
+- }
+-
+- io_uring_cancel_generic(true, sqd);
+- sqd->thread = NULL;
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+- io_ring_set_wakeup_flag(ctx);
+- io_run_task_work();
+- mutex_unlock(&sqd->lock);
+-
+- complete(&sqd->exited);
+- do_exit(0);
+-}
+-
+-struct io_wait_queue {
+- struct wait_queue_entry wq;
+- struct io_ring_ctx *ctx;
+- unsigned cq_tail;
+- unsigned nr_timeouts;
+-};
+-
+-static inline bool io_should_wake(struct io_wait_queue *iowq)
+-{
+- struct io_ring_ctx *ctx = iowq->ctx;
+- int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+-
+- /*
+- * Wake up if we have enough events, or if a timeout occurred since we
+- * started waiting. For timeouts, we always want to return to userspace,
+- * regardless of event count.
+- */
+- return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+-}
+-
+-static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+- int wake_flags, void *key)
+-{
+- struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+- wq);
+-
+- /*
+- * Cannot safely flush overflowed CQEs from here, ensure we wake up
+- * the task, and the next invocation will do it.
+- */
+- if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
+- return autoremove_wake_function(curr, mode, wake_flags, key);
+- return -1;
+-}
+-
+-static int io_run_task_work_sig(void)
+-{
+- if (io_run_task_work())
+- return 1;
+- if (!signal_pending(current))
+- return 0;
+- if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+- return -ERESTARTSYS;
+- return -EINTR;
+-}
+-
+-/* when returns >0, the caller should retry */
+-static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+- struct io_wait_queue *iowq,
+- signed long *timeout)
+-{
+- int ret;
+-
+- /* make sure we run task_work before checking for signals */
+- ret = io_run_task_work_sig();
+- if (ret || io_should_wake(iowq))
+- return ret;
+- /* let the caller flush overflows, retry */
+- if (test_bit(0, &ctx->check_cq_overflow))
+- return 1;
+-
+- *timeout = schedule_timeout(*timeout);
+- return !*timeout ? -ETIME : 1;
+-}
+-
+-/*
+- * Wait until events become available, if we don't already have some. The
+- * application must reap them itself, as they reside on the shared cq ring.
+- */
+-static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+- const sigset_t __user *sig, size_t sigsz,
+- struct __kernel_timespec __user *uts)
+-{
+- struct io_wait_queue iowq;
+- struct io_rings *rings = ctx->rings;
+- signed long timeout = MAX_SCHEDULE_TIMEOUT;
+- int ret;
+-
+- do {
+- io_cqring_overflow_flush(ctx);
+- if (io_cqring_events(ctx) >= min_events)
+- return 0;
+- if (!io_run_task_work())
+- break;
+- } while (1);
+-
+- if (uts) {
+- struct timespec64 ts;
+-
+- if (get_timespec64(&ts, uts))
+- return -EFAULT;
+- timeout = timespec64_to_jiffies(&ts);
+- }
+-
+- if (sig) {
+-#ifdef CONFIG_COMPAT
+- if (in_compat_syscall())
+- ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+- sigsz);
+- else
+-#endif
+- ret = set_user_sigmask(sig, sigsz);
+-
+- if (ret)
+- return ret;
+- }
+-
+- init_waitqueue_func_entry(&iowq.wq, io_wake_function);
+- iowq.wq.private = current;
+- INIT_LIST_HEAD(&iowq.wq.entry);
+- iowq.ctx = ctx;
+- iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+- iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+-
+- trace_io_uring_cqring_wait(ctx, min_events);
+- do {
+- /* if we can't even flush overflow, don't wait for more */
+- if (!io_cqring_overflow_flush(ctx)) {
+- ret = -EBUSY;
+- break;
+- }
+- prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
+- TASK_INTERRUPTIBLE);
+- ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+- finish_wait(&ctx->cq_wait, &iowq.wq);
+- cond_resched();
+- } while (ret > 0);
+-
+- restore_saved_sigmask_unless(ret == -EINTR);
+-
+- return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+-}
+-
+-static void io_free_page_table(void **table, size_t size)
+-{
+- unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
+-
+- for (i = 0; i < nr_tables; i++)
+- kfree(table[i]);
+- kfree(table);
+-}
+-
+-static void **io_alloc_page_table(size_t size)
+-{
+- unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
+- size_t init_size = size;
+- void **table;
+-
+- table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
+- if (!table)
+- return NULL;
+-
+- for (i = 0; i < nr_tables; i++) {
+- unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
+-
+- table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
+- if (!table[i]) {
+- io_free_page_table(table, init_size);
+- return NULL;
+- }
+- size -= this_size;
+- }
+- return table;
+-}
+-
+-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+-{
+- percpu_ref_exit(&ref_node->refs);
+- kfree(ref_node);
+-}
+-
+-static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
+-{
+- struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
+- struct io_ring_ctx *ctx = node->rsrc_data->ctx;
+- unsigned long flags;
+- bool first_add = false;
+-
+- spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
+- node->done = true;
+-
+- while (!list_empty(&ctx->rsrc_ref_list)) {
+- node = list_first_entry(&ctx->rsrc_ref_list,
+- struct io_rsrc_node, node);
+- /* recycle ref nodes in order */
+- if (!node->done)
+- break;
+- list_del(&node->node);
+- first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
+- }
+- spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
+-
+- if (first_add)
+- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+-}
+-
+-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+-{
+- struct io_rsrc_node *ref_node;
+-
+- ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
+- if (!ref_node)
+- return NULL;
+-
+- if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
+- 0, GFP_KERNEL)) {
+- kfree(ref_node);
+- return NULL;
+- }
+- INIT_LIST_HEAD(&ref_node->node);
+- INIT_LIST_HEAD(&ref_node->rsrc_list);
+- ref_node->done = false;
+- return ref_node;
+-}
+-
+-static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+- struct io_rsrc_data *data_to_kill)
+-{
+- WARN_ON_ONCE(!ctx->rsrc_backup_node);
+- WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
+-
+- if (data_to_kill) {
+- struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
+-
+- rsrc_node->rsrc_data = data_to_kill;
+- spin_lock_irq(&ctx->rsrc_ref_lock);
+- list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
+- spin_unlock_irq(&ctx->rsrc_ref_lock);
+-
+- atomic_inc(&data_to_kill->refs);
+- percpu_ref_kill(&rsrc_node->refs);
+- ctx->rsrc_node = NULL;
+- }
+-
+- if (!ctx->rsrc_node) {
+- ctx->rsrc_node = ctx->rsrc_backup_node;
+- ctx->rsrc_backup_node = NULL;
+- }
+-}
+-
+-static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
+-{
+- if (ctx->rsrc_backup_node)
+- return 0;
+- ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+- return ctx->rsrc_backup_node ? 0 : -ENOMEM;
+-}
+-
+-static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
+-{
+- int ret;
+-
+- /* As we may drop ->uring_lock, other task may have started quiesce */
+- if (data->quiesce)
+- return -ENXIO;
+-
+- data->quiesce = true;
+- do {
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- break;
+- io_rsrc_node_switch(ctx, data);
+-
+- /* kill initial ref, already quiesced if zero */
+- if (atomic_dec_and_test(&data->refs))
+- break;
+- mutex_unlock(&ctx->uring_lock);
+- flush_delayed_work(&ctx->rsrc_put_work);
+- ret = wait_for_completion_interruptible(&data->done);
+- if (!ret) {
+- mutex_lock(&ctx->uring_lock);
+- break;
+- }
+-
+- atomic_inc(&data->refs);
+- /* wait for all works potentially completing data->done */
+- flush_delayed_work(&ctx->rsrc_put_work);
+- reinit_completion(&data->done);
+-
+- ret = io_run_task_work_sig();
+- mutex_lock(&ctx->uring_lock);
+- } while (ret >= 0);
+- data->quiesce = false;
+-
+- return ret;
+-}
+-
+-static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
+-{
+- unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK;
+- unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT;
+-
+- return &data->tags[table_idx][off];
+-}
+-
+-static void io_rsrc_data_free(struct io_rsrc_data *data)
+-{
+- size_t size = data->nr * sizeof(data->tags[0][0]);
+-
+- if (data->tags)
+- io_free_page_table((void **)data->tags, size);
+- kfree(data);
+-}
+-
+-static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
+- u64 __user *utags, unsigned nr,
+- struct io_rsrc_data **pdata)
+-{
+- struct io_rsrc_data *data;
+- int ret = -ENOMEM;
+- unsigned i;
+-
+- data = kzalloc(sizeof(*data), GFP_KERNEL);
+- if (!data)
+- return -ENOMEM;
+- data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
+- if (!data->tags) {
+- kfree(data);
+- return -ENOMEM;
+- }
+-
+- data->nr = nr;
+- data->ctx = ctx;
+- data->do_put = do_put;
+- if (utags) {
+- ret = -EFAULT;
+- for (i = 0; i < nr; i++) {
+- u64 *tag_slot = io_get_tag_slot(data, i);
+-
+- if (copy_from_user(tag_slot, &utags[i],
+- sizeof(*tag_slot)))
+- goto fail;
+- }
+- }
+-
+- atomic_set(&data->refs, 1);
+- init_completion(&data->done);
+- *pdata = data;
+- return 0;
+-fail:
+- io_rsrc_data_free(data);
+- return ret;
+-}
+-
+-static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
+-{
+- table->files = kvcalloc(nr_files, sizeof(table->files[0]),
+- GFP_KERNEL_ACCOUNT);
+- return !!table->files;
+-}
+-
+-static void io_free_file_tables(struct io_file_table *table)
+-{
+- kvfree(table->files);
+- table->files = NULL;
+-}
+-
+-static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+-{
+-#if defined(CONFIG_UNIX)
+- if (ctx->ring_sock) {
+- struct sock *sock = ctx->ring_sock->sk;
+- struct sk_buff *skb;
+-
+- while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+- kfree_skb(skb);
+- }
+-#else
+- int i;
+-
+- for (i = 0; i < ctx->nr_user_files; i++) {
+- struct file *file;
+-
+- file = io_file_from_index(ctx, i);
+- if (file)
+- fput(file);
+- }
+-#endif
+- io_free_file_tables(&ctx->file_table);
+- io_rsrc_data_free(ctx->file_data);
+- ctx->file_data = NULL;
+- ctx->nr_user_files = 0;
+-}
+-
+-static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+-{
+- int ret;
+-
+- if (!ctx->file_data)
+- return -ENXIO;
+- ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+- if (!ret)
+- __io_sqe_files_unregister(ctx);
+- return ret;
+-}
+-
+-static void io_sq_thread_unpark(struct io_sq_data *sqd)
+- __releases(&sqd->lock)
+-{
+- WARN_ON_ONCE(sqd->thread == current);
+-
+- /*
+- * Do the dance but not conditional clear_bit() because it'd race with
+- * other threads incrementing park_pending and setting the bit.
+- */
+- clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+- if (atomic_dec_return(&sqd->park_pending))
+- set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+- mutex_unlock(&sqd->lock);
+-}
+-
+-static void io_sq_thread_park(struct io_sq_data *sqd)
+- __acquires(&sqd->lock)
+-{
+- WARN_ON_ONCE(sqd->thread == current);
+-
+- atomic_inc(&sqd->park_pending);
+- set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+- mutex_lock(&sqd->lock);
+- if (sqd->thread)
+- wake_up_process(sqd->thread);
+-}
+-
+-static void io_sq_thread_stop(struct io_sq_data *sqd)
+-{
+- WARN_ON_ONCE(sqd->thread == current);
+- WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
+-
+- set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+- mutex_lock(&sqd->lock);
+- if (sqd->thread)
+- wake_up_process(sqd->thread);
+- mutex_unlock(&sqd->lock);
+- wait_for_completion(&sqd->exited);
+-}
+-
+-static void io_put_sq_data(struct io_sq_data *sqd)
+-{
+- if (refcount_dec_and_test(&sqd->refs)) {
+- WARN_ON_ONCE(atomic_read(&sqd->park_pending));
+-
+- io_sq_thread_stop(sqd);
+- kfree(sqd);
+- }
+-}
+-
+-static void io_sq_thread_finish(struct io_ring_ctx *ctx)
+-{
+- struct io_sq_data *sqd = ctx->sq_data;
+-
+- if (sqd) {
+- io_sq_thread_park(sqd);
+- list_del_init(&ctx->sqd_list);
+- io_sqd_update_thread_idle(sqd);
+- io_sq_thread_unpark(sqd);
+-
+- io_put_sq_data(sqd);
+- ctx->sq_data = NULL;
+- }
+-}
+-
+-static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
+-{
+- struct io_ring_ctx *ctx_attach;
+- struct io_sq_data *sqd;
+- struct fd f;
+-
+- f = fdget(p->wq_fd);
+- if (!f.file)
+- return ERR_PTR(-ENXIO);
+- if (f.file->f_op != &io_uring_fops) {
+- fdput(f);
+- return ERR_PTR(-EINVAL);
+- }
+-
+- ctx_attach = f.file->private_data;
+- sqd = ctx_attach->sq_data;
+- if (!sqd) {
+- fdput(f);
+- return ERR_PTR(-EINVAL);
+- }
+- if (sqd->task_tgid != current->tgid) {
+- fdput(f);
+- return ERR_PTR(-EPERM);
+- }
+-
+- refcount_inc(&sqd->refs);
+- fdput(f);
+- return sqd;
+-}
+-
+-static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
+- bool *attached)
+-{
+- struct io_sq_data *sqd;
+-
+- *attached = false;
+- if (p->flags & IORING_SETUP_ATTACH_WQ) {
+- sqd = io_attach_sq_data(p);
+- if (!IS_ERR(sqd)) {
+- *attached = true;
+- return sqd;
+- }
+- /* fall through for EPERM case, setup new sqd/task */
+- if (PTR_ERR(sqd) != -EPERM)
+- return sqd;
+- }
+-
+- sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
+- if (!sqd)
+- return ERR_PTR(-ENOMEM);
+-
+- atomic_set(&sqd->park_pending, 0);
+- refcount_set(&sqd->refs, 1);
+- INIT_LIST_HEAD(&sqd->ctx_list);
+- mutex_init(&sqd->lock);
+- init_waitqueue_head(&sqd->wait);
+- init_completion(&sqd->exited);
+- return sqd;
+-}
+-
+-#if defined(CONFIG_UNIX)
+-/*
+- * Ensure the UNIX gc is aware of our file set, so we are certain that
+- * the io_uring can be safely unregistered on process exit, even if we have
+- * loops in the file referencing.
+- */
+-static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+-{
+- struct sock *sk = ctx->ring_sock->sk;
+- struct scm_fp_list *fpl;
+- struct sk_buff *skb;
+- int i, nr_files;
+-
+- fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+- if (!fpl)
+- return -ENOMEM;
+-
+- skb = alloc_skb(0, GFP_KERNEL);
+- if (!skb) {
+- kfree(fpl);
+- return -ENOMEM;
+- }
+-
+- skb->sk = sk;
+-
+- nr_files = 0;
+- fpl->user = get_uid(current_user());
+- for (i = 0; i < nr; i++) {
+- struct file *file = io_file_from_index(ctx, i + offset);
+-
+- if (!file)
+- continue;
+- fpl->fp[nr_files] = get_file(file);
+- unix_inflight(fpl->user, fpl->fp[nr_files]);
+- nr_files++;
+- }
+-
+- if (nr_files) {
+- fpl->max = SCM_MAX_FD;
+- fpl->count = nr_files;
+- UNIXCB(skb).fp = fpl;
+- skb->destructor = unix_destruct_scm;
+- refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+- skb_queue_head(&sk->sk_receive_queue, skb);
+-
+- for (i = 0; i < nr_files; i++)
+- fput(fpl->fp[i]);
+- } else {
+- kfree_skb(skb);
+- kfree(fpl);
+- }
+-
+- return 0;
+-}
+-
+-/*
+- * If UNIX sockets are enabled, fd passing can cause a reference cycle which
+- * causes regular reference counting to break down. We rely on the UNIX
+- * garbage collection to take care of this problem for us.
+- */
+-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+-{
+- unsigned left, total;
+- int ret = 0;
+-
+- total = 0;
+- left = ctx->nr_user_files;
+- while (left) {
+- unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
+-
+- ret = __io_sqe_files_scm(ctx, this_files, total);
+- if (ret)
+- break;
+- left -= this_files;
+- total += this_files;
+- }
+-
+- if (!ret)
+- return 0;
+-
+- while (total < ctx->nr_user_files) {
+- struct file *file = io_file_from_index(ctx, total);
+-
+- if (file)
+- fput(file);
+- total++;
+- }
+-
+- return ret;
+-}
+-#else
+-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+-{
+- return 0;
+-}
+-#endif
+-
+-static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+-{
+- struct file *file = prsrc->file;
+-#if defined(CONFIG_UNIX)
+- struct sock *sock = ctx->ring_sock->sk;
+- struct sk_buff_head list, *head = &sock->sk_receive_queue;
+- struct sk_buff *skb;
+- int i;
+-
+- __skb_queue_head_init(&list);
+-
+- /*
+- * Find the skb that holds this file in its SCM_RIGHTS. When found,
+- * remove this entry and rearrange the file array.
+- */
+- skb = skb_dequeue(head);
+- while (skb) {
+- struct scm_fp_list *fp;
+-
+- fp = UNIXCB(skb).fp;
+- for (i = 0; i < fp->count; i++) {
+- int left;
+-
+- if (fp->fp[i] != file)
+- continue;
+-
+- unix_notinflight(fp->user, fp->fp[i]);
+- left = fp->count - 1 - i;
+- if (left) {
+- memmove(&fp->fp[i], &fp->fp[i + 1],
+- left * sizeof(struct file *));
+- }
+- fp->count--;
+- if (!fp->count) {
+- kfree_skb(skb);
+- skb = NULL;
+- } else {
+- __skb_queue_tail(&list, skb);
+- }
+- fput(file);
+- file = NULL;
+- break;
+- }
+-
+- if (!file)
+- break;
+-
+- __skb_queue_tail(&list, skb);
+-
+- skb = skb_dequeue(head);
+- }
+-
+- if (skb_peek(&list)) {
+- spin_lock_irq(&head->lock);
+- while ((skb = __skb_dequeue(&list)) != NULL)
+- __skb_queue_tail(head, skb);
+- spin_unlock_irq(&head->lock);
+- }
+-#else
+- fput(file);
+-#endif
+-}
+-
+-static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
+-{
+- struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
+- struct io_ring_ctx *ctx = rsrc_data->ctx;
+- struct io_rsrc_put *prsrc, *tmp;
+-
+- list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
+- list_del(&prsrc->list);
+-
+- if (prsrc->tag) {
+- bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+-
+- io_ring_submit_lock(ctx, lock_ring);
+- spin_lock(&ctx->completion_lock);
+- io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+- ctx->cq_extra++;
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+- io_cqring_ev_posted(ctx);
+- io_ring_submit_unlock(ctx, lock_ring);
+- }
+-
+- rsrc_data->do_put(ctx, prsrc);
+- kfree(prsrc);
+- }
+-
+- io_rsrc_node_destroy(ref_node);
+- if (atomic_dec_and_test(&rsrc_data->refs))
+- complete(&rsrc_data->done);
+-}
+-
+-static void io_rsrc_put_work(struct work_struct *work)
+-{
+- struct io_ring_ctx *ctx;
+- struct llist_node *node;
+-
+- ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
+- node = llist_del_all(&ctx->rsrc_put_llist);
+-
+- while (node) {
+- struct io_rsrc_node *ref_node;
+- struct llist_node *next = node->next;
+-
+- ref_node = llist_entry(node, struct io_rsrc_node, llist);
+- __io_rsrc_put_work(ref_node);
+- node = next;
+- }
+-}
+-
+-static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned nr_args, u64 __user *tags)
+-{
+- __s32 __user *fds = (__s32 __user *) arg;
+- struct file *file;
+- int fd, ret;
+- unsigned i;
+-
+- if (ctx->file_data)
+- return -EBUSY;
+- if (!nr_args)
+- return -EINVAL;
+- if (nr_args > IORING_MAX_FIXED_FILES)
+- return -EMFILE;
+- if (nr_args > rlimit(RLIMIT_NOFILE))
+- return -EMFILE;
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- return ret;
+- ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
+- &ctx->file_data);
+- if (ret)
+- return ret;
+-
+- ret = -ENOMEM;
+- if (!io_alloc_file_tables(&ctx->file_table, nr_args))
+- goto out_free;
+-
+- for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
+- if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+- ret = -EFAULT;
+- goto out_fput;
+- }
+- /* allow sparse sets */
+- if (fd == -1) {
+- ret = -EINVAL;
+- if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
+- goto out_fput;
+- continue;
+- }
+-
+- file = fget(fd);
+- ret = -EBADF;
+- if (unlikely(!file))
+- goto out_fput;
+-
+- /*
+- * Don't allow io_uring instances to be registered. If UNIX
+- * isn't enabled, then this causes a reference cycle and this
+- * instance can never get freed. If UNIX is enabled we'll
+- * handle it just fine, but there's still no point in allowing
+- * a ring fd as it doesn't support regular read/write anyway.
+- */
+- if (file->f_op == &io_uring_fops) {
+- fput(file);
+- goto out_fput;
+- }
+- io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
+- }
+-
+- ret = io_sqe_files_scm(ctx);
+- if (ret) {
+- __io_sqe_files_unregister(ctx);
+- return ret;
+- }
+-
+- io_rsrc_node_switch(ctx, NULL);
+- return ret;
+-out_fput:
+- for (i = 0; i < ctx->nr_user_files; i++) {
+- file = io_file_from_index(ctx, i);
+- if (file)
+- fput(file);
+- }
+- io_free_file_tables(&ctx->file_table);
+- ctx->nr_user_files = 0;
+-out_free:
+- io_rsrc_data_free(ctx->file_data);
+- ctx->file_data = NULL;
+- return ret;
+-}
+-
+-static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+- int index)
+-{
+-#if defined(CONFIG_UNIX)
+- struct sock *sock = ctx->ring_sock->sk;
+- struct sk_buff_head *head = &sock->sk_receive_queue;
+- struct sk_buff *skb;
+-
+- /*
+- * See if we can merge this file into an existing skb SCM_RIGHTS
+- * file set. If there's no room, fall back to allocating a new skb
+- * and filling it in.
+- */
+- spin_lock_irq(&head->lock);
+- skb = skb_peek(head);
+- if (skb) {
+- struct scm_fp_list *fpl = UNIXCB(skb).fp;
+-
+- if (fpl->count < SCM_MAX_FD) {
+- __skb_unlink(skb, head);
+- spin_unlock_irq(&head->lock);
+- fpl->fp[fpl->count] = get_file(file);
+- unix_inflight(fpl->user, fpl->fp[fpl->count]);
+- fpl->count++;
+- spin_lock_irq(&head->lock);
+- __skb_queue_head(head, skb);
+- } else {
+- skb = NULL;
+- }
+- }
+- spin_unlock_irq(&head->lock);
+-
+- if (skb) {
+- fput(file);
+- return 0;
+- }
+-
+- return __io_sqe_files_scm(ctx, 1, index);
+-#else
+- return 0;
+-#endif
+-}
+-
+-static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+- struct io_rsrc_node *node, void *rsrc)
+-{
+- struct io_rsrc_put *prsrc;
+-
+- prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
+- if (!prsrc)
+- return -ENOMEM;
+-
+- prsrc->tag = *io_get_tag_slot(data, idx);
+- prsrc->rsrc = rsrc;
+- list_add(&prsrc->list, &node->rsrc_list);
+- return 0;
+-}
+-
+-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+- unsigned int issue_flags, u32 slot_index)
+-{
+- struct io_ring_ctx *ctx = req->ctx;
+- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+- bool needs_switch = false;
+- struct io_fixed_file *file_slot;
+- int ret = -EBADF;
+-
+- io_ring_submit_lock(ctx, !force_nonblock);
+- if (file->f_op == &io_uring_fops)
+- goto err;
+- ret = -ENXIO;
+- if (!ctx->file_data)
+- goto err;
+- ret = -EINVAL;
+- if (slot_index >= ctx->nr_user_files)
+- goto err;
+-
+- slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
+- file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
+-
+- if (file_slot->file_ptr) {
+- struct file *old_file;
+-
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- goto err;
+-
+- old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+- ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+- ctx->rsrc_node, old_file);
+- if (ret)
+- goto err;
+- file_slot->file_ptr = 0;
+- needs_switch = true;
+- }
+-
+- *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+- io_fixed_file_set(file_slot, file);
+- ret = io_sqe_file_register(ctx, file, slot_index);
+- if (ret) {
+- file_slot->file_ptr = 0;
+- goto err;
+- }
+-
+- ret = 0;
+-err:
+- if (needs_switch)
+- io_rsrc_node_switch(ctx, ctx->file_data);
+- io_ring_submit_unlock(ctx, !force_nonblock);
+- if (ret)
+- fput(file);
+- return ret;
+-}
+-
+-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+-{
+- unsigned int offset = req->close.file_slot - 1;
+- struct io_ring_ctx *ctx = req->ctx;
+- struct io_fixed_file *file_slot;
+- struct file *file;
+- int ret, i;
+-
+- io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+- ret = -ENXIO;
+- if (unlikely(!ctx->file_data))
+- goto out;
+- ret = -EINVAL;
+- if (offset >= ctx->nr_user_files)
+- goto out;
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- goto out;
+-
+- i = array_index_nospec(offset, ctx->nr_user_files);
+- file_slot = io_fixed_file_slot(&ctx->file_table, i);
+- ret = -EBADF;
+- if (!file_slot->file_ptr)
+- goto out;
+-
+- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+- ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+- if (ret)
+- goto out;
+-
+- file_slot->file_ptr = 0;
+- io_rsrc_node_switch(ctx, ctx->file_data);
+- ret = 0;
+-out:
+- io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+- return ret;
+-}
+-
+-static int __io_sqe_files_update(struct io_ring_ctx *ctx,
+- struct io_uring_rsrc_update2 *up,
+- unsigned nr_args)
+-{
+- u64 __user *tags = u64_to_user_ptr(up->tags);
+- __s32 __user *fds = u64_to_user_ptr(up->data);
+- struct io_rsrc_data *data = ctx->file_data;
+- struct io_fixed_file *file_slot;
+- struct file *file;
+- int fd, i, err = 0;
+- unsigned int done;
+- bool needs_switch = false;
+-
+- if (!ctx->file_data)
+- return -ENXIO;
+- if (up->offset + nr_args > ctx->nr_user_files)
+- return -EINVAL;
+-
+- for (done = 0; done < nr_args; done++) {
+- u64 tag = 0;
+-
+- if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
+- copy_from_user(&fd, &fds[done], sizeof(fd))) {
+- err = -EFAULT;
+- break;
+- }
+- if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
+- err = -EINVAL;
+- break;
+- }
+- if (fd == IORING_REGISTER_FILES_SKIP)
+- continue;
+-
+- i = array_index_nospec(up->offset + done, ctx->nr_user_files);
+- file_slot = io_fixed_file_slot(&ctx->file_table, i);
+-
+- if (file_slot->file_ptr) {
+- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+- err = io_queue_rsrc_removal(data, up->offset + done,
+- ctx->rsrc_node, file);
+- if (err)
+- break;
+- file_slot->file_ptr = 0;
+- needs_switch = true;
+- }
+- if (fd != -1) {
+- file = fget(fd);
+- if (!file) {
+- err = -EBADF;
+- break;
+- }
+- /*
+- * Don't allow io_uring instances to be registered. If
+- * UNIX isn't enabled, then this causes a reference
+- * cycle and this instance can never get freed. If UNIX
+- * is enabled we'll handle it just fine, but there's
+- * still no point in allowing a ring fd as it doesn't
+- * support regular read/write anyway.
+- */
+- if (file->f_op == &io_uring_fops) {
+- fput(file);
+- err = -EBADF;
+- break;
+- }
+- *io_get_tag_slot(data, up->offset + done) = tag;
+- io_fixed_file_set(file_slot, file);
+- err = io_sqe_file_register(ctx, file, i);
+- if (err) {
+- file_slot->file_ptr = 0;
+- fput(file);
+- break;
+- }
+- }
+- }
+-
+- if (needs_switch)
+- io_rsrc_node_switch(ctx, data);
+- return done ? done : err;
+-}
+-
+-static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
+- struct task_struct *task)
+-{
+- struct io_wq_hash *hash;
+- struct io_wq_data data;
+- unsigned int concurrency;
+-
+- mutex_lock(&ctx->uring_lock);
+- hash = ctx->hash_map;
+- if (!hash) {
+- hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+- if (!hash) {
+- mutex_unlock(&ctx->uring_lock);
+- return ERR_PTR(-ENOMEM);
+- }
+- refcount_set(&hash->refs, 1);
+- init_waitqueue_head(&hash->wait);
+- ctx->hash_map = hash;
+- }
+- mutex_unlock(&ctx->uring_lock);
+-
+- data.hash = hash;
+- data.task = task;
+- data.free_work = io_wq_free_work;
+- data.do_work = io_wq_submit_work;
+-
+- /* Do QD, or 4 * CPUS, whatever is smallest */
+- concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
+-
+- return io_wq_create(concurrency, &data);
+-}
+-
+-static int io_uring_alloc_task_context(struct task_struct *task,
+- struct io_ring_ctx *ctx)
+-{
+- struct io_uring_task *tctx;
+- int ret;
+-
+- tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
+- if (unlikely(!tctx))
+- return -ENOMEM;
+-
+- ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
+- if (unlikely(ret)) {
+- kfree(tctx);
+- return ret;
+- }
+-
+- tctx->io_wq = io_init_wq_offload(ctx, task);
+- if (IS_ERR(tctx->io_wq)) {
+- ret = PTR_ERR(tctx->io_wq);
+- percpu_counter_destroy(&tctx->inflight);
+- kfree(tctx);
+- return ret;
+- }
+-
+- xa_init(&tctx->xa);
+- init_waitqueue_head(&tctx->wait);
+- atomic_set(&tctx->in_idle, 0);
+- atomic_set(&tctx->inflight_tracked, 0);
+- task->io_uring = tctx;
+- spin_lock_init(&tctx->task_lock);
+- INIT_WQ_LIST(&tctx->task_list);
+- init_task_work(&tctx->task_work, tctx_task_work);
+- return 0;
+-}
+-
+-void __io_uring_free(struct task_struct *tsk)
+-{
+- struct io_uring_task *tctx = tsk->io_uring;
+-
+- WARN_ON_ONCE(!xa_empty(&tctx->xa));
+- WARN_ON_ONCE(tctx->io_wq);
+- WARN_ON_ONCE(tctx->cached_refs);
+-
+- percpu_counter_destroy(&tctx->inflight);
+- kfree(tctx);
+- tsk->io_uring = NULL;
+-}
+-
+-static int io_sq_offload_create(struct io_ring_ctx *ctx,
+- struct io_uring_params *p)
+-{
+- int ret;
+-
+- /* Retain compatibility with failing for an invalid attach attempt */
+- if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
+- IORING_SETUP_ATTACH_WQ) {
+- struct fd f;
+-
+- f = fdget(p->wq_fd);
+- if (!f.file)
+- return -ENXIO;
+- if (f.file->f_op != &io_uring_fops) {
+- fdput(f);
+- return -EINVAL;
+- }
+- fdput(f);
+- }
+- if (ctx->flags & IORING_SETUP_SQPOLL) {
+- struct task_struct *tsk;
+- struct io_sq_data *sqd;
+- bool attached;
+-
+- sqd = io_get_sq_data(p, &attached);
+- if (IS_ERR(sqd)) {
+- ret = PTR_ERR(sqd);
+- goto err;
+- }
+-
+- ctx->sq_creds = get_current_cred();
+- ctx->sq_data = sqd;
+- ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+- if (!ctx->sq_thread_idle)
+- ctx->sq_thread_idle = HZ;
+-
+- io_sq_thread_park(sqd);
+- list_add(&ctx->sqd_list, &sqd->ctx_list);
+- io_sqd_update_thread_idle(sqd);
+- /* don't attach to a dying SQPOLL thread, would be racy */
+- ret = (attached && !sqd->thread) ? -ENXIO : 0;
+- io_sq_thread_unpark(sqd);
+-
+- if (ret < 0)
+- goto err;
+- if (attached)
+- return 0;
+-
+- if (p->flags & IORING_SETUP_SQ_AFF) {
+- int cpu = p->sq_thread_cpu;
+-
+- ret = -EINVAL;
+- if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+- goto err_sqpoll;
+- sqd->sq_cpu = cpu;
+- } else {
+- sqd->sq_cpu = -1;
+- }
+-
+- sqd->task_pid = current->pid;
+- sqd->task_tgid = current->tgid;
+- tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+- if (IS_ERR(tsk)) {
+- ret = PTR_ERR(tsk);
+- goto err_sqpoll;
+- }
+-
+- sqd->thread = tsk;
+- ret = io_uring_alloc_task_context(tsk, ctx);
+- wake_up_new_task(tsk);
+- if (ret)
+- goto err;
+- } else if (p->flags & IORING_SETUP_SQ_AFF) {
+- /* Can't have SQ_AFF without SQPOLL */
+- ret = -EINVAL;
+- goto err;
+- }
+-
+- return 0;
+-err_sqpoll:
+- complete(&ctx->sq_data->exited);
+-err:
+- io_sq_thread_finish(ctx);
+- return ret;
+-}
+-
+-static inline void __io_unaccount_mem(struct user_struct *user,
+- unsigned long nr_pages)
+-{
+- atomic_long_sub(nr_pages, &user->locked_vm);
+-}
+-
+-static inline int __io_account_mem(struct user_struct *user,
+- unsigned long nr_pages)
+-{
+- unsigned long page_limit, cur_pages, new_pages;
+-
+- /* Don't allow more pages than we can safely lock */
+- page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+-
+- do {
+- cur_pages = atomic_long_read(&user->locked_vm);
+- new_pages = cur_pages + nr_pages;
+- if (new_pages > page_limit)
+- return -ENOMEM;
+- } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+- new_pages) != cur_pages);
+-
+- return 0;
+-}
+-
+-static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+-{
+- if (ctx->user)
+- __io_unaccount_mem(ctx->user, nr_pages);
+-
+- if (ctx->mm_account)
+- atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
+-}
+-
+-static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+-{
+- int ret;
+-
+- if (ctx->user) {
+- ret = __io_account_mem(ctx->user, nr_pages);
+- if (ret)
+- return ret;
+- }
+-
+- if (ctx->mm_account)
+- atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
+-
+- return 0;
+-}
+-
+-static void io_mem_free(void *ptr)
+-{
+- struct page *page;
+-
+- if (!ptr)
+- return;
+-
+- page = virt_to_head_page(ptr);
+- if (put_page_testzero(page))
+- free_compound_page(page);
+-}
+-
+-static void *io_mem_alloc(size_t size)
+-{
+- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
+- __GFP_NORETRY | __GFP_ACCOUNT;
+-
+- return (void *) __get_free_pages(gfp_flags, get_order(size));
+-}
+-
+-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
+- size_t *sq_offset)
+-{
+- struct io_rings *rings;
+- size_t off, sq_array_size;
+-
+- off = struct_size(rings, cqes, cq_entries);
+- if (off == SIZE_MAX)
+- return SIZE_MAX;
+-
+-#ifdef CONFIG_SMP
+- off = ALIGN(off, SMP_CACHE_BYTES);
+- if (off == 0)
+- return SIZE_MAX;
+-#endif
+-
+- if (sq_offset)
+- *sq_offset = off;
+-
+- sq_array_size = array_size(sizeof(u32), sq_entries);
+- if (sq_array_size == SIZE_MAX)
+- return SIZE_MAX;
+-
+- if (check_add_overflow(off, sq_array_size, &off))
+- return SIZE_MAX;
+-
+- return off;
+-}
+-
+-static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
+-{
+- struct io_mapped_ubuf *imu = *slot;
+- unsigned int i;
+-
+- if (imu != ctx->dummy_ubuf) {
+- for (i = 0; i < imu->nr_bvecs; i++)
+- unpin_user_page(imu->bvec[i].bv_page);
+- if (imu->acct_pages)
+- io_unaccount_mem(ctx, imu->acct_pages);
+- kvfree(imu);
+- }
+- *slot = NULL;
+-}
+-
+-static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+-{
+- io_buffer_unmap(ctx, &prsrc->buf);
+- prsrc->buf = NULL;
+-}
+-
+-static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < ctx->nr_user_bufs; i++)
+- io_buffer_unmap(ctx, &ctx->user_bufs[i]);
+- kfree(ctx->user_bufs);
+- io_rsrc_data_free(ctx->buf_data);
+- ctx->user_bufs = NULL;
+- ctx->buf_data = NULL;
+- ctx->nr_user_bufs = 0;
+-}
+-
+-static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+-{
+- int ret;
+-
+- if (!ctx->buf_data)
+- return -ENXIO;
+-
+- ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+- if (!ret)
+- __io_sqe_buffers_unregister(ctx);
+- return ret;
+-}
+-
+-static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
+- void __user *arg, unsigned index)
+-{
+- struct iovec __user *src;
+-
+-#ifdef CONFIG_COMPAT
+- if (ctx->compat) {
+- struct compat_iovec __user *ciovs;
+- struct compat_iovec ciov;
+-
+- ciovs = (struct compat_iovec __user *) arg;
+- if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
+- return -EFAULT;
+-
+- dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
+- dst->iov_len = ciov.iov_len;
+- return 0;
+- }
+-#endif
+- src = (struct iovec __user *) arg;
+- if (copy_from_user(dst, &src[index], sizeof(*dst)))
+- return -EFAULT;
+- return 0;
+-}
+-
+-/*
+- * Not super efficient, but this is just a registration time. And we do cache
+- * the last compound head, so generally we'll only do a full search if we don't
+- * match that one.
+- *
+- * We check if the given compound head page has already been accounted, to
+- * avoid double accounting it. This allows us to account the full size of the
+- * page, not just the constituent pages of a huge page.
+- */
+-static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
+- int nr_pages, struct page *hpage)
+-{
+- int i, j;
+-
+- /* check current page array */
+- for (i = 0; i < nr_pages; i++) {
+- if (!PageCompound(pages[i]))
+- continue;
+- if (compound_head(pages[i]) == hpage)
+- return true;
+- }
+-
+- /* check previously registered pages */
+- for (i = 0; i < ctx->nr_user_bufs; i++) {
+- struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+-
+- for (j = 0; j < imu->nr_bvecs; j++) {
+- if (!PageCompound(imu->bvec[j].bv_page))
+- continue;
+- if (compound_head(imu->bvec[j].bv_page) == hpage)
+- return true;
+- }
+- }
+-
+- return false;
+-}
+-
+-static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
+- int nr_pages, struct io_mapped_ubuf *imu,
+- struct page **last_hpage)
+-{
+- int i, ret;
+-
+- imu->acct_pages = 0;
+- for (i = 0; i < nr_pages; i++) {
+- if (!PageCompound(pages[i])) {
+- imu->acct_pages++;
+- } else {
+- struct page *hpage;
+-
+- hpage = compound_head(pages[i]);
+- if (hpage == *last_hpage)
+- continue;
+- *last_hpage = hpage;
+- if (headpage_already_acct(ctx, pages, i, hpage))
+- continue;
+- imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
+- }
+- }
+-
+- if (!imu->acct_pages)
+- return 0;
+-
+- ret = io_account_mem(ctx, imu->acct_pages);
+- if (ret)
+- imu->acct_pages = 0;
+- return ret;
+-}
+-
+-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+- struct io_mapped_ubuf **pimu,
+- struct page **last_hpage)
+-{
+- struct io_mapped_ubuf *imu = NULL;
+- struct vm_area_struct **vmas = NULL;
+- struct page **pages = NULL;
+- unsigned long off, start, end, ubuf;
+- size_t size;
+- int ret, pret, nr_pages, i;
+-
+- if (!iov->iov_base) {
+- *pimu = ctx->dummy_ubuf;
+- return 0;
+- }
+-
+- ubuf = (unsigned long) iov->iov_base;
+- end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- start = ubuf >> PAGE_SHIFT;
+- nr_pages = end - start;
+-
+- *pimu = NULL;
+- ret = -ENOMEM;
+-
+- pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+- if (!pages)
+- goto done;
+-
+- vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *),
+- GFP_KERNEL);
+- if (!vmas)
+- goto done;
+-
+- imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+- if (!imu)
+- goto done;
+-
+- ret = 0;
+- mmap_read_lock(current->mm);
+- pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+- pages, vmas);
+- if (pret == nr_pages) {
+- /* don't support file backed memory */
+- for (i = 0; i < nr_pages; i++) {
+- struct vm_area_struct *vma = vmas[i];
+-
+- if (vma_is_shmem(vma))
+- continue;
+- if (vma->vm_file &&
+- !is_file_hugepages(vma->vm_file)) {
+- ret = -EOPNOTSUPP;
+- break;
+- }
+- }
+- } else {
+- ret = pret < 0 ? pret : -EFAULT;
+- }
+- mmap_read_unlock(current->mm);
+- if (ret) {
+- /*
+- * if we did partial map, or found file backed vmas,
+- * release any pages we did get
+- */
+- if (pret > 0)
+- unpin_user_pages(pages, pret);
+- goto done;
+- }
+-
+- ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
+- if (ret) {
+- unpin_user_pages(pages, pret);
+- goto done;
+- }
+-
+- off = ubuf & ~PAGE_MASK;
+- size = iov->iov_len;
+- for (i = 0; i < nr_pages; i++) {
+- size_t vec_len;
+-
+- vec_len = min_t(size_t, size, PAGE_SIZE - off);
+- imu->bvec[i].bv_page = pages[i];
+- imu->bvec[i].bv_len = vec_len;
+- imu->bvec[i].bv_offset = off;
+- off = 0;
+- size -= vec_len;
+- }
+- /* store original address for later verification */
+- imu->ubuf = ubuf;
+- imu->ubuf_end = ubuf + iov->iov_len;
+- imu->nr_bvecs = nr_pages;
+- *pimu = imu;
+- ret = 0;
+-done:
+- if (ret)
+- kvfree(imu);
+- kvfree(pages);
+- kvfree(vmas);
+- return ret;
+-}
+-
+-static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
+-{
+- ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL);
+- return ctx->user_bufs ? 0 : -ENOMEM;
+-}
+-
+-static int io_buffer_validate(struct iovec *iov)
+-{
+- unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
+-
+- /*
+- * Don't impose further limits on the size and buffer
+- * constraints here, we'll -EINVAL later when IO is
+- * submitted if they are wrong.
+- */
+- if (!iov->iov_base)
+- return iov->iov_len ? -EFAULT : 0;
+- if (!iov->iov_len)
+- return -EFAULT;
+-
+- /* arbitrary limit, but we need something */
+- if (iov->iov_len > SZ_1G)
+- return -EFAULT;
+-
+- if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp))
+- return -EOVERFLOW;
+-
+- return 0;
+-}
+-
+-static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned int nr_args, u64 __user *tags)
+-{
+- struct page *last_hpage = NULL;
+- struct io_rsrc_data *data;
+- int i, ret;
+- struct iovec iov;
+-
+- if (ctx->user_bufs)
+- return -EBUSY;
+- if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
+- return -EINVAL;
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- return ret;
+- ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data);
+- if (ret)
+- return ret;
+- ret = io_buffers_map_alloc(ctx, nr_args);
+- if (ret) {
+- io_rsrc_data_free(data);
+- return ret;
+- }
+-
+- for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
+- ret = io_copy_iov(ctx, &iov, arg, i);
+- if (ret)
+- break;
+- ret = io_buffer_validate(&iov);
+- if (ret)
+- break;
+- if (!iov.iov_base && *io_get_tag_slot(data, i)) {
+- ret = -EINVAL;
+- break;
+- }
+-
+- ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
+- &last_hpage);
+- if (ret)
+- break;
+- }
+-
+- WARN_ON_ONCE(ctx->buf_data);
+-
+- ctx->buf_data = data;
+- if (ret)
+- __io_sqe_buffers_unregister(ctx);
+- else
+- io_rsrc_node_switch(ctx, NULL);
+- return ret;
+-}
+-
+-static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
+- struct io_uring_rsrc_update2 *up,
+- unsigned int nr_args)
+-{
+- u64 __user *tags = u64_to_user_ptr(up->tags);
+- struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
+- struct page *last_hpage = NULL;
+- bool needs_switch = false;
+- __u32 done;
+- int i, err;
+-
+- if (!ctx->buf_data)
+- return -ENXIO;
+- if (up->offset + nr_args > ctx->nr_user_bufs)
+- return -EINVAL;
+-
+- for (done = 0; done < nr_args; done++) {
+- struct io_mapped_ubuf *imu;
+- int offset = up->offset + done;
+- u64 tag = 0;
+-
+- err = io_copy_iov(ctx, &iov, iovs, done);
+- if (err)
+- break;
+- if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
+- err = -EFAULT;
+- break;
+- }
+- err = io_buffer_validate(&iov);
+- if (err)
+- break;
+- if (!iov.iov_base && tag) {
+- err = -EINVAL;
+- break;
+- }
+- err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
+- if (err)
+- break;
+-
+- i = array_index_nospec(offset, ctx->nr_user_bufs);
+- if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
+- err = io_queue_rsrc_removal(ctx->buf_data, offset,
+- ctx->rsrc_node, ctx->user_bufs[i]);
+- if (unlikely(err)) {
+- io_buffer_unmap(ctx, &imu);
+- break;
+- }
+- ctx->user_bufs[i] = NULL;
+- needs_switch = true;
+- }
+-
+- ctx->user_bufs[i] = imu;
+- *io_get_tag_slot(ctx->buf_data, offset) = tag;
+- }
+-
+- if (needs_switch)
+- io_rsrc_node_switch(ctx, ctx->buf_data);
+- return done ? done : err;
+-}
+-
+-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
+-{
+- __s32 __user *fds = arg;
+- int fd;
+-
+- if (ctx->cq_ev_fd)
+- return -EBUSY;
+-
+- if (copy_from_user(&fd, fds, sizeof(*fds)))
+- return -EFAULT;
+-
+- ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
+- if (IS_ERR(ctx->cq_ev_fd)) {
+- int ret = PTR_ERR(ctx->cq_ev_fd);
+-
+- ctx->cq_ev_fd = NULL;
+- return ret;
+- }
+-
+- return 0;
+-}
+-
+-static int io_eventfd_unregister(struct io_ring_ctx *ctx)
+-{
+- if (ctx->cq_ev_fd) {
+- eventfd_ctx_put(ctx->cq_ev_fd);
+- ctx->cq_ev_fd = NULL;
+- return 0;
+- }
+-
+- return -ENXIO;
+-}
+-
+-static void io_destroy_buffers(struct io_ring_ctx *ctx)
+-{
+- struct io_buffer *buf;
+- unsigned long index;
+-
+- xa_for_each(&ctx->io_buffers, index, buf) {
+- __io_remove_buffers(ctx, buf, index, -1U);
+- cond_resched();
+- }
+-}
+-
+-static void io_req_cache_free(struct list_head *list)
+-{
+- struct io_kiocb *req, *nxt;
+-
+- list_for_each_entry_safe(req, nxt, list, inflight_entry) {
+- list_del(&req->inflight_entry);
+- kmem_cache_free(req_cachep, req);
+- }
+-}
+-
+-static void io_req_caches_free(struct io_ring_ctx *ctx)
+-{
+- struct io_submit_state *state = &ctx->submit_state;
+-
+- mutex_lock(&ctx->uring_lock);
+-
+- if (state->free_reqs) {
+- kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
+- state->free_reqs = 0;
+- }
+-
+- io_flush_cached_locked_reqs(ctx, state);
+- io_req_cache_free(&state->free_list);
+- mutex_unlock(&ctx->uring_lock);
+-}
+-
+-static void io_wait_rsrc_data(struct io_rsrc_data *data)
+-{
+- if (data && !atomic_dec_and_test(&data->refs))
+- wait_for_completion(&data->done);
+-}
+-
+-static void io_ring_ctx_free(struct io_ring_ctx *ctx)
+-{
+- io_sq_thread_finish(ctx);
+-
+- if (ctx->mm_account) {
+- mmdrop(ctx->mm_account);
+- ctx->mm_account = NULL;
+- }
+-
+- /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+- io_wait_rsrc_data(ctx->buf_data);
+- io_wait_rsrc_data(ctx->file_data);
+-
+- mutex_lock(&ctx->uring_lock);
+- if (ctx->buf_data)
+- __io_sqe_buffers_unregister(ctx);
+- if (ctx->file_data)
+- __io_sqe_files_unregister(ctx);
+- if (ctx->rings)
+- __io_cqring_overflow_flush(ctx, true);
+- mutex_unlock(&ctx->uring_lock);
+- io_eventfd_unregister(ctx);
+- io_destroy_buffers(ctx);
+- if (ctx->sq_creds)
+- put_cred(ctx->sq_creds);
+-
+- /* there are no registered resources left, nobody uses it */
+- if (ctx->rsrc_node)
+- io_rsrc_node_destroy(ctx->rsrc_node);
+- if (ctx->rsrc_backup_node)
+- io_rsrc_node_destroy(ctx->rsrc_backup_node);
+- flush_delayed_work(&ctx->rsrc_put_work);
+-
+- WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+- WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
+-
+-#if defined(CONFIG_UNIX)
+- if (ctx->ring_sock) {
+- ctx->ring_sock->file = NULL; /* so that iput() is called */
+- sock_release(ctx->ring_sock);
+- }
+-#endif
+- WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+-
+- io_mem_free(ctx->rings);
+- io_mem_free(ctx->sq_sqes);
+-
+- percpu_ref_exit(&ctx->refs);
+- free_uid(ctx->user);
+- io_req_caches_free(ctx);
+- if (ctx->hash_map)
+- io_wq_put_hash(ctx->hash_map);
+- kfree(ctx->cancel_hash);
+- kfree(ctx->dummy_ubuf);
+- kfree(ctx);
+-}
+-
+-static __poll_t io_uring_poll(struct file *file, poll_table *wait)
+-{
+- struct io_ring_ctx *ctx = file->private_data;
+- __poll_t mask = 0;
+-
+- poll_wait(file, &ctx->poll_wait, wait);
+- /*
+- * synchronizes with barrier from wq_has_sleeper call in
+- * io_commit_cqring
+- */
+- smp_rmb();
+- if (!io_sqring_full(ctx))
+- mask |= EPOLLOUT | EPOLLWRNORM;
+-
+- /*
+- * Don't flush cqring overflow list here, just do a simple check.
+- * Otherwise there could possible be ABBA deadlock:
+- * CPU0 CPU1
+- * ---- ----
+- * lock(&ctx->uring_lock);
+- * lock(&ep->mtx);
+- * lock(&ctx->uring_lock);
+- * lock(&ep->mtx);
+- *
+- * Users may get EPOLLIN meanwhile seeing nothing in cqring, this
+- * pushs them to do the flush.
+- */
+- if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
+- mask |= EPOLLIN | EPOLLRDNORM;
+-
+- return mask;
+-}
+-
+-static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
+-{
+- const struct cred *creds;
+-
+- creds = xa_erase(&ctx->personalities, id);
+- if (creds) {
+- put_cred(creds);
+- return 0;
+- }
+-
+- return -EINVAL;
+-}
+-
+-struct io_tctx_exit {
+- struct callback_head task_work;
+- struct completion completion;
+- struct io_ring_ctx *ctx;
+-};
+-
+-static void io_tctx_exit_cb(struct callback_head *cb)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+- struct io_tctx_exit *work;
+-
+- work = container_of(cb, struct io_tctx_exit, task_work);
+- /*
+- * When @in_idle, we're in cancellation and it's racy to remove the
+- * node. It'll be removed by the end of cancellation, just ignore it.
+- */
+- if (!atomic_read(&tctx->in_idle))
+- io_uring_del_tctx_node((unsigned long)work->ctx);
+- complete(&work->completion);
+-}
+-
+-static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+-{
+- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+-
+- return req->ctx == data;
+-}
+-
+-static void io_ring_exit_work(struct work_struct *work)
+-{
+- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
+- unsigned long timeout = jiffies + HZ * 60 * 5;
+- unsigned long interval = HZ / 20;
+- struct io_tctx_exit exit;
+- struct io_tctx_node *node;
+- int ret;
+-
+- /*
+- * If we're doing polled IO and end up having requests being
+- * submitted async (out-of-line), then completions can come in while
+- * we're waiting for refs to drop. We need to reap these manually,
+- * as nobody else will be looking for them.
+- */
+- do {
+- io_uring_try_cancel_requests(ctx, NULL, true);
+- if (ctx->sq_data) {
+- struct io_sq_data *sqd = ctx->sq_data;
+- struct task_struct *tsk;
+-
+- io_sq_thread_park(sqd);
+- tsk = sqd->thread;
+- if (tsk && tsk->io_uring && tsk->io_uring->io_wq)
+- io_wq_cancel_cb(tsk->io_uring->io_wq,
+- io_cancel_ctx_cb, ctx, true);
+- io_sq_thread_unpark(sqd);
+- }
+-
+- if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+- /* there is little hope left, don't run it too often */
+- interval = HZ * 60;
+- }
+- } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
+-
+- init_completion(&exit.completion);
+- init_task_work(&exit.task_work, io_tctx_exit_cb);
+- exit.ctx = ctx;
+- /*
+- * Some may use context even when all refs and requests have been put,
+- * and they are free to do so while still holding uring_lock or
+- * completion_lock, see io_req_task_submit(). Apart from other work,
+- * this lock/unlock section also waits them to finish.
+- */
+- mutex_lock(&ctx->uring_lock);
+- while (!list_empty(&ctx->tctx_list)) {
+- WARN_ON_ONCE(time_after(jiffies, timeout));
+-
+- node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
+- ctx_node);
+- /* don't spin on a single task if cancellation failed */
+- list_rotate_left(&ctx->tctx_list);
+- ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
+- if (WARN_ON_ONCE(ret))
+- continue;
+- wake_up_process(node->task);
+-
+- mutex_unlock(&ctx->uring_lock);
+- wait_for_completion(&exit.completion);
+- mutex_lock(&ctx->uring_lock);
+- }
+- mutex_unlock(&ctx->uring_lock);
+- spin_lock(&ctx->completion_lock);
+- spin_unlock(&ctx->completion_lock);
+-
+- io_ring_ctx_free(ctx);
+-}
+-
+-/* Returns true if we found and killed one or more timeouts */
+-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+- bool cancel_all)
+-{
+- struct io_kiocb *req, *tmp;
+- int canceled = 0;
+-
+- spin_lock(&ctx->completion_lock);
+- spin_lock_irq(&ctx->timeout_lock);
+- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+- if (io_match_task(req, tsk, cancel_all)) {
+- io_kill_timeout(req, -ECANCELED);
+- canceled++;
+- }
+- }
+- spin_unlock_irq(&ctx->timeout_lock);
+- if (canceled != 0)
+- io_commit_cqring(ctx);
+- spin_unlock(&ctx->completion_lock);
+- if (canceled != 0)
+- io_cqring_ev_posted(ctx);
+- return canceled != 0;
+-}
+-
+-static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+-{
+- unsigned long index;
+- struct creds *creds;
+-
+- mutex_lock(&ctx->uring_lock);
+- percpu_ref_kill(&ctx->refs);
+- if (ctx->rings)
+- __io_cqring_overflow_flush(ctx, true);
+- xa_for_each(&ctx->personalities, index, creds)
+- io_unregister_personality(ctx, index);
+- mutex_unlock(&ctx->uring_lock);
+-
+- io_kill_timeouts(ctx, NULL, true);
+- io_poll_remove_all(ctx, NULL, true);
+-
+- /* if we failed setting up the ctx, we might not have any rings */
+- io_iopoll_try_reap_events(ctx);
+-
+- INIT_WORK(&ctx->exit_work, io_ring_exit_work);
+- /*
+- * Use system_unbound_wq to avoid spawning tons of event kworkers
+- * if we're exiting a ton of rings at the same time. It just adds
+- * noise and overhead, there's no discernable change in runtime
+- * over using system_wq.
+- */
+- queue_work(system_unbound_wq, &ctx->exit_work);
+-}
+-
+-static int io_uring_release(struct inode *inode, struct file *file)
+-{
+- struct io_ring_ctx *ctx = file->private_data;
+-
+- file->private_data = NULL;
+- io_ring_ctx_wait_and_kill(ctx);
+- return 0;
+-}
+-
+-struct io_task_cancel {
+- struct task_struct *task;
+- bool all;
+-};
+-
+-static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
+-{
+- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+- struct io_task_cancel *cancel = data;
+- bool ret;
+-
+- if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- /* protect against races with linked timeouts */
+- spin_lock(&ctx->completion_lock);
+- ret = io_match_task(req, cancel->task, cancel->all);
+- spin_unlock(&ctx->completion_lock);
+- } else {
+- ret = io_match_task(req, cancel->task, cancel->all);
+- }
+- return ret;
+-}
+-
+-static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
+- struct task_struct *task, bool cancel_all)
+-{
+- struct io_defer_entry *de;
+- LIST_HEAD(list);
+-
+- spin_lock(&ctx->completion_lock);
+- list_for_each_entry_reverse(de, &ctx->defer_list, list) {
+- if (io_match_task(de->req, task, cancel_all)) {
+- list_cut_position(&list, &ctx->defer_list, &de->list);
+- break;
+- }
+- }
+- spin_unlock(&ctx->completion_lock);
+- if (list_empty(&list))
+- return false;
+-
+- while (!list_empty(&list)) {
+- de = list_first_entry(&list, struct io_defer_entry, list);
+- list_del_init(&de->list);
+- io_req_complete_failed(de->req, -ECANCELED);
+- kfree(de);
+- }
+- return true;
+-}
+-
+-static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
+-{
+- struct io_tctx_node *node;
+- enum io_wq_cancel cret;
+- bool ret = false;
+-
+- mutex_lock(&ctx->uring_lock);
+- list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+- struct io_uring_task *tctx = node->task->io_uring;
+-
+- /*
+- * io_wq will stay alive while we hold uring_lock, because it's
+- * killed after ctx nodes, which requires to take the lock.
+- */
+- if (!tctx || !tctx->io_wq)
+- continue;
+- cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
+- ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+- }
+- mutex_unlock(&ctx->uring_lock);
+-
+- return ret;
+-}
+-
+-static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+- struct task_struct *task,
+- bool cancel_all)
+-{
+- struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
+- struct io_uring_task *tctx = task ? task->io_uring : NULL;
+-
+- while (1) {
+- enum io_wq_cancel cret;
+- bool ret = false;
+-
+- if (!task) {
+- ret |= io_uring_try_cancel_iowq(ctx);
+- } else if (tctx && tctx->io_wq) {
+- /*
+- * Cancels requests of all rings, not only @ctx, but
+- * it's fine as the task is in exit/exec.
+- */
+- cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
+- &cancel, true);
+- ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+- }
+-
+- /* SQPOLL thread does its own polling */
+- if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
+- (ctx->sq_data && ctx->sq_data->thread == current)) {
+- while (!list_empty_careful(&ctx->iopoll_list)) {
+- io_iopoll_try_reap_events(ctx);
+- ret = true;
+- }
+- }
+-
+- ret |= io_cancel_defer_files(ctx, task, cancel_all);
+- ret |= io_poll_remove_all(ctx, task, cancel_all);
+- ret |= io_kill_timeouts(ctx, task, cancel_all);
+- if (task)
+- ret |= io_run_task_work();
+- if (!ret)
+- break;
+- cond_resched();
+- }
+-}
+-
+-static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+- struct io_tctx_node *node;
+- int ret;
+-
+- if (unlikely(!tctx)) {
+- ret = io_uring_alloc_task_context(current, ctx);
+- if (unlikely(ret))
+- return ret;
+-
+- tctx = current->io_uring;
+- if (ctx->iowq_limits_set) {
+- unsigned int limits[2] = { ctx->iowq_limits[0],
+- ctx->iowq_limits[1], };
+-
+- ret = io_wq_max_workers(tctx->io_wq, limits);
+- if (ret)
+- return ret;
+- }
+- }
+- if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
+- node = kmalloc(sizeof(*node), GFP_KERNEL);
+- if (!node)
+- return -ENOMEM;
+- node->ctx = ctx;
+- node->task = current;
+-
+- ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx,
+- node, GFP_KERNEL));
+- if (ret) {
+- kfree(node);
+- return ret;
+- }
+-
+- mutex_lock(&ctx->uring_lock);
+- list_add(&node->ctx_node, &ctx->tctx_list);
+- mutex_unlock(&ctx->uring_lock);
+- }
+- tctx->last = ctx;
+- return 0;
+-}
+-
+-/*
+- * Note that this task has used io_uring. We use it for cancelation purposes.
+- */
+-static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+-
+- if (likely(tctx && tctx->last == ctx))
+- return 0;
+- return __io_uring_add_tctx_node(ctx);
+-}
+-
+-/*
+- * Remove this io_uring_file -> task mapping.
+- */
+-static void io_uring_del_tctx_node(unsigned long index)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+- struct io_tctx_node *node;
+-
+- if (!tctx)
+- return;
+- node = xa_erase(&tctx->xa, index);
+- if (!node)
+- return;
+-
+- WARN_ON_ONCE(current != node->task);
+- WARN_ON_ONCE(list_empty(&node->ctx_node));
+-
+- mutex_lock(&node->ctx->uring_lock);
+- list_del(&node->ctx_node);
+- mutex_unlock(&node->ctx->uring_lock);
+-
+- if (tctx->last == node->ctx)
+- tctx->last = NULL;
+- kfree(node);
+-}
+-
+-static void io_uring_clean_tctx(struct io_uring_task *tctx)
+-{
+- struct io_wq *wq = tctx->io_wq;
+- struct io_tctx_node *node;
+- unsigned long index;
+-
+- xa_for_each(&tctx->xa, index, node) {
+- io_uring_del_tctx_node(index);
+- cond_resched();
+- }
+- if (wq) {
+- /*
+- * Must be after io_uring_del_task_file() (removes nodes under
+- * uring_lock) to avoid race with io_uring_try_cancel_iowq().
+- */
+- io_wq_put_and_exit(wq);
+- tctx->io_wq = NULL;
+- }
+-}
+-
+-static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
+-{
+- if (tracked)
+- return atomic_read(&tctx->inflight_tracked);
+- return percpu_counter_sum(&tctx->inflight);
+-}
+-
+-static void io_uring_drop_tctx_refs(struct task_struct *task)
+-{
+- struct io_uring_task *tctx = task->io_uring;
+- unsigned int refs = tctx->cached_refs;
+-
+- if (refs) {
+- tctx->cached_refs = 0;
+- percpu_counter_sub(&tctx->inflight, refs);
+- put_task_struct_many(task, refs);
+- }
+-}
+-
+-/*
+- * Find any io_uring ctx that this task has registered or done IO on, and cancel
+- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation.
+- */
+-static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+- struct io_ring_ctx *ctx;
+- s64 inflight;
+- DEFINE_WAIT(wait);
+-
+- WARN_ON_ONCE(sqd && sqd->thread != current);
+-
+- if (!current->io_uring)
+- return;
+- if (tctx->io_wq)
+- io_wq_exit_start(tctx->io_wq);
+-
+- atomic_inc(&tctx->in_idle);
+- do {
+- io_uring_drop_tctx_refs(current);
+- /* read completions before cancelations */
+- inflight = tctx_inflight(tctx, !cancel_all);
+- if (!inflight)
+- break;
+-
+- if (!sqd) {
+- struct io_tctx_node *node;
+- unsigned long index;
+-
+- xa_for_each(&tctx->xa, index, node) {
+- /* sqpoll task will cancel all its requests */
+- if (node->ctx->sq_data)
+- continue;
+- io_uring_try_cancel_requests(node->ctx, current,
+- cancel_all);
+- }
+- } else {
+- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+- io_uring_try_cancel_requests(ctx, current,
+- cancel_all);
+- }
+-
+- prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+- io_uring_drop_tctx_refs(current);
+- /*
+- * If we've seen completions, retry without waiting. This
+- * avoids a race where a completion comes in before we did
+- * prepare_to_wait().
+- */
+- if (inflight == tctx_inflight(tctx, !cancel_all))
+- schedule();
+- finish_wait(&tctx->wait, &wait);
+- } while (1);
+- atomic_dec(&tctx->in_idle);
+-
+- io_uring_clean_tctx(tctx);
+- if (cancel_all) {
+- /* for exec all current's requests should be gone, kill tctx */
+- __io_uring_free(current);
+- }
+-}
+-
+-void __io_uring_cancel(bool cancel_all)
+-{
+- io_uring_cancel_generic(cancel_all, NULL);
+-}
+-
+-static void *io_uring_validate_mmap_request(struct file *file,
+- loff_t pgoff, size_t sz)
+-{
+- struct io_ring_ctx *ctx = file->private_data;
+- loff_t offset = pgoff << PAGE_SHIFT;
+- struct page *page;
+- void *ptr;
+-
+- switch (offset) {
+- case IORING_OFF_SQ_RING:
+- case IORING_OFF_CQ_RING:
+- ptr = ctx->rings;
+- break;
+- case IORING_OFF_SQES:
+- ptr = ctx->sq_sqes;
+- break;
+- default:
+- return ERR_PTR(-EINVAL);
+- }
+-
+- page = virt_to_head_page(ptr);
+- if (sz > page_size(page))
+- return ERR_PTR(-EINVAL);
+-
+- return ptr;
+-}
+-
+-#ifdef CONFIG_MMU
+-
+-static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+-{
+- size_t sz = vma->vm_end - vma->vm_start;
+- unsigned long pfn;
+- void *ptr;
+-
+- ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
+- if (IS_ERR(ptr))
+- return PTR_ERR(ptr);
+-
+- pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+- return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+-}
+-
+-#else /* !CONFIG_MMU */
+-
+-static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+-{
+- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
+-}
+-
+-static unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
+-{
+- return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
+-}
+-
+-static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
+- unsigned long addr, unsigned long len,
+- unsigned long pgoff, unsigned long flags)
+-{
+- void *ptr;
+-
+- ptr = io_uring_validate_mmap_request(file, pgoff, len);
+- if (IS_ERR(ptr))
+- return PTR_ERR(ptr);
+-
+- return (unsigned long) ptr;
+-}
+-
+-#endif /* !CONFIG_MMU */
+-
+-static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+-{
+- DEFINE_WAIT(wait);
+-
+- do {
+- if (!io_sqring_full(ctx))
+- break;
+- prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
+-
+- if (!io_sqring_full(ctx))
+- break;
+- schedule();
+- } while (!signal_pending(current));
+-
+- finish_wait(&ctx->sqo_sq_wait, &wait);
+- return 0;
+-}
+-
+-static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
+- struct __kernel_timespec __user **ts,
+- const sigset_t __user **sig)
+-{
+- struct io_uring_getevents_arg arg;
+-
+- /*
+- * If EXT_ARG isn't set, then we have no timespec and the argp pointer
+- * is just a pointer to the sigset_t.
+- */
+- if (!(flags & IORING_ENTER_EXT_ARG)) {
+- *sig = (const sigset_t __user *) argp;
+- *ts = NULL;
+- return 0;
+- }
+-
+- /*
+- * EXT_ARG is set - ensure we agree on the size of it and copy in our
+- * timespec and sigset_t pointers if good.
+- */
+- if (*argsz != sizeof(arg))
+- return -EINVAL;
+- if (copy_from_user(&arg, argp, sizeof(arg)))
+- return -EFAULT;
+- *sig = u64_to_user_ptr(arg.sigmask);
+- *argsz = arg.sigmask_sz;
+- *ts = u64_to_user_ptr(arg.ts);
+- return 0;
+-}
+-
+-SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
+- u32, min_complete, u32, flags, const void __user *, argp,
+- size_t, argsz)
+-{
+- struct io_ring_ctx *ctx;
+- int submitted = 0;
+- struct fd f;
+- long ret;
+-
+- io_run_task_work();
+-
+- if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
+- IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)))
+- return -EINVAL;
+-
+- f = fdget(fd);
+- if (unlikely(!f.file))
+- return -EBADF;
+-
+- ret = -EOPNOTSUPP;
+- if (unlikely(f.file->f_op != &io_uring_fops))
+- goto out_fput;
+-
+- ret = -ENXIO;
+- ctx = f.file->private_data;
+- if (unlikely(!percpu_ref_tryget(&ctx->refs)))
+- goto out_fput;
+-
+- ret = -EBADFD;
+- if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
+- goto out;
+-
+- /*
+- * For SQ polling, the thread will do all submissions and completions.
+- * Just return the requested submit count, and wake the thread if
+- * we were asked to.
+- */
+- ret = 0;
+- if (ctx->flags & IORING_SETUP_SQPOLL) {
+- io_cqring_overflow_flush(ctx);
+-
+- if (unlikely(ctx->sq_data->thread == NULL)) {
+- ret = -EOWNERDEAD;
+- goto out;
+- }
+- if (flags & IORING_ENTER_SQ_WAKEUP)
+- wake_up(&ctx->sq_data->wait);
+- if (flags & IORING_ENTER_SQ_WAIT) {
+- ret = io_sqpoll_wait_sq(ctx);
+- if (ret)
+- goto out;
+- }
+- submitted = to_submit;
+- } else if (to_submit) {
+- ret = io_uring_add_tctx_node(ctx);
+- if (unlikely(ret))
+- goto out;
+- mutex_lock(&ctx->uring_lock);
+- submitted = io_submit_sqes(ctx, to_submit);
+- mutex_unlock(&ctx->uring_lock);
+-
+- if (submitted != to_submit)
+- goto out;
+- }
+- if (flags & IORING_ENTER_GETEVENTS) {
+- const sigset_t __user *sig;
+- struct __kernel_timespec __user *ts;
+-
+- ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+- if (unlikely(ret))
+- goto out;
+-
+- min_complete = min(min_complete, ctx->cq_entries);
+-
+- /*
+- * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+- * space applications don't need to do io completion events
+- * polling again, they can rely on io_sq_thread to do polling
+- * work, which can reduce cpu usage and uring_lock contention.
+- */
+- if (ctx->flags & IORING_SETUP_IOPOLL &&
+- !(ctx->flags & IORING_SETUP_SQPOLL)) {
+- ret = io_iopoll_check(ctx, min_complete);
+- } else {
+- ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
+- }
+- }
+-
+-out:
+- percpu_ref_put(&ctx->refs);
+-out_fput:
+- fdput(f);
+- return submitted ? submitted : ret;
+-}
+-
+-#ifdef CONFIG_PROC_FS
+-static int io_uring_show_cred(struct seq_file *m, unsigned int id,
+- const struct cred *cred)
+-{
+- struct user_namespace *uns = seq_user_ns(m);
+- struct group_info *gi;
+- kernel_cap_t cap;
+- unsigned __capi;
+- int g;
+-
+- seq_printf(m, "%5d\n", id);
+- seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
+- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
+- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
+- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
+- seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
+- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
+- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
+- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
+- seq_puts(m, "\n\tGroups:\t");
+- gi = cred->group_info;
+- for (g = 0; g < gi->ngroups; g++) {
+- seq_put_decimal_ull(m, g ? " " : "",
+- from_kgid_munged(uns, gi->gid[g]));
+- }
+- seq_puts(m, "\n\tCapEff:\t");
+- cap = cred->cap_effective;
+- CAP_FOR_EACH_U32(__capi)
+- seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+- seq_putc(m, '\n');
+- return 0;
+-}
+-
+-static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
+-{
+- struct io_sq_data *sq = NULL;
+- bool has_lock;
+- int i;
+-
+- /*
+- * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+- * since fdinfo case grabs it in the opposite direction of normal use
+- * cases. If we fail to get the lock, we just don't iterate any
+- * structures that could be going away outside the io_uring mutex.
+- */
+- has_lock = mutex_trylock(&ctx->uring_lock);
+-
+- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
+- sq = ctx->sq_data;
+- if (!sq->thread)
+- sq = NULL;
+- }
+-
+- seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
+- seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
+- seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
+- for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
+- struct file *f = io_file_from_index(ctx, i);
+-
+- if (f)
+- seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
+- else
+- seq_printf(m, "%5u: <none>\n", i);
+- }
+- seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
+- for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
+- struct io_mapped_ubuf *buf = ctx->user_bufs[i];
+- unsigned int len = buf->ubuf_end - buf->ubuf;
+-
+- seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len);
+- }
+- if (has_lock && !xa_empty(&ctx->personalities)) {
+- unsigned long index;
+- const struct cred *cred;
+-
+- seq_printf(m, "Personalities:\n");
+- xa_for_each(&ctx->personalities, index, cred)
+- io_uring_show_cred(m, index, cred);
+- }
+- seq_printf(m, "PollList:\n");
+- spin_lock(&ctx->completion_lock);
+- for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+- struct hlist_head *list = &ctx->cancel_hash[i];
+- struct io_kiocb *req;
+-
+- hlist_for_each_entry(req, list, hash_node)
+- seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
+- req->task->task_works != NULL);
+- }
+- spin_unlock(&ctx->completion_lock);
+- if (has_lock)
+- mutex_unlock(&ctx->uring_lock);
+-}
+-
+-static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
+-{
+- struct io_ring_ctx *ctx = f->private_data;
+-
+- if (percpu_ref_tryget(&ctx->refs)) {
+- __io_uring_show_fdinfo(ctx, m);
+- percpu_ref_put(&ctx->refs);
+- }
+-}
+-#endif
+-
+-static const struct file_operations io_uring_fops = {
+- .release = io_uring_release,
+- .mmap = io_uring_mmap,
+-#ifndef CONFIG_MMU
+- .get_unmapped_area = io_uring_nommu_get_unmapped_area,
+- .mmap_capabilities = io_uring_nommu_mmap_capabilities,
+-#endif
+- .poll = io_uring_poll,
+-#ifdef CONFIG_PROC_FS
+- .show_fdinfo = io_uring_show_fdinfo,
+-#endif
+-};
+-
+-static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+- struct io_uring_params *p)
+-{
+- struct io_rings *rings;
+- size_t size, sq_array_offset;
+-
+- /* make sure these are sane, as we already accounted them */
+- ctx->sq_entries = p->sq_entries;
+- ctx->cq_entries = p->cq_entries;
+-
+- size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+- if (size == SIZE_MAX)
+- return -EOVERFLOW;
+-
+- rings = io_mem_alloc(size);
+- if (!rings)
+- return -ENOMEM;
+-
+- ctx->rings = rings;
+- ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+- rings->sq_ring_mask = p->sq_entries - 1;
+- rings->cq_ring_mask = p->cq_entries - 1;
+- rings->sq_ring_entries = p->sq_entries;
+- rings->cq_ring_entries = p->cq_entries;
+-
+- size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+- if (size == SIZE_MAX) {
+- io_mem_free(ctx->rings);
+- ctx->rings = NULL;
+- return -EOVERFLOW;
+- }
+-
+- ctx->sq_sqes = io_mem_alloc(size);
+- if (!ctx->sq_sqes) {
+- io_mem_free(ctx->rings);
+- ctx->rings = NULL;
+- return -ENOMEM;
+- }
+-
+- return 0;
+-}
+-
+-static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+-{
+- int ret, fd;
+-
+- fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+- if (fd < 0)
+- return fd;
+-
+- ret = io_uring_add_tctx_node(ctx);
+- if (ret) {
+- put_unused_fd(fd);
+- return ret;
+- }
+- fd_install(fd, file);
+- return fd;
+-}
+-
+-/*
+- * Allocate an anonymous fd, this is what constitutes the application
+- * visible backing of an io_uring instance. The application mmaps this
+- * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
+- * we have to tie this fd to a socket for file garbage collection purposes.
+- */
+-static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
+-{
+- struct file *file;
+-#if defined(CONFIG_UNIX)
+- int ret;
+-
+- ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
+- &ctx->ring_sock);
+- if (ret)
+- return ERR_PTR(ret);
+-#endif
+-
+- file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
+- O_RDWR | O_CLOEXEC);
+-#if defined(CONFIG_UNIX)
+- if (IS_ERR(file)) {
+- sock_release(ctx->ring_sock);
+- ctx->ring_sock = NULL;
+- } else {
+- ctx->ring_sock->file = file;
+- }
+-#endif
+- return file;
+-}
+-
+-static int io_uring_create(unsigned entries, struct io_uring_params *p,
+- struct io_uring_params __user *params)
+-{
+- struct io_ring_ctx *ctx;
+- struct file *file;
+- int ret;
+-
+- if (!entries)
+- return -EINVAL;
+- if (entries > IORING_MAX_ENTRIES) {
+- if (!(p->flags & IORING_SETUP_CLAMP))
+- return -EINVAL;
+- entries = IORING_MAX_ENTRIES;
+- }
+-
+- /*
+- * Use twice as many entries for the CQ ring. It's possible for the
+- * application to drive a higher depth than the size of the SQ ring,
+- * since the sqes are only used at submission time. This allows for
+- * some flexibility in overcommitting a bit. If the application has
+- * set IORING_SETUP_CQSIZE, it will have passed in the desired number
+- * of CQ ring entries manually.
+- */
+- p->sq_entries = roundup_pow_of_two(entries);
+- if (p->flags & IORING_SETUP_CQSIZE) {
+- /*
+- * If IORING_SETUP_CQSIZE is set, we do the same roundup
+- * to a power-of-two, if it isn't already. We do NOT impose
+- * any cq vs sq ring sizing.
+- */
+- if (!p->cq_entries)
+- return -EINVAL;
+- if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
+- if (!(p->flags & IORING_SETUP_CLAMP))
+- return -EINVAL;
+- p->cq_entries = IORING_MAX_CQ_ENTRIES;
+- }
+- p->cq_entries = roundup_pow_of_two(p->cq_entries);
+- if (p->cq_entries < p->sq_entries)
+- return -EINVAL;
+- } else {
+- p->cq_entries = 2 * p->sq_entries;
+- }
+-
+- ctx = io_ring_ctx_alloc(p);
+- if (!ctx)
+- return -ENOMEM;
+- ctx->compat = in_compat_syscall();
+- if (!capable(CAP_IPC_LOCK))
+- ctx->user = get_uid(current_user());
+-
+- /*
+- * This is just grabbed for accounting purposes. When a process exits,
+- * the mm is exited and dropped before the files, hence we need to hang
+- * on to this mm purely for the purposes of being able to unaccount
+- * memory (locked/pinned vm). It's not used for anything else.
+- */
+- mmgrab(current->mm);
+- ctx->mm_account = current->mm;
+-
+- ret = io_allocate_scq_urings(ctx, p);
+- if (ret)
+- goto err;
+-
+- ret = io_sq_offload_create(ctx, p);
+- if (ret)
+- goto err;
+- /* always set a rsrc node */
+- ret = io_rsrc_node_switch_start(ctx);
+- if (ret)
+- goto err;
+- io_rsrc_node_switch(ctx, NULL);
+-
+- memset(&p->sq_off, 0, sizeof(p->sq_off));
+- p->sq_off.head = offsetof(struct io_rings, sq.head);
+- p->sq_off.tail = offsetof(struct io_rings, sq.tail);
+- p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
+- p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
+- p->sq_off.flags = offsetof(struct io_rings, sq_flags);
+- p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
+- p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
+-
+- memset(&p->cq_off, 0, sizeof(p->cq_off));
+- p->cq_off.head = offsetof(struct io_rings, cq.head);
+- p->cq_off.tail = offsetof(struct io_rings, cq.tail);
+- p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
+- p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
+- p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
+- p->cq_off.cqes = offsetof(struct io_rings, cqes);
+- p->cq_off.flags = offsetof(struct io_rings, cq_flags);
+-
+- p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
+- IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
+- IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
+- IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
+- IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
+- IORING_FEAT_RSRC_TAGS;
+-
+- if (copy_to_user(params, p, sizeof(*p))) {
+- ret = -EFAULT;
+- goto err;
+- }
+-
+- file = io_uring_get_file(ctx);
+- if (IS_ERR(file)) {
+- ret = PTR_ERR(file);
+- goto err;
+- }
+-
+- /*
+- * Install ring fd as the very last thing, so we don't risk someone
+- * having closed it before we finish setup
+- */
+- ret = io_uring_install_fd(ctx, file);
+- if (ret < 0) {
+- /* fput will clean it up */
+- fput(file);
+- return ret;
+- }
+-
+- trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
+- return ret;
+-err:
+- io_ring_ctx_wait_and_kill(ctx);
+- return ret;
+-}
+-
+-/*
+- * Sets up an aio uring context, and returns the fd. Applications asks for a
+- * ring size, we return the actual sq/cq ring sizes (among other things) in the
+- * params structure passed in.
+- */
+-static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
+-{
+- struct io_uring_params p;
+- int i;
+-
+- if (copy_from_user(&p, params, sizeof(p)))
+- return -EFAULT;
+- for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
+- if (p.resv[i])
+- return -EINVAL;
+- }
+-
+- if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
+- IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
+- IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
+- IORING_SETUP_R_DISABLED))
+- return -EINVAL;
+-
+- return io_uring_create(entries, &p, params);
+-}
+-
+-SYSCALL_DEFINE2(io_uring_setup, u32, entries,
+- struct io_uring_params __user *, params)
+-{
+- return io_uring_setup(entries, params);
+-}
+-
+-static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
+-{
+- struct io_uring_probe *p;
+- size_t size;
+- int i, ret;
+-
+- size = struct_size(p, ops, nr_args);
+- if (size == SIZE_MAX)
+- return -EOVERFLOW;
+- p = kzalloc(size, GFP_KERNEL);
+- if (!p)
+- return -ENOMEM;
+-
+- ret = -EFAULT;
+- if (copy_from_user(p, arg, size))
+- goto out;
+- ret = -EINVAL;
+- if (memchr_inv(p, 0, size))
+- goto out;
+-
+- p->last_op = IORING_OP_LAST - 1;
+- if (nr_args > IORING_OP_LAST)
+- nr_args = IORING_OP_LAST;
+-
+- for (i = 0; i < nr_args; i++) {
+- p->ops[i].op = i;
+- if (!io_op_defs[i].not_supported)
+- p->ops[i].flags = IO_URING_OP_SUPPORTED;
+- }
+- p->ops_len = i;
+-
+- ret = 0;
+- if (copy_to_user(arg, p, size))
+- ret = -EFAULT;
+-out:
+- kfree(p);
+- return ret;
+-}
+-
+-static int io_register_personality(struct io_ring_ctx *ctx)
+-{
+- const struct cred *creds;
+- u32 id;
+- int ret;
+-
+- creds = get_current_cred();
+-
+- ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
+- XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
+- if (ret < 0) {
+- put_cred(creds);
+- return ret;
+- }
+- return id;
+-}
+-
+-static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned int nr_args)
+-{
+- struct io_uring_restriction *res;
+- size_t size;
+- int i, ret;
+-
+- /* Restrictions allowed only if rings started disabled */
+- if (!(ctx->flags & IORING_SETUP_R_DISABLED))
+- return -EBADFD;
+-
+- /* We allow only a single restrictions registration */
+- if (ctx->restrictions.registered)
+- return -EBUSY;
+-
+- if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
+- return -EINVAL;
+-
+- size = array_size(nr_args, sizeof(*res));
+- if (size == SIZE_MAX)
+- return -EOVERFLOW;
+-
+- res = memdup_user(arg, size);
+- if (IS_ERR(res))
+- return PTR_ERR(res);
+-
+- ret = 0;
+-
+- for (i = 0; i < nr_args; i++) {
+- switch (res[i].opcode) {
+- case IORING_RESTRICTION_REGISTER_OP:
+- if (res[i].register_op >= IORING_REGISTER_LAST) {
+- ret = -EINVAL;
+- goto out;
+- }
+-
+- __set_bit(res[i].register_op,
+- ctx->restrictions.register_op);
+- break;
+- case IORING_RESTRICTION_SQE_OP:
+- if (res[i].sqe_op >= IORING_OP_LAST) {
+- ret = -EINVAL;
+- goto out;
+- }
+-
+- __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
+- break;
+- case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
+- ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
+- break;
+- case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
+- ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
+- break;
+- default:
+- ret = -EINVAL;
+- goto out;
+- }
+- }
+-
+-out:
+- /* Reset all restrictions if an error happened */
+- if (ret != 0)
+- memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
+- else
+- ctx->restrictions.registered = true;
+-
+- kfree(res);
+- return ret;
+-}
+-
+-static int io_register_enable_rings(struct io_ring_ctx *ctx)
+-{
+- if (!(ctx->flags & IORING_SETUP_R_DISABLED))
+- return -EBADFD;
+-
+- if (ctx->restrictions.registered)
+- ctx->restricted = 1;
+-
+- ctx->flags &= ~IORING_SETUP_R_DISABLED;
+- if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
+- wake_up(&ctx->sq_data->wait);
+- return 0;
+-}
+-
+-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
+- struct io_uring_rsrc_update2 *up,
+- unsigned nr_args)
+-{
+- __u32 tmp;
+- int err;
+-
+- if (up->resv)
+- return -EINVAL;
+- if (check_add_overflow(up->offset, nr_args, &tmp))
+- return -EOVERFLOW;
+- err = io_rsrc_node_switch_start(ctx);
+- if (err)
+- return err;
+-
+- switch (type) {
+- case IORING_RSRC_FILE:
+- return __io_sqe_files_update(ctx, up, nr_args);
+- case IORING_RSRC_BUFFER:
+- return __io_sqe_buffers_update(ctx, up, nr_args);
+- }
+- return -EINVAL;
+-}
+-
+-static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned nr_args)
+-{
+- struct io_uring_rsrc_update2 up;
+-
+- if (!nr_args)
+- return -EINVAL;
+- memset(&up, 0, sizeof(up));
+- if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
+- return -EFAULT;
+- return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
+-}
+-
+-static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned size, unsigned type)
+-{
+- struct io_uring_rsrc_update2 up;
+-
+- if (size != sizeof(up))
+- return -EINVAL;
+- if (copy_from_user(&up, arg, sizeof(up)))
+- return -EFAULT;
+- if (!up.nr || up.resv)
+- return -EINVAL;
+- return __io_register_rsrc_update(ctx, type, &up, up.nr);
+-}
+-
+-static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned int size, unsigned int type)
+-{
+- struct io_uring_rsrc_register rr;
+-
+- /* keep it extendible */
+- if (size != sizeof(rr))
+- return -EINVAL;
+-
+- memset(&rr, 0, sizeof(rr));
+- if (copy_from_user(&rr, arg, size))
+- return -EFAULT;
+- if (!rr.nr || rr.resv || rr.resv2)
+- return -EINVAL;
+-
+- switch (type) {
+- case IORING_RSRC_FILE:
+- return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
+- rr.nr, u64_to_user_ptr(rr.tags));
+- case IORING_RSRC_BUFFER:
+- return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
+- rr.nr, u64_to_user_ptr(rr.tags));
+- }
+- return -EINVAL;
+-}
+-
+-static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+- unsigned len)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+- cpumask_var_t new_mask;
+- int ret;
+-
+- if (!tctx || !tctx->io_wq)
+- return -EINVAL;
+-
+- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+- return -ENOMEM;
+-
+- cpumask_clear(new_mask);
+- if (len > cpumask_size())
+- len = cpumask_size();
+-
+- if (copy_from_user(new_mask, arg, len)) {
+- free_cpumask_var(new_mask);
+- return -EFAULT;
+- }
+-
+- ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+- free_cpumask_var(new_mask);
+- return ret;
+-}
+-
+-static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+-{
+- struct io_uring_task *tctx = current->io_uring;
+-
+- if (!tctx || !tctx->io_wq)
+- return -EINVAL;
+-
+- return io_wq_cpu_affinity(tctx->io_wq, NULL);
+-}
+-
+-static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
+- void __user *arg)
+- __must_hold(&ctx->uring_lock)
+-{
+- struct io_tctx_node *node;
+- struct io_uring_task *tctx = NULL;
+- struct io_sq_data *sqd = NULL;
+- __u32 new_count[2];
+- int i, ret;
+-
+- if (copy_from_user(new_count, arg, sizeof(new_count)))
+- return -EFAULT;
+- for (i = 0; i < ARRAY_SIZE(new_count); i++)
+- if (new_count[i] > INT_MAX)
+- return -EINVAL;
+-
+- if (ctx->flags & IORING_SETUP_SQPOLL) {
+- sqd = ctx->sq_data;
+- if (sqd) {
+- /*
+- * Observe the correct sqd->lock -> ctx->uring_lock
+- * ordering. Fine to drop uring_lock here, we hold
+- * a ref to the ctx.
+- */
+- refcount_inc(&sqd->refs);
+- mutex_unlock(&ctx->uring_lock);
+- mutex_lock(&sqd->lock);
+- mutex_lock(&ctx->uring_lock);
+- if (sqd->thread)
+- tctx = sqd->thread->io_uring;
+- }
+- } else {
+- tctx = current->io_uring;
+- }
+-
+- BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
+-
+- memcpy(ctx->iowq_limits, new_count, sizeof(new_count));
+- ctx->iowq_limits_set = true;
+-
+- ret = -EINVAL;
+- if (tctx && tctx->io_wq) {
+- ret = io_wq_max_workers(tctx->io_wq, new_count);
+- if (ret)
+- goto err;
+- } else {
+- memset(new_count, 0, sizeof(new_count));
+- }
+-
+- if (sqd) {
+- mutex_unlock(&sqd->lock);
+- io_put_sq_data(sqd);
+- }
+-
+- if (copy_to_user(arg, new_count, sizeof(new_count)))
+- return -EFAULT;
+-
+- /* that's it for SQPOLL, only the SQPOLL task creates requests */
+- if (sqd)
+- return 0;
+-
+- /* now propagate the restriction to all registered users */
+- list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+- struct io_uring_task *tctx = node->task->io_uring;
+-
+- if (WARN_ON_ONCE(!tctx->io_wq))
+- continue;
+-
+- for (i = 0; i < ARRAY_SIZE(new_count); i++)
+- new_count[i] = ctx->iowq_limits[i];
+- /* ignore errors, it always returns zero anyway */
+- (void)io_wq_max_workers(tctx->io_wq, new_count);
+- }
+- return 0;
+-err:
+- if (sqd) {
+- mutex_unlock(&sqd->lock);
+- io_put_sq_data(sqd);
+- }
+- return ret;
+-}
+-
+-static bool io_register_op_must_quiesce(int op)
+-{
+- switch (op) {
+- case IORING_REGISTER_BUFFERS:
+- case IORING_UNREGISTER_BUFFERS:
+- case IORING_REGISTER_FILES:
+- case IORING_UNREGISTER_FILES:
+- case IORING_REGISTER_FILES_UPDATE:
+- case IORING_REGISTER_PROBE:
+- case IORING_REGISTER_PERSONALITY:
+- case IORING_UNREGISTER_PERSONALITY:
+- case IORING_REGISTER_FILES2:
+- case IORING_REGISTER_FILES_UPDATE2:
+- case IORING_REGISTER_BUFFERS2:
+- case IORING_REGISTER_BUFFERS_UPDATE:
+- case IORING_REGISTER_IOWQ_AFF:
+- case IORING_UNREGISTER_IOWQ_AFF:
+- case IORING_REGISTER_IOWQ_MAX_WORKERS:
+- return false;
+- default:
+- return true;
+- }
+-}
+-
+-static int io_ctx_quiesce(struct io_ring_ctx *ctx)
+-{
+- long ret;
+-
+- percpu_ref_kill(&ctx->refs);
+-
+- /*
+- * Drop uring mutex before waiting for references to exit. If another
+- * thread is currently inside io_uring_enter() it might need to grab the
+- * uring_lock to make progress. If we hold it here across the drain
+- * wait, then we can deadlock. It's safe to drop the mutex here, since
+- * no new references will come in after we've killed the percpu ref.
+- */
+- mutex_unlock(&ctx->uring_lock);
+- do {
+- ret = wait_for_completion_interruptible(&ctx->ref_comp);
+- if (!ret)
+- break;
+- ret = io_run_task_work_sig();
+- } while (ret >= 0);
+- mutex_lock(&ctx->uring_lock);
+-
+- if (ret)
+- io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+- return ret;
+-}
+-
+-static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
+- void __user *arg, unsigned nr_args)
+- __releases(ctx->uring_lock)
+- __acquires(ctx->uring_lock)
+-{
+- int ret;
+-
+- /*
+- * We're inside the ring mutex, if the ref is already dying, then
+- * someone else killed the ctx or is already going through
+- * io_uring_register().
+- */
+- if (percpu_ref_is_dying(&ctx->refs))
+- return -ENXIO;
+-
+- if (ctx->restricted) {
+- if (opcode >= IORING_REGISTER_LAST)
+- return -EINVAL;
+- opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
+- if (!test_bit(opcode, ctx->restrictions.register_op))
+- return -EACCES;
+- }
+-
+- if (io_register_op_must_quiesce(opcode)) {
+- ret = io_ctx_quiesce(ctx);
+- if (ret)
+- return ret;
+- }
+-
+- switch (opcode) {
+- case IORING_REGISTER_BUFFERS:
+- ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
+- break;
+- case IORING_UNREGISTER_BUFFERS:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_sqe_buffers_unregister(ctx);
+- break;
+- case IORING_REGISTER_FILES:
+- ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
+- break;
+- case IORING_UNREGISTER_FILES:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_sqe_files_unregister(ctx);
+- break;
+- case IORING_REGISTER_FILES_UPDATE:
+- ret = io_register_files_update(ctx, arg, nr_args);
+- break;
+- case IORING_REGISTER_EVENTFD:
+- case IORING_REGISTER_EVENTFD_ASYNC:
+- ret = -EINVAL;
+- if (nr_args != 1)
+- break;
+- ret = io_eventfd_register(ctx, arg);
+- if (ret)
+- break;
+- if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
+- ctx->eventfd_async = 1;
+- else
+- ctx->eventfd_async = 0;
+- break;
+- case IORING_UNREGISTER_EVENTFD:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_eventfd_unregister(ctx);
+- break;
+- case IORING_REGISTER_PROBE:
+- ret = -EINVAL;
+- if (!arg || nr_args > 256)
+- break;
+- ret = io_probe(ctx, arg, nr_args);
+- break;
+- case IORING_REGISTER_PERSONALITY:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_register_personality(ctx);
+- break;
+- case IORING_UNREGISTER_PERSONALITY:
+- ret = -EINVAL;
+- if (arg)
+- break;
+- ret = io_unregister_personality(ctx, nr_args);
+- break;
+- case IORING_REGISTER_ENABLE_RINGS:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_register_enable_rings(ctx);
+- break;
+- case IORING_REGISTER_RESTRICTIONS:
+- ret = io_register_restrictions(ctx, arg, nr_args);
+- break;
+- case IORING_REGISTER_FILES2:
+- ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
+- break;
+- case IORING_REGISTER_FILES_UPDATE2:
+- ret = io_register_rsrc_update(ctx, arg, nr_args,
+- IORING_RSRC_FILE);
+- break;
+- case IORING_REGISTER_BUFFERS2:
+- ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
+- break;
+- case IORING_REGISTER_BUFFERS_UPDATE:
+- ret = io_register_rsrc_update(ctx, arg, nr_args,
+- IORING_RSRC_BUFFER);
+- break;
+- case IORING_REGISTER_IOWQ_AFF:
+- ret = -EINVAL;
+- if (!arg || !nr_args)
+- break;
+- ret = io_register_iowq_aff(ctx, arg, nr_args);
+- break;
+- case IORING_UNREGISTER_IOWQ_AFF:
+- ret = -EINVAL;
+- if (arg || nr_args)
+- break;
+- ret = io_unregister_iowq_aff(ctx);
+- break;
+- case IORING_REGISTER_IOWQ_MAX_WORKERS:
+- ret = -EINVAL;
+- if (!arg || nr_args != 2)
+- break;
+- ret = io_register_iowq_max_workers(ctx, arg);
+- break;
+- default:
+- ret = -EINVAL;
+- break;
+- }
+-
+- if (io_register_op_must_quiesce(opcode)) {
+- /* bring the ctx back to life */
+- percpu_ref_reinit(&ctx->refs);
+- reinit_completion(&ctx->ref_comp);
+- }
+- return ret;
+-}
+-
+-SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
+- void __user *, arg, unsigned int, nr_args)
+-{
+- struct io_ring_ctx *ctx;
+- long ret = -EBADF;
+- struct fd f;
+-
+- f = fdget(fd);
+- if (!f.file)
+- return -EBADF;
+-
+- ret = -EOPNOTSUPP;
+- if (f.file->f_op != &io_uring_fops)
+- goto out_fput;
+-
+- ctx = f.file->private_data;
+-
+- io_run_task_work();
+-
+- mutex_lock(&ctx->uring_lock);
+- ret = __io_uring_register(ctx, opcode, arg, nr_args);
+- mutex_unlock(&ctx->uring_lock);
+- trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
+- ctx->cq_ev_fd != NULL, ret);
+-out_fput:
+- fdput(f);
+- return ret;
+-}
+-
+-static int __init io_uring_init(void)
+-{
+-#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
+- BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
+- BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
+-} while (0)
+-
+-#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
+- __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
+- BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
+- BUILD_BUG_SQE_ELEM(0, __u8, opcode);
+- BUILD_BUG_SQE_ELEM(1, __u8, flags);
+- BUILD_BUG_SQE_ELEM(2, __u16, ioprio);
+- BUILD_BUG_SQE_ELEM(4, __s32, fd);
+- BUILD_BUG_SQE_ELEM(8, __u64, off);
+- BUILD_BUG_SQE_ELEM(8, __u64, addr2);
+- BUILD_BUG_SQE_ELEM(16, __u64, addr);
+- BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
+- BUILD_BUG_SQE_ELEM(24, __u32, len);
+- BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
+- BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
+- BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
+- BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
+- BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
+- BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, accept_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
+- BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
+- BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
+- BUILD_BUG_SQE_ELEM(32, __u64, user_data);
+- BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
+- BUILD_BUG_SQE_ELEM(40, __u16, buf_group);
+- BUILD_BUG_SQE_ELEM(42, __u16, personality);
+- BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+- BUILD_BUG_SQE_ELEM(44, __u32, file_index);
+-
+- BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+- sizeof(struct io_uring_rsrc_update));
+- BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
+- sizeof(struct io_uring_rsrc_update2));
+-
+- /* ->buf_index is u16 */
+- BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+-
+- /* should fit into one byte */
+- BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
+-
+- BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
+- BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
+-
+- req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+- SLAB_ACCOUNT);
+- return 0;
+-};
+-__initcall(io_uring_init);
+diff --git a/fs/ioctl.c b/fs/ioctl.c
+index 504e695781124..e0a3455f9a0f6 100644
+--- a/fs/ioctl.c
++++ b/fs/ioctl.c
+@@ -173,7 +173,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
+
+ if (*len == 0)
+ return -EINVAL;
+- if (start > maxbytes)
++ if (start >= maxbytes)
+ return -EFBIG;
+
+ /*
+diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
+index 9cc5798423d12..87a4f5a2ded0e 100644
+--- a/fs/iomap/buffered-io.c
++++ b/fs/iomap/buffered-io.c
+@@ -256,8 +256,13 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
+ unsigned poff, plen;
+ sector_t sector;
+
+- if (iomap->type == IOMAP_INLINE)
+- return min(iomap_read_inline_data(iter, page), length);
++ if (iomap->type == IOMAP_INLINE) {
++ loff_t ret = iomap_read_inline_data(iter, page);
++
++ if (ret < 0)
++ return ret;
++ return 0;
++ }
+
+ /* zero post-eof blocks as the page may be mapped */
+ iop = iomap_page_create(iter->inode, page);
+@@ -370,6 +375,8 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
+ ctx->cur_page_in_bio = false;
+ }
+ ret = iomap_readpage_iter(iter, ctx, done);
++ if (ret <= 0)
++ return ret;
+ }
+
+ return done;
+@@ -518,7 +525,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
+ * write started inside the existing inode size.
+ */
+ if (pos + len > i_size)
+- truncate_pagecache_range(inode, max(pos, i_size), pos + len);
++ truncate_pagecache_range(inode, max(pos, i_size),
++ pos + len - 1);
+ }
+
+ static int
+@@ -750,7 +758,7 @@ again:
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ */
+- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
++ if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
+index 4ecd255e0511c..468dcbba45bcb 100644
+--- a/fs/iomap/direct-io.c
++++ b/fs/iomap/direct-io.c
+@@ -31,6 +31,7 @@ struct iomap_dio {
+ atomic_t ref;
+ unsigned flags;
+ int error;
++ size_t done_before;
+ bool wait_for_completion;
+
+ union {
+@@ -124,6 +125,9 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
+ ret = generic_write_sync(iocb, ret);
+
++ if (ret > 0)
++ ret += dio->done_before;
++
+ kfree(dio);
+
+ return ret;
+@@ -371,6 +375,8 @@ static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter,
+ loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter);
+
+ dio->size += length;
++ if (!length)
++ return -EFAULT;
+ return length;
+ }
+
+@@ -402,6 +408,8 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi,
+ copied = copy_to_iter(inline_data, length, iter);
+ }
+ dio->size += copied;
++ if (!copied)
++ return -EFAULT;
+ return copied;
+ }
+
+@@ -446,13 +454,21 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter,
+ * may be pure data writes. In that case, we still need to do a full data sync
+ * completion.
+ *
++ * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL,
++ * __iomap_dio_rw can return a partial result if it encounters a non-resident
++ * page in @iter after preparing a transfer. In that case, the non-resident
++ * pages can be faulted in and the request resumed with @done_before set to the
++ * number of bytes previously transferred. The request will then complete with
++ * the correct total number of bytes transferred; this is essential for
++ * completing partial requests asynchronously.
++ *
+ * Returns -ENOTBLK In case of a page invalidation invalidation failure for
+ * writes. The callers needs to fall back to buffered I/O in this case.
+ */
+ struct iomap_dio *
+ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+- unsigned int dio_flags)
++ unsigned int dio_flags, size_t done_before)
+ {
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = file_inode(iocb->ki_filp);
+@@ -482,6 +498,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ dio->dops = dops;
+ dio->error = 0;
+ dio->flags = 0;
++ dio->done_before = done_before;
+
+ dio->submit.iter = iter;
+ dio->submit.waiter = current;
+@@ -577,6 +594,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size)
+ iov_iter_revert(iter, iomi.pos - dio->i_size);
+
++ if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) {
++ if (!(iocb->ki_flags & IOCB_NOWAIT))
++ wait_for_completion = true;
++ ret = 0;
++ }
++
+ /* magic error code to fall back to buffered I/O */
+ if (ret == -ENOTBLK) {
+ wait_for_completion = true;
+@@ -642,11 +665,11 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw);
+ ssize_t
+ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+- unsigned int dio_flags)
++ unsigned int dio_flags, size_t done_before)
+ {
+ struct iomap_dio *dio;
+
+- dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags);
++ dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before);
+ if (IS_ERR_OR_NULL(dio))
+ return PTR_ERR_OR_ZERO(dio);
+ return iomap_dio_complete(dio);
+diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
+index 678e2c51b855c..0c6eacfcbeef1 100644
+--- a/fs/isofs/inode.c
++++ b/fs/isofs/inode.c
+@@ -1322,6 +1322,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
+
+ de = (struct iso_directory_record *) (bh->b_data + offset);
+ de_len = *(unsigned char *) de;
++ if (de_len < sizeof(struct iso_directory_record))
++ goto fail;
+
+ if (offset + de_len > bufsize) {
+ int frag1 = bufsize - offset;
+diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
+index 746132998c577..95d5bb7d825a6 100644
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -27,7 +27,7 @@
+ *
+ * Called with j_list_lock held.
+ */
+-static inline void __buffer_unlink_first(struct journal_head *jh)
++static inline void __buffer_unlink(struct journal_head *jh)
+ {
+ transaction_t *transaction = jh->b_cp_transaction;
+
+@@ -40,45 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
+ }
+ }
+
+-/*
+- * Unlink a buffer from a transaction checkpoint(io) list.
+- *
+- * Called with j_list_lock held.
+- */
+-static inline void __buffer_unlink(struct journal_head *jh)
+-{
+- transaction_t *transaction = jh->b_cp_transaction;
+-
+- __buffer_unlink_first(jh);
+- if (transaction->t_checkpoint_io_list == jh) {
+- transaction->t_checkpoint_io_list = jh->b_cpnext;
+- if (transaction->t_checkpoint_io_list == jh)
+- transaction->t_checkpoint_io_list = NULL;
+- }
+-}
+-
+-/*
+- * Move a buffer from the checkpoint list to the checkpoint io list
+- *
+- * Called with j_list_lock held
+- */
+-static inline void __buffer_relink_io(struct journal_head *jh)
+-{
+- transaction_t *transaction = jh->b_cp_transaction;
+-
+- __buffer_unlink_first(jh);
+-
+- if (!transaction->t_checkpoint_io_list) {
+- jh->b_cpnext = jh->b_cpprev = jh;
+- } else {
+- jh->b_cpnext = transaction->t_checkpoint_io_list;
+- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+- jh->b_cpprev->b_cpnext = jh;
+- jh->b_cpnext->b_cpprev = jh;
+- }
+- transaction->t_checkpoint_io_list = jh;
+-}
+-
+ /*
+ * Check a checkpoint buffer could be release or not.
+ *
+@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
+ struct buffer_head *bh = journal->j_chkpt_bhs[i];
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
++ journal->j_chkpt_bhs[i] = NULL;
+ }
+ *batch_count = 0;
+ }
+@@ -242,15 +204,6 @@ restart:
+ jh = transaction->t_checkpoint_list;
+ bh = jh2bh(jh);
+
+- if (buffer_locked(bh)) {
+- get_bh(bh);
+- spin_unlock(&journal->j_list_lock);
+- wait_on_buffer(bh);
+- /* the journal_head may have gone by now */
+- BUFFER_TRACE(bh, "brelse");
+- __brelse(bh);
+- goto retry;
+- }
+ if (jh->b_transaction != NULL) {
+ transaction_t *t = jh->b_transaction;
+ tid_t tid = t->t_tid;
+@@ -285,30 +238,50 @@ restart:
+ spin_lock(&journal->j_list_lock);
+ goto restart;
+ }
+- if (!buffer_dirty(bh)) {
++ if (!trylock_buffer(bh)) {
++ /*
++ * The buffer is locked, it may be writing back, or
++ * flushing out in the last couple of cycles, or
++ * re-adding into a new transaction, need to check
++ * it again until it's unlocked.
++ */
++ get_bh(bh);
++ spin_unlock(&journal->j_list_lock);
++ wait_on_buffer(bh);
++ /* the journal_head may have gone by now */
++ BUFFER_TRACE(bh, "brelse");
++ __brelse(bh);
++ goto retry;
++ } else if (!buffer_dirty(bh)) {
++ unlock_buffer(bh);
+ BUFFER_TRACE(bh, "remove from checkpoint");
+- if (__jbd2_journal_remove_checkpoint(jh))
+- /* The transaction was released; we're done */
++ /*
++ * If the transaction was released or the checkpoint
++ * list was empty, we're done.
++ */
++ if (__jbd2_journal_remove_checkpoint(jh) ||
++ !transaction->t_checkpoint_list)
+ goto out;
+- continue;
++ } else {
++ unlock_buffer(bh);
++ /*
++ * We are about to write the buffer, it could be
++ * raced by some other transaction shrink or buffer
++ * re-log logic once we release the j_list_lock,
++ * leave it on the checkpoint list and check status
++ * again to make sure it's clean.
++ */
++ BUFFER_TRACE(bh, "queue");
++ get_bh(bh);
++ J_ASSERT_BH(bh, !buffer_jwrite(bh));
++ journal->j_chkpt_bhs[batch_count++] = bh;
++ transaction->t_chp_stats.cs_written++;
++ transaction->t_checkpoint_list = jh->b_cpnext;
+ }
+- /*
+- * Important: we are about to write the buffer, and
+- * possibly block, while still holding the journal
+- * lock. We cannot afford to let the transaction
+- * logic start messing around with this buffer before
+- * we write it to disk, as that would break
+- * recoverability.
+- */
+- BUFFER_TRACE(bh, "queue");
+- get_bh(bh);
+- J_ASSERT_BH(bh, !buffer_jwrite(bh));
+- journal->j_chkpt_bhs[batch_count++] = bh;
+- __buffer_relink_io(jh);
+- transaction->t_chp_stats.cs_written++;
++
+ if ((batch_count == JBD2_NR_BATCH) ||
+- need_resched() ||
+- spin_needbreak(&journal->j_list_lock))
++ need_resched() || spin_needbreak(&journal->j_list_lock) ||
++ jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
+ goto unlock_and_flush;
+ }
+
+@@ -322,38 +295,6 @@ restart:
+ goto restart;
+ }
+
+- /*
+- * Now we issued all of the transaction's buffers, let's deal
+- * with the buffers that are out for I/O.
+- */
+-restart2:
+- /* Did somebody clean up the transaction in the meanwhile? */
+- if (journal->j_checkpoint_transactions != transaction ||
+- transaction->t_tid != this_tid)
+- goto out;
+-
+- while (transaction->t_checkpoint_io_list) {
+- jh = transaction->t_checkpoint_io_list;
+- bh = jh2bh(jh);
+- if (buffer_locked(bh)) {
+- get_bh(bh);
+- spin_unlock(&journal->j_list_lock);
+- wait_on_buffer(bh);
+- /* the journal_head may have gone by now */
+- BUFFER_TRACE(bh, "brelse");
+- __brelse(bh);
+- spin_lock(&journal->j_list_lock);
+- goto restart2;
+- }
+-
+- /*
+- * Now in whatever state the buffer currently is, we
+- * know that it has been written out and so we can
+- * drop it from the list
+- */
+- if (__jbd2_journal_remove_checkpoint(jh))
+- break;
+- }
+ out:
+ spin_unlock(&journal->j_list_lock);
+ result = jbd2_cleanup_journal_tail(journal);
+@@ -408,50 +349,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
+
+ /* Checkpoint list management */
+
+-/*
+- * journal_clean_one_cp_list
+- *
+- * Find all the written-back checkpoint buffers in the given list and
+- * release them. If 'destroy' is set, clean all buffers unconditionally.
+- *
+- * Called with j_list_lock held.
+- * Returns 1 if we freed the transaction, 0 otherwise.
+- */
+-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
+-{
+- struct journal_head *last_jh;
+- struct journal_head *next_jh = jh;
+-
+- if (!jh)
+- return 0;
+-
+- last_jh = jh->b_cpprev;
+- do {
+- jh = next_jh;
+- next_jh = jh->b_cpnext;
+-
+- if (!destroy && __cp_buffer_busy(jh))
+- return 0;
+-
+- if (__jbd2_journal_remove_checkpoint(jh))
+- return 1;
+- /*
+- * This function only frees up some memory
+- * if possible so we dont have an obligation
+- * to finish processing. Bail out if preemption
+- * requested:
+- */
+- if (need_resched())
+- return 0;
+- } while (jh != last_jh);
+-
+- return 0;
+-}
++enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
+
+ /*
+ * journal_shrink_one_cp_list
+ *
+- * Find 'nr_to_scan' written-back checkpoint buffers in the given list
++ * Find all the written-back checkpoint buffers in the given list
+ * and try to release them. If the whole transaction is released, set
+ * the 'released' parameter. Return the number of released checkpointed
+ * buffers.
+@@ -459,7 +362,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
+ * Called with j_list_lock held.
+ */
+ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+- unsigned long *nr_to_scan,
++ enum shrink_type type,
+ bool *released)
+ {
+ struct journal_head *last_jh;
+@@ -467,7 +370,8 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+ unsigned long nr_freed = 0;
+ int ret;
+
+- if (!jh || *nr_to_scan == 0)
++ *released = false;
++ if (!jh)
+ return 0;
+
+ last_jh = jh->b_cpprev;
+@@ -475,12 +379,18 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+ jh = next_jh;
+ next_jh = jh->b_cpnext;
+
+- (*nr_to_scan)--;
+- if (__cp_buffer_busy(jh))
+- continue;
++ if (type == SHRINK_DESTROY) {
++ ret = __jbd2_journal_remove_checkpoint(jh);
++ } else {
++ ret = jbd2_journal_try_remove_checkpoint(jh);
++ if (ret < 0) {
++ if (type == SHRINK_BUSY_SKIP)
++ continue;
++ break;
++ }
++ }
+
+ nr_freed++;
+- ret = __jbd2_journal_remove_checkpoint(jh);
+ if (ret) {
+ *released = true;
+ break;
+@@ -488,7 +398,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+
+ if (need_resched())
+ break;
+- } while (jh != last_jh && *nr_to_scan);
++ } while (jh != last_jh);
+
+ return nr_freed;
+ }
+@@ -506,11 +416,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
+ unsigned long *nr_to_scan)
+ {
+ transaction_t *transaction, *last_transaction, *next_transaction;
+- bool released;
++ bool __maybe_unused released;
+ tid_t first_tid = 0, last_tid = 0, next_tid = 0;
+ tid_t tid = 0;
+ unsigned long nr_freed = 0;
+- unsigned long nr_scanned = *nr_to_scan;
++ unsigned long freed;
+
+ again:
+ spin_lock(&journal->j_list_lock);
+@@ -539,19 +449,11 @@ again:
+ transaction = next_transaction;
+ next_transaction = transaction->t_cpnext;
+ tid = transaction->t_tid;
+- released = false;
+-
+- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+- nr_to_scan, &released);
+- if (*nr_to_scan == 0)
+- break;
+- if (need_resched() || spin_needbreak(&journal->j_list_lock))
+- break;
+- if (released)
+- continue;
+
+- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
+- nr_to_scan, &released);
++ freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
++ SHRINK_BUSY_SKIP, &released);
++ nr_freed += freed;
++ (*nr_to_scan) -= min(*nr_to_scan, freed);
+ if (*nr_to_scan == 0)
+ break;
+ if (need_resched() || spin_needbreak(&journal->j_list_lock))
+@@ -572,9 +474,8 @@ again:
+ if (*nr_to_scan && next_tid)
+ goto again;
+ out:
+- nr_scanned -= *nr_to_scan;
+ trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
+- nr_freed, nr_scanned, next_tid);
++ nr_freed, next_tid);
+
+ return nr_freed;
+ }
+@@ -590,19 +491,21 @@ out:
+ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+ {
+ transaction_t *transaction, *last_transaction, *next_transaction;
+- int ret;
++ enum shrink_type type;
++ bool released;
+
+ transaction = journal->j_checkpoint_transactions;
+ if (!transaction)
+ return;
+
++ type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
+ last_transaction = transaction->t_cpprev;
+ next_transaction = transaction;
+ do {
+ transaction = next_transaction;
+ next_transaction = transaction->t_cpnext;
+- ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
+- destroy);
++ journal_shrink_one_cp_list(transaction->t_checkpoint_list,
++ type, &released);
+ /*
+ * This function only frees up some memory if possible so we
+ * dont have an obligation to finish processing. Bail out if
+@@ -610,23 +513,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+ */
+ if (need_resched())
+ return;
+- if (ret)
+- continue;
+- /*
+- * It is essential that we are as careful as in the case of
+- * t_checkpoint_list with removing the buffer from the list as
+- * we can possibly see not yet submitted buffers on io_list
+- */
+- ret = journal_clean_one_cp_list(transaction->
+- t_checkpoint_io_list, destroy);
+- if (need_resched())
+- return;
+ /*
+ * Stop scanning if we couldn't free the transaction. This
+ * avoids pointless scanning of transactions which still
+ * weren't checkpointed.
+ */
+- if (!ret)
++ if (!released)
+ return;
+ } while (transaction != last_transaction);
+ }
+@@ -705,7 +597,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
+ jbd2_journal_put_journal_head(jh);
+
+ /* Is this transaction empty? */
+- if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
++ if (transaction->t_checkpoint_list)
+ return 0;
+
+ /*
+@@ -736,6 +628,36 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
+ return 1;
+ }
+
++/*
++ * Check the checkpoint buffer and try to remove it from the checkpoint
++ * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
++ * it frees the transaction, 0 otherwise.
++ *
++ * This function is called with j_list_lock held.
++ */
++int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
++{
++ struct buffer_head *bh = jh2bh(jh);
++
++ if (jh->b_transaction)
++ return -EBUSY;
++ if (!trylock_buffer(bh))
++ return -EBUSY;
++ if (buffer_dirty(bh)) {
++ unlock_buffer(bh);
++ return -EBUSY;
++ }
++ unlock_buffer(bh);
++
++ /*
++ * Buffer is clean and the IO has finished (we held the buffer
++ * lock) so the checkpoint is done. We can safely remove the
++ * buffer from this transaction.
++ */
++ JBUFFER_TRACE(jh, "remove from checkpoint list");
++ return __jbd2_journal_remove_checkpoint(jh);
++}
++
+ /*
+ * journal_insert_checkpoint: put a committed buffer onto a checkpoint
+ * list so that we know when it is safe to clean the transaction out of
+@@ -797,7 +719,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
+ J_ASSERT(transaction->t_forget == NULL);
+ J_ASSERT(transaction->t_shadow_list == NULL);
+ J_ASSERT(transaction->t_checkpoint_list == NULL);
+- J_ASSERT(transaction->t_checkpoint_io_list == NULL);
+ J_ASSERT(atomic_read(&transaction->t_updates) == 0);
+ J_ASSERT(journal->j_committing_transaction != transaction);
+ J_ASSERT(journal->j_running_transaction != transaction);
+diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
+index 3cc4ab2ba7f4f..20294c1bbeab7 100644
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -501,7 +501,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ }
+ spin_unlock(&commit_transaction->t_handle_lock);
+ commit_transaction->t_state = T_SWITCH;
+- write_unlock(&journal->j_state_lock);
+
+ J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
+ journal->j_max_transaction_buffers);
+@@ -521,6 +520,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ * has reserved. This is consistent with the existing behaviour
+ * that multiple jbd2_journal_get_write_access() calls to the same
+ * buffer are perfectly permissible.
++ * We use journal->j_state_lock here to serialize processing of
++ * t_reserved_list with eviction of buffers from journal_unmap_buffer().
+ */
+ while (commit_transaction->t_reserved_list) {
+ jh = commit_transaction->t_reserved_list;
+@@ -540,6 +541,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ jbd2_journal_refile_buffer(journal, jh);
+ }
+
++ write_unlock(&journal->j_state_lock);
+ /*
+ * Now try to drop any written-back buffers from the journal's
+ * checkpoint lists. We do this *before* commit because it potentially
+@@ -562,13 +564,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ */
+ jbd2_journal_switch_revoke_table(journal);
+
++ write_lock(&journal->j_state_lock);
+ /*
+ * Reserved credits cannot be claimed anymore, free them
+ */
+ atomic_sub(atomic_read(&journal->j_reserved_credits),
+ &commit_transaction->t_outstanding_credits);
+
+- write_lock(&journal->j_state_lock);
+ trace_jbd2_commit_flushing(journal, commit_transaction);
+ stats.run.rs_flushing = jiffies;
+ stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
+@@ -579,7 +581,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ journal->j_running_transaction = NULL;
+ start_time = ktime_get();
+ commit_transaction->t_log_start = journal->j_head;
+- wake_up(&journal->j_wait_transaction_locked);
++ wake_up_all(&journal->j_wait_transaction_locked);
+ write_unlock(&journal->j_state_lock);
+
+ jbd_debug(3, "JBD2: commit phase 2a\n");
+@@ -1170,7 +1172,7 @@ restart_loop:
+ if (journal->j_commit_callback)
+ journal->j_commit_callback(journal, commit_transaction);
+ if (journal->j_fc_cleanup_callback)
+- journal->j_fc_cleanup_callback(journal, 1);
++ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
+
+ trace_jbd2_end_commit(journal, commit_transaction);
+ jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+@@ -1182,8 +1184,7 @@ restart_loop:
+ spin_lock(&journal->j_list_lock);
+ commit_transaction->t_state = T_FINISHED;
+ /* Check if the transaction can be dropped now that we are finished */
+- if (commit_transaction->t_checkpoint_list == NULL &&
+- commit_transaction->t_checkpoint_io_list == NULL) {
++ if (commit_transaction->t_checkpoint_list == NULL) {
+ __jbd2_journal_drop_transaction(journal, commit_transaction);
+ jbd2_journal_free_transaction(commit_transaction);
+ }
+diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
+index 35302bc192eb9..580d2fdfe21f5 100644
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -757,6 +757,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
+ }
+ journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
+ write_unlock(&journal->j_state_lock);
++ jbd2_journal_lock_updates(journal);
+
+ return 0;
+ }
+@@ -768,8 +769,9 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
+ */
+ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
+ {
++ jbd2_journal_unlock_updates(journal);
+ if (journal->j_fc_cleanup_callback)
+- journal->j_fc_cleanup_callback(journal, 0);
++ journal->j_fc_cleanup_callback(journal, 0, tid);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
+ if (fallback)
+@@ -924,10 +926,16 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
+ for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
+ bh = journal->j_fc_wbuf[i];
+ wait_on_buffer(bh);
++ /*
++ * Update j_fc_off so jbd2_fc_release_bufs can release remain
++ * buffer head.
++ */
++ if (unlikely(!buffer_uptodate(bh))) {
++ journal->j_fc_off = i + 1;
++ return -EIO;
++ }
+ put_bh(bh);
+ journal->j_fc_wbuf[i] = NULL;
+- if (unlikely(!buffer_uptodate(bh)))
+- return -EIO;
+ }
+
+ return 0;
+@@ -2970,6 +2978,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
+ jbd_unlock_bh_journal_head(bh);
+ return jh;
+ }
++EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
+
+ static void __journal_remove_journal_head(struct buffer_head *bh)
+ {
+@@ -3022,6 +3031,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
+ jbd_unlock_bh_journal_head(bh);
+ }
+ }
++EXPORT_SYMBOL(jbd2_journal_put_journal_head);
+
+ /*
+ * Initialize jbd inode head
+diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
+index 8ca3527189f87..3c5dd010e39d2 100644
+--- a/fs/jbd2/recovery.c
++++ b/fs/jbd2/recovery.c
+@@ -256,6 +256,7 @@ static int fc_do_one_pass(journal_t *journal,
+ err = journal->j_fc_replay_callback(journal, bh, pass,
+ next_fc_block - journal->j_fc_first,
+ expected_commit_id);
++ brelse(bh);
+ next_fc_block++;
+ if (err < 0 || err == JBD2_FC_REPLAY_STOP)
+ break;
+diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
+index 6a3caedd22856..62e68c5b8ec3d 100644
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -173,7 +173,7 @@ static void wait_transaction_locked(journal_t *journal)
+ int need_to_start;
+ tid_t tid = journal->j_running_transaction->t_tid;
+
+- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
++ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
+ read_unlock(&journal->j_state_lock);
+@@ -199,7 +199,7 @@ static void wait_transaction_switching(journal_t *journal)
+ read_unlock(&journal->j_state_lock);
+ return;
+ }
+- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
++ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ read_unlock(&journal->j_state_lock);
+ /*
+@@ -911,7 +911,7 @@ void jbd2_journal_unlock_updates (journal_t *journal)
+ write_lock(&journal->j_state_lock);
+ --journal->j_barrier_count;
+ write_unlock(&journal->j_state_lock);
+- wake_up(&journal->j_wait_transaction_locked);
++ wake_up_all(&journal->j_wait_transaction_locked);
+ }
+
+ static void warn_dirty_buffer(struct buffer_head *bh)
+@@ -1001,36 +1001,28 @@ repeat:
+ * ie. locked but not dirty) or tune2fs (which may actually have
+ * the buffer dirtied, ugh.) */
+
+- if (buffer_dirty(bh)) {
++ if (buffer_dirty(bh) && jh->b_transaction) {
++ warn_dirty_buffer(bh);
+ /*
+- * First question: is this buffer already part of the current
+- * transaction or the existing committing transaction?
+- */
+- if (jh->b_transaction) {
+- J_ASSERT_JH(jh,
+- jh->b_transaction == transaction ||
+- jh->b_transaction ==
+- journal->j_committing_transaction);
+- if (jh->b_next_transaction)
+- J_ASSERT_JH(jh, jh->b_next_transaction ==
+- transaction);
+- warn_dirty_buffer(bh);
+- }
+- /*
+- * In any case we need to clean the dirty flag and we must
+- * do it under the buffer lock to be sure we don't race
+- * with running write-out.
++ * We need to clean the dirty flag and we must do it under the
++ * buffer lock to be sure we don't race with running write-out.
+ */
+ JBUFFER_TRACE(jh, "Journalling dirty buffer");
+ clear_buffer_dirty(bh);
++ /*
++ * The buffer is going to be added to BJ_Reserved list now and
++ * nothing guarantees jbd2_journal_dirty_metadata() will be
++ * ever called for it. So we need to set jbddirty bit here to
++ * make sure the buffer is dirtied and written out when the
++ * journaling machinery is done with it.
++ */
+ set_buffer_jbddirty(bh);
+ }
+
+- unlock_buffer(bh);
+-
+ error = -EROFS;
+ if (is_handle_aborted(handle)) {
+ spin_unlock(&jh->b_state_lock);
++ unlock_buffer(bh);
+ goto out;
+ }
+ error = 0;
+@@ -1040,8 +1032,10 @@ repeat:
+ * b_next_transaction points to it
+ */
+ if (jh->b_transaction == transaction ||
+- jh->b_next_transaction == transaction)
++ jh->b_next_transaction == transaction) {
++ unlock_buffer(bh);
+ goto done;
++ }
+
+ /*
+ * this is the first time this transaction is touching this buffer,
+@@ -1065,10 +1059,24 @@ repeat:
+ */
+ smp_wmb();
+ spin_lock(&journal->j_list_lock);
++ if (test_clear_buffer_dirty(bh)) {
++ /*
++ * Execute buffer dirty clearing and jh->b_transaction
++ * assignment under journal->j_list_lock locked to
++ * prevent bh being removed from checkpoint list if
++ * the buffer is in an intermediate state (not dirty
++ * and jh->b_transaction is NULL).
++ */
++ JBUFFER_TRACE(jh, "Journalling dirty buffer");
++ set_buffer_jbddirty(bh);
++ }
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+ spin_unlock(&journal->j_list_lock);
++ unlock_buffer(bh);
+ goto done;
+ }
++ unlock_buffer(bh);
++
+ /*
+ * If there is already a copy-out version of this buffer, then we don't
+ * need to make another one
+@@ -1477,8 +1485,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
+ struct journal_head *jh;
+ int ret = 0;
+
+- if (is_handle_aborted(handle))
+- return -EROFS;
+ if (!buffer_jbd(bh))
+ return -EUCLEAN;
+
+@@ -1525,6 +1531,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
+ journal = transaction->t_journal;
+ spin_lock(&jh->b_state_lock);
+
++ if (is_handle_aborted(handle)) {
++ /*
++ * Check journal aborting with @jh->b_state_lock locked,
++ * since 'jh->b_transaction' could be replaced with
++ * 'jh->b_next_transaction' during old transaction
++ * committing if journal aborted, which may fail
++ * assertion on 'jh->b_frozen_data == NULL'.
++ */
++ ret = -EROFS;
++ goto out_unlock_bh;
++ }
++
+ if (jh->b_modified == 0) {
+ /*
+ * This buffer's got modified and becoming part
+@@ -1757,8 +1775,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
+ * Otherwise, if the buffer has been written to disk,
+ * it is safe to remove the checkpoint and drop it.
+ */
+- if (!buffer_dirty(bh)) {
+- __jbd2_journal_remove_checkpoint(jh);
++ if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
+ spin_unlock(&journal->j_list_lock);
+ goto drop;
+ }
+@@ -2085,20 +2102,14 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
+
+ jh = bh2jh(bh);
+
+- if (buffer_locked(bh) || buffer_dirty(bh))
+- goto out;
+-
+ if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
+- goto out;
++ return;
+
+ spin_lock(&journal->j_list_lock);
+- if (jh->b_cp_transaction != NULL) {
+- /* written-back checkpointed metadata buffer */
+- JBUFFER_TRACE(jh, "remove from checkpoint list");
+- __jbd2_journal_remove_checkpoint(jh);
+- }
++ /* Remove written-back checkpointed metadata buffer */
++ if (jh->b_cp_transaction != NULL)
++ jbd2_journal_try_remove_checkpoint(jh);
+ spin_unlock(&journal->j_list_lock);
+-out:
+ return;
+ }
+
+@@ -2360,6 +2371,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
+ spin_unlock(&jh->b_state_lock);
+ write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
++ /* Already zapped buffer? Nothing to do... */
++ if (!bh->b_bdev)
++ return 0;
+ return -EBUSY;
+ }
+ /*
+diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
+index b288c8ae1236b..6ae9d6fefb861 100644
+--- a/fs/jffs2/build.c
++++ b/fs/jffs2/build.c
+@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
+ ic->scan_dents = NULL;
+ cond_resched();
+ }
+- jffs2_build_xattr_subsystem(c);
++ ret = jffs2_build_xattr_subsystem(c);
++ if (ret)
++ goto exit;
++
+ c->flags &= ~JFFS2_SB_FLAG_BUILDING;
+
+ dbg_fsbuild("FS build complete\n");
+@@ -415,13 +418,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
+ jffs2_free_ino_caches(c);
+ jffs2_free_raw_node_refs(c);
+ ret = -EIO;
+- goto out_free;
++ goto out_sum_exit;
+ }
+
+ jffs2_calc_trigger_levels(c);
+
+ return 0;
+
++ out_sum_exit:
++ jffs2_sum_exit(c);
+ out_free:
+ kvfree(c->blocks);
+
+diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
+index 4fc8cd698d1a4..97a3c09fd96b6 100644
+--- a/fs/jffs2/file.c
++++ b/fs/jffs2/file.c
+@@ -136,31 +136,25 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ struct page *pg;
+ struct inode *inode = mapping->host;
+ struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
++ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ pgoff_t index = pos >> PAGE_SHIFT;
+- uint32_t pageofs = index << PAGE_SHIFT;
+ int ret = 0;
+
+- pg = grab_cache_page_write_begin(mapping, index, flags);
+- if (!pg)
+- return -ENOMEM;
+- *pagep = pg;
+-
+ jffs2_dbg(1, "%s()\n", __func__);
+
+- if (pageofs > inode->i_size) {
+- /* Make new hole frag from old EOF to new page */
+- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
++ if (pos > inode->i_size) {
++ /* Make new hole frag from old EOF to new position */
+ struct jffs2_raw_inode ri;
+ struct jffs2_full_dnode *fn;
+ uint32_t alloc_len;
+
+- jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
+- (unsigned int)inode->i_size, pageofs);
++ jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n",
++ (unsigned int)inode->i_size, (uint32_t)pos);
+
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+ if (ret)
+- goto out_page;
++ goto out_err;
+
+ mutex_lock(&f->sem);
+ memset(&ri, 0, sizeof(ri));
+@@ -175,10 +169,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ ri.mode = cpu_to_jemode(inode->i_mode);
+ ri.uid = cpu_to_je16(i_uid_read(inode));
+ ri.gid = cpu_to_je16(i_gid_read(inode));
+- ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
++ ri.isize = cpu_to_je32((uint32_t)pos);
+ ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW());
+ ri.offset = cpu_to_je32(inode->i_size);
+- ri.dsize = cpu_to_je32(pageofs - inode->i_size);
++ ri.dsize = cpu_to_je32((uint32_t)pos - inode->i_size);
+ ri.csize = cpu_to_je32(0);
+ ri.compr = JFFS2_COMPR_ZERO;
+ ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
+@@ -190,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ ret = PTR_ERR(fn);
+ jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
+- goto out_page;
++ goto out_err;
+ }
+ ret = jffs2_add_full_dnode_to_inode(c, f, fn);
+ if (f->metadata) {
+@@ -205,13 +199,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ jffs2_free_full_dnode(fn);
+ jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
+- goto out_page;
++ goto out_err;
+ }
+ jffs2_complete_reservation(c);
+- inode->i_size = pageofs;
++ inode->i_size = pos;
+ mutex_unlock(&f->sem);
+ }
+
++ /*
++ * While getting a page and reading data in, lock c->alloc_sem until
++ * the page is Uptodate. Otherwise GC task may attempt to read the same
++ * page in read_cache_page(), which causes a deadlock.
++ */
++ mutex_lock(&c->alloc_sem);
++ pg = grab_cache_page_write_begin(mapping, index, flags);
++ if (!pg) {
++ ret = -ENOMEM;
++ goto release_sem;
++ }
++ *pagep = pg;
++
+ /*
+ * Read in the page if it wasn't already present. Cannot optimize away
+ * the whole page write case until jffs2_write_end can handle the
+@@ -221,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ mutex_lock(&f->sem);
+ ret = jffs2_do_readpage_nolock(inode, pg);
+ mutex_unlock(&f->sem);
+- if (ret)
+- goto out_page;
++ if (ret) {
++ unlock_page(pg);
++ put_page(pg);
++ goto release_sem;
++ }
+ }
+ jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
+- return ret;
+
+-out_page:
+- unlock_page(pg);
+- put_page(pg);
++release_sem:
++ mutex_unlock(&c->alloc_sem);
++out_err:
+ return ret;
+ }
+
+diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
+index 2ac410477c4f4..f83a468b64883 100644
+--- a/fs/jffs2/fs.c
++++ b/fs/jffs2/fs.c
+@@ -603,8 +603,9 @@ out_root:
+ jffs2_free_ino_caches(c);
+ jffs2_free_raw_node_refs(c);
+ kvfree(c->blocks);
+- out_inohash:
+ jffs2_clear_xattr_subsystem(c);
++ jffs2_sum_exit(c);
++ out_inohash:
+ kfree(c->inocache_list);
+ out_wbuf:
+ jffs2_flash_cleanup(c);
+diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
+index b676056826beb..29671e33a1714 100644
+--- a/fs/jffs2/scan.c
++++ b/fs/jffs2/scan.c
+@@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
+ if (!s) {
+ JFFS2_WARNING("Can't allocate memory for summary\n");
+ ret = -ENOMEM;
+- goto out;
++ goto out_buf;
+ }
+ }
+
+@@ -275,13 +275,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
+ }
+ ret = 0;
+ out:
++ jffs2_sum_reset_collected(s);
++ kfree(s);
++ out_buf:
+ if (buf_size)
+ kfree(flashbuf);
+ #ifndef __ECOS
+ else
+ mtd_unpoint(c->mtd, 0, c->mtd->size);
+ #endif
+- kfree(s);
+ return ret;
+ }
+
+diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
+index da3e18503c658..acb4492f5970c 100644
+--- a/fs/jffs2/xattr.c
++++ b/fs/jffs2/xattr.c
+@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
+ }
+
+ #define XREF_TMPHASH_SIZE (128)
+-void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
++int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
+ {
+ struct jffs2_xattr_ref *ref, *_ref;
+- struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
++ struct jffs2_xattr_ref **xref_tmphash;
+ struct jffs2_xattr_datum *xd, *_xd;
+ struct jffs2_inode_cache *ic;
+ struct jffs2_raw_node_ref *raw;
+@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
+
+ BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
+
++ xref_tmphash = kcalloc(XREF_TMPHASH_SIZE,
++ sizeof(struct jffs2_xattr_ref *), GFP_KERNEL);
++ if (!xref_tmphash)
++ return -ENOMEM;
++
+ /* Phase.1 : Merge same xref */
+- for (i=0; i < XREF_TMPHASH_SIZE; i++)
+- xref_tmphash[i] = NULL;
+ for (ref=c->xref_temp; ref; ref=_ref) {
+ struct jffs2_xattr_ref *tmp;
+
+@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
+ "%u of xref (%u dead, %u orphan) found.\n",
+ xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
+ xref_count, xref_dead_count, xref_orphan_count);
++ kfree(xref_tmphash);
++ return 0;
+ }
+
+ struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
+diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
+index 720007b2fd65d..1b5030a3349db 100644
+--- a/fs/jffs2/xattr.h
++++ b/fs/jffs2/xattr.h
+@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
+ #ifdef CONFIG_JFFS2_FS_XATTR
+
+ extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
+-extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
++extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
+ extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
+
+ extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
+@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
+ #else
+
+ #define jffs2_init_xattr_subsystem(c)
+-#define jffs2_build_xattr_subsystem(c)
++#define jffs2_build_xattr_subsystem(c) (0)
+ #define jffs2_clear_xattr_subsystem(c)
+
+ #define jffs2_xattr_do_crccheck_inode(c, ic)
+diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
+index 57ab424c05ff0..072821b50ab91 100644
+--- a/fs/jfs/inode.c
++++ b/fs/jfs/inode.c
+@@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode)
+ dquot_initialize(inode);
+
+ if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
++ struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap;
+ truncate_inode_pages_final(&inode->i_data);
+
+ if (test_cflag(COMMIT_Freewmap, inode))
+ jfs_free_zero_link(inode);
+
+- if (JFS_SBI(inode->i_sb)->ipimap)
++ if (ipimap && JFS_IP(ipimap)->i_imap)
+ diFree(inode);
+
+ /*
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index 91f4ec93dab1f..f235a3d270a01 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -148,13 +148,14 @@ static const s8 budtab[256] = {
+ * 0 - success
+ * -ENOMEM - insufficient memory
+ * -EIO - i/o error
++ * -EINVAL - wrong bmap data
+ */
+ int dbMount(struct inode *ipbmap)
+ {
+ struct bmap *bmp;
+ struct dbmap_disk *dbmp_le;
+ struct metapage *mp;
+- int i;
++ int i, err;
+
+ /*
+ * allocate/initialize the in-memory bmap descriptor
+@@ -169,16 +170,27 @@ int dbMount(struct inode *ipbmap)
+ BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
+ PSIZE, 0);
+ if (mp == NULL) {
+- kfree(bmp);
+- return -EIO;
++ err = -EIO;
++ goto err_kfree_bmp;
+ }
+
+ /* copy the on-disk bmap descriptor to its in-memory version. */
+ dbmp_le = (struct dbmap_disk *) mp->data;
+ bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
+ bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
++
+ bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
++ if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) {
++ err = -EINVAL;
++ goto err_release_metapage;
++ }
++
+ bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
++ if (!bmp->db_numag) {
++ err = -EINVAL;
++ goto err_release_metapage;
++ }
++
+ bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
+ bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
+ bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
+@@ -187,6 +199,17 @@ int dbMount(struct inode *ipbmap)
+ bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
+ bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
+ bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
++ if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
++ bmp->db_agl2size < 0) {
++ err = -EINVAL;
++ goto err_release_metapage;
++ }
++
++ if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) {
++ err = -EINVAL;
++ goto err_release_metapage;
++ }
++
+ for (i = 0; i < MAXAG; i++)
+ bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
+ bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
+@@ -207,6 +230,12 @@ int dbMount(struct inode *ipbmap)
+ BMAP_LOCK_INIT(bmp);
+
+ return (0);
++
++err_release_metapage:
++ release_metapage(mp);
++err_kfree_bmp:
++ kfree(bmp);
++ return err;
+ }
+
+
+@@ -378,7 +407,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
+ }
+
+ /* write the last buffer. */
+- write_metapage(mp);
++ if (mp)
++ write_metapage(mp);
+
+ IREAD_UNLOCK(ipbmap);
+
+@@ -1997,6 +2027,9 @@ dbAllocDmapLev(struct bmap * bmp,
+ if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ return -ENOSPC;
+
++ if (leafidx < 0)
++ return -EIO;
++
+ /* determine the block number within the file system corresponding
+ * to the leaf at which free space was found.
+ */
+diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
+index bb4a342a193d4..6420b6749d48f 100644
+--- a/fs/jfs/jfs_extent.c
++++ b/fs/jfs/jfs_extent.c
+@@ -508,6 +508,11 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
+ * blocks in the map. in that case, we'll start off with the
+ * maximum free.
+ */
++
++ /* give up if no space left */
++ if (bmp->db_maxfreebud == -1)
++ return -ENOSPC;
++
+ max = (s64) 1 << bmp->db_maxfreebud;
+ if (*nblocks >= max && *nblocks > nbperpage)
+ nb = nblks = (max > nbperpage) ? max : nbperpage;
+diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
+index b5d702df7111a..33ef13a0b1108 100644
+--- a/fs/jfs/jfs_filsys.h
++++ b/fs/jfs/jfs_filsys.h
+@@ -122,7 +122,9 @@
+ #define NUM_INODE_PER_IAG INOSPERIAG
+
+ #define MINBLOCKSIZE 512
++#define L2MINBLOCKSIZE 9
+ #define MAXBLOCKSIZE 4096
++#define L2MAXBLOCKSIZE 12
+ #define MAXFILESIZE ((s64)1 << 52)
+
+ #define JFS_LINK_MAX 0xffffffff
+diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
+index 5d7d7170c03c0..aa4ff7bcaff23 100644
+--- a/fs/jfs/jfs_mount.c
++++ b/fs/jfs/jfs_mount.c
+@@ -81,14 +81,14 @@ int jfs_mount(struct super_block *sb)
+ * (initialize mount inode from the superblock)
+ */
+ if ((rc = chkSuper(sb))) {
+- goto errout20;
++ goto out;
+ }
+
+ ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
+ if (ipaimap == NULL) {
+ jfs_err("jfs_mount: Failed to read AGGREGATE_I");
+ rc = -EIO;
+- goto errout20;
++ goto out;
+ }
+ sbi->ipaimap = ipaimap;
+
+@@ -99,7 +99,7 @@ int jfs_mount(struct super_block *sb)
+ */
+ if ((rc = diMount(ipaimap))) {
+ jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc);
+- goto errout21;
++ goto err_ipaimap;
+ }
+
+ /*
+@@ -108,7 +108,7 @@ int jfs_mount(struct super_block *sb)
+ ipbmap = diReadSpecial(sb, BMAP_I, 0);
+ if (ipbmap == NULL) {
+ rc = -EIO;
+- goto errout22;
++ goto err_umount_ipaimap;
+ }
+
+ jfs_info("jfs_mount: ipbmap:0x%p", ipbmap);
+@@ -120,7 +120,7 @@ int jfs_mount(struct super_block *sb)
+ */
+ if ((rc = dbMount(ipbmap))) {
+ jfs_err("jfs_mount: dbMount failed w/rc = %d", rc);
+- goto errout22;
++ goto err_ipbmap;
+ }
+
+ /*
+@@ -139,7 +139,7 @@ int jfs_mount(struct super_block *sb)
+ if (!ipaimap2) {
+ jfs_err("jfs_mount: Failed to read AGGREGATE_I");
+ rc = -EIO;
+- goto errout35;
++ goto err_umount_ipbmap;
+ }
+ sbi->ipaimap2 = ipaimap2;
+
+@@ -151,7 +151,7 @@ int jfs_mount(struct super_block *sb)
+ if ((rc = diMount(ipaimap2))) {
+ jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d",
+ rc);
+- goto errout35;
++ goto err_ipaimap2;
+ }
+ } else
+ /* Secondary aggregate inode table is not valid */
+@@ -168,7 +168,7 @@ int jfs_mount(struct super_block *sb)
+ jfs_err("jfs_mount: Failed to read FILESYSTEM_I");
+ /* open fileset secondary inode allocation map */
+ rc = -EIO;
+- goto errout40;
++ goto err_umount_ipaimap2;
+ }
+ jfs_info("jfs_mount: ipimap:0x%p", ipimap);
+
+@@ -178,41 +178,34 @@ int jfs_mount(struct super_block *sb)
+ /* initialize fileset inode allocation map */
+ if ((rc = diMount(ipimap))) {
+ jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
+- goto errout41;
++ goto err_ipimap;
+ }
+
+- goto out;
++ return rc;
+
+ /*
+ * unwind on error
+ */
+- errout41: /* close fileset inode allocation map inode */
++err_ipimap:
++ /* close fileset inode allocation map inode */
+ diFreeSpecial(ipimap);
+-
+- errout40: /* fileset closed */
+-
++err_umount_ipaimap2:
+ /* close secondary aggregate inode allocation map */
+- if (ipaimap2) {
++ if (ipaimap2)
+ diUnmount(ipaimap2, 1);
++err_ipaimap2:
++ /* close aggregate inodes */
++ if (ipaimap2)
+ diFreeSpecial(ipaimap2);
+- }
+-
+- errout35:
+-
+- /* close aggregate block allocation map */
++err_umount_ipbmap: /* close aggregate block allocation map */
+ dbUnmount(ipbmap, 1);
++err_ipbmap: /* close aggregate inodes */
+ diFreeSpecial(ipbmap);
+-
+- errout22: /* close aggregate inode allocation map */
+-
++err_umount_ipaimap: /* close aggregate inode allocation map */
+ diUnmount(ipaimap, 1);
+-
+- errout21: /* close aggregate inodes */
++err_ipaimap: /* close aggregate inodes */
+ diFreeSpecial(ipaimap);
+- errout20: /* aggregate closed */
+-
+- out:
+-
++out:
+ if (rc)
+ jfs_err("Mount JFS Failure: %d", rc);
+
+diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
+index 042bbe6d8ac2e..6c8680d3907af 100644
+--- a/fs/jfs/jfs_txnmgr.c
++++ b/fs/jfs/jfs_txnmgr.c
+@@ -354,6 +354,11 @@ tid_t txBegin(struct super_block *sb, int flag)
+ jfs_info("txBegin: flag = 0x%x", flag);
+ log = JFS_SBI(sb)->log;
+
++ if (!log) {
++ jfs_error(sb, "read-only filesystem\n");
++ return 0;
++ }
++
+ TXN_LOCK();
+
+ INCREMENT(TxStat.txBegin);
+diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
+index 9db4f5789c0ec..b3a0fe0649c49 100644
+--- a/fs/jfs/namei.c
++++ b/fs/jfs/namei.c
+@@ -799,6 +799,11 @@ static int jfs_link(struct dentry *old_dentry,
+ if (rc)
+ goto out;
+
++ if (isReadOnly(ip)) {
++ jfs_error(ip->i_sb, "read-only filesystem\n");
++ return -EROFS;
++ }
++
+ tid = txBegin(ip->i_sb, 0);
+
+ mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+@@ -946,7 +951,7 @@ static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip,
+ if (ssize <= IDATASIZE) {
+ ip->i_op = &jfs_fast_symlink_inode_operations;
+
+- ip->i_link = JFS_IP(ip)->i_inline;
++ ip->i_link = JFS_IP(ip)->i_inline_all;
+ memcpy(ip->i_link, name, ssize);
+ ip->i_size = ssize - 1;
+
+diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
+index 8e0a1378a4b1f..6a1cb2a182b67 100644
+--- a/fs/kernfs/dir.c
++++ b/fs/kernfs/dir.c
+@@ -19,7 +19,15 @@
+
+ DECLARE_RWSEM(kernfs_rwsem);
+ static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
+-static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
++/*
++ * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
++ * call pr_cont() while holding rename_lock. Because sometimes pr_cont()
++ * will perform wakeups when releasing console_sem. Holding rename_lock
++ * will introduce deadlock if the scheduler reads the kernfs_name in the
++ * wakeup path.
++ */
++static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
++static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
+ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
+
+ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
+@@ -230,12 +238,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&kernfs_rename_lock, flags);
++ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
+
+- kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
++ kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
+ pr_cont("%s", kernfs_pr_cont_buf);
+
+- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
++ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
+ }
+
+ /**
+@@ -249,10 +257,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
+ unsigned long flags;
+ int sz;
+
+- spin_lock_irqsave(&kernfs_rename_lock, flags);
++ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
+
+- sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
+- sizeof(kernfs_pr_cont_buf));
++ sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
++ sizeof(kernfs_pr_cont_buf));
+ if (sz < 0) {
+ pr_cont("(error)");
+ goto out;
+@@ -266,7 +274,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
+ pr_cont("%s", kernfs_pr_cont_buf);
+
+ out:
+- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
++ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
+ }
+
+ /**
+@@ -629,7 +637,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
+ return kn;
+
+ err_out3:
++ spin_lock(&kernfs_idr_lock);
+ idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
++ spin_unlock(&kernfs_idr_lock);
+ err_out2:
+ kmem_cache_free(kernfs_node_cache, kn);
+ err_out1:
+@@ -822,13 +832,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
+
+ lockdep_assert_held_read(&kernfs_rwsem);
+
+- /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
+- spin_lock_irq(&kernfs_rename_lock);
++ spin_lock_irq(&kernfs_pr_cont_lock);
+
+ len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
+
+ if (len >= sizeof(kernfs_pr_cont_buf)) {
+- spin_unlock_irq(&kernfs_rename_lock);
++ spin_unlock_irq(&kernfs_pr_cont_lock);
+ return NULL;
+ }
+
+@@ -840,7 +849,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
+ parent = kernfs_find_ns(parent, name, ns);
+ }
+
+- spin_unlock_irq(&kernfs_rename_lock);
++ spin_unlock_irq(&kernfs_pr_cont_lock);
+
+ return parent;
+ }
+@@ -1540,8 +1549,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
+ down_write(&kernfs_rwsem);
+
+ kn = kernfs_find_ns(parent, name, ns);
+- if (kn)
++ if (kn) {
++ kernfs_get(kn);
+ __kernfs_remove(kn);
++ kernfs_put(kn);
++ }
+
+ up_write(&kernfs_rwsem);
+
+diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig
+index b83cbd756ae50..6af339cfdc041 100644
+--- a/fs/ksmbd/Kconfig
++++ b/fs/ksmbd/Kconfig
+@@ -19,6 +19,7 @@ config SMB_SERVER
+ select CRYPTO_GCM
+ select ASN1
+ select OID_REGISTRY
++ select CRC32
+ default n
+ help
+ Choose Y here if you want to allow SMB3 compliant clients
+diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
+index 30a92ddc18174..59d2059467465 100644
+--- a/fs/ksmbd/auth.c
++++ b/fs/ksmbd/auth.c
+@@ -120,8 +120,8 @@ out:
+ return rc;
+ }
+
+-static int calc_ntlmv2_hash(struct ksmbd_session *sess, char *ntlmv2_hash,
+- char *dname)
++static int calc_ntlmv2_hash(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ char *ntlmv2_hash, char *dname)
+ {
+ int ret, len, conv_len;
+ wchar_t *domain = NULL;
+@@ -157,7 +157,7 @@ static int calc_ntlmv2_hash(struct ksmbd_session *sess, char *ntlmv2_hash,
+ }
+
+ conv_len = smb_strtoUTF16(uniname, user_name(sess->user), len,
+- sess->conn->local_nls);
++ conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+@@ -181,7 +181,7 @@ static int calc_ntlmv2_hash(struct ksmbd_session *sess, char *ntlmv2_hash,
+ }
+
+ conv_len = smb_strtoUTF16((__le16 *)domain, dname, len,
+- sess->conn->local_nls);
++ conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+@@ -214,27 +214,28 @@ out:
+ *
+ * Return: 0 on success, error number on error
+ */
+-int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+- int blen, char *domain_name)
++int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ struct ntlmv2_resp *ntlmv2, int blen, char *domain_name,
++ char *cryptkey)
+ {
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
+- struct ksmbd_crypto_ctx *ctx;
++ struct ksmbd_crypto_ctx *ctx = NULL;
+ char *construct = NULL;
+ int rc, len;
+
++ rc = calc_ntlmv2_hash(conn, sess, ntlmv2_hash, domain_name);
++ if (rc) {
++ ksmbd_debug(AUTH, "could not get v2 hash rc %d\n", rc);
++ goto out;
++ }
++
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+- rc = calc_ntlmv2_hash(sess, ntlmv2_hash, domain_name);
+- if (rc) {
+- ksmbd_debug(AUTH, "could not get v2 hash rc %d\n", rc);
+- goto out;
+- }
+-
+ rc = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ ntlmv2_hash,
+ CIFS_HMAC_MD5_HASH_SIZE);
+@@ -256,7 +257,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ goto out;
+ }
+
+- memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
++ memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
+
+ rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
+@@ -270,6 +271,8 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+ goto out;
+ }
++ ksmbd_release_crypto_ctx(ctx);
++ ctx = NULL;
+
+ rc = ksmbd_gen_sess_key(sess, ntlmv2_hash, ntlmv2_rsp);
+ if (rc) {
+@@ -280,7 +283,8 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0)
+ rc = -EINVAL;
+ out:
+- ksmbd_release_crypto_ctx(ctx);
++ if (ctx)
++ ksmbd_release_crypto_ctx(ctx);
+ kfree(construct);
+ return rc;
+ }
+@@ -295,7 +299,8 @@ out:
+ * Return: 0 on success, error number on error
+ */
+ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+- int blob_len, struct ksmbd_session *sess)
++ int blob_len, struct ksmbd_conn *conn,
++ struct ksmbd_session *sess)
+ {
+ char *domain_name;
+ unsigned int nt_off, dn_off;
+@@ -319,21 +324,23 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ dn_off = le32_to_cpu(authblob->DomainName.BufferOffset);
+ dn_len = le16_to_cpu(authblob->DomainName.Length);
+
+- if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len)
++ if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len ||
++ nt_len < CIFS_ENCPWD_SIZE)
+ return -EINVAL;
+
+ /* TODO : use domain name that imported from configuration file */
+ domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
+- dn_len, true, sess->conn->local_nls);
++ dn_len, true, conn->local_nls);
+ if (IS_ERR(domain_name))
+ return PTR_ERR(domain_name);
+
+ /* process NTLMv2 authentication */
+ ksmbd_debug(AUTH, "decode_ntlmssp_authenticate_blob dname%s\n",
+ domain_name);
+- ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
++ ret = ksmbd_auth_ntlmv2(conn, sess,
++ (struct ntlmv2_resp *)((char *)authblob + nt_off),
+ nt_len - CIFS_ENCPWD_SIZE,
+- domain_name);
++ domain_name, conn->ntlmssp.cryptkey);
+ kfree(domain_name);
+ return ret;
+ }
+@@ -347,7 +354,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ *
+ */
+ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+- int blob_len, struct ksmbd_session *sess)
++ int blob_len, struct ksmbd_conn *conn)
+ {
+ if (blob_len < sizeof(struct negotiate_message)) {
+ ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
+@@ -361,7 +368,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ return -EINVAL;
+ }
+
+- sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
++ conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ return 0;
+ }
+
+@@ -375,14 +382,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ */
+ unsigned int
+ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+- struct ksmbd_session *sess)
++ struct ksmbd_conn *conn)
+ {
+ struct target_info *tinfo;
+ wchar_t *name;
+ __u8 *target_name;
+ unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
+ int len, uni_len, conv_len;
+- int cflags = sess->ntlmssp.client_flags;
++ int cflags = conn->ntlmssp.client_flags;
+
+ memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
+ chgblob->MessageType = NtLmChallenge;
+@@ -403,7 +410,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ if (cflags & NTLMSSP_REQUEST_TARGET)
+ flags |= NTLMSSP_REQUEST_TARGET;
+
+- if (sess->conn->use_spnego &&
++ if (conn->use_spnego &&
+ (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
+ flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+
+@@ -414,7 +421,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ return -ENOMEM;
+
+ conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
+- sess->conn->local_nls);
++ conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ kfree(name);
+ return -EINVAL;
+@@ -430,8 +437,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
+
+ /* Initialize random conn challenge */
+- get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
+- memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
++ get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64));
++ memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey,
+ CIFS_CRYPTO_KEY_SIZE);
+
+ /* Add Target Information to security buffer */
+@@ -631,8 +638,9 @@ struct derivation {
+ bool binding;
+ };
+
+-static int generate_key(struct ksmbd_session *sess, struct kvec label,
+- struct kvec context, __u8 *key, unsigned int key_size)
++static int generate_key(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ struct kvec label, struct kvec context, __u8 *key,
++ unsigned int key_size)
+ {
+ unsigned char zero = 0x0;
+ __u8 i[4] = {0, 0, 0, 1};
+@@ -692,8 +700,8 @@ static int generate_key(struct ksmbd_session *sess, struct kvec label,
+ goto smb3signkey_ret;
+ }
+
+- if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+- sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
++ if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
++ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4);
+ else
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4);
+@@ -728,17 +736,17 @@ static int generate_smb3signingkey(struct ksmbd_session *sess,
+ if (!chann)
+ return 0;
+
+- if (sess->conn->dialect >= SMB30_PROT_ID && signing->binding)
++ if (conn->dialect >= SMB30_PROT_ID && signing->binding)
+ key = chann->smb3signingkey;
+ else
+ key = sess->smb3signingkey;
+
+- rc = generate_key(sess, signing->label, signing->context, key,
++ rc = generate_key(conn, sess, signing->label, signing->context, key,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+- if (!(sess->conn->dialect >= SMB30_PROT_ID && signing->binding))
++ if (!(conn->dialect >= SMB30_PROT_ID && signing->binding))
+ memcpy(chann->smb3signingkey, key, SMB3_SIGN_KEY_SIZE);
+
+ ksmbd_debug(AUTH, "dumping generated AES signing keys\n");
+@@ -792,30 +800,31 @@ struct derivation_twin {
+ struct derivation decryption;
+ };
+
+-static int generate_smb3encryptionkey(struct ksmbd_session *sess,
++static int generate_smb3encryptionkey(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess,
+ const struct derivation_twin *ptwin)
+ {
+ int rc;
+
+- rc = generate_key(sess, ptwin->encryption.label,
++ rc = generate_key(conn, sess, ptwin->encryption.label,
+ ptwin->encryption.context, sess->smb3encryptionkey,
+ SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+- rc = generate_key(sess, ptwin->decryption.label,
++ rc = generate_key(conn, sess, ptwin->decryption.label,
+ ptwin->decryption.context,
+ sess->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ ksmbd_debug(AUTH, "dumping generated AES encryption keys\n");
+- ksmbd_debug(AUTH, "Cipher type %d\n", sess->conn->cipher_type);
++ ksmbd_debug(AUTH, "Cipher type %d\n", conn->cipher_type);
+ ksmbd_debug(AUTH, "Session Id %llu\n", sess->id);
+ ksmbd_debug(AUTH, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key);
+- if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+- sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
++ if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
++ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
+ ksmbd_debug(AUTH, "ServerIn Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3encryptionkey);
+ ksmbd_debug(AUTH, "ServerOut Key %*ph\n",
+@@ -829,7 +838,8 @@ static int generate_smb3encryptionkey(struct ksmbd_session *sess,
+ return 0;
+ }
+
+-int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess)
++int ksmbd_gen_smb30_encryptionkey(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess)
+ {
+ struct derivation_twin twin;
+ struct derivation *d;
+@@ -846,10 +856,11 @@ int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess)
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+- return generate_smb3encryptionkey(sess, &twin);
++ return generate_smb3encryptionkey(conn, sess, &twin);
+ }
+
+-int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess)
++int ksmbd_gen_smb311_encryptionkey(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess)
+ {
+ struct derivation_twin twin;
+ struct derivation *d;
+@@ -866,7 +877,7 @@ int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess)
+ d->context.iov_base = sess->Preauth_HashValue;
+ d->context.iov_len = 64;
+
+- return generate_smb3encryptionkey(sess, &twin);
++ return generate_smb3encryptionkey(conn, sess, &twin);
+ }
+
+ int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
+index 9c2d4badd05d1..25b772653de0a 100644
+--- a/fs/ksmbd/auth.h
++++ b/fs/ksmbd/auth.h
+@@ -38,16 +38,17 @@ struct kvec;
+ int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+ unsigned int nvec, int enc);
+ void ksmbd_copy_gss_neg_header(void *buf);
+-int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
+-int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+- int blen, char *domain_name);
++int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ struct ntlmv2_resp *ntlmv2, int blen, char *domain_name,
++ char *cryptkey);
+ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+- int blob_len, struct ksmbd_session *sess);
++ int blob_len, struct ksmbd_conn *conn,
++ struct ksmbd_session *sess);
+ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+- int blob_len, struct ksmbd_session *sess);
++ int blob_len, struct ksmbd_conn *conn);
+ unsigned int
+ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+- struct ksmbd_session *sess);
++ struct ksmbd_conn *conn);
+ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len);
+ int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+@@ -58,8 +59,10 @@ int ksmbd_gen_smb30_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+ int ksmbd_gen_smb311_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+-int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess);
+-int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess);
++int ksmbd_gen_smb30_encryptionkey(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess);
++int ksmbd_gen_smb311_encryptionkey(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess);
+ int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+ __u8 *pi_hash);
+ int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
+diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
+index b57a0d8a392ff..cab274b77727f 100644
+--- a/fs/ksmbd/connection.c
++++ b/fs/ksmbd/connection.c
+@@ -36,6 +36,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn)
+ list_del(&conn->conns_list);
+ write_unlock(&conn_list_lock);
+
++ xa_destroy(&conn->sessions);
+ kvfree(conn->request_buf);
+ kfree(conn->preauth_info);
+ kfree(conn);
+@@ -62,15 +63,16 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
+ atomic_set(&conn->req_running, 0);
+ atomic_set(&conn->r_count, 0);
+ conn->total_credits = 1;
++ conn->outstanding_credits = 0;
+
+ init_waitqueue_head(&conn->req_running_q);
+ INIT_LIST_HEAD(&conn->conns_list);
+- INIT_LIST_HEAD(&conn->sessions);
+ INIT_LIST_HEAD(&conn->requests);
+ INIT_LIST_HEAD(&conn->async_requests);
+ spin_lock_init(&conn->request_lock);
+ spin_lock_init(&conn->credits_lock);
+ ida_init(&conn->async_ida);
++ xa_init(&conn->sessions);
+
+ spin_lock_init(&conn->llist_lock);
+ INIT_LIST_HEAD(&conn->lock_list);
+@@ -273,7 +275,7 @@ int ksmbd_conn_handler_loop(void *p)
+ {
+ struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
+ struct ksmbd_transport *t = conn->transport;
+- unsigned int pdu_size;
++ unsigned int pdu_size, max_allowed_pdu_size;
+ char hdr_buf[4] = {0,};
+ int size;
+
+@@ -291,27 +293,38 @@ int ksmbd_conn_handler_loop(void *p)
+ kvfree(conn->request_buf);
+ conn->request_buf = NULL;
+
+- size = t->ops->read(t, hdr_buf, sizeof(hdr_buf));
++ size = t->ops->read(t, hdr_buf, sizeof(hdr_buf), -1);
+ if (size != sizeof(hdr_buf))
+ break;
+
+ pdu_size = get_rfc1002_len(hdr_buf);
+ ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
+
++ if (conn->status == KSMBD_SESS_GOOD)
++ max_allowed_pdu_size =
++ SMB3_MAX_MSGSIZE + conn->vals->max_write_size;
++ else
++ max_allowed_pdu_size = SMB3_MAX_MSGSIZE;
++
++ if (pdu_size > max_allowed_pdu_size) {
++ pr_err_ratelimited("PDU length(%u) exceeds maximum allowed PDU size(%u) on connection(%d)\n",
++ pdu_size, max_allowed_pdu_size,
++ conn->status);
++ break;
++ }
++
+ /*
+- * Check if pdu size is valid (min : smb header size,
+- * max : 0x00FFFFFF).
++ * Check maximum pdu size(0x00FFFFFF).
+ */
+- if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE ||
+- pdu_size > MAX_STREAM_PROT_LEN) {
+- continue;
+- }
++ if (pdu_size > MAX_STREAM_PROT_LEN)
++ break;
+
+ /* 4 for rfc1002 length field */
+- size = pdu_size + 4;
++ /* 1 for implied bcc[0] */
++ size = pdu_size + 4 + 1;
+ conn->request_buf = kvmalloc(size, GFP_KERNEL);
+ if (!conn->request_buf)
+- continue;
++ break;
+
+ memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
+ if (!ksmbd_smb_request(conn))
+@@ -321,7 +334,7 @@ int ksmbd_conn_handler_loop(void *p)
+ * We already read 4 bytes to find out PDU size, now
+ * read in PDU
+ */
+- size = t->ops->read(t, conn->request_buf + 4, pdu_size);
++ size = t->ops->read(t, conn->request_buf + 4, pdu_size, 2);
+ if (size < 0) {
+ pr_err("sock_read failed: %d\n", size);
+ break;
+diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
+index e5403c587a58c..89eb41bbd1601 100644
+--- a/fs/ksmbd/connection.h
++++ b/fs/ksmbd/connection.h
+@@ -20,13 +20,6 @@
+
+ #define KSMBD_SOCKET_BACKLOG 16
+
+-/*
+- * WARNING
+- *
+- * This is nothing but a HACK. Session status should move to channel
+- * or to session. As of now we have 1 tcp_conn : 1 ksmbd_session, but
+- * we need to change it to 1 tcp_conn : N ksmbd_sessions.
+- */
+ enum {
+ KSMBD_SESS_NEW = 0,
+ KSMBD_SESS_GOOD,
+@@ -55,14 +48,14 @@ struct ksmbd_conn {
+ struct nls_table *local_nls;
+ struct list_head conns_list;
+ /* smb session 1 per user */
+- struct list_head sessions;
++ struct xarray sessions;
+ unsigned long last_active;
+ /* How many request are running currently */
+ atomic_t req_running;
+ /* References which are made for this Server object*/
+ atomic_t r_count;
+- unsigned short total_credits;
+- unsigned short max_credits;
++ unsigned int total_credits;
++ unsigned int outstanding_credits;
+ spinlock_t credits_lock;
+ wait_queue_head_t req_running_q;
+ /* Lock to protect requests list*/
+@@ -72,12 +65,7 @@ struct ksmbd_conn {
+ int connection_type;
+ struct ksmbd_stats stats;
+ char ClientGUID[SMB2_CLIENT_GUID_SIZE];
+- union {
+- /* pending trans request table */
+- struct trans_state *recent_trans;
+- /* Used by ntlmssp */
+- char *ntlmssp_cryptkey;
+- };
++ struct ntlmssp_auth ntlmssp;
+
+ spinlock_t llist_lock;
+ struct list_head lock_list;
+@@ -122,7 +110,8 @@ struct ksmbd_conn_ops {
+ struct ksmbd_transport_ops {
+ int (*prepare)(struct ksmbd_transport *t);
+ void (*disconnect)(struct ksmbd_transport *t);
+- int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
++ int (*read)(struct ksmbd_transport *t, char *buf,
++ unsigned int size, int max_retries);
+ int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
+ int size, bool need_invalidate_rkey,
+ unsigned int remote_key);
+diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
+index c6718a05d347f..fae859d59c792 100644
+--- a/fs/ksmbd/ksmbd_netlink.h
++++ b/fs/ksmbd/ksmbd_netlink.h
+@@ -103,6 +103,10 @@ struct ksmbd_startup_request {
+ * we set the SPARSE_FILES bit (0x40).
+ */
+ __u32 sub_auth[3]; /* Subauth value for Security ID */
++ __u32 smb2_max_credits; /* MAX credits */
++ __u32 smbd_max_io_size; /* smbd read write size */
++ __u32 max_connections; /* Number of maximum simultaneous connections */
++ __u32 reserved[126]; /* Reserved room */
+ __u32 ifc_list_sz; /* interfaces list size */
+ __s8 ____payload[];
+ };
+@@ -113,7 +117,7 @@ struct ksmbd_startup_request {
+ * IPC request to shutdown ksmbd server.
+ */
+ struct ksmbd_shutdown_request {
+- __s32 reserved;
++ __s32 reserved[16];
+ };
+
+ /*
+@@ -122,6 +126,7 @@ struct ksmbd_shutdown_request {
+ struct ksmbd_login_request {
+ __u32 handle;
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -135,6 +140,7 @@ struct ksmbd_login_response {
+ __u16 status;
+ __u16 hash_sz; /* hash size */
+ __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -143,6 +149,7 @@ struct ksmbd_login_response {
+ struct ksmbd_share_config_request {
+ __u32 handle;
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -157,6 +164,7 @@ struct ksmbd_share_config_response {
+ __u16 force_directory_mode;
+ __u16 force_uid;
+ __u16 force_gid;
++ __u32 reserved[128]; /* Reserved room */
+ __u32 veto_list_sz;
+ __s8 ____payload[];
+ };
+@@ -187,6 +195,7 @@ struct ksmbd_tree_connect_request {
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
+ __s8 share[KSMBD_REQ_MAX_SHARE_NAME];
+ __s8 peer_addr[64];
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -196,6 +205,7 @@ struct ksmbd_tree_connect_response {
+ __u32 handle;
+ __u16 status;
+ __u16 connection_flags;
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -204,6 +214,7 @@ struct ksmbd_tree_connect_response {
+ struct ksmbd_tree_disconnect_request {
+ __u64 session_id; /* session id */
+ __u64 connect_id; /* tree connection id */
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+@@ -212,6 +223,7 @@ struct ksmbd_tree_disconnect_request {
+ struct ksmbd_logout_request {
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u32 account_flags;
++ __u32 reserved[16]; /* Reserved room */
+ };
+
+ /*
+diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
+index 0d28e723a28c7..dd262daa2c4a5 100644
+--- a/fs/ksmbd/mgmt/tree_connect.c
++++ b/fs/ksmbd/mgmt/tree_connect.c
+@@ -16,9 +16,10 @@
+ #include "user_session.h"
+
+ struct ksmbd_tree_conn_status
+-ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name)
++ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ char *share_name)
+ {
+- struct ksmbd_tree_conn_status status = {-EINVAL, NULL};
++ struct ksmbd_tree_conn_status status = {-ENOENT, NULL};
+ struct ksmbd_tree_connect_response *resp = NULL;
+ struct ksmbd_share_config *sc;
+ struct ksmbd_tree_connect *tree_conn = NULL;
+@@ -41,7 +42,7 @@ ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name)
+ goto out_error;
+ }
+
+- peer_addr = KSMBD_TCP_PEER_SOCKADDR(sess->conn);
++ peer_addr = KSMBD_TCP_PEER_SOCKADDR(conn);
+ resp = ksmbd_ipc_tree_connect_request(sess,
+ sc,
+ tree_conn,
+diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/ksmbd/mgmt/tree_connect.h
+index 18e2a996e0aab..71e50271dccf0 100644
+--- a/fs/ksmbd/mgmt/tree_connect.h
++++ b/fs/ksmbd/mgmt/tree_connect.h
+@@ -12,6 +12,7 @@
+
+ struct ksmbd_share_config;
+ struct ksmbd_user;
++struct ksmbd_conn;
+
+ struct ksmbd_tree_connect {
+ int id;
+@@ -40,7 +41,8 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn,
+ struct ksmbd_session;
+
+ struct ksmbd_tree_conn_status
+-ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name);
++ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
++ char *share_name);
+
+ int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
+ struct ksmbd_tree_connect *tree_conn);
+diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
+index 1019d3677d553..279d00feff216 100644
+--- a/fs/ksmbd/mgmt/user_config.c
++++ b/fs/ksmbd/mgmt/user_config.c
+@@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user)
+ return 1;
+ return 0;
+ }
++
++bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2)
++{
++ if (strcmp(u1->name, u2->name))
++ return false;
++ if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz))
++ return false;
++
++ return true;
++}
+diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
+index aff80b0295790..6a44109617f14 100644
+--- a/fs/ksmbd/mgmt/user_config.h
++++ b/fs/ksmbd/mgmt/user_config.h
+@@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account);
+ struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
+ void ksmbd_free_user(struct ksmbd_user *user);
+ int ksmbd_anonymous_user(struct ksmbd_user *user);
++bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2);
+ #endif /* __USER_CONFIG_MANAGEMENT_H__ */
+diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
+index 8d8ffd8c6f192..92b1603b5abeb 100644
+--- a/fs/ksmbd/mgmt/user_session.c
++++ b/fs/ksmbd/mgmt/user_session.c
+@@ -32,11 +32,13 @@ static void free_channel_list(struct ksmbd_session *sess)
+ {
+ struct channel *chann, *tmp;
+
++ write_lock(&sess->chann_lock);
+ list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
+ chann_list) {
+ list_del(&chann->chann_list);
+ kfree(chann);
+ }
++ write_unlock(&sess->chann_lock);
+ }
+
+ static void __session_rpc_close(struct ksmbd_session *sess,
+@@ -106,15 +108,17 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
+ entry->method = method;
+ entry->id = ksmbd_ipc_id_alloc();
+ if (entry->id < 0)
+- goto error;
++ goto free_entry;
+
+ resp = ksmbd_rpc_open(sess, entry->id);
+ if (!resp)
+- goto error;
++ goto free_id;
+
+ kvfree(resp);
+ return entry->id;
+-error:
++free_id:
++ ksmbd_rpc_id_free(entry->id);
++free_entry:
+ list_del(&entry->list);
+ kfree(entry);
+ return -EINVAL;
+@@ -149,11 +153,6 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
+ if (!sess)
+ return;
+
+- if (!atomic_dec_and_test(&sess->refcnt))
+- return;
+-
+- list_del(&sess->sessions_entry);
+-
+ down_write(&sessions_table_lock);
+ hash_del(&sess->hlist);
+ up_write(&sessions_table_lock);
+@@ -181,53 +180,70 @@ static struct ksmbd_session *__session_lookup(unsigned long long id)
+ return NULL;
+ }
+
+-void ksmbd_session_register(struct ksmbd_conn *conn,
+- struct ksmbd_session *sess)
++int ksmbd_session_register(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess)
+ {
+- sess->conn = conn;
+- list_add(&sess->sessions_entry, &conn->sessions);
++ sess->dialect = conn->dialect;
++ memcpy(sess->ClientGUID, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
++ return xa_err(xa_store(&conn->sessions, sess->id, sess, GFP_KERNEL));
+ }
+
+-void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
++static int ksmbd_chann_del(struct ksmbd_conn *conn, struct ksmbd_session *sess)
+ {
+- struct ksmbd_session *sess;
+-
+- while (!list_empty(&conn->sessions)) {
+- sess = list_entry(conn->sessions.next,
+- struct ksmbd_session,
+- sessions_entry);
++ struct channel *chann, *tmp;
+
+- ksmbd_session_destroy(sess);
++ write_lock(&sess->chann_lock);
++ list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
++ chann_list) {
++ if (chann->conn == conn) {
++ list_del(&chann->chann_list);
++ kfree(chann);
++ write_unlock(&sess->chann_lock);
++ return 0;
++ }
+ }
+-}
++ write_unlock(&sess->chann_lock);
+
+-static bool ksmbd_session_id_match(struct ksmbd_session *sess,
+- unsigned long long id)
+-{
+- return sess->id == id;
++ return -ENOENT;
+ }
+
+-struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+- unsigned long long id)
++void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
+ {
+- struct ksmbd_session *sess = NULL;
++ struct ksmbd_session *sess;
+
+- list_for_each_entry(sess, &conn->sessions, sessions_entry) {
+- if (ksmbd_session_id_match(sess, id))
+- return sess;
++ if (conn->binding) {
++ int bkt;
++
++ down_write(&sessions_table_lock);
++ hash_for_each(sessions_table, bkt, sess, hlist) {
++ if (!ksmbd_chann_del(conn, sess)) {
++ up_write(&sessions_table_lock);
++ goto sess_destroy;
++ }
++ }
++ up_write(&sessions_table_lock);
++ } else {
++ unsigned long id;
++
++ xa_for_each(&conn->sessions, id, sess) {
++ if (!ksmbd_chann_del(conn, sess))
++ goto sess_destroy;
++ }
+ }
+- return NULL;
+-}
+
+-int get_session(struct ksmbd_session *sess)
+-{
+- return atomic_inc_not_zero(&sess->refcnt);
++ return;
++
++sess_destroy:
++ if (list_empty(&sess->ksmbd_chann_list)) {
++ xa_erase(&conn->sessions, sess->id);
++ ksmbd_session_destroy(sess);
++ }
+ }
+
+-void put_session(struct ksmbd_session *sess)
++struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
++ unsigned long long id)
+ {
+- if (atomic_dec_and_test(&sess->refcnt))
+- pr_err("get/%s seems to be mismatched.", __func__);
++ return xa_load(&conn->sessions, id);
+ }
+
+ struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
+@@ -236,10 +252,6 @@ struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
+
+ down_read(&sessions_table_lock);
+ sess = __session_lookup(id);
+- if (sess) {
+- if (!get_session(sess))
+- sess = NULL;
+- }
+ up_read(&sessions_table_lock);
+
+ return sess;
+@@ -253,6 +265,8 @@ struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+ sess = ksmbd_session_lookup(conn, id);
+ if (!sess && conn->binding)
+ sess = ksmbd_session_lookup_slowpath(id);
++ if (sess && sess->state != SMB2_SESSION_VALID)
++ sess = NULL;
+ return sess;
+ }
+
+@@ -314,12 +328,11 @@ static struct ksmbd_session *__session_create(int protocol)
+ goto error;
+
+ set_session_flag(sess, protocol);
+- INIT_LIST_HEAD(&sess->sessions_entry);
+ xa_init(&sess->tree_conns);
+ INIT_LIST_HEAD(&sess->ksmbd_chann_list);
+ INIT_LIST_HEAD(&sess->rpc_handle_list);
+ sess->sequence_number = 1;
+- atomic_set(&sess->refcnt, 1);
++ rwlock_init(&sess->chann_lock);
+
+ switch (protocol) {
+ case CIFDS_SESSION_FLAG_SMB2:
+diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
+index 82289c3cbd2bc..8934b8ee275ba 100644
+--- a/fs/ksmbd/mgmt/user_session.h
++++ b/fs/ksmbd/mgmt/user_session.h
+@@ -33,8 +33,10 @@ struct preauth_session {
+ struct ksmbd_session {
+ u64 id;
+
++ __u16 dialect;
++ char ClientGUID[SMB2_CLIENT_GUID_SIZE];
++
+ struct ksmbd_user *user;
+- struct ksmbd_conn *conn;
+ unsigned int sequence_number;
+ unsigned int flags;
+
+@@ -45,10 +47,10 @@ struct ksmbd_session {
+ int state;
+ __u8 *Preauth_HashValue;
+
+- struct ntlmssp_auth ntlmssp;
+ char sess_key[CIFS_KEY_SIZE];
+
+ struct hlist_node hlist;
++ rwlock_t chann_lock;
+ struct list_head ksmbd_chann_list;
+ struct xarray tree_conns;
+ struct ida tree_conn_ida;
+@@ -58,9 +60,7 @@ struct ksmbd_session {
+ __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+
+- struct list_head sessions_entry;
+ struct ksmbd_file_table file_table;
+- atomic_t refcnt;
+ };
+
+ static inline int test_session_flag(struct ksmbd_session *sess, int bit)
+@@ -85,8 +85,8 @@ void ksmbd_session_destroy(struct ksmbd_session *sess);
+ struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id);
+ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id);
+-void ksmbd_session_register(struct ksmbd_conn *conn,
+- struct ksmbd_session *sess);
++int ksmbd_session_register(struct ksmbd_conn *conn,
++ struct ksmbd_session *sess);
+ void ksmbd_sessions_deregister(struct ksmbd_conn *conn);
+ struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+ unsigned long long id);
+@@ -101,6 +101,4 @@ void ksmbd_release_tree_conn_id(struct ksmbd_session *sess, int id);
+ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name);
+ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id);
+ int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id);
+-int get_session(struct ksmbd_session *sess);
+-void put_session(struct ksmbd_session *sess);
+ #endif /* __USER_SESSION_MANAGEMENT_H__ */
+diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
+index 8317f7ca402b4..28f44f0c918c9 100644
+--- a/fs/ksmbd/ndr.c
++++ b/fs/ksmbd/ndr.c
+@@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value)
+ static int ndr_read_int32(struct ndr *n, __u32 *value)
+ {
+ if (n->offset + sizeof(__u32) > n->length)
+- return 0;
++ return -EINVAL;
+
+ if (value)
+ *value = le32_to_cpu(*(__le32 *)ndr_get_field(n));
+@@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+ return ret;
+
+ if (da->version != 3 && da->version != 4) {
+- pr_err("v%d version is not supported\n", da->version);
++ ksmbd_debug(VFS, "v%d version is not supported\n", da->version);
+ return -EINVAL;
+ }
+
+@@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+ return ret;
+
+ if (da->version != version2) {
+- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
++ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
+ da->version, version2);
+ return -EINVAL;
+ }
+@@ -453,7 +453,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+ if (ret)
+ return ret;
+ if (acl->version != 4) {
+- pr_err("v%d version is not supported\n", acl->version);
++ ksmbd_debug(VFS, "v%d version is not supported\n", acl->version);
+ return -EINVAL;
+ }
+
+@@ -461,7 +461,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+ if (ret)
+ return ret;
+ if (acl->version != version2) {
+- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
++ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
+ acl->version, version2);
+ return -EINVAL;
+ }
+diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
+index f9dae6ef21150..9722e52377994 100644
+--- a/fs/ksmbd/oplock.c
++++ b/fs/ksmbd/oplock.c
+@@ -30,6 +30,7 @@ static DEFINE_RWLOCK(lease_list_lock);
+ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
+ u64 id, __u16 Tid)
+ {
++ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ struct oplock_info *opinfo;
+
+@@ -38,7 +39,7 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
+ return NULL;
+
+ opinfo->sess = sess;
+- opinfo->conn = sess->conn;
++ opinfo->conn = conn;
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ opinfo->pending_break = 0;
+@@ -972,7 +973,7 @@ int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+ }
+
+ list_for_each_entry(lb, &lease_table_list, l_entry) {
+- if (!memcmp(lb->client_guid, sess->conn->ClientGUID,
++ if (!memcmp(lb->client_guid, sess->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ goto found;
+ }
+@@ -988,7 +989,7 @@ found:
+ rcu_read_unlock();
+ if (opinfo->o_fp->f_ci == ci)
+ goto op_next;
+- err = compare_guid_key(opinfo, sess->conn->ClientGUID,
++ err = compare_guid_key(opinfo, sess->ClientGUID,
+ lctx->lease_key);
+ if (err) {
+ err = -EINVAL;
+@@ -1122,7 +1123,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
+ struct oplock_info *m_opinfo;
+
+ /* is lease already granted ? */
+- m_opinfo = same_client_has_lease(ci, sess->conn->ClientGUID,
++ m_opinfo = same_client_has_lease(ci, sess->ClientGUID,
+ lctx);
+ if (m_opinfo) {
+ copy_lease(m_opinfo, opinfo);
+@@ -1240,7 +1241,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
+ {
+ struct oplock_info *op, *brk_op;
+ struct ksmbd_inode *ci;
+- struct ksmbd_conn *conn = work->sess->conn;
++ struct ksmbd_conn *conn = work->conn;
+
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_OPLOCKS))
+@@ -1445,11 +1446,12 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
+ * smb2_find_context_vals() - find a particular context info in open request
+ * @open_req: buffer containing smb2 file open(create) request
+ * @tag: context name to search for
++ * @tag_len: the length of tag
+ *
+ * Return: pointer to requested context, NULL if @str context not found
+ * or error pointer if name length is invalid.
+ */
+-struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
++struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len)
+ {
+ struct create_context *cc;
+ unsigned int next = 0;
+@@ -1483,12 +1485,12 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
+ name_len < 4 ||
+ name_off + name_len > cc_len ||
+ (value_off & 0x7) != 0 ||
+- (value_off && (value_off < name_off + name_len)) ||
++ (value_len && value_off < name_off + (name_len < 8 ? 8 : name_len)) ||
+ ((u64)value_off + value_len > cc_len))
+ return ERR_PTR(-EINVAL);
+
+ name = (char *)cc + name_off;
+- if (memcmp(name, tag, name_len) == 0)
++ if (name_len == tag_len && !memcmp(name, tag, name_len))
+ return cc;
+
+ remain_len -= next;
+diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
+index 119b8047cfbd4..2c4f4a0512b71 100644
+--- a/fs/ksmbd/oplock.h
++++ b/fs/ksmbd/oplock.h
+@@ -120,7 +120,7 @@ void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp);
+ void create_mxac_rsp_buf(char *cc, int maximal_access);
+ void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id);
+ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp);
+-struct create_context *smb2_find_context_vals(void *open_req, const char *str);
++struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len);
+ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
+ char *lease_key);
+ int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
+index 2a2b2135bfded..eb45d56b3577c 100644
+--- a/fs/ksmbd/server.c
++++ b/fs/ksmbd/server.c
+@@ -184,24 +184,31 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
+ goto send;
+ }
+
+- if (conn->ops->check_user_session) {
+- rc = conn->ops->check_user_session(work);
+- if (rc < 0) {
+- command = conn->ops->get_cmd_val(work);
+- conn->ops->set_rsp_status(work,
+- STATUS_USER_SESSION_DELETED);
+- goto send;
+- } else if (rc > 0) {
+- rc = conn->ops->get_ksmbd_tcon(work);
++ do {
++ if (conn->ops->check_user_session) {
++ rc = conn->ops->check_user_session(work);
+ if (rc < 0) {
+- conn->ops->set_rsp_status(work,
+- STATUS_NETWORK_NAME_DELETED);
++ if (rc == -EINVAL)
++ conn->ops->set_rsp_status(work,
++ STATUS_INVALID_PARAMETER);
++ else
++ conn->ops->set_rsp_status(work,
++ STATUS_USER_SESSION_DELETED);
+ goto send;
++ } else if (rc > 0) {
++ rc = conn->ops->get_ksmbd_tcon(work);
++ if (rc < 0) {
++ if (rc == -EINVAL)
++ conn->ops->set_rsp_status(work,
++ STATUS_INVALID_PARAMETER);
++ else
++ conn->ops->set_rsp_status(work,
++ STATUS_NETWORK_NAME_DELETED);
++ goto send;
++ }
+ }
+ }
+- }
+
+- do {
+ rc = __process_request(work, conn, &command);
+ if (rc == SERVER_HANDLER_ABORT)
+ break;
+@@ -235,10 +242,8 @@ send:
+ if (work->sess && work->sess->enc && work->encrypted &&
+ conn->ops->encrypt_resp) {
+ rc = conn->ops->encrypt_resp(work);
+- if (rc < 0) {
++ if (rc < 0)
+ conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
+- goto send;
+- }
+ }
+
+ ksmbd_conn_write(work);
+@@ -613,6 +618,7 @@ err_unregister:
+ static void __exit ksmbd_server_exit(void)
+ {
+ ksmbd_server_shutdown();
++ rcu_barrier();
+ ksmbd_release_inode_hash();
+ }
+
+@@ -632,5 +638,6 @@ MODULE_SOFTDEP("pre: sha512");
+ MODULE_SOFTDEP("pre: aead2");
+ MODULE_SOFTDEP("pre: ccm");
+ MODULE_SOFTDEP("pre: gcm");
++MODULE_SOFTDEP("pre: crc32");
+ module_init(ksmbd_server_init)
+ module_exit(ksmbd_server_exit)
+diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h
+index ac9d932f8c8aa..db72781817603 100644
+--- a/fs/ksmbd/server.h
++++ b/fs/ksmbd/server.h
+@@ -41,6 +41,7 @@ struct ksmbd_server_config {
+ unsigned int share_fake_fscaps;
+ struct smb_sid domain_sid;
+ unsigned int auth_mechs;
++ unsigned int max_connections;
+
+ char *conf[SERVER_CONF_WORK_GROUP + 1];
+ };
+diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
+index 030ca57c37849..8ef9503c4ab90 100644
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -91,11 +91,6 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+ *off = 0;
+ *len = 0;
+
+- /* error reqeusts do not have data area */
+- if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
+- (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE)
+- return ret;
+-
+ /*
+ * Following commands have data areas so we have to get the location
+ * of the data buffer offset and data buffer length for the particular
+@@ -137,8 +132,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+ *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+ break;
+ case SMB2_WRITE:
+- if (((struct smb2_write_req *)hdr)->DataOffset) {
+- *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
++ if (((struct smb2_write_req *)hdr)->DataOffset ||
++ ((struct smb2_write_req *)hdr)->Length) {
++ *off = max_t(unsigned int,
++ le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
++ offsetof(struct smb2_write_req, Buffer) - 4);
+ *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+ break;
+ }
+@@ -152,15 +150,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+ break;
+ case SMB2_LOCK:
+ {
+- int lock_count;
++ unsigned short lock_count;
+
+- /*
+- * smb2_lock request size is 48 included single
+- * smb2_lock_element structure size.
+- */
+- lock_count = le16_to_cpu(((struct smb2_lock_req *)hdr)->LockCount) - 1;
++ lock_count = le16_to_cpu(((struct smb2_lock_req *)hdr)->LockCount);
+ if (lock_count > 0) {
+- *off = __SMB2_HEADER_STRUCTURE_SIZE + 48;
++ *off = offsetof(struct smb2_lock_req, locks);
+ *len = sizeof(struct smb2_lock_element) * lock_count;
+ }
+ break;
+@@ -290,7 +284,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
+ unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
+ unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
+ void *__hdr = hdr;
+- int ret;
++ int ret = 0;
+
+ switch (hdr->Command) {
+ case SMB2_QUERY_INFO:
+@@ -327,43 +321,49 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
+ ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
+ credit_charge, calc_credit_num);
+ return 1;
+- } else if (credit_charge > conn->max_credits) {
++ } else if (credit_charge > conn->vals->max_credits) {
+ ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
+ return 1;
+ }
+
+ spin_lock(&conn->credits_lock);
+- if (credit_charge <= conn->total_credits) {
+- conn->total_credits -= credit_charge;
+- ret = 0;
+- } else {
++ if (credit_charge > conn->total_credits) {
+ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+ credit_charge, conn->total_credits);
+ ret = 1;
+ }
++
++ if ((u64)conn->outstanding_credits + credit_charge > conn->total_credits) {
++ ksmbd_debug(SMB, "Exceeding the maximum allowable outstanding requests, given: %u, pending: %u\n",
++ credit_charge, conn->outstanding_credits);
++ ret = 1;
++ } else
++ conn->outstanding_credits += credit_charge;
++
+ spin_unlock(&conn->credits_lock);
++
+ return ret;
+ }
+
+ int ksmbd_smb2_check_message(struct ksmbd_work *work)
+ {
+- struct smb2_pdu *pdu = work->request_buf;
++ struct smb2_pdu *pdu = ksmbd_req_buf_next(work);
+ struct smb2_hdr *hdr = &pdu->hdr;
+ int command;
+ __u32 clc_len; /* calculated length */
+- __u32 len = get_rfc1002_len(pdu);
++ __u32 len = get_rfc1002_len(work->request_buf);
++ __u32 req_struct_size, next_cmd = le32_to_cpu(hdr->NextCommand);
+
+- if (work->next_smb2_rcv_hdr_off) {
+- pdu = ksmbd_req_buf_next(work);
+- hdr = &pdu->hdr;
++ if ((u64)work->next_smb2_rcv_hdr_off + next_cmd > len) {
++ pr_err("next command(%u) offset exceeds smb msg size\n",
++ next_cmd);
++ return 1;
+ }
+
+- if (le32_to_cpu(hdr->NextCommand) > 0) {
+- len = le32_to_cpu(hdr->NextCommand);
+- } else if (work->next_smb2_rcv_hdr_off) {
++ if (next_cmd > 0)
++ len = next_cmd;
++ else if (work->next_smb2_rcv_hdr_off)
+ len -= work->next_smb2_rcv_hdr_off;
+- len = round_up(len, 8);
+- }
+
+ if (check_smb2_hdr(hdr))
+ return 1;
+@@ -381,25 +381,25 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
+ }
+
+ if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
+- if (command != SMB2_OPLOCK_BREAK_HE &&
+- (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) {
+- /* error packets have 9 byte structure size */
+- ksmbd_debug(SMB,
+- "Illegal request size %u for command %d\n",
+- le16_to_cpu(pdu->StructureSize2), command);
+- return 1;
+- } else if (command == SMB2_OPLOCK_BREAK_HE &&
+- hdr->Status == 0 &&
+- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
+- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
++ if (!(command == SMB2_OPLOCK_BREAK_HE &&
++ (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 ||
++ le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) {
+ /* special case for SMB2.1 lease break message */
+ ksmbd_debug(SMB,
+- "Illegal request size %d for oplock break\n",
+- le16_to_cpu(pdu->StructureSize2));
++ "Illegal request size %u for command %d\n",
++ le16_to_cpu(pdu->StructureSize2), command);
+ return 1;
+ }
+ }
+
++ req_struct_size = le16_to_cpu(pdu->StructureSize2) +
++ __SMB2_HEADER_STRUCTURE_SIZE;
++ if (command == SMB2_LOCK_HE)
++ req_struct_size -= sizeof(struct smb2_lock_element);
++
++ if (req_struct_size > len + 1)
++ return 1;
++
+ if (smb2_calc_size(hdr, &clc_len))
+ return 1;
+
+@@ -416,20 +416,22 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
+ goto validate_credit;
+
+ /*
+- * windows client also pad up to 8 bytes when compounding.
+- * If pad is longer than eight bytes, log the server behavior
+- * (once), since may indicate a problem but allow it and
+- * continue since the frame is parseable.
++ * SMB2 NEGOTIATE request will be validated when message
++ * handling proceeds.
+ */
+- if (clc_len < len) {
+- ksmbd_debug(SMB,
+- "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n",
+- len, clc_len, command,
+- le64_to_cpu(hdr->MessageId));
++ if (command == SMB2_NEGOTIATE_HE)
++ goto validate_credit;
++
++ /*
++ * Allow a message that is padded to an 8-byte boundary.
++ * Linux 4.19.217 with SMB 3.0.2 sometimes sends
++ * messages where clc_len is exactly 8 bytes
++ * less than len.
++ */
++ if (clc_len < len && (len - clc_len) <= 8)
+ goto validate_credit;
+- }
+
+- ksmbd_debug(SMB,
++ pr_err_ratelimited(
+ "cli req too short, len %d not %d. cmd:%d mid:%llu\n",
+ len, clc_len, command,
+ le64_to_cpu(hdr->MessageId));
+diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
+index fb6a65d231391..f0a5b704f301c 100644
+--- a/fs/ksmbd/smb2ops.c
++++ b/fs/ksmbd/smb2ops.c
+@@ -20,6 +20,7 @@ static struct smb_version_values smb21_server_values = {
+ .max_read_size = SMB21_DEFAULT_IOSIZE,
+ .max_write_size = SMB21_DEFAULT_IOSIZE,
+ .max_trans_size = SMB21_DEFAULT_IOSIZE,
++ .max_credits = SMB2_MAX_CREDITS,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+@@ -45,6 +46,7 @@ static struct smb_version_values smb30_server_values = {
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
++ .max_credits = SMB2_MAX_CREDITS,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+@@ -71,6 +73,7 @@ static struct smb_version_values smb302_server_values = {
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
++ .max_credits = SMB2_MAX_CREDITS,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+@@ -97,6 +100,7 @@ static struct smb_version_values smb311_server_values = {
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
++ .max_credits = SMB2_MAX_CREDITS,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+@@ -198,7 +202,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
+ conn->ops = &smb2_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+- conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+@@ -216,7 +219,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
+ conn->ops = &smb3_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+- conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+@@ -241,7 +243,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
+ conn->ops = &smb3_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+- conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+@@ -266,15 +267,11 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
+ conn->ops = &smb3_11_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+- conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+- if (conn->cipher_type)
+- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+-
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+
+@@ -308,3 +305,11 @@ void init_smb2_max_trans_size(unsigned int sz)
+ smb302_server_values.max_trans_size = sz;
+ smb311_server_values.max_trans_size = sz;
+ }
++
++void init_smb2_max_credits(unsigned int sz)
++{
++ smb21_server_values.max_credits = sz;
++ smb30_server_values.max_credits = sz;
++ smb302_server_values.max_credits = sz;
++ smb311_server_values.max_credits = sz;
++}
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 7e448df3f8474..b763c6ba636fb 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -11,6 +11,7 @@
+ #include <linux/statfs.h>
+ #include <linux/ethtool.h>
+ #include <linux/falloc.h>
++#include <linux/mount.h>
+
+ #include "glob.h"
+ #include "smb2pdu.h"
+@@ -96,7 +97,6 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
+ struct smb2_hdr *req_hdr = work->request_buf;
+ int tree_id;
+
+- work->tcon = NULL;
+ if (work->conn->ops->get_cmd_val(work) == SMB2_TREE_CONNECT_HE ||
+ work->conn->ops->get_cmd_val(work) == SMB2_CANCEL_HE ||
+ work->conn->ops->get_cmd_val(work) == SMB2_LOGOFF_HE) {
+@@ -110,10 +110,28 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
+ }
+
+ tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId);
++
++ /*
++ * If the request is not the first in a compound request,
++ * just validate the tree id in the header against work->tcon->id.
++ */
++ if (work->next_smb2_rcv_hdr_off) {
++ if (!work->tcon) {
++ pr_err("The first operation in the compound does not have tcon\n");
++ return -EINVAL;
++ }
++ if (work->tcon->id != tree_id) {
++ pr_err("tree id(%u) is different from id(%u) in the first operation\n",
++ tree_id, work->tcon->id);
++ return -EINVAL;
++ }
++ return 1;
++ }
++
+ work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id);
+ if (!work->tcon) {
+ pr_err("Invalid tid %d\n", tree_id);
+- return -EINVAL;
++ return -ENOENT;
+ }
+
+ return 1;
+@@ -301,16 +319,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
+ struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
+ struct ksmbd_conn *conn = work->conn;
+- unsigned short credits_requested;
++ unsigned short credits_requested, aux_max;
+ unsigned short credit_charge, credits_granted = 0;
+- unsigned short aux_max, aux_credits;
+
+ if (work->send_no_response)
+ return 0;
+
+ hdr->CreditCharge = req_hdr->CreditCharge;
+
+- if (conn->total_credits > conn->max_credits) {
++ if (conn->total_credits > conn->vals->max_credits) {
+ hdr->CreditRequest = 0;
+ pr_err("Total credits overflow: %d\n", conn->total_credits);
+ return -EINVAL;
+@@ -318,6 +335,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
+
+ credit_charge = max_t(unsigned short,
+ le16_to_cpu(req_hdr->CreditCharge), 1);
++ if (credit_charge > conn->total_credits) {
++ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
++ credit_charge, conn->total_credits);
++ return -EINVAL;
++ }
++
++ conn->total_credits -= credit_charge;
++ conn->outstanding_credits -= credit_charge;
+ credits_requested = max_t(unsigned short,
+ le16_to_cpu(req_hdr->CreditRequest), 1);
+
+@@ -327,17 +352,11 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
+ * TODO: Need to adjuct CreditRequest value according to
+ * current cpu load
+ */
+- aux_credits = credits_requested - 1;
+ if (hdr->Command == SMB2_NEGOTIATE)
+- aux_max = 0;
++ aux_max = 1;
+ else
+- aux_max = conn->max_credits - credit_charge;
+- aux_credits = min_t(unsigned short, aux_credits, aux_max);
+- credits_granted = credit_charge + aux_credits;
+-
+- if (conn->max_credits - conn->total_credits < credits_granted)
+- credits_granted = conn->max_credits -
+- conn->total_credits;
++ aux_max = conn->vals->max_credits - conn->total_credits;
++ credits_granted = min_t(unsigned short, credits_requested, aux_max);
+
+ conn->total_credits += credits_granted;
+ work->credits_granted += credits_granted;
+@@ -535,9 +554,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
+ struct smb2_query_info_req *req;
+
+ req = work->request_buf;
+- if (req->InfoType == SMB2_O_INFO_FILE &&
+- (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+- req->FileInfoClass == FILE_ALL_INFORMATION))
++ if ((req->InfoType == SMB2_O_INFO_FILE &&
++ (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
++ req->FileInfoClass == FILE_ALL_INFORMATION)) ||
++ req->InfoType == SMB2_O_INFO_SECURITY)
+ sz = large_sz;
+ }
+
+@@ -566,7 +586,6 @@ int smb2_check_user_session(struct ksmbd_work *work)
+ unsigned int cmd = conn->ops->get_cmd_val(work);
+ unsigned long long sess_id;
+
+- work->sess = NULL;
+ /*
+ * SMB2_ECHO, SMB2_NEGOTIATE, SMB2_SESSION_SETUP command do not
+ * require a session id, so no need to validate user session's for
+@@ -577,21 +596,41 @@ int smb2_check_user_session(struct ksmbd_work *work)
+ return 0;
+
+ if (!ksmbd_conn_good(work))
+- return -EINVAL;
++ return -EIO;
+
+ sess_id = le64_to_cpu(req_hdr->SessionId);
++
++ /*
++ * If the request is not the first in a compound request,
++ * just validate the session id in the header against work->sess->id.
++ */
++ if (work->next_smb2_rcv_hdr_off) {
++ if (!work->sess) {
++ pr_err("The first operation in the compound does not have sess\n");
++ return -EINVAL;
++ }
++ if (work->sess->id != sess_id) {
++ pr_err("session id(%llu) is different from the first operation(%lld)\n",
++ sess_id, work->sess->id);
++ return -EINVAL;
++ }
++ return 1;
++ }
++
+ /* Check for validity of user session */
+ work->sess = ksmbd_session_lookup_all(conn, sess_id);
+ if (work->sess)
+ return 1;
+ ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id);
+- return -EINVAL;
++ return -ENOENT;
+ }
+
+-static void destroy_previous_session(struct ksmbd_user *user, u64 id)
++static void destroy_previous_session(struct ksmbd_conn *conn,
++ struct ksmbd_user *user, u64 id)
+ {
+ struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
+ struct ksmbd_user *prev_user;
++ struct channel *chann;
+
+ if (!prev_sess)
+ return;
+@@ -601,13 +640,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id)
+ if (!prev_user ||
+ strcmp(user->name, prev_user->name) ||
+ user->passkey_sz != prev_user->passkey_sz ||
+- memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) {
+- put_session(prev_sess);
++ memcmp(user->passkey, prev_user->passkey, user->passkey_sz))
+ return;
+- }
+
+- put_session(prev_sess);
+- ksmbd_session_destroy(prev_sess);
++ prev_sess->state = SMB2_SESSION_EXPIRED;
++ write_lock(&prev_sess->chann_lock);
++ list_for_each_entry(chann, &prev_sess->ksmbd_chann_list, chann_list)
++ chann->conn->status = KSMBD_SESS_EXITING;
++ write_unlock(&prev_sess->chann_lock);
+ }
+
+ /**
+@@ -917,6 +957,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
+ }
+ }
+
++/**
++ * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption
++ * @conn: smb connection
++ *
++ * Return: true if connection should be encrypted, else false
++ */
++static bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
++{
++ if (!conn->ops->generate_encryptionkey)
++ return false;
++
++ /*
++ * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag.
++ * SMB 3.1.1 uses the cipher_type field.
++ */
++ return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) ||
++ conn->cipher_type;
++}
++
+ static void decode_compress_ctxt(struct ksmbd_conn *conn,
+ struct smb2_compression_ctx *pneg_ctxt)
+ {
+@@ -1058,16 +1117,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
+ return rc;
+ }
+
+- if (req->DialectCount == 0) {
+- pr_err("malformed packet\n");
++ smb2_buf_len = get_rfc1002_len(work->request_buf);
++ smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects) - 4;
++ if (smb2_neg_size > smb2_buf_len) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ rc = -EINVAL;
+ goto err_out;
+ }
+
+- smb2_buf_len = get_rfc1002_len(work->request_buf);
+- smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects) - 4;
+- if (smb2_neg_size > smb2_buf_len) {
++ if (req->DialectCount == 0) {
++ pr_err("malformed packet\n");
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ rc = -EINVAL;
+ goto err_out;
+@@ -1121,12 +1180,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
+ status);
+ rsp->hdr.Status = status;
+ rc = -EINVAL;
++ kfree(conn->preauth_info);
++ conn->preauth_info = NULL;
+ goto err_out;
+ }
+
+ rc = init_smb3_11_server(conn);
+ if (rc < 0) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
++ kfree(conn->preauth_info);
++ conn->preauth_info = NULL;
+ goto err_out;
+ }
+
+@@ -1286,7 +1349,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
+ int sz, rc;
+
+ ksmbd_debug(SMB, "negotiate phase\n");
+- rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
++ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn);
+ if (rc)
+ return rc;
+
+@@ -1296,7 +1359,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
+ memset(chgblob, 0, sizeof(struct challenge_message));
+
+ if (!work->conn->use_spnego) {
+- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
++ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
+ if (sz < 0)
+ return -ENOMEM;
+
+@@ -1312,7 +1375,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
+ return -ENOMEM;
+
+ chgblob = (struct challenge_message *)neg_blob;
+- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
++ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
+ if (sz < 0) {
+ rc = -ENOMEM;
+ goto out;
+@@ -1354,7 +1417,7 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
+ struct authenticate_message *authblob;
+ struct ksmbd_user *user;
+ char *name;
+- unsigned int auth_msg_len, name_off, name_len, secbuf_len;
++ unsigned int name_off, name_len, secbuf_len;
+
+ secbuf_len = le16_to_cpu(req->SecurityBufferLength);
+ if (secbuf_len < sizeof(struct authenticate_message)) {
+@@ -1364,9 +1427,8 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
+ authblob = user_authblob(conn, req);
+ name_off = le32_to_cpu(authblob->UserName.BufferOffset);
+ name_len = le16_to_cpu(authblob->UserName.Length);
+- auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len;
+
+- if (auth_msg_len < (u64)name_off + name_len)
++ if (secbuf_len < (u64)name_off + name_len)
+ return NULL;
+
+ name = smb_strndup_from_utf16((const char *)authblob + name_off,
+@@ -1422,77 +1484,80 @@ static int ntlm_authenticate(struct ksmbd_work *work)
+ /* Check for previous session */
+ prev_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_id && prev_id != sess->id)
+- destroy_previous_session(user, prev_id);
++ destroy_previous_session(conn, user, prev_id);
+
+ if (sess->state == SMB2_SESSION_VALID) {
+ /*
+ * Reuse session if anonymous try to connect
+ * on reauthetication.
+ */
+- if (ksmbd_anonymous_user(user)) {
++ if (conn->binding == false && ksmbd_anonymous_user(user)) {
+ ksmbd_free_user(user);
+ return 0;
+ }
+- ksmbd_free_user(sess->user);
+- }
+
+- sess->user = user;
+- if (user_guest(sess->user)) {
+- if (conn->sign) {
+- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
++ if (!ksmbd_compare_user(sess->user, user)) {
++ ksmbd_free_user(user);
+ return -EPERM;
+ }
++ ksmbd_free_user(user);
++ } else {
++ sess->user = user;
++ }
+
++ if (conn->binding == false && user_guest(sess->user)) {
+ rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
+ } else {
+ struct authenticate_message *authblob;
+
+ authblob = user_authblob(conn, req);
+ sz = le16_to_cpu(req->SecurityBufferLength);
+- rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
++ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
+ if (rc) {
+ set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
+ ksmbd_debug(SMB, "authentication failed\n");
+ return -EPERM;
+ }
++ }
+
+- /*
+- * If session state is SMB2_SESSION_VALID, We can assume
+- * that it is reauthentication. And the user/password
+- * has been verified, so return it here.
+- */
+- if (sess->state == SMB2_SESSION_VALID) {
+- if (conn->binding)
+- goto binding_session;
+- return 0;
+- }
++ /*
++ * If session state is SMB2_SESSION_VALID, We can assume
++ * that it is reauthentication. And the user/password
++ * has been verified, so return it here.
++ */
++ if (sess->state == SMB2_SESSION_VALID) {
++ if (conn->binding)
++ goto binding_session;
++ return 0;
++ }
+
+- if ((conn->sign || server_conf.enforced_signing) ||
+- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+- sess->sign = true;
++ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE &&
++ (conn->sign || server_conf.enforced_signing)) ||
++ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
++ sess->sign = true;
+
+- if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION &&
+- conn->ops->generate_encryptionkey &&
+- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+- rc = conn->ops->generate_encryptionkey(sess);
+- if (rc) {
+- ksmbd_debug(SMB,
+- "SMB3 encryption key generation failed\n");
+- return -EINVAL;
+- }
+- sess->enc = true;
+- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+- /*
+- * signing is disable if encryption is enable
+- * on this session
+- */
+- sess->sign = false;
++ if (smb3_encryption_negotiated(conn) &&
++ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
++ rc = conn->ops->generate_encryptionkey(conn, sess);
++ if (rc) {
++ ksmbd_debug(SMB,
++ "SMB3 encryption key generation failed\n");
++ return -EINVAL;
+ }
++ sess->enc = true;
++ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
++ /*
++ * signing is disable if encryption is enable
++ * on this session
++ */
++ sess->sign = false;
+ }
+
+ binding_session:
+ if (conn->dialect >= SMB30_PROT_ID) {
++ read_lock(&sess->chann_lock);
+ chann = lookup_chann_list(sess, conn);
++ read_unlock(&sess->chann_lock);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+@@ -1500,7 +1565,9 @@ binding_session:
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
++ write_lock(&sess->chann_lock);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
++ write_unlock(&sess->chann_lock);
+ }
+ }
+
+@@ -1544,7 +1611,7 @@ static int krb5_authenticate(struct ksmbd_work *work)
+ /* Check previous session */
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_sess_id && prev_sess_id != sess->id)
+- destroy_previous_session(sess->user, prev_sess_id);
++ destroy_previous_session(conn, sess->user, prev_sess_id);
+
+ if (sess->state == SMB2_SESSION_VALID)
+ ksmbd_free_user(sess->user);
+@@ -1562,9 +1629,8 @@ static int krb5_authenticate(struct ksmbd_work *work)
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
+
+- if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) &&
+- conn->ops->generate_encryptionkey) {
+- retval = conn->ops->generate_encryptionkey(sess);
++ if (smb3_encryption_negotiated(conn)) {
++ retval = conn->ops->generate_encryptionkey(conn, sess);
+ if (retval) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+@@ -1576,7 +1642,9 @@ static int krb5_authenticate(struct ksmbd_work *work)
+ }
+
+ if (conn->dialect >= SMB30_PROT_ID) {
++ read_lock(&sess->chann_lock);
+ chann = lookup_chann_list(sess, conn);
++ read_unlock(&sess->chann_lock);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+@@ -1584,7 +1652,9 @@ static int krb5_authenticate(struct ksmbd_work *work)
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
++ write_lock(&sess->chann_lock);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
++ write_unlock(&sess->chann_lock);
+ }
+ }
+
+@@ -1634,7 +1704,9 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ goto out_err;
+ }
+ rsp->hdr.SessionId = cpu_to_le64(sess->id);
+- ksmbd_session_register(conn, sess);
++ rc = ksmbd_session_register(conn, sess);
++ if (rc)
++ goto out_err;
+ } else if (conn->dialect >= SMB30_PROT_ID &&
+ (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+ req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
+@@ -1646,7 +1718,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ goto out_err;
+ }
+
+- if (conn->dialect != sess->conn->dialect) {
++ if (conn->dialect != sess->dialect) {
+ rc = -EINVAL;
+ goto out_err;
+ }
+@@ -1656,7 +1728,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ goto out_err;
+ }
+
+- if (strncmp(conn->ClientGUID, sess->conn->ClientGUID,
++ if (strncmp(conn->ClientGUID, sess->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE)) {
+ rc = -ENOENT;
+ goto out_err;
+@@ -1677,6 +1749,11 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ goto out_err;
+ }
+
++ if (user_guest(sess->user)) {
++ rc = -EOPNOTSUPP;
++ goto out_err;
++ }
++
+ conn->binding = true;
+ } else if ((conn->dialect < SMB30_PROT_ID ||
+ server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+@@ -1700,8 +1777,10 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ negblob_off = le16_to_cpu(req->SecurityBufferOffset);
+ negblob_len = le16_to_cpu(req->SecurityBufferLength);
+ if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) ||
+- negblob_len < offsetof(struct negotiate_message, NegotiateFlags))
+- return -EINVAL;
++ negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
++ rc = -EINVAL;
++ goto out_err;
++ }
+
+ negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
+ negblob_off);
+@@ -1760,6 +1839,10 @@ int smb2_sess_setup(struct ksmbd_work *work)
+ }
+ kfree(sess->Preauth_HashValue);
+ sess->Preauth_HashValue = NULL;
++ } else {
++ pr_info_ratelimited("Unknown NTLMSSP message type : 0x%x\n",
++ le32_to_cpu(negblob->MessageType));
++ rc = -EINVAL;
+ }
+ } else {
+ /* TODO: need one more negotiation */
+@@ -1782,6 +1865,8 @@ out_err:
+ rsp->hdr.Status = STATUS_NETWORK_SESSION_EXPIRED;
+ else if (rc == -ENOMEM)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
++ else if (rc == -EOPNOTSUPP)
++ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ else if (rc)
+ rsp->hdr.Status = STATUS_LOGON_FAILURE;
+
+@@ -1809,6 +1894,7 @@ out_err:
+ if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
+ try_delay = true;
+
++ xa_erase(&conn->sessions, sess->id);
+ ksmbd_session_destroy(sess);
+ work->sess = NULL;
+ if (try_delay)
+@@ -1854,7 +1940,7 @@ int smb2_tree_connect(struct ksmbd_work *work)
+ ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n",
+ name, treename);
+
+- status = ksmbd_tree_conn_connect(sess, name);
++ status = ksmbd_tree_conn_connect(conn, sess, name);
+ if (status.ret == KSMBD_TREE_CONN_STATUS_OK)
+ rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id);
+ else
+@@ -1906,8 +1992,9 @@ out_err1:
+ rsp->hdr.Status = STATUS_SUCCESS;
+ rc = 0;
+ break;
++ case -ENOENT:
+ case KSMBD_TREE_CONN_STATUS_NO_SHARE:
+- rsp->hdr.Status = STATUS_BAD_NETWORK_PATH;
++ rsp->hdr.Status = STATUS_BAD_NETWORK_NAME;
+ break;
+ case -ENOMEM:
+ case KSMBD_TREE_CONN_STATUS_NOMEM:
+@@ -2019,6 +2106,7 @@ int smb2_tree_disconnect(struct ksmbd_work *work)
+
+ ksmbd_close_tree_conn_fds(work);
+ ksmbd_tree_conn_disconnect(sess, tcon);
++ work->tcon = NULL;
+ return 0;
+ }
+
+@@ -2039,9 +2127,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
+
+ ksmbd_debug(SMB, "request\n");
+
+- /* Got a valid session, set connection state */
+- WARN_ON(sess->conn != conn);
+-
+ /* setting CifsExiting here may race with start_tcp_sess */
+ ksmbd_conn_set_need_reconnect(work);
+ ksmbd_close_session_fds(work);
+@@ -2224,9 +2309,16 @@ next:
+ break;
+ buf_len -= next;
+ eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
+- if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength))
++ if (buf_len < sizeof(struct smb2_ea_info)) {
++ rc = -EINVAL;
+ break;
++ }
+
++ if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
++ le16_to_cpu(eabuf->EaValueLength)) {
++ rc = -EINVAL;
++ break;
++ }
+ } while (next != 0);
+
+ kfree(attr_name);
+@@ -2291,15 +2383,15 @@ static int smb2_remove_smb_xattrs(struct path *path)
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX,
+- DOS_ATTRIBUTE_PREFIX_LEN) &&
+- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN))
+- continue;
+-
+- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name);
+- if (err)
+- ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
++ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
++ !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
++ STREAM_PREFIX_LEN)) {
++ err = ksmbd_vfs_remove_xattr(user_ns, path->dentry,
++ name);
++ if (err)
++ ksmbd_debug(SMB, "remove xattr failed : %s\n",
++ name);
++ }
+ }
+ out:
+ kvfree(xattr_list);
+@@ -2417,7 +2509,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
+ return -ENOENT;
+
+ /* Parse SD BUFFER create contexts */
+- context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER);
++ context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER, 4);
+ if (!context)
+ return -ENOENT;
+ else if (IS_ERR(context))
+@@ -2618,7 +2710,7 @@ int smb2_open(struct ksmbd_work *work)
+
+ if (req->CreateContextsOffset) {
+ /* Parse non-durable handle create contexts */
+- context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER);
++ context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+@@ -2638,7 +2730,7 @@ int smb2_open(struct ksmbd_work *work)
+ }
+
+ context = smb2_find_context_vals(req,
+- SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST);
++ SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+@@ -2649,7 +2741,7 @@ int smb2_open(struct ksmbd_work *work)
+ }
+
+ context = smb2_find_context_vals(req,
+- SMB2_CREATE_TIMEWARP_REQUEST);
++ SMB2_CREATE_TIMEWARP_REQUEST, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+@@ -2661,7 +2753,7 @@ int smb2_open(struct ksmbd_work *work)
+
+ if (tcon->posix_extensions) {
+ context = smb2_find_context_vals(req,
+- SMB2_CREATE_TAG_POSIX);
++ SMB2_CREATE_TAG_POSIX, 16);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+@@ -2670,7 +2762,7 @@ int smb2_open(struct ksmbd_work *work)
+ (struct create_posix *)context;
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+- sizeof(struct create_posix)) {
++ sizeof(struct create_posix) - 4) {
+ rc = -EINVAL;
+ goto err_out1;
+ }
+@@ -2951,17 +3043,24 @@ int smb2_open(struct ksmbd_work *work)
+ sizeof(struct smb_acl) +
+ sizeof(struct smb_ace) * ace_num * 2,
+ GFP_KERNEL);
+- if (!pntsd)
++ if (!pntsd) {
++ posix_acl_release(fattr.cf_acls);
++ posix_acl_release(fattr.cf_dacls);
+ goto err_out;
++ }
+
+ rc = build_sec_desc(user_ns,
+- pntsd, NULL,
++ pntsd, NULL, 0,
+ OWNER_SECINFO |
+ GROUP_SECINFO |
+ DACL_SECINFO,
+ &pntsd_size, &fattr);
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
++ if (rc) {
++ kfree(pntsd);
++ goto err_out;
++ }
+
+ rc = ksmbd_vfs_set_sd_xattr(conn,
+ user_ns,
+@@ -3059,7 +3158,7 @@ int smb2_open(struct ksmbd_work *work)
+ struct create_alloc_size_req *az_req;
+
+ az_req = (struct create_alloc_size_req *)smb2_find_context_vals(req,
+- SMB2_CREATE_ALLOCATION_SIZE);
++ SMB2_CREATE_ALLOCATION_SIZE, 4);
+ if (IS_ERR(az_req)) {
+ rc = PTR_ERR(az_req);
+ goto err_out;
+@@ -3086,7 +3185,7 @@ int smb2_open(struct ksmbd_work *work)
+ err);
+ }
+
+- context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID);
++ context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out;
+@@ -3398,9 +3497,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
+ goto free_conv_name;
+ }
+
+- struct_sz = readdir_info_level_struct_sz(info_level);
+- next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+- KSMBD_DIR_INFO_ALIGNMENT);
++ struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
++ next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
++ d_info->last_entry_off_align = next_entry_offset - struct_sz;
+
+ if (next_entry_offset > d_info->out_buf_len) {
+ d_info->out_buf_len = 0;
+@@ -3771,11 +3870,6 @@ static int __query_dir(struct dir_context *ctx, const char *name, int namlen,
+ return 0;
+ }
+
+-static void restart_ctx(struct dir_context *ctx)
+-{
+- ctx->pos = 0;
+-}
+-
+ static int verify_info_level(int info_level)
+ {
+ switch (info_level) {
+@@ -3794,6 +3888,15 @@ static int verify_info_level(int info_level)
+ return 0;
+ }
+
++static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len)
++{
++ int free_len;
++
++ free_len = (int)(work->response_sz -
++ (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len;
++ return free_len;
++}
++
+ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+ unsigned short hdr2_len,
+ unsigned int out_buf_len)
+@@ -3803,9 +3906,7 @@ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+ if (out_buf_len > work->conn->vals->max_trans_size)
+ return -EINVAL;
+
+- free_len = (int)(work->response_sz -
+- (get_rfc1002_len(work->response_buf) + 4)) -
+- hdr2_len;
++ free_len = smb2_resp_buf_len(work, hdr2_len);
+ if (free_len < 0)
+ return -EINVAL;
+
+@@ -3882,7 +3983,6 @@ int smb2_query_dir(struct ksmbd_work *work)
+ if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
+ ksmbd_debug(SMB, "Restart directory scan\n");
+ generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
+- restart_ctx(&dir_fp->readdir_data.ctx);
+ }
+
+ memset(&d_info, 0, sizeof(struct ksmbd_dir_info));
+@@ -3923,11 +4023,15 @@ int smb2_query_dir(struct ksmbd_work *work)
+ set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir);
+
+ rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx);
+- if (rc == 0)
+- restart_ctx(&dir_fp->readdir_data.ctx);
+- if (rc == -ENOSPC)
++ /*
++ * req->OutputBufferLength is too small to contain even one entry.
++ * In this case, it immediately returns OutputBufferLength 0 to client.
++ */
++ if (!d_info.out_buf_len && !d_info.num_entry)
++ goto no_buf_len;
++ if (rc > 0 || rc == -ENOSPC)
+ rc = 0;
+- if (rc)
++ else if (rc)
+ goto err_out;
+
+ d_info.wptr = d_info.rptr;
+@@ -3949,9 +4053,12 @@ int smb2_query_dir(struct ksmbd_work *work)
+ rsp->Buffer[0] = 0;
+ inc_rfc1001_len(rsp_org, 9);
+ } else {
++no_buf_len:
+ ((struct file_directory_info *)
+ ((char *)rsp->Buffer + d_info.last_entry_offset))
+ ->NextEntryOffset = 0;
++ if (d_info.data_count >= d_info.last_entry_off_align)
++ d_info.data_count -= d_info.last_entry_off_align;
+
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(72);
+@@ -3981,6 +4088,8 @@ err_out2:
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ else if (rc == -EFAULT)
+ rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
++ else if (rc == -EIO)
++ rsp->hdr.Status = STATUS_FILE_CORRUPT_ERROR;
+ if (!rsp->hdr.Status)
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+
+@@ -4180,7 +4289,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ name_len -= XATTR_USER_PREFIX_LEN;
+
+- ptr = (char *)(&eainfo->name + name_len + 1);
++ ptr = eainfo->name + name_len + 1;
+ buf_free_len -= (offsetof(struct smb2_ea_info, name) +
+ name_len + 1);
+ /* bailout if xattr can't fit in buf_free_len */
+@@ -4291,21 +4400,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
+ return 0;
+ }
+
+-static unsigned long long get_allocation_size(struct inode *inode,
+- struct kstat *stat)
+-{
+- unsigned long long alloc_size = 0;
+-
+- if (!S_ISDIR(stat->mode)) {
+- if ((inode->i_blocks << 9) <= stat->size)
+- alloc_size = stat->size;
+- else
+- alloc_size = inode->i_blocks << 9;
+- }
+-
+- return alloc_size;
+-}
+-
+ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+ {
+@@ -4320,7 +4414,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
+ sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
+ delete_pending = ksmbd_inode_pending_delete(fp);
+
+- sinfo->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
++ sinfo->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
+ sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
+ sinfo->DeletePending = delete_pending;
+@@ -4385,7 +4479,7 @@ static int get_file_all_info(struct ksmbd_work *work,
+ file_info->Attributes = fp->f_ci->m_fattr;
+ file_info->Pad1 = 0;
+ file_info->AllocationSize =
+- cpu_to_le64(get_allocation_size(inode, &stat));
++ cpu_to_le64(inode->i_blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ file_info->NumberOfLinks =
+ cpu_to_le32(get_nlink(&stat) - delete_pending);
+@@ -4450,6 +4544,12 @@ static void get_file_stream_info(struct ksmbd_work *work,
+ &stat);
+ file_info = (struct smb2_file_stream_info *)rsp->Buffer;
+
++ buf_free_len =
++ smb2_calc_max_out_buf_len(work, 8,
++ le32_to_cpu(req->OutputBufferLength));
++ if (buf_free_len < 0)
++ goto out;
++
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ goto out;
+@@ -4458,12 +4558,6 @@ static void get_file_stream_info(struct ksmbd_work *work,
+ goto out;
+ }
+
+- buf_free_len =
+- smb2_calc_max_out_buf_len(work, 8,
+- le32_to_cpu(req->OutputBufferLength));
+- if (buf_free_len < 0)
+- goto out;
+-
+ while (idx < xattr_list_len) {
+ stream_name = xattr_list + idx;
+ streamlen = strlen(stream_name);
+@@ -4489,8 +4583,10 @@ static void get_file_stream_info(struct ksmbd_work *work,
+ ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
+
+ next = sizeof(struct smb2_file_stream_info) + streamlen * 2;
+- if (next > buf_free_len)
++ if (next > buf_free_len) {
++ kfree(stream_buf);
+ break;
++ }
+
+ file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
+ streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
+@@ -4507,6 +4603,7 @@ static void get_file_stream_info(struct ksmbd_work *work,
+ file_info->NextEntryOffset = cpu_to_le32(next);
+ }
+
++out:
+ if (!S_ISDIR(stat.mode) &&
+ buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) {
+ file_info = (struct smb2_file_stream_info *)
+@@ -4515,14 +4612,13 @@ static void get_file_stream_info(struct ksmbd_work *work,
+ "::$DATA", 7, conn->local_nls, 0);
+ streamlen *= 2;
+ file_info->StreamNameLength = cpu_to_le32(streamlen);
+- file_info->StreamSize = 0;
+- file_info->StreamAllocationSize = 0;
++ file_info->StreamSize = cpu_to_le64(stat.size);
++ file_info->StreamAllocationSize = cpu_to_le64(stat.blocks << 9);
+ nbytes += sizeof(struct smb2_file_stream_info) + streamlen;
+ }
+
+ /* last entry offset should be 0 */
+ file_info->NextEntryOffset = 0;
+-out:
+ kvfree(xattr_list);
+
+ rsp->OutputBufferLength = cpu_to_le32(nbytes);
+@@ -4572,7 +4668,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
+ file_info->ChangeTime = cpu_to_le64(time);
+ file_info->Attributes = fp->f_ci->m_fattr;
+ file_info->AllocationSize =
+- cpu_to_le64(get_allocation_size(inode, &stat));
++ cpu_to_le64(inode->i_blocks << 9);
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ file_info->Reserved = cpu_to_le32(0);
+ rsp->OutputBufferLength =
+@@ -4824,7 +4920,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+ {
+ struct ksmbd_session *sess = work->sess;
+- struct ksmbd_conn *conn = sess->conn;
++ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_share_config *share = work->tcon->share_conf;
+ int fsinfoclass = 0;
+ struct kstatfs stfs;
+@@ -4832,6 +4928,9 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
+ int rc = 0, len;
+ int fs_infoclass_size = 0;
+
++ if (!share->path)
++ return -EIO;
++
+ rc = kern_path(share->path, LOOKUP_NO_SYMLINKS, &path);
+ if (rc) {
+ pr_err("cannot create vfs path\n");
+@@ -4876,6 +4975,10 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
+
+ info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps);
+
++ if (test_share_config_flag(work->tcon->share_conf,
++ KSMBD_SHARE_FLAG_STREAMS))
++ info->Attributes |= cpu_to_le32(FILE_NAMED_STREAMS);
++
+ info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen);
+ len = smbConvertToUTF16((__le16 *)info->FileSystemName,
+ "NTFS", PATH_MAX, conn->local_nls, 0);
+@@ -4891,11 +4994,18 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
+ {
+ struct filesystem_vol_info *info;
+ size_t sz;
++ unsigned int serial_crc = 0;
+
+ info = (struct filesystem_vol_info *)(rsp->Buffer);
+ info->VolumeCreationTime = 0;
++ serial_crc = crc32_le(serial_crc, share->name,
++ strlen(share->name));
++ serial_crc = crc32_le(serial_crc, share->path,
++ strlen(share->path));
++ serial_crc = crc32_le(serial_crc, ksmbd_netbios_name(),
++ strlen(ksmbd_netbios_name()));
+ /* Taking dummy value of serial number*/
+- info->SerialNumber = cpu_to_le32(0xbc3ac512);
++ info->SerialNumber = cpu_to_le32(serial_crc);
+ len = smbConvertToUTF16((__le16 *)info->VolumeLabel,
+ share->name, PATH_MAX,
+ conn->local_nls, 0);
+@@ -4963,15 +5073,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
+ case FS_SECTOR_SIZE_INFORMATION:
+ {
+ struct smb3_fs_ss_info *info;
++ unsigned int sector_size =
++ min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096);
+
+ info = (struct smb3_fs_ss_info *)(rsp->Buffer);
+
+- info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize);
++ info->LogicalBytesPerSector = cpu_to_le32(sector_size);
+ info->PhysicalBytesPerSectorForAtomicity =
+- cpu_to_le32(stfs.f_bsize);
+- info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize);
++ cpu_to_le32(sector_size);
++ info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size);
+ info->FSEffPhysicalBytesPerSectorForAtomicity =
+- cpu_to_le32(stfs.f_bsize);
++ cpu_to_le32(sector_size);
+ info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE |
+ SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE);
+ info->ByteOffsetForSectorAlignment = 0;
+@@ -5045,15 +5157,15 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
+ struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode;
+- __u32 secdesclen;
++ __u32 secdesclen = 0;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+ int addition_info = le32_to_cpu(req->AdditionalInformation);
+- int rc;
++ int rc = 0, ppntsd_size = 0;
+
+ if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+ PROTECTED_DACL_SECINFO |
+ UNPROTECTED_DACL_SECINFO)) {
+- pr_err("Unsupported addition info: 0x%x)\n",
++ ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n",
+ addition_info);
+
+ pntsd->revision = cpu_to_le16(1);
+@@ -5094,11 +5206,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR))
+- ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+- fp->filp->f_path.dentry, &ppntsd);
+-
+- rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+- &secdesclen, &fattr);
++ ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
++ fp->filp->f_path.dentry,
++ &ppntsd);
++
++ /* Check if sd buffer size exceeds response buffer size */
++ if (smb2_resp_buf_len(work, 8) > ppntsd_size)
++ rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size,
++ addition_info, &secdesclen, &fattr);
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ kfree(ppntsd);
+@@ -5734,12 +5849,14 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
+ if (parent_fp) {
+ if (parent_fp->daccess & FILE_DELETE_LE) {
+ pr_err("parent dir is opened with delete access\n");
++ ksmbd_fd_put(work, parent_fp);
+ return -ESHARE;
+ }
++ ksmbd_fd_put(work, parent_fp);
+ }
+ next:
+ return smb2_rename(work, fp, user_ns, rename_info,
+- work->sess->conn->local_nls);
++ work->conn->local_nls);
+ }
+
+ static int set_file_disposition_info(struct ksmbd_file *fp,
+@@ -5873,7 +5990,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
+ return smb2_create_link(work, work->tcon->share_conf,
+ (struct smb2_file_link_info *)req->Buffer,
+ buf_len, fp->filp,
+- work->sess->conn->local_nls);
++ work->conn->local_nls);
+ }
+ case FILE_DISPOSITION_INFORMATION:
+ {
+@@ -6128,7 +6245,7 @@ int smb2_read(struct ksmbd_work *work)
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_read_req *req;
+ struct smb2_read_rsp *rsp, *rsp_org;
+- struct ksmbd_file *fp;
++ struct ksmbd_file *fp = NULL;
+ loff_t offset;
+ size_t length, mincount;
+ ssize_t nbytes = 0, remain_bytes = 0;
+@@ -6136,6 +6253,11 @@ int smb2_read(struct ksmbd_work *work)
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
++ if (work->next_smb2_rcv_hdr_off) {
++ work->send_no_response = 1;
++ err = -EOPNOTSUPP;
++ goto out;
++ }
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+@@ -6427,10 +6549,8 @@ int smb2_write(struct ksmbd_work *work)
+ (offsetof(struct smb2_write_req, Buffer) - 4)) {
+ data_buf = (char *)&req->Buffer[0];
+ } else {
+- if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+- pr_err("invalid write data offset %u, smb_len %u\n",
+- le16_to_cpu(req->DataOffset),
+- get_rfc1002_len(req));
++ if (le16_to_cpu(req->DataOffset) <
++ offsetof(struct smb2_write_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
+@@ -7232,6 +7352,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
+ nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
+
+ nii_rsp->Capability = 0;
++ if (netdev->real_num_tx_queues > 1)
++ nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE);
+ if (ksmbd_rdma_capable_netdev(netdev))
+ nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
+
+@@ -7312,7 +7434,7 @@ static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn,
+ int ret = 0;
+ int dialect;
+
+- if (in_buf_len < sizeof(struct validate_negotiate_info_req) +
++ if (in_buf_len < offsetof(struct validate_negotiate_info_req, Dialects) +
+ le16_to_cpu(neg_req->DialectCount) * sizeof(__le16))
+ return -EINVAL;
+
+@@ -7359,13 +7481,16 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id,
+ if (in_count == 0)
+ return -EINVAL;
+
++ start = le64_to_cpu(qar_req->file_offset);
++ length = le64_to_cpu(qar_req->length);
++
++ if (start < 0 || length < 0)
++ return -EINVAL;
++
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp)
+ return -ENOENT;
+
+- start = le64_to_cpu(qar_req->file_offset);
+- length = le64_to_cpu(qar_req->length);
+-
+ ret = ksmbd_vfs_fqar_lseek(fp, start, length,
+ qar_rsp, in_count, out_count);
+ if (ret && ret != -E2BIG)
+@@ -7563,11 +7688,16 @@ int smb2_ioctl(struct ksmbd_work *work)
+ goto out;
+ }
+
+- if (in_buf_len < sizeof(struct validate_negotiate_info_req))
+- return -EINVAL;
++ if (in_buf_len < offsetof(struct validate_negotiate_info_req,
++ Dialects)) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+- if (out_buf_len < sizeof(struct validate_negotiate_info_rsp))
+- return -EINVAL;
++ if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+ ret = fsctl_validate_negotiate_info(conn,
+ (struct validate_negotiate_info_req *)&req->Buffer[0],
+@@ -7645,7 +7775,7 @@ int smb2_ioctl(struct ksmbd_work *work)
+ {
+ struct file_zero_data_information *zero_data;
+ struct ksmbd_file *fp;
+- loff_t off, len;
++ loff_t off, len, bfz;
+
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+@@ -7662,19 +7792,26 @@ int smb2_ioctl(struct ksmbd_work *work)
+ zero_data =
+ (struct file_zero_data_information *)&req->Buffer[0];
+
+- fp = ksmbd_lookup_fd_fast(work, id);
+- if (!fp) {
+- ret = -ENOENT;
++ off = le64_to_cpu(zero_data->FileOffset);
++ bfz = le64_to_cpu(zero_data->BeyondFinalZero);
++ if (off < 0 || bfz < 0 || off > bfz) {
++ ret = -EINVAL;
+ goto out;
+ }
+
+- off = le64_to_cpu(zero_data->FileOffset);
+- len = le64_to_cpu(zero_data->BeyondFinalZero) - off;
++ len = bfz - off;
++ if (len) {
++ fp = ksmbd_lookup_fd_fast(work, id);
++ if (!fp) {
++ ret = -ENOENT;
++ goto out;
++ }
+
+- ret = ksmbd_vfs_zero_data(work, fp, off, len);
+- ksmbd_fd_put(work, fp);
+- if (ret < 0)
+- goto out;
++ ret = ksmbd_vfs_zero_data(work, fp, off, len);
++ ksmbd_fd_put(work, fp);
++ if (ret < 0)
++ goto out;
++ }
+ break;
+ }
+ case FSCTL_QUERY_ALLOCATED_RANGES:
+@@ -7748,14 +7885,24 @@ int smb2_ioctl(struct ksmbd_work *work)
+ src_off = le64_to_cpu(dup_ext->SourceFileOffset);
+ dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
+ length = le64_to_cpu(dup_ext->ByteCount);
+- cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
+- dst_off, length, 0);
++ /*
++ * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE
++ * should fall back to vfs_copy_file_range(). This could be
++ * beneficial when re-exporting nfs/smb mount, but note that
++ * this can result in partial copy that returns an error status.
++ * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented,
++ * fall back to vfs_copy_file_range(), should be avoided when
++ * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set.
++ */
++ cloned = vfs_clone_file_range(fp_in->filp, src_off,
++ fp_out->filp, dst_off, length, 0);
+ if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
+ ret = -EOPNOTSUPP;
+ goto dup_ext_out;
+ } else if (cloned != length) {
+ cloned = vfs_copy_file_range(fp_in->filp, src_off,
+- fp_out->filp, dst_off, length, 0);
++ fp_out->filp, dst_off,
++ length, 0);
+ if (cloned != length) {
+ if (cloned < 0)
+ ret = cloned;
+@@ -8283,10 +8430,14 @@ int smb3_check_sign_req(struct ksmbd_work *work)
+ if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
+ signing_key = work->sess->smb3signingkey;
+ } else {
++ read_lock(&work->sess->chann_lock);
+ chann = lookup_chann_list(work->sess, conn);
+- if (!chann)
++ if (!chann) {
++ read_unlock(&work->sess->chann_lock);
+ return 0;
++ }
+ signing_key = chann->smb3signingkey;
++ read_unlock(&work->sess->chann_lock);
+ }
+
+ if (!signing_key) {
+@@ -8346,10 +8497,14 @@ void smb3_set_sign_rsp(struct ksmbd_work *work)
+ le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
+ signing_key = work->sess->smb3signingkey;
+ } else {
++ read_lock(&work->sess->chann_lock);
+ chann = lookup_chann_list(work->sess, work->conn);
+- if (!chann)
++ if (!chann) {
++ read_unlock(&work->sess->chann_lock);
+ return;
++ }
+ signing_key = chann->smb3signingkey;
++ read_unlock(&work->sess->chann_lock);
+ }
+
+ if (!signing_key)
+@@ -8499,7 +8654,8 @@ int smb3_decrypt_req(struct ksmbd_work *work)
+ struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+ int rc = 0;
+
+- if (buf_data_size < sizeof(struct smb2_hdr)) {
++ if (pdu_length < sizeof(struct smb2_transform_hdr) ||
++ buf_data_size < sizeof(struct smb2_hdr)) {
+ pr_err("Transform message is too small (%u)\n",
+ pdu_length);
+ return -ECONNABORTED;
+@@ -8535,6 +8691,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
+ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
+ {
+ struct ksmbd_conn *conn = work->conn;
++ struct ksmbd_session *sess = work->sess;
+ struct smb2_hdr *rsp = work->response_buf;
+
+ if (conn->dialect < SMB30_PROT_ID)
+@@ -8544,6 +8701,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
+ rsp = ksmbd_resp_buf_next(work);
+
+ if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE &&
++ sess->user && !user_guest(sess->user) &&
+ rsp->Status == STATUS_SUCCESS)
+ return true;
+ return false;
+diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
+index ff5a2f01d34ae..fa1cd556ab7ac 100644
+--- a/fs/ksmbd/smb2pdu.h
++++ b/fs/ksmbd/smb2pdu.h
+@@ -113,8 +113,9 @@
+ #define SMB21_DEFAULT_IOSIZE (1024 * 1024)
+ #define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
+ #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024)
+-#define SMB3_MIN_IOSIZE (64 * 1024)
+-#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
++#define SMB3_MIN_IOSIZE (64 * 1024)
++#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
++#define SMB3_MAX_MSGSIZE (4 * 4096)
+
+ /*
+ * SMB2 Header Definition
+@@ -1566,7 +1567,7 @@ struct smb2_ea_info {
+ __u8 Flags;
+ __u8 EaNameLength;
+ __le16 EaValueLength;
+- char name[1];
++ char name[];
+ /* optionally followed by value */
+ } __packed; /* level 15 Query */
+
+@@ -1647,6 +1648,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn);
+ void init_smb2_max_read_size(unsigned int sz);
+ void init_smb2_max_write_size(unsigned int sz);
+ void init_smb2_max_trans_size(unsigned int sz);
++void init_smb2_max_credits(unsigned int sz);
+
+ bool is_smb2_neg_cmd(struct ksmbd_work *work);
+ bool is_smb2_rsp(struct ksmbd_work *work);
+diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
+index 707490ab1f4c4..af583e4266215 100644
+--- a/fs/ksmbd/smb_common.c
++++ b/fs/ksmbd/smb_common.c
+@@ -4,6 +4,8 @@
+ * Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
++#include <linux/user_namespace.h>
++
+ #include "smb_common.h"
+ #include "server.h"
+ #include "misc.h"
+@@ -140,8 +142,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work)
+
+ hdr = work->request_buf;
+ if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER &&
+- hdr->Command == SMB_COM_NEGOTIATE)
++ hdr->Command == SMB_COM_NEGOTIATE) {
++ work->conn->outstanding_credits++;
+ return 0;
++ }
+
+ return -EINVAL;
+ }
+@@ -308,14 +312,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
+ for (i = 0; i < 2; i++) {
+ struct kstat kstat;
+ struct ksmbd_kstat ksmbd_kstat;
++ struct dentry *dentry;
+
+ if (!dir->dot_dotdot[i]) { /* fill dot entry info */
+ if (i == 0) {
+ d_info->name = ".";
+ d_info->name_len = 1;
++ dentry = dir->filp->f_path.dentry;
+ } else {
+ d_info->name = "..";
+ d_info->name_len = 2;
++ dentry = dir->filp->f_path.dentry->d_parent;
+ }
+
+ if (!match_pattern(d_info->name, d_info->name_len,
+@@ -327,7 +334,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
+ ksmbd_kstat.kstat = &kstat;
+ ksmbd_vfs_fill_dentry_attrs(work,
+ user_ns,
+- dir->filp->f_path.dentry->d_parent,
++ dentry,
+ &ksmbd_kstat);
+ rc = fn(conn, info_level, d_info, &ksmbd_kstat);
+ if (rc)
+@@ -416,7 +423,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
+ out[baselen + 3] = PERIOD;
+
+ if (dot_present)
+- memcpy(&out[baselen + 4], extension, 4);
++ memcpy(out + baselen + 4, extension, 4);
+ else
+ out[baselen + 4] = '\0';
+ smbConvertToUTF16((__le16 *)shortname, out, PATH_MAX,
+@@ -427,7 +434,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
+
+ static int __smb2_negotiate(struct ksmbd_conn *conn)
+ {
+- return (conn->dialect >= SMB21_PROT_ID &&
++ return (conn->dialect >= SMB20_PROT_ID &&
+ conn->dialect <= SMB311_PROT_ID);
+ }
+
+@@ -435,9 +442,26 @@ static int smb_handle_negotiate(struct ksmbd_work *work)
+ {
+ struct smb_negotiate_rsp *neg_rsp = work->response_buf;
+
+- ksmbd_debug(SMB, "Unsupported SMB protocol\n");
+- neg_rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE;
+- return -EINVAL;
++ ksmbd_debug(SMB, "Unsupported SMB1 protocol\n");
++
++ /*
++ * Remove 4 byte direct TCP header, add 2 byte bcc and
++ * 2 byte DialectIndex.
++ */
++ *(__be32 *)work->response_buf =
++ cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2);
++ neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS;
++
++ neg_rsp->hdr.Command = SMB_COM_NEGOTIATE;
++ *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER;
++ neg_rsp->hdr.Flags = SMBFLG_RESPONSE;
++ neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS |
++ SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME;
++
++ neg_rsp->hdr.WordCount = 1;
++ neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect);
++ neg_rsp->ByteCount = 0;
++ return 0;
+ }
+
+ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
+@@ -457,7 +481,7 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
+ }
+ }
+
+- if (command == SMB2_NEGOTIATE_HE && __smb2_negotiate(conn)) {
++ if (command == SMB2_NEGOTIATE_HE) {
+ ret = smb2_handle_negotiate(work);
+ init_smb2_neg_rsp(work);
+ return ret;
+@@ -619,8 +643,8 @@ int ksmbd_override_fsids(struct ksmbd_work *work)
+ if (!cred)
+ return -ENOMEM;
+
+- cred->fsuid = make_kuid(current_user_ns(), uid);
+- cred->fsgid = make_kgid(current_user_ns(), gid);
++ cred->fsuid = make_kuid(&init_user_ns, uid);
++ cred->fsgid = make_kgid(&init_user_ns, gid);
+
+ gi = groups_alloc(0);
+ if (!gi) {
+diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
+index 6e79e7577f6b7..48cbaa0321400 100644
+--- a/fs/ksmbd/smb_common.h
++++ b/fs/ksmbd/smb_common.h
+@@ -205,8 +205,15 @@
+
+ #define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff)
+ #define SMB_COM_NEGOTIATE 0x72
+-
+ #define SMB1_CLIENT_GUID_SIZE (16)
++
++#define SMBFLG_RESPONSE 0x80 /* this PDU is a response from server */
++
++#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40)
++#define SMBFLG2_EXT_SEC cpu_to_le16(0x800)
++#define SMBFLG2_ERR_STATUS cpu_to_le16(0x4000)
++#define SMBFLG2_UNICODE cpu_to_le16(0x8000)
++
+ struct smb_hdr {
+ __be32 smb_buf_length;
+ __u8 Protocol[4];
+@@ -246,28 +253,7 @@ struct smb_negotiate_req {
+ struct smb_negotiate_rsp {
+ struct smb_hdr hdr; /* wct = 17 */
+ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */
+- __u8 SecurityMode;
+- __le16 MaxMpxCount;
+- __le16 MaxNumberVcs;
+- __le32 MaxBufferSize;
+- __le32 MaxRawSize;
+- __le32 SessionKey;
+- __le32 Capabilities; /* see below */
+- __le32 SystemTimeLow;
+- __le32 SystemTimeHigh;
+- __le16 ServerTimeZone;
+- __u8 EncryptionKeyLength;
+ __le16 ByteCount;
+- union {
+- unsigned char EncryptionKey[8]; /* cap extended security off */
+- /* followed by Domain name - if extended security is off */
+- /* followed by 16 bytes of server GUID */
+- /* then security blob if cap_extended_security negotiated */
+- struct {
+- unsigned char GUID[SMB1_CLIENT_GUID_SIZE];
+- unsigned char SecurityBlob[1];
+- } __packed extended_response;
+- } __packed u;
+ } __packed;
+
+ struct filesystem_attribute_info {
+@@ -412,6 +398,7 @@ struct smb_version_values {
+ __u32 max_read_size;
+ __u32 max_write_size;
+ __u32 max_trans_size;
++ __u32 max_credits;
+ __u32 large_lock_type;
+ __u32 exclusive_lock_type;
+ __u32 shared_lock_type;
+@@ -467,7 +454,7 @@ struct smb_version_ops {
+ int (*check_sign_req)(struct ksmbd_work *work);
+ void (*set_sign_rsp)(struct ksmbd_work *work);
+ int (*generate_signingkey)(struct ksmbd_session *sess, struct ksmbd_conn *conn);
+- int (*generate_encryptionkey)(struct ksmbd_session *sess);
++ int (*generate_encryptionkey)(struct ksmbd_conn *conn, struct ksmbd_session *sess);
+ bool (*is_transform_hdr)(void *buf);
+ int (*decrypt_req)(struct ksmbd_work *work);
+ int (*encrypt_resp)(struct ksmbd_work *work);
+diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
+index bd792db326239..3781bca2c8fc4 100644
+--- a/fs/ksmbd/smbacl.c
++++ b/fs/ksmbd/smbacl.c
+@@ -9,6 +9,7 @@
+ #include <linux/fs.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
++#include <linux/mnt_idmapping.h>
+
+ #include "smbacl.h"
+ #include "smb_common.h"
+@@ -274,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns,
+ uid_t id;
+
+ id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+- /*
+- * Translate raw sid into kuid in the server's user
+- * namespace.
+- */
+- uid = make_kuid(&init_user_ns, id);
+-
+- /* If this is an idmapped mount, apply the idmapping. */
+- uid = kuid_from_mnt(user_ns, uid);
++ uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id));
+ if (uid_valid(uid)) {
+ fattr->cf_uid = uid;
+ rc = 0;
+@@ -291,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns,
+ gid_t id;
+
+ id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+- /*
+- * Translate raw sid into kgid in the server's user
+- * namespace.
+- */
+- gid = make_kgid(&init_user_ns, id);
+-
+- /* If this is an idmapped mount, apply the idmapping. */
+- gid = kgid_from_mnt(user_ns, gid);
++ gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id));
+ if (gid_valid(gid)) {
+ fattr->cf_gid = gid;
+ rc = 0;
+@@ -703,6 +690,7 @@ posix_default_acl:
+ static void set_ntacl_dacl(struct user_namespace *user_ns,
+ struct smb_acl *pndacl,
+ struct smb_acl *nt_dacl,
++ unsigned int aces_size,
+ const struct smb_sid *pownersid,
+ const struct smb_sid *pgrpsid,
+ struct smb_fattr *fattr)
+@@ -716,9 +704,19 @@ static void set_ntacl_dacl(struct user_namespace *user_ns,
+ if (nt_num_aces) {
+ ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+ for (i = 0; i < nt_num_aces; i++) {
+- memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size));
+- size += le16_to_cpu(ntace->size);
+- ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size));
++ unsigned short nt_ace_size;
++
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++
++ nt_ace_size = le16_to_cpu(ntace->size);
++ if (nt_ace_size > aces_size)
++ break;
++
++ memcpy((char *)pndace + size, ntace, nt_ace_size);
++ size += nt_ace_size;
++ aces_size -= nt_ace_size;
++ ntace = (struct smb_ace *)((char *)ntace + nt_ace_size);
+ num_aces++;
+ }
+ }
+@@ -891,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ /* Convert permission bits from mode to equivalent CIFS ACL */
+ int build_sec_desc(struct user_namespace *user_ns,
+ struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+- int addition_info, __u32 *secdesclen,
++ int ppntsd_size, int addition_info, __u32 *secdesclen,
+ struct smb_fattr *fattr)
+ {
+ int rc = 0;
+@@ -951,15 +949,25 @@ int build_sec_desc(struct user_namespace *user_ns,
+
+ if (!ppntsd) {
+ set_mode_dacl(user_ns, dacl_ptr, fattr);
+- } else if (!ppntsd->dacloffset) {
+- goto out;
+ } else {
+ struct smb_acl *ppdacl_ptr;
++ unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
++ int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset;
++
++ if (!dacl_offset ||
++ (dacl_offset + sizeof(struct smb_acl) > ppntsd_size))
++ goto out;
++
++ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset);
++ ppdacl_size = le16_to_cpu(ppdacl_ptr->size);
++ if (ppdacl_size > ntacl_size ||
++ ppdacl_size < sizeof(struct smb_acl))
++ goto out;
+
+- ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+- le32_to_cpu(ppntsd->dacloffset));
+ set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+- nowner_sid_ptr, ngroup_sid_ptr, fattr);
++ ntacl_size - sizeof(struct smb_acl),
++ nowner_sid_ptr, ngroup_sid_ptr,
++ fattr);
+ }
+ pntsd->dacloffset = cpu_to_le32(offset);
+ offset += le16_to_cpu(dacl_ptr->size);
+@@ -993,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+ struct smb_sid owner_sid, group_sid;
+ struct dentry *parent = path->dentry->d_parent;
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+- int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+- int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
++ int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
++ int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+ char *aces_base;
+ bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+
+- acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+- parent, &parent_pntsd);
+- if (acl_len <= 0)
++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++ parent, &parent_pntsd);
++ if (pntsd_size <= 0)
+ return -ENOENT;
+ dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+- if (!dacloffset) {
++ if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) {
+ rc = -EINVAL;
+ goto free_parent_pntsd;
+ }
+
+ parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
++ acl_len = pntsd_size - dacloffset;
+ num_aces = le32_to_cpu(parent_pdacl->num_aces);
+ pntsd_type = le16_to_cpu(parent_pntsd->type);
++ pdacl_size = le16_to_cpu(parent_pdacl->size);
++
++ if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) {
++ rc = -EINVAL;
++ goto free_parent_pntsd;
++ }
+
+ aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL);
+ if (!aces_base) {
+@@ -1021,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+ aces = (struct smb_ace *)aces_base;
+ parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+ sizeof(struct smb_acl));
++ aces_size = acl_len - sizeof(struct smb_acl);
+
+ if (pntsd_type & DACL_AUTO_INHERITED)
+ inherited_flags = INHERITED_ACE;
+
+ for (i = 0; i < num_aces; i++) {
++ int pace_size;
++
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++
++ pace_size = le16_to_cpu(parent_aces->size);
++ if (pace_size > aces_size)
++ break;
++
++ aces_size -= pace_size;
++
+ flags = parent_aces->flags;
+ if (!smb_inherit_flags(flags, is_dir))
+ goto pass;
+@@ -1070,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+ aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+ ace_cnt++;
+ pass:
+- parent_aces =
+- (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size));
++ parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size);
+ }
+
+ if (nt_size > 0) {
+@@ -1166,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ struct smb_ntsd *pntsd = NULL;
+ struct smb_acl *pdacl;
+ struct posix_acl *posix_acls;
+- int rc = 0, acl_size;
++ int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset;
+ struct smb_sid sid;
+ int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+ struct smb_ace *ace;
+@@ -1175,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ struct smb_ace *others_ace = NULL;
+ struct posix_acl_entry *pa_entry;
+ unsigned int sid_type = SIDOWNER;
+- char *end_of_acl;
++ unsigned short ace_size;
+
+ ksmbd_debug(SMB, "check permission using windows acl\n");
+- acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+- path->dentry, &pntsd);
+- if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+- kfree(pntsd);
+- return 0;
+- }
++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++ path->dentry, &pntsd);
++ if (pntsd_size <= 0 || !pntsd)
++ goto err_out;
++
++ dacl_offset = le32_to_cpu(pntsd->dacloffset);
++ if (!dacl_offset ||
++ (dacl_offset + sizeof(struct smb_acl) > pntsd_size))
++ goto err_out;
+
+ pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+- end_of_acl = ((char *)pntsd) + acl_size;
+- if (end_of_acl <= (char *)pdacl) {
+- kfree(pntsd);
+- return 0;
+- }
++ acl_size = pntsd_size - dacl_offset;
++ pdacl_size = le16_to_cpu(pdacl->size);
+
+- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+- le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+- kfree(pntsd);
+- return 0;
+- }
++ if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl))
++ goto err_out;
+
+ if (!pdacl->num_aces) {
+- if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
++ if (!(pdacl_size - sizeof(struct smb_acl)) &&
+ *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+ rc = -EACCES;
+ goto err_out;
+ }
+- kfree(pntsd);
+- return 0;
++ goto err_out;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+@@ -1213,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ DELETE;
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++ aces_size = acl_size - sizeof(struct smb_acl);
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++ ace_size = le16_to_cpu(ace->size);
++ if (ace_size > aces_size)
++ break;
++ aces_size -= ace_size;
+ granted |= le32_to_cpu(ace->access_req);
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+- if (end_of_acl < (char *)ace)
+- goto err_out;
+ }
+
+ if (!pdacl->num_aces)
+@@ -1229,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ id_to_sid(uid, sid_type, &sid);
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++ aces_size = acl_size - sizeof(struct smb_acl);
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++ if (offsetof(struct smb_ace, access_req) > aces_size)
++ break;
++ ace_size = le16_to_cpu(ace->size);
++ if (ace_size > aces_size)
++ break;
++ aces_size -= ace_size;
++
+ if (!compare_sids(&sid, &ace->sid) ||
+ !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+ found = 1;
+@@ -1239,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ others_ace = ace;
+
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+- if (end_of_acl < (char *)ace)
+- goto err_out;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+@@ -1274,6 +1307,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ if (!access_bits)
+ access_bits =
+ SET_MINIMUM_RIGHTS;
++ posix_acl_release(posix_acls);
+ goto check_access_bits;
+ }
+ }
+diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
+index 73e08cad412bd..fcb2c83f29928 100644
+--- a/fs/ksmbd/smbacl.h
++++ b/fs/ksmbd/smbacl.h
+@@ -11,6 +11,7 @@
+ #include <linux/fs.h>
+ #include <linux/namei.h>
+ #include <linux/posix_acl.h>
++#include <linux/mnt_idmapping.h>
+
+ #include "mgmt/tree_connect.h"
+
+@@ -192,7 +193,7 @@ struct posix_acl_state {
+ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ int acl_len, struct smb_fattr *fattr);
+ int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+- struct smb_ntsd *ppntsd, int addition_info,
++ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
+ __u32 *secdesclen, struct smb_fattr *fattr);
+ int init_acl_state(struct posix_acl_state *state, int cnt);
+ void free_acl_state(struct posix_acl_state *state);
+@@ -216,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
+ kuid_t kuid;
+
+ /* If this is an idmapped mount, apply the idmapping. */
+- kuid = kuid_into_mnt(mnt_userns, pace->e_uid);
++ kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid);
+
+ /* Translate the kuid into a userspace id ksmbd would see. */
+ return from_kuid(&init_user_ns, kuid);
+@@ -228,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
+ kgid_t kgid;
+
+ /* If this is an idmapped mount, apply the idmapping. */
+- kgid = kgid_into_mnt(mnt_userns, pace->e_gid);
++ kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid);
+
+ /* Translate the kgid into a userspace id ksmbd would see. */
+ return from_kgid(&init_user_ns, kgid);
+diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
+index 1acf1892a466c..a8313eed4f10d 100644
+--- a/fs/ksmbd/transport_ipc.c
++++ b/fs/ksmbd/transport_ipc.c
+@@ -26,6 +26,7 @@
+ #include "mgmt/ksmbd_ida.h"
+ #include "connection.h"
+ #include "transport_tcp.h"
++#include "transport_rdma.h"
+
+ #define IPC_WAIT_TIMEOUT (2 * HZ)
+
+@@ -301,6 +302,13 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
+ init_smb2_max_write_size(req->smb2_max_write);
+ if (req->smb2_max_trans)
+ init_smb2_max_trans_size(req->smb2_max_trans);
++ if (req->smb2_max_credits)
++ init_smb2_max_credits(req->smb2_max_credits);
++ if (req->smbd_max_io_size)
++ init_smbd_max_io_size(req->smbd_max_io_size);
++
++ if (req->max_connections)
++ server_conf.max_connections = req->max_connections;
+
+ ret = ksmbd_set_netbios_name(req->netbios_name);
+ ret |= ksmbd_set_server_string(req->server_string);
+diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
+index a2fd5a4d4cd5e..9ca29cdb7898f 100644
+--- a/fs/ksmbd/transport_rdma.c
++++ b/fs/ksmbd/transport_rdma.c
+@@ -75,7 +75,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
+ /* The maximum single-message size which can be received */
+ static int smb_direct_max_receive_size = 8192;
+
+-static int smb_direct_max_read_write_size = 1024 * 1024;
++static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
+
+ static int smb_direct_max_outstanding_rw_ops = 8;
+
+@@ -201,6 +201,12 @@ struct smb_direct_rdma_rw_msg {
+ struct scatterlist sg_list[0];
+ };
+
++void init_smbd_max_io_size(unsigned int sz)
++{
++ sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
++ smb_direct_max_read_write_size = sz;
++}
++
+ static inline int get_buf_page_count(void *buf, int size)
+ {
+ return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
+@@ -657,7 +663,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t,
+ }
+
+ static int smb_direct_read(struct ksmbd_transport *t, char *buf,
+- unsigned int size)
++ unsigned int size, int unused)
+ {
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_data_transfer *data_transfer;
+diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
+index 0fa8adc0776f2..04a7a37685c34 100644
+--- a/fs/ksmbd/transport_rdma.h
++++ b/fs/ksmbd/transport_rdma.h
+@@ -9,6 +9,10 @@
+
+ #define SMB_DIRECT_PORT 5445
+
++#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024)
++#define SMBD_MIN_IOSIZE (512 * 1024)
++#define SMBD_MAX_IOSIZE (16 * 1024 * 1024)
++
+ /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
+ struct smb_direct_negotiate_req {
+ __le16 min_version;
+@@ -54,10 +58,12 @@ struct smb_direct_data_transfer {
+ int ksmbd_rdma_init(void);
+ int ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
++void init_smbd_max_io_size(unsigned int sz);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+ static inline int ksmbd_rdma_destroy(void) { return 0; }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
++static inline void init_smbd_max_io_size(unsigned int sz) { }
+ #endif
+
+ #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
+diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
+index c14320e03b698..d1d7954368a56 100644
+--- a/fs/ksmbd/transport_tcp.c
++++ b/fs/ksmbd/transport_tcp.c
+@@ -15,6 +15,8 @@
+ #define IFACE_STATE_DOWN BIT(0)
+ #define IFACE_STATE_CONFIGURED BIT(1)
+
++static atomic_t active_num_conn;
++
+ struct interface {
+ struct task_struct *ksmbd_kthread;
+ struct socket *ksmbd_socket;
+@@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk)
+ struct tcp_transport *t;
+
+ t = alloc_transport(client_sk);
+- if (!t)
++ if (!t) {
++ sock_release(client_sk);
+ return -ENOMEM;
++ }
+
+ csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
+ if (kernel_getpeername(client_sk, csin) < 0) {
+@@ -230,7 +234,7 @@ static int ksmbd_kthread_fn(void *p)
+ break;
+ }
+ ret = kernel_accept(iface->ksmbd_socket, &client_sk,
+- O_NONBLOCK);
++ SOCK_NONBLOCK);
+ mutex_unlock(&iface->sock_release_lock);
+ if (ret) {
+ if (ret == -EAGAIN)
+@@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p)
+ continue;
+ }
+
++ if (server_conf.max_connections &&
++ atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
++ pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
++ atomic_read(&active_num_conn));
++ atomic_dec(&active_num_conn);
++ sock_release(client_sk);
++ continue;
++ }
++
+ ksmbd_debug(CONN, "connect success: accepted new connection\n");
+ client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
+ client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
+@@ -278,16 +291,18 @@ static int ksmbd_tcp_run_kthread(struct interface *iface)
+
+ /**
+ * ksmbd_tcp_readv() - read data from socket in given iovec
+- * @t: TCP transport instance
+- * @iov_orig: base IO vector
+- * @nr_segs: number of segments in base iov
+- * @to_read: number of bytes to read from socket
++ * @t: TCP transport instance
++ * @iov_orig: base IO vector
++ * @nr_segs: number of segments in base iov
++ * @to_read: number of bytes to read from socket
++ * @max_retries: maximum retry count
+ *
+ * Return: on success return number of bytes read from socket,
+ * otherwise return error number
+ */
+ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
+- unsigned int nr_segs, unsigned int to_read)
++ unsigned int nr_segs, unsigned int to_read,
++ int max_retries)
+ {
+ int length = 0;
+ int total_read;
+@@ -322,11 +337,22 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
+ total_read = -EAGAIN;
+ break;
+ } else if (length == -ERESTARTSYS || length == -EAGAIN) {
++ /*
++ * If max_retries is negative, Allow unlimited
++ * retries to keep connection with inactive sessions.
++ */
++ if (max_retries == 0) {
++ total_read = length;
++ break;
++ } else if (max_retries > 0) {
++ max_retries--;
++ }
++
+ usleep_range(1000, 2000);
+ length = 0;
+ continue;
+ } else if (length <= 0) {
+- total_read = -EAGAIN;
++ total_read = length;
+ break;
+ }
+ }
+@@ -342,14 +368,15 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
+ * Return: on success return number of bytes read from socket,
+ * otherwise return error number
+ */
+-static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read)
++static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf,
++ unsigned int to_read, int max_retries)
+ {
+ struct kvec iov;
+
+ iov.iov_base = buf;
+ iov.iov_len = to_read;
+
+- return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read);
++ return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read, max_retries);
+ }
+
+ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
+@@ -365,6 +392,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
+ static void ksmbd_tcp_disconnect(struct ksmbd_transport *t)
+ {
+ free_transport(TCP_TRANS(t));
++ if (server_conf.max_connections)
++ atomic_dec(&active_num_conn);
+ }
+
+ static void tcp_destroy_socket(struct socket *ksmbd_socket)
+@@ -404,7 +433,7 @@ static int create_socket(struct interface *iface)
+ &ksmbd_socket);
+ if (ret) {
+ pr_err("Can't create socket for ipv4: %d\n", ret);
+- goto out_error;
++ goto out_clear;
+ }
+
+ sin.sin_family = PF_INET;
+@@ -462,6 +491,7 @@ static int create_socket(struct interface *iface)
+
+ out_error:
+ tcp_destroy_socket(ksmbd_socket);
++out_clear:
+ iface->ksmbd_socket = NULL;
+ return ret;
+ }
+diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
+index 835b384b08959..f76acd83c2944 100644
+--- a/fs/ksmbd/vfs.c
++++ b/fs/ksmbd/vfs.c
+@@ -19,8 +19,6 @@
+ #include <linux/sched/xacct.h>
+ #include <linux/crc32c.h>
+
+-#include "../internal.h" /* for vfs_path_lookup */
+-
+ #include "glob.h"
+ #include "oplock.h"
+ #include "connection.h"
+@@ -483,12 +481,11 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf, size_t count, loff_t *pos, bool sync,
+ ssize_t *written)
+ {
+- struct ksmbd_session *sess = work->sess;
+ struct file *filp;
+ loff_t offset = *pos;
+ int err = 0;
+
+- if (sess->conn->connection_type) {
++ if (work->conn->connection_type) {
+ if (!(fp->daccess & FILE_WRITE_DATA_LE)) {
+ pr_err("no right to write(%pd)\n",
+ fp->filp->f_path.dentry);
+@@ -1018,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ off, len);
+
+- return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len);
++ return vfs_fallocate(fp->filp,
++ FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
++ off, len);
+ }
+
+ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+@@ -1049,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+ *out_count = 0;
+ end = start + length;
+ while (start < end && *out_count < in_count) {
+- extent_start = f->f_op->llseek(f, start, SEEK_DATA);
++ extent_start = vfs_llseek(f, start, SEEK_DATA);
+ if (extent_start < 0) {
+ if (extent_start != -ENXIO)
+ ret = (int)extent_start;
+@@ -1059,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+ if (extent_start >= end)
+ break;
+
+- extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE);
++ extent_end = vfs_llseek(f, extent_start, SEEK_HOLE);
+ if (extent_end < 0) {
+ if (extent_end != -ENXIO)
+ ret = (int)extent_end;
+@@ -1541,6 +1540,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+ }
+
+ *pntsd = acl.sd_buf;
++ if (acl.sd_size < sizeof(struct smb_ntsd)) {
++ pr_err("sd size is invalid\n");
++ goto out_free;
++ }
++
+ (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
+@@ -1780,6 +1784,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
+
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len, 0);
++ if (ret == -EOPNOTSUPP || ret == -EXDEV)
++ ret = vfs_copy_file_range(src_fp->filp, src_off,
++ dst_fp->filp, dst_off, len,
++ COPY_FILE_SPLICE);
+ if (ret < 0)
+ return ret;
+
+diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
+index b0d5b8feb4a36..432c947731779 100644
+--- a/fs/ksmbd/vfs.h
++++ b/fs/ksmbd/vfs.h
+@@ -86,6 +86,7 @@ struct ksmbd_dir_info {
+ int last_entry_offset;
+ bool hide_dot_file;
+ int flags;
++ int last_entry_off_align;
+ };
+
+ struct ksmbd_readdir_data {
+diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
+index 29c1db66bd0f7..0df8467af39af 100644
+--- a/fs/ksmbd/vfs_cache.c
++++ b/fs/ksmbd/vfs_cache.c
+@@ -497,6 +497,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+ list_for_each_entry(lfp, &ci->m_fp_list, node) {
+ if (inode == file_inode(lfp->filp)) {
+ atomic_dec(&ci->m_count);
++ lfp = ksmbd_fp_get(lfp);
+ read_unlock(&ci->m_lock);
+ return lfp;
+ }
+@@ -569,7 +570,7 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp)
+ atomic_set(&fp->refcount, 1);
+
+ fp->filp = filp;
+- fp->conn = work->sess->conn;
++ fp->conn = work->conn;
+ fp->tcon = work->tcon;
+ fp->volatile_id = KSMBD_NO_FID;
+ fp->persistent_id = KSMBD_NO_FID;
+diff --git a/fs/libfs.c b/fs/libfs.c
+index 51b4de3b3447f..7bb5d90319cc6 100644
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -967,8 +967,8 @@ out:
+ EXPORT_SYMBOL_GPL(simple_attr_read);
+
+ /* interpret the buffer as a number to call the set function with */
+-ssize_t simple_attr_write(struct file *file, const char __user *buf,
+- size_t len, loff_t *ppos)
++static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos, bool is_signed)
+ {
+ struct simple_attr *attr;
+ unsigned long long val;
+@@ -989,7 +989,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
+ goto out;
+
+ attr->set_buf[size] = '\0';
+- ret = kstrtoull(attr->set_buf, 0, &val);
++ if (is_signed)
++ ret = kstrtoll(attr->set_buf, 0, &val);
++ else
++ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
+ ret = attr->set(attr->data, val);
+@@ -999,8 +1002,21 @@ out:
+ mutex_unlock(&attr->mutex);
+ return ret;
+ }
++
++ssize_t simple_attr_write(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos)
++{
++ return simple_attr_write_xsigned(file, buf, len, ppos, false);
++}
+ EXPORT_SYMBOL_GPL(simple_attr_write);
+
++ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos)
++{
++ return simple_attr_write_xsigned(file, buf, len, ppos, true);
++}
++EXPORT_SYMBOL_GPL(simple_attr_write_signed);
++
+ /**
+ * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
+ * @sb: filesystem to do the file handle conversion on
+diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
+index 7df6324ccb8ab..8161667c976f8 100644
+--- a/fs/lockd/clnt4xdr.c
++++ b/fs/lockd/clnt4xdr.c
+@@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
+ u32 exclusive;
+ int error;
+ __be32 *p;
+- s32 end;
+
+ memset(lock, 0, sizeof(*lock));
+ locks_init_lock(fl);
+@@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
+ fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
+ p = xdr_decode_hyper(p, &l_offset);
+ xdr_decode_hyper(p, &l_len);
+- end = l_offset + l_len - 1;
+-
+- fl->fl_start = (loff_t)l_offset;
+- if (l_len == 0 || end < 0)
+- fl->fl_end = OFFSET_MAX;
+- else
+- fl->fl_end = (loff_t)end;
++ nlm4svc_set_file_lock_range(fl, l_offset, l_len);
+ error = 0;
+ out:
+ return error;
+diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
+index 1d9488cf05348..87a0f207df0b9 100644
+--- a/fs/lockd/mon.c
++++ b/fs/lockd/mon.c
+@@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
+ {
+ struct nsm_handle *new;
+
++ if (!hostname)
++ return NULL;
++
+ new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
+ if (unlikely(new == NULL))
+ return NULL;
+diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
+index e10ae2c41279e..1c9214801e69e 100644
+--- a/fs/lockd/svc4proc.c
++++ b/fs/lockd/svc4proc.c
+@@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+ if (!nlmsvc_ops)
+ return nlm_lck_denied_nolocks;
+
++ if (lock->lock_start > OFFSET_MAX ||
++ (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
++ return nlm4_fbig;
++
+ /* Obtain host handle */
+ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
+ || (argp->monitor && nsm_monitor(host) < 0))
+@@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+ /* Set up the missing parts of the file_lock structure */
+ lock->fl.fl_file = file->f_file[mode];
+ lock->fl.fl_pid = current->tgid;
++ lock->fl.fl_start = (loff_t)lock->lock_start;
++ lock->fl.fl_end = lock->lock_len ?
++ (loff_t)(lock->lock_start + lock->lock_len - 1) :
++ OFFSET_MAX;
+ lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
+index cb3a7512c33ec..3515f17eaf3fb 100644
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -176,22 +176,28 @@ nlm_delete_file(struct nlm_file *file)
+ }
+ }
+
+-static int nlm_unlock_files(struct nlm_file *file)
++static int nlm_unlock_files(struct nlm_file *file, const struct file_lock *fl)
+ {
+ struct file_lock lock;
+- struct file *f;
+
++ locks_init_lock(&lock);
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+- for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
+- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+- pr_warn("lockd: unlock failure in %s:%d\n",
+- __FILE__, __LINE__);
+- return 1;
+- }
+- }
++ lock.fl_owner = fl->fl_owner;
++ lock.fl_pid = fl->fl_pid;
++ lock.fl_flags = FL_POSIX;
++
++ lock.fl_file = file->f_file[O_RDONLY];
++ if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
++ goto out_err;
++ lock.fl_file = file->f_file[O_WRONLY];
++ if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
++ goto out_err;
+ return 0;
++out_err:
++ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
++ return 1;
+ }
+
+ /*
+@@ -223,7 +229,7 @@ again:
+ if (match(lockhost, host)) {
+
+ spin_unlock(&flctx->flc_lock);
+- if (nlm_unlock_files(file))
++ if (nlm_unlock_files(file, fl))
+ return 1;
+ goto again;
+ }
+@@ -280,11 +286,10 @@ nlm_file_inuse(struct nlm_file *file)
+
+ static void nlm_close_files(struct nlm_file *file)
+ {
+- struct file *f;
+-
+- for (f = file->f_file[0]; f <= file->f_file[1]; f++)
+- if (f)
+- nlmsvc_ops->fclose(f);
++ if (file->f_file[O_RDONLY])
++ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
++ if (file->f_file[O_WRONLY])
++ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
+ }
+
+ /*
+diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
+index 98e957e4566c2..b303ecd74f330 100644
+--- a/fs/lockd/xdr4.c
++++ b/fs/lockd/xdr4.c
+@@ -20,13 +20,6 @@
+
+ #include "svcxdr.h"
+
+-static inline loff_t
+-s64_to_loff_t(__s64 offset)
+-{
+- return (loff_t)offset;
+-}
+-
+-
+ static inline s64
+ loff_t_to_s64(loff_t offset)
+ {
+@@ -40,6 +33,17 @@ loff_t_to_s64(loff_t offset)
+ return res;
+ }
+
++void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len)
++{
++ s64 end = off + len - 1;
++
++ fl->fl_start = off;
++ if (len == 0 || end < 0)
++ fl->fl_end = OFFSET_MAX;
++ else
++ fl->fl_end = end;
++}
++
+ /*
+ * NLM file handles are defined by specification to be a variable-length
+ * XDR opaque no longer than 1024 bytes. However, this implementation
+@@ -70,8 +74,6 @@ static bool
+ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
+ {
+ struct file_lock *fl = &lock->fl;
+- u64 len, start;
+- s64 end;
+
+ if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
+ return false;
+@@ -81,21 +83,15 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
+ return false;
+- if (xdr_stream_decode_u64(xdr, &start) < 0)
++ if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0)
+ return false;
+- if (xdr_stream_decode_u64(xdr, &len) < 0)
++ if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0)
+ return false;
+
+ locks_init_lock(fl);
+ fl->fl_flags = FL_POSIX;
+ fl->fl_type = F_RDLCK;
+- end = start + len - 1;
+- fl->fl_start = s64_to_loff_t(start);
+- if (len == 0 || end < 0)
+- fl->fl_end = OFFSET_MAX;
+- else
+- fl->fl_end = s64_to_loff_t(end);
+-
++ nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len);
+ return true;
+ }
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 3d6fb4ae847b4..881fd16905c61 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1901,9 +1901,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
+ void **priv)
+ {
+ struct inode *inode = locks_inode(filp);
++ kuid_t uid = i_uid_into_mnt(file_mnt_user_ns(filp), inode);
+ int error;
+
+- if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
++ if ((!uid_eq(current_fsuid(), uid)) && !capable(CAP_LEASE))
+ return -EACCES;
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+@@ -2703,6 +2704,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+ }
+ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+
++/**
++ * vfs_inode_has_locks - are any file locks held on @inode?
++ * @inode: inode to check for locks
++ *
++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
++ * set on @inode.
++ */
++bool vfs_inode_has_locks(struct inode *inode)
++{
++ struct file_lock_context *ctx;
++ bool ret;
++
++ ctx = smp_load_acquire(&inode->i_flctx);
++ if (!ctx)
++ return false;
++
++ spin_lock(&ctx->flc_lock);
++ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
++ spin_unlock(&ctx->flc_lock);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
++
+ #ifdef CONFIG_PROC_FS
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 97c54d3a22276..95b047256d093 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -11,7 +11,7 @@
+ /*
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+- * mb_cache_entry_delete()).
++ * mb_cache_entry_delete_or_get()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * Ext4 also uses it for deduplication of xattr values stored in inodes.
+@@ -90,12 +90,19 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_list);
+- /* One ref for hash, one ref returned */
+- atomic_set(&entry->e_refcnt, 1);
++ /*
++ * We create entry with two references. One reference is kept by the
++ * hash table, the other reference is used to protect us from
++ * mb_cache_entry_delete_or_get() until the entry is fully setup. This
++ * avoids nesting of cache->c_list_lock into hash table bit locks which
++ * is problematic for RT.
++ */
++ atomic_set(&entry->e_refcnt, 2);
+ entry->e_key = key;
+ entry->e_value = value;
+- entry->e_reusable = reusable;
+- entry->e_referenced = 0;
++ entry->e_flags = 0;
++ if (reusable)
++ set_bit(MBE_REUSABLE_B, &entry->e_flags);
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+@@ -107,24 +114,41 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+ hlist_bl_unlock(head);
+-
+ spin_lock(&cache->c_list_lock);
+ list_add_tail(&entry->e_list, &cache->c_list);
+- /* Grab ref for LRU list */
+- atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_list_lock);
++ mb_cache_entry_put(cache, entry);
+
+ return 0;
+ }
+ EXPORT_SYMBOL(mb_cache_entry_create);
+
+-void __mb_cache_entry_free(struct mb_cache_entry *entry)
++void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
+ {
++ struct hlist_bl_head *head;
++
++ head = mb_cache_entry_head(cache, entry->e_key);
++ hlist_bl_lock(head);
++ hlist_bl_del(&entry->e_hash_list);
++ hlist_bl_unlock(head);
+ kmem_cache_free(mb_entry_cache, entry);
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
+
++/*
++ * mb_cache_entry_wait_unused - wait to be the last user of the entry
++ *
++ * @entry - entry to work on
++ *
++ * Wait to be the last user of the entry.
++ */
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
++{
++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
++}
++EXPORT_SYMBOL(mb_cache_entry_wait_unused);
++
+ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ struct mb_cache_entry *entry,
+ u32 key)
+@@ -142,10 +166,10 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb_cache_entry,
+ e_hash_list);
+- if (entry->e_key == key && entry->e_reusable) {
+- atomic_inc(&entry->e_refcnt);
++ if (entry->e_key == key &&
++ test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
++ atomic_inc_not_zero(&entry->e_refcnt))
+ goto out;
+- }
+ node = node->next;
+ }
+ entry = NULL;
+@@ -205,10 +229,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+- if (entry->e_key == key && entry->e_value == value) {
+- atomic_inc(&entry->e_refcnt);
++ if (entry->e_key == key && entry->e_value == value &&
++ atomic_inc_not_zero(&entry->e_refcnt))
+ goto out;
+- }
+ }
+ entry = NULL;
+ out:
+@@ -217,7 +240,7 @@ out:
+ }
+ EXPORT_SYMBOL(mb_cache_entry_get);
+
+-/* mb_cache_entry_delete - remove a cache entry
++/* mb_cache_entry_delete - try to remove a cache entry
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+@@ -254,6 +277,43 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete);
+
++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
++ * @cache - cache we work with
++ * @key - key
++ * @value - value
++ *
++ * Remove entry from cache @cache with key @key and value @value. The removal
++ * happens only if the entry is unused. The function returns NULL in case the
++ * entry was successfully removed or there's no entry in cache. Otherwise the
++ * function grabs reference of the entry that we failed to delete because it
++ * still has users and return it.
++ */
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++ u32 key, u64 value)
++{
++ struct mb_cache_entry *entry;
++
++ entry = mb_cache_entry_get(cache, key, value);
++ if (!entry)
++ return NULL;
++
++ /*
++ * Drop the ref we got from mb_cache_entry_get() and the initial hash
++ * ref if we are the last user
++ */
++ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
++ return entry;
++
++ spin_lock(&cache->c_list_lock);
++ if (!list_empty(&entry->e_list))
++ list_del_init(&entry->e_list);
++ cache->c_entry_count--;
++ spin_unlock(&cache->c_list_lock);
++ __mb_cache_entry_free(cache, entry);
++ return NULL;
++}
++EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
++
+ /* mb_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
+@@ -263,7 +323,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete);
+ void mb_cache_entry_touch(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
+ {
+- entry->e_referenced = 1;
++ set_bit(MBE_REFERENCED_B, &entry->e_flags);
+ }
+ EXPORT_SYMBOL(mb_cache_entry_touch);
+
+@@ -281,34 +341,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned long nr_to_scan)
+ {
+ struct mb_cache_entry *entry;
+- struct hlist_bl_head *head;
+ unsigned long shrunk = 0;
+
+ spin_lock(&cache->c_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+- if (entry->e_referenced) {
+- entry->e_referenced = 0;
++ /* Drop initial hash reference if there is no user */
++ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
++ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
++ clear_bit(MBE_REFERENCED_B, &entry->e_flags);
+ list_move_tail(&entry->e_list, &cache->c_list);
+ continue;
+ }
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+- /*
+- * We keep LRU list reference so that entry doesn't go away
+- * from under us.
+- */
+ spin_unlock(&cache->c_list_lock);
+- head = mb_cache_entry_head(cache, entry->e_key);
+- hlist_bl_lock(head);
+- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+- hlist_bl_del_init(&entry->e_hash_list);
+- atomic_dec(&entry->e_refcnt);
+- }
+- hlist_bl_unlock(head);
+- if (mb_cache_entry_put(cache, entry))
+- shrunk++;
++ __mb_cache_entry_free(cache, entry);
++ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_list_lock);
+ }
+@@ -400,11 +450,6 @@ void mb_cache_destroy(struct mb_cache *cache)
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
+- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+- hlist_bl_del_init(&entry->e_hash_list);
+- atomic_dec(&entry->e_refcnt);
+- } else
+- WARN_ON(1);
+ list_del(&entry->e_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb_cache_entry_put(cache, entry);
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index a71f1cf894b9f..d4bd94234ef73 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = {
+ .writepage = minix_writepage,
+ .write_begin = minix_write_begin,
+ .write_end = generic_write_end,
+- .bmap = minix_bmap
++ .bmap = minix_bmap,
++ .direct_IO = noop_direct_IO
+ };
+
+ static const struct inode_operations minix_symlink_inode_operations = {
+diff --git a/fs/namei.c b/fs/namei.c
+index 1946d96677908..ea2785103376e 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1461,6 +1461,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+ * becoming unpinned.
+ */
+ flags = dentry->d_flags;
++ if (read_seqretry(&mount_lock, nd->m_seq))
++ return false;
+ continue;
+ }
+ if (read_seqretry(&mount_lock, nd->m_seq))
+@@ -2718,7 +2720,8 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
+ EXPORT_SYMBOL(lookup_one);
+
+ /**
+- * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
++ * lookup_one_unlocked - filesystem helper to lookup single pathname component
++ * @mnt_userns: idmapping of the mount the lookup is performed from
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+@@ -2729,14 +2732,15 @@ EXPORT_SYMBOL(lookup_one);
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+-struct dentry *lookup_one_len_unlocked(const char *name,
+- struct dentry *base, int len)
++struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
++ const char *name, struct dentry *base,
++ int len)
+ {
+ struct qstr this;
+ int err;
+ struct dentry *ret;
+
+- err = lookup_one_common(&init_user_ns, name, base, len, &this);
++ err = lookup_one_common(mnt_userns, name, base, len, &this);
+ if (err)
+ return ERR_PTR(err);
+
+@@ -2745,6 +2749,59 @@ struct dentry *lookup_one_len_unlocked(const char *name,
+ ret = lookup_slow(&this, base, 0);
+ return ret;
+ }
++EXPORT_SYMBOL(lookup_one_unlocked);
++
++/**
++ * lookup_one_positive_unlocked - filesystem helper to lookup single
++ * pathname component
++ * @mnt_userns: idmapping of the mount the lookup is performed from
++ * @name: pathname component to lookup
++ * @base: base directory to lookup from
++ * @len: maximum length @len should be interpreted to
++ *
++ * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
++ * known positive or ERR_PTR(). This is what most of the users want.
++ *
++ * Note that pinned negative with unlocked parent _can_ become positive at any
++ * time, so callers of lookup_one_unlocked() need to be very careful; pinned
++ * positives have >d_inode stable, so this one avoids such problems.
++ *
++ * Note that this routine is purely a helper for filesystem usage and should
++ * not be called by generic code.
++ *
++ * The helper should be called without i_mutex held.
++ */
++struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns,
++ const char *name,
++ struct dentry *base, int len)
++{
++ struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len);
++
++ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
++ dput(ret);
++ ret = ERR_PTR(-ENOENT);
++ }
++ return ret;
++}
++EXPORT_SYMBOL(lookup_one_positive_unlocked);
++
++/**
++ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
++ * @name: pathname component to lookup
++ * @base: base directory to lookup from
++ * @len: maximum length @len should be interpreted to
++ *
++ * Note that this routine is purely a helper for filesystem usage and should
++ * not be called by generic code.
++ *
++ * Unlike lookup_one_len, it should be called without the parent
++ * i_mutex held, and will take the i_mutex itself if necessary.
++ */
++struct dentry *lookup_one_len_unlocked(const char *name,
++ struct dentry *base, int len)
++{
++ return lookup_one_unlocked(&init_user_ns, name, base, len);
++}
+ EXPORT_SYMBOL(lookup_one_len_unlocked);
+
+ /*
+@@ -2758,12 +2815,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked);
+ struct dentry *lookup_positive_unlocked(const char *name,
+ struct dentry *base, int len)
+ {
+- struct dentry *ret = lookup_one_len_unlocked(name, base, len);
+- if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+- dput(ret);
+- ret = ERR_PTR(-ENOENT);
+- }
+- return ret;
++ return lookup_one_positive_unlocked(&init_user_ns, name, base, len);
+ }
+ EXPORT_SYMBOL(lookup_positive_unlocked);
+
+@@ -2784,7 +2836,7 @@ int path_pts(struct path *path)
+ dput(path->dentry);
+ path->dentry = parent;
+ child = d_hash_and_lookup(parent, &this);
+- if (!child)
++ if (IS_ERR_OR_NULL(child))
+ return -ENOENT;
+
+ path->dentry = child;
+@@ -2932,8 +2984,8 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+ return p;
+ }
+
+- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+- inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
++ lock_two_inodes(p1->d_inode, p2->d_inode,
++ I_MUTEX_PARENT, I_MUTEX_PARENT2);
+ return NULL;
+ }
+ EXPORT_SYMBOL(lock_rename);
+@@ -2948,6 +3000,65 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
+ }
+ EXPORT_SYMBOL(unlock_rename);
+
++/**
++ * mode_strip_umask - handle vfs umask stripping
++ * @dir: parent directory of the new inode
++ * @mode: mode of the new inode to be created in @dir
++ *
++ * Umask stripping depends on whether or not the filesystem supports POSIX
++ * ACLs. If the filesystem doesn't support it umask stripping is done directly
++ * in here. If the filesystem does support POSIX ACLs umask stripping is
++ * deferred until the filesystem calls posix_acl_create().
++ *
++ * Returns: mode
++ */
++static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
++{
++ if (!IS_POSIXACL(dir))
++ mode &= ~current_umask();
++ return mode;
++}
++
++/**
++ * vfs_prepare_mode - prepare the mode to be used for a new inode
++ * @mnt_userns: user namespace of the mount the inode was found from
++ * @dir: parent directory of the new inode
++ * @mode: mode of the new inode
++ * @mask_perms: allowed permission by the vfs
++ * @type: type of file to be created
++ *
++ * This helper consolidates and enforces vfs restrictions on the @mode of a new
++ * object to be created.
++ *
++ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
++ * the kernel documentation for mode_strip_umask()). Moving umask stripping
++ * after setgid stripping allows the same ordering for both non-POSIX ACL and
++ * POSIX ACL supporting filesystems.
++ *
++ * Note that it's currently valid for @type to be 0 if a directory is created.
++ * Filesystems raise that flag individually and we need to check whether each
++ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
++ * non-zero type.
++ *
++ * Returns: mode to be passed to the filesystem
++ */
++static inline umode_t vfs_prepare_mode(struct user_namespace *mnt_userns,
++ const struct inode *dir, umode_t mode,
++ umode_t mask_perms, umode_t type)
++{
++ mode = mode_strip_sgid(mnt_userns, dir, mode);
++ mode = mode_strip_umask(dir, mode);
++
++ /*
++ * Apply the vfs mandated allowed permission mask and set the type of
++ * file to be created before we call into the filesystem.
++ */
++ mode &= (mask_perms & ~S_IFMT);
++ mode |= (type & S_IFMT);
++
++ return mode;
++}
++
+ /**
+ * vfs_create - create new file
+ * @mnt_userns: user namespace of the mount the inode was found from
+@@ -2973,8 +3084,8 @@ int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+
+ if (!dir->i_op->create)
+ return -EACCES; /* shouldn't it be ENOSYS? */
+- mode &= S_IALLUGO;
+- mode |= S_IFREG;
++
++ mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IALLUGO, S_IFREG);
+ error = security_inode_create(dir, dentry, mode);
+ if (error)
+ return error;
+@@ -3239,8 +3350,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
+ if (open_flag & O_CREAT) {
+ if (open_flag & O_EXCL)
+ open_flag &= ~O_TRUNC;
+- if (!IS_POSIXACL(dir->d_inode))
+- mode &= ~current_umask();
++ mode = vfs_prepare_mode(mnt_userns, dir->d_inode, mode, mode, mode);
+ if (likely(got_write))
+ create_error = may_o_create(mnt_userns, &nd->path,
+ dentry, mode);
+@@ -3473,6 +3583,7 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
+ child = d_alloc(dentry, &slash_name);
+ if (unlikely(!child))
+ goto out_err;
++ mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
+ error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
+ if (error)
+ goto out_err;
+@@ -3625,18 +3736,14 @@ static struct dentry *filename_create(int dfd, struct filename *name,
+ {
+ struct dentry *dentry = ERR_PTR(-EEXIST);
+ struct qstr last;
++ bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
++ unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
++ unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
+ int type;
+ int err2;
+ int error;
+- bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
+-
+- /*
+- * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
+- * other flags passed in are ignored!
+- */
+- lookup_flags &= LOOKUP_REVAL;
+
+- error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
++ error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
+ if (error)
+ return ERR_PTR(error);
+
+@@ -3650,11 +3757,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
+ /* don't fail immediately if it's r/o, at least try to report other errors */
+ err2 = mnt_want_write(path->mnt);
+ /*
+- * Do the final lookup.
++ * Do the final lookup. Suppress 'create' if there is a trailing
++ * '/', and a directory wasn't requested.
+ */
+- lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
++ if (last.name[last.len] && !want_dir)
++ create_flags = 0;
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
+- dentry = __lookup_hash(&last, path->dentry, lookup_flags);
++ dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
+ if (IS_ERR(dentry))
+ goto unlock;
+
+@@ -3668,7 +3777,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
+ * all is fine. Let's be bastards - you had / on the end, you've
+ * been asking for (non-existent) directory. -ENOENT for you.
+ */
+- if (unlikely(!is_dir && last.name[last.len])) {
++ if (unlikely(!create_flags)) {
+ error = -ENOENT;
+ goto fail;
+ }
+@@ -3752,6 +3861,7 @@ int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+ if (!dir->i_op->mknod)
+ return -EPERM;
+
++ mode = vfs_prepare_mode(mnt_userns, dir, mode, mode, mode);
+ error = devcgroup_inode_mknod(mode, dev);
+ if (error)
+ return error;
+@@ -3802,9 +3912,8 @@ retry:
+ if (IS_ERR(dentry))
+ goto out1;
+
+- if (!IS_POSIXACL(path.dentry->d_inode))
+- mode &= ~current_umask();
+- error = security_path_mknod(&path, dentry, mode, dev);
++ error = security_path_mknod(&path, dentry,
++ mode_strip_umask(path.dentry->d_inode, mode), dev);
+ if (error)
+ goto out2;
+
+@@ -3874,7 +3983,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ if (!dir->i_op->mkdir)
+ return -EPERM;
+
+- mode &= (S_IRWXUGO|S_ISVTX);
++ mode = vfs_prepare_mode(mnt_userns, dir, mode, S_IRWXUGO | S_ISVTX, 0);
+ error = security_inode_mkdir(dir, dentry, mode);
+ if (error)
+ return error;
+@@ -3902,9 +4011,8 @@ retry:
+ if (IS_ERR(dentry))
+ goto out_putname;
+
+- if (!IS_POSIXACL(path.dentry->d_inode))
+- mode &= ~current_umask();
+- error = security_path_mkdir(&path, dentry, mode);
++ error = security_path_mkdir(&path, dentry,
++ mode_strip_umask(path.dentry->d_inode, mode));
+ if (!error) {
+ struct user_namespace *mnt_userns;
+ mnt_userns = mnt_user_ns(path.mnt);
+@@ -3975,13 +4083,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
+ dentry->d_inode->i_flags |= S_DEAD;
+ dont_mount(dentry);
+ detach_mounts(dentry);
+- fsnotify_rmdir(dir, dentry);
+
+ out:
+ inode_unlock(dentry->d_inode);
+ dput(dentry);
+ if (!error)
+- d_delete(dentry);
++ d_delete_notify(dir, dentry);
+ return error;
+ }
+ EXPORT_SYMBOL(vfs_rmdir);
+@@ -4103,7 +4210,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
+ if (!error) {
+ dont_mount(dentry);
+ detach_mounts(dentry);
+- fsnotify_unlink(dir, dentry);
+ }
+ }
+ }
+@@ -4111,9 +4217,11 @@ out:
+ inode_unlock(target);
+
+ /* We don't d_delete() NFS sillyrenamed files--they still exist. */
+- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
++ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
++ fsnotify_unlink(dir, dentry);
++ } else if (!error) {
+ fsnotify_link_count(target);
+- d_delete(dentry);
++ d_delete_notify(dir, dentry);
+ }
+
+ return error;
+@@ -4510,7 +4618,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
+ * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
+ * story.
+ * c) we have to lock _four_ objects - parents and victim (if it exists),
+- * and source (if it is not a directory).
++ * and source.
+ * And that - after we got ->i_mutex on parents (until then we don't know
+ * whether the target exists). Solution: try to be smart with locking
+ * order for inodes. We rely on the fact that tree topology may change
+@@ -4594,10 +4702,16 @@ int vfs_rename(struct renamedata *rd)
+
+ take_dentry_name_snapshot(&old_name, old_dentry);
+ dget(new_dentry);
+- if (!is_dir || (flags & RENAME_EXCHANGE))
+- lock_two_nondirectories(source, target);
+- else if (target)
+- inode_lock(target);
++ /*
++ * Lock all moved children. Moved directories may need to change parent
++ * pointer so they need the lock to prevent against concurrent
++ * directory changes moving parent pointer. For regular files we've
++ * historically always done this. The lockdep locking subclasses are
++ * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
++ * regular files and directories so it's difficult to tell which
++ * subclasses to use.
++ */
++ lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+
+ error = -EPERM;
+ if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
+@@ -4645,9 +4759,8 @@ int vfs_rename(struct renamedata *rd)
+ d_exchange(old_dentry, new_dentry);
+ }
+ out:
+- if (!is_dir || (flags & RENAME_EXCHANGE))
+- unlock_two_nondirectories(source, target);
+- else if (target)
++ inode_unlock(source);
++ if (target)
+ inode_unlock(target);
+ dput(new_dentry);
+ if (!error) {
+@@ -4961,7 +5074,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
+ {
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ int err;
+ unsigned int flags = 0;
+ if (nofs)
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 659a8f39c61af..1a9df6afb90b1 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -31,6 +31,7 @@
+ #include <uapi/linux/mount.h>
+ #include <linux/fs_context.h>
+ #include <linux/shmem_fs.h>
++#include <linux/mnt_idmapping.h>
+
+ #include "pnode.h"
+ #include "internal.h"
+@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt)
+ struct user_namespace *mnt_userns;
+
+ mnt_userns = mnt_user_ns(&mnt->mnt);
+- if (mnt_userns != &init_user_ns)
++ if (!initial_idmapping(mnt_userns))
+ put_user_ns(mnt_userns);
+ kfree_const(mnt->mnt_devname);
+ #ifdef CONFIG_SMP
+@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p)
+ struct vfsmount *vfs_create_mount(struct fs_context *fc)
+ {
+ struct mount *mnt;
++ struct user_namespace *fs_userns;
+
+ if (!fc->root)
+ return ERR_PTR(-EINVAL);
+@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
+ mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+ mnt->mnt_parent = mnt;
+
++ fs_userns = mnt->mnt.mnt_sb->s_user_ns;
++ if (!initial_idmapping(fs_userns))
++ mnt->mnt.mnt_userns = get_user_ns(fs_userns);
++
+ lock_mount_hash();
+ list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
+ unlock_mount_hash();
+@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
+
+ atomic_inc(&sb->s_active);
+ mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
+- if (mnt->mnt.mnt_userns != &init_user_ns)
++ if (!initial_idmapping(mnt->mnt.mnt_userns))
+ mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
+ mnt->mnt.mnt_sb = sb;
+ mnt->mnt.mnt_root = dget(root);
+@@ -3927,28 +3933,32 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
+ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+ {
+ struct vfsmount *m = &mnt->mnt;
++ struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
+
+ if (!kattr->mnt_userns)
+ return 0;
+
++ /*
++ * Creating an idmapped mount with the filesystem wide idmapping
++ * doesn't make sense so block that. We don't allow mushy semantics.
++ */
++ if (kattr->mnt_userns == fs_userns)
++ return -EINVAL;
++
+ /*
+ * Once a mount has been idmapped we don't allow it to change its
+ * mapping. It makes things simpler and callers can just create
+ * another bind-mount they can idmap if they want to.
+ */
+- if (mnt_user_ns(m) != &init_user_ns)
++ if (is_idmapped_mnt(m))
+ return -EPERM;
+
+ /* The underlying filesystem doesn't support idmapped mounts yet. */
+ if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
+ return -EINVAL;
+
+- /* Don't yet support filesystem mountable in user namespaces. */
+- if (m->mnt_sb->s_user_ns != &init_user_ns)
+- return -EINVAL;
+-
+ /* We're not controlling the superblock. */
+- if (!capable(CAP_SYS_ADMIN))
++ if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* Mount has already been visible in the filesystem hierarchy. */
+@@ -3958,6 +3968,23 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+ return 0;
+ }
+
++/**
++ * mnt_allow_writers() - check whether the attribute change allows writers
++ * @kattr: the new mount attributes
++ * @mnt: the mount to which @kattr will be applied
++ *
++ * Check whether thew new mount attributes in @kattr allow concurrent writers.
++ *
++ * Return: true if writers need to be held, false if not
++ */
++static inline bool mnt_allow_writers(const struct mount_kattr *kattr,
++ const struct mount *mnt)
++{
++ return (!(kattr->attr_set & MNT_READONLY) ||
++ (mnt->mnt.mnt_flags & MNT_READONLY)) &&
++ !kattr->mnt_userns;
++}
++
+ static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
+ struct mount *mnt, int *err)
+ {
+@@ -3988,8 +4015,7 @@ static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
+
+ last = m;
+
+- if ((kattr->attr_set & MNT_READONLY) &&
+- !(m->mnt.mnt_flags & MNT_READONLY)) {
++ if (!mnt_allow_writers(kattr, m)) {
+ *err = mnt_hold_writers(m);
+ if (*err)
+ goto out;
+@@ -4002,14 +4028,27 @@ out:
+
+ static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+ {
+- struct user_namespace *mnt_userns;
++ struct user_namespace *mnt_userns, *old_mnt_userns;
+
+ if (!kattr->mnt_userns)
+ return;
+
++ /*
++ * We're the only ones able to change the mount's idmapping. So
++ * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
++ */
++ old_mnt_userns = mnt->mnt.mnt_userns;
++
+ mnt_userns = get_user_ns(kattr->mnt_userns);
+ /* Pairs with smp_load_acquire() in mnt_user_ns(). */
+ smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
++
++ /*
++ * If this is an idmapped filesystem drop the reference we've taken
++ * in vfs_create_mount() before.
++ */
++ if (!initial_idmapping(old_mnt_userns))
++ put_user_ns(old_mnt_userns);
+ }
+
+ static void mount_setattr_commit(struct mount_kattr *kattr,
+@@ -4027,13 +4066,8 @@ static void mount_setattr_commit(struct mount_kattr *kattr,
+ WRITE_ONCE(m->mnt.mnt_flags, flags);
+ }
+
+- /*
+- * We either set MNT_READONLY above so make it visible
+- * before ~MNT_WRITE_HOLD or we failed to recursively
+- * apply mount options.
+- */
+- if ((kattr->attr_set & MNT_READONLY) &&
+- (m->mnt.mnt_flags & MNT_WRITE_HOLD))
++ /* If we had to hold writers unblock them. */
++ if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
+ mnt_unhold_writers(m);
+
+ if (!err && kattr->propagation)
+@@ -4087,9 +4121,9 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
+ unlock_mount_hash();
+
+ if (kattr->propagation) {
+- namespace_unlock();
+ if (err)
+ cleanup_group_ids(mnt, NULL);
++ namespace_unlock();
+ }
+
+ return err;
+@@ -4133,16 +4167,25 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
+ }
+
+ /*
+- * The init_user_ns is used to indicate that a vfsmount is not idmapped.
+- * This is simpler than just having to treat NULL as unmapped. Users
+- * wanting to idmap a mount to init_user_ns can just use a namespace
+- * with an identity mapping.
++ * The initial idmapping cannot be used to create an idmapped
++ * mount. We use the initial idmapping as an indicator of a mount
++ * that is not idmapped. It can simply be passed into helpers that
++ * are aware of idmapped mounts as a convenient shortcut. A user
++ * can just create a dedicated identity mapping to achieve the same
++ * result.
+ */
+ mnt_userns = container_of(ns, struct user_namespace, ns);
+- if (mnt_userns == &init_user_ns) {
++ if (initial_idmapping(mnt_userns)) {
+ err = -EPERM;
+ goto out_fput;
+ }
++
++ /* We're not controlling the target namespace. */
++ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
++ err = -EPERM;
++ goto out_fput;
++ }
++
+ kattr->mnt_userns = get_user_ns(mnt_userns);
+
+ out_fput:
+@@ -4263,12 +4306,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
+ return err;
+
+ err = user_path_at(dfd, path, kattr.lookup_flags, &target);
+- if (err)
+- return err;
+-
+- err = do_mount_setattr(&target, &kattr);
++ if (!err) {
++ err = do_mount_setattr(&target, &kattr);
++ path_put(&target);
++ }
+ finish_mount_kattr(&kattr);
+- path_put(&target);
+ return err;
+ }
+
+diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
+index 994ec22d40402..242f8bcb34a4c 100644
+--- a/fs/netfs/read_helper.c
++++ b/fs/netfs/read_helper.c
+@@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work)
+ netfs_rreq_do_write_to_cache(rreq);
+ }
+
+-static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
+- bool was_async)
++static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq)
+ {
+- if (was_async) {
+- rreq->work.func = netfs_rreq_write_to_cache_work;
+- if (!queue_work(system_unbound_wq, &rreq->work))
+- BUG();
+- } else {
+- netfs_rreq_do_write_to_cache(rreq);
+- }
++ rreq->work.func = netfs_rreq_write_to_cache_work;
++ if (!queue_work(system_unbound_wq, &rreq->work))
++ BUG();
+ }
+
+ /*
+@@ -560,7 +555,7 @@ again:
+ wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+ if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
+- return netfs_rreq_write_to_cache(rreq, was_async);
++ return netfs_rreq_write_to_cache(rreq);
+
+ netfs_rreq_completed(rreq, was_async);
+ }
+@@ -963,7 +958,7 @@ int netfs_readpage(struct file *file,
+ rreq = netfs_alloc_read_request(ops, netfs_priv, file);
+ if (!rreq) {
+ if (netfs_priv)
+- ops->cleanup(netfs_priv, page_file_mapping(page));
++ ops->cleanup(page_file_mapping(page), netfs_priv);
+ unlock_page(page);
+ return -ENOMEM;
+ }
+@@ -1190,7 +1185,7 @@ have_page:
+ goto error;
+ have_page_no_wait:
+ if (netfs_priv)
+- ops->cleanup(netfs_priv, mapping);
++ ops->cleanup(mapping, netfs_priv);
+ *_page = page;
+ _leave(" = 0");
+ return 0;
+@@ -1201,7 +1196,7 @@ error:
+ unlock_page(page);
+ put_page(page);
+ if (netfs_priv)
+- ops->cleanup(netfs_priv, mapping);
++ ops->cleanup(mapping, netfs_priv);
+ _leave(" = %d", ret);
+ return ret;
+ }
+diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
+index acb1d22907daf..16412d6636e86 100644
+--- a/fs/nfs/blocklayout/dev.c
++++ b/fs/nfs/blocklayout/dev.c
+@@ -422,7 +422,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
+ int ret, i;
+
+ d->children = kcalloc(v->concat.volumes_count,
+- sizeof(struct pnfs_block_dev), GFP_KERNEL);
++ sizeof(struct pnfs_block_dev), gfp_mask);
+ if (!d->children)
+ return -ENOMEM;
+
+@@ -451,7 +451,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
+ int ret, i;
+
+ d->children = kcalloc(v->stripe.volumes_count,
+- sizeof(struct pnfs_block_dev), GFP_KERNEL);
++ sizeof(struct pnfs_block_dev), gfp_mask);
+ if (!d->children)
+ return -ENOMEM;
+
+diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
+index 6a2033131c068..ccd4f245cae24 100644
+--- a/fs/nfs/callback.h
++++ b/fs/nfs/callback.h
+@@ -170,7 +170,7 @@ struct cb_devicenotifyitem {
+ };
+
+ struct cb_devicenotifyargs {
+- int ndevs;
++ uint32_t ndevs;
+ struct cb_devicenotifyitem *devs;
+ };
+
+diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
+index ed9d580826f5a..ccf3132384412 100644
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
+ rv = NFS4_OK;
+ break;
+ case -ENOENT:
++ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
+ /* Embrace your forgetfulness! */
+ rv = NFS4ERR_NOMATCHING_LAYOUT;
+
+@@ -358,12 +359,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
+ struct cb_process_state *cps)
+ {
+ struct cb_devicenotifyargs *args = argp;
+- int i;
++ const struct pnfs_layoutdriver_type *ld = NULL;
++ uint32_t i;
+ __be32 res = 0;
+- struct nfs_client *clp = cps->clp;
+- struct nfs_server *server = NULL;
+
+- if (!clp) {
++ if (!cps->clp) {
+ res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ goto out;
+ }
+@@ -371,23 +371,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
+ for (i = 0; i < args->ndevs; i++) {
+ struct cb_devicenotifyitem *dev = &args->devs[i];
+
+- if (!server ||
+- server->pnfs_curr_ld->id != dev->cbd_layout_type) {
+- rcu_read_lock();
+- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+- if (server->pnfs_curr_ld &&
+- server->pnfs_curr_ld->id == dev->cbd_layout_type) {
+- rcu_read_unlock();
+- goto found;
+- }
+- rcu_read_unlock();
+- continue;
++ if (!ld || ld->id != dev->cbd_layout_type) {
++ pnfs_put_layoutdriver(ld);
++ ld = pnfs_find_layoutdriver(dev->cbd_layout_type);
++ if (!ld)
++ continue;
+ }
+-
+- found:
+- nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
++ nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id);
+ }
+-
++ pnfs_put_layoutdriver(ld);
+ out:
+ kfree(args->devs);
+ return res;
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index 4c48d85f65170..ea17085ef884b 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
+ void *argp)
+ {
+ struct cb_devicenotifyargs *args = argp;
++ uint32_t tmp, n, i;
+ __be32 *p;
+ __be32 status = 0;
+- u32 tmp;
+- int n, i;
+- args->ndevs = 0;
+
+ /* Num of device notifications */
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
+@@ -271,12 +269,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
+ goto out;
+ }
+ n = ntohl(*p++);
+- if (n <= 0)
+- goto out;
+- if (n > ULONG_MAX / sizeof(*args->devs)) {
+- status = htonl(NFS4ERR_BADXDR);
++ if (n == 0)
+ goto out;
+- }
+
+ args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL);
+ if (!args->devs) {
+@@ -330,19 +324,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
+ dev->cbd_immediate = 0;
+ }
+
+- args->ndevs++;
+-
+ dprintk("%s: type %d layout 0x%x immediate %d\n",
+ __func__, dev->cbd_notify_type, dev->cbd_layout_type,
+ dev->cbd_immediate);
+ }
++ args->ndevs = n;
++ dprintk("%s: ndevs %d\n", __func__, args->ndevs);
++ return 0;
++err:
++ kfree(args->devs);
+ out:
++ args->devs = NULL;
++ args->ndevs = 0;
+ dprintk("%s: status %d ndevs %d\n",
+ __func__, ntohl(status), args->ndevs);
+ return status;
+-err:
+- kfree(args->devs);
+- goto out;
+ }
+
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index 23e165d5ec9ca..090b16890e3d6 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
+ INIT_LIST_HEAD(&clp->cl_superblocks);
+ clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
++ clp->cl_flags = cl_init->init_flags;
+ clp->cl_proto = cl_init->proto;
+ clp->cl_nconnect = cl_init->nconnect;
+ clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
+@@ -427,7 +428,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
+ list_add_tail(&new->cl_share_link,
+ &nn->nfs_client_list);
+ spin_unlock(&nn->nfs_client_lock);
+- new->cl_flags = cl_init->init_flags;
+ return rpc_ops->init_client(new, cl_init);
+ }
+
+@@ -860,6 +860,13 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
+ server->namelen = pathinfo.max_namelen;
+ }
+
++ if (clp->rpc_ops->discover_trunking != NULL &&
++ (server->caps & NFS_CAP_FS_LOCATIONS)) {
++ error = clp->rpc_ops->discover_trunking(server, mntfh);
++ if (error < 0)
++ return error;
++ }
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
+diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
+index 11118398f495c..6a3ba306c3216 100644
+--- a/fs/nfs/delegation.c
++++ b/fs/nfs/delegation.c
+@@ -228,8 +228,7 @@ again:
+ *
+ */
+ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
+- fmode_t type,
+- const nfs4_stateid *stateid,
++ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit)
+ {
+ struct nfs_delegation *delegation;
+@@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL) {
+ spin_lock(&delegation->lock);
+- if (nfs4_is_valid_delegation(delegation, 0)) {
+- nfs4_stateid_copy(&delegation->stateid, stateid);
+- delegation->type = type;
+- delegation->pagemod_limit = pagemod_limit;
+- oldcred = delegation->cred;
+- delegation->cred = get_cred(cred);
+- clear_bit(NFS_DELEGATION_NEED_RECLAIM,
+- &delegation->flags);
+- spin_unlock(&delegation->lock);
+- rcu_read_unlock();
+- put_cred(oldcred);
+- trace_nfs4_reclaim_delegation(inode, type);
+- return;
+- }
+- /* We appear to have raced with a delegation return. */
++ nfs4_stateid_copy(&delegation->stateid, stateid);
++ delegation->type = type;
++ delegation->pagemod_limit = pagemod_limit;
++ oldcred = delegation->cred;
++ delegation->cred = get_cred(cred);
++ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
++ if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
++ &delegation->flags))
++ atomic_long_inc(&nfs_active_delegations);
+ spin_unlock(&delegation->lock);
++ rcu_read_unlock();
++ put_cred(oldcred);
++ trace_nfs4_reclaim_delegation(inode, type);
++ } else {
++ rcu_read_unlock();
++ nfs_inode_set_delegation(inode, cred, type, stateid,
++ pagemod_limit);
+ }
+- rcu_read_unlock();
+- nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit);
+ }
+
+ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+@@ -755,11 +753,13 @@ int nfs4_inode_return_delegation(struct inode *inode)
+ struct nfs_delegation *delegation;
+
+ delegation = nfs_start_delegation_return(nfsi);
+- /* Synchronous recall of any application leases */
+- break_lease(inode, O_WRONLY | O_RDWR);
+- nfs_wb_all(inode);
+- if (delegation != NULL)
++ if (delegation != NULL) {
++ /* Synchronous recall of any application leases */
++ break_lease(inode, O_WRONLY | O_RDWR);
++ if (S_ISREG(inode->i_mode))
++ nfs_wb_all(inode);
+ return nfs_end_delegation_return(inode, delegation, 1);
++ }
+ return 0;
+ }
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 1a6d2867fba4f..32c3d0c454b19 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
+ ctx->attr_gencount = nfsi->attr_gencount;
+ ctx->dir_cookie = 0;
+ ctx->dup_cookie = 0;
++ ctx->page_index = 0;
+ spin_lock(&dir->i_lock);
+ if (list_empty(&nfsi->open_files) &&
+ (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
+@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
+ NFS_INO_INVALID_DATA |
+ NFS_INO_REVAL_FORCED);
+ list_add(&ctx->list, &nfsi->open_files);
++ clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ spin_unlock(&dir->i_lock);
+ return ctx;
+ }
+@@ -626,8 +628,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
+ if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
+ !list_empty(&nfsi->open_files)) {
+ set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+- invalidate_mapping_pages(dir->i_mapping,
+- nfsi->page_index + 1, -1);
++ set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ }
+ }
+
+@@ -870,7 +871,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
+
+ status = nfs_readdir_page_filler(desc, entry, pages, pglen,
+ arrays, narrays);
+- } while (!status && nfs_readdir_page_needs_filling(page));
++ } while (!status && nfs_readdir_page_needs_filling(page) &&
++ page_mapping(page));
+
+ nfs_readdir_free_pages(pages, array_size);
+ out_release_label:
+@@ -937,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
+ sizeof(nfsi->cookieverf));
+ }
+ res = nfs_readdir_search_array(desc);
+- if (res == 0) {
+- nfsi->page_index = desc->page_index;
++ if (res == 0)
+ return 0;
+- }
+ nfs_readdir_page_unlock_and_put_cached(desc);
+ return res;
+ }
+@@ -1048,6 +1048,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
+ goto out;
+
+ desc->page_index = 0;
++ desc->cache_entry_index = 0;
+ desc->last_cookie = desc->dir_cookie;
+ desc->duped = 0;
+
+@@ -1079,6 +1080,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_dir_context *dir_ctx = file->private_data;
+ struct nfs_readdir_descriptor *desc;
++ pgoff_t page_index;
+ int res;
+
+ dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
+@@ -1109,10 +1111,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ desc->dir_cookie = dir_ctx->dir_cookie;
+ desc->dup_cookie = dir_ctx->dup_cookie;
+ desc->duped = dir_ctx->duped;
++ page_index = dir_ctx->page_index;
+ desc->attr_gencount = dir_ctx->attr_gencount;
+ memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+ spin_unlock(&file->f_lock);
+
++ if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
++ list_is_singular(&nfsi->open_files))
++ invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
++
+ do {
+ res = readdir_search_pagecache(desc);
+
+@@ -1149,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ dir_ctx->dup_cookie = desc->dup_cookie;
+ dir_ctx->duped = desc->duped;
+ dir_ctx->attr_gencount = desc->attr_gencount;
++ dir_ctx->page_index = desc->page_index;
+ memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
+ spin_unlock(&file->f_lock);
+
+@@ -1269,13 +1277,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry)
+ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
+ {
+ struct inode *inode = d_inode(dentry);
++ struct inode *dir = d_inode(dentry->d_parent);
+
+- if (!nfs_verifier_is_delegated(dentry) &&
+- !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf))
+- goto out;
++ if (!nfs_verify_change_attribute(dir, verf))
++ return;
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ nfs_set_verifier_delegated(&verf);
+-out:
+ dentry->d_time = verf;
+ }
+
+@@ -1413,7 +1420,7 @@ out_force:
+ static void nfs_mark_dir_for_revalidate(struct inode *inode)
+ {
+ spin_lock(&inode->i_lock);
+- nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE);
++ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
+ spin_unlock(&inode->i_lock);
+ }
+
+@@ -1834,16 +1841,6 @@ const struct dentry_operations nfs4_dentry_operations = {
+ };
+ EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
+
+-static fmode_t flags_to_mode(int flags)
+-{
+- fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
+- if ((flags & O_ACCMODE) != O_WRONLY)
+- res |= FMODE_READ;
+- if ((flags & O_ACCMODE) != O_RDONLY)
+- res |= FMODE_WRITE;
+- return res;
+-}
+-
+ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
+ {
+ return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
+@@ -1983,6 +1980,24 @@ out:
+
+ no_open:
+ res = nfs_lookup(dir, dentry, lookup_flags);
++ if (!res) {
++ inode = d_inode(dentry);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
++ res = ERR_PTR(-ENOTDIR);
++ else if (inode && S_ISREG(inode->i_mode))
++ res = ERR_PTR(-EOPENSTALE);
++ } else if (!IS_ERR(res)) {
++ inode = d_inode(res);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
++ dput(res);
++ res = ERR_PTR(-ENOTDIR);
++ } else if (inode && S_ISREG(inode->i_mode)) {
++ dput(res);
++ res = ERR_PTR(-EOPENSTALE);
++ }
++ }
+ if (switched) {
+ d_lookup_done(dentry);
+ if (!res)
+@@ -2383,6 +2398,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+
+ trace_nfs_link_enter(inode, dir, dentry);
+ d_drop(dentry);
++ if (S_ISREG(inode->i_mode))
++ nfs_sync_inode(inode);
+ error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+ if (error == 0) {
+ ihold(inode);
+@@ -2471,6 +2488,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ }
+ }
+
++ if (S_ISREG(old_inode->i_mode))
++ nfs_sync_inode(old_inode);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+@@ -2676,7 +2695,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
+ return NULL;
+ }
+
+-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
++static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_access_entry *cache;
+@@ -2706,8 +2725,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *
+ spin_lock(&inode->i_lock);
+ retry = false;
+ }
+- res->cred = cache->cred;
+- res->mask = cache->mask;
++ *mask = cache->mask;
+ list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
+ err = 0;
+ out:
+@@ -2719,7 +2737,7 @@ out_zap:
+ return -ENOENT;
+ }
+
+-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
++static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
+ {
+ /* Only check the most recently returned cache entry,
+ * but do it without locking.
+@@ -2741,22 +2759,21 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
+ goto out;
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
+ goto out;
+- res->cred = cache->cred;
+- res->mask = cache->mask;
++ *mask = cache->mask;
+ err = 0;
+ out:
+ rcu_read_unlock();
+ return err;
+ }
+
+-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
+-nfs_access_entry *res, bool may_block)
++int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
++ u32 *mask, bool may_block)
+ {
+ int status;
+
+- status = nfs_access_get_cached_rcu(inode, cred, res);
++ status = nfs_access_get_cached_rcu(inode, cred, mask);
+ if (status != 0)
+- status = nfs_access_get_cached_locked(inode, cred, res,
++ status = nfs_access_get_cached_locked(inode, cred, mask,
+ may_block);
+
+ return status;
+@@ -2877,7 +2894,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
+
+ trace_nfs_access_enter(inode);
+
+- status = nfs_access_get_cached(inode, cred, &cache, may_block);
++ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
+ if (status == 0)
+ goto out_cached;
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 2e894fec036b0..018af6ec97b40 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -172,8 +172,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
+
+ if (iov_iter_rw(iter) == READ)
+- return nfs_file_direct_read(iocb, iter);
+- return nfs_file_direct_write(iocb, iter);
++ return nfs_file_direct_read(iocb, iter, true);
++ return nfs_file_direct_write(iocb, iter, true);
+ }
+
+ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
+@@ -424,6 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @iter: vector of user buffers into which to read data
++ * @swap: flag indicating this is swap IO, not O_DIRECT IO
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+@@ -439,7 +440,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+-ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
++ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
++ bool swap)
+ {
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+@@ -481,12 +483,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
+ if (iter_is_iovec(iter))
+ dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
+
+- nfs_start_io_direct(inode);
++ if (!swap)
++ nfs_start_io_direct(inode);
+
+ NFS_I(inode)->read_io += count;
+ requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
+
+- nfs_end_io_direct(inode);
++ if (!swap)
++ nfs_end_io_direct(inode);
+
+ if (requested > 0) {
+ result = nfs_direct_wait(dreq);
+@@ -505,20 +509,44 @@ out:
+ return result;
+ }
+
+-static void
+-nfs_direct_join_group(struct list_head *list, struct inode *inode)
++static void nfs_direct_add_page_head(struct list_head *list,
++ struct nfs_page *req)
++{
++ struct nfs_page *head = req->wb_head;
++
++ if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
++ return;
++ if (!list_empty(&head->wb_list)) {
++ nfs_unlock_request(head);
++ return;
++ }
++ list_add(&head->wb_list, list);
++ kref_get(&head->wb_kref);
++ kref_get(&head->wb_kref);
++}
++
++static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
+ {
+- struct nfs_page *req, *next;
++ struct nfs_page *req, *subreq;
+
+ list_for_each_entry(req, list, wb_list) {
+- if (req->wb_head != req || req->wb_this_page == req)
++ if (req->wb_head != req) {
++ nfs_direct_add_page_head(&req->wb_list, req);
+ continue;
+- for (next = req->wb_this_page;
+- next != req->wb_head;
+- next = next->wb_this_page) {
+- nfs_list_remove_request(next);
+- nfs_release_request(next);
+ }
++ subreq = req->wb_this_page;
++ if (subreq == req)
++ continue;
++ do {
++ /*
++ * Remove subrequests from this list before freeing
++ * them in the call to nfs_join_page_group().
++ */
++ if (!list_empty(&subreq->wb_list)) {
++ nfs_list_remove_request(subreq);
++ nfs_release_request(subreq);
++ }
++ } while ((subreq = subreq->wb_this_page) != req);
+ nfs_join_page_group(req, inode);
+ }
+ }
+@@ -620,7 +648,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
+ nfs_unlock_and_release_request(req);
+ }
+
+- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
++ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
+ }
+
+@@ -789,7 +817,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+ */
+ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
+ struct iov_iter *iter,
+- loff_t pos)
++ loff_t pos, int ioflags)
+ {
+ struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
+@@ -797,7 +825,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
+ size_t requested_bytes = 0;
+ size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
+
+- nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
++ nfs_pageio_init_write(&desc, inode, ioflags, false,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
+ get_dreq(dreq);
+@@ -875,6 +903,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
+ * nfs_file_direct_write - file direct write operation for NFS files
+ * @iocb: target I/O control block
+ * @iter: vector of user buffers from which to write data
++ * @swap: flag indicating this is swap IO, not O_DIRECT IO
+ *
+ * We use this function for direct writes instead of calling
+ * generic_file_aio_write() in order to avoid taking the inode
+@@ -891,7 +920,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
+ * Note that O_APPEND is not supported for NFS direct writes, as there
+ * is no atomic O_APPEND write facility in the NFS protocol.
+ */
+-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
++ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
++ bool swap)
+ {
+ ssize_t result, requested;
+ size_t count;
+@@ -905,7 +935,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
+ dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
+ file, iov_iter_count(iter), (long long) iocb->ki_pos);
+
+- result = generic_write_checks(iocb, iter);
++ if (swap)
++ /* bypass generic checks */
++ result = iov_iter_count(iter);
++ else
++ result = generic_write_checks(iocb, iter);
+ if (result <= 0)
+ return result;
+ count = result;
+@@ -936,16 +970,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
+ dreq->iocb = iocb;
+ pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
+
+- nfs_start_io_direct(inode);
++ if (swap) {
++ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
++ FLUSH_STABLE);
++ } else {
++ nfs_start_io_direct(inode);
+
+- requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
++ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
++ FLUSH_COND_STABLE);
+
+- if (mapping->nrpages) {
+- invalidate_inode_pages2_range(mapping,
+- pos >> PAGE_SHIFT, end);
+- }
++ if (mapping->nrpages) {
++ invalidate_inode_pages2_range(mapping,
++ pos >> PAGE_SHIFT, end);
++ }
+
+- nfs_end_io_direct(inode);
++ nfs_end_io_direct(inode);
++ }
+
+ if (requested > 0) {
+ result = nfs_direct_wait(dreq);
+diff --git a/fs/nfs/file.c b/fs/nfs/file.c
+index aa353fd582404..dd53d0f97c57d 100644
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -161,7 +161,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
+ ssize_t result;
+
+ if (iocb->ki_flags & IOCB_DIRECT)
+- return nfs_file_direct_read(iocb, to);
++ return nfs_file_direct_read(iocb, to, false);
+
+ dprintk("NFS: read(%pD2, %zu@%lu)\n",
+ iocb->ki_filp,
+@@ -208,22 +208,25 @@ static int
+ nfs_file_fsync_commit(struct file *file, int datasync)
+ {
+ struct inode *inode = file_inode(file);
+- int ret;
++ int ret, ret2;
+
+ dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
+
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+- if (ret < 0)
+- return ret;
+- return file_check_and_advance_wb_err(file);
++ ret2 = file_check_and_advance_wb_err(file);
++ if (ret2 < 0)
++ return ret2;
++ return ret;
+ }
+
+ int
+ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ {
+- struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct inode *inode = file_inode(file);
++ struct nfs_inode *nfsi = NFS_I(inode);
++ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
++ long nredirtied;
+ int ret;
+
+ trace_nfs_fsync_enter(inode);
+@@ -238,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
++ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
++ if (nredirtied == save_nredirtied)
+ break;
+- /*
+- * If nfs_file_fsync_commit detected a server reboot, then
+- * resend all dirty pages that might have been covered by
+- * the NFS_CONTEXT_RESEND_WRITES flag
+- */
+- start = 0;
+- end = LLONG_MAX;
++ save_nredirtied = nredirtied;
+ }
+
+ trace_nfs_fsync_exit(inode, ret);
+@@ -389,11 +387,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
+ return status;
+ NFS_I(mapping->host)->write_io += copied;
+
+- if (nfs_ctx_key_to_expire(ctx, mapping->host)) {
+- status = nfs_wb_all(mapping->host);
+- if (status < 0)
+- return status;
+- }
++ if (nfs_ctx_key_to_expire(ctx, mapping->host))
++ nfs_wb_all(mapping->host);
+
+ return copied;
+ }
+@@ -489,8 +484,9 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ {
+ unsigned long blocks;
+ long long isize;
+- struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
+- struct inode *inode = file->f_mapping->host;
++ struct inode *inode = file_inode(file);
++ struct rpc_clnt *clnt = NFS_CLIENT(inode);
++ struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
+
+ spin_lock(&inode->i_lock);
+ blocks = inode->i_blocks;
+@@ -503,14 +499,22 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+
+ *span = sis->pages;
+
++
++ if (cl->rpc_ops->enable_swap)
++ cl->rpc_ops->enable_swap(inode);
++
+ return rpc_clnt_swap_activate(clnt);
+ }
+
+ static void nfs_swap_deactivate(struct file *file)
+ {
+- struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
++ struct inode *inode = file_inode(file);
++ struct rpc_clnt *clnt = NFS_CLIENT(inode);
++ struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
+
+ rpc_clnt_swap_deactivate(clnt);
++ if (cl->rpc_ops->disable_swap)
++ cl->rpc_ops->disable_swap(file_inode(file));
+ }
+
+ const struct address_space_operations nfs_file_aops = {
+@@ -590,18 +594,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
+ .page_mkwrite = nfs_vm_page_mkwrite,
+ };
+
+-static int nfs_need_check_write(struct file *filp, struct inode *inode,
+- int error)
+-{
+- struct nfs_open_context *ctx;
+-
+- ctx = nfs_file_open_context(filp);
+- if (nfs_error_is_fatal_on_server(error) ||
+- nfs_ctx_key_to_expire(ctx, inode))
+- return 1;
+- return 0;
+-}
+-
+ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
+ {
+ struct file *file = iocb->ki_filp;
+@@ -616,7 +608,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
+ return result;
+
+ if (iocb->ki_flags & IOCB_DIRECT)
+- return nfs_file_direct_write(iocb, from);
++ return nfs_file_direct_write(iocb, from, false);
+
+ dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+ file, iov_iter_count(from), (long long) iocb->ki_pos);
+@@ -629,7 +621,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
+ if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
+ result = nfs_revalidate_file_size(inode, file);
+ if (result)
+- goto out;
++ return result;
+ }
+
+ nfs_clear_invalid_mapping(file->f_mapping);
+@@ -648,6 +640,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
+
+ written = result;
+ iocb->ki_pos += written;
++ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+
+ if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+ result = filemap_fdatawrite_range(file->f_mapping,
+@@ -665,17 +658,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
+ }
+ result = generic_write_sync(iocb, written);
+ if (result < 0)
+- goto out;
++ return result;
+
++out:
+ /* Return error values */
+ error = filemap_check_wb_err(file->f_mapping, since);
+- if (nfs_need_check_write(file, inode, error)) {
+- int err = nfs_wb_all(inode);
+- if (err < 0)
+- result = err;
++ switch (error) {
++ default:
++ break;
++ case -EDQUOT:
++ case -EFBIG:
++ case -ENOSPC:
++ nfs_wb_all(inode);
++ error = file_check_and_advance_wb_err(file);
++ if (error < 0)
++ result = error;
+ }
+- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
+-out:
+ return result;
+
+ out_swapfile:
+diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
+index d2103852475fa..45eec08ec904f 100644
+--- a/fs/nfs/filelayout/filelayout.c
++++ b/fs/nfs/filelayout/filelayout.c
+@@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
+ return &fl->generic_hdr;
+ }
+
++static bool
++filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
++{
++ return flseg->num_fh > 1;
++}
++
+ /*
+ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
+ *
+@@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ size = pnfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
++ else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg)))
++ return size;
+
+ /* see if req and prev are in the same stripe */
+ if (prev) {
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index d383de00d4868..ceef75b4d2494 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -1140,6 +1140,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ case -EIO:
+ case -ETIMEDOUT:
+ case -EPIPE:
++ case -EPROTO:
++ case -ENODEV:
+ dprintk("%s DS connection error %d\n", __func__,
+ task->tk_status);
+ nfs4_delete_deviceid(devid->ld, devid->nfs_client,
+@@ -1245,6 +1247,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
+ case -ENOBUFS:
+ case -EPIPE:
+ case -EPERM:
++ case -EPROTO:
++ case -ENODEV:
+ *op_status = status = NFS4ERR_NXIO;
+ break;
+ case -EACCES:
+diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+index c9b61b818ec11..bfa7202ca7be1 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+@@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
+ goto noconnect;
+
+ ds = mirror->mirror_ds->ds;
++ if (READ_ONCE(ds->ds_clp))
++ goto out;
+ /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
+ smp_rmb();
+- if (ds->ds_clp)
+- goto out;
+
+ /* FIXME: For now we assume the server sent only one version of NFS
+ * to use for the DS.
+diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
+index 0d444a90f513a..fb3cad38b1497 100644
+--- a/fs/nfs/fs_context.c
++++ b/fs/nfs/fs_context.c
+@@ -514,7 +514,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
+ else
+- ctx->flags &= NFS_MOUNT_SOFTREVAL;
++ ctx->flags |= NFS_MOUNT_SOFTREVAL;
+ break;
+ case Opt_posix:
+ if (result.negated)
+diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
+index 59355c106eceb..7604cb6a0ac23 100644
+--- a/fs/nfs/getroot.c
++++ b/fs/nfs/getroot.c
+@@ -80,18 +80,15 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
+ goto out;
+
+ /* get the actual root for this mount */
+- fsinfo.fattr = nfs_alloc_fattr();
++ fsinfo.fattr = nfs_alloc_fattr_with_label(server);
+ if (fsinfo.fattr == NULL)
+ goto out_name;
+
+- fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL);
+- if (IS_ERR(fsinfo.fattr->label))
+- goto out_fattr;
+ error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo);
+ if (error < 0) {
+ dprintk("nfs_get_root: getattr error = %d\n", -error);
+ nfs_errorf(fc, "NFS: Couldn't getattr on root");
+- goto out_label;
++ goto out_fattr;
+ }
+
+ inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
+@@ -99,12 +96,12 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
+ dprintk("nfs_get_root: get root inode failed\n");
+ error = PTR_ERR(inode);
+ nfs_errorf(fc, "NFS: Couldn't get root inode");
+- goto out_label;
++ goto out_fattr;
+ }
+
+ error = nfs_superblock_set_dummy_root(s, inode);
+ if (error != 0)
+- goto out_label;
++ goto out_fattr;
+
+ /* root dentries normally start off anonymous and get spliced in later
+ * if the dentry tree reaches them; however if the dentry already
+@@ -115,7 +112,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
+ dprintk("nfs_get_root: get root dentry failed\n");
+ error = PTR_ERR(root);
+ nfs_errorf(fc, "NFS: Couldn't get root dentry");
+- goto out_label;
++ goto out_fattr;
+ }
+
+ security_d_instantiate(root, inode);
+@@ -154,8 +151,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
+ nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label);
+ error = 0;
+
+-out_label:
+- nfs4_label_free(fsinfo.fattr->label);
+ out_fattr:
+ nfs_free_fattr(fsinfo.fattr);
+ out_name:
+@@ -165,5 +160,5 @@ out:
+ error_splat_root:
+ dput(fc->root);
+ fc->root = NULL;
+- goto out_label;
++ goto out_fattr;
+ }
+diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
+index 853213b3a2095..d8f01d222c499 100644
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -210,10 +210,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
+ flags &= ~NFS_INO_INVALID_XATTR;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
+- if (inode->i_mapping->nrpages == 0)
+- flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+ flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
++
+ nfsi->cache_validity |= flags;
++
++ if (inode->i_mapping->nrpages == 0)
++ nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA |
++ NFS_INO_DATA_INVAL_DEFER);
++ else if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
++ nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER;
+ }
+ EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
+
+@@ -426,6 +431,23 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+ return inode;
+ }
+
++static void nfs_inode_init_regular(struct nfs_inode *nfsi)
++{
++ atomic_long_set(&nfsi->nrequests, 0);
++ atomic_long_set(&nfsi->redirtied_pages, 0);
++ INIT_LIST_HEAD(&nfsi->commit_info.list);
++ atomic_long_set(&nfsi->commit_info.ncommit, 0);
++ atomic_set(&nfsi->commit_info.rpcs_out, 0);
++ mutex_init(&nfsi->commit_mutex);
++}
++
++static void nfs_inode_init_dir(struct nfs_inode *nfsi)
++{
++ nfsi->cache_change_attribute = 0;
++ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
++ init_rwsem(&nfsi->rmdir_sem);
++}
++
+ /*
+ * This is our front-end to iget that looks up inodes by file handle
+ * instead of inode number.
+@@ -480,10 +502,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ inode->i_data.a_ops = &nfs_file_aops;
++ nfs_inode_init_regular(nfsi);
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
+ inode->i_fop = &nfs_dir_operations;
+ inode->i_data.a_ops = &nfs_dir_aops;
++ nfs_inode_init_dir(nfsi);
+ /* Deal with crossing mountpoints */
+ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
+ fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+@@ -509,7 +533,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
+ inode->i_uid = make_kuid(&init_user_ns, -2);
+ inode->i_gid = make_kgid(&init_user_ns, -2);
+ inode->i_blocks = 0;
+- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
+
+@@ -708,9 +731,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+ if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+ inode->i_mode & S_ISUID)
+ inode->i_mode &= ~S_ISUID;
+- if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+- (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+- (S_ISGID | S_IXGRP))
++ if (setattr_should_drop_sgid(&init_user_ns, inode))
+ inode->i_mode &= ~S_ISGID;
+ if ((attr->ia_valid & ATTR_MODE) != 0) {
+ int mode = attr->ia_mode & S_IALLUGO;
+@@ -835,12 +856,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ }
+
+ /* Flush out writes to the server in order to update c/mtime. */
+- if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
+- S_ISREG(inode->i_mode)) {
+- err = filemap_write_and_wait(inode->i_mapping);
+- if (err)
+- goto out;
+- }
++ if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
++ S_ISREG(inode->i_mode))
++ filemap_write_and_wait(inode->i_mapping);
+
+ /*
+ * We may force a getattr if the user cares about atime.
+@@ -1165,7 +1183,6 @@ int nfs_open(struct inode *inode, struct file *filp)
+ nfs_fscache_open_file(inode, filp);
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(nfs_open);
+
+ /*
+ * This function is called whenever some part of NFS notices that
+@@ -1579,18 +1596,37 @@ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+ struct nfs_fattr *fattr;
+
+- fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
+- if (fattr != NULL)
++ fattr = kmalloc(sizeof(*fattr), GFP_KERNEL);
++ if (fattr != NULL) {
+ nfs_fattr_init(fattr);
++ fattr->label = NULL;
++ }
+ return fattr;
+ }
+ EXPORT_SYMBOL_GPL(nfs_alloc_fattr);
+
++struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server)
++{
++ struct nfs_fattr *fattr = nfs_alloc_fattr();
++
++ if (!fattr)
++ return NULL;
++
++ fattr->label = nfs4_label_alloc(server, GFP_KERNEL);
++ if (IS_ERR(fattr->label)) {
++ kfree(fattr);
++ return NULL;
++ }
++
++ return fattr;
++}
++EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label);
++
+ struct nfs_fh *nfs_alloc_fhandle(void)
+ {
+ struct nfs_fh *fh;
+
+- fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
++ fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
+ if (fh != NULL)
+ fh->size = 0;
+ return fh;
+@@ -1777,8 +1813,10 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK;
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
++ enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type;
+
+- if (!(cache_validity & NFS_INO_INVALID_CHANGE) &&
++ if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED &&
++ !(cache_validity & NFS_INO_INVALID_CHANGE) &&
+ (cache_validity & check_valid) != 0 &&
+ (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+ nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0)
+@@ -2260,14 +2298,7 @@ static void init_once(void *foo)
+ INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
+- INIT_LIST_HEAD(&nfsi->commit_info.list);
+- atomic_long_set(&nfsi->nrequests, 0);
+- atomic_long_set(&nfsi->commit_info.ncommit, 0);
+- atomic_set(&nfsi->commit_info.rpcs_out, 0);
+- init_rwsem(&nfsi->rmdir_sem);
+- mutex_init(&nfsi->commit_mutex);
+ nfs4_init_once(nfsi);
+- nfsi->cache_change_attribute = 0;
+ }
+
+ static int __init nfs_init_inodecache(void)
+diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
+index 66fc936834f23..2ceb4b98ec15f 100644
+--- a/fs/nfs/internal.h
++++ b/fs/nfs/internal.h
+@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
+ return true;
+ }
+
++static inline fmode_t flags_to_mode(int flags)
++{
++ fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
++ if ((flags & O_ACCMODE) != O_WRONLY)
++ res |= FMODE_READ;
++ if ((flags & O_ACCMODE) != O_RDONLY)
++ res |= FMODE_WRITE;
++ return res;
++}
++
+ /*
+ * Note: RFC 1813 doesn't limit the number of auth flavors that
+ * a server can return, so make something up.
+@@ -341,14 +351,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src)
+
+ return dst;
+ }
+-static inline void nfs4_label_free(struct nfs4_label *label)
+-{
+- if (label) {
+- kfree(label->label);
+- kfree(label);
+- }
+- return;
+-}
+
+ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+ {
+@@ -357,7 +359,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+ }
+ #else
+ static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
+-static inline void nfs4_label_free(void *label) {}
+ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+ {
+ }
+@@ -580,6 +581,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
+ !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier);
+ }
+
++static inline gfp_t nfs_io_gfp_mask(void)
++{
++ if (current->flags & PF_WQ_WORKER)
++ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
++ return GFP_KERNEL;
++}
++
+ /* unlink.c */
+ extern struct rpc_task *
+ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
+@@ -817,6 +825,7 @@ static inline bool nfs_error_is_fatal_on_server(int err)
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
++ case -ENOMEM:
+ return false;
+ }
+ return nfs_error_is_fatal(err);
+diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
+index bc0c698f33508..565421c6682ed 100644
+--- a/fs/nfs/namespace.c
++++ b/fs/nfs/namespace.c
+@@ -147,7 +147,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
+ struct nfs_fs_context *ctx;
+ struct fs_context *fc;
+ struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+- struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
++ struct nfs_server *server = NFS_SB(path->dentry->d_sb);
+ struct nfs_client *client = server->nfs_client;
+ int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout);
+ int ret;
+diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
+index 7fba7711e6b3a..266a4badf1dfc 100644
+--- a/fs/nfs/nfs2xdr.c
++++ b/fs/nfs/nfs2xdr.c
+@@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+
+ error = decode_filename_inline(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+- return error;
++ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
+
+ /*
+ * The type (size and byte order) of nfscookie isn't defined in
+diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
+index 5601e47360c28..b49359afac883 100644
+--- a/fs/nfs/nfs3client.c
++++ b/fs/nfs/nfs3client.c
+@@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
+ if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
+ __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
+- __set_bit(NFS_CS_NOPING, &cl_init.init_flags);
+ __set_bit(NFS_CS_DS, &cl_init.init_flags);
+
+ /* Use the MDS nfs_client cl_ipaddr. */
+diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
+index e6eca1d7481b8..d48db2f6f4f02 100644
+--- a/fs/nfs/nfs3xdr.c
++++ b/fs/nfs/nfs3xdr.c
+@@ -1967,7 +1967,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ bool plus)
+ {
+ struct user_namespace *userns = rpc_userns(entry->server->client);
+- struct nfs_entry old = *entry;
+ __be32 *p;
+ int error;
+ u64 new_cookie;
+@@ -1987,15 +1986,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+
+ error = decode_fileid3(xdr, &entry->ino);
+ if (unlikely(error))
+- return error;
++ return -EAGAIN;
+
+ error = decode_inline_filename3(xdr, &entry->name, &entry->len);
+ if (unlikely(error))
+- return error;
++ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
+
+ error = decode_cookie3(xdr, &new_cookie);
+ if (unlikely(error))
+- return error;
++ return -EAGAIN;
+
+ entry->d_type = DT_UNKNOWN;
+
+@@ -2003,7 +2002,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ entry->fattr->valid = 0;
+ error = decode_post_op_attr(xdr, entry->fattr, userns);
+ if (unlikely(error))
+- return error;
++ return -EAGAIN;
+ if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
+ entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+
+@@ -2018,11 +2017,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ return -EAGAIN;
+ if (*p != xdr_zero) {
+ error = decode_nfs_fh3(xdr, entry->fh);
+- if (unlikely(error)) {
+- if (error == -E2BIG)
+- goto out_truncated;
+- return error;
+- }
++ if (unlikely(error))
++ return -EAGAIN;
+ } else
+ zero_nfs_fh3(entry->fh);
+ }
+@@ -2031,11 +2027,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+ entry->cookie = new_cookie;
+
+ return 0;
+-
+-out_truncated:
+- dprintk("NFS: directory entry contains invalid file handle\n");
+- *entry = old;
+- return -EAGAIN;
+ }
+
+ /*
+@@ -2227,7 +2218,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
+
+ /* ignore properties */
+ result->lease_time = 0;
+- result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
++ result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
++ result->xattr_support = 0;
+ return 0;
+ }
+
+diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
+index a24349512ffe9..2975bbc33d280 100644
+--- a/fs/nfs/nfs42proc.c
++++ b/fs/nfs/nfs42proc.c
+@@ -285,7 +285,9 @@ static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+ loff_t newsize = pos + len;
+ loff_t end = newsize - 1;
+
+- truncate_pagecache_range(inode, pos, end);
++ WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping,
++ pos >> PAGE_SHIFT, end >> PAGE_SHIFT));
++
+ spin_lock(&inode->i_lock);
+ if (newsize > i_size_read(inode))
+ i_size_write(inode, newsize);
+@@ -460,8 +462,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
+ continue;
+ }
+ break;
+- } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
+- args.sync = true;
++ } else if (err == -NFS4ERR_OFFLOAD_NO_REQS &&
++ args.sync != res.synchronous) {
++ args.sync = res.synchronous;
+ dst_exception.retry = 1;
+ continue;
+ } else if ((err == -ESTALE ||
+@@ -584,8 +587,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
+
+ ctx = get_nfs_open_context(nfs_file_open_context(src));
+ l_ctx = nfs_get_lock_context(ctx);
+- if (IS_ERR(l_ctx))
+- return PTR_ERR(l_ctx);
++ if (IS_ERR(l_ctx)) {
++ status = PTR_ERR(l_ctx);
++ goto out;
++ }
+
+ status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
+ FMODE_READ);
+@@ -593,7 +598,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
+- return status;
++ goto out;
+ }
+
+ status = nfs4_call_sync(src_server->client, src_server, &msg,
+@@ -601,6 +606,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
+ if (status == -ENOTSUPP)
+ src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
+
++out:
+ put_nfs_open_context(nfs_file_open_context(src));
+ return status;
+ }
+@@ -1072,6 +1078,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
+ status = nfs4_call_sync(server->client, server, msg,
+ &args.seq_args, &res.seq_res, 0);
+ if (status == 0) {
++ /* a zero-length count means clone to EOF in src */
++ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
++ count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
+ nfs42_copy_dest_done(dst_inode, dst_offset, count);
+ status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+ }
+@@ -1331,7 +1340,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ for (i = 0; i < np; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i]) {
+- np = i + 1;
+ err = -ENOMEM;
+ goto out;
+ }
+@@ -1355,8 +1363,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ } while (exception.retry);
+
+ out:
+- while (--np >= 0)
+- __free_page(pages[np]);
++ while (--i >= 0)
++ __free_page(pages[i]);
+ kfree(pages);
+
+ return err;
+diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
+index c8bad735e4c19..271e5f92ed019 100644
+--- a/fs/nfs/nfs42xdr.c
++++ b/fs/nfs/nfs42xdr.c
+@@ -1434,8 +1434,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
+ status = decode_clone(xdr);
+ if (status)
+ goto out;
+- status = decode_getfattr(xdr, res->dst_fattr, res->server);
+-
++ decode_getfattr(xdr, res->dst_fattr, res->server);
+ out:
+ res->rpc_status = status;
+ return status;
+diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
+index ba78df4b13d94..0a1e1c64b131a 100644
+--- a/fs/nfs/nfs4_fs.h
++++ b/fs/nfs/nfs4_fs.h
+@@ -42,6 +42,7 @@ enum nfs4_client_state {
+ NFS4CLNT_LEASE_MOVED,
+ NFS4CLNT_DELEGATION_EXPIRED,
+ NFS4CLNT_RUN_MANAGER,
++ NFS4CLNT_MANAGER_AVAILABLE,
+ NFS4CLNT_RECALL_RUNNING,
+ NFS4CLNT_RECALL_ANY_LAYOUT_READ,
+ NFS4CLNT_RECALL_ANY_LAYOUT_RW,
+@@ -261,8 +262,8 @@ struct nfs4_state_maintenance_ops {
+ };
+
+ struct nfs4_mig_recovery_ops {
+- int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
+- struct page *, const struct cred *);
++ int (*get_locations)(struct nfs_server *, struct nfs_fh *,
++ struct nfs4_fs_locations *, struct page *, const struct cred *);
+ int (*fsid_present)(struct inode *, const struct cred *);
+ };
+
+@@ -281,7 +282,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
+ int nfs4_submount(struct fs_context *, struct nfs_server *);
+ int nfs4_replace_transport(struct nfs_server *server,
+ const struct nfs4_fs_locations *locations);
+-
++size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
++ size_t salen, struct net *net, int port);
+ /* nfs4proc.c */
+ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
+ extern int nfs4_async_handle_error(struct rpc_task *task,
+@@ -303,8 +305,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
+ struct nfs4_fs_locations *, struct page *);
+-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
+- struct page *page, const struct cred *);
++extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *,
++ struct nfs4_fs_locations *,
++ struct page *page, const struct cred *);
+ extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
+ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
+ struct dentry *,
+diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
+index af57332503bed..1bf7a72ebda6e 100644
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp)
+ ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
+ "NFSv4.0 transport Slot table");
+ if (ret) {
++ nfs4_shutdown_slot_table(tbl);
+ kfree(tbl);
+ return ret;
+ }
+@@ -1368,8 +1369,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
+ }
+ nfs_put_client(clp);
+
+- if (server->nfs_client->cl_hostname == NULL)
++ if (server->nfs_client->cl_hostname == NULL) {
+ server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
++ if (server->nfs_client->cl_hostname == NULL)
++ return -ENOMEM;
++ }
+ nfs_server_insert_lists(server);
+
+ return nfs_probe_destination(server);
+diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
+index c91565227ea2a..14f2efdecc2f8 100644
+--- a/fs/nfs/nfs4file.c
++++ b/fs/nfs/nfs4file.c
+@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
+ struct dentry *parent = NULL;
+ struct inode *dir;
+ unsigned openflags = filp->f_flags;
++ fmode_t f_mode;
+ struct iattr attr;
+ int err;
+
+@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp)
+ if (err)
+ return err;
+
++ f_mode = filp->f_mode;
+ if ((openflags & O_ACCMODE) == 3)
+- return nfs_open(inode, filp);
++ f_mode |= flags_to_mode(openflags);
+
+ /* We can't create new files here */
+ openflags &= ~(O_CREAT|O_EXCL);
+@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
+ parent = dget_parent(dentry);
+ dir = d_inode(parent);
+
+- ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
++ ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp);
+ err = PTR_ERR(ctx);
+ if (IS_ERR(ctx))
+ goto out;
+@@ -317,7 +319,7 @@ static int read_name_gen = 1;
+ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh, nfs4_stateid *stateid)
+ {
+- struct nfs_fattr fattr;
++ struct nfs_fattr *fattr = nfs_alloc_fattr();
+ struct file *filep, *res;
+ struct nfs_server *server;
+ struct inode *r_ino = NULL;
+@@ -328,14 +330,20 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+
+ server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+
+- nfs_fattr_init(&fattr);
++ if (!fattr)
++ return ERR_PTR(-ENOMEM);
+
+- status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL);
++ status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL);
+ if (status < 0) {
+ res = ERR_PTR(status);
+ goto out;
+ }
+
++ if (!S_ISREG(fattr->mode)) {
++ res = ERR_PTR(-EBADF);
++ goto out;
++ }
++
+ res = ERR_PTR(-ENOMEM);
+ len = strlen(SSC_READ_NAME_BODY) + 16;
+ read_name = kzalloc(len, GFP_NOFS);
+@@ -343,7 +351,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ goto out;
+ snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
+
+- r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr,
++ r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr,
+ NULL);
+ if (IS_ERR(r_ino)) {
+ res = ERR_CAST(r_ino);
+@@ -354,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ r_ino->i_fop);
+ if (IS_ERR(filep)) {
+ res = ERR_CAST(filep);
++ iput(r_ino);
+ goto out_free_name;
+ }
+ filep->f_mode |= FMODE_READ;
+@@ -388,6 +397,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ out_free_name:
+ kfree(read_name);
+ out:
++ nfs_free_fattr(fattr);
+ return res;
+ out_stateowner:
+ nfs4_put_state_owner(sp);
+diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
+index 8d8aba305ecca..ec6afd3c4bca6 100644
+--- a/fs/nfs/nfs4idmap.c
++++ b/fs/nfs/nfs4idmap.c
+@@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp)
+ err_destroy_pipe:
+ rpc_destroy_pipe_data(idmap->idmap_pipe);
+ err:
+- get_user_ns(idmap->user_ns);
++ put_user_ns(idmap->user_ns);
+ kfree(idmap);
+ return error;
+ }
+@@ -561,22 +561,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
+ return true;
+ }
+
+-static void
+-nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
++static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data,
++ int ret)
+ {
+- struct key *authkey = idmap->idmap_upcall_data->authkey;
+-
+- kfree(idmap->idmap_upcall_data);
+- idmap->idmap_upcall_data = NULL;
+- complete_request_key(authkey, ret);
+- key_put(authkey);
++ complete_request_key(data->authkey, ret);
++ key_put(data->authkey);
++ kfree(data);
+ }
+
+-static void
+-nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
++static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap,
++ struct idmap_legacy_upcalldata *data,
++ int ret)
+ {
+- if (idmap->idmap_upcall_data != NULL)
+- nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
++ if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data)
++ nfs_idmap_complete_pipe_upcall(data, ret);
+ }
+
+ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
+@@ -613,7 +611,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
+
+ ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
+ if (ret < 0)
+- nfs_idmap_abort_pipe_upcall(idmap, ret);
++ nfs_idmap_abort_pipe_upcall(idmap, data, ret);
+
+ return ret;
+ out2:
+@@ -669,6 +667,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ struct request_key_auth *rka;
+ struct rpc_inode *rpci = RPC_I(file_inode(filp));
+ struct idmap *idmap = (struct idmap *)rpci->private;
++ struct idmap_legacy_upcalldata *data;
+ struct key *authkey;
+ struct idmap_msg im;
+ size_t namelen_in;
+@@ -678,10 +677,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ * will have been woken up and someone else may now have used
+ * idmap_key_cons - so after this point we may no longer touch it.
+ */
+- if (idmap->idmap_upcall_data == NULL)
++ data = xchg(&idmap->idmap_upcall_data, NULL);
++ if (data == NULL)
+ goto out_noupcall;
+
+- authkey = idmap->idmap_upcall_data->authkey;
++ authkey = data->authkey;
+ rka = get_request_key_auth(authkey);
+
+ if (mlen != sizeof(im)) {
+@@ -703,18 +703,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
+ ret = -EINVAL;
+ goto out;
+-}
++ }
+
+- ret = nfs_idmap_read_and_verify_message(&im,
+- &idmap->idmap_upcall_data->idmap_msg,
+- rka->target_key, authkey);
++ ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg,
++ rka->target_key, authkey);
+ if (ret >= 0) {
+ key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
+ ret = mlen;
+ }
+
+ out:
+- nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
++ nfs_idmap_complete_pipe_upcall(data, ret);
+ out_noupcall:
+ return ret;
+ }
+@@ -728,7 +727,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+ struct idmap *idmap = data->idmap;
+
+ if (msg->errno)
+- nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
++ nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno);
+ }
+
+ static void
+@@ -736,8 +735,11 @@ idmap_release_pipe(struct inode *inode)
+ {
+ struct rpc_inode *rpci = RPC_I(inode);
+ struct idmap *idmap = (struct idmap *)rpci->private;
++ struct idmap_legacy_upcalldata *data;
+
+- nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
++ data = xchg(&idmap->idmap_upcall_data, NULL);
++ if (data)
++ nfs_idmap_complete_pipe_upcall(data, -EPIPE);
+ }
+
+ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
+diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
+index 873342308dc0d..f2dbf904c5989 100644
+--- a/fs/nfs/nfs4namespace.c
++++ b/fs/nfs/nfs4namespace.c
+@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry,
+ return 0;
+ }
+
+-static size_t nfs_parse_server_name(char *string, size_t len,
+- struct sockaddr *sa, size_t salen, struct net *net)
++size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
++ size_t salen, struct net *net, int port)
+ {
+ ssize_t ret;
+
+ ret = rpc_pton(net, string, len, sa, salen);
+ if (ret == 0) {
+- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+- if (ret < 0)
+- ret = 0;
++ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
++ if (ret == 0) {
++ ret = nfs_dns_resolve_name(net, string, len, sa, salen);
++ if (ret < 0)
++ ret = 0;
++ }
++ } else if (port) {
++ rpc_set_port(sa, port);
+ }
+ return ret;
+ }
+@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc,
+ nfs_parse_server_name(buf->data, buf->len,
+ &ctx->nfs_server.address,
+ sizeof(ctx->nfs_server._address),
+- fc->net_ns);
++ fc->net_ns, 0);
+ if (ctx->nfs_server.addrlen == 0)
+ continue;
+
+@@ -412,6 +417,9 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
+ fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+ if (!fs_locations)
+ goto out_free;
++ fs_locations->fattr = nfs_alloc_fattr();
++ if (!fs_locations->fattr)
++ goto out_free_2;
+
+ /* Get locations */
+ dentry = ctx->clone_data.dentry;
+@@ -422,14 +430,16 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
+ err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
+ dput(parent);
+ if (err != 0)
+- goto out_free_2;
++ goto out_free_3;
+
+ err = -ENOENT;
+ if (fs_locations->nlocations <= 0 ||
+ fs_locations->fs_path.ncomponents <= 0)
+- goto out_free_2;
++ goto out_free_3;
+
+ err = nfs_follow_referral(fc, fs_locations);
++out_free_3:
++ kfree(fs_locations->fattr);
+ out_free_2:
+ kfree(fs_locations);
+ out_free:
+@@ -496,7 +506,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
+ continue;
+
+ salen = nfs_parse_server_name(buf->data, buf->len,
+- sap, addr_bufsize, net);
++ sap, addr_bufsize, net, 0);
+ if (salen == 0)
+ continue;
+ rpc_set_port(sap, NFS_PORT);
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index e1214bb6b7ee5..a21e25cbd4515 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -126,6 +126,11 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
+ if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0)
+ return NULL;
+
++ label->lfs = 0;
++ label->pi = 0;
++ label->len = 0;
++ label->label = NULL;
++
+ err = security_dentry_init_security(dentry, sattr->ia_mode,
+ &dentry->d_name, (void **)&label->label, &label->len);
+ if (err == 0)
+@@ -366,6 +371,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
+ kunmap_atomic(start);
+ }
+
++static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version)
++{
++ if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) {
++ fattr->pre_change_attr = version;
++ fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
++ }
++}
++
+ static void nfs4_test_and_free_stateid(struct nfs_server *server,
+ nfs4_stateid *stateid,
+ const struct cred *cred)
+@@ -779,10 +792,9 @@ static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot,
+ if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0)
+ slot->seq_nr_highest_sent = seqnr;
+ }
+-static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
+- u32 seqnr)
++static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr)
+ {
+- slot->seq_nr_highest_sent = seqnr;
++ nfs4_slot_sequence_record_sent(slot, seqnr);
+ slot->seq_nr_last_acked = seqnr;
+ }
+
+@@ -849,7 +861,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
+ __func__,
+ slot->slot_nr,
+ slot->seq_nr);
+- nfs4_slot_sequence_acked(slot, slot->seq_nr);
+ goto out_retry;
+ case -NFS4ERR_RETRY_UNCACHED_REP:
+ case -NFS4ERR_SEQ_FALSE_RETRY:
+@@ -912,6 +923,7 @@ out:
+ out_noaction:
+ return ret;
+ session_recover:
++ set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state);
+ nfs4_schedule_session_recovery(session, status);
+ dprintk("%s ERROR: %d Reset session\n", __func__, status);
+ nfs41_sequence_free_slot(res);
+@@ -1157,7 +1169,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
+ {
+ unsigned short task_flags = 0;
+
+- if (server->nfs_client->cl_minorversion)
++ if (server->caps & NFS_CAP_MOVEABLE)
+ task_flags = RPC_TASK_MOVEABLE;
+ return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags);
+ }
+@@ -1232,8 +1244,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
+ NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR |
+- NFS_INO_REVAL_PAGECACHE;
++ NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR;
+ nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+ }
+ nfsi->attrtimeo_timestamp = jiffies;
+@@ -1609,15 +1620,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state,
+ {
+ if (test_bit(NFS_OPEN_STATE, &state->flags)) {
+ /* The common case - we're updating to a new sequence number */
+- if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+- nfs4_stateid_is_next(&state->open_stateid, stateid)) {
+- return true;
++ if (nfs4_stateid_match_other(stateid, &state->open_stateid)) {
++ if (nfs4_stateid_is_next(&state->open_stateid, stateid))
++ return true;
++ return false;
+ }
+- } else {
+- /* This is the first OPEN in this generation */
+- if (stateid->seqid == cpu_to_be32(1))
+- return true;
++ /* The server returned a new stateid */
+ }
++ /* This is the first OPEN in this generation */
++ if (stateid->seqid == cpu_to_be32(1))
++ return true;
+ return false;
+ }
+
+@@ -1982,8 +1994,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
+ if (!data->rpc_done) {
+ if (data->rpc_status)
+ return ERR_PTR(data->rpc_status);
+- /* cached opens have already been processed */
+- goto update;
++ return nfs4_try_open_cached(data);
+ }
+
+ ret = nfs_refresh_inode(inode, &data->f_attr);
+@@ -1992,7 +2003,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
+
+ if (data->o_res.delegation_type != 0)
+ nfs4_opendata_check_deleg(data, state);
+-update:
++
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
+@@ -2133,18 +2144,18 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
+ }
+
+ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
+- fmode_t fmode)
++ fmode_t fmode)
+ {
+ struct nfs4_state *newstate;
++ struct nfs_server *server = NFS_SB(opendata->dentry->d_sb);
++ int openflags = opendata->o_arg.open_flags;
+ int ret;
+
+ if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
+ return 0;
+- opendata->o_arg.open_flags = 0;
+ opendata->o_arg.fmode = fmode;
+- opendata->o_arg.share_access = nfs4_map_atomic_open_share(
+- NFS_SB(opendata->dentry->d_sb),
+- fmode, 0);
++ opendata->o_arg.share_access =
++ nfs4_map_atomic_open_share(server, fmode, openflags);
+ memset(&opendata->o_res, 0, sizeof(opendata->o_res));
+ memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+ nfs4_init_opendata_res(opendata);
+@@ -2570,7 +2581,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
+ };
+ int status;
+
+- if (server->nfs_client->cl_minorversion)
++ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ kref_get(&data->kref);
+@@ -2724,10 +2735,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
+ struct nfs4_opendata *opendata;
+ int ret;
+
+- opendata = nfs4_open_recoverdata_alloc(ctx, state,
+- NFS4_OPEN_CLAIM_FH);
++ opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH);
+ if (IS_ERR(opendata))
+ return PTR_ERR(opendata);
++ /*
++ * We're not recovering a delegation, so ask for no delegation.
++ * Otherwise the recovery thread could deadlock with an outstanding
++ * delegation return.
++ */
++ opendata->o_arg.open_flags = O_DIRECT;
+ ret = nfs4_open_recover(opendata, state);
+ if (ret == -ESTALE)
+ d_drop(ctx->dentry);
+@@ -3100,8 +3116,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
+ }
+
+ out:
+- if (!opendata->cancelled)
++ if (!opendata->cancelled) {
++ if (opendata->lgp) {
++ nfs4_lgopen_release(opendata->lgp);
++ opendata->lgp = NULL;
++ }
+ nfs4_sequence_free_slot(&opendata->o_res.seq_res);
++ }
+ return ret;
+ }
+
+@@ -3753,7 +3774,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
+ };
+ int status = -ENOMEM;
+
+- if (server->nfs_client->cl_minorversion)
++ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
+@@ -3812,7 +3833,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
+ int open_flags, struct iattr *attr, int *opened)
+ {
+ struct nfs4_state *state;
+- struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
++ struct nfs4_label l, *label;
+
+ label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
+
+@@ -3893,6 +3914,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
+ if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
+ server->caps |= NFS_CAP_SECURITY_LABEL;
+ #endif
++ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
++ server->caps |= NFS_CAP_FS_LOCATIONS;
+ if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+@@ -3949,6 +3972,67 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+ return err;
+ }
+
++static int _nfs4_discover_trunking(struct nfs_server *server,
++ struct nfs_fh *fhandle)
++{
++ struct nfs4_fs_locations *locations = NULL;
++ struct page *page;
++ const struct cred *cred;
++ struct nfs_client *clp = server->nfs_client;
++ const struct nfs4_state_maintenance_ops *ops =
++ clp->cl_mvops->state_renewal_ops;
++ int status = -ENOMEM;
++
++ cred = ops->get_state_renewal_cred(clp);
++ if (cred == NULL) {
++ cred = nfs4_get_clid_cred(clp);
++ if (cred == NULL)
++ return -ENOKEY;
++ }
++
++ page = alloc_page(GFP_KERNEL);
++ if (!page)
++ goto out_put_cred;
++ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
++ if (!locations)
++ goto out_free;
++ locations->fattr = nfs_alloc_fattr();
++ if (!locations->fattr)
++ goto out_free_2;
++
++ status = nfs4_proc_get_locations(server, fhandle, locations, page,
++ cred);
++
++ kfree(locations->fattr);
++out_free_2:
++ kfree(locations);
++out_free:
++ __free_page(page);
++out_put_cred:
++ put_cred(cred);
++ return status;
++}
++
++static int nfs4_discover_trunking(struct nfs_server *server,
++ struct nfs_fh *fhandle)
++{
++ struct nfs4_exception exception = {
++ .interruptible = true,
++ };
++ struct nfs_client *clp = server->nfs_client;
++ int err = 0;
++
++ if (!nfs4_has_session(clp))
++ goto out;
++ do {
++ err = nfs4_handle_exception(server,
++ _nfs4_discover_trunking(server, fhandle),
++ &exception);
++ } while (exception.retry);
++out:
++ return err;
++}
++
+ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
+ {
+@@ -4146,6 +4230,8 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
+ if (locations == NULL)
+ goto out;
+
++ locations->fattr = fattr;
++
+ status = nfs4_proc_fs_locations(client, dir, name, locations, page);
+ if (status != 0)
+ goto out;
+@@ -4155,17 +4241,14 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
+ * referral. Cause us to drop into the exception handler, which
+ * will kick off migration recovery.
+ */
+- if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
++ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &fattr->fsid)) {
+ dprintk("%s: server did not return a different fsid for"
+ " a referral at %s\n", __func__, name->name);
+ status = -NFS4ERR_MOVED;
+ goto out;
+ }
+ /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
+- nfs_fixup_referral_attributes(&locations->fattr);
+-
+- /* replace the lookup nfs_fattr with the locations nfs_fattr */
+- memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
++ nfs_fixup_referral_attributes(fattr);
+ memset(fhandle, 0, sizeof(struct nfs_fh));
+ out:
+ if (page)
+@@ -4317,7 +4400,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
+ };
+ unsigned short task_flags = 0;
+
+- if (server->nfs_client->cl_minorversion)
++ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE))
+ task_flags = RPC_TASK_MOVEABLE;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+@@ -4586,7 +4669,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ int flags)
+ {
+ struct nfs_server *server = NFS_SERVER(dir);
+- struct nfs4_label l, *ilabel = NULL;
++ struct nfs4_label l, *ilabel;
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+ int status = 0;
+@@ -4946,7 +5029,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
+- struct nfs4_label l, *label = NULL;
++ struct nfs4_label l, *label;
+ int err;
+
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+@@ -4987,7 +5070,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
+- struct nfs4_label l, *label = NULL;
++ struct nfs4_label l, *label;
+ int err;
+
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+@@ -5106,7 +5189,7 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
+ struct nfs4_exception exception = {
+ .interruptible = true,
+ };
+- struct nfs4_label l, *label = NULL;
++ struct nfs4_label l, *label;
+ int err;
+
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+@@ -5836,7 +5919,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
+ buflen = server->rsize;
+
+ npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+- pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS);
++ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+@@ -5881,9 +5964,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
+ out_ok:
+ ret = res.acl_len;
+ out_free:
+- for (i = 0; i < npages; i++)
+- if (pages[i])
+- __free_page(pages[i]);
++ while (--i >= 0)
++ __free_page(pages[i]);
+ if (res.acl_scratch)
+ __free_page(res.acl_scratch);
+ kfree(pages);
+@@ -6502,7 +6584,9 @@ static void nfs4_delegreturn_release(void *calldata)
+ pnfs_roc_release(&data->lr.arg, &data->lr.res,
+ data->res.lr_ret);
+ if (inode) {
+- nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
++ nfs4_fattr_set_prechange(&data->fattr,
++ inode_peek_iversion_raw(inode));
++ nfs_refresh_inode(inode, &data->fattr);
+ nfs_iput_and_deactive(inode);
+ }
+ kfree(calldata);
+@@ -6551,11 +6635,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_delegreturn_ops,
+- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
++ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+ };
+ int status = 0;
+
+- data = kzalloc(sizeof(*data), GFP_NOFS);
++ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+@@ -6744,7 +6831,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
+ struct nfs4_state *state = lsp->ls_state;
+ struct inode *inode = state->inode;
+
+- p = kzalloc(sizeof(*p), GFP_NOFS);
++ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ return NULL;
+ p->arg.fh = NFS_FH(inode);
+@@ -6869,10 +6956,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC,
+ };
+- struct nfs_client *client =
+- NFS_SERVER(lsp->ls_state->inode)->nfs_client;
+
+- if (client->cl_minorversion)
++ if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
+@@ -7044,6 +7129,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ {
+ struct nfs4_lockdata *data = calldata;
+ struct nfs4_lock_state *lsp = data->lsp;
++ struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
+
+ dprintk("%s: begin!\n", __func__);
+
+@@ -7053,8 +7139,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ data->rpc_status = task->tk_status;
+ switch (task->tk_status) {
+ case 0:
+- renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
+- data->timestamp);
++ renew_lease(server, data->timestamp);
+ if (data->arg.new_lock && !data->cancelled) {
+ data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
+@@ -7067,14 +7152,23 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+ goto out_restart;
+ break;
+- case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
++ if (data->arg.new_lock_owner != 0 &&
++ nfs4_refresh_open_old_stateid(&data->arg.open_stateid,
++ lsp->ls_state))
++ goto out_restart;
++ if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp))
++ goto out_restart;
++ fallthrough;
++ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ if (data->arg.new_lock_owner != 0) {
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
+ &lsp->ls_state->open_stateid))
+ goto out_restart;
++ else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
++ goto out_restart;
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
+ &lsp->ls_stateid))
+ goto out_restart;
+@@ -7148,15 +7242,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
+ };
+ int ret;
+- struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client;
+
+- if (client->cl_minorversion)
++ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ dprintk("%s: begin!\n", __func__);
+ data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+- fl->fl_u.nfs4_fl.owner,
+- recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
++ fl->fl_u.nfs4_fl.owner, GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+ if (IS_SETLKW(cmd))
+@@ -7579,7 +7671,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
+ if (server->nfs_client->cl_mvops->minor_version != 0)
+ return;
+
+- data = kmalloc(sizeof(*data), GFP_NOFS);
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+ data->lsp = lsp;
+@@ -7676,7 +7768,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+ const char *key, const void *buf,
+ size_t buflen, int flags)
+ {
+- struct nfs_access_entry cache;
++ u32 mask;
+ int ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+@@ -7691,8 +7783,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+ * do a cached access check for the XA* flags to possibly avoid
+ * doing an RPC and getting EACCES back.
+ */
+- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+- if (!(cache.mask & NFS_ACCESS_XAWRITE))
++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
++ if (!(mask & NFS_ACCESS_XAWRITE))
+ return -EACCES;
+ }
+
+@@ -7713,14 +7805,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *key, void *buf, size_t buflen)
+ {
+- struct nfs_access_entry cache;
++ u32 mask;
+ ssize_t ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return -EOPNOTSUPP;
+
+- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+- if (!(cache.mask & NFS_ACCESS_XAREAD))
++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
++ if (!(mask & NFS_ACCESS_XAREAD))
+ return -EACCES;
+ }
+
+@@ -7745,13 +7837,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
+ ssize_t ret, size;
+ char *buf;
+ size_t buflen;
+- struct nfs_access_entry cache;
++ u32 mask;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return 0;
+
+- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+- if (!(cache.mask & NFS_ACCESS_XALIST))
++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
++ if (!(mask & NFS_ACCESS_XALIST))
+ return 0;
+ }
+
+@@ -7849,7 +7941,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+ else
+ bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+
+- nfs_fattr_init(&fs_locations->fattr);
++ nfs_fattr_init(fs_locations->fattr);
+ fs_locations->server = server;
+ fs_locations->nlocations = 0;
+ status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0);
+@@ -7883,18 +7975,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+ * appended to this compound to identify the client ID which is
+ * performing recovery.
+ */
+-static int _nfs40_proc_get_locations(struct inode *inode,
++static int _nfs40_proc_get_locations(struct nfs_server *server,
++ struct nfs_fh *fhandle,
+ struct nfs4_fs_locations *locations,
+ struct page *page, const struct cred *cred)
+ {
+- struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+ .clientid = server->nfs_client->cl_clientid,
+- .fh = NFS_FH(inode),
++ .fh = fhandle,
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+@@ -7914,7 +8006,7 @@ static int _nfs40_proc_get_locations(struct inode *inode,
+ unsigned long now = jiffies;
+ int status;
+
+- nfs_fattr_init(&locations->fattr);
++ nfs_fattr_init(locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+@@ -7940,17 +8032,17 @@ static int _nfs40_proc_get_locations(struct inode *inode,
+ * When the client supports GETATTR(fs_locations_info), it can
+ * be plumbed in here.
+ */
+-static int _nfs41_proc_get_locations(struct inode *inode,
++static int _nfs41_proc_get_locations(struct nfs_server *server,
++ struct nfs_fh *fhandle,
+ struct nfs4_fs_locations *locations,
+ struct page *page, const struct cred *cred)
+ {
+- struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+- .fh = NFS_FH(inode),
++ .fh = fhandle,
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+@@ -7967,7 +8059,7 @@ static int _nfs41_proc_get_locations(struct inode *inode,
+ };
+ int status;
+
+- nfs_fattr_init(&locations->fattr);
++ nfs_fattr_init(locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+@@ -7999,11 +8091,11 @@ static int _nfs41_proc_get_locations(struct inode *inode,
+ * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
+ * from this client that require migration recovery.
+ */
+-int nfs4_proc_get_locations(struct inode *inode,
++int nfs4_proc_get_locations(struct nfs_server *server,
++ struct nfs_fh *fhandle,
+ struct nfs4_fs_locations *locations,
+ struct page *page, const struct cred *cred)
+ {
+- struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_mig_recovery_ops *ops =
+ clp->cl_mvops->mig_recovery_ops;
+@@ -8016,10 +8108,11 @@ int nfs4_proc_get_locations(struct inode *inode,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+- nfs_display_fhandle(NFS_FH(inode), __func__);
++ nfs_display_fhandle(fhandle, __func__);
+
+ do {
+- status = ops->get_locations(inode, locations, page, cred);
++ status = ops->get_locations(server, fhandle, locations, page,
++ cred);
+ if (status != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, status, &exception);
+@@ -8284,6 +8377,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
+ case -NFS4ERR_DEADSESSION:
+ nfs4_schedule_session_recovery(clp->cl_session,
+ task->tk_status);
++ return;
+ }
+ if (args->dir == NFS4_CDFC4_FORE_OR_BOTH &&
+ res->dir != NFS4_CDFS4_BOTH) {
+@@ -9254,7 +9348,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+ goto out_err;
+
+ ret = ERR_PTR(-ENOMEM);
+- calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
++ calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
+ if (calldata == NULL)
+ goto out_put_clp;
+ nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged);
+@@ -9339,6 +9433,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
+ rpc_delay(task, NFS4_POLL_RETRY_MAX);
+ fallthrough;
+ case -NFS4ERR_RETRY_UNCACHED_REP:
++ case -EACCES:
++ dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n",
++ __func__, task->tk_status, clp->cl_hostname);
+ return -EAGAIN;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+@@ -10198,7 +10295,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
+ &task_setup.rpc_client, &msg);
+
+ dprintk("NFS call free_stateid %p\n", stateid);
+- data = kmalloc(sizeof(*data), GFP_NOFS);
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ data->server = server;
+@@ -10347,7 +10444,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
+ | NFS_CAP_POSIX_LOCK
+ | NFS_CAP_STATEID_NFSV41
+ | NFS_CAP_ATOMIC_OPEN_V1
+- | NFS_CAP_LGOPEN,
++ | NFS_CAP_LGOPEN
++ | NFS_CAP_MOVEABLE,
+ .init_client = nfs41_init_client,
+ .shutdown_client = nfs41_shutdown_client,
+ .match_stateid = nfs41_match_stateid,
+@@ -10382,7 +10480,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
+ | NFS_CAP_LAYOUTSTATS
+ | NFS_CAP_CLONE
+ | NFS_CAP_LAYOUTERROR
+- | NFS_CAP_READ_PLUS,
++ | NFS_CAP_READ_PLUS
++ | NFS_CAP_MOVEABLE,
+ .init_client = nfs41_init_client,
+ .shutdown_client = nfs41_shutdown_client,
+ .match_stateid = nfs41_match_stateid,
+@@ -10437,6 +10536,26 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+ return error + error2 + error3;
+ }
+
++static void nfs4_enable_swap(struct inode *inode)
++{
++ /* The state manager thread must always be running.
++ * It will notice the client is a swapper, and stay put.
++ */
++ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
++
++ nfs4_schedule_state_manager(clp);
++}
++
++static void nfs4_disable_swap(struct inode *inode)
++{
++ /* The state manager thread will now exit once it is
++ * woken.
++ */
++ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
++
++ nfs4_schedule_state_manager(clp);
++}
++
+ static const struct inode_operations nfs4_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_lookup,
+@@ -10513,6 +10632,9 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
+ .free_client = nfs4_free_client,
+ .create_server = nfs4_create_server,
+ .clone_server = nfs_clone_server,
++ .discover_trunking = nfs4_discover_trunking,
++ .enable_swap = nfs4_enable_swap,
++ .disable_swap = nfs4_disable_swap,
+ };
+
+ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index f22818a80c2c7..15ba6ad1c571f 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -49,6 +49,7 @@
+ #include <linux/workqueue.h>
+ #include <linux/bitops.h>
+ #include <linux/jiffies.h>
++#include <linux/sched/mm.h>
+
+ #include <linux/sunrpc/clnt.h>
+
+@@ -66,6 +67,8 @@
+
+ #define OPENOWNER_POOL_SIZE 8
+
++static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp);
++
+ const nfs4_stateid zero_stateid = {
+ { .data = { 0 } },
+ .type = NFS4_SPECIAL_STATEID_TYPE,
+@@ -329,6 +332,8 @@ do_confirm:
+ status = nfs4_proc_create_session(clp, cred);
+ if (status != 0)
+ goto out;
++ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R))
++ nfs4_state_start_reclaim_reboot(clp);
+ nfs41_finish_session_reset(clp);
+ nfs_mark_client_ready(clp, NFS_CS_READY);
+ out:
+@@ -820,7 +825,7 @@ static void __nfs4_close(struct nfs4_state *state,
+
+ void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
+ {
+- __nfs4_close(state, fmode, GFP_NOFS, 0);
++ __nfs4_close(state, fmode, GFP_KERNEL, 0);
+ }
+
+ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
+@@ -1208,10 +1213,17 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+ {
+ struct task_struct *task;
+ char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
++ struct rpc_clnt *cl = clp->cl_rpcclient;
++
++ while (cl != cl->cl_parent)
++ cl = cl->cl_parent;
+
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
++ if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
++ wake_up_var(&clp->cl_state);
+ return;
++ }
++ set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+ __module_get(THIS_MODULE);
+ refcount_inc(&clp->cl_count);
+
+@@ -1226,7 +1238,10 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+ if (IS_ERR(task)) {
+ printk(KERN_ERR "%s: kthread_run: %ld\n",
+ __func__, PTR_ERR(task));
++ if (!nfs_client_init_is_complete(clp))
++ nfs_mark_client_ready(clp, PTR_ERR(task));
+ nfs4_clear_state_manager_bit(clp);
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ nfs_put_client(clp);
+ module_put(THIS_MODULE);
+ }
+@@ -1777,6 +1792,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+
+ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
+ {
++ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+ /* Mark all delegations for reclaim */
+ nfs_delegation_mark_reclaim(clp);
+ nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
+@@ -2095,9 +2111,15 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ dprintk("<-- %s: no memory\n", __func__);
+ goto out;
+ }
++ locations->fattr = nfs_alloc_fattr();
++ if (locations->fattr == NULL) {
++ dprintk("<-- %s: no memory\n", __func__);
++ goto out;
++ }
+
+ inode = d_inode(server->super->s_root);
+- result = nfs4_proc_get_locations(inode, locations, page, cred);
++ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
++ page, cred);
+ if (result) {
+ dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
+ __func__, result);
+@@ -2105,7 +2127,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ }
+
+ result = -NFS4ERR_NXIO;
+- if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
++ if (!locations->nlocations)
++ goto out;
++
++ if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+ dprintk("<-- %s: No fs_locations data, migration skipped\n",
+ __func__);
+ goto out;
+@@ -2130,6 +2155,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ out:
+ if (page != NULL)
+ __free_page(page);
++ if (locations != NULL)
++ kfree(locations->fattr);
+ kfree(locations);
+ if (result) {
+ pr_err("NFS: migration recovery failed (server %s)\n",
+@@ -2555,9 +2582,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+
+ static void nfs4_state_manager(struct nfs_client *clp)
+ {
++ unsigned int memflags;
+ int status = 0;
+ const char *section = "", *section_sep = "";
+
++ /*
++ * State recovery can deadlock if the direct reclaim code tries
++ * start NFS writeback. So ensure memory allocations are all
++ * GFP_NOFS.
++ */
++ memflags = memalloc_nofs_save();
++
+ /* Ensure exclusive access to NFSv4 state */
+ do {
+ trace_nfs4_state_mgr(clp);
+@@ -2631,6 +2666,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
+ if (status < 0)
+ goto out_error;
+ nfs4_state_end_reclaim_reboot(clp);
++ continue;
+ }
+
+ /* Detect expired delegations... */
+@@ -2652,6 +2688,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
+ clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
+ }
+
++ memalloc_nofs_restore(memflags);
+ nfs4_end_drain_session(clp);
+ nfs4_clear_state_manager_bit(clp);
+
+@@ -2664,11 +2701,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
+ clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
+ }
+
+- /* Did we race with an attempt to give us more work? */
+- if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
+- return;
+- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+- return;
++ return;
++
+ } while (refcount_read(&clp->cl_count) > 1 && !signalled());
+ goto out_drain;
+
+@@ -2681,6 +2715,7 @@ out_error:
+ clp->cl_hostname, -status);
+ ssleep(1);
+ out_drain:
++ memalloc_nofs_restore(memflags);
+ nfs4_end_drain_session(clp);
+ nfs4_clear_state_manager_bit(clp);
+ }
+@@ -2688,9 +2723,31 @@ out_drain:
+ static int nfs4_run_state_manager(void *ptr)
+ {
+ struct nfs_client *clp = ptr;
++ struct rpc_clnt *cl = clp->cl_rpcclient;
++
++ while (cl != cl->cl_parent)
++ cl = cl->cl_parent;
+
+ allow_signal(SIGKILL);
++again:
++ set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+ nfs4_state_manager(clp);
++ if (atomic_read(&cl->cl_swapper)) {
++ wait_var_event_interruptible(&clp->cl_state,
++ test_bit(NFS4CLNT_RUN_MANAGER,
++ &clp->cl_state));
++ if (atomic_read(&cl->cl_swapper) &&
++ test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
++ goto again;
++ /* Either no longer a swapper, or were signalled */
++ }
++ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
++
++ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
++ test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
++ !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
++ goto again;
++
+ nfs_put_client(clp);
+ module_put_and_exit(0);
+ return 0;
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index 7a2567aa2b86d..bcd18e96b44fa 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -584,32 +584,34 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
+ TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
+ TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
+ TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
++TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_AVAILABLE);
+ TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
+ TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
+ TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
++TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_DELAYED);
+
+ #define show_nfs4_clp_state(state) \
+ __print_flags(state, "|", \
+- { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \
+- { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \
+- { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \
+- { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
+- { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
+- { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \
+- { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \
+- { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \
+- { NFS4CLNT_SERVER_SCOPE_MISMATCH, \
+- "SERVER_SCOPE_MISMATCH" }, \
+- { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \
+- { NFS4CLNT_BIND_CONN_TO_SESSION, \
+- "BIND_CONN_TO_SESSION" }, \
+- { NFS4CLNT_MOVED, "MOVED" }, \
+- { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
+- { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
+- { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
+- { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \
+- { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
+- { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
++ { BIT(NFS4CLNT_MANAGER_RUNNING), "MANAGER_RUNNING" }, \
++ { BIT(NFS4CLNT_CHECK_LEASE), "CHECK_LEASE" }, \
++ { BIT(NFS4CLNT_LEASE_EXPIRED), "LEASE_EXPIRED" }, \
++ { BIT(NFS4CLNT_RECLAIM_REBOOT), "RECLAIM_REBOOT" }, \
++ { BIT(NFS4CLNT_RECLAIM_NOGRACE), "RECLAIM_NOGRACE" }, \
++ { BIT(NFS4CLNT_DELEGRETURN), "DELEGRETURN" }, \
++ { BIT(NFS4CLNT_SESSION_RESET), "SESSION_RESET" }, \
++ { BIT(NFS4CLNT_LEASE_CONFIRM), "LEASE_CONFIRM" }, \
++ { BIT(NFS4CLNT_SERVER_SCOPE_MISMATCH), "SERVER_SCOPE_MISMATCH" }, \
++ { BIT(NFS4CLNT_PURGE_STATE), "PURGE_STATE" }, \
++ { BIT(NFS4CLNT_BIND_CONN_TO_SESSION), "BIND_CONN_TO_SESSION" }, \
++ { BIT(NFS4CLNT_MOVED), "MOVED" }, \
++ { BIT(NFS4CLNT_LEASE_MOVED), "LEASE_MOVED" }, \
++ { BIT(NFS4CLNT_DELEGATION_EXPIRED), "DELEGATION_EXPIRED" }, \
++ { BIT(NFS4CLNT_RUN_MANAGER), "RUN_MANAGER" }, \
++ { BIT(NFS4CLNT_MANAGER_AVAILABLE), "MANAGER_AVAILABLE" }, \
++ { BIT(NFS4CLNT_RECALL_RUNNING), "RECALL_RUNNING" }, \
++ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_READ), "RECALL_ANY_LAYOUT_READ" }, \
++ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_RW), "RECALL_ANY_LAYOUT_RW" }, \
++ { BIT(NFS4CLNT_DELEGRETURN_DELAYED), "DELERETURN_DELAYED" })
+
+ TRACE_EVENT(nfs4_state_mgr,
+ TP_PROTO(
+diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
+index a8cff19c6f00c..0ae9e06a0bba2 100644
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -3693,8 +3693,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
+ if (unlikely(!p))
+ goto out_eio;
+ n = be32_to_cpup(p);
+- if (n <= 0)
+- goto out_eio;
+ for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
+ u32 m;
+ struct nfs4_fs_location *loc;
+@@ -4181,26 +4179,25 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return -EIO;
++ bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (len < NFS4_MAXLABELLEN) {
+- if (label) {
+- if (label->len) {
+- if (label->len < len)
+- return -ERANGE;
+- memcpy(label->label, p, len);
+- }
++ if (label && label->len) {
++ if (label->len < len)
++ return -ERANGE;
++ memcpy(label->label, p, len);
+ label->len = len;
+ label->pi = pi;
+ label->lfs = lfs;
+ status = NFS_ATTR_FATTR_V4_SECURITY_LABEL;
+ }
+- bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ } else
+ printk(KERN_WARNING "%s: label too long (%u)!\n",
+ __func__, len);
++ if (label && label->label)
++ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n",
++ __func__, label->len, (char *)label->label,
++ label->len, label->pi, label->lfs);
+ }
+- if (label && label->label)
+- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
+- (char *)label->label, label->len, label->pi, label->lfs);
+ return status;
+ }
+
+@@ -7029,7 +7026,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+ if (res->migration) {
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+- &res->fs_locations->fattr,
++ res->fs_locations->fattr,
+ NULL, res->fs_locations,
+ NULL, res->fs_locations->server);
+ if (status)
+@@ -7042,7 +7039,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+ goto out;
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+- &res->fs_locations->fattr,
++ res->fs_locations->fattr,
+ NULL, res->fs_locations,
+ NULL, res->fs_locations->server);
+ }
+diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
+index cc232d1f16f2f..fdecf729fa92b 100644
+--- a/fs/nfs/pagelist.c
++++ b/fs/nfs/pagelist.c
+@@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+ }
+ }
+
+-static inline struct nfs_page *
+-nfs_page_alloc(void)
++static inline struct nfs_page *nfs_page_alloc(void)
+ {
+- struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
++ struct nfs_page *p =
++ kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask());
+ if (p)
+ INIT_LIST_HEAD(&p->wb_list);
+ return p;
+@@ -773,6 +773,9 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
+ .flags = RPC_TASK_ASYNC | flags,
+ };
+
++ if (nfs_server_capable(hdr->inode, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
+ hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
+
+ dprintk("NFS: initiated pgio call "
+@@ -901,7 +904,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+ struct nfs_commit_info cinfo;
+ struct nfs_page_array *pg_array = &hdr->page_array;
+ unsigned int pagecount, pageused;
+- gfp_t gfp_flags = GFP_KERNEL;
++ gfp_t gfp_flags = nfs_io_gfp_mask();
+
+ pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+ pg_array->npages = pagecount;
+@@ -988,7 +991,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc,
+ desc->pg_mirrors_dynamic = NULL;
+ if (mirror_count == 1)
+ return desc->pg_mirrors_static;
+- ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL);
++ ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask());
+ if (ret != NULL) {
+ for (i = 0; i < mirror_count; i++)
+ nfs_pageio_mirror_init(&ret[i], desc->pg_bsize);
+@@ -1227,6 +1230,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+
+ do {
+ list_splice_init(&mirror->pg_list, &head);
++ mirror->pg_recoalesce = 0;
+
+ while (!list_empty(&head)) {
+ struct nfs_page *req;
+diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
+index 7c9090a28e5c3..7217f3eeb0692 100644
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -92,6 +92,17 @@ find_pnfs_driver(u32 id)
+ return local;
+ }
+
++const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
++{
++ return find_pnfs_driver(id);
++}
++
++void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
++{
++ if (ld)
++ module_put(ld->owner);
++}
++
+ void
+ unset_pnfs_layoutdriver(struct nfs_server *nfss)
+ {
+@@ -458,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+ pnfs_clear_lseg_state(lseg, lseg_list);
+ pnfs_clear_layoutreturn_info(lo);
+ pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
++ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
+ if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
+ !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
+ pnfs_clear_layoutreturn_waitbit(lo);
+@@ -1233,7 +1245,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
+ int status = 0;
+
+ *pcred = NULL;
+- lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
++ lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+ if (unlikely(lrp == NULL)) {
+ status = -ENOMEM;
+ spin_lock(&ino->i_lock);
+@@ -1906,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
+
+ static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
+ {
+- if (atomic_dec_and_test(&lo->plh_outstanding))
+- wake_up_var(&lo->plh_outstanding);
++ if (atomic_dec_and_test(&lo->plh_outstanding) &&
++ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
++ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
+
+ static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
+@@ -1989,6 +2002,7 @@ lookup_again:
+ lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
+ if (lo == NULL) {
+ spin_unlock(&ino->i_lock);
++ lseg = ERR_PTR(-ENOMEM);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
+ PNFS_UPDATE_LAYOUT_NOMEM);
+ goto out;
+@@ -2013,11 +2027,11 @@ lookup_again:
+ * If the layout segment list is empty, but there are outstanding
+ * layoutget calls, then they might be subject to a layoutrecall.
+ */
+- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
++ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
+ atomic_read(&lo->plh_outstanding) != 0) {
+ spin_unlock(&ino->i_lock);
+- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
+- !atomic_read(&lo->plh_outstanding)));
++ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
++ TASK_KILLABLE));
+ if (IS_ERR(lseg))
+ goto out_put_layout_hdr;
+ pnfs_put_layout_hdr(lo);
+@@ -2117,6 +2131,7 @@ lookup_again:
+
+ lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
+ if (!lgp) {
++ lseg = ERR_PTR(-ENOMEM);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
+ PNFS_UPDATE_LAYOUT_NOMEM);
+ nfs_layoutget_end(lo);
+@@ -2139,6 +2154,12 @@ lookup_again:
+ case -ERECALLCONFLICT:
+ case -EAGAIN:
+ break;
++ case -ENODATA:
++ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
++ pnfs_layout_set_fail_bit(
++ lo, pnfs_iomode_to_fail_bit(iomode));
++ lseg = NULL;
++ goto out_put_layout_hdr;
+ default:
+ if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
+ pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+@@ -2394,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
+ goto out_forget;
+ }
+
+- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
++ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
++ !pnfs_is_first_layoutget(lo))
+ goto out_forget;
+
+ if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
+@@ -3250,7 +3272,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+ {
+ struct nfs4_threshold *thp;
+
+- thp = kzalloc(sizeof(*thp), GFP_NOFS);
++ thp = kzalloc(sizeof(*thp), GFP_KERNEL);
+ if (!thp) {
+ dprintk("%s mdsthreshold allocation failed\n", __func__);
+ return NULL;
+diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
+index d810ae674f4e8..3307361c79560 100644
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -109,6 +109,7 @@ enum {
+ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
+ NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
+ NFS_LAYOUT_HASHED, /* The layout visible */
++ NFS_LAYOUT_DRAIN,
+ };
+
+ enum layoutdriver_policy_flags {
+@@ -238,6 +239,8 @@ struct pnfs_devicelist {
+
+ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
+ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
++extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
+
+ /* nfs4proc.c */
+ extern size_t max_response_pages(struct nfs_server *server);
+@@ -517,7 +520,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ {
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+
+- if (!lseg || !fl_cinfo->ops->mark_request_commit)
++ if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit)
+ return false;
+ fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx);
+ return true;
+diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
+index ddbbf4fcda867..178001c90156f 100644
+--- a/fs/nfs/pnfs_dev.c
++++ b/fs/nfs/pnfs_dev.c
+@@ -154,7 +154,7 @@ nfs4_get_device_info(struct nfs_server *server,
+ set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
+
+ out_free_pages:
+- for (i = 0; i < max_pages; i++)
++ while (--i >= 0)
+ __free_page(pages[i]);
+ kfree(pages);
+ out_free_pdev:
+diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
+index cf19914fec817..6b681f0c5df0d 100644
+--- a/fs/nfs/pnfs_nfs.c
++++ b/fs/nfs/pnfs_nfs.c
+@@ -419,7 +419,7 @@ static struct nfs_commit_data *
+ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo)
+ {
+- struct nfs_commit_data *data = nfs_commitdata_alloc(false);
++ struct nfs_commit_data *data = nfs_commitdata_alloc();
+
+ if (!data)
+ return NULL;
+@@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list,
+ goto out_error;
+ data->ds_commit_index = i;
+ list_add_tail(&data->list, list);
+- atomic_inc(&cinfo->mds->rpcs_out);
+ nreq++;
+ }
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+@@ -516,11 +515,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
+ unsigned int nreq = 0;
+
+ if (!list_empty(mds_pages)) {
+- data = nfs_commitdata_alloc(true);
++ data = nfs_commitdata_alloc();
++ if (!data) {
++ nfs_retry_commit(mds_pages, NULL, cinfo, -1);
++ return -ENOMEM;
++ }
+ data->ds_commit_index = -1;
+ list_splice_init(mds_pages, &data->pages);
+ list_add_tail(&data->list, &list);
+- atomic_inc(&cinfo->mds->rpcs_out);
+ nreq++;
+ }
+
+@@ -895,7 +897,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
+ }
+
+ smp_wmb();
+- ds->ds_clp = clp;
++ WRITE_ONCE(ds->ds_clp, clp);
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
+ out:
+ return status;
+@@ -941,7 +943,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
+ * Test this address for session trunking and
+ * add as an alias
+ */
+- xprtdata.cred = nfs4_get_clid_cred(clp),
++ xprtdata.cred = nfs4_get_clid_cred(clp);
+ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+ rpc_clnt_setup_test_and_add_xprt,
+ &rpcdata);
+@@ -973,7 +975,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
+ }
+
+ smp_wmb();
+- ds->ds_clp = clp;
++ WRITE_ONCE(ds->ds_clp, clp);
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
+ out:
+ return status;
+diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
+index ea19dbf123014..a5b0bdcb53963 100644
+--- a/fs/nfs/proc.c
++++ b/fs/nfs/proc.c
+@@ -91,7 +91,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+ info->dtpref = fsinfo.tsize;
+ info->maxfilesize = 0x7FFFFFFF;
+ info->lease_time = 0;
+- info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
++ info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
++ info->xattr_support = 0;
+ return 0;
+ }
+
+diff --git a/fs/nfs/super.c b/fs/nfs/super.c
+index e65c83494c052..a847011f36c96 100644
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -1046,22 +1046,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
+ if (ctx->bsize)
+ sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
+
+- if (server->nfs_client->rpc_ops->version != 2) {
+- /* The VFS shouldn't apply the umask to mode bits. We will do
+- * so ourselves when necessary.
++ switch (server->nfs_client->rpc_ops->version) {
++ case 2:
++ sb->s_time_gran = 1000;
++ sb->s_time_min = 0;
++ sb->s_time_max = U32_MAX;
++ break;
++ case 3:
++ /*
++ * The VFS shouldn't apply the umask to mode bits.
++ * We will do so ourselves when necessary.
+ */
+ sb->s_flags |= SB_POSIXACL;
+ sb->s_time_gran = 1;
+- sb->s_export_op = &nfs_export_ops;
+- } else
+- sb->s_time_gran = 1000;
+-
+- if (server->nfs_client->rpc_ops->version != 4) {
+ sb->s_time_min = 0;
+ sb->s_time_max = U32_MAX;
+- } else {
++ sb->s_export_op = &nfs_export_ops;
++ break;
++ case 4:
++ sb->s_flags |= SB_POSIXACL;
++ sb->s_time_gran = 1;
+ sb->s_time_min = S64_MIN;
+ sb->s_time_max = S64_MAX;
++ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
++ sb->s_export_op = &nfs_export_ops;
++ break;
+ }
+
+ sb->s_magic = NFS_SUPER_MAGIC;
+diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
+index 5fa11e1aca4c2..d5ccf095b2a7d 100644
+--- a/fs/nfs/unlink.c
++++ b/fs/nfs/unlink.c
+@@ -102,6 +102,10 @@ static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data)
+ };
+ struct rpc_task *task;
+ struct inode *dir = d_inode(data->dentry->d_parent);
++
++ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
+ nfs_sb_active(dir->i_sb);
+ data->args.fh = NFS_FH(dir);
+ nfs_fattr_init(data->res.dir_attr);
+@@ -344,6 +348,10 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
+ };
+
++ if (nfs_server_capable(old_dir, NFS_CAP_MOVEABLE) &&
++ nfs_server_capable(new_dir, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ return ERR_PTR(-ENOMEM);
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index eae9bf1140417..be70874bc3292 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool;
+ static struct kmem_cache *nfs_cdata_cachep;
+ static mempool_t *nfs_commit_mempool;
+
+-struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
++struct nfs_commit_data *nfs_commitdata_alloc(void)
+ {
+ struct nfs_commit_data *p;
+
+- if (never_fail)
+- p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+- else {
+- /* It is OK to do some reclaim, not no safe to wait
+- * for anything to be returned to the pool.
+- * mempool_alloc() cannot handle that particular combination,
+- * so we need two separate attempts.
+- */
++ p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask());
++ if (!p) {
+ p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
+- if (!p)
+- p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
+- __GFP_NOWARN | __GFP_NORETRY);
+ if (!p)
+ return NULL;
++ memset(p, 0, sizeof(*p));
+ }
+-
+- memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ return p;
+ }
+@@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free);
+
+ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
+ {
+- struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL);
++ struct nfs_pgio_header *p;
+
+- memset(p, 0, sizeof(*p));
++ p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask());
++ if (!p) {
++ p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT);
++ if (!p)
++ return NULL;
++ memset(p, 0, sizeof(*p));
++ }
+ p->rw_mode = FMODE_WRITE;
+ return p;
+ }
+@@ -314,7 +310,10 @@ static void nfs_mapping_set_error(struct page *page, int error)
+ struct address_space *mapping = page_file_mapping(page);
+
+ SetPageError(page);
+- mapping_set_error(mapping, error);
++ filemap_set_wb_err(mapping, error);
++ if (mapping->host)
++ errseq_set(&mapping->host->i_sb->s_wb_err,
++ error == -ENOSPC ? -ENOSPC : -EIO);
+ nfs_set_pageerror(mapping);
+ }
+
+@@ -602,8 +601,9 @@ static void nfs_write_error(struct nfs_page *req, int error)
+ * Find an associated nfs write request, and prepare to flush it out
+ * May return an error if the user signalled nfs_wait_on_request().
+ */
+-static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
+- struct page *page)
++static int nfs_page_async_flush(struct page *page,
++ struct writeback_control *wbc,
++ struct nfs_pageio_descriptor *pgio)
+ {
+ struct nfs_page *req;
+ int ret = 0;
+@@ -629,11 +629,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
+ /*
+ * Remove the problematic req upon fatal errors on the server
+ */
+- if (nfs_error_is_fatal(ret)) {
+- if (nfs_error_is_fatal_on_server(ret))
+- goto out_launder;
+- } else
+- ret = -EAGAIN;
++ if (nfs_error_is_fatal_on_server(ret))
++ goto out_launder;
++ if (wbc->sync_mode == WB_SYNC_NONE)
++ ret = AOP_WRITEPAGE_ACTIVATE;
++ redirty_page_for_writepage(wbc, page);
+ nfs_redirty_request(req);
+ pgio->pg_error = 0;
+ } else
+@@ -649,15 +649,8 @@ out_launder:
+ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio)
+ {
+- int ret;
+-
+ nfs_pageio_cond_complete(pgio, page_index(page));
+- ret = nfs_page_async_flush(pgio, page);
+- if (ret == -EAGAIN) {
+- redirty_page_for_writepage(wbc, page);
+- ret = AOP_WRITEPAGE_ACTIVATE;
+- }
+- return ret;
++ return nfs_page_async_flush(page, wbc, pgio);
+ }
+
+ /*
+@@ -676,11 +669,7 @@ static int nfs_writepage_locked(struct page *page,
+ err = nfs_do_writepage(page, wbc, &pgio);
+ pgio.pg_error = 0;
+ nfs_pageio_complete(&pgio);
+- if (err < 0)
+- return err;
+- if (nfs_error_is_fatal(pgio.pg_error))
+- return pgio.pg_error;
+- return 0;
++ return err;
+ }
+
+ int nfs_writepage(struct page *page, struct writeback_control *wbc)
+@@ -728,19 +717,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
+ priority = wb_priority(wbc);
+ }
+
+- nfs_pageio_init_write(&pgio, inode, priority, false,
+- &nfs_async_write_completion_ops);
+- pgio.pg_io_completion = ioc;
+- err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
+- pgio.pg_error = 0;
+- nfs_pageio_complete(&pgio);
++ do {
++ nfs_pageio_init_write(&pgio, inode, priority, false,
++ &nfs_async_write_completion_ops);
++ pgio.pg_io_completion = ioc;
++ err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
++ &pgio);
++ pgio.pg_error = 0;
++ nfs_pageio_complete(&pgio);
++ } while (err < 0 && !nfs_error_is_fatal(err));
+ nfs_io_completion_put(ioc);
+
+ if (err < 0)
+ goto out_err;
+- err = pgio.pg_error;
+- if (nfs_error_is_fatal(err))
+- goto out_err;
+ return 0;
+ out_err:
+ return err;
+@@ -1038,25 +1027,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_page *req, *tmp;
+ int ret = 0;
+
+-restart:
+ list_for_each_entry_safe(req, tmp, src, wb_list) {
+ kref_get(&req->wb_kref);
+ if (!nfs_lock_request(req)) {
+- int status;
+-
+- /* Prevent deadlock with nfs_lock_and_join_requests */
+- if (!list_empty(dst)) {
+- nfs_release_request(req);
+- continue;
+- }
+- /* Ensure we make progress to prevent livelock */
+- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+- status = nfs_wait_on_request(req);
+ nfs_release_request(req);
+- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+- if (status < 0)
+- break;
+- goto restart;
++ continue;
+ }
+ nfs_request_remove_commit_list(req, cinfo);
+ clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+@@ -1419,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
+ */
+ static void nfs_redirty_request(struct nfs_page *req)
+ {
++ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
++
+ /* Bump the transmission count */
+ req->wb_nio++;
+ nfs_mark_request_dirty(req);
+- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
++ atomic_long_inc(&nfsi->redirtied_pages);
+ nfs_end_page_writeback(req);
+ nfs_release_request(req);
+ }
+@@ -1434,7 +1411,7 @@ static void nfs_async_write_error(struct list_head *head, int error)
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+- if (nfs_error_is_fatal(error))
++ if (nfs_error_is_fatal_on_server(error))
+ nfs_write_error(req, error);
+ else
+ nfs_redirty_request(req);
+@@ -1671,10 +1648,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+ atomic_inc(&cinfo->rpcs_out);
+ }
+
+-static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
++bool nfs_commit_end(struct nfs_mds_commit_info *cinfo)
+ {
+- if (atomic_dec_and_test(&cinfo->rpcs_out))
++ if (atomic_dec_and_test(&cinfo->rpcs_out)) {
+ wake_up_var(&cinfo->rpcs_out);
++ return true;
++ }
++ return false;
+ }
+
+ void nfs_commitdata_release(struct nfs_commit_data *data)
+@@ -1706,6 +1686,10 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
+ .flags = RPC_TASK_ASYNC | flags,
+ .priority = priority,
+ };
++
++ if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
+ /* Set up the initial task struct. */
+ nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
+ trace_nfs_initiate_commit(data);
+@@ -1774,6 +1758,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
++ nfs_commit_begin(cinfo->mds);
+ }
+ EXPORT_SYMBOL_GPL(nfs_init_commit);
+
+@@ -1816,11 +1801,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+ if (list_empty(head))
+ return 0;
+
+- data = nfs_commitdata_alloc(true);
++ data = nfs_commitdata_alloc();
++ if (!data) {
++ nfs_retry_commit(head, NULL, cinfo, -1);
++ return -ENOMEM;
++ }
+
+ /* Set up the argument struct */
+ nfs_init_commit(data, head, NULL, cinfo);
+- atomic_inc(&cinfo->mds->rpcs_out);
+ if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
+ return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
+@@ -1884,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
+ /* We have a mismatch. Write the page again */
+ dprintk_cont(" mismatch\n");
+ nfs_mark_request_dirty(req);
+- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
++ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
+ next:
+ nfs_unlock_and_release_request(req);
+ /* Latency breaker */
+@@ -1936,6 +1924,7 @@ static int __nfs_commit_inode(struct inode *inode, int how,
+ int may_wait = how & FLUSH_SYNC;
+ int ret, nscan;
+
++ how &= ~FLUSH_SYNC;
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_commit_begin(cinfo.mds);
+ for (;;) {
+diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
+index 442543304930b..2455dc8be18a8 100644
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -82,6 +82,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
+ int len = sizeof(__be32), ret, i;
+ __be32 *p;
+
++ /*
++ * See paragraph 5 of RFC 8881 S18.40.3.
++ */
++ if (!gdp->gd_maxcount) {
++ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
++ return nfserr_resource;
++ return nfs_ok;
++ }
++
+ p = xdr_reserve_space(xdr, len + sizeof(__be32));
+ if (!p)
+ return nfserr_resource;
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index 9421dae227374..668c7527b17e8 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
+ return -EINVAL;
+ }
+
+- if (mnt_user_ns(path->mnt) != &init_user_ns) {
++ if (is_idmapped_mnt(path->mnt)) {
+ dprintk("exp_export: export of idmapped mounts not yet supported.\n");
+ return -EINVAL;
+ }
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index be3c1aad50ea3..1e8c31ed6c7c4 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -187,14 +187,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ nf->nf_hashval = hashval;
+ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = may & NFSD_FILE_MAY_MASK;
+- if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+- if (may & NFSD_MAY_WRITE)
+- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+- if (may & NFSD_MAY_READ)
+- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+- }
+ nf->nf_mark = NULL;
+- init_rwsem(&nf->nf_rwsem);
+ trace_nfsd_file_alloc(nf);
+ }
+ return nf;
+@@ -641,7 +634,7 @@ nfsd_file_cache_init(void)
+ if (!nfsd_filecache_wq)
+ goto out;
+
+- nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
++ nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
+ sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+ if (!nfsd_file_hashtbl) {
+ pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+@@ -709,7 +702,7 @@ out_err:
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+- kfree(nfsd_file_hashtbl);
++ kvfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+@@ -855,7 +848,7 @@ nfsd_file_cache_shutdown(void)
+ fsnotify_wait_marks_destroyed();
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+- kfree(nfsd_file_hashtbl);
++ kvfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+@@ -991,21 +984,7 @@ wait_for_construction:
+
+ this_cpu_inc(nfsd_file_cache_hits);
+
+- if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+- bool write = (may_flags & NFSD_MAY_WRITE);
+-
+- if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+- (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+- status = nfserrno(nfsd_open_break_lease(
+- file_inode(nf->nf_file), may_flags));
+- if (status == nfs_ok) {
+- clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+- if (write)
+- clear_bit(NFSD_FILE_BREAK_WRITE,
+- &nf->nf_flags);
+- }
+- }
+- }
++ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+ out:
+ if (status == nfs_ok) {
+ *pnf = nf;
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 7872df5a0fe3a..63104be2865c5 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -37,16 +37,13 @@ struct nfsd_file {
+ struct net *nf_net;
+ #define NFSD_FILE_HASHED (0)
+ #define NFSD_FILE_PENDING (1)
+-#define NFSD_FILE_BREAK_READ (2)
+-#define NFSD_FILE_BREAK_WRITE (3)
+-#define NFSD_FILE_REFERENCED (4)
++#define NFSD_FILE_REFERENCED (2)
+ unsigned long nf_flags;
+ struct inode *nf_inode;
+ unsigned int nf_hashval;
+ refcount_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+- struct rw_semaphore nf_rwsem;
+ };
+
+ int nfsd_file_cache_init(void);
+diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
+index e81d2a5cf381e..bb205328e043d 100644
+--- a/fs/nfsd/flexfilelayoutxdr.c
++++ b/fs/nfsd/flexfilelayoutxdr.c
+@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
+ int addr_len;
+ __be32 *p;
+
++ /*
++ * See paragraph 5 of RFC 8881 S18.40.3.
++ */
++ if (!gdp->gd_maxcount) {
++ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
++ return nfserr_resource;
++ return nfs_ok;
++ }
++
+ /* len + padding for two strings */
+ addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
+ ver_len = 20;
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 4b43929c1f255..30a1782a03f01 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -246,37 +246,27 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+ struct dentry *dentry = resp->fh.fh_dentry;
+ struct inode *inode;
+- int w;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+
+ if (dentry == NULL || d_really_is_negative(dentry))
+- return 1;
++ return true;
+ inode = d_inode(dentry);
+
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
+- return 0;
+-
+- rqstp->rq_res.page_len = w = nfsacl_size(
+- (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
+- (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+- while (w > 0) {
+- if (!*(rqstp->rq_next_page++))
+- return 1;
+- w -= PAGE_SIZE;
+- }
++ return false;
+
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
+ resp->mask & NFS_ACL, 0))
+- return 0;
++ return false;
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
+ resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+ /* ACCESS */
+@@ -286,17 +276,17 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+ struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->access) < 0)
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 17715a6c7a409..eaf785aec0708 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -146,17 +146,21 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_readargs *argp = rqstp->rq_argp;
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+- u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned int len;
+ int v;
+
+- argp->count = min_t(u32, argp->count, max_blocksize);
+-
+ dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
+ SVCFH_fmt(&argp->fh),
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
++ if (argp->offset > (u64)OFFSET_MAX)
++ argp->offset = (u64)OFFSET_MAX;
++ if (argp->offset + argp->count > (u64)OFFSET_MAX)
++ argp->count = (u64)OFFSET_MAX - argp->offset;
++
+ v = 0;
+ len = argp->count;
+ resp->pages = rqstp->rq_next_page;
+@@ -199,19 +203,19 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
+ (unsigned long long) argp->offset,
+ argp->stable? " stable" : "");
+
++ resp->status = nfserr_fbig;
++ if (argp->offset > (u64)OFFSET_MAX ||
++ argp->offset + argp->len > (u64)OFFSET_MAX)
++ return rpc_success;
++
+ fh_copy(&resp->fh, &argp->fh);
+ resp->committed = argp->stable;
+- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+- &argp->first, cnt);
+- if (!nvecs) {
+- resp->status = nfserr_io;
+- goto out;
+- }
++ nvecs = svc_fill_write_vector(rqstp, &argp->payload);
++
+ resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
+ rqstp->rq_vec, nvecs, &cnt,
+ resp->committed, resp->verf);
+ resp->count = cnt;
+-out:
+ return rpc_success;
+ }
+
+@@ -439,22 +443,20 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
+
+ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
+ struct nfsd3_readdirres *resp,
+- int count)
++ u32 count)
+ {
+ struct xdr_buf *buf = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+-
+- count = min_t(u32, count, svc_max_payload(rqstp));
++ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
++ svc_max_payload(rqstp));
+
+ memset(buf, 0, sizeof(*buf));
+
+ /* Reserve room for the NULL ptr & eof flag (-2 words) */
+- buf->buflen = count - XDR_UNIT * 2;
++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf);
++ buf->buflen -= XDR_UNIT * 2;
+ buf->pages = rqstp->rq_next_page;
+- while (count > 0) {
+- rqstp->rq_next_page++;
+- count -= PAGE_SIZE;
+- }
++ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ /* This is xdr_init_encode(), but it assumes that
+ * the head kvec has already been consumed. */
+@@ -463,7 +465,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
+ xdr->page_ptr = buf->pages;
+ xdr->iov = NULL;
+ xdr->p = page_address(*buf->pages);
+- xdr->end = xdr->p + (PAGE_SIZE >> 2);
++ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+ xdr->rqst = NULL;
+ }
+
+@@ -659,15 +661,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ argp->count,
+ (unsigned long long) argp->offset);
+
+- if (argp->offset > NFS_OFFSET_MAX) {
+- resp->status = nfserr_inval;
+- goto out;
+- }
+-
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
+ argp->count, resp->verf);
+-out:
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 0a5ebc52e6a9c..48d4f99b7f901 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ if (xdr_stream_decode_u64(xdr, &newsize) < 0)
+ return false;
+ iap->ia_valid |= ATTR_SIZE;
+- iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
++ iap->ia_size = newsize;
+ }
+ if (xdr_stream_decode_u32(xdr, &set_it) < 0)
+ return false;
+@@ -487,11 +487,6 @@ neither:
+ return true;
+ }
+
+-static bool fs_supports_change_attribute(struct super_block *sb)
+-{
+- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion;
+-}
+-
+ /*
+ * Fill in the pre_op attr for the wcc data
+ */
+@@ -500,26 +495,24 @@ void fill_pre_wcc(struct svc_fh *fhp)
+ struct inode *inode;
+ struct kstat stat;
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
++ __be32 err;
+
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+ return;
+ inode = d_inode(fhp->fh_dentry);
+- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
+- __be32 err = fh_getattr(fhp, &stat);
+-
+- if (err) {
+- /* Grab the times from inode anyway */
+- stat.mtime = inode->i_mtime;
+- stat.ctime = inode->i_ctime;
+- stat.size = inode->i_size;
+- }
+- fhp->fh_pre_mtime = stat.mtime;
+- fhp->fh_pre_ctime = stat.ctime;
+- fhp->fh_pre_size = stat.size;
++ err = fh_getattr(fhp, &stat);
++ if (err) {
++ /* Grab the times from inode anyway */
++ stat.mtime = inode->i_mtime;
++ stat.ctime = inode->i_ctime;
++ stat.size = inode->i_size;
+ }
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+
++ fhp->fh_pre_mtime = stat.mtime;
++ fhp->fh_pre_ctime = stat.ctime;
++ fhp->fh_pre_size = stat.size;
+ fhp->fh_pre_saved = true;
+ }
+
+@@ -530,6 +523,7 @@ void fill_post_wcc(struct svc_fh *fhp)
+ {
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode = d_inode(fhp->fh_dentry);
++ __be32 err;
+
+ if (fhp->fh_no_wcc)
+ return;
+@@ -537,16 +531,12 @@ void fill_post_wcc(struct svc_fh *fhp)
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+- fhp->fh_post_saved = true;
+-
+- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
+- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr);
+-
+- if (err) {
+- fhp->fh_post_saved = false;
+- fhp->fh_post_attr.ctime = inode->i_ctime;
+- }
+- }
++ err = fh_getattr(fhp, &fhp->fh_post_attr);
++ if (err) {
++ fhp->fh_post_saved = false;
++ fhp->fh_post_attr.ctime = inode->i_ctime;
++ } else
++ fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+@@ -621,9 +611,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_writeargs *args = rqstp->rq_argp;
+ u32 max_blocksize = svc_max_payload(rqstp);
+- struct kvec *head = rqstp->rq_arg.head;
+- struct kvec *tail = rqstp->rq_arg.tail;
+- size_t remaining;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return 0;
+@@ -641,17 +628,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ /* request sanity */
+ if (args->count != args->len)
+ return 0;
+- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
+- remaining -= xdr_stream_pos(xdr);
+- if (remaining < xdr_align_size(args->len))
+- return 0;
+ if (args->count > max_blocksize) {
+ args->count = max_blocksize;
+ args->len = max_blocksize;
+ }
+-
+- args->first.iov_base = xdr->p;
+- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
++ if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
++ return 0;
+
+ return 1;
+ }
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 0f8b10f363e7f..97f517e9b4189 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -875,8 +875,8 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
+ if (!kcred)
+ return NULL;
+
+- kcred->uid = ses->se_cb_sec.uid;
+- kcred->gid = ses->se_cb_sec.gid;
++ kcred->fsuid = ses->se_cb_sec.uid;
++ kcred->fsgid = ses->se_cb_sec.gid;
+ return kcred;
+ }
+ }
+@@ -917,7 +917,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
+ } else {
+ if (!conn->cb_xprt)
+ return -EINVAL;
+- clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
+ clp->cl_cb_session = ses;
+ args.bc_xprt = conn->cb_xprt;
+ args.prognumber = clp->cl_cb_session->se_cb_prog;
+@@ -937,6 +936,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
+ rpc_shutdown_client(client);
+ return -ENOMEM;
+ }
++
++ if (clp->cl_minorversion != 0)
++ clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
+ clp->cl_cb_client = client;
+ clp->cl_cb_cred = cred;
+ rcu_read_lock();
+diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
+index a97873f2d22b0..2673019d30ecd 100644
+--- a/fs/nfsd/nfs4layouts.c
++++ b/fs/nfsd/nfs4layouts.c
+@@ -322,11 +322,11 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
+ if (ls->ls_recalled)
+ goto out_unlock;
+
+- ls->ls_recalled = true;
+- atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
+ if (list_empty(&ls->ls_layouts))
+ goto out_unlock;
+
++ ls->ls_recalled = true;
++ atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
+ trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
+
+ refcount_inc(&ls->ls_stid.sc_count);
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 486c5dba4b650..3eb500adcda2e 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ __be32 status;
+
+ read->rd_nf = NULL;
+- if (read->rd_offset >= OFFSET_MAX)
+- return nfserr_inval;
+
+ trace_nfsd_read_start(rqstp, &cstate->current_fh,
+ read->rd_offset, read->rd_length);
+
++ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
++ if (read->rd_offset > (u64)OFFSET_MAX)
++ read->rd_offset = (u64)OFFSET_MAX;
++ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
++ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
++
+ /*
+ * If we do a zero copy read, then a client will see read data
+ * that reflects the state of the file *after* performing the
+@@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ unsigned long cnt;
+ int nvecs;
+
+- if (write->wr_offset >= OFFSET_MAX)
+- return nfserr_inval;
++ if (write->wr_offset > (u64)OFFSET_MAX ||
++ write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
++ return nfserr_fbig;
+
+ cnt = write->wr_buflen;
+ trace_nfsd_write_start(rqstp, &cstate->current_fh,
+@@ -1033,8 +1038,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ write->wr_how_written = write->wr_stable_how;
+
+- nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages,
+- write->wr_payload.head, write->wr_buflen);
++ nvecs = svc_fill_write_vector(rqstp, &write->wr_payload);
+ WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
+
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
+@@ -1084,8 +1088,10 @@ out:
+ return status;
+ out_put_dst:
+ nfsd_file_put(*dst);
++ *dst = NULL;
+ out_put_src:
+ nfsd_file_put(*src);
++ *src = NULL;
+ goto out;
+ }
+
+@@ -1201,6 +1207,7 @@ try_again:
+ /* allow 20secs for mount/unmount for now - revisit */
+ if (signal_pending(current) ||
+ (schedule_timeout(20*HZ) == 0)) {
++ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ kfree(work);
+ return nfserr_eagain;
+ }
+@@ -1344,13 +1351,6 @@ out_err:
+ return status;
+ }
+
+-static void
+-nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+-{
+- nfs_do_sb_deactive(ss_mnt->mnt_sb);
+- mntput(ss_mnt);
+-}
+-
+ /*
+ * Verify COPY destination stateid.
+ *
+@@ -1453,11 +1453,6 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ {
+ }
+
+-static void
+-nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+-{
+-}
+-
+ static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid)
+@@ -1511,11 +1506,15 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
+
+ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+ {
++ struct file *dst = copy->nf_dst->nf_file;
++ struct file *src = copy->nf_src->nf_file;
++ errseq_t since;
+ ssize_t bytes_copied = 0;
+ u64 bytes_total = copy->cp_count;
+ u64 src_pos = copy->cp_src_pos;
+ u64 dst_pos = copy->cp_dst_pos;
+- __be32 status;
++ int status;
++ loff_t end;
+
+ /* See RFC 7862 p.67: */
+ if (bytes_total == 0)
+@@ -1523,9 +1522,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+ do {
+ if (kthread_should_stop())
+ break;
+- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+- src_pos, copy->nf_dst->nf_file, dst_pos,
+- bytes_total);
++ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
++ bytes_total);
+ if (bytes_copied <= 0)
+ break;
+ bytes_total -= bytes_copied;
+@@ -1535,11 +1533,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+ } while (bytes_total > 0 && !copy->cp_synchronous);
+ /* for a non-zero asynchronous copy do a commit of data */
+ if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
+- down_write(&copy->nf_dst->nf_rwsem);
+- status = vfs_fsync_range(copy->nf_dst->nf_file,
+- copy->cp_dst_pos,
+- copy->cp_res.wr_bytes_written, 0);
+- up_write(&copy->nf_dst->nf_rwsem);
++ since = READ_ONCE(dst->f_wb_err);
++ end = copy->cp_dst_pos + copy->cp_res.wr_bytes_written - 1;
++ status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0);
++ if (!status)
++ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+ copy->committed = true;
+ }
+@@ -1613,14 +1611,14 @@ static int nfsd4_do_async_copy(void *data)
+ copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+ if (!copy->nf_src) {
+ copy->nfserr = nfserr_serverfault;
+- nfsd4_interssc_disconnect(copy->ss_mnt);
++ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+ copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+ &copy->stateid);
+ if (IS_ERR(copy->nf_src->nf_file)) {
+ copy->nfserr = nfserr_offload_denied;
+- nfsd4_interssc_disconnect(copy->ss_mnt);
++ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+ }
+@@ -1705,8 +1703,10 @@ out_err:
+ if (async_copy)
+ cleanup_async_copy(async_copy);
+ status = nfserrno(-ENOMEM);
+- if (!copy->cp_intra)
+- nfsd4_interssc_disconnect(copy->ss_mnt);
++ /*
++ * source's vfsmount of inter-copy will be unmounted
++ * by the laundromat
++ */
+ goto out;
+ }
+
+@@ -2487,9 +2487,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ status = nfserr_minor_vers_mismatch;
+ if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
+ goto out;
+- status = nfserr_resource;
+- if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+- goto out;
+
+ status = nfs41_check_op_ordering(args);
+ if (status) {
+@@ -2502,10 +2499,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+
+ rqstp->rq_lease_breaker = (void **)&cstate->clp;
+
+- trace_nfsd_compound(rqstp, args->opcnt);
++ trace_nfsd_compound(rqstp, args->client_opcnt);
+ while (!status && resp->opcnt < args->opcnt) {
+ op = &args->ops[resp->opcnt++];
+
++ if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
++ /* If there are still more operations to process,
++ * stop here and report NFS4ERR_RESOURCE. */
++ if (cstate->minorversion == 0 &&
++ args->client_opcnt > resp->opcnt) {
++ op->status = nfserr_resource;
++ goto encode_op;
++ }
++ }
++
+ /*
+ * The XDR decode routines may have pre-set op->status;
+ * for example, if there is a miscellaneous XDR error
+@@ -2581,8 +2588,8 @@ encode_op:
+ status = op->status;
+ }
+
+- trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
+- nfsd4_op_name(op->opnum));
++ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
++ status, nfsd4_op_name(op->opnum));
+
+ nfsd4_cstate_clear_replay(cstate);
+ nfsd4_increment_op_stats(op->opnum);
+diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
+index 6fedc49726bf7..8f24485e0f04f 100644
+--- a/fs/nfsd/nfs4recover.c
++++ b/fs/nfsd/nfs4recover.c
+@@ -815,8 +815,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ princhash.data = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+- if (IS_ERR_OR_NULL(princhash.data))
++ if (IS_ERR_OR_NULL(princhash.data)) {
++ kfree(name.data);
+ return -EFAULT;
++ }
+ princhash.len = princhashlen;
+ } else
+ princhash.len = 0;
+@@ -2156,6 +2158,7 @@ static struct notifier_block nfsd4_cld_block = {
+ int
+ register_cld_notifier(void)
+ {
++ WARN_ON(!nfsd_net_id);
+ return rpc_pipefs_notifier_register(&nfsd4_cld_block);
+ }
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 3f4027a5de883..f54ef526f25d0 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -627,15 +627,26 @@ find_any_file(struct nfs4_file *f)
+ return ret;
+ }
+
+-static struct nfsd_file *find_deleg_file(struct nfs4_file *f)
++static struct nfsd_file *find_any_file_locked(struct nfs4_file *f)
+ {
+- struct nfsd_file *ret = NULL;
++ lockdep_assert_held(&f->fi_lock);
++
++ if (f->fi_fds[O_RDWR])
++ return f->fi_fds[O_RDWR];
++ if (f->fi_fds[O_WRONLY])
++ return f->fi_fds[O_WRONLY];
++ if (f->fi_fds[O_RDONLY])
++ return f->fi_fds[O_RDONLY];
++ return NULL;
++}
++
++static struct nfsd_file *find_deleg_file_locked(struct nfs4_file *f)
++{
++ lockdep_assert_held(&f->fi_lock);
+
+- spin_lock(&f->fi_lock);
+ if (f->fi_deleg_file)
+- ret = nfsd_file_get(f->fi_deleg_file);
+- spin_unlock(&f->fi_lock);
+- return ret;
++ return f->fi_deleg_file;
++ return NULL;
+ }
+
+ static atomic_long_t num_delegations;
+@@ -961,6 +972,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
+
+ static void nfs4_free_deleg(struct nfs4_stid *stid)
+ {
++ WARN_ON(!list_empty(&stid->sc_cp_list));
+ kmem_cache_free(deleg_slab, stid);
+ atomic_long_dec(&num_delegations);
+ }
+@@ -1207,6 +1219,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
+ return 0;
+ }
+
++static bool delegation_hashed(struct nfs4_delegation *dp)
++{
++ return !(list_empty(&dp->dl_perfile));
++}
++
+ static bool
+ unhash_delegation_locked(struct nfs4_delegation *dp)
+ {
+@@ -1214,7 +1231,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
+
+ lockdep_assert_held(&state_lock);
+
+- if (list_empty(&dp->dl_perfile))
++ if (!delegation_hashed(dp))
+ return false;
+
+ dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+@@ -1246,9 +1263,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
+ WARN_ON(!list_empty(&dp->dl_recall_lru));
+
+ if (clp->cl_minorversion) {
++ spin_lock(&clp->cl_lock);
+ dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+ refcount_inc(&dp->dl_stid.sc_count);
+- spin_lock(&clp->cl_lock);
+ list_add(&dp->dl_recall_lru, &clp->cl_revoked);
+ spin_unlock(&clp->cl_lock);
+ }
+@@ -1369,6 +1386,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
+ release_all_access(stp);
+ if (stp->st_stateowner)
+ nfs4_put_stateowner(stp->st_stateowner);
++ WARN_ON(!list_empty(&stid->sc_cp_list));
+ kmem_cache_free(stateid_slab, stid);
+ }
+
+@@ -2494,9 +2512,11 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+- file = find_any_file(nf);
++
++ spin_lock(&nf->fi_lock);
++ file = find_any_file_locked(nf);
+ if (!file)
+- return 0;
++ goto out;
+
+ seq_printf(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+@@ -2518,8 +2538,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+- nfsd_file_put(file);
+-
++out:
++ spin_unlock(&nf->fi_lock);
+ return 0;
+ }
+
+@@ -2533,9 +2553,10 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+- file = find_any_file(nf);
++ spin_lock(&nf->fi_lock);
++ file = find_any_file_locked(nf);
+ if (!file)
+- return 0;
++ goto out;
+
+ seq_printf(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+@@ -2555,8 +2576,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+- nfsd_file_put(file);
+-
++out:
++ spin_unlock(&nf->fi_lock);
+ return 0;
+ }
+
+@@ -2568,9 +2589,10 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+
+ ds = delegstateid(st);
+ nf = st->sc_file;
+- file = find_deleg_file(nf);
++ spin_lock(&nf->fi_lock);
++ file = find_deleg_file_locked(nf);
+ if (!file)
+- return 0;
++ goto out;
+
+ seq_printf(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+@@ -2586,8 +2608,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+ seq_printf(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_printf(s, " }\n");
+- nfsd_file_put(file);
+-
++out:
++ spin_unlock(&nf->fi_lock);
+ return 0;
+ }
+
+@@ -4107,8 +4129,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+ status = nfserr_clid_inuse;
+ if (client_has_state(old)
+ && !same_creds(&unconf->cl_cred,
+- &old->cl_cred))
++ &old->cl_cred)) {
++ old = NULL;
+ goto out;
++ }
+ status = mark_client_expired_locked(old);
+ if (status) {
+ old = NULL;
+@@ -4598,7 +4622,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
+ * queued for a lease break. Don't queue it again.
+ */
+ spin_lock(&state_lock);
+- if (dp->dl_time == 0) {
++ if (delegation_hashed(dp) && dp->dl_time == 0) {
+ dp->dl_time = ktime_get_boottime_seconds();
+ list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
+ }
+@@ -4686,6 +4710,14 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+ return ret;
+ }
+
++/**
++ * nfsd_breaker_owns_lease - Check if lease conflict was resolved
++ * @fl: Lock state to check
++ *
++ * Return values:
++ * %true: Lease conflict was resolved
++ * %false: Lease conflict was not resolved.
++ */
+ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
+ {
+ struct nfs4_delegation *dl = fl->fl_owner;
+@@ -4693,11 +4725,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
+ struct nfs4_client *clp;
+
+ if (!i_am_nfsd())
+- return NULL;
++ return false;
+ rqst = kthread_data(current);
+ /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
+ if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
+- return NULL;
++ return false;
+ clp = *(rqst->rq_lease_breaker);
+ return dl->dl_stid.sc_client == clp;
+ }
+@@ -5801,8 +5833,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
+ CLOSE_STATEID(stateid))
+ return status;
+- if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid))
+- return status;
+ spin_lock(&cl->cl_lock);
+ s = find_stateid_locked(cl, stateid);
+ if (!s)
+@@ -6035,7 +6065,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+ *nfp = NULL;
+
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+- status = check_special_stateids(net, fhp, stateid, flags);
++ if (cstid)
++ status = nfserr_bad_stateid;
++ else
++ status = check_special_stateids(net, fhp, stateid,
++ flags);
+ goto done;
+ }
+
+@@ -6370,6 +6404,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+ struct nfs4_client *clp = s->st_stid.sc_client;
+ bool unhashed;
+ LIST_HEAD(reaplist);
++ struct nfs4_ol_stateid *stp;
+
+ spin_lock(&clp->cl_lock);
+ unhashed = unhash_open_stateid(s, &reaplist);
+@@ -6378,6 +6413,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+ if (unhashed)
+ put_ol_stateid_locked(s, &reaplist);
+ spin_unlock(&clp->cl_lock);
++ list_for_each_entry(stp, &reaplist, st_locks)
++ nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
+ free_ol_stateid_reaplist(&reaplist);
+ } else {
+ spin_unlock(&clp->cl_lock);
+@@ -7280,16 +7317,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ if (sop->so_is_open_owner || !same_owner_str(sop, owner))
+ continue;
+
+- /* see if there are still any locks associated with it */
+- lo = lockowner(sop);
+- list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
+- if (check_for_locks(stp->st_stid.sc_file, lo)) {
+- status = nfserr_locks_held;
+- spin_unlock(&clp->cl_lock);
+- return status;
+- }
++ if (atomic_read(&sop->so_count) != 1) {
++ spin_unlock(&clp->cl_lock);
++ return nfserr_locks_held;
+ }
+
++ lo = lockowner(sop);
+ nfs4_get_stateowner(sop);
+ break;
+ }
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index cf030ebe28275..d28b75909de89 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -288,11 +288,8 @@ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
+ p = xdr_inline_decode(argp->xdr, count << 2);
+ if (!p)
+ return nfserr_bad_xdr;
+- i = 0;
+- while (i < count)
+- bmval[i++] = be32_to_cpup(p++);
+- while (i < bmlen)
+- bmval[i++] = 0;
++ for (i = 0; i < bmlen; i++)
++ bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
+
+ return nfs_ok;
+ }
+@@ -2352,16 +2349,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+
+ if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
+ return 0;
+- if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
++ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
+ return 0;
+
+- /*
+- * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
+- * here, so we return success at the xdr level so that
+- * nfsd4_proc can handle this is an NFS-level error.
+- */
+- if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+- return 1;
++ argp->opcnt = min_t(u32, argp->client_opcnt,
++ NFSD_MAX_OPS_PER_COMPOUND);
+
+ if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
+ argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+@@ -2378,10 +2370,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ for (i = 0; i < argp->opcnt; i++) {
+ op = &argp->ops[i];
+ op->replay = NULL;
++ op->opdesc = NULL;
+
+ if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
+ return 0;
+ if (nfsd4_opnum_in_range(argp, op)) {
++ op->opdesc = OPDESC(op);
+ op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
+ if (op->status != nfs_ok)
+ trace_nfsd_compound_decode_err(argp->rqstp,
+@@ -2392,7 +2386,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ op->opnum = OP_ILLEGAL;
+ op->status = nfserr_op_illegal;
+ }
+- op->opdesc = OPDESC(op);
++
+ /*
+ * We'll try to cache the result in the DRC if any one
+ * op in the compound wants to be cached:
+@@ -3522,6 +3516,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
+ case nfserr_noent:
+ xdr_truncate_encode(xdr, start_offset);
+ goto skip_entry;
++ case nfserr_jukebox:
++ /*
++ * The pseudoroot should only display dentries that lead to
++ * exports. If we get EJUKEBOX here, then we can't tell whether
++ * this entry should be included. Just fail the whole READDIR
++ * with NFS4ERR_DELAY in that case, and hope that the situation
++ * will resolve itself by the client's next attempt.
++ */
++ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT)
++ goto fail;
++ fallthrough;
+ default:
+ /*
+ * If the client requested the RDATTR_ERROR attribute,
+@@ -3811,7 +3816,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
+ p = xdr_reserve_space(xdr, 32);
+ if (!p)
+ return nfserr_resource;
+- *p++ = cpu_to_be32(0);
++ *p++ = cpu_to_be32(open->op_recall);
+
+ /*
+ * TODO: space_limit's in delegations
+@@ -3996,14 +4001,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ if (resp->xdr->buf->page_len &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
+ WARN_ON_ONCE(1);
+- return nfserr_resource;
++ return nfserr_serverfault;
+ }
+ xdr_commit_encode(xdr);
+
+- maxcount = svc_max_payload(resp->rqstp);
+- maxcount = min_t(unsigned long, maxcount,
++ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+- maxcount = min_t(unsigned long, maxcount, read->rd_length);
+
+ if (file->f_op->splice_read &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+@@ -4530,20 +4533,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ *p++ = cpu_to_be32(gdev->gd_layout_type);
+
+- /* If maxcount is 0 then just update notifications */
+- if (gdev->gd_maxcount != 0) {
+- ops = nfsd4_layout_ops[gdev->gd_layout_type];
+- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+- if (nfserr) {
+- /*
+- * We don't bother to burden the layout drivers with
+- * enforcing gd_maxcount, just tell the client to
+- * come back with a bigger buffer if it's not enough.
+- */
+- if (xdr->buf->len + 4 > gdev->gd_maxcount)
+- goto toosmall;
+- return nfserr;
+- }
++ ops = nfsd4_layout_ops[gdev->gd_layout_type];
++ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
++ if (nfserr) {
++ /*
++ * We don't bother to burden the layout drivers with
++ * enforcing gd_maxcount, just tell the client to
++ * come back with a bigger buffer if it's not enough.
++ */
++ if (xdr->buf->len + 4 > gdev->gd_maxcount)
++ goto toosmall;
++ return nfserr;
+ }
+
+ if (gdev->gd_notify_types) {
+@@ -4840,10 +4840,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+- maxcount = svc_max_payload(resp->rqstp);
+- maxcount = min_t(unsigned long, maxcount,
++ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+- maxcount = min_t(unsigned long, maxcount, read->rd_length);
+ count = maxcount;
+
+ eof = read->rd_offset >= i_size_read(file_inode(file));
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 96cdf77925f33..830bb8493c7fd 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -212,7 +212,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
+ struct svc_cacherep *rp;
+ unsigned int i;
+
+- nfsd_reply_cache_stats_destroy(nn);
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
+
+ for (i = 0; i < nn->drc_hashsize; i++) {
+@@ -223,6 +222,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
+ rp, nn);
+ }
+ }
++ nfsd_reply_cache_stats_destroy(nn);
+
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 070e5dd03e26f..cb73c12925629 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1249,7 +1249,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+- d_delete(dentry);
++ d_drop(dentry);
++ fsnotify_unlink(dir, dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+ }
+@@ -1340,8 +1341,8 @@ void nfsd_client_rmdir(struct dentry *dentry)
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
++ d_drop(dentry);
+ fsnotify_rmdir(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ inode_unlock(dir);
+ }
+@@ -1521,12 +1522,9 @@ static int __init init_nfsd(void)
+ int retval;
+ printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
+
+- retval = register_cld_notifier();
+- if (retval)
+- return retval;
+ retval = nfsd4_init_slabs();
+ if (retval)
+- goto out_unregister_notifier;
++ return retval;
+ retval = nfsd4_init_pnfs();
+ if (retval)
+ goto out_free_slabs;
+@@ -1545,9 +1543,14 @@ static int __init init_nfsd(void)
+ goto out_free_exports;
+ retval = register_pernet_subsys(&nfsd_net_ops);
+ if (retval < 0)
++ goto out_free_filesystem;
++ retval = register_cld_notifier();
++ if (retval)
+ goto out_free_all;
+ return 0;
+ out_free_all:
++ unregister_pernet_subsys(&nfsd_net_ops);
++out_free_filesystem:
+ unregister_filesystem(&nfsd_fs_type);
+ out_free_exports:
+ remove_proc_entry("fs/nfs/exports", NULL);
+@@ -1561,13 +1564,12 @@ out_free_pnfs:
+ nfsd4_exit_pnfs();
+ out_free_slabs:
+ nfsd4_free_slabs();
+-out_unregister_notifier:
+- unregister_cld_notifier();
+ return retval;
+ }
+
+ static void __exit exit_nfsd(void)
+ {
++ unregister_cld_notifier();
+ unregister_pernet_subsys(&nfsd_net_ops);
+ nfsd_drc_slab_free();
+ remove_proc_entry("fs/nfs/exports", NULL);
+@@ -1577,7 +1579,6 @@ static void __exit exit_nfsd(void)
+ nfsd4_free_slabs();
+ nfsd4_exit_pnfs();
+ unregister_filesystem(&nfsd_fs_type);
+- unregister_cld_notifier();
+ }
+
+ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 90fcd6178823b..b009da1dcbb50 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ argp->count, argp->offset);
+
+ argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+
+ v = 0;
+ len = argp->count;
+@@ -230,16 +231,11 @@ nfsd_proc_write(struct svc_rqst *rqstp)
+ unsigned long cnt = argp->len;
+ unsigned int nvecs;
+
+- dprintk("nfsd: WRITE %s %d bytes at %d\n",
++ dprintk("nfsd: WRITE %s %u bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->len, argp->offset);
+
+- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+- &argp->first, cnt);
+- if (!nvecs) {
+- resp->status = nfserr_io;
+- goto out;
+- }
++ nvecs = svc_fill_write_vector(rqstp, &argp->payload);
+
+ resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
+ argp->offset, rqstp->rq_vec, nvecs,
+@@ -248,7 +244,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+ return rpc_drop_reply;
+-out:
+ return rpc_success;
+ }
+
+@@ -557,17 +552,16 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp)
+
+ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
+ struct nfsd_readdirres *resp,
+- int count)
++ u32 count)
+ {
+ struct xdr_buf *buf = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+
+- count = min_t(u32, count, PAGE_SIZE);
+-
+ memset(buf, 0, sizeof(*buf));
+
+ /* Reserve room for the NULL ptr & eof flag (-2 words) */
+- buf->buflen = count - sizeof(__be32) * 2;
++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE);
++ buf->buflen -= XDR_UNIT * 2;
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page++;
+
+@@ -578,7 +572,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
+ xdr->page_ptr = buf->pages;
+ xdr->iov = NULL;
+ xdr->p = page_address(*buf->pages);
+- xdr->end = xdr->p + (PAGE_SIZE >> 2);
++ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+ xdr->rqst = NULL;
+ }
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index ccb59e91011b7..373695cc62a7a 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -425,8 +425,8 @@ static void nfsd_shutdown_net(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- nfsd_file_cache_shutdown_net(net);
+ nfs4_state_shutdown_net(net);
++ nfsd_file_cache_shutdown_net(net);
+ if (nn->lockd_up) {
+ lockd_down(net);
+ nn->lockd_up = false;
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index a06c05fe3b421..26a42f87c2409 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ {
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_writeargs *args = rqstp->rq_argp;
+- struct kvec *head = rqstp->rq_arg.head;
+- struct kvec *tail = rqstp->rq_arg.tail;
+ u32 beginoffset, totalcount;
+- size_t remaining;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+ return 0;
+@@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ return 0;
+ if (args->len > NFSSVC_MAXBLKSIZE_V2)
+ return 0;
+- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
+- remaining -= xdr_stream_pos(xdr);
+- if (remaining < xdr_align_size(args->len))
++ if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
+ return 0;
+- args->first.iov_base = xdr->p;
+- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+
+ return 1;
+ }
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 538520957a815..0fc1fa6f28e0b 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -319,14 +319,14 @@ TRACE_EVENT(nfsd_export_update,
+ DECLARE_EVENT_CLASS(nfsd_io_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+- loff_t offset,
+- unsigned long len),
++ u64 offset,
++ u32 len),
+ TP_ARGS(rqstp, fhp, offset, len),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, fh_hash)
+- __field(loff_t, offset)
+- __field(unsigned long, len)
++ __field(u64, offset)
++ __field(u32, len)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+@@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
+ __entry->offset = offset;
+ __entry->len = len;
+ ),
+- TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
++ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u",
+ __entry->xid, __entry->fh_hash,
+ __entry->offset, __entry->len)
+ )
+@@ -343,8 +343,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
+ DEFINE_EVENT(nfsd_io_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+- loff_t offset, \
+- unsigned long len), \
++ u64 offset, \
++ u32 len), \
+ TP_ARGS(rqstp, fhp, offset, len))
+
+ DEFINE_NFSD_IO_EVENT(read_start);
+@@ -636,18 +636,10 @@ DEFINE_CLID_EVENT(confirmed_r);
+ /*
+ * from fs/nfsd/filecache.h
+ */
+-TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
+-TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
+-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
+-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
+-TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
+-
+ #define show_nf_flags(val) \
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+- { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
+- { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
+ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
+
+ DECLARE_EVENT_CLASS(nfsd_file_class,
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 738d564ca4ce3..15a86876e3d90 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -322,7 +322,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
+ iap->ia_mode &= ~S_ISGID;
+ } else {
+ /* set ATTR_KILL_* bits and let VFS handle it */
+- iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
++ iap->ia_valid |= ATTR_KILL_SUID;
++ iap->ia_valid |=
++ setattr_should_drop_sgid(&init_user_ns, inode);
+ }
+ }
+ }
+@@ -433,6 +435,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ .ia_size = iap->ia_size,
+ };
+
++ host_err = -EFBIG;
++ if (iap->ia_size < 0)
++ goto out_unlock;
++
+ host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+@@ -521,10 +527,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ {
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
++ errseq_t since;
+ loff_t cloned;
+ __be32 ret = 0;
+
+- down_write(&nf_dst->nf_rwsem);
++ since = READ_ONCE(dst->f_wb_err);
+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+ if (cloned < 0) {
+ ret = nfserrno(cloned);
+@@ -538,6 +545,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
+
++ if (!status)
++ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
+@@ -547,13 +556,13 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ }
+ }
+ out_err:
+- up_write(&nf_dst->nf_rwsem);
+ return ret;
+ }
+
+ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
+ u64 dst_pos, u64 count)
+ {
++ ssize_t ret;
+
+ /*
+ * Limit copy to 4MB to prevent indefinitely blocking an nfsd
+@@ -564,7 +573,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
+ * limit like this and pipeline multiple COPY requests.
+ */
+ count = min_t(u64, count, 1 << 22);
+- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
++ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
++
++ if (ret == -EOPNOTSUPP || ret == -EXDEV)
++ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
++ COPY_FILE_SPLICE);
++ return ret;
+ }
+
+ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -950,6 +964,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ struct super_block *sb = file_inode(file)->i_sb;
+ struct svc_export *exp;
+ struct iov_iter iter;
++ errseq_t since;
+ __be32 nfserr;
+ int host_err;
+ int use_wgather;
+@@ -987,21 +1002,22 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ flags |= RWF_SYNC;
+
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
++ since = READ_ONCE(file->f_wb_err);
+ if (flags & RWF_SYNC) {
+- down_write(&nf->nf_rwsem);
++ if (verf)
++ nfsd_copy_boot_verifier(verf,
++ net_generic(SVC_NET(rqstp),
++ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+- up_write(&nf->nf_rwsem);
+ } else {
+- down_read(&nf->nf_rwsem);
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+- up_read(&nf->nf_rwsem);
+ }
+ if (host_err < 0) {
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+@@ -1011,6 +1027,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ *cnt = host_err;
+ nfsd_stats_io_write_add(exp, *cnt);
+ fsnotify_modify(file);
++ host_err = filemap_check_wb_err(file->f_mapping, since);
++ if (host_err < 0)
++ goto out_nfserr;
+
+ if (stable && use_wgather) {
+ host_err = wait_for_concurrent_writes(file);
+@@ -1091,71 +1110,77 @@ out:
+ }
+
+ #ifdef CONFIG_NFSD_V3
+-static int
+-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
+- loff_t end)
+-{
+- struct address_space *mapping = nf->nf_file->f_mapping;
+- int ret = filemap_fdatawrite_range(mapping, offset, end);
+-
+- if (ret)
+- return ret;
+- filemap_fdatawait_range_keep_errors(mapping, offset, end);
+- return 0;
+-}
+-
+-/*
+- * Commit all pending writes to stable storage.
++/**
++ * nfsd_commit - Commit pending writes to stable storage
++ * @rqstp: RPC request being processed
++ * @fhp: NFS filehandle
++ * @offset: raw offset from beginning of file
++ * @count: raw count of bytes to sync
++ * @verf: filled in with the server's current write verifier
+ *
+- * Note: we only guarantee that data that lies within the range specified
+- * by the 'offset' and 'count' parameters will be synced.
++ * Note: we guarantee that data that lies within the range specified
++ * by the 'offset' and 'count' parameters will be synced. The server
++ * is permitted to sync data that lies outside this range at the
++ * same time.
+ *
+ * Unfortunately we cannot lock the file to make sure we return full WCC
+ * data to the client, as locking happens lower down in the filesystem.
++ *
++ * Return values:
++ * An nfsstat value in network byte order.
+ */
+ __be32
+-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- loff_t offset, unsigned long count, __be32 *verf)
++nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
++ u32 count, __be32 *verf)
+ {
++ u64 maxbytes;
++ loff_t start, end;
++ struct nfsd_net *nn;
+ struct nfsd_file *nf;
+- loff_t end = LLONG_MAX;
+- __be32 err = nfserr_inval;
+-
+- if (offset < 0)
+- goto out;
+- if (count != 0) {
+- end = offset + (loff_t)count - 1;
+- if (end < offset)
+- goto out;
+- }
++ __be32 err;
+
+ err = nfsd_file_acquire(rqstp, fhp,
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
+ if (err)
+ goto out;
++
++ /*
++ * Convert the client-provided (offset, count) range to a
++ * (start, end) range. If the client-provided range falls
++ * outside the maximum file size of the underlying FS,
++ * clamp the sync range appropriately.
++ */
++ start = 0;
++ end = LLONG_MAX;
++ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
++ if (offset < maxbytes) {
++ start = offset;
++ if (count && (offset + count - 1 < maxbytes))
++ end = offset + count - 1;
++ }
++
++ nn = net_generic(nf->nf_net, nfsd_net_id);
+ if (EX_ISSYNC(fhp->fh_export)) {
+- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
++ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
++ int err2;
+
+- down_write(&nf->nf_rwsem);
+- if (!err2)
+- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
++ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
+ switch (err2) {
+ case 0:
+- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+- nfsd_net_id));
++ nfsd_copy_boot_verifier(verf, nn);
++ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
++ since);
++ err = nfserrno(err2);
+ break;
+ case -EINVAL:
+ err = nfserr_notsupp;
+ break;
+ default:
++ nfsd_reset_boot_verifier(nn);
+ err = nfserrno(err2);
+- nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+- nfsd_net_id));
+ }
+- up_write(&nf->nf_rwsem);
+ } else
+- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+- nfsd_net_id));
++ nfsd_copy_boot_verifier(verf, nn);
+
+ nfsd_file_put(nf);
+ out:
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index b21b76e6b9a87..3cf5a8a13da50 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -73,8 +73,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ struct svc_fh *res, int createmode,
+ u32 *verifier, bool *truncp, bool *created);
+-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
+- loff_t, unsigned long, __be32 *verf);
++__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
++ u64 offset, u32 count, __be32 *verf);
+ #endif /* CONFIG_NFSD_V3 */
+ #ifdef CONFIG_NFSD_V4
+ __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index f45b4bc93f527..863a35f24910a 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -32,8 +32,8 @@ struct nfsd_readargs {
+ struct nfsd_writeargs {
+ svc_fh fh;
+ __u32 offset;
+- int len;
+- struct kvec first;
++ __u32 len;
++ struct xdr_buf payload;
+ };
+
+ struct nfsd_createargs {
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index 933008382bbeb..712c117300cb7 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -40,7 +40,7 @@ struct nfsd3_writeargs {
+ __u32 count;
+ int stable;
+ __u32 len;
+- struct kvec first;
++ struct xdr_buf payload;
+ };
+
+ struct nfsd3_createargs {
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 3e4052e3bd50e..45257666a6888 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -688,9 +688,10 @@ struct nfsd4_compoundargs {
+ struct svcxdr_tmpbuf *to_free;
+ struct svc_rqst *rqstp;
+
+- u32 taglen;
+ char * tag;
++ u32 taglen;
+ u32 minorversion;
++ u32 client_opcnt;
+ u32 opcnt;
+ struct nfsd4_op *ops;
+ struct nfsd4_op iops[8];
+diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
+index adf3bb0a80482..279d945d4ebee 100644
+--- a/fs/nilfs2/alloc.c
++++ b/fs/nilfs2/alloc.c
+@@ -205,7 +205,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
+ int ret;
+
+ spin_lock(lock);
+- if (prev->bh && blkoff == prev->blkoff) {
++ if (prev->bh && blkoff == prev->blkoff &&
++ likely(buffer_uptodate(prev->bh))) {
+ get_bh(prev->bh);
+ *bhp = prev->bh;
+ spin_unlock(lock);
+diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
+index 5900879d5693c..8ebb69c4ad186 100644
+--- a/fs/nilfs2/bmap.c
++++ b/fs/nilfs2/bmap.c
+@@ -67,20 +67,28 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
+
+ down_read(&bmap->b_sem);
+ ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
+- if (ret < 0) {
+- ret = nilfs_bmap_convert_error(bmap, __func__, ret);
++ if (ret < 0)
+ goto out;
+- }
++
+ if (NILFS_BMAP_USE_VBN(bmap)) {
+ ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
+ &blocknr);
+ if (!ret)
+ *ptrp = blocknr;
++ else if (ret == -ENOENT) {
++ /*
++ * If there was no valid entry in DAT for the block
++ * address obtained by b_ops->bop_lookup, then pass
++ * internal code -EINVAL to nilfs_bmap_convert_error
++ * to treat it as metadata corruption.
++ */
++ ret = -EINVAL;
++ }
+ }
+
+ out:
+ up_read(&bmap->b_sem);
+- return ret;
++ return nilfs_bmap_convert_error(bmap, __func__, ret);
+ }
+
+ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
+diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
+index 4391fd3abd8f8..1776121677e28 100644
+--- a/fs/nilfs2/btnode.c
++++ b/fs/nilfs2/btnode.c
+@@ -20,6 +20,23 @@
+ #include "page.h"
+ #include "btnode.h"
+
++
++/**
++ * nilfs_init_btnc_inode - initialize B-tree node cache inode
++ * @btnc_inode: inode to be initialized
++ *
++ * nilfs_init_btnc_inode() sets up an inode for B-tree node cache.
++ */
++void nilfs_init_btnc_inode(struct inode *btnc_inode)
++{
++ struct nilfs_inode_info *ii = NILFS_I(btnc_inode);
++
++ btnc_inode->i_mode = S_IFREG;
++ ii->i_flags = 0;
++ memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
++ mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
++}
++
+ void nilfs_btnode_cache_clear(struct address_space *btnc)
+ {
+ invalidate_mapping_pages(btnc, 0, -1);
+@@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
+ struct buffer_head *
+ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
+ {
+- struct inode *inode = NILFS_BTNC_I(btnc);
++ struct inode *inode = btnc->host;
+ struct buffer_head *bh;
+
+ bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
+@@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
+ struct buffer_head **pbh, sector_t *submit_ptr)
+ {
+ struct buffer_head *bh;
+- struct inode *inode = NILFS_BTNC_I(btnc);
++ struct inode *inode = btnc->host;
+ struct page *page;
+ int err;
+
+@@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
+ struct nilfs_btnode_chkey_ctxt *ctxt)
+ {
+ struct buffer_head *obh, *nbh;
+- struct inode *inode = NILFS_BTNC_I(btnc);
++ struct inode *inode = btnc->host;
+ __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
+ int err;
+
+@@ -268,6 +285,14 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
+ if (nbh == NULL) { /* blocksize == pagesize */
+ xa_erase_irq(&btnc->i_pages, newkey);
+ unlock_page(ctxt->bh->b_page);
+- } else
+- brelse(nbh);
++ } else {
++ /*
++ * When canceling a buffer that a prepare operation has
++ * allocated to copy a node block to another location, use
++ * nilfs_btnode_delete() to initialize and release the buffer
++ * so that the buffer flags will not be in an inconsistent
++ * state when it is reallocated.
++ */
++ nilfs_btnode_delete(nbh);
++ }
+ }
+diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
+index 0f88dbc9bcb3e..05ab64d354dc9 100644
+--- a/fs/nilfs2/btnode.h
++++ b/fs/nilfs2/btnode.h
+@@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt {
+ struct buffer_head *newbh;
+ };
+
++void nilfs_init_btnc_inode(struct inode *btnc_inode);
+ void nilfs_btnode_cache_clear(struct address_space *);
+ struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
+ __u64 blocknr);
+diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
+index ab9ec073330f1..def9121a466ef 100644
+--- a/fs/nilfs2/btree.c
++++ b/fs/nilfs2/btree.c
+@@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
+ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
+ __u64 ptr, struct buffer_head **bhp)
+ {
+- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
++ struct address_space *btnc = btnc_inode->i_mapping;
+ struct buffer_head *bh;
+
+ bh = nilfs_btnode_create_block(btnc, ptr);
+@@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
+ struct buffer_head **bhp,
+ const struct nilfs_btree_readahead_info *ra)
+ {
+- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
++ struct address_space *btnc = btnc_inode->i_mapping;
+ struct buffer_head *bh, *ra_bh;
+ sector_t submit_ptr = 0;
+ int ret;
+@@ -478,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
+ ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
+ &submit_ptr);
+ if (ret) {
+- if (ret != -EEXIST)
+- return ret;
+- goto out_check;
++ if (likely(ret == -EEXIST))
++ goto out_check;
++ if (ret == -ENOENT) {
++ /*
++ * Block address translation failed due to invalid
++ * value of 'ptr'. In this case, return internal code
++ * -EINVAL (broken bmap) to notify bmap layer of fatal
++ * metadata corruption.
++ */
++ ret = -EINVAL;
++ }
++ return ret;
+ }
+
+ if (ra) {
+@@ -1741,6 +1752,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
+ dat = nilfs_bmap_get_dat(btree);
+ }
+
++ ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
++ if (ret < 0)
++ return ret;
++
+ ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
+ if (ret < 0)
+ return ret;
+@@ -1913,7 +1928,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
+ path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
+ path[level].bp_ctxt.bh = path[level].bp_bh;
+ ret = nilfs_btnode_prepare_change_key(
+- &NILFS_BMAP_I(btree)->i_btnode_cache,
++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
+ &path[level].bp_ctxt);
+ if (ret < 0) {
+ nilfs_dat_abort_update(dat,
+@@ -1939,7 +1954,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
+
+ if (buffer_nilfs_node(path[level].bp_bh)) {
+ nilfs_btnode_commit_change_key(
+- &NILFS_BMAP_I(btree)->i_btnode_cache,
++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
+ &path[level].bp_ctxt);
+ path[level].bp_bh = path[level].bp_ctxt.bh;
+ }
+@@ -1958,7 +1973,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
+ &path[level].bp_newreq.bpr_req);
+ if (buffer_nilfs_node(path[level].bp_bh))
+ nilfs_btnode_abort_change_key(
+- &NILFS_BMAP_I(btree)->i_btnode_cache,
++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
+ &path[level].bp_ctxt);
+ }
+
+@@ -2134,7 +2149,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
+ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
+ struct list_head *listp)
+ {
+- struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
++ struct address_space *btcache = btnc_inode->i_mapping;
+ struct list_head lists[NILFS_BTREE_LEVEL_MAX];
+ struct pagevec pvec;
+ struct buffer_head *bh, *head;
+@@ -2188,12 +2204,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
+ path[level].bp_ctxt.newkey = blocknr;
+ path[level].bp_ctxt.bh = *bh;
+ ret = nilfs_btnode_prepare_change_key(
+- &NILFS_BMAP_I(btree)->i_btnode_cache,
++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
+ &path[level].bp_ctxt);
+ if (ret < 0)
+ return ret;
+ nilfs_btnode_commit_change_key(
+- &NILFS_BMAP_I(btree)->i_btnode_cache,
++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
+ &path[level].bp_ctxt);
+ *bh = path[level].bp_ctxt.bh;
+ }
+@@ -2398,6 +2414,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap)
+
+ if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
+ ret = -EIO;
++ else
++ ret = nilfs_attach_btree_node_cache(
++ &NILFS_BMAP_I(bmap)->vfs_inode);
++
+ return ret;
+ }
+
+diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
+index 8bccdf1158fce..8fedc7104320d 100644
+--- a/fs/nilfs2/dat.c
++++ b/fs/nilfs2/dat.c
+@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
+ kunmap_atomic(kaddr);
+
+ nilfs_dat_commit_entry(dat, req);
++
++ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
++ nilfs_error(dat->i_sb,
++ "state inconsistency probably due to duplicate use of vblocknr = %llu",
++ (unsigned long long)req->pr_entry_nr);
++ return;
++ }
+ nilfs_palloc_commit_free_entry(dat, req);
+ }
+
+@@ -497,7 +504,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
+ di = NILFS_DAT_I(dat);
+ lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
+ nilfs_palloc_setup_cache(dat, &di->palloc_cache);
+- nilfs_mdt_setup_shadow_map(dat, &di->shadow);
++ err = nilfs_mdt_setup_shadow_map(dat, &di->shadow);
++ if (err)
++ goto failed;
+
+ err = nilfs_read_inode_common(dat, raw_inode);
+ if (err)
+diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
+index 4483204968568..aadea660c66c9 100644
+--- a/fs/nilfs2/gcinode.c
++++ b/fs/nilfs2/gcinode.c
+@@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
+ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
+ __u64 vbn, struct buffer_head **out_bh)
+ {
++ struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode;
+ int ret;
+
+- ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
++ ret = nilfs_btnode_submit_block(btnc_inode->i_mapping,
+ vbn ? : pbn, pbn, REQ_OP_READ, 0,
+ out_bh, &pbn);
+ if (ret == -EEXIST) /* internal code (cache hit) */
+@@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
+ ii->i_flags = 0;
+ nilfs_bmap_init_gc(ii->i_bmap);
+
+- return 0;
++ return nilfs_attach_btree_node_cache(inode);
+ }
+
+ /**
+@@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
+ ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
+ list_del_init(&ii->i_dirty);
+ truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
+ iput(&ii->vfs_inode);
+ }
+ }
+diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
+index 2e8eb263cf0f6..324e23236c341 100644
+--- a/fs/nilfs2/inode.c
++++ b/fs/nilfs2/inode.c
+@@ -29,12 +29,16 @@
+ * @cno: checkpoint number
+ * @root: pointer on NILFS root object (mounted checkpoint)
+ * @for_gc: inode for GC flag
++ * @for_btnc: inode for B-tree node cache flag
++ * @for_shadow: inode for shadowed page cache flag
+ */
+ struct nilfs_iget_args {
+ u64 ino;
+ __u64 cno;
+ struct nilfs_root *root;
+- int for_gc;
++ bool for_gc;
++ bool for_btnc;
++ bool for_shadow;
+ };
+
+ static int nilfs_iget_test(struct inode *inode, void *opaque);
+@@ -314,7 +318,8 @@ static int nilfs_insert_inode_locked(struct inode *inode,
+ unsigned long ino)
+ {
+ struct nilfs_iget_args args = {
+- .ino = ino, .root = root, .cno = 0, .for_gc = 0
++ .ino = ino, .root = root, .cno = 0, .for_gc = false,
++ .for_btnc = false, .for_shadow = false
+ };
+
+ return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+@@ -327,6 +332,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
+ struct inode *inode;
+ struct nilfs_inode_info *ii;
+ struct nilfs_root *root;
++ struct buffer_head *bh;
+ int err = -ENOMEM;
+ ino_t ino;
+
+@@ -342,11 +348,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
+ ii->i_state = BIT(NILFS_I_NEW);
+ ii->i_root = root;
+
+- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh);
++ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
+ if (unlikely(err))
+ goto failed_ifile_create_inode;
+ /* reference count of i_bh inherits from nilfs_mdt_read_block() */
+
++ if (unlikely(ino < NILFS_USER_INO)) {
++ nilfs_warn(sb,
++ "inode bitmap is inconsistent for reserved inodes");
++ do {
++ brelse(bh);
++ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
++ if (unlikely(err))
++ goto failed_ifile_create_inode;
++ } while (ino < NILFS_USER_INO);
++
++ nilfs_info(sb, "repaired inode bitmap for reserved inodes");
++ }
++ ii->i_bh = bh;
++
+ atomic64_inc(&root->inodes_count);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode->i_ino = ino;
+@@ -439,6 +459,8 @@ int nilfs_read_inode_common(struct inode *inode,
+ inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
+ inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
+ inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
++ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
++ return -EIO; /* this inode is for metadata and corrupted */
+ if (inode->i_nlink == 0)
+ return -ESTALE; /* this inode is deleted */
+
+@@ -527,6 +549,19 @@ static int nilfs_iget_test(struct inode *inode, void *opaque)
+ return 0;
+
+ ii = NILFS_I(inode);
++ if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
++ if (!args->for_btnc)
++ return 0;
++ } else if (args->for_btnc) {
++ return 0;
++ }
++ if (test_bit(NILFS_I_SHADOW, &ii->i_state)) {
++ if (!args->for_shadow)
++ return 0;
++ } else if (args->for_shadow) {
++ return 0;
++ }
++
+ if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
+ return !args->for_gc;
+
+@@ -538,15 +573,17 @@ static int nilfs_iget_set(struct inode *inode, void *opaque)
+ struct nilfs_iget_args *args = opaque;
+
+ inode->i_ino = args->ino;
+- if (args->for_gc) {
++ NILFS_I(inode)->i_cno = args->cno;
++ NILFS_I(inode)->i_root = args->root;
++ if (args->root && args->ino == NILFS_ROOT_INO)
++ nilfs_get_root(args->root);
++
++ if (args->for_gc)
+ NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
+- NILFS_I(inode)->i_cno = args->cno;
+- NILFS_I(inode)->i_root = NULL;
+- } else {
+- if (args->root && args->ino == NILFS_ROOT_INO)
+- nilfs_get_root(args->root);
+- NILFS_I(inode)->i_root = args->root;
+- }
++ if (args->for_btnc)
++ NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
++ if (args->for_shadow)
++ NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW);
+ return 0;
+ }
+
+@@ -554,7 +591,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
+ unsigned long ino)
+ {
+ struct nilfs_iget_args args = {
+- .ino = ino, .root = root, .cno = 0, .for_gc = 0
++ .ino = ino, .root = root, .cno = 0, .for_gc = false,
++ .for_btnc = false, .for_shadow = false
+ };
+
+ return ilookup5(sb, ino, nilfs_iget_test, &args);
+@@ -564,7 +602,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
+ unsigned long ino)
+ {
+ struct nilfs_iget_args args = {
+- .ino = ino, .root = root, .cno = 0, .for_gc = 0
++ .ino = ino, .root = root, .cno = 0, .for_gc = false,
++ .for_btnc = false, .for_shadow = false
+ };
+
+ return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
+@@ -595,7 +634,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
+ __u64 cno)
+ {
+ struct nilfs_iget_args args = {
+- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1
++ .ino = ino, .root = NULL, .cno = cno, .for_gc = true,
++ .for_btnc = false, .for_shadow = false
+ };
+ struct inode *inode;
+ int err;
+@@ -615,6 +655,113 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
+ return inode;
+ }
+
++/**
++ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
++ * @inode: inode object
++ *
++ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
++ * or does nothing if the inode already has it. This function allocates
++ * an additional inode to maintain page cache of B-tree nodes one-on-one.
++ *
++ * Return Value: On success, 0 is returned. On errors, one of the following
++ * negative error code is returned.
++ *
++ * %-ENOMEM - Insufficient memory available.
++ */
++int nilfs_attach_btree_node_cache(struct inode *inode)
++{
++ struct nilfs_inode_info *ii = NILFS_I(inode);
++ struct inode *btnc_inode;
++ struct nilfs_iget_args args;
++
++ if (ii->i_assoc_inode)
++ return 0;
++
++ args.ino = inode->i_ino;
++ args.root = ii->i_root;
++ args.cno = ii->i_cno;
++ args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
++ args.for_btnc = true;
++ args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0;
++
++ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
++ nilfs_iget_set, &args);
++ if (unlikely(!btnc_inode))
++ return -ENOMEM;
++ if (btnc_inode->i_state & I_NEW) {
++ nilfs_init_btnc_inode(btnc_inode);
++ unlock_new_inode(btnc_inode);
++ }
++ NILFS_I(btnc_inode)->i_assoc_inode = inode;
++ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
++ ii->i_assoc_inode = btnc_inode;
++
++ return 0;
++}
++
++/**
++ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
++ * @inode: inode object
++ *
++ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
++ * holder inode bound to @inode, or does nothing if @inode doesn't have it.
++ */
++void nilfs_detach_btree_node_cache(struct inode *inode)
++{
++ struct nilfs_inode_info *ii = NILFS_I(inode);
++ struct inode *btnc_inode = ii->i_assoc_inode;
++
++ if (btnc_inode) {
++ NILFS_I(btnc_inode)->i_assoc_inode = NULL;
++ ii->i_assoc_inode = NULL;
++ iput(btnc_inode);
++ }
++}
++
++/**
++ * nilfs_iget_for_shadow - obtain inode for shadow mapping
++ * @inode: inode object that uses shadow mapping
++ *
++ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
++ * caches for shadow mapping. The page cache for data pages is set up
++ * in one inode and the one for b-tree node pages is set up in the
++ * other inode, which is attached to the former inode.
++ *
++ * Return Value: On success, a pointer to the inode for data pages is
++ * returned. On errors, one of the following negative error code is returned
++ * in a pointer type.
++ *
++ * %-ENOMEM - Insufficient memory available.
++ */
++struct inode *nilfs_iget_for_shadow(struct inode *inode)
++{
++ struct nilfs_iget_args args = {
++ .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false,
++ .for_btnc = false, .for_shadow = true
++ };
++ struct inode *s_inode;
++ int err;
++
++ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
++ nilfs_iget_set, &args);
++ if (unlikely(!s_inode))
++ return ERR_PTR(-ENOMEM);
++ if (!(s_inode->i_state & I_NEW))
++ return inode;
++
++ NILFS_I(s_inode)->i_flags = 0;
++ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
++ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
++
++ err = nilfs_attach_btree_node_cache(s_inode);
++ if (unlikely(err)) {
++ iget_failed(s_inode);
++ return ERR_PTR(err);
++ }
++ unlock_new_inode(s_inode);
++ return s_inode;
++}
++
+ void nilfs_write_inode_common(struct inode *inode,
+ struct nilfs_inode *raw_inode, int has_bmap)
+ {
+@@ -762,7 +909,8 @@ static void nilfs_clear_inode(struct inode *inode)
+ if (test_bit(NILFS_I_BMAP, &ii->i_state))
+ nilfs_bmap_clear(ii->i_bmap);
+
+- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
++ if (!test_bit(NILFS_I_BTNC, &ii->i_state))
++ nilfs_detach_btree_node_cache(inode);
+
+ if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
+ nilfs_put_root(ii->i_root);
+@@ -773,6 +921,7 @@ void nilfs_evict_inode(struct inode *inode)
+ struct nilfs_transaction_info ti;
+ struct super_block *sb = inode->i_sb;
+ struct nilfs_inode_info *ii = NILFS_I(inode);
++ struct the_nilfs *nilfs;
+ int ret;
+
+ if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
+@@ -785,6 +934,23 @@ void nilfs_evict_inode(struct inode *inode)
+
+ truncate_inode_pages_final(&inode->i_data);
+
++ nilfs = sb->s_fs_info;
++ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
++ /*
++ * If this inode is about to be disposed after the file system
++ * has been degraded to read-only due to file system corruption
++ * or after the writer has been detached, do not make any
++ * changes that cause writes, just clear it.
++ * Do this check after read-locking ns_segctor_sem by
++ * nilfs_transaction_begin() in order to avoid a race with
++ * the writer detach operation.
++ */
++ clear_inode(inode);
++ nilfs_clear_inode(inode);
++ nilfs_transaction_abort(sb);
++ return;
++ }
++
+ /* TODO: some of the following operations may fail. */
+ nilfs_truncate_bmap(ii, 0);
+ nilfs_mark_inode_dirty(inode);
+@@ -863,7 +1029,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
+ int err;
+
+ spin_lock(&nilfs->ns_inode_lock);
+- if (ii->i_bh == NULL) {
++ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
+ spin_unlock(&nilfs->ns_inode_lock);
+ err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
+ inode->i_ino, pbh);
+@@ -872,7 +1038,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
+ spin_lock(&nilfs->ns_inode_lock);
+ if (ii->i_bh == NULL)
+ ii->i_bh = *pbh;
+- else {
++ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
++ __brelse(ii->i_bh);
++ ii->i_bh = *pbh;
++ } else {
+ brelse(*pbh);
+ *pbh = ii->i_bh;
+ }
+@@ -939,9 +1108,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
+
+ int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
+ {
++ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ struct buffer_head *ibh;
+ int err;
+
++ /*
++ * Do not dirty inodes after the log writer has been detached
++ * and its nilfs_root struct has been freed.
++ */
++ if (unlikely(nilfs_purging(nilfs)))
++ return 0;
++
+ err = nilfs_load_inode_block(inode, &ibh);
+ if (unlikely(err)) {
+ nilfs_warn(inode->i_sb,
+diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
+index 640ac8fe891e6..a39206705dd12 100644
+--- a/fs/nilfs2/ioctl.c
++++ b/fs/nilfs2/ioctl.c
+@@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
+ if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
+ return -EINVAL;
+
+- buf = (void *)__get_free_pages(GFP_NOFS, 0);
++ buf = (void *)get_zeroed_page(GFP_NOFS);
+ if (unlikely(!buf))
+ return -ENOMEM;
+ maxmembs = PAGE_SIZE / argv->v_size;
+@@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
+
+ minseg = range[0] + segbytes - 1;
+ do_div(minseg, segbytes);
++
++ if (range[1] < 4096)
++ goto out;
++
+ maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
++ if (maxseg < segbytes)
++ goto out;
++
+ do_div(maxseg, segbytes);
+ maxseg--;
+
+diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
+index 97769fe4d5885..131b5add32eeb 100644
+--- a/fs/nilfs2/mdt.c
++++ b/fs/nilfs2/mdt.c
+@@ -470,9 +470,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
+ void nilfs_mdt_clear(struct inode *inode)
+ {
+ struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
++ struct nilfs_shadow_map *shadow = mdi->mi_shadow;
+
+ if (mdi->mi_palloc_cache)
+ nilfs_palloc_destroy_cache(inode);
++
++ if (shadow) {
++ struct inode *s_inode = shadow->inode;
++
++ shadow->inode = NULL;
++ iput(s_inode);
++ mdi->mi_shadow = NULL;
++ }
+ }
+
+ /**
+@@ -506,12 +515,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
+ struct nilfs_shadow_map *shadow)
+ {
+ struct nilfs_mdt_info *mi = NILFS_MDT(inode);
++ struct inode *s_inode;
+
+ INIT_LIST_HEAD(&shadow->frozen_buffers);
+- address_space_init_once(&shadow->frozen_data);
+- nilfs_mapping_init(&shadow->frozen_data, inode);
+- address_space_init_once(&shadow->frozen_btnodes);
+- nilfs_mapping_init(&shadow->frozen_btnodes, inode);
++
++ s_inode = nilfs_iget_for_shadow(inode);
++ if (IS_ERR(s_inode))
++ return PTR_ERR(s_inode);
++
++ shadow->inode = s_inode;
+ mi->mi_shadow = shadow;
+ return 0;
+ }
+@@ -525,14 +537,15 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode)
+ struct nilfs_mdt_info *mi = NILFS_MDT(inode);
+ struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct nilfs_shadow_map *shadow = mi->mi_shadow;
++ struct inode *s_inode = shadow->inode;
+ int ret;
+
+- ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping);
++ ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping);
+ if (ret)
+ goto out;
+
+- ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes,
+- &ii->i_btnode_cache);
++ ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping,
++ ii->i_assoc_inode->i_mapping);
+ if (ret)
+ goto out;
+
+@@ -548,7 +561,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
+ struct page *page;
+ int blkbits = inode->i_blkbits;
+
+- page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
++ page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index);
+ if (!page)
+ return -ENOMEM;
+
+@@ -580,7 +593,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
+ struct page *page;
+ int n;
+
+- page = find_lock_page(&shadow->frozen_data, bh->b_page->index);
++ page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index);
+ if (page) {
+ if (page_has_buffers(page)) {
+ n = bh_offset(bh) >> inode->i_blkbits;
+@@ -621,10 +634,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
+ nilfs_palloc_clear_cache(inode);
+
+ nilfs_clear_dirty_pages(inode->i_mapping, true);
+- nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);
++ nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);
+
+- nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
+- nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);
++ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
++ nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
++ NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);
+
+ nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
+
+@@ -639,10 +653,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode)
+ {
+ struct nilfs_mdt_info *mi = NILFS_MDT(inode);
+ struct nilfs_shadow_map *shadow = mi->mi_shadow;
++ struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode;
+
+ down_write(&mi->mi_sem);
+ nilfs_release_frozen_buffers(shadow);
+- truncate_inode_pages(&shadow->frozen_data, 0);
+- truncate_inode_pages(&shadow->frozen_btnodes, 0);
++ truncate_inode_pages(shadow->inode->i_mapping, 0);
++ truncate_inode_pages(shadow_btnc_inode->i_mapping, 0);
+ up_write(&mi->mi_sem);
+ }
+diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
+index e77aea4bb921c..9d8ac0d27c16e 100644
+--- a/fs/nilfs2/mdt.h
++++ b/fs/nilfs2/mdt.h
+@@ -18,14 +18,12 @@
+ /**
+ * struct nilfs_shadow_map - shadow mapping of meta data file
+ * @bmap_store: shadow copy of bmap state
+- * @frozen_data: shadowed dirty data pages
+- * @frozen_btnodes: shadowed dirty b-tree nodes' pages
++ * @inode: holder of page caches used in shadow mapping
+ * @frozen_buffers: list of frozen buffers
+ */
+ struct nilfs_shadow_map {
+ struct nilfs_bmap_store bmap_store;
+- struct address_space frozen_data;
+- struct address_space frozen_btnodes;
++ struct inode *inode;
+ struct list_head frozen_buffers;
+ };
+
+diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
+index 60b21b6eeac06..aceb8aadca148 100644
+--- a/fs/nilfs2/nilfs.h
++++ b/fs/nilfs2/nilfs.h
+@@ -28,7 +28,7 @@
+ * @i_xattr: <TODO>
+ * @i_dir_start_lookup: page index of last successful search
+ * @i_cno: checkpoint number for GC inode
+- * @i_btnode_cache: cached pages of b-tree nodes
++ * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer)
+ * @i_dirty: list for connecting dirty files
+ * @xattr_sem: semaphore for extended attributes processing
+ * @i_bh: buffer contains disk inode
+@@ -43,7 +43,7 @@ struct nilfs_inode_info {
+ __u64 i_xattr; /* sector_t ??? */
+ __u32 i_dir_start_lookup;
+ __u64 i_cno; /* check point number for GC inode */
+- struct address_space i_btnode_cache;
++ struct inode *i_assoc_inode;
+ struct list_head i_dirty; /* List for connecting dirty files */
+
+ #ifdef CONFIG_NILFS_XATTR
+@@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap)
+ return container_of(bmap, struct nilfs_inode_info, i_bmap_data);
+ }
+
+-static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
+-{
+- struct nilfs_inode_info *ii =
+- container_of(btnc, struct nilfs_inode_info, i_btnode_cache);
+- return &ii->vfs_inode;
+-}
+-
+ /*
+ * Dynamic state flags of NILFS on-memory inode (i_state)
+ */
+@@ -98,6 +91,8 @@ enum {
+ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */
+ NILFS_I_BMAP, /* has bmap and btnode_cache */
+ NILFS_I_GCINODE, /* inode for GC, on memory only */
++ NILFS_I_BTNC, /* inode for btree node cache */
++ NILFS_I_SHADOW, /* inode for shadowed page cache */
+ };
+
+ /*
+@@ -203,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode)
+
+ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
+ {
++ if (S_ISLNK(inode->i_mode))
++ return 0;
++
+ inode->i_mode &= ~current_umask();
+ return 0;
+ }
+@@ -267,6 +265,9 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
+ unsigned long ino);
+ extern struct inode *nilfs_iget_for_gc(struct super_block *sb,
+ unsigned long ino, __u64 cno);
++int nilfs_attach_btree_node_cache(struct inode *inode);
++void nilfs_detach_btree_node_cache(struct inode *inode);
++struct inode *nilfs_iget_for_shadow(struct inode *inode);
+ extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
+ extern void nilfs_truncate(struct inode *);
+ extern void nilfs_evict_inode(struct inode *);
+diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
+index 171fb5cd427fd..81992b9a219b2 100644
+--- a/fs/nilfs2/page.c
++++ b/fs/nilfs2/page.c
+@@ -369,7 +369,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
+ struct page *page = pvec.pages[i];
+
+ lock_page(page);
+- nilfs_clear_dirty_page(page, silent);
++
++ /*
++ * This page may have been removed from the address
++ * space by truncation or invalidation when the lock
++ * was acquired. Skip processing in that case.
++ */
++ if (likely(page->mapping == mapping))
++ nilfs_clear_dirty_page(page, silent);
++
+ unlock_page(page);
+ }
+ pagevec_release(&pvec);
+@@ -448,10 +456,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
+ /*
+ * NILFS2 needs clear_page_dirty() in the following two cases:
+ *
+- * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
+- * page dirty flags when it copies back pages from the shadow cache
+- * (gcdat->{i_mapping,i_btnode_cache}) to its original cache
+- * (dat->{i_mapping,i_btnode_cache}).
++ * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty
++ * flag of pages when it copies back pages from shadow cache to the
++ * original cache.
+ *
+ * 2) Some B-tree operations like insertion or deletion may dispose buffers
+ * in dirty state, and this needs to cancel the dirty state of their pages.
+diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
+index 56872e93823da..5e845eea1af08 100644
+--- a/fs/nilfs2/segbuf.c
++++ b/fs/nilfs2/segbuf.c
+@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
+ if (unlikely(!bh))
+ return -ENOMEM;
+
++ lock_buffer(bh);
++ if (!buffer_uptodate(bh)) {
++ memset(bh->b_data, 0, bh->b_size);
++ set_buffer_uptodate(bh);
++ }
++ unlock_buffer(bh);
+ nilfs_segbuf_add_segsum_buffer(segbuf, bh);
+ return 0;
+ }
+diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
+index 686c8ee7b29ce..d61d702215db2 100644
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_sc_info *sci = nilfs->ns_writer;
+
+- if (!sci || !sci->sc_flush_request)
++ if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
+ return;
+
+ set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
+@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
+ return 0;
+ }
+
++/**
++ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
++ * @sci: segment constructor object
++ *
++ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
++ * the current segment summary block.
++ */
++static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
++{
++ struct nilfs_segsum_pointer *ssp;
++
++ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
++ if (ssp->offset < ssp->bh->b_size)
++ memset(ssp->bh->b_data + ssp->offset, 0,
++ ssp->bh->b_size - ssp->offset);
++}
++
+ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
+ {
+ sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
+@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
+ * The current segment is filled up
+ * (internal code)
+ */
++ nilfs_segctor_zeropad_segsum(sci);
+ sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
+ return nilfs_segctor_reset_segment_buffer(sci);
+ }
+@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
+ goto retry;
+ }
+ if (unlikely(required)) {
++ nilfs_segctor_zeropad_segsum(sci);
+ err = nilfs_segbuf_extend_segsum(segbuf);
+ if (unlikely(err))
+ goto failed;
+@@ -706,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
+ struct page *page = pvec.pages[i];
+
+ lock_page(page);
++ if (unlikely(page->mapping != mapping)) {
++ /* Exclude pages removed from the address space */
++ unlock_page(page);
++ continue;
++ }
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, i_blocksize(inode), 0);
+ unlock_page(page);
+@@ -733,15 +757,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
+ struct list_head *listp)
+ {
+ struct nilfs_inode_info *ii = NILFS_I(inode);
+- struct address_space *mapping = &ii->i_btnode_cache;
++ struct inode *btnc_inode = ii->i_assoc_inode;
+ struct pagevec pvec;
+ struct buffer_head *bh, *head;
+ unsigned int i;
+ pgoff_t index = 0;
+
++ if (!btnc_inode)
++ return;
++
+ pagevec_init(&pvec);
+
+- while (pagevec_lookup_tag(&pvec, mapping, &index,
++ while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
+ PAGECACHE_TAG_DIRTY)) {
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ bh = head = page_buffers(pvec.pages[i]);
+@@ -872,9 +899,11 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
+ nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
+ nilfs_cpfile_put_checkpoint(
+ nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
+- } else
+- WARN_ON(err == -EINVAL || err == -ENOENT);
+-
++ } else if (err == -EINVAL || err == -ENOENT) {
++ nilfs_error(sci->sc_super,
++ "checkpoint creation failed due to metadata corruption.");
++ err = -EIO;
++ }
+ return err;
+ }
+
+@@ -888,7 +917,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
+ err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
+ &raw_cp, &bh_cp);
+ if (unlikely(err)) {
+- WARN_ON(err == -EINVAL || err == -ENOENT);
++ if (err == -EINVAL || err == -ENOENT) {
++ nilfs_error(sci->sc_super,
++ "checkpoint finalization failed due to metadata corruption.");
++ err = -EIO;
++ }
+ goto failed_ibh;
+ }
+ raw_cp->cp_snapshot_list.ssl_next = 0;
+@@ -951,10 +984,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
+ unsigned int isz, srsz;
+
+ bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
++
++ lock_buffer(bh_sr);
+ raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
+ isz = nilfs->ns_inode_size;
+ srsz = NILFS_SR_BYTES(isz);
+
++ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
+ raw_sr->sr_bytes = cpu_to_le16(srsz);
+ raw_sr->sr_nongc_ctime
+ = cpu_to_le64(nilfs_doing_gc() ?
+@@ -968,6 +1004,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
+ nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
+ NILFS_SR_SUFILE_OFFSET(isz), 1);
+ memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
++ set_buffer_uptodate(bh_sr);
++ unlock_buffer(bh_sr);
+ }
+
+ static void nilfs_redirty_inodes(struct list_head *head)
+@@ -1522,6 +1560,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
+ nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
+ sci->sc_stage = prev_stage;
+ }
++ nilfs_segctor_zeropad_segsum(sci);
+ nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
+ return 0;
+
+@@ -1749,6 +1788,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ b_assoc_buffers) {
++ clear_buffer_uptodate(bh);
+ if (bh->b_page != bd_page) {
+ if (bd_page)
+ end_page_writeback(bd_page);
+@@ -1760,6 +1800,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
+ b_assoc_buffers) {
+ clear_buffer_async_write(bh);
+ if (bh == segbuf->sb_super_root) {
++ clear_buffer_uptodate(bh);
+ if (bh->b_page != bd_page) {
+ end_page_writeback(bd_page);
+ bd_page = bh->b_page;
+@@ -2010,6 +2051,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
+ struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
+ int err;
+
++ if (sb_rdonly(sci->sc_super))
++ return -EROFS;
++
+ nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
+ sci->sc_cno = nilfs->ns_cno;
+
+@@ -2234,7 +2278,7 @@ int nilfs_construct_segment(struct super_block *sb)
+ struct nilfs_transaction_info *ti;
+ int err;
+
+- if (!sci)
++ if (sb_rdonly(sb) || unlikely(!sci))
+ return -EROFS;
+
+ /* A call inside transactions causes a deadlock. */
+@@ -2273,7 +2317,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
+ struct nilfs_transaction_info ti;
+ int err = 0;
+
+- if (!sci)
++ if (sb_rdonly(sb) || unlikely(!sci))
+ return -EROFS;
+
+ nilfs_transaction_lock(sb, &ti, 0);
+@@ -2410,7 +2454,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
+ continue;
+ list_del_init(&ii->i_dirty);
+ truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+- nilfs_btnode_cache_clear(&ii->i_btnode_cache);
++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
+ iput(&ii->vfs_inode);
+ }
+ }
+@@ -2600,11 +2644,10 @@ static int nilfs_segctor_thread(void *arg)
+ goto loop;
+
+ end_thread:
+- spin_unlock(&sci->sc_state_lock);
+-
+ /* end sync. */
+ sci->sc_task = NULL;
+ wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
++ spin_unlock(&sci->sc_state_lock);
+ return 0;
+ }
+
+@@ -2696,7 +2739,7 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
+
+ flush_work(&sci->sc_iput_work);
+
+- } while (ret && retrycount-- > 0);
++ } while (ret && ret != -EROFS && retrycount-- > 0);
+ }
+
+ /**
+@@ -2769,11 +2812,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
+
+ if (nilfs->ns_writer) {
+ /*
+- * This happens if the filesystem was remounted
+- * read/write after nilfs_error degenerated it into a
+- * read-only mount.
++ * This happens if the filesystem is made read-only by
++ * __nilfs_error or nilfs_remount and then remounted
++ * read/write. In these cases, reuse the existing
++ * writer.
+ */
+- nilfs_detach_log_writer(sb);
++ return 0;
+ }
+
+ nilfs->ns_writer = nilfs_segctor_new(sb, root);
+@@ -2783,10 +2827,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
+ inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+
+ err = nilfs_segctor_start_thread(nilfs->ns_writer);
+- if (err) {
+- kfree(nilfs->ns_writer);
+- nilfs->ns_writer = NULL;
+- }
++ if (unlikely(err))
++ nilfs_detach_log_writer(sb);
++
+ return err;
+ }
+
+@@ -2807,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
+ nilfs_segctor_destroy(nilfs->ns_writer);
+ nilfs->ns_writer = NULL;
+ }
++ set_nilfs_purging(nilfs);
+
+ /* Force to free the list of dirty files */
+ spin_lock(&nilfs->ns_inode_lock);
+@@ -2819,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
+ up_write(&nilfs->ns_segctor_sem);
+
+ nilfs_dispose_list(nilfs, &garbage_list, 1);
++ clear_nilfs_purging(nilfs);
+ }
+diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
+index 63722475e17e1..b3abe69382fd0 100644
+--- a/fs/nilfs2/sufile.c
++++ b/fs/nilfs2/sufile.c
+@@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
+ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
+ {
+ struct buffer_head *bh;
++ void *kaddr;
++ struct nilfs_segment_usage *su;
+ int ret;
+
++ down_write(&NILFS_MDT(sufile)->mi_sem);
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
+ if (!ret) {
+ mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
++ kaddr = kmap_atomic(bh->b_page);
++ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
++ nilfs_segment_usage_set_dirty(su);
++ kunmap_atomic(kaddr);
+ brelse(bh);
+ }
++ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
+ }
+
+@@ -771,6 +779,15 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
+ goto out_header;
+
+ sui->ncleansegs -= nsegs - newnsegs;
++
++ /*
++ * If the sufile is successfully truncated, immediately adjust
++ * the segment allocation space while locking the semaphore
++ * "mi_sem" so that nilfs_sufile_alloc() never allocates
++ * segments in the truncated space.
++ */
++ sui->allocmax = newnsegs - 1;
++ sui->allocmin = 0;
+ }
+
+ kaddr = kmap_atomic(header_bh->b_page);
+diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
+index f6b2d280aab5a..130ffa8a9bed2 100644
+--- a/fs/nilfs2/super.c
++++ b/fs/nilfs2/super.c
+@@ -157,7 +157,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
+ ii->i_bh = NULL;
+ ii->i_state = 0;
+ ii->i_cno = 0;
+- nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode);
++ ii->i_assoc_inode = NULL;
++ ii->i_bmap = &ii->i_bmap_data;
+ return &ii->vfs_inode;
+ }
+
+@@ -371,10 +372,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
+ goto out;
+ }
+ nsbp = (void *)nsbh->b_data + offset;
+- memset(nsbp, 0, nilfs->ns_blocksize);
+
++ lock_buffer(nsbh);
+ if (sb2i >= 0) {
++ /*
++ * The position of the second superblock only changes by 4KiB,
++ * which is larger than the maximum superblock data size
++ * (= 1KiB), so there is no need to use memmove() to allow
++ * overlap between source and destination.
++ */
+ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
++
++ /*
++ * Zero fill after copy to avoid overwriting in case of move
++ * within the same block.
++ */
++ memset(nsbh->b_data, 0, offset);
++ memset((void *)nsbp + nilfs->ns_sbsize, 0,
++ nsbh->b_size - offset - nilfs->ns_sbsize);
++ } else {
++ memset(nsbh->b_data, 0, nsbh->b_size);
++ }
++ set_buffer_uptodate(nsbh);
++ unlock_buffer(nsbh);
++
++ if (sb2i >= 0) {
+ brelse(nilfs->ns_sbh[sb2i]);
+ nilfs->ns_sbh[sb2i] = nsbh;
+ nilfs->ns_sbp[sb2i] = nsbp;
+@@ -407,6 +429,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
+ if (newsize > devsize)
+ goto out;
+
++ /*
++ * Prevent underflow in second superblock position calculation.
++ * The exact minimum size check is done in nilfs_sufile_resize().
++ */
++ if (newsize < 4096) {
++ ret = -ENOSPC;
++ goto out;
++ }
++
+ /*
+ * Write lock is required to protect some functions depending
+ * on the number of segments, the number of reserved segments,
+@@ -472,6 +503,7 @@ static void nilfs_put_super(struct super_block *sb)
+ up_write(&nilfs->ns_sem);
+ }
+
++ nilfs_sysfs_delete_device_group(nilfs);
+ iput(nilfs->ns_sufile);
+ iput(nilfs->ns_cpfile);
+ iput(nilfs->ns_dat);
+@@ -1095,6 +1127,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
+ nilfs_put_root(fsroot);
+
+ failed_unload:
++ nilfs_sysfs_delete_device_group(nilfs);
+ iput(nilfs->ns_sufile);
+ iput(nilfs->ns_cpfile);
+ iput(nilfs->ns_dat);
+@@ -1132,8 +1165,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
+ if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ goto out;
+ if (*flags & SB_RDONLY) {
+- /* Shutting down log writer */
+- nilfs_detach_log_writer(sb);
+ sb->s_flags |= SB_RDONLY;
+
+ /*
+@@ -1377,8 +1408,6 @@ static void nilfs_inode_init_once(void *obj)
+ #ifdef CONFIG_NILFS_XATTR
+ init_rwsem(&ii->xattr_sem);
+ #endif
+- address_space_init_once(&ii->i_btnode_cache);
+- ii->i_bmap = &ii->i_bmap_data;
+ inode_init_once(&ii->vfs_inode);
+ }
+
+diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
+index c8bfc01da5d71..fe2e7197268b7 100644
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -13,6 +13,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/backing-dev.h>
+ #include <linux/random.h>
++#include <linux/log2.h>
+ #include <linux/crc32.h>
+ #include "nilfs.h"
+ #include "segment.h"
+@@ -86,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs)
+ {
+ might_sleep();
+ if (nilfs_init(nilfs)) {
+- nilfs_sysfs_delete_device_group(nilfs);
+ brelse(nilfs->ns_sbh[0]);
+ brelse(nilfs->ns_sbh[1]);
+ }
+@@ -192,6 +192,34 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
+ return ret;
+ }
+
++/**
++ * nilfs_get_blocksize - get block size from raw superblock data
++ * @sb: super block instance
++ * @sbp: superblock raw data buffer
++ * @blocksize: place to store block size
++ *
++ * nilfs_get_blocksize() calculates the block size from the block size
++ * exponent information written in @sbp and stores it in @blocksize,
++ * or aborts with an error message if it's too large.
++ *
++ * Return Value: On success, 0 is returned. If the block size is too
++ * large, -EINVAL is returned.
++ */
++static int nilfs_get_blocksize(struct super_block *sb,
++ struct nilfs_super_block *sbp, int *blocksize)
++{
++ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
++
++ if (unlikely(shift_bits >
++ ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) {
++ nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB",
++ shift_bits);
++ return -EINVAL;
++ }
++ *blocksize = BLOCK_SIZE << shift_bits;
++ return 0;
++}
++
+ /**
+ * load_nilfs - load and recover the nilfs
+ * @nilfs: the_nilfs structure to be released
+@@ -245,11 +273,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
+ nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
+
+ /* verify consistency between two super blocks */
+- blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
++ err = nilfs_get_blocksize(sb, sbp[0], &blocksize);
++ if (err)
++ goto scan_error;
++
+ if (blocksize != nilfs->ns_blocksize) {
+ nilfs_warn(sb,
+ "blocksize differs between two super blocks (%d != %d)",
+ blocksize, nilfs->ns_blocksize);
++ err = -EINVAL;
+ goto scan_error;
+ }
+
+@@ -272,6 +304,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
+ goto failed;
+ }
+
++ err = nilfs_sysfs_create_device_group(sb);
++ if (unlikely(err))
++ goto sysfs_error;
++
+ if (valid_fs)
+ goto skip_recovery;
+
+@@ -333,6 +369,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
+ goto failed;
+
+ failed_unload:
++ nilfs_sysfs_delete_device_group(nilfs);
++
++ sysfs_error:
+ iput(nilfs->ns_cpfile);
+ iput(nilfs->ns_sufile);
+ iput(nilfs->ns_dat);
+@@ -366,6 +405,18 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
+ 100));
+ }
+
++/**
++ * nilfs_max_segment_count - calculate the maximum number of segments
++ * @nilfs: nilfs object
++ */
++static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
++{
++ u64 max_count = U64_MAX;
++
++ do_div(max_count, nilfs->ns_blocks_per_segment);
++ return min_t(u64, max_count, ULONG_MAX);
++}
++
+ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
+ {
+ nilfs->ns_nsegments = nsegs;
+@@ -375,6 +426,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
+ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
+ struct nilfs_super_block *sbp)
+ {
++ u64 nsegments, nblocks;
++
+ if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
+ nilfs_err(nilfs->ns_sb,
+ "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).",
+@@ -418,7 +471,35 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
+ return -EINVAL;
+ }
+
+- nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
++ nsegments = le64_to_cpu(sbp->s_nsegments);
++ if (nsegments > nilfs_max_segment_count(nilfs)) {
++ nilfs_err(nilfs->ns_sb,
++ "segment count %llu exceeds upper limit (%llu segments)",
++ (unsigned long long)nsegments,
++ (unsigned long long)nilfs_max_segment_count(nilfs));
++ return -EINVAL;
++ }
++
++ nblocks = (u64)i_size_read(nilfs->ns_sb->s_bdev->bd_inode) >>
++ nilfs->ns_sb->s_blocksize_bits;
++ if (nblocks) {
++ u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment;
++ /*
++ * To avoid failing to mount early device images without a
++ * second superblock, exclude that block count from the
++ * "min_block_count" calculation.
++ */
++
++ if (nblocks < min_block_count) {
++ nilfs_err(nilfs->ns_sb,
++ "total number of segment blocks %llu exceeds device size (%llu blocks)",
++ (unsigned long long)min_block_count,
++ (unsigned long long)nblocks);
++ return -EINVAL;
++ }
++ }
++
++ nilfs_set_nsegments(nilfs, nsegments);
+ nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
+ return 0;
+ }
+@@ -443,11 +524,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
+ return crc == le32_to_cpu(sbp->s_sum);
+ }
+
+-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
++/**
++ * nilfs_sb2_bad_offset - check the location of the second superblock
++ * @sbp: superblock raw data buffer
++ * @offset: byte offset of second superblock calculated from device size
++ *
++ * nilfs_sb2_bad_offset() checks if the position on the second
++ * superblock is valid or not based on the filesystem parameters
++ * stored in @sbp. If @offset points to a location within the segment
++ * area, or if the parameters themselves are not normal, it is
++ * determined to be invalid.
++ *
++ * Return Value: true if invalid, false if valid.
++ */
++static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
+ {
+- return offset < ((le64_to_cpu(sbp->s_nsegments) *
+- le32_to_cpu(sbp->s_blocks_per_segment)) <<
+- (le32_to_cpu(sbp->s_log_block_size) + 10));
++ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
++ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
++ u64 nsegments = le64_to_cpu(sbp->s_nsegments);
++ u64 index;
++
++ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS ||
++ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)
++ return true;
++
++ index = offset >> (shift_bits + BLOCK_SIZE_BITS);
++ do_div(index, blocks_per_segment);
++ return index < nsegments;
+ }
+
+ static void nilfs_release_super_block(struct the_nilfs *nilfs)
+@@ -489,9 +592,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
+ {
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct buffer_head **sbh = nilfs->ns_sbh;
+- u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
++ u64 sb2off, devsize = nilfs->ns_bdev->bd_inode->i_size;
+ int valid[2], swp = 0;
+
++ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
++ nilfs_err(sb, "device size too small");
++ return -EINVAL;
++ }
++ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
++
+ sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
+ &sbh[0]);
+ sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
+@@ -586,9 +695,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
+ if (err)
+ goto failed_sbh;
+
+- blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
+- if (blocksize < NILFS_MIN_BLOCK_SIZE ||
+- blocksize > NILFS_MAX_BLOCK_SIZE) {
++ err = nilfs_get_blocksize(sb, sbp, &blocksize);
++ if (err)
++ goto failed_sbh;
++
++ if (blocksize < NILFS_MIN_BLOCK_SIZE) {
+ nilfs_err(sb,
+ "couldn't mount because of unsupported filesystem blocksize %d",
+ blocksize);
+@@ -634,10 +745,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
+ if (err)
+ goto failed_sbh;
+
+- err = nilfs_sysfs_create_device_group(sb);
+- if (err)
+- goto failed_sbh;
+-
+ set_nilfs_init(nilfs);
+ err = 0;
+ out:
+@@ -690,9 +797,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
+ {
+ unsigned long ncleansegs;
+
+- down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+- up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
+ return 0;
+ }
+diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
+index 987c8ab02aeee..b36ba588ee69a 100644
+--- a/fs/nilfs2/the_nilfs.h
++++ b/fs/nilfs2/the_nilfs.h
+@@ -29,6 +29,7 @@ enum {
+ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
+ THE_NILFS_GC_RUNNING, /* gc process is running */
+ THE_NILFS_SB_DIRTY, /* super block is dirty */
++ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
+ };
+
+ /**
+@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
+ THE_NILFS_FNS(DISCONTINUED, discontinued)
+ THE_NILFS_FNS(GC_RUNNING, gc_running)
+ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
++THE_NILFS_FNS(PURGING, purging)
+
+ /*
+ * Mount option operations
+diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
+index 52ccd34b1e792..a026dbd3593f6 100644
+--- a/fs/nls/nls_base.c
++++ b/fs/nls/nls_base.c
+@@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls)
+ return -EINVAL;
+ }
+
+-static struct nls_table *find_nls(char *charset)
++static struct nls_table *find_nls(const char *charset)
+ {
+ struct nls_table *nls;
+ spin_lock(&nls_lock);
+@@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset)
+ return nls;
+ }
+
+-struct nls_table *load_nls(char *charset)
++struct nls_table *load_nls(const char *charset)
+ {
+ return try_then_request_module(find_nls(charset), "nls_%s", charset);
+ }
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 6facdf476255d..0e2a0eb7cb9e0 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -611,9 +611,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->fd = fd;
+
+- if (f)
+- fd_install(fd, f);
+-
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+@@ -621,6 +618,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+ goto out_close_fd;
+ }
+
++ if (f)
++ fd_install(fd, f);
++
+ return metadata.event_len;
+
+ out_close_fd:
+@@ -1337,8 +1337,11 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+ return 0;
+ }
+
+-static int fanotify_events_supported(struct path *path, __u64 mask)
++static int fanotify_events_supported(struct path *path, __u64 mask,
++ unsigned int flags)
+ {
++ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
++
+ /*
+ * Some filesystems such as 'proc' acquire unusual locks when opening
+ * files. For them fanotify permission events have high chances of
+@@ -1350,6 +1353,21 @@ static int fanotify_events_supported(struct path *path, __u64 mask)
+ if (mask & FANOTIFY_PERM_EVENTS &&
+ path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
+ return -EINVAL;
++
++ /*
++ * mount and sb marks are not allowed on kernel internal pseudo fs,
++ * like pipe_mnt, because that would subscribe to events on all the
++ * anonynous pipes in the system.
++ *
++ * SB_NOUSER covers all of the internal pseudo fs whose objects are not
++ * exposed to user's mount namespace, but there are other SB_KERNMOUNT
++ * fs, like nsfs, debugfs, for which the value of allowing sb and mount
++ * mark is questionable. For now we leave them alone.
++ */
++ if (mark_type != FAN_MARK_INODE &&
++ path->mnt->mnt_sb->s_flags & SB_NOUSER)
++ return -EINVAL;
++
+ return 0;
+ }
+
+@@ -1476,7 +1494,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ goto fput_and_out;
+
+ if (flags & FAN_MARK_ADD) {
+- ret = fanotify_events_supported(&path, mask);
++ ret = fanotify_events_supported(&path, mask, flags);
+ if (ret)
+ goto path_put_and_out;
+ }
+diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
+index 57f0d5d9f934e..3451708fd035c 100644
+--- a/fs/notify/fdinfo.c
++++ b/fs/notify/fdinfo.c
+@@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+ inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+ inode = igrab(fsnotify_conn_inode(mark->connector));
+ if (inode) {
+- /*
+- * IN_ALL_EVENTS represents all of the mask bits
+- * that we expose to userspace. There is at
+- * least one bit (FS_EVENT_ON_CHILD) which is
+- * used only internally to the kernel.
+- */
+- u32 mask = mark->mask & IN_ALL_EVENTS;
+- seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
++ seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
+ inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
+- mask, mark->ignored_mask);
++ inotify_mark_user_mask(mark));
+ show_mark_fhandle(m, inode);
+ seq_putc(m, '\n');
+ iput(inode);
+diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
+index 2007e37119160..8f00151eb731f 100644
+--- a/fs/notify/inotify/inotify.h
++++ b/fs/notify/inotify/inotify.h
+@@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+ return container_of(fse, struct inotify_event_info, fse);
+ }
+
++/*
++ * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to
++ * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is
++ * used only internally to the kernel.
++ */
++#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)
++
++static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
++{
++ return fsn_mark->mask & INOTIFY_USER_MASK;
++}
++
+ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
+ struct fsnotify_group *group);
+ extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
+diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
+index d1a64daa0171e..b0530f75b274a 100644
+--- a/fs/notify/inotify/inotify_fsnotify.c
++++ b/fs/notify/inotify/inotify_fsnotify.c
+@@ -65,7 +65,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ struct fsnotify_event *fsn_event;
+ struct fsnotify_group *group = inode_mark->group;
+ int ret;
+- int len = 0;
++ int len = 0, wd;
+ int alloc_len = sizeof(struct inotify_event_info);
+ struct mem_cgroup *old_memcg;
+
+@@ -80,6 +80,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ i_mark = container_of(inode_mark, struct inotify_inode_mark,
+ fsn_mark);
+
++ /*
++ * We can be racing with mark being detached. Don't report event with
++ * invalid wd.
++ */
++ wd = READ_ONCE(i_mark->wd);
++ if (wd == -1)
++ return 0;
+ /*
+ * Whoever is interested in the event, pays for the allocation. Do not
+ * trigger OOM killer in the target monitoring memcg as it may have
+@@ -110,7 +117,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ fsn_event = &event->fse;
+ fsnotify_init_event(fsn_event);
+ event->mask = mask;
+- event->wd = i_mark->wd;
++ event->wd = wd;
+ event->sync_cookie = cookie;
+ event->name_len = len;
+ if (len)
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index 62051247f6d21..9fb7701d2f8a0 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -102,7 +102,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
+ mask |= FS_EVENT_ON_CHILD;
+
+ /* mask off the flags used to open the fd */
+- mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
++ mask |= (arg & INOTIFY_USER_MASK);
+
+ return mask;
+ }
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index fa1d99101f895..bea106fac0901 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -452,7 +452,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
+ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
+ {
+- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
++ mutex_lock(&group->mark_mutex);
+ fsnotify_detach_mark(mark);
+ mutex_unlock(&group->mark_mutex);
+ fsnotify_free_mark(mark);
+@@ -767,7 +767,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+ * move marks to free to to_free list in one go and then free marks in
+ * to_free list one by one.
+ */
+- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
++ mutex_lock(&group->mark_mutex);
+ list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+ if ((1U << mark->connector->type) & type_mask)
+ list_move(&mark->g_list, &to_free);
+@@ -776,7 +776,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+
+ clear:
+ while (1) {
+- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
++ mutex_lock(&group->mark_mutex);
+ if (list_empty(head)) {
+ mutex_unlock(&group->mark_mutex);
+ break;
+diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
+index d563abc3e1364..c0881d39d36a9 100644
+--- a/fs/ntfs/attrib.c
++++ b/fs/ntfs/attrib.c
+@@ -592,15 +592,39 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
+ a = (ATTR_RECORD*)((u8*)ctx->attr +
+ le32_to_cpu(ctx->attr->length));
+ for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+- le32_to_cpu(ctx->mrec->bytes_allocated))
++ u8 *mrec_end = (u8 *)ctx->mrec +
++ le32_to_cpu(ctx->mrec->bytes_allocated);
++ u8 *name_end;
++
++ /* check whether ATTR_RECORD wrap */
++ if ((u8 *)a < (u8 *)ctx->mrec)
++ break;
++
++ /* check whether Attribute Record Header is within bounds */
++ if ((u8 *)a > mrec_end ||
++ (u8 *)a + sizeof(ATTR_RECORD) > mrec_end)
++ break;
++
++ /* check whether ATTR_RECORD's name is within bounds */
++ name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
++ a->name_length * sizeof(ntfschar);
++ if (name_end > mrec_end)
+ break;
++
+ ctx->attr = a;
+ if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
+ a->type == AT_END))
+ return -ENOENT;
+ if (unlikely(!a->length))
+ break;
++
++ /* check whether ATTR_RECORD's length wrap */
++ if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a)
++ break;
++ /* check whether ATTR_RECORD's length is within bounds */
++ if ((u8 *)a + le32_to_cpu(a->length) > mrec_end)
++ break;
++
+ if (a->type != type)
+ continue;
+ /*
+diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
+index ab4f3362466d0..a43adeacd930c 100644
+--- a/fs/ntfs/file.c
++++ b/fs/ntfs/file.c
+@@ -1829,7 +1829,7 @@ again:
+ * pages being swapped out between us bringing them into memory
+ * and doing the actual copying.
+ */
+- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
++ if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
+index 4474adb393ca8..dc4aefd024b54 100644
+--- a/fs/ntfs/inode.c
++++ b/fs/ntfs/inode.c
+@@ -1829,6 +1829,13 @@ int ntfs_read_inode_mount(struct inode *vi)
+ goto err_out;
+ }
+
++ /* Sanity check offset to the first attribute */
++ if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) {
++ ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.",
++ le16_to_cpu(m->attrs_offset));
++ goto err_out;
++ }
++
+ /* Need this to sanity check attribute list references to $MFT. */
+ vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
+
+@@ -1881,6 +1888,10 @@ int ntfs_read_inode_mount(struct inode *vi)
+ }
+ /* Now allocate memory for the attribute list. */
+ ni->attr_list_size = (u32)ntfs_attr_size(a);
++ if (!ni->attr_list_size) {
++ ntfs_error(sb, "Attr_list_size is zero");
++ goto put_err_out;
++ }
+ ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
+ if (!ni->attr_list) {
+ ntfs_error(sb, "Not enough memory to allocate buffer "
+diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
+index 0d7e948cb29c9..7f69422d5191d 100644
+--- a/fs/ntfs/super.c
++++ b/fs/ntfs/super.c
+@@ -2092,7 +2092,8 @@ get_ctx_vol_failed:
+ // TODO: Initialize security.
+ /* Get the extended system files' directory inode. */
+ vol->extend_ino = ntfs_iget(sb, FILE_Extend);
+- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
++ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
++ !S_ISDIR(vol->extend_ino->i_mode)) {
+ if (!IS_ERR(vol->extend_ino))
+ iput(vol->extend_ino);
+ ntfs_error(sb, "Failed to load $Extend.");
+diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
+index e8c00dda42adb..321d55b3ca17d 100644
+--- a/fs/ntfs3/attrib.c
++++ b/fs/ntfs3/attrib.c
+@@ -101,6 +101,10 @@ int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
+
+ asize = le32_to_cpu(attr->size);
+ run_off = le16_to_cpu(attr->nres.run_off);
++
++ if (run_off > asize)
++ return -EINVAL;
++
+ err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn,
+ vcn ? *vcn : svcn, Add2Ptr(attr, run_off),
+ asize - run_off);
+@@ -1142,6 +1146,11 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ CLST svcn, evcn;
+ u16 ro;
+
++ if (!ni) {
++ /* Is record corrupted? */
++ return -ENOENT;
++ }
++
+ attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL);
+ if (!attr) {
+ /* Is record corrupted? */
+@@ -1157,6 +1166,10 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ }
+
+ ro = le16_to_cpu(attr->nres.run_off);
++
++ if (ro > le32_to_cpu(attr->size))
++ return -EINVAL;
++
+ err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn,
+ Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro);
+ if (err < 0)
+@@ -1832,6 +1845,11 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
+ u16 le_sz;
+ u16 roff = le16_to_cpu(attr->nres.run_off);
+
++ if (roff > le32_to_cpu(attr->size)) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn,
+ evcn1 - 1, svcn, Add2Ptr(attr, roff),
+ le32_to_cpu(attr->size) - roff);
+@@ -1949,7 +1967,7 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
+ return -ENOENT;
+
+ if (!attr_b->non_res) {
+- u32 data_size = le32_to_cpu(attr->res.data_size);
++ u32 data_size = le32_to_cpu(attr_b->res.data_size);
+ u32 from, to;
+
+ if (vbo > data_size)
+diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
+index bad6d8a849a24..81c22df27c725 100644
+--- a/fs/ntfs3/attrlist.c
++++ b/fs/ntfs3/attrlist.c
+@@ -52,7 +52,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
+
+ if (!attr->non_res) {
+ lsize = le32_to_cpu(attr->res.data_size);
+- le = kmalloc(al_aligned(lsize), GFP_NOFS);
++ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
+ if (!le) {
+ err = -ENOMEM;
+ goto out;
+@@ -68,6 +68,11 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
+
+ run_init(&ni->attr_list.run);
+
++ if (run_off > le32_to_cpu(attr->size)) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno,
+ 0, le64_to_cpu(attr->nres.evcn), 0,
+ Add2Ptr(attr, run_off),
+@@ -75,7 +80,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
+ if (err < 0)
+ goto out;
+
+- le = kmalloc(al_aligned(lsize), GFP_NOFS);
++ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
+ if (!le) {
+ err = -ENOMEM;
+ goto out;
+diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
+index aa184407520f0..3261b69cdac33 100644
+--- a/fs/ntfs3/bitmap.c
++++ b/fs/ntfs3/bitmap.c
+@@ -666,7 +666,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
+ if (!wnd->bits_last)
+ wnd->bits_last = wbits;
+
+- wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
++ wnd->free_bits =
++ kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN);
+ if (!wnd->free_bits)
+ return -ENOMEM;
+
+@@ -1432,7 +1433,7 @@ int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
+
+ down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
+
+- for (; iw < wnd->nbits; iw++, wbit = 0) {
++ for (; iw < wnd->nwnd; iw++, wbit = 0) {
+ CLST lcn_wnd = iw * wbits;
+ struct buffer_head *bh;
+
+diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
+index 43b1451bff539..c526e0427f2bf 100644
+--- a/fs/ntfs3/file.c
++++ b/fs/ntfs3/file.c
+@@ -488,13 +488,13 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
+
+ new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size));
+
+- ni_lock(ni);
+-
+ truncate_setsize(inode, new_size);
+
++ ni_lock(ni);
++
+ down_write(&ni->file.run_lock);
+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+- &new_valid, true, NULL);
++ &new_valid, ni->mi.sbi->options->prealloc, NULL);
+ up_write(&ni->file.run_lock);
+
+ if (new_valid < ni->i_valid)
+@@ -661,7 +661,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
+ /*
+ * Normal file: Allocate clusters, do not change 'valid' size.
+ */
+- err = ntfs_set_size(inode, max(end, i_size));
++ loff_t new_size = max(end, i_size);
++
++ err = inode_newsize_ok(inode, new_size);
++ if (err)
++ goto out;
++
++ err = ntfs_set_size(inode, new_size);
+ if (err)
+ goto out;
+
+@@ -761,7 +767,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ }
+ inode_dio_wait(inode);
+
+- if (attr->ia_size < oldsize)
++ if (attr->ia_size <= oldsize)
+ err = ntfs_truncate(inode, attr->ia_size);
+ else if (attr->ia_size > oldsize)
+ err = ntfs_extend(inode, attr->ia_size, 0, NULL);
+@@ -989,7 +995,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
+ frame_vbo = pos & ~(frame_size - 1);
+ index = frame_vbo >> PAGE_SHIFT;
+
+- if (unlikely(iov_iter_fault_in_readable(from, bytes))) {
++ if (unlikely(fault_in_iov_iter_readable(from, bytes))) {
+ err = -EFAULT;
+ goto out;
+ }
+diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
+index 6f47a9c17f896..9a1744955d1cf 100644
+--- a/fs/ntfs3/frecord.c
++++ b/fs/ntfs3/frecord.c
+@@ -101,7 +101,7 @@ void ni_clear(struct ntfs_inode *ni)
+ {
+ struct rb_node *node;
+
+- if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec))
++ if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec))
+ ni_delete_all(ni);
+
+ al_destroy(ni);
+@@ -567,6 +567,12 @@ static int ni_repack(struct ntfs_inode *ni)
+ }
+
+ roff = le16_to_cpu(attr->nres.run_off);
++
++ if (roff > le32_to_cpu(attr->size)) {
++ err = -EINVAL;
++ break;
++ }
++
+ err = run_unpack(&run, sbi, ni->mi.rno, svcn, evcn, svcn,
+ Add2Ptr(attr, roff),
+ le32_to_cpu(attr->size) - roff);
+@@ -843,6 +849,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
+ if (err)
+ goto out1;
+
++ err = -EINVAL;
+ /* Call mi_remove_attr() in reverse order to keep pointers 'arr_move' valid. */
+ while (to_free > 0) {
+ struct ATTRIB *b = arr_move[--nb];
+@@ -851,7 +858,8 @@ int ni_create_attr_list(struct ntfs_inode *ni)
+
+ attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off),
+ b->name_len, asize, name_off);
+- WARN_ON(!attr);
++ if (!attr)
++ goto out1;
+
+ mi_get_ref(mi, &le_b[nb]->ref);
+ le_b[nb]->id = attr->id;
+@@ -861,17 +869,20 @@ int ni_create_attr_list(struct ntfs_inode *ni)
+ attr->id = le_b[nb]->id;
+
+ /* Remove from primary record. */
+- WARN_ON(!mi_remove_attr(NULL, &ni->mi, b));
++ if (!mi_remove_attr(NULL, &ni->mi, b))
++ goto out1;
+
+ if (to_free <= asize)
+ break;
+ to_free -= asize;
+- WARN_ON(!nb);
++ if (!nb)
++ goto out1;
+ }
+
+ attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0,
+ lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT);
+- WARN_ON(!attr);
++ if (!attr)
++ goto out1;
+
+ attr->non_res = 0;
+ attr->flags = 0;
+@@ -891,9 +902,10 @@ out1:
+ kfree(ni->attr_list.le);
+ ni->attr_list.le = NULL;
+ ni->attr_list.size = 0;
++ return err;
+
+ out:
+- return err;
++ return 0;
+ }
+
+ /*
+@@ -1541,6 +1553,9 @@ int ni_delete_all(struct ntfs_inode *ni)
+ asize = le32_to_cpu(attr->size);
+ roff = le16_to_cpu(attr->nres.run_off);
+
++ if (roff > asize)
++ return -EINVAL;
++
+ /* run==1 means unpack and deallocate. */
+ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
+ Add2Ptr(attr, roff), asize - roff);
+@@ -1964,10 +1979,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+
+ vcn += clen;
+
+- if (vbo + bytes >= end) {
++ if (vbo + bytes >= end)
+ bytes = end - vbo;
+- flags |= FIEMAP_EXTENT_LAST;
+- }
+
+ if (vbo + bytes <= valid) {
+ ;
+@@ -1977,6 +1990,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ /* vbo < valid && valid < vbo + bytes */
+ u64 dlen = valid - vbo;
+
++ if (vbo + dlen >= end)
++ flags |= FIEMAP_EXTENT_LAST;
++
+ err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
+ flags);
+ if (err < 0)
+@@ -1995,6 +2011,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
+ }
+
++ if (vbo + bytes >= end)
++ flags |= FIEMAP_EXTENT_LAST;
++
+ err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
+ if (err < 0)
+ break;
+@@ -2238,6 +2257,11 @@ remove_wof:
+ asize = le32_to_cpu(attr->size);
+ roff = le16_to_cpu(attr->nres.run_off);
+
++ if (roff > asize) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ /*run==1 Means unpack and deallocate. */
+ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
+ Add2Ptr(attr, roff), asize - roff);
+@@ -3171,6 +3195,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
+ return 0;
+ }
+
++ if (!ni->mi.mrec)
++ goto out;
++
+ if (is_rec_inuse(ni->mi.mrec) &&
+ !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
+ bool modified = false;
+diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
+index 06492f088d602..59f813cbdaa8e 100644
+--- a/fs/ntfs3/fslog.c
++++ b/fs/ntfs3/fslog.c
+@@ -1132,7 +1132,7 @@ static int read_log_page(struct ntfs_log *log, u32 vbo,
+ return -EINVAL;
+
+ if (!*buffer) {
+- to_free = kmalloc(bytes, GFP_NOFS);
++ to_free = kmalloc(log->page_size, GFP_NOFS);
+ if (!to_free)
+ return -ENOMEM;
+ *buffer = to_free;
+@@ -1180,12 +1180,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+ struct restart_info *info)
+ {
+ u32 skip, vbo;
+- struct RESTART_HDR *r_page = kmalloc(DefaultLogPageSize, GFP_NOFS);
+-
+- if (!r_page)
+- return -ENOMEM;
+-
+- memset(info, 0, sizeof(struct restart_info));
++ struct RESTART_HDR *r_page = NULL;
+
+ /* Determine which restart area we are looking for. */
+ if (first) {
+@@ -1199,7 +1194,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+ /* Loop continuously until we succeed. */
+ for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+ bool usa_error;
+- u32 sys_page_size;
+ bool brst, bchk;
+ struct RESTART_AREA *ra;
+
+@@ -1253,24 +1247,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+ goto check_result;
+ }
+
+- /* Read the entire restart area. */
+- sys_page_size = le32_to_cpu(r_page->sys_page_size);
+- if (DefaultLogPageSize != sys_page_size) {
+- kfree(r_page);
+- r_page = kzalloc(sys_page_size, GFP_NOFS);
+- if (!r_page)
+- return -ENOMEM;
+-
+- if (read_log_page(log, vbo,
+- (struct RECORD_PAGE_HDR **)&r_page,
+- &usa_error)) {
+- /* Ignore any errors. */
+- kfree(r_page);
+- r_page = NULL;
+- continue;
+- }
+- }
+-
+ if (is_client_area_valid(r_page, usa_error)) {
+ info->valid_page = true;
+ ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
+@@ -2599,7 +2575,7 @@ static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
+ return find_log_rec(log, *lsn, lcb);
+ }
+
+-static inline bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
++bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
+ {
+ __le16 mask;
+ u32 min_de, de_off, used, total;
+@@ -2729,6 +2705,9 @@ static inline bool check_attr(const struct MFT_REC *rec,
+ return false;
+ }
+
++ if (run_off > asize)
++ return false;
++
+ if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
+ Add2Ptr(attr, run_off), asize - run_off) < 0) {
+ return false;
+@@ -3791,10 +3770,11 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
+ if (!log)
+ return -ENOMEM;
+
++ memset(&rst_info, 0, sizeof(struct restart_info));
++
+ log->ni = ni;
+ log->l_size = l_size;
+ log->one_page_buf = kmalloc(page_size, GFP_NOFS);
+-
+ if (!log->one_page_buf) {
+ err = -ENOMEM;
+ goto out;
+@@ -3842,6 +3822,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
+ if (rst_info.vbo)
+ goto check_restart_area;
+
++ memset(&rst_info2, 0, sizeof(struct restart_info));
+ err = log_read_rst(log, l_size, false, &rst_info2);
+
+ /* Determine which restart area to use. */
+@@ -4085,8 +4066,10 @@ process_log:
+ if (client == LFS_NO_CLIENT_LE) {
+ /* Insert "NTFS" client LogFile. */
+ client = ra->client_idx[0];
+- if (client == LFS_NO_CLIENT_LE)
+- return -EINVAL;
++ if (client == LFS_NO_CLIENT_LE) {
++ err = -EINVAL;
++ goto out;
++ }
+
+ t16 = le16_to_cpu(client);
+ cr = ca + t16;
+@@ -4273,6 +4256,10 @@ check_attribute_names:
+ rec_len -= t32;
+
+ attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
++ if (!attr_names) {
++ err = -ENOMEM;
++ goto out;
++ }
+
+ lcb_put(lcb);
+ lcb = NULL;
+@@ -4767,6 +4754,12 @@ fake_attr:
+ u16 roff = le16_to_cpu(attr->nres.run_off);
+ CLST svcn = le64_to_cpu(attr->nres.svcn);
+
++ if (roff > t32) {
++ kfree(oa->attr);
++ oa->attr = NULL;
++ goto fake_attr;
++ }
++
+ err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
+ le64_to_cpu(attr->nres.evcn), svcn,
+ Add2Ptr(attr, roff), t32 - roff);
+@@ -5055,7 +5048,7 @@ undo_action_next:
+ goto add_allocated_vcns;
+
+ vcn = le64_to_cpu(lrh->target_vcn);
+- vcn &= ~(log->clst_per_page - 1);
++ vcn &= ~(u64)(log->clst_per_page - 1);
+
+ add_allocated_vcns:
+ for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
+diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
+index 4de9acb169689..0ae70010b01d3 100644
+--- a/fs/ntfs3/fsntfs.c
++++ b/fs/ntfs3/fsntfs.c
+@@ -154,7 +154,7 @@ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
+ /* Check errors. */
+ if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
+ fn * SECTOR_SIZE > bytes) {
+- return -EINVAL; /* Native chkntfs returns ok! */
++ return -E_NTFS_CORRUPT;
+ }
+
+ /* Get fixup pointer. */
+@@ -831,10 +831,15 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
+ {
+ int err;
+ struct super_block *sb = sbi->sb;
+- u32 blocksize = sb->s_blocksize;
++ u32 blocksize;
+ sector_t block1, block2;
+ u32 bytes;
+
++ if (!sb)
++ return -EINVAL;
++
++ blocksize = sb->s_blocksize;
++
+ if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
+ return 0;
+
+@@ -1681,6 +1686,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
+
+ out:
+ if (err) {
++ make_bad_inode(inode);
+ iput(inode);
+ ni = ERR_PTR(err);
+ }
+@@ -1873,9 +1879,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
+ goto out;
+ }
+
+- root_sdh = resident_data(attr);
++ root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT));
+ if (root_sdh->type != ATTR_ZERO ||
+- root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) {
++ root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH ||
++ offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) {
+ err = -EINVAL;
+ goto out;
+ }
+@@ -1891,9 +1898,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
+ goto out;
+ }
+
+- root_sii = resident_data(attr);
++ root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT));
+ if (root_sii->type != ATTR_ZERO ||
+- root_sii->rule != NTFS_COLLATION_TYPE_UINT) {
++ root_sii->rule != NTFS_COLLATION_TYPE_UINT ||
++ offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) {
+ err = -EINVAL;
+ goto out;
+ }
+diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
+index 6f81e3a49abfb..7705adc926b86 100644
+--- a/fs/ntfs3/index.c
++++ b/fs/ntfs3/index.c
+@@ -605,11 +605,58 @@ static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr,
+ return e;
+ }
+
++/*
++ * index_hdr_check
++ *
++ * return true if INDEX_HDR is valid
++ */
++static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes)
++{
++ u32 end = le32_to_cpu(hdr->used);
++ u32 tot = le32_to_cpu(hdr->total);
++ u32 off = le32_to_cpu(hdr->de_off);
++
++ if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot ||
++ off + sizeof(struct NTFS_DE) > end) {
++ /* incorrect index buffer. */
++ return false;
++ }
++
++ return true;
++}
++
++/*
++ * index_buf_check
++ *
++ * return true if INDEX_BUFFER seems is valid
++ */
++static bool index_buf_check(const struct INDEX_BUFFER *ib, u32 bytes,
++ const CLST *vbn)
++{
++ const struct NTFS_RECORD_HEADER *rhdr = &ib->rhdr;
++ u16 fo = le16_to_cpu(rhdr->fix_off);
++ u16 fn = le16_to_cpu(rhdr->fix_num);
++
++ if (bytes <= offsetof(struct INDEX_BUFFER, ihdr) ||
++ rhdr->sign != NTFS_INDX_SIGNATURE ||
++ fo < sizeof(struct INDEX_BUFFER)
++ /* Check index buffer vbn. */
++ || (vbn && *vbn != le64_to_cpu(ib->vbn)) || (fo % sizeof(short)) ||
++ fo + fn * sizeof(short) >= bytes ||
++ fn != ((bytes >> SECTOR_SHIFT) + 1)) {
++ /* incorrect index buffer. */
++ return false;
++ }
++
++ return index_hdr_check(&ib->ihdr,
++ bytes - offsetof(struct INDEX_BUFFER, ihdr));
++}
++
+ void fnd_clear(struct ntfs_fnd *fnd)
+ {
+ int i;
+
+- for (i = 0; i < fnd->level; i++) {
++ for (i = fnd->level - 1; i >= 0; i--) {
+ struct indx_node *n = fnd->nodes[i];
+
+ if (!n)
+@@ -679,9 +726,13 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx,
+ u32 e_size, e_key_len;
+ u32 end = le32_to_cpu(hdr->used);
+ u32 off = le32_to_cpu(hdr->de_off);
++ u32 total = le32_to_cpu(hdr->total);
+ u16 offs[128];
+
+ fill_table:
++ if (end > total)
++ return NULL;
++
+ if (off + sizeof(struct NTFS_DE) > end)
+ return NULL;
+
+@@ -798,6 +849,10 @@ static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr,
+ u32 off = PtrOffset(hdr, re);
+ int bytes = used - (off + esize);
+
++ /* check INDEX_HDR valid before using INDEX_HDR */
++ if (!check_index_header(hdr, le32_to_cpu(hdr->total)))
++ return NULL;
++
+ if (off >= used || esize < sizeof(struct NTFS_DE) ||
+ bytes < sizeof(struct NTFS_DE))
+ return NULL;
+@@ -820,9 +875,16 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+ u32 t32;
+ const struct INDEX_ROOT *root = resident_data(attr);
+
++ t32 = le32_to_cpu(attr->res.data_size);
++ if (t32 <= offsetof(struct INDEX_ROOT, ihdr) ||
++ !index_hdr_check(&root->ihdr,
++ t32 - offsetof(struct INDEX_ROOT, ihdr))) {
++ goto out;
++ }
++
+ /* Check root fields. */
+ if (!root->index_block_clst)
+- return -EINVAL;
++ goto out;
+
+ indx->type = type;
+ indx->idx2vbn_bits = __ffs(root->index_block_clst);
+@@ -834,19 +896,19 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+ if (t32 < sbi->cluster_size) {
+ /* Index record is smaller than a cluster, use 512 blocks. */
+ if (t32 != root->index_block_clst * SECTOR_SIZE)
+- return -EINVAL;
++ goto out;
+
+ /* Check alignment to a cluster. */
+ if ((sbi->cluster_size >> SECTOR_SHIFT) &
+ (root->index_block_clst - 1)) {
+- return -EINVAL;
++ goto out;
+ }
+
+ indx->vbn2vbo_bits = SECTOR_SHIFT;
+ } else {
+ /* Index record must be a multiple of cluster size. */
+ if (t32 != root->index_block_clst << sbi->cluster_bits)
+- return -EINVAL;
++ goto out;
+
+ indx->vbn2vbo_bits = sbi->cluster_bits;
+ }
+@@ -854,7 +916,14 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi,
+ init_rwsem(&indx->run_lock);
+
+ indx->cmp = get_cmp_func(root);
+- return indx->cmp ? 0 : -EINVAL;
++ if (!indx->cmp)
++ goto out;
++
++ return 0;
++
++out:
++ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
++ return -EINVAL;
+ }
+
+ static struct indx_node *indx_new(struct ntfs_index *indx,
+@@ -926,6 +995,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct ATTRIB *a;
+ const struct INDEX_NAMES *in = &s_index_names[indx->type];
++ struct INDEX_ROOT *root = NULL;
+
+ a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL,
+ mi);
+@@ -935,7 +1005,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+ if (attr)
+ *attr = a;
+
+- return resident_data_ex(a, sizeof(struct INDEX_ROOT));
++ root = resident_data_ex(a, sizeof(struct INDEX_ROOT));
++
++ /* length check */
++ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) >
++ le32_to_cpu(a->res.data_size)) {
++ return NULL;
++ }
++
++ return root;
+ }
+
+ static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni,
+@@ -1012,15 +1090,34 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn,
+ goto out;
+
+ ok:
++ if (!index_buf_check(ib, bytes, &vbn)) {
++ ntfs_inode_err(&ni->vfs_inode, "directory corrupted");
++ ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
++ err = -EINVAL;
++ goto out;
++ }
++
+ if (err == -E_NTFS_FIXUP) {
+ ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0);
+ err = 0;
+ }
+
++ /* check for index header length */
++ if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ in->index = ib;
+ *node = in;
+
+ out:
++ if (err == -E_NTFS_CORRUPT) {
++ ntfs_inode_err(&ni->vfs_inode, "directory corrupted");
++ ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
++ err = -EINVAL;
++ }
++
+ if (ib != in->index)
+ kfree(ib);
+
+@@ -1600,9 +1697,9 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
+
+ if (err) {
+ /* Restore root. */
+- if (mi_resize_attr(mi, attr, -ds_root))
++ if (mi_resize_attr(mi, attr, -ds_root)) {
+ memcpy(attr, a_root, asize);
+- else {
++ } else {
+ /* Bug? */
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ }
+@@ -1994,7 +2091,7 @@ static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni,
+ const struct NTFS_DE *e, bool trim)
+ {
+ int err;
+- struct indx_node *n;
++ struct indx_node *n = NULL;
+ struct INDEX_HDR *hdr;
+ CLST vbn = de_get_vbn(e);
+ size_t i;
+diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
+index 859951d785cb2..176b04a5d1adb 100644
+--- a/fs/ntfs3/inode.c
++++ b/fs/ntfs3/inode.c
+@@ -81,7 +81,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
+ le16_to_cpu(ref->seq), le16_to_cpu(rec->seq));
+ goto out;
+ } else if (!is_rec_inuse(rec)) {
+- err = -EINVAL;
++ err = -ESTALE;
+ ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino);
+ goto out;
+ }
+@@ -92,12 +92,20 @@ static struct inode *ntfs_read_mft(struct inode *inode,
+ goto out;
+ }
+
+- if (!is_rec_base(rec))
+- goto Ok;
++ if (!is_rec_base(rec)) {
++ err = -EINVAL;
++ goto out;
++ }
+
+ /* Record should contain $I30 root. */
+ is_dir = rec->flags & RECORD_FLAG_DIR;
+
++ /* MFT_REC_MFT is not a dir */
++ if (is_dir && ino == MFT_REC_MFT) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ inode->i_generation = le16_to_cpu(rec->seq);
+
+ /* Enumerate all struct Attributes MFT. */
+@@ -129,6 +137,16 @@ next_attr:
+ rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size);
+ asize = le32_to_cpu(attr->size);
+
++ if (le16_to_cpu(attr->name_off) + attr->name_len > asize)
++ goto out;
++
++ if (attr->non_res) {
++ t64 = le64_to_cpu(attr->nres.alloc_size);
++ if (le64_to_cpu(attr->nres.data_size) > t64 ||
++ le64_to_cpu(attr->nres.valid_size) > t64)
++ goto out;
++ }
++
+ switch (attr->type) {
+ case ATTR_STD:
+ if (attr->non_res ||
+@@ -247,7 +265,6 @@ next_attr:
+ goto out;
+
+ root = Add2Ptr(attr, roff);
+- is_root = true;
+
+ if (attr->name_len != ARRAY_SIZE(I30_NAME) ||
+ memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME)))
+@@ -260,6 +277,7 @@ next_attr:
+ if (!is_dir)
+ goto next_attr;
+
++ is_root = true;
+ ni->ni_flags |= NI_FLAG_DIR;
+
+ err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30);
+@@ -364,7 +382,13 @@ next_attr:
+ attr_unpack_run:
+ roff = le16_to_cpu(attr->nres.run_off);
+
++ if (roff > asize) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ t64 = le64_to_cpu(attr->nres.svcn);
++
+ err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn),
+ t64, Add2Ptr(attr, roff), asize - roff);
+ if (err < 0)
+@@ -430,6 +454,7 @@ end_enum:
+ } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) &&
+ fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) {
+ /* Records in $Extend are not a files or general directories. */
++ inode->i_op = &ntfs_file_inode_operations;
+ } else {
+ err = -EINVAL;
+ goto out;
+@@ -449,7 +474,6 @@ end_enum:
+ inode->i_flags |= S_NOSEC;
+ }
+
+-Ok:
+ if (ino == MFT_REC_MFT && !sb->s_root)
+ sbi->mft.ni = NULL;
+
+@@ -503,6 +527,9 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
+ make_bad_inode(inode);
+ }
+
++ if (IS_ERR(inode) && name)
++ ntfs_set_state(sb->s_fs_info, NTFS_DIRTY_ERROR);
++
+ return inode;
+ }
+
+@@ -757,6 +784,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ loff_t vbo = iocb->ki_pos;
+ loff_t end;
+ int wr = iov_iter_rw(iter) & WRITE;
++ size_t iter_count = iov_iter_count(iter);
+ loff_t valid;
+ ssize_t ret;
+
+@@ -770,10 +798,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ wr ? ntfs_get_block_direct_IO_W
+ : ntfs_get_block_direct_IO_R);
+
+- if (ret <= 0)
++ if (ret > 0)
++ end = vbo + ret;
++ else if (wr && ret == -EIOCBQUEUED)
++ end = vbo + iter_count;
++ else
+ goto out;
+
+- end = vbo + ret;
+ valid = ni->i_valid;
+ if (wr) {
+ if (end > valid && !S_ISBLK(inode->i_mode)) {
+@@ -1620,10 +1651,8 @@ out6:
+ ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref);
+
+ out5:
+- if (S_ISDIR(mode) || run_is_empty(&ni->file.run))
+- goto out4;
+-
+- run_deallocate(sbi, &ni->file.run, false);
++ if (!S_ISDIR(mode))
++ run_deallocate(sbi, &ni->file.run, false);
+
+ out4:
+ clear_rec_inuse(rec);
+@@ -1937,8 +1966,6 @@ const struct inode_operations ntfs_link_inode_operations = {
+ .setattr = ntfs3_setattr,
+ .listxattr = ntfs_listxattr,
+ .permission = ntfs_permission,
+- .get_acl = ntfs_get_acl,
+- .set_acl = ntfs_set_acl,
+ };
+
+ const struct address_space_operations ntfs_aops = {
+diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
+index bc741213ad848..bff1934e044e5 100644
+--- a/fs/ntfs3/namei.c
++++ b/fs/ntfs3/namei.c
+@@ -86,6 +86,16 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry,
+ __putname(uni);
+ }
+
++ /*
++ * Check for a null pointer
++ * If the MFT record of ntfs inode is not a base record, inode->i_op can be NULL.
++ * This causes null pointer dereference in d_splice_alias().
++ */
++ if (!IS_ERR_OR_NULL(inode) && !inode->i_op) {
++ iput(inode);
++ inode = ERR_PTR(-EINVAL);
++ }
++
+ return d_splice_alias(inode, dentry);
+ }
+
+diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
+index 9cc396b117bfd..0f38d558169a1 100644
+--- a/fs/ntfs3/ntfs.h
++++ b/fs/ntfs3/ntfs.h
+@@ -436,9 +436,6 @@ static inline u64 attr_svcn(const struct ATTRIB *attr)
+ return attr->non_res ? le64_to_cpu(attr->nres.svcn) : 0;
+ }
+
+-/* The size of resident attribute by its resident size. */
+-#define BYTES_PER_RESIDENT(b) (0x18 + (b))
+-
+ static_assert(sizeof(struct ATTRIB) == 0x48);
+ static_assert(sizeof(((struct ATTRIB *)NULL)->res) == 0x08);
+ static_assert(sizeof(((struct ATTRIB *)NULL)->nres) == 0x38);
+diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
+index 8aaec7e0804ef..510ed2ea1c483 100644
+--- a/fs/ntfs3/ntfs_fs.h
++++ b/fs/ntfs3/ntfs_fs.h
+@@ -54,6 +54,8 @@ enum utf16_endian;
+ #define E_NTFS_NONRESIDENT 556
+ /* NTFS specific error code about punch hole. */
+ #define E_NTFS_NOTALIGNED 557
++/* NTFS specific error code when on-disk struct is corrupted. */
++#define E_NTFS_CORRUPT 558
+
+
+ /* sbi->flags */
+@@ -575,6 +577,7 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
+ bool ni_is_dirty(struct inode *inode);
+
+ /* Globals from fslog.c */
++bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
+ int log_replay(struct ntfs_inode *ni, bool *initialized);
+
+ /* Globals from fsntfs.c */
+@@ -788,12 +791,12 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
+ u32 run_buf_size, CLST *packed_vcns);
+ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+ CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+- u32 run_buf_size);
++ int run_buf_size);
+
+ #ifdef NTFS3_CHECK_FREE_CLST
+ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+ CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+- u32 run_buf_size);
++ int run_buf_size);
+ #else
+ #define run_unpack_ex run_unpack
+ #endif
+diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
+index 861e35791506e..938fc286963f2 100644
+--- a/fs/ntfs3/record.c
++++ b/fs/ntfs3/record.c
+@@ -124,7 +124,7 @@ int mi_read(struct mft_inode *mi, bool is_mft)
+ struct rw_semaphore *rw_lock = NULL;
+
+ if (is_mounted(sbi)) {
+- if (!is_mft) {
++ if (!is_mft && mft_ni) {
+ rw_lock = &mft_ni->file.run_lock;
+ down_read(rw_lock);
+ }
+@@ -148,7 +148,7 @@ int mi_read(struct mft_inode *mi, bool is_mft)
+ ni_lock(mft_ni);
+ down_write(rw_lock);
+ }
+- err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, &mft_ni->file.run,
++ err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, run,
+ vbo >> sbi->cluster_bits);
+ if (rw_lock) {
+ up_write(rw_lock);
+@@ -180,6 +180,12 @@ ok:
+ return 0;
+
+ out:
++ if (err == -E_NTFS_CORRUPT) {
++ ntfs_err(sbi->sb, "mft corrupted");
++ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
++ err = -EINVAL;
++ }
++
+ return err;
+ }
+
+@@ -242,8 +248,8 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
+ if ((t32 & 0xf) || (t32 > 0x100))
+ return NULL;
+
+- /* Check boundary. */
+- if (off + asize > used)
++ /* Check overflow and boundary. */
++ if (off + asize < off || off + asize > used)
+ return NULL;
+
+ /* Check size of attribute. */
+@@ -260,6 +266,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
+ if (t16 + t32 > asize)
+ return NULL;
+
++ if (attr->name_len &&
++ le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > t16) {
++ return NULL;
++ }
++
+ return attr;
+ }
+
+diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
+index a8fec651f9732..6940ee5432ba3 100644
+--- a/fs/ntfs3/run.c
++++ b/fs/ntfs3/run.c
+@@ -872,12 +872,15 @@ error:
+ */
+ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+ CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+- u32 run_buf_size)
++ int run_buf_size)
+ {
+ u64 prev_lcn, vcn64, lcn, next_vcn;
+ const u8 *run_last, *run_0;
+ bool is_mft = ino == MFT_REC_MFT;
+
++ if (run_buf_size < 0)
++ return -EINVAL;
++
+ /* Check for empty. */
+ if (evcn + 1 == svcn)
+ return 0;
+@@ -999,7 +1002,7 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+ */
+ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
+ CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf,
+- u32 run_buf_size)
++ int run_buf_size)
+ {
+ int ret, err;
+ CLST next_vcn, lcn, len;
+diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
+index d41d76979e121..33b1833ad525c 100644
+--- a/fs/ntfs3/super.c
++++ b/fs/ntfs3/super.c
+@@ -30,6 +30,7 @@
+ #include <linux/fs_context.h>
+ #include <linux/fs_parser.h>
+ #include <linux/log2.h>
++#include <linux/minmax.h>
+ #include <linux/module.h>
+ #include <linux/nls.h>
+ #include <linux/seq_file.h>
+@@ -390,7 +391,7 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
+ return -EINVAL;
+ }
+
+- memcpy(sbi->options, new_opts, sizeof(*new_opts));
++ swap(sbi->options, fc->fs_private);
+
+ return 0;
+ }
+@@ -668,9 +669,11 @@ static u32 format_size_gb(const u64 bytes, u32 *mb)
+
+ static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
+ {
+- return boot->sectors_per_clusters <= 0x80
+- ? boot->sectors_per_clusters
+- : (1u << (0 - boot->sectors_per_clusters));
++ if (boot->sectors_per_clusters <= 0x80)
++ return boot->sectors_per_clusters;
++ if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */
++ return 1U << -(s8)boot->sectors_per_clusters;
++ return -EINVAL;
+ }
+
+ /*
+@@ -713,6 +716,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
+
+ /* cluster size: 512, 1K, 2K, 4K, ... 2M */
+ sct_per_clst = true_sectors_per_clst(boot);
++ if ((int)sct_per_clst < 0)
++ goto out;
+ if (!is_power_of_2(sct_per_clst))
+ goto out;
+
+@@ -784,7 +789,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
+ : (u32)boot->record_size
+ << sbi->cluster_bits;
+
+- if (record_size > MAXIMUM_BYTES_PER_MFT)
++ if (record_size > MAXIMUM_BYTES_PER_MFT || record_size < SECTOR_SIZE)
+ goto out;
+
+ sbi->record_bits = blksize_bits(record_size);
+@@ -897,6 +902,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
+ ref.high = 0;
+
+ sbi->sb = sb;
++ sbi->options = fc->fs_private;
++ fc->fs_private = NULL;
+ sb->s_flags |= SB_NODIRATIME;
+ sb->s_magic = 0x7366746e; // "ntfs"
+ sb->s_op = &ntfs_sops;
+@@ -1129,7 +1136,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
+ goto put_inode_out;
+ }
+ bytes = inode->i_size;
+- sbi->def_table = t = kmalloc(bytes, GFP_NOFS);
++ sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN);
+ if (!t) {
+ err = -ENOMEM;
+ goto put_inode_out;
+@@ -1248,9 +1255,9 @@ load_root:
+ ref.low = cpu_to_le32(MFT_REC_ROOT);
+ ref.seq = cpu_to_le16(MFT_REC_ROOT);
+ inode = ntfs_iget5(sb, &ref, &NAME_ROOT);
+- if (IS_ERR(inode)) {
++ if (IS_ERR(inode) || !inode->i_op) {
+ ntfs_err(sb, "Failed to load root.");
+- err = PTR_ERR(inode);
++ err = IS_ERR(inode) ? PTR_ERR(inode) : -EINVAL;
+ goto out;
+ }
+
+@@ -1260,8 +1267,6 @@ load_root:
+ goto put_inode_out;
+ }
+
+- fc->fs_private = NULL;
+-
+ return 0;
+
+ put_inode_out:
+@@ -1271,6 +1276,7 @@ out:
+ * Free resources here.
+ * ntfs_fs_free will be called with fc->s_fs_info = NULL
+ */
++ put_mount_options(sbi->options);
+ put_ntfs(sbi);
+ sb->s_fs_info = NULL;
+
+@@ -1414,7 +1420,6 @@ static int ntfs_init_fs_context(struct fs_context *fc)
+ mutex_init(&sbi->compress.mtx_lzx);
+ #endif
+
+- sbi->options = opts;
+ fc->s_fs_info = sbi;
+ ok:
+ fc->fs_private = opts;
+diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
+index afd0ddad826ff..aaf4bafa2c702 100644
+--- a/fs/ntfs3/xattr.c
++++ b/fs/ntfs3/xattr.c
+@@ -42,28 +42,26 @@ static inline size_t packed_ea_size(const struct EA_FULL *ea)
+ * Assume there is at least one xattr in the list.
+ */
+ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes,
+- const char *name, u8 name_len, u32 *off)
++ const char *name, u8 name_len, u32 *off, u32 *ea_sz)
+ {
+- *off = 0;
++ u32 ea_size;
+
+- if (!ea_all || !bytes)
++ *off = 0;
++ if (!ea_all)
+ return false;
+
+- for (;;) {
++ for (; *off < bytes; *off += ea_size) {
+ const struct EA_FULL *ea = Add2Ptr(ea_all, *off);
+- u32 next_off = *off + unpacked_ea_size(ea);
+-
+- if (next_off > bytes)
+- return false;
+-
++ ea_size = unpacked_ea_size(ea);
+ if (ea->name_len == name_len &&
+- !memcmp(ea->name, name, name_len))
++ !memcmp(ea->name, name, name_len)) {
++ if (ea_sz)
++ *ea_sz = ea_size;
+ return true;
+-
+- *off = next_off;
+- if (next_off >= bytes)
+- return false;
++ }
+ }
++
++ return false;
+ }
+
+ /*
+@@ -74,12 +72,12 @@ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes,
+ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
+ size_t add_bytes, const struct EA_INFO **info)
+ {
+- int err;
++ int err = -EINVAL;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTR_LIST_ENTRY *le = NULL;
+ struct ATTRIB *attr_info, *attr_ea;
+ void *ea_p;
+- u32 size;
++ u32 size, off, ea_size;
+
+ static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA));
+
+@@ -96,52 +94,87 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
+
+ *info = resident_data_ex(attr_info, sizeof(struct EA_INFO));
+ if (!*info)
+- return -EINVAL;
++ goto out;
+
+ /* Check Ea limit. */
+ size = le32_to_cpu((*info)->size);
+- if (size > sbi->ea_max_size)
+- return -EFBIG;
++ if (size > sbi->ea_max_size) {
++ err = -EFBIG;
++ goto out;
++ }
+
+- if (attr_size(attr_ea) > sbi->ea_max_size)
+- return -EFBIG;
++ if (attr_size(attr_ea) > sbi->ea_max_size) {
++ err = -EFBIG;
++ goto out;
++ }
++
++ if (!size) {
++ /* EA info persists, but xattr is empty. Looks like EA problem. */
++ goto out;
++ }
+
+ /* Allocate memory for packed Ea. */
+- ea_p = kmalloc(size + add_bytes, GFP_NOFS);
++ ea_p = kmalloc(size_add(size, add_bytes), GFP_NOFS);
+ if (!ea_p)
+ return -ENOMEM;
+
+- if (!size) {
+- ;
+- } else if (attr_ea->non_res) {
++ if (attr_ea->non_res) {
+ struct runs_tree run;
+
+ run_init(&run);
+
+- err = attr_load_runs(attr_ea, ni, &run, NULL);
++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size);
+ if (!err)
+ err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL);
+ run_close(&run);
+
+ if (err)
+- goto out;
++ goto out1;
+ } else {
+ void *p = resident_data_ex(attr_ea, size);
+
+- if (!p) {
+- err = -EINVAL;
+- goto out;
+- }
++ if (!p)
++ goto out1;
+ memcpy(ea_p, p, size);
+ }
+
+ memset(Add2Ptr(ea_p, size), 0, add_bytes);
++
++ /* Check all attributes for consistency. */
++ for (off = 0; off < size; off += ea_size) {
++ const struct EA_FULL *ef = Add2Ptr(ea_p, off);
++ u32 bytes = size - off;
++
++ /* Check if we can use field ea->size. */
++ if (bytes < sizeof(ef->size))
++ goto out1;
++
++ if (ef->size) {
++ ea_size = le32_to_cpu(ef->size);
++ if (ea_size > bytes)
++ goto out1;
++ continue;
++ }
++
++ /* Check if we can use fields ef->name_len and ef->elength. */
++ if (bytes < offsetof(struct EA_FULL, name))
++ goto out1;
++
++ ea_size = ALIGN(struct_size(ef, name,
++ 1 + ef->name_len +
++ le16_to_cpu(ef->elength)),
++ 4);
++ if (ea_size > bytes)
++ goto out1;
++ }
++
+ *ea = ea_p;
+ return 0;
+
+-out:
++out1:
+ kfree(ea_p);
+- *ea = NULL;
++out:
++ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
+ return err;
+ }
+
+@@ -163,6 +196,7 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
+ const struct EA_FULL *ea;
+ u32 off, size;
+ int err;
++ int ea_size;
+ size_t ret;
+
+ err = ntfs_read_ea(ni, &ea_all, 0, &info);
+@@ -175,8 +209,12 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
+ size = le32_to_cpu(info->size);
+
+ /* Enumerate all xattrs. */
+- for (ret = 0, off = 0; off < size; off += unpacked_ea_size(ea)) {
++ for (ret = 0, off = 0; off < size; off += ea_size) {
+ ea = Add2Ptr(ea_all, off);
++ ea_size = unpacked_ea_size(ea);
++
++ if (!ea->name_len)
++ break;
+
+ if (buffer) {
+ if (ret + ea->name_len + 1 > bytes_per_buffer) {
+@@ -227,7 +265,8 @@ static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len,
+ goto out;
+
+ /* Enumerate all xattrs. */
+- if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) {
++ if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off,
++ NULL)) {
+ err = -ENODATA;
+ goto out;
+ }
+@@ -269,7 +308,7 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name,
+ struct EA_FULL *new_ea;
+ struct EA_FULL *ea_all = NULL;
+ size_t add, new_pack;
+- u32 off, size;
++ u32 off, size, ea_sz;
+ __le16 size_pack;
+ struct ATTRIB *attr;
+ struct ATTR_LIST_ENTRY *le;
+@@ -303,9 +342,8 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name,
+ size_pack = ea_info.size_pack;
+ }
+
+- if (info && find_ea(ea_all, size, name, name_len, &off)) {
++ if (info && find_ea(ea_all, size, name, name_len, &off, &ea_sz)) {
+ struct EA_FULL *ea;
+- size_t ea_sz;
+
+ if (flags & XATTR_CREATE) {
+ err = -EEXIST;
+@@ -328,8 +366,6 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name,
+ if (ea->flags & FILE_NEED_EA)
+ le16_add_cpu(&ea_info.count, -1);
+
+- ea_sz = unpacked_ea_size(ea);
+-
+ le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea));
+
+ memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz);
+@@ -443,6 +479,11 @@ update_ea:
+ /* Delete xattr, ATTR_EA */
+ ni_remove_attr_le(ni, attr, mi, le);
+ } else if (attr->non_res) {
++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0,
++ size);
++ if (err)
++ goto out;
++
+ err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0);
+ if (err)
+ goto out;
+@@ -476,8 +517,7 @@ out:
+ }
+
+ #ifdef CONFIG_NTFS3_FS_POSIX_ACL
+-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
+- struct inode *inode, int type,
++static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type,
+ int locked)
+ {
+ struct ntfs_inode *ni = ntfs_i(inode);
+@@ -512,7 +552,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
+
+ /* Translate extended attribute to acl. */
+ if (err >= 0) {
+- acl = posix_acl_from_xattr(mnt_userns, buf, err);
++ acl = posix_acl_from_xattr(&init_user_ns, buf, err);
+ } else if (err == -ENODATA) {
+ acl = NULL;
+ } else {
+@@ -535,37 +575,32 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
+ if (rcu)
+ return ERR_PTR(-ECHILD);
+
+- /* TODO: init_user_ns? */
+- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
++ return ntfs_get_acl_ex(inode, type, 0);
+ }
+
+ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl,
+- int type)
++ int type, bool init_acl)
+ {
+ const char *name;
+ size_t size, name_len;
+- void *value = NULL;
+- int err = 0;
++ void *value;
++ int err;
+ int flags;
++ umode_t mode;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
++ mode = inode->i_mode;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+- if (acl) {
+- umode_t mode = inode->i_mode;
+-
++ /* Do not change i_mode if we are in init_acl */
++ if (acl && !init_acl) {
+ err = posix_acl_update_mode(mnt_userns, inode, &mode,
+ &acl);
+ if (err)
+- goto out;
+-
+- if (inode->i_mode != mode) {
+- inode->i_mode = mode;
+- mark_inode_dirty(inode);
+- }
++ return err;
+ }
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
+@@ -592,7 +627,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+ value = kmalloc(size, GFP_NOFS);
+ if (!value)
+ return -ENOMEM;
+- err = posix_acl_to_xattr(mnt_userns, acl, value, size);
++ err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (err < 0)
+ goto out;
+ flags = 0;
+@@ -601,8 +636,13 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
+ err = ntfs_set_ea(inode, name, name_len, value, size, flags);
+ if (err == -ENODATA && !size)
+ err = 0; /* Removing non existed xattr. */
+- if (!err)
++ if (!err) {
+ set_cached_acl(inode, type, acl);
++ if (inode->i_mode != mode) {
++ inode->i_mode = mode;
++ mark_inode_dirty(inode);
++ }
++ }
+
+ out:
+ kfree(value);
+@@ -616,7 +656,7 @@ out:
+ int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ struct posix_acl *acl, int type)
+ {
+- return ntfs_set_acl_ex(mnt_userns, inode, acl, type);
++ return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false);
+ }
+
+ /*
+@@ -636,7 +676,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+
+ if (default_acl) {
+ err = ntfs_set_acl_ex(mnt_userns, inode, default_acl,
+- ACL_TYPE_DEFAULT);
++ ACL_TYPE_DEFAULT, true);
+ posix_acl_release(default_acl);
+ } else {
+ inode->i_default_acl = NULL;
+@@ -647,7 +687,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
+ else {
+ if (!err)
+ err = ntfs_set_acl_ex(mnt_userns, inode, acl,
+- ACL_TYPE_ACCESS);
++ ACL_TYPE_ACCESS, true);
+ posix_acl_release(acl);
+ }
+
+@@ -901,6 +941,9 @@ set_new_fa:
+ err = ntfs_set_ea(inode, name, name_len, value, size, flags);
+
+ out:
++ inode->i_ctime = current_time(inode);
++ mark_inode_dirty(inode);
++
+ return err;
+ }
+
+@@ -981,7 +1024,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry)
+ }
+
+ // clang-format off
+-static const struct xattr_handler ntfs_xattr_handler = {
++static const struct xattr_handler ntfs_other_xattr_handler = {
+ .prefix = "",
+ .get = ntfs_getxattr,
+ .set = ntfs_setxattr,
+@@ -989,7 +1032,11 @@ static const struct xattr_handler ntfs_xattr_handler = {
+ };
+
+ const struct xattr_handler *ntfs_xattr_handlers[] = {
+- &ntfs_xattr_handler,
++#ifdef CONFIG_NTFS3_FS_POSIX_ACL
++ &posix_acl_access_xattr_handler,
++ &posix_acl_default_xattr_handler,
++#endif
++ &ntfs_other_xattr_handler,
+ NULL,
+ };
+ // clang-format on
+diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
+index 68d11c295dd34..c051074016cec 100644
+--- a/fs/ocfs2/aops.c
++++ b/fs/ocfs2/aops.c
+@@ -1978,11 +1978,25 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
+ }
+
+ if (unlikely(copied < len) && wc->w_target_page) {
++ loff_t new_isize;
++
+ if (!PageUptodate(wc->w_target_page))
+ copied = 0;
+
+- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+- start+len);
++ new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
++ if (new_isize > page_offset(wc->w_target_page))
++ ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
++ start+len);
++ else {
++ /*
++ * When page is fully beyond new isize (data copy
++ * failed), do not bother zeroing the page. Invalidate
++ * it instead so that writeback does not get confused
++ * put page & buffer dirty bits into inconsistent
++ * state.
++ */
++ block_invalidatepage(wc->w_target_page, 0, PAGE_SIZE);
++ }
+ }
+ if (wc->w_target_page)
+ flush_dcache_page(wc->w_target_page);
+diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c
+index 29f183a15798e..c1d67c806e1d3 100644
+--- a/fs/ocfs2/dlmfs/userdlm.c
++++ b/fs/ocfs2/dlmfs/userdlm.c
+@@ -433,6 +433,11 @@ again:
+ }
+
+ spin_lock(&lockres->l_lock);
++ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
++ spin_unlock(&lockres->l_lock);
++ status = -EAGAIN;
++ goto bail;
++ }
+
+ /* We only compare against the currently granted level
+ * here. If the lock is blocked waiting on a downconvert,
+@@ -595,7 +600,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
+ spin_lock(&lockres->l_lock);
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+ spin_unlock(&lockres->l_lock);
+- return 0;
++ goto bail;
+ }
+
+ lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+@@ -609,12 +614,17 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
+ }
+
+ if (lockres->l_ro_holders || lockres->l_ex_holders) {
++ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
+ spin_unlock(&lockres->l_lock);
+ goto bail;
+ }
+
+ status = 0;
+ if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
++ /*
++ * lock is never requested, leave USER_LOCK_IN_TEARDOWN set
++ * to avoid new lock request coming in.
++ */
+ spin_unlock(&lockres->l_lock);
+ goto bail;
+ }
+@@ -625,6 +635,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
+
+ status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
+ if (status) {
++ spin_lock(&lockres->l_lock);
++ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
++ lockres->l_flags &= ~USER_LOCK_BUSY;
++ spin_unlock(&lockres->l_lock);
+ user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
+ goto bail;
+ }
+diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
+index 801e60bab9555..c28bc983a7b1c 100644
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
+ ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
+ ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
+
+- ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+- osb->cconn = NULL;
++ if (osb->cconn) {
++ ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
++ osb->cconn = NULL;
+
+- ocfs2_dlm_shutdown_debug(osb);
++ ocfs2_dlm_shutdown_debug(osb);
++ }
+ }
+
+ static int ocfs2_drop_lock(struct ocfs2_super *osb,
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index 54d7843c02114..d64aea53e1506 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -476,10 +476,11 @@ int ocfs2_truncate_file(struct inode *inode,
+ * greater than page size, so we have to truncate them
+ * anyway.
+ */
+- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+- truncate_inode_pages(inode->i_mapping, new_i_size);
+
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++ unmap_mapping_range(inode->i_mapping,
++ new_i_size + PAGE_SIZE - 1, 0, 1);
++ truncate_inode_pages(inode->i_mapping, new_i_size);
+ status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
+ i_size_read(inode), 1);
+ if (status)
+@@ -498,6 +499,9 @@ int ocfs2_truncate_file(struct inode *inode,
+ goto bail_unlock_sem;
+ }
+
++ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
++ truncate_inode_pages(inode->i_mapping, new_i_size);
++
+ status = ocfs2_commit_truncate(osb, inode, di_bh);
+ if (status < 0) {
+ mlog_errno(status);
+@@ -1990,7 +1994,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
+ }
+ }
+
+- if (file && should_remove_suid(file->f_path.dentry)) {
++ if (file && setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ ret = __ocfs2_write_remove_suid(inode, di_bh);
+ if (ret) {
+ mlog_errno(ret);
+@@ -2099,14 +2103,20 @@ static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
+ struct ocfs2_space_resv sr;
+ int change_size = 1;
+ int cmd = OCFS2_IOC_RESVSP64;
++ int ret = 0;
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+ if (!ocfs2_writes_unwritten_extents(osb))
+ return -EOPNOTSUPP;
+
+- if (mode & FALLOC_FL_KEEP_SIZE)
++ if (mode & FALLOC_FL_KEEP_SIZE) {
+ change_size = 0;
++ } else {
++ ret = inode_newsize_ok(inode, offset + len);
++ if (ret)
++ return ret;
++ }
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ cmd = OCFS2_IOC_UNRESVSP64;
+@@ -2278,7 +2288,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
+ * inode. There's also the dinode i_size state which
+ * can be lost via setattr during extending writes (we
+ * set inode->i_size at the end of a write. */
+- if (should_remove_suid(dentry)) {
++ if (setattr_should_drop_suidgid(&init_user_ns, inode)) {
+ if (meta_level == 0) {
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
+index 4f15750aac5d5..86864a90de2cd 100644
+--- a/fs/ocfs2/journal.c
++++ b/fs/ocfs2/journal.c
+@@ -157,7 +157,7 @@ static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
+ replay_map->rm_state = REPLAY_DONE;
+ }
+
+-static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
++void ocfs2_free_replay_slots(struct ocfs2_super *osb)
+ {
+ struct ocfs2_replay_map *replay_map = osb->replay_map;
+
+diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
+index d158acb8b38a8..a54f20bce9fef 100644
+--- a/fs/ocfs2/journal.h
++++ b/fs/ocfs2/journal.h
+@@ -150,6 +150,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb);
+ void ocfs2_recovery_exit(struct ocfs2_super *osb);
+
+ int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
++void ocfs2_free_replay_slots(struct ocfs2_super *osb);
+ /*
+ * Journal Control:
+ * Initialize, Load, Shutdown, Wipe a journal.
+diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
+index 192cad0662d8b..b1e32ec4a9d41 100644
+--- a/fs/ocfs2/move_extents.c
++++ b/fs/ocfs2/move_extents.c
+@@ -105,14 +105,6 @@ static int __ocfs2_move_extent(handle_t *handle,
+ */
+ replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
+
+- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+- context->et.et_root_bh,
+- OCFS2_JOURNAL_ACCESS_WRITE);
+- if (ret) {
+- mlog_errno(ret);
+- goto out;
+- }
+-
+ ret = ocfs2_split_extent(handle, &context->et, path, index,
+ &replace_rec, context->meta_ac,
+ &context->dealloc);
+@@ -121,8 +113,6 @@ static int __ocfs2_move_extent(handle_t *handle,
+ goto out;
+ }
+
+- ocfs2_journal_dirty(handle, context->et.et_root_bh);
+-
+ context->new_phys_cpos = new_p_cpos;
+
+ /*
+@@ -444,7 +434,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
+ bg = (struct ocfs2_group_desc *)gd_bh->b_data;
+
+ if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
+- le16_to_cpu(bg->bg_bits))) {
++ (le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
+
+ *ret_bh = gd_bh;
+ *vict_bit = (vict_blkno - blkno) >>
+@@ -559,6 +549,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
+ last_free_bits++;
+
+ if (last_free_bits == move_len) {
++ i -= move_len;
+ *goal_bit = i;
+ *phys_cpos = base_cpos + i;
+ break;
+@@ -1030,18 +1021,19 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
+
+ context->range = &range;
+
++ /*
++ * ok, the default theshold for the defragmentation
++ * is 1M, since our maximum clustersize was 1M also.
++ * any thought?
++ */
++ if (!range.me_threshold)
++ range.me_threshold = 1024 * 1024;
++
++ if (range.me_threshold > i_size_read(inode))
++ range.me_threshold = i_size_read(inode);
++
+ if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
+ context->auto_defrag = 1;
+- /*
+- * ok, the default theshold for the defragmentation
+- * is 1M, since our maximum clustersize was 1M also.
+- * any thought?
+- */
+- if (!range.me_threshold)
+- range.me_threshold = 1024 * 1024;
+-
+- if (range.me_threshold > i_size_read(inode))
+- range.me_threshold = i_size_read(inode);
+
+ if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
+ context->partial = 1;
+diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
+index 2c46ff6ba4ea2..0b4f3d287cbc0 100644
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -197,6 +197,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
+ * callers. */
+ if (S_ISDIR(mode))
+ set_nlink(inode, 2);
++ mode = mode_strip_sgid(&init_user_ns, dir, mode);
+ inode_init_owner(&init_user_ns, inode, dir, mode);
+ status = dquot_initialize(inode);
+ if (status)
+@@ -231,6 +232,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
+ handle_t *handle = NULL;
+ struct ocfs2_super *osb;
+ struct ocfs2_dinode *dirfe;
++ struct ocfs2_dinode *fe = NULL;
+ struct buffer_head *new_fe_bh = NULL;
+ struct inode *inode = NULL;
+ struct ocfs2_alloc_context *inode_ac = NULL;
+@@ -240,6 +242,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
+ int want_meta = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
++ .name = NULL,
+ .enable = 1,
+ };
+ int did_quota_inode = 0;
+@@ -381,6 +384,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
+ goto leave;
+ }
+
++ fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
+ if (S_ISDIR(mode)) {
+ status = ocfs2_fill_new_dir(osb, handle, dir, inode,
+ new_fe_bh, data_ac, meta_ac);
+@@ -453,8 +457,11 @@ roll_back:
+ leave:
+ if (status < 0 && did_quota_inode)
+ dquot_free_inode(inode);
+- if (handle)
++ if (handle) {
++ if (status < 0 && fe)
++ ocfs2_set_links_count(fe, 0);
+ ocfs2_commit_trans(osb, handle);
++ }
+
+ ocfs2_inode_unlock(dir, 1);
+ if (did_block_signals)
+@@ -631,18 +638,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
+ return status;
+ }
+
+- status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
++ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
+ parent_fe_bh, handle, inode_ac,
+ fe_blkno, suballoc_loc, suballoc_bit);
+- if (status < 0) {
+- u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit);
+- int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode,
+- inode_ac->ac_bh, suballoc_bit, bg_blkno, 1);
+- if (tmp)
+- mlog_errno(tmp);
+- }
+-
+- return status;
+ }
+
+ static int ocfs2_mkdir(struct user_namespace *mnt_userns,
+@@ -1537,6 +1535,10 @@ static int ocfs2_rename(struct user_namespace *mnt_userns,
+ status = ocfs2_add_entry(handle, new_dentry, old_inode,
+ OCFS2_I(old_inode)->ip_blkno,
+ new_dir_bh, &target_insert);
++ if (status < 0) {
++ mlog_errno(status);
++ goto bail;
++ }
+ }
+
+ old_inode->i_ctime = current_time(old_inode);
+@@ -1808,6 +1810,7 @@ static int ocfs2_symlink(struct user_namespace *mnt_userns,
+ int want_clusters = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
++ .name = NULL,
+ .enable = 1,
+ };
+ int did_quota = 0, did_quota_inode = 0;
+@@ -2027,8 +2030,11 @@ bail:
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (status < 0 && did_quota_inode)
+ dquot_free_inode(inode);
+- if (handle)
++ if (handle) {
++ if (status < 0 && fe)
++ ocfs2_set_links_count(fe, 0);
+ ocfs2_commit_trans(osb, handle);
++ }
+
+ ocfs2_inode_unlock(dir, 1);
+ if (did_block_signals)
+diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
+index bb62cc2e0211b..cf21aecdf5476 100644
+--- a/fs/ocfs2/ocfs2.h
++++ b/fs/ocfs2/ocfs2.h
+@@ -277,7 +277,6 @@ enum ocfs2_mount_options
+ OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
+ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
+ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
+ };
+
+ #define OCFS2_OSB_SOFT_RO 0x0001
+@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
+
+ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
+ {
+- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ }
+
+ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
+index f033de733adb3..effe92c7d6937 100644
+--- a/fs/ocfs2/quota_global.c
++++ b/fs/ocfs2/quota_global.c
+@@ -337,7 +337,6 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+ /* Read information header from global quota file */
+ int ocfs2_global_read_info(struct super_block *sb, int type)
+ {
+- struct inode *gqinode = NULL;
+ unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE };
+ struct ocfs2_global_disk_dqinfo dinfo;
+@@ -346,29 +345,31 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
+ u64 pcount;
+ int status;
+
++ oinfo->dqi_gi.dqi_sb = sb;
++ oinfo->dqi_gi.dqi_type = type;
++ ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
++ oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
++ oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
++ oinfo->dqi_gqi_bh = NULL;
++ oinfo->dqi_gqi_count = 0;
++
+ /* Read global header */
+- gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
++ oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+ OCFS2_INVALID_SLOT);
+- if (!gqinode) {
++ if (!oinfo->dqi_gqinode) {
+ mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
+ type);
+ status = -EINVAL;
+ goto out_err;
+ }
+- oinfo->dqi_gi.dqi_sb = sb;
+- oinfo->dqi_gi.dqi_type = type;
+- oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
+- oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
+- oinfo->dqi_gqi_bh = NULL;
+- oinfo->dqi_gqi_count = 0;
+- oinfo->dqi_gqinode = gqinode;
++
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+- status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk,
++ status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk,
+ &pcount, NULL);
+ if (status < 0)
+ goto out_unlock;
+diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
+index 0e4b16d4c037f..b1a8b046f4c22 100644
+--- a/fs/ocfs2/quota_local.c
++++ b/fs/ocfs2/quota_local.c
+@@ -702,8 +702,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
+ info->dqi_priv = oinfo;
+ oinfo->dqi_type = type;
+ INIT_LIST_HEAD(&oinfo->dqi_chunk);
+- oinfo->dqi_gqinode = NULL;
+- ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
+ oinfo->dqi_rec = NULL;
+ oinfo->dqi_lqi_bh = NULL;
+ oinfo->dqi_libh = NULL;
+diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
+index 0b0ae3ebb0cf5..da7718cef735e 100644
+--- a/fs/ocfs2/slot_map.c
++++ b/fs/ocfs2/slot_map.c
+@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
+ int i, ret = -ENOSPC;
+
+ if ((preferred >= 0) && (preferred < si->si_num_slots)) {
+- if (!si->si_slots[preferred].sl_valid ||
+- !si->si_slots[preferred].sl_node_num) {
++ if (!si->si_slots[preferred].sl_valid) {
+ ret = preferred;
+ goto out;
+ }
+ }
+
+ for(i = 0; i < si->si_num_slots; i++) {
+- if (!si->si_slots[i].sl_valid ||
+- !si->si_slots[i].sl_node_num) {
++ if (!si->si_slots[i].sl_valid) {
+ ret = i;
+ break;
+ }
+@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
+ spin_lock(&osb->osb_lock);
+ ocfs2_update_slot_info(si);
+
+- if (ocfs2_mount_local(osb))
+- /* use slot 0 directly in local mode */
+- slot = 0;
+- else {
+- /* search for ourselves first and take the slot if it already
+- * exists. Perhaps we need to mark this in a variable for our
+- * own journal recovery? Possibly not, though we certainly
+- * need to warn to the user */
+- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++ /* search for ourselves first and take the slot if it already
++ * exists. Perhaps we need to mark this in a variable for our
++ * own journal recovery? Possibly not, though we certainly
++ * need to warn to the user */
++ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++ if (slot < 0) {
++ /* if no slot yet, then just take 1st available
++ * one. */
++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (slot < 0) {
+- /* if no slot yet, then just take 1st available
+- * one. */
+- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+- if (slot < 0) {
+- spin_unlock(&osb->osb_lock);
+- mlog(ML_ERROR, "no free slots available!\n");
+- status = -EINVAL;
+- goto bail;
+- }
+- } else
+- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+- "already allocated to this node!\n",
+- slot, osb->dev_str);
+- }
++ spin_unlock(&osb->osb_lock);
++ mlog(ML_ERROR, "no free slots available!\n");
++ status = -EINVAL;
++ goto bail;
++ }
++ } else
++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
++ "allocated to this node!\n", slot, osb->dev_str);
+
+ ocfs2_set_slot(si, slot, osb->node_num);
+ osb->slot_num = slot;
+diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
+index 16f1bfc407f2a..955f475f9aca6 100644
+--- a/fs/ocfs2/stackglue.c
++++ b/fs/ocfs2/stackglue.c
+@@ -703,6 +703,8 @@ static struct ctl_table_header *ocfs2_table_header;
+
+ static int __init ocfs2_stack_glue_init(void)
+ {
++ int ret;
++
+ strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
+
+ ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
+@@ -712,7 +714,11 @@ static int __init ocfs2_stack_glue_init(void)
+ return -ENOMEM; /* or something. */
+ }
+
+- return ocfs2_sysfs_init();
++ ret = ocfs2_sysfs_init();
++ if (ret)
++ unregister_sysctl_table(ocfs2_table_header);
++
++ return ret;
+ }
+
+ static void __exit ocfs2_stack_glue_exit(void)
+diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
+index 481017e1dac5a..166c8918c825a 100644
+--- a/fs/ocfs2/suballoc.c
++++ b/fs/ocfs2/suballoc.c
+@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
+ {
+ struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ struct journal_head *jh;
+- int ret = 1;
++ int ret;
+
+ if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
+ return 0;
+
+- if (!buffer_jbd(bg_bh))
++ jh = jbd2_journal_grab_journal_head(bg_bh);
++ if (!jh)
+ return 1;
+
+- jbd_lock_bh_journal_head(bg_bh);
+- if (buffer_jbd(bg_bh)) {
+- jh = bh2jh(bg_bh);
+- spin_lock(&jh->b_state_lock);
+- bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+- if (bg)
+- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+- else
+- ret = 1;
+- spin_unlock(&jh->b_state_lock);
+- }
+- jbd_unlock_bh_journal_head(bg_bh);
++ spin_lock(&jh->b_state_lock);
++ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
++ if (bg)
++ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
++ else
++ ret = 1;
++ spin_unlock(&jh->b_state_lock);
++ jbd2_journal_put_journal_head(jh);
+
+ return ret;
+ }
+diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
+index 5c914ce9b3ac9..1904f782bd41a 100644
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -173,7 +173,6 @@ enum {
+ Opt_dir_resv_level,
+ Opt_journal_async_commit,
+ Opt_err_cont,
+- Opt_nocluster,
+ Opt_err,
+ };
+
+@@ -207,7 +206,6 @@ static const match_table_t tokens = {
+ {Opt_dir_resv_level, "dir_resv_level=%u"},
+ {Opt_journal_async_commit, "journal_async_commit"},
+ {Opt_err_cont, "errors=continue"},
+- {Opt_nocluster, "nocluster"},
+ {Opt_err, NULL}
+ };
+
+@@ -619,13 +617,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
+ goto out;
+ }
+
+- tmp = OCFS2_MOUNT_NOCLUSTER;
+- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+- ret = -EINVAL;
+- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+- goto out;
+- }
+-
+ tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE;
+ if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+@@ -866,7 +857,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
+ }
+
+ if (ocfs2_userspace_stack(osb) &&
+- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+ strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
+ OCFS2_STACK_LABEL_LEN)) {
+ mlog(ML_ERROR,
+@@ -963,8 +953,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
+ for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
+ if (!sb_has_quota_loaded(sb, type))
+ continue;
+- oinfo = sb_dqinfo(sb, type)->dqi_priv;
+- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
++ if (!sb_has_quota_suspended(sb, type)) {
++ oinfo = sb_dqinfo(sb, type)->dqi_priv;
++ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
++ }
+ inode = igrab(sb->s_dquot.files[type]);
+ /* Turn off quotas. This will remove all dquot structures from
+ * memory and so they will be automatically synced to global
+@@ -990,28 +982,27 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+
+ if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
+ status = -EINVAL;
+- goto read_super_error;
++ goto out;
+ }
+
+ /* probe for superblock */
+ status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
+ if (status < 0) {
+ mlog(ML_ERROR, "superblock probe failed!\n");
+- goto read_super_error;
++ goto out;
+ }
+
+ status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
+- osb = OCFS2_SB(sb);
+- if (status < 0) {
+- mlog_errno(status);
+- goto read_super_error;
+- }
+ brelse(bh);
+ bh = NULL;
++ if (status < 0)
++ goto out;
++
++ osb = OCFS2_SB(sb);
+
+ if (!ocfs2_check_set_options(sb, &parsed_options)) {
+ status = -EINVAL;
+- goto read_super_error;
++ goto out_super;
+ }
+ osb->s_mount_opt = parsed_options.mount_opt;
+ osb->s_atime_quantum = parsed_options.atime_quantum;
+@@ -1028,7 +1019,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+
+ status = ocfs2_verify_userspace_stack(osb, &parsed_options);
+ if (status)
+- goto read_super_error;
++ goto out_super;
+
+ sb->s_magic = OCFS2_SUPER_MAGIC;
+
+@@ -1042,7 +1033,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ status = -EACCES;
+ mlog(ML_ERROR, "Readonly device detected but readonly "
+ "mount was not specified.\n");
+- goto read_super_error;
++ goto out_super;
+ }
+
+ /* You should not be able to start a local heartbeat
+@@ -1051,7 +1042,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ status = -EROFS;
+ mlog(ML_ERROR, "Local heartbeat specified on readonly "
+ "device.\n");
+- goto read_super_error;
++ goto out_super;
+ }
+
+ status = ocfs2_check_journals_nolocks(osb);
+@@ -1060,9 +1051,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ mlog(ML_ERROR, "Recovery required on readonly "
+ "file system, but write access is "
+ "unavailable.\n");
+- else
+- mlog_errno(status);
+- goto read_super_error;
++ goto out_super;
+ }
+
+ ocfs2_set_ro_flag(osb, 1);
+@@ -1078,10 +1067,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ }
+
+ status = ocfs2_verify_heartbeat(osb);
+- if (status < 0) {
+- mlog_errno(status);
+- goto read_super_error;
+- }
++ if (status < 0)
++ goto out_super;
+
+ osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
+ ocfs2_debugfs_root);
+@@ -1095,34 +1082,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+
+ status = ocfs2_mount_volume(sb);
+ if (status < 0)
+- goto read_super_error;
++ goto out_debugfs;
+
+ if (osb->root_inode)
+ inode = igrab(osb->root_inode);
+
+ if (!inode) {
+ status = -EIO;
+- mlog_errno(status);
+- goto read_super_error;
++ goto out_dismount;
+ }
+
+- root = d_make_root(inode);
+- if (!root) {
+- status = -ENOMEM;
+- mlog_errno(status);
+- goto read_super_error;
+- }
+-
+- sb->s_root = root;
+-
+- ocfs2_complete_mount_recovery(osb);
+-
+ osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
+ &ocfs2_kset->kobj);
+ if (!osb->osb_dev_kset) {
+ status = -ENOMEM;
+ mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
+- goto read_super_error;
++ goto out_dismount;
+ }
+
+ /* Create filecheck sysfs related directories/files at
+@@ -1131,9 +1106,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ status = -ENOMEM;
+ mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
+ "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
+- goto read_super_error;
++ goto out_dismount;
++ }
++
++ root = d_make_root(inode);
++ if (!root) {
++ status = -ENOMEM;
++ goto out_dismount;
+ }
+
++ sb->s_root = root;
++
++ ocfs2_complete_mount_recovery(osb);
++
+ if (ocfs2_mount_local(osb))
+ snprintf(nodestr, sizeof(nodestr), "local");
+ else
+@@ -1145,11 +1130,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
+ "ordered");
+
+- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+- "without cluster aware mode.\n", osb->dev_str);
+-
+ atomic_set(&osb->vol_state, VOLUME_MOUNTED);
+ wake_up(&osb->osb_mount_event);
+
+@@ -1179,17 +1159,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+
+ return status;
+
+-read_super_error:
+- brelse(bh);
+-
+- if (status)
+- mlog_errno(status);
++out_dismount:
++ atomic_set(&osb->vol_state, VOLUME_DISABLED);
++ wake_up(&osb->osb_mount_event);
++ ocfs2_free_replay_slots(osb);
++ ocfs2_dismount_volume(sb, 1);
++ goto out;
+
+- if (osb) {
+- atomic_set(&osb->vol_state, VOLUME_DISABLED);
+- wake_up(&osb->osb_mount_event);
+- ocfs2_dismount_volume(sb, 1);
+- }
++out_debugfs:
++ debugfs_remove_recursive(osb->osb_debug_root);
++out_super:
++ ocfs2_release_system_inodes(osb);
++ kfree(osb->recovery_map);
++ ocfs2_delete_osb(osb);
++ kfree(osb);
++out:
++ mlog_errno(status);
+
+ return status;
+ }
+@@ -1456,9 +1441,6 @@ static int ocfs2_parse_options(struct super_block *sb,
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
+- case Opt_nocluster:
+- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+- break;
+ default:
+ mlog(ML_ERROR,
+ "Unrecognized mount option \"%s\" "
+@@ -1570,9 +1552,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
+ if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ seq_printf(s, ",journal_async_commit");
+
+- if (opts & OCFS2_MOUNT_NOCLUSTER)
+- seq_printf(s, ",nocluster");
+-
+ return 0;
+ }
+
+@@ -1804,11 +1783,10 @@ static int ocfs2_get_sector(struct super_block *sb,
+ static int ocfs2_mount_volume(struct super_block *sb)
+ {
+ int status = 0;
+- int unlock_super = 0;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+ if (ocfs2_is_hard_readonly(osb))
+- goto leave;
++ goto out;
+
+ mutex_init(&osb->obs_trim_fs_mutex);
+
+@@ -1818,44 +1796,58 @@ static int ocfs2_mount_volume(struct super_block *sb)
+ if (status == -EBADR && ocfs2_userspace_stack(osb))
+ mlog(ML_ERROR, "couldn't mount because cluster name on"
+ " disk does not match the running cluster name.\n");
+- goto leave;
++ goto out;
+ }
+
+ status = ocfs2_super_lock(osb, 1);
+ if (status < 0) {
+ mlog_errno(status);
+- goto leave;
++ goto out_dlm;
+ }
+- unlock_super = 1;
+
+ /* This will load up the node map and add ourselves to it. */
+ status = ocfs2_find_slot(osb);
+ if (status < 0) {
+ mlog_errno(status);
+- goto leave;
++ goto out_super_lock;
+ }
+
+ /* load all node-local system inodes */
+ status = ocfs2_init_local_system_inodes(osb);
+ if (status < 0) {
+ mlog_errno(status);
+- goto leave;
++ goto out_super_lock;
+ }
+
+ status = ocfs2_check_volume(osb);
+ if (status < 0) {
+ mlog_errno(status);
+- goto leave;
++ goto out_system_inodes;
+ }
+
+ status = ocfs2_truncate_log_init(osb);
+- if (status < 0)
++ if (status < 0) {
+ mlog_errno(status);
++ goto out_check_volume;
++ }
+
+-leave:
+- if (unlock_super)
+- ocfs2_super_unlock(osb, 1);
++ ocfs2_super_unlock(osb, 1);
++ return 0;
+
++out_check_volume:
++ ocfs2_free_replay_slots(osb);
++out_system_inodes:
++ if (osb->local_alloc_state == OCFS2_LA_ENABLED)
++ ocfs2_shutdown_local_alloc(osb);
++ ocfs2_release_system_inodes(osb);
++ /* before journal shutdown, we should release slot_info */
++ ocfs2_free_slot_info(osb);
++ ocfs2_journal_shutdown(osb);
++out_super_lock:
++ ocfs2_super_unlock(osb, 1);
++out_dlm:
++ ocfs2_dlm_shutdown(osb, 0);
++out:
+ return status;
+ }
+
+@@ -1928,8 +1920,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
+ !ocfs2_is_hard_readonly(osb))
+ hangup_needed = 1;
+
+- if (osb->cconn)
+- ocfs2_dlm_shutdown(osb, hangup_needed);
++ ocfs2_dlm_shutdown(osb, hangup_needed);
+
+ ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
+ debugfs_remove_recursive(osb->osb_debug_root);
+diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
+index dd784eb0cd7c4..c101a71a52ae8 100644
+--- a/fs/ocfs2/xattr.c
++++ b/fs/ocfs2/xattr.c
+@@ -7259,9 +7259,21 @@ static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
+ static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
+ {
++ struct ocfs2_security_xattr_info *si = fs_info;
+ const struct xattr *xattr;
+ int err = 0;
+
++ if (si) {
++ si->value = kmemdup(xattr_array->value, xattr_array->value_len,
++ GFP_KERNEL);
++ if (!si->value)
++ return -ENOMEM;
++
++ si->name = xattr_array->name;
++ si->value_len = xattr_array->value_len;
++ return 0;
++ }
++
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
+ xattr->name, xattr->value,
+@@ -7277,13 +7289,23 @@ int ocfs2_init_security_get(struct inode *inode,
+ const struct qstr *qstr,
+ struct ocfs2_security_xattr_info *si)
+ {
++ int ret;
++
+ /* check whether ocfs2 support feature xattr */
+ if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
+ return -EOPNOTSUPP;
+- if (si)
+- return security_old_inode_init_security(inode, dir, qstr,
+- &si->name, &si->value,
+- &si->value_len);
++ if (si) {
++ ret = security_inode_init_security(inode, dir, qstr,
++ &ocfs2_initxattrs, si);
++ /*
++ * security_inode_init_security() does not return -EOPNOTSUPP,
++ * we have to check the xattr ourselves.
++ */
++ if (!ret && !si->name)
++ si->enable = 0;
++
++ return ret;
++ }
+
+ return security_inode_init_security(inode, dir, qstr,
+ &ocfs2_initxattrs, NULL);
+diff --git a/fs/open.c b/fs/open.c
+index daa324606a41f..159a2765b7eb2 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -32,6 +32,7 @@
+ #include <linux/ima.h>
+ #include <linux/dnotify.h>
+ #include <linux/compat.h>
++#include <linux/mnt_idmapping.h>
+
+ #include "internal.h"
+
+@@ -53,7 +54,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
+ }
+
+ /* Remove suid, sgid, and file capabilities on truncate too */
+- ret = dentry_needs_remove_privs(dentry);
++ ret = dentry_needs_remove_privs(mnt_userns, dentry);
+ if (ret < 0)
+ return ret;
+ if (ret)
+@@ -640,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
+
+ int chown_common(const struct path *path, uid_t user, gid_t group)
+ {
+- struct user_namespace *mnt_userns;
++ struct user_namespace *mnt_userns, *fs_userns;
+ struct inode *inode = path->dentry->d_inode;
+ struct inode *delegated_inode = NULL;
+ int error;
+@@ -652,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
+ gid = make_kgid(current_user_ns(), group);
+
+ mnt_userns = mnt_user_ns(path->mnt);
+- uid = kuid_from_mnt(mnt_userns, uid);
+- gid = kgid_from_mnt(mnt_userns, gid);
++ fs_userns = i_user_ns(inode);
++ uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
++ gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
+
+ retry_deleg:
+ newattrs.ia_valid = ATTR_CTIME;
+@@ -669,10 +671,10 @@ retry_deleg:
+ newattrs.ia_valid |= ATTR_GID;
+ newattrs.ia_gid = gid;
+ }
+- if (!S_ISDIR(inode->i_mode))
+- newattrs.ia_valid |=
+- ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+ inode_lock(inode);
++ if (!S_ISDIR(inode->i_mode))
++ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
++ setattr_should_drop_sgid(mnt_userns, inode);
+ error = security_path_chown(path, uid, gid);
+ if (!error)
+ error = notify_change(mnt_userns, path->dentry, &newattrs,
+@@ -784,7 +786,9 @@ static int do_dentry_open(struct file *f,
+ return 0;
+ }
+
+- if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
++ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
++ i_readcount_inc(inode);
++ } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+ error = get_write_access(inode);
+ if (unlikely(error))
+ goto cleanup_file;
+@@ -824,8 +828,6 @@ static int do_dentry_open(struct file *f,
+ goto cleanup_all;
+ }
+ f->f_mode |= FMODE_OPENED;
+- if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+- i_readcount_inc(inode);
+ if ((f->f_mode & FMODE_READ) &&
+ likely(f->f_op->read || f->f_op->read_iter))
+ f->f_mode |= FMODE_CAN_READ;
+@@ -856,8 +858,20 @@ static int do_dentry_open(struct file *f,
+ * of THPs into the page cache will fail.
+ */
+ smp_mb();
+- if (filemap_nr_thps(inode->i_mapping))
+- truncate_pagecache(inode, 0);
++ if (filemap_nr_thps(inode->i_mapping)) {
++ struct address_space *mapping = inode->i_mapping;
++
++ filemap_invalidate_lock(inode->i_mapping);
++ /*
++ * unmap_mapping_range just need to be called once
++ * here, because the private pages is not need to be
++ * unmapped mapping (e.g. data segment of dynamic
++ * shared libraries here).
++ */
++ unmap_mapping_range(mapping, 0, 0, 0);
++ truncate_inode_pages(mapping, 0);
++ filemap_invalidate_unlock(inode->i_mapping);
++ }
+ }
+
+ return 0;
+@@ -866,10 +880,7 @@ cleanup_all:
+ if (WARN_ON_ONCE(error > 0))
+ error = -EINVAL;
+ fops_put(f->f_op);
+- if (f->f_mode & FMODE_WRITER) {
+- put_write_access(inode);
+- __mnt_drop_write(f->f_path.mnt);
+- }
++ put_file_access(f);
+ cleanup_file:
+ path_put(&f->f_path);
+ f->f_path.mnt = NULL;
+@@ -1115,7 +1126,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
+ lookup_flags |= LOOKUP_IN_ROOT;
+ if (how->resolve & RESOLVE_CACHED) {
+ /* Don't bother even trying for create/truncate/tmpfile open */
+- if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
++ if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
+ return -EAGAIN;
+ lookup_flags |= LOOKUP_CACHED;
+ }
+diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
+index fe484cf93e5cd..8bbe9486e3a62 100644
+--- a/fs/orangefs/dcache.c
++++ b/fs/orangefs/dcache.c
+@@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
+- if (!new_op)
++ if (!new_op) {
++ ret = -ENOMEM;
+ goto out_put_parent;
++ }
+
+ new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
+ new_op->upcall.req.lookup.parent_refn = parent->refn;
+diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
+index 538e839590ef5..b501dc07f9222 100644
+--- a/fs/orangefs/orangefs-bufmap.c
++++ b/fs/orangefs/orangefs-bufmap.c
+@@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
+ {
+ kfree(bufmap->page_array);
+ kfree(bufmap->desc_array);
+- kfree(bufmap->buffer_index_array);
++ bitmap_free(bufmap->buffer_index_array);
+ kfree(bufmap);
+ }
+
+@@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
+ bufmap->desc_size = user_desc->size;
+ bufmap->desc_shift = ilog2(bufmap->desc_size);
+
+- bufmap->buffer_index_array =
+- kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
++ bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL);
+ if (!bufmap->buffer_index_array)
+ goto out_free_bufmap;
+
+@@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
+ out_free_desc_array:
+ kfree(bufmap->desc_array);
+ out_free_index_array:
+- kfree(bufmap->buffer_index_array);
++ bitmap_free(bufmap->buffer_index_array);
+ out_free_bufmap:
+ kfree(bufmap);
+ out:
+diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
+index 29eaa45443727..1b508f5433846 100644
+--- a/fs/orangefs/orangefs-debugfs.c
++++ b/fs/orangefs/orangefs-debugfs.c
+@@ -194,15 +194,10 @@ void orangefs_debugfs_init(int debug_mask)
+ */
+ static void orangefs_kernel_debug_init(void)
+ {
+- int rc = -ENOMEM;
+- char *k_buffer = NULL;
++ static char k_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { };
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+- k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+- if (!k_buffer)
+- goto out;
+-
+ if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+ strcpy(k_buffer, kernel_debug_string);
+ strcat(k_buffer, "\n");
+@@ -213,15 +208,14 @@ static void orangefs_kernel_debug_init(void)
+
+ debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer,
+ &kernel_debug_fops);
+-
+-out:
+- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+ }
+
+
+ void orangefs_debugfs_cleanup(void)
+ {
+ debugfs_remove_recursive(debug_dir);
++ kfree(debug_help_string);
++ debug_help_string = NULL;
+ }
+
+ /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
+@@ -297,18 +291,13 @@ static int help_show(struct seq_file *m, void *v)
+ /*
+ * initialize the client-debug file.
+ */
+-static int orangefs_client_debug_init(void)
++static void orangefs_client_debug_init(void)
+ {
+
+- int rc = -ENOMEM;
+- char *c_buffer = NULL;
++ static char c_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { };
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+- c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+- if (!c_buffer)
+- goto out;
+-
+ if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+ strcpy(c_buffer, client_debug_string);
+ strcat(c_buffer, "\n");
+@@ -322,13 +311,6 @@ static int orangefs_client_debug_init(void)
+ debug_dir,
+ c_buffer,
+ &kernel_debug_fops);
+-
+- rc = 0;
+-
+-out:
+-
+- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+- return rc;
+ }
+
+ /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
+@@ -671,6 +653,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot)
+ memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE);
+ strlcat(debug_help_string, new, string_size);
+ mutex_unlock(&orangefs_help_file_lock);
++ kfree(new);
+ }
+
+ rc = 0;
+diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
+index cd7297815f91e..5ab741c60b7e2 100644
+--- a/fs/orangefs/orangefs-mod.c
++++ b/fs/orangefs/orangefs-mod.c
+@@ -141,7 +141,7 @@ static int __init orangefs_init(void)
+ gossip_err("%s: could not initialize device subsystem %d!\n",
+ __func__,
+ ret);
+- goto cleanup_device;
++ goto cleanup_sysfs;
+ }
+
+ ret = register_filesystem(&orangefs_fs_type);
+@@ -152,11 +152,11 @@ static int __init orangefs_init(void)
+ goto out;
+ }
+
+- orangefs_sysfs_exit();
+-
+-cleanup_device:
+ orangefs_dev_cleanup();
+
++cleanup_sysfs:
++ orangefs_sysfs_exit();
++
+ sysfs_init_failed:
+ orangefs_debugfs_cleanup();
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index 4e7d5bfa2949f..46cc429c44f7e 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -140,12 +140,14 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
+ int err;
+
+ err = ovl_real_fileattr_get(old, &oldfa);
+- if (err)
+- return err;
+-
+- err = ovl_real_fileattr_get(new, &newfa);
+- if (err)
++ if (err) {
++ /* Ntfs-3g returns -EINVAL for "no fileattr support" */
++ if (err == -ENOTTY || err == -EINVAL)
++ return 0;
++ pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
++ old->dentry, err);
+ return err;
++ }
+
+ /*
+ * We cannot set immutable and append-only flags on upper inode,
+@@ -155,10 +157,31 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old,
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+- if (err)
++ if (err == -EPERM)
++ pr_warn_once("copying fileattr: no xattr on upper\n");
++ else if (err)
+ return err;
+ }
+
++ /* Don't bother copying flags if none are set */
++ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK))
++ return 0;
++
++ err = ovl_real_fileattr_get(new, &newfa);
++ if (err) {
++ /*
++ * Returning an error if upper doesn't support fileattr will
++ * result in a regression, so revert to the old behavior.
++ */
++ if (err == -ENOTTY || err == -EINVAL) {
++ pr_warn_once("copying fileattr: no support on upper\n");
++ return 0;
++ }
++ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
++ new->dentry, err);
++ return err;
++ }
++
+ BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL);
+ newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK;
+ newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK);
+@@ -519,6 +542,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_dentry_set_upper_alias(c->dentry);
++ ovl_dentry_update_reval(c->dentry, upper);
+ }
+ }
+ inode_unlock(udir);
+@@ -817,6 +841,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+ inode_unlock(udir);
+
+ ovl_dentry_set_upper_alias(c->dentry);
++ ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
+ }
+
+ out:
+@@ -937,6 +962,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+ if (err)
+ return err;
+
++ if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) ||
++ !kgid_has_mapping(current_user_ns(), ctx.stat.gid))
++ return -EOVERFLOW;
++
+ ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+
+ if (parent) {
+diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
+index 93c7c267de934..519193ce7d575 100644
+--- a/fs/overlayfs/dir.c
++++ b/fs/overlayfs/dir.c
+@@ -137,8 +137,7 @@ kill_whiteout:
+ goto out;
+ }
+
+-static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
+- umode_t mode)
++int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode)
+ {
+ int err;
+ struct dentry *d, *dentry = *newdentry;
+@@ -268,8 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
+
+ ovl_dir_modified(dentry->d_parent, false);
+ ovl_dentry_set_upper_alias(dentry);
+- ovl_dentry_update_reval(dentry, newdentry,
+- DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
++ ovl_dentry_init_reval(dentry, newdentry);
+
+ if (!hardlink) {
+ /*
+@@ -590,28 +588,42 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
+ goto out_revert_creds;
+ }
+
+- err = -ENOMEM;
+- override_cred = prepare_creds();
+- if (override_cred) {
++ if (!attr->hardlink) {
++ err = -ENOMEM;
++ override_cred = prepare_creds();
++ if (!override_cred)
++ goto out_revert_creds;
++ /*
++ * In the creation cases(create, mkdir, mknod, symlink),
++ * ovl should transfer current's fs{u,g}id to underlying
++ * fs. Because underlying fs want to initialize its new
++ * inode owner using current's fs{u,g}id. And in this
++ * case, the @inode is a new inode that is initialized
++ * in inode_init_owner() to current's fs{u,g}id. So use
++ * the inode's i_{u,g}id to override the cred's fs{u,g}id.
++ *
++ * But in the other hardlink case, ovl_link() does not
++ * create a new inode, so just use the ovl mounter's
++ * fs{u,g}id.
++ */
+ override_cred->fsuid = inode->i_uid;
+ override_cred->fsgid = inode->i_gid;
+- if (!attr->hardlink) {
+- err = security_dentry_create_files_as(dentry,
+- attr->mode, &dentry->d_name, old_cred,
+- override_cred);
+- if (err) {
+- put_cred(override_cred);
+- goto out_revert_creds;
+- }
++ err = security_dentry_create_files_as(dentry,
++ attr->mode, &dentry->d_name, old_cred,
++ override_cred);
++ if (err) {
++ put_cred(override_cred);
++ goto out_revert_creds;
+ }
+ put_cred(override_creds(override_cred));
+ put_cred(override_cred);
+-
+- if (!ovl_dentry_is_whiteout(dentry))
+- err = ovl_create_upper(dentry, inode, attr);
+- else
+- err = ovl_create_over_whiteout(dentry, inode, attr);
+ }
++
++ if (!ovl_dentry_is_whiteout(dentry))
++ err = ovl_create_upper(dentry, inode, attr);
++ else
++ err = ovl_create_over_whiteout(dentry, inode, attr);
++
+ out_revert_creds:
+ revert_creds(old_cred);
+ return err;
+@@ -881,7 +893,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+ {
+ int err;
+ const struct cred *old_cred;
+- struct dentry *upperdentry;
+ bool lower_positive = ovl_lower_positive(dentry);
+ LIST_HEAD(list);
+
+@@ -924,9 +935,8 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+ * Note: we fail to update ctime if there was no copy-up, only a
+ * whiteout
+ */
+- upperdentry = ovl_dentry_upper(dentry);
+- if (upperdentry)
+- ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
++ if (ovl_dentry_upper(dentry))
++ ovl_copyattr(d_inode(dentry));
+
+ out_drop_write:
+ ovl_drop_write(dentry);
+@@ -1273,9 +1283,9 @@ static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+ (d_inode(new) && ovl_type_origin(new)));
+
+ /* copy ctime: */
+- ovl_copyattr(d_inode(olddentry), d_inode(old));
++ ovl_copyattr(d_inode(old));
+ if (d_inode(new) && ovl_dentry_upper(new))
+- ovl_copyattr(d_inode(newdentry), d_inode(new));
++ ovl_copyattr(d_inode(new));
+
+ out_dput:
+ dput(newdentry);
+diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
+index ebde05c9cf62e..baa50ece0bc53 100644
+--- a/fs/overlayfs/export.c
++++ b/fs/overlayfs/export.c
+@@ -259,7 +259,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+ return FILEID_INVALID;
+
+ dentry = d_find_any_alias(inode);
+- if (WARN_ON(!dentry))
++ if (!dentry)
+ return FILEID_INVALID;
+
+ bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
+@@ -326,8 +326,7 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
+ if (upper_alias)
+ ovl_dentry_set_upper_alias(dentry);
+
+- ovl_dentry_update_reval(dentry, upper,
+- DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
++ ovl_dentry_init_reval(dentry, upper);
+
+ return d_instantiate_anon(dentry, inode);
+
+@@ -791,7 +790,7 @@ static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy unaligned inner fh into aligned buffer */
+- memcpy(&fh->fb, fid, buflen - OVL_FH_WIRE_OFFSET);
++ memcpy(fh->buf, fid, buflen - OVL_FH_WIRE_OFFSET);
+ return fh;
+ }
+
+diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
+index c88ac571593dc..28cb05ef018c7 100644
+--- a/fs/overlayfs/file.c
++++ b/fs/overlayfs/file.c
+@@ -17,6 +17,7 @@
+
+ struct ovl_aio_req {
+ struct kiocb iocb;
++ refcount_t ref;
+ struct kiocb *orig_iocb;
+ struct fd fd;
+ };
+@@ -252,6 +253,14 @@ static rwf_t ovl_iocb_to_rwf(int ifl)
+ return flags;
+ }
+
++static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
++{
++ if (refcount_dec_and_test(&aio_req->ref)) {
++ fdput(aio_req->fd);
++ kmem_cache_free(ovl_aio_request_cachep, aio_req);
++ }
++}
++
+ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+ {
+ struct kiocb *iocb = &aio_req->iocb;
+@@ -264,12 +273,11 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
+ file_end_write(iocb->ki_filp);
+- ovl_copyattr(ovl_inode_real(inode), inode);
++ ovl_copyattr(inode);
+ }
+
+ orig_iocb->ki_pos = iocb->ki_pos;
+- fdput(aio_req->fd);
+- kmem_cache_free(ovl_aio_request_cachep, aio_req);
++ ovl_aio_put(aio_req);
+ }
+
+ static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+@@ -319,7 +327,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
++ refcount_set(&aio_req->ref, 2);
+ ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
++ ovl_aio_put(aio_req);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+@@ -346,7 +356,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+
+ inode_lock(inode);
+ /* Update mode */
+- ovl_copyattr(ovl_inode_real(inode), inode);
++ ovl_copyattr(inode);
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
+@@ -371,7 +381,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ ovl_iocb_to_rwf(ifl));
+ file_end_write(real.file);
+ /* Update size */
+- ovl_copyattr(ovl_inode_real(inode), inode);
++ ovl_copyattr(inode);
+ } else {
+ struct ovl_aio_req *aio_req;
+
+@@ -390,7 +400,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_flags = ifl;
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
++ refcount_set(&aio_req->ref, 2);
+ ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
++ ovl_aio_put(aio_req);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+@@ -419,12 +431,11 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ struct fd real;
+ const struct cred *old_cred;
+ struct inode *inode = file_inode(out);
+- struct inode *realinode = ovl_inode_real(inode);
+ ssize_t ret;
+
+ inode_lock(inode);
+ /* Update mode */
+- ovl_copyattr(realinode, inode);
++ ovl_copyattr(inode);
+ ret = file_remove_privs(out);
+ if (ret)
+ goto out_unlock;
+@@ -440,7 +451,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+
+ file_end_write(real.file);
+ /* Update size */
+- ovl_copyattr(realinode, inode);
++ ovl_copyattr(inode);
+ revert_creds(old_cred);
+ fdput(real);
+
+@@ -505,19 +516,29 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
+ const struct cred *old_cred;
+ int ret;
+
++ inode_lock(inode);
++ /* Update mode */
++ ovl_copyattr(inode);
++ ret = file_remove_privs(file);
++ if (ret)
++ goto out_unlock;
++
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+- return ret;
++ goto out_unlock;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_fallocate(real.file, mode, offset, len);
+ revert_creds(old_cred);
+
+ /* Update size */
+- ovl_copyattr(ovl_inode_real(inode), inode);
++ ovl_copyattr(inode);
+
+ fdput(real);
+
++out_unlock:
++ inode_unlock(inode);
++
+ return ret;
+ }
+
+@@ -555,14 +576,23 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+ const struct cred *old_cred;
+ loff_t ret;
+
++ inode_lock(inode_out);
++ if (op != OVL_DEDUPE) {
++ /* Update mode */
++ ovl_copyattr(inode_out);
++ ret = file_remove_privs(file_out);
++ if (ret)
++ goto out_unlock;
++ }
++
+ ret = ovl_real_fdget(file_out, &real_out);
+ if (ret)
+- return ret;
++ goto out_unlock;
+
+ ret = ovl_real_fdget(file_in, &real_in);
+ if (ret) {
+ fdput(real_out);
+- return ret;
++ goto out_unlock;
+ }
+
+ old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
+@@ -586,11 +616,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+ revert_creds(old_cred);
+
+ /* Update size */
+- ovl_copyattr(ovl_inode_real(inode_out), inode_out);
++ ovl_copyattr(inode_out);
+
+ fdput(real_in);
+ fdput(real_out);
+
++out_unlock:
++ inode_unlock(inode_out);
++
+ return ret;
+ }
+
+diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
+index 832b17589733a..65e5e6eb761a9 100644
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -80,7 +80,7 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ err = notify_change(&init_user_ns, upperdentry, attr, NULL);
+ revert_creds(old_cred);
+ if (!err)
+- ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
++ ovl_copyattr(dentry->d_inode);
+ inode_unlock(upperdentry->d_inode);
+
+ if (winode)
+@@ -377,7 +377,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+ revert_creds(old_cred);
+
+ /* copy c/mtime */
+- ovl_copyattr(d_inode(realdentry), inode);
++ ovl_copyattr(inode);
+
+ out_drop_write:
+ ovl_drop_write(dentry);
+@@ -453,7 +453,15 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
+ const struct cred *old_cred;
+ struct posix_acl *acl;
+
+- if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
++ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
++ return NULL;
++
++ if (!realinode) {
++ WARN_ON(!rcu);
++ return ERR_PTR(-ECHILD);
++ }
++
++ if (!IS_POSIXACL(realinode))
+ return NULL;
+
+ if (rcu)
+@@ -579,7 +587,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns,
+ inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);
+
+ /* Update ctime */
+- ovl_copyattr(ovl_inode_real(inode), inode);
++ ovl_copyattr(inode);
+ }
+ ovl_drop_write(dentry);
+ out:
+@@ -610,7 +618,10 @@ int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa)
+ if (err)
+ return err;
+
+- return vfs_fileattr_get(realpath->dentry, fa);
++ err = vfs_fileattr_get(realpath->dentry, fa);
++ if (err == -ENOIOCTLCMD)
++ err = -ENOTTY;
++ return err;
+ }
+
+ int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+@@ -774,16 +785,19 @@ void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip,
+ unsigned long ino, int fsid)
+ {
+ struct inode *realinode;
++ struct ovl_inode *oi = OVL_I(inode);
+
+ if (oip->upperdentry)
+- OVL_I(inode)->__upperdentry = oip->upperdentry;
+- if (oip->lowerpath && oip->lowerpath->dentry)
+- OVL_I(inode)->lower = igrab(d_inode(oip->lowerpath->dentry));
++ oi->__upperdentry = oip->upperdentry;
++ if (oip->lowerpath && oip->lowerpath->dentry) {
++ oi->lowerpath.dentry = dget(oip->lowerpath->dentry);
++ oi->lowerpath.layer = oip->lowerpath->layer;
++ }
+ if (oip->lowerdata)
+- OVL_I(inode)->lowerdata = igrab(d_inode(oip->lowerdata));
++ oi->lowerdata = igrab(d_inode(oip->lowerdata));
+
+ realinode = ovl_inode_real(inode);
+- ovl_copyattr(realinode, inode);
++ ovl_copyattr(inode);
+ ovl_copyflags(realinode, inode);
+ ovl_map_ino(inode, ino, fsid);
+ }
+diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
+index 1a9b515fc45d4..9c055d11a95de 100644
+--- a/fs/overlayfs/namei.c
++++ b/fs/overlayfs/namei.c
+@@ -1103,8 +1103,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+ ovl_set_flag(OVL_UPPERDATA, inode);
+ }
+
+- ovl_dentry_update_reval(dentry, upperdentry,
+- DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
++ ovl_dentry_init_reval(dentry, upperdentry);
+
+ revert_creds(old_cred);
+ if (origin_path) {
+diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
+index 3894f33479552..a96b67586f817 100644
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -107,7 +107,7 @@ struct ovl_fh {
+ u8 padding[3]; /* make sure fb.fid is 32bit aligned */
+ union {
+ struct ovl_fb fb;
+- u8 buf[0];
++ DECLARE_FLEX_ARRAY(u8, buf);
+ };
+ } __packed;
+
+@@ -286,17 +286,21 @@ bool ovl_index_all(struct super_block *sb);
+ bool ovl_verify_lower(struct super_block *sb);
+ struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+ bool ovl_dentry_remote(struct dentry *dentry);
+-void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
+- unsigned int mask);
++void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry);
++void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry);
++void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
++ unsigned int mask);
+ bool ovl_dentry_weird(struct dentry *dentry);
+ enum ovl_path_type ovl_path_type(struct dentry *dentry);
+ void ovl_path_upper(struct dentry *dentry, struct path *path);
+ void ovl_path_lower(struct dentry *dentry, struct path *path);
+ void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
++void ovl_i_path_real(struct inode *inode, struct path *path);
+ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+ struct dentry *ovl_dentry_upper(struct dentry *dentry);
+ struct dentry *ovl_dentry_lower(struct dentry *dentry);
+ struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
++const struct ovl_layer *ovl_i_layer_lower(struct inode *inode);
+ const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+ struct dentry *ovl_dentry_real(struct dentry *dentry);
+ struct dentry *ovl_i_dentry_upper(struct inode *inode);
+@@ -520,16 +524,7 @@ bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir);
+ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir);
+ struct inode *ovl_get_inode(struct super_block *sb,
+ struct ovl_inode_params *oip);
+-static inline void ovl_copyattr(struct inode *from, struct inode *to)
+-{
+- to->i_uid = from->i_uid;
+- to->i_gid = from->i_gid;
+- to->i_mode = from->i_mode;
+- to->i_atime = from->i_atime;
+- to->i_mtime = from->i_mtime;
+- to->i_ctime = from->i_ctime;
+- i_size_write(to, i_size_read(from));
+-}
++void ovl_copyattr(struct inode *to);
+
+ /* vfs inode flags copied from real to ovl inode */
+ #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE)
+@@ -570,6 +565,7 @@ struct ovl_cattr {
+
+ #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
+
++int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode);
+ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
+ struct ovl_cattr *attr);
+ int ovl_cleanup(struct inode *dir, struct dentry *dentry);
+diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
+index 63efee554f69a..08031638bbeec 100644
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -32,6 +32,7 @@ struct ovl_sb {
+ };
+
+ struct ovl_layer {
++ /* ovl_free_fs() relies on @mnt being the first member! */
+ struct vfsmount *mnt;
+ /* Trap in ovl inode cache */
+ struct inode *trap;
+@@ -42,6 +43,14 @@ struct ovl_layer {
+ int fsid;
+ };
+
++/*
++ * ovl_free_fs() relies on @mnt being the first member when unmounting
++ * the private mounts created for each layer. Let's check both the
++ * offset and type.
++ */
++static_assert(offsetof(struct ovl_layer, mnt) == 0);
++static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *));
++
+ struct ovl_path {
+ const struct ovl_layer *layer;
+ struct dentry *dentry;
+@@ -129,7 +138,7 @@ struct ovl_inode {
+ unsigned long flags;
+ struct inode vfs_inode;
+ struct dentry *__upperdentry;
+- struct inode *lower;
++ struct ovl_path lowerpath;
+
+ /* synchronize copy up and more */
+ struct mutex lock;
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 178daa5e82c9d..e18025b5c8872 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -138,11 +138,16 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry,
+ unsigned int flags, bool weak)
+ {
+ struct ovl_entry *oe = dentry->d_fsdata;
++ struct inode *inode = d_inode_rcu(dentry);
+ struct dentry *upper;
+ unsigned int i;
+ int ret = 1;
+
+- upper = ovl_dentry_upper(dentry);
++ /* Careful in RCU mode */
++ if (!inode)
++ return -ECHILD;
++
++ upper = ovl_i_dentry_upper(inode);
+ if (upper)
+ ret = ovl_revalidate_real(upper, flags, weak);
+
+@@ -184,7 +189,8 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
+ oi->version = 0;
+ oi->flags = 0;
+ oi->__upperdentry = NULL;
+- oi->lower = NULL;
++ oi->lowerpath.dentry = NULL;
++ oi->lowerpath.layer = NULL;
+ oi->lowerdata = NULL;
+ mutex_init(&oi->lock);
+
+@@ -205,7 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)
+ struct ovl_inode *oi = OVL_I(inode);
+
+ dput(oi->__upperdentry);
+- iput(oi->lower);
++ dput(oi->lowerpath.dentry);
+ if (S_ISDIR(inode->i_mode))
+ ovl_dir_cache_free(inode);
+ else
+@@ -787,10 +793,14 @@ retry:
+ goto retry;
+ }
+
+- work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
+- err = PTR_ERR(work);
+- if (IS_ERR(work))
+- goto out_err;
++ err = ovl_mkdir_real(dir, &work, attr.ia_mode);
++ if (err)
++ goto out_dput;
++
++ /* Weird filesystem returning with hashed negative (kernfs)? */
++ err = -EINVAL;
++ if (d_really_is_negative(work))
++ goto out_dput;
+
+ /*
+ * Try to remove POSIX ACL xattrs from workdir. We are good if:
+@@ -869,7 +879,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
+ pr_err("filesystem on '%s' not supported\n", name);
+ goto out_put;
+ }
+- if (mnt_user_ns(path->mnt) != &init_user_ns) {
++ if (is_idmapped_mnt(path->mnt)) {
+ pr_err("idmapped layers are currently not supported\n");
+ goto out_put;
+ }
+@@ -1409,11 +1419,12 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ */
+ err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
+ if (err) {
++ pr_warn("failed to set xattr on upper\n");
+ ofs->noxattr = true;
+ if (ofs->config.index || ofs->config.metacopy) {
+ ofs->config.index = false;
+ ofs->config.metacopy = false;
+- pr_warn("upper fs does not support xattr, falling back to index=off,metacopy=off.\n");
++ pr_warn("...falling back to index=off,metacopy=off.\n");
+ }
+ /*
+ * xattr support is required for persistent st_ino.
+@@ -1421,8 +1432,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
+ */
+ if (ofs->config.xino == OVL_XINO_AUTO) {
+ ofs->config.xino = OVL_XINO_OFF;
+- pr_warn("upper fs does not support xattr, falling back to xino=off.\n");
++ pr_warn("...falling back to xino=off.\n");
+ }
++ if (err == -EPERM && !ofs->config.userxattr)
++ pr_info("try mounting with 'userxattr' option\n");
+ err = 0;
+ } else {
+ ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
+@@ -1952,7 +1965,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
+ ovl_dentry_set_flag(OVL_E_CONNECTED, root);
+ ovl_set_upperdata(d_inode(root));
+ ovl_inode_init(d_inode(root), &oip, ino, fsid);
+- ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
++ ovl_dentry_init_flags(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
+
+ return root;
+ }
+@@ -2127,7 +2140,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+ ovl_trusted_xattr_handlers;
+ sb->s_fs_info = ofs;
+ sb->s_flags |= SB_POSIXACL;
+- sb->s_iflags |= SB_I_SKIP_SYNC;
++ sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+
+ err = -ENOMEM;
+ root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
+diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
+index f48284a2a8960..d62d5ede60dfd 100644
+--- a/fs/overlayfs/util.c
++++ b/fs/overlayfs/util.c
+@@ -94,14 +94,30 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
+ return oe;
+ }
+
++#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE)
++
+ bool ovl_dentry_remote(struct dentry *dentry)
+ {
+- return dentry->d_flags &
+- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
++ return dentry->d_flags & OVL_D_REVALIDATE;
++}
++
++void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry)
++{
++ if (!ovl_dentry_remote(realdentry))
++ return;
++
++ spin_lock(&dentry->d_lock);
++ dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE;
++ spin_unlock(&dentry->d_lock);
++}
++
++void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry)
++{
++ return ovl_dentry_init_flags(dentry, upperdentry, OVL_D_REVALIDATE);
+ }
+
+-void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry,
+- unsigned int mask)
++void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry,
++ unsigned int mask)
+ {
+ struct ovl_entry *oe = OVL_E(dentry);
+ unsigned int i, flags = 0;
+@@ -236,6 +252,17 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode)
+ return ovl_upperdentry_dereference(OVL_I(inode));
+ }
+
++void ovl_i_path_real(struct inode *inode, struct path *path)
++{
++ path->dentry = ovl_i_dentry_upper(inode);
++ if (!path->dentry) {
++ path->dentry = OVL_I(inode)->lowerpath.dentry;
++ path->mnt = OVL_I(inode)->lowerpath.layer->mnt;
++ } else {
++ path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb));
++ }
++}
++
+ struct inode *ovl_inode_upper(struct inode *inode)
+ {
+ struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+@@ -245,7 +272,9 @@ struct inode *ovl_inode_upper(struct inode *inode)
+
+ struct inode *ovl_inode_lower(struct inode *inode)
+ {
+- return OVL_I(inode)->lower;
++ struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry;
++
++ return lowerdentry ? d_inode(lowerdentry) : NULL;
+ }
+
+ struct inode *ovl_inode_real(struct inode *inode)
+@@ -443,7 +472,7 @@ static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
+ void ovl_dir_modified(struct dentry *dentry, bool impurity)
+ {
+ /* Copy mtime/ctime */
+- ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
++ ovl_copyattr(d_inode(dentry));
+
+ ovl_dir_version_inc(dentry, impurity);
+ }
+@@ -1060,3 +1089,33 @@ int ovl_sync_status(struct ovl_fs *ofs)
+
+ return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
+ }
++
++/*
++ * ovl_copyattr() - copy inode attributes from layer to ovl inode
++ *
++ * When overlay copies inode information from an upper or lower layer to the
++ * relevant overlay inode it will apply the idmapping of the upper or lower
++ * layer when doing so ensuring that the ovl inode ownership will correctly
++ * reflect the ownership of the idmapped upper or lower layer. For example, an
++ * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to
++ * map any lower or upper inode owned by id 1001 to id 1000. These mapping
++ * helpers are nops when the relevant layer isn't idmapped.
++ */
++void ovl_copyattr(struct inode *inode)
++{
++ struct path realpath;
++ struct inode *realinode;
++ struct user_namespace *real_mnt_userns;
++
++ ovl_i_path_real(inode, &realpath);
++ realinode = d_inode(realpath.dentry);
++ real_mnt_userns = mnt_user_ns(realpath.mnt);
++
++ inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode);
++ inode->i_gid = i_gid_into_mnt(real_mnt_userns, realinode);
++ inode->i_mode = realinode->i_mode;
++ inode->i_atime = realinode->i_atime;
++ inode->i_mtime = realinode->i_mtime;
++ inode->i_ctime = realinode->i_ctime;
++ i_size_write(inode, i_size_read(realinode));
++}
+diff --git a/fs/pipe.c b/fs/pipe.c
+index 6d4342bad9f15..e08f0fe55584b 100644
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
+ */
+ was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+ for (;;) {
+- unsigned int head = pipe->head;
++ /* Read ->head with a barrier vs post_one_notification() */
++ unsigned int head = smp_load_acquire(&pipe->head);
+ unsigned int tail = pipe->tail;
+ unsigned int mask = pipe->ring_size - 1;
+
+@@ -651,7 +652,7 @@ pipe_poll(struct file *filp, poll_table *wait)
+ unsigned int head, tail;
+
+ /* Epoll has some historical nasty semantics, this enables them */
+- pipe->poll_usage = 1;
++ WRITE_ONCE(pipe->poll_usage, true);
+
+ /*
+ * Reading pipe state only -- no need for acquiring the semaphore.
+@@ -830,10 +831,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
+ int i;
+
+ #ifdef CONFIG_WATCH_QUEUE
+- if (pipe->watch_queue) {
++ if (pipe->watch_queue)
+ watch_queue_clear(pipe->watch_queue);
+- put_watch_queue(pipe->watch_queue);
+- }
+ #endif
+
+ (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
+@@ -843,6 +842,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
+ if (buf->ops)
+ pipe_buf_release(pipe, buf);
+ }
++#ifdef CONFIG_WATCH_QUEUE
++ if (pipe->watch_queue)
++ put_watch_queue(pipe->watch_queue);
++#endif
+ if (pipe->tmp_page)
+ __free_page(pipe->tmp_page);
+ kfree(pipe->bufs);
+@@ -1241,30 +1244,33 @@ unsigned int round_pipe_size(unsigned long size)
+
+ /*
+ * Resize the pipe ring to a number of slots.
++ *
++ * Note the pipe can be reduced in capacity, but only if the current
++ * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
++ * returned instead.
+ */
+ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
+ {
+ struct pipe_buffer *bufs;
+ unsigned int head, tail, mask, n;
+
+- /*
+- * We can shrink the pipe, if arg is greater than the ring occupancy.
+- * Since we don't expect a lot of shrink+grow operations, just free and
+- * allocate again like we would do for growing. If the pipe currently
+- * contains more buffers than arg, then return busy.
+- */
+- mask = pipe->ring_size - 1;
+- head = pipe->head;
+- tail = pipe->tail;
+- n = pipe_occupancy(pipe->head, pipe->tail);
+- if (nr_slots < n)
+- return -EBUSY;
+-
+ bufs = kcalloc(nr_slots, sizeof(*bufs),
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ if (unlikely(!bufs))
+ return -ENOMEM;
+
++ spin_lock_irq(&pipe->rd_wait.lock);
++ mask = pipe->ring_size - 1;
++ head = pipe->head;
++ tail = pipe->tail;
++
++ n = pipe_occupancy(head, tail);
++ if (nr_slots < n) {
++ spin_unlock_irq(&pipe->rd_wait.lock);
++ kfree(bufs);
++ return -EBUSY;
++ }
++
+ /*
+ * The pipe array wraps around, so just start the new one at zero
+ * and adjust the indices.
+@@ -1296,6 +1302,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
+ pipe->tail = tail;
+ pipe->head = head;
+
++ spin_unlock_irq(&pipe->rd_wait.lock);
++
+ /* This might have made more room for writers */
+ wake_up_interruptible(&pipe->wr_wait);
+ return 0;
+diff --git a/fs/pnode.c b/fs/pnode.c
+index 1106137c747a3..468e4e65a615d 100644
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -244,7 +244,7 @@ static int propagate_one(struct mount *m)
+ }
+ do {
+ struct mount *parent = last_source->mnt_parent;
+- if (last_source == first_source)
++ if (peers(last_source, first_source))
+ break;
+ done = parent->mnt_master == p;
+ if (done && peers(n, parent))
+diff --git a/fs/posix_acl.c b/fs/posix_acl.c
+index f5c25f580dd92..ceb1e3b868577 100644
+--- a/fs/posix_acl.c
++++ b/fs/posix_acl.c
+@@ -23,6 +23,7 @@
+ #include <linux/export.h>
+ #include <linux/user_namespace.h>
+ #include <linux/namei.h>
++#include <linux/mnt_idmapping.h>
+
+ static struct posix_acl **acl_by_type(struct inode *inode, int type)
+ {
+@@ -375,7 +376,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ goto check_perm;
+ break;
+ case ACL_USER:
+- uid = kuid_into_mnt(mnt_userns, pa->e_uid);
++ uid = mapped_kuid_fs(mnt_userns,
++ i_user_ns(inode),
++ pa->e_uid);
+ if (uid_eq(uid, current_fsuid()))
+ goto mask;
+ break;
+@@ -388,7 +391,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ }
+ break;
+ case ACL_GROUP:
+- gid = kgid_into_mnt(mnt_userns, pa->e_gid);
++ gid = mapped_kgid_fs(mnt_userns,
++ i_user_ns(inode),
++ pa->e_gid);
+ if (in_group_p(gid)) {
+ found = 1;
+ if ((pa->e_perm & want) == want)
+@@ -735,17 +740,17 @@ static void posix_acl_fix_xattr_userns(
+ case ACL_USER:
+ uid = make_kuid(from, le32_to_cpu(entry->e_id));
+ if (from_user)
+- uid = kuid_from_mnt(mnt_userns, uid);
++ uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
+ else
+- uid = kuid_into_mnt(mnt_userns, uid);
++ uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid);
+ entry->e_id = cpu_to_le32(from_kuid(to, uid));
+ break;
+ case ACL_GROUP:
+ gid = make_kgid(from, le32_to_cpu(entry->e_id));
+ if (from_user)
+- gid = kgid_from_mnt(mnt_userns, gid);
++ gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
+ else
+- gid = kgid_into_mnt(mnt_userns, gid);
++ gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid);
+ entry->e_id = cpu_to_le32(from_kgid(to, gid));
+ break;
+ default:
+@@ -755,9 +760,14 @@ static void posix_acl_fix_xattr_userns(
+ }
+
+ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size)
+ {
+ struct user_namespace *user_ns = current_user_ns();
++
++ /* Leave ids untouched on non-idmapped mounts. */
++ if (no_idmapping(mnt_userns, i_user_ns(inode)))
++ mnt_userns = &init_user_ns;
+ if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ return;
+ posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
+@@ -765,9 +775,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+ }
+
+ void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size)
+ {
+ struct user_namespace *user_ns = current_user_ns();
++
++ /* Leave ids untouched on non-idmapped mounts. */
++ if (no_idmapping(mnt_userns, i_user_ns(inode)))
++ mnt_userns = &init_user_ns;
+ if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ return;
+ posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index 533d5836eb9a4..e5d7a5a75aff9 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -67,6 +67,7 @@
+ #include <linux/mm.h>
+ #include <linux/swap.h>
+ #include <linux/rcupdate.h>
++#include <linux/kallsyms.h>
+ #include <linux/stacktrace.h>
+ #include <linux/resource.h>
+ #include <linux/module.h>
+@@ -386,17 +387,19 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+ {
+ unsigned long wchan;
++ char symname[KSYM_NAME_LEN];
+
+- if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+- wchan = get_wchan(task);
+- else
+- wchan = 0;
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
++ goto print0;
+
+- if (wchan)
+- seq_printf(m, "%ps", (void *) wchan);
+- else
+- seq_putc(m, '0');
++ wchan = get_wchan(task);
++ if (wchan && !lookup_symbol_name(wchan, symname)) {
++ seq_puts(m, symname);
++ return 0;
++ }
+
++print0:
++ seq_putc(m, '0');
+ return 0;
+ }
+ #endif /* CONFIG_KALLSYMS */
+@@ -1883,7 +1886,7 @@ void proc_pid_evict_inode(struct proc_inode *ei)
+ put_pid(pid);
+ }
+
+-struct inode *proc_pid_make_inode(struct super_block * sb,
++struct inode *proc_pid_make_inode(struct super_block *sb,
+ struct task_struct *task, umode_t mode)
+ {
+ struct inode * inode;
+@@ -1912,11 +1915,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
+
+ /* Let the pid remember us for quick removal */
+ ei->pid = pid;
+- if (S_ISDIR(mode)) {
+- spin_lock(&pid->lock);
+- hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
+- spin_unlock(&pid->lock);
+- }
+
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+ security_task_to_inode(task, inode);
+@@ -1929,6 +1927,39 @@ out_unlock:
+ return NULL;
+ }
+
++/*
++ * Generate an inode and add it to @pid->inodes, so that the task will
++ * invalidate the inode's dentry before it is released.
++ *
++ * This helper is used for creating dir-type entries under '/proc' and
++ * '/proc/<tgid>/task'. Other entries (e.g. fd, stat) under '/proc/<tgid>'
++ * can be released by invalidating the '/proc/<tgid>' dentry.
++ * In theory, dentries under '/proc/<tgid>/task' could also be released by
++ * invalidating the '/proc/<tgid>' dentry, but we keep this to handle the
++ * single-thread-exit case: each exiting thread should invalidate its own
++ * '/proc/<tgid>/task/<pid>' dentry before it is released.
++ */
++static struct inode *proc_pid_make_base_inode(struct super_block *sb,
++ struct task_struct *task, umode_t mode)
++{
++ struct inode *inode;
++ struct proc_inode *ei;
++ struct pid *pid;
++
++ inode = proc_pid_make_inode(sb, task, mode);
++ if (!inode)
++ return NULL;
++
++ /* Let proc_flush_pid find this directory inode */
++ ei = PROC_I(inode);
++ pid = ei->pid;
++ spin_lock(&pid->lock);
++ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
++ spin_unlock(&pid->lock);
++
++ return inode;
++}
++
+ int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
+ struct kstat *stat, u32 request_mask, unsigned int query_flags)
+ {
+@@ -3346,7 +3377,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
+ {
+ struct inode *inode;
+
+- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
++ inode = proc_pid_make_base_inode(dentry->d_sb, task,
++ S_IFDIR | S_IRUGO | S_IXUGO);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+
+@@ -3512,7 +3544,8 @@ static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
+ }
+
+ static const struct inode_operations proc_tid_comm_inode_operations = {
+- .permission = proc_tid_comm_permission,
++ .setattr = proc_setattr,
++ .permission = proc_tid_comm_permission,
+ };
+
+ /*
+@@ -3645,7 +3678,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
+ {
+ struct inode *inode;
+- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
++ inode = proc_pid_make_base_inode(dentry->d_sb, task,
++ S_IFDIR | S_IRUGO | S_IXUGO);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+
+diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
+index 6d8d4bf208377..2e244ada1f970 100644
+--- a/fs/proc/bootconfig.c
++++ b/fs/proc/bootconfig.c
+@@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
+ int ret = 0;
+
+ key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
++ if (!key)
++ return -ENOMEM;
+
+ xbc_for_each_key_value(leaf, val) {
+ ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+diff --git a/fs/proc/fd.c b/fs/proc/fd.c
+index 172c86270b312..913bef0d2a36c 100644
+--- a/fs/proc/fd.c
++++ b/fs/proc/fd.c
+@@ -72,7 +72,7 @@ out:
+ return 0;
+ }
+
+-static int seq_fdinfo_open(struct inode *inode, struct file *file)
++static int proc_fdinfo_access_allowed(struct inode *inode)
+ {
+ bool allowed = false;
+ struct task_struct *task = get_proc_task(inode);
+@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
+ if (!allowed)
+ return -EACCES;
+
++ return 0;
++}
++
++static int seq_fdinfo_open(struct inode *inode, struct file *file)
++{
++ int ret = proc_fdinfo_access_allowed(inode);
++
++ if (ret)
++ return ret;
++
+ return single_open(file, seq_show, inode);
+ }
+
+@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
+ proc_fdinfo_instantiate);
+ }
+
++static int proc_open_fdinfo(struct inode *inode, struct file *file)
++{
++ int ret = proc_fdinfo_access_allowed(inode);
++
++ if (ret)
++ return ret;
++
++ return 0;
++}
++
+ const struct inode_operations proc_fdinfo_inode_operations = {
+ .lookup = proc_lookupfdinfo,
+ .setattr = proc_setattr,
+ };
+
+ const struct file_operations proc_fdinfo_operations = {
++ .open = proc_open_fdinfo,
+ .read = generic_read_dir,
+ .iterate_shared = proc_readfdinfo,
+ .llseek = generic_file_llseek,
+diff --git a/fs/proc/generic.c b/fs/proc/generic.c
+index 5b78739e60e40..d32f69aaaa36f 100644
+--- a/fs/proc/generic.c
++++ b/fs/proc/generic.c
+@@ -448,6 +448,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
+ proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+
+ ent->proc_dops = &proc_misc_dentry_ops;
++ /* Revalidate everything under /proc/${pid}/net */
++ if ((*parent)->proc_dops == &proc_net_dentry_ops)
++ pde_force_lookup(ent);
+
+ out:
+ return ent;
+diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
+index 15c2e55d2ed2c..123e3c9d8674b 100644
+--- a/fs/proc/proc_net.c
++++ b/fs/proc/proc_net.c
+@@ -363,6 +363,9 @@ static __net_init int proc_net_ns_init(struct net *net)
+
+ proc_set_user(netd, uid, gid);
+
++ /* Seed dentry revalidation for /proc/${pid}/net */
++ pde_force_lookup(netd);
++
+ err = -EEXIST;
+ net_statd = proc_net_mkdir(net, "stat", netd);
+ if (!net_statd)
+diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
+index 5d66faecd4ef0..0b7a00ed6c49b 100644
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -16,6 +16,7 @@
+ #include <linux/module.h>
+ #include <linux/bpf-cgroup.h>
+ #include <linux/mount.h>
++#include <linux/kmemleak.h>
+ #include "internal.h"
+
+ static const struct dentry_operations proc_sys_dentry_operations;
+@@ -25,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations;
+ static const struct inode_operations proc_sys_dir_operations;
+
+ /* shared constants to be used in various sysctls */
+-const int sysctl_vals[] = { 0, 1, INT_MAX };
++const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX };
+ EXPORT_SYMBOL(sysctl_vals);
+
+ /* Support for permanently empty directories */
+@@ -1384,6 +1385,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
+ }
+ EXPORT_SYMBOL(register_sysctl);
+
++/**
++ * __register_sysctl_init() - register sysctl table to path
++ * @path: path name for sysctl base
++ * @table: This is the sysctl table that needs to be registered to the path
++ * @table_name: The name of sysctl table, only used for log printing when
++ * registration fails
++ *
++ * The sysctl interface is used by userspace to query or modify at runtime
++ * a predefined value set on a variable. These variables however have default
++ * values pre-set. Code which depends on these variables will always work even
++ * if register_sysctl() fails. If register_sysctl() fails you'd just lose the
++ * ability to query or modify the sysctls dynamically at run time. Chances of
++ * register_sysctl() failing on init are extremely low, and so for both reasons
++ * this function does not return any error as it is used by initialization code.
++ *
++ * Context: Can only be called after your respective sysctl base path has been
++ * registered. So for instance, most base directories are registered early on
++ * init before init levels are processed through proc_sys_init() and
++ * sysctl_init().
++ */
++void __init __register_sysctl_init(const char *path, struct ctl_table *table,
++ const char *table_name)
++{
++ struct ctl_table_header *hdr = register_sysctl(path, table);
++
++ if (unlikely(!hdr)) {
++ pr_err("failed when register_sysctl %s to %s\n", table_name, path);
++ return;
++ }
++ kmemleak_not_leak(hdr);
++}
++
+ static char *append_path(const char *path, char *pos, const char *name)
+ {
+ int namelen;
+diff --git a/fs/proc/stat.c b/fs/proc/stat.c
+index 6561a06ef9059..4fb8729a68d4e 100644
+--- a/fs/proc/stat.c
++++ b/fs/proc/stat.c
+@@ -24,7 +24,7 @@
+
+ #ifdef arch_idle_time
+
+-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
++u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+ {
+ u64 idle;
+
+@@ -46,7 +46,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+
+ #else
+
+-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
++u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+ {
+ u64 idle, idle_usecs = -1ULL;
+
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index cf25be3e03212..705a41f4d6b36 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -430,7 +430,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
+ }
+
+ static void smaps_account(struct mem_size_stats *mss, struct page *page,
+- bool compound, bool young, bool dirty, bool locked)
++ bool compound, bool young, bool dirty, bool locked,
++ bool migration)
+ {
+ int i, nr = compound ? compound_nr(page) : 1;
+ unsigned long size = nr * PAGE_SIZE;
+@@ -457,8 +458,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
+ * page_count(page) == 1 guarantees the page is mapped exactly once.
+ * If any subpage of the compound page mapped with PTE it would elevate
+ * page_count().
++ *
++ * The page_mapcount() is called to get a snapshot of the mapcount.
++ * Without holding the page lock this snapshot can be slightly wrong as
++ * we cannot always read the mapcount atomically. It is not safe to
++ * call page_mapcount() even with PTL held if the page is not mapped,
++ * especially for migration entries. Treat regular migration entries
++ * as mapcount == 1.
+ */
+- if (page_count(page) == 1) {
++ if ((page_count(page) == 1) || migration) {
+ smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
+ locked, true);
+ return;
+@@ -495,9 +503,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+ struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
+ struct page *page = NULL;
++ bool migration = false, young = false, dirty = false;
+
+ if (pte_present(*pte)) {
+ page = vm_normal_page(vma, addr, *pte);
++ young = pte_young(*pte);
++ dirty = pte_dirty(*pte);
+ } else if (is_swap_pte(*pte)) {
+ swp_entry_t swpent = pte_to_swp_entry(*pte);
+
+@@ -514,8 +525,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+ } else {
+ mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
+ }
+- } else if (is_pfn_swap_entry(swpent))
++ } else if (is_pfn_swap_entry(swpent)) {
++ if (is_migration_entry(swpent))
++ migration = true;
+ page = pfn_swap_entry_to_page(swpent);
++ }
+ } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
+ && pte_none(*pte))) {
+ page = xa_load(&vma->vm_file->f_mapping->i_pages,
+@@ -528,7 +542,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+ if (!page)
+ return;
+
+- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
++ smaps_account(mss, page, false, young, dirty, locked, migration);
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -539,6 +553,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+ struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
+ struct page *page = NULL;
++ bool migration = false;
+
+ if (pmd_present(*pmd)) {
+ /* FOLL_DUMP will return -EFAULT on huge zero page */
+@@ -546,8 +561,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+ } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
+ swp_entry_t entry = pmd_to_swp_entry(*pmd);
+
+- if (is_migration_entry(entry))
++ if (is_migration_entry(entry)) {
++ migration = true;
+ page = pfn_swap_entry_to_page(entry);
++ }
+ }
+ if (IS_ERR_OR_NULL(page))
+ return;
+@@ -559,7 +576,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+ /* pass */;
+ else
+ mss->file_thp += HPAGE_PMD_SIZE;
+- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
++
++ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
++ locked, migration);
+ }
+ #else
+ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+@@ -695,9 +714,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+ page = pfn_swap_entry_to_page(swpent);
+ }
+ if (page) {
+- int mapcount = page_mapcount(page);
+-
+- if (mapcount >= 2)
++ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
+ mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
+ else
+ mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+@@ -932,7 +949,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
+ vma = vma->vm_next;
+ }
+
+- show_vma_header_prefix(m, priv->mm->mmap->vm_start,
++ show_vma_header_prefix(m, priv->mm->mmap ? priv->mm->mmap->vm_start : 0,
+ last_vma_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+@@ -1363,6 +1380,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
+ {
+ u64 frame = 0, flags = 0;
+ struct page *page = NULL;
++ bool migration = false;
+
+ if (pte_present(pte)) {
+ if (pm->show_pfn)
+@@ -1384,13 +1402,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ flags |= PM_SWAP;
++ migration = is_migration_entry(entry);
+ if (is_pfn_swap_entry(entry))
+ page = pfn_swap_entry_to_page(entry);
+ }
+
+ if (page && !PageAnon(page))
+ flags |= PM_FILE;
+- if (page && page_mapcount(page) == 1)
++ if (page && !migration && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ flags |= PM_SOFT_DIRTY;
+@@ -1406,8 +1425,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ spinlock_t *ptl;
+ pte_t *pte, *orig_pte;
+ int err = 0;
+-
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
++ bool migration = false;
++
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
+ u64 flags = 0, frame = 0;
+@@ -1446,11 +1466,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ if (pmd_swp_uffd_wp(pmd))
+ flags |= PM_UFFD_WP;
+ VM_BUG_ON(!is_pmd_migration_entry(pmd));
++ migration = is_migration_entry(entry);
+ page = pfn_swap_entry_to_page(entry);
+ }
+ #endif
+
+- if (page && page_mapcount(page) == 1)
++ if (page && !migration && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+
+ for (; addr != end; addr += PAGE_SIZE) {
+@@ -1560,7 +1581,8 @@ static const struct mm_walk_ops pagemap_ops = {
+ * Bits 5-54 swap offset if swapped
+ * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
+ * Bit 56 page exclusively mapped
+- * Bits 57-60 zero
++ * Bit 57 pte is uffd-wp write-protected
++ * Bits 58-60 zero
+ * Bit 61 page is file-page or shared-anon
+ * Bit 62 page swapped
+ * Bit 63 page present
+diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
+index 5a1b228964fb7..deb99bc9b7e6b 100644
+--- a/fs/proc/uptime.c
++++ b/fs/proc/uptime.c
+@@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v)
+ {
+ struct timespec64 uptime;
+ struct timespec64 idle;
+- u64 nsec;
++ u64 idle_nsec;
+ u32 rem;
+ int i;
+
+- nsec = 0;
+- for_each_possible_cpu(i)
+- nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
++ idle_nsec = 0;
++ for_each_possible_cpu(i) {
++ struct kernel_cpustat kcs;
++
++ kcpustat_cpu_fetch(&kcs, i);
++ idle_nsec += get_idle_time(&kcs, i);
++ }
+
+ ktime_get_boottime_ts64(&uptime);
+ timens_add_boottime(&uptime);
+
+- idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
++ idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem);
+ idle.tv_nsec = rem;
+ seq_printf(m, "%lu.%02lu %lu.%02lu\n",
+ (unsigned long) uptime.tv_sec,
+diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
+index 9a15334da2086..e5730986758fa 100644
+--- a/fs/proc/vmcore.c
++++ b/fs/proc/vmcore.c
+@@ -124,9 +124,13 @@ ssize_t read_from_oldmem(char *buf, size_t count,
+ nr_bytes = count;
+
+ /* If pfn is not ram, return zeros for sparse dump files */
+- if (pfn_is_ram(pfn) == 0)
+- memset(buf, 0, nr_bytes);
+- else {
++ if (pfn_is_ram(pfn) == 0) {
++ tmp = 0;
++ if (!userbuf)
++ memset(buf, 0, nr_bytes);
++ else if (clear_user(buf, nr_bytes))
++ tmp = -EFAULT;
++ } else {
+ if (encrypted)
+ tmp = copy_oldmem_page_encrypted(pfn, buf,
+ nr_bytes,
+@@ -135,10 +139,10 @@ ssize_t read_from_oldmem(char *buf, size_t count,
+ else
+ tmp = copy_oldmem_page(pfn, buf, nr_bytes,
+ offset, userbuf);
+-
+- if (tmp < 0)
+- return tmp;
+ }
++ if (tmp < 0)
++ return tmp;
++
+ *ppos += nr_bytes;
+ count -= nr_bytes;
+ buf += nr_bytes;
+diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
+index 392ef5162655b..49650e54d2f88 100644
+--- a/fs/proc_namespace.c
++++ b/fs/proc_namespace.c
+@@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+ seq_puts(m, fs_infop->str);
+ }
+
+- if (mnt_user_ns(mnt) != &init_user_ns)
++ if (is_idmapped_mnt(mnt))
+ seq_puts(m, ",idmapped");
+ }
+
+diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
+index 328da35da3908..c49d554cc9ae9 100644
+--- a/fs/pstore/Kconfig
++++ b/fs/pstore/Kconfig
+@@ -126,6 +126,7 @@ config PSTORE_CONSOLE
+ config PSTORE_PMSG
+ bool "Log user space messages"
+ depends on PSTORE
++ select RT_MUTEXES
+ help
+ When the option is enabled, pstore will export a character
+ interface /dev/pmsg0 to log user space messages. On reboot
+@@ -173,7 +174,6 @@ config PSTORE_BLK
+ tristate "Log panic/oops to a block device"
+ depends on PSTORE
+ depends on BLOCK
+- depends on BROKEN
+ select PSTORE_ZONE
+ default n
+ help
+diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
+index 04ce58c939a0b..6093088de49fd 100644
+--- a/fs/pstore/blk.c
++++ b/fs/pstore/blk.c
+@@ -311,7 +311,7 @@ static int __init __best_effort_init(void)
+ if (ret)
+ kfree(best_effort_dev);
+ else
+- pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
++ pr_info("attached %s (%lu) (no dedicated panic_write!)\n",
+ blkdev, best_effort_dev->zone.total_size);
+
+ return ret;
+diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
+index b9614db48b1de..ad96ba97d8f97 100644
+--- a/fs/pstore/platform.c
++++ b/fs/pstore/platform.c
+@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
+ mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
+ }
+
+-/*
+- * Should pstore_dump() wait for a concurrent pstore_dump()? If
+- * not, the current pstore_dump() will report a failure to dump
+- * and return.
+- */
+-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
++static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+ {
+- /* In NMI path, pstore shouldn't block regardless of reason. */
++ /*
++ * In case of NMI path, pstore shouldn't be blocked
++ * regardless of reason.
++ */
+ if (in_nmi())
+ return true;
+
+ switch (reason) {
+ /* In panic case, other cpus are stopped by smp_send_stop(). */
+ case KMSG_DUMP_PANIC:
+- /* Emergency restart shouldn't be blocked. */
++ /*
++ * Emergency restart shouldn't be blocked by spinning on
++ * pstore_info::buf_lock.
++ */
+ case KMSG_DUMP_EMERG:
+ return true;
+ default:
+@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
+ unsigned long total = 0;
+ const char *why;
+ unsigned int part = 1;
++ unsigned long flags = 0;
+ int ret;
+
+ why = kmsg_dump_reason_str(reason);
+
+- if (down_trylock(&psinfo->buf_lock)) {
+- /* Failed to acquire lock: give up if we cannot wait. */
+- if (pstore_cannot_wait(reason)) {
+- pr_err("dump skipped in %s path: may corrupt error record\n",
+- in_nmi() ? "NMI" : why);
+- return;
+- }
+- if (down_interruptible(&psinfo->buf_lock)) {
+- pr_err("could not grab semaphore?!\n");
++ if (pstore_cannot_block_path(reason)) {
++ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
++ pr_err("dump skipped in %s path because of concurrent dump\n",
++ in_nmi() ? "NMI" : why);
+ return;
+ }
++ } else {
++ spin_lock_irqsave(&psinfo->buf_lock, flags);
+ }
+
+ kmsg_dump_rewind(&iter);
+@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
+ total += record.size;
+ part++;
+ }
+-
+- up(&psinfo->buf_lock);
++ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+ }
+
+ static struct kmsg_dumper pstore_dumper = {
+@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
+ psi->write_user = pstore_write_user_compat;
+ psinfo = psi;
+ mutex_init(&psinfo->read_mutex);
+- sema_init(&psinfo->buf_lock, 1);
++ spin_lock_init(&psinfo->buf_lock);
+
+ if (psi->flags & PSTORE_FLAGS_DMESG)
+ allocate_buf_for_compression();
+diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
+index fefe3d391d3af..f3fa3625d772c 100644
+--- a/fs/pstore/ram.c
++++ b/fs/pstore/ram.c
+@@ -670,7 +670,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
+ field = value; \
+ }
+
+- parse_u32("mem-type", pdata->record_size, pdata->mem_type);
++ parse_u32("mem-type", pdata->mem_type, pdata->mem_type);
+ parse_u32("record-size", pdata->record_size, 0);
+ parse_u32("console-size", pdata->console_size, 0);
+ parse_u32("ftrace-size", pdata->ftrace_size, 0);
+@@ -735,6 +735,7 @@ static int ramoops_probe(struct platform_device *pdev)
+ /* Make sure we didn't get bogus platform data pointer. */
+ if (!pdata) {
+ pr_err("NULL platform data\n");
++ err = -EINVAL;
+ goto fail_out;
+ }
+
+@@ -742,6 +743,7 @@ static int ramoops_probe(struct platform_device *pdev)
+ !pdata->ftrace_size && !pdata->pmsg_size)) {
+ pr_err("The memory size and the record/console size must be "
+ "non-zero\n");
++ err = -EINVAL;
+ goto fail_out;
+ }
+
+diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
+index fe5305028c6e2..ccdb71c3dc51d 100644
+--- a/fs/pstore/ram_core.c
++++ b/fs/pstore/ram_core.c
+@@ -439,7 +439,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
+ phys_addr_t addr = page_start + i * PAGE_SIZE;
+ pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+ }
+- vaddr = vmap(pages, page_count, VM_MAP, prot);
++ /*
++ * VM_IOREMAP used here to bypass this region during vread()
++ * and kmap_atomic() (i.e. kcore) to avoid __va() failures.
++ */
++ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot);
+ kfree(pages);
+
+ /*
+@@ -514,7 +518,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
+ sig ^= PERSISTENT_RAM_SIG;
+
+ if (prz->buffer->sig == sig) {
+- if (buffer_size(prz) == 0) {
++ if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
+ pr_debug("found existing empty buffer\n");
+ return 0;
+ }
+@@ -587,6 +591,8 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+ raw_spin_lock_init(&prz->buffer_lock);
+ prz->flags = flags;
+ prz->label = kstrdup(label, GFP_KERNEL);
++ if (!prz->label)
++ goto err;
+
+ ret = persistent_ram_buffer_map(start, size, prz, memtype);
+ if (ret)
+diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
+index 7c8f8feac6c34..5d3f944f60185 100644
+--- a/fs/pstore/zone.c
++++ b/fs/pstore/zone.c
+@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
+ /* avoid destroying old data, allocate a new one */
+ len = zone->buffer_size + sizeof(*zone->buffer);
+ zone->oldbuf = zone->buffer;
+- zone->buffer = kzalloc(len, GFP_KERNEL);
++ zone->buffer = kzalloc(len, GFP_ATOMIC);
+ if (!zone->buffer) {
+ zone->buffer = zone->oldbuf;
+ return -ENOMEM;
+diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
+index 22d904bde6ab9..889eaea56a784 100644
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -79,6 +79,7 @@
+ #include <linux/capability.h>
+ #include <linux/quotaops.h>
+ #include <linux/blkdev.h>
++#include <linux/sched/mm.h>
+ #include "../internal.h" /* ugh */
+
+ #include <linux/uaccess.h>
+@@ -224,13 +225,22 @@ static void put_quota_format(struct quota_format_type *fmt)
+
+ /*
+ * Dquot List Management:
+- * The quota code uses four lists for dquot management: the inuse_list,
+- * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot
+- * structure may be on some of those lists, depending on its current state.
++ * The quota code uses five lists for dquot management: the inuse_list,
++ * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array.
++ * A single dquot structure may be on some of those lists, depending on
++ * its current state.
+ *
+ * All dquots are placed to the end of inuse_list when first created, and this
+ * list is used for invalidate operation, which must look at every dquot.
+ *
++ * When the last reference to a dquot is dropped, the dquot is added to
++ * releasing_dquots. We then queue a work item which calls
++ * synchronize_srcu() and afterwards performs the final cleanup of all the
++ * dquots on the list. Both releasing_dquots and free_dquots use the
++ * dq_free list_head in the dquot struct. When a dquot is removed from
++ * releasing_dquots, a reference count is always subtracted, and if
++ * dq_count == 0 at that point, the dquot will be added to the free_dquots.
++ *
+ * Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
+ * and this list is searched whenever we need an available dquot. Dquots are
+ * removed from the list as soon as they are used again, and
+@@ -249,6 +259,7 @@ static void put_quota_format(struct quota_format_type *fmt)
+
+ static LIST_HEAD(inuse_list);
+ static LIST_HEAD(free_dquots);
++static LIST_HEAD(releasing_dquots);
+ static unsigned int dq_hash_bits, dq_hash_mask;
+ static struct hlist_head *dquot_hash;
+
+@@ -259,6 +270,9 @@ static qsize_t inode_get_rsv_space(struct inode *inode);
+ static qsize_t __inode_get_rsv_space(struct inode *inode);
+ static int __dquot_initialize(struct inode *inode, int type);
+
++static void quota_release_workfn(struct work_struct *work);
++static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn);
++
+ static inline unsigned int
+ hashfn(const struct super_block *sb, struct kqid qid)
+ {
+@@ -304,12 +318,18 @@ static inline void put_dquot_last(struct dquot *dquot)
+ dqstats_inc(DQST_FREE_DQUOTS);
+ }
+
++static inline void put_releasing_dquots(struct dquot *dquot)
++{
++ list_add_tail(&dquot->dq_free, &releasing_dquots);
++}
++
+ static inline void remove_free_dquot(struct dquot *dquot)
+ {
+ if (list_empty(&dquot->dq_free))
+ return;
+ list_del_init(&dquot->dq_free);
+- dqstats_dec(DQST_FREE_DQUOTS);
++ if (!atomic_read(&dquot->dq_count))
++ dqstats_dec(DQST_FREE_DQUOTS);
+ }
+
+ static inline void put_inuse(struct dquot *dquot)
+@@ -335,6 +355,11 @@ static void wait_on_dquot(struct dquot *dquot)
+ mutex_unlock(&dquot->dq_lock);
+ }
+
++static inline int dquot_active(struct dquot *dquot)
++{
++ return test_bit(DQ_ACTIVE_B, &dquot->dq_flags);
++}
++
+ static inline int dquot_dirty(struct dquot *dquot)
+ {
+ return test_bit(DQ_MOD_B, &dquot->dq_flags);
+@@ -350,14 +375,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
+ {
+ int ret = 1;
+
+- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
++ if (!dquot_active(dquot))
+ return 0;
+
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
+ return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);
+
+ /* If quota is dirty already, we don't have to acquire dq_list_lock */
+- if (test_bit(DQ_MOD_B, &dquot->dq_flags))
++ if (dquot_dirty(dquot))
+ return 1;
+
+ spin_lock(&dq_list_lock);
+@@ -425,9 +450,11 @@ EXPORT_SYMBOL(mark_info_dirty);
+ int dquot_acquire(struct dquot *dquot)
+ {
+ int ret = 0, ret2 = 0;
++ unsigned int memalloc;
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+
+ mutex_lock(&dquot->dq_lock);
++ memalloc = memalloc_nofs_save();
+ if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
+ ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
+ if (ret < 0)
+@@ -437,7 +464,7 @@ int dquot_acquire(struct dquot *dquot)
+ smp_mb__before_atomic();
+ set_bit(DQ_READ_B, &dquot->dq_flags);
+ /* Instantiate dquot if needed */
+- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
++ if (!dquot_active(dquot) && !dquot->dq_off) {
+ ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
+ /* Write the info if needed */
+ if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
+@@ -458,6 +485,7 @@ int dquot_acquire(struct dquot *dquot)
+ smp_mb__before_atomic();
+ set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+ out_iolock:
++ memalloc_nofs_restore(memalloc);
+ mutex_unlock(&dquot->dq_lock);
+ return ret;
+ }
+@@ -469,18 +497,21 @@ EXPORT_SYMBOL(dquot_acquire);
+ int dquot_commit(struct dquot *dquot)
+ {
+ int ret = 0;
++ unsigned int memalloc;
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+
+ mutex_lock(&dquot->dq_lock);
++ memalloc = memalloc_nofs_save();
+ if (!clear_dquot_dirty(dquot))
+ goto out_lock;
+ /* Inactive dquot can be only if there was error during read/init
+ * => we have better not writing it */
+- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
++ if (dquot_active(dquot))
+ ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
+ else
+ ret = -EIO;
+ out_lock:
++ memalloc_nofs_restore(memalloc);
+ mutex_unlock(&dquot->dq_lock);
+ return ret;
+ }
+@@ -492,9 +523,11 @@ EXPORT_SYMBOL(dquot_commit);
+ int dquot_release(struct dquot *dquot)
+ {
+ int ret = 0, ret2 = 0;
++ unsigned int memalloc;
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+
+ mutex_lock(&dquot->dq_lock);
++ memalloc = memalloc_nofs_save();
+ /* Check whether we are not racing with some other dqget() */
+ if (dquot_is_busy(dquot))
+ goto out_dqlock;
+@@ -510,6 +543,7 @@ int dquot_release(struct dquot *dquot)
+ }
+ clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+ out_dqlock:
++ memalloc_nofs_restore(memalloc);
+ mutex_unlock(&dquot->dq_lock);
+ return ret;
+ }
+@@ -537,6 +571,8 @@ static void invalidate_dquots(struct super_block *sb, int type)
+ struct dquot *dquot, *tmp;
+
+ restart:
++ flush_delayed_work(&quota_release_work);
++
+ spin_lock(&dq_list_lock);
+ list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
+ if (dquot->dq_sb != sb)
+@@ -545,7 +581,13 @@ restart:
+ continue;
+ /* Wait for dquot users */
+ if (atomic_read(&dquot->dq_count)) {
+- dqgrab(dquot);
++ /* dquot in releasing_dquots, flush and retry */
++ if (!list_empty(&dquot->dq_free)) {
++ spin_unlock(&dq_list_lock);
++ goto restart;
++ }
++
++ atomic_inc(&dquot->dq_count);
+ spin_unlock(&dq_list_lock);
+ /*
+ * Once dqput() wakes us up, we know it's time to free
+@@ -587,7 +629,7 @@ int dquot_scan_active(struct super_block *sb,
+
+ spin_lock(&dq_list_lock);
+ list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
++ if (!dquot_active(dquot))
+ continue;
+ if (dquot->dq_sb != sb)
+ continue;
+@@ -602,7 +644,7 @@ int dquot_scan_active(struct super_block *sb,
+ * outstanding call and recheck the DQ_ACTIVE_B after that.
+ */
+ wait_on_dquot(dquot);
+- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
++ if (dquot_active(dquot)) {
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+@@ -618,6 +660,18 @@ out:
+ }
+ EXPORT_SYMBOL(dquot_scan_active);
+
++static inline int dquot_write_dquot(struct dquot *dquot)
++{
++ int ret = dquot->dq_sb->dq_op->write_dquot(dquot);
++ if (ret < 0) {
++ quota_error(dquot->dq_sb, "Can't write quota structure "
++ "(error %d). Quota may get out of sync!", ret);
++ /* Clear dirty bit anyway to avoid infinite loop. */
++ clear_dquot_dirty(dquot);
++ }
++ return ret;
++}
++
+ /* Write all dquot structures to quota files */
+ int dquot_writeback_dquots(struct super_block *sb, int type)
+ {
+@@ -641,23 +695,16 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
+ dquot = list_first_entry(&dirty, struct dquot,
+ dq_dirty);
+
+- WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
++ WARN_ON(!dquot_active(dquot));
+
+ /* Now we have active dquot from which someone is
+ * holding reference so we can safely just increase
+ * use count */
+ dqgrab(dquot);
+ spin_unlock(&dq_list_lock);
+- err = sb->dq_op->write_dquot(dquot);
+- if (err) {
+- /*
+- * Clear dirty bit anyway to avoid infinite
+- * loop here.
+- */
+- clear_dquot_dirty(dquot);
+- if (!ret)
+- ret = err;
+- }
++ err = dquot_write_dquot(dquot);
++ if (err && !ret)
++ ret = err;
+ dqput(dquot);
+ spin_lock(&dq_list_lock);
+ }
+@@ -690,9 +737,14 @@ int dquot_quota_sync(struct super_block *sb, int type)
+ /* This is not very clever (and fast) but currently I don't know about
+ * any other simple way of getting quota data to disk and we must get
+ * them there for userspace to be visible... */
+- if (sb->s_op->sync_fs)
+- sb->s_op->sync_fs(sb, 1);
+- sync_blockdev(sb->s_bdev);
++ if (sb->s_op->sync_fs) {
++ ret = sb->s_op->sync_fs(sb, 1);
++ if (ret)
++ return ret;
++ }
++ ret = sync_blockdev(sb->s_bdev);
++ if (ret)
++ return ret;
+
+ /*
+ * Now when everything is written we can discard the pagecache so
+@@ -745,13 +797,54 @@ static struct shrinker dqcache_shrinker = {
+ .seeks = DEFAULT_SEEKS,
+ };
+
++/*
++ * Safely release dquot and put reference to dquot.
++ */
++static void quota_release_workfn(struct work_struct *work)
++{
++ struct dquot *dquot;
++ struct list_head rls_head;
++
++ spin_lock(&dq_list_lock);
++ /* Exchange the list head to avoid livelock. */
++ list_replace_init(&releasing_dquots, &rls_head);
++ spin_unlock(&dq_list_lock);
++
++restart:
++ synchronize_srcu(&dquot_srcu);
++ spin_lock(&dq_list_lock);
++ while (!list_empty(&rls_head)) {
++ dquot = list_first_entry(&rls_head, struct dquot, dq_free);
++ /* Dquot got used again? */
++ if (atomic_read(&dquot->dq_count) > 1) {
++ remove_free_dquot(dquot);
++ atomic_dec(&dquot->dq_count);
++ continue;
++ }
++ if (dquot_dirty(dquot)) {
++ spin_unlock(&dq_list_lock);
++ /* Commit dquot before releasing */
++ dquot_write_dquot(dquot);
++ goto restart;
++ }
++ if (dquot_active(dquot)) {
++ spin_unlock(&dq_list_lock);
++ dquot->dq_sb->dq_op->release_dquot(dquot);
++ goto restart;
++ }
++ /* Dquot is inactive and clean, now move it to free list */
++ remove_free_dquot(dquot);
++ atomic_dec(&dquot->dq_count);
++ put_dquot_last(dquot);
++ }
++ spin_unlock(&dq_list_lock);
++}
++
+ /*
+ * Put reference to dquot
+ */
+ void dqput(struct dquot *dquot)
+ {
+- int ret;
+-
+ if (!dquot)
+ return;
+ #ifdef CONFIG_QUOTA_DEBUG
+@@ -763,7 +856,7 @@ void dqput(struct dquot *dquot)
+ }
+ #endif
+ dqstats_inc(DQST_DROPS);
+-we_slept:
++
+ spin_lock(&dq_list_lock);
+ if (atomic_read(&dquot->dq_count) > 1) {
+ /* We have more than one user... nothing to do */
+@@ -775,35 +868,15 @@ we_slept:
+ spin_unlock(&dq_list_lock);
+ return;
+ }
++
+ /* Need to release dquot? */
+- if (dquot_dirty(dquot)) {
+- spin_unlock(&dq_list_lock);
+- /* Commit dquot before releasing */
+- ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+- if (ret < 0) {
+- quota_error(dquot->dq_sb, "Can't write quota structure"
+- " (error %d). Quota may get out of sync!",
+- ret);
+- /*
+- * We clear dirty bit anyway, so that we avoid
+- * infinite loop here
+- */
+- clear_dquot_dirty(dquot);
+- }
+- goto we_slept;
+- }
+- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+- spin_unlock(&dq_list_lock);
+- dquot->dq_sb->dq_op->release_dquot(dquot);
+- goto we_slept;
+- }
+- atomic_dec(&dquot->dq_count);
+ #ifdef CONFIG_QUOTA_DEBUG
+ /* sanity check */
+ BUG_ON(!list_empty(&dquot->dq_free));
+ #endif
+- put_dquot_last(dquot);
++ put_releasing_dquots(dquot);
+ spin_unlock(&dq_list_lock);
++ queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
+ }
+ EXPORT_SYMBOL(dqput);
+
+@@ -893,7 +966,7 @@ we_slept:
+ * already finished or it will be canceled due to dq_count > 1 test */
+ wait_on_dquot(dquot);
+ /* Read the dquot / allocate space in quota file */
+- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
++ if (!dquot_active(dquot)) {
+ int err;
+
+ err = sb->dq_op->acquire_dquot(dquot);
+@@ -1410,7 +1483,7 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
+ return QUOTA_NL_NOWARN;
+ }
+
+-static int dquot_active(const struct inode *inode)
++static int inode_quota_active(const struct inode *inode)
+ {
+ struct super_block *sb = inode->i_sb;
+
+@@ -1433,7 +1506,7 @@ static int __dquot_initialize(struct inode *inode, int type)
+ qsize_t rsv;
+ int ret = 0;
+
+- if (!dquot_active(inode))
++ if (!inode_quota_active(inode))
+ return 0;
+
+ dquots = i_dquot(inode);
+@@ -1541,7 +1614,7 @@ bool dquot_initialize_needed(struct inode *inode)
+ struct dquot **dquots;
+ int i;
+
+- if (!dquot_active(inode))
++ if (!inode_quota_active(inode))
+ return false;
+
+ dquots = i_dquot(inode);
+@@ -1652,7 +1725,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
+ int reserve = flags & DQUOT_SPACE_RESERVE;
+ struct dquot **dquots;
+
+- if (!dquot_active(inode)) {
++ if (!inode_quota_active(inode)) {
+ if (reserve) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+@@ -1722,7 +1795,7 @@ int dquot_alloc_inode(struct inode *inode)
+ struct dquot_warn warn[MAXQUOTAS];
+ struct dquot * const *dquots;
+
+- if (!dquot_active(inode))
++ if (!inode_quota_active(inode))
+ return 0;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ warn[cnt].w_type = QUOTA_NL_NOWARN;
+@@ -1765,7 +1838,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
+ struct dquot **dquots;
+ int cnt, index;
+
+- if (!dquot_active(inode)) {
++ if (!inode_quota_active(inode)) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+@@ -1807,7 +1880,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
+ struct dquot **dquots;
+ int cnt, index;
+
+- if (!dquot_active(inode)) {
++ if (!inode_quota_active(inode)) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ __inode_sub_bytes(inode, number);
+@@ -1851,7 +1924,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
+ struct dquot **dquots;
+ int reserve = flags & DQUOT_SPACE_RESERVE, index;
+
+- if (!dquot_active(inode)) {
++ if (!inode_quota_active(inode)) {
+ if (reserve) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+@@ -1906,7 +1979,7 @@ void dquot_free_inode(struct inode *inode)
+ struct dquot * const *dquots;
+ int index;
+
+- if (!dquot_active(inode))
++ if (!inode_quota_active(inode))
+ return;
+
+ dquots = i_dquot(inode);
+@@ -2077,7 +2150,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
+ struct super_block *sb = inode->i_sb;
+ int ret;
+
+- if (!dquot_active(inode))
++ if (!inode_quota_active(inode))
+ return 0;
+
+ if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
+@@ -2302,6 +2375,8 @@ static int vfs_setup_quota_inode(struct inode *inode, int type)
+ struct super_block *sb = inode->i_sb;
+ struct quota_info *dqopt = sb_dqopt(sb);
+
++ if (is_bad_inode(inode))
++ return -EUCLEAN;
+ if (!S_ISREG(inode->i_mode))
+ return -EACCES;
+ if (IS_RDONLY(inode))
+@@ -2396,7 +2471,8 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
+
+ error = add_dquot_ref(sb, type);
+ if (error)
+- dquot_disable(sb, type, flags);
++ dquot_disable(sb, type,
++ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
+ return error;
+ out_fmt:
+diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
+index d3e995e1046fb..7e65d67de9f33 100644
+--- a/fs/quota/quota_tree.c
++++ b/fs/quota/quota_tree.c
+@@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
+ return ret;
+ }
+
++static inline int do_check_range(struct super_block *sb, const char *val_name,
++ uint val, uint min_val, uint max_val)
++{
++ if (val < min_val || val > max_val) {
++ quota_error(sb, "Getting %s %u out of range %u-%u",
++ val_name, val, min_val, max_val);
++ return -EUCLEAN;
++ }
++
++ return 0;
++}
++
++static int check_dquot_block_header(struct qtree_mem_dqinfo *info,
++ struct qt_disk_dqdbheader *dh)
++{
++ int err = 0;
++
++ err = do_check_range(info->dqi_sb, "dqdh_next_free",
++ le32_to_cpu(dh->dqdh_next_free), 0,
++ info->dqi_blocks - 1);
++ if (err)
++ return err;
++ err = do_check_range(info->dqi_sb, "dqdh_prev_free",
++ le32_to_cpu(dh->dqdh_prev_free), 0,
++ info->dqi_blocks - 1);
++
++ return err;
++}
++
+ /* Remove empty block from list and return it */
+ static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+ {
+@@ -85,6 +114,9 @@ static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+ ret = read_blk(info, blk, buf);
+ if (ret < 0)
+ goto out_buf;
++ ret = check_dquot_block_header(info, dh);
++ if (ret)
++ goto out_buf;
+ info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+ }
+ else {
+@@ -232,6 +264,9 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+ *err = read_blk(info, blk, buf);
+ if (*err < 0)
+ goto out_buf;
++ *err = check_dquot_block_header(info, dh);
++ if (*err)
++ goto out_buf;
+ } else {
+ blk = get_free_dqblk(info);
+ if ((int)blk < 0) {
+@@ -414,6 +449,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ quota_error(dquot->dq_sb, "Quota structure has offset to "
+ "other block (%u) than it should (%u)", blk,
+ (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
++ ret = -EIO;
+ goto out_buf;
+ }
+ ret = read_blk(info, blk, buf);
+@@ -423,6 +459,9 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ goto out_buf;
+ }
+ dh = (struct qt_disk_dqdbheader *)buf;
++ ret = check_dquot_block_header(info, dh);
++ if (ret)
++ goto out_buf;
+ le16_add_cpu(&dh->dqdh_entries, -1);
+ if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
+ ret = remove_free_dqentry(info, buf, blk);
+@@ -479,6 +518,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ goto out_buf;
+ }
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
++ if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) {
++ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
++ newblk, info->dqi_blocks);
++ ret = -EUCLEAN;
++ goto out_buf;
++ }
++
+ if (depth == info->dqi_qtree_depth - 1) {
+ ret = free_dqentry(info, dquot, newblk);
+ newblk = 0;
+@@ -578,6 +624,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (!blk) /* No reference? */
+ goto out_buf;
++ if (blk < QT_TREEOFF || blk >= info->dqi_blocks) {
++ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
++ blk, info->dqi_blocks);
++ ret = -EUCLEAN;
++ goto out_buf;
++ }
++
+ if (depth < info->dqi_qtree_depth - 1)
+ ret = find_tree_dqentry(info, dquot, blk, depth+1);
+ else
+diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
+index 65e7e56005b8f..dc1f9e6d95a5c 100644
+--- a/fs/ramfs/inode.c
++++ b/fs/ramfs/inode.c
+@@ -274,7 +274,7 @@ int ramfs_init_fs_context(struct fs_context *fc)
+ return 0;
+ }
+
+-static void ramfs_kill_sb(struct super_block *sb)
++void ramfs_kill_sb(struct super_block *sb)
+ {
+ kfree(sb->s_fs_info);
+ kill_litter_super(sb);
+diff --git a/fs/read_write.c b/fs/read_write.c
+index af057c57bdc64..b4b15279b66b6 100644
+--- a/fs/read_write.c
++++ b/fs/read_write.c
+@@ -1250,6 +1250,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
+ count, fl);
+ file_end_write(out.file);
+ } else {
++ if (out.file->f_flags & O_NONBLOCK)
++ fl |= SPLICE_F_NONBLOCK;
++
+ retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
+ }
+
+@@ -1384,28 +1387,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+ }
+ EXPORT_SYMBOL(generic_copy_file_range);
+
+-static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
+- struct file *file_out, loff_t pos_out,
+- size_t len, unsigned int flags)
+-{
+- /*
+- * Although we now allow filesystems to handle cross sb copy, passing
+- * a file of the wrong filesystem type to filesystem driver can result
+- * in an attempt to dereference the wrong type of ->private_data, so
+- * avoid doing that until we really have a good reason. NFS defines
+- * several different file_system_type structures, but they all end up
+- * using the same ->copy_file_range() function pointer.
+- */
+- if (file_out->f_op->copy_file_range &&
+- file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
+- return file_out->f_op->copy_file_range(file_in, pos_in,
+- file_out, pos_out,
+- len, flags);
+-
+- return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+- flags);
+-}
+-
+ /*
+ * Performs necessary checks before doing a file copy
+ *
+@@ -1427,6 +1408,26 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
+ if (ret)
+ return ret;
+
++ /*
++ * We allow some filesystems to handle cross sb copy, but passing
++ * a file of the wrong filesystem type to filesystem driver can result
++ * in an attempt to dereference the wrong type of ->private_data, so
++ * avoid doing that until we really have a good reason.
++ *
++ * nfs and cifs define several different file_system_type structures
++ * and several different sets of file_operations, but they all end up
++ * using the same ->copy_file_range() function pointer.
++ */
++ if (flags & COPY_FILE_SPLICE) {
++ /* cross sb splice is allowed */
++ } else if (file_out->f_op->copy_file_range) {
++ if (file_in->f_op->copy_file_range !=
++ file_out->f_op->copy_file_range)
++ return -EXDEV;
++ } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
++ return -EXDEV;
++ }
++
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+@@ -1469,8 +1470,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ size_t len, unsigned int flags)
+ {
+ ssize_t ret;
++ bool splice = flags & COPY_FILE_SPLICE;
+
+- if (flags != 0)
++ if (flags & ~COPY_FILE_SPLICE)
+ return -EINVAL;
+
+ ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
+@@ -1492,26 +1494,43 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ file_start_write(file_out);
+
+ /*
+- * Try cloning first, this is supported by more file systems, and
+- * more efficient if both clone and copy are supported (e.g. NFS).
++ * Cloning is supported by more file systems, so we implement copy on
++ * same sb using clone, but for filesystems where both clone and copy
++ * are supported (e.g. nfs,cifs), we only call the copy method.
+ */
+- if (file_in->f_op->remap_file_range &&
+- file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
+- loff_t cloned;
++ if (!splice && file_out->f_op->copy_file_range) {
++ ret = file_out->f_op->copy_file_range(file_in, pos_in,
++ file_out, pos_out,
++ len, flags);
++ goto done;
++ }
+
+- cloned = file_in->f_op->remap_file_range(file_in, pos_in,
++ if (!splice && file_in->f_op->remap_file_range &&
++ file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
++ ret = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out,
+ min_t(loff_t, MAX_RW_COUNT, len),
+ REMAP_FILE_CAN_SHORTEN);
+- if (cloned > 0) {
+- ret = cloned;
++ if (ret > 0)
+ goto done;
+- }
+ }
+
+- ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+- flags);
+- WARN_ON_ONCE(ret == -EOPNOTSUPP);
++ /*
++ * We can get here for same sb copy of filesystems that do not implement
++ * ->copy_file_range() in case filesystem does not support clone or in
++ * case filesystem supports clone but rejected the clone request (e.g.
++ * because it was not block aligned).
++ *
++ * In both cases, fall back to kernel copy so we are able to maintain a
++ * consistent story about which filesystems support copy_file_range()
++ * and which filesystems do not, that will allow userspace tools to
++ * make consistent decisions w.r.t. using copy_file_range().
++ *
++ * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
++ */
++ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
++ flags);
++
+ done:
+ if (ret > 0) {
+ fsnotify_access(file_in);
+@@ -1562,6 +1581,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
+ pos_out = f_out.file->f_pos;
+ }
+
++ ret = -EINVAL;
++ if (flags != 0)
++ goto out;
++
+ ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
+ flags);
+ if (ret > 0) {
+diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
+index 0834b101c316d..86a1dee6e2e7d 100644
+--- a/fs/reiserfs/journal.c
++++ b/fs/reiserfs/journal.c
+@@ -2323,7 +2323,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
+ int i, j;
+
+ bh = __getblk(dev, block, bufsize);
+- if (buffer_uptodate(bh))
++ if (!bh || buffer_uptodate(bh))
+ return (bh);
+
+ if (block + BUFNR > max_block) {
+@@ -2333,6 +2333,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
+ j = 1;
+ for (i = 1; i < blocks; i++) {
+ bh = __getblk(dev, block + i, bufsize);
++ if (!bh)
++ break;
+ if (buffer_uptodate(bh)) {
+ brelse(bh);
+ break;
+diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
+index 3d7a35d6a18bc..b916859992ec8 100644
+--- a/fs/reiserfs/namei.c
++++ b/fs/reiserfs/namei.c
+@@ -696,6 +696,7 @@ static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+
+ out_failed:
+ reiserfs_write_unlock(dir->i_sb);
++ reiserfs_security_free(&security);
+ return retval;
+ }
+
+@@ -779,6 +780,7 @@ static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+
+ out_failed:
+ reiserfs_write_unlock(dir->i_sb);
++ reiserfs_security_free(&security);
+ return retval;
+ }
+
+@@ -878,6 +880,7 @@ static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ retval = journal_end(&th);
+ out_failed:
+ reiserfs_write_unlock(dir->i_sb);
++ reiserfs_security_free(&security);
+ return retval;
+ }
+
+@@ -1194,6 +1197,7 @@ static int reiserfs_symlink(struct user_namespace *mnt_userns,
+ retval = journal_end(&th);
+ out_failed:
+ reiserfs_write_unlock(parent_dir->i_sb);
++ reiserfs_security_free(&security);
+ return retval;
+ }
+
+diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
+index 58481f8d63d5b..f7b05c6b3dcf4 100644
+--- a/fs/reiserfs/super.c
++++ b/fs/reiserfs/super.c
+@@ -1437,7 +1437,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
+ unsigned long safe_mask = 0;
+ unsigned int commit_max_age = (unsigned int)-1;
+ struct reiserfs_journal *journal = SB_JOURNAL(s);
+- char *new_opts;
+ int err;
+ char *qf_names[REISERFS_MAXQUOTAS];
+ unsigned int qfmt = 0;
+@@ -1445,10 +1444,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
+ int i;
+ #endif
+
+- new_opts = kstrdup(arg, GFP_KERNEL);
+- if (arg && !new_opts)
+- return -ENOMEM;
+-
+ sync_filesystem(s);
+ reiserfs_write_lock(s);
+
+@@ -1599,7 +1594,6 @@ out_ok_unlocked:
+ out_err_unlock:
+ reiserfs_write_unlock(s);
+ out_err:
+- kfree(new_opts);
+ return err;
+ }
+
+diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
+index 8965c8e5e172b..157ebfe2456bb 100644
+--- a/fs/reiserfs/xattr_security.c
++++ b/fs/reiserfs/xattr_security.c
+@@ -50,6 +50,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
+ int error;
+
+ sec->name = NULL;
++ sec->value = NULL;
+
+ /* Don't add selinux attributes on xattrs - they'll never get used */
+ if (IS_PRIVATE(dir))
+@@ -81,11 +82,15 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+ struct inode *inode,
+ struct reiserfs_security_handle *sec)
+ {
++ char xattr_name[XATTR_NAME_MAX + 1] = XATTR_SECURITY_PREFIX;
+ int error;
+- if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX))
++
++ if (XATTR_SECURITY_PREFIX_LEN + strlen(sec->name) > XATTR_NAME_MAX)
+ return -EINVAL;
+
+- error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value,
++ strlcat(xattr_name, sec->name, sizeof(xattr_name));
++
++ error = reiserfs_xattr_set_handle(th, inode, xattr_name, sec->value,
+ sec->length, XATTR_CREATE);
+ if (error == -ENODATA || error == -EOPNOTSUPP)
+ error = 0;
+@@ -95,7 +100,6 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+
+ void reiserfs_security_free(struct reiserfs_security_handle *sec)
+ {
+- kfree(sec->name);
+ kfree(sec->value);
+ sec->name = NULL;
+ sec->value = NULL;
+diff --git a/fs/remap_range.c b/fs/remap_range.c
+index 6d4a9beaa0974..e69bafb96f093 100644
+--- a/fs/remap_range.c
++++ b/fs/remap_range.c
+@@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
+ * Otherwise, make sure the count is also block-aligned, having
+ * already confirmed the starting offsets' block alignment.
+ */
+- if (pos_in + count == size_in) {
++ if (pos_in + count == size_in &&
++ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
+ bcount = ALIGN(size_in, bs) - pos_in;
+ } else {
+ if (!IS_ALIGNED(count, bs))
+diff --git a/fs/select.c b/fs/select.c
+index 945896d0ac9e7..5edffee1162c2 100644
+--- a/fs/select.c
++++ b/fs/select.c
+@@ -458,9 +458,11 @@ get_max:
+ return max;
+ }
+
+-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
+-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
+-#define POLLEX_SET (EPOLLPRI)
++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
++ EPOLLNVAL)
++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
++ EPOLLNVAL)
++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
+
+ static inline void wait_key_set(poll_table *wait, unsigned long in,
+ unsigned long out, unsigned long bit,
+@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+ break;
+ if (!(bit & all_bits))
+ continue;
++ mask = EPOLLNVAL;
+ f = fdget(i);
+ if (f.file) {
+ wait_key_set(wait, in, out, bit,
+@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+ mask = vfs_poll(f.file, wait);
+
+ fdput(f);
+- if ((mask & POLLIN_SET) && (in & bit)) {
+- res_in |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- if ((mask & POLLOUT_SET) && (out & bit)) {
+- res_out |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- if ((mask & POLLEX_SET) && (ex & bit)) {
+- res_ex |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- /* got something, stop busy polling */
+- if (retval) {
+- can_busy_loop = false;
+- busy_flag = 0;
+-
+- /*
+- * only remember a returned
+- * POLL_BUSY_LOOP if we asked for it
+- */
+- } else if (busy_flag & mask)
+- can_busy_loop = true;
+-
+ }
++ if ((mask & POLLIN_SET) && (in & bit)) {
++ res_in |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ if ((mask & POLLOUT_SET) && (out & bit)) {
++ res_out |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ if ((mask & POLLEX_SET) && (ex & bit)) {
++ res_ex |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ /* got something, stop busy polling */
++ if (retval) {
++ can_busy_loop = false;
++ busy_flag = 0;
++
++ /*
++ * only remember a returned
++ * POLL_BUSY_LOOP if we asked for it
++ */
++ } else if (busy_flag & mask)
++ can_busy_loop = true;
++
+ }
+ if (res_in)
+ *rinp = res_in;
+diff --git a/fs/seq_file.c b/fs/seq_file.c
+index 4a2cda04d3e29..b17ee4c4f618a 100644
+--- a/fs/seq_file.c
++++ b/fs/seq_file.c
+@@ -947,6 +947,38 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
+ }
+ EXPORT_SYMBOL(seq_list_next);
+
++struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos)
++{
++ struct list_head *lh;
++
++ list_for_each_rcu(lh, head)
++ if (pos-- == 0)
++ return lh;
++
++ return NULL;
++}
++EXPORT_SYMBOL(seq_list_start_rcu);
++
++struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos)
++{
++ if (!pos)
++ return head;
++
++ return seq_list_start_rcu(head, pos - 1);
++}
++EXPORT_SYMBOL(seq_list_start_head_rcu);
++
++struct list_head *seq_list_next_rcu(void *v, struct list_head *head,
++ loff_t *ppos)
++{
++ struct list_head *lh;
++
++ lh = list_next_rcu((struct list_head *)v);
++ ++*ppos;
++ return lh == head ? NULL : lh;
++}
++EXPORT_SYMBOL(seq_list_next_rcu);
++
+ /**
+ * seq_hlist_start - start an iteration of a hlist
+ * @head: the head of the hlist
+diff --git a/fs/signalfd.c b/fs/signalfd.c
+index 040e1cf905282..65ce0e72e7b95 100644
+--- a/fs/signalfd.c
++++ b/fs/signalfd.c
+@@ -35,17 +35,7 @@
+
+ void signalfd_cleanup(struct sighand_struct *sighand)
+ {
+- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+- /*
+- * The lockless check can race with remove_wait_queue() in progress,
+- * but in this case its caller should run under rcu_read_lock() and
+- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
+- */
+- if (likely(!waitqueue_active(wqh)))
+- return;
+-
+- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
+- wake_up_poll(wqh, EPOLLHUP | POLLFREE);
++ wake_up_pollfree(&sighand->signalfd_wqh);
+ }
+
+ struct signalfd_ctx {
+diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c
+index 85ba15a60b13b..043e4cb839fa2 100644
+--- a/fs/smbfs_common/cifs_arc4.c
++++ b/fs/smbfs_common/cifs_arc4.c
+@@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l
+ ctx->y = y;
+ }
+ EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
+-
+-static int __init
+-init_smbfs_common(void)
+-{
+- return 0;
+-}
+-static void __init
+-exit_smbfs_common(void)
+-{
+-}
+-
+-module_init(init_smbfs_common)
+-module_exit(exit_smbfs_common)
+diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
+index b3fdc8212c5f5..95f8e89017689 100644
+--- a/fs/squashfs/squashfs_fs.h
++++ b/fs/squashfs/squashfs_fs.h
+@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
+ #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
+ sizeof(u64))
+ /* xattr id lookup table defines */
+-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
++#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
+
+ #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
+ SQUASHFS_METADATA_SIZE)
+diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
+index 1e90c2575f9bf..0c1ae97897317 100644
+--- a/fs/squashfs/squashfs_fs_sb.h
++++ b/fs/squashfs/squashfs_fs_sb.h
+@@ -63,7 +63,7 @@ struct squashfs_sb_info {
+ long long bytes_used;
+ unsigned int inodes;
+ unsigned int fragments;
+- int xattr_ids;
++ unsigned int xattr_ids;
+ unsigned int ids;
+ bool panic_on_errors;
+ };
+diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
+index d8a270d3ac4cb..f1a463d8bfa02 100644
+--- a/fs/squashfs/xattr.h
++++ b/fs/squashfs/xattr.h
+@@ -10,12 +10,12 @@
+
+ #ifdef CONFIG_SQUASHFS_XATTR
+ extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
+- u64 *, int *);
++ u64 *, unsigned int *);
+ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
+ unsigned int *, unsigned long long *);
+ #else
+ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
+- u64 start, u64 *xattr_table_start, int *xattr_ids)
++ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
+ {
+ struct squashfs_xattr_id_table *id_table;
+
+diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
+index 087cab8c78f4e..c8469c656e0dc 100644
+--- a/fs/squashfs/xattr_id.c
++++ b/fs/squashfs/xattr_id.c
+@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
+ * Read uncompressed xattr id lookup table indexes from disk into memory
+ */
+ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
+- u64 *xattr_table_start, int *xattr_ids)
++ u64 *xattr_table_start, unsigned int *xattr_ids)
+ {
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ unsigned int len, indexes;
+diff --git a/fs/stat.c b/fs/stat.c
+index 28d2020ba1f42..246d138ec0669 100644
+--- a/fs/stat.c
++++ b/fs/stat.c
+@@ -334,9 +334,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
+ # define choose_32_64(a,b) b
+ #endif
+
+-#define valid_dev(x) choose_32_64(old_valid_dev(x),true)
+-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
+-
+ #ifndef INIT_STRUCT_STAT_PADDING
+ # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
+ #endif
+@@ -345,7 +342,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
+ {
+ struct stat tmp;
+
+- if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
++ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
++ return -EOVERFLOW;
++ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
+ return -EOVERFLOW;
+ #if BITS_PER_LONG == 32
+ if (stat->size > MAX_NON_LFS)
+@@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
+ #endif
+
+ INIT_STRUCT_STAT_PADDING(tmp);
+- tmp.st_dev = encode_dev(stat->dev);
++ tmp.st_dev = new_encode_dev(stat->dev);
+ tmp.st_ino = stat->ino;
+ if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
+ return -EOVERFLOW;
+@@ -363,7 +362,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
+ return -EOVERFLOW;
+ SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+ SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+- tmp.st_rdev = encode_dev(stat->rdev);
++ tmp.st_rdev = new_encode_dev(stat->rdev);
+ tmp.st_size = stat->size;
+ tmp.st_atime = stat->atime.tv_sec;
+ tmp.st_mtime = stat->mtime.tv_sec;
+@@ -644,11 +643,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+ {
+ struct compat_stat tmp;
+
+- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
++ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev))
++ return -EOVERFLOW;
++ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev))
+ return -EOVERFLOW;
+
+ memset(&tmp, 0, sizeof(tmp));
+- tmp.st_dev = old_encode_dev(stat->dev);
++ tmp.st_dev = new_encode_dev(stat->dev);
+ tmp.st_ino = stat->ino;
+ if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
+ return -EOVERFLOW;
+@@ -658,7 +659,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+ return -EOVERFLOW;
+ SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+ SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+- tmp.st_rdev = old_encode_dev(stat->rdev);
++ tmp.st_rdev = new_encode_dev(stat->rdev);
+ if ((u64) stat->size > MAX_NON_LFS)
+ return -EOVERFLOW;
+ tmp.st_size = stat->size;
+diff --git a/fs/statfs.c b/fs/statfs.c
+index 0ba34c1355932..96d1c3edf289c 100644
+--- a/fs/statfs.c
++++ b/fs/statfs.c
+@@ -130,6 +130,7 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
+ else {
++ memset(&buf, 0, sizeof(buf));
+ if (sizeof buf.f_blocks == 4) {
+ if ((st->f_blocks | st->f_bfree | st->f_bavail |
+ st->f_bsize | st->f_frsize) &
+@@ -158,7 +159,6 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+- memset(buf.f_spare, 0, sizeof(buf.f_spare));
+ }
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
+@@ -171,6 +171,7 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
+ else {
++ memset(&buf, 0, sizeof(buf));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+@@ -182,7 +183,6 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+- memset(buf.f_spare, 0, sizeof(buf.f_spare));
+ }
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
+diff --git a/fs/super.c b/fs/super.c
+index bcef3a6f4c4b5..048576b19af63 100644
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -293,7 +293,7 @@ static void __put_super(struct super_block *s)
+ WARN_ON(s->s_inode_lru.node);
+ WARN_ON(!list_empty(&s->s_mounts));
+ security_sb_free(s);
+- fscrypt_sb_free(s);
++ fscrypt_destroy_keyring(s);
+ put_user_ns(s->s_user_ns);
+ kfree(s->s_subtype);
+ call_rcu(&s->rcu, destroy_super_rcu);
+@@ -450,12 +450,22 @@ void generic_shutdown_super(struct super_block *sb)
+
+ cgroup_writeback_umount();
+
+- /* evict all inodes with zero refcount */
++ /* Evict all inodes with zero refcount. */
+ evict_inodes(sb);
+- /* only nonzero refcount inodes can have marks */
++
++ /*
++ * Clean up and evict any inodes that still have references due
++ * to fsnotify or the security policy.
++ */
+ fsnotify_sb_delete(sb);
+ security_sb_delete(sb);
+
++ /*
++ * Now that all potentially-encrypted inodes have been evicted,
++ * the fscrypt keyring can be destroyed.
++ */
++ fscrypt_destroy_keyring(sb);
++
+ if (sb->s_dio_done_wq) {
+ destroy_workqueue(sb->s_dio_done_wq);
+ sb->s_dio_done_wq = NULL;
+@@ -853,6 +863,7 @@ int reconfigure_super(struct fs_context *fc)
+ struct super_block *sb = fc->root->d_sb;
+ int retval;
+ bool remount_ro = false;
++ bool remount_rw = false;
+ bool force = fc->sb_flags & SB_FORCE;
+
+ if (fc->sb_flags_mask & ~MS_RMT_MASK)
+@@ -870,7 +881,7 @@ int reconfigure_super(struct fs_context *fc)
+ bdev_read_only(sb->s_bdev))
+ return -EACCES;
+ #endif
+-
++ remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
+ remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
+ }
+
+@@ -900,6 +911,14 @@ int reconfigure_super(struct fs_context *fc)
+ if (retval)
+ return retval;
+ }
++ } else if (remount_rw) {
++ /*
++ * We set s_readonly_remount here to protect filesystem's
++ * reconfigure code from writes from userspace until
++ * reconfigure finishes.
++ */
++ sb->s_readonly_remount = 1;
++ smp_wmb();
+ }
+
+ if (fc->ops->reconfigure) {
+@@ -1421,8 +1440,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
+ }
+ EXPORT_SYMBOL(mount_nodev);
+
+-static int reconfigure_single(struct super_block *s,
+- int flags, void *data)
++int reconfigure_single(struct super_block *s,
++ int flags, void *data)
+ {
+ struct fs_context *fc;
+ int ret;
+@@ -1616,11 +1635,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb)
+ percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+ }
+
+-static void sb_freeze_unlock(struct super_block *sb)
++static void sb_freeze_unlock(struct super_block *sb, int level)
+ {
+- int level;
+-
+- for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
++ for (level--; level >= 0; level--)
+ percpu_up_write(sb->s_writers.rw_sem + level);
+ }
+
+@@ -1691,7 +1708,14 @@ int freeze_super(struct super_block *sb)
+ sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
+
+ /* All writers are done so after syncing there won't be dirty data */
+- sync_filesystem(sb);
++ ret = sync_filesystem(sb);
++ if (ret) {
++ sb->s_writers.frozen = SB_UNFROZEN;
++ sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
++ wake_up(&sb->s_writers.wait_unfrozen);
++ deactivate_locked_super(sb);
++ return ret;
++ }
+
+ /* Now wait for internal filesystem counter */
+ sb->s_writers.frozen = SB_FREEZE_FS;
+@@ -1703,7 +1727,7 @@ int freeze_super(struct super_block *sb)
+ printk(KERN_ERR
+ "VFS:Filesystem freeze failed\n");
+ sb->s_writers.frozen = SB_UNFROZEN;
+- sb_freeze_unlock(sb);
++ sb_freeze_unlock(sb, SB_FREEZE_FS);
+ wake_up(&sb->s_writers.wait_unfrozen);
+ deactivate_locked_super(sb);
+ return ret;
+@@ -1748,7 +1772,7 @@ static int thaw_super_locked(struct super_block *sb)
+ }
+
+ sb->s_writers.frozen = SB_UNFROZEN;
+- sb_freeze_unlock(sb);
++ sb_freeze_unlock(sb, SB_FREEZE_FS);
+ out:
+ wake_up(&sb->s_writers.wait_unfrozen);
+ deactivate_locked_super(sb);
+diff --git a/fs/sync.c b/fs/sync.c
+index 1373a610dc784..c7690016453e4 100644
+--- a/fs/sync.c
++++ b/fs/sync.c
+@@ -3,6 +3,7 @@
+ * High-level sync()-related operations
+ */
+
++#include <linux/blkdev.h>
+ #include <linux/kernel.h>
+ #include <linux/file.h>
+ #include <linux/fs.h>
+@@ -21,25 +22,6 @@
+ #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
+ SYNC_FILE_RANGE_WAIT_AFTER)
+
+-/*
+- * Do the filesystem syncing work. For simple filesystems
+- * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
+- * submit IO for these buffers via __sync_blockdev(). This also speeds up the
+- * wait == 1 case since in that case write_inode() functions do
+- * sync_dirty_buffer() and thus effectively write one block at a time.
+- */
+-static int __sync_filesystem(struct super_block *sb, int wait)
+-{
+- if (wait)
+- sync_inodes_sb(sb);
+- else
+- writeback_inodes_sb(sb, WB_REASON_SYNC);
+-
+- if (sb->s_op->sync_fs)
+- sb->s_op->sync_fs(sb, wait);
+- return __sync_blockdev(sb->s_bdev, wait);
+-}
+-
+ /*
+ * Write out and wait upon all dirty data associated with this
+ * superblock. Filesystem data as well as the underlying block
+@@ -47,7 +29,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
+ */
+ int sync_filesystem(struct super_block *sb)
+ {
+- int ret;
++ int ret = 0;
+
+ /*
+ * We need to be protected against the filesystem going from
+@@ -61,10 +43,31 @@ int sync_filesystem(struct super_block *sb)
+ if (sb_rdonly(sb))
+ return 0;
+
+- ret = __sync_filesystem(sb, 0);
+- if (ret < 0)
++ /*
++ * Do the filesystem syncing work. For simple filesystems
++ * writeback_inodes_sb(sb) just dirties buffers with inodes so we have
++ * to submit I/O for these buffers via sync_blockdev(). This also
++ * speeds up the wait == 1 case since in that case write_inode()
++ * methods call sync_dirty_buffer() and thus effectively write one block
++ * at a time.
++ */
++ writeback_inodes_sb(sb, WB_REASON_SYNC);
++ if (sb->s_op->sync_fs) {
++ ret = sb->s_op->sync_fs(sb, 0);
++ if (ret)
++ return ret;
++ }
++ ret = sync_blockdev_nowait(sb->s_bdev);
++ if (ret)
+ return ret;
+- return __sync_filesystem(sb, 1);
++
++ sync_inodes_sb(sb);
++ if (sb->s_op->sync_fs) {
++ ret = sb->s_op->sync_fs(sb, 1);
++ if (ret)
++ return ret;
++ }
++ return sync_blockdev(sb->s_bdev);
+ }
+ EXPORT_SYMBOL(sync_filesystem);
+
+@@ -81,21 +84,6 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
+ sb->s_op->sync_fs(sb, *(int *)arg);
+ }
+
+-static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
+-{
+- filemap_fdatawrite(bdev->bd_inode->i_mapping);
+-}
+-
+-static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
+-{
+- /*
+- * We keep the error status of individual mapping so that
+- * applications can catch the writeback error using fsync(2).
+- * See filemap_fdatawait_keep_errors() for details.
+- */
+- filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
+-}
+-
+ /*
+ * Sync everything. We start by waking flusher threads so that most of
+ * writeback runs on all devices in parallel. Then we sync all inodes reliably
+@@ -114,8 +102,8 @@ void ksys_sync(void)
+ iterate_supers(sync_inodes_one_sb, NULL);
+ iterate_supers(sync_fs_one_sb, &nowait);
+ iterate_supers(sync_fs_one_sb, &wait);
+- iterate_bdevs(fdatawrite_one_bdev, NULL);
+- iterate_bdevs(fdatawait_one_bdev, NULL);
++ sync_bdevs(false);
++ sync_bdevs(true);
+ if (unlikely(laptop_mode))
+ laptop_sync_completion();
+ }
+@@ -136,10 +124,10 @@ static void do_sync_work(struct work_struct *work)
+ */
+ iterate_supers(sync_inodes_one_sb, &nowait);
+ iterate_supers(sync_fs_one_sb, &nowait);
+- iterate_bdevs(fdatawrite_one_bdev, NULL);
++ sync_bdevs(false);
+ iterate_supers(sync_inodes_one_sb, &nowait);
+ iterate_supers(sync_fs_one_sb, &nowait);
+- iterate_bdevs(fdatawrite_one_bdev, NULL);
++ sync_bdevs(false);
+ printk("Emergency Sync complete\n");
+ kfree(work);
+ }
+diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
+index 749385015a8d3..1e9c520411f84 100644
+--- a/fs/sysv/itree.c
++++ b/fs/sysv/itree.c
+@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
+ */
+ parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
+ bh = sb_getblk(inode->i_sb, parent);
++ if (!bh) {
++ sysv_free_block(inode->i_sb, branch[n].key);
++ break;
++ }
+ lock_buffer(bh);
+ memset(bh->b_data, 0, blocksize);
+ branch[n].bh = bh;
+@@ -438,7 +442,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
+ res += blocks;
+ direct = 1;
+ }
+- return blocks;
++ return res;
+ }
+
+ int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path,
+diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
+index 1261e8b41edb4..066e8344934de 100644
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -141,6 +141,8 @@ struct tracefs_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
++ /* Opt_* bitfield. */
++ unsigned int opts;
+ };
+
+ enum {
+@@ -161,6 +163,77 @@ struct tracefs_fs_info {
+ struct tracefs_mount_opts mount_opts;
+ };
+
++static void change_gid(struct dentry *dentry, kgid_t gid)
++{
++ if (!dentry->d_inode)
++ return;
++ dentry->d_inode->i_gid = gid;
++}
++
++/*
++ * Taken from d_walk, but without the need for handling renames.
++ * Nothing can be renamed while walking the list, as tracefs
++ * does not support renames. This is only called when mounting
++ * or remounting the file system, to set all the files to
++ * the given gid.
++ */
++static void set_gid(struct dentry *parent, kgid_t gid)
++{
++ struct dentry *this_parent;
++ struct list_head *next;
++
++ this_parent = parent;
++ spin_lock(&this_parent->d_lock);
++
++ change_gid(this_parent, gid);
++repeat:
++ next = this_parent->d_subdirs.next;
++resume:
++ while (next != &this_parent->d_subdirs) {
++ struct list_head *tmp = next;
++ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
++ next = tmp->next;
++
++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
++
++ change_gid(dentry, gid);
++
++ if (!list_empty(&dentry->d_subdirs)) {
++ spin_unlock(&this_parent->d_lock);
++ spin_release(&dentry->d_lock.dep_map, _RET_IP_);
++ this_parent = dentry;
++ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
++ goto repeat;
++ }
++ spin_unlock(&dentry->d_lock);
++ }
++ /*
++ * All done at this level ... ascend and resume the search.
++ */
++ rcu_read_lock();
++ascend:
++ if (this_parent != parent) {
++ struct dentry *child = this_parent;
++ this_parent = child->d_parent;
++
++ spin_unlock(&child->d_lock);
++ spin_lock(&this_parent->d_lock);
++
++ /* go into the first sibling still alive */
++ do {
++ next = child->d_child.next;
++ if (next == &this_parent->d_subdirs)
++ goto ascend;
++ child = list_entry(next, struct dentry, d_child);
++ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
++ rcu_read_unlock();
++ goto resume;
++ }
++ rcu_read_unlock();
++ spin_unlock(&this_parent->d_lock);
++ return;
++}
++
+ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+ {
+ substring_t args[MAX_OPT_ARGS];
+@@ -170,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+ kgid_t gid;
+ char *p;
+
++ opts->opts = 0;
+ opts->mode = TRACEFS_DEFAULT_MODE;
+
+ while ((p = strsep(&data, ",")) != NULL) {
+@@ -204,22 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+ * but traditionally tracefs has ignored all mount options
+ */
+ }
++
++ opts->opts |= BIT(token);
+ }
+
+ return 0;
+ }
+
+-static int tracefs_apply_options(struct super_block *sb)
++static int tracefs_apply_options(struct super_block *sb, bool remount)
+ {
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+ struct inode *inode = sb->s_root->d_inode;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+- inode->i_mode &= ~S_IALLUGO;
+- inode->i_mode |= opts->mode;
++ /*
++ * On remount, only reset mode/uid/gid if they were provided as mount
++ * options.
++ */
++
++ if (!remount || opts->opts & BIT(Opt_mode)) {
++ inode->i_mode &= ~S_IALLUGO;
++ inode->i_mode |= opts->mode;
++ }
+
+- inode->i_uid = opts->uid;
+- inode->i_gid = opts->gid;
++ if (!remount || opts->opts & BIT(Opt_uid))
++ inode->i_uid = opts->uid;
++
++ if (!remount || opts->opts & BIT(Opt_gid)) {
++ /* Set all the group ids to the mount option */
++ set_gid(sb->s_root, opts->gid);
++ }
+
+ return 0;
+ }
+@@ -234,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data)
+ if (err)
+ goto fail;
+
+- tracefs_apply_options(sb);
++ tracefs_apply_options(sb, true);
+
+ fail:
+ return err;
+@@ -286,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
+
+ sb->s_op = &tracefs_super_operations;
+
+- tracefs_apply_options(sb);
++ tracefs_apply_options(sb, false);
+
+ return 0;
+
+@@ -414,6 +502,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
+ inode->i_mode = mode;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
+ inode->i_private = data;
++ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
++ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
+ d_instantiate(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+@@ -432,9 +522,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++ /* Do not set bits for OTH */
++ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
+ inode->i_op = ops;
+ inode->i_fop = &simple_dir_operations;
++ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
++ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
+index c0b84e960b20c..9cb05ef9b9dd9 100644
+--- a/fs/ubifs/budget.c
++++ b/fs/ubifs/budget.c
+@@ -212,11 +212,10 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
+ subtract_lebs += 1;
+
+ /*
+- * The GC journal head LEB is not really accessible. And since
+- * different write types go to different heads, we may count only on
+- * one head's space.
++ * Since different write types go to different heads, we should
++ * reserve one LEB for each head.
+ */
+- subtract_lebs += c->jhead_cnt - 1;
++ subtract_lebs += c->jhead_cnt;
+
+ /* We also reserve one LEB for deletions, which bypass budgeting */
+ subtract_lebs += 1;
+@@ -403,7 +402,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
+ dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
+
+ if (req->dirtied_ino)
+- dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
++ dd_growth += c->bi.inode_budget * req->dirtied_ino;
+ if (req->mod_dent)
+ dd_growth += c->bi.dent_budget;
+ dd_growth += req->dirtied_ino_d;
+diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
+index 7c61d0ec0159e..6a054df8b991d 100644
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -68,13 +68,14 @@ static int inherit_flags(const struct inode *dir, umode_t mode)
+ * @c: UBIFS file-system description object
+ * @dir: parent directory inode
+ * @mode: inode mode flags
++ * @is_xattr: whether the inode is an xattr inode
+ *
+ * This function finds an unused inode number, allocates new inode and
+ * initializes it. Returns new inode in case of success and an error code in
+ * case of failure.
+ */
+ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
+- umode_t mode)
++ umode_t mode, bool is_xattr)
+ {
+ int err;
+ struct inode *inode;
+@@ -99,10 +100,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
+ current_time(inode);
+ inode->i_mapping->nrpages = 0;
+
+- err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+- if (err) {
+- ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
+- goto out_iput;
++ if (!is_xattr) {
++ err = fscrypt_prepare_new_inode(dir, inode, &encrypted);
++ if (err) {
++ ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err);
++ goto out_iput;
++ }
+ }
+
+ switch (mode & S_IFMT) {
+@@ -309,7 +312,7 @@ static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
+
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
+- inode = ubifs_new_inode(c, dir, mode);
++ inode = ubifs_new_inode(c, dir, mode, false);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_fname;
+@@ -349,20 +352,91 @@ out_budg:
+ return err;
+ }
+
+-static int do_tmpfile(struct inode *dir, struct dentry *dentry,
+- umode_t mode, struct inode **whiteout)
++static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
+ {
++ int err;
++ umode_t mode = S_IFCHR | WHITEOUT_MODE;
+ struct inode *inode;
+ struct ubifs_info *c = dir->i_sb->s_fs_info;
+- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1};
++
++ /*
++ * Create an inode('nlink = 1') for whiteout without updating journal,
++ * let ubifs_jnl_rename() store it on flash to complete rename whiteout
++ * atomically.
++ */
++
++ dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
++ dentry, mode, dir->i_ino);
++
++ inode = ubifs_new_inode(c, dir, mode, false);
++ if (IS_ERR(inode)) {
++ err = PTR_ERR(inode);
++ goto out_free;
++ }
++
++ init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
++ ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
++
++ err = ubifs_init_security(dir, inode, &dentry->d_name);
++ if (err)
++ goto out_inode;
++
++ /* The dir size is updated by do_rename. */
++ insert_inode_hash(inode);
++
++ return inode;
++
++out_inode:
++ make_bad_inode(inode);
++ iput(inode);
++out_free:
++ ubifs_err(c, "cannot create whiteout file, error %d", err);
++ return ERR_PTR(err);
++}
++
++/**
++ * lock_2_inodes - a wrapper for locking two UBIFS inodes.
++ * @inode1: first inode
++ * @inode2: second inode
++ *
++ * We do not implement any tricks to guarantee strict lock ordering, because
++ * VFS has already done it for us on the @i_mutex. So this is just a simple
++ * wrapper function.
++ */
++static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
++{
++ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
++ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
++}
++
++/**
++ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
++ * @inode1: first inode
++ * @inode2: second inode
++ */
++static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
++{
++ mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
++ mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
++}
++
++static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
++ struct dentry *dentry, umode_t mode)
++{
++ struct inode *inode;
++ struct ubifs_info *c = dir->i_sb->s_fs_info;
++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
++ .dirtied_ino = 1};
+ struct ubifs_budget_req ino_req = { .dirtied_ino = 1 };
+- struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir);
++ struct ubifs_inode *ui;
+ int err, instantiated = 0;
+ struct fscrypt_name nm;
+
+ /*
+- * Budget request settings: new dirty inode, new direntry,
+- * budget for dirtied inode will be released via writeback.
++ * Budget request settings: new inode, new direntry, changing the
++ * parent directory inode.
++ * Allocate budget separately for new dirtied inode, the budget will
++ * be released via writeback.
+ */
+
+ dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+@@ -385,49 +459,38 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
+ return err;
+ }
+
+- inode = ubifs_new_inode(c, dir, mode);
++ inode = ubifs_new_inode(c, dir, mode, false);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_budg;
+ }
+ ui = ubifs_inode(inode);
+
+- if (whiteout) {
+- init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
+- ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
+- }
+-
+ err = ubifs_init_security(dir, inode, &dentry->d_name);
+ if (err)
+ goto out_inode;
+
+ mutex_lock(&ui->ui_mutex);
+ insert_inode_hash(inode);
+-
+- if (whiteout) {
+- mark_inode_dirty(inode);
+- drop_nlink(inode);
+- *whiteout = inode;
+- } else {
+- d_tmpfile(dentry, inode);
+- }
++ d_tmpfile(dentry, inode);
+ ubifs_assert(c, ui->dirty);
+
+ instantiated = 1;
+ mutex_unlock(&ui->ui_mutex);
+
+- mutex_lock(&dir_ui->ui_mutex);
++ lock_2_inodes(dir, inode);
+ err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
+ if (err)
+ goto out_cancel;
+- mutex_unlock(&dir_ui->ui_mutex);
++ unlock_2_inodes(dir, inode);
+
+ ubifs_release_budget(c, &req);
++ fscrypt_free_filename(&nm);
+
+ return 0;
+
+ out_cancel:
+- mutex_unlock(&dir_ui->ui_mutex);
++ unlock_2_inodes(dir, inode);
+ out_inode:
+ make_bad_inode(inode);
+ if (!instantiated)
+@@ -441,12 +504,6 @@ out_budg:
+ return err;
+ }
+
+-static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+- struct dentry *dentry, umode_t mode)
+-{
+- return do_tmpfile(dir, dentry, mode, NULL);
+-}
+-
+ /**
+ * vfs_dent_type - get VFS directory entry type.
+ * @type: UBIFS directory entry type
+@@ -660,32 +717,6 @@ static int ubifs_dir_release(struct inode *dir, struct file *file)
+ return 0;
+ }
+
+-/**
+- * lock_2_inodes - a wrapper for locking two UBIFS inodes.
+- * @inode1: first inode
+- * @inode2: second inode
+- *
+- * We do not implement any tricks to guarantee strict lock ordering, because
+- * VFS has already done it for us on the @i_mutex. So this is just a simple
+- * wrapper function.
+- */
+-static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
+-{
+- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
+-}
+-
+-/**
+- * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
+- * @inode1: first inode
+- * @inode2: second inode
+- */
+-static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
+-{
+- mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+- mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
+-}
+-
+ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+ {
+@@ -949,7 +980,8 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+ struct ubifs_inode *dir_ui = ubifs_inode(dir);
+ struct ubifs_info *c = dir->i_sb->s_fs_info;
+ int err, sz_change;
+- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
++ .dirtied_ino = 1};
+ struct fscrypt_name nm;
+
+ /*
+@@ -970,7 +1002,7 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
+- inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
++ inode = ubifs_new_inode(c, dir, S_IFDIR | mode, false);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_fname;
+@@ -1057,7 +1089,7 @@ static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
+- inode = ubifs_new_inode(c, dir, mode);
++ inode = ubifs_new_inode(c, dir, mode, false);
+ if (IS_ERR(inode)) {
+ kfree(dev);
+ err = PTR_ERR(inode);
+@@ -1113,7 +1145,6 @@ static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ int err, sz_change, len = strlen(symname);
+ struct fscrypt_str disk_link;
+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+- .new_ino_d = ALIGN(len, 8),
+ .dirtied_ino = 1 };
+ struct fscrypt_name nm;
+
+@@ -1129,6 +1160,7 @@ static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+ * Budget request settings: new inode, new direntry and changing parent
+ * directory inode.
+ */
++ req.new_ino_d = ALIGN(disk_link.len - 1, 8);
+ err = ubifs_budget_space(c, &req);
+ if (err)
+ return err;
+@@ -1139,7 +1171,7 @@ static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
+- inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
++ inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO, false);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_fname;
+@@ -1264,17 +1296,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ .dirtied_ino = 3 };
+ struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
+ .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
++ struct ubifs_budget_req wht_req;
+ struct timespec64 time;
+ unsigned int saved_nlink;
+ struct fscrypt_name old_nm, new_nm;
+
+ /*
+- * Budget request settings: deletion direntry, new direntry, removing
+- * the old inode, and changing old and new parent directory inodes.
++ * Budget request settings:
++ * req: deletion direntry, new direntry, removing the old inode,
++ * and changing old and new parent directory inodes.
+ *
+- * However, this operation also marks the target inode as dirty and
+- * does not write it, so we allocate budget for the target inode
+- * separately.
++ * wht_req: new whiteout inode for RENAME_WHITEOUT.
++ *
++ * ino_req: marks the target inode as dirty and does not write it.
+ */
+
+ dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x",
+@@ -1284,6 +1318,8 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ if (unlink) {
+ ubifs_assert(c, inode_is_locked(new_inode));
+
++ /* Budget for old inode's data when its nlink > 1. */
++ req.dirtied_ino_d = ALIGN(ubifs_inode(new_inode)->data_len, 8);
+ err = ubifs_purge_xattrs(new_inode);
+ if (err)
+ return err;
+@@ -1331,20 +1367,44 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ goto out_release;
+ }
+
+- err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout);
+- if (err) {
++ /*
++ * The whiteout inode has no dentry and is pinned in memory;
++ * umount cannot happen during the rename process because we
++ * hold the parent dentry.
++ */
++ whiteout = create_whiteout(old_dir, old_dentry);
++ if (IS_ERR(whiteout)) {
++ err = PTR_ERR(whiteout);
+ kfree(dev);
+ goto out_release;
+ }
+
+- spin_lock(&whiteout->i_lock);
+- whiteout->i_state |= I_LINKABLE;
+- spin_unlock(&whiteout->i_lock);
+-
+ whiteout_ui = ubifs_inode(whiteout);
+ whiteout_ui->data = dev;
+ whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
+ ubifs_assert(c, !whiteout_ui->dirty);
++
++ memset(&wht_req, 0, sizeof(struct ubifs_budget_req));
++ wht_req.new_ino = 1;
++ wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8);
++ /*
++ * To avoid deadlock between space budgeting (holds ui_mutex and
++ * waits for wb work) and writeback work (waits for ui_mutex), do
++ * space budgeting before the ubifs inodes are locked.
++ */
++ err = ubifs_budget_space(c, &wht_req);
++ if (err) {
++ /*
++ * Whiteout inode can not be written on flash by
++ * ubifs_jnl_write_inode(), because it's neither
++ * dirty nor zero-nlink.
++ */
++ iput(whiteout);
++ goto out_release;
++ }
++
++ /* Add the old_dentry size to the old_dir size. */
++ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm));
+ }
+
+ lock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+@@ -1416,29 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
+ if (unlink && IS_SYNC(new_inode))
+ sync = 1;
+- }
+-
+- if (whiteout) {
+- struct ubifs_budget_req wht_req = { .dirtied_ino = 1,
+- .dirtied_ino_d = \
+- ALIGN(ubifs_inode(whiteout)->data_len, 8) };
+-
+- err = ubifs_budget_space(c, &wht_req);
+- if (err) {
+- kfree(whiteout_ui->data);
+- whiteout_ui->data_len = 0;
+- iput(whiteout);
+- goto out_release;
+- }
+-
+- inc_nlink(whiteout);
+- mark_inode_dirty(whiteout);
+-
+- spin_lock(&whiteout->i_lock);
+- whiteout->i_state &= ~I_LINKABLE;
+- spin_unlock(&whiteout->i_lock);
+-
+- iput(whiteout);
++ /*
++ * The whiteout's S_SYNC flag is inherited from the old_dir, and
++ * we have already checked the old dir inode. So there is no need
++ * to check the whiteout.
++ */
+ }
+
+ err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir,
+@@ -1449,6 +1491,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+ ubifs_release_budget(c, &req);
+
++ if (whiteout) {
++ ubifs_release_budget(c, &wht_req);
++ iput(whiteout);
++ }
++
+ mutex_lock(&old_inode_ui->ui_mutex);
+ release = old_inode_ui->dirty;
+ mark_inode_dirty_sync(old_inode);
+@@ -1457,11 +1504,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+ if (release)
+ ubifs_release_budget(c, &ino_req);
+ if (IS_SYNC(old_inode))
+- err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
++ /*
++ * Rename finished here. Although old inode cannot be updated
++ * on flash, old ctime is not a big problem, don't return err
++ * code to userspace.
++ */
++ old_inode->i_sb->s_op->write_inode(old_inode, NULL);
+
+ fscrypt_free_filename(&old_nm);
+ fscrypt_free_filename(&new_nm);
+- return err;
++ return 0;
+
+ out_cancel:
+ if (unlink) {
+@@ -1482,11 +1534,11 @@ out_cancel:
+ inc_nlink(old_dir);
+ }
+ }
++ unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+ if (whiteout) {
+- drop_nlink(whiteout);
++ ubifs_release_budget(c, &wht_req);
+ iput(whiteout);
+ }
+- unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+ out_release:
+ ubifs_release_budget(c, &ino_req);
+ ubifs_release_budget(c, &req);
+@@ -1520,6 +1572,10 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
+ return err;
+ }
+
++ err = ubifs_budget_space(c, &req);
++ if (err)
++ goto out;
++
+ lock_4_inodes(old_dir, new_dir, NULL, NULL);
+
+ time = current_time(old_dir);
+@@ -1545,6 +1601,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
+ unlock_4_inodes(old_dir, new_dir, NULL, NULL);
+ ubifs_release_budget(c, &req);
+
++out:
+ fscrypt_free_filename(&fst_nm);
+ fscrypt_free_filename(&snd_nm);
+ return err;
+diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
+index 5cfa28cd00cdc..7cc2abcb70ae8 100644
+--- a/fs/ubifs/file.c
++++ b/fs/ubifs/file.c
+@@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
+ }
+
+ if (!PagePrivate(page)) {
+- SetPagePrivate(page);
++ attach_page_private(page, (void *)1);
+ atomic_long_inc(&c->dirty_pg_cnt);
+ __set_page_dirty_nobuffers(page);
+ }
+@@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len)
+ release_existing_page_budget(c);
+
+ atomic_long_dec(&c->dirty_pg_cnt);
+- ClearPagePrivate(page);
++ detach_page_private(page);
+ ClearPageChecked(page);
+
+ kunmap(page);
+@@ -1031,7 +1031,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
+ if (page->index >= synced_i_size >> PAGE_SHIFT) {
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
+ if (err)
+- goto out_unlock;
++ goto out_redirty;
+ /*
+ * The inode has been written, but the write-buffer has
+ * not been synchronized, so in case of an unclean
+@@ -1059,11 +1059,17 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
+ if (i_size > synced_i_size) {
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
+ if (err)
+- goto out_unlock;
++ goto out_redirty;
+ }
+
+ return do_writepage(page, len);
+-
++out_redirty:
++ /*
++ * redirty_page_for_writepage() won't call ubifs_dirty_inode() because
++ * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
++ * there is no need to do space budget for dirty inode.
++ */
++ redirty_page_for_writepage(wbc, page);
+ out_unlock:
+ unlock_page(page);
+ return err;
+@@ -1304,7 +1310,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
+ release_existing_page_budget(c);
+
+ atomic_long_dec(&c->dirty_pg_cnt);
+- ClearPagePrivate(page);
++ detach_page_private(page);
+ ClearPageChecked(page);
+ }
+
+@@ -1471,8 +1477,8 @@ static int ubifs_migrate_page(struct address_space *mapping,
+ return rc;
+
+ if (PagePrivate(page)) {
+- ClearPagePrivate(page);
+- SetPagePrivate(newpage);
++ detach_page_private(page);
++ attach_page_private(newpage, (void *)1);
+ }
+
+ if (mode != MIGRATE_SYNC_NO_COPY)
+@@ -1496,7 +1502,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
+ return 0;
+ ubifs_assert(c, PagePrivate(page));
+ ubifs_assert(c, 0);
+- ClearPagePrivate(page);
++ detach_page_private(page);
+ ClearPageChecked(page);
+ return 1;
+ }
+@@ -1567,7 +1573,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
+ else {
+ if (!PageChecked(page))
+ ubifs_convert_page_budget(c);
+- SetPagePrivate(page);
++ attach_page_private(page, (void *)1);
+ atomic_long_inc(&c->dirty_pg_cnt);
+ __set_page_dirty_nobuffers(page);
+ }
+diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
+index 00b61dba62b70..b019dd6f7fa06 100644
+--- a/fs/ubifs/io.c
++++ b/fs/ubifs/io.c
+@@ -833,16 +833,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
+ */
+ n = aligned_len >> c->max_write_shift;
+ if (n) {
+- n <<= c->max_write_shift;
++ int m = n - 1;
++
+ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
+ wbuf->offs);
+- err = ubifs_leb_write(c, wbuf->lnum, buf + written,
+- wbuf->offs, n);
++
++ if (m) {
++ /* '(n-1)<<c->max_write_shift < len' is always true. */
++ m <<= c->max_write_shift;
++ err = ubifs_leb_write(c, wbuf->lnum, buf + written,
++ wbuf->offs, m);
++ if (err)
++ goto out;
++ wbuf->offs += m;
++ aligned_len -= m;
++ len -= m;
++ written += m;
++ }
++
++ /*
++ * The non-written len of buf may be less than 'n' because
++ * parameter 'len' is not 8 bytes aligned, so here we read
++ * min(len, n) bytes from buf.
++ */
++ n = 1 << c->max_write_shift;
++ memcpy(wbuf->buf, buf + written, min(len, n));
++ if (n > len) {
++ ubifs_assert(c, n - len < 8);
++ ubifs_pad(c, wbuf->buf + len, n - len);
++ }
++
++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n);
+ if (err)
+ goto out;
+ wbuf->offs += n;
+ aligned_len -= n;
+- len -= n;
++ len -= min(len, n);
+ written += n;
+ }
+
+diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
+index c6a8634877803..71bcebe45f9c5 100644
+--- a/fs/ubifs/ioctl.c
++++ b/fs/ubifs/ioctl.c
+@@ -108,7 +108,7 @@ static int setflags(struct inode *inode, int flags)
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
+ struct ubifs_budget_req req = { .dirtied_ino = 1,
+- .dirtied_ino_d = ui->data_len };
++ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
+
+ err = ubifs_budget_space(c, &req);
+ if (err)
+diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
+index 8ea680dba61e3..75dab0ae3939d 100644
+--- a/fs/ubifs/journal.c
++++ b/fs/ubifs/journal.c
+@@ -1207,9 +1207,9 @@ out_free:
+ * @sync: non-zero if the write-buffer has to be synchronized
+ *
+ * This function implements the re-name operation which may involve writing up
+- * to 4 inodes and 2 directory entries. It marks the written inodes as clean
+- * and returns zero on success. In case of failure, a negative error code is
+- * returned.
++ * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes)
++ * and 2 directory entries. It marks the written inodes as clean and returns
++ * zero on success. In case of failure, a negative error code is returned.
+ */
+ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ const struct inode *old_inode,
+@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ void *p;
+ union ubifs_key key;
+ struct ubifs_dent_node *dent, *dent2;
+- int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0;
++ int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0;
+ int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
+ int last_reference = !!(new_inode && new_inode->i_nlink == 0);
+ int move = (old_dir != new_dir);
+- struct ubifs_inode *new_ui;
++ struct ubifs_inode *new_ui, *whiteout_ui;
+ u8 hash_old_dir[UBIFS_HASH_ARR_SZ];
+ u8 hash_new_dir[UBIFS_HASH_ARR_SZ];
+ u8 hash_new_inode[UBIFS_HASH_ARR_SZ];
++ u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ];
+ u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+ u8 hash_dent2[UBIFS_HASH_ARR_SZ];
+
+@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ } else
+ ilen = 0;
+
++ if (whiteout) {
++ whiteout_ui = ubifs_inode(whiteout);
++ ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex));
++ ubifs_assert(c, whiteout->i_nlink == 1);
++ ubifs_assert(c, !whiteout_ui->dirty);
++ wlen = UBIFS_INO_NODE_SZ;
++ wlen += whiteout_ui->data_len;
++ } else
++ wlen = 0;
++
+ aligned_dlen1 = ALIGN(dlen1, 8);
+ aligned_dlen2 = ALIGN(dlen2, 8);
+- len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
++ len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) +
++ ALIGN(wlen, 8) + ALIGN(plen, 8);
+ if (move)
+ len += plen;
+
+@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ p += ALIGN(ilen, 8);
+ }
+
++ if (whiteout) {
++ pack_inode(c, p, whiteout, 0);
++ err = ubifs_node_calc_hash(c, p, hash_whiteout_inode);
++ if (err)
++ goto out_release;
++
++ p += ALIGN(wlen, 8);
++ }
++
+ if (!move) {
+ pack_inode(c, p, old_dir, 1);
+ err = ubifs_node_calc_hash(c, p, hash_old_dir);
+@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ if (new_inode)
+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
+ new_inode->i_ino);
++ if (whiteout)
++ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
++ whiteout->i_ino);
+ }
+ release_head(c, BASEHD);
+
+@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm);
+ if (err)
+ goto out_ro;
+-
+- ubifs_delete_orphan(c, whiteout->i_ino);
+ } else {
+ err = ubifs_add_dirt(c, lnum, dlen2);
+ if (err)
+@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ offs += ALIGN(ilen, 8);
+ }
+
++ if (whiteout) {
++ ino_key_init(c, &key, whiteout->i_ino);
++ err = ubifs_tnc_add(c, &key, lnum, offs, wlen,
++ hash_whiteout_inode);
++ if (err)
++ goto out_ro;
++ offs += ALIGN(wlen, 8);
++ }
++
+ ino_key_init(c, &key, old_dir->i_ino);
+ err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir);
+ if (err)
+@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+ new_ui->synced_i_size = new_ui->ui_size;
+ spin_unlock(&new_ui->ui_lock);
+ }
++ /*
++ * No need to mark whiteout inode clean.
++ * Whiteout doesn't have non-zero size, no need to update
++ * synced_i_size for whiteout_ui.
++ */
+ mark_inode_clean(c, ubifs_inode(old_dir));
+ if (move)
+ mark_inode_clean(c, ubifs_inode(new_dir));
+diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
+index f0fb25727d961..32c1f428054b7 100644
+--- a/fs/ubifs/super.c
++++ b/fs/ubifs/super.c
+@@ -833,7 +833,7 @@ static int alloc_wbufs(struct ubifs_info *c)
+ INIT_LIST_HEAD(&c->jheads[i].buds_list);
+ err = ubifs_wbuf_init(c, &c->jheads[i].wbuf);
+ if (err)
+- return err;
++ goto out_wbuf;
+
+ c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
+ c->jheads[i].wbuf.jhead = i;
+@@ -841,7 +841,7 @@ static int alloc_wbufs(struct ubifs_info *c)
+ c->jheads[i].log_hash = ubifs_hash_get_desc(c);
+ if (IS_ERR(c->jheads[i].log_hash)) {
+ err = PTR_ERR(c->jheads[i].log_hash);
+- goto out;
++ goto out_log_hash;
+ }
+ }
+
+@@ -854,9 +854,18 @@ static int alloc_wbufs(struct ubifs_info *c)
+
+ return 0;
+
+-out:
+- while (i--)
++out_log_hash:
++ kfree(c->jheads[i].wbuf.buf);
++ kfree(c->jheads[i].wbuf.inodes);
++
++out_wbuf:
++ while (i--) {
++ kfree(c->jheads[i].wbuf.buf);
++ kfree(c->jheads[i].wbuf.inodes);
+ kfree(c->jheads[i].log_hash);
++ }
++ kfree(c->jheads);
++ c->jheads = NULL;
+
+ return err;
+ }
+@@ -1853,7 +1862,6 @@ out:
+ kthread_stop(c->bgt);
+ c->bgt = NULL;
+ }
+- free_wbufs(c);
+ kfree(c->write_reserve_buf);
+ c->write_reserve_buf = NULL;
+ vfree(c->ileb_buf);
+diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
+index 488f3da7a6c6c..6b7d95b65f4b6 100644
+--- a/fs/ubifs/tnc.c
++++ b/fs/ubifs/tnc.c
+@@ -44,6 +44,33 @@ enum {
+ NOT_ON_MEDIA = 3,
+ };
+
++static void do_insert_old_idx(struct ubifs_info *c,
++ struct ubifs_old_idx *old_idx)
++{
++ struct ubifs_old_idx *o;
++ struct rb_node **p, *parent = NULL;
++
++ p = &c->old_idx.rb_node;
++ while (*p) {
++ parent = *p;
++ o = rb_entry(parent, struct ubifs_old_idx, rb);
++ if (old_idx->lnum < o->lnum)
++ p = &(*p)->rb_left;
++ else if (old_idx->lnum > o->lnum)
++ p = &(*p)->rb_right;
++ else if (old_idx->offs < o->offs)
++ p = &(*p)->rb_left;
++ else if (old_idx->offs > o->offs)
++ p = &(*p)->rb_right;
++ else {
++ ubifs_err(c, "old idx added twice!");
++ kfree(old_idx);
++ }
++ }
++ rb_link_node(&old_idx->rb, parent, p);
++ rb_insert_color(&old_idx->rb, &c->old_idx);
++}
++
+ /**
+ * insert_old_idx - record an index node obsoleted since the last commit start.
+ * @c: UBIFS file-system description object
+@@ -69,35 +96,15 @@ enum {
+ */
+ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
+ {
+- struct ubifs_old_idx *old_idx, *o;
+- struct rb_node **p, *parent = NULL;
++ struct ubifs_old_idx *old_idx;
+
+ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
+ if (unlikely(!old_idx))
+ return -ENOMEM;
+ old_idx->lnum = lnum;
+ old_idx->offs = offs;
++ do_insert_old_idx(c, old_idx);
+
+- p = &c->old_idx.rb_node;
+- while (*p) {
+- parent = *p;
+- o = rb_entry(parent, struct ubifs_old_idx, rb);
+- if (lnum < o->lnum)
+- p = &(*p)->rb_left;
+- else if (lnum > o->lnum)
+- p = &(*p)->rb_right;
+- else if (offs < o->offs)
+- p = &(*p)->rb_left;
+- else if (offs > o->offs)
+- p = &(*p)->rb_right;
+- else {
+- ubifs_err(c, "old idx added twice!");
+- kfree(old_idx);
+- return 0;
+- }
+- }
+- rb_link_node(&old_idx->rb, parent, p);
+- rb_insert_color(&old_idx->rb, &c->old_idx);
+ return 0;
+ }
+
+@@ -199,23 +206,6 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
+ __set_bit(DIRTY_ZNODE, &zn->flags);
+ __clear_bit(COW_ZNODE, &zn->flags);
+
+- ubifs_assert(c, !ubifs_zn_obsolete(znode));
+- __set_bit(OBSOLETE_ZNODE, &znode->flags);
+-
+- if (znode->level != 0) {
+- int i;
+- const int n = zn->child_cnt;
+-
+- /* The children now have new parent */
+- for (i = 0; i < n; i++) {
+- struct ubifs_zbranch *zbr = &zn->zbranch[i];
+-
+- if (zbr->znode)
+- zbr->znode->parent = zn;
+- }
+- }
+-
+- atomic_long_inc(&c->dirty_zn_cnt);
+ return zn;
+ }
+
+@@ -233,6 +223,42 @@ static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
+ return ubifs_add_dirt(c, lnum, dirt);
+ }
+
++/**
++ * replace_znode - replace old znode with new znode.
++ * @c: UBIFS file-system description object
++ * @new_zn: new znode
++ * @old_zn: old znode
++ * @zbr: the branch of parent znode
++ *
++ * Replace old znode with new znode in TNC.
++ */
++static void replace_znode(struct ubifs_info *c, struct ubifs_znode *new_zn,
++ struct ubifs_znode *old_zn, struct ubifs_zbranch *zbr)
++{
++ ubifs_assert(c, !ubifs_zn_obsolete(old_zn));
++ __set_bit(OBSOLETE_ZNODE, &old_zn->flags);
++
++ if (old_zn->level != 0) {
++ int i;
++ const int n = new_zn->child_cnt;
++
++ /* The children now have new parent */
++ for (i = 0; i < n; i++) {
++ struct ubifs_zbranch *child = &new_zn->zbranch[i];
++
++ if (child->znode)
++ child->znode->parent = new_zn;
++ }
++ }
++
++ zbr->znode = new_zn;
++ zbr->lnum = 0;
++ zbr->offs = 0;
++ zbr->len = 0;
++
++ atomic_long_inc(&c->dirty_zn_cnt);
++}
++
+ /**
+ * dirty_cow_znode - ensure a znode is not being committed.
+ * @c: UBIFS file-system description object
+@@ -265,21 +291,32 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
+ return zn;
+
+ if (zbr->len) {
+- err = insert_old_idx(c, zbr->lnum, zbr->offs);
+- if (unlikely(err))
+- return ERR_PTR(err);
++ struct ubifs_old_idx *old_idx;
++
++ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
++ if (unlikely(!old_idx)) {
++ err = -ENOMEM;
++ goto out;
++ }
++ old_idx->lnum = zbr->lnum;
++ old_idx->offs = zbr->offs;
++
+ err = add_idx_dirt(c, zbr->lnum, zbr->len);
+- } else
+- err = 0;
++ if (err) {
++ kfree(old_idx);
++ goto out;
++ }
+
+- zbr->znode = zn;
+- zbr->lnum = 0;
+- zbr->offs = 0;
+- zbr->len = 0;
++ do_insert_old_idx(c, old_idx);
++ }
++
++ replace_znode(c, zn, znode, zbr);
+
+- if (unlikely(err))
+- return ERR_PTR(err);
+ return zn;
++
++out:
++ kfree(zn);
++ return ERR_PTR(err);
+ }
+
+ /**
+@@ -3053,6 +3090,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
+ cnext = cnext->cnext;
+ if (ubifs_zn_obsolete(znode))
+ kfree(znode);
++ else if (!ubifs_zn_cow(znode)) {
++ /*
++ * Don't forget to update the clean znode count after a
++ * failed commit, because ubifs will check this count
++ * while closing the tnc. A non-obsolete znode could be
++ * re-dirtied during the commit process, so the dirty
++ * flag is not trustworthy. The flag 'COW_ZNODE' is set
++ * for each dirty znode before committing, and it is
++ * cleared as soon as the znode becomes clean, so we
++ * can count clean znodes according to this
++ * flag.
++ */
++ atomic_long_inc(&c->clean_zn_cnt);
++ atomic_long_inc(&ubifs_clean_zn_cnt);
++ }
+ } while (cnext && cnext != c->cnext);
+ }
+
+diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
+index c38066ce9ab03..398551bef5986 100644
+--- a/fs/ubifs/ubifs.h
++++ b/fs/ubifs/ubifs.h
+@@ -1594,8 +1594,13 @@ static inline int ubifs_check_hmac(const struct ubifs_info *c,
+ return crypto_memneq(expected, got, c->hmac_desc_len);
+ }
+
++#ifdef CONFIG_UBIFS_FS_AUTHENTICATION
+ void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
+ const u8 *hash, int lnum, int offs);
++#else
++static inline void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
++ const u8 *hash, int lnum, int offs) {};
++#endif
+
+ int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf,
+ const u8 *expected);
+@@ -1997,7 +2002,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
+
+ /* dir.c */
+ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
+- umode_t mode);
++ umode_t mode, bool is_xattr);
+ int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
+ int ubifs_check_dir_empty(struct inode *dir);
+diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
+index e4f193eae4b2b..9ff2614bdeca0 100644
+--- a/fs/ubifs/xattr.c
++++ b/fs/ubifs/xattr.c
+@@ -110,7 +110,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
+ if (err)
+ return err;
+
+- inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
++ inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO, true);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_budg;
+diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
+index 8e597db4d9710..f416b7fe092fc 100644
+--- a/fs/udf/balloc.c
++++ b/fs/udf/balloc.c
+@@ -36,18 +36,41 @@ static int read_block_bitmap(struct super_block *sb,
+ unsigned long bitmap_nr)
+ {
+ struct buffer_head *bh = NULL;
+- int retval = 0;
++ int i;
++ int max_bits, off, count;
+ struct kernel_lb_addr loc;
+
+ loc.logicalBlockNum = bitmap->s_extPosition;
+ loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
+
+ bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
++ bitmap->s_block_bitmap[bitmap_nr] = bh;
+ if (!bh)
+- retval = -EIO;
++ return -EIO;
+
+- bitmap->s_block_bitmap[bitmap_nr] = bh;
+- return retval;
++ /* Check consistency of Space Bitmap buffer. */
++ max_bits = sb->s_blocksize * 8;
++ if (!bitmap_nr) {
++ off = sizeof(struct spaceBitmapDesc) << 3;
++ count = min(max_bits - off, bitmap->s_nr_groups);
++ } else {
++ /*
++ * Rough check if bitmap number is too big to have any bitmap
++ * blocks reserved.
++ */
++ if (bitmap_nr >
++ (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2)
++ return 0;
++ off = 0;
++ count = bitmap->s_nr_groups - bitmap_nr * max_bits +
++ (sizeof(struct spaceBitmapDesc) << 3);
++ count = min(count, max_bits);
++ }
++
++ for (i = 0; i < count; i++)
++ if (udf_test_bit(i + off, bh->b_data))
++ return -EFSCORRUPTED;
++ return 0;
+ }
+
+ static int __load_block_bitmap(struct super_block *sb,
+diff --git a/fs/udf/dir.c b/fs/udf/dir.c
+index 70abdfad2df17..42e3e551fa4c3 100644
+--- a/fs/udf/dir.c
++++ b/fs/udf/dir.c
+@@ -31,6 +31,7 @@
+ #include <linux/mm.h>
+ #include <linux/slab.h>
+ #include <linux/bio.h>
++#include <linux/iversion.h>
+
+ #include "udf_i.h"
+ #include "udf_sb.h"
+@@ -43,7 +44,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
+ struct fileIdentDesc *fi = NULL;
+ struct fileIdentDesc cfi;
+ udf_pblk_t block, iblock;
+- loff_t nf_pos;
++ loff_t nf_pos, emit_pos = 0;
+ int flen;
+ unsigned char *fname = NULL, *copy_name = NULL;
+ unsigned char *nameptr;
+@@ -57,6 +58,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
+ int i, num, ret = 0;
+ struct extent_position epos = { NULL, 0, {0, 0} };
+ struct super_block *sb = dir->i_sb;
++ bool pos_valid = false;
+
+ if (ctx->pos == 0) {
+ if (!dir_emit_dot(file, ctx))
+@@ -67,6 +69,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
+ if (nf_pos >= size)
+ goto out;
+
++ /*
++ * Something changed since last readdir (either lseek was called or dir
++ * changed)? We need to verify the position correctly points at the
++ * beginning of some dir entry so that the directory parsing code does
++ * not get confused. Since UDF does not have any reliable way of
++ * identifying beginning of dir entry (names are under user control),
++ * we need to scan the directory from the beginning.
++ */
++ if (!inode_eq_iversion(dir, file->f_version)) {
++ emit_pos = nf_pos;
++ nf_pos = 0;
++ } else {
++ pos_valid = true;
++ }
++
+ fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+ if (!fname) {
+ ret = -ENOMEM;
+@@ -122,13 +139,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
+
+ while (nf_pos < size) {
+ struct kernel_lb_addr tloc;
++ loff_t cur_pos = nf_pos;
+
+- ctx->pos = (nf_pos >> 2) + 1;
++ /* Update file position only if we got past the current one */
++ if (nf_pos >= emit_pos) {
++ ctx->pos = (nf_pos >> 2) + 1;
++ pos_valid = true;
++ }
+
+ fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
+ &elen, &offset);
+ if (!fi)
+ goto out;
++ /* Still not at offset where user asked us to read from? */
++ if (cur_pos < emit_pos)
++ continue;
+
+ liu = le16_to_cpu(cfi.lengthOfImpUse);
+ lfi = cfi.lengthFileIdent;
+@@ -186,8 +211,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
+ } /* end while */
+
+ ctx->pos = (nf_pos >> 2) + 1;
++ pos_valid = true;
+
+ out:
++ if (pos_valid)
++ file->f_version = inode_query_iversion(dir);
+ if (fibh.sbh != fibh.ebh)
+ brelse(fibh.ebh);
+ brelse(fibh.sbh);
+diff --git a/fs/udf/file.c b/fs/udf/file.c
+index 1baff8ddb7544..83410fe1d16c7 100644
+--- a/fs/udf/file.c
++++ b/fs/udf/file.c
+@@ -148,26 +148,24 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ goto out;
+
+ down_write(&iinfo->i_data_sem);
+- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
+- loff_t end = iocb->ki_pos + iov_iter_count(from);
+-
+- if (inode->i_sb->s_blocksize <
+- (udf_file_entry_alloc_offset(inode) + end)) {
+- err = udf_expand_file_adinicb(inode);
+- if (err) {
+- inode_unlock(inode);
+- udf_debug("udf_expand_adinicb: err=%d\n", err);
+- return err;
+- }
+- } else {
+- iinfo->i_lenAlloc = max(end, inode->i_size);
+- up_write(&iinfo->i_data_sem);
++ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB &&
++ inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) +
++ iocb->ki_pos + iov_iter_count(from))) {
++ err = udf_expand_file_adinicb(inode);
++ if (err) {
++ inode_unlock(inode);
++ udf_debug("udf_expand_adinicb: err=%d\n", err);
++ return err;
+ }
+ } else
+ up_write(&iinfo->i_data_sem);
+
+ retval = __generic_file_write_iter(iocb, from);
+ out:
++ down_write(&iinfo->i_data_sem);
++ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && retval > 0)
++ iinfo->i_lenAlloc = inode->i_size;
++ up_write(&iinfo->i_data_sem);
+ inode_unlock(inode);
+
+ if (retval > 0) {
+diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
+index 2ecf0e87660e3..b5d611cee749c 100644
+--- a/fs/udf/ialloc.c
++++ b/fs/udf/ialloc.c
+@@ -77,6 +77,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
+ GFP_KERNEL);
+ }
+ if (!iinfo->i_data) {
++ make_bad_inode(inode);
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+@@ -86,6 +87,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
+ dinfo->i_location.partitionReferenceNum,
+ start, &err);
+ if (err) {
++ make_bad_inode(inode);
+ iput(inode);
+ return ERR_PTR(err);
+ }
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 1d6b7a50736ba..da6fb28b4eeac 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -57,15 +57,15 @@ static int udf_update_inode(struct inode *, int);
+ static int udf_sync_inode(struct inode *inode);
+ static int udf_alloc_i_data(struct inode *inode, size_t size);
+ static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
+-static int8_t udf_insert_aext(struct inode *, struct extent_position,
+- struct kernel_lb_addr, uint32_t);
++static int udf_insert_aext(struct inode *, struct extent_position,
++ struct kernel_lb_addr, uint32_t);
+ static void udf_split_extents(struct inode *, int *, int, udf_pblk_t,
+ struct kernel_long_ad *, int *);
+ static void udf_prealloc_extents(struct inode *, int, int,
+ struct kernel_long_ad *, int *);
+ static void udf_merge_extents(struct inode *, struct kernel_long_ad *, int *);
+-static void udf_update_extents(struct inode *, struct kernel_long_ad *, int,
+- int, struct extent_position *);
++static int udf_update_extents(struct inode *, struct kernel_long_ad *, int,
++ int, struct extent_position *);
+ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
+
+ static void __udf_clear_extent_cache(struct inode *inode)
+@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode)
+ char *kaddr;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ int err;
+- struct writeback_control udf_wbc = {
+- .sync_mode = WB_SYNC_NONE,
+- .nr_to_write = 1,
+- };
+
+ WARN_ON_ONCE(!inode_is_locked(inode));
+ if (!iinfo->i_lenAlloc) {
+@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode)
+ iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
+ /* from now on we have normal address_space methods */
+ inode->i_data.a_ops = &udf_aops;
++ set_page_dirty(page);
++ unlock_page(page);
+ up_write(&iinfo->i_data_sem);
+- err = inode->i_data.a_ops->writepage(page, &udf_wbc);
++ err = filemap_fdatawrite(inode->i_mapping);
+ if (err) {
+ /* Restore everything back so that we don't lose data... */
+ lock_page(page);
+@@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode)
+ unlock_page(page);
+ iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
+ inode->i_data.a_ops = &udf_adinicb_aops;
++ iinfo->i_lenAlloc = inode->i_size;
+ up_write(&iinfo->i_data_sem);
+ }
+ put_page(page);
+@@ -439,6 +438,12 @@ static int udf_get_block(struct inode *inode, sector_t block,
+ iinfo->i_next_alloc_goal++;
+ }
+
++ /*
++ * Block beyond EOF and prealloc extents? Just discard preallocation
++ * as it is not useful and complicates things.
++ */
++ if (((loff_t)block) << inode->i_blkbits >= iinfo->i_lenExtents)
++ udf_discard_prealloc(inode);
+ udf_clear_extent_cache(inode);
+ phys = inode_getblk(inode, block, &err, &new);
+ if (!phys)
+@@ -488,8 +493,6 @@ static int udf_do_extend_file(struct inode *inode,
+ uint32_t add;
+ int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ struct super_block *sb = inode->i_sb;
+- struct kernel_lb_addr prealloc_loc = {};
+- uint32_t prealloc_len = 0;
+ struct udf_inode_info *iinfo;
+ int err;
+
+@@ -510,19 +513,6 @@ static int udf_do_extend_file(struct inode *inode,
+ ~(sb->s_blocksize - 1);
+ }
+
+- /* Last extent are just preallocated blocks? */
+- if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
+- EXT_NOT_RECORDED_ALLOCATED) {
+- /* Save the extent so that we can reattach it to the end */
+- prealloc_loc = last_ext->extLocation;
+- prealloc_len = last_ext->extLength;
+- /* Mark the extent as a hole */
+- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
+- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+- last_ext->extLocation.logicalBlockNum = 0;
+- last_ext->extLocation.partitionReferenceNum = 0;
+- }
+-
+ /* Can we merge with the previous extent? */
+ if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
+ EXT_NOT_RECORDED_NOT_ALLOCATED) {
+@@ -535,8 +525,10 @@ static int udf_do_extend_file(struct inode *inode,
+ }
+
+ if (fake) {
+- udf_add_aext(inode, last_pos, &last_ext->extLocation,
+- last_ext->extLength, 1);
++ err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
++ last_ext->extLength, 1);
++ if (err < 0)
++ goto out_err;
+ count++;
+ } else {
+ struct kernel_lb_addr tmploc;
+@@ -550,7 +542,7 @@ static int udf_do_extend_file(struct inode *inode,
+ * more extents, we may need to enter possible following
+ * empty indirect extent.
+ */
+- if (new_block_bytes || prealloc_len)
++ if (new_block_bytes)
+ udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
+ }
+
+@@ -570,7 +562,7 @@ static int udf_do_extend_file(struct inode *inode,
+ err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+ if (err)
+- return err;
++ goto out_err;
+ count++;
+ }
+ if (new_block_bytes) {
+@@ -579,22 +571,11 @@ static int udf_do_extend_file(struct inode *inode,
+ err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+ if (err)
+- return err;
++ goto out_err;
+ count++;
+ }
+
+ out:
+- /* Do we have some preallocated blocks saved? */
+- if (prealloc_len) {
+- err = udf_add_aext(inode, last_pos, &prealloc_loc,
+- prealloc_len, 1);
+- if (err)
+- return err;
+- last_ext->extLocation = prealloc_loc;
+- last_ext->extLength = prealloc_len;
+- count++;
+- }
+-
+ /* last_pos should point to the last written extent... */
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ last_pos->offset -= sizeof(struct short_ad);
+@@ -604,19 +585,28 @@ out:
+ return -EIO;
+
+ return count;
++out_err:
++ /* Remove extents we've created so far */
++ udf_clear_extent_cache(inode);
++ udf_truncate_extents(inode);
++ return err;
+ }
+
+ /* Extend the final block of the file to final_block_len bytes */
+ static void udf_do_extend_final_block(struct inode *inode,
+ struct extent_position *last_pos,
+ struct kernel_long_ad *last_ext,
+- uint32_t final_block_len)
++ uint32_t new_elen)
+ {
+- struct super_block *sb = inode->i_sb;
+ uint32_t added_bytes;
+
+- added_bytes = final_block_len -
+- (last_ext->extLength & (sb->s_blocksize - 1));
++ /*
++ * Extent already large enough? It may be already rounded up to block
++ * size...
++ */
++ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
++ return;
++ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes;
+
+@@ -633,12 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
+ int8_t etype;
+ struct super_block *sb = inode->i_sb;
+ sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
+- unsigned long partial_final_block;
++ loff_t new_elen;
+ int adsize;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ struct kernel_long_ad extent;
+ int err = 0;
+- int within_final_block;
++ bool within_last_ext;
+
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ adsize = sizeof(struct short_ad);
+@@ -647,8 +637,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
+ else
+ BUG();
+
++ /*
++ * When creating hole in file, just don't bother with preserving
++ * preallocation. It likely won't be very useful anyway.
++ */
++ udf_discard_prealloc(inode);
++
+ etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+- within_final_block = (etype != -1);
++ within_last_ext = (etype != -1);
++ /* We don't expect extents past EOF... */
++ WARN_ON_ONCE(within_last_ext &&
++ elen > ((loff_t)offset + 1) << inode->i_blkbits);
+
+ if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
+ (epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
+@@ -664,19 +663,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
+ extent.extLength |= etype << 30;
+ }
+
+- partial_final_block = newsize & (sb->s_blocksize - 1);
++ new_elen = ((loff_t)offset << inode->i_blkbits) |
++ (newsize & (sb->s_blocksize - 1));
+
+ /* File has extent covering the new size (could happen when extending
+ * inside a block)?
+ */
+- if (within_final_block) {
++ if (within_last_ext) {
+ /* Extending file within the last file block */
+- udf_do_extend_final_block(inode, &epos, &extent,
+- partial_final_block);
++ udf_do_extend_final_block(inode, &epos, &extent, new_elen);
+ } else {
+- loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
+- partial_final_block;
+- err = udf_do_extend_file(inode, &epos, &extent, add);
++ err = udf_do_extend_file(inode, &epos, &extent, new_elen);
+ }
+
+ if (err < 0)
+@@ -698,7 +695,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
+ struct kernel_lb_addr eloc, tmpeloc;
+ int c = 1;
+ loff_t lbcount = 0, b_off = 0;
+- udf_pblk_t newblocknum, newblock;
++ udf_pblk_t newblocknum, newblock = 0;
+ sector_t offset = 0;
+ int8_t etype;
+ struct udf_inode_info *iinfo = UDF_I(inode);
+@@ -777,10 +774,11 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
+ goto out_free;
+ }
+
+- /* Are we beyond EOF? */
++ /* Are we beyond EOF and preallocated extent? */
+ if (etype == -1) {
+ int ret;
+ loff_t hole_len;
++
+ isBeyondEOF = true;
+ if (count) {
+ if (c)
+@@ -800,25 +798,22 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
+ ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
+ if (ret < 0) {
+ *err = ret;
+- newblock = 0;
+ goto out_free;
+ }
+ c = 0;
+ offset = 0;
+ count += ret;
+- /* We are not covered by a preallocated extent? */
+- if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) !=
+- EXT_NOT_RECORDED_ALLOCATED) {
+- /* Is there any real extent? - otherwise we overwrite
+- * the fake one... */
+- if (count)
+- c = !c;
+- laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
+- inode->i_sb->s_blocksize;
+- memset(&laarr[c].extLocation, 0x00,
+- sizeof(struct kernel_lb_addr));
+- count++;
+- }
++ /*
++ * Is there any real extent? - otherwise we overwrite the fake
++ * one...
++ */
++ if (count)
++ c = !c;
++ laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
++ inode->i_sb->s_blocksize;
++ memset(&laarr[c].extLocation, 0x00,
++ sizeof(struct kernel_lb_addr));
++ count++;
+ endnum = c + 1;
+ lastblock = 1;
+ } else {
+@@ -865,7 +860,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
+ goal, err);
+ if (!newblocknum) {
+ *err = -ENOSPC;
+- newblock = 0;
+ goto out_free;
+ }
+ if (isBeyondEOF)
+@@ -891,7 +885,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
+ /* write back the new extents, inserting new extents if the new number
+ * of extents is greater than the old number, and deleting extents if
+ * the new number of extents is less than the old number */
+- udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
++ *err = udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
++ if (*err < 0)
++ goto out_free;
+
+ newblock = udf_get_pblock(inode->i_sb, newblocknum,
+ iinfo->i_location.partitionReferenceNum, 0);
+@@ -1095,23 +1091,8 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
+ blocksize - 1) >> blocksize_bits)))) {
+
+ if (((li->extLength & UDF_EXTENT_LENGTH_MASK) +
+- (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
+- blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) {
+- lip1->extLength = (lip1->extLength -
+- (li->extLength &
+- UDF_EXTENT_LENGTH_MASK) +
+- UDF_EXTENT_LENGTH_MASK) &
+- ~(blocksize - 1);
+- li->extLength = (li->extLength &
+- UDF_EXTENT_FLAG_MASK) +
+- (UDF_EXTENT_LENGTH_MASK + 1) -
+- blocksize;
+- lip1->extLocation.logicalBlockNum =
+- li->extLocation.logicalBlockNum +
+- ((li->extLength &
+- UDF_EXTENT_LENGTH_MASK) >>
+- blocksize_bits);
+- } else {
++ (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
++ blocksize - 1) <= UDF_EXTENT_LENGTH_MASK) {
+ li->extLength = lip1->extLength +
+ (((li->extLength &
+ UDF_EXTENT_LENGTH_MASK) +
+@@ -1174,21 +1155,30 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
+ }
+ }
+
+-static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+- int startnum, int endnum,
+- struct extent_position *epos)
++static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
++ int startnum, int endnum,
++ struct extent_position *epos)
+ {
+ int start = 0, i;
+ struct kernel_lb_addr tmploc;
+ uint32_t tmplen;
++ int err;
+
+ if (startnum > endnum) {
+ for (i = 0; i < (startnum - endnum); i++)
+ udf_delete_aext(inode, *epos);
+ } else if (startnum < endnum) {
+ for (i = 0; i < (endnum - startnum); i++) {
+- udf_insert_aext(inode, *epos, laarr[i].extLocation,
+- laarr[i].extLength);
++ err = udf_insert_aext(inode, *epos,
++ laarr[i].extLocation,
++ laarr[i].extLength);
++ /*
++ * If we fail here, we are likely corrupting the extent
++ * list and leaking blocks. At least stop early to
++ * limit the damage.
++ */
++ if (err < 0)
++ return err;
+ udf_next_aext(inode, epos, &laarr[i].extLocation,
+ &laarr[i].extLength, 1);
+ start++;
+@@ -1200,6 +1190,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
+ udf_write_aext(inode, epos, &laarr[i].extLocation,
+ laarr[i].extLength, 1);
+ }
++ return 0;
+ }
+
+ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
+@@ -1402,6 +1393,7 @@ reread:
+ ret = -EIO;
+ goto out;
+ }
++ iinfo->i_hidden = hidden_inode;
+ iinfo->i_unique = 0;
+ iinfo->i_lenEAttr = 0;
+ iinfo->i_lenExtents = 0;
+@@ -1737,8 +1729,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
+
+ if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
+ fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1);
+- else
+- fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
++ else {
++ if (iinfo->i_hidden)
++ fe->fileLinkCount = cpu_to_le16(0);
++ else
++ fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
++ }
+
+ fe->informationLength = cpu_to_le64(inode->i_size);
+
+@@ -1909,8 +1905,13 @@ struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino,
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+- if (!(inode->i_state & I_NEW))
++ if (!(inode->i_state & I_NEW)) {
++ if (UDF_I(inode)->i_hidden != hidden_inode) {
++ iput(inode);
++ return ERR_PTR(-EFSCORRUPTED);
++ }
+ return inode;
++ }
+
+ memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
+ err = udf_read_inode(inode, hidden_inode);
+@@ -2224,12 +2225,13 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
+ return etype;
+ }
+
+-static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
+- struct kernel_lb_addr neloc, uint32_t nelen)
++static int udf_insert_aext(struct inode *inode, struct extent_position epos,
++ struct kernel_lb_addr neloc, uint32_t nelen)
+ {
+ struct kernel_lb_addr oeloc;
+ uint32_t oelen;
+ int8_t etype;
++ int err;
+
+ if (epos.bh)
+ get_bh(epos.bh);
+@@ -2239,10 +2241,10 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
+ neloc = oeloc;
+ nelen = (etype << 30) | oelen;
+ }
+- udf_add_aext(inode, &epos, &neloc, nelen, 1);
++ err = udf_add_aext(inode, &epos, &neloc, nelen, 1);
+ brelse(epos.bh);
+
+- return (nelen >> 30);
++ return err;
+ }
+
+ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
+diff --git a/fs/udf/namei.c b/fs/udf/namei.c
+index caeef08efed23..0e30a50060d9d 100644
+--- a/fs/udf/namei.c
++++ b/fs/udf/namei.c
+@@ -30,6 +30,7 @@
+ #include <linux/sched.h>
+ #include <linux/crc-itu-t.h>
+ #include <linux/exportfs.h>
++#include <linux/iversion.h>
+
+ static inline int udf_match(int len1, const unsigned char *name1, int len2,
+ const unsigned char *name2)
+@@ -74,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
+
+ if (fileident) {
+ if (adinicb || (offset + lfi < 0)) {
+- memcpy(udf_get_fi_ident(sfi), fileident, lfi);
++ memcpy(sfi->impUse + liu, fileident, lfi);
+ } else if (offset >= 0) {
+ memcpy(fibh->ebh->b_data + offset, fileident, lfi);
+ } else {
+- memcpy(udf_get_fi_ident(sfi), fileident, -offset);
++ memcpy(sfi->impUse + liu, fileident, -offset);
+ memcpy(fibh->ebh->b_data, fileident - offset,
+ lfi + offset);
+ }
+@@ -87,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
+ offset += lfi;
+
+ if (adinicb || (offset + padlen < 0)) {
+- memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
++ memset(sfi->impUse + liu + lfi, 0x00, padlen);
+ } else if (offset >= 0) {
+ memset(fibh->ebh->b_data + offset, 0x00, padlen);
+ } else {
+- memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
++ memset(sfi->impUse + liu + lfi, 0x00, -offset);
+ memset(fibh->ebh->b_data, 0x00, padlen + offset);
+ }
+
+@@ -134,6 +135,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
+ mark_buffer_dirty_inode(fibh->ebh, inode);
+ mark_buffer_dirty_inode(fibh->sbh, inode);
+ }
++ inode_inc_iversion(inode);
++
+ return 0;
+ }
+
+@@ -237,7 +240,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
+ poffset - lfi);
+ else {
+ if (!copy_name) {
+- copy_name = kmalloc(UDF_NAME_LEN,
++ copy_name = kmalloc(UDF_NAME_LEN_CS0,
+ GFP_NOFS);
+ if (!copy_name) {
+ fi = ERR_PTR(-ENOMEM);
+@@ -1088,8 +1091,9 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ return -EINVAL;
+
+ ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
+- if (IS_ERR(ofi)) {
+- retval = PTR_ERR(ofi);
++ if (!ofi || IS_ERR(ofi)) {
++ if (IS_ERR(ofi))
++ retval = PTR_ERR(ofi);
+ goto end_rename;
+ }
+
+@@ -1098,8 +1102,7 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+
+ brelse(ofibh.sbh);
+ tloc = lelb_to_cpu(ocfi.icb.extLocation);
+- if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
+- != old_inode->i_ino)
++ if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino)
+ goto end_rename;
+
+ nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
+diff --git a/fs/udf/super.c b/fs/udf/super.c
+index b2d7c57d06881..6b85c66722d3a 100644
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -57,6 +57,7 @@
+ #include <linux/crc-itu-t.h>
+ #include <linux/log2.h>
+ #include <asm/byteorder.h>
++#include <linux/iversion.h>
+
+ #include "udf_sb.h"
+ #include "udf_i.h"
+@@ -146,9 +147,11 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
+ ei->i_next_alloc_goal = 0;
+ ei->i_strat4096 = 0;
+ ei->i_streamdir = 0;
++ ei->i_hidden = 0;
+ init_rwsem(&ei->i_data_sem);
+ ei->cached_extent.lstart = -1;
+ spin_lock_init(&ei->i_extent_cache_lock);
++ inode_set_iversion(&ei->vfs_inode, 1);
+
+ return &ei->vfs_inode;
+ }
+diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
+index 532cda99644ee..036ebd892b852 100644
+--- a/fs/udf/truncate.c
++++ b/fs/udf/truncate.c
+@@ -120,60 +120,42 @@ void udf_truncate_tail_extent(struct inode *inode)
+
+ void udf_discard_prealloc(struct inode *inode)
+ {
+- struct extent_position epos = { NULL, 0, {0, 0} };
++ struct extent_position epos = {};
++ struct extent_position prev_epos = {};
+ struct kernel_lb_addr eloc;
+ uint32_t elen;
+ uint64_t lbcount = 0;
+ int8_t etype = -1, netype;
+- int adsize;
+ struct udf_inode_info *iinfo = UDF_I(inode);
++ int bsize = 1 << inode->i_blkbits;
+
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ||
+- inode->i_size == iinfo->i_lenExtents)
++ ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize))
+ return;
+
+- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+- adsize = sizeof(struct short_ad);
+- else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
+- adsize = sizeof(struct long_ad);
+- else
+- adsize = 0;
+-
+ epos.block = iinfo->i_location;
+
+ /* Find the last extent in the file */
+- while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
+- etype = netype;
++ while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 0)) != -1) {
++ brelse(prev_epos.bh);
++ prev_epos = epos;
++ if (prev_epos.bh)
++ get_bh(prev_epos.bh);
++
++ etype = udf_next_aext(inode, &epos, &eloc, &elen, 1);
+ lbcount += elen;
+ }
+ if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
+- epos.offset -= adsize;
+ lbcount -= elen;
+- extent_trunc(inode, &epos, &eloc, etype, elen, 0);
+- if (!epos.bh) {
+- iinfo->i_lenAlloc =
+- epos.offset -
+- udf_file_entry_alloc_offset(inode);
+- mark_inode_dirty(inode);
+- } else {
+- struct allocExtDesc *aed =
+- (struct allocExtDesc *)(epos.bh->b_data);
+- aed->lengthAllocDescs =
+- cpu_to_le32(epos.offset -
+- sizeof(struct allocExtDesc));
+- if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
+- UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
+- udf_update_tag(epos.bh->b_data, epos.offset);
+- else
+- udf_update_tag(epos.bh->b_data,
+- sizeof(struct allocExtDesc));
+- mark_buffer_dirty_inode(epos.bh, inode);
+- }
++ udf_delete_aext(inode, prev_epos);
++ udf_free_blocks(inode->i_sb, inode, &eloc, 0,
++ DIV_ROUND_UP(elen, 1 << inode->i_blkbits));
+ }
+ /* This inode entry is in-memory only and thus we don't have to mark
+ * the inode dirty */
+ iinfo->i_lenExtents = lbcount;
+ brelse(epos.bh);
++ brelse(prev_epos.bh);
+ }
+
+ static void udf_update_alloc_ext_desc(struct inode *inode,
+diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
+index 06ff7006b8227..312b7c9ef10e2 100644
+--- a/fs/udf/udf_i.h
++++ b/fs/udf/udf_i.h
+@@ -44,7 +44,8 @@ struct udf_inode_info {
+ unsigned i_use : 1; /* unallocSpaceEntry */
+ unsigned i_strat4096 : 1;
+ unsigned i_streamdir : 1;
+- unsigned reserved : 25;
++ unsigned i_hidden : 1; /* hidden system inode */
++ unsigned reserved : 24;
+ __u8 *i_data;
+ struct kernel_lb_addr i_locStreamdir;
+ __u64 i_lenStreams;
+diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
+index 4fa620543d302..2205859731dc2 100644
+--- a/fs/udf/udf_sb.h
++++ b/fs/udf/udf_sb.h
+@@ -51,6 +51,8 @@
+ #define MF_DUPLICATE_MD 0x01
+ #define MF_MIRROR_FE_LOADED 0x02
+
++#define EFSCORRUPTED EUCLEAN
++
+ struct udf_meta_data {
+ __u32 s_meta_file_loc;
+ __u32 s_mirror_file_loc;
+diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
+index 622569007b530..2142cbd1dde24 100644
+--- a/fs/udf/unicode.c
++++ b/fs/udf/unicode.c
+@@ -247,7 +247,7 @@ static int udf_name_from_CS0(struct super_block *sb,
+ }
+
+ if (translate) {
+- if (str_o_len <= 2 && str_o[0] == '.' &&
++ if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
+ (str_o_len == 1 || str_o[1] == '.'))
+ needsCRC = 1;
+ if (needsCRC) {
+diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
+index 22bf14ab2d163..b56e8e31d967f 100644
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -982,7 +982,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
+ int fd;
+
+ fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
+- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
++ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
+ if (fd < 0)
+ return fd;
+
+@@ -2097,7 +2097,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
+ mmgrab(ctx->mm);
+
+ fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
+- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
++ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
+ if (fd < 0) {
+ mmdrop(ctx->mm);
+ kmem_cache_free(userfaultfd_ctx_cachep, ctx);
+diff --git a/fs/verity/enable.c b/fs/verity/enable.c
+index 60a4372aa4d75..dfe8acc32df66 100644
+--- a/fs/verity/enable.c
++++ b/fs/verity/enable.c
+@@ -391,25 +391,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
+ goto out_drop_write;
+
+ err = enable_verity(filp, &arg);
+- if (err)
+- goto out_allow_write_access;
+
+ /*
+- * Some pages of the file may have been evicted from pagecache after
+- * being used in the Merkle tree construction, then read into pagecache
+- * again by another process reading from the file concurrently. Since
+- * these pages didn't undergo verification against the file digest which
+- * fs-verity now claims to be enforcing, we have to wipe the pagecache
+- * to ensure that all future reads are verified.
++ * We no longer drop the inode's pagecache after enabling verity. This
++ * used to be done to try to avoid a race condition where pages could be
++ * evicted after being used in the Merkle tree construction, then
++ * re-instantiated by a concurrent read. Such pages are unverified, and
++ * the backing storage could have filled them with different content, so
++ * they shouldn't be used to fulfill reads once verity is enabled.
++ *
++ * But, dropping the pagecache has a big performance impact, and it
++ * doesn't fully solve the race condition anyway. So for those reasons,
++ * and also because this race condition isn't very important relatively
++ * speaking (especially for small-ish files, where the chance of a page
++ * being used, evicted, *and* re-instantiated all while enabling verity
++ * is quite small), we no longer drop the inode's pagecache.
+ */
+- filemap_write_and_wait(inode->i_mapping);
+- invalidate_inode_pages2(inode->i_mapping);
+
+ /*
+ * allow_write_access() is needed to pair with deny_write_access().
+ * Regardless, the filesystem won't allow writing to verity files.
+ */
+-out_allow_write_access:
+ allow_write_access(filp);
+ out_drop_write:
+ mnt_drop_write_file(filp);
+diff --git a/fs/verity/signature.c b/fs/verity/signature.c
+index 143a530a80088..b59de03055e1e 100644
+--- a/fs/verity/signature.c
++++ b/fs/verity/signature.c
+@@ -54,6 +54,22 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
+ return 0;
+ }
+
++ if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
++ /*
++ * The ".fs-verity" keyring is empty, due to builtin signatures
++ * being supported by the kernel but not actually being used.
++ * In this case, verify_pkcs7_signature() would always return an
++ * error, usually ENOKEY. It could also be EBADMSG if the
++ * PKCS#7 is malformed, but that isn't very important to
++ * distinguish. So, just skip to ENOKEY to avoid the attack
++ * surface of the PKCS#7 parser, which would otherwise be
++ * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
++ */
++ fsverity_err(inode,
++ "fs-verity keyring is empty, rejecting signed file!");
++ return -ENOKEY;
++ }
++
+ d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+diff --git a/fs/verity/verify.c b/fs/verity/verify.c
+index 0adb970f4e736..10e41883dfa17 100644
+--- a/fs/verity/verify.c
++++ b/fs/verity/verify.c
+@@ -279,15 +279,15 @@ EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
+ int __init fsverity_init_workqueue(void)
+ {
+ /*
+- * Use an unbound workqueue to allow bios to be verified in parallel
+- * even when they happen to complete on the same CPU. This sacrifices
+- * locality, but it's worthwhile since hashing is CPU-intensive.
++ * Use a high-priority workqueue to prioritize verification work, which
++ * blocks reads from completing, over regular application tasks.
+ *
+- * Also use a high-priority workqueue to prioritize verification work,
+- * which blocks reads from completing, over regular application tasks.
++ * For performance reasons, don't use an unbound workqueue. Using an
++ * unbound workqueue for crypto operations causes excessive scheduler
++ * latency on ARM64.
+ */
+ fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
+- WQ_UNBOUND | WQ_HIGHPRI,
++ WQ_HIGHPRI,
+ num_online_cpus());
+ if (!fsverity_read_workqueue)
+ return -ENOMEM;
+diff --git a/fs/xattr.c b/fs/xattr.c
+index 5c8c5175b385c..4c82f271f4aa3 100644
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -25,6 +25,8 @@
+
+ #include <linux/uaccess.h>
+
++#include "internal.h"
++
+ static const char *
+ strcmp_prefix(const char *a, const char *a_prefix)
+ {
+@@ -539,43 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
+ /*
+ * Extended attribute SET operations
+ */
+-static long
+-setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+- const char __user *name, const void __user *value, size_t size,
+- int flags)
++
++int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
+ {
+ int error;
+- void *kvalue = NULL;
+- char kname[XATTR_NAME_MAX + 1];
+
+- if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
++ if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
+ return -EINVAL;
+
+- error = strncpy_from_user(kname, name, sizeof(kname));
+- if (error == 0 || error == sizeof(kname))
+- error = -ERANGE;
++ error = strncpy_from_user(ctx->kname->name, name,
++ sizeof(ctx->kname->name));
++ if (error == 0 || error == sizeof(ctx->kname->name))
++ return -ERANGE;
+ if (error < 0)
+ return error;
+
+- if (size) {
+- if (size > XATTR_SIZE_MAX)
++ error = 0;
++ if (ctx->size) {
++ if (ctx->size > XATTR_SIZE_MAX)
+ return -E2BIG;
+- kvalue = kvmalloc(size, GFP_KERNEL);
+- if (!kvalue)
+- return -ENOMEM;
+- if (copy_from_user(kvalue, value, size)) {
+- error = -EFAULT;
+- goto out;
++
++ ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
++ if (IS_ERR(ctx->kvalue)) {
++ error = PTR_ERR(ctx->kvalue);
++ ctx->kvalue = NULL;
+ }
+- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+- posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size);
+ }
+
+- error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
+-out:
+- kvfree(kvalue);
++ return error;
++}
++
++static void setxattr_convert(struct user_namespace *mnt_userns,
++ struct dentry *d, struct xattr_ctx *ctx)
++{
++ if (ctx->size &&
++ ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
++ (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
++ posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
++ ctx->kvalue, ctx->size);
++}
++
++int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
++ struct xattr_ctx *ctx)
++{
++ setxattr_convert(mnt_userns, dentry, ctx);
++ return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
++ ctx->kvalue, ctx->size, ctx->flags);
++}
++
++static long
++setxattr(struct user_namespace *mnt_userns, struct dentry *d,
++ const char __user *name, const void __user *value, size_t size,
++ int flags)
++{
++ struct xattr_name kname;
++ struct xattr_ctx ctx = {
++ .cvalue = value,
++ .kvalue = NULL,
++ .size = size,
++ .kname = &kname,
++ .flags = flags,
++ };
++ int error;
++
++ error = setxattr_copy(name, &ctx);
++ if (error)
++ return error;
++
++ error = do_setxattr(mnt_userns, d, &ctx);
+
++ kvfree(ctx.kvalue);
+ return error;
+ }
+
+@@ -667,7 +702,8 @@ getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ if (error > 0) {
+ if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+- posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error);
++ posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
++ kvalue, error);
+ if (size && copy_to_user(value, kvalue, error))
+ error = -EFAULT;
+ } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+@@ -1083,7 +1119,7 @@ static int xattr_list_one(char **buffer, ssize_t *remaining_size,
+ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ char *buffer, size_t size)
+ {
+- bool trusted = capable(CAP_SYS_ADMIN);
++ bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ struct simple_xattr *xattr;
+ ssize_t remaining_size = size;
+ int err = 0;
+diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
+index 005abfd9fd347..aff6fb5281f63 100644
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -173,7 +173,6 @@ __xfs_free_perag(
+ struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+
+ ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+- ASSERT(atomic_read(&pag->pag_ref) == 0);
+ kmem_free(pag);
+ }
+
+@@ -192,7 +191,7 @@ xfs_free_perag(
+ pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ spin_unlock(&mp->m_perag_lock);
+ ASSERT(pag);
+- ASSERT(atomic_read(&pag->pag_ref) == 0);
++ XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+
+ cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ xfs_iunlink_destroy(pag);
+diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
+index 4c6f9045baca0..3f597cad2c333 100644
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag);
+
+ /*
+ * Perag iteration APIs
+- *
+- * XXX: for_each_perag_range() usage really needs an iterator to clean up when
+- * we terminate at end_agno because we may have taken a reference to the perag
+- * beyond end_agno. Right now callers have to be careful to catch and clean that
+- * up themselves. This is not necessary for the callers of for_each_perag() and
+- * for_each_perag_from() because they terminate at sb_agcount where there are
+- * no perag structures in tree beyond end_agno.
+ */
+-#define for_each_perag_range(mp, next_agno, end_agno, pag) \
+- for ((pag) = xfs_perag_get((mp), (next_agno)); \
+- (pag) != NULL && (next_agno) <= (end_agno); \
+- (next_agno) = (pag)->pag_agno + 1, \
+- xfs_perag_put(pag), \
+- (pag) = xfs_perag_get((mp), (next_agno)))
++static inline struct xfs_perag *
++xfs_perag_next(
++ struct xfs_perag *pag,
++ xfs_agnumber_t *agno,
++ xfs_agnumber_t end_agno)
++{
++ struct xfs_mount *mp = pag->pag_mount;
++
++ *agno = pag->pag_agno + 1;
++ xfs_perag_put(pag);
++ if (*agno > end_agno)
++ return NULL;
++ return xfs_perag_get(mp, *agno);
++}
++
++#define for_each_perag_range(mp, agno, end_agno, pag) \
++ for ((pag) = xfs_perag_get((mp), (agno)); \
++ (pag) != NULL; \
++ (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
+
+-#define for_each_perag_from(mp, next_agno, pag) \
+- for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag))
++#define for_each_perag_from(mp, agno, pag) \
++ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
+
+
+ #define for_each_perag(mp, agno, pag) \
+diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
+index fbc9d816882ce..23523b802539e 100644
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -1077,21 +1077,18 @@ xfs_attr_node_hasname(
+
+ state = xfs_da_state_alloc(args);
+ if (statep != NULL)
+- *statep = NULL;
++ *statep = state;
+
+ /*
+ * Search to see if name exists, and get back a pointer to it.
+ */
+ error = xfs_da3_node_lookup_int(state, &retval);
+- if (error) {
+- xfs_da_state_free(state);
+- return error;
+- }
++ if (error)
++ retval = error;
+
+- if (statep != NULL)
+- *statep = state;
+- else
++ if (!statep)
+ xfs_da_state_free(state);
++
+ return retval;
+ }
+
+@@ -1112,7 +1109,7 @@ xfs_attr_node_addname_find_attr(
+ */
+ retval = xfs_attr_node_hasname(args, &dac->da_state);
+ if (retval != -ENOATTR && retval != -EEXIST)
+- return retval;
++ goto error;
+
+ if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
+ goto error;
+@@ -1337,7 +1334,7 @@ int xfs_attr_node_removename_setup(
+
+ error = xfs_attr_node_hasname(args, state);
+ if (error != -EEXIST)
+- return error;
++ goto out;
+ error = 0;
+
+ ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
+diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
+index 2983954817135..dffe4ca584935 100644
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -51,6 +51,71 @@ xfs_btree_magic(
+ return magic;
+ }
+
++/*
++ * These sibling pointer checks are optimised for null sibling pointers. This
++ * happens a lot, and we don't need to byte swap at runtime if the sibling
++ * pointer is NULL.
++ *
++ * These are explicitly marked at inline because the cost of calling them as
++ * functions instead of inlining them is about 36 bytes extra code per call site
++ * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
++ * two sibling check functions reduces the compiled code size by over 300
++ * bytes.
++ */
++static inline xfs_failaddr_t
++xfs_btree_check_lblock_siblings(
++ struct xfs_mount *mp,
++ struct xfs_btree_cur *cur,
++ int level,
++ xfs_fsblock_t fsb,
++ __be64 dsibling)
++{
++ xfs_fsblock_t sibling;
++
++ if (dsibling == cpu_to_be64(NULLFSBLOCK))
++ return NULL;
++
++ sibling = be64_to_cpu(dsibling);
++ if (sibling == fsb)
++ return __this_address;
++ if (level >= 0) {
++ if (!xfs_btree_check_lptr(cur, sibling, level + 1))
++ return __this_address;
++ } else {
++ if (!xfs_verify_fsbno(mp, sibling))
++ return __this_address;
++ }
++
++ return NULL;
++}
++
++static inline xfs_failaddr_t
++xfs_btree_check_sblock_siblings(
++ struct xfs_mount *mp,
++ struct xfs_btree_cur *cur,
++ int level,
++ xfs_agnumber_t agno,
++ xfs_agblock_t agbno,
++ __be32 dsibling)
++{
++ xfs_agblock_t sibling;
++
++ if (dsibling == cpu_to_be32(NULLAGBLOCK))
++ return NULL;
++
++ sibling = be32_to_cpu(dsibling);
++ if (sibling == agbno)
++ return __this_address;
++ if (level >= 0) {
++ if (!xfs_btree_check_sptr(cur, sibling, level + 1))
++ return __this_address;
++ } else {
++ if (!xfs_verify_agbno(mp, agno, sibling))
++ return __this_address;
++ }
++ return NULL;
++}
++
+ /*
+ * Check a long btree block header. Return the address of the failing check,
+ * or NULL if everything is ok.
+@@ -65,6 +130,8 @@ __xfs_btree_check_lblock(
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc = xfs_has_crc(mp);
++ xfs_failaddr_t fa;
++ xfs_fsblock_t fsb = NULLFSBLOCK;
+
+ if (crc) {
+ if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
+@@ -83,16 +150,16 @@ __xfs_btree_check_lblock(
+ if (be16_to_cpu(block->bb_numrecs) >
+ cur->bc_ops->get_maxrecs(cur, level))
+ return __this_address;
+- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
+- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
+- level + 1))
+- return __this_address;
+- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
+- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
+- level + 1))
+- return __this_address;
+
+- return NULL;
++ if (bp)
++ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
++
++ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
++ block->bb_u.l.bb_leftsib);
++ if (!fa)
++ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
++ block->bb_u.l.bb_rightsib);
++ return fa;
+ }
+
+ /* Check a long btree block header. */
+@@ -130,6 +197,9 @@ __xfs_btree_check_sblock(
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_btnum_t btnum = cur->bc_btnum;
+ int crc = xfs_has_crc(mp);
++ xfs_failaddr_t fa;
++ xfs_agblock_t agbno = NULLAGBLOCK;
++ xfs_agnumber_t agno = NULLAGNUMBER;
+
+ if (crc) {
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+@@ -146,16 +216,18 @@ __xfs_btree_check_sblock(
+ if (be16_to_cpu(block->bb_numrecs) >
+ cur->bc_ops->get_maxrecs(cur, level))
+ return __this_address;
+- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
+- level + 1))
+- return __this_address;
+- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
+- level + 1))
+- return __this_address;
+
+- return NULL;
++ if (bp) {
++ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
++ agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
++ }
++
++ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
++ block->bb_u.s.bb_leftsib);
++ if (!fa)
++ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
++ agbno, block->bb_u.s.bb_rightsib);
++ return fa;
+ }
+
+ /* Check a short btree block header. */
+@@ -373,8 +445,14 @@ xfs_btree_del_cursor(
+ break;
+ }
+
++ /*
++ * If we are doing a BMBT update, the number of unaccounted blocks
++ * allocated during this cursor life time should be zero. If it's not
++ * zero, then we should be shut down or on our way to shutdown due to
++ * cancelling a dirty transaction on error.
++ */
+ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
+- xfs_is_shutdown(cur->bc_mp));
++ xfs_is_shutdown(cur->bc_mp) || error != 0);
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+ kmem_free(cur->bc_ops);
+ if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
+@@ -3188,7 +3266,7 @@ xfs_btree_insrec(
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer for block */
+ union xfs_btree_ptr nptr; /* new block ptr */
+- struct xfs_btree_cur *ncur; /* new btree cursor */
++ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
+ union xfs_btree_key nkey; /* new block key */
+ union xfs_btree_key *lkey;
+ int optr; /* old key/record index */
+@@ -3268,7 +3346,7 @@ xfs_btree_insrec(
+ #ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+- return error;
++ goto error0;
+ #endif
+
+ /*
+@@ -3288,7 +3366,7 @@ xfs_btree_insrec(
+ for (i = numrecs - ptr; i >= 0; i--) {
+ error = xfs_btree_debug_check_ptr(cur, pp, i, level);
+ if (error)
+- return error;
++ goto error0;
+ }
+
+ xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
+@@ -3373,6 +3451,8 @@ xfs_btree_insrec(
+ return 0;
+
+ error0:
++ if (ncur)
++ xfs_btree_del_cursor(ncur, error);
+ return error;
+ }
+
+@@ -4265,6 +4345,21 @@ xfs_btree_visit_block(
+ if (xfs_btree_ptr_is_null(cur, &rptr))
+ return -ENOENT;
+
++ /*
++ * We only visit blocks once in this walk, so we have to avoid the
++ * internal xfs_btree_lookup_get_block() optimisation where it will
++ * return the same block without checking if the right sibling points
++ * back to us and creates a cyclic reference in the btree.
++ */
++ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
++ if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
++ xfs_buf_daddr(bp)))
++ return -EFSCORRUPTED;
++ } else {
++ if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
++ xfs_buf_daddr(bp)))
++ return -EFSCORRUPTED;
++ }
+ return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+ }
+
+@@ -4439,20 +4534,21 @@ xfs_btree_lblock_verify(
+ {
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
++ xfs_fsblock_t fsb;
++ xfs_failaddr_t fa;
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return __this_address;
+
+ /* sibling pointer verification */
+- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
+- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
+- return __this_address;
+- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
+- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
+- return __this_address;
+-
+- return NULL;
++ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
++ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
++ block->bb_u.l.bb_leftsib);
++ if (!fa)
++ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
++ block->bb_u.l.bb_rightsib);
++ return fa;
+ }
+
+ /**
+@@ -4493,7 +4589,9 @@ xfs_btree_sblock_verify(
+ {
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+- xfs_agblock_t agno;
++ xfs_agnumber_t agno;
++ xfs_agblock_t agbno;
++ xfs_failaddr_t fa;
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+@@ -4501,14 +4599,13 @@ xfs_btree_sblock_verify(
+
+ /* sibling pointer verification */
+ agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
+- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
+- return __this_address;
+- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
+- return __this_address;
+-
+- return NULL;
++ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
++ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
++ block->bb_u.s.bb_leftsib);
++ if (!fa)
++ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
++ block->bb_u.s.bb_rightsib);
++ return fa;
+ }
+
+ /*
+diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
+index ac9e80152b5cf..89c8a1498df1d 100644
+--- a/fs/xfs/libxfs/xfs_btree_staging.c
++++ b/fs/xfs/libxfs/xfs_btree_staging.c
+@@ -662,7 +662,7 @@ xfs_btree_bload_compute_geometry(
+ xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1);
+
+ bbl->nr_records = nr_this_level = nr_records;
+- for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) {
++ for (cur->bc_nlevels = 1; cur->bc_nlevels <= XFS_BTREE_MAXLEVELS;) {
+ uint64_t level_blocks;
+ uint64_t dontcare64;
+ unsigned int level = cur->bc_nlevels - 1;
+@@ -724,7 +724,7 @@ xfs_btree_bload_compute_geometry(
+ nr_this_level = level_blocks;
+ }
+
+- if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS)
++ if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS)
+ return -EOVERFLOW;
+
+ bbl->btree_height = cur->bc_nlevels;
+diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
+index 3932b4ebf9037..f84d3fbb9d3da 100644
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -337,19 +337,36 @@ xfs_dinode_verify_fork(
+ int whichfork)
+ {
+ uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
++ mode_t mode = be16_to_cpu(dip->di_mode);
++ uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
++ uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork);
+
+- switch (XFS_DFORK_FORMAT(dip, whichfork)) {
++ /*
++ * For fork types that can contain local data, check that the fork
++ * format matches the size of local data contained within the fork.
++ *
++ * For all types, check that when the size says the should be in extent
++ * or btree format, the inode isn't claiming it is in local format.
++ */
++ if (whichfork == XFS_DATA_FORK) {
++ if (S_ISDIR(mode) || S_ISLNK(mode)) {
++ if (be64_to_cpu(dip->di_size) <= fork_size &&
++ fork_format != XFS_DINODE_FMT_LOCAL)
++ return __this_address;
++ }
++
++ if (be64_to_cpu(dip->di_size) > fork_size &&
++ fork_format == XFS_DINODE_FMT_LOCAL)
++ return __this_address;
++ }
++
++ switch (fork_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+- * no local regular files yet
++ * No local regular files yet.
+ */
+- if (whichfork == XFS_DATA_FORK) {
+- if (S_ISREG(be16_to_cpu(dip->di_mode)))
+- return __this_address;
+- if (be64_to_cpu(dip->di_size) >
+- XFS_DFORK_SIZE(dip, mp, whichfork))
+- return __this_address;
+- }
++ if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
++ return __this_address;
+ if (di_nextents)
+ return __this_address;
+ break;
+diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
+index 1d174909f9bdf..20095233d7bc0 100644
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -50,8 +50,13 @@ xfs_init_local_fork(
+ mem_size++;
+
+ if (size) {
++ /*
++ * As we round up the allocation here, we need to ensure the
++ * bytes we don't copy data into are zeroed because the log
++ * vectors still copy them into the journal.
++ */
+ real_size = roundup(mem_size, 4);
+- ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
++ ifp->if_u1.if_data = kmem_zalloc(real_size, KM_NOFS);
+ memcpy(ifp->if_u1.if_data, data, size);
+ if (zero_terminate)
+ ifp->if_u1.if_data[size] = '\0';
+@@ -500,10 +505,11 @@ xfs_idata_realloc(
+ /*
+ * For inline data, the underlying buffer must be a multiple of 4 bytes
+ * in size so that it can be logged and stay on word boundaries.
+- * We enforce that here.
++ * We enforce that here, and use __GFP_ZERO to ensure that size
++ * extensions always zero the unused roundup area.
+ */
+ ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
+- GFP_NOFS | __GFP_NOFAIL);
++ GFP_NOFS | __GFP_NOFAIL | __GFP_ZERO);
+ ifp->if_bytes = new_size;
+ }
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index e58349be78bd5..26dd9ceb44b42 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -31,18 +31,70 @@
+ */
+
+ /*
+- * We support all XFS versions newer than a v4 superblock with V2 directories.
++ * Check that all the V4 feature bits that the V5 filesystem format requires are
++ * correctly set.
++ */
++static bool
++xfs_sb_validate_v5_features(
++ struct xfs_sb *sbp)
++{
++ /* We must not have any unknown V4 feature bits set */
++ if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS)
++ return false;
++
++ /*
++ * The CRC bit is considered an invalid V4 flag, so we have to add it
++ * manually to the OKBITS mask.
++ */
++ if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS |
++ XFS_SB_VERSION2_CRCBIT))
++ return false;
++
++ /* Now check all the required V4 feature flags are set. */
++
++#define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \
++ XFS_SB_VERSION_ALIGNBIT | \
++ XFS_SB_VERSION_LOGV2BIT | \
++ XFS_SB_VERSION_EXTFLGBIT | \
++ XFS_SB_VERSION_DIRV2BIT | \
++ XFS_SB_VERSION_MOREBITSBIT)
++
++#define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
++ XFS_SB_VERSION2_ATTR2BIT | \
++ XFS_SB_VERSION2_PROJID32BIT | \
++ XFS_SB_VERSION2_CRCBIT)
++
++ if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS)
++ return false;
++ if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS)
++ return false;
++ return true;
++}
++
++/*
++ * We current support XFS v5 formats with known features and v4 superblocks with
++ * at least V2 directories.
+ */
+ bool
+ xfs_sb_good_version(
+ struct xfs_sb *sbp)
+ {
+- /* all v5 filesystems are supported */
++ /*
++ * All v5 filesystems are supported, but we must check that all the
++ * required v4 feature flags are enabled correctly as the code checks
++ * those flags and not for v5 support.
++ */
+ if (xfs_sb_is_v5(sbp))
+- return true;
++ return xfs_sb_validate_v5_features(sbp);
+
+ /* versions prior to v4 are not supported */
+- if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4)
++ if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4)
++ return false;
++
++ /* We must not have any unknown v4 feature bits set */
++ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
++ ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
++ (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
+ return false;
+
+ /* V4 filesystems need v2 directories and unwritten extents */
+@@ -51,12 +103,6 @@ xfs_sb_good_version(
+ if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+ return false;
+
+- /* And must not have any unknown v4 feature bits set */
+- if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
+- ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
+- (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
+- return false;
+-
+ /* It's a supported v4 filesystem */
+ return true;
+ }
+@@ -70,6 +116,8 @@ xfs_sb_version_to_features(
+ /* optional V4 features */
+ if (sbp->sb_rblocks > 0)
+ features |= XFS_FEAT_REALTIME;
++ if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)
++ features |= XFS_FEAT_NLINK;
+ if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)
+ features |= XFS_FEAT_ATTR;
+ if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT)
+@@ -262,12 +310,15 @@ xfs_validate_sb_common(
+ bool has_dalign;
+
+ if (!xfs_verify_magic(bp, dsb->sb_magicnum)) {
+- xfs_warn(mp, "bad magic number");
++ xfs_warn(mp,
++"Superblock has bad magic number 0x%x. Not an XFS filesystem?",
++ be32_to_cpu(dsb->sb_magicnum));
+ return -EWRONGFS;
+ }
+
+ if (!xfs_sb_good_version(sbp)) {
+- xfs_warn(mp, "bad version");
++ xfs_warn(mp,
++"Superblock has unknown features enabled or corrupted feature masks.");
+ return -EWRONGFS;
+ }
+
+diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
+index 34fc6148032a3..c8c15c3c31471 100644
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -82,6 +82,7 @@ xfs_end_ioend(
+ struct iomap_ioend *ioend)
+ {
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
++ struct xfs_mount *mp = ip->i_mount;
+ xfs_off_t offset = ioend->io_offset;
+ size_t size = ioend->io_size;
+ unsigned int nofs_flag;
+@@ -97,18 +98,26 @@ xfs_end_ioend(
+ /*
+ * Just clean up the in-memory structures if the fs has been shut down.
+ */
+- if (xfs_is_shutdown(ip->i_mount)) {
++ if (xfs_is_shutdown(mp)) {
+ error = -EIO;
+ goto done;
+ }
+
+ /*
+- * Clean up any COW blocks on an I/O error.
++ * Clean up all COW blocks and underlying data fork delalloc blocks on
++ * I/O error. The delalloc punch is required because this ioend was
++ * mapped to blocks in the COW fork and the associated pages are no
++ * longer dirty. If we don't remove delalloc blocks here, they become
++ * stale and can corrupt free space accounting on unmount.
+ */
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
+ if (unlikely(error)) {
+- if (ioend->io_flags & IOMAP_F_SHARED)
++ if (ioend->io_flags & IOMAP_F_SHARED) {
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
++ xfs_bmap_punch_delalloc_range(ip,
++ XFS_B_TO_FSBT(mp, offset),
++ XFS_B_TO_FSB(mp, size));
++ }
+ goto done;
+ }
+
+diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
+index 667e297f59b16..17f36db2f7928 100644
+--- a/fs/xfs/xfs_bio_io.c
++++ b/fs/xfs/xfs_bio_io.c
+@@ -9,41 +9,6 @@ static inline unsigned int bio_max_vecs(unsigned int count)
+ return bio_max_segs(howmany(count, PAGE_SIZE));
+ }
+
+-static void
+-xfs_flush_bdev_async_endio(
+- struct bio *bio)
+-{
+- complete(bio->bi_private);
+-}
+-
+-/*
+- * Submit a request for an async cache flush to run. If the request queue does
+- * not require flush operations, just skip it altogether. If the caller needs
+- * to wait for the flush completion at a later point in time, they must supply a
+- * valid completion. This will be signalled when the flush completes. The
+- * caller never sees the bio that is issued here.
+- */
+-void
+-xfs_flush_bdev_async(
+- struct bio *bio,
+- struct block_device *bdev,
+- struct completion *done)
+-{
+- struct request_queue *q = bdev->bd_disk->queue;
+-
+- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+- complete(done);
+- return;
+- }
+-
+- bio_init(bio, NULL, 0);
+- bio_set_dev(bio, bdev);
+- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+- bio->bi_private = done;
+- bio->bi_end_io = xfs_flush_bdev_async_endio;
+-
+- submit_bio(bio);
+-}
+ int
+ xfs_rw_bdev(
+ struct block_device *bdev,
+diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
+index 03159970133ff..51ffdec5e4faa 100644
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -39,6 +39,7 @@ STATIC void
+ xfs_bui_item_free(
+ struct xfs_bui_log_item *buip)
+ {
++ kmem_free(buip->bui_item.li_lv_shadow);
+ kmem_cache_free(xfs_bui_zone, buip);
+ }
+
+@@ -198,6 +199,7 @@ xfs_bud_item_release(
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ xfs_bui_release(budp->bud_buip);
++ kmem_free(budp->bud_item.li_lv_shadow);
+ kmem_cache_free(xfs_bud_zone, budp);
+ }
+
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index 73a36b7be3bd1..fd2ad6a3019ca 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -851,9 +851,6 @@ xfs_alloc_file_space(
+ rblocks = 0;
+ }
+
+- /*
+- * Allocate and setup the transaction.
+- */
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
+ dblocks, rblocks, false, &tp);
+ if (error)
+@@ -870,9 +867,9 @@ xfs_alloc_file_space(
+ if (error)
+ goto error;
+
+- /*
+- * Complete the transaction
+- */
++ ip->i_diflags |= XFS_DIFLAG_PREALLOC;
++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
++
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (error)
+diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
+index a476c7ef5d533..e04e44ef14c6d 100644
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn(
+ }
+
+ if (lsn != (xfs_lsn_t)-1) {
+- if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
++ if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
+ goto recover_immediately;
+ return lsn;
+ }
+@@ -934,6 +934,16 @@ xlog_recover_buf_commit_pass2(
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ trace_xfs_log_recover_buf_skip(log, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
++
++ /*
++ * We're skipping replay of this buffer log item due to the log
++ * item LSN being behind the ondisk buffer. Verify the buffer
++ * contents since we aren't going to run the write verifier.
++ */
++ if (bp->b_ops) {
++ bp->b_ops->verify_read(bp);
++ error = bp->b_error;
++ }
+ goto out_release;
+ }
+
+diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
+index 3f8a0713573ad..a4b8caa2c601d 100644
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -482,7 +482,7 @@ xfs_extent_free_finish_item(
+ free->xefi_startblock,
+ free->xefi_blockcount,
+ &free->xefi_oinfo, free->xefi_skip_discard);
+- kmem_free(free);
++ kmem_cache_free(xfs_bmap_free_item_zone, free);
+ return error;
+ }
+
+@@ -502,7 +502,7 @@ xfs_extent_free_cancel_item(
+ struct xfs_extent_free_item *free;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+- kmem_free(free);
++ kmem_cache_free(xfs_bmap_free_item_zone, free);
+ }
+
+ const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+@@ -564,7 +564,7 @@ xfs_agfl_free_finish_item(
+ extp->ext_len = free->xefi_blockcount;
+ efdp->efd_next_extent++;
+
+- kmem_free(free);
++ kmem_cache_free(xfs_bmap_free_item_zone, free);
+ return error;
+ }
+
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 7aa943edfc02f..8cd0c3df253f9 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -95,8 +95,6 @@ xfs_update_prealloc_flags(
+ ip->i_diflags &= ~XFS_DIFLAG_PREALLOC;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+- if (flags & XFS_PREALLOC_SYNC)
+- xfs_trans_set_sync(tp);
+ return xfs_trans_commit(tp);
+ }
+
+@@ -259,7 +257,7 @@ xfs_file_dio_read(
+ ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
+ if (ret)
+ return ret;
+- ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0);
++ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ return ret;
+@@ -569,7 +567,7 @@ xfs_file_dio_write_aligned(
+ }
+ trace_xfs_file_direct_write(iocb, from);
+ ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+- &xfs_dio_write_ops, 0);
++ &xfs_dio_write_ops, 0, 0);
+ out_unlock:
+ if (iolock)
+ xfs_iunlock(ip, iolock);
+@@ -647,7 +645,7 @@ retry_exclusive:
+
+ trace_xfs_file_direct_write(iocb, from);
+ ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+- &xfs_dio_write_ops, flags);
++ &xfs_dio_write_ops, flags, 0);
+
+ /*
+ * Retry unaligned I/O with exclusive blocking semantics if the DIO
+@@ -911,7 +909,6 @@ xfs_file_fallocate(
+ struct inode *inode = file_inode(file);
+ struct xfs_inode *ip = XFS_I(inode);
+ long error;
+- enum xfs_prealloc_flags flags = 0;
+ uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+ loff_t new_size = 0;
+ bool do_file_insert = false;
+@@ -956,6 +953,10 @@ xfs_file_fallocate(
+ goto out_unlock;
+ }
+
++ error = file_modified(file);
++ if (error)
++ goto out_unlock;
++
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+@@ -1005,8 +1006,6 @@ xfs_file_fallocate(
+ }
+ do_file_insert = true;
+ } else {
+- flags |= XFS_PREALLOC_SET;
+-
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+@@ -1059,13 +1058,6 @@ xfs_file_fallocate(
+ }
+ }
+
+- if (file->f_flags & O_DSYNC)
+- flags |= XFS_PREALLOC_SYNC;
+-
+- error = xfs_update_prealloc_flags(ip, flags);
+- if (error)
+- goto out_unlock;
+-
+ /* Change file size if needed */
+ if (new_size) {
+ struct iattr iattr;
+@@ -1084,8 +1076,14 @@ xfs_file_fallocate(
+ * leave shifted extents past EOF and hence losing access to
+ * the data that is contained within them.
+ */
+- if (do_file_insert)
++ if (do_file_insert) {
+ error = xfs_insert_file_space(ip, offset, len);
++ if (error)
++ goto out_unlock;
++ }
++
++ if (file->f_flags & O_DSYNC)
++ error = xfs_log_force_inode(ip);
+
+ out_unlock:
+ xfs_iunlock(ip, iolock);
+diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
+index 6a3ce0f6dc9e9..be9bcf8a1f991 100644
+--- a/fs/xfs/xfs_filestream.c
++++ b/fs/xfs/xfs_filestream.c
+@@ -128,11 +128,12 @@ xfs_filestream_pick_ag(
+ if (!pag->pagf_init) {
+ err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
+ if (err) {
+- xfs_perag_put(pag);
+- if (err != -EAGAIN)
++ if (err != -EAGAIN) {
++ xfs_perag_put(pag);
+ return err;
++ }
+ /* Couldn't lock the AGF, skip this AG. */
+- continue;
++ goto next_ag;
+ }
+ }
+
+diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
+index 33e26690a8c4f..5b5b68affe66d 100644
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -430,46 +430,36 @@ xfs_reserve_blocks(
+ * If the request is larger than the current reservation, reserve the
+ * blocks before we update the reserve counters. Sample m_fdblocks and
+ * perform a partial reservation if the request exceeds free space.
++ *
++ * The code below estimates how many blocks it can request from
++ * fdblocks to stash in the reserve pool. This is a classic TOCTOU
++ * race since fdblocks updates are not always coordinated via
++ * m_sb_lock. Set the reserve size even if there's not enough free
++ * space to fill it because mod_fdblocks will refill an undersized
++ * reserve when it can.
+ */
+- error = -ENOSPC;
+- do {
+- free = percpu_counter_sum(&mp->m_fdblocks) -
+- mp->m_alloc_set_aside;
+- if (free <= 0)
+- break;
+-
+- delta = request - mp->m_resblks;
+- lcounter = free - delta;
+- if (lcounter < 0)
+- /* We can't satisfy the request, just get what we can */
+- fdblks_delta = free;
+- else
+- fdblks_delta = delta;
+-
++ free = percpu_counter_sum(&mp->m_fdblocks) -
++ xfs_fdblocks_unavailable(mp);
++ delta = request - mp->m_resblks;
++ mp->m_resblks = request;
++ if (delta > 0 && free > 0) {
+ /*
+ * We'll either succeed in getting space from the free block
+- * count or we'll get an ENOSPC. If we get a ENOSPC, it means
+- * things changed while we were calculating fdblks_delta and so
+- * we should try again to see if there is anything left to
+- * reserve.
++ * count or we'll get an ENOSPC. Don't set the reserved flag
++ * here - we don't want to reserve the extra reserve blocks
++ * from the reserve.
+ *
+- * Don't set the reserved flag here - we don't want to reserve
+- * the extra reserve blocks from the reserve.....
++ * The desired reserve size can change after we drop the lock.
++ * Use mod_fdblocks to put the space into the reserve or into
++ * fdblocks as appropriate.
+ */
++ fdblks_delta = min(free, delta);
+ spin_unlock(&mp->m_sb_lock);
+ error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
++ if (!error)
++ xfs_mod_fdblocks(mp, fdblks_delta, 0);
+ spin_lock(&mp->m_sb_lock);
+- } while (error == -ENOSPC);
+-
+- /*
+- * Update the reserve counters if blocks have been successfully
+- * allocated.
+- */
+- if (!error && fdblks_delta) {
+- mp->m_resblks += fdblks_delta;
+- mp->m_resblks_avail += fdblks_delta;
+ }
+-
+ out:
+ if (outval) {
+ outval->resblks = mp->m_resblks;
+diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
+index f2210d927481b..5e44d7bbd8fca 100644
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1872,28 +1872,20 @@ xfs_inodegc_worker(
+ }
+
+ /*
+- * Force all currently queued inode inactivation work to run immediately, and
+- * wait for the work to finish. Two pass - queue all the work first pass, wait
+- * for it in a second pass.
++ * Force all currently queued inode inactivation work to run immediately and
++ * wait for the work to finish.
+ */
+ void
+ xfs_inodegc_flush(
+ struct xfs_mount *mp)
+ {
+- struct xfs_inodegc *gc;
+- int cpu;
+-
+ if (!xfs_is_inodegc_enabled(mp))
+ return;
+
+ trace_xfs_inodegc_flush(mp, __return_address);
+
+ xfs_inodegc_queue_all(mp);
+-
+- for_each_online_cpu(cpu) {
+- gc = per_cpu_ptr(mp->m_inodegc, cpu);
+- flush_work(&gc->work);
+- }
++ flush_workqueue(mp->m_inodegc_wq);
+ }
+
+ /*
+@@ -1904,18 +1896,12 @@ void
+ xfs_inodegc_stop(
+ struct xfs_mount *mp)
+ {
+- struct xfs_inodegc *gc;
+- int cpu;
+-
+ if (!xfs_clear_inodegc_enabled(mp))
+ return;
+
+ xfs_inodegc_queue_all(mp);
++ drain_workqueue(mp->m_inodegc_wq);
+
+- for_each_online_cpu(cpu) {
+- gc = per_cpu_ptr(mp->m_inodegc, cpu);
+- cancel_work_sync(&gc->work);
+- }
+ trace_xfs_inodegc_stop(mp, __return_address);
+ }
+
+diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
+index 017904a34c023..c265ae20946d5 100644
+--- a/fs/xfs/xfs_icreate_item.c
++++ b/fs/xfs/xfs_icreate_item.c
+@@ -63,6 +63,7 @@ STATIC void
+ xfs_icreate_item_release(
+ struct xfs_log_item *lip)
+ {
++ kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);
+ kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip));
+ }
+
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index a4f6f034fb813..b2ea853182141 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -994,8 +994,8 @@ xfs_create(
+ /*
+ * Make sure that we have allocated dquot(s) on disk.
+ */
+- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
+- mapped_fsgid(mnt_userns), prid,
++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
++ mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
+ if (error)
+@@ -1148,8 +1148,8 @@ xfs_create_tmpfile(
+ /*
+ * Make sure that we have allocated dquot(s) on disk.
+ */
+- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
+- mapped_fsgid(mnt_userns), prid,
++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
++ mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
+ if (error)
+@@ -1223,7 +1223,7 @@ xfs_link(
+ {
+ xfs_mount_t *mp = tdp->i_mount;
+ xfs_trans_t *tp;
+- int error;
++ int error, nospace_error = 0;
+ int resblks;
+
+ trace_xfs_link(tdp, target_name);
+@@ -1242,19 +1242,11 @@ xfs_link(
+ goto std_return;
+
+ resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
+- if (error == -ENOSPC) {
+- resblks = 0;
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
+- }
++ error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
++ &tp, &nospace_error);
+ if (error)
+ goto std_return;
+
+- xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
+-
+- xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
+- xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+-
+ error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
+ XFS_IEXT_DIR_MANIP_CNT(mp));
+ if (error)
+@@ -1312,6 +1304,8 @@ xfs_link(
+ error_return:
+ xfs_trans_cancel(tp);
+ std_return:
++ if (error == -ENOSPC && nospace_error)
++ error = nospace_error;
+ return error;
+ }
+
+@@ -2605,14 +2599,13 @@ xfs_ifree_cluster(
+ }
+
+ /*
+- * This is called to return an inode to the inode free list.
+- * The inode should already be truncated to 0 length and have
+- * no pages associated with it. This routine also assumes that
+- * the inode is already a part of the transaction.
++ * This is called to return an inode to the inode free list. The inode should
++ * already be truncated to 0 length and have no pages associated with it. This
++ * routine also assumes that the inode is already a part of the transaction.
+ *
+- * The on-disk copy of the inode will have been added to the list
+- * of unlinked inodes in the AGI. We need to remove the inode from
+- * that list atomically with respect to freeing it here.
++ * The on-disk copy of the inode will have been added to the list of unlinked
++ * inodes in the AGI. We need to remove the inode from that list atomically with
++ * respect to freeing it here.
+ */
+ int
+ xfs_ifree(
+@@ -2634,13 +2627,16 @@ xfs_ifree(
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
+ /*
+- * Pull the on-disk inode from the AGI unlinked list.
++ * Free the inode first so that we guarantee that the AGI lock is going
++ * to be taken before we remove the inode from the unlinked list. This
++ * makes the AGI lock -> unlinked list modification order the same as
++ * used in O_TMPFILE creation.
+ */
+- error = xfs_iunlink_remove(tp, pag, ip);
++ error = xfs_difree(tp, pag, ip->i_ino, &xic);
+ if (error)
+ goto out;
+
+- error = xfs_difree(tp, pag, ip->i_ino, &xic);
++ error = xfs_iunlink_remove(tp, pag, ip);
+ if (error)
+ goto out;
+
+@@ -2761,6 +2757,7 @@ xfs_remove(
+ xfs_mount_t *mp = dp->i_mount;
+ xfs_trans_t *tp = NULL;
+ int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
++ int dontcare;
+ int error = 0;
+ uint resblks;
+
+@@ -2778,31 +2775,24 @@ xfs_remove(
+ goto std_return;
+
+ /*
+- * We try to get the real space reservation first,
+- * allowing for directory btree deletion(s) implying
+- * possible bmap insert(s). If we can't get the space
+- * reservation then we use 0 instead, and avoid the bmap
+- * btree insert(s) in the directory code by, if the bmap
+- * insert tries to happen, instead trimming the LAST
+- * block from the directory.
++ * We try to get the real space reservation first, allowing for
++ * directory btree deletion(s) implying possible bmap insert(s). If we
++ * can't get the space reservation then we use 0 instead, and avoid the
++ * bmap btree insert(s) in the directory code by, if the bmap insert
++ * tries to happen, instead trimming the LAST block from the directory.
++ *
++ * Ignore EDQUOT and ENOSPC being returned via nospace_error because
++ * the directory code can handle a reservationless update and we don't
++ * want to prevent a user from trying to free space by deleting things.
+ */
+ resblks = XFS_REMOVE_SPACE_RES(mp);
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
+- if (error == -ENOSPC) {
+- resblks = 0;
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
+- &tp);
+- }
++ error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
++ &tp, &dontcare);
+ if (error) {
+ ASSERT(error != -ENOSPC);
+ goto std_return;
+ }
+
+- xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
+-
+- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+-
+ /*
+ * If we're removing a directory perform some additional validation.
+ */
+@@ -3115,7 +3105,8 @@ xfs_rename(
+ bool new_parent = (src_dp != target_dp);
+ bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
+ int spaceres;
+- int error;
++ bool retried = false;
++ int error, nospace_error = 0;
+
+ trace_xfs_rename(src_dp, target_dp, src_name, target_name);
+
+@@ -3128,7 +3119,6 @@ xfs_rename(
+ * appropriately.
+ */
+ if (flags & RENAME_WHITEOUT) {
+- ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
+ error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
+ if (error)
+ return error;
+@@ -3140,9 +3130,12 @@ xfs_rename(
+ xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
+ inodes, &num_inodes);
+
++retry:
++ nospace_error = 0;
+ spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
+ if (error == -ENOSPC) {
++ nospace_error = error;
+ spaceres = 0;
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
+ &tp);
+@@ -3196,6 +3189,31 @@ xfs_rename(
+ target_dp, target_name, target_ip,
+ spaceres);
+
++ /*
++ * Try to reserve quota to handle an expansion of the target directory.
++ * We'll allow the rename to continue in reservationless mode if we hit
++ * a space usage constraint. If we trigger reservationless mode, save
++ * the errno if there isn't any free space in the target directory.
++ */
++ if (spaceres != 0) {
++ error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
++ 0, false);
++ if (error == -EDQUOT || error == -ENOSPC) {
++ if (!retried) {
++ xfs_trans_cancel(tp);
++ xfs_blockgc_free_quota(target_dp, 0);
++ retried = true;
++ goto retry;
++ }
++
++ nospace_error = error;
++ spaceres = 0;
++ error = 0;
++ }
++ if (error)
++ goto out_trans_cancel;
++ }
++
+ /*
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
+@@ -3442,6 +3460,8 @@ out_trans_cancel:
+ out_release_wip:
+ if (wip)
+ xfs_irele(wip);
++ if (error == -ENOSPC && nospace_error)
++ error = nospace_error;
+ return error;
+ }
+
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
+index 0c795dc093efa..bcc3c18c8080b 100644
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -372,7 +372,7 @@ int
+ xfs_ioc_attr_list(
+ struct xfs_inode *dp,
+ void __user *ubuf,
+- int bufsize,
++ size_t bufsize,
+ int flags,
+ struct xfs_attrlist_cursor __user *ucursor)
+ {
+@@ -687,7 +687,8 @@ xfs_ioc_space(
+
+ if (bf->l_start > XFS_ISIZE(ip)) {
+ error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
+- bf->l_start - XFS_ISIZE(ip), 0);
++ bf->l_start - XFS_ISIZE(ip),
++ XFS_BMAPI_PREALLOC);
+ if (error)
+ goto out_unlock;
+ }
+@@ -1544,7 +1545,7 @@ xfs_ioc_getbmap(
+
+ if (bmx.bmv_count < 2)
+ return -EINVAL;
+- if (bmx.bmv_count > ULONG_MAX / recsize)
++ if (bmx.bmv_count >= INT_MAX / recsize)
+ return -ENOMEM;
+
+ buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL);
+diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
+index 28453a6d44618..845d3bcab74b4 100644
+--- a/fs/xfs/xfs_ioctl.h
++++ b/fs/xfs/xfs_ioctl.h
+@@ -38,8 +38,9 @@ xfs_readlink_by_handle(
+ int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode,
+ uint32_t opcode, void __user *uname, void __user *value,
+ uint32_t *len, uint32_t flags);
+-int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize,
+- int flags, struct xfs_attrlist_cursor __user *ucursor);
++int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf,
++ size_t bufsize, int flags,
++ struct xfs_attrlist_cursor __user *ucursor);
+
+ extern struct dentry *
+ xfs_handle_to_dentry(
+diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
+index a607d6aca5c4d..1eb71275e5b09 100644
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -634,37 +634,6 @@ xfs_vn_getattr(
+ return 0;
+ }
+
+-static void
+-xfs_setattr_mode(
+- struct xfs_inode *ip,
+- struct iattr *iattr)
+-{
+- struct inode *inode = VFS_I(ip);
+- umode_t mode = iattr->ia_mode;
+-
+- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+-
+- inode->i_mode &= S_IFMT;
+- inode->i_mode |= mode & ~S_IFMT;
+-}
+-
+-void
+-xfs_setattr_time(
+- struct xfs_inode *ip,
+- struct iattr *iattr)
+-{
+- struct inode *inode = VFS_I(ip);
+-
+- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+-
+- if (iattr->ia_valid & ATTR_ATIME)
+- inode->i_atime = iattr->ia_atime;
+- if (iattr->ia_valid & ATTR_CTIME)
+- inode->i_ctime = iattr->ia_ctime;
+- if (iattr->ia_valid & ATTR_MTIME)
+- inode->i_mtime = iattr->ia_mtime;
+-}
+-
+ static int
+ xfs_vn_change_ok(
+ struct user_namespace *mnt_userns,
+@@ -763,16 +732,6 @@ xfs_setattr_nonsize(
+ gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+ uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+- /*
+- * CAP_FSETID overrides the following restrictions:
+- *
+- * The set-user-ID and set-group-ID bits of a file will be
+- * cleared upon successful return from chown()
+- */
+- if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
+- !capable(CAP_FSETID))
+- inode->i_mode &= ~(S_ISUID|S_ISGID);
+-
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+@@ -784,7 +743,6 @@ xfs_setattr_nonsize(
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_udquot, udqp);
+ }
+- inode->i_uid = uid;
+ }
+ if (!gid_eq(igid, gid)) {
+ if (XFS_IS_GQUOTA_ON(mp)) {
+@@ -795,15 +753,10 @@ xfs_setattr_nonsize(
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+- inode->i_gid = gid;
+ }
+ }
+
+- if (mask & ATTR_MODE)
+- xfs_setattr_mode(ip, iattr);
+- if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+- xfs_setattr_time(ip, iattr);
+-
++ setattr_copy(mnt_userns, inode, iattr);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(mp, xs_ig_attrchg);
+@@ -1028,11 +981,8 @@ xfs_setattr_size(
+ xfs_inode_clear_eofblocks_tag(ip);
+ }
+
+- if (iattr->ia_valid & ATTR_MODE)
+- xfs_setattr_mode(ip, iattr);
+- if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+- xfs_setattr_time(ip, iattr);
+-
++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
++ setattr_copy(mnt_userns, inode, iattr);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(mp, xs_ig_attrchg);
+diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
+index 2789490560482..6a7909fdf4463 100644
+--- a/fs/xfs/xfs_iops.h
++++ b/fs/xfs/xfs_iops.h
+@@ -13,7 +13,6 @@ extern const struct file_operations xfs_dir_file_operations;
+
+ extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+
+-extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
+ int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *vap);
+
+diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
+index c174262a074e3..cb9105d667db4 100644
+--- a/fs/xfs/xfs_linux.h
++++ b/fs/xfs/xfs_linux.h
+@@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t;
+ #include <linux/ratelimit.h>
+ #include <linux/rhashtable.h>
+ #include <linux/xattr.h>
++#include <linux/mnt_idmapping.h>
+
+ #include <asm/page.h>
+ #include <asm/div64.h>
+@@ -196,8 +197,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
+
+ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
+ char *data, unsigned int op);
+-void xfs_flush_bdev_async(struct bio *bio, struct block_device *bdev,
+- struct completion *done);
+
+ #define ASSERT_ALWAYS(expr) \
+ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))
+diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
+index f6cd2d4aa770d..0fb7d05ca308d 100644
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -487,7 +487,10 @@ out_error:
+ * Run all the pending iclog callbacks and wake log force waiters and iclog
+ * space waiters so they can process the newly set shutdown state. We really
+ * don't care what order we process callbacks here because the log is shut down
+- * and so state cannot change on disk anymore.
++ * and so state cannot change on disk anymore. However, we cannot wake waiters
++ * until the callbacks have been processed because we may be in unmount and
++ * we must ensure that all AIL operations the callbacks perform have completed
++ * before we tear down the AIL.
+ *
+ * We avoid processing actively referenced iclogs so that we don't run callbacks
+ * while the iclog owner might still be preparing the iclog for IO submssion.
+@@ -501,7 +504,6 @@ xlog_state_shutdown_callbacks(
+ struct xlog_in_core *iclog;
+ LIST_HEAD(cb_list);
+
+- spin_lock(&log->l_icloglock);
+ iclog = log->l_iclog;
+ do {
+ if (atomic_read(&iclog->ic_refcnt)) {
+@@ -509,26 +511,22 @@ xlog_state_shutdown_callbacks(
+ continue;
+ }
+ list_splice_init(&iclog->ic_callbacks, &cb_list);
++ spin_unlock(&log->l_icloglock);
++
++ xlog_cil_process_committed(&cb_list);
++
++ spin_lock(&log->l_icloglock);
+ wake_up_all(&iclog->ic_write_wait);
+ wake_up_all(&iclog->ic_force_wait);
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
+
+ wake_up_all(&log->l_flush_wait);
+- spin_unlock(&log->l_icloglock);
+-
+- xlog_cil_process_committed(&cb_list);
+ }
+
+ /*
+ * Flush iclog to disk if this is the last reference to the given iclog and the
+ * it is in the WANT_SYNC state.
+ *
+- * If the caller passes in a non-zero @old_tail_lsn and the current log tail
+- * does not match, there may be metadata on disk that must be persisted before
+- * this iclog is written. To satisfy that requirement, set the
+- * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new
+- * log tail value.
+- *
+ * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
+ * log tail is updated correctly. NEED_FUA indicates that the iclog will be
+ * written to stable storage, and implies that a commit record is contained
+@@ -545,12 +543,10 @@ xlog_state_shutdown_callbacks(
+ * always capture the tail lsn on the iclog on the first NEED_FUA release
+ * regardless of the number of active reference counts on this iclog.
+ */
+-
+ int
+ xlog_state_release_iclog(
+ struct xlog *log,
+- struct xlog_in_core *iclog,
+- xfs_lsn_t old_tail_lsn)
++ struct xlog_in_core *iclog)
+ {
+ xfs_lsn_t tail_lsn;
+ bool last_ref;
+@@ -561,18 +557,14 @@ xlog_state_release_iclog(
+ /*
+ * Grabbing the current log tail needs to be atomic w.r.t. the writing
+ * of the tail LSN into the iclog so we guarantee that the log tail does
+- * not move between deciding if a cache flush is required and writing
+- * the LSN into the iclog below.
++ * not move between the first time we know that the iclog needs to be
++ * made stable and when we eventually submit it.
+ */
+- if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
++ if ((iclog->ic_state == XLOG_STATE_WANT_SYNC ||
++ (iclog->ic_flags & XLOG_ICL_NEED_FUA)) &&
++ !iclog->ic_header.h_tail_lsn) {
+ tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+-
+- if (old_tail_lsn && tail_lsn != old_tail_lsn)
+- iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+-
+- if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) &&
+- !iclog->ic_header.h_tail_lsn)
+- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
++ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ }
+
+ last_ref = atomic_dec_and_test(&iclog->ic_refcnt);
+@@ -583,11 +575,8 @@ xlog_state_release_iclog(
+ * pending iclog callbacks that were waiting on the release of
+ * this iclog.
+ */
+- if (last_ref) {
+- spin_unlock(&log->l_icloglock);
++ if (last_ref)
+ xlog_state_shutdown_callbacks(log);
+- spin_lock(&log->l_icloglock);
+- }
+ return -EIO;
+ }
+
+@@ -600,8 +589,6 @@ xlog_state_release_iclog(
+ }
+
+ iclog->ic_state = XLOG_STATE_SYNCING;
+- if (!iclog->ic_header.h_tail_lsn)
+- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog);
+ trace_xlog_iclog_syncing(iclog, _RET_IP_);
+
+@@ -874,7 +861,7 @@ xlog_force_iclog(
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
+ xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
+- return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
++ return xlog_state_release_iclog(iclog->ic_log, iclog);
+ }
+
+ /*
+@@ -2412,7 +2399,7 @@ xlog_write_copy_finish(
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+ xlog_is_shutdown(log));
+ release_iclog:
+- error = xlog_state_release_iclog(log, iclog, 0);
++ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ return error;
+ }
+@@ -2629,7 +2616,7 @@ next_lv:
+
+ spin_lock(&log->l_icloglock);
+ xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
+- error = xlog_state_release_iclog(log, iclog, 0);
++ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+
+ return error;
+@@ -3053,7 +3040,7 @@ restart:
+ * reference to the iclog.
+ */
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
+- error = xlog_state_release_iclog(log, iclog, 0);
++ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ if (error)
+ return error;
+@@ -3904,7 +3891,10 @@ xlog_force_shutdown(
+ wake_up_all(&log->l_cilp->xc_start_wait);
+ wake_up_all(&log->l_cilp->xc_commit_wait);
+ spin_unlock(&log->l_cilp->xc_push_lock);
++
++ spin_lock(&log->l_icloglock);
+ xlog_state_shutdown_callbacks(log);
++ spin_unlock(&log->l_icloglock);
+
+ return log_error;
+ }
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index 6c93c8ada6f35..eafe30843ff0f 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -681,11 +681,21 @@ xlog_cil_set_ctx_write_state(
+ * The LSN we need to pass to the log items on transaction
+ * commit is the LSN reported by the first log vector write, not
+ * the commit lsn. If we use the commit record lsn then we can
+- * move the tail beyond the grant write head.
++ * move the grant write head beyond the tail LSN and overwrite
++ * it.
+ */
+ ctx->start_lsn = lsn;
+ wake_up_all(&cil->xc_start_wait);
+ spin_unlock(&cil->xc_push_lock);
++
++ /*
++ * Make sure the metadata we are about to overwrite in the log
++ * has been flushed to stable storage before this iclog is
++ * issued.
++ */
++ spin_lock(&cil->xc_log->l_icloglock);
++ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
++ spin_unlock(&cil->xc_log->l_icloglock);
+ return;
+ }
+
+@@ -864,10 +874,7 @@ xlog_cil_push_work(
+ struct xfs_trans_header thdr;
+ struct xfs_log_iovec lhdr;
+ struct xfs_log_vec lvhdr = { NULL };
+- xfs_lsn_t preflush_tail_lsn;
+ xfs_csn_t push_seq;
+- struct bio bio;
+- DECLARE_COMPLETION_ONSTACK(bdev_flush);
+ bool push_commit_stable;
+
+ new_ctx = xlog_cil_ctx_alloc();
+@@ -937,23 +944,6 @@ xlog_cil_push_work(
+ list_add(&ctx->committing, &cil->xc_committing);
+ spin_unlock(&cil->xc_push_lock);
+
+- /*
+- * The CIL is stable at this point - nothing new will be added to it
+- * because we hold the flush lock exclusively. Hence we can now issue
+- * a cache flush to ensure all the completed metadata in the journal we
+- * are about to overwrite is on stable storage.
+- *
+- * Because we are issuing this cache flush before we've written the
+- * tail lsn to the iclog, we can have metadata IO completions move the
+- * tail forwards between the completion of this flush and the iclog
+- * being written. In this case, we need to re-issue the cache flush
+- * before the iclog write. To detect whether the log tail moves, sample
+- * the tail LSN *before* we issue the flush.
+- */
+- preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
+- xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
+- &bdev_flush);
+-
+ /*
+ * Pull all the log vectors off the items in the CIL, and remove the
+ * items from the CIL. We don't need the CIL lock here because it's only
+@@ -1030,12 +1020,6 @@ xlog_cil_push_work(
+ lvhdr.lv_iovecp = &lhdr;
+ lvhdr.lv_next = ctx->lv_chain;
+
+- /*
+- * Before we format and submit the first iclog, we have to ensure that
+- * the metadata writeback ordering cache flush is complete.
+- */
+- wait_for_completion(&bdev_flush);
+-
+ error = xlog_cil_write_chain(ctx, &lvhdr);
+ if (error)
+ goto out_abort_free_ticket;
+@@ -1094,7 +1078,7 @@ xlog_cil_push_work(
+ if (push_commit_stable &&
+ ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE)
+ xlog_state_switch_iclogs(log, ctx->commit_iclog, 0);
+- xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn);
++ xlog_state_release_iclog(log, ctx->commit_iclog);
+
+ /* Not safe to reference ctx now! */
+
+@@ -1115,7 +1099,7 @@ out_abort_free_ticket:
+ return;
+ }
+ spin_lock(&log->l_icloglock);
+- xlog_state_release_iclog(log, ctx->commit_iclog, 0);
++ xlog_state_release_iclog(log, ctx->commit_iclog);
+ /* Not safe to reference ctx now! */
+ spin_unlock(&log->l_icloglock);
+ }
+@@ -1442,9 +1426,9 @@ out_shutdown:
+ */
+ bool
+ xfs_log_item_in_current_chkpt(
+- struct xfs_log_item *lip)
++ struct xfs_log_item *lip)
+ {
+- struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
+
+ if (list_empty(&lip->li_cil))
+ return false;
+@@ -1454,7 +1438,7 @@ xfs_log_item_in_current_chkpt(
+ * first checkpoint it is written to. Hence if it is different to the
+ * current sequence, we're in a new checkpoint.
+ */
+- return lip->li_seq == ctx->sequence;
++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
+ }
+
+ /*
+diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
+index 844fbeec3545a..f3d68ca39f45c 100644
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -524,8 +524,7 @@ void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
+
+ void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog,
+ int eventual_size);
+-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+- xfs_lsn_t log_tail_lsn);
++int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+
+ /*
+ * When we crack an atomic LSN, we sample it first so that the value will not
+diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
+index 10562ecbd9eac..581aeb288b32b 100644
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -27,7 +27,7 @@
+ #include "xfs_buf_item.h"
+ #include "xfs_ag.h"
+ #include "xfs_quota.h"
+-
++#include "xfs_reflink.h"
+
+ #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
+
+@@ -3502,6 +3502,28 @@ xlog_recover_finish(
+
+ xlog_recover_process_iunlinks(log);
+ xlog_recover_check_summary(log);
++
++ /*
++ * Recover any CoW staging blocks that are still referenced by the
++ * ondisk refcount metadata. During mount there cannot be any live
++ * staging extents as we have not permitted any user modifications.
++ * Therefore, it is safe to free them all right now, even on a
++ * read-only mount.
++ */
++ error = xfs_reflink_recover_cow(log->l_mp);
++ if (error) {
++ xfs_alert(log->l_mp,
++ "Failed to recover leftover CoW staging extents, err %d.",
++ error);
++ /*
++ * If we get an error here, make sure the log is shut down
++ * but return zero so that any log items committed since the
++ * end of intents processing can be pushed through the CIL
++ * and AIL.
++ */
++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
++ }
++
+ return 0;
+ }
+
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
+index 06dac09eddbd8..76056de83971c 100644
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -922,15 +922,6 @@ xfs_mountfs(
+ xfs_warn(mp,
+ "Unable to allocate reserve blocks. Continuing without reserve pool.");
+
+- /* Recover any CoW blocks that never got remapped. */
+- error = xfs_reflink_recover_cow(mp);
+- if (error) {
+- xfs_err(mp,
+- "Error %d recovering leftover CoW allocations.", error);
+- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+- goto out_quota;
+- }
+-
+ /* Reserve AG blocks for future btree expansion. */
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error != -ENOSPC)
+@@ -941,7 +932,6 @@ xfs_mountfs(
+
+ out_agresv:
+ xfs_fs_unreserve_ag_blocks(mp);
+- out_quota:
+ xfs_qm_unmount_quotas(mp);
+ out_rtunmount:
+ xfs_rtunmount_inodes(mp);
+@@ -1142,7 +1132,7 @@ xfs_mod_fdblocks(
+ * problems (i.e. transaction abort, pagecache discards, etc.) than
+ * slightly premature -ENOSPC.
+ */
+- set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
++ set_aside = xfs_fdblocks_unavailable(mp);
+ percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
+ if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
+ XFS_FDBLOCKS_BATCH) >= 0) {
+diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
+index e091f3b3fa158..86564295fce6d 100644
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -478,6 +478,21 @@ extern void xfs_unmountfs(xfs_mount_t *);
+ */
+ #define XFS_FDBLOCKS_BATCH 1024
+
++/*
++ * Estimate the amount of free space that is not available to userspace and is
++ * not explicitly reserved from the incore fdblocks. This includes:
++ *
++ * - The minimum number of blocks needed to support splitting a bmap btree
++ * - The blocks currently in use by the freespace btrees because they record
++ * the actual blocks that will fill per-AG metadata space reservations
++ */
++static inline uint64_t
++xfs_fdblocks_unavailable(
++ struct xfs_mount *mp)
++{
++ return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
++}
++
+ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+ bool reserved);
+ extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
+index 5e1d29d8b2e73..3a82a13d880c2 100644
+--- a/fs/xfs/xfs_pnfs.c
++++ b/fs/xfs/xfs_pnfs.c
+@@ -164,10 +164,12 @@ xfs_fs_map_blocks(
+ * that the blocks allocated and handed out to the client are
+ * guaranteed to be present even after a server crash.
+ */
+- error = xfs_update_prealloc_flags(ip,
+- XFS_PREALLOC_SET | XFS_PREALLOC_SYNC);
++ error = xfs_update_prealloc_flags(ip, XFS_PREALLOC_SET);
++ if (!error)
++ error = xfs_log_force_inode(ip);
+ if (error)
+ goto out_unlock;
++
+ } else {
+ xfs_iunlock(ip, lock_flags);
+ }
+@@ -283,7 +285,8 @@ xfs_fs_commit_blocks(
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+- xfs_setattr_time(ip, iattr);
++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
++ setattr_copy(&init_user_ns, inode, iattr);
+ if (update_isize) {
+ i_size_write(inode, iattr->ia_size);
+ ip->i_disk_size = iattr->ia_size;
+diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
+index 5608066d6e539..623244650a2f0 100644
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1317,8 +1317,15 @@ xfs_qm_quotacheck(
+
+ error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+ NULL);
+- if (error)
++ if (error) {
++ /*
++ * The inode walk may have partially populated the dquot
++ * caches. We must purge them before disabling quota and
++ * tearing down the quotainfo, or else the dquots will leak.
++ */
++ xfs_qm_dqpurge_all(mp);
+ goto error_return;
++ }
+
+ /*
+ * We've made all the changes that we need to make incore. Flush them
+diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
+index 46904b793bd48..8ef842d17916a 100644
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -35,6 +35,7 @@ STATIC void
+ xfs_cui_item_free(
+ struct xfs_cui_log_item *cuip)
+ {
++ kmem_free(cuip->cui_item.li_lv_shadow);
+ if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
+ kmem_free(cuip);
+ else
+@@ -204,6 +205,7 @@ xfs_cud_item_release(
+ struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+
+ xfs_cui_release(cudp->cud_cuip);
++ kmem_free(cudp->cud_item.li_lv_shadow);
+ kmem_cache_free(xfs_cud_zone, cudp);
+ }
+
+diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
+index 76355f2934884..36832e4bc803c 100644
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -749,7 +749,10 @@ xfs_reflink_end_cow(
+ }
+
+ /*
+- * Free leftover CoW reservations that didn't get cleaned out.
++ * Free all CoW staging blocks that are still referenced by the ondisk refcount
++ * metadata. The ondisk metadata does not track which inode created the
++ * staging extent, so callers must ensure that there are no cached inodes with
++ * live CoW staging extents.
+ */
+ int
+ xfs_reflink_recover_cow(
+diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
+index 5f06959804678..15e7b01740a77 100644
+--- a/fs/xfs/xfs_rmap_item.c
++++ b/fs/xfs/xfs_rmap_item.c
+@@ -35,6 +35,7 @@ STATIC void
+ xfs_rui_item_free(
+ struct xfs_rui_log_item *ruip)
+ {
++ kmem_free(ruip->rui_item.li_lv_shadow);
+ if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
+ kmem_free(ruip);
+ else
+@@ -227,6 +228,7 @@ xfs_rud_item_release(
+ struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+ xfs_rui_release(rudp->rud_ruip);
++ kmem_free(rudp->rud_item.li_lv_shadow);
+ kmem_cache_free(xfs_rud_zone, rudp);
+ }
+
+diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
+index c4e0cd1c1c8ca..df1d6be61bfa3 100644
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -642,7 +642,7 @@ xfs_fs_destroy_inode(
+ static void
+ xfs_fs_dirty_inode(
+ struct inode *inode,
+- int flag)
++ int flags)
+ {
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+@@ -650,7 +650,13 @@ xfs_fs_dirty_inode(
+
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+ return;
+- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
++
++ /*
++ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
++ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
++ * in flags possibly together with I_DIRTY_SYNC.
++ */
++ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
+ return;
+
+ if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+@@ -729,6 +735,7 @@ xfs_fs_sync_fs(
+ int wait)
+ {
+ struct xfs_mount *mp = XFS_M(sb);
++ int error;
+
+ trace_xfs_fs_sync_fs(mp, __return_address);
+
+@@ -738,7 +745,10 @@ xfs_fs_sync_fs(
+ if (!wait)
+ return 0;
+
+- xfs_log_force(mp, XFS_LOG_SYNC);
++ error = xfs_log_force(mp, XFS_LOG_SYNC);
++ if (error)
++ return error;
++
+ if (laptop_mode) {
+ /*
+ * The disk must be active because we're syncing.
+@@ -1738,15 +1748,6 @@ xfs_remount_rw(
+ */
+ xfs_restore_resvblks(mp);
+ xfs_log_work_queue(mp);
+-
+- /* Recover any CoW blocks that never got remapped. */
+- error = xfs_reflink_recover_cow(mp);
+- if (error) {
+- xfs_err(mp,
+- "Error %d recovering leftover CoW allocations.", error);
+- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+- return error;
+- }
+ xfs_blockgc_start(mp);
+
+ /* Create the per-AG metadata reservation pool .*/
+@@ -1764,7 +1765,15 @@ static int
+ xfs_remount_ro(
+ struct xfs_mount *mp)
+ {
+- int error;
++ struct xfs_icwalk icw = {
++ .icw_flags = XFS_ICWALK_FLAG_SYNC,
++ };
++ int error;
++
++ /* Flush all the dirty data to disk. */
++ error = sync_filesystem(mp->m_super);
++ if (error)
++ return error;
+
+ /*
+ * Cancel background eofb scanning so it cannot race with the final
+@@ -1772,8 +1781,13 @@ xfs_remount_ro(
+ */
+ xfs_blockgc_stop(mp);
+
+- /* Get rid of any leftover CoW reservations... */
+- error = xfs_blockgc_free_space(mp, NULL);
++ /*
++ * Clear out all remaining COW staging extents and speculative post-EOF
++ * preallocations so that we don't leave inodes requiring inactivation
++ * cleanups during reclaim on a read-only mount. We must process every
++ * cached inode, so this requires a synchronous cache scan.
++ */
++ error = xfs_blockgc_free_space(mp, &icw);
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+@@ -1839,8 +1853,6 @@ xfs_fs_reconfigure(
+ if (error)
+ return error;
+
+- sync_filesystem(mp->m_super);
+-
+ /* inode32 -> inode64 */
+ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
+ mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
+index fc2c6a4046471..a31d2e5d03214 100644
+--- a/fs/xfs/xfs_symlink.c
++++ b/fs/xfs/xfs_symlink.c
+@@ -184,8 +184,8 @@ xfs_symlink(
+ /*
+ * Make sure that we have allocated dquot(s) on disk.
+ */
+- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
+- mapped_fsgid(mnt_userns), prid,
++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
++ mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
+ if (error)
+diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
+index 67dec11e34c7e..95c183072e7a2 100644
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -1201,3 +1201,89 @@ out_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+ }
++
++/*
++ * Allocate a transaction, lock and join the directory and child inodes to it,
++ * and reserve quota for a directory update. If there isn't sufficient space,
++ * @dblocks will be set to zero for a reservationless directory update and
++ * @nospace_error will be set to a negative errno describing the space
++ * constraint we hit.
++ *
++ * The caller must ensure that the on-disk dquots attached to this inode have
++ * already been allocated and initialized. The ILOCKs will be dropped when the
++ * transaction is committed or cancelled.
++ */
++int
++xfs_trans_alloc_dir(
++ struct xfs_inode *dp,
++ struct xfs_trans_res *resv,
++ struct xfs_inode *ip,
++ unsigned int *dblocks,
++ struct xfs_trans **tpp,
++ int *nospace_error)
++{
++ struct xfs_trans *tp;
++ struct xfs_mount *mp = ip->i_mount;
++ unsigned int resblks;
++ bool retried = false;
++ int error;
++
++retry:
++ *nospace_error = 0;
++ resblks = *dblocks;
++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
++ if (error == -ENOSPC) {
++ *nospace_error = error;
++ resblks = 0;
++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
++ }
++ if (error)
++ return error;
++
++ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
++
++ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
++ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++
++ error = xfs_qm_dqattach_locked(dp, false);
++ if (error) {
++ /* Caller should have allocated the dquots! */
++ ASSERT(error != -ENOENT);
++ goto out_cancel;
++ }
++
++ error = xfs_qm_dqattach_locked(ip, false);
++ if (error) {
++ /* Caller should have allocated the dquots! */
++ ASSERT(error != -ENOENT);
++ goto out_cancel;
++ }
++
++ if (resblks == 0)
++ goto done;
++
++ error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
++ if (error == -EDQUOT || error == -ENOSPC) {
++ if (!retried) {
++ xfs_trans_cancel(tp);
++ xfs_blockgc_free_quota(dp, 0);
++ retried = true;
++ goto retry;
++ }
++
++ *nospace_error = error;
++ resblks = 0;
++ error = 0;
++ }
++ if (error)
++ goto out_cancel;
++
++done:
++ *tpp = tp;
++ *dblocks = resblks;
++ return 0;
++
++out_cancel:
++ xfs_trans_cancel(tp);
++ return error;
++}
+diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
+index 50da47f23a077..faba74d4c7026 100644
+--- a/fs/xfs/xfs_trans.h
++++ b/fs/xfs/xfs_trans.h
+@@ -265,6 +265,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv,
+ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
+ struct xfs_trans **tpp);
++int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv,
++ struct xfs_inode *ip, unsigned int *dblocks,
++ struct xfs_trans **tpp, int *nospace_error);
+
+ static inline void
+ xfs_trans_set_context(
+diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
+index 3872ce6714119..955c457e585a3 100644
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -603,7 +603,6 @@ xfs_dqresv_check(
+ return QUOTA_NL_ISOFTLONGWARN;
+ }
+
+- res->warnings++;
+ return QUOTA_NL_ISOFTWARN;
+ }
+
+diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
+index ddc346a9df9ba..9619a68df163f 100644
+--- a/fs/zonefs/super.c
++++ b/fs/zonefs/super.c
+@@ -35,6 +35,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode,
+
+ lockdep_assert_held(&zi->i_truncate_mutex);
+
++ /*
++ * With ZNS drives, closing an explicitly open zone that has not been
++ * written will change the zone state to "closed", that is, the zone
++ * will remain active. Since this can then cause failure of explicit
++ * open operation on other zones if the drive active zone resources
++ * are exceeded, make sure that the zone does not remain active by
++ * resetting it.
++ */
++ if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
++ op = REQ_OP_ZONE_RESET;
++
+ trace_zonefs_zone_mgmt(inode, op);
+ ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
+ zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
+@@ -61,15 +72,51 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
+ zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+ }
+
+-static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+- unsigned int flags, struct iomap *iomap,
+- struct iomap *srcmap)
++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
++ loff_t length, unsigned int flags,
++ struct iomap *iomap, struct iomap *srcmap)
++{
++ struct zonefs_inode_info *zi = ZONEFS_I(inode);
++ struct super_block *sb = inode->i_sb;
++ loff_t isize;
++
++ /*
++ * All blocks are always mapped below EOF. If reading past EOF,
++ * act as if there is a hole up to the file maximum size.
++ */
++ mutex_lock(&zi->i_truncate_mutex);
++ iomap->bdev = inode->i_sb->s_bdev;
++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
++ isize = i_size_read(inode);
++ if (iomap->offset >= isize) {
++ iomap->type = IOMAP_HOLE;
++ iomap->addr = IOMAP_NULL_ADDR;
++ iomap->length = length;
++ } else {
++ iomap->type = IOMAP_MAPPED;
++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
++ iomap->length = isize - iomap->offset;
++ }
++ mutex_unlock(&zi->i_truncate_mutex);
++
++ trace_zonefs_iomap_begin(inode, iomap);
++
++ return 0;
++}
++
++static const struct iomap_ops zonefs_read_iomap_ops = {
++ .iomap_begin = zonefs_read_iomap_begin,
++};
++
++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
++ loff_t length, unsigned int flags,
++ struct iomap *iomap, struct iomap *srcmap)
+ {
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ loff_t isize;
+
+- /* All I/Os should always be within the file maximum size */
++ /* All write I/Os should always be within the file maximum size */
+ if (WARN_ON_ONCE(offset + length > zi->i_max_size))
+ return -EIO;
+
+@@ -79,7 +126,7 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ * operation.
+ */
+ if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+- (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)))
++ !(flags & IOMAP_DIRECT)))
+ return -EIO;
+
+ /*
+@@ -88,47 +135,44 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ * write pointer) and unwriten beyond.
+ */
+ mutex_lock(&zi->i_truncate_mutex);
++ iomap->bdev = inode->i_sb->s_bdev;
++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+ isize = i_size_read(inode);
+- if (offset >= isize)
++ if (iomap->offset >= isize) {
+ iomap->type = IOMAP_UNWRITTEN;
+- else
++ iomap->length = zi->i_max_size - iomap->offset;
++ } else {
+ iomap->type = IOMAP_MAPPED;
+- if (flags & IOMAP_WRITE)
+- length = zi->i_max_size - offset;
+- else
+- length = min(length, isize - offset);
++ iomap->length = isize - iomap->offset;
++ }
+ mutex_unlock(&zi->i_truncate_mutex);
+
+- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+- iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset;
+- iomap->bdev = inode->i_sb->s_bdev;
+- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+-
+ trace_zonefs_iomap_begin(inode, iomap);
+
+ return 0;
+ }
+
+-static const struct iomap_ops zonefs_iomap_ops = {
+- .iomap_begin = zonefs_iomap_begin,
++static const struct iomap_ops zonefs_write_iomap_ops = {
++ .iomap_begin = zonefs_write_iomap_begin,
+ };
+
+ static int zonefs_readpage(struct file *unused, struct page *page)
+ {
+- return iomap_readpage(page, &zonefs_iomap_ops);
++ return iomap_readpage(page, &zonefs_read_iomap_ops);
+ }
+
+ static void zonefs_readahead(struct readahead_control *rac)
+ {
+- iomap_readahead(rac, &zonefs_iomap_ops);
++ iomap_readahead(rac, &zonefs_read_iomap_ops);
+ }
+
+ /*
+ * Map blocks for page writeback. This is used only on conventional zone files,
+ * which implies that the page range can only be within the fixed inode size.
+ */
+-static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
+- struct inode *inode, loff_t offset)
++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
++ struct inode *inode, loff_t offset)
+ {
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+@@ -142,12 +186,12 @@ static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
+ offset < wpc->iomap.offset + wpc->iomap.length)
+ return 0;
+
+- return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset,
+- IOMAP_WRITE, &wpc->iomap, NULL);
++ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
++ IOMAP_WRITE, &wpc->iomap, NULL);
+ }
+
+ static const struct iomap_writeback_ops zonefs_writeback_ops = {
+- .map_blocks = zonefs_map_blocks,
++ .map_blocks = zonefs_write_map_blocks,
+ };
+
+ static int zonefs_writepage(struct page *page, struct writeback_control *wbc)
+@@ -177,7 +221,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis,
+ return -EINVAL;
+ }
+
+- return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops);
++ return iomap_swapfile_activate(sis, swap_file, span,
++ &zonefs_read_iomap_ops);
+ }
+
+ static const struct address_space_operations zonefs_file_aops = {
+@@ -357,6 +402,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ data_size = zonefs_check_zone_condition(inode, zone,
+ false, false);
+ }
++ } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
++ data_size > isize) {
++ /* Do not expose garbage data */
++ data_size = isize;
+ }
+
+ /*
+@@ -403,14 +452,22 @@ static void __zonefs_io_error(struct inode *inode, bool write)
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ unsigned int noio_flag;
+- unsigned int nr_zones =
+- zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
++ unsigned int nr_zones = 1;
+ struct zonefs_ioerr_data err = {
+ .inode = inode,
+ .write = write,
+ };
+ int ret;
+
++ /*
++ * The only files that have more than one zone are conventional zone
++ * files with aggregated conventional zones, for which the inode zone
++ * size is always larger than the device zone size.
++ */
++ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
++ nr_zones = zi->i_zone_size >>
++ (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
++
+ /*
+ * Memory allocations in blkdev_report_zones() can trigger a memory
+ * reclaim which may in turn cause a recursion into zonefs as well as
+@@ -596,7 +653,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
+
+ /* Serialize against truncates */
+ filemap_invalidate_lock_shared(inode->i_mapping);
+- ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
++ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
+
+ sb_end_pagefault(inode->i_sb);
+@@ -678,16 +735,29 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+- unsigned int max;
++ unsigned int max = bdev_max_zone_append_sectors(bdev);
++ pgoff_t start, end;
+ struct bio *bio;
+ ssize_t size;
+ int nr_pages;
+ ssize_t ret;
+
+- max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+ iov_iter_truncate(from, max);
+
++ /*
++ * If the inode block size (zone write granularity) is smaller than the
++ * page size, we may be appending data belonging to the last page of the
++ * inode straddling inode->i_size, with that page already cached due to
++ * a buffered read or readahead. So make sure to invalidate that page.
++ * This will always be a no-op for the case where the block size is
++ * equal to the page size.
++ */
++ start = iocb->ki_pos >> PAGE_SHIFT;
++ end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT;
++ if (invalidate_inode_pages2_range(inode->i_mapping, start, end))
++ return -EBUSY;
++
+ nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
+ if (!nr_pages)
+ return 0;
+@@ -713,6 +783,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+
+ ret = submit_bio_wait(bio);
+
++ /*
++ * If the file zone was written underneath the file system, the zone
++ * write pointer may not be where we expect it to be, but the zone
++ * append write can still succeed. So check manually that we wrote where
++ * we intended to, that is, at zi->i_wpoffset.
++ */
++ if (!ret) {
++ sector_t wpsector =
++ zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
++
++ if (bio->bi_iter.bi_sector != wpsector) {
++ zonefs_warn(inode->i_sb,
++ "Corrupted write pointer %llu for zone at %llu\n",
++ bio->bi_iter.bi_sector, zi->i_zsector);
++ ret = -EIO;
++ }
++ }
++
+ zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+ trace_zonefs_file_dio_append(inode, size, ret);
+
+@@ -851,8 +939,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
+ if (append)
+ ret = zonefs_file_dio_append(iocb, from);
+ else
+- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+- &zonefs_write_dio_ops, 0);
++ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
++ &zonefs_write_dio_ops, 0, 0);
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+ (ret > 0 || ret == -EIOCBQUEUED)) {
+ if (ret > 0)
+@@ -893,7 +981,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
+ if (ret <= 0)
+ goto inode_unlock;
+
+- ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
++ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
+ if (ret > 0)
+ iocb->ki_pos += ret;
+ else if (ret == -EIO)
+@@ -986,8 +1074,8 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ goto inode_unlock;
+ }
+ file_accessed(iocb->ki_filp);
+- ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
+- &zonefs_read_dio_ops, 0);
++ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
++ &zonefs_read_dio_ops, 0, 0);
+ } else {
+ ret = generic_file_read_iter(iocb, to);
+ if (ret == -EIO)
+@@ -1144,6 +1232,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
+ inode_init_once(&zi->i_vnode);
+ mutex_init(&zi->i_truncate_mutex);
+ zi->i_wr_refcnt = 0;
++ zi->i_flags = 0;
+
+ return &zi->i_vnode;
+ }
+@@ -1295,12 +1384,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
+ inc_nlink(parent);
+ }
+
+-static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+- enum zonefs_ztype type)
++static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
++ enum zonefs_ztype type)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
++ int ret = 0;
+
+ inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
+ inode->i_mode = S_IFREG | sbi->s_perm;
+@@ -1308,6 +1398,14 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+ zi->i_ztype = type;
+ zi->i_zsector = zone->start;
+ zi->i_zone_size = zone->len << SECTOR_SHIFT;
++ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
++ !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
++ zonefs_err(sb,
++ "zone size %llu doesn't match device's zone sectors %llu\n",
++ zi->i_zone_size,
++ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
++ return -EINVAL;
++ }
+
+ zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
+ zone->capacity << SECTOR_SHIFT);
+@@ -1325,6 +1423,22 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+ sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
+ sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
+ sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
++
++ /*
++ * For sequential zones, make sure that any open zone is closed first
++ * to ensure that the initial number of open zones is 0, in sync with
++ * the open zone accounting done when the mount option
++ * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
++ */
++ if (type == ZONEFS_ZTYPE_SEQ &&
++ (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
++ zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
++ mutex_lock(&zi->i_truncate_mutex);
++ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
++ mutex_unlock(&zi->i_truncate_mutex);
++ }
++
++ return ret;
+ }
+
+ static struct dentry *zonefs_create_inode(struct dentry *parent,
+@@ -1334,20 +1448,27 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
+ struct inode *dir = d_inode(parent);
+ struct dentry *dentry;
+ struct inode *inode;
++ int ret = -ENOMEM;
+
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+- return NULL;
++ return ERR_PTR(ret);
+
+ inode = new_inode(parent->d_sb);
+ if (!inode)
+ goto dput;
+
+ inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+- if (zone)
+- zonefs_init_file_inode(inode, zone, type);
+- else
++ if (zone) {
++ ret = zonefs_init_file_inode(inode, zone, type);
++ if (ret) {
++ iput(inode);
++ goto dput;
++ }
++ } else {
+ zonefs_init_dir_inode(dir, inode, type);
++ }
++
+ d_add(dentry, inode);
+ dir->i_size++;
+
+@@ -1356,7 +1477,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
+ dput:
+ dput(dentry);
+
+- return NULL;
++ return ERR_PTR(ret);
+ }
+
+ struct zonefs_zone_data {
+@@ -1376,7 +1497,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
+ struct blk_zone *zone, *next, *end;
+ const char *zgroup_name;
+ char *file_name;
+- struct dentry *dir;
++ struct dentry *dir, *dent;
+ unsigned int n = 0;
+ int ret;
+
+@@ -1394,8 +1515,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
+ zgroup_name = "seq";
+
+ dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
+- if (!dir) {
+- ret = -ENOMEM;
++ if (IS_ERR(dir)) {
++ ret = PTR_ERR(dir);
+ goto free;
+ }
+
+@@ -1441,8 +1562,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
+ * Use the file number within its group as file name.
+ */
+ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
+- if (!zonefs_create_inode(dir, file_name, zone, type)) {
+- ret = -ENOMEM;
++ dent = zonefs_create_inode(dir, file_name, zone, type);
++ if (IS_ERR(dent)) {
++ ret = PTR_ERR(dent);
+ goto free;
+ }
+
+@@ -1658,11 +1780,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
+ sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
+ atomic_set(&sbi->s_open_zones, 0);
+- if (!sbi->s_max_open_zones &&
+- sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+- zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
+- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
+- }
+
+ ret = zonefs_read_super(sb);
+ if (ret)
+@@ -1681,6 +1798,12 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ zonefs_info(sb, "Mounting %u zones",
+ blkdev_nr_zones(sb->s_bdev->bd_disk));
+
++ if (!sbi->s_max_open_zones &&
++ sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
++ zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
++ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
++ }
++
+ /* Create root directory inode */
+ ret = -ENOMEM;
+ inode = new_inode(sb);
+@@ -1787,5 +1910,6 @@ static void __exit zonefs_exit(void)
+ MODULE_AUTHOR("Damien Le Moal");
+ MODULE_DESCRIPTION("Zone file system for zoned block devices");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_FS("zonefs");
+ module_init(zonefs_init);
+ module_exit(zonefs_exit);
+diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
+index 13d93371790ec..7a85ae6b7b005 100644
+--- a/include/acpi/acpi_bus.h
++++ b/include/acpi/acpi_bus.h
+@@ -52,7 +52,7 @@ bool acpi_dock_match(acpi_handle handle);
+ bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+ union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+ u64 rev, u64 func, union acpi_object *argv4);
+-
++#ifdef CONFIG_ACPI
+ static inline union acpi_object *
+ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+ u64 func, union acpi_object *argv4,
+@@ -68,6 +68,7 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+
+ return obj;
+ }
++#endif
+
+ #define ACPI_INIT_DSM_ARGV4(cnt, eles) \
+ { \
+@@ -613,9 +614,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
+ int acpi_disable_wakeup_device_power(struct acpi_device *dev);
+
+ #ifdef CONFIG_X86
+-bool acpi_device_always_present(struct acpi_device *adev);
++bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status);
+ #else
+-static inline bool acpi_device_always_present(struct acpi_device *adev)
++static inline bool acpi_device_override_status(struct acpi_device *adev,
++ unsigned long long *status)
+ {
+ return false;
+ }
+diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
+index fa02e3ff0faf3..9d45a6001bc00 100644
+--- a/include/acpi/acpixf.h
++++ b/include/acpi/acpixf.h
+@@ -749,6 +749,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+ acpi_event_status
+ *event_status))
+ ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
++ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void))
+ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
+ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
+ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
+diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
+index 92c71dfce0d5d..cefbb7ad253e0 100644
+--- a/include/acpi/actypes.h
++++ b/include/acpi/actypes.h
+@@ -536,8 +536,14 @@ typedef u64 acpi_integer;
+ * Can be used with access_width of struct acpi_generic_address and access_size of
+ * struct acpi_resource_generic_register.
+ */
+-#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2))
+-#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1))
++#define ACPI_ACCESS_BIT_SHIFT 2
++#define ACPI_ACCESS_BYTE_SHIFT -1
++#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT)
++#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT)
++#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT)
++#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT)
++#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT))
++#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT))
+
+ /*******************************************************************************
+ *
+diff --git a/include/acpi/apei.h b/include/acpi/apei.h
+index 680f80960c3dc..a6ac2e8b72da8 100644
+--- a/include/acpi/apei.h
++++ b/include/acpi/apei.h
+@@ -27,14 +27,16 @@ extern int hest_disable;
+ extern int erst_disable;
+ #ifdef CONFIG_ACPI_APEI_GHES
+ extern bool ghes_disable;
++void __init ghes_init(void);
+ #else
+ #define ghes_disable 1
++static inline void ghes_init(void) { }
+ #endif
+
+ #ifdef CONFIG_ACPI_APEI
+ void __init acpi_hest_init(void);
+ #else
+-static inline void acpi_hest_init(void) { return; }
++static inline void acpi_hest_init(void) { }
+ #endif
+
+ typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
+diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
+index bc159a9b4a733..6b14414b9ec12 100644
+--- a/include/acpi/cppc_acpi.h
++++ b/include/acpi/cppc_acpi.h
+@@ -17,7 +17,7 @@
+ #include <acpi/pcc.h>
+ #include <acpi/processor.h>
+
+-/* Support CPPCv2 and CPPCv3 */
++/* CPPCv2 and CPPCv3 support */
+ #define CPPC_V2_REV 2
+ #define CPPC_V3_REV 3
+ #define CPPC_V2_NUM_ENT 21
+diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
+index 34fb3431a8f36..292a5c40bd0c6 100644
+--- a/include/acpi/ghes.h
++++ b/include/acpi/ghes.h
+@@ -71,7 +71,7 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb);
+ void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
+ #endif
+
+-int ghes_estatus_pool_init(int num_ghes);
++int ghes_estatus_pool_init(unsigned int num_ghes);
+
+ /* From drivers/edac/ghes_edac.c */
+
+diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
+index 3096f086b5a32..71ab4ba9c25d1 100644
+--- a/include/asm-generic/bitops/atomic.h
++++ b/include/asm-generic/bitops/atomic.h
+@@ -39,9 +39,6 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p)
+ unsigned long mask = BIT_MASK(nr);
+
+ p += BIT_WORD(nr);
+- if (READ_ONCE(*p) & mask)
+- return 1;
+-
+ old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p);
+ return !!(old & mask);
+ }
+@@ -53,9 +50,6 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
+ unsigned long mask = BIT_MASK(nr);
+
+ p += BIT_WORD(nr);
+- if (!(READ_ONCE(*p) & mask))
+- return 0;
+-
+ old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
+ return !!(old & mask);
+ }
+diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
+index 0d132ee2a2913..835f959a25f25 100644
+--- a/include/asm-generic/bitops/find.h
++++ b/include/asm-generic/bitops/find.h
+@@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+
+ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+
++#ifndef find_first_bit
+ /**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+@@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+
+ return _find_first_bit(addr, size);
+ }
++#endif
+
++#ifndef find_first_zero_bit
+ /**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+@@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+
+ return _find_first_zero_bit(addr, size);
+ }
++#endif
++
+ #else /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
+ #ifndef find_first_bit
+diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h
+deleted file mode 100644
+index 69021830f078d..0000000000000
+--- a/include/asm-generic/bugs.h
++++ /dev/null
+@@ -1,11 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef __ASM_GENERIC_BUGS_H
+-#define __ASM_GENERIC_BUGS_H
+-/*
+- * This file is included by 'init/main.c' to check for
+- * architecture-dependent bugs.
+- */
+-
+-static inline void check_bugs(void) { }
+-
+-#endif /* __ASM_GENERIC_BUGS_H */
+diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
+index 7ce93aaf69f8d..82f2c01accbb9 100644
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -190,7 +190,7 @@ static inline u64 readq(const volatile void __iomem *addr)
+ u64 val;
+
+ __io_br();
+- val = __le64_to_cpu(__raw_readq(addr));
++ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ __io_ar(val);
+ return val;
+ }
+@@ -233,7 +233,7 @@ static inline void writel(u32 value, volatile void __iomem *addr)
+ static inline void writeq(u64 value, volatile void __iomem *addr)
+ {
+ __io_bw();
+- __raw_writeq(__cpu_to_le64(value), addr);
++ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ __io_aw();
+ }
+ #endif
+@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
+ }
+ #endif
+
+-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+ extern int devmem_is_allowed(unsigned long pfn);
+-#endif
+
+ #endif /* __KERNEL__ */
+
+diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
+index d16302d3eb597..72f1e2a8c1670 100644
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -114,7 +114,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt,
+ /**
+ * memory_intersects - checks if the region occupied by an object intersects
+ * with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+ * @end: virtual address of the end of the memory region
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+@@ -127,7 +127,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt,
+ {
+ void *vend = virt + size;
+
+- return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++ if (virt < end && vend > begin)
++ return true;
++
++ return false;
+ }
+
+ /**
+diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
+index 2c68a545ffa7d..c99710b3027a0 100644
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -207,12 +207,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+ #define tlb_needs_table_invalidate() (true)
+ #endif
+
++void tlb_remove_table_sync_one(void);
++
+ #else
+
+ #ifdef tlb_needs_table_invalidate
+ #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+ #endif
+
++static inline void tlb_remove_table_sync_one(void) { }
++
+ #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
+
+@@ -565,10 +569,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
+ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
+ do { \
+ unsigned long _sz = huge_page_size(h); \
+- if (_sz == PMD_SIZE) \
+- tlb_flush_pmd_range(tlb, address, _sz); \
+- else if (_sz == PUD_SIZE) \
++ if (_sz >= P4D_SIZE) \
++ tlb_flush_p4d_range(tlb, address, _sz); \
++ else if (_sz >= PUD_SIZE) \
+ tlb_flush_pud_range(tlb, address, _sz); \
++ else if (_sz >= PMD_SIZE) \
++ tlb_flush_pmd_range(tlb, address, _sz); \
++ else \
++ tlb_flush_pte_range(tlb, address, _sz); \
+ __tlb_remove_tlb_entry(tlb, ptep, address); \
+ } while (0)
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index f2984af2b85bd..8471717c5085a 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -337,6 +337,7 @@
+ #define DATA_DATA \
+ *(.xiptext) \
+ *(DATA_MAIN) \
++ *(.data..decrypted) \
+ *(.ref.data) \
+ *(.data..shared_aligned) /* percpu related */ \
+ MEM_KEEP(init.data*) \
+@@ -549,10 +550,9 @@
+ */
+ #ifdef CONFIG_CFI_CLANG
+ #define TEXT_CFI_JT \
+- . = ALIGN(PMD_SIZE); \
++ ALIGN_FUNCTION(); \
+ __cfi_jt_start = .; \
+ *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \
+- . = ALIGN(PMD_SIZE); \
+ __cfi_jt_end = .;
+ #else
+ #define TEXT_CFI_JT
+@@ -903,7 +903,12 @@
+ #define PRINTK_INDEX
+ #endif
+
++/*
++ * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler.
++ * Otherwise, the type of .notes section would become PROGBITS instead of NOTES.
++ */
+ #define NOTES \
++ /DISCARD/ : { *(.note.GNU-stack) } \
+ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \
+ __start_notes = .; \
+ KEEP(*(.note.*)) \
+@@ -970,7 +975,6 @@
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
+ #define PERCPU_DECRYPTED_SECTION \
+ . = ALIGN(PAGE_SIZE); \
+- *(.data..decrypted) \
+ *(.data..percpu..decrypted) \
+ . = ALIGN(PAGE_SIZE);
+ #else
+diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
+index 20c93f08c9933..95a1d214108a5 100644
+--- a/include/asm-generic/word-at-a-time.h
++++ b/include/asm-generic/word-at-a-time.h
+@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
+ return (mask >> 8) ? byte : byte + 1;
+ }
+
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
+index 5f6841c73e5a7..62fa7e82ff5b9 100644
+--- a/include/crypto/algapi.h
++++ b/include/crypto/algapi.h
+@@ -10,6 +10,7 @@
+ #include <linux/crypto.h>
+ #include <linux/list.h>
+ #include <linux/kernel.h>
++#include <linux/workqueue.h>
+
+ /*
+ * Maximum values for blocksize and alignmask, used to allocate
+@@ -55,6 +56,8 @@ struct crypto_instance {
+ struct crypto_spawn *spawns;
+ };
+
++ struct work_struct free_work;
++
+ void *__ctx[] CRYPTO_MINALIGN_ATTR;
+ };
+
+@@ -256,4 +259,11 @@ enum {
+ CRYPTO_MSG_ALG_LOADED,
+ };
+
++static inline void crypto_request_complete(struct crypto_async_request *req,
++ int err)
++{
++ crypto_completion_t complete = req->complete;
++ complete(req, err);
++}
++
+ #endif /* _CRYPTO_ALGAPI_H */
+diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
+index bc3fb59442ce5..4e30e1799e614 100644
+--- a/include/crypto/blake2s.h
++++ b/include/crypto/blake2s.h
+@@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
+ blake2s_final(&state, out);
+ }
+
+-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
+- const size_t keylen);
+-
+ #endif /* _CRYPTO_BLAKE2S_H */
+diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
+index dabaee6987186..b3ea73b819443 100644
+--- a/include/crypto/chacha.h
++++ b/include/crypto/chacha.h
+@@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
+ hchacha_block_generic(state, out, nrounds);
+ }
+
++enum chacha_constants { /* expand 32-byte k */
++ CHACHA_CONSTANT_EXPA = 0x61707865U,
++ CHACHA_CONSTANT_ND_3 = 0x3320646eU,
++ CHACHA_CONSTANT_2_BY = 0x79622d32U,
++ CHACHA_CONSTANT_TE_K = 0x6b206574U
++};
++
+ static inline void chacha_init_consts(u32 *state)
+ {
+- state[0] = 0x61707865; /* "expa" */
+- state[1] = 0x3320646e; /* "nd 3" */
+- state[2] = 0x79622d32; /* "2-by" */
+- state[3] = 0x6b206574; /* "te k" */
++ state[0] = CHACHA_CONSTANT_EXPA;
++ state[1] = CHACHA_CONSTANT_ND_3;
++ state[2] = CHACHA_CONSTANT_2_BY;
++ state[3] = CHACHA_CONSTANT_TE_K;
+ }
+
+ void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
+diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
+index c4165126937e4..a6c3b8e7deb64 100644
+--- a/include/crypto/drbg.h
++++ b/include/crypto/drbg.h
+@@ -105,6 +105,12 @@ struct drbg_test_data {
+ struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */
+ };
+
++enum drbg_seed_state {
++ DRBG_SEED_STATE_UNSEEDED,
++ DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */
++ DRBG_SEED_STATE_FULL,
++};
++
+ struct drbg_state {
+ struct mutex drbg_mutex; /* lock around DRBG */
+ unsigned char *V; /* internal state 10.1.1.1 1a) */
+@@ -127,16 +133,14 @@ struct drbg_state {
+ struct crypto_wait ctr_wait; /* CTR mode async wait obj */
+ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */
+
+- bool seeded; /* DRBG fully seeded? */
++ enum drbg_seed_state seeded; /* DRBG fully seeded? */
+ bool pr; /* Prediction resistance enabled? */
+ bool fips_primed; /* Continuous test primed? */
+ unsigned char *prev; /* FIPS 140-2 continuous test value */
+- struct work_struct seed_work; /* asynchronous seeding support */
+ struct crypto_rng *jent;
+ const struct drbg_state_ops *d_ops;
+ const struct drbg_core *core;
+ struct drbg_string test_data;
+- struct random_ready_callback random_ready;
+ };
+
+ static inline __u8 drbg_statelen(struct drbg_state *drbg)
+diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
+index 8e50d487500f2..506d56530ca93 100644
+--- a/include/crypto/internal/blake2s.h
++++ b/include/crypto/internal/blake2s.h
+@@ -8,112 +8,14 @@
+ #define _CRYPTO_INTERNAL_BLAKE2S_H
+
+ #include <crypto/blake2s.h>
+-#include <crypto/internal/hash.h>
+ #include <linux/string.h>
+
+-void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
++void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
+ size_t nblocks, const u32 inc);
+
+-void blake2s_compress_arch(struct blake2s_state *state,const u8 *block,
+- size_t nblocks, const u32 inc);
++void blake2s_compress(struct blake2s_state *state, const u8 *block,
++ size_t nblocks, const u32 inc);
+
+ bool blake2s_selftest(void);
+
+-static inline void blake2s_set_lastblock(struct blake2s_state *state)
+-{
+- state->f[0] = -1;
+-}
+-
+-typedef void (*blake2s_compress_t)(struct blake2s_state *state,
+- const u8 *block, size_t nblocks, u32 inc);
+-
+-/* Helper functions for BLAKE2s shared by the library and shash APIs */
+-
+-static inline void __blake2s_update(struct blake2s_state *state,
+- const u8 *in, size_t inlen,
+- blake2s_compress_t compress)
+-{
+- const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+-
+- if (unlikely(!inlen))
+- return;
+- if (inlen > fill) {
+- memcpy(state->buf + state->buflen, in, fill);
+- (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+- state->buflen = 0;
+- in += fill;
+- inlen -= fill;
+- }
+- if (inlen > BLAKE2S_BLOCK_SIZE) {
+- const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+- /* Hash one less (full) block than strictly possible */
+- (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+- in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+- inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+- }
+- memcpy(state->buf + state->buflen, in, inlen);
+- state->buflen += inlen;
+-}
+-
+-static inline void __blake2s_final(struct blake2s_state *state, u8 *out,
+- blake2s_compress_t compress)
+-{
+- blake2s_set_lastblock(state);
+- memset(state->buf + state->buflen, 0,
+- BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+- (*compress)(state, state->buf, 1, state->buflen);
+- cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+- memcpy(out, state->h, state->outlen);
+-}
+-
+-/* Helper functions for shash implementations of BLAKE2s */
+-
+-struct blake2s_tfm_ctx {
+- u8 key[BLAKE2S_KEY_SIZE];
+- unsigned int keylen;
+-};
+-
+-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
+- const u8 *key, unsigned int keylen)
+-{
+- struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+-
+- if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
+- return -EINVAL;
+-
+- memcpy(tctx->key, key, keylen);
+- tctx->keylen = keylen;
+-
+- return 0;
+-}
+-
+-static inline int crypto_blake2s_init(struct shash_desc *desc)
+-{
+- const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+- struct blake2s_state *state = shash_desc_ctx(desc);
+- unsigned int outlen = crypto_shash_digestsize(desc->tfm);
+-
+- __blake2s_init(state, outlen, tctx->key, tctx->keylen);
+- return 0;
+-}
+-
+-static inline int crypto_blake2s_update(struct shash_desc *desc,
+- const u8 *in, unsigned int inlen,
+- blake2s_compress_t compress)
+-{
+- struct blake2s_state *state = shash_desc_ctx(desc);
+-
+- __blake2s_update(state, in, inlen, compress);
+- return 0;
+-}
+-
+-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
+- blake2s_compress_t compress)
+-{
+- struct blake2s_state *state = shash_desc_ctx(desc);
+-
+- __blake2s_final(state, out, compress);
+- return 0;
+-}
+-
+ #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */
+diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
+index 659b642efada1..05b25a819d0d1 100644
+--- a/include/crypto/internal/kpp.h
++++ b/include/crypto/internal/kpp.h
+@@ -18,6 +18,12 @@ static inline void *kpp_request_ctx(struct kpp_request *req)
+ return req->__ctx;
+ }
+
++static inline void kpp_set_reqsize(struct crypto_kpp *kpp,
++ unsigned int reqsize)
++{
++ crypto_kpp_alg(kpp)->reqsize = reqsize;
++}
++
+ static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm)
+ {
+ return tfm->base.__crt_ctx;
+diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
+index 46bdfa48c4134..c84783cd5abd7 100644
+--- a/include/drm/drm_bridge.h
++++ b/include/drm/drm_bridge.h
+@@ -447,11 +447,11 @@ struct drm_bridge_funcs {
+ *
+ * The returned array must be allocated with kmalloc() and will be
+ * freed by the caller. If the allocation fails, NULL should be
+- * returned. num_output_fmts must be set to the returned array size.
++ * returned. num_input_fmts must be set to the returned array size.
+ * Formats listed in the returned array should be listed in decreasing
+ * preference order (the core will try all formats until it finds one
+ * that works). When the format is not supported NULL should be
+- * returned and num_output_fmts should be set to 0.
++ * returned and num_input_fmts should be set to 0.
+ *
+ * This method is called on all elements of the bridge chain as part of
+ * the bus format negotiation process that happens in
+@@ -914,4 +914,17 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev,
+ struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge);
+ #endif
+
++#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE)
++struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node,
++ u32 port, u32 endpoint);
++#else
++static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev,
++ struct device_node *node,
++ u32 port,
++ u32 endpoint)
++{
++ return ERR_PTR(-ENODEV);
++}
++#endif
++
+ #endif
+diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
+index 1647960c9e506..1f43d7c6724aa 100644
+--- a/include/drm/drm_connector.h
++++ b/include/drm/drm_connector.h
+@@ -566,10 +566,16 @@ struct drm_display_info {
+ bool rgb_quant_range_selectable;
+
+ /**
+- * @edid_hdmi_dc_modes: Mask of supported hdmi deep color modes. Even
+- * more stuff redundant with @bus_formats.
++ * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes
++ * in RGB 4:4:4. Even more stuff redundant with @bus_formats.
+ */
+- u8 edid_hdmi_dc_modes;
++ u8 edid_hdmi_rgb444_dc_modes;
++
++ /**
++ * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color
++ * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats.
++ */
++ u8 edid_hdmi_ycbcr444_dc_modes;
+
+ /**
+ * @cea_rev: CEA revision of the HDMI sink.
+diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
+index 1d5b3dbb6e563..9c7949ebc159e 100644
+--- a/include/drm/drm_dp_helper.h
++++ b/include/drm/drm_dp_helper.h
+@@ -455,7 +455,7 @@ struct drm_panel;
+ # define DP_FEC_BIT_ERROR_COUNT_CAP (1 << 3)
+
+ /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */
+-#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */
++#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xD /* 0x92 through 0x9E */
+ #define DP_PCON_DSC_ENCODER 0x092
+ # define DP_PCON_DSC_ENCODER_SUPPORTED (1 << 0)
+ # define DP_PCON_DSC_PPS_ENC_OVERRIDE (1 << 1)
+@@ -1495,7 +1495,7 @@ u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZ
+
+ #define DP_BRANCH_OUI_HEADER_SIZE 0xc
+ #define DP_RECEIVER_CAP_SIZE 0xf
+-#define DP_DSC_RECEIVER_CAP_SIZE 0xf
++#define DP_DSC_RECEIVER_CAP_SIZE 0x10 /* DSC Capabilities 0x60 through 0x6F */
+ #define EDP_PSR_RECEIVER_CAP_SIZE 2
+ #define EDP_DISPLAY_CTL_CAP_SIZE 3
+ #define DP_LTTPR_COMMON_CAP_SIZE 8
+diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
+index deccfd39e6db2..c24559f5329dd 100644
+--- a/include/drm/drm_edid.h
++++ b/include/drm/drm_edid.h
+@@ -121,7 +121,7 @@ struct detailed_data_monitor_range {
+ u8 supported_scalings;
+ u8 preferred_refresh;
+ } __attribute__((packed)) cvt;
+- } formula;
++ } __attribute__((packed)) formula;
+ } __attribute__((packed));
+
+ struct detailed_data_wpindex {
+@@ -154,7 +154,7 @@ struct detailed_non_pixel {
+ struct detailed_data_wpindex color;
+ struct std_timing timings[6];
+ struct cvt_timing cvt[4];
+- } data;
++ } __attribute__((packed)) data;
+ } __attribute__((packed));
+
+ #define EDID_DETAIL_EST_TIMINGS 0xf7
+@@ -172,7 +172,7 @@ struct detailed_timing {
+ union {
+ struct detailed_pixel_timing pixel_data;
+ struct detailed_non_pixel other_data;
+- } data;
++ } __attribute__((packed)) data;
+ } __attribute__((packed));
+
+ #define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0)
+diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
+index 434328d8a0d90..311d66c9cf4b1 100644
+--- a/include/drm/drm_gem_shmem_helper.h
++++ b/include/drm/drm_gem_shmem_helper.h
+@@ -107,16 +107,17 @@ struct drm_gem_shmem_object {
+ container_of(obj, struct drm_gem_shmem_object, base)
+
+ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size);
+-void drm_gem_shmem_free_object(struct drm_gem_object *obj);
++void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem);
+
+ int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem);
+ void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem);
+-int drm_gem_shmem_pin(struct drm_gem_object *obj);
+-void drm_gem_shmem_unpin(struct drm_gem_object *obj);
+-int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map);
+-void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map);
++int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem);
++void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem);
++int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map);
++void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map);
++int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma);
+
+-int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv);
++int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv);
+
+ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem)
+ {
+@@ -125,29 +126,156 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
+ !shmem->base.dma_buf && !shmem->base.import_attach;
+ }
+
+-void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
+-bool drm_gem_shmem_purge(struct drm_gem_object *obj);
++void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem);
++bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem);
+
+-struct drm_gem_shmem_object *
+-drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
+- struct drm_device *dev, size_t size,
+- uint32_t *handle);
++struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem);
++struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem);
+
+-int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
+- struct drm_mode_create_dumb *args);
++void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem,
++ struct drm_printer *p, unsigned int indent);
++
++/*
++ * GEM object functions
++ */
++
++/**
++ * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free()
++ * @obj: GEM object to free
++ *
++ * This function wraps drm_gem_shmem_free(). Drivers that employ the shmem helpers
++ * should use it as their &drm_gem_object_funcs.free handler.
++ */
++static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ drm_gem_shmem_free(shmem);
++}
++
++/**
++ * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs
++ * @p: DRM printer
++ * @indent: Tab indentation level
++ * @obj: GEM object
++ *
++ * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should
++ * use this function as their &drm_gem_object_funcs.print_info handler.
++ */
++static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent,
++ const struct drm_gem_object *obj)
++{
++ const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ drm_gem_shmem_print_info(shmem, p, indent);
++}
++
++/**
++ * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin()
++ * @obj: GEM object
++ *
++ * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.pin handler.
++ */
++static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ return drm_gem_shmem_pin(shmem);
++}
++
++/**
++ * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin()
++ * @obj: GEM object
++ *
++ * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.unpin handler.
++ */
++static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ drm_gem_shmem_unpin(shmem);
++}
+
+-int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
++/**
++ * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table()
++ * @obj: GEM object
++ *
++ * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.get_sg_table handler.
++ *
++ * Returns:
++ * A pointer to the scatter/gather table of pinned pages or NULL on failure.
++ */
++static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ return drm_gem_shmem_get_sg_table(shmem);
++}
++
++/*
++ * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap()
++ * @obj: GEM object
++ * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store.
++ *
++ * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.vmap handler.
++ *
++ * Returns:
++ * 0 on success or a negative error code on failure.
++ */
++static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ return drm_gem_shmem_vmap(shmem, map);
++}
++
++/*
++ * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap()
++ * @obj: GEM object
++ * @map: Kernel virtual address where the SHMEM GEM object was mapped
++ *
++ * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.vunmap handler.
++ */
++static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ drm_gem_shmem_vunmap(shmem, map);
++}
++
++/**
++ * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap()
++ * @obj: GEM object
++ * @vma: VMA for the area to be mapped
++ *
++ * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should
++ * use it as their &drm_gem_object_funcs.mmap handler.
++ *
++ * Returns:
++ * 0 on success or a negative error code on failure.
++ */
++static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
++{
++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
++
++ return drm_gem_shmem_mmap(shmem, vma);
++}
+
+-void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent,
+- const struct drm_gem_object *obj);
++/*
++ * Driver ops
++ */
+
+-struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj);
+ struct drm_gem_object *
+ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
+-
+-struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
++int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
++ struct drm_mode_create_dumb *args);
+
+ /**
+ * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
+diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
+index af7ba8071eb08..1d263eb0b2e12 100644
+--- a/include/drm/drm_mipi_dsi.h
++++ b/include/drm/drm_mipi_dsi.h
+@@ -288,6 +288,10 @@ int mipi_dsi_dcs_set_display_brightness(struct mipi_dsi_device *dsi,
+ u16 brightness);
+ int mipi_dsi_dcs_get_display_brightness(struct mipi_dsi_device *dsi,
+ u16 *brightness);
++int mipi_dsi_dcs_set_display_brightness_large(struct mipi_dsi_device *dsi,
++ u16 brightness);
++int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
++ u16 *brightness);
+
+ /**
+ * struct mipi_dsi_driver - DSI driver
+diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
+index f681bbdbc6982..36f7eb9d06639 100644
+--- a/include/drm/ttm/ttm_bo_api.h
++++ b/include/drm/ttm/ttm_bo_api.h
+@@ -594,8 +594,7 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+
+ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+- pgoff_t num_prefault,
+- pgoff_t fault_page_size);
++ pgoff_t num_prefault);
+
+ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf);
+
+diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
+index 01e8bab1d767a..1aa462e5cafd4 100644
+--- a/include/dt-bindings/clock/imx8mn-clock.h
++++ b/include/dt-bindings/clock/imx8mn-clock.h
+@@ -19,7 +19,8 @@
+ #define IMX8MN_VIDEO_PLL1_REF_SEL 10
+ #define IMX8MN_DRAM_PLL_REF_SEL 11
+ #define IMX8MN_GPU_PLL_REF_SEL 12
+-#define IMX8MN_VPU_PLL_REF_SEL 13
++#define IMX8MN_M7_ALT_PLL_REF_SEL 13
++#define IMX8MN_VPU_PLL_REF_SEL IMX8MN_M7_ALT_PLL_REF_SEL
+ #define IMX8MN_ARM_PLL_REF_SEL 14
+ #define IMX8MN_SYS_PLL1_REF_SEL 15
+ #define IMX8MN_SYS_PLL2_REF_SEL 16
+@@ -29,7 +30,8 @@
+ #define IMX8MN_VIDEO_PLL1 20
+ #define IMX8MN_DRAM_PLL 21
+ #define IMX8MN_GPU_PLL 22
+-#define IMX8MN_VPU_PLL 23
++#define IMX8MN_M7_ALT_PLL 23
++#define IMX8MN_VPU_PLL IMX8MN_M7_ALT_PLL
+ #define IMX8MN_ARM_PLL 24
+ #define IMX8MN_SYS_PLL1 25
+ #define IMX8MN_SYS_PLL2 26
+@@ -39,7 +41,8 @@
+ #define IMX8MN_VIDEO_PLL1_BYPASS 30
+ #define IMX8MN_DRAM_PLL_BYPASS 31
+ #define IMX8MN_GPU_PLL_BYPASS 32
+-#define IMX8MN_VPU_PLL_BYPASS 33
++#define IMX8MN_M7_ALT_PLL_BYPASS 33
++#define IMX8MN_VPU_PLL_BYPASS IMX8MN_M7_ALT_PLL_BYPASS
+ #define IMX8MN_ARM_PLL_BYPASS 34
+ #define IMX8MN_SYS_PLL1_BYPASS 35
+ #define IMX8MN_SYS_PLL2_BYPASS 36
+@@ -49,7 +52,8 @@
+ #define IMX8MN_VIDEO_PLL1_OUT 40
+ #define IMX8MN_DRAM_PLL_OUT 41
+ #define IMX8MN_GPU_PLL_OUT 42
+-#define IMX8MN_VPU_PLL_OUT 43
++#define IMX8MN_M7_ALT_PLL_OUT 43
++#define IMX8MN_VPU_PLL_OUT IMX8MN_M7_ALT_PLL_OUT
+ #define IMX8MN_ARM_PLL_OUT 44
+ #define IMX8MN_SYS_PLL1_OUT 45
+ #define IMX8MN_SYS_PLL2_OUT 46
+diff --git a/include/dt-bindings/clock/qcom,gcc-msm8939.h b/include/dt-bindings/clock/qcom,gcc-msm8939.h
+index 0634467c4ce5a..2d545ed0d35ab 100644
+--- a/include/dt-bindings/clock/qcom,gcc-msm8939.h
++++ b/include/dt-bindings/clock/qcom,gcc-msm8939.h
+@@ -192,6 +192,7 @@
+ #define GCC_VENUS0_CORE0_VCODEC0_CLK 183
+ #define GCC_VENUS0_CORE1_VCODEC0_CLK 184
+ #define GCC_OXILI_TIMER_CLK 185
++#define SYSTEM_MM_NOC_BFDCD_CLK_SRC 186
+
+ /* Indexes for GDSCs */
+ #define BIMC_GDSC 0
+diff --git a/include/linux/acpi.h b/include/linux/acpi.h
+index 974d497a897dc..a23a5aea9c817 100644
+--- a/include/linux/acpi.h
++++ b/include/linux/acpi.h
+@@ -484,6 +484,7 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list,
+ void *preproc_data);
+ int acpi_dev_get_dma_resources(struct acpi_device *adev,
+ struct list_head *list);
++int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list);
+ int acpi_dev_filter_resource_type(struct acpi_resource *ares,
+ unsigned long types);
+
+@@ -976,6 +977,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr)
+ return -ENODEV;
+ }
+
++static inline int acpi_register_wakeup_handler(int wake_irq,
++ bool (*wakeup)(void *context), void *context)
++{
++ return -ENXIO;
++}
++
++static inline void acpi_unregister_wakeup_handler(
++ bool (*wakeup)(void *context), void *context) { }
++
+ #endif /* !CONFIG_ACPI */
+
+ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+@@ -996,7 +1006,15 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
+
+ acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
+ u32 val_a, u32 val_b);
+-
++#ifdef CONFIG_X86
++struct acpi_s2idle_dev_ops {
++ struct list_head list_node;
++ void (*prepare)(void);
++ void (*restore)(void);
++};
++int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
++void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
++#endif /* CONFIG_X86 */
+ #ifndef CONFIG_IA64
+ void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
+ #else
+diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h
+index 0a24ab7cb66fa..8e2eefa9fbc0f 100644
+--- a/include/linux/acpi_mdio.h
++++ b/include/linux/acpi_mdio.h
+@@ -9,7 +9,14 @@
+ #include <linux/phy.h>
+
+ #if IS_ENABLED(CONFIG_ACPI_MDIO)
+-int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode);
++int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode,
++ struct module *owner);
++
++static inline int
++acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *handle)
++{
++ return __acpi_mdiobus_register(mdio, handle, THIS_MODULE);
++}
+ #else /* CONFIG_ACPI_MDIO */
+ static inline int
+ acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
+diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h
+index 1eb8ee5b0e5fe..a5a1224315637 100644
+--- a/include/linux/acpi_viot.h
++++ b/include/linux/acpi_viot.h
+@@ -6,9 +6,11 @@
+ #include <linux/acpi.h>
+
+ #ifdef CONFIG_ACPI_VIOT
++void __init acpi_viot_early_init(void);
+ void __init acpi_viot_init(void);
+ int viot_iommu_configure(struct device *dev);
+ #else
++static inline void acpi_viot_early_init(void) {}
+ static inline void acpi_viot_init(void) {}
+ static inline int viot_iommu_configure(struct device *dev)
+ {
+diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
+index 63ccb52521902..220c8c60e021a 100644
+--- a/include/linux/arm-smccc.h
++++ b/include/linux/arm-smccc.h
+@@ -92,6 +92,11 @@
+ ARM_SMCCC_SMC_32, \
+ 0, 0x7fff)
+
++#define ARM_SMCCC_ARCH_WORKAROUND_3 \
++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
++ ARM_SMCCC_SMC_32, \
++ 0, 0x3fff)
++
+ #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
+index 505c679b6a9b7..99e2127656324 100644
+--- a/include/linux/arm_ffa.h
++++ b/include/linux/arm_ffa.h
+@@ -13,6 +13,7 @@
+
+ /* FFA Bus/Device/Driver related */
+ struct ffa_device {
++ u32 id;
+ int vm_id;
+ bool mode_32bit;
+ uuid_t uuid;
+diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
+index 0a241c5c911d8..255701e1251b4 100644
+--- a/include/linux/arm_sdei.h
++++ b/include/linux/arm_sdei.h
+@@ -46,9 +46,13 @@ int sdei_unregister_ghes(struct ghes *ghes);
+ /* For use by arch code when CPU hotplug notifiers are not appropriate. */
+ int sdei_mask_local_cpu(void);
+ int sdei_unmask_local_cpu(void);
++void __init sdei_init(void);
++void sdei_handler_abort(void);
+ #else
+ static inline int sdei_mask_local_cpu(void) { return 0; }
+ static inline int sdei_unmask_local_cpu(void) { return 0; }
++static inline void sdei_init(void) { }
++static inline void sdei_handler_abort(void) { }
+ #endif /* CONFIG_ARM_SDE_INTERFACE */
+
+
+diff --git a/include/linux/ata.h b/include/linux/ata.h
+index 1b44f40c7700b..3b1ad57d0e017 100644
+--- a/include/linux/ata.h
++++ b/include/linux/ata.h
+@@ -565,6 +565,18 @@ struct ata_bmdma_prd {
+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2)))
++#define ata_id_has_devslp(id) \
++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)))
++#define ata_id_has_ncq_autosense(id) \
++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)))
++#define ata_id_has_dipm(id) \
++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3)))
+ #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10))
+ #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11))
+ #define ata_id_u32(id,n) \
+@@ -577,9 +589,6 @@ struct ata_bmdma_prd {
+
+ #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
+ #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
+-#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
+-#define ata_id_has_ncq_autosense(id) \
+- ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))
+
+ static inline bool ata_id_has_hipm(const u16 *id)
+ {
+@@ -591,17 +600,6 @@ static inline bool ata_id_has_hipm(const u16 *id)
+ return val & (1 << 9);
+ }
+
+-static inline bool ata_id_has_dipm(const u16 *id)
+-{
+- u16 val = id[ATA_ID_FEATURE_SUPP];
+-
+- if (val == 0 || val == 0xffff)
+- return false;
+-
+- return val & (1 << 3);
+-}
+-
+-
+ static inline bool ata_id_has_fua(const u16 *id)
+ {
+ if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000)
+@@ -770,16 +768,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id)
+
+ static inline bool ata_id_has_sense_reporting(const u16 *id)
+ {
+- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
++ if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15)))
++ return false;
++ if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14))
+ return false;
+- return id[ATA_ID_COMMAND_SET_3] & (1 << 6);
++ return id[ATA_ID_COMMAND_SET_3] & BIT(6);
+ }
+
+ static inline bool ata_id_sense_reporting_enabled(const u16 *id)
+ {
+- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15)))
++ if (!ata_id_has_sense_reporting(id))
++ return false;
++ /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */
++ if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14))
+ return false;
+- return id[ATA_ID_COMMAND_SET_4] & (1 << 6);
++ return id[ATA_ID_COMMAND_SET_4] & BIT(6);
+ }
+
+ /**
+diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
+index a3dba31df01e9..6db58d1808665 100644
+--- a/include/linux/atomic/atomic-arch-fallback.h
++++ b/include/linux/atomic/atomic-arch-fallback.h
+@@ -151,7 +151,16 @@
+ static __always_inline int
+ arch_atomic_read_acquire(const atomic_t *v)
+ {
+- return smp_load_acquire(&(v)->counter);
++ int ret;
++
++ if (__native_word(atomic_t)) {
++ ret = smp_load_acquire(&(v)->counter);
++ } else {
++ ret = arch_atomic_read(v);
++ __atomic_acquire_fence();
++ }
++
++ return ret;
+ }
+ #define arch_atomic_read_acquire arch_atomic_read_acquire
+ #endif
+@@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v)
+ static __always_inline void
+ arch_atomic_set_release(atomic_t *v, int i)
+ {
+- smp_store_release(&(v)->counter, i);
++ if (__native_word(atomic_t)) {
++ smp_store_release(&(v)->counter, i);
++ } else {
++ __atomic_release_fence();
++ arch_atomic_set(v, i);
++ }
+ }
+ #define arch_atomic_set_release arch_atomic_set_release
+ #endif
+@@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v)
+ static __always_inline s64
+ arch_atomic64_read_acquire(const atomic64_t *v)
+ {
+- return smp_load_acquire(&(v)->counter);
++ s64 ret;
++
++ if (__native_word(atomic64_t)) {
++ ret = smp_load_acquire(&(v)->counter);
++ } else {
++ ret = arch_atomic64_read(v);
++ __atomic_acquire_fence();
++ }
++
++ return ret;
+ }
+ #define arch_atomic64_read_acquire arch_atomic64_read_acquire
+ #endif
+@@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v)
+ static __always_inline void
+ arch_atomic64_set_release(atomic64_t *v, s64 i)
+ {
+- smp_store_release(&(v)->counter, i);
++ if (__native_word(atomic64_t)) {
++ smp_store_release(&(v)->counter, i);
++ } else {
++ __atomic_release_fence();
++ arch_atomic64_set(v, i);
++ }
+ }
+ #define arch_atomic64_set_release arch_atomic64_set_release
+ #endif
+@@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
+ #endif
+
+ #endif /* _LINUX_ATOMIC_FALLBACK_H */
+-// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
++// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae
+diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
+index ac7f231b88258..eed9a98eae0d0 100644
+--- a/include/linux/backing-dev.h
++++ b/include/linux/backing-dev.h
+@@ -121,6 +121,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+
+ extern struct backing_dev_info noop_backing_dev_info;
+
++int bdi_init(struct backing_dev_info *bdi);
++
+ /**
+ * writeback_in_progress - determine whether there is writeback in progress
+ * @wb: bdi_writeback of interest
+diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
+index 049cf9421d831..f821b72433613 100644
+--- a/include/linux/binfmts.h
++++ b/include/linux/binfmts.h
+@@ -87,6 +87,9 @@ struct coredump_params {
+ loff_t written;
+ loff_t pos;
+ loff_t to_skip;
++ int vma_count;
++ size_t vma_data_size;
++ struct core_vma_metadata *vma_meta;
+ };
+
+ /*
+diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
+index 4e035aca6f7e6..6093fa6db2600 100644
+--- a/include/linux/bitfield.h
++++ b/include/linux/bitfield.h
+@@ -41,6 +41,22 @@
+
+ #define __bf_shf(x) (__builtin_ffsll(x) - 1)
+
++#define __scalar_type_to_unsigned_cases(type) \
++ unsigned type: (unsigned type)0, \
++ signed type: (unsigned type)0
++
++#define __unsigned_scalar_typeof(x) typeof( \
++ _Generic((x), \
++ char: (unsigned char)0, \
++ __scalar_type_to_unsigned_cases(char), \
++ __scalar_type_to_unsigned_cases(short), \
++ __scalar_type_to_unsigned_cases(int), \
++ __scalar_type_to_unsigned_cases(long), \
++ __scalar_type_to_unsigned_cases(long long), \
++ default: (x)))
++
++#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x))
++
+ #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \
+ ({ \
+ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \
+@@ -49,7 +65,8 @@
+ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
+ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
+ _pfx "value too large for the field"); \
+- BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \
++ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
++ __bf_cast_unsigned(_reg, ~0ull), \
+ _pfx "type of reg too small for mask"); \
+ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \
+ (1ULL << __bf_shf(_mask))); \
+diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
+index b4de2010fba55..bc5c04d711bbc 100644
+--- a/include/linux/blk-cgroup.h
++++ b/include/linux/blk-cgroup.h
+@@ -24,6 +24,7 @@
+ #include <linux/atomic.h>
+ #include <linux/kthread.h>
+ #include <linux/fs.h>
++#include <linux/blk-mq.h>
+
+ /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
+@@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ }
+
++/**
++ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
++ * @rq: request to merge into
++ * @bio: bio to merge
++ *
++ * @bio and @rq should belong to the same cgroup and their issue_as_root should
++ * match. The latter is necessary as we don't want to throttle e.g. a metadata
++ * update because it happens to be next to a regular IO.
++ */
++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
++{
++ return rq->bio->bi_blkg == bio->bi_blkg &&
++ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
++}
++
+ void blk_cgroup_bio_start(struct bio *bio);
+ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+@@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { }
+ static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
+ static inline void blkcg_bio_issue_init(struct bio *bio) { }
+ static inline void blk_cgroup_bio_start(struct bio *bio) { }
++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
+
+ #define blk_queue_for_each_rl(rl, q) \
+ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h
+index 69b24fe92cbf1..5e96bad548047 100644
+--- a/include/linux/blk-crypto.h
++++ b/include/linux/blk-crypto.h
+@@ -97,8 +97,8 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
+ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
+ struct request_queue *q);
+
+-int blk_crypto_evict_key(struct request_queue *q,
+- const struct blk_crypto_key *key);
++void blk_crypto_evict_key(struct request_queue *q,
++ const struct blk_crypto_key *key);
+
+ bool blk_crypto_config_supported(struct request_queue *q,
+ const struct blk_crypto_config *cfg);
+diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h
+index b80c65aba2493..2580e05a8ab67 100644
+--- a/include/linux/blk-pm.h
++++ b/include/linux/blk-pm.h
+@@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+ extern int blk_pre_runtime_suspend(struct request_queue *q);
+ extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+ extern void blk_pre_runtime_resume(struct request_queue *q);
+-extern void blk_post_runtime_resume(struct request_queue *q, int err);
++extern void blk_post_runtime_resume(struct request_queue *q);
+ extern void blk_set_runtime_active(struct request_queue *q);
+ #else
+ static inline void blk_pm_runtime_init(struct request_queue *q,
+diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
+index be622b5a21ed5..36ce3d0fb9f3b 100644
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -215,9 +215,8 @@ static inline void bio_issue_init(struct bio_issue *issue,
+ struct bio {
+ struct bio *bi_next; /* request queue link */
+ struct block_device *bi_bdev;
+- unsigned int bi_opf; /* bottom bits req flags,
+- * top bits REQ_OP. Use
+- * accessors.
++ unsigned int bi_opf; /* bottom bits REQ_OP, top bits
++ * req_flags.
+ */
+ unsigned short bi_flags; /* BIO_* below */
+ unsigned short bi_ioprio;
+@@ -295,7 +294,8 @@ enum {
+ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
+ * of this bio. */
+ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
+- BIO_TRACKED, /* set if bio goes through the rq_qos path */
++ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
++ BIO_QOS_MERGED, /* but went through rq_qos merge path */
+ BIO_REMAPPED,
+ BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 12b9dbcc980ee..67344dfe07a7c 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -235,6 +235,14 @@ struct request {
+ void *end_io_data;
+ };
+
++static inline int blk_validate_block_size(unsigned int bsize)
++{
++ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
++ return -EINVAL;
++
++ return 0;
++}
++
+ static inline bool blk_op_is_passthrough(unsigned int op)
+ {
+ op &= REQ_OP_MASK;
+@@ -253,8 +261,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
+
+ #include <linux/elevator.h>
+
+-struct blk_queue_ctx;
+-
+ struct bio_vec;
+
+ enum blk_eh_timer_return {
+@@ -1176,7 +1182,8 @@ extern void blk_dump_rq_flags(struct request *, char *);
+
+ bool __must_check blk_get_queue(struct request_queue *);
+ extern void blk_put_queue(struct request_queue *);
+-extern void blk_set_queue_dying(struct request_queue *);
++
++void blk_mark_disk_dead(struct gendisk *disk);
+
+ #ifdef CONFIG_BLOCK
+ /*
+@@ -1198,8 +1205,6 @@ struct blk_plug {
+ bool multiple_queues;
+ bool nowait;
+ };
+-#define BLK_MAX_REQUEST_COUNT 16
+-#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
+
+ struct blk_plug_cb;
+ typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
+@@ -1382,6 +1387,17 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu
+ return min(l->max_zone_append_sectors, l->max_sectors);
+ }
+
++static inline unsigned int
++bdev_max_zone_append_sectors(struct block_device *bdev)
++{
++ return queue_max_zone_append_sectors(bdev_get_queue(bdev));
++}
++
++static inline unsigned int bdev_max_segments(struct block_device *bdev)
++{
++ return queue_max_segments(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+ int retval = 512;
+@@ -1941,6 +1957,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time);
+
++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
+ unsigned long bio_start_io_acct(struct bio *bio);
+ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
+ struct block_device *orig_bdev);
+@@ -1991,6 +2008,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
+ #ifdef CONFIG_BLOCK
+ void invalidate_bdev(struct block_device *bdev);
+ int sync_blockdev(struct block_device *bdev);
++int sync_blockdev_nowait(struct block_device *bdev);
++void sync_bdevs(bool wait);
+ #else
+ static inline void invalidate_bdev(struct block_device *bdev)
+ {
+@@ -1999,6 +2018,13 @@ static inline int sync_blockdev(struct block_device *bdev)
+ {
+ return 0;
+ }
++static inline int sync_blockdev_nowait(struct block_device *bdev)
++{
++ return 0;
++}
++static inline void sync_bdevs(bool wait)
++{
++}
+ #endif
+ int fsync_bdev(struct block_device *bdev);
+
+diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
+index 537e1b991f115..5296fbb8408ca 100644
+--- a/include/linux/bootconfig.h
++++ b/include/linux/bootconfig.h
+@@ -49,7 +49,7 @@ struct xbc_node {
+ /* Maximum size of boot config is 32KB - 1 */
+ #define XBC_DATA_MAX (XBC_VALUE - 1)
+
+-#define XBC_NODE_MAX 1024
++#define XBC_NODE_MAX 8192
+ #define XBC_KEYLEN_MAX 256
+ #define XBC_DEPTH_MAX 16
+
+diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
+index 2bc8b1f69c93c..888eb660d3f6a 100644
+--- a/include/linux/bootmem_info.h
++++ b/include/linux/bootmem_info.h
+@@ -3,6 +3,7 @@
+ #define __LINUX_BOOTMEM_INFO_H
+
+ #include <linux/mm.h>
++#include <linux/kmemleak.h>
+
+ /*
+ * Types for free bootmem stored in page->lru.next. These have to be in
+@@ -59,6 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page,
+
+ static inline void free_bootmem_page(struct page *page)
+ {
++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+ free_reserved_page(page);
+ }
+ #endif
+diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
+index 2746fd8042162..3536ab432b30c 100644
+--- a/include/linux/bpf-cgroup.h
++++ b/include/linux/bpf-cgroup.h
+@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
+
+ #define cgroup_bpf_enabled(atype) (0)
+ #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
++#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
+ #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
+ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
+ #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 3db6f6c95489e..84efd8dd139d9 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -190,7 +190,7 @@ struct bpf_map {
+ atomic64_t usercnt;
+ struct work_struct work;
+ struct mutex freeze_mutex;
+- u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
++ atomic64_t writecnt;
+ };
+
+ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
+@@ -206,11 +206,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
+ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+ {
+ if (unlikely(map_value_has_spin_lock(map)))
+- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
+- (struct bpf_spin_lock){};
++ memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
+ if (unlikely(map_value_has_timer(map)))
+- *(struct bpf_timer *)(dst + map->timer_off) =
+- (struct bpf_timer){};
++ memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
+ }
+
+ /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
+@@ -221,7 +219,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
+ if (unlikely(map_value_has_spin_lock(map))) {
+ s_off = map->spin_lock_off;
+ s_sz = sizeof(struct bpf_spin_lock);
+- } else if (unlikely(map_value_has_timer(map))) {
++ }
++ if (unlikely(map_value_has_timer(map))) {
+ t_off = map->timer_off;
+ t_sz = sizeof(struct bpf_timer);
+ }
+@@ -294,6 +293,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
+
+ extern const struct bpf_map_ops bpf_map_offload_ops;
+
++/* bpf_type_flag contains a set of flags that are applicable to the values of
++ * arg_type, ret_type and reg_type. For example, a pointer value may be null,
++ * or a memory is read-only. We classify types into two categories: base types
++ * and extended types. Extended types are base types combined with a type flag.
++ *
++ * Currently there are no more than 32 base types in arg_type, ret_type and
++ * reg_types.
++ */
++#define BPF_BASE_TYPE_BITS 8
++
++enum bpf_type_flag {
++ /* PTR may be NULL. */
++ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS),
++
++ /* MEM is read-only. When applied on bpf_arg, it indicates the arg is
++ * compatible with both mutable and immutable memory.
++ */
++ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
++
++ __BPF_TYPE_LAST_FLAG = MEM_RDONLY,
++};
++
++/* Max number of base types. */
++#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
++
++/* Max number of all types. */
++#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))
++
+ /* function argument constraints */
+ enum bpf_arg_type {
+ ARG_DONTCARE = 0, /* unused argument in helper function */
+@@ -305,13 +332,11 @@ enum bpf_arg_type {
+ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
+ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
+ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
+- ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */
+
+ /* the following constraints used to prototype bpf_memcmp() and other
+ * functions that access data on eBPF program stack
+ */
+ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
+- ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
+ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized,
+ * helper function must fill all bytes or clear
+ * them in error case.
+@@ -321,42 +346,65 @@ enum bpf_arg_type {
+ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
+
+ ARG_PTR_TO_CTX, /* pointer to context */
+- ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */
+ ARG_ANYTHING, /* any (initialized) argument is ok */
+ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
+ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+ ARG_PTR_TO_INT, /* pointer to int */
+ ARG_PTR_TO_LONG, /* pointer to long */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+- ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */
+ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
+ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
+- ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
+ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
+ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
+ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
+ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
+- ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
++ ARG_PTR_TO_STACK, /* pointer to stack */
+ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
+ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
+ __BPF_ARG_TYPE_MAX,
++
++ /* Extended arg_types. */
++ ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
++ ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
++ ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
++ ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
++ ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
++ ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
++
++ /* This must be the last entry. Its purpose is to ensure the enum is
++ * wide enough to hold the higher bits reserved for bpf_type_flag.
++ */
++ __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT,
+ };
++static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
+
+ /* type of values returned from helper functions */
+ enum bpf_return_type {
+ RET_INTEGER, /* function returns integer */
+ RET_VOID, /* function doesn't return anything */
+ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
+- RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
+- RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
+- RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
+- RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
+- RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
+- RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
+- RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
++ RET_PTR_TO_SOCKET, /* returns a pointer to a socket */
++ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */
++ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */
++ RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
+ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
+ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
++ __BPF_RET_TYPE_MAX,
++
++ /* Extended ret_types. */
++ RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
++ RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
++ RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
++ RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
++ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
++ RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
++
++ /* This must be the last entry. Its purpose is to ensure the enum is
++ * wide enough to hold the higher bits reserved for bpf_type_flag.
++ */
++ __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT,
+ };
++static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
+
+ /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
+ * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
+@@ -418,18 +466,15 @@ enum bpf_reg_type {
+ PTR_TO_CTX, /* reg points to bpf_context */
+ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
+ PTR_TO_MAP_VALUE, /* reg points to map element value */
+- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
++ PTR_TO_MAP_KEY, /* reg points to a map element key */
+ PTR_TO_STACK, /* reg == frame_pointer + offset */
+ PTR_TO_PACKET_META, /* skb->data - meta_len */
+ PTR_TO_PACKET, /* reg points to skb->data */
+ PTR_TO_PACKET_END, /* skb->data + headlen */
+ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
+ PTR_TO_SOCKET, /* reg points to struct bpf_sock */
+- PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
+ PTR_TO_SOCK_COMMON, /* reg points to sock_common */
+- PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
+ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
+- PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
+ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
+ /* PTR_TO_BTF_ID points to a kernel struct that does not need
+@@ -447,18 +492,25 @@ enum bpf_reg_type {
+ * been checked for null. Used primarily to inform the verifier
+ * an explicit null check is required for this struct.
+ */
+- PTR_TO_BTF_ID_OR_NULL,
+ PTR_TO_MEM, /* reg points to valid memory region */
+- PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
+- PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */
+- PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
+- PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
+- PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
++ PTR_TO_BUF, /* reg points to a read/write buffer */
+ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
+ PTR_TO_FUNC, /* reg points to a bpf program function */
+- PTR_TO_MAP_KEY, /* reg points to a map element key */
+ __BPF_REG_TYPE_MAX,
++
++ /* Extended reg_types. */
++ PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
++ PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET,
++ PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
++ PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
++ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID,
++
++ /* This must be the last entry. Its purpose is to ensure the enum is
++ * wide enough to hold the higher bits reserved for bpf_type_flag.
++ */
++ __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT,
+ };
++static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
+
+ /* The information passed from prog-specific *_is_valid_access
+ * back to the verifier.
+@@ -481,6 +533,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+ aux->ctx_field_size = size;
+ }
+
++static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
++{
++ return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
++ insn->src_reg == BPF_PSEUDO_FUNC;
++}
++
+ struct bpf_prog_ops {
+ int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+@@ -723,6 +781,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+ struct bpf_trampoline *bpf_trampoline_get(u64 key,
+ struct bpf_attach_target_info *tgt_info);
+ void bpf_trampoline_put(struct bpf_trampoline *tr);
++int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
+ #define BPF_DISPATCHER_INIT(_name) { \
+ .mutex = __MUTEX_INITIALIZER(_name.mutex), \
+ .func = &_name##_func, \
+@@ -1320,28 +1379,16 @@ extern struct mutex bpf_stats_enabled_mutex;
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+- *
+- * Use the preemption safe inc/dec variants on RT because migrate disable
+- * is preemptible on RT and preemption in the middle of the RMW operation
+- * might lead to inconsistent state. Use the raw variants for non RT
+- * kernels as migrate_disable() maps to preempt_disable() so the slightly
+- * more expensive save operation can be avoided.
+ */
+ static inline void bpf_disable_instrumentation(void)
+ {
+ migrate_disable();
+- if (IS_ENABLED(CONFIG_PREEMPT_RT))
+- this_cpu_inc(bpf_prog_active);
+- else
+- __this_cpu_inc(bpf_prog_active);
++ this_cpu_inc(bpf_prog_active);
+ }
+
+ static inline void bpf_enable_instrumentation(void)
+ {
+- if (IS_ENABLED(CONFIG_PREEMPT_RT))
+- this_cpu_dec(bpf_prog_active);
+- else
+- __this_cpu_dec(bpf_prog_active);
++ this_cpu_dec(bpf_prog_active);
+ migrate_enable();
+ }
+
+@@ -1387,6 +1434,7 @@ void bpf_map_put(struct bpf_map *map);
+ void *bpf_map_area_alloc(u64 size, int numa_node);
+ void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
+ void bpf_map_area_free(void *base);
++bool bpf_map_write_active(const struct bpf_map *map);
+ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
+ int generic_map_lookup_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+@@ -1677,6 +1725,12 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
+ const struct btf_func_model *
+ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn);
++
++static inline bool unprivileged_ebpf_enabled(void)
++{
++ return !sysctl_unprivileged_bpf_disabled;
++}
++
+ #else /* !CONFIG_BPF_SYSCALL */
+ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
+ {
+@@ -1895,6 +1949,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ {
+ return NULL;
+ }
++
++static inline bool unprivileged_ebpf_enabled(void)
++{
++ return false;
++}
++
+ #endif /* CONFIG_BPF_SYSCALL */
+
+ void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
+@@ -1937,6 +1997,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+ struct net_device *netdev);
+ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
+
++void unpriv_ebpf_notify(int new_state);
++
+ #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
+ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
+
+@@ -1960,6 +2022,7 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+ int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+ void sock_map_unhash(struct sock *sk);
++void sock_map_destroy(struct sock *sk);
+ void sock_map_close(struct sock *sk, long timeout);
+ #else
+ static inline int bpf_prog_offload_init(struct bpf_prog *prog,
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 5424124dbe365..3d04b48e502de 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -18,6 +18,8 @@
+ * that converting umax_value to int cannot overflow.
+ */
+ #define BPF_MAX_VAR_SIZ (1 << 29)
++/* size of type_str_buf in bpf_verifier. */
++#define TYPE_STR_BUF_LEN 64
+
+ /* Liveness marks, used for registers and spilled-regs (in stack slots).
+ * Read marks propagate upwards until they find a write mark; they record that
+@@ -190,6 +192,17 @@ struct bpf_reference_state {
+ * is used purely to inform the user of a reference leak.
+ */
+ int insn_idx;
++ /* There can be a case like:
++ * main (frame 0)
++ * cb (frame 1)
++ * func (frame 3)
++ * cb (frame 4)
++ * Hence for frame 4, if callback_ref just stored boolean, it would be
++ * impossible to distinguish nested callback refs. Hence store the
++ * frameno and compare that to callback_ref in check_reference_leak when
++ * exiting a callback function.
++ */
++ int callback_ref;
+ };
+
+ /* state of the program:
+@@ -315,6 +328,27 @@ struct bpf_verifier_state {
+ iter < frame->allocated_stack / BPF_REG_SIZE; \
+ iter++, reg = bpf_get_spilled_reg(iter, frame))
+
++/* Invoke __expr over regsiters in __vst, setting __state and __reg */
++#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
++ ({ \
++ struct bpf_verifier_state *___vstate = __vst; \
++ int ___i, ___j; \
++ for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \
++ struct bpf_reg_state *___regs; \
++ __state = ___vstate->frame[___i]; \
++ ___regs = __state->regs; \
++ for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \
++ __reg = &___regs[___j]; \
++ (void)(__expr); \
++ } \
++ bpf_for_each_spilled_reg(___j, __state, __reg) { \
++ if (!__reg) \
++ continue; \
++ (void)(__expr); \
++ } \
++ } \
++ })
++
+ /* linked list of verifier states used to prune search */
+ struct bpf_verifier_state_list {
+ struct bpf_verifier_state state;
+@@ -396,6 +430,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
+ log->level == BPF_LOG_KERNEL);
+ }
+
++static inline bool
++bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log)
++{
++ return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 &&
++ log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK);
++}
++
+ #define BPF_MAX_SUBPROGS 256
+
+ struct bpf_subprog_info {
+@@ -467,6 +508,8 @@ struct bpf_verifier_env {
+ /* longest register parentage chain walked for liveness marking */
+ u32 longest_mark_read_walk;
+ bpfptr_t fd_array;
++ /* buffer used in reg_type_str() to generate reg_type string */
++ char type_str_buf[TYPE_STR_BUF_LEN];
+ };
+
+ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+@@ -528,4 +571,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
+ u32 btf_id,
+ struct bpf_attach_target_info *tgt_info);
+
++#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
++
++/* extract base type from bpf_{arg, return, reg}_type. */
++static inline u32 base_type(u32 type)
++{
++ return type & BPF_BASE_TYPE_MASK;
++}
++
++/* extract flags from an extended type. See bpf_type_flag in bpf.h. */
++static inline u32 type_flag(u32 type)
++{
++ return type & ~BPF_BASE_TYPE_MASK;
++}
++
+ #endif /* _LINUX_BPF_VERIFIER_H */
+diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
+index 546e27fc6d462..ee28d2b0a3091 100644
+--- a/include/linux/bpfptr.h
++++ b/include/linux/bpfptr.h
+@@ -48,7 +48,9 @@ static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val)
+ static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src,
+ size_t offset, size_t size)
+ {
+- return copy_from_sockptr_offset(dst, (sockptr_t) src, offset, size);
++ if (!bpfptr_is_kernel(src))
++ return copy_from_user(dst, src.user + offset, size);
++ return copy_from_kernel_nofault(dst, src.kernel + offset, size);
+ }
+
+ static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size)
+@@ -77,7 +79,9 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
+
+ static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
+ {
+- return strncpy_from_sockptr(dst, (sockptr_t) src, count);
++ if (bpfptr_is_kernel(src))
++ return strncpy_from_kernel_nofault(dst, src.kernel, count);
++ return strncpy_from_user(dst, src.user, count);
+ }
+
+ #endif /* _LINUX_BPFPTR_H */
+diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
+index 36f33685c8c00..25b4263d66d70 100644
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -117,7 +117,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \
+ * of the form "mark_buffer_foo()". These are higher-level functions which
+ * do something in addition to setting a b_state bit.
+ */
+-BUFFER_FNS(Uptodate, uptodate)
+ BUFFER_FNS(Dirty, dirty)
+ TAS_BUFFER_FNS(Dirty, dirty)
+ BUFFER_FNS(Lock, locked)
+@@ -135,6 +134,41 @@ BUFFER_FNS(Meta, meta)
+ BUFFER_FNS(Prio, prio)
+ BUFFER_FNS(Defer_Completion, defer_completion)
+
++static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
++{
++ /*
++ * If somebody else already set this uptodate, they will
++ * have done the memory barrier, and a reader will thus
++ * see *some* valid buffer state.
++ *
++ * Any other serialization (with IO errors or whatever that
++ * might clear the bit) has to come from other state (eg BH_Lock).
++ */
++ if (test_bit(BH_Uptodate, &bh->b_state))
++ return;
++
++ /*
++ * make it consistent with folio_mark_uptodate
++ * pairs with smp_load_acquire in buffer_uptodate
++ */
++ smp_mb__before_atomic();
++ set_bit(BH_Uptodate, &bh->b_state);
++}
++
++static __always_inline void clear_buffer_uptodate(struct buffer_head *bh)
++{
++ clear_bit(BH_Uptodate, &bh->b_state);
++}
++
++static __always_inline int buffer_uptodate(const struct buffer_head *bh)
++{
++ /*
++ * make it consistent with folio_test_uptodate
++ * pairs with smp_mb__before_atomic in set_buffer_uptodate
++ */
++ return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0;
++}
++
+ #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+
+ /* If we *know* page->private refers to buffer_heads */
+diff --git a/include/linux/can/length.h b/include/linux/can/length.h
+index 6995092b774ec..ef1fd32cef16b 100644
+--- a/include/linux/can/length.h
++++ b/include/linux/can/length.h
+@@ -69,17 +69,18 @@
+ * Error Status Indicator (ESI) 1
+ * Data length code (DLC) 4
+ * Data field 0...512
+- * Stuff Bit Count (SBC) 0...16: 4 20...64:5
++ * Stuff Bit Count (SBC) 4
+ * CRC 0...16: 17 20...64:21
+ * CRC delimiter (CD) 1
++ * Fixed Stuff bits (FSB) 0...16: 6 20...64:7
+ * ACK slot (AS) 1
+ * ACK delimiter (AD) 1
+ * End-of-frame (EOF) 7
+ * Inter frame spacing 3
+ *
+- * assuming CRC21, rounded up and ignoring bitstuffing
++ * assuming CRC21, rounded up and ignoring dynamic bitstuffing
+ */
+-#define CANFD_FRAME_OVERHEAD_SFF DIV_ROUND_UP(61, 8)
++#define CANFD_FRAME_OVERHEAD_SFF DIV_ROUND_UP(67, 8)
+
+ /*
+ * Size of a CAN-FD Extended Frame
+@@ -98,17 +99,18 @@
+ * Error Status Indicator (ESI) 1
+ * Data length code (DLC) 4
+ * Data field 0...512
+- * Stuff Bit Count (SBC) 0...16: 4 20...64:5
++ * Stuff Bit Count (SBC) 4
+ * CRC 0...16: 17 20...64:21
+ * CRC delimiter (CD) 1
++ * Fixed Stuff bits (FSB) 0...16: 6 20...64:7
+ * ACK slot (AS) 1
+ * ACK delimiter (AD) 1
+ * End-of-frame (EOF) 7
+ * Inter frame spacing 3
+ *
+- * assuming CRC21, rounded up and ignoring bitstuffing
++ * assuming CRC21, rounded up and ignoring dynamic bitstuffing
+ */
+-#define CANFD_FRAME_OVERHEAD_EFF DIV_ROUND_UP(80, 8)
++#define CANFD_FRAME_OVERHEAD_EFF DIV_ROUND_UP(86, 8)
+
+ /*
+ * Maximum size of a Classical CAN frame
+diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
+index 5755ae5a47122..6a869682c1207 100644
+--- a/include/linux/can/platform/sja1000.h
++++ b/include/linux/can/platform/sja1000.h
+@@ -14,7 +14,7 @@
+ #define OCR_MODE_TEST 0x01
+ #define OCR_MODE_NORMAL 0x02
+ #define OCR_MODE_CLOCK 0x03
+-#define OCR_MODE_MASK 0x07
++#define OCR_MODE_MASK 0x03
+ #define OCR_TX0_INVERT 0x04
+ #define OCR_TX0_PULLDOWN 0x08
+ #define OCR_TX0_PULLUP 0x10
+diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
+new file mode 100644
+index 0000000000000..a075b70b9a70c
+--- /dev/null
++++ b/include/linux/cc_platform.h
+@@ -0,0 +1,88 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Confidential Computing Platform Capability checks
++ *
++ * Copyright (C) 2021 Advanced Micro Devices, Inc.
++ *
++ * Author: Tom Lendacky <thomas.lendacky@amd.com>
++ */
++
++#ifndef _LINUX_CC_PLATFORM_H
++#define _LINUX_CC_PLATFORM_H
++
++#include <linux/types.h>
++#include <linux/stddef.h>
++
++/**
++ * enum cc_attr - Confidential computing attributes
++ *
++ * These attributes represent confidential computing features that are
++ * currently active.
++ */
++enum cc_attr {
++ /**
++ * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active
++ *
++ * The platform/OS is running with active memory encryption. This
++ * includes running either as a bare-metal system or a hypervisor
++ * and actively using memory encryption or as a guest/virtual machine
++ * and actively using memory encryption.
++ *
++ * Examples include SME, SEV and SEV-ES.
++ */
++ CC_ATTR_MEM_ENCRYPT,
++
++ /**
++ * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active
++ *
++ * The platform/OS is running as a bare-metal system or a hypervisor
++ * and actively using memory encryption.
++ *
++ * Examples include SME.
++ */
++ CC_ATTR_HOST_MEM_ENCRYPT,
++
++ /**
++ * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active
++ *
++ * The platform/OS is running as a guest/virtual machine and actively
++ * using memory encryption.
++ *
++ * Examples include SEV and SEV-ES.
++ */
++ CC_ATTR_GUEST_MEM_ENCRYPT,
++
++ /**
++ * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active
++ *
++ * The platform/OS is running as a guest/virtual machine and actively
++ * using memory encryption and register state encryption.
++ *
++ * Examples include SEV-ES.
++ */
++ CC_ATTR_GUEST_STATE_ENCRYPT,
++};
++
++#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
++
++/**
++ * cc_platform_has() - Checks if the specified cc_attr attribute is active
++ * @attr: Confidential computing attribute to check
++ *
++ * The cc_platform_has() function will return an indicator as to whether the
++ * specified Confidential Computing attribute is currently active.
++ *
++ * Context: Any context
++ * Return:
++ * * TRUE - Specified Confidential Computing attribute is active
++ * * FALSE - Specified Confidential Computing attribute is not active
++ */
++bool cc_platform_has(enum cc_attr attr);
++
++#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
++
++static inline bool cc_platform_has(enum cc_attr attr) { return false; }
++
++#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
++
++#endif /* _LINUX_CC_PLATFORM_H */
+diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
+index 83fa08a065071..787fff5ec7f58 100644
+--- a/include/linux/ceph/osd_client.h
++++ b/include/linux/ceph/osd_client.h
+@@ -287,6 +287,9 @@ struct ceph_osd_linger_request {
+ rados_watcherrcb_t errcb;
+ void *data;
+
++ struct ceph_pagelist *request_pl;
++ struct page **notify_id_pages;
++
+ struct page ***preply_pages;
+ size_t *preply_len;
+ };
+diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
+index db2e147e069fe..cd8b8bd5ec4d5 100644
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -264,7 +264,8 @@ struct css_set {
+ * List of csets participating in the on-going migration either as
+ * source or destination. Protected by cgroup_mutex.
+ */
+- struct list_head mg_preload_node;
++ struct list_head mg_src_preload_node;
++ struct list_head mg_dst_preload_node;
+ struct list_head mg_node;
+
+ /*
+diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
+index 75c151413fda8..45cdb12243e3f 100644
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -68,6 +68,7 @@ struct css_task_iter {
+ struct list_head iters_node; /* css_set->task_iters */
+ };
+
++extern struct file_system_type cgroup_fs_type;
+ extern struct cgroup_root cgrp_dfl_root;
+ extern struct css_set init_css_set;
+
+diff --git a/include/linux/clk.h b/include/linux/clk.h
+index 266e8de3cb515..05ab315aa84bc 100644
+--- a/include/linux/clk.h
++++ b/include/linux/clk.h
+@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
+ */
+ bool clk_is_match(const struct clk *p, const struct clk *q);
+
++/**
++ * clk_rate_exclusive_get - get exclusivity over the rate control of a
++ * producer
++ * @clk: clock source
++ *
++ * This function allows drivers to get exclusive control over the rate of a
++ * provider. It prevents any other consumer to execute, even indirectly,
++ * opereation which could alter the rate of the provider or cause glitches
++ *
++ * If exlusivity is claimed more than once on clock, even by the same driver,
++ * the rate effectively gets locked as exclusivity can't be preempted.
++ *
++ * Must not be called from within atomic context.
++ *
++ * Returns success (0) or negative errno.
++ */
++int clk_rate_exclusive_get(struct clk *clk);
++
++/**
++ * clk_rate_exclusive_put - release exclusivity over the rate control of a
++ * producer
++ * @clk: clock source
++ *
++ * This function allows drivers to release the exclusivity it previously got
++ * from clk_rate_exclusive_get()
++ *
++ * The caller must balance the number of clk_rate_exclusive_get() and
++ * clk_rate_exclusive_put() calls.
++ *
++ * Must not be called from within atomic context.
++ */
++void clk_rate_exclusive_put(struct clk *clk);
++
+ #else
+
+ static inline int clk_notifier_register(struct clk *clk,
+@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
+ return p == q;
+ }
+
++static inline int clk_rate_exclusive_get(struct clk *clk)
++{
++ return 0;
++}
++
++static inline void clk_rate_exclusive_put(struct clk *clk) {}
++
+ #endif
+
+ #ifdef CONFIG_HAVE_CLK_PREPARE
+@@ -458,6 +498,47 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
+ */
+ struct clk *devm_clk_get(struct device *dev, const char *id);
+
++/**
++ * devm_clk_get_prepared - devm_clk_get() + clk_prepare()
++ * @dev: device for clock "consumer"
++ * @id: clock consumer ID
++ *
++ * Context: May sleep.
++ *
++ * Return: a struct clk corresponding to the clock producer, or
++ * valid IS_ERR() condition containing errno. The implementation
++ * uses @dev and @id to determine the clock consumer, and thereby
++ * the clock producer. (IOW, @id may be identical strings, but
++ * clk_get may return different clock producers depending on @dev.)
++ *
++ * The returned clk (if valid) is prepared. Drivers must however assume
++ * that the clock is not enabled.
++ *
++ * The clock will automatically be unprepared and freed when the device
++ * is unbound from the bus.
++ */
++struct clk *devm_clk_get_prepared(struct device *dev, const char *id);
++
++/**
++ * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable()
++ * @dev: device for clock "consumer"
++ * @id: clock consumer ID
++ *
++ * Context: May sleep.
++ *
++ * Return: a struct clk corresponding to the clock producer, or
++ * valid IS_ERR() condition containing errno. The implementation
++ * uses @dev and @id to determine the clock consumer, and thereby
++ * the clock producer. (IOW, @id may be identical strings, but
++ * clk_get may return different clock producers depending on @dev.)
++ *
++ * The returned clk (if valid) is prepared and enabled.
++ *
++ * The clock will automatically be disabled, unprepared and freed
++ * when the device is unbound from the bus.
++ */
++struct clk *devm_clk_get_enabled(struct device *dev, const char *id);
++
+ /**
+ * devm_clk_get_optional - lookup and obtain a managed reference to an optional
+ * clock producer.
+@@ -470,53 +551,65 @@ struct clk *devm_clk_get(struct device *dev, const char *id);
+ struct clk *devm_clk_get_optional(struct device *dev, const char *id);
+
+ /**
+- * devm_get_clk_from_child - lookup and obtain a managed reference to a
+- * clock producer from child node.
++ * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare()
+ * @dev: device for clock "consumer"
+- * @np: pointer to clock consumer node
+- * @con_id: clock consumer ID
++ * @id: clock consumer ID
+ *
+- * This function parses the clocks, and uses them to look up the
+- * struct clk from the registered list of clock providers by using
+- * @np and @con_id
++ * Context: May sleep.
+ *
+- * The clock will automatically be freed when the device is unbound
+- * from the bus.
++ * Return: a struct clk corresponding to the clock producer, or
++ * valid IS_ERR() condition containing errno. The implementation
++ * uses @dev and @id to determine the clock consumer, and thereby
++ * the clock producer. If no such clk is found, it returns NULL
++ * which serves as a dummy clk. That's the only difference compared
++ * to devm_clk_get_prepared().
++ *
++ * The returned clk (if valid) is prepared. Drivers must however
++ * assume that the clock is not enabled.
++ *
++ * The clock will automatically be unprepared and freed when the
++ * device is unbound from the bus.
+ */
+-struct clk *devm_get_clk_from_child(struct device *dev,
+- struct device_node *np, const char *con_id);
++struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id);
++
+ /**
+- * clk_rate_exclusive_get - get exclusivity over the rate control of a
+- * producer
+- * @clk: clock source
++ * devm_clk_get_optional_enabled - devm_clk_get_optional() +
++ * clk_prepare_enable()
++ * @dev: device for clock "consumer"
++ * @id: clock consumer ID
+ *
+- * This function allows drivers to get exclusive control over the rate of a
+- * provider. It prevents any other consumer to execute, even indirectly,
+- * opereation which could alter the rate of the provider or cause glitches
++ * Context: May sleep.
+ *
+- * If exlusivity is claimed more than once on clock, even by the same driver,
+- * the rate effectively gets locked as exclusivity can't be preempted.
++ * Return: a struct clk corresponding to the clock producer, or
++ * valid IS_ERR() condition containing errno. The implementation
++ * uses @dev and @id to determine the clock consumer, and thereby
++ * the clock producer. If no such clk is found, it returns NULL
++ * which serves as a dummy clk. That's the only difference compared
++ * to devm_clk_get_enabled().
+ *
+- * Must not be called from within atomic context.
++ * The returned clk (if valid) is prepared and enabled.
+ *
+- * Returns success (0) or negative errno.
++ * The clock will automatically be disabled, unprepared and freed
++ * when the device is unbound from the bus.
+ */
+-int clk_rate_exclusive_get(struct clk *clk);
++struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
+
+ /**
+- * clk_rate_exclusive_put - release exclusivity over the rate control of a
+- * producer
+- * @clk: clock source
+- *
+- * This function allows drivers to release the exclusivity it previously got
+- * from clk_rate_exclusive_get()
++ * devm_get_clk_from_child - lookup and obtain a managed reference to a
++ * clock producer from child node.
++ * @dev: device for clock "consumer"
++ * @np: pointer to clock consumer node
++ * @con_id: clock consumer ID
+ *
+- * The caller must balance the number of clk_rate_exclusive_get() and
+- * clk_rate_exclusive_put() calls.
++ * This function parses the clocks, and uses them to look up the
++ * struct clk from the registered list of clock providers by using
++ * @np and @con_id
+ *
+- * Must not be called from within atomic context.
++ * The clock will automatically be freed when the device is unbound
++ * from the bus.
+ */
+-void clk_rate_exclusive_put(struct clk *clk);
++struct clk *devm_get_clk_from_child(struct device *dev,
++ struct device_node *np, const char *con_id);
+
+ /**
+ * clk_enable - inform the system when the clock source should be running.
+@@ -813,12 +906,36 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id)
+ return NULL;
+ }
+
++static inline struct clk *devm_clk_get_prepared(struct device *dev,
++ const char *id)
++{
++ return NULL;
++}
++
++static inline struct clk *devm_clk_get_enabled(struct device *dev,
++ const char *id)
++{
++ return NULL;
++}
++
+ static inline struct clk *devm_clk_get_optional(struct device *dev,
+ const char *id)
+ {
+ return NULL;
+ }
+
++static inline struct clk *devm_clk_get_optional_prepared(struct device *dev,
++ const char *id)
++{
++ return NULL;
++}
++
++static inline struct clk *devm_clk_get_optional_enabled(struct device *dev,
++ const char *id)
++{
++ return NULL;
++}
++
+ static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
+ struct clk_bulk_data *clks)
+ {
+@@ -852,14 +969,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
+
+ static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
+
+-
+-static inline int clk_rate_exclusive_get(struct clk *clk)
+-{
+- return 0;
+-}
+-
+-static inline void clk_rate_exclusive_put(struct clk *clk) {}
+-
+ static inline int clk_enable(struct clk *clk)
+ {
+ return 0;
+diff --git a/include/linux/compat.h b/include/linux/compat.h
+index 1c758b0e03598..01fddf72a81f0 100644
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -235,6 +235,7 @@ typedef struct compat_siginfo {
+ struct {
+ compat_ulong_t _data;
+ u32 _type;
++ u32 _flags;
+ } _perf;
+ };
+ } _sigfault;
+diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
+index bd2b881c6b63a..b9d5f9c373a09 100644
+--- a/include/linux/compiler-gcc.h
++++ b/include/linux/compiler-gcc.h
+@@ -144,3 +144,11 @@
+ #else
+ #define __diag_GCC_8(s)
+ #endif
++
++/*
++ * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size"
++ * attribute) do not work, and must be disabled.
++ */
++#if GCC_VERSION < 90100
++#undef __alloc_size__
++#endif
+diff --git a/include/linux/compiler.h b/include/linux/compiler.h
+index 3d5af56337bdb..0f7fd205ab7ea 100644
+--- a/include/linux/compiler.h
++++ b/include/linux/compiler.h
+@@ -117,40 +117,29 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
+ */
+ #define __stringify_label(n) #n
+
+-#define __annotate_reachable(c) ({ \
+- asm volatile(__stringify_label(c) ":\n\t" \
+- ".pushsection .discard.reachable\n\t" \
+- ".long " __stringify_label(c) "b - .\n\t" \
+- ".popsection\n\t"); \
+-})
+-#define annotate_reachable() __annotate_reachable(__COUNTER__)
+-
+ #define __annotate_unreachable(c) ({ \
+ asm volatile(__stringify_label(c) ":\n\t" \
+ ".pushsection .discard.unreachable\n\t" \
+ ".long " __stringify_label(c) "b - .\n\t" \
+- ".popsection\n\t"); \
++ ".popsection\n\t" : : "i" (c)); \
+ })
+ #define annotate_unreachable() __annotate_unreachable(__COUNTER__)
+
+-#define ASM_UNREACHABLE \
+- "999:\n\t" \
+- ".pushsection .discard.unreachable\n\t" \
+- ".long 999b - .\n\t" \
++#define ASM_REACHABLE \
++ "998:\n\t" \
++ ".pushsection .discard.reachable\n\t" \
++ ".long 998b - .\n\t" \
+ ".popsection\n\t"
+
+ /* Annotate a C jump table to allow objtool to follow the code flow */
+ #define __annotate_jump_table __section(".rodata..c_jump_table")
+
+ #else
+-#define annotate_reachable()
+ #define annotate_unreachable()
++# define ASM_REACHABLE
+ #define __annotate_jump_table
+ #endif
+
+-#ifndef ASM_UNREACHABLE
+-# define ASM_UNREACHABLE
+-#endif
+ #ifndef unreachable
+ # define unreachable() do { \
+ annotate_unreachable(); \
+diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
+index e6ec634039658..3de06a8fae73b 100644
+--- a/include/linux/compiler_attributes.h
++++ b/include/linux/compiler_attributes.h
+@@ -33,6 +33,15 @@
+ #define __aligned(x) __attribute__((__aligned__(x)))
+ #define __aligned_largest __attribute__((__aligned__))
+
++/*
++ * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally
++ * available and includes other attributes.
++ *
++ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute
++ * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size
++ */
++#define __alloc_size__(x, ...) __attribute__((__alloc_size__(x, ## __VA_ARGS__)))
++
+ /*
+ * Note: users of __always_inline currently do not write "inline" themselves,
+ * which seems to be required by gcc to apply the attribute according
+@@ -153,6 +162,7 @@
+
+ /*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
++ * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc
+ */
+ #define __malloc __attribute__((__malloc__))
+
+diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
+index b6ff83a714ca9..4f2203c4a2574 100644
+--- a/include/linux/compiler_types.h
++++ b/include/linux/compiler_types.h
+@@ -250,6 +250,18 @@ struct ftrace_likely_data {
+ # define __cficanonical
+ #endif
+
++/*
++ * Any place that could be marked with the "alloc_size" attribute is also
++ * a place to be marked with the "malloc" attribute. Do this as part of the
++ * __alloc_size macro to avoid redundant attributes and to avoid missing a
++ * __malloc marking.
++ */
++#ifdef __alloc_size__
++# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
++#else
++# define __alloc_size(x, ...) __malloc
++#endif
++
+ #ifndef asm_volatile_goto
+ #define asm_volatile_goto(x...) asm goto(x)
+ #endif
+diff --git a/include/linux/console.h b/include/linux/console.h
+index 20874db50bc8a..a97f277cfdfa3 100644
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -149,6 +149,8 @@ struct console {
+ short flags;
+ short index;
+ int cflag;
++ uint ispeed;
++ uint ospeed;
+ void *data;
+ struct console *next;
+ };
+diff --git a/include/linux/coredump.h b/include/linux/coredump.h
+index 78fcd776b185a..4b95e46d215f1 100644
+--- a/include/linux/coredump.h
++++ b/include/linux/coredump.h
+@@ -12,6 +12,8 @@ struct core_vma_metadata {
+ unsigned long start, end;
+ unsigned long flags;
+ unsigned long dump_size;
++ unsigned long pgoff;
++ struct file *file;
+ };
+
+ extern int core_uses_pid;
+@@ -29,9 +31,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
+ extern int dump_align(struct coredump_params *cprm, int align);
+ int dump_user_range(struct coredump_params *cprm, unsigned long start,
+ unsigned long len);
+-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+- struct core_vma_metadata **vma_meta,
+- size_t *vma_data_size_ptr);
+ extern void do_coredump(const kernel_siginfo_t *siginfo);
+ #else
+ static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
+diff --git a/include/linux/counter.h b/include/linux/counter.h
+index d16ce2819b48b..c72dda6d6af54 100644
+--- a/include/linux/counter.h
++++ b/include/linux/counter.h
+@@ -6,42 +6,184 @@
+ #ifndef _COUNTER_H_
+ #define _COUNTER_H_
+
+-#include <linux/counter_enum.h>
+ #include <linux/device.h>
++#include <linux/kernel.h>
+ #include <linux/types.h>
+
++struct counter_device;
++struct counter_count;
++struct counter_synapse;
++struct counter_signal;
++
++enum counter_comp_type {
++ COUNTER_COMP_U8,
++ COUNTER_COMP_U64,
++ COUNTER_COMP_BOOL,
++ COUNTER_COMP_SIGNAL_LEVEL,
++ COUNTER_COMP_FUNCTION,
++ COUNTER_COMP_SYNAPSE_ACTION,
++ COUNTER_COMP_ENUM,
++ COUNTER_COMP_COUNT_DIRECTION,
++ COUNTER_COMP_COUNT_MODE,
++};
++
++enum counter_scope {
++ COUNTER_SCOPE_DEVICE,
++ COUNTER_SCOPE_SIGNAL,
++ COUNTER_SCOPE_COUNT,
++};
++
+ enum counter_count_direction {
+- COUNTER_COUNT_DIRECTION_FORWARD = 0,
+- COUNTER_COUNT_DIRECTION_BACKWARD
++ COUNTER_COUNT_DIRECTION_FORWARD,
++ COUNTER_COUNT_DIRECTION_BACKWARD,
+ };
+-extern const char *const counter_count_direction_str[2];
+
+ enum counter_count_mode {
+- COUNTER_COUNT_MODE_NORMAL = 0,
++ COUNTER_COUNT_MODE_NORMAL,
+ COUNTER_COUNT_MODE_RANGE_LIMIT,
+ COUNTER_COUNT_MODE_NON_RECYCLE,
+- COUNTER_COUNT_MODE_MODULO_N
++ COUNTER_COUNT_MODE_MODULO_N,
+ };
+-extern const char *const counter_count_mode_str[4];
+
+-struct counter_device;
+-struct counter_signal;
++enum counter_function {
++ COUNTER_FUNCTION_INCREASE,
++ COUNTER_FUNCTION_DECREASE,
++ COUNTER_FUNCTION_PULSE_DIRECTION,
++ COUNTER_FUNCTION_QUADRATURE_X1_A,
++ COUNTER_FUNCTION_QUADRATURE_X1_B,
++ COUNTER_FUNCTION_QUADRATURE_X2_A,
++ COUNTER_FUNCTION_QUADRATURE_X2_B,
++ COUNTER_FUNCTION_QUADRATURE_X4,
++};
++
++enum counter_signal_level {
++ COUNTER_SIGNAL_LEVEL_LOW,
++ COUNTER_SIGNAL_LEVEL_HIGH,
++};
++
++enum counter_synapse_action {
++ COUNTER_SYNAPSE_ACTION_NONE,
++ COUNTER_SYNAPSE_ACTION_RISING_EDGE,
++ COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
++ COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
++};
+
+ /**
+- * struct counter_signal_ext - Counter Signal extensions
+- * @name: attribute name
+- * @read: read callback for this attribute; may be NULL
+- * @write: write callback for this attribute; may be NULL
+- * @priv: data private to the driver
++ * struct counter_comp - Counter component node
++ * @type: Counter component data type
++ * @name: device-specific component name
++ * @priv: component-relevant data
++ * @action_read: Synapse action mode read callback. The read value of the
++ * respective Synapse action mode should be passed back via
++ * the action parameter.
++ * @device_u8_read: Device u8 component read callback. The read value of the
++ * respective Device u8 component should be passed back via
++ * the val parameter.
++ * @count_u8_read: Count u8 component read callback. The read value of the
++ * respective Count u8 component should be passed back via
++ * the val parameter.
++ * @signal_u8_read: Signal u8 component read callback. The read value of the
++ * respective Signal u8 component should be passed back via
++ * the val parameter.
++ * @device_u32_read: Device u32 component read callback. The read value of
++ * the respective Device u32 component should be passed
++ * back via the val parameter.
++ * @count_u32_read: Count u32 component read callback. The read value of the
++ * respective Count u32 component should be passed back via
++ * the val parameter.
++ * @signal_u32_read: Signal u32 component read callback. The read value of
++ * the respective Signal u32 component should be passed
++ * back via the val parameter.
++ * @device_u64_read: Device u64 component read callback. The read value of
++ * the respective Device u64 component should be passed
++ * back via the val parameter.
++ * @count_u64_read: Count u64 component read callback. The read value of the
++ * respective Count u64 component should be passed back via
++ * the val parameter.
++ * @signal_u64_read: Signal u64 component read callback. The read value of
++ * the respective Signal u64 component should be passed
++ * back via the val parameter.
++ * @action_write: Synapse action mode write callback. The write value of
++ * the respective Synapse action mode is passed via the
++ * action parameter.
++ * @device_u8_write: Device u8 component write callback. The write value of
++ * the respective Device u8 component is passed via the val
++ * parameter.
++ * @count_u8_write: Count u8 component write callback. The write value of
++ * the respective Count u8 component is passed via the val
++ * parameter.
++ * @signal_u8_write: Signal u8 component write callback. The write value of
++ * the respective Signal u8 component is passed via the val
++ * parameter.
++ * @device_u32_write: Device u32 component write callback. The write value of
++ * the respective Device u32 component is passed via the
++ * val parameter.
++ * @count_u32_write: Count u32 component write callback. The write value of
++ * the respective Count u32 component is passed via the val
++ * parameter.
++ * @signal_u32_write: Signal u32 component write callback. The write value of
++ * the respective Signal u32 component is passed via the
++ * val parameter.
++ * @device_u64_write: Device u64 component write callback. The write value of
++ * the respective Device u64 component is passed via the
++ * val parameter.
++ * @count_u64_write: Count u64 component write callback. The write value of
++ * the respective Count u64 component is passed via the val
++ * parameter.
++ * @signal_u64_write: Signal u64 component write callback. The write value of
++ * the respective Signal u64 component is passed via the
++ * val parameter.
+ */
+-struct counter_signal_ext {
++struct counter_comp {
++ enum counter_comp_type type;
+ const char *name;
+- ssize_t (*read)(struct counter_device *counter,
+- struct counter_signal *signal, void *priv, char *buf);
+- ssize_t (*write)(struct counter_device *counter,
+- struct counter_signal *signal, void *priv,
+- const char *buf, size_t len);
+ void *priv;
++ union {
++ int (*action_read)(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action);
++ int (*device_u8_read)(struct counter_device *counter, u8 *val);
++ int (*count_u8_read)(struct counter_device *counter,
++ struct counter_count *count, u8 *val);
++ int (*signal_u8_read)(struct counter_device *counter,
++ struct counter_signal *signal, u8 *val);
++ int (*device_u32_read)(struct counter_device *counter,
++ u32 *val);
++ int (*count_u32_read)(struct counter_device *counter,
++ struct counter_count *count, u32 *val);
++ int (*signal_u32_read)(struct counter_device *counter,
++ struct counter_signal *signal, u32 *val);
++ int (*device_u64_read)(struct counter_device *counter,
++ u64 *val);
++ int (*count_u64_read)(struct counter_device *counter,
++ struct counter_count *count, u64 *val);
++ int (*signal_u64_read)(struct counter_device *counter,
++ struct counter_signal *signal, u64 *val);
++ };
++ union {
++ int (*action_write)(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action action);
++ int (*device_u8_write)(struct counter_device *counter, u8 val);
++ int (*count_u8_write)(struct counter_device *counter,
++ struct counter_count *count, u8 val);
++ int (*signal_u8_write)(struct counter_device *counter,
++ struct counter_signal *signal, u8 val);
++ int (*device_u32_write)(struct counter_device *counter,
++ u32 val);
++ int (*count_u32_write)(struct counter_device *counter,
++ struct counter_count *count, u32 val);
++ int (*signal_u32_write)(struct counter_device *counter,
++ struct counter_signal *signal, u32 val);
++ int (*device_u64_write)(struct counter_device *counter,
++ u64 val);
++ int (*count_u64_write)(struct counter_device *counter,
++ struct counter_count *count, u64 val);
++ int (*signal_u64_write)(struct counter_device *counter,
++ struct counter_signal *signal, u64 val);
++ };
+ };
+
+ /**
+@@ -51,248 +193,52 @@ struct counter_signal_ext {
+ * as it appears in the datasheet documentation
+ * @ext: optional array of Counter Signal extensions
+ * @num_ext: number of Counter Signal extensions specified in @ext
+- * @priv: optional private data supplied by driver
+ */
+ struct counter_signal {
+ int id;
+ const char *name;
+
+- const struct counter_signal_ext *ext;
++ struct counter_comp *ext;
+ size_t num_ext;
+-
+- void *priv;
+-};
+-
+-/**
+- * struct counter_signal_enum_ext - Signal enum extension attribute
+- * @items: Array of strings
+- * @num_items: Number of items specified in @items
+- * @set: Set callback function; may be NULL
+- * @get: Get callback function; may be NULL
+- *
+- * The counter_signal_enum_ext structure can be used to implement enum style
+- * Signal extension attributes. Enum style attributes are those which have a set
+- * of strings that map to unsigned integer values. The Generic Counter Signal
+- * enum extension helper code takes care of mapping between value and string, as
+- * well as generating a "_available" file which contains a list of all available
+- * items. The get callback is used to query the currently active item; the index
+- * of the item within the respective items array is returned via the 'item'
+- * parameter. The set callback is called when the attribute is updated; the
+- * 'item' parameter contains the index of the newly activated item within the
+- * respective items array.
+- */
+-struct counter_signal_enum_ext {
+- const char * const *items;
+- size_t num_items;
+- int (*get)(struct counter_device *counter,
+- struct counter_signal *signal, size_t *item);
+- int (*set)(struct counter_device *counter,
+- struct counter_signal *signal, size_t item);
+-};
+-
+-/**
+- * COUNTER_SIGNAL_ENUM() - Initialize Signal enum extension
+- * @_name: Attribute name
+- * @_e: Pointer to a counter_signal_enum_ext structure
+- *
+- * This should usually be used together with COUNTER_SIGNAL_ENUM_AVAILABLE()
+- */
+-#define COUNTER_SIGNAL_ENUM(_name, _e) \
+-{ \
+- .name = (_name), \
+- .read = counter_signal_enum_read, \
+- .write = counter_signal_enum_write, \
+- .priv = (_e) \
+-}
+-
+-/**
+- * COUNTER_SIGNAL_ENUM_AVAILABLE() - Initialize Signal enum available extension
+- * @_name: Attribute name ("_available" will be appended to the name)
+- * @_e: Pointer to a counter_signal_enum_ext structure
+- *
+- * Creates a read only attribute that lists all the available enum items in a
+- * newline separated list. This should usually be used together with
+- * COUNTER_SIGNAL_ENUM()
+- */
+-#define COUNTER_SIGNAL_ENUM_AVAILABLE(_name, _e) \
+-{ \
+- .name = (_name "_available"), \
+- .read = counter_signal_enum_available_read, \
+- .priv = (_e) \
+-}
+-
+-enum counter_synapse_action {
+- COUNTER_SYNAPSE_ACTION_NONE = 0,
+- COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+- COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+- COUNTER_SYNAPSE_ACTION_BOTH_EDGES
+ };
+
+ /**
+ * struct counter_synapse - Counter Synapse node
+- * @action: index of current action mode
+ * @actions_list: array of available action modes
+ * @num_actions: number of action modes specified in @actions_list
+ * @signal: pointer to associated signal
+ */
+ struct counter_synapse {
+- size_t action;
+ const enum counter_synapse_action *actions_list;
+ size_t num_actions;
+
+ struct counter_signal *signal;
+ };
+
+-struct counter_count;
+-
+-/**
+- * struct counter_count_ext - Counter Count extension
+- * @name: attribute name
+- * @read: read callback for this attribute; may be NULL
+- * @write: write callback for this attribute; may be NULL
+- * @priv: data private to the driver
+- */
+-struct counter_count_ext {
+- const char *name;
+- ssize_t (*read)(struct counter_device *counter,
+- struct counter_count *count, void *priv, char *buf);
+- ssize_t (*write)(struct counter_device *counter,
+- struct counter_count *count, void *priv,
+- const char *buf, size_t len);
+- void *priv;
+-};
+-
+-enum counter_function {
+- COUNTER_FUNCTION_INCREASE = 0,
+- COUNTER_FUNCTION_DECREASE,
+- COUNTER_FUNCTION_PULSE_DIRECTION,
+- COUNTER_FUNCTION_QUADRATURE_X1_A,
+- COUNTER_FUNCTION_QUADRATURE_X1_B,
+- COUNTER_FUNCTION_QUADRATURE_X2_A,
+- COUNTER_FUNCTION_QUADRATURE_X2_B,
+- COUNTER_FUNCTION_QUADRATURE_X4
+-};
+-
+ /**
+ * struct counter_count - Counter Count node
+ * @id: unique ID used to identify Count
+ * @name: device-specific Count name; ideally, this should match
+ * the name as it appears in the datasheet documentation
+- * @function: index of current function mode
+ * @functions_list: array available function modes
+ * @num_functions: number of function modes specified in @functions_list
+ * @synapses: array of synapses for initialization
+ * @num_synapses: number of synapses specified in @synapses
+ * @ext: optional array of Counter Count extensions
+ * @num_ext: number of Counter Count extensions specified in @ext
+- * @priv: optional private data supplied by driver
+ */
+ struct counter_count {
+ int id;
+ const char *name;
+
+- size_t function;
+ const enum counter_function *functions_list;
+ size_t num_functions;
+
+ struct counter_synapse *synapses;
+ size_t num_synapses;
+
+- const struct counter_count_ext *ext;
++ struct counter_comp *ext;
+ size_t num_ext;
+-
+- void *priv;
+-};
+-
+-/**
+- * struct counter_count_enum_ext - Count enum extension attribute
+- * @items: Array of strings
+- * @num_items: Number of items specified in @items
+- * @set: Set callback function; may be NULL
+- * @get: Get callback function; may be NULL
+- *
+- * The counter_count_enum_ext structure can be used to implement enum style
+- * Count extension attributes. Enum style attributes are those which have a set
+- * of strings that map to unsigned integer values. The Generic Counter Count
+- * enum extension helper code takes care of mapping between value and string, as
+- * well as generating a "_available" file which contains a list of all available
+- * items. The get callback is used to query the currently active item; the index
+- * of the item within the respective items array is returned via the 'item'
+- * parameter. The set callback is called when the attribute is updated; the
+- * 'item' parameter contains the index of the newly activated item within the
+- * respective items array.
+- */
+-struct counter_count_enum_ext {
+- const char * const *items;
+- size_t num_items;
+- int (*get)(struct counter_device *counter, struct counter_count *count,
+- size_t *item);
+- int (*set)(struct counter_device *counter, struct counter_count *count,
+- size_t item);
+-};
+-
+-/**
+- * COUNTER_COUNT_ENUM() - Initialize Count enum extension
+- * @_name: Attribute name
+- * @_e: Pointer to a counter_count_enum_ext structure
+- *
+- * This should usually be used together with COUNTER_COUNT_ENUM_AVAILABLE()
+- */
+-#define COUNTER_COUNT_ENUM(_name, _e) \
+-{ \
+- .name = (_name), \
+- .read = counter_count_enum_read, \
+- .write = counter_count_enum_write, \
+- .priv = (_e) \
+-}
+-
+-/**
+- * COUNTER_COUNT_ENUM_AVAILABLE() - Initialize Count enum available extension
+- * @_name: Attribute name ("_available" will be appended to the name)
+- * @_e: Pointer to a counter_count_enum_ext structure
+- *
+- * Creates a read only attribute that lists all the available enum items in a
+- * newline separated list. This should usually be used together with
+- * COUNTER_COUNT_ENUM()
+- */
+-#define COUNTER_COUNT_ENUM_AVAILABLE(_name, _e) \
+-{ \
+- .name = (_name "_available"), \
+- .read = counter_count_enum_available_read, \
+- .priv = (_e) \
+-}
+-
+-/**
+- * struct counter_device_attr_group - internal container for attribute group
+- * @attr_group: Counter sysfs attributes group
+- * @attr_list: list to keep track of created Counter sysfs attributes
+- * @num_attr: number of Counter sysfs attributes
+- */
+-struct counter_device_attr_group {
+- struct attribute_group attr_group;
+- struct list_head attr_list;
+- size_t num_attr;
+-};
+-
+-/**
+- * struct counter_device_state - internal state container for a Counter device
+- * @id: unique ID used to identify the Counter
+- * @dev: internal device structure
+- * @groups_list: attribute groups list (for Signals, Counts, and ext)
+- * @num_groups: number of attribute groups containers
+- * @groups: Counter sysfs attribute groups (to populate @dev.groups)
+- */
+-struct counter_device_state {
+- int id;
+- struct device dev;
+- struct counter_device_attr_group *groups_list;
+- size_t num_groups;
+- const struct attribute_group **groups;
+-};
+-
+-enum counter_signal_level {
+- COUNTER_SIGNAL_LEVEL_LOW,
+- COUNTER_SIGNAL_LEVEL_HIGH,
+ };
+
+ /**
+@@ -306,117 +252,47 @@ enum counter_signal_level {
+ * @count_write: optional write callback for Count attribute. The write
+ * value for the respective Count is passed in via the val
+ * parameter.
+- * @function_get: function to get the current count function mode. Returns
+- * 0 on success and negative error code on error. The index
+- * of the respective Count's returned function mode should
+- * be passed back via the function parameter.
+- * @function_set: function to set the count function mode. function is the
+- * index of the requested function mode from the respective
+- * Count's functions_list array.
+- * @action_get: function to get the current action mode. Returns 0 on
+- * success and negative error code on error. The index of
+- * the respective Synapse's returned action mode should be
+- * passed back via the action parameter.
+- * @action_set: function to set the action mode. action is the index of
+- * the requested action mode from the respective Synapse's
+- * actions_list array.
++ * @function_read: read callback the Count function modes. The read
++ * function mode of the respective Count should be passed
++ * back via the function parameter.
++ * @function_write: write callback for Count function modes. The function
++ * mode to write for the respective Count is passed in via
++ * the function parameter.
++ * @action_read: read callback the Synapse action modes. The read action
++ * mode of the respective Synapse should be passed back via
++ * the action parameter.
++ * @action_write: write callback for Synapse action modes. The action mode
++ * to write for the respective Synapse is passed in via the
++ * action parameter.
+ */
+ struct counter_ops {
+ int (*signal_read)(struct counter_device *counter,
+ struct counter_signal *signal,
+ enum counter_signal_level *level);
+ int (*count_read)(struct counter_device *counter,
+- struct counter_count *count, unsigned long *val);
++ struct counter_count *count, u64 *value);
+ int (*count_write)(struct counter_device *counter,
+- struct counter_count *count, unsigned long val);
+- int (*function_get)(struct counter_device *counter,
+- struct counter_count *count, size_t *function);
+- int (*function_set)(struct counter_device *counter,
+- struct counter_count *count, size_t function);
+- int (*action_get)(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse, size_t *action);
+- int (*action_set)(struct counter_device *counter,
+- struct counter_count *count,
+- struct counter_synapse *synapse, size_t action);
+-};
+-
+-/**
+- * struct counter_device_ext - Counter device extension
+- * @name: attribute name
+- * @read: read callback for this attribute; may be NULL
+- * @write: write callback for this attribute; may be NULL
+- * @priv: data private to the driver
+- */
+-struct counter_device_ext {
+- const char *name;
+- ssize_t (*read)(struct counter_device *counter, void *priv, char *buf);
+- ssize_t (*write)(struct counter_device *counter, void *priv,
+- const char *buf, size_t len);
+- void *priv;
++ struct counter_count *count, u64 value);
++ int (*function_read)(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function *function);
++ int (*function_write)(struct counter_device *counter,
++ struct counter_count *count,
++ enum counter_function function);
++ int (*action_read)(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action *action);
++ int (*action_write)(struct counter_device *counter,
++ struct counter_count *count,
++ struct counter_synapse *synapse,
++ enum counter_synapse_action action);
+ };
+
+-/**
+- * struct counter_device_enum_ext - Counter enum extension attribute
+- * @items: Array of strings
+- * @num_items: Number of items specified in @items
+- * @set: Set callback function; may be NULL
+- * @get: Get callback function; may be NULL
+- *
+- * The counter_device_enum_ext structure can be used to implement enum style
+- * Counter extension attributes. Enum style attributes are those which have a
+- * set of strings that map to unsigned integer values. The Generic Counter enum
+- * extension helper code takes care of mapping between value and string, as well
+- * as generating a "_available" file which contains a list of all available
+- * items. The get callback is used to query the currently active item; the index
+- * of the item within the respective items array is returned via the 'item'
+- * parameter. The set callback is called when the attribute is updated; the
+- * 'item' parameter contains the index of the newly activated item within the
+- * respective items array.
+- */
+-struct counter_device_enum_ext {
+- const char * const *items;
+- size_t num_items;
+- int (*get)(struct counter_device *counter, size_t *item);
+- int (*set)(struct counter_device *counter, size_t item);
+-};
+-
+-/**
+- * COUNTER_DEVICE_ENUM() - Initialize Counter enum extension
+- * @_name: Attribute name
+- * @_e: Pointer to a counter_device_enum_ext structure
+- *
+- * This should usually be used together with COUNTER_DEVICE_ENUM_AVAILABLE()
+- */
+-#define COUNTER_DEVICE_ENUM(_name, _e) \
+-{ \
+- .name = (_name), \
+- .read = counter_device_enum_read, \
+- .write = counter_device_enum_write, \
+- .priv = (_e) \
+-}
+-
+-/**
+- * COUNTER_DEVICE_ENUM_AVAILABLE() - Initialize Counter enum available extension
+- * @_name: Attribute name ("_available" will be appended to the name)
+- * @_e: Pointer to a counter_device_enum_ext structure
+- *
+- * Creates a read only attribute that lists all the available enum items in a
+- * newline separated list. This should usually be used together with
+- * COUNTER_DEVICE_ENUM()
+- */
+-#define COUNTER_DEVICE_ENUM_AVAILABLE(_name, _e) \
+-{ \
+- .name = (_name "_available"), \
+- .read = counter_device_enum_available_read, \
+- .priv = (_e) \
+-}
+-
+ /**
+ * struct counter_device - Counter data structure
+ * @name: name of the device as it appears in the datasheet
+ * @parent: optional parent device providing the counters
+- * @device_state: internal device state container
+ * @ops: callbacks from driver
+ * @signals: array of Signals
+ * @num_signals: number of Signals specified in @signals
+@@ -425,11 +301,11 @@ struct counter_device_enum_ext {
+ * @ext: optional array of Counter device extensions
+ * @num_ext: number of Counter device extensions specified in @ext
+ * @priv: optional private data supplied by driver
++ * @dev: internal device structure
+ */
+ struct counter_device {
+ const char *name;
+ struct device *parent;
+- struct counter_device_state *device_state;
+
+ const struct counter_ops *ops;
+
+@@ -438,17 +314,159 @@ struct counter_device {
+ struct counter_count *counts;
+ size_t num_counts;
+
+- const struct counter_device_ext *ext;
++ struct counter_comp *ext;
+ size_t num_ext;
+
+ void *priv;
++
++ struct device dev;
+ };
+
+ int counter_register(struct counter_device *const counter);
+ void counter_unregister(struct counter_device *const counter);
+ int devm_counter_register(struct device *dev,
+ struct counter_device *const counter);
+-void devm_counter_unregister(struct device *dev,
+- struct counter_device *const counter);
++
++#define COUNTER_COMP_DEVICE_U8(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U8, \
++ .name = (_name), \
++ .device_u8_read = (_read), \
++ .device_u8_write = (_write), \
++}
++#define COUNTER_COMP_COUNT_U8(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U8, \
++ .name = (_name), \
++ .count_u8_read = (_read), \
++ .count_u8_write = (_write), \
++}
++#define COUNTER_COMP_SIGNAL_U8(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U8, \
++ .name = (_name), \
++ .signal_u8_read = (_read), \
++ .signal_u8_write = (_write), \
++}
++
++#define COUNTER_COMP_DEVICE_U64(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U64, \
++ .name = (_name), \
++ .device_u64_read = (_read), \
++ .device_u64_write = (_write), \
++}
++#define COUNTER_COMP_COUNT_U64(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U64, \
++ .name = (_name), \
++ .count_u64_read = (_read), \
++ .count_u64_write = (_write), \
++}
++#define COUNTER_COMP_SIGNAL_U64(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_U64, \
++ .name = (_name), \
++ .signal_u64_read = (_read), \
++ .signal_u64_write = (_write), \
++}
++
++#define COUNTER_COMP_DEVICE_BOOL(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_BOOL, \
++ .name = (_name), \
++ .device_u8_read = (_read), \
++ .device_u8_write = (_write), \
++}
++#define COUNTER_COMP_COUNT_BOOL(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_BOOL, \
++ .name = (_name), \
++ .count_u8_read = (_read), \
++ .count_u8_write = (_write), \
++}
++#define COUNTER_COMP_SIGNAL_BOOL(_name, _read, _write) \
++{ \
++ .type = COUNTER_COMP_BOOL, \
++ .name = (_name), \
++ .signal_u8_read = (_read), \
++ .signal_u8_write = (_write), \
++}
++
++struct counter_available {
++ union {
++ const u32 *enums;
++ const char *const *strs;
++ };
++ size_t num_items;
++};
++
++#define DEFINE_COUNTER_AVAILABLE(_name, _enums) \
++ struct counter_available _name = { \
++ .enums = (_enums), \
++ .num_items = ARRAY_SIZE(_enums), \
++ }
++
++#define DEFINE_COUNTER_ENUM(_name, _strs) \
++ struct counter_available _name = { \
++ .strs = (_strs), \
++ .num_items = ARRAY_SIZE(_strs), \
++ }
++
++#define COUNTER_COMP_DEVICE_ENUM(_name, _get, _set, _available) \
++{ \
++ .type = COUNTER_COMP_ENUM, \
++ .name = (_name), \
++ .device_u32_read = (_get), \
++ .device_u32_write = (_set), \
++ .priv = &(_available), \
++}
++#define COUNTER_COMP_COUNT_ENUM(_name, _get, _set, _available) \
++{ \
++ .type = COUNTER_COMP_ENUM, \
++ .name = (_name), \
++ .count_u32_read = (_get), \
++ .count_u32_write = (_set), \
++ .priv = &(_available), \
++}
++#define COUNTER_COMP_SIGNAL_ENUM(_name, _get, _set, _available) \
++{ \
++ .type = COUNTER_COMP_ENUM, \
++ .name = (_name), \
++ .signal_u32_read = (_get), \
++ .signal_u32_write = (_set), \
++ .priv = &(_available), \
++}
++
++#define COUNTER_COMP_CEILING(_read, _write) \
++ COUNTER_COMP_COUNT_U64("ceiling", _read, _write)
++
++#define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \
++{ \
++ .type = COUNTER_COMP_COUNT_MODE, \
++ .name = "count_mode", \
++ .count_u32_read = (_read), \
++ .count_u32_write = (_write), \
++ .priv = &(_available), \
++}
++
++#define COUNTER_COMP_DIRECTION(_read) \
++{ \
++ .type = COUNTER_COMP_COUNT_DIRECTION, \
++ .name = "direction", \
++ .count_u32_read = (_read), \
++}
++
++#define COUNTER_COMP_ENABLE(_read, _write) \
++ COUNTER_COMP_COUNT_BOOL("enable", _read, _write)
++
++#define COUNTER_COMP_FLOOR(_read, _write) \
++ COUNTER_COMP_COUNT_U64("floor", _read, _write)
++
++#define COUNTER_COMP_PRESET(_read, _write) \
++ COUNTER_COMP_COUNT_U64("preset", _read, _write)
++
++#define COUNTER_COMP_PRESET_ENABLE(_read, _write) \
++ COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write)
+
+ #endif /* _COUNTER_H_ */
+diff --git a/include/linux/counter_enum.h b/include/linux/counter_enum.h
+deleted file mode 100644
+index 9f917298a88f8..0000000000000
+--- a/include/linux/counter_enum.h
++++ /dev/null
+@@ -1,45 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Counter interface enum functions
+- * Copyright (C) 2018 William Breathitt Gray
+- */
+-#ifndef _COUNTER_ENUM_H_
+-#define _COUNTER_ENUM_H_
+-
+-#include <linux/types.h>
+-
+-struct counter_device;
+-struct counter_signal;
+-struct counter_count;
+-
+-ssize_t counter_signal_enum_read(struct counter_device *counter,
+- struct counter_signal *signal, void *priv,
+- char *buf);
+-ssize_t counter_signal_enum_write(struct counter_device *counter,
+- struct counter_signal *signal, void *priv,
+- const char *buf, size_t len);
+-
+-ssize_t counter_signal_enum_available_read(struct counter_device *counter,
+- struct counter_signal *signal,
+- void *priv, char *buf);
+-
+-ssize_t counter_count_enum_read(struct counter_device *counter,
+- struct counter_count *count, void *priv,
+- char *buf);
+-ssize_t counter_count_enum_write(struct counter_device *counter,
+- struct counter_count *count, void *priv,
+- const char *buf, size_t len);
+-
+-ssize_t counter_count_enum_available_read(struct counter_device *counter,
+- struct counter_count *count,
+- void *priv, char *buf);
+-
+-ssize_t counter_device_enum_read(struct counter_device *counter, void *priv,
+- char *buf);
+-ssize_t counter_device_enum_write(struct counter_device *counter, void *priv,
+- const char *buf, size_t len);
+-
+-ssize_t counter_device_enum_available_read(struct counter_device *counter,
+- void *priv, char *buf);
+-
+-#endif /* _COUNTER_ENUM_H_ */
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h
+index 9cf51e41e6972..caf3b95017bfe 100644
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -65,6 +65,15 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+ extern ssize_t cpu_show_itlb_multihit(struct device *dev,
+ struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
++ struct device_attribute *attr,
++ char *buf);
++extern ssize_t cpu_show_retbleed(struct device *dev,
++ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
++ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_gds(struct device *dev,
++ struct device_attribute *attr, char *buf);
+
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
+@@ -182,6 +191,12 @@ void arch_cpu_idle_enter(void);
+ void arch_cpu_idle_exit(void);
+ void arch_cpu_idle_dead(void);
+
++#ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT
++void arch_cpu_finalize_init(void);
++#else
++static inline void arch_cpu_finalize_init(void) { }
++#endif
++
+ int cpu_report_state(int cpu);
+ int cpu_check_up_prepare(int cpu);
+ void cpu_set_state_online(int cpu);
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index ff88bb3e44fca..025391be1b199 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -643,6 +643,11 @@ struct gov_attr_set {
+ /* sysfs ops for cpufreq governors */
+ extern const struct sysfs_ops governor_sysfs_ops;
+
++static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
++{
++ return container_of(kobj, struct gov_attr_set, kobj);
++}
++
+ void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
+ void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
+ unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
+@@ -1041,7 +1046,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
+ if (cpu == pcpu)
+ continue;
+
+- ret = parse_perf_domain(pcpu, list_name, cell_name);
++ ret = parse_perf_domain(cpu, list_name, cell_name);
+ if (ret < 0)
+ continue;
+
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index 991911048857a..dbca858ffa6da 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -99,6 +99,7 @@ enum cpuhp_state {
+ CPUHP_LUSTRE_CFS_DEAD,
+ CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
+ CPUHP_PADATA_DEAD,
++ CPUHP_RANDOM_PREPARE,
+ CPUHP_WORKQUEUE_PREP,
+ CPUHP_POWER_NUMA_PREPARE,
+ CPUHP_HRTIMERS_PREPARE,
+@@ -158,7 +159,6 @@ enum cpuhp_state {
+ CPUHP_AP_PERF_X86_CSTATE_STARTING,
+ CPUHP_AP_PERF_XTENSA_STARTING,
+ CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
+- CPUHP_AP_ARM_SDEI_STARTING,
+ CPUHP_AP_ARM_VFP_STARTING,
+ CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
+ CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
+@@ -238,6 +238,7 @@ enum cpuhp_state {
+ CPUHP_AP_PERF_CSKY_ONLINE,
+ CPUHP_AP_WATCHDOG_ONLINE,
+ CPUHP_AP_WORKQUEUE_ONLINE,
++ CPUHP_AP_RANDOM_ONLINE,
+ CPUHP_AP_RCUTREE_ONLINE,
+ CPUHP_AP_BASE_CACHEINFO_ONLINE,
+ CPUHP_AP_ONLINE_DYN,
+diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
+index 1e7399fc69c0a..b3c230dea0713 100644
+--- a/include/linux/cpumask.h
++++ b/include/linux/cpumask.h
+@@ -1045,4 +1045,23 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
+ [0] = 1UL \
+ } }
+
++/*
++ * Provide a valid theoretical max size for cpumap and cpulist sysfs files
++ * to avoid breaking userspace which may allocate a buffer based on the size
++ * reported by e.g. fstat.
++ *
++ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length.
++ *
++ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up
++ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to
++ * cover a worst-case of every other cpu being on one of two nodes for a
++ * very large NR_CPUS.
++ *
++ * Use PAGE_SIZE as a minimum for smaller configurations while avoiding
++ * unsigned comparison to -1.
++ */
++#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \
++ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
++#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
++
+ #endif /* __LINUX_CPUMASK_H */
+diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
+index d2b9c41c8edf5..82fb7e24d1cbe 100644
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -56,8 +56,10 @@ extern void cpuset_init_smp(void);
+ extern void cpuset_force_rebuild(void);
+ extern void cpuset_update_active_cpus(void);
+ extern void cpuset_wait_for_hotplug(void);
+-extern void cpuset_read_lock(void);
+-extern void cpuset_read_unlock(void);
++extern void inc_dl_tasks_cs(struct task_struct *task);
++extern void dec_dl_tasks_cs(struct task_struct *task);
++extern void cpuset_lock(void);
++extern void cpuset_unlock(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+ extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+@@ -179,8 +181,10 @@ static inline void cpuset_update_active_cpus(void)
+
+ static inline void cpuset_wait_for_hotplug(void) { }
+
+-static inline void cpuset_read_lock(void) { }
+-static inline void cpuset_read_unlock(void) { }
++static inline void inc_dl_tasks_cs(struct task_struct *task) { }
++static inline void dec_dl_tasks_cs(struct task_struct *task) { }
++static inline void cpuset_lock(void) { }
++static inline void cpuset_unlock(void) { }
+
+ static inline void cpuset_cpus_allowed(struct task_struct *p,
+ struct cpumask *mask)
+diff --git a/include/linux/crypto.h b/include/linux/crypto.h
+index 855869e1fd327..987eeb94bb70b 100644
+--- a/include/linux/crypto.h
++++ b/include/linux/crypto.h
+@@ -167,6 +167,7 @@ struct crypto_async_request;
+ struct crypto_tfm;
+ struct crypto_type;
+
++typedef struct crypto_async_request crypto_completion_data_t;
+ typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err);
+
+ /**
+@@ -586,6 +587,11 @@ struct crypto_wait {
+ /*
+ * Async ops completion helper functioons
+ */
++static inline void *crypto_get_completion_data(crypto_completion_data_t *req)
++{
++ return req->data;
++}
++
+ void crypto_req_done(struct crypto_async_request *req, int err);
+
+ static inline int crypto_wait_req(int err, struct crypto_wait *wait)
+diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
+index c869f1e73d755..ea2d919fd9c79 100644
+--- a/include/linux/debugfs.h
++++ b/include/linux/debugfs.h
+@@ -45,7 +45,7 @@ struct debugfs_u32_array {
+
+ extern struct dentry *arch_debugfs_dir;
+
+-#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
++#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
+ static int __fops ## _open(struct inode *inode, struct file *file) \
+ { \
+ __simple_attr_check_format(__fmt, 0ull); \
+@@ -56,10 +56,16 @@ static const struct file_operations __fops = { \
+ .open = __fops ## _open, \
+ .release = simple_attr_release, \
+ .read = debugfs_attr_read, \
+- .write = debugfs_attr_write, \
++ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \
+ .llseek = no_llseek, \
+ }
+
++#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
++ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
++
++#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \
++ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
++
+ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
+
+ #if defined(CONFIG_DEBUG_FS)
+@@ -91,6 +97,8 @@ struct dentry *debugfs_create_automount(const char *name,
+ void debugfs_remove(struct dentry *dentry);
+ #define debugfs_remove_recursive debugfs_remove
+
++void debugfs_lookup_and_remove(const char *name, struct dentry *parent);
++
+ const struct file_operations *debugfs_real_fops(const struct file *filp);
+
+ int debugfs_file_get(struct dentry *dentry);
+@@ -100,6 +108,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos);
+ ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos);
++ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos);
+
+ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+ struct dentry *new_dir, const char *new_name);
+@@ -225,6 +235,10 @@ static inline void debugfs_remove(struct dentry *dentry)
+ static inline void debugfs_remove_recursive(struct dentry *dentry)
+ { }
+
++static inline void debugfs_lookup_and_remove(const char *name,
++ struct dentry *parent)
++{ }
++
+ const struct file_operations *debugfs_real_fops(const struct file *filp);
+
+ static inline int debugfs_file_get(struct dentry *dentry)
+@@ -248,6 +262,13 @@ static inline ssize_t debugfs_attr_write(struct file *file,
+ return -ENODEV;
+ }
+
++static inline ssize_t debugfs_attr_write_signed(struct file *file,
++ const char __user *buf,
++ size_t len, loff_t *ppos)
++{
++ return -ENODEV;
++}
++
+ static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+ struct dentry *new_dir, char *new_name)
+ {
+diff --git a/include/linux/delay.h b/include/linux/delay.h
+index 1d0e2ce6b6d9f..e8607992c68a5 100644
+--- a/include/linux/delay.h
++++ b/include/linux/delay.h
+@@ -20,6 +20,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/sched.h>
+
+ extern unsigned long loops_per_jiffy;
+
+@@ -58,7 +59,18 @@ void calibrate_delay(void);
+ void __attribute__((weak)) calibration_delay_done(void);
+ void msleep(unsigned int msecs);
+ unsigned long msleep_interruptible(unsigned int msecs);
+-void usleep_range(unsigned long min, unsigned long max);
++void usleep_range_state(unsigned long min, unsigned long max,
++ unsigned int state);
++
++static inline void usleep_range(unsigned long min, unsigned long max)
++{
++ usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
++}
++
++static inline void usleep_idle_range(unsigned long min, unsigned long max)
++{
++ usleep_range_state(min, max, TASK_IDLE);
++}
+
+ static inline void ssleep(unsigned int seconds)
+ {
+diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
+index 142474b4af963..d94b9ed9443e5 100644
+--- a/include/linux/devfreq.h
++++ b/include/linux/devfreq.h
+@@ -149,8 +149,8 @@ struct devfreq_stats {
+ * @work: delayed work for load monitoring.
+ * @previous_freq: previously configured frequency value.
+ * @last_status: devfreq user device info, performance statistics
+- * @data: Private data of the governor. The devfreq framework does not
+- * touch this.
++ * @data: devfreq driver pass to governors, governor should not change it.
++ * @governor_data: private data for governors, devfreq core doesn't touch it.
+ * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
+ * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
+ * @scaling_min_freq: Limit minimum frequency requested by OPP interface
+@@ -187,7 +187,8 @@ struct devfreq {
+ unsigned long previous_freq;
+ struct devfreq_dev_status last_status;
+
+- void *data; /* private data for governors */
++ void *data;
++ void *governor_data;
+
+ struct dev_pm_qos_request user_min_freq_req;
+ struct dev_pm_qos_request user_max_freq_req;
+diff --git a/include/linux/dim.h b/include/linux/dim.h
+index b698266d00356..f343bc9aa2ec9 100644
+--- a/include/linux/dim.h
++++ b/include/linux/dim.h
+@@ -21,7 +21,7 @@
+ * We consider 10% difference as significant.
+ */
+ #define IS_SIGNIFICANT_DIFF(val, ref) \
+- (((100UL * abs((val) - (ref))) / (ref)) > 10)
++ ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
+
+ /*
+ * Calculate the gap between two values.
+@@ -236,8 +236,9 @@ void dim_park_tired(struct dim *dim);
+ *
+ * Calculate the delta between two samples (in data rates).
+ * Takes into consideration counter wrap-around.
++ * Returned boolean indicates whether curr_stats are reliable.
+ */
+-void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
++bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ struct dim_stats *curr_stats);
+
+ /**
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
+index 8b32b4bdd5908..3ad636a13b8e9 100644
+--- a/include/linux/dma-buf.h
++++ b/include/linux/dma-buf.h
+@@ -433,7 +433,7 @@ struct dma_buf {
+ wait_queue_head_t *poll;
+
+ __poll_t active;
+- } cb_excl, cb_shared;
++ } cb_in, cb_out;
+ #ifdef CONFIG_DMABUF_SYSFS_STATS
+ /**
+ * @sysfs_entry:
+diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
+index e5c2c9e71bf10..9000f3ffce8b3 100644
+--- a/include/linux/dmaengine.h
++++ b/include/linux/dmaengine.h
+@@ -944,10 +944,8 @@ struct dma_device {
+ void (*device_issue_pending)(struct dma_chan *chan);
+ void (*device_release)(struct dma_device *dev);
+ /* debugfs support */
+-#ifdef CONFIG_DEBUG_FS
+ void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev);
+ struct dentry *dbg_dev_root;
+-#endif
+ };
+
+ static inline int dmaengine_slave_config(struct dma_chan *chan,
+diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
+index 8ae999f587c48..289064b51fa9a 100644
+--- a/include/linux/dsa/ocelot.h
++++ b/include/linux/dsa/ocelot.h
+@@ -12,6 +12,7 @@
+ struct ocelot_skb_cb {
+ struct sk_buff *clone;
+ unsigned int ptp_class; /* valid only for clones */
++ u32 tstamp_lo;
+ u8 ptp_cmd;
+ u8 ts_id;
+ };
+diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
+index dce631e678dd6..8d9eec5f6d8bb 100644
+--- a/include/linux/dynamic_debug.h
++++ b/include/linux/dynamic_debug.h
+@@ -55,9 +55,6 @@ struct _ddebug {
+
+ #if defined(CONFIG_DYNAMIC_DEBUG_CORE)
+
+-/* exported for module authors to exercise >control */
+-int dynamic_debug_exec_queries(const char *query, const char *modname);
+-
+ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
+ const char *modname);
+ extern int ddebug_remove_module(const char *mod_name);
+@@ -201,7 +198,7 @@ static inline int ddebug_remove_module(const char *mod)
+ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
+ const char *modname)
+ {
+- if (strstr(param, "dyndbg")) {
++ if (!strcmp(param, "dyndbg")) {
+ /* avoid pr_warn(), which wants pr_fmt() fully defined */
+ printk(KERN_WARNING "dyndbg param is supported only in "
+ "CONFIG_DYNAMIC_DEBUG builds\n");
+@@ -221,12 +218,6 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
+ rowsize, groupsize, buf, len, ascii); \
+ } while (0)
+
+-static inline int dynamic_debug_exec_queries(const char *query, const char *modname)
+-{
+- pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n");
+- return 0;
+-}
+-
+ #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */
+
+ #endif
+diff --git a/include/linux/efi.h b/include/linux/efi.h
+index 6b5d36babfcc4..5598fc348c69a 100644
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -167,6 +167,8 @@ struct capsule_info {
+ size_t page_bytes_remain;
+ };
+
++int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
++ size_t hdr_bytes);
+ int __efi_capsule_setup_info(struct capsule_info *cap_info);
+
+ /*
+@@ -1112,8 +1114,6 @@ void efi_check_for_embedded_firmwares(void);
+ static inline void efi_check_for_embedded_firmwares(void) { }
+ #endif
+
+-efi_status_t efi_random_get_seed(void);
+-
+ /*
+ * Arch code can implement the following three template macros, avoiding
+ * reptition for the void/non-void return cases of {__,}efi_call_virt():
+@@ -1163,7 +1163,7 @@ efi_status_t efi_random_get_seed(void);
+ arch_efi_call_virt_teardown(); \
+ })
+
+-#define EFI_RANDOM_SEED_SIZE 64U
++#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE
+
+ struct linux_efi_random_seed {
+ u32 size;
+@@ -1282,4 +1282,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find(
+ }
+ #endif
+
++#ifdef CONFIG_SYSFB
++extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
++#else
++static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
++#endif
++
+ #endif /* _LINUX_EFI_H */
+diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
+index 0d7865a0731ce..07c878d6e323e 100644
+--- a/include/linux/entry-kvm.h
++++ b/include/linux/entry-kvm.h
+@@ -75,7 +75,7 @@ static inline void xfer_to_guest_mode_prepare(void)
+ */
+ static inline bool __xfer_to_guest_mode_work_pending(void)
+ {
+- unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
++ unsigned long ti_work = read_thread_flags();
+
+ return !!(ti_work & XFER_TO_GUEST_MODE_WORK);
+ }
+diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
+index c58d504514854..ca0e26a858bee 100644
+--- a/include/linux/etherdevice.h
++++ b/include/linux/etherdevice.h
+@@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr)
+ #endif
+ }
+
+-static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2])
++static inline bool is_multicast_ether_addr_64bits(const u8 *addr)
+ {
+ #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ #ifdef __BIG_ENDIAN
+@@ -323,7 +323,7 @@ static inline void eth_hw_addr_inherit(struct net_device *dst,
+ struct net_device *src)
+ {
+ dst->addr_assign_type = src->addr_assign_type;
+- ether_addr_copy(dst->dev_addr, src->dev_addr);
++ eth_hw_addr_set(dst, src->dev_addr);
+ }
+
+ /**
+@@ -364,8 +364,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
+ * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits.
+ */
+
+-static inline bool ether_addr_equal_64bits(const u8 addr1[6+2],
+- const u8 addr2[6+2])
++static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2)
+ {
+ #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2);
+diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
+index 849524b55d89a..3fad741df53ef 100644
+--- a/include/linux/ethtool.h
++++ b/include/linux/ethtool.h
+@@ -94,7 +94,7 @@ struct ethtool_link_ext_state_info {
+ enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch;
+ enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity;
+ enum ethtool_link_ext_substate_cable_issue cable_issue;
+- u8 __link_ext_substate;
++ u32 __link_ext_substate;
+ };
+ };
+
+diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
+index 1e7bf78cb3829..aba348d58ff61 100644
+--- a/include/linux/ethtool_netlink.h
++++ b/include/linux/ethtool_netlink.h
+@@ -10,6 +10,9 @@
+ #define __ETHTOOL_LINK_MODE_MASK_NWORDS \
+ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
+
++#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \
++ ETHTOOL_A_PAUSE_STAT_TX_FRAMES)
++
+ enum ethtool_multicast_groups {
+ ETHNL_MCGRP_MONITOR,
+ };
+diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
+index 305d5f19093b9..36a486505b081 100644
+--- a/include/linux/eventfd.h
++++ b/include/linux/eventfd.h
+@@ -40,13 +40,14 @@ struct file *eventfd_fget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
+ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
+ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
+ __u64 *cnt);
+ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
+
+ static inline bool eventfd_signal_allowed(void)
+ {
+- return !current->in_eventfd_signal;
++ return !current->in_eventfd;
+ }
+
+ #else /* CONFIG_EVENTFD */
+@@ -61,7 +62,13 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
+ return ERR_PTR(-ENOSYS);
+ }
+
+-static inline int eventfd_signal(struct eventfd_ctx *ctx, int n)
++static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++{
++ return -ENOSYS;
++}
++
++static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
++ unsigned mask)
+ {
+ return -ENOSYS;
+ }
+diff --git a/include/linux/export.h b/include/linux/export.h
+index 27d848712b90b..5910ccb66ca2d 100644
+--- a/include/linux/export.h
++++ b/include/linux/export.h
+@@ -2,6 +2,8 @@
+ #ifndef _LINUX_EXPORT_H
+ #define _LINUX_EXPORT_H
+
++#include <linux/stringify.h>
++
+ /*
+ * Export symbols from the kernel to modules. Forked from module.h
+ * to reduce the amount of pointless cruft we feed to gcc when only
+@@ -154,7 +156,6 @@ struct kernel_symbol {
+ #endif /* CONFIG_MODULES */
+
+ #ifdef DEFAULT_SYMBOL_NAMESPACE
+-#include <linux/stringify.h>
+ #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, __stringify(DEFAULT_SYMBOL_NAMESPACE))
+ #else
+ #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
+@@ -162,8 +163,8 @@ struct kernel_symbol {
+
+ #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "")
+ #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl")
+-#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns)
+-#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns)
++#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", __stringify(ns))
++#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", __stringify(ns))
+
+ #endif /* !__ASSEMBLY__ */
+
+diff --git a/include/linux/extcon.h b/include/linux/extcon.h
+index 0c19010da77fa..685401d94d398 100644
+--- a/include/linux/extcon.h
++++ b/include/linux/extcon.h
+@@ -296,7 +296,7 @@ static inline void devm_extcon_unregister_notifier_all(struct device *dev,
+
+ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
+ {
+- return ERR_PTR(-ENODEV);
++ return NULL;
+ }
+
+ static inline struct extcon_dev *extcon_find_edev_by_node(struct device_node *node)
+diff --git a/include/linux/fb.h b/include/linux/fb.h
+index 5950f8f5dc74d..3d7306c9a7065 100644
+--- a/include/linux/fb.h
++++ b/include/linux/fb.h
+@@ -502,6 +502,7 @@ struct fb_info {
+ } *apertures;
+
+ bool skip_vt_switch; /* no VT switch on suspend/resume required */
++ bool forced_out; /* set when being removed by another driver */
+ };
+
+ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
+@@ -610,6 +611,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
+ const char *name);
+ extern int remove_conflicting_framebuffers(struct apertures_struct *a,
+ const char *name, bool primary);
++extern bool is_firmware_framebuffer(struct apertures_struct *a);
+ extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
+ extern int fb_show_logo(struct fb_info *fb_info, int rotate);
+ extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
+diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h
+index ff5596dd30f85..2382dec6d6ab8 100644
+--- a/include/linux/fbcon.h
++++ b/include/linux/fbcon.h
+@@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info);
+ void fbcon_get_requirement(struct fb_info *info,
+ struct fb_blit_caps *caps);
+ void fbcon_fb_blanked(struct fb_info *info, int blank);
++int fbcon_modechange_possible(struct fb_info *info,
++ struct fb_var_screeninfo *var);
+ void fbcon_update_vcs(struct fb_info *info, bool all);
+ void fbcon_remap_all(struct fb_info *info);
+ int fbcon_set_con2fb_map_ioctl(void __user *argp);
+@@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {}
+ static inline void fbcon_get_requirement(struct fb_info *info,
+ struct fb_blit_caps *caps) {}
+ static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {}
++static inline int fbcon_modechange_possible(struct fb_info *info,
++ struct fb_var_screeninfo *var) { return 0; }
+ static inline void fbcon_update_vcs(struct fb_info *info, bool all) {}
+ static inline void fbcon_remap_all(struct fb_info *info) {}
+ static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; }
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index ef03ff34234d8..a9956b681f090 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -554,9 +554,9 @@ struct bpf_binary_header {
+ };
+
+ struct bpf_prog_stats {
+- u64 cnt;
+- u64 nsecs;
+- u64 misses;
++ u64_stats_t cnt;
++ u64_stats_t nsecs;
++ u64_stats_t misses;
+ struct u64_stats_sync syncp;
+ } __aligned(2 * sizeof(u64));
+
+@@ -613,13 +613,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
+ if (static_branch_unlikely(&bpf_stats_enabled_key)) {
+ struct bpf_prog_stats *stats;
+ u64 start = sched_clock();
++ unsigned long flags;
+
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ stats = this_cpu_ptr(prog->stats);
+- u64_stats_update_begin(&stats->syncp);
+- stats->cnt++;
+- stats->nsecs += sched_clock() - start;
+- u64_stats_update_end(&stats->syncp);
++ flags = u64_stats_update_begin_irqsave(&stats->syncp);
++ u64_stats_inc(&stats->cnt);
++ u64_stats_add(&stats->nsecs, sched_clock() - start);
++ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ } else {
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ }
+@@ -638,9 +639,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+- *
+- * For non RT enabled kernels migrate_disable/enable() maps to
+- * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+ const void *ctx)
+diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
+index c1be37437e778..0c70febd03e95 100644
+--- a/include/linux/fortify-string.h
++++ b/include/linux/fortify-string.h
+@@ -280,7 +280,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __underlying_strcpy(p, q);
+ size = strlen(q) + 1;
+- /* test here to use the more stringent object size */
++ /* Compile-time check for const size overflow. */
++ if (__builtin_constant_p(size) && p_size < size)
++ __write_overflow();
++ /* Run-time check for dynamic size overflow. */
+ if (p_size < size)
+ fortify_panic(__func__);
+ memcpy(p, q, size);
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index e7a633353fd20..6bba7a58c95c6 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -41,6 +41,7 @@
+ #include <linux/stddef.h>
+ #include <linux/mount.h>
+ #include <linux/cred.h>
++#include <linux/mnt_idmapping.h>
+
+ #include <asm/byteorder.h>
+ #include <uapi/linux/fs.h>
+@@ -1194,6 +1195,7 @@ extern int locks_delete_block(struct file_lock *);
+ extern int vfs_test_lock(struct file *, struct file_lock *);
+ extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
+ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
++bool vfs_inode_has_locks(struct inode *inode);
+ extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
+@@ -1306,6 +1308,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+ return 0;
+ }
+
++static inline bool vfs_inode_has_locks(struct inode *inode)
++{
++ return false;
++}
++
+ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
+ {
+ return -ENOLCK;
+@@ -1391,29 +1398,29 @@ extern int send_sigurg(struct fown_struct *fown);
+ * sb->s_flags. Note that these mirror the equivalent MS_* flags where
+ * represented in both.
+ */
+-#define SB_RDONLY 1 /* Mount read-only */
+-#define SB_NOSUID 2 /* Ignore suid and sgid bits */
+-#define SB_NODEV 4 /* Disallow access to device special files */
+-#define SB_NOEXEC 8 /* Disallow program execution */
+-#define SB_SYNCHRONOUS 16 /* Writes are synced at once */
+-#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */
+-#define SB_DIRSYNC 128 /* Directory modifications are synchronous */
+-#define SB_NOATIME 1024 /* Do not update access times. */
+-#define SB_NODIRATIME 2048 /* Do not update directory access times */
+-#define SB_SILENT 32768
+-#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
+-#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
+-#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
+-#define SB_I_VERSION (1<<23) /* Update inode I_version field */
+-#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
++#define SB_RDONLY BIT(0) /* Mount read-only */
++#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */
++#define SB_NODEV BIT(2) /* Disallow access to device special files */
++#define SB_NOEXEC BIT(3) /* Disallow program execution */
++#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */
++#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */
++#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */
++#define SB_NOATIME BIT(10) /* Do not update access times. */
++#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
++#define SB_SILENT BIT(15)
++#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */
++#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
++#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
++#define SB_I_VERSION BIT(23) /* Update inode I_version field */
++#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
+
+ /* These sb flags are internal to the kernel */
+-#define SB_SUBMOUNT (1<<26)
+-#define SB_FORCE (1<<27)
+-#define SB_NOSEC (1<<28)
+-#define SB_BORN (1<<29)
+-#define SB_ACTIVE (1<<30)
+-#define SB_NOUSER (1<<31)
++#define SB_SUBMOUNT BIT(26)
++#define SB_FORCE BIT(27)
++#define SB_NOSEC BIT(28)
++#define SB_BORN BIT(29)
++#define SB_ACTIVE BIT(30)
++#define SB_NOUSER BIT(31)
+
+ /* These flags relate to encoding and casefolding */
+ #define SB_ENC_STRICT_MODE_FL (1 << 0)
+@@ -1486,7 +1493,7 @@ struct super_block {
+ const struct xattr_handler **s_xattr;
+ #ifdef CONFIG_FS_ENCRYPTION
+ const struct fscrypt_operations *s_cop;
+- struct key *s_master_keys; /* master crypto keys in use */
++ struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
+ #endif
+ #ifdef CONFIG_FS_VERITY
+ const struct fsverity_operations *s_vop;
+@@ -1601,6 +1608,11 @@ struct super_block {
+ struct list_head s_inodes_wb; /* writeback inodes */
+ } __randomize_layout;
+
++static inline struct user_namespace *i_user_ns(const struct inode *inode)
++{
++ return inode->i_sb->s_user_ns;
++}
++
+ /* Helper functions so that in most cases filesystems will
+ * not need to deal directly with kuid_t and kgid_t and can
+ * instead deal with the raw numeric values that are stored
+@@ -1608,50 +1620,22 @@ struct super_block {
+ */
+ static inline uid_t i_uid_read(const struct inode *inode)
+ {
+- return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
++ return from_kuid(i_user_ns(inode), inode->i_uid);
+ }
+
+ static inline gid_t i_gid_read(const struct inode *inode)
+ {
+- return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
++ return from_kgid(i_user_ns(inode), inode->i_gid);
+ }
+
+ static inline void i_uid_write(struct inode *inode, uid_t uid)
+ {
+- inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
++ inode->i_uid = make_kuid(i_user_ns(inode), uid);
+ }
+
+ static inline void i_gid_write(struct inode *inode, gid_t gid)
+ {
+- inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
+-}
+-
+-/**
+- * kuid_into_mnt - map a kuid down into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- * @kuid: kuid to be mapped
+- *
+- * Return: @kuid mapped according to @mnt_userns.
+- * If @kuid has no mapping INVALID_UID is returned.
+- */
+-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
+- kuid_t kuid)
+-{
+- return make_kuid(mnt_userns, __kuid_val(kuid));
+-}
+-
+-/**
+- * kgid_into_mnt - map a kgid down into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- * @kgid: kgid to be mapped
+- *
+- * Return: @kgid mapped according to @mnt_userns.
+- * If @kgid has no mapping INVALID_GID is returned.
+- */
+-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
+- kgid_t kgid)
+-{
+- return make_kgid(mnt_userns, __kgid_val(kgid));
++ inode->i_gid = make_kgid(i_user_ns(inode), gid);
+ }
+
+ /**
+@@ -1665,7 +1649,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
+ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+ {
+- return kuid_into_mnt(mnt_userns, inode->i_uid);
++ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
+ }
+
+ /**
+@@ -1679,69 +1663,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
+ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+ {
+- return kgid_into_mnt(mnt_userns, inode->i_gid);
+-}
+-
+-/**
+- * kuid_from_mnt - map a kuid up into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- * @kuid: kuid to be mapped
+- *
+- * Return: @kuid mapped up according to @mnt_userns.
+- * If @kuid has no mapping INVALID_UID is returned.
+- */
+-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
+- kuid_t kuid)
+-{
+- return KUIDT_INIT(from_kuid(mnt_userns, kuid));
+-}
+-
+-/**
+- * kgid_from_mnt - map a kgid up into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- * @kgid: kgid to be mapped
+- *
+- * Return: @kgid mapped up according to @mnt_userns.
+- * If @kgid has no mapping INVALID_GID is returned.
+- */
+-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
+- kgid_t kgid)
+-{
+- return KGIDT_INIT(from_kgid(mnt_userns, kgid));
+-}
+-
+-/**
+- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- *
+- * Use this helper to initialize a new vfs or filesystem object based on
+- * the caller's fsuid. A common example is initializing the i_uid field of
+- * a newly allocated inode triggered by a creation event such as mkdir or
+- * O_CREAT. Other examples include the allocation of quotas for a specific
+- * user.
+- *
+- * Return: the caller's current fsuid mapped up according to @mnt_userns.
+- */
+-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
+-{
+- return kuid_from_mnt(mnt_userns, current_fsuid());
+-}
+-
+-/**
+- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
+- * @mnt_userns: user namespace of the relevant mount
+- *
+- * Use this helper to initialize a new vfs or filesystem object based on
+- * the caller's fsgid. A common example is initializing the i_gid field of
+- * a newly allocated inode triggered by a creation event such as mkdir or
+- * O_CREAT. Other examples include the allocation of quotas for a specific
+- * user.
+- *
+- * Return: the caller's current fsgid mapped up according to @mnt_userns.
+- */
+-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
+-{
+- return kgid_from_mnt(mnt_userns, current_fsgid());
++ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
+ }
+
+ /**
+@@ -1755,7 +1677,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
+ static inline void inode_fsuid_set(struct inode *inode,
+ struct user_namespace *mnt_userns)
+ {
+- inode->i_uid = mapped_fsuid(mnt_userns);
++ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
+ }
+
+ /**
+@@ -1769,7 +1691,7 @@ static inline void inode_fsuid_set(struct inode *inode,
+ static inline void inode_fsgid_set(struct inode *inode,
+ struct user_namespace *mnt_userns)
+ {
+- inode->i_gid = mapped_fsgid(mnt_userns);
++ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
+ }
+
+ /**
+@@ -1786,10 +1708,18 @@ static inline void inode_fsgid_set(struct inode *inode,
+ static inline bool fsuidgid_has_mapping(struct super_block *sb,
+ struct user_namespace *mnt_userns)
+ {
+- struct user_namespace *s_user_ns = sb->s_user_ns;
++ struct user_namespace *fs_userns = sb->s_user_ns;
++ kuid_t kuid;
++ kgid_t kgid;
+
+- return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) &&
+- kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns));
++ kuid = mapped_fsuid(mnt_userns, fs_userns);
++ if (!uid_valid(kuid))
++ return false;
++ kgid = mapped_fsgid(mnt_userns, fs_userns);
++ if (!gid_valid(kgid))
++ return false;
++ return kuid_has_mapping(fs_userns, kuid) &&
++ kgid_has_mapping(fs_userns, kgid);
+ }
+
+ extern struct timespec64 current_time(struct inode *inode);
+@@ -2011,6 +1941,8 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
+ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+ const struct inode *dir, umode_t mode);
+ extern bool may_open_dev(const struct path *path);
++umode_t mode_strip_sgid(struct user_namespace *mnt_userns,
++ const struct inode *dir, umode_t mode);
+
+ /*
+ * This is the "filldir" function type, used by readdir() to let
+@@ -2066,6 +1998,14 @@ struct dir_context {
+ */
+ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
+
++/*
++ * These flags control the behavior of vfs_copy_file_range().
++ * They are not available to the user via syscall.
++ *
++ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
++ */
++#define COPY_FILE_SPLICE (1 << 0)
++
+ struct iov_iter;
+
+ struct file_operations {
+@@ -2364,13 +2304,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ * don't have to write inode on fdatasync() when only
+ * e.g. the timestamps have changed.
+ * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
+- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
++ * I_DIRTY_TIME The inode itself has dirty timestamps, and the
+ * lazytime mount option is enabled. We keep track of this
+ * separately from I_DIRTY_SYNC in order to implement
+ * lazytime. This gets cleared if I_DIRTY_INODE
+- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
+- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
+- * i_state, but not both. I_DIRTY_PAGES may still be set.
++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
++ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
++ * in place because writeback might already be in progress
++ * and we don't want to lose the time update
+ * I_NEW Serves as both a mutex and completion notification.
+ * New inodes set I_NEW. If two processes both create
+ * the same inode, one of them will release its inode and
+@@ -2498,6 +2439,8 @@ enum file_time_flags {
+
+ extern bool atime_needs_update(const struct path *, struct inode *);
+ extern void touch_atime(const struct path *);
++int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
++
+ static inline void file_accessed(struct file *file)
+ {
+ if (!(file->f_flags & O_NOATIME))
+@@ -2724,6 +2667,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
+ {
+ return mnt_user_ns(file->f_path.mnt);
+ }
++
++/**
++ * is_idmapped_mnt - check whether a mount is mapped
++ * @mnt: the mount to check
++ *
++ * If @mnt has an idmapping attached different from the
++ * filesystem's idmapping then @mnt is mapped.
++ *
++ * Return: true if mount is mapped, false if not.
++ */
++static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
++{
++ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
++}
++
+ extern long vfs_truncate(const struct path *, loff_t);
+ int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
+ unsigned int time_attrs, struct file *filp);
+@@ -3175,8 +3133,10 @@ extern void __destroy_inode(struct inode *);
+ extern struct inode *new_inode_pseudo(struct super_block *sb);
+ extern struct inode *new_inode(struct super_block *sb);
+ extern void free_inode_nonrcu(struct inode *inode);
+-extern int should_remove_suid(struct dentry *);
++extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *);
+ extern int file_remove_privs(struct file *);
++int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
++ const struct inode *inode);
+
+ extern void __insert_inode_hash(struct inode *, unsigned long hashval);
+ static inline void insert_inode_hash(struct inode *inode)
+@@ -3557,7 +3517,7 @@ void simple_transaction_set(struct file *file, size_t n);
+ * All attributes contain a text representation of a numeric value
+ * that are accessed with the get() and set() functions.
+ */
+-#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
++#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
+ static int __fops ## _open(struct inode *inode, struct file *file) \
+ { \
+ __simple_attr_check_format(__fmt, 0ull); \
+@@ -3568,10 +3528,16 @@ static const struct file_operations __fops = { \
+ .open = __fops ## _open, \
+ .release = simple_attr_release, \
+ .read = simple_attr_read, \
+- .write = simple_attr_write, \
++ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \
+ .llseek = generic_file_llseek, \
+ }
+
++#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
++ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
++
++#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \
++ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
++
+ static inline __printf(1, 2)
+ void __simple_attr_check_format(const char *fmt, ...)
+ {
+@@ -3586,6 +3552,8 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos);
+ ssize_t simple_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos);
++ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
++ size_t len, loff_t *ppos);
+
+ struct ctl_table;
+ int proc_nr_files(struct ctl_table *table, int write,
+@@ -3605,7 +3573,7 @@ int __init list_bdev_fs_names(char *buf, size_t size);
+
+ static inline bool is_sxid(umode_t mode)
+ {
+- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
++ return mode & (S_ISUID | S_ISGID);
+ }
+
+ static inline int check_sticky(struct user_namespace *mnt_userns,
+diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
+index 6b54982fc5f37..13fa6f3df8e46 100644
+--- a/include/linux/fs_context.h
++++ b/include/linux/fs_context.h
+@@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc);
+ extern int vfs_parse_fs_param_source(struct fs_context *fc,
+ struct fs_parameter *param);
+ extern void fc_drop_locked(struct fs_context *fc);
++int reconfigure_single(struct super_block *s,
++ int flags, void *data);
+
+ /*
+ * sget() wrappers to be called from the ->get_tree() op.
+diff --git a/include/linux/fscache.h b/include/linux/fscache.h
+index a4dab59986137..3b2282c157f79 100644
+--- a/include/linux/fscache.h
++++ b/include/linux/fscache.h
+@@ -167,7 +167,7 @@ struct fscache_cookie {
+
+ static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie)
+ {
+- return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
++ return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+ }
+
+ /*
+diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
+index e912ed9141d9d..3c7ea2cf85a58 100644
+--- a/include/linux/fscrypt.h
++++ b/include/linux/fscrypt.h
+@@ -294,7 +294,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
+ }
+
+ /* keyring.c */
+-void fscrypt_sb_free(struct super_block *sb);
++void fscrypt_destroy_keyring(struct super_block *sb);
+ int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
+ int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
+ int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg);
+@@ -482,7 +482,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy)
+ }
+
+ /* keyring.c */
+-static inline void fscrypt_sb_free(struct super_block *sb)
++static inline void fscrypt_destroy_keyring(struct super_block *sb)
+ {
+ }
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index 12d3a7d308ab9..a9477c14fad5c 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -212,6 +212,42 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
+ fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0);
+ }
+
++/*
++ * fsnotify_delete - @dentry was unlinked and unhashed
++ *
++ * Caller must make sure that dentry->d_name is stable.
++ *
++ * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode
++ * as this may be called after d_delete() and old_dentry may be negative.
++ */
++static inline void fsnotify_delete(struct inode *dir, struct inode *inode,
++ struct dentry *dentry)
++{
++ __u32 mask = FS_DELETE;
++
++ if (S_ISDIR(inode->i_mode))
++ mask |= FS_ISDIR;
++
++ fsnotify_name(dir, mask, inode, &dentry->d_name, 0);
++}
++
++/**
++ * d_delete_notify - delete a dentry and call fsnotify_delete()
++ * @dentry: The dentry to delete
++ *
++ * This helper is used to guaranty that the unlinked inode cannot be found
++ * by lookup of this name after fsnotify_delete() event has been delivered.
++ */
++static inline void d_delete_notify(struct inode *dir, struct dentry *dentry)
++{
++ struct inode *inode = d_inode(dentry);
++
++ ihold(inode);
++ d_delete(dentry);
++ fsnotify_delete(dir, inode, dentry);
++ iput(inode);
++}
++
+ /*
+ * fsnotify_unlink - 'name' was unlinked
+ *
+@@ -219,10 +255,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
+ */
+ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
+ {
+- /* Expected to be called before d_delete() */
+- WARN_ON_ONCE(d_is_negative(dentry));
++ if (WARN_ON_ONCE(d_is_negative(dentry)))
++ return;
+
+- fsnotify_dirent(dir, dentry, FS_DELETE);
++ fsnotify_delete(dir, d_inode(dentry), dentry);
+ }
+
+ /*
+@@ -242,10 +278,10 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
+ */
+ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+- /* Expected to be called before d_delete() */
+- WARN_ON_ONCE(d_is_negative(dentry));
++ if (WARN_ON_ONCE(d_is_negative(dentry)))
++ return;
+
+- fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR);
++ fsnotify_delete(dir, d_inode(dentry), dentry);
+ }
+
+ /*
+diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
+index 832e65f067542..afc678d7fc869 100644
+--- a/include/linux/ftrace.h
++++ b/include/linux/ftrace.h
+@@ -850,7 +850,7 @@ static inline void __ftrace_enabled_restore(int enabled)
+ #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
+ #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
+
+-static inline unsigned long get_lock_parent_ip(void)
++static __always_inline unsigned long get_lock_parent_ip(void)
+ {
+ unsigned long addr = CALLER_ADDR0;
+
+diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
+index 9f4ad719bfe3f..2d68606fb725d 100644
+--- a/include/linux/fwnode.h
++++ b/include/linux/fwnode.h
+@@ -147,12 +147,12 @@ struct fwnode_operations {
+ int (*add_links)(struct fwnode_handle *fwnode);
+ };
+
+-#define fwnode_has_op(fwnode, op) \
+- ((fwnode) && (fwnode)->ops && (fwnode)->ops->op)
++#define fwnode_has_op(fwnode, op) \
++ (!IS_ERR_OR_NULL(fwnode) && (fwnode)->ops && (fwnode)->ops->op)
++
+ #define fwnode_call_int_op(fwnode, op, ...) \
+- (fwnode ? (fwnode_has_op(fwnode, op) ? \
+- (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
+- -EINVAL)
++ (fwnode_has_op(fwnode, op) ? \
++ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : (IS_ERR_OR_NULL(fwnode) ? -EINVAL : -ENXIO))
+
+ #define fwnode_call_bool_op(fwnode, op, ...) \
+ (fwnode_has_op(fwnode, op) ? \
+diff --git a/include/linux/genhd.h b/include/linux/genhd.h
+index 0f5315c2b5a34..690b7f7996d15 100644
+--- a/include/linux/genhd.h
++++ b/include/linux/genhd.h
+@@ -12,12 +12,10 @@
+
+ #include <linux/types.h>
+ #include <linux/kdev_t.h>
+-#include <linux/rcupdate.h>
+-#include <linux/slab.h>
+-#include <linux/percpu-refcount.h>
+ #include <linux/uuid.h>
+ #include <linux/blk_types.h>
+-#include <asm/local.h>
++#include <linux/device.h>
++#include <linux/xarray.h>
+
+ extern const struct device_type disk_type;
+ extern struct device_type part_type;
+@@ -26,14 +24,6 @@ extern struct class block_class;
+ #define DISK_MAX_PARTS 256
+ #define DISK_NAME_LEN 32
+
+-#include <linux/major.h>
+-#include <linux/device.h>
+-#include <linux/smp.h>
+-#include <linux/string.h>
+-#include <linux/fs.h>
+-#include <linux/workqueue.h>
+-#include <linux/xarray.h>
+-
+ #define PARTITION_META_INFO_VOLNAMELTH 64
+ /*
+ * Enough for the string representation of any kind of UUID plus NULL.
+@@ -70,24 +60,15 @@ struct partition_meta_info {
+ * (``BLOCK_EXT_MAJOR``).
+ * This affects the maximum number of partitions.
+ *
+- * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
+- * partition table, the device's capacity has been extended to its
+- * native capacity; i.e. the device has hidden capacity used by one
+- * of the partitions (this is a flag used so that native capacity is
+- * only ever unlocked once).
+- *
+- * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
+- * blocked whenever a writer holds an exclusive lock.
+- *
+- * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
+- * Used for loop devices in their default settings and some MMC
+- * devices.
++ * ``GENHD_FL_NO_PART`` (0x0200): partition support is disabled.
++ * The kernel will not scan for partitions from add_disk, and users
++ * can't add partitions manually.
+ *
+ * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
+ * doesn't produce events, doesn't appear in sysfs, and doesn't have
+ * an associated ``bdev``.
+ * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
+- * ``GENHD_FL_NO_PART_SCAN``.
++ * ``GENHD_FL_NO_PART``.
+ * Used for multipath devices.
+ */
+ #define GENHD_FL_REMOVABLE 0x0001
+@@ -96,9 +77,7 @@ struct partition_meta_info {
+ #define GENHD_FL_CD 0x0008
+ #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020
+ #define GENHD_FL_EXT_DEVT 0x0040
+-#define GENHD_FL_NATIVE_CAPACITY 0x0080
+-#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100
+-#define GENHD_FL_NO_PART_SCAN 0x0200
++#define GENHD_FL_NO_PART 0x0200
+ #define GENHD_FL_HIDDEN 0x0400
+
+ enum {
+@@ -111,6 +90,8 @@ enum {
+ DISK_EVENT_FLAG_POLL = 1 << 0,
+ /* Forward events to udev */
+ DISK_EVENT_FLAG_UEVENT = 1 << 1,
++ /* Block event polling when open for exclusive write */
++ DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2,
+ };
+
+ struct disk_events;
+@@ -150,6 +131,7 @@ struct gendisk {
+ #define GD_NEED_PART_SCAN 0
+ #define GD_READ_ONLY 1
+ #define GD_DEAD 2
++#define GD_NATIVE_CAPACITY 3
+
+ struct mutex open_mutex; /* open/close mutex */
+ unsigned open_partitions; /* number of open partitions */
+@@ -203,8 +185,7 @@ static inline int disk_max_parts(struct gendisk *disk)
+
+ static inline bool disk_part_scan_enabled(struct gendisk *disk)
+ {
+- return disk_max_parts(disk) > 1 &&
+- !(disk->flags & GENHD_FL_NO_PART_SCAN);
++ return disk_max_parts(disk) > 1 && !(disk->flags & GENHD_FL_NO_PART);
+ }
+
+ static inline dev_t disk_devt(struct gendisk *disk)
+diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
+index 12be1601fd845..bcc17f95b9066 100644
+--- a/include/linux/goldfish.h
++++ b/include/linux/goldfish.h
+@@ -8,14 +8,21 @@
+
+ /* Helpers for Goldfish virtual platform */
+
++#ifndef gf_ioread32
++#define gf_ioread32 ioread32
++#endif
++#ifndef gf_iowrite32
++#define gf_iowrite32 iowrite32
++#endif
++
+ static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
+ void __iomem *porth)
+ {
+ const unsigned long addr = (unsigned long)ptr;
+
+- __raw_writel(lower_32_bits(addr), portl);
++ gf_iowrite32(lower_32_bits(addr), portl);
+ #ifdef CONFIG_64BIT
+- __raw_writel(upper_32_bits(addr), porth);
++ gf_iowrite32(upper_32_bits(addr), porth);
+ #endif
+ }
+
+@@ -23,9 +30,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr,
+ void __iomem *portl,
+ void __iomem *porth)
+ {
+- __raw_writel(lower_32_bits(addr), portl);
++ gf_iowrite32(lower_32_bits(addr), portl);
+ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+- __raw_writel(upper_32_bits(addr), porth);
++ gf_iowrite32(upper_32_bits(addr), porth);
+ #endif
+ }
+
+diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
+index 97a28ad3393b5..369902d52f9cd 100644
+--- a/include/linux/gpio/consumer.h
++++ b/include/linux/gpio/consumer.h
+@@ -8,27 +8,16 @@
+ #include <linux/err.h>
+
+ struct device;
+-
+-/**
+- * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are
+- * preferable to the old integer-based handles.
+- *
+- * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid
+- * until the GPIO is released.
+- */
+ struct gpio_desc;
+-
+-/**
+- * Opaque descriptor for a structure of GPIO array attributes. This structure
+- * is attached to struct gpiod_descs obtained from gpiod_get_array() and can be
+- * passed back to get/set array functions in order to activate fast processing
+- * path if applicable.
+- */
+ struct gpio_array;
+
+ /**
+- * Struct containing an array of descriptors that can be obtained using
+- * gpiod_get_array().
++ * struct gpio_descs - Struct containing an array of descriptors that can be
++ * obtained using gpiod_get_array()
++ *
++ * @info: Pointer to the opaque gpio_array structure
++ * @ndescs: Number of held descriptors
++ * @desc: Array of pointers to GPIO descriptors
+ */
+ struct gpio_descs {
+ struct gpio_array *info;
+@@ -43,8 +32,16 @@ struct gpio_descs {
+ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4)
+
+ /**
+- * Optional flags that can be passed to one of gpiod_* to configure direction
+- * and output value. These values cannot be OR'd.
++ * enum gpiod_flags - Optional flags that can be passed to one of gpiod_* to
++ * configure direction and output value. These values
++ * cannot be OR'd.
++ *
++ * @GPIOD_ASIS: Don't change anything
++ * @GPIOD_IN: Set lines to input mode
++ * @GPIOD_OUT_LOW: Set lines to output and drive them low
++ * @GPIOD_OUT_HIGH: Set lines to output and drive them high
++ * @GPIOD_OUT_LOW_OPEN_DRAIN: Set lines to open-drain output and drive them low
++ * @GPIOD_OUT_HIGH_OPEN_DRAIN: Set lines to open-drain output and drive them high
+ */
+ enum gpiod_flags {
+ GPIOD_ASIS = 0,
+diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
+index a0f9901dcae60..65df2ce96f0b1 100644
+--- a/include/linux/gpio/driver.h
++++ b/include/linux/gpio/driver.h
+@@ -168,11 +168,18 @@ struct gpio_irq_chip {
+
+ /**
+ * @parent_handler_data:
++ * @parent_handler_data_array:
+ *
+ * Data associated, and passed to, the handler for the parent
+- * interrupt.
++ * interrupt. Can either be a single pointer if @per_parent_data
++ * is false, or an array of @num_parents pointers otherwise. If
++ * @per_parent_data is true, @parent_handler_data_array cannot be
++ * NULL.
+ */
+- void *parent_handler_data;
++ union {
++ void *parent_handler_data;
++ void **parent_handler_data_array;
++ };
+
+ /**
+ * @num_parents:
+@@ -203,6 +210,14 @@ struct gpio_irq_chip {
+ */
+ bool threaded;
+
++ /**
++ * @per_parent_data:
++ *
++ * True if parent_handler_data_array describes a @num_parents
++ * sized array to be used as parent data.
++ */
++ bool per_parent_data;
++
+ /**
+ * @init_hw: optional routine to initialize hardware before
+ * an IRQ chip will be added. This is quite useful when
+@@ -224,6 +239,15 @@ struct gpio_irq_chip {
+ unsigned long *valid_mask,
+ unsigned int ngpios);
+
++ /**
++ * @initialized:
++ *
++ * Flag to track GPIO chip irq member's initialization.
++ * This flag will make sure GPIO chip irq members are not used
++ * before they are initialized.
++ */
++ bool initialized;
++
+ /**
+ * @valid_mask:
+ *
+@@ -416,7 +440,7 @@ struct gpio_chip {
+ void __iomem *reg_dir_in;
+ bool bgpio_dir_unreadable;
+ int bgpio_bits;
+- spinlock_t bgpio_lock;
++ raw_spinlock_t bgpio_lock;
+ unsigned long bgpio_data;
+ unsigned long bgpio_dir;
+ #endif /* CONFIG_GPIO_GENERIC */
+@@ -472,6 +496,18 @@ struct gpio_chip {
+ */
+ int (*of_xlate)(struct gpio_chip *gc,
+ const struct of_phandle_args *gpiospec, u32 *flags);
++
++ /**
++ * @of_gpio_ranges_fallback:
++ *
++ * Optional hook for the case that no gpio-ranges property is defined
++ * within the device tree node "np" (usually DT before introduction
++ * of gpio-ranges). So this callback is helpful to provide the
++ * necessary backward compatibility for the pin ranges.
++ */
++ int (*of_gpio_ranges_fallback)(struct gpio_chip *gc,
++ struct device_node *np);
++
+ #endif /* CONFIG_OF_GPIO */
+ };
+
+diff --git a/include/linux/hid.h b/include/linux/hid.h
+index 9e067f937dbc2..c3478e396829e 100644
+--- a/include/linux/hid.h
++++ b/include/linux/hid.h
+@@ -349,6 +349,8 @@ struct hid_item {
+ /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */
+ #define HID_QUIRK_ALWAYS_POLL BIT(10)
+ #define HID_QUIRK_INPUT_PER_APP BIT(11)
++#define HID_QUIRK_X_INVERT BIT(12)
++#define HID_QUIRK_Y_INVERT BIT(13)
+ #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16)
+ #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17)
+ #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18)
+@@ -597,6 +599,7 @@ struct hid_device { /* device report descriptor */
+ unsigned long status; /* see STAT flags above */
+ unsigned claimed; /* Claimed by hidinput, hiddev? */
+ unsigned quirks; /* Various quirks the device can pull on us */
++ unsigned initial_quirks; /* Initial set of quirks supplied when creating device */
+ bool io_started; /* If IO has started */
+
+ struct list_head inputs; /* The list of inputs */
+@@ -802,6 +805,7 @@ struct hid_driver {
+ * @output_report: send output report to device
+ * @idle: send idle request to device
+ * @may_wakeup: return if device may act as a wakeup source during system-suspend
++ * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE)
+ */
+ struct hid_ll_driver {
+ int (*start)(struct hid_device *hdev);
+@@ -827,6 +831,8 @@ struct hid_ll_driver {
+
+ int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
+ bool (*may_wakeup)(struct hid_device *hdev);
++
++ unsigned int max_buffer_size;
+ };
+
+ extern struct hid_ll_driver i2c_hid_ll_driver;
+@@ -840,6 +846,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev,
+ return hdev->ll_driver == driver;
+ }
+
++static inline bool hid_is_usb(struct hid_device *hdev)
++{
++ return hid_is_using_ll_driver(hdev, &usb_hid_driver);
++}
++
+ #define PM_HINT_FULLON 1<<5
+ #define PM_HINT_NORMAL 1<<1
+
+diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
+index 4aa1031d3e4c3..de17904b7cb44 100644
+--- a/include/linux/highmem-internal.h
++++ b/include/linux/highmem-internal.h
+@@ -184,7 +184,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
+ static inline void __kunmap_local(void *addr)
+ {
+ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+- kunmap_flush_on_unmap(addr);
++ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
+ #endif
+ }
+
+@@ -211,7 +211,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
+ static inline void __kunmap_atomic(void *addr)
+ {
+ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+- kunmap_flush_on_unmap(addr);
++ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
+ #endif
+ pagefault_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+diff --git a/include/linux/highmem.h b/include/linux/highmem.h
+index b4c49f9cc379e..87763f48c6c3d 100644
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -247,6 +247,30 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
+
+ #endif
+
++#ifdef copy_mc_to_kernel
++static inline int copy_mc_user_highpage(struct page *to, struct page *from,
++ unsigned long vaddr, struct vm_area_struct *vma)
++{
++ unsigned long ret;
++ char *vfrom, *vto;
++
++ vfrom = kmap_local_page(from);
++ vto = kmap_local_page(to);
++ ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
++ kunmap_local(vto);
++ kunmap_local(vfrom);
++
++ return ret;
++}
++#else
++static inline int copy_mc_user_highpage(struct page *to, struct page *from,
++ unsigned long vaddr, struct vm_area_struct *vma)
++{
++ copy_user_highpage(to, from, vaddr, vma);
++ return 0;
++}
++#endif
++
+ #ifndef __HAVE_ARCH_COPY_HIGHPAGE
+
+ static inline void copy_highpage(struct page *to, struct page *from)
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index 1faebe1cd0ed5..4ede8df5818e1 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -7,6 +7,7 @@
+ #include <linux/fs.h>
+ #include <linux/hugetlb_inline.h>
+ #include <linux/cgroup.h>
++#include <linux/page_ref.h>
+ #include <linux/list.h>
+ #include <linux/kref.h>
+ #include <linux/pgtable.h>
+@@ -165,8 +166,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
+ vm_flags_t vm_flags);
+ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+ long freed);
+-bool isolate_huge_page(struct page *page, struct list_head *list);
++int isolate_hugetlb(struct page *page, struct list_head *list);
+ int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
++int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+ void putback_active_hugepage(struct page *page);
+ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
+ void free_huge_page(struct page *page);
+@@ -197,8 +199,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+ struct page *follow_huge_pd(struct vm_area_struct *vma,
+ unsigned long address, hugepd_t hpd,
+ int flags, int pdshift);
+-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+- pmd_t *pmd, int flags);
++struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
++ int flags);
+ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
+ pud_t *pud, int flags);
+ struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
+@@ -285,8 +287,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
+ return NULL;
+ }
+
+-static inline struct page *follow_huge_pmd(struct mm_struct *mm,
+- unsigned long address, pmd_t *pmd, int flags)
++static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma,
++ unsigned long address, int flags)
+ {
+ return NULL;
+ }
+@@ -352,9 +354,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
+ return NULL;
+ }
+
+-static inline bool isolate_huge_page(struct page *page, struct list_head *list)
++static inline int isolate_hugetlb(struct page *page, struct list_head *list)
+ {
+- return false;
++ return -EBUSY;
+ }
+
+ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+@@ -362,6 +364,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+ return 0;
+ }
+
++static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
++{
++ return 0;
++}
++
+ static inline void putback_active_hugepage(struct page *page)
+ {
+ }
+@@ -677,7 +684,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log)
+ if (!page_size_log)
+ return &default_hstate;
+
+- return size_to_hstate(1UL << page_size_log);
++ if (page_size_log < BITS_PER_LONG)
++ return size_to_hstate(1UL << page_size_log);
++
++ return NULL;
+ }
+
+ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+@@ -1093,6 +1103,18 @@ static inline __init void hugetlb_cma_check(void)
+ }
+ #endif
+
++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
++static inline bool hugetlb_pmd_shared(pte_t *pte)
++{
++ return page_count(virt_to_page(pte)) > 1;
++}
++#else
++static inline bool hugetlb_pmd_shared(pte_t *pte)
++{
++ return false;
++}
++#endif
++
+ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
+
+ #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
+index 8e6dd908da216..aa1d4da03538b 100644
+--- a/include/linux/hw_random.h
++++ b/include/linux/hw_random.h
+@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
+ /** Unregister a Hardware Random Number Generator driver. */
+ extern void hwrng_unregister(struct hwrng *rng);
+ extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
+-/** Feed random bits into the pool. */
+-extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy);
+
+ #endif /* LINUX_HWRANDOM_H_ */
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index ddc8713ce57b7..8499fc9220e07 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -1307,6 +1307,8 @@ struct hv_ring_buffer_debug_info {
+ int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
+ struct hv_ring_buffer_debug_info *debug_info);
+
++bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel);
++
+ /* Vmbus interface */
+ #define vmbus_driver_register(driver) \
+ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
+index 694264503119d..00ed7c17698d1 100644
+--- a/include/linux/ieee80211.h
++++ b/include/linux/ieee80211.h
+@@ -1023,6 +1023,8 @@ struct ieee80211_tpc_report_ie {
+ #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1)
+ #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1
+ #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0)
++#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5)
++#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10
+
+ struct ieee80211_addba_ext_ie {
+ u8 data;
+@@ -1697,10 +1699,12 @@ struct ieee80211_ht_operation {
+ * A-MPDU buffer sizes
+ * According to HT size varies from 8 to 64 frames
+ * HE adds the ability to have up to 256 frames.
++ * EHT adds the ability to have up to 1K frames.
+ */
+ #define IEEE80211_MIN_AMPDU_BUF 0x8
+ #define IEEE80211_MAX_AMPDU_BUF_HT 0x40
+-#define IEEE80211_MAX_AMPDU_BUF 0x100
++#define IEEE80211_MAX_AMPDU_BUF_HE 0x100
++#define IEEE80211_MAX_AMPDU_BUF_EHT 0x400
+
+
+ /* Spatial Multiplexing Power Save Modes (for capability) */
+diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
+index b712217f70304..10a1e81434cb9 100644
+--- a/include/linux/if_arp.h
++++ b/include/linux/if_arp.h
+@@ -52,6 +52,11 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ case ARPHRD_RAWIP:
++ case ARPHRD_PIMREG:
++ /* PPP adds its l2 header automatically in ppp_start_xmit().
++ * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init().
++ */
++ case ARPHRD_PPP:
+ return false;
+ default:
+ return true;
+diff --git a/include/linux/if_team.h b/include/linux/if_team.h
+index add607943c956..5dd1657947b75 100644
+--- a/include/linux/if_team.h
++++ b/include/linux/if_team.h
+@@ -208,6 +208,7 @@ struct team {
+ bool queue_override_enabled;
+ struct list_head *qom_lists; /* array of queue override mapping lists */
+ bool port_mtu_change_allowed;
++ bool notifier_ctx;
+ struct {
+ unsigned int count;
+ unsigned int interval; /* in ms */
+diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
+index 41a518336673b..4e7e72f3da5bd 100644
+--- a/include/linux/if_vlan.h
++++ b/include/linux/if_vlan.h
+@@ -626,6 +626,23 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
+ return __vlan_get_protocol(skb, skb->protocol, NULL);
+ }
+
++/* This version of __vlan_get_protocol() also pulls mac header in skb->head */
++static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb,
++ __be16 type, int *depth)
++{
++ int maclen;
++
++ type = __vlan_get_protocol(skb, type, &maclen);
++
++ if (type) {
++ if (!pskb_may_pull(skb, maclen))
++ type = 0;
++ else if (depth)
++ *depth = maclen;
++ }
++ return type;
++}
++
+ /* A getter for the SKB protocol field which will handle VLAN tags consistently
+ * whether VLAN acceleration is enabled or not.
+ */
+diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h
+index c582e1a142320..7b5dbd7499957 100644
+--- a/include/linux/iio/common/cros_ec_sensors_core.h
++++ b/include/linux/iio/common/cros_ec_sensors_core.h
+@@ -95,8 +95,11 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask,
+ struct platform_device;
+ int cros_ec_sensors_core_init(struct platform_device *pdev,
+ struct iio_dev *indio_dev, bool physical_device,
+- cros_ec_sensors_capture_t trigger_capture,
+- cros_ec_sensorhub_push_data_cb_t push_data);
++ cros_ec_sensors_capture_t trigger_capture);
++
++int cros_ec_sensors_core_register(struct device *dev,
++ struct iio_dev *indio_dev,
++ cros_ec_sensorhub_push_data_cb_t push_data);
+
+ irqreturn_t cros_ec_sensors_capture(int irq, void *p);
+ int cros_ec_sensors_push_data(struct iio_dev *indio_dev,
+diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
+index 8bdbaf3f3796b..69f4a1f6b536d 100644
+--- a/include/linux/iio/common/st_sensors.h
++++ b/include/linux/iio/common/st_sensors.h
+@@ -238,6 +238,7 @@ struct st_sensor_settings {
+ * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
+ * @hw_timestamp: Latest timestamp from the interrupt handler, when in use.
+ * @buffer_data: Data used by buffer part.
++ * @odr_lock: Local lock for preventing concurrent ODR accesses/changes
+ */
+ struct st_sensor_data {
+ struct device *dev;
+@@ -263,6 +264,8 @@ struct st_sensor_data {
+ s64 hw_timestamp;
+
+ char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
++
++ struct mutex odr_lock;
+ };
+
+ #ifdef CONFIG_IIO_BUFFER
+diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
+index cf49997d5903e..8210a9e682154 100644
+--- a/include/linux/iio/imu/adis.h
++++ b/include/linux/iio/imu/adis.h
+@@ -32,6 +32,7 @@ struct adis_timeout {
+ u16 sw_reset_ms;
+ u16 self_test_ms;
+ };
++
+ /**
+ * struct adis_data - ADIS chip variant specific data
+ * @read_delay: SPI delay for read operations in us
+@@ -45,10 +46,11 @@ struct adis_timeout {
+ * @self_test_mask: Bitmask of supported self-test operations
+ * @self_test_reg: Register address to request self test command
+ * @self_test_no_autoclear: True if device's self-test needs clear of ctrl reg
+- * @status_error_msgs: Array of error messgaes
++ * @status_error_msgs: Array of error messages
+ * @status_error_mask: Bitmask of errors supported by the device
+ * @timeouts: Chip specific delays
+ * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable
++ * @unmasked_drdy: True for devices that cannot mask/unmask the data ready pin
+ * @has_paging: True if ADIS device has paged registers
+ * @burst_reg_cmd: Register command that triggers burst
+ * @burst_len: Burst size in the SPI RX buffer. If @burst_max_len is defined,
+@@ -78,6 +80,7 @@ struct adis_data {
+ unsigned int status_error_mask;
+
+ int (*enable_irq)(struct adis *adis, bool enable);
++ bool unmasked_drdy;
+
+ bool has_paging;
+
+@@ -128,12 +131,12 @@ struct adis {
+ unsigned long irq_flag;
+ void *buffer;
+
+- uint8_t tx[10] ____cacheline_aligned;
+- uint8_t rx[4];
++ u8 tx[10] ____cacheline_aligned;
++ u8 rx[4];
+ };
+
+ int adis_init(struct adis *adis, struct iio_dev *indio_dev,
+- struct spi_device *spi, const struct adis_data *data);
++ struct spi_device *spi, const struct adis_data *data);
+ int __adis_reset(struct adis *adis);
+
+ /**
+@@ -154,9 +157,9 @@ static inline int adis_reset(struct adis *adis)
+ }
+
+ int __adis_write_reg(struct adis *adis, unsigned int reg,
+- unsigned int val, unsigned int size);
++ unsigned int val, unsigned int size);
+ int __adis_read_reg(struct adis *adis, unsigned int reg,
+- unsigned int *val, unsigned int size);
++ unsigned int *val, unsigned int size);
+
+ /**
+ * __adis_write_reg_8() - Write single byte to a register (unlocked)
+@@ -165,7 +168,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg,
+ * @value: The value to write
+ */
+ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg,
+- uint8_t val)
++ u8 val)
+ {
+ return __adis_write_reg(adis, reg, val, 1);
+ }
+@@ -177,7 +180,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg,
+ * @value: Value to be written
+ */
+ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg,
+- uint16_t val)
++ u16 val)
+ {
+ return __adis_write_reg(adis, reg, val, 2);
+ }
+@@ -189,7 +192,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg,
+ * @value: Value to be written
+ */
+ static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg,
+- uint32_t val)
++ u32 val)
+ {
+ return __adis_write_reg(adis, reg, val, 4);
+ }
+@@ -201,7 +204,7 @@ static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg,
+ * @val: The value read back from the device
+ */
+ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg,
+- uint16_t *val)
++ u16 *val)
+ {
+ unsigned int tmp;
+ int ret;
+@@ -220,7 +223,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg,
+ * @val: The value read back from the device
+ */
+ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg,
+- uint32_t *val)
++ u32 *val)
+ {
+ unsigned int tmp;
+ int ret;
+@@ -240,7 +243,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg,
+ * @size: The size of the @value (in bytes)
+ */
+ static inline int adis_write_reg(struct adis *adis, unsigned int reg,
+- unsigned int val, unsigned int size)
++ unsigned int val, unsigned int size)
+ {
+ int ret;
+
+@@ -259,7 +262,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg,
+ * @size: The size of the @val buffer
+ */
+ static int adis_read_reg(struct adis *adis, unsigned int reg,
+- unsigned int *val, unsigned int size)
++ unsigned int *val, unsigned int size)
+ {
+ int ret;
+
+@@ -277,7 +280,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg,
+ * @value: The value to write
+ */
+ static inline int adis_write_reg_8(struct adis *adis, unsigned int reg,
+- uint8_t val)
++ u8 val)
+ {
+ return adis_write_reg(adis, reg, val, 1);
+ }
+@@ -289,7 +292,7 @@ static inline int adis_write_reg_8(struct adis *adis, unsigned int reg,
+ * @value: Value to be written
+ */
+ static inline int adis_write_reg_16(struct adis *adis, unsigned int reg,
+- uint16_t val)
++ u16 val)
+ {
+ return adis_write_reg(adis, reg, val, 2);
+ }
+@@ -301,7 +304,7 @@ static inline int adis_write_reg_16(struct adis *adis, unsigned int reg,
+ * @value: Value to be written
+ */
+ static inline int adis_write_reg_32(struct adis *adis, unsigned int reg,
+- uint32_t val)
++ u32 val)
+ {
+ return adis_write_reg(adis, reg, val, 4);
+ }
+@@ -313,7 +316,7 @@ static inline int adis_write_reg_32(struct adis *adis, unsigned int reg,
+ * @val: The value read back from the device
+ */
+ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg,
+- uint16_t *val)
++ u16 *val)
+ {
+ unsigned int tmp;
+ int ret;
+@@ -332,7 +335,7 @@ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg,
+ * @val: The value read back from the device
+ */
+ static inline int adis_read_reg_32(struct adis *adis, unsigned int reg,
+- uint32_t *val)
++ u32 *val)
+ {
+ unsigned int tmp;
+ int ret;
+@@ -403,9 +406,20 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg,
+ __adis_update_bits_base(adis, reg, mask, val, 2)); \
+ })
+
+-int adis_enable_irq(struct adis *adis, bool enable);
+ int __adis_check_status(struct adis *adis);
+ int __adis_initial_startup(struct adis *adis);
++int __adis_enable_irq(struct adis *adis, bool enable);
++
++static inline int adis_enable_irq(struct adis *adis, bool enable)
++{
++ int ret;
++
++ mutex_lock(&adis->state_lock);
++ ret = __adis_enable_irq(adis, enable);
++ mutex_unlock(&adis->state_lock);
++
++ return ret;
++}
+
+ static inline int adis_check_status(struct adis *adis)
+ {
+@@ -441,8 +455,8 @@ static inline void adis_dev_unlock(struct adis *adis)
+ }
+
+ int adis_single_conversion(struct iio_dev *indio_dev,
+- const struct iio_chan_spec *chan, unsigned int error_mask,
+- int *val);
++ const struct iio_chan_spec *chan,
++ unsigned int error_mask, int *val);
+
+ #define ADIS_VOLTAGE_CHAN(addr, si, chan, name, info_all, bits) { \
+ .type = IIO_VOLTAGE, \
+@@ -491,7 +505,7 @@ int adis_single_conversion(struct iio_dev *indio_dev,
+ .modified = 1, \
+ .channel2 = IIO_MOD_ ## mod, \
+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
+- info_sep, \
++ (info_sep), \
+ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \
+ .info_mask_shared_by_all = info_all, \
+ .address = (addr), \
+@@ -525,7 +539,7 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
+ int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev);
+
+ int adis_update_scan_mode(struct iio_dev *indio_dev,
+- const unsigned long *scan_mask);
++ const unsigned long *scan_mask);
+
+ #else /* CONFIG_IIO_BUFFER */
+
+@@ -549,7 +563,8 @@ static inline int devm_adis_probe_trigger(struct adis *adis,
+ #ifdef CONFIG_DEBUG_FS
+
+ int adis_debugfs_reg_access(struct iio_dev *indio_dev,
+- unsigned int reg, unsigned int writeval, unsigned int *readval);
++ unsigned int reg, unsigned int writeval,
++ unsigned int *readval);
+
+ #else
+
+diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
+index 096f68dd2e0ca..4c69b144677b1 100644
+--- a/include/linux/iio/trigger.h
++++ b/include/linux/iio/trigger.h
+@@ -55,6 +55,7 @@ struct iio_trigger_ops {
+ * @attached_own_device:[INTERN] if we are using our own device as trigger,
+ * i.e. if we registered a poll function to the same
+ * device as the one providing the trigger.
++ * @reenable_work: [INTERN] work item used to ensure reenable can sleep.
+ **/
+ struct iio_trigger {
+ const struct iio_trigger_ops *ops;
+@@ -74,6 +75,7 @@ struct iio_trigger {
+ unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)];
+ struct mutex pool_lock;
+ bool attached_own_device;
++ struct work_struct reenable_work;
+ };
+
+
+diff --git a/include/linux/ima.h b/include/linux/ima.h
+index b6ab66a546aef..6e1bca75c73bb 100644
+--- a/include/linux/ima.h
++++ b/include/linux/ima.h
+@@ -21,7 +21,8 @@ extern int ima_file_check(struct file *file, int mask);
+ extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+ struct inode *inode);
+ extern void ima_file_free(struct file *file);
+-extern int ima_file_mmap(struct file *file, unsigned long prot);
++extern int ima_file_mmap(struct file *file, unsigned long reqprot,
++ unsigned long prot, unsigned long flags);
+ extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
+ extern int ima_load_data(enum kernel_load_data_id id, bool contents);
+ extern int ima_post_load_data(char *buf, loff_t size,
+@@ -91,7 +92,8 @@ static inline void ima_file_free(struct file *file)
+ return;
+ }
+
+-static inline int ima_file_mmap(struct file *file, unsigned long prot)
++static inline int ima_file_mmap(struct file *file, unsigned long reqprot,
++ unsigned long prot, unsigned long flags)
+ {
+ return 0;
+ }
+diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
+index fa2cd8c63dcc9..24359b4a96053 100644
+--- a/include/linux/instrumentation.h
++++ b/include/linux/instrumentation.h
+@@ -11,7 +11,7 @@
+ asm volatile(__stringify(c) ": nop\n\t" \
+ ".pushsection .discard.instr_begin\n\t" \
+ ".long " __stringify(c) "b - .\n\t" \
+- ".popsection\n\t"); \
++ ".popsection\n\t" : : "i" (c)); \
+ })
+ #define instrumentation_begin() __instrumentation_begin(__COUNTER__)
+
+@@ -50,7 +50,7 @@
+ asm volatile(__stringify(c) ": nop\n\t" \
+ ".pushsection .discard.instr_end\n\t" \
+ ".long " __stringify(c) "b - .\n\t" \
+- ".popsection\n\t"); \
++ ".popsection\n\t" : : "i" (c)); \
+ })
+ #define instrumentation_end() __instrumentation_end(__COUNTER__)
+ #else
+diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
+index 05a65eb155f76..0cf00786a164f 100644
+--- a/include/linux/intel-iommu.h
++++ b/include/linux/intel-iommu.h
+@@ -196,7 +196,6 @@
+ #define ecap_dis(e) (((e) >> 27) & 0x1)
+ #define ecap_nest(e) (((e) >> 26) & 0x1)
+ #define ecap_mts(e) (((e) >> 25) & 0x1)
+-#define ecap_ecs(e) (((e) >> 24) & 0x1)
+ #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+ #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
+ #define ecap_coherent(e) ((e) & 0x1)
+@@ -264,7 +263,6 @@
+ #define DMA_GSTS_CFIS (((u32)1) << 23)
+
+ /* DMA_RTADDR_REG */
+-#define DMA_RTADDR_RTT (((u64)1) << 11)
+ #define DMA_RTADDR_SMT (((u64)1) << 10)
+
+ /* CCMD_REG */
+@@ -517,9 +515,6 @@ struct context_entry {
+ u64 hi;
+ };
+
+-/* si_domain contains mulitple devices */
+-#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
+-
+ /*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+@@ -594,6 +589,7 @@ struct intel_iommu {
+ #ifdef CONFIG_INTEL_IOMMU
+ unsigned long *domain_ids; /* bitmap of domains */
+ struct dmar_domain ***domains; /* ptr to domains */
++ unsigned long *copied_tables; /* bitmap of copied tables */
+ spinlock_t lock; /* protect context, domain ids */
+ struct root_entry *root_entry; /* virtual address */
+
+@@ -713,6 +709,11 @@ static inline int first_pte_in_page(struct dma_pte *pte)
+ return !((unsigned long)pte & ~VTD_PAGE_MASK);
+ }
+
++static inline bool context_present(struct context_entry *context)
++{
++ return (context->lo & 1);
++}
++
+ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+ extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
+
+@@ -806,7 +807,6 @@ static inline void intel_iommu_debugfs_init(void) {}
+ #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
+ extern const struct attribute_group *intel_iommu_groups[];
+-bool context_present(struct context_entry *context);
+ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc);
+
+diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
+index 86af6f0a00a2a..ca98aeadcc804 100644
+--- a/include/linux/io-pgtable.h
++++ b/include/linux/io-pgtable.h
+@@ -74,17 +74,22 @@ struct io_pgtable_cfg {
+ * to support up to 35 bits PA where the bit32, bit33 and bit34 are
+ * encoded in the bit9, bit4 and bit5 of the PTE respectively.
+ *
++ * IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT: (ARM v7s format) MediaTek IOMMUs
++ * extend the translation table base support up to 35 bits PA, the
++ * encoding format is same with IO_PGTABLE_QUIRK_ARM_MTK_EXT.
++ *
+ * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table
+ * for use in the upper half of a split address space.
+ *
+ * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
+ * attributes set in the TCR for a non-coherent page-table walker.
+ */
+- #define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
+- #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
+- #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3)
+- #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
+- #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
++ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
++ #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
++ #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3)
++ #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4)
++ #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
++ #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
+ unsigned long quirks;
+ unsigned long pgsize_bitmap;
+ unsigned int ias;
+diff --git a/include/linux/iomap.h b/include/linux/iomap.h
+index 24f8489583ca7..829f2325ecbab 100644
+--- a/include/linux/iomap.h
++++ b/include/linux/iomap.h
+@@ -330,12 +330,19 @@ struct iomap_dio_ops {
+ */
+ #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1)
+
++/*
++ * When a page fault occurs, return a partial synchronous result and allow
++ * the caller to retry the rest of the operation after dealing with the page
++ * fault.
++ */
++#define IOMAP_DIO_PARTIAL (1 << 2)
++
+ ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+- unsigned int dio_flags);
++ unsigned int dio_flags, size_t done_before);
+ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+- unsigned int dio_flags);
++ unsigned int dio_flags, size_t done_before);
+ ssize_t iomap_dio_complete(struct iomap_dio *dio);
+ int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
+
+diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
+index 2c8860e406bd8..0417360a6db9b 100644
+--- a/include/linux/iopoll.h
++++ b/include/linux/iopoll.h
+@@ -53,6 +53,7 @@
+ } \
+ if (__sleep_us) \
+ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
++ cpu_relax(); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+ })
+@@ -95,6 +96,7 @@
+ } \
+ if (__delay_us) \
+ udelay(__delay_us); \
++ cpu_relax(); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+ })
+diff --git a/include/linux/ioport.h b/include/linux/ioport.h
+index 8359c50f99884..ec5f71f7135b0 100644
+--- a/include/linux/ioport.h
++++ b/include/linux/ioport.h
+@@ -262,6 +262,8 @@ resource_union(struct resource *r1, struct resource *r2, struct resource *r)
+ #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED)
+ #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl)
+ #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0)
++#define request_mem_region_muxed(start, n, name) \
++ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_MUXED)
+ #define request_mem_region_exclusive(start,n,name) \
+ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE)
+ #define rename_region(region, newname) do { (region)->name = (newname); } while (0)
+diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
+index 3f53bc27a19bf..3d088a88f8320 100644
+--- a/include/linux/ioprio.h
++++ b/include/linux/ioprio.h
+@@ -11,7 +11,7 @@
+ /*
+ * Default IO priority.
+ */
+-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
+
+ /*
+ * Check that a priority value has a valid class.
+diff --git a/include/linux/iova.h b/include/linux/iova.h
+index 71d8a2de66354..6b6cc104e300d 100644
+--- a/include/linux/iova.h
++++ b/include/linux/iova.h
+@@ -133,7 +133,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova)
+ return iova >> iova_shift(iovad);
+ }
+
+-#if IS_ENABLED(CONFIG_IOMMU_IOVA)
++#if IS_REACHABLE(CONFIG_IOMMU_IOVA)
+ int iova_cache_get(void);
+ void iova_cache_put(void);
+
+diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
+index 05e22770af517..b75395ec8d521 100644
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+ return ns;
+ }
+
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++ if (ns) {
++ if (refcount_inc_not_zero(&ns->ns.count))
++ return ns;
++ }
++
++ return NULL;
++}
++
+ extern void put_ipc_ns(struct ipc_namespace *ns);
+ #else
+ static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+@@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+ return ns;
+ }
+
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++ return ns;
++}
++
+ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ {
+ }
+diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
+index ef4a69865737c..d1f3864307959 100644
+--- a/include/linux/ipv6.h
++++ b/include/linux/ipv6.h
+@@ -51,7 +51,7 @@ struct ipv6_devconf {
+ __s32 use_optimistic;
+ #endif
+ #ifdef CONFIG_IPV6_MROUTE
+- __s32 mc_forwarding;
++ atomic_t mc_forwarding;
+ #endif
+ __s32 disable_ipv6;
+ __s32 drop_unicast_in_l2_multicast;
+@@ -132,6 +132,7 @@ struct inet6_skb_parm {
+ __u16 dsthao;
+ #endif
+ __u16 frag_max_size;
++ __u16 srhoff;
+
+ #define IP6SKB_XFRM_TRANSFORMED 1
+ #define IP6SKB_FORWARDED 2
+@@ -141,6 +142,7 @@ struct inet6_skb_parm {
+ #define IP6SKB_HOPBYHOP 32
+ #define IP6SKB_L3SLAVE 64
+ #define IP6SKB_JUMBOGRAM 128
++#define IP6SKB_SEG6 256
+ };
+
+ #if defined(CONFIG_NET_L3_MASTER_DEV)
+@@ -282,7 +284,6 @@ struct ipv6_pinfo {
+ __be32 rcv_flowinfo;
+
+ __u32 dst_cookie;
+- __u32 rx_dst_cookie;
+
+ struct ipv6_mc_socklist __rcu *ipv6_mc_list;
+ struct ipv6_ac_socklist *ipv6_ac_list;
+diff --git a/include/linux/irq.h b/include/linux/irq.h
+index c8293c817646c..f9e6449fbbbae 100644
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -875,16 +875,22 @@ static inline int irq_data_get_node(struct irq_data *d)
+ return irq_common_data_get_node(d->common);
+ }
+
+-static inline struct cpumask *irq_get_affinity_mask(int irq)
++static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
+ {
+- struct irq_data *d = irq_get_irq_data(irq);
++ return d->common->affinity;
++}
+
+- return d ? d->common->affinity : NULL;
++static inline void irq_data_update_affinity(struct irq_data *d,
++ const struct cpumask *m)
++{
++ cpumask_copy(d->common->affinity, m);
+ }
+
+-static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
++static inline struct cpumask *irq_get_affinity_mask(int irq)
+ {
+- return d->common->affinity;
++ struct irq_data *d = irq_get_irq_data(irq);
++
++ return d ? irq_data_get_affinity_mask(d) : NULL;
+ }
+
+ #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+@@ -906,7 +912,7 @@ static inline void irq_data_update_effective_affinity(struct irq_data *d,
+ static inline
+ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+ {
+- return d->common->affinity;
++ return irq_data_get_affinity_mask(d);
+ }
+ #endif
+
+diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
+index 600c10da321a7..747f40e0c3260 100644
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -20,13 +20,13 @@
+ #ifdef CONFIG_PROVE_LOCKING
+ extern void lockdep_softirqs_on(unsigned long ip);
+ extern void lockdep_softirqs_off(unsigned long ip);
+- extern void lockdep_hardirqs_on_prepare(unsigned long ip);
++ extern void lockdep_hardirqs_on_prepare(void);
+ extern void lockdep_hardirqs_on(unsigned long ip);
+ extern void lockdep_hardirqs_off(unsigned long ip);
+ #else
+ static inline void lockdep_softirqs_on(unsigned long ip) { }
+ static inline void lockdep_softirqs_off(unsigned long ip) { }
+- static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
++ static inline void lockdep_hardirqs_on_prepare(void) { }
+ static inline void lockdep_hardirqs_on(unsigned long ip) { }
+ static inline void lockdep_hardirqs_off(unsigned long ip) { }
+ #endif
+diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
+index fd933c45281af..ade8a6d7acff9 100644
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -626,12 +626,6 @@ struct transaction_s
+ */
+ struct journal_head *t_checkpoint_list;
+
+- /*
+- * Doubly-linked circular list of all buffers submitted for IO while
+- * checkpointing. [j_list_lock]
+- */
+- struct journal_head *t_checkpoint_io_list;
+-
+ /*
+ * Doubly-linked circular list of metadata buffers being
+ * shadowed by log IO. The IO buffers on the iobuf list and
+@@ -1295,7 +1289,7 @@ struct journal_s
+ * Clean-up after fast commit or full commit. JBD2 calls this function
+ * after every commit operation.
+ */
+- void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
++ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
+
+ /**
+ * @j_fc_replay_callback:
+@@ -1447,6 +1441,7 @@ extern void jbd2_journal_commit_transaction(journal_t *);
+ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
+ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
+ int __jbd2_journal_remove_checkpoint(struct journal_head *);
++int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
+ void jbd2_journal_destroy_checkpoint(journal_t *journal);
+ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
+
+diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
+index 48b9b2a82767d..019e55c13248b 100644
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -261,9 +261,9 @@ extern void static_key_disable_cpuslocked(struct static_key *key);
+ #include <linux/atomic.h>
+ #include <linux/bug.h>
+
+-static inline int static_key_count(struct static_key *key)
++static __always_inline int static_key_count(struct static_key *key)
+ {
+- return atomic_read(&key->enabled);
++ return arch_atomic_read(&key->enabled);
+ }
+
+ static __always_inline void jump_label_init(void)
+diff --git a/include/linux/kasan.h b/include/linux/kasan.h
+index dd874a1ee862a..f407e937241af 100644
+--- a/include/linux/kasan.h
++++ b/include/linux/kasan.h
+@@ -461,12 +461,12 @@ static inline void kasan_release_vmalloc(unsigned long start,
+ * allocations with real shadow memory. With KASAN vmalloc, the special
+ * case is unnecessary, as the work is handled in the generic case.
+ */
+-int kasan_module_alloc(void *addr, size_t size);
++int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
+ void kasan_free_shadow(const struct vm_struct *vm);
+
+ #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
+
+-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
++static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
+ static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+
+ #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
+diff --git a/include/linux/kernel.h b/include/linux/kernel.h
+index 2776423a587e4..f56cd8879a594 100644
+--- a/include/linux/kernel.h
++++ b/include/linux/kernel.h
+@@ -277,7 +277,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
+ return buf;
+ }
+
+-extern int hex_to_bin(char ch);
++extern int hex_to_bin(unsigned char ch);
+ extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
+ extern char *bin2hex(char *dst, const void *src, size_t count);
+
+diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
+index 44ae1a7eb9e39..f9460fbea0a81 100644
+--- a/include/linux/kernel_stat.h
++++ b/include/linux/kernel_stat.h
+@@ -72,7 +72,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq);
+ /*
+ * Number of interrupts per cpu, since bootup
+ */
+-static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
++static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu)
+ {
+ return kstat_cpu(cpu).irqs_sum;
+ }
+@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64,
+ enum cpu_usage_stat);
+ extern void account_steal_time(u64);
+ extern void account_idle_time(u64);
++extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
+
+ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ static inline void account_process_tick(struct task_struct *tsk, int user)
+diff --git a/include/linux/kexec.h b/include/linux/kexec.h
+index 0c994ae37729e..88c289ce3039c 100644
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -187,14 +187,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
+ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len);
+ void *arch_kexec_kernel_image_load(struct kimage *image);
+-int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+- Elf_Shdr *section,
+- const Elf_Shdr *relsec,
+- const Elf_Shdr *symtab);
+-int arch_kexec_apply_relocations(struct purgatory_info *pi,
+- Elf_Shdr *section,
+- const Elf_Shdr *relsec,
+- const Elf_Shdr *symtab);
+ int arch_kimage_file_post_load_cleanup(struct kimage *image);
+ #ifdef CONFIG_KEXEC_SIG
+ int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+@@ -223,6 +215,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem,
+ unsigned long long mend);
+ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+ void **addr, unsigned long *sz);
++
++#ifndef arch_kexec_apply_relocations_add
++/*
++ * arch_kexec_apply_relocations_add - apply relocations of type RELA
++ * @pi: Purgatory to be relocated.
++ * @section: Section relocations applying to.
++ * @relsec: Section containing RELAs.
++ * @symtab: Corresponding symtab.
++ *
++ * Return: 0 on success, negative errno on error.
++ */
++static inline int
++arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
++ const Elf_Shdr *relsec, const Elf_Shdr *symtab)
++{
++ pr_err("RELA relocation unsupported.\n");
++ return -ENOEXEC;
++}
++#endif
++
++#ifndef arch_kexec_apply_relocations
++/*
++ * arch_kexec_apply_relocations - apply relocations of type REL
++ * @pi: Purgatory to be relocated.
++ * @section: Section relocations applying to.
++ * @relsec: Section containing RELs.
++ * @symtab: Corresponding symtab.
++ *
++ * Return: 0 on success, negative errno on error.
++ */
++static inline int
++arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
++ const Elf_Shdr *relsec, const Elf_Shdr *symtab)
++{
++ pr_err("REL relocation unsupported.\n");
++ return -ENOEXEC;
++}
++#endif
+ #endif /* CONFIG_KEXEC_FILE */
+
+ #ifdef CONFIG_KEXEC_ELF
+@@ -360,8 +390,8 @@ extern note_buf_t __percpu *crash_notes;
+ extern bool kexec_in_progress;
+
+ int crash_shrink_memory(unsigned long new_size);
+-size_t crash_get_memory_size(void);
+ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
++ssize_t crash_get_memory_size(void);
+
+ void arch_kexec_protect_crashkres(void);
+ void arch_kexec_unprotect_crashkres(void);
+@@ -422,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; }
+ #define kexec_in_progress false
+ #endif /* CONFIG_KEXEC_CORE */
+
++#ifdef CONFIG_KEXEC_SIG
++void set_kexec_sig_enforced(void);
++#else
++static inline void set_kexec_sig_enforced(void) {}
++#endif
++
+ #endif /* !defined(__ASSEBMLY__) */
+
+ #endif /* LINUX_KEXEC_H */
+diff --git a/include/linux/kfence.h b/include/linux/kfence.h
+index 3fe6dd8a18c19..3c75209a545e1 100644
+--- a/include/linux/kfence.h
++++ b/include/linux/kfence.h
+@@ -14,6 +14,9 @@
+
+ #ifdef CONFIG_KFENCE
+
++#include <linux/atomic.h>
++#include <linux/static_key.h>
++
+ /*
+ * We allocate an even number of pages, as it simplifies calculations to map
+ * address to metadata indices; effectively, the very first page serves as an
+@@ -22,13 +25,8 @@
+ #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
+ extern char *__kfence_pool;
+
+-#ifdef CONFIG_KFENCE_STATIC_KEYS
+-#include <linux/static_key.h>
+ DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
+-#else
+-#include <linux/atomic.h>
+ extern atomic_t kfence_allocation_gate;
+-#endif
+
+ /**
+ * is_kfence_address() - check if an address belongs to KFENCE pool
+@@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
+ */
+ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+ {
+-#ifdef CONFIG_KFENCE_STATIC_KEYS
+- if (static_branch_unlikely(&kfence_allocation_key))
++#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0
++ if (!static_branch_unlikely(&kfence_allocation_key))
++ return NULL;
+ #else
+- if (unlikely(!atomic_read(&kfence_allocation_gate)))
++ if (!static_branch_likely(&kfence_allocation_key))
++ return NULL;
+ #endif
+- return __kfence_alloc(s, size, flags);
+- return NULL;
++ if (likely(atomic_read(&kfence_allocation_gate)))
++ return NULL;
++ return __kfence_alloc(s, size, flags);
+ }
+
+ /**
+@@ -201,6 +202,22 @@ static __always_inline __must_check bool kfence_free(void *addr)
+ */
+ bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs);
+
++#ifdef CONFIG_PRINTK
++struct kmem_obj_info;
++/**
++ * __kfence_obj_info() - fill kmem_obj_info struct
++ * @kpp: kmem_obj_info to be filled
++ * @object: the object
++ *
++ * Return:
++ * * false - not a KFENCE object
++ * * true - a KFENCE object, filled @kpp
++ *
++ * Copies information to @kpp for KFENCE objects.
++ */
++bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
++#endif
++
+ #else /* CONFIG_KFENCE */
+
+ static inline bool is_kfence_address(const void *addr) { return false; }
+@@ -218,6 +235,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo
+ return false;
+ }
+
++#ifdef CONFIG_PRINTK
++struct kmem_obj_info;
++static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++{
++ return false;
++}
++#endif
++
+ #endif
+
+ #endif /* _LINUX_KFENCE_H */
+diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
+index 86249476b57f4..0b35a41440ff1 100644
+--- a/include/linux/kfifo.h
++++ b/include/linux/kfifo.h
+@@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \
+ * writer, you don't need extra locking to use these macro.
+ */
+ #define kfifo_to_user(fifo, to, len, copied) \
+-__kfifo_uint_must_check_helper( \
++__kfifo_int_must_check_helper( \
+ ({ \
+ typeof((fifo) + 1) __tmp = (fifo); \
+ void __user *__to = (to); \
+diff --git a/include/linux/kobject.h b/include/linux/kobject.h
+index ea30529fba08a..d38916e598a59 100644
+--- a/include/linux/kobject.h
++++ b/include/linux/kobject.h
+@@ -116,7 +116,7 @@ extern void kobject_put(struct kobject *kobj);
+ extern const void *kobject_namespace(struct kobject *kobj);
+ extern void kobject_get_ownership(struct kobject *kobj,
+ kuid_t *uid, kgid_t *gid);
+-extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
++extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag);
+
+ /**
+ * kobject_has_children - Returns whether a kobject has children.
+diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
+index e4f3bfe087570..2cbb6a51c2912 100644
+--- a/include/linux/kprobes.h
++++ b/include/linux/kprobes.h
+@@ -154,6 +154,8 @@ struct kretprobe {
+ struct kretprobe_holder *rph;
+ };
+
++#define KRETPROBE_MAX_DATA_SIZE 4096
++
+ struct kretprobe_instance {
+ union {
+ struct freelist_node freelist;
+@@ -347,6 +349,8 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
+ size_t *length, loff_t *ppos);
+ #endif
+ extern void wait_for_kprobe_optimizer(void);
++bool optprobe_queued_unopt(struct optimized_kprobe *op);
++bool kprobe_disarmed(struct kprobe *p);
+ #else
+ static inline void wait_for_kprobe_optimizer(void) { }
+ #endif /* CONFIG_OPTPROBES */
+diff --git a/include/linux/kthread.h b/include/linux/kthread.h
+index 346b0f269161a..db47aae7c481b 100644
+--- a/include/linux/kthread.h
++++ b/include/linux/kthread.h
+@@ -56,6 +56,31 @@ bool kthread_is_per_cpu(struct task_struct *k);
+ __k; \
+ })
+
++/**
++ * kthread_run_on_cpu - create and wake a cpu bound thread.
++ * @threadfn: the function to run until signal_pending(current).
++ * @data: data ptr for @threadfn.
++ * @cpu: The cpu on which the thread should be bound,
++ * @namefmt: printf-style name for the thread. Format is restricted
++ * to "name.*%u". Code fills in cpu number.
++ *
++ * Description: Convenient wrapper for kthread_create_on_cpu()
++ * followed by wake_up_process(). Returns the kthread or
++ * ERR_PTR(-ENOMEM).
++ */
++static inline struct task_struct *
++kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
++ unsigned int cpu, const char *namefmt)
++{
++ struct task_struct *p;
++
++ p = kthread_create_on_cpu(threadfn, data, cpu, namefmt);
++ if (!IS_ERR(p))
++ wake_up_process(p);
++
++ return p;
++}
++
+ void free_kthread_struct(struct task_struct *k);
+ void kthread_bind(struct task_struct *k, unsigned int cpu);
+ void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 0f18df7fe8749..7e2423ffaf593 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -15,6 +15,8 @@
+ #include <linux/minmax.h>
+ #include <linux/mm.h>
+ #include <linux/mmu_notifier.h>
++#include <linux/ftrace.h>
++#include <linux/instrumentation.h>
+ #include <linux/preempt.h>
+ #include <linux/msi.h>
+ #include <linux/slab.h>
+@@ -363,8 +365,11 @@ struct kvm_vcpu {
+ int last_used_slot;
+ };
+
+-/* must be called with irqs disabled */
+-static __always_inline void guest_enter_irqoff(void)
++/*
++ * Start accounting time towards a guest.
++ * Must be called before entering guest context.
++ */
++static __always_inline void guest_timing_enter_irqoff(void)
+ {
+ /*
+ * This is running in ioctl context so its safe to assume that it's the
+@@ -373,7 +378,18 @@ static __always_inline void guest_enter_irqoff(void)
+ instrumentation_begin();
+ vtime_account_guest_enter();
+ instrumentation_end();
++}
+
++/*
++ * Enter guest context and enter an RCU extended quiescent state.
++ *
++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
++ * unsafe to use any code which may directly or indirectly use RCU, tracing
++ * (including IRQ flag tracing), or lockdep. All code in this period must be
++ * non-instrumentable.
++ */
++static __always_inline void guest_context_enter_irqoff(void)
++{
+ /*
+ * KVM does not hold any references to rcu protected data when it
+ * switches CPU into a guest mode. In fact switching to a guest mode
+@@ -389,16 +405,79 @@ static __always_inline void guest_enter_irqoff(void)
+ }
+ }
+
+-static __always_inline void guest_exit_irqoff(void)
++/*
++ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
++ * guest_state_enter_irqoff().
++ */
++static __always_inline void guest_enter_irqoff(void)
++{
++ guest_timing_enter_irqoff();
++ guest_context_enter_irqoff();
++}
++
++/**
++ * guest_state_enter_irqoff - Fixup state when entering a guest
++ *
++ * Entry to a guest will enable interrupts, but the kernel state is interrupts
++ * disabled when this is invoked. Also tell RCU about it.
++ *
++ * 1) Trace interrupts on state
++ * 2) Invoke context tracking if enabled to adjust RCU state
++ * 3) Tell lockdep that interrupts are enabled
++ *
++ * Invoked from architecture specific code before entering a guest.
++ * Must be called with interrupts disabled and the caller must be
++ * non-instrumentable.
++ * The caller has to invoke guest_timing_enter_irqoff() before this.
++ *
++ * Note: this is analogous to exit_to_user_mode().
++ */
++static __always_inline void guest_state_enter_irqoff(void)
++{
++ instrumentation_begin();
++ trace_hardirqs_on_prepare();
++ lockdep_hardirqs_on_prepare();
++ instrumentation_end();
++
++ guest_context_enter_irqoff();
++ lockdep_hardirqs_on(CALLER_ADDR0);
++}
++
++/*
++ * Exit guest context and exit an RCU extended quiescent state.
++ *
++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
++ * unsafe to use any code which may directly or indirectly use RCU, tracing
++ * (including IRQ flag tracing), or lockdep. All code in this period must be
++ * non-instrumentable.
++ */
++static __always_inline void guest_context_exit_irqoff(void)
+ {
+ context_tracking_guest_exit();
++}
+
++/*
++ * Stop accounting time towards a guest.
++ * Must be called after exiting guest context.
++ */
++static __always_inline void guest_timing_exit_irqoff(void)
++{
+ instrumentation_begin();
+ /* Flush the guest cputime we spent on the guest */
+ vtime_account_guest_exit();
+ instrumentation_end();
+ }
+
++/*
++ * Deprecated. Architectures should move to guest_state_exit_irqoff() and
++ * guest_timing_exit_irqoff().
++ */
++static __always_inline void guest_exit_irqoff(void)
++{
++ guest_context_exit_irqoff();
++ guest_timing_exit_irqoff();
++}
++
+ static inline void guest_exit(void)
+ {
+ unsigned long flags;
+@@ -408,6 +487,33 @@ static inline void guest_exit(void)
+ local_irq_restore(flags);
+ }
+
++/**
++ * guest_state_exit_irqoff - Establish state when returning from guest mode
++ *
++ * Entry from a guest disables interrupts, but guest mode is traced as
++ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
++ *
++ * 1) Tell lockdep that interrupts are disabled
++ * 2) Invoke context tracking if enabled to reactivate RCU
++ * 3) Trace interrupts off state
++ *
++ * Invoked from architecture specific code after exiting a guest.
++ * Must be invoked with interrupts disabled and the caller must be
++ * non-instrumentable.
++ * The caller has to invoke guest_timing_exit_irqoff() after this.
++ *
++ * Note: this is analogous to enter_from_user_mode().
++ */
++static __always_inline void guest_state_exit_irqoff(void)
++{
++ lockdep_hardirqs_off(CALLER_ADDR0);
++ guest_context_exit_irqoff();
++
++ instrumentation_begin();
++ trace_hardirqs_off_finish();
++ instrumentation_end();
++}
++
+ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
+ {
+ /*
+@@ -1018,6 +1124,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap);
+ long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg);
++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
++ unsigned long arg);
+
+ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+@@ -1127,7 +1235,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm)
+ {
+ }
+
+-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+ {
+ return false;
+ }
+@@ -1806,6 +1914,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
++void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
++
+ #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
+ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
+ #else
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index c0c64f03e1074..d890c43cff146 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -394,7 +394,7 @@ enum {
+ /* This should match the actual table size of
+ * ata_eh_cmd_timeout_table in libata-eh.c.
+ */
+- ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
++ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7,
+
+ /* Horkage types. May be set by libata or controller on drives
+ (some horkage may be drive/controller pair dependent */
+@@ -565,7 +565,10 @@ struct ata_taskfile {
+ u8 hob_lbam;
+ u8 hob_lbah;
+
+- u8 feature;
++ union {
++ u8 error;
++ u8 feature;
++ };
+ u8 nsect;
+ u8 lbal;
+ u8 lbam;
+@@ -573,7 +576,10 @@ struct ata_taskfile {
+
+ u8 device;
+
+- u8 command; /* IO operation */
++ union {
++ u8 status;
++ u8 command;
++ };
+
+ u32 auxiliary; /* auxiliary field */
+ /* from SATA 3.1 and */
+@@ -1471,51 +1477,61 @@ static inline int sata_srst_pmp(struct ata_link *link)
+ return link->pmp;
+ }
+
+-/*
+- * printk helpers
+- */
+-__printf(3, 4)
+-void ata_port_printk(const struct ata_port *ap, const char *level,
+- const char *fmt, ...);
+-__printf(3, 4)
+-void ata_link_printk(const struct ata_link *link, const char *level,
+- const char *fmt, ...);
+-__printf(3, 4)
+-void ata_dev_printk(const struct ata_device *dev, const char *level,
+- const char *fmt, ...);
++#define ata_port_printk(level, ap, fmt, ...) \
++ pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__)
+
+ #define ata_port_err(ap, fmt, ...) \
+- ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__)
++ ata_port_printk(err, ap, fmt, ##__VA_ARGS__)
+ #define ata_port_warn(ap, fmt, ...) \
+- ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__)
++ ata_port_printk(warn, ap, fmt, ##__VA_ARGS__)
+ #define ata_port_notice(ap, fmt, ...) \
+- ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__)
++ ata_port_printk(notice, ap, fmt, ##__VA_ARGS__)
+ #define ata_port_info(ap, fmt, ...) \
+- ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__)
++ ata_port_printk(info, ap, fmt, ##__VA_ARGS__)
+ #define ata_port_dbg(ap, fmt, ...) \
+- ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__)
++ ata_port_printk(debug, ap, fmt, ##__VA_ARGS__)
++
++#define ata_link_printk(level, link, fmt, ...) \
++do { \
++ if (sata_pmp_attached((link)->ap) || \
++ (link)->ap->slave_link) \
++ pr_ ## level ("ata%u.%02u: " fmt, \
++ (link)->ap->print_id, \
++ (link)->pmp, \
++ ##__VA_ARGS__); \
++ else \
++ pr_ ## level ("ata%u: " fmt, \
++ (link)->ap->print_id, \
++ ##__VA_ARGS__); \
++} while (0)
+
+ #define ata_link_err(link, fmt, ...) \
+- ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__)
++ ata_link_printk(err, link, fmt, ##__VA_ARGS__)
+ #define ata_link_warn(link, fmt, ...) \
+- ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__)
++ ata_link_printk(warn, link, fmt, ##__VA_ARGS__)
+ #define ata_link_notice(link, fmt, ...) \
+- ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__)
++ ata_link_printk(notice, link, fmt, ##__VA_ARGS__)
+ #define ata_link_info(link, fmt, ...) \
+- ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__)
++ ata_link_printk(info, link, fmt, ##__VA_ARGS__)
+ #define ata_link_dbg(link, fmt, ...) \
+- ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__)
++ ata_link_printk(debug, link, fmt, ##__VA_ARGS__)
++
++#define ata_dev_printk(level, dev, fmt, ...) \
++ pr_ ## level("ata%u.%02u: " fmt, \
++ (dev)->link->ap->print_id, \
++ (dev)->link->pmp + (dev)->devno, \
++ ##__VA_ARGS__)
+
+ #define ata_dev_err(dev, fmt, ...) \
+- ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__)
++ ata_dev_printk(err, dev, fmt, ##__VA_ARGS__)
+ #define ata_dev_warn(dev, fmt, ...) \
+- ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__)
++ ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__)
+ #define ata_dev_notice(dev, fmt, ...) \
+- ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__)
++ ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__)
+ #define ata_dev_info(dev, fmt, ...) \
+- ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__)
++ ata_dev_printk(info, dev, fmt, ##__VA_ARGS__)
+ #define ata_dev_dbg(dev, fmt, ...) \
+- ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__)
++ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__)
+
+ void ata_print_version(const struct device *dev, const char *version);
+
+@@ -2049,11 +2065,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap)
+ {
+ u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
+
+-#ifdef ATA_DEBUG
+ if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ)))
+- ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n",
+- status);
+-#endif
++ ata_port_dbg(ap, "abnormal Status 0x%X\n", status);
+
+ return status;
+ }
+diff --git a/include/linux/list.h b/include/linux/list.h
+index f2af4b4aa4e9a..d206ae93c06da 100644
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -33,7 +33,7 @@
+ static inline void INIT_LIST_HEAD(struct list_head *list)
+ {
+ WRITE_ONCE(list->next, list);
+- list->prev = list;
++ WRITE_ONCE(list->prev, list);
+ }
+
+ #ifdef CONFIG_DEBUG_LIST
+@@ -256,8 +256,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+-static inline int list_is_first(const struct list_head *list,
+- const struct list_head *head)
++static inline int list_is_first(const struct list_head *list, const struct list_head *head)
+ {
+ return list->prev == head;
+ }
+@@ -267,12 +266,21 @@ static inline int list_is_first(const struct list_head *list,
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+-static inline int list_is_last(const struct list_head *list,
+- const struct list_head *head)
++static inline int list_is_last(const struct list_head *list, const struct list_head *head)
+ {
+ return list->next == head;
+ }
+
++/**
++ * list_is_head - tests whether @list is the list @head
++ * @list: the entry to test
++ * @head: the head of the list
++ */
++static inline int list_is_head(const struct list_head *list, const struct list_head *head)
++{
++ return list == head;
++}
++
+ /**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+@@ -296,7 +304,7 @@ static inline int list_empty(const struct list_head *head)
+ static inline void list_del_init_careful(struct list_head *entry)
+ {
+ __list_del_entry(entry);
+- entry->prev = entry;
++ WRITE_ONCE(entry->prev, entry);
+ smp_store_release(&entry->next, entry);
+ }
+
+@@ -316,7 +324,7 @@ static inline void list_del_init_careful(struct list_head *entry)
+ static inline int list_empty_careful(const struct list_head *head)
+ {
+ struct list_head *next = smp_load_acquire(&head->next);
+- return (next == head) && (next == head->prev);
++ return list_is_head(next, head) && (next == READ_ONCE(head->prev));
+ }
+
+ /**
+@@ -391,10 +399,9 @@ static inline void list_cut_position(struct list_head *list,
+ {
+ if (list_empty(head))
+ return;
+- if (list_is_singular(head) &&
+- (head->next != entry && head != entry))
++ if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next))
+ return;
+- if (entry == head)
++ if (list_is_head(entry, head))
+ INIT_LIST_HEAD(list);
+ else
+ __list_cut_position(list, head, entry);
+@@ -568,7 +575,17 @@ static inline void list_splice_tail_init(struct list_head *list,
+ * @head: the head for your list.
+ */
+ #define list_for_each(pos, head) \
+- for (pos = (head)->next; pos != (head); pos = pos->next)
++ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
++
++/**
++ * list_for_each_rcu - Iterate over a list in an RCU-safe fashion
++ * @pos: the &struct list_head to use as a loop cursor.
++ * @head: the head for your list.
++ */
++#define list_for_each_rcu(pos, head) \
++ for (pos = rcu_dereference((head)->next); \
++ !list_is_head(pos, (head)); \
++ pos = rcu_dereference(pos->next))
+
+ /**
+ * list_for_each_continue - continue iteration over a list
+@@ -578,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list,
+ * Continue to iterate over a list, continuing after the current position.
+ */
+ #define list_for_each_continue(pos, head) \
+- for (pos = pos->next; pos != (head); pos = pos->next)
++ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next)
+
+ /**
+ * list_for_each_prev - iterate over a list backwards
+@@ -586,7 +603,7 @@ static inline void list_splice_tail_init(struct list_head *list,
+ * @head: the head for your list.
+ */
+ #define list_for_each_prev(pos, head) \
+- for (pos = (head)->prev; pos != (head); pos = pos->prev)
++ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
+
+ /**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+@@ -595,8 +612,9 @@ static inline void list_splice_tail_init(struct list_head *list,
+ * @head: the head for your list.
+ */
+ #define list_for_each_safe(pos, n, head) \
+- for (pos = (head)->next, n = pos->next; pos != (head); \
+- pos = n, n = pos->next)
++ for (pos = (head)->next, n = pos->next; \
++ !list_is_head(pos, (head)); \
++ pos = n, n = pos->next)
+
+ /**
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+@@ -606,7 +624,7 @@ static inline void list_splice_tail_init(struct list_head *list,
+ */
+ #define list_for_each_prev_safe(pos, n, head) \
+ for (pos = (head)->prev, n = pos->prev; \
+- pos != (head); \
++ !list_is_head(pos, (head)); \
+ pos = n, n = pos->prev)
+
+ /**
+diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
+index a98309c0121cb..bed63156b0521 100644
+--- a/include/linux/lockd/xdr.h
++++ b/include/linux/lockd/xdr.h
+@@ -41,6 +41,8 @@ struct nlm_lock {
+ struct nfs_fh fh;
+ struct xdr_netobj oh;
+ u32 svid;
++ u64 lock_start;
++ u64 lock_len;
+ struct file_lock fl;
+ };
+
+diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
+index 5ae766f26e04f..025250ade98e2 100644
+--- a/include/linux/lockd/xdr4.h
++++ b/include/linux/lockd/xdr4.h
+@@ -24,6 +24,7 @@
+
+
+
++void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
+ int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
+ int nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+ int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
+index 9fe165beb0f9e..aa0ecfc6cdb4b 100644
+--- a/include/linux/lockdep.h
++++ b/include/linux/lockdep.h
+@@ -192,7 +192,7 @@ static inline void
+ lockdep_init_map_waits(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass, u8 inner, u8 outer)
+ {
+- lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL);
++ lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL);
+ }
+
+ static inline void
+@@ -215,24 +215,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ * or they are too narrow (they suffer from a false class-split):
+ */
+ #define lockdep_set_class(lock, key) \
+- lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \
+- (lock)->dep_map.wait_type_inner, \
+- (lock)->dep_map.wait_type_outer)
++ lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \
++ (lock)->dep_map.wait_type_inner, \
++ (lock)->dep_map.wait_type_outer, \
++ (lock)->dep_map.lock_type)
+
+ #define lockdep_set_class_and_name(lock, key, name) \
+- lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \
+- (lock)->dep_map.wait_type_inner, \
+- (lock)->dep_map.wait_type_outer)
++ lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \
++ (lock)->dep_map.wait_type_inner, \
++ (lock)->dep_map.wait_type_outer, \
++ (lock)->dep_map.lock_type)
+
+ #define lockdep_set_class_and_subclass(lock, key, sub) \
+- lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\
+- (lock)->dep_map.wait_type_inner, \
+- (lock)->dep_map.wait_type_outer)
++ lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \
++ (lock)->dep_map.wait_type_inner, \
++ (lock)->dep_map.wait_type_outer, \
++ (lock)->dep_map.lock_type)
+
+ #define lockdep_set_subclass(lock, sub) \
+- lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
+- (lock)->dep_map.wait_type_inner, \
+- (lock)->dep_map.wait_type_outer)
++ lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
++ (lock)->dep_map.wait_type_inner, \
++ (lock)->dep_map.wait_type_outer, \
++ (lock)->dep_map.lock_type)
+
+ #define lockdep_set_novalidate_class(lock) \
+ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 2adeea44c0d53..61590c1f2d333 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -26,13 +26,13 @@
+ * #undef LSM_HOOK
+ * };
+ */
+-LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr)
+-LSM_HOOK(int, 0, binder_transaction, struct task_struct *from,
+- struct task_struct *to)
+-LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from,
+- struct task_struct *to)
+-LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from,
+- struct task_struct *to, struct file *file)
++LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr)
++LSM_HOOK(int, 0, binder_transaction, const struct cred *from,
++ const struct cred *to)
++LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from,
++ const struct cred *to)
++LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from,
++ const struct cred *to, struct file *file)
+ LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child,
+ unsigned int mode)
+ LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent)
+diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
+index 5c4c5c0602cb7..59024618554e2 100644
+--- a/include/linux/lsm_hooks.h
++++ b/include/linux/lsm_hooks.h
+@@ -1313,22 +1313,22 @@
+ *
+ * @binder_set_context_mgr:
+ * Check whether @mgr is allowed to be the binder context manager.
+- * @mgr contains the task_struct for the task being registered.
++ * @mgr contains the struct cred for the current binder process.
+ * Return 0 if permission is granted.
+ * @binder_transaction:
+ * Check whether @from is allowed to invoke a binder transaction call
+ * to @to.
+- * @from contains the task_struct for the sending task.
+- * @to contains the task_struct for the receiving task.
++ * @from contains the struct cred for the sending process.
++ * @to contains the struct cred for the receiving process.
+ * @binder_transfer_binder:
+ * Check whether @from is allowed to transfer a binder reference to @to.
+- * @from contains the task_struct for the sending task.
+- * @to contains the task_struct for the receiving task.
++ * @from contains the struct cred for the sending process.
++ * @to contains the struct cred for the receiving process.
+ * @binder_transfer_file:
+ * Check whether @from is allowed to transfer @file to @to.
+- * @from contains the task_struct for the sending task.
++ * @from contains the struct cred for the sending process.
+ * @file contains the struct file being transferred.
+- * @to contains the task_struct for the receiving task.
++ * @to contains the struct cred for the receiving process.
+ *
+ * @ptrace_access_check:
+ * Check permission before allowing the current process to trace the
+diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h
+index 35ce84c8ca02c..31d8046d945e7 100644
+--- a/include/linux/mailbox/zynqmp-ipi-message.h
++++ b/include/linux/mailbox/zynqmp-ipi-message.h
+@@ -9,7 +9,7 @@
+ * @data: message payload
+ *
+ * This is the structure for data used in mbox_send_message
+- * the maximum length of data buffer is fixed to 12 bytes.
++ * the maximum length of data buffer is fixed to 32 bytes.
+ * Client is supposed to be aware of this.
+ */
+ struct zynqmp_ipi_message {
+diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
+index 36d6ce673503c..6fee33cb52f58 100644
+--- a/include/linux/mailbox_controller.h
++++ b/include/linux/mailbox_controller.h
+@@ -83,6 +83,7 @@ struct mbox_controller {
+ const struct of_phandle_args *sp);
+ /* Internal to API */
+ struct hrtimer poll_hrt;
++ spinlock_t poll_hrt_lock;
+ struct list_head node;
+ };
+
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 20f1e3ff60130..591bc4cefe1d6 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -10,16 +10,29 @@
+
+ struct mb_cache;
+
++/* Cache entry flags */
++enum {
++ MBE_REFERENCED_B = 0,
++ MBE_REUSABLE_B
++};
++
+ struct mb_cache_entry {
+ /* List of entries in cache - protected by cache->c_list_lock */
+ struct list_head e_list;
+- /* Hash table list - protected by hash chain bitlock */
++ /*
++ * Hash table list - protected by hash chain bitlock. The entry is
++ * guaranteed to be hashed while e_refcnt > 0.
++ */
+ struct hlist_bl_node e_hash_list;
++ /*
++ * Entry refcount. Once it reaches zero, entry is unhashed and freed.
++ * While refcount > 0, the entry is guaranteed to stay in the hash and
++ * e.g. mb_cache_entry_try_delete() will fail.
++ */
+ atomic_t e_refcnt;
+ /* Key in hash - stable during lifetime of the entry */
+ u32 e_key;
+- u32 e_referenced:1;
+- u32 e_reusable:1;
++ unsigned long e_flags;
+ /* User provided value - stable during lifetime of the entry */
+ u64 e_value;
+ };
+@@ -29,16 +42,24 @@ void mb_cache_destroy(struct mb_cache *cache);
+
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ u64 value, bool reusable);
+-void __mb_cache_entry_free(struct mb_cache_entry *entry);
+-static inline int mb_cache_entry_put(struct mb_cache *cache,
+- struct mb_cache_entry *entry)
++void __mb_cache_entry_free(struct mb_cache *cache,
++ struct mb_cache_entry *entry);
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
++static inline void mb_cache_entry_put(struct mb_cache *cache,
++ struct mb_cache_entry *entry)
+ {
+- if (!atomic_dec_and_test(&entry->e_refcnt))
+- return 0;
+- __mb_cache_entry_free(entry);
+- return 1;
++ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
++
++ if (cnt > 0) {
++ if (cnt <= 2)
++ wake_up_var(&entry->e_refcnt);
++ return;
++ }
++ __mb_cache_entry_free(cache, entry);
+ }
+
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++ u32 key, u64 value);
+ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
+ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ u64 value);
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
+index 0661af17a7584..b0da04fe087bb 100644
+--- a/include/linux/mc146818rtc.h
++++ b/include/linux/mc146818rtc.h
+@@ -86,6 +86,8 @@ struct cmos_rtc_board_info {
+ /* 2 values for divider stage reset, others for "testing purposes only" */
+ # define RTC_DIV_RESET1 0x60
+ # define RTC_DIV_RESET2 0x70
++ /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */
++# define RTC_AMD_BANK_SELECT 0x10
+ /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
+ # define RTC_RATE_SELECT 0x0F
+
+@@ -123,7 +125,11 @@ struct cmos_rtc_board_info {
+ #define RTC_IO_EXTENT_USED RTC_IO_EXTENT
+ #endif /* ARCH_RTC_LOCATION */
+
+-unsigned int mc146818_get_time(struct rtc_time *time);
++bool mc146818_does_rtc_work(void);
++int mc146818_get_time(struct rtc_time *time);
+ int mc146818_set_time(struct rtc_time *time);
+
++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
++ void *param);
++
+ #endif /* _MC146818RTC_H */
+diff --git a/include/linux/memblock.h b/include/linux/memblock.h
+index 34de69b3b8bad..5df38332e4139 100644
+--- a/include/linux/memblock.h
++++ b/include/linux/memblock.h
+@@ -388,8 +388,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size,
+ phys_addr_t end, int nid, bool exact_nid);
+ phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
+
+-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+- phys_addr_t align)
++static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
++ phys_addr_t align)
+ {
+ return memblock_phys_alloc_range(size, align, 0,
+ MEMBLOCK_ALLOC_ACCESSIBLE);
+diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
+index 3096c9a0ee014..94df87cb69c3b 100644
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -223,7 +223,7 @@ struct obj_cgroup {
+ struct mem_cgroup *memcg;
+ atomic_t nr_charged_bytes;
+ union {
+- struct list_head list;
++ struct list_head list; /* protected by objcg_lock */
+ struct rcu_head rcu;
+ };
+ };
+@@ -310,6 +310,11 @@ struct mem_cgroup {
+ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
+ atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
+
++ /*
++ * Hint of reclaim pressure for socket memroy management. Note
++ * that this indicator should NOT be used in legacy cgroup mode
++ * where socket memory is accounted/charged separately.
++ */
+ unsigned long socket_pressure;
+
+ /* Legacy tcp memory accounting */
+@@ -320,7 +325,8 @@ struct mem_cgroup {
+ int kmemcg_id;
+ enum memcg_kmem_state kmem_state;
+ struct obj_cgroup __rcu *objcg;
+- struct list_head objcg_list; /* list of inherited objcgs */
++ /* list of inherited objcgs, protected by objcg_lock */
++ struct list_head objcg_list;
+ #endif
+
+ MEMCG_PADDING(_pad2_);
+@@ -965,19 +971,30 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
+
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+- return READ_ONCE(memcg->vmstats.state[idx]);
++ long x = READ_ONCE(memcg->vmstats.state[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+ {
+ struct mem_cgroup_per_node *pn;
++ long x;
+
+ if (mem_cgroup_disabled())
+ return node_page_state(lruvec_pgdat(lruvec), idx);
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+- return READ_ONCE(pn->lruvec_stats.state[idx]);
++ x = READ_ONCE(pn->lruvec_stats.state[idx]);
++#ifdef CONFIG_SMP
++ if (x < 0)
++ x = 0;
++#endif
++ return x;
+ }
+
+ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+@@ -1001,6 +1018,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+ }
+
+ void mem_cgroup_flush_stats(void);
++void mem_cgroup_flush_stats_delayed(void);
+
+ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
+@@ -1421,6 +1439,10 @@ static inline void mem_cgroup_flush_stats(void)
+ {
+ }
+
++static inline void mem_cgroup_flush_stats_delayed(void)
++{
++}
++
+ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+ {
+@@ -1610,8 +1632,8 @@ void mem_cgroup_sk_alloc(struct sock *sk);
+ void mem_cgroup_sk_free(struct sock *sk);
+ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+ {
+- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
+- return true;
++ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
++ return !!memcg->tcpmem_pressure;
+ do {
+ if (time_before(jiffies, memcg->socket_pressure))
+ return true;
+diff --git a/include/linux/memregion.h b/include/linux/memregion.h
+index e11595256cac0..c04c4fd2e2091 100644
+--- a/include/linux/memregion.h
++++ b/include/linux/memregion.h
+@@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp)
+ {
+ return -ENOMEM;
+ }
+-void memregion_free(int id)
++static inline void memregion_free(int id)
+ {
+ }
+ #endif
+diff --git a/include/linux/memremap.h b/include/linux/memremap.h
+index c0e9d35889e8d..a8bc588fe7aa8 100644
+--- a/include/linux/memremap.h
++++ b/include/linux/memremap.h
+@@ -72,16 +72,6 @@ struct dev_pagemap_ops {
+ */
+ void (*page_free)(struct page *page);
+
+- /*
+- * Transition the refcount in struct dev_pagemap to the dead state.
+- */
+- void (*kill)(struct dev_pagemap *pgmap);
+-
+- /*
+- * Wait for refcount in struct dev_pagemap to be idle and reap it.
+- */
+- void (*cleanup)(struct dev_pagemap *pgmap);
+-
+ /*
+ * Used for private (un-addressable) device memory only. Must migrate
+ * the page back to a CPU accessible page.
+@@ -95,8 +85,7 @@ struct dev_pagemap_ops {
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @altmap: pre-allocated/reserved memory for vmemmap allocations
+ * @ref: reference count that pins the devm_memremap_pages() mapping
+- * @internal_ref: internal reference if @ref is not provided by the caller
+- * @done: completion for @internal_ref
++ * @done: completion for @ref
+ * @type: memory type: see MEMORY_* in memory_hotplug.h
+ * @flags: PGMAP_* flags to specify defailed behavior
+ * @ops: method table
+@@ -109,8 +98,7 @@ struct dev_pagemap_ops {
+ */
+ struct dev_pagemap {
+ struct vmem_altmap altmap;
+- struct percpu_ref *ref;
+- struct percpu_ref internal_ref;
++ struct percpu_ref ref;
+ struct completion done;
+ enum memory_type type;
+ unsigned int flags;
+@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void)
+ static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
+ {
+ if (pgmap)
+- percpu_ref_put(pgmap->ref);
++ percpu_ref_put(&pgmap->ref);
+ }
+
+ #endif /* _LINUX_MEMREMAP_H_ */
+diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h
+index 90b20550c1c8b..06d3f11dc3c9f 100644
+--- a/include/linux/mfd/stm32-lptimer.h
++++ b/include/linux/mfd/stm32-lptimer.h
+@@ -45,6 +45,11 @@
+ #define STM32_LPTIM_PRESC GENMASK(11, 9)
+ #define STM32_LPTIM_CKPOL GENMASK(2, 1)
+
++/* STM32_LPTIM_CKPOL */
++#define STM32_LPTIM_CKPOL_RISING_EDGE 0
++#define STM32_LPTIM_CKPOL_FALLING_EDGE 1
++#define STM32_LPTIM_CKPOL_BOTH_EDGES 2
++
+ /* STM32_LPTIM_ARR */
+ #define STM32_LPTIM_MAX_ARR 0xFFFF
+
+diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
+index f8db83aedb2b5..5f5c43fd69ddd 100644
+--- a/include/linux/mfd/stm32-timers.h
++++ b/include/linux/mfd/stm32-timers.h
+@@ -82,6 +82,10 @@
+ #define MAX_TIM_ICPSC 0x3
+ #define TIM_CR2_MMS_SHIFT 4
+ #define TIM_CR2_MMS2_SHIFT 20
++#define TIM_SMCR_SMS_SLAVE_MODE_DISABLED 0 /* counts on internal clock when CEN=1 */
++#define TIM_SMCR_SMS_ENCODER_MODE_1 1 /* counts TI1FP1 edges, depending on TI2FP2 level */
++#define TIM_SMCR_SMS_ENCODER_MODE_2 2 /* counts TI2FP2 edges, depending on TI1FP1 level */
++#define TIM_SMCR_SMS_ENCODER_MODE_3 3 /* counts on both TI1FP1 and TI2FP2 edges */
+ #define TIM_SMCR_TS_SHIFT 4
+ #define TIM_BDTR_BKF_MASK 0xF
+ #define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4)
+diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h
+index 69632c1b07bd8..ae3e7a5c5219b 100644
+--- a/include/linux/mfd/t7l66xb.h
++++ b/include/linux/mfd/t7l66xb.h
+@@ -12,7 +12,6 @@
+
+ struct t7l66xb_platform_data {
+ int (*enable)(struct platform_device *dev);
+- int (*disable)(struct platform_device *dev);
+ int (*suspend)(struct platform_device *dev);
+ int (*resume)(struct platform_device *dev);
+
+diff --git a/include/linux/mhi.h b/include/linux/mhi.h
+index 7239858790353..a5cc4cdf9cc86 100644
+--- a/include/linux/mhi.h
++++ b/include/linux/mhi.h
+@@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl);
+ */
+ int mhi_pm_resume(struct mhi_controller *mhi_cntrl);
+
++/**
++ * mhi_pm_resume_force - Force resume MHI from suspended state
++ * @mhi_cntrl: MHI controller
++ *
++ * Resume the device irrespective of its MHI state. As per the MHI spec, devices
++ * has to be in M3 state during resume. But some devices seem to be in a
++ * different MHI state other than M3 but they continue working fine if allowed.
++ * This API is intented to be used for such devices.
++ *
++ * Return: 0 if the resume succeeds, a negative error code otherwise
++ */
++int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl);
++
+ /**
+ * mhi_download_rddm_image - Download ramdump image from device for
+ * debugging purpose.
+diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
+index 3d43c60b49fa8..2d0f75ffa3233 100644
+--- a/include/linux/micrel_phy.h
++++ b/include/linux/micrel_phy.h
+@@ -37,9 +37,9 @@
+ #define PHY_ID_KSZ9477 0x00221631
+
+ /* struct phy_device dev_flags definitions */
+-#define MICREL_PHY_50MHZ_CLK 0x00000001
+-#define MICREL_PHY_FXEN 0x00000002
+-#define MICREL_KSZ8_P1_ERRATA 0x00000003
++#define MICREL_PHY_50MHZ_CLK BIT(0)
++#define MICREL_PHY_FXEN BIT(1)
++#define MICREL_KSZ8_P1_ERRATA BIT(2)
+
+ #define MICREL_KSZ9021_EXTREG_CTRL 0xB
+ #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index 66eaf0aa7f698..3e72133545caf 100644
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -1074,6 +1074,11 @@ enum {
+ MLX5_VPORT_ADMIN_STATE_AUTO = 0x2,
+ };
+
++enum {
++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1,
++ MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3,
++};
++
+ enum {
+ MLX5_L3_PROT_TYPE_IPV4 = 0,
+ MLX5_L3_PROT_TYPE_IPV6 = 1,
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index f17d2101af7a0..26095c0fd781d 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -759,6 +759,7 @@ struct mlx5_core_dev {
+ enum mlx5_device_state state;
+ /* sync interface state */
+ struct mutex intf_state_mutex;
++ struct lock_class_key lock_key;
+ unsigned long intf_state;
+ struct mlx5_priv priv;
+ struct mlx5_profile profile;
+@@ -965,7 +966,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
+ struct mlx5_async_ctx {
+ struct mlx5_core_dev *dev;
+ atomic_t num_inflight;
+- struct wait_queue_head wait;
++ struct completion inflight_done;
+ };
+
+ struct mlx5_async_work;
+diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
+index 4ab5c1fc1270d..a09ed4c8361b6 100644
+--- a/include/linux/mlx5/eswitch.h
++++ b/include/linux/mlx5/eswitch.h
+@@ -136,13 +136,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
+ ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
+ #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
+
+-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
++u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
+ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
+
+ #else /* CONFIG_MLX5_ESWITCH */
+
+-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
++static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
+ {
+ return MLX5_ESWITCH_NONE;
+ }
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index 993204a6c1a13..b89992e8a3c81 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -822,7 +822,8 @@ struct mlx5_ifc_e_switch_cap_bits {
+ u8 vport_svlan_insert[0x1];
+ u8 vport_cvlan_insert_if_not_exist[0x1];
+ u8 vport_cvlan_insert_overwrite[0x1];
+- u8 reserved_at_5[0x2];
++ u8 reserved_at_5[0x1];
++ u8 vport_cvlan_insert_always[0x1];
+ u8 esw_shared_ingress_acl[0x1];
+ u8 esw_uplink_ingress_acl[0x1];
+ u8 root_ft_on_other_esw[0x1];
+@@ -1512,7 +1513,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
+ u8 rc[0x1];
+
+ u8 uar_4k[0x1];
+- u8 reserved_at_241[0x9];
++ u8 reserved_at_241[0x7];
++ u8 fl_rc_qp_when_roce_disabled[0x1];
++ u8 regexp_params[0x1];
+ u8 uar_sz[0x6];
+ u8 reserved_at_248[0x2];
+ u8 umem_uid_0[0x1];
+@@ -3309,8 +3312,8 @@ enum {
+ };
+
+ enum {
+- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1,
+- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2,
++ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0),
++ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1),
+ };
+
+ enum {
+@@ -3335,7 +3338,7 @@ struct mlx5_ifc_tirc_bits {
+
+ u8 reserved_at_80[0x4];
+ u8 lro_timeout_period_usecs[0x10];
+- u8 lro_enable_mask[0x4];
++ u8 packet_merge_mask[0x4];
+ u8 lro_max_ip_payload_size[0x8];
+
+ u8 reserved_at_a0[0x40];
+@@ -5028,12 +5031,11 @@ struct mlx5_ifc_query_qp_out_bits {
+
+ u8 syndrome[0x20];
+
+- u8 reserved_at_40[0x20];
+- u8 ece[0x20];
++ u8 reserved_at_40[0x40];
+
+ u8 opt_param_mask[0x20];
+
+- u8 reserved_at_a0[0x20];
++ u8 ece[0x20];
+
+ struct mlx5_ifc_qpc_bits qpc;
+
+@@ -6369,7 +6371,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
+ u8 reserved_at_3c[0x1];
+ u8 hash[0x1];
+ u8 reserved_at_3e[0x1];
+- u8 lro[0x1];
++ u8 packet_merge[0x1];
+ };
+
+ struct mlx5_ifc_modify_tir_out_bits {
+@@ -8508,7 +8510,8 @@ struct mlx5_ifc_alloc_flow_counter_in_bits {
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+- u8 reserved_at_40[0x38];
++ u8 reserved_at_40[0x33];
++ u8 flow_counter_bulk_log_size[0x5];
+ u8 flow_counter_bulk[0x8];
+ };
+
+@@ -9508,8 +9511,8 @@ struct mlx5_ifc_bufferx_reg_bits {
+ u8 reserved_at_0[0x6];
+ u8 lossy[0x1];
+ u8 epsb[0x1];
+- u8 reserved_at_8[0xc];
+- u8 size[0xc];
++ u8 reserved_at_8[0x8];
++ u8 size[0x10];
+
+ u8 xoff_threshold[0x10];
+ u8 xon_threshold[0x10];
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 73a52aba448f9..a27a6b58d3740 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1511,11 +1511,18 @@ static inline u8 page_kasan_tag(const struct page *page)
+
+ static inline void page_kasan_tag_set(struct page *page, u8 tag)
+ {
+- if (kasan_enabled()) {
+- tag ^= 0xff;
+- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+- }
++ unsigned long old_flags, flags;
++
++ if (!kasan_enabled())
++ return;
++
++ tag ^= 0xff;
++ old_flags = READ_ONCE(page->flags);
++ do {
++ flags = old_flags;
++ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
++ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
++ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
+ }
+
+ static inline void page_kasan_tag_reset(struct page *page)
+@@ -2600,6 +2607,7 @@ extern int install_special_mapping(struct mm_struct *mm,
+ unsigned long flags, struct page **pages);
+
+ unsigned long randomize_stack_top(unsigned long stack_top);
++unsigned long randomize_page(unsigned long start, unsigned long range);
+
+ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+@@ -2851,7 +2859,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+ #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
+ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
+ * and return without waiting upon it */
+-#define FOLL_POPULATE 0x40 /* fault in page */
++#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */
++#define FOLL_NOFAULT 0x80 /* do not fault in pages */
+ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
+ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
+ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
+@@ -3115,7 +3124,6 @@ enum mf_flags {
+ MF_SOFT_OFFLINE = 1 << 3,
+ };
+ extern int memory_failure(unsigned long pfn, int flags);
+-extern void memory_failure_queue(unsigned long pfn, int flags);
+ extern void memory_failure_queue_kick(int cpu);
+ extern int unpoison_memory(unsigned long pfn);
+ extern int sysctl_memory_failure_early_kill;
+@@ -3123,6 +3131,18 @@ extern int sysctl_memory_failure_recovery;
+ extern void shake_page(struct page *p);
+ extern atomic_long_t num_poisoned_pages __read_mostly;
+ extern int soft_offline_page(unsigned long pfn, int flags);
++#ifdef CONFIG_MEMORY_FAILURE
++extern void memory_failure_queue(unsigned long pfn, int flags);
++extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
++#else
++static inline void memory_failure_queue(unsigned long pfn, int flags)
++{
++}
++static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
++{
++ return 0;
++}
++#endif
+
+
+ /*
+diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
+index 37f9758751020..12c7f2d3e2107 100644
+--- a/include/linux/mmc/card.h
++++ b/include/linux/mmc/card.h
+@@ -292,6 +292,7 @@ struct mmc_card {
+ #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */
+ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */
+ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */
++#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */
+
+ bool reenable_cmdq; /* Re-enable Command Queue */
+
+diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
+index d9a65c6a8816f..545578fb814b0 100644
+--- a/include/linux/mmc/mmc.h
++++ b/include/linux/mmc/mmc.h
+@@ -445,7 +445,7 @@ static inline bool mmc_ready_for_data(u32 status)
+ #define MMC_SECURE_TRIM1_ARG 0x80000001
+ #define MMC_SECURE_TRIM2_ARG 0x80008000
+ #define MMC_SECURE_ARGS 0x80000000
+-#define MMC_TRIM_ARGS 0x00008001
++#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003
+
+ #define mmc_driver_type_mask(n) (1 << (n))
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 6a1d79d84675a..6ba1002165302 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -1031,6 +1031,15 @@ static inline int is_highmem_idx(enum zone_type idx)
+ #endif
+ }
+
++#ifdef CONFIG_ZONE_DMA
++bool has_managed_dma(void);
++#else
++static inline bool has_managed_dma(void)
++{
++ return false;
++}
++#endif
++
+ /**
+ * is_highmem - helper function to quickly check if a struct zone is a
+ * highmem zone or not. This is an attempt to keep references
+@@ -1342,13 +1351,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms)
+
+ static inline struct mem_section *__nr_to_section(unsigned long nr)
+ {
++ unsigned long root = SECTION_NR_TO_ROOT(nr);
++
++ if (unlikely(root >= NR_SECTION_ROOTS))
++ return NULL;
++
+ #ifdef CONFIG_SPARSEMEM_EXTREME
+- if (!mem_section)
++ if (!mem_section || !mem_section[root])
+ return NULL;
+ #endif
+- if (!mem_section[SECTION_NR_TO_ROOT(nr)])
+- return NULL;
+- return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
++ return &mem_section[root][nr & SECTION_ROOT_MASK];
+ }
+ extern size_t mem_section_usage_size(void);
+
+diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
+new file mode 100644
+index 0000000000000..ee5a217de2a88
+--- /dev/null
++++ b/include/linux/mnt_idmapping.h
+@@ -0,0 +1,234 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_MNT_IDMAPPING_H
++#define _LINUX_MNT_IDMAPPING_H
++
++#include <linux/types.h>
++#include <linux/uidgid.h>
++
++struct user_namespace;
++/*
++ * Carries the initial idmapping of 0:0:4294967295 which is an identity
++ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
++ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
++ */
++extern struct user_namespace init_user_ns;
++
++/**
++ * initial_idmapping - check whether this is the initial mapping
++ * @ns: idmapping to check
++ *
++ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
++ * [...], 1000 to 1000 [...].
++ *
++ * Return: true if this is the initial mapping, false if not.
++ */
++static inline bool initial_idmapping(const struct user_namespace *ns)
++{
++ return ns == &init_user_ns;
++}
++
++/**
++ * no_idmapping - check whether we can skip remapping a kuid/gid
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ *
++ * This function can be used to check whether a remapping between two
++ * idmappings is required.
++ * An idmapped mount is a mount that has an idmapping attached to it that
++ * is different from the filsystem's idmapping and the initial idmapping.
++ * If the initial mapping is used or the idmapping of the mount and the
++ * filesystem are identical no remapping is required.
++ *
++ * Return: true if remapping can be skipped, false if not.
++ */
++static inline bool no_idmapping(const struct user_namespace *mnt_userns,
++ const struct user_namespace *fs_userns)
++{
++ return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
++}
++
++/**
++ * mapped_kuid_fs - map a filesystem kuid into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ * @kuid : kuid to be mapped
++ *
++ * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this
++ * function when preparing a @kuid to be reported to userspace.
++ *
++ * If no_idmapping() determines that this is not an idmapped mount we can
++ * simply return @kuid unchanged.
++ * If initial_idmapping() tells us that the filesystem is not mounted with an
++ * idmapping we know the value of @kuid won't change when calling
++ * from_kuid() so we can simply retrieve the value via __kuid_val()
++ * directly.
++ *
++ * Return: @kuid mapped according to @mnt_userns.
++ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
++ * returned.
++ */
++static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns,
++ kuid_t kuid)
++{
++ uid_t uid;
++
++ if (no_idmapping(mnt_userns, fs_userns))
++ return kuid;
++ if (initial_idmapping(fs_userns))
++ uid = __kuid_val(kuid);
++ else
++ uid = from_kuid(fs_userns, kuid);
++ if (uid == (uid_t)-1)
++ return INVALID_UID;
++ return make_kuid(mnt_userns, uid);
++}
++
++/**
++ * mapped_kgid_fs - map a filesystem kgid into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ * @kgid : kgid to be mapped
++ *
++ * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this
++ * function when preparing a @kgid to be reported to userspace.
++ *
++ * If no_idmapping() determines that this is not an idmapped mount we can
++ * simply return @kgid unchanged.
++ * If initial_idmapping() tells us that the filesystem is not mounted with an
++ * idmapping we know the value of @kgid won't change when calling
++ * from_kgid() so we can simply retrieve the value via __kgid_val()
++ * directly.
++ *
++ * Return: @kgid mapped according to @mnt_userns.
++ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
++ * returned.
++ */
++static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns,
++ kgid_t kgid)
++{
++ gid_t gid;
++
++ if (no_idmapping(mnt_userns, fs_userns))
++ return kgid;
++ if (initial_idmapping(fs_userns))
++ gid = __kgid_val(kgid);
++ else
++ gid = from_kgid(fs_userns, kgid);
++ if (gid == (gid_t)-1)
++ return INVALID_GID;
++ return make_kgid(mnt_userns, gid);
++}
++
++/**
++ * mapped_kuid_user - map a user kuid into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ * @kuid : kuid to be mapped
++ *
++ * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this
++ * function when preparing a @kuid to be written to disk or inode.
++ *
++ * If no_idmapping() determines that this is not an idmapped mount we can
++ * simply return @kuid unchanged.
++ * If initial_idmapping() tells us that the filesystem is not mounted with an
++ * idmapping we know the value of @kuid won't change when calling
++ * make_kuid() so we can simply retrieve the value via KUIDT_INIT()
++ * directly.
++ *
++ * Return: @kuid mapped according to @mnt_userns.
++ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
++ * returned.
++ */
++static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns,
++ kuid_t kuid)
++{
++ uid_t uid;
++
++ if (no_idmapping(mnt_userns, fs_userns))
++ return kuid;
++ uid = from_kuid(mnt_userns, kuid);
++ if (uid == (uid_t)-1)
++ return INVALID_UID;
++ if (initial_idmapping(fs_userns))
++ return KUIDT_INIT(uid);
++ return make_kuid(fs_userns, uid);
++}
++
++/**
++ * mapped_kgid_user - map a user kgid into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ * @kgid : kgid to be mapped
++ *
++ * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this
++ * function when preparing a @kgid to be written to disk or inode.
++ *
++ * If no_idmapping() determines that this is not an idmapped mount we can
++ * simply return @kgid unchanged.
++ * If initial_idmapping() tells us that the filesystem is not mounted with an
++ * idmapping we know the value of @kgid won't change when calling
++ * make_kgid() so we can simply retrieve the value via KGIDT_INIT()
++ * directly.
++ *
++ * Return: @kgid mapped according to @mnt_userns.
++ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
++ * returned.
++ */
++static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns,
++ kgid_t kgid)
++{
++ gid_t gid;
++
++ if (no_idmapping(mnt_userns, fs_userns))
++ return kgid;
++ gid = from_kgid(mnt_userns, kgid);
++ if (gid == (gid_t)-1)
++ return INVALID_GID;
++ if (initial_idmapping(fs_userns))
++ return KGIDT_INIT(gid);
++ return make_kgid(fs_userns, gid);
++}
++
++/**
++ * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ *
++ * Use this helper to initialize a new vfs or filesystem object based on
++ * the caller's fsuid. A common example is initializing the i_uid field of
++ * a newly allocated inode triggered by a creation event such as mkdir or
++ * O_CREAT. Other examples include the allocation of quotas for a specific
++ * user.
++ *
++ * Return: the caller's current fsuid mapped up according to @mnt_userns.
++ */
++static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns)
++{
++ return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid());
++}
++
++/**
++ * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
++ * @mnt_userns: the mount's idmapping
++ * @fs_userns: the filesystem's idmapping
++ *
++ * Use this helper to initialize a new vfs or filesystem object based on
++ * the caller's fsgid. A common example is initializing the i_gid field of
++ * a newly allocated inode triggered by a creation event such as mkdir or
++ * O_CREAT. Other examples include the allocation of quotas for a specific
++ * user.
++ *
++ * Return: the caller's current fsgid mapped up according to @mnt_userns.
++ */
++static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns)
++{
++ return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid());
++}
++
++#endif /* _LINUX_MNT_IDMAPPING_H */
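For orientation, here is a minimal sketch of how a filesystem would typically pair these helpers, assuming the common convention that the filesystem's idmapping is sb->s_user_ns; the foo_* functions are hypothetical and are not part of this patch:

#include <linux/fs.h>
#include <linux/mnt_idmapping.h>

/* Creation path: map the caller's fsuid/fsgid down into the filesystem's idmapping. */
static void foo_init_owner(struct user_namespace *mnt_userns, struct inode *inode)
{
	inode->i_uid = mapped_fsuid(mnt_userns, inode->i_sb->s_user_ns);
	inode->i_gid = mapped_fsgid(mnt_userns, inode->i_sb->s_user_ns);
}

/* Reporting path: map the on-disk kuid up into the mount's idmapping. */
static kuid_t foo_stat_uid(struct user_namespace *mnt_userns, const struct inode *inode)
{
	return mapped_kuid_fs(mnt_userns, inode->i_sb->s_user_ns, inode->i_uid);
}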
+diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
+index 9e09d11ffe5b3..1322652a9d0d9 100644
+--- a/include/linux/moduleloader.h
++++ b/include/linux/moduleloader.h
+@@ -39,6 +39,11 @@ bool module_init_section(const char *name);
+ */
+ bool module_exit_section(const char *name);
+
++/* Describes whether within_module_init() will consider this an init section
++ * or not. This behaviour changes with CONFIG_MODULE_UNLOAD.
++ */
++bool module_init_layout_section(const char *sname);
++
+ /*
+ * Apply the given relocation to the (simplified) ELF. Return -error
+ * or 0.
+diff --git a/include/linux/msi.h b/include/linux/msi.h
+index 49cf6eb222e76..e616f94c7c585 100644
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -148,7 +148,7 @@ struct msi_desc {
+ u8 is_msix : 1;
+ u8 multiple : 3;
+ u8 multi_cap : 3;
+- u8 maskbit : 1;
++ u8 can_mask : 1;
+ u8 is_64 : 1;
+ u8 is_virtual : 1;
+ u16 entry_nr;
+diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
+index fd1ecb8211060..d88bb56c18e2e 100644
+--- a/include/linux/mtd/cfi.h
++++ b/include/linux/mtd/cfi.h
+@@ -286,6 +286,7 @@ struct cfi_private {
+ map_word sector_erase_cmd;
+ unsigned long chipshift; /* Because they're of the same type */
+ const char *im_name; /* inter_module name for cmdset_setup */
++ unsigned long quirks;
+ struct flchip chips[]; /* per-chip data structure for each chip */
+ };
+
+diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
+index 88227044fc86c..8a2c60235ebb8 100644
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -394,10 +394,8 @@ struct mtd_info {
+ /* List of partitions attached to this MTD device */
+ struct list_head partitions;
+
+- union {
+- struct mtd_part part;
+- struct mtd_master master;
+- };
++ struct mtd_part part;
++ struct mtd_master master;
+ };
+
+ static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd)
+diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
+index b2f9dd3cbd695..dcf90144d70b7 100644
+--- a/include/linux/mtd/rawnand.h
++++ b/include/linux/mtd/rawnand.h
+@@ -1240,6 +1240,7 @@ struct nand_secure_region {
+ * @lock: Lock protecting the suspended field. Also used to serialize accesses
+ * to the NAND device
+ * @suspended: Set to 1 when the device is suspended, 0 when it's not
++ * @resume_wq: wait queue to sleep if rawnand is in suspended state.
+ * @cur_cs: Currently selected target. -1 means no target selected, otherwise we
+ * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets().
+ * NAND Controller drivers should not modify this value, but they're
+@@ -1294,6 +1295,7 @@ struct nand_chip {
+ /* Internals */
+ struct mutex lock;
+ unsigned int suspended : 1;
++ wait_queue_head_t resume_wq;
+ int cur_cs;
+ int read_retries;
+ struct nand_secure_region *secure_regions;
+@@ -1539,6 +1541,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
+ bool force_8bit, bool check_only);
+ int nand_write_data_op(struct nand_chip *chip, const void *buf,
+ unsigned int len, bool force_8bit);
++int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf,
++ int oob_required, int page);
+
+ /* Scan and identify a NAND device */
+ int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips,
+diff --git a/include/linux/namei.h b/include/linux/namei.h
+index e89329bb3134e..40c693525f796 100644
+--- a/include/linux/namei.h
++++ b/include/linux/namei.h
+@@ -63,12 +63,20 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
+ extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
+ extern void done_path_create(struct path *, struct dentry *);
+ extern struct dentry *kern_path_locked(const char *, struct path *);
++int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *,
++ unsigned int, struct path *);
+
+ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
+ extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
+ extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
+ extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int);
+ struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int);
++struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns,
++ const char *name, struct dentry *base,
++ int len);
++struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns,
++ const char *name,
++ struct dentry *base, int len);
+
+ extern int follow_down_one(struct path *);
+ extern int follow_down(struct path *);
+diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
+index 2c6b9e4162254..7c2d77d75a888 100644
+--- a/include/linux/netdev_features.h
++++ b/include/linux/netdev_features.h
+@@ -169,7 +169,7 @@ enum {
+ #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD)
+ #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP)
+
+-/* Finds the next feature with the highest number of the range of start till 0.
++/* Finds the next feature with the highest number of the range of start-1 till 0.
+ */
+ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
+ {
+@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
+ for ((bit) = find_next_netdev_feature((mask_addr), \
+ NETDEV_FEATURE_COUNT); \
+ (bit) >= 0; \
+- (bit) = find_next_netdev_feature((mask_addr), (bit) - 1))
++ (bit) = find_next_netdev_feature((mask_addr), (bit)))
+
+ /* Features valid for ethtool to change */
+ /* = all defined minus driver/device-class-related */
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index d79163208dfdb..b5df2e59a51d3 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -167,31 +167,38 @@ static inline bool dev_xmit_complete(int rc)
+ * (unsigned long) so they can be read and written atomically.
+ */
+
++#define NET_DEV_STAT(FIELD) \
++ union { \
++ unsigned long FIELD; \
++ atomic_long_t __##FIELD; \
++ }
++
+ struct net_device_stats {
+- unsigned long rx_packets;
+- unsigned long tx_packets;
+- unsigned long rx_bytes;
+- unsigned long tx_bytes;
+- unsigned long rx_errors;
+- unsigned long tx_errors;
+- unsigned long rx_dropped;
+- unsigned long tx_dropped;
+- unsigned long multicast;
+- unsigned long collisions;
+- unsigned long rx_length_errors;
+- unsigned long rx_over_errors;
+- unsigned long rx_crc_errors;
+- unsigned long rx_frame_errors;
+- unsigned long rx_fifo_errors;
+- unsigned long rx_missed_errors;
+- unsigned long tx_aborted_errors;
+- unsigned long tx_carrier_errors;
+- unsigned long tx_fifo_errors;
+- unsigned long tx_heartbeat_errors;
+- unsigned long tx_window_errors;
+- unsigned long rx_compressed;
+- unsigned long tx_compressed;
++ NET_DEV_STAT(rx_packets);
++ NET_DEV_STAT(tx_packets);
++ NET_DEV_STAT(rx_bytes);
++ NET_DEV_STAT(tx_bytes);
++ NET_DEV_STAT(rx_errors);
++ NET_DEV_STAT(tx_errors);
++ NET_DEV_STAT(rx_dropped);
++ NET_DEV_STAT(tx_dropped);
++ NET_DEV_STAT(multicast);
++ NET_DEV_STAT(collisions);
++ NET_DEV_STAT(rx_length_errors);
++ NET_DEV_STAT(rx_over_errors);
++ NET_DEV_STAT(rx_crc_errors);
++ NET_DEV_STAT(rx_frame_errors);
++ NET_DEV_STAT(rx_fifo_errors);
++ NET_DEV_STAT(rx_missed_errors);
++ NET_DEV_STAT(tx_aborted_errors);
++ NET_DEV_STAT(tx_carrier_errors);
++ NET_DEV_STAT(tx_fifo_errors);
++ NET_DEV_STAT(tx_heartbeat_errors);
++ NET_DEV_STAT(tx_window_errors);
++ NET_DEV_STAT(rx_compressed);
++ NET_DEV_STAT(tx_compressed);
+ };
++#undef NET_DEV_STAT
+
+
+ #include <linux/cache.h>
+@@ -268,9 +275,11 @@ struct hh_cache {
+ * relationship HH alignment <= LL alignment.
+ */
+ #define LL_RESERVED_SPACE(dev) \
+- ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \
++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+ #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
+- ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \
++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+
+ struct header_ops {
+ int (*create) (struct sk_buff *skb, struct net_device *dev,
+@@ -626,9 +635,23 @@ extern int sysctl_devconf_inherit_init_net;
+ */
+ static inline bool net_has_fallback_tunnels(const struct net *net)
+ {
+- return !IS_ENABLED(CONFIG_SYSCTL) ||
+- !sysctl_fb_tunnels_only_for_init_net ||
+- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
++#if IS_ENABLED(CONFIG_SYSCTL)
++ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
++
++ return !fb_tunnels_only_for_init_net ||
++ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
++#else
++ return true;
++#endif
++}
++
++static inline int net_inherit_devconf(void)
++{
++#if IS_ENABLED(CONFIG_SYSCTL)
++ return READ_ONCE(sysctl_devconf_inherit_init_net);
++#else
++ return 0;
++#endif
+ }
+
+ static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
+@@ -714,8 +737,11 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+- if (table->ents[index] != val)
+- table->ents[index] = val;
++ /* The following WRITE_ONCE() is paired with the READ_ONCE()
++ * here, and another one in get_rps_cpu().
++ */
++ if (READ_ONCE(table->ents[index]) != val)
++ WRITE_ONCE(table->ents[index], val);
+ }
+ }
+
+@@ -887,7 +913,7 @@ struct net_device_path_stack {
+
+ struct net_device_path_ctx {
+ const struct net_device *dev;
+- const u8 *daddr;
++ u8 daddr[ETH_ALEN];
+
+ int num_vlans;
+ struct {
+@@ -1645,7 +1671,7 @@ enum netdev_priv_flags {
+ IFF_FAILOVER_SLAVE = 1<<28,
+ IFF_L3MDEV_RX_HANDLER = 1<<29,
+ IFF_LIVE_RENAME_OK = 1<<30,
+- IFF_TX_SKB_NO_LINEAR = 1<<31,
++ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
+ };
+
+ #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
+@@ -1814,7 +1840,6 @@ enum netdev_ml_priv_type {
+ * @tipc_ptr: TIPC specific data
+ * @atalk_ptr: AppleTalk link
+ * @ip_ptr: IPv4 specific data
+- * @dn_ptr: DECnet specific data
+ * @ip6_ptr: IPv6 specific data
+ * @ax25_ptr: AX.25 specific data
+ * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
+@@ -2097,9 +2122,6 @@ struct net_device {
+ void *atalk_ptr;
+ #endif
+ struct in_device __rcu *ip_ptr;
+-#if IS_ENABLED(CONFIG_DECNET)
+- struct dn_dev __rcu *dn_ptr;
+-#endif
+ struct inet6_dev __rcu *ip6_ptr;
+ #if IS_ENABLED(CONFIG_AX25)
+ void *ax25_ptr;
+@@ -2149,7 +2171,7 @@ struct net_device {
+ struct netdev_queue *_tx ____cacheline_aligned_in_smp;
+ unsigned int num_tx_queues;
+ unsigned int real_num_tx_queues;
+- struct Qdisc *qdisc;
++ struct Qdisc __rcu *qdisc;
+ unsigned int tx_queue_len;
+ spinlock_t tx_global_lock;
+
+@@ -2636,6 +2658,7 @@ struct packet_type {
+ struct net_device *);
+ bool (*id_match)(struct packet_type *ptype,
+ struct sock *sk);
++ struct net *af_packet_net;
+ void *af_packet_priv;
+ struct list_head list;
+ };
+@@ -4403,7 +4426,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
+ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
+ {
+ spin_lock(&txq->_xmit_lock);
+- txq->xmit_lock_owner = cpu;
++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */
++ WRITE_ONCE(txq->xmit_lock_owner, cpu);
+ }
+
+ static inline bool __netif_tx_acquire(struct netdev_queue *txq)
+@@ -4420,26 +4444,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
+ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
+ {
+ spin_lock_bh(&txq->_xmit_lock);
+- txq->xmit_lock_owner = smp_processor_id();
++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */
++ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
+ }
+
+ static inline bool __netif_tx_trylock(struct netdev_queue *txq)
+ {
+ bool ok = spin_trylock(&txq->_xmit_lock);
+- if (likely(ok))
+- txq->xmit_lock_owner = smp_processor_id();
++
++ if (likely(ok)) {
++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */
++ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
++ }
+ return ok;
+ }
+
+ static inline void __netif_tx_unlock(struct netdev_queue *txq)
+ {
+- txq->xmit_lock_owner = -1;
++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */
++ WRITE_ONCE(txq->xmit_lock_owner, -1);
+ spin_unlock(&txq->_xmit_lock);
+ }
+
+ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+ {
+- txq->xmit_lock_owner = -1;
++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */
++ WRITE_ONCE(txq->xmit_lock_owner, -1);
+ spin_unlock_bh(&txq->_xmit_lock);
+ }
+
+@@ -5166,6 +5196,15 @@ static inline bool netif_is_l3_slave(const struct net_device *dev)
+ return dev->priv_flags & IFF_L3MDEV_SLAVE;
+ }
+
++static inline int dev_sdif(const struct net_device *dev)
++{
++#ifdef CONFIG_NET_L3_MASTER_DEV
++ if (netif_is_l3_slave(dev))
++ return dev->ifindex;
++#endif
++ return 0;
++}
++
+ static inline bool netif_is_bridge_master(const struct net_device *dev)
+ {
+ return dev->priv_flags & IFF_EBRIDGE;
+@@ -5455,4 +5494,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
+ extern struct net_device *blackhole_netdev;
+
++/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */
++#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD)
++#define DEV_STATS_ADD(DEV, FIELD, VAL) \
++ atomic_long_add((VAL), &(DEV)->stats.__##FIELD)
++
+ #endif /* _LINUX_NETDEVICE_H */
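A short sketch of how the new DEV_STATS_INC() helper is meant to be used from a driver slow path, given the union-based counters above; foo_rx_drop is a hypothetical helper, not part of this patch:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Slow path only: the atomic_long alias makes this safe from concurrent contexts. */
static void foo_rx_drop(struct net_device *dev, struct sk_buff *skb)
{
	DEV_STATS_INC(dev, rx_dropped);
	kfree_skb(skb);
}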
+diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
+index 3fda1a5087334..e20c2db0f2c16 100644
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -243,11 +243,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+ #endif
+ break;
+-#if IS_ENABLED(CONFIG_DECNET)
+- case NFPROTO_DECNET:
+- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
+- break;
+-#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
+index ada1296c87d50..72f5ebc5c97a9 100644
+--- a/include/linux/netfilter/ipset/ip_set.h
++++ b/include/linux/netfilter/ipset/ip_set.h
+@@ -197,7 +197,7 @@ struct ip_set_region {
+ };
+
+ /* Max range where every element is added/deleted in one step */
+-#define IPSET_MAX_RANGE (1<<20)
++#define IPSET_MAX_RANGE (1<<14)
+
+ /* The max revision number supported by any set type + 1 */
+ #define IPSET_REVISION_MAX 9
+diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
+index 700ea077ce2d6..2770db2fa080d 100644
+--- a/include/linux/netfilter/nf_conntrack_common.h
++++ b/include/linux/netfilter/nf_conntrack_common.h
+@@ -2,7 +2,7 @@
+ #ifndef _NF_CONNTRACK_COMMON_H
+ #define _NF_CONNTRACK_COMMON_H
+
+-#include <linux/atomic.h>
++#include <linux/refcount.h>
+ #include <uapi/linux/netfilter/nf_conntrack_common.h>
+
+ struct ip_conntrack_stat {
+@@ -25,19 +25,21 @@ struct ip_conntrack_stat {
+ #define NFCT_PTRMASK ~(NFCT_INFOMASK)
+
+ struct nf_conntrack {
+- atomic_t use;
++ refcount_t use;
+ };
+
+ void nf_conntrack_destroy(struct nf_conntrack *nfct);
++
++/* like nf_ct_put, but without module dependency on nf_conntrack */
+ static inline void nf_conntrack_put(struct nf_conntrack *nfct)
+ {
+- if (nfct && atomic_dec_and_test(&nfct->use))
++ if (nfct && refcount_dec_and_test(&nfct->use))
+ nf_conntrack_destroy(nfct);
+ }
+ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
+ {
+ if (nfct)
+- atomic_inc(&nfct->use);
++ refcount_inc(&nfct->use);
+ }
+
+ #endif /* _NF_CONNTRACK_COMMON_H */
+diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
+index 241e005f290ad..e9a9ab34a7ccc 100644
+--- a/include/linux/netfilter/nfnetlink.h
++++ b/include/linux/netfilter/nfnetlink.h
+@@ -45,7 +45,6 @@ struct nfnetlink_subsystem {
+ int (*commit)(struct net *net, struct sk_buff *skb);
+ int (*abort)(struct net *net, struct sk_buff *skb,
+ enum nfnl_abort_action action);
+- void (*cleanup)(struct net *net);
+ bool (*valid_genid)(struct net *net, u32 genid);
+ };
+
+diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
+index 10a01978bc0d3..bde9db771ae41 100644
+--- a/include/linux/netfilter_bridge/ebtables.h
++++ b/include/linux/netfilter_bridge/ebtables.h
+@@ -94,10 +94,6 @@ struct ebt_table {
+ struct ebt_replace_kernel *table;
+ unsigned int valid_hooks;
+ rwlock_t lock;
+- /* e.g. could be the table explicitly only allows certain
+- * matches, targets, ... 0 == let it in */
+- int (*check)(const struct ebt_table_info *info,
+- unsigned int valid_hooks);
+ /* the data used by the kernel */
+ struct ebt_table_info *private;
+ struct nf_hook_ops *ops;
+diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h
+index 8dddfb151f004..a5f7bef1b3a47 100644
+--- a/include/linux/netfilter_defs.h
++++ b/include/linux/netfilter_defs.h
+@@ -7,14 +7,6 @@
+ /* in/out/forward only */
+ #define NF_ARP_NUMHOOKS 3
+
+-/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */
+-#define NF_DN_NUMHOOKS 7
+-
+-#if IS_ENABLED(CONFIG_DECNET)
+-/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
+-#define NF_MAX_HOOKS NF_DN_NUMHOOKS
+-#else
+ #define NF_MAX_HOOKS NF_INET_NUMHOOKS
+-#endif
+
+ #endif
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index b9a8b925db430..5ddc30405f7f4 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -103,6 +103,7 @@ struct nfs_open_dir_context {
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
+ __u64 dir_cookie;
+ __u64 dup_cookie;
++ pgoff_t page_index;
+ signed char duped;
+ };
+
+@@ -154,36 +155,40 @@ struct nfs_inode {
+ unsigned long attrtimeo_timestamp;
+
+ unsigned long attr_gencount;
+- /* "Generation counter" for the attribute cache. This is
+- * bumped whenever we update the metadata on the
+- * server.
+- */
+- unsigned long cache_change_attribute;
+
+ struct rb_root access_cache;
+ struct list_head access_cache_entry_lru;
+ struct list_head access_cache_inode_lru;
+
+- /*
+- * This is the cookie verifier used for NFSv3 readdir
+- * operations
+- */
+- __be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
+-
+- atomic_long_t nrequests;
+- struct nfs_mds_commit_info commit_info;
++ union {
++ /* Directory */
++ struct {
++ /* "Generation counter" for the attribute cache.
++ * This is bumped whenever we update the metadata
++ * on the server.
++ */
++ unsigned long cache_change_attribute;
++ /*
++ * This is the cookie verifier used for NFSv3 readdir
++ * operations
++ */
++ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
++ /* Readers: in-flight sillydelete RPC calls */
++ /* Writers: rmdir */
++ struct rw_semaphore rmdir_sem;
++ };
++ /* Regular file */
++ struct {
++ atomic_long_t nrequests;
++ atomic_long_t redirtied_pages;
++ struct nfs_mds_commit_info commit_info;
++ struct mutex commit_mutex;
++ };
++ };
+
+ /* Open contexts for shared mmap writes */
+ struct list_head open_files;
+
+- /* Readers: in-flight sillydelete RPC calls */
+- /* Writers: rmdir */
+- struct rw_semaphore rmdir_sem;
+- struct mutex commit_mutex;
+-
+- /* track last access to cached pages */
+- unsigned long page_index;
+-
+ #if IS_ENABLED(CONFIG_NFS_V4)
+ struct nfs4_cached_acl *nfs4_acl;
+ /* NFSv4 state */
+@@ -272,6 +277,7 @@ struct nfs4_copy_state {
+ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */
+ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
+ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
++#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */
+ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
+ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
+ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
+@@ -421,9 +427,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
+ extern unsigned long nfs_inc_attr_generation_counter(void);
+
+ extern struct nfs_fattr *nfs_alloc_fattr(void);
++extern struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server);
++
++static inline void nfs4_label_free(struct nfs4_label *label)
++{
++#ifdef CONFIG_NFS_V4_SECURITY_LABEL
++ if (label) {
++ kfree(label->label);
++ kfree(label);
++ }
++#endif
++}
+
+ static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
+ {
++ if (fattr)
++ nfs4_label_free(fattr->label);
+ kfree(fattr);
+ }
+
+@@ -494,10 +513,10 @@ static inline const struct cred *nfs_file_cred(struct file *file)
+ * linux/fs/nfs/direct.c
+ */
+ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *);
+-extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
+- struct iov_iter *iter);
+-extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
+- struct iov_iter *iter);
++ssize_t nfs_file_direct_read(struct kiocb *iocb,
++ struct iov_iter *iter, bool swap);
++ssize_t nfs_file_direct_write(struct kiocb *iocb,
++ struct iov_iter *iter, bool swap);
+
+ /*
+ * linux/fs/nfs/dir.c
+@@ -517,8 +536,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
+ struct nfs_fattr *fattr, struct nfs4_label *label);
+ extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
+ extern void nfs_access_zap_cache(struct inode *inode);
+-extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res,
+- bool may_block);
++extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
++ u32 *mask, bool may_block);
+
+ /*
+ * linux/fs/nfs/symlink.c
+@@ -567,13 +586,16 @@ extern int nfs_wb_all(struct inode *inode);
+ extern int nfs_wb_page(struct inode *inode, struct page *page);
+ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
+ extern int nfs_commit_inode(struct inode *, int);
+-extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
++extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+ extern void nfs_commit_free(struct nfs_commit_data *data);
++bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
+
+ static inline int
+ nfs_have_writebacks(struct inode *inode)
+ {
+- return atomic_long_read(&NFS_I(inode)->nrequests) != 0;
++ if (S_ISREG(inode->i_mode))
++ return atomic_long_read(&NFS_I(inode)->nrequests) != 0;
++ return 0;
+ }
+
+ /*
+diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
+index 2a9acbfe00f0f..da9ef0ab9b4b6 100644
+--- a/include/linux/nfs_fs_sb.h
++++ b/include/linux/nfs_fs_sb.h
+@@ -287,5 +287,6 @@ struct nfs_server {
+ #define NFS_CAP_COPY_NOTIFY (1U << 27)
+ #define NFS_CAP_XATTR (1U << 28)
+ #define NFS_CAP_READ_PLUS (1U << 29)
+-
++#define NFS_CAP_FS_LOCATIONS (1U << 30)
++#define NFS_CAP_MOVEABLE (1U << 31)
+ #endif
+diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
+index e9698b6278a52..7fcd56c6ded65 100644
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -1219,7 +1219,7 @@ struct nfs4_fs_location {
+
+ #define NFS4_FS_LOCATIONS_MAXENTRIES 10
+ struct nfs4_fs_locations {
+- struct nfs_fattr fattr;
++ struct nfs_fattr *fattr;
+ const struct nfs_server *server;
+ struct nfs4_pathname fs_path;
+ int nlocations;
+@@ -1805,6 +1805,9 @@ struct nfs_rpc_ops {
+ struct nfs_server *(*create_server)(struct fs_context *);
+ struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fattr *, rpc_authflavor_t);
++ int (*discover_trunking)(struct nfs_server *, struct nfs_fh *);
++ void (*enable_swap)(struct inode *inode);
++ void (*disable_swap)(struct inode *inode);
+ };
+
+ /*
+diff --git a/include/linux/nls.h b/include/linux/nls.h
+index 499e486b3722d..e0bf8367b274a 100644
+--- a/include/linux/nls.h
++++ b/include/linux/nls.h
+@@ -47,7 +47,7 @@ enum utf16_endian {
+ /* nls_base.c */
+ extern int __register_nls(struct nls_table *, struct module *);
+ extern int unregister_nls(struct nls_table *);
+-extern struct nls_table *load_nls(char *);
++extern struct nls_table *load_nls(const char *charset);
+ extern void unload_nls(struct nls_table *);
+ extern struct nls_table *load_nls_default(void);
+ #define register_nls(nls) __register_nls((nls), THIS_MODULE)
+diff --git a/include/linux/nmi.h b/include/linux/nmi.h
+index 750c7f395ca90..0db377ff8f608 100644
+--- a/include/linux/nmi.h
++++ b/include/linux/nmi.h
+@@ -122,6 +122,8 @@ int watchdog_nmi_probe(void);
+ int watchdog_nmi_enable(unsigned int cpu);
+ void watchdog_nmi_disable(unsigned int cpu);
+
++void lockup_detector_reconfigure(void);
++
+ /**
+ * touch_nmi_watchdog - restart NMI watchdog timeout.
+ *
+@@ -195,7 +197,7 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh);
+ #endif
+
+ #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+- defined(CONFIG_HARDLOCKUP_DETECTOR)
++ defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+ void watchdog_update_hrtimer_threshold(u64 period);
+ #else
+ static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
+index 567c3ddba2c42..0f233b76c9cec 100644
+--- a/include/linux/nodemask.h
++++ b/include/linux/nodemask.h
+@@ -42,11 +42,11 @@
+ * void nodes_shift_right(dst, src, n) Shift right
+ * void nodes_shift_left(dst, src, n) Shift left
+ *
+- * int first_node(mask) Number lowest set bit, or MAX_NUMNODES
+- * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
+- * int next_node_in(node, mask) Next node past 'node', or wrap to first,
++ * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES
++ * unsigned int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
++ * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first,
+ * or MAX_NUMNODES
+- * int first_unset_node(mask) First node not set in mask, or
++ * unsigned int first_unset_node(mask) First node not set in mask, or
+ * MAX_NUMNODES
+ *
+ * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set
+@@ -153,7 +153,7 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
+
+ #define node_test_and_set(node, nodemask) \
+ __node_test_and_set((node), &(nodemask))
+-static inline int __node_test_and_set(int node, nodemask_t *addr)
++static inline bool __node_test_and_set(int node, nodemask_t *addr)
+ {
+ return test_and_set_bit(node, addr->bits);
+ }
+@@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp,
+
+ #define nodes_equal(src1, src2) \
+ __nodes_equal(&(src1), &(src2), MAX_NUMNODES)
+-static inline int __nodes_equal(const nodemask_t *src1p,
++static inline bool __nodes_equal(const nodemask_t *src1p,
+ const nodemask_t *src2p, unsigned int nbits)
+ {
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+@@ -208,7 +208,7 @@ static inline int __nodes_equal(const nodemask_t *src1p,
+
+ #define nodes_intersects(src1, src2) \
+ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
+-static inline int __nodes_intersects(const nodemask_t *src1p,
++static inline bool __nodes_intersects(const nodemask_t *src1p,
+ const nodemask_t *src2p, unsigned int nbits)
+ {
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+@@ -216,20 +216,20 @@ static inline int __nodes_intersects(const nodemask_t *src1p,
+
+ #define nodes_subset(src1, src2) \
+ __nodes_subset(&(src1), &(src2), MAX_NUMNODES)
+-static inline int __nodes_subset(const nodemask_t *src1p,
++static inline bool __nodes_subset(const nodemask_t *src1p,
+ const nodemask_t *src2p, unsigned int nbits)
+ {
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+ }
+
+ #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
+-static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
++static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
+ {
+ return bitmap_empty(srcp->bits, nbits);
+ }
+
+ #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
+-static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits)
++static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
+ {
+ return bitmap_full(srcp->bits, nbits);
+ }
+@@ -260,15 +260,15 @@ static inline void __nodes_shift_left(nodemask_t *dstp,
+ > MAX_NUMNODES, then the silly min_ts could be dropped. */
+
+ #define first_node(src) __first_node(&(src))
+-static inline int __first_node(const nodemask_t *srcp)
++static inline unsigned int __first_node(const nodemask_t *srcp)
+ {
+- return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
++ return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
+ }
+
+ #define next_node(n, src) __next_node((n), &(src))
+-static inline int __next_node(int n, const nodemask_t *srcp)
++static inline unsigned int __next_node(int n, const nodemask_t *srcp)
+ {
+- return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
++ return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
+ }
+
+ /*
+@@ -276,7 +276,7 @@ static inline int __next_node(int n, const nodemask_t *srcp)
+ * the first node in src if needed. Returns MAX_NUMNODES if src is empty.
+ */
+ #define next_node_in(n, src) __next_node_in((n), &(src))
+-int __next_node_in(int node, const nodemask_t *srcp);
++unsigned int __next_node_in(int node, const nodemask_t *srcp);
+
+ static inline void init_nodemask_of_node(nodemask_t *mask, int node)
+ {
+@@ -296,9 +296,9 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node)
+ })
+
+ #define first_unset_node(mask) __first_unset_node(&(mask))
+-static inline int __first_unset_node(const nodemask_t *maskp)
++static inline unsigned int __first_unset_node(const nodemask_t *maskp)
+ {
+- return min_t(int,MAX_NUMNODES,
++ return min_t(unsigned int, MAX_NUMNODES,
+ find_first_zero_bit(maskp->bits, MAX_NUMNODES));
+ }
+
+@@ -375,14 +375,13 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
+ }
+
+ #if MAX_NUMNODES > 1
+-#define for_each_node_mask(node, mask) \
+- for ((node) = first_node(mask); \
+- (node) < MAX_NUMNODES; \
+- (node) = next_node((node), (mask)))
++#define for_each_node_mask(node, mask) \
++ for ((node) = first_node(mask); \
++ (node >= 0) && (node) < MAX_NUMNODES; \
++ (node) = next_node((node), (mask)))
+ #else /* MAX_NUMNODES == 1 */
+-#define for_each_node_mask(node, mask) \
+- if (!nodes_empty(mask)) \
+- for ((node) = 0; (node) < 1; (node)++)
++#define for_each_node_mask(node, mask) \
++ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++)
+ #endif /* MAX_NUMNODES */
+
+ /*
+@@ -436,11 +435,11 @@ static inline int num_node_state(enum node_states state)
+
+ #define first_online_node first_node(node_states[N_ONLINE])
+ #define first_memory_node first_node(node_states[N_MEMORY])
+-static inline int next_online_node(int nid)
++static inline unsigned int next_online_node(int nid)
+ {
+ return next_node(nid, node_states[N_ONLINE]);
+ }
+-static inline int next_memory_node(int nid)
++static inline unsigned int next_memory_node(int nid)
+ {
+ return next_node(nid, node_states[N_MEMORY]);
+ }
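As a reminder of the calling convention these (now unsigned-returning) helpers keep, a tiny sketch; foo_count_memory_nodes is hypothetical and not part of this patch:

#include <linux/nodemask.h>

static unsigned int foo_count_memory_nodes(void)
{
	unsigned int count = 0;
	int nid;

	/* Terminates because next_node() returns MAX_NUMNODES past the last set bit. */
	for_each_node_mask(nid, node_states[N_MEMORY])
		count++;
	return count;
}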
+diff --git a/include/linux/nospec.h b/include/linux/nospec.h
+index c1e79f72cd892..9f0af4f116d98 100644
+--- a/include/linux/nospec.h
++++ b/include/linux/nospec.h
+@@ -11,6 +11,10 @@
+
+ struct task_struct;
+
++#ifndef barrier_nospec
++# define barrier_nospec() do { } while (0)
++#endif
++
+ /**
+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
+ * @index: array element index
+diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
+index 959e0bd9a913e..73364ae916890 100644
+--- a/include/linux/nvme-tcp.h
++++ b/include/linux/nvme-tcp.h
+@@ -114,8 +114,9 @@ struct nvme_tcp_icresp_pdu {
+ struct nvme_tcp_term_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 fes;
+- __le32 fei;
+- __u8 rsvd[8];
++ __le16 feil;
++ __le16 feiu;
++ __u8 rsvd[10];
+ };
+
+ /**
+diff --git a/include/linux/nvme.h b/include/linux/nvme.h
+index b7c4c4130b65e..461ee0ee59fe4 100644
+--- a/include/linux/nvme.h
++++ b/include/linux/nvme.h
+@@ -7,6 +7,7 @@
+ #ifndef _LINUX_NVME_H
+ #define _LINUX_NVME_H
+
++#include <linux/bits.h>
+ #include <linux/types.h>
+ #include <linux/uuid.h>
+
+@@ -322,6 +323,7 @@ enum {
+ NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_VWC_PRESENT = 1 << 0,
+ NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
++ NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
+ NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
+ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+@@ -538,7 +540,7 @@ enum {
+ NVME_CMD_EFFECTS_NCC = 1 << 2,
+ NVME_CMD_EFFECTS_NIC = 1 << 3,
+ NVME_CMD_EFFECTS_CCC = 1 << 4,
+- NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
++ NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16),
+ NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
+ };
+
+@@ -611,6 +613,10 @@ enum {
+ NVME_AER_VS = 7,
+ };
+
++enum {
++ NVME_AER_ERROR_PERSIST_INT_ERR = 0x03,
++};
++
+ enum {
+ NVME_AER_NOTICE_NS_CHANGED = 0x00,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
+index 104505e9028f7..089597600e26c 100644
+--- a/include/linux/nvmem-provider.h
++++ b/include/linux/nvmem-provider.h
+@@ -66,7 +66,7 @@ struct nvmem_keepout {
+ * @word_size: Minimum read/write access granularity.
+ * @stride: Minimum read/write access stride.
+ * @priv: User context passed to read/write callbacks.
+- * @wp-gpio: Write protect pin
++ * @ignore_wp: Write Protect pin is managed by the provider.
+ *
+ * Note: A default "nvmem<id>" name will be assigned to the device if
+ * no name is specified in its configuration. In such case "<id>" is
+@@ -80,7 +80,6 @@ struct nvmem_config {
+ const char *name;
+ int id;
+ struct module *owner;
+- struct gpio_desc *wp_gpio;
+ const struct nvmem_cell_info *cells;
+ int ncells;
+ const struct nvmem_keepout *keepout;
+@@ -88,6 +87,7 @@ struct nvmem_config {
+ enum nvmem_type type;
+ bool read_only;
+ bool root_only;
++ bool ignore_wp;
+ struct device_node *of_node;
+ bool no_of_node;
+ nvmem_reg_read_t reg_read;
+diff --git a/include/linux/objtool.h b/include/linux/objtool.h
+index 7e72d975cb761..51f5b24af8342 100644
+--- a/include/linux/objtool.h
++++ b/include/linux/objtool.h
+@@ -32,11 +32,16 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
++#define UNWIND_HINT_TYPE_ENTRY 4
++#define UNWIND_HINT_TYPE_SAVE 5
++#define UNWIND_HINT_TYPE_RESTORE 6
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+@@ -66,6 +71,23 @@ struct unwind_hint {
+ static void __used __section(".discard.func_stack_frame_non_standard") \
+ *__func_stack_frame_non_standard_##func = func
+
++/*
++ * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore
++ * for the case where a function is intentionally missing frame pointer setup,
++ * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
++ */
++#ifdef CONFIG_FRAME_POINTER
++#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func)
++#else
++#define STACK_FRAME_NON_STANDARD_FP(func)
++#endif
++
++#define ANNOTATE_NOENDBR \
++ "986: \n\t" \
++ ".pushsection .discard.noendbr\n\t" \
++ _ASM_PTR " 986b\n\t" \
++ ".popsection\n\t"
++
+ #else /* __ASSEMBLY__ */
+
+ /*
+@@ -99,7 +121,7 @@ struct unwind_hint {
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+@@ -118,6 +140,13 @@ struct unwind_hint {
+ .popsection
+ .endm
+
++.macro ANNOTATE_NOENDBR
++.Lhere_\@:
++ .pushsection .discard.noendbr
++ .quad .Lhere_\@
++ .popsection
++.endm
++
+ #endif /* __ASSEMBLY__ */
+
+ #else /* !CONFIG_STACK_VALIDATION */
+@@ -127,12 +156,16 @@ struct unwind_hint {
+ #define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "\n\t"
+ #define STACK_FRAME_NON_STANDARD(func)
++#define STACK_FRAME_NON_STANDARD_FP(func)
++#define ANNOTATE_NOENDBR
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ .macro STACK_FRAME_NON_STANDARD func:req
+ .endm
++.macro ANNOTATE_NOENDBR
++.endm
+ #endif
+
+ #endif /* CONFIG_STACK_VALIDATION */
+diff --git a/include/linux/of.h b/include/linux/of.h
+index 6f1c41f109bbe..6f15e8b0f9d10 100644
+--- a/include/linux/of.h
++++ b/include/linux/of.h
+@@ -574,7 +574,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
+ unsigned long initrd_len,
+ const char *cmdline, size_t extra_fdt_size);
+ int ima_get_kexec_buffer(void **addr, size_t *size);
+-int ima_free_kexec_buffer(void);
++int __init ima_free_kexec_buffer(void);
+ #else /* CONFIG_OF */
+
+ static inline void of_core_init(void)
+@@ -1486,12 +1486,26 @@ static inline bool of_device_is_system_power_controller(const struct device_node
+ */
+
+ enum of_overlay_notify_action {
+- OF_OVERLAY_PRE_APPLY = 0,
++ OF_OVERLAY_INIT = 0, /* kzalloc() of ovcs sets this value */
++ OF_OVERLAY_PRE_APPLY,
+ OF_OVERLAY_POST_APPLY,
+ OF_OVERLAY_PRE_REMOVE,
+ OF_OVERLAY_POST_REMOVE,
+ };
+
++static inline char *of_overlay_action_name(enum of_overlay_notify_action action)
++{
++ static char *of_overlay_action_name[] = {
++ "init",
++ "pre-apply",
++ "post-apply",
++ "pre-remove",
++ "post-remove",
++ };
++
++ return of_overlay_action_name[action];
++}
++
+ struct of_overlay_notify_data {
+ struct device_node *overlay;
+ struct device_node *target;
+diff --git a/include/linux/of_device.h b/include/linux/of_device.h
+index 1d7992a02e36e..1a803e4335d30 100644
+--- a/include/linux/of_device.h
++++ b/include/linux/of_device.h
+@@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
+ }
+
+ static inline int of_dma_configure_id(struct device *dev,
+- struct device_node *np,
+- bool force_dma)
++ struct device_node *np,
++ bool force_dma,
++ const u32 *id)
+ {
+ return 0;
+ }
+diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
+index cf6a65b94d40e..6508b97dbf1d2 100644
+--- a/include/linux/of_fdt.h
++++ b/include/linux/of_fdt.h
+@@ -62,6 +62,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
+ int depth, void *data);
+ extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
+ int depth, void *data);
++extern void early_init_dt_check_for_usable_mem_range(void);
+ extern int early_init_dt_scan_chosen_stdout(void);
+ extern void early_init_fdt_scan_reserved_mem(void);
+ extern void early_init_fdt_reserve_self(void);
+@@ -87,6 +88,7 @@ extern void unflatten_and_copy_device_tree(void);
+ extern void early_init_devtree(void *);
+ extern void early_get_first_memblock_info(void *, phys_addr_t *);
+ #else /* CONFIG_OF_EARLY_FLATTREE */
++static inline void early_init_dt_check_for_usable_mem_range(void) {}
+ static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; }
+ static inline void early_init_fdt_scan_reserved_mem(void) {}
+ static inline void early_init_fdt_reserve_self(void) {}
+diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
+index da633d34ab866..8a52ef2e6fa6b 100644
+--- a/include/linux/of_mdio.h
++++ b/include/linux/of_mdio.h
+@@ -14,9 +14,25 @@
+
+ #if IS_ENABLED(CONFIG_OF_MDIO)
+ bool of_mdiobus_child_is_phy(struct device_node *child);
+-int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
+-int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
+- struct device_node *np);
++int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np,
++ struct module *owner);
++
++static inline int of_mdiobus_register(struct mii_bus *mdio,
++ struct device_node *np)
++{
++ return __of_mdiobus_register(mdio, np, THIS_MODULE);
++}
++
++int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
++ struct device_node *np, struct module *owner);
++
++static inline int devm_of_mdiobus_register(struct device *dev,
++ struct mii_bus *mdio,
++ struct device_node *np)
++{
++ return __devm_of_mdiobus_register(dev, mdio, np, THIS_MODULE);
++}
++
+ struct mdio_device *of_mdio_find_device(struct device_node *np);
+ struct phy_device *of_phy_find_device(struct device_node *phy_np);
+ struct phy_device *
+diff --git a/include/linux/of_net.h b/include/linux/of_net.h
+index daef3b0d9270d..0484b613ca647 100644
+--- a/include/linux/of_net.h
++++ b/include/linux/of_net.h
+@@ -8,12 +8,13 @@
+
+ #include <linux/phy.h>
+
+-#ifdef CONFIG_OF_NET
++#if defined(CONFIG_OF) && defined(CONFIG_NET)
+ #include <linux/of.h>
+
+ struct net_device;
+ extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface);
+ extern int of_get_mac_address(struct device_node *np, u8 *mac);
++int of_get_ethdev_address(struct device_node *np, struct net_device *dev);
+ extern struct net_device *of_find_net_device_by_node(struct device_node *np);
+ #else
+ static inline int of_get_phy_mode(struct device_node *np,
+@@ -27,6 +28,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac)
+ return -ENODEV;
+ }
+
++static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
++{
++ return -ENODEV;
++}
++
+ static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
+ {
+ return NULL;
+diff --git a/include/linux/once.h b/include/linux/once.h
+index d361fb14ac3a2..1528625087b69 100644
+--- a/include/linux/once.h
++++ b/include/linux/once.h
+@@ -5,10 +5,18 @@
+ #include <linux/types.h>
+ #include <linux/jump_label.h>
+
++/* Helpers used from arbitrary contexts.
++ * Hard irqs are blocked, be cautious.
++ */
+ bool __do_once_start(bool *done, unsigned long *flags);
+ void __do_once_done(bool *done, struct static_key_true *once_key,
+ unsigned long *flags, struct module *mod);
+
++/* Variant for process contexts only. */
++bool __do_once_slow_start(bool *done);
++void __do_once_slow_done(bool *done, struct static_key_true *once_key,
++ struct module *mod);
++
+ /* Call a function exactly once. The idea of DO_ONCE() is to perform
+ * a function call such as initialization of random seeds, etc, only
+ * once, where DO_ONCE() can live in the fast-path. After @func has
+@@ -52,9 +60,29 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
+ ___ret; \
+ })
+
++/* Variant of DO_ONCE() for process/sleepable contexts. */
++#define DO_ONCE_SLOW(func, ...) \
++ ({ \
++ bool ___ret = false; \
++ static bool __section(".data.once") ___done = false; \
++ static DEFINE_STATIC_KEY_TRUE(___once_key); \
++ if (static_branch_unlikely(&___once_key)) { \
++ ___ret = __do_once_slow_start(&___done); \
++ if (unlikely(___ret)) { \
++ func(__VA_ARGS__); \
++ __do_once_slow_done(&___done, &___once_key, \
++ THIS_MODULE); \
++ } \
++ } \
++ ___ret; \
++ })
++
+ #define get_random_once(buf, nbytes) \
+ DO_ONCE(get_random_bytes, (buf), (nbytes))
+ #define get_random_once_wait(buf, nbytes) \
+ DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \
+
++#define get_random_slow_once(buf, nbytes) \
++ DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes))
++
+ #endif /* _LINUX_ONCE_H */
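A sketch of the new process-context variant in use, for example lazily seeding a per-module secret; foo_secret and foo_get_secret are hypothetical names, not part of this patch:

#include <linux/cache.h>
#include <linux/once.h>
#include <linux/random.h>

static u32 foo_secret __read_mostly;

static u32 foo_get_secret(void)
{
	/* May sleep; only the first caller actually runs get_random_bytes(). */
	get_random_slow_once(&foo_secret, sizeof(foo_secret));
	return foo_secret;
}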
+diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h
+index 861e606b820fa..b7bce4983638f 100644
+--- a/include/linux/once_lite.h
++++ b/include/linux/once_lite.h
+@@ -9,15 +9,27 @@
+ */
+ #define DO_ONCE_LITE(func, ...) \
+ DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__)
+-#define DO_ONCE_LITE_IF(condition, func, ...) \
++
++#define __ONCE_LITE_IF(condition) \
+ ({ \
+ static bool __section(".data.once") __already_done; \
+- bool __ret_do_once = !!(condition); \
++ bool __ret_cond = !!(condition); \
++ bool __ret_once = false; \
+ \
+- if (unlikely(__ret_do_once && !__already_done)) { \
++ if (unlikely(__ret_cond && !__already_done)) { \
+ __already_done = true; \
+- func(__VA_ARGS__); \
++ __ret_once = true; \
+ } \
++ unlikely(__ret_once); \
++ })
++
++#define DO_ONCE_LITE_IF(condition, func, ...) \
++ ({ \
++ bool __ret_do_once = !!(condition); \
++ \
++ if (__ONCE_LITE_IF(__ret_do_once)) \
++ func(__VA_ARGS__); \
++ \
+ unlikely(__ret_do_once); \
+ })
+
+diff --git a/include/linux/overflow.h b/include/linux/overflow.h
+index 4669632bd72bc..59d7228104d02 100644
+--- a/include/linux/overflow.h
++++ b/include/linux/overflow.h
+@@ -118,81 +118,94 @@ static inline bool __must_check __must_check_overflow(bool overflow)
+ }))
+
+ /**
+- * array_size() - Calculate size of 2-dimensional array.
+- *
+- * @a: dimension one
+- * @b: dimension two
++ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
+ *
+- * Calculates size of 2-dimensional array: @a * @b.
++ * @factor1: first factor
++ * @factor2: second factor
+ *
+- * Returns: number of bytes needed to represent the array or SIZE_MAX on
+- * overflow.
++ * Returns: calculate @factor1 * @factor2, both promoted to size_t,
++ * with any overflow causing the return value to be SIZE_MAX. The
++ * lvalue must be size_t to avoid implicit type conversion.
+ */
+-static inline __must_check size_t array_size(size_t a, size_t b)
++static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
+ {
+ size_t bytes;
+
+- if (check_mul_overflow(a, b, &bytes))
++ if (check_mul_overflow(factor1, factor2, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+ }
+
+ /**
+- * array3_size() - Calculate size of 3-dimensional array.
++ * size_add() - Calculate size_t addition with saturation at SIZE_MAX
+ *
+- * @a: dimension one
+- * @b: dimension two
+- * @c: dimension three
+- *
+- * Calculates size of 3-dimensional array: @a * @b * @c.
++ * @addend1: first addend
++ * @addend2: second addend
+ *
+- * Returns: number of bytes needed to represent the array or SIZE_MAX on
+- * overflow.
++ * Returns: calculate @addend1 + @addend2, both promoted to size_t,
++ * with any overflow causing the return value to be SIZE_MAX. The
++ * lvalue must be size_t to avoid implicit type conversion.
+ */
+-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
++static inline size_t __must_check size_add(size_t addend1, size_t addend2)
+ {
+ size_t bytes;
+
+- if (check_mul_overflow(a, b, &bytes))
+- return SIZE_MAX;
+- if (check_mul_overflow(bytes, c, &bytes))
++ if (check_add_overflow(addend1, addend2, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+ }
+
+-/*
+- * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
+- * struct_size() below.
++/**
++ * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
++ *
++ * @minuend: value to subtract from
++ * @subtrahend: value to subtract from @minuend
++ *
++ * Returns: calculate @minuend - @subtrahend, both promoted to size_t,
++ * with any overflow causing the return value to be SIZE_MAX. For
++ * composition with the size_add() and size_mul() helpers, neither
++ * argument may be SIZE_MAX (or the result will be forced to SIZE_MAX).
++ * The lvalue must be size_t to avoid implicit type conversion.
+ */
+-static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
++static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
+ {
+ size_t bytes;
+
+- if (check_mul_overflow(a, b, &bytes))
+- return SIZE_MAX;
+- if (check_add_overflow(bytes, c, &bytes))
++ if (minuend == SIZE_MAX || subtrahend == SIZE_MAX ||
++ check_sub_overflow(minuend, subtrahend, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+ }
+
+ /**
+- * struct_size() - Calculate size of structure with trailing array.
+- * @p: Pointer to the structure.
+- * @member: Name of the array member.
+- * @count: Number of elements in the array.
++ * array_size() - Calculate size of 2-dimensional array.
+ *
+- * Calculates size of memory needed for structure @p followed by an
+- * array of @count number of @member elements.
++ * @a: dimension one
++ * @b: dimension two
+ *
+- * Return: number of bytes needed or SIZE_MAX on overflow.
++ * Calculates size of 2-dimensional array: @a * @b.
++ *
++ * Returns: number of bytes needed to represent the array or SIZE_MAX on
++ * overflow.
+ */
+-#define struct_size(p, member, count) \
+- __ab_c_size(count, \
+- sizeof(*(p)->member) + __must_be_array((p)->member),\
+- sizeof(*(p)))
++#define array_size(a, b) size_mul(a, b)
++
++/**
++ * array3_size() - Calculate size of 3-dimensional array.
++ *
++ * @a: dimension one
++ * @b: dimension two
++ * @c: dimension three
++ *
++ * Calculates size of 3-dimensional array: @a * @b * @c.
++ *
++ * Returns: number of bytes needed to represent the array or SIZE_MAX on
++ * overflow.
++ */
++#define array3_size(a, b, c) size_mul(size_mul(a, b), c)
+
+ /**
+ * flex_array_size() - Calculate size of a flexible array member
+@@ -208,7 +221,22 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+ #define flex_array_size(p, member, count) \
+- array_size(count, \
+- sizeof(*(p)->member) + __must_be_array((p)->member))
++ size_mul(count, \
++ sizeof(*(p)->member) + __must_be_array((p)->member))
++
++/**
++ * struct_size() - Calculate size of structure with trailing flexible array.
++ *
++ * @p: Pointer to the structure.
++ * @member: Name of the array member.
++ * @count: Number of elements in the array.
++ *
++ * Calculates size of memory needed for structure @p followed by an
++ * array of @count number of @member elements.
++ *
++ * Return: number of bytes needed or SIZE_MAX on overflow.
++ */
++#define struct_size(p, member, count) \
++ size_add(sizeof(*(p)), flex_array_size(p, member, count))
+
+ #endif /* __LINUX_OVERFLOW_H */
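A brief sketch of why the saturating behaviour matters for allocations: an overflowing size collapses to SIZE_MAX, so the allocation fails instead of wrapping. struct foo_table and foo_alloc_table are hypothetical, not part of this patch:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct foo_table {
	unsigned int nr;
	u64 entries[];
};

static struct foo_table *foo_alloc_table(unsigned int nr)
{
	/* struct_size() saturates to SIZE_MAX on overflow, making kmalloc() fail cleanly. */
	struct foo_table *t = kmalloc(struct_size(t, entries, nr), GFP_KERNEL);

	if (t)
		t->nr = nr;
	return t;
}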
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index 62db6b0176b95..2f7dd14083d94 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -733,61 +733,11 @@ int wait_on_page_private_2_killable(struct page *page);
+ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
+
+ /*
+- * Fault everything in given userspace address range in.
++ * Fault in userspace address range.
+ */
+-static inline int fault_in_pages_writeable(char __user *uaddr, size_t size)
+-{
+- char __user *end = uaddr + size - 1;
+-
+- if (unlikely(size == 0))
+- return 0;
+-
+- if (unlikely(uaddr > end))
+- return -EFAULT;
+- /*
+- * Writing zeroes into userspace here is OK, because we know that if
+- * the zero gets there, we'll be overwriting it.
+- */
+- do {
+- if (unlikely(__put_user(0, uaddr) != 0))
+- return -EFAULT;
+- uaddr += PAGE_SIZE;
+- } while (uaddr <= end);
+-
+- /* Check whether the range spilled into the next page. */
+- if (((unsigned long)uaddr & PAGE_MASK) ==
+- ((unsigned long)end & PAGE_MASK))
+- return __put_user(0, end);
+-
+- return 0;
+-}
+-
+-static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
+-{
+- volatile char c;
+- const char __user *end = uaddr + size - 1;
+-
+- if (unlikely(size == 0))
+- return 0;
+-
+- if (unlikely(uaddr > end))
+- return -EFAULT;
+-
+- do {
+- if (unlikely(__get_user(c, uaddr) != 0))
+- return -EFAULT;
+- uaddr += PAGE_SIZE;
+- } while (uaddr <= end);
+-
+- /* Check whether the range spilled into the next page. */
+- if (((unsigned long)uaddr & PAGE_MASK) ==
+- ((unsigned long)end & PAGE_MASK)) {
+- return __get_user(c, end);
+- }
+-
+- (void)c;
+- return 0;
+-}
++size_t fault_in_writeable(char __user *uaddr, size_t size);
++size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
++size_t fault_in_readable(const char __user *uaddr, size_t size);
+
+ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+ pgoff_t index, gfp_t gfp_mask);
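Unlike the removed fault_in_pages_*() helpers, which returned 0 or -EFAULT, the replacement functions return the number of bytes that could not be faulted in. A hedged sketch of a caller, with foo_prefault_user_buffer being a hypothetical name:

#include <linux/pagemap.h>

static int foo_prefault_user_buffer(char __user *ubuf, size_t len)
{
	/* A non-zero return means part of the range could not be faulted in. */
	if (fault_in_writeable(ubuf, len))
		return -EFAULT;
	return 0;
}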
+diff --git a/include/linux/panic.h b/include/linux/panic.h
+index f5844908a089e..8eb5897c164fc 100644
+--- a/include/linux/panic.h
++++ b/include/linux/panic.h
+@@ -11,16 +11,11 @@ extern long (*panic_blink)(int state);
+ __printf(1, 2)
+ void panic(const char *fmt, ...) __noreturn __cold;
+ void nmi_panic(struct pt_regs *regs, const char *msg);
++void check_panic_on_warn(const char *origin);
+ extern void oops_enter(void);
+ extern void oops_exit(void);
+ extern bool oops_may_print(void);
+
+-#ifdef CONFIG_SMP
+-extern unsigned int sysctl_oops_all_cpu_backtrace;
+-#else
+-#define sysctl_oops_all_cpu_backtrace 0
+-#endif /* CONFIG_SMP */
+-
+ extern int panic_timeout;
+ extern unsigned long panic_print;
+ extern int panic_on_oops;
+diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
+index d2558121d48c0..6f7949b2fd8dc 100644
+--- a/include/linux/part_stat.h
++++ b/include/linux/part_stat.h
+@@ -3,6 +3,7 @@
+ #define _LINUX_PART_STAT_H
+
+ #include <linux/genhd.h>
++#include <asm/local.h>
+
+ struct disk_stats {
+ u64 nsecs[NR_STAT_GROUPS];
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index cd8aa6fce2041..99dfb8c1993a6 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -233,6 +233,8 @@ enum pci_dev_flags {
+ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+ /* Don't use Relaxed Ordering for TLPs directed at this device */
+ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
++ /* Device does honor MSI masking despite saying otherwise */
++ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12),
+ };
+
+ enum pci_irq_reroute_variant {
+@@ -555,6 +557,7 @@ struct pci_host_bridge {
+ void *release_data;
+ unsigned int ignore_reset_delay:1; /* For entire hierarchy */
+ unsigned int no_ext_tags:1; /* No Extended Tags */
++ unsigned int no_inc_mrrs:1; /* No Increase MRRS */
+ unsigned int native_aer:1; /* OS may use PCIe AER */
+ unsigned int native_pcie_hotplug:1; /* OS may use PCIe hotplug */
+ unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */
+@@ -654,6 +657,7 @@ struct pci_bus {
+ struct bin_attribute *legacy_io; /* Legacy I/O for this bus */
+ struct bin_attribute *legacy_mem; /* Legacy mem */
+ unsigned int is_added:1;
++ unsigned int unsafe_warn:1; /* warned about RW1C config write */
+ };
+
+ #define to_pci_bus(n) container_of(n, struct pci_bus, dev)
+@@ -1386,6 +1390,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
+ unsigned int flags);
+ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
+ void pci_bus_remove_resources(struct pci_bus *bus);
++void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res);
+ int devm_request_pci_bus_resources(struct device *dev,
+ struct list_head *resources);
+
+@@ -1782,6 +1787,7 @@ static inline struct pci_dev *pci_get_class(unsigned int class,
+ #define pci_dev_put(dev) do { } while (0)
+
+ static inline void pci_set_master(struct pci_dev *dev) { }
++static inline void pci_clear_master(struct pci_dev *dev) { }
+ static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
+ static inline void pci_disable_device(struct pci_dev *dev) { }
+ static inline int pcim_enable_device(struct pci_dev *pdev) { return -EIO; }
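A sketch of how the new flag would typically be consumed, as a fixup in a quirk table; the vendor/device pair below is a hypothetical placeholder, not a quirk taken from this patch.

#include <linux/pci.h>

static void quirk_has_msi_masking(struct pci_dev *dev)
{
	/* Device honours per-vector MSI masking despite what its
	 * capability register advertises. */
	dev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x1ad0, quirk_has_msi_masking);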
+diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
+index 011f2f1ea5bb5..8a1e264735668 100644
+--- a/include/linux/pci_ids.h
++++ b/include/linux/pci_ids.h
+@@ -60,6 +60,8 @@
+ #define PCI_CLASS_BRIDGE_EISA 0x0602
+ #define PCI_CLASS_BRIDGE_MC 0x0603
+ #define PCI_CLASS_BRIDGE_PCI 0x0604
++#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400
++#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401
+ #define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+ #define PCI_CLASS_BRIDGE_NUBUS 0x0606
+ #define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+@@ -3010,6 +3012,8 @@
+ #define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b
+ #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152
+
++#define PCI_VENDOR_ID_WANGXUN 0x8088
++
+ #define PCI_VENDOR_ID_SCALEMP 0x8686
+ #define PCI_DEVICE_ID_SCALEMP_VSMP_CTL 0x1010
+
+@@ -3090,6 +3094,8 @@
+
+ #define PCI_VENDOR_ID_3COM_2 0xa727
+
++#define PCI_VENDOR_ID_SOLIDRUN 0xd063
++
+ #define PCI_VENDOR_ID_DIGIUM 0xd161
+ #define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index 9b60bb89d86ab..014eb0a963fcb 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -680,18 +680,6 @@ struct perf_event {
+ u64 total_time_running;
+ u64 tstamp;
+
+- /*
+- * timestamp shadows the actual context timing but it can
+- * be safely used in NMI interrupt context. It reflects the
+- * context time as it was when the event was last scheduled in,
+- * or when ctx_sched_in failed to schedule the event because we
+- * run out of PMC.
+- *
+- * ctx_time already accounts for ctx->timestamp. Therefore to
+- * compute ctx_time for a sample, simply add perf_clock().
+- */
+- u64 shadow_ctx_time;
+-
+ struct perf_event_attr attr;
+ u16 header_size;
+ u16 id_header_size;
+@@ -735,11 +723,14 @@ struct perf_event {
+ struct fasync_struct *fasync;
+
+ /* delayed work for NMIs and such */
+- int pending_wakeup;
+- int pending_kill;
+- int pending_disable;
++ unsigned int pending_wakeup;
++ unsigned int pending_kill;
++ unsigned int pending_disable;
++ unsigned int pending_sigtrap;
+ unsigned long pending_addr; /* SIGTRAP */
+- struct irq_work pending;
++ struct irq_work pending_irq;
++ struct callback_head pending_task;
++ unsigned int pending_work;
+
+ atomic_t event_limit;
+
+@@ -838,6 +829,7 @@ struct perf_event_context {
+ */
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
+
+ /*
+ * These fields let us detect when two contexts have both
+@@ -852,6 +844,14 @@ struct perf_event_context {
+ #endif
+ void *task_ctx_data; /* pmu specific data */
+ struct rcu_head rcu_head;
++
++ /*
++ * Sum (event->pending_sigtrap + event->pending_work)
++ *
++ * The SIGTRAP is targeted at ctx->task, so ctx->task must not change
++ * until the signal is delivered.

++ */
++ local_t nr_pending;
+ };
+
+ /*
+@@ -920,6 +920,8 @@ struct bpf_perf_event_data_kern {
+ struct perf_cgroup_info {
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
++ int active;
+ };
+
+ struct perf_cgroup {
+@@ -1239,7 +1241,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags);
+
+-extern struct perf_guest_info_callbacks *perf_guest_cbs;
++extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
++static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
++{
++ /*
++ * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading
++ * the callbacks between a !NULL check and dereferences, to ensure
++ * pending stores/changes to the callback pointers are visible before a
++ * non-NULL perf_guest_cbs is visible to readers, and to prevent a
++ * module from unloading callbacks while readers are active.
++ */
++ return rcu_dereference(perf_guest_cbs);
++}
+ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
+ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
+
+diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
+index e24d2c992b112..d468efcf48f45 100644
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address)
+ {
+ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ }
++#define pte_index pte_index
+
+ #ifndef pmd_index
+ static inline unsigned long pmd_index(unsigned long address)
+diff --git a/include/linux/phy.h b/include/linux/phy.h
+index 736e1d1a47c40..946ccec178588 100644
+--- a/include/linux/phy.h
++++ b/include/linux/phy.h
+@@ -536,6 +536,10 @@ struct macsec_ops;
+ * @mdix: Current crossover
+ * @mdix_ctrl: User setting of crossover
+ * @interrupts: Flag interrupts have been enabled
++ * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt
++ * handling shall be postponed until PHY has resumed
++ * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
++ * requiring a rerun of the interrupt handler after resume
+ * @interface: enum phy_interface_t value
+ * @skb: Netlink message for cable diagnostics
+ * @nest: Netlink nest used for cable diagnostics
+@@ -590,6 +594,8 @@ struct phy_device {
+
+ /* Interrupts are enabled */
+ unsigned interrupts:1;
++ unsigned irq_suspended:1;
++ unsigned irq_rerun:1;
+
+ enum phy_state state;
+
+diff --git a/include/linux/phylink.h b/include/linux/phylink.h
+index 237291196ce28..b306159c1fada 100644
+--- a/include/linux/phylink.h
++++ b/include/linux/phylink.h
+@@ -64,6 +64,7 @@ enum phylink_op_type {
+ * @pcs_poll: MAC PCS cannot provide link change interrupt
+ * @poll_fixed_state: if true, starts link_poll,
+ * if MAC link is at %MLO_AN_FIXED mode.
++ * @mac_managed_pm: if true, indicates the MAC driver is responsible for PHY PM.
+ * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
+ * @get_fixed_state: callback to execute to determine the fixed link state,
+ * if MAC link is at %MLO_AN_FIXED mode.
+@@ -73,6 +74,7 @@ struct phylink_config {
+ enum phylink_op_type type;
+ bool pcs_poll;
+ bool poll_fixed_state;
++ bool mac_managed_pm;
+ bool ovr_an_inband;
+ void (*get_fixed_state)(struct phylink_config *config,
+ struct phylink_link_state *state);
+diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
+index fc5642431b923..d15190b3e0326 100644
+--- a/include/linux/pipe_fs_i.h
++++ b/include/linux/pipe_fs_i.h
+@@ -71,7 +71,7 @@ struct pipe_inode_info {
+ unsigned int files;
+ unsigned int r_counter;
+ unsigned int w_counter;
+- unsigned int poll_usage;
++ bool poll_usage;
+ struct page *tmp_page;
+ struct fasync_struct *fasync_readers;
+ struct fasync_struct *fasync_writers;
+@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ return buf->ops->try_steal(pipe, buf);
+ }
+
++static inline void pipe_discard_from(struct pipe_inode_info *pipe,
++ unsigned int old_head)
++{
++ unsigned int mask = pipe->ring_size - 1;
++
++ while (pipe->head > old_head)
++ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
++}
++
+ /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
+ memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
+ #define PIPE_SIZE PAGE_SIZE
+@@ -256,18 +265,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+
+ extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
+
+-#ifdef CONFIG_WATCH_QUEUE
+ unsigned long account_pipe_buffers(struct user_struct *user,
+ unsigned long old, unsigned long new);
+ bool too_many_pipe_buffers_soft(unsigned long user_bufs);
+ bool too_many_pipe_buffers_hard(unsigned long user_bufs);
+ bool pipe_is_unprivileged_user(void);
+-#endif
+
+ /* for F_SETPIPE_SZ and F_GETPIPE_SZ */
+-#ifdef CONFIG_WATCH_QUEUE
+ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
+-#endif
+ long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
+
+diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
+index 02599687770c5..7f03e02c48cd4 100644
+--- a/include/linux/platform_data/cros_ec_proto.h
++++ b/include/linux/platform_data/cros_ec_proto.h
+@@ -216,6 +216,9 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
+ int cros_ec_check_result(struct cros_ec_device *ec_dev,
+ struct cros_ec_command *msg);
+
++int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
++ struct cros_ec_command *msg);
++
+ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
+ struct cros_ec_command *msg);
+
+diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
+index 9837fb011f2fb..989aa30c598dc 100644
+--- a/include/linux/platform_data/ti-sysc.h
++++ b/include/linux/platform_data/ti-sysc.h
+@@ -50,6 +50,7 @@ struct sysc_regbits {
+ s8 emufree_shift;
+ };
+
++#define SYSC_QUIRK_REINIT_ON_CTX_LOST BIT(28)
+ #define SYSC_QUIRK_REINIT_ON_RESUME BIT(27)
+ #define SYSC_QUIRK_GPMC_DEBUG BIT(26)
+ #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25)
+diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h
+index 7f53a5c6f35e8..7dda3f6904654 100644
+--- a/include/linux/platform_data/x86/intel-spi.h
++++ b/include/linux/platform_data/x86/intel-spi.h
+@@ -19,11 +19,13 @@ enum intel_spi_type {
+ /**
+ * struct intel_spi_boardinfo - Board specific data for Intel SPI driver
+ * @type: Type which this controller is compatible with
+- * @writeable: The chip is writeable
++ * @set_writeable: Try to make the chip writeable (optional)
++ * @data: Data to be passed to @set_writeable, can be %NULL
+ */
+ struct intel_spi_boardinfo {
+ enum intel_spi_type type;
+- bool writeable;
++ bool (*set_writeable)(void __iomem *base, void *data);
++ void *data;
+ };
+
+ #endif /* INTEL_SPI_PDATA_H */
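Board-code sketch for the reworked intel_spi_boardinfo: instead of a static writeable flag, the platform supplies a callback that tries to unlock the controller. The register offset and bit below are hypothetical stand-ins, not values taken from this patch.

#include <linux/platform_data/x86/intel-spi.h>
#include <linux/io.h>

static bool board_spi_set_writeable(void __iomem *base, void *data)
{
	u32 val = readl(base + 0xdc);		/* hypothetical BIOS_CNTL offset */

	writel(val | BIT(0), base + 0xdc);	/* request write access */
	return readl(base + 0xdc) & BIT(0);	/* report whether it took effect */
}

static const struct intel_spi_boardinfo board_spi_info = {
	.type = INTEL_SPI_LPT,
	.set_writeable = board_spi_set_writeable,
};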
+diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
+index 022bcea9edec5..99a9b09dc839d 100644
+--- a/include/linux/platform_data/x86/pmc_atom.h
++++ b/include/linux/platform_data/x86/pmc_atom.h
+@@ -7,6 +7,8 @@
+ #ifndef PMC_ATOM_H
+ #define PMC_ATOM_H
+
++#include <linux/bits.h>
++
+ /* ValleyView Power Control Unit PCI Device ID */
+ #define PCI_DEVICE_ID_VLV_PMC 0x0F1C
+ /* CherryTrail Power Control Unit PCI Device ID */
+@@ -139,9 +141,9 @@
+ #define ACPI_MMIO_REG_LEN 0x100
+
+ #define PM1_CNT 0x4
+-#define SLEEP_TYPE_MASK 0xFFFFECFF
++#define SLEEP_TYPE_MASK GENMASK(12, 10)
+ #define SLEEP_TYPE_S5 0x1C00
+-#define SLEEP_ENABLE 0x2000
++#define SLEEP_ENABLE BIT(13)
+
+ extern int pmc_atom_read(int offset, u32 *value);
+ extern int pmc_atom_write(int offset, u32 value);
+diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
+index 7c96f169d2740..8aefdc0099c86 100644
+--- a/include/linux/platform_device.h
++++ b/include/linux/platform_device.h
+@@ -203,7 +203,18 @@ extern void platform_device_put(struct platform_device *pdev);
+
+ struct platform_driver {
+ int (*probe)(struct platform_device *);
++
++ /*
++ * Traditionally the remove callback returned an int which however is
++ * ignored by the driver core. This led to wrong expectations by driver
++ * authors who thought returning an error code was a valid error
++ * handling strategy. To convert to a callback returning void, new
++ * drivers should implement .remove_new() until the conversion it done
++ * that eventually makes .remove() return void.
++ */
+ int (*remove)(struct platform_device *);
++ void (*remove_new)(struct platform_device *);
++
+ void (*shutdown)(struct platform_device *);
+ int (*suspend)(struct platform_device *, pm_message_t state);
+ int (*resume)(struct platform_device *);
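Driver-side sketch of the transitional callback (all names hypothetical): .remove_new() returns void, making explicit that the core never acted on a value returned from .remove().

#include <linux/platform_device.h>

static void foo_remove(struct platform_device *pdev)
{
	struct foo_priv *priv = platform_get_drvdata(pdev);

	if (foo_hw_stop(priv))			/* hypothetical teardown helper */
		dev_warn(&pdev->dev, "stop failed, removing anyway\n");
}

static struct platform_driver foo_driver = {
	.probe      = foo_probe,
	.remove_new = foo_remove,
	.driver     = { .name = "foo" },
};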
+diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
+index 222da43b7096d..90eaff8b78fc9 100644
+--- a/include/linux/pm_runtime.h
++++ b/include/linux/pm_runtime.h
+@@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev);
+ extern void pm_runtime_put_suppliers(struct device *dev);
+ extern void pm_runtime_new_link(struct device *dev);
+ extern void pm_runtime_drop_link(struct device_link *link);
++extern void pm_runtime_release_supplier(struct device_link *link);
+
+ extern int devm_pm_runtime_enable(struct device *dev);
+
+@@ -129,7 +130,7 @@ static inline bool pm_runtime_suspended(struct device *dev)
+ * pm_runtime_active - Check whether or not a device is runtime-active.
+ * @dev: Target device.
+ *
+- * Return %true if runtime PM is enabled for @dev and its runtime PM status is
++ * Return %true if runtime PM is disabled for @dev or its runtime PM status is
+ * %RPM_ACTIVE, or %false otherwise.
+ *
+ * Note that the return value of this function can only be trusted if it is
+@@ -283,6 +284,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {}
+ static inline void pm_runtime_put_suppliers(struct device *dev) {}
+ static inline void pm_runtime_new_link(struct device *dev) {}
+ static inline void pm_runtime_drop_link(struct device_link *link) {}
++static inline void pm_runtime_release_supplier(struct device_link *link) {}
+
+ #endif /* !CONFIG_PM */
+
+diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
+index cd5b62db90845..e63a63aa47a37 100644
+--- a/include/linux/pm_wakeirq.h
++++ b/include/linux/pm_wakeirq.h
+@@ -17,8 +17,8 @@
+ #ifdef CONFIG_PM
+
+ extern int dev_pm_set_wake_irq(struct device *dev, int irq);
+-extern int dev_pm_set_dedicated_wake_irq(struct device *dev,
+- int irq);
++extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
++extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
+ extern void dev_pm_clear_wake_irq(struct device *dev);
+ extern void dev_pm_enable_wake_irq(struct device *dev);
+ extern void dev_pm_disable_wake_irq(struct device *dev);
+@@ -35,6 +35,11 @@ static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
+ return 0;
+ }
+
++static inline int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
++{
++ return 0;
++}
++
+ static inline void dev_pm_clear_wake_irq(struct device *dev)
+ {
+ }
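Probe-time sketch (names hypothetical) of the new variant; as far as can be told from the API, the _reverse form differs from dev_pm_set_dedicated_wake_irq() only in when the dedicated wake IRQ is armed relative to the device's runtime-suspend callback.

#include <linux/pm_wakeirq.h>
#include <linux/pm_wakeup.h>

static int foo_setup_wakeirq(struct device *dev, int irq)
{
	int ret;

	device_init_wakeup(dev, true);
	ret = dev_pm_set_dedicated_wake_irq_reverse(dev, irq);
	if (ret)
		dev_warn(dev, "failed to set up wake irq: %d\n", ret);
	return ret;
}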
+diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
+index 00fef0064355f..65d5759be1562 100644
+--- a/include/linux/posix-timers.h
++++ b/include/linux/posix-timers.h
+@@ -4,6 +4,7 @@
+
+ #include <linux/spinlock.h>
+ #include <linux/list.h>
++#include <linux/mutex.h>
+ #include <linux/alarmtimer.h>
+ #include <linux/timerqueue.h>
+ #include <linux/task_work.h>
+@@ -63,16 +64,18 @@ static inline int clockid_to_fd(const clockid_t clk)
+ * cpu_timer - Posix CPU timer representation for k_itimer
+ * @node: timerqueue node to queue in the task/sig
+ * @head: timerqueue head on which this timer is queued
+- * @task: Pointer to target task
++ * @pid: Pointer to target task PID
+ * @elist: List head for the expiry list
+ * @firing: Timer is currently firing
++ * @handling: Pointer to the task which handles expiry
+ */
+ struct cpu_timer {
+- struct timerqueue_node node;
+- struct timerqueue_head *head;
+- struct pid *pid;
+- struct list_head elist;
+- int firing;
++ struct timerqueue_node node;
++ struct timerqueue_head *head;
++ struct pid *pid;
++ struct list_head elist;
++ int firing;
++ struct task_struct __rcu *handling;
+ };
+
+ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
+@@ -136,10 +139,12 @@ struct posix_cputimers {
+ /**
+ * posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work: The task work to be scheduled
++ * @mutex: Mutex held around expiry in context of this task work
+ * @scheduled: @work has been scheduled already, no further processing
+ */
+ struct posix_cputimers_work {
+ struct callback_head work;
++ struct mutex mutex;
+ unsigned int scheduled;
+ };
+
+@@ -184,8 +189,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
+ #endif
+
+ #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
++void clear_posix_cputimers_work(struct task_struct *p);
+ void posix_cputimers_init_work(void);
+ #else
++static inline void clear_posix_cputimers_work(struct task_struct *p) { }
+ static inline void posix_cputimers_init_work(void) { }
+ #endif
+
+diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
+index 060e8d2031814..1766e1de69560 100644
+--- a/include/linux/posix_acl_xattr.h
++++ b/include/linux/posix_acl_xattr.h
+@@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size)
+
+ #ifdef CONFIG_FS_POSIX_ACL
+ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size);
+ void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size);
+ #else
+ static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size)
+ {
+ }
+ static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
++ struct inode *inode,
+ void *value, size_t size)
+ {
+ }
+diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
+index a1aa68141d0b5..7c8d65414a70a 100644
+--- a/include/linux/power/bq27xxx_battery.h
++++ b/include/linux/power/bq27xxx_battery.h
+@@ -2,6 +2,8 @@
+ #ifndef __LINUX_BQ27X00_BATTERY_H__
+ #define __LINUX_BQ27X00_BATTERY_H__
+
++#include <linux/power_supply.h>
++
+ enum bq27xxx_chip {
+ BQ27000 = 1, /* bq27000, bq27200 */
+ BQ27010, /* bq27010, bq27210 */
+@@ -68,7 +70,9 @@ struct bq27xxx_device_info {
+ struct bq27xxx_access_methods bus;
+ struct bq27xxx_reg_cache cache;
+ int charge_design_full;
++ bool removed;
+ unsigned long last_update;
++ union power_supply_propval last_status;
+ struct delayed_work work;
+ struct power_supply *bat;
+ struct list_head list;
+diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
+index 9ca1f120a2117..0735b8963e0af 100644
+--- a/include/linux/power_supply.h
++++ b/include/linux/power_supply.h
+@@ -420,8 +420,9 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table
+ int table_len, int temp);
+ extern void power_supply_changed(struct power_supply *psy);
+ extern int power_supply_am_i_supplied(struct power_supply *psy);
+-extern int power_supply_set_input_current_limit_from_supplier(
+- struct power_supply *psy);
++int power_supply_get_property_from_supplier(struct power_supply *psy,
++ enum power_supply_property psp,
++ union power_supply_propval *val);
+ extern int power_supply_set_battery_charged(struct power_supply *psy);
+
+ #ifdef CONFIG_POWER_SUPPLY
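Charger-driver sketch replacing the removed power_supply_set_input_current_limit_from_supplier() pattern: the driver asks its supplier for a property and applies the value itself (the local setter is hypothetical).

#include <linux/power_supply.h>

static void foo_update_input_limit(struct power_supply *psy)
{
	union power_supply_propval val;

	if (!power_supply_get_property_from_supplier(psy,
						     POWER_SUPPLY_PROP_CURRENT_MAX,
						     &val))
		foo_set_input_limit_ua(val.intval);	/* hypothetical helper */
}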
+diff --git a/include/linux/prandom.h b/include/linux/prandom.h
+index 056d31317e499..a4aadd2dc153e 100644
+--- a/include/linux/prandom.h
++++ b/include/linux/prandom.h
+@@ -10,6 +10,7 @@
+
+ #include <linux/types.h>
+ #include <linux/percpu.h>
++#include <linux/siphash.h>
+
+ u32 prandom_u32(void);
+ void prandom_bytes(void *buf, size_t nbytes);
+@@ -27,15 +28,10 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise);
+ * The core SipHash round function. Each line can be executed in
+ * parallel given enough CPU resources.
+ */
+-#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+- v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \
+- v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \
+- v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \
+- v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \
+-)
++#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+-#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
+-#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
++#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2)
++#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3)
+
+ #elif BITS_PER_LONG == 32
+ /*
+@@ -43,14 +39,9 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise);
+ * This is weaker, but 32-bit machines are not used for high-traffic
+ * applications, so there is less output for an attacker to analyze.
+ */
+-#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+- v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \
+- v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \
+- v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \
+- v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \
+-)
+-#define PRND_K0 0x6c796765
+-#define PRND_K1 0x74656462
++#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3)
++#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2)
++#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3)
+
+ #else
+ #error Unsupported BITS_PER_LONG
+diff --git a/include/linux/printk.h b/include/linux/printk.h
+index 85b656f82d752..9497f6b983399 100644
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl);
+ void show_regs_print_info(const char *log_lvl);
+ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
+ extern asmlinkage void dump_stack(void) __cold;
++void printk_trigger_flush(void);
+ #else
+ static inline __printf(1, 0)
+ int vprintk(const char *s, va_list args)
+@@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl)
+ static inline void dump_stack(void)
+ {
+ }
++static inline void printk_trigger_flush(void)
++{
++}
+ #endif
+
+ #ifdef CONFIG_SMP
+diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
+index 069c7fd953961..a2f25b26ae1ec 100644
+--- a/include/linux/proc_fs.h
++++ b/include/linux/proc_fs.h
+@@ -191,8 +191,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
+ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
+
+ #define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;})
++#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;})
+ #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
+ #define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
++#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;})
+
+ static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
+ {
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index 65eb1476ac705..57823b30c2d3d 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -24,18 +24,17 @@ void psi_memstall_enter(unsigned long *flags);
+ void psi_memstall_leave(unsigned long *flags);
+
+ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+-
+-#ifdef CONFIG_CGROUPS
+-int psi_cgroup_alloc(struct cgroup *cgrp);
+-void psi_cgroup_free(struct cgroup *cgrp);
+-void cgroup_move_task(struct task_struct *p, struct css_set *to);
+-
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ char *buf, size_t nbytes, enum psi_res res);
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
++void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+ poll_table *wait);
++
++#ifdef CONFIG_CGROUPS
++int psi_cgroup_alloc(struct cgroup *cgrp);
++void psi_cgroup_free(struct cgroup *cgrp);
++void cgroup_move_task(struct task_struct *p, struct css_set *to);
+ #endif
+
+ #else /* CONFIG_PSI */
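Sketch of the new create/destroy pairing (the old psi_trigger_replace() and its refcount are gone). The group, threshold string and resource are illustrative; "some 150000 1000000" asks for notification after 150ms of stall within a 1s window.

#include <linux/psi.h>
#include <linux/err.h>

static int example_mem_trigger(void)
{
	char buf[] = "some 150000 1000000";
	struct psi_trigger *t;

	t = psi_trigger_create(&psi_system, buf, sizeof(buf), PSI_MEM);
	if (IS_ERR(t))
		return PTR_ERR(t);
	/* ... poll via psi_trigger_poll(), then tear down exactly once ... */
	psi_trigger_destroy(t);
	return 0;
}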
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 0a23300d49af7..6f190002a2022 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -21,7 +21,17 @@ enum psi_task_count {
+ * don't have to special case any state tracking for it.
+ */
+ NR_ONCPU,
+- NR_PSI_TASK_COUNTS = 4,
++ /*
++ * For IO and CPU stalls the presence of running/oncpu tasks
++ * in the domain means a partial rather than a full stall.
++ * For memory it's not so simple because of page reclaimers:
++ * they are running/oncpu while representing a stall. To tell
++ * whether a domain has productivity left or not, we need to
++ * distinguish between regular running (i.e. productive)
++ * threads and memstall ones.
++ */
++ NR_MEMSTALL_RUNNING,
++ NR_PSI_TASK_COUNTS = 5,
+ };
+
+ /* Task state bitmasks */
+@@ -29,6 +39,7 @@ enum psi_task_count {
+ #define TSK_MEMSTALL (1 << NR_MEMSTALL)
+ #define TSK_RUNNING (1 << NR_RUNNING)
+ #define TSK_ONCPU (1 << NR_ONCPU)
++#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
+
+ /* Resources that workloads could be stalled on */
+ enum psi_res {
+@@ -129,9 +140,6 @@ struct psi_trigger {
+ * events to one per window
+ */
+ u64 last_event_time;
+-
+- /* Refcounting to prevent premature destruction */
+- struct kref refcount;
+ };
+
+ struct psi_group {
+diff --git a/include/linux/pstore.h b/include/linux/pstore.h
+index eb93a54cff31f..e97a8188f0fd8 100644
+--- a/include/linux/pstore.h
++++ b/include/linux/pstore.h
+@@ -14,7 +14,7 @@
+ #include <linux/errno.h>
+ #include <linux/kmsg_dump.h>
+ #include <linux/mutex.h>
+-#include <linux/semaphore.h>
++#include <linux/spinlock.h>
+ #include <linux/time.h>
+ #include <linux/types.h>
+
+@@ -87,7 +87,7 @@ struct pstore_record {
+ * @owner: module which is responsible for this backend driver
+ * @name: name of the backend driver
+ *
+- * @buf_lock: semaphore to serialize access to @buf
++ * @buf_lock: spinlock to serialize access to @buf
+ * @buf: preallocated crash dump buffer
+ * @bufsize: size of @buf available for crash dump bytes (must match
+ * smallest number of bytes available for writing to a
+@@ -178,7 +178,7 @@ struct pstore_info {
+ struct module *owner;
+ const char *name;
+
+- struct semaphore buf_lock;
++ spinlock_t buf_lock;
+ char *buf;
+ size_t bufsize;
+
+diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
+index ae04968a3a472..7a526b52bd748 100644
+--- a/include/linux/ptp_classify.h
++++ b/include/linux/ptp_classify.h
+@@ -42,6 +42,9 @@
+ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */
+ #define OFF_PTP_SEQUENCE_ID 30
+
++/* PTP header flag fields */
++#define PTP_FLAG_TWOSTEP BIT(1)
++
+ /* Below defines should actually be removed at some point in time. */
+ #define IP6_HLEN 40
+ #define UDP_HLEN 8
+diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
+index 2e5565067355b..554454cb86931 100644
+--- a/include/linux/ptp_clock_kernel.h
++++ b/include/linux/ptp_clock_kernel.h
+@@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index);
+ *
+ * @hwtstamps: skb_shared_hwtstamps structure pointer
+ * @vclock_index: phc index of ptp vclock.
++ *
++ * Returns converted timestamp, or 0 on error.
+ */
+-void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
+- int vclock_index);
++ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps,
++ int vclock_index);
+ #else
+ static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
+ { return 0; }
+-static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
+- int vclock_index)
+-{ }
++static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps,
++ int vclock_index)
++{ return 0; }
+
+ #endif
+
+diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
+index b5ebf6c012924..d695c43fd740d 100644
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+
+ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
+ #define PT_PTRACED 0x00000001
+-#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
+
+ #define PT_OPT_FLAG_SHIFT 3
+ /* PT_TRACE_* event enable flags */
+@@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+ #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+ #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+
+-/* single stepping state bits (used on ARM and PA-RISC) */
+-#define PT_SINGLESTEP_BIT 31
+-#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT)
+-#define PT_BLOCKSTEP_BIT 30
+-#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT)
+-
+ extern long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data);
+ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
+diff --git a/include/linux/pwm.h b/include/linux/pwm.h
+index 725c9b784e609..c7bfa64aeb142 100644
+--- a/include/linux/pwm.h
++++ b/include/linux/pwm.h
+@@ -489,6 +489,11 @@ static inline int pwmchip_remove(struct pwm_chip *chip)
+ return -EINVAL;
+ }
+
++static inline int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip)
++{
++ return -EINVAL;
++}
++
+ static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
+ unsigned int index,
+ const char *label)
+diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
+index f34dbd0db7952..a84063492c71a 100644
+--- a/include/linux/qed/qed_chain.h
++++ b/include/linux/qed/qed_chain.h
+@@ -268,14 +268,15 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
+ }
+
+ /**
+- * @brief qed_chain_advance_page -
++ * qed_chain_advance_page(): Advance the next element across pages for a
++ * linked chain.
+ *
+- * Advance the next element across pages for a linked chain
++ * @p_chain: Chain.
++ * @p_next_elem: Pointer to the next element pointer to update.
++ * @idx_to_inc: Index to increment.
++ * @page_to_inc: Page index to increment.
+ *
+- * @param p_chain
+- * @param p_next_elem
+- * @param idx_to_inc
+- * @param page_to_inc
++ * Return: Void.
+ */
+ static inline void
+ qed_chain_advance_page(struct qed_chain *p_chain,
+@@ -336,12 +337,14 @@ qed_chain_advance_page(struct qed_chain *p_chain,
+ } while (0)
+
+ /**
+- * @brief qed_chain_return_produced -
++ * qed_chain_return_produced(): A chain in which the driver "Produces"
++ * elements should use this API
++ * to indicate previous produced elements
++ * are now consumed.
+ *
+- * A chain in which the driver "Produces" elements should use this API
+- * to indicate previous produced elements are now consumed.
++ * @p_chain: Chain.
+ *
+- * @param p_chain
++ * Return: Void.
+ */
+ static inline void qed_chain_return_produced(struct qed_chain *p_chain)
+ {
+@@ -353,15 +356,15 @@ static inline void qed_chain_return_produced(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_produce -
++ * qed_chain_produce(): A chain in which the driver "Produces"
++ * elements should use this to get a pointer to
++ * the next element which can be "Produced". It's driver
++ * responsibility to validate that the chain has room for
++ * new element.
+ *
+- * A chain in which the driver "Produces" elements should use this to get
+- * a pointer to the next element which can be "Produced". It's driver
+- * responsibility to validate that the chain has room for new element.
++ * @p_chain: Chain.
+ *
+- * @param p_chain
+- *
+- * @return void*, a pointer to next element
++ * Return: void*, a pointer to next element.
+ */
+ static inline void *qed_chain_produce(struct qed_chain *p_chain)
+ {
+@@ -395,14 +398,11 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_get_capacity -
+- *
+- * Get the maximum number of BDs in chain
++ * qed_chain_get_capacity(): Get the maximum number of BDs in chain
+ *
+- * @param p_chain
+- * @param num
++ * @p_chain: Chain.
+ *
+- * @return number of unusable BDs
++ * Return: number of unusable BDs.
+ */
+ static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain)
+ {
+@@ -410,12 +410,14 @@ static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_recycle_consumed -
++ * qed_chain_recycle_consumed(): Returns an element which was
++ * previously consumed;
++ * Increments producers so they could
++ * be written to FW.
+ *
+- * Returns an element which was previously consumed;
+- * Increments producers so they could be written to FW.
++ * @p_chain: Chain.
+ *
+- * @param p_chain
++ * Return: Void.
+ */
+ static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain)
+ {
+@@ -427,14 +429,13 @@ static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_consume -
++ * qed_chain_consume(): A Chain in which the driver utilizes data written
++ * by a different source (i.e., FW) should use this to
++ * access passed buffers.
+ *
+- * A Chain in which the driver utilizes data written by a different source
+- * (i.e., FW) should use this to access passed buffers.
++ * @p_chain: Chain.
+ *
+- * @param p_chain
+- *
+- * @return void*, a pointer to the next buffer written
++ * Return: void*, a pointer to the next buffer written.
+ */
+ static inline void *qed_chain_consume(struct qed_chain *p_chain)
+ {
+@@ -468,9 +469,11 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_reset - Resets the chain to its start state
++ * qed_chain_reset(): Resets the chain to its start state.
++ *
++ * @p_chain: pointer to a previously allocated chain.
+ *
+- * @param p_chain pointer to a previously allocated chain
++ * Return: Void.
+ */
+ static inline void qed_chain_reset(struct qed_chain *p_chain)
+ {
+@@ -519,13 +522,12 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
+ }
+
+ /**
+- * @brief qed_chain_get_last_elem -
++ * qed_chain_get_last_elem(): Returns a pointer to the last element of the
++ * chain.
+ *
+- * Returns a pointer to the last element of the chain
++ * @p_chain: Chain.
+ *
+- * @param p_chain
+- *
+- * @return void*
++ * Return: void*.
+ */
+ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain)
+ {
+@@ -563,10 +565,13 @@ out:
+ }
+
+ /**
+- * @brief qed_chain_set_prod - sets the prod to the given value
++ * qed_chain_set_prod(): sets the prod to the given value.
++ *
++ * @p_chain: Chain.
++ * @prod_idx: Prod Idx.
++ * @p_prod_elem: Prod elem.
+ *
+- * @param prod_idx
+- * @param p_prod_elem
++ * Return: Void.
+ */
+ static inline void qed_chain_set_prod(struct qed_chain *p_chain,
+ u32 prod_idx, void *p_prod_elem)
+@@ -610,9 +615,11 @@ static inline void qed_chain_set_prod(struct qed_chain *p_chain,
+ }
+
+ /**
+- * @brief qed_chain_pbl_zero_mem - set chain memory to 0
++ * qed_chain_pbl_zero_mem(): set chain memory to 0.
++ *
++ * @p_chain: Chain.
+ *
+- * @param p_chain
++ * Return: Void.
+ */
+ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain)
+ {
+diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
+index 812a4d7511633..4df0bf0a0864e 100644
+--- a/include/linux/qed/qed_eth_if.h
++++ b/include/linux/qed/qed_eth_if.h
+@@ -145,12 +145,6 @@ struct qed_filter_mcast_params {
+ unsigned char mac[64][ETH_ALEN];
+ };
+
+-union qed_filter_type_params {
+- enum qed_filter_rx_mode_type accept_flags;
+- struct qed_filter_ucast_params ucast;
+- struct qed_filter_mcast_params mcast;
+-};
+-
+ enum qed_filter_type {
+ QED_FILTER_TYPE_UCAST,
+ QED_FILTER_TYPE_MCAST,
+@@ -158,11 +152,6 @@ enum qed_filter_type {
+ QED_MAX_FILTER_TYPES,
+ };
+
+-struct qed_filter_params {
+- enum qed_filter_type type;
+- union qed_filter_type_params filter;
+-};
+-
+ struct qed_tunn_params {
+ u16 vxlan_port;
+ u8 update_vxlan_port;
+@@ -314,8 +303,14 @@ struct qed_eth_ops {
+
+ int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
+
+- int (*filter_config)(struct qed_dev *cdev,
+- struct qed_filter_params *params);
++ int (*filter_config_rx_mode)(struct qed_dev *cdev,
++ enum qed_filter_rx_mode_type type);
++
++ int (*filter_config_ucast)(struct qed_dev *cdev,
++ struct qed_filter_ucast_params *params);
++
++ int (*filter_config_mcast)(struct qed_dev *cdev,
++ struct qed_filter_mcast_params *params);
+
+ int (*fastpath_stop)(struct qed_dev *cdev);
+
+diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
+index 850b989916703..f39451aaaeec2 100644
+--- a/include/linux/qed/qed_if.h
++++ b/include/linux/qed/qed_if.h
+@@ -819,47 +819,47 @@ struct qed_common_cb_ops {
+
+ struct qed_selftest_ops {
+ /**
+- * @brief selftest_interrupt - Perform interrupt test
++ * selftest_interrupt(): Perform interrupt test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*selftest_interrupt)(struct qed_dev *cdev);
+
+ /**
+- * @brief selftest_memory - Perform memory test
++ * selftest_memory(): Perform memory test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*selftest_memory)(struct qed_dev *cdev);
+
+ /**
+- * @brief selftest_register - Perform register test
++ * selftest_register(): Perform register test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*selftest_register)(struct qed_dev *cdev);
+
+ /**
+- * @brief selftest_clock - Perform clock test
++ * selftest_clock(): Perform clock test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*selftest_clock)(struct qed_dev *cdev);
+
+ /**
+- * @brief selftest_nvram - Perform nvram test
++ * selftest_nvram(): Perform nvram test.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*selftest_nvram) (struct qed_dev *cdev);
+ };
+@@ -927,47 +927,53 @@ struct qed_common_ops {
+ enum qed_hw_err_type err_type);
+
+ /**
+- * @brief can_link_change - can the instance change the link or not
++ * can_link_change(): can the instance change the link or not.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return true if link-change is allowed, false otherwise.
++ * Return: true if link-change is allowed, false otherwise.
+ */
+ bool (*can_link_change)(struct qed_dev *cdev);
+
+ /**
+- * @brief set_link - set links according to params
++ * set_link(): set links according to params.
+ *
+- * @param cdev
+- * @param params - values used to override the default link configuration
++ * @cdev: Qed dev pointer.
++ * @params: values used to override the default link configuration.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*set_link)(struct qed_dev *cdev,
+ struct qed_link_params *params);
+
+ /**
+- * @brief get_link - returns the current link state.
++ * get_link(): returns the current link state.
+ *
+- * @param cdev
+- * @param if_link - structure to be filled with current link configuration.
++ * @cdev: Qed dev pointer.
++ * @if_link: structure to be filled with current link configuration.
++ *
++ * Return: Void.
+ */
+ void (*get_link)(struct qed_dev *cdev,
+ struct qed_link_output *if_link);
+
+ /**
+- * @brief - drains chip in case Tx completions fail to arrive due to pause.
++ * drain(): drains chip in case Tx completions fail to arrive due to pause.
++ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: Int.
+ */
+ int (*drain)(struct qed_dev *cdev);
+
+ /**
+- * @brief update_msglvl - update module debug level
++ * update_msglvl(): update module debug level.
+ *
+- * @param cdev
+- * @param dp_module
+- * @param dp_level
++ * @cdev: Qed dev pointer.
++ * @dp_module: Debug module.
++ * @dp_level: Debug level.
++ *
++ * Return: Void.
+ */
+ void (*update_msglvl)(struct qed_dev *cdev,
+ u32 dp_module,
+@@ -981,70 +987,73 @@ struct qed_common_ops {
+ struct qed_chain *p_chain);
+
+ /**
+- * @brief nvm_flash - Flash nvm data.
++ * nvm_flash(): Flash nvm data.
+ *
+- * @param cdev
+- * @param name - file containing the data
++ * @cdev: Qed dev pointer.
++ * @name: file containing the data.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*nvm_flash)(struct qed_dev *cdev, const char *name);
+
+ /**
+- * @brief nvm_get_image - reads an entire image from nvram
++ * nvm_get_image(): reads an entire image from nvram.
+ *
+- * @param cdev
+- * @param type - type of the request nvram image
+- * @param buf - preallocated buffer to fill with the image
+- * @param len - length of the allocated buffer
++ * @cdev: Qed dev pointer.
++ * @type: type of the request nvram image.
++ * @buf: preallocated buffer to fill with the image.
++ * @len: length of the allocated buffer.
+ *
+- * @return 0 on success, error otherwise
++ * Return: 0 on success, error otherwise.
+ */
+ int (*nvm_get_image)(struct qed_dev *cdev,
+ enum qed_nvm_images type, u8 *buf, u16 len);
+
+ /**
+- * @brief set_coalesce - Configure Rx coalesce value in usec
++ * set_coalesce(): Configure Rx coalesce value in usec.
+ *
+- * @param cdev
+- * @param rx_coal - Rx coalesce value in usec
+- * @param tx_coal - Tx coalesce value in usec
+- * @param qid - Queue index
+- * @param sb_id - Status Block Id
++ * @cdev: Qed dev pointer.
++ * @rx_coal: Rx coalesce value in usec.
++ * @tx_coal: Tx coalesce value in usec.
++ * @handle: Handle.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*set_coalesce)(struct qed_dev *cdev,
+ u16 rx_coal, u16 tx_coal, void *handle);
+
+ /**
+- * @brief set_led - Configure LED mode
++ * set_led() - Configure LED mode.
+ *
+- * @param cdev
+- * @param mode - LED mode
++ * @cdev: Qed dev pointer.
++ * @mode: LED mode.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*set_led)(struct qed_dev *cdev,
+ enum qed_led_mode mode);
+
+ /**
+- * @brief attn_clr_enable - Prevent attentions from being reasserted
++ * attn_clr_enable(): Prevent attentions from being reasserted.
++ *
++ * @cdev: Qed dev pointer.
++ * @clr_enable: Clear enable.
+ *
+- * @param cdev
+- * @param clr_enable
++ * Return: Void.
+ */
+ void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
+
+ /**
+- * @brief db_recovery_add - add doorbell information to the doorbell
+- * recovery mechanism.
++ * db_recovery_add(): add doorbell information to the doorbell
++ * recovery mechanism.
+ *
+- * @param cdev
+- * @param db_addr - doorbell address
+- * @param db_data - address of where db_data is stored
+- * @param db_is_32b - doorbell is 32b pr 64b
+- * @param db_is_user - doorbell recovery addresses are user or kernel space
++ * @cdev: Qed dev pointer.
++ * @db_addr: Doorbell address.
++ * @db_data: Address of where db_data is stored.
++ * @db_width: Doorbell is 32b or 64b.
++ * @db_space: Doorbell recovery addresses are user or kernel space.
++ *
++ * Return: Int.
+ */
+ int (*db_recovery_add)(struct qed_dev *cdev,
+ void __iomem *db_addr,
+@@ -1053,114 +1062,130 @@ struct qed_common_ops {
+ enum qed_db_rec_space db_space);
+
+ /**
+- * @brief db_recovery_del - remove doorbell information from the doorbell
++ * db_recovery_del(): remove doorbell information from the doorbell
+ * recovery mechanism. db_data serves as key (db_addr is not unique).
+ *
+- * @param cdev
+- * @param db_addr - doorbell address
+- * @param db_data - address where db_data is stored. Serves as key for the
+- * entry to delete.
++ * @cdev: Qed dev pointer.
++ * @db_addr: Doorbell address.
++ * @db_data: Address where db_data is stored. Serves as key for the
++ * entry to delete.
++ *
++ * Return: Int.
+ */
+ int (*db_recovery_del)(struct qed_dev *cdev,
+ void __iomem *db_addr, void *db_data);
+
+ /**
+- * @brief recovery_process - Trigger a recovery process
++ * recovery_process(): Trigger a recovery process.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*recovery_process)(struct qed_dev *cdev);
+
+ /**
+- * @brief recovery_prolog - Execute the prolog operations of a recovery process
++ * recovery_prolog(): Execute the prolog operations of a recovery process.
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*recovery_prolog)(struct qed_dev *cdev);
+
+ /**
+- * @brief update_drv_state - API to inform the change in the driver state.
++ * update_drv_state(): API to inform the change in the driver state.
+ *
+- * @param cdev
+- * @param active
++ * @cdev: Qed dev pointer.
++ * @active: Active
+ *
++ * Return: Int.
+ */
+ int (*update_drv_state)(struct qed_dev *cdev, bool active);
+
+ /**
+- * @brief update_mac - API to inform the change in the mac address
++ * update_mac(): API to inform the change in the mac address.
+ *
+- * @param cdev
+- * @param mac
++ * @cdev: Qed dev pointer.
++ * @mac: MAC.
+ *
++ * Return: Int.
+ */
+ int (*update_mac)(struct qed_dev *cdev, u8 *mac);
+
+ /**
+- * @brief update_mtu - API to inform the change in the mtu
++ * update_mtu(): API to inform the change in the mtu.
+ *
+- * @param cdev
+- * @param mtu
++ * @cdev: Qed dev pointer.
++ * @mtu: New MTU value.
+ *
++ * Return: Int.
+ */
+ int (*update_mtu)(struct qed_dev *cdev, u16 mtu);
+
+ /**
+- * @brief update_wol - update of changes in the WoL configuration
++ * update_wol(): Update of changes in the WoL configuration.
++ *
++ * @cdev: Qed dev pointer.
++ * @enabled: true iff WoL should be enabled.
+ *
+- * @param cdev
+- * @param enabled - true iff WoL should be enabled.
++ * Return: Int.
+ */
+ int (*update_wol) (struct qed_dev *cdev, bool enabled);
+
+ /**
+- * @brief read_module_eeprom
++ * read_module_eeprom(): Read EEPROM.
+ *
+- * @param cdev
+- * @param buf - buffer
+- * @param dev_addr - PHY device memory region
+- * @param offset - offset into eeprom contents to be read
+- * @param len - buffer length, i.e., max bytes to be read
++ * @cdev: Qed dev pointer.
++ * @buf: buffer.
++ * @dev_addr: PHY device memory region.
++ * @offset: offset into eeprom contents to be read.
++ * @len: buffer length, i.e., max bytes to be read.
++ *
++ * Return: Int.
+ */
+ int (*read_module_eeprom)(struct qed_dev *cdev,
+ char *buf, u8 dev_addr, u32 offset, u32 len);
+
+ /**
+- * @brief get_affin_hwfn_idx
++ * get_affin_hwfn_idx(): Get affine HW function.
++ *
++ * @cdev: Qed dev pointer.
+ *
+- * @param cdev
++ * Return: u8.
+ */
+ u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev);
+
+ /**
+- * @brief read_nvm_cfg - Read NVM config attribute value.
+- * @param cdev
+- * @param buf - buffer
+- * @param cmd - NVM CFG command id
+- * @param entity_id - Entity id
++ * read_nvm_cfg(): Read NVM config attribute value.
++ *
++ * @cdev: Qed dev pointer.
++ * @buf: Buffer.
++ * @cmd: NVM CFG command id.
++ * @entity_id: Entity id.
+ *
++ * Return: Int.
+ */
+ int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd,
+ u32 entity_id);
+ /**
+- * @brief read_nvm_cfg - Read NVM config attribute value.
+- * @param cdev
+- * @param cmd - NVM CFG command id
++ * read_nvm_cfg_len(): Read length of NVM config attribute value.
+ *
+- * @return config id length, 0 on error.
++ * @cdev: Qed dev pointer.
++ * @cmd: NVM CFG command id.
++ *
++ * Return: config id length, 0 on error.
+ */
+ int (*read_nvm_cfg_len)(struct qed_dev *cdev, u32 cmd);
+
+ /**
+- * @brief set_grc_config - Configure value for grc config id.
+- * @param cdev
+- * @param cfg_id - grc config id
+- * @param val - grc config value
++ * set_grc_config(): Configure value for grc config id.
++ *
++ * @cdev: Qed dev pointer.
++ * @cfg_id: grc config id
++ * @val: grc config value
+ *
++ * Return: Int.
+ */
+ int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val);
+
+@@ -1397,18 +1422,16 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info)
+ }
+
+ /**
++ * qed_sb_ack(): This function creates an update command for interrupts
++ * that is written to the IGU.
+ *
+- * @brief This function creates an update command for interrupts that is
+- * written to the IGU.
+- *
+- * @param sb_info - This is the structure allocated and
+- * initialized per status block. Assumption is
+- * that it was initialized using qed_sb_init
+- * @param int_cmd - Enable/Disable/Nop
+- * @param upd_flg - whether igu consumer should be
+- * updated.
++ * @sb_info: This is the structure allocated and
++ * initialized per status block. Assumption is
++ * that it was initialized using qed_sb_init
++ * @int_cmd: Enable/Disable/Nop
++ * @upd_flg: Whether igu consumer should be updated.
+ *
+- * @return inline void
++ * Return: inline void.
+ */
+ static inline void qed_sb_ack(struct qed_sb_info *sb_info,
+ enum igu_int_cmd int_cmd,
+diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
+index 04180d9af560e..494cdc3cd840b 100644
+--- a/include/linux/qed/qed_iscsi_if.h
++++ b/include/linux/qed/qed_iscsi_if.h
+@@ -182,7 +182,7 @@ struct qed_iscsi_cb_ops {
+ * @param stats - pointer to struct that would be filled
+ * with stats
+ * @return 0 on success, error otherwise.
+- * @change_mac Change MAC of interface
++ * @change_mac: Change MAC of interface
+ * @param cdev
+ * @param handle - the connection handle.
+ * @param mac - new MAC to configure.
+diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
+index ff808d2488835..5b67cd03276eb 100644
+--- a/include/linux/qed/qed_ll2_if.h
++++ b/include/linux/qed/qed_ll2_if.h
+@@ -208,57 +208,57 @@ enum qed_ll2_xmit_flags {
+
+ struct qed_ll2_ops {
+ /**
+- * @brief start - initializes ll2
++ * start(): Initializes ll2.
+ *
+- * @param cdev
+- * @param params - protocol driver configuration for the ll2.
++ * @cdev: Qed dev pointer.
++ * @params: Protocol driver configuration for the ll2.
+ *
+- * @return 0 on success, otherwise error value.
++ * Return: 0 on success, otherwise error value.
+ */
+ int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params);
+
+ /**
+- * @brief stop - stops the ll2
++ * stop(): Stops the ll2
+ *
+- * @param cdev
++ * @cdev: Qed dev pointer.
+ *
+- * @return 0 on success, otherwise error value.
++ * Return: 0 on success, otherwise error value.
+ */
+ int (*stop)(struct qed_dev *cdev);
+
+ /**
+- * @brief start_xmit - transmits an skb over the ll2 interface
++ * start_xmit(): Transmits an skb over the ll2 interface
+ *
+- * @param cdev
+- * @param skb
+- * @param xmit_flags - Transmit options defined by the enum qed_ll2_xmit_flags.
++ * @cdev: Qed dev pointer.
++ * @skb: SKB.
++ * @xmit_flags: Transmit options defined by the enum qed_ll2_xmit_flags.
+ *
+- * @return 0 on success, otherwise error value.
++ * Return: 0 on success, otherwise error value.
+ */
+ int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb,
+ unsigned long xmit_flags);
+
+ /**
+- * @brief register_cb_ops - protocol driver register the callback for Rx/Tx
++ * register_cb_ops(): Protocol driver register the callback for Rx/Tx
+ * packets. Should be called before `start'.
+ *
+- * @param cdev
+- * @param cookie - to be passed to the callback functions.
+- * @param ops - the callback functions to register for Rx / Tx.
++ * @cdev: Qed dev pointer.
++ * @cookie: to be passed to the callback functions.
++ * @ops: the callback functions to register for Rx / Tx.
+ *
+- * @return 0 on success, otherwise error value.
++ * Return: 0 on success, otherwise error value.
+ */
+ void (*register_cb_ops)(struct qed_dev *cdev,
+ const struct qed_ll2_cb_ops *ops,
+ void *cookie);
+
+ /**
+- * @brief get LL2 related statistics
++ * get_stats(): Get LL2 related statistics.
+ *
+- * @param cdev
+- * @param stats - pointer to struct that would be filled with stats
++ * @cdev: Qed dev pointer.
++ * @stats: Pointer to struct that would be filled with stats.
+ *
+- * @return 0 on success, error otherwise.
++ * Return: 0 on success, error otherwise.
+ */
+ int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats);
+ };
+diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
+index 14671bc19ed11..1d51df347560d 100644
+--- a/include/linux/qed/qed_nvmetcp_if.h
++++ b/include/linux/qed/qed_nvmetcp_if.h
+@@ -171,6 +171,23 @@ struct nvmetcp_task_params {
+ * @param dest_port
+ * @clear_all_filters: Clear all filters.
+ * @param cdev
++ * @init_read_io: Init read IO.
++ * @task_params
++ * @cmd_pdu_header
++ * @nvme_cmd
++ * @sgl_task_params
++ * @init_write_io: Init write IO.
++ * @task_params
++ * @cmd_pdu_header
++ * @nvme_cmd
++ * @sgl_task_params
++ * @init_icreq_exchange: Exchange ICReq.
++ * @task_params
++ * @init_conn_req_pdu_hdr
++ * @tx_sgl_task_params
++ * @rx_sgl_task_params
++ * @init_task_cleanup: Init task cleanup.
++ * @task_params
+ */
+ struct qed_nvmetcp_ops {
+ const struct qed_common_ops *common;
+diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
+index 5cdfcb873a8f0..772d45b2a60a0 100644
+--- a/include/linux/raid_class.h
++++ b/include/linux/raid_class.h
+@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
+
+ struct raid_template *raid_class_attach(struct raid_function_template *);
+ void raid_class_release(struct raid_template *);
+-
+-int __must_check raid_component_add(struct raid_template *, struct device *,
+- struct device *);
+-
+diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
+index 917528d102c4e..d506dc63dd47c 100644
+--- a/include/linux/ramfs.h
++++ b/include/linux/ramfs.h
+@@ -7,6 +7,7 @@
+ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode, dev_t dev);
+ extern int ramfs_init_fs_context(struct fs_context *fc);
++extern void ramfs_kill_sb(struct super_block *sb);
+
+ #ifdef CONFIG_MMU
+ static inline int
+diff --git a/include/linux/random.h b/include/linux/random.h
+index f45b8be3e3c4e..ed75fb2b0ca94 100644
+--- a/include/linux/random.h
++++ b/include/linux/random.h
+@@ -1,9 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * include/linux/random.h
+- *
+- * Include file for the random number generator.
+- */
++
+ #ifndef _LINUX_RANDOM_H
+ #define _LINUX_RANDOM_H
+
+@@ -14,41 +10,26 @@
+
+ #include <uapi/linux/random.h>
+
+-struct random_ready_callback {
+- struct list_head list;
+- void (*func)(struct random_ready_callback *rdy);
+- struct module *owner;
+-};
++struct notifier_block;
+
+-extern void add_device_randomness(const void *, unsigned int);
+-extern void add_bootloader_randomness(const void *, unsigned int);
++void add_device_randomness(const void *buf, size_t len);
++void __init add_bootloader_randomness(const void *buf, size_t len);
++void add_input_randomness(unsigned int type, unsigned int code,
++ unsigned int value) __latent_entropy;
++void add_interrupt_randomness(int irq) __latent_entropy;
++void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy);
+
+-#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
+ static inline void add_latent_entropy(void)
+ {
+- add_device_randomness((const void *)&latent_entropy,
+- sizeof(latent_entropy));
+-}
++#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
++ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
+ #else
+-static inline void add_latent_entropy(void) {}
+-#endif
+-
+-extern void add_input_randomness(unsigned int type, unsigned int code,
+- unsigned int value) __latent_entropy;
+-extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
+-
+-extern void get_random_bytes(void *buf, int nbytes);
+-extern int wait_for_random_bytes(void);
+-extern int __init rand_initialize(void);
+-extern bool rng_is_initialized(void);
+-extern int add_random_ready_callback(struct random_ready_callback *rdy);
+-extern void del_random_ready_callback(struct random_ready_callback *rdy);
+-extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
+-
+-#ifndef MODULE
+-extern const struct file_operations random_fops, urandom_fops;
++ add_device_randomness(NULL, 0);
+ #endif
++}
+
++void get_random_bytes(void *buf, size_t len);
++size_t __must_check get_random_bytes_arch(void *buf, size_t len);
+ u32 get_random_u32(void);
+ u64 get_random_u64(void);
+ static inline unsigned int get_random_int(void)
+@@ -80,36 +61,38 @@ static inline unsigned long get_random_long(void)
+
+ static inline unsigned long get_random_canary(void)
+ {
+- unsigned long val = get_random_long();
+-
+- return val & CANARY_MASK;
++ return get_random_long() & CANARY_MASK;
+ }
+
++int __init random_init(const char *command_line);
++bool rng_is_initialized(void);
++int wait_for_random_bytes(void);
++int register_random_ready_notifier(struct notifier_block *nb);
++int unregister_random_ready_notifier(struct notifier_block *nb);
++
+ /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
+ * Returns the result of the call to wait_for_random_bytes. */
+-static inline int get_random_bytes_wait(void *buf, int nbytes)
++static inline int get_random_bytes_wait(void *buf, size_t nbytes)
+ {
+ int ret = wait_for_random_bytes();
+ get_random_bytes(buf, nbytes);
+ return ret;
+ }
+
+-#define declare_get_random_var_wait(var) \
+- static inline int get_random_ ## var ## _wait(var *out) { \
++#define declare_get_random_var_wait(name, ret_type) \
++ static inline int get_random_ ## name ## _wait(ret_type *out) { \
+ int ret = wait_for_random_bytes(); \
+ if (unlikely(ret)) \
+ return ret; \
+- *out = get_random_ ## var(); \
++ *out = get_random_ ## name(); \
+ return 0; \
+ }
+-declare_get_random_var_wait(u32)
+-declare_get_random_var_wait(u64)
+-declare_get_random_var_wait(int)
+-declare_get_random_var_wait(long)
++declare_get_random_var_wait(u32, u32)
++declare_get_random_var_wait(u64, u64)
++declare_get_random_var_wait(int, unsigned int)
++declare_get_random_var_wait(long, unsigned long)
+ #undef declare_get_random_var
+
+-unsigned long randomize_page(unsigned long start, unsigned long range);
+-
+ /*
+ * This is designed to be standalone for just prandom
+ * users, but for now we include it from <linux/random.h>
+@@ -120,22 +103,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range);
+ #ifdef CONFIG_ARCH_RANDOM
+ # include <asm/archrandom.h>
+ #else
+-static inline bool __must_check arch_get_random_long(unsigned long *v)
+-{
+- return false;
+-}
+-static inline bool __must_check arch_get_random_int(unsigned int *v)
+-{
+- return false;
+-}
+-static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+-{
+- return false;
+-}
+-static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+-{
+- return false;
+-}
++static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; }
++static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; }
++static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; }
++static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; }
+ #endif
+
+ /*
+@@ -158,4 +129,13 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
+ }
+ #endif
+
++#ifdef CONFIG_SMP
++int random_prepare_cpu(unsigned int cpu);
++int random_online_cpu(unsigned int cpu);
++#endif
++
++#ifndef MODULE
++extern const struct file_operations random_fops, urandom_fops;
++#endif
++
+ #endif /* _LINUX_RANDOM_H */
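For illustration, the converted declare_get_random_var_wait() above now pairs each helper name with an explicit return type, so get_random_u32_wait() takes a u32 pointer and returns the wait status. A minimal caller sketch (not part of the patch; example_pick_nonce() is a hypothetical name):

static int example_pick_nonce(u32 *nonce)
{
	/* Blocks until the RNG is initialized, then fills *nonce via get_random_u32(). */
	return get_random_u32_wait(nonce);
}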
+diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
+index bebc911161b6f..d373f1bcbf7ca 100644
+--- a/include/linux/randomize_kstack.h
++++ b/include/linux/randomize_kstack.h
+@@ -16,8 +16,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ * alignment. Also, since this use is being explicitly masked to a max of
+ * 10 bits, stack-clash style attacks are unlikely. For more details see
+ * "VLAs" in Documentation/process/deprecated.rst
++ *
++ * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently
++ * only with Clang and not GCC). Initializing the unused area on each syscall
++ * entry is expensive, and generating an implicit call to memset() may also be
++ * problematic (such as in noinstr functions). Therefore, if the compiler
++ * supports it (which it should if it initializes allocas), always use the
++ * "uninitialized" variant of the builtin.
+ */
+-void *__builtin_alloca(size_t size);
++#if __has_builtin(__builtin_alloca_uninitialized)
++#define __kstack_alloca __builtin_alloca_uninitialized
++#else
++#define __kstack_alloca __builtin_alloca
++#endif
++
+ /*
+ * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
+ * "VLA" from being unbounded (see above). 10 bits leaves enough room for
+@@ -36,7 +48,7 @@ void *__builtin_alloca(size_t size);
+ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+ &randomize_kstack_offset)) { \
+ u32 offset = raw_cpu_read(kstack_offset); \
+- u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \
++ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
+ /* Keep allocation even after "ptr" loses scope. */ \
+ asm volatile("" :: "r"(ptr) : "memory"); \
+ } \
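For illustration, the same feature-detection pattern used above for __kstack_alloca, shown standalone (a sketch, not part of the patch; example_alloca and the __has_builtin fallback definition are assumptions for compilers lacking the probe):

#ifndef __has_builtin
#define __has_builtin(x) 0				/* compiler lacks the probe */
#endif

#if __has_builtin(__builtin_alloca_uninitialized)
#define example_alloca(size) __builtin_alloca_uninitialized(size)
#else
#define example_alloca(size) __builtin_alloca(size)	/* may be auto-initialized */
#endif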
+diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
+index b676aa419eef8..f0e535f199bef 100644
+--- a/include/linux/ratelimit_types.h
++++ b/include/linux/ratelimit_types.h
+@@ -23,12 +23,16 @@ struct ratelimit_state {
+ unsigned long flags;
+ };
+
+-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
+- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+- .interval = interval_init, \
+- .burst = burst_init, \
++#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \
++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
++ .interval = interval_init, \
++ .burst = burst_init, \
++ .flags = flags_init, \
+ }
+
++#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
++ RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0)
++
+ #define RATELIMIT_STATE_INIT_DISABLED \
+ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+
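For illustration, RATELIMIT_STATE_INIT_FLAGS() lets a static ratelimit state carry flags from definition time instead of setting them at runtime. A sketch (not part of the patch; the interval/burst values are arbitrary and RATELIMIT_MSG_ON_RELEASE is assumed to be the flag from <linux/ratelimit.h>):

static struct ratelimit_state example_rs =
	RATELIMIT_STATE_INIT_FLAGS(example_rs, 5 * HZ, 10, RATELIMIT_MSG_ON_RELEASE);

static void example_warn(void)
{
	if (__ratelimit(&example_rs))
		pr_warn("example: rate-limited warning\n");
}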
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 434d12fe2d4f5..13bddb841ceb1 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -193,6 +193,7 @@ void synchronize_rcu_tasks_rude(void);
+
+ #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
+ void exit_tasks_rcu_start(void);
++void exit_tasks_rcu_stop(void);
+ void exit_tasks_rcu_finish(void);
+ #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+ #define rcu_tasks_qs(t, preempt) do { } while (0)
+@@ -200,6 +201,7 @@ void exit_tasks_rcu_finish(void);
+ #define call_rcu_tasks call_rcu
+ #define synchronize_rcu_tasks synchronize_rcu
+ static inline void exit_tasks_rcu_start(void) { }
++static inline void exit_tasks_rcu_stop(void) { }
+ static inline void exit_tasks_rcu_finish(void) { }
+ #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
+
+@@ -311,11 +313,18 @@ static inline int rcu_read_lock_any_held(void)
+ * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
+ * @c: condition to check
+ * @s: informative message
++ *
++ * This checks debug_lockdep_rcu_enabled() before checking (c) to
++ * prevent early boot splats due to lockdep not yet being initialized,
++ * and rechecks it after checking (c) to prevent false-positive splats
++ * due to races with lockdep being disabled. See commit 3066820034b5dd
++ * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail.
+ */
+ #define RCU_LOCKDEP_WARN(c, s) \
+ do { \
+ static bool __section(".data.unlikely") __warned; \
+- if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
++ if (debug_lockdep_rcu_enabled() && (c) && \
++ debug_lockdep_rcu_enabled() && !__warned) { \
+ __warned = true; \
+ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
+ } \
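For reference, a typical RCU_LOCKDEP_WARN() call site that the reordered debug_lockdep_rcu_enabled() check above protects from early-boot and teardown false positives (a sketch, not part of the patch; example_deref() and struct example_item are hypothetical):

struct example_item;

static struct example_item *example_deref(struct example_item __rcu **slot)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "example_deref() called outside an RCU read-side critical section");
	return rcu_dereference(*slot);
}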
+diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
+index 71902f41c9199..0c3edff6bdfff 100644
+--- a/include/linux/regulator/pca9450.h
++++ b/include/linux/regulator/pca9450.h
+@@ -196,11 +196,11 @@ enum {
+
+ /* PCA9450_REG_LDO3_VOLT bits */
+ #define LDO3_EN_MASK 0xC0
+-#define LDO3OUT_MASK 0x0F
++#define LDO3OUT_MASK 0x1F
+
+ /* PCA9450_REG_LDO4_VOLT bits */
+ #define LDO4_EN_MASK 0xC0
+-#define LDO4OUT_MASK 0x0F
++#define LDO4OUT_MASK 0x1F
+
+ /* PCA9450_REG_LDO5_VOLT bits */
+ #define LDO5L_EN_MASK 0xC0
+diff --git a/include/linux/reset.h b/include/linux/reset.h
+index db0e6115a2f6a..7bb5837375289 100644
+--- a/include/linux/reset.h
++++ b/include/linux/reset.h
+@@ -711,7 +711,7 @@ static inline int __must_check
+ devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
+ struct reset_control_bulk_data *rstcs)
+ {
+- return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true);
++ return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true);
+ }
+
+ /**
+diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
+index dac53fd3afea3..3c7d295746f67 100644
+--- a/include/linux/ring_buffer.h
++++ b/include/linux/ring_buffer.h
+@@ -100,8 +100,8 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
+
+ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
+ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
+- struct file *filp, poll_table *poll_table);
+-
++ struct file *filp, poll_table *poll_table, int full);
++void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
+
+ #define RING_BUFFER_ALL_CPUS -1
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index c976cc6de2574..c29d9c13378b3 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -39,12 +39,15 @@ struct anon_vma {
+ atomic_t refcount;
+
+ /*
+- * Count of child anon_vmas and VMAs which points to this anon_vma.
++ * Count of child anon_vmas. Equals to the count of all anon_vmas that
++ * have ->parent pointing to this one, including itself.
+ *
+ * This counter is used for making decision about reusing anon_vma
+ * instead of forking new one. See comments in function anon_vma_clone.
+ */
+- unsigned degree;
++ unsigned long num_children;
++ /* Count of VMAs whose ->anon_vma pointer points to this object. */
++ unsigned long num_active_vmas;
+
+ struct anon_vma *parent; /* Parent of this anon_vma */
+
+diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
+index d97dcd049f18f..a8dcf8a9ae885 100644
+--- a/include/linux/rpmsg.h
++++ b/include/linux/rpmsg.h
+@@ -231,7 +231,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev
+ /* This shouldn't be possible */
+ WARN_ON(1);
+
+- return ERR_PTR(-ENXIO);
++ return NULL;
+ }
+
+ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
+diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h
+index 159729cffd8e1..3247ed8e9ff0f 100644
+--- a/include/linux/rtsx_usb.h
++++ b/include/linux/rtsx_usb.h
+@@ -54,8 +54,6 @@ struct rtsx_ucr {
+ struct usb_device *pusb_dev;
+ struct usb_interface *pusb_intf;
+ struct usb_sg_request current_sg;
+- unsigned char *iobuf;
+- dma_addr_t iobuf_dma;
+
+ struct timer_list sg_timer;
+ struct mutex dev_mutex;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index c1a927ddec646..7bfc2b45cd99b 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -522,7 +522,7 @@ struct sched_statistics {
+ u64 nr_wakeups_passive;
+ u64 nr_wakeups_idle;
+ #endif
+-};
++} ____cacheline_aligned;
+
+ struct sched_entity {
+ /* For load-balancing: */
+@@ -538,8 +538,6 @@ struct sched_entity {
+
+ u64 nr_migrations;
+
+- struct sched_statistics statistics;
+-
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ int depth;
+ struct sched_entity *parent;
+@@ -803,6 +801,8 @@ struct task_struct {
+ struct uclamp_se uclamp[UCLAMP_CNT];
+ #endif
+
++ struct sched_statistics stats;
++
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ /* List of struct preempt_notifier: */
+ struct hlist_head preempt_notifiers;
+@@ -933,7 +933,7 @@ struct task_struct {
+ #endif
+ #ifdef CONFIG_EVENTFD
+ /* Recursion prevention for eventfd_signal() */
+- unsigned in_eventfd_signal:1;
++ unsigned in_eventfd:1;
+ #endif
+
+ unsigned long atomic_flags; /* Flags requiring atomic access. */
+@@ -1436,6 +1436,7 @@ struct task_struct {
+ int pagefault_disabled;
+ #ifdef CONFIG_MMU
+ struct task_struct *oom_reaper_list;
++ struct timer_list oom_reaper_timer;
+ #endif
+ #ifdef CONFIG_VMAP_STACK
+ struct vm_struct *stack_vm_area;
+@@ -1626,6 +1627,14 @@ static inline unsigned int task_state_index(struct task_struct *tsk)
+ if (tsk_state == TASK_IDLE)
+ state = TASK_REPORT_IDLE;
+
++ /*
++ * We're lying here, but rather than expose a completely new task state
++ * to userspace, we can make this appear as if the task has gone through
++ * a regular rt_mutex_lock() call.
++ */
++ if (tsk_state == TASK_RTLOCK_WAIT)
++ state = TASK_UNINTERRUPTIBLE;
++
+ return fls(state);
+ }
+
+@@ -1675,7 +1684,6 @@ extern struct pid *cad_pid;
+ #define PF_MEMALLOC 0x00000800 /* Allocating memory */
+ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
+ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
+-#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */
+ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
+ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */
+ #define PF_KSWAPD 0x00020000 /* I am kswapd */
+@@ -1789,7 +1797,9 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags)
+ }
+
+ extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
+-extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
++extern int task_can_attach(struct task_struct *p);
++extern int dl_bw_alloc(int cpu, u64 dl_bw);
++extern void dl_bw_free(int cpu, u64 dl_bw);
+ #ifdef CONFIG_SMP
+ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
+diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
+index 5561486fddef7..95fb7aaaec8de 100644
+--- a/include/linux/sched/mm.h
++++ b/include/linux/sched/mm.h
+@@ -106,6 +106,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
+ #endif /* CONFIG_MEMCG */
+
+ #ifdef CONFIG_MMU
++#ifndef arch_get_mmap_end
++#define arch_get_mmap_end(addr) (TASK_SIZE)
++#endif
++
++#ifndef arch_get_mmap_base
++#define arch_get_mmap_base(addr, base) (base)
++#endif
++
+ extern void arch_pick_mmap_layout(struct mm_struct *mm,
+ struct rlimit *rlim_stack);
+ extern unsigned long
+diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
+index e5af028c08b49..994c25640e156 100644
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
+ }
+ extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+-{
+- return tsk->pi_blocked_on != NULL;
+-}
+ #else
+ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+ return NULL;
+ }
+ # define rt_mutex_adjust_pi(p) do { } while (0)
+-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+-{
+- return false;
+-}
+ #endif
+
+ extern void normalize_rt_tasks(void);
+diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
+index e5f4ce622ee61..5f0e8403e8ceb 100644
+--- a/include/linux/sched/signal.h
++++ b/include/linux/sched/signal.h
+@@ -318,7 +318,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
+
+ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper);
+ int force_sig_pkuerr(void __user *addr, u32 pkey);
+-int force_sig_perf(void __user *addr, u32 type, u64 sig_data);
++int send_sig_perf(void __user *addr, u32 type, u64 sig_data);
+
+ int force_sig_ptrace_errno_trap(int errno, void __user *addr);
+ int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno);
+@@ -338,6 +338,8 @@ extern int kill_pid(struct pid *pid, int sig, int priv);
+ extern __must_check bool do_notify_parent(struct task_struct *, int);
+ extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
+ extern void force_sig(int);
++extern void force_fatal_sig(int);
++extern void force_exit_sig(int);
+ extern int send_sig(int, struct task_struct *, int);
+ extern int zap_other_threads(struct task_struct *p);
+ extern struct sigqueue *sigqueue_alloc(void);
+diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
+index ef02be869cf28..d23977e9035d4 100644
+--- a/include/linux/sched/task.h
++++ b/include/linux/sched/task.h
+@@ -54,11 +54,14 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
+ extern void init_idle(struct task_struct *idle, int cpu);
+
+ extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
++extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+ extern void sched_post_fork(struct task_struct *p);
+ extern void sched_dead(struct task_struct *p);
+
+ void __noreturn do_task_dead(void);
++void __noreturn make_task_dead(int signr);
+
++extern void mm_cache_init(void);
+ extern void proc_caches_init(void);
+
+ extern void fork_init(void);
+@@ -80,12 +83,11 @@ static inline void exit_thread(struct task_struct *tsk)
+ extern void do_group_exit(int);
+
+ extern void exit_files(struct task_struct *);
+-extern void exit_itimers(struct signal_struct *);
++extern void exit_itimers(struct task_struct *);
+
+ extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
+ struct task_struct *fork_idle(int);
+-struct mm_struct *copy_init_mm(void);
+ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
+ int kernel_wait(pid_t pid, int *stat);
+@@ -157,7 +159,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+ * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4(). Also used in procfs. Also
+ * pins the final release of task.io_context. Also protects ->cpuset and
+- * ->cgroup.subsys[]. And ->vfork_done.
++ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
+index 2413427e439c7..879a5c8f930b6 100644
+--- a/include/linux/sched/task_stack.h
++++ b/include/linux/sched/task_stack.h
+@@ -16,16 +16,20 @@
+ * try_get_task_stack() instead. task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+-static inline void *task_stack_page(const struct task_struct *task)
++static __always_inline void *task_stack_page(const struct task_struct *task)
+ {
+ return task->stack;
+ }
+
+ #define setup_thread_stack(new,old) do { } while(0)
+
+-static inline unsigned long *end_of_stack(const struct task_struct *task)
++static __always_inline unsigned long *end_of_stack(const struct task_struct *task)
+ {
++#ifdef CONFIG_STACK_GROWSUP
++ return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1;
++#else
+ return task->stack;
++#endif
+ }
+
+ #elif !defined(__HAVE_THREAD_FUNCTIONS)
+diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
+index 8f0f778b7c911..63a04a65e3106 100644
+--- a/include/linux/sched/topology.h
++++ b/include/linux/sched/topology.h
+@@ -74,6 +74,7 @@ struct sched_domain_shared {
+ atomic_t ref;
+ atomic_t nr_busy_cpus;
+ int has_idle_cores;
++ int nr_idle_scan;
+ };
+
+ struct sched_domain {
+diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
+index 80e781c51ddc1..d22f62203ee36 100644
+--- a/include/linux/scmi_protocol.h
++++ b/include/linux/scmi_protocol.h
+@@ -74,7 +74,7 @@ struct scmi_protocol_handle;
+ struct scmi_clk_proto_ops {
+ int (*count_get)(const struct scmi_protocol_handle *ph);
+
+- const struct scmi_clock_info *(*info_get)
++ const struct scmi_clock_info __must_check *(*info_get)
+ (const struct scmi_protocol_handle *ph, u32 clk_id);
+ int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ u64 *rate);
+@@ -452,7 +452,7 @@ enum scmi_sensor_class {
+ */
+ struct scmi_sensor_proto_ops {
+ int (*count_get)(const struct scmi_protocol_handle *ph);
+- const struct scmi_sensor_info *(*info_get)
++ const struct scmi_sensor_info __must_check *(*info_get)
+ (const struct scmi_protocol_handle *ph, u32 sensor_id);
+ int (*trip_point_config)(const struct scmi_protocol_handle *ph,
+ u32 sensor_id, u8 trip_id, u64 trip_value);
+diff --git a/include/linux/security.h b/include/linux/security.h
+index 5b7288521300b..da184e7b361f4 100644
+--- a/include/linux/security.h
++++ b/include/linux/security.h
+@@ -121,10 +121,12 @@ enum lockdown_reason {
+ LOCKDOWN_DEBUGFS,
+ LOCKDOWN_XMON_WR,
+ LOCKDOWN_BPF_WRITE_USER,
++ LOCKDOWN_DBG_WRITE_KERNEL,
+ LOCKDOWN_INTEGRITY_MAX,
+ LOCKDOWN_KCORE,
+ LOCKDOWN_KPROBES,
+ LOCKDOWN_BPF_READ_KERNEL,
++ LOCKDOWN_DBG_READ_KERNEL,
+ LOCKDOWN_PERF,
+ LOCKDOWN_TRACEFS,
+ LOCKDOWN_XMON_RW,
+@@ -258,13 +260,13 @@ extern int security_init(void);
+ extern int early_security_init(void);
+
+ /* Security operations */
+-int security_binder_set_context_mgr(struct task_struct *mgr);
+-int security_binder_transaction(struct task_struct *from,
+- struct task_struct *to);
+-int security_binder_transfer_binder(struct task_struct *from,
+- struct task_struct *to);
+-int security_binder_transfer_file(struct task_struct *from,
+- struct task_struct *to, struct file *file);
++int security_binder_set_context_mgr(const struct cred *mgr);
++int security_binder_transaction(const struct cred *from,
++ const struct cred *to);
++int security_binder_transfer_binder(const struct cred *from,
++ const struct cred *to);
++int security_binder_transfer_file(const struct cred *from,
++ const struct cred *to, struct file *file);
+ int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
+ int security_ptrace_traceme(struct task_struct *parent);
+ int security_capget(struct task_struct *target,
+@@ -508,25 +510,25 @@ static inline int early_security_init(void)
+ return 0;
+ }
+
+-static inline int security_binder_set_context_mgr(struct task_struct *mgr)
++static inline int security_binder_set_context_mgr(const struct cred *mgr)
+ {
+ return 0;
+ }
+
+-static inline int security_binder_transaction(struct task_struct *from,
+- struct task_struct *to)
++static inline int security_binder_transaction(const struct cred *from,
++ const struct cred *to)
+ {
+ return 0;
+ }
+
+-static inline int security_binder_transfer_binder(struct task_struct *from,
+- struct task_struct *to)
++static inline int security_binder_transfer_binder(const struct cred *from,
++ const struct cred *to)
+ {
+ return 0;
+ }
+
+-static inline int security_binder_transfer_file(struct task_struct *from,
+- struct task_struct *to,
++static inline int security_binder_transfer_file(const struct cred *from,
++ const struct cred *to,
+ struct file *file)
+ {
+ return 0;
+@@ -1041,6 +1043,11 @@ static inline void security_transfer_creds(struct cred *new,
+ {
+ }
+
++static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
++{
++ *secid = 0;
++}
++
+ static inline int security_kernel_act_as(struct cred *cred, u32 secid)
+ {
+ return 0;
+diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
+index dd99569595fd3..0b429111f85e4 100644
+--- a/include/linux/seq_file.h
++++ b/include/linux/seq_file.h
+@@ -194,7 +194,7 @@ static const struct file_operations __name ## _fops = { \
+ #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \
+ static int __name ## _open(struct inode *inode, struct file *file) \
+ { \
+- return single_open(file, __name ## _show, inode->i_private); \
++ return single_open(file, __name ## _show, PDE_DATA(inode)); \
+ } \
+ \
+ static const struct proc_ops __name ## _proc_ops = { \
+@@ -261,6 +261,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head,
+ extern struct list_head *seq_list_next(void *v, struct list_head *head,
+ loff_t *ppos);
+
++extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos);
++extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos);
++extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos);
++
+ /*
+ * Helpers for iteration over hlist_head-s in seq_files
+ */
+diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
+index 5db211f43b29e..979586e34da8f 100644
+--- a/include/linux/serial_8250.h
++++ b/include/linux/serial_8250.h
+@@ -74,6 +74,7 @@ struct uart_8250_port;
+ struct uart_8250_ops {
+ int (*setup_irq)(struct uart_8250_port *);
+ void (*release_irq)(struct uart_8250_port *);
++ void (*setup_timer)(struct uart_8250_port *);
+ };
+
+ struct uart_8250_em485 {
+@@ -97,7 +98,6 @@ struct uart_8250_port {
+ struct list_head list; /* ports on this IRQ */
+ u32 capabilities; /* port capabilities */
+ unsigned short bugs; /* port bugs */
+- bool fifo_bug; /* min RX trigger if enabled */
+ unsigned int tx_loadsz; /* transmit fifo load size */
+ unsigned char acr;
+ unsigned char fcr;
+diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
+index c58cc142d23f4..d5b6b1550d596 100644
+--- a/include/linux/serial_core.h
++++ b/include/linux/serial_core.h
+@@ -100,7 +100,7 @@ struct uart_icount {
+ __u32 buf_overrun;
+ };
+
+-typedef unsigned int __bitwise upf_t;
++typedef u64 __bitwise upf_t;
+ typedef unsigned int __bitwise upstat_t;
+
+ struct uart_port {
+@@ -207,6 +207,7 @@ struct uart_port {
+ #define UPF_FIXED_PORT ((__force upf_t) (1 << 29))
+ #define UPF_DEAD ((__force upf_t) (1 << 30))
+ #define UPF_IOREMAP ((__force upf_t) (1 << 31))
++#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32))
+
+ #define __UPF_CHANGE_MASK 0x17fff
+ #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK)
+@@ -253,6 +254,7 @@ struct uart_port {
+ struct attribute_group *attr_group; /* port specific attributes */
+ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */
+ struct serial_rs485 rs485;
++ const struct serial_rs485 *rs485_supported; /* Supported mask for serial_rs485 */
+ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */
+ struct serial_iso7816 iso7816;
+ void *private_data; /* generic platform data pointer */
+@@ -300,6 +302,23 @@ struct uart_state {
+ /* number of characters left in xmit buffer before we ask for more */
+ #define WAKEUP_CHARS 256
+
++/**
++ * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars
++ * @up: uart_port structure describing the port
++ * @chars: number of characters sent
++ *
++ * This function advances the tail of circular xmit buffer by the number of
++ * @chars transmitted and handles accounting of transmitted bytes (into
++ * @up's icount.tx).
++ */
++static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars)
++{
++ struct circ_buf *xmit = &up->state->xmit;
++
++ xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1);
++ up->icount.tx += chars;
++}
++
+ struct module;
+ struct tty_driver;
+
+@@ -388,6 +407,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED;
+ static inline int setup_earlycon(char *buf) { return 0; }
+ #endif
+
++static inline bool uart_console_enabled(struct uart_port *port)
++{
++ return uart_console(port) && (port->cons->flags & CON_ENABLED);
++}
++
+ struct uart_port *uart_get_console(struct uart_port *ports, int nr,
+ struct console *c);
+ int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
+@@ -458,6 +482,8 @@ extern void uart_handle_cts_change(struct uart_port *uport,
+ extern void uart_insert_char(struct uart_port *port, unsigned int status,
+ unsigned int overrun, unsigned int ch, unsigned int flag);
+
++void uart_xchar_out(struct uart_port *uport, int offset);
++
+ #ifdef CONFIG_MAGIC_SYSRQ_SERIAL
+ #define SYSRQ_TIMEOUT (HZ * 5)
+
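For illustration, a driver TX path built around the new uart_xmit_advance() helper (a sketch, not part of the patch; example_tx_chars(), fifo_room and the bare writeb() to port->membase stand in for a real driver's FIFO access):

static void example_tx_chars(struct uart_port *port, unsigned int fifo_room)
{
	struct circ_buf *xmit = &port->state->xmit;

	while (fifo_room-- && !uart_circ_empty(xmit)) {
		writeb(xmit->buf[xmit->tail], port->membase);	/* TX FIFO write */
		uart_xmit_advance(port, 1);	/* advance tail and count icount.tx */
	}

	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
		uart_write_wakeup(port);
}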
+diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
+index c255273b02810..37ad81058d6ae 100644
+--- a/include/linux/sh_intc.h
++++ b/include/linux/sh_intc.h
+@@ -97,7 +97,10 @@ struct intc_hw_desc {
+ unsigned int nr_subgroups;
+ };
+
+-#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a)
++#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \
++ typeof(NULL): 0, \
++ default: sizeof(a)))
++#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a)
+
+ #define INTC_HW_DESC(vectors, groups, mask_regs, \
+ prio_regs, sense_regs, ack_regs) \
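For illustration, how the _Generic() selection above behaves for the two kinds of arguments _INTC_ARRAY() receives (a sketch, not part of the patch; the names are hypothetical, and the NULL case leans on the GNU sizeof(void) == 1 extension just as the macro itself does):

static unsigned int example_intc_counts(void)
{
	static int tbl[4];
	unsigned int n_tbl  = _INTC_SIZEOF_OR_ZERO(tbl) / sizeof(*tbl);	/* == 4 */
	unsigned int n_null = _INTC_SIZEOF_OR_ZERO(NULL) / sizeof(*NULL);	/* == 0 */

	return n_tbl + n_null;
}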
+diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
+index 34cb28b8f16ca..a70b2bdbf4d96 100644
+--- a/include/linux/signal_types.h
++++ b/include/linux/signal_types.h
+@@ -70,6 +70,9 @@ struct ksignal {
+ int sig;
+ };
+
++/* Used to kill the race between sigaction and forced signals */
++#define SA_IMMUTABLE 0x00800000
++
+ #ifndef __ARCH_UAPI_SA_FLAGS
+ #ifdef SA_RESTORER
+ #define __ARCH_UAPI_SA_FLAGS SA_RESTORER
+diff --git a/include/linux/siphash.h b/include/linux/siphash.h
+index bf21591a9e5e6..0bb5ecd507bef 100644
+--- a/include/linux/siphash.h
++++ b/include/linux/siphash.h
+@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key)
+ }
+
+ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+-#endif
+
+ u64 siphash_1u64(const u64 a, const siphash_key_t *key);
+ u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
+@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
+ static inline u64 siphash(const void *data, size_t len,
+ const siphash_key_t *key)
+ {
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+- if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
++ !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+ return __siphash_unaligned(data, len, key);
+-#endif
+ return ___siphash_aligned(data, len, key);
+ }
+
+@@ -96,10 +93,8 @@ typedef struct {
+
+ u32 __hsiphash_aligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+-#endif
+
+ u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
+ u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
+@@ -135,11 +130,38 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
+ static inline u32 hsiphash(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+- if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
++ !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+ return __hsiphash_unaligned(data, len, key);
+-#endif
+ return ___hsiphash_aligned(data, len, key);
+ }
+
++/*
++ * These macros expose the raw SipHash and HalfSipHash permutations.
++ * Do not use them directly! If you think you have a use for them,
++ * be sure to CC the maintainer of this file explaining why.
++ */
++
++#define SIPHASH_PERMUTATION(a, b, c, d) ( \
++ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
++ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
++ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
++ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
++
++#define SIPHASH_CONST_0 0x736f6d6570736575ULL
++#define SIPHASH_CONST_1 0x646f72616e646f6dULL
++#define SIPHASH_CONST_2 0x6c7967656e657261ULL
++#define SIPHASH_CONST_3 0x7465646279746573ULL
++
++#define HSIPHASH_PERMUTATION(a, b, c, d) ( \
++ (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \
++ (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \
++ (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \
++ (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16))
++
++#define HSIPHASH_CONST_0 0U
++#define HSIPHASH_CONST_1 0U
++#define HSIPHASH_CONST_2 0x6c796765U
++#define HSIPHASH_CONST_3 0x74656462U
++
+ #endif /* _LINUX_SIPHASH_H */
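For illustration, with __siphash_unaligned() now always built and selected via IS_ENABLED() above, callers hash arbitrary buffers without worrying about alignment. A sketch (not part of the patch; example_hash_buf() is hypothetical and the key is seeded lazily with get_random_once() from <linux/once.h>):

static u64 example_hash_buf(const void *buf, size_t len)
{
	static siphash_key_t key;

	get_random_once(&key, sizeof(key));	/* seed the key once */
	return siphash(buf, len, &key);		/* aligned or not, same call */
}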
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 841e2f0f5240b..7ed1d4472c0c8 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -259,6 +259,7 @@ struct nf_bridge_info {
+ u8 pkt_otherhost:1;
+ u8 in_prerouting:1;
+ u8 bridged_dnat:1;
++ u8 sabotage_in_done:1;
+ __u16 frag_max_size;
+ struct net_device *physindev;
+
+@@ -286,7 +287,10 @@ struct nf_bridge_info {
+ struct tc_skb_ext {
+ __u32 chain;
+ __u16 mru;
+- bool post_ct;
++ __u16 zone;
++ u8 post_ct:1;
++ u8 post_ct_snat:1;
++ u8 post_ct_dnat:1;
+ };
+ #endif
+
+@@ -301,6 +305,41 @@ struct sk_buff_head {
+
+ struct sk_buff;
+
++/* The reason of skb drop, which is used in kfree_skb_reason().
++ * en...maybe they should be splited by group?
++ *
++ * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
++ * used to translate the reason to string.
++ */
++enum skb_drop_reason {
++ SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */
++ SKB_DROP_REASON_NO_SOCKET, /* socket not found */
++ SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */
++ SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */
++ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */
++ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */
++ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */
++ SKB_DROP_REASON_OTHERHOST, /* packet doesn't belong to current
++ * host (interface is in promisc
++ * mode)
++ */
++ SKB_DROP_REASON_IP_CSUM, /* IP checksum error */
++ SKB_DROP_REASON_IP_INHDR, /* there is something wrong with
++ * IP header (see
++ * IPSTATS_MIB_INHDRERRORS)
++ */
++ SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed.
++ * see the document for rp_filter
++ * in ip-sysctl.rst for more
++ * information
++ */
++ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2
++ * is multicast, but L3 is
++ * unicast.
++ */
++ SKB_DROP_REASON_MAX,
++};
++
+ /* To allow 64K frame to be packed as single skb without frag_list we
+ * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
+ * buffers which do not start on a page boundary.
+@@ -687,6 +726,7 @@ typedef unsigned char *sk_buff_data_t;
+ * @csum_level: indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as
+ * CHECKSUM_UNNECESSARY (max 3)
++ * @scm_io_uring: SKB holds io_uring registered files
+ * @dst_pending_confirm: need to confirm neighbour
+ * @decrypted: Decrypted SKB
+ * @slow_gro: state present at GRO time, slower prepare step required
+@@ -872,6 +912,7 @@ struct sk_buff {
+ __u8 decrypted:1;
+ #endif
+ __u8 slow_gro:1;
++ __u8 scm_io_uring:1;
+
+ #ifdef CONFIG_NET_SCHED
+ __u16 tc_index; /* traffic control index */
+@@ -1071,8 +1112,18 @@ static inline bool skb_unref(struct sk_buff *skb)
+ return true;
+ }
+
++void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
++
++/**
++ * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
++ * @skb: buffer to free
++ */
++static inline void kfree_skb(struct sk_buff *skb)
++{
++ kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
++}
++
+ void skb_release_head_state(struct sk_buff *skb);
+-void kfree_skb(struct sk_buff *skb);
+ void kfree_skb_list(struct sk_buff *segs);
+ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
+ void skb_tx_error(struct sk_buff *skb);
+@@ -1370,7 +1421,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ u16 *ctinfo_map, size_t mapsize,
+- bool post_ct);
++ bool post_ct, u16 zone);
+ void
+ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+@@ -1433,6 +1484,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb)
+ {
+ return skb->end;
+ }
++
++static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
++{
++ skb->end = offset;
++}
+ #else
+ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
+ {
+@@ -1443,6 +1499,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb)
+ {
+ return skb->end - skb->head;
+ }
++
++static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
++{
++ skb->end = skb->head + offset;
++}
+ #endif
+
+ /* Internal */
+@@ -1671,6 +1732,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
+ return 0;
+ }
+
++/* This variant of skb_unclone() makes sure skb->truesize
++ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
++ *
++ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
++ * when various debugging features are in place.
++ */
++int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
++static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
++{
++ might_sleep_if(gfpflags_allow_blocking(pri));
++
++ if (skb_cloned(skb))
++ return __skb_unclone_keeptruesize(skb, pri);
++ return 0;
++}
++
+ /**
+ * skb_header_cloned - is the header a clone
+ * @skb: buffer to check
+@@ -2158,6 +2235,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+ return skb_headlen(skb) + __skb_pagelen(skb);
+ }
+
++static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
++ int i, struct page *page,
++ int off, int size)
++{
++ skb_frag_t *frag = &shinfo->frags[i];
++
++ /*
++ * Propagate page pfmemalloc to the skb if we can. The problem is
++ * that not all callers have unique ownership of the page but rely
++ * on page_is_pfmemalloc doing the right thing(tm).
++ */
++ frag->bv_page = page;
++ frag->bv_offset = off;
++ skb_frag_size_set(frag, size);
++}
++
+ /**
+ * __skb_fill_page_desc - initialise a paged fragment in an skb
+ * @skb: buffer containing fragment to be initialised
+@@ -2174,17 +2267,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
+ {
+- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+-
+- /*
+- * Propagate page pfmemalloc to the skb if we can. The problem is
+- * that not all callers have unique ownership of the page but rely
+- * on page_is_pfmemalloc doing the right thing(tm).
+- */
+- frag->bv_page = page;
+- frag->bv_offset = off;
+- skb_frag_size_set(frag, size);
+-
++ __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
+ page = compound_head(page);
+ if (page_is_pfmemalloc(page))
+ skb->pfmemalloc = true;
+@@ -2211,6 +2294,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
+ skb_shinfo(skb)->nr_frags = i + 1;
+ }
+
++/**
++ * skb_fill_page_desc_noacc - initialise a paged fragment in an skb
++ * @skb: buffer containing fragment to be initialised
++ * @i: paged fragment index to initialise
++ * @page: the page to use for this fragment
++ * @off: the offset to the data with @page
++ * @size: the length of the data
++ *
++ * Variant of skb_fill_page_desc() which does not deal with
++ * pfmemalloc, if page is not owned by us.
++ */
++static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
++ struct page *page, int off,
++ int size)
++{
++ struct skb_shared_info *shinfo = skb_shinfo(skb);
++
++ __skb_fill_page_desc_noacc(shinfo, i, page, off, size);
++ shinfo->nr_frags = i + 1;
++}
++
+ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+ int size, unsigned int truesize);
+
+@@ -2254,6 +2358,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
+
+ #endif /* NET_SKBUFF_DATA_USES_OFFSET */
+
++static inline void skb_assert_len(struct sk_buff *skb)
++{
++#ifdef CONFIG_DEBUG_NET
++ if (WARN_ONCE(!skb->len, "%s\n", __func__))
++ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
++#endif /* CONFIG_DEBUG_NET */
++}
++
+ /*
+ * Add data to an sk_buff
+ */
+@@ -4350,7 +4462,7 @@ static inline void nf_reset_ct(struct sk_buff *skb)
+
+ static inline void nf_reset_trace(struct sk_buff *skb)
+ {
+-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
++#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
+ skb->nf_trace = 0;
+ #endif
+ }
+@@ -4370,7 +4482,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
+ dst->_nfct = src->_nfct;
+ nf_conntrack_get(skb_nfct(src));
+ #endif
+-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
++#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
+ if (copy)
+ dst->nf_trace = src->nf_trace;
+ #endif
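For illustration, a drop annotated with one of the reasons introduced above; kfree_skb_reason() frees the skb exactly like kfree_skb() but records why (a sketch, not part of the patch; example_rx_check() and min_len are hypothetical):

static int example_rx_check(struct sk_buff *skb, unsigned int min_len)
{
	if (skb->len < min_len) {
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_SMALL);
		return -EINVAL;
	}
	return 0;
}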
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index 1ce9a9eb223b6..4273505d309a7 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -63,6 +63,7 @@ struct sk_psock_progs {
+
+ enum sk_psock_state_bits {
+ SK_PSOCK_TX_ENABLED,
++ SK_PSOCK_RX_STRP_ENABLED,
+ };
+
+ struct sk_psock_link {
+@@ -83,6 +84,7 @@ struct sk_psock {
+ u32 apply_bytes;
+ u32 cork_bytes;
+ u32 eval;
++ bool redir_ingress; /* undefined if sk_redir is null */
+ struct sk_msg *cork;
+ struct sk_psock_progs progs;
+ #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+@@ -96,6 +98,7 @@ struct sk_psock {
+ spinlock_t link_lock;
+ refcount_t refcnt;
+ void (*saved_unhash)(struct sock *sk);
++ void (*saved_destroy)(struct sock *sk);
+ void (*saved_close)(struct sock *sk, long timeout);
+ void (*saved_write_space)(struct sock *sk);
+ void (*saved_data_ready)(struct sock *sk);
+@@ -283,7 +286,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
+
+ static inline struct sk_psock *sk_psock(const struct sock *sk)
+ {
+- return rcu_dereference_sk_user_data(sk);
++ return __rcu_dereference_sk_user_data_with_flags(sk,
++ SK_USER_DATA_PSOCK);
+ }
+
+ static inline void sk_psock_set_state(struct sk_psock *psock,
+@@ -310,21 +314,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+ kfree_skb(skb);
+ }
+
+-static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+-{
+- if (msg->skb)
+- sock_drop(psock->sk, msg->skb);
+- kfree(msg);
+-}
+-
+ static inline void sk_psock_queue_msg(struct sk_psock *psock,
+ struct sk_msg *msg)
+ {
+ spin_lock_bh(&psock->ingress_lock);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ list_add_tail(&msg->list, &psock->ingress_msg);
+- else
+- drop_sk_msg(psock, msg);
++ else {
++ sk_msg_free(psock->sk, msg);
++ kfree(msg);
++ }
+ spin_unlock_bh(&psock->ingress_lock);
+ }
+
+@@ -385,7 +384,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
+ }
+
+ struct sk_psock *sk_psock_init(struct sock *sk, int node);
+-void sk_psock_stop(struct sk_psock *psock, bool wait);
++void sk_psock_stop(struct sk_psock *psock);
+
+ #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
+@@ -509,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
+
+ #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
+
+-/* We only have one bit so far. */
+-#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
++#define BPF_F_STRPARSER (1UL << 1)
++
++/* We only have two bits so far. */
++#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER)
++
++static inline bool skb_bpf_strparser(const struct sk_buff *skb)
++{
++ unsigned long sk_redir = skb->_sk_redir;
++
++ return sk_redir & BPF_F_STRPARSER;
++}
++
++static inline void skb_bpf_set_strparser(struct sk_buff *skb)
++{
++ skb->_sk_redir |= BPF_F_STRPARSER;
++}
+
+ static inline bool skb_bpf_ingress(const struct sk_buff *skb)
+ {
+diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h
+index fa1d6af0164ee..d683251a0b409 100644
+--- a/include/linux/soc/marvell/octeontx2/asm.h
++++ b/include/linux/soc/marvell/octeontx2/asm.h
+@@ -5,6 +5,7 @@
+ #ifndef __SOC_OTX2_ASM_H
+ #define __SOC_OTX2_ASM_H
+
++#include <linux/types.h>
+ #if defined(CONFIG_ARM64)
+ /*
+ * otx2_lmt_flush is used for LMT store operation.
+@@ -34,9 +35,23 @@
+ : [rf] "+r"(val) \
+ : [rs] "r"(addr)); \
+ })
++
++static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr)
++{
++ u64 result;
++
++ asm volatile (".cpu generic+lse\n"
++ "ldadda %x[i], %x[r], [%[b]]"
++ : [r] "=r" (result), "+m" (*ptr)
++ : [i] "r" (incr), [b] "r" (ptr)
++ : "memory");
++ return result;
++}
++
+ #else
+ #define otx2_lmt_flush(ioaddr) ({ 0; })
+ #define cn10k_lmt_flush(val, addr) ({ addr = val; })
++#define otx2_atomic64_fetch_add(incr, ptr) ({ incr; })
+ #endif
+
+ #endif /* __SOC_OTX2_ASM_H */
+diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
+index 137f9f2ac4c3c..7bca213a3f838 100644
+--- a/include/linux/soc/qcom/apr.h
++++ b/include/linux/soc/qcom/apr.h
+@@ -79,6 +79,15 @@ struct apr_resp_pkt {
+ #define APR_SVC_MAJOR_VERSION(v) ((v >> 16) & 0xFF)
+ #define APR_SVC_MINOR_VERSION(v) (v & 0xFF)
+
++struct packet_router;
++struct pkt_router_svc {
++ struct device *dev;
++ struct packet_router *pr;
++ spinlock_t lock;
++ int id;
++ void *priv;
++};
++
+ struct apr_device {
+ struct device dev;
+ uint16_t svc_id;
+@@ -86,11 +95,12 @@ struct apr_device {
+ uint32_t version;
+ char name[APR_NAME_SIZE];
+ const char *service_path;
+- spinlock_t lock;
++ struct pkt_router_svc svc;
+ struct list_head node;
+ };
+
+ #define to_apr_device(d) container_of(d, struct apr_device, dev)
++#define svc_to_apr_device(d) container_of(d, struct apr_device, svc)
+
+ struct apr_driver {
+ int (*probe)(struct apr_device *sl);
+diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h
+new file mode 100644
+index 0000000000000..3c2a82e606f81
+--- /dev/null
++++ b/include/linux/soc/qcom/qcom_aoss.h
+@@ -0,0 +1,38 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
++ */
++
++#ifndef __QCOM_AOSS_H__
++#define __QCOM_AOSS_H__
++
++#include <linux/err.h>
++#include <linux/device.h>
++
++struct qmp;
++
++#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP)
++
++int qmp_send(struct qmp *qmp, const void *data, size_t len);
++struct qmp *qmp_get(struct device *dev);
++void qmp_put(struct qmp *qmp);
++
++#else
++
++static inline int qmp_send(struct qmp *qmp, const void *data, size_t len)
++{
++ return -ENODEV;
++}
++
++static inline struct qmp *qmp_get(struct device *dev)
++{
++ return ERR_PTR(-ENODEV);
++}
++
++static inline void qmp_put(struct qmp *qmp)
++{
++}
++
++#endif
++
++#endif
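For illustration, a consumer of the new qmp_get()/qmp_send()/qmp_put() interface declared above (a sketch, not part of the patch; example_notify_aoss() and the message payload are made up, and the stubs above make the same code build when CONFIG_QCOM_AOSS_QMP is off):

static int example_notify_aoss(struct device *dev)
{
	static const char msg[] = "{class: example, res: on}";
	struct qmp *qmp;
	int ret;

	qmp = qmp_get(dev);
	if (IS_ERR(qmp))
		return PTR_ERR(qmp);

	ret = qmp_send(qmp, msg, sizeof(msg));
	qmp_put(qmp);
	return ret;
}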
+diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
+index 0aad7009b50e6..bd0d11af76c5e 100644
+--- a/include/linux/soc/ti/ti_sci_protocol.h
++++ b/include/linux/soc/ti/ti_sci_protocol.h
+@@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle,
+
+ static inline struct ti_sci_resource *
+ devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev,
+- u32 dev_id, u32 sub_type);
++ u32 dev_id, u32 sub_type)
+ {
+ return ERR_PTR(-EINVAL);
+ }
+diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
+index 76ce3f3ac0f22..bf6f0decb3f6d 100644
+--- a/include/linux/soundwire/sdw.h
++++ b/include/linux/soundwire/sdw.h
+@@ -646,9 +646,6 @@ struct sdw_slave_ops {
+ * @dev_num: Current Device Number, values can be 0 or dev_num_sticky
+ * @dev_num_sticky: one-time static Device Number assigned by Bus
+ * @probed: boolean tracking driver state
+- * @probe_complete: completion utility to control potential races
+- * on startup between driver probe/initialization and SoundWire
+- * Slave state changes/implementation-defined interrupts
+ * @enumeration_complete: completion utility to control potential races
+ * on startup between device enumeration and read/write access to the
+ * Slave device
+@@ -663,6 +660,7 @@ struct sdw_slave_ops {
+ * for a Slave happens for the first time after enumeration
+ * @is_mockup_device: status flag used to squelch errors in the command/control
+ * protocol for SoundWire mockup devices
++ * @sdw_dev_lock: mutex used to protect callbacks/remove races
+ */
+ struct sdw_slave {
+ struct sdw_slave_id id;
+@@ -680,12 +678,12 @@ struct sdw_slave {
+ u16 dev_num;
+ u16 dev_num_sticky;
+ bool probed;
+- struct completion probe_complete;
+ struct completion enumeration_complete;
+ struct completion initialization_complete;
+ u32 unattach_request;
+ bool first_interrupt_done;
+ bool is_mockup_device;
++ struct mutex sdw_dev_lock; /* protect callbacks/remove races */
+ };
+
+ #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev)
+diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
+index 6bb4bc1a5f545..22919a94ca19d 100644
+--- a/include/linux/stackdepot.h
++++ b/include/linux/stackdepot.h
+@@ -19,8 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
+ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
+ unsigned long **entries);
+
+-unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
+-
+ #ifdef CONFIG_STACKDEPOT
+ int stack_depot_init(void);
+ #else
+diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
+index 9edecb494e9e2..bef158815e83d 100644
+--- a/include/linux/stacktrace.h
++++ b/include/linux/stacktrace.h
+@@ -21,6 +21,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
+ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
+ unsigned int size, unsigned int skipnr);
+ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
++unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
+
+ /* Internal interfaces. Do not use in generic code */
+ #ifdef CONFIG_ARCH_STACKWALK
+diff --git a/include/linux/static_call.h b/include/linux/static_call.h
+index 3e56a9751c062..fcc5b48989b3c 100644
+--- a/include/linux/static_call.h
++++ b/include/linux/static_call.h
+@@ -248,10 +248,7 @@ static inline int static_call_text_reserved(void *start, void *end)
+ return 0;
+ }
+
+-static inline long __static_call_return0(void)
+-{
+- return 0;
+-}
++extern long __static_call_return0(void);
+
+ #define EXPORT_STATIC_CALL(name) \
+ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
+diff --git a/include/linux/stddef.h b/include/linux/stddef.h
+index 998a4ba28eba4..31fdbb784c24e 100644
+--- a/include/linux/stddef.h
++++ b/include/linux/stddef.h
+@@ -36,4 +36,65 @@ enum {
+ #define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
++/**
++ * struct_group() - Wrap a set of declarations in a mirrored struct
++ *
++ * @NAME: The identifier name of the mirrored sub-struct
++ * @MEMBERS: The member declarations for the mirrored structs
++ *
++ * Used to create an anonymous union of two structs with identical
++ * layout and size: one anonymous and one named. The former can be
++ * used normally without sub-struct naming, and the latter can be
++ * used to reason about the start, end, and size of the group of
++ * struct members.
++ */
++#define struct_group(NAME, MEMBERS...) \
++ __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS)
++
++/**
++ * struct_group_attr() - Create a struct_group() with trailing attributes
++ *
++ * @NAME: The identifier name of the mirrored sub-struct
++ * @ATTRS: Any struct attributes to apply
++ * @MEMBERS: The member declarations for the mirrored structs
++ *
++ * Used to create an anonymous union of two structs with identical
++ * layout and size: one anonymous and one named. The former can be
++ * used normally without sub-struct naming, and the latter can be
++ * used to reason about the start, end, and size of the group of
++ * struct members. Includes structure attributes argument.
++ */
++#define struct_group_attr(NAME, ATTRS, MEMBERS...) \
++ __struct_group(/* no tag */, NAME, ATTRS, MEMBERS)
++
++/**
++ * struct_group_tagged() - Create a struct_group with a reusable tag
++ *
++ * @TAG: The tag name for the named sub-struct
++ * @NAME: The identifier name of the mirrored sub-struct
++ * @MEMBERS: The member declarations for the mirrored structs
++ *
++ * Used to create an anonymous union of two structs with identical
++ * layout and size: one anonymous and one named. The former can be
++ * used normally without sub-struct naming, and the latter can be
++ * used to reason about the start, end, and size of the group of
++ * struct members. Includes struct tag argument for the named copy,
++ * so the specified layout can be reused later.
++ */
++#define struct_group_tagged(TAG, NAME, MEMBERS...) \
++ __struct_group(TAG, NAME, /* no attrs */, MEMBERS)
++
++/**
++ * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
++ *
++ * @TYPE: The type of each flexible array element
++ * @NAME: The name of the flexible array member
++ *
++ * In order to have a flexible array member in a union or alone in a
++ * struct, it needs to be wrapped in an anonymous struct with at least 1
++ * named member, but that member can be empty.
++ */
++#define DECLARE_FLEX_ARRAY(TYPE, NAME) \
++ __DECLARE_FLEX_ARRAY(TYPE, NAME)
++
+ #endif
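For illustration, the typical use of struct_group() documented above: a run of members addressed as one unit so a single memcpy() stays within the bounds FORTIFY_SOURCE can see (a sketch, not part of the patch; struct example_hdr and its fields are hypothetical):

struct example_hdr {
	u8	version;
	struct_group(addrs,
		u8	src[6];
		u8	dst[6];
	);
	u8	flags;
};

static void example_copy_addrs(struct example_hdr *hdr, const void *both)
{
	/* One copy spans src and dst; sizeof() covers exactly the group. */
	memcpy(&hdr->addrs, both, sizeof(hdr->addrs));
}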
+diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
+index a6f03b36fc4f7..24bc3f7967c3b 100644
+--- a/include/linux/stmmac.h
++++ b/include/linux/stmmac.h
+@@ -233,6 +233,7 @@ struct plat_stmmacenet_data {
+ int (*clks_config)(void *priv, bool enabled);
+ int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
+ void *ctx);
++ void (*dump_debug_regs)(void *priv);
+ void *bsp_priv;
+ struct clk *stmmac_clk;
+ struct clk *pclk;
+@@ -250,6 +251,7 @@ struct plat_stmmacenet_data {
+ int rss_en;
+ int mac_port_sel_speed;
+ bool en_tx_lpi_clockgating;
++ bool rx_clk_runs_in_lpi;
+ int has_xgmac;
+ bool vlan_fail_q_en;
+ u8 vlan_fail_q;
+@@ -258,6 +260,7 @@ struct plat_stmmacenet_data {
+ bool has_crossts;
+ int int_snapshot_num;
+ int ext_snapshot_num;
++ bool int_snapshot_en;
+ bool ext_snapshot_en;
+ bool multi_msi_en;
+ int msi_mac_vec;
+@@ -268,5 +271,6 @@ struct plat_stmmacenet_data {
+ int msi_rx_base_vec;
+ int msi_tx_base_vec;
+ bool use_phy_wol;
++ bool sph_disable;
+ };
+ #endif
+diff --git a/include/linux/string.h b/include/linux/string.h
+index 5e96d656be7ae..d68097b4f600b 100644
+--- a/include/linux/string.h
++++ b/include/linux/string.h
+@@ -262,23 +262,8 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o
+ #include <linux/fortify-string.h>
+ #endif
+
+-/**
+- * memcpy_and_pad - Copy one buffer to another with padding
+- * @dest: Where to copy to
+- * @dest_len: The destination buffer size
+- * @src: Where to copy from
+- * @count: The number of bytes to copy
+- * @pad: Character to use for padding if space is left in destination.
+- */
+-static inline void memcpy_and_pad(void *dest, size_t dest_len,
+- const void *src, size_t count, int pad)
+-{
+- if (dest_len > count) {
+- memcpy(dest, src, count);
+- memset(dest + count, pad, dest_len - count);
+- } else
+- memcpy(dest, src, dest_len);
+-}
++void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
++ int pad);
+
+ /**
+ * str_has_prefix - Test if a string has a given prefix
+diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
+index a4661646adc9c..9fcf5ffc4f9ad 100644
+--- a/include/linux/sunrpc/clnt.h
++++ b/include/linux/sunrpc/clnt.h
+@@ -159,6 +159,7 @@ struct rpc_add_xprt_test {
+ #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
+ #define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
+ #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
++#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
+
+ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
+ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
+diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
+index cd188a527d169..3b35b6f6533aa 100644
+--- a/include/linux/sunrpc/rpc_pipe_fs.h
++++ b/include/linux/sunrpc/rpc_pipe_fs.h
+@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
+ char __user *, size_t);
+ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
+
++/* returns true if the msg is in-flight, i.e., already eaten by the peer */
++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
++ return (msg->copied != 0 && list_empty(&msg->list));
++}
++
+ struct rpc_clnt;
+ extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
+ extern int rpc_remove_client_dir(struct rpc_clnt *);
+diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
+index a237b8dbf6086..ba047a145e092 100644
+--- a/include/linux/sunrpc/sched.h
++++ b/include/linux/sunrpc/sched.h
+@@ -90,8 +90,7 @@ struct rpc_task {
+ #endif
+ unsigned char tk_priority : 2,/* Task priority */
+ tk_garb_retry : 2,
+- tk_cred_retry : 2,
+- tk_rebind_retry : 2;
++ tk_cred_retry : 2;
+ };
+
+ typedef void (*rpc_action)(struct rpc_task *);
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 064c96157d1f0..664a54e330af3 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -246,7 +246,6 @@ struct svc_rqst {
+ void * rq_xprt_ctxt; /* transport specific context ptr */
+ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
+
+- size_t rq_xprt_hlen; /* xprt header len */
+ struct xdr_buf rq_arg;
+ struct xdr_stream rq_arg_stream;
+ struct xdr_stream rq_res_stream;
+@@ -384,8 +383,8 @@ struct svc_deferred_req {
+ size_t addrlen;
+ struct sockaddr_storage daddr; /* where reply must come from */
+ size_t daddrlen;
++ void *xprt_ctxt;
+ struct cache_deferred_req handle;
+- size_t xprt_hlen;
+ int argslen;
+ __be32 args[];
+ };
+@@ -532,8 +531,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp,
+ unsigned int offset,
+ unsigned int length);
+ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
+- struct page **pages,
+- struct kvec *first, size_t total);
++ struct xdr_buf *payload);
+ char *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
+ struct kvec *first, void *p,
+ size_t total);
+@@ -566,16 +564,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
+ }
+
+ /**
+- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding
++ * svcxdr_init_decode - Prepare an xdr_stream for Call decoding
+ * @rqstp: controlling server RPC transaction context
+ *
++ * This function currently assumes the RPC header in rq_arg has
++ * already been decoded. Upon return, xdr->p points to the
++ * location of the upper layer header.
+ */
+ static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
+ {
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+- struct kvec *argv = rqstp->rq_arg.head;
++ struct xdr_buf *buf = &rqstp->rq_arg;
++ struct kvec *argv = buf->head;
+
+- xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL);
++ /*
++ * svc_getnl() and friends do not keep the xdr_buf's ::len
++ * field up to date. Refresh that field before initializing
++ * the argument decoding stream.
++ */
++ buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len;
++
++ xdr_init_decode(xdr, buf, argv->iov_base, NULL);
+ xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
+ }
+
+@@ -598,7 +607,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
+ xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
+ buf->len = resv->iov_len;
+ xdr->page_ptr = buf->pages - 1;
+- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages);
++ buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages);
+ buf->buflen -= rqstp->rq_auth_slack;
+ xdr->rqst = NULL;
+ }
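Given the note above that xdr->p ends up at the upper layer header, a server-side decoder can work purely on rq_arg_stream after svcxdr_init_decode(); a small sketch with an invented function name:

	static bool example_decode_count(struct svc_rqst *rqstp, u32 *count)
	{
		return xdr_stream_decode_u32(&rqstp->rq_arg_stream, count) == 0;
	}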
+diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
+index 24aa159d29a7f..fbc4bd423b355 100644
+--- a/include/linux/sunrpc/svc_rdma.h
++++ b/include/linux/sunrpc/svc_rdma.h
+@@ -176,7 +176,7 @@ extern struct svc_rdma_recv_ctxt *
+ extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt);
+ extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
+-extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
++extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
+ extern int svc_rdma_recvfrom(struct svc_rqst *);
+
+ /* svc_rdma_rw.c */
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index 571f605bc91ef..154eee6bc6a01 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -23,7 +23,7 @@ struct svc_xprt_ops {
+ int (*xpo_sendto)(struct svc_rqst *);
+ int (*xpo_result_payload)(struct svc_rqst *, unsigned int,
+ unsigned int);
+- void (*xpo_release_rqst)(struct svc_rqst *);
++ void (*xpo_release_ctxt)(struct svc_xprt *xprt, void *ctxt);
+ void (*xpo_detach)(struct svc_xprt *);
+ void (*xpo_free)(struct svc_xprt *);
+ void (*xpo_secure_port)(struct svc_rqst *rqstp);
+diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
+index b519609af1d02..3a2c714d6b629 100644
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -405,8 +405,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
+ */
+ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
+ {
+- *p = n ? xdr_one : xdr_zero;
+- return p++;
++ *p++ = n ? xdr_one : xdr_zero;
++ return p;
+ }
+
+ /**
+@@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
+
+ if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
+ return -EBADMSG;
++ if (len > SIZE_MAX / sizeof(*p))
++ return -EBADMSG;
+ p = xdr_inline_decode(xdr, len * sizeof(*p));
+ if (unlikely(!p))
+ return -EBADMSG;
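The added bound on len is the usual guard against the byte-count multiplication wrapping before it reaches xdr_inline_decode(); in isolation the pattern looks like this:

	if (len > SIZE_MAX / sizeof(u32))
		return -EBADMSG;	/* len * sizeof(u32) would overflow size_t */
	p = xdr_inline_decode(xdr, len * sizeof(u32));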
+diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
+index 8c2a712cb2420..689062afdd610 100644
+--- a/include/linux/sunrpc/xprtsock.h
++++ b/include/linux/sunrpc/xprtsock.h
+@@ -89,5 +89,6 @@ struct sock_xprt {
+ #define XPRT_SOCK_WAKE_WRITE (5)
+ #define XPRT_SOCK_WAKE_PENDING (6)
+ #define XPRT_SOCK_WAKE_DISCONNECT (7)
++#define XPRT_SOCK_CONNECT_SENT (8)
+
+ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
+diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
+index 068e1982ad371..74bfdffaf7b0e 100644
+--- a/include/linux/surface_aggregator/controller.h
++++ b/include/linux/surface_aggregator/controller.h
+@@ -792,8 +792,8 @@ enum ssam_event_mask {
+ #define SSAM_EVENT_REGISTRY_KIP \
+ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28)
+
+-#define SSAM_EVENT_REGISTRY_REG \
+- SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02)
++#define SSAM_EVENT_REGISTRY_REG(tid)\
++ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, tid, 0x01, 0x02)
+
+ /**
+ * enum ssam_event_notifier_flags - Flags for event notifiers.
+diff --git a/include/linux/suspend.h b/include/linux/suspend.h
+index 8af13ba60c7e4..4bcd65679cee0 100644
+--- a/include/linux/suspend.h
++++ b/include/linux/suspend.h
+@@ -430,15 +430,7 @@ struct platform_hibernation_ops {
+
+ #ifdef CONFIG_HIBERNATION
+ /* kernel/power/snapshot.c */
+-extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
+-static inline void __init register_nosave_region(unsigned long b, unsigned long e)
+-{
+- __register_nosave_region(b, e, 0);
+-}
+-static inline void __init register_nosave_region_late(unsigned long b, unsigned long e)
+-{
+- __register_nosave_region(b, e, 1);
+-}
++extern void register_nosave_region(unsigned long b, unsigned long e);
+ extern int swsusp_page_is_forbidden(struct page *);
+ extern void swsusp_set_page_free(struct page *);
+ extern void swsusp_unset_page_free(struct page *);
+@@ -457,7 +449,6 @@ int pfn_is_nosave(unsigned long pfn);
+ int hibernate_quiet_exec(int (*func)(void *data), void *data);
+ #else /* CONFIG_HIBERNATION */
+ static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+-static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
+ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
+ static inline void swsusp_set_page_free(struct page *p) {}
+ static inline void swsusp_unset_page_free(struct page *p) {}
+@@ -505,14 +496,14 @@ extern void ksys_sync_helper(void);
+
+ /* drivers/base/power/wakeup.c */
+ extern bool events_check_enabled;
+-extern unsigned int pm_wakeup_irq;
+ extern suspend_state_t pm_suspend_target_state;
+
+ extern bool pm_wakeup_pending(void);
+ extern void pm_system_wakeup(void);
+ extern void pm_system_cancel_wakeup(void);
+-extern void pm_wakeup_clear(bool reset);
++extern void pm_wakeup_clear(unsigned int irq_number);
+ extern void pm_system_irq_wakeup(unsigned int irq_number);
++extern unsigned int pm_wakeup_irq(void);
+ extern bool pm_get_wakeup_count(unsigned int *count, bool block);
+ extern bool pm_save_wakeup_count(unsigned int count);
+ extern void pm_wakep_autosleep_enabled(bool set);
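With the _late variant removed, arch and platform code calls the single remaining helper during early boot. A sketch, with the physical range variables invented:

	/* Exclude a firmware-owned range from the hibernation image. */
	register_nosave_region(PFN_DOWN(fw_start), PFN_UP(fw_end));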
+diff --git a/include/linux/swap.h b/include/linux/swap.h
+index ba52f3a3478e3..4efd267e2937e 100644
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -378,7 +378,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
+ extern unsigned long zone_reclaimable_pages(struct zone *zone);
+ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+ gfp_t gfp_mask, nodemask_t *mask);
+-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+ unsigned long nr_pages,
+ gfp_t gfp_mask,
+diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
+index b0cb2a9973f49..569272871375c 100644
+--- a/include/linux/swiotlb.h
++++ b/include/linux/swiotlb.h
+@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void);
+
+ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
+ size_t mapping_size, size_t alloc_size,
+- enum dma_data_direction dir, unsigned long attrs);
++ unsigned int alloc_aligned_mask, enum dma_data_direction dir,
++ unsigned long attrs);
+
+ extern void swiotlb_tbl_unmap_single(struct device *hwdev,
+ phys_addr_t tlb_addr,
+diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
+index 1fa2b69c6fc3d..47cf70c8eb93c 100644
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -38,9 +38,16 @@ struct ctl_table_header;
+ struct ctl_dir;
+
+ /* Keep the same order as in fs/proc/proc_sysctl.c */
+-#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
+-#define SYSCTL_ONE ((void *)&sysctl_vals[1])
+-#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
++#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0])
++#define SYSCTL_ZERO ((void *)&sysctl_vals[1])
++#define SYSCTL_ONE ((void *)&sysctl_vals[2])
++#define SYSCTL_TWO ((void *)&sysctl_vals[3])
++#define SYSCTL_FOUR ((void *)&sysctl_vals[4])
++#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5])
++#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6])
++#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7])
++#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8])
++#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9])
+
+ extern const int sysctl_vals[];
+
+@@ -199,6 +206,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+ void unregister_sysctl_table(struct ctl_table_header * table);
+
+ extern int sysctl_init(void);
++extern void __register_sysctl_init(const char *path, struct ctl_table *table,
++ const char *table_name);
++#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+ void do_sysctl_args(void);
+
+ extern int pwrsw_enabled;
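register_sysctl_init() targets boot-time tables whose registration must not fail; a minimal sketch using the expanded constant set (the table, variable and path choice are illustrative only):

	static int example_enabled;

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_enabled",
			.data		= &example_enabled,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_minmax,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_ONE,
		},
		{ }
	};

	register_sysctl_init("kernel", example_table);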
+diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h
+index b0dcfa26d07bd..c1ef5fc60a3cb 100644
+--- a/include/linux/sysfb.h
++++ b/include/linux/sysfb.h
+@@ -55,14 +55,31 @@ struct efifb_dmi_info {
+ int flags;
+ };
+
++#ifdef CONFIG_SYSFB
++
++void sysfb_disable(void);
++
++#else /* CONFIG_SYSFB */
++
++static inline void sysfb_disable(void)
++{
++}
++
++#endif /* CONFIG_SYSFB */
++
+ #ifdef CONFIG_EFI
+
+ extern struct efifb_dmi_info efifb_dmi_list[];
+-void sysfb_apply_efi_quirks(struct platform_device *pd);
++void sysfb_apply_efi_quirks(void);
++void sysfb_set_efifb_fwnode(struct platform_device *pd);
+
+ #else /* CONFIG_EFI */
+
+-static inline void sysfb_apply_efi_quirks(struct platform_device *pd)
++static inline void sysfb_apply_efi_quirks(void)
++{
++}
++
++static inline void sysfb_set_efifb_fwnode(struct platform_device *pd)
+ {
+ }
+
+@@ -72,8 +89,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd)
+
+ bool sysfb_parse_mode(const struct screen_info *si,
+ struct simplefb_platform_data *mode);
+-int sysfb_create_simplefb(const struct screen_info *si,
+- const struct simplefb_platform_data *mode);
++struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
++ const struct simplefb_platform_data *mode);
+
+ #else /* CONFIG_SYSFB_SIMPLE */
+
+@@ -83,10 +100,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si,
+ return false;
+ }
+
+-static inline int sysfb_create_simplefb(const struct screen_info *si,
+- const struct simplefb_platform_data *mode)
++static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
++ const struct simplefb_platform_data *mode)
+ {
+- return -EINVAL;
++ return ERR_PTR(-EINVAL);
+ }
+
+ #endif /* CONFIG_SYSFB_SIMPLE */
+diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h
+index b0d36a9934ccd..5cf6f6f82aa70 100644
+--- a/include/linux/tca6416_keypad.h
++++ b/include/linux/tca6416_keypad.h
+@@ -25,7 +25,6 @@ struct tca6416_keys_platform_data {
+ unsigned int rep:1; /* enable input subsystem auto repeat */
+ uint16_t pinmask;
+ uint16_t invert;
+- int irq_is_gpio;
+ int use_polling; /* use polling if Interrupt is not connected*/
+ };
+ #endif
+diff --git a/include/linux/tcp.h b/include/linux/tcp.h
+index 48d8a363319e5..07a84ae6bf81c 100644
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -265,7 +265,7 @@ struct tcp_sock {
+ u32 packets_out; /* Packets which are "in flight" */
+ u32 retrans_out; /* Retransmitted packets out */
+ u32 max_packets_out; /* max packets_out in last window */
+- u32 max_packets_seq; /* right edge of max_packets_out flight */
++ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
+
+ u16 urg_data; /* Saved octet of OOB data and control flags */
+ u8 ecn_flags; /* ECN status bits. */
+@@ -473,7 +473,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
+
+- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
++ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+ }
+
+ static inline void tcp_move_syn(struct tcp_sock *tp,
+diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
+index 3ebfea0781f10..38b701b7af4cf 100644
+--- a/include/linux/tee_drv.h
++++ b/include/linux/tee_drv.h
+@@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method,
+ * @offset: offset of buffer in user space
+ * @pages: locked pages from userspace
+ * @num_pages: number of locked pages
+- * @dmabuf: dmabuf used to for exporting to user space
++ * @refcount: reference counter
+ * @flags: defined by TEE_SHM_* in tee_drv.h
+ * @id: unique id of a shared memory object on this device
+ *
+@@ -210,7 +210,7 @@ struct tee_shm {
+ unsigned int offset;
+ struct page **pages;
+ size_t num_pages;
+- struct dma_buf *dmabuf;
++ refcount_t refcount;
+ u32 flags;
+ int id;
+ };
+@@ -582,4 +582,18 @@ struct tee_client_driver {
+ #define to_tee_client_driver(d) \
+ container_of(d, struct tee_client_driver, driver)
+
++/**
++ * teedev_open() - Open a struct tee_device
++ * @teedev: Device to open
++ *
++ * @return a pointer to struct tee_context on success or an ERR_PTR on failure.
++ */
++struct tee_context *teedev_open(struct tee_device *teedev);
++
++/**
++ * teedev_close_context() - closes a struct tee_context
++ * @ctx: The struct tee_context to close
++ */
++void teedev_close_context(struct tee_context *ctx);
++
+ #endif /*__TEE_DRV_H*/
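The newly exported pair gives in-kernel users the same open/close lifecycle the character device path uses; sketched below, assuming teedev was already looked up elsewhere:

	struct tee_context *ctx;

	ctx = teedev_open(teedev);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	/* ... open sessions and invoke functions through ctx ... */

	teedev_close_context(ctx);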
+diff --git a/include/linux/thermal.h b/include/linux/thermal.h
+index c314893970b35..b94314ed0c965 100644
+--- a/include/linux/thermal.h
++++ b/include/linux/thermal.h
+@@ -92,6 +92,7 @@ struct thermal_cooling_device_ops {
+ struct thermal_cooling_device {
+ int id;
+ char *type;
++ unsigned long max_state;
+ struct device device;
+ struct device_node *np;
+ void *devdata;
+@@ -113,7 +114,7 @@ struct thermal_cooling_device {
+ * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis
+ * @mode: current mode of this thermal zone
+ * @devdata: private pointer for device private data
+- * @trips: number of trip points the thermal zone supports
++ * @num_trips: number of trip points the thermal zone supports
+ * @trips_disabled; bitmap for disabled trips
+ * @passive_delay_jiffies: number of jiffies to wait between polls when
+ * performing passive cooling.
+@@ -153,7 +154,7 @@ struct thermal_zone_device {
+ struct thermal_attr *trip_hyst_attrs;
+ enum thermal_device_mode mode;
+ void *devdata;
+- int trips;
++ int num_trips;
+ unsigned long trips_disabled; /* bitmap for disabled trips */
+ unsigned long passive_delay_jiffies;
+ unsigned long polling_delay_jiffies;
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index 0999f6317978f..9a073535c0bdd 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -118,6 +118,15 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
+ return test_bit(flag, (unsigned long *)&ti->flags);
+ }
+
++/*
++ * This may be used in noinstr code, and needs to be __always_inline to prevent
++ * inadvertent instrumentation.
++ */
++static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti)
++{
++ return READ_ONCE(ti->flags);
++}
++
+ #define set_thread_flag(flag) \
+ set_ti_thread_flag(current_thread_info(), flag)
+ #define clear_thread_flag(flag) \
+@@ -130,6 +139,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
+ test_and_clear_ti_thread_flag(current_thread_info(), flag)
+ #define test_thread_flag(flag) \
+ test_ti_thread_flag(current_thread_info(), flag)
++#define read_thread_flags() \
++ read_ti_thread_flags(current_thread_info())
++
++#define read_task_thread_flags(t) \
++ read_ti_thread_flags(task_thread_info(t))
+
+ #ifdef CONFIG_GENERIC_ENTRY
+ #define set_syscall_work(fl) \
+diff --git a/include/linux/tick.h b/include/linux/tick.h
+index bfd571f18cfdc..9459fef5b8573 100644
+--- a/include/linux/tick.h
++++ b/include/linux/tick.h
+@@ -216,6 +216,7 @@ extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
+ enum tick_dep_bits bit);
+ extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+ enum tick_dep_bits bit);
++extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu);
+
+ /*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+@@ -280,6 +281,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+ static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+ static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
++static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; }
+
+ static inline void tick_dep_set(enum tick_dep_bits bit) { }
+ static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
+index 93884086f3924..adc80e29168ea 100644
+--- a/include/linux/timerqueue.h
++++ b/include/linux/timerqueue.h
+@@ -35,7 +35,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+ {
+ struct rb_node *leftmost = rb_first_cached(&head->rb_root);
+
+- return rb_entry(leftmost, struct timerqueue_node, node);
++ return rb_entry_safe(leftmost, struct timerqueue_node, node);
+ }
+
+ static inline void timerqueue_init(struct timerqueue_node *node)
+diff --git a/include/linux/timex.h b/include/linux/timex.h
+index 059b18eb1f1fa..3871b06bd302c 100644
+--- a/include/linux/timex.h
++++ b/include/linux/timex.h
+@@ -62,6 +62,8 @@
+ #include <linux/types.h>
+ #include <linux/param.h>
+
++unsigned long random_get_entropy_fallback(void);
++
+ #include <asm/timex.h>
+
+ #ifndef random_get_entropy
+@@ -74,8 +76,14 @@
+ *
+ * By default we use get_cycles() for this purpose, but individual
+ * architectures may override this in their asm/timex.h header file.
++ * If a given arch does not have get_cycles(), then we fallback to
++ * using random_get_entropy_fallback().
+ */
+-#define random_get_entropy() get_cycles()
++#ifdef get_cycles
++#define random_get_entropy() ((unsigned long)get_cycles())
++#else
++#define random_get_entropy() random_get_entropy_fallback()
++#endif
+ #endif
+
+ /*
+diff --git a/include/linux/torture.h b/include/linux/torture.h
+index 0910c5803f35a..24f58e50a94b8 100644
+--- a/include/linux/torture.h
++++ b/include/linux/torture.h
+@@ -47,6 +47,14 @@ do { \
+ } while (0)
+ void verbose_torout_sleep(void);
+
++#define torture_init_error(firsterr) \
++({ \
++ int ___firsterr = (firsterr); \
++ \
++ WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \
++ ___firsterr < 0; \
++})
++
+ /* Definitions for online/offline exerciser. */
+ #ifdef CONFIG_HOTPLUG_CPU
+ int torture_num_online_cpus(void);
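torture_init_error() wraps the "warn once and abort module init" pattern the torture tests repeat after every setup step. Roughly, with an invented kthread and task pointer:

	firsterr = torture_create_kthread(example_worker, NULL, example_task);
	if (torture_init_error(firsterr))
		goto unwind;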
+diff --git a/include/linux/tpm.h b/include/linux/tpm.h
+index aa11fe323c56b..12d827734686d 100644
+--- a/include/linux/tpm.h
++++ b/include/linux/tpm.h
+@@ -269,6 +269,7 @@ enum tpm2_cc_attrs {
+ #define TPM_VID_INTEL 0x8086
+ #define TPM_VID_WINBOND 0x1050
+ #define TPM_VID_STM 0x104A
++#define TPM_VID_ATML 0x1114
+
+ enum tpm_chip_flags {
+ TPM_CHIP_FLAG_TPM2 = BIT(1),
+diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
+index 739ba9a03ec16..7d68a5cc58816 100644
+--- a/include/linux/tpm_eventlog.h
++++ b/include/linux/tpm_eventlog.h
+@@ -157,7 +157,7 @@ struct tcg_algorithm_info {
+ * Return: size of the event on success, 0 on failure
+ */
+
+-static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
++static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ struct tcg_pcr_event *event_header,
+ bool do_mapping)
+ {
+@@ -198,8 +198,8 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ * The loop below will unmap these fields if the log is larger than
+ * one page, so save them here for reference:
+ */
+- count = READ_ONCE(event->count);
+- event_type = READ_ONCE(event->event_type);
++ count = event->count;
++ event_type = event->event_type;
+
+ /* Verify that it's the log header */
+ if (event_header->pcr_idx != 0 ||
+diff --git a/include/linux/trace.h b/include/linux/trace.h
+index bf169612ffe12..2a70a447184c9 100644
+--- a/include/linux/trace.h
++++ b/include/linux/trace.h
+@@ -2,8 +2,6 @@
+ #ifndef _LINUX_TRACE_H
+ #define _LINUX_TRACE_H
+
+-#ifdef CONFIG_TRACING
+-
+ #define TRACE_EXPORT_FUNCTION BIT(0)
+ #define TRACE_EXPORT_EVENT BIT(1)
+ #define TRACE_EXPORT_MARKER BIT(2)
+@@ -28,10 +26,24 @@ struct trace_export {
+ int flags;
+ };
+
++struct trace_array;
++
++#ifdef CONFIG_TRACING
++
+ int register_ftrace_export(struct trace_export *export);
+ int unregister_ftrace_export(struct trace_export *export);
+
+-struct trace_array;
++/**
++ * trace_array_puts - write a constant string into the trace buffer.
++ * @tr: The trace array to write to
++ * @str: The constant string to write
++ */
++#define trace_array_puts(tr, str) \
++ ({ \
++ str ? __trace_array_puts(tr, _THIS_IP_, str, strlen(str)) : -1; \
++ })
++int __trace_array_puts(struct trace_array *tr, unsigned long ip,
++ const char *str, int size);
+
+ void trace_printk_init_buffers(void);
+ __printf(3, 4)
+@@ -48,6 +60,38 @@ void osnoise_arch_unregister(void);
+ void osnoise_trace_irq_entry(int id);
+ void osnoise_trace_irq_exit(int id, const char *desc);
+
++#else /* CONFIG_TRACING */
++static inline int register_ftrace_export(struct trace_export *export)
++{
++ return -EINVAL;
++}
++static inline int unregister_ftrace_export(struct trace_export *export)
++{
++ return 0;
++}
++static inline void trace_printk_init_buffers(void)
++{
++}
++static inline int trace_array_printk(struct trace_array *tr, unsigned long ip,
++ const char *fmt, ...)
++{
++ return 0;
++}
++static inline int trace_array_init_printk(struct trace_array *tr)
++{
++ return -EINVAL;
++}
++static inline void trace_array_put(struct trace_array *tr)
++{
++}
++static inline struct trace_array *trace_array_get_by_name(const char *name)
++{
++ return NULL;
++}
++static inline int trace_array_destroy(struct trace_array *tr)
++{
++ return 0;
++}
+ #endif /* CONFIG_TRACING */
+
+ #endif /* _LINUX_TRACE_H */
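With the stubs in place, callers no longer need their own CONFIG_TRACING guards. Writing a constant string into a named trace instance might look like this (the instance name is invented):

	struct trace_array *tr = trace_array_get_by_name("example");

	if (tr)
		trace_array_puts(tr, "checkpoint reached\n");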
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index 3e475eeb5a995..9c91c3531d830 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -91,6 +91,7 @@ struct trace_iterator {
+ unsigned int temp_size;
+ char *fmt; /* modified format holder */
+ unsigned int fmt_size;
++ long wait_index;
+
+ /* trace_seq for __print_flags() and __print_symbolic() etc. */
+ struct trace_seq tmp_seq;
+@@ -673,7 +674,7 @@ struct trace_event_file {
+
+ #define PERF_MAX_TRACE_SIZE 2048
+
+-#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
++#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */
+
+ enum event_trigger_type {
+ ETT_NONE = (0),
+@@ -845,7 +846,8 @@ extern int perf_uprobe_init(struct perf_event *event,
+ extern void perf_uprobe_destroy(struct perf_event *event);
+ extern int bpf_get_uprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **filename,
+- u64 *probe_offset, bool perf_type_tracepoint);
++ u64 *probe_offset, u64 *probe_addr,
++ bool perf_type_tracepoint);
+ #endif
+ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+ char *filter_str);
+diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
+index 28031b15f8783..cd01da18e4050 100644
+--- a/include/linux/tracepoint.h
++++ b/include/linux/tracepoint.h
+@@ -231,12 +231,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ *
+- * When lockdep is enabled, we make sure to always do the RCU portions of
+- * the tracepoint code, regardless of whether tracing is on. However,
+- * don't check if the condition is false, due to interaction with idle
+- * instrumentation. This lets us find RCU issues triggered with tracepoints
+- * even when this tracepoint is off. This code has no purpose other than
+- * poking RCU a bit.
++ * When lockdep is enabled, we make sure to always test if RCU is
++ * "watching" regardless if the tracepoint is enabled or not. Tracepoints
++ * require RCU to be active, and it should always warn at the tracepoint
++ * site if it is not watching, as it will need to be active when the
++ * tracepoint is enabled.
+ */
+ #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
+ extern int __traceiter_##name(data_proto); \
+@@ -249,9 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+ TP_ARGS(args), \
+ TP_CONDITION(cond), 0); \
+ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
+- rcu_read_lock_sched_notrace(); \
+- rcu_dereference_sched(__tracepoint_##name.funcs);\
+- rcu_read_unlock_sched_notrace(); \
++ WARN_ON_ONCE(!rcu_is_watching()); \
+ } \
+ } \
+ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
+diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h
+index 63076fb835e34..2efc271a96fa6 100644
+--- a/include/linux/transport_class.h
++++ b/include/linux/transport_class.h
+@@ -70,8 +70,14 @@ void transport_destroy_device(struct device *);
+ static inline int
+ transport_register_device(struct device *dev)
+ {
++ int ret;
++
+ transport_setup_device(dev);
+- return transport_add_device(dev);
++ ret = transport_add_device(dev);
++ if (ret)
++ transport_destroy_device(dev);
++
++ return ret;
+ }
+
+ static inline void
+diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
+index 32284992b31a0..1fb727b7b969a 100644
+--- a/include/linux/tty_flip.h
++++ b/include/linux/tty_flip.h
+@@ -17,7 +17,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port,
+ extern int tty_prepare_flip_string(struct tty_port *port,
+ unsigned char **chars, size_t size);
+ extern void tty_flip_buffer_push(struct tty_port *port);
+-void tty_schedule_flip(struct tty_port *port);
+ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag);
+
+ static inline int tty_insert_flip_char(struct tty_port *port,
+diff --git a/include/linux/uacce.h b/include/linux/uacce.h
+index 48e319f402751..9ce88c28b0a87 100644
+--- a/include/linux/uacce.h
++++ b/include/linux/uacce.h
+@@ -70,6 +70,7 @@ enum uacce_q_state {
+ * @wait: wait queue head
+ * @list: index into uacce queues list
+ * @qfrs: pointer of qfr regions
++ * @mutex: protects queue state
+ * @state: queue state machine
+ * @pasid: pasid associated to the mm
+ * @handle: iommu_sva handle returned by iommu_sva_bind_device()
+@@ -80,6 +81,7 @@ struct uacce_queue {
+ wait_queue_head_t wait;
+ struct list_head list;
+ struct uacce_qfile_region *qfrs[UACCE_MAX_REGION];
++ struct mutex mutex;
+ enum uacce_q_state state;
+ u32 pasid;
+ struct iommu_sva *handle;
+@@ -97,9 +99,9 @@ struct uacce_queue {
+ * @dev_id: id of the uacce device
+ * @cdev: cdev of the uacce
+ * @dev: dev of the uacce
++ * @mutex: protects uacce operation
+ * @priv: private pointer of the uacce
+ * @queues: list of queues
+- * @queues_lock: lock for queues list
+ * @inode: core vfs
+ */
+ struct uacce_device {
+@@ -113,9 +115,9 @@ struct uacce_device {
+ u32 dev_id;
+ struct cdev *cdev;
+ struct device dev;
++ struct mutex mutex;
+ void *priv;
+ struct list_head queues;
+- struct mutex queues_lock;
+ struct inode *inode;
+ };
+
+diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
+index ac0394087f7d4..e1d59ca6530da 100644
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -338,6 +338,10 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
+ size_t size = min(ksize, usize);
+ size_t rest = max(ksize, usize) - size;
+
++ /* Double check if ksize is larger than a known object size. */
++ if (WARN_ON_ONCE(ksize > __builtin_object_size(dst, 1)))
++ return -E2BIG;
++
+ /* Deal with trailing bytes. */
+ if (usize < ksize) {
+ memset(dst + size, 0, rest);
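The new WARN_ON_ONCE() only fires when ksize exceeds the compiler-known size of dst, which cannot happen in the usual extensible-struct call pattern; sketched with invented names:

	struct example_args kargs;
	int err;

	err = copy_struct_from_user(&kargs, sizeof(kargs), uargs, usize);
	if (err)
		return err;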
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index ae66dadd85434..0727276e7538c 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -75,6 +75,7 @@ struct udp_sock {
+ * For encapsulation sockets.
+ */
+ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
++ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
+ int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
+ void (*encap_destroy)(struct sock *sk);
+
+diff --git a/include/linux/uio.h b/include/linux/uio.h
+index 207101a9c5c32..6350354f97e90 100644
+--- a/include/linux/uio.h
++++ b/include/linux/uio.h
+@@ -35,6 +35,7 @@ struct iov_iter_state {
+
+ struct iov_iter {
+ u8 iter_type;
++ bool nofault;
+ bool data_source;
+ size_t iov_offset;
+ size_t count;
+@@ -133,7 +134,8 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+ size_t bytes, struct iov_iter *i);
+ void iov_iter_advance(struct iov_iter *i, size_t bytes);
+ void iov_iter_revert(struct iov_iter *i, size_t bytes);
+-int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes);
++size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
++size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
+ size_t iov_iter_single_seg_count(const struct iov_iter *i);
+ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+ struct iov_iter *i);
+diff --git a/include/linux/usb.h b/include/linux/usb.h
+index 7ccaa76a9a968..987550fd46faf 100644
+--- a/include/linux/usb.h
++++ b/include/linux/usb.h
+@@ -279,6 +279,11 @@ void usb_put_intf(struct usb_interface *intf);
+ #define USB_MAXINTERFACES 32
+ #define USB_MAXIADS (USB_MAXINTERFACES/2)
+
++bool usb_check_bulk_endpoints(
++ const struct usb_interface *intf, const u8 *ep_addrs);
++bool usb_check_int_endpoints(
++ const struct usb_interface *intf, const u8 *ep_addrs);
++
+ /*
+ * USB Resume Timer: Every Host controller driver should drive the resume
+ * signalling on the bus for the amount of time defined by this macro.
+@@ -575,6 +580,7 @@ struct usb3_lpm_parameters {
+ * @devaddr: device address, XHCI: assigned by HW, others: same as devnum
+ * @can_submit: URBs may be submitted
+ * @persist_enabled: USB_PERSIST enabled for this device
++ * @reset_in_progress: the device is being reset
+ * @have_langid: whether string_langid is valid
+ * @authorized: policy has said we can use it;
+ * (user space) policy determines if we authorize this device to be
+@@ -661,6 +667,7 @@ struct usb_device {
+
+ unsigned can_submit:1;
+ unsigned persist_enabled:1;
++ unsigned reset_in_progress:1;
+ unsigned have_langid:1;
+ unsigned authorized:1;
+ unsigned authenticated:1;
+@@ -747,11 +754,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf);
+ extern int usb_acpi_set_power_state(struct usb_device *hdev, int index,
+ bool enable);
+ extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index);
++extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index);
+ #else
+ static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index,
+ bool enable) { return 0; }
+ static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
+ { return true; }
++static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
++ { return 0; }
+ #endif
+
+ /* USB autosuspend and autoresume */
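The two new endpoint checkers take a zero-terminated array of expected endpoint addresses, letting a driver's probe() reject malformed descriptors up front; the addresses below are only an example:

	static const u8 wanted_eps[] = { 0x81 /* bulk in */, 0x02 /* bulk out */, 0 };

	if (!usb_check_bulk_endpoints(intf, wanted_eps))
		return -ENODEV;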
+diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
+index 9d27622792867..0399d1226323b 100644
+--- a/include/linux/usb/composite.h
++++ b/include/linux/usb/composite.h
+@@ -426,6 +426,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite,
+ extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
+ struct usb_ep *ep0);
+ void composite_dev_cleanup(struct usb_composite_dev *cdev);
++void check_remote_wakeup_config(struct usb_gadget *g,
++ struct usb_configuration *c);
+
+ static inline struct usb_composite_driver *to_cdriver(
+ struct usb_gadget_driver *gdrv)
+diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
+index 10fe57cf40bec..c5bc739266ed6 100644
+--- a/include/linux/usb/gadget.h
++++ b/include/linux/usb/gadget.h
+@@ -311,6 +311,7 @@ struct usb_udc;
+ struct usb_gadget_ops {
+ int (*get_frame)(struct usb_gadget *);
+ int (*wakeup)(struct usb_gadget *);
++ int (*set_remote_wakeup)(struct usb_gadget *, int set);
+ int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered);
+ int (*vbus_session) (struct usb_gadget *, int is_active);
+ int (*vbus_draw) (struct usb_gadget *, unsigned mA);
+@@ -385,6 +386,8 @@ struct usb_gadget_ops {
+ * @connected: True if gadget is connected.
+ * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag
+ * indicates that it supports LPM as per the LPM ECN & errata.
++ * @wakeup_capable: True if gadget is capable of sending remote wakeup.
++ * @wakeup_armed: True if gadget is armed by the host for remote wakeup.
+ * @irq: the interrupt number for device controller.
+ *
+ * Gadgets have a mostly-portable "gadget driver" implementing device
+@@ -445,6 +448,8 @@ struct usb_gadget {
+ unsigned deactivated:1;
+ unsigned connected:1;
+ unsigned lpm_capable:1;
++ unsigned wakeup_capable:1;
++ unsigned wakeup_armed:1;
+ int irq;
+ };
+ #define work_to_gadget(w) (container_of((w), struct usb_gadget, work))
+@@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g)
+ #if IS_ENABLED(CONFIG_USB_GADGET)
+ int usb_gadget_frame_number(struct usb_gadget *gadget);
+ int usb_gadget_wakeup(struct usb_gadget *gadget);
++int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set);
+ int usb_gadget_set_selfpowered(struct usb_gadget *gadget);
+ int usb_gadget_clear_selfpowered(struct usb_gadget *gadget);
+ int usb_gadget_vbus_connect(struct usb_gadget *gadget);
+@@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget)
+ { return 0; }
+ static inline int usb_gadget_wakeup(struct usb_gadget *gadget)
+ { return 0; }
++static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set)
++{ return 0; }
+ static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget)
+ { return 0; }
+ static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget)
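A composite or function driver would normally only arm remote wakeup when the UDC reports the capability; very roughly, and purely as a sketch:

	if (gadget->wakeup_capable)
		usb_gadget_set_remote_wakeup(gadget, 1);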
+diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
+index 2c1fc9212cf28..e4e18a5faa9f5 100644
+--- a/include/linux/usb/hcd.h
++++ b/include/linux/usb/hcd.h
+@@ -66,6 +66,7 @@
+
+ struct giveback_urb_bh {
+ bool running;
++ bool high_prio;
+ spinlock_t lock;
+ struct list_head head;
+ struct tasklet_struct bh;
+@@ -514,6 +515,11 @@ void *hcd_buffer_alloc(struct usb_bus *bus, size_t size,
+ void hcd_buffer_free(struct usb_bus *bus, size_t size,
+ void *addr, dma_addr_t dma);
+
++void *hcd_buffer_alloc_pages(struct usb_hcd *hcd,
++ size_t size, gfp_t mem_flags, dma_addr_t *dma);
++void hcd_buffer_free_pages(struct usb_hcd *hcd,
++ size_t size, void *addr, dma_addr_t dma);
++
+ /* generic bus glue, needed for host controllers that don't use PCI */
+ extern irqreturn_t usb_hcd_irq(int irq, void *__hcd);
+
+diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
+index 031f148ab3734..b5deafd91f67b 100644
+--- a/include/linux/usb/role.h
++++ b/include/linux/usb/role.h
+@@ -91,6 +91,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node)
+
+ static inline void usb_role_switch_put(struct usb_role_switch *sw) { }
+
++static inline struct usb_role_switch *
++usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode)
++{
++ return NULL;
++}
++
+ static inline struct usb_role_switch *
+ usb_role_switch_register(struct device *parent,
+ const struct usb_role_switch_desc *desc)
+diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h
+new file mode 100644
+index 0000000000000..f7c01ce879a28
+--- /dev/null
++++ b/include/linux/usb/tcpci.h
+@@ -0,0 +1,211 @@
++/* SPDX-License-Identifier: GPL-2.0+ */
++/*
++ * Copyright 2015-2017 Google, Inc
++ *
++ * USB Type-C Port Controller Interface.
++ */
++
++#ifndef __LINUX_USB_TCPCI_H
++#define __LINUX_USB_TCPCI_H
++
++#include <linux/usb/typec.h>
++#include <linux/usb/tcpm.h>
++
++#define TCPC_VENDOR_ID 0x0
++#define TCPC_PRODUCT_ID 0x2
++#define TCPC_BCD_DEV 0x4
++#define TCPC_TC_REV 0x6
++#define TCPC_PD_REV 0x8
++#define TCPC_PD_INT_REV 0xa
++
++#define TCPC_ALERT 0x10
++#define TCPC_ALERT_EXTND BIT(14)
++#define TCPC_ALERT_EXTENDED_STATUS BIT(13)
++#define TCPC_ALERT_VBUS_DISCNCT BIT(11)
++#define TCPC_ALERT_RX_BUF_OVF BIT(10)
++#define TCPC_ALERT_FAULT BIT(9)
++#define TCPC_ALERT_V_ALARM_LO BIT(8)
++#define TCPC_ALERT_V_ALARM_HI BIT(7)
++#define TCPC_ALERT_TX_SUCCESS BIT(6)
++#define TCPC_ALERT_TX_DISCARDED BIT(5)
++#define TCPC_ALERT_TX_FAILED BIT(4)
++#define TCPC_ALERT_RX_HARD_RST BIT(3)
++#define TCPC_ALERT_RX_STATUS BIT(2)
++#define TCPC_ALERT_POWER_STATUS BIT(1)
++#define TCPC_ALERT_CC_STATUS BIT(0)
++
++#define TCPC_ALERT_MASK 0x12
++#define TCPC_POWER_STATUS_MASK 0x14
++#define TCPC_FAULT_STATUS_MASK 0x15
++
++#define TCPC_EXTENDED_STATUS_MASK 0x16
++#define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0)
++
++#define TCPC_ALERT_EXTENDED_MASK 0x17
++#define TCPC_SINK_FAST_ROLE_SWAP BIT(0)
++
++#define TCPC_CONFIG_STD_OUTPUT 0x18
++
++#define TCPC_TCPC_CTRL 0x19
++#define TCPC_TCPC_CTRL_ORIENTATION BIT(0)
++#define PLUG_ORNT_CC1 0
++#define PLUG_ORNT_CC2 1
++#define TCPC_TCPC_CTRL_BIST_TM BIT(1)
++#define TCPC_TCPC_CTRL_EN_LK4CONN_ALRT BIT(6)
++
++#define TCPC_EXTENDED_STATUS 0x20
++#define TCPC_EXTENDED_STATUS_VSAFE0V BIT(0)
++
++#define TCPC_ROLE_CTRL 0x1a
++#define TCPC_ROLE_CTRL_DRP BIT(6)
++#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4
++#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3
++#define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0
++#define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1
++#define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2
++#define TCPC_ROLE_CTRL_CC2_SHIFT 2
++#define TCPC_ROLE_CTRL_CC2_MASK 0x3
++#define TCPC_ROLE_CTRL_CC1_SHIFT 0
++#define TCPC_ROLE_CTRL_CC1_MASK 0x3
++#define TCPC_ROLE_CTRL_CC_RA 0x0
++#define TCPC_ROLE_CTRL_CC_RP 0x1
++#define TCPC_ROLE_CTRL_CC_RD 0x2
++#define TCPC_ROLE_CTRL_CC_OPEN 0x3
++
++#define TCPC_FAULT_CTRL 0x1b
++
++#define TCPC_POWER_CTRL 0x1c
++#define TCPC_POWER_CTRL_VCONN_ENABLE BIT(0)
++#define TCPC_POWER_CTRL_BLEED_DISCHARGE BIT(3)
++#define TCPC_POWER_CTRL_AUTO_DISCHARGE BIT(4)
++#define TCPC_DIS_VOLT_ALRM BIT(5)
++#define TCPC_POWER_CTRL_VBUS_VOLT_MON BIT(6)
++#define TCPC_FAST_ROLE_SWAP_EN BIT(7)
++
++#define TCPC_CC_STATUS 0x1d
++#define TCPC_CC_STATUS_TOGGLING BIT(5)
++#define TCPC_CC_STATUS_TERM BIT(4)
++#define TCPC_CC_STATUS_TERM_RP 0
++#define TCPC_CC_STATUS_TERM_RD 1
++#define TCPC_CC_STATE_SRC_OPEN 0
++#define TCPC_CC_STATUS_CC2_SHIFT 2
++#define TCPC_CC_STATUS_CC2_MASK 0x3
++#define TCPC_CC_STATUS_CC1_SHIFT 0
++#define TCPC_CC_STATUS_CC1_MASK 0x3
++
++#define TCPC_POWER_STATUS 0x1e
++#define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7)
++#define TCPC_POWER_STATUS_UNINIT BIT(6)
++#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4)
++#define TCPC_POWER_STATUS_VBUS_DET BIT(3)
++#define TCPC_POWER_STATUS_VBUS_PRES BIT(2)
++#define TCPC_POWER_STATUS_VCONN_PRES BIT(1)
++#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
++
++#define TCPC_FAULT_STATUS 0x1f
++#define TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT BIT(7)
++
++#define TCPC_ALERT_EXTENDED 0x21
++
++#define TCPC_COMMAND 0x23
++#define TCPC_CMD_WAKE_I2C 0x11
++#define TCPC_CMD_DISABLE_VBUS_DETECT 0x22
++#define TCPC_CMD_ENABLE_VBUS_DETECT 0x33
++#define TCPC_CMD_DISABLE_SINK_VBUS 0x44
++#define TCPC_CMD_SINK_VBUS 0x55
++#define TCPC_CMD_DISABLE_SRC_VBUS 0x66
++#define TCPC_CMD_SRC_VBUS_DEFAULT 0x77
++#define TCPC_CMD_SRC_VBUS_HIGH 0x88
++#define TCPC_CMD_LOOK4CONNECTION 0x99
++#define TCPC_CMD_RXONEMORE 0xAA
++#define TCPC_CMD_I2C_IDLE 0xFF
++
++#define TCPC_DEV_CAP_1 0x24
++#define TCPC_DEV_CAP_2 0x26
++#define TCPC_STD_INPUT_CAP 0x28
++#define TCPC_STD_OUTPUT_CAP 0x29
++
++#define TCPC_MSG_HDR_INFO 0x2e
++#define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3)
++#define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0)
++#define TCPC_MSG_HDR_INFO_REV_SHIFT 1
++#define TCPC_MSG_HDR_INFO_REV_MASK 0x3
++
++#define TCPC_RX_DETECT 0x2f
++#define TCPC_RX_DETECT_HARD_RESET BIT(5)
++#define TCPC_RX_DETECT_SOP BIT(0)
++#define TCPC_RX_DETECT_SOP1 BIT(1)
++#define TCPC_RX_DETECT_SOP2 BIT(2)
++#define TCPC_RX_DETECT_DBG1 BIT(3)
++#define TCPC_RX_DETECT_DBG2 BIT(4)
++
++#define TCPC_RX_BYTE_CNT 0x30
++#define TCPC_RX_BUF_FRAME_TYPE 0x31
++#define TCPC_RX_BUF_FRAME_TYPE_SOP 0
++#define TCPC_RX_HDR 0x32
++#define TCPC_RX_DATA 0x34 /* through 0x4f */
++
++#define TCPC_TRANSMIT 0x50
++#define TCPC_TRANSMIT_RETRY_SHIFT 4
++#define TCPC_TRANSMIT_RETRY_MASK 0x3
++#define TCPC_TRANSMIT_TYPE_SHIFT 0
++#define TCPC_TRANSMIT_TYPE_MASK 0x7
++
++#define TCPC_TX_BYTE_CNT 0x51
++#define TCPC_TX_HDR 0x52
++#define TCPC_TX_DATA 0x54 /* through 0x6f */
++
++#define TCPC_VBUS_VOLTAGE 0x70
++#define TCPC_VBUS_VOLTAGE_MASK 0x3ff
++#define TCPC_VBUS_VOLTAGE_LSB_MV 25
++#define TCPC_VBUS_SINK_DISCONNECT_THRESH 0x72
++#define TCPC_VBUS_SINK_DISCONNECT_THRESH_LSB_MV 25
++#define TCPC_VBUS_SINK_DISCONNECT_THRESH_MAX 0x3ff
++#define TCPC_VBUS_STOP_DISCHARGE_THRESH 0x74
++#define TCPC_VBUS_VOLTAGE_ALARM_HI_CFG 0x76
++#define TCPC_VBUS_VOLTAGE_ALARM_LO_CFG 0x78
++
++/* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible I2C_WRITE_BYTE_COUNT */
++#define TCPC_TRANSMIT_BUFFER_MAX_LEN 31
++
++struct tcpci;
++
++/*
++ * @TX_BUF_BYTE_x_hidden:
++ * optional; Set when TX_BUF_BYTE_x can only be accessed through I2C_WRITE_BYTE_COUNT.
++ * @frs_sourcing_vbus:
++ * Optional; Callback to perform chip specific operations when FRS
++ * is sourcing vbus.
++ * @auto_discharge_disconnect:
++ * Optional; Enables TCPC to autonomously discharge vbus on disconnect.
++ * @vbus_vsafe0v:
++ * optional; Set when TCPC can detect whether vbus is at VSAFE0V.
++ * @set_partner_usb_comm_capable:
++ * Optional; The USB Communications Capable bit indicates if port
++ * partner is capable of communication over the USB data lines
++ * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit.
++ */
++struct tcpci_data {
++ struct regmap *regmap;
++ unsigned char TX_BUF_BYTE_x_hidden:1;
++ unsigned char auto_discharge_disconnect:1;
++ unsigned char vbus_vsafe0v:1;
++
++ int (*init)(struct tcpci *tcpci, struct tcpci_data *data);
++ int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data,
++ bool enable);
++ int (*start_drp_toggling)(struct tcpci *tcpci, struct tcpci_data *data,
++ enum typec_cc_status cc);
++ int (*set_vbus)(struct tcpci *tcpci, struct tcpci_data *data, bool source, bool sink);
++ void (*frs_sourcing_vbus)(struct tcpci *tcpci, struct tcpci_data *data);
++ void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data,
++ bool capable);
++};
++
++struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data);
++void tcpci_unregister_port(struct tcpci *tcpci);
++irqreturn_t tcpci_irq(struct tcpci *tcpci);
++
++struct tcpm_port;
++struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci);
++#endif /* __LINUX_USB_TCPCI_H */
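A vendor TCPC driver builds on this header by filling a struct tcpci_data and registering it. A trimmed probe sketch, where the regmap config, the driver-private state and the error unwinding are assumed to live elsewhere:

	static int example_tcpc_probe(struct i2c_client *client)
	{
		struct tcpci_data *data;
		struct tcpci *tcpci;

		data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
		if (!data)
			return -ENOMEM;

		data->regmap = devm_regmap_init_i2c(client, &example_regmap_config);
		if (IS_ERR(data->regmap))
			return PTR_ERR(data->regmap);

		tcpci = tcpci_register_port(&client->dev, data);
		return PTR_ERR_OR_ZERO(tcpci);
	}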
+diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
+index e2e44bb1dad85..c1e5910809add 100644
+--- a/include/linux/usb/typec.h
++++ b/include/linux/usb/typec.h
+@@ -295,6 +295,9 @@ int typec_set_mode(struct typec_port *port, int mode);
+
+ void *typec_get_drvdata(struct typec_port *port);
+
++int typec_get_fw_cap(struct typec_capability *cap,
++ struct fwnode_handle *fwnode);
++
+ int typec_find_pwr_opmode(const char *name);
+ int typec_find_orientation(const char *name);
+ int typec_find_port_power_role(const char *name);
+diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
+index 65933cbe91299..c33a8bc626d36 100644
+--- a/include/linux/usb/typec_altmode.h
++++ b/include/linux/usb/typec_altmode.h
+@@ -67,7 +67,7 @@ struct typec_altmode_ops {
+
+ int typec_altmode_enter(struct typec_altmode *altmode, u32 *vdo);
+ int typec_altmode_exit(struct typec_altmode *altmode);
+-void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
++int typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
+ int typec_altmode_vdm(struct typec_altmode *altmode,
+ const u32 header, const u32 *vdo, int count);
+ int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
+diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
+index cfb916cccd316..8d09c2f0a9b80 100644
+--- a/include/linux/usb/typec_dp.h
++++ b/include/linux/usb/typec_dp.h
+@@ -73,6 +73,11 @@ enum {
+ #define DP_CAP_USB BIT(7)
+ #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8)
+ #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16)
++/* Get pin assignment taking plug & receptacle into consideration */
++#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
++ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_))
++#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
++ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_))
+
+ /* DisplayPort Status Update VDO bits */
+ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3)
+diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
+index 72299f261b253..43db6e47503c7 100644
+--- a/include/linux/util_macros.h
++++ b/include/linux/util_macros.h
+@@ -38,4 +38,16 @@
+ */
+ #define find_closest_descending(x, a, as) __find_closest(x, a, as, >=)
+
++/**
++ * is_insidevar - check if the @ptr points inside the @var memory range.
++ * @ptr: the pointer to a memory address.
++ * @var: the variable which address and size identify the memory range.
++ *
++ * Evaluates to true if the address in @ptr lies within the memory
++ * range allocated to @var.
++ */
++#define is_insidevar(ptr, var) \
++ ((uintptr_t)(ptr) >= (uintptr_t)(var) && \
++ (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var))
++
+ #endif
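is_insidevar() suits defensive checks that a pointer does not alias a local object; for instance (buffer and pointer names invented):

	char scratch[64];

	if (is_insidevar(dst, scratch))
		return -EINVAL;	/* refuse to copy into our own scratch space */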
+diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
+index ef9a44b6cf5d5..6e5db4edc3359 100644
+--- a/include/linux/vfio_pci_core.h
++++ b/include/linux/vfio_pci_core.h
+@@ -133,6 +133,8 @@ struct vfio_pci_core_device {
+ struct mutex ioeventfds_lock;
+ struct list_head ioeventfds_list;
+ struct vfio_pci_vf_token *vf_token;
++ struct list_head sriov_pfs_item;
++ struct vfio_pci_core_device *sriov_pf_core_dev;
+ struct notifier_block nb;
+ struct mutex vma_lock;
+ struct list_head vma_list;
+@@ -159,8 +161,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev,
+ extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+
++#ifdef CONFIG_VFIO_PCI_VGA
+ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
++#else
++static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev,
++ char __user *buf, size_t count,
++ loff_t *ppos, bool iswrite)
++{
++ return -EINVAL;
++}
++#endif
+
+ extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
+ uint64_t data, int count, int fd);
+diff --git a/include/linux/virtio.h b/include/linux/virtio.h
+index 41edbc01ffa40..1af8d65d4c8f7 100644
+--- a/include/linux/virtio.h
++++ b/include/linux/virtio.h
+@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev);
+ void virtio_break_device(struct virtio_device *dev);
+
+ void virtio_config_changed(struct virtio_device *dev);
+-int virtio_finalize_features(struct virtio_device *dev);
+ #ifdef CONFIG_PM_SLEEP
+ int virtio_device_freeze(struct virtio_device *dev);
+ int virtio_device_restore(struct virtio_device *dev);
+diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
+index 8519b3ae5d52e..b341dd62aa4da 100644
+--- a/include/linux/virtio_config.h
++++ b/include/linux/virtio_config.h
+@@ -62,8 +62,9 @@ struct virtio_shm_region {
+ * Returns the first 64 feature bits (all we currently need).
+ * @finalize_features: confirm what device features we'll be using.
+ * vdev: the virtio_device
+- * This gives the final feature bits for the device: it can change
++ * This sends the driver feature bits to the device: it can change
+ * the dev->feature bits if it wants.
++ * Note: despite the name this can be called any number of times.
+ * Returns 0 on success or error status
+ * @bus_name: return the bus name associated with the device (optional)
+ * vdev: the virtio_device
+diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
+index b465f8f3e554f..6047058d67037 100644
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -7,9 +7,27 @@
+ #include <uapi/linux/udp.h>
+ #include <uapi/linux/virtio_net.h>
+
++static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
++{
++ switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
++ case VIRTIO_NET_HDR_GSO_TCPV4:
++ return protocol == cpu_to_be16(ETH_P_IP);
++ case VIRTIO_NET_HDR_GSO_TCPV6:
++ return protocol == cpu_to_be16(ETH_P_IPV6);
++ case VIRTIO_NET_HDR_GSO_UDP:
++ return protocol == cpu_to_be16(ETH_P_IP) ||
++ protocol == cpu_to_be16(ETH_P_IPV6);
++ default:
++ return false;
++ }
++}
++
+ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
+ const struct virtio_net_hdr *hdr)
+ {
++ if (skb->protocol)
++ return 0;
++
+ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_UDP:
+@@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+ if (!skb->protocol) {
+ __be16 protocol = dev_parse_header_protocol(skb);
+
+- virtio_net_hdr_set_proto(skb, hdr);
+- if (protocol && protocol != skb->protocol)
++ if (!protocol)
++ virtio_net_hdr_set_proto(skb, hdr);
++ else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type))
+ return -EINVAL;
++ else
++ skb->protocol = protocol;
+ }
+ retry:
+ if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+@@ -120,10 +141,19 @@ retry:
+
+ if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
++ unsigned int nh_off = p_off;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
++ /* UFO may not include transport header in gso_size. */
++ if (gso_type & SKB_GSO_UDP)
++ nh_off -= thlen;
++
++ /* Kernel has a special handling for GSO_BY_FRAGS. */
++ if (gso_size == GSO_BY_FRAGS)
++ return -EINVAL;
++
+ /* Too small packets are not really GSO ones. */
+- if (skb->len - p_off > gso_size) {
++ if (skb->len - nh_off > gso_size) {
+ shinfo->gso_size = gso_size;
+ shinfo->gso_type = gso_type;
+
+diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
+index 671d402c3778f..5535be1012a28 100644
+--- a/include/linux/vmalloc.h
++++ b/include/linux/vmalloc.h
+@@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */
+ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
+ #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
+
++#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
++ !defined(CONFIG_KASAN_VMALLOC)
++#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */
++#else
++#define VM_DEFER_KMEMLEAK 0
++#endif
++
+ /*
+ * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
+ *
+@@ -152,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+ int node, const void *caller);
+ void *vmalloc_no_huge(unsigned long size);
+
++extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
++extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2);
++extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
++extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2);
++
+ extern void vfree(const void *addr);
+ extern void vfree_atomic(const void *addr);
+
+diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h
+index 848db1b1569ff..919d999a8c1db 100644
+--- a/include/linux/vt_buffer.h
++++ b/include/linux/vt_buffer.h
+@@ -16,7 +16,7 @@
+
+ #include <linux/string.h>
+
+-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE)
++#if IS_ENABLED(CONFIG_VGA_CONSOLE) || IS_ENABLED(CONFIG_MDA_CONSOLE)
+ #include <asm/vga.h>
+ #endif
+
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index 93dab0e9580f8..21044562aab74 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void
+ void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+ void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
++void __wake_up_pollfree(struct wait_queue_head *wq_head);
+
+ #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
+ #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
+@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
+ #define wake_up_interruptible_sync_poll_locked(x, m) \
+ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
+
++/**
++ * wake_up_pollfree - signal that a polled waitqueue is going away
++ * @wq_head: the wait queue head
++ *
++ * In the very rare cases where a ->poll() implementation uses a waitqueue whose
++ * lifetime is tied to a task rather than to the 'struct file' being polled,
++ * this function must be called before the waitqueue is freed so that
++ * non-blocking polls (e.g. epoll) are notified that the queue is going away.
++ *
++ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via
++ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU.
++ */
++static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ /*
++ * For performance reasons, we don't always take the queue lock here.
++ * Therefore, we might race with someone removing the last entry from
++ * the queue, and proceed while they still hold the queue lock.
++ * However, rcu_read_lock() is required to be held in such cases, so we
++ * can safely proceed with an RCU-delayed free.
++ */
++ if (waitqueue_active(wq_head))
++ __wake_up_pollfree(wq_head);
++}
++
+ #define ___wait_cond_timeout(condition) \
+ ({ \
+ bool __cond = (condition); \
+@@ -518,10 +544,11 @@ do { \
+ \
+ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \
+ HRTIMER_MODE_REL); \
+- if ((timeout) != KTIME_MAX) \
+- hrtimer_start_range_ns(&__t.timer, timeout, \
+- current->timer_slack_ns, \
+- HRTIMER_MODE_REL); \
++ if ((timeout) != KTIME_MAX) { \
++ hrtimer_set_expires_range_ns(&__t.timer, timeout, \
++ current->timer_slack_ns); \
++ hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \
++ } \
+ \
+ __ret = ___wait_event(wq_head, condition, state, 0, 0, \
+ if (!__t.task) { \
+diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
+index c994d1b2cdbaa..3b9a40ae8bdba 100644
+--- a/include/linux/watch_queue.h
++++ b/include/linux/watch_queue.h
+@@ -28,7 +28,8 @@ struct watch_type_filter {
+ struct watch_filter {
+ union {
+ struct rcu_head rcu;
+- unsigned long type_filter[2]; /* Bitmask of accepted types */
++ /* Bitmask of accepted types */
++ DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
+ };
+ u32 nr_filters; /* Number of filters */
+ struct watch_type_filter filters[];
+diff --git a/include/linux/wireless.h b/include/linux/wireless.h
+index 2d1b54556eff4..e6e34d74dda04 100644
+--- a/include/linux/wireless.h
++++ b/include/linux/wireless.h
+@@ -26,7 +26,15 @@ struct compat_iw_point {
+ struct __compat_iw_event {
+ __u16 len; /* Real length of this stuff */
+ __u16 cmd; /* Wireless IOCTL */
+- compat_caddr_t pointer;
++
++ union {
++ compat_caddr_t pointer;
++
++ /* we need ptr_bytes to make memcpy() run-time destination
++ * buffer bounds checking happy, nothing special
++ */
++ DECLARE_FLEX_ARRAY(__u8, ptr_bytes);
++ };
+ };
+ #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer)
+ #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length)
+diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
+index 74d3c1efd9bb5..20a47eb94b0f3 100644
+--- a/include/linux/workqueue.h
++++ b/include/linux/workqueue.h
+@@ -68,7 +68,6 @@ enum {
+ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
+
+ __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE,
+- WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING),
+
+ /*
+ * When a work item is off queue, its high bits point to the last
+@@ -79,12 +78,6 @@ enum {
+ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+ WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+- WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
+-
+- /* convenience constants */
+- WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
+- WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
+- WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
+
+ /* bit mask for work_busy() return values */
+ WORK_BUSY_PENDING = 1 << 0,
+@@ -94,6 +87,14 @@ enum {
+ WORKER_DESC_LEN = 24,
+ };
+
++/* Convenience constants - of type 'unsigned long', not 'enum'! */
++#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING)
++#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
++#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
++
++#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1)
++#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
++
+ struct work_struct {
+ atomic_long_t data;
+ struct list_head entry;
+@@ -469,7 +470,8 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
+ extern unsigned int work_busy(struct work_struct *work);
+ extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
+ extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+-extern void show_workqueue_state(void);
++extern void show_all_workqueues(void);
++extern void show_one_workqueue(struct workqueue_struct *wq);
+ extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
+
+ /**
+diff --git a/include/media/cec.h b/include/media/cec.h
+index 208c9613c07eb..77346f757036d 100644
+--- a/include/media/cec.h
++++ b/include/media/cec.h
+@@ -26,13 +26,17 @@
+ * @dev: cec device
+ * @cdev: cec character device
+ * @minor: device node minor number
++ * @lock: lock to serialize open/release and registration
+ * @registered: the device was correctly registered
+ * @unregistered: the device was unregistered
++ * @lock_fhs: lock to control access to @fhs
+ * @fhs: the list of open filehandles (cec_fh)
+- * @lock: lock to control access to this structure
+ *
+ * This structure represents a cec-related device node.
+ *
++ * To add or remove filehandles from @fhs the @lock must be taken first,
++ * followed by @lock_fhs. It is safe to access @fhs if either lock is held.
++ *
+ * The @parent is a physical device. It must be set by core or device drivers
+ * before registering the node.
+ */
+@@ -43,10 +47,13 @@ struct cec_devnode {
+
+ /* device info */
+ int minor;
++ /* serialize open/release and registration */
++ struct mutex lock;
+ bool registered;
+ bool unregistered;
++ /* protect access to fhs */
++ struct mutex lock_fhs;
+ struct list_head fhs;
+- struct mutex lock;
+ };
+
+ struct cec_adapter;
+diff --git a/include/media/dvb_net.h b/include/media/dvb_net.h
+index 5e31d37f25fac..cc01dffcc9f35 100644
+--- a/include/media/dvb_net.h
++++ b/include/media/dvb_net.h
+@@ -41,6 +41,9 @@
+ * @exit: flag to indicate when the device is being removed.
+ * @demux: pointer to &struct dmx_demux.
+ * @ioctl_mutex: protect access to this struct.
++ * @remove_mutex: mutex that avoids a race condition between a callback
++ * called when the hardware is disconnected and the
++ * file_operations of dvb_net.
+ *
+ * Currently, the core supports up to %DVB_NET_DEVICES_MAX (10) network
+ * devices.
+@@ -53,6 +56,7 @@ struct dvb_net {
+ unsigned int exit:1;
+ struct dmx_demux *demux;
+ struct mutex ioctl_mutex;
++ struct mutex remove_mutex;
+ };
+
+ /**
+diff --git a/include/media/dvbdev.h b/include/media/dvbdev.h
+index 2f6b0861322ae..34b01ebf32828 100644
+--- a/include/media/dvbdev.h
++++ b/include/media/dvbdev.h
+@@ -126,6 +126,7 @@ struct dvb_adapter {
+ * struct dvb_device - represents a DVB device node
+ *
+ * @list_head: List head with all DVB devices
++ * @ref: reference counter
+ * @fops: pointer to struct file_operations
+ * @adapter: pointer to the adapter that holds this device node
+ * @type: type of the device, as defined by &enum dvb_device_type.
+@@ -156,6 +157,7 @@ struct dvb_adapter {
+ */
+ struct dvb_device {
+ struct list_head list_head;
++ struct kref ref;
+ const struct file_operations *fops;
+ struct dvb_adapter *adapter;
+ enum dvb_device_type type;
+@@ -187,6 +189,35 @@ struct dvb_device {
+ void *priv;
+ };
+
++/**
++ * struct dvbdevfops_node - fops nodes registered in dvbdevfops_list
++ *
++ * @fops: Dynamically allocated fops for ->owner registration
++ * @type: type of dvb_device
++ * @template: dvb_device used for registration
++ * @list_head: list_head for dvbdevfops_list
++ */
++struct dvbdevfops_node {
++ struct file_operations *fops;
++ enum dvb_device_type type;
++ const struct dvb_device *template;
++ struct list_head list_head;
++};
++
++/**
++ * dvb_device_get - Increase dvb_device reference
++ *
++ * @dvbdev: pointer to struct dvb_device
++ */
++struct dvb_device *dvb_device_get(struct dvb_device *dvbdev);
++
++/**
++ * dvb_device_put - Decrease dvb_device reference
++ *
++ * @dvbdev: pointer to struct dvb_device
++ */
++void dvb_device_put(struct dvb_device *dvbdev);
++
+ /**
+ * dvb_register_adapter - Registers a new DVB adapter
+ *
+@@ -231,29 +262,17 @@ int dvb_register_device(struct dvb_adapter *adap,
+ /**
+ * dvb_remove_device - Remove a registered DVB device
+ *
+- * This does not free memory. To do that, call dvb_free_device().
++ * This does not free memory. dvb_free_device() will do that when
++ * the reference counter reaches zero.
+ *
+ * @dvbdev: pointer to struct dvb_device
+ */
+ void dvb_remove_device(struct dvb_device *dvbdev);
+
+-/**
+- * dvb_free_device - Free memory occupied by a DVB device.
+- *
+- * Call dvb_unregister_device() before calling this function.
+- *
+- * @dvbdev: pointer to struct dvb_device
+- */
+-void dvb_free_device(struct dvb_device *dvbdev);
+
+ /**
+ * dvb_unregister_device - Unregisters a DVB device
+ *
+- * This is a combination of dvb_remove_device() and dvb_free_device().
+- * Using this function is usually a mistake, and is often an indicator
+- * for a use-after-free bug (when a userspace process keeps a file
+- * handle to a detached device).
+- *
+ * @dvbdev: pointer to struct dvb_device
+ */
+ void dvb_unregister_device(struct dvb_device *dvbdev);
+diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
+index 3eb202259e8cc..5e25a098e8ce4 100644
+--- a/include/media/v4l2-common.h
++++ b/include/media/v4l2-common.h
+@@ -175,7 +175,8 @@ struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+ *
+ * @sd: pointer to &struct v4l2_subdev
+ * @client: pointer to struct i2c_client
+- * @devname: the name of the device; if NULL, the I²C device's name will be used
++ * @devname: the name of the device; if NULL, the I²C device drivers's name
++ * will be used
+ * @postfix: sub-device specific string to put right after the I²C device name;
+ * may be NULL
+ */
+diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
+index 5a91b548ecc0c..8d52c4506762d 100644
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -588,7 +588,14 @@ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx,
+ static inline
+ unsigned int v4l2_m2m_num_src_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx)
+ {
+- return m2m_ctx->out_q_ctx.num_rdy;
++ unsigned int num_buf_rdy;
++ unsigned long flags;
++
++ spin_lock_irqsave(&m2m_ctx->out_q_ctx.rdy_spinlock, flags);
++ num_buf_rdy = m2m_ctx->out_q_ctx.num_rdy;
++ spin_unlock_irqrestore(&m2m_ctx->out_q_ctx.rdy_spinlock, flags);
++
++ return num_buf_rdy;
+ }
+
+ /**
+@@ -600,7 +607,14 @@ unsigned int v4l2_m2m_num_src_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx)
+ static inline
+ unsigned int v4l2_m2m_num_dst_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx)
+ {
+- return m2m_ctx->cap_q_ctx.num_rdy;
++ unsigned int num_buf_rdy;
++ unsigned long flags;
++
++ spin_lock_irqsave(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags);
++ num_buf_rdy = m2m_ctx->cap_q_ctx.num_rdy;
++ spin_unlock_irqrestore(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags);
++
++ return num_buf_rdy;
+ }
+
+ /**
+diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
+index 95ec18c2f49ce..9a476f902c425 100644
+--- a/include/media/v4l2-subdev.h
++++ b/include/media/v4l2-subdev.h
+@@ -995,6 +995,8 @@ v4l2_subdev_get_try_format(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_fmt;
+@@ -1013,6 +1015,8 @@ v4l2_subdev_get_try_crop(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_crop;
+@@ -1031,6 +1035,8 @@ v4l2_subdev_get_try_compose(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_compose;
+diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
+index 12955cb460d23..3b5986cee0739 100644
+--- a/include/media/videobuf2-core.h
++++ b/include/media/videobuf2-core.h
+@@ -46,6 +46,7 @@ enum vb2_memory {
+
+ struct vb2_fileio_data;
+ struct vb2_threadio_data;
++struct vb2_buffer;
+
+ /**
+ * struct vb2_mem_ops - memory handling/memory allocator operations.
+@@ -53,10 +54,8 @@ struct vb2_threadio_data;
+ * return ERR_PTR() on failure or a pointer to allocator private,
+ * per-buffer data on success; the returned private structure
+ * will then be passed as @buf_priv argument to other ops in this
+- * structure. Additional gfp_flags to use when allocating the
+- * are also passed to this operation. These flags are from the
+- * gfp_flags field of vb2_queue. The size argument to this function
+- * shall be *page aligned*.
++ * structure. The size argument to this function shall be
++ * *page aligned*.
+ * @put: inform the allocator that the buffer will no longer be used;
+ * usually will result in the allocator freeing the buffer (if
+ * no other users of this buffer are present); the @buf_priv
+@@ -117,31 +116,33 @@ struct vb2_threadio_data;
+ * map_dmabuf, unmap_dmabuf.
+ */
+ struct vb2_mem_ops {
+- void *(*alloc)(struct device *dev, unsigned long attrs,
+- unsigned long size,
+- enum dma_data_direction dma_dir,
+- gfp_t gfp_flags);
++ void *(*alloc)(struct vb2_buffer *vb,
++ struct device *dev,
++ unsigned long size);
+ void (*put)(void *buf_priv);
+- struct dma_buf *(*get_dmabuf)(void *buf_priv, unsigned long flags);
+-
+- void *(*get_userptr)(struct device *dev, unsigned long vaddr,
+- unsigned long size,
+- enum dma_data_direction dma_dir);
++ struct dma_buf *(*get_dmabuf)(struct vb2_buffer *vb,
++ void *buf_priv,
++ unsigned long flags);
++
++ void *(*get_userptr)(struct vb2_buffer *vb,
++ struct device *dev,
++ unsigned long vaddr,
++ unsigned long size);
+ void (*put_userptr)(void *buf_priv);
+
+ void (*prepare)(void *buf_priv);
+ void (*finish)(void *buf_priv);
+
+- void *(*attach_dmabuf)(struct device *dev,
++ void *(*attach_dmabuf)(struct vb2_buffer *vb,
++ struct device *dev,
+ struct dma_buf *dbuf,
+- unsigned long size,
+- enum dma_data_direction dma_dir);
++ unsigned long size);
+ void (*detach_dmabuf)(void *buf_priv);
+ int (*map_dmabuf)(void *buf_priv);
+ void (*unmap_dmabuf)(void *buf_priv);
+
+- void *(*vaddr)(void *buf_priv);
+- void *(*cookie)(void *buf_priv);
++ void *(*vaddr)(struct vb2_buffer *vb, void *buf_priv);
++ void *(*cookie)(struct vb2_buffer *vb, void *buf_priv);
+
+ unsigned int (*num_users)(void *buf_priv);
+
+diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h
+index e3e770f76f349..15dd0076c2936 100644
+--- a/include/memory/renesas-rpc-if.h
++++ b/include/memory/renesas-rpc-if.h
+@@ -59,12 +59,14 @@ struct rpcif_op {
+
+ struct rpcif {
+ struct device *dev;
++ void __iomem *base;
+ void __iomem *dirmap;
+ struct regmap *regmap;
+ struct reset_control *rstc;
+ size_t size;
+ enum rpcif_data_dir dir;
+ u8 bus_size;
++ u8 xfer_size;
+ void *buffer;
+ u32 xferlen;
+ u32 smcr;
+diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
+index 03614de869425..6d0615140dbcf 100644
+--- a/include/net/9p/9p.h
++++ b/include/net/9p/9p.h
+@@ -32,13 +32,13 @@
+ */
+
+ enum p9_debug_flags {
+- P9_DEBUG_ERROR = (1<<0),
+- P9_DEBUG_9P = (1<<2),
++ P9_DEBUG_ERROR = (1<<0),
++ P9_DEBUG_9P = (1<<2),
+ P9_DEBUG_VFS = (1<<3),
+ P9_DEBUG_CONV = (1<<4),
+ P9_DEBUG_MUX = (1<<5),
+ P9_DEBUG_TRANS = (1<<6),
+- P9_DEBUG_SLABS = (1<<7),
++ P9_DEBUG_SLABS = (1<<7),
+ P9_DEBUG_FCALL = (1<<8),
+ P9_DEBUG_FID = (1<<9),
+ P9_DEBUG_PKT = (1<<10),
+@@ -317,8 +317,8 @@ enum p9_qid_t {
+ };
+
+ /* 9P Magic Numbers */
+-#define P9_NOTAG (u16)(~0)
+-#define P9_NOFID (u32)(~0)
++#define P9_NOTAG ((u16)(~0))
++#define P9_NOFID ((u32)(~0))
+ #define P9_MAXWELEM 16
+
+ /* Minimal header size: size[4] type[1] tag[2] */
+diff --git a/include/net/9p/client.h b/include/net/9p/client.h
+index e1c308d8d288e..7060de84c5593 100644
+--- a/include/net/9p/client.h
++++ b/include/net/9p/client.h
+@@ -23,7 +23,7 @@
+ * @p9_proto_2000L: 9P2000.L extension
+ */
+
+-enum p9_proto_versions{
++enum p9_proto_versions {
+ p9_proto_legacy,
+ p9_proto_2000u,
+ p9_proto_2000L,
+@@ -78,7 +78,7 @@ enum p9_req_status_t {
+ struct p9_req_t {
+ int status;
+ int t_err;
+- struct kref refcount;
++ refcount_t refcount;
+ wait_queue_head_t wq;
+ struct p9_fcall tc;
+ struct p9_fcall rc;
+@@ -219,36 +219,40 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+ u64 request_mask);
+
+ int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode,
+- dev_t rdev, kgid_t gid, struct p9_qid *);
++ dev_t rdev, kgid_t gid, struct p9_qid *qid);
+ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
+- kgid_t gid, struct p9_qid *);
++ kgid_t gid, struct p9_qid *qid);
+ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
+ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+ void p9_fcall_fini(struct p9_fcall *fc);
+-struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
++struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag);
+
+ static inline void p9_req_get(struct p9_req_t *r)
+ {
+- kref_get(&r->refcount);
++ refcount_inc(&r->refcount);
+ }
+
+ static inline int p9_req_try_get(struct p9_req_t *r)
+ {
+- return kref_get_unless_zero(&r->refcount);
++ return refcount_inc_not_zero(&r->refcount);
+ }
+
+-int p9_req_put(struct p9_req_t *r);
++int p9_req_put(struct p9_client *c, struct p9_req_t *r);
+
+ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
+
+-int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
+-int p9stat_read(struct p9_client *, char *, int, struct p9_wstat *);
+-void p9stat_free(struct p9_wstat *);
++int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
++ int16_t *tag, int rewind);
++int p9stat_read(struct p9_client *clnt, char *buf, int len,
++ struct p9_wstat *st);
++void p9stat_free(struct p9_wstat *stbuf);
+
+ int p9_is_proto_dotu(struct p9_client *clnt);
+ int p9_is_proto_dotl(struct p9_client *clnt);
+-struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *);
+-int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int);
++struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
++ const char *attr_name, u64 *attr_size);
++int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
++ u64 attr_size, int flags);
+ int p9_client_readlink(struct p9_fid *fid, char **target);
+
+ int p9_client_init(void);
+diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
+index 3eb4261b29588..7215976116257 100644
+--- a/include/net/9p/transport.h
++++ b/include/net/9p/transport.h
+@@ -40,14 +40,16 @@ struct p9_trans_module {
+ int maxsize; /* max message size of transport */
+ int def; /* this transport should be default */
+ struct module *owner;
+- int (*create)(struct p9_client *, const char *, char *);
+- void (*close) (struct p9_client *);
+- int (*request) (struct p9_client *, struct p9_req_t *req);
+- int (*cancel) (struct p9_client *, struct p9_req_t *req);
+- int (*cancelled)(struct p9_client *, struct p9_req_t *req);
+- int (*zc_request)(struct p9_client *, struct p9_req_t *,
+- struct iov_iter *, struct iov_iter *, int , int, int);
+- int (*show_options)(struct seq_file *, struct p9_client *);
++ int (*create)(struct p9_client *client,
++ const char *devname, char *args);
++ void (*close)(struct p9_client *client);
++ int (*request)(struct p9_client *client, struct p9_req_t *req);
++ int (*cancel)(struct p9_client *client, struct p9_req_t *req);
++ int (*cancelled)(struct p9_client *client, struct p9_req_t *req);
++ int (*zc_request)(struct p9_client *client, struct p9_req_t *req,
++ struct iov_iter *uidata, struct iov_iter *uodata,
++ int inlen, int outlen, int in_hdr_len);
++ int (*show_options)(struct seq_file *m, struct p9_client *client);
+ };
+
+ void v9fs_register_trans(struct p9_trans_module *m);
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index 78ea3e332688f..53627afab1044 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -6,6 +6,8 @@
+ #define RTR_SOLICITATION_INTERVAL (4*HZ)
+ #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */
+
++#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
++
+ #define TEMP_VALID_LIFETIME (7*86400)
+ #define TEMP_PREFERRED_LIFETIME (86400)
+ #define REGEN_MAX_RETRY (3)
+@@ -107,8 +109,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
+ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+ const struct in6_addr *daddr, unsigned int srcprefs,
+ struct in6_addr *saddr);
+-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+- u32 banned_flags);
+ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ u32 banned_flags);
+ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+@@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+ {
+ const struct inet6_dev *idev = __in6_dev_get(dev);
+
++ if (unlikely(!idev))
++ return true;
++
+ return !!idev->cnf.ignore_routes_with_linkdown;
+ }
+
+diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
+index ab207677e0a8b..f742e50207fbd 100644
+--- a/include/net/af_vsock.h
++++ b/include/net/af_vsock.h
+@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+ struct sockaddr_vm *dst);
+ void vsock_remove_sock(struct vsock_sock *vsk);
+-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
++void vsock_for_each_connected_socket(struct vsock_transport *transport,
++ void (*fn)(struct sock *sk));
+ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
+ bool vsock_find_cid(unsigned int cid);
+
+diff --git a/include/net/arp.h b/include/net/arp.h
+index 4950191f6b2bf..4a23a97195f33 100644
+--- a/include/net/arp.h
++++ b/include/net/arp.h
+@@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
+ const unsigned char *src_hw, const unsigned char *th);
+ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir);
+ void arp_ifdown(struct net_device *dev);
++int arp_invalidate(struct net_device *dev, __be32 ip, bool force);
+
+ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
+diff --git a/include/net/ax25.h b/include/net/ax25.h
+index 8b7eb46ad72d8..aadff553e4b73 100644
+--- a/include/net/ax25.h
++++ b/include/net/ax25.h
+@@ -236,6 +236,7 @@ typedef struct ax25_dev {
+ #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
+ ax25_dama_info dama;
+ #endif
++ refcount_t refcount;
+ } ax25_dev;
+
+ typedef struct ax25_cb {
+@@ -290,6 +291,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25)
+ }
+ }
+
++static inline void ax25_dev_hold(ax25_dev *ax25_dev)
++{
++ refcount_inc(&ax25_dev->refcount);
++}
++
++static inline void ax25_dev_put(ax25_dev *ax25_dev)
++{
++ if (refcount_dec_and_test(&ax25_dev->refcount)) {
++ kfree(ax25_dev);
++ }
++}
+ static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
+ {
+ skb->dev = dev;
+diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
+index 9125effbf4483..355835639ae58 100644
+--- a/include/net/bluetooth/bluetooth.h
++++ b/include/net/bluetooth/bluetooth.h
+@@ -180,19 +180,21 @@ void bt_err_ratelimited(const char *fmt, ...);
+ #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__)
+ #endif
+
++#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null")
++
+ #define bt_dev_info(hdev, fmt, ...) \
+- BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_warn(hdev, fmt, ...) \
+- BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_err(hdev, fmt, ...) \
+- BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_dbg(hdev, fmt, ...) \
+- BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+
+ #define bt_dev_warn_ratelimited(hdev, fmt, ...) \
+- bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_err_ratelimited(hdev, fmt, ...) \
+- bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+
+ /* Connection and socket states */
+ enum {
+@@ -420,6 +422,71 @@ out:
+ return NULL;
+ }
+
++/* Shall not be called with lock_sock held */
++static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
++ struct msghdr *msg,
++ size_t len, size_t mtu,
++ size_t headroom, size_t tailroom)
++{
++ struct sk_buff *skb;
++ size_t size = min_t(size_t, len, mtu);
++ int err;
++
++ skb = bt_skb_send_alloc(sk, size + headroom + tailroom,
++ msg->msg_flags & MSG_DONTWAIT, &err);
++ if (!skb)
++ return ERR_PTR(err);
++
++ skb_reserve(skb, headroom);
++ skb_tailroom_reserve(skb, mtu, tailroom);
++
++ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) {
++ kfree_skb(skb);
++ return ERR_PTR(-EFAULT);
++ }
++
++ skb->priority = sk->sk_priority;
++
++ return skb;
++}
++
++/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments
++ * according to the MTU.
++ */
++static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
++ struct msghdr *msg,
++ size_t len, size_t mtu,
++ size_t headroom, size_t tailroom)
++{
++ struct sk_buff *skb, **frag;
++
++ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
++ if (IS_ERR_OR_NULL(skb))
++ return skb;
++
++ len -= skb->len;
++ if (!len)
++ return skb;
++
++ /* Add remaining data over MTU as continuation fragments */
++ frag = &skb_shinfo(skb)->frag_list;
++ while (len) {
++ struct sk_buff *tmp;
++
++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
++ if (IS_ERR(tmp)) {
++ return skb;
++ }
++
++ len -= tmp->len;
++
++ *frag = tmp;
++ frag = &(*frag)->next;
++ }
++
++ return skb;
++}
++
+ int bt_to_errno(u16 code);
+
+ void hci_sock_set_flag(struct sock *sk, int nr);
+diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
+index b80415011dcd5..9ce46cb8564d6 100644
+--- a/include/net/bluetooth/hci.h
++++ b/include/net/bluetooth/hci.h
+@@ -246,6 +246,15 @@ enum {
+ * HCI after resume.
+ */
+ HCI_QUIRK_NO_SUSPEND_NOTIFIER,
++
++ /*
++ * When this quirk is set, LE tx power is not queried on startup
++ * and the min/max tx power values default to HCI_TX_POWER_INVALID.
++ *
++ * This quirk can be set before hci_register_dev is called or
++ * during the hdev->setup vendor callback.
++ */
++ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER,
+ };
+
+ /* HCI device flags */
+diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
+index a7360c8c72f82..3da5cfcf84c1d 100644
+--- a/include/net/bluetooth/hci_core.h
++++ b/include/net/bluetooth/hci_core.h
+@@ -35,6 +35,9 @@
+ /* HCI priority */
+ #define HCI_PRIO_MAX 7
+
++/* HCI maximum id value */
++#define HCI_MAX_ID 10000
++
+ /* HCI Core structures */
+ struct inquiry_data {
+ bdaddr_t bdaddr;
+diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
+index 3c4f550e5a8b7..2f766e3437ce2 100644
+--- a/include/net/bluetooth/l2cap.h
++++ b/include/net/bluetooth/l2cap.h
+@@ -847,6 +847,7 @@ enum {
+ };
+
+ void l2cap_chan_hold(struct l2cap_chan *c);
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
+ void l2cap_chan_put(struct l2cap_chan *c);
+
+ static inline void l2cap_chan_lock(struct l2cap_chan *chan)
+diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
+index 38785d48baff9..f2273bd5a4c58 100644
+--- a/include/net/bond_3ad.h
++++ b/include/net/bond_3ad.h
+@@ -15,8 +15,6 @@
+ #define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW)
+ #define AD_TIMER_INTERVAL 100 /*msec*/
+
+-#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
+-
+ #define AD_LACP_SLOW 0
+ #define AD_LACP_FAST 1
+
+@@ -262,7 +260,7 @@ struct ad_system {
+ struct ad_bond_info {
+ struct ad_system system; /* 802.3ad system structure */
+ struct bond_3ad_stats stats;
+- u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
++ atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */
+ u16 aggregator_identifier;
+ };
+
+diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
+index f6af76c87a6c3..9dc082b2d5430 100644
+--- a/include/net/bond_alb.h
++++ b/include/net/bond_alb.h
+@@ -126,7 +126,7 @@ struct tlb_slave_info {
+ struct alb_bond_info {
+ struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
+ u32 unbalanced_load;
+- int tx_rebalance_counter;
++ atomic_t tx_rebalance_counter;
+ int lp_counter;
+ /* -------- rlb parameters -------- */
+ int rlb_enabled;
+@@ -156,8 +156,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
+ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
+ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
+ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
+-int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+-int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
++netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
++netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
+ struct sk_buff *skb);
+ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
+diff --git a/include/net/bond_options.h b/include/net/bond_options.h
+index e64833a674eb8..dd75c071f67e2 100644
+--- a/include/net/bond_options.h
++++ b/include/net/bond_options.h
+@@ -65,6 +65,7 @@ enum {
+ BOND_OPT_NUM_PEER_NOTIF_ALIAS,
+ BOND_OPT_PEER_NOTIF_DELAY,
+ BOND_OPT_LACP_ACTIVE,
++ BOND_OPT_MISSED_MAX,
+ BOND_OPT_LAST
+ };
+
+diff --git a/include/net/bonding.h b/include/net/bonding.h
+index 15e083e18f75f..08d222752cc88 100644
+--- a/include/net/bonding.h
++++ b/include/net/bonding.h
+@@ -121,6 +121,7 @@ struct bond_params {
+ int xmit_policy;
+ int miimon;
+ u8 num_peer_notif;
++ u8 missed_max;
+ int arp_interval;
+ int arp_validate;
+ int arp_all_targets;
+@@ -214,6 +215,7 @@ struct bonding {
+ struct bond_up_slave __rcu *usable_slaves;
+ struct bond_up_slave __rcu *all_slaves;
+ bool force_primary;
++ bool notifier_ctx;
+ s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
+ int (*recv_probe)(const struct sk_buff *, struct bonding *,
+ struct slave *);
+@@ -226,7 +228,7 @@ struct bonding {
+ */
+ spinlock_t mode_lock;
+ spinlock_t stats_lock;
+- u8 send_peer_notif;
++ u32 send_peer_notif;
+ u8 igmp_retrans;
+ #ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc_entry;
+@@ -698,37 +700,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
+ }
+
+ /* Caller must hold rcu_read_lock() for read */
+-static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
+- const u8 *mac)
+-{
+- struct list_head *iter;
+- struct slave *tmp;
+-
+- bond_for_each_slave_rcu(bond, tmp, iter)
+- if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+- return tmp;
+-
+- return NULL;
+-}
+-
+-/* Caller must hold rcu_read_lock() for read */
+-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
++static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
+ {
+ struct list_head *iter;
+ struct slave *tmp;
+- struct netdev_hw_addr *ha;
+
+ bond_for_each_slave_rcu(bond, tmp, iter)
+ if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+ return true;
+-
+- if (netdev_uc_empty(bond->dev))
+- return false;
+-
+- netdev_for_each_uc_addr(ha, bond->dev)
+- if (ether_addr_equal_64bits(mac, ha->addr))
+- return true;
+-
+ return false;
+ }
+
+@@ -757,6 +736,9 @@ extern struct rtnl_link_ops bond_link_ops;
+ /* exported from bond_sysfs_slave.c */
+ extern const struct sysfs_ops slave_sysfs_ops;
+
++/* exported from bond_3ad.c */
++extern const u8 lacpdu_mcast_addr[];
++
+ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
+ {
+ atomic_long_inc(&dev->tx_dropped);
+diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
+index 40296ed976a97..3459a04a3d61c 100644
+--- a/include/net/busy_poll.h
++++ b/include/net/busy_poll.h
+@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
+
+ static inline bool net_busy_loop_on(void)
+ {
+- return sysctl_net_busy_poll;
++ return READ_ONCE(sysctl_net_busy_poll);
+ }
+
+ static inline bool sk_can_busy_loop(const struct sock *sk)
+diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
+index 27336fc704674..963a810ed70d2 100644
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -510,6 +510,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
+ if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+ return NULL;
+
++ if (iftype == NL80211_IFTYPE_AP_VLAN)
++ iftype = NL80211_IFTYPE_AP;
++
+ for (i = 0; i < sband->n_iftype_data; i++) {
+ const struct ieee80211_sband_iftype_data *data =
+ &sband->iftype_data[i];
+diff --git a/include/net/checksum.h b/include/net/checksum.h
+index 5b96d5bd6e545..d3b5d368a0caa 100644
+--- a/include/net/checksum.h
++++ b/include/net/checksum.h
+@@ -22,7 +22,7 @@
+ #include <asm/checksum.h>
+
+ #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+-static inline
++static __always_inline
+ __wsum csum_and_copy_from_user (const void __user *src, void *dst,
+ int len)
+ {
+@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst,
+ #endif
+
+ #ifndef HAVE_CSUM_COPY_USER
+-static __inline__ __wsum csum_and_copy_to_user
++static __always_inline __wsum csum_and_copy_to_user
+ (const void *src, void __user *dst, int len)
+ {
+ __wsum sum = csum_partial(src, len, ~0U);
+@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
+ #endif
+
+ #ifndef _HAVE_ARCH_CSUM_AND_COPY
+-static inline __wsum
++static __always_inline __wsum
+ csum_partial_copy_nocheck(const void *src, void *dst, int len)
+ {
+ memcpy(dst, src, len);
+@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
+ #endif
+
+ #ifndef HAVE_ARCH_CSUM_ADD
+-static inline __wsum csum_add(__wsum csum, __wsum addend)
++static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
+ {
+ u32 res = (__force u32)csum;
+ res += (__force u32)addend;
+@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
+ }
+ #endif
+
+-static inline __wsum csum_sub(__wsum csum, __wsum addend)
++static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
+ {
+ return csum_add(csum, ~addend);
+ }
+
+-static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
++static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+ {
+ u16 res = (__force u16)csum;
+
+@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+ return (__force __sum16)(res + (res < (__force u16)addend));
+ }
+
+-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
++static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
+ {
+ return csum16_add(csum, ~addend);
+ }
+
+-static inline __wsum csum_shift(__wsum sum, int offset)
++static __always_inline __wsum csum_shift(__wsum sum, int offset)
+ {
+ /* rotate sum to align it with a 16b boundary */
+ if (offset & 1)
+@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset)
+ return sum;
+ }
+
+-static inline __wsum
++static __always_inline __wsum
+ csum_block_add(__wsum csum, __wsum csum2, int offset)
+ {
+ return csum_add(csum, csum_shift(csum2, offset));
+ }
+
+-static inline __wsum
++static __always_inline __wsum
+ csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
+ {
+ return csum_block_add(csum, csum2, offset);
+ }
+
+-static inline __wsum
++static __always_inline __wsum
+ csum_block_sub(__wsum csum, __wsum csum2, int offset)
+ {
+ return csum_block_add(csum, ~csum2, offset);
+ }
+
+-static inline __wsum csum_unfold(__sum16 n)
++static __always_inline __wsum csum_unfold(__sum16 n)
+ {
+ return (__force __wsum)n;
+ }
+
+-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
++static __always_inline
++__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
+ {
+ return csum_partial(buff, len, sum);
+ }
+
+ #define CSUM_MANGLED_0 ((__force __sum16)0xffff)
+
+-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
++static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+ {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ }
+
+-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
++static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+ {
+ __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
+
+@@ -136,11 +137,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+ * m : old value of a 16bit field
+ * m' : new value of a 16bit field
+ */
+-static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
++static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
+ {
+ *sum = ~csum16_add(csum16_sub(~(*sum), old), new);
+ }
+
++static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
++{
++ *csum = csum_add(csum_sub(*csum, old), new);
++}
++
+ struct sk_buff;
+ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
+ __be32 from, __be32 to, bool pseudohdr);
+@@ -150,16 +156,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ __wsum diff, bool pseudohdr);
+
+-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+- __be16 from, __be16 to,
+- bool pseudohdr)
++static __always_inline
++void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
++ __be16 from, __be16 to, bool pseudohdr)
+ {
+ inet_proto_csum_replace4(sum, skb, (__force __be32)from,
+ (__force __be32)to, pseudohdr);
+ }
+
+-static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+- int start, int offset)
++static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
++ int start, int offset)
+ {
+ __sum16 *psum = (__sum16 *)(ptr + offset);
+ __wsum delta;
+@@ -175,7 +181,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+ return delta;
+ }
+
+-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
++static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+ {
+ *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
+ }
+diff --git a/include/net/dn.h b/include/net/dn.h
+deleted file mode 100644
+index 56ab0726c641a..0000000000000
+--- a/include/net/dn.h
++++ /dev/null
+@@ -1,231 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _NET_DN_H
+-#define _NET_DN_H
+-
+-#include <linux/dn.h>
+-#include <net/sock.h>
+-#include <net/flow.h>
+-#include <asm/byteorder.h>
+-#include <asm/unaligned.h>
+-
+-struct dn_scp /* Session Control Port */
+-{
+- unsigned char state;
+-#define DN_O 1 /* Open */
+-#define DN_CR 2 /* Connect Receive */
+-#define DN_DR 3 /* Disconnect Reject */
+-#define DN_DRC 4 /* Discon. Rej. Complete*/
+-#define DN_CC 5 /* Connect Confirm */
+-#define DN_CI 6 /* Connect Initiate */
+-#define DN_NR 7 /* No resources */
+-#define DN_NC 8 /* No communication */
+-#define DN_CD 9 /* Connect Delivery */
+-#define DN_RJ 10 /* Rejected */
+-#define DN_RUN 11 /* Running */
+-#define DN_DI 12 /* Disconnect Initiate */
+-#define DN_DIC 13 /* Disconnect Complete */
+-#define DN_DN 14 /* Disconnect Notificat */
+-#define DN_CL 15 /* Closed */
+-#define DN_CN 16 /* Closed Notification */
+-
+- __le16 addrloc;
+- __le16 addrrem;
+- __u16 numdat;
+- __u16 numoth;
+- __u16 numoth_rcv;
+- __u16 numdat_rcv;
+- __u16 ackxmt_dat;
+- __u16 ackxmt_oth;
+- __u16 ackrcv_dat;
+- __u16 ackrcv_oth;
+- __u8 flowrem_sw;
+- __u8 flowloc_sw;
+-#define DN_SEND 2
+-#define DN_DONTSEND 1
+-#define DN_NOCHANGE 0
+- __u16 flowrem_dat;
+- __u16 flowrem_oth;
+- __u16 flowloc_dat;
+- __u16 flowloc_oth;
+- __u8 services_rem;
+- __u8 services_loc;
+- __u8 info_rem;
+- __u8 info_loc;
+-
+- __u16 segsize_rem;
+- __u16 segsize_loc;
+-
+- __u8 nonagle;
+- __u8 multi_ireq;
+- __u8 accept_mode;
+- unsigned long seg_total; /* Running total of current segment */
+-
+- struct optdata_dn conndata_in;
+- struct optdata_dn conndata_out;
+- struct optdata_dn discdata_in;
+- struct optdata_dn discdata_out;
+- struct accessdata_dn accessdata;
+-
+- struct sockaddr_dn addr; /* Local address */
+- struct sockaddr_dn peer; /* Remote address */
+-
+- /*
+- * In this case the RTT estimation is not specified in the
+- * docs, nor is any back off algorithm. Here we follow well
+- * known tcp algorithms with a few small variations.
+- *
+- * snd_window: Max number of packets we send before we wait for
+- * an ack to come back. This will become part of a
+- * more complicated scheme when we support flow
+- * control.
+- *
+- * nsp_srtt: Round-Trip-Time (x8) in jiffies. This is a rolling
+- * average.
+- * nsp_rttvar: Round-Trip-Time-Varience (x4) in jiffies. This is the
+- * varience of the smoothed average (but calculated in
+- * a simpler way than for normal statistical varience
+- * calculations).
+- *
+- * nsp_rxtshift: Backoff counter. Value is zero normally, each time
+- * a packet is lost is increases by one until an ack
+- * is received. Its used to index an array of backoff
+- * multipliers.
+- */
+-#define NSP_MIN_WINDOW 1
+-#define NSP_MAX_WINDOW (0x07fe)
+- unsigned long max_window;
+- unsigned long snd_window;
+-#define NSP_INITIAL_SRTT (HZ)
+- unsigned long nsp_srtt;
+-#define NSP_INITIAL_RTTVAR (HZ*3)
+- unsigned long nsp_rttvar;
+-#define NSP_MAXRXTSHIFT 12
+- unsigned long nsp_rxtshift;
+-
+- /*
+- * Output queues, one for data, one for otherdata/linkservice
+- */
+- struct sk_buff_head data_xmit_queue;
+- struct sk_buff_head other_xmit_queue;
+-
+- /*
+- * Input queue for other data
+- */
+- struct sk_buff_head other_receive_queue;
+- int other_report;
+-
+- /*
+- * Stuff to do with the slow timer
+- */
+- unsigned long stamp; /* time of last transmit */
+- unsigned long persist;
+- int (*persist_fxn)(struct sock *sk);
+- unsigned long keepalive;
+- void (*keepalive_fxn)(struct sock *sk);
+-
+-};
+-
+-static inline struct dn_scp *DN_SK(struct sock *sk)
+-{
+- return (struct dn_scp *)(sk + 1);
+-}
+-
+-/*
+- * src,dst : Source and Destination DECnet addresses
+- * hops : Number of hops through the network
+- * dst_port, src_port : NSP port numbers
+- * services, info : Useful data extracted from conninit messages
+- * rt_flags : Routing flags byte
+- * nsp_flags : NSP layer flags byte
+- * segsize : Size of segment
+- * segnum : Number, for data, otherdata and linkservice
+- * xmit_count : Number of times we've transmitted this skb
+- * stamp : Time stamp of most recent transmission, used in RTT calculations
+- * iif: Input interface number
+- *
+- * As a general policy, this structure keeps all addresses in network
+- * byte order, and all else in host byte order. Thus dst, src, dst_port
+- * and src_port are in network order. All else is in host order.
+- *
+- */
+-#define DN_SKB_CB(skb) ((struct dn_skb_cb *)(skb)->cb)
+-struct dn_skb_cb {
+- __le16 dst;
+- __le16 src;
+- __u16 hops;
+- __le16 dst_port;
+- __le16 src_port;
+- __u8 services;
+- __u8 info;
+- __u8 rt_flags;
+- __u8 nsp_flags;
+- __u16 segsize;
+- __u16 segnum;
+- __u16 xmit_count;
+- unsigned long stamp;
+- int iif;
+-};
+-
+-static inline __le16 dn_eth2dn(unsigned char *ethaddr)
+-{
+- return get_unaligned((__le16 *)(ethaddr + 4));
+-}
+-
+-static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr)
+-{
+- return *(__le16 *)saddr->sdn_nodeaddr;
+-}
+-
+-static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr)
+-{
+- __u16 a = le16_to_cpu(addr);
+- ethaddr[0] = 0xAA;
+- ethaddr[1] = 0x00;
+- ethaddr[2] = 0x04;
+- ethaddr[3] = 0x00;
+- ethaddr[4] = (__u8)(a & 0xff);
+- ethaddr[5] = (__u8)(a >> 8);
+-}
+-
+-static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp)
+-{
+- fld->fld_sport = scp->addrloc;
+- fld->fld_dport = scp->addrrem;
+-}
+-
+-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu);
+-void dn_register_sysctl(void);
+-void dn_unregister_sysctl(void);
+-
+-#define DN_MENUVER_ACC 0x01
+-#define DN_MENUVER_USR 0x02
+-#define DN_MENUVER_PRX 0x04
+-#define DN_MENUVER_UIC 0x08
+-
+-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr);
+-struct sock *dn_find_by_skb(struct sk_buff *skb);
+-#define DN_ASCBUF_LEN 9
+-char *dn_addr2asc(__u16, char *);
+-int dn_destroy_timer(struct sock *sk);
+-
+-int dn_sockaddr2username(struct sockaddr_dn *addr, unsigned char *buf,
+- unsigned char type);
+-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *addr,
+- unsigned char *type);
+-
+-void dn_start_slow_timer(struct sock *sk);
+-void dn_stop_slow_timer(struct sock *sk);
+-
+-extern __le16 decnet_address;
+-extern int decnet_debug_level;
+-extern int decnet_time_wait;
+-extern int decnet_dn_count;
+-extern int decnet_di_count;
+-extern int decnet_dr_count;
+-extern int decnet_no_fc_max_cwnd;
+-
+-extern long sysctl_decnet_mem[3];
+-extern int sysctl_decnet_wmem[3];
+-extern int sysctl_decnet_rmem[3];
+-
+-#endif /* _NET_DN_H */
+diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
+deleted file mode 100644
+index 595b4f6c1eb10..0000000000000
+--- a/include/net/dn_dev.h
++++ /dev/null
+@@ -1,199 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _NET_DN_DEV_H
+-#define _NET_DN_DEV_H
+-
+-
+-struct dn_dev;
+-
+-struct dn_ifaddr {
+- struct dn_ifaddr __rcu *ifa_next;
+- struct dn_dev *ifa_dev;
+- __le16 ifa_local;
+- __le16 ifa_address;
+- __u32 ifa_flags;
+- __u8 ifa_scope;
+- char ifa_label[IFNAMSIZ];
+- struct rcu_head rcu;
+-};
+-
+-#define DN_DEV_S_RU 0 /* Run - working normally */
+-#define DN_DEV_S_CR 1 /* Circuit Rejected */
+-#define DN_DEV_S_DS 2 /* Data Link Start */
+-#define DN_DEV_S_RI 3 /* Routing Layer Initialize */
+-#define DN_DEV_S_RV 4 /* Routing Layer Verify */
+-#define DN_DEV_S_RC 5 /* Routing Layer Complete */
+-#define DN_DEV_S_OF 6 /* Off */
+-#define DN_DEV_S_HA 7 /* Halt */
+-
+-
+-/*
+- * The dn_dev_parms structure contains the set of parameters
+- * for each device (hence inclusion in the dn_dev structure)
+- * and an array is used to store the default types of supported
+- * device (in dn_dev.c).
+- *
+- * The type field matches the ARPHRD_ constants and is used in
+- * searching the list for supported devices when new devices
+- * come up.
+- *
+- * The mode field is used to find out if a device is broadcast,
+- * multipoint, or pointopoint. Please note that DECnet thinks
+- * different ways about devices to the rest of the kernel
+- * so the normal IFF_xxx flags are invalid here. For devices
+- * which can be any combination of the previously mentioned
+- * attributes, you can set this on a per device basis by
+- * installing an up() routine.
+- *
+- * The device state field, defines the initial state in which the
+- * device will come up. In the dn_dev structure, it is the actual
+- * state.
+- *
+- * Things have changed here. I've killed timer1 since it's a user space
+- * issue for a user space routing deamon to sort out. The kernel does
+- * not need to be bothered with it.
+- *
+- * Timers:
+- * t2 - Rate limit timer, min time between routing and hello messages
+- * t3 - Hello timer, send hello messages when it expires
+- *
+- * Callbacks:
+- * up() - Called to initialize device, return value can veto use of
+- * device with DECnet.
+- * down() - Called to turn device off when it goes down
+- * timer3() - Called once for each ifaddr when timer 3 goes off
+- *
+- * sysctl - Hook for sysctl things
+- *
+- */
+-struct dn_dev_parms {
+- int type; /* ARPHRD_xxx */
+- int mode; /* Broadcast, Unicast, Mulitpoint */
+-#define DN_DEV_BCAST 1
+-#define DN_DEV_UCAST 2
+-#define DN_DEV_MPOINT 4
+- int state; /* Initial state */
+- int forwarding; /* 0=EndNode, 1=L1Router, 2=L2Router */
+- unsigned long t2; /* Default value of t2 */
+- unsigned long t3; /* Default value of t3 */
+- int priority; /* Priority to be a router */
+- char *name; /* Name for sysctl */
+- int (*up)(struct net_device *);
+- void (*down)(struct net_device *);
+- void (*timer3)(struct net_device *, struct dn_ifaddr *ifa);
+- void *sysctl;
+-};
+-
+-
+-struct dn_dev {
+- struct dn_ifaddr __rcu *ifa_list;
+- struct net_device *dev;
+- struct dn_dev_parms parms;
+- char use_long;
+- struct timer_list timer;
+- unsigned long t3;
+- struct neigh_parms *neigh_parms;
+- __u8 addr[ETH_ALEN];
+- struct neighbour *router; /* Default router on circuit */
+- struct neighbour *peer; /* Peer on pointopoint links */
+- unsigned long uptime; /* Time device went up in jiffies */
+-};
+-
+-struct dn_short_packet {
+- __u8 msgflg;
+- __le16 dstnode;
+- __le16 srcnode;
+- __u8 forward;
+-} __packed;
+-
+-struct dn_long_packet {
+- __u8 msgflg;
+- __u8 d_area;
+- __u8 d_subarea;
+- __u8 d_id[6];
+- __u8 s_area;
+- __u8 s_subarea;
+- __u8 s_id[6];
+- __u8 nl2;
+- __u8 visit_ct;
+- __u8 s_class;
+- __u8 pt;
+-} __packed;
+-
+-/*------------------------- DRP - Routing messages ---------------------*/
+-
+-struct endnode_hello_message {
+- __u8 msgflg;
+- __u8 tiver[3];
+- __u8 id[6];
+- __u8 iinfo;
+- __le16 blksize;
+- __u8 area;
+- __u8 seed[8];
+- __u8 neighbor[6];
+- __le16 timer;
+- __u8 mpd;
+- __u8 datalen;
+- __u8 data[2];
+-} __packed;
+-
+-struct rtnode_hello_message {
+- __u8 msgflg;
+- __u8 tiver[3];
+- __u8 id[6];
+- __u8 iinfo;
+- __le16 blksize;
+- __u8 priority;
+- __u8 area;
+- __le16 timer;
+- __u8 mpd;
+-} __packed;
+-
+-
+-void dn_dev_init(void);
+-void dn_dev_cleanup(void);
+-
+-int dn_dev_ioctl(unsigned int cmd, void __user *arg);
+-
+-void dn_dev_devices_off(void);
+-void dn_dev_devices_on(void);
+-
+-void dn_dev_init_pkt(struct sk_buff *skb);
+-void dn_dev_veri_pkt(struct sk_buff *skb);
+-void dn_dev_hello(struct sk_buff *skb);
+-
+-void dn_dev_up(struct net_device *);
+-void dn_dev_down(struct net_device *);
+-
+-int dn_dev_set_default(struct net_device *dev, int force);
+-struct net_device *dn_dev_get_default(void);
+-int dn_dev_bind_default(__le16 *addr);
+-
+-int register_dnaddr_notifier(struct notifier_block *nb);
+-int unregister_dnaddr_notifier(struct notifier_block *nb);
+-
+-static inline int dn_dev_islocal(struct net_device *dev, __le16 addr)
+-{
+- struct dn_dev *dn_db;
+- struct dn_ifaddr *ifa;
+- int res = 0;
+-
+- rcu_read_lock();
+- dn_db = rcu_dereference(dev->dn_ptr);
+- if (dn_db == NULL) {
+- printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n");
+- goto out;
+- }
+-
+- for (ifa = rcu_dereference(dn_db->ifa_list);
+- ifa != NULL;
+- ifa = rcu_dereference(ifa->ifa_next))
+- if ((addr ^ ifa->ifa_local) == 0) {
+- res = 1;
+- break;
+- }
+-out:
+- rcu_read_unlock();
+- return res;
+-}
+-
+-#endif /* _NET_DN_DEV_H */
+diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
+deleted file mode 100644
+index ddd6565957b35..0000000000000
+--- a/include/net/dn_fib.h
++++ /dev/null
+@@ -1,167 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _NET_DN_FIB_H
+-#define _NET_DN_FIB_H
+-
+-#include <linux/netlink.h>
+-#include <linux/refcount.h>
+-
+-extern const struct nla_policy rtm_dn_policy[];
+-
+-struct dn_fib_res {
+- struct fib_rule *r;
+- struct dn_fib_info *fi;
+- unsigned char prefixlen;
+- unsigned char nh_sel;
+- unsigned char type;
+- unsigned char scope;
+-};
+-
+-struct dn_fib_nh {
+- struct net_device *nh_dev;
+- unsigned int nh_flags;
+- unsigned char nh_scope;
+- int nh_weight;
+- int nh_power;
+- int nh_oif;
+- __le16 nh_gw;
+-};
+-
+-struct dn_fib_info {
+- struct dn_fib_info *fib_next;
+- struct dn_fib_info *fib_prev;
+- refcount_t fib_treeref;
+- refcount_t fib_clntref;
+- int fib_dead;
+- unsigned int fib_flags;
+- int fib_protocol;
+- __le16 fib_prefsrc;
+- __u32 fib_priority;
+- __u32 fib_metrics[RTAX_MAX];
+- int fib_nhs;
+- int fib_power;
+- struct dn_fib_nh fib_nh[0];
+-#define dn_fib_dev fib_nh[0].nh_dev
+-};
+-
+-
+-#define DN_FIB_RES_RESET(res) ((res).nh_sel = 0)
+-#define DN_FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel])
+-
+-#define DN_FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? : __dn_fib_res_prefsrc(&res))
+-#define DN_FIB_RES_GW(res) (DN_FIB_RES_NH(res).nh_gw)
+-#define DN_FIB_RES_DEV(res) (DN_FIB_RES_NH(res).nh_dev)
+-#define DN_FIB_RES_OIF(res) (DN_FIB_RES_NH(res).nh_oif)
+-
+-typedef struct {
+- __le16 datum;
+-} dn_fib_key_t;
+-
+-typedef struct {
+- __le16 datum;
+-} dn_fib_hash_t;
+-
+-typedef struct {
+- __u16 datum;
+-} dn_fib_idx_t;
+-
+-struct dn_fib_node {
+- struct dn_fib_node *fn_next;
+- struct dn_fib_info *fn_info;
+-#define DN_FIB_INFO(f) ((f)->fn_info)
+- dn_fib_key_t fn_key;
+- u8 fn_type;
+- u8 fn_scope;
+- u8 fn_state;
+-};
+-
+-
+-struct dn_fib_table {
+- struct hlist_node hlist;
+- u32 n;
+-
+- int (*insert)(struct dn_fib_table *t, struct rtmsg *r,
+- struct nlattr *attrs[], struct nlmsghdr *n,
+- struct netlink_skb_parms *req);
+- int (*delete)(struct dn_fib_table *t, struct rtmsg *r,
+- struct nlattr *attrs[], struct nlmsghdr *n,
+- struct netlink_skb_parms *req);
+- int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld,
+- struct dn_fib_res *res);
+- int (*flush)(struct dn_fib_table *t);
+- int (*dump)(struct dn_fib_table *t, struct sk_buff *skb, struct netlink_callback *cb);
+-
+- unsigned char data[];
+-};
+-
+-#ifdef CONFIG_DECNET_ROUTER
+-/*
+- * dn_fib.c
+- */
+-void dn_fib_init(void);
+-void dn_fib_cleanup(void);
+-
+-int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r,
+- struct nlattr *attrs[],
+- const struct nlmsghdr *nlh, int *errp);
+-int dn_fib_semantic_match(int type, struct dn_fib_info *fi,
+- const struct flowidn *fld, struct dn_fib_res *res);
+-void dn_fib_release_info(struct dn_fib_info *fi);
+-void dn_fib_flush(void);
+-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res);
+-
+-/*
+- * dn_tables.c
+- */
+-struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
+-struct dn_fib_table *dn_fib_empty_table(void);
+-void dn_fib_table_init(void);
+-void dn_fib_table_cleanup(void);
+-
+-/*
+- * dn_rules.c
+- */
+-void dn_fib_rules_init(void);
+-void dn_fib_rules_cleanup(void);
+-unsigned int dnet_addr_type(__le16 addr);
+-int dn_fib_lookup(struct flowidn *fld, struct dn_fib_res *res);
+-
+-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb);
+-
+-void dn_fib_free_info(struct dn_fib_info *fi);
+-
+-static inline void dn_fib_info_put(struct dn_fib_info *fi)
+-{
+- if (refcount_dec_and_test(&fi->fib_clntref))
+- dn_fib_free_info(fi);
+-}
+-
+-static inline void dn_fib_res_put(struct dn_fib_res *res)
+-{
+- if (res->fi)
+- dn_fib_info_put(res->fi);
+- if (res->r)
+- fib_rule_put(res->r);
+-}
+-
+-#else /* Endnode */
+-
+-#define dn_fib_init() do { } while(0)
+-#define dn_fib_cleanup() do { } while(0)
+-
+-#define dn_fib_lookup(fl, res) (-ESRCH)
+-#define dn_fib_info_put(fi) do { } while(0)
+-#define dn_fib_select_multipath(fl, res) do { } while(0)
+-#define dn_fib_rules_policy(saddr,res,flags) (0)
+-#define dn_fib_res_put(res) do { } while(0)
+-
+-#endif /* CONFIG_DECNET_ROUTER */
+-
+-static inline __le16 dnet_make_mask(int n)
+-{
+- if (n)
+- return cpu_to_le16(~((1 << (16 - n)) - 1));
+- return cpu_to_le16(0);
+-}
+-
+-#endif /* _NET_DN_FIB_H */
+diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h
+deleted file mode 100644
+index 2e3e7793973a8..0000000000000
+--- a/include/net/dn_neigh.h
++++ /dev/null
+@@ -1,30 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _NET_DN_NEIGH_H
+-#define _NET_DN_NEIGH_H
+-
+-/*
+- * The position of the first two fields of
+- * this structure are critical - SJW
+- */
+-struct dn_neigh {
+- struct neighbour n;
+- __le16 addr;
+- unsigned long flags;
+-#define DN_NDFLAG_R1 0x0001 /* Router L1 */
+-#define DN_NDFLAG_R2 0x0002 /* Router L2 */
+-#define DN_NDFLAG_P3 0x0004 /* Phase III Node */
+- unsigned long blksize;
+- __u8 priority;
+-};
+-
+-void dn_neigh_init(void);
+-void dn_neigh_cleanup(void);
+-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
+-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
+-void dn_neigh_pointopoint_hello(struct sk_buff *skb);
+-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
+-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb);
+-
+-extern struct neigh_table dn_neigh_table;
+-
+-#endif /* _NET_DN_NEIGH_H */
+diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
+deleted file mode 100644
+index f83932b864a93..0000000000000
+--- a/include/net/dn_nsp.h
++++ /dev/null
+@@ -1,195 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-or-later */
+-#ifndef _NET_DN_NSP_H
+-#define _NET_DN_NSP_H
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-*******************************************************************************/
+-/* dn_nsp.c functions prototyping */
+-
+-void dn_nsp_send_data_ack(struct sock *sk);
+-void dn_nsp_send_oth_ack(struct sock *sk);
+-void dn_send_conn_ack(struct sock *sk);
+-void dn_send_conn_conf(struct sock *sk, gfp_t gfp);
+-void dn_nsp_send_disc(struct sock *sk, unsigned char type,
+- unsigned short reason, gfp_t gfp);
+-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char type,
+- unsigned short reason);
+-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval);
+-void dn_nsp_send_conninit(struct sock *sk, unsigned char flags);
+-
+-void dn_nsp_output(struct sock *sk);
+-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb,
+- struct sk_buff_head *q, unsigned short acknum);
+-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, gfp_t gfp,
+- int oob);
+-unsigned long dn_nsp_persist(struct sock *sk);
+-int dn_nsp_xmit_timeout(struct sock *sk);
+-
+-int dn_nsp_rx(struct sk_buff *);
+-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+-
+-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
+-struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock,
+- long timeo, int *err);
+-
+-#define NSP_REASON_OK 0 /* No error */
+-#define NSP_REASON_NR 1 /* No resources */
+-#define NSP_REASON_UN 2 /* Unrecognised node name */
+-#define NSP_REASON_SD 3 /* Node shutting down */
+-#define NSP_REASON_ID 4 /* Invalid destination end user */
+-#define NSP_REASON_ER 5 /* End user lacks resources */
+-#define NSP_REASON_OB 6 /* Object too busy */
+-#define NSP_REASON_US 7 /* Unspecified error */
+-#define NSP_REASON_TP 8 /* Third-Party abort */
+-#define NSP_REASON_EA 9 /* End user has aborted the link */
+-#define NSP_REASON_IF 10 /* Invalid node name format */
+-#define NSP_REASON_LS 11 /* Local node shutdown */
+-#define NSP_REASON_LL 32 /* Node lacks logical-link resources */
+-#define NSP_REASON_LE 33 /* End user lacks logical-link resources */
+-#define NSP_REASON_UR 34 /* Unacceptable RQSTRID or PASSWORD field */
+-#define NSP_REASON_UA 36 /* Unacceptable ACCOUNT field */
+-#define NSP_REASON_TM 38 /* End user timed out logical link */
+-#define NSP_REASON_NU 39 /* Node unreachable */
+-#define NSP_REASON_NL 41 /* No-link message */
+-#define NSP_REASON_DC 42 /* Disconnect confirm */
+-#define NSP_REASON_IO 43 /* Image data field overflow */
+-
+-#define NSP_DISCINIT 0x38
+-#define NSP_DISCCONF 0x48
+-
+-/*------------------------- NSP - messages ------------------------------*/
+-/* Data Messages */
+-/*---------------*/
+-
+-/* Data Messages (data segment/interrupt/link service) */
+-
+-struct nsp_data_seg_msg {
+- __u8 msgflg;
+- __le16 dstaddr;
+- __le16 srcaddr;
+-} __packed;
+-
+-struct nsp_data_opt_msg {
+- __le16 acknum;
+- __le16 segnum;
+- __le16 lsflgs;
+-} __packed;
+-
+-struct nsp_data_opt_msg1 {
+- __le16 acknum;
+- __le16 segnum;
+-} __packed;
+-
+-
+-/* Acknowledgment Message (data/other data) */
+-struct nsp_data_ack_msg {
+- __u8 msgflg;
+- __le16 dstaddr;
+- __le16 srcaddr;
+- __le16 acknum;
+-} __packed;
+-
+-/* Connect Acknowledgment Message */
+-struct nsp_conn_ack_msg {
+- __u8 msgflg;
+- __le16 dstaddr;
+-} __packed;
+-
+-
+-/* Connect Initiate/Retransmit Initiate/Connect Confirm */
+-struct nsp_conn_init_msg {
+- __u8 msgflg;
+-#define NSP_CI 0x18 /* Connect Initiate */
+-#define NSP_RCI 0x68 /* Retrans. Conn Init */
+- __le16 dstaddr;
+- __le16 srcaddr;
+- __u8 services;
+-#define NSP_FC_NONE 0x00 /* Flow Control None */
+-#define NSP_FC_SRC 0x04 /* Seg Req. Count */
+-#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */
+-#define NSP_FC_MASK 0x0c /* FC type mask */
+- __u8 info;
+- __le16 segsize;
+-} __packed;
+-
+-/* Disconnect Initiate/Disconnect Confirm */
+-struct nsp_disconn_init_msg {
+- __u8 msgflg;
+- __le16 dstaddr;
+- __le16 srcaddr;
+- __le16 reason;
+-} __packed;
+-
+-
+-
+-struct srcobj_fmt {
+- __u8 format;
+- __u8 task;
+- __le16 grpcode;
+- __le16 usrcode;
+- __u8 dlen;
+-} __packed;
+-
+-/*
+- * A collection of functions for manipulating the sequence
+- * numbers used in NSP. Similar in operation to the functions
+- * of the same name in TCP.
+- */
+-static __inline__ int dn_before(__u16 seq1, __u16 seq2)
+-{
+- seq1 &= 0x0fff;
+- seq2 &= 0x0fff;
+-
+- return (int)((seq1 - seq2) & 0x0fff) > 2048;
+-}
+-
+-
+-static __inline__ int dn_after(__u16 seq1, __u16 seq2)
+-{
+- seq1 &= 0x0fff;
+- seq2 &= 0x0fff;
+-
+- return (int)((seq2 - seq1) & 0x0fff) > 2048;
+-}
+-
+-static __inline__ int dn_equal(__u16 seq1, __u16 seq2)
+-{
+- return ((seq1 ^ seq2) & 0x0fff) == 0;
+-}
+-
+-static __inline__ int dn_before_or_equal(__u16 seq1, __u16 seq2)
+-{
+- return (dn_before(seq1, seq2) || dn_equal(seq1, seq2));
+-}
+-
+-static __inline__ void seq_add(__u16 *seq, __u16 off)
+-{
+- (*seq) += off;
+- (*seq) &= 0x0fff;
+-}
+-
+-static __inline__ int seq_next(__u16 seq1, __u16 seq2)
+-{
+- return dn_equal(seq1 + 1, seq2);
+-}
+-
+-/*
+- * Can we delay the ack ?
+- */
+-static __inline__ int sendack(__u16 seq)
+-{
+- return (int)((seq & 0x1000) ? 0 : 1);
+-}
+-
+-/*
+- * Is socket congested ?
+- */
+-static __inline__ int dn_congested(struct sock *sk)
+-{
+- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+-}
+-
+-#define DN_MAX_NSP_DATA_HEADER (11)
+-
+-#endif /* _NET_DN_NSP_H */
+diff --git a/include/net/dn_route.h b/include/net/dn_route.h
+deleted file mode 100644
+index 6f1e94ac0bdfc..0000000000000
+--- a/include/net/dn_route.h
++++ /dev/null
+@@ -1,115 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-or-later */
+-#ifndef _NET_DN_ROUTE_H
+-#define _NET_DN_ROUTE_H
+-
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-*******************************************************************************/
+-
+-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
+-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *,
+- struct sock *sk, int flags);
+-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
+-void dn_rt_cache_flush(int delay);
+-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
+- struct packet_type *pt, struct net_device *orig_dev);
+-
+-/* Masks for flags field */
+-#define DN_RT_F_PID 0x07 /* Mask for packet type */
+-#define DN_RT_F_PF 0x80 /* Padding Follows */
+-#define DN_RT_F_VER 0x40 /* Version =0 discard packet if ==1 */
+-#define DN_RT_F_IE 0x20 /* Intra Ethernet, Reserved in short pkt */
+-#define DN_RT_F_RTS 0x10 /* Packet is being returned to sender */
+-#define DN_RT_F_RQR 0x08 /* Return packet to sender upon non-delivery */
+-
+-/* Mask for types of routing packets */
+-#define DN_RT_PKT_MSK 0x06
+-/* Types of routing packets */
+-#define DN_RT_PKT_SHORT 0x02 /* Short routing packet */
+-#define DN_RT_PKT_LONG 0x06 /* Long routing packet */
+-
+-/* Mask for control/routing selection */
+-#define DN_RT_PKT_CNTL 0x01 /* Set to 1 if a control packet */
+-/* Types of control packets */
+-#define DN_RT_CNTL_MSK 0x0f /* Mask for control packets */
+-#define DN_RT_PKT_INIT 0x01 /* Initialisation packet */
+-#define DN_RT_PKT_VERI 0x03 /* Verification Message */
+-#define DN_RT_PKT_HELO 0x05 /* Hello and Test Message */
+-#define DN_RT_PKT_L1RT 0x07 /* Level 1 Routing Message */
+-#define DN_RT_PKT_L2RT 0x09 /* Level 2 Routing Message */
+-#define DN_RT_PKT_ERTH 0x0b /* Ethernet Router Hello */
+-#define DN_RT_PKT_EEDH 0x0d /* Ethernet EndNode Hello */
+-
+-/* Values for info field in hello message */
+-#define DN_RT_INFO_TYPE 0x03 /* Type mask */
+-#define DN_RT_INFO_L1RT 0x02 /* L1 Router */
+-#define DN_RT_INFO_L2RT 0x01 /* L2 Router */
+-#define DN_RT_INFO_ENDN 0x03 /* EndNode */
+-#define DN_RT_INFO_VERI 0x04 /* Verification Reqd. */
+-#define DN_RT_INFO_RJCT 0x08 /* Reject Flag, Reserved */
+-#define DN_RT_INFO_VFLD 0x10 /* Verification Failed, Reserved */
+-#define DN_RT_INFO_NOML 0x20 /* No Multicast traffic accepted */
+-#define DN_RT_INFO_BLKR 0x40 /* Blocking Requested */
+-
+-/*
+- * The fl structure is what we used to look up the route.
+- * The rt_saddr & rt_daddr entries are the same as key.saddr & key.daddr
+- * except for local input routes, where the rt_saddr = fl.fld_dst and
+- * rt_daddr = fl.fld_src to allow the route to be used for returning
+- * packets to the originating host.
+- */
+-struct dn_route {
+- struct dst_entry dst;
+- struct dn_route __rcu *dn_next;
+-
+- struct neighbour *n;
+-
+- struct flowidn fld;
+-
+- __le16 rt_saddr;
+- __le16 rt_daddr;
+- __le16 rt_gateway;
+- __le16 rt_local_src; /* Source used for forwarding packets */
+- __le16 rt_src_map;
+- __le16 rt_dst_map;
+-
+- unsigned int rt_flags;
+- unsigned int rt_type;
+-};
+-
+-static inline bool dn_is_input_route(struct dn_route *rt)
+-{
+- return rt->fld.flowidn_iif != 0;
+-}
+-
+-static inline bool dn_is_output_route(struct dn_route *rt)
+-{
+- return rt->fld.flowidn_iif == 0;
+-}
+-
+-void dn_route_init(void);
+-void dn_route_cleanup(void);
+-
+-#include <net/sock.h>
+-#include <linux/if_arp.h>
+-
+-static inline void dn_rt_send(struct sk_buff *skb)
+-{
+- dev_queue_xmit(skb);
+-}
+-
+-static inline void dn_rt_finish_output(struct sk_buff *skb, char *dst, char *src)
+-{
+- struct net_device *dev = skb->dev;
+-
+- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+- dst = NULL;
+-
+- if (dev_hard_header(skb, dev, ETH_P_DNA_RT, dst, src, skb->len) >= 0)
+- dn_rt_send(skb);
+- else
+- kfree_skb(skb);
+-}
+-
+-#endif /* _NET_DN_ROUTE_H */
+diff --git a/include/net/dsa.h b/include/net/dsa.h
+index d784e76113b8d..bec439c4a0859 100644
+--- a/include/net/dsa.h
++++ b/include/net/dsa.h
+@@ -472,6 +472,34 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
+ return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER;
+ }
+
++#define dsa_tree_for_each_user_port(_dp, _dst) \
++ list_for_each_entry((_dp), &(_dst)->ports, list) \
++ if (dsa_port_is_user((_dp)))
++
++#define dsa_switch_for_each_port(_dp, _ds) \
++ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \
++ if ((_dp)->ds == (_ds))
++
++#define dsa_switch_for_each_port_safe(_dp, _next, _ds) \
++ list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \
++ if ((_dp)->ds == (_ds))
++
++#define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \
++ list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \
++ if ((_dp)->ds == (_ds))
++
++#define dsa_switch_for_each_available_port(_dp, _ds) \
++ dsa_switch_for_each_port((_dp), (_ds)) \
++ if (!dsa_port_is_unused((_dp)))
++
++#define dsa_switch_for_each_user_port(_dp, _ds) \
++ dsa_switch_for_each_port((_dp), (_ds)) \
++ if (dsa_port_is_user((_dp)))
++
++#define dsa_switch_for_each_cpu_port(_dp, _ds) \
++ dsa_switch_for_each_port((_dp), (_ds)) \
++ if (dsa_port_is_cpu((_dp)))
++
+ static inline u32 dsa_user_ports(struct dsa_switch *ds)
+ {
+ u32 mask = 0;
+diff --git a/include/net/dst.h b/include/net/dst.h
+index a057319aabefa..827f99d577331 100644
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -238,12 +238,6 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+ }
+ }
+
+-static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time)
+-{
+- dst_hold(dst);
+- dst_use_noref(dst, time);
+-}
+-
+ static inline struct dst_entry *dst_clone(struct dst_entry *dst)
+ {
+ if (dst)
+@@ -361,9 +355,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
+ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
+ struct net *net)
+ {
+- /* TODO : stats should be SMP safe */
+- dev->stats.rx_packets++;
+- dev->stats.rx_bytes += skb->len;
++ DEV_STATS_INC(dev, rx_packets);
++ DEV_STATS_ADD(dev, rx_bytes, skb->len);
+ __skb_tunnel_rx(skb, dev, net);
+ }
+
+diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
+index 67634675e9197..df6622a5fe98f 100644
+--- a/include/net/dst_cache.h
++++ b/include/net/dst_cache.h
+@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache)
+ dst_cache->reset_ts = jiffies;
+ }
+
++/**
++ * dst_cache_reset_now - invalidate the cache contents immediately
++ * @dst_cache: the cache
++ *
++ * The caller must be sure there are no concurrent users, as this frees
++ * all dst_cache users immediately, rather than waiting for the next
++ * per-cpu usage like dst_cache_reset does. Most callers should use the
++ * higher speed lazily-freed dst_cache_reset function instead.
++ */
++void dst_cache_reset_now(struct dst_cache *dst_cache);
++
+ /**
+ * dst_cache_init - initialize the cache, allocating the required storage
+ * @dst_cache: the cache
+diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
+index 14efa0ded75dd..adab27ba1ecbf 100644
+--- a/include/net/dst_metadata.h
++++ b/include/net/dst_metadata.h
+@@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
+
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
++#ifdef CONFIG_DST_CACHE
++ /* Unclone the dst cache if there is one */
++ if (new_md->u.tun_info.dst_cache.cache) {
++ int ret;
++
++ ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC);
++ if (ret) {
++ metadata_dst_free(new_md);
++ return ERR_PTR(ret);
++ }
++ }
++#endif
++
+ skb_dst_drop(skb);
+- dst_hold(&new_md->dst);
+ skb_dst_set(skb, &new_md->dst);
+ return new_md;
+ }
+diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
+index 4b10676c69d19..bd07484ab9dd5 100644
+--- a/include/net/fib_rules.h
++++ b/include/net/fib_rules.h
+@@ -69,7 +69,7 @@ struct fib_rules_ops {
+ int (*action)(struct fib_rule *,
+ struct flowi *, int,
+ struct fib_lookup_arg *);
+- bool (*suppress)(struct fib_rule *,
++ bool (*suppress)(struct fib_rule *, int,
+ struct fib_lookup_arg *);
+ int (*match)(struct fib_rule *,
+ struct flowi *, int);
+@@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
+ struct fib_lookup_arg *arg));
+
+ INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
++ int flags,
+ struct fib_lookup_arg *arg));
+ INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
++ int flags,
+ struct fib_lookup_arg *arg));
+ #endif
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 58beb16a49b8d..e3f9d92460e7a 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -54,11 +54,6 @@ union flowi_uli {
+ __u8 code;
+ } icmpt;
+
+- struct {
+- __le16 dport;
+- __le16 sport;
+- } dnports;
+-
+ __be32 gre_key;
+
+ struct {
+@@ -153,27 +148,11 @@ struct flowi6 {
+ __u32 mp_hash;
+ } __attribute__((__aligned__(BITS_PER_LONG/8)));
+
+-struct flowidn {
+- struct flowi_common __fl_common;
+-#define flowidn_oif __fl_common.flowic_oif
+-#define flowidn_iif __fl_common.flowic_iif
+-#define flowidn_mark __fl_common.flowic_mark
+-#define flowidn_scope __fl_common.flowic_scope
+-#define flowidn_proto __fl_common.flowic_proto
+-#define flowidn_flags __fl_common.flowic_flags
+- __le16 daddr;
+- __le16 saddr;
+- union flowi_uli uli;
+-#define fld_sport uli.ports.sport
+-#define fld_dport uli.ports.dport
+-} __attribute__((__aligned__(BITS_PER_LONG/8)));
+-
+ struct flowi {
+ union {
+ struct flowi_common __fl_common;
+ struct flowi4 ip4;
+ struct flowi6 ip6;
+- struct flowidn dn;
+ } u;
+ #define flowi_oif u.__fl_common.flowic_oif
+ #define flowi_iif u.__fl_common.flowic_iif
+@@ -207,11 +186,6 @@ static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
+ return &(fl6->__fl_common);
+ }
+
+-static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
+-{
+- return container_of(fldn, struct flowi, u.dn);
+-}
+-
+ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
+
+ #endif
+diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
+index ffd386ea0dbb3..c8d1c5e187e4b 100644
+--- a/include/net/flow_dissector.h
++++ b/include/net/flow_dissector.h
+@@ -59,6 +59,8 @@ struct flow_dissector_key_vlan {
+ __be16 vlan_tci;
+ };
+ __be16 vlan_tpid;
++ __be16 vlan_eth_type;
++ u16 padding;
+ };
+
+ struct flow_dissector_mpls_lse {
+diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
+index 3961461d9c8bc..7a2b0223a02c7 100644
+--- a/include/net/flow_offload.h
++++ b/include/net/flow_offload.h
+@@ -575,5 +575,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+ enum tc_setup_type type, void *data,
+ struct flow_block_offload *bo,
+ void (*cleanup)(struct flow_block_cb *block_cb));
++bool flow_indr_dev_exists(void);
+
+ #endif /* _NET_FLOW_OFFLOAD_H */
+diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
+index d0d188c3294bd..03b64bf876a46 100644
+--- a/include/net/ieee802154_netdev.h
++++ b/include/net/ieee802154_netdev.h
+@@ -15,6 +15,22 @@
+ #ifndef IEEE802154_NETDEVICE_H
+ #define IEEE802154_NETDEVICE_H
+
++#define IEEE802154_REQUIRED_SIZE(struct_type, member) \
++ (offsetof(typeof(struct_type), member) + \
++ sizeof(((typeof(struct_type) *)(NULL))->member))
++
++#define IEEE802154_ADDR_OFFSET \
++ offsetof(typeof(struct sockaddr_ieee802154), addr)
++
++#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \
++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type))
++
++#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \
++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr))
++
++#define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \
++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr))
++
+ #include <net/af_ieee802154.h>
+ #include <linux/netdevice.h>
+ #include <linux/skbuff.h>
+@@ -165,6 +181,33 @@ static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr)
+ memcpy(raw, &temp, IEEE802154_ADDR_LEN);
+ }
+
++static inline int
++ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len)
++{
++ struct ieee802154_addr_sa *sa;
++ int ret = 0;
++
++ sa = &daddr->addr;
++ if (len < IEEE802154_MIN_NAMELEN)
++ return -EINVAL;
++ switch (sa->addr_type) {
++ case IEEE802154_ADDR_NONE:
++ break;
++ case IEEE802154_ADDR_SHORT:
++ if (len < IEEE802154_NAMELEN_SHORT)
++ ret = -EINVAL;
++ break;
++ case IEEE802154_ADDR_LONG:
++ if (len < IEEE802154_NAMELEN_LONG)
++ ret = -EINVAL;
++ break;
++ default:
++ ret = -EINVAL;
++ break;
++ }
++ return ret;
++}
++
+ static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a,
+ const struct ieee802154_addr_sa *sa)
+ {
+diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
+index 653e7d0f65cb7..8ec0878a90a7a 100644
+--- a/include/net/if_inet6.h
++++ b/include/net/if_inet6.h
+@@ -64,6 +64,14 @@ struct inet6_ifaddr {
+
+ struct hlist_node addr_lst;
+ struct list_head if_list;
++ /*
++ * Used to safely traverse idev->addr_list in process context
++ * if the idev->lock needed to protect idev->addr_list cannot be held.
++ * In that case, add the items to this list temporarily and iterate
++ * without holding idev->lock.
++ * See addrconf_ifdown and dev_forward_change.
++ */
++ struct list_head if_list_aux;
+
+ struct list_head tmp_list;
+ struct inet6_ifaddr *ifpub;
+diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
+index 81b9659530368..56f1286583d3c 100644
+--- a/include/net/inet6_hashtables.h
++++ b/include/net/inet6_hashtables.h
+@@ -103,15 +103,24 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ const int dif);
+
+ int inet6_hash(struct sock *sk);
+-#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
+- (((__sk)->sk_portpair == (__ports)) && \
+- ((__sk)->sk_family == AF_INET6) && \
+- ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
+- ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
+- (((__sk)->sk_bound_dev_if == (__dif)) || \
+- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+- net_eq(sock_net(__sk), (__net)))
++static inline bool inet6_match(struct net *net, const struct sock *sk,
++ const struct in6_addr *saddr,
++ const struct in6_addr *daddr,
++ const __portpair ports,
++ const int dif, const int sdif)
++{
++ if (!net_eq(sock_net(sk), net) ||
++ sk->sk_family != AF_INET6 ||
++ sk->sk_portpair != ports ||
++ !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
++ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
++ return false;
++
++ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
++ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
++ sdif);
++}
++#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+ #endif /* _INET6_HASHTABLES_H */
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index b06c2d02ec84e..695ed45841f06 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -289,7 +289,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
+ {
+ /* The below has to be done to allow calling inet_csk_destroy_sock */
+ sock_set_flag(sk, SOCK_DEAD);
+- percpu_counter_inc(sk->sk_prot->orphan_count);
++ this_cpu_inc(*sk->sk_prot->orphan_count);
+ }
+
+ void inet_csk_destroy_sock(struct sock *sk);
+@@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+
+ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+-#define TCP_PINGPONG_THRESH 3
++#define TCP_PINGPONG_THRESH 1
+
+ static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+ {
+@@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+ return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ }
+
+-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+-{
+- struct inet_connection_sock *icsk = inet_csk(sk);
+-
+- if (icsk->icsk_ack.pingpong < U8_MAX)
+- icsk->icsk_ack.pingpong++;
+-}
+-
+ static inline bool inet_csk_has_ulp(struct sock *sk)
+ {
+ return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 48cc5795ceda6..63540be0fc34a 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
+
+ static inline void fqdir_pre_exit(struct fqdir *fqdir)
+ {
+- fqdir->high_thresh = 0; /* prevent creation of new frags */
+- fqdir->dead = true;
++ /* Prevent creation of new frags.
++ * Pairs with READ_ONCE() in inet_frag_find().
++ */
++ WRITE_ONCE(fqdir->high_thresh, 0);
++
++ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
++ * and ip6frag_expire_frag_queue().
++ */
++ WRITE_ONCE(fqdir->dead, true);
+ }
+ void fqdir_exit(struct fqdir *fqdir);
+
+diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
+index f72ec113ae568..53c22b64e9724 100644
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -203,17 +203,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+ hashinfo->ehash_locks = NULL;
+ }
+
+-static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+- int dif, int sdif)
+-{
+-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+- bound_dev_if, dif, sdif);
+-#else
+- return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+-#endif
+-}
+-
+ struct inet_bind_bucket *
+ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
+ struct inet_bind_hashbucket *head,
+@@ -295,7 +284,6 @@ static inline struct sock *inet_lookup_listener(struct net *net,
+ ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
+ #endif
+
+-#if (BITS_PER_LONG == 64)
+ #ifdef __BIG_ENDIAN
+ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+ const __addrpair __name = (__force __addrpair) ( \
+@@ -307,24 +295,20 @@ static inline struct sock *inet_lookup_listener(struct net *net,
+ (((__force __u64)(__be32)(__daddr)) << 32) | \
+ ((__force __u64)(__be32)(__saddr)))
+ #endif /* __BIG_ENDIAN */
+-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
+- (((__sk)->sk_portpair == (__ports)) && \
+- ((__sk)->sk_addrpair == (__cookie)) && \
+- (((__sk)->sk_bound_dev_if == (__dif)) || \
+- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+- net_eq(sock_net(__sk), (__net)))
+-#else /* 32-bit arch */
+-#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+- const int __name __deprecated __attribute__((unused))
+
+-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
+- (((__sk)->sk_portpair == (__ports)) && \
+- ((__sk)->sk_daddr == (__saddr)) && \
+- ((__sk)->sk_rcv_saddr == (__daddr)) && \
+- (((__sk)->sk_bound_dev_if == (__dif)) || \
+- ((__sk)->sk_bound_dev_if == (__sdif))) && \
+- net_eq(sock_net(__sk), (__net)))
+-#endif /* 64-bit arch */
++static inline bool INET_MATCH(struct net *net, const struct sock *sk,
++ const __addrpair cookie, const __portpair ports,
++ int dif, int sdif)
++{
++ if (!net_eq(sock_net(sk), net) ||
++ sk->sk_portpair != ports ||
++ sk->sk_addrpair != cookie)
++ return false;
++
++ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
++ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
++ sdif);
++}
+
+ /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
+ * not check it for lookups anymore, thanks Alexey. -DaveM
+@@ -425,7 +409,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
+ }
+
+ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+- struct sock *sk, u32 port_offset,
++ struct sock *sk, u64 port_offset,
+ int (*check_established)(struct inet_timewait_death_row *,
+ struct sock *, __u16,
+ struct inet_timewait_sock **));
+diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
+index 89163ef8cf4be..2c2b41ea7f81d 100644
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
+
+ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
+ {
+- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
++ if (!sk->sk_mark &&
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ return skb->mark;
+
+ return sk->sk_mark;
+@@ -116,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
+ static inline int inet_request_bound_dev_if(const struct sock *sk,
+ struct sk_buff *skb)
+ {
++ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ #ifdef CONFIG_NET_L3_MASTER_DEV
+ struct net *net = sock_net(sk);
+
+- if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
++ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
+ return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
+ #endif
+
+- return sk->sk_bound_dev_if;
++ return bound_dev_if;
+ }
+
+ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
+@@ -131,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
+ #ifdef CONFIG_NET_L3_MASTER_DEV
+ struct net *net = sock_net(sk);
+
+- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
+ return l3mdev_master_ifindex_by_index(net,
+ sk->sk_bound_dev_if);
+ #endif
+@@ -147,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
+ return bound_dev_if == dif || bound_dev_if == sdif;
+ }
+
++static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
++ int dif, int sdif)
++{
++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
++ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
++ bound_dev_if, dif, sdif);
++#else
++ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
++#endif
++}
++
+ struct inet_cork {
+ unsigned int flags;
+ __be32 addr;
+@@ -207,11 +220,10 @@ struct inet_sock {
+ __be32 inet_saddr;
+ __s16 uc_ttl;
+ __u16 cmsg_flags;
++ struct ip_options_rcu __rcu *inet_opt;
+ __be16 inet_sport;
+ __u16 inet_id;
+
+- struct ip_options_rcu __rcu *inet_opt;
+- int rx_dst_ifindex;
+ __u8 tos;
+ __u8 min_ttl;
+ __u8 mc_ttl;
+@@ -253,6 +265,11 @@ struct inet_sock {
+ #define IP_CMSG_CHECKSUM BIT(7)
+ #define IP_CMSG_RECVFRAGSIZE BIT(8)
+
++static inline bool sk_is_inet(struct sock *sk)
++{
++ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
++}
++
+ /**
+ * sk_to_full_sk - Access to a full socket
+ * @sk: pointer to a socket
+@@ -369,7 +386,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
+ static inline bool inet_can_nonlocal_bind(struct net *net,
+ struct inet_sock *inet)
+ {
+- return net->ipv4.sysctl_ip_nonlocal_bind ||
++ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
+ inet->freebind || inet->transparent;
+ }
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 9192444f2964e..e1a93c3391090 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -55,6 +55,8 @@ struct inet_skb_parm {
+ #define IPSKB_DOREDIRECT BIT(5)
+ #define IPSKB_FRAG_PMTU BIT(6)
+ #define IPSKB_L3SLAVE BIT(7)
++#define IPSKB_NOPOLICY BIT(8)
++#define IPSKB_MULTIPATH BIT(9)
+
+ u16 frag_max_size;
+ };
+@@ -74,6 +76,7 @@ struct ipcm_cookie {
+ __be32 addr;
+ int oif;
+ struct ip_options_rcu *opt;
++ __u8 protocol;
+ __u8 ttl;
+ __s16 tos;
+ char priority;
+@@ -94,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+ ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+ ipcm->oif = inet->sk.sk_bound_dev_if;
+ ipcm->addr = inet->inet_saddr;
++ ipcm->protocol = inet->inet_num;
+ }
+
+ #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
+@@ -351,7 +355,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
+
+ static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
+ {
+- return port < net->ipv4.sysctl_ip_prot_sock;
++ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
+ }
+
+ #else
+@@ -378,7 +382,7 @@ void ipfrag_init(void);
+ void ip_static_sysctl_init(void);
+
+ #define IP4_REPLY_MARK(net, mark) \
+- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
++ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
+
+ static inline bool ip_is_fragment(const struct iphdr *iph)
+ {
+@@ -440,7 +444,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
+ struct net *net = dev_net(dst->dev);
+ unsigned int mtu;
+
+- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
++ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
+ ip_mtu_locked(dst) ||
+ !forwarding) {
+ mtu = rt->rt_pmtu;
+@@ -520,19 +524,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
+ {
+ struct iphdr *iph = ip_hdr(skb);
+
++ /* We had many attacks based on IPID, use the private
++ * generator as much as we can.
++ */
++ if (sk && inet_sk(sk)->inet_daddr) {
++ iph->id = htons(inet_sk(sk)->inet_id);
++ inet_sk(sk)->inet_id += segs;
++ return;
++ }
+ if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
+- /* This is only to work around buggy Windows95/2000
+- * VJ compression implementations. If the ID field
+- * does not change, they drop every other packet in
+- * a TCP stream using header compression.
+- */
+- if (sk && inet_sk(sk)->inet_daddr) {
+- iph->id = htons(inet_sk(sk)->inet_id);
+- inet_sk(sk)->inet_id += segs;
+- } else {
+- iph->id = 0;
+- }
++ iph->id = 0;
+ } else {
++ /* Unfortunately we need the big hammer to get a suitable IPID */
+ __ip_select_ident(net, iph, segs);
+ }
+ }
+@@ -559,7 +562,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
+ BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) !=
+ offsetof(typeof(flow->addrs), v4addrs.src) +
+ sizeof(flow->addrs.v4addrs.src));
+- memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));
++ memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs));
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ }
+
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index c412dde4d67dc..d72cee4dff70b 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -189,14 +189,16 @@ struct fib6_info {
+ u32 fib6_metric;
+ u8 fib6_protocol;
+ u8 fib6_type;
++
++ u8 offload;
++ u8 trap;
++ u8 offload_failed;
++
+ u8 should_flush:1,
+ dst_nocount:1,
+ dst_nopolicy:1,
+ fib6_destroying:1,
+- offload:1,
+- trap:1,
+- offload_failed:1,
+- unused:1;
++ unused:4;
+
+ struct rcu_head rcu;
+ struct nexthop *nh;
+@@ -281,7 +283,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+ fn = rcu_dereference(f6i->fib6_node);
+
+ if (fn) {
+- *cookie = fn->fn_sernum;
++ *cookie = READ_ONCE(fn->fn_sernum);
+ /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
+ smp_rmb();
+ status = true;
+@@ -485,6 +487,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+ void fib6_nh_release(struct fib6_nh *fib6_nh);
++void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
+
+ int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+@@ -607,7 +610,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+- skb_flow_dissect_flow_keys(skb, flkeys, flag);
++ memset(flkeys, 0, sizeof(*flkeys));
++ __skb_flow_dissect(net, skb, &flow_keys_dissector,
++ flkeys, NULL, 0, 0, 0, flag);
++
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
+index 028eaea1c8544..42d50856fcf24 100644
+--- a/include/net/ip6_tunnel.h
++++ b/include/net/ip6_tunnel.h
+@@ -57,7 +57,7 @@ struct ip6_tnl {
+
+ /* These fields used only by GRE */
+ __u32 i_seqno; /* The last seen seqno */
+- __u32 o_seqno; /* The last output seqno */
++ atomic_t o_seqno; /* The last output seqno */
+ int hlen; /* tun_hlen + encap_hlen */
+ int tun_hlen; /* Precalculated header length */
+ int encap_hlen; /* Encap header length (FOU,GUE) */
+diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
+index ab5348e57db1a..c3324a1949c3a 100644
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -415,7 +415,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+- skb_flow_dissect_flow_keys(skb, flkeys, flag);
++ memset(flkeys, 0, sizeof(*flkeys));
++ __skb_flow_dissect(net, skb, &flow_keys_dissector,
++ flkeys, NULL, 0, 0, 0, flag);
++
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+@@ -438,7 +441,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ static inline int fib_num_tclassid_users(struct net *net)
+ {
+- return net->ipv4.fib_num_tclassid_users;
++ return atomic_read(&net->ipv4.fib_num_tclassid_users);
+ }
+ #else
+ static inline int fib_num_tclassid_users(struct net *net)
+diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
+index bc3b13ec93c9d..17ec652e8f124 100644
+--- a/include/net/ip_tunnels.h
++++ b/include/net/ip_tunnels.h
+@@ -113,7 +113,7 @@ struct ip_tunnel {
+
+ /* These four fields used only by GRE */
+ u32 i_seqno; /* The last seen seqno */
+- u32 o_seqno; /* The last output seqno */
++ atomic_t o_seqno; /* The last output seqno */
+ int tun_hlen; /* Precalculated header length */
+
+ /* These four fields used only by ERSPAN */
+@@ -377,9 +377,11 @@ static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
+ const struct sk_buff *skb)
+ {
+- if (skb->protocol == htons(ETH_P_IP))
++ __be16 payload_protocol = skb_protocol(skb, true);
++
++ if (payload_protocol == htons(ETH_P_IP))
+ return iph->tos;
+- else if (skb->protocol == htons(ETH_P_IPV6))
++ else if (payload_protocol == htons(ETH_P_IPV6))
+ return ipv6_get_dsfield((const struct ipv6hdr *)iph);
+ else
+ return 0;
+@@ -388,9 +390,11 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
+ static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
+ const struct sk_buff *skb)
+ {
+- if (skb->protocol == htons(ETH_P_IP))
++ __be16 payload_protocol = skb_protocol(skb, true);
++
++ if (payload_protocol == htons(ETH_P_IP))
+ return iph->ttl;
+- else if (skb->protocol == htons(ETH_P_IPV6))
++ else if (payload_protocol == htons(ETH_P_IPV6))
+ return ((const struct ipv6hdr *)iph)->hop_limit;
+ else
+ return 0;
+@@ -450,15 +454,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
++ return;
++ }
++
++ if (pkt_len < 0) {
++ DEV_STATS_INC(dev, tx_errors);
++ DEV_STATS_INC(dev, tx_aborted_errors);
+ } else {
+- struct net_device_stats *err_stats = &dev->stats;
+-
+- if (pkt_len < 0) {
+- err_stats->tx_errors++;
+- err_stats->tx_aborted_errors++;
+- } else {
+- err_stats->tx_dropped++;
+- }
++ DEV_STATS_INC(dev, tx_dropped);
+ }
+ }
+
+diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
+index 7cb5a1aace40d..59f8412de45ac 100644
+--- a/include/net/ip_vs.h
++++ b/include/net/ip_vs.h
+@@ -549,8 +549,10 @@ struct ip_vs_conn {
+ */
+ struct ip_vs_app *app; /* bound ip_vs_app object */
+ void *app_data; /* Application private data */
+- struct ip_vs_seq in_seq; /* incoming seq. struct */
+- struct ip_vs_seq out_seq; /* outgoing seq. struct */
++ struct_group(sync_conn_opt,
++ struct ip_vs_seq in_seq; /* incoming seq. struct */
++ struct ip_vs_seq out_seq; /* outgoing seq. struct */
++ );
+
+ const struct ip_vs_pe *pe;
+ char *pe_data;
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index f2d0ecc257bb2..6b3309e55dcb1 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -391,17 +391,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
+ kfree_rcu(opt, rcu);
+ }
+
++#if IS_ENABLED(CONFIG_IPV6)
+ struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);
+
+ extern struct static_key_false_deferred ipv6_flowlabel_exclusive;
+ static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
+ __be32 label)
+ {
+- if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
++ if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) &&
++ READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl))
+ return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);
+
+ return NULL;
+ }
++#endif
+
+ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
+ struct ip6_flowlabel *fl,
+@@ -661,12 +664,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
+ /* more secured version of ipv6_addr_hash() */
+ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
+ {
+- u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
+-
+- return jhash_3words(v,
+- (__force u32)a->s6_addr32[2],
+- (__force u32)a->s6_addr32[3],
+- initval);
++ return jhash2((__force const u32 *)a->s6_addr32,
++ ARRAY_SIZE(a->s6_addr32), initval);
+ }
+
+ static inline bool ipv6_addr_loopback(const struct in6_addr *a)
+@@ -840,7 +839,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
+ BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) !=
+ offsetof(typeof(flow->addrs), v6addrs.src) +
+ sizeof(flow->addrs.v6addrs.src));
+- memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs));
++ memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs));
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+@@ -1115,6 +1114,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
+ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
+ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
+
++void inet6_cleanup_sock(struct sock *sk);
++void inet6_sock_destruct(struct sock *sk);
+ int inet6_release(struct socket *sock);
+ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+@@ -1257,7 +1258,7 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+ return 0;
+ }
+
+-static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
++static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
+ {
+ int ret;
+
+diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
+index 851029ecff13c..0a4779175a523 100644
+--- a/include/net/ipv6_frag.h
++++ b/include/net/ipv6_frag.h
+@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+ struct sk_buff *head;
+
+ rcu_read_lock();
+- if (fq->q.fqdir->dead)
++ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
++ if (READ_ONCE(fq->q.fqdir->dead))
+ goto out_rcu_unlock;
+ spin_lock(&fq->q.lock);
+
+diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
+index afbce90c44808..45e0339be6fa4 100644
+--- a/include/net/ipv6_stubs.h
++++ b/include/net/ipv6_stubs.h
+@@ -47,6 +47,7 @@ struct ipv6_stub {
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+ void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
++ void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
+ void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
+ int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
+ void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
+diff --git a/include/net/llc.h b/include/net/llc.h
+index df282d9b40170..9c10b121b49b0 100644
+--- a/include/net/llc.h
++++ b/include/net/llc.h
+@@ -72,7 +72,9 @@ struct llc_sap {
+ static inline
+ struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex)
+ {
+- return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES];
++ u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS);
++
++ return &sap->sk_dev_hash[bucket];
+ }
+
+ static inline
+diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
+index 6f15e6fa154e6..53bd2d02a4f0d 100644
+--- a/include/net/lwtunnel.h
++++ b/include/net/lwtunnel.h
+@@ -16,9 +16,12 @@
+ #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
+ #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2)
+
++/* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return
++ * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety.
++ */
+ enum {
+ LWTUNNEL_XMIT_DONE,
+- LWTUNNEL_XMIT_CONTINUE,
++ LWTUNNEL_XMIT_CONTINUE = 0x100,
+ };
+
+
+diff --git a/include/net/mptcp.h b/include/net/mptcp.h
+index 3214848402ec9..1120363987d05 100644
+--- a/include/net/mptcp.h
++++ b/include/net/mptcp.h
+@@ -93,8 +93,6 @@ struct mptcp_out_options {
+ };
+
+ #ifdef CONFIG_MPTCP
+-extern struct request_sock_ops mptcp_subflow_request_sock_ops;
+-
+ void mptcp_init(void);
+
+ static inline bool sk_is_mptcp(const struct sock *sk)
+@@ -182,6 +180,9 @@ void mptcp_seq_show(struct seq_file *seq);
+ int mptcp_subflow_init_cookie_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb);
++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++ struct sock *sk_listener,
++ bool attach_listener);
+
+ __be32 mptcp_get_reset_option(const struct sk_buff *skb);
+
+@@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
+ return 0; /* TCP fallback */
+ }
+
++static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++ struct sock *sk_listener,
++ bool attach_listener)
++{
++ return NULL;
++}
++
+ static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
+ #endif /* CONFIG_MPTCP */
+
+diff --git a/include/net/mrp.h b/include/net/mrp.h
+index 1c308c034e1a6..a8102661fd613 100644
+--- a/include/net/mrp.h
++++ b/include/net/mrp.h
+@@ -120,6 +120,7 @@ struct mrp_applicant {
+ struct sk_buff *pdu;
+ struct rb_root mad;
+ struct rcu_head rcu;
++ bool active;
+ };
+
+ struct mrp_port {
+diff --git a/include/net/ndisc.h b/include/net/ndisc.h
+index 38e4094960cee..e97ef508664f4 100644
+--- a/include/net/ndisc.h
++++ b/include/net/ndisc.h
+@@ -487,9 +487,9 @@ int igmp6_late_init(void);
+ void igmp6_cleanup(void);
+ void igmp6_late_cleanup(void);
+
+-int igmp6_event_query(struct sk_buff *skb);
++void igmp6_event_query(struct sk_buff *skb);
+
+-int igmp6_event_report(struct sk_buff *skb);
++void igmp6_event_report(struct sk_buff *skb);
+
+
+ #ifdef CONFIG_SYSCTL
+diff --git a/include/net/neighbour.h b/include/net/neighbour.h
+index 22ced1381ede5..0e9d33e4439ec 100644
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -174,7 +174,7 @@ struct pneigh_entry {
+ struct net_device *dev;
+ u8 flags;
+ u8 protocol;
+- u8 key[];
++ u32 key[];
+ };
+
+ /*
+@@ -253,17 +253,13 @@ static inline void *neighbour_priv(const struct neighbour *n)
+ #define NEIGH_UPDATE_F_OVERRIDE 0x00000001
+ #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
+ #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
++#define NEIGH_UPDATE_F_USE 0x10000000
+ #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
+ #define NEIGH_UPDATE_F_ISROUTER 0x40000000
+ #define NEIGH_UPDATE_F_ADMIN 0x80000000
+
+ extern const struct nla_policy nda_policy[];
+
+-static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey)
+-{
+- return *(const u16 *)n->primary_key == *(const u16 *)pkey;
+-}
+-
+ static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
+ {
+ return *(const u32 *)n->primary_key == *(const u32 *)pkey;
+@@ -313,8 +309,6 @@ void neigh_table_init(int index, struct neigh_table *tbl);
+ int neigh_table_clear(int index, struct neigh_table *tbl);
+ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev);
+-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+- const void *pkey);
+ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, bool want_ref);
+ static inline struct neighbour *neigh_create(struct neigh_table *tbl,
+@@ -504,10 +498,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
+ {
+ const struct hh_cache *hh = &n->hh;
+
+- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
++ /* n->nud_state and hh->hh_len could be changed under us.
++ * neigh_hh_output() is taking care of the race later.
++ */
++ if (!skip_cache &&
++ (READ_ONCE(n->nud_state) & NUD_CONNECTED) &&
++ READ_ONCE(hh->hh_len))
+ return neigh_hh_output(hh, skb);
+- else
+- return n->output(n, skb);
++
++ return n->output(n, skb);
+ }
+
+ static inline struct neighbour *
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index bb5fa59140321..2ba326f9e004d 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -479,4 +479,10 @@ static inline void fnhe_genid_bump(struct net *net)
+ atomic_inc(&net->fnhe_genid);
+ }
+
++#ifdef CONFIG_NET
++void net_ns_init(void);
++#else
++static inline void net_ns_init(void) {}
++#endif
++
+ #endif /* __NET_NET_NAMESPACE_H */
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index cc663c68ddc4b..34c266502a50e 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -76,6 +76,8 @@ struct nf_conn {
+ * Hint, SKB address this struct and refcnt via skb->_nfct and
+ * helpers nf_conntrack_get() and nf_conntrack_put().
+ * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
++ * except that the latter uses internal indirection and does not
++ * result in a conntrack module dependency.
+ * beware nf_ct_get() is different and don't inc refcnt.
+ */
+ struct nf_conntrack ct_general;
+@@ -169,11 +171,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+ return (struct nf_conn *)(nfct & NFCT_PTRMASK);
+ }
+
++void nf_ct_destroy(struct nf_conntrack *nfct);
++
+ /* decrement reference count on a conntrack */
+ static inline void nf_ct_put(struct nf_conn *ct)
+ {
+- WARN_ON(!ct);
+- nf_conntrack_put(&ct->ct_general);
++ if (ct && refcount_dec_and_test(&ct->ct_general.use))
++ nf_ct_destroy(&ct->ct_general);
+ }
+
+ /* Protocol module loading */
+@@ -276,14 +280,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
+ /* jiffies until ct expires, 0 if already expired */
+ static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
+ {
+- s32 timeout = ct->timeout - nfct_time_stamp;
++ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+
+ return timeout > 0 ? timeout : 0;
+ }
+
+ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
+ {
+- return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
++ return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
+ }
+
+ /* use after obtaining a reference count */
+@@ -302,7 +306,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+ static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+ {
+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+- ct->timeout = nfct_time_stamp + NF_CT_DAY;
++ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+ }
+
+ struct kernel_param;
+diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
+index 13807ea94cd2b..2d524782f53b7 100644
+--- a/include/net/netfilter/nf_conntrack_core.h
++++ b/include/net/netfilter/nf_conntrack_core.h
+@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
+ int ret = NF_ACCEPT;
+
+ if (ct) {
+- if (!nf_ct_is_confirmed(ct))
++ if (!nf_ct_is_confirmed(ct)) {
+ ret = __nf_conntrack_confirm(skb);
++
++ if (ret == NF_ACCEPT)
++ ct = (struct nf_conn *)skb_nfct(skb);
++ }
++
+ if (likely(ret == NF_ACCEPT))
+ nf_ct_deliver_cached_events(ct);
+ }
+diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
+index 37f0fbefb060f..9939c366f720d 100644
+--- a/include/net/netfilter/nf_conntrack_helper.h
++++ b/include/net/netfilter/nf_conntrack_helper.h
+@@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
+ int nf_nat_helper_try_module_get(const char *name, u16 l3num,
+ u8 protonum);
+ void nf_nat_helper_put(struct nf_conntrack_helper *helper);
++void nf_ct_set_auto_assign_helper_warned(struct net *net);
+ #endif /*_NF_CONNTRACK_HELPER_H*/
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index a3647fadf1ccb..aaa518e777e9e 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -10,6 +10,8 @@
+ #include <linux/netfilter/nf_conntrack_tuple_common.h>
+ #include <net/flow_offload.h>
+ #include <net/dst.h>
++#include <linux/if_pppox.h>
++#include <linux/ppp_defs.h>
+
+ struct nf_flowtable;
+ struct nf_flow_rule;
+@@ -264,6 +266,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
+
+ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple);
++void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
+ void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+ struct net_device *dev);
+ void nf_flow_table_cleanup(struct net_device *dev);
+@@ -300,6 +303,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+
+ void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
++void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
++
+ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+@@ -313,4 +318,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+ int nf_flow_table_offload_init(void);
+ void nf_flow_table_offload_exit(void);
+
++static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
++{
++ __be16 proto;
++
++ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
++ sizeof(struct pppoe_hdr)));
++ switch (proto) {
++ case htons(PPP_IP):
++ return htons(ETH_P_IP);
++ case htons(PPP_IPV6):
++ return htons(ETH_P_IPV6);
++ }
++
++ return 0;
++}
++
+ #endif /* _NF_FLOW_TABLE_H */
+diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
+index 9eed51e920e87..980daa6e1e3aa 100644
+--- a/include/net/netfilter/nf_queue.h
++++ b/include/net/netfilter/nf_queue.h
+@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
+ void nf_unregister_queue_handler(void);
+ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
+
+-void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
++bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
+ void nf_queue_entry_free(struct nf_queue_entry *entry);
+
+ static inline void init_hashrandom(u32 *jhash_initval)
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index a16171c5fd9eb..1458b3eae8ada 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -21,13 +21,19 @@ struct module;
+
+ #define NFT_JUMP_STACK_SIZE 16
+
++enum {
++ NFT_PKTINFO_L4PROTO = (1 << 0),
++ NFT_PKTINFO_INNER = (1 << 1),
++};
++
+ struct nft_pktinfo {
+ struct sk_buff *skb;
+ const struct nf_hook_state *state;
+- bool tprot_set;
++ u8 flags;
+ u8 tprot;
+ u16 fragoff;
+ unsigned int thoff;
++ unsigned int inneroff;
+ };
+
+ static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
+@@ -75,7 +81,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
+
+ static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt)
+ {
+- pkt->tprot_set = false;
++ pkt->flags = 0;
+ pkt->tprot = 0;
+ pkt->thoff = 0;
+ pkt->fragoff = 0;
+@@ -187,13 +193,18 @@ struct nft_ctx {
+ bool report;
+ };
+
++enum nft_data_desc_flags {
++ NFT_DATA_DESC_SETELEM = (1 << 0),
++};
++
+ struct nft_data_desc {
+ enum nft_data_types type;
++ unsigned int size;
+ unsigned int len;
++ unsigned int flags;
+ };
+
+-int nft_data_init(const struct nft_ctx *ctx,
+- struct nft_data *data, unsigned int size,
++int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla);
+ void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
+ void nft_data_release(const struct nft_data *data, enum nft_data_types type);
+@@ -272,17 +283,29 @@ struct nft_set_iter {
+ /**
+ * struct nft_set_desc - description of set elements
+ *
++ * @ktype: key type
+ * @klen: key length
++ * @dtype: data type
+ * @dlen: data length
++ * @objtype: object type
++ * @flags: flags
+ * @size: number of set elements
++ * @policy: set policy
++ * @gc_int: garbage collector interval
+ * @field_len: length of each field in concatenation, bytes
+ * @field_count: number of concatenated fields in element
+ * @expr: set must support for expressions
+ */
+ struct nft_set_desc {
++ u32 ktype;
+ unsigned int klen;
++ u32 dtype;
+ unsigned int dlen;
++ u32 objtype;
+ unsigned int size;
++ u32 policy;
++ u32 gc_int;
++ u64 timeout;
+ u8 field_len[NFT_REG32_COUNT];
+ u8 field_count;
+ bool expr;
+@@ -404,7 +427,8 @@ struct nft_set_ops {
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ unsigned int flags);
+-
++ void (*commit)(const struct nft_set *set);
++ void (*abort)(const struct nft_set *set);
+ u64 (*privsize)(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc);
+ bool (*estimate)(const struct nft_set_desc *desc,
+@@ -413,7 +437,8 @@ struct nft_set_ops {
+ int (*init)(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[]);
+- void (*destroy)(const struct nft_set *set);
++ void (*destroy)(const struct nft_ctx *ctx,
++ const struct nft_set *set);
+ void (*gc_init)(const struct nft_set *set);
+
+ unsigned int elemsize;
+@@ -499,6 +524,7 @@ struct nft_set {
+ u16 policy;
+ u16 udlen;
+ unsigned char *udata;
++ struct list_head pending_update;
+ /* runtime data below here */
+ const struct nft_set_ops *ops ____cacheline_aligned;
+ u16 flags:14,
+@@ -539,7 +565,9 @@ void *nft_set_catchall_gc(const struct nft_set *set);
+
+ static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+ {
+- return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
++ u32 gc_int = READ_ONCE(set->gc_int);
++
++ return gc_int ? msecs_to_jiffies(gc_int) : HZ;
+ }
+
+ /**
+@@ -559,6 +587,7 @@ struct nft_set_binding {
+ };
+
+ enum nft_trans_phase;
++void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set);
+ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase);
+@@ -636,18 +665,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
+ tmpl->len = sizeof(struct nft_set_ext);
+ }
+
+-static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+- unsigned int len)
++static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
++ unsigned int len)
+ {
+ tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align);
+- BUG_ON(tmpl->len > U8_MAX);
++ if (tmpl->len > U8_MAX)
++ return -EINVAL;
++
+ tmpl->offset[id] = tmpl->len;
+ tmpl->len += nft_set_ext_types[id].len + len;
++
++ return 0;
+ }
+
+-static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
++static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+ {
+- nft_set_ext_add_length(tmpl, id, 0);
++ return nft_set_ext_add_length(tmpl, id, 0);
+ }
+
+ static inline void nft_set_ext_init(struct nft_set_ext *ext,
+@@ -740,6 +773,8 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_expr *expr_array[]);
+ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr);
++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set, void *elem);
+
+ /**
+ * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+@@ -831,6 +866,7 @@ struct nft_expr_type {
+
+ enum nft_trans_phase {
+ NFT_TRANS_PREPARE,
++ NFT_TRANS_PREPARE_ERROR,
+ NFT_TRANS_ABORT,
+ NFT_TRANS_COMMIT,
+ NFT_TRANS_RELEASE
+@@ -883,9 +919,9 @@ struct nft_expr_ops {
+ int (*offload)(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr);
++ bool (*offload_action)(const struct nft_expr *expr);
+ void (*offload_stats)(struct nft_expr *expr,
+ const struct flow_stats *stats);
+- u32 offload_flags;
+ const struct nft_expr_type *type;
+ void *data;
+ };
+@@ -936,7 +972,10 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
+ return (void *)&rule->data[rule->dlen];
+ }
+
+-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule);
++void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule);
++void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
++ enum nft_trans_phase phase);
++void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule);
+
+ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
+ struct nft_regs *regs,
+@@ -1001,6 +1040,12 @@ struct nft_chain {
+ };
+
+ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
++int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
++ const struct nft_set_iter *iter,
++ struct nft_set_elem *elem);
++int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
++int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
++void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
+
+ enum nft_chain_types {
+ NFT_CHAIN_T_DEFAULT = 0,
+@@ -1037,11 +1082,17 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
+ int nft_chain_validate_hooks(const struct nft_chain *chain,
+ unsigned int hook_flags);
+
++static inline bool nft_chain_binding(const struct nft_chain *chain)
++{
++ return chain->flags & NFT_CHAIN_BINDING;
++}
++
+ static inline bool nft_chain_is_bound(struct nft_chain *chain)
+ {
+ return (chain->flags & NFT_CHAIN_BINDING) && chain->bound;
+ }
+
++int nft_chain_add(struct nft_table *table, struct nft_chain *chain);
+ void nft_chain_del(struct nft_chain *chain);
+ void nf_tables_chain_destroy(struct nft_ctx *ctx);
+
+@@ -1053,7 +1104,6 @@ struct nft_stats {
+
+ struct nft_hook {
+ struct list_head list;
+- bool inactive;
+ struct nf_hook_ops ops;
+ struct rcu_head rcu;
+ };
+@@ -1094,6 +1144,29 @@ int __nft_release_basechain(struct nft_ctx *ctx);
+
+ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
+
++static inline bool nft_use_inc(u32 *use)
++{
++ if (*use == UINT_MAX)
++ return false;
++
++ (*use)++;
++
++ return true;
++}
++
++static inline void nft_use_dec(u32 *use)
++{
++ WARN_ON_ONCE((*use)-- == 0);
++}
++
++/* For error and abort path: restore use counter to previous state. */
++static inline void nft_use_inc_restore(u32 *use)
++{
++ WARN_ON_ONCE(!nft_use_inc(use));
++}
++
++#define nft_use_dec_restore nft_use_dec
++
+ /**
+ * struct nft_table - nf_tables table
+ *
+@@ -1177,8 +1250,8 @@ struct nft_object {
+ struct list_head list;
+ struct rhlist_head rhlhead;
+ struct nft_object_hash_key key;
+- u32 genmask:2,
+- use:30;
++ u32 genmask:2;
++ u32 use;
+ u64 handle;
+ u16 udlen;
+ u8 *udata;
+@@ -1280,8 +1353,8 @@ struct nft_flowtable {
+ char *name;
+ int hooknum;
+ int ops_len;
+- u32 genmask:2,
+- use:30;
++ u32 genmask:2;
++ u32 use;
+ u64 handle;
+ /* runtime data below here */
+ struct list_head hook_list ____cacheline_aligned;
+@@ -1456,6 +1529,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+ * struct nft_trans - nf_tables object update in transaction
+ *
+ * @list: used internally
++ * @binding_list: list of objects with possible bindings
+ * @msg_type: message type
+ * @put_net: ctx->net needs to be put
+ * @ctx: transaction context
+@@ -1463,6 +1537,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+ */
+ struct nft_trans {
+ struct list_head list;
++ struct list_head binding_list;
+ int msg_type;
+ bool put_net;
+ struct nft_ctx ctx;
+@@ -1473,6 +1548,7 @@ struct nft_trans_rule {
+ struct nft_rule *rule;
+ struct nft_flow_rule *flow;
+ u32 rule_id;
++ bool bound;
+ };
+
+ #define nft_trans_rule(trans) \
+@@ -1481,10 +1557,15 @@ struct nft_trans_rule {
+ (((struct nft_trans_rule *)trans->data)->flow)
+ #define nft_trans_rule_id(trans) \
+ (((struct nft_trans_rule *)trans->data)->rule_id)
++#define nft_trans_rule_bound(trans) \
++ (((struct nft_trans_rule *)trans->data)->bound)
+
+ struct nft_trans_set {
+ struct nft_set *set;
+ u32 set_id;
++ u32 gc_int;
++ u64 timeout;
++ bool update;
+ bool bound;
+ };
+
+@@ -1494,15 +1575,25 @@ struct nft_trans_set {
+ (((struct nft_trans_set *)trans->data)->set_id)
+ #define nft_trans_set_bound(trans) \
+ (((struct nft_trans_set *)trans->data)->bound)
++#define nft_trans_set_update(trans) \
++ (((struct nft_trans_set *)trans->data)->update)
++#define nft_trans_set_timeout(trans) \
++ (((struct nft_trans_set *)trans->data)->timeout)
++#define nft_trans_set_gc_int(trans) \
++ (((struct nft_trans_set *)trans->data)->gc_int)
+
+ struct nft_trans_chain {
++ struct nft_chain *chain;
+ bool update;
+ char *name;
+ struct nft_stats __percpu *stats;
+ u8 policy;
++ bool bound;
+ u32 chain_id;
+ };
+
++#define nft_trans_chain(trans) \
++ (((struct nft_trans_chain *)trans->data)->chain)
+ #define nft_trans_chain_update(trans) \
+ (((struct nft_trans_chain *)trans->data)->update)
+ #define nft_trans_chain_name(trans) \
+@@ -1511,6 +1602,8 @@ struct nft_trans_chain {
+ (((struct nft_trans_chain *)trans->data)->stats)
+ #define nft_trans_chain_policy(trans) \
+ (((struct nft_trans_chain *)trans->data)->policy)
++#define nft_trans_chain_bound(trans) \
++ (((struct nft_trans_chain *)trans->data)->bound)
+ #define nft_trans_chain_id(trans) \
+ (((struct nft_trans_chain *)trans->data)->chain_id)
+
+@@ -1583,9 +1676,11 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
+ struct nftables_pernet {
+ struct list_head tables;
+ struct list_head commit_list;
++ struct list_head binding_list;
+ struct list_head module_list;
+ struct list_head notify_list;
+ struct mutex commit_mutex;
++ u64 table_handle;
+ unsigned int base_seq;
+ u8 validate_state;
+ };
+diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
+index 0fa5a6d98a00b..9dfa11d4224d2 100644
+--- a/include/net/netfilter/nf_tables_core.h
++++ b/include/net/netfilter/nf_tables_core.h
+@@ -40,6 +40,14 @@ struct nft_cmp_fast_expr {
+ bool inv;
+ };
+
++struct nft_cmp16_fast_expr {
++ struct nft_data data;
++ struct nft_data mask;
++ u8 sreg;
++ u8 len;
++ bool inv;
++};
++
+ struct nft_immediate_expr {
+ struct nft_data data;
+ u8 dreg;
+@@ -57,6 +65,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len)
+ }
+
+ extern const struct nft_expr_ops nft_cmp_fast_ops;
++extern const struct nft_expr_ops nft_cmp16_fast_ops;
+
+ struct nft_payload {
+ enum nft_payload_bases base:8;
+diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
+index eb4c094cd54d2..c4a6147b0ef8c 100644
+--- a/include/net/netfilter/nf_tables_ipv4.h
++++ b/include/net/netfilter/nf_tables_ipv4.h
+@@ -10,7 +10,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt)
+ struct iphdr *ip;
+
+ ip = ip_hdr(pkt->skb);
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = ip->protocol;
+ pkt->thoff = ip_hdrlen(pkt->skb);
+ pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+@@ -36,7 +36,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
+ else if (len < thoff)
+ return -1;
+
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = iph->protocol;
+ pkt->thoff = thoff;
+ pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+@@ -71,7 +71,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
+ goto inhdr_error;
+ }
+
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = iph->protocol;
+ pkt->thoff = thoff;
+ pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+@@ -82,4 +82,5 @@ inhdr_error:
+ __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS);
+ return -1;
+ }
++
+ #endif
+diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
+index 7595e02b00ba0..ec7eaeaf4f04c 100644
+--- a/include/net/netfilter/nf_tables_ipv6.h
++++ b/include/net/netfilter/nf_tables_ipv6.h
+@@ -18,7 +18,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
+ return;
+ }
+
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = protohdr;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
+@@ -50,7 +50,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
+ if (protohdr < 0)
+ return -1;
+
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = protohdr;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
+@@ -96,7 +96,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
+ if (protohdr < 0)
+ goto inhdr_error;
+
+- pkt->tprot_set = true;
++ pkt->flags = NFT_PKTINFO_L4PROTO;
+ pkt->tprot = protohdr;
+ pkt->thoff = thoff;
+ pkt->fragoff = frag_off;
+diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
+index f9d95ff82df83..3568b6a2f5f0f 100644
+--- a/include/net/netfilter/nf_tables_offload.h
++++ b/include/net/netfilter/nf_tables_offload.h
+@@ -67,8 +67,6 @@ struct nft_flow_rule {
+ struct flow_rule *rule;
+ };
+
+-#define NFT_OFFLOAD_F_ACTION (1 << 0)
+-
+ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+ enum flow_dissector_key_id addr_type);
+
+@@ -94,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net);
+ NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
+ memset(&(__reg)->mask, 0xff, (__reg)->len);
+
+-int nft_chain_offload_priority(struct nft_base_chain *basechain);
++bool nft_chain_offload_support(const struct nft_base_chain *basechain);
+
+ int nft_offload_init(void);
+ void nft_offload_exit(void);
+diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
+index 82d0e41b76f22..faa108b1ba675 100644
+--- a/include/net/netfilter/nf_tproxy.h
++++ b/include/net/netfilter/nf_tproxy.h
+@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
+ return false;
+ }
+
++static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw)
++{
++ local_bh_disable();
++ inet_twsk_deschedule_put(tw);
++ local_bh_enable();
++}
++
+ /* assign a socket to the skb -- consumes sk */
+ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+ {
+diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
+index 2f65701a43c95..d60a10cfc3823 100644
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -65,7 +65,7 @@ struct netns_ipv4 {
+ bool fib_has_custom_local_routes;
+ bool fib_offload_disabled;
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+- int fib_num_tclassid_users;
++ atomic_t fib_num_tclassid_users;
+ #endif
+ struct hlist_head *fib_table_hash;
+ struct sock *fibnl;
+@@ -74,7 +74,6 @@ struct netns_ipv4 {
+ struct sock *mc_autojoin_sk;
+
+ struct inet_peer_base *peers;
+- struct sock * __percpu *tcp_sk;
+ struct fqdir *fqdir;
+
+ u8 sysctl_icmp_echo_ignore_all;
+diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
+index a4b5503803165..181b44f6fb686 100644
+--- a/include/net/netns/ipv6.h
++++ b/include/net/netns/ipv6.h
+@@ -53,7 +53,7 @@ struct netns_sysctl_ipv6 {
+ int seg6_flowlabel;
+ u32 ioam6_id;
+ u64 ioam6_id_wide;
+- bool skip_notify_on_dev_down;
++ int skip_notify_on_dev_down;
+ u8 fib_notify_on_flag_change;
+ };
+
+@@ -75,11 +75,12 @@ struct netns_ipv6 {
+ struct list_head fib6_walkers;
+ rwlock_t fib6_walker_lock;
+ spinlock_t fib6_gc_lock;
+- unsigned int ip6_rt_gc_expire;
+- unsigned long ip6_rt_last_gc;
++ atomic_t ip6_rt_gc_expire;
++ unsigned long ip6_rt_last_gc;
++ unsigned char flowlabel_has_excl;
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+- unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
++ unsigned int fib6_rules_require_fldissect;
+ #ifdef CONFIG_IPV6_SUBTREES
+ unsigned int fib6_routes_require_src;
+ #endif
+diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
+index b593f95e99913..02bbdc577f8e2 100644
+--- a/include/net/netns/netfilter.h
++++ b/include/net/netns/netfilter.h
+@@ -24,9 +24,6 @@ struct netns_nf {
+ #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
+ #endif
+-#if IS_ENABLED(CONFIG_DECNET)
+- struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
+-#endif
+ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ unsigned int defrag_ipv4_users;
+ #endif
+diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
+index 947733a639a6f..bd7c3be4af5d7 100644
+--- a/include/net/netns/xfrm.h
++++ b/include/net/netns/xfrm.h
+@@ -66,11 +66,7 @@ struct netns_xfrm {
+ int sysctl_larval_drop;
+ u32 sysctl_acq_expires;
+
+- u8 policy_default;
+-#define XFRM_POL_DEFAULT_IN 1
+-#define XFRM_POL_DEFAULT_OUT 2
+-#define XFRM_POL_DEFAULT_FWD 4
+-#define XFRM_POL_DEFAULT_MASK 7
++ u8 policy_default[XFRM_POLICY_MAX];
+
+ #ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_hdr;
+diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
+index a964daedc17b6..ea8595651c384 100644
+--- a/include/net/nfc/nci_core.h
++++ b/include/net/nfc/nci_core.h
+@@ -30,6 +30,7 @@ enum nci_flag {
+ NCI_UP,
+ NCI_DATA_EXCHANGE,
+ NCI_DATA_EXCHANGE_TO,
++ NCI_UNREG,
+ };
+
+ /* NCI device states */
+diff --git a/include/net/nl802154.h b/include/net/nl802154.h
+index ddcee128f5d9a..145acb8f25095 100644
+--- a/include/net/nl802154.h
++++ b/include/net/nl802154.h
+@@ -19,6 +19,8 @@
+ *
+ */
+
++#include <linux/types.h>
++
+ #define NL802154_GENL_NAME "nl802154"
+
+ enum nl802154_commands {
+@@ -150,10 +152,9 @@ enum nl802154_attrs {
+ };
+
+ enum nl802154_iftype {
+- /* for backwards compatibility TODO */
+- NL802154_IFTYPE_UNSPEC = -1,
++ NL802154_IFTYPE_UNSPEC = (~(__u32)0),
+
+- NL802154_IFTYPE_NODE,
++ NL802154_IFTYPE_NODE = 0,
+ NL802154_IFTYPE_MONITOR,
+ NL802154_IFTYPE_COORD,
+
+diff --git a/include/net/page_pool.h b/include/net/page_pool.h
+index a4082406a0039..edcc22605842e 100644
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -282,22 +282,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
+ page_pool_update_nid(pool, new_nid);
+ }
+
+-static inline void page_pool_ring_lock(struct page_pool *pool)
+- __acquires(&pool->ring.producer_lock)
+-{
+- if (in_serving_softirq())
+- spin_lock(&pool->ring.producer_lock);
+- else
+- spin_lock_bh(&pool->ring.producer_lock);
+-}
+-
+-static inline void page_pool_ring_unlock(struct page_pool *pool)
+- __releases(&pool->ring.producer_lock)
+-{
+- if (in_serving_softirq())
+- spin_unlock(&pool->ring.producer_lock);
+- else
+- spin_unlock_bh(&pool->ring.producer_lock);
+-}
+-
+ #endif /* _NET_PAGE_POOL_H */
+diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
+index bf79f3a890af2..d0d9dd2483dd3 100644
+--- a/include/net/pkt_sched.h
++++ b/include/net/pkt_sched.h
+@@ -134,12 +134,14 @@ static inline void qdisc_run(struct Qdisc *q)
+ }
+ }
+
++extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
++
+ /* Calculate maximal size of packet seen by hard_start_xmit
+ routine of this device.
+ */
+ static inline unsigned int psched_mtu(const struct net_device *dev)
+ {
+- return dev->mtu + dev->hard_header_len;
++ return READ_ONCE(dev->mtu) + dev->hard_header_len;
+ }
+
+ static inline struct net *qdisc_net(struct Qdisc *q)
+@@ -193,4 +195,22 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
+ skb->tstamp = ktime_set(0, 0);
+ }
+
++struct tc_skb_cb {
++ struct qdisc_skb_cb qdisc_cb;
++
++ u16 mru;
++ u8 post_ct:1;
++ u8 post_ct_snat:1;
++ u8 post_ct_dnat:1;
++ u16 zone; /* Only valid if post_ct = true */
++};
++
++static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
++{
++ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
++
++ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
++ return cb;
++}
++
+ #endif
+diff --git a/include/net/protocol.h b/include/net/protocol.h
+index f51c06ae365f5..6aef8cb11cc8c 100644
+--- a/include/net/protocol.h
++++ b/include/net/protocol.h
+@@ -35,8 +35,6 @@
+
+ /* This is used to register protocols. */
+ struct net_protocol {
+- int (*early_demux)(struct sk_buff *skb);
+- int (*early_demux_handler)(struct sk_buff *skb);
+ int (*handler)(struct sk_buff *skb);
+
+ /* This returns an error if we weren't able to handle the error. */
+@@ -52,8 +50,6 @@ struct net_protocol {
+
+ #if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol {
+- void (*early_demux)(struct sk_buff *skb);
+- void (*early_demux_handler)(struct sk_buff *skb);
+ int (*handler)(struct sk_buff *skb);
+
+ /* This returns an error if we weren't able to handle the error. */
+diff --git a/include/net/raw.h b/include/net/raw.h
+index 8ad8df5948536..c51a635671a73 100644
+--- a/include/net/raw.h
++++ b/include/net/raw.h
+@@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+ {
+ #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
++ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
+ bound_dev_if, dif, sdif);
+ #else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+diff --git a/include/net/route.h b/include/net/route.h
+index 2e6c0e153e3a5..30610101ea14f 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -360,7 +360,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
+ struct net *net = dev_net(dst->dev);
+
+ if (hoplimit == 0)
+- hoplimit = net->ipv4.sysctl_ip_default_ttl;
++ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+ return hoplimit;
+ }
+
+@@ -369,7 +369,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+ {
+ struct neighbour *neigh;
+
+- neigh = __ipv4_neigh_lookup_noref(dev, daddr);
++ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
+
+diff --git a/include/net/rpl.h b/include/net/rpl.h
+index 308ef0a05caef..30fe780d1e7c8 100644
+--- a/include/net/rpl.h
++++ b/include/net/rpl.h
+@@ -23,9 +23,6 @@ static inline int rpl_init(void)
+ static inline void rpl_exit(void) {}
+ #endif
+
+-/* Worst decompression memory usage ipv6 address (16) + pad 7 */
+-#define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7)
+-
+ size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
+ unsigned char cmpre);
+
+diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
+index 9f48733bfd21c..a2a74e0e5c494 100644
+--- a/include/net/rtnetlink.h
++++ b/include/net/rtnetlink.h
+@@ -175,8 +175,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
+ int rtnl_delete_link(struct net_device *dev);
+ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
+
+-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
+- struct netlink_ext_ack *exterr);
++int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
++ struct netlink_ext_ack *exterr);
+ struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
+
+ #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index c0069ac00e62d..6906da5c733ea 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -173,37 +173,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ if (spin_trylock(&qdisc->seqlock))
+ return true;
+
+- /* Paired with smp_mb__after_atomic() to make sure
+- * STATE_MISSED checking is synchronized with clearing
+- * in pfifo_fast_dequeue().
++ /* No need to insist if the MISSED flag was already set.
++ * Note that test_and_set_bit() also gives us memory ordering
++ * guarantees wrt potential earlier enqueue() and below
++ * spin_trylock(), both of which are necessary to prevent races
+ */
+- smp_mb__before_atomic();
+-
+- /* If the MISSED flag is set, it means other thread has
+- * set the MISSED flag before second spin_trylock(), so
+- * we can return false here to avoid multi cpus doing
+- * the set_bit() and second spin_trylock() concurrently.
+- */
+- if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
++ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
+ return false;
+
+- /* Set the MISSED flag before the second spin_trylock(),
+- * if the second spin_trylock() return false, it means
+- * other cpu holding the lock will do dequeuing for us
+- * or it will see the MISSED flag set after releasing
+- * lock and reschedule the net_tx_action() to do the
+- * dequeuing.
+- */
+- set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+-
+- /* spin_trylock() only has load-acquire semantic, so use
+- * smp_mb__after_atomic() to ensure STATE_MISSED is set
+- * before doing the second spin_trylock().
+- */
+- smp_mb__after_atomic();
+-
+- /* Retry again in case other CPU may not see the new flag
+- * after it releases the lock at the end of qdisc_run_end().
++ /* Try to take the lock again to make sure that we will either
++ * grab it or the CPU that still has it will see MISSED set
++ * when testing it in qdisc_run_end()
+ */
+ return spin_trylock(&qdisc->seqlock);
+ } else if (qdisc_is_running(qdisc)) {
+@@ -222,6 +202,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
+ if (qdisc->flags & TCQ_F_NOLOCK) {
+ spin_unlock(&qdisc->seqlock);
+
++ /* spin_unlock() only has store-release semantic. The unlock
++ * and test_bit() ordering is a store-load ordering, so a full
++ * memory barrier is needed here.
++ */
++ smp_mb();
++
+ if (unlikely(test_bit(__QDISC_STATE_MISSED,
+ &qdisc->state)))
+ __netif_schedule(qdisc);
+@@ -308,6 +294,8 @@ struct Qdisc_ops {
+ struct netlink_ext_ack *extack);
+ void (*attach)(struct Qdisc *sch);
+ int (*change_tx_queue_len)(struct Qdisc *, unsigned int);
++ void (*change_real_num_tx)(struct Qdisc *sch,
++ unsigned int new_real_tx);
+
+ int (*dump)(struct Qdisc *, struct sk_buff *);
+ int (*dump_stats)(struct Qdisc *, struct gnet_dump *);
+@@ -438,8 +426,6 @@ struct qdisc_skb_cb {
+ };
+ #define QDISC_CB_PRIV_LEN 20
+ unsigned char data[QDISC_CB_PRIV_LEN];
+- u16 mru;
+- bool post_ct;
+ };
+
+ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
+@@ -684,6 +670,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
+ void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
+
+ int dev_qdisc_change_tx_queue_len(struct net_device *dev);
++void dev_qdisc_change_real_num_tx(struct net_device *dev,
++ unsigned int new_real_tx);
+ void dev_init_scheduler(struct net_device *dev);
+ void dev_shutdown(struct net_device *dev);
+ void dev_activate(struct net_device *dev);
+@@ -1189,7 +1177,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
+ static inline void qdisc_reset_queue(struct Qdisc *sch)
+ {
+ __qdisc_reset_queue(&sch->q);
+- sch->qstats.backlog = 0;
+ }
+
+ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
+@@ -1258,6 +1245,7 @@ struct psched_ratecfg {
+ u64 rate_bytes_ps; /* bytes per second */
+ u32 mult;
+ u16 overhead;
++ u16 mpu;
+ u8 linklayer;
+ u8 shift;
+ };
+@@ -1267,6 +1255,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
+ {
+ len += r->overhead;
+
++ if (len < r->mpu)
++ len = r->mpu;
++
+ if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
+ return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
+
+@@ -1289,6 +1280,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
+ res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
+
+ res->overhead = r->overhead;
++ res->mpu = r->mpu;
+ res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
+ }
+
+@@ -1343,4 +1335,11 @@ void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
+
+ int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
+
++/* Make sure qdisc is no longer in SCHED state. */
++static inline void qdisc_synchronize(const struct Qdisc *q)
++{
++ while (test_bit(__QDISC_STATE_SCHED, &q->state))
++ msleep(1);
++}
++
+ #endif
+diff --git a/include/net/scm.h b/include/net/scm.h
+index 1ce365f4c2560..585adc1346bd0 100644
+--- a/include/net/scm.h
++++ b/include/net/scm.h
+@@ -105,16 +105,27 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc
+ }
+ }
+ }
++
++static inline bool scm_has_secdata(struct socket *sock)
++{
++ return test_bit(SOCK_PASSSEC, &sock->flags);
++}
+ #else
+ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
+ { }
++
++static inline bool scm_has_secdata(struct socket *sock)
++{
++ return false;
++}
+ #endif /* CONFIG_SECURITY_NETWORK */
+
+ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags)
+ {
+ if (!msg->msg_control) {
+- if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp)
++ if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp ||
++ scm_has_secdata(sock))
+ msg->msg_flags |= MSG_CTRUNC;
+ scm_destroy(scm);
+ return;
+diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
+index 69bab88ad66b1..3ae61ce2eabd0 100644
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -105,19 +105,18 @@ extern struct percpu_counter sctp_sockets_allocated;
+ int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
+ struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+
++typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
+ void sctp_transport_walk_start(struct rhashtable_iter *iter);
+ void sctp_transport_walk_stop(struct rhashtable_iter *iter);
+ struct sctp_transport *sctp_transport_get_next(struct net *net,
+ struct rhashtable_iter *iter);
+ struct sctp_transport *sctp_transport_get_idx(struct net *net,
+ struct rhashtable_iter *iter, int pos);
+-int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
+- struct net *net,
++int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr, void *p);
+-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
+- int (*cb_done)(struct sctp_transport *, void *),
+- struct net *net, int *pos, void *p);
++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
++ struct net *net, int *pos, void *p);
+ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
+ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
+ struct sctp_info *info);
+@@ -626,7 +625,8 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
+
+ static inline int sctp_transport_pl_hlen(struct sctp_transport *t)
+ {
+- return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0);
++ return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) -
++ sizeof(struct sctphdr);
+ }
+
+ static inline void sctp_transport_pl_reset(struct sctp_transport *t)
+@@ -653,12 +653,10 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t)
+ if (t->pl.state == SCTP_PL_DISABLED)
+ return;
+
+- if (del_timer(&t->probe_timer))
+- sctp_transport_put(t);
+-
+ t->pl.state = SCTP_PL_BASE;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
++ sctp_transport_reset_probe_timer(t);
+ }
+
+ static inline bool sctp_transport_pl_enabled(struct sctp_transport *t)
+diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
+index 01a70b27e026b..65058faea4db1 100644
+--- a/include/net/sctp/stream_sched.h
++++ b/include/net/sctp/stream_sched.h
+@@ -26,6 +26,8 @@ struct sctp_sched_ops {
+ int (*init)(struct sctp_stream *stream);
+ /* Init a stream */
+ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
++ /* free a stream */
++ void (*free_sid)(struct sctp_stream *stream, __u16 sid);
+ /* Frees the entire thing */
+ void (*free)(struct sctp_stream *stream);
+
+diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
+index 651bba654d77d..790252c1478b0 100644
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -1365,6 +1365,7 @@ struct sctp_endpoint {
+
+ u32 secid;
+ u32 peer_secid;
++ struct rcu_head rcu;
+ };
+
+ /* Recover the outter endpoint structure. */
+@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
+ struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
+ void sctp_endpoint_free(struct sctp_endpoint *);
+ void sctp_endpoint_put(struct sctp_endpoint *);
+-void sctp_endpoint_hold(struct sctp_endpoint *);
++int sctp_endpoint_hold(struct sctp_endpoint *ep);
+ void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
+ struct sctp_association *sctp_endpoint_lookup_assoc(
+ const struct sctp_endpoint *ep,
+@@ -1419,6 +1420,7 @@ struct sctp_stream_priorities {
+ /* The next stream in line */
+ struct sctp_stream_out_ext *next;
+ __u16 prio;
++ __u16 users;
+ };
+
+ struct sctp_stream_out_ext {
+diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
+index d7d2495f83c27..dac91aa38c5af 100644
+--- a/include/net/secure_seq.h
++++ b/include/net/secure_seq.h
+@@ -4,8 +4,8 @@
+
+ #include <linux/types.h>
+
+-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
++u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
++u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+ __be16 dport);
+ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport);
+diff --git a/include/net/seg6.h b/include/net/seg6.h
+index 9d19c15e8545c..af668f17b3988 100644
+--- a/include/net/seg6.h
++++ b/include/net/seg6.h
+@@ -58,9 +58,30 @@ extern int seg6_local_init(void);
+ extern void seg6_local_exit(void);
+
+ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
++extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
++extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt);
+ extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto);
+ extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
+ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id);
++
++/* If the packet which invoked an ICMP error contains an SRH return
++ * the true destination address from within the SRH, otherwise use the
++ * destination address in the IP header.
++ */
++static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb,
++ struct inet6_skb_parm *opt)
++{
++ struct ipv6_sr_hdr *srh;
++
++ if (opt->flags & IP6SKB_SEG6) {
++ srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff);
++ return &srh->segments[0];
++ }
++
++ return NULL;
++}
++
++
+ #endif
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 463f390d90b3e..640bd7a367779 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair;
+ * for struct sock and struct inet_timewait_sock.
+ */
+ struct sock_common {
+- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
+- * address on 64bit arches : cf INET_MATCH()
+- */
+ union {
+ __addrpair skc_addrpair;
+ struct {
+@@ -259,6 +256,8 @@ struct bpf_local_storage;
+ * @sk_rcvbuf: size of receive buffer in bytes
+ * @sk_wq: sock wait queue and async head
+ * @sk_rx_dst: receive input route used by early demux
++ * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst
++ * @sk_rx_dst_cookie: cookie for @sk_rx_dst
+ * @sk_dst_cache: destination cache
+ * @sk_dst_pending_confirm: need to confirm neighbour
+ * @sk_policy: flow policy
+@@ -324,7 +323,7 @@ struct bpf_local_storage;
+ * @sk_tskey: counter to disambiguate concurrent tstamp requests
+ * @sk_zckey: counter to order MSG_ZEROCOPY notifications
+ * @sk_socket: Identd and reporting IO signals
+- * @sk_user_data: RPC layer private data
++ * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock.
+ * @sk_frag: cached page frag
+ * @sk_peek_off: current peek_offset value
+ * @sk_send_head: front of stuff to transmit
+@@ -335,6 +334,7 @@ struct bpf_local_storage;
+ * @sk_cgrp_data: cgroup data for this cgroup
+ * @sk_memcg: this socket's memory cgroup association
+ * @sk_write_pending: a write to stream socket waits to start
++ * @sk_wait_pending: number of threads blocked on this socket
+ * @sk_state_change: callback to indicate change in the state of the sock
+ * @sk_data_ready: callback to indicate there is data to be processed
+ * @sk_write_space: callback to indicate there is bf sending space available
+@@ -419,6 +419,7 @@ struct sock {
+ unsigned int sk_napi_id;
+ #endif
+ int sk_rcvbuf;
++ int sk_wait_pending;
+
+ struct sk_filter __rcu *sk_filter;
+ union {
+@@ -430,7 +431,10 @@ struct sock {
+ #ifdef CONFIG_XFRM
+ struct xfrm_policy __rcu *sk_policy[2];
+ #endif
+- struct dst_entry *sk_rx_dst;
++ struct dst_entry __rcu *sk_rx_dst;
++ int sk_rx_dst_ifindex;
++ u32 sk_rx_dst_cookie;
++
+ struct dst_entry __rcu *sk_dst_cache;
+ atomic_t sk_omem_alloc;
+ int sk_sndbuf;
+@@ -501,7 +505,7 @@ struct sock {
+ u16 sk_tsflags;
+ int sk_bind_phc;
+ u8 sk_shutdown;
+- u32 sk_tskey;
++ atomic_t sk_tskey;
+ atomic_t sk_zckey;
+
+ u8 sk_clockid;
+@@ -541,14 +545,26 @@ enum sk_pacing {
+ SK_PACING_FQ = 2,
+ };
+
+-/* Pointer stored in sk_user_data might not be suitable for copying
+- * when cloning the socket. For instance, it can point to a reference
+- * counted object. sk_user_data bottom bit is set if pointer must not
+- * be copied.
++/* flag bits in sk_user_data
++ *
++ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might
++ * not be suitable for copying when cloning the socket. For instance,
++ * it can point to a reference counted object. sk_user_data bottom
++ * bit is set if pointer must not be copied.
++ *
++ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is
++ * managed/owned by a BPF reuseport array. This bit should be set
++ * when sk_user_data's sk is added to the bpf's reuseport_array.
++ *
++ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in
++ * sk_user_data points to psock type. This bit should be set
++ * when sk_user_data is assigned to a psock object.
+ */
+ #define SK_USER_DATA_NOCOPY 1UL
+-#define SK_USER_DATA_BPF 2UL /* Managed by BPF */
+-#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF)
++#define SK_USER_DATA_BPF 2UL
++#define SK_USER_DATA_PSOCK 4UL
++#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
++ SK_USER_DATA_PSOCK)
+
+ /**
+ * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied
+@@ -561,24 +577,40 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
+
+ #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+
++/**
++ * __rcu_dereference_sk_user_data_with_flags - return the pointer
++ * only if argument flags all has been set in sk_user_data. Otherwise
++ * return NULL
++ *
++ * @sk: socket
++ * @flags: flag bits
++ */
++static inline void *
++__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
++ uintptr_t flags)
++{
++ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
++
++ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
++
++ if ((sk_user_data & flags) == flags)
++ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
++ return NULL;
++}
++
+ #define rcu_dereference_sk_user_data(sk) \
++ __rcu_dereference_sk_user_data_with_flags(sk, 0)
++#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
+ ({ \
+- void *__tmp = rcu_dereference(__sk_user_data((sk))); \
+- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \
+-})
+-#define rcu_assign_sk_user_data(sk, ptr) \
+-({ \
+- uintptr_t __tmp = (uintptr_t)(ptr); \
+- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
+- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \
+-})
+-#define rcu_assign_sk_user_data_nocopy(sk, ptr) \
+-({ \
+- uintptr_t __tmp = (uintptr_t)(ptr); \
+- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
++ uintptr_t __tmp1 = (uintptr_t)(ptr), \
++ __tmp2 = (uintptr_t)(flags); \
++ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
++ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
+ rcu_assign_pointer(__sk_user_data((sk)), \
+- __tmp | SK_USER_DATA_NOCOPY); \
++ __tmp1 | __tmp2); \
+ })
++#define rcu_assign_sk_user_data(sk, ptr) \
++ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
+
+ /*
+ * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
+@@ -1061,8 +1093,12 @@ static inline void sock_rps_record_flow(const struct sock *sk)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+- if (sk->sk_state == TCP_ESTABLISHED)
+- sock_rps_record_flow_hash(sk->sk_rxhash);
++ if (sk->sk_state == TCP_ESTABLISHED) {
++ /* This READ_ONCE() is paired with the WRITE_ONCE()
++ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
++ */
++ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
++ }
+ }
+ #endif
+ }
+@@ -1071,20 +1107,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk,
+ const struct sk_buff *skb)
+ {
+ #ifdef CONFIG_RPS
+- if (unlikely(sk->sk_rxhash != skb->hash))
+- sk->sk_rxhash = skb->hash;
++ /* The following WRITE_ONCE() is paired with the READ_ONCE()
++ * here, and another one in sock_rps_record_flow().
++ */
++ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash))
++ WRITE_ONCE(sk->sk_rxhash, skb->hash);
+ #endif
+ }
+
+ static inline void sock_rps_reset_rxhash(struct sock *sk)
+ {
+ #ifdef CONFIG_RPS
+- sk->sk_rxhash = 0;
++ /* Paired with READ_ONCE() in sock_rps_record_flow() */
++ WRITE_ONCE(sk->sk_rxhash, 0);
+ #endif
+ }
+
+ #define sk_wait_event(__sk, __timeo, __condition, __wait) \
+ ({ int __rc; \
++ __sk->sk_wait_pending++; \
+ release_sock(__sk); \
+ __rc = __condition; \
+ if (!__rc) { \
+@@ -1094,6 +1135,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
+ } \
+ sched_annotate_sleep(); \
+ lock_sock(__sk); \
++ __sk->sk_wait_pending--; \
+ __rc = __condition; \
+ __rc; \
+ })
+@@ -1217,6 +1259,7 @@ struct proto {
+ /*
+ * Pressure flag: try to collapse.
+ * Technical note: it is used by multiple contexts non atomically.
++ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
+ * All the __sk_mem_schedule() is of this nature: accounting
+ * is strict, actions are advisory and have some latency.
+ */
+@@ -1237,7 +1280,7 @@ struct proto {
+ unsigned int useroffset; /* Usercopy region offset */
+ unsigned int usersize; /* Usercopy region size */
+
+- struct percpu_counter *orphan_count;
++ unsigned int __percpu *orphan_count;
+
+ struct request_sock_ops *rsk_prot;
+ struct timewait_sock_ops *twsk_prot;
+@@ -1339,6 +1382,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
+ return sk->sk_prot->memory_pressure != NULL;
+ }
+
++static inline bool sk_under_global_memory_pressure(const struct sock *sk)
++{
++ return sk->sk_prot->memory_pressure &&
++ !!READ_ONCE(*sk->sk_prot->memory_pressure);
++}
++
+ static inline bool sk_under_memory_pressure(const struct sock *sk)
+ {
+ if (!sk->sk_prot->memory_pressure)
+@@ -1348,7 +1397,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
+ mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ return true;
+
+- return !!*sk->sk_prot->memory_pressure;
++ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
+ }
+
+ static inline long
+@@ -1406,7 +1455,7 @@ proto_memory_pressure(struct proto *prot)
+ {
+ if (!prot->memory_pressure)
+ return false;
+- return !!*prot->memory_pressure;
++ return !!READ_ONCE(*prot->memory_pressure);
+ }
+
+
+@@ -1479,7 +1528,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount);
+ /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+ static inline long sk_prot_mem_limits(const struct sock *sk, int index)
+ {
+- long val = sk->sk_prot->sysctl_mem[index];
++ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
+
+ #if PAGE_SIZE > SK_MEM_QUANTUM
+ val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
+@@ -1502,19 +1551,23 @@ static inline bool sk_has_account(struct sock *sk)
+
+ static inline bool sk_wmem_schedule(struct sock *sk, int size)
+ {
++ int delta;
++
+ if (!sk_has_account(sk))
+ return true;
+- return size <= sk->sk_forward_alloc ||
+- __sk_mem_schedule(sk, size, SK_MEM_SEND);
++ delta = size - sk->sk_forward_alloc;
++ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
+ }
+
+ static inline bool
+ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+ {
++ int delta;
++
+ if (!sk_has_account(sk))
+ return true;
+- return size <= sk->sk_forward_alloc ||
+- __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
++ delta = size - sk->sk_forward_alloc;
++ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
+ skb_pfmemalloc(skb);
+ }
+
+@@ -1813,7 +1866,12 @@ void sk_common_release(struct sock *sk);
+ * Default socket callbacks and setup code
+ */
+
+-/* Initialise core socket variables */
++/* Initialise core socket variables using an explicit uid. */
++void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid);
++
++/* Initialise core socket variables.
++ * Assumes struct socket *sock is embedded in a struct socket_alloc.
++ */
+ void sock_init_data(struct socket *sock, struct sock *sk);
+
+ /*
+@@ -1953,6 +2011,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
+ }
+
+ kuid_t sock_i_uid(struct sock *sk);
++unsigned long __sock_i_ino(struct sock *sk);
+ unsigned long sock_i_ino(struct sock *sk);
+
+ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
+@@ -2281,6 +2340,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
+ return false;
+ }
+
++static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk)
++{
++ skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
++ if (skb) {
++ if (sk_rmem_schedule(sk, skb, skb->truesize)) {
++ skb_set_owner_r(skb, sk);
++ return skb;
++ }
++ __kfree_skb(skb);
++ }
++ return NULL;
++}
++
+ static inline void skb_prepare_for_gro(struct sk_buff *skb)
+ {
+ if (skb->destructor != sock_wfree) {
+@@ -2400,19 +2472,22 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ * @sk: socket
+ *
+ * Use the per task page_frag instead of the per socket one for
+- * optimization when we know that we're in the normal context and owns
++ * optimization when we know that we're in process context and own
+ * everything that's associated with %current.
+ *
+- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+- * inside other socket operations and end up recursing into sk_page_frag()
+- * while it's already in use.
++ * Both direct reclaim and page faults can nest inside other
++ * socket operations and end up recursing into sk_page_frag()
++ * while it's already in use: explicitly avoid task page_frag
++ * usage if the caller is potentially doing any of them.
++ * This assumes that page fault handlers use the GFP_NOFS flags.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
+ */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+- if (gfpflags_normal_context(sk->sk_allocation))
++ if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
++ (__GFP_DIRECT_RECLAIM | __GFP_FS))
+ return &current->task_frag;
+
+ return &sk->sk_frag;
+@@ -2435,7 +2510,7 @@ static inline gfp_t gfp_any(void)
+
+ static inline gfp_t gfp_memcg_charge(void)
+ {
+- return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
++ return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
+ }
+
+ static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
+@@ -2568,7 +2643,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+ __sock_recv_ts_and_drops(msg, sk, skb);
+ else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
+ sock_write_timestamp(sk, skb->tstamp);
+- else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
++ else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP))
+ sock_write_timestamp(sk, 0);
+ }
+
+@@ -2590,7 +2665,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+ __sock_tx_timestamp(tsflags, tx_flags);
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
+ tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
+- *tskey = sk->sk_tskey++;
++ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ }
+ if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
+ *tx_flags |= SKBTX_WIFI_STATUS;
+@@ -2757,18 +2832,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+
+- return *proto->sysctl_wmem;
++ return READ_ONCE(*proto->sysctl_wmem);
+ }
+
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+
+- return *proto->sysctl_rmem;
++ return READ_ONCE(*proto->sysctl_rmem);
+ }
+
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
+index 473b0b0fa4abc..6ec140b0a61bf 100644
+--- a/include/net/sock_reuseport.h
++++ b/include/net/sock_reuseport.h
+@@ -16,6 +16,7 @@ struct sock_reuseport {
+ u16 max_socks; /* length of socks */
+ u16 num_socks; /* elements in socks */
+ u16 num_closed_socks; /* closed elements in socks */
++ u16 incoming_cpu;
+ /* The last synq overflow event timestamp of this
+ * reuse->socks[] group.
+ */
+@@ -43,21 +44,21 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
+ extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
+ extern int reuseport_detach_prog(struct sock *sk);
+
+-static inline bool reuseport_has_conns(struct sock *sk, bool set)
++static inline bool reuseport_has_conns(struct sock *sk)
+ {
+ struct sock_reuseport *reuse;
+ bool ret = false;
+
+ rcu_read_lock();
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+- if (reuse) {
+- if (set)
+- reuse->has_conns = 1;
+- ret = reuse->has_conns;
+- }
++ if (reuse && reuse->has_conns)
++ ret = true;
+ rcu_read_unlock();
+
+ return ret;
+ }
+
++void reuseport_has_conns_set(struct sock *sk);
++void reuseport_update_incoming_cpu(struct sock *sk, int val);
++
+ #endif /* _SOCK_REUSEPORT_H */
+diff --git a/include/net/strparser.h b/include/net/strparser.h
+index 1d20b98493a10..732b7097d78e4 100644
+--- a/include/net/strparser.h
++++ b/include/net/strparser.h
+@@ -54,10 +54,28 @@ struct strp_msg {
+ int offset;
+ };
+
++struct _strp_msg {
++ /* Internal cb structure. struct strp_msg must be first for passing
++ * to upper layer.
++ */
++ struct strp_msg strp;
++ int accum_len;
++};
++
++struct sk_skb_cb {
++#define SK_SKB_CB_PRIV_LEN 20
++ unsigned char data[SK_SKB_CB_PRIV_LEN];
++ struct _strp_msg strp;
++ /* temp_reg is a temporary register used for bpf_convert_data_end_access
++ * when dst_reg == src_reg.
++ */
++ u64 temp_reg;
++};
++
+ static inline struct strp_msg *strp_msg(struct sk_buff *skb)
+ {
+ return (struct strp_msg *)((void *)skb->cb +
+- offsetof(struct qdisc_skb_cb, data));
++ offsetof(struct sk_skb_cb, strp));
+ }
+
+ /* Structure for an attached lower socket */
+diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
+index 748cf87a4d7ea..83fe399317818 100644
+--- a/include/net/tc_act/tc_pedit.h
++++ b/include/net/tc_act/tc_pedit.h
+@@ -4,21 +4,29 @@
+
+ #include <net/act_api.h>
+ #include <linux/tc_act/tc_pedit.h>
++#include <linux/types.h>
+
+ struct tcf_pedit_key_ex {
+ enum pedit_header_type htype;
+ enum pedit_cmd cmd;
+ };
+
+-struct tcf_pedit {
+- struct tc_action common;
+- unsigned char tcfp_nkeys;
+- unsigned char tcfp_flags;
++struct tcf_pedit_parms {
+ struct tc_pedit_key *tcfp_keys;
+ struct tcf_pedit_key_ex *tcfp_keys_ex;
++ u32 tcfp_off_max_hint;
++ unsigned char tcfp_nkeys;
++ unsigned char tcfp_flags;
++ struct rcu_head rcu;
++};
++
++struct tcf_pedit {
++ struct tc_action common;
++ struct tcf_pedit_parms __rcu *parms;
+ };
+
+ #define to_pedit(a) ((struct tcf_pedit *)a)
++#define to_pedit_parms(a) (rcu_dereference(to_pedit(a)->parms))
+
+ static inline bool is_tcf_pedit(const struct tc_action *a)
+ {
+@@ -31,37 +39,81 @@ static inline bool is_tcf_pedit(const struct tc_action *a)
+
+ static inline int tcf_pedit_nkeys(const struct tc_action *a)
+ {
+- return to_pedit(a)->tcfp_nkeys;
++ struct tcf_pedit_parms *parms;
++ int nkeys;
++
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ nkeys = parms->tcfp_nkeys;
++ rcu_read_unlock();
++
++ return nkeys;
+ }
+
+ static inline u32 tcf_pedit_htype(const struct tc_action *a, int index)
+ {
+- if (to_pedit(a)->tcfp_keys_ex)
+- return to_pedit(a)->tcfp_keys_ex[index].htype;
++ u32 htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
++ struct tcf_pedit_parms *parms;
++
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ if (parms->tcfp_keys_ex)
++ htype = parms->tcfp_keys_ex[index].htype;
++ rcu_read_unlock();
+
+- return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
++ return htype;
+ }
+
+ static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index)
+ {
+- if (to_pedit(a)->tcfp_keys_ex)
+- return to_pedit(a)->tcfp_keys_ex[index].cmd;
++ struct tcf_pedit_parms *parms;
++ u32 cmd = __PEDIT_CMD_MAX;
+
+- return __PEDIT_CMD_MAX;
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ if (parms->tcfp_keys_ex)
++ cmd = parms->tcfp_keys_ex[index].cmd;
++ rcu_read_unlock();
++
++ return cmd;
+ }
+
+ static inline u32 tcf_pedit_mask(const struct tc_action *a, int index)
+ {
+- return to_pedit(a)->tcfp_keys[index].mask;
++ struct tcf_pedit_parms *parms;
++ u32 mask;
++
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ mask = parms->tcfp_keys[index].mask;
++ rcu_read_unlock();
++
++ return mask;
+ }
+
+ static inline u32 tcf_pedit_val(const struct tc_action *a, int index)
+ {
+- return to_pedit(a)->tcfp_keys[index].val;
++ struct tcf_pedit_parms *parms;
++ u32 val;
++
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ val = parms->tcfp_keys[index].val;
++ rcu_read_unlock();
++
++ return val;
+ }
+
+ static inline u32 tcf_pedit_offset(const struct tc_action *a, int index)
+ {
+- return to_pedit(a)->tcfp_keys[index].off;
++ struct tcf_pedit_parms *parms;
++ u32 off;
++
++ rcu_read_lock();
++ parms = to_pedit_parms(a);
++ off = parms->tcfp_keys[index].off;
++ rcu_read_unlock();
++
++ return off;
+ }
+ #endif /* __NET_TC_PED_H */
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 60c384569e9cd..4aafda05a6466 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -48,7 +48,9 @@
+
+ extern struct inet_hashinfo tcp_hashinfo;
+
+-extern struct percpu_counter tcp_orphan_count;
++DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
++int tcp_orphan_count_sum(void);
++
+ void tcp_time_wait(struct sock *sk, int state, int timeo);
+
+ #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
+@@ -290,19 +292,6 @@ static inline bool tcp_out_of_memory(struct sock *sk)
+
+ void sk_forced_mem_schedule(struct sock *sk, int size);
+
+-static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
+-{
+- struct percpu_counter *ocp = sk->sk_prot->orphan_count;
+- int orphans = percpu_counter_read_positive(ocp);
+-
+- if (orphans << shift > sysctl_tcp_max_orphans) {
+- orphans = percpu_counter_sum_positive(ocp);
+- if (orphans << shift > sysctl_tcp_max_orphans)
+- return true;
+- }
+- return false;
+-}
+-
+ bool tcp_check_oom(struct sock *sk, int shift);
+
+
+@@ -351,7 +340,6 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
+
+-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+ const unsigned int pkts)
+ {
+@@ -481,6 +469,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
+ u32 cookie);
+ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
+ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
++ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct sk_buff *skb);
+ #ifdef CONFIG_SYN_COOKIES
+
+@@ -581,6 +570,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
+ #endif
+ /* tcp_output.c */
+
++void tcp_skb_entail(struct sock *sk, struct sk_buff *skb);
++void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb);
+ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+ int nonagle);
+ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
+@@ -619,6 +610,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
+ void tcp_reset(struct sock *sk, struct sk_buff *skb);
+ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
+ void tcp_fin(struct sock *sk);
++void tcp_check_space(struct sock *sk);
+
+ /* tcp_timer.c */
+ void tcp_init_xmit_timers(struct sock *);
+@@ -928,7 +920,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
+
+ INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
++void tcp_v6_early_demux(struct sk_buff *skb);
+
+ #endif
+
+@@ -1037,6 +1029,7 @@ struct rate_sample {
+ int losses; /* number of packets marked lost upon ACK */
+ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
+ u32 prior_in_flight; /* in flight before this ACK */
++ u32 last_end_seq; /* end_seq of most recently ACKed packet */
+ bool is_app_limited; /* is sample from packet with bubble in pipe? */
+ bool is_retrans; /* is sample from retransmission? */
+ bool is_ack_delayed; /* is this (likely) a delayed ACK? */
+@@ -1159,6 +1152,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
+ bool is_sack_reneg, struct rate_sample *rs);
+ void tcp_rate_check_app_limited(struct sock *sk);
+
++static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
++{
++ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
++}
++
+ /* These functions determine how the current flow behaves in respect of SACK
+ * handling. SACK is negotiated with the peer, and therefore it can vary
+ * between different flows.
+@@ -1202,9 +1200,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+
+ #define TCP_INFINITE_SSTHRESH 0x7fffffff
+
++static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
++{
++ return tp->snd_cwnd;
++}
++
++static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
++{
++ WARN_ON_ONCE((int)val <= 0);
++ tp->snd_cwnd = val;
++}
++
+ static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+ {
+- return tp->snd_cwnd < tp->snd_ssthresh;
++ return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
+ }
+
+ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
+@@ -1230,8 +1239,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
+ return tp->snd_ssthresh;
+ else
+ return max(tp->snd_ssthresh,
+- ((tp->snd_cwnd >> 1) +
+- (tp->snd_cwnd >> 2)));
++ ((tcp_snd_cwnd(tp) >> 1) +
++ (tcp_snd_cwnd(tp) >> 2)));
+ }
+
+ /* Use define here intentionally to get WARN_ON location shown at the caller */
+@@ -1271,11 +1280,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
++ if (tp->is_cwnd_limited)
++ return true;
++
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tcp_in_slow_start(tp))
+- return tp->snd_cwnd < 2 * tp->max_packets_out;
++ return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
+
+- return tp->is_cwnd_limited;
++ return false;
+ }
+
+ /* BBR congestion control needs pacing.
+@@ -1382,8 +1394,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ s32 delta;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
+- ca_ops->cong_control)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
++ tp->packets_out || ca_ops->cong_control)
+ return;
+ delta = tcp_jiffies32 - tp->lsndtime;
+ if (delta > inet_csk(sk)->icsk_rto)
+@@ -1398,7 +1410,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
+
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+
+ return tcp_adv_win_scale <= 0 ?
+ (space>>(-tcp_adv_win_scale)) :
+@@ -1460,22 +1472,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
+ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
+
+- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_intvl);
++
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ }
+
+ static inline int keepalive_time_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
++ val = READ_ONCE(tp->keepalive_time);
+
+- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ }
+
+ static inline int keepalive_probes(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_probes);
+
+- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ }
+
+ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+@@ -1488,7 +1516,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+
+ static inline int tcp_fin_time(const struct sock *sk)
+ {
+- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
++ int fin_timeout = tcp_sk(sk)->linger2 ? :
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
+ const int rto = inet_csk(sk)->icsk_rto;
+
+ if (fin_timeout < (rto << 2) - (rto >> 1))
+@@ -1982,7 +2011,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
+- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
++ u32 val;
++
++ val = READ_ONCE(tp->notsent_lowat);
++
++ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ }
+
+ bool tcp_stream_memory_free(const struct sock *sk, int wake);
+@@ -2226,8 +2259,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+ #endif /* CONFIG_BPF_SYSCALL */
+
+-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+- int flags);
++int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
++ struct sk_msg *msg, u32 bytes, int flags);
+ #endif /* CONFIG_NET_SOCK_MSG */
+
+ #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+diff --git a/include/net/tls.h b/include/net/tls.h
+index 1fffb206f09f5..eda0015c5c592 100644
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -179,6 +179,8 @@ struct tls_offload_context_tx {
+
+ struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
+ void (*sk_destruct)(struct sock *sk);
++ struct work_struct destruct_work;
++ struct tls_context *ctx;
+ u8 driver_state[] __aligned(8);
+ /* The TLS layer reserves room for driver specific state
+ * Currently the belief is that there is not enough
+@@ -707,7 +709,7 @@ int tls_sw_fallback_init(struct sock *sk,
+ struct tls_crypto_info *crypto_info);
+
+ #ifdef CONFIG_TLS_DEVICE
+-void tls_device_init(void);
++int tls_device_init(void);
+ void tls_device_cleanup(void);
+ void tls_device_sk_destruct(struct sock *sk);
+ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+@@ -727,7 +729,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk)
+ return tls_get_ctx(sk)->rx_conf == TLS_HW;
+ }
+ #else
+-static inline void tls_device_init(void) {}
++static inline int tls_device_init(void) { return 0; }
+ static inline void tls_device_cleanup(void) {}
+
+ static inline int
+diff --git a/include/net/udp.h b/include/net/udp.h
+index 909ecf447e0fb..10508c66e7a19 100644
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -173,7 +173,7 @@ INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
+ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+ INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
++void udp_v6_early_demux(struct sk_buff *skb);
+ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
+
+ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+@@ -262,7 +262,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+ {
+ #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
++ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
+ bound_dev_if, dif, sdif);
+ #else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+@@ -270,7 +270,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ }
+
+ /* net/ipv4/udp.c */
+-void udp_destruct_sock(struct sock *sk);
++void udp_destruct_common(struct sock *sk);
+ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
+ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
+ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
+diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
+index afc7ce713657b..72394f441dad8 100644
+--- a/include/net/udp_tunnel.h
++++ b/include/net/udp_tunnel.h
+@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net,
+ typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+ typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
+ struct sk_buff *skb);
++typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk,
++ struct sk_buff *skb,
++ unsigned int udp_offset);
+ typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+ typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
+ struct list_head *head,
+@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg {
+ __u8 encap_type;
+ udp_tunnel_encap_rcv_t encap_rcv;
+ udp_tunnel_encap_err_lookup_t encap_err_lookup;
++ udp_tunnel_encap_err_rcv_t encap_err_rcv;
+ udp_tunnel_encap_destroy_t encap_destroy;
+ udp_tunnel_gro_receive_t gro_receive;
+ udp_tunnel_gro_complete_t gro_complete;
+diff --git a/include/net/udplite.h b/include/net/udplite.h
+index 9185e45b997ff..c59ba86668af0 100644
+--- a/include/net/udplite.h
++++ b/include/net/udplite.h
+@@ -24,14 +24,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
+ return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT;
+ }
+
+-/* Designate sk as UDP-Lite socket */
+-static inline int udplite_sk_init(struct sock *sk)
+-{
+- udp_init_sock(sk);
+- udp_sk(sk)->pcflag = UDPLITE_BIT;
+- return 0;
+-}
+-
+ /*
+ * Checksumming routines
+ */
+diff --git a/include/net/vxlan.h b/include/net/vxlan.h
+index 08537aa14f7c3..e149a0b6f9a3c 100644
+--- a/include/net/vxlan.h
++++ b/include/net/vxlan.h
+@@ -327,10 +327,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
+ return features;
+ }
+
+-/* IP header + UDP + VXLAN + Ethernet header */
+-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
+-/* IPv6 header + UDP + VXLAN + Ethernet header */
+-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
++static inline int vxlan_headroom(u32 flags)
++{
++ /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */
++ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */
++ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) :
++ sizeof(struct iphdr)) +
++ sizeof(struct udphdr) + sizeof(struct vxlanhdr) +
++ (flags & VXLAN_F_GPE ? 0 : ETH_HLEN);
++}
+
+ static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
+ {
+@@ -492,12 +497,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
+ }
+
+ static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
+- int hash,
++ u32 hash,
+ struct vxlan_rdst *rdst)
+ {
+ struct fib_nh_common *nhc;
+
+- nhc = nexthop_path_fdb_result(nh, hash);
++ nhc = nexthop_path_fdb_result(nh, hash >> 1);
+ if (unlikely(!nhc))
+ return false;
+
+diff --git a/include/net/xdp.h b/include/net/xdp.h
+index ad5b02dcb6f4c..b2ac69cb30b3d 100644
+--- a/include/net/xdp.h
++++ b/include/net/xdp.h
+@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ enum xdp_mem_type type, void *allocator);
+ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
++int xdp_reg_mem_model(struct xdp_mem_info *mem,
++ enum xdp_mem_type type, void *allocator);
++void xdp_unreg_mem_model(struct xdp_mem_info *mem);
+
+ /* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
+index 4e295541e3967..ffe13a10bc963 100644
+--- a/include/net/xdp_sock_drv.h
++++ b/include/net/xdp_sock_drv.h
+@@ -13,7 +13,7 @@
+
+ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
+ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max);
++u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
+ void xsk_tx_release(struct xsk_buff_pool *pool);
+ struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+ u16 queue_id);
+@@ -129,8 +129,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool,
+ return false;
+ }
+
+-static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc,
+- u32 max)
++static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max)
+ {
+ return 0;
+ }
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 2308210793a01..6156ed2950f97 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -200,6 +200,11 @@ struct xfrm_state {
+ struct xfrm_algo_aead *aead;
+ const char *geniv;
+
++ /* mapping change rate limiting */
++ __be16 new_mapping_sport;
++ u32 new_mapping; /* seconds */
++ u32 mapping_maxage; /* seconds for input SA */
++
+ /* Data for encapsulator */
+ struct xfrm_encap_tmpl *encap;
+ struct sock __rcu *encap_sk;
+@@ -1021,6 +1026,7 @@ struct xfrm_offload {
+ struct sec_path {
+ int len;
+ int olen;
++ int verified_cnt;
+
+ struct xfrm_state *xvec[XFRM_MAX_DEPTH];
+ struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
+@@ -1075,24 +1081,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un
+ }
+
+ #ifdef CONFIG_XFRM
+-static inline bool
+-xfrm_default_allow(struct net *net, int dir)
+-{
+- u8 def = net->xfrm.policy_default;
+-
+- switch (dir) {
+- case XFRM_POLICY_IN:
+- return def & XFRM_POL_DEFAULT_IN ? false : true;
+- case XFRM_POLICY_OUT:
+- return def & XFRM_POL_DEFAULT_OUT ? false : true;
+- case XFRM_POLICY_FWD:
+- return def & XFRM_POL_DEFAULT_FWD ? false : true;
+- }
++int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
++ unsigned short family);
++
++static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
++ int dir)
++{
++ if (!net->xfrm.policy_count[dir] && !secpath_exists(skb))
++ return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT;
++
+ return false;
+ }
+
+-int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
+- unsigned short family);
++static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
++ int dir, unsigned short family)
++{
++ if (dir != XFRM_POLICY_OUT && family == AF_INET) {
++ /* same dst may be used for traffic originating from
++ * devices with different policy settings.
++ */
++ return IPCB(skb)->flags & IPSKB_NOPOLICY;
++ }
++ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
++}
+
+ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
+ struct sk_buff *skb,
+@@ -1104,13 +1115,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
+ if (sk && sk->sk_policy[XFRM_POLICY_IN])
+ return __xfrm_policy_check(sk, ndir, skb, family);
+
+- if (xfrm_default_allow(net, dir))
+- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
+- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+- __xfrm_policy_check(sk, ndir, skb, family);
+- else
+- return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+- __xfrm_policy_check(sk, ndir, skb, family);
++ return __xfrm_check_nopolicy(net, skb, dir) ||
++ __xfrm_check_dev_nopolicy(skb, dir, family) ||
++ __xfrm_policy_check(sk, ndir, skb, family);
+ }
+
+ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
+@@ -1162,13 +1169,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
+ {
+ struct net *net = dev_net(skb->dev);
+
+- if (xfrm_default_allow(net, XFRM_POLICY_FWD))
+- return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
+- (skb_dst(skb)->flags & DST_NOXFRM) ||
+- __xfrm_route_forward(skb, family);
+- else
+- return (skb_dst(skb)->flags & DST_NOXFRM) ||
+- __xfrm_route_forward(skb, family);
++ if (!net->xfrm.policy_count[XFRM_POLICY_OUT] &&
++ net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT)
++ return true;
++
++ return (skb_dst(skb)->flags & DST_NOXFRM) ||
++ __xfrm_route_forward(skb, family);
+ }
+
+ static inline int xfrm4_route_forward(struct sk_buff *skb)
+@@ -1185,6 +1191,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);
+
+ static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
+ {
++ if (!sk_fullsock(osk))
++ return 0;
+ sk->sk_policy[0] = NULL;
+ sk->sk_policy[1] = NULL;
+ if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
+@@ -1562,7 +1570,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
+ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
+ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
+ int xfrm_init_replay(struct xfrm_state *x);
+-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu);
+ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
+ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
+ int xfrm_init_state(struct xfrm_state *x);
+@@ -1675,14 +1682,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+ const struct xfrm_migrate *m, int num_bundles,
+ const struct xfrm_kmaddress *k,
+ const struct xfrm_encap_tmpl *encap);
+-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net);
++struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
++ u32 if_id);
+ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
+ struct xfrm_migrate *m,
+ struct xfrm_encap_tmpl *encap);
+ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+ struct xfrm_migrate *m, int num_bundles,
+ struct xfrm_kmaddress *k, struct net *net,
+- struct xfrm_encap_tmpl *encap);
++ struct xfrm_encap_tmpl *encap, u32 if_id);
+ #endif
+
+ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
+diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
+index 7a9a23e7a604a..ebd1f43578d65 100644
+--- a/include/net/xsk_buff_pool.h
++++ b/include/net/xsk_buff_pool.h
+@@ -60,6 +60,7 @@ struct xsk_buff_pool {
+ */
+ dma_addr_t *dma_pages;
+ struct xdp_buff_xsk *heads;
++ struct xdp_desc *tx_descs;
+ u64 chunk_mask;
+ u64 addrs_cnt;
+ u32 free_list_cnt;
+@@ -86,8 +87,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem);
+ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
+ u16 queue_id, u16 flags);
+-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
++int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
+ struct net_device *dev, u16 queue_id);
++int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
+ void xp_destroy(struct xsk_buff_pool *pool);
+ void xp_release(struct xdp_buff_xsk *xskb);
+ void xp_get_pool(struct xsk_buff_pool *pool);
+@@ -150,13 +152,8 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+ if (likely(!cross_pg))
+ return false;
+
+- if (pool->dma_pages_cnt) {
+- return !(pool->dma_pages[addr >> PAGE_SHIFT] &
+- XSK_NEXT_PG_CONTIG_MASK);
+- }
+-
+- /* skb path */
+- return addr + len > pool->addrs_cnt;
++ return pool->dma_pages_cnt &&
++ !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
+ }
+
+ static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
+diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
+index d808dc3d239e8..811a0f11d0dbe 100644
+--- a/include/rdma/ib_addr.h
++++ b/include/rdma/ib_addr.h
+@@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
+ return 0;
+ }
+
+-static inline int iboe_get_rate(struct net_device *dev)
+-{
+- struct ethtool_link_ksettings cmd;
+- int err;
+-
+- rtnl_lock();
+- err = __ethtool_get_link_ksettings(dev, &cmd);
+- rtnl_unlock();
+- if (err)
+- return IB_RATE_PORT_CURRENT;
+-
+- if (cmd.base.speed >= 40000)
+- return IB_RATE_40_GBPS;
+- else if (cmd.base.speed >= 30000)
+- return IB_RATE_30_GBPS;
+- else if (cmd.base.speed >= 20000)
+- return IB_RATE_20_GBPS;
+- else if (cmd.base.speed >= 10000)
+- return IB_RATE_10_GBPS;
+- else
+- return IB_RATE_PORT_CURRENT;
+-}
+-
+ static inline int rdma_link_local_addr(struct in6_addr *addr)
+ {
+ if (addr->s6_addr32[0] == htonl(0xfe800000) &&
+diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
+index 4b50d9a3018a6..4ba642fc8a19a 100644
+--- a/include/rdma/ib_verbs.h
++++ b/include/rdma/ib_verbs.h
+@@ -4097,8 +4097,13 @@ static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+ {
++ int nents;
++
+ if (ib_uses_virt_dma(dev)) {
+- ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
++ nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
++ if (!nents)
++ return -EIO;
++ sgt->nents = nents;
+ return 0;
+ }
+ return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
+diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
+index 2758d9df71ee9..c2a79aeee113c 100644
+--- a/include/rdma/rdma_netlink.h
++++ b/include/rdma/rdma_netlink.h
+@@ -30,7 +30,7 @@ enum rdma_nl_flags {
+ * constant as well and the compiler checks they are the same.
+ */
+ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \
+- static inline void __chk_##_index(void) \
++ static inline void __maybe_unused __chk_##_index(void) \
+ { \
+ BUILD_BUG_ON(_index != _val); \
+ } \
+diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
+index fac8e89aed81d..310e0dbffda99 100644
+--- a/include/scsi/libfcoe.h
++++ b/include/scsi/libfcoe.h
+@@ -249,7 +249,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *,
+ struct fc_frame *);
+
+ /* libfcoe funcs */
+-u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int);
++u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme,
++ unsigned int port);
+ int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *,
+ const struct libfc_function_template *, int init_fcp);
+ u32 fcoe_fc_crc(struct fc_frame *fp);
+diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
+index 4ee233e5a6ffa..c7ee5279e7fc9 100644
+--- a/include/scsi/libiscsi.h
++++ b/include/scsi/libiscsi.h
+@@ -52,8 +52,10 @@ enum {
+
+ #define ISID_SIZE 6
+
+-/* Connection suspend "bit" */
+-#define ISCSI_SUSPEND_BIT 1
++/* Connection flags */
++#define ISCSI_CONN_FLAG_SUSPEND_TX 0
++#define ISCSI_CONN_FLAG_SUSPEND_RX 1
++#define ISCSI_CONN_FLAG_BOUND 2
+
+ #define ISCSI_ITT_MASK 0x1fff
+ #define ISCSI_TOTAL_CMDS_MAX 4096
+@@ -199,8 +201,9 @@ struct iscsi_conn {
+ struct list_head cmdqueue; /* data-path cmd queue */
+ struct list_head requeue; /* tasks needing another run */
+ struct work_struct xmitwork; /* per-conn. xmit workqueue */
+- unsigned long suspend_tx; /* suspend Tx */
+- unsigned long suspend_rx; /* suspend Rx */
++ /* recv */
++ struct work_struct recvwork;
++ unsigned long flags; /* ISCSI_CONN_FLAGs */
+
+ /* negotiated params */
+ unsigned max_recv_dlength; /* initiator_max_recv_dsl*/
+@@ -399,7 +402,7 @@ extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev);
+ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,
+ int dd_data_size,
+ bool xmit_can_sleep);
+-extern void iscsi_host_remove(struct Scsi_Host *shost);
++extern void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown);
+ extern void iscsi_host_free(struct Scsi_Host *shost);
+ extern int iscsi_target_alloc(struct scsi_target *starget);
+ extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+@@ -411,6 +414,8 @@ extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+ extern struct iscsi_cls_session *
+ iscsi_session_setup(struct iscsi_transport *, struct Scsi_Host *shost,
+ uint16_t, int, int, uint32_t, unsigned int);
++void iscsi_session_remove(struct iscsi_cls_session *cls_session);
++void iscsi_session_free(struct iscsi_cls_session *cls_session);
+ extern void iscsi_session_teardown(struct iscsi_cls_session *);
+ extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
+ extern int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
+@@ -440,8 +445,10 @@ extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
+ extern int iscsi_conn_get_addr_param(struct sockaddr_storage *addr,
+ enum iscsi_param param, char *buf);
+ extern void iscsi_suspend_tx(struct iscsi_conn *conn);
++extern void iscsi_suspend_rx(struct iscsi_conn *conn);
+ extern void iscsi_suspend_queue(struct iscsi_conn *conn);
+-extern void iscsi_conn_queue_work(struct iscsi_conn *conn);
++extern void iscsi_conn_queue_xmit(struct iscsi_conn *conn);
++extern void iscsi_conn_queue_recv(struct iscsi_conn *conn);
+
+ #define iscsi_conn_printk(prefix, _c, fmt, a...) \
+ iscsi_cls_conn_printk(prefix, ((struct iscsi_conn *)_c)->cls_conn, \
+diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
+index eaf04c9a1dfcb..685249233f2fe 100644
+--- a/include/scsi/scsi_cmnd.h
++++ b/include/scsi/scsi_cmnd.h
+@@ -68,7 +68,7 @@ struct scsi_pointer {
+ struct scsi_cmnd {
+ struct scsi_request req;
+ struct scsi_device *device;
+- struct list_head eh_entry; /* entry for the host eh_cmd_q */
++ struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */
+ struct delayed_work abort_work;
+
+ struct rcu_head rcu;
+@@ -211,7 +211,7 @@ static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd)
+ for_each_sg(scsi_sglist(cmd), sg, nseg, __i)
+
+ static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd,
+- void *buf, int buflen)
++ const void *buf, int buflen)
+ {
+ return sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
+ buf, buflen);
+diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+index b97e142a7ca92..3b3dbc37653da 100644
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -206,6 +206,7 @@ struct scsi_device {
+ unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device
+ * creation time */
+ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */
++ unsigned silence_suspend:1; /* Do not print runtime PM related messages */
+
+ bool offline_already; /* Device offline message logged */
+
+diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
+index 75363707b73f9..f50861e4e88a1 100644
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -556,6 +556,7 @@ struct Scsi_Host {
+
+ struct mutex scan_mutex;/* serialize scanning activity */
+
++ struct list_head eh_abort_list;
+ struct list_head eh_cmd_q;
+ struct task_struct * ehandler; /* Error recovery thread. */
+ struct completion * eh_action; /* Wait for specific actions on the
+@@ -761,7 +762,7 @@ extern void scsi_remove_host(struct Scsi_Host *);
+ extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
+ extern int scsi_host_busy(struct Scsi_Host *shost);
+ extern void scsi_host_put(struct Scsi_Host *t);
+-extern struct Scsi_Host *scsi_host_lookup(unsigned short);
++extern struct Scsi_Host *scsi_host_lookup(unsigned int hostnum);
+ extern const char *scsi_host_state_name(enum scsi_host_state);
+ extern void scsi_host_complete_all_commands(struct Scsi_Host *shost,
+ enum scsi_host_status status);
+diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
+index c5d7810fd7926..304ccf1539285 100644
+--- a/include/scsi/scsi_transport_iscsi.h
++++ b/include/scsi/scsi_transport_iscsi.h
+@@ -211,6 +211,8 @@ struct iscsi_cls_conn {
+ struct mutex ep_mutex;
+ struct iscsi_endpoint *ep;
+
++ /* Used when accessing flags and queueing work. */
++ spinlock_t lock;
+ unsigned long flags;
+ struct work_struct cleanup_work;
+
+@@ -234,6 +236,14 @@ enum {
+ ISCSI_SESSION_FREE,
+ };
+
++enum {
++ ISCSI_SESSION_TARGET_UNBOUND,
++ ISCSI_SESSION_TARGET_ALLOCATED,
++ ISCSI_SESSION_TARGET_SCANNED,
++ ISCSI_SESSION_TARGET_UNBINDING,
++ ISCSI_SESSION_TARGET_MAX,
++};
++
+ #define ISCSI_MAX_TARGET -1
+
+ struct iscsi_cls_session {
+@@ -260,6 +270,7 @@ struct iscsi_cls_session {
+ */
+ pid_t creator;
+ int state;
++ int target_state; /* session target bind state */
+ int sid; /* session id */
+ void *dd_data; /* LLD private data */
+ struct device dev; /* sysfs transport/container device */
+@@ -294,7 +305,7 @@ extern void iscsi_host_for_each_session(struct Scsi_Host *shost,
+ struct iscsi_endpoint {
+ void *dd_data; /* LLD private data */
+ struct device dev;
+- uint64_t id;
++ int id;
+ struct iscsi_cls_conn *conn;
+ };
+
+@@ -439,6 +450,7 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
+ struct iscsi_transport *t,
+ int dd_size,
+ unsigned int target_id);
++extern void iscsi_force_destroy_session(struct iscsi_cls_session *session);
+ extern void iscsi_remove_session(struct iscsi_cls_session *session);
+ extern void iscsi_free_session(struct iscsi_cls_session *session);
+ extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess,
+diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h
+index f6542584ca139..cac3f9cd25f9c 100644
+--- a/include/soc/at91/sama7-ddr.h
++++ b/include/soc/at91/sama7-ddr.h
+@@ -11,8 +11,6 @@
+ #ifndef __SAMA7_DDR_H__
+ #define __SAMA7_DDR_H__
+
+-#ifdef CONFIG_SOC_SAMA7
+-
+ /* DDR3PHY */
+ #define DDR3PHY_PIR (0x04) /* DDR3PHY PHY Initialization Register */
+ #define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */
+@@ -28,7 +26,10 @@
+ #define DDR3PHY_PGSR (0x0C) /* DDR3PHY PHY General Status Register */
+ #define DDR3PHY_PGSR_IDONE (1 << 0) /* Initialization Done */
+
+-#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */
++#define DDR3PHY_ACDLLCR (0x14) /* DDR3PHY AC DLL Control Register */
++#define DDR3PHY_ACDLLCR_DLLSRST (1 << 30) /* DLL Soft Reset */
++
++#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */
+ #define DDR3PHY_ACIOCR_CSPDD_CS0 (1 << 18) /* CS#[0] Power Down Driver */
+ #define DDR3PHY_ACIOCR_CKPDD_CK0 (1 << 8) /* CK[0] Power Down Driver */
+ #define DDR3PHY_ACIORC_ACPDD (1 << 3) /* AC Power Down Driver */
+@@ -40,6 +41,14 @@
+ #define DDR3PHY_DSGCR_ODTPDD_ODT0 (1 << 20) /* ODT[0] Power Down Driver */
+
+ #define DDR3PHY_ZQ0SR0 (0x188) /* ZQ status register 0 */
++#define DDR3PHY_ZQ0SR0_PDO_OFF (0) /* Pull-down output impedance select offset */
++#define DDR3PHY_ZQ0SR0_PUO_OFF (5) /* Pull-up output impedance select offset */
++#define DDR3PHY_ZQ0SR0_PDODT_OFF (10) /* Pull-down on-die termination impedance select offset */
++#define DDR3PHY_ZQ0SRO_PUODT_OFF (15) /* Pull-up on-die termination impedance select offset */
++
++#define DDR3PHY_DX0DLLCR (0x1CC) /* DDR3PHY DATX8 DLL Control Register */
++#define DDR3PHY_DX1DLLCR (0x20C) /* DDR3PHY DATX8 DLL Control Register */
++#define DDR3PHY_DXDLLCR_DLLDIS (1 << 31) /* DLL Disable */
+
+ /* UDDRC */
+ #define UDDRC_STAT (0x04) /* UDDRC Operating Mode Status Register */
+@@ -75,6 +84,4 @@
+ #define UDDRC_PCTRL_3 (0x6A0) /* UDDRC Port 3 Control Register */
+ #define UDDRC_PCTRL_4 (0x750) /* UDDRC Port 4 Control Register */
+
+-#endif /* CONFIG_SOC_SAMA7 */
+-
+ #endif /* __SAMA7_DDR_H__ */
+diff --git a/include/sound/control.h b/include/sound/control.h
+index 985c51a8fb748..a1fc7e0a47d95 100644
+--- a/include/sound/control.h
++++ b/include/sound/control.h
+@@ -109,7 +109,7 @@ struct snd_ctl_file {
+ int preferred_subdevice[SND_CTL_SUBDEV_ITEMS];
+ wait_queue_head_t change_sleep;
+ spinlock_t read_lock;
+- struct fasync_struct *fasync;
++ struct snd_fasync *fasync;
+ int subscribed; /* read interface is activated */
+ struct list_head events; /* waiting events for read */
+ };
+diff --git a/include/sound/core.h b/include/sound/core.h
+index b7e9b58d3c788..39cee40ac22e0 100644
+--- a/include/sound/core.h
++++ b/include/sound/core.h
+@@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card);
+ void snd_card_disconnect_sync(struct snd_card *card);
+ int snd_card_free(struct snd_card *card);
+ int snd_card_free_when_closed(struct snd_card *card);
++int snd_card_free_on_error(struct device *dev, int ret);
+ void snd_card_set_id(struct snd_card *card, const char *id);
+ int snd_card_register(struct snd_card *card);
+ int snd_card_info_init(void);
+@@ -500,4 +501,12 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device,
+ }
+ #endif
+
++/* async signal helpers */
++struct snd_fasync;
++
++int snd_fasync_helper(int fd, struct file *file, int on,
++ struct snd_fasync **fasyncp);
++void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll);
++void snd_fasync_free(struct snd_fasync *fasync);
++
+ #endif /* __SOUND_CORE_H */
+diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h
+index 0e45963bb767f..82d9daa178517 100644
+--- a/include/sound/hda_codec.h
++++ b/include/sound/hda_codec.h
+@@ -8,7 +8,7 @@
+ #ifndef __SOUND_HDA_CODEC_H
+ #define __SOUND_HDA_CODEC_H
+
+-#include <linux/kref.h>
++#include <linux/refcount.h>
+ #include <linux/mod_devicetable.h>
+ #include <sound/info.h>
+ #include <sound/control.h>
+@@ -166,8 +166,8 @@ struct hda_pcm {
+ bool own_chmap; /* codec driver provides own channel maps */
+ /* private: */
+ struct hda_codec *codec;
+- struct kref kref;
+ struct list_head list;
++ unsigned int disconnected:1;
+ };
+
+ /* codec information */
+@@ -187,6 +187,8 @@ struct hda_codec {
+
+ /* PCM to create, set by patch_ops.build_pcms callback */
+ struct list_head pcm_list_head;
++ refcount_t pcm_ref;
++ wait_queue_head_t remove_sleep;
+
+ /* codec specific info */
+ void *spec;
+@@ -420,7 +422,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec);
+
+ static inline void snd_hda_codec_pcm_get(struct hda_pcm *pcm)
+ {
+- kref_get(&pcm->kref);
++ refcount_inc(&pcm->codec->pcm_ref);
+ }
+ void snd_hda_codec_pcm_put(struct hda_pcm *pcm);
+
+diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
+index 22af68b014262..658fccdc8660f 100644
+--- a/include/sound/hdaudio.h
++++ b/include/sound/hdaudio.h
+@@ -558,6 +558,8 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev,
+ void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start);
+ void snd_hdac_stream_clear(struct hdac_stream *azx_dev);
+ void snd_hdac_stream_stop(struct hdac_stream *azx_dev);
++void snd_hdac_stop_streams(struct hdac_bus *bus);
++void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus);
+ void snd_hdac_stream_reset(struct hdac_stream *azx_dev);
+ void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set,
+ unsigned int streams, unsigned int reg);
+diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
+index 375581634143c..56ea5cde5e63a 100644
+--- a/include/sound/hdaudio_ext.h
++++ b/include/sound/hdaudio_ext.h
+@@ -88,9 +88,10 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus,
+ struct snd_pcm_substream *substream,
+ int type);
+ void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type);
++void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus,
++ struct hdac_ext_stream *azx_dev, bool decouple);
+ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
+ struct hdac_ext_stream *azx_dev, bool decouple);
+-void snd_hdac_ext_stop_streams(struct hdac_bus *bus);
+
+ int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus,
+ struct hdac_ext_stream *stream, u32 value);
+diff --git a/include/sound/jack.h b/include/sound/jack.h
+index 1181f536557eb..1ed90e2109e9b 100644
+--- a/include/sound/jack.h
++++ b/include/sound/jack.h
+@@ -62,6 +62,7 @@ struct snd_jack {
+ const char *id;
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
+ struct input_dev *input_dev;
++ struct mutex input_dev_lock;
+ int registered;
+ int type;
+ char name[100];
+diff --git a/include/sound/pcm.h b/include/sound/pcm.h
+index 33451f8ff755b..cb9be3632205c 100644
+--- a/include/sound/pcm.h
++++ b/include/sound/pcm.h
+@@ -106,24 +106,24 @@ struct snd_pcm_ops {
+ #define SNDRV_PCM_POS_XRUN ((snd_pcm_uframes_t)-1)
+
+ /* If you change this don't forget to change rates[] table in pcm_native.c */
+-#define SNDRV_PCM_RATE_5512 (1<<0) /* 5512Hz */
+-#define SNDRV_PCM_RATE_8000 (1<<1) /* 8000Hz */
+-#define SNDRV_PCM_RATE_11025 (1<<2) /* 11025Hz */
+-#define SNDRV_PCM_RATE_16000 (1<<3) /* 16000Hz */
+-#define SNDRV_PCM_RATE_22050 (1<<4) /* 22050Hz */
+-#define SNDRV_PCM_RATE_32000 (1<<5) /* 32000Hz */
+-#define SNDRV_PCM_RATE_44100 (1<<6) /* 44100Hz */
+-#define SNDRV_PCM_RATE_48000 (1<<7) /* 48000Hz */
+-#define SNDRV_PCM_RATE_64000 (1<<8) /* 64000Hz */
+-#define SNDRV_PCM_RATE_88200 (1<<9) /* 88200Hz */
+-#define SNDRV_PCM_RATE_96000 (1<<10) /* 96000Hz */
+-#define SNDRV_PCM_RATE_176400 (1<<11) /* 176400Hz */
+-#define SNDRV_PCM_RATE_192000 (1<<12) /* 192000Hz */
+-#define SNDRV_PCM_RATE_352800 (1<<13) /* 352800Hz */
+-#define SNDRV_PCM_RATE_384000 (1<<14) /* 384000Hz */
+-
+-#define SNDRV_PCM_RATE_CONTINUOUS (1<<30) /* continuous range */
+-#define SNDRV_PCM_RATE_KNOT (1<<31) /* supports more non-continuos rates */
++#define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */
++#define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */
++#define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */
++#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */
++#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */
++#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */
++#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */
++#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */
++#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */
++#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */
++#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */
++#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */
++#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */
++#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */
++#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */
++
++#define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */
++#define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuos rates */
+
+ #define SNDRV_PCM_RATE_8000_44100 (SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_11025|\
+ SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_22050|\
+@@ -398,6 +398,8 @@ struct snd_pcm_runtime {
+ wait_queue_head_t tsleep; /* transfer sleep */
+ struct fasync_struct *fasync;
+ bool stop_operating; /* sync_stop will be called */
++ struct mutex buffer_mutex; /* protect for buffer changes */
++ atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */
+
+ /* -- private section -- */
+ void *private_data;
+@@ -614,6 +616,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream);
+ void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream);
+ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream);
+ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream);
++unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream);
+
+ /**
+ * snd_pcm_stream_lock_irqsave - Lock the PCM stream
+@@ -632,6 +635,20 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream);
+ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
+ unsigned long flags);
+
++/**
++ * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking
++ * @substream: PCM substream
++ * @flags: irq flags
++ *
++ * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with
++ * the single-depth lockdep subclass.
++ */
++#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = _snd_pcm_stream_lock_irqsave_nested(substream); \
++ } while (0)
++
+ /**
+ * snd_pcm_group_for_each_entry - iterate over the linked substreams
+ * @s: the iterator
+diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
+index 0dcb361a98bb3..ef3bb1bcea4e0 100644
+--- a/include/sound/soc-dai.h
++++ b/include/sound/soc-dai.h
+@@ -295,9 +295,9 @@ struct snd_soc_dai_ops {
+ unsigned int *rx_num, unsigned int *rx_slot);
+ int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
+
+- int (*set_sdw_stream)(struct snd_soc_dai *dai,
+- void *stream, int direction);
+- void *(*get_sdw_stream)(struct snd_soc_dai *dai, int direction);
++ int (*set_stream)(struct snd_soc_dai *dai,
++ void *stream, int direction);
++ void *(*get_stream)(struct snd_soc_dai *dai, int direction);
+
+ /*
+ * DAI digital mute - optional.
+@@ -515,42 +515,42 @@ static inline void *snd_soc_dai_get_drvdata(struct snd_soc_dai *dai)
+ }
+
+ /**
+- * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation
++ * snd_soc_dai_set_stream() - Configures a DAI for stream operation
+ * @dai: DAI
+- * @stream: STREAM
++ * @stream: STREAM (opaque structure depending on DAI type)
+ * @direction: Stream direction(Playback/Capture)
+- * SoundWire subsystem doesn't have a notion of direction and we reuse
++ * Some subsystems, such as SoundWire, don't have a notion of direction and we reuse
+ * the ASoC stream direction to configure sink/source ports.
+ * Playback maps to source ports and Capture for sink ports.
+ *
+ * This should be invoked with NULL to clear the stream set previously.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+-static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai,
+- void *stream, int direction)
++static inline int snd_soc_dai_set_stream(struct snd_soc_dai *dai,
++ void *stream, int direction)
+ {
+- if (dai->driver->ops->set_sdw_stream)
+- return dai->driver->ops->set_sdw_stream(dai, stream, direction);
++ if (dai->driver->ops->set_stream)
++ return dai->driver->ops->set_stream(dai, stream, direction);
+ else
+ return -ENOTSUPP;
+ }
+
+ /**
+- * snd_soc_dai_get_sdw_stream() - Retrieves SDW stream from DAI
++ * snd_soc_dai_get_stream() - Retrieves stream from DAI
+ * @dai: DAI
+ * @direction: Stream direction(Playback/Capture)
+ *
+ * This routine only retrieves that was previously configured
+- * with snd_soc_dai_get_sdw_stream()
++ * with snd_soc_dai_get_stream()
+ *
+ * Returns pointer to stream or an ERR_PTR value, e.g.
+ * ERR_PTR(-ENOTSUPP) if callback is not supported;
+ */
+-static inline void *snd_soc_dai_get_sdw_stream(struct snd_soc_dai *dai,
+- int direction)
++static inline void *snd_soc_dai_get_stream(struct snd_soc_dai *dai,
++ int direction)
+ {
+- if (dai->driver->ops->get_sdw_stream)
+- return dai->driver->ops->get_sdw_stream(dai, direction);
++ if (dai->driver->ops->get_stream)
++ return dai->driver->ops->get_stream(dai, direction);
+ else
+ return ERR_PTR(-ENOTSUPP);
+ }
+diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
+index c3039e97929a5..32e93d55acf73 100644
+--- a/include/sound/soc-dapm.h
++++ b/include/sound/soc-dapm.h
+@@ -16,6 +16,7 @@
+ #include <sound/asoc.h>
+
+ struct device;
++struct snd_pcm_substream;
+ struct snd_soc_pcm_runtime;
+ struct soc_enum;
+
+diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
+index e296a3949b18b..4bb3ebfdaa45e 100644
+--- a/include/sound/soc-dpcm.h
++++ b/include/sound/soc-dpcm.h
+@@ -101,6 +101,8 @@ struct snd_soc_dpcm_runtime {
+ enum snd_soc_dpcm_state state;
+
+ int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */
++
++ int be_start; /* refcount protected by BE stream pcm lock */
+ };
+
+ #define for_each_dpcm_fe(be, stream, _dpcm) \
+@@ -121,6 +123,10 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe,
+ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe,
+ struct snd_soc_pcm_runtime *be, int stream);
+
++/* can this BE perform prepare */
++int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe,
++ struct snd_soc_pcm_runtime *be, int stream);
++
+ /* is the current PCM operation for this FE ? */
+ int snd_soc_dpcm_fe_can_update(struct snd_soc_pcm_runtime *fe, int stream);
+
+diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
+index 4afd667e124c2..3e8a85e1e8094 100644
+--- a/include/sound/soc-topology.h
++++ b/include/sound/soc-topology.h
+@@ -188,8 +188,7 @@ int snd_soc_tplg_widget_bind_event(struct snd_soc_dapm_widget *w,
+
+ #else
+
+-static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp,
+- u32 index)
++static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
+ {
+ return 0;
+ }
+diff --git a/include/sound/soc.h b/include/sound/soc.h
+index 8e6dd8a257c56..5872a8864f3b6 100644
+--- a/include/sound/soc.h
++++ b/include/sound/soc.h
+@@ -893,8 +893,6 @@ struct snd_soc_card {
+ struct mutex pcm_mutex;
+ enum snd_soc_pcm_subclass pcm_subclass;
+
+- spinlock_t dpcm_lock;
+-
+ int (*probe)(struct snd_soc_card *card);
+ int (*late_probe)(struct snd_soc_card *card);
+ int (*remove)(struct snd_soc_card *card);
+diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
+index fb11c7693b257..c83bb58bfcd1f 100644
+--- a/include/target/target_core_base.h
++++ b/include/target/target_core_base.h
+@@ -812,8 +812,9 @@ struct se_device {
+ atomic_long_t read_bytes;
+ atomic_long_t write_bytes;
+ /* Active commands on this virtual SE device */
+- atomic_t simple_cmds;
+- atomic_t dev_ordered_sync;
++ atomic_t non_ordered;
++ bool ordered_sync_in_progress;
++ atomic_t delayed_cmd_count;
+ atomic_t dev_qf_count;
+ u32 export_count;
+ spinlock_t delayed_cmd_lock;
+@@ -834,6 +835,7 @@ struct se_device {
+ struct list_head dev_sep_list;
+ struct list_head dev_tmr_list;
+ struct work_struct qf_work_queue;
++ struct work_struct delayed_cmd_work;
+ struct list_head delayed_cmd_list;
+ struct list_head qf_cmd_list;
+ /* Pointer to associated SE HBA */
+@@ -863,6 +865,7 @@ struct se_device {
+ struct rcu_head rcu_head;
+ int queue_cnt;
+ struct se_device_queue *queues;
++ struct mutex lun_reset_mutex;
+ };
+
+ struct se_hba {
+diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
+index a23be89119aa5..26ec024c3d58a 100644
+--- a/include/trace/bpf_probe.h
++++ b/include/trace/bpf_probe.h
+@@ -21,6 +21,28 @@
+ #undef __get_bitmask
+ #define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
++#undef __get_sockaddr
++#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
++
++#undef __get_rel_dynamic_array
++#define __get_rel_dynamic_array(field) \
++ ((void *)(&__entry->__rel_loc_##field) + \
++ sizeof(__entry->__rel_loc_##field) + \
++ (__entry->__rel_loc_##field & 0xffff))
++
++#undef __get_rel_dynamic_array_len
++#define __get_rel_dynamic_array_len(field) \
++ ((__entry->__rel_loc_##field >> 16) & 0xffff)
++
++#undef __get_rel_str
++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
++
++#undef __get_rel_bitmask
++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
++
++#undef __get_rel_sockaddr
++#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
++
+ #undef __perf_count
+ #define __perf_count(c) (c)
+
+diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
+index 8f58fd95efc74..9271b5dfae4c4 100644
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -96,7 +96,7 @@ struct btrfs_space_info;
+ EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
+ EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \
+ EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
+- EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
++ EM( FLUSH_DELAYED_REFS, "FLUSH_DELAYED_REFS") \
+ EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
+ EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
+ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
+diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
+index 7f42a3de59e6b..dd7d7c9efecdf 100644
+--- a/include/trace/events/cgroup.h
++++ b/include/trace/events/cgroup.h
+@@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup,
+
+ TP_STRUCT__entry(
+ __field( int, root )
+- __field( int, id )
+ __field( int, level )
++ __field( u64, id )
+ __string( path, path )
+ ),
+
+@@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup,
+ __assign_str(path, path);
+ ),
+
+- TP_printk("root=%d id=%d level=%d path=%s",
++ TP_printk("root=%d id=%llu level=%d path=%s",
+ __entry->root, __entry->id, __entry->level, __get_str(path))
+ );
+
+@@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
+
+ TP_STRUCT__entry(
+ __field( int, dst_root )
+- __field( int, dst_id )
+ __field( int, dst_level )
++ __field( u64, dst_id )
+ __field( int, pid )
+ __string( dst_path, path )
+ __string( comm, task->comm )
+@@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
+ __assign_str(comm, task->comm);
+ ),
+
+- TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
++ TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
+ __entry->dst_root, __entry->dst_id, __entry->dst_level,
+ __get_str(dst_path), __entry->pid, __get_str(comm))
+ );
+@@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event,
+
+ TP_STRUCT__entry(
+ __field( int, root )
+- __field( int, id )
+ __field( int, level )
++ __field( u64, id )
+ __string( path, path )
+ __field( int, val )
+ ),
+@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event,
+ __entry->val = val;
+ ),
+
+- TP_printk("root=%d id=%d level=%d path=%s val=%d",
++ TP_printk("root=%d id=%llu level=%d path=%s val=%d",
+ __entry->root, __entry->id, __entry->level, __get_str(path),
+ __entry->val)
+ );
+diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
+index 0ea36b2b0662a..c649c7fcb9afb 100644
+--- a/include/trace/events/ext4.h
++++ b/include/trace/events/ext4.h
+@@ -95,6 +95,18 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
+ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
+ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME);
++TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
++
+ #define show_fc_reason(reason) \
+ __print_symbolic(reason, \
+ { EXT4_FC_REASON_XATTR, "XATTR"}, \
+@@ -105,7 +117,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
+ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \
+ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \
+ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \
+- { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"})
++ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \
++ { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"})
+
+ TRACE_EVENT(ext4_other_inode_update_time,
+ TP_PROTO(struct inode *inode, ino_t orig_ino),
+@@ -2723,41 +2736,51 @@ TRACE_EVENT(ext4_fc_commit_stop,
+
+ #define FC_REASON_NAME_STAT(reason) \
+ show_fc_reason(reason), \
+- __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason]
++ __entry->fc_ineligible_rc[reason]
+
+ TRACE_EVENT(ext4_fc_stats,
+- TP_PROTO(struct super_block *sb),
+-
+- TP_ARGS(sb),
++ TP_PROTO(struct super_block *sb),
+
+- TP_STRUCT__entry(
+- __field(dev_t, dev)
+- __field(struct ext4_sb_info *, sbi)
+- __field(int, count)
+- ),
++ TP_ARGS(sb),
+
+- TP_fast_assign(
+- __entry->dev = sb->s_dev;
+- __entry->sbi = EXT4_SB(sb);
+- ),
++ TP_STRUCT__entry(
++ __field(dev_t, dev)
++ __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX)
++ __field(unsigned long, fc_commits)
++ __field(unsigned long, fc_ineligible_commits)
++ __field(unsigned long, fc_numblks)
++ ),
+
+- TP_printk("dev %d:%d fc ineligible reasons:\n"
+- "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
+- "num_commits:%ld, ineligible: %ld, numblks: %ld",
+- MAJOR(__entry->dev), MINOR(__entry->dev),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+- FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
+- __entry->sbi->s_fc_stats.fc_num_commits,
+- __entry->sbi->s_fc_stats.fc_ineligible_commits,
+- __entry->sbi->s_fc_stats.fc_numblks)
++ TP_fast_assign(
++ int i;
+
++ __entry->dev = sb->s_dev;
++ for (i = 0; i < EXT4_FC_REASON_MAX; i++) {
++ __entry->fc_ineligible_rc[i] =
++ EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i];
++ }
++ __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits;
++ __entry->fc_ineligible_commits =
++ EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
++ __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks;
++ ),
++
++ TP_printk("dev %d,%d fc ineligible reasons:\n"
++ "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u"
++ "num_commits:%lu, ineligible: %lu, numblks: %lu",
++ MAJOR(__entry->dev), MINOR(__entry->dev),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
++ FC_REASON_NAME_STAT(EXT4_FC_REASON_ENCRYPTED_FILENAME),
++ __entry->fc_commits, __entry->fc_ineligible_commits,
++ __entry->fc_numblks)
+ );
+
+ #define DEFINE_TRACE_DENTRY_EVENT(__type) \
+diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
+index 4e881d91c8744..f5dcf7c9b7076 100644
+--- a/include/trace/events/f2fs.h
++++ b/include/trace/events/f2fs.h
+@@ -513,7 +513,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+- __field(nid_t, nid[3])
++ __array(nid_t, nid, 3)
+ __field(int, depth)
+ __field(int, err)
+ ),
+@@ -807,20 +807,20 @@ TRACE_EVENT(f2fs_lookup_start,
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+- __field(const char *, name)
++ __string(name, dentry->d_name.name)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+- __entry->name = dentry->d_name.name;
++ __assign_str(name, dentry->d_name.name);
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
+ show_dev_ino(__entry),
+- __entry->name,
++ __get_str(name),
+ __entry->flags)
+ );
+
+@@ -834,7 +834,7 @@ TRACE_EVENT(f2fs_lookup_end,
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+- __field(const char *, name)
++ __string(name, dentry->d_name.name)
+ __field(nid_t, cino)
+ __field(int, err)
+ ),
+@@ -842,14 +842,14 @@ TRACE_EVENT(f2fs_lookup_end,
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+- __entry->name = dentry->d_name.name;
++ __assign_str(name, dentry->d_name.name);
+ __entry->cino = ino;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
+ show_dev_ino(__entry),
+- __entry->name,
++ __get_str(name),
+ __entry->cino,
+ __entry->err)
+ );
+diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
+index 59363a083ecb9..d92691c78cff6 100644
+--- a/include/trace/events/ib_mad.h
++++ b/include/trace/events/ib_mad.h
+@@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
+ __field(int, retries_left)
+ __field(int, max_retries)
+ __field(int, retry)
+- __field(u16, pkey)
+ ),
+
+ TP_fast_assign(
+@@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
+ "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
+ "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \
+ "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\
+- "pkey 0x%x rpqn 0x%x rqpkey 0x%x",
++ "rpqn 0x%x rqpkey 0x%x",
+ __entry->dev_index, __entry->port_num, __entry->qp_num,
+ __entry->agent_priv, be64_to_cpu(__entry->wrtid),
+ __entry->retries_left, __entry->max_retries,
+@@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
+ be16_to_cpu(__entry->class_specific),
+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+ be32_to_cpu(__entry->attr_mod),
+- be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
++ be32_to_cpu(__entry->dlid), __entry->sl,
+ __entry->rqpn, __entry->rqkey
+ )
+ );
+@@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
+ __field(u16, wc_status)
+ __field(u32, slid)
+ __field(u32, dev_index)
+- __field(u16, pkey)
+ ),
+
+ TP_fast_assign(
+@@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
+ __entry->slid = wc->slid;
+ __entry->src_qp = wc->src_qp;
+ __entry->sl = wc->sl;
+- ib_query_pkey(qp_info->port_priv->device,
+- qp_info->port_priv->port_num,
+- wc->pkey_index, &__entry->pkey);
+ __entry->wc_status = wc->status;
+ ),
+
+@@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
+ "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \
+ "method 0x%02x status 0x%04x class_specific 0x%04x " \
+ "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \
+- "slid 0x%08x src QP%d, sl %d pkey 0x%04x",
++ "slid 0x%08x src QP%d, sl %d",
+ __entry->dev_index, __entry->port_num, __entry->qp_num,
+ __entry->wc_status,
+ __entry->length,
+@@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
+ be16_to_cpu(__entry->class_specific),
+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+ be32_to_cpu(__entry->attr_mod),
+- __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey
++ __entry->slid, __entry->src_qp, __entry->sl
+ )
+ );
+
+diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
+index a4dfe005983d3..34ce197bd76e0 100644
+--- a/include/trace/events/jbd2.h
++++ b/include/trace/events/jbd2.h
+@@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+- __field( int, transaction )
++ __field( tid_t, transaction )
+ ),
+
+ TP_fast_assign(
+@@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
+ __entry->transaction = commit_transaction->t_tid;
+ ),
+
+- TP_printk("dev %d,%d transaction %d sync %d",
++ TP_printk("dev %d,%d transaction %u sync %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit)
+ );
+@@ -97,8 +97,8 @@ TRACE_EVENT(jbd2_end_commit,
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+- __field( int, transaction )
+- __field( int, head )
++ __field( tid_t, transaction )
++ __field( tid_t, head )
+ ),
+
+ TP_fast_assign(
+@@ -108,7 +108,7 @@ TRACE_EVENT(jbd2_end_commit,
+ __entry->head = journal->j_tail_sequence;
+ ),
+
+- TP_printk("dev %d,%d transaction %d sync %d head %d",
++ TP_printk("dev %d,%d transaction %u sync %d head %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit, __entry->head)
+ );
+@@ -134,14 +134,14 @@ TRACE_EVENT(jbd2_submit_inode_data,
+ );
+
+ DECLARE_EVENT_CLASS(jbd2_handle_start_class,
+- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
+ unsigned int line_no, int requested_blocks),
+
+ TP_ARGS(dev, tid, type, line_no, requested_blocks),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+- __field( unsigned long, tid )
++ __field( tid_t, tid )
+ __field( unsigned int, type )
+ __field( unsigned int, line_no )
+ __field( int, requested_blocks)
+@@ -155,28 +155,28 @@ DECLARE_EVENT_CLASS(jbd2_handle_start_class,
+ __entry->requested_blocks = requested_blocks;
+ ),
+
+- TP_printk("dev %d,%d tid %lu type %u line_no %u "
++ TP_printk("dev %d,%d tid %u type %u line_no %u "
+ "requested_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+ __entry->type, __entry->line_no, __entry->requested_blocks)
+ );
+
+ DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start,
+- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
+ unsigned int line_no, int requested_blocks),
+
+ TP_ARGS(dev, tid, type, line_no, requested_blocks)
+ );
+
+ DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart,
+- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
+ unsigned int line_no, int requested_blocks),
+
+ TP_ARGS(dev, tid, type, line_no, requested_blocks)
+ );
+
+ TRACE_EVENT(jbd2_handle_extend,
+- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
+ unsigned int line_no, int buffer_credits,
+ int requested_blocks),
+
+@@ -184,7 +184,7 @@ TRACE_EVENT(jbd2_handle_extend,
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+- __field( unsigned long, tid )
++ __field( tid_t, tid )
+ __field( unsigned int, type )
+ __field( unsigned int, line_no )
+ __field( int, buffer_credits )
+@@ -200,7 +200,7 @@ TRACE_EVENT(jbd2_handle_extend,
+ __entry->requested_blocks = requested_blocks;
+ ),
+
+- TP_printk("dev %d,%d tid %lu type %u line_no %u "
++ TP_printk("dev %d,%d tid %u type %u line_no %u "
+ "buffer_credits %d requested_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+ __entry->type, __entry->line_no, __entry->buffer_credits,
+@@ -208,7 +208,7 @@ TRACE_EVENT(jbd2_handle_extend,
+ );
+
+ TRACE_EVENT(jbd2_handle_stats,
+- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type,
+ unsigned int line_no, int interval, int sync,
+ int requested_blocks, int dirtied_blocks),
+
+@@ -217,7 +217,7 @@ TRACE_EVENT(jbd2_handle_stats,
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+- __field( unsigned long, tid )
++ __field( tid_t, tid )
+ __field( unsigned int, type )
+ __field( unsigned int, line_no )
+ __field( int, interval )
+@@ -237,7 +237,7 @@ TRACE_EVENT(jbd2_handle_stats,
+ __entry->dirtied_blocks = dirtied_blocks;
+ ),
+
+- TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d "
++ TP_printk("dev %d,%d tid %u type %u line_no %u interval %d "
+ "sync %d requested_blocks %d dirtied_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+ __entry->type, __entry->line_no, __entry->interval,
+@@ -246,14 +246,14 @@ TRACE_EVENT(jbd2_handle_stats,
+ );
+
+ TRACE_EVENT(jbd2_run_stats,
+- TP_PROTO(dev_t dev, unsigned long tid,
++ TP_PROTO(dev_t dev, tid_t tid,
+ struct transaction_run_stats_s *stats),
+
+ TP_ARGS(dev, tid, stats),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+- __field( unsigned long, tid )
++ __field( tid_t, tid )
+ __field( unsigned long, wait )
+ __field( unsigned long, request_delay )
+ __field( unsigned long, running )
+@@ -279,7 +279,7 @@ TRACE_EVENT(jbd2_run_stats,
+ __entry->blocks_logged = stats->rs_blocks_logged;
+ ),
+
+- TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u "
++ TP_printk("dev %d,%d tid %u wait %u request_delay %u running %u "
+ "locked %u flushing %u logging %u handle_count %u "
+ "blocks %u blocks_logged %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+@@ -294,14 +294,14 @@ TRACE_EVENT(jbd2_run_stats,
+ );
+
+ TRACE_EVENT(jbd2_checkpoint_stats,
+- TP_PROTO(dev_t dev, unsigned long tid,
++ TP_PROTO(dev_t dev, tid_t tid,
+ struct transaction_chp_stats_s *stats),
+
+ TP_ARGS(dev, tid, stats),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+- __field( unsigned long, tid )
++ __field( tid_t, tid )
+ __field( unsigned long, chp_time )
+ __field( __u32, forced_to_close )
+ __field( __u32, written )
+@@ -317,7 +317,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
+ __entry->dropped = stats->cs_dropped;
+ ),
+
+- TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
++ TP_printk("dev %d,%d tid %u chp_time %u forced_to_close %u "
+ "written %u dropped %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+ jiffies_to_msecs(__entry->chp_time),
+@@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit,
+ TRACE_EVENT(jbd2_shrink_checkpoint_list,
+
+ TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid,
+- unsigned long nr_freed, unsigned long nr_scanned,
+- tid_t next_tid),
++ unsigned long nr_freed, tid_t next_tid),
+
+- TP_ARGS(journal, first_tid, tid, last_tid, nr_freed,
+- nr_scanned, next_tid),
++ TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+@@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
+ __field(tid_t, tid)
+ __field(tid_t, last_tid)
+ __field(unsigned long, nr_freed)
+- __field(unsigned long, nr_scanned)
+ __field(tid_t, next_tid)
+ ),
+
+@@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
+ __entry->tid = tid;
+ __entry->last_tid = last_tid;
+ __entry->nr_freed = nr_freed;
+- __entry->nr_scanned = nr_scanned;
+ __entry->next_tid = next_tid;
+ ),
+
+ TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu "
+- "scanned %lu next transaction %u",
++ "next transaction %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->first_tid, __entry->tid, __entry->last_tid,
+- __entry->nr_freed, __entry->nr_scanned, __entry->next_tid)
++ __entry->nr_freed, __entry->next_tid)
+ );
+
+ #endif /* _TRACE_JBD2_H */
+diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h
+index ab69434e2329e..72e785a903b65 100644
+--- a/include/trace/events/libata.h
++++ b/include/trace/events/libata.h
+@@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template,
+ __entry->hob_feature = qc->result_tf.hob_feature;
+ __entry->nsect = qc->result_tf.nsect;
+ __entry->hob_nsect = qc->result_tf.hob_nsect;
++ __entry->flags = qc->flags;
+ ),
+
+ TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \
+diff --git a/include/trace/events/qrtr.h b/include/trace/events/qrtr.h
+index b1de14c3bb934..441132c67133f 100644
+--- a/include/trace/events/qrtr.h
++++ b/include/trace/events/qrtr.h
+@@ -10,15 +10,16 @@
+
+ TRACE_EVENT(qrtr_ns_service_announce_new,
+
+- TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
++ TP_PROTO(unsigned int service, unsigned int instance,
++ unsigned int node, unsigned int port),
+
+ TP_ARGS(service, instance, node, port),
+
+ TP_STRUCT__entry(
+- __field(__le32, service)
+- __field(__le32, instance)
+- __field(__le32, node)
+- __field(__le32, port)
++ __field(unsigned int, service)
++ __field(unsigned int, instance)
++ __field(unsigned int, node)
++ __field(unsigned int, port)
+ ),
+
+ TP_fast_assign(
+@@ -36,15 +37,16 @@ TRACE_EVENT(qrtr_ns_service_announce_new,
+
+ TRACE_EVENT(qrtr_ns_service_announce_del,
+
+- TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
++ TP_PROTO(unsigned int service, unsigned int instance,
++ unsigned int node, unsigned int port),
+
+ TP_ARGS(service, instance, node, port),
+
+ TP_STRUCT__entry(
+- __field(__le32, service)
+- __field(__le32, instance)
+- __field(__le32, node)
+- __field(__le32, port)
++ __field(unsigned int, service)
++ __field(unsigned int, instance)
++ __field(unsigned int, node)
++ __field(unsigned int, port)
+ ),
+
+ TP_fast_assign(
+@@ -62,15 +64,16 @@ TRACE_EVENT(qrtr_ns_service_announce_del,
+
+ TRACE_EVENT(qrtr_ns_server_add,
+
+- TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
++ TP_PROTO(unsigned int service, unsigned int instance,
++ unsigned int node, unsigned int port),
+
+ TP_ARGS(service, instance, node, port),
+
+ TP_STRUCT__entry(
+- __field(__le32, service)
+- __field(__le32, instance)
+- __field(__le32, node)
+- __field(__le32, port)
++ __field(unsigned int, service)
++ __field(unsigned int, instance)
++ __field(unsigned int, node)
++ __field(unsigned int, port)
+ ),
+
+ TP_fast_assign(
+diff --git a/include/trace/events/random.h b/include/trace/events/random.h
+deleted file mode 100644
+index 3d7b432ca5f31..0000000000000
+--- a/include/trace/events/random.h
++++ /dev/null
+@@ -1,247 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#undef TRACE_SYSTEM
+-#define TRACE_SYSTEM random
+-
+-#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ)
+-#define _TRACE_RANDOM_H
+-
+-#include <linux/writeback.h>
+-#include <linux/tracepoint.h>
+-
+-TRACE_EVENT(add_device_randomness,
+- TP_PROTO(int bytes, unsigned long IP),
+-
+- TP_ARGS(bytes, IP),
+-
+- TP_STRUCT__entry(
+- __field( int, bytes )
+- __field(unsigned long, IP )
+- ),
+-
+- TP_fast_assign(
+- __entry->bytes = bytes;
+- __entry->IP = IP;
+- ),
+-
+- TP_printk("bytes %d caller %pS",
+- __entry->bytes, (void *)__entry->IP)
+-);
+-
+-DECLARE_EVENT_CLASS(random__mix_pool_bytes,
+- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+-
+- TP_ARGS(pool_name, bytes, IP),
+-
+- TP_STRUCT__entry(
+- __field( const char *, pool_name )
+- __field( int, bytes )
+- __field(unsigned long, IP )
+- ),
+-
+- TP_fast_assign(
+- __entry->pool_name = pool_name;
+- __entry->bytes = bytes;
+- __entry->IP = IP;
+- ),
+-
+- TP_printk("%s pool: bytes %d caller %pS",
+- __entry->pool_name, __entry->bytes, (void *)__entry->IP)
+-);
+-
+-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
+- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+-
+- TP_ARGS(pool_name, bytes, IP)
+-);
+-
+-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
+- TP_PROTO(const char *pool_name, int bytes, unsigned long IP),
+-
+- TP_ARGS(pool_name, bytes, IP)
+-);
+-
+-TRACE_EVENT(credit_entropy_bits,
+- TP_PROTO(const char *pool_name, int bits, int entropy_count,
+- unsigned long IP),
+-
+- TP_ARGS(pool_name, bits, entropy_count, IP),
+-
+- TP_STRUCT__entry(
+- __field( const char *, pool_name )
+- __field( int, bits )
+- __field( int, entropy_count )
+- __field(unsigned long, IP )
+- ),
+-
+- TP_fast_assign(
+- __entry->pool_name = pool_name;
+- __entry->bits = bits;
+- __entry->entropy_count = entropy_count;
+- __entry->IP = IP;
+- ),
+-
+- TP_printk("%s pool: bits %d entropy_count %d caller %pS",
+- __entry->pool_name, __entry->bits,
+- __entry->entropy_count, (void *)__entry->IP)
+-);
+-
+-TRACE_EVENT(debit_entropy,
+- TP_PROTO(const char *pool_name, int debit_bits),
+-
+- TP_ARGS(pool_name, debit_bits),
+-
+- TP_STRUCT__entry(
+- __field( const char *, pool_name )
+- __field( int, debit_bits )
+- ),
+-
+- TP_fast_assign(
+- __entry->pool_name = pool_name;
+- __entry->debit_bits = debit_bits;
+- ),
+-
+- TP_printk("%s: debit_bits %d", __entry->pool_name,
+- __entry->debit_bits)
+-);
+-
+-TRACE_EVENT(add_input_randomness,
+- TP_PROTO(int input_bits),
+-
+- TP_ARGS(input_bits),
+-
+- TP_STRUCT__entry(
+- __field( int, input_bits )
+- ),
+-
+- TP_fast_assign(
+- __entry->input_bits = input_bits;
+- ),
+-
+- TP_printk("input_pool_bits %d", __entry->input_bits)
+-);
+-
+-TRACE_EVENT(add_disk_randomness,
+- TP_PROTO(dev_t dev, int input_bits),
+-
+- TP_ARGS(dev, input_bits),
+-
+- TP_STRUCT__entry(
+- __field( dev_t, dev )
+- __field( int, input_bits )
+- ),
+-
+- TP_fast_assign(
+- __entry->dev = dev;
+- __entry->input_bits = input_bits;
+- ),
+-
+- TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev),
+- MINOR(__entry->dev), __entry->input_bits)
+-);
+-
+-DECLARE_EVENT_CLASS(random__get_random_bytes,
+- TP_PROTO(int nbytes, unsigned long IP),
+-
+- TP_ARGS(nbytes, IP),
+-
+- TP_STRUCT__entry(
+- __field( int, nbytes )
+- __field(unsigned long, IP )
+- ),
+-
+- TP_fast_assign(
+- __entry->nbytes = nbytes;
+- __entry->IP = IP;
+- ),
+-
+- TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
+-);
+-
+-DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
+- TP_PROTO(int nbytes, unsigned long IP),
+-
+- TP_ARGS(nbytes, IP)
+-);
+-
+-DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
+- TP_PROTO(int nbytes, unsigned long IP),
+-
+- TP_ARGS(nbytes, IP)
+-);
+-
+-DECLARE_EVENT_CLASS(random__extract_entropy,
+- TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
+- unsigned long IP),
+-
+- TP_ARGS(pool_name, nbytes, entropy_count, IP),
+-
+- TP_STRUCT__entry(
+- __field( const char *, pool_name )
+- __field( int, nbytes )
+- __field( int, entropy_count )
+- __field(unsigned long, IP )
+- ),
+-
+- TP_fast_assign(
+- __entry->pool_name = pool_name;
+- __entry->nbytes = nbytes;
+- __entry->entropy_count = entropy_count;
+- __entry->IP = IP;
+- ),
+-
+- TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
+- __entry->pool_name, __entry->nbytes, __entry->entropy_count,
+- (void *)__entry->IP)
+-);
+-
+-
+-DEFINE_EVENT(random__extract_entropy, extract_entropy,
+- TP_PROTO(const char *pool_name, int nbytes, int entropy_count,
+- unsigned long IP),
+-
+- TP_ARGS(pool_name, nbytes, entropy_count, IP)
+-);
+-
+-TRACE_EVENT(urandom_read,
+- TP_PROTO(int got_bits, int pool_left, int input_left),
+-
+- TP_ARGS(got_bits, pool_left, input_left),
+-
+- TP_STRUCT__entry(
+- __field( int, got_bits )
+- __field( int, pool_left )
+- __field( int, input_left )
+- ),
+-
+- TP_fast_assign(
+- __entry->got_bits = got_bits;
+- __entry->pool_left = pool_left;
+- __entry->input_left = input_left;
+- ),
+-
+- TP_printk("got_bits %d nonblocking_pool_entropy_left %d "
+- "input_entropy_left %d", __entry->got_bits,
+- __entry->pool_left, __entry->input_left)
+-);
+-
+-TRACE_EVENT(prandom_u32,
+-
+- TP_PROTO(unsigned int ret),
+-
+- TP_ARGS(ret),
+-
+- TP_STRUCT__entry(
+- __field( unsigned int, ret)
+- ),
+-
+- TP_fast_assign(
+- __entry->ret = ret;
+- ),
+-
+- TP_printk("ret=%u" , __entry->ret)
+-);
+-
+-#endif /* _TRACE_RANDOM_H */
+-
+-/* This part must be outside protection */
+-#include <trace/define_trace.h>
+diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
+index 670e41783edd8..fec252b1e5b8d 100644
+--- a/include/trace/events/rcu.h
++++ b/include/trace/events/rcu.h
+@@ -768,7 +768,7 @@ TRACE_EVENT_RCU(rcu_torture_read,
+ TP_ARGS(rcutorturename, rhp, secs, c_old, c),
+
+ TP_STRUCT__entry(
+- __field(char, rcutorturename[RCUTORTURENAME_LEN])
++ __array(char, rcutorturename, RCUTORTURENAME_LEN)
+ __field(struct rcu_head *, rhp)
+ __field(unsigned long, secs)
+ __field(unsigned long, c_old)
+diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
+index e70c90116edae..221856f2d295c 100644
+--- a/include/trace/events/rxrpc.h
++++ b/include/trace/events/rxrpc.h
+@@ -83,12 +83,15 @@ enum rxrpc_call_trace {
+ rxrpc_call_error,
+ rxrpc_call_got,
+ rxrpc_call_got_kernel,
++ rxrpc_call_got_timer,
+ rxrpc_call_got_userid,
+ rxrpc_call_new_client,
+ rxrpc_call_new_service,
+ rxrpc_call_put,
+ rxrpc_call_put_kernel,
+ rxrpc_call_put_noqueue,
++ rxrpc_call_put_notimer,
++ rxrpc_call_put_timer,
+ rxrpc_call_put_userid,
+ rxrpc_call_queued,
+ rxrpc_call_queued_ref,
+@@ -278,12 +281,15 @@ enum rxrpc_tx_point {
+ EM(rxrpc_call_error, "*E*") \
+ EM(rxrpc_call_got, "GOT") \
+ EM(rxrpc_call_got_kernel, "Gke") \
++ EM(rxrpc_call_got_timer, "GTM") \
+ EM(rxrpc_call_got_userid, "Gus") \
+ EM(rxrpc_call_new_client, "NWc") \
+ EM(rxrpc_call_new_service, "NWs") \
+ EM(rxrpc_call_put, "PUT") \
+ EM(rxrpc_call_put_kernel, "Pke") \
+- EM(rxrpc_call_put_noqueue, "PNQ") \
++ EM(rxrpc_call_put_noqueue, "PnQ") \
++ EM(rxrpc_call_put_notimer, "PnT") \
++ EM(rxrpc_call_put_timer, "PTM") \
+ EM(rxrpc_call_put_userid, "Pus") \
+ EM(rxrpc_call_queued, "QUE") \
+ EM(rxrpc_call_queued_ref, "QUR") \
+@@ -577,7 +583,7 @@ TRACE_EVENT(rxrpc_client,
+ TP_fast_assign(
+ __entry->conn = conn ? conn->debug_id : 0;
+ __entry->channel = channel;
+- __entry->usage = conn ? atomic_read(&conn->usage) : -2;
++ __entry->usage = conn ? refcount_read(&conn->ref) : -2;
+ __entry->op = op;
+ __entry->cid = conn ? conn->proto.cid : 0;
+ ),
+@@ -1503,7 +1509,7 @@ TRACE_EVENT(rxrpc_call_reset,
+ __entry->call_serial = call->rx_serial;
+ __entry->conn_serial = call->conn->hi_serial;
+ __entry->tx_seq = call->tx_hard_ack;
+- __entry->rx_seq = call->ackr_seen;
++ __entry->rx_seq = call->rx_hard_ack;
+ ),
+
+ TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x",
+diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
+index 9e92f22eb086c..485a1d3034a4b 100644
+--- a/include/trace/events/skb.h
++++ b/include/trace/events/skb.h
+@@ -9,29 +9,63 @@
+ #include <linux/netdevice.h>
+ #include <linux/tracepoint.h>
+
++#define TRACE_SKB_DROP_REASON \
++ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
++ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \
++ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \
++ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
++ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \
++ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
++ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \
++ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \
++ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \
++ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \
++ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \
++ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \
++ UNICAST_IN_L2_MULTICAST) \
++ EMe(SKB_DROP_REASON_MAX, MAX)
++
++#undef EM
++#undef EMe
++
++#define EM(a, b) TRACE_DEFINE_ENUM(a);
++#define EMe(a, b) TRACE_DEFINE_ENUM(a);
++
++TRACE_SKB_DROP_REASON
++
++#undef EM
++#undef EMe
++#define EM(a, b) { a, #b },
++#define EMe(a, b) { a, #b }
++
+ /*
+ * Tracepoint for free an sk_buff:
+ */
+ TRACE_EVENT(kfree_skb,
+
+- TP_PROTO(struct sk_buff *skb, void *location),
++ TP_PROTO(struct sk_buff *skb, void *location,
++ enum skb_drop_reason reason),
+
+- TP_ARGS(skb, location),
++ TP_ARGS(skb, location, reason),
+
+ TP_STRUCT__entry(
+- __field( void *, skbaddr )
+- __field( void *, location )
+- __field( unsigned short, protocol )
++ __field(void *, skbaddr)
++ __field(void *, location)
++ __field(unsigned short, protocol)
++ __field(enum skb_drop_reason, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->skbaddr = skb;
+ __entry->location = location;
+ __entry->protocol = ntohs(skb->protocol);
++ __entry->reason = reason;
+ ),
+
+- TP_printk("skbaddr=%p protocol=%u location=%p",
+- __entry->skbaddr, __entry->protocol, __entry->location)
++ TP_printk("skbaddr=%p protocol=%u location=%p reason: %s",
++ __entry->skbaddr, __entry->protocol, __entry->location,
++ __print_symbolic(__entry->reason,
++ TRACE_SKB_DROP_REASON))
+ );
+
+ TRACE_EVENT(consume_skb,
+diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
+index 12c315782766a..777ee6cbe9330 100644
+--- a/include/trace/events/sock.h
++++ b/include/trace/events/sock.h
+@@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
+
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+- __field(long *, sysctl_mem)
++ __array(long, sysctl_mem, 3)
+ __field(long, allocated)
+ __field(int, sysctl_rmem)
+ __field(int, rmem_alloc)
+@@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit,
+
+ TP_fast_assign(
+ strncpy(__entry->name, prot->name, 32);
+- __entry->sysctl_mem = prot->sysctl_mem;
++ __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]);
++ __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]);
++ __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]);
+ __entry->allocated = allocated;
+ __entry->sysctl_rmem = sk_get_rmem0(sk, prot);
+ __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
+diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h
+index 8b60efe18ba68..a6819fd85cdf4 100644
+--- a/include/trace/events/spmi.h
++++ b/include/trace/events/spmi.h
+@@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin,
+ __field ( u8, sid )
+ __field ( u16, addr )
+ __field ( u8, len )
+- __dynamic_array ( u8, buf, len + 1 )
++ __dynamic_array ( u8, buf, len )
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = opcode;
+ __entry->sid = sid;
+ __entry->addr = addr;
+- __entry->len = len + 1;
+- memcpy(__get_dynamic_array(buf), buf, len + 1);
++ __entry->len = len;
++ memcpy(__get_dynamic_array(buf), buf, len);
+ ),
+
+ TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]",
+@@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end,
+ __field ( u16, addr )
+ __field ( int, ret )
+ __field ( u8, len )
+- __dynamic_array ( u8, buf, len + 1 )
++ __dynamic_array ( u8, buf, len )
+ ),
+
+ TP_fast_assign(
+@@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end,
+ __entry->sid = sid;
+ __entry->addr = addr;
+ __entry->ret = ret;
+- __entry->len = len + 1;
+- memcpy(__get_dynamic_array(buf), buf, len + 1);
++ __entry->len = len;
++ memcpy(__get_dynamic_array(buf), buf, len);
+ ),
+
+ TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]",
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 2d04eb96d4183..f09bbb6c918e2 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -925,18 +925,19 @@ TRACE_EVENT(rpc_socket_nospace,
+
+ #define rpc_show_xprt_state(x) \
+ __print_flags(x, "|", \
+- { (1UL << XPRT_LOCKED), "LOCKED"}, \
+- { (1UL << XPRT_CONNECTED), "CONNECTED"}, \
+- { (1UL << XPRT_CONNECTING), "CONNECTING"}, \
+- { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \
+- { (1UL << XPRT_BOUND), "BOUND"}, \
+- { (1UL << XPRT_BINDING), "BINDING"}, \
+- { (1UL << XPRT_CLOSING), "CLOSING"}, \
+- { (1UL << XPRT_OFFLINE), "OFFLINE"}, \
+- { (1UL << XPRT_REMOVE), "REMOVE"}, \
+- { (1UL << XPRT_CONGESTED), "CONGESTED"}, \
+- { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \
+- { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"})
++ { BIT(XPRT_LOCKED), "LOCKED" }, \
++ { BIT(XPRT_CONNECTED), "CONNECTED" }, \
++ { BIT(XPRT_CONNECTING), "CONNECTING" }, \
++ { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \
++ { BIT(XPRT_BOUND), "BOUND" }, \
++ { BIT(XPRT_BINDING), "BINDING" }, \
++ { BIT(XPRT_CLOSING), "CLOSING" }, \
++ { BIT(XPRT_OFFLINE), "OFFLINE" }, \
++ { BIT(XPRT_REMOVE), "REMOVE" }, \
++ { BIT(XPRT_CONGESTED), "CONGESTED" }, \
++ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \
++ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" })
+
+ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
+ TP_PROTO(
+@@ -975,7 +976,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect);
+ DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto);
+ DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done);
+ DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force);
+-DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup);
+ DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy);
+
+ DECLARE_EVENT_CLASS(rpc_xprt_event,
+@@ -1133,8 +1133,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
+ ),
+
+ TP_printk("task:%u@%u snd_task:%u",
+@@ -1178,8 +1181,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
++
+ __entry->cong = xprt->cong;
+ __entry->cwnd = xprt->cwnd;
+ __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+@@ -1719,10 +1726,11 @@ TRACE_EVENT(svc_xprt_create_err,
+ const char *program,
+ const char *protocol,
+ struct sockaddr *sap,
++ size_t salen,
+ const struct svc_xprt *xprt
+ ),
+
+- TP_ARGS(program, protocol, sap, xprt),
++ TP_ARGS(program, protocol, sap, salen, xprt),
+
+ TP_STRUCT__entry(
+ __field(long, error)
+@@ -1735,7 +1743,7 @@ TRACE_EVENT(svc_xprt_create_err,
+ __entry->error = PTR_ERR(xprt);
+ __assign_str(program, program);
+ __assign_str(protocol, protocol);
+- memcpy(__entry->addr, sap, sizeof(__entry->addr));
++ memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr)));
+ ),
+
+ TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
+@@ -1915,18 +1923,17 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
+ TP_STRUCT__entry(
+ __field(const void *, dr)
+ __field(u32, xid)
+- __string(addr, dr->xprt->xpt_remotebuf)
++ __sockaddr(addr, dr->addrlen)
+ ),
+
+ TP_fast_assign(
+ __entry->dr = dr;
+- __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
+- (dr->xprt_hlen>>2)));
+- __assign_str(addr, dr->xprt->xpt_remotebuf);
++ __entry->xid = be32_to_cpu(*(__be32 *)dr->args);
++ __assign_sockaddr(addr, &dr->addr, dr->addrlen);
+ ),
+
+- TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
+- __entry->xid)
++ TP_printk("addr=%pISpc dr=%p xid=0x%08x", __get_sockaddr(addr),
++ __entry->dr, __entry->xid)
+ );
+
+ #define DEFINE_SVC_DEFERRED_EVENT(name) \
+@@ -2103,17 +2110,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class,
+ TP_STRUCT__entry(
+ __field(long, status)
+ __string(service, service)
+- __array(unsigned char, addr, sizeof(struct sockaddr_in6))
++ __field(unsigned int, netns_ino)
+ ),
+
+ TP_fast_assign(
+ __entry->status = status;
+ __assign_str(service, service);
+- memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr));
++ __entry->netns_ino = xprt->xpt_net->ns.inum;
+ ),
+
+- TP_printk("listener=%pISpc service=%s status=%ld",
+- __entry->addr, __get_str(service), __entry->status
++ TP_printk("addr=listener service=%s status=%ld",
++ __get_str(service), __entry->status
+ )
+ );
+
+diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
+index 521059d8dc0a6..edcd6369de102 100644
+--- a/include/trace/events/tcp.h
++++ b/include/trace/events/tcp.h
+@@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe,
+ __entry->data_len = skb->len - __tcp_hdrlen(th);
+ __entry->snd_nxt = tp->snd_nxt;
+ __entry->snd_una = tp->snd_una;
+- __entry->snd_cwnd = tp->snd_cwnd;
++ __entry->snd_cwnd = tcp_snd_cwnd(tp);
+ __entry->snd_wnd = tp->snd_wnd;
+ __entry->rcv_wnd = tp->rcv_wnd;
+ __entry->ssthresh = tcp_current_ssthresh(sk);
+diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
+index 6ad031c71be74..d49e44fcc10f2 100644
+--- a/include/trace/events/timer.h
++++ b/include/trace/events/timer.h
+@@ -156,7 +156,11 @@ DEFINE_EVENT(timer_class, timer_cancel,
+ { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \
+ { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \
+ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \
+- { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" })
++ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \
++ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \
++ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \
++ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \
++ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" })
+
+ /**
+ * hrtimer_init - called when the hrtimer is initialized
+@@ -368,7 +372,8 @@ TRACE_EVENT(itimer_expire,
+ tick_dep_name(PERF_EVENTS) \
+ tick_dep_name(SCHED) \
+ tick_dep_name(CLOCK_UNSTABLE) \
+- tick_dep_name_end(RCU)
++ tick_dep_name(RCU) \
++ tick_dep_name_end(RCU_EXP)
+
+ #undef tick_dep_name
+ #undef tick_dep_mask_name
+diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
+index 88faf2400ec25..b2eeeb0800126 100644
+--- a/include/trace/events/vmscan.h
++++ b/include/trace/events/vmscan.h
+@@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
+ __field(unsigned long, nr_scanned)
+ __field(unsigned long, nr_skipped)
+ __field(unsigned long, nr_taken)
+- __field(isolate_mode_t, isolate_mode)
++ __field(unsigned int, isolate_mode)
+ __field(int, lru)
+ ),
+
+@@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
+ __entry->nr_scanned = nr_scanned;
+ __entry->nr_skipped = nr_skipped;
+ __entry->nr_taken = nr_taken;
+- __entry->isolate_mode = isolate_mode;
++ __entry->isolate_mode = (__force unsigned int)isolate_mode;
+ __entry->lru = lru;
+ ),
+
+diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
+index 840d1ba84cf5c..9d8303a93a365 100644
+--- a/include/trace/events/writeback.h
++++ b/include/trace/events/writeback.h
+@@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_page_template,
+ strscpy_pad(__entry->name,
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
+- __entry->ino = mapping ? mapping->host->i_ino : 0;
++ __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
+ __entry->index = page->index;
+ ),
+
+diff --git a/include/trace/perf.h b/include/trace/perf.h
+index dbc6c74defc38..5800d13146c3d 100644
+--- a/include/trace/perf.h
++++ b/include/trace/perf.h
+@@ -21,6 +21,29 @@
+ #undef __get_bitmask
+ #define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
++#undef __get_sockaddr
++#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
++
++#undef __get_rel_dynamic_array
++#define __get_rel_dynamic_array(field) \
++ ((void *)__entry + \
++ offsetof(typeof(*__entry), __rel_loc_##field) + \
++ sizeof(__entry->__rel_loc_##field) + \
++ (__entry->__rel_loc_##field & 0xffff))
++
++#undef __get_rel_dynamic_array_len
++#define __get_rel_dynamic_array_len(field) \
++ ((__entry->__rel_loc_##field >> 16) & 0xffff)
++
++#undef __get_rel_str
++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
++
++#undef __get_rel_bitmask
++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
++
++#undef __get_rel_sockaddr
++#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
++
+ #undef __perf_count
+ #define __perf_count(c) (__count = (c))
+
+diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
+index 08810a4638805..e6b19ab357815 100644
+--- a/include/trace/trace_events.h
++++ b/include/trace/trace_events.h
+@@ -108,6 +108,24 @@ TRACE_MAKE_SYSTEM_STR();
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+
++#undef __sockaddr
++#define __sockaddr(field, len) __dynamic_array(u8, field, len)
++
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item;
++
++#undef __rel_string
++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_string_len
++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_bitmask
++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_sockaddr
++#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
++
+ #undef TP_STRUCT__entry
+ #define TP_STRUCT__entry(args...) args
+
+@@ -116,7 +134,7 @@ TRACE_MAKE_SYSTEM_STR();
+ struct trace_event_raw_##name { \
+ struct trace_entry ent; \
+ tstruct \
+- char __data[0]; \
++ char __data[]; \
+ }; \
+ \
+ static struct trace_event_class event_class_##name;
+@@ -206,6 +224,24 @@ TRACE_MAKE_SYSTEM_STR();
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
++#undef __sockaddr
++#define __sockaddr(field, len) __dynamic_array(u8, field, len)
++
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(type, item, len) u32 item;
++
++#undef __rel_string
++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_string_len
++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_bitmask
++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
++
++#undef __rel_sockaddr
++#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
++
+ #undef DECLARE_EVENT_CLASS
+ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ struct trace_event_data_offsets_##call { \
+@@ -293,6 +329,20 @@ TRACE_MAKE_SYSTEM_STR();
+ #undef __get_str
+ #define __get_str(field) ((char *)__get_dynamic_array(field))
+
++#undef __get_rel_dynamic_array
++#define __get_rel_dynamic_array(field) \
++ ((void *)__entry + \
++ offsetof(typeof(*__entry), __rel_loc_##field) + \
++ sizeof(__entry->__rel_loc_##field) + \
++ (__entry->__rel_loc_##field & 0xffff))
++
++#undef __get_rel_dynamic_array_len
++#define __get_rel_dynamic_array_len(field) \
++ ((__entry->__rel_loc_##field >> 16) & 0xffff)
++
++#undef __get_rel_str
++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field))
++
+ #undef __get_bitmask
+ #define __get_bitmask(field) \
+ ({ \
+@@ -302,6 +352,21 @@ TRACE_MAKE_SYSTEM_STR();
+ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
+ })
+
++#undef __get_rel_bitmask
++#define __get_rel_bitmask(field) \
++ ({ \
++ void *__bitmask = __get_rel_dynamic_array(field); \
++ unsigned int __bitmask_size; \
++ __bitmask_size = __get_rel_dynamic_array_len(field); \
++ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
++ })
++
++#undef __get_sockaddr
++#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
++
++#undef __get_rel_sockaddr
++#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
++
+ #undef __print_flags
+ #define __print_flags(flag, delim, flag_array...) \
+ ({ \
+@@ -432,16 +497,18 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
+
+ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
++#define ALIGN_STRUCTFIELD(type) ((int)(__alignof__(struct {type b;})))
++
+ #undef __field_ext
+ #define __field_ext(_type, _item, _filter_type) { \
+ .type = #_type, .name = #_item, \
+- .size = sizeof(_type), .align = __alignof__(_type), \
++ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \
+ .is_signed = is_signed_type(_type), .filter_type = _filter_type },
+
+ #undef __field_struct_ext
+ #define __field_struct_ext(_type, _item, _filter_type) { \
+ .type = #_type, .name = #_item, \
+- .size = sizeof(_type), .align = __alignof__(_type), \
++ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \
+ 0, .filter_type = _filter_type },
+
+ #undef __field
+@@ -453,7 +520,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
+ #undef __array
+ #define __array(_type, _item, _len) { \
+ .type = #_type"["__stringify(_len)"]", .name = #_item, \
+- .size = sizeof(_type[_len]), .align = __alignof__(_type), \
++ .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \
+ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+
+ #undef __dynamic_array
+@@ -471,6 +538,27 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
++#undef __sockaddr
++#define __sockaddr(field, len) __dynamic_array(u8, field, len)
++
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(_type, _item, _len) { \
++ .type = "__rel_loc " #_type "[]", .name = #_item, \
++ .size = 4, .align = 4, \
++ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
++
++#undef __rel_string
++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_string_len
++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_bitmask
++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
++
++#undef __rel_sockaddr
++#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
++
+ #undef DECLARE_EVENT_CLASS
+ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
+ static struct trace_event_fields trace_event_fields_##call[] = { \
+@@ -519,6 +607,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
+
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(type, item, len) \
++ __item_length = (len) * sizeof(type); \
++ __data_offsets->item = __data_size + \
++ offsetof(typeof(*entry), __data) - \
++ offsetof(typeof(*entry), __rel_loc_##item) - \
++ sizeof(u32); \
++ __data_offsets->item |= __item_length << 16; \
++ __data_size += __item_length;
++
++#undef __rel_string
++#define __rel_string(item, src) __rel_dynamic_array(char, item, \
++ strlen((src) ? (const char *)(src) : "(null)") + 1)
++
++#undef __rel_string_len
++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1)
+ /*
+ * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
+ * num_possible_cpus().
+@@ -542,6 +646,16 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \
+ __bitmask_size_in_longs(nr_bits))
+
++#undef __rel_bitmask
++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \
++ __bitmask_size_in_longs(nr_bits))
++
++#undef __sockaddr
++#define __sockaddr(field, len) __dynamic_array(u8, field, len)
++
++#undef __rel_sockaddr
++#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
++
+ #undef DECLARE_EVENT_CLASS
+ #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ static inline notrace int trace_event_get_offsets_##call( \
+@@ -706,6 +820,56 @@ static inline notrace int trace_event_get_offsets_##call( \
+ #define __assign_bitmask(dst, src, nr_bits) \
+ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
++#undef __sockaddr
++#define __sockaddr(field, len) __dynamic_array(u8, field, len)
++
++#undef __get_sockaddr
++#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
++
++#define __assign_sockaddr(dest, src, len) \
++ memcpy(__get_dynamic_array(dest), src, len)
++
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(type, item, len) \
++ __entry->__rel_loc_##item = __data_offsets.item;
++
++#undef __rel_string
++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1)
++
++#undef __rel_string_len
++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1)
++
++#undef __assign_rel_str
++#define __assign_rel_str(dst, src) \
++ strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)");
++
++#undef __assign_rel_str_len
++#define __assign_rel_str_len(dst, src, len) \
++ do { \
++ memcpy(__get_rel_str(dst), (src), (len)); \
++ __get_rel_str(dst)[len] = '\0'; \
++ } while (0)
++
++#undef __rel_bitmask
++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
++
++#undef __get_rel_bitmask
++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
++
++#undef __assign_rel_bitmask
++#define __assign_rel_bitmask(dst, src, nr_bits) \
++ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
++
++#undef __rel_sockaddr
++#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
++
++#undef __get_rel_sockaddr
++#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
++
++#define __assign_rel_sockaddr(dest, src, len) \
++ memcpy(__get_rel_dynamic_array(dest), src, len)
++
++
+ #undef TP_fast_assign
+ #define TP_fast_assign(args...) args
+
+@@ -770,6 +934,12 @@ static inline void ftrace_test_probe_##call(void) \
+ #undef __get_dynamic_array_len
+ #undef __get_str
+ #undef __get_bitmask
++#undef __get_sockaddr
++#undef __get_rel_dynamic_array
++#undef __get_rel_dynamic_array_len
++#undef __get_rel_str
++#undef __get_rel_bitmask
++#undef __get_rel_sockaddr
+ #undef __print_array
+ #undef __print_hex_dump
+
+diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
+index 41b509f410bf9..f9c520ce4bf4e 100644
+--- a/include/uapi/asm-generic/poll.h
++++ b/include/uapi/asm-generic/poll.h
+@@ -29,7 +29,7 @@
+ #define POLLRDHUP 0x2000
+ #endif
+
+-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
++#define POLLFREE (__force __poll_t)0x4000
+
+ #define POLL_BUSY_LOOP (__force __poll_t)0x8000
+
+diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
+index 3ba180f550d7c..ffbe4cec9f32d 100644
+--- a/include/uapi/asm-generic/siginfo.h
++++ b/include/uapi/asm-generic/siginfo.h
+@@ -99,6 +99,7 @@ union __sifields {
+ struct {
+ unsigned long _data;
+ __u32 _type;
++ __u32 _flags;
+ } _perf;
+ };
+ } _sigfault;
+@@ -164,6 +165,7 @@ typedef struct siginfo {
+ #define si_pkey _sifields._sigfault._addr_pkey._pkey
+ #define si_perf_data _sifields._sigfault._perf._data
+ #define si_perf_type _sifields._sigfault._perf._type
++#define si_perf_flags _sifields._sigfault._perf._flags
+ #define si_band _sifields._sigpoll._band
+ #define si_fd _sifields._sigpoll._fd
+ #define si_call_addr _sifields._sigsys._call_addr
+@@ -270,6 +272,11 @@ typedef struct siginfo {
+ * that are of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP)
+ */
+
++/*
++ * Flags for si_perf_flags if SIGTRAP si_code is TRAP_PERF.
++ */
++#define TRAP_PERF_FLAG_ASYNC (1u << 0)
++
+ /*
+ * SIGCHLD si_codes
+ */
+diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
+index fe929e7b77ca1..7572f2f46ee89 100644
+--- a/include/uapi/asm-generic/signal-defs.h
++++ b/include/uapi/asm-generic/signal-defs.h
+@@ -45,6 +45,7 @@
+ #define SA_UNSUPPORTED 0x00000400
+ #define SA_EXPOSE_TAGBITS 0x00000800
+ /* 0x00010000 used on mips */
++/* 0x00800000 used for internal SA_IMMUTABLE */
+ /* 0x01000000 used on x86 */
+ /* 0x02000000 used on x86 */
+ /*
+diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
+index 9f4bb4a6f358c..a50e4646bd6de 100644
+--- a/include/uapi/drm/drm_fourcc.h
++++ b/include/uapi/drm/drm_fourcc.h
+@@ -308,6 +308,13 @@ extern "C" {
+ */
+ #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */
+
++/* 2 plane YCbCr420.
++ * 3 10 bit components and 2 padding bits packed into 4 bytes.
++ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian
++ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian
++ */
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
++
+ /* 3 plane non-subsampled (444) YCbCr
+ * 16 bits per component, but only 10 bits are used and 6 bits are padded
+ * index 0: Y plane, [15:0] Y:x [10:6] little endian
+@@ -842,6 +849,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ * and UV. Some SAND-using hardware stores UV in a separate tiled
+ * image from Y to reduce the column height, which is not supported
+ * with these modifiers.
++ *
++ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also
++ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes
++ * wide, but as this is a 10 bpp format that translates to 96 pixels.
+ */
+
+ #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \
+@@ -1352,11 +1363,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
+ #define AMD_FMT_MOD_PIPE_MASK 0x7
+
+ #define AMD_FMT_MOD_SET(field, value) \
+- ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT)
++ ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT)
+ #define AMD_FMT_MOD_GET(field, value) \
+ (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK)
+ #define AMD_FMT_MOD_CLEAR(field) \
+- (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
++ (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
+
+ #if defined(__cplusplus)
+ }
+diff --git a/include/uapi/linux/affs_hardblocks.h b/include/uapi/linux/affs_hardblocks.h
+index 5e2fb8481252a..a5aff2eb5f708 100644
+--- a/include/uapi/linux/affs_hardblocks.h
++++ b/include/uapi/linux/affs_hardblocks.h
+@@ -7,42 +7,42 @@
+ /* Just the needed definitions for the RDB of an Amiga HD. */
+
+ struct RigidDiskBlock {
+- __u32 rdb_ID;
++ __be32 rdb_ID;
+ __be32 rdb_SummedLongs;
+- __s32 rdb_ChkSum;
+- __u32 rdb_HostID;
++ __be32 rdb_ChkSum;
++ __be32 rdb_HostID;
+ __be32 rdb_BlockBytes;
+- __u32 rdb_Flags;
+- __u32 rdb_BadBlockList;
++ __be32 rdb_Flags;
++ __be32 rdb_BadBlockList;
+ __be32 rdb_PartitionList;
+- __u32 rdb_FileSysHeaderList;
+- __u32 rdb_DriveInit;
+- __u32 rdb_Reserved1[6];
+- __u32 rdb_Cylinders;
+- __u32 rdb_Sectors;
+- __u32 rdb_Heads;
+- __u32 rdb_Interleave;
+- __u32 rdb_Park;
+- __u32 rdb_Reserved2[3];
+- __u32 rdb_WritePreComp;
+- __u32 rdb_ReducedWrite;
+- __u32 rdb_StepRate;
+- __u32 rdb_Reserved3[5];
+- __u32 rdb_RDBBlocksLo;
+- __u32 rdb_RDBBlocksHi;
+- __u32 rdb_LoCylinder;
+- __u32 rdb_HiCylinder;
+- __u32 rdb_CylBlocks;
+- __u32 rdb_AutoParkSeconds;
+- __u32 rdb_HighRDSKBlock;
+- __u32 rdb_Reserved4;
++ __be32 rdb_FileSysHeaderList;
++ __be32 rdb_DriveInit;
++ __be32 rdb_Reserved1[6];
++ __be32 rdb_Cylinders;
++ __be32 rdb_Sectors;
++ __be32 rdb_Heads;
++ __be32 rdb_Interleave;
++ __be32 rdb_Park;
++ __be32 rdb_Reserved2[3];
++ __be32 rdb_WritePreComp;
++ __be32 rdb_ReducedWrite;
++ __be32 rdb_StepRate;
++ __be32 rdb_Reserved3[5];
++ __be32 rdb_RDBBlocksLo;
++ __be32 rdb_RDBBlocksHi;
++ __be32 rdb_LoCylinder;
++ __be32 rdb_HiCylinder;
++ __be32 rdb_CylBlocks;
++ __be32 rdb_AutoParkSeconds;
++ __be32 rdb_HighRDSKBlock;
++ __be32 rdb_Reserved4;
+ char rdb_DiskVendor[8];
+ char rdb_DiskProduct[16];
+ char rdb_DiskRevision[4];
+ char rdb_ControllerVendor[8];
+ char rdb_ControllerProduct[16];
+ char rdb_ControllerRevision[4];
+- __u32 rdb_Reserved5[10];
++ __be32 rdb_Reserved5[10];
+ };
+
+ #define IDNAME_RIGIDDISK 0x5244534B /* "RDSK" */
+@@ -50,16 +50,16 @@ struct RigidDiskBlock {
+ struct PartitionBlock {
+ __be32 pb_ID;
+ __be32 pb_SummedLongs;
+- __s32 pb_ChkSum;
+- __u32 pb_HostID;
++ __be32 pb_ChkSum;
++ __be32 pb_HostID;
+ __be32 pb_Next;
+- __u32 pb_Flags;
+- __u32 pb_Reserved1[2];
+- __u32 pb_DevFlags;
++ __be32 pb_Flags;
++ __be32 pb_Reserved1[2];
++ __be32 pb_DevFlags;
+ __u8 pb_DriveName[32];
+- __u32 pb_Reserved2[15];
++ __be32 pb_Reserved2[15];
+ __be32 pb_Environment[17];
+- __u32 pb_EReserved[15];
++ __be32 pb_EReserved[15];
+ };
+
+ #define IDNAME_PARTITION 0x50415254 /* "PART" */
+diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
+index daa481729e9ba..27799acd0e5e0 100644
+--- a/include/uapi/linux/audit.h
++++ b/include/uapi/linux/audit.h
+@@ -182,7 +182,7 @@
+ #define AUDIT_MAX_KEY_LEN 256
+ #define AUDIT_BITMASK_SIZE 64
+ #define AUDIT_WORD(nr) ((__u32)((nr)/32))
+-#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32))
++#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32))
+
+ #define AUDIT_SYSCALL_CLASSES 16
+ #define AUDIT_CLASS_DIR_WRITE 0
+diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
+index 62e625356dc81..08be539605fca 100644
+--- a/include/uapi/linux/auto_dev-ioctl.h
++++ b/include/uapi/linux/auto_dev-ioctl.h
+@@ -109,7 +109,7 @@ struct autofs_dev_ioctl {
+ struct args_ismountpoint ismountpoint;
+ };
+
+- char path[0];
++ char path[];
+ };
+
+ static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
+index 656a326821a2b..321965feee354 100644
+--- a/include/uapi/linux/blkzoned.h
++++ b/include/uapi/linux/blkzoned.h
+@@ -51,13 +51,13 @@ enum blk_zone_type {
+ *
+ * The Zone Condition state machine in the ZBC/ZAC standards maps the above
+ * deinitions as:
+- * - ZC1: Empty | BLK_ZONE_EMPTY
++ * - ZC1: Empty | BLK_ZONE_COND_EMPTY
+ * - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
+ * - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
+- * - ZC4: Closed | BLK_ZONE_CLOSED
+- * - ZC5: Full | BLK_ZONE_FULL
+- * - ZC6: Read Only | BLK_ZONE_READONLY
+- * - ZC7: Offline | BLK_ZONE_OFFLINE
++ * - ZC4: Closed | BLK_ZONE_COND_CLOSED
++ * - ZC5: Full | BLK_ZONE_COND_FULL
++ * - ZC6: Read Only | BLK_ZONE_COND_READONLY
++ * - ZC7: Offline | BLK_ZONE_COND_OFFLINE
+ *
+ * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
+ * be considered invalid.
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index 791f31dd0abee..a887e582f0e78 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -2276,8 +2276,8 @@ union bpf_attr {
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+- * * 0, if current task belongs to the cgroup2.
+- * * 1, if current task does not belong to the cgroup2.
++ * * 1, if current task belongs to the cgroup2.
++ * * 0, if current task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+@@ -2965,8 +2965,8 @@ union bpf_attr {
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+- * A non-negative value equal to or less than *size* on success,
+- * or a negative error in case of failure.
++ * The non-negative copied *buf* length equal to or less than
++ * *size* on success, or a negative error in case of failure.
+ *
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+@@ -4269,8 +4269,8 @@ union bpf_attr {
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+- * A non-negative value equal to or less than *size* on success,
+- * or a negative error in case of failure.
++ * The non-negative copied *buf* length equal to or less than
++ * *size* on success, or a negative error in case of failure.
+ *
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ * Description
+@@ -5347,7 +5347,8 @@ struct bpf_sock {
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+- __u32 dst_port; /* network byte order */
++ __be16 dst_port; /* network byte order */
++ __u16 :16; /* zero padding */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+@@ -6222,7 +6223,8 @@ struct bpf_sk_lookup {
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+ __u32 remote_ip4; /* Network byte order */
+ __u32 remote_ip6[4]; /* Network byte order */
+- __u32 remote_port; /* Network byte order */
++ __be16 remote_port; /* Network byte order */
++ __u16 :16; /* Zero padding */
+ __u32 local_ip4; /* Network byte order */
+ __u32 local_ip6[4]; /* Network byte order */
+ __u32 local_port; /* Host byte order */
+diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
+index d7d3cfead0563..5f4513f2d41db 100644
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -181,6 +181,7 @@ struct btrfs_scrub_progress {
+ };
+
+ #define BTRFS_SCRUB_READONLY 1
++#define BTRFS_SCRUB_SUPPORTED_FLAGS (BTRFS_SCRUB_READONLY)
+ struct btrfs_ioctl_scrub_args {
+ __u64 devid; /* in */
+ __u64 start; /* in */
+diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
+index e1c4c732aabac..5416f1f1a77a8 100644
+--- a/include/uapi/linux/btrfs_tree.h
++++ b/include/uapi/linux/btrfs_tree.h
+@@ -146,7 +146,9 @@
+
+ /*
+ * dir items are the name -> inode pointers in a directory. There is one
+- * for every name in a directory.
++ * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used
++ * but it's still defined here for documentation purposes and to help avoid
++ * having its numerical value reused in the future.
+ */
+ #define BTRFS_DIR_LOG_ITEM_KEY 60
+ #define BTRFS_DIR_LOG_INDEX_KEY 72
+diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h
+index 2199adc6a6c20..80aa5c41a7636 100644
+--- a/include/uapi/linux/byteorder/big_endian.h
++++ b/include/uapi/linux/byteorder/big_endian.h
+@@ -9,6 +9,7 @@
+ #define __BIG_ENDIAN_BITFIELD
+ #endif
+
++#include <linux/stddef.h>
+ #include <linux/types.h>
+ #include <linux/swab.h>
+
+diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h
+index 601c904fd5cd9..cd98982e7523e 100644
+--- a/include/uapi/linux/byteorder/little_endian.h
++++ b/include/uapi/linux/byteorder/little_endian.h
+@@ -9,6 +9,7 @@
+ #define __LITTLE_ENDIAN_BITFIELD
+ #endif
+
++#include <linux/stddef.h>
+ #include <linux/types.h>
+ #include <linux/swab.h>
+
+diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h
+index 34633283de641..a1000cb630632 100644
+--- a/include/uapi/linux/can/error.h
++++ b/include/uapi/linux/can/error.h
+@@ -120,6 +120,9 @@
+ #define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /* 0111 0000 */
+ #define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /* 1000 0000 */
+
+-/* controller specific additional information / data[5..7] */
++/* data[5] is reserved (do not use) */
++
++/* TX error counter / data[6] */
++/* RX error counter / data[7] */
+
+ #endif /* _UAPI_CAN_ERROR_H */
+diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
+index c55935b64ccc8..590f8aea2b6d2 100644
+--- a/include/uapi/linux/can/isotp.h
++++ b/include/uapi/linux/can/isotp.h
+@@ -137,20 +137,16 @@ struct can_isotp_ll_options {
+ #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */
+ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */
+
+-/* default values */
++/* protocol machine default values */
+
+ #define CAN_ISOTP_DEFAULT_FLAGS 0
+ #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00
+ #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */
+-#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0
++#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */
+ #define CAN_ISOTP_DEFAULT_RECV_BS 0
+ #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00
+ #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0
+
+-#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU
+-#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN
+-#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0
+-
+ /*
+ * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
+ *
+@@ -162,4 +158,24 @@ struct can_isotp_ll_options {
+ * consistency and copied directly into the flow control (FC) frame.
+ */
+
++/* link layer default values => make use of Classical CAN frames */
++
++#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU
++#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN
++#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0
++
++/*
++ * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as
++ * it only makes sense for isotp implementation tests to run without
++ * a N_As value. As user space applications usually do not set the
++ * frame_txtime element of struct can_isotp_options the new in-kernel
++ * default is very likely overwritten with zero when the sockopt()
++ * CAN_ISOTP_OPTS is invoked.
++ * To make sure that a N_As value of zero is only set intentional the
++ * value '0' is now interpreted as 'do not change the current value'.
++ * When a frame_txtime of zero is required for testing purposes this
++ * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime.
++ */
++#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF
++
+ #endif /* !_UAPI_CAN_ISOTP_H */
+diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
+index 463d1ba2232ac..3d61a0ae055d4 100644
+--- a/include/uapi/linux/capability.h
++++ b/include/uapi/linux/capability.h
+@@ -426,7 +426,7 @@ struct vfs_ns_cap_data {
+ */
+
+ #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */
+-#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */
++#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */
+
+
+ #endif /* _UAPI_LINUX_CAPABILITY_H */
+diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
+index af2a44c08683d..a429381e7ca50 100644
+--- a/include/uapi/linux/const.h
++++ b/include/uapi/linux/const.h
+@@ -28,7 +28,7 @@
+ #define _BITUL(x) (_UL(1) << (x))
+ #define _BITULL(x) (_ULL(1) << (x))
+
+-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
++#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
+ #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+ #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h
+new file mode 100644
+index 0000000000000..6225c5aebe06a
+--- /dev/null
++++ b/include/uapi/linux/cyclades.h
+@@ -0,0 +1,35 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++
++#ifndef _UAPI_LINUX_CYCLADES_H
++#define _UAPI_LINUX_CYCLADES_H
++
++#warning "Support for features provided by this header has been removed"
++#warning "Please consider updating your code"
++
++struct cyclades_monitor {
++ unsigned long int_count;
++ unsigned long char_count;
++ unsigned long char_max;
++ unsigned long char_last;
++};
++
++#define CYGETMON 0x435901
++#define CYGETTHRESH 0x435902
++#define CYSETTHRESH 0x435903
++#define CYGETDEFTHRESH 0x435904
++#define CYSETDEFTHRESH 0x435905
++#define CYGETTIMEOUT 0x435906
++#define CYSETTIMEOUT 0x435907
++#define CYGETDEFTIMEOUT 0x435908
++#define CYSETDEFTIMEOUT 0x435909
++#define CYSETRFLOW 0x43590a
++#define CYGETRFLOW 0x43590b
++#define CYSETRTSDTR_INV 0x43590c
++#define CYGETRTSDTR_INV 0x43590d
++#define CYZSETPOLLCYCLE 0x43590e
++#define CYZGETPOLLCYCLE 0x43590f
++#define CYGETCD1400VER 0x435910
++#define CYSETWAIT 0x435912
++#define CYGETWAIT 0x435913
++
++#endif /* _UAPI_LINUX_CYCLADES_H */
+diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
+index 8e4a2ca0bcbf7..b1523cb8ab307 100644
+--- a/include/uapi/linux/dma-buf.h
++++ b/include/uapi/linux/dma-buf.h
+@@ -92,7 +92,7 @@ struct dma_buf_sync {
+ * between them in actual uapi, they're just different numbers.
+ */
+ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *)
+-#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32)
+-#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64)
++#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32)
++#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+
+ #endif
+diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h
+deleted file mode 100644
+index 36ca71bd8bbe2..0000000000000
+--- a/include/uapi/linux/dn.h
++++ /dev/null
+@@ -1,149 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+-#ifndef _LINUX_DN_H
+-#define _LINUX_DN_H
+-
+-#include <linux/ioctl.h>
+-#include <linux/types.h>
+-#include <linux/if_ether.h>
+-
+-/*
+-
+- DECnet Data Structures and Constants
+-
+-*/
+-
+-/*
+- * DNPROTO_NSP can't be the same as SOL_SOCKET,
+- * so increment each by one (compared to ULTRIX)
+- */
+-#define DNPROTO_NSP 2 /* NSP protocol number */
+-#define DNPROTO_ROU 3 /* Routing protocol number */
+-#define DNPROTO_NML 4 /* Net mgt protocol number */
+-#define DNPROTO_EVL 5 /* Evl protocol number (usr) */
+-#define DNPROTO_EVR 6 /* Evl protocol number (evl) */
+-#define DNPROTO_NSPT 7 /* NSP trace protocol number */
+-
+-
+-#define DN_ADDL 2
+-#define DN_MAXADDL 2 /* ULTRIX headers have 20 here, but pathworks has 2 */
+-#define DN_MAXOPTL 16
+-#define DN_MAXOBJL 16
+-#define DN_MAXACCL 40
+-#define DN_MAXALIASL 128
+-#define DN_MAXNODEL 256
+-#define DNBUFSIZE 65023
+-
+-/*
+- * SET/GET Socket options - must match the DSO_ numbers below
+- */
+-#define SO_CONDATA 1
+-#define SO_CONACCESS 2
+-#define SO_PROXYUSR 3
+-#define SO_LINKINFO 7
+-
+-#define DSO_CONDATA 1 /* Set/Get connect data */
+-#define DSO_DISDATA 10 /* Set/Get disconnect data */
+-#define DSO_CONACCESS 2 /* Set/Get connect access data */
+-#define DSO_ACCEPTMODE 4 /* Set/Get accept mode */
+-#define DSO_CONACCEPT 5 /* Accept deferred connection */
+-#define DSO_CONREJECT 6 /* Reject deferred connection */
+-#define DSO_LINKINFO 7 /* Set/Get link information */
+-#define DSO_STREAM 8 /* Set socket type to stream */
+-#define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */
+-#define DSO_MAXWINDOW 11 /* Maximum window size allowed */
+-#define DSO_NODELAY 12 /* Turn off nagle */
+-#define DSO_CORK 13 /* Wait for more data! */
+-#define DSO_SERVICES 14 /* NSP Services field */
+-#define DSO_INFO 15 /* NSP Info field */
+-#define DSO_MAX 15 /* Maximum option number */
+-
+-
+-/* LINK States */
+-#define LL_INACTIVE 0
+-#define LL_CONNECTING 1
+-#define LL_RUNNING 2
+-#define LL_DISCONNECTING 3
+-
+-#define ACC_IMMED 0
+-#define ACC_DEFER 1
+-
+-#define SDF_WILD 1 /* Wild card object */
+-#define SDF_PROXY 2 /* Addr eligible for proxy */
+-#define SDF_UICPROXY 4 /* Use uic-based proxy */
+-
+-/* Structures */
+-
+-
+-struct dn_naddr {
+- __le16 a_len;
+- __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */
+-};
+-
+-struct sockaddr_dn {
+- __u16 sdn_family;
+- __u8 sdn_flags;
+- __u8 sdn_objnum;
+- __le16 sdn_objnamel;
+- __u8 sdn_objname[DN_MAXOBJL];
+- struct dn_naddr sdn_add;
+-};
+-#define sdn_nodeaddrl sdn_add.a_len /* Node address length */
+-#define sdn_nodeaddr sdn_add.a_addr /* Node address */
+-
+-
+-
+-/*
+- * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure
+- */
+-struct optdata_dn {
+- __le16 opt_status; /* Extended status return */
+-#define opt_sts opt_status
+- __le16 opt_optl; /* Length of user data */
+- __u8 opt_data[16]; /* User data */
+-};
+-
+-struct accessdata_dn {
+- __u8 acc_accl;
+- __u8 acc_acc[DN_MAXACCL];
+- __u8 acc_passl;
+- __u8 acc_pass[DN_MAXACCL];
+- __u8 acc_userl;
+- __u8 acc_user[DN_MAXACCL];
+-};
+-
+-/*
+- * DECnet logical link information structure
+- */
+-struct linkinfo_dn {
+- __u16 idn_segsize; /* Segment size for link */
+- __u8 idn_linkstate; /* Logical link state */
+-};
+-
+-/*
+- * Ethernet address format (for DECnet)
+- */
+-union etheraddress {
+- __u8 dne_addr[ETH_ALEN]; /* Full ethernet address */
+- struct {
+- __u8 dne_hiord[4]; /* DECnet HIORD prefix */
+- __u8 dne_nodeaddr[2]; /* DECnet node address */
+- } dne_remote;
+-};
+-
+-
+-/*
+- * DECnet physical socket address format
+- */
+-struct dn_addr {
+- __le16 dna_family; /* AF_DECnet */
+- union etheraddress dna_netaddr; /* DECnet ethernet address */
+-};
+-
+-#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */
+-
+-#define SIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr)
+-#define SIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr)
+-#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int)
+-#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int)
+-
+-#endif /* _LINUX_DN_H */
+diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
+index 5545f1ca9237c..97e5d303810f2 100644
+--- a/include/uapi/linux/ethtool_netlink.h
++++ b/include/uapi/linux/ethtool_netlink.h
+@@ -407,7 +407,9 @@ enum {
+ ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
+ ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
+
+- /* add new constants above here */
++ /* add new constants above here
++ * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
++ */
+ __ETHTOOL_A_PAUSE_STAT_CNT,
+ ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
+ };
+@@ -741,7 +743,7 @@ enum {
+
+ /* add new constants above here */
+ __ETHTOOL_A_STATS_GRP_CNT,
+- ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1)
++ ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
+ };
+
+ enum {
+diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
+index 8a3432d0f0dcb..e687658843b1c 100644
+--- a/include/uapi/linux/eventpoll.h
++++ b/include/uapi/linux/eventpoll.h
+@@ -41,6 +41,12 @@
+ #define EPOLLMSG (__force __poll_t)0x00000400
+ #define EPOLLRDHUP (__force __poll_t)0x00002000
+
++/*
++ * Internal flag - wakeup generated by io_uring, used to detect recursion back
++ * into the io_uring poll handler.
++ */
++#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27))
++
+ /* Set exclusive wakeup mode for the target file descriptor */
+ #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28))
+
+diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
+index c750eac09fc9c..f7c01709cb0ff 100644
+--- a/include/uapi/linux/idxd.h
++++ b/include/uapi/linux/idxd.h
+@@ -272,7 +272,7 @@ struct dsa_completion_record {
+ };
+
+ uint32_t delta_rec_size;
+- uint32_t crc_val;
++ uint64_t crc_val;
+
+ /* DIF check & strip */
+ struct {
+diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
+index eebd3894fe89a..4ac53b30b6dc9 100644
+--- a/include/uapi/linux/if_link.h
++++ b/include/uapi/linux/if_link.h
+@@ -858,6 +858,7 @@ enum {
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
++ IFLA_BOND_MISSED_MAX,
+ __IFLA_BOND_MAX,
+ };
+
+diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
+index 14168225cecdc..c4702fff64d3a 100644
+--- a/include/uapi/linux/in.h
++++ b/include/uapi/linux/in.h
+@@ -159,6 +159,8 @@ struct in_addr {
+ #define MCAST_MSFILTER 48
+ #define IP_MULTICAST_ALL 49
+ #define IP_UNICAST_IF 50
++#define IP_LOCAL_PORT_RANGE 51
++#define IP_PROTOCOL 52
+
+ #define MCAST_EXCLUDE 0
+ #define MCAST_INCLUDE 1
+diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
+index 225ec87d4f228..7989d9483ea75 100644
+--- a/include/uapi/linux/input-event-codes.h
++++ b/include/uapi/linux/input-event-codes.h
+@@ -278,7 +278,8 @@
+ #define KEY_PAUSECD 201
+ #define KEY_PROG3 202
+ #define KEY_PROG4 203
+-#define KEY_DASHBOARD 204 /* AL Dashboard */
++#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */
++#define KEY_DASHBOARD KEY_ALL_APPLICATIONS
+ #define KEY_SUSPEND 205
+ #define KEY_CLOSE 206 /* AC Close */
+ #define KEY_PLAY 207
+@@ -612,6 +613,7 @@
+ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */
+ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */
+ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */
++#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */
+
+ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
+ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
+diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
+index e42d13b55cf3a..860bbf6bf29cb 100644
+--- a/include/uapi/linux/ip.h
++++ b/include/uapi/linux/ip.h
+@@ -18,6 +18,7 @@
+ #ifndef _UAPI_LINUX_IP_H
+ #define _UAPI_LINUX_IP_H
+ #include <linux/types.h>
++#include <linux/stddef.h>
+ #include <asm/byteorder.h>
+
+ #define IPTOS_TOS_MASK 0x1E
+@@ -100,8 +101,10 @@ struct iphdr {
+ __u8 ttl;
+ __u8 protocol;
+ __sum16 check;
+- __be32 saddr;
+- __be32 daddr;
++ __struct_group(/* no tag */, addrs, /* no attrs */,
++ __be32 saddr;
++ __be32 daddr;
++ );
+ /*The options start here. */
+ };
+
+diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
+index b243a53fa985b..39c6add59a1a6 100644
+--- a/include/uapi/linux/ipv6.h
++++ b/include/uapi/linux/ipv6.h
+@@ -4,6 +4,7 @@
+
+ #include <linux/libc-compat.h>
+ #include <linux/types.h>
++#include <linux/stddef.h>
+ #include <linux/in6.h>
+ #include <asm/byteorder.h>
+
+@@ -130,8 +131,10 @@ struct ipv6hdr {
+ __u8 nexthdr;
+ __u8 hop_limit;
+
+- struct in6_addr saddr;
+- struct in6_addr daddr;
++ __struct_group(/* no tag */, addrs, /* no attrs */,
++ struct in6_addr saddr;
++ struct in6_addr daddr;
++ );
+ };
+
+
+diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
+index b3d952067f59c..21c8d58283c9e 100644
+--- a/include/uapi/linux/landlock.h
++++ b/include/uapi/linux/landlock.h
+@@ -33,7 +33,9 @@ struct landlock_ruleset_attr {
+ * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI
+ * version.
+ */
++/* clang-format off */
+ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0)
++/* clang-format on */
+
+ /**
+ * enum landlock_rule_type - Landlock rule type
+@@ -60,8 +62,9 @@ struct landlock_path_beneath_attr {
+ */
+ __u64 allowed_access;
+ /**
+- * @parent_fd: File descriptor, open with ``O_PATH``, which identifies
+- * the parent directory of a file hierarchy, or just a file.
++ * @parent_fd: File descriptor, preferably opened with ``O_PATH``,
++ * which identifies the parent directory of a file hierarchy, or just a
++ * file.
+ */
+ __s32 parent_fd;
+ /*
+@@ -120,6 +123,7 @@ struct landlock_path_beneath_attr {
+ * :manpage:`access(2)`.
+ * Future Landlock evolutions will enable to restrict them.
+ */
++/* clang-format off */
+ #define LANDLOCK_ACCESS_FS_EXECUTE (1ULL << 0)
+ #define LANDLOCK_ACCESS_FS_WRITE_FILE (1ULL << 1)
+ #define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2)
+@@ -133,5 +137,6 @@ struct landlock_path_beneath_attr {
+ #define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10)
+ #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11)
+ #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12)
++/* clang-format on */
+
+ #endif /* _UAPI_LINUX_LANDLOCK_H */
+diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
+index f66038b9551fa..80c40194e2977 100644
+--- a/include/uapi/linux/mptcp.h
++++ b/include/uapi/linux/mptcp.h
+@@ -129,19 +129,21 @@ struct mptcp_info {
+ * MPTCP_EVENT_REMOVED: token, rem_id
+ * An address has been lost by the peer.
+ *
+- * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
+- * daddr4 | daddr6, sport, dport, backup,
+- * if_idx [, error]
++ * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id,
++ * saddr4 | saddr6, daddr4 | daddr6, sport,
++ * dport, backup, if_idx [, error]
+ * A new subflow has been established. 'error' should not be set.
+ *
+- * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
+- * sport, dport, backup, if_idx [, error]
++ * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6,
++ * daddr4 | daddr6, sport, dport, backup, if_idx
++ * [, error]
+ * A subflow has been closed. An error (copy of sk_err) could be set if an
+ * error has been detected for this subflow.
+ *
+- * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
+- * sport, dport, backup, if_idx [, error]
+- * The priority of a subflow has changed. 'error' should not be set.
++ * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6,
++ * daddr4 | daddr6, sport, dport, backup, if_idx
++ * [, error]
++ * The priority of a subflow has changed. 'error' should not be set.
+ */
+ enum mptcp_event_type {
+ MPTCP_EVENT_UNSPEC = 0,
+diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
+index 4b3395082d15c..26071021e986f 100644
+--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
+@@ -106,7 +106,7 @@ enum ip_conntrack_status {
+ IPS_NAT_CLASH = IPS_UNTRACKED,
+ #endif
+
+- /* Conntrack got a helper explicitly attached via CT target. */
++ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */
+ IPS_HELPER_BIT = 13,
+ IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+index edc6ddab0de6a..2d6f80d75ae74 100644
+--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+@@ -15,7 +15,7 @@ enum sctp_conntrack {
+ SCTP_CONNTRACK_SHUTDOWN_RECD,
+ SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_SENT,
+- SCTP_CONNTRACK_HEARTBEAT_ACKED,
++ SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */
+ SCTP_CONNTRACK_MAX
+ };
+
+diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
+index e94d1fa554cb2..07871c8a06014 100644
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -753,11 +753,13 @@ enum nft_dynset_attributes {
+ * @NFT_PAYLOAD_LL_HEADER: link layer header
+ * @NFT_PAYLOAD_NETWORK_HEADER: network header
+ * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
++ * @NFT_PAYLOAD_INNER_HEADER: inner header / payload
+ */
+ enum nft_payload_bases {
+ NFT_PAYLOAD_LL_HEADER,
+ NFT_PAYLOAD_NETWORK_HEADER,
+ NFT_PAYLOAD_TRANSPORT_HEADER,
++ NFT_PAYLOAD_INNER_HEADER,
+ };
+
+ /**
+diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
+index 6b20fb22717b2..aa805e6d4e284 100644
+--- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
++++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
+@@ -94,7 +94,7 @@ enum ctattr_timeout_sctp {
+ CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
+ CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
+- CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
++ CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */
+ __CTA_TIMEOUT_SCTP_MAX
+ };
+ #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)
+diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h
+index 49ddcdc61c094..7bfb31a66fc9b 100644
+--- a/include/uapi/linux/netfilter/xt_IDLETIMER.h
++++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h
+@@ -1,6 +1,5 @@
++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+ /*
+- * linux/include/linux/netfilter/xt_IDLETIMER.h
+- *
+ * Header file for Xtables timer target module.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+@@ -10,20 +9,6 @@
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * version 2 as published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope that it will be useful, but
+- * WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+- * 02110-1301 USA
+ */
+
+ #ifndef _XT_IDLETIMER_H
+diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h
+deleted file mode 100644
+index 3c77f54560f21..0000000000000
+--- a/include/uapi/linux/netfilter_decnet.h
++++ /dev/null
+@@ -1,72 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+-#ifndef __LINUX_DECNET_NETFILTER_H
+-#define __LINUX_DECNET_NETFILTER_H
+-
+-/* DECnet-specific defines for netfilter.
+- * This file (C) Steve Whitehouse 1999 derived from the
+- * ipv4 netfilter header file which is
+- * (C)1998 Rusty Russell -- This code is GPL.
+- */
+-
+-#include <linux/netfilter.h>
+-
+-/* only for userspace compatibility */
+-#ifndef __KERNEL__
+-
+-#include <limits.h> /* for INT_MIN, INT_MAX */
+-
+-/* kernel define is in netfilter_defs.h */
+-#define NF_DN_NUMHOOKS 7
+-#endif /* ! __KERNEL__ */
+-
+-/* DECnet Hooks */
+-/* After promisc drops, checksum checks. */
+-#define NF_DN_PRE_ROUTING 0
+-/* If the packet is destined for this box. */
+-#define NF_DN_LOCAL_IN 1
+-/* If the packet is destined for another interface. */
+-#define NF_DN_FORWARD 2
+-/* Packets coming from a local process. */
+-#define NF_DN_LOCAL_OUT 3
+-/* Packets about to hit the wire. */
+-#define NF_DN_POST_ROUTING 4
+-/* Input Hello Packets */
+-#define NF_DN_HELLO 5
+-/* Input Routing Packets */
+-#define NF_DN_ROUTE 6
+-
+-enum nf_dn_hook_priorities {
+- NF_DN_PRI_FIRST = INT_MIN,
+- NF_DN_PRI_CONNTRACK = -200,
+- NF_DN_PRI_MANGLE = -150,
+- NF_DN_PRI_NAT_DST = -100,
+- NF_DN_PRI_FILTER = 0,
+- NF_DN_PRI_NAT_SRC = 100,
+- NF_DN_PRI_DNRTMSG = 200,
+- NF_DN_PRI_LAST = INT_MAX,
+-};
+-
+-struct nf_dn_rtmsg {
+- int nfdn_ifindex;
+-};
+-
+-#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)))
+-
+-#ifndef __KERNEL__
+-/* backwards compatibility for userspace */
+-#define DNRMG_L1_GROUP 0x01
+-#define DNRMG_L2_GROUP 0x02
+-#endif
+-
+-enum {
+- DNRNG_NLGRP_NONE,
+-#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE
+- DNRNG_NLGRP_L1,
+-#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1
+- DNRNG_NLGRP_L2,
+-#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2
+- __DNRNG_NLGRP_MAX
+-};
+-#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1)
+-
+-#endif /*__LINUX_DECNET_NETFILTER_H*/
+diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
+index 4c0cde075c277..4940a93315995 100644
+--- a/include/uapi/linux/netlink.h
++++ b/include/uapi/linux/netlink.h
+@@ -20,7 +20,7 @@
+ #define NETLINK_CONNECTOR 11
+ #define NETLINK_NETFILTER 12 /* netfilter subsystem */
+ #define NETLINK_IP6_FW 13
+-#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
++#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */
+ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+ #define NETLINK_GENERIC 16
+ /* leave room for NETLINK_DM (DM Events) */
+diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
+index f6e3c8c9c7449..4fa4e979e948a 100644
+--- a/include/uapi/linux/nfc.h
++++ b/include/uapi/linux/nfc.h
+@@ -263,7 +263,7 @@ enum nfc_sdp_attr {
+ #define NFC_SE_ENABLED 0x1
+
+ struct sockaddr_nfc {
+- sa_family_t sa_family;
++ __kernel_sa_family_t sa_family;
+ __u32 dev_idx;
+ __u32 target_idx;
+ __u32 nfc_protocol;
+@@ -271,14 +271,14 @@ struct sockaddr_nfc {
+
+ #define NFC_LLCP_MAX_SERVICE_NAME 63
+ struct sockaddr_nfc_llcp {
+- sa_family_t sa_family;
++ __kernel_sa_family_t sa_family;
+ __u32 dev_idx;
+ __u32 target_idx;
+ __u32 nfc_protocol;
+ __u8 dsap; /* Destination SAP, if known */
+ __u8 ssap; /* Source SAP to be bound to */
+ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
+- size_t service_name_len;
++ __kernel_size_t service_name_len;
+ };
+
+ /* NFC socket protocols */
+diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
+index 87b55755f4ffe..d9db7ad438908 100644
+--- a/include/uapi/linux/omap3isp.h
++++ b/include/uapi/linux/omap3isp.h
+@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
+ * struct omap3isp_stat_data - Statistic data sent to or received from user
+ * @ts: Timestamp of returned framestats.
+ * @buf: Pointer to pass to user.
++ * @buf_size: Size of buffer.
+ * @frame_number: Frame number of requested stats.
+ * @cur_frame: Current frame number being processed.
+ * @config_counter: Number of the configuration associated with the data.
+@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
+ struct timeval ts;
+ #endif
+ void __user *buf;
+- __u32 buf_size;
+- __u16 frame_number;
+- __u16 cur_frame;
+- __u16 config_counter;
++ __struct_group(/* no tag */, frame, /* no attrs */,
++ __u32 buf_size;
++ __u16 frame_number;
++ __u16 cur_frame;
++ __u16 config_counter;
++ );
+ };
+
+ #ifdef __KERNEL__
+@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
+ __s32 tv_usec;
+ } ts;
+ __u32 buf;
+- __u32 buf_size;
+- __u16 frame_number;
+- __u16 cur_frame;
+- __u16 config_counter;
++ __struct_group(/* no tag */, frame, /* no attrs */,
++ __u32 buf_size;
++ __u16 frame_number;
++ __u16 cur_frame;
++ __u16 config_counter;
++ );
+ };
+ #endif
+
+diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
+index e709ae8235e7f..ff6ccbc6efe96 100644
+--- a/include/uapi/linux/pci_regs.h
++++ b/include/uapi/linux/pci_regs.h
+@@ -504,6 +504,12 @@
+ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */
+ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */
++#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */
++#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */
++#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */
++#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */
++#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */
++#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */
+ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
+ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */
+ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
+diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
+index 9b77cfc42efa3..db6c8588c1d0c 100644
+--- a/include/uapi/linux/rfkill.h
++++ b/include/uapi/linux/rfkill.h
+@@ -159,8 +159,16 @@ struct rfkill_event_ext {
+ * old behaviour for all userspace, unless it explicitly opts in to the
+ * rules outlined here by using the new &struct rfkill_event_ext.
+ *
+- * Userspace using &struct rfkill_event_ext must adhere to the following
+- * rules
++ * Additionally, some other userspace (bluez, g-s-d) was reading with a
++ * large size but as streaming reads rather than message-based, or with
++ * too strict checks for the returned size. So eventually, we completely
++ * reverted this, and extended messages need to be opted in to by using
++ * an ioctl:
++ *
++ * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext));
++ *
++ * Userspace using &struct rfkill_event_ext and the ioctl must adhere to
++ * the following rules:
+ *
+ * 1. accept short writes, optionally using them to detect that it's
+ * running on an older kernel;
+@@ -175,6 +183,8 @@ struct rfkill_event_ext {
+ #define RFKILL_IOC_MAGIC 'R'
+ #define RFKILL_IOC_NOINPUT 1
+ #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
++#define RFKILL_IOC_MAX_SIZE 2
++#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
+
+ /* and that's all userspace gets */
+
+diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
+index 9a402fdb60e97..77ee207623a9b 100644
+--- a/include/uapi/linux/rseq.h
++++ b/include/uapi/linux/rseq.h
+@@ -105,23 +105,11 @@ struct rseq {
+ * Read and set by the kernel. Set by user-space with single-copy
+ * atomicity semantics. This field should only be updated by the
+ * thread which registered this data structure. Aligned on 64-bit.
++ *
++ * 32-bit architectures should update the low order bits of the
++ * rseq_cs field, leaving the high order bits initialized to 0.
+ */
+- union {
+- __u64 ptr64;
+-#ifdef __LP64__
+- __u64 ptr;
+-#else
+- struct {
+-#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
+- __u32 padding; /* Initialized to zero. */
+- __u32 ptr32;
+-#else /* LITTLE */
+- __u32 ptr32;
+- __u32 padding; /* Initialized to zero. */
+-#endif /* ENDIAN */
+- } ptr;
+-#endif
+- } rseq_cs;
++ __u64 rseq_cs;
+
+ /*
+ * Restartable sequences flags field.
+diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
+index c4042dcfdc0c3..8885e69178bd7 100644
+--- a/include/uapi/linux/serial_core.h
++++ b/include/uapi/linux/serial_core.h
+@@ -68,6 +68,9 @@
+ /* NVIDIA Tegra Combined UART */
+ #define PORT_TEGRA_TCU 41
+
++/* ASPEED AST2x00 virtual UART */
++#define PORT_ASPEED_VUART 42
++
+ /* Intel EG20 */
+ #define PORT_PCH_8LINE 44
+ #define PORT_PCH_2LINE 45
+diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
+index ee8220f8dcf5f..7837ba4fe7289 100644
+--- a/include/uapi/linux/stddef.h
++++ b/include/uapi/linux/stddef.h
+@@ -1,6 +1,47 @@
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#ifndef _UAPI_LINUX_STDDEF_H
++#define _UAPI_LINUX_STDDEF_H
++
+ #include <linux/compiler_types.h>
+
+ #ifndef __always_inline
+ #define __always_inline inline
+ #endif
++
++/**
++ * __struct_group() - Create a mirrored named and anonyomous struct
++ *
++ * @TAG: The tag name for the named sub-struct (usually empty)
++ * @NAME: The identifier name of the mirrored sub-struct
++ * @ATTRS: Any struct attributes (usually empty)
++ * @MEMBERS: The member declarations for the mirrored structs
++ *
++ * Used to create an anonymous union of two structs with identical layout
++ * and size: one anonymous and one named. The former's members can be used
++ * normally without sub-struct naming, and the latter can be used to
++ * reason about the start, end, and size of the group of struct members.
++ * The named struct can also be explicitly tagged for layer reuse, as well
++ * as both having struct attributes appended.
++ */
++#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
++ union { \
++ struct { MEMBERS } ATTRS; \
++ struct TAG { MEMBERS } ATTRS NAME; \
++ }
++
++/**
++ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
++ *
++ * @TYPE: The type of each flexible array element
++ * @NAME: The name of the flexible array member
++ *
++ * In order to have a flexible array member in a union or alone in a
++ * struct, it needs to be wrapped in an anonymous struct with at least 1
++ * named member, but that member can be empty.
++ */
++#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \
++ struct { \
++ struct { } __empty_ ## NAME; \
++ TYPE NAME[]; \
++ }
++#endif
+diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
+index 7272f85d6d6ab..3736f2fe15418 100644
+--- a/include/uapi/linux/swab.h
++++ b/include/uapi/linux/swab.h
+@@ -3,7 +3,7 @@
+ #define _UAPI_LINUX_SWAB_H
+
+ #include <linux/types.h>
+-#include <linux/compiler.h>
++#include <linux/stddef.h>
+ #include <asm/bitsperlong.h>
+ #include <asm/swab.h>
+
+diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
+index ee2dcfb3d6602..d7f7c04a6e0c1 100644
+--- a/include/uapi/linux/sync_file.h
++++ b/include/uapi/linux/sync_file.h
+@@ -52,7 +52,7 @@ struct sync_fence_info {
+ * @name: name of fence
+ * @status: status of fence. 1: signaled 0:active <0:error
+ * @flags: sync_file_info flags
+- * @num_fences number of fences in the sync_file
++ * @num_fences: number of fences in the sync_file
+ * @pad: padding for 64-bit alignment, should always be zero
+ * @sync_fence_info: pointer to array of structs sync_fence_info with all
+ * fences in the sync_file
+diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h
+index bfdae12cdacf8..c58854fb7d94a 100644
+--- a/include/uapi/linux/usb/video.h
++++ b/include/uapi/linux/usb/video.h
+@@ -179,6 +179,36 @@
+ #define UVC_CONTROL_CAP_AUTOUPDATE (1 << 3)
+ #define UVC_CONTROL_CAP_ASYNCHRONOUS (1 << 4)
+
++/* 3.9.2.6 Color Matching Descriptor Values */
++enum uvc_color_primaries_values {
++ UVC_COLOR_PRIMARIES_UNSPECIFIED,
++ UVC_COLOR_PRIMARIES_BT_709_SRGB,
++ UVC_COLOR_PRIMARIES_BT_470_2_M,
++ UVC_COLOR_PRIMARIES_BT_470_2_B_G,
++ UVC_COLOR_PRIMARIES_SMPTE_170M,
++ UVC_COLOR_PRIMARIES_SMPTE_240M,
++};
++
++enum uvc_transfer_characteristics_values {
++ UVC_TRANSFER_CHARACTERISTICS_UNSPECIFIED,
++ UVC_TRANSFER_CHARACTERISTICS_BT_709,
++ UVC_TRANSFER_CHARACTERISTICS_BT_470_2_M,
++ UVC_TRANSFER_CHARACTERISTICS_BT_470_2_B_G,
++ UVC_TRANSFER_CHARACTERISTICS_SMPTE_170M,
++ UVC_TRANSFER_CHARACTERISTICS_SMPTE_240M,
++ UVC_TRANSFER_CHARACTERISTICS_LINEAR,
++ UVC_TRANSFER_CHARACTERISTICS_SRGB,
++};
++
++enum uvc_matrix_coefficients {
++ UVC_MATRIX_COEFFICIENTS_UNSPECIFIED,
++ UVC_MATRIX_COEFFICIENTS_BT_709,
++ UVC_MATRIX_COEFFICIENTS_FCC,
++ UVC_MATRIX_COEFFICIENTS_BT_470_2_B_G,
++ UVC_MATRIX_COEFFICIENTS_SMPTE_170M,
++ UVC_MATRIX_COEFFICIENTS_SMPTE_240M,
++};
++
+ /* ------------------------------------------------------------------------
+ * UVC structures
+ */
+diff --git a/include/uapi/linux/uvcvideo.h b/include/uapi/linux/uvcvideo.h
+index 8288137387c0d..a9d0a64007ba5 100644
+--- a/include/uapi/linux/uvcvideo.h
++++ b/include/uapi/linux/uvcvideo.h
+@@ -86,7 +86,7 @@ struct uvc_xu_control_query {
+ * struct. The first two fields are added by the driver, they can be used for
+ * clock synchronisation. The rest is an exact copy of a UVC payload header.
+ * Only complete objects with complete buffers are included. Therefore it's
+- * always sizeof(meta->ts) + sizeof(meta->sof) + meta->length bytes large.
++ * always sizeof(meta->ns) + sizeof(meta->sof) + meta->length bytes large.
+ */
+ struct uvc_meta_buf {
+ __u64 ns;
+diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
+index 9260791b8438f..f5c6758464f25 100644
+--- a/include/uapi/linux/videodev2.h
++++ b/include/uapi/linux/videodev2.h
+@@ -1560,7 +1560,8 @@ struct v4l2_bt_timings {
+ ((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt))
+ #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \
+ ((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \
+- (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch)
++ ((bt)->interlaced ? \
++ ((bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) : 0))
+ #define V4L2_DV_BT_FRAME_HEIGHT(bt) \
+ ((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt))
+
+@@ -1651,7 +1652,7 @@ struct v4l2_input {
+ __u8 name[32]; /* Label */
+ __u32 type; /* Type of input */
+ __u32 audioset; /* Associated audios (bitfield) */
+- __u32 tuner; /* enum v4l2_tuner_type */
++ __u32 tuner; /* Tuner index */
+ v4l2_std_id std;
+ __u32 status;
+ __u32 capabilities;
+diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
+index 80d76b75bccd9..7aa2eb7662050 100644
+--- a/include/uapi/linux/virtio_ids.h
++++ b/include/uapi/linux/virtio_ids.h
+@@ -73,12 +73,12 @@
+ * Virtio Transitional IDs
+ */
+
+-#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */
+-#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */
+-#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */
+-#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */
+-#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */
+-#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */
+-#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */
++#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */
++#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */
++#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */
++#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */
++#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */
++#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */
++#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */
+
+ #endif /* _LINUX_VIRTIO_IDS_H */
+diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
+index eda0426ec4c2b..65e13a099b1a0 100644
+--- a/include/uapi/linux/xfrm.h
++++ b/include/uapi/linux/xfrm.h
+@@ -313,6 +313,7 @@ enum xfrm_attr_type_t {
+ XFRMA_SET_MARK, /* __u32 */
+ XFRMA_SET_MARK_MASK, /* __u32 */
+ XFRMA_IF_ID, /* __u32 */
++ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */
+ __XFRMA_MAX
+
+ #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
+@@ -510,6 +511,12 @@ struct xfrm_user_offload {
+ int ifindex;
+ __u8 flags;
+ };
+/* This flag was exposed without any kernel code supporting it.
+ * Unfortunately, strongswan has code that sets this flag,
+ * which makes it impossible to reuse this bit.
++ *
++ * So leave it here to make sure that it won't be reused by mistake.
++ */
+ #define XFRM_OFFLOAD_IPV6 1
+ #define XFRM_OFFLOAD_INBOUND 2
+
+diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
+index 86be4a92b67bf..a96b7d2770e15 100644
+--- a/include/uapi/rdma/mlx5-abi.h
++++ b/include/uapi/rdma/mlx5-abi.h
+@@ -104,6 +104,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2,
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3,
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4,
++ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG = 1UL << 5,
+ };
+
+ enum mlx5_user_cmds_supp_uhw {
+diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h
+index a75e14edc957e..dbd60f48b4b01 100644
+--- a/include/uapi/sound/asequencer.h
++++ b/include/uapi/sound/asequencer.h
+@@ -344,10 +344,10 @@ typedef int __bitwise snd_seq_client_type_t;
+ #define KERNEL_CLIENT ((__force snd_seq_client_type_t) 2)
+
+ /* event filter flags */
+-#define SNDRV_SEQ_FILTER_BROADCAST (1<<0) /* accept broadcast messages */
+-#define SNDRV_SEQ_FILTER_MULTICAST (1<<1) /* accept multicast messages */
+-#define SNDRV_SEQ_FILTER_BOUNCE (1<<2) /* accept bounce event in error */
+-#define SNDRV_SEQ_FILTER_USE_EVENT (1<<31) /* use event filter */
++#define SNDRV_SEQ_FILTER_BROADCAST (1U<<0) /* accept broadcast messages */
++#define SNDRV_SEQ_FILTER_MULTICAST (1U<<1) /* accept multicast messages */
++#define SNDRV_SEQ_FILTER_BOUNCE (1U<<2) /* accept bounce event in error */
++#define SNDRV_SEQ_FILTER_USE_EVENT (1U<<31) /* use event filter */
+
+ struct snd_seq_client_info {
+ int client; /* client number to inquire */
+diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
+index 5859ca0a1439b..93e40f91bd49a 100644
+--- a/include/uapi/sound/asound.h
++++ b/include/uapi/sound/asound.h
+@@ -56,8 +56,10 @@
+ * *
+ ****************************************************************************/
+
++#define AES_IEC958_STATUS_SIZE 24
++
+ struct snd_aes_iec958 {
+- unsigned char status[24]; /* AES/IEC958 channel status bits */
++ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
+ unsigned char subcode[147]; /* AES/IEC958 subcode bits */
+ unsigned char pad; /* nothing */
+ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
+diff --git a/include/uapi/sound/skl-tplg-interface.h b/include/uapi/sound/skl-tplg-interface.h
+index a93c0decfdd53..215ce16b37d2b 100644
+--- a/include/uapi/sound/skl-tplg-interface.h
++++ b/include/uapi/sound/skl-tplg-interface.h
+@@ -66,7 +66,8 @@ enum skl_ch_cfg {
+ SKL_CH_CFG_DUAL_MONO = 9,
+ SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10,
+ SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11,
+- SKL_CH_CFG_4_CHANNEL = 12,
++ SKL_CH_CFG_7_1 = 12,
++ SKL_CH_CFG_4_CHANNEL = SKL_CH_CFG_7_1,
+ SKL_CH_CFG_INVALID
+ };
+
+diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h
+index e1126a74882a5..eff166fdd81b9 100644
+--- a/include/video/of_display_timing.h
++++ b/include/video/of_display_timing.h
+@@ -8,6 +8,8 @@
+ #ifndef __LINUX_OF_DISPLAY_TIMING_H
+ #define __LINUX_OF_DISPLAY_TIMING_H
+
++#include <linux/errno.h>
++
+ struct device_node;
+ struct display_timing;
+ struct display_timings;
+diff --git a/include/xen/events.h b/include/xen/events.h
+index c204262d9fc24..344081e71584b 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -17,6 +17,7 @@ struct xenbus_device;
+ unsigned xen_evtchn_nr_channels(void);
+
+ int bind_evtchn_to_irq(evtchn_port_t evtchn);
++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
+ int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname,
+diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
+index cb854df031ce0..c9fea9389ebec 100644
+--- a/include/xen/grant_table.h
++++ b/include/xen/grant_table.h
+@@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
+ * access has been ended, free the given page too. Access will be ended
+ * immediately iff the grant entry is not in use, otherwise it will happen
+ * some time later. page may be 0, in which case no freeing will occur.
++ * Note that the granted page might still be accessed (read or write) by the
++ * other side after gnttab_end_foreign_access() returns, so even if page was
++ * specified as 0 it is not allowed to just reuse the page for other
++ * purposes immediately. gnttab_end_foreign_access() will take an additional
++ * reference to the granted page in this case, which is dropped only after
++ * the grant is no longer in use.
++ * This requires that multi page allocations for areas subject to
++ * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing
++ * via free_pages_exact()) in order to avoid high order pages.
+ */
+ void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
+ unsigned long page);
+
++/*
++ * End access through the given grant reference, iff the grant entry is
++ * no longer in use. In case of success ending foreign access, the
++ * grant reference is deallocated.
++ * Return 1 if the grant entry was freed, 0 if it is still in use.
++ */
++int gnttab_try_end_foreign_access(grant_ref_t ref);
++
+ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
+
+ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
+ unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+-int gnttab_query_foreign_access(grant_ref_t ref);
+-
+ /*
+ * operations on reserved batches of grant references
+ */
+diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
+index 732efb08c3e17..744bc41355678 100644
+--- a/include/xen/interface/platform.h
++++ b/include/xen/interface/platform.h
+@@ -500,6 +500,8 @@ struct xenpf_symdata {
+ };
+ DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
+
++#define XENPF_get_dom0_console 64
++
+ struct xen_platform_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+@@ -523,6 +525,7 @@ struct xen_platform_op {
+ struct xenpf_mem_hotadd mem_add;
+ struct xenpf_core_parking core_parking;
+ struct xenpf_symdata symdata;
++ struct dom0_vga_console_info dom0_console;
+ uint8_t pad[128];
+ } u;
+ };
+diff --git a/include/xen/xen.h b/include/xen/xen.h
+index 43efba045acc7..5a6a2ab675bed 100644
+--- a/include/xen/xen.h
++++ b/include/xen/xen.h
+@@ -61,4 +61,15 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
+ #include <xen/balloon.h>
+ #endif
+
++#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) && defined(CONFIG_X86)
++bool __init xen_processor_present(uint32_t acpi_id);
++#else
++#include <linux/bug.h>
++static inline bool xen_processor_present(uint32_t acpi_id)
++{
++ BUG();
++ return false;
++}
++#endif
++
+ #endif /* _XEN_XEN_H */
+diff --git a/init/Kconfig b/init/Kconfig
+index 11f8a845f259d..dafc3ba6fa7a1 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -77,6 +77,11 @@ config CC_HAS_ASM_GOTO_OUTPUT
+ depends on CC_HAS_ASM_GOTO
+ def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
+
++config CC_HAS_ASM_GOTO_TIED_OUTPUT
++ depends on CC_HAS_ASM_GOTO_OUTPUT
++ # Detect buggy gcc and clang, fixed in gcc-11 clang-14.
++ def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
++
+ config TOOLS_SUPPORT_RELR
+ def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
+
+@@ -86,6 +91,10 @@ config CC_HAS_ASM_INLINE
+ config CC_HAS_NO_PROFILE_FN_ATTR
+ def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
+
++config PAHOLE_VERSION
++ int
++ default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
++
+ config CONSTRUCTORS
+ bool
+
+diff --git a/init/main.c b/init/main.c
+index 3c4054a955458..63737af8de51e 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -96,13 +96,13 @@
+ #include <linux/cache.h>
+ #include <linux/rodata_test.h>
+ #include <linux/jump_label.h>
+-#include <linux/mem_encrypt.h>
+ #include <linux/kcsan.h>
+ #include <linux/init_syscalls.h>
+ #include <linux/stackdepot.h>
++#include <linux/randomize_kstack.h>
++#include <net/net_namespace.h>
+
+ #include <asm/io.h>
+-#include <asm/bugs.h>
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
+@@ -786,8 +786,6 @@ void __init __weak thread_stack_cache_init(void)
+ }
+ #endif
+
+-void __init __weak mem_encrypt_init(void) { }
+-
+ void __init __weak poking_init(void) { }
+
+ void __init __weak pgtable_cache_init(void) { }
+@@ -857,6 +855,7 @@ static void __init mm_init(void)
+ init_espfix_bsp();
+ /* Should be run after espfix64 is set up. */
+ pti_init();
++ mm_cache_init();
+ }
+
+ #ifdef CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+@@ -924,7 +923,9 @@ static void __init print_unknown_bootoptions(void)
+ for (p = &envp_init[2]; *p; p++)
+ end += sprintf(end, " %s", *p);
+
+- pr_notice("Unknown command line parameters:%s\n", unknown_options);
++ /* Start at unknown_options[1] to skip the initial space */
++ pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
++ &unknown_options[1]);
+ memblock_free_ptr(unknown_options, len);
+ }
+
+@@ -987,7 +988,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ sort_main_extable();
+ trap_init();
+ mm_init();
+-
++ poking_init();
+ ftrace_init();
+
+ /* trace_printk can be enabled here */
+@@ -1038,21 +1039,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ softirq_init();
+ timekeeping_init();
+ kfence_init();
++ time_init();
+
+ /*
+ * For best initial stack canary entropy, prepare it after:
+ * - setup_arch() for any UEFI RNG entropy and boot cmdline access
+- * - timekeeping_init() for ktime entropy used in rand_initialize()
+- * - rand_initialize() to get any arch-specific entropy like RDRAND
+- * - add_latent_entropy() to get any latent entropy
+- * - adding command line entropy
++ * - timekeeping_init() for ktime entropy used in random_init()
++ * - time_init() for making random_get_entropy() work on some platforms
+ * - random_init() to initialize the RNG from early entropy sources
+ */
+- rand_initialize();
+- add_latent_entropy();
+- add_device_randomness(command_line, strlen(command_line));
++ random_init(command_line);
+ boot_init_stack_canary();
+
+- time_init();
+ perf_event_init();
+ profile_init();
+ call_function_init();
+@@ -1082,14 +1080,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ */
+ locking_selftest();
+
+- /*
+- * This needs to be called before any devices perform DMA
+- * operations that might use the SWIOTLB bounce buffers. It will
+- * mark the bounce buffers as decrypted so that their usage will
+- * not cause "plain-text" data to be decrypted when accessed.
+- */
+- mem_encrypt_init();
+-
+ #ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start && !initrd_below_start_ok &&
+ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
+@@ -1106,6 +1096,9 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ late_time_init();
+ sched_clock_init();
+ calibrate_delay();
++
++ arch_cpu_finalize_init();
++
+ pid_idr_init();
+ anon_vma_init();
+ #ifdef CONFIG_X86
+@@ -1120,6 +1113,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ key_init();
+ security_init();
+ dbg_late_init();
++ net_ns_init();
+ vfs_caches_init();
+ pagecache_init();
+ signals_init();
+@@ -1131,9 +1125,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
+ taskstats_init_early();
+ delayacct_init();
+
+- poking_init();
+- check_bugs();
+-
+ acpi_subsystem_init();
+ arch_post_acpi_subsys_init();
+ kcsan_init();
+@@ -1194,7 +1185,7 @@ static int __init initcall_blacklist(char *str)
+ }
+ } while (str_entry);
+
+- return 0;
++ return 1;
+ }
+
+ static bool __init_or_module initcall_blacklisted(initcall_t fn)
+@@ -1456,7 +1447,9 @@ static noinline void __init kernel_init_freeable(void);
+ bool rodata_enabled __ro_after_init = true;
+ static int __init set_debug_rodata(char *str)
+ {
+- return strtobool(str, &rodata_enabled);
++ if (strtobool(str, &rodata_enabled))
++ pr_warn("Invalid option string for rodata: '%s'\n", str);
++ return 1;
+ }
+ __setup("rodata=", set_debug_rodata);
+ #endif
+diff --git a/io_uring/Makefile b/io_uring/Makefile
+new file mode 100644
+index 0000000000000..3680425df9478
+--- /dev/null
++++ b/io_uring/Makefile
+@@ -0,0 +1,6 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Makefile for io_uring
++
++obj-$(CONFIG_IO_URING) += io_uring.o
++obj-$(CONFIG_IO_WQ) += io-wq.o
+diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
+new file mode 100644
+index 0000000000000..fe8594a0396ca
+--- /dev/null
++++ b/io_uring/io-wq.c
+@@ -0,0 +1,1414 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Basic worker thread pool for io_uring
++ *
++ * Copyright (C) 2019 Jens Axboe
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/errno.h>
++#include <linux/sched/signal.h>
++#include <linux/percpu.h>
++#include <linux/slab.h>
++#include <linux/rculist_nulls.h>
++#include <linux/cpu.h>
++#include <linux/tracehook.h>
++#include <uapi/linux/io_uring.h>
++
++#include "io-wq.h"
++
++#define WORKER_IDLE_TIMEOUT (5 * HZ)
++
++enum {
++ IO_WORKER_F_UP = 1, /* up and active */
++ IO_WORKER_F_RUNNING = 2, /* account as running */
++ IO_WORKER_F_FREE = 4, /* worker on free list */
++ IO_WORKER_F_BOUND = 8, /* is doing bounded work */
++};
++
++enum {
++ IO_WQ_BIT_EXIT = 0, /* wq exiting */
++};
++
++enum {
++ IO_ACCT_STALLED_BIT = 0, /* stalled on hash */
++};
++
++/*
++ * One for each thread in a wqe pool
++ */
++struct io_worker {
++ refcount_t ref;
++ unsigned flags;
++ struct hlist_nulls_node nulls_node;
++ struct list_head all_list;
++ struct task_struct *task;
++ struct io_wqe *wqe;
++
++ struct io_wq_work *cur_work;
++ spinlock_t lock;
++
++ struct completion ref_done;
++
++ unsigned long create_state;
++ struct callback_head create_work;
++ int create_index;
++
++ union {
++ struct rcu_head rcu;
++ struct work_struct work;
++ };
++};
++
++#if BITS_PER_LONG == 64
++#define IO_WQ_HASH_ORDER 6
++#else
++#define IO_WQ_HASH_ORDER 5
++#endif
++
++#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
++
++struct io_wqe_acct {
++ unsigned nr_workers;
++ unsigned max_workers;
++ int index;
++ atomic_t nr_running;
++ struct io_wq_work_list work_list;
++ unsigned long flags;
++};
++
++enum {
++ IO_WQ_ACCT_BOUND,
++ IO_WQ_ACCT_UNBOUND,
++ IO_WQ_ACCT_NR,
++};
++
++/*
++ * Per-node worker thread pool
++ */
++struct io_wqe {
++ raw_spinlock_t lock;
++ struct io_wqe_acct acct[2];
++
++ int node;
++
++ struct hlist_nulls_head free_list;
++ struct list_head all_list;
++
++ struct wait_queue_entry wait;
++
++ struct io_wq *wq;
++ struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
++
++ cpumask_var_t cpu_mask;
++};
++
++/*
++ * Per io_wq state
++ */
++struct io_wq {
++ unsigned long state;
++
++ free_work_fn *free_work;
++ io_wq_work_fn *do_work;
++
++ struct io_wq_hash *hash;
++
++ atomic_t worker_refs;
++ struct completion worker_done;
++
++ struct hlist_node cpuhp_node;
++
++ struct task_struct *task;
++
++ struct io_wqe *wqes[];
++};
++
++static enum cpuhp_state io_wq_online;
++
++struct io_cb_cancel_data {
++ work_cancel_fn *fn;
++ void *data;
++ int nr_running;
++ int nr_pending;
++ bool cancel_all;
++};
++
++static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
++static void io_wqe_dec_running(struct io_worker *worker);
++static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
++ struct io_wqe_acct *acct,
++ struct io_cb_cancel_data *match);
++static void create_worker_cb(struct callback_head *cb);
++static void io_wq_cancel_tw_create(struct io_wq *wq);
++
++static bool io_worker_get(struct io_worker *worker)
++{
++ return refcount_inc_not_zero(&worker->ref);
++}
++
++static void io_worker_release(struct io_worker *worker)
++{
++ if (refcount_dec_and_test(&worker->ref))
++ complete(&worker->ref_done);
++}
++
++static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound)
++{
++ return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND];
++}
++
++static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
++ struct io_wq_work *work)
++{
++ return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND));
++}
++
++static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
++{
++ return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND);
++}
++
++static void io_worker_ref_put(struct io_wq *wq)
++{
++ if (atomic_dec_and_test(&wq->worker_refs))
++ complete(&wq->worker_done);
++}
++
++bool io_wq_worker_stopped(void)
++{
++ struct io_worker *worker = current->pf_io_worker;
++
++ if (WARN_ON_ONCE(!io_wq_current_is_worker()))
++ return true;
++
++ return test_bit(IO_WQ_BIT_EXIT, &worker->wqe->wq->state);
++}
++
++static void io_worker_cancel_cb(struct io_worker *worker)
++{
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++ struct io_wqe *wqe = worker->wqe;
++ struct io_wq *wq = wqe->wq;
++
++ atomic_dec(&acct->nr_running);
++ raw_spin_lock(&worker->wqe->lock);
++ acct->nr_workers--;
++ raw_spin_unlock(&worker->wqe->lock);
++ io_worker_ref_put(wq);
++ clear_bit_unlock(0, &worker->create_state);
++ io_worker_release(worker);
++}
++
++static bool io_task_worker_match(struct callback_head *cb, void *data)
++{
++ struct io_worker *worker;
++
++ if (cb->func != create_worker_cb)
++ return false;
++ worker = container_of(cb, struct io_worker, create_work);
++ return worker == data;
++}
++
++static void io_worker_exit(struct io_worker *worker)
++{
++ struct io_wqe *wqe = worker->wqe;
++ struct io_wq *wq = wqe->wq;
++
++ while (1) {
++ struct callback_head *cb = task_work_cancel_match(wq->task,
++ io_task_worker_match, worker);
++
++ if (!cb)
++ break;
++ io_worker_cancel_cb(worker);
++ }
++
++ if (refcount_dec_and_test(&worker->ref))
++ complete(&worker->ref_done);
++ wait_for_completion(&worker->ref_done);
++
++ raw_spin_lock(&wqe->lock);
++ if (worker->flags & IO_WORKER_F_FREE)
++ hlist_nulls_del_rcu(&worker->nulls_node);
++ list_del_rcu(&worker->all_list);
++ preempt_disable();
++ io_wqe_dec_running(worker);
++ worker->flags = 0;
++ current->flags &= ~PF_IO_WORKER;
++ preempt_enable();
++ raw_spin_unlock(&wqe->lock);
++
++ kfree_rcu(worker, rcu);
++ io_worker_ref_put(wqe->wq);
++ do_exit(0);
++}
++
++static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
++{
++ if (!wq_list_empty(&acct->work_list) &&
++ !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
++ return true;
++ return false;
++}
++
++/*
++ * Check head of free list for an available worker. If one isn't available,
++ * caller must create one.
++ */
++static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
++ struct io_wqe_acct *acct)
++ __must_hold(RCU)
++{
++ struct hlist_nulls_node *n;
++ struct io_worker *worker;
++
++ /*
++ * Iterate free_list and see if we can find an idle worker to
++ * activate. If a given worker is on the free_list but in the process
++ * of exiting, keep trying.
++ */
++ hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
++ if (!io_worker_get(worker))
++ continue;
++ if (io_wqe_get_acct(worker) != acct) {
++ io_worker_release(worker);
++ continue;
++ }
++ if (wake_up_process(worker->task)) {
++ io_worker_release(worker);
++ return true;
++ }
++ io_worker_release(worker);
++ }
++
++ return false;
++}
++
++/*
++ * We need a worker. If we find a free one, we're good. If not, and we're
++ * below the max number of workers, create one.
++ */
++static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
++{
++ /*
++ * Most likely an attempt to queue unbounded work on an io_wq that
++ * wasn't setup with any unbounded workers.
++ */
++ if (unlikely(!acct->max_workers))
++ pr_warn_once("io-wq is not configured for unbound workers");
++
++ raw_spin_lock(&wqe->lock);
++ if (acct->nr_workers >= acct->max_workers) {
++ raw_spin_unlock(&wqe->lock);
++ return true;
++ }
++ acct->nr_workers++;
++ raw_spin_unlock(&wqe->lock);
++ atomic_inc(&acct->nr_running);
++ atomic_inc(&wqe->wq->worker_refs);
++ return create_io_worker(wqe->wq, wqe, acct->index);
++}
++
++static void io_wqe_inc_running(struct io_worker *worker)
++{
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++
++ atomic_inc(&acct->nr_running);
++}
++
++static void create_worker_cb(struct callback_head *cb)
++{
++ struct io_worker *worker;
++ struct io_wq *wq;
++ struct io_wqe *wqe;
++ struct io_wqe_acct *acct;
++ bool do_create = false;
++
++ worker = container_of(cb, struct io_worker, create_work);
++ wqe = worker->wqe;
++ wq = wqe->wq;
++ acct = &wqe->acct[worker->create_index];
++ raw_spin_lock(&wqe->lock);
++ if (acct->nr_workers < acct->max_workers) {
++ acct->nr_workers++;
++ do_create = true;
++ }
++ raw_spin_unlock(&wqe->lock);
++ if (do_create) {
++ create_io_worker(wq, wqe, worker->create_index);
++ } else {
++ atomic_dec(&acct->nr_running);
++ io_worker_ref_put(wq);
++ }
++ clear_bit_unlock(0, &worker->create_state);
++ io_worker_release(worker);
++}
++
++static bool io_queue_worker_create(struct io_worker *worker,
++ struct io_wqe_acct *acct,
++ task_work_func_t func)
++{
++ struct io_wqe *wqe = worker->wqe;
++ struct io_wq *wq = wqe->wq;
++
++ /* raced with exit, just ignore create call */
++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
++ goto fail;
++ if (!io_worker_get(worker))
++ goto fail;
++ /*
++ * create_state manages ownership of create_work/index. We should
++ * only need one entry per worker, as the worker going to sleep
++ * will trigger the condition, and waking will clear it once it
++ * runs the task_work.
++ */
++ if (test_bit(0, &worker->create_state) ||
++ test_and_set_bit_lock(0, &worker->create_state))
++ goto fail_release;
++
++ atomic_inc(&wq->worker_refs);
++ init_task_work(&worker->create_work, func);
++ worker->create_index = acct->index;
++ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
++ /*
++ * EXIT may have been set after checking it above, check after
++ * adding the task_work and remove any creation item if it is
++ * now set. wq exit does that too, but we can have added this
++ * work item after we canceled in io_wq_exit_workers().
++ */
++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
++ io_wq_cancel_tw_create(wq);
++ io_worker_ref_put(wq);
++ return true;
++ }
++ io_worker_ref_put(wq);
++ clear_bit_unlock(0, &worker->create_state);
++fail_release:
++ io_worker_release(worker);
++fail:
++ atomic_dec(&acct->nr_running);
++ io_worker_ref_put(wq);
++ return false;
++}
++
++static void io_wqe_dec_running(struct io_worker *worker)
++ __must_hold(wqe->lock)
++{
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++ struct io_wqe *wqe = worker->wqe;
++
++ if (!(worker->flags & IO_WORKER_F_UP))
++ return;
++
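++ /*
++ * Last running worker for this acct and work is still queued: account
++ * for a replacement and queue its creation via task_work.
++ */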
++ if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
++ atomic_inc(&acct->nr_running);
++ atomic_inc(&wqe->wq->worker_refs);
++ raw_spin_unlock(&wqe->lock);
++ io_queue_worker_create(worker, acct, create_worker_cb);
++ raw_spin_lock(&wqe->lock);
++ }
++}
++
++/*
++ * Worker will start processing some work. Move it to the busy list, if
++ * it's currently on the freelist
++ */
++static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
++ struct io_wq_work *work)
++ __must_hold(wqe->lock)
++{
++ if (worker->flags & IO_WORKER_F_FREE) {
++ worker->flags &= ~IO_WORKER_F_FREE;
++ hlist_nulls_del_init_rcu(&worker->nulls_node);
++ }
++}
++
++/*
++ * No work, worker going to sleep. Move it back to the free list if it
++ * isn't already there. Called with wqe->lock held.
++ */
++static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
++ __must_hold(wqe->lock)
++{
++ if (!(worker->flags & IO_WORKER_F_FREE)) {
++ worker->flags |= IO_WORKER_F_FREE;
++ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
++ }
++}
++
++static inline unsigned int io_get_work_hash(struct io_wq_work *work)
++{
++ return work->flags >> IO_WQ_HASH_SHIFT;
++}
++
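++/*
++ * Queue this wqe on the hash wait queue (if not already queued) so it is
++ * woken when the hashed slot frees up. Returns true if the slot was found
++ * to be free while adding, meaning there is no need to wait.
++ */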
++static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
++{
++ struct io_wq *wq = wqe->wq;
++ bool ret = false;
++
++ spin_lock_irq(&wq->hash->wait.lock);
++ if (list_empty(&wqe->wait.entry)) {
++ __add_wait_queue(&wq->hash->wait, &wqe->wait);
++ if (!test_bit(hash, &wq->hash->map)) {
++ __set_current_state(TASK_RUNNING);
++ list_del_init(&wqe->wait.entry);
++ ret = true;
++ }
++ }
++ spin_unlock_irq(&wq->hash->wait.lock);
++ return ret;
++}
++
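++/*
++ * Pick the next piece of work for this acct. Unhashed work can run right
++ * away; hashed work runs only if no other work with the same hash is in
++ * flight, otherwise we skip past the whole chain and may mark the acct
++ * stalled.
++ */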
++static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
++ struct io_worker *worker)
++ __must_hold(wqe->lock)
++{
++ struct io_wq_work_node *node, *prev;
++ struct io_wq_work *work, *tail;
++ unsigned int stall_hash = -1U;
++ struct io_wqe *wqe = worker->wqe;
++
++ wq_list_for_each(node, prev, &acct->work_list) {
++ unsigned int hash;
++
++ work = container_of(node, struct io_wq_work, list);
++
++ /* not hashed, can run anytime */
++ if (!io_wq_is_hashed(work)) {
++ wq_list_del(&acct->work_list, node, prev);
++ return work;
++ }
++
++ hash = io_get_work_hash(work);
++ /* all items with this hash lie in [work, tail] */
++ tail = wqe->hash_tail[hash];
++
++ /* hashed, can run if not already running */
++ if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
++ wqe->hash_tail[hash] = NULL;
++ wq_list_cut(&acct->work_list, &tail->list, prev);
++ return work;
++ }
++ if (stall_hash == -1U)
++ stall_hash = hash;
++ /* fast forward to a next hash, for-each will fix up @prev */
++ node = &tail->list;
++ }
++
++ if (stall_hash != -1U) {
++ bool unstalled;
++
++ /*
++ * Set this before dropping the lock to avoid racing with new
++ * work being added and clearing the stalled bit.
++ */
++ set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
++ raw_spin_unlock(&wqe->lock);
++ unstalled = io_wait_on_hash(wqe, stall_hash);
++ raw_spin_lock(&wqe->lock);
++ if (unstalled) {
++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
++ if (wq_has_sleeper(&wqe->wq->hash->wait))
++ wake_up(&wqe->wq->hash->wait);
++ }
++ }
++
++ return NULL;
++}
++
++static bool io_flush_signals(void)
++{
++ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
++ __set_current_state(TASK_RUNNING);
++ tracehook_notify_signal();
++ return true;
++ }
++ return false;
++}
++
++static void io_assign_current_work(struct io_worker *worker,
++ struct io_wq_work *work)
++{
++ if (work) {
++ io_flush_signals();
++ cond_resched();
++ }
++
++ spin_lock(&worker->lock);
++ worker->cur_work = work;
++ spin_unlock(&worker->lock);
++}
++
++static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
++
++static void io_worker_handle_work(struct io_worker *worker)
++ __releases(wqe->lock)
++{
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++ struct io_wqe *wqe = worker->wqe;
++ struct io_wq *wq = wqe->wq;
++ bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
++
++ do {
++ struct io_wq_work *work;
++get_next:
++ /*
++ * If we got some work, mark us as busy. If we didn't, but
++ * the list isn't empty, it means we stalled on hashed work.
++ * Mark us stalled so we don't keep looking for work when we
++ * can't make progress, any work completion or insertion will
++ * clear the stalled flag.
++ */
++ work = io_get_next_work(acct, worker);
++ if (work)
++ __io_worker_busy(wqe, worker, work);
++
++ raw_spin_unlock(&wqe->lock);
++ if (!work)
++ break;
++ io_assign_current_work(worker, work);
++ __set_current_state(TASK_RUNNING);
++
++ /* handle a whole dependent link */
++ do {
++ struct io_wq_work *next_hashed, *linked;
++ unsigned int hash = io_get_work_hash(work);
++
++ next_hashed = wq_next_work(work);
++
++ if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
++ work->flags |= IO_WQ_WORK_CANCEL;
++ wq->do_work(work);
++ io_assign_current_work(worker, NULL);
++
++ linked = wq->free_work(work);
++ work = next_hashed;
++ if (!work && linked && !io_wq_is_hashed(linked)) {
++ work = linked;
++ linked = NULL;
++ }
++ io_assign_current_work(worker, work);
++ if (linked)
++ io_wqe_enqueue(wqe, linked);
++
++ if (hash != -1U && !next_hashed) {
++ /* serialize hash clear with wake_up() */
++ spin_lock_irq(&wq->hash->wait.lock);
++ clear_bit(hash, &wq->hash->map);
++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
++ spin_unlock_irq(&wq->hash->wait.lock);
++ if (wq_has_sleeper(&wq->hash->wait))
++ wake_up(&wq->hash->wait);
++ raw_spin_lock(&wqe->lock);
++ /* skip unnecessary unlock-lock wqe->lock */
++ if (!work)
++ goto get_next;
++ raw_spin_unlock(&wqe->lock);
++ }
++ } while (work);
++
++ raw_spin_lock(&wqe->lock);
++ } while (1);
++}
++
++static int io_wqe_worker(void *data)
++{
++ struct io_worker *worker = data;
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++ struct io_wqe *wqe = worker->wqe;
++ struct io_wq *wq = wqe->wq;
++ bool last_timeout = false;
++ char buf[TASK_COMM_LEN];
++
++ worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
++
++ snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
++ set_task_comm(current, buf);
++
++ while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
++ long ret;
++
++ set_current_state(TASK_INTERRUPTIBLE);
++loop:
++ raw_spin_lock(&wqe->lock);
++ if (io_acct_run_queue(acct)) {
++ io_worker_handle_work(worker);
++ goto loop;
++ }
++ /* timed out, exit unless we're the last worker */
++ if (last_timeout && acct->nr_workers > 1) {
++ acct->nr_workers--;
++ raw_spin_unlock(&wqe->lock);
++ __set_current_state(TASK_RUNNING);
++ break;
++ }
++ last_timeout = false;
++ __io_worker_idle(wqe, worker);
++ raw_spin_unlock(&wqe->lock);
++ if (io_flush_signals())
++ continue;
++ ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
++ if (signal_pending(current)) {
++ struct ksignal ksig;
++
++ if (!get_signal(&ksig))
++ continue;
++ break;
++ }
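++ /*
++ * ret == 0 means the idle timeout expired; remember that so the worker
++ * can exit on the next pass through the loop (unless it is the last one).
++ */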
++ last_timeout = !ret;
++ }
++
++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
++ raw_spin_lock(&wqe->lock);
++ io_worker_handle_work(worker);
++ }
++
++ io_worker_exit(worker);
++ return 0;
++}
++
++/*
++ * Called when a worker is scheduled in. Mark us as currently running.
++ */
++void io_wq_worker_running(struct task_struct *tsk)
++{
++ struct io_worker *worker = tsk->pf_io_worker;
++
++ if (!worker)
++ return;
++ if (!(worker->flags & IO_WORKER_F_UP))
++ return;
++ if (worker->flags & IO_WORKER_F_RUNNING)
++ return;
++ worker->flags |= IO_WORKER_F_RUNNING;
++ io_wqe_inc_running(worker);
++}
++
++/*
++ * Called when worker is going to sleep. If there are no workers currently
++ * running and we have work pending, wake up a free one or create a new one.
++ */
++void io_wq_worker_sleeping(struct task_struct *tsk)
++{
++ struct io_worker *worker = tsk->pf_io_worker;
++
++ if (!worker)
++ return;
++ if (!(worker->flags & IO_WORKER_F_UP))
++ return;
++ if (!(worker->flags & IO_WORKER_F_RUNNING))
++ return;
++
++ worker->flags &= ~IO_WORKER_F_RUNNING;
++
++ raw_spin_lock(&worker->wqe->lock);
++ io_wqe_dec_running(worker);
++ raw_spin_unlock(&worker->wqe->lock);
++}
++
++static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
++ struct task_struct *tsk)
++{
++ tsk->pf_io_worker = worker;
++ worker->task = tsk;
++ set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
++ tsk->flags |= PF_NO_SETAFFINITY;
++
++ raw_spin_lock(&wqe->lock);
++ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
++ list_add_tail_rcu(&worker->all_list, &wqe->all_list);
++ worker->flags |= IO_WORKER_F_FREE;
++ raw_spin_unlock(&wqe->lock);
++ wake_up_new_task(tsk);
++}
++
++static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
++{
++ return true;
++}
++
++static inline bool io_should_retry_thread(long err)
++{
++ /*
++ * Prevent perpetual task_work retry, if the task (or its group) is
++ * exiting.
++ */
++ if (fatal_signal_pending(current))
++ return false;
++
++ switch (err) {
++ case -EAGAIN:
++ case -ERESTARTSYS:
++ case -ERESTARTNOINTR:
++ case -ERESTARTNOHAND:
++ return true;
++ default:
++ return false;
++ }
++}
++
++static void create_worker_cont(struct callback_head *cb)
++{
++ struct io_worker *worker;
++ struct task_struct *tsk;
++ struct io_wqe *wqe;
++
++ worker = container_of(cb, struct io_worker, create_work);
++ clear_bit_unlock(0, &worker->create_state);
++ wqe = worker->wqe;
++ tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
++ if (!IS_ERR(tsk)) {
++ io_init_new_worker(wqe, worker, tsk);
++ io_worker_release(worker);
++ return;
++ } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++
++ atomic_dec(&acct->nr_running);
++ raw_spin_lock(&wqe->lock);
++ acct->nr_workers--;
++ if (!acct->nr_workers) {
++ struct io_cb_cancel_data match = {
++ .fn = io_wq_work_match_all,
++ .cancel_all = true,
++ };
++
++ while (io_acct_cancel_pending_work(wqe, acct, &match))
++ raw_spin_lock(&wqe->lock);
++ }
++ raw_spin_unlock(&wqe->lock);
++ io_worker_ref_put(wqe->wq);
++ kfree(worker);
++ return;
++ }
++
++ /* re-create attempts grab a new worker ref, drop the existing one */
++ io_worker_release(worker);
++ schedule_work(&worker->work);
++}
++
++static void io_workqueue_create(struct work_struct *work)
++{
++ struct io_worker *worker = container_of(work, struct io_worker, work);
++ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
++
++ if (!io_queue_worker_create(worker, acct, create_worker_cont))
++ kfree(worker);
++}
++
++static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
++{
++ struct io_wqe_acct *acct = &wqe->acct[index];
++ struct io_worker *worker;
++ struct task_struct *tsk;
++
++ __set_current_state(TASK_RUNNING);
++
++ worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
++ if (!worker) {
++fail:
++ atomic_dec(&acct->nr_running);
++ raw_spin_lock(&wqe->lock);
++ acct->nr_workers--;
++ raw_spin_unlock(&wqe->lock);
++ io_worker_ref_put(wq);
++ return false;
++ }
++
++ refcount_set(&worker->ref, 1);
++ worker->wqe = wqe;
++ spin_lock_init(&worker->lock);
++ init_completion(&worker->ref_done);
++
++ if (index == IO_WQ_ACCT_BOUND)
++ worker->flags |= IO_WORKER_F_BOUND;
++
++ tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
++ if (!IS_ERR(tsk)) {
++ io_init_new_worker(wqe, worker, tsk);
++ } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
++ kfree(worker);
++ goto fail;
++ } else {
++ INIT_WORK(&worker->work, io_workqueue_create);
++ schedule_work(&worker->work);
++ }
++
++ return true;
++}
++
++/*
++ * Iterate the passed in list and call the specific function for each
++ * worker that isn't exiting
++ */
++static bool io_wq_for_each_worker(struct io_wqe *wqe,
++ bool (*func)(struct io_worker *, void *),
++ void *data)
++{
++ struct io_worker *worker;
++ bool ret = false;
++
++ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
++ if (io_worker_get(worker)) {
++ /* no task if node is/was offline */
++ if (worker->task)
++ ret = func(worker, data);
++ io_worker_release(worker);
++ if (ret)
++ break;
++ }
++ }
++
++ return ret;
++}
++
++static bool io_wq_worker_wake(struct io_worker *worker, void *data)
++{
++ set_notify_signal(worker->task);
++ wake_up_process(worker->task);
++ return false;
++}
++
++static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
++{
++ struct io_wq *wq = wqe->wq;
++
++ do {
++ work->flags |= IO_WQ_WORK_CANCEL;
++ wq->do_work(work);
++ work = wq->free_work(work);
++ } while (work);
++}
++
++static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
++{
++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
++ unsigned int hash;
++ struct io_wq_work *tail;
++
++ if (!io_wq_is_hashed(work)) {
++append:
++ wq_list_add_tail(&work->list, &acct->work_list);
++ return;
++ }
++
++ hash = io_get_work_hash(work);
++ tail = wqe->hash_tail[hash];
++ wqe->hash_tail[hash] = work;
++ if (!tail)
++ goto append;
++
++ wq_list_add_after(&work->list, &tail->list, &acct->work_list);
++}
++
++static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
++{
++ return work == data;
++}
++
++static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
++{
++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
++ unsigned work_flags = work->flags;
++ bool do_create;
++
++ /*
++ * If io-wq is exiting for this task, or if the request has explicitly
++ * been marked as one that should not get executed, cancel it here.
++ */
++ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
++ (work->flags & IO_WQ_WORK_CANCEL)) {
++ io_run_cancel(work, wqe);
++ return;
++ }
++
++ raw_spin_lock(&wqe->lock);
++ io_wqe_insert_work(wqe, work);
++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
++
++ rcu_read_lock();
++ do_create = !io_wqe_activate_free_worker(wqe, acct);
++ rcu_read_unlock();
++
++ raw_spin_unlock(&wqe->lock);
++
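++ /*
++ * No free worker was woken: create one if this work may run
++ * concurrently or if nothing is currently running for this acct.
++ */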
++ if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
++ !atomic_read(&acct->nr_running))) {
++ bool did_create;
++
++ did_create = io_wqe_create_worker(wqe, acct);
++ if (likely(did_create))
++ return;
++
++ raw_spin_lock(&wqe->lock);
++ /* fatal condition, failed to create the first worker */
++ if (!acct->nr_workers) {
++ struct io_cb_cancel_data match = {
++ .fn = io_wq_work_match_item,
++ .data = work,
++ .cancel_all = false,
++ };
++
++ if (io_acct_cancel_pending_work(wqe, acct, &match))
++ raw_spin_lock(&wqe->lock);
++ }
++ raw_spin_unlock(&wqe->lock);
++ }
++}
++
++void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
++{
++ struct io_wqe *wqe = wq->wqes[numa_node_id()];
++
++ io_wqe_enqueue(wqe, work);
++}
++
++/*
++ * Work items that hash to the same value will not be done in parallel.
++ * Used to limit concurrent writes, generally hashed by inode.
++ */
++void io_wq_hash_work(struct io_wq_work *work, void *val)
++{
++ unsigned int bit;
++
++ bit = hash_ptr(val, IO_WQ_HASH_ORDER);
++ work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
++}
++
++static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
++{
++ struct io_cb_cancel_data *match = data;
++
++ /*
++ * Hold the lock to avoid ->cur_work going out of scope, caller
++ * may dereference the passed in work.
++ */
++ spin_lock(&worker->lock);
++ if (worker->cur_work &&
++ match->fn(worker->cur_work, match->data)) {
++ set_notify_signal(worker->task);
++ match->nr_running++;
++ }
++ spin_unlock(&worker->lock);
++
++ return match->nr_running && !match->cancel_all;
++}
++
++static inline void io_wqe_remove_pending(struct io_wqe *wqe,
++ struct io_wq_work *work,
++ struct io_wq_work_node *prev)
++{
++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
++ unsigned int hash = io_get_work_hash(work);
++ struct io_wq_work *prev_work = NULL;
++
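++ /*
++ * If this work is the tail of its hash chain, move hash_tail back to
++ * the previous entry when it shares the hash, otherwise the chain is
++ * now empty.
++ */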
++ if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) {
++ if (prev)
++ prev_work = container_of(prev, struct io_wq_work, list);
++ if (prev_work && io_get_work_hash(prev_work) == hash)
++ wqe->hash_tail[hash] = prev_work;
++ else
++ wqe->hash_tail[hash] = NULL;
++ }
++ wq_list_del(&acct->work_list, &work->list, prev);
++}
++
++static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
++ struct io_wqe_acct *acct,
++ struct io_cb_cancel_data *match)
++ __releases(wqe->lock)
++{
++ struct io_wq_work_node *node, *prev;
++ struct io_wq_work *work;
++
++ wq_list_for_each(node, prev, &acct->work_list) {
++ work = container_of(node, struct io_wq_work, list);
++ if (!match->fn(work, match->data))
++ continue;
++ io_wqe_remove_pending(wqe, work, prev);
++ raw_spin_unlock(&wqe->lock);
++ io_run_cancel(work, wqe);
++ match->nr_pending++;
++ /* not safe to continue after unlock */
++ return true;
++ }
++
++ return false;
++}
++
++static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
++ struct io_cb_cancel_data *match)
++{
++ int i;
++retry:
++ raw_spin_lock(&wqe->lock);
++ for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++ struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
++
++ if (io_acct_cancel_pending_work(wqe, acct, match)) {
++ if (match->cancel_all)
++ goto retry;
++ return;
++ }
++ }
++ raw_spin_unlock(&wqe->lock);
++}
++
++static void io_wqe_cancel_running_work(struct io_wqe *wqe,
++ struct io_cb_cancel_data *match)
++{
++ rcu_read_lock();
++ io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
++ rcu_read_unlock();
++}
++
++enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
++ void *data, bool cancel_all)
++{
++ struct io_cb_cancel_data match = {
++ .fn = cancel,
++ .data = data,
++ .cancel_all = cancel_all,
++ };
++ int node;
++
++ /*
++ * First check pending list, if we're lucky we can just remove it
++ * from there. CANCEL_OK means that the work is returned as-new,
++ * no completion will be posted for it.
++ */
++ for_each_node(node) {
++ struct io_wqe *wqe = wq->wqes[node];
++
++ io_wqe_cancel_pending_work(wqe, &match);
++ if (match.nr_pending && !match.cancel_all)
++ return IO_WQ_CANCEL_OK;
++ }
++
++ /*
++ * Now check if a free (going busy) or busy worker has the work
++ * currently running. If we find it there, we'll return CANCEL_RUNNING
++ * as an indication that we attempt to signal cancellation. The
++ * completion will run normally in this case.
++ */
++ for_each_node(node) {
++ struct io_wqe *wqe = wq->wqes[node];
++
++ io_wqe_cancel_running_work(wqe, &match);
++ if (match.nr_running && !match.cancel_all)
++ return IO_WQ_CANCEL_RUNNING;
++ }
++
++ if (match.nr_running)
++ return IO_WQ_CANCEL_RUNNING;
++ if (match.nr_pending)
++ return IO_WQ_CANCEL_OK;
++ return IO_WQ_CANCEL_NOTFOUND;
++}
++
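++/*
++ * Wait queue callback, invoked when a hashed work slot is released: clear
++ * the stalled state and wake a free worker for any acct that was stalled.
++ */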
++static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
++ int sync, void *key)
++{
++ struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
++ int i;
++
++ list_del_init(&wait->entry);
++
++ rcu_read_lock();
++ for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++ struct io_wqe_acct *acct = &wqe->acct[i];
++
++ if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
++ io_wqe_activate_free_worker(wqe, acct);
++ }
++ rcu_read_unlock();
++ return 1;
++}
++
++struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
++{
++ int ret, node, i;
++ struct io_wq *wq;
++
++ if (WARN_ON_ONCE(!data->free_work || !data->do_work))
++ return ERR_PTR(-EINVAL);
++ if (WARN_ON_ONCE(!bounded))
++ return ERR_PTR(-EINVAL);
++
++ wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL);
++ if (!wq)
++ return ERR_PTR(-ENOMEM);
++ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
++ if (ret)
++ goto err_wq;
++
++ refcount_inc(&data->hash->refs);
++ wq->hash = data->hash;
++ wq->free_work = data->free_work;
++ wq->do_work = data->do_work;
++
++ ret = -ENOMEM;
++ for_each_node(node) {
++ struct io_wqe *wqe;
++ int alloc_node = node;
++
++ if (!node_online(alloc_node))
++ alloc_node = NUMA_NO_NODE;
++ wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
++ if (!wqe)
++ goto err;
++ wq->wqes[node] = wqe;
++ if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
++ goto err;
++ cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
++ wqe->node = alloc_node;
++ wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
++ wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
++ task_rlimit(current, RLIMIT_NPROC);
++ INIT_LIST_HEAD(&wqe->wait.entry);
++ wqe->wait.func = io_wqe_hash_wake;
++ for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++ struct io_wqe_acct *acct = &wqe->acct[i];
++
++ acct->index = i;
++ atomic_set(&acct->nr_running, 0);
++ INIT_WQ_LIST(&acct->work_list);
++ }
++ wqe->wq = wq;
++ raw_spin_lock_init(&wqe->lock);
++ INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
++ INIT_LIST_HEAD(&wqe->all_list);
++ }
++
++ wq->task = get_task_struct(data->task);
++ atomic_set(&wq->worker_refs, 1);
++ init_completion(&wq->worker_done);
++ return wq;
++err:
++ io_wq_put_hash(data->hash);
++ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
++ for_each_node(node) {
++ if (!wq->wqes[node])
++ continue;
++ free_cpumask_var(wq->wqes[node]->cpu_mask);
++ kfree(wq->wqes[node]);
++ }
++err_wq:
++ kfree(wq);
++ return ERR_PTR(ret);
++}
++
++static bool io_task_work_match(struct callback_head *cb, void *data)
++{
++ struct io_worker *worker;
++
++ if (cb->func != create_worker_cb && cb->func != create_worker_cont)
++ return false;
++ worker = container_of(cb, struct io_worker, create_work);
++ return worker->wqe->wq == data;
++}
++
++void io_wq_exit_start(struct io_wq *wq)
++{
++ set_bit(IO_WQ_BIT_EXIT, &wq->state);
++}
++
++static void io_wq_cancel_tw_create(struct io_wq *wq)
++{
++ struct callback_head *cb;
++
++ while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
++ struct io_worker *worker;
++
++ worker = container_of(cb, struct io_worker, create_work);
++ io_worker_cancel_cb(worker);
++ /*
++ * Only the worker continuation helper has worker allocated and
++ * hence needs freeing.
++ */
++ if (cb->func == create_worker_cont)
++ kfree(worker);
++ }
++}
++
++static void io_wq_exit_workers(struct io_wq *wq)
++{
++ int node;
++
++ if (!wq->task)
++ return;
++
++ io_wq_cancel_tw_create(wq);
++
++ rcu_read_lock();
++ for_each_node(node) {
++ struct io_wqe *wqe = wq->wqes[node];
++
++ io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
++ }
++ rcu_read_unlock();
++ io_worker_ref_put(wq);
++ wait_for_completion(&wq->worker_done);
++
++ for_each_node(node) {
++ spin_lock_irq(&wq->hash->wait.lock);
++ list_del_init(&wq->wqes[node]->wait.entry);
++ spin_unlock_irq(&wq->hash->wait.lock);
++ }
++ put_task_struct(wq->task);
++ wq->task = NULL;
++}
++
++static void io_wq_destroy(struct io_wq *wq)
++{
++ int node;
++
++ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
++
++ for_each_node(node) {
++ struct io_wqe *wqe = wq->wqes[node];
++ struct io_cb_cancel_data match = {
++ .fn = io_wq_work_match_all,
++ .cancel_all = true,
++ };
++ io_wqe_cancel_pending_work(wqe, &match);
++ free_cpumask_var(wqe->cpu_mask);
++ kfree(wqe);
++ }
++ io_wq_put_hash(wq->hash);
++ kfree(wq);
++}
++
++void io_wq_put_and_exit(struct io_wq *wq)
++{
++ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
++
++ io_wq_exit_workers(wq);
++ io_wq_destroy(wq);
++}
++
++struct online_data {
++ unsigned int cpu;
++ bool online;
++};
++
++static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
++{
++ struct online_data *od = data;
++
++ if (od->online)
++ cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
++ else
++ cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
++ return false;
++}
++
++static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
++{
++ struct online_data od = {
++ .cpu = cpu,
++ .online = online
++ };
++ int i;
++
++ rcu_read_lock();
++ for_each_node(i)
++ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
++ rcu_read_unlock();
++ return 0;
++}
++
++static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
++{
++ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
++
++ return __io_wq_cpu_online(wq, cpu, true);
++}
++
++static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
++{
++ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
++
++ return __io_wq_cpu_online(wq, cpu, false);
++}
++
++int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
++{
++ int i;
++
++ rcu_read_lock();
++ for_each_node(i) {
++ struct io_wqe *wqe = wq->wqes[i];
++
++ if (mask)
++ cpumask_copy(wqe->cpu_mask, mask);
++ else
++ cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
++ }
++ rcu_read_unlock();
++ return 0;
++}
++
++/*
++ * Set the max number of bounded and unbounded workers and return the old
++ * values through new_count. An entry of 0 leaves that limit unchanged and
++ * only reports the previous value.
++ */
++int io_wq_max_workers(struct io_wq *wq, int *new_count)
++{
++ int prev[IO_WQ_ACCT_NR];
++ bool first_node = true;
++ int i, node;
++
++ BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND);
++ BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
++ BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
++
++ for (i = 0; i < 2; i++) {
++ if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
++ new_count[i] = task_rlimit(current, RLIMIT_NPROC);
++ }
++
++ for (i = 0; i < IO_WQ_ACCT_NR; i++)
++ prev[i] = 0;
++
++ rcu_read_lock();
++ for_each_node(node) {
++ struct io_wqe *wqe = wq->wqes[node];
++ struct io_wqe_acct *acct;
++
++ raw_spin_lock(&wqe->lock);
++ for (i = 0; i < IO_WQ_ACCT_NR; i++) {
++ acct = &wqe->acct[i];
++ if (first_node)
++ prev[i] = max_t(int, acct->max_workers, prev[i]);
++ if (new_count[i])
++ acct->max_workers = new_count[i];
++ }
++ raw_spin_unlock(&wqe->lock);
++ first_node = false;
++ }
++ rcu_read_unlock();
++
++ for (i = 0; i < IO_WQ_ACCT_NR; i++)
++ new_count[i] = prev[i];
++
++ return 0;
++}
++
++static __init int io_wq_init(void)
++{
++ int ret;
++
++ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
++ io_wq_cpu_online, io_wq_cpu_offline);
++ if (ret < 0)
++ return ret;
++ io_wq_online = ret;
++ return 0;
++}
++subsys_initcall(io_wq_init);
+diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
+new file mode 100644
+index 0000000000000..48721cbd5f40b
+--- /dev/null
++++ b/io_uring/io-wq.h
+@@ -0,0 +1,161 @@
++#ifndef INTERNAL_IO_WQ_H
++#define INTERNAL_IO_WQ_H
++
++#include <linux/refcount.h>
++
++struct io_wq;
++
++enum {
++ IO_WQ_WORK_CANCEL = 1,
++ IO_WQ_WORK_HASHED = 2,
++ IO_WQ_WORK_UNBOUND = 4,
++ IO_WQ_WORK_CONCURRENT = 16,
++
++ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
++};
++
++enum io_wq_cancel {
++ IO_WQ_CANCEL_OK, /* cancelled before started */
++ IO_WQ_CANCEL_RUNNING, /* found, running, and cancellation attempted */
++ IO_WQ_CANCEL_NOTFOUND, /* work not found */
++};
++
++struct io_wq_work_node {
++ struct io_wq_work_node *next;
++};
++
++struct io_wq_work_list {
++ struct io_wq_work_node *first;
++ struct io_wq_work_node *last;
++};
++
++static inline void wq_list_add_after(struct io_wq_work_node *node,
++ struct io_wq_work_node *pos,
++ struct io_wq_work_list *list)
++{
++ struct io_wq_work_node *next = pos->next;
++
++ pos->next = node;
++ node->next = next;
++ if (!next)
++ list->last = node;
++}
++
++static inline void wq_list_add_tail(struct io_wq_work_node *node,
++ struct io_wq_work_list *list)
++{
++ node->next = NULL;
++ if (!list->first) {
++ list->last = node;
++ WRITE_ONCE(list->first, node);
++ } else {
++ list->last->next = node;
++ list->last = node;
++ }
++}
++
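++/*
++ * Remove the run of nodes ending at @last from @list. @prev is the node
++ * just before that run, or NULL when the run starts at the head of the list.
++ */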
++static inline void wq_list_cut(struct io_wq_work_list *list,
++ struct io_wq_work_node *last,
++ struct io_wq_work_node *prev)
++{
++ /* first in the list, if prev==NULL */
++ if (!prev)
++ WRITE_ONCE(list->first, last->next);
++ else
++ prev->next = last->next;
++
++ if (last == list->last)
++ list->last = prev;
++ last->next = NULL;
++}
++
++static inline void wq_list_del(struct io_wq_work_list *list,
++ struct io_wq_work_node *node,
++ struct io_wq_work_node *prev)
++{
++ wq_list_cut(list, node, prev);
++}
++
++#define wq_list_for_each(pos, prv, head) \
++ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
++
++#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
++#define INIT_WQ_LIST(list) do { \
++ (list)->first = NULL; \
++ (list)->last = NULL; \
++} while (0)
++
++struct io_wq_work {
++ struct io_wq_work_node list;
++ unsigned flags;
++};
++
++static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
++{
++ if (!work->list.next)
++ return NULL;
++
++ return container_of(work->list.next, struct io_wq_work, list);
++}
++
++typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
++typedef void (io_wq_work_fn)(struct io_wq_work *);
++
++struct io_wq_hash {
++ refcount_t refs;
++ unsigned long map;
++ struct wait_queue_head wait;
++};
++
++static inline void io_wq_put_hash(struct io_wq_hash *hash)
++{
++ if (refcount_dec_and_test(&hash->refs))
++ kfree(hash);
++}
++
++struct io_wq_data {
++ struct io_wq_hash *hash;
++ struct task_struct *task;
++ io_wq_work_fn *do_work;
++ free_work_fn *free_work;
++};
++
++struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
++void io_wq_exit_start(struct io_wq *wq);
++void io_wq_put_and_exit(struct io_wq *wq);
++
++void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
++void io_wq_hash_work(struct io_wq_work *work, void *val);
++
++int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
++int io_wq_max_workers(struct io_wq *wq, int *new_count);
++bool io_wq_worker_stopped(void);
++
++static inline bool io_wq_is_hashed(struct io_wq_work *work)
++{
++ return work->flags & IO_WQ_WORK_HASHED;
++}
++
++typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
++
++enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
++ void *data, bool cancel_all);
++
++#if defined(CONFIG_IO_WQ)
++extern void io_wq_worker_sleeping(struct task_struct *);
++extern void io_wq_worker_running(struct task_struct *);
++#else
++static inline void io_wq_worker_sleeping(struct task_struct *tsk)
++{
++}
++static inline void io_wq_worker_running(struct task_struct *tsk)
++{
++}
++#endif
++
++static inline bool io_wq_current_is_worker(void)
++{
++ return in_task() && (current->flags & PF_IO_WORKER) &&
++ current->pf_io_worker;
++}
++#endif
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+new file mode 100644
+index 0000000000000..1519125b98147
+--- /dev/null
++++ b/io_uring/io_uring.c
+@@ -0,0 +1,11374 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Shared application/kernel submission and completion ring pairs, for
++ * supporting fast/efficient IO.
++ *
++ * A note on the read/write ordering memory barriers that are matched between
++ * the application and kernel side.
++ *
++ * After the application reads the CQ ring tail, it must use an
++ * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
++ * before writing the tail (using smp_load_acquire to read the tail will
++ * do). It also needs a smp_mb() before updating CQ head (ordering the
++ * entry load(s) with the head store), pairing with an implicit barrier
++ * through a control-dependency in io_get_cqe (smp_store_release to
++ * store head will do). Failure to do so could lead to reading invalid
++ * CQ entries.
++ *
++ * Likewise, the application must use an appropriate smp_wmb() before
++ * writing the SQ tail (ordering SQ entry stores with the tail store),
++ * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
++ * to store the tail will do). And it needs a barrier ordering the SQ
++ * head load before writing new SQ entries (smp_load_acquire to read
++ * head will do).
++ *
++ * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
++ * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
++ * updating the SQ tail; a full memory barrier smp_mb() is needed
++ * between.
++ *
++ * Also see the examples in the liburing library:
++ *
++ * git://git.kernel.dk/liburing
++ *
++ * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
++ * from data shared between the kernel and application. This is done both
++ * for ordering purposes, but also to ensure that once a value is loaded from
++ * data that the application could potentially modify, it remains stable.
++ *
++ * Copyright (C) 2018-2019 Jens Axboe
++ * Copyright (c) 2018-2019 Christoph Hellwig
++ */
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/errno.h>
++#include <linux/syscalls.h>
++#include <linux/compat.h>
++#include <net/compat.h>
++#include <linux/refcount.h>
++#include <linux/uio.h>
++#include <linux/bits.h>
++
++#include <linux/sched/signal.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/fdtable.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/percpu.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/bvec.h>
++#include <linux/net.h>
++#include <net/sock.h>
++#include <net/af_unix.h>
++#include <net/scm.h>
++#include <linux/anon_inodes.h>
++#include <linux/sched/mm.h>
++#include <linux/uaccess.h>
++#include <linux/nospec.h>
++#include <linux/sizes.h>
++#include <linux/hugetlb.h>
++#include <linux/highmem.h>
++#include <linux/namei.h>
++#include <linux/fsnotify.h>
++#include <linux/fadvise.h>
++#include <linux/eventpoll.h>
++#include <linux/splice.h>
++#include <linux/task_work.h>
++#include <linux/pagemap.h>
++#include <linux/io_uring.h>
++#include <linux/tracehook.h>
++
++#define CREATE_TRACE_POINTS
++#include <trace/events/io_uring.h>
++
++#include <uapi/linux/io_uring.h>
++
++#include "../fs/internal.h"
++#include "io-wq.h"
++
++#define IORING_MAX_ENTRIES 32768
++#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
++#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
++
++/* only define max */
++#define IORING_MAX_FIXED_FILES (1U << 15)
++#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
++ IORING_REGISTER_LAST + IORING_OP_LAST)
++
++#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
++#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
++#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
++
++#define IORING_MAX_REG_BUFFERS (1U << 14)
++
++#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
++ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
++ IOSQE_BUFFER_SELECT)
++#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
++ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS)
++
++#define IO_TCTX_REFS_CACHE_NR (1U << 10)
++
++struct io_uring {
++ u32 head ____cacheline_aligned_in_smp;
++ u32 tail ____cacheline_aligned_in_smp;
++};
++
++/*
++ * This data is shared with the application through the mmap at offsets
++ * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING.
++ *
++ * The offsets to the member fields are published through struct
++ * io_sqring_offsets when calling io_uring_setup.
++ */
++struct io_rings {
++ /*
++ * Head and tail offsets into the ring; the offsets need to be
++ * masked to get valid indices.
++ *
++ * The kernel controls head of the sq ring and the tail of the cq ring,
++ * and the application controls tail of the sq ring and the head of the
++ * cq ring.
++ */
++ struct io_uring sq, cq;
++ /*
++ * Bitmasks to apply to head and tail offsets (constant, equals
++ * ring_entries - 1)
++ */
++ u32 sq_ring_mask, cq_ring_mask;
++ /* Ring sizes (constant, power of 2) */
++ u32 sq_ring_entries, cq_ring_entries;
++ /*
++ * Number of invalid entries dropped by the kernel due to
++ * invalid index stored in array
++ *
++ * Written by the kernel, shouldn't be modified by the
++ * application (i.e. get number of "new events" by comparing to
++ * cached value).
++ *
++ * After a new SQ head value was read by the application this
++ * counter includes all submissions that were dropped reaching
++ * the new SQ head (and possibly more).
++ */
++ u32 sq_dropped;
++ /*
++ * Runtime SQ flags
++ *
++ * Written by the kernel, shouldn't be modified by the
++ * application.
++ *
++ * The application needs a full memory barrier before checking
++ * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
++ */
++ u32 sq_flags;
++ /*
++ * Runtime CQ flags
++ *
++ * Written by the application, shouldn't be modified by the
++ * kernel.
++ */
++ u32 cq_flags;
++ /*
++ * Number of completion events lost because the queue was full;
++ * this should be avoided by the application by making sure
++ * there are not more requests pending than there is space in
++ * the completion queue.
++ *
++ * Written by the kernel, shouldn't be modified by the
++ * application (i.e. get number of "new events" by comparing to
++ * cached value).
++ *
++ * As completion events come in out of order this counter is not
++ * ordered with any other data.
++ */
++ u32 cq_overflow;
++ /*
++ * Ring buffer of completion events.
++ *
++ * The kernel writes completion events fresh every time they are
++ * produced, so the application is allowed to modify pending
++ * entries.
++ */
++ struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
++};
++
++enum io_uring_cmd_flags {
++ IO_URING_F_NONBLOCK = 1,
++ IO_URING_F_COMPLETE_DEFER = 2,
++};
++
++struct io_mapped_ubuf {
++ u64 ubuf;
++ u64 ubuf_end;
++ unsigned int nr_bvecs;
++ unsigned long acct_pages;
++ struct bio_vec bvec[];
++};
++
++struct io_ring_ctx;
++
++struct io_overflow_cqe {
++ struct io_uring_cqe cqe;
++ struct list_head list;
++};
++
++struct io_fixed_file {
++ /* file * with additional FFS_* flags */
++ unsigned long file_ptr;
++};
++
++struct io_rsrc_put {
++ struct list_head list;
++ u64 tag;
++ union {
++ void *rsrc;
++ struct file *file;
++ struct io_mapped_ubuf *buf;
++ };
++};
++
++struct io_file_table {
++ struct io_fixed_file *files;
++};
++
++struct io_rsrc_node {
++ struct percpu_ref refs;
++ struct list_head node;
++ struct list_head rsrc_list;
++ struct io_rsrc_data *rsrc_data;
++ struct llist_node llist;
++ bool done;
++};
++
++typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
++
++struct io_rsrc_data {
++ struct io_ring_ctx *ctx;
++
++ u64 **tags;
++ unsigned int nr;
++ rsrc_put_fn *do_put;
++ atomic_t refs;
++ struct completion done;
++ bool quiesce;
++};
++
++struct io_buffer {
++ struct list_head list;
++ __u64 addr;
++ __u32 len;
++ __u16 bid;
++};
++
++struct io_restriction {
++ DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
++ DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
++ u8 sqe_flags_allowed;
++ u8 sqe_flags_required;
++ bool registered;
++};
++
++enum {
++ IO_SQ_THREAD_SHOULD_STOP = 0,
++ IO_SQ_THREAD_SHOULD_PARK,
++};
++
++struct io_sq_data {
++ refcount_t refs;
++ atomic_t park_pending;
++ struct mutex lock;
++
++ /* ctx's that are using this sqd */
++ struct list_head ctx_list;
++
++ struct task_struct *thread;
++ struct wait_queue_head wait;
++
++ unsigned sq_thread_idle;
++ int sq_cpu;
++ pid_t task_pid;
++ pid_t task_tgid;
++
++ unsigned long state;
++ struct completion exited;
++};
++
++#define IO_COMPL_BATCH 32
++#define IO_REQ_CACHE_SIZE 32
++#define IO_REQ_ALLOC_BATCH 8
++
++struct io_submit_link {
++ struct io_kiocb *head;
++ struct io_kiocb *last;
++};
++
++struct io_submit_state {
++ struct blk_plug plug;
++ struct io_submit_link link;
++
++ /*
++ * io_kiocb alloc cache
++ */
++ void *reqs[IO_REQ_CACHE_SIZE];
++ unsigned int free_reqs;
++
++ bool plug_started;
++
++ /*
++ * Batch completion logic
++ */
++ struct io_kiocb *compl_reqs[IO_COMPL_BATCH];
++ unsigned int compl_nr;
++ /* inline/task_work completion list, under ->uring_lock */
++ struct list_head free_list;
++
++ unsigned int ios_left;
++};
++
++struct io_ring_ctx {
++ /* const or read-mostly hot data */
++ struct {
++ struct percpu_ref refs;
++
++ struct io_rings *rings;
++ unsigned int flags;
++ unsigned int compat: 1;
++ unsigned int drain_next: 1;
++ unsigned int eventfd_async: 1;
++ unsigned int restricted: 1;
++ unsigned int off_timeout_used: 1;
++ unsigned int drain_active: 1;
++ } ____cacheline_aligned_in_smp;
++
++ /* submission data */
++ struct {
++ struct mutex uring_lock;
++
++ /*
++ * Ring buffer of indices into array of io_uring_sqe, which is
++ * mmapped by the application using the IORING_OFF_SQES offset.
++ *
++ * This indirection could e.g. be used to assign fixed
++ * io_uring_sqe entries to operations and only submit them to
++ * the queue when needed.
++ *
++ * The kernel modifies neither the indices array nor the entries
++ * array.
++ */
++ u32 *sq_array;
++ struct io_uring_sqe *sq_sqes;
++ unsigned cached_sq_head;
++ unsigned sq_entries;
++ struct list_head defer_list;
++
++ /*
++ * Fixed resources fast path, should be accessed only under
++ * uring_lock, and updated through io_uring_register(2)
++ */
++ struct io_rsrc_node *rsrc_node;
++ struct io_file_table file_table;
++ unsigned nr_user_files;
++ unsigned nr_user_bufs;
++ struct io_mapped_ubuf **user_bufs;
++
++ struct io_submit_state submit_state;
++ struct list_head timeout_list;
++ struct list_head ltimeout_list;
++ struct list_head cq_overflow_list;
++ struct xarray io_buffers;
++ struct xarray personalities;
++ u32 pers_next;
++ unsigned sq_thread_idle;
++ } ____cacheline_aligned_in_smp;
++
++ /* IRQ completion list, under ->completion_lock */
++ struct list_head locked_free_list;
++ unsigned int locked_free_nr;
++
++ const struct cred *sq_creds; /* cred used for __io_sq_thread() */
++ struct io_sq_data *sq_data; /* if using sq thread polling */
++
++ struct wait_queue_head sqo_sq_wait;
++ struct list_head sqd_list;
++
++ unsigned long check_cq_overflow;
++
++ struct {
++ unsigned cached_cq_tail;
++ unsigned cq_entries;
++ struct eventfd_ctx *cq_ev_fd;
++ struct wait_queue_head poll_wait;
++ struct wait_queue_head cq_wait;
++ unsigned cq_extra;
++ atomic_t cq_timeouts;
++ unsigned cq_last_tm_flush;
++ } ____cacheline_aligned_in_smp;
++
++ struct {
++ spinlock_t completion_lock;
++
++ spinlock_t timeout_lock;
++
++ /*
++ * ->iopoll_list is protected by the ctx->uring_lock for
++ * io_uring instances that don't use IORING_SETUP_SQPOLL.
++ * For SQPOLL, only the single threaded io_sq_thread() will
++ * manipulate the list, hence no extra locking is needed there.
++ */
++ struct list_head iopoll_list;
++ struct hlist_head *cancel_hash;
++ unsigned cancel_hash_bits;
++ bool poll_multi_queue;
++ } ____cacheline_aligned_in_smp;
++
++ struct io_restriction restrictions;
++
++ /* slow path rsrc auxilary data, used by update/register */
++ struct {
++ struct io_rsrc_node *rsrc_backup_node;
++ struct io_mapped_ubuf *dummy_ubuf;
++ struct io_rsrc_data *file_data;
++ struct io_rsrc_data *buf_data;
++
++ struct delayed_work rsrc_put_work;
++ struct llist_head rsrc_put_llist;
++ struct list_head rsrc_ref_list;
++ spinlock_t rsrc_ref_lock;
++ };
++
++ /* Keep this last, we don't need it for the fast path */
++ struct {
++ #if defined(CONFIG_UNIX)
++ struct socket *ring_sock;
++ #endif
++ /* hashed buffered write serialization */
++ struct io_wq_hash *hash_map;
++
++ /* Only used for accounting purposes */
++ struct user_struct *user;
++ struct mm_struct *mm_account;
++
++ /* ctx exit and cancelation */
++ struct llist_head fallback_llist;
++ struct delayed_work fallback_work;
++ struct work_struct exit_work;
++ struct list_head tctx_list;
++ struct completion ref_comp;
++ u32 iowq_limits[2];
++ bool iowq_limits_set;
++ };
++};
++
++struct io_uring_task {
++ /* submission side */
++ int cached_refs;
++ struct xarray xa;
++ struct wait_queue_head wait;
++ const struct io_ring_ctx *last;
++ struct io_wq *io_wq;
++ struct percpu_counter inflight;
++ atomic_t inflight_tracked;
++ atomic_t in_idle;
++
++ spinlock_t task_lock;
++ struct io_wq_work_list task_list;
++ struct callback_head task_work;
++ bool task_running;
++};
++
++/*
++ * First field must be the file pointer in all the
++ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
++ */
++struct io_poll_iocb {
++ struct file *file;
++ struct wait_queue_head *head;
++ __poll_t events;
++ int retries;
++ struct wait_queue_entry wait;
++};
++
++struct io_poll_update {
++ struct file *file;
++ u64 old_user_data;
++ u64 new_user_data;
++ __poll_t events;
++ bool update_events;
++ bool update_user_data;
++};
++
++struct io_close {
++ struct file *file;
++ int fd;
++ u32 file_slot;
++};
++
++struct io_timeout_data {
++ struct io_kiocb *req;
++ struct hrtimer timer;
++ struct timespec64 ts;
++ enum hrtimer_mode mode;
++ u32 flags;
++};
++
++struct io_accept {
++ struct file *file;
++ struct sockaddr __user *addr;
++ int __user *addr_len;
++ int flags;
++ u32 file_slot;
++ unsigned long nofile;
++};
++
++struct io_sync {
++ struct file *file;
++ loff_t len;
++ loff_t off;
++ int flags;
++ int mode;
++};
++
++struct io_cancel {
++ struct file *file;
++ u64 addr;
++};
++
++struct io_timeout {
++ struct file *file;
++ u32 off;
++ u32 target_seq;
++ struct list_head list;
++ /* head of the link, used by linked timeouts only */
++ struct io_kiocb *head;
++ /* for linked completions */
++ struct io_kiocb *prev;
++};
++
++struct io_timeout_rem {
++ struct file *file;
++ u64 addr;
++
++ /* timeout update */
++ struct timespec64 ts;
++ u32 flags;
++ bool ltimeout;
++};
++
++struct io_rw {
++ /* NOTE: kiocb has the file as the first member, so don't do it here */
++ struct kiocb kiocb;
++ u64 addr;
++ u64 len;
++};
++
++struct io_connect {
++ struct file *file;
++ struct sockaddr __user *addr;
++ int addr_len;
++};
++
++struct io_sr_msg {
++ struct file *file;
++ union {
++ struct compat_msghdr __user *umsg_compat;
++ struct user_msghdr __user *umsg;
++ void __user *buf;
++ };
++ int msg_flags;
++ int bgid;
++ size_t len;
++ size_t done_io;
++ struct io_buffer *kbuf;
++ void __user *msg_control;
++};
++
++struct io_open {
++ struct file *file;
++ int dfd;
++ u32 file_slot;
++ struct filename *filename;
++ struct open_how how;
++ unsigned long nofile;
++};
++
++struct io_rsrc_update {
++ struct file *file;
++ u64 arg;
++ u32 nr_args;
++ u32 offset;
++};
++
++struct io_fadvise {
++ struct file *file;
++ u64 offset;
++ u32 len;
++ u32 advice;
++};
++
++struct io_madvise {
++ struct file *file;
++ u64 addr;
++ u32 len;
++ u32 advice;
++};
++
++struct io_epoll {
++ struct file *file;
++ int epfd;
++ int op;
++ int fd;
++ struct epoll_event event;
++};
++
++struct io_splice {
++ struct file *file_out;
++ loff_t off_out;
++ loff_t off_in;
++ u64 len;
++ int splice_fd_in;
++ unsigned int flags;
++};
++
++struct io_provide_buf {
++ struct file *file;
++ __u64 addr;
++ __u32 len;
++ __u32 bgid;
++ __u16 nbufs;
++ __u16 bid;
++};
++
++struct io_statx {
++ struct file *file;
++ int dfd;
++ unsigned int mask;
++ unsigned int flags;
++ const char __user *filename;
++ struct statx __user *buffer;
++};
++
++struct io_shutdown {
++ struct file *file;
++ int how;
++};
++
++struct io_rename {
++ struct file *file;
++ int old_dfd;
++ int new_dfd;
++ struct filename *oldpath;
++ struct filename *newpath;
++ int flags;
++};
++
++struct io_unlink {
++ struct file *file;
++ int dfd;
++ int flags;
++ struct filename *filename;
++};
++
++struct io_mkdir {
++ struct file *file;
++ int dfd;
++ umode_t mode;
++ struct filename *filename;
++};
++
++struct io_symlink {
++ struct file *file;
++ int new_dfd;
++ struct filename *oldpath;
++ struct filename *newpath;
++};
++
++struct io_hardlink {
++ struct file *file;
++ int old_dfd;
++ int new_dfd;
++ struct filename *oldpath;
++ struct filename *newpath;
++ int flags;
++};
++
++struct io_completion {
++ struct file *file;
++ u32 cflags;
++};
++
++struct io_async_connect {
++ struct sockaddr_storage address;
++};
++
++struct io_async_msghdr {
++ struct iovec fast_iov[UIO_FASTIOV];
++ /* points to an allocated iov, if NULL we use fast_iov instead */
++ struct iovec *free_iov;
++ struct sockaddr __user *uaddr;
++ struct msghdr msg;
++ struct sockaddr_storage addr;
++};
++
++struct io_async_rw {
++ struct iovec fast_iov[UIO_FASTIOV];
++ const struct iovec *free_iovec;
++ struct iov_iter iter;
++ struct iov_iter_state iter_state;
++ size_t bytes_done;
++ struct wait_page_queue wpq;
++};
++
++enum {
++ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
++ REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
++ REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT,
++ REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
++ REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
++ REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
++
++ /* first byte is taken by user flags, shift it to not overlap */
++ REQ_F_FAIL_BIT = 8,
++ REQ_F_INFLIGHT_BIT,
++ REQ_F_CUR_POS_BIT,
++ REQ_F_NOWAIT_BIT,
++ REQ_F_LINK_TIMEOUT_BIT,
++ REQ_F_NEED_CLEANUP_BIT,
++ REQ_F_POLLED_BIT,
++ REQ_F_BUFFER_SELECTED_BIT,
++ REQ_F_COMPLETE_INLINE_BIT,
++ REQ_F_REISSUE_BIT,
++ REQ_F_CREDS_BIT,
++ REQ_F_REFCOUNT_BIT,
++ REQ_F_ARM_LTIMEOUT_BIT,
++ REQ_F_PARTIAL_IO_BIT,
++ /* keep async read/write and isreg together and in order */
++ REQ_F_NOWAIT_READ_BIT,
++ REQ_F_NOWAIT_WRITE_BIT,
++ REQ_F_ISREG_BIT,
++
++ /* not a real bit, just to check we're not overflowing the space */
++ __REQ_F_LAST_BIT,
++};
++
++enum {
++ /* ctx owns file */
++ REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
++ /* drain existing IO first */
++ REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
++ /* linked sqes */
++ REQ_F_LINK = BIT(REQ_F_LINK_BIT),
++ /* doesn't sever on completion < 0 */
++ REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
++ /* IOSQE_ASYNC */
++ REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
++ /* IOSQE_BUFFER_SELECT */
++ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
++
++ /* fail rest of links */
++ REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
++ /* on inflight list, should be cancelled and waited on exit reliably */
++ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
++ /* read/write uses file position */
++ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
++ /* must not punt to workers */
++ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
++ /* has or had linked timeout */
++ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
++ /* needs cleanup */
++ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
++ /* already went through poll handler */
++ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
++ /* buffer already selected */
++ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
++ /* completion is deferred through io_comp_state */
++ REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
++ /* caller should reissue async */
++ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
++ /* supports async reads */
++ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT),
++ /* supports async writes */
++ REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT),
++ /* regular file */
++ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
++ /* has creds assigned */
++ REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
++ /* skip refcounting if not set */
++ REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
++ /* there is a linked timeout that has to be armed */
++ REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
++ /* request has already done partial IO */
++ REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
++};
++
++struct async_poll {
++ struct io_poll_iocb poll;
++ struct io_poll_iocb *double_poll;
++};
++
++typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
++
++struct io_task_work {
++ union {
++ struct io_wq_work_node node;
++ struct llist_node fallback_node;
++ };
++ io_req_tw_func_t func;
++};
++
++enum {
++ IORING_RSRC_FILE = 0,
++ IORING_RSRC_BUFFER = 1,
++};
++
++/*
++ * NOTE! Each of the iocb union members has the file pointer
++ * as the first entry in their struct definition. So you can
++ * access the file pointer through any of the sub-structs,
++ * or directly as just 'ki_filp' in this struct.
++ */
++struct io_kiocb {
++ union {
++ struct file *file;
++ struct io_rw rw;
++ struct io_poll_iocb poll;
++ struct io_poll_update poll_update;
++ struct io_accept accept;
++ struct io_sync sync;
++ struct io_cancel cancel;
++ struct io_timeout timeout;
++ struct io_timeout_rem timeout_rem;
++ struct io_connect connect;
++ struct io_sr_msg sr_msg;
++ struct io_open open;
++ struct io_close close;
++ struct io_rsrc_update rsrc_update;
++ struct io_fadvise fadvise;
++ struct io_madvise madvise;
++ struct io_epoll epoll;
++ struct io_splice splice;
++ struct io_provide_buf pbuf;
++ struct io_statx statx;
++ struct io_shutdown shutdown;
++ struct io_rename rename;
++ struct io_unlink unlink;
++ struct io_mkdir mkdir;
++ struct io_symlink symlink;
++ struct io_hardlink hardlink;
++ /* use only after cleaning per-op data, see io_clean_op() */
++ struct io_completion compl;
++ };
++
++ /* opcode allocated if it needs to store data for async defer */
++ void *async_data;
++ u8 opcode;
++ /* polled IO has completed */
++ u8 iopoll_completed;
++
++ u16 buf_index;
++ u32 result;
++
++ struct io_ring_ctx *ctx;
++ unsigned int flags;
++ atomic_t refs;
++ struct task_struct *task;
++ u64 user_data;
++
++ struct io_kiocb *link;
++ struct percpu_ref *fixed_rsrc_refs;
++
++ /* used with ctx->iopoll_list with reads/writes */
++ struct list_head inflight_entry;
++ struct io_task_work io_task_work;
++ /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
++ struct hlist_node hash_node;
++ struct async_poll *apoll;
++ struct io_wq_work work;
++ const struct cred *creds;
++
++ /* store used ubuf, so we can prevent reloading */
++ struct io_mapped_ubuf *imu;
++ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
++ struct io_buffer *kbuf;
++ atomic_t poll_refs;
++};
++
++struct io_tctx_node {
++ struct list_head ctx_node;
++ struct task_struct *task;
++ struct io_ring_ctx *ctx;
++};
++
++struct io_defer_entry {
++ struct list_head list;
++ struct io_kiocb *req;
++ u32 seq;
++};
++
++struct io_op_def {
++ /* needs req->file assigned */
++ unsigned needs_file : 1;
++ /* hash wq insertion if file is a regular file */
++ unsigned hash_reg_file : 1;
++ /* unbound wq insertion if file is a non-regular file */
++ unsigned unbound_nonreg_file : 1;
++ /* opcode is not supported by this kernel */
++ unsigned not_supported : 1;
++ /* set if opcode supports polled "wait" */
++ unsigned pollin : 1;
++ unsigned pollout : 1;
++ /* op supports buffer selection */
++ unsigned buffer_select : 1;
++ /* do prep async if is going to be punted */
++ unsigned needs_async_setup : 1;
++ /* should block plug */
++ unsigned plug : 1;
++ /* size of async data needed, if any */
++ unsigned short async_size;
++};
++
++static const struct io_op_def io_op_defs[] = {
++ [IORING_OP_NOP] = {},
++ [IORING_OP_READV] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ .buffer_select = 1,
++ .needs_async_setup = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_WRITEV] = {
++ .needs_file = 1,
++ .hash_reg_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ .needs_async_setup = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_FSYNC] = {
++ .needs_file = 1,
++ },
++ [IORING_OP_READ_FIXED] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_WRITE_FIXED] = {
++ .needs_file = 1,
++ .hash_reg_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_POLL_ADD] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ },
++ [IORING_OP_POLL_REMOVE] = {},
++ [IORING_OP_SYNC_FILE_RANGE] = {
++ .needs_file = 1,
++ },
++ [IORING_OP_SENDMSG] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ .needs_async_setup = 1,
++ .async_size = sizeof(struct io_async_msghdr),
++ },
++ [IORING_OP_RECVMSG] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ .buffer_select = 1,
++ .needs_async_setup = 1,
++ .async_size = sizeof(struct io_async_msghdr),
++ },
++ [IORING_OP_TIMEOUT] = {
++ .async_size = sizeof(struct io_timeout_data),
++ },
++ [IORING_OP_TIMEOUT_REMOVE] = {
++ /* used by timeout updates' prep() */
++ },
++ [IORING_OP_ACCEPT] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ },
++ [IORING_OP_ASYNC_CANCEL] = {},
++ [IORING_OP_LINK_TIMEOUT] = {
++ .async_size = sizeof(struct io_timeout_data),
++ },
++ [IORING_OP_CONNECT] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ .needs_async_setup = 1,
++ .async_size = sizeof(struct io_async_connect),
++ },
++ [IORING_OP_FALLOCATE] = {
++ .needs_file = 1,
++ },
++ [IORING_OP_OPENAT] = {},
++ [IORING_OP_CLOSE] = {},
++ [IORING_OP_FILES_UPDATE] = {},
++ [IORING_OP_STATX] = {},
++ [IORING_OP_READ] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ .buffer_select = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_WRITE] = {
++ .needs_file = 1,
++ .hash_reg_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ .plug = 1,
++ .async_size = sizeof(struct io_async_rw),
++ },
++ [IORING_OP_FADVISE] = {
++ .needs_file = 1,
++ },
++ [IORING_OP_MADVISE] = {},
++ [IORING_OP_SEND] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollout = 1,
++ },
++ [IORING_OP_RECV] = {
++ .needs_file = 1,
++ .unbound_nonreg_file = 1,
++ .pollin = 1,
++ .buffer_select = 1,
++ },
++ [IORING_OP_OPENAT2] = {
++ },
++ [IORING_OP_EPOLL_CTL] = {
++ .unbound_nonreg_file = 1,
++ },
++ [IORING_OP_SPLICE] = {
++ .needs_file = 1,
++ .hash_reg_file = 1,
++ .unbound_nonreg_file = 1,
++ },
++ [IORING_OP_PROVIDE_BUFFERS] = {},
++ [IORING_OP_REMOVE_BUFFERS] = {},
++ [IORING_OP_TEE] = {
++ .needs_file = 1,
++ .hash_reg_file = 1,
++ .unbound_nonreg_file = 1,
++ },
++ [IORING_OP_SHUTDOWN] = {
++ .needs_file = 1,
++ },
++ [IORING_OP_RENAMEAT] = {},
++ [IORING_OP_UNLINKAT] = {},
++ [IORING_OP_MKDIRAT] = {},
++ [IORING_OP_SYMLINKAT] = {},
++ [IORING_OP_LINKAT] = {},
++};
++
++/* requests with any of those set should undergo io_disarm_next() */
++#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
++
++static bool io_disarm_next(struct io_kiocb *req);
++static void io_uring_del_tctx_node(unsigned long index);
++static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
++ struct task_struct *task,
++ bool cancel_all);
++static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
++
++static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
++
++static void io_put_req(struct io_kiocb *req);
++static void io_put_req_deferred(struct io_kiocb *req);
++static void io_dismantle_req(struct io_kiocb *req);
++static void io_queue_linked_timeout(struct io_kiocb *req);
++static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
++ struct io_uring_rsrc_update2 *up,
++ unsigned nr_args);
++static void io_clean_op(struct io_kiocb *req);
++static struct file *io_file_get(struct io_ring_ctx *ctx,
++ struct io_kiocb *req, int fd, bool fixed,
++ unsigned int issue_flags);
++static void __io_queue_sqe(struct io_kiocb *req);
++static void io_rsrc_put_work(struct work_struct *work);
++
++static void io_req_task_queue(struct io_kiocb *req);
++static void io_submit_flush_completions(struct io_ring_ctx *ctx);
++static int io_req_prep_async(struct io_kiocb *req);
++
++static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
++ unsigned int issue_flags, u32 slot_index);
++static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
++
++static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
++
++static struct kmem_cache *req_cachep;
++
++static const struct file_operations io_uring_fops;
++
++struct sock *io_uring_get_socket(struct file *file)
++{
++#if defined(CONFIG_UNIX)
++ if (file->f_op == &io_uring_fops) {
++ struct io_ring_ctx *ctx = file->private_data;
++
++ return ctx->ring_sock->sk;
++ }
++#endif
++ return NULL;
++}
++EXPORT_SYMBOL(io_uring_get_socket);
++
++static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
++{
++ if (!*locked) {
++ mutex_lock(&ctx->uring_lock);
++ *locked = true;
++ }
++}
++
++#define io_for_each_link(pos, head) \
++ for (pos = (head); pos; pos = pos->link)
++
++/*
++ * Shamelessly stolen from the mm implementation of page reference checking,
++ * see commit f958d7b528b1 for details.
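++ *
++ * Adding 127u to the unsigned refcount maps 0 to 127 and a just-underflowed
++ * value such as (unsigned int)-1 to 126, so a single "<= 127u" comparison
++ * catches both "already released" and "dangerously close to overflow".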
++ */
++#define req_ref_zero_or_close_to_overflow(req) \
++ ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
++
++static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
++{
++ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
++ return atomic_inc_not_zero(&req->refs);
++}
++
++static inline bool req_ref_put_and_test(struct io_kiocb *req)
++{
++ if (likely(!(req->flags & REQ_F_REFCOUNT)))
++ return true;
++
++ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
++ return atomic_dec_and_test(&req->refs);
++}
++
++static inline void req_ref_get(struct io_kiocb *req)
++{
++ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
++ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
++ atomic_inc(&req->refs);
++}
++
++static inline void __io_req_set_refcount(struct io_kiocb *req, int nr)
++{
++ if (!(req->flags & REQ_F_REFCOUNT)) {
++ req->flags |= REQ_F_REFCOUNT;
++ atomic_set(&req->refs, nr);
++ }
++}
++
++static inline void io_req_set_refcount(struct io_kiocb *req)
++{
++ __io_req_set_refcount(req, 1);
++}
++
++static inline void io_req_set_rsrc_node(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (!req->fixed_rsrc_refs) {
++ req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
++ percpu_ref_get(req->fixed_rsrc_refs);
++ }
++}
++
++static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
++{
++ bool got = percpu_ref_tryget(ref);
++
++ /* already at zero, wait for ->release() */
++ if (!got)
++ wait_for_completion(compl);
++ percpu_ref_resurrect(ref);
++ if (got)
++ percpu_ref_put(ref);
++}
++
++static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
++ bool cancel_all)
++ __must_hold(&req->ctx->timeout_lock)
++{
++ struct io_kiocb *req;
++
++ if (task && head->task != task)
++ return false;
++ if (cancel_all)
++ return true;
++
++ io_for_each_link(req, head) {
++ if (req->flags & REQ_F_INFLIGHT)
++ return true;
++ }
++ return false;
++}
++
++static bool io_match_linked(struct io_kiocb *head)
++{
++ struct io_kiocb *req;
++
++ io_for_each_link(req, head) {
++ if (req->flags & REQ_F_INFLIGHT)
++ return true;
++ }
++ return false;
++}
++
++/*
++ * As io_match_task() but protected against racing with linked timeouts.
++ * User must not hold timeout_lock.
++ */
++static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
++ bool cancel_all)
++{
++ bool matched;
++
++ if (task && head->task != task)
++ return false;
++ if (cancel_all)
++ return true;
++
++ if (head->flags & REQ_F_LINK_TIMEOUT) {
++ struct io_ring_ctx *ctx = head->ctx;
++
++ /* protect against races with linked timeouts */
++ spin_lock_irq(&ctx->timeout_lock);
++ matched = io_match_linked(head);
++ spin_unlock_irq(&ctx->timeout_lock);
++ } else {
++ matched = io_match_linked(head);
++ }
++ return matched;
++}
++
++static inline void req_set_fail(struct io_kiocb *req)
++{
++ req->flags |= REQ_F_FAIL;
++}
++
++static inline void req_fail_link_node(struct io_kiocb *req, int res)
++{
++ req_set_fail(req);
++ req->result = res;
++}
++
++static void io_ring_ctx_ref_free(struct percpu_ref *ref)
++{
++ struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
++
++ complete(&ctx->ref_comp);
++}
++
++static inline bool io_is_timeout_noseq(struct io_kiocb *req)
++{
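++	/* an off of 0 means the timeout isn't sequenced against completions */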
++ return !req->timeout.off;
++}
++
++static void io_fallback_req_func(struct work_struct *work)
++{
++ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
++ fallback_work.work);
++ struct llist_node *node = llist_del_all(&ctx->fallback_llist);
++ struct io_kiocb *req, *tmp;
++ bool locked = false;
++
++ percpu_ref_get(&ctx->refs);
++ llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
++ req->io_task_work.func(req, &locked);
++
++ if (locked) {
++ if (ctx->submit_state.compl_nr)
++ io_submit_flush_completions(ctx);
++ mutex_unlock(&ctx->uring_lock);
++ }
++ percpu_ref_put(&ctx->refs);
++
++}
++
++static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
++{
++ struct io_ring_ctx *ctx;
++ int hash_bits;
++
++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
++ if (!ctx)
++ return NULL;
++
++ /*
++	 * Use 5 bits less than the max cq entries; that should give us around
++ * 32 entries per hash list if totally full and uniformly spread.
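++	 * E.g. with 4096 CQ entries: ilog2(4096) == 12, so 7 hash bits and
++	 * 128 cancel hash buckets.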
++ */
++ hash_bits = ilog2(p->cq_entries);
++ hash_bits -= 5;
++ if (hash_bits <= 0)
++ hash_bits = 1;
++ ctx->cancel_hash_bits = hash_bits;
++ ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
++ GFP_KERNEL);
++ if (!ctx->cancel_hash)
++ goto err;
++ __hash_init(ctx->cancel_hash, 1U << hash_bits);
++
++ ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
++ if (!ctx->dummy_ubuf)
++ goto err;
++	/* set an invalid range so io_import_fixed() fails when it hits it */
++ ctx->dummy_ubuf->ubuf = -1UL;
++
++ if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
++ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
++ goto err;
++
++ ctx->flags = p->flags;
++ init_waitqueue_head(&ctx->sqo_sq_wait);
++ INIT_LIST_HEAD(&ctx->sqd_list);
++ init_waitqueue_head(&ctx->poll_wait);
++ INIT_LIST_HEAD(&ctx->cq_overflow_list);
++ init_completion(&ctx->ref_comp);
++ xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
++ xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
++ mutex_init(&ctx->uring_lock);
++ init_waitqueue_head(&ctx->cq_wait);
++ spin_lock_init(&ctx->completion_lock);
++ spin_lock_init(&ctx->timeout_lock);
++ INIT_LIST_HEAD(&ctx->iopoll_list);
++ INIT_LIST_HEAD(&ctx->defer_list);
++ INIT_LIST_HEAD(&ctx->timeout_list);
++ INIT_LIST_HEAD(&ctx->ltimeout_list);
++ spin_lock_init(&ctx->rsrc_ref_lock);
++ INIT_LIST_HEAD(&ctx->rsrc_ref_list);
++ INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
++ init_llist_head(&ctx->rsrc_put_llist);
++ INIT_LIST_HEAD(&ctx->tctx_list);
++ INIT_LIST_HEAD(&ctx->submit_state.free_list);
++ INIT_LIST_HEAD(&ctx->locked_free_list);
++ INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
++ return ctx;
++err:
++ kfree(ctx->dummy_ubuf);
++ kfree(ctx->cancel_hash);
++ kfree(ctx);
++ return NULL;
++}
++
++static void io_account_cq_overflow(struct io_ring_ctx *ctx)
++{
++ struct io_rings *r = ctx->rings;
++
++ WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
++ ctx->cq_extra--;
++}
++
++static bool req_need_defer(struct io_kiocb *req, u32 seq)
++{
++ if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
++ struct io_ring_ctx *ctx = req->ctx;
++
++ return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
++ }
++
++ return false;
++}
++
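++/*
++ * These bits are stashed in the low bits of the struct file pointer kept in
++ * the fixed file table. Such pointers are at least 8-byte aligned on 64-bit
++ * kernels, leaving three low bits free; 32-bit only guarantees two, which is
++ * why FFS_ISREG is a no-op there.
++ */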
++#define FFS_ASYNC_READ 0x1UL
++#define FFS_ASYNC_WRITE 0x2UL
++#ifdef CONFIG_64BIT
++#define FFS_ISREG 0x4UL
++#else
++#define FFS_ISREG 0x0UL
++#endif
++#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
++
++static inline bool io_req_ffs_set(struct io_kiocb *req)
++{
++ return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE);
++}
++
++static void io_req_track_inflight(struct io_kiocb *req)
++{
++ if (!(req->flags & REQ_F_INFLIGHT)) {
++ req->flags |= REQ_F_INFLIGHT;
++ atomic_inc(&req->task->io_uring->inflight_tracked);
++ }
++}
++
++static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
++{
++ if (WARN_ON_ONCE(!req->link))
++ return NULL;
++
++ req->flags &= ~REQ_F_ARM_LTIMEOUT;
++ req->flags |= REQ_F_LINK_TIMEOUT;
++
++ /* linked timeouts should have two refs once prep'ed */
++ io_req_set_refcount(req);
++ __io_req_set_refcount(req->link, 2);
++ return req->link;
++}
++
++static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
++{
++ if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
++ return NULL;
++ return __io_prep_linked_timeout(req);
++}
++
++static void io_prep_async_work(struct io_kiocb *req)
++{
++ const struct io_op_def *def = &io_op_defs[req->opcode];
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (!(req->flags & REQ_F_CREDS)) {
++ req->flags |= REQ_F_CREDS;
++ req->creds = get_current_cred();
++ }
++
++ req->work.list.next = NULL;
++ req->work.flags = 0;
++ if (req->flags & REQ_F_FORCE_ASYNC)
++ req->work.flags |= IO_WQ_WORK_CONCURRENT;
++
++ if (req->flags & REQ_F_ISREG) {
++ if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
++ io_wq_hash_work(&req->work, file_inode(req->file));
++ } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
++ if (def->unbound_nonreg_file)
++ req->work.flags |= IO_WQ_WORK_UNBOUND;
++ }
++}
++
++static void io_prep_async_link(struct io_kiocb *req)
++{
++ struct io_kiocb *cur;
++
++ if (req->flags & REQ_F_LINK_TIMEOUT) {
++ struct io_ring_ctx *ctx = req->ctx;
++
++ spin_lock_irq(&ctx->timeout_lock);
++ io_for_each_link(cur, req)
++ io_prep_async_work(cur);
++ spin_unlock_irq(&ctx->timeout_lock);
++ } else {
++ io_for_each_link(cur, req)
++ io_prep_async_work(cur);
++ }
++}
++
++static void io_queue_async_work(struct io_kiocb *req, bool *locked)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_kiocb *link = io_prep_linked_timeout(req);
++ struct io_uring_task *tctx = req->task->io_uring;
++
++ /* must not take the lock, NULL it as a precaution */
++ locked = NULL;
++
++ BUG_ON(!tctx);
++ BUG_ON(!tctx->io_wq);
++
++ /* init ->work of the whole link before punting */
++ io_prep_async_link(req);
++
++ /*
++ * Not expected to happen, but if we do have a bug where this _can_
++ * happen, catch it here and ensure the request is marked as
++ * canceled. That will make io-wq go through the usual work cancel
++ * procedure rather than attempt to run this request (or create a new
++ * worker for it).
++ */
++ if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
++ req->work.flags |= IO_WQ_WORK_CANCEL;
++
++ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
++ &req->work, req->flags);
++ io_wq_enqueue(tctx->io_wq, &req->work);
++ if (link)
++ io_queue_linked_timeout(link);
++}
++
++static void io_kill_timeout(struct io_kiocb *req, int status)
++ __must_hold(&req->ctx->completion_lock)
++ __must_hold(&req->ctx->timeout_lock)
++{
++ struct io_timeout_data *io = req->async_data;
++
++ if (hrtimer_try_to_cancel(&io->timer) != -1) {
++ if (status)
++ req_set_fail(req);
++ atomic_set(&req->ctx->cq_timeouts,
++ atomic_read(&req->ctx->cq_timeouts) + 1);
++ list_del_init(&req->timeout.list);
++ io_fill_cqe_req(req, status, 0);
++ io_put_req_deferred(req);
++ }
++}
++
++static void io_queue_deferred(struct io_ring_ctx *ctx)
++{
++ lockdep_assert_held(&ctx->completion_lock);
++
++ while (!list_empty(&ctx->defer_list)) {
++ struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
++ struct io_defer_entry, list);
++
++ if (req_need_defer(de->req, de->seq))
++ break;
++ list_del_init(&de->list);
++ io_req_task_queue(de->req);
++ kfree(de);
++ }
++}
++
++static void io_flush_timeouts(struct io_ring_ctx *ctx)
++ __must_hold(&ctx->completion_lock)
++{
++ u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
++ struct io_kiocb *req, *tmp;
++
++ spin_lock_irq(&ctx->timeout_lock);
++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
++ u32 events_needed, events_got;
++
++ if (io_is_timeout_noseq(req))
++ break;
++
++ /*
++ * Since seq can easily wrap around over time, subtract
++ * the last seq at which timeouts were flushed before comparing.
++ * Assuming not more than 2^31-1 events have happened since,
++ * these subtractions won't have wrapped, so we can check if
++ * target is in [last_seq, current_seq] by comparing the two.
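++		 *
++		 * For example, with cq_last_tm_flush == 0xfffffff0, target_seq == 0x10
++		 * and seq == 0x20: events_needed == 0x20 and events_got == 0x30, so the
++		 * timeout is killed even though the raw counters have wrapped.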
++ */
++ events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
++ events_got = seq - ctx->cq_last_tm_flush;
++ if (events_got < events_needed)
++ break;
++
++ io_kill_timeout(req, 0);
++ }
++ ctx->cq_last_tm_flush = seq;
++ spin_unlock_irq(&ctx->timeout_lock);
++}
++
++static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
++{
++ if (ctx->off_timeout_used)
++ io_flush_timeouts(ctx);
++ if (ctx->drain_active)
++ io_queue_deferred(ctx);
++}
++
++static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
++{
++ return ctx->off_timeout_used || ctx->drain_active;
++}
++
++static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
++{
++ /* order cqe stores with ring update */
++ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
++}
++
++static inline void io_commit_cqring(struct io_ring_ctx *ctx)
++{
++ if (unlikely(io_commit_needs_flush(ctx)))
++ __io_commit_cqring_flush(ctx);
++ __io_commit_cqring(ctx);
++}
++
++static inline bool io_sqring_full(struct io_ring_ctx *ctx)
++{
++ struct io_rings *r = ctx->rings;
++
++ return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries;
++}
++
++static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
++{
++ return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
++}
++
++static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
++{
++ struct io_rings *rings = ctx->rings;
++ unsigned tail, mask = ctx->cq_entries - 1;
++
++ /*
++ * writes to the cq entry need to come after reading head; the
++ * control dependency is enough as we're using WRITE_ONCE to
++ * fill the cq entry
++ */
++ if (__io_cqring_events(ctx) == ctx->cq_entries)
++ return NULL;
++
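++	/* the CQ ring size is a power of two, so "tail & mask" is the slot index */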
++ tail = ctx->cached_cq_tail++;
++ return &rings->cqes[tail & mask];
++}
++
++static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
++{
++ if (likely(!ctx->cq_ev_fd))
++ return false;
++ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
++ return false;
++ return !ctx->eventfd_async || io_wq_current_is_worker();
++}
++
++/*
++ * This should only get called when at least one event has been posted.
++ * Some applications rely on the eventfd notification count only changing
++ * IFF a new CQE has been added to the CQ ring. There's no dependency on
++ * a 1:1 relationship between how many times this function is called (and
++ * hence the eventfd count) and the number of CQEs posted to the CQ ring.
++ */
++static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
++{
++ /*
++ * wake_up_all() may seem excessive, but io_wake_function() and
++ * io_should_wake() handle the termination of the loop and only
++ * wake as many waiters as we need to.
++ */
++ if (wq_has_sleeper(&ctx->cq_wait))
++ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
++ if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
++ wake_up(&ctx->sq_data->wait);
++ if (io_should_trigger_evfd(ctx))
++ eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE);
++ if (waitqueue_active(&ctx->poll_wait))
++ __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0,
++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
++}
++
++static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
++{
++ /* see waitqueue_active() comment */
++ smp_mb();
++
++ if (ctx->flags & IORING_SETUP_SQPOLL) {
++ if (waitqueue_active(&ctx->cq_wait))
++ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
++ }
++ if (io_should_trigger_evfd(ctx))
++ eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE);
++ if (waitqueue_active(&ctx->poll_wait))
++ __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0,
++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
++}
++
++/* Returns true if there are no backlogged entries after the flush */
++static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
++{
++ bool all_flushed, posted;
++
++ if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
++ return false;
++
++ posted = false;
++ spin_lock(&ctx->completion_lock);
++ while (!list_empty(&ctx->cq_overflow_list)) {
++ struct io_uring_cqe *cqe = io_get_cqe(ctx);
++ struct io_overflow_cqe *ocqe;
++
++ if (!cqe && !force)
++ break;
++ ocqe = list_first_entry(&ctx->cq_overflow_list,
++ struct io_overflow_cqe, list);
++ if (cqe)
++ memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
++ else
++ io_account_cq_overflow(ctx);
++
++ posted = true;
++ list_del(&ocqe->list);
++ kfree(ocqe);
++ }
++
++ all_flushed = list_empty(&ctx->cq_overflow_list);
++ if (all_flushed) {
++ clear_bit(0, &ctx->check_cq_overflow);
++ WRITE_ONCE(ctx->rings->sq_flags,
++ ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
++ }
++
++ if (posted)
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++ if (posted)
++ io_cqring_ev_posted(ctx);
++ return all_flushed;
++}
++
++static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
++{
++ bool ret = true;
++
++ if (test_bit(0, &ctx->check_cq_overflow)) {
++ /* iopoll syncs against uring_lock, not completion_lock */
++ if (ctx->flags & IORING_SETUP_IOPOLL)
++ mutex_lock(&ctx->uring_lock);
++ ret = __io_cqring_overflow_flush(ctx, false);
++ if (ctx->flags & IORING_SETUP_IOPOLL)
++ mutex_unlock(&ctx->uring_lock);
++ }
++
++ return ret;
++}
++
++/* must be called shortly after putting a request */
++static inline void io_put_task(struct task_struct *task, int nr)
++{
++ struct io_uring_task *tctx = task->io_uring;
++
++ if (likely(task == current)) {
++ tctx->cached_refs += nr;
++ } else {
++ percpu_counter_sub(&tctx->inflight, nr);
++ if (unlikely(atomic_read(&tctx->in_idle)))
++ wake_up(&tctx->wait);
++ put_task_struct_many(task, nr);
++ }
++}
++
++static void io_task_refs_refill(struct io_uring_task *tctx)
++{
++ unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
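++	/* after adding 'refill', cached_refs ends up back at IO_TCTX_REFS_CACHE_NR */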
++
++ percpu_counter_add(&tctx->inflight, refill);
++ refcount_add(refill, &current->usage);
++ tctx->cached_refs += refill;
++}
++
++static inline void io_get_task_refs(int nr)
++{
++ struct io_uring_task *tctx = current->io_uring;
++
++ tctx->cached_refs -= nr;
++ if (unlikely(tctx->cached_refs < 0))
++ io_task_refs_refill(tctx);
++}
++
++static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
++{
++ struct io_uring_task *tctx = task->io_uring;
++ unsigned int refs = tctx->cached_refs;
++
++ if (refs) {
++ tctx->cached_refs = 0;
++ percpu_counter_sub(&tctx->inflight, refs);
++ put_task_struct_many(task, refs);
++ }
++}
++
++static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
++ s32 res, u32 cflags)
++{
++ struct io_overflow_cqe *ocqe;
++
++ ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
++ if (!ocqe) {
++ /*
++ * If we're in ring overflow flush mode, or in task cancel mode,
++ * or cannot allocate an overflow entry, then we need to drop it
++ * on the floor.
++ */
++ io_account_cq_overflow(ctx);
++ return false;
++ }
++ if (list_empty(&ctx->cq_overflow_list)) {
++ set_bit(0, &ctx->check_cq_overflow);
++ WRITE_ONCE(ctx->rings->sq_flags,
++ ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
++
++ }
++ ocqe->cqe.user_data = user_data;
++ ocqe->cqe.res = res;
++ ocqe->cqe.flags = cflags;
++ list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
++ return true;
++}
++
++static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
++ s32 res, u32 cflags)
++{
++ struct io_uring_cqe *cqe;
++
++ trace_io_uring_complete(ctx, user_data, res, cflags);
++
++ /*
++ * If we can't get a cq entry, userspace overflowed the
++ * submission (by quite a lot). Increment the overflow count in
++ * the ring.
++ */
++ cqe = io_get_cqe(ctx);
++ if (likely(cqe)) {
++ WRITE_ONCE(cqe->user_data, user_data);
++ WRITE_ONCE(cqe->res, res);
++ WRITE_ONCE(cqe->flags, cflags);
++ return true;
++ }
++ return io_cqring_event_overflow(ctx, user_data, res, cflags);
++}
++
++static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
++{
++ __io_fill_cqe(req->ctx, req->user_data, res, cflags);
++}
++
++static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
++ s32 res, u32 cflags)
++{
++ ctx->cq_extra++;
++ return __io_fill_cqe(ctx, user_data, res, cflags);
++}
++
++static void io_req_complete_post(struct io_kiocb *req, s32 res,
++ u32 cflags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ spin_lock(&ctx->completion_lock);
++ __io_fill_cqe(ctx, req->user_data, res, cflags);
++ /*
++ * If we're the last reference to this request, add to our locked
++ * free_list cache.
++ */
++ if (req_ref_put_and_test(req)) {
++ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
++ if (req->flags & IO_DISARM_MASK)
++ io_disarm_next(req);
++ if (req->link) {
++ io_req_task_queue(req->link);
++ req->link = NULL;
++ }
++ }
++ io_dismantle_req(req);
++ io_put_task(req->task, 1);
++ list_add(&req->inflight_entry, &ctx->locked_free_list);
++ ctx->locked_free_nr++;
++ } else {
++ if (!percpu_ref_tryget(&ctx->refs))
++ req = NULL;
++ }
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++
++ if (req) {
++ io_cqring_ev_posted(ctx);
++ percpu_ref_put(&ctx->refs);
++ }
++}
++
++static inline bool io_req_needs_clean(struct io_kiocb *req)
++{
++ return req->flags & IO_REQ_CLEAN_FLAGS;
++}
++
++static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
++ u32 cflags)
++{
++ if (io_req_needs_clean(req))
++ io_clean_op(req);
++ req->result = res;
++ req->compl.cflags = cflags;
++ req->flags |= REQ_F_COMPLETE_INLINE;
++}
++
++static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
++ s32 res, u32 cflags)
++{
++ if (issue_flags & IO_URING_F_COMPLETE_DEFER)
++ io_req_complete_state(req, res, cflags);
++ else
++ io_req_complete_post(req, res, cflags);
++}
++
++static inline void io_req_complete(struct io_kiocb *req, s32 res)
++{
++ __io_req_complete(req, 0, res, 0);
++}
++
++static void io_req_complete_failed(struct io_kiocb *req, s32 res)
++{
++ req_set_fail(req);
++ io_req_complete_post(req, res, 0);
++}
++
++static void io_req_complete_fail_submit(struct io_kiocb *req)
++{
++ /*
++	 * We're not going to submit; fail them all. For that, replace hardlinks
++	 * with normal links. Extra REQ_F_LINK is tolerated.
++ */
++ req->flags &= ~REQ_F_HARDLINK;
++ req->flags |= REQ_F_LINK;
++ io_req_complete_failed(req, req->result);
++}
++
++/*
++ * Don't initialise the fields below on every allocation, but do that in
++ * advance and keep them valid across allocations.
++ */
++static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
++{
++ req->ctx = ctx;
++ req->link = NULL;
++ req->async_data = NULL;
++ /* not necessary, but safer to zero */
++ req->result = 0;
++}
++
++static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
++ struct io_submit_state *state)
++{
++ spin_lock(&ctx->completion_lock);
++ list_splice_init(&ctx->locked_free_list, &state->free_list);
++ ctx->locked_free_nr = 0;
++ spin_unlock(&ctx->completion_lock);
++}
++
++/* Returns true IFF there are requests in the cache */
++static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
++{
++ struct io_submit_state *state = &ctx->submit_state;
++ int nr;
++
++ /*
++ * If we have more than a batch's worth of requests in our IRQ side
++ * locked cache, grab the lock and move them over to our submission
++ * side cache.
++ */
++ if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
++ io_flush_cached_locked_reqs(ctx, state);
++
++ nr = state->free_reqs;
++ while (!list_empty(&state->free_list)) {
++ struct io_kiocb *req = list_first_entry(&state->free_list,
++ struct io_kiocb, inflight_entry);
++
++ list_del(&req->inflight_entry);
++ state->reqs[nr++] = req;
++ if (nr == ARRAY_SIZE(state->reqs))
++ break;
++ }
++
++ state->free_reqs = nr;
++ return nr != 0;
++}
++
++/*
++ * A request might get retired back into the request caches even before opcode
++ * handlers and io_issue_sqe() are done with it, e.g. inline completion path.
++ * Because of that, io_alloc_req() should be called only under ->uring_lock
++ * and with extra caution to not get a request that is still worked on.
++ */
++static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
++ __must_hold(&ctx->uring_lock)
++{
++ struct io_submit_state *state = &ctx->submit_state;
++ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
++ int ret, i;
++
++ BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH);
++
++ if (likely(state->free_reqs || io_flush_cached_reqs(ctx)))
++ goto got_req;
++
++ ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
++ state->reqs);
++
++ /*
++ * Bulk alloc is all-or-nothing. If we fail to get a batch,
++ * retry single alloc to be on the safe side.
++ */
++ if (unlikely(ret <= 0)) {
++ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
++ if (!state->reqs[0])
++ return NULL;
++ ret = 1;
++ }
++
++ for (i = 0; i < ret; i++)
++ io_preinit_req(state->reqs[i], ctx);
++ state->free_reqs = ret;
++got_req:
++ state->free_reqs--;
++ return state->reqs[state->free_reqs];
++}
++
++static inline void io_put_file(struct file *file)
++{
++ if (file)
++ fput(file);
++}
++
++static void io_dismantle_req(struct io_kiocb *req)
++{
++ unsigned int flags = req->flags;
++
++ if (io_req_needs_clean(req))
++ io_clean_op(req);
++ if (!(flags & REQ_F_FIXED_FILE))
++ io_put_file(req->file);
++ if (req->fixed_rsrc_refs)
++ percpu_ref_put(req->fixed_rsrc_refs);
++ if (req->async_data) {
++ kfree(req->async_data);
++ req->async_data = NULL;
++ }
++}
++
++static void __io_free_req(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ io_dismantle_req(req);
++ io_put_task(req->task, 1);
++
++ spin_lock(&ctx->completion_lock);
++ list_add(&req->inflight_entry, &ctx->locked_free_list);
++ ctx->locked_free_nr++;
++ spin_unlock(&ctx->completion_lock);
++
++ percpu_ref_put(&ctx->refs);
++}
++
++static inline void io_remove_next_linked(struct io_kiocb *req)
++{
++ struct io_kiocb *nxt = req->link;
++
++ req->link = nxt->link;
++ nxt->link = NULL;
++}
++
++static bool io_kill_linked_timeout(struct io_kiocb *req)
++ __must_hold(&req->ctx->completion_lock)
++ __must_hold(&req->ctx->timeout_lock)
++{
++ struct io_kiocb *link = req->link;
++
++ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
++ struct io_timeout_data *io = link->async_data;
++
++ io_remove_next_linked(req);
++ link->timeout.head = NULL;
++ if (hrtimer_try_to_cancel(&io->timer) != -1) {
++ list_del(&link->timeout.list);
++ io_fill_cqe_req(link, -ECANCELED, 0);
++ io_put_req_deferred(link);
++ return true;
++ }
++ }
++ return false;
++}
++
++static void io_fail_links(struct io_kiocb *req)
++ __must_hold(&req->ctx->completion_lock)
++{
++ struct io_kiocb *nxt, *link = req->link;
++
++ req->link = NULL;
++ while (link) {
++ long res = -ECANCELED;
++
++ if (link->flags & REQ_F_FAIL)
++ res = link->result;
++
++ nxt = link->link;
++ link->link = NULL;
++
++ trace_io_uring_fail_link(req, link);
++ io_fill_cqe_req(link, res, 0);
++ io_put_req_deferred(link);
++ link = nxt;
++ }
++}
++
++static bool io_disarm_next(struct io_kiocb *req)
++ __must_hold(&req->ctx->completion_lock)
++{
++ bool posted = false;
++
++ if (req->flags & REQ_F_ARM_LTIMEOUT) {
++ struct io_kiocb *link = req->link;
++
++ req->flags &= ~REQ_F_ARM_LTIMEOUT;
++ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
++ io_remove_next_linked(req);
++ io_fill_cqe_req(link, -ECANCELED, 0);
++ io_put_req_deferred(link);
++ posted = true;
++ }
++ } else if (req->flags & REQ_F_LINK_TIMEOUT) {
++ struct io_ring_ctx *ctx = req->ctx;
++
++ spin_lock_irq(&ctx->timeout_lock);
++ posted = io_kill_linked_timeout(req);
++ spin_unlock_irq(&ctx->timeout_lock);
++ }
++ if (unlikely((req->flags & REQ_F_FAIL) &&
++ !(req->flags & REQ_F_HARDLINK))) {
++ posted |= (req->link != NULL);
++ io_fail_links(req);
++ }
++ return posted;
++}
++
++static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
++{
++ struct io_kiocb *nxt;
++
++ /*
++ * If LINK is set, we have dependent requests in this chain. If we
++ * didn't fail this request, queue the first one up, moving any other
++ * dependencies to the next request. In case of failure, fail the rest
++ * of the chain.
++ */
++ if (req->flags & IO_DISARM_MASK) {
++ struct io_ring_ctx *ctx = req->ctx;
++ bool posted;
++
++ spin_lock(&ctx->completion_lock);
++ posted = io_disarm_next(req);
++ if (posted)
++ io_commit_cqring(req->ctx);
++ spin_unlock(&ctx->completion_lock);
++ if (posted)
++ io_cqring_ev_posted(ctx);
++ }
++ nxt = req->link;
++ req->link = NULL;
++ return nxt;
++}
++
++static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
++{
++ if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK))))
++ return NULL;
++ return __io_req_find_next(req);
++}
++
++static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
++{
++ if (!ctx)
++ return;
++ if (*locked) {
++ if (ctx->submit_state.compl_nr)
++ io_submit_flush_completions(ctx);
++ mutex_unlock(&ctx->uring_lock);
++ *locked = false;
++ }
++ percpu_ref_put(&ctx->refs);
++}
++
++static void tctx_task_work(struct callback_head *cb)
++{
++ bool locked = false;
++ struct io_ring_ctx *ctx = NULL;
++ struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
++ task_work);
++
++ while (1) {
++ struct io_wq_work_node *node;
++
++ if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
++ io_submit_flush_completions(ctx);
++
++ spin_lock_irq(&tctx->task_lock);
++ node = tctx->task_list.first;
++ INIT_WQ_LIST(&tctx->task_list);
++ if (!node)
++ tctx->task_running = false;
++ spin_unlock_irq(&tctx->task_lock);
++ if (!node)
++ break;
++
++ do {
++ struct io_wq_work_node *next = node->next;
++ struct io_kiocb *req = container_of(node, struct io_kiocb,
++ io_task_work.node);
++
++ if (req->ctx != ctx) {
++ ctx_flush_and_put(ctx, &locked);
++ ctx = req->ctx;
++ /* if not contended, grab and improve batching */
++ locked = mutex_trylock(&ctx->uring_lock);
++ percpu_ref_get(&ctx->refs);
++ }
++ req->io_task_work.func(req, &locked);
++ node = next;
++ if (unlikely(need_resched())) {
++ ctx_flush_and_put(ctx, &locked);
++ ctx = NULL;
++ cond_resched();
++ }
++ } while (node);
++ }
++
++ ctx_flush_and_put(ctx, &locked);
++
++ /* relaxed read is enough as only the task itself sets ->in_idle */
++ if (unlikely(atomic_read(&tctx->in_idle)))
++ io_uring_drop_tctx_refs(current);
++}
++
++static void io_req_task_work_add(struct io_kiocb *req)
++{
++ struct task_struct *tsk = req->task;
++ struct io_uring_task *tctx = tsk->io_uring;
++ enum task_work_notify_mode notify;
++ struct io_wq_work_node *node;
++ unsigned long flags;
++ bool running;
++
++ WARN_ON_ONCE(!tctx);
++
++ spin_lock_irqsave(&tctx->task_lock, flags);
++ wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
++ running = tctx->task_running;
++ if (!running)
++ tctx->task_running = true;
++ spin_unlock_irqrestore(&tctx->task_lock, flags);
++
++ /* task_work already pending, we're done */
++ if (running)
++ return;
++
++ /*
++ * SQPOLL kernel thread doesn't need notification, just a wakeup. For
++ * all other cases, use TWA_SIGNAL unconditionally to ensure we're
++ * processing task_work. There's no reliable way to tell if TWA_RESUME
++ * will do the job.
++ */
++ notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
++ if (!task_work_add(tsk, &tctx->task_work, notify)) {
++ wake_up_process(tsk);
++ return;
++ }
++
++ spin_lock_irqsave(&tctx->task_lock, flags);
++ tctx->task_running = false;
++ node = tctx->task_list.first;
++ INIT_WQ_LIST(&tctx->task_list);
++ spin_unlock_irqrestore(&tctx->task_lock, flags);
++
++ while (node) {
++ req = container_of(node, struct io_kiocb, io_task_work.node);
++ node = node->next;
++ if (llist_add(&req->io_task_work.fallback_node,
++ &req->ctx->fallback_llist))
++ schedule_delayed_work(&req->ctx->fallback_work, 1);
++ }
++}
++
++static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ /* not needed for normal modes, but SQPOLL depends on it */
++ io_tw_lock(ctx, locked);
++ io_req_complete_failed(req, req->result);
++}
++
++static void io_req_task_submit(struct io_kiocb *req, bool *locked)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ io_tw_lock(ctx, locked);
++ /* req->task == current here, checking PF_EXITING is safe */
++ if (likely(!(req->task->flags & PF_EXITING)))
++ __io_queue_sqe(req);
++ else
++ io_req_complete_failed(req, -EFAULT);
++}
++
++static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
++{
++ req->result = ret;
++ req->io_task_work.func = io_req_task_cancel;
++ io_req_task_work_add(req);
++}
++
++static void io_req_task_queue(struct io_kiocb *req)
++{
++ req->io_task_work.func = io_req_task_submit;
++ io_req_task_work_add(req);
++}
++
++static void io_req_task_queue_reissue(struct io_kiocb *req)
++{
++ req->io_task_work.func = io_queue_async_work;
++ io_req_task_work_add(req);
++}
++
++static inline void io_queue_next(struct io_kiocb *req)
++{
++ struct io_kiocb *nxt = io_req_find_next(req);
++
++ if (nxt)
++ io_req_task_queue(nxt);
++}
++
++static void io_free_req(struct io_kiocb *req)
++{
++ io_queue_next(req);
++ __io_free_req(req);
++}
++
++static void io_free_req_work(struct io_kiocb *req, bool *locked)
++{
++ io_free_req(req);
++}
++
++struct req_batch {
++ struct task_struct *task;
++ int task_refs;
++ int ctx_refs;
++};
++
++static inline void io_init_req_batch(struct req_batch *rb)
++{
++ rb->task_refs = 0;
++ rb->ctx_refs = 0;
++ rb->task = NULL;
++}
++
++static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
++ struct req_batch *rb)
++{
++ if (rb->ctx_refs)
++ percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
++ if (rb->task)
++ io_put_task(rb->task, rb->task_refs);
++}
++
++static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
++ struct io_submit_state *state)
++{
++ io_queue_next(req);
++ io_dismantle_req(req);
++
++ if (req->task != rb->task) {
++ if (rb->task)
++ io_put_task(rb->task, rb->task_refs);
++ rb->task = req->task;
++ rb->task_refs = 0;
++ }
++ rb->task_refs++;
++ rb->ctx_refs++;
++
++ if (state->free_reqs != ARRAY_SIZE(state->reqs))
++ state->reqs[state->free_reqs++] = req;
++ else
++ list_add(&req->inflight_entry, &state->free_list);
++}
++
++static void io_submit_flush_completions(struct io_ring_ctx *ctx)
++ __must_hold(&ctx->uring_lock)
++{
++ struct io_submit_state *state = &ctx->submit_state;
++ int i, nr = state->compl_nr;
++ struct req_batch rb;
++
++ spin_lock(&ctx->completion_lock);
++ for (i = 0; i < nr; i++) {
++ struct io_kiocb *req = state->compl_reqs[i];
++
++ __io_fill_cqe(ctx, req->user_data, req->result,
++ req->compl.cflags);
++ }
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++ io_cqring_ev_posted(ctx);
++
++ io_init_req_batch(&rb);
++ for (i = 0; i < nr; i++) {
++ struct io_kiocb *req = state->compl_reqs[i];
++
++ if (req_ref_put_and_test(req))
++ io_req_free_batch(&rb, req, &ctx->submit_state);
++ }
++
++ io_req_free_batch_finish(ctx, &rb);
++ state->compl_nr = 0;
++}
++
++/*
++ * Drop reference to request, return next in chain (if there is one) if this
++ * was the last reference to this request.
++ */
++static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
++{
++ struct io_kiocb *nxt = NULL;
++
++ if (req_ref_put_and_test(req)) {
++ nxt = io_req_find_next(req);
++ __io_free_req(req);
++ }
++ return nxt;
++}
++
++static inline void io_put_req(struct io_kiocb *req)
++{
++ if (req_ref_put_and_test(req))
++ io_free_req(req);
++}
++
++static inline void io_put_req_deferred(struct io_kiocb *req)
++{
++ if (req_ref_put_and_test(req)) {
++ req->io_task_work.func = io_free_req_work;
++ io_req_task_work_add(req);
++ }
++}
++
++static unsigned io_cqring_events(struct io_ring_ctx *ctx)
++{
++ /* See comment at the top of this file */
++ smp_rmb();
++ return __io_cqring_events(ctx);
++}
++
++static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
++{
++ struct io_rings *rings = ctx->rings;
++
++ /* make sure SQ entry isn't read before tail */
++ return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
++}
++
++static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
++{
++ unsigned int cflags;
++
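++	/* encode the selected buffer id into the CQE flags returned to userspace */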
++ cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
++ cflags |= IORING_CQE_F_BUFFER;
++ req->flags &= ~REQ_F_BUFFER_SELECTED;
++ kfree(kbuf);
++ return cflags;
++}
++
++static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
++{
++ struct io_buffer *kbuf;
++
++ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
++ return 0;
++ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
++ return io_put_kbuf(req, kbuf);
++}
++
++static inline bool io_run_task_work(void)
++{
++ /*
++ * PF_IO_WORKER never returns to userspace, so check here if we have
++ * notify work that needs processing.
++ */
++ if (current->flags & PF_IO_WORKER &&
++ test_thread_flag(TIF_NOTIFY_RESUME)) {
++ __set_current_state(TASK_RUNNING);
++ tracehook_notify_resume(NULL);
++ }
++ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
++ __set_current_state(TASK_RUNNING);
++ tracehook_notify_signal();
++ return true;
++ }
++
++ return false;
++}
++
++/*
++ * Find and free completed poll iocbs
++ */
++static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
++ struct list_head *done)
++{
++ struct req_batch rb;
++ struct io_kiocb *req;
++
++ /* order with ->result store in io_complete_rw_iopoll() */
++ smp_rmb();
++
++ io_init_req_batch(&rb);
++ while (!list_empty(done)) {
++ struct io_uring_cqe *cqe;
++ unsigned cflags;
++
++ req = list_first_entry(done, struct io_kiocb, inflight_entry);
++ list_del(&req->inflight_entry);
++ cflags = io_put_rw_kbuf(req);
++ (*nr_events)++;
++
++ cqe = io_get_cqe(ctx);
++ if (cqe) {
++ WRITE_ONCE(cqe->user_data, req->user_data);
++ WRITE_ONCE(cqe->res, req->result);
++ WRITE_ONCE(cqe->flags, cflags);
++ } else {
++ spin_lock(&ctx->completion_lock);
++ io_cqring_event_overflow(ctx, req->user_data,
++ req->result, cflags);
++ spin_unlock(&ctx->completion_lock);
++ }
++
++ if (req_ref_put_and_test(req))
++ io_req_free_batch(&rb, req, &ctx->submit_state);
++ }
++
++ if (io_commit_needs_flush(ctx)) {
++ spin_lock(&ctx->completion_lock);
++ __io_commit_cqring_flush(ctx);
++ spin_unlock(&ctx->completion_lock);
++ }
++ __io_commit_cqring(ctx);
++ io_cqring_ev_posted_iopoll(ctx);
++ io_req_free_batch_finish(ctx, &rb);
++}
++
++static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
++ long min)
++{
++ struct io_kiocb *req, *tmp;
++ LIST_HEAD(done);
++ bool spin;
++
++ /*
++ * Only spin for completions if we don't have multiple devices hanging
++ * off our complete list, and we're under the requested amount.
++ */
++ spin = !ctx->poll_multi_queue && *nr_events < min;
++
++ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
++ struct kiocb *kiocb = &req->rw.kiocb;
++ int ret;
++
++ /*
++ * Move completed and retryable entries to our local lists.
++ * If we find a request that requires polling, break out
++ * and complete those lists first, if we have entries there.
++ */
++ if (READ_ONCE(req->iopoll_completed)) {
++ list_move_tail(&req->inflight_entry, &done);
++ continue;
++ }
++ if (!list_empty(&done))
++ break;
++
++ ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
++ if (unlikely(ret < 0))
++ return ret;
++ else if (ret)
++ spin = false;
++
++ /* iopoll may have completed current req */
++ if (READ_ONCE(req->iopoll_completed))
++ list_move_tail(&req->inflight_entry, &done);
++ }
++
++ if (!list_empty(&done))
++ io_iopoll_complete(ctx, nr_events, &done);
++
++ return 0;
++}
++
++/*
++ * We can't just wait for polled events to come to us, we have to actively
++ * find and complete them.
++ */
++static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
++{
++ if (!(ctx->flags & IORING_SETUP_IOPOLL))
++ return;
++
++ mutex_lock(&ctx->uring_lock);
++ while (!list_empty(&ctx->iopoll_list)) {
++ unsigned int nr_events = 0;
++
++ io_do_iopoll(ctx, &nr_events, 0);
++
++ /* let it sleep and repeat later if can't complete a request */
++ if (nr_events == 0)
++ break;
++ /*
++		 * Ensure we allow local-to-the-cpu processing to take place;
++		 * in this case we need to ensure that we reap all events.
++		 * Also let task_work, etc. progress by releasing the mutex.
++ */
++ if (need_resched()) {
++ mutex_unlock(&ctx->uring_lock);
++ cond_resched();
++ mutex_lock(&ctx->uring_lock);
++ }
++ }
++ mutex_unlock(&ctx->uring_lock);
++}
++
++static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
++{
++ unsigned int nr_events = 0;
++ int ret = 0;
++
++ /*
++ * We disallow the app entering submit/complete with polling, but we
++ * still need to lock the ring to prevent racing with polled issue
++ * that got punted to a workqueue.
++ */
++ mutex_lock(&ctx->uring_lock);
++ /*
++ * Don't enter poll loop if we already have events pending.
++ * If we do, we can potentially be spinning for commands that
++	 * already triggered a CQE (e.g. in error).
++ */
++ if (test_bit(0, &ctx->check_cq_overflow))
++ __io_cqring_overflow_flush(ctx, false);
++ if (io_cqring_events(ctx))
++ goto out;
++ do {
++ /*
++ * If a submit got punted to a workqueue, we can have the
++ * application entering polling for a command before it gets
++ * issued. That app will hold the uring_lock for the duration
++ * of the poll right here, so we need to take a breather every
++ * now and then to ensure that the issue has a chance to add
++ * the poll to the issued list. Otherwise we can spin here
++ * forever, while the workqueue is stuck trying to acquire the
++ * very same mutex.
++ */
++ if (list_empty(&ctx->iopoll_list)) {
++ u32 tail = ctx->cached_cq_tail;
++
++ mutex_unlock(&ctx->uring_lock);
++ io_run_task_work();
++ mutex_lock(&ctx->uring_lock);
++
++ /* some requests don't go through iopoll_list */
++ if (tail != ctx->cached_cq_tail ||
++ list_empty(&ctx->iopoll_list))
++ break;
++ }
++ ret = io_do_iopoll(ctx, &nr_events, min);
++
++ if (task_sigpending(current)) {
++ ret = -EINTR;
++ goto out;
++ }
++ } while (!ret && nr_events < min && !need_resched());
++out:
++ mutex_unlock(&ctx->uring_lock);
++ return ret;
++}
++
++static void kiocb_end_write(struct io_kiocb *req)
++{
++ /*
++ * Tell lockdep we inherited freeze protection from submission
++ * thread.
++ */
++ if (req->flags & REQ_F_ISREG) {
++ struct super_block *sb = file_inode(req->file)->i_sb;
++
++ __sb_writers_acquired(sb, SB_FREEZE_WRITE);
++ sb_end_write(sb);
++ }
++}
++
++#ifdef CONFIG_BLOCK
++static bool io_resubmit_prep(struct io_kiocb *req)
++{
++ struct io_async_rw *rw = req->async_data;
++
++ if (!rw)
++ return !io_req_prep_async(req);
++ iov_iter_restore(&rw->iter, &rw->iter_state);
++ return true;
++}
++
++static bool io_rw_should_reissue(struct io_kiocb *req)
++{
++ umode_t mode = file_inode(req->file)->i_mode;
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (!S_ISBLK(mode) && !S_ISREG(mode))
++ return false;
++ if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
++ !(ctx->flags & IORING_SETUP_IOPOLL)))
++ return false;
++ /*
++ * If ref is dying, we might be running poll reap from the exit work.
++ * Don't attempt to reissue from that path, just let it fail with
++ * -EAGAIN.
++ */
++ if (percpu_ref_is_dying(&ctx->refs))
++ return false;
++ /*
++	 * Play it safe and assume it's not safe to re-import and reissue if
++	 * we're not in the original thread group (or not in task context).
++ */
++ if (!same_thread_group(req->task, current) || !in_task())
++ return false;
++ return true;
++}
++#else
++static bool io_resubmit_prep(struct io_kiocb *req)
++{
++ return false;
++}
++static bool io_rw_should_reissue(struct io_kiocb *req)
++{
++ return false;
++}
++#endif
++
++/*
++ * Trigger the notifications after having done some IO, and finish the write
++ * accounting, if any.
++ */
++static void io_req_io_end(struct io_kiocb *req)
++{
++ struct io_rw *rw = &req->rw;
++
++ if (rw->kiocb.ki_flags & IOCB_WRITE) {
++ kiocb_end_write(req);
++ fsnotify_modify(req->file);
++ } else {
++ fsnotify_access(req->file);
++ }
++}
++
++static bool __io_complete_rw_common(struct io_kiocb *req, long res)
++{
++ if (res != req->result) {
++ if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
++ io_rw_should_reissue(req)) {
++ /*
++ * Reissue will start accounting again, finish the
++ * current cycle.
++ */
++ io_req_io_end(req);
++ req->flags |= REQ_F_REISSUE;
++ return true;
++ }
++ req_set_fail(req);
++ req->result = res;
++ }
++ return false;
++}
++
++static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
++{
++ struct io_async_rw *io = req->async_data;
++
++ /* add previously done IO, if any */
++ if (io && io->bytes_done > 0) {
++ if (res < 0)
++ res = io->bytes_done;
++ else
++ res += io->bytes_done;
++ }
++ return res;
++}
++
++static void io_req_task_complete(struct io_kiocb *req, bool *locked)
++{
++ unsigned int cflags = io_put_rw_kbuf(req);
++ int res = req->result;
++
++ if (*locked) {
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_submit_state *state = &ctx->submit_state;
++
++ io_req_complete_state(req, res, cflags);
++ state->compl_reqs[state->compl_nr++] = req;
++ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
++ io_submit_flush_completions(ctx);
++ } else {
++ io_req_complete_post(req, res, cflags);
++ }
++}
++
++static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
++{
++ io_req_io_end(req);
++ io_req_task_complete(req, locked);
++}
++
++static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
++{
++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
++
++ if (__io_complete_rw_common(req, res))
++ return;
++ req->result = io_fixup_rw_res(req, res);
++ req->io_task_work.func = io_req_rw_complete;
++ io_req_task_work_add(req);
++}
++
++static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
++{
++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
++
++ if (kiocb->ki_flags & IOCB_WRITE)
++ kiocb_end_write(req);
++ if (unlikely(res != req->result)) {
++ if (res == -EAGAIN && io_rw_should_reissue(req)) {
++ req->flags |= REQ_F_REISSUE;
++ return;
++ }
++ }
++
++ WRITE_ONCE(req->result, res);
++ /* order with io_iopoll_complete() checking ->result */
++ smp_wmb();
++ WRITE_ONCE(req->iopoll_completed, 1);
++}
++
++/*
++ * After the iocb has been issued, it's safe to be found on the poll list.
++ * Adding the kiocb to the list AFTER submission ensures that we don't
++ * find it from an io_do_iopoll() thread before the issuer is done
++ * accessing the kiocb cookie.
++ */
++static void io_iopoll_req_issued(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ const bool in_async = io_wq_current_is_worker();
++
++ /* workqueue context doesn't hold uring_lock, grab it now */
++ if (unlikely(in_async))
++ mutex_lock(&ctx->uring_lock);
++
++ /*
++ * Track whether we have multiple files in our lists. This will impact
++ * how we do polling eventually, not spinning if we're on potentially
++ * different devices.
++ */
++ if (list_empty(&ctx->iopoll_list)) {
++ ctx->poll_multi_queue = false;
++ } else if (!ctx->poll_multi_queue) {
++ struct io_kiocb *list_req;
++ unsigned int queue_num0, queue_num1;
++
++ list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
++ inflight_entry);
++
++ if (list_req->file != req->file) {
++ ctx->poll_multi_queue = true;
++ } else {
++ queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie);
++ queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie);
++ if (queue_num0 != queue_num1)
++ ctx->poll_multi_queue = true;
++ }
++ }
++
++ /*
++ * For fast devices, IO may have already completed. If it has, add
++ * it to the front so we find it first.
++ */
++ if (READ_ONCE(req->iopoll_completed))
++ list_add(&req->inflight_entry, &ctx->iopoll_list);
++ else
++ list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
++
++ if (unlikely(in_async)) {
++ /*
++		 * If IORING_SETUP_SQPOLL is enabled, sqes are either handled
++		 * in sq thread task context or in io worker task context. If
++		 * the current task context is the sq thread, we don't need to
++		 * check whether we should wake up the sq thread.
++ */
++ if ((ctx->flags & IORING_SETUP_SQPOLL) &&
++ wq_has_sleeper(&ctx->sq_data->wait))
++ wake_up(&ctx->sq_data->wait);
++
++ mutex_unlock(&ctx->uring_lock);
++ }
++}
++
++static bool io_bdev_nowait(struct block_device *bdev)
++{
++ return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
++}
++
++/*
++ * If we tracked the file through the SCM inflight mechanism, we could support
++ * any file. For now, just ensure that anything potentially problematic is done
++ * inline.
++ */
++static bool __io_file_supports_nowait(struct file *file, int rw)
++{
++ umode_t mode = file_inode(file)->i_mode;
++
++ if (S_ISBLK(mode)) {
++ if (IS_ENABLED(CONFIG_BLOCK) &&
++ io_bdev_nowait(I_BDEV(file->f_mapping->host)))
++ return true;
++ return false;
++ }
++ if (S_ISSOCK(mode))
++ return true;
++ if (S_ISREG(mode)) {
++ if (IS_ENABLED(CONFIG_BLOCK) &&
++ io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
++ file->f_op != &io_uring_fops)
++ return true;
++ return false;
++ }
++
++ /* any ->read/write should understand O_NONBLOCK */
++ if (file->f_flags & O_NONBLOCK)
++ return true;
++
++ if (!(file->f_mode & FMODE_NOWAIT))
++ return false;
++
++ if (rw == READ)
++ return file->f_op->read_iter != NULL;
++
++ return file->f_op->write_iter != NULL;
++}
++
++static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
++{
++ if (rw == READ && (req->flags & REQ_F_NOWAIT_READ))
++ return true;
++ else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE))
++ return true;
++
++ return __io_file_supports_nowait(req->file, rw);
++}
++
++static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
++ int rw)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct kiocb *kiocb = &req->rw.kiocb;
++ struct file *file = req->file;
++ unsigned ioprio;
++ int ret;
++
++ if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode))
++ req->flags |= REQ_F_ISREG;
++
++ kiocb->ki_pos = READ_ONCE(sqe->off);
++ kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
++ kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
++ ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
++ if (unlikely(ret))
++ return ret;
++
++ /*
++ * If the file is marked O_NONBLOCK, still allow retry for it if it
++ * supports async. Otherwise it's impossible to use O_NONBLOCK files
++	 * reliably. If not, or if IOCB_NOWAIT is set, don't retry.
++ */
++ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
++ ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
++ req->flags |= REQ_F_NOWAIT;
++
++ ioprio = READ_ONCE(sqe->ioprio);
++ if (ioprio) {
++ ret = ioprio_check_cap(ioprio);
++ if (ret)
++ return ret;
++
++ kiocb->ki_ioprio = ioprio;
++ } else
++ kiocb->ki_ioprio = get_current_ioprio();
++
++ if (ctx->flags & IORING_SETUP_IOPOLL) {
++ if (!(kiocb->ki_flags & IOCB_DIRECT) ||
++ !kiocb->ki_filp->f_op->iopoll)
++ return -EOPNOTSUPP;
++
++ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
++ kiocb->ki_complete = io_complete_rw_iopoll;
++ req->iopoll_completed = 0;
++ } else {
++ if (kiocb->ki_flags & IOCB_HIPRI)
++ return -EINVAL;
++ kiocb->ki_complete = io_complete_rw;
++ }
++
++ /* used for fixed read/write too - just read unconditionally */
++ req->buf_index = READ_ONCE(sqe->buf_index);
++ req->imu = NULL;
++
++ if (req->opcode == IORING_OP_READ_FIXED ||
++ req->opcode == IORING_OP_WRITE_FIXED) {
++ struct io_ring_ctx *ctx = req->ctx;
++ u16 index;
++
++ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
++ return -EFAULT;
++ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
++ req->imu = ctx->user_bufs[index];
++ io_req_set_rsrc_node(req);
++ }
++
++ req->rw.addr = READ_ONCE(sqe->addr);
++ req->rw.len = READ_ONCE(sqe->len);
++ return 0;
++}
++
++static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
++{
++ switch (ret) {
++ case -EIOCBQUEUED:
++ break;
++ case -ERESTARTSYS:
++ case -ERESTARTNOINTR:
++ case -ERESTARTNOHAND:
++ case -ERESTART_RESTARTBLOCK:
++ /*
++ * We can't just restart the syscall, since previously
++ * submitted sqes may already be in progress. Just fail this
++ * IO with EINTR.
++ */
++ ret = -EINTR;
++ fallthrough;
++ default:
++ kiocb->ki_complete(kiocb, ret, 0);
++ }
++}
++
++static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
++{
++ struct kiocb *kiocb = &req->rw.kiocb;
++
++ if (kiocb->ki_pos != -1)
++ return &kiocb->ki_pos;
++
++ if (!(req->file->f_mode & FMODE_STREAM)) {
++ req->flags |= REQ_F_CUR_POS;
++ kiocb->ki_pos = req->file->f_pos;
++ return &kiocb->ki_pos;
++ }
++
++ kiocb->ki_pos = 0;
++ return NULL;
++}
++
++static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
++ unsigned int issue_flags)
++{
++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
++
++ if (req->flags & REQ_F_CUR_POS)
++ req->file->f_pos = kiocb->ki_pos;
++ if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) {
++ if (!__io_complete_rw_common(req, ret)) {
++ /*
++ * Safe to call io_end from here as we're inline
++ * from the submission path.
++ */
++ io_req_io_end(req);
++ __io_req_complete(req, issue_flags,
++ io_fixup_rw_res(req, ret),
++ io_put_rw_kbuf(req));
++ }
++ } else {
++ io_rw_done(kiocb, ret);
++ }
++
++ if (req->flags & REQ_F_REISSUE) {
++ req->flags &= ~REQ_F_REISSUE;
++ if (io_resubmit_prep(req)) {
++ io_req_task_queue_reissue(req);
++ } else {
++ unsigned int cflags = io_put_rw_kbuf(req);
++ struct io_ring_ctx *ctx = req->ctx;
++
++ ret = io_fixup_rw_res(req, ret);
++ req_set_fail(req);
++ if (!(issue_flags & IO_URING_F_NONBLOCK)) {
++ mutex_lock(&ctx->uring_lock);
++ __io_req_complete(req, issue_flags, ret, cflags);
++ mutex_unlock(&ctx->uring_lock);
++ } else {
++ __io_req_complete(req, issue_flags, ret, cflags);
++ }
++ }
++ }
++}
++
++static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
++ struct io_mapped_ubuf *imu)
++{
++ size_t len = req->rw.len;
++ u64 buf_end, buf_addr = req->rw.addr;
++ size_t offset;
++
++ if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
++ return -EFAULT;
++ /* not inside the mapped region */
++ if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end))
++ return -EFAULT;
++
++ /*
++ * The address may not be the start of the registered buffer; set the
++ * size appropriately and advance the iterator to where the range begins.
++ */
++ offset = buf_addr - imu->ubuf;
++ iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
++
++ if (offset) {
++ /*
++ * Don't use iov_iter_advance() here, as it's really slow for
++ * using the latter parts of a big fixed buffer - it iterates
++ * over each segment manually. We can cheat a bit here, because
++ * we know that:
++ *
++ * 1) it's a BVEC iter, we set it up
++ * 2) all bvecs are PAGE_SIZE in size, except potentially the
++ * first and last bvec
++ *
++ * So just find our index, and adjust the iterator afterwards.
++ * If the offset is within the first bvec (or the whole first
++ * bvec), just use iov_iter_advance(). This makes it easier
++ * since we can just skip the first segment, which may not
++ * be PAGE_SIZE aligned.
++ */
++ const struct bio_vec *bvec = imu->bvec;
++
++ if (offset <= bvec->bv_len) {
++ iov_iter_advance(iter, offset);
++ } else {
++ unsigned long seg_skip;
++
++ /* skip first vec */
++ offset -= bvec->bv_len;
++ seg_skip = 1 + (offset >> PAGE_SHIFT);
++
++ iter->bvec = bvec + seg_skip;
++ iter->nr_segs -= seg_skip;
++ iter->count -= bvec->bv_len + offset;
++ iter->iov_offset = offset & ~PAGE_MASK;
++ }
++ }
++
++ return 0;
++}
++
++static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter)
++{
++ if (WARN_ON_ONCE(!req->imu))
++ return -EFAULT;
++ return __io_import_fixed(req, rw, iter, req->imu);
++}
++
++static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
++{
++ if (needs_lock)
++ mutex_unlock(&ctx->uring_lock);
++}
++
++static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
++{
++ /*
++ * "Normal" inline submissions always hold the uring_lock, since we
++ * grab it from the system call. Same is true for the SQPOLL offload.
++ * The only exception is when we've detached the request and issue it
++ * from an async worker thread, grab the lock for that case.
++ */
++ if (needs_lock)
++ mutex_lock(&ctx->uring_lock);
++}
++
++static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
++ int bgid, struct io_buffer *kbuf,
++ bool needs_lock)
++{
++ struct io_buffer *head;
++
++ if (req->flags & REQ_F_BUFFER_SELECTED)
++ return kbuf;
++
++ io_ring_submit_lock(req->ctx, needs_lock);
++
++ lockdep_assert_held(&req->ctx->uring_lock);
++
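++ /*
++ * Buffer groups live in an xarray keyed by bgid. The stored entry
++ * is itself a buffer that heads the list: hand out list entries
++ * first, and only consume the head (removing the group) once the
++ * list is empty.
++ */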
++ head = xa_load(&req->ctx->io_buffers, bgid);
++ if (head) {
++ if (!list_empty(&head->list)) {
++ kbuf = list_last_entry(&head->list, struct io_buffer,
++ list);
++ list_del(&kbuf->list);
++ } else {
++ kbuf = head;
++ xa_erase(&req->ctx->io_buffers, bgid);
++ }
++ if (*len > kbuf->len)
++ *len = kbuf->len;
++ } else {
++ kbuf = ERR_PTR(-ENOBUFS);
++ }
++
++ io_ring_submit_unlock(req->ctx, needs_lock);
++
++ return kbuf;
++}
++
++static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
++ bool needs_lock)
++{
++ struct io_buffer *kbuf;
++ u16 bgid;
++
++ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
++ bgid = req->buf_index;
++ kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
++ if (IS_ERR(kbuf))
++ return kbuf;
++ req->rw.addr = (u64) (unsigned long) kbuf;
++ req->flags |= REQ_F_BUFFER_SELECTED;
++ return u64_to_user_ptr(kbuf->addr);
++}
++
++#ifdef CONFIG_COMPAT
++static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
++ bool needs_lock)
++{
++ struct compat_iovec __user *uiov;
++ compat_ssize_t clen;
++ void __user *buf;
++ ssize_t len;
++
++ uiov = u64_to_user_ptr(req->rw.addr);
++ if (!access_ok(uiov, sizeof(*uiov)))
++ return -EFAULT;
++ if (__get_user(clen, &uiov->iov_len))
++ return -EFAULT;
++ if (clen < 0)
++ return -EINVAL;
++
++ len = clen;
++ buf = io_rw_buffer_select(req, &len, needs_lock);
++ if (IS_ERR(buf))
++ return PTR_ERR(buf);
++ iov[0].iov_base = buf;
++ iov[0].iov_len = (compat_size_t) len;
++ return 0;
++}
++#endif
++
++static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
++ bool needs_lock)
++{
++ struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
++ void __user *buf;
++ ssize_t len;
++
++ if (copy_from_user(iov, uiov, sizeof(*uiov)))
++ return -EFAULT;
++
++ len = iov[0].iov_len;
++ if (len < 0)
++ return -EINVAL;
++ buf = io_rw_buffer_select(req, &len, needs_lock);
++ if (IS_ERR(buf))
++ return PTR_ERR(buf);
++ iov[0].iov_base = buf;
++ iov[0].iov_len = len;
++ return 0;
++}
++
++static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
++ bool needs_lock)
++{
++ if (req->flags & REQ_F_BUFFER_SELECTED) {
++ struct io_buffer *kbuf;
++
++ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
++ iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
++ iov[0].iov_len = kbuf->len;
++ return 0;
++ }
++ if (req->rw.len != 1)
++ return -EINVAL;
++
++#ifdef CONFIG_COMPAT
++ if (req->ctx->compat)
++ return io_compat_import(req, iov, needs_lock);
++#endif
++
++ return __io_iov_buffer_select(req, iov, needs_lock);
++}
++
++static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec,
++ struct iov_iter *iter, bool needs_lock)
++{
++ void __user *buf = u64_to_user_ptr(req->rw.addr);
++ size_t sqe_len = req->rw.len;
++ u8 opcode = req->opcode;
++ ssize_t ret;
++
++ if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
++ *iovec = NULL;
++ return io_import_fixed(req, rw, iter);
++ }
++
++ /* buffer index only valid with fixed read/write, or buffer select */
++ if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
++ return -EINVAL;
++
++ if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
++ if (IS_ERR(buf))
++ return PTR_ERR(buf);
++ req->rw.len = sqe_len;
++ }
++
++ ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
++ *iovec = NULL;
++ return ret;
++ }
++
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ ret = io_iov_buffer_select(req, *iovec, needs_lock);
++ if (!ret)
++ iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len);
++ *iovec = NULL;
++ return ret;
++ }
++
++ return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
++ req->ctx->compat);
++}
++
++static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
++{
++ return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
++}
++
++/*
++ * For files that don't have ->read_iter() and ->write_iter(), handle them
++ * by looping over ->read() or ->write() manually.
++ */
++static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
++{
++ struct kiocb *kiocb = &req->rw.kiocb;
++ struct file *file = req->file;
++ ssize_t ret = 0;
++ loff_t *ppos;
++
++ /*
++ * Don't support polled IO through this interface, and we can't
++ * support non-blocking either. For the latter, this just causes
++ * the kiocb to be handled from an async context.
++ */
++ if (kiocb->ki_flags & IOCB_HIPRI)
++ return -EOPNOTSUPP;
++ if (kiocb->ki_flags & IOCB_NOWAIT)
++ return -EAGAIN;
++
++ ppos = io_kiocb_ppos(kiocb);
++
++ while (iov_iter_count(iter)) {
++ struct iovec iovec;
++ ssize_t nr;
++
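++ /*
++ * bvec iters come from fixed (registered) buffers. ->read() and
++ * ->write() want a user address, so feed them req->rw.addr/len
++ * directly and advance those instead of the iterator.
++ */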
++ if (!iov_iter_is_bvec(iter)) {
++ iovec = iov_iter_iovec(iter);
++ } else {
++ iovec.iov_base = u64_to_user_ptr(req->rw.addr);
++ iovec.iov_len = req->rw.len;
++ }
++
++ if (rw == READ) {
++ nr = file->f_op->read(file, iovec.iov_base,
++ iovec.iov_len, ppos);
++ } else {
++ nr = file->f_op->write(file, iovec.iov_base,
++ iovec.iov_len, ppos);
++ }
++
++ if (nr < 0) {
++ if (!ret)
++ ret = nr;
++ break;
++ }
++ ret += nr;
++ if (!iov_iter_is_bvec(iter)) {
++ iov_iter_advance(iter, nr);
++ } else {
++ req->rw.addr += nr;
++ req->rw.len -= nr;
++ if (!req->rw.len)
++ break;
++ }
++ if (nr != iovec.iov_len)
++ break;
++ }
++
++ return ret;
++}
++
++static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
++ const struct iovec *fast_iov, struct iov_iter *iter)
++{
++ struct io_async_rw *rw = req->async_data;
++
++ memcpy(&rw->iter, iter, sizeof(*iter));
++ rw->free_iovec = iovec;
++ rw->bytes_done = 0;
++ /* can only be fixed buffers, no need to do anything */
++ if (iov_iter_is_bvec(iter))
++ return;
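++ /*
++ * A NULL iovec means the segments live in the caller's inline
++ * fast_iov array; copy the ones still referenced by the iter into
++ * our own fast_iov. Otherwise keep the allocated iovec and mark
++ * the request so it is freed on cleanup.
++ */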
++ if (!iovec) {
++ unsigned iov_off = 0;
++
++ rw->iter.iov = rw->fast_iov;
++ if (iter->iov != fast_iov) {
++ iov_off = iter->iov - fast_iov;
++ rw->iter.iov += iov_off;
++ }
++ if (rw->fast_iov != fast_iov)
++ memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
++ sizeof(struct iovec) * iter->nr_segs);
++ } else {
++ req->flags |= REQ_F_NEED_CLEANUP;
++ }
++}
++
++static inline int io_alloc_async_data(struct io_kiocb *req)
++{
++ WARN_ON_ONCE(!io_op_defs[req->opcode].async_size);
++ req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL);
++ return req->async_data == NULL;
++}
++
++static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
++ const struct iovec *fast_iov,
++ struct iov_iter *iter, bool force)
++{
++ if (!force && !io_op_defs[req->opcode].needs_async_setup)
++ return 0;
++ if (!req->async_data) {
++ struct io_async_rw *iorw;
++
++ if (io_alloc_async_data(req)) {
++ kfree(iovec);
++ return -ENOMEM;
++ }
++
++ io_req_map_rw(req, iovec, fast_iov, iter);
++ iorw = req->async_data;
++ /* we've copied and mapped the iter, ensure state is saved */
++ iov_iter_save_state(&iorw->iter, &iorw->iter_state);
++ }
++ return 0;
++}
++
++static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
++{
++ struct io_async_rw *iorw = req->async_data;
++ struct iovec *iov = iorw->fast_iov;
++ int ret;
++
++ ret = io_import_iovec(rw, req, &iov, &iorw->iter, false);
++ if (unlikely(ret < 0))
++ return ret;
++
++ iorw->bytes_done = 0;
++ iorw->free_iovec = iov;
++ if (iov)
++ req->flags |= REQ_F_NEED_CLEANUP;
++ iov_iter_save_state(&iorw->iter, &iorw->iter_state);
++ return 0;
++}
++
++static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ if (unlikely(!(req->file->f_mode & FMODE_READ)))
++ return -EBADF;
++ return io_prep_rw(req, sqe, READ);
++}
++
++/*
++ * This is our waitqueue callback handler, registered through lock_page_async()
++ * when our initial attempt to do the IO with the iocb armed our waitqueue.
++ * This gets called when the page is unlocked, and we generally expect that to
++ * happen when the page IO is completed and the page is now uptodate. This will
++ * queue a task_work based retry of the operation, attempting to copy the data
++ * again. If the latter fails because the page was NOT uptodate, then we will
++ * do a thread based blocking retry of the operation. That's the unexpected
++ * slow path.
++ */
++static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
++ int sync, void *arg)
++{
++ struct wait_page_queue *wpq;
++ struct io_kiocb *req = wait->private;
++ struct wait_page_key *key = arg;
++
++ wpq = container_of(wait, struct wait_page_queue, wait);
++
++ if (!wake_page_match(wpq, key))
++ return 0;
++
++ req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
++ list_del_init(&wait->entry);
++ io_req_task_queue(req);
++ return 1;
++}
++
++/*
++ * This controls whether a given IO request should be armed for async page
++ * based retry. If we return false here, the request is handed to the async
++ * worker threads for retry. If we're doing buffered reads on a regular file,
++ * we prepare a private wait_page_queue entry and retry the operation. This
++ * will either succeed because the page is now uptodate and unlocked, or it
++ * will register a callback when the page is unlocked at IO completion. Through
++ * that callback, io_uring uses task_work to setup a retry of the operation.
++ * That retry will attempt the buffered read again. The retry will generally
++ * succeed, or in rare cases where it fails, we then fall back to using the
++ * async worker threads for a blocking retry.
++ */
++static bool io_rw_should_retry(struct io_kiocb *req)
++{
++ struct io_async_rw *rw = req->async_data;
++ struct wait_page_queue *wait = &rw->wpq;
++ struct kiocb *kiocb = &req->rw.kiocb;
++
++ /* never retry for NOWAIT, we just complete with -EAGAIN */
++ if (req->flags & REQ_F_NOWAIT)
++ return false;
++
++ /* Only for buffered IO */
++ if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
++ return false;
++
++ /*
++ * just use poll if we can, and don't attempt if the fs doesn't
++ * support callback based unlocks
++ */
++ if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
++ return false;
++
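++ /*
++ * Arm the page waitqueue entry: if the buffered read can't lock
++ * the page, io_async_buf_func() runs on unlock and queues a
++ * task_work retry of this request.
++ */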
++ wait->wait.func = io_async_buf_func;
++ wait->wait.private = req;
++ wait->wait.flags = 0;
++ INIT_LIST_HEAD(&wait->wait.entry);
++ kiocb->ki_flags |= IOCB_WAITQ;
++ kiocb->ki_flags &= ~IOCB_NOWAIT;
++ kiocb->ki_waitq = wait;
++ return true;
++}
++
++static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
++{
++ if (req->file->f_op->read_iter)
++ return call_read_iter(req->file, &req->rw.kiocb, iter);
++ else if (req->file->f_op->read)
++ return loop_rw_iter(READ, req, iter);
++ else
++ return -EINVAL;
++}
++
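++/*
++ * Only regular files and block devices warrant retrying a short read to
++ * completion; for other file types a short read is the final answer.
++ */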
++static bool need_read_all(struct io_kiocb *req)
++{
++ return req->flags & REQ_F_ISREG ||
++ S_ISBLK(file_inode(req->file)->i_mode);
++}
++
++static int io_read(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
++ struct kiocb *kiocb = &req->rw.kiocb;
++ struct iov_iter __iter, *iter = &__iter;
++ struct io_async_rw *rw = req->async_data;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++ struct iov_iter_state __state, *state;
++ ssize_t ret, ret2;
++ loff_t *ppos;
++
++ if (rw) {
++ iter = &rw->iter;
++ state = &rw->iter_state;
++ /*
++ * We come here from an earlier attempt, restore our state to
++ * match in case it doesn't. It's cheap enough that we don't
++ * need to make this conditional.
++ */
++ iov_iter_restore(iter, state);
++ iovec = NULL;
++ } else {
++ ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
++ if (ret < 0)
++ return ret;
++ state = &__state;
++ iov_iter_save_state(iter, state);
++ }
++ req->result = iov_iter_count(iter);
++
++ /* Ensure we clear previously set non-block flag */
++ if (!force_nonblock)
++ kiocb->ki_flags &= ~IOCB_NOWAIT;
++ else
++ kiocb->ki_flags |= IOCB_NOWAIT;
++
++ /* If the file doesn't support async, just async punt */
++ if (force_nonblock && !io_file_supports_nowait(req, READ)) {
++ ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
++ return ret ?: -EAGAIN;
++ }
++
++ ppos = io_kiocb_update_pos(req);
++
++ ret = rw_verify_area(READ, req->file, ppos, req->result);
++ if (unlikely(ret)) {
++ kfree(iovec);
++ return ret;
++ }
++
++ ret = io_iter_do_read(req, iter);
++
++ if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
++ req->flags &= ~REQ_F_REISSUE;
++ /* IOPOLL retry should happen for io-wq threads */
++ if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
++ goto done;
++ /* no retry on NONBLOCK or RWF_NOWAIT */
++ if (req->flags & REQ_F_NOWAIT)
++ goto done;
++ ret = 0;
++ } else if (ret == -EIOCBQUEUED) {
++ goto out_free;
++ } else if (ret <= 0 || ret == req->result || !force_nonblock ||
++ (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
++ /* read all, failed, already did sync or don't want to retry */
++ goto done;
++ }
++
++ /*
++ * Don't depend on the iter state matching what was consumed, or being
++ * untouched in case of error. Restore it and we'll advance it
++ * manually if we need to.
++ */
++ iov_iter_restore(iter, state);
++
++ ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
++ if (ret2)
++ return ret2;
++
++ iovec = NULL;
++ rw = req->async_data;
++ /*
++ * Now use our persistent iterator and state, if we aren't already.
++ * We've restored and mapped the iter to match.
++ */
++ if (iter != &rw->iter) {
++ iter = &rw->iter;
++ state = &rw->iter_state;
++ }
++
++ do {
++ /*
++ * We end up here because of a partial read, either from
++ * above or inside this loop. Advance the iter by the bytes
++ * that were consumed.
++ */
++ iov_iter_advance(iter, ret);
++ if (!iov_iter_count(iter))
++ break;
++ rw->bytes_done += ret;
++ iov_iter_save_state(iter, state);
++
++ /* if we can retry, do so with the callbacks armed */
++ if (!io_rw_should_retry(req)) {
++ kiocb->ki_flags &= ~IOCB_WAITQ;
++ return -EAGAIN;
++ }
++
++ req->result = iov_iter_count(iter);
++ /*
++ * Now retry read with the IOCB_WAITQ parts set in the iocb. If
++ * we get -EIOCBQUEUED, then we'll get a notification when the
++ * desired page gets unlocked. We can also get a partial read
++ * here, and if we do, then just retry at the new offset.
++ */
++ ret = io_iter_do_read(req, iter);
++ if (ret == -EIOCBQUEUED)
++ return 0;
++ /* we got some bytes, but not all. retry. */
++ kiocb->ki_flags &= ~IOCB_WAITQ;
++ iov_iter_restore(iter, state);
++ } while (ret > 0);
++done:
++ kiocb_done(kiocb, ret, issue_flags);
++out_free:
++ /* it's faster to check here than to delegate to kfree() */
++ if (iovec)
++ kfree(iovec);
++ return 0;
++}
++
++static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
++ return -EBADF;
++ return io_prep_rw(req, sqe, WRITE);
++}
++
++static int io_write(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
++ struct kiocb *kiocb = &req->rw.kiocb;
++ struct iov_iter __iter, *iter = &__iter;
++ struct io_async_rw *rw = req->async_data;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++ struct iov_iter_state __state, *state;
++ ssize_t ret, ret2;
++ loff_t *ppos;
++
++ if (rw) {
++ iter = &rw->iter;
++ state = &rw->iter_state;
++ iov_iter_restore(iter, state);
++ iovec = NULL;
++ } else {
++ ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
++ if (ret < 0)
++ return ret;
++ state = &__state;
++ iov_iter_save_state(iter, state);
++ }
++ req->result = iov_iter_count(iter);
++
++ /* Ensure we clear previously set non-block flag */
++ if (!force_nonblock)
++ kiocb->ki_flags &= ~IOCB_NOWAIT;
++ else
++ kiocb->ki_flags |= IOCB_NOWAIT;
++
++ /* If the file doesn't support async, just async punt */
++ if (force_nonblock && !io_file_supports_nowait(req, WRITE))
++ goto copy_iov;
++
++ /* file path doesn't support NOWAIT for non-direct_IO */
++ if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
++ (req->flags & REQ_F_ISREG))
++ goto copy_iov;
++
++ ppos = io_kiocb_update_pos(req);
++
++ ret = rw_verify_area(WRITE, req->file, ppos, req->result);
++ if (unlikely(ret))
++ goto out_free;
++
++ /*
++ * Open-code file_start_write here to grab freeze protection,
++ * which will be released by another thread in
++ * io_complete_rw(). Fool lockdep by telling it the lock got
++ * released so that it doesn't complain about the held lock when
++ * we return to userspace.
++ */
++ if (req->flags & REQ_F_ISREG) {
++ sb_start_write(file_inode(req->file)->i_sb);
++ __sb_writers_release(file_inode(req->file)->i_sb,
++ SB_FREEZE_WRITE);
++ }
++ kiocb->ki_flags |= IOCB_WRITE;
++
++ if (req->file->f_op->write_iter)
++ ret2 = call_write_iter(req->file, kiocb, iter);
++ else if (req->file->f_op->write)
++ ret2 = loop_rw_iter(WRITE, req, iter);
++ else
++ ret2 = -EINVAL;
++
++ if (req->flags & REQ_F_REISSUE) {
++ req->flags &= ~REQ_F_REISSUE;
++ ret2 = -EAGAIN;
++ }
++
++ /*
++ * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
++ * retry them without IOCB_NOWAIT.
++ */
++ if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
++ ret2 = -EAGAIN;
++ /* no retry on NONBLOCK or RWF_NOWAIT */
++ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
++ goto done;
++ if (!force_nonblock || ret2 != -EAGAIN) {
++ /* IOPOLL retry should happen for io-wq threads */
++ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
++ goto copy_iov;
++done:
++ kiocb_done(kiocb, ret2, issue_flags);
++ } else {
++copy_iov:
++ iov_iter_restore(iter, state);
++ ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
++ if (!ret) {
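++ /*
++ * We grabbed freeze protection for this write above; drop it
++ * before punting, io-wq will take it again when the write is
++ * retried.
++ */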
++ if (kiocb->ki_flags & IOCB_WRITE)
++ kiocb_end_write(req);
++ return -EAGAIN;
++ }
++ return ret;
++ }
++out_free:
++ /* it's reportedly faster than delegating the null check to kfree() */
++ if (iovec)
++ kfree(iovec);
++ return ret;
++}
++
++static int io_renameat_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_rename *ren = &req->rename;
++ const char __user *oldf, *newf;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ ren->old_dfd = READ_ONCE(sqe->fd);
++ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++ ren->new_dfd = READ_ONCE(sqe->len);
++ ren->flags = READ_ONCE(sqe->rename_flags);
++
++ ren->oldpath = getname(oldf);
++ if (IS_ERR(ren->oldpath))
++ return PTR_ERR(ren->oldpath);
++
++ ren->newpath = getname(newf);
++ if (IS_ERR(ren->newpath)) {
++ putname(ren->oldpath);
++ return PTR_ERR(ren->newpath);
++ }
++
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_rename *ren = &req->rename;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd,
++ ren->newpath, ren->flags);
++
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_unlinkat_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_unlink *un = &req->unlink;
++ const char __user *fname;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ un->dfd = READ_ONCE(sqe->fd);
++
++ un->flags = READ_ONCE(sqe->unlink_flags);
++ if (un->flags & ~AT_REMOVEDIR)
++ return -EINVAL;
++
++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ un->filename = getname(fname);
++ if (IS_ERR(un->filename))
++ return PTR_ERR(un->filename);
++
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_unlink *un = &req->unlink;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ if (un->flags & AT_REMOVEDIR)
++ ret = do_rmdir(un->dfd, un->filename);
++ else
++ ret = do_unlinkat(un->dfd, un->filename);
++
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_mkdirat_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_mkdir *mkd = &req->mkdir;
++ const char __user *fname;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ mkd->dfd = READ_ONCE(sqe->fd);
++ mkd->mode = READ_ONCE(sqe->len);
++
++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ mkd->filename = getname(fname);
++ if (IS_ERR(mkd->filename))
++ return PTR_ERR(mkd->filename);
++
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_mkdirat(struct io_kiocb *req, int issue_flags)
++{
++ struct io_mkdir *mkd = &req->mkdir;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
++
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_symlinkat_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_symlink *sl = &req->symlink;
++ const char __user *oldpath, *newpath;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ sl->new_dfd = READ_ONCE(sqe->fd);
++ oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++
++ sl->oldpath = getname(oldpath);
++ if (IS_ERR(sl->oldpath))
++ return PTR_ERR(sl->oldpath);
++
++ sl->newpath = getname(newpath);
++ if (IS_ERR(sl->newpath)) {
++ putname(sl->oldpath);
++ return PTR_ERR(sl->newpath);
++ }
++
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_symlinkat(struct io_kiocb *req, int issue_flags)
++{
++ struct io_symlink *sl = &req->symlink;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
++
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_linkat_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_hardlink *lnk = &req->hardlink;
++ const char __user *oldf, *newf;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ lnk->old_dfd = READ_ONCE(sqe->fd);
++ lnk->new_dfd = READ_ONCE(sqe->len);
++ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++ lnk->flags = READ_ONCE(sqe->hardlink_flags);
++
++ lnk->oldpath = getname(oldf);
++ if (IS_ERR(lnk->oldpath))
++ return PTR_ERR(lnk->oldpath);
++
++ lnk->newpath = getname(newf);
++ if (IS_ERR(lnk->newpath)) {
++ putname(lnk->oldpath);
++ return PTR_ERR(lnk->newpath);
++ }
++
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_linkat(struct io_kiocb *req, int issue_flags)
++{
++ struct io_hardlink *lnk = &req->hardlink;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
++ lnk->newpath, lnk->flags);
++
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_shutdown_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++#if defined(CONFIG_NET)
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
++ sqe->buf_index || sqe->splice_fd_in))
++ return -EINVAL;
++
++ req->shutdown.how = READ_ONCE(sqe->len);
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
++{
++#if defined(CONFIG_NET)
++ struct socket *sock;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ sock = sock_from_file(req->file);
++ if (unlikely(!sock))
++ return -ENOTSOCK;
++
++ ret = __sys_shutdown_sock(sock, req->shutdown.how);
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int __io_splice_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_splice *sp = &req->splice;
++ unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ sp->len = READ_ONCE(sqe->len);
++ sp->flags = READ_ONCE(sqe->splice_flags);
++ if (unlikely(sp->flags & ~valid_flags))
++ return -EINVAL;
++ sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in);
++ return 0;
++}
++
++static int io_tee_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off))
++ return -EINVAL;
++ return __io_splice_prep(req, sqe);
++}
++
++static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_splice *sp = &req->splice;
++ struct file *out = sp->file_out;
++ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
++ struct file *in;
++ long ret = 0;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ in = io_file_get(req->ctx, req, sp->splice_fd_in,
++ (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags);
++ if (!in) {
++ ret = -EBADF;
++ goto done;
++ }
++
++ if (sp->len)
++ ret = do_tee(in, out, sp->len, flags);
++
++ if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
++ io_put_file(in);
++done:
++ if (ret != sp->len)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_splice *sp = &req->splice;
++
++ sp->off_in = READ_ONCE(sqe->splice_off_in);
++ sp->off_out = READ_ONCE(sqe->off);
++ return __io_splice_prep(req, sqe);
++}
++
++static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_splice *sp = &req->splice;
++ struct file *out = sp->file_out;
++ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
++ loff_t *poff_in, *poff_out;
++ struct file *in;
++ long ret = 0;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ in = io_file_get(req->ctx, req, sp->splice_fd_in,
++ (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags);
++ if (!in) {
++ ret = -EBADF;
++ goto done;
++ }
++
++ poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
++ poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
++
++ if (sp->len)
++ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
++
++ if (!(sp->flags & SPLICE_F_FD_IN_FIXED))
++ io_put_file(in);
++done:
++ if (ret != sp->len)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++/*
++ * IORING_OP_NOP just posts a completion event, nothing else.
++ */
++static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ __io_req_complete(req, issue_flags, 0, 0);
++ return 0;
++}
++
++static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
++ sqe->splice_fd_in))
++ return -EINVAL;
++
++ req->sync.flags = READ_ONCE(sqe->fsync_flags);
++ if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
++ return -EINVAL;
++
++ req->sync.off = READ_ONCE(sqe->off);
++ req->sync.len = READ_ONCE(sqe->len);
++ return 0;
++}
++
++static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
++{
++ loff_t end = req->sync.off + req->sync.len;
++ int ret;
++
++ /* fsync always requires a blocking context */
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = vfs_fsync_range(req->file, req->sync.off,
++ end > 0 ? end : LLONG_MAX,
++ req->sync.flags & IORING_FSYNC_DATASYNC);
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_fallocate_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ req->sync.off = READ_ONCE(sqe->off);
++ req->sync.len = READ_ONCE(sqe->addr);
++ req->sync.mode = READ_ONCE(sqe->len);
++ return 0;
++}
++
++static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
++{
++ int ret;
++
++ /* fallocate always requires a blocking context */
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++ ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
++ req->sync.len);
++ if (ret < 0)
++ req_set_fail(req);
++ else
++ fsnotify_modify(req->file);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ const char __user *fname;
++ int ret;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->ioprio || sqe->buf_index))
++ return -EINVAL;
++ if (unlikely(req->flags & REQ_F_FIXED_FILE))
++ return -EBADF;
++
++ /* open.how should already be initialised */
++ if (!(req->open.how.flags & O_PATH) && force_o_largefile())
++ req->open.how.flags |= O_LARGEFILE;
++
++ req->open.dfd = READ_ONCE(sqe->fd);
++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ req->open.filename = getname(fname);
++ if (IS_ERR(req->open.filename)) {
++ ret = PTR_ERR(req->open.filename);
++ req->open.filename = NULL;
++ return ret;
++ }
++
++ req->open.file_slot = READ_ONCE(sqe->file_index);
++ if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC))
++ return -EINVAL;
++
++ req->open.nofile = rlimit(RLIMIT_NOFILE);
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return 0;
++}
++
++static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ u64 mode = READ_ONCE(sqe->len);
++ u64 flags = READ_ONCE(sqe->open_flags);
++
++ req->open.how = build_open_how(flags, mode);
++ return __io_openat_prep(req, sqe);
++}
++
++static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct open_how __user *how;
++ size_t len;
++ int ret;
++
++ how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++ len = READ_ONCE(sqe->len);
++ if (len < OPEN_HOW_SIZE_VER0)
++ return -EINVAL;
++
++ ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how,
++ len);
++ if (ret)
++ return ret;
++
++ return __io_openat_prep(req, sqe);
++}
++
++static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct open_flags op;
++ struct file *file;
++ bool resolve_nonblock, nonblock_set;
++ bool fixed = !!req->open.file_slot;
++ int ret;
++
++ ret = build_open_flags(&req->open.how, &op);
++ if (ret)
++ goto err;
++ nonblock_set = op.open_flag & O_NONBLOCK;
++ resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED;
++ if (issue_flags & IO_URING_F_NONBLOCK) {
++ /*
++ * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
++ * it'll always return -EAGAIN. Note that we test for __O_TMPFILE
++ * because O_TMPFILE includes O_DIRECTORY, which isn't a flag
++ * we need to force async for.
++ */
++ if (req->open.how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
++ return -EAGAIN;
++ op.lookup_flags |= LOOKUP_CACHED;
++ op.open_flag |= O_NONBLOCK;
++ }
++
++ if (!fixed) {
++ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
++ if (ret < 0)
++ goto err;
++ }
++
++ file = do_filp_open(req->open.dfd, req->open.filename, &op);
++ if (IS_ERR(file)) {
++ /*
++ * We could hang on to this 'fd' on retrying, but it seems like a
++ * marginal gain for something that is now known to be a slower
++ * path. So just put it, and we'll get a new one when we retry.
++ */
++ if (!fixed)
++ put_unused_fd(ret);
++
++ ret = PTR_ERR(file);
++ /* only retry if RESOLVE_CACHED wasn't already set by application */
++ if (ret == -EAGAIN &&
++ (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)))
++ return -EAGAIN;
++ goto err;
++ }
++
++ if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
++ file->f_flags &= ~O_NONBLOCK;
++ fsnotify_open(file);
++
++ if (!fixed)
++ fd_install(ret, file);
++ else
++ ret = io_install_fixed_file(req, file, issue_flags,
++ req->open.file_slot - 1);
++err:
++ putname(req->open.filename);
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret < 0)
++ req_set_fail(req);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
++{
++ return io_openat2(req, issue_flags);
++}
++
++static int io_remove_buffers_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_provide_buf *p = &req->pbuf;
++ u64 tmp;
++
++ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++
++ tmp = READ_ONCE(sqe->fd);
++ if (!tmp || tmp > USHRT_MAX)
++ return -EINVAL;
++
++ memset(p, 0, sizeof(*p));
++ p->nbufs = tmp;
++ p->bgid = READ_ONCE(sqe->buf_group);
++ return 0;
++}
++
++static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
++ int bgid, unsigned nbufs)
++{
++ unsigned i = 0;
++
++ /* shouldn't happen */
++ if (!nbufs)
++ return 0;
++
++ /* the head kbuf is the list itself */
++ while (!list_empty(&buf->list)) {
++ struct io_buffer *nxt;
++
++ nxt = list_first_entry(&buf->list, struct io_buffer, list);
++ list_del(&nxt->list);
++ kfree(nxt);
++ if (++i == nbufs)
++ return i;
++ cond_resched();
++ }
++ i++;
++ kfree(buf);
++ xa_erase(&ctx->io_buffers, bgid);
++
++ return i;
++}
++
++static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_provide_buf *p = &req->pbuf;
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_buffer *head;
++ int ret = 0;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ io_ring_submit_lock(ctx, !force_nonblock);
++
++ lockdep_assert_held(&ctx->uring_lock);
++
++ ret = -ENOENT;
++ head = xa_load(&ctx->io_buffers, p->bgid);
++ if (head)
++ ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
++ if (ret < 0)
++ req_set_fail(req);
++
++ /* complete before unlock, IOPOLL may need the lock */
++ __io_req_complete(req, issue_flags, ret, 0);
++ io_ring_submit_unlock(ctx, !force_nonblock);
++ return 0;
++}
++
++static int io_provide_buffers_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ unsigned long size, tmp_check;
++ struct io_provide_buf *p = &req->pbuf;
++ u64 tmp;
++
++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
++ return -EINVAL;
++
++ tmp = READ_ONCE(sqe->fd);
++ if (!tmp || tmp > USHRT_MAX)
++ return -E2BIG;
++ p->nbufs = tmp;
++ p->addr = READ_ONCE(sqe->addr);
++ p->len = READ_ONCE(sqe->len);
++
++ if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
++ &size))
++ return -EOVERFLOW;
++ if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
++ return -EOVERFLOW;
++
++ size = (unsigned long)p->len * p->nbufs;
++ if (!access_ok(u64_to_user_ptr(p->addr), size))
++ return -EFAULT;
++
++ p->bgid = READ_ONCE(sqe->buf_group);
++ tmp = READ_ONCE(sqe->off);
++ if (tmp > USHRT_MAX)
++ return -E2BIG;
++ p->bid = tmp;
++ return 0;
++}
++
++static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
++{
++ struct io_buffer *buf;
++ u64 addr = pbuf->addr;
++ int i, bid = pbuf->bid;
++
++ for (i = 0; i < pbuf->nbufs; i++) {
++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
++ if (!buf)
++ break;
++
++ buf->addr = addr;
++ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
++ buf->bid = bid;
++ addr += pbuf->len;
++ bid++;
++ if (!*head) {
++ INIT_LIST_HEAD(&buf->list);
++ *head = buf;
++ } else {
++ list_add_tail(&buf->list, &(*head)->list);
++ }
++ cond_resched();
++ }
++
++ return i ? i : -ENOMEM;
++}
++
++static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_provide_buf *p = &req->pbuf;
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_buffer *head, *list;
++ int ret = 0;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ io_ring_submit_lock(ctx, !force_nonblock);
++
++ lockdep_assert_held(&ctx->uring_lock);
++
++ list = head = xa_load(&ctx->io_buffers, p->bgid);
++
++ ret = io_add_buffers(p, &head);
++ if (ret >= 0 && !list) {
++ ret = xa_insert(&ctx->io_buffers, p->bgid, head,
++ GFP_KERNEL_ACCOUNT);
++ if (ret < 0)
++ __io_remove_buffers(ctx, head, p->bgid, -1U);
++ }
++ if (ret < 0)
++ req_set_fail(req);
++ /* complete before unlock, IOPOLL may need the lock */
++ __io_req_complete(req, issue_flags, ret, 0);
++ io_ring_submit_unlock(ctx, !force_nonblock);
++ return 0;
++}
++
++static int io_epoll_ctl_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++#if defined(CONFIG_EPOLL)
++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ req->epoll.epfd = READ_ONCE(sqe->fd);
++ req->epoll.op = READ_ONCE(sqe->len);
++ req->epoll.fd = READ_ONCE(sqe->off);
++
++ if (ep_op_has_event(req->epoll.op)) {
++ struct epoll_event __user *ev;
++
++ ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
++ return -EFAULT;
++ }
++
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
++{
++#if defined(CONFIG_EPOLL)
++ struct io_epoll *ie = &req->epoll;
++ int ret;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
++ if (force_nonblock && ret == -EAGAIN)
++ return -EAGAIN;
++
++ if (ret < 0)
++ req_set_fail(req);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
++ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ req->madvise.addr = READ_ONCE(sqe->addr);
++ req->madvise.len = READ_ONCE(sqe->len);
++ req->madvise.advice = READ_ONCE(sqe->fadvise_advice);
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
++{
++#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
++ struct io_madvise *ma = &req->madvise;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++#else
++ return -EOPNOTSUPP;
++#endif
++}
++
++static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
++ return -EINVAL;
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++
++ req->fadvise.offset = READ_ONCE(sqe->off);
++ req->fadvise.len = READ_ONCE(sqe->len);
++ req->fadvise.advice = READ_ONCE(sqe->fadvise_advice);
++ return 0;
++}
++
++static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_fadvise *fa = &req->fadvise;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK) {
++ switch (fa->advice) {
++ case POSIX_FADV_NORMAL:
++ case POSIX_FADV_RANDOM:
++ case POSIX_FADV_SEQUENTIAL:
++ break;
++ default:
++ return -EAGAIN;
++ }
++ }
++
++ ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
++ if (ret < 0)
++ req_set_fail(req);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
++ return -EINVAL;
++ if (req->flags & REQ_F_FIXED_FILE)
++ return -EBADF;
++
++ req->statx.dfd = READ_ONCE(sqe->fd);
++ req->statx.mask = READ_ONCE(sqe->len);
++ req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++ req->statx.flags = READ_ONCE(sqe->statx_flags);
++
++ return 0;
++}
++
++static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_statx *ctx = &req->statx;
++ int ret;
++
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
++ ctx->buffer);
++
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
++ sqe->rw_flags || sqe->buf_index)
++ return -EINVAL;
++ if (req->flags & REQ_F_FIXED_FILE)
++ return -EBADF;
++
++ req->close.fd = READ_ONCE(sqe->fd);
++ req->close.file_slot = READ_ONCE(sqe->file_index);
++ if (req->close.file_slot && req->close.fd)
++ return -EINVAL;
++
++ return 0;
++}
++
++static int io_close(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct files_struct *files = current->files;
++ struct io_close *close = &req->close;
++ struct fdtable *fdt;
++ struct file *file = NULL;
++ int ret = -EBADF;
++
++ if (req->close.file_slot) {
++ ret = io_close_fixed(req, issue_flags);
++ goto err;
++ }
++
++ spin_lock(&files->file_lock);
++ fdt = files_fdtable(files);
++ if (close->fd >= fdt->max_fds) {
++ spin_unlock(&files->file_lock);
++ goto err;
++ }
++ file = fdt->fd[close->fd];
++ if (!file || file->f_op == &io_uring_fops) {
++ spin_unlock(&files->file_lock);
++ file = NULL;
++ goto err;
++ }
++
++ /* if the file has a flush method, be safe and punt to async */
++ if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) {
++ spin_unlock(&files->file_lock);
++ return -EAGAIN;
++ }
++
++ ret = __close_fd_get_file(close->fd, &file);
++ spin_unlock(&files->file_lock);
++ if (ret < 0) {
++ if (ret == -ENOENT)
++ ret = -EBADF;
++ goto err;
++ }
++
++ /* No ->flush() or already async, safely close from here */
++ ret = filp_close(file, current->files);
++err:
++ if (ret < 0)
++ req_set_fail(req);
++ if (file)
++ fput(file);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
++ sqe->splice_fd_in))
++ return -EINVAL;
++
++ req->sync.off = READ_ONCE(sqe->off);
++ req->sync.len = READ_ONCE(sqe->len);
++ req->sync.flags = READ_ONCE(sqe->sync_range_flags);
++ return 0;
++}
++
++static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
++{
++ int ret;
++
++ /* sync_file_range always requires a blocking context */
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ return -EAGAIN;
++
++ ret = sync_file_range(req->file, req->sync.off, req->sync.len,
++ req->sync.flags);
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete(req, ret);
++ return 0;
++}
++
++#if defined(CONFIG_NET)
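++/*
++ * Only retry a transfer that made partial progress if the caller asked
++ * for MSG_WAITALL and the socket type allows picking up where we left
++ * off.
++ */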
++static bool io_net_retry(struct socket *sock, int flags)
++{
++ if (!(flags & MSG_WAITALL))
++ return false;
++ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
++static int io_setup_async_msg(struct io_kiocb *req,
++ struct io_async_msghdr *kmsg)
++{
++ struct io_async_msghdr *async_msg = req->async_data;
++
++ if (async_msg)
++ return -EAGAIN;
++ if (io_alloc_async_data(req)) {
++ kfree(kmsg->free_iov);
++ return -ENOMEM;
++ }
++ async_msg = req->async_data;
++ req->flags |= REQ_F_NEED_CLEANUP;
++ memcpy(async_msg, kmsg, sizeof(*kmsg));
++ if (async_msg->msg.msg_name)
++ async_msg->msg.msg_name = &async_msg->addr;
++ /* if we're using fast_iov, set it to the new one */
++ if (!kmsg->free_iov) {
++ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
++ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
++ }
++
++ return -EAGAIN;
++}
++
++static int io_sendmsg_copy_hdr(struct io_kiocb *req,
++ struct io_async_msghdr *iomsg)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++ int ret;
++
++ iomsg->msg.msg_name = &iomsg->addr;
++ iomsg->free_iov = iomsg->fast_iov;
++ ret = sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg,
++ req->sr_msg.msg_flags, &iomsg->free_iov);
++ /* save msg_control as sys_sendmsg() overwrites it */
++ sr->msg_control = iomsg->msg.msg_control;
++ return ret;
++}
++
++static int io_sendmsg_prep_async(struct io_kiocb *req)
++{
++ int ret;
++
++ ret = io_sendmsg_copy_hdr(req, req->async_data);
++ if (!ret)
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return ret;
++}
++
++static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio))
++ return -EINVAL;
++
++ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ sr->len = READ_ONCE(sqe->len);
++ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
++ if (sr->msg_flags & MSG_DONTWAIT)
++ req->flags |= REQ_F_NOWAIT;
++
++#ifdef CONFIG_COMPAT
++ if (req->ctx->compat)
++ sr->msg_flags |= MSG_CMSG_COMPAT;
++#endif
++ sr->done_io = 0;
++ return 0;
++}
++
++static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_async_msghdr iomsg, *kmsg;
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct socket *sock;
++ unsigned flags;
++ int min_ret = 0;
++ int ret;
++
++ sock = sock_from_file(req->file);
++ if (unlikely(!sock))
++ return -ENOTSOCK;
++
++ kmsg = req->async_data;
++ if (!kmsg) {
++ ret = io_sendmsg_copy_hdr(req, &iomsg);
++ if (ret)
++ return ret;
++ kmsg = &iomsg;
++ } else {
++ kmsg->msg.msg_control = sr->msg_control;
++ }
++
++ flags = req->sr_msg.msg_flags;
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ flags |= MSG_DONTWAIT;
++ if (flags & MSG_WAITALL)
++ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
++
++ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
++
++ if (ret < min_ret) {
++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++ return io_setup_async_msg(req, kmsg);
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ kmsg->msg.msg_controllen = 0;
++ kmsg->msg.msg_control = NULL;
++ sr->done_io += ret;
++ req->flags |= REQ_F_PARTIAL_IO;
++ return io_setup_async_msg(req, kmsg);
++ }
++ req_set_fail(req);
++ }
++ /* fast path, check for non-NULL to avoid function call */
++ if (kmsg->free_iov)
++ kfree(kmsg->free_iov);
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret >= 0)
++ ret += sr->done_io;
++ else if (sr->done_io)
++ ret = sr->done_io;
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_send(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct msghdr msg;
++ struct iovec iov;
++ struct socket *sock;
++ unsigned flags;
++ int min_ret = 0;
++ int ret;
++
++ sock = sock_from_file(req->file);
++ if (unlikely(!sock))
++ return -ENOTSOCK;
++
++ ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
++ if (unlikely(ret))
++ return ret;
++
++ msg.msg_name = NULL;
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_namelen = 0;
++
++ flags = req->sr_msg.msg_flags;
++ if (issue_flags & IO_URING_F_NONBLOCK)
++ flags |= MSG_DONTWAIT;
++ if (flags & MSG_WAITALL)
++ min_ret = iov_iter_count(&msg.msg_iter);
++
++ msg.msg_flags = flags;
++ ret = sock_sendmsg(sock, &msg);
++ if (ret < min_ret) {
++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++ return -EAGAIN;
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ sr->len -= ret;
++ sr->buf += ret;
++ sr->done_io += ret;
++ req->flags |= REQ_F_PARTIAL_IO;
++ return -EAGAIN;
++ }
++ req_set_fail(req);
++ }
++ if (ret >= 0)
++ ret += sr->done_io;
++ else if (sr->done_io)
++ ret = sr->done_io;
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
++ struct io_async_msghdr *iomsg)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct iovec __user *uiov;
++ size_t iov_len;
++ int ret;
++
++ ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
++ &iomsg->uaddr, &uiov, &iov_len);
++ if (ret)
++ return ret;
++
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ if (iov_len > 1)
++ return -EINVAL;
++ if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
++ return -EFAULT;
++ sr->len = iomsg->fast_iov[0].iov_len;
++ iomsg->free_iov = NULL;
++ } else {
++ iomsg->free_iov = iomsg->fast_iov;
++ ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
++ &iomsg->free_iov, &iomsg->msg.msg_iter,
++ false);
++ if (ret > 0)
++ ret = 0;
++ }
++
++ return ret;
++}
++
++#ifdef CONFIG_COMPAT
++static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
++ struct io_async_msghdr *iomsg)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct compat_iovec __user *uiov;
++ compat_uptr_t ptr;
++ compat_size_t len;
++ int ret;
++
++ ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
++ &ptr, &len);
++ if (ret)
++ return ret;
++
++ uiov = compat_ptr(ptr);
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ compat_ssize_t clen;
++
++ if (len > 1)
++ return -EINVAL;
++ if (!access_ok(uiov, sizeof(*uiov)))
++ return -EFAULT;
++ if (__get_user(clen, &uiov->iov_len))
++ return -EFAULT;
++ if (clen < 0)
++ return -EINVAL;
++ sr->len = clen;
++ iomsg->free_iov = NULL;
++ } else {
++ iomsg->free_iov = iomsg->fast_iov;
++ ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
++ UIO_FASTIOV, &iomsg->free_iov,
++ &iomsg->msg.msg_iter, true);
++ if (ret < 0)
++ return ret;
++ }
++
++ return 0;
++}
++#endif
++
++static int io_recvmsg_copy_hdr(struct io_kiocb *req,
++ struct io_async_msghdr *iomsg)
++{
++ iomsg->msg.msg_name = &iomsg->addr;
++
++#ifdef CONFIG_COMPAT
++ if (req->ctx->compat)
++ return __io_compat_recvmsg_copy_hdr(req, iomsg);
++#endif
++
++ return __io_recvmsg_copy_hdr(req, iomsg);
++}
++
++static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
++ bool needs_lock)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct io_buffer *kbuf;
++
++ kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
++ if (IS_ERR(kbuf))
++ return kbuf;
++
++ sr->kbuf = kbuf;
++ req->flags |= REQ_F_BUFFER_SELECTED;
++ return kbuf;
++}
++
++static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
++{
++ return io_put_kbuf(req, req->sr_msg.kbuf);
++}
++
++static int io_recvmsg_prep_async(struct io_kiocb *req)
++{
++ int ret;
++
++ ret = io_recvmsg_copy_hdr(req, req->async_data);
++ if (!ret)
++ req->flags |= REQ_F_NEED_CLEANUP;
++ return ret;
++}
++
++static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_sr_msg *sr = &req->sr_msg;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio))
++ return -EINVAL;
++
++ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ sr->len = READ_ONCE(sqe->len);
++ sr->bgid = READ_ONCE(sqe->buf_group);
++ sr->msg_flags = READ_ONCE(sqe->msg_flags);
++ if (sr->msg_flags & MSG_DONTWAIT)
++ req->flags |= REQ_F_NOWAIT;
++
++#ifdef CONFIG_COMPAT
++ if (req->ctx->compat)
++ sr->msg_flags |= MSG_CMSG_COMPAT;
++#endif
++ sr->done_io = 0;
++ return 0;
++}
++
++static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_async_msghdr iomsg, *kmsg;
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct socket *sock;
++ struct io_buffer *kbuf;
++ unsigned flags;
++ int min_ret = 0;
++ int ret, cflags = 0;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ sock = sock_from_file(req->file);
++ if (unlikely(!sock))
++ return -ENOTSOCK;
++
++ kmsg = req->async_data;
++ if (!kmsg) {
++ ret = io_recvmsg_copy_hdr(req, &iomsg);
++ if (ret)
++ return ret;
++ kmsg = &iomsg;
++ }
++
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ kbuf = io_recv_buffer_select(req, !force_nonblock);
++ if (IS_ERR(kbuf))
++ return PTR_ERR(kbuf);
++ kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
++ kmsg->fast_iov[0].iov_len = req->sr_msg.len;
++ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov,
++ 1, req->sr_msg.len);
++ }
++
++ flags = req->sr_msg.msg_flags;
++ if (force_nonblock)
++ flags |= MSG_DONTWAIT;
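++ /*
++ * MSG_WAITALL normally means a short receive should be retried or
++ * failed, but skip that check when ancillary data was requested as
++ * well.
++ */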
++ if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
++ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
++
++ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
++ kmsg->uaddr, flags);
++ if (ret < min_ret) {
++ if (ret == -EAGAIN && force_nonblock)
++ return io_setup_async_msg(req, kmsg);
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ sr->done_io += ret;
++ req->flags |= REQ_F_PARTIAL_IO;
++ return io_setup_async_msg(req, kmsg);
++ }
++ req_set_fail(req);
++ } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++ req_set_fail(req);
++ }
++
++ if (req->flags & REQ_F_BUFFER_SELECTED)
++ cflags = io_put_recv_kbuf(req);
++ /* fast path, check for non-NULL to avoid function call */
++ if (kmsg->free_iov)
++ kfree(kmsg->free_iov);
++ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret >= 0)
++ ret += sr->done_io;
++ else if (sr->done_io)
++ ret = sr->done_io;
++ __io_req_complete(req, issue_flags, ret, cflags);
++ return 0;
++}
++
++static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_buffer *kbuf;
++ struct io_sr_msg *sr = &req->sr_msg;
++ struct msghdr msg;
++ void __user *buf = sr->buf;
++ struct socket *sock;
++ struct iovec iov;
++ unsigned flags;
++ int min_ret = 0;
++ int ret, cflags = 0;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ sock = sock_from_file(req->file);
++ if (unlikely(!sock))
++ return -ENOTSOCK;
++
++ if (req->flags & REQ_F_BUFFER_SELECT) {
++ kbuf = io_recv_buffer_select(req, !force_nonblock);
++ if (IS_ERR(kbuf))
++ return PTR_ERR(kbuf);
++ buf = u64_to_user_ptr(kbuf->addr);
++ }
++
++ ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
++ if (unlikely(ret))
++ goto out_free;
++
++ msg.msg_name = NULL;
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_namelen = 0;
++ msg.msg_iocb = NULL;
++ msg.msg_flags = 0;
++
++ flags = req->sr_msg.msg_flags;
++ if (force_nonblock)
++ flags |= MSG_DONTWAIT;
++ if (flags & MSG_WAITALL)
++ min_ret = iov_iter_count(&msg.msg_iter);
++
++ ret = sock_recvmsg(sock, &msg, flags);
++ if (ret < min_ret) {
++ if (ret == -EAGAIN && force_nonblock)
++ return -EAGAIN;
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ sr->len -= ret;
++ sr->buf += ret;
++ sr->done_io += ret;
++ req->flags |= REQ_F_PARTIAL_IO;
++ return -EAGAIN;
++ }
++ req_set_fail(req);
++ } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++out_free:
++ req_set_fail(req);
++ }
++ if (req->flags & REQ_F_BUFFER_SELECTED)
++ cflags = io_put_recv_kbuf(req);
++ if (ret >= 0)
++ ret += sr->done_io;
++ else if (sr->done_io)
++ ret = sr->done_io;
++ __io_req_complete(req, issue_flags, ret, cflags);
++ return 0;
++}
++
++static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_accept *accept = &req->accept;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->len || sqe->buf_index)
++ return -EINVAL;
++
++ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
++ accept->flags = READ_ONCE(sqe->accept_flags);
++ accept->nofile = rlimit(RLIMIT_NOFILE);
++
++ accept->file_slot = READ_ONCE(sqe->file_index);
++ if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
++ return -EINVAL;
++ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
++ return -EINVAL;
++ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
++ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
++ return 0;
++}
++
++static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_accept *accept = &req->accept;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++ unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
++ bool fixed = !!accept->file_slot;
++ struct file *file;
++ int ret, fd;
++
++ if (!fixed) {
++ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
++ if (unlikely(fd < 0))
++ return fd;
++ }
++ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
++ accept->flags);
++ if (IS_ERR(file)) {
++ if (!fixed)
++ put_unused_fd(fd);
++ ret = PTR_ERR(file);
++ /* safe to retry */
++ req->flags |= REQ_F_PARTIAL_IO;
++ if (ret == -EAGAIN && force_nonblock)
++ return -EAGAIN;
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++ req_set_fail(req);
++ } else if (!fixed) {
++ fd_install(fd, file);
++ ret = fd;
++ } else {
++ ret = io_install_fixed_file(req, file, issue_flags,
++ accept->file_slot - 1);
++ }
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_connect_prep_async(struct io_kiocb *req)
++{
++ struct io_async_connect *io = req->async_data;
++ struct io_connect *conn = &req->connect;
++
++ return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
++}
++
++static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_connect *conn = &req->connect;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++
++ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
++ conn->addr_len = READ_ONCE(sqe->addr2);
++ return 0;
++}
++
++static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_async_connect __io, *io;
++ unsigned file_flags;
++ int ret;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++
++ if (req->async_data) {
++ io = req->async_data;
++ } else {
++ ret = move_addr_to_kernel(req->connect.addr,
++ req->connect.addr_len,
++ &__io.address);
++ if (ret)
++ goto out;
++ io = &__io;
++ }
++
++ file_flags = force_nonblock ? O_NONBLOCK : 0;
++
++ ret = __sys_connect_file(req->file, &io->address,
++ req->connect.addr_len, file_flags);
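++	/*
++	 * A nonblocking connect is still in progress: stash the resolved
++	 * address in async data so the request can be retried later.
++	 */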
++ if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
++ if (req->async_data)
++ return -EAGAIN;
++ if (io_alloc_async_data(req)) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ memcpy(req->async_data, &__io, sizeof(__io));
++ return -EAGAIN;
++ }
++ if (ret == -ERESTARTSYS)
++ ret = -EINTR;
++out:
++ if (ret < 0)
++ req_set_fail(req);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++#else /* !CONFIG_NET */
++#define IO_NETOP_FN(op) \
++static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \
++{ \
++ return -EOPNOTSUPP; \
++}
++
++#define IO_NETOP_PREP(op) \
++IO_NETOP_FN(op) \
++static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
++{ \
++ return -EOPNOTSUPP; \
++} \
++
++#define IO_NETOP_PREP_ASYNC(op) \
++IO_NETOP_PREP(op) \
++static int io_##op##_prep_async(struct io_kiocb *req) \
++{ \
++ return -EOPNOTSUPP; \
++}
++
++IO_NETOP_PREP_ASYNC(sendmsg);
++IO_NETOP_PREP_ASYNC(recvmsg);
++IO_NETOP_PREP_ASYNC(connect);
++IO_NETOP_PREP(accept);
++IO_NETOP_FN(send);
++IO_NETOP_FN(recv);
++#endif /* CONFIG_NET */
++
++struct io_poll_table {
++ struct poll_table_struct pt;
++ struct io_kiocb *req;
++ int nr_entries;
++ int error;
++};
++
++#define IO_POLL_CANCEL_FLAG BIT(31)
++#define IO_POLL_RETRY_FLAG BIT(30)
++#define IO_POLL_REF_MASK GENMASK(29, 0)
++
++/*
++ * We usually have 1-2 refs taken; 128 is more than enough, and we want to
++ * maximise the margin between this amount and the moment when it overflows.
++ */
++#define IO_POLL_REF_BIAS 128
++
++static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
++{
++ int v;
++
++ /*
++ * poll_refs are already elevated and we don't have much hope for
++ * grabbing the ownership. Instead of incrementing, set a retry flag
++ * to notify the loop that there might have been some change.
++ */
++ v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
++ if (v & IO_POLL_REF_MASK)
++ return false;
++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
++}
++
++/*
++ * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
++ * bump it and acquire ownership. It's disallowed to modify requests while not
++ * owning it, which prevents races when enqueueing task_work and between
++ * arming poll and wakeups.
++ */
++static inline bool io_poll_get_ownership(struct io_kiocb *req)
++{
++ if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
++ return io_poll_get_ownership_slowpath(req);
++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
++}
++
++static void io_poll_mark_cancelled(struct io_kiocb *req)
++{
++ atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
++}
++
++static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
++{
++ /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
++ if (req->opcode == IORING_OP_POLL_ADD)
++ return req->async_data;
++ return req->apoll->double_poll;
++}
++
++static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
++{
++ if (req->opcode == IORING_OP_POLL_ADD)
++ return &req->poll;
++ return &req->apoll->poll;
++}
++
++static void io_poll_req_insert(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct hlist_head *list;
++
++ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
++ hlist_add_head(&req->hash_node, list);
++}
++
++static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
++ wait_queue_func_t wake_func)
++{
++ poll->head = NULL;
++#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
++ /* mask in events that we always want/need */
++ poll->events = events | IO_POLL_UNMASK;
++ INIT_LIST_HEAD(&poll->wait.entry);
++ init_waitqueue_func_entry(&poll->wait, wake_func);
++}
++
++static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
++{
++ struct wait_queue_head *head = smp_load_acquire(&poll->head);
++
++ if (head) {
++ spin_lock_irq(&head->lock);
++ list_del_init(&poll->wait.entry);
++ poll->head = NULL;
++ spin_unlock_irq(&head->lock);
++ }
++}
++
++static void io_poll_remove_entries(struct io_kiocb *req)
++{
++ struct io_poll_iocb *poll = io_poll_get_single(req);
++ struct io_poll_iocb *poll_double = io_poll_get_double(req);
++
++ /*
++ * While we hold the waitqueue lock and the waitqueue is nonempty,
++ * wake_up_pollfree() will wait for us. However, taking the waitqueue
++ * lock in the first place can race with the waitqueue being freed.
++ *
++ * We solve this as eventpoll does: by taking advantage of the fact that
++ * all users of wake_up_pollfree() will RCU-delay the actual free. If
++ * we enter rcu_read_lock() and see that the pointer to the queue is
++ * non-NULL, we can then lock it without the memory being freed out from
++ * under us.
++ *
++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
++ * case the caller deletes the entry from the queue, leaving it empty.
++ * In that case, only RCU prevents the queue memory from being freed.
++ */
++ rcu_read_lock();
++ io_poll_remove_entry(poll);
++ if (poll_double)
++ io_poll_remove_entry(poll_double);
++ rcu_read_unlock();
++}
++
++/*
++ * All poll tw should go through this. Checks for poll events, manages
++ * references, does rewait, etc.
++ *
++ * Returns a negative error on failure. >0 when no action is required, which is
++ * either a spurious wakeup or a multishot CQE that has already been served.
++ * 0 when it's done with the request, in which case the mask is stored in req->result.
++ */
++static int io_poll_check_events(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_poll_iocb *poll = io_poll_get_single(req);
++ int v;
++
++ /* req->task == current here, checking PF_EXITING is safe */
++ if (unlikely(req->task->flags & PF_EXITING))
++ io_poll_mark_cancelled(req);
++
++ do {
++ v = atomic_read(&req->poll_refs);
++
++ /* tw handler should be the owner, and so have some references */
++ if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
++ return 0;
++ if (v & IO_POLL_CANCEL_FLAG)
++ return -ECANCELED;
++ /*
++ * cqe.res contains only events of the first wake up
++ * and all others are lost. Redo vfs_poll() to get
++ * up to date state.
++ */
++ if ((v & IO_POLL_REF_MASK) != 1)
++ req->result = 0;
++ if (v & IO_POLL_RETRY_FLAG) {
++ req->result = 0;
++ /*
++ * We won't find new events that came in between
++ * vfs_poll and the ref put unless we clear the
++ * flag in advance.
++ */
++ atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
++ v &= ~IO_POLL_RETRY_FLAG;
++ }
++
++ if (!req->result) {
++ struct poll_table_struct pt = { ._key = poll->events };
++
++ req->result = vfs_poll(req->file, &pt) & poll->events;
++ }
++
++		/* multishot, just fill a CQE and proceed */
++ if (req->result && !(poll->events & EPOLLONESHOT)) {
++ __poll_t mask = mangle_poll(req->result & poll->events);
++ bool filled;
++
++ spin_lock(&ctx->completion_lock);
++ filled = io_fill_cqe_aux(ctx, req->user_data, mask,
++ IORING_CQE_F_MORE);
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++ if (unlikely(!filled))
++ return -ECANCELED;
++ io_cqring_ev_posted(ctx);
++ } else if (req->result) {
++ return 0;
++ }
++
++ /* force the next iteration to vfs_poll() */
++ req->result = 0;
++
++ /*
++ * Release all references, retry if someone tried to restart
++ * task_work while we were executing it.
++ */
++ } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
++ IO_POLL_REF_MASK);
++
++ return 1;
++}
++
++static void io_poll_task_func(struct io_kiocb *req, bool *locked)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ int ret;
++
++ ret = io_poll_check_events(req);
++ if (ret > 0)
++ return;
++
++ if (!ret) {
++ req->result = mangle_poll(req->result & req->poll.events);
++ } else {
++ req->result = ret;
++ req_set_fail(req);
++ }
++
++ io_poll_remove_entries(req);
++ spin_lock(&ctx->completion_lock);
++ hash_del(&req->hash_node);
++ spin_unlock(&ctx->completion_lock);
++ io_req_complete_post(req, req->result, 0);
++}
++
++static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ int ret;
++
++ ret = io_poll_check_events(req);
++ if (ret > 0)
++ return;
++
++ io_tw_lock(req->ctx, locked);
++ io_poll_remove_entries(req);
++ spin_lock(&ctx->completion_lock);
++ hash_del(&req->hash_node);
++ spin_unlock(&ctx->completion_lock);
++
++ if (!ret)
++ io_req_task_submit(req, locked);
++ else
++ io_req_complete_failed(req, ret);
++}
++
++static void __io_poll_execute(struct io_kiocb *req, int mask)
++{
++ req->result = mask;
++ if (req->opcode == IORING_OP_POLL_ADD)
++ req->io_task_work.func = io_poll_task_func;
++ else
++ req->io_task_work.func = io_apoll_task_func;
++
++ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
++ io_req_task_work_add(req);
++}
++
++static inline void io_poll_execute(struct io_kiocb *req, int res)
++{
++ if (io_poll_get_ownership(req))
++ __io_poll_execute(req, res);
++}
++
++static void io_poll_cancel_req(struct io_kiocb *req)
++{
++ io_poll_mark_cancelled(req);
++ /* kick tw, which should complete the request */
++ io_poll_execute(req, 0);
++}
++
++static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
++ void *key)
++{
++ struct io_kiocb *req = wait->private;
++ struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb,
++ wait);
++ __poll_t mask = key_to_poll(key);
++
++ if (unlikely(mask & POLLFREE)) {
++ io_poll_mark_cancelled(req);
++ /* we have to kick tw in case it's not already */
++ io_poll_execute(req, 0);
++
++ /*
++ * If the waitqueue is being freed early but someone already
++ * holds ownership over it, we have to tear down the request as
++ * best we can. That means immediately removing the request from
++ * its waitqueue and preventing all further accesses to the
++ * waitqueue via the request.
++ */
++ list_del_init(&poll->wait.entry);
++
++ /*
++ * Careful: this *must* be the last step, since as soon
++ * as req->head is NULL'ed out, the request can be
++ * completed and freed, since aio_poll_complete_work()
++ * will no longer need to take the waitqueue lock.
++ */
++ smp_store_release(&poll->head, NULL);
++ return 1;
++ }
++
++ /* for instances that support it check for an event match first */
++ if (mask && !(mask & poll->events))
++ return 0;
++
++ if (io_poll_get_ownership(req)) {
++ /*
++ * If we trigger a multishot poll off our own wakeup path,
++ * disable multishot as there is a circular dependency between
++ * CQ posting and triggering the event.
++ */
++ if (mask & EPOLL_URING_WAKE)
++ poll->events |= EPOLLONESHOT;
++
++ __io_poll_execute(req, mask);
++ }
++ return 1;
++}
++
++static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
++ struct wait_queue_head *head,
++ struct io_poll_iocb **poll_ptr)
++{
++ struct io_kiocb *req = pt->req;
++
++ /*
++ * The file being polled uses multiple waitqueues for poll handling
++ * (e.g. one for read, one for write). Set up a separate io_poll_iocb
++ * if this happens.
++ */
++ if (unlikely(pt->nr_entries)) {
++ struct io_poll_iocb *first = poll;
++
++ /* double add on the same waitqueue head, ignore */
++ if (first->head == head)
++ return;
++ /* already have a 2nd entry, fail a third attempt */
++ if (*poll_ptr) {
++ if ((*poll_ptr)->head == head)
++ return;
++ pt->error = -EINVAL;
++ return;
++ }
++
++ poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
++ if (!poll) {
++ pt->error = -ENOMEM;
++ return;
++ }
++ io_init_poll_iocb(poll, first->events, first->wait.func);
++ *poll_ptr = poll;
++ }
++
++ pt->nr_entries++;
++ poll->head = head;
++ poll->wait.private = req;
++
++ if (poll->events & EPOLLEXCLUSIVE)
++ add_wait_queue_exclusive(head, &poll->wait);
++ else
++ add_wait_queue(head, &poll->wait);
++}
++
++static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
++ struct poll_table_struct *p)
++{
++ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
++
++ __io_queue_proc(&pt->req->poll, pt, head,
++ (struct io_poll_iocb **) &pt->req->async_data);
++}
++
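++/*
++ * Arm poll for a request: take the initial poll_refs ownership reference,
++ * run vfs_poll() to register on the file's waitqueue(s), then either return
++ * the ready mask directly for a oneshot poll or insert the request into the
++ * cancellation hash to be completed from a wakeup.
++ */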
++static int __io_arm_poll_handler(struct io_kiocb *req,
++ struct io_poll_iocb *poll,
++ struct io_poll_table *ipt, __poll_t mask)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ INIT_HLIST_NODE(&req->hash_node);
++ io_init_poll_iocb(poll, mask, io_poll_wake);
++ poll->file = req->file;
++ poll->wait.private = req;
++
++ ipt->pt._key = mask;
++ ipt->req = req;
++ ipt->error = 0;
++ ipt->nr_entries = 0;
++
++ /*
++ * Take the ownership to delay any tw execution up until we're done
++ * with poll arming. see io_poll_get_ownership().
++ */
++ atomic_set(&req->poll_refs, 1);
++ mask = vfs_poll(req->file, &ipt->pt) & poll->events;
++
++ if (mask && (poll->events & EPOLLONESHOT)) {
++ io_poll_remove_entries(req);
++ /* no one else has access to the req, forget about the ref */
++ return mask;
++ }
++ if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
++ io_poll_remove_entries(req);
++ if (!ipt->error)
++ ipt->error = -EINVAL;
++ return 0;
++ }
++
++ spin_lock(&ctx->completion_lock);
++ io_poll_req_insert(req);
++ spin_unlock(&ctx->completion_lock);
++
++ if (mask) {
++ /* can't multishot if failed, just queue the event we've got */
++ if (unlikely(ipt->error || !ipt->nr_entries)) {
++ poll->events |= EPOLLONESHOT;
++ ipt->error = 0;
++ }
++ __io_poll_execute(req, mask);
++ return 0;
++ }
++
++ /*
++ * Try to release ownership. If we see a change of state, e.g.
++ * poll was woken up, queue up a tw and it'll deal with it.
++ */
++ if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
++ __io_poll_execute(req, 0);
++ return 0;
++}
++
++static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
++ struct poll_table_struct *p)
++{
++ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
++ struct async_poll *apoll = pt->req->apoll;
++
++ __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
++}
++
++enum {
++ IO_APOLL_OK,
++ IO_APOLL_ABORTED,
++ IO_APOLL_READY
++};
++
++/*
++ * We can't reliably detect loops where a poll keeps triggering and the issue
++ * subsequently keeps failing. But rather than fail these immediately, allow a
++ * certain number of retries before we give up. Given that this condition
++ * should _rarely_ trigger even once, we should be fine with a larger value.
++ */
++#define APOLL_MAX_RETRY 128
++
++static int io_arm_poll_handler(struct io_kiocb *req)
++{
++ const struct io_op_def *def = &io_op_defs[req->opcode];
++ struct io_ring_ctx *ctx = req->ctx;
++ struct async_poll *apoll;
++ struct io_poll_table ipt;
++ __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
++ int ret;
++
++ if (!req->file || !file_can_poll(req->file))
++ return IO_APOLL_ABORTED;
++ if (!def->pollin && !def->pollout)
++ return IO_APOLL_ABORTED;
++
++ if (def->pollin) {
++ mask |= POLLIN | POLLRDNORM;
++
++ /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
++ if ((req->opcode == IORING_OP_RECVMSG) &&
++ (req->sr_msg.msg_flags & MSG_ERRQUEUE))
++ mask &= ~POLLIN;
++ } else {
++ mask |= POLLOUT | POLLWRNORM;
++ }
++
++ if (req->flags & REQ_F_POLLED) {
++ apoll = req->apoll;
++ kfree(apoll->double_poll);
++ if (unlikely(!--apoll->poll.retries)) {
++ apoll->double_poll = NULL;
++ return IO_APOLL_ABORTED;
++ }
++ } else {
++ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++ if (unlikely(!apoll))
++ return IO_APOLL_ABORTED;
++ apoll->poll.retries = APOLL_MAX_RETRY;
++ }
++ apoll->double_poll = NULL;
++ req->apoll = apoll;
++ req->flags |= REQ_F_POLLED;
++ ipt.pt._qproc = io_async_queue_proc;
++
++ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
++ if (ret || ipt.error)
++ return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
++
++ trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
++ mask, apoll->poll.events);
++ return IO_APOLL_OK;
++}
++
++/*
++ * Returns true if we found and killed one or more poll requests
++ */
++static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
++ bool cancel_all)
++{
++ struct hlist_node *tmp;
++ struct io_kiocb *req;
++ bool found = false;
++ int i;
++
++ spin_lock(&ctx->completion_lock);
++ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
++ struct hlist_head *list;
++
++ list = &ctx->cancel_hash[i];
++ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
++ if (io_match_task_safe(req, tsk, cancel_all)) {
++ hlist_del_init(&req->hash_node);
++ io_poll_cancel_req(req);
++ found = true;
++ }
++ }
++ }
++ spin_unlock(&ctx->completion_lock);
++ return found;
++}
++
++static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
++ bool poll_only)
++ __must_hold(&ctx->completion_lock)
++{
++ struct hlist_head *list;
++ struct io_kiocb *req;
++
++ list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
++ hlist_for_each_entry(req, list, hash_node) {
++ if (sqe_addr != req->user_data)
++ continue;
++ if (poll_only && req->opcode != IORING_OP_POLL_ADD)
++ continue;
++ return req;
++ }
++ return NULL;
++}
++
++static bool io_poll_disarm(struct io_kiocb *req)
++ __must_hold(&ctx->completion_lock)
++{
++ if (!io_poll_get_ownership(req))
++ return false;
++ io_poll_remove_entries(req);
++ hash_del(&req->hash_node);
++ return true;
++}
++
++static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
++ bool poll_only)
++ __must_hold(&ctx->completion_lock)
++{
++ struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only);
++
++ if (!req)
++ return -ENOENT;
++ io_poll_cancel_req(req);
++ return 0;
++}
++
++static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
++ unsigned int flags)
++{
++ u32 events;
++
++ events = READ_ONCE(sqe->poll32_events);
++#ifdef __BIG_ENDIAN
++ events = swahw32(events);
++#endif
++ if (!(flags & IORING_POLL_ADD_MULTI))
++ events |= EPOLLONESHOT;
++ return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
++}
++
++static int io_poll_update_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_poll_update *upd = &req->poll_update;
++ u32 flags;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
++ return -EINVAL;
++ flags = READ_ONCE(sqe->len);
++ if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
++ IORING_POLL_ADD_MULTI))
++ return -EINVAL;
++ /* meaningless without update */
++ if (flags == IORING_POLL_ADD_MULTI)
++ return -EINVAL;
++
++ upd->old_user_data = READ_ONCE(sqe->addr);
++ upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
++ upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
++
++ upd->new_user_data = READ_ONCE(sqe->off);
++ if (!upd->update_user_data && upd->new_user_data)
++ return -EINVAL;
++ if (upd->update_events)
++ upd->events = io_poll_parse_events(sqe, flags);
++ else if (sqe->poll32_events)
++ return -EINVAL;
++
++ return 0;
++}
++
++static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ struct io_poll_iocb *poll = &req->poll;
++ u32 flags;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
++ return -EINVAL;
++ flags = READ_ONCE(sqe->len);
++ if (flags & ~IORING_POLL_ADD_MULTI)
++ return -EINVAL;
++
++ io_req_set_refcount(req);
++ poll->events = io_poll_parse_events(sqe, flags);
++ return 0;
++}
++
++static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_poll_iocb *poll = &req->poll;
++ struct io_poll_table ipt;
++ int ret;
++
++ ipt.pt._qproc = io_poll_queue_proc;
++
++ ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
++ if (!ret && ipt.error)
++ req_set_fail(req);
++ ret = ret ?: ipt.error;
++ if (ret)
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_kiocb *preq;
++ int ret2, ret = 0;
++
++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++
++ spin_lock(&ctx->completion_lock);
++ preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
++ if (!preq || !io_poll_disarm(preq)) {
++ spin_unlock(&ctx->completion_lock);
++ ret = preq ? -EALREADY : -ENOENT;
++ goto out;
++ }
++ spin_unlock(&ctx->completion_lock);
++
++ if (req->poll_update.update_events || req->poll_update.update_user_data) {
++		/* only mask the event flags, keep the behavior flags */
++ if (req->poll_update.update_events) {
++ preq->poll.events &= ~0xffff;
++ preq->poll.events |= req->poll_update.events & 0xffff;
++ preq->poll.events |= IO_POLL_UNMASK;
++ }
++ if (req->poll_update.update_user_data)
++ preq->user_data = req->poll_update.new_user_data;
++
++ ret2 = io_poll_add(preq, issue_flags);
++ /* successfully updated, don't complete poll request */
++ if (!ret2)
++ goto out;
++ }
++ req_set_fail(preq);
++ io_req_complete(preq, -ECANCELED);
++out:
++ if (ret < 0)
++ req_set_fail(req);
++ /* complete update request, we're done with it */
++ io_req_complete(req, ret);
++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ return 0;
++}
++
++static void io_req_task_timeout(struct io_kiocb *req, bool *locked)
++{
++ req_set_fail(req);
++ io_req_complete_post(req, -ETIME, 0);
++}
++
++static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
++{
++ struct io_timeout_data *data = container_of(timer,
++ struct io_timeout_data, timer);
++ struct io_kiocb *req = data->req;
++ struct io_ring_ctx *ctx = req->ctx;
++ unsigned long flags;
++
++ spin_lock_irqsave(&ctx->timeout_lock, flags);
++ list_del_init(&req->timeout.list);
++ atomic_set(&req->ctx->cq_timeouts,
++ atomic_read(&req->ctx->cq_timeouts) + 1);
++ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
++
++ req->io_task_work.func = io_req_task_timeout;
++ io_req_task_work_add(req);
++ return HRTIMER_NORESTART;
++}
++
++static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
++ __u64 user_data)
++ __must_hold(&ctx->timeout_lock)
++{
++ struct io_timeout_data *io;
++ struct io_kiocb *req;
++ bool found = false;
++
++ list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
++ found = user_data == req->user_data;
++ if (found)
++ break;
++ }
++ if (!found)
++ return ERR_PTR(-ENOENT);
++
++ io = req->async_data;
++ if (hrtimer_try_to_cancel(&io->timer) == -1)
++ return ERR_PTR(-EALREADY);
++ list_del_init(&req->timeout.list);
++ return req;
++}
++
++static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
++ __must_hold(&ctx->completion_lock)
++ __must_hold(&ctx->timeout_lock)
++{
++ struct io_kiocb *req = io_timeout_extract(ctx, user_data);
++
++ if (IS_ERR(req))
++ return PTR_ERR(req);
++
++ req_set_fail(req);
++ io_fill_cqe_req(req, -ECANCELED, 0);
++ io_put_req_deferred(req);
++ return 0;
++}
++
++static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
++{
++ switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
++ case IORING_TIMEOUT_BOOTTIME:
++ return CLOCK_BOOTTIME;
++ case IORING_TIMEOUT_REALTIME:
++ return CLOCK_REALTIME;
++ default:
++ /* can't happen, vetted at prep time */
++ WARN_ON_ONCE(1);
++ fallthrough;
++ case 0:
++ return CLOCK_MONOTONIC;
++ }
++}
++
++static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
++ struct timespec64 *ts, enum hrtimer_mode mode)
++ __must_hold(&ctx->timeout_lock)
++{
++ struct io_timeout_data *io;
++ struct io_kiocb *req;
++ bool found = false;
++
++ list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
++ found = user_data == req->user_data;
++ if (found)
++ break;
++ }
++ if (!found)
++ return -ENOENT;
++
++ io = req->async_data;
++ if (hrtimer_try_to_cancel(&io->timer) == -1)
++ return -EALREADY;
++ hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
++ io->timer.function = io_link_timeout_fn;
++ hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
++ return 0;
++}
++
++static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
++ struct timespec64 *ts, enum hrtimer_mode mode)
++ __must_hold(&ctx->timeout_lock)
++{
++ struct io_kiocb *req = io_timeout_extract(ctx, user_data);
++ struct io_timeout_data *data;
++
++ if (IS_ERR(req))
++ return PTR_ERR(req);
++
++ req->timeout.off = 0; /* noseq */
++ data = req->async_data;
++ list_add_tail(&req->timeout.list, &ctx->timeout_list);
++ hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
++ data->timer.function = io_timeout_fn;
++ hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
++ return 0;
++}
++
++static int io_timeout_remove_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ struct io_timeout_rem *tr = &req->timeout_rem;
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
++ return -EINVAL;
++
++ tr->ltimeout = false;
++ tr->addr = READ_ONCE(sqe->addr);
++ tr->flags = READ_ONCE(sqe->timeout_flags);
++ if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
++ if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
++ return -EINVAL;
++ if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
++ tr->ltimeout = true;
++ if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
++ return -EINVAL;
++ if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
++ return -EFAULT;
++ } else if (tr->flags) {
++ /* timeout removal doesn't support flags */
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
++{
++ return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
++ : HRTIMER_MODE_REL;
++}
++
++/*
++ * Remove or update an existing timeout command
++ */
++static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_timeout_rem *tr = &req->timeout_rem;
++ struct io_ring_ctx *ctx = req->ctx;
++ int ret;
++
++ if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
++ spin_lock(&ctx->completion_lock);
++ spin_lock_irq(&ctx->timeout_lock);
++ ret = io_timeout_cancel(ctx, tr->addr);
++ spin_unlock_irq(&ctx->timeout_lock);
++ spin_unlock(&ctx->completion_lock);
++ } else {
++ enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
++
++ spin_lock_irq(&ctx->timeout_lock);
++ if (tr->ltimeout)
++ ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
++ else
++ ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
++ spin_unlock_irq(&ctx->timeout_lock);
++ }
++
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete_post(req, ret, 0);
++ return 0;
++}
++
++static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
++ bool is_timeout_link)
++{
++ struct io_timeout_data *data;
++ unsigned flags;
++ u32 off = READ_ONCE(sqe->off);
++
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++ if (off && is_timeout_link)
++ return -EINVAL;
++ flags = READ_ONCE(sqe->timeout_flags);
++ if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK))
++ return -EINVAL;
++ /* more than one clock specified is invalid, obviously */
++ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
++ return -EINVAL;
++
++ INIT_LIST_HEAD(&req->timeout.list);
++ req->timeout.off = off;
++ if (unlikely(off && !req->ctx->off_timeout_used))
++ req->ctx->off_timeout_used = true;
++
++ if (!req->async_data && io_alloc_async_data(req))
++ return -ENOMEM;
++
++ data = req->async_data;
++ data->req = req;
++ data->flags = flags;
++
++ if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
++ return -EFAULT;
++
++ INIT_LIST_HEAD(&req->timeout.list);
++ data->mode = io_translate_timeout_mode(flags);
++ hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
++
++ if (is_timeout_link) {
++ struct io_submit_link *link = &req->ctx->submit_state.link;
++
++ if (!link->head)
++ return -EINVAL;
++ if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
++ return -EINVAL;
++ req->timeout.head = link->last;
++ link->last->flags |= REQ_F_ARM_LTIMEOUT;
++ }
++ return 0;
++}
++
++static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_timeout_data *data = req->async_data;
++ struct list_head *entry;
++ u32 tail, off = req->timeout.off;
++
++ spin_lock_irq(&ctx->timeout_lock);
++
++ /*
++ * sqe->off holds how many events need to occur for this
++ * timeout event to be satisfied. If it isn't set, then this is
++ * a pure timeout request, sequence isn't used.
++ */
++ if (io_is_timeout_noseq(req)) {
++ entry = ctx->timeout_list.prev;
++ goto add;
++ }
++
++ tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
++ req->timeout.target_seq = tail + off;
++
++ /* Update the last seq here in case io_flush_timeouts() hasn't.
++ * This is safe because ->completion_lock is held, and submissions
++ * and completions are never mixed in the same ->completion_lock section.
++ */
++ ctx->cq_last_tm_flush = tail;
++
++ /*
++ * Insertion sort, ensuring the first entry in the list is always
++ * the one we need first.
++ */
++ list_for_each_prev(entry, &ctx->timeout_list) {
++ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
++ timeout.list);
++
++ if (io_is_timeout_noseq(nxt))
++ continue;
++ /* nxt.seq is behind @tail, otherwise would've been completed */
++ if (off >= nxt->timeout.target_seq - tail)
++ break;
++ }
++add:
++ list_add(&req->timeout.list, entry);
++ data->timer.function = io_timeout_fn;
++ hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
++ spin_unlock_irq(&ctx->timeout_lock);
++ return 0;
++}
++
++struct io_cancel_data {
++ struct io_ring_ctx *ctx;
++ u64 user_data;
++};
++
++static bool io_cancel_cb(struct io_wq_work *work, void *data)
++{
++ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++ struct io_cancel_data *cd = data;
++
++ return req->ctx == cd->ctx && req->user_data == cd->user_data;
++}
++
++static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
++ struct io_ring_ctx *ctx)
++{
++ struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
++ enum io_wq_cancel cancel_ret;
++ int ret = 0;
++
++ if (!tctx || !tctx->io_wq)
++ return -ENOENT;
++
++ cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
++ switch (cancel_ret) {
++ case IO_WQ_CANCEL_OK:
++ ret = 0;
++ break;
++ case IO_WQ_CANCEL_RUNNING:
++ ret = -EALREADY;
++ break;
++ case IO_WQ_CANCEL_NOTFOUND:
++ ret = -ENOENT;
++ break;
++ }
++
++ return ret;
++}
++
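++/*
++ * Try to cancel a request identified by its user_data: first via io-wq for
++ * the issuing task, then the timeout list, then the poll hash.
++ */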
++static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ int ret;
++
++ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
++
++ ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
++ if (ret != -ENOENT)
++ return ret;
++
++ spin_lock(&ctx->completion_lock);
++ spin_lock_irq(&ctx->timeout_lock);
++ ret = io_timeout_cancel(ctx, sqe_addr);
++ spin_unlock_irq(&ctx->timeout_lock);
++ if (ret != -ENOENT)
++ goto out;
++ ret = io_poll_cancel(ctx, sqe_addr, false);
++out:
++ spin_unlock(&ctx->completion_lock);
++ return ret;
++}
++
++static int io_async_cancel_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++ return -EINVAL;
++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
++ sqe->splice_fd_in)
++ return -EINVAL;
++
++ req->cancel.addr = READ_ONCE(sqe->addr);
++ return 0;
++}
++
++static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ u64 sqe_addr = req->cancel.addr;
++ struct io_tctx_node *node;
++ int ret;
++
++ ret = io_try_cancel_userdata(req, sqe_addr);
++ if (ret != -ENOENT)
++ goto done;
++
++ /* slow path, try all io-wq's */
++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ ret = -ENOENT;
++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
++ struct io_uring_task *tctx = node->task->io_uring;
++
++ ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
++ if (ret != -ENOENT)
++ break;
++ }
++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++done:
++ if (ret < 0)
++ req_set_fail(req);
++ io_req_complete_post(req, ret, 0);
++ return 0;
++}
++
++static int io_rsrc_update_prep(struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++{
++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
++ return -EINVAL;
++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
++ return -EINVAL;
++
++ req->rsrc_update.offset = READ_ONCE(sqe->off);
++ req->rsrc_update.nr_args = READ_ONCE(sqe->len);
++ if (!req->rsrc_update.nr_args)
++ return -EINVAL;
++ req->rsrc_update.arg = READ_ONCE(sqe->addr);
++ return 0;
++}
++
++static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_uring_rsrc_update2 up;
++ int ret;
++
++ up.offset = req->rsrc_update.offset;
++ up.data = req->rsrc_update.arg;
++ up.nr = 0;
++ up.tags = 0;
++ up.resv = 0;
++ up.resv2 = 0;
++
++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
++ &up, req->rsrc_update.nr_args);
++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++
++ if (ret < 0)
++ req_set_fail(req);
++ __io_req_complete(req, issue_flags, ret, 0);
++ return 0;
++}
++
++static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
++{
++ switch (req->opcode) {
++ case IORING_OP_NOP:
++ return 0;
++ case IORING_OP_READV:
++ case IORING_OP_READ_FIXED:
++ case IORING_OP_READ:
++ return io_read_prep(req, sqe);
++ case IORING_OP_WRITEV:
++ case IORING_OP_WRITE_FIXED:
++ case IORING_OP_WRITE:
++ return io_write_prep(req, sqe);
++ case IORING_OP_POLL_ADD:
++ return io_poll_add_prep(req, sqe);
++ case IORING_OP_POLL_REMOVE:
++ return io_poll_update_prep(req, sqe);
++ case IORING_OP_FSYNC:
++ return io_fsync_prep(req, sqe);
++ case IORING_OP_SYNC_FILE_RANGE:
++ return io_sfr_prep(req, sqe);
++ case IORING_OP_SENDMSG:
++ case IORING_OP_SEND:
++ return io_sendmsg_prep(req, sqe);
++ case IORING_OP_RECVMSG:
++ case IORING_OP_RECV:
++ return io_recvmsg_prep(req, sqe);
++ case IORING_OP_CONNECT:
++ return io_connect_prep(req, sqe);
++ case IORING_OP_TIMEOUT:
++ return io_timeout_prep(req, sqe, false);
++ case IORING_OP_TIMEOUT_REMOVE:
++ return io_timeout_remove_prep(req, sqe);
++ case IORING_OP_ASYNC_CANCEL:
++ return io_async_cancel_prep(req, sqe);
++ case IORING_OP_LINK_TIMEOUT:
++ return io_timeout_prep(req, sqe, true);
++ case IORING_OP_ACCEPT:
++ return io_accept_prep(req, sqe);
++ case IORING_OP_FALLOCATE:
++ return io_fallocate_prep(req, sqe);
++ case IORING_OP_OPENAT:
++ return io_openat_prep(req, sqe);
++ case IORING_OP_CLOSE:
++ return io_close_prep(req, sqe);
++ case IORING_OP_FILES_UPDATE:
++ return io_rsrc_update_prep(req, sqe);
++ case IORING_OP_STATX:
++ return io_statx_prep(req, sqe);
++ case IORING_OP_FADVISE:
++ return io_fadvise_prep(req, sqe);
++ case IORING_OP_MADVISE:
++ return io_madvise_prep(req, sqe);
++ case IORING_OP_OPENAT2:
++ return io_openat2_prep(req, sqe);
++ case IORING_OP_EPOLL_CTL:
++ return io_epoll_ctl_prep(req, sqe);
++ case IORING_OP_SPLICE:
++ return io_splice_prep(req, sqe);
++ case IORING_OP_PROVIDE_BUFFERS:
++ return io_provide_buffers_prep(req, sqe);
++ case IORING_OP_REMOVE_BUFFERS:
++ return io_remove_buffers_prep(req, sqe);
++ case IORING_OP_TEE:
++ return io_tee_prep(req, sqe);
++ case IORING_OP_SHUTDOWN:
++ return io_shutdown_prep(req, sqe);
++ case IORING_OP_RENAMEAT:
++ return io_renameat_prep(req, sqe);
++ case IORING_OP_UNLINKAT:
++ return io_unlinkat_prep(req, sqe);
++ case IORING_OP_MKDIRAT:
++ return io_mkdirat_prep(req, sqe);
++ case IORING_OP_SYMLINKAT:
++ return io_symlinkat_prep(req, sqe);
++ case IORING_OP_LINKAT:
++ return io_linkat_prep(req, sqe);
++ }
++
++ printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
++ req->opcode);
++ return -EINVAL;
++}
++
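++/* allocate and fill in async setup data for opcodes that need it */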
++static int io_req_prep_async(struct io_kiocb *req)
++{
++ if (!io_op_defs[req->opcode].needs_async_setup)
++ return 0;
++ if (WARN_ON_ONCE(req->async_data))
++ return -EFAULT;
++ if (io_alloc_async_data(req))
++ return -EAGAIN;
++
++ switch (req->opcode) {
++ case IORING_OP_READV:
++ return io_rw_prep_async(req, READ);
++ case IORING_OP_WRITEV:
++ return io_rw_prep_async(req, WRITE);
++ case IORING_OP_SENDMSG:
++ return io_sendmsg_prep_async(req);
++ case IORING_OP_RECVMSG:
++ return io_recvmsg_prep_async(req);
++ case IORING_OP_CONNECT:
++ return io_connect_prep_async(req);
++ }
++ printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
++ req->opcode);
++ return -EFAULT;
++}
++
++static u32 io_get_sequence(struct io_kiocb *req)
++{
++ u32 seq = req->ctx->cached_sq_head;
++
++ /* need original cached_sq_head, but it was increased for each req */
++ io_for_each_link(req, req)
++ seq--;
++ return seq;
++}
++
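++/*
++ * Handle IOSQE_IO_DRAIN: defer the request until previously submitted
++ * requests have completed. Returns true if the request was consumed here
++ * (deferred, punted to io-wq or failed), false if it should be issued now.
++ */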
++static bool io_drain_req(struct io_kiocb *req)
++{
++ struct io_kiocb *pos;
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_defer_entry *de;
++ int ret;
++ u32 seq;
++
++ if (req->flags & REQ_F_FAIL) {
++ io_req_complete_fail_submit(req);
++ return true;
++ }
++
++ /*
++ * If we need to drain a request in the middle of a link, drain the
++ * head request and the next request/link after the current link.
++ * Considering sequential execution of links, IOSQE_IO_DRAIN will be
++ * maintained for every request of our link.
++ */
++ if (ctx->drain_next) {
++ req->flags |= REQ_F_IO_DRAIN;
++ ctx->drain_next = false;
++ }
++ /* not interested in head, start from the first linked */
++ io_for_each_link(pos, req->link) {
++ if (pos->flags & REQ_F_IO_DRAIN) {
++ ctx->drain_next = true;
++ req->flags |= REQ_F_IO_DRAIN;
++ break;
++ }
++ }
++
++	/* Still need to defer if there is a pending req in the defer list. */
++ spin_lock(&ctx->completion_lock);
++ if (likely(list_empty_careful(&ctx->defer_list) &&
++ !(req->flags & REQ_F_IO_DRAIN))) {
++ spin_unlock(&ctx->completion_lock);
++ ctx->drain_active = false;
++ return false;
++ }
++ spin_unlock(&ctx->completion_lock);
++
++ seq = io_get_sequence(req);
++ /* Still a chance to pass the sequence check */
++ if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
++ return false;
++
++ ret = io_req_prep_async(req);
++ if (ret)
++ goto fail;
++ io_prep_async_link(req);
++ de = kmalloc(sizeof(*de), GFP_KERNEL);
++ if (!de) {
++ ret = -ENOMEM;
++fail:
++ io_req_complete_failed(req, ret);
++ return true;
++ }
++
++ spin_lock(&ctx->completion_lock);
++ if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
++ spin_unlock(&ctx->completion_lock);
++ kfree(de);
++ io_queue_async_work(req, NULL);
++ return true;
++ }
++
++ trace_io_uring_defer(ctx, req, req->user_data);
++ de->req = req;
++ de->seq = seq;
++ list_add_tail(&de->list, &ctx->defer_list);
++ spin_unlock(&ctx->completion_lock);
++ return true;
++}
++
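++/*
++ * Release per-opcode resources held by a request: selected buffers, async
++ * iovecs/msghdrs, filenames, armed async poll entries, inflight tracking
++ * and overridden credentials.
++ */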
++static void io_clean_op(struct io_kiocb *req)
++{
++ if (req->flags & REQ_F_BUFFER_SELECTED) {
++ switch (req->opcode) {
++ case IORING_OP_READV:
++ case IORING_OP_READ_FIXED:
++ case IORING_OP_READ:
++ kfree((void *)(unsigned long)req->rw.addr);
++ break;
++ case IORING_OP_RECVMSG:
++ case IORING_OP_RECV:
++ kfree(req->sr_msg.kbuf);
++ break;
++ }
++ }
++
++ if (req->flags & REQ_F_NEED_CLEANUP) {
++ switch (req->opcode) {
++ case IORING_OP_READV:
++ case IORING_OP_READ_FIXED:
++ case IORING_OP_READ:
++ case IORING_OP_WRITEV:
++ case IORING_OP_WRITE_FIXED:
++ case IORING_OP_WRITE: {
++ struct io_async_rw *io = req->async_data;
++
++ kfree(io->free_iovec);
++ break;
++ }
++ case IORING_OP_RECVMSG:
++ case IORING_OP_SENDMSG: {
++ struct io_async_msghdr *io = req->async_data;
++
++ kfree(io->free_iov);
++ break;
++ }
++ case IORING_OP_OPENAT:
++ case IORING_OP_OPENAT2:
++ if (req->open.filename)
++ putname(req->open.filename);
++ break;
++ case IORING_OP_RENAMEAT:
++ putname(req->rename.oldpath);
++ putname(req->rename.newpath);
++ break;
++ case IORING_OP_UNLINKAT:
++ putname(req->unlink.filename);
++ break;
++ case IORING_OP_MKDIRAT:
++ putname(req->mkdir.filename);
++ break;
++ case IORING_OP_SYMLINKAT:
++ putname(req->symlink.oldpath);
++ putname(req->symlink.newpath);
++ break;
++ case IORING_OP_LINKAT:
++ putname(req->hardlink.oldpath);
++ putname(req->hardlink.newpath);
++ break;
++ }
++ }
++ if ((req->flags & REQ_F_POLLED) && req->apoll) {
++ kfree(req->apoll->double_poll);
++ kfree(req->apoll);
++ req->apoll = NULL;
++ }
++ if (req->flags & REQ_F_INFLIGHT) {
++ struct io_uring_task *tctx = req->task->io_uring;
++
++ atomic_dec(&tctx->inflight_tracked);
++ }
++ if (req->flags & REQ_F_CREDS)
++ put_cred(req->creds);
++
++ req->flags &= ~IO_REQ_CLEAN_FLAGS;
++}
++
++static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ const struct cred *creds = NULL;
++ int ret;
++
++ if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
++ creds = override_creds(req->creds);
++
++ switch (req->opcode) {
++ case IORING_OP_NOP:
++ ret = io_nop(req, issue_flags);
++ break;
++ case IORING_OP_READV:
++ case IORING_OP_READ_FIXED:
++ case IORING_OP_READ:
++ ret = io_read(req, issue_flags);
++ break;
++ case IORING_OP_WRITEV:
++ case IORING_OP_WRITE_FIXED:
++ case IORING_OP_WRITE:
++ ret = io_write(req, issue_flags);
++ break;
++ case IORING_OP_FSYNC:
++ ret = io_fsync(req, issue_flags);
++ break;
++ case IORING_OP_POLL_ADD:
++ ret = io_poll_add(req, issue_flags);
++ break;
++ case IORING_OP_POLL_REMOVE:
++ ret = io_poll_update(req, issue_flags);
++ break;
++ case IORING_OP_SYNC_FILE_RANGE:
++ ret = io_sync_file_range(req, issue_flags);
++ break;
++ case IORING_OP_SENDMSG:
++ ret = io_sendmsg(req, issue_flags);
++ break;
++ case IORING_OP_SEND:
++ ret = io_send(req, issue_flags);
++ break;
++ case IORING_OP_RECVMSG:
++ ret = io_recvmsg(req, issue_flags);
++ break;
++ case IORING_OP_RECV:
++ ret = io_recv(req, issue_flags);
++ break;
++ case IORING_OP_TIMEOUT:
++ ret = io_timeout(req, issue_flags);
++ break;
++ case IORING_OP_TIMEOUT_REMOVE:
++ ret = io_timeout_remove(req, issue_flags);
++ break;
++ case IORING_OP_ACCEPT:
++ ret = io_accept(req, issue_flags);
++ break;
++ case IORING_OP_CONNECT:
++ ret = io_connect(req, issue_flags);
++ break;
++ case IORING_OP_ASYNC_CANCEL:
++ ret = io_async_cancel(req, issue_flags);
++ break;
++ case IORING_OP_FALLOCATE:
++ ret = io_fallocate(req, issue_flags);
++ break;
++ case IORING_OP_OPENAT:
++ ret = io_openat(req, issue_flags);
++ break;
++ case IORING_OP_CLOSE:
++ ret = io_close(req, issue_flags);
++ break;
++ case IORING_OP_FILES_UPDATE:
++ ret = io_files_update(req, issue_flags);
++ break;
++ case IORING_OP_STATX:
++ ret = io_statx(req, issue_flags);
++ break;
++ case IORING_OP_FADVISE:
++ ret = io_fadvise(req, issue_flags);
++ break;
++ case IORING_OP_MADVISE:
++ ret = io_madvise(req, issue_flags);
++ break;
++ case IORING_OP_OPENAT2:
++ ret = io_openat2(req, issue_flags);
++ break;
++ case IORING_OP_EPOLL_CTL:
++ ret = io_epoll_ctl(req, issue_flags);
++ break;
++ case IORING_OP_SPLICE:
++ ret = io_splice(req, issue_flags);
++ break;
++ case IORING_OP_PROVIDE_BUFFERS:
++ ret = io_provide_buffers(req, issue_flags);
++ break;
++ case IORING_OP_REMOVE_BUFFERS:
++ ret = io_remove_buffers(req, issue_flags);
++ break;
++ case IORING_OP_TEE:
++ ret = io_tee(req, issue_flags);
++ break;
++ case IORING_OP_SHUTDOWN:
++ ret = io_shutdown(req, issue_flags);
++ break;
++ case IORING_OP_RENAMEAT:
++ ret = io_renameat(req, issue_flags);
++ break;
++ case IORING_OP_UNLINKAT:
++ ret = io_unlinkat(req, issue_flags);
++ break;
++ case IORING_OP_MKDIRAT:
++ ret = io_mkdirat(req, issue_flags);
++ break;
++ case IORING_OP_SYMLINKAT:
++ ret = io_symlinkat(req, issue_flags);
++ break;
++ case IORING_OP_LINKAT:
++ ret = io_linkat(req, issue_flags);
++ break;
++ default:
++ ret = -EINVAL;
++ break;
++ }
++
++ if (creds)
++ revert_creds(creds);
++ if (ret)
++ return ret;
++ /* If the op doesn't have a file, we're not polling for it */
++ if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file)
++ io_iopoll_req_issued(req);
++
++ return 0;
++}
++
++static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
++{
++ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++
++ req = io_put_req_find_next(req);
++ return req ? &req->work : NULL;
++}
++
++static void io_wq_submit_work(struct io_wq_work *work)
++{
++ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++ struct io_kiocb *timeout;
++ int ret = 0;
++
++ /* one will be dropped by ->io_free_work() after returning to io-wq */
++ if (!(req->flags & REQ_F_REFCOUNT))
++ __io_req_set_refcount(req, 2);
++ else
++ req_ref_get(req);
++
++ timeout = io_prep_linked_timeout(req);
++ if (timeout)
++ io_queue_linked_timeout(timeout);
++
++ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
++ if (work->flags & IO_WQ_WORK_CANCEL)
++ ret = -ECANCELED;
++
++ if (!ret) {
++ do {
++ ret = io_issue_sqe(req, 0);
++ /*
++ * We can get EAGAIN for polled IO even though we're
++ * forcing a sync submission from here, since we can't
++ * wait for request slots on the block side.
++ */
++ if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL))
++ break;
++ if (io_wq_worker_stopped())
++ break;
++ /*
++ * If REQ_F_NOWAIT is set, then don't wait or retry with
++ * poll. -EAGAIN is final for that case.
++ */
++ if (req->flags & REQ_F_NOWAIT)
++ break;
++
++ cond_resched();
++ } while (1);
++ }
++
++ /* avoid locking problems by failing it from a clean context */
++ if (ret)
++ io_req_task_queue_fail(req, ret);
++}
++
++static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
++ unsigned i)
++{
++ return &table->files[i];
++}
++
++static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
++ int index)
++{
++ struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index);
++
++ return (struct file *) (slot->file_ptr & FFS_MASK);
++}
++
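++/*
++ * Cache the file pointer for a fixed file slot, encoding in its low bits
++ * whether the file supports nowait reads/writes and whether it's a regular
++ * file, so the submission fast path can avoid rechecking.
++ */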
++static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file)
++{
++ unsigned long file_ptr = (unsigned long) file;
++
++ if (__io_file_supports_nowait(file, READ))
++ file_ptr |= FFS_ASYNC_READ;
++ if (__io_file_supports_nowait(file, WRITE))
++ file_ptr |= FFS_ASYNC_WRITE;
++ if (S_ISREG(file_inode(file)->i_mode))
++ file_ptr |= FFS_ISREG;
++ file_slot->file_ptr = file_ptr;
++}
++
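++/*
++ * Look up a registered (fixed) file under the uring lock: bounds-check and
++ * nospec the index, then decode the cached pointer and carry its FFS flag
++ * bits into req->flags.
++ */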
++static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
++ struct io_kiocb *req, int fd,
++ unsigned int issue_flags)
++{
++ struct file *file = NULL;
++ unsigned long file_ptr;
++
++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++
++ if (unlikely((unsigned int)fd >= ctx->nr_user_files))
++ goto out;
++ fd = array_index_nospec(fd, ctx->nr_user_files);
++ file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
++ file = (struct file *) (file_ptr & FFS_MASK);
++ file_ptr &= ~FFS_MASK;
++ /* mask in overlapping REQ_F and FFS bits */
++ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
++ io_req_set_rsrc_node(req);
++out:
++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ return file;
++}
++
++static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
++ struct io_kiocb *req, int fd)
++{
++ struct file *file = fget(fd);
++
++ trace_io_uring_file_get(ctx, fd);
++
++ /* we don't allow fixed io_uring files */
++ if (file && unlikely(file->f_op == &io_uring_fops))
++ io_req_track_inflight(req);
++ return file;
++}
++
++static inline struct file *io_file_get(struct io_ring_ctx *ctx,
++ struct io_kiocb *req, int fd, bool fixed,
++ unsigned int issue_flags)
++{
++ if (fixed)
++ return io_file_get_fixed(ctx, req, fd, issue_flags);
++ else
++ return io_file_get_normal(ctx, req, fd);
++}
++
++static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
++{
++ struct io_kiocb *prev = req->timeout.prev;
++ int ret = -ENOENT;
++
++ if (prev) {
++ if (!(req->task->flags & PF_EXITING))
++ ret = io_try_cancel_userdata(req, prev->user_data);
++ io_req_complete_post(req, ret ?: -ETIME, 0);
++ io_put_req(prev);
++ } else {
++ io_req_complete_post(req, -ETIME, 0);
++ }
++}
++
++static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
++{
++ struct io_timeout_data *data = container_of(timer,
++ struct io_timeout_data, timer);
++ struct io_kiocb *prev, *req = data->req;
++ struct io_ring_ctx *ctx = req->ctx;
++ unsigned long flags;
++
++ spin_lock_irqsave(&ctx->timeout_lock, flags);
++ prev = req->timeout.head;
++ req->timeout.head = NULL;
++
++ /*
++ * We don't expect the list to be empty; that will only happen if we
++ * race with the completion of the linked work.
++ */
++ if (prev) {
++ io_remove_next_linked(prev);
++ if (!req_ref_inc_not_zero(prev))
++ prev = NULL;
++ }
++ list_del(&req->timeout.list);
++ req->timeout.prev = prev;
++ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
++
++ req->io_task_work.func = io_req_task_link_timeout;
++ io_req_task_work_add(req);
++ return HRTIMER_NORESTART;
++}
++
++static void io_queue_linked_timeout(struct io_kiocb *req)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++
++ spin_lock_irq(&ctx->timeout_lock);
++ /*
++ * If the back reference is NULL, then our linked request finished
++ * before we got a chance to set up the timer
++ */
++ if (req->timeout.head) {
++ struct io_timeout_data *data = req->async_data;
++
++ data->timer.function = io_link_timeout_fn;
++ hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
++ data->mode);
++ list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
++ }
++ spin_unlock_irq(&ctx->timeout_lock);
++ /* drop submission reference */
++ io_put_req(req);
++}
++
++static void __io_queue_sqe(struct io_kiocb *req)
++ __must_hold(&req->ctx->uring_lock)
++{
++ struct io_kiocb *linked_timeout;
++ int ret;
++
++issue_sqe:
++ ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
++
++ /*
++ * We async punt it if the file wasn't marked NOWAIT, or if the file
++ * doesn't support non-blocking read/write attempts
++ */
++ if (likely(!ret)) {
++ if (req->flags & REQ_F_COMPLETE_INLINE) {
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_submit_state *state = &ctx->submit_state;
++
++ state->compl_reqs[state->compl_nr++] = req;
++ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
++ io_submit_flush_completions(ctx);
++ return;
++ }
++
++ linked_timeout = io_prep_linked_timeout(req);
++ if (linked_timeout)
++ io_queue_linked_timeout(linked_timeout);
++ } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
++ linked_timeout = io_prep_linked_timeout(req);
++
++ switch (io_arm_poll_handler(req)) {
++ case IO_APOLL_READY:
++ if (linked_timeout)
++ io_queue_linked_timeout(linked_timeout);
++ goto issue_sqe;
++ case IO_APOLL_ABORTED:
++ /*
++ * Queued up for async execution, worker will release
++ * submit reference when the iocb is actually submitted.
++ */
++ io_queue_async_work(req, NULL);
++ break;
++ }
++
++ if (linked_timeout)
++ io_queue_linked_timeout(linked_timeout);
++ } else {
++ io_req_complete_failed(req, ret);
++ }
++}
++
++static inline void io_queue_sqe(struct io_kiocb *req)
++ __must_hold(&req->ctx->uring_lock)
++{
++ if (unlikely(req->ctx->drain_active) && io_drain_req(req))
++ return;
++
++ if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
++ __io_queue_sqe(req);
++ } else if (req->flags & REQ_F_FAIL) {
++ io_req_complete_fail_submit(req);
++ } else {
++ int ret = io_req_prep_async(req);
++
++ if (unlikely(ret))
++ io_req_complete_failed(req, ret);
++ else
++ io_queue_async_work(req, NULL);
++ }
++}
++
++/*
++ * Check SQE restrictions (opcode and flags).
++ *
++ * Returns 'true' if SQE is allowed, 'false' otherwise.
++ */
++static inline bool io_check_restriction(struct io_ring_ctx *ctx,
++ struct io_kiocb *req,
++ unsigned int sqe_flags)
++{
++ if (likely(!ctx->restricted))
++ return true;
++
++ if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
++ return false;
++
++ if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
++ ctx->restrictions.sqe_flags_required)
++ return false;
++
++ if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
++ ctx->restrictions.sqe_flags_required))
++ return false;
++
++ return true;
++}
++
++static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++ __must_hold(&ctx->uring_lock)
++{
++ struct io_submit_state *state;
++ unsigned int sqe_flags;
++ int personality, ret = 0;
++
++ /* req is partially pre-initialised, see io_preinit_req() */
++ req->opcode = READ_ONCE(sqe->opcode);
++ /* same numerical values with corresponding REQ_F_*, safe to copy */
++ req->flags = sqe_flags = READ_ONCE(sqe->flags);
++ req->user_data = READ_ONCE(sqe->user_data);
++ req->file = NULL;
++ req->fixed_rsrc_refs = NULL;
++ req->task = current;
++
++ /* enforce forwards compatibility on users */
++ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
++ return -EINVAL;
++ if (unlikely(req->opcode >= IORING_OP_LAST))
++ return -EINVAL;
++ if (!io_check_restriction(ctx, req, sqe_flags))
++ return -EACCES;
++
++ if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
++ !io_op_defs[req->opcode].buffer_select)
++ return -EOPNOTSUPP;
++ if (unlikely(sqe_flags & IOSQE_IO_DRAIN))
++ ctx->drain_active = true;
++
++ personality = READ_ONCE(sqe->personality);
++ if (personality) {
++ req->creds = xa_load(&ctx->personalities, personality);
++ if (!req->creds)
++ return -EINVAL;
++ get_cred(req->creds);
++ req->flags |= REQ_F_CREDS;
++ }
++ state = &ctx->submit_state;
++
++ /*
++ * Plug now if we have more than 1 IO left after this, and the target
++ * is potentially a read/write to block based storage.
++ */
++ if (!state->plug_started && state->ios_left > 1 &&
++ io_op_defs[req->opcode].plug) {
++ blk_start_plug(&state->plug);
++ state->plug_started = true;
++ }
++
++ if (io_op_defs[req->opcode].needs_file) {
++ req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
++ (sqe_flags & IOSQE_FIXED_FILE),
++ IO_URING_F_NONBLOCK);
++ if (unlikely(!req->file))
++ ret = -EBADF;
++ }
++
++ state->ios_left--;
++ return ret;
++}
++
++static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
++ const struct io_uring_sqe *sqe)
++ __must_hold(&ctx->uring_lock)
++{
++ struct io_submit_link *link = &ctx->submit_state.link;
++ int ret;
++
++ ret = io_init_req(ctx, req, sqe);
++ if (unlikely(ret)) {
++fail_req:
++ /* fail even hard links since we don't submit */
++ if (link->head) {
++ /*
++ * We can tell whether a link req failed or was cancelled by
++ * whether REQ_F_FAIL is set, but the head is an exception:
++ * it may have REQ_F_FAIL set because of another req's
++ * failure. So leverage req->result to distinguish whether a
++ * head has REQ_F_FAIL set due to its own failure or another
++ * req's failure, so that we can set the correct ret code for it.
++ * Init result here to avoid affecting the normal path.
++ */
++ if (!(link->head->flags & REQ_F_FAIL))
++ req_fail_link_node(link->head, -ECANCELED);
++ } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
++ /*
++ * the current req is a normal req; return the
++ * error and thus break the submission loop.
++ */
++ io_req_complete_failed(req, ret);
++ return ret;
++ }
++ req_fail_link_node(req, ret);
++ } else {
++ ret = io_req_prep(req, sqe);
++ if (unlikely(ret))
++ goto fail_req;
++ }
++
++ /* don't need @sqe from now on */
++ trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
++ req->flags, true,
++ ctx->flags & IORING_SETUP_SQPOLL);
++
++ /*
++ * If we already have a head request, queue this one for async
++ * submittal once the head completes. If we don't have a head but
++ * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
++ * submitted sync once the chain is complete. If none of those
++ * conditions are true (normal request), then just queue it.
++ */
++ if (link->head) {
++ struct io_kiocb *head = link->head;
++
++ if (!(req->flags & REQ_F_FAIL)) {
++ ret = io_req_prep_async(req);
++ if (unlikely(ret)) {
++ req_fail_link_node(req, ret);
++ if (!(head->flags & REQ_F_FAIL))
++ req_fail_link_node(head, -ECANCELED);
++ }
++ }
++ trace_io_uring_link(ctx, req, head);
++ link->last->link = req;
++ link->last = req;
++
++ /* last request of a link, enqueue the link */
++ if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
++ link->head = NULL;
++ io_queue_sqe(head);
++ }
++ } else {
++ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
++ link->head = req;
++ link->last = req;
++ } else {
++ io_queue_sqe(req);
++ }
++ }
++
++ return 0;
++}
++
++/*
++ * Batched submission is done, ensure local IO is flushed out.
++ */
++static void io_submit_state_end(struct io_submit_state *state,
++ struct io_ring_ctx *ctx)
++{
++ if (state->link.head)
++ io_queue_sqe(state->link.head);
++ if (state->compl_nr)
++ io_submit_flush_completions(ctx);
++ if (state->plug_started)
++ blk_finish_plug(&state->plug);
++}
++
++/*
++ * Start submission side cache.
++ */
++static void io_submit_state_start(struct io_submit_state *state,
++ unsigned int max_ios)
++{
++ state->plug_started = false;
++ state->ios_left = max_ios;
++ /* set only head, no need to init link_last in advance */
++ state->link.head = NULL;
++}
++
++static void io_commit_sqring(struct io_ring_ctx *ctx)
++{
++ struct io_rings *rings = ctx->rings;
++
++ /*
++ * Ensure any loads from the SQEs are done at this point,
++ * since once we write the new head, the application could
++ * write new data to them.
++ */
++ smp_store_release(&rings->sq.head, ctx->cached_sq_head);
++}
++
++/*
++ * Fetch an sqe, if one is available. Note this returns a pointer to memory
++ * that is mapped by userspace. This means that care needs to be taken to
++ * ensure that reads are stable, as we cannot rely on userspace always
++ * being a good citizen. If members of the sqe are validated and then later
++ * used, it's important that those reads are done through READ_ONCE() to
++ * prevent a re-load down the line.
++ */
++static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
++{
++ unsigned head, mask = ctx->sq_entries - 1;
++ unsigned sq_idx = ctx->cached_sq_head++ & mask;
++
++ /*
++ * The cached sq head (or cq tail) serves two purposes:
++ *
++ * 1) allows us to batch the cost of updating the user-visible
++ * head.
++ * 2) allows the kernel side to track the head on its own, even
++ * though the application is the one updating it.
++ */
++ head = READ_ONCE(ctx->sq_array[sq_idx]);
++ if (likely(head < ctx->sq_entries))
++ return &ctx->sq_sqes[head];
++
++ /* drop invalid entries */
++ spin_lock(&ctx->completion_lock);
++ ctx->cq_extra--;
++ spin_unlock(&ctx->completion_lock);
++ WRITE_ONCE(ctx->rings->sq_dropped,
++ READ_ONCE(ctx->rings->sq_dropped) + 1);
++ return NULL;
++}
++
++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
++ __must_hold(&ctx->uring_lock)
++{
++ int submitted = 0;
++
++ /* make sure SQ entry isn't read before tail */
++ nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
++ if (!percpu_ref_tryget_many(&ctx->refs, nr))
++ return -EAGAIN;
++ io_get_task_refs(nr);
++
++ io_submit_state_start(&ctx->submit_state, nr);
++ while (submitted < nr) {
++ const struct io_uring_sqe *sqe;
++ struct io_kiocb *req;
++
++ req = io_alloc_req(ctx);
++ if (unlikely(!req)) {
++ if (!submitted)
++ submitted = -EAGAIN;
++ break;
++ }
++ sqe = io_get_sqe(ctx);
++ if (unlikely(!sqe)) {
++ list_add(&req->inflight_entry, &ctx->submit_state.free_list);
++ break;
++ }
++ /* will complete beyond this point, count as submitted */
++ submitted++;
++ if (io_submit_sqe(ctx, req, sqe))
++ break;
++ }
++
++ if (unlikely(submitted != nr)) {
++ int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
++ int unused = nr - ref_used;
++
++ current->io_uring->cached_refs += unused;
++ percpu_ref_put_many(&ctx->refs, unused);
++ }
++
++ io_submit_state_end(&ctx->submit_state, ctx);
++ /* Commit SQ ring head once we've consumed and submitted all SQEs */
++ io_commit_sqring(ctx);
++
++ return submitted;
++}
++
++static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
++{
++ return READ_ONCE(sqd->state);
++}
++
++static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
++{
++ /* Tell userspace we may need a wakeup call */
++ spin_lock(&ctx->completion_lock);
++ WRITE_ONCE(ctx->rings->sq_flags,
++ ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
++ spin_unlock(&ctx->completion_lock);
++}
++
++static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
++{
++ spin_lock(&ctx->completion_lock);
++ WRITE_ONCE(ctx->rings->sq_flags,
++ ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
++ spin_unlock(&ctx->completion_lock);
++}
++
++static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
++{
++ unsigned int to_submit;
++ int ret = 0;
++
++ to_submit = io_sqring_entries(ctx);
++ /* if we're handling multiple rings, cap submit size for fairness */
++ if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
++ to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
++
++ if (!list_empty(&ctx->iopoll_list) || to_submit) {
++ unsigned nr_events = 0;
++ const struct cred *creds = NULL;
++
++ if (ctx->sq_creds != current_cred())
++ creds = override_creds(ctx->sq_creds);
++
++ mutex_lock(&ctx->uring_lock);
++ if (!list_empty(&ctx->iopoll_list))
++ io_do_iopoll(ctx, &nr_events, 0);
++
++ /*
++ * Don't submit if refs are dying, good for io_uring_register(),
++ * but also it is relied upon by io_ring_exit_work()
++ */
++ if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
++ !(ctx->flags & IORING_SETUP_R_DISABLED))
++ ret = io_submit_sqes(ctx, to_submit);
++ mutex_unlock(&ctx->uring_lock);
++
++ if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
++ wake_up(&ctx->sqo_sq_wait);
++ if (creds)
++ revert_creds(creds);
++ }
++
++ return ret;
++}
++
++static void io_sqd_update_thread_idle(struct io_sq_data *sqd)
++{
++ struct io_ring_ctx *ctx;
++ unsigned sq_thread_idle = 0;
++
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
++ sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle);
++ sqd->sq_thread_idle = sq_thread_idle;
++}
++
++static bool io_sqd_handle_event(struct io_sq_data *sqd)
++{
++ bool did_sig = false;
++ struct ksignal ksig;
++
++ if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
++ signal_pending(current)) {
++ mutex_unlock(&sqd->lock);
++ if (signal_pending(current))
++ did_sig = get_signal(&ksig);
++ cond_resched();
++ mutex_lock(&sqd->lock);
++ }
++ return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
++}
++
++static int io_sq_thread(void *data)
++{
++ struct io_sq_data *sqd = data;
++ struct io_ring_ctx *ctx;
++ unsigned long timeout = 0;
++ char buf[TASK_COMM_LEN];
++ DEFINE_WAIT(wait);
++
++ snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
++ set_task_comm(current, buf);
++
++ if (sqd->sq_cpu != -1)
++ set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
++ else
++ set_cpus_allowed_ptr(current, cpu_online_mask);
++ current->flags |= PF_NO_SETAFFINITY;
++
++ mutex_lock(&sqd->lock);
++ while (1) {
++ bool cap_entries, sqt_spin = false;
++
++ if (io_sqd_events_pending(sqd) || signal_pending(current)) {
++ if (io_sqd_handle_event(sqd))
++ break;
++ timeout = jiffies + sqd->sq_thread_idle;
++ }
++
++ cap_entries = !list_is_singular(&sqd->ctx_list);
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
++ int ret = __io_sq_thread(ctx, cap_entries);
++
++ if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
++ sqt_spin = true;
++ }
++ if (io_run_task_work())
++ sqt_spin = true;
++
++ if (sqt_spin || !time_after(jiffies, timeout)) {
++ cond_resched();
++ if (sqt_spin)
++ timeout = jiffies + sqd->sq_thread_idle;
++ continue;
++ }
++
++ prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
++ if (!io_sqd_events_pending(sqd) && !current->task_works) {
++ bool needs_sched = true;
++
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
++ io_ring_set_wakeup_flag(ctx);
++
++ if ((ctx->flags & IORING_SETUP_IOPOLL) &&
++ !list_empty_careful(&ctx->iopoll_list)) {
++ needs_sched = false;
++ break;
++ }
++ if (io_sqring_entries(ctx)) {
++ needs_sched = false;
++ break;
++ }
++ }
++
++ if (needs_sched) {
++ mutex_unlock(&sqd->lock);
++ schedule();
++ mutex_lock(&sqd->lock);
++ }
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
++ io_ring_clear_wakeup_flag(ctx);
++ }
++
++ finish_wait(&sqd->wait, &wait);
++ timeout = jiffies + sqd->sq_thread_idle;
++ }
++
++ io_uring_cancel_generic(true, sqd);
++ sqd->thread = NULL;
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
++ io_ring_set_wakeup_flag(ctx);
++ io_run_task_work();
++ mutex_unlock(&sqd->lock);
++
++ complete(&sqd->exited);
++ do_exit(0);
++}
++
++struct io_wait_queue {
++ struct wait_queue_entry wq;
++ struct io_ring_ctx *ctx;
++ unsigned cq_tail;
++ unsigned nr_timeouts;
++};
++
++static inline bool io_should_wake(struct io_wait_queue *iowq)
++{
++ struct io_ring_ctx *ctx = iowq->ctx;
++ int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
++
++ /*
++ * Wake up if we have enough events, or if a timeout occurred since we
++ * started waiting. For timeouts, we always want to return to userspace,
++ * regardless of event count.
++ */
++ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
++}
++
++static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
++ int wake_flags, void *key)
++{
++ struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
++ wq);
++
++ /*
++ * Cannot safely flush overflowed CQEs from here, ensure we wake up
++ * the task, and the next invocation will do it.
++ */
++ if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
++ return autoremove_wake_function(curr, mode, wake_flags, key);
++ return -1;
++}
++
++static int io_run_task_work_sig(void)
++{
++ if (io_run_task_work())
++ return 1;
++ if (!signal_pending(current))
++ return 0;
++ if (test_thread_flag(TIF_NOTIFY_SIGNAL))
++ return -ERESTARTSYS;
++ return -EINTR;
++}
++
++static bool current_pending_io(void)
++{
++ struct io_uring_task *tctx = current->io_uring;
++
++ if (!tctx)
++ return false;
++ return percpu_counter_read_positive(&tctx->inflight);
++}
++
++/* when returns >0, the caller should retry */
++static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
++ struct io_wait_queue *iowq,
++ ktime_t *timeout)
++{
++ int io_wait, ret;
++
++ /* make sure we run task_work before checking for signals */
++ ret = io_run_task_work_sig();
++ if (ret || io_should_wake(iowq))
++ return ret;
++ /* let the caller flush overflows, retry */
++ if (test_bit(0, &ctx->check_cq_overflow))
++ return 1;
++
++ /*
++ * Mark us as being in io_wait if we have pending requests, so cpufreq
++ * can take into account that the task is waiting for IO - turns out
++ * to be important for low QD IO.
++ */
++ io_wait = current->in_iowait;
++ if (current_pending_io())
++ current->in_iowait = 1;
++ ret = 1;
++ if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
++ ret = -ETIME;
++ current->in_iowait = io_wait;
++ return ret;
++}
++
++/*
++ * Wait until events become available, if we don't already have some. The
++ * application must reap them itself, as they reside on the shared cq ring.
++ */
++static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
++ const sigset_t __user *sig, size_t sigsz,
++ struct __kernel_timespec __user *uts)
++{
++ struct io_wait_queue iowq;
++ struct io_rings *rings = ctx->rings;
++ ktime_t timeout = KTIME_MAX;
++ int ret;
++
++ do {
++ io_cqring_overflow_flush(ctx);
++ if (io_cqring_events(ctx) >= min_events)
++ return 0;
++ if (!io_run_task_work())
++ break;
++ } while (1);
++
++ if (uts) {
++ struct timespec64 ts;
++
++ if (get_timespec64(&ts, uts))
++ return -EFAULT;
++ timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
++ }
++
++ if (sig) {
++#ifdef CONFIG_COMPAT
++ if (in_compat_syscall())
++ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
++ sigsz);
++ else
++#endif
++ ret = set_user_sigmask(sig, sigsz);
++
++ if (ret)
++ return ret;
++ }
++
++ init_waitqueue_func_entry(&iowq.wq, io_wake_function);
++ iowq.wq.private = current;
++ INIT_LIST_HEAD(&iowq.wq.entry);
++ iowq.ctx = ctx;
++ iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
++ iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
++
++ trace_io_uring_cqring_wait(ctx, min_events);
++ do {
++ /* if we can't even flush overflow, don't wait for more */
++ if (!io_cqring_overflow_flush(ctx)) {
++ ret = -EBUSY;
++ break;
++ }
++ prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
++ TASK_INTERRUPTIBLE);
++ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
++ finish_wait(&ctx->cq_wait, &iowq.wq);
++ cond_resched();
++ } while (ret > 0);
++
++ restore_saved_sigmask_unless(ret == -EINTR);
++
++ return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
++}
++
++static void io_free_page_table(void **table, size_t size)
++{
++ unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
++
++ for (i = 0; i < nr_tables; i++)
++ kfree(table[i]);
++ kfree(table);
++}
++
++static void **io_alloc_page_table(size_t size)
++{
++ unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
++ size_t init_size = size;
++ void **table;
++
++ table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
++ if (!table)
++ return NULL;
++
++ for (i = 0; i < nr_tables; i++) {
++ unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
++
++ table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
++ if (!table[i]) {
++ io_free_page_table(table, init_size);
++ return NULL;
++ }
++ size -= this_size;
++ }
++ return table;
++}
++
++static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
++{
++ percpu_ref_exit(&ref_node->refs);
++ kfree(ref_node);
++}
++
++static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
++{
++ struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
++ struct io_ring_ctx *ctx = node->rsrc_data->ctx;
++ unsigned long flags;
++ bool first_add = false;
++ unsigned long delay = HZ;
++
++ spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
++ node->done = true;
++
++ /* if we are mid-quiesce then do not delay */
++ if (node->rsrc_data->quiesce)
++ delay = 0;
++
++ while (!list_empty(&ctx->rsrc_ref_list)) {
++ node = list_first_entry(&ctx->rsrc_ref_list,
++ struct io_rsrc_node, node);
++ /* recycle ref nodes in order */
++ if (!node->done)
++ break;
++ list_del(&node->node);
++ first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
++ }
++ spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
++
++ if (first_add)
++ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
++}
++
++static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
++{
++ struct io_rsrc_node *ref_node;
++
++ ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
++ if (!ref_node)
++ return NULL;
++
++ if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
++ 0, GFP_KERNEL)) {
++ kfree(ref_node);
++ return NULL;
++ }
++ INIT_LIST_HEAD(&ref_node->node);
++ INIT_LIST_HEAD(&ref_node->rsrc_list);
++ ref_node->done = false;
++ return ref_node;
++}
++
++static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
++ struct io_rsrc_data *data_to_kill)
++{
++ WARN_ON_ONCE(!ctx->rsrc_backup_node);
++ WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
++
++ if (data_to_kill) {
++ struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
++
++ rsrc_node->rsrc_data = data_to_kill;
++ spin_lock_irq(&ctx->rsrc_ref_lock);
++ list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
++ spin_unlock_irq(&ctx->rsrc_ref_lock);
++
++ atomic_inc(&data_to_kill->refs);
++ percpu_ref_kill(&rsrc_node->refs);
++ ctx->rsrc_node = NULL;
++ }
++
++ if (!ctx->rsrc_node) {
++ ctx->rsrc_node = ctx->rsrc_backup_node;
++ ctx->rsrc_backup_node = NULL;
++ }
++}
++
++static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
++{
++ if (ctx->rsrc_backup_node)
++ return 0;
++ ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
++ return ctx->rsrc_backup_node ? 0 : -ENOMEM;
++}
++
++static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
++{
++ int ret;
++
++ /* As we may drop ->uring_lock, other task may have started quiesce */
++ if (data->quiesce)
++ return -ENXIO;
++
++ data->quiesce = true;
++ do {
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ break;
++ io_rsrc_node_switch(ctx, data);
++
++ /* kill initial ref, already quiesced if zero */
++ if (atomic_dec_and_test(&data->refs))
++ break;
++ mutex_unlock(&ctx->uring_lock);
++ flush_delayed_work(&ctx->rsrc_put_work);
++ ret = wait_for_completion_interruptible(&data->done);
++ if (!ret) {
++ mutex_lock(&ctx->uring_lock);
++ if (atomic_read(&data->refs) > 0) {
++ /*
++ * it has been revived by another thread while
++ * we were unlocked
++ */
++ mutex_unlock(&ctx->uring_lock);
++ } else {
++ break;
++ }
++ }
++
++ atomic_inc(&data->refs);
++ /* wait for all works potentially completing data->done */
++ flush_delayed_work(&ctx->rsrc_put_work);
++ reinit_completion(&data->done);
++
++ ret = io_run_task_work_sig();
++ mutex_lock(&ctx->uring_lock);
++ } while (ret >= 0);
++ data->quiesce = false;
++
++ return ret;
++}
++
++static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
++{
++ unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK;
++ unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT;
++
++ return &data->tags[table_idx][off];
++}
++
++static void io_rsrc_data_free(struct io_rsrc_data *data)
++{
++ size_t size = data->nr * sizeof(data->tags[0][0]);
++
++ if (data->tags)
++ io_free_page_table((void **)data->tags, size);
++ kfree(data);
++}
++
++static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
++ u64 __user *utags, unsigned nr,
++ struct io_rsrc_data **pdata)
++{
++ struct io_rsrc_data *data;
++ int ret = -ENOMEM;
++ unsigned i;
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (!data)
++ return -ENOMEM;
++ data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
++ if (!data->tags) {
++ kfree(data);
++ return -ENOMEM;
++ }
++
++ data->nr = nr;
++ data->ctx = ctx;
++ data->do_put = do_put;
++ if (utags) {
++ ret = -EFAULT;
++ for (i = 0; i < nr; i++) {
++ u64 *tag_slot = io_get_tag_slot(data, i);
++
++ if (copy_from_user(tag_slot, &utags[i],
++ sizeof(*tag_slot)))
++ goto fail;
++ }
++ }
++
++ atomic_set(&data->refs, 1);
++ init_completion(&data->done);
++ *pdata = data;
++ return 0;
++fail:
++ io_rsrc_data_free(data);
++ return ret;
++}
++
++static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
++{
++ table->files = kvcalloc(nr_files, sizeof(table->files[0]),
++ GFP_KERNEL_ACCOUNT);
++ return !!table->files;
++}
++
++static void io_free_file_tables(struct io_file_table *table)
++{
++ kvfree(table->files);
++ table->files = NULL;
++}
++
++static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
++{
++#if defined(CONFIG_UNIX)
++ if (ctx->ring_sock) {
++ struct sock *sock = ctx->ring_sock->sk;
++ struct sk_buff *skb;
++
++ while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
++ kfree_skb(skb);
++ }
++#else
++ int i;
++
++ for (i = 0; i < ctx->nr_user_files; i++) {
++ struct file *file;
++
++ file = io_file_from_index(ctx, i);
++ if (file)
++ fput(file);
++ }
++#endif
++ io_free_file_tables(&ctx->file_table);
++ io_rsrc_data_free(ctx->file_data);
++ ctx->file_data = NULL;
++ ctx->nr_user_files = 0;
++}
++
++static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
++{
++ unsigned nr = ctx->nr_user_files;
++ int ret;
++
++ if (!ctx->file_data)
++ return -ENXIO;
++
++ /*
++ * Quiesce may unlock ->uring_lock, and while it's not held
++ * prevent new requests from using the table.
++ */
++ ctx->nr_user_files = 0;
++ ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
++ ctx->nr_user_files = nr;
++ if (!ret)
++ __io_sqe_files_unregister(ctx);
++ return ret;
++}
++
++static void io_sq_thread_unpark(struct io_sq_data *sqd)
++ __releases(&sqd->lock)
++{
++ WARN_ON_ONCE(sqd->thread == current);
++
++ /*
++ * Do the dance but not conditional clear_bit() because it'd race with
++ * other threads incrementing park_pending and setting the bit.
++ */
++ clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
++ if (atomic_dec_return(&sqd->park_pending))
++ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
++ mutex_unlock(&sqd->lock);
++}
++
++static void io_sq_thread_park(struct io_sq_data *sqd)
++ __acquires(&sqd->lock)
++{
++ WARN_ON_ONCE(sqd->thread == current);
++
++ atomic_inc(&sqd->park_pending);
++ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
++ mutex_lock(&sqd->lock);
++ if (sqd->thread)
++ wake_up_process(sqd->thread);
++}
++
++static void io_sq_thread_stop(struct io_sq_data *sqd)
++{
++ WARN_ON_ONCE(sqd->thread == current);
++ WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
++
++ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
++ mutex_lock(&sqd->lock);
++ if (sqd->thread)
++ wake_up_process(sqd->thread);
++ mutex_unlock(&sqd->lock);
++ wait_for_completion(&sqd->exited);
++}
++
++static void io_put_sq_data(struct io_sq_data *sqd)
++{
++ if (refcount_dec_and_test(&sqd->refs)) {
++ WARN_ON_ONCE(atomic_read(&sqd->park_pending));
++
++ io_sq_thread_stop(sqd);
++ kfree(sqd);
++ }
++}
++
++static void io_sq_thread_finish(struct io_ring_ctx *ctx)
++{
++ struct io_sq_data *sqd = ctx->sq_data;
++
++ if (sqd) {
++ io_sq_thread_park(sqd);
++ list_del_init(&ctx->sqd_list);
++ io_sqd_update_thread_idle(sqd);
++ io_sq_thread_unpark(sqd);
++
++ io_put_sq_data(sqd);
++ ctx->sq_data = NULL;
++ }
++}
++
++static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
++{
++ struct io_ring_ctx *ctx_attach;
++ struct io_sq_data *sqd;
++ struct fd f;
++
++ f = fdget(p->wq_fd);
++ if (!f.file)
++ return ERR_PTR(-ENXIO);
++ if (f.file->f_op != &io_uring_fops) {
++ fdput(f);
++ return ERR_PTR(-EINVAL);
++ }
++
++ ctx_attach = f.file->private_data;
++ sqd = ctx_attach->sq_data;
++ if (!sqd) {
++ fdput(f);
++ return ERR_PTR(-EINVAL);
++ }
++ if (sqd->task_tgid != current->tgid) {
++ fdput(f);
++ return ERR_PTR(-EPERM);
++ }
++
++ refcount_inc(&sqd->refs);
++ fdput(f);
++ return sqd;
++}
++
++static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
++ bool *attached)
++{
++ struct io_sq_data *sqd;
++
++ *attached = false;
++ if (p->flags & IORING_SETUP_ATTACH_WQ) {
++ sqd = io_attach_sq_data(p);
++ if (!IS_ERR(sqd)) {
++ *attached = true;
++ return sqd;
++ }
++ /* fall through for EPERM case, setup new sqd/task */
++ if (PTR_ERR(sqd) != -EPERM)
++ return sqd;
++ }
++
++ sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
++ if (!sqd)
++ return ERR_PTR(-ENOMEM);
++
++ atomic_set(&sqd->park_pending, 0);
++ refcount_set(&sqd->refs, 1);
++ INIT_LIST_HEAD(&sqd->ctx_list);
++ mutex_init(&sqd->lock);
++ init_waitqueue_head(&sqd->wait);
++ init_completion(&sqd->exited);
++ return sqd;
++}
++
++#if defined(CONFIG_UNIX)
++/*
++ * Ensure the UNIX gc is aware of our file set, so we are certain that
++ * the io_uring can be safely unregistered on process exit, even if we have
++ * loops in the file referencing.
++ */
++static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
++{
++ struct sock *sk = ctx->ring_sock->sk;
++ struct scm_fp_list *fpl;
++ struct sk_buff *skb;
++ int i, nr_files;
++
++ fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
++ if (!fpl)
++ return -ENOMEM;
++
++ skb = alloc_skb(0, GFP_KERNEL);
++ if (!skb) {
++ kfree(fpl);
++ return -ENOMEM;
++ }
++
++ skb->sk = sk;
++ skb->scm_io_uring = 1;
++
++ nr_files = 0;
++ fpl->user = get_uid(current_user());
++ for (i = 0; i < nr; i++) {
++ struct file *file = io_file_from_index(ctx, i + offset);
++
++ if (!file)
++ continue;
++ fpl->fp[nr_files] = get_file(file);
++ unix_inflight(fpl->user, fpl->fp[nr_files]);
++ nr_files++;
++ }
++
++ if (nr_files) {
++ fpl->max = SCM_MAX_FD;
++ fpl->count = nr_files;
++ UNIXCB(skb).fp = fpl;
++ skb->destructor = unix_destruct_scm;
++ refcount_add(skb->truesize, &sk->sk_wmem_alloc);
++ skb_queue_head(&sk->sk_receive_queue, skb);
++
++ for (i = 0; i < nr; i++) {
++ struct file *file = io_file_from_index(ctx, i + offset);
++
++ if (file)
++ fput(file);
++ }
++ } else {
++ kfree_skb(skb);
++ free_uid(fpl->user);
++ kfree(fpl);
++ }
++
++ return 0;
++}
++
++/*
++ * If UNIX sockets are enabled, fd passing can cause a reference cycle which
++ * causes regular reference counting to break down. We rely on the UNIX
++ * garbage collection to take care of this problem for us.
++ */
++static int io_sqe_files_scm(struct io_ring_ctx *ctx)
++{
++ unsigned left, total;
++ int ret = 0;
++
++ total = 0;
++ left = ctx->nr_user_files;
++ while (left) {
++ unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
++
++ ret = __io_sqe_files_scm(ctx, this_files, total);
++ if (ret)
++ break;
++ left -= this_files;
++ total += this_files;
++ }
++
++ if (!ret)
++ return 0;
++
++ while (total < ctx->nr_user_files) {
++ struct file *file = io_file_from_index(ctx, total);
++
++ if (file)
++ fput(file);
++ total++;
++ }
++
++ return ret;
++}
++#else
++static int io_sqe_files_scm(struct io_ring_ctx *ctx)
++{
++ return 0;
++}
++#endif
++
++static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
++{
++ struct file *file = prsrc->file;
++#if defined(CONFIG_UNIX)
++ struct sock *sock = ctx->ring_sock->sk;
++ struct sk_buff_head list, *head = &sock->sk_receive_queue;
++ struct sk_buff *skb;
++ int i;
++
++ __skb_queue_head_init(&list);
++
++ /*
++ * Find the skb that holds this file in its SCM_RIGHTS. When found,
++ * remove this entry and rearrange the file array.
++ */
++ skb = skb_dequeue(head);
++ while (skb) {
++ struct scm_fp_list *fp;
++
++ fp = UNIXCB(skb).fp;
++ for (i = 0; i < fp->count; i++) {
++ int left;
++
++ if (fp->fp[i] != file)
++ continue;
++
++ unix_notinflight(fp->user, fp->fp[i]);
++ left = fp->count - 1 - i;
++ if (left) {
++ memmove(&fp->fp[i], &fp->fp[i + 1],
++ left * sizeof(struct file *));
++ }
++ fp->count--;
++ if (!fp->count) {
++ kfree_skb(skb);
++ skb = NULL;
++ } else {
++ __skb_queue_tail(&list, skb);
++ }
++ fput(file);
++ file = NULL;
++ break;
++ }
++
++ if (!file)
++ break;
++
++ __skb_queue_tail(&list, skb);
++
++ skb = skb_dequeue(head);
++ }
++
++ if (skb_peek(&list)) {
++ spin_lock_irq(&head->lock);
++ while ((skb = __skb_dequeue(&list)) != NULL)
++ __skb_queue_tail(head, skb);
++ spin_unlock_irq(&head->lock);
++ }
++#else
++ fput(file);
++#endif
++}
++
++static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
++{
++ struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
++ struct io_ring_ctx *ctx = rsrc_data->ctx;
++ struct io_rsrc_put *prsrc, *tmp;
++
++ list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
++ list_del(&prsrc->list);
++
++ if (prsrc->tag) {
++ bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
++
++ io_ring_submit_lock(ctx, lock_ring);
++ spin_lock(&ctx->completion_lock);
++ io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++ io_cqring_ev_posted(ctx);
++ io_ring_submit_unlock(ctx, lock_ring);
++ }
++
++ rsrc_data->do_put(ctx, prsrc);
++ kfree(prsrc);
++ }
++
++ io_rsrc_node_destroy(ref_node);
++ if (atomic_dec_and_test(&rsrc_data->refs))
++ complete(&rsrc_data->done);
++}
++
++static void io_rsrc_put_work(struct work_struct *work)
++{
++ struct io_ring_ctx *ctx;
++ struct llist_node *node;
++
++ ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
++ node = llist_del_all(&ctx->rsrc_put_llist);
++
++ while (node) {
++ struct io_rsrc_node *ref_node;
++ struct llist_node *next = node->next;
++
++ ref_node = llist_entry(node, struct io_rsrc_node, llist);
++ __io_rsrc_put_work(ref_node);
++ node = next;
++ }
++}
++
++static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned nr_args, u64 __user *tags)
++{
++ __s32 __user *fds = (__s32 __user *) arg;
++ struct file *file;
++ int fd, ret;
++ unsigned i;
++
++ if (ctx->file_data)
++ return -EBUSY;
++ if (!nr_args)
++ return -EINVAL;
++ if (nr_args > IORING_MAX_FIXED_FILES)
++ return -EMFILE;
++ if (nr_args > rlimit(RLIMIT_NOFILE))
++ return -EMFILE;
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ return ret;
++ ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
++ &ctx->file_data);
++ if (ret)
++ return ret;
++
++ ret = -ENOMEM;
++ if (!io_alloc_file_tables(&ctx->file_table, nr_args))
++ goto out_free;
++
++ for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
++ if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
++ ret = -EFAULT;
++ goto out_fput;
++ }
++ /* allow sparse sets */
++ if (fd == -1) {
++ ret = -EINVAL;
++ if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
++ goto out_fput;
++ continue;
++ }
++
++ file = fget(fd);
++ ret = -EBADF;
++ if (unlikely(!file))
++ goto out_fput;
++
++ /*
++ * Don't allow io_uring instances to be registered. If UNIX
++ * isn't enabled, then this causes a reference cycle and this
++ * instance can never get freed. If UNIX is enabled we'll
++ * handle it just fine, but there's still no point in allowing
++ * a ring fd as it doesn't support regular read/write anyway.
++ */
++ if (file->f_op == &io_uring_fops) {
++ fput(file);
++ goto out_fput;
++ }
++ io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
++ }
++
++ ret = io_sqe_files_scm(ctx);
++ if (ret) {
++ __io_sqe_files_unregister(ctx);
++ return ret;
++ }
++
++ io_rsrc_node_switch(ctx, NULL);
++ return ret;
++out_fput:
++ for (i = 0; i < ctx->nr_user_files; i++) {
++ file = io_file_from_index(ctx, i);
++ if (file)
++ fput(file);
++ }
++ io_free_file_tables(&ctx->file_table);
++ ctx->nr_user_files = 0;
++out_free:
++ io_rsrc_data_free(ctx->file_data);
++ ctx->file_data = NULL;
++ return ret;
++}
++
++static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
++ int index)
++{
++#if defined(CONFIG_UNIX)
++ struct sock *sock = ctx->ring_sock->sk;
++ struct sk_buff_head *head = &sock->sk_receive_queue;
++ struct sk_buff *skb;
++
++ /*
++ * See if we can merge this file into an existing skb SCM_RIGHTS
++ * file set. If there's no room, fall back to allocating a new skb
++ * and filling it in.
++ */
++ spin_lock_irq(&head->lock);
++ skb = skb_peek(head);
++ if (skb) {
++ struct scm_fp_list *fpl = UNIXCB(skb).fp;
++
++ if (fpl->count < SCM_MAX_FD) {
++ __skb_unlink(skb, head);
++ spin_unlock_irq(&head->lock);
++ fpl->fp[fpl->count] = get_file(file);
++ unix_inflight(fpl->user, fpl->fp[fpl->count]);
++ fpl->count++;
++ spin_lock_irq(&head->lock);
++ __skb_queue_head(head, skb);
++ } else {
++ skb = NULL;
++ }
++ }
++ spin_unlock_irq(&head->lock);
++
++ if (skb) {
++ fput(file);
++ return 0;
++ }
++
++ return __io_sqe_files_scm(ctx, 1, index);
++#else
++ return 0;
++#endif
++}
++
++static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
++ struct io_rsrc_node *node, void *rsrc)
++{
++ u64 *tag_slot = io_get_tag_slot(data, idx);
++ struct io_rsrc_put *prsrc;
++
++ prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
++ if (!prsrc)
++ return -ENOMEM;
++
++ prsrc->tag = *tag_slot;
++ *tag_slot = 0;
++ prsrc->rsrc = rsrc;
++ list_add(&prsrc->list, &node->rsrc_list);
++ return 0;
++}
++
++static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
++ unsigned int issue_flags, u32 slot_index)
++{
++ struct io_ring_ctx *ctx = req->ctx;
++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
++ bool needs_switch = false;
++ struct io_fixed_file *file_slot;
++ int ret = -EBADF;
++
++ io_ring_submit_lock(ctx, !force_nonblock);
++ if (file->f_op == &io_uring_fops)
++ goto err;
++ ret = -ENXIO;
++ if (!ctx->file_data)
++ goto err;
++ ret = -EINVAL;
++ if (slot_index >= ctx->nr_user_files)
++ goto err;
++
++ slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
++ file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
++
++ if (file_slot->file_ptr) {
++ struct file *old_file;
++
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ goto err;
++
++ old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
++ ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
++ ctx->rsrc_node, old_file);
++ if (ret)
++ goto err;
++ file_slot->file_ptr = 0;
++ needs_switch = true;
++ }
++
++ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
++ io_fixed_file_set(file_slot, file);
++ ret = io_sqe_file_register(ctx, file, slot_index);
++ if (ret) {
++ file_slot->file_ptr = 0;
++ goto err;
++ }
++
++ ret = 0;
++err:
++ if (needs_switch)
++ io_rsrc_node_switch(ctx, ctx->file_data);
++ io_ring_submit_unlock(ctx, !force_nonblock);
++ if (ret)
++ fput(file);
++ return ret;
++}
++
++static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
++{
++ unsigned int offset = req->close.file_slot - 1;
++ struct io_ring_ctx *ctx = req->ctx;
++ struct io_fixed_file *file_slot;
++ struct file *file;
++ int ret;
++
++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ ret = -ENXIO;
++ if (unlikely(!ctx->file_data))
++ goto out;
++ ret = -EINVAL;
++ if (offset >= ctx->nr_user_files)
++ goto out;
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ goto out;
++
++ offset = array_index_nospec(offset, ctx->nr_user_files);
++ file_slot = io_fixed_file_slot(&ctx->file_table, offset);
++ ret = -EBADF;
++ if (!file_slot->file_ptr)
++ goto out;
++
++ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
++ ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
++ if (ret)
++ goto out;
++
++ file_slot->file_ptr = 0;
++ io_rsrc_node_switch(ctx, ctx->file_data);
++ ret = 0;
++out:
++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
++ return ret;
++}
++
++static int __io_sqe_files_update(struct io_ring_ctx *ctx,
++ struct io_uring_rsrc_update2 *up,
++ unsigned nr_args)
++{
++ u64 __user *tags = u64_to_user_ptr(up->tags);
++ __s32 __user *fds = u64_to_user_ptr(up->data);
++ struct io_rsrc_data *data = ctx->file_data;
++ struct io_fixed_file *file_slot;
++ struct file *file;
++ int fd, i, err = 0;
++ unsigned int done;
++ bool needs_switch = false;
++
++ if (!ctx->file_data)
++ return -ENXIO;
++ if (up->offset + nr_args > ctx->nr_user_files)
++ return -EINVAL;
++
++ for (done = 0; done < nr_args; done++) {
++ u64 tag = 0;
++
++ if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
++ copy_from_user(&fd, &fds[done], sizeof(fd))) {
++ err = -EFAULT;
++ break;
++ }
++ if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
++ err = -EINVAL;
++ break;
++ }
++ if (fd == IORING_REGISTER_FILES_SKIP)
++ continue;
++
++ i = array_index_nospec(up->offset + done, ctx->nr_user_files);
++ file_slot = io_fixed_file_slot(&ctx->file_table, i);
++
++ if (file_slot->file_ptr) {
++ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
++ err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file);
++ if (err)
++ break;
++ file_slot->file_ptr = 0;
++ needs_switch = true;
++ }
++ if (fd != -1) {
++ file = fget(fd);
++ if (!file) {
++ err = -EBADF;
++ break;
++ }
++ /*
++ * Don't allow io_uring instances to be registered. If
++ * UNIX isn't enabled, then this causes a reference
++ * cycle and this instance can never get freed. If UNIX
++ * is enabled we'll handle it just fine, but there's
++ * still no point in allowing a ring fd as it doesn't
++ * support regular read/write anyway.
++ */
++ if (file->f_op == &io_uring_fops) {
++ fput(file);
++ err = -EBADF;
++ break;
++ }
++ *io_get_tag_slot(data, i) = tag;
++ io_fixed_file_set(file_slot, file);
++ err = io_sqe_file_register(ctx, file, i);
++ if (err) {
++ file_slot->file_ptr = 0;
++ fput(file);
++ break;
++ }
++ }
++ }
++
++ if (needs_switch)
++ io_rsrc_node_switch(ctx, data);
++ return done ? done : err;
++}
++
++static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
++ struct task_struct *task)
++{
++ struct io_wq_hash *hash;
++ struct io_wq_data data;
++ unsigned int concurrency;
++
++ mutex_lock(&ctx->uring_lock);
++ hash = ctx->hash_map;
++ if (!hash) {
++ hash = kzalloc(sizeof(*hash), GFP_KERNEL);
++ if (!hash) {
++ mutex_unlock(&ctx->uring_lock);
++ return ERR_PTR(-ENOMEM);
++ }
++ refcount_set(&hash->refs, 1);
++ init_waitqueue_head(&hash->wait);
++ ctx->hash_map = hash;
++ }
++ mutex_unlock(&ctx->uring_lock);
++
++ data.hash = hash;
++ data.task = task;
++ data.free_work = io_wq_free_work;
++ data.do_work = io_wq_submit_work;
++
++ /* Do QD, or 4 * CPUS, whichever is smaller */
++ concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
++
++ return io_wq_create(concurrency, &data);
++}
++
++static int io_uring_alloc_task_context(struct task_struct *task,
++ struct io_ring_ctx *ctx)
++{
++ struct io_uring_task *tctx;
++ int ret;
++
++ tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
++ if (unlikely(!tctx))
++ return -ENOMEM;
++
++ ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
++ if (unlikely(ret)) {
++ kfree(tctx);
++ return ret;
++ }
++
++ tctx->io_wq = io_init_wq_offload(ctx, task);
++ if (IS_ERR(tctx->io_wq)) {
++ ret = PTR_ERR(tctx->io_wq);
++ percpu_counter_destroy(&tctx->inflight);
++ kfree(tctx);
++ return ret;
++ }
++
++ xa_init(&tctx->xa);
++ init_waitqueue_head(&tctx->wait);
++ atomic_set(&tctx->in_idle, 0);
++ atomic_set(&tctx->inflight_tracked, 0);
++ task->io_uring = tctx;
++ spin_lock_init(&tctx->task_lock);
++ INIT_WQ_LIST(&tctx->task_list);
++ init_task_work(&tctx->task_work, tctx_task_work);
++ return 0;
++}
++
++void __io_uring_free(struct task_struct *tsk)
++{
++ struct io_uring_task *tctx = tsk->io_uring;
++
++ WARN_ON_ONCE(!xa_empty(&tctx->xa));
++ WARN_ON_ONCE(tctx->io_wq);
++ WARN_ON_ONCE(tctx->cached_refs);
++
++ percpu_counter_destroy(&tctx->inflight);
++ kfree(tctx);
++ tsk->io_uring = NULL;
++}
++
++static int io_sq_offload_create(struct io_ring_ctx *ctx,
++ struct io_uring_params *p)
++{
++ int ret;
++
++ /* Retain compatibility with failing for an invalid attach attempt */
++ if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
++ IORING_SETUP_ATTACH_WQ) {
++ struct fd f;
++
++ f = fdget(p->wq_fd);
++ if (!f.file)
++ return -ENXIO;
++ if (f.file->f_op != &io_uring_fops) {
++ fdput(f);
++ return -EINVAL;
++ }
++ fdput(f);
++ }
++ if (ctx->flags & IORING_SETUP_SQPOLL) {
++ struct task_struct *tsk;
++ struct io_sq_data *sqd;
++ bool attached;
++
++ sqd = io_get_sq_data(p, &attached);
++ if (IS_ERR(sqd)) {
++ ret = PTR_ERR(sqd);
++ goto err;
++ }
++
++ ctx->sq_creds = get_current_cred();
++ ctx->sq_data = sqd;
++ ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
++ if (!ctx->sq_thread_idle)
++ ctx->sq_thread_idle = HZ;
++
++ io_sq_thread_park(sqd);
++ list_add(&ctx->sqd_list, &sqd->ctx_list);
++ io_sqd_update_thread_idle(sqd);
++ /* don't attach to a dying SQPOLL thread, would be racy */
++ ret = (attached && !sqd->thread) ? -ENXIO : 0;
++ io_sq_thread_unpark(sqd);
++
++ if (ret < 0)
++ goto err;
++ if (attached)
++ return 0;
++
++ if (p->flags & IORING_SETUP_SQ_AFF) {
++ int cpu = p->sq_thread_cpu;
++
++ ret = -EINVAL;
++ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
++ goto err_sqpoll;
++ sqd->sq_cpu = cpu;
++ } else {
++ sqd->sq_cpu = -1;
++ }
++
++ sqd->task_pid = current->pid;
++ sqd->task_tgid = current->tgid;
++ tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
++ if (IS_ERR(tsk)) {
++ ret = PTR_ERR(tsk);
++ goto err_sqpoll;
++ }
++
++ sqd->thread = tsk;
++ ret = io_uring_alloc_task_context(tsk, ctx);
++ wake_up_new_task(tsk);
++ if (ret)
++ goto err;
++ } else if (p->flags & IORING_SETUP_SQ_AFF) {
++ /* Can't have SQ_AFF without SQPOLL */
++ ret = -EINVAL;
++ goto err;
++ }
++
++ return 0;
++err_sqpoll:
++ complete(&ctx->sq_data->exited);
++err:
++ io_sq_thread_finish(ctx);
++ return ret;
++}
++
++static inline void __io_unaccount_mem(struct user_struct *user,
++ unsigned long nr_pages)
++{
++ atomic_long_sub(nr_pages, &user->locked_vm);
++}
++
++static inline int __io_account_mem(struct user_struct *user,
++ unsigned long nr_pages)
++{
++ unsigned long page_limit, cur_pages, new_pages;
++
++ /* Don't allow more pages than we can safely lock */
++ page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
++
++ do {
++ cur_pages = atomic_long_read(&user->locked_vm);
++ new_pages = cur_pages + nr_pages;
++ if (new_pages > page_limit)
++ return -ENOMEM;
++ } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
++ new_pages) != cur_pages);
++
++ return 0;
++}
++
++static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
++{
++ if (ctx->user)
++ __io_unaccount_mem(ctx->user, nr_pages);
++
++ if (ctx->mm_account)
++ atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
++}
++
++static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
++{
++ int ret;
++
++ if (ctx->user) {
++ ret = __io_account_mem(ctx->user, nr_pages);
++ if (ret)
++ return ret;
++ }
++
++ if (ctx->mm_account)
++ atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
++
++ return 0;
++}
++
++static void io_mem_free(void *ptr)
++{
++ struct page *page;
++
++ if (!ptr)
++ return;
++
++ page = virt_to_head_page(ptr);
++ if (put_page_testzero(page))
++ free_compound_page(page);
++}
++
++static void *io_mem_alloc(size_t size)
++{
++ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
++
++ return (void *) __get_free_pages(gfp, get_order(size));
++}
++
++static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
++ size_t *sq_offset)
++{
++ struct io_rings *rings;
++ size_t off, sq_array_size;
++
++ off = struct_size(rings, cqes, cq_entries);
++ if (off == SIZE_MAX)
++ return SIZE_MAX;
++
++#ifdef CONFIG_SMP
++ off = ALIGN(off, SMP_CACHE_BYTES);
++ if (off == 0)
++ return SIZE_MAX;
++#endif
++
++ if (sq_offset)
++ *sq_offset = off;
++
++ sq_array_size = array_size(sizeof(u32), sq_entries);
++ if (sq_array_size == SIZE_MAX)
++ return SIZE_MAX;
++
++ if (check_add_overflow(off, sq_array_size, &off))
++ return SIZE_MAX;
++
++ return off;
++}
++
++static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
++{
++ struct io_mapped_ubuf *imu = *slot;
++ unsigned int i;
++
++ if (imu != ctx->dummy_ubuf) {
++ for (i = 0; i < imu->nr_bvecs; i++)
++ unpin_user_page(imu->bvec[i].bv_page);
++ if (imu->acct_pages)
++ io_unaccount_mem(ctx, imu->acct_pages);
++ kvfree(imu);
++ }
++ *slot = NULL;
++}
++
++static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
++{
++ io_buffer_unmap(ctx, &prsrc->buf);
++ prsrc->buf = NULL;
++}
++
++static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
++{
++ unsigned int i;
++
++ for (i = 0; i < ctx->nr_user_bufs; i++)
++ io_buffer_unmap(ctx, &ctx->user_bufs[i]);
++ kfree(ctx->user_bufs);
++ io_rsrc_data_free(ctx->buf_data);
++ ctx->user_bufs = NULL;
++ ctx->buf_data = NULL;
++ ctx->nr_user_bufs = 0;
++}
++
++static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
++{
++ unsigned nr = ctx->nr_user_bufs;
++ int ret;
++
++ if (!ctx->buf_data)
++ return -ENXIO;
++
++ /*
++ * Quiesce may unlock ->uring_lock, and while it's not held
++ * prevent new requests from using the table.
++ */
++ ctx->nr_user_bufs = 0;
++ ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
++ ctx->nr_user_bufs = nr;
++ if (!ret)
++ __io_sqe_buffers_unregister(ctx);
++ return ret;
++}
++
++static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
++ void __user *arg, unsigned index)
++{
++ struct iovec __user *src;
++
++#ifdef CONFIG_COMPAT
++ if (ctx->compat) {
++ struct compat_iovec __user *ciovs;
++ struct compat_iovec ciov;
++
++ ciovs = (struct compat_iovec __user *) arg;
++ if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
++ return -EFAULT;
++
++ dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
++ dst->iov_len = ciov.iov_len;
++ return 0;
++ }
++#endif
++ src = (struct iovec __user *) arg;
++ if (copy_from_user(dst, &src[index], sizeof(*dst)))
++ return -EFAULT;
++ return 0;
++}
++
++/*
++ * Not super efficient, but this only happens at registration time. And we do cache
++ * the last compound head, so generally we'll only do a full search if we don't
++ * match that one.
++ *
++ * We check if the given compound head page has already been accounted, to
++ * avoid double accounting it. This allows us to account the full size of the
++ * page, not just the constituent pages of a huge page.
++ */
++static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
++ int nr_pages, struct page *hpage)
++{
++ int i, j;
++
++ /* check current page array */
++ for (i = 0; i < nr_pages; i++) {
++ if (!PageCompound(pages[i]))
++ continue;
++ if (compound_head(pages[i]) == hpage)
++ return true;
++ }
++
++ /* check previously registered pages */
++ for (i = 0; i < ctx->nr_user_bufs; i++) {
++ struct io_mapped_ubuf *imu = ctx->user_bufs[i];
++
++ for (j = 0; j < imu->nr_bvecs; j++) {
++ if (!PageCompound(imu->bvec[j].bv_page))
++ continue;
++ if (compound_head(imu->bvec[j].bv_page) == hpage)
++ return true;
++ }
++ }
++
++ return false;
++}
++
++static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
++ int nr_pages, struct io_mapped_ubuf *imu,
++ struct page **last_hpage)
++{
++ int i, ret;
++
++ imu->acct_pages = 0;
++ for (i = 0; i < nr_pages; i++) {
++ if (!PageCompound(pages[i])) {
++ imu->acct_pages++;
++ } else {
++ struct page *hpage;
++
++ hpage = compound_head(pages[i]);
++ if (hpage == *last_hpage)
++ continue;
++ *last_hpage = hpage;
++ if (headpage_already_acct(ctx, pages, i, hpage))
++ continue;
++ imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
++ }
++ }
++
++ if (!imu->acct_pages)
++ return 0;
++
++ ret = io_account_mem(ctx, imu->acct_pages);
++ if (ret)
++ imu->acct_pages = 0;
++ return ret;
++}
++
++static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
++ struct io_mapped_ubuf **pimu,
++ struct page **last_hpage)
++{
++ struct io_mapped_ubuf *imu = NULL;
++ struct vm_area_struct **vmas = NULL;
++ struct page **pages = NULL;
++ unsigned long off, start, end, ubuf;
++ size_t size;
++ int ret, pret, nr_pages, i;
++
++ if (!iov->iov_base) {
++ *pimu = ctx->dummy_ubuf;
++ return 0;
++ }
++
++ ubuf = (unsigned long) iov->iov_base;
++ end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ start = ubuf >> PAGE_SHIFT;
++ nr_pages = end - start;
++
++ *pimu = NULL;
++ ret = -ENOMEM;
++
++ pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
++ if (!pages)
++ goto done;
++
++ vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *),
++ GFP_KERNEL);
++ if (!vmas)
++ goto done;
++
++ imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
++ if (!imu)
++ goto done;
++
++ ret = 0;
++ mmap_read_lock(current->mm);
++ pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
++ pages, vmas);
++ if (pret == nr_pages) {
++ struct file *file = vmas[0]->vm_file;
++
++ /* don't support file backed memory */
++ for (i = 0; i < nr_pages; i++) {
++ if (vmas[i]->vm_file != file) {
++ ret = -EINVAL;
++ break;
++ }
++ if (!file)
++ continue;
++ if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) {
++ ret = -EOPNOTSUPP;
++ break;
++ }
++ }
++ } else {
++ ret = pret < 0 ? pret : -EFAULT;
++ }
++ mmap_read_unlock(current->mm);
++ if (ret) {
++ /*
++ * if we did partial map, or found file backed vmas,
++ * release any pages we did get
++ */
++ if (pret > 0)
++ unpin_user_pages(pages, pret);
++ goto done;
++ }
++
++ ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
++ if (ret) {
++ unpin_user_pages(pages, pret);
++ goto done;
++ }
++
++ off = ubuf & ~PAGE_MASK;
++ size = iov->iov_len;
++ for (i = 0; i < nr_pages; i++) {
++ size_t vec_len;
++
++ vec_len = min_t(size_t, size, PAGE_SIZE - off);
++ imu->bvec[i].bv_page = pages[i];
++ imu->bvec[i].bv_len = vec_len;
++ imu->bvec[i].bv_offset = off;
++ off = 0;
++ size -= vec_len;
++ }
++ /* store original address for later verification */
++ imu->ubuf = ubuf;
++ imu->ubuf_end = ubuf + iov->iov_len;
++ imu->nr_bvecs = nr_pages;
++ *pimu = imu;
++ ret = 0;
++done:
++ if (ret)
++ kvfree(imu);
++ kvfree(pages);
++ kvfree(vmas);
++ return ret;
++}
++
++static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
++{
++ ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL);
++ return ctx->user_bufs ? 0 : -ENOMEM;
++}
++
++static int io_buffer_validate(struct iovec *iov)
++{
++ unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
++
++ /*
++ * Don't impose further limits on the size and buffer
++ * constraints here, we'll -EINVAL later when IO is
++ * submitted if they are wrong.
++ */
++ if (!iov->iov_base)
++ return iov->iov_len ? -EFAULT : 0;
++ if (!iov->iov_len)
++ return -EFAULT;
++
++ /* arbitrary limit, but we need something */
++ if (iov->iov_len > SZ_1G)
++ return -EFAULT;
++
++ if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp))
++ return -EOVERFLOW;
++
++ return 0;
++}
++
++static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned int nr_args, u64 __user *tags)
++{
++ struct page *last_hpage = NULL;
++ struct io_rsrc_data *data;
++ int i, ret;
++ struct iovec iov;
++
++ if (ctx->user_bufs)
++ return -EBUSY;
++ if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
++ return -EINVAL;
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ return ret;
++ ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data);
++ if (ret)
++ return ret;
++ ret = io_buffers_map_alloc(ctx, nr_args);
++ if (ret) {
++ io_rsrc_data_free(data);
++ return ret;
++ }
++
++ for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
++ ret = io_copy_iov(ctx, &iov, arg, i);
++ if (ret)
++ break;
++ ret = io_buffer_validate(&iov);
++ if (ret)
++ break;
++ if (!iov.iov_base && *io_get_tag_slot(data, i)) {
++ ret = -EINVAL;
++ break;
++ }
++
++ ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
++ &last_hpage);
++ if (ret)
++ break;
++ }
++
++ WARN_ON_ONCE(ctx->buf_data);
++
++ ctx->buf_data = data;
++ if (ret)
++ __io_sqe_buffers_unregister(ctx);
++ else
++ io_rsrc_node_switch(ctx, NULL);
++ return ret;
++}
++
++static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
++ struct io_uring_rsrc_update2 *up,
++ unsigned int nr_args)
++{
++ u64 __user *tags = u64_to_user_ptr(up->tags);
++ struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
++ struct page *last_hpage = NULL;
++ bool needs_switch = false;
++ __u32 done;
++ int i, err;
++
++ if (!ctx->buf_data)
++ return -ENXIO;
++ if (up->offset + nr_args > ctx->nr_user_bufs)
++ return -EINVAL;
++
++ for (done = 0; done < nr_args; done++) {
++ struct io_mapped_ubuf *imu;
++ int offset = up->offset + done;
++ u64 tag = 0;
++
++ err = io_copy_iov(ctx, &iov, iovs, done);
++ if (err)
++ break;
++ if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
++ err = -EFAULT;
++ break;
++ }
++ err = io_buffer_validate(&iov);
++ if (err)
++ break;
++ if (!iov.iov_base && tag) {
++ err = -EINVAL;
++ break;
++ }
++ err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
++ if (err)
++ break;
++
++ i = array_index_nospec(offset, ctx->nr_user_bufs);
++ if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
++ err = io_queue_rsrc_removal(ctx->buf_data, i,
++ ctx->rsrc_node, ctx->user_bufs[i]);
++ if (unlikely(err)) {
++ io_buffer_unmap(ctx, &imu);
++ break;
++ }
++ ctx->user_bufs[i] = NULL;
++ needs_switch = true;
++ }
++
++ ctx->user_bufs[i] = imu;
++ *io_get_tag_slot(ctx->buf_data, offset) = tag;
++ }
++
++ if (needs_switch)
++ io_rsrc_node_switch(ctx, ctx->buf_data);
++ return done ? done : err;
++}
++
++static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
++{
++ __s32 __user *fds = arg;
++ int fd;
++
++ if (ctx->cq_ev_fd)
++ return -EBUSY;
++
++ if (copy_from_user(&fd, fds, sizeof(*fds)))
++ return -EFAULT;
++
++ ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
++ if (IS_ERR(ctx->cq_ev_fd)) {
++ int ret = PTR_ERR(ctx->cq_ev_fd);
++
++ ctx->cq_ev_fd = NULL;
++ return ret;
++ }
++
++ return 0;
++}
++
++static int io_eventfd_unregister(struct io_ring_ctx *ctx)
++{
++ if (ctx->cq_ev_fd) {
++ eventfd_ctx_put(ctx->cq_ev_fd);
++ ctx->cq_ev_fd = NULL;
++ return 0;
++ }
++
++ return -ENXIO;
++}
++
++static void io_destroy_buffers(struct io_ring_ctx *ctx)
++{
++ struct io_buffer *buf;
++ unsigned long index;
++
++ xa_for_each(&ctx->io_buffers, index, buf)
++ __io_remove_buffers(ctx, buf, index, -1U);
++}
++
++static void io_req_cache_free(struct list_head *list)
++{
++ struct io_kiocb *req, *nxt;
++
++ list_for_each_entry_safe(req, nxt, list, inflight_entry) {
++ list_del(&req->inflight_entry);
++ kmem_cache_free(req_cachep, req);
++ }
++}
++
++static void io_req_caches_free(struct io_ring_ctx *ctx)
++{
++ struct io_submit_state *state = &ctx->submit_state;
++
++ mutex_lock(&ctx->uring_lock);
++
++ if (state->free_reqs) {
++ kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
++ state->free_reqs = 0;
++ }
++
++ io_flush_cached_locked_reqs(ctx, state);
++ io_req_cache_free(&state->free_list);
++ mutex_unlock(&ctx->uring_lock);
++}
++
++static void io_wait_rsrc_data(struct io_rsrc_data *data)
++{
++ if (data && !atomic_dec_and_test(&data->refs))
++ wait_for_completion(&data->done);
++}
++
++static void io_ring_ctx_free(struct io_ring_ctx *ctx)
++{
++ io_sq_thread_finish(ctx);
++
++ /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
++ io_wait_rsrc_data(ctx->buf_data);
++ io_wait_rsrc_data(ctx->file_data);
++
++ mutex_lock(&ctx->uring_lock);
++ if (ctx->buf_data)
++ __io_sqe_buffers_unregister(ctx);
++ if (ctx->file_data)
++ __io_sqe_files_unregister(ctx);
++ if (ctx->rings)
++ __io_cqring_overflow_flush(ctx, true);
++ mutex_unlock(&ctx->uring_lock);
++ io_eventfd_unregister(ctx);
++ io_destroy_buffers(ctx);
++ if (ctx->sq_creds)
++ put_cred(ctx->sq_creds);
++
++ /* there are no registered resources left, nobody uses it */
++ if (ctx->rsrc_node)
++ io_rsrc_node_destroy(ctx->rsrc_node);
++ if (ctx->rsrc_backup_node)
++ io_rsrc_node_destroy(ctx->rsrc_backup_node);
++ flush_delayed_work(&ctx->rsrc_put_work);
++
++ WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
++ WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
++
++#if defined(CONFIG_UNIX)
++ if (ctx->ring_sock) {
++ ctx->ring_sock->file = NULL; /* so that iput() is called */
++ sock_release(ctx->ring_sock);
++ }
++#endif
++ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
++
++ if (ctx->mm_account) {
++ mmdrop(ctx->mm_account);
++ ctx->mm_account = NULL;
++ }
++
++ io_mem_free(ctx->rings);
++ io_mem_free(ctx->sq_sqes);
++
++ percpu_ref_exit(&ctx->refs);
++ free_uid(ctx->user);
++ io_req_caches_free(ctx);
++ if (ctx->hash_map)
++ io_wq_put_hash(ctx->hash_map);
++ kfree(ctx->cancel_hash);
++ kfree(ctx->dummy_ubuf);
++ kfree(ctx);
++}
++
++static __poll_t io_uring_poll(struct file *file, poll_table *wait)
++{
++ struct io_ring_ctx *ctx = file->private_data;
++ __poll_t mask = 0;
++
++ poll_wait(file, &ctx->poll_wait, wait);
++ /*
++ * synchronizes with barrier from wq_has_sleeper call in
++ * io_commit_cqring
++ */
++ smp_rmb();
++ if (!io_sqring_full(ctx))
++ mask |= EPOLLOUT | EPOLLWRNORM;
++
++ /*
++ * Don't flush cqring overflow list here, just do a simple check.
++ * Otherwise there could possibly be an ABBA deadlock:
++ * CPU0 CPU1
++ * ---- ----
++ * lock(&ctx->uring_lock);
++ * lock(&ep->mtx);
++ * lock(&ctx->uring_lock);
++ * lock(&ep->mtx);
++ *
++ * Users may get EPOLLIN while seeing nothing in the cqring; this
++ * pushes them to do the flush.
++ */
++ if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
++ mask |= EPOLLIN | EPOLLRDNORM;
++
++ return mask;
++}
++
++static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
++{
++ const struct cred *creds;
++
++ creds = xa_erase(&ctx->personalities, id);
++ if (creds) {
++ put_cred(creds);
++ return 0;
++ }
++
++ return -EINVAL;
++}
++
++struct io_tctx_exit {
++ struct callback_head task_work;
++ struct completion completion;
++ struct io_ring_ctx *ctx;
++};
++
++static void io_tctx_exit_cb(struct callback_head *cb)
++{
++ struct io_uring_task *tctx = current->io_uring;
++ struct io_tctx_exit *work;
++
++ work = container_of(cb, struct io_tctx_exit, task_work);
++ /*
++ * When @in_idle, we're in cancellation and it's racy to remove the
++ * node. It'll be removed by the end of cancellation, just ignore it.
++ * tctx can be NULL if the queueing of this task_work raced with
++ * work cancelation off the exec path.
++ */
++ if (tctx && !atomic_read(&tctx->in_idle))
++ io_uring_del_tctx_node((unsigned long)work->ctx);
++ complete(&work->completion);
++}
++
++static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
++{
++ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++
++ return req->ctx == data;
++}
++
++static void io_ring_exit_work(struct work_struct *work)
++{
++ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
++ unsigned long timeout = jiffies + HZ * 60 * 5;
++ unsigned long interval = HZ / 20;
++ struct io_tctx_exit exit;
++ struct io_tctx_node *node;
++ int ret;
++
++ /*
++ * If we're doing polled IO and end up having requests being
++ * submitted async (out-of-line), then completions can come in while
++ * we're waiting for refs to drop. We need to reap these manually,
++ * as nobody else will be looking for them.
++ */
++ do {
++ io_uring_try_cancel_requests(ctx, NULL, true);
++ if (ctx->sq_data) {
++ struct io_sq_data *sqd = ctx->sq_data;
++ struct task_struct *tsk;
++
++ io_sq_thread_park(sqd);
++ tsk = sqd->thread;
++ if (tsk && tsk->io_uring && tsk->io_uring->io_wq)
++ io_wq_cancel_cb(tsk->io_uring->io_wq,
++ io_cancel_ctx_cb, ctx, true);
++ io_sq_thread_unpark(sqd);
++ }
++
++ if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
++ /* there is little hope left, don't run it too often */
++ interval = HZ * 60;
++ }
++ /*
++ * This is really an uninterruptible wait, as it has to be
++ * complete. But it's also run from a kworker, which doesn't
++ * take signals, so it's fine to make it interruptible. This
++ * avoids scenarios where we knowingly can wait much longer
++ * on completions, for example if someone does a SIGSTOP on
++ * a task that needs to finish task_work to make this loop
++ * complete. That's a synthetic situation that should not
++ * cause a stuck task backtrace, and hence a potential panic
++ * on stuck tasks if that is enabled.
++ */
++ } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval));
++
++ init_completion(&exit.completion);
++ init_task_work(&exit.task_work, io_tctx_exit_cb);
++ exit.ctx = ctx;
++ /*
++ * Some may use context even when all refs and requests have been put,
++ * and they are free to do so while still holding uring_lock or
++ * completion_lock, see io_req_task_submit(). Apart from other work,
++ * this lock/unlock section also waits for them to finish.
++ */
++ mutex_lock(&ctx->uring_lock);
++ while (!list_empty(&ctx->tctx_list)) {
++ WARN_ON_ONCE(time_after(jiffies, timeout));
++
++ node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
++ ctx_node);
++ /* don't spin on a single task if cancellation failed */
++ list_rotate_left(&ctx->tctx_list);
++ ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
++ if (WARN_ON_ONCE(ret))
++ continue;
++ wake_up_process(node->task);
++
++ mutex_unlock(&ctx->uring_lock);
++ /*
++ * See comment above for
++ * wait_for_completion_interruptible_timeout() on why this
++ * wait is marked as interruptible.
++ */
++ wait_for_completion_interruptible(&exit.completion);
++ mutex_lock(&ctx->uring_lock);
++ }
++ mutex_unlock(&ctx->uring_lock);
++ spin_lock(&ctx->completion_lock);
++ spin_unlock(&ctx->completion_lock);
++
++ io_ring_ctx_free(ctx);
++}
++
++/* Returns true if we found and killed one or more timeouts */
++static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
++ bool cancel_all)
++{
++ struct io_kiocb *req, *tmp;
++ int canceled = 0;
++
++ spin_lock(&ctx->completion_lock);
++ spin_lock_irq(&ctx->timeout_lock);
++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
++ if (io_match_task(req, tsk, cancel_all)) {
++ io_kill_timeout(req, -ECANCELED);
++ canceled++;
++ }
++ }
++ spin_unlock_irq(&ctx->timeout_lock);
++ if (canceled != 0)
++ io_commit_cqring(ctx);
++ spin_unlock(&ctx->completion_lock);
++ if (canceled != 0)
++ io_cqring_ev_posted(ctx);
++ return canceled != 0;
++}
++
++static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
++{
++ unsigned long index;
++ struct creds *creds;
++
++ mutex_lock(&ctx->uring_lock);
++ percpu_ref_kill(&ctx->refs);
++ if (ctx->rings)
++ __io_cqring_overflow_flush(ctx, true);
++ xa_for_each(&ctx->personalities, index, creds)
++ io_unregister_personality(ctx, index);
++ mutex_unlock(&ctx->uring_lock);
++
++ io_kill_timeouts(ctx, NULL, true);
++ io_poll_remove_all(ctx, NULL, true);
++
++ /* if we failed setting up the ctx, we might not have any rings */
++ io_iopoll_try_reap_events(ctx);
++
++ /* drop cached put refs after potentially doing completions */
++ if (current->io_uring)
++ io_uring_drop_tctx_refs(current);
++
++ INIT_WORK(&ctx->exit_work, io_ring_exit_work);
++ /*
++ * Use system_unbound_wq to avoid spawning tons of event kworkers
++ * if we're exiting a ton of rings at the same time. It just adds
++ * noise and overhead, there's no discernable change in runtime
++ * over using system_wq.
++ */
++ queue_work(system_unbound_wq, &ctx->exit_work);
++}
++
++static int io_uring_release(struct inode *inode, struct file *file)
++{
++ struct io_ring_ctx *ctx = file->private_data;
++
++ file->private_data = NULL;
++ io_ring_ctx_wait_and_kill(ctx);
++ return 0;
++}
++
++struct io_task_cancel {
++ struct task_struct *task;
++ bool all;
++};
++
++static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
++{
++ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
++ struct io_task_cancel *cancel = data;
++
++ return io_match_task_safe(req, cancel->task, cancel->all);
++}
++
++static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
++ struct task_struct *task, bool cancel_all)
++{
++ struct io_defer_entry *de;
++ LIST_HEAD(list);
++
++ spin_lock(&ctx->completion_lock);
++ list_for_each_entry_reverse(de, &ctx->defer_list, list) {
++ if (io_match_task_safe(de->req, task, cancel_all)) {
++ list_cut_position(&list, &ctx->defer_list, &de->list);
++ break;
++ }
++ }
++ spin_unlock(&ctx->completion_lock);
++ if (list_empty(&list))
++ return false;
++
++ while (!list_empty(&list)) {
++ de = list_first_entry(&list, struct io_defer_entry, list);
++ list_del_init(&de->list);
++ io_req_complete_failed(de->req, -ECANCELED);
++ kfree(de);
++ }
++ return true;
++}
++
++static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
++{
++ struct io_tctx_node *node;
++ enum io_wq_cancel cret;
++ bool ret = false;
++
++ mutex_lock(&ctx->uring_lock);
++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
++ struct io_uring_task *tctx = node->task->io_uring;
++
++ /*
++ * io_wq will stay alive while we hold uring_lock, because it's
++ * killed after ctx nodes, which requires to take the lock.
++ */
++ if (!tctx || !tctx->io_wq)
++ continue;
++ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
++ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
++ }
++ mutex_unlock(&ctx->uring_lock);
++
++ return ret;
++}
++
++static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
++ struct task_struct *task,
++ bool cancel_all)
++{
++ struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
++ struct io_uring_task *tctx = task ? task->io_uring : NULL;
++
++ while (1) {
++ enum io_wq_cancel cret;
++ bool ret = false;
++
++ if (!task) {
++ ret |= io_uring_try_cancel_iowq(ctx);
++ } else if (tctx && tctx->io_wq) {
++ /*
++ * Cancels requests of all rings, not only @ctx, but
++ * it's fine as the task is in exit/exec.
++ */
++ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
++ &cancel, true);
++ ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
++ }
++
++ /* SQPOLL thread does its own polling */
++ if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
++ (ctx->sq_data && ctx->sq_data->thread == current)) {
++ while (!list_empty_careful(&ctx->iopoll_list)) {
++ io_iopoll_try_reap_events(ctx);
++ ret = true;
++ cond_resched();
++ }
++ }
++
++ ret |= io_cancel_defer_files(ctx, task, cancel_all);
++ ret |= io_poll_remove_all(ctx, task, cancel_all);
++ ret |= io_kill_timeouts(ctx, task, cancel_all);
++ if (task)
++ ret |= io_run_task_work();
++ if (!ret)
++ break;
++ cond_resched();
++ }
++}
++
++static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
++{
++ struct io_uring_task *tctx = current->io_uring;
++ struct io_tctx_node *node;
++ int ret;
++
++ if (unlikely(!tctx)) {
++ ret = io_uring_alloc_task_context(current, ctx);
++ if (unlikely(ret))
++ return ret;
++
++ tctx = current->io_uring;
++ if (ctx->iowq_limits_set) {
++ unsigned int limits[2] = { ctx->iowq_limits[0],
++ ctx->iowq_limits[1], };
++
++ ret = io_wq_max_workers(tctx->io_wq, limits);
++ if (ret)
++ return ret;
++ }
++ }
++ if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
++ node = kmalloc(sizeof(*node), GFP_KERNEL);
++ if (!node)
++ return -ENOMEM;
++ node->ctx = ctx;
++ node->task = current;
++
++ ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx,
++ node, GFP_KERNEL));
++ if (ret) {
++ kfree(node);
++ return ret;
++ }
++
++ mutex_lock(&ctx->uring_lock);
++ list_add(&node->ctx_node, &ctx->tctx_list);
++ mutex_unlock(&ctx->uring_lock);
++ }
++ tctx->last = ctx;
++ return 0;
++}
++
++/*
++ * Note that this task has used io_uring. We use it for cancelation purposes.
++ */
++static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx)
++{
++ struct io_uring_task *tctx = current->io_uring;
++
++ if (likely(tctx && tctx->last == ctx))
++ return 0;
++ return __io_uring_add_tctx_node(ctx);
++}
++
++/*
++ * Remove this io_uring_file -> task mapping.
++ */
++static void io_uring_del_tctx_node(unsigned long index)
++{
++ struct io_uring_task *tctx = current->io_uring;
++ struct io_tctx_node *node;
++
++ if (!tctx)
++ return;
++ node = xa_erase(&tctx->xa, index);
++ if (!node)
++ return;
++
++ WARN_ON_ONCE(current != node->task);
++ WARN_ON_ONCE(list_empty(&node->ctx_node));
++
++ mutex_lock(&node->ctx->uring_lock);
++ list_del(&node->ctx_node);
++ mutex_unlock(&node->ctx->uring_lock);
++
++ if (tctx->last == node->ctx)
++ tctx->last = NULL;
++ kfree(node);
++}
++
++static void io_uring_clean_tctx(struct io_uring_task *tctx)
++{
++ struct io_wq *wq = tctx->io_wq;
++ struct io_tctx_node *node;
++ unsigned long index;
++
++ xa_for_each(&tctx->xa, index, node) {
++ io_uring_del_tctx_node(index);
++ cond_resched();
++ }
++ if (wq) {
++ /*
++ * Must be after io_uring_del_tctx_node() (removes nodes under
++ * uring_lock) to avoid race with io_uring_try_cancel_iowq().
++ */
++ io_wq_put_and_exit(wq);
++ tctx->io_wq = NULL;
++ }
++}
++
++static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
++{
++ if (tracked)
++ return atomic_read(&tctx->inflight_tracked);
++ return percpu_counter_sum(&tctx->inflight);
++}
++
++/*
++ * Find any io_uring ctx that this task has registered or done IO on, and cancel
++ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
++ */
++static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
++{
++ struct io_uring_task *tctx = current->io_uring;
++ struct io_ring_ctx *ctx;
++ s64 inflight;
++ DEFINE_WAIT(wait);
++
++ WARN_ON_ONCE(sqd && sqd->thread != current);
++
++ if (!current->io_uring)
++ return;
++ if (tctx->io_wq)
++ io_wq_exit_start(tctx->io_wq);
++
++ atomic_inc(&tctx->in_idle);
++ do {
++ io_uring_drop_tctx_refs(current);
++ /* read completions before cancelations */
++ inflight = tctx_inflight(tctx, !cancel_all);
++ if (!inflight)
++ break;
++
++ if (!sqd) {
++ struct io_tctx_node *node;
++ unsigned long index;
++
++ xa_for_each(&tctx->xa, index, node) {
++ /* sqpoll task will cancel all its requests */
++ if (node->ctx->sq_data)
++ continue;
++ io_uring_try_cancel_requests(node->ctx, current,
++ cancel_all);
++ }
++ } else {
++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
++ io_uring_try_cancel_requests(ctx, current,
++ cancel_all);
++ }
++
++ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
++ io_run_task_work();
++ io_uring_drop_tctx_refs(current);
++
++ /*
++ * If we've seen completions, retry without waiting. This
++ * avoids a race where a completion comes in before we did
++ * prepare_to_wait().
++ */
++ if (inflight == tctx_inflight(tctx, !cancel_all))
++ schedule();
++ finish_wait(&tctx->wait, &wait);
++ } while (1);
++
++ io_uring_clean_tctx(tctx);
++ if (cancel_all) {
++ /*
++ * We shouldn't run task_works after cancel, so just leave
++ * ->in_idle set for normal exit.
++ */
++ atomic_dec(&tctx->in_idle);
++ /* for exec all current's requests should be gone, kill tctx */
++ __io_uring_free(current);
++ }
++}
++
++void __io_uring_cancel(bool cancel_all)
++{
++ io_uring_cancel_generic(cancel_all, NULL);
++}
++
++static void *io_uring_validate_mmap_request(struct file *file,
++ loff_t pgoff, size_t sz)
++{
++ struct io_ring_ctx *ctx = file->private_data;
++ loff_t offset = pgoff << PAGE_SHIFT;
++ struct page *page;
++ void *ptr;
++
++ switch (offset) {
++ case IORING_OFF_SQ_RING:
++ case IORING_OFF_CQ_RING:
++ ptr = ctx->rings;
++ break;
++ case IORING_OFF_SQES:
++ ptr = ctx->sq_sqes;
++ break;
++ default:
++ return ERR_PTR(-EINVAL);
++ }
++
++ page = virt_to_head_page(ptr);
++ if (sz > page_size(page))
++ return ERR_PTR(-EINVAL);
++
++ return ptr;
++}
++
++#ifdef CONFIG_MMU
++
++static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ size_t sz = vma->vm_end - vma->vm_start;
++ unsigned long pfn;
++ void *ptr;
++
++ ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
++ if (IS_ERR(ptr))
++ return PTR_ERR(ptr);
++
++ pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
++ return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
++}
++
++#else /* !CONFIG_MMU */
++
++static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
++}
++
++static unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
++{
++ return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
++}
++
++static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
++ unsigned long addr, unsigned long len,
++ unsigned long pgoff, unsigned long flags)
++{
++ void *ptr;
++
++ ptr = io_uring_validate_mmap_request(file, pgoff, len);
++ if (IS_ERR(ptr))
++ return PTR_ERR(ptr);
++
++ return (unsigned long) ptr;
++}
++
++#endif /* !CONFIG_MMU */
++
++static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
++{
++ DEFINE_WAIT(wait);
++
++ do {
++ if (!io_sqring_full(ctx))
++ break;
++ prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
++
++ if (!io_sqring_full(ctx))
++ break;
++ schedule();
++ } while (!signal_pending(current));
++
++ finish_wait(&ctx->sqo_sq_wait, &wait);
++ return 0;
++}
++
++static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
++ struct __kernel_timespec __user **ts,
++ const sigset_t __user **sig)
++{
++ struct io_uring_getevents_arg arg;
++
++ /*
++ * If EXT_ARG isn't set, then we have no timespec and the argp pointer
++ * is just a pointer to the sigset_t.
++ */
++ if (!(flags & IORING_ENTER_EXT_ARG)) {
++ *sig = (const sigset_t __user *) argp;
++ *ts = NULL;
++ return 0;
++ }
++
++ /*
++ * EXT_ARG is set - ensure we agree on the size of it and copy in our
++ * timespec and sigset_t pointers if good.
++ */
++ if (*argsz != sizeof(arg))
++ return -EINVAL;
++ if (copy_from_user(&arg, argp, sizeof(arg)))
++ return -EFAULT;
++ if (arg.pad)
++ return -EINVAL;
++ *sig = u64_to_user_ptr(arg.sigmask);
++ *argsz = arg.sigmask_sz;
++ *ts = u64_to_user_ptr(arg.ts);
++ return 0;
++}
++
++SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
++ u32, min_complete, u32, flags, const void __user *, argp,
++ size_t, argsz)
++{
++ struct io_ring_ctx *ctx;
++ int submitted = 0;
++ struct fd f;
++ long ret;
++
++ io_run_task_work();
++
++ if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
++ IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)))
++ return -EINVAL;
++
++ f = fdget(fd);
++ if (unlikely(!f.file))
++ return -EBADF;
++
++ ret = -EOPNOTSUPP;
++ if (unlikely(f.file->f_op != &io_uring_fops))
++ goto out_fput;
++
++ ret = -ENXIO;
++ ctx = f.file->private_data;
++ if (unlikely(!percpu_ref_tryget(&ctx->refs)))
++ goto out_fput;
++
++ ret = -EBADFD;
++ if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
++ goto out;
++
++ /*
++ * For SQ polling, the thread will do all submissions and completions.
++ * Just return the requested submit count, and wake the thread if
++ * we were asked to.
++ */
++ ret = 0;
++ if (ctx->flags & IORING_SETUP_SQPOLL) {
++ io_cqring_overflow_flush(ctx);
++
++ if (unlikely(ctx->sq_data->thread == NULL)) {
++ ret = -EOWNERDEAD;
++ goto out;
++ }
++ if (flags & IORING_ENTER_SQ_WAKEUP)
++ wake_up(&ctx->sq_data->wait);
++ if (flags & IORING_ENTER_SQ_WAIT) {
++ ret = io_sqpoll_wait_sq(ctx);
++ if (ret)
++ goto out;
++ }
++ submitted = to_submit;
++ } else if (to_submit) {
++ ret = io_uring_add_tctx_node(ctx);
++ if (unlikely(ret))
++ goto out;
++ mutex_lock(&ctx->uring_lock);
++ submitted = io_submit_sqes(ctx, to_submit);
++ mutex_unlock(&ctx->uring_lock);
++
++ if (submitted != to_submit)
++ goto out;
++ }
++ if (flags & IORING_ENTER_GETEVENTS) {
++ const sigset_t __user *sig;
++ struct __kernel_timespec __user *ts;
++
++ ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
++ if (unlikely(ret))
++ goto out;
++
++ min_complete = min(min_complete, ctx->cq_entries);
++
++ /*
++ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
++ * space applications don't need to do io completion events
++ * polling again, they can rely on io_sq_thread to do polling
++ * work, which can reduce cpu usage and uring_lock contention.
++ */
++ if (ctx->flags & IORING_SETUP_IOPOLL &&
++ !(ctx->flags & IORING_SETUP_SQPOLL)) {
++ ret = io_iopoll_check(ctx, min_complete);
++ } else {
++ ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
++ }
++ }
++
++out:
++ percpu_ref_put(&ctx->refs);
++out_fput:
++ fdput(f);
++ return submitted ? submitted : ret;
++}
++
++#ifdef CONFIG_PROC_FS
++static int io_uring_show_cred(struct seq_file *m, unsigned int id,
++ const struct cred *cred)
++{
++ struct user_namespace *uns = seq_user_ns(m);
++ struct group_info *gi;
++ kernel_cap_t cap;
++ unsigned __capi;
++ int g;
++
++ seq_printf(m, "%5d\n", id);
++ seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
++ seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
++ seq_puts(m, "\n\tGroups:\t");
++ gi = cred->group_info;
++ for (g = 0; g < gi->ngroups; g++) {
++ seq_put_decimal_ull(m, g ? " " : "",
++ from_kgid_munged(uns, gi->gid[g]));
++ }
++ seq_puts(m, "\n\tCapEff:\t");
++ cap = cred->cap_effective;
++ CAP_FOR_EACH_U32(__capi)
++ seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
++ seq_putc(m, '\n');
++ return 0;
++}
++
++static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
++{
++ struct io_sq_data *sq = NULL;
++ bool has_lock;
++ int i;
++
++ /*
++ * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
++ * since fdinfo case grabs it in the opposite direction of normal use
++ * cases. If we fail to get the lock, we just don't iterate any
++ * structures that could be going away outside the io_uring mutex.
++ */
++ has_lock = mutex_trylock(&ctx->uring_lock);
++
++ if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
++ sq = ctx->sq_data;
++ if (!sq->thread)
++ sq = NULL;
++ }
++
++ seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
++ seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
++ seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
++ for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
++ struct file *f = io_file_from_index(ctx, i);
++
++ if (f)
++ seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
++ else
++ seq_printf(m, "%5u: <none>\n", i);
++ }
++ seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
++ for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
++ struct io_mapped_ubuf *buf = ctx->user_bufs[i];
++ unsigned int len = buf->ubuf_end - buf->ubuf;
++
++ seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len);
++ }
++ if (has_lock && !xa_empty(&ctx->personalities)) {
++ unsigned long index;
++ const struct cred *cred;
++
++ seq_printf(m, "Personalities:\n");
++ xa_for_each(&ctx->personalities, index, cred)
++ io_uring_show_cred(m, index, cred);
++ }
++ seq_printf(m, "PollList:\n");
++ spin_lock(&ctx->completion_lock);
++ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
++ struct hlist_head *list = &ctx->cancel_hash[i];
++ struct io_kiocb *req;
++
++ hlist_for_each_entry(req, list, hash_node)
++ seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
++ req->task->task_works != NULL);
++ }
++ spin_unlock(&ctx->completion_lock);
++ if (has_lock)
++ mutex_unlock(&ctx->uring_lock);
++}
++
++static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
++{
++ struct io_ring_ctx *ctx = f->private_data;
++
++ if (percpu_ref_tryget(&ctx->refs)) {
++ __io_uring_show_fdinfo(ctx, m);
++ percpu_ref_put(&ctx->refs);
++ }
++}
++#endif
++
++static const struct file_operations io_uring_fops = {
++ .release = io_uring_release,
++ .mmap = io_uring_mmap,
++#ifndef CONFIG_MMU
++ .get_unmapped_area = io_uring_nommu_get_unmapped_area,
++ .mmap_capabilities = io_uring_nommu_mmap_capabilities,
++#endif
++ .poll = io_uring_poll,
++#ifdef CONFIG_PROC_FS
++ .show_fdinfo = io_uring_show_fdinfo,
++#endif
++};
++
++static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
++ struct io_uring_params *p)
++{
++ struct io_rings *rings;
++ size_t size, sq_array_offset;
++
++ /* make sure these are sane, as we already accounted them */
++ ctx->sq_entries = p->sq_entries;
++ ctx->cq_entries = p->cq_entries;
++
++ size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
++ if (size == SIZE_MAX)
++ return -EOVERFLOW;
++
++ rings = io_mem_alloc(size);
++ if (!rings)
++ return -ENOMEM;
++
++ ctx->rings = rings;
++ ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
++ rings->sq_ring_mask = p->sq_entries - 1;
++ rings->cq_ring_mask = p->cq_entries - 1;
++ rings->sq_ring_entries = p->sq_entries;
++ rings->cq_ring_entries = p->cq_entries;
++
++ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
++ if (size == SIZE_MAX) {
++ io_mem_free(ctx->rings);
++ ctx->rings = NULL;
++ return -EOVERFLOW;
++ }
++
++ ctx->sq_sqes = io_mem_alloc(size);
++ if (!ctx->sq_sqes) {
++ io_mem_free(ctx->rings);
++ ctx->rings = NULL;
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
++{
++ int ret, fd;
++
++ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
++ if (fd < 0)
++ return fd;
++
++ ret = io_uring_add_tctx_node(ctx);
++ if (ret) {
++ put_unused_fd(fd);
++ return ret;
++ }
++ fd_install(fd, file);
++ return fd;
++}
++
++/*
++ * Allocate an anonymous fd, this is what constitutes the application
++ * visible backing of an io_uring instance. The application mmaps this
++ * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
++ * we have to tie this fd to a socket for file garbage collection purposes.
++ */
++static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
++{
++ struct file *file;
++#if defined(CONFIG_UNIX)
++ int ret;
++
++ ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
++ &ctx->ring_sock);
++ if (ret)
++ return ERR_PTR(ret);
++#endif
++
++ file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
++ O_RDWR | O_CLOEXEC);
++#if defined(CONFIG_UNIX)
++ if (IS_ERR(file)) {
++ sock_release(ctx->ring_sock);
++ ctx->ring_sock = NULL;
++ } else {
++ ctx->ring_sock->file = file;
++ }
++#endif
++ return file;
++}
++
++static int io_uring_create(unsigned entries, struct io_uring_params *p,
++ struct io_uring_params __user *params)
++{
++ struct io_ring_ctx *ctx;
++ struct file *file;
++ int ret;
++
++ if (!entries)
++ return -EINVAL;
++ if (entries > IORING_MAX_ENTRIES) {
++ if (!(p->flags & IORING_SETUP_CLAMP))
++ return -EINVAL;
++ entries = IORING_MAX_ENTRIES;
++ }
++
++ /*
++ * Use twice as many entries for the CQ ring. It's possible for the
++ * application to drive a higher depth than the size of the SQ ring,
++ * since the sqes are only used at submission time. This allows for
++ * some flexibility in overcommitting a bit. If the application has
++ * set IORING_SETUP_CQSIZE, it will have passed in the desired number
++ * of CQ ring entries manually.
++ */
++ p->sq_entries = roundup_pow_of_two(entries);
++ if (p->flags & IORING_SETUP_CQSIZE) {
++ /*
++ * If IORING_SETUP_CQSIZE is set, we do the same roundup
++ * to a power-of-two, if it isn't already. We do NOT impose
++ * any cq vs sq ring sizing.
++ */
++ if (!p->cq_entries)
++ return -EINVAL;
++ if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
++ if (!(p->flags & IORING_SETUP_CLAMP))
++ return -EINVAL;
++ p->cq_entries = IORING_MAX_CQ_ENTRIES;
++ }
++ p->cq_entries = roundup_pow_of_two(p->cq_entries);
++ if (p->cq_entries < p->sq_entries)
++ return -EINVAL;
++ } else {
++ p->cq_entries = 2 * p->sq_entries;
++ }
++
++ ctx = io_ring_ctx_alloc(p);
++ if (!ctx)
++ return -ENOMEM;
++ ctx->compat = in_compat_syscall();
++ if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
++ ctx->user = get_uid(current_user());
++
++ /*
++ * This is just grabbed for accounting purposes. When a process exits,
++ * the mm is exited and dropped before the files, hence we need to hang
++ * on to this mm purely for the purposes of being able to unaccount
++ * memory (locked/pinned vm). It's not used for anything else.
++ */
++ mmgrab(current->mm);
++ ctx->mm_account = current->mm;
++
++ ret = io_allocate_scq_urings(ctx, p);
++ if (ret)
++ goto err;
++
++ ret = io_sq_offload_create(ctx, p);
++ if (ret)
++ goto err;
++ /* always set a rsrc node */
++ ret = io_rsrc_node_switch_start(ctx);
++ if (ret)
++ goto err;
++ io_rsrc_node_switch(ctx, NULL);
++
++ memset(&p->sq_off, 0, sizeof(p->sq_off));
++ p->sq_off.head = offsetof(struct io_rings, sq.head);
++ p->sq_off.tail = offsetof(struct io_rings, sq.tail);
++ p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
++ p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
++ p->sq_off.flags = offsetof(struct io_rings, sq_flags);
++ p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
++ p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
++
++ memset(&p->cq_off, 0, sizeof(p->cq_off));
++ p->cq_off.head = offsetof(struct io_rings, cq.head);
++ p->cq_off.tail = offsetof(struct io_rings, cq.tail);
++ p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
++ p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
++ p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
++ p->cq_off.cqes = offsetof(struct io_rings, cqes);
++ p->cq_off.flags = offsetof(struct io_rings, cq_flags);
++
++ p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
++ IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
++ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
++ IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
++ IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
++ IORING_FEAT_RSRC_TAGS;
++
++ if (copy_to_user(params, p, sizeof(*p))) {
++ ret = -EFAULT;
++ goto err;
++ }
++
++ file = io_uring_get_file(ctx);
++ if (IS_ERR(file)) {
++ ret = PTR_ERR(file);
++ goto err;
++ }
++
++ /*
++ * Install ring fd as the very last thing, so we don't risk someone
++ * having closed it before we finish setup
++ */
++ ret = io_uring_install_fd(ctx, file);
++ if (ret < 0) {
++ /* fput will clean it up */
++ fput(file);
++ return ret;
++ }
++
++ trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
++ return ret;
++err:
++ io_ring_ctx_wait_and_kill(ctx);
++ return ret;
++}
++
++/*
++ * Sets up an io_uring context and returns the fd. Applications ask for a
++ * ring size; we return the actual sq/cq ring sizes (among other things) in the
++ * params structure passed in.
++ */
++static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
++{
++ struct io_uring_params p;
++ int i;
++
++ if (copy_from_user(&p, params, sizeof(p)))
++ return -EFAULT;
++ for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
++ if (p.resv[i])
++ return -EINVAL;
++ }
++
++ if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
++ IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
++ IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
++ IORING_SETUP_R_DISABLED))
++ return -EINVAL;
++
++ return io_uring_create(entries, &p, params);
++}
++
++SYSCALL_DEFINE2(io_uring_setup, u32, entries,
++ struct io_uring_params __user *, params)
++{
++ return io_uring_setup(entries, params);
++}
++
++static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
++{
++ struct io_uring_probe *p;
++ size_t size;
++ int i, ret;
++
++ size = struct_size(p, ops, nr_args);
++ if (size == SIZE_MAX)
++ return -EOVERFLOW;
++ p = kzalloc(size, GFP_KERNEL);
++ if (!p)
++ return -ENOMEM;
++
++ ret = -EFAULT;
++ if (copy_from_user(p, arg, size))
++ goto out;
++ ret = -EINVAL;
++ if (memchr_inv(p, 0, size))
++ goto out;
++
++ p->last_op = IORING_OP_LAST - 1;
++ if (nr_args > IORING_OP_LAST)
++ nr_args = IORING_OP_LAST;
++
++ for (i = 0; i < nr_args; i++) {
++ p->ops[i].op = i;
++ if (!io_op_defs[i].not_supported)
++ p->ops[i].flags = IO_URING_OP_SUPPORTED;
++ }
++ p->ops_len = i;
++
++ ret = 0;
++ if (copy_to_user(arg, p, size))
++ ret = -EFAULT;
++out:
++ kfree(p);
++ return ret;
++}
++
++static int io_register_personality(struct io_ring_ctx *ctx)
++{
++ const struct cred *creds;
++ u32 id;
++ int ret;
++
++ creds = get_current_cred();
++
++ ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
++ XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
++ if (ret < 0) {
++ put_cred(creds);
++ return ret;
++ }
++ return id;
++}
++
++static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned int nr_args)
++{
++ struct io_uring_restriction *res;
++ size_t size;
++ int i, ret;
++
++ /* Restrictions allowed only if rings started disabled */
++ if (!(ctx->flags & IORING_SETUP_R_DISABLED))
++ return -EBADFD;
++
++ /* We allow only a single restrictions registration */
++ if (ctx->restrictions.registered)
++ return -EBUSY;
++
++ if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
++ return -EINVAL;
++
++ size = array_size(nr_args, sizeof(*res));
++ if (size == SIZE_MAX)
++ return -EOVERFLOW;
++
++ res = memdup_user(arg, size);
++ if (IS_ERR(res))
++ return PTR_ERR(res);
++
++ ret = 0;
++
++ for (i = 0; i < nr_args; i++) {
++ switch (res[i].opcode) {
++ case IORING_RESTRICTION_REGISTER_OP:
++ if (res[i].register_op >= IORING_REGISTER_LAST) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ __set_bit(res[i].register_op,
++ ctx->restrictions.register_op);
++ break;
++ case IORING_RESTRICTION_SQE_OP:
++ if (res[i].sqe_op >= IORING_OP_LAST) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
++ break;
++ case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
++ ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
++ break;
++ case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
++ ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
++ break;
++ default:
++ ret = -EINVAL;
++ goto out;
++ }
++ }
++
++out:
++ /* Reset all restrictions if an error happened */
++ if (ret != 0)
++ memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
++ else
++ ctx->restrictions.registered = true;
++
++ kfree(res);
++ return ret;
++}
++
++static int io_register_enable_rings(struct io_ring_ctx *ctx)
++{
++ if (!(ctx->flags & IORING_SETUP_R_DISABLED))
++ return -EBADFD;
++
++ if (ctx->restrictions.registered)
++ ctx->restricted = 1;
++
++ ctx->flags &= ~IORING_SETUP_R_DISABLED;
++ if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
++ wake_up(&ctx->sq_data->wait);
++ return 0;
++}
++
++static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
++ struct io_uring_rsrc_update2 *up,
++ unsigned nr_args)
++{
++ __u32 tmp;
++ int err;
++
++ if (check_add_overflow(up->offset, nr_args, &tmp))
++ return -EOVERFLOW;
++ err = io_rsrc_node_switch_start(ctx);
++ if (err)
++ return err;
++
++ switch (type) {
++ case IORING_RSRC_FILE:
++ return __io_sqe_files_update(ctx, up, nr_args);
++ case IORING_RSRC_BUFFER:
++ return __io_sqe_buffers_update(ctx, up, nr_args);
++ }
++ return -EINVAL;
++}
++
++static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned nr_args)
++{
++ struct io_uring_rsrc_update2 up;
++
++ if (!nr_args)
++ return -EINVAL;
++ memset(&up, 0, sizeof(up));
++ if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
++ return -EFAULT;
++ if (up.resv || up.resv2)
++ return -EINVAL;
++ return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
++}
++
++static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned size, unsigned type)
++{
++ struct io_uring_rsrc_update2 up;
++
++ if (size != sizeof(up))
++ return -EINVAL;
++ if (copy_from_user(&up, arg, sizeof(up)))
++ return -EFAULT;
++ if (!up.nr || up.resv || up.resv2)
++ return -EINVAL;
++ return __io_register_rsrc_update(ctx, type, &up, up.nr);
++}
++
++static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned int size, unsigned int type)
++{
++ struct io_uring_rsrc_register rr;
++
++ /* keep it extendible */
++ if (size != sizeof(rr))
++ return -EINVAL;
++
++ memset(&rr, 0, sizeof(rr));
++ if (copy_from_user(&rr, arg, size))
++ return -EFAULT;
++ if (!rr.nr || rr.resv || rr.resv2)
++ return -EINVAL;
++
++ switch (type) {
++ case IORING_RSRC_FILE:
++ return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
++ rr.nr, u64_to_user_ptr(rr.tags));
++ case IORING_RSRC_BUFFER:
++ return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
++ rr.nr, u64_to_user_ptr(rr.tags));
++ }
++ return -EINVAL;
++}
++
++static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
++ unsigned len)
++{
++ struct io_uring_task *tctx = current->io_uring;
++ cpumask_var_t new_mask;
++ int ret;
++
++ if (!tctx || !tctx->io_wq)
++ return -EINVAL;
++
++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
++ return -ENOMEM;
++
++ cpumask_clear(new_mask);
++ if (len > cpumask_size())
++ len = cpumask_size();
++
++ if (in_compat_syscall()) {
++ ret = compat_get_bitmap(cpumask_bits(new_mask),
++ (const compat_ulong_t __user *)arg,
++ len * 8 /* CHAR_BIT */);
++ } else {
++ ret = copy_from_user(new_mask, arg, len);
++ }
++
++ if (ret) {
++ free_cpumask_var(new_mask);
++ return -EFAULT;
++ }
++
++ ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
++ free_cpumask_var(new_mask);
++ return ret;
++}
++
++static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
++{
++ struct io_uring_task *tctx = current->io_uring;
++
++ if (!tctx || !tctx->io_wq)
++ return -EINVAL;
++
++ return io_wq_cpu_affinity(tctx->io_wq, NULL);
++}
++
++static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
++ void __user *arg)
++ __must_hold(&ctx->uring_lock)
++{
++ struct io_tctx_node *node;
++ struct io_uring_task *tctx = NULL;
++ struct io_sq_data *sqd = NULL;
++ __u32 new_count[2];
++ int i, ret;
++
++ if (copy_from_user(new_count, arg, sizeof(new_count)))
++ return -EFAULT;
++ for (i = 0; i < ARRAY_SIZE(new_count); i++)
++ if (new_count[i] > INT_MAX)
++ return -EINVAL;
++
++ if (ctx->flags & IORING_SETUP_SQPOLL) {
++ sqd = ctx->sq_data;
++ if (sqd) {
++ /*
++ * Observe the correct sqd->lock -> ctx->uring_lock
++ * ordering. Fine to drop uring_lock here, we hold
++ * a ref to the ctx.
++ */
++ refcount_inc(&sqd->refs);
++ mutex_unlock(&ctx->uring_lock);
++ mutex_lock(&sqd->lock);
++ mutex_lock(&ctx->uring_lock);
++ if (sqd->thread)
++ tctx = sqd->thread->io_uring;
++ }
++ } else {
++ tctx = current->io_uring;
++ }
++
++ BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
++
++ for (i = 0; i < ARRAY_SIZE(new_count); i++)
++ if (new_count[i])
++ ctx->iowq_limits[i] = new_count[i];
++ ctx->iowq_limits_set = true;
++
++ ret = -EINVAL;
++ if (tctx && tctx->io_wq) {
++ ret = io_wq_max_workers(tctx->io_wq, new_count);
++ if (ret)
++ goto err;
++ } else {
++ memset(new_count, 0, sizeof(new_count));
++ }
++
++ if (sqd) {
++ mutex_unlock(&sqd->lock);
++ io_put_sq_data(sqd);
++ }
++
++ if (copy_to_user(arg, new_count, sizeof(new_count)))
++ return -EFAULT;
++
++ /* that's it for SQPOLL, only the SQPOLL task creates requests */
++ if (sqd)
++ return 0;
++
++ /* now propagate the restriction to all registered users */
++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
++ struct io_uring_task *tctx = node->task->io_uring;
++
++ if (WARN_ON_ONCE(!tctx->io_wq))
++ continue;
++
++ for (i = 0; i < ARRAY_SIZE(new_count); i++)
++ new_count[i] = ctx->iowq_limits[i];
++ /* ignore errors, it always returns zero anyway */
++ (void)io_wq_max_workers(tctx->io_wq, new_count);
++ }
++ return 0;
++err:
++ if (sqd) {
++ mutex_unlock(&sqd->lock);
++ io_put_sq_data(sqd);
++ }
++ return ret;
++}
++
++static bool io_register_op_must_quiesce(int op)
++{
++ switch (op) {
++ case IORING_REGISTER_BUFFERS:
++ case IORING_UNREGISTER_BUFFERS:
++ case IORING_REGISTER_FILES:
++ case IORING_UNREGISTER_FILES:
++ case IORING_REGISTER_FILES_UPDATE:
++ case IORING_REGISTER_PROBE:
++ case IORING_REGISTER_PERSONALITY:
++ case IORING_UNREGISTER_PERSONALITY:
++ case IORING_REGISTER_FILES2:
++ case IORING_REGISTER_FILES_UPDATE2:
++ case IORING_REGISTER_BUFFERS2:
++ case IORING_REGISTER_BUFFERS_UPDATE:
++ case IORING_REGISTER_IOWQ_AFF:
++ case IORING_UNREGISTER_IOWQ_AFF:
++ case IORING_REGISTER_IOWQ_MAX_WORKERS:
++ return false;
++ default:
++ return true;
++ }
++}
++
++static int io_ctx_quiesce(struct io_ring_ctx *ctx)
++{
++ long ret;
++
++ percpu_ref_kill(&ctx->refs);
++
++ /*
++ * Drop uring mutex before waiting for references to exit. If another
++ * thread is currently inside io_uring_enter() it might need to grab the
++ * uring_lock to make progress. If we hold it here across the drain
++ * wait, then we can deadlock. It's safe to drop the mutex here, since
++ * no new references will come in after we've killed the percpu ref.
++ */
++ mutex_unlock(&ctx->uring_lock);
++ do {
++ ret = wait_for_completion_interruptible(&ctx->ref_comp);
++ if (!ret)
++ break;
++ ret = io_run_task_work_sig();
++ } while (ret >= 0);
++ mutex_lock(&ctx->uring_lock);
++
++ if (ret)
++ io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
++ return ret;
++}
++
++static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
++ void __user *arg, unsigned nr_args)
++ __releases(ctx->uring_lock)
++ __acquires(ctx->uring_lock)
++{
++ int ret;
++
++ /*
++ * We're inside the ring mutex, if the ref is already dying, then
++ * someone else killed the ctx or is already going through
++ * io_uring_register().
++ */
++ if (percpu_ref_is_dying(&ctx->refs))
++ return -ENXIO;
++
++ if (ctx->restricted) {
++ opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
++ if (!test_bit(opcode, ctx->restrictions.register_op))
++ return -EACCES;
++ }
++
++ if (io_register_op_must_quiesce(opcode)) {
++ ret = io_ctx_quiesce(ctx);
++ if (ret)
++ return ret;
++ }
++
++ switch (opcode) {
++ case IORING_REGISTER_BUFFERS:
++ ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
++ break;
++ case IORING_UNREGISTER_BUFFERS:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_sqe_buffers_unregister(ctx);
++ break;
++ case IORING_REGISTER_FILES:
++ ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
++ break;
++ case IORING_UNREGISTER_FILES:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_sqe_files_unregister(ctx);
++ break;
++ case IORING_REGISTER_FILES_UPDATE:
++ ret = io_register_files_update(ctx, arg, nr_args);
++ break;
++ case IORING_REGISTER_EVENTFD:
++ case IORING_REGISTER_EVENTFD_ASYNC:
++ ret = -EINVAL;
++ if (nr_args != 1)
++ break;
++ ret = io_eventfd_register(ctx, arg);
++ if (ret)
++ break;
++ if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
++ ctx->eventfd_async = 1;
++ else
++ ctx->eventfd_async = 0;
++ break;
++ case IORING_UNREGISTER_EVENTFD:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_eventfd_unregister(ctx);
++ break;
++ case IORING_REGISTER_PROBE:
++ ret = -EINVAL;
++ if (!arg || nr_args > 256)
++ break;
++ ret = io_probe(ctx, arg, nr_args);
++ break;
++ case IORING_REGISTER_PERSONALITY:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_register_personality(ctx);
++ break;
++ case IORING_UNREGISTER_PERSONALITY:
++ ret = -EINVAL;
++ if (arg)
++ break;
++ ret = io_unregister_personality(ctx, nr_args);
++ break;
++ case IORING_REGISTER_ENABLE_RINGS:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_register_enable_rings(ctx);
++ break;
++ case IORING_REGISTER_RESTRICTIONS:
++ ret = io_register_restrictions(ctx, arg, nr_args);
++ break;
++ case IORING_REGISTER_FILES2:
++ ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
++ break;
++ case IORING_REGISTER_FILES_UPDATE2:
++ ret = io_register_rsrc_update(ctx, arg, nr_args,
++ IORING_RSRC_FILE);
++ break;
++ case IORING_REGISTER_BUFFERS2:
++ ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
++ break;
++ case IORING_REGISTER_BUFFERS_UPDATE:
++ ret = io_register_rsrc_update(ctx, arg, nr_args,
++ IORING_RSRC_BUFFER);
++ break;
++ case IORING_REGISTER_IOWQ_AFF:
++ ret = -EINVAL;
++ if (!arg || !nr_args)
++ break;
++ ret = io_register_iowq_aff(ctx, arg, nr_args);
++ break;
++ case IORING_UNREGISTER_IOWQ_AFF:
++ ret = -EINVAL;
++ if (arg || nr_args)
++ break;
++ ret = io_unregister_iowq_aff(ctx);
++ break;
++ case IORING_REGISTER_IOWQ_MAX_WORKERS:
++ ret = -EINVAL;
++ if (!arg || nr_args != 2)
++ break;
++ ret = io_register_iowq_max_workers(ctx, arg);
++ break;
++ default:
++ ret = -EINVAL;
++ break;
++ }
++
++ if (io_register_op_must_quiesce(opcode)) {
++ /* bring the ctx back to life */
++ percpu_ref_reinit(&ctx->refs);
++ reinit_completion(&ctx->ref_comp);
++ }
++ return ret;
++}
++
++SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
++ void __user *, arg, unsigned int, nr_args)
++{
++ struct io_ring_ctx *ctx;
++ long ret = -EBADF;
++ struct fd f;
++
++ if (opcode >= IORING_REGISTER_LAST)
++ return -EINVAL;
++
++ f = fdget(fd);
++ if (!f.file)
++ return -EBADF;
++
++ ret = -EOPNOTSUPP;
++ if (f.file->f_op != &io_uring_fops)
++ goto out_fput;
++
++ ctx = f.file->private_data;
++
++ io_run_task_work();
++
++ mutex_lock(&ctx->uring_lock);
++ ret = __io_uring_register(ctx, opcode, arg, nr_args);
++ mutex_unlock(&ctx->uring_lock);
++ trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
++ ctx->cq_ev_fd != NULL, ret);
++out_fput:
++ fdput(f);
++ return ret;
++}
++
++static int __init io_uring_init(void)
++{
++#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
++ BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
++ BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
++} while (0)
++
++#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
++ __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
++ BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
++ BUILD_BUG_SQE_ELEM(0, __u8, opcode);
++ BUILD_BUG_SQE_ELEM(1, __u8, flags);
++ BUILD_BUG_SQE_ELEM(2, __u16, ioprio);
++ BUILD_BUG_SQE_ELEM(4, __s32, fd);
++ BUILD_BUG_SQE_ELEM(8, __u64, off);
++ BUILD_BUG_SQE_ELEM(8, __u64, addr2);
++ BUILD_BUG_SQE_ELEM(16, __u64, addr);
++ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
++ BUILD_BUG_SQE_ELEM(24, __u32, len);
++ BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
++ BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
++ BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
++ BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
++ BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
++ BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, accept_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
++ BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
++ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
++ BUILD_BUG_SQE_ELEM(32, __u64, user_data);
++ BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
++ BUILD_BUG_SQE_ELEM(40, __u16, buf_group);
++ BUILD_BUG_SQE_ELEM(42, __u16, personality);
++ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
++ BUILD_BUG_SQE_ELEM(44, __u32, file_index);
++
++ BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
++ sizeof(struct io_uring_rsrc_update));
++ BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
++ sizeof(struct io_uring_rsrc_update2));
++
++ /* ->buf_index is u16 */
++ BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
++
++ /* should fit into one byte */
++ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
++
++ BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
++ BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
++
++ req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
++ SLAB_ACCOUNT);
++ return 0;
++};
++__initcall(io_uring_init);
+diff --git a/ipc/mqueue.c b/ipc/mqueue.c
+index 5becca9be867c..089c34d0732cf 100644
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -45,6 +45,7 @@
+
+ struct mqueue_fs_context {
+ struct ipc_namespace *ipc_ns;
++ bool newns; /* Set if newly created ipc namespace */
+ };
+
+ #define MQUEUE_MAGIC 0x19800202
+@@ -427,6 +428,14 @@ static int mqueue_get_tree(struct fs_context *fc)
+ {
+ struct mqueue_fs_context *ctx = fc->fs_private;
+
++ /*
++ * With a newly created ipc namespace, we don't need to do a search
++ * for an ipc namespace match, but we still need to set s_fs_info.
++ */
++ if (ctx->newns) {
++ fc->s_fs_info = ctx->ipc_ns;
++ return get_tree_nodev(fc, mqueue_fill_super);
++ }
+ return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
+ }
+
+@@ -454,6 +463,10 @@ static int mqueue_init_fs_context(struct fs_context *fc)
+ return 0;
+ }
+
++/*
++ * mq_init_ns() is currently the only caller of mq_create_mount().
++ * So the ns parameter is always a newly created ipc namespace.
++ */
+ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
+ {
+ struct mqueue_fs_context *ctx;
+@@ -465,6 +478,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
+ return ERR_CAST(fc);
+
+ ctx = fc->fs_private;
++ ctx->newns = true;
+ put_ipc_ns(ctx->ipc_ns);
+ ctx->ipc_ns = get_ipc_ns(ns);
+ put_user_ns(fc->user_ns);
+diff --git a/ipc/sem.c b/ipc/sem.c
+index 6693daf4fe112..c1f3ca244a698 100644
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
+ */
+ un = lookup_undo(ulp, semid);
+ if (un) {
++ spin_unlock(&ulp->lock);
+ kvfree(new);
+ goto success;
+ }
+@@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
+ ipc_assert_locked_object(&sma->sem_perm);
+ list_add(&new->list_id, &sma->list_id);
+ un = new;
+-
+-success:
+ spin_unlock(&ulp->lock);
++success:
+ sem_unlock(sma, -1);
+ out:
+ return un;
+@@ -2182,14 +2182,15 @@ long __do_semtimedop(int semid, struct sembuf *sops,
+ * scenarios where we were awakened externally, during the
+ * window between wake_q_add() and wake_up_q().
+ */
++ rcu_read_lock();
+ error = READ_ONCE(queue.status);
+ if (error != -EINTR) {
+ /* see SEM_BARRIER_2 for purpose/pairing */
+ smp_acquire__after_ctrl_dep();
++ rcu_read_unlock();
+ goto out;
+ }
+
+- rcu_read_lock();
+ locknum = sem_lock(sma, sops, nsops);
+
+ if (!ipc_valid_object(&sma->sem_perm))
+diff --git a/ipc/shm.c b/ipc/shm.c
+index ab749be6d8b71..048eb183b24b9 100644
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */
+ struct pid *shm_lprid;
+ struct ucounts *mlock_ucounts;
+
+- /* The task created the shm object. NULL if the task is dead. */
++ /*
++ * The task created the shm object, for
++ * task_lock(shp->shm_creator)
++ */
+ struct task_struct *shm_creator;
+- struct list_head shm_clist; /* list by creator */
++
++ /*
++ * List by creator. task_lock(->shm_creator) required for read/write.
++ * If list_empty(), then the creator is dead already.
++ */
++ struct list_head shm_clist;
++ struct ipc_namespace *ns;
+ } __randomize_layout;
+
+ /* shm_mode upper byte flags */
+@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
+ struct shmid_kernel *shp;
+
+ shp = container_of(ipcp, struct shmid_kernel, shm_perm);
++ WARN_ON(ns != shp->ns);
+
+ if (shp->shm_nattch) {
+ shp->shm_perm.mode |= SHM_DEST;
+@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head)
+ kfree(shp);
+ }
+
+-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
++/*
++ * It has to be called with shp locked.
++ * It must be called before ipc_rmid()
++ */
++static inline void shm_clist_rm(struct shmid_kernel *shp)
+ {
+- list_del(&s->shm_clist);
+- ipc_rmid(&shm_ids(ns), &s->shm_perm);
++ struct task_struct *creator;
++
++ /* ensure that shm_creator does not disappear */
++ rcu_read_lock();
++
++ /*
++ * A concurrent exit_shm may do a list_del_init() as well.
++ * Just do nothing if exit_shm already did the work
++ */
++ if (!list_empty(&shp->shm_clist)) {
++ /*
++ * shp->shm_creator is guaranteed to be valid *only*
++ * if shp->shm_clist is not empty.
++ */
++ creator = shp->shm_creator;
++
++ task_lock(creator);
++ /*
++ * list_del_init() is a nop if the entry was already removed
++ * from the list.
++ */
++ list_del_init(&shp->shm_clist);
++ task_unlock(creator);
++ }
++ rcu_read_unlock();
++}
++
++static inline void shm_rmid(struct shmid_kernel *s)
++{
++ shm_clist_rm(s);
++ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
+ }
+
+
+@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+ shm_file = shp->shm_file;
+ shp->shm_file = NULL;
+ ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- shm_rmid(ns, shp);
++ shm_rmid(shp);
+ shm_unlock(shp);
+ if (!is_file_hugepages(shm_file))
+ shmem_lock(shm_file, 0, shp->mlock_ucounts);
+@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+ *
+ * 2) sysctl kernel.shm_rmid_forced is set to 1.
+ */
+-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
++static bool shm_may_destroy(struct shmid_kernel *shp)
+ {
+ return (shp->shm_nattch == 0) &&
+- (ns->shm_rmid_forced ||
++ (shp->ns->shm_rmid_forced ||
+ (shp->shm_perm.mode & SHM_DEST));
+ }
+
+@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_struct *vma)
+ ipc_update_pid(&shp->shm_lprid, task_tgid(current));
+ shp->shm_dtim = ktime_get_real_seconds();
+ shp->shm_nattch--;
+- if (shm_may_destroy(ns, shp))
++ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
+ *
+ * As shp->* are changed under rwsem, it's safe to skip shp locking.
+ */
+- if (shp->shm_creator != NULL)
++ if (!list_empty(&shp->shm_clist))
+ return 0;
+
+- if (shm_may_destroy(ns, shp)) {
++ if (shm_may_destroy(shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
+ /* Locking assumes this will only be called with task == current */
+ void exit_shm(struct task_struct *task)
+ {
+- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+- struct shmid_kernel *shp, *n;
++ for (;;) {
++ struct shmid_kernel *shp;
++ struct ipc_namespace *ns;
+
+- if (list_empty(&task->sysvshm.shm_clist))
+- return;
++ task_lock(task);
++
++ if (list_empty(&task->sysvshm.shm_clist)) {
++ task_unlock(task);
++ break;
++ }
++
++ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
++ shm_clist);
+
+- /*
+- * If kernel.shm_rmid_forced is not set then only keep track of
+- * which shmids are orphaned, so that a later set of the sysctl
+- * can clean them up.
+- */
+- if (!ns->shm_rmid_forced) {
+- down_read(&shm_ids(ns).rwsem);
+- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+- shp->shm_creator = NULL;
+ /*
+- * Only under read lock but we are only called on current
+- * so no entry on the list will be shared.
++ * 1) Get a pointer to the ipc namespace. It is worth noting
++ * that this pointer is guaranteed to be valid because
++ * the shp lifetime is always shorter than the lifetime of
++ * the namespace in which shp lives.
++ * Since we have taken task_lock, shp won't be freed.
+ */
+- list_del(&task->sysvshm.shm_clist);
+- up_read(&shm_ids(ns).rwsem);
+- return;
+- }
++ ns = shp->ns;
+
+- /*
+- * Destroy all already created segments, that were not yet mapped,
+- * and mark any mapped as orphan to cover the sysctl toggling.
+- * Destroy is skipped if shm_may_destroy() returns false.
+- */
+- down_write(&shm_ids(ns).rwsem);
+- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+- shp->shm_creator = NULL;
++ /*
++ * 2) If kernel.shm_rmid_forced is not set then only keep track of
++ * which shmids are orphaned, so that a later set of the sysctl
++ * can clean them up.
++ */
++ if (!ns->shm_rmid_forced)
++ goto unlink_continue;
+
+- if (shm_may_destroy(ns, shp)) {
+- shm_lock_by_ptr(shp);
+- shm_destroy(ns, shp);
++ /*
++ * 3) get a reference to the namespace.
++ * The refcount could already be 0. If it is 0, then
++ * the shm objects will be freed by free_ipc_work().
++ */
++ ns = get_ipc_ns_not_zero(ns);
++ if (!ns) {
++unlink_continue:
++ list_del_init(&shp->shm_clist);
++ task_unlock(task);
++ continue;
+ }
+- }
+
+- /* Remove the list head from any segments still attached. */
+- list_del(&task->sysvshm.shm_clist);
+- up_write(&shm_ids(ns).rwsem);
++ /*
++ * 4) get a reference to shp.
++ * This cannot fail: shm_clist_rm() is called before
++ * ipc_rmid(), thus the refcount cannot be 0.
++ */
++ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
++
++ /*
++ * 5) unlink the shm segment from the list of segments
++ * created by current.
++ * This must be done last. After unlinking,
++ * only the refcounts obtained above prevent IPC_RMID
++ * from destroying the segment or the namespace.
++ */
++ list_del_init(&shp->shm_clist);
++
++ task_unlock(task);
++
++ /*
++ * 6) we have all references.
++ * Thus lock shp and, if needed, destroy it.
++ */
++ down_write(&shm_ids(ns).rwsem);
++ shm_lock_by_ptr(shp);
++ /*
++ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, so it's
++ * safe to call ipc_rcu_putref here
++ */
++ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
++
++ if (ipc_valid_object(&shp->shm_perm)) {
++ if (shm_may_destroy(shp))
++ shm_destroy(ns, shp);
++ else
++ shm_unlock(shp);
++ } else {
++ /*
++ * Someone else deleted the shp from the namespace
++ * idr/kht while we waited.
++ * Just unlock and continue.
++ */
++ shm_unlock(shp);
++ }
++
++ up_write(&shm_ids(ns).rwsem);
++ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
++ }
+ }
+
+ static vm_fault_t shm_fault(struct vm_fault *vmf)
+@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
+ if (error < 0)
+ goto no_id;
+
++ shp->ns = ns;
++
++ task_lock(current);
+ list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
++ task_unlock(current);
+
+ /*
+ * shmid gets reported as "inode#" in /proc/pid/maps.
+@@ -1573,7 +1669,8 @@ out_nattch:
+ down_write(&shm_ids(ns).rwsem);
+ shp = shm_lock(ns, shmid);
+ shp->shm_nattch--;
+- if (shm_may_destroy(ns, shp))
++
++ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+diff --git a/ipc/util.c b/ipc/util.c
+index d48d8cfa1f3fa..fa2d86ef3fb80 100644
+--- a/ipc/util.c
++++ b/ipc/util.c
+@@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
+ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+ {
+ if (ipcp->key != IPC_PRIVATE)
+- rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+- ipc_kht_params);
++ WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
++ ipc_kht_params));
+ }
+
+ /**
+@@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+ {
+ int idx = ipcid_to_idx(ipcp->id);
+
+- idr_remove(&ids->ipcs_idr, idx);
++ WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp);
+ ipc_kht_remove(ids, ipcp);
+ ids->in_use--;
+ ipcp->deleted = true;
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 4df609be42d07..599cb926449a6 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -59,7 +59,7 @@ obj-$(CONFIG_FREEZER) += freezer.o
+ obj-$(CONFIG_PROFILING) += profile.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
+-obj-$(CONFIG_FUTEX) += futex.o
++obj-$(CONFIG_FUTEX) += futex/
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += smp.o
+ ifneq ($(CONFIG_SMP),y)
+@@ -113,7 +113,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
+ obj-$(CONFIG_BPF) += bpf/
+ obj-$(CONFIG_KCSAN) += kcsan/
+ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
+-obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
++obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
++obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
+ obj-$(CONFIG_CFI_CLANG) += cfi.o
+
+ obj-$(CONFIG_PERF_EVENTS) += events/
+diff --git a/kernel/acct.c b/kernel/acct.c
+index 23a7ab8e6cbc8..2b5cc63eb295b 100644
+--- a/kernel/acct.c
++++ b/kernel/acct.c
+@@ -331,6 +331,8 @@ static comp_t encode_comp_t(unsigned long value)
+ exp++;
+ }
+
++ if (exp > (((comp_t) ~0U) >> MANTSIZE))
++ return (comp_t) ~0U;
+ /*
+ * Clean it up and polish it off.
+ */
+diff --git a/kernel/async.c b/kernel/async.c
+index b8d7a663497f9..b2c4ba5686ee4 100644
+--- a/kernel/async.c
++++ b/kernel/async.c
+@@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
+ atomic_inc(&entry_count);
+ spin_unlock_irqrestore(&async_lock, flags);
+
+- /* mark that this task has queued an async job, used by module init */
+- current->flags |= PF_USED_ASYNC;
+-
+ /* schedule for execution */
+ queue_work_node(node, system_unbound_wq, &entry->work);
+
+diff --git a/kernel/audit.c b/kernel/audit.c
+index 121d37e700a62..94ded5de91317 100644
+--- a/kernel/audit.c
++++ b/kernel/audit.c
+@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb)
+ /**
+ * kauditd_rehold_skb - Handle a audit record send failure in the hold queue
+ * @skb: audit record
++ * @error: error code (unused)
+ *
+ * Description:
+ * This should only be used by the kauditd_thread when it fails to flush the
+ * hold queue.
+ */
+-static void kauditd_rehold_skb(struct sk_buff *skb)
++static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
+ {
+- /* put the record back in the queue at the same place */
+- skb_queue_head(&audit_hold_queue, skb);
++ /* put the record back in the queue */
++ skb_queue_tail(&audit_hold_queue, skb);
+ }
+
+ /**
+ * kauditd_hold_skb - Queue an audit record, waiting for auditd
+ * @skb: audit record
++ * @error: error code
+ *
+ * Description:
+ * Queue the audit record, waiting for an instance of auditd. When this
+@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
+ * and queue it, if we have room. If we want to hold on to the record, but we
+ * don't have room, record a record lost message.
+ */
+-static void kauditd_hold_skb(struct sk_buff *skb)
++static void kauditd_hold_skb(struct sk_buff *skb, int error)
+ {
+ /* at this point it is uncertain if we will ever send this to auditd so
+ * try to send the message via printk before we go any further */
+ kauditd_printk_skb(skb);
+
+ /* can we just silently drop the message? */
+- if (!audit_default) {
+- kfree_skb(skb);
+- return;
++ if (!audit_default)
++ goto drop;
++
++ /* the hold queue is only for when the daemon goes away completely,
++ * not -EAGAIN failures; if we are in a -EAGAIN state requeue the
++ * record on the retry queue unless it's full, in which case drop it
++ */
++ if (error == -EAGAIN) {
++ if (!audit_backlog_limit ||
++ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
++ skb_queue_tail(&audit_retry_queue, skb);
++ return;
++ }
++ audit_log_lost("kauditd retry queue overflow");
++ goto drop;
+ }
+
+- /* if we have room, queue the message */
++ /* if we have room in the hold queue, queue the message */
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_hold_queue, skb);
+@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb)
+
+ /* we have no other options - drop the message */
+ audit_log_lost("kauditd hold queue overflow");
++drop:
+ kfree_skb(skb);
+ }
+
+ /**
+ * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+ * @skb: audit record
++ * @error: error code (unused)
+ *
+ * Description:
+ * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+ * but for some reason we are having problems sending it audit records so
+ * queue the given record and attempt to resend.
+ */
+-static void kauditd_retry_skb(struct sk_buff *skb)
++static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
+ {
+- /* NOTE: because records should only live in the retry queue for a
+- * short period of time, before either being sent or moved to the hold
+- * queue, we don't currently enforce a limit on this queue */
+- skb_queue_tail(&audit_retry_queue, skb);
++ if (!audit_backlog_limit ||
++ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
++ skb_queue_tail(&audit_retry_queue, skb);
++ return;
++ }
++
++ /* we have to drop the record, send it via printk as a last effort */
++ kauditd_printk_skb(skb);
++ audit_log_lost("kauditd retry queue overflow");
++ kfree_skb(skb);
+ }
+
+ /**
+@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac)
+ /* flush the retry queue to the hold queue, but don't touch the main
+ * queue since we need to process that normally for multicast */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+- kauditd_hold_skb(skb);
++ kauditd_hold_skb(skb, -ECONNREFUSED);
+ }
+
+ /**
+@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
+ struct sk_buff_head *queue,
+ unsigned int retry_limit,
+ void (*skb_hook)(struct sk_buff *skb),
+- void (*err_hook)(struct sk_buff *skb))
++ void (*err_hook)(struct sk_buff *skb, int error))
+ {
+ int rc = 0;
+- struct sk_buff *skb;
+- static unsigned int failed = 0;
++ struct sk_buff *skb = NULL;
++ struct sk_buff *skb_tail;
++ unsigned int failed = 0;
+
+ /* NOTE: kauditd_thread takes care of all our locking, we just use
+ * the netlink info passed to us (e.g. sk and portid) */
+
+- while ((skb = skb_dequeue(queue))) {
++ skb_tail = skb_peek_tail(queue);
++ while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
+ /* call the skb_hook for each skb we touch */
+ if (skb_hook)
+ (*skb_hook)(skb);
+@@ -731,36 +755,34 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
+ /* can we send to anyone via unicast? */
+ if (!sk) {
+ if (err_hook)
+- (*err_hook)(skb);
++ (*err_hook)(skb, -ECONNREFUSED);
+ continue;
+ }
+
++retry:
+ /* grab an extra skb reference in case of error */
+ skb_get(skb);
+ rc = netlink_unicast(sk, skb, portid, 0);
+ if (rc < 0) {
+- /* fatal failure for our queue flush attempt? */
++ /* send failed - try a few times unless fatal error */
+ if (++failed >= retry_limit ||
+ rc == -ECONNREFUSED || rc == -EPERM) {
+- /* yes - error processing for the queue */
+ sk = NULL;
+ if (err_hook)
+- (*err_hook)(skb);
+- if (!skb_hook)
+- goto out;
+- /* keep processing with the skb_hook */
++ (*err_hook)(skb, rc);
++ if (rc == -EAGAIN)
++ rc = 0;
++ /* continue to drain the queue */
+ continue;
+ } else
+- /* no - requeue to preserve ordering */
+- skb_queue_head(queue, skb);
++ goto retry;
+ } else {
+- /* it worked - drop the extra reference and continue */
++ /* skb sent - drop the extra reference and continue */
+ consume_skb(skb);
+ failed = 0;
+ }
+ }
+
+-out:
+ return (rc >= 0 ? 0 : rc);
+ }
+
+@@ -1542,6 +1564,20 @@ static void audit_receive(struct sk_buff *skb)
+ nlh = nlmsg_next(nlh, &len);
+ }
+ audit_ctl_unlock();
++
++ /* can't block with the ctrl lock, so penalize the sender now */
++ if (audit_backlog_limit &&
++ (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
++ DECLARE_WAITQUEUE(wait, current);
++
++ /* wake kauditd to try and flush the queue */
++ wake_up_interruptible(&kauditd_wait);
++
++ add_wait_queue_exclusive(&audit_backlog_wait, &wait);
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(audit_backlog_wait_time);
++ remove_wait_queue(&audit_backlog_wait, &wait);
++ }
+ }
+
+ /* Log information about who is connecting to the audit multicast socket */
+@@ -1609,7 +1645,8 @@ static int __net_init audit_net_init(struct net *net)
+ audit_panic("cannot initialize netlink socket in namespace");
+ return -ENOMEM;
+ }
+- aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
++ /* limit the timeout in case auditd is blocked/stopped */
++ aunet->sk->sk_sndtimeo = HZ / 10;
+
+ return 0;
+ }
+@@ -1825,7 +1862,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
+ * using a PID anchored in the caller's namespace
+ * 2. generator holding the audit_cmd_mutex - we don't want to block
+- * while holding the mutex */
++ * while holding the mutex, although we do penalize the sender
++ * later in audit_receive() when it is safe to block
++ */
+ if (!(auditd_test_task(current) || audit_ctl_owner_current())) {
+ long stime = audit_backlog_wait_time;
+
+diff --git a/kernel/audit.h b/kernel/audit.h
+index d6a2c899a8dbf..b2ef4c0d3ec03 100644
+--- a/kernel/audit.h
++++ b/kernel/audit.h
+@@ -194,6 +194,10 @@ struct audit_context {
+ struct {
+ char *name;
+ } module;
++ struct {
++ struct audit_ntp_data ntp_data;
++ struct timespec64 tk_injoffset;
++ } time;
+ };
+ int fds[2];
+ struct audit_proctitle proctitle;
+diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
+index 60739d5e3373f..c428312938e95 100644
+--- a/kernel/audit_fsnotify.c
++++ b/kernel/audit_fsnotify.c
+@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
+
+ ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true);
+ if (ret < 0) {
++ audit_mark->path = NULL;
+ fsnotify_put_mark(&audit_mark->mark);
+ audit_mark = ERR_PTR(ret);
+ }
+diff --git a/kernel/auditsc.c b/kernel/auditsc.c
+index b1cb1dbf7417f..e7fedf504f760 100644
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -1219,6 +1219,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+ from_kuid(&init_user_ns, name->fcap.rootid));
+ }
+
++static void audit_log_time(struct audit_context *context, struct audit_buffer **ab)
++{
++ const struct audit_ntp_data *ntp = &context->time.ntp_data;
++ const struct timespec64 *tk = &context->time.tk_injoffset;
++ static const char * const ntp_name[] = {
++ "offset",
++ "freq",
++ "status",
++ "tai",
++ "tick",
++ "adjust",
++ };
++ int type;
++
++ if (context->type == AUDIT_TIME_ADJNTPVAL) {
++ for (type = 0; type < AUDIT_NTP_NVALS; type++) {
++ if (ntp->vals[type].newval != ntp->vals[type].oldval) {
++ if (!*ab) {
++ *ab = audit_log_start(context,
++ GFP_KERNEL,
++ AUDIT_TIME_ADJNTPVAL);
++ if (!*ab)
++ return;
++ }
++ audit_log_format(*ab, "op=%s old=%lli new=%lli",
++ ntp_name[type],
++ ntp->vals[type].oldval,
++ ntp->vals[type].newval);
++ audit_log_end(*ab);
++ *ab = NULL;
++ }
++ }
++ }
++ if (tk->tv_sec != 0 || tk->tv_nsec != 0) {
++ if (!*ab) {
++ *ab = audit_log_start(context, GFP_KERNEL,
++ AUDIT_TIME_INJOFFSET);
++ if (!*ab)
++ return;
++ }
++ audit_log_format(*ab, "sec=%lli nsec=%li",
++ (long long)tk->tv_sec, tk->tv_nsec);
++ audit_log_end(*ab);
++ *ab = NULL;
++ }
++}
++
+ static void show_special(struct audit_context *context, int *call_panic)
+ {
+ struct audit_buffer *ab;
+@@ -1327,6 +1374,11 @@ static void show_special(struct audit_context *context, int *call_panic)
+ audit_log_format(ab, "(null)");
+
+ break;
++ case AUDIT_TIME_ADJNTPVAL:
++ case AUDIT_TIME_INJOFFSET:
++ /* this call deviates from the rest, eating the buffer */
++ audit_log_time(context, &ab);
++ break;
+ }
+ audit_log_end(ab);
+ }
+@@ -2148,6 +2200,8 @@ void __audit_inode_child(struct inode *parent,
+ }
+ }
+
++ cond_resched();
++
+ /* is there a matching child entry? */
+ list_for_each_entry(n, &context->names_list, list) {
+ /* can only match entries that have a name */
+@@ -2564,31 +2618,26 @@ void __audit_fanotify(unsigned int response)
+
+ void __audit_tk_injoffset(struct timespec64 offset)
+ {
+- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET,
+- "sec=%lli nsec=%li",
+- (long long)offset.tv_sec, offset.tv_nsec);
+-}
+-
+-static void audit_log_ntp_val(const struct audit_ntp_data *ad,
+- const char *op, enum audit_ntp_type type)
+-{
+- const struct audit_ntp_val *val = &ad->vals[type];
+-
+- if (val->newval == val->oldval)
+- return;
++ struct audit_context *context = audit_context();
+
+- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL,
+- "op=%s old=%lli new=%lli", op, val->oldval, val->newval);
++ /* only set type if not already set by NTP */
++ if (!context->type)
++ context->type = AUDIT_TIME_INJOFFSET;
++ memcpy(&context->time.tk_injoffset, &offset, sizeof(offset));
+ }
+
+ void __audit_ntp_log(const struct audit_ntp_data *ad)
+ {
+- audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET);
+- audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ);
+- audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS);
+- audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI);
+- audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK);
+- audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
++ struct audit_context *context = audit_context();
++ int type;
++
++ for (type = 0; type < AUDIT_NTP_NVALS; type++)
++ if (ad->vals[type].newval != ad->vals[type].oldval) {
++ /* unconditionally set type, overwriting TK */
++ context->type = AUDIT_TIME_ADJNTPVAL;
++ memcpy(&context->time.ntp_data, ad, sizeof(*ad));
++ break;
++ }
+ }
+
+ void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
+diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
+index 447def5405444..88014cd31b28a 100644
+--- a/kernel/bpf/arraymap.c
++++ b/kernel/bpf/arraymap.c
+@@ -620,6 +620,11 @@ static int bpf_iter_init_array_map(void *priv_data,
+ seq_info->percpu_value_buf = value_buf;
+ }
+
++ /* bpf_iter_attach_map() acquires a map uref, and the uref may be
++ * released before or in the middle of iterating map elements, so
++ * acquire an extra map uref for iterator.
++ */
++ bpf_map_inc_with_uref(map);
+ seq_info->map = map;
+ return 0;
+ }
+@@ -628,6 +633,7 @@ static void bpf_iter_fini_array_map(void *priv_data)
+ {
+ struct bpf_iter_seq_array_map_info *seq_info = priv_data;
+
++ bpf_map_put_with_uref(seq_info->map);
+ kfree(seq_info->percpu_value_buf);
+ }
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index b305270b7a4bd..5ef8eaf4985ed 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -48,11 +48,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner)
+ return map->ops->map_owner_storage_ptr(owner);
+ }
+
++static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
++{
++ return !hlist_unhashed_lockless(&selem->snode);
++}
++
+ static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
+ {
+ return !hlist_unhashed(&selem->snode);
+ }
+
++static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
++{
++ return !hlist_unhashed_lockless(&selem->map_node);
++}
++
+ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
+ {
+ return !hlist_unhashed(&selem->map_node);
+@@ -71,7 +81,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (selem) {
+ if (value)
+- memcpy(SDATA(selem)->data, value, smap->map.value_size);
++ copy_map_value(&smap->map, SDATA(selem)->data, value);
+ return selem;
+ }
+
+@@ -142,7 +152,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
+ bool free_local_storage = false;
+ unsigned long flags;
+
+- if (unlikely(!selem_linked_to_storage(selem)))
++ if (unlikely(!selem_linked_to_storage_lockless(selem)))
+ /* selem has already been unlinked from sk */
+ return;
+
+@@ -170,7 +180,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+
+- if (unlikely(!selem_linked_to_map(selem)))
++ if (unlikely(!selem_linked_to_map_lockless(selem)))
+ /* selem has already be unlinked from smap */
+ return;
+
+@@ -373,7 +383,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+ err = check_flags(old_sdata, map_flags);
+ if (err)
+ return ERR_PTR(err);
+- if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
++ if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
+ copy_map_value_locked(&smap->map, old_sdata->data,
+ value, false);
+ return old_sdata;
+@@ -506,11 +516,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem, map_node))) {
+ if (busy_counter) {
+ migrate_disable();
+- __this_cpu_inc(*busy_counter);
++ this_cpu_inc(*busy_counter);
+ }
+ bpf_selem_unlink(selem);
+ if (busy_counter) {
+- __this_cpu_dec(*busy_counter);
++ this_cpu_dec(*busy_counter);
+ migrate_enable();
+ }
+ cond_resched_rcu();
+diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
+index ebfa8bc908923..6b7bfce239158 100644
+--- a/kernel/bpf/bpf_task_storage.c
++++ b/kernel/bpf/bpf_task_storage.c
+@@ -25,20 +25,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);
+ static void bpf_task_storage_lock(void)
+ {
+ migrate_disable();
+- __this_cpu_inc(bpf_task_storage_busy);
++ this_cpu_inc(bpf_task_storage_busy);
+ }
+
+ static void bpf_task_storage_unlock(void)
+ {
+- __this_cpu_dec(bpf_task_storage_busy);
++ this_cpu_dec(bpf_task_storage_busy);
+ migrate_enable();
+ }
+
+ static bool bpf_task_storage_trylock(void)
+ {
+ migrate_disable();
+- if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+- __this_cpu_dec(bpf_task_storage_busy);
++ if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
++ this_cpu_dec(bpf_task_storage_busy);
+ migrate_enable();
+ return false;
+ }
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index dfe61df4f974d..5d4bea53ac1f8 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -633,13 +633,12 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+ return offset < btf->hdr.str_len;
+ }
+
+-static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
++static bool __btf_name_char_ok(char c, bool first)
+ {
+ if ((first ? !isalpha(c) :
+ !isalnum(c)) &&
+ c != '_' &&
+- ((c == '.' && !dot_ok) ||
+- c != '.'))
++ c != '.')
+ return false;
+ return true;
+ }
+@@ -656,20 +655,20 @@ static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
+ return NULL;
+ }
+
+-static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
++static bool __btf_name_valid(const struct btf *btf, u32 offset)
+ {
+ /* offset must be valid */
+ const char *src = btf_str_by_offset(btf, offset);
+ const char *src_limit;
+
+- if (!__btf_name_char_ok(*src, true, dot_ok))
++ if (!__btf_name_char_ok(*src, true))
+ return false;
+
+ /* set a limit on identifier length */
+ src_limit = src + KSYM_NAME_LEN;
+ src++;
+ while (*src && src < src_limit) {
+- if (!__btf_name_char_ok(*src, false, dot_ok))
++ if (!__btf_name_char_ok(*src, false))
+ return false;
+ src++;
+ }
+@@ -677,17 +676,14 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
+ return !*src;
+ }
+
+-/* Only C-style identifier is permitted. This can be relaxed if
+- * necessary.
+- */
+ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+ {
+- return __btf_name_valid(btf, offset, false);
++ return __btf_name_valid(btf, offset);
+ }
+
+ static bool btf_name_valid_section(const struct btf *btf, u32 offset)
+ {
+- return __btf_name_valid(btf, offset, true);
++ return __btf_name_valid(btf, offset);
+ }
+
+ static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
+@@ -2983,7 +2979,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
+ if (v->next_member) {
+ const struct btf_type *last_member_type;
+ const struct btf_member *last_member;
+- u16 last_member_type_id;
++ u32 last_member_type_id;
+
+ last_member = btf_type_member(v->t) + v->next_member - 1;
+ last_member_type_id = last_member->type;
+@@ -3536,7 +3532,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
+ }
+
+ if (!t->name_off ||
+- !__btf_name_valid(env->btf, t->name_off, true)) {
++ !__btf_name_valid(env->btf, t->name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+@@ -3655,6 +3651,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env,
+ struct btf *btf = env->btf;
+ u16 i;
+
++ env->resolve_mode = RESOLVE_TBD;
+ for_each_vsi_from(i, v->next_member, v->t, vsi) {
+ u32 var_type_id = vsi->type, type_id, type_size = 0;
+ const struct btf_type *var_type = btf_type_by_id(env->btf,
+@@ -3864,6 +3861,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
+ break;
+ }
+
++ if (btf_type_is_resolve_source_only(arg_type)) {
++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
++ return -EINVAL;
++ }
++
+ if (args[i].name_off &&
+ (!btf_name_offset_valid(btf, args[i].name_off) ||
+ !btf_name_valid_identifier(btf, args[i].name_off))) {
+@@ -4332,8 +4334,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
+ log->len_total = log_size;
+
+ /* log attributes have to be sane */
+- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
+- !log->level || !log->ubuf) {
++ if (!bpf_verifier_log_attr_valid(log)) {
+ err = -EINVAL;
+ goto errout;
+ }
+@@ -4464,6 +4465,7 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ if (!ctx_struct)
+ /* should not happen */
+ return NULL;
++again:
+ ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ if (!ctx_tname) {
+ /* should not happen */
+@@ -4477,8 +4479,16 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ * { // no fields of skb are ever used }
+ */
+- if (strcmp(ctx_tname, tname))
+- return NULL;
++ if (strcmp(ctx_tname, tname)) {
++ /* bpf_user_pt_regs_t is a typedef, so resolve it to
++ * underlying struct and check name again
++ */
++ if (!btf_type_is_modifier(ctx_struct))
++ return NULL;
++ while (btf_type_is_modifier(ctx_struct))
++ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type);
++ goto again;
++ }
+ return ctx_type;
+ }
+
+@@ -4801,10 +4811,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+ const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
++ u32 type, flag;
+
+- if (ctx_arg_info->offset == off &&
+- (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
+- ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
++ type = base_type(ctx_arg_info->reg_type);
++ flag = type_flag(ctx_arg_info->reg_type);
++ if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
++ (flag & PTR_MAYBE_NULL)) {
+ info->reg_type = ctx_arg_info->reg_type;
+ return true;
+ }
+@@ -5440,6 +5452,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs,
+ bool ptr_to_mem_ok)
+ {
++ enum bpf_prog_type prog_type = env->prog->type == BPF_PROG_TYPE_EXT ?
++ env->prog->aux->dst_prog->type : env->prog->type;
+ struct bpf_verifier_log *log = &env->log;
+ const char *func_name, *ref_tname;
+ const struct btf_type *t, *ref_t;
+@@ -5509,9 +5523,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
+ if (reg->type == PTR_TO_BTF_ID) {
+ reg_btf = reg->btf;
+ reg_ref_id = reg->btf_id;
+- } else if (reg2btf_ids[reg->type]) {
++ } else if (reg2btf_ids[base_type(reg->type)]) {
+ reg_btf = btf_vmlinux;
+- reg_ref_id = *reg2btf_ids[reg->type];
++ reg_ref_id = *reg2btf_ids[base_type(reg->type)];
+ } else {
+ bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
+ func_name, i,
+@@ -5532,8 +5546,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
+ reg_ref_tname);
+ return -EINVAL;
+ }
+- } else if (btf_get_prog_ctx_type(log, btf, t,
+- env->prog->type, i)) {
++ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
+ /* If function expects ctx type in BTF check that caller
+ * is passing PTR_TO_CTX.
+ */
+@@ -5718,7 +5731,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
+ return -EINVAL;
+ }
+
+- reg->type = PTR_TO_MEM_OR_NULL;
++ reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
+ reg->id = ++env->id_gen;
+
+ continue;
+@@ -6007,12 +6020,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+ return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
+ }
+
++enum {
++ BTF_MODULE_F_LIVE = (1 << 0),
++};
++
+ #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ struct btf_module {
+ struct list_head list;
+ struct module *module;
+ struct btf *btf;
+ struct bin_attribute *sysfs_attr;
++ int flags;
+ };
+
+ static LIST_HEAD(btf_modules);
+@@ -6038,7 +6056,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
+ int err = 0;
+
+ if (mod->btf_data_size == 0 ||
+- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
++ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
++ op != MODULE_STATE_GOING))
+ goto out;
+
+ switch (op) {
+@@ -6095,6 +6114,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
+ btf_mod->sysfs_attr = attr;
+ }
+
++ break;
++ case MODULE_STATE_LIVE:
++ mutex_lock(&btf_module_mutex);
++ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
++ if (btf_mod->module != module)
++ continue;
++
++ btf_mod->flags |= BTF_MODULE_F_LIVE;
++ break;
++ }
++ mutex_unlock(&btf_module_mutex);
+ break;
+ case MODULE_STATE_GOING:
+ mutex_lock(&btf_module_mutex);
+@@ -6141,7 +6171,12 @@ struct module *btf_try_get_module(const struct btf *btf)
+ if (btf_mod->btf != btf)
+ continue;
+
+- if (try_module_get(btf_mod->module))
++ /* We must only consider modules whose __init routine has
++ * finished, hence we must check for the BTF_MODULE_F_LIVE flag,
++ * which is set from the notifier callback for
++ * MODULE_STATE_LIVE.
++ */
++ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
+ res = btf_mod->module;
+
+ break;
+@@ -6208,7 +6243,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
+ .func = bpf_btf_find_by_name_kind,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+- .arg1_type = ARG_PTR_TO_MEM,
++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
+index 03145d45e3d5b..297569e5c6399 100644
+--- a/kernel/bpf/cgroup.c
++++ b/kernel/bpf/cgroup.c
+@@ -667,6 +667,62 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+ return ERR_PTR(-ENOENT);
+ }
+
++/**
++ * purge_effective_progs() - After compute_effective_progs fails to alloc new
++ * cgrp->bpf.inactive table we can recover by
++ * recomputing the array in place.
++ *
++ * @cgrp: The cgroup whose descendants to traverse
++ * @prog: A program to detach or NULL
++ * @link: A link to detach or NULL
++ * @atype: Type of detach operation
++ */
++static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
++ struct bpf_cgroup_link *link,
++ enum cgroup_bpf_attach_type atype)
++{
++ struct cgroup_subsys_state *css;
++ struct bpf_prog_array *progs;
++ struct bpf_prog_list *pl;
++ struct list_head *head;
++ struct cgroup *cg;
++ int pos;
++
++ /* recompute effective prog array in place */
++ css_for_each_descendant_pre(css, &cgrp->self) {
++ struct cgroup *desc = container_of(css, struct cgroup, self);
++
++ if (percpu_ref_is_zero(&desc->bpf.refcnt))
++ continue;
++
++ /* find position of link or prog in effective progs array */
++ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
++ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
++ continue;
++
++ head = &cg->bpf.progs[atype];
++ list_for_each_entry(pl, head, node) {
++ if (!prog_list_prog(pl))
++ continue;
++ if (pl->prog == prog && pl->link == link)
++ goto found;
++ pos++;
++ }
++ }
++
++ /* no link or prog match, skip the cgroup of this layer */
++ continue;
++found:
++ progs = rcu_dereference_protected(
++ desc->bpf.effective[atype],
++ lockdep_is_held(&cgroup_mutex));
++
++ /* Remove the program from the array */
++ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
++ "Failed to purge a prog from array at index %d", pos);
++ }
++}
++
+ /**
+ * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
+ * propagate the change to descendants
+@@ -686,7 +742,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_prog_list *pl;
+ struct list_head *progs;
+ u32 flags;
+- int err;
+
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+@@ -708,9 +763,12 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ pl->prog = NULL;
+ pl->link = NULL;
+
+- err = update_effective_progs(cgrp, atype);
+- if (err)
+- goto cleanup;
++ if (update_effective_progs(cgrp, atype)) {
++ /* if updating the effective array failed, replace the prog with a dummy prog */
++ pl->prog = old_prog;
++ pl->link = link;
++ purge_effective_progs(cgrp, old_prog, link, atype);
++ }
+
+ /* now can actually delete it from this cgroup list */
+ list_del(&pl->node);
+@@ -722,12 +780,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ bpf_prog_put(old_prog);
+ static_branch_dec(&cgroup_bpf_enabled_key[atype]);
+ return 0;
+-
+-cleanup:
+- /* restore back prog or link */
+- pl->prog = old_prog;
+- pl->link = link;
+- return err;
+ }
+
+ /* Must be called with cgroup_mutex held to avoid races. */
+@@ -1429,6 +1481,12 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
+ ret = 1;
+ } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
+ /* optlen is out of bounds */
++ if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
++ pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
++ ctx.optlen, max_optlen);
++ ret = 0;
++ goto out;
++ }
+ ret = -EFAULT;
+ } else {
+ /* optlen within bounds, run kernel handler */
+@@ -1484,6 +1542,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ .optname = optname,
+ .retval = retval,
+ };
++ int orig_optlen;
+ int ret;
+
+ /* Opportunistic check to see whether we have any BPF program
+@@ -1493,6 +1552,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
+ return retval;
+
++ orig_optlen = max_optlen;
+ ctx.optlen = max_optlen;
+
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
+@@ -1516,6 +1576,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ ret = -EFAULT;
+ goto out;
+ }
++ orig_optlen = ctx.optlen;
+
+ if (copy_from_user(ctx.optval, optval,
+ min(ctx.optlen, max_optlen)) != 0) {
+@@ -1534,7 +1595,13 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ goto out;
+ }
+
+- if (ctx.optlen > max_optlen || ctx.optlen < 0) {
++ if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) {
++ if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
++ pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
++ ctx.optlen, max_optlen);
++ ret = retval;
++ goto out;
++ }
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -1548,8 +1615,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ }
+
+ if (ctx.optlen != 0) {
+- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+- put_user(ctx.optlen, optlen)) {
++ if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) {
++ ret = -EFAULT;
++ goto out;
++ }
++ if (put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -1753,7 +1823,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ };
+
+@@ -1773,6 +1843,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ return &bpf_sysctl_get_new_value_proto;
+ case BPF_FUNC_sysctl_set_new_value:
+ return &bpf_sysctl_set_new_value_proto;
++ case BPF_FUNC_ktime_get_coarse_ns:
++ return &bpf_ktime_get_coarse_ns_proto;
+ default:
+ return cgroup_base_func_proto(func_id, prog);
+ }
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 6e3ae90ad107a..f7c27c1cc593b 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -32,6 +32,7 @@
+ #include <linux/perf_event.h>
+ #include <linux/extable.h>
+ #include <linux/log2.h>
++#include <linux/nospec.h>
+
+ #include <asm/barrier.h>
+ #include <asm/unaligned.h>
+@@ -66,11 +67,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
+ {
+ u8 *ptr = NULL;
+
+- if (k >= SKF_NET_OFF)
++ if (k >= SKF_NET_OFF) {
+ ptr = skb_network_header(skb) + k - SKF_NET_OFF;
+- else if (k >= SKF_LL_OFF)
++ } else if (k >= SKF_LL_OFF) {
++ if (unlikely(!skb_mac_header_was_set(skb)))
++ return NULL;
+ ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
+-
++ }
+ if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
+ return ptr;
+
+@@ -389,6 +392,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
+ i = end_new;
+ insn = prog->insnsi + end_old;
+ }
++ if (bpf_pseudo_func(insn)) {
++ ret = bpf_adj_delta_to_imm(insn, pos, end_old,
++ end_new, i, probe_pass);
++ if (ret)
++ return ret;
++ continue;
++ }
+ code = insn->code;
+ if ((BPF_CLASS(code) != BPF_JMP &&
+ BPF_CLASS(code) != BPF_JMP32) ||
+@@ -819,7 +829,7 @@ static int __init bpf_jit_charge_init(void)
+ {
+ /* Only used as heuristic here to derive limit. */
+ bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
+- bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
++ bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
+ PAGE_SIZE), LONG_MAX);
+ return 0;
+ }
+@@ -1639,9 +1649,7 @@ out:
+ * reuse preexisting logic from Spectre v1 mitigation that
+ * happens to produce the required code on x86 for v4 as well.
+ */
+-#ifdef CONFIG_X86
+ barrier_nospec();
+-#endif
+ CONT;
+ #define LDST(SIZEOP, SIZE) \
+ STX_MEM_##SIZEOP: \
+@@ -1652,6 +1660,11 @@ out:
+ CONT; \
+ LDX_MEM_##SIZEOP: \
+ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
++ CONT; \
++ LDX_PROBE_MEM_##SIZEOP: \
++ bpf_probe_read_kernel(&DST, sizeof(SIZE), \
++ (const void *)(long) (SRC + insn->off)); \
++ DST = *((SIZE *)&DST); \
+ CONT;
+
+ LDST(B, u8)
+@@ -1659,15 +1672,6 @@ out:
+ LDST(W, u32)
+ LDST(DW, u64)
+ #undef LDST
+-#define LDX_PROBE(SIZEOP, SIZE) \
+- LDX_PROBE_MEM_##SIZEOP: \
+- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \
+- CONT;
+- LDX_PROBE(B, 1)
+- LDX_PROBE(H, 2)
+- LDX_PROBE(W, 4)
+- LDX_PROBE(DW, 8)
+-#undef LDX_PROBE
+
+ #define ATOMIC_ALU_OP(BOP, KOP) \
+ case BOP: \
+diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
+index 585b2b77ccc4f..8d1c4b3ee7604 100644
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -26,6 +26,7 @@
+ #include <linux/workqueue.h>
+ #include <linux/kthread.h>
+ #include <linux/capability.h>
++#include <linux/completion.h>
+ #include <trace/events/xdp.h>
+
+ #include <linux/netdevice.h> /* netif_receive_skb_list */
+@@ -70,6 +71,7 @@ struct bpf_cpu_map_entry {
+ struct rcu_head rcu;
+
+ struct work_struct kthread_stop_wq;
++ struct completion kthread_running;
+ };
+
+ struct bpf_cpu_map {
+@@ -126,22 +128,6 @@ static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+ atomic_inc(&rcpu->refcnt);
+ }
+
+-/* called from workqueue, to workaround syscall using preempt_disable */
+-static void cpu_map_kthread_stop(struct work_struct *work)
+-{
+- struct bpf_cpu_map_entry *rcpu;
+-
+- rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+-
+- /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
+- * as it waits until all in-flight call_rcu() callbacks complete.
+- */
+- rcu_barrier();
+-
+- /* kthread_stop will wake_up_process and wait for it to complete */
+- kthread_stop(rcpu->kthread);
+-}
+-
+ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+ {
+ /* The tear-down procedure should have made sure that queue is
+@@ -149,11 +135,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+ * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+ * gracefully and warn once.
+ */
+- struct xdp_frame *xdpf;
++ void *ptr;
+
+- while ((xdpf = ptr_ring_consume(ring)))
+- if (WARN_ON_ONCE(xdpf))
+- xdp_return_frame(xdpf);
++ while ((ptr = ptr_ring_consume(ring))) {
++ WARN_ON_ONCE(1);
++ if (unlikely(__ptr_test_bit(0, &ptr))) {
++ __ptr_clear_bit(0, &ptr);
++ kfree_skb(ptr);
++ continue;
++ }
++ xdp_return_frame(ptr);
++ }
+ }
+
+ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+@@ -169,6 +161,22 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+ }
+ }
+
++/* called from workqueue, to workaround syscall using preempt_disable */
++static void cpu_map_kthread_stop(struct work_struct *work)
++{
++ struct bpf_cpu_map_entry *rcpu;
++
++ rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
++
++ /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
++ * as it waits until all in-flight call_rcu() callbacks complete.
++ */
++ rcu_barrier();
++
++ /* kthread_stop will wake_up_process and wait for it to complete */
++ kthread_stop(rcpu->kthread);
++}
++
+ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+ struct list_head *listp,
+ struct xdp_cpumap_stats *stats)
+@@ -294,11 +302,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+ return nframes;
+ }
+
+-
+ static int cpu_map_kthread_run(void *data)
+ {
+ struct bpf_cpu_map_entry *rcpu = data;
+
++ complete(&rcpu->kthread_running);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* When kthread gives stop order, then rcpu have been disconnected
+@@ -461,6 +469,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+ goto free_ptr_ring;
+
+ /* Setup kthread */
++ init_completion(&rcpu->kthread_running);
+ rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
+ "cpumap/%d/map:%d", cpu,
+ map->id);
+@@ -474,6 +483,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+ kthread_bind(rcpu->kthread, cpu);
+ wake_up_process(rcpu->kthread);
+
++ /* Make sure kthread has been running, so kthread_stop() will not
++ * stop the kthread prematurely and all pending frames or skbs
++ * will be handled by the kthread before kthread_stop() returns.
++ */
++ wait_for_completion(&rcpu->kthread_running);
++
+ return rcpu;
+
+ free_prog:
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index 32471ba027086..a63c68f5945cd 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -161,17 +161,25 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
+ unsigned long *pflags)
+ {
+ unsigned long flags;
++ bool use_raw_lock;
+
+- hash = hash & HASHTAB_MAP_LOCK_MASK;
++ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
+
+- migrate_disable();
++ use_raw_lock = htab_use_raw_lock(htab);
++ if (use_raw_lock)
++ preempt_disable();
++ else
++ migrate_disable();
+ if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+ __this_cpu_dec(*(htab->map_locked[hash]));
+- migrate_enable();
++ if (use_raw_lock)
++ preempt_enable();
++ else
++ migrate_enable();
+ return -EBUSY;
+ }
+
+- if (htab_use_raw_lock(htab))
++ if (use_raw_lock)
+ raw_spin_lock_irqsave(&b->raw_lock, flags);
+ else
+ spin_lock_irqsave(&b->lock, flags);
+@@ -184,13 +192,18 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
+ struct bucket *b, u32 hash,
+ unsigned long flags)
+ {
+- hash = hash & HASHTAB_MAP_LOCK_MASK;
+- if (htab_use_raw_lock(htab))
++ bool use_raw_lock = htab_use_raw_lock(htab);
++
++ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
++ if (use_raw_lock)
+ raw_spin_unlock_irqrestore(&b->raw_lock, flags);
+ else
+ spin_unlock_irqrestore(&b->lock, flags);
+ __this_cpu_dec(*(htab->map_locked[hash]));
+- migrate_enable();
++ if (use_raw_lock)
++ preempt_enable();
++ else
++ migrate_enable();
+ }
+
+ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
+@@ -291,12 +304,8 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
+ struct htab_elem *l;
+
+ if (node) {
+- u32 key_size = htab->map.key_size;
+-
+ l = container_of(node, struct htab_elem, lru_node);
+- memcpy(l->key, key, key_size);
+- check_and_init_map_value(&htab->map,
+- l->key + round_up(key_size, 8));
++ memcpy(l->key, key, htab->map.key_size);
+ return l;
+ }
+
+@@ -1156,7 +1165,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
+
+ ret = htab_lock_bucket(htab, b, hash, &flags);
+ if (ret)
+- return ret;
++ goto err_lock_bucket;
+
+ l_old = lookup_elem_raw(head, hash, key, key_size);
+
+@@ -1177,6 +1186,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
+ err:
+ htab_unlock_bucket(htab, b, hash, flags);
+
++err_lock_bucket:
+ if (ret)
+ htab_lru_push_free(htab, l_new);
+ else if (l_old)
+@@ -1279,7 +1289,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+
+ ret = htab_lock_bucket(htab, b, hash, &flags);
+ if (ret)
+- return ret;
++ goto err_lock_bucket;
+
+ l_old = lookup_elem_raw(head, hash, key, key_size);
+
+@@ -1302,6 +1312,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+ ret = 0;
+ err:
+ htab_unlock_bucket(htab, b, hash, flags);
++err_lock_bucket:
+ if (l_new)
+ bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+ return ret;
+@@ -1662,8 +1673,11 @@ again_nocopy:
+ /* do not grab the lock unless need it (bucket_cnt > 0). */
+ if (locked) {
+ ret = htab_lock_bucket(htab, b, batch, &flags);
+- if (ret)
+- goto next_batch;
++ if (ret) {
++ rcu_read_unlock();
++ bpf_enable_instrumentation();
++ goto after_loop;
++ }
+ }
+
+ bucket_cnt = 0;
+@@ -2023,6 +2037,7 @@ static int bpf_iter_init_hash_map(void *priv_data,
+ seq_info->percpu_value_buf = value_buf;
+ }
+
++ bpf_map_inc_with_uref(map);
+ seq_info->map = map;
+ seq_info->htab = container_of(map, struct bpf_htab, map);
+ return 0;
+@@ -2032,6 +2047,7 @@ static void bpf_iter_fini_hash_map(void *priv_data)
+ {
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
++ bpf_map_put_with_uref(seq_info->map);
+ kfree(seq_info->percpu_value_buf);
+ }
+
+diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
+index 9aabf84afd4b2..a711ffe238932 100644
+--- a/kernel/bpf/helpers.c
++++ b/kernel/bpf/helpers.c
+@@ -530,7 +530,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
+ .func = bpf_strtol,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+- .arg1_type = ARG_PTR_TO_MEM,
++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_LONG,
+@@ -558,7 +558,7 @@ const struct bpf_func_proto bpf_strtoul_proto = {
+ .func = bpf_strtoul,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+- .arg1_type = ARG_PTR_TO_MEM,
++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_LONG,
+@@ -630,7 +630,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -667,7 +667,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+ const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+ .func = bpf_per_cpu_ptr,
+ .gpl_only = false,
+- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+ .arg2_type = ARG_ANYTHING,
+ };
+@@ -680,7 +680,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+ .func = bpf_this_cpu_ptr,
+ .gpl_only = false,
+- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID,
++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+ };
+
+@@ -1013,7 +1013,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_CONST_STR,
+- .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
++ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -1367,8 +1367,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
+ return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_ktime_get_boot_ns:
+ return &bpf_ktime_get_boot_ns_proto;
+- case BPF_FUNC_ktime_get_coarse_ns:
+- return &bpf_ktime_get_coarse_ns_proto;
+ case BPF_FUNC_ringbuf_output:
+ return &bpf_ringbuf_output_proto;
+ case BPF_FUNC_ringbuf_reserve:
+diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
+index 80da1db47c686..5a8d9f7467bf4 100644
+--- a/kernel/bpf/inode.c
++++ b/kernel/bpf/inode.c
+@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
+ int opt;
+
+ opt = fs_parse(fc, bpf_fs_parameters, param, &result);
+- if (opt < 0)
++ if (opt < 0) {
+ /* We might like to report bad mount options here, but
+ * traditionally we've ignored all mount options, so we'd
+ * better continue to ignore non-existing options for bpf.
+ */
+- return opt == -ENOPARAM ? 0 : opt;
++ if (opt == -ENOPARAM) {
++ opt = vfs_parse_fs_param_source(fc, param);
++ if (opt != -ENOPARAM)
++ return opt;
++
++ return 0;
++ }
++
++ if (opt < 0)
++ return opt;
++ }
+
+ switch (opt) {
+ case OPT_MODE:
+diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
+index 6a9542af4212a..b0fa190b09790 100644
+--- a/kernel/bpf/map_iter.c
++++ b/kernel/bpf/map_iter.c
+@@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = {
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_map_elem, key),
+- PTR_TO_RDONLY_BUF_OR_NULL },
++ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY },
+ { offsetof(struct bpf_iter__bpf_map_elem, value),
+- PTR_TO_RDWR_BUF_OR_NULL },
++ PTR_TO_BUF | PTR_MAYBE_NULL },
+ },
+ };
+
+diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
+index bd09290e36484..fcdd28224f532 100644
+--- a/kernel/bpf/offload.c
++++ b/kernel/bpf/offload.c
+@@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
+ if (offload->dev_state)
+ offload->offdev->ops->destroy(prog);
+
+- /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
+- bpf_prog_free_id(prog, true);
+-
+ list_del_init(&offload->offloads);
+ kfree(offload);
+ prog->aux->offload = NULL;
+diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
+index 3d897de890612..bbab8bb4b2fda 100644
+--- a/kernel/bpf/percpu_freelist.c
++++ b/kernel/bpf/percpu_freelist.c
+@@ -102,22 +102,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
+ u32 nr_elems)
+ {
+ struct pcpu_freelist_head *head;
+- int i, cpu, pcpu_entries;
++ unsigned int cpu, cpu_idx, i, j, n, m;
+
+- pcpu_entries = nr_elems / num_possible_cpus() + 1;
+- i = 0;
++ n = nr_elems / num_possible_cpus();
++ m = nr_elems % num_possible_cpus();
+
++ cpu_idx = 0;
+ for_each_possible_cpu(cpu) {
+-again:
+ head = per_cpu_ptr(s->freelist, cpu);
+- /* No locking required as this is not visible yet. */
+- pcpu_freelist_push_node(head, buf);
+- i++;
+- buf += elem_size;
+- if (i == nr_elems)
+- break;
+- if (i % pcpu_entries)
+- goto again;
++ j = n + (cpu_idx < m ? 1 : 0);
++ for (i = 0; i < j; i++) {
++ /* No locking required as this is not visible yet. */
++ pcpu_freelist_push_node(head, buf);
++ buf += elem_size;
++ }
++ cpu_idx++;
+ }
+ }
+
+diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
+index 9e0c10c6892ad..710ba9de12ce4 100644
+--- a/kernel/bpf/ringbuf.c
++++ b/kernel/bpf/ringbuf.c
+@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
+ }
+
+ rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
+- VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
++ VM_MAP | VM_USERMAP, PAGE_KERNEL);
+ if (rb) {
+ kmemleak_not_leak(pages);
+ rb->pages = pages;
+@@ -444,7 +444,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = {
+ .func = bpf_ringbuf_output,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+ };
+diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
+index 6e75bbee39f0b..7efae3af62017 100644
+--- a/kernel/bpf/stackmap.c
++++ b/kernel/bpf/stackmap.c
+@@ -119,7 +119,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
+ return ERR_PTR(-E2BIG);
+
+ cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
+- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
+ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
+ if (!smap)
+ return ERR_PTR(-ENOMEM);
+@@ -219,7 +218,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
+ }
+
+ static struct perf_callchain_entry *
+-get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
++get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
+ {
+ #ifdef CONFIG_STACKTRACE
+ struct perf_callchain_entry *entry;
+@@ -230,9 +229,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+ if (!entry)
+ return NULL;
+
+- entry->nr = init_nr +
+- stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
+- sysctl_perf_event_max_stack - init_nr, 0);
++ entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
++ max_depth, 0);
+
+ /* stack_trace_save_tsk() works on unsigned long array, while
+ * perf_callchain_entry uses u64 array. For 32-bit systems, it is
+@@ -244,7 +242,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+ int i;
+
+ /* copy data from the end to avoid using extra buffer */
+- for (i = entry->nr - 1; i >= (int)init_nr; i--)
++ for (i = entry->nr - 1; i >= 0; i--)
+ to[i] = (u64)(from[i]);
+ }
+
+@@ -261,27 +259,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
+ {
+ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
+ struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
+- u32 max_depth = map->value_size / stack_map_data_size(map);
+- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+ u32 hash, id, trace_nr, trace_len;
+ bool user = flags & BPF_F_USER_STACK;
+ u64 *ips;
+ bool hash_matches;
+
+- /* get_perf_callchain() guarantees that trace->nr >= init_nr
+- * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
+- */
+- trace_nr = trace->nr - init_nr;
+-
+- if (trace_nr <= skip)
++ if (trace->nr <= skip)
+ /* skipping more than usable stack trace */
+ return -EFAULT;
+
+- trace_nr -= skip;
++ trace_nr = trace->nr - skip;
+ trace_len = trace_nr * sizeof(u64);
+- ips = trace->ip + skip + init_nr;
++ ips = trace->ip + skip;
+ hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
+ id = hash & (smap->n_buckets - 1);
+ bucket = READ_ONCE(smap->buckets[id]);
+@@ -338,8 +328,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
+ {
+ u32 max_depth = map->value_size / stack_map_data_size(map);
+- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
++ u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+ bool user = flags & BPF_F_USER_STACK;
+ struct perf_callchain_entry *trace;
+ bool kernel = !user;
+@@ -348,8 +337,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+- trace = get_perf_callchain(regs, init_nr, kernel, user,
+- sysctl_perf_event_max_stack, false, false);
++ max_depth += skip;
++ if (max_depth > sysctl_perf_event_max_stack)
++ max_depth = sysctl_perf_event_max_stack;
++
++ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
++ false, false);
+
+ if (unlikely(!trace))
+ /* couldn't fetch the stack trace */
+@@ -440,7 +433,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+ struct perf_callchain_entry *trace_in,
+ void *buf, u32 size, u64 flags)
+ {
+- u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
++ u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
+ bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+ u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+ bool user = flags & BPF_F_USER_STACK;
+@@ -465,30 +458,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+ goto err_fault;
+
+ num_elem = size / elem_size;
+- if (sysctl_perf_event_max_stack < num_elem)
+- init_nr = 0;
+- else
+- init_nr = sysctl_perf_event_max_stack - num_elem;
++ max_depth = num_elem + skip;
++ if (sysctl_perf_event_max_stack < max_depth)
++ max_depth = sysctl_perf_event_max_stack;
+
+ if (trace_in)
+ trace = trace_in;
+ else if (kernel && task)
+- trace = get_callchain_entry_for_task(task, init_nr);
++ trace = get_callchain_entry_for_task(task, max_depth);
+ else
+- trace = get_perf_callchain(regs, init_nr, kernel, user,
+- sysctl_perf_event_max_stack,
++ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+ false, false);
+ if (unlikely(!trace))
+ goto err_fault;
+
+- trace_nr = trace->nr - init_nr;
+- if (trace_nr < skip)
++ if (trace->nr < skip)
+ goto err_fault;
+
+- trace_nr -= skip;
++ trace_nr = trace->nr - skip;
+ trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
+ copy_len = trace_nr * elem_size;
+- ips = trace->ip + skip + init_nr;
++
++ ips = trace->ip + skip;
+ if (user && user_build_id)
+ stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+ else
+@@ -525,13 +516,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+ u32, size, u64, flags)
+ {
+ struct pt_regs *regs;
+- long res;
++ long res = -EINVAL;
+
+ if (!try_get_task_stack(task))
+ return -EFAULT;
+
+ regs = task_pt_regs(task);
+- res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
++ if (regs)
++ res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+ put_task_stack(task);
+
+ return res;
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 1cad6979a0d0f..ad41b8230780b 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
+ return map;
+ }
+
++static void bpf_map_write_active_inc(struct bpf_map *map)
++{
++ atomic64_inc(&map->writecnt);
++}
++
++static void bpf_map_write_active_dec(struct bpf_map *map)
++{
++ atomic64_dec(&map->writecnt);
++}
++
++bool bpf_map_write_active(const struct bpf_map *map)
++{
++ return atomic64_read(&map->writecnt) != 0;
++}
++
+ static u32 bpf_map_value_size(const struct bpf_map *map)
+ {
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+@@ -596,11 +611,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
+ {
+ struct bpf_map *map = vma->vm_file->private_data;
+
+- if (vma->vm_flags & VM_MAYWRITE) {
+- mutex_lock(&map->freeze_mutex);
+- map->writecnt++;
+- mutex_unlock(&map->freeze_mutex);
+- }
++ if (vma->vm_flags & VM_MAYWRITE)
++ bpf_map_write_active_inc(map);
+ }
+
+ /* called for all unmapped memory region (including initial) */
+@@ -608,11 +620,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma)
+ {
+ struct bpf_map *map = vma->vm_file->private_data;
+
+- if (vma->vm_flags & VM_MAYWRITE) {
+- mutex_lock(&map->freeze_mutex);
+- map->writecnt--;
+- mutex_unlock(&map->freeze_mutex);
+- }
++ if (vma->vm_flags & VM_MAYWRITE)
++ bpf_map_write_active_dec(map);
+ }
+
+ static const struct vm_operations_struct bpf_map_default_vmops = {
+@@ -663,7 +672,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
+ goto out;
+
+ if (vma->vm_flags & VM_MAYWRITE)
+- map->writecnt++;
++ bpf_map_write_active_inc(map);
+ out:
+ mutex_unlock(&map->freeze_mutex);
+ return err;
+@@ -1122,6 +1131,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
++ bpf_map_write_active_inc(map);
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+@@ -1157,6 +1167,7 @@ free_value:
+ free_key:
+ kvfree(key);
+ err_put:
++ bpf_map_write_active_dec(map);
+ fdput(f);
+ return err;
+ }
+@@ -1179,6 +1190,7 @@ static int map_delete_elem(union bpf_attr *attr)
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
++ bpf_map_write_active_inc(map);
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+@@ -1209,6 +1221,7 @@ static int map_delete_elem(union bpf_attr *attr)
+ out:
+ kvfree(key);
+ err_put:
++ bpf_map_write_active_dec(map);
+ fdput(f);
+ return err;
+ }
+@@ -1324,6 +1337,7 @@ int generic_map_delete_batch(struct bpf_map *map,
+ maybe_wait_bpf_programs(map);
+ if (err)
+ break;
++ cond_resched();
+ }
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+ err = -EFAULT;
+@@ -1381,6 +1395,7 @@ int generic_map_update_batch(struct bpf_map *map,
+
+ if (err)
+ break;
++ cond_resched();
+ }
+
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+@@ -1478,6 +1493,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
+ swap(prev_key, key);
+ retry = MAP_LOOKUP_RETRIES;
+ cp++;
++ cond_resched();
+ }
+
+ if (err == -EFAULT)
+@@ -1516,6 +1532,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
++ bpf_map_write_active_inc(map);
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
+ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+@@ -1580,6 +1597,7 @@ free_value:
+ free_key:
+ kvfree(key);
+ err_put:
++ bpf_map_write_active_dec(map);
+ fdput(f);
+ return err;
+ }
+@@ -1607,8 +1625,7 @@ static int map_freeze(const union bpf_attr *attr)
+ }
+
+ mutex_lock(&map->freeze_mutex);
+-
+- if (map->writecnt) {
++ if (bpf_map_write_active(map)) {
+ err = -EBUSY;
+ goto err_put;
+ }
+@@ -1678,7 +1695,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
+ return;
+ if (audit_enabled == AUDIT_OFF)
+ return;
+- if (op == BPF_AUDIT_LOAD)
++ if (!in_irq() && !irqs_disabled())
+ ctx = audit_context();
+ ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
+ if (unlikely(!ab))
+@@ -1773,6 +1790,7 @@ static void bpf_prog_put_deferred(struct work_struct *work)
+ prog = aux->prog;
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
++ bpf_prog_free_id(prog, true);
+ __bpf_prog_put_noref(prog, true);
+ }
+
+@@ -1781,9 +1799,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
+ struct bpf_prog_aux *aux = prog->aux;
+
+ if (atomic64_dec_and_test(&aux->refcnt)) {
+- /* bpf_prog_free_id() must be called first */
+- bpf_prog_free_id(prog, do_idr_lock);
+-
+ if (in_irq() || irqs_disabled()) {
+ INIT_WORK(&aux->work, bpf_prog_put_deferred);
+ schedule_work(&aux->work);
+@@ -1807,8 +1822,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
+ return 0;
+ }
+
++struct bpf_prog_kstats {
++ u64 nsecs;
++ u64 cnt;
++ u64 misses;
++};
++
+ static void bpf_prog_get_stats(const struct bpf_prog *prog,
+- struct bpf_prog_stats *stats)
++ struct bpf_prog_kstats *stats)
+ {
+ u64 nsecs = 0, cnt = 0, misses = 0;
+ int cpu;
+@@ -1821,9 +1842,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
+ st = per_cpu_ptr(prog->stats, cpu);
+ do {
+ start = u64_stats_fetch_begin_irq(&st->syncp);
+- tnsecs = st->nsecs;
+- tcnt = st->cnt;
+- tmisses = st->misses;
++ tnsecs = u64_stats_read(&st->nsecs);
++ tcnt = u64_stats_read(&st->cnt);
++ tmisses = u64_stats_read(&st->misses);
+ } while (u64_stats_fetch_retry_irq(&st->syncp, start));
+ nsecs += tnsecs;
+ cnt += tcnt;
+@@ -1839,7 +1860,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
+ {
+ const struct bpf_prog *prog = filp->private_data;
+ char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+- struct bpf_prog_stats stats;
++ struct bpf_prog_kstats stats;
+
+ bpf_prog_get_stats(prog, &stats);
+ bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+@@ -3578,7 +3599,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
+ struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+ struct bpf_prog_info info;
+ u32 info_len = attr->info.info_len;
+- struct bpf_prog_stats stats;
++ struct bpf_prog_kstats stats;
+ char __user *uinsns;
+ u32 ulen;
+ int err;
+@@ -4077,7 +4098,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
+ if (attr->task_fd_query.flags != 0)
+ return -EINVAL;
+
++ rcu_read_lock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
++ rcu_read_unlock();
+ if (!task)
+ return -ENOENT;
+
+@@ -4143,6 +4166,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ int cmd)
+ {
++ bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
++ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
++ bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
+ struct bpf_map *map;
+ int err, ufd;
+ struct fd f;
+@@ -4155,16 +4181,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+-
+- if ((cmd == BPF_MAP_LOOKUP_BATCH ||
+- cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
+- !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
++ if (has_write)
++ bpf_map_write_active_inc(map);
++ if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+ err = -EPERM;
+ goto err_put;
+ }
+-
+- if (cmd != BPF_MAP_LOOKUP_BATCH &&
+- !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
++ if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+@@ -4177,8 +4200,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
+ BPF_DO_BATCH(map->ops->map_update_batch);
+ else
+ BPF_DO_BATCH(map->ops->map_delete_batch);
+-
+ err_put:
++ if (has_write)
++ bpf_map_write_active_dec(map);
+ fdput(f);
+ return err;
+ }
+@@ -4729,7 +4753,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ };
+
+@@ -4761,7 +4785,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ {
+ switch (func_id) {
+ case BPF_FUNC_sys_bpf:
+- return &bpf_sys_bpf_proto;
++ return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
+ case BPF_FUNC_btf_find_by_name_kind:
+ return &bpf_btf_find_by_name_kind_proto;
+ case BPF_FUNC_sys_close:
+diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
+index fe1e857324e66..4fa75791b45e2 100644
+--- a/kernel/bpf/trampoline.c
++++ b/kernel/bpf/trampoline.c
+@@ -414,7 +414,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+ {
+ enum bpf_tramp_prog_type kind;
+ int err = 0;
+- int cnt;
++ int cnt = 0, i;
+
+ kind = bpf_attach_type_to_tramp(prog);
+ mutex_lock(&tr->mutex);
+@@ -425,7 +425,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+ err = -EBUSY;
+ goto out;
+ }
+- cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
++
++ for (i = 0; i < BPF_TRAMP_MAX; i++)
++ cnt += tr->progs_cnt[i];
++
+ if (kind == BPF_TRAMP_REPLACE) {
+ /* Cannot attach extension if fentry/fexit are in use. */
+ if (cnt) {
+@@ -503,16 +506,19 @@ out:
+
+ void bpf_trampoline_put(struct bpf_trampoline *tr)
+ {
++ int i;
++
+ if (!tr)
+ return;
+ mutex_lock(&trampoline_mutex);
+ if (!refcount_dec_and_test(&tr->refcnt))
+ goto out;
+ WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
+- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
+- goto out;
+- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
+- goto out;
++
++ for (i = 0; i < BPF_TRAMP_MAX; i++)
++ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
++ goto out;
++
+ /* This code will be executed even when the last bpf_tramp_image
+ * is alive. All progs are detached from the trampoline and the
+ * trampoline image is patched with jmp into epilogue to skip
+@@ -541,11 +547,12 @@ static u64 notrace bpf_prog_start_time(void)
+ static void notrace inc_misses_counter(struct bpf_prog *prog)
+ {
+ struct bpf_prog_stats *stats;
++ unsigned int flags;
+
+ stats = this_cpu_ptr(prog->stats);
+- u64_stats_update_begin(&stats->syncp);
+- stats->misses++;
+- u64_stats_update_end(&stats->syncp);
++ flags = u64_stats_update_begin_irqsave(&stats->syncp);
++ u64_stats_inc(&stats->misses);
++ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ }
+
+ /* The logic is similar to bpf_prog_run(), but with an explicit
+@@ -585,11 +592,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
+ * Hence check that 'start' is valid.
+ */
+ start > NO_START_TIME) {
++ unsigned long flags;
++
+ stats = this_cpu_ptr(prog->stats);
+- u64_stats_update_begin(&stats->syncp);
+- stats->cnt++;
+- stats->nsecs += sched_clock() - start;
+- u64_stats_update_end(&stats->syncp);
++ flags = u64_stats_update_begin_irqsave(&stats->syncp);
++ u64_stats_inc(&stats->cnt);
++ u64_stats_add(&stats->nsecs, sched_clock() - start);
++ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ }
+ }
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index e76b559179054..ecf4332ff312f 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+ insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+ }
+
+-static bool bpf_pseudo_func(const struct bpf_insn *insn)
+-{
+- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+- insn->src_reg == BPF_PSEUDO_FUNC;
+-}
+-
+ struct bpf_call_arg_meta {
+ struct bpf_map *map_ptr;
+ bool raw_mode;
+@@ -445,18 +439,6 @@ static bool reg_type_not_null(enum bpf_reg_type type)
+ type == PTR_TO_SOCK_COMMON;
+ }
+
+-static bool reg_type_may_be_null(enum bpf_reg_type type)
+-{
+- return type == PTR_TO_MAP_VALUE_OR_NULL ||
+- type == PTR_TO_SOCKET_OR_NULL ||
+- type == PTR_TO_SOCK_COMMON_OR_NULL ||
+- type == PTR_TO_TCP_SOCK_OR_NULL ||
+- type == PTR_TO_BTF_ID_OR_NULL ||
+- type == PTR_TO_MEM_OR_NULL ||
+- type == PTR_TO_RDONLY_BUF_OR_NULL ||
+- type == PTR_TO_RDWR_BUF_OR_NULL;
+-}
+-
+ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
+ {
+ return reg->type == PTR_TO_MAP_VALUE &&
+@@ -465,12 +447,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
+
+ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
+ {
+- return type == PTR_TO_SOCKET ||
+- type == PTR_TO_SOCKET_OR_NULL ||
+- type == PTR_TO_TCP_SOCK ||
+- type == PTR_TO_TCP_SOCK_OR_NULL ||
+- type == PTR_TO_MEM ||
+- type == PTR_TO_MEM_OR_NULL;
++ return base_type(type) == PTR_TO_SOCKET ||
++ base_type(type) == PTR_TO_TCP_SOCK ||
++ base_type(type) == PTR_TO_MEM;
++}
++
++static bool type_is_rdonly_mem(u32 type)
++{
++ return type & MEM_RDONLY;
+ }
+
+ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
+@@ -478,14 +462,9 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
+ return type == ARG_PTR_TO_SOCK_COMMON;
+ }
+
+-static bool arg_type_may_be_null(enum bpf_arg_type type)
++static bool type_may_be_null(u32 type)
+ {
+- return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
+- type == ARG_PTR_TO_MEM_OR_NULL ||
+- type == ARG_PTR_TO_CTX_OR_NULL ||
+- type == ARG_PTR_TO_SOCKET_OR_NULL ||
+- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
+- type == ARG_PTR_TO_STACK_OR_NULL;
++ return type & PTR_MAYBE_NULL;
+ }
+
+ /* Determine whether the function releases some resources allocated by another
+@@ -538,6 +517,12 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
+ func_id == BPF_FUNC_skc_to_tcp_request_sock;
+ }
+
++static bool is_callback_calling_function(enum bpf_func_id func_id)
++{
++ return func_id == BPF_FUNC_for_each_map_elem ||
++ func_id == BPF_FUNC_timer_set_callback;
++}
++
+ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+ {
+ return BPF_CLASS(insn->code) == BPF_STX &&
+@@ -545,39 +530,54 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+ insn->imm == BPF_CMPXCHG;
+ }
+
+-/* string representation of 'enum bpf_reg_type' */
+-static const char * const reg_type_str[] = {
+- [NOT_INIT] = "?",
+- [SCALAR_VALUE] = "inv",
+- [PTR_TO_CTX] = "ctx",
+- [CONST_PTR_TO_MAP] = "map_ptr",
+- [PTR_TO_MAP_VALUE] = "map_value",
+- [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+- [PTR_TO_STACK] = "fp",
+- [PTR_TO_PACKET] = "pkt",
+- [PTR_TO_PACKET_META] = "pkt_meta",
+- [PTR_TO_PACKET_END] = "pkt_end",
+- [PTR_TO_FLOW_KEYS] = "flow_keys",
+- [PTR_TO_SOCKET] = "sock",
+- [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
+- [PTR_TO_SOCK_COMMON] = "sock_common",
+- [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
+- [PTR_TO_TCP_SOCK] = "tcp_sock",
+- [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
+- [PTR_TO_TP_BUFFER] = "tp_buffer",
+- [PTR_TO_XDP_SOCK] = "xdp_sock",
+- [PTR_TO_BTF_ID] = "ptr_",
+- [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
+- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
+- [PTR_TO_MEM] = "mem",
+- [PTR_TO_MEM_OR_NULL] = "mem_or_null",
+- [PTR_TO_RDONLY_BUF] = "rdonly_buf",
+- [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
+- [PTR_TO_RDWR_BUF] = "rdwr_buf",
+- [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
+- [PTR_TO_FUNC] = "func",
+- [PTR_TO_MAP_KEY] = "map_key",
+-};
++/* string representation of 'enum bpf_reg_type'
++ *
++ * Note that reg_type_str() can not appear more than once in a single verbose()
++ * statement.
++ */
++static const char *reg_type_str(struct bpf_verifier_env *env,
++ enum bpf_reg_type type)
++{
++ char postfix[16] = {0}, prefix[16] = {0};
++ static const char * const str[] = {
++ [NOT_INIT] = "?",
++ [SCALAR_VALUE] = "inv",
++ [PTR_TO_CTX] = "ctx",
++ [CONST_PTR_TO_MAP] = "map_ptr",
++ [PTR_TO_MAP_VALUE] = "map_value",
++ [PTR_TO_STACK] = "fp",
++ [PTR_TO_PACKET] = "pkt",
++ [PTR_TO_PACKET_META] = "pkt_meta",
++ [PTR_TO_PACKET_END] = "pkt_end",
++ [PTR_TO_FLOW_KEYS] = "flow_keys",
++ [PTR_TO_SOCKET] = "sock",
++ [PTR_TO_SOCK_COMMON] = "sock_common",
++ [PTR_TO_TCP_SOCK] = "tcp_sock",
++ [PTR_TO_TP_BUFFER] = "tp_buffer",
++ [PTR_TO_XDP_SOCK] = "xdp_sock",
++ [PTR_TO_BTF_ID] = "ptr_",
++ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
++ [PTR_TO_MEM] = "mem",
++ [PTR_TO_BUF] = "buf",
++ [PTR_TO_FUNC] = "func",
++ [PTR_TO_MAP_KEY] = "map_key",
++ };
++
++ if (type & PTR_MAYBE_NULL) {
++ if (base_type(type) == PTR_TO_BTF_ID ||
++ base_type(type) == PTR_TO_PERCPU_BTF_ID)
++ strncpy(postfix, "or_null_", 16);
++ else
++ strncpy(postfix, "_or_null", 16);
++ }
++
++ if (type & MEM_RDONLY)
++ strncpy(prefix, "rdonly_", 16);
++
++ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
++ prefix, str[base_type(type)], postfix);
++ return env->type_str_buf;
++}
+
+ static char slot_type_char[] = {
+ [STACK_INVALID] = '?',
+@@ -612,6 +612,20 @@ static const char *kernel_type_name(const struct btf* btf, u32 id)
+ return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
+ }
+
++/* The reg state of a pointer or a bounded scalar was saved when
++ * it was spilled to the stack.
++ */
++static bool is_spilled_reg(const struct bpf_stack_state *stack)
++{
++ return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
++}
++
++static void scrub_spilled_slot(u8 *stype)
++{
++ if (*stype != STACK_INVALID)
++ *stype = STACK_MISC;
++}
++
+ static void print_verifier_state(struct bpf_verifier_env *env,
+ const struct bpf_func_state *state)
+ {
+@@ -628,7 +642,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
+ continue;
+ verbose(env, " R%d", i);
+ print_liveness(env, reg->live);
+- verbose(env, "=%s", reg_type_str[t]);
++ verbose(env, "=%s", reg_type_str(env, t));
+ if (t == SCALAR_VALUE && reg->precise)
+ verbose(env, "P");
+ if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
+@@ -636,9 +650,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
+ /* reg->off should be 0 for SCALAR_VALUE */
+ verbose(env, "%lld", reg->var_off.value + reg->off);
+ } else {
+- if (t == PTR_TO_BTF_ID ||
+- t == PTR_TO_BTF_ID_OR_NULL ||
+- t == PTR_TO_PERCPU_BTF_ID)
++ if (base_type(t) == PTR_TO_BTF_ID ||
++ base_type(t) == PTR_TO_PERCPU_BTF_ID)
+ verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
+ verbose(env, "(id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t))
+@@ -647,10 +660,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
+ verbose(env, ",off=%d", reg->off);
+ if (type_is_pkt_pointer(t))
+ verbose(env, ",r=%d", reg->range);
+- else if (t == CONST_PTR_TO_MAP ||
+- t == PTR_TO_MAP_KEY ||
+- t == PTR_TO_MAP_VALUE ||
+- t == PTR_TO_MAP_VALUE_OR_NULL)
++ else if (base_type(t) == CONST_PTR_TO_MAP ||
++ base_type(t) == PTR_TO_MAP_KEY ||
++ base_type(t) == PTR_TO_MAP_VALUE)
+ verbose(env, ",ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
+@@ -717,10 +729,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
+ continue;
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, state->stack[i].spilled_ptr.live);
+- if (state->stack[i].slot_type[0] == STACK_SPILL) {
++ if (is_spilled_reg(&state->stack[i])) {
+ reg = &state->stack[i].spilled_ptr;
+ t = reg->type;
+- verbose(env, "=%s", reg_type_str[t]);
++ verbose(env, "=%s", reg_type_str(env, t));
+ if (t == SCALAR_VALUE && reg->precise)
+ verbose(env, "P");
+ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
+@@ -778,12 +790,17 @@ out:
+ */
+ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
+ {
++ void *new_arr;
++
+ if (!new_n || old_n == new_n)
+ goto out;
+
+- arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
+- if (!arr)
++ new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
++ if (!new_arr) {
++ kfree(arr);
+ return NULL;
++ }
++ arr = new_arr;
+
+ if (new_n > old_n)
+ memset(arr + old_n * size, 0, (new_n - old_n) * size);
+@@ -859,6 +876,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
+ id = ++env->id_gen;
+ state->refs[new_ofs].id = id;
+ state->refs[new_ofs].insn_idx = insn_idx;
++ state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
+
+ return id;
+ }
+@@ -871,6 +889,9 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
+ last_idx = state->acquired_refs - 1;
+ for (i = 0; i < state->acquired_refs; i++) {
+ if (state->refs[i].id == ptr_id) {
++ /* Cannot release caller references in callbacks */
++ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
++ return -EINVAL;
+ if (last_idx && i != last_idx)
+ memcpy(&state->refs[i], &state->refs[last_idx],
+ sizeof(*state->refs));
+@@ -1133,8 +1154,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
+
+ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
+ {
+- switch (reg->type) {
+- case PTR_TO_MAP_VALUE_OR_NULL: {
++ if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
+ const struct bpf_map *map = reg->map_ptr;
+
+ if (map->inner_map_meta) {
+@@ -1143,7 +1163,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
+ /* transfer reg's id which is unique for every map_lookup_elem
+ * as UID of the inner map.
+ */
+- reg->map_uid = reg->id;
++ if (map_value_has_timer(map->inner_map_meta))
++ reg->map_uid = reg->id;
+ } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
+ reg->type = PTR_TO_XDP_SOCK;
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+@@ -1152,32 +1173,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
+ } else {
+ reg->type = PTR_TO_MAP_VALUE;
+ }
+- break;
+- }
+- case PTR_TO_SOCKET_OR_NULL:
+- reg->type = PTR_TO_SOCKET;
+- break;
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+- reg->type = PTR_TO_SOCK_COMMON;
+- break;
+- case PTR_TO_TCP_SOCK_OR_NULL:
+- reg->type = PTR_TO_TCP_SOCK;
+- break;
+- case PTR_TO_BTF_ID_OR_NULL:
+- reg->type = PTR_TO_BTF_ID;
+- break;
+- case PTR_TO_MEM_OR_NULL:
+- reg->type = PTR_TO_MEM;
+- break;
+- case PTR_TO_RDONLY_BUF_OR_NULL:
+- reg->type = PTR_TO_RDONLY_BUF;
+- break;
+- case PTR_TO_RDWR_BUF_OR_NULL:
+- reg->type = PTR_TO_RDWR_BUF;
+- break;
+- default:
+- WARN_ONCE(1, "unknown nullable register type");
++ return;
+ }
++
++ reg->type &= ~PTR_MAYBE_NULL;
+ }
+
+ static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
+@@ -1357,22 +1356,43 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
+ reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
+ }
+
++static void reg_bounds_sync(struct bpf_reg_state *reg)
++{
++ /* We might have learned new bounds from the var_off. */
++ __update_reg_bounds(reg);
++ /* We might have learned something about the sign bit. */
++ __reg_deduce_bounds(reg);
++ /* We might have learned some bits from the bounds. */
++ __reg_bound_offset(reg);
++ /* Intersecting with the old var_off might have improved our bounds
++ * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
++ * then new var_off is (0; 0x7f...fc) which improves our umax.
++ */
++ __update_reg_bounds(reg);
++}
++
++static bool __reg32_bound_s64(s32 a)
++{
++ return a >= 0 && a <= S32_MAX;
++}
++
+ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
+ {
+ reg->umin_value = reg->u32_min_value;
+ reg->umax_value = reg->u32_max_value;
+- /* Attempt to pull 32-bit signed bounds into 64-bit bounds
+- * but must be positive otherwise set to worse case bounds
+- * and refine later from tnum.
++
++ /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
++ * be positive otherwise set to worse case bounds and refine later
++ * from tnum.
+ */
+- if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
+- reg->smax_value = reg->s32_max_value;
+- else
+- reg->smax_value = U32_MAX;
+- if (reg->s32_min_value >= 0)
++ if (__reg32_bound_s64(reg->s32_min_value) &&
++ __reg32_bound_s64(reg->s32_max_value)) {
+ reg->smin_value = reg->s32_min_value;
+- else
++ reg->smax_value = reg->s32_max_value;
++ } else {
+ reg->smin_value = 0;
++ reg->smax_value = U32_MAX;
++ }
+ }
+
+ static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
+@@ -1392,32 +1412,23 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
+ * so they do not impact tnum bounds calculation.
+ */
+ __mark_reg64_unbounded(reg);
+- __update_reg_bounds(reg);
+ }
+-
+- /* Intersecting with the old var_off might have improved our bounds
+- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+- * then new var_off is (0; 0x7f...fc) which improves our umax.
+- */
+- __reg_deduce_bounds(reg);
+- __reg_bound_offset(reg);
+- __update_reg_bounds(reg);
++ reg_bounds_sync(reg);
+ }
+
+ static bool __reg64_bound_s32(s64 a)
+ {
+- return a > S32_MIN && a < S32_MAX;
++ return a >= S32_MIN && a <= S32_MAX;
+ }
+
+ static bool __reg64_bound_u32(u64 a)
+ {
+- return a > U32_MIN && a < U32_MAX;
++ return a >= U32_MIN && a <= U32_MAX;
+ }
+
+ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
+ {
+ __mark_reg32_unbounded(reg);
+-
+ if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
+ reg->s32_min_value = (s32)reg->smin_value;
+ reg->s32_max_value = (s32)reg->smax_value;
+@@ -1426,14 +1437,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
+ reg->u32_min_value = (u32)reg->umin_value;
+ reg->u32_max_value = (u32)reg->umax_value;
+ }
+-
+- /* Intersecting with the old var_off might have improved our bounds
+- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+- * then new var_off is (0; 0x7f...fc) which improves our umax.
+- */
+- __reg_deduce_bounds(reg);
+- __reg_bound_offset(reg);
+- __update_reg_bounds(reg);
++ reg_bounds_sync(reg);
+ }
+
+ /* Mark a register as having a completely unknown (scalar) value. */
+@@ -1448,7 +1452,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+ reg->type = SCALAR_VALUE;
+ reg->var_off = tnum_unknown;
+ reg->frameno = 0;
+- reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
++ reg->precise = !env->bpf_capable;
+ __mark_reg_unbounded(reg);
+ }
+
+@@ -1807,16 +1811,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
+ return -EPERM;
+ }
+
+- if (bpf_pseudo_func(insn)) {
+- ret = add_subprog(env, i + insn->imm + 1);
+- if (ret >= 0)
+- /* remember subprog */
+- insn[1].imm = ret;
+- } else if (bpf_pseudo_call(insn)) {
++ if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
+ ret = add_subprog(env, i + insn->imm + 1);
+- } else {
++ else
+ ret = add_kfunc_call(env, insn->imm);
+- }
+
+ if (ret < 0)
+ return ret;
+@@ -1899,7 +1897,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
+ break;
+ if (parent->live & REG_LIVE_DONE) {
+ verbose(env, "verifier BUG type %s var_off %lld off %d\n",
+- reg_type_str[parent->type],
++ reg_type_str(env, parent->type),
+ parent->var_off.value, parent->off);
+ return -EFAULT;
+ }
+@@ -2232,8 +2230,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ */
+ if (insn->src_reg != BPF_REG_FP)
+ return 0;
+- if (BPF_SIZE(insn->code) != BPF_DW)
+- return 0;
+
+ /* dreg = *(u64 *)[fp - off] was a fill from the stack.
+ * that [fp - off] slot contains scalar that needs to be
+@@ -2256,8 +2252,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ /* scalars can only be spilled into stack */
+ if (insn->dst_reg != BPF_REG_FP)
+ return 0;
+- if (BPF_SIZE(insn->code) != BPF_DW)
+- return 0;
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
+ if (spi >= 64) {
+ verbose(env, "BUG spi %d\n", spi);
+@@ -2273,6 +2267,17 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ if (opcode == BPF_CALL) {
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return -ENOTSUPP;
++ /* kfunc with imm==0 is invalid and fixup_kfunc_call will
++ * catch this error later. Make backtracking conservative
++ * with ENOTSUPP.
++ */
++ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
++ return -ENOTSUPP;
++ /* BPF helpers that invoke callback subprogs are
++ * equivalent to BPF_PSEUDO_CALL above
++ */
++ if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
++ return -ENOTSUPP;
+ /* regular helper call sets R0 */
+ *reg_mask &= ~1;
+ if (*reg_mask & 0x3f) {
+@@ -2285,6 +2290,21 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ }
+ } else if (opcode == BPF_EXIT) {
+ return -ENOTSUPP;
++ } else if (BPF_SRC(insn->code) == BPF_X) {
++ if (!(*reg_mask & (dreg | sreg)))
++ return 0;
++ /* dreg <cond> sreg
++ * Both dreg and sreg need precision before
++ * this insn. If only sreg was marked precise
++ * before it would be equally necessary to
++ * propagate it to dreg.
++ */
++ *reg_mask |= (sreg | dreg);
++ /* else dreg <cond> K
++ * Only dreg still needs precision before
++ * this insn, so for the K-based conditional
++ * there is nothing new to be marked.
++ */
+ }
+ } else if (class == BPF_LD) {
+ if (!(*reg_mask & dreg))
+@@ -2362,8 +2382,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
+
+ /* big hammer: mark all scalars precise in this path.
+ * pop_stack may still get !precise scalars.
++ * We also skip current state and go straight to first parent state,
++ * because precision markings in current non-checkpointed state are
++ * not needed. See why in the comment in __mark_chain_precision below.
+ */
+- for (; st; st = st->parent)
++ for (st = st->parent; st; st = st->parent) {
+ for (i = 0; i <= st->curframe; i++) {
+ func = st->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+@@ -2373,7 +2396,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
+ reg->precise = true;
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+- if (func->stack[j].slot_type[0] != STACK_SPILL)
++ if (!is_spilled_reg(&func->stack[j]))
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ if (reg->type != SCALAR_VALUE)
+@@ -2381,9 +2404,122 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
+ reg->precise = true;
+ }
+ }
++ }
++}
++
++static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
++{
++ struct bpf_func_state *func;
++ struct bpf_reg_state *reg;
++ int i, j;
++
++ for (i = 0; i <= st->curframe; i++) {
++ func = st->frame[i];
++ for (j = 0; j < BPF_REG_FP; j++) {
++ reg = &func->regs[j];
++ if (reg->type != SCALAR_VALUE)
++ continue;
++ reg->precise = false;
++ }
++ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
++ if (!is_spilled_reg(&func->stack[j]))
++ continue;
++ reg = &func->stack[j].spilled_ptr;
++ if (reg->type != SCALAR_VALUE)
++ continue;
++ reg->precise = false;
++ }
++ }
+ }
+
+-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
++/*
++ * __mark_chain_precision() backtracks BPF program instruction sequence and
++ * chain of verifier states making sure that register *regno* (if regno >= 0)
++ * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
++ * SCALARS, as well as any other registers and slots that contribute to
++ * a tracked state of given registers/stack slots, depending on specific BPF
++ * assembly instructions (see backtrack_insns() for exact instruction handling
++ * logic). This backtracking relies on recorded jmp_history and is able to
++ * traverse entire chain of parent states. This process ends only when all the
++ * necessary registers/slots and their transitive dependencies are marked as
++ * precise.
++ *
++ * One important and subtle aspect is that precise marks *do not matter* in
++ * the currently verified state (current state). It is important to understand
++ * why this is the case.
++ *
++ * First, note that current state is the state that is not yet "checkpointed",
++ * i.e., it is not yet put into env->explored_states, and it has no children
++ * states as well. It's ephemeral, and can end up either a) being discarded if
++ * compatible explored state is found at some point or BPF_EXIT instruction is
++ * reached or b) checkpointed and put into env->explored_states, branching out
++ * into one or more children states.
++ *
++ * In the former case, precise markings in current state are completely
++ * ignored by state comparison code (see regsafe() for details). Only
++ * checkpointed ("old") state precise markings are important, and if old
++ * state's register/slot is precise, regsafe() assumes current state's
++ * register/slot as precise and checks value ranges exactly and precisely. If
++ * states turn out to be compatible, current state's necessary precise
++ * markings and any required parent states' precise markings are enforced
++ * after the fact with propagate_precision() logic. But it's
++ * important to realize that in this case, even after marking current state
++ * registers/slots as precise, we immediately discard current state. So what
++ * actually matters is any of the precise markings propagated into current
++ * state's parent states, which are always checkpointed (due to b) case above).
++ * As such, for scenario a) it doesn't matter if current state has precise
++ * markings set or not.
++ *
++ * Now, for the scenario b), checkpointing and forking into child(ren)
++ * state(s). Note that before current state gets to checkpointing step, any
++ * processed instruction always assumes precise SCALAR register/slot
++ * knowledge: if precise value or range is useful to prune jump branch, BPF
++ * verifier takes this opportunity enthusiastically. Similarly, when
++ * register's value is used to calculate offset or memory address, exact
++ * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
++ * what we mentioned above about state comparison ignoring precise markings
++ * during state comparison, BPF verifier ignores and also assumes precise
++ * markings *at will* during instruction verification process. But as verifier
++ * assumes precision, it also propagates any precision dependencies across
++ * parent states, which are not yet finalized, so can be further restricted
++ * based on new knowledge gained from restrictions enforced by their children
++ * states. This is so that once those parent states are finalized, i.e., when
++ * they have no more active children state, state comparison logic in
++ * is_state_visited() would enforce strict and precise SCALAR ranges, if
++ * required for correctness.
++ *
++ * To build a bit more intuition, note also that once a state is checkpointed,
++ * the path we took to get to that state is not important. This is crucial
++ * property for state pruning. When state is checkpointed and finalized at
++ * some instruction index, it can be correctly and safely used to "short
++ * circuit" any *compatible* state that reaches exactly the same instruction
++ * index. I.e., if we jumped to that instruction from a completely different
++ * code path than original finalized state was derived from, it doesn't
++ * matter, current state can be discarded because from that instruction
++ * forward having a compatible state will ensure we will safely reach the
++ * exit. States describe preconditions for further exploration, but completely
++ * forget the history of how we got here.
++ *
++ * This also means that even if we needed precise SCALAR range to get to
++ * finalized state, but from that point forward *that same* SCALAR register is
++ * never used in a precise context (i.e., its precise value is not needed for
++ * correctness), it's correct and safe to mark such register as "imprecise"
++ * (i.e., precise marking set to false). This is what we rely on when we do
++ * not set precise marking in current state. If no child state requires
++ * precision for any given SCALAR register, it's safe to dictate that it can
++ * be imprecise. If any child state does require this register to be precise,
++ * we'll mark it precise later retroactively during precise markings
++ * propagation from child state to parent states.
++ *
++ * Skipping precise marking setting in current state is a mild version of
++ * relying on the above observation. But we can utilize this property even
++ * more aggressively by proactively forgetting any precise marking in the
++ * current state (which we inherited from the parent state), right before we
++ * checkpoint it and branch off into new child state. This is done by
++ * mark_all_scalars_imprecise() to hopefully get more permissive and generic
++ * finalized states which help in short circuiting more future states.
++ */
++static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
+ int spi)
+ {
+ struct bpf_verifier_state *st = env->cur_state;
+@@ -2400,22 +2536,22 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ if (!env->bpf_capable)
+ return 0;
+
+- func = st->frame[st->curframe];
++ /* Do sanity checks against current state of register and/or stack
++ * slot, but don't set precise flag in current state, as precision
++ * tracking in the current state is unnecessary.
++ */
++ func = st->frame[frame];
+ if (regno >= 0) {
+ reg = &func->regs[regno];
+ if (reg->type != SCALAR_VALUE) {
+ WARN_ONCE(1, "backtracing misuse");
+ return -EFAULT;
+ }
+- if (!reg->precise)
+- new_marks = true;
+- else
+- reg_mask = 0;
+- reg->precise = true;
++ new_marks = true;
+ }
+
+ while (spi >= 0) {
+- if (func->stack[spi].slot_type[0] != STACK_SPILL) {
++ if (!is_spilled_reg(&func->stack[spi])) {
+ stack_mask = 0;
+ break;
+ }
+@@ -2424,11 +2560,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ stack_mask = 0;
+ break;
+ }
+- if (!reg->precise)
+- new_marks = true;
+- else
+- stack_mask = 0;
+- reg->precise = true;
++ new_marks = true;
+ break;
+ }
+
+@@ -2436,12 +2568,42 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ return 0;
+ if (!reg_mask && !stack_mask)
+ return 0;
++
+ for (;;) {
+ DECLARE_BITMAP(mask, 64);
+ u32 history = st->jmp_history_cnt;
+
+ if (env->log.level & BPF_LOG_LEVEL)
+ verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
++
++ if (last_idx < 0) {
++ /* we are at the entry into subprog, which
++ * is expected for global funcs, but only if
++ * requested precise registers are R1-R5
++ * (which are global func's input arguments)
++ */
++ if (st->curframe == 0 &&
++ st->frame[0]->subprogno > 0 &&
++ st->frame[0]->callsite == BPF_MAIN_FUNC &&
++ stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
++ bitmap_from_u64(mask, reg_mask);
++ for_each_set_bit(i, mask, 32) {
++ reg = &st->frame[0]->regs[i];
++ if (reg->type != SCALAR_VALUE) {
++ reg_mask &= ~(1u << i);
++ continue;
++ }
++ reg->precise = true;
++ }
++ return 0;
++ }
++
++ verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
++ st->frame[0]->subprogno, reg_mask, stack_mask);
++ WARN_ONCE(1, "verifier backtracking bug");
++ return -EFAULT;
++ }
++
+ for (i = last_idx;;) {
+ if (skip_first) {
+ err = 0;
+@@ -2481,7 +2643,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ break;
+
+ new_marks = false;
+- func = st->frame[st->curframe];
++ func = st->frame[frame];
+ bitmap_from_u64(mask, reg_mask);
+ for_each_set_bit(i, mask, 32) {
+ reg = &func->regs[i];
+@@ -2514,7 +2676,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ return 0;
+ }
+
+- if (func->stack[i].slot_type[0] != STACK_SPILL) {
++ if (!is_spilled_reg(&func->stack[i])) {
+ stack_mask &= ~(1ull << i);
+ continue;
+ }
+@@ -2547,19 +2709,23 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+
+ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
+ {
+- return __mark_chain_precision(env, regno, -1);
++ return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
+ }
+
+-static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
++static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
+ {
+- return __mark_chain_precision(env, -1, spi);
++ return __mark_chain_precision(env, frame, regno, -1);
++}
++
++static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
++{
++ return __mark_chain_precision(env, frame, -1, spi);
+ }
+
+ static bool is_spillable_regtype(enum bpf_reg_type type)
+ {
+- switch (type) {
++ switch (base_type(type)) {
+ case PTR_TO_MAP_VALUE:
+- case PTR_TO_MAP_VALUE_OR_NULL:
+ case PTR_TO_STACK:
+ case PTR_TO_CTX:
+ case PTR_TO_PACKET:
+@@ -2568,21 +2734,13 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
+ case PTR_TO_FLOW_KEYS:
+ case CONST_PTR_TO_MAP:
+ case PTR_TO_SOCKET:
+- case PTR_TO_SOCKET_OR_NULL:
+ case PTR_TO_SOCK_COMMON:
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+- case PTR_TO_TCP_SOCK_OR_NULL:
+ case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
+- case PTR_TO_BTF_ID_OR_NULL:
+- case PTR_TO_RDONLY_BUF:
+- case PTR_TO_RDONLY_BUF_OR_NULL:
+- case PTR_TO_RDWR_BUF:
+- case PTR_TO_RDWR_BUF_OR_NULL:
++ case PTR_TO_BUF:
+ case PTR_TO_PERCPU_BTF_ID:
+ case PTR_TO_MEM:
+- case PTR_TO_MEM_OR_NULL:
+ case PTR_TO_FUNC:
+ case PTR_TO_MAP_KEY:
+ return true;
+@@ -2625,16 +2783,38 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
+ return reg->type != SCALAR_VALUE;
+ }
+
++/* Copy src state preserving dst->parent and dst->live fields */
++static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
++{
++ struct bpf_reg_state *parent = dst->parent;
++ enum bpf_reg_liveness live = dst->live;
++
++ *dst = *src;
++ dst->parent = parent;
++ dst->live = live;
++}
++
+ static void save_register_state(struct bpf_func_state *state,
+- int spi, struct bpf_reg_state *reg)
++ int spi, struct bpf_reg_state *reg,
++ int size)
+ {
+ int i;
+
+- state->stack[spi].spilled_ptr = *reg;
+- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
++ copy_register_state(&state->stack[spi].spilled_ptr, reg);
++ if (size == BPF_REG_SIZE)
++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
++
++ for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
++ state->stack[spi].slot_type[i - 1] = STACK_SPILL;
++
++ /* size < 8 bytes spill */
++ for (; i; i--)
++ scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
++}
+
+- for (i = 0; i < BPF_REG_SIZE; i++)
+- state->stack[spi].slot_type[i] = STACK_SPILL;
++static bool is_bpf_st_mem(struct bpf_insn *insn)
++{
++ return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
+ }
+
+ /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
+@@ -2648,8 +2828,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ {
+ struct bpf_func_state *cur; /* state of the current function */
+ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+- u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
++ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+ struct bpf_reg_state *reg = NULL;
++ u32 dst_reg = insn->dst_reg;
+
+ err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
+ if (err)
+@@ -2671,7 +2852,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ bool sanitize = reg && is_spillable_regtype(reg->type);
+
+ for (i = 0; i < size; i++) {
+- if (state->stack[spi].slot_type[i] == STACK_INVALID) {
++ u8 type = state->stack[spi].slot_type[i];
++
++ if (type != STACK_MISC && type != STACK_ZERO) {
+ sanitize = true;
+ break;
+ }
+@@ -2681,7 +2864,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ }
+
+- if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
++ if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
+ !register_is_null(reg) && env->bpf_capable) {
+ if (dst_reg != BPF_REG_FP) {
+ /* The backtracking logic can only recognize explicit
+@@ -2694,7 +2877,17 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ if (err)
+ return err;
+ }
+- save_register_state(state, spi, reg);
++ save_register_state(state, spi, reg, size);
++ /* Break the relation on a narrowing spill. */
++ if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
++ state->stack[spi].spilled_ptr.id = 0;
++ } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
++ insn->imm != 0 && env->bpf_capable) {
++ struct bpf_reg_state fake_reg = {};
++
++ __mark_reg_known(&fake_reg, (u32)insn->imm);
++ fake_reg.type = SCALAR_VALUE;
++ save_register_state(state, spi, &fake_reg, size);
+ } else if (reg && is_spillable_regtype(reg->type)) {
+ /* register containing pointer is being spilled into stack */
+ if (size != BPF_REG_SIZE) {
+@@ -2706,16 +2899,16 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
+ return -EINVAL;
+ }
+- save_register_state(state, spi, reg);
++ save_register_state(state, spi, reg, size);
+ } else {
+ u8 type = STACK_MISC;
+
+ /* regular write of data into stack destroys any spilled ptr */
+ state->stack[spi].spilled_ptr.type = NOT_INIT;
+ /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
+- if (state->stack[spi].slot_type[0] == STACK_SPILL)
++ if (is_spilled_reg(&state->stack[spi]))
+ for (i = 0; i < BPF_REG_SIZE; i++)
+- state->stack[spi].slot_type[i] = STACK_MISC;
++ scrub_spilled_slot(&state->stack[spi].slot_type[i]);
+
+ /* only mark the slot as written if all 8 bytes were written
+ * otherwise read propagation may incorrectly stop too soon
+@@ -2729,7 +2922,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+ /* when we zero initialize stack slots mark them as such */
+- if (reg && register_is_null(reg)) {
++ if ((reg && register_is_null(reg)) ||
++ (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
+ /* backtracking doesn't work for STACK_ZERO yet. */
+ err = mark_chain_precision(env, value_regno);
+ if (err)
+@@ -2803,14 +2997,17 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
+ spi = slot / BPF_REG_SIZE;
+ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+
+- if (!env->allow_ptr_leaks
+- && *stype != NOT_INIT
+- && *stype != SCALAR_VALUE) {
+- /* Reject the write if there's are spilled pointers in
+- * range. If we didn't reject here, the ptr status
+- * would be erased below (even though not all slots are
+- * actually overwritten), possibly opening the door to
+- * leaks.
++ if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
++ /* Reject the write if range we may write to has not
++ * been initialized beforehand. If we didn't reject
++ * here, the ptr status would be erased below (even
++ * though not all slots are actually overwritten),
++ * possibly opening the door to leaks.
++ *
++ * We do however catch STACK_INVALID case below, and
++ * only allow reading possibly uninitialized memory
++ * later for CAP_PERFMON, as the write may not happen to
++ * that slot.
+ */
+ verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
+ insn_idx, i);
+@@ -2918,35 +3115,56 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
+ struct bpf_reg_state *reg;
+- u8 *stype;
++ u8 *stype, type;
+
+ stype = reg_state->stack[spi].slot_type;
+ reg = &reg_state->stack[spi].spilled_ptr;
+
+- if (stype[0] == STACK_SPILL) {
+- if (size != BPF_REG_SIZE) {
++ if (is_spilled_reg(&reg_state->stack[spi])) {
++ u8 spill_size = 1;
++
++ for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
++ spill_size++;
++
++ if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
+ if (reg->type != SCALAR_VALUE) {
+ verbose_linfo(env, env->insn_idx, "; ");
+ verbose(env, "invalid size of register fill\n");
+ return -EACCES;
+ }
+- if (dst_regno >= 0) {
++
++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
++ if (dst_regno < 0)
++ return 0;
++
++ if (!(off % BPF_REG_SIZE) && size == spill_size) {
++ /* The earlier check_reg_arg() has decided the
++ * subreg_def for this insn. Save it first.
++ */
++ s32 subreg_def = state->regs[dst_regno].subreg_def;
++
++ copy_register_state(&state->regs[dst_regno], reg);
++ state->regs[dst_regno].subreg_def = subreg_def;
++ } else {
++ for (i = 0; i < size; i++) {
++ type = stype[(slot - i) % BPF_REG_SIZE];
++ if (type == STACK_SPILL)
++ continue;
++ if (type == STACK_MISC)
++ continue;
++ verbose(env, "invalid read from stack off %d+%d size %d\n",
++ off, i, size);
++ return -EACCES;
++ }
+ mark_reg_unknown(env, state->regs, dst_regno);
+- state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
+ }
+- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
++ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
+ return 0;
+ }
+- for (i = 1; i < BPF_REG_SIZE; i++) {
+- if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
+- verbose(env, "corrupted spill memory\n");
+- return -EACCES;
+- }
+- }
+
+ if (dst_regno >= 0) {
+ /* restore register state from stack */
+- state->regs[dst_regno] = *reg;
++ copy_register_state(&state->regs[dst_regno], reg);
+ /* mark reg as written since spilled pointer state likely
+ * has its liveness marks cleared by is_state_visited()
+ * which resets stack/reg liveness for state transitions
+@@ -2965,8 +3183,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
+ }
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+ } else {
+- u8 type;
+-
+ for (i = 0; i < size; i++) {
+ type = stype[(slot - i) % BPF_REG_SIZE];
+ if (type == STACK_MISC)
+@@ -3068,17 +3284,13 @@ static int check_stack_read(struct bpf_verifier_env *env,
+ }
+ /* Variable offset is prohibited for unprivileged mode for simplicity
+ * since it requires corresponding support in Spectre masking for stack
+- * ALU. See also retrieve_ptr_limit().
++ * ALU. See also retrieve_ptr_limit(). The check in
++ * check_stack_access_for_ptr_arithmetic() called by
++ * adjust_ptr_min_max_vals() prevents users from creating stack pointers
++ * with variable offsets, therefore no check is required here. Further,
++ * just checking it here would be insufficient as speculative stack
++ * writes could still lead to unsafe speculative behaviour.
+ */
+- if (!env->bypass_spec_v1 && var_off) {
+- char tn_buf[48];
+-
+- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+- verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
+- ptr_regno, tn_buf);
+- return -EACCES;
+- }
+-
+ if (!var_off) {
+ off += reg->var_off.value;
+ err = check_stack_read_fixed_off(env, state, off, size,
+@@ -3398,7 +3610,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
+ */
+ *reg_type = info.reg_type;
+
+- if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
++ if (base_type(*reg_type) == PTR_TO_BTF_ID) {
+ *btf = info.btf;
+ *btf_id = info.btf_id;
+ } else {
+@@ -3466,7 +3678,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
+ }
+
+ verbose(env, "R%d invalid %s access off=%d size=%d\n",
+- regno, reg_type_str[reg->type], off, size);
++ regno, reg_type_str(env, reg->type), off, size);
+
+ return -EACCES;
+ }
+@@ -3685,7 +3897,7 @@ process_func:
+ continue_func:
+ subprog_end = subprog[idx + 1].start;
+ for (; i < subprog_end; i++) {
+- int next_insn;
++ int next_insn, sidx;
+
+ if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
+ continue;
+@@ -3695,21 +3907,23 @@ continue_func:
+
+ /* find the callee */
+ next_insn = i + insn[i].imm + 1;
+- idx = find_subprog(env, next_insn);
+- if (idx < 0) {
++ sidx = find_subprog(env, next_insn);
++ if (sidx < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ next_insn);
+ return -EFAULT;
+ }
+- if (subprog[idx].is_async_cb) {
+- if (subprog[idx].has_tail_call) {
++ if (subprog[sidx].is_async_cb) {
++ if (subprog[sidx].has_tail_call) {
+ verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ return -EFAULT;
+ }
+- /* async callbacks don't increase bpf prog stack size */
+- continue;
++ /* async callbacks don't increase bpf prog stack size unless called directly */
++ if (!bpf_pseudo_call(insn + i))
++ continue;
+ }
+ i = next_insn;
++ idx = sidx;
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+@@ -3884,7 +4098,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+
+ static bool bpf_map_is_rdonly(const struct bpf_map *map)
+ {
+- return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
++ /* A map is considered read-only if the following condition are true:
++ *
++ * 1) BPF program side cannot change any of the map content. The
++ * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
++ * and was set at map creation time.
++ * 2) The map value(s) have been initialized from user space by a
++ * loader and then "frozen", such that no new map update/delete
++ * operations from syscall side are possible for the rest of
++ * the map's lifetime from that point onwards.
++ * 3) Any parallel/pending map update/delete operations from syscall
++ * side have been completed. Only after that point, it's safe to
++ * assume that map value(s) are immutable.
++ */
++ return (map->map_flags & BPF_F_RDONLY_PROG) &&
++ READ_ONCE(map->frozen) &&
++ !bpf_map_write_active(map);
+ }
+
+ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
+@@ -4178,15 +4407,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ mark_reg_unknown(env, regs, value_regno);
+ }
+ }
+- } else if (reg->type == PTR_TO_MEM) {
++ } else if (base_type(reg->type) == PTR_TO_MEM) {
++ bool rdonly_mem = type_is_rdonly_mem(reg->type);
++
++ if (type_may_be_null(reg->type)) {
++ verbose(env, "R%d invalid mem access '%s'\n", regno,
++ reg_type_str(env, reg->type));
++ return -EACCES;
++ }
++
++ if (t == BPF_WRITE && rdonly_mem) {
++ verbose(env, "R%d cannot write into %s\n",
++ regno, reg_type_str(env, reg->type));
++ return -EACCES;
++ }
++
+ if (t == BPF_WRITE && value_regno >= 0 &&
+ is_pointer_value(env, value_regno)) {
+ verbose(env, "R%d leaks addr into mem\n", value_regno);
+ return -EACCES;
+ }
++
+ err = check_mem_region_access(env, regno, off, size,
+ reg->mem_size, false);
+- if (!err && t == BPF_READ && value_regno >= 0)
++ if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
+ mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_CTX) {
+ enum bpf_reg_type reg_type = SCALAR_VALUE;
+@@ -4216,7 +4460,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ } else {
+ mark_reg_known_zero(env, regs,
+ value_regno);
+- if (reg_type_may_be_null(reg_type))
++ if (type_may_be_null(reg_type))
+ regs[value_regno].id = ++env->id_gen;
+ /* A load of ctx field could have different
+ * actual load size with the one encoded in the
+@@ -4224,8 +4468,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ * a sub-register.
+ */
+ regs[value_regno].subreg_def = DEF_NOT_SUBREG;
+- if (reg_type == PTR_TO_BTF_ID ||
+- reg_type == PTR_TO_BTF_ID_OR_NULL) {
++ if (base_type(reg_type) == PTR_TO_BTF_ID) {
+ regs[value_regno].btf = btf;
+ regs[value_regno].btf_id = btf_id;
+ }
+@@ -4278,7 +4521,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ } else if (type_is_sk_pointer(reg->type)) {
+ if (t == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n",
+- regno, reg_type_str[reg->type]);
++ regno, reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+ err = check_sock_access(env, insn_idx, regno, off, size, t);
+@@ -4294,26 +4537,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ } else if (reg->type == CONST_PTR_TO_MAP) {
+ err = check_ptr_to_map_access(env, regs, regno, off, size, t,
+ value_regno);
+- } else if (reg->type == PTR_TO_RDONLY_BUF) {
+- if (t == BPF_WRITE) {
+- verbose(env, "R%d cannot write into %s\n",
+- regno, reg_type_str[reg->type]);
+- return -EACCES;
++ } else if (base_type(reg->type) == PTR_TO_BUF) {
++ bool rdonly_mem = type_is_rdonly_mem(reg->type);
++ const char *buf_info;
++ u32 *max_access;
++
++ if (rdonly_mem) {
++ if (t == BPF_WRITE) {
++ verbose(env, "R%d cannot write into %s\n",
++ regno, reg_type_str(env, reg->type));
++ return -EACCES;
++ }
++ buf_info = "rdonly";
++ max_access = &env->prog->aux->max_rdonly_access;
++ } else {
++ buf_info = "rdwr";
++ max_access = &env->prog->aux->max_rdwr_access;
+ }
++
+ err = check_buffer_access(env, reg, regno, off, size, false,
+- "rdonly",
+- &env->prog->aux->max_rdonly_access);
+- if (!err && value_regno >= 0)
+- mark_reg_unknown(env, regs, value_regno);
+- } else if (reg->type == PTR_TO_RDWR_BUF) {
+- err = check_buffer_access(env, reg, regno, off, size, false,
+- "rdwr",
+- &env->prog->aux->max_rdwr_access);
+- if (!err && t == BPF_READ && value_regno >= 0)
++ buf_info, max_access);
++
++ if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
+ mark_reg_unknown(env, regs, value_regno);
+ } else {
+ verbose(env, "R%d invalid mem access '%s'\n", regno,
+- reg_type_str[reg->type]);
++ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+
+@@ -4364,9 +4613,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
+
+ if (insn->imm == BPF_CMPXCHG) {
+ /* Check comparison of R0 with memory location */
+- err = check_reg_arg(env, BPF_REG_0, SRC_OP);
++ const u32 aux_reg = BPF_REG_0;
++
++ err = check_reg_arg(env, aux_reg, SRC_OP);
+ if (err)
+ return err;
++
++ if (is_pointer_value(env, aux_reg)) {
++ verbose(env, "R%d leaks addr into mem\n", aux_reg);
++ return -EACCES;
++ }
+ }
+
+ if (is_pointer_value(env, insn->src_reg)) {
+@@ -4380,7 +4636,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
+ is_sk_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
+ insn->dst_reg,
+- reg_type_str[reg_state(env, insn->dst_reg)->type]);
++ reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+ return -EACCES;
+ }
+
+@@ -4401,13 +4657,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
+ load_reg = -1;
+ }
+
+- /* check whether we can read the memory */
++ /* Check whether we can read the memory, with second call for fetch
++ * case to simulate the register fill.
++ */
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+- BPF_SIZE(insn->code), BPF_READ, load_reg, true);
++ BPF_SIZE(insn->code), BPF_READ, -1, true);
++ if (!err && load_reg >= 0)
++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
++ BPF_SIZE(insn->code), BPF_READ, load_reg,
++ true);
+ if (err)
+ return err;
+
+- /* check whether we can write into the same memory */
++ /* Check whether we can write into the same memory. */
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ if (err)
+@@ -4514,17 +4776,17 @@ static int check_stack_range_initialized(
+ goto mark;
+ }
+
+- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
++ if (is_spilled_reg(&state->stack[spi]) &&
+ state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+ goto mark;
+
+- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
++ if (is_spilled_reg(&state->stack[spi]) &&
+ (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
+ env->allow_ptr_leaks)) {
+ if (clobber) {
+ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
+ for (j = 0; j < BPF_REG_SIZE; j++)
+- state->stack[spi].slot_type[j] = STACK_MISC;
++ scrub_spilled_slot(&state->stack[spi].slot_type[j]);
+ }
+ goto mark;
+ }
+@@ -4557,13 +4819,20 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
+ {
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
++ const char *buf_info;
++ u32 *max_access;
+
+- switch (reg->type) {
++ switch (base_type(reg->type)) {
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ return check_packet_access(env, regno, reg->off, access_size,
+ zero_size_allowed);
+ case PTR_TO_MAP_KEY:
++ if (meta && meta->raw_mode) {
++ verbose(env, "R%d cannot write into %s\n", regno,
++ reg_type_str(env, reg->type));
++ return -EACCES;
++ }
+ return check_mem_region_access(env, regno, reg->off, access_size,
+ reg->map_ptr->key_size, false);
+ case PTR_TO_MAP_VALUE:
+@@ -4574,21 +4843,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+ return check_map_access(env, regno, reg->off, access_size,
+ zero_size_allowed);
+ case PTR_TO_MEM:
++ if (type_is_rdonly_mem(reg->type)) {
++ if (meta && meta->raw_mode) {
++ verbose(env, "R%d cannot write into %s\n", regno,
++ reg_type_str(env, reg->type));
++ return -EACCES;
++ }
++ }
+ return check_mem_region_access(env, regno, reg->off,
+ access_size, reg->mem_size,
+ zero_size_allowed);
+- case PTR_TO_RDONLY_BUF:
+- if (meta && meta->raw_mode)
+- return -EACCES;
+- return check_buffer_access(env, reg, regno, reg->off,
+- access_size, zero_size_allowed,
+- "rdonly",
+- &env->prog->aux->max_rdonly_access);
+- case PTR_TO_RDWR_BUF:
++ case PTR_TO_BUF:
++ if (type_is_rdonly_mem(reg->type)) {
++ if (meta && meta->raw_mode) {
++ verbose(env, "R%d cannot write into %s\n", regno,
++ reg_type_str(env, reg->type));
++ return -EACCES;
++ }
++
++ buf_info = "rdonly";
++ max_access = &env->prog->aux->max_rdonly_access;
++ } else {
++ buf_info = "rdwr";
++ max_access = &env->prog->aux->max_rdwr_access;
++ }
+ return check_buffer_access(env, reg, regno, reg->off,
+ access_size, zero_size_allowed,
+- "rdwr",
+- &env->prog->aux->max_rdwr_access);
++ buf_info, max_access);
+ case PTR_TO_STACK:
+ return check_stack_range_initialized(
+ env,
+@@ -4600,9 +4881,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+ register_is_null(reg))
+ return 0;
+
+- verbose(env, "R%d type=%s expected=%s\n", regno,
+- reg_type_str[reg->type],
+- reg_type_str[PTR_TO_STACK]);
++ verbose(env, "R%d type=%s ", regno,
++ reg_type_str(env, reg->type));
++ verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
+ return -EACCES;
+ }
+ }
+@@ -4613,7 +4894,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ if (register_is_null(reg))
+ return 0;
+
+- if (reg_type_may_be_null(reg->type)) {
++ if (type_may_be_null(reg->type)) {
+ /* Assuming that the register contains a value check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+@@ -4761,9 +5042,8 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
+
+ static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
+ {
+- return type == ARG_PTR_TO_MEM ||
+- type == ARG_PTR_TO_MEM_OR_NULL ||
+- type == ARG_PTR_TO_UNINIT_MEM;
++ return base_type(type) == ARG_PTR_TO_MEM ||
++ base_type(type) == ARG_PTR_TO_UNINIT_MEM;
+ }
+
+ static bool arg_type_is_mem_size(enum bpf_arg_type type)
+@@ -4865,8 +5145,7 @@ static const struct bpf_reg_types mem_types = {
+ PTR_TO_MAP_KEY,
+ PTR_TO_MAP_VALUE,
+ PTR_TO_MEM,
+- PTR_TO_RDONLY_BUF,
+- PTR_TO_RDWR_BUF,
++ PTR_TO_BUF,
+ },
+ };
+
+@@ -4897,31 +5176,26 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
+ [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
+- [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
+ [ARG_CONST_SIZE] = &scalar_types,
+ [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
+ [ARG_PTR_TO_CTX] = &context_types,
+- [ARG_PTR_TO_CTX_OR_NULL] = &context_types,
+ [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+ #ifdef CONFIG_NET
+ [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
+ #endif
+ [ARG_PTR_TO_SOCKET] = &fullsock_types,
+- [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
+ [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+- [ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
+ [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+- [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
+ [ARG_PTR_TO_INT] = &int_ptr_types,
+ [ARG_PTR_TO_LONG] = &int_ptr_types,
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
+ [ARG_PTR_TO_FUNC] = &func_ptr_types,
+- [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
++ [ARG_PTR_TO_STACK] = &stack_ptr_types,
+ [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
+ [ARG_PTR_TO_TIMER] = &timer_types,
+ };
+@@ -4935,12 +5209,27 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
+ const struct bpf_reg_types *compatible;
+ int i, j;
+
+- compatible = compatible_reg_types[arg_type];
++ compatible = compatible_reg_types[base_type(arg_type)];
+ if (!compatible) {
+ verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+ return -EFAULT;
+ }
+
++ /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
++ *
++ * Same for MAYBE_NULL:
++ *
++ * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
++ *
++ * Therefore we fold these flags depending on the arg_type before comparison.
++ */
++ if (arg_type & MEM_RDONLY)
++ type &= ~MEM_RDONLY;
++ if (arg_type & PTR_MAYBE_NULL)
++ type &= ~PTR_MAYBE_NULL;
++
+ for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
+ expected = compatible->types[i];
+ if (expected == NOT_INIT)
+@@ -4950,14 +5239,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
+ goto found;
+ }
+
+- verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
++ verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
+ for (j = 0; j + 1 < i; j++)
+- verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
+- verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
++ verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
++ verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
+ return -EACCES;
+
+ found:
+- if (type == PTR_TO_BTF_ID) {
++ if (reg->type == PTR_TO_BTF_ID) {
+ if (!arg_btf_id) {
+ if (!compatible->btf_id) {
+ verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
+@@ -5016,15 +5305,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
+ return -EACCES;
+ }
+
+- if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
+- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
++ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
+ err = resolve_map_arg_type(env, meta, &arg_type);
+ if (err)
+ return err;
+ }
+
+- if (register_is_null(reg) && arg_type_may_be_null(arg_type))
++ if (register_is_null(reg) && type_may_be_null(arg_type))
+ /* A NULL register has a SCALAR_VALUE type, so skip
+ * type checking.
+ */
+@@ -5093,10 +5381,11 @@ skip_type_check:
+ err = check_helper_mem_access(env, regno,
+ meta->map_ptr->key_size, false,
+ NULL);
+- } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+- (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
+- !register_is_null(reg)) ||
+- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
++ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
++ if (type_may_be_null(arg_type) && register_is_null(reg))
++ return 0;
++
+ /* bpf_map_xxx(..., map_ptr, ..., value) call:
+ * check [value, value + map->value_size) validity
+ */
+@@ -5590,31 +5879,15 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
+ /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
+ * are now invalid, so turn them into unknown SCALAR_VALUE.
+ */
+-static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
+- struct bpf_func_state *state)
++static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+ {
+- struct bpf_reg_state *regs = state->regs, *reg;
+- int i;
+-
+- for (i = 0; i < MAX_BPF_REG; i++)
+- if (reg_is_pkt_pointer_any(&regs[i]))
+- mark_reg_unknown(env, regs, i);
++ struct bpf_func_state *state;
++ struct bpf_reg_state *reg;
+
+- bpf_for_each_spilled_reg(i, state, reg) {
+- if (!reg)
+- continue;
++ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+ if (reg_is_pkt_pointer_any(reg))
+ __mark_reg_unknown(env, reg);
+- }
+-}
+-
+-static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+-{
+- struct bpf_verifier_state *vstate = env->cur_state;
+- int i;
+-
+- for (i = 0; i <= vstate->curframe; i++)
+- __clear_all_pkt_pointers(env, vstate->frame[i]);
++ }));
+ }
+
+ enum {
+@@ -5643,41 +5916,28 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range
+ reg->range = AT_PKT_END;
+ }
+
+-static void release_reg_references(struct bpf_verifier_env *env,
+- struct bpf_func_state *state,
+- int ref_obj_id)
+-{
+- struct bpf_reg_state *regs = state->regs, *reg;
+- int i;
+-
+- for (i = 0; i < MAX_BPF_REG; i++)
+- if (regs[i].ref_obj_id == ref_obj_id)
+- mark_reg_unknown(env, regs, i);
+-
+- bpf_for_each_spilled_reg(i, state, reg) {
+- if (!reg)
+- continue;
+- if (reg->ref_obj_id == ref_obj_id)
+- __mark_reg_unknown(env, reg);
+- }
+-}
+-
+ /* The pointer with the specified id has released its reference to kernel
+ * resources. Identify all copies of the same pointer and clear the reference.
+ */
+ static int release_reference(struct bpf_verifier_env *env,
+ int ref_obj_id)
+ {
+- struct bpf_verifier_state *vstate = env->cur_state;
++ struct bpf_func_state *state;
++ struct bpf_reg_state *reg;
+ int err;
+- int i;
+
+ err = release_reference_state(cur_func(env), ref_obj_id);
+ if (err)
+ return err;
+
+- for (i = 0; i <= vstate->curframe; i++)
+- release_reg_references(env, vstate->frame[i], ref_obj_id);
++ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
++ if (reg->ref_obj_id == ref_obj_id) {
++ if (!env->allow_ptr_leaks)
++ __mark_reg_not_init(env, reg);
++ else
++ __mark_reg_unknown(env, reg);
++ }
++ }));
+
+ return 0;
+ }
+@@ -5699,6 +5959,10 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
+ struct bpf_func_state *callee,
+ int insn_idx);
+
++static int set_callee_state(struct bpf_verifier_env *env,
++ struct bpf_func_state *caller,
++ struct bpf_func_state *callee, int insn_idx);
++
+ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx, int subprog,
+ set_callee_state_fn set_callee_state_cb)
+@@ -5749,7 +6013,18 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ }
+ }
+
++ /* set_callee_state is used for direct subprog calls, but we are
++ * interested in validating only BPF helpers that can call subprogs as
++ * callbacks
++ */
++ if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
++ verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
++ func_id_name(insn->imm), insn->imm);
++ return -EFAULT;
++ }
++
+ if (insn->code == (BPF_JMP | BPF_CALL) &&
++ insn->src_reg == 0 &&
+ insn->imm == BPF_FUNC_timer_set_callback) {
+ struct bpf_verifier_state *async_cb;
+
+@@ -5792,11 +6067,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ /* Transfer references to the callee */
+ err = copy_reference_state(callee, caller);
+ if (err)
+- return err;
++ goto err_out;
+
+ err = set_callee_state_cb(env, caller, callee, *insn_idx);
+ if (err)
+- return err;
++ goto err_out;
+
+ clear_caller_saved_regs(env, caller->regs);
+
+@@ -5813,6 +6088,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ print_verifier_state(env, callee);
+ }
+ return 0;
++
++err_out:
++ free_func_state(callee);
++ state->frame[state->curframe + 1] = NULL;
++ return err;
+ }
+
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+@@ -5950,8 +6230,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ return -EINVAL;
+ }
+
+- state->curframe--;
+- caller = state->frame[state->curframe];
++ caller = state->frame[state->curframe - 1];
+ if (callee->in_callback_fn) {
+ /* enforce R0 return value range [0, 1]. */
+ struct tnum range = tnum_range(0, 1);
+@@ -5969,10 +6248,17 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ caller->regs[BPF_REG_0] = *r0;
+ }
+
+- /* Transfer references to the caller */
+- err = copy_reference_state(caller, callee);
+- if (err)
+- return err;
++ /* callback_fn frame should have released its own additions to parent's
++ * reference state at this point, or check_reference_leak would
++ * complain, hence it must be the same as the caller. There is no need
++ * to copy it back.
++ */
++ if (!callee->in_callback_fn) {
++ /* Transfer references to the caller */
++ err = copy_reference_state(caller, callee);
++ if (err)
++ return err;
++ }
+
+ *insn_idx = callee->callsite + 1;
+ if (env->log.level & BPF_LOG_LEVEL) {
+@@ -5983,7 +6269,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ }
+ /* clear everything in the callee */
+ free_func_state(callee);
+- state->frame[state->curframe + 1] = NULL;
++ state->frame[state->curframe--] = NULL;
+ return 0;
+ }
+
+@@ -6005,9 +6291,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
+ ret_reg->s32_max_value = meta->msize_max_value;
+ ret_reg->smin_value = -MAX_ERRNO;
+ ret_reg->s32_min_value = -MAX_ERRNO;
+- __reg_deduce_bounds(ret_reg);
+- __reg_bound_offset(ret_reg);
+- __update_reg_bounds(ret_reg);
++ reg_bounds_sync(ret_reg);
+ }
+
+ static int
+@@ -6062,8 +6346,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
+ struct bpf_map *map = meta->map_ptr;
+- struct tnum range;
+- u64 val;
++ u64 val, max;
+ int err;
+
+ if (func_id != BPF_FUNC_tail_call)
+@@ -6073,10 +6356,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ return -EINVAL;
+ }
+
+- range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
++ val = reg->var_off.value;
++ max = map->max_entries;
+
+- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
++ if (!(register_is_const(reg) && val < max)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+@@ -6084,8 +6368,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ err = mark_chain_precision(env, BPF_REG_3);
+ if (err)
+ return err;
+-
+- val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+@@ -6097,13 +6379,20 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ static int check_reference_leak(struct bpf_verifier_env *env)
+ {
+ struct bpf_func_state *state = cur_func(env);
++ bool refs_lingering = false;
+ int i;
+
++ if (state->frameno && !state->in_callback_fn)
++ return 0;
++
+ for (i = 0; i < state->acquired_refs; i++) {
++ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
++ continue;
+ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
+ state->refs[i].id, state->refs[i].insn_idx);
++ refs_lingering = true;
+ }
+- return state->acquired_refs ? -EINVAL : 0;
++ return refs_lingering ? -EINVAL : 0;
+ }
+
+ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+@@ -6170,6 +6459,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ int *insn_idx_p)
+ {
+ const struct bpf_func_proto *fn = NULL;
++ enum bpf_return_type ret_type;
++ enum bpf_type_flag ret_flag;
+ struct bpf_reg_state *regs;
+ struct bpf_call_arg_meta meta;
+ int insn_idx = *insn_idx_p;
+@@ -6303,13 +6594,14 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+
+ /* update return register (already marked as written above) */
+- if (fn->ret_type == RET_INTEGER) {
++ ret_type = fn->ret_type;
++ ret_flag = type_flag(fn->ret_type);
++ if (ret_type == RET_INTEGER) {
+ /* sets type to SCALAR_VALUE */
+ mark_reg_unknown(env, regs, BPF_REG_0);
+- } else if (fn->ret_type == RET_VOID) {
++ } else if (ret_type == RET_VOID) {
+ regs[BPF_REG_0].type = NOT_INIT;
+- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
+- fn->ret_type == RET_PTR_TO_MAP_VALUE) {
++ } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
+ /* There is no offset yet applied, variable or fixed */
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ /* remember map_ptr, so that check_map_access()
+@@ -6323,28 +6615,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ }
+ regs[BPF_REG_0].map_ptr = meta.map_ptr;
+ regs[BPF_REG_0].map_uid = meta.map_uid;
+- if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+- if (map_value_has_spin_lock(meta.map_ptr))
+- regs[BPF_REG_0].id = ++env->id_gen;
+- } else {
+- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
++ if (!type_may_be_null(ret_type) &&
++ map_value_has_spin_lock(meta.map_ptr)) {
++ regs[BPF_REG_0].id = ++env->id_gen;
+ }
+- } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
++ } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+- regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
+- } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
++ regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
++ } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+- regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
+- } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
++ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
++ } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+- regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
+- } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
++ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
++ } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+- regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
+ regs[BPF_REG_0].mem_size = meta.mem_size;
+- } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
++ } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
+ const struct btf_type *t;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+@@ -6362,29 +6651,30 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+- regs[BPF_REG_0].type =
+- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+- PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
+ regs[BPF_REG_0].mem_size = tsize;
+ } else {
+- regs[BPF_REG_0].type =
+- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+- PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
++ /* MEM_RDONLY may be carried from ret_flag, but it
++ * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
++ * it will confuse the check of PTR_TO_BTF_ID in
++ * check_mem_access().
++ */
++ ret_flag &= ~MEM_RDONLY;
++
++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
+ regs[BPF_REG_0].btf = meta.ret_btf;
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+ }
+- } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
+- fn->ret_type == RET_PTR_TO_BTF_ID) {
++ } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
+ int ret_btf_id;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+- regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
+- PTR_TO_BTF_ID :
+- PTR_TO_BTF_ID_OR_NULL;
++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
+ ret_btf_id = *fn->ret_btf_id;
+ if (ret_btf_id == 0) {
+- verbose(env, "invalid return type %d of func %s#%d\n",
+- fn->ret_type, func_id_name(func_id), func_id);
++ verbose(env, "invalid return type %u of func %s#%d\n",
++ base_type(ret_type), func_id_name(func_id),
++ func_id);
+ return -EINVAL;
+ }
+ /* current BPF helper definitions are only coming from
+@@ -6393,12 +6683,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
+ regs[BPF_REG_0].btf = btf_vmlinux;
+ regs[BPF_REG_0].btf_id = ret_btf_id;
+ } else {
+- verbose(env, "unknown return type %d of func %s#%d\n",
+- fn->ret_type, func_id_name(func_id), func_id);
++ verbose(env, "unknown return type %u of func %s#%d\n",
++ base_type(ret_type), func_id_name(func_id), func_id);
+ return -EINVAL;
+ }
+
+- if (reg_type_may_be_null(regs[BPF_REG_0].type))
++ if (type_may_be_null(regs[BPF_REG_0].type))
+ regs[BPF_REG_0].id = ++env->id_gen;
+
+ if (is_ptr_cast_function(func_id)) {
+@@ -6597,25 +6887,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+
+ if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+ verbose(env, "math between %s pointer and %lld is not allowed\n",
+- reg_type_str[type], val);
++ reg_type_str(env, type), val);
+ return false;
+ }
+
+ if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "%s pointer offset %d is not allowed\n",
+- reg_type_str[type], reg->off);
++ reg_type_str(env, type), reg->off);
+ return false;
+ }
+
+ if (smin == S64_MIN) {
+ verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+- reg_type_str[type]);
++ reg_type_str(env, type));
+ return false;
+ }
+
+ if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "value %lld makes %s pointer be out of bounds\n",
+- smin, reg_type_str[type]);
++ smin, reg_type_str(env, type));
+ return false;
+ }
+
+@@ -6818,7 +7108,7 @@ do_sim:
+ */
+ if (!ptr_is_dst_reg) {
+ tmp = *dst_reg;
+- *dst_reg = *ptr_reg;
++ copy_register_state(dst_reg, ptr_reg);
+ }
+ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+ env->insn_idx);
+@@ -6992,11 +7282,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ return -EACCES;
+ }
+
+- switch (ptr_reg->type) {
+- case PTR_TO_MAP_VALUE_OR_NULL:
++ if (ptr_reg->type & PTR_MAYBE_NULL) {
+ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
+- dst, reg_type_str[ptr_reg->type]);
++ dst, reg_type_str(env, ptr_reg->type));
+ return -EACCES;
++ }
++
++ switch (base_type(ptr_reg->type)) {
+ case CONST_PTR_TO_MAP:
+ /* smin_val represents the known value */
+ if (known && smin_val == 0 && opcode == BPF_ADD)
+@@ -7004,16 +7296,16 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ fallthrough;
+ case PTR_TO_PACKET_END:
+ case PTR_TO_SOCKET:
+- case PTR_TO_SOCKET_OR_NULL:
+ case PTR_TO_SOCK_COMMON:
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+- case PTR_TO_TCP_SOCK_OR_NULL:
+ case PTR_TO_XDP_SOCK:
++reject:
+ verbose(env, "R%d pointer arithmetic on %s prohibited\n",
+- dst, reg_type_str[ptr_reg->type]);
++ dst, reg_type_str(env, ptr_reg->type));
+ return -EACCES;
+ default:
++ if (type_may_be_null(ptr_reg->type))
++ goto reject;
+ break;
+ }
+
+@@ -7164,11 +7456,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+
+ if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+ return -EINVAL;
+-
+- __update_reg_bounds(dst_reg);
+- __reg_deduce_bounds(dst_reg);
+- __reg_bound_offset(dst_reg);
+-
++ reg_bounds_sync(dst_reg);
+ if (sanitize_check_bounds(env, insn, dst_reg) < 0)
+ return -EACCES;
+ if (sanitize_needed(opcode)) {
+@@ -7906,10 +8194,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+ /* ALU32 ops are zero extended into 64bit register */
+ if (alu32)
+ zext_32_to_64(dst_reg);
+-
+- __update_reg_bounds(dst_reg);
+- __reg_deduce_bounds(dst_reg);
+- __reg_bound_offset(dst_reg);
++ reg_bounds_sync(dst_reg);
+ return 0;
+ }
+
+@@ -7969,6 +8254,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+ return err;
+ return adjust_ptr_min_max_vals(env, insn,
+ dst_reg, src_reg);
++ } else if (dst_reg->precise) {
++ /* if dst_reg is precise, src_reg should be precise as well */
++ err = mark_chain_precision(env, insn->src_reg);
++ if (err)
++ return err;
+ }
+ } else {
+ /* Pretend the src is a reg with a known value, since we only
+@@ -8074,7 +8364,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ * to propagate min/max range.
+ */
+ src_reg->id = ++env->id_gen;
+- *dst_reg = *src_reg;
++ copy_register_state(dst_reg, src_reg);
+ dst_reg->live |= REG_LIVE_WRITTEN;
+ dst_reg->subreg_def = DEF_NOT_SUBREG;
+ } else {
+@@ -8085,7 +8375,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ insn->src_reg);
+ return -EACCES;
+ } else if (src_reg->type == SCALAR_VALUE) {
+- *dst_reg = *src_reg;
++ copy_register_state(dst_reg, src_reg);
+ /* Make sure ID is cleared otherwise
+ * dst_reg min/max could be incorrectly
+ * propagated into src_reg by find_equal_scalars()
+@@ -8098,6 +8388,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ insn->dst_reg);
+ }
+ zext_32_to_64(dst_reg);
++ reg_bounds_sync(dst_reg);
+ }
+ } else {
+ /* case: R = imm
+@@ -8169,34 +8460,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ return 0;
+ }
+
+-static void __find_good_pkt_pointers(struct bpf_func_state *state,
+- struct bpf_reg_state *dst_reg,
+- enum bpf_reg_type type, int new_range)
+-{
+- struct bpf_reg_state *reg;
+- int i;
+-
+- for (i = 0; i < MAX_BPF_REG; i++) {
+- reg = &state->regs[i];
+- if (reg->type == type && reg->id == dst_reg->id)
+- /* keep the maximum range already checked */
+- reg->range = max(reg->range, new_range);
+- }
+-
+- bpf_for_each_spilled_reg(i, state, reg) {
+- if (!reg)
+- continue;
+- if (reg->type == type && reg->id == dst_reg->id)
+- reg->range = max(reg->range, new_range);
+- }
+-}
+-
+ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
+ struct bpf_reg_state *dst_reg,
+ enum bpf_reg_type type,
+ bool range_right_open)
+ {
+- int new_range, i;
++ struct bpf_func_state *state;
++ struct bpf_reg_state *reg;
++ int new_range;
+
+ if (dst_reg->off < 0 ||
+ (dst_reg->off == 0 && range_right_open))
+@@ -8212,7 +8483,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
+
+ new_range = dst_reg->off;
+ if (range_right_open)
+- new_range--;
++ new_range++;
+
+ /* Examples for register markings:
+ *
+@@ -8261,9 +8532,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
+ * the range won't allow anything.
+ * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
+ */
+- for (i = 0; i <= vstate->curframe; i++)
+- __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+- new_range);
++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
++ if (reg->type == type && reg->id == dst_reg->id)
++ /* keep the maximum range already checked */
++ reg->range = max(reg->range, new_range);
++ }));
+ }
+
+ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
+@@ -8535,26 +8808,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
+ return;
+
+ switch (opcode) {
++ /* JEQ/JNE comparison doesn't change the register equivalence.
++ *
++ * r1 = r2;
++ * if (r1 == 42) goto label;
++ * ...
++ * label: // here both r1 and r2 are known to be 42.
++ *
++ * Hence when marking register as known preserve it's ID.
++ */
+ case BPF_JEQ:
++ if (is_jmp32) {
++ __mark_reg32_known(true_reg, val32);
++ true_32off = tnum_subreg(true_reg->var_off);
++ } else {
++ ___mark_reg_known(true_reg, val);
++ true_64off = true_reg->var_off;
++ }
++ break;
+ case BPF_JNE:
+- {
+- struct bpf_reg_state *reg =
+- opcode == BPF_JEQ ? true_reg : false_reg;
+-
+- /* JEQ/JNE comparison doesn't change the register equivalence.
+- * r1 = r2;
+- * if (r1 == 42) goto label;
+- * ...
+- * label: // here both r1 and r2 are known to be 42.
+- *
+- * Hence when marking register as known preserve it's ID.
+- */
+- if (is_jmp32)
+- __mark_reg32_known(reg, val32);
+- else
+- ___mark_reg_known(reg, val);
++ if (is_jmp32) {
++ __mark_reg32_known(false_reg, val32);
++ false_32off = tnum_subreg(false_reg->var_off);
++ } else {
++ ___mark_reg_known(false_reg, val);
++ false_64off = false_reg->var_off;
++ }
+ break;
+- }
+ case BPF_JSET:
+ if (is_jmp32) {
+ false_32off = tnum_and(false_32off, tnum_const(~val32));
+@@ -8693,21 +8973,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
+ dst_reg->smax_value);
+ src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
+ dst_reg->var_off);
+- /* We might have learned new bounds from the var_off. */
+- __update_reg_bounds(src_reg);
+- __update_reg_bounds(dst_reg);
+- /* We might have learned something about the sign bit. */
+- __reg_deduce_bounds(src_reg);
+- __reg_deduce_bounds(dst_reg);
+- /* We might have learned some bits from the bounds. */
+- __reg_bound_offset(src_reg);
+- __reg_bound_offset(dst_reg);
+- /* Intersecting with the old var_off might have improved our bounds
+- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+- * then new var_off is (0; 0x7f...fc) which improves our umax.
+- */
+- __update_reg_bounds(src_reg);
+- __update_reg_bounds(dst_reg);
++ reg_bounds_sync(src_reg);
++ reg_bounds_sync(dst_reg);
+ }
+
+ static void reg_combine_min_max(struct bpf_reg_state *true_src,
+@@ -8730,17 +8997,17 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+ struct bpf_reg_state *reg, u32 id,
+ bool is_null)
+ {
+- if (reg_type_may_be_null(reg->type) && reg->id == id &&
++ if (type_may_be_null(reg->type) && reg->id == id &&
+ !WARN_ON_ONCE(!reg->id)) {
+- /* Old offset (both fixed and variable parts) should
+- * have been known-zero, because we don't allow pointer
+- * arithmetic on pointers that might be NULL.
+- */
+ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
+ !tnum_equals_const(reg->var_off, 0) ||
+ reg->off)) {
+- __mark_reg_known_zero(reg);
+- reg->off = 0;
++ /* Old offset (both fixed and variable parts) should
++ * have been known-zero, because we don't allow pointer
++ * arithmetic on pointers that might be NULL. If we
++ * see this happening, don't convert the register.
++ */
++ return;
+ }
+ if (is_null) {
+ reg->type = SCALAR_VALUE;
+@@ -8758,7 +9025,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+
+ if (!reg_may_point_to_spin_lock(reg)) {
+ /* For not-NULL ptr, reg->ref_obj_id will be reset
+- * in release_reg_references().
++ * in release_reference().
+ *
+ * reg->id is still used by spin_lock ptr. Other
+ * than spin_lock ptr type, reg->id can be reset.
+@@ -8768,22 +9035,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+ }
+ }
+
+-static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+- bool is_null)
+-{
+- struct bpf_reg_state *reg;
+- int i;
+-
+- for (i = 0; i < MAX_BPF_REG; i++)
+- mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+-
+- bpf_for_each_spilled_reg(i, state, reg) {
+- if (!reg)
+- continue;
+- mark_ptr_or_null_reg(state, reg, id, is_null);
+- }
+-}
+-
+ /* The logic is similar to find_good_pkt_pointers(), both could eventually
+ * be folded together at some point.
+ */
+@@ -8791,10 +9042,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
+ bool is_null)
+ {
+ struct bpf_func_state *state = vstate->frame[vstate->curframe];
+- struct bpf_reg_state *regs = state->regs;
++ struct bpf_reg_state *regs = state->regs, *reg;
+ u32 ref_obj_id = regs[regno].ref_obj_id;
+ u32 id = regs[regno].id;
+- int i;
+
+ if (ref_obj_id && ref_obj_id == id && is_null)
+ /* regs[regno] is in the " == NULL" branch.
+@@ -8803,8 +9053,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
+ */
+ WARN_ON_ONCE(release_reference_state(state, id));
+
+- for (i = 0; i <= vstate->curframe; i++)
+- __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
++ mark_ptr_or_null_reg(state, reg, id, is_null);
++ }));
+ }
+
+ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
+@@ -8917,23 +9168,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
+ {
+ struct bpf_func_state *state;
+ struct bpf_reg_state *reg;
+- int i, j;
+
+- for (i = 0; i <= vstate->curframe; i++) {
+- state = vstate->frame[i];
+- for (j = 0; j < MAX_BPF_REG; j++) {
+- reg = &state->regs[j];
+- if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+- *reg = *known_reg;
+- }
+-
+- bpf_for_each_spilled_reg(j, state, reg) {
+- if (!reg)
+- continue;
+- if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+- *reg = *known_reg;
+- }
+- }
++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({
++ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
++ copy_register_state(reg, known_reg);
++ }));
+ }
+
+ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+@@ -9108,7 +9347,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+ */
+ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
+ insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
+- reg_type_may_be_null(dst_reg->type)) {
++ type_may_be_null(dst_reg->type)) {
+ /* Mark all identical registers in each branch as either
+ * safe or unknown depending R == 0 or R != 0 conditional.
+ */
+@@ -9159,11 +9398,15 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ return 0;
+ }
+
+- if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+- mark_reg_known_zero(env, regs, insn->dst_reg);
++ /* All special src_reg cases are listed below. From this point onwards
++ * we either succeed and assign a corresponding dst_reg->type after
++ * zeroing the offset, or fail and reject the program.
++ */
++ mark_reg_known_zero(env, regs, insn->dst_reg);
+
++ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+ dst_reg->type = aux->btf_var.reg_type;
+- switch (dst_reg->type) {
++ switch (base_type(dst_reg->type)) {
+ case PTR_TO_MEM:
+ dst_reg->mem_size = aux->btf_var.mem_size;
+ break;
+@@ -9181,7 +9424,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
+
+ if (insn->src_reg == BPF_PSEUDO_FUNC) {
+ struct bpf_prog_aux *aux = env->prog->aux;
+- u32 subprogno = insn[1].imm;
++ u32 subprogno = find_subprog(env,
++ env->insn_idx + insn->imm + 1);
+
+ if (!aux->func_info) {
+ verbose(env, "missing btf func_info\n");
+@@ -9198,7 +9442,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
+ }
+
+ map = env->used_maps[aux->map_index];
+- mark_reg_known_zero(env, regs, insn->dst_reg);
+ dst_reg->map_ptr = map;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+@@ -9361,7 +9604,7 @@ static int check_return_code(struct bpf_verifier_env *env)
+ /* enforce return zero from async callbacks like timer */
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "In async callback the register R0 is not a known value (%s)\n",
+- reg_type_str[reg->type]);
++ reg_type_str(env, reg->type));
+ return -EINVAL;
+ }
+
+@@ -9375,7 +9618,7 @@ static int check_return_code(struct bpf_verifier_env *env)
+ if (is_subprog) {
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+- reg_type_str[reg->type]);
++ reg_type_str(env, reg->type));
+ return -EINVAL;
+ }
+ return 0;
+@@ -9439,7 +9682,7 @@ static int check_return_code(struct bpf_verifier_env *env)
+
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "At program exit the register R0 is not a known value (%s)\n",
+- reg_type_str[reg->type]);
++ reg_type_str(env, reg->type));
+ return -EINVAL;
+ }
+
+@@ -10220,12 +10463,12 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ return true;
+ if (rcur->type == NOT_INIT)
+ return false;
+- switch (rold->type) {
++ switch (base_type(rold->type)) {
+ case SCALAR_VALUE:
+ if (env->explore_alu_limits)
+ return false;
+ if (rcur->type == SCALAR_VALUE) {
+- if (!rold->precise && !rcur->precise)
++ if (!rold->precise)
+ return true;
+ /* new val must satisfy old val knowledge */
+ return range_within(rold, rcur) &&
+@@ -10242,6 +10485,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ }
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
++ /* a PTR_TO_MAP_VALUE could be safe to use as a
++ * PTR_TO_MAP_VALUE_OR_NULL into the same map.
++ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
++ * checked, doing so could have affected others with the same
++ * id, and we can't check for that because we lost the id when
++ * we converted to a PTR_TO_MAP_VALUE.
++ */
++ if (type_may_be_null(rold->type)) {
++ if (!type_may_be_null(rcur->type))
++ return false;
++ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
++ return false;
++ /* Check our ids match any regs they're supposed to */
++ return check_ids(rold->id, rcur->id, idmap);
++ }
++
+ /* If the new min/max/var_off satisfy the old ones and
+ * everything else matches, we are OK.
+ * 'id' is not compared, since it's only used for maps with
+@@ -10253,20 +10512,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
+- case PTR_TO_MAP_VALUE_OR_NULL:
+- /* a PTR_TO_MAP_VALUE could be safe to use as a
+- * PTR_TO_MAP_VALUE_OR_NULL into the same map.
+- * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
+- * checked, doing so could have affected others with the same
+- * id, and we can't check for that because we lost the id when
+- * we converted to a PTR_TO_MAP_VALUE.
+- */
+- if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
+- return false;
+- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
+- return false;
+- /* Check our ids match any regs they're supposed to */
+- return check_ids(rold->id, rcur->id, idmap);
+ case PTR_TO_PACKET_META:
+ case PTR_TO_PACKET:
+ if (rcur->type != rold->type)
+@@ -10295,11 +10540,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ case PTR_TO_PACKET_END:
+ case PTR_TO_FLOW_KEYS:
+ case PTR_TO_SOCKET:
+- case PTR_TO_SOCKET_OR_NULL:
+ case PTR_TO_SOCK_COMMON:
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+- case PTR_TO_TCP_SOCK_OR_NULL:
+ case PTR_TO_XDP_SOCK:
+ /* Only valid matches are exact, which memcmp() above
+ * would have accepted
+@@ -10356,9 +10598,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ * return false to continue verification of this path
+ */
+ return false;
+- if (i % BPF_REG_SIZE)
++ if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
+ continue;
+- if (old->stack[spi].slot_type[0] != STACK_SPILL)
++ if (!is_spilled_reg(&old->stack[spi]))
+ continue;
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap))
+@@ -10549,34 +10791,38 @@ static int propagate_precision(struct bpf_verifier_env *env,
+ {
+ struct bpf_reg_state *state_reg;
+ struct bpf_func_state *state;
+- int i, err = 0;
++ int i, err = 0, fr;
+
+- state = old->frame[old->curframe];
+- state_reg = state->regs;
+- for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+- if (state_reg->type != SCALAR_VALUE ||
+- !state_reg->precise)
+- continue;
+- if (env->log.level & BPF_LOG_LEVEL2)
+- verbose(env, "propagating r%d\n", i);
+- err = mark_chain_precision(env, i);
+- if (err < 0)
+- return err;
+- }
++ for (fr = old->curframe; fr >= 0; fr--) {
++ state = old->frame[fr];
++ state_reg = state->regs;
++ for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
++ if (state_reg->type != SCALAR_VALUE ||
++ !state_reg->precise ||
++ !(state_reg->live & REG_LIVE_READ))
++ continue;
++ if (env->log.level & BPF_LOG_LEVEL2)
++ verbose(env, "frame %d: propagating r%d\n", fr, i);
++ err = mark_chain_precision_frame(env, fr, i);
++ if (err < 0)
++ return err;
++ }
+
+- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+- if (state->stack[i].slot_type[0] != STACK_SPILL)
+- continue;
+- state_reg = &state->stack[i].spilled_ptr;
+- if (state_reg->type != SCALAR_VALUE ||
+- !state_reg->precise)
+- continue;
+- if (env->log.level & BPF_LOG_LEVEL2)
+- verbose(env, "propagating fp%d\n",
+- (-i - 1) * BPF_REG_SIZE);
+- err = mark_chain_precision_stack(env, i);
+- if (err < 0)
+- return err;
++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
++ if (!is_spilled_reg(&state->stack[i]))
++ continue;
++ state_reg = &state->stack[i].spilled_ptr;
++ if (state_reg->type != SCALAR_VALUE ||
++ !state_reg->precise ||
++ !(state_reg->live & REG_LIVE_READ))
++ continue;
++ if (env->log.level & BPF_LOG_LEVEL2)
++ verbose(env, "frame %d: propagating fp%d\n",
++ fr, (-i - 1) * BPF_REG_SIZE);
++ err = mark_chain_precision_stack_frame(env, fr, i);
++ if (err < 0)
++ return err;
++ }
+ }
+ return 0;
+ }
+@@ -10771,6 +11017,10 @@ next:
+ env->prev_jmps_processed = env->jmps_processed;
+ env->prev_insn_processed = env->insn_processed;
+
++ /* forget precise markings we inherited, see __mark_chain_precision */
++ if (env->bpf_capable)
++ mark_all_scalars_imprecise(env, cur);
++
+ /* add new state to the head of linked list */
+ new = &new_sl->state;
+ err = copy_verifier_state(new, cur);
+@@ -10825,17 +11075,13 @@ next:
+ /* Return true if it's OK to have the same insn return a different type. */
+ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+ {
+- switch (type) {
++ switch (base_type(type)) {
+ case PTR_TO_CTX:
+ case PTR_TO_SOCKET:
+- case PTR_TO_SOCKET_OR_NULL:
+ case PTR_TO_SOCK_COMMON:
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+- case PTR_TO_TCP_SOCK_OR_NULL:
+ case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
+- case PTR_TO_BTF_ID_OR_NULL:
+ return false;
+ default:
+ return true;
+@@ -11059,7 +11305,7 @@ static int do_check(struct bpf_verifier_env *env)
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
+ insn->dst_reg,
+- reg_type_str[reg_state(env, insn->dst_reg)->type]);
++ reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+ return -EACCES;
+ }
+
+@@ -11128,6 +11374,16 @@ static int do_check(struct bpf_verifier_env *env)
+ return -EINVAL;
+ }
+
++ /* We must do check_reference_leak here before
++ * prepare_func_exit to handle the case when
++ * state->curframe > 0, it may be a callback
++ * function, for which reference_state must
++ * match caller reference state when it exits.
++ */
++ err = check_reference_leak(env);
++ if (err)
++ return err;
++
+ if (state->curframe) {
+ /* exit from nested function */
+ err = prepare_func_exit(env, &env->insn_idx);
+@@ -11137,10 +11393,6 @@ static int do_check(struct bpf_verifier_env *env)
+ continue;
+ }
+
+- err = check_reference_leak(env);
+- if (err)
+- return err;
+-
+ err = check_return_code(env);
+ if (err)
+ return err;
+@@ -11310,7 +11562,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+ err = -EINVAL;
+ goto err_put;
+ }
+- aux->btf_var.reg_type = PTR_TO_MEM;
++ aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
+ aux->btf_var.mem_size = tsize;
+ } else {
+ aux->btf_var.reg_type = PTR_TO_BTF_ID;
+@@ -11435,6 +11687,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
+ }
+ }
+
++ if (map_value_has_timer(map)) {
++ if (is_tracing_prog_type(prog_type)) {
++ verbose(env, "tracing progs cannot use bpf_timer yet\n");
++ return -EINVAL;
++ }
++ }
++
+ if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
+ !bpf_offload_prog_map_match(prog, map)) {
+ verbose(env, "offload device mismatch between prog and map\n");
+@@ -12122,6 +12381,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
+ if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
+ continue;
+
++ /* Zero-extension is done by the caller. */
++ if (bpf_pseudo_kfunc_call(&insn))
++ continue;
++
+ if (WARN_ON(load_reg == -1)) {
+ verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
+ return -EFAULT;
+@@ -12318,7 +12581,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
+ insn->dst_reg,
+ shift);
+- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
++ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
+ (1ULL << size * 8) - 1);
+ }
+ }
+@@ -12350,14 +12613,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ return 0;
+
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+- if (bpf_pseudo_func(insn)) {
+- env->insn_aux_data[i].call_imm = insn->imm;
+- /* subprog is encoded in insn[1].imm */
++ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
+ continue;
+- }
+
+- if (!bpf_pseudo_call(insn))
+- continue;
+ /* Upon error here we cannot fall back to interpreter but
+ * need a hard reject of the program. Thus -EFAULT is
+ * propagated in any case.
+@@ -12378,6 +12636,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ env->insn_aux_data[i].call_imm = insn->imm;
+ /* point imm to __bpf_call_base+1 from JITs point of view */
+ insn->imm = 1;
++ if (bpf_pseudo_func(insn))
++ /* jit (e.g. x86_64) may emit fewer instructions
++ * if it learns a u32 imm is the same as a u64 imm.
++ * Force a non zero here.
++ */
++ insn[1].imm = 1;
+ }
+
+ err = bpf_prog_alloc_jited_linfo(prog);
+@@ -12413,6 +12677,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ /* Below members will be freed only at prog->aux */
+ func[i]->aux->btf = prog->aux->btf;
+ func[i]->aux->func_info = prog->aux->func_info;
++ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
+ func[i]->aux->poke_tab = prog->aux->poke_tab;
+ func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+
+@@ -12425,9 +12690,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ poke->aux = func[i]->aux;
+ }
+
+- /* Use bpf_prog_F_tag to indicate functions in stack traces.
+- * Long term would need debug info to populate names
+- */
+ func[i]->aux->name[0] = 'F';
+ func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
+ func[i]->jit_requested = 1;
+@@ -12461,7 +12723,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ insn = func[i]->insnsi;
+ for (j = 0; j < func[i]->len; j++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+- subprog = insn[1].imm;
++ subprog = insn->off;
+ insn[0].imm = (u32)(long)func[subprog]->bpf_func;
+ insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
+ continue;
+@@ -12499,9 +12761,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ }
+
+ /* finally lock prog and jit images for all functions and
+- * populate kallsysm
++ * populate kallsysm. Begin at the first subprogram, since
++ * bpf_prog_load will add the kallsyms for the main program.
+ */
+- for (i = 0; i < env->subprog_cnt; i++) {
++ for (i = 1; i < env->subprog_cnt; i++) {
+ bpf_prog_lock_ro(func[i]);
+ bpf_prog_kallsyms_add(func[i]);
+ }
+@@ -12513,7 +12776,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+ insn[0].imm = env->insn_aux_data[i].call_imm;
+- insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
++ insn[1].imm = insn->off;
++ insn->off = 0;
+ continue;
+ }
+ if (!bpf_pseudo_call(insn))
+@@ -12525,6 +12789,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
+
+ prog->jited = 1;
+ prog->bpf_func = func[0]->bpf_func;
++ prog->aux->extable = func[0]->aux->extable;
++ prog->aux->num_exentries = func[0]->aux->num_exentries;
+ prog->aux->func = func;
+ prog->aux->func_cnt = env->subprog_cnt;
+ bpf_prog_jit_attempt_done(prog);
+@@ -13114,6 +13380,8 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
+ BPF_MAIN_FUNC /* callsite */,
+ 0 /* frameno */,
+ subprog);
++ state->first_insn_idx = env->subprog_info[subprog].start;
++ state->last_insn_idx = -1;
+
+ regs = state->frame[state->curframe]->regs;
+ if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
+@@ -13125,7 +13393,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
+ mark_reg_known_zero(env, regs, i);
+ else if (regs[i].type == SCALAR_VALUE)
+ mark_reg_unknown(env, regs, i);
+- else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
++ else if (base_type(regs[i].type) == PTR_TO_MEM) {
+ const u32 mem_size = regs[i].mem_size;
+
+ mark_reg_known_zero(env, regs, i);
+@@ -13572,6 +13840,10 @@ BTF_ID(func, migrate_enable)
+ #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
+ BTF_ID(func, rcu_read_unlock_strict)
+ #endif
++#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
++BTF_ID(func, preempt_count_add)
++BTF_ID(func, preempt_count_sub)
++#endif
+ BTF_SET_END(btf_id_deny)
+
+ static int check_attach_btf_id(struct bpf_verifier_env *env)
+@@ -13713,11 +13985,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
+ log->ubuf = (char __user *) (unsigned long) attr->log_buf;
+ log->len_total = attr->log_size;
+
+- ret = -EINVAL;
+ /* log attributes have to be sane */
+- if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
+- !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
++ if (!bpf_verifier_log_attr_valid(log)) {
++ ret = -EINVAL;
+ goto err_unlock;
++ }
+ }
+
+ if (IS_ERR(btf_vmlinux)) {
+diff --git a/kernel/cfi.c b/kernel/cfi.c
+index 9594cfd1cf2cf..08102d19ec15a 100644
+--- a/kernel/cfi.c
++++ b/kernel/cfi.c
+@@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
+ static inline cfi_check_fn find_check_fn(unsigned long ptr)
+ {
+ cfi_check_fn fn = NULL;
++ unsigned long flags;
++ bool rcu_idle;
+
+ if (is_kernel_text(ptr))
+ return __cfi_check;
+@@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
+ * the shadow and __module_address use RCU, so we need to wake it
+ * up if necessary.
+ */
+- RCU_NONIDLE({
+- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
+- fn = find_shadow_check_fn(ptr);
++ rcu_idle = !rcu_is_watching();
++ if (rcu_idle) {
++ local_irq_save(flags);
++ rcu_irq_enter();
++ }
++
++ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
++ fn = find_shadow_check_fn(ptr);
++ if (!fn)
++ fn = find_module_check_fn(ptr);
+
+- if (!fn)
+- fn = find_module_check_fn(ptr);
+- });
++ if (rcu_idle) {
++ rcu_irq_exit();
++ local_irq_restore(flags);
++ }
+
+ return fn;
+ }
+diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
+index bfbeabc17a9df..d8fcc139ac05d 100644
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
+ return container_of(kfc, struct cgroup_fs_context, kfc);
+ }
+
++struct cgroup_pidlist;
++
++struct cgroup_file_ctx {
++ struct cgroup_namespace *ns;
++
++ struct {
++ void *trigger;
++ } psi;
++
++ struct {
++ bool started;
++ struct css_task_iter iter;
++ } procs;
++
++ struct {
++ struct cgroup_pidlist *pidlist;
++ } procs1;
++};
++
+ /*
+ * A cgroup can be associated with multiple css_sets as different tasks may
+ * belong to different cgroups on different hierarchies. In the other
+@@ -150,7 +169,6 @@ extern struct mutex cgroup_mutex;
+ extern spinlock_t css_set_lock;
+ extern struct cgroup_subsys *cgroup_subsys[];
+ extern struct list_head cgroup_roots;
+-extern struct file_system_type cgroup_fs_type;
+
+ /* iterate across the hierarchies */
+ #define for_each_root(root) \
+diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
+index 35b9203283447..ee8b3d80f19ee 100644
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+ int retval = 0;
+
+ mutex_lock(&cgroup_mutex);
++ cpus_read_lock();
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+ for_each_root(root) {
+ struct cgroup *from_cgrp;
+@@ -75,6 +76,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+ break;
+ }
+ percpu_up_write(&cgroup_threadgroup_rwsem);
++ cpus_read_unlock();
+ mutex_unlock(&cgroup_mutex);
+
+ return retval;
+@@ -397,6 +399,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
+ * next pid to display, if any
+ */
+ struct kernfs_open_file *of = s->private;
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *cgrp = seq_css(s)->cgroup;
+ struct cgroup_pidlist *l;
+ enum cgroup_filetype type = seq_cft(s)->private;
+@@ -406,25 +409,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
+ mutex_lock(&cgrp->pidlist_mutex);
+
+ /*
+- * !NULL @of->priv indicates that this isn't the first start()
+- * after open. If the matching pidlist is around, we can use that.
+- * Look for it. Note that @of->priv can't be used directly. It
+- * could already have been destroyed.
++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first
++ * start() after open. If the matching pidlist is around, we can use
++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used
++ * directly. It could already have been destroyed.
+ */
+- if (of->priv)
+- of->priv = cgroup_pidlist_find(cgrp, type);
++ if (ctx->procs1.pidlist)
++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
+
+ /*
+ * Either this is the first start() after open or the matching
+ * pidlist has been destroyed inbetween. Create a new one.
+ */
+- if (!of->priv) {
+- ret = pidlist_array_load(cgrp, type,
+- (struct cgroup_pidlist **)&of->priv);
++ if (!ctx->procs1.pidlist) {
++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+- l = of->priv;
++ l = ctx->procs1.pidlist;
+
+ if (pid) {
+ int end = l->length;
+@@ -452,7 +454,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
+ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct cgroup_pidlist *l = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct cgroup_pidlist *l = ctx->procs1.pidlist;
+
+ if (l)
+ mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
+@@ -463,7 +466,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
+ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct cgroup_pidlist *l = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct cgroup_pidlist *l = ctx->procs1.pidlist;
+ pid_t *p = v;
+ pid_t *end = l->list + l->length;
+ /*
+@@ -507,10 +511,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
+ goto out_unlock;
+
+ /*
+- * Even if we're attaching all tasks in the thread group, we only
+- * need to check permissions on one of them.
++ * Even if we're attaching all tasks in the thread group, we only need
++ * to check permissions on one of them. Check permissions using the
++ * credentials from file open to protect against inherited fd attacks.
+ */
+- cred = current_cred();
++ cred = of->file->f_cred;
+ tcred = get_task_cred(task);
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+@@ -546,9 +551,19 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+ {
+ struct cgroup *cgrp;
++ struct cgroup_file_ctx *ctx;
+
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
++ /*
++	 * The release agent is invoked with all capabilities, so
++	 * require capabilities to set the release agent.
++ */
++ ctx = of->priv;
++ if ((ctx->ns->user_ns != &init_user_ns) ||
++ !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
++ return -EPERM;
++
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENODEV;
+@@ -960,6 +975,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
+ /* Specifying two release agents is forbidden */
+ if (ctx->release_agent)
+ return invalfc(fc, "release_agent respecified");
++ /*
++		 * The release agent is invoked with all capabilities, so
++		 * require capabilities to set the release agent.
++ */
++ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
++ return invalfc(fc, "Setting release_agent not allowed");
+ ctx->release_agent = param->string;
+ param->string = NULL;
+ break;
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index ea08f01d0111a..be467aea457e7 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -56,6 +56,7 @@
+ #include <linux/file.h>
+ #include <linux/fs_parser.h>
+ #include <linux/sched/cputime.h>
++#include <linux/sched/deadline.h>
+ #include <linux/psi.h>
+ #include <net/sock.h>
+
+@@ -764,7 +765,8 @@ struct css_set init_css_set = {
+ .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
+ .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
+ .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
+- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
++ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node),
++ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node),
+ .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
+
+ /*
+@@ -1239,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset,
+ INIT_LIST_HEAD(&cset->threaded_csets);
+ INIT_HLIST_NODE(&cset->hlist);
+ INIT_LIST_HEAD(&cset->cgrp_links);
+- INIT_LIST_HEAD(&cset->mg_preload_node);
++ INIT_LIST_HEAD(&cset->mg_src_preload_node);
++ INIT_LIST_HEAD(&cset->mg_dst_preload_node);
+ INIT_LIST_HEAD(&cset->mg_node);
+
+ /* Copy the set of subsystem state objects generated in
+@@ -1739,7 +1742,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+ {
+ struct cgroup *dcgrp = &dst_root->cgrp;
+ struct cgroup_subsys *ss;
+- int ssid, i, ret;
++ int ssid, ret;
++ u16 dfl_disable_ss_mask = 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+@@ -1756,20 +1760,43 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+ /* can't move between two non-dummy roots either */
+ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
+ return -EBUSY;
++
++ /*
++ * Collect ssid's that need to be disabled from default
++ * hierarchy.
++ */
++ if (ss->root == &cgrp_dfl_root)
++ dfl_disable_ss_mask |= 1 << ssid;
++
+ } while_each_subsys_mask();
+
++ if (dfl_disable_ss_mask) {
++ struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
++
++ /*
++ * Controllers from default hierarchy that need to be rebound
++ * are all disabled together in one go.
++ */
++ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
++ WARN_ON(cgroup_apply_control(scgrp));
++ cgroup_finalize_control(scgrp, 0);
++ }
++
+ do_each_subsys_mask(ss, ssid, ss_mask) {
+ struct cgroup_root *src_root = ss->root;
+ struct cgroup *scgrp = &src_root->cgrp;
+ struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
+- struct css_set *cset;
++ struct css_set *cset, *cset_pos;
++ struct css_task_iter *it;
+
+ WARN_ON(!css || cgroup_css(dcgrp, ss));
+
+- /* disable from the source */
+- src_root->subsys_mask &= ~(1 << ssid);
+- WARN_ON(cgroup_apply_control(scgrp));
+- cgroup_finalize_control(scgrp, 0);
++ if (src_root != &cgrp_dfl_root) {
++ /* disable from the source */
++ src_root->subsys_mask &= ~(1 << ssid);
++ WARN_ON(cgroup_apply_control(scgrp));
++ cgroup_finalize_control(scgrp, 0);
++ }
+
+ /* rebind */
+ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
+@@ -1778,13 +1805,27 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+ css->cgroup = dcgrp;
+
+ spin_lock_irq(&css_set_lock);
+- hash_for_each(css_set_table, i, cset, hlist)
++ WARN_ON(!list_empty(&dcgrp->e_csets[ss->id]));
++ list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id],
++ e_cset_node[ss->id]) {
+ list_move_tail(&cset->e_cset_node[ss->id],
+ &dcgrp->e_csets[ss->id]);
++ /*
++		 * All css_sets of scgrp are moved to dcgrp in the same
++		 * order, so patch any in-flight iterators to keep their
++		 * iteration correct. Since the iterator is always advanced
++		 * right away and finishes when it->cset_pos meets
++		 * it->cset_head, updating it->cset_head here is enough.
++ */
++ list_for_each_entry(it, &cset->task_iters, iters_node)
++ if (it->cset_head == &scgrp->e_csets[ss->id])
++ it->cset_head = &dcgrp->e_csets[ss->id];
++ }
+ spin_unlock_irq(&css_set_lock);
+
+ if (ss->css_rstat_flush) {
+ list_del_rcu(&css->rstat_css_node);
++ synchronize_rcu();
+ list_add_rcu(&css->rstat_css_node,
+ &dcgrp->rstat_css_list);
+ }
+@@ -2319,6 +2360,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ }
+ EXPORT_SYMBOL_GPL(task_cgroup_path);
+
++/**
++ * cgroup_attach_lock - Lock for ->attach()
++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
++ *
++ * cgroup migration sometimes needs to stabilize threadgroups against forks and
++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
++ * implementations (e.g. cpuset), also need to disable CPU hotplug.
++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
++ * lead to deadlocks.
++ *
++ * Bringing up a CPU may involve creating and destroying tasks which requires
++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
++ * waiting for an on-going CPU hotplug operation which in turn is waiting for
++ * the threadgroup_rwsem to be released to create new tasks. For more details:
++ *
++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
++ *
++ * Resolve the situation by always acquiring cpus_read_lock() before optionally
++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
++ * CPU hotplug is disabled on entry.
++ */
++static void cgroup_attach_lock(bool lock_threadgroup)
++{
++ cpus_read_lock();
++ if (lock_threadgroup)
++ percpu_down_write(&cgroup_threadgroup_rwsem);
++}
++
++/**
++ * cgroup_attach_unlock - Undo cgroup_attach_lock()
++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
++ */
++static void cgroup_attach_unlock(bool lock_threadgroup)
++{
++ if (lock_threadgroup)
++ percpu_up_write(&cgroup_threadgroup_rwsem);
++ cpus_read_unlock();
++}
++
+ /**
+ * cgroup_migrate_add_task - add a migration target task to a migration context
+ * @task: target task
+@@ -2573,21 +2655,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
+ */
+ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
+ {
+- LIST_HEAD(preloaded);
+ struct css_set *cset, *tmp_cset;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+ spin_lock_irq(&css_set_lock);
+
+- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
+- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
++ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets,
++ mg_src_preload_node) {
++ cset->mg_src_cgrp = NULL;
++ cset->mg_dst_cgrp = NULL;
++ cset->mg_dst_cset = NULL;
++ list_del_init(&cset->mg_src_preload_node);
++ put_css_set_locked(cset);
++ }
+
+- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
++ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets,
++ mg_dst_preload_node) {
+ cset->mg_src_cgrp = NULL;
+ cset->mg_dst_cgrp = NULL;
+ cset->mg_dst_cset = NULL;
+- list_del_init(&cset->mg_preload_node);
++ list_del_init(&cset->mg_dst_preload_node);
+ put_css_set_locked(cset);
+ }
+
+@@ -2629,7 +2717,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
+
+ src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
+
+- if (!list_empty(&src_cset->mg_preload_node))
++ if (!list_empty(&src_cset->mg_src_preload_node))
+ return;
+
+ WARN_ON(src_cset->mg_src_cgrp);
+@@ -2640,7 +2728,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
+ src_cset->mg_src_cgrp = src_cgrp;
+ src_cset->mg_dst_cgrp = dst_cgrp;
+ get_css_set(src_cset);
+- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
++ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets);
+ }
+
+ /**
+@@ -2665,7 +2753,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
+
+ /* look up the dst cset for each src cset and link it to src */
+ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
+- mg_preload_node) {
++ mg_src_preload_node) {
+ struct css_set *dst_cset;
+ struct cgroup_subsys *ss;
+ int ssid;
+@@ -2684,7 +2772,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
+ if (src_cset == dst_cset) {
+ src_cset->mg_src_cgrp = NULL;
+ src_cset->mg_dst_cgrp = NULL;
+- list_del_init(&src_cset->mg_preload_node);
++ list_del_init(&src_cset->mg_src_preload_node);
+ put_css_set(src_cset);
+ put_css_set(dst_cset);
+ continue;
+@@ -2692,8 +2780,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
+
+ src_cset->mg_dst_cset = dst_cset;
+
+- if (list_empty(&dst_cset->mg_preload_node))
+- list_add_tail(&dst_cset->mg_preload_node,
++ if (list_empty(&dst_cset->mg_dst_preload_node))
++ list_add_tail(&dst_cset->mg_dst_preload_node,
+ &mgctx->preloaded_dst_csets);
+ else
+ put_css_set(dst_cset);
+@@ -2789,8 +2877,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+ }
+
+ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+- bool *locked)
+- __acquires(&cgroup_threadgroup_rwsem)
++ bool *threadgroup_locked)
+ {
+ struct task_struct *tsk;
+ pid_t pid;
+@@ -2807,12 +2894,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+ * Therefore, we can skip the global lock.
+ */
+ lockdep_assert_held(&cgroup_mutex);
+- if (pid || threadgroup) {
+- percpu_down_write(&cgroup_threadgroup_rwsem);
+- *locked = true;
+- } else {
+- *locked = false;
+- }
++ *threadgroup_locked = pid || threadgroup;
++ cgroup_attach_lock(*threadgroup_locked);
+
+ rcu_read_lock();
+ if (pid) {
+@@ -2843,17 +2926,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+ goto out_unlock_rcu;
+
+ out_unlock_threadgroup:
+- if (*locked) {
+- percpu_up_write(&cgroup_threadgroup_rwsem);
+- *locked = false;
+- }
++ cgroup_attach_unlock(*threadgroup_locked);
++ *threadgroup_locked = false;
+ out_unlock_rcu:
+ rcu_read_unlock();
+ return tsk;
+ }
+
+-void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+- __releases(&cgroup_threadgroup_rwsem)
++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
+ {
+ struct cgroup_subsys *ss;
+ int ssid;
+@@ -2861,8 +2941,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+ /* release reference from cgroup_procs_write_start() */
+ put_task_struct(task);
+
+- if (locked)
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock(threadgroup_locked);
++
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
+@@ -2917,12 +2997,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ struct cgroup_subsys_state *d_css;
+ struct cgroup *dsct;
+ struct css_set *src_cset;
++ bool has_tasks;
+ int ret;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+- percpu_down_write(&cgroup_threadgroup_rwsem);
+-
+ /* look up all csses currently attached to @cgrp's subtree */
+ spin_lock_irq(&css_set_lock);
+ cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+@@ -2933,13 +3012,23 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ }
+ spin_unlock_irq(&css_set_lock);
+
++ /*
++ * We need to write-lock threadgroup_rwsem while migrating tasks.
++ * However, if there are no source csets for @cgrp, changing its
++	 * controllers won't produce any task migrations and the
++ * write-locking can be skipped safely.
++ */
++ has_tasks = !list_empty(&mgctx.preloaded_src_csets);
++ cgroup_attach_lock(has_tasks);
++
+ /* NULL dst indicates self on default hierarchy */
+ ret = cgroup_migrate_prepare_dst(&mgctx);
+ if (ret)
+ goto out_finish;
+
+ spin_lock_irq(&css_set_lock);
+- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
++ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets,
++ mg_src_preload_node) {
+ struct task_struct *task, *ntask;
+
+ /* all tasks in src_csets need to be migrated */
+@@ -2951,7 +3040,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+ cgroup_migrate_finish(&mgctx);
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock(has_tasks);
+ return ret;
+ }
+
+@@ -3607,6 +3696,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
+ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, enum psi_res res)
+ {
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct psi_trigger *new;
+ struct cgroup *cgrp;
+ struct psi_group *psi;
+@@ -3618,6 +3708,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+ cgroup_get(cgrp);
+ cgroup_kn_unlock(of->kn);
+
++ /* Allow only one trigger per file descriptor */
++ if (ctx->psi.trigger) {
++ cgroup_put(cgrp);
++ return -EBUSY;
++ }
++
+ psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ new = psi_trigger_create(psi, buf, nbytes, res);
+ if (IS_ERR(new)) {
+@@ -3625,8 +3721,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+ return PTR_ERR(new);
+ }
+
+- psi_trigger_replace(&of->priv, new);
+-
++ smp_store_release(&ctx->psi.trigger, new);
+ cgroup_put(cgrp);
+
+ return nbytes;
+@@ -3656,12 +3751,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
+ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
+ poll_table *pt)
+ {
+- return psi_trigger_poll(&of->priv, of->file, pt);
++ struct cgroup_file_ctx *ctx = of->priv;
++
++ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
+ }
+
+ static void cgroup_pressure_release(struct kernfs_open_file *of)
+ {
+- psi_trigger_replace(&of->priv, NULL);
++ struct cgroup_file_ctx *ctx = of->priv;
++
++ psi_trigger_destroy(ctx->psi.trigger);
+ }
+
+ bool cgroup_psi_enabled(void)
+@@ -3788,24 +3887,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
+ static int cgroup_file_open(struct kernfs_open_file *of)
+ {
+ struct cftype *cft = of_cft(of);
++ struct cgroup_file_ctx *ctx;
++ int ret;
+
+- if (cft->open)
+- return cft->open(of);
+- return 0;
++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
++ if (!ctx)
++ return -ENOMEM;
++
++ ctx->ns = current->nsproxy->cgroup_ns;
++ get_cgroup_ns(ctx->ns);
++ of->priv = ctx;
++
++ if (!cft->open)
++ return 0;
++
++ ret = cft->open(of);
++ if (ret) {
++ put_cgroup_ns(ctx->ns);
++ kfree(ctx);
++ }
++ return ret;
+ }
+
+ static void cgroup_file_release(struct kernfs_open_file *of)
+ {
+ struct cftype *cft = of_cft(of);
++ struct cgroup_file_ctx *ctx = of->priv;
+
+ if (cft->release)
+ cft->release(of);
++ put_cgroup_ns(ctx->ns);
++ kfree(ctx);
+ }
+
+ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+ {
+- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *cgrp = of->kn->parent->priv;
+ struct cftype *cft = of_cft(of);
+ struct cgroup_subsys_state *css;
+@@ -3822,7 +3940,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+ */
+ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
+ !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
+- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
+ return -EPERM;
+
+ if (cft->write)
+@@ -4728,21 +4846,21 @@ void css_task_iter_end(struct css_task_iter *it)
+
+ static void cgroup_procs_release(struct kernfs_open_file *of)
+ {
+- if (of->priv) {
+- css_task_iter_end(of->priv);
+- kfree(of->priv);
+- }
++ struct cgroup_file_ctx *ctx = of->priv;
++
++ if (ctx->procs.started)
++ css_task_iter_end(&ctx->procs.iter);
+ }
+
+ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct css_task_iter *it = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
+
+ if (pos)
+ (*pos)++;
+
+- return css_task_iter_next(it);
++ return css_task_iter_next(&ctx->procs.iter);
+ }
+
+ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
+@@ -4750,21 +4868,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
+ {
+ struct kernfs_open_file *of = s->private;
+ struct cgroup *cgrp = seq_css(s)->cgroup;
+- struct css_task_iter *it = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct css_task_iter *it = &ctx->procs.iter;
+
+ /*
+ * When a seq_file is seeked, it's always traversed sequentially
+ * from position 0, so we can simply keep iterating on !0 *pos.
+ */
+- if (!it) {
++ if (!ctx->procs.started) {
+ if (WARN_ON_ONCE((*pos)))
+ return ERR_PTR(-EINVAL);
+-
+- it = kzalloc(sizeof(*it), GFP_KERNEL);
+- if (!it)
+- return ERR_PTR(-ENOMEM);
+- of->priv = it;
+ css_task_iter_start(&cgrp->self, iter_flags, it);
++ ctx->procs.started = true;
+ } else if (!(*pos)) {
+ css_task_iter_end(it);
+ css_task_iter_start(&cgrp->self, iter_flags, it);
+@@ -4815,9 +4930,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
+
+ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
+ struct cgroup *dst_cgrp,
+- struct super_block *sb)
++ struct super_block *sb,
++ struct cgroup_namespace *ns)
+ {
+- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+ struct cgroup *com_cgrp = src_cgrp;
+ int ret;
+
+@@ -4846,11 +4961,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
+
+ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
+ struct cgroup *dst_cgrp,
+- struct super_block *sb, bool threadgroup)
++ struct super_block *sb, bool threadgroup,
++ struct cgroup_namespace *ns)
+ {
+ int ret = 0;
+
+- ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb);
++ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
+ if (ret)
+ return ret;
+
+@@ -4867,16 +4983,18 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
+ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+ bool threadgroup)
+ {
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *src_cgrp, *dst_cgrp;
+ struct task_struct *task;
++ const struct cred *saved_cred;
+ ssize_t ret;
+- bool locked;
++ bool threadgroup_locked;
+
+ dst_cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!dst_cgrp)
+ return -ENODEV;
+
+- task = cgroup_procs_write_start(buf, threadgroup, &locked);
++ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked);
+ ret = PTR_ERR_OR_ZERO(task);
+ if (ret)
+ goto out_unlock;
+@@ -4886,16 +5004,23 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+ src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+ spin_unlock_irq(&css_set_lock);
+
+- /* process and thread migrations follow same delegation rule */
++ /*
++ * Process and thread migrations follow same delegation rule. Check
++ * permissions using the credentials from file open to protect against
++ * inherited fd attacks.
++ */
++ saved_cred = override_creds(of->file->f_cred);
+ ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
+- of->file->f_path.dentry->d_sb, threadgroup);
++ of->file->f_path.dentry->d_sb,
++ threadgroup, ctx->ns);
++ revert_creds(saved_cred);
+ if (ret)
+ goto out_finish;
+
+ ret = cgroup_attach_task(dst_cgrp, task, threadgroup);
+
+ out_finish:
+- cgroup_procs_write_finish(task, locked);
++ cgroup_procs_write_finish(task, threadgroup_locked);
+ out_unlock:
+ cgroup_kn_unlock(of->kn);
+
+@@ -5911,17 +6036,23 @@ struct cgroup *cgroup_get_from_id(u64 id)
+ struct kernfs_node *kn;
+ struct cgroup *cgrp = NULL;
+
+- mutex_lock(&cgroup_mutex);
+ kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
+ if (!kn)
+- goto out_unlock;
++ goto out;
++
++ if (kernfs_type(kn) != KERNFS_DIR)
++ goto put;
++
++ rcu_read_lock();
+
+- cgrp = kn->priv;
+- if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp))
++ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
++ if (cgrp && !cgroup_tryget(cgrp))
+ cgrp = NULL;
++
++ rcu_read_unlock();
++put:
+ kernfs_put(kn);
+-out_unlock:
+- mutex_unlock(&cgroup_mutex);
++out:
+ return cgrp;
+ }
+ EXPORT_SYMBOL_GPL(cgroup_get_from_id);
+@@ -6104,7 +6235,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
+ goto err;
+
+ ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
+- !(kargs->flags & CLONE_THREAD));
++ !(kargs->flags & CLONE_THREAD),
++ current->nsproxy->cgroup_ns);
+ if (ret)
+ goto err;
+
+@@ -6142,19 +6274,18 @@ err:
+ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
+ __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
+ {
++ struct cgroup *cgrp = kargs->cgrp;
++ struct css_set *cset = kargs->cset;
++
+ cgroup_threadgroup_change_end(current);
+
+- if (kargs->flags & CLONE_INTO_CGROUP) {
+- struct cgroup *cgrp = kargs->cgrp;
+- struct css_set *cset = kargs->cset;
++ if (cset) {
++ put_css_set(cset);
++ kargs->cset = NULL;
++ }
+
++ if (kargs->flags & CLONE_INTO_CGROUP) {
+ mutex_unlock(&cgroup_mutex);
+-
+- if (cset) {
+- put_css_set(cset);
+- kargs->cset = NULL;
+- }
+-
+ if (cgrp) {
+ cgroup_put(cgrp);
+ kargs->cgrp = NULL;
+@@ -6337,6 +6468,9 @@ void cgroup_exit(struct task_struct *tsk)
+ list_add_tail(&tsk->cg_list, &cset->dying_tasks);
+ cset->nr_tasks--;
+
++ if (dl_task(tsk))
++ dec_dl_tasks_cs(tsk);
++
+ WARN_ON_ONCE(cgroup_task_frozen(tsk));
+ if (unlikely(!(tsk->flags & PF_KTHREAD) &&
+ test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
+@@ -6474,30 +6608,38 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
+ *
+ * Find the cgroup at @path on the default hierarchy, increment its
+ * reference count and return it. Returns pointer to the found cgroup on
+- * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
+- * if @path points to a non-directory.
++ * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already
++ * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
+ */
+ struct cgroup *cgroup_get_from_path(const char *path)
+ {
+ struct kernfs_node *kn;
+- struct cgroup *cgrp;
++ struct cgroup *cgrp = ERR_PTR(-ENOENT);
++ struct cgroup *root_cgrp;
+
+- mutex_lock(&cgroup_mutex);
++ spin_lock_irq(&css_set_lock);
++ root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
++ kn = kernfs_walk_and_get(root_cgrp->kn, path);
++ spin_unlock_irq(&css_set_lock);
++ if (!kn)
++ goto out;
+
+- kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
+- if (kn) {
+- if (kernfs_type(kn) == KERNFS_DIR) {
+- cgrp = kn->priv;
+- cgroup_get_live(cgrp);
+- } else {
+- cgrp = ERR_PTR(-ENOTDIR);
+- }
+- kernfs_put(kn);
+- } else {
+- cgrp = ERR_PTR(-ENOENT);
++ if (kernfs_type(kn) != KERNFS_DIR) {
++ cgrp = ERR_PTR(-ENOTDIR);
++ goto out_kernfs;
+ }
+
+- mutex_unlock(&cgroup_mutex);
++ rcu_read_lock();
++
++ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
++ if (!cgrp || !cgroup_tryget(cgrp))
++ cgrp = ERR_PTR(-ENOENT);
++
++ rcu_read_unlock();
++
++out_kernfs:
++ kernfs_put(kn);
++out:
+ return cgrp;
+ }
+ EXPORT_SYMBOL_GPL(cgroup_get_from_path);
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 2a9695ccb65f5..6905079c15c25 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -33,6 +33,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/kernel.h>
+ #include <linux/kmod.h>
++#include <linux/kthread.h>
+ #include <linux/list.h>
+ #include <linux/mempolicy.h>
+ #include <linux/mm.h>
+@@ -161,6 +162,14 @@ struct cpuset {
+ int use_parent_ecpus;
+ int child_ecpus_count;
+
++ /*
++ * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
++ * know when to rebuild associated root domain bandwidth information.
++ */
++ int nr_deadline_tasks;
++ int nr_migrate_dl_tasks;
++ u64 sum_migrate_dl_bw;
++
+ /* Handle for cpuset.cpus.partition */
+ struct cgroup_file partition_file;
+ };
+@@ -208,6 +217,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
+ return css_cs(cs->css.parent);
+ }
+
++void inc_dl_tasks_cs(struct task_struct *p)
++{
++ struct cpuset *cs = task_cs(p);
++
++ cs->nr_deadline_tasks++;
++}
++
++void dec_dl_tasks_cs(struct task_struct *p)
++{
++ struct cpuset *cs = task_cs(p);
++
++ cs->nr_deadline_tasks--;
++}
++
+ /* bits in struct cpuset flags field */
+ typedef enum {
+ CS_ONLINE,
+@@ -311,22 +334,23 @@ static struct cpuset top_cpuset = {
+ if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
+
+ /*
+- * There are two global locks guarding cpuset structures - cpuset_rwsem and
++ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+- * comment. The cpuset code uses only cpuset_rwsem write lock. Other
+- * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to
+- * prevent change to cpuset structures.
++ * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems
++ * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
++ * structures. Note that cpuset_mutex needs to be a mutex as it is used in
++ * paths that rely on priority inheritance (e.g. scheduler - on RT) for
++ * correctness.
+ *
+ * A task must hold both locks to modify cpusets. If a task holds
+- * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it
+- * is the only task able to also acquire callback_lock and be able to
+- * modify cpusets. It can perform various checks on the cpuset structure
+- * first, knowing nothing will change. It can also allocate memory while
+- * just holding cpuset_rwsem. While it is performing these checks, various
+- * callback routines can briefly acquire callback_lock to query cpusets.
+- * Once it is ready to make the changes, it takes callback_lock, blocking
+- * everyone else.
++ * cpuset_mutex, it blocks others, ensuring that it is the only task able to
++ * also acquire callback_lock and be able to modify cpusets. It can perform
++ * various checks on the cpuset structure first, knowing nothing will change.
++ * It can also allocate memory while just holding cpuset_mutex. While it is
++ * performing these checks, various callback routines can briefly acquire
++ * callback_lock to query cpusets. Once it is ready to make the changes, it
++ * takes callback_lock, blocking everyone else.
+ *
+ * Calls to the kernel memory allocator can not be made while holding
+ * callback_lock, as that would risk double tripping on callback_lock
+@@ -348,16 +372,16 @@ static struct cpuset top_cpuset = {
+ * guidelines for accessing subsystem state in kernel/cgroup.c
+ */
+
+-DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem);
++static DEFINE_MUTEX(cpuset_mutex);
+
+-void cpuset_read_lock(void)
++void cpuset_lock(void)
+ {
+- percpu_down_read(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ }
+
+-void cpuset_read_unlock(void)
++void cpuset_unlock(void)
+ {
+- percpu_up_read(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ }
+
+ static DEFINE_SPINLOCK(callback_lock);
+@@ -395,7 +419,7 @@ static inline bool is_in_v2_mode(void)
+ * One way or another, we guarantee to return some non-empty subset
+ * of cpu_online_mask.
+ *
+- * Call with callback_lock or cpuset_rwsem held.
++ * Call with callback_lock or cpuset_mutex held.
+ */
+ static void guarantee_online_cpus(struct task_struct *tsk,
+ struct cpumask *pmask)
+@@ -437,7 +461,7 @@ out_unlock:
+ * One way or another, we guarantee to return some non-empty subset
+ * of node_states[N_MEMORY].
+ *
+- * Call with callback_lock or cpuset_rwsem held.
++ * Call with callback_lock or cpuset_mutex held.
+ */
+ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
+ {
+@@ -449,7 +473,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
+ /*
+ * update task's spread flag if cpuset's page/slab spread flag is set
+ *
+- * Call with callback_lock or cpuset_rwsem held.
++ * Call with callback_lock or cpuset_mutex held.
+ */
+ static void cpuset_update_task_spread_flag(struct cpuset *cs,
+ struct task_struct *tsk)
+@@ -470,7 +494,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
+ *
+ * One cpuset is a subset of another if all its allowed CPUs and
+ * Memory Nodes are a subset of the other, and its exclusive flags
+- * are only set if the other's are set. Call holding cpuset_rwsem.
++ * are only set if the other's are set. Call holding cpuset_mutex.
+ */
+
+ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
+@@ -579,7 +603,7 @@ static inline void free_cpuset(struct cpuset *cs)
+ * If we replaced the flag and mask values of the current cpuset
+ * (cur) with those values in the trial cpuset (trial), would
+ * our various subset and exclusive rules still be valid? Presumes
+- * cpuset_rwsem held.
++ * cpuset_mutex held.
+ *
+ * 'cur' is the address of an actual, in-use cpuset. Operations
+ * such as list traversal that depend on the actual address of the
+@@ -702,7 +726,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
+ rcu_read_unlock();
+ }
+
+-/* Must be called with cpuset_rwsem held. */
++/* Must be called with cpuset_mutex held. */
+ static inline int nr_cpusets(void)
+ {
+ /* jump label reference count + the top-level cpuset */
+@@ -728,7 +752,7 @@ static inline int nr_cpusets(void)
+ * domains when operating in the severe memory shortage situations
+ * that could cause allocation failures below.
+ *
+- * Must be called with cpuset_rwsem held.
++ * Must be called with cpuset_mutex held.
+ *
+ * The three key local variables below are:
+ * cp - cpuset pointer, used (together with pos_css) to perform a
+@@ -939,11 +963,14 @@ done:
+ return ndoms;
+ }
+
+-static void update_tasks_root_domain(struct cpuset *cs)
++static void dl_update_tasks_root_domain(struct cpuset *cs)
+ {
+ struct css_task_iter it;
+ struct task_struct *task;
+
++ if (cs->nr_deadline_tasks == 0)
++ return;
++
+ css_task_iter_start(&cs->css, 0, &it);
+
+ while ((task = css_task_iter_next(&it)))
+@@ -952,12 +979,12 @@ static void update_tasks_root_domain(struct cpuset *cs)
+ css_task_iter_end(&it);
+ }
+
+-static void rebuild_root_domains(void)
++static void dl_rebuild_rd_accounting(void)
+ {
+ struct cpuset *cs = NULL;
+ struct cgroup_subsys_state *pos_css;
+
+- percpu_rwsem_assert_held(&cpuset_rwsem);
++ lockdep_assert_held(&cpuset_mutex);
+ lockdep_assert_cpus_held();
+ lockdep_assert_held(&sched_domains_mutex);
+
+@@ -980,7 +1007,7 @@ static void rebuild_root_domains(void)
+
+ rcu_read_unlock();
+
+- update_tasks_root_domain(cs);
++ dl_update_tasks_root_domain(cs);
+
+ rcu_read_lock();
+ css_put(&cs->css);
+@@ -994,7 +1021,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ {
+ mutex_lock(&sched_domains_mutex);
+ partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
+- rebuild_root_domains();
++ dl_rebuild_rd_accounting();
+ mutex_unlock(&sched_domains_mutex);
+ }
+
+@@ -1007,7 +1034,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
+ *
+- * Call with cpuset_rwsem held. Takes cpus_read_lock().
++ * Call with cpuset_mutex held. Takes cpus_read_lock().
+ */
+ static void rebuild_sched_domains_locked(void)
+ {
+@@ -1018,7 +1045,7 @@ static void rebuild_sched_domains_locked(void)
+ int ndoms;
+
+ lockdep_assert_cpus_held();
+- percpu_rwsem_assert_held(&cpuset_rwsem);
++ lockdep_assert_held(&cpuset_mutex);
+
+ /*
+ * If we have raced with CPU hotplug, return early to avoid
+@@ -1069,9 +1096,9 @@ static void rebuild_sched_domains_locked(void)
+ void rebuild_sched_domains(void)
+ {
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ rebuild_sched_domains_locked();
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ }
+
+@@ -1080,17 +1107,25 @@ void rebuild_sched_domains(void)
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Iterate through each task of @cs updating its cpus_allowed to the
+- * effective cpuset's. As this function is called with cpuset_rwsem held,
++ * effective cpuset's. As this function is called with cpuset_mutex held,
+ * cpuset membership stays stable.
+ */
+ static void update_tasks_cpumask(struct cpuset *cs)
+ {
+ struct css_task_iter it;
+ struct task_struct *task;
++ bool top_cs = cs == &top_cpuset;
+
+ css_task_iter_start(&cs->css, 0, &it);
+- while ((task = css_task_iter_next(&it)))
++ while ((task = css_task_iter_next(&it))) {
++ /*
++ * Percpu kthreads in top_cpuset are ignored
++ */
++ if (top_cs && (task->flags & PF_KTHREAD) &&
++ kthread_is_per_cpu(task))
++ continue;
+ set_cpus_allowed_ptr(task, cs->effective_cpus);
++ }
+ css_task_iter_end(&it);
+ }
+
+@@ -1179,7 +1214,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
+ int old_prs, new_prs;
+ bool part_error = false; /* Partition error? */
+
+- percpu_rwsem_assert_held(&cpuset_rwsem);
++ lockdep_assert_held(&cpuset_mutex);
+
+ /*
+ * The parent must be a partition root.
+@@ -1349,7 +1384,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
+ *
+ * On legacy hierarchy, effective_cpus will be the same with cpu_allowed.
+ *
+- * Called with cpuset_rwsem held
++ * Called with cpuset_mutex held
+ */
+ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
+ {
+@@ -1512,10 +1547,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
+ struct cpuset *sibling;
+ struct cgroup_subsys_state *pos_css;
+
++ lockdep_assert_held(&cpuset_mutex);
++
+ /*
+ * Check all its siblings and call update_cpumasks_hier()
+ * if their use_parent_ecpus flag is set in order for them
+ * to use the right effective_cpus value.
++ *
++ * The update_cpumasks_hier() function may sleep. So we have to
++ * release the RCU read lock before calling it.
+ */
+ rcu_read_lock();
+ cpuset_for_each_child(sibling, pos_css, parent) {
+@@ -1523,8 +1563,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
+ continue;
+ if (!sibling->use_parent_ecpus)
+ continue;
++ if (!css_tryget_online(&sibling->css))
++ continue;
+
++ rcu_read_unlock();
+ update_cpumasks_hier(sibling, tmp);
++ rcu_read_lock();
++ css_put(&sibling->css);
+ }
+ rcu_read_unlock();
+ }
+@@ -1597,8 +1642,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+ * Make sure that subparts_cpus is a subset of cpus_allowed.
+ */
+ if (cs->nr_subparts_cpus) {
+- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
+- cs->cpus_allowed);
++ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
+ cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+ }
+ spin_unlock_irq(&callback_lock);
+@@ -1706,12 +1750,12 @@ static void *cpuset_being_rebound;
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ *
+ * Iterate through each task of @cs updating its mems_allowed to the
+- * effective cpuset's. As this function is called with cpuset_rwsem held,
++ * effective cpuset's. As this function is called with cpuset_mutex held,
+ * cpuset membership stays stable.
+ */
+ static void update_tasks_nodemask(struct cpuset *cs)
+ {
+- static nodemask_t newmems; /* protected by cpuset_rwsem */
++ static nodemask_t newmems; /* protected by cpuset_mutex */
+ struct css_task_iter it;
+ struct task_struct *task;
+
+@@ -1724,7 +1768,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
+ * take while holding tasklist_lock. Forks can happen - the
+ * mpol_dup() cpuset_being_rebound check will catch such forks,
+ * and rebind their vma mempolicies too. Because we still hold
+- * the global cpuset_rwsem, we know that no other rebind effort
++ * the global cpuset_mutex, we know that no other rebind effort
+ * will be contending for the global variable cpuset_being_rebound.
+ * It's ok if we rebind the same mm twice; mpol_rebind_mm()
+ * is idempotent. Also migrate pages in each mm to new nodes.
+@@ -1770,7 +1814,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
+ *
+ * On legacy hierarchy, effective_mems will be the same with mems_allowed.
+ *
+- * Called with cpuset_rwsem held
++ * Called with cpuset_mutex held
+ */
+ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
+ {
+@@ -1823,7 +1867,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
+ * mempolicies and if the cpuset is marked 'memory_migrate',
+ * migrate the tasks pages to the new memory.
+ *
+- * Call with cpuset_rwsem held. May take callback_lock during call.
++ * Call with cpuset_mutex held. May take callback_lock during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_lock, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+@@ -1913,7 +1957,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
+ * @cs: the cpuset in which each task's spread flags needs to be changed
+ *
+ * Iterate through each task of @cs updating its spread flags. As this
+- * function is called with cpuset_rwsem held, cpuset membership stays
++ * function is called with cpuset_mutex held, cpuset membership stays
+ * stable.
+ */
+ static void update_tasks_flags(struct cpuset *cs)
+@@ -1933,7 +1977,7 @@ static void update_tasks_flags(struct cpuset *cs)
+ * cs: the cpuset to update
+ * turning_on: whether the flag is being set or cleared
+ *
+- * Call with cpuset_rwsem held.
++ * Call with cpuset_mutex held.
+ */
+
+ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+@@ -1982,7 +2026,7 @@ out:
+ * cs: the cpuset to update
+ * new_prs: new partition root state
+ *
+- * Call with cpuset_rwsem held.
++ * Call with cpuset_mutex held.
+ */
+ static int update_prstate(struct cpuset *cs, int new_prs)
+ {
+@@ -2043,12 +2087,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
+ update_flag(CS_CPU_EXCLUSIVE, cs, 0);
+ }
+
+- /*
+- * Update cpumask of parent's tasks except when it is the top
+- * cpuset as some system daemons cannot be mapped to other CPUs.
+- */
+- if (parent != &top_cpuset)
+- update_tasks_cpumask(parent);
++ update_tasks_cpumask(parent);
+
+ if (parent->child_ecpus_count)
+ update_sibling_cpumasks(parent, cs, &tmpmask);
+@@ -2169,19 +2208,26 @@ static int fmeter_getrate(struct fmeter *fmp)
+
+ static struct cpuset *cpuset_attach_old_cs;
+
+-/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
++static void reset_migrate_dl_data(struct cpuset *cs)
++{
++ cs->nr_migrate_dl_tasks = 0;
++ cs->sum_migrate_dl_bw = 0;
++}
++
++/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
+ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ {
+ struct cgroup_subsys_state *css;
+- struct cpuset *cs;
++ struct cpuset *cs, *oldcs;
+ struct task_struct *task;
+ int ret;
+
+ /* used later by cpuset_attach() */
+ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
++ oldcs = cpuset_attach_old_cs;
+ cs = css_cs(css);
+
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+
+ /* allow moving tasks into an empty cpuset if on default hierarchy */
+ ret = -ENOSPC;
+@@ -2190,14 +2236,39 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ goto out_unlock;
+
+ cgroup_taskset_for_each(task, css, tset) {
+- ret = task_can_attach(task, cs->cpus_allowed);
++ ret = task_can_attach(task);
+ if (ret)
+ goto out_unlock;
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
++
++ if (dl_task(task)) {
++ cs->nr_migrate_dl_tasks++;
++ cs->sum_migrate_dl_bw += task->dl.dl_bw;
++ }
++ }
++
++ if (!cs->nr_migrate_dl_tasks)
++ goto out_success;
++
++ if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
++ int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
++
++ if (unlikely(cpu >= nr_cpu_ids)) {
++ reset_migrate_dl_data(cs);
++ ret = -EINVAL;
++ goto out_unlock;
++ }
++
++ ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
++ if (ret) {
++ reset_migrate_dl_data(cs);
++ goto out_unlock;
++ }
+ }
+
++out_success:
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+@@ -2205,23 +2276,35 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ cs->attach_in_progress++;
+ ret = 0;
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ return ret;
+ }
+
+ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
+ {
+ struct cgroup_subsys_state *css;
++ struct cpuset *cs;
+
+ cgroup_taskset_first(tset, &css);
++ cs = css_cs(css);
+
+- percpu_down_write(&cpuset_rwsem);
+- css_cs(css)->attach_in_progress--;
+- percpu_up_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++
++ if (cs->nr_migrate_dl_tasks) {
++ int cpu = cpumask_any(cs->effective_cpus);
++
++ dl_bw_free(cpu, cs->sum_migrate_dl_bw);
++ reset_migrate_dl_data(cs);
++ }
++
++ mutex_unlock(&cpuset_mutex);
+ }
+
+ /*
+- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
++ * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
+ * but we can't allocate it dynamically there. Define it global and
+ * allocate from cpuset_init().
+ */
+@@ -2229,7 +2312,7 @@ static cpumask_var_t cpus_attach;
+
+ static void cpuset_attach(struct cgroup_taskset *tset)
+ {
+- /* static buf protected by cpuset_rwsem */
++ /* static buf protected by cpuset_mutex */
+ static nodemask_t cpuset_attach_nodemask_to;
+ struct task_struct *task;
+ struct task_struct *leader;
+@@ -2240,7 +2323,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
+- percpu_down_write(&cpuset_rwsem);
++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
++ mutex_lock(&cpuset_mutex);
+
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+
+@@ -2288,11 +2372,17 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+
+ cs->old_mems_allowed = cpuset_attach_nodemask_to;
+
++ if (cs->nr_migrate_dl_tasks) {
++ cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks;
++ oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks;
++ reset_migrate_dl_data(cs);
++ }
++
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ }
+
+ /* The various types of files and directories in a cpuset file system */
+@@ -2324,7 +2414,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
+ int retval = 0;
+
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs)) {
+ retval = -ENODEV;
+ goto out_unlock;
+@@ -2360,7 +2450,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
+ break;
+ }
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ return retval;
+ }
+@@ -2373,7 +2463,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+ int retval = -ENODEV;
+
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+
+@@ -2386,7 +2476,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+ break;
+ }
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ return retval;
+ }
+@@ -2419,7 +2509,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+ * operation like this one can lead to a deadlock through kernfs
+ * active_ref protection. Let's break the protection. Losing the
+ * protection is okay as we check whether @cs is online after
+- * grabbing cpuset_rwsem anyway. This only happens on the legacy
++ * grabbing cpuset_mutex anyway. This only happens on the legacy
+ * hierarchies.
+ */
+ css_get(&cs->css);
+@@ -2427,7 +2517,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+ flush_work(&cpuset_hotplug_work);
+
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+
+@@ -2451,7 +2541,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+
+ free_cpuset(trialcs);
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ kernfs_unbreak_active_protection(of->kn);
+ css_put(&cs->css);
+@@ -2584,13 +2674,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
+
+ css_get(&cs->css);
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+
+ retval = update_prstate(cs, val);
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ css_put(&cs->css);
+ return retval ?: nbytes;
+@@ -2803,7 +2893,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+ return 0;
+
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+
+ set_bit(CS_ONLINE, &cs->flags);
+ if (is_spread_page(parent))
+@@ -2854,7 +2944,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+ cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
+ spin_unlock_irq(&callback_lock);
+ out_unlock:
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ return 0;
+ }
+@@ -2875,7 +2965,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+ struct cpuset *cs = css_cs(css);
+
+ cpus_read_lock();
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+
+ if (is_partition_root(cs))
+ update_prstate(cs, 0);
+@@ -2894,7 +2984,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+ cpuset_dec();
+ clear_bit(CS_ONLINE, &cs->flags);
+
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ cpus_read_unlock();
+ }
+
+@@ -2907,7 +2997,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
+
+ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ {
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ spin_lock_irq(&callback_lock);
+
+ if (is_in_v2_mode()) {
+@@ -2920,7 +3010,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ }
+
+ spin_unlock_irq(&callback_lock);
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ }
+
+ /*
+@@ -2962,8 +3052,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
+
+ int __init cpuset_init(void)
+ {
+- BUG_ON(percpu_init_rwsem(&cpuset_rwsem));
+-
+ BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
+ BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
+@@ -3035,7 +3123,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
+ is_empty = cpumask_empty(cs->cpus_allowed) ||
+ nodes_empty(cs->mems_allowed);
+
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+
+ /*
+ * Move tasks to the nearest ancestor with execution resources,
+@@ -3045,7 +3133,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
+ if (is_empty)
+ remove_tasks_in_empty_cpuset(cs);
+
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+ }
+
+ static void
+@@ -3095,14 +3183,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
+ retry:
+ wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
+
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+
+ /*
+ * We have raced with task attaching. We wait until attaching
+ * is finished, so we won't attach a task to an empty cpuset.
+ */
+ if (cs->attach_in_progress) {
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ goto retry;
+ }
+
+@@ -3180,7 +3268,7 @@ update_tasks:
+ hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
+ cpus_updated, mems_updated);
+
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+ }
+
+ /**
+@@ -3210,7 +3298,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
+ if (on_dfl && !alloc_cpumasks(NULL, &tmp))
+ ptmp = &tmp;
+
+- percpu_down_write(&cpuset_rwsem);
++ mutex_lock(&cpuset_mutex);
+
+ /* fetch the available cpus/mems and find out which changed how */
+ cpumask_copy(&new_cpus, cpu_active_mask);
+@@ -3267,7 +3355,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
+ update_tasks_nodemask(&top_cpuset);
+ }
+
+- percpu_up_write(&cpuset_rwsem);
++ mutex_unlock(&cpuset_mutex);
+
+ /* if cpus or mems changed, we need to propagate to descendants */
+ if (cpus_updated || mems_updated) {
+@@ -3336,8 +3424,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = {
+ */
+ void __init cpuset_init_smp(void)
+ {
+- cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
+- top_cpuset.mems_allowed = node_states[N_MEMORY];
++ /*
++	 * cpus_allowed/mems_allowed set to v2 values in the initial
++ * cpuset_bind() call will be reset to v1 values in another
++ * cpuset_bind() call when v1 cpuset is mounted.
++ */
+ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
+
+ cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
+@@ -3674,7 +3765,7 @@ void __cpuset_memory_pressure_bump(void)
+ * - Used for /proc/<pid>/cpuset.
+ * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
+ * doesn't really matter if tsk->cpuset changes after we read it,
+- * and we take cpuset_rwsem, keeping cpuset_attach() from changing it
++ * and we take cpuset_mutex, keeping cpuset_attach() from changing it
+ * anyway.
+ */
+ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
+index 0d5c29879a50b..144a464e45c66 100644
+--- a/kernel/cgroup/namespace.c
++++ b/kernel/cgroup/namespace.c
+@@ -149,9 +149,3 @@ const struct proc_ns_operations cgroupns_operations = {
+ .install = cgroupns_install,
+ .owner = cgroupns_owner,
+ };
+-
+-static __init int cgroup_namespaces_init(void)
+-{
+- return 0;
+-}
+-subsys_initcall(cgroup_namespaces_init);
+diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
+index b264ab5652ba9..1486768f23185 100644
+--- a/kernel/cgroup/rstat.c
++++ b/kernel/cgroup/rstat.c
+@@ -433,8 +433,6 @@ static void root_cgroup_cputime(struct task_cputime *cputime)
+ cputime->sum_exec_runtime += user;
+ cputime->sum_exec_runtime += sys;
+ cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST];
+- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE];
+ }
+ }
+
+diff --git a/kernel/compat.c b/kernel/compat.c
+index 55551989d9da5..fb50f29d9b361 100644
+--- a/kernel/compat.c
++++ b/kernel/compat.c
+@@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len,
+ if (len & (sizeof(compat_ulong_t)-1))
+ return -EINVAL;
+
+- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
++ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = sched_getaffinity(pid, mask);
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 192e43a874076..393114c10c285 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -31,8 +31,10 @@
+ #include <linux/smpboot.h>
+ #include <linux/relay.h>
+ #include <linux/slab.h>
++#include <linux/scs.h>
+ #include <linux/percpu-rwsem.h>
+ #include <linux/cpuset.h>
++#include <linux/random.h>
+
+ #include <trace/events/power.h>
+ #define CREATE_TRACE_POINTS
+@@ -69,7 +71,6 @@ struct cpuhp_cpu_state {
+ bool rollback;
+ bool single;
+ bool bringup;
+- int cpu;
+ struct hlist_node *node;
+ struct hlist_node *last;
+ enum cpuhp_state cb_state;
+@@ -473,7 +474,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+ #endif
+
+ static inline enum cpuhp_state
+-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
++cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
+ {
+ enum cpuhp_state prev_state = st->state;
+ bool bringup = st->state < target;
+@@ -484,14 +485,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+ st->target = target;
+ st->single = false;
+ st->bringup = bringup;
+- if (cpu_dying(st->cpu) != !bringup)
+- set_cpu_dying(st->cpu, !bringup);
++ if (cpu_dying(cpu) != !bringup)
++ set_cpu_dying(cpu, !bringup);
+
+ return prev_state;
+ }
+
+ static inline void
+-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
++cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
++ enum cpuhp_state prev_state)
+ {
+ bool bringup = !st->bringup;
+
+@@ -518,8 +520,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+ }
+
+ st->bringup = bringup;
+- if (cpu_dying(st->cpu) != !bringup)
+- set_cpu_dying(st->cpu, !bringup);
++ if (cpu_dying(cpu) != !bringup)
++ set_cpu_dying(cpu, !bringup);
+ }
+
+ /* Regular hotplug invocation of the AP hotplug thread */
+@@ -539,15 +541,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+ wait_for_ap_thread(st, st->bringup);
+ }
+
+-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
++static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
++ enum cpuhp_state target)
+ {
+ enum cpuhp_state prev_state;
+ int ret;
+
+- prev_state = cpuhp_set_state(st, target);
++ prev_state = cpuhp_set_state(cpu, st, target);
+ __cpuhp_kick_ap(st);
+ if ((ret = st->result)) {
+- cpuhp_reset_state(st, prev_state);
++ cpuhp_reset_state(cpu, st, prev_state);
+ __cpuhp_kick_ap(st);
+ }
+
+@@ -579,7 +582,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
+ if (st->target <= CPUHP_AP_ONLINE_IDLE)
+ return 0;
+
+- return cpuhp_kick_ap(st, st->target);
++ return cpuhp_kick_ap(cpu, st, st->target);
+ }
+
+ static int bringup_cpu(unsigned int cpu)
+@@ -587,6 +590,12 @@ static int bringup_cpu(unsigned int cpu)
+ struct task_struct *idle = idle_thread_get(cpu);
+ int ret;
+
++ /*
++ * Reset stale stack state from the last time this CPU was online.
++ */
++ scs_task_reset(idle);
++ kasan_unpoison_task_stack(idle);
++
+ /*
+ * Some architectures have to walk the irq descriptors to
+ * setup the vector space for the cpu which comes online.
+@@ -653,21 +662,51 @@ static bool cpuhp_next_state(bool bringup,
+ return true;
+ }
+
+-static int cpuhp_invoke_callback_range(bool bringup,
+- unsigned int cpu,
+- struct cpuhp_cpu_state *st,
+- enum cpuhp_state target)
++static int __cpuhp_invoke_callback_range(bool bringup,
++ unsigned int cpu,
++ struct cpuhp_cpu_state *st,
++ enum cpuhp_state target,
++ bool nofail)
+ {
+ enum cpuhp_state state;
+- int err = 0;
++ int ret = 0;
+
+ while (cpuhp_next_state(bringup, &state, st, target)) {
++ int err;
++
+ err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
+- if (err)
++ if (!err)
++ continue;
++
++ if (nofail) {
++ pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
++ cpu, bringup ? "UP" : "DOWN",
++ cpuhp_get_step(st->state)->name,
++ st->state, err);
++ ret = -1;
++ } else {
++ ret = err;
+ break;
++ }
+ }
+
+- return err;
++ return ret;
++}
++
++static inline int cpuhp_invoke_callback_range(bool bringup,
++ unsigned int cpu,
++ struct cpuhp_cpu_state *st,
++ enum cpuhp_state target)
++{
++ return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
++}
++
++static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
++ unsigned int cpu,
++ struct cpuhp_cpu_state *st,
++ enum cpuhp_state target)
++{
++ __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
+ }
+
+ static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+@@ -696,7 +735,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+- cpuhp_reset_state(st, prev_state);
++ cpuhp_reset_state(cpu, st, prev_state);
+ if (can_rollback_cpu(st))
+ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
+ prev_state));
+@@ -713,7 +752,6 @@ static void cpuhp_create(unsigned int cpu)
+
+ init_completion(&st->done_up);
+ init_completion(&st->done_down);
+- st->cpu = cpu;
+ }
+
+ static int cpuhp_should_run(unsigned int cpu)
+@@ -867,7 +905,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
+ cpuhp_lock_release(true);
+
+ trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+- ret = cpuhp_kick_ap(st, st->target);
++ ret = cpuhp_kick_ap(cpu, st, st->target);
+ trace_cpuhp_exit(cpu, st->state, prev_state, ret);
+
+ return ret;
+@@ -986,7 +1024,6 @@ static int take_cpu_down(void *_param)
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+ enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
+ int err, cpu = smp_processor_id();
+- int ret;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+ err = __cpu_disable();
+@@ -999,13 +1036,10 @@ static int take_cpu_down(void *_param)
+ */
+ WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
+
+- /* Invoke the former CPU_DYING callbacks */
+- ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+-
+ /*
+- * DYING must not fail!
++ * Invoke the former CPU_DYING callbacks. DYING must not fail!
+ */
+- WARN_ON_ONCE(ret);
++ cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
+
+ /* Give up timekeeping duties */
+ tick_handover_do_timer();
+@@ -1099,7 +1133,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+ ret, cpu, cpuhp_get_step(st->state)->name,
+ st->state);
+
+- cpuhp_reset_state(st, prev_state);
++ cpuhp_reset_state(cpu, st, prev_state);
+
+ if (st->state < prev_state)
+ WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
+@@ -1126,7 +1160,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+
+ cpuhp_tasks_frozen = tasks_frozen;
+
+- prev_state = cpuhp_set_state(st, target);
++ prev_state = cpuhp_set_state(cpu, st, target);
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread.
+@@ -1157,7 +1191,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+ ret = cpuhp_down_callbacks(cpu, st, target);
+ if (ret && st->state < prev_state) {
+ if (st->state == CPUHP_TEARDOWN_CPU) {
+- cpuhp_reset_state(st, prev_state);
++ cpuhp_reset_state(cpu, st, prev_state);
+ __cpuhp_kick_ap(st);
+ } else {
+ WARN(1, "DEAD callback error for CPU%d", cpu);
+@@ -1277,16 +1311,14 @@ void notify_cpu_starting(unsigned int cpu)
+ {
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+- int ret;
+
+ rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
+ cpumask_set_cpu(cpu, &cpus_booted_once_mask);
+- ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+
+ /*
+ * STARTING must not fail!
+ */
+- WARN_ON_ONCE(ret);
++ cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
+ }
+
+ /*
+@@ -1344,7 +1376,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
+
+ cpuhp_tasks_frozen = tasks_frozen;
+
+- cpuhp_set_state(st, target);
++ cpuhp_set_state(cpu, st, target);
+ /*
+ * If the current CPU state is in the range of the AP hotplug thread,
+ * then we need to kick the thread once more.
+@@ -1652,6 +1684,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .startup.single = perf_event_init_cpu,
+ .teardown.single = perf_event_exit_cpu,
+ },
++ [CPUHP_RANDOM_PREPARE] = {
++ .name = "random:prepare",
++ .startup.single = random_prepare_cpu,
++ .teardown.single = NULL,
++ },
+ [CPUHP_WORKQUEUE_PREP] = {
+ .name = "workqueue:prepare",
+ .startup.single = workqueue_prepare_cpu,
+@@ -1775,6 +1812,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
+ .startup.single = workqueue_online_cpu,
+ .teardown.single = workqueue_offline_cpu,
+ },
++ [CPUHP_AP_RANDOM_ONLINE] = {
++ .name = "random:online",
++ .startup.single = random_online_cpu,
++ .teardown.single = NULL,
++ },
+ [CPUHP_AP_RCUTREE_ONLINE] = {
+ .name = "RCU/tree:online",
+ .startup.single = rcutree_online_cpu,
+@@ -2297,8 +2339,10 @@ static ssize_t target_store(struct device *dev, struct device_attribute *attr,
+
+ if (st->state < target)
+ ret = cpu_up(dev->id, target);
+- else
++ else if (st->state > target)
+ ret = cpu_down(dev->id, target);
++ else if (WARN_ON(st->target != target))
++ st->target = target;
+ out:
+ unlock_device_hotplug();
+ return ret ? ret : count;
+diff --git a/kernel/crash_core.c b/kernel/crash_core.c
+index eb53f5ec62c90..256cf6db573cd 100644
+--- a/kernel/crash_core.c
++++ b/kernel/crash_core.c
+@@ -6,6 +6,7 @@
+
+ #include <linux/buildid.h>
+ #include <linux/crash_core.h>
++#include <linux/init.h>
+ #include <linux/utsname.h>
+ #include <linux/vmalloc.h>
+
+@@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline,
+ "crashkernel=", suffix_tbl[SUFFIX_LOW]);
+ }
+
++/*
++ * Add a dummy early_param handler to mark crashkernel= as a known command line
++ * parameter and suppress incorrect warnings in init/main.c.
++ */
++static int __init parse_crashkernel_dummy(char *arg)
++{
++ return 0;
++}
++early_param("crashkernel", parse_crashkernel_dummy);
++
+ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+ void *data, size_t data_len)
+ {
+diff --git a/kernel/cred.c b/kernel/cred.c
+index 1ae0b4948a5a8..933155c969227 100644
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -665,26 +665,20 @@ EXPORT_SYMBOL(cred_fscmp);
+
+ int set_cred_ucounts(struct cred *new)
+ {
+- struct task_struct *task = current;
+- const struct cred *old = task->real_cred;
+ struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
+
+- if (new->user == old->user && new->user_ns == old->user_ns)
+- return 0;
+-
+ /*
+ * This optimization is needed because alloc_ucounts() uses locks
+ * for table lookups.
+ */
+- if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
++ if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid))
+ return 0;
+
+- if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
++ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid)))
+ return -EAGAIN;
+
+ new->ucounts = new_ucounts;
+- if (old_ucounts)
+- put_ucounts(old_ucounts);
++ put_ucounts(old_ucounts);
+
+ return 0;
+ }
+diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
+index da06a5553835b..7beceb447211d 100644
+--- a/kernel/debug/debug_core.c
++++ b/kernel/debug/debug_core.c
+@@ -53,6 +53,7 @@
+ #include <linux/vmacache.h>
+ #include <linux/rcupdate.h>
+ #include <linux/irq.h>
++#include <linux/security.h>
+
+ #include <asm/cacheflush.h>
+ #include <asm/byteorder.h>
+@@ -752,6 +753,29 @@ cpu_master_loop:
+ continue;
+ kgdb_connected = 0;
+ } else {
++ /*
++ * This is a brutal way to interfere with the debugger
++ * and prevent gdb being used to poke at kernel memory.
++ * This could cause trouble if lockdown is applied when
++ * there is already an active gdb session. For now the
++ * answer is simply "don't do that". Typically lockdown
++ * *will* be applied before the debug core gets started
++ * so only developers using kgdb for fairly advanced
++ * early kernel debug can be bitten by this. Hopefully
++ * they are sophisticated enough to take care of
++ * themselves, especially with help from the lockdown
++ * message printed on the console!
++ */
++ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
++ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
++ /* Switch back to kdb if possible... */
++ dbg_kdb_mode = 1;
++ continue;
++ } else {
++ /* ... otherwise just bail */
++ break;
++ }
++ }
+ error = gdb_serial_stub(ks);
+ }
+
+diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
+index 1f9f0e47aedaa..10b454554ab03 100644
+--- a/kernel/debug/kdb/kdb_bt.c
++++ b/kernel/debug/kdb/kdb_bt.c
+@@ -46,7 +46,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
+ * btp <pid> Kernel stack for <pid>
+ * btt <address-expression> Kernel stack for task structure at
+ * <address-expression>
+- * bta [DRSTCZEUIMA] All useful processes, optionally
++ * bta [<state_chars>|A] All useful processes, optionally
+ * filtered by state
+ * btc [<cpu>] The current process on one cpu,
+ * default is all cpus
+@@ -74,7 +74,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
+ */
+
+ static int
+-kdb_bt1(struct task_struct *p, unsigned long mask, bool btaprompt)
++kdb_bt1(struct task_struct *p, const char *mask, bool btaprompt)
+ {
+ char ch;
+
+@@ -120,7 +120,7 @@ kdb_bt_cpu(unsigned long cpu)
+ return;
+ }
+
+- kdb_bt1(kdb_tsk, ~0UL, false);
++ kdb_bt1(kdb_tsk, "A", false);
+ }
+
+ int
+@@ -138,8 +138,8 @@ kdb_bt(int argc, const char **argv)
+ if (strcmp(argv[0], "bta") == 0) {
+ struct task_struct *g, *p;
+ unsigned long cpu;
+- unsigned long mask = kdb_task_state_string(argc ? argv[1] :
+- NULL);
++ const char *mask = argc ? argv[1] : kdbgetenv("PS");
++
+ if (argc == 0)
+ kdb_ps_suppressed();
+ /* Run the active tasks first */
+@@ -167,7 +167,7 @@ kdb_bt(int argc, const char **argv)
+ return diag;
+ p = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (p)
+- return kdb_bt1(p, ~0UL, false);
++ return kdb_bt1(p, "A", false);
+ kdb_printf("No process with pid == %ld found\n", pid);
+ return 0;
+ } else if (strcmp(argv[0], "btt") == 0) {
+@@ -176,7 +176,7 @@ kdb_bt(int argc, const char **argv)
+ diag = kdbgetularg((char *)argv[1], &addr);
+ if (diag)
+ return diag;
+- return kdb_bt1((struct task_struct *)addr, ~0UL, false);
++ return kdb_bt1((struct task_struct *)addr, "A", false);
+ } else if (strcmp(argv[0], "btc") == 0) {
+ unsigned long cpu = ~0;
+ if (argc > 1)
+@@ -212,7 +212,7 @@ kdb_bt(int argc, const char **argv)
+ kdb_show_stack(kdb_current_task, (void *)addr);
+ return 0;
+ } else {
+- return kdb_bt1(kdb_current_task, ~0UL, false);
++ return kdb_bt1(kdb_current_task, "A", false);
+ }
+ }
+
+diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
+index fa6deda894a17..ead4da9471270 100644
+--- a/kernel/debug/kdb/kdb_main.c
++++ b/kernel/debug/kdb/kdb_main.c
+@@ -45,6 +45,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/uaccess.h>
+ #include <linux/slab.h>
++#include <linux/security.h>
+ #include "kdb_private.h"
+
+ #undef MODULE_PARAM_PREFIX
+@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu)
+ }
+
+ /*
+- * Check whether the flags of the current command and the permissions
+- * of the kdb console has allow a command to be run.
++ * Update the permissions flags (kdb_cmd_enabled) to match the
++ * current lockdown state.
++ *
++ * Within this function the calls to security_locked_down() are "lazy". We
++ * avoid calling them if the current value of kdb_cmd_enabled already excludes
++ * flags that might be subject to lockdown. Additionally we deliberately check
++ * the lockdown flags independently (even though read lockdown implies write
++ * lockdown) since that results in both simpler code and clearer messages to
++ * the user on first-time debugger entry.
++ *
++ * The permission masks during a read+write lockdown permit the following
++ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
++ *
++ * The INSPECT commands are not blocked during lockdown because they are
++ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
++ * forcing them to have no arguments) and lsmod. These commands do expose
++ * some kernel state but do not allow the developer seated at the console to
++ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
++ * given these are allowed for root during lockdown already.
++ */
++static void kdb_check_for_lockdown(void)
++{
++ const int write_flags = KDB_ENABLE_MEM_WRITE |
++ KDB_ENABLE_REG_WRITE |
++ KDB_ENABLE_FLOW_CTRL;
++ const int read_flags = KDB_ENABLE_MEM_READ |
++ KDB_ENABLE_REG_READ;
++
++ bool need_to_lockdown_write = false;
++ bool need_to_lockdown_read = false;
++
++ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
++ need_to_lockdown_write =
++ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
++
++ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
++ need_to_lockdown_read =
++ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
++
++ /* De-compose KDB_ENABLE_ALL if required */
++ if (need_to_lockdown_write || need_to_lockdown_read)
++ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
++ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
++
++ if (need_to_lockdown_write)
++ kdb_cmd_enabled &= ~write_flags;
++
++ if (need_to_lockdown_read)
++ kdb_cmd_enabled &= ~read_flags;
++}
++
++/*
++ * Check whether the flags of the current command, the permissions of the kdb
++ * console and the lockdown state allow a command to be run.
+ */
+-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
++static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+ bool no_args)
+ {
+ /* permissions comes from userspace so needs massaging slightly */
+@@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
+ kdb_curr_task(raw_smp_processor_id());
+
+ KDB_DEBUG_STATE("kdb_local 1", reason);
++
++ kdb_check_for_lockdown();
++
+ kdb_go_count = 0;
+ if (reason == KDB_REASON_DEBUG) {
+ /* special case below */
+@@ -2203,8 +2259,8 @@ static void kdb_cpu_status(void)
+ state = 'D'; /* cpu is online but unresponsive */
+ } else {
+ state = ' '; /* cpu is responding to kdb */
+- if (kdb_task_state_char(KDB_TSK(i)) == 'I')
+- state = 'I'; /* idle task */
++ if (kdb_task_state_char(KDB_TSK(i)) == '-')
++ state = '-'; /* idle task */
+ }
+ if (state != prev_state) {
+ if (prev_state != '?') {
+@@ -2271,37 +2327,30 @@ static int kdb_cpu(int argc, const char **argv)
+ void kdb_ps_suppressed(void)
+ {
+ int idle = 0, daemon = 0;
+- unsigned long mask_I = kdb_task_state_string("I"),
+- mask_M = kdb_task_state_string("M");
+ unsigned long cpu;
+ const struct task_struct *p, *g;
+ for_each_online_cpu(cpu) {
+ p = kdb_curr_task(cpu);
+- if (kdb_task_state(p, mask_I))
++ if (kdb_task_state(p, "-"))
+ ++idle;
+ }
+ for_each_process_thread(g, p) {
+- if (kdb_task_state(p, mask_M))
++ if (kdb_task_state(p, "ims"))
+ ++daemon;
+ }
+ if (idle || daemon) {
+ if (idle)
+- kdb_printf("%d idle process%s (state I)%s\n",
++ kdb_printf("%d idle process%s (state -)%s\n",
+ idle, idle == 1 ? "" : "es",
+ daemon ? " and " : "");
+ if (daemon)
+- kdb_printf("%d sleeping system daemon (state M) "
++ kdb_printf("%d sleeping system daemon (state [ims]) "
+ "process%s", daemon,
+ daemon == 1 ? "" : "es");
+ kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
+ }
+ }
+
+-/*
+- * kdb_ps - This function implements the 'ps' command which shows a
+- * list of the active processes.
+- * ps [DRSTCZEUIMA] All processes, optionally filtered by state
+- */
+ void kdb_ps1(const struct task_struct *p)
+ {
+ int cpu;
+@@ -2330,17 +2379,25 @@ void kdb_ps1(const struct task_struct *p)
+ }
+ }
+
++/*
++ * kdb_ps - This function implements the 'ps' command which shows a
++ * list of the active processes.
++ *
++ * ps [<state_chars>] Show processes, optionally selecting only those whose
++ * state character is found in <state_chars>.
++ */
+ static int kdb_ps(int argc, const char **argv)
+ {
+ struct task_struct *g, *p;
+- unsigned long mask, cpu;
++ const char *mask;
++ unsigned long cpu;
+
+ if (argc == 0)
+ kdb_ps_suppressed();
+ kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
+ (int)(2*sizeof(void *))+2, "Task Addr",
+ (int)(2*sizeof(void *))+2, "Thread");
+- mask = kdb_task_state_string(argc ? argv[1] : NULL);
++ mask = argc ? argv[1] : kdbgetenv("PS");
+ /* Run the active tasks first */
+ for_each_online_cpu(cpu) {
+ if (KDB_FLAG(CMD_INTERRUPT))
+@@ -2742,8 +2799,8 @@ static kdbtab_t maintab[] = {
+ },
+ { .name = "bta",
+ .func = kdb_bt,
+- .usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
+- .help = "Backtrace all processes matching state flag",
++ .usage = "[<state_chars>|A]",
++ .help = "Backtrace all processes whose state matches",
+ .flags = KDB_ENABLE_INSPECT,
+ },
+ { .name = "btc",
+@@ -2797,7 +2854,7 @@ static kdbtab_t maintab[] = {
+ },
+ { .name = "ps",
+ .func = kdb_ps,
+- .usage = "[<flags>|A]",
++ .usage = "[<state_chars>|A]",
+ .help = "Display active task list",
+ .flags = KDB_ENABLE_INSPECT,
+ },
+diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
+index 629590084a0dc..0d2f9feea0a46 100644
+--- a/kernel/debug/kdb/kdb_private.h
++++ b/kernel/debug/kdb/kdb_private.h
+@@ -190,10 +190,8 @@ extern char kdb_grep_string[];
+ extern int kdb_grep_leading;
+ extern int kdb_grep_trailing;
+ extern char *kdb_cmds[];
+-extern unsigned long kdb_task_state_string(const char *);
+ extern char kdb_task_state_char (const struct task_struct *);
+-extern unsigned long kdb_task_state(const struct task_struct *p,
+- unsigned long mask);
++extern bool kdb_task_state(const struct task_struct *p, const char *mask);
+ extern void kdb_ps_suppressed(void);
+ extern void kdb_ps1(const struct task_struct *p);
+ extern void kdb_send_sig(struct task_struct *p, int sig);
+diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
+index 7507d9a8dc6ac..85cb51c4a17e6 100644
+--- a/kernel/debug/kdb/kdb_support.c
++++ b/kernel/debug/kdb/kdb_support.c
+@@ -24,6 +24,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/kdb.h>
+ #include <linux/slab.h>
++#include <linux/ctype.h>
+ #include "kdb_private.h"
+
+ /*
+@@ -290,7 +291,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size)
+ */
+ int kdb_putarea_size(unsigned long addr, void *res, size_t size)
+ {
+- int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size);
++ int ret = copy_to_kernel_nofault((char *)addr, (char *)res, size);
+ if (ret) {
+ if (!KDB_STATE(SUPPRESS)) {
+ kdb_func_printf("Bad address 0x%lx\n", addr);
+@@ -473,82 +474,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size)
+ return diag;
+ }
+
+-/*
+- * kdb_task_state_string - Convert a string containing any of the
+- * letters DRSTCZEUIMA to a mask for the process state field and
+- * return the value. If no argument is supplied, return the mask
+- * that corresponds to environment variable PS, DRSTCZEU by
+- * default.
+- * Inputs:
+- * s String to convert
+- * Returns:
+- * Mask for process state.
+- * Notes:
+- * The mask folds data from several sources into a single long value, so
+- * be careful not to overlap the bits. TASK_* bits are in the LSB,
+- * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there
+- * is no overlap between TASK_* and EXIT_* but that may not always be
+- * true, so EXIT_* bits are shifted left 16 bits before being stored in
+- * the mask.
+- */
+-
+-/* unrunnable is < 0 */
+-#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1))
+-#define RUNNING (1UL << (8*sizeof(unsigned long) - 2))
+-#define IDLE (1UL << (8*sizeof(unsigned long) - 3))
+-#define DAEMON (1UL << (8*sizeof(unsigned long) - 4))
+
+-unsigned long kdb_task_state_string(const char *s)
+-{
+- long res = 0;
+- if (!s) {
+- s = kdbgetenv("PS");
+- if (!s)
+- s = "DRSTCZEU"; /* default value for ps */
+- }
+- while (*s) {
+- switch (*s) {
+- case 'D':
+- res |= TASK_UNINTERRUPTIBLE;
+- break;
+- case 'R':
+- res |= RUNNING;
+- break;
+- case 'S':
+- res |= TASK_INTERRUPTIBLE;
+- break;
+- case 'T':
+- res |= TASK_STOPPED;
+- break;
+- case 'C':
+- res |= TASK_TRACED;
+- break;
+- case 'Z':
+- res |= EXIT_ZOMBIE << 16;
+- break;
+- case 'E':
+- res |= EXIT_DEAD << 16;
+- break;
+- case 'U':
+- res |= UNRUNNABLE;
+- break;
+- case 'I':
+- res |= IDLE;
+- break;
+- case 'M':
+- res |= DAEMON;
+- break;
+- case 'A':
+- res = ~0UL;
+- break;
+- default:
+- kdb_func_printf("unknown flag '%c' ignored\n", *s);
+- break;
+- }
+- ++s;
+- }
+- return res;
+-}
+
+ /*
+ * kdb_task_state_char - Return the character that represents the task state.
+@@ -559,7 +485,6 @@ unsigned long kdb_task_state_string(const char *s)
+ */
+ char kdb_task_state_char (const struct task_struct *p)
+ {
+- unsigned int p_state;
+ unsigned long tmp;
+ char state;
+ int cpu;
+@@ -568,25 +493,18 @@ char kdb_task_state_char (const struct task_struct *p)
+ copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
+ return 'E';
+
+- cpu = kdb_process_cpu(p);
+- p_state = READ_ONCE(p->__state);
+- state = (p_state == 0) ? 'R' :
+- (p_state < 0) ? 'U' :
+- (p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+- (p_state & TASK_STOPPED) ? 'T' :
+- (p_state & TASK_TRACED) ? 'C' :
+- (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
+- (p->exit_state & EXIT_DEAD) ? 'E' :
+- (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
++ state = task_state_to_char((struct task_struct *) p);
++
+ if (is_idle_task(p)) {
+ /* Idle task. Is it really idle, apart from the kdb
+ * interrupt? */
++ cpu = kdb_process_cpu(p);
+ if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
+ if (cpu != kdb_initial_cpu)
+- state = 'I'; /* idle task */
++ state = '-'; /* idle task */
+ }
+- } else if (!p->mm && state == 'S') {
+- state = 'M'; /* sleeping system daemon */
++ } else if (!p->mm && strchr("IMS", state)) {
++ state = tolower(state); /* sleeping system daemon */
+ }
+ return state;
+ }
+@@ -596,14 +514,28 @@ char kdb_task_state_char (const struct task_struct *p)
+ * given by the mask.
+ * Inputs:
+ * p struct task for the process
+- * mask mask from kdb_task_state_string to select processes
++ * mask set of characters used to select processes; both NULL
++ * and the empty string mean adopt a default filter, which
++ * is to suppress sleeping system daemons and the idle tasks
+ * Returns:
+ * True if the process matches at least one criteria defined by the mask.
+ */
+-unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
++bool kdb_task_state(const struct task_struct *p, const char *mask)
+ {
+- char state[] = { kdb_task_state_char(p), '\0' };
+- return (mask & kdb_task_state_string(state)) != 0;
++ char state = kdb_task_state_char(p);
++
++ /* If there is no mask, then we will filter code that runs when the
++ * scheduler is idling and any system daemons that are currently
++ * sleeping.
++ */
++ if (!mask || mask[0] == '\0')
++ return !strchr("-ims", state);
++
++ /* A is a special case that matches all states */
++ if (strchr(mask, 'A'))
++ return true;
++
++ return strchr(mask, state);
+ }
+
+ /* Maintain a small stack of kdb_flags to allow recursion without disturbing
+diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
+index 7a14ca29c3778..2caafd13f8aac 100644
+--- a/kernel/dma/debug.c
++++ b/kernel/dma/debug.c
+@@ -448,7 +448,7 @@ void debug_dma_dump_mappings(struct device *dev)
+ * other hand, consumes a single dma_debug_entry, but inserts 'nents'
+ * entries into the tree.
+ */
+-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
++static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC);
+ static DEFINE_SPINLOCK(radix_lock);
+ #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+ #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
+@@ -564,7 +564,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
+
+ rc = active_cacheline_insert(entry);
+ if (rc == -ENOMEM) {
+- pr_err("cacheline tracking ENOMEM, dma-debug disabled\n");
++ pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
+ global_disable = true;
+ } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ err_printk(entry->dev, entry,
+@@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str)
+ global_disable = true;
+ }
+
+- return 0;
++ return 1;
+ }
+
+ static __init int dma_debug_entries_cmdline(char *str)
+@@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str)
+ return -EINVAL;
+ if (!get_option(&str, &nr_prealloc_entries))
+ nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
+- return 0;
++ return 1;
+ }
+
+ __setup("dma_debug=", dma_debug_cmdline);
+diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
+index 4c6c5e0635e34..ed5dd9e023241 100644
+--- a/kernel/dma/direct.c
++++ b/kernel/dma/direct.c
+@@ -75,6 +75,25 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
+ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
+ }
+
++static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size)
++{
++ if (!force_dma_unencrypted(dev))
++ return 0;
++ return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size));
++}
++
++static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size)
++{
++ int ret;
++
++ if (!force_dma_unencrypted(dev))
++ return 0;
++ ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size));
++ if (ret)
++ pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n");
++ return ret;
++}
++
+ static void __dma_direct_free_pages(struct device *dev, struct page *page,
+ size_t size)
+ {
+@@ -85,7 +104,7 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page,
+ }
+
+ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+- gfp_t gfp)
++ gfp_t gfp, bool allow_highmem)
+ {
+ int node = dev_to_node(dev);
+ struct page *page = NULL;
+@@ -106,9 +125,12 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+ }
+
+ page = dma_alloc_contiguous(dev, size, gfp);
+- if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+- dma_free_contiguous(dev, page, size);
+- page = NULL;
++ if (page) {
++ if (!dma_coherent_ok(dev, page_to_phys(page), size) ||
++ (!allow_highmem && PageHighMem(page))) {
++ dma_free_contiguous(dev, page, size);
++ page = NULL;
++ }
+ }
+ again:
+ if (!page)
+@@ -149,29 +171,37 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size,
+ return ret;
+ }
+
++static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size,
++ dma_addr_t *dma_handle, gfp_t gfp)
++{
++ struct page *page;
++
++ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
++ if (!page)
++ return NULL;
++
++ /* remove any dirty cache lines on the kernel alias */
++ if (!PageHighMem(page))
++ arch_dma_prep_coherent(page, size);
++
++ /* return the page pointer as the opaque cookie */
++ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
++ return page;
++}
++
+ void *dma_direct_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+ {
+ struct page *page;
+ void *ret;
+- int err;
+
+ size = PAGE_ALIGN(size);
+ if (attrs & DMA_ATTR_NO_WARN)
+ gfp |= __GFP_NOWARN;
+
+ if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+- !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
+- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
+- if (!page)
+- return NULL;
+- /* remove any dirty cache lines on the kernel alias */
+- if (!PageHighMem(page))
+- arch_dma_prep_coherent(page, size);
+- *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+- /* return the page pointer as the opaque cookie */
+- return page;
+- }
++ !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev))
++ return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp);
+
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+@@ -200,7 +230,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+ /* we always manually zero the memory once we are done */
+- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
++ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
+ if (!page)
+ return NULL;
+
+@@ -216,12 +246,6 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ __builtin_return_address(0));
+ if (!ret)
+ goto out_free_pages;
+- if (force_dma_unencrypted(dev)) {
+- err = set_memory_decrypted((unsigned long)ret,
+- 1 << get_order(size));
+- if (err)
+- goto out_free_pages;
+- }
+ memset(ret, 0, size);
+ goto done;
+ }
+@@ -238,13 +262,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ }
+
+ ret = page_address(page);
+- if (force_dma_unencrypted(dev)) {
+- err = set_memory_decrypted((unsigned long)ret,
+- 1 << get_order(size));
+- if (err)
+- goto out_free_pages;
+- }
+-
++ if (dma_set_decrypted(dev, ret, size))
++ goto out_free_pages;
+ memset(ret, 0, size);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+@@ -259,13 +278,8 @@ done:
+ return ret;
+
+ out_encrypt_pages:
+- if (force_dma_unencrypted(dev)) {
+- err = set_memory_encrypted((unsigned long)page_address(page),
+- 1 << get_order(size));
+- /* If memory cannot be re-encrypted, it must be leaked */
+- if (err)
+- return NULL;
+- }
++ if (dma_set_encrypted(dev, page_address(page), size))
++ return NULL;
+ out_free_pages:
+ __dma_direct_free_pages(dev, page, size);
+ return NULL;
+@@ -304,13 +318,14 @@ void dma_direct_free(struct device *dev, size_t size,
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
+- if (force_dma_unencrypted(dev))
+- set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
+-
+- if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr))
++ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
+ vunmap(cpu_addr);
+- else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
+- arch_dma_clear_uncached(cpu_addr, size);
++ } else {
++ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
++ arch_dma_clear_uncached(cpu_addr, size);
++ if (dma_set_encrypted(dev, cpu_addr, size))
++ return;
++ }
+
+ __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
+ }
+@@ -326,26 +341,13 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
+ !is_swiotlb_for_alloc(dev))
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+- page = __dma_direct_alloc_pages(dev, size, gfp);
++ page = __dma_direct_alloc_pages(dev, size, gfp, false);
+ if (!page)
+ return NULL;
+- if (PageHighMem(page)) {
+- /*
+- * Depending on the cma= arguments and per-arch setup
+- * dma_alloc_contiguous could return highmem pages.
+- * Without remapping there is no way to return them here,
+- * so log an error and fail.
+- */
+- dev_info(dev, "Rejecting highmem page from CMA.\n");
+- goto out_free_pages;
+- }
+
+ ret = page_address(page);
+- if (force_dma_unencrypted(dev)) {
+- if (set_memory_decrypted((unsigned long)ret,
+- 1 << get_order(size)))
+- goto out_free_pages;
+- }
++ if (dma_set_decrypted(dev, ret, size))
++ goto out_free_pages;
+ memset(ret, 0, size);
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return page;
+@@ -358,7 +360,6 @@ void dma_direct_free_pages(struct device *dev, size_t size,
+ struct page *page, dma_addr_t dma_addr,
+ enum dma_data_direction dir)
+ {
+- unsigned int page_order = get_order(size);
+ void *vaddr = page_address(page);
+
+ /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+@@ -366,9 +367,8 @@ void dma_direct_free_pages(struct device *dev, size_t size,
+ dma_free_from_pool(dev, vaddr, size))
+ return;
+
+- if (force_dma_unencrypted(dev))
+- set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
+-
++ if (dma_set_encrypted(dev, vaddr, size))
++ return;
+ __dma_direct_free_pages(dev, page, size);
+ }
+
+diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
+index 4632b0f4f72eb..8a6cd53dbe8ce 100644
+--- a/kernel/dma/direct.h
++++ b/kernel/dma/direct.h
+@@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+
+ if (unlikely(is_swiotlb_buffer(dev, phys)))
+- swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
++ swiotlb_tbl_unmap_single(dev, phys, size, dir,
++ attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ #endif /* _KERNEL_DMA_DIRECT_H */
+diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
+index 8349a9f2c3453..9478eccd1c8e6 100644
+--- a/kernel/dma/mapping.c
++++ b/kernel/dma/mapping.c
+@@ -296,10 +296,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
+ if (WARN_ON_ONCE(!dev->dma_mask))
+ return DMA_MAPPING_ERROR;
+
+- /* Don't allow RAM to be mapped */
+- if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
+- return DMA_MAPPING_ERROR;
+-
+ if (dma_map_direct(dev, ops))
+ addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+ else if (ops->map_resource)
+diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
+index 5f84e6cdb78ea..4d40dcce7604b 100644
+--- a/kernel/dma/pool.c
++++ b/kernel/dma/pool.c
+@@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void)
+ GFP_KERNEL);
+ if (!atomic_pool_kernel)
+ ret = -ENOMEM;
+- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
++ if (has_managed_dma()) {
+ atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
+ GFP_KERNEL | GFP_DMA);
+ if (!atomic_pool_dma)
+@@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
+ if (prev == NULL) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+ return atomic_pool_dma32;
+- if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
++ if (atomic_pool_dma && (gfp & GFP_DMA))
+ return atomic_pool_dma;
+ return atomic_pool_kernel;
+ }
+diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
+index b4526668072e7..27596f3b4aef3 100644
+--- a/kernel/dma/remap.c
++++ b/kernel/dma/remap.c
+@@ -43,13 +43,13 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
+ void *vaddr;
+ int i;
+
+- pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
++ pages = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ for (i = 0; i < count; i++)
+ pages[i] = nth_page(page, i);
+ vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
+- kfree(pages);
++ kvfree(pages);
+
+ return vaddr;
+ }
+diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
+index 87c40517e8227..a9849670bdb54 100644
+--- a/kernel/dma/swiotlb.c
++++ b/kernel/dma/swiotlb.c
+@@ -435,7 +435,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
+ }
+ }
+
+-#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
++static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
++{
++ return start + (idx << IO_TLB_SHIFT);
++}
+
+ /*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+@@ -459,7 +462,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
+ * allocate a buffer from that IO TLB pool.
+ */
+ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+- size_t alloc_size)
++ size_t alloc_size, unsigned int alloc_align_mask)
+ {
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+@@ -483,6 +486,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ if (alloc_size >= PAGE_SIZE)
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
++ stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
+
+ spin_lock_irqsave(&mem->lock, flags);
+ if (unlikely(nslots > mem->nslabs - mem->used))
+@@ -541,7 +545,8 @@ found:
+
+ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+- enum dma_data_direction dir, unsigned long attrs)
++ unsigned int alloc_align_mask, enum dma_data_direction dir,
++ unsigned long attrs)
+ {
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+@@ -549,7 +554,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ int index;
+ phys_addr_t tlb_addr;
+
+- if (!mem)
++ if (!mem || !mem->nslabs)
+ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+ if (mem_encrypt_active())
+@@ -561,7 +566,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+- index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
++ index = swiotlb_find_slots(dev, orig_addr,
++ alloc_size + offset, alloc_align_mask);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+@@ -578,9 +584,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+ tlb_addr = slot_addr(mem->start, index) + offset;
+- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
++ /*
++ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
++ * to the tlb buffer, if we knew for sure the device will
++ * overwrite the entire current content. But we don't. Thus
++ * unconditional bounce may prevent leaking swiotlb content (i.e.
++ * kernel memory) to user-space.
++ */
++ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ return tlb_addr;
+ }
+
+@@ -675,7 +686,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
+ swiotlb_force);
+
+- swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
++ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
+ attrs);
+ if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
+@@ -698,7 +709,18 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+
+ size_t swiotlb_max_mapping_size(struct device *dev)
+ {
+- return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
++ int min_align_mask = dma_get_min_align_mask(dev);
++ int min_align = 0;
++
++ /*
++ * swiotlb_find_slots() skips slots according to
++ * min align mask. This affects max mapping size.
++ * Take it into account here.
++ */
++ if (min_align_mask)
++ min_align = roundup(min_align_mask, IO_TLB_SIZE);
++
++ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
+ }
+
+ bool is_swiotlb_active(struct device *dev)
+@@ -759,7 +781,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
+ if (!mem)
+ return NULL;
+
+- index = swiotlb_find_slots(dev, 0, size);
++ index = swiotlb_find_slots(dev, 0, size, 0);
+ if (index == -1)
+ return NULL;
+
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index d5a61d565ad5d..e002bea6b4be3 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -124,7 +124,7 @@ static __always_inline void __exit_to_user_mode(void)
+ {
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+
+ user_enter_irqoff();
+@@ -187,7 +187,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ /* Check if any of the above work has queued a deferred wakeup */
+ tick_nohz_user_enter_prepare();
+
+- ti_work = READ_ONCE(current_thread_info()->flags);
++ ti_work = read_thread_flags();
+ }
+
+ /* Return the latest work state for arch_exit_to_user_mode() */
+@@ -196,13 +196,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+
+ static void exit_to_user_mode_prepare(struct pt_regs *regs)
+ {
+- unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
++ unsigned long ti_work;
+
+ lockdep_assert_irqs_disabled();
+
+ /* Flush pending rcuog wakeup before the last need_resched() check */
+ tick_nohz_user_enter_prepare();
+
++ ti_work = read_thread_flags();
+ if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
+ ti_work = exit_to_user_mode_loop(regs, ti_work);
+
+@@ -412,7 +413,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
+ instrumentation_begin();
+ /* Tell the tracer that IRET will enable interrupts */
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+ rcu_irq_exit();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+@@ -465,7 +466,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
+ ftrace_nmi_exit();
+ if (irq_state.lockdep) {
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ }
+ instrumentation_end();
+
+diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
+index 49972ee99aff6..96d476e06c777 100644
+--- a/kernel/entry/kvm.c
++++ b/kernel/entry/kvm.c
+@@ -26,7 +26,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
+ if (ret)
+ return ret;
+
+- ti_work = READ_ONCE(current_thread_info()->flags);
++ ti_work = read_thread_flags();
+ } while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched());
+ return 0;
+ }
+@@ -43,7 +43,7 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu)
+ * disabled in the inner loop before going into guest mode. No need
+ * to disable interrupts here.
+ */
+- ti_work = READ_ONCE(current_thread_info()->flags);
++ ti_work = read_thread_flags();
+ if (!(ti_work & XFER_TO_GUEST_MODE_WORK))
+ return 0;
+
+diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
+index c240302f56e23..0b6379adff6bd 100644
+--- a/kernel/entry/syscall_user_dispatch.c
++++ b/kernel/entry/syscall_user_dispatch.c
+@@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs)
+ * access_ok() is performed once, at prctl time, when
+ * the selector is loaded by userspace.
+ */
+- if (unlikely(__get_user(state, sd->selector)))
+- do_exit(SIGSEGV);
++ if (unlikely(__get_user(state, sd->selector))) {
++ force_exit_sig(SIGSEGV);
++ return true;
++ }
+
+ if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW))
+ return false;
+
+- if (state != SYSCALL_DISPATCH_FILTER_BLOCK)
+- do_exit(SIGSYS);
++ if (state != SYSCALL_DISPATCH_FILTER_BLOCK) {
++ force_exit_sig(SIGSYS);
++ return true;
++ }
+ }
+
+ sd->on_dispatch = true;
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index f23ca260307f0..c7f13da672c9d 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -54,6 +54,7 @@
+ #include <linux/highmem.h>
+ #include <linux/pgtable.h>
+ #include <linux/buildid.h>
++#include <linux/task_work.h>
+
+ #include "internal.h"
+
+@@ -674,6 +675,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+ WRITE_ONCE(event->state, state);
+ }
+
++/*
++ * UP store-release, load-acquire
++ */
++
++#define __store_release(ptr, val) \
++do { \
++ barrier(); \
++ WRITE_ONCE(*(ptr), (val)); \
++} while (0)
++
++#define __load_acquire(ptr) \
++({ \
++ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
++ barrier(); \
++ ___p; \
++})
++
+ #ifdef CONFIG_CGROUP_PERF
+
+ static inline bool
+@@ -719,34 +737,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ return t->time;
+ }
+
+-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+- struct perf_cgroup_info *info;
+- u64 now;
+-
+- now = perf_clock();
++ struct perf_cgroup_info *t;
+
+- info = this_cpu_ptr(cgrp->info);
++ t = per_cpu_ptr(event->cgrp->info, event->cpu);
++ if (!__load_acquire(&t->active))
++ return t->time;
++ now += READ_ONCE(t->timeoffset);
++ return now;
++}
+
+- info->time += now - info->timestamp;
++static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
++{
++ if (adv)
++ info->time += now - info->timestamp;
+ info->timestamp = now;
++ /*
++ * see update_context_time()
++ */
++ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
+ {
+ struct perf_cgroup *cgrp = cpuctx->cgrp;
+ struct cgroup_subsys_state *css;
++ struct perf_cgroup_info *info;
+
+ if (cgrp) {
++ u64 now = perf_clock();
++
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+- __update_cgrp_time(cgrp);
++ info = this_cpu_ptr(cgrp->info);
++
++ __update_cgrp_time(info, now, true);
++ if (final)
++ __store_release(&info->active, 0);
+ }
+ }
+ }
+
+ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
++ struct perf_cgroup_info *info;
+ struct perf_cgroup *cgrp;
+
+ /*
+@@ -760,8 +795,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ /*
+ * Do not update time when cgroup is not active
+ */
+- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+- __update_cgrp_time(event->cgrp);
++ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
++ info = this_cpu_ptr(event->cgrp->info);
++ __update_cgrp_time(info, perf_clock(), true);
++ }
+ }
+
+ static inline void
+@@ -785,7 +822,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+ info = this_cpu_ptr(cgrp->info);
+- info->timestamp = ctx->timestamp;
++ __update_cgrp_time(info, ctx->timestamp, false);
++ __store_release(&info->active, 1);
+ }
+ }
+
+@@ -802,7 +840,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
+ */
+ static void perf_cgroup_switch(struct task_struct *task, int mode)
+ {
+- struct perf_cpu_context *cpuctx;
++ struct perf_cpu_context *cpuctx, *tmp;
+ struct list_head *list;
+ unsigned long flags;
+
+@@ -813,7 +851,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
+ local_irq_save(flags);
+
+ list = this_cpu_ptr(&cgrp_cpuctx_list);
+- list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
++ list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
+ WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+
+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+@@ -981,14 +1019,6 @@ out:
+ return ret;
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+-{
+- struct perf_cgroup_info *t;
+- t = per_cpu_ptr(event->cgrp->info, event->cpu);
+- event->shadow_ctx_time = now - t->timestamp;
+-}
+-
+ static inline void
+ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
+ {
+@@ -1066,7 +1096,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
++ bool final)
+ {
+ }
+
+@@ -1098,12 +1129,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+ {
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
++static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ {
++ return 0;
+ }
+
+-static inline u64 perf_cgroup_event_time(struct perf_event *event)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+ return 0;
+ }
+@@ -1193,6 +1224,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
+ return 0;
+ }
+
++static int perf_mux_hrtimer_restart_ipi(void *arg)
++{
++ return perf_mux_hrtimer_restart(arg);
++}
++
+ void perf_pmu_disable(struct pmu *pmu)
+ {
+ int *count = this_cpu_ptr(pmu->pmu_disable_count);
+@@ -1525,22 +1561,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
+ /*
+ * Update the record of the current time in a context.
+ */
+-static void update_context_time(struct perf_event_context *ctx)
++static void __update_context_time(struct perf_event_context *ctx, bool adv)
+ {
+ u64 now = perf_clock();
+
+- ctx->time += now - ctx->timestamp;
++ if (adv)
++ ctx->time += now - ctx->timestamp;
+ ctx->timestamp = now;
++
++ /*
++ * The above: time' = time + (now - timestamp), can be re-arranged
++ * into: time` = now + (time - timestamp), which gives a single value
++ * offset to compute future time without locks on.
++ *
++ * See perf_event_time_now(), which can be used from NMI context where
++ * it's (obviously) not possible to acquire ctx->lock in order to read
++ * both the above values in a consistent manner.
++ */
++ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
++}
++
++static void update_context_time(struct perf_event_context *ctx)
++{
++ __update_context_time(ctx, true);
+ }
+
+ static u64 perf_event_time(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+
++ if (unlikely(!ctx))
++ return 0;
++
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time(event);
+
+- return ctx ? ctx->time : 0;
++ return ctx->time;
++}
++
++static u64 perf_event_time_now(struct perf_event *event, u64 now)
++{
++ struct perf_event_context *ctx = event->ctx;
++
++ if (unlikely(!ctx))
++ return 0;
++
++ if (is_cgroup_event(event))
++ return perf_cgroup_event_time_now(event, now);
++
++ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
++ return ctx->time;
++
++ now += READ_ONCE(ctx->timeoffset);
++ return now;
+ }
+
+ static enum event_type_t get_event_type(struct perf_event *event)
+@@ -2184,7 +2257,7 @@ static void perf_group_detach(struct perf_event *event)
+ /* Inherit group flags from the previous leader */
+ sibling->group_caps = event->group_caps;
+
+- if (!RB_EMPTY_NODE(&event->group_node)) {
++ if (sibling->attach_state & PERF_ATTACH_CONTEXT) {
+ add_event_to_groups(sibling, event->ctx);
+
+ if (sibling->state == PERF_EVENT_STATE_ACTIVE)
+@@ -2285,11 +2358,27 @@ event_sched_out(struct perf_event *event,
+ event->pmu->del(event, 0);
+ event->oncpu = -1;
+
+- if (READ_ONCE(event->pending_disable) >= 0) {
+- WRITE_ONCE(event->pending_disable, -1);
++ if (event->pending_disable) {
++ event->pending_disable = 0;
+ perf_cgroup_event_disable(event, ctx);
+ state = PERF_EVENT_STATE_OFF;
+ }
++
++ if (event->pending_sigtrap) {
++ bool dec = true;
++
++ event->pending_sigtrap = 0;
++ if (state != PERF_EVENT_STATE_OFF &&
++ !event->pending_work) {
++ event->pending_work = 1;
++ dec = false;
++ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
++ task_work_add(current, &event->pending_task, TWA_RESUME);
++ }
++ if (dec)
++ local_dec(&event->ctx->nr_pending);
++ }
++
+ perf_event_set_state(event, state);
+
+ if (!is_software_event(event))
+@@ -2329,6 +2418,7 @@ group_sched_out(struct perf_event *group_event,
+
+ #define DETACH_GROUP 0x01UL
+ #define DETACH_CHILD 0x02UL
++#define DETACH_DEAD 0x04UL
+
+ /*
+ * Cross CPU call to remove a performance event
+@@ -2346,17 +2436,28 @@ __perf_remove_from_context(struct perf_event *event,
+
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, false);
+ }
+
++ /*
++ * Ensure event_sched_out() switches to OFF, at the very least
++ * this avoids raising perf_pending_task() at this time.
++ */
++ if (flags & DETACH_DEAD)
++ event->pending_disable = 1;
+ event_sched_out(event, cpuctx, ctx);
+ if (flags & DETACH_GROUP)
+ perf_group_detach(event);
+ if (flags & DETACH_CHILD)
+ perf_child_detach(event);
+ list_del_event(event, ctx);
++ if (flags & DETACH_DEAD)
++ event->state = PERF_EVENT_STATE_DEAD;
+
+ if (!ctx->nr_events && ctx->is_active) {
++ if (ctx == &cpuctx->ctx)
++ update_cgrp_time_from_cpuctx(cpuctx, true);
++
+ ctx->is_active = 0;
+ ctx->rotate_necessary = 0;
+ if (ctx->task) {
+@@ -2388,7 +2489,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
+ * event_function_call() user.
+ */
+ raw_spin_lock_irq(&ctx->lock);
+- if (!ctx->is_active) {
++ /*
++ * Cgroup events are per-cpu events, and must IPI because of
++ * cgrp_cpuctx_list.
++ */
++ if (!ctx->is_active && !is_cgroup_event(event)) {
+ __perf_remove_from_context(event, __get_cpu_context(ctx),
+ ctx, (void *)flags);
+ raw_spin_unlock_irq(&ctx->lock);
+@@ -2434,7 +2539,7 @@ static void __perf_event_disable(struct perf_event *event,
+ * hold the top-level event's child_mutex, so any descendant that
+ * goes to exit will block in perf_event_exit_event().
+ *
+- * When called from perf_pending_event it's OK because event->ctx
++ * When called from perf_pending_irq it's OK because event->ctx
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_event_task_sched_out for this context.
+ */
+@@ -2473,43 +2578,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
+
+ void perf_event_disable_inatomic(struct perf_event *event)
+ {
+- WRITE_ONCE(event->pending_disable, smp_processor_id());
+- /* can fail, see perf_pending_event_disable() */
+- irq_work_queue(&event->pending);
+-}
+-
+-static void perf_set_shadow_time(struct perf_event *event,
+- struct perf_event_context *ctx)
+-{
+- /*
+- * use the correct time source for the time snapshot
+- *
+- * We could get by without this by leveraging the
+- * fact that to get to this function, the caller
+- * has most likely already called update_context_time()
+- * and update_cgrp_time_xx() and thus both timestamp
+- * are identical (or very close). Given that tstamp is,
+- * already adjusted for cgroup, we could say that:
+- * tstamp - ctx->timestamp
+- * is equivalent to
+- * tstamp - cgrp->timestamp.
+- *
+- * Then, in perf_output_read(), the calculation would
+- * work with no changes because:
+- * - event is guaranteed scheduled in
+- * - no scheduled out in between
+- * - thus the timestamp would be the same
+- *
+- * But this is a bit hairy.
+- *
+- * So instead, we have an explicit cgroup call to remain
+- * within the time source all along. We believe it
+- * is cleaner and simpler to understand.
+- */
+- if (is_cgroup_event(event))
+- perf_cgroup_set_shadow_time(event, event->tstamp);
+- else
+- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
++ event->pending_disable = 1;
++ irq_work_queue(&event->pending_irq);
+ }
+
+ #define MAX_INTERRUPTS (~0ULL)
+@@ -2552,8 +2622,6 @@ event_sched_in(struct perf_event *event,
+
+ perf_pmu_disable(event->pmu);
+
+- perf_set_shadow_time(event, ctx);
+-
+ perf_log_itrace_start(event);
+
+ if (event->pmu->add(event, PERF_EF_START)) {
+@@ -2857,11 +2925,14 @@ perf_install_in_context(struct perf_event_context *ctx,
+ * perf_event_attr::disabled events will not run and can be initialized
+ * without IPI. Except when this is the first event for the context, in
+ * that case we need the magic of the IPI to set ctx->is_active.
++ * Similarly, cgroup events for the context also need the IPI to
++ * manipulate the cgrp_cpuctx_list.
+ *
+ * The IOC_ENABLE that is sure to follow the creation of a disabled
+ * event will issue the IPI and reprogram the hardware.
+ */
+- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
++ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF &&
++ ctx->nr_events && !is_cgroup_event(event)) {
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->task == TASK_TOMBSTONE) {
+ raw_spin_unlock_irq(&ctx->lock);
+@@ -3193,6 +3264,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
+ return err;
+ }
+
++/*
++ * Copy event-type-independent attributes that may be modified.
++ */
++static void perf_event_modify_copy_attr(struct perf_event_attr *to,
++ const struct perf_event_attr *from)
++{
++ to->sig_data = from->sig_data;
++}
++
+ static int perf_event_modify_attr(struct perf_event *event,
+ struct perf_event_attr *attr)
+ {
+@@ -3215,10 +3295,17 @@ static int perf_event_modify_attr(struct perf_event *event,
+ WARN_ON_ONCE(event->ctx->parent_ctx);
+
+ mutex_lock(&event->child_mutex);
++ /*
++ * Event-type-independent attributes must be copied before event-type
++ * modification, which will validate that final attributes match the
++ * source attributes after all relevant attributes have been copied.
++ */
++ perf_event_modify_copy_attr(&event->attr, attr);
+ err = func(event, attr);
+ if (err)
+ goto out;
+ list_for_each_entry(child, &event->child_list, child_list) {
++ perf_event_modify_copy_attr(&child->attr, attr);
+ err = func(child, attr);
+ if (err)
+ goto out;
+@@ -3247,16 +3334,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ return;
+ }
+
+- ctx->is_active &= ~event_type;
+- if (!(ctx->is_active & EVENT_ALL))
+- ctx->is_active = 0;
+-
+- if (ctx->task) {
+- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+- if (!ctx->is_active)
+- cpuctx->task_ctx = NULL;
+- }
+-
+ /*
+ * Always update time if it was set; not only when it changes.
+ * Otherwise we can 'forget' to update time for any but the last
+@@ -3270,7 +3347,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ if (is_active & EVENT_TIME) {
+ /* update (and stop) ctx time */
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
++ ctx->is_active &= ~event_type;
++ if (!(ctx->is_active & EVENT_ALL))
++ ctx->is_active = 0;
++
++ if (ctx->task) {
++ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
++ if (!ctx->is_active)
++ cpuctx->task_ctx = NULL;
+ }
+
+ is_active ^= ctx->is_active; /* changed bits */
+@@ -3444,11 +3536,23 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+ raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+ if (context_equiv(ctx, next_ctx)) {
+
++ perf_pmu_disable(pmu);
++
++ /* PMIs are disabled; ctx->nr_pending is stable. */
++ if (local_read(&ctx->nr_pending) ||
++ local_read(&next_ctx->nr_pending)) {
++ /*
++ * Must not swap out ctx when there's pending
++ * events that rely on the ctx->task relation.
++ */
++ raw_spin_unlock(&next_ctx->lock);
++ rcu_read_unlock();
++ goto inside_switch;
++ }
++
+ WRITE_ONCE(ctx->task, next);
+ WRITE_ONCE(next_ctx->task, task);
+
+- perf_pmu_disable(pmu);
+-
+ if (cpuctx->sched_cb_usage && pmu->sched_task)
+ pmu->sched_task(ctx, false);
+
+@@ -3489,6 +3593,7 @@ unlock:
+ raw_spin_lock(&ctx->lock);
+ perf_pmu_disable(pmu);
+
++inside_switch:
+ if (cpuctx->sched_cb_usage && pmu->sched_task)
+ pmu->sched_task(ctx, false);
+ task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
+@@ -3707,13 +3812,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
+ return 0;
+ }
+
++/*
++ * Because the userpage is strictly per-event (there is no concept of context,
++ * so there cannot be a context indirection), every userpage must be updated
++ * when context time starts :-(
++ *
++ * IOW, we must not miss EVENT_TIME edges.
++ */
+ static inline bool event_update_userpage(struct perf_event *event)
+ {
+ if (likely(!atomic_read(&event->mmap_count)))
+ return false;
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_update_userpage(event);
+
+ return true;
+@@ -3797,13 +3908,23 @@ ctx_sched_in(struct perf_event_context *ctx,
+ struct task_struct *task)
+ {
+ int is_active = ctx->is_active;
+- u64 now;
+
+ lockdep_assert_held(&ctx->lock);
+
+ if (likely(!ctx->nr_events))
+ return;
+
++ if (!(is_active & EVENT_TIME)) {
++ /* start ctx time */
++ __update_context_time(ctx, false);
++ perf_cgroup_set_timestamp(task, ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
+ ctx->is_active |= (event_type | EVENT_TIME);
+ if (ctx->task) {
+ if (!is_active)
+@@ -3814,13 +3935,6 @@ ctx_sched_in(struct perf_event_context *ctx,
+
+ is_active ^= ctx->is_active; /* changed bits */
+
+- if (is_active & EVENT_TIME) {
+- /* start ctx time */
+- now = perf_clock();
+- ctx->timestamp = now;
+- perf_cgroup_set_timestamp(task, ctx);
+- }
+-
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+@@ -4414,6 +4528,18 @@ static inline u64 perf_event_count(struct perf_event *event)
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+ }
+
++static void calc_timer_values(struct perf_event *event,
++ u64 *now,
++ u64 *enabled,
++ u64 *running)
++{
++ u64 ctx_time;
++
++ *now = perf_clock();
++ ctx_time = perf_event_time_now(event, *now);
++ __perf_update_times(event, ctx_time, enabled, running);
++}
++
+ /*
+ * NMI-safe method to read a local event, that is an event that
+ * is:
+@@ -4473,10 +4599,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
+
+ *value = local64_read(&event->count);
+ if (enabled || running) {
+- u64 now = event->shadow_ctx_time + perf_clock();
+- u64 __enabled, __running;
+ u64 __enabled, __running, __now;
+
+- __perf_update_times(event, now, &__enabled, &__running);
++ calc_timer_values(event, &__now, &__enabled, &__running);
+ if (enabled)
+ *enabled = __enabled;
+ if (running)
+@@ -4948,7 +5073,7 @@ static void perf_addr_filters_splice(struct perf_event *event,
+
+ static void _free_event(struct perf_event *event)
+ {
+- irq_work_sync(&event->pending);
++ irq_work_sync(&event->pending_irq);
+
+ unaccount_event(event);
+
+@@ -5102,9 +5227,7 @@ int perf_event_release_kernel(struct perf_event *event)
+
+ ctx = perf_event_ctx_lock(event);
+ WARN_ON_ONCE(ctx->parent_ctx);
+- perf_remove_from_context(event, DETACH_GROUP);
+
+- raw_spin_lock_irq(&ctx->lock);
+ /*
+ * Mark this event as STATE_DEAD, there is no external reference to it
+ * anymore.
+@@ -5116,8 +5239,7 @@ int perf_event_release_kernel(struct perf_event *event)
+ * Thus this guarantees that we will in fact observe and kill _ALL_
+ * child events.
+ */
+- event->state = PERF_EVENT_STATE_DEAD;
+- raw_spin_unlock_irq(&ctx->lock);
++ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
+
+ perf_event_ctx_unlock(event, ctx);
+
+@@ -5798,18 +5920,6 @@ static int perf_event_index(struct perf_event *event)
+ return event->pmu->event_idx(event);
+ }
+
+-static void calc_timer_values(struct perf_event *event,
+- u64 *now,
+- u64 *enabled,
+- u64 *running)
+-{
+- u64 ctx_time;
+-
+- *now = perf_clock();
+- ctx_time = event->shadow_ctx_time + *now;
+- __perf_update_times(event, ctx_time, enabled, running);
+-}
+-
+ static void perf_event_init_userpage(struct perf_event *event)
+ {
+ struct perf_event_mmap_page *userpg;
+@@ -5934,6 +6044,8 @@ static void ring_buffer_attach(struct perf_event *event,
+ struct perf_buffer *old_rb = NULL;
+ unsigned long flags;
+
++ WARN_ON_ONCE(event->parent);
++
+ if (event->rb) {
+ /*
+ * Should be impossible, we set this when removing
+@@ -5991,6 +6103,9 @@ static void ring_buffer_wakeup(struct perf_event *event)
+ {
+ struct perf_buffer *rb;
+
++ if (event->parent)
++ event = event->parent;
++
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (rb) {
+@@ -6004,6 +6119,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event)
+ {
+ struct perf_buffer *rb;
+
++ if (event->parent)
++ event = event->parent;
++
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (rb) {
+@@ -6270,17 +6388,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+ again:
+ mutex_lock(&event->mmap_mutex);
+ if (event->rb) {
+- if (event->rb->nr_pages != nr_pages) {
++ if (data_page_nr(event->rb) != nr_pages) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+ /*
+- * Raced against perf_mmap_close() through
+- * perf_event_set_output(). Try again, hope for better
+- * luck.
++ * Raced against perf_mmap_close(); remove the
++ * event and try again.
+ */
++ ring_buffer_attach(event, NULL);
+ mutex_unlock(&event->mmap_mutex);
+ goto again;
+ }
+@@ -6349,7 +6467,6 @@ accounting:
+ ring_buffer_attach(event, rb);
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_init_userpage(event);
+ perf_event_update_userpage(event);
+ } else {
+@@ -6447,32 +6564,43 @@ static void perf_sigtrap(struct perf_event *event)
+ return;
+
+ /*
+- * perf_pending_event() can race with the task exiting.
++ * Both perf_pending_task() and perf_pending_irq() can race with the
++ * task exiting.
+ */
+ if (current->flags & PF_EXITING)
+ return;
+
+- force_sig_perf((void __user *)event->pending_addr,
+- event->attr.type, event->attr.sig_data);
++ send_sig_perf((void __user *)event->pending_addr,
++ event->attr.type, event->attr.sig_data);
+ }
+
+-static void perf_pending_event_disable(struct perf_event *event)
++/*
++ * Deliver the pending work in-event-context or follow the context.
++ */
++static void __perf_pending_irq(struct perf_event *event)
+ {
+- int cpu = READ_ONCE(event->pending_disable);
++ int cpu = READ_ONCE(event->oncpu);
+
++ /*
++ * If the event isn't running, we're done. event_sched_out() will have
++ * taken care of things.
++ */
+ if (cpu < 0)
+ return;
+
++ /*
++ * Yay, we hit home and are in the context of the event.
++ */
+ if (cpu == smp_processor_id()) {
+- WRITE_ONCE(event->pending_disable, -1);
+-
+- if (event->attr.sigtrap) {
++ if (event->pending_sigtrap) {
++ event->pending_sigtrap = 0;
+ perf_sigtrap(event);
+- atomic_set_release(&event->event_limit, 1); /* rearm event */
+- return;
++ local_dec(&event->ctx->nr_pending);
++ }
++ if (event->pending_disable) {
++ event->pending_disable = 0;
++ perf_event_disable_local(event);
+ }
+-
+- perf_event_disable_local(event);
+ return;
+ }
+
+@@ -6492,52 +6620,88 @@ static void perf_pending_event_disable(struct perf_event *event)
+ * irq_work_queue(); // FAILS
+ *
+ * irq_work_run()
+- * perf_pending_event()
++ * perf_pending_irq()
+ *
+ * But the event runs on CPU-B and wants disabling there.
+ */
+- irq_work_queue_on(&event->pending, cpu);
++ irq_work_queue_on(&event->pending_irq, cpu);
+ }
+
+-static void perf_pending_event(struct irq_work *entry)
++static void perf_pending_irq(struct irq_work *entry)
+ {
+- struct perf_event *event = container_of(entry, struct perf_event, pending);
++ struct perf_event *event = container_of(entry, struct perf_event, pending_irq);
+ int rctx;
+
+- rctx = perf_swevent_get_recursion_context();
+ /*
+ * If we 'fail' here, that's OK, it means recursion is already disabled
+ * and we won't recurse 'further'.
+ */
++ rctx = perf_swevent_get_recursion_context();
+
+- perf_pending_event_disable(event);
+-
++ /*
++ * The wakeup isn't bound to the context of the event -- it can happen
++ * irrespective of where the event is.
++ */
+ if (event->pending_wakeup) {
+ event->pending_wakeup = 0;
+ perf_event_wakeup(event);
+ }
+
++ __perf_pending_irq(event);
++
+ if (rctx >= 0)
+ perf_swevent_put_recursion_context(rctx);
+ }
+
+-/*
++static void perf_pending_task(struct callback_head *head)
++{
++ struct perf_event *event = container_of(head, struct perf_event, pending_task);
++ int rctx;
++
++ /*
++ * If we 'fail' here, that's OK, it means recursion is already disabled
++ * and we won't recurse 'further'.
++ */
++ preempt_disable_notrace();
++ rctx = perf_swevent_get_recursion_context();
++
++ if (event->pending_work) {
++ event->pending_work = 0;
++ perf_sigtrap(event);
++ local_dec(&event->ctx->nr_pending);
++ }
++
++ if (rctx >= 0)
++ perf_swevent_put_recursion_context(rctx);
++ preempt_enable_notrace();
++
++ put_event(event);
++}
++
++/*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+-struct perf_guest_info_callbacks *perf_guest_cbs;
++struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
+
+ int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+ {
+- perf_guest_cbs = cbs;
++ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs)))
++ return -EBUSY;
++
++ rcu_assign_pointer(perf_guest_cbs, cbs);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+ {
+- perf_guest_cbs = NULL;
++ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs))
++ return -EINVAL;
++
++ rcu_assign_pointer(perf_guest_cbs, NULL);
++ synchronize_rcu();
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+@@ -6696,7 +6860,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event,
+ if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
+ goto out;
+
+- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
++ rb = ring_buffer_get(sampler);
+ if (!rb)
+ goto out;
+
+@@ -6762,7 +6926,7 @@ static void perf_aux_sample_output(struct perf_event *event,
+ if (WARN_ON_ONCE(!sampler || !data->aux_size))
+ return;
+
+- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
++ rb = ring_buffer_get(sampler);
+ if (!rb)
+ return;
+
+@@ -7154,7 +7318,6 @@ void perf_output_sample(struct perf_output_handle *handle,
+ static u64 perf_virt_to_phys(u64 virt)
+ {
+ u64 phys_addr = 0;
+- struct page *p = NULL;
+
+ if (!virt)
+ return 0;
+@@ -7173,14 +7336,15 @@ static u64 perf_virt_to_phys(u64 virt)
+ * If failed, leave phys_addr as 0.
+ */
+ if (current->mm != NULL) {
++ struct page *p;
++
+ pagefault_disable();
+- if (get_user_page_fast_only(virt, 0, &p))
++ if (get_user_page_fast_only(virt, 0, &p)) {
+ phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
++ put_page(p);
++ }
+ pagefault_enable();
+ }
+-
+- if (p)
+- put_page(p);
+ }
+
+ return phys_addr;
+@@ -8897,7 +9061,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
+
+ perf_event_header__init_id(&bpf_event->event_id.header,
+ &sample, event);
+- ret = perf_output_begin(&handle, data, event,
++ ret = perf_output_begin(&handle, &sample, event,
+ bpf_event->event_id.header.size);
+ if (ret)
+ return;
+@@ -8927,7 +9091,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+ PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)subprog->bpf_func,
+ subprog->jited_len, unregister,
+- prog->aux->ksym.name);
++ subprog->aux->ksym.name);
+ }
+ }
+ }
+@@ -9112,8 +9276,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
+ hwc->interrupts = 1;
+ } else {
+ hwc->interrupts++;
+- if (unlikely(throttle
+- && hwc->interrupts >= max_samples_per_tick)) {
++ if (unlikely(throttle &&
++ hwc->interrupts > max_samples_per_tick)) {
+ __this_cpu_inc(perf_throttled_count);
+ tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
+ hwc->interrupts = MAX_INTERRUPTS;
+@@ -9145,8 +9309,8 @@ int perf_event_account_interrupt(struct perf_event *event)
+ */
+
+ static int __perf_event_overflow(struct perf_event *event,
+- int throttle, struct perf_sample_data *data,
+- struct pt_regs *regs)
++ int throttle, struct perf_sample_data *data,
++ struct pt_regs *regs)
+ {
+ int events = atomic_read(&event->event_limit);
+ int ret = 0;
+@@ -9169,24 +9333,49 @@ static int __perf_event_overflow(struct perf_event *event,
+ if (events && atomic_dec_and_test(&event->event_limit)) {
+ ret = 1;
+ event->pending_kill = POLL_HUP;
+- event->pending_addr = data->addr;
+-
+ perf_event_disable_inatomic(event);
+ }
+
++ if (event->attr.sigtrap) {
++ unsigned int pending_id = 1;
++
++ if (regs)
++ pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
++ if (!event->pending_sigtrap) {
++ event->pending_sigtrap = pending_id;
++ local_inc(&event->ctx->nr_pending);
++ } else if (event->attr.exclude_kernel) {
++ /*
++ * Should not be able to return to user space without
++ * consuming pending_sigtrap; with exceptions:
++ *
++ * 1. Where !exclude_kernel, events can overflow again
++ * in the kernel without returning to user space.
++ *
++ * 2. Events that can overflow again before the IRQ-
++ * work without user space progress (e.g. hrtimer).
++ * To approximate progress (with false negatives),
++ * check 32-bit hash of the current IP.
++ */
++ WARN_ON_ONCE(event->pending_sigtrap != pending_id);
++ }
++ event->pending_addr = data->addr;
++ irq_work_queue(&event->pending_irq);
++ }
++
+ READ_ONCE(event->overflow_handler)(event, data, regs);
+
+ if (*perf_event_fasync(event) && event->pending_kill) {
+ event->pending_wakeup = 1;
+- irq_work_queue(&event->pending);
++ irq_work_queue(&event->pending_irq);
+ }
+
+ return ret;
+ }
+
+ int perf_event_overflow(struct perf_event *event,
+- struct perf_sample_data *data,
+- struct pt_regs *regs)
++ struct perf_sample_data *data,
++ struct pt_regs *regs)
+ {
+ return __perf_event_overflow(event, 1, data, regs);
+ }
+@@ -9729,6 +9918,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
+ continue;
+ if (event->attr.config != entry->type)
+ continue;
++ /* Cannot deliver synchronous signal to other task. */
++ if (event->attr.sigtrap)
++ continue;
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_event(event, count, &data, regs);
+ }
+@@ -10443,8 +10635,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
+ }
+
+ /* ready to consume more filters */
++ kfree(filename);
++ filename = NULL;
+ state = IF_STATE_ACTION;
+ filter = NULL;
++ kernel = 0;
+ }
+ }
+
+@@ -10947,8 +11142,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
+
+- cpu_function_call(cpu,
+- (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
++ cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx);
+ }
+ cpus_read_unlock();
+ mutex_unlock(&mux_interval_mutex);
+@@ -10985,13 +11179,15 @@ static int pmu_dev_alloc(struct pmu *pmu)
+
+ pmu->dev->groups = pmu->attr_groups;
+ device_initialize(pmu->dev);
+- ret = dev_set_name(pmu->dev, "%s", pmu->name);
+- if (ret)
+- goto free_dev;
+
+ dev_set_drvdata(pmu->dev, pmu);
+ pmu->dev->bus = &pmu_bus;
+ pmu->dev->release = pmu_dev_release;
++
++ ret = dev_set_name(pmu->dev, "%s", pmu->name);
++ if (ret)
++ goto free_dev;
++
+ ret = device_add(pmu->dev);
+ if (ret)
+ goto free_dev;
+@@ -11486,8 +11682,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+
+
+ init_waitqueue_head(&event->waitq);
+- event->pending_disable = -1;
+- init_irq_work(&event->pending, perf_pending_event);
++ init_irq_work(&event->pending_irq, perf_pending_irq);
++ init_task_work(&event->pending_task, perf_pending_task);
+
+ mutex_init(&event->mmap_mutex);
+ raw_spin_lock_init(&event->addr_filters.lock);
+@@ -11506,8 +11702,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+
+ event->state = PERF_EVENT_STATE_INACTIVE;
+
+- if (event->attr.sigtrap)
+- atomic_set(&event->event_limit, 1);
++ if (parent_event)
++ event->event_caps = parent_event->event_caps;
+
+ if (task) {
+ event->attach_state = PERF_ATTACH_TASK;
+@@ -11799,14 +11995,25 @@ err_size:
+ goto out;
+ }
+
++static void mutex_lock_double(struct mutex *a, struct mutex *b)
++{
++ if (b < a)
++ swap(a, b);
++
++ mutex_lock(a);
++ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
++}
++
+ static int
+ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+ {
+ struct perf_buffer *rb = NULL;
+ int ret = -EINVAL;
+
+- if (!output_event)
++ if (!output_event) {
++ mutex_lock(&event->mmap_mutex);
+ goto set;
++ }
+
+ /* don't allow circular references */
+ if (event == output_event)
+@@ -11821,7 +12028,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+ /*
+ * If its not a per-cpu rb, it must be the same task.
+ */
+- if (output_event->cpu == -1 && output_event->ctx != event->ctx)
++ if (output_event->cpu == -1 && output_event->hw.target != event->hw.target)
+ goto out;
+
+ /*
+@@ -11844,8 +12051,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+ event->pmu != output_event->pmu)
+ goto out;
+
++ /*
++ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since
++ * output_event is already on rb->event_list, and the list iteration
++ * restarts after every removal, it is guaranteed this new event is
++ * observed *OR* if output_event is already removed, it's guaranteed we
++ * observe !rb->mmap_count.
++ */
++ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
+ set:
+- mutex_lock(&event->mmap_mutex);
+ /* Can't redirect output if we've got an active mmap() */
+ if (atomic_read(&event->mmap_count))
+ goto unlock;
+@@ -11855,6 +12069,12 @@ set:
+ rb = ring_buffer_get(output_event);
+ if (!rb)
+ goto unlock;
++
++ /* did we race against perf_mmap_close() */
++ if (!atomic_read(&rb->mmap_count)) {
++ ring_buffer_put(rb);
++ goto unlock;
++ }
+ }
+
+ ring_buffer_attach(event, rb);
+@@ -11862,20 +12082,13 @@ set:
+ ret = 0;
+ unlock:
+ mutex_unlock(&event->mmap_mutex);
++ if (output_event)
++ mutex_unlock(&output_event->mmap_mutex);
+
+ out:
+ return ret;
+ }
+
+-static void mutex_lock_double(struct mutex *a, struct mutex *b)
+-{
+- if (b < a)
+- swap(a, b);
+-
+- mutex_lock(a);
+- mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+-}
+-
+ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+ {
+ bool nmi_safe = false;
+@@ -12006,12 +12219,12 @@ SYSCALL_DEFINE5(perf_event_open,
+ if (flags & ~PERF_FLAG_ALL)
+ return -EINVAL;
+
+- /* Do we allow access to perf_event_open(2) ? */
+- err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
++ err = perf_copy_attr(attr_uptr, &attr);
+ if (err)
+ return err;
+
+- err = perf_copy_attr(attr_uptr, &attr);
++ /* Do we allow access to perf_event_open(2) ? */
++ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+ if (err)
+ return err;
+
+@@ -12190,6 +12403,9 @@ SYSCALL_DEFINE5(perf_event_open,
+ * Do not allow to attach to a group in a different task
+ * or CPU context. If we're moving SW events, we'll fix
+ * this up later, so allow that.
++ *
++ * Racy, not holding group_leader->ctx->mutex, see comment with
++ * perf_event_ctx_lock().
+ */
+ if (!move_group && group_leader->ctx != ctx)
+ goto err_context;
+@@ -12255,6 +12471,7 @@ SYSCALL_DEFINE5(perf_event_open,
+ } else {
+ perf_event_ctx_unlock(group_leader, gctx);
+ move_group = 0;
++ goto not_move_group;
+ }
+ }
+
+@@ -12271,7 +12488,17 @@ SYSCALL_DEFINE5(perf_event_open,
+ }
+ } else {
+ mutex_lock(&ctx->mutex);
++
++ /*
++ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
++ * see the group_leader && !move_group test earlier.
++ */
++ if (group_leader && group_leader->ctx != ctx) {
++ err = -EINVAL;
++ goto err_locked;
++ }
+ }
++not_move_group:
+
+ if (ctx->task == TASK_TOMBSTONE) {
+ err = -ESRCH;
+diff --git a/kernel/events/internal.h b/kernel/events/internal.h
+index 228801e207886..aa23ffdaf819f 100644
+--- a/kernel/events/internal.h
++++ b/kernel/events/internal.h
+@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb)
+ }
+ #endif
+
++static inline int data_page_nr(struct perf_buffer *rb)
++{
++ return rb->nr_pages << page_order(rb);
++}
++
+ static inline unsigned long perf_data_size(struct perf_buffer *rb)
+ {
+ return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
+diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
+index 52868716ec358..f40da32f5e753 100644
+--- a/kernel/events/ring_buffer.c
++++ b/kernel/events/ring_buffer.c
+@@ -22,7 +22,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
+ atomic_set(&handle->rb->poll, EPOLLIN);
+
+ handle->event->pending_wakeup = 1;
+- irq_work_queue(&handle->event->pending);
++ irq_work_queue(&handle->event->pending_irq);
+ }
+
+ /*
+@@ -859,11 +859,6 @@ void rb_free(struct perf_buffer *rb)
+ }
+
+ #else
+-static int data_page_nr(struct perf_buffer *rb)
+-{
+- return rb->nr_pages << page_order(rb);
+-}
+-
+ static struct page *
+ __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
+ {
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 91a43e57a32eb..80efdfda6662b 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -64,11 +64,58 @@
+ #include <linux/rcuwait.h>
+ #include <linux/compat.h>
+ #include <linux/io_uring.h>
++#include <linux/sysfs.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/unistd.h>
+ #include <asm/mmu_context.h>
+
++/*
++ * The default value should be high enough to not crash a system that randomly
++ * crashes its kernel from time to time, but low enough to at least not permit
++ * overflowing 32-bit refcounts or the ldsem writer count.
++ */
++static unsigned int oops_limit = 10000;
++
++#ifdef CONFIG_SYSCTL
++static struct ctl_table kern_exit_table[] = {
++ {
++ .procname = "oops_limit",
++ .data = &oops_limit,
++ .maxlen = sizeof(oops_limit),
++ .mode = 0644,
++ .proc_handler = proc_douintvec,
++ },
++ { }
++};
++
++static __init int kernel_exit_sysctls_init(void)
++{
++ register_sysctl_init("kernel", kern_exit_table);
++ return 0;
++}
++late_initcall(kernel_exit_sysctls_init);
++#endif
++
++static atomic_t oops_count = ATOMIC_INIT(0);
++
++#ifdef CONFIG_SYSFS
++static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *page)
++{
++ return sysfs_emit(page, "%d\n", atomic_read(&oops_count));
++}
++
++static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count);
++
++static __init int kernel_exit_sysfs_init(void)
++{
++ sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL);
++ return 0;
++}
++late_initcall(kernel_exit_sysfs_init);
++#endif
++
+ static void __unhash_process(struct task_struct *p, bool group_dead)
+ {
+ nr_threads--;
+@@ -796,7 +843,7 @@ void __noreturn do_exit(long code)
+
+ #ifdef CONFIG_POSIX_TIMERS
+ hrtimer_cancel(&tsk->signal->real_timer);
+- exit_itimers(tsk->signal);
++ exit_itimers(tsk);
+ #endif
+ if (tsk->mm)
+ setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
+@@ -877,6 +924,31 @@ void __noreturn do_exit(long code)
+ }
+ EXPORT_SYMBOL_GPL(do_exit);
+
++void __noreturn make_task_dead(int signr)
++{
++ /*
++ * Take the task off the cpu after something catastrophic has
++ * happened.
++ */
++ unsigned int limit;
++
++ /*
++ * Every time the system oopses, if the oops happens while a reference
++ * to an object was held, the reference leaks.
++ * If the oops doesn't also leak memory, repeated oopsing can cause
++ * reference counters to wrap around (if they're not using refcount_t).
++ * This means that repeated oopsing can make unexploitable-looking bugs
++ * exploitable through repeated oopsing.
++ * To make sure this can't happen, place an upper bound on how often the
++ * kernel may oops without panic().
++ */
++ limit = READ_ONCE(oops_limit);
++ if (atomic_inc_return(&oops_count) >= limit && limit)
++ panic("Oopsed too often (kernel.oops_limit is %d)", limit);
++
++ do_exit(signr);
++}
++
+ void complete_and_exit(struct completion *comp, long code)
+ {
+ if (comp)
+diff --git a/kernel/fail_function.c b/kernel/fail_function.c
+index 60dc825ecc2b3..d81ec84765811 100644
+--- a/kernel/fail_function.c
++++ b/kernel/fail_function.c
+@@ -163,10 +163,7 @@ static void fei_debugfs_add_attr(struct fei_attr *attr)
+
+ static void fei_debugfs_remove_attr(struct fei_attr *attr)
+ {
+- struct dentry *dir;
+-
+- dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir);
+- debugfs_remove_recursive(dir);
++ debugfs_lookup_and_remove(attr->kp.symbol_name, fei_debugfs_dir);
+ }
+
+ static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 38681ad44c76b..ace0717c71e27 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -446,6 +446,9 @@ void put_task_stack(struct task_struct *tsk)
+
+ void free_task(struct task_struct *tsk)
+ {
++#ifdef CONFIG_SECCOMP
++ WARN_ON_ONCE(tsk->seccomp.filter);
++#endif
+ release_user_cpus_ptr(tsk);
+ scs_release(tsk);
+
+@@ -467,6 +470,7 @@ void free_task(struct task_struct *tsk)
+ arch_release_task_struct(tsk);
+ if (tsk->flags & PF_KTHREAD)
+ free_kthread_struct(tsk);
++ bpf_task_storage_free(tsk);
+ free_task_struct(tsk);
+ }
+ EXPORT_SYMBOL(free_task);
+@@ -750,7 +754,6 @@ void __put_task_struct(struct task_struct *tsk)
+ cgroup_free(tsk);
+ task_numa_free(tsk, true);
+ security_task_free(tsk);
+- bpf_task_storage_free(tsk);
+ exit_creds(tsk);
+ delayacct_tsk_free(tsk);
+ put_signal_struct(tsk->signal);
+@@ -1153,6 +1156,7 @@ void mmput_async(struct mm_struct *mm)
+ schedule_work(&mm->async_put_work);
+ }
+ }
++EXPORT_SYMBOL_GPL(mmput_async);
+ #endif
+
+ /**
+@@ -2055,18 +2059,18 @@ static __latent_entropy struct task_struct *copy_process(
+ #ifdef CONFIG_PROVE_LOCKING
+ DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
+ #endif
++ retval = copy_creds(p, clone_flags);
++ if (retval < 0)
++ goto bad_fork_free;
++
+ retval = -EAGAIN;
+ if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
+ if (p->real_cred->user != INIT_USER &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
+- goto bad_fork_free;
++ goto bad_fork_cleanup_count;
+ }
+ current->flags &= ~PF_NPROC_EXCEEDED;
+
+- retval = copy_creds(p, clone_flags);
+- if (retval < 0)
+- goto bad_fork_free;
+-
+ /*
+ * If multiple threads are within copy_process(), then this check
+ * triggers too late. This doesn't hurt, the check is only there
+@@ -2280,6 +2284,7 @@ static __latent_entropy struct task_struct *copy_process(
+ p->pdeath_signal = 0;
+ INIT_LIST_HEAD(&p->thread_group);
+ p->task_works = NULL;
++ clear_posix_cputimers_work(p);
+
+ #ifdef CONFIG_KRETPROBES
+ p->kretprobe_instances.first = NULL;
+@@ -2295,6 +2300,17 @@ static __latent_entropy struct task_struct *copy_process(
+ if (retval)
+ goto bad_fork_put_pidfd;
+
++ /*
++ * Now that the cgroups are pinned, re-clone the parent cgroup and put
++ * the new task on the correct runqueue. All this *before* the task
++ * becomes visible.
++ *
++ * This isn't part of ->can_fork() because while the re-cloning is
++ * cgroup specific, it unconditionally needs to place the task on a
++ * runqueue.
++ */
++ sched_cgroup_fork(p, args);
++
+ /*
+ * From this point on we must avoid any synchronous user-space
+ * communication until we take the tasklist-lock. In particular, we do
+@@ -2332,12 +2348,6 @@ static __latent_entropy struct task_struct *copy_process(
+
+ spin_lock(&current->sighand->siglock);
+
+- /*
+- * Copy seccomp details explicitly here, in case they were changed
+- * before holding sighand lock.
+- */
+- copy_seccomp(p);
+-
+ rseq_fork(p, clone_flags);
+
+ /* Don't start children in a dying pid namespace */
+@@ -2352,9 +2362,13 @@ static __latent_entropy struct task_struct *copy_process(
+ goto bad_fork_cancel_cgroup;
+ }
+
+- /* past the last point of failure */
+- if (pidfile)
+- fd_install(pidfd, pidfile);
++ /* No more failure paths after this point. */
++
++ /*
++ * Copy seccomp details explicitly here, in case they were changed
++ * before holding sighand lock.
++ */
++ copy_seccomp(p);
+
+ init_task_pid_links(p);
+ if (likely(p->pid)) {
+@@ -2404,6 +2418,9 @@ static __latent_entropy struct task_struct *copy_process(
+ syscall_tracepoint_update(p);
+ write_unlock_irq(&tasklist_lock);
+
++ if (pidfile)
++ fd_install(pidfd, pidfile);
++
+ proc_fork_connector(p);
+ sched_post_fork(p);
+ cgroup_post_fork(p, args);
+@@ -2505,11 +2522,6 @@ struct task_struct * __init fork_idle(int cpu)
+ return task;
+ }
+
+-struct mm_struct *copy_init_mm(void)
+-{
+- return dup_mm(NULL, &init_mm);
+-}
+-
+ /*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+@@ -2812,7 +2824,7 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
+ * - make the CLONE_DETACHED bit reusable for clone3
+ * - make the CSIGNAL bits reusable for clone3
+ */
+- if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
++ if (kargs->flags & (CLONE_DETACHED | (CSIGNAL & (~CLONE_NEWTIME))))
+ return false;
+
+ if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
+@@ -2904,10 +2916,27 @@ static void sighand_ctor(void *data)
+ init_waitqueue_head(&sighand->signalfd_wqh);
+ }
+
+-void __init proc_caches_init(void)
++void __init mm_cache_init(void)
+ {
+ unsigned int mm_size;
+
++ /*
++ * The mm_cpumask is located at the end of mm_struct, and is
++ * dynamically sized based on the maximum CPU number this system
++ * can have, taking hotplug into account (nr_cpu_ids).
++ */
++ mm_size = sizeof(struct mm_struct) + cpumask_size();
++
++ mm_cachep = kmem_cache_create_usercopy("mm_struct",
++ mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
++ offsetof(struct mm_struct, saved_auxv),
++ sizeof_field(struct mm_struct, saved_auxv),
++ NULL);
++}
++
++void __init proc_caches_init(void)
++{
+ sighand_cachep = kmem_cache_create("sighand_cache",
+ sizeof(struct sighand_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
+@@ -2925,19 +2954,6 @@ void __init proc_caches_init(void)
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+ NULL);
+
+- /*
+- * The mm_cpumask is located at the end of mm_struct, and is
+- * dynamically sized based on the maximum CPU number this system
+- * can have, taking hotplug into account (nr_cpu_ids).
+- */
+- mm_size = sizeof(struct mm_struct) + cpumask_size();
+-
+- mm_cachep = kmem_cache_create_usercopy("mm_struct",
+- mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+- offsetof(struct mm_struct, saved_auxv),
+- sizeof_field(struct mm_struct, saved_auxv),
+- NULL);
+ vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
+ mmap_init();
+ nsproxy_cache_init();
+diff --git a/kernel/futex.c b/kernel/futex.c
+deleted file mode 100644
+index c15ad276fd157..0000000000000
+--- a/kernel/futex.c
++++ /dev/null
+@@ -1,4272 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * Fast Userspace Mutexes (which I call "Futexes!").
+- * (C) Rusty Russell, IBM 2002
+- *
+- * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
+- * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
+- *
+- * Removed page pinning, fix privately mapped COW pages and other cleanups
+- * (C) Copyright 2003, 2004 Jamie Lokier
+- *
+- * Robust futex support started by Ingo Molnar
+- * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
+- * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
+- *
+- * PI-futex support started by Ingo Molnar and Thomas Gleixner
+- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+- * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
+- *
+- * PRIVATE futexes by Eric Dumazet
+- * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
+- *
+- * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
+- * Copyright (C) IBM Corporation, 2009
+- * Thanks to Thomas Gleixner for conceptual design and careful reviews.
+- *
+- * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
+- * enough at me, Linus for the original (flawed) idea, Matthew
+- * Kirkwood for proof-of-concept implementation.
+- *
+- * "The futexes are also cursed."
+- * "But they come in a choice of three flavours!"
+- */
+-#include <linux/compat.h>
+-#include <linux/jhash.h>
+-#include <linux/pagemap.h>
+-#include <linux/syscalls.h>
+-#include <linux/freezer.h>
+-#include <linux/memblock.h>
+-#include <linux/fault-inject.h>
+-#include <linux/time_namespace.h>
+-
+-#include <asm/futex.h>
+-
+-#include "locking/rtmutex_common.h"
+-
+-/*
+- * READ this before attempting to hack on futexes!
+- *
+- * Basic futex operation and ordering guarantees
+- * =============================================
+- *
+- * The waiter reads the futex value in user space and calls
+- * futex_wait(). This function computes the hash bucket and acquires
+- * the hash bucket lock. After that it reads the futex user space value
+- * again and verifies that the data has not changed. If it has not changed
+- * it enqueues itself into the hash bucket, releases the hash bucket lock
+- * and schedules.
+- *
+- * The waker side modifies the user space value of the futex and calls
+- * futex_wake(). This function computes the hash bucket and acquires the
+- * hash bucket lock. Then it looks for waiters on that futex in the hash
+- * bucket and wakes them.
+- *
+- * In futex wake up scenarios where no tasks are blocked on a futex, taking
+- * the hb spinlock can be avoided and simply return. In order for this
+- * optimization to work, ordering guarantees must exist so that the waiter
+- * being added to the list is acknowledged when the list is concurrently being
+- * checked by the waker, avoiding scenarios like the following:
+- *
+- * CPU 0 CPU 1
+- * val = *futex;
+- * sys_futex(WAIT, futex, val);
+- * futex_wait(futex, val);
+- * uval = *futex;
+- * *futex = newval;
+- * sys_futex(WAKE, futex);
+- * futex_wake(futex);
+- * if (queue_empty())
+- * return;
+- * if (uval == val)
+- * lock(hash_bucket(futex));
+- * queue();
+- * unlock(hash_bucket(futex));
+- * schedule();
+- *
+- * This would cause the waiter on CPU 0 to wait forever because it
+- * missed the transition of the user space value from val to newval
+- * and the waker did not find the waiter in the hash bucket queue.
+- *
+- * The correct serialization ensures that a waiter either observes
+- * the changed user space value before blocking or is woken by a
+- * concurrent waker:
+- *
+- * CPU 0 CPU 1
+- * val = *futex;
+- * sys_futex(WAIT, futex, val);
+- * futex_wait(futex, val);
+- *
+- * waiters++; (a)
+- * smp_mb(); (A) <-- paired with -.
+- * |
+- * lock(hash_bucket(futex)); |
+- * |
+- * uval = *futex; |
+- * | *futex = newval;
+- * | sys_futex(WAKE, futex);
+- * | futex_wake(futex);
+- * |
+- * `--------> smp_mb(); (B)
+- * if (uval == val)
+- * queue();
+- * unlock(hash_bucket(futex));
+- * schedule(); if (waiters)
+- * lock(hash_bucket(futex));
+- * else wake_waiters(futex);
+- * waiters--; (b) unlock(hash_bucket(futex));
+- *
+- * Where (A) orders the waiters increment and the futex value read through
+- * atomic operations (see hb_waiters_inc) and where (B) orders the write
+- * to futex and the waiters read (see hb_waiters_pending()).
+- *
+- * This yields the following case (where X:=waiters, Y:=futex):
+- *
+- * X = Y = 0
+- *
+- * w[X]=1 w[Y]=1
+- * MB MB
+- * r[Y]=y r[X]=x
+- *
+- * Which guarantees that x==0 && y==0 is impossible; which translates back into
+- * the guarantee that we cannot both miss the futex variable change and the
+- * enqueue.
+- *
+- * Note that a new waiter is accounted for in (a) even when it is possible that
+- * the wait call can return error, in which case we backtrack from it in (b).
+- * Refer to the comment in queue_lock().
+- *
+- * Similarly, in order to account for waiters being requeued on another
+- * address we always increment the waiters for the destination bucket before
+- * acquiring the lock. It then decrements them again after releasing it -
+- * the code that actually moves the futex(es) between hash buckets (requeue_futex)
+- * will do the additional required waiter count housekeeping. This is done for
+- * double_lock_hb() and double_unlock_hb(), respectively.
+- */
+-
+-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+-#define futex_cmpxchg_enabled 1
+-#else
+-static int __read_mostly futex_cmpxchg_enabled;
+-#endif
+-
+-/*
+- * Futex flags used to encode options to functions and preserve them across
+- * restarts.
+- */
+-#ifdef CONFIG_MMU
+-# define FLAGS_SHARED 0x01
+-#else
+-/*
+- * NOMMU does not have per process address space. Let the compiler optimize
+- * code away.
+- */
+-# define FLAGS_SHARED 0x00
+-#endif
+-#define FLAGS_CLOCKRT 0x02
+-#define FLAGS_HAS_TIMEOUT 0x04
+-
+-/*
+- * Priority Inheritance state:
+- */
+-struct futex_pi_state {
+- /*
+- * list of 'owned' pi_state instances - these have to be
+- * cleaned up in do_exit() if the task exits prematurely:
+- */
+- struct list_head list;
+-
+- /*
+- * The PI object:
+- */
+- struct rt_mutex_base pi_mutex;
+-
+- struct task_struct *owner;
+- refcount_t refcount;
+-
+- union futex_key key;
+-} __randomize_layout;
+-
+-/**
+- * struct futex_q - The hashed futex queue entry, one per waiting task
+- * @list: priority-sorted list of tasks waiting on this futex
+- * @task: the task waiting on the futex
+- * @lock_ptr: the hash bucket lock
+- * @key: the key the futex is hashed on
+- * @pi_state: optional priority inheritance state
+- * @rt_waiter: rt_waiter storage for use with requeue_pi
+- * @requeue_pi_key: the requeue_pi target futex key
+- * @bitset: bitset for the optional bitmasked wakeup
+- * @requeue_state: State field for futex_requeue_pi()
+- * @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
+- *
+- * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
+- * we can wake only the relevant ones (hashed queues may be shared).
+- *
+- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
+- * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
+- * The order of wakeup is always to make the first condition true, then
+- * the second.
+- *
+- * PI futexes are typically woken before they are removed from the hash list via
+- * the rt_mutex code. See unqueue_me_pi().
+- */
+-struct futex_q {
+- struct plist_node list;
+-
+- struct task_struct *task;
+- spinlock_t *lock_ptr;
+- union futex_key key;
+- struct futex_pi_state *pi_state;
+- struct rt_mutex_waiter *rt_waiter;
+- union futex_key *requeue_pi_key;
+- u32 bitset;
+- atomic_t requeue_state;
+-#ifdef CONFIG_PREEMPT_RT
+- struct rcuwait requeue_wait;
+-#endif
+-} __randomize_layout;
+-
+-/*
+- * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
+- * underlying rtmutex. The task which is about to be requeued could have
+- * just woken up (timeout, signal). After the wake up the task has to
+- * acquire hash bucket lock, which is held by the requeue code. As a task
+- * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
+- * and the hash bucket lock blocking would collide and corrupt state.
+- *
+- * On !PREEMPT_RT this is not a problem and everything could be serialized
+- * on hash bucket lock, but aside of having the benefit of common code,
+- * this allows to avoid doing the requeue when the task is already on the
+- * way out and taking the hash bucket lock of the original uaddr1 when the
+- * requeue has been completed.
+- *
+- * The following state transitions are valid:
+- *
+- * On the waiter side:
+- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
+- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
+- *
+- * On the requeue side:
+- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS
+- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
+- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
+- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
+- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
+- *
+- * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
+- * signals that the waiter is already on the way out. It also means that
+- * the waiter is still on the 'wait' futex, i.e. uaddr1.
+- *
+- * The waiter side signals early wakeup to the requeue side either through
+- * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
+- * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
+- * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
+- * which means the wakeup is interleaving with a requeue in progress it has
+- * to wait for the requeue side to change the state. Either to DONE/LOCKED
+- * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
+- * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
+- * the requeue side when the requeue attempt failed via deadlock detection
+- * and therefore the waiter q is still on the uaddr1 futex.
+- */
+-enum {
+- Q_REQUEUE_PI_NONE = 0,
+- Q_REQUEUE_PI_IGNORE,
+- Q_REQUEUE_PI_IN_PROGRESS,
+- Q_REQUEUE_PI_WAIT,
+- Q_REQUEUE_PI_DONE,
+- Q_REQUEUE_PI_LOCKED,
+-};
+-
+-static const struct futex_q futex_q_init = {
+- /* list gets initialized in queue_me()*/
+- .key = FUTEX_KEY_INIT,
+- .bitset = FUTEX_BITSET_MATCH_ANY,
+- .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
+-};
+-
+-/*
+- * Hash buckets are shared by all the futex_keys that hash to the same
+- * location. Each key may have multiple futex_q structures, one for each task
+- * waiting on a futex.
+- */
+-struct futex_hash_bucket {
+- atomic_t waiters;
+- spinlock_t lock;
+- struct plist_head chain;
+-} ____cacheline_aligned_in_smp;
+-
+-/*
+- * The base of the bucket array and its size are always used together
+- * (after initialization only in hash_futex()), so ensure that they
+- * reside in the same cacheline.
+- */
+-static struct {
+- struct futex_hash_bucket *queues;
+- unsigned long hashsize;
+-} __futex_data __read_mostly __aligned(2*sizeof(long));
+-#define futex_queues (__futex_data.queues)
+-#define futex_hashsize (__futex_data.hashsize)
+-
+-
+-/*
+- * Fault injections for futexes.
+- */
+-#ifdef CONFIG_FAIL_FUTEX
+-
+-static struct {
+- struct fault_attr attr;
+-
+- bool ignore_private;
+-} fail_futex = {
+- .attr = FAULT_ATTR_INITIALIZER,
+- .ignore_private = false,
+-};
+-
+-static int __init setup_fail_futex(char *str)
+-{
+- return setup_fault_attr(&fail_futex.attr, str);
+-}
+-__setup("fail_futex=", setup_fail_futex);
+-
+-static bool should_fail_futex(bool fshared)
+-{
+- if (fail_futex.ignore_private && !fshared)
+- return false;
+-
+- return should_fail(&fail_futex.attr, 1);
+-}
+-
+-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+-
+-static int __init fail_futex_debugfs(void)
+-{
+- umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+- struct dentry *dir;
+-
+- dir = fault_create_debugfs_attr("fail_futex", NULL,
+- &fail_futex.attr);
+- if (IS_ERR(dir))
+- return PTR_ERR(dir);
+-
+- debugfs_create_bool("ignore-private", mode, dir,
+- &fail_futex.ignore_private);
+- return 0;
+-}
+-
+-late_initcall(fail_futex_debugfs);
+-
+-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+-
+-#else
+-static inline bool should_fail_futex(bool fshared)
+-{
+- return false;
+-}
+-#endif /* CONFIG_FAIL_FUTEX */
+-
+-#ifdef CONFIG_COMPAT
+-static void compat_exit_robust_list(struct task_struct *curr);
+-#endif
+-
+-/*
+- * Reflects a new waiter being added to the waitqueue.
+- */
+-static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
+-{
+-#ifdef CONFIG_SMP
+- atomic_inc(&hb->waiters);
+- /*
+- * Full barrier (A), see the ordering comment above.
+- */
+- smp_mb__after_atomic();
+-#endif
+-}
+-
+-/*
+- * Reflects a waiter being removed from the waitqueue by wakeup
+- * paths.
+- */
+-static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+-{
+-#ifdef CONFIG_SMP
+- atomic_dec(&hb->waiters);
+-#endif
+-}
+-
+-static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+-{
+-#ifdef CONFIG_SMP
+- /*
+- * Full barrier (B), see the ordering comment above.
+- */
+- smp_mb();
+- return atomic_read(&hb->waiters);
+-#else
+- return 1;
+-#endif
+-}
+-
+-/**
+- * hash_futex - Return the hash bucket in the global hash
+- * @key: Pointer to the futex key for which the hash is calculated
+- *
+- * We hash on the keys returned from get_futex_key (see below) and return the
+- * corresponding hash bucket in the global hash.
+- */
+-static struct futex_hash_bucket *hash_futex(union futex_key *key)
+-{
+- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
+- key->both.offset);
+-
+- return &futex_queues[hash & (futex_hashsize - 1)];
+-}
+-
+-
+-/**
+- * match_futex - Check whether two futex keys are equal
+- * @key1: Pointer to key1
+- * @key2: Pointer to key2
+- *
+- * Return 1 if two futex_keys are equal, 0 otherwise.
+- */
+-static inline int match_futex(union futex_key *key1, union futex_key *key2)
+-{
+- return (key1 && key2
+- && key1->both.word == key2->both.word
+- && key1->both.ptr == key2->both.ptr
+- && key1->both.offset == key2->both.offset);
+-}
+-
+-enum futex_access {
+- FUTEX_READ,
+- FUTEX_WRITE
+-};
+-
+-/**
+- * futex_setup_timer - set up the sleeping hrtimer.
+- * @time: ptr to the given timeout value
+- * @timeout: the hrtimer_sleeper structure to be set up
+- * @flags: futex flags
+- * @range_ns: optional range in ns
+- *
+- * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
+- * value given
+- */
+-static inline struct hrtimer_sleeper *
+-futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
+- int flags, u64 range_ns)
+-{
+- if (!time)
+- return NULL;
+-
+- hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+- CLOCK_REALTIME : CLOCK_MONOTONIC,
+- HRTIMER_MODE_ABS);
+- /*
+- * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
+- * effectively the same as calling hrtimer_set_expires().
+- */
+- hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
+-
+- return timeout;
+-}
+-
+-/*
+- * Generate a machine wide unique identifier for this inode.
+- *
+- * This relies on u64 not wrapping in the life-time of the machine; which with
+- * 1ns resolution means almost 585 years.
+- *
+- * This further relies on the fact that a well formed program will not unmap
+- * the file while it has a (shared) futex waiting on it. This mapping will have
+- * a file reference which pins the mount and inode.
+- *
+- * If for some reason an inode gets evicted and read back in again, it will get
+- * a new sequence number and will _NOT_ match, even though it is the exact same
+- * file.
+- *
+- * It is important that match_futex() will never have a false-positive, esp.
+- * for PI futexes that can mess up the state. The above argues that false-negatives
+- * are only possible for malformed programs.
+- */
+-static u64 get_inode_sequence_number(struct inode *inode)
+-{
+- static atomic64_t i_seq;
+- u64 old;
+-
+- /* Does the inode already have a sequence number? */
+- old = atomic64_read(&inode->i_sequence);
+- if (likely(old))
+- return old;
+-
+- for (;;) {
+- u64 new = atomic64_add_return(1, &i_seq);
+- if (WARN_ON_ONCE(!new))
+- continue;
+-
+- old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+- if (old)
+- return old;
+- return new;
+- }
+-}
+-
+-/**
+- * get_futex_key() - Get parameters which are the keys for a futex
+- * @uaddr: virtual address of the futex
+- * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
+- * @key: address where result is stored.
+- * @rw: mapping needs to be read/write (values: FUTEX_READ,
+- * FUTEX_WRITE)
+- *
+- * Return: a negative error code or 0
+- *
+- * The key words are stored in @key on success.
+- *
+- * For shared mappings (when @fshared), the key is:
+- *
+- * ( inode->i_sequence, page->index, offset_within_page )
+- *
+- * [ also see get_inode_sequence_number() ]
+- *
+- * For private mappings (or when !@fshared), the key is:
+- *
+- * ( current->mm, address, 0 )
+- *
+- * This allows (cross process, where applicable) identification of the futex
+- * without keeping the page pinned for the duration of the FUTEX_WAIT.
+- *
+- * lock_page() might sleep, the caller should not hold a spinlock.
+- */
+-static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
+- enum futex_access rw)
+-{
+- unsigned long address = (unsigned long)uaddr;
+- struct mm_struct *mm = current->mm;
+- struct page *page, *tail;
+- struct address_space *mapping;
+- int err, ro = 0;
+-
+- /*
+- * The futex address must be "naturally" aligned.
+- */
+- key->both.offset = address % PAGE_SIZE;
+- if (unlikely((address % sizeof(u32)) != 0))
+- return -EINVAL;
+- address -= key->both.offset;
+-
+- if (unlikely(!access_ok(uaddr, sizeof(u32))))
+- return -EFAULT;
+-
+- if (unlikely(should_fail_futex(fshared)))
+- return -EFAULT;
+-
+- /*
+- * PROCESS_PRIVATE futexes are fast.
+- * As the mm cannot disappear under us and the 'key' only needs
+- * virtual address, we dont even have to find the underlying vma.
+- * Note : We do have to check 'uaddr' is a valid user address,
+- * but access_ok() should be faster than find_vma()
+- */
+- if (!fshared) {
+- key->private.mm = mm;
+- key->private.address = address;
+- return 0;
+- }
+-
+-again:
+- /* Ignore any VERIFY_READ mapping (futex common case) */
+- if (unlikely(should_fail_futex(true)))
+- return -EFAULT;
+-
+- err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
+- /*
+- * If write access is not required (eg. FUTEX_WAIT), try
+- * and get read-only access.
+- */
+- if (err == -EFAULT && rw == FUTEX_READ) {
+- err = get_user_pages_fast(address, 1, 0, &page);
+- ro = 1;
+- }
+- if (err < 0)
+- return err;
+- else
+- err = 0;
+-
+- /*
+- * The treatment of mapping from this point on is critical. The page
+- * lock protects many things but in this context the page lock
+- * stabilizes mapping, prevents inode freeing in the shared
+- * file-backed region case and guards against movement to swap cache.
+- *
+- * Strictly speaking the page lock is not needed in all cases being
+- * considered here and page lock forces unnecessarily serialization
+- * From this point on, mapping will be re-verified if necessary and
+- * page lock will be acquired only if it is unavoidable
+- *
+- * Mapping checks require the head page for any compound page so the
+- * head page and mapping is looked up now. For anonymous pages, it
+- * does not matter if the page splits in the future as the key is
+- * based on the address. For filesystem-backed pages, the tail is
+- * required as the index of the page determines the key. For
+- * base pages, there is no tail page and tail == page.
+- */
+- tail = page;
+- page = compound_head(page);
+- mapping = READ_ONCE(page->mapping);
+-
+- /*
+- * If page->mapping is NULL, then it cannot be a PageAnon
+- * page; but it might be the ZERO_PAGE or in the gate area or
+- * in a special mapping (all cases which we are happy to fail);
+- * or it may have been a good file page when get_user_pages_fast
+- * found it, but truncated or holepunched or subjected to
+- * invalidate_complete_page2 before we got the page lock (also
+- * cases which we are happy to fail). And we hold a reference,
+- * so refcount care in invalidate_complete_page's remove_mapping
+- * prevents drop_caches from setting mapping to NULL beneath us.
+- *
+- * The case we do have to guard against is when memory pressure made
+- * shmem_writepage move it from filecache to swapcache beneath us:
+- * an unlikely race, but we do need to retry for page->mapping.
+- */
+- if (unlikely(!mapping)) {
+- int shmem_swizzled;
+-
+- /*
+- * Page lock is required to identify which special case above
+- * applies. If this is really a shmem page then the page lock
+- * will prevent unexpected transitions.
+- */
+- lock_page(page);
+- shmem_swizzled = PageSwapCache(page) || page->mapping;
+- unlock_page(page);
+- put_page(page);
+-
+- if (shmem_swizzled)
+- goto again;
+-
+- return -EFAULT;
+- }
+-
+- /*
+- * Private mappings are handled in a simple way.
+- *
+- * If the futex key is stored on an anonymous page, then the associated
+- * object is the mm which is implicitly pinned by the calling process.
+- *
+- * NOTE: When userspace waits on a MAP_SHARED mapping, even if
+- * it's a read-only handle, it's expected that futexes attach to
+- * the object not the particular process.
+- */
+- if (PageAnon(page)) {
+- /*
+- * A RO anonymous page will never change and thus doesn't make
+- * sense for futex operations.
+- */
+- if (unlikely(should_fail_futex(true)) || ro) {
+- err = -EFAULT;
+- goto out;
+- }
+-
+- key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
+- key->private.mm = mm;
+- key->private.address = address;
+-
+- } else {
+- struct inode *inode;
+-
+- /*
+- * The associated futex object in this case is the inode and
+- * the page->mapping must be traversed. Ordinarily this should
+- * be stabilised under page lock but it's not strictly
+- * necessary in this case as we just want to pin the inode, not
+- * update the radix tree or anything like that.
+- *
+- * The RCU read lock is taken as the inode is finally freed
+- * under RCU. If the mapping still matches expectations then the
+- * mapping->host can be safely accessed as being a valid inode.
+- */
+- rcu_read_lock();
+-
+- if (READ_ONCE(page->mapping) != mapping) {
+- rcu_read_unlock();
+- put_page(page);
+-
+- goto again;
+- }
+-
+- inode = READ_ONCE(mapping->host);
+- if (!inode) {
+- rcu_read_unlock();
+- put_page(page);
+-
+- goto again;
+- }
+-
+- key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+- key->shared.i_seq = get_inode_sequence_number(inode);
+- key->shared.pgoff = page_to_pgoff(tail);
+- rcu_read_unlock();
+- }
+-
+-out:
+- put_page(page);
+- return err;
+-}
+-
+-/**
+- * fault_in_user_writeable() - Fault in user address and verify RW access
+- * @uaddr: pointer to faulting user space address
+- *
+- * Slow path to fixup the fault we just took in the atomic write
+- * access to @uaddr.
+- *
+- * We have no generic implementation of a non-destructive write to the
+- * user address. We know that we faulted in the atomic pagefault
+- * disabled section so we can as well avoid the #PF overhead by
+- * calling get_user_pages() right away.
+- */
+-static int fault_in_user_writeable(u32 __user *uaddr)
+-{
+- struct mm_struct *mm = current->mm;
+- int ret;
+-
+- mmap_read_lock(mm);
+- ret = fixup_user_fault(mm, (unsigned long)uaddr,
+- FAULT_FLAG_WRITE, NULL);
+- mmap_read_unlock(mm);
+-
+- return ret < 0 ? ret : 0;
+-}
+-
+-/**
+- * futex_top_waiter() - Return the highest priority waiter on a futex
+- * @hb: the hash bucket the futex_q's reside in
+- * @key: the futex key (to distinguish it from other futex futex_q's)
+- *
+- * Must be called with the hb lock held.
+- */
+-static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
+- union futex_key *key)
+-{
+- struct futex_q *this;
+-
+- plist_for_each_entry(this, &hb->chain, list) {
+- if (match_futex(&this->key, key))
+- return this;
+- }
+- return NULL;
+-}
+-
+-static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+- u32 uval, u32 newval)
+-{
+- int ret;
+-
+- pagefault_disable();
+- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+- pagefault_enable();
+-
+- return ret;
+-}
+-
+-static int get_futex_value_locked(u32 *dest, u32 __user *from)
+-{
+- int ret;
+-
+- pagefault_disable();
+- ret = __get_user(*dest, from);
+- pagefault_enable();
+-
+- return ret ? -EFAULT : 0;
+-}
+-
+-
+-/*
+- * PI code:
+- */
+-static int refill_pi_state_cache(void)
+-{
+- struct futex_pi_state *pi_state;
+-
+- if (likely(current->pi_state_cache))
+- return 0;
+-
+- pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
+-
+- if (!pi_state)
+- return -ENOMEM;
+-
+- INIT_LIST_HEAD(&pi_state->list);
+- /* pi_mutex gets initialized later */
+- pi_state->owner = NULL;
+- refcount_set(&pi_state->refcount, 1);
+- pi_state->key = FUTEX_KEY_INIT;
+-
+- current->pi_state_cache = pi_state;
+-
+- return 0;
+-}
+-
+-static struct futex_pi_state *alloc_pi_state(void)
+-{
+- struct futex_pi_state *pi_state = current->pi_state_cache;
+-
+- WARN_ON(!pi_state);
+- current->pi_state_cache = NULL;
+-
+- return pi_state;
+-}
+-
+-static void pi_state_update_owner(struct futex_pi_state *pi_state,
+- struct task_struct *new_owner)
+-{
+- struct task_struct *old_owner = pi_state->owner;
+-
+- lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
+-
+- if (old_owner) {
+- raw_spin_lock(&old_owner->pi_lock);
+- WARN_ON(list_empty(&pi_state->list));
+- list_del_init(&pi_state->list);
+- raw_spin_unlock(&old_owner->pi_lock);
+- }
+-
+- if (new_owner) {
+- raw_spin_lock(&new_owner->pi_lock);
+- WARN_ON(!list_empty(&pi_state->list));
+- list_add(&pi_state->list, &new_owner->pi_state_list);
+- pi_state->owner = new_owner;
+- raw_spin_unlock(&new_owner->pi_lock);
+- }
+-}
+-
+-static void get_pi_state(struct futex_pi_state *pi_state)
+-{
+- WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
+-}
+-
+-/*
+- * Drops a reference to the pi_state object and frees or caches it
+- * when the last reference is gone.
+- */
+-static void put_pi_state(struct futex_pi_state *pi_state)
+-{
+- if (!pi_state)
+- return;
+-
+- if (!refcount_dec_and_test(&pi_state->refcount))
+- return;
+-
+- /*
+- * If pi_state->owner is NULL, the owner is most probably dying
+- * and has cleaned up the pi_state already
+- */
+- if (pi_state->owner) {
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
+- pi_state_update_owner(pi_state, NULL);
+- rt_mutex_proxy_unlock(&pi_state->pi_mutex);
+- raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
+- }
+-
+- if (current->pi_state_cache) {
+- kfree(pi_state);
+- } else {
+- /*
+- * pi_state->list is already empty.
+- * clear pi_state->owner.
+- * refcount is at 0 - put it back to 1.
+- */
+- pi_state->owner = NULL;
+- refcount_set(&pi_state->refcount, 1);
+- current->pi_state_cache = pi_state;
+- }
+-}
+-
+-#ifdef CONFIG_FUTEX_PI
+-
+-/*
+- * This task is holding PI mutexes at exit time => bad.
+- * Kernel cleans up PI-state, but userspace is likely hosed.
+- * (Robust-futex cleanup is separate and might save the day for userspace.)
+- */
+-static void exit_pi_state_list(struct task_struct *curr)
+-{
+- struct list_head *next, *head = &curr->pi_state_list;
+- struct futex_pi_state *pi_state;
+- struct futex_hash_bucket *hb;
+- union futex_key key = FUTEX_KEY_INIT;
+-
+- if (!futex_cmpxchg_enabled)
+- return;
+- /*
+- * We are a ZOMBIE and nobody can enqueue itself on
+- * pi_state_list anymore, but we have to be careful
+- * versus waiters unqueueing themselves:
+- */
+- raw_spin_lock_irq(&curr->pi_lock);
+- while (!list_empty(head)) {
+- next = head->next;
+- pi_state = list_entry(next, struct futex_pi_state, list);
+- key = pi_state->key;
+- hb = hash_futex(&key);
+-
+- /*
+- * We can race against put_pi_state() removing itself from the
+- * list (a waiter going away). put_pi_state() will first
+- * decrement the reference count and then modify the list, so
+- * it's possible to see the list entry but fail this reference
+- * acquire.
+- *
+- * In that case; drop the locks to let put_pi_state() make
+- * progress and retry the loop.
+- */
+- if (!refcount_inc_not_zero(&pi_state->refcount)) {
+- raw_spin_unlock_irq(&curr->pi_lock);
+- cpu_relax();
+- raw_spin_lock_irq(&curr->pi_lock);
+- continue;
+- }
+- raw_spin_unlock_irq(&curr->pi_lock);
+-
+- spin_lock(&hb->lock);
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+- raw_spin_lock(&curr->pi_lock);
+- /*
+- * We dropped the pi-lock, so re-check whether this
+- * task still owns the PI-state:
+- */
+- if (head->next != next) {
+- /* retain curr->pi_lock for the loop invariant */
+- raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(&hb->lock);
+- put_pi_state(pi_state);
+- continue;
+- }
+-
+- WARN_ON(pi_state->owner != curr);
+- WARN_ON(list_empty(&pi_state->list));
+- list_del_init(&pi_state->list);
+- pi_state->owner = NULL;
+-
+- raw_spin_unlock(&curr->pi_lock);
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(&hb->lock);
+-
+- rt_mutex_futex_unlock(&pi_state->pi_mutex);
+- put_pi_state(pi_state);
+-
+- raw_spin_lock_irq(&curr->pi_lock);
+- }
+- raw_spin_unlock_irq(&curr->pi_lock);
+-}
+-#else
+-static inline void exit_pi_state_list(struct task_struct *curr) { }
+-#endif
+-
+-/*
+- * We need to check the following states:
+- *
+- * Waiter | pi_state | pi->owner | uTID | uODIED | ?
+- *
+- * [1] NULL | --- | --- | 0 | 0/1 | Valid
+- * [2] NULL | --- | --- | >0 | 0/1 | Valid
+- *
+- * [3] Found | NULL | -- | Any | 0/1 | Invalid
+- *
+- * [4] Found | Found | NULL | 0 | 1 | Valid
+- * [5] Found | Found | NULL | >0 | 1 | Invalid
+- *
+- * [6] Found | Found | task | 0 | 1 | Valid
+- *
+- * [7] Found | Found | NULL | Any | 0 | Invalid
+- *
+- * [8] Found | Found | task | ==taskTID | 0/1 | Valid
+- * [9] Found | Found | task | 0 | 0 | Invalid
+- * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
+- *
+- * [1] Indicates that the kernel can acquire the futex atomically. We
+- * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
+- *
+- * [2] Valid, if TID does not belong to a kernel thread. If no matching
+- * thread is found then it indicates that the owner TID has died.
+- *
+- * [3] Invalid. The waiter is queued on a non PI futex
+- *
+- * [4] Valid state after exit_robust_list(), which sets the user space
+- * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
+- *
+- * [5] The user space value got manipulated between exit_robust_list()
+- * and exit_pi_state_list()
+- *
+- * [6] Valid state after exit_pi_state_list() which sets the new owner in
+- * the pi_state but cannot access the user space value.
+- *
+- * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
+- *
+- * [8] Owner and user space value match
+- *
+- * [9] There is no transient state which sets the user space TID to 0
+- * except exit_robust_list(), but this is indicated by the
+- * FUTEX_OWNER_DIED bit. See [4]
+- *
+- * [10] There is no transient state which leaves owner and user space
+- * TID out of sync. Except one error case where the kernel is denied
+- * write access to the user address, see fixup_pi_state_owner().
+- *
+- *
+- * Serialization and lifetime rules:
+- *
+- * hb->lock:
+- *
+- * hb -> futex_q, relation
+- * futex_q -> pi_state, relation
+- *
+- * (cannot be raw because hb can contain an arbitrary amount
+- * of futex_q's)
+- *
+- * pi_mutex->wait_lock:
+- *
+- * {uval, pi_state}
+- *
+- * (and pi_mutex 'obviously')
+- *
+- * p->pi_lock:
+- *
+- * p->pi_state_list -> pi_state->list, relation
+- * pi_mutex->owner -> pi_state->owner, relation
+- *
+- * pi_state->refcount:
+- *
+- * pi_state lifetime
+- *
+- *
+- * Lock order:
+- *
+- * hb->lock
+- * pi_mutex->wait_lock
+- * p->pi_lock
+- *
+- */
+-
+-/*
+- * Validate that the existing waiter has a pi_state and sanity check
+- * the pi_state against the user space value. If correct, attach to
+- * it.
+- */
+-static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
+- struct futex_pi_state *pi_state,
+- struct futex_pi_state **ps)
+-{
+- pid_t pid = uval & FUTEX_TID_MASK;
+- u32 uval2;
+- int ret;
+-
+- /*
+- * Userspace might have messed up non-PI and PI futexes [3]
+- */
+- if (unlikely(!pi_state))
+- return -EINVAL;
+-
+- /*
+- * We get here with hb->lock held, and having found a
+- * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
+- * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
+- * which in turn means that futex_lock_pi() still has a reference on
+- * our pi_state.
+- *
+- * The waiter holding a reference on @pi_state also protects against
+- * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
+- * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
+- * free pi_state before we can take a reference ourselves.
+- */
+- WARN_ON(!refcount_read(&pi_state->refcount));
+-
+- /*
+- * Now that we have a pi_state, we can acquire wait_lock
+- * and do the state validation.
+- */
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+-
+- /*
+- * Since {uval, pi_state} is serialized by wait_lock, and our current
+- * uval was read without holding it, it can have changed. Verify it
+- * still is what we expect it to be, otherwise retry the entire
+- * operation.
+- */
+- if (get_futex_value_locked(&uval2, uaddr))
+- goto out_efault;
+-
+- if (uval != uval2)
+- goto out_eagain;
+-
+- /*
+- * Handle the owner died case:
+- */
+- if (uval & FUTEX_OWNER_DIED) {
+- /*
+- * exit_pi_state_list sets owner to NULL and wakes the
+- * topmost waiter. The task which acquires the
+- * pi_state->rt_mutex will fixup owner.
+- */
+- if (!pi_state->owner) {
+- /*
+- * No pi state owner, but the user space TID
+- * is not 0. Inconsistent state. [5]
+- */
+- if (pid)
+- goto out_einval;
+- /*
+- * Take a ref on the state and return success. [4]
+- */
+- goto out_attach;
+- }
+-
+- /*
+- * If TID is 0, then either the dying owner has not
+- * yet executed exit_pi_state_list() or some waiter
+- * acquired the rtmutex in the pi state, but did not
+- * yet fixup the TID in user space.
+- *
+- * Take a ref on the state and return success. [6]
+- */
+- if (!pid)
+- goto out_attach;
+- } else {
+- /*
+- * If the owner died bit is not set, then the pi_state
+- * must have an owner. [7]
+- */
+- if (!pi_state->owner)
+- goto out_einval;
+- }
+-
+- /*
+- * Bail out if user space manipulated the futex value. If pi
+- * state exists then the owner TID must be the same as the
+- * user space TID. [9/10]
+- */
+- if (pid != task_pid_vnr(pi_state->owner))
+- goto out_einval;
+-
+-out_attach:
+- get_pi_state(pi_state);
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- *ps = pi_state;
+- return 0;
+-
+-out_einval:
+- ret = -EINVAL;
+- goto out_error;
+-
+-out_eagain:
+- ret = -EAGAIN;
+- goto out_error;
+-
+-out_efault:
+- ret = -EFAULT;
+- goto out_error;
+-
+-out_error:
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- return ret;
+-}
+-
+-/**
+- * wait_for_owner_exiting - Block until the owner has exited
+- * @ret: owner's current futex lock status
+- * @exiting: Pointer to the exiting task
+- *
+- * Caller must hold a refcount on @exiting.
+- */
+-static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+-{
+- if (ret != -EBUSY) {
+- WARN_ON_ONCE(exiting);
+- return;
+- }
+-
+- if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+- return;
+-
+- mutex_lock(&exiting->futex_exit_mutex);
+- /*
+- * No point in doing state checking here. If the waiter got here
+- * while the task was in exec()->exec_futex_release() then it can
+- * have any FUTEX_STATE_* value when the waiter has acquired the
+- * mutex. OK, if running, EXITING or DEAD if it reached exit()
+- * already. Highly unlikely and not a problem. Just one more round
+- * through the futex maze.
+- */
+- mutex_unlock(&exiting->futex_exit_mutex);
+-
+- put_task_struct(exiting);
+-}
+-
+-static int handle_exit_race(u32 __user *uaddr, u32 uval,
+- struct task_struct *tsk)
+-{
+- u32 uval2;
+-
+- /*
+- * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+- * caller that the alleged owner is busy.
+- */
+- if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+- return -EBUSY;
+-
+- /*
+- * Reread the user space value to handle the following situation:
+- *
+- * CPU0 CPU1
+- *
+- * sys_exit() sys_futex()
+- * do_exit() futex_lock_pi()
+- * futex_lock_pi_atomic()
+- * exit_signals(tsk) No waiters:
+- * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
+- * mm_release(tsk) Set waiter bit
+- * exit_robust_list(tsk) { *uaddr = 0x80000PID;
+- * Set owner died attach_to_pi_owner() {
+- * *uaddr = 0xC0000000; tsk = get_task(PID);
+- * } if (!tsk->flags & PF_EXITING) {
+- * ... attach();
+- * tsk->futex_state = } else {
+- * FUTEX_STATE_DEAD; if (tsk->futex_state !=
+- * FUTEX_STATE_DEAD)
+- * return -EAGAIN;
+- * return -ESRCH; <--- FAIL
+- * }
+- *
+- * Returning ESRCH unconditionally is wrong here because the
+- * user space value has been changed by the exiting task.
+- *
+- * The same logic applies to the case where the exiting task is
+- * already gone.
+- */
+- if (get_futex_value_locked(&uval2, uaddr))
+- return -EFAULT;
+-
+- /* If the user space value has changed, try again. */
+- if (uval2 != uval)
+- return -EAGAIN;
+-
+- /*
+- * The exiting task did not have a robust list, the robust list was
+- * corrupted or the user space value in *uaddr is simply bogus.
+- * Give up and tell user space.
+- */
+- return -ESRCH;
+-}
+-
+-static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
+- struct futex_pi_state **ps)
+-{
+- /*
+- * No existing pi state. First waiter. [2]
+- *
+- * This creates pi_state, we have hb->lock held, this means nothing can
+- * observe this state, wait_lock is irrelevant.
+- */
+- struct futex_pi_state *pi_state = alloc_pi_state();
+-
+- /*
+- * Initialize the pi_mutex in locked state and make @p
+- * the owner of it:
+- */
+- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
+-
+- /* Store the key for possible exit cleanups: */
+- pi_state->key = *key;
+-
+- WARN_ON(!list_empty(&pi_state->list));
+- list_add(&pi_state->list, &p->pi_state_list);
+- /*
+- * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+- * because there is no concurrency as the object is not published yet.
+- */
+- pi_state->owner = p;
+-
+- *ps = pi_state;
+-}
+-/*
+- * Lookup the task for the TID provided from user space and attach to
+- * it after doing proper sanity checks.
+- */
+-static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+- struct futex_pi_state **ps,
+- struct task_struct **exiting)
+-{
+- pid_t pid = uval & FUTEX_TID_MASK;
+- struct task_struct *p;
+-
+- /*
+- * We are the first waiter - try to look up the real owner and attach
+- * the new pi_state to it, but bail out when TID = 0 [1]
+- *
+- * The !pid check is paranoid. None of the call sites should end up
+- * with pid == 0, but better safe than sorry. Let the caller retry.
+- */
+- if (!pid)
+- return -EAGAIN;
+- p = find_get_task_by_vpid(pid);
+- if (!p)
+- return handle_exit_race(uaddr, uval, NULL);
+-
+- if (unlikely(p->flags & PF_KTHREAD)) {
+- put_task_struct(p);
+- return -EPERM;
+- }
+-
+- /*
+- * We need to look at the task state to figure out whether the
+- * task is exiting. To protect against the change of the task state
+- * in futex_exit_release(), we do this protected by p->pi_lock:
+- */
+- raw_spin_lock_irq(&p->pi_lock);
+- if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
+- /*
+- * The task is on the way out. When the futex state is
+- * FUTEX_STATE_DEAD, we know that the task has finished
+- * the cleanup:
+- */
+- int ret = handle_exit_race(uaddr, uval, p);
+-
+- raw_spin_unlock_irq(&p->pi_lock);
+- /*
+- * If the owner task is between FUTEX_STATE_EXITING and
+- * FUTEX_STATE_DEAD then store the task pointer and keep
+- * the reference on the task struct. The calling code will
+- * drop all locks, wait for the task to reach
+- * FUTEX_STATE_DEAD and then drop the refcount. This is
+- * required to prevent a live lock when the current task
+- * preempted the exiting task between the two states.
+- */
+- if (ret == -EBUSY)
+- *exiting = p;
+- else
+- put_task_struct(p);
+- return ret;
+- }
+-
+- __attach_to_pi_owner(p, key, ps);
+- raw_spin_unlock_irq(&p->pi_lock);
+-
+- put_task_struct(p);
+-
+- return 0;
+-}
+-
+-static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+-{
+- int err;
+- u32 curval;
+-
+- if (unlikely(should_fail_futex(true)))
+- return -EFAULT;
+-
+- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+- if (unlikely(err))
+- return err;
+-
+- /* If user space value changed, let the caller retry */
+- return curval != uval ? -EAGAIN : 0;
+-}
+-
+-/**
+- * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
+- * @uaddr: the pi futex user address
+- * @hb: the pi futex hash bucket
+- * @key: the futex key associated with uaddr and hb
+- * @ps: the pi_state pointer where we store the result of the
+- * lookup
+- * @task: the task to perform the atomic lock work for. This will
+- * be "current" except in the case of requeue pi.
+- * @exiting: Pointer to store the task pointer of the owner task
+- * which is in the middle of exiting
+- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+- *
+- * Return:
+- * - 0 - ready to wait;
+- * - 1 - acquired the lock;
+- * - <0 - error
+- *
+- * The hb->lock must be held by the caller.
+- *
+- * @exiting is only set when the return value is -EBUSY. If so, this holds
+- * a refcount on the exiting task on return and the caller needs to drop it
+- * after waiting for the exit to complete.
+- */
+-static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+- union futex_key *key,
+- struct futex_pi_state **ps,
+- struct task_struct *task,
+- struct task_struct **exiting,
+- int set_waiters)
+-{
+- u32 uval, newval, vpid = task_pid_vnr(task);
+- struct futex_q *top_waiter;
+- int ret;
+-
+- /*
+- * Read the user space value first so we can validate a few
+- * things before proceeding further.
+- */
+- if (get_futex_value_locked(&uval, uaddr))
+- return -EFAULT;
+-
+- if (unlikely(should_fail_futex(true)))
+- return -EFAULT;
+-
+- /*
+- * Detect deadlocks.
+- */
+- if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
+- return -EDEADLK;
+-
+- if ((unlikely(should_fail_futex(true))))
+- return -EDEADLK;
+-
+- /*
+- * Lookup existing state first. If it exists, try to attach to
+- * its pi_state.
+- */
+- top_waiter = futex_top_waiter(hb, key);
+- if (top_waiter)
+- return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
+-
+- /*
+- * No waiter and user TID is 0. We are here because the
+- * waiters or the owner died bit is set or called from
+- * requeue_cmp_pi or for whatever reason something took the
+- * syscall.
+- */
+- if (!(uval & FUTEX_TID_MASK)) {
+- /*
+- * We take over the futex. No other waiters and the user space
+- * TID is 0. We preserve the owner died bit.
+- */
+- newval = uval & FUTEX_OWNER_DIED;
+- newval |= vpid;
+-
+- /* The futex requeue_pi code can enforce the waiters bit */
+- if (set_waiters)
+- newval |= FUTEX_WAITERS;
+-
+- ret = lock_pi_update_atomic(uaddr, uval, newval);
+- if (ret)
+- return ret;
+-
+- /*
+- * If the waiter bit was requested the caller also needs PI
+- * state attached to the new owner of the user space futex.
+- *
+- * @task is guaranteed to be alive and it cannot be exiting
+- * because it is either sleeping or waiting in
+- * futex_requeue_pi_wakeup_sync().
+- *
+- * No need to do the full attach_to_pi_owner() exercise
+- * because @task is known and valid.
+- */
+- if (set_waiters) {
+- raw_spin_lock_irq(&task->pi_lock);
+- __attach_to_pi_owner(task, key, ps);
+- raw_spin_unlock_irq(&task->pi_lock);
+- }
+- return 1;
+- }
+-
+- /*
+- * First waiter. Set the waiters bit before attaching ourself to
+- * the owner. If owner tries to unlock, it will be forced into
+- * the kernel and blocked on hb->lock.
+- */
+- newval = uval | FUTEX_WAITERS;
+- ret = lock_pi_update_atomic(uaddr, uval, newval);
+- if (ret)
+- return ret;
+- /*
+- * If the update of the user space value succeeded, we try to
+- * attach to the owner. If that fails, no harm done, we only
+- * set the FUTEX_WAITERS bit in the user space variable.
+- */
+- return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
+-}
+-
+-/**
+- * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+- * @q: The futex_q to unqueue
+- *
+- * The q->lock_ptr must not be NULL and must be held by the caller.
+- */
+-static void __unqueue_futex(struct futex_q *q)
+-{
+- struct futex_hash_bucket *hb;
+-
+- if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
+- return;
+- lockdep_assert_held(q->lock_ptr);
+-
+- hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+- plist_del(&q->list, &hb->chain);
+- hb_waiters_dec(hb);
+-}
+-
+-/*
+- * The hash bucket lock must be held when this is called.
+- * Afterwards, the futex_q must not be accessed. Callers
+- * must ensure to later call wake_up_q() for the actual
+- * wakeups to occur.
+- */
+-static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
+-{
+- struct task_struct *p = q->task;
+-
+- if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
+- return;
+-
+- get_task_struct(p);
+- __unqueue_futex(q);
+- /*
+- * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
+- * is written, without taking any locks. This is possible in the event
+- * of a spurious wakeup, for example. A memory barrier is required here
+- * to prevent the following store to lock_ptr from getting ahead of the
+- * plist_del in __unqueue_futex().
+- */
+- smp_store_release(&q->lock_ptr, NULL);
+-
+- /*
+- * Queue the task for later wakeup for after we've released
+- * the hb->lock.
+- */
+- wake_q_add_safe(wake_q, p);
+-}
+-
+-/*
+- * Caller must hold a reference on @pi_state.
+- */
+-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
+-{
+- struct rt_mutex_waiter *top_waiter;
+- struct task_struct *new_owner;
+- bool postunlock = false;
+- DEFINE_RT_WAKE_Q(wqh);
+- u32 curval, newval;
+- int ret = 0;
+-
+- top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
+- if (WARN_ON_ONCE(!top_waiter)) {
+- /*
+- * As per the comment in futex_unlock_pi() this should not happen.
+- *
+- * When this happens, give up our locks and try again, giving
+- * the futex_lock_pi() instance time to complete, either by
+- * waiting on the rtmutex or removing itself from the futex
+- * queue.
+- */
+- ret = -EAGAIN;
+- goto out_unlock;
+- }
+-
+- new_owner = top_waiter->task;
+-
+- /*
+- * We pass it to the next owner. The WAITERS bit is always kept
+- * enabled while there is PI state around. We cleanup the owner
+- * died bit, because we are the owner.
+- */
+- newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
+-
+- if (unlikely(should_fail_futex(true))) {
+- ret = -EFAULT;
+- goto out_unlock;
+- }
+-
+- ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+- if (!ret && (curval != uval)) {
+- /*
+- * If an unconditional UNLOCK_PI operation (user space did not
+- * try the TID->0 transition) raced with a waiter setting the
+- * FUTEX_WAITERS flag between get_user() and locking the hash
+- * bucket lock, retry the operation.
+- */
+- if ((FUTEX_TID_MASK & curval) == uval)
+- ret = -EAGAIN;
+- else
+- ret = -EINVAL;
+- }
+-
+- if (!ret) {
+- /*
+- * This is a point of no return; once we modified the uval
+- * there is no going back and subsequent operations must
+- * not fail.
+- */
+- pi_state_update_owner(pi_state, new_owner);
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
+- }
+-
+-out_unlock:
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+-
+- if (postunlock)
+- rt_mutex_postunlock(&wqh);
+-
+- return ret;
+-}
+-
+-/*
+- * Express the locking dependencies for lockdep:
+- */
+-static inline void
+-double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+-{
+- if (hb1 <= hb2) {
+- spin_lock(&hb1->lock);
+- if (hb1 < hb2)
+- spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
+- } else { /* hb1 > hb2 */
+- spin_lock(&hb2->lock);
+- spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
+- }
+-}
+-
+-static inline void
+-double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+-{
+- spin_unlock(&hb1->lock);
+- if (hb1 != hb2)
+- spin_unlock(&hb2->lock);
+-}
+-
+-/*
+- * Wake up waiters matching bitset queued on this futex (uaddr).
+- */
+-static int
+-futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
+-{
+- struct futex_hash_bucket *hb;
+- struct futex_q *this, *next;
+- union futex_key key = FUTEX_KEY_INIT;
+- int ret;
+- DEFINE_WAKE_Q(wake_q);
+-
+- if (!bitset)
+- return -EINVAL;
+-
+- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
+- if (unlikely(ret != 0))
+- return ret;
+-
+- hb = hash_futex(&key);
+-
+- /* Make sure we really have tasks to wakeup */
+- if (!hb_waiters_pending(hb))
+- return ret;
+-
+- spin_lock(&hb->lock);
+-
+- plist_for_each_entry_safe(this, next, &hb->chain, list) {
+- if (match_futex (&this->key, &key)) {
+- if (this->pi_state || this->rt_waiter) {
+- ret = -EINVAL;
+- break;
+- }
+-
+- /* Check if one of the bits is set in both bitsets */
+- if (!(this->bitset & bitset))
+- continue;
+-
+- mark_wake_futex(&wake_q, this);
+- if (++ret >= nr_wake)
+- break;
+- }
+- }
+-
+- spin_unlock(&hb->lock);
+- wake_up_q(&wake_q);
+- return ret;
+-}
+-
+-static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+-{
+- unsigned int op = (encoded_op & 0x70000000) >> 28;
+- unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
+- int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
+- int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
+- int oldval, ret;
+-
+- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
+- if (oparg < 0 || oparg > 31) {
+- char comm[sizeof(current->comm)];
+- /*
+- * kill this print and return -EINVAL when userspace
+- * is sane again
+- */
+- pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
+- get_task_comm(comm, current), oparg);
+- oparg &= 31;
+- }
+- oparg = 1 << oparg;
+- }
+-
+- pagefault_disable();
+- ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
+- pagefault_enable();
+- if (ret)
+- return ret;
+-
+- switch (cmp) {
+- case FUTEX_OP_CMP_EQ:
+- return oldval == cmparg;
+- case FUTEX_OP_CMP_NE:
+- return oldval != cmparg;
+- case FUTEX_OP_CMP_LT:
+- return oldval < cmparg;
+- case FUTEX_OP_CMP_GE:
+- return oldval >= cmparg;
+- case FUTEX_OP_CMP_LE:
+- return oldval <= cmparg;
+- case FUTEX_OP_CMP_GT:
+- return oldval > cmparg;
+- default:
+- return -ENOSYS;
+- }
+-}
+-
+-/*
+- * Wake up all waiters hashed on the physical page that is mapped
+- * to this virtual address:
+- */
+-static int
+-futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
+- int nr_wake, int nr_wake2, int op)
+-{
+- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+- struct futex_hash_bucket *hb1, *hb2;
+- struct futex_q *this, *next;
+- int ret, op_ret;
+- DEFINE_WAKE_Q(wake_q);
+-
+-retry:
+- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
+- if (unlikely(ret != 0))
+- return ret;
+- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
+- if (unlikely(ret != 0))
+- return ret;
+-
+- hb1 = hash_futex(&key1);
+- hb2 = hash_futex(&key2);
+-
+-retry_private:
+- double_lock_hb(hb1, hb2);
+- op_ret = futex_atomic_op_inuser(op, uaddr2);
+- if (unlikely(op_ret < 0)) {
+- double_unlock_hb(hb1, hb2);
+-
+- if (!IS_ENABLED(CONFIG_MMU) ||
+- unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+- /*
+- * we don't get EFAULT from MMU faults if we don't have
+- * an MMU, but we might get them from range checking
+- */
+- ret = op_ret;
+- return ret;
+- }
+-
+- if (op_ret == -EFAULT) {
+- ret = fault_in_user_writeable(uaddr2);
+- if (ret)
+- return ret;
+- }
+-
+- cond_resched();
+- if (!(flags & FLAGS_SHARED))
+- goto retry_private;
+- goto retry;
+- }
+-
+- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
+- if (match_futex (&this->key, &key1)) {
+- if (this->pi_state || this->rt_waiter) {
+- ret = -EINVAL;
+- goto out_unlock;
+- }
+- mark_wake_futex(&wake_q, this);
+- if (++ret >= nr_wake)
+- break;
+- }
+- }
+-
+- if (op_ret > 0) {
+- op_ret = 0;
+- plist_for_each_entry_safe(this, next, &hb2->chain, list) {
+- if (match_futex (&this->key, &key2)) {
+- if (this->pi_state || this->rt_waiter) {
+- ret = -EINVAL;
+- goto out_unlock;
+- }
+- mark_wake_futex(&wake_q, this);
+- if (++op_ret >= nr_wake2)
+- break;
+- }
+- }
+- ret += op_ret;
+- }
+-
+-out_unlock:
+- double_unlock_hb(hb1, hb2);
+- wake_up_q(&wake_q);
+- return ret;
+-}
+-
+-/**
+- * requeue_futex() - Requeue a futex_q from one hb to another
+- * @q: the futex_q to requeue
+- * @hb1: the source hash_bucket
+- * @hb2: the target hash_bucket
+- * @key2: the new key for the requeued futex_q
+- */
+-static inline
+-void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
+- struct futex_hash_bucket *hb2, union futex_key *key2)
+-{
+-
+- /*
+- * If key1 and key2 hash to the same bucket, no need to
+- * requeue.
+- */
+- if (likely(&hb1->chain != &hb2->chain)) {
+- plist_del(&q->list, &hb1->chain);
+- hb_waiters_dec(hb1);
+- hb_waiters_inc(hb2);
+- plist_add(&q->list, &hb2->chain);
+- q->lock_ptr = &hb2->lock;
+- }
+- q->key = *key2;
+-}
+-
+-static inline bool futex_requeue_pi_prepare(struct futex_q *q,
+- struct futex_pi_state *pi_state)
+-{
+- int old, new;
+-
+- /*
+- * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
+- * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
+- * ignore the waiter.
+- */
+- old = atomic_read_acquire(&q->requeue_state);
+- do {
+- if (old == Q_REQUEUE_PI_IGNORE)
+- return false;
+-
+- /*
+- * futex_proxy_trylock_atomic() might have set it to
+- * IN_PROGRESS and an interleaved early wake to WAIT.
+- *
+- * It was considered to have an extra state for that
+- * trylock, but that would just add more conditionals
+- * all over the place for a dubious value.
+- */
+- if (old != Q_REQUEUE_PI_NONE)
+- break;
+-
+- new = Q_REQUEUE_PI_IN_PROGRESS;
+- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+-
+- q->pi_state = pi_state;
+- return true;
+-}
+-
+-static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
+-{
+- int old, new;
+-
+- old = atomic_read_acquire(&q->requeue_state);
+- do {
+- if (old == Q_REQUEUE_PI_IGNORE)
+- return;
+-
+- if (locked >= 0) {
+- /* Requeue succeeded. Set DONE or LOCKED */
+- WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
+- old != Q_REQUEUE_PI_WAIT);
+- new = Q_REQUEUE_PI_DONE + locked;
+- } else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
+- /* Deadlock, no early wakeup interleave */
+- new = Q_REQUEUE_PI_NONE;
+- } else {
+- /* Deadlock, early wakeup interleave. */
+- WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
+- new = Q_REQUEUE_PI_IGNORE;
+- }
+- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+-
+-#ifdef CONFIG_PREEMPT_RT
+- /* If the waiter interleaved with the requeue let it know */
+- if (unlikely(old == Q_REQUEUE_PI_WAIT))
+- rcuwait_wake_up(&q->requeue_wait);
+-#endif
+-}
+-
+-static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
+-{
+- int old, new;
+-
+- old = atomic_read_acquire(&q->requeue_state);
+- do {
+- /* Is requeue done already? */
+- if (old >= Q_REQUEUE_PI_DONE)
+- return old;
+-
+- /*
+- * If not done, then tell the requeue code to either ignore
+- * the waiter or to wake it up once the requeue is done.
+- */
+- new = Q_REQUEUE_PI_WAIT;
+- if (old == Q_REQUEUE_PI_NONE)
+- new = Q_REQUEUE_PI_IGNORE;
+- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
+-
+- /* If the requeue was in progress, wait for it to complete */
+- if (old == Q_REQUEUE_PI_IN_PROGRESS) {
+-#ifdef CONFIG_PREEMPT_RT
+- rcuwait_wait_event(&q->requeue_wait,
+- atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
+- TASK_UNINTERRUPTIBLE);
+-#else
+- (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
+-#endif
+- }
+-
+- /*
+- * Requeue is now either prohibited or complete. Reread state
+- * because during the wait above it might have changed. Nothing
+- * will modify q->requeue_state after this point.
+- */
+- return atomic_read(&q->requeue_state);
+-}
+-
+-/**
+- * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
+- * @q: the futex_q
+- * @key: the key of the requeue target futex
+- * @hb: the hash_bucket of the requeue target futex
+- *
+- * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+- * target futex if it is uncontended or via a lock steal.
+- *
+- * 1) Set @q::key to the requeue target futex key so the waiter can detect
+- * the wakeup on the right futex.
+- *
+- * 2) Dequeue @q from the hash bucket.
+- *
+- * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
+- * acquisition.
+- *
+- * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
+- * the waiter has to fixup the pi state.
+- *
+- * 5) Complete the requeue state so the waiter can make progress. After
+- * this point the waiter task can return from the syscall immediately in
+- * case that the pi state does not have to be fixed up.
+- *
+- * 6) Wake the waiter task.
+- *
+- * Must be called with both q->lock_ptr and hb->lock held.
+- */
+-static inline
+-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+- struct futex_hash_bucket *hb)
+-{
+- q->key = *key;
+-
+- __unqueue_futex(q);
+-
+- WARN_ON(!q->rt_waiter);
+- q->rt_waiter = NULL;
+-
+- q->lock_ptr = &hb->lock;
+-
+- /* Signal locked state to the waiter */
+- futex_requeue_pi_complete(q, 1);
+- wake_up_state(q->task, TASK_NORMAL);
+-}
+-
+-/**
+- * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
+- * @pifutex: the user address of the to futex
+- * @hb1: the from futex hash bucket, must be locked by the caller
+- * @hb2: the to futex hash bucket, must be locked by the caller
+- * @key1: the from futex key
+- * @key2: the to futex key
+- * @ps: address to store the pi_state pointer
+- * @exiting: Pointer to store the task pointer of the owner task
+- * which is in the middle of exiting
+- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+- *
+- * Try and get the lock on behalf of the top waiter if we can do it atomically.
+- * Wake the top waiter if we succeed. If the caller specified set_waiters,
+- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+- * hb1 and hb2 must be held by the caller.
+- *
+- * @exiting is only set when the return value is -EBUSY. If so, this holds
+- * a refcount on the exiting task on return and the caller needs to drop it
+- * after waiting for the exit to complete.
+- *
+- * Return:
+- * - 0 - failed to acquire the lock atomically;
+- * - >0 - acquired the lock, return value is vpid of the top_waiter
+- * - <0 - error
+- */
+-static int
+-futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+- struct futex_hash_bucket *hb2, union futex_key *key1,
+- union futex_key *key2, struct futex_pi_state **ps,
+- struct task_struct **exiting, int set_waiters)
+-{
+- struct futex_q *top_waiter = NULL;
+- u32 curval;
+- int ret;
+-
+- if (get_futex_value_locked(&curval, pifutex))
+- return -EFAULT;
+-
+- if (unlikely(should_fail_futex(true)))
+- return -EFAULT;
+-
+- /*
+- * Find the top_waiter and determine if there are additional waiters.
+- * If the caller intends to requeue more than 1 waiter to pifutex,
+- * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
+- * as we have means to handle the possible fault. If not, don't set
+- * the bit unnecessarily as it will force the subsequent unlock to enter
+- * the kernel.
+- */
+- top_waiter = futex_top_waiter(hb1, key1);
+-
+- /* There are no waiters, nothing for us to do. */
+- if (!top_waiter)
+- return 0;
+-
+- /*
+- * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
+- * and waiting on the 'waitqueue' futex which is always !PI.
+- */
+- if (!top_waiter->rt_waiter || top_waiter->pi_state)
+- return -EINVAL;
+-
+- /* Ensure we requeue to the expected futex. */
+- if (!match_futex(top_waiter->requeue_pi_key, key2))
+- return -EINVAL;
+-
+- /* Ensure that this does not race against an early wakeup */
+- if (!futex_requeue_pi_prepare(top_waiter, NULL))
+- return -EAGAIN;
+-
+- /*
+- * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
+- * in the contended case or if @set_waiters is true.
+- *
+- * In the contended case PI state is attached to the lock owner. If
+- * the user space lock can be acquired then PI state is attached to
+- * the new owner (@top_waiter->task) when @set_waiters is true.
+- */
+- ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+- exiting, set_waiters);
+- if (ret == 1) {
+- /*
+- * Lock was acquired in user space and PI state was
+- * attached to @top_waiter->task. That means state is fully
+- * consistent and the waiter can return to user space
+- * immediately after the wakeup.
+- */
+- requeue_pi_wake_futex(top_waiter, key2, hb2);
+- } else if (ret < 0) {
+- /* Rewind top_waiter::requeue_state */
+- futex_requeue_pi_complete(top_waiter, ret);
+- } else {
+- /*
+- * futex_lock_pi_atomic() did not acquire the user space
+- * futex, but managed to establish the proxy lock and pi
+- * state. top_waiter::requeue_state cannot be fixed up here
+- * because the waiter is not enqueued on the rtmutex
+- * yet. This is handled at the callsite depending on the
+- * result of rt_mutex_start_proxy_lock() which is
+- * guaranteed to be reached with this function returning 0.
+- */
+- }
+- return ret;
+-}
+-
+-/**
+- * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
+- * @uaddr1: source futex user address
+- * @flags: futex flags (FLAGS_SHARED, etc.)
+- * @uaddr2: target futex user address
+- * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
+- * @nr_requeue: number of waiters to requeue (0-INT_MAX)
+- * @cmpval: @uaddr1 expected value (or %NULL)
+- * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
+- * pi futex (pi to pi requeue is not supported)
+- *
+- * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
+- * uaddr2 atomically on behalf of the top waiter.
+- *
+- * Return:
+- * - >=0 - on success, the number of tasks requeued or woken;
+- * - <0 - on error
+- */
+-static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+- u32 __user *uaddr2, int nr_wake, int nr_requeue,
+- u32 *cmpval, int requeue_pi)
+-{
+- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+- int task_count = 0, ret;
+- struct futex_pi_state *pi_state = NULL;
+- struct futex_hash_bucket *hb1, *hb2;
+- struct futex_q *this, *next;
+- DEFINE_WAKE_Q(wake_q);
+-
+- if (nr_wake < 0 || nr_requeue < 0)
+- return -EINVAL;
+-
+- /*
+- * When PI not supported: return -ENOSYS if requeue_pi is true,
+- * consequently the compiler knows requeue_pi is always false past
+- * this point which will optimize away all the conditional code
+- * further down.
+- */
+- if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
+- return -ENOSYS;
+-
+- if (requeue_pi) {
+- /*
+- * Requeue PI only works on two distinct uaddrs. This
+- * check is only valid for private futexes. See below.
+- */
+- if (uaddr1 == uaddr2)
+- return -EINVAL;
+-
+- /*
+- * futex_requeue() allows the caller to define the number
+- * of waiters to wake up via the @nr_wake argument. With
+- * REQUEUE_PI, waking up more than one waiter is creating
+- * more problems than it solves. Waking up a waiter only
+- * makes sense if the PI futex @uaddr2 is uncontended, as
+- * this allows the requeue code to acquire the futex
+- * @uaddr2 before waking the waiter. The waiter can then
+- * return to user space without further action. A secondary
+- * wakeup would just make the futex_wait_requeue_pi()
+- * handling more complex, because that code would have to
+- * look up pi_state and do more or less all the handling
+- * which the requeue code has to do for the to-be-requeued
+- * waiters. So restrict the number of waiters to wake to
+- * one, and only wake it up when the PI futex is
+- * uncontended. Otherwise requeue it and let the unlock of
+- * the PI futex handle the wakeup.
+- *
+- * All REQUEUE_PI users, e.g. pthread_cond_signal() and
+- * pthread_cond_broadcast() must use nr_wake=1.
+- */
+- if (nr_wake != 1)
+- return -EINVAL;
+-
+- /*
+- * requeue_pi requires a pi_state, try to allocate it now
+- * without any locks in case it fails.
+- */
+- if (refill_pi_state_cache())
+- return -ENOMEM;
+- }
+-
+-retry:
+- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
+- if (unlikely(ret != 0))
+- return ret;
+- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
+- requeue_pi ? FUTEX_WRITE : FUTEX_READ);
+- if (unlikely(ret != 0))
+- return ret;
+-
+- /*
+- * The check above which compares uaddrs is not sufficient for
+- * shared futexes. We need to compare the keys:
+- */
+- if (requeue_pi && match_futex(&key1, &key2))
+- return -EINVAL;
+-
+- hb1 = hash_futex(&key1);
+- hb2 = hash_futex(&key2);
+-
+-retry_private:
+- hb_waiters_inc(hb2);
+- double_lock_hb(hb1, hb2);
+-
+- if (likely(cmpval != NULL)) {
+- u32 curval;
+-
+- ret = get_futex_value_locked(&curval, uaddr1);
+-
+- if (unlikely(ret)) {
+- double_unlock_hb(hb1, hb2);
+- hb_waiters_dec(hb2);
+-
+- ret = get_user(curval, uaddr1);
+- if (ret)
+- return ret;
+-
+- if (!(flags & FLAGS_SHARED))
+- goto retry_private;
+-
+- goto retry;
+- }
+- if (curval != *cmpval) {
+- ret = -EAGAIN;
+- goto out_unlock;
+- }
+- }
+-
+- if (requeue_pi) {
+- struct task_struct *exiting = NULL;
+-
+- /*
+- * Attempt to acquire uaddr2 and wake the top waiter. If we
+- * intend to requeue waiters, force setting the FUTEX_WAITERS
+- * bit. We force this here where we are able to easily handle
+- * faults rather in the requeue loop below.
+- *
+- * Updates topwaiter::requeue_state if a top waiter exists.
+- */
+- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+- &key2, &pi_state,
+- &exiting, nr_requeue);
+-
+- /*
+- * At this point the top_waiter has either taken uaddr2 or
+- * is waiting on it. In both cases pi_state has been
+- * established and an initial refcount on it. In case of an
+- * established with an initial refcount on it. In case of an
+- *
+- * The top waiter's requeue_state is up to date:
+- *
+- * - If the lock was acquired atomically (ret == 1), then
+- * the state is Q_REQUEUE_PI_LOCKED.
+- *
+- * The top waiter has been dequeued and woken up and can
+- * return to user space immediately. The kernel/user
+- * space state is consistent. In case that there must be
+- * more waiters requeued the WAITERS bit in the user
+- * space futex is set so the top waiter task has to go
+- * into the syscall slowpath to unlock the futex. This
+- * will block until this requeue operation has been
+- * completed and the hash bucket locks have been
+- * dropped.
+- *
+- * - If the trylock failed with an error (ret < 0) then
+- * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
+- * happened", or Q_REQUEUE_PI_IGNORE when there was an
+- * interleaved early wakeup.
+- *
+- * - If the trylock did not succeed (ret == 0) then the
+- * state is either Q_REQUEUE_PI_IN_PROGRESS or
+- * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
+- * This will be cleaned up in the loop below, which
+- * cannot fail because futex_proxy_trylock_atomic() did
+- * the same sanity checks for requeue_pi as the loop
+- * below does.
+- */
+- switch (ret) {
+- case 0:
+- /* We hold a reference on the pi state. */
+- break;
+-
+- case 1:
+- /*
+- * futex_proxy_trylock_atomic() acquired the user space
+- * futex. Adjust task_count.
+- */
+- task_count++;
+- ret = 0;
+- break;
+-
+- /*
+- * If the above failed, then pi_state is NULL and
+- * waiter::requeue_state is correct.
+- */
+- case -EFAULT:
+- double_unlock_hb(hb1, hb2);
+- hb_waiters_dec(hb2);
+- ret = fault_in_user_writeable(uaddr2);
+- if (!ret)
+- goto retry;
+- return ret;
+- case -EBUSY:
+- case -EAGAIN:
+- /*
+- * Two reasons for this:
+- * - EBUSY: Owner is exiting and we just wait for the
+- * exit to complete.
+- * - EAGAIN: The user space value changed.
+- */
+- double_unlock_hb(hb1, hb2);
+- hb_waiters_dec(hb2);
+- /*
+- * Handle the case where the owner is in the middle of
+- * exiting. Wait for the exit to complete otherwise
+- * this task might loop forever, aka. live lock.
+- */
+- wait_for_owner_exiting(ret, exiting);
+- cond_resched();
+- goto retry;
+- default:
+- goto out_unlock;
+- }
+- }
+-
+- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
+- if (task_count - nr_wake >= nr_requeue)
+- break;
+-
+- if (!match_futex(&this->key, &key1))
+- continue;
+-
+- /*
+- * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+- * be paired with each other and no other futex ops.
+- *
+- * We should never be requeueing a futex_q with a pi_state,
+- * which is awaiting a futex_unlock_pi().
+- */
+- if ((requeue_pi && !this->rt_waiter) ||
+- (!requeue_pi && this->rt_waiter) ||
+- this->pi_state) {
+- ret = -EINVAL;
+- break;
+- }
+-
+- /* Plain futexes just wake or requeue and are done */
+- if (!requeue_pi) {
+- if (++task_count <= nr_wake)
+- mark_wake_futex(&wake_q, this);
+- else
+- requeue_futex(this, hb1, hb2, &key2);
+- continue;
+- }
+-
+- /* Ensure we requeue to the expected futex for requeue_pi. */
+- if (!match_futex(this->requeue_pi_key, &key2)) {
+- ret = -EINVAL;
+- break;
+- }
+-
+- /*
+- * Requeue nr_requeue waiters and possibly one more in the case
+- * of requeue_pi if we couldn't acquire the lock atomically.
+- *
+- * Prepare the waiter to take the rt_mutex. Take a refcount
+- * on the pi_state and store the pointer in the futex_q
+- * object of the waiter.
+- */
+- get_pi_state(pi_state);
+-
+- /* Don't requeue when the waiter is already on the way out. */
+- if (!futex_requeue_pi_prepare(this, pi_state)) {
+- /*
+- * Early woken waiter signaled that it is on the
+- * way out. Drop the pi_state reference and try the
+- * next waiter. @this->pi_state is still NULL.
+- */
+- put_pi_state(pi_state);
+- continue;
+- }
+-
+- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+- this->rt_waiter,
+- this->task);
+-
+- if (ret == 1) {
+- /*
+- * We got the lock. We do neither drop the refcount
+- * on pi_state nor clear this->pi_state because the
+- * waiter needs the pi_state for cleaning up the
+- * user space value. It will drop the refcount
+- * after doing so. this::requeue_state is updated
+- * in the wakeup as well.
+- */
+- requeue_pi_wake_futex(this, &key2, hb2);
+- task_count++;
+- } else if (!ret) {
+- /* Waiter is queued, move it to hb2 */
+- requeue_futex(this, hb1, hb2, &key2);
+- futex_requeue_pi_complete(this, 0);
+- task_count++;
+- } else {
+- /*
+- * rt_mutex_start_proxy_lock() detected a potential
+- * deadlock when we tried to queue that waiter.
+- * Drop the pi_state reference which we took above
+- * and remove the pointer to the state from the
+- * waiters futex_q object.
+- */
+- this->pi_state = NULL;
+- put_pi_state(pi_state);
+- futex_requeue_pi_complete(this, ret);
+- /*
+- * We stop queueing more waiters and let user space
+- * deal with the mess.
+- */
+- break;
+- }
+- }
+-
+- /*
+- * We took an extra initial reference to the pi_state in
+- * futex_proxy_trylock_atomic(). We need to drop it here again.
+- */
+- put_pi_state(pi_state);
+-
+-out_unlock:
+- double_unlock_hb(hb1, hb2);
+- wake_up_q(&wake_q);
+- hb_waiters_dec(hb2);
+- return ret ? ret : task_count;
+-}
+-
+-/* The key must be already stored in q->key. */
+-static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
+- __acquires(&hb->lock)
+-{
+- struct futex_hash_bucket *hb;
+-
+- hb = hash_futex(&q->key);
+-
+- /*
+- * Increment the counter before taking the lock so that
+- * a potential waker won't miss a to-be-slept task that is
+- * waiting for the spinlock. This is safe as all queue_lock()
+- * users end up calling queue_me(). Similarly, for housekeeping,
+- * decrement the counter at queue_unlock() when some error has
+- * occurred and we don't end up adding the task to the list.
+- */
+- hb_waiters_inc(hb); /* implies smp_mb(); (A) */
+-
+- q->lock_ptr = &hb->lock;
+-
+- spin_lock(&hb->lock);
+- return hb;
+-}
+-
+-static inline void
+-queue_unlock(struct futex_hash_bucket *hb)
+- __releases(&hb->lock)
+-{
+- spin_unlock(&hb->lock);
+- hb_waiters_dec(hb);
+-}
+-
+-static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+-{
+- int prio;
+-
+- /*
+- * The priority used to register this element is
+- * - either the real thread-priority for the real-time threads
+- * (i.e. threads with a priority lower than MAX_RT_PRIO)
+- * - or MAX_RT_PRIO for non-RT threads.
+- * Thus, all RT-threads are woken first in priority order, and
+- * the others are woken last, in FIFO order.
+- */
+- prio = min(current->normal_prio, MAX_RT_PRIO);
+-
+- plist_node_init(&q->list, prio);
+- plist_add(&q->list, &hb->chain);
+- q->task = current;
+-}
+-
+-/**
+- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+- * @q: The futex_q to enqueue
+- * @hb: The destination hash bucket
+- *
+- * The hb->lock must be held by the caller, and is released here. A call to
+- * queue_me() is typically paired with exactly one call to unqueue_me(). The
+- * exceptions involve the PI related operations, which may use unqueue_me_pi()
+- * or nothing if the unqueue is done as part of the wake process and the unqueue
+- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
+- * state is implicit in the state of the woken task (see futex_wait_requeue_pi() for
+- */
+-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+- __releases(&hb->lock)
+-{
+- __queue_me(q, hb);
+- spin_unlock(&hb->lock);
+-}
+-
+-/**
+- * unqueue_me() - Remove the futex_q from its futex_hash_bucket
+- * @q: The futex_q to unqueue
+- *
+- * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
+- * be paired with exactly one earlier call to queue_me().
+- *
+- * Return:
+- * - 1 - if the futex_q was still queued (and we unqueued it);
+- * - 0 - if the futex_q was already removed by the waking thread
+- */
+-static int unqueue_me(struct futex_q *q)
+-{
+- spinlock_t *lock_ptr;
+- int ret = 0;
+-
+- /* In the common case we don't take the spinlock, which is nice. */
+-retry:
+- /*
+- * q->lock_ptr can change between this read and the following spin_lock.
+- * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+- * optimizing lock_ptr out of the logic below.
+- */
+- lock_ptr = READ_ONCE(q->lock_ptr);
+- if (lock_ptr != NULL) {
+- spin_lock(lock_ptr);
+- /*
+- * q->lock_ptr can change between reading it and
+- * spin_lock(), causing us to take the wrong lock. This
+- * corrects the race condition.
+- *
+- * Reasoning goes like this: if we have the wrong lock,
+- * q->lock_ptr must have changed (maybe several times)
+- * between reading it and the spin_lock(). It can
+- * change again after the spin_lock() but only if it was
+- * already changed before the spin_lock(). It cannot,
+- * however, change back to the original value. Therefore
+- * we can detect whether we acquired the correct lock.
+- */
+- if (unlikely(lock_ptr != q->lock_ptr)) {
+- spin_unlock(lock_ptr);
+- goto retry;
+- }
+- __unqueue_futex(q);
+-
+- BUG_ON(q->pi_state);
+-
+- spin_unlock(lock_ptr);
+- ret = 1;
+- }
+-
+- return ret;
+-}
+-
+-/*
+- * PI futexes can not be requeued and must remove themselves from the
+- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
+- */
+-static void unqueue_me_pi(struct futex_q *q)
+-{
+- __unqueue_futex(q);
+-
+- BUG_ON(!q->pi_state);
+- put_pi_state(q->pi_state);
+- q->pi_state = NULL;
+-}
+-
+-static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+- struct task_struct *argowner)
+-{
+- struct futex_pi_state *pi_state = q->pi_state;
+- struct task_struct *oldowner, *newowner;
+- u32 uval, curval, newval, newtid;
+- int err = 0;
+-
+- oldowner = pi_state->owner;
+-
+- /*
+- * We are here because either:
+- *
+- * - we stole the lock and pi_state->owner needs updating to reflect
+- * that (@argowner == current),
+- *
+- * or:
+- *
+- * - someone stole our lock and we need to fix things to point to the
+- * new owner (@argowner == NULL).
+- *
+- * Either way, we have to replace the TID in the user space variable.
+- * This must be atomic as we have to preserve the owner died bit here.
+- *
+- * Note: We write the user space value _before_ changing the pi_state
+- * because we can fault here. Imagine swapped out pages or a fork
+- * that marked all the anonymous memory readonly for cow.
+- *
+- * Modifying pi_state _before_ the user space value would leave the
+- * pi_state in an inconsistent state when we fault here, because we
+- * need to drop the locks to handle the fault. This might be observed
+- * in the PID checks when attaching to PI state.
+- */
+-retry:
+- if (!argowner) {
+- if (oldowner != current) {
+- /*
+- * We raced against a concurrent self; things are
+- * already fixed up. Nothing to do.
+- */
+- return 0;
+- }
+-
+- if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+- /* We got the lock. pi_state is correct. Tell caller. */
+- return 1;
+- }
+-
+- /*
+- * The trylock just failed, so either there is an owner or
+- * there is a higher priority waiter than this one.
+- */
+- newowner = rt_mutex_owner(&pi_state->pi_mutex);
+- /*
+- * If the higher priority waiter has not yet taken over the
+- * rtmutex then newowner is NULL. We can't return here with
+- * that state because it's inconsistent vs. the user space
+- * state. So drop the locks and try again. It's a valid
+- * situation and not any different from the other retry
+- * conditions.
+- */
+- if (unlikely(!newowner)) {
+- err = -EAGAIN;
+- goto handle_err;
+- }
+- } else {
+- WARN_ON_ONCE(argowner != current);
+- if (oldowner == current) {
+- /*
+- * We raced against a concurrent self; things are
+- * already fixed up. Nothing to do.
+- */
+- return 1;
+- }
+- newowner = argowner;
+- }
+-
+- newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+- /* Owner died? */
+- if (!pi_state->owner)
+- newtid |= FUTEX_OWNER_DIED;
+-
+- err = get_futex_value_locked(&uval, uaddr);
+- if (err)
+- goto handle_err;
+-
+- for (;;) {
+- newval = (uval & FUTEX_OWNER_DIED) | newtid;
+-
+- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
+- if (err)
+- goto handle_err;
+-
+- if (curval == uval)
+- break;
+- uval = curval;
+- }
+-
+- /*
+- * We fixed up user space. Now we need to fix the pi_state
+- * itself.
+- */
+- pi_state_update_owner(pi_state, newowner);
+-
+- return argowner == current;
+-
+- /*
+- * In order to reschedule or handle a page fault, we need to drop the
+- * locks here. In the case of a fault, this gives the other task
+- * (either the highest priority waiter itself or the task which stole
+- * the rtmutex) the chance to try the fixup of the pi_state. So once we
+- * are back from handling the fault we need to check the pi_state after
+- * reacquiring the locks and before trying to do another fixup. When
+- * the fixup has been done already we simply return.
+- *
+- * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
+- * drop hb->lock since the caller owns the hb -> futex_q relation.
+- * Dropping the pi_mutex->wait_lock requires the state revalidate.
+- */
+-handle_err:
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(q->lock_ptr);
+-
+- switch (err) {
+- case -EFAULT:
+- err = fault_in_user_writeable(uaddr);
+- break;
+-
+- case -EAGAIN:
+- cond_resched();
+- err = 0;
+- break;
+-
+- default:
+- WARN_ON_ONCE(1);
+- break;
+- }
+-
+- spin_lock(q->lock_ptr);
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+-
+- /*
+- * Check if someone else fixed it for us:
+- */
+- if (pi_state->owner != oldowner)
+- return argowner == current;
+-
+- /* Retry if err was -EAGAIN or the fault-in succeeded */
+- if (!err)
+- goto retry;
+-
+- /*
+- * fault_in_user_writeable() failed so user state is immutable. At
+- * best we can make the kernel state consistent but user state will
+- * be most likely hosed and any subsequent unlock operation will be
+- * rejected due to PI futex rule [10].
+- *
+- * Ensure that the rtmutex owner is also the pi_state owner despite
+- * the user space value claiming something different. There is no
+- * point in unlocking the rtmutex if current is the owner as it
+- * would need to wait until the next waiter has taken the rtmutex
+- * to guarantee consistent state. Keep it simple. Userspace asked
+- * for this wrecked state.
+- *
+- * The rtmutex has an owner - either current or some other
+- * task. See the EAGAIN loop above.
+- */
+- pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
+-
+- return err;
+-}
+-
+-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+- struct task_struct *argowner)
+-{
+- struct futex_pi_state *pi_state = q->pi_state;
+- int ret;
+-
+- lockdep_assert_held(q->lock_ptr);
+-
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+- ret = __fixup_pi_state_owner(uaddr, q, argowner);
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+- return ret;
+-}
+-
+-static long futex_wait_restart(struct restart_block *restart);
+-
+-/**
+- * fixup_owner() - Post lock pi_state and corner case management
+- * @uaddr: user address of the futex
+- * @q: futex_q (contains pi_state and access to the rt_mutex)
+- * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
+- *
+- * After attempting to lock an rt_mutex, this function is called to cleanup
+- * the pi_state owner as well as handle race conditions that may allow us to
+- * acquire the lock. Must be called with the hb lock held.
+- *
+- * Return:
+- * - 1 - success, lock taken;
+- * - 0 - success, lock not taken;
+- * - <0 - on error (-EFAULT)
+- */
+-static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
+-{
+- if (locked) {
+- /*
+- * Got the lock. We might not be the anticipated owner if we
+- * did a lock-steal - fix up the PI-state in that case:
+- *
+- * Speculative pi_state->owner read (we don't hold wait_lock);
+- * since we own the lock pi_state->owner == current is the
+- * stable state, anything else needs more attention.
+- */
+- if (q->pi_state->owner != current)
+- return fixup_pi_state_owner(uaddr, q, current);
+- return 1;
+- }
+-
+- /*
+- * If we didn't get the lock; check if anybody stole it from us. In
+- * that case, we need to fix up the uval to point to them instead of
+- * us, otherwise bad things happen. [10]
+- *
+- * Another speculative read; pi_state->owner == current is unstable
+- * but needs our attention.
+- */
+- if (q->pi_state->owner == current)
+- return fixup_pi_state_owner(uaddr, q, NULL);
+-
+- /*
+- * Paranoia check. If we did not take the lock, then we should not be
+- * the owner of the rt_mutex. Warn and establish consistent state.
+- */
+- if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
+- return fixup_pi_state_owner(uaddr, q, current);
+-
+- return 0;
+-}
+-
+-/**
+- * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
+- * @hb: the futex hash bucket, must be locked by the caller
+- * @q: the futex_q to queue up on
+- * @timeout: the prepared hrtimer_sleeper, or null for no timeout
+- */
+-static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
+- struct hrtimer_sleeper *timeout)
+-{
+- /*
+- * The task state is guaranteed to be set before another task can
+- * wake it. set_current_state() is implemented using smp_store_mb() and
+- * queue_me() calls spin_unlock() upon completion, both serializing
+- * access to the hash list and forcing another memory barrier.
+- */
+- set_current_state(TASK_INTERRUPTIBLE);
+- queue_me(q, hb);
+-
+- /* Arm the timer */
+- if (timeout)
+- hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
+-
+- /*
+- * If we have been removed from the hash list, then another task
+- * has tried to wake us, and we can skip the call to schedule().
+- */
+- if (likely(!plist_node_empty(&q->list))) {
+- /*
+- * If the timer has already expired, current will already be
+- * flagged for rescheduling. Only call schedule if there
+- * is no timeout, or if it has yet to expire.
+- */
+- if (!timeout || timeout->task)
+- freezable_schedule();
+- }
+- __set_current_state(TASK_RUNNING);
+-}
+-
+-/**
+- * futex_wait_setup() - Prepare to wait on a futex
+- * @uaddr: the futex userspace address
+- * @val: the expected value
+- * @flags: futex flags (FLAGS_SHARED, etc.)
+- * @q: the associated futex_q
+- * @hb: storage for hash_bucket pointer to be returned to caller
+- *
+- * Setup the futex_q and locate the hash_bucket. Get the futex value and
+- * compare it with the expected value. Handle atomic faults internally.
+- * Return with the hb lock held on success, and unlocked on failure.
+- *
+- * Return:
+- * - 0 - uaddr contains val and hb has been locked;
+- * - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
+- */
+-static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
+- struct futex_q *q, struct futex_hash_bucket **hb)
+-{
+- u32 uval;
+- int ret;
+-
+- /*
+- * Access the page AFTER the hash-bucket is locked.
+- * Order is important:
+- *
+- * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
+- * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
+- *
+- * The basic logical guarantee of a futex is that it blocks ONLY
+- * if cond(var) is known to be true at the time of blocking, for
+- * any cond. If we locked the hash-bucket after testing *uaddr, that
+- * would open a race condition where we could block indefinitely with
+- * cond(var) false, which would violate the guarantee.
+- *
+- * On the other hand, we insert q and release the hash-bucket only
+- * after testing *uaddr. This guarantees that futex_wait() will NOT
+- * absorb a wakeup if *uaddr does not match the desired values
+- * while the syscall executes.
+- */
+-retry:
+- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
+- if (unlikely(ret != 0))
+- return ret;
+-
+-retry_private:
+- *hb = queue_lock(q);
+-
+- ret = get_futex_value_locked(&uval, uaddr);
+-
+- if (ret) {
+- queue_unlock(*hb);
+-
+- ret = get_user(uval, uaddr);
+- if (ret)
+- return ret;
+-
+- if (!(flags & FLAGS_SHARED))
+- goto retry_private;
+-
+- goto retry;
+- }
+-
+- if (uval != val) {
+- queue_unlock(*hb);
+- ret = -EWOULDBLOCK;
+- }
+-
+- return ret;
+-}
+-
+-static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+- ktime_t *abs_time, u32 bitset)
+-{
+- struct hrtimer_sleeper timeout, *to;
+- struct restart_block *restart;
+- struct futex_hash_bucket *hb;
+- struct futex_q q = futex_q_init;
+- int ret;
+-
+- if (!bitset)
+- return -EINVAL;
+- q.bitset = bitset;
+-
+- to = futex_setup_timer(abs_time, &timeout, flags,
+- current->timer_slack_ns);
+-retry:
+- /*
+- * Prepare to wait on uaddr. On success, it holds hb->lock and q
+- * is initialized.
+- */
+- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
+- if (ret)
+- goto out;
+-
+- /* queue_me and wait for wakeup, timeout, or a signal. */
+- futex_wait_queue_me(hb, &q, to);
+-
+- /* If we were woken (and unqueued), we succeeded, whatever. */
+- ret = 0;
+- if (!unqueue_me(&q))
+- goto out;
+- ret = -ETIMEDOUT;
+- if (to && !to->task)
+- goto out;
+-
+- /*
+- * We expect signal_pending(current), but we might be the
+- * victim of a spurious wakeup as well.
+- */
+- if (!signal_pending(current))
+- goto retry;
+-
+- ret = -ERESTARTSYS;
+- if (!abs_time)
+- goto out;
+-
+- restart = &current->restart_block;
+- restart->futex.uaddr = uaddr;
+- restart->futex.val = val;
+- restart->futex.time = *abs_time;
+- restart->futex.bitset = bitset;
+- restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
+-
+- ret = set_restart_fn(restart, futex_wait_restart);
+-
+-out:
+- if (to) {
+- hrtimer_cancel(&to->timer);
+- destroy_hrtimer_on_stack(&to->timer);
+- }
+- return ret;
+-}
+-
+-
+-static long futex_wait_restart(struct restart_block *restart)
+-{
+- u32 __user *uaddr = restart->futex.uaddr;
+- ktime_t t, *tp = NULL;
+-
+- if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+- t = restart->futex.time;
+- tp = &t;
+- }
+- restart->fn = do_no_restart_syscall;
+-
+- return (long)futex_wait(uaddr, restart->futex.flags,
+- restart->futex.val, tp, restart->futex.bitset);
+-}
+-
+-
+-/*
+- * Userspace tried a 0 -> TID atomic transition of the futex value
+- * and failed. The kernel side here does the whole locking operation:
+- * if there are waiters then it will block as a consequence of relying
+- * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
+- * a 0 value of the futex too.).
+- *
+- * Also serves as the futex trylock_pi() operation, with the corresponding semantics.
+- */
+-static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+- ktime_t *time, int trylock)
+-{
+- struct hrtimer_sleeper timeout, *to;
+- struct task_struct *exiting = NULL;
+- struct rt_mutex_waiter rt_waiter;
+- struct futex_hash_bucket *hb;
+- struct futex_q q = futex_q_init;
+- int res, ret;
+-
+- if (!IS_ENABLED(CONFIG_FUTEX_PI))
+- return -ENOSYS;
+-
+- if (refill_pi_state_cache())
+- return -ENOMEM;
+-
+- to = futex_setup_timer(time, &timeout, flags, 0);
+-
+-retry:
+- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
+- if (unlikely(ret != 0))
+- goto out;
+-
+-retry_private:
+- hb = queue_lock(&q);
+-
+- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+- &exiting, 0);
+- if (unlikely(ret)) {
+- /*
+- * Atomic work succeeded and we got the lock,
+- * or failed. Either way, we do _not_ block.
+- */
+- switch (ret) {
+- case 1:
+- /* We got the lock. */
+- ret = 0;
+- goto out_unlock_put_key;
+- case -EFAULT:
+- goto uaddr_faulted;
+- case -EBUSY:
+- case -EAGAIN:
+- /*
+- * Two reasons for this:
+- * - EBUSY: Task is exiting and we just wait for the
+- * exit to complete.
+- * - EAGAIN: The user space value changed.
+- */
+- queue_unlock(hb);
+- /*
+- * Handle the case where the owner is in the middle of
+- * exiting. Wait for the exit to complete otherwise
+- * this task might loop forever, aka. live lock.
+- */
+- wait_for_owner_exiting(ret, exiting);
+- cond_resched();
+- goto retry;
+- default:
+- goto out_unlock_put_key;
+- }
+- }
+-
+- WARN_ON(!q.pi_state);
+-
+- /*
+- * Only actually queue now that the atomic ops are done:
+- */
+- __queue_me(&q, hb);
+-
+- if (trylock) {
+- ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+- /* Fixup the trylock return value: */
+- ret = ret ? 0 : -EWOULDBLOCK;
+- goto no_block;
+- }
+-
+- rt_mutex_init_waiter(&rt_waiter);
+-
+- /*
+- * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+- * hold it while doing rt_mutex_start_proxy(), because then it will
+- * include hb->lock in the blocking chain, even though we'll not in
+- * fact hold it while blocking. This will lead it to report -EDEADLK
+- * and BUG when futex_unlock_pi() interleaves with this.
+- *
+- * Therefore acquire wait_lock while holding hb->lock, but drop the
+- * latter before calling __rt_mutex_start_proxy_lock(). This
+- * interleaves with futex_unlock_pi() -- which does a similar lock
+- * handoff -- such that the latter can observe the futex_q::pi_state
+- * before __rt_mutex_start_proxy_lock() is done.
+- */
+- raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+- spin_unlock(q.lock_ptr);
+- /*
+- * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+- * such that futex_unlock_pi() is guaranteed to observe the waiter when
+- * it sees the futex_q::pi_state.
+- */
+- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+- raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+-
+- if (ret) {
+- if (ret == 1)
+- ret = 0;
+- goto cleanup;
+- }
+-
+- if (unlikely(to))
+- hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
+-
+- ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
+-
+-cleanup:
+- spin_lock(q.lock_ptr);
+- /*
+- * If we failed to acquire the lock (deadlock/signal/timeout), we must
+- * first acquire the hb->lock before removing the lock from the
+- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+- * lists consistent.
+- *
+- * In particular; it is important that futex_unlock_pi() can not
+- * observe this inconsistency.
+- */
+- if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
+- ret = 0;
+-
+-no_block:
+- /*
+- * Fixup the pi_state owner and possibly acquire the lock if we
+- * haven't already.
+- */
+- res = fixup_owner(uaddr, &q, !ret);
+- /*
+- * If fixup_owner() returned an error, propagate that. If it acquired
+- * the lock, clear our -ETIMEDOUT or -EINTR.
+- */
+- if (res)
+- ret = (res < 0) ? res : 0;
+-
+- unqueue_me_pi(&q);
+- spin_unlock(q.lock_ptr);
+- goto out;
+-
+-out_unlock_put_key:
+- queue_unlock(hb);
+-
+-out:
+- if (to) {
+- hrtimer_cancel(&to->timer);
+- destroy_hrtimer_on_stack(&to->timer);
+- }
+- return ret != -EINTR ? ret : -ERESTARTNOINTR;
+-
+-uaddr_faulted:
+- queue_unlock(hb);
+-
+- ret = fault_in_user_writeable(uaddr);
+- if (ret)
+- goto out;
+-
+- if (!(flags & FLAGS_SHARED))
+- goto retry_private;
+-
+- goto retry;
+-}
+-
+-/*
+- * Userspace attempted a TID -> 0 atomic transition, and failed.
+- * This is the in-kernel slowpath: we look up the PI state (if any),
+- * and do the rt-mutex unlock.
+- */
+-static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+-{
+- u32 curval, uval, vpid = task_pid_vnr(current);
+- union futex_key key = FUTEX_KEY_INIT;
+- struct futex_hash_bucket *hb;
+- struct futex_q *top_waiter;
+- int ret;
+-
+- if (!IS_ENABLED(CONFIG_FUTEX_PI))
+- return -ENOSYS;
+-
+-retry:
+- if (get_user(uval, uaddr))
+- return -EFAULT;
+- /*
+- * We release only a lock we actually own:
+- */
+- if ((uval & FUTEX_TID_MASK) != vpid)
+- return -EPERM;
+-
+- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
+- if (ret)
+- return ret;
+-
+- hb = hash_futex(&key);
+- spin_lock(&hb->lock);
+-
+- /*
+- * Check waiters first. We do not trust user space values at
+- * all and we at least want to know if user space fiddled
+- * with the futex value instead of blindly unlocking.
+- */
+- top_waiter = futex_top_waiter(hb, &key);
+- if (top_waiter) {
+- struct futex_pi_state *pi_state = top_waiter->pi_state;
+-
+- ret = -EINVAL;
+- if (!pi_state)
+- goto out_unlock;
+-
+- /*
+- * If current does not own the pi_state then the futex is
+- * inconsistent and user space fiddled with the futex value.
+- */
+- if (pi_state->owner != current)
+- goto out_unlock;
+-
+- get_pi_state(pi_state);
+- /*
+- * By taking wait_lock while still holding hb->lock, we ensure
+- * there is no point where we hold neither; and therefore
+- * wake_futex_pi() must observe a state consistent with what we
+- * observed.
+- *
+- * In particular; this forces __rt_mutex_start_proxy() to
+- * complete such that we're guaranteed to observe the
+- * rt_waiter. Also see the WARN in wake_futex_pi().
+- */
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(&hb->lock);
+-
+- /* drops pi_state->pi_mutex.wait_lock */
+- ret = wake_futex_pi(uaddr, uval, pi_state);
+-
+- put_pi_state(pi_state);
+-
+- /*
+- * Success, we're done! No tricky corner cases.
+- */
+- if (!ret)
+- return ret;
+- /*
+- * The atomic access to the futex value generated a
+- * pagefault, so retry the user-access and the wakeup:
+- */
+- if (ret == -EFAULT)
+- goto pi_faulted;
+- /*
+- * An unconditional UNLOCK_PI op raced against a waiter
+- * setting the FUTEX_WAITERS bit. Try again.
+- */
+- if (ret == -EAGAIN)
+- goto pi_retry;
+- /*
+- * wake_futex_pi has detected invalid state. Tell user
+- * space.
+- */
+- return ret;
+- }
+-
+- /*
+- * We have no kernel internal state, i.e. no waiters in the
+- * kernel. Waiters which are about to queue themselves are stuck
+- * on hb->lock. So we can safely ignore them. We preserve
+- * neither the WAITERS bit nor the OWNER_DIED one. We are the
+- * owner.
+- */
+- if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
+- spin_unlock(&hb->lock);
+- switch (ret) {
+- case -EFAULT:
+- goto pi_faulted;
+-
+- case -EAGAIN:
+- goto pi_retry;
+-
+- default:
+- WARN_ON_ONCE(1);
+- return ret;
+- }
+- }
+-
+- /*
+- * If uval has changed, let user space handle it.
+- */
+- ret = (curval == uval) ? 0 : -EAGAIN;
+-
+-out_unlock:
+- spin_unlock(&hb->lock);
+- return ret;
+-
+-pi_retry:
+- cond_resched();
+- goto retry;
+-
+-pi_faulted:
+-
+- ret = fault_in_user_writeable(uaddr);
+- if (!ret)
+- goto retry;
+-
+- return ret;
+-}
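
For reference, the user-space side of the protocol that futex_lock_pi() and futex_unlock_pi() back up is the documented 0 -> TID / TID -> 0 transition from futex(2). A minimal, hedged sketch (not taken from this patch; lock_word and the helper names are illustrative):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>

/* One 32-bit futex word per lock: 0 = free, otherwise the holder's TID
 * (plus FUTEX_WAITERS/FUTEX_OWNER_DIED bits maintained by the kernel). */
static _Atomic unsigned int lock_word;

static void pi_lock(void)
{
	unsigned int zero = 0;
	unsigned int tid = syscall(SYS_gettid);

	/* Fast path: uncontended 0 -> TID transition done in user space. */
	if (atomic_compare_exchange_strong(&lock_word, &zero, tid))
		return;
	/* Contended: let the kernel take the PI-aware slow path. */
	syscall(SYS_futex, &lock_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_unlock(void)
{
	unsigned int tid = syscall(SYS_gettid);

	/* Fast path: uncontended TID -> 0 transition. */
	if (atomic_compare_exchange_strong(&lock_word, &tid, 0))
		return;
	/* FUTEX_WAITERS is set: the kernel must wake the top waiter. */
	syscall(SYS_futex, &lock_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}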
+-
+-/**
+- * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
+- * @hb: the hash_bucket futex_q was originally enqueued on
+- * @q: the futex_q woken while waiting to be requeued
+- * @timeout: the timeout associated with the wait (NULL if none)
+- *
+- * Determine the cause for the early wakeup.
+- *
+- * Return:
+- * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
+- */
+-static inline
+-int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
+- struct futex_q *q,
+- struct hrtimer_sleeper *timeout)
+-{
+- int ret;
+-
+- /*
+- * With the hb lock held, we avoid races while we process the wakeup.
+- * We only need to hold hb (and not hb2) to ensure atomicity as the
+- * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
+- * It can't be requeued from uaddr2 to something else since we don't
+- * support a PI aware source futex for requeue.
+- */
+- WARN_ON_ONCE(&hb->lock != q->lock_ptr);
+-
+- /*
+- * We were woken prior to requeue by a timeout or a signal.
+- * Unqueue the futex_q and determine which it was.
+- */
+- plist_del(&q->list, &hb->chain);
+- hb_waiters_dec(hb);
+-
+- /* Handle spurious wakeups gracefully */
+- ret = -EWOULDBLOCK;
+- if (timeout && !timeout->task)
+- ret = -ETIMEDOUT;
+- else if (signal_pending(current))
+- ret = -ERESTARTNOINTR;
+- return ret;
+-}
+-
+-/**
+- * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
+- * @uaddr: the futex we initially wait on (non-pi)
+- * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
+- * the same type, no requeueing from private to shared, etc.
+- * @val: the expected value of uaddr
+- * @abs_time: absolute timeout
+- * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
+- * @uaddr2: the pi futex we will take prior to returning to user-space
+- *
+- * The caller will wait on uaddr and will be requeued by futex_requeue() to
+- * uaddr2 which must be PI aware and distinct from uaddr. Normal wakeup will wake
+- * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+- * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
+- * without one, the pi logic would not know which task to boost/deboost, if
+- * there was a need to.
+- *
+- * We call schedule in futex_wait_queue_me() when we enqueue and return there
+- * via the following--
+- * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
+- * 2) wakeup on uaddr2 after a requeue
+- * 3) signal
+- * 4) timeout
+- *
+- * If 3, cleanup and return -ERESTARTNOINTR.
+- *
+- * If 2, we may then block on trying to take the rt_mutex and return via:
+- * 5) successful lock
+- * 6) signal
+- * 7) timeout
+- * 8) other lock acquisition failure
+- *
+- * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
+- *
+- * If 4 or 7, we cleanup and return with -ETIMEDOUT.
+- *
+- * Return:
+- * - 0 - On success;
+- * - <0 - On error
+- */
+-static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+- u32 val, ktime_t *abs_time, u32 bitset,
+- u32 __user *uaddr2)
+-{
+- struct hrtimer_sleeper timeout, *to;
+- struct rt_mutex_waiter rt_waiter;
+- struct futex_hash_bucket *hb;
+- union futex_key key2 = FUTEX_KEY_INIT;
+- struct futex_q q = futex_q_init;
+- struct rt_mutex_base *pi_mutex;
+- int res, ret;
+-
+- if (!IS_ENABLED(CONFIG_FUTEX_PI))
+- return -ENOSYS;
+-
+- if (uaddr == uaddr2)
+- return -EINVAL;
+-
+- if (!bitset)
+- return -EINVAL;
+-
+- to = futex_setup_timer(abs_time, &timeout, flags,
+- current->timer_slack_ns);
+-
+- /*
+- * The waiter is allocated on our stack, manipulated by the requeue
+- * code while we sleep on uaddr.
+- */
+- rt_mutex_init_waiter(&rt_waiter);
+-
+- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
+- if (unlikely(ret != 0))
+- goto out;
+-
+- q.bitset = bitset;
+- q.rt_waiter = &rt_waiter;
+- q.requeue_pi_key = &key2;
+-
+- /*
+- * Prepare to wait on uaddr. On success, it holds hb->lock and q
+- * is initialized.
+- */
+- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
+- if (ret)
+- goto out;
+-
+- /*
+- * The check above which compares uaddrs is not sufficient for
+- * shared futexes. We need to compare the keys:
+- */
+- if (match_futex(&q.key, &key2)) {
+- queue_unlock(hb);
+- ret = -EINVAL;
+- goto out;
+- }
+-
+- /* Queue the futex_q, drop the hb lock, wait for wakeup. */
+- futex_wait_queue_me(hb, &q, to);
+-
+- switch (futex_requeue_pi_wakeup_sync(&q)) {
+- case Q_REQUEUE_PI_IGNORE:
+- /* The waiter is still on uaddr1 */
+- spin_lock(&hb->lock);
+- ret = handle_early_requeue_pi_wakeup(hb, &q, to);
+- spin_unlock(&hb->lock);
+- break;
+-
+- case Q_REQUEUE_PI_LOCKED:
+- /* The requeue acquired the lock */
+- if (q.pi_state && (q.pi_state->owner != current)) {
+- spin_lock(q.lock_ptr);
+- ret = fixup_owner(uaddr2, &q, true);
+- /*
+- * Drop the reference to the pi state which the
+- * requeue_pi() code acquired for us.
+- */
+- put_pi_state(q.pi_state);
+- spin_unlock(q.lock_ptr);
+- /*
+- * Adjust the return value. It's either -EFAULT or
+- * success (1) but the caller expects 0 for success.
+- */
+- ret = ret < 0 ? ret : 0;
+- }
+- break;
+-
+- case Q_REQUEUE_PI_DONE:
+- /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
+- pi_mutex = &q.pi_state->pi_mutex;
+- ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
+-
+- /* Current is no longer pi_blocked_on */
+- spin_lock(q.lock_ptr);
+- if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
+- ret = 0;
+-
+- debug_rt_mutex_free_waiter(&rt_waiter);
+- /*
+- * Fixup the pi_state owner and possibly acquire the lock if we
+- * haven't already.
+- */
+- res = fixup_owner(uaddr2, &q, !ret);
+- /*
+- * If fixup_owner() returned an error, propagate that. If it
+- * acquired the lock, clear -ETIMEDOUT or -EINTR.
+- */
+- if (res)
+- ret = (res < 0) ? res : 0;
+-
+- unqueue_me_pi(&q);
+- spin_unlock(q.lock_ptr);
+-
+- if (ret == -EINTR) {
+- /*
+- * We've already been requeued, but cannot restart
+- * by calling futex_lock_pi() directly. We could
+- * restart this syscall, but it would detect that
+- * the user space "val" changed and return
+- * -EWOULDBLOCK. Save the overhead of the restart
+- * and return -EWOULDBLOCK directly.
+- */
+- ret = -EWOULDBLOCK;
+- }
+- break;
+- default:
+- BUG();
+- }
+-
+-out:
+- if (to) {
+- hrtimer_cancel(&to->timer);
+- destroy_hrtimer_on_stack(&to->timer);
+- }
+- return ret;
+-}
+-
+-/*
+- * Support for robust futexes: the kernel cleans up held futexes at
+- * thread exit time.
+- *
+- * Implementation: user-space maintains a per-thread list of locks it
+- * is holding. Upon do_exit(), the kernel carefully walks this list,
+- * and marks all locks that are owned by this thread with the
+- * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
+- * always manipulated with the lock held, so the list is private and
+- * per-thread. Userspace also maintains a per-thread 'list_op_pending'
+- * field, to allow the kernel to clean up if the thread dies after
+- * acquiring the lock, but just before it could have added itself to
+- * the list. There can only be one such pending lock.
+- */
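
The per-thread list described above is the one registered through sys_set_robust_list() below. A minimal user-space sketch of that registration (normally done by the C library; the struct names come from <linux/futex.h>, the surrounding program is illustrative):

#define _GNU_SOURCE
#include <linux/futex.h>	/* struct robust_list_head */
#include <sys/syscall.h>
#include <unistd.h>
#include <stddef.h>

/* Each thread owns one list head; the user-space lock/unlock paths link
 * and unlink their lock records (struct robust_list nodes) into it. */
static __thread struct robust_list_head robust_head;

static void register_robust_list(void)
{
	/* Empty circular list: the head points at itself. */
	robust_head.list.next = &robust_head.list;
	/* Offset from a lock's list node to its futex word; 0 here is
	 * purely illustrative, real locks embed both and set the offset. */
	robust_head.futex_offset = 0;
	robust_head.list_op_pending = NULL;

	syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
}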
+-
+-/**
+- * sys_set_robust_list() - Set the robust-futex list head of a task
+- * @head: pointer to the list-head
+- * @len: length of the list-head, as userspace expects
+- */
+-SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+- size_t, len)
+-{
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+- /*
+- * The kernel knows only one size for now:
+- */
+- if (unlikely(len != sizeof(*head)))
+- return -EINVAL;
+-
+- current->robust_list = head;
+-
+- return 0;
+-}
+-
+-/**
+- * sys_get_robust_list() - Get the robust-futex list head of a task
+- * @pid: pid of the process [zero for current task]
+- * @head_ptr: pointer to a list-head pointer, the kernel fills it in
+- * @len_ptr: pointer to a length field, the kernel fills in the header size
+- */
+-SYSCALL_DEFINE3(get_robust_list, int, pid,
+- struct robust_list_head __user * __user *, head_ptr,
+- size_t __user *, len_ptr)
+-{
+- struct robust_list_head __user *head;
+- unsigned long ret;
+- struct task_struct *p;
+-
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+-
+- rcu_read_lock();
+-
+- ret = -ESRCH;
+- if (!pid)
+- p = current;
+- else {
+- p = find_task_by_vpid(pid);
+- if (!p)
+- goto err_unlock;
+- }
+-
+- ret = -EPERM;
+- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+- goto err_unlock;
+-
+- head = p->robust_list;
+- rcu_read_unlock();
+-
+- if (put_user(sizeof(*head), len_ptr))
+- return -EFAULT;
+- return put_user(head, head_ptr);
+-
+-err_unlock:
+- rcu_read_unlock();
+-
+- return ret;
+-}
+-
+-/* Constants for the pending_op argument of handle_futex_death */
+-#define HANDLE_DEATH_PENDING true
+-#define HANDLE_DEATH_LIST false
+-
+-/*
+- * Process a futex-list entry, check whether it's owned by the
+- * dying task, and do notification if so:
+- */
+-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
+- bool pi, bool pending_op)
+-{
+- u32 uval, nval, mval;
+- int err;
+-
+- /* Futex address must be 32bit aligned */
+- if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+- return -1;
+-
+-retry:
+- if (get_user(uval, uaddr))
+- return -1;
+-
+- /*
+- * Special case for regular (non PI) futexes. The unlock path in
+- * user space has two race scenarios:
+- *
+- * 1. The unlock path releases the user space futex value and
+- * before it can execute the futex() syscall to wake up
+- * waiters it is killed.
+- *
+- * 2. A woken up waiter is killed before it can acquire the
+- * futex in user space.
+- *
+- * In both cases the TID validation below prevents a wakeup of
+- * potential waiters which can cause these waiters to block
+- * forever.
+- *
+- * In both cases the following conditions are met:
+- *
+- * 1) task->robust_list->list_op_pending != NULL
+- * @pending_op == true
+- * 2) User space futex value == 0
+- * 3) Regular futex: @pi == false
+- *
+- * If these conditions are met, it is safe to attempt waking up a
+- * potential waiter without touching the user space futex value and
+- * trying to set the OWNER_DIED bit. The user space futex value is
+- * uncontended and the rest of the user space mutex state is
+- * consistent, so a woken waiter will just take over the
+- * uncontended futex. Setting the OWNER_DIED bit would create
+- * inconsistent state and malfunction of the user space owner died
+- * handling.
+- */
+- if (pending_op && !pi && !uval) {
+- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+- return 0;
+- }
+-
+- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+- return 0;
+-
+- /*
+- * Ok, this dying thread is truly holding a futex
+- * of interest. Set the OWNER_DIED bit atomically
+- * via cmpxchg, and if the value had FUTEX_WAITERS
+- * set, wake up a waiter (if any). (We have to do a
+- * futex_wake() even if OWNER_DIED is already set -
+- * to handle the rare but possible case of recursive
+- * thread-death.) The rest of the cleanup is done in
+- * userspace.
+- */
+- mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+-
+- /*
+- * We are not holding a lock here, but we want to have
+- * the pagefault_disable/enable() protection because
+- * we want to handle the fault gracefully. If the
+- * access fails we try to fault in the futex with R/W
+- * verification via get_user_pages. get_user() above
+- * does not guarantee R/W access. If that fails we
+- * give up and leave the futex locked.
+- */
+- if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
+- switch (err) {
+- case -EFAULT:
+- if (fault_in_user_writeable(uaddr))
+- return -1;
+- goto retry;
+-
+- case -EAGAIN:
+- cond_resched();
+- goto retry;
+-
+- default:
+- WARN_ON_ONCE(1);
+- return err;
+- }
+- }
+-
+- if (nval != uval)
+- goto retry;
+-
+- /*
+- * Wake robust non-PI futexes here. The wakeup of
+- * PI futexes happens in exit_pi_state():
+- */
+- if (!pi && (uval & FUTEX_WAITERS))
+- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+-
+- return 0;
+-}
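
From user space, the OWNER_DIED handling above is what surfaces as EOWNERDEAD for robust POSIX mutexes. A short sketch using the standard pthread robust-mutex API (not part of this patch; the helper names are illustrative):

#include <pthread.h>
#include <errno.h>

static pthread_mutex_t m;

static void init_robust_mutex(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);
}

static void lock_robust_mutex(void)
{
	int err = pthread_mutex_lock(&m);

	if (err == EOWNERDEAD) {
		/* The previous owner died holding the lock (the kernel set
		 * FUTEX_OWNER_DIED); repair the protected state, then mark
		 * the mutex usable again. */
		pthread_mutex_consistent(&m);
	}
}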
+-
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int fetch_robust_entry(struct robust_list __user **entry,
+- struct robust_list __user * __user *head,
+- unsigned int *pi)
+-{
+- unsigned long uentry;
+-
+- if (get_user(uentry, (unsigned long __user *)head))
+- return -EFAULT;
+-
+- *entry = (void __user *)(uentry & ~1UL);
+- *pi = uentry & 1;
+-
+- return 0;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */
+-static void exit_robust_list(struct task_struct *curr)
+-{
+- struct robust_list_head __user *head = curr->robust_list;
+- struct robust_list __user *entry, *next_entry, *pending;
+- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+- unsigned int next_pi;
+- unsigned long futex_offset;
+- int rc;
+-
+- if (!futex_cmpxchg_enabled)
+- return;
+-
+- /*
+- * Fetch the list head (which was registered earlier, via
+- * sys_set_robust_list()):
+- */
+- if (fetch_robust_entry(&entry, &head->list.next, &pi))
+- return;
+- /*
+- * Fetch the relative futex offset:
+- */
+- if (get_user(futex_offset, &head->futex_offset))
+- return;
+- /*
+- * Fetch any possibly pending lock-add first, and handle it
+- * if it exists:
+- */
+- if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
+- return;
+-
+- next_entry = NULL; /* avoid warning with gcc */
+- while (entry != &head->list) {
+- /*
+- * Fetch the next entry in the list before calling
+- * handle_futex_death:
+- */
+- rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+- /*
+- * A pending lock might already be on the list, so
+- * don't process it twice:
+- */
+- if (entry != pending) {
+- if (handle_futex_death((void __user *)entry + futex_offset,
+- curr, pi, HANDLE_DEATH_LIST))
+- return;
+- }
+- if (rc)
+- return;
+- entry = next_entry;
+- pi = next_pi;
+- /*
+- * Avoid excessively long or circular lists:
+- */
+- if (!--limit)
+- break;
+-
+- cond_resched();
+- }
+-
+- if (pending) {
+- handle_futex_death((void __user *)pending + futex_offset,
+- curr, pip, HANDLE_DEATH_PENDING);
+- }
+-}
+-
+-static void futex_cleanup(struct task_struct *tsk)
+-{
+- if (unlikely(tsk->robust_list)) {
+- exit_robust_list(tsk);
+- tsk->robust_list = NULL;
+- }
+-
+-#ifdef CONFIG_COMPAT
+- if (unlikely(tsk->compat_robust_list)) {
+- compat_exit_robust_list(tsk);
+- tsk->compat_robust_list = NULL;
+- }
+-#endif
+-
+- if (unlikely(!list_empty(&tsk->pi_state_list)))
+- exit_pi_state_list(tsk);
+-}
+-
+-/**
+- * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
+- * @tsk: task to set the state on
+- *
+- * Set the futex exit state of the task lockless. The futex waiter code
+- * observes that state when a task is exiting and loops until the task has
+- * actually finished the futex cleanup. The worst case for this is that the
+- * waiter runs through the wait loop until the state becomes visible.
+- *
+- * This is called from the recursive fault handling path in do_exit().
+- *
+- * This is best effort. Either the futex exit code has run already or
+- * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+- * take it over. If not, the problem is pushed back to user space. If the
+- * futex exit code did not run yet, then an already queued waiter might
+- * block forever, but there is nothing which can be done about that.
+- */
+-void futex_exit_recursive(struct task_struct *tsk)
+-{
+- /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+- if (tsk->futex_state == FUTEX_STATE_EXITING)
+- mutex_unlock(&tsk->futex_exit_mutex);
+- tsk->futex_state = FUTEX_STATE_DEAD;
+-}
+-
+-static void futex_cleanup_begin(struct task_struct *tsk)
+-{
+- /*
+- * Prevent various race issues against a concurrent incoming waiter
+- * including live locks by forcing the waiter to block on
+- * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+- * attach_to_pi_owner().
+- */
+- mutex_lock(&tsk->futex_exit_mutex);
+-
+- /*
+- * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+- *
+- * This ensures that all subsequent checks of tsk->futex_state in
+- * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+- * tsk->pi_lock held.
+- *
+- * It guarantees also that a pi_state which was queued right before
+- * the state change under tsk->pi_lock by a concurrent waiter must
+- * be observed in exit_pi_state_list().
+- */
+- raw_spin_lock_irq(&tsk->pi_lock);
+- tsk->futex_state = FUTEX_STATE_EXITING;
+- raw_spin_unlock_irq(&tsk->pi_lock);
+-}
+-
+-static void futex_cleanup_end(struct task_struct *tsk, int state)
+-{
+- /*
+- * Lockless store. The only side effect is that an observer might
+- * take another loop until it becomes visible.
+- */
+- tsk->futex_state = state;
+- /*
+- * Drop the exit protection. This unblocks waiters which observed
+- * FUTEX_STATE_EXITING to reevaluate the state.
+- */
+- mutex_unlock(&tsk->futex_exit_mutex);
+-}
+-
+-void futex_exec_release(struct task_struct *tsk)
+-{
+- /*
+- * The state handling is done for consistency, but in the case of
+- * exec() there is no way to prevent further damage as the PID stays
+- * the same. But for the unlikely and arguably buggy case that a
+- * futex is held on exec(), this provides at least as much state
+- * consistency protection as is possible.
+- */
+- futex_cleanup_begin(tsk);
+- futex_cleanup(tsk);
+- /*
+- * Reset the state to FUTEX_STATE_OK. The task is alive and about
+- * to exec a new binary.
+- */
+- futex_cleanup_end(tsk, FUTEX_STATE_OK);
+-}
+-
+-void futex_exit_release(struct task_struct *tsk)
+-{
+- futex_cleanup_begin(tsk);
+- futex_cleanup(tsk);
+- futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
+-}
+-
+-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+- u32 __user *uaddr2, u32 val2, u32 val3)
+-{
+- int cmd = op & FUTEX_CMD_MASK;
+- unsigned int flags = 0;
+-
+- if (!(op & FUTEX_PRIVATE_FLAG))
+- flags |= FLAGS_SHARED;
+-
+- if (op & FUTEX_CLOCK_REALTIME) {
+- flags |= FLAGS_CLOCKRT;
+- if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+- cmd != FUTEX_LOCK_PI2)
+- return -ENOSYS;
+- }
+-
+- switch (cmd) {
+- case FUTEX_LOCK_PI:
+- case FUTEX_LOCK_PI2:
+- case FUTEX_UNLOCK_PI:
+- case FUTEX_TRYLOCK_PI:
+- case FUTEX_WAIT_REQUEUE_PI:
+- case FUTEX_CMP_REQUEUE_PI:
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+- }
+-
+- switch (cmd) {
+- case FUTEX_WAIT:
+- val3 = FUTEX_BITSET_MATCH_ANY;
+- fallthrough;
+- case FUTEX_WAIT_BITSET:
+- return futex_wait(uaddr, flags, val, timeout, val3);
+- case FUTEX_WAKE:
+- val3 = FUTEX_BITSET_MATCH_ANY;
+- fallthrough;
+- case FUTEX_WAKE_BITSET:
+- return futex_wake(uaddr, flags, val, val3);
+- case FUTEX_REQUEUE:
+- return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
+- case FUTEX_CMP_REQUEUE:
+- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
+- case FUTEX_WAKE_OP:
+- return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
+- case FUTEX_LOCK_PI:
+- flags |= FLAGS_CLOCKRT;
+- fallthrough;
+- case FUTEX_LOCK_PI2:
+- return futex_lock_pi(uaddr, flags, timeout, 0);
+- case FUTEX_UNLOCK_PI:
+- return futex_unlock_pi(uaddr, flags);
+- case FUTEX_TRYLOCK_PI:
+- return futex_lock_pi(uaddr, flags, NULL, 1);
+- case FUTEX_WAIT_REQUEUE_PI:
+- val3 = FUTEX_BITSET_MATCH_ANY;
+- return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+- uaddr2);
+- case FUTEX_CMP_REQUEUE_PI:
+- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
+- }
+- return -ENOSYS;
+-}
+-
+-static __always_inline bool futex_cmd_has_timeout(u32 cmd)
+-{
+- switch (cmd) {
+- case FUTEX_WAIT:
+- case FUTEX_LOCK_PI:
+- case FUTEX_LOCK_PI2:
+- case FUTEX_WAIT_BITSET:
+- case FUTEX_WAIT_REQUEUE_PI:
+- return true;
+- }
+- return false;
+-}
+-
+-static __always_inline int
+-futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
+-{
+- if (!timespec64_valid(ts))
+- return -EINVAL;
+-
+- *t = timespec64_to_ktime(*ts);
+- if (cmd == FUTEX_WAIT)
+- *t = ktime_add_safe(ktime_get(), *t);
+- else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
+- *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
+- return 0;
+-}
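
The conversion above reflects the documented futex(2) timeout rules: FUTEX_WAIT takes a relative timeout (turned into an absolute expiry here), while FUTEX_WAIT_BITSET and FUTEX_LOCK_PI take absolute timeouts. A hedged user-space sketch (futex_word is illustrative):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <time.h>

static unsigned int futex_word;

static void wait_with_timeouts(void)
{
	struct timespec rel = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
	struct timespec abs;

	/* FUTEX_WAIT: relative timeout, here 100ms from now. */
	syscall(SYS_futex, &futex_word, FUTEX_WAIT, 0, &rel, NULL, 0);

	/* FUTEX_WAIT_BITSET: absolute CLOCK_MONOTONIC deadline. */
	clock_gettime(CLOCK_MONOTONIC, &abs);
	abs.tv_sec += 1;
	syscall(SYS_futex, &futex_word, FUTEX_WAIT_BITSET, 0, &abs, NULL,
		FUTEX_BITSET_MATCH_ANY);
}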
+-
+-SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+- const struct __kernel_timespec __user *, utime,
+- u32 __user *, uaddr2, u32, val3)
+-{
+- int ret, cmd = op & FUTEX_CMD_MASK;
+- ktime_t t, *tp = NULL;
+- struct timespec64 ts;
+-
+- if (utime && futex_cmd_has_timeout(cmd)) {
+- if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+- return -EFAULT;
+- if (get_timespec64(&ts, utime))
+- return -EFAULT;
+- ret = futex_init_timeout(cmd, op, &ts, &t);
+- if (ret)
+- return ret;
+- tp = &t;
+- }
+-
+- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+-}
+-
+-#ifdef CONFIG_COMPAT
+-/*
+- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+- */
+-static inline int
+-compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+- compat_uptr_t __user *head, unsigned int *pi)
+-{
+- if (get_user(*uentry, head))
+- return -EFAULT;
+-
+- *entry = compat_ptr((*uentry) & ~1);
+- *pi = (unsigned int)(*uentry) & 1;
+-
+- return 0;
+-}
+-
+-static void __user *futex_uaddr(struct robust_list __user *entry,
+- compat_long_t futex_offset)
+-{
+- compat_uptr_t base = ptr_to_compat(entry);
+- void __user *uaddr = compat_ptr(base + futex_offset);
+-
+- return uaddr;
+-}
+-
+-/*
+- * Walk curr->robust_list (very carefully, it's a userspace list!)
+- * and mark any locks found there dead, and notify any waiters.
+- *
+- * We silently return on any sign of list-walking problem.
+- */
+-static void compat_exit_robust_list(struct task_struct *curr)
+-{
+- struct compat_robust_list_head __user *head = curr->compat_robust_list;
+- struct robust_list __user *entry, *next_entry, *pending;
+- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+- unsigned int next_pi;
+- compat_uptr_t uentry, next_uentry, upending;
+- compat_long_t futex_offset;
+- int rc;
+-
+- if (!futex_cmpxchg_enabled)
+- return;
+-
+- /*
+- * Fetch the list head (which was registered earlier, via
+- * sys_set_robust_list()):
+- */
+- if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
+- return;
+- /*
+- * Fetch the relative futex offset:
+- */
+- if (get_user(futex_offset, &head->futex_offset))
+- return;
+- /*
+- * Fetch any possibly pending lock-add first, and handle it
+- * if it exists:
+- */
+- if (compat_fetch_robust_entry(&upending, &pending,
+- &head->list_op_pending, &pip))
+- return;
+-
+- next_entry = NULL; /* avoid warning with gcc */
+- while (entry != (struct robust_list __user *) &head->list) {
+- /*
+- * Fetch the next entry in the list before calling
+- * handle_futex_death:
+- */
+- rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
+- (compat_uptr_t __user *)&entry->next, &next_pi);
+- /*
+- * A pending lock might already be on the list, so
+- * don't process it twice:
+- */
+- if (entry != pending) {
+- void __user *uaddr = futex_uaddr(entry, futex_offset);
+-
+- if (handle_futex_death(uaddr, curr, pi,
+- HANDLE_DEATH_LIST))
+- return;
+- }
+- if (rc)
+- return;
+- uentry = next_uentry;
+- entry = next_entry;
+- pi = next_pi;
+- /*
+- * Avoid excessively long or circular lists:
+- */
+- if (!--limit)
+- break;
+-
+- cond_resched();
+- }
+- if (pending) {
+- void __user *uaddr = futex_uaddr(pending, futex_offset);
+-
+- handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
+- }
+-}
+-
+-COMPAT_SYSCALL_DEFINE2(set_robust_list,
+- struct compat_robust_list_head __user *, head,
+- compat_size_t, len)
+-{
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+-
+- if (unlikely(len != sizeof(*head)))
+- return -EINVAL;
+-
+- current->compat_robust_list = head;
+-
+- return 0;
+-}
+-
+-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+- compat_uptr_t __user *, head_ptr,
+- compat_size_t __user *, len_ptr)
+-{
+- struct compat_robust_list_head __user *head;
+- unsigned long ret;
+- struct task_struct *p;
+-
+- if (!futex_cmpxchg_enabled)
+- return -ENOSYS;
+-
+- rcu_read_lock();
+-
+- ret = -ESRCH;
+- if (!pid)
+- p = current;
+- else {
+- p = find_task_by_vpid(pid);
+- if (!p)
+- goto err_unlock;
+- }
+-
+- ret = -EPERM;
+- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+- goto err_unlock;
+-
+- head = p->compat_robust_list;
+- rcu_read_unlock();
+-
+- if (put_user(sizeof(*head), len_ptr))
+- return -EFAULT;
+- return put_user(ptr_to_compat(head), head_ptr);
+-
+-err_unlock:
+- rcu_read_unlock();
+-
+- return ret;
+-}
+-#endif /* CONFIG_COMPAT */
+-
+-#ifdef CONFIG_COMPAT_32BIT_TIME
+-SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
+- const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+- u32, val3)
+-{
+- int ret, cmd = op & FUTEX_CMD_MASK;
+- ktime_t t, *tp = NULL;
+- struct timespec64 ts;
+-
+- if (utime && futex_cmd_has_timeout(cmd)) {
+- if (get_old_timespec32(&ts, utime))
+- return -EFAULT;
+- ret = futex_init_timeout(cmd, op, &ts, &t);
+- if (ret)
+- return ret;
+- tp = &t;
+- }
+-
+- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+-}
+-#endif /* CONFIG_COMPAT_32BIT_TIME */
+-
+-static void __init futex_detect_cmpxchg(void)
+-{
+-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+- u32 curval;
+-
+- /*
+- * This will fail and we want it. Some arch implementations do
+- * runtime detection of the futex_atomic_cmpxchg_inatomic()
+- * functionality. We want to know that before we call in any
+- * of the complex code paths. Also we want to prevent
+- * registration of robust lists in that case. NULL is
+- * guaranteed to fault and we get -EFAULT on functional
+- * implementation, the non-functional ones will return
+- * -ENOSYS.
+- */
+- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
+- futex_cmpxchg_enabled = 1;
+-#endif
+-}
+-
+-static int __init futex_init(void)
+-{
+- unsigned int futex_shift;
+- unsigned long i;
+-
+-#if CONFIG_BASE_SMALL
+- futex_hashsize = 16;
+-#else
+- futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
+-#endif
+-
+- futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
+- futex_hashsize, 0,
+- futex_hashsize < 256 ? HASH_SMALL : 0,
+- &futex_shift, NULL,
+- futex_hashsize, futex_hashsize);
+- futex_hashsize = 1UL << futex_shift;
+-
+- futex_detect_cmpxchg();
+-
+- for (i = 0; i < futex_hashsize; i++) {
+- atomic_set(&futex_queues[i].waiters, 0);
+- plist_head_init(&futex_queues[i].chain);
+- spin_lock_init(&futex_queues[i].lock);
+- }
+-
+- return 0;
+-}
+-core_initcall(futex_init);
+diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile
+new file mode 100644
+index 0000000000000..b89ba3fba3437
+--- /dev/null
++++ b/kernel/futex/Makefile
+@@ -0,0 +1,3 @@
++# SPDX-License-Identifier: GPL-2.0
++
++obj-y += core.o
+diff --git a/kernel/futex/core.c b/kernel/futex/core.c
+new file mode 100644
+index 0000000000000..764e73622b386
+--- /dev/null
++++ b/kernel/futex/core.c
+@@ -0,0 +1,4280 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * Fast Userspace Mutexes (which I call "Futexes!").
++ * (C) Rusty Russell, IBM 2002
++ *
++ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
++ * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
++ *
++ * Removed page pinning, fix privately mapped COW pages and other cleanups
++ * (C) Copyright 2003, 2004 Jamie Lokier
++ *
++ * Robust futex support started by Ingo Molnar
++ * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
++ * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
++ *
++ * PI-futex support started by Ingo Molnar and Thomas Gleixner
++ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
++ * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
++ *
++ * PRIVATE futexes by Eric Dumazet
++ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
++ *
++ * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
++ * Copyright (C) IBM Corporation, 2009
++ * Thanks to Thomas Gleixner for conceptual design and careful reviews.
++ *
++ * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
++ * enough at me, Linus for the original (flawed) idea, Matthew
++ * Kirkwood for proof-of-concept implementation.
++ *
++ * "The futexes are also cursed."
++ * "But they come in a choice of three flavours!"
++ */
++#include <linux/compat.h>
++#include <linux/jhash.h>
++#include <linux/pagemap.h>
++#include <linux/syscalls.h>
++#include <linux/freezer.h>
++#include <linux/memblock.h>
++#include <linux/fault-inject.h>
++#include <linux/time_namespace.h>
++
++#include <asm/futex.h>
++
++#include "../locking/rtmutex_common.h"
++
++/*
++ * READ this before attempting to hack on futexes!
++ *
++ * Basic futex operation and ordering guarantees
++ * =============================================
++ *
++ * The waiter reads the futex value in user space and calls
++ * futex_wait(). This function computes the hash bucket and acquires
++ * the hash bucket lock. After that it reads the futex user space value
++ * again and verifies that the data has not changed. If it has not changed
++ * it enqueues itself into the hash bucket, releases the hash bucket lock
++ * and schedules.
++ *
++ * The waker side modifies the user space value of the futex and calls
++ * futex_wake(). This function computes the hash bucket and acquires the
++ * hash bucket lock. Then it looks for waiters on that futex in the hash
++ * bucket and wakes them.
++ *
++ * In futex wake up scenarios where no tasks are blocked on a futex, taking
++ * the hb spinlock can be avoided and the waker can simply return. In order for this
++ * optimization to work, ordering guarantees must exist so that the waiter
++ * being added to the list is acknowledged when the list is concurrently being
++ * checked by the waker, avoiding scenarios like the following:
++ *
++ * CPU 0 CPU 1
++ * val = *futex;
++ * sys_futex(WAIT, futex, val);
++ * futex_wait(futex, val);
++ * uval = *futex;
++ * *futex = newval;
++ * sys_futex(WAKE, futex);
++ * futex_wake(futex);
++ * if (queue_empty())
++ * return;
++ * if (uval == val)
++ * lock(hash_bucket(futex));
++ * queue();
++ * unlock(hash_bucket(futex));
++ * schedule();
++ *
++ * This would cause the waiter on CPU 0 to wait forever because it
++ * missed the transition of the user space value from val to newval
++ * and the waker did not find the waiter in the hash bucket queue.
++ *
++ * The correct serialization ensures that a waiter either observes
++ * the changed user space value before blocking or is woken by a
++ * concurrent waker:
++ *
++ * CPU 0 CPU 1
++ * val = *futex;
++ * sys_futex(WAIT, futex, val);
++ * futex_wait(futex, val);
++ *
++ * waiters++; (a)
++ * smp_mb(); (A) <-- paired with -.
++ * |
++ * lock(hash_bucket(futex)); |
++ * |
++ * uval = *futex; |
++ * | *futex = newval;
++ * | sys_futex(WAKE, futex);
++ * | futex_wake(futex);
++ * |
++ * `--------> smp_mb(); (B)
++ * if (uval == val)
++ * queue();
++ * unlock(hash_bucket(futex));
++ * schedule(); if (waiters)
++ * lock(hash_bucket(futex));
++ * else wake_waiters(futex);
++ * waiters--; (b) unlock(hash_bucket(futex));
++ *
++ * Where (A) orders the waiters increment and the futex value read through
++ * atomic operations (see hb_waiters_inc) and where (B) orders the write
++ * to futex and the waiters read (see hb_waiters_pending()).
++ *
++ * This yields the following case (where X:=waiters, Y:=futex):
++ *
++ * X = Y = 0
++ *
++ * w[X]=1 w[Y]=1
++ * MB MB
++ * r[Y]=y r[X]=x
++ *
++ * Which guarantees that x==0 && y==0 is impossible; which translates back into
++ * the guarantee that we cannot both miss the futex variable change and the
++ * enqueue.
++ *
++ * Note that a new waiter is accounted for in (a) even when it is possible that
++ * the wait call can return an error, in which case we backtrack from it in (b).
++ * Refer to the comment in queue_lock().
++ *
++ * Similarly, in order to account for waiters being requeued on another
++ * address we always increment the waiters for the destination bucket before
++ * acquiring the lock. It then decrements them again after releasing it -
++ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
++ * will do the additional required waiter count housekeeping. This is done for
++ * double_lock_hb() and double_unlock_hb(), respectively.
++ */
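
The ordering rules above exist to make the canonical user-space pattern safe: check the value, then wait on it; store the new value, then wake. A minimal sketch against the documented futex(2) interface (flag and the helpers are illustrative, not part of this patch):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>

static _Atomic unsigned int flag;	/* 0 = not ready, 1 = ready */

static void waiter(void)
{
	while (atomic_load_explicit(&flag, memory_order_acquire) == 0) {
		/* Sleeps only if the kernel still sees flag == 0 under the
		 * hash bucket lock; otherwise it fails with EAGAIN and we
		 * re-check the value. */
		syscall(SYS_futex, &flag, FUTEX_WAIT_PRIVATE, 0, NULL, NULL, 0);
	}
}

static void waker(void)
{
	atomic_store_explicit(&flag, 1, memory_order_release);
	syscall(SYS_futex, &flag, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
}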
++
++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
++#define futex_cmpxchg_enabled 1
++#else
++static int __read_mostly futex_cmpxchg_enabled;
++#endif
++
++/*
++ * Futex flags used to encode options to functions and preserve them across
++ * restarts.
++ */
++#ifdef CONFIG_MMU
++# define FLAGS_SHARED 0x01
++#else
++/*
++ * NOMMU does not have per process address space. Let the compiler optimize
++ * code away.
++ */
++# define FLAGS_SHARED 0x00
++#endif
++#define FLAGS_CLOCKRT 0x02
++#define FLAGS_HAS_TIMEOUT 0x04
++
++/*
++ * Priority Inheritance state:
++ */
++struct futex_pi_state {
++ /*
++ * list of 'owned' pi_state instances - these have to be
++ * cleaned up in do_exit() if the task exits prematurely:
++ */
++ struct list_head list;
++
++ /*
++ * The PI object:
++ */
++ struct rt_mutex_base pi_mutex;
++
++ struct task_struct *owner;
++ refcount_t refcount;
++
++ union futex_key key;
++} __randomize_layout;
++
++/**
++ * struct futex_q - The hashed futex queue entry, one per waiting task
++ * @list: priority-sorted list of tasks waiting on this futex
++ * @task: the task waiting on the futex
++ * @lock_ptr: the hash bucket lock
++ * @key: the key the futex is hashed on
++ * @pi_state: optional priority inheritance state
++ * @rt_waiter: rt_waiter storage for use with requeue_pi
++ * @requeue_pi_key: the requeue_pi target futex key
++ * @bitset: bitset for the optional bitmasked wakeup
++ * @requeue_state: State field for futex_requeue_pi()
++ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
++ *
++ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
++ * we can wake only the relevant ones (hashed queues may be shared).
++ *
++ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
++ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
++ * The order of wakeup is always to make the first condition true, then
++ * the second.
++ *
++ * PI futexes are typically woken before they are removed from the hash list via
++ * the rt_mutex code. See unqueue_me_pi().
++ */
++struct futex_q {
++ struct plist_node list;
++
++ struct task_struct *task;
++ spinlock_t *lock_ptr;
++ union futex_key key;
++ struct futex_pi_state *pi_state;
++ struct rt_mutex_waiter *rt_waiter;
++ union futex_key *requeue_pi_key;
++ u32 bitset;
++ atomic_t requeue_state;
++#ifdef CONFIG_PREEMPT_RT
++ struct rcuwait requeue_wait;
++#endif
++} __randomize_layout;
++
++/*
++ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an
++ * underlying rtmutex. The task which is about to be requeued could have
++ * just woken up (timeout, signal). After the wake up the task has to
++ * acquire hash bucket lock, which is held by the requeue code. As a task
++ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking
++ * and the hash bucket lock blocking would collide and corrupt state.
++ *
++ * On !PREEMPT_RT this is not a problem and everything could be serialized
++ * on the hash bucket lock, but aside from the benefit of common code,
++ * this allows us to avoid doing the requeue when the task is already on the
++ * way out and taking the hash bucket lock of the original uaddr1 when the
++ * requeue has been completed.
++ *
++ * The following state transitions are valid:
++ *
++ * On the waiter side:
++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE
++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT
++ *
++ * On the requeue side:
++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IN_PROGRESS
++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED
++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed)
++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED
++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed)
++ *
++ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this
++ * signals that the waiter is already on the way out. It also means that
++ * the waiter is still on the 'wait' futex, i.e. uaddr1.
++ *
++ * The waiter side signals early wakeup to the requeue side either through
++ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending
++ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately
++ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT,
++ * which means the wakeup is interleaving with a requeue in progress, it has
++ * to wait for the requeue side to change the state. Either to DONE/LOCKED
++ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex
++ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by
++ * the requeue side when the requeue attempt failed via deadlock detection
++ * and therefore the waiter q is still on the uaddr1 futex.
++ */
++enum {
++ Q_REQUEUE_PI_NONE = 0,
++ Q_REQUEUE_PI_IGNORE,
++ Q_REQUEUE_PI_IN_PROGRESS,
++ Q_REQUEUE_PI_WAIT,
++ Q_REQUEUE_PI_DONE,
++ Q_REQUEUE_PI_LOCKED,
++};
++
++static const struct futex_q futex_q_init = {
++ /* list gets initialized in queue_me() */
++ .key = FUTEX_KEY_INIT,
++ .bitset = FUTEX_BITSET_MATCH_ANY,
++ .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
++};
++
++/*
++ * Hash buckets are shared by all the futex_keys that hash to the same
++ * location. Each key may have multiple futex_q structures, one for each task
++ * waiting on a futex.
++ */
++struct futex_hash_bucket {
++ atomic_t waiters;
++ spinlock_t lock;
++ struct plist_head chain;
++} ____cacheline_aligned_in_smp;
++
++/*
++ * The base of the bucket array and its size are always used together
++ * (after initialization only in hash_futex()), so ensure that they
++ * reside in the same cacheline.
++ */
++static struct {
++ struct futex_hash_bucket *queues;
++ unsigned long hashsize;
++} __futex_data __read_mostly __aligned(2*sizeof(long));
++#define futex_queues (__futex_data.queues)
++#define futex_hashsize (__futex_data.hashsize)
++
++
++/*
++ * Fault injections for futexes.
++ */
++#ifdef CONFIG_FAIL_FUTEX
++
++static struct {
++ struct fault_attr attr;
++
++ bool ignore_private;
++} fail_futex = {
++ .attr = FAULT_ATTR_INITIALIZER,
++ .ignore_private = false,
++};
++
++static int __init setup_fail_futex(char *str)
++{
++ return setup_fault_attr(&fail_futex.attr, str);
++}
++__setup("fail_futex=", setup_fail_futex);
++
++static bool should_fail_futex(bool fshared)
++{
++ if (fail_futex.ignore_private && !fshared)
++ return false;
++
++ return should_fail(&fail_futex.attr, 1);
++}
++
++#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
++
++static int __init fail_futex_debugfs(void)
++{
++ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
++ struct dentry *dir;
++
++ dir = fault_create_debugfs_attr("fail_futex", NULL,
++ &fail_futex.attr);
++ if (IS_ERR(dir))
++ return PTR_ERR(dir);
++
++ debugfs_create_bool("ignore-private", mode, dir,
++ &fail_futex.ignore_private);
++ return 0;
++}
++
++late_initcall(fail_futex_debugfs);
++
++#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
++
++#else
++static inline bool should_fail_futex(bool fshared)
++{
++ return false;
++}
++#endif /* CONFIG_FAIL_FUTEX */
++
++#ifdef CONFIG_COMPAT
++static void compat_exit_robust_list(struct task_struct *curr);
++#endif
++
++/*
++ * Reflects a new waiter being added to the waitqueue.
++ */
++static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
++{
++#ifdef CONFIG_SMP
++ atomic_inc(&hb->waiters);
++ /*
++ * Full barrier (A), see the ordering comment above.
++ */
++ smp_mb__after_atomic();
++#endif
++}
++
++/*
++ * Reflects a waiter being removed from the waitqueue by wakeup
++ * paths.
++ */
++static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
++{
++#ifdef CONFIG_SMP
++ atomic_dec(&hb->waiters);
++#endif
++}
++
++static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
++{
++#ifdef CONFIG_SMP
++ /*
++ * Full barrier (B), see the ordering comment above.
++ */
++ smp_mb();
++ return atomic_read(&hb->waiters);
++#else
++ return 1;
++#endif
++}
++
++/**
++ * hash_futex - Return the hash bucket in the global hash
++ * @key: Pointer to the futex key for which the hash is calculated
++ *
++ * We hash on the keys returned from get_futex_key (see below) and return the
++ * corresponding hash bucket in the global hash.
++ */
++static struct futex_hash_bucket *hash_futex(union futex_key *key)
++{
++ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
++ key->both.offset);
++
++ return &futex_queues[hash & (futex_hashsize - 1)];
++}
++
++
++/**
++ * match_futex - Check whether two futex keys are equal
++ * @key1: Pointer to key1
++ * @key2: Pointer to key2
++ *
++ * Return 1 if two futex_keys are equal, 0 otherwise.
++ */
++static inline int match_futex(union futex_key *key1, union futex_key *key2)
++{
++ return (key1 && key2
++ && key1->both.word == key2->both.word
++ && key1->both.ptr == key2->both.ptr
++ && key1->both.offset == key2->both.offset);
++}
++
++enum futex_access {
++ FUTEX_READ,
++ FUTEX_WRITE
++};
++
++/**
++ * futex_setup_timer - set up the sleeping hrtimer.
++ * @time: ptr to the given timeout value
++ * @timeout: the hrtimer_sleeper structure to be set up
++ * @flags: futex flags
++ * @range_ns: optional range in ns
++ *
++ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
++ * value given
++ */
++static inline struct hrtimer_sleeper *
++futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
++ int flags, u64 range_ns)
++{
++ if (!time)
++ return NULL;
++
++ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
++ CLOCK_REALTIME : CLOCK_MONOTONIC,
++ HRTIMER_MODE_ABS);
++ /*
++ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
++ * effectively the same as calling hrtimer_set_expires().
++ */
++ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
++
++ return timeout;
++}
++
++/*
++ * Generate a machine wide unique identifier for this inode.
++ *
++ * This relies on u64 not wrapping in the life-time of the machine; which with
++ * 1ns resolution means almost 585 years.
++ *
++ * This further relies on the fact that a well formed program will not unmap
++ * the file while it has a (shared) futex waiting on it. This mapping will have
++ * a file reference which pins the mount and inode.
++ *
++ * If for some reason an inode gets evicted and read back in again, it will get
++ * a new sequence number and will _NOT_ match, even though it is the exact same
++ * file.
++ *
++ * It is important that match_futex() will never have a false-positive, esp.
++ * for PI futexes that can mess up the state. The above argues that false-negatives
++ * are only possible for malformed programs.
++ */
++static u64 get_inode_sequence_number(struct inode *inode)
++{
++ static atomic64_t i_seq;
++ u64 old;
++
++ /* Does the inode already have a sequence number? */
++ old = atomic64_read(&inode->i_sequence);
++ if (likely(old))
++ return old;
++
++ for (;;) {
++ u64 new = atomic64_add_return(1, &i_seq);
++ if (WARN_ON_ONCE(!new))
++ continue;
++
++ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
++ if (old)
++ return old;
++ return new;
++ }
++}
++
++/**
++ * get_futex_key() - Get parameters which are the keys for a futex
++ * @uaddr: virtual address of the futex
++ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
++ * @key: address where result is stored.
++ * @rw: mapping needs to be read/write (values: FUTEX_READ,
++ * FUTEX_WRITE)
++ *
++ * Return: a negative error code or 0
++ *
++ * The key words are stored in @key on success.
++ *
++ * For shared mappings (when @fshared), the key is:
++ *
++ * ( inode->i_sequence, page->index, offset_within_page )
++ *
++ * [ also see get_inode_sequence_number() ]
++ *
++ * For private mappings (or when !@fshared), the key is:
++ *
++ * ( current->mm, address, 0 )
++ *
++ * This allows (cross process, where applicable) identification of the futex
++ * without keeping the page pinned for the duration of the FUTEX_WAIT.
++ *
++ * lock_page() might sleep, the caller should not hold a spinlock.
++ */
++static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
++ enum futex_access rw)
++{
++ unsigned long address = (unsigned long)uaddr;
++ struct mm_struct *mm = current->mm;
++ struct page *page, *tail;
++ struct address_space *mapping;
++ int err, ro = 0;
++
++ /*
++ * The futex address must be "naturally" aligned.
++ */
++ key->both.offset = address % PAGE_SIZE;
++ if (unlikely((address % sizeof(u32)) != 0))
++ return -EINVAL;
++ address -= key->both.offset;
++
++ if (unlikely(!access_ok(uaddr, sizeof(u32))))
++ return -EFAULT;
++
++ if (unlikely(should_fail_futex(fshared)))
++ return -EFAULT;
++
++ /*
++ * PROCESS_PRIVATE futexes are fast.
++ * As the mm cannot disappear under us and the 'key' only needs
++ * virtual address, we don't even have to find the underlying vma.
++ * Note: We do have to check that 'uaddr' is a valid user address,
++ * but access_ok() should be faster than find_vma().
++ */
++ if (!fshared) {
++ key->private.mm = mm;
++ key->private.address = address;
++ return 0;
++ }
++
++again:
++ /* Ignore any VERIFY_READ mapping (futex common case) */
++ if (unlikely(should_fail_futex(true)))
++ return -EFAULT;
++
++ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
++ /*
++ * If write access is not required (eg. FUTEX_WAIT), try
++ * and get read-only access.
++ */
++ if (err == -EFAULT && rw == FUTEX_READ) {
++ err = get_user_pages_fast(address, 1, 0, &page);
++ ro = 1;
++ }
++ if (err < 0)
++ return err;
++ else
++ err = 0;
++
++ /*
++ * The treatment of mapping from this point on is critical. The page
++ * lock protects many things but in this context the page lock
++ * stabilizes mapping, prevents inode freeing in the shared
++ * file-backed region case and guards against movement to swap cache.
++ *
++ * Strictly speaking the page lock is not needed in all cases being
++ * considered here and the page lock forces unnecessary serialization.
++ * From this point on, mapping will be re-verified if necessary and
++ * the page lock will be acquired only if it is unavoidable.
++ *
++ * Mapping checks require the head page for any compound page so the
++ * head page and mapping is looked up now. For anonymous pages, it
++ * does not matter if the page splits in the future as the key is
++ * based on the address. For filesystem-backed pages, the tail is
++ * required as the index of the page determines the key. For
++ * base pages, there is no tail page and tail == page.
++ */
++ tail = page;
++ page = compound_head(page);
++ mapping = READ_ONCE(page->mapping);
++
++ /*
++ * If page->mapping is NULL, then it cannot be a PageAnon
++ * page; but it might be the ZERO_PAGE or in the gate area or
++ * in a special mapping (all cases which we are happy to fail);
++ * or it may have been a good file page when get_user_pages_fast
++ * found it, but truncated or holepunched or subjected to
++ * invalidate_complete_page2 before we got the page lock (also
++ * cases which we are happy to fail). And we hold a reference,
++ * so refcount care in invalidate_complete_page's remove_mapping
++ * prevents drop_caches from setting mapping to NULL beneath us.
++ *
++ * The case we do have to guard against is when memory pressure made
++ * shmem_writepage move it from filecache to swapcache beneath us:
++ * an unlikely race, but we do need to retry for page->mapping.
++ */
++ if (unlikely(!mapping)) {
++ int shmem_swizzled;
++
++ /*
++ * Page lock is required to identify which special case above
++ * applies. If this is really a shmem page then the page lock
++ * will prevent unexpected transitions.
++ */
++ lock_page(page);
++ shmem_swizzled = PageSwapCache(page) || page->mapping;
++ unlock_page(page);
++ put_page(page);
++
++ if (shmem_swizzled)
++ goto again;
++
++ return -EFAULT;
++ }
++
++ /*
++ * Private mappings are handled in a simple way.
++ *
++ * If the futex key is stored on an anonymous page, then the associated
++ * object is the mm which is implicitly pinned by the calling process.
++ *
++ * NOTE: When userspace waits on a MAP_SHARED mapping, even if
++ * it's a read-only handle, it's expected that futexes attach to
++ * the object not the particular process.
++ */
++ if (PageAnon(page)) {
++ /*
++ * A RO anonymous page will never change and thus doesn't make
++ * sense for futex operations.
++ */
++ if (unlikely(should_fail_futex(true)) || ro) {
++ err = -EFAULT;
++ goto out;
++ }
++
++ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
++ key->private.mm = mm;
++ key->private.address = address;
++
++ } else {
++ struct inode *inode;
++
++ /*
++ * The associated futex object in this case is the inode and
++ * the page->mapping must be traversed. Ordinarily this should
++ * be stabilised under page lock but it's not strictly
++ * necessary in this case as we just want to pin the inode, not
++ * update the radix tree or anything like that.
++ *
++ * The RCU read lock is taken as the inode is finally freed
++ * under RCU. If the mapping still matches expectations then the
++ * mapping->host can be safely accessed as being a valid inode.
++ */
++ rcu_read_lock();
++
++ if (READ_ONCE(page->mapping) != mapping) {
++ rcu_read_unlock();
++ put_page(page);
++
++ goto again;
++ }
++
++ inode = READ_ONCE(mapping->host);
++ if (!inode) {
++ rcu_read_unlock();
++ put_page(page);
++
++ goto again;
++ }
++
++ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
++ key->shared.i_seq = get_inode_sequence_number(inode);
++ key->shared.pgoff = page_to_pgoff(tail);
++ rcu_read_unlock();
++ }
++
++out:
++ put_page(page);
++ return err;
++}
++
++/**
++ * fault_in_user_writeable() - Fault in user address and verify RW access
++ * @uaddr: pointer to faulting user space address
++ *
++ * Slow path to fixup the fault we just took in the atomic write
++ * access to @uaddr.
++ *
++ * We have no generic implementation of a non-destructive write to the
++ * user address. We know that we faulted in the atomic pagefault
++ * disabled section so we can as well avoid the #PF overhead by
++ * calling get_user_pages() right away.
++ */
++static int fault_in_user_writeable(u32 __user *uaddr)
++{
++ struct mm_struct *mm = current->mm;
++ int ret;
++
++ mmap_read_lock(mm);
++ ret = fixup_user_fault(mm, (unsigned long)uaddr,
++ FAULT_FLAG_WRITE, NULL);
++ mmap_read_unlock(mm);
++
++ return ret < 0 ? ret : 0;
++}
++
++/**
++ * futex_top_waiter() - Return the highest priority waiter on a futex
++ * @hb: the hash bucket the futex_q's reside in
++ * @key: the futex key (to distinguish it from other futex futex_q's)
++ *
++ * Must be called with the hb lock held.
++ */
++static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
++ union futex_key *key)
++{
++ struct futex_q *this;
++
++ plist_for_each_entry(this, &hb->chain, list) {
++ if (match_futex(&this->key, key))
++ return this;
++ }
++ return NULL;
++}
++
++static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
++ u32 uval, u32 newval)
++{
++ int ret;
++
++ pagefault_disable();
++ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
++ pagefault_enable();
++
++ return ret;
++}
++
++static int get_futex_value_locked(u32 *dest, u32 __user *from)
++{
++ int ret;
++
++ pagefault_disable();
++ ret = __get_user(*dest, from);
++ pagefault_enable();
++
++ return ret ? -EFAULT : 0;
++}
++
++
++/*
++ * PI code:
++ */
++static int refill_pi_state_cache(void)
++{
++ struct futex_pi_state *pi_state;
++
++ if (likely(current->pi_state_cache))
++ return 0;
++
++ pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
++
++ if (!pi_state)
++ return -ENOMEM;
++
++ INIT_LIST_HEAD(&pi_state->list);
++ /* pi_mutex gets initialized later */
++ pi_state->owner = NULL;
++ refcount_set(&pi_state->refcount, 1);
++ pi_state->key = FUTEX_KEY_INIT;
++
++ current->pi_state_cache = pi_state;
++
++ return 0;
++}
++
++static struct futex_pi_state *alloc_pi_state(void)
++{
++ struct futex_pi_state *pi_state = current->pi_state_cache;
++
++ WARN_ON(!pi_state);
++ current->pi_state_cache = NULL;
++
++ return pi_state;
++}
++
++static void pi_state_update_owner(struct futex_pi_state *pi_state,
++ struct task_struct *new_owner)
++{
++ struct task_struct *old_owner = pi_state->owner;
++
++ lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
++
++ if (old_owner) {
++ raw_spin_lock(&old_owner->pi_lock);
++ WARN_ON(list_empty(&pi_state->list));
++ list_del_init(&pi_state->list);
++ raw_spin_unlock(&old_owner->pi_lock);
++ }
++
++ if (new_owner) {
++ raw_spin_lock(&new_owner->pi_lock);
++ WARN_ON(!list_empty(&pi_state->list));
++ list_add(&pi_state->list, &new_owner->pi_state_list);
++ pi_state->owner = new_owner;
++ raw_spin_unlock(&new_owner->pi_lock);
++ }
++}
++
++static void get_pi_state(struct futex_pi_state *pi_state)
++{
++ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
++}
++
++/*
++ * Drops a reference to the pi_state object and frees or caches it
++ * when the last reference is gone.
++ */
++static void put_pi_state(struct futex_pi_state *pi_state)
++{
++ if (!pi_state)
++ return;
++
++ if (!refcount_dec_and_test(&pi_state->refcount))
++ return;
++
++ /*
++ * If pi_state->owner is NULL, the owner is most probably dying
++ * and has cleaned up the pi_state already
++ */
++ if (pi_state->owner) {
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
++ pi_state_update_owner(pi_state, NULL);
++ rt_mutex_proxy_unlock(&pi_state->pi_mutex);
++ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
++ }
++
++ if (current->pi_state_cache) {
++ kfree(pi_state);
++ } else {
++ /*
++ * pi_state->list is already empty.
++ * clear pi_state->owner.
++ * refcount is at 0 - put it back to 1.
++ */
++ pi_state->owner = NULL;
++ refcount_set(&pi_state->refcount, 1);
++ current->pi_state_cache = pi_state;
++ }
++}
++
++#ifdef CONFIG_FUTEX_PI
++
++/*
++ * This task is holding PI mutexes at exit time => bad.
++ * Kernel cleans up PI-state, but userspace is likely hosed.
++ * (Robust-futex cleanup is separate and might save the day for userspace.)
++ */
++static void exit_pi_state_list(struct task_struct *curr)
++{
++ struct list_head *next, *head = &curr->pi_state_list;
++ struct futex_pi_state *pi_state;
++ struct futex_hash_bucket *hb;
++ union futex_key key = FUTEX_KEY_INIT;
++
++ if (!futex_cmpxchg_enabled)
++ return;
++ /*
++ * We are a ZOMBIE and nobody can enqueue itself on
++ * pi_state_list anymore, but we have to be careful
++ * versus waiters unqueueing themselves:
++ */
++ raw_spin_lock_irq(&curr->pi_lock);
++ while (!list_empty(head)) {
++ next = head->next;
++ pi_state = list_entry(next, struct futex_pi_state, list);
++ key = pi_state->key;
++ hb = hash_futex(&key);
++
++ /*
++ * We can race against put_pi_state() removing itself from the
++ * list (a waiter going away). put_pi_state() will first
++ * decrement the reference count and then modify the list, so
++ * it's possible to see the list entry but fail this reference
++ * acquire.
++ *
++ * In that case; drop the locks to let put_pi_state() make
++ * progress and retry the loop.
++ */
++ if (!refcount_inc_not_zero(&pi_state->refcount)) {
++ raw_spin_unlock_irq(&curr->pi_lock);
++ cpu_relax();
++ raw_spin_lock_irq(&curr->pi_lock);
++ continue;
++ }
++ raw_spin_unlock_irq(&curr->pi_lock);
++
++ spin_lock(&hb->lock);
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ raw_spin_lock(&curr->pi_lock);
++ /*
++ * We dropped the pi-lock, so re-check whether this
++ * task still owns the PI-state:
++ */
++ if (head->next != next) {
++ /* retain curr->pi_lock for the loop invariant */
++ raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
++ spin_unlock(&hb->lock);
++ put_pi_state(pi_state);
++ continue;
++ }
++
++ WARN_ON(pi_state->owner != curr);
++ WARN_ON(list_empty(&pi_state->list));
++ list_del_init(&pi_state->list);
++ pi_state->owner = NULL;
++
++ raw_spin_unlock(&curr->pi_lock);
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ spin_unlock(&hb->lock);
++
++ rt_mutex_futex_unlock(&pi_state->pi_mutex);
++ put_pi_state(pi_state);
++
++ raw_spin_lock_irq(&curr->pi_lock);
++ }
++ raw_spin_unlock_irq(&curr->pi_lock);
++}
++#else
++static inline void exit_pi_state_list(struct task_struct *curr) { }
++#endif
++
++/*
++ * We need to check the following states:
++ *
++ * Waiter | pi_state | pi->owner | uTID | uODIED | ?
++ *
++ * [1] NULL | --- | --- | 0 | 0/1 | Valid
++ * [2] NULL | --- | --- | >0 | 0/1 | Valid
++ *
++ * [3] Found | NULL | -- | Any | 0/1 | Invalid
++ *
++ * [4] Found | Found | NULL | 0 | 1 | Valid
++ * [5] Found | Found | NULL | >0 | 1 | Invalid
++ *
++ * [6] Found | Found | task | 0 | 1 | Valid
++ *
++ * [7] Found | Found | NULL | Any | 0 | Invalid
++ *
++ * [8] Found | Found | task | ==taskTID | 0/1 | Valid
++ * [9] Found | Found | task | 0 | 0 | Invalid
++ * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
++ *
++ * [1] Indicates that the kernel can acquire the futex atomically. We
++ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
++ *
++ * [2] Valid, if TID does not belong to a kernel thread. If no matching
++ * thread is found then it indicates that the owner TID has died.
++ *
++ * [3] Invalid. The waiter is queued on a non PI futex
++ *
++ * [4] Valid state after exit_robust_list(), which sets the user space
++ * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
++ *
++ * [5] The user space value got manipulated between exit_robust_list()
++ * and exit_pi_state_list()
++ *
++ * [6] Valid state after exit_pi_state_list() which sets the new owner in
++ * the pi_state but cannot access the user space value.
++ *
++ * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
++ *
++ * [8] Owner and user space value match
++ *
++ * [9] There is no transient state which sets the user space TID to 0
++ * except exit_robust_list(), but this is indicated by the
++ * FUTEX_OWNER_DIED bit. See [4]
++ *
++ * [10] There is no transient state which leaves owner and user space
++ * TID out of sync. Except one error case where the kernel is denied
++ * write access to the user address, see fixup_pi_state_owner().
++ *
++ *
++ * Serialization and lifetime rules:
++ *
++ * hb->lock:
++ *
++ * hb -> futex_q, relation
++ * futex_q -> pi_state, relation
++ *
++ * (cannot be raw because hb can contain an arbitrary amount
++ * of futex_q's)
++ *
++ * pi_mutex->wait_lock:
++ *
++ * {uval, pi_state}
++ *
++ * (and pi_mutex 'obviously')
++ *
++ * p->pi_lock:
++ *
++ * p->pi_state_list -> pi_state->list, relation
++ * pi_mutex->owner -> pi_state->owner, relation
++ *
++ * pi_state->refcount:
++ *
++ * pi_state lifetime
++ *
++ *
++ * Lock order:
++ *
++ * hb->lock
++ * pi_mutex->wait_lock
++ * p->pi_lock
++ *
++ */
++
++/*
++ * Validate that the existing waiter has a pi_state and sanity check
++ * the pi_state against the user space value. If correct, attach to
++ * it.
++ */
++static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
++ struct futex_pi_state *pi_state,
++ struct futex_pi_state **ps)
++{
++ pid_t pid = uval & FUTEX_TID_MASK;
++ u32 uval2;
++ int ret;
++
++ /*
++ * Userspace might have messed up non-PI and PI futexes [3]
++ */
++ if (unlikely(!pi_state))
++ return -EINVAL;
++
++ /*
++ * We get here with hb->lock held, and having found a
++ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
++ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
++ * which in turn means that futex_lock_pi() still has a reference on
++ * our pi_state.
++ *
++ * The waiter holding a reference on @pi_state also protects against
++ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
++ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
++ * free pi_state before we can take a reference ourselves.
++ */
++ WARN_ON(!refcount_read(&pi_state->refcount));
++
++ /*
++ * Now that we have a pi_state, we can acquire wait_lock
++ * and do the state validation.
++ */
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++
++ /*
++ * Since {uval, pi_state} is serialized by wait_lock, and our current
++ * uval was read without holding it, it can have changed. Verify it
++ * still is what we expect it to be, otherwise retry the entire
++ * operation.
++ */
++ if (get_futex_value_locked(&uval2, uaddr))
++ goto out_efault;
++
++ if (uval != uval2)
++ goto out_eagain;
++
++ /*
++ * Handle the owner died case:
++ */
++ if (uval & FUTEX_OWNER_DIED) {
++ /*
++ * exit_pi_state_list sets owner to NULL and wakes the
++ * topmost waiter. The task which acquires the
++ * pi_state->rt_mutex will fixup owner.
++ */
++ if (!pi_state->owner) {
++ /*
++ * No pi state owner, but the user space TID
++ * is not 0. Inconsistent state. [5]
++ */
++ if (pid)
++ goto out_einval;
++ /*
++ * Take a ref on the state and return success. [4]
++ */
++ goto out_attach;
++ }
++
++ /*
++ * If TID is 0, then either the dying owner has not
++ * yet executed exit_pi_state_list() or some waiter
++ * acquired the rtmutex in the pi state, but did not
++ * yet fixup the TID in user space.
++ *
++ * Take a ref on the state and return success. [6]
++ */
++ if (!pid)
++ goto out_attach;
++ } else {
++ /*
++ * If the owner died bit is not set, then the pi_state
++ * must have an owner. [7]
++ */
++ if (!pi_state->owner)
++ goto out_einval;
++ }
++
++ /*
++ * Bail out if user space manipulated the futex value. If pi
++ * state exists then the owner TID must be the same as the
++ * user space TID. [9/10]
++ */
++ if (pid != task_pid_vnr(pi_state->owner))
++ goto out_einval;
++
++out_attach:
++ get_pi_state(pi_state);
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ *ps = pi_state;
++ return 0;
++
++out_einval:
++ ret = -EINVAL;
++ goto out_error;
++
++out_eagain:
++ ret = -EAGAIN;
++ goto out_error;
++
++out_efault:
++ ret = -EFAULT;
++ goto out_error;
++
++out_error:
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ return ret;
++}
++
++/**
++ * wait_for_owner_exiting - Block until the owner has exited
++ * @ret: owner's current futex lock status
++ * @exiting: Pointer to the exiting task
++ *
++ * Caller must hold a refcount on @exiting.
++ */
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
++{
++ if (ret != -EBUSY) {
++ WARN_ON_ONCE(exiting);
++ return;
++ }
++
++ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
++ return;
++
++ mutex_lock(&exiting->futex_exit_mutex);
++ /*
++ * No point in doing state checking here. If the waiter got here
++ * while the task was in exec()->exec_futex_release() then it can
++ * have any FUTEX_STATE_* value when the waiter has acquired the
++ * mutex. OK, if running, EXITING or DEAD if it reached exit()
++ * already. Highly unlikely and not a problem. Just one more round
++ * through the futex maze.
++ */
++ mutex_unlock(&exiting->futex_exit_mutex);
++
++ put_task_struct(exiting);
++}
++
++static int handle_exit_race(u32 __user *uaddr, u32 uval,
++ struct task_struct *tsk)
++{
++ u32 uval2;
++
++ /*
++ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
++ * caller that the alleged owner is busy.
++ */
++ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
++ return -EBUSY;
++
++ /*
++ * Reread the user space value to handle the following situation:
++ *
++ * CPU0 CPU1
++ *
++ * sys_exit() sys_futex()
++ * do_exit() futex_lock_pi()
++ * futex_lock_pi_atomic()
++ * exit_signals(tsk) No waiters:
++ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
++ * mm_release(tsk) Set waiter bit
++ * exit_robust_list(tsk) { *uaddr = 0x80000PID;
++ * Set owner died attach_to_pi_owner() {
++ * *uaddr = 0xC0000000; tsk = get_task(PID);
++ * } if (!tsk->flags & PF_EXITING) {
++ * ... attach();
++ * tsk->futex_state = } else {
++ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
++ * FUTEX_STATE_DEAD)
++ * return -EAGAIN;
++ * return -ESRCH; <--- FAIL
++ * }
++ *
++ * Returning ESRCH unconditionally is wrong here because the
++ * user space value has been changed by the exiting task.
++ *
++ * The same logic applies to the case where the exiting task is
++ * already gone.
++ */
++ if (get_futex_value_locked(&uval2, uaddr))
++ return -EFAULT;
++
++ /* If the user space value has changed, try again. */
++ if (uval2 != uval)
++ return -EAGAIN;
++
++ /*
++ * The exiting task did not have a robust list, the robust list was
++ * corrupted or the user space value in *uaddr is simply bogus.
++ * Give up and tell user space.
++ */
++ return -ESRCH;
++}
++
++static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
++ struct futex_pi_state **ps)
++{
++ /*
++ * No existing pi state. First waiter. [2]
++ *
++ * This creates pi_state, we have hb->lock held, this means nothing can
++ * observe this state, wait_lock is irrelevant.
++ */
++ struct futex_pi_state *pi_state = alloc_pi_state();
++
++ /*
++ * Initialize the pi_mutex in locked state and make @p
++ * the owner of it:
++ */
++ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
++
++ /* Store the key for possible exit cleanups: */
++ pi_state->key = *key;
++
++ WARN_ON(!list_empty(&pi_state->list));
++ list_add(&pi_state->list, &p->pi_state_list);
++ /*
++ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
++ * because there is no concurrency as the object is not published yet.
++ */
++ pi_state->owner = p;
++
++ *ps = pi_state;
++}
++/*
++ * Lookup the task for the TID provided from user space and attach to
++ * it after doing proper sanity checks.
++ */
++static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
++ struct futex_pi_state **ps,
++ struct task_struct **exiting)
++{
++ pid_t pid = uval & FUTEX_TID_MASK;
++ struct task_struct *p;
++
++ /*
++ * We are the first waiter - try to look up the real owner and attach
++ * the new pi_state to it, but bail out when TID = 0 [1]
++ *
++ * The !pid check is paranoid. None of the call sites should end up
++ * with pid == 0, but better safe than sorry. Let the caller retry.
++ */
++ if (!pid)
++ return -EAGAIN;
++ p = find_get_task_by_vpid(pid);
++ if (!p)
++ return handle_exit_race(uaddr, uval, NULL);
++
++ if (unlikely(p->flags & PF_KTHREAD)) {
++ put_task_struct(p);
++ return -EPERM;
++ }
++
++ /*
++ * We need to look at the task state to figure out, whether the
++ * task is exiting. To protect against the change of the task state
++ * in futex_exit_release(), we do this protected by p->pi_lock:
++ */
++ raw_spin_lock_irq(&p->pi_lock);
++ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
++ /*
++ * The task is on the way out. When the futex state is
++ * FUTEX_STATE_DEAD, we know that the task has finished
++ * the cleanup:
++ */
++ int ret = handle_exit_race(uaddr, uval, p);
++
++ raw_spin_unlock_irq(&p->pi_lock);
++ /*
++ * If the owner task is between FUTEX_STATE_EXITING and
++ * FUTEX_STATE_DEAD then store the task pointer and keep
++ * the reference on the task struct. The calling code will
++ * drop all locks, wait for the task to reach
++ * FUTEX_STATE_DEAD and then drop the refcount. This is
++ * required to prevent a live lock when the current task
++ * preempted the exiting task between the two states.
++ */
++ if (ret == -EBUSY)
++ *exiting = p;
++ else
++ put_task_struct(p);
++ return ret;
++ }
++
++ __attach_to_pi_owner(p, key, ps);
++ raw_spin_unlock_irq(&p->pi_lock);
++
++ put_task_struct(p);
++
++ return 0;
++}
++
++static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
++{
++ int err;
++ u32 curval;
++
++ if (unlikely(should_fail_futex(true)))
++ return -EFAULT;
++
++ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
++ if (unlikely(err))
++ return err;
++
++ /* If user space value changed, let the caller retry */
++ return curval != uval ? -EAGAIN : 0;
++}
++
++/**
++ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
++ * @uaddr: the pi futex user address
++ * @hb: the pi futex hash bucket
++ * @key: the futex key associated with uaddr and hb
++ * @ps: the pi_state pointer where we store the result of the
++ * lookup
++ * @task: the task to perform the atomic lock work for. This will
++ * be "current" except in the case of requeue pi.
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
++ *
++ * Return:
++ * - 0 - ready to wait;
++ * - 1 - acquired the lock;
++ * - <0 - error
++ *
++ * The hb->lock must be held by the caller.
++ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
++ */
++static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
++ union futex_key *key,
++ struct futex_pi_state **ps,
++ struct task_struct *task,
++ struct task_struct **exiting,
++ int set_waiters)
++{
++ u32 uval, newval, vpid = task_pid_vnr(task);
++ struct futex_q *top_waiter;
++ int ret;
++
++ /*
++ * Read the user space value first so we can validate a few
++ * things before proceeding further.
++ */
++ if (get_futex_value_locked(&uval, uaddr))
++ return -EFAULT;
++
++ if (unlikely(should_fail_futex(true)))
++ return -EFAULT;
++
++ /*
++ * Detect deadlocks.
++ */
++ if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
++ return -EDEADLK;
++
++ if ((unlikely(should_fail_futex(true))))
++ return -EDEADLK;
++
++ /*
++ * Lookup existing state first. If it exists, try to attach to
++ * its pi_state.
++ */
++ top_waiter = futex_top_waiter(hb, key);
++ if (top_waiter)
++ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
++
++ /*
++ * No waiter and user TID is 0. We are here because the
++ * waiters bit or the owner died bit is set, or this was called from
++ * requeue_cmp_pi, or for whatever reason something took the
++ * syscall.
++ */
++ if (!(uval & FUTEX_TID_MASK)) {
++ /*
++ * We take over the futex. No other waiters and the user space
++ * TID is 0. We preserve the owner died bit.
++ */
++ newval = uval & FUTEX_OWNER_DIED;
++ newval |= vpid;
++
++ /* The futex requeue_pi code can enforce the waiters bit */
++ if (set_waiters)
++ newval |= FUTEX_WAITERS;
++
++ ret = lock_pi_update_atomic(uaddr, uval, newval);
++ if (ret)
++ return ret;
++
++ /*
++ * If the waiter bit was requested the caller also needs PI
++ * state attached to the new owner of the user space futex.
++ *
++ * @task is guaranteed to be alive and it cannot be exiting
++ * because it is either sleeping or waiting in
++ * futex_requeue_pi_wakeup_sync().
++ *
++ * No need to do the full attach_to_pi_owner() exercise
++ * because @task is known and valid.
++ */
++ if (set_waiters) {
++ raw_spin_lock_irq(&task->pi_lock);
++ __attach_to_pi_owner(task, key, ps);
++ raw_spin_unlock_irq(&task->pi_lock);
++ }
++ return 1;
++ }
++
++ /*
++ * First waiter. Set the waiters bit before attaching ourself to
++ * the owner. If owner tries to unlock, it will be forced into
++ * the kernel and blocked on hb->lock.
++ */
++ newval = uval | FUTEX_WAITERS;
++ ret = lock_pi_update_atomic(uaddr, uval, newval);
++ if (ret)
++ return ret;
++ /*
++ * If the update of the user space value succeeded, we try to
++ * attach to the owner. If that fails, no harm done, we only
++ * set the FUTEX_WAITERS bit in the user space variable.
++ */
++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
++}
++
++/**
++ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
++ * @q: The futex_q to unqueue
++ *
++ * The q->lock_ptr must not be NULL and must be held by the caller.
++ */
++static void __unqueue_futex(struct futex_q *q)
++{
++ struct futex_hash_bucket *hb;
++
++ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
++ return;
++ lockdep_assert_held(q->lock_ptr);
++
++ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
++ plist_del(&q->list, &hb->chain);
++ hb_waiters_dec(hb);
++}
++
++/*
++ * The hash bucket lock must be held when this is called.
++ * Afterwards, the futex_q must not be accessed. Callers
++ * must ensure to later call wake_up_q() for the actual
++ * wakeups to occur.
++ */
++static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
++{
++ struct task_struct *p = q->task;
++
++ if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
++ return;
++
++ get_task_struct(p);
++ __unqueue_futex(q);
++ /*
++ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
++ * is written, without taking any locks. This is possible in the event
++ * of a spurious wakeup, for example. A memory barrier is required here
++ * to prevent the following store to lock_ptr from getting ahead of the
++ * plist_del in __unqueue_futex().
++ */
++ smp_store_release(&q->lock_ptr, NULL);
++
++ /*
++ * Queue the task for later wakeup for after we've released
++ * the hb->lock.
++ */
++ wake_q_add_safe(wake_q, p);
++}
++
++/*
++ * Caller must hold a reference on @pi_state.
++ */
++static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
++{
++ struct rt_mutex_waiter *top_waiter;
++ struct task_struct *new_owner;
++ bool postunlock = false;
++ DEFINE_RT_WAKE_Q(wqh);
++ u32 curval, newval;
++ int ret = 0;
++
++ top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
++ if (WARN_ON_ONCE(!top_waiter)) {
++ /*
++ * As per the comment in futex_unlock_pi() this should not happen.
++ *
++ * When this happens, give up our locks and try again, giving
++ * the futex_lock_pi() instance time to complete, either by
++ * waiting on the rtmutex or removing itself from the futex
++ * queue.
++ */
++ ret = -EAGAIN;
++ goto out_unlock;
++ }
++
++ new_owner = top_waiter->task;
++
++ /*
++ * We pass it to the next owner. The WAITERS bit is always kept
++ * enabled while there is PI state around. We cleanup the owner
++ * died bit, because we are the owner.
++ */
++ newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
++
++ if (unlikely(should_fail_futex(true))) {
++ ret = -EFAULT;
++ goto out_unlock;
++ }
++
++ ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
++ if (!ret && (curval != uval)) {
++ /*
++ * If an unconditional UNLOCK_PI operation (user space did not
++ * try the TID->0 transition) raced with a waiter setting the
++ * FUTEX_WAITERS flag between get_user() and locking the hash
++ * bucket lock, retry the operation.
++ */
++ if ((FUTEX_TID_MASK & curval) == uval)
++ ret = -EAGAIN;
++ else
++ ret = -EINVAL;
++ }
++
++ if (!ret) {
++ /*
++ * This is a point of no return; once we modified the uval
++ * there is no going back and subsequent operations must
++ * not fail.
++ */
++ pi_state_update_owner(pi_state, new_owner);
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
++ }
++
++out_unlock:
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++
++ if (postunlock)
++ rt_mutex_postunlock(&wqh);
++
++ return ret;
++}
++
++/*
++ * Express the locking dependencies for lockdep:
++ */
++static inline void
++double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
++{
++ if (hb1 <= hb2) {
++ spin_lock(&hb1->lock);
++ if (hb1 < hb2)
++ spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
++ } else { /* hb1 > hb2 */
++ spin_lock(&hb2->lock);
++ spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
++ }
++}
++
++static inline void
++double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
++{
++ spin_unlock(&hb1->lock);
++ if (hb1 != hb2)
++ spin_unlock(&hb2->lock);
++}
++
++/*
++ * Wake up waiters matching bitset queued on this futex (uaddr).
++ */
++static int
++futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
++{
++ struct futex_hash_bucket *hb;
++ struct futex_q *this, *next;
++ union futex_key key = FUTEX_KEY_INIT;
++ int ret;
++ DEFINE_WAKE_Q(wake_q);
++
++ if (!bitset)
++ return -EINVAL;
++
++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
++ if (unlikely(ret != 0))
++ return ret;
++
++ hb = hash_futex(&key);
++
++ /* Make sure we really have tasks to wakeup */
++ if (!hb_waiters_pending(hb))
++ return ret;
++
++ spin_lock(&hb->lock);
++
++ plist_for_each_entry_safe(this, next, &hb->chain, list) {
++ if (match_futex (&this->key, &key)) {
++ if (this->pi_state || this->rt_waiter) {
++ ret = -EINVAL;
++ break;
++ }
++
++ /* Check if one of the bits is set in both bitsets */
++ if (!(this->bitset & bitset))
++ continue;
++
++ mark_wake_futex(&wake_q, this);
++ if (++ret >= nr_wake)
++ break;
++ }
++ }
++
++ spin_unlock(&hb->lock);
++ wake_up_q(&wake_q);
++ return ret;
++}
++
++static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
++{
++ unsigned int op = (encoded_op & 0x70000000) >> 28;
++ unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
++ int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
++ int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
++ int oldval, ret;
++
++ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
++ if (oparg < 0 || oparg > 31) {
++ char comm[sizeof(current->comm)];
++ /*
++ * kill this print and return -EINVAL when userspace
++ * is sane again
++ */
++ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
++ get_task_comm(comm, current), oparg);
++ oparg &= 31;
++ }
++ oparg = 1 << oparg;
++ }
++
++ pagefault_disable();
++ ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
++ pagefault_enable();
++ if (ret)
++ return ret;
++
++ switch (cmp) {
++ case FUTEX_OP_CMP_EQ:
++ return oldval == cmparg;
++ case FUTEX_OP_CMP_NE:
++ return oldval != cmparg;
++ case FUTEX_OP_CMP_LT:
++ return oldval < cmparg;
++ case FUTEX_OP_CMP_GE:
++ return oldval >= cmparg;
++ case FUTEX_OP_CMP_LE:
++ return oldval <= cmparg;
++ case FUTEX_OP_CMP_GT:
++ return oldval > cmparg;
++ default:
++ return -ENOSYS;
++ }
++}
++
++/*
++ * Wake up all waiters hashed on the physical page that is mapped
++ * to this virtual address:
++ */
++static int
++futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
++ int nr_wake, int nr_wake2, int op)
++{
++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
++ struct futex_hash_bucket *hb1, *hb2;
++ struct futex_q *this, *next;
++ int ret, op_ret;
++ DEFINE_WAKE_Q(wake_q);
++
++retry:
++ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
++ if (unlikely(ret != 0))
++ return ret;
++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
++ if (unlikely(ret != 0))
++ return ret;
++
++ hb1 = hash_futex(&key1);
++ hb2 = hash_futex(&key2);
++
++retry_private:
++ double_lock_hb(hb1, hb2);
++ op_ret = futex_atomic_op_inuser(op, uaddr2);
++ if (unlikely(op_ret < 0)) {
++ double_unlock_hb(hb1, hb2);
++
++ if (!IS_ENABLED(CONFIG_MMU) ||
++ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
++ /*
++ * we don't get EFAULT from MMU faults if we don't have
++ * an MMU, but we might get them from range checking
++ */
++ ret = op_ret;
++ return ret;
++ }
++
++ if (op_ret == -EFAULT) {
++ ret = fault_in_user_writeable(uaddr2);
++ if (ret)
++ return ret;
++ }
++
++ cond_resched();
++ if (!(flags & FLAGS_SHARED))
++ goto retry_private;
++ goto retry;
++ }
++
++ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
++ if (match_futex (&this->key, &key1)) {
++ if (this->pi_state || this->rt_waiter) {
++ ret = -EINVAL;
++ goto out_unlock;
++ }
++ mark_wake_futex(&wake_q, this);
++ if (++ret >= nr_wake)
++ break;
++ }
++ }
++
++ if (op_ret > 0) {
++ op_ret = 0;
++ plist_for_each_entry_safe(this, next, &hb2->chain, list) {
++ if (match_futex (&this->key, &key2)) {
++ if (this->pi_state || this->rt_waiter) {
++ ret = -EINVAL;
++ goto out_unlock;
++ }
++ mark_wake_futex(&wake_q, this);
++ if (++op_ret >= nr_wake2)
++ break;
++ }
++ }
++ ret += op_ret;
++ }
++
++out_unlock:
++ double_unlock_hb(hb1, hb2);
++ wake_up_q(&wake_q);
++ return ret;
++}
++
++/**
++ * requeue_futex() - Requeue a futex_q from one hb to another
++ * @q: the futex_q to requeue
++ * @hb1: the source hash_bucket
++ * @hb2: the target hash_bucket
++ * @key2: the new key for the requeued futex_q
++ */
++static inline
++void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
++ struct futex_hash_bucket *hb2, union futex_key *key2)
++{
++
++ /*
++ * If key1 and key2 hash to the same bucket, no need to
++ * requeue.
++ */
++ if (likely(&hb1->chain != &hb2->chain)) {
++ plist_del(&q->list, &hb1->chain);
++ hb_waiters_dec(hb1);
++ hb_waiters_inc(hb2);
++ plist_add(&q->list, &hb2->chain);
++ q->lock_ptr = &hb2->lock;
++ }
++ q->key = *key2;
++}
++
++static inline bool futex_requeue_pi_prepare(struct futex_q *q,
++ struct futex_pi_state *pi_state)
++{
++ int old, new;
++
++ /*
++ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has
++ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should
++ * ignore the waiter.
++ */
++ old = atomic_read_acquire(&q->requeue_state);
++ do {
++ if (old == Q_REQUEUE_PI_IGNORE)
++ return false;
++
++ /*
++ * futex_proxy_trylock_atomic() might have set it to
++ * IN_PROGRESS and an interleaved early wake to WAIT.
++ *
++ * It was considered to have an extra state for that
++ * trylock, but that would just add more conditionals
++ * all over the place for a dubious value.
++ */
++ if (old != Q_REQUEUE_PI_NONE)
++ break;
++
++ new = Q_REQUEUE_PI_IN_PROGRESS;
++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
++
++ q->pi_state = pi_state;
++ return true;
++}
++
++static inline void futex_requeue_pi_complete(struct futex_q *q, int locked)
++{
++ int old, new;
++
++ old = atomic_read_acquire(&q->requeue_state);
++ do {
++ if (old == Q_REQUEUE_PI_IGNORE)
++ return;
++
++ if (locked >= 0) {
++ /* Requeue succeeded. Set DONE or LOCKED */
++ WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS &&
++ old != Q_REQUEUE_PI_WAIT);
++ new = Q_REQUEUE_PI_DONE + locked;
++ } else if (old == Q_REQUEUE_PI_IN_PROGRESS) {
++ /* Deadlock, no early wakeup interleave */
++ new = Q_REQUEUE_PI_NONE;
++ } else {
++ /* Deadlock, early wakeup interleave. */
++ WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT);
++ new = Q_REQUEUE_PI_IGNORE;
++ }
++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
++
++#ifdef CONFIG_PREEMPT_RT
++ /* If the waiter interleaved with the requeue let it know */
++ if (unlikely(old == Q_REQUEUE_PI_WAIT))
++ rcuwait_wake_up(&q->requeue_wait);
++#endif
++}
++
++static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
++{
++ int old, new;
++
++ old = atomic_read_acquire(&q->requeue_state);
++ do {
++ /* Is requeue done already? */
++ if (old >= Q_REQUEUE_PI_DONE)
++ return old;
++
++ /*
++ * If not done, then tell the requeue code to either ignore
++ * the waiter or to wake it up once the requeue is done.
++ */
++ new = Q_REQUEUE_PI_WAIT;
++ if (old == Q_REQUEUE_PI_NONE)
++ new = Q_REQUEUE_PI_IGNORE;
++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new));
++
++ /* If the requeue was in progress, wait for it to complete */
++ if (old == Q_REQUEUE_PI_IN_PROGRESS) {
++#ifdef CONFIG_PREEMPT_RT
++ rcuwait_wait_event(&q->requeue_wait,
++ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT,
++ TASK_UNINTERRUPTIBLE);
++#else
++ (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT);
++#endif
++ }
++
++ /*
++ * Requeue is now either prohibited or complete. Reread state
++ * because during the wait above it might have changed. Nothing
++ * will modify q->requeue_state after this point.
++ */
++ return atomic_read(&q->requeue_state);
++}
++
++/**
++ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
++ * @q: the futex_q
++ * @key: the key of the requeue target futex
++ * @hb: the hash_bucket of the requeue target futex
++ *
++ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
++ * target futex if it is uncontended or via a lock steal.
++ *
++ * 1) Set @q::key to the requeue target futex key so the waiter can detect
++ * the wakeup on the right futex.
++ *
++ * 2) Dequeue @q from the hash bucket.
++ *
++ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
++ * acquisition.
++ *
++ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
++ * the waiter has to fixup the pi state.
++ *
++ * 5) Complete the requeue state so the waiter can make progress. After
++ * this point the waiter task can return from the syscall immediately in
++ * case that the pi state does not have to be fixed up.
++ *
++ * 6) Wake the waiter task.
++ *
++ * Must be called with both q->lock_ptr and hb->lock held.
++ */
++static inline
++void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
++ struct futex_hash_bucket *hb)
++{
++ q->key = *key;
++
++ __unqueue_futex(q);
++
++ WARN_ON(!q->rt_waiter);
++ q->rt_waiter = NULL;
++
++ q->lock_ptr = &hb->lock;
++
++ /* Signal locked state to the waiter */
++ futex_requeue_pi_complete(q, 1);
++ wake_up_state(q->task, TASK_NORMAL);
++}
++
++/**
++ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
++ * @pifutex: the user address of the to futex
++ * @hb1: the from futex hash bucket, must be locked by the caller
++ * @hb2: the to futex hash bucket, must be locked by the caller
++ * @key1: the from futex key
++ * @key2: the to futex key
++ * @ps: address to store the pi_state pointer
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
++ *
++ * Try and get the lock on behalf of the top waiter if we can do it atomically.
++ * Wake the top waiter if we succeed. If the caller specified set_waiters,
++ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
++ * hb1 and hb2 must be held by the caller.
++ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
++ *
++ * Return:
++ * - 0 - failed to acquire the lock atomically;
++ * - >0 - acquired the lock, return value is vpid of the top_waiter
++ * - <0 - error
++ */
++static int
++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
++ struct futex_hash_bucket *hb2, union futex_key *key1,
++ union futex_key *key2, struct futex_pi_state **ps,
++ struct task_struct **exiting, int set_waiters)
++{
++ struct futex_q *top_waiter = NULL;
++ u32 curval;
++ int ret;
++
++ if (get_futex_value_locked(&curval, pifutex))
++ return -EFAULT;
++
++ if (unlikely(should_fail_futex(true)))
++ return -EFAULT;
++
++ /*
++ * Find the top_waiter and determine if there are additional waiters.
++ * If the caller intends to requeue more than 1 waiter to pifutex,
++ * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
++ * as we have means to handle the possible fault. If not, don't set
++ * the bit unnecessarily as it will force the subsequent unlock to enter
++ * the kernel.
++ */
++ top_waiter = futex_top_waiter(hb1, key1);
++
++ /* There are no waiters, nothing for us to do. */
++ if (!top_waiter)
++ return 0;
++
++ /*
++ * Ensure that this is a waiter sitting in futex_wait_requeue_pi()
++ * and waiting on the 'waitqueue' futex which is always !PI.
++ */
++ if (!top_waiter->rt_waiter || top_waiter->pi_state)
++ return -EINVAL;
++
++ /* Ensure we requeue to the expected futex. */
++ if (!match_futex(top_waiter->requeue_pi_key, key2))
++ return -EINVAL;
++
++ /* Ensure that this does not race against an early wakeup */
++ if (!futex_requeue_pi_prepare(top_waiter, NULL))
++ return -EAGAIN;
++
++ /*
++ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
++ * in the contended case or if @set_waiters is true.
++ *
++ * In the contended case PI state is attached to the lock owner. If
++ * the user space lock can be acquired then PI state is attached to
++ * the new owner (@top_waiter->task) when @set_waiters is true.
++ */
++ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
++ exiting, set_waiters);
++ if (ret == 1) {
++ /*
++ * Lock was acquired in user space and PI state was
++ * attached to @top_waiter->task. That means state is fully
++ * consistent and the waiter can return to user space
++ * immediately after the wakeup.
++ */
++ requeue_pi_wake_futex(top_waiter, key2, hb2);
++ } else if (ret < 0) {
++ /* Rewind top_waiter::requeue_state */
++ futex_requeue_pi_complete(top_waiter, ret);
++ } else {
++ /*
++ * futex_lock_pi_atomic() did not acquire the user space
++ * futex, but managed to establish the proxy lock and pi
++ * state. top_waiter::requeue_state cannot be fixed up here
++ * because the waiter is not enqueued on the rtmutex
++ * yet. This is handled at the callsite depending on the
++ * result of rt_mutex_start_proxy_lock() which is
++ * guaranteed to be reached with this function returning 0.
++ */
++ }
++ return ret;
++}
++
++/**
++ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
++ * @uaddr1: source futex user address
++ * @flags: futex flags (FLAGS_SHARED, etc.)
++ * @uaddr2: target futex user address
++ * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
++ * @nr_requeue: number of waiters to requeue (0-INT_MAX)
++ * @cmpval: @uaddr1 expected value (or %NULL)
++ * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
++ * pi futex (pi to pi requeue is not supported)
++ *
++ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
++ * uaddr2 atomically on behalf of the top waiter.
++ *
++ * Return:
++ * - >=0 - on success, the number of tasks requeued or woken;
++ * - <0 - on error
++ */
++static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
++ u32 __user *uaddr2, int nr_wake, int nr_requeue,
++ u32 *cmpval, int requeue_pi)
++{
++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
++ int task_count = 0, ret;
++ struct futex_pi_state *pi_state = NULL;
++ struct futex_hash_bucket *hb1, *hb2;
++ struct futex_q *this, *next;
++ DEFINE_WAKE_Q(wake_q);
++
++ if (nr_wake < 0 || nr_requeue < 0)
++ return -EINVAL;
++
++ /*
++ * When PI not supported: return -ENOSYS if requeue_pi is true,
++ * consequently the compiler knows requeue_pi is always false past
++ * this point which will optimize away all the conditional code
++ * further down.
++ */
++ if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
++ return -ENOSYS;
++
++ if (requeue_pi) {
++ /*
++ * Requeue PI only works on two distinct uaddrs. This
++ * check is only valid for private futexes. See below.
++ */
++ if (uaddr1 == uaddr2)
++ return -EINVAL;
++
++ /*
++ * futex_requeue() allows the caller to define the number
++ * of waiters to wake up via the @nr_wake argument. With
++ * REQUEUE_PI, waking up more than one waiter creates
++ * more problems than it solves. Waking up a waiter only makes
++ * sense if the PI futex @uaddr2 is uncontended as
++ * this allows the requeue code to acquire the futex
++ * @uaddr2 before waking the waiter. The waiter can then
++ * return to user space without further action. A secondary
++ * wakeup would just make the futex_wait_requeue_pi()
++ * handling more complex, because that code would have to
++ * look up pi_state and do more or less all the handling
++ * which the requeue code has to do for the to be requeued
++ * waiters. So restrict the number of waiters to wake to
++ * one, and only wake it up when the PI futex is
++ * uncontended. Otherwise requeue it and let the unlock of
++ * the PI futex handle the wakeup.
++ *
++ * All REQUEUE_PI users, e.g. pthread_cond_signal() and
++ * pthread_cond_broadcast() must use nr_wake=1.
++ */
++ if (nr_wake != 1)
++ return -EINVAL;
++
++ /*
++ * requeue_pi requires a pi_state, try to allocate it now
++ * without any locks in case it fails.
++ */
++ if (refill_pi_state_cache())
++ return -ENOMEM;
++ }
++
++retry:
++ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
++ if (unlikely(ret != 0))
++ return ret;
++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
++ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
++ if (unlikely(ret != 0))
++ return ret;
++
++ /*
++ * The check above which compares uaddrs is not sufficient for
++ * shared futexes. We need to compare the keys:
++ */
++ if (requeue_pi && match_futex(&key1, &key2))
++ return -EINVAL;
++
++ hb1 = hash_futex(&key1);
++ hb2 = hash_futex(&key2);
++
++retry_private:
++ hb_waiters_inc(hb2);
++ double_lock_hb(hb1, hb2);
++
++ if (likely(cmpval != NULL)) {
++ u32 curval;
++
++ ret = get_futex_value_locked(&curval, uaddr1);
++
++ if (unlikely(ret)) {
++ double_unlock_hb(hb1, hb2);
++ hb_waiters_dec(hb2);
++
++ ret = get_user(curval, uaddr1);
++ if (ret)
++ return ret;
++
++ if (!(flags & FLAGS_SHARED))
++ goto retry_private;
++
++ goto retry;
++ }
++ if (curval != *cmpval) {
++ ret = -EAGAIN;
++ goto out_unlock;
++ }
++ }
++
++ if (requeue_pi) {
++ struct task_struct *exiting = NULL;
++
++ /*
++ * Attempt to acquire uaddr2 and wake the top waiter. If we
++ * intend to requeue waiters, force setting the FUTEX_WAITERS
++ * bit. We force this here where we are able to easily handle
++ * faults rather in the requeue loop below.
++ *
++ * Updates topwaiter::requeue_state if a top waiter exists.
++ */
++ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
++ &key2, &pi_state,
++ &exiting, nr_requeue);
++
++ /*
++ * At this point the top_waiter has either taken uaddr2 or
++ * is waiting on it. In both cases pi_state has been
++ * established and an initial refcount on it. In case of an
++ * error there's nothing.
++ *
++ * The top waiter's requeue_state is up to date:
++ *
++ * - If the lock was acquired atomically (ret == 1), then
++ * the state is Q_REQUEUE_PI_LOCKED.
++ *
++ * The top waiter has been dequeued and woken up and can
++ * return to user space immediately. The kernel/user
++ * space state is consistent. In case that there must be
++ * more waiters requeued the WAITERS bit in the user
++ * space futex is set so the top waiter task has to go
++ * into the syscall slowpath to unlock the futex. This
++ * will block until this requeue operation has been
++ * completed and the hash bucket locks have been
++ * dropped.
++ *
++ * - If the trylock failed with an error (ret < 0) then
++ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
++ * happened", or Q_REQUEUE_PI_IGNORE when there was an
++ * interleaved early wakeup.
++ *
++ * - If the trylock did not succeed (ret == 0) then the
++ * state is either Q_REQUEUE_PI_IN_PROGRESS or
++ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
++ * This will be cleaned up in the loop below, which
++ * cannot fail because futex_proxy_trylock_atomic() did
++ * the same sanity checks for requeue_pi as the loop
++ * below does.
++ */
++ switch (ret) {
++ case 0:
++ /* We hold a reference on the pi state. */
++ break;
++
++ case 1:
++ /*
++ * futex_proxy_trylock_atomic() acquired the user space
++ * futex. Adjust task_count.
++ */
++ task_count++;
++ ret = 0;
++ break;
++
++ /*
++ * If the above failed, then pi_state is NULL and
++ * waiter::requeue_state is correct.
++ */
++ case -EFAULT:
++ double_unlock_hb(hb1, hb2);
++ hb_waiters_dec(hb2);
++ ret = fault_in_user_writeable(uaddr2);
++ if (!ret)
++ goto retry;
++ return ret;
++ case -EBUSY:
++ case -EAGAIN:
++ /*
++ * Two reasons for this:
++ * - EBUSY: Owner is exiting and we just wait for the
++ * exit to complete.
++ * - EAGAIN: The user space value changed.
++ */
++ double_unlock_hb(hb1, hb2);
++ hb_waiters_dec(hb2);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
++ cond_resched();
++ goto retry;
++ default:
++ goto out_unlock;
++ }
++ }
++
++ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
++ if (task_count - nr_wake >= nr_requeue)
++ break;
++
++ if (!match_futex(&this->key, &key1))
++ continue;
++
++ /*
++ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
++ * be paired with each other and no other futex ops.
++ *
++ * We should never be requeueing a futex_q with a pi_state,
++ * which is awaiting a futex_unlock_pi().
++ */
++ if ((requeue_pi && !this->rt_waiter) ||
++ (!requeue_pi && this->rt_waiter) ||
++ this->pi_state) {
++ ret = -EINVAL;
++ break;
++ }
++
++ /* Plain futexes just wake or requeue and are done */
++ if (!requeue_pi) {
++ if (++task_count <= nr_wake)
++ mark_wake_futex(&wake_q, this);
++ else
++ requeue_futex(this, hb1, hb2, &key2);
++ continue;
++ }
++
++ /* Ensure we requeue to the expected futex for requeue_pi. */
++ if (!match_futex(this->requeue_pi_key, &key2)) {
++ ret = -EINVAL;
++ break;
++ }
++
++ /*
++ * Requeue nr_requeue waiters and possibly one more in the case
++ * of requeue_pi if we couldn't acquire the lock atomically.
++ *
++ * Prepare the waiter to take the rt_mutex. Take a refcount
++ * on the pi_state and store the pointer in the futex_q
++ * object of the waiter.
++ */
++ get_pi_state(pi_state);
++
++ /* Don't requeue when the waiter is already on the way out. */
++ if (!futex_requeue_pi_prepare(this, pi_state)) {
++ /*
++ * Early woken waiter signaled that it is on the
++ * way out. Drop the pi_state reference and try the
++ * next waiter. @this->pi_state is still NULL.
++ */
++ put_pi_state(pi_state);
++ continue;
++ }
++
++ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
++ this->rt_waiter,
++ this->task);
++
++ if (ret == 1) {
++ /*
++ * We got the lock. We do neither drop the refcount
++ * on pi_state nor clear this->pi_state because the
++ * waiter needs the pi_state for cleaning up the
++ * user space value. It will drop the refcount
++ * after doing so. this::requeue_state is updated
++ * in the wakeup as well.
++ */
++ requeue_pi_wake_futex(this, &key2, hb2);
++ task_count++;
++ } else if (!ret) {
++ /* Waiter is queued, move it to hb2 */
++ requeue_futex(this, hb1, hb2, &key2);
++ futex_requeue_pi_complete(this, 0);
++ task_count++;
++ } else {
++ /*
++ * rt_mutex_start_proxy_lock() detected a potential
++ * deadlock when we tried to queue that waiter.
++ * Drop the pi_state reference which we took above
++ * and remove the pointer to the state from the
++ * waiters futex_q object.
++ */
++ this->pi_state = NULL;
++ put_pi_state(pi_state);
++ futex_requeue_pi_complete(this, ret);
++ /*
++ * We stop queueing more waiters and let user space
++ * deal with the mess.
++ */
++ break;
++ }
++ }
++
++ /*
++ * We took an extra initial reference to the pi_state in
++ * futex_proxy_trylock_atomic(). We need to drop it here again.
++ */
++ put_pi_state(pi_state);
++
++out_unlock:
++ double_unlock_hb(hb1, hb2);
++ wake_up_q(&wake_q);
++ hb_waiters_dec(hb2);
++ return ret ? ret : task_count;
++}
++
++/* The key must be already stored in q->key. */
++static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
++ __acquires(&hb->lock)
++{
++ struct futex_hash_bucket *hb;
++
++ hb = hash_futex(&q->key);
++
++ /*
++ * Increment the counter before taking the lock so that
++ * a potential waker won't miss a to-be-slept task that is
++ * waiting for the spinlock. This is safe as all queue_lock()
++ * users end up calling queue_me(). Similarly, for housekeeping,
++ * decrement the counter at queue_unlock() when some error has
++ * occurred and we don't end up adding the task to the list.
++ */
++ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
++
++ q->lock_ptr = &hb->lock;
++
++ spin_lock(&hb->lock);
++ return hb;
++}
++
++static inline void
++queue_unlock(struct futex_hash_bucket *hb)
++ __releases(&hb->lock)
++{
++ spin_unlock(&hb->lock);
++ hb_waiters_dec(hb);
++}
++
++static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++{
++ int prio;
++
++ /*
++ * The priority used to register this element is
++ * - either the real thread-priority for the real-time threads
++ * (i.e. threads with a priority lower than MAX_RT_PRIO)
++ * - or MAX_RT_PRIO for non-RT threads.
++ * Thus, all RT-threads are woken first in priority order, and
++ * the others are woken last, in FIFO order.
++ */
++ prio = min(current->normal_prio, MAX_RT_PRIO);
++
++ plist_node_init(&q->list, prio);
++ plist_add(&q->list, &hb->chain);
++ q->task = current;
++}
++
++/**
++ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
++ * @q: The futex_q to enqueue
++ * @hb: The destination hash bucket
++ *
++ * The hb->lock must be held by the caller, and is released here. A call to
++ * queue_me() is typically paired with exactly one call to unqueue_me(). The
++ * exceptions involve the PI related operations, which may use unqueue_me_pi()
++ * or nothing if the unqueue is done as part of the wake process and the unqueue
++ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
++ * an example).
++ */
++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++ __releases(&hb->lock)
++{
++ __queue_me(q, hb);
++ spin_unlock(&hb->lock);
++}
++
++/**
++ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
++ * @q: The futex_q to unqueue
++ *
++ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
++ * be paired with exactly one earlier call to queue_me().
++ *
++ * Return:
++ * - 1 - if the futex_q was still queued (and we removed it);
++ * - 0 - if the futex_q was already removed by the waking thread
++ */
++static int unqueue_me(struct futex_q *q)
++{
++ spinlock_t *lock_ptr;
++ int ret = 0;
++
++ /* In the common case we don't take the spinlock, which is nice. */
++retry:
++ /*
++ * q->lock_ptr can change between this read and the following spin_lock.
++ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
++ * optimizing lock_ptr out of the logic below.
++ */
++ lock_ptr = READ_ONCE(q->lock_ptr);
++ if (lock_ptr != NULL) {
++ spin_lock(lock_ptr);
++ /*
++ * q->lock_ptr can change between reading it and
++ * spin_lock(), causing us to take the wrong lock. This
++ * corrects the race condition.
++ *
++ * Reasoning goes like this: if we have the wrong lock,
++ * q->lock_ptr must have changed (maybe several times)
++ * between reading it and the spin_lock(). It can
++ * change again after the spin_lock() but only if it was
++ * already changed before the spin_lock(). It cannot,
++ * however, change back to the original value. Therefore
++ * we can detect whether we acquired the correct lock.
++ */
++ if (unlikely(lock_ptr != q->lock_ptr)) {
++ spin_unlock(lock_ptr);
++ goto retry;
++ }
++ __unqueue_futex(q);
++
++ BUG_ON(q->pi_state);
++
++ spin_unlock(lock_ptr);
++ ret = 1;
++ }
++
++ return ret;
++}
++
++/*
++ * PI futexes can not be requeued and must remove themselves from the
++ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
++ */
++static void unqueue_me_pi(struct futex_q *q)
++{
++ __unqueue_futex(q);
++
++ BUG_ON(!q->pi_state);
++ put_pi_state(q->pi_state);
++ q->pi_state = NULL;
++}
++
++static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
++ struct task_struct *argowner)
++{
++ struct futex_pi_state *pi_state = q->pi_state;
++ struct task_struct *oldowner, *newowner;
++ u32 uval, curval, newval, newtid;
++ int err = 0;
++
++ oldowner = pi_state->owner;
++
++ /*
++ * We are here because either:
++ *
++ * - we stole the lock and pi_state->owner needs updating to reflect
++ * that (@argowner == current),
++ *
++ * or:
++ *
++ * - someone stole our lock and we need to fix things to point to the
++ * new owner (@argowner == NULL).
++ *
++ * Either way, we have to replace the TID in the user space variable.
++ * This must be atomic as we have to preserve the owner died bit here.
++ *
++ * Note: We write the user space value _before_ changing the pi_state
++ * because we can fault here. Imagine swapped out pages or a fork
++ * that marked all the anonymous memory readonly for cow.
++ *
++ * Modifying pi_state _before_ the user space value would leave the
++ * pi_state in an inconsistent state when we fault here, because we
++ * need to drop the locks to handle the fault. This might be observed
++	 * in the PID checks when attaching to PI state.
++ */
++retry:
++ if (!argowner) {
++ if (oldowner != current) {
++ /*
++ * We raced against a concurrent self; things are
++ * already fixed up. Nothing to do.
++ */
++ return 0;
++ }
++
++ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
++ /* We got the lock. pi_state is correct. Tell caller. */
++ return 1;
++ }
++
++ /*
++ * The trylock just failed, so either there is an owner or
++ * there is a higher priority waiter than this one.
++ */
++ newowner = rt_mutex_owner(&pi_state->pi_mutex);
++ /*
++ * If the higher priority waiter has not yet taken over the
++ * rtmutex then newowner is NULL. We can't return here with
++ * that state because it's inconsistent vs. the user space
++ * state. So drop the locks and try again. It's a valid
++ * situation and not any different from the other retry
++ * conditions.
++ */
++ if (unlikely(!newowner)) {
++ err = -EAGAIN;
++ goto handle_err;
++ }
++ } else {
++ WARN_ON_ONCE(argowner != current);
++ if (oldowner == current) {
++ /*
++ * We raced against a concurrent self; things are
++ * already fixed up. Nothing to do.
++ */
++ return 1;
++ }
++ newowner = argowner;
++ }
++
++ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
++ /* Owner died? */
++ if (!pi_state->owner)
++ newtid |= FUTEX_OWNER_DIED;
++
++ err = get_futex_value_locked(&uval, uaddr);
++ if (err)
++ goto handle_err;
++
++ for (;;) {
++ newval = (uval & FUTEX_OWNER_DIED) | newtid;
++
++ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
++ if (err)
++ goto handle_err;
++
++ if (curval == uval)
++ break;
++ uval = curval;
++ }
++
++ /*
++ * We fixed up user space. Now we need to fix the pi_state
++ * itself.
++ */
++ pi_state_update_owner(pi_state, newowner);
++
++ return argowner == current;
++
++ /*
++ * In order to reschedule or handle a page fault, we need to drop the
++ * locks here. In the case of a fault, this gives the other task
++ * (either the highest priority waiter itself or the task which stole
++ * the rtmutex) the chance to try the fixup of the pi_state. So once we
++ * are back from handling the fault we need to check the pi_state after
++ * reacquiring the locks and before trying to do another fixup. When
++ * the fixup has been done already we simply return.
++ *
++ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
++ * drop hb->lock since the caller owns the hb -> futex_q relation.
++ * Dropping the pi_mutex->wait_lock requires the state revalidate.
++ */
++handle_err:
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ spin_unlock(q->lock_ptr);
++
++ switch (err) {
++ case -EFAULT:
++ err = fault_in_user_writeable(uaddr);
++ break;
++
++ case -EAGAIN:
++ cond_resched();
++ err = 0;
++ break;
++
++ default:
++ WARN_ON_ONCE(1);
++ break;
++ }
++
++ spin_lock(q->lock_ptr);
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++
++ /*
++ * Check if someone else fixed it for us:
++ */
++ if (pi_state->owner != oldowner)
++ return argowner == current;
++
++ /* Retry if err was -EAGAIN or the fault in succeeded */
++ if (!err)
++ goto retry;
++
++ /*
++ * fault_in_user_writeable() failed so user state is immutable. At
++ * best we can make the kernel state consistent but user state will
++ * be most likely hosed and any subsequent unlock operation will be
++ * rejected due to PI futex rule [10].
++ *
++ * Ensure that the rtmutex owner is also the pi_state owner despite
++ * the user space value claiming something different. There is no
++ * point in unlocking the rtmutex if current is the owner as it
++ * would need to wait until the next waiter has taken the rtmutex
++ * to guarantee consistent state. Keep it simple. Userspace asked
++	 * for this wrecked state.
++ *
++ * The rtmutex has an owner - either current or some other
++ * task. See the EAGAIN loop above.
++ */
++ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
++
++ return err;
++}
++
++static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
++ struct task_struct *argowner)
++{
++ struct futex_pi_state *pi_state = q->pi_state;
++ int ret;
++
++ lockdep_assert_held(q->lock_ptr);
++
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ ret = __fixup_pi_state_owner(uaddr, q, argowner);
++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
++ return ret;
++}
++
++static long futex_wait_restart(struct restart_block *restart);
++
++/**
++ * fixup_owner() - Post lock pi_state and corner case management
++ * @uaddr: user address of the futex
++ * @q: futex_q (contains pi_state and access to the rt_mutex)
++ * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
++ *
++ * After attempting to lock an rt_mutex, this function is called to cleanup
++ * the pi_state owner as well as handle race conditions that may allow us to
++ * acquire the lock. Must be called with the hb lock held.
++ *
++ * Return:
++ * - 1 - success, lock taken;
++ * - 0 - success, lock not taken;
++ * - <0 - on error (-EFAULT)
++ */
++static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
++{
++ if (locked) {
++ /*
++ * Got the lock. We might not be the anticipated owner if we
++ * did a lock-steal - fix up the PI-state in that case:
++ *
++ * Speculative pi_state->owner read (we don't hold wait_lock);
++ * since we own the lock pi_state->owner == current is the
++ * stable state, anything else needs more attention.
++ */
++ if (q->pi_state->owner != current)
++ return fixup_pi_state_owner(uaddr, q, current);
++ return 1;
++ }
++
++ /*
++ * If we didn't get the lock; check if anybody stole it from us. In
++ * that case, we need to fix up the uval to point to them instead of
++ * us, otherwise bad things happen. [10]
++ *
++ * Another speculative read; pi_state->owner == current is unstable
++ * but needs our attention.
++ */
++ if (q->pi_state->owner == current)
++ return fixup_pi_state_owner(uaddr, q, NULL);
++
++ /*
++ * Paranoia check. If we did not take the lock, then we should not be
++ * the owner of the rt_mutex. Warn and establish consistent state.
++ */
++ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
++ return fixup_pi_state_owner(uaddr, q, current);
++
++ return 0;
++}
++
++/**
++ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
++ * @hb: the futex hash bucket, must be locked by the caller
++ * @q: the futex_q to queue up on
++ * @timeout: the prepared hrtimer_sleeper, or null for no timeout
++ */
++static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
++ struct hrtimer_sleeper *timeout)
++{
++ /*
++ * The task state is guaranteed to be set before another task can
++ * wake it. set_current_state() is implemented using smp_store_mb() and
++ * queue_me() calls spin_unlock() upon completion, both serializing
++ * access to the hash list and forcing another memory barrier.
++ */
++ set_current_state(TASK_INTERRUPTIBLE);
++ queue_me(q, hb);
++
++ /* Arm the timer */
++ if (timeout)
++ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
++
++ /*
++ * If we have been removed from the hash list, then another task
++ * has tried to wake us, and we can skip the call to schedule().
++ */
++ if (likely(!plist_node_empty(&q->list))) {
++ /*
++ * If the timer has already expired, current will already be
++ * flagged for rescheduling. Only call schedule if there
++ * is no timeout, or if it has yet to expire.
++ */
++ if (!timeout || timeout->task)
++ freezable_schedule();
++ }
++ __set_current_state(TASK_RUNNING);
++}
++
++/**
++ * futex_wait_setup() - Prepare to wait on a futex
++ * @uaddr: the futex userspace address
++ * @val: the expected value
++ * @flags: futex flags (FLAGS_SHARED, etc.)
++ * @q: the associated futex_q
++ * @hb: storage for hash_bucket pointer to be returned to caller
++ *
++ * Setup the futex_q and locate the hash_bucket. Get the futex value and
++ * compare it with the expected value. Handle atomic faults internally.
++ * Return with the hb lock held on success, and unlocked on failure.
++ *
++ * Return:
++ * - 0 - uaddr contains val and hb has been locked;
++ * - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
++ */
++static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
++ struct futex_q *q, struct futex_hash_bucket **hb)
++{
++ u32 uval;
++ int ret;
++
++ /*
++ * Access the page AFTER the hash-bucket is locked.
++ * Order is important:
++ *
++ * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
++ * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
++ *
++ * The basic logical guarantee of a futex is that it blocks ONLY
++ * if cond(var) is known to be true at the time of blocking, for
++ * any cond. If we locked the hash-bucket after testing *uaddr, that
++ * would open a race condition where we could block indefinitely with
++ * cond(var) false, which would violate the guarantee.
++ *
++ * On the other hand, we insert q and release the hash-bucket only
++ * after testing *uaddr. This guarantees that futex_wait() will NOT
++ * absorb a wakeup if *uaddr does not match the desired values
++ * while the syscall executes.
++ */
++retry:
++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
++ if (unlikely(ret != 0))
++ return ret;
++
++retry_private:
++ *hb = queue_lock(q);
++
++ ret = get_futex_value_locked(&uval, uaddr);
++
++ if (ret) {
++ queue_unlock(*hb);
++
++ ret = get_user(uval, uaddr);
++ if (ret)
++ return ret;
++
++ if (!(flags & FLAGS_SHARED))
++ goto retry_private;
++
++ goto retry;
++ }
++
++ if (uval != val) {
++ queue_unlock(*hb);
++ ret = -EWOULDBLOCK;
++ }
++
++ return ret;
++}
++
++static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
++ ktime_t *abs_time, u32 bitset)
++{
++ struct hrtimer_sleeper timeout, *to;
++ struct restart_block *restart;
++ struct futex_hash_bucket *hb;
++ struct futex_q q = futex_q_init;
++ int ret;
++
++ if (!bitset)
++ return -EINVAL;
++ q.bitset = bitset;
++
++ to = futex_setup_timer(abs_time, &timeout, flags,
++ current->timer_slack_ns);
++retry:
++ /*
++ * Prepare to wait on uaddr. On success, it holds hb->lock and q
++ * is initialized.
++ */
++ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
++ if (ret)
++ goto out;
++
++ /* queue_me and wait for wakeup, timeout, or a signal. */
++ futex_wait_queue_me(hb, &q, to);
++
++ /* If we were woken (and unqueued), we succeeded, whatever. */
++ ret = 0;
++ if (!unqueue_me(&q))
++ goto out;
++ ret = -ETIMEDOUT;
++ if (to && !to->task)
++ goto out;
++
++ /*
++ * We expect signal_pending(current), but we might be the
++ * victim of a spurious wakeup as well.
++ */
++ if (!signal_pending(current))
++ goto retry;
++
++ ret = -ERESTARTSYS;
++ if (!abs_time)
++ goto out;
++
++ restart = &current->restart_block;
++ restart->futex.uaddr = uaddr;
++ restart->futex.val = val;
++ restart->futex.time = *abs_time;
++ restart->futex.bitset = bitset;
++ restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
++
++ ret = set_restart_fn(restart, futex_wait_restart);
++
++out:
++ if (to) {
++ hrtimer_cancel(&to->timer);
++ destroy_hrtimer_on_stack(&to->timer);
++ }
++ return ret;
++}
++
++
++static long futex_wait_restart(struct restart_block *restart)
++{
++ u32 __user *uaddr = restart->futex.uaddr;
++ ktime_t t, *tp = NULL;
++
++ if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
++ t = restart->futex.time;
++ tp = &t;
++ }
++ restart->fn = do_no_restart_syscall;
++
++ return (long)futex_wait(uaddr, restart->futex.flags,
++ restart->futex.val, tp, restart->futex.bitset);
++}
++
++
++/*
++ * Userspace tried a 0 -> TID atomic transition of the futex value
++ * and failed. The kernel side here does the whole locking operation:
++ * if there are waiters then it will block as a consequence of relying
++ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
++ * a 0 value of the futex too.).
++ *
++ * Also serves as the trylock_pi() implementation, with trylock semantics.
++ */
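++
++/*
++ * Illustrative sketch of the user space fast path that pairs with this
++ * slow path (not kernel code; pi_lock()/pi_unlock() and the bare
++ * cmpxchg() are stand-ins for whatever atomics the C library uses):
++ *
++ *	pi_lock(u32 *f)
++ *	{
++ *		if (cmpxchg(f, 0, gettid()) != 0)
++ *			syscall(SYS_futex, f, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
++ *	}
++ *
++ *	pi_unlock(u32 *f)
++ *	{
++ *		if (cmpxchg(f, gettid(), 0) != gettid())
++ *			syscall(SYS_futex, f, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
++ *	}
++ *
++ * The uncontended paths never enter the kernel; the syscalls are only
++ * taken when the futex word does not hold the expected value (0 for
++ * lock, the caller's bare TID for unlock), e.g. because FUTEX_WAITERS
++ * is set.
++ */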
++static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
++ ktime_t *time, int trylock)
++{
++ struct hrtimer_sleeper timeout, *to;
++ struct task_struct *exiting = NULL;
++ struct rt_mutex_waiter rt_waiter;
++ struct futex_hash_bucket *hb;
++ struct futex_q q = futex_q_init;
++ int res, ret;
++
++ if (!IS_ENABLED(CONFIG_FUTEX_PI))
++ return -ENOSYS;
++
++ if (refill_pi_state_cache())
++ return -ENOMEM;
++
++ to = futex_setup_timer(time, &timeout, flags, 0);
++
++retry:
++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
++ if (unlikely(ret != 0))
++ goto out;
++
++retry_private:
++ hb = queue_lock(&q);
++
++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
++ &exiting, 0);
++ if (unlikely(ret)) {
++ /*
++ * Atomic work succeeded and we got the lock,
++ * or failed. Either way, we do _not_ block.
++ */
++ switch (ret) {
++ case 1:
++ /* We got the lock. */
++ ret = 0;
++ goto out_unlock_put_key;
++ case -EFAULT:
++ goto uaddr_faulted;
++ case -EBUSY:
++ case -EAGAIN:
++ /*
++ * Two reasons for this:
++ * - EBUSY: Task is exiting and we just wait for the
++ * exit to complete.
++ * - EAGAIN: The user space value changed.
++ */
++ queue_unlock(hb);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
++ cond_resched();
++ goto retry;
++ default:
++ goto out_unlock_put_key;
++ }
++ }
++
++ WARN_ON(!q.pi_state);
++
++ /*
++ * Only actually queue now that the atomic ops are done:
++ */
++ __queue_me(&q, hb);
++
++ if (trylock) {
++ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
++ /* Fixup the trylock return value: */
++ ret = ret ? 0 : -EWOULDBLOCK;
++ goto no_block;
++ }
++
++ rt_mutex_init_waiter(&rt_waiter);
++
++ /*
++ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
++ * hold it while doing rt_mutex_start_proxy(), because then it will
++	 * include hb->lock in the blocking chain, even though we'll not in
++ * fact hold it while blocking. This will lead it to report -EDEADLK
++ * and BUG when futex_unlock_pi() interleaves with this.
++ *
++ * Therefore acquire wait_lock while holding hb->lock, but drop the
++ * latter before calling __rt_mutex_start_proxy_lock(). This
++ * interleaves with futex_unlock_pi() -- which does a similar lock
++ * handoff -- such that the latter can observe the futex_q::pi_state
++ * before __rt_mutex_start_proxy_lock() is done.
++ */
++ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
++ spin_unlock(q.lock_ptr);
++ /*
++ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
++ * such that futex_unlock_pi() is guaranteed to observe the waiter when
++ * it sees the futex_q::pi_state.
++ */
++ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
++ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
++
++ if (ret) {
++ if (ret == 1)
++ ret = 0;
++ goto cleanup;
++ }
++
++ if (unlikely(to))
++ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
++
++ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
++
++cleanup:
++ spin_lock(q.lock_ptr);
++ /*
++ * If we failed to acquire the lock (deadlock/signal/timeout), we must
++ * first acquire the hb->lock before removing the lock from the
++ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
++ * lists consistent.
++ *
++ * In particular; it is important that futex_unlock_pi() can not
++ * observe this inconsistency.
++ */
++ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
++ ret = 0;
++
++no_block:
++ /*
++ * Fixup the pi_state owner and possibly acquire the lock if we
++ * haven't already.
++ */
++ res = fixup_owner(uaddr, &q, !ret);
++ /*
++ * If fixup_owner() returned an error, propagate that. If it acquired
++ * the lock, clear our -ETIMEDOUT or -EINTR.
++ */
++ if (res)
++ ret = (res < 0) ? res : 0;
++
++ unqueue_me_pi(&q);
++ spin_unlock(q.lock_ptr);
++ goto out;
++
++out_unlock_put_key:
++ queue_unlock(hb);
++
++out:
++ if (to) {
++ hrtimer_cancel(&to->timer);
++ destroy_hrtimer_on_stack(&to->timer);
++ }
++ return ret != -EINTR ? ret : -ERESTARTNOINTR;
++
++uaddr_faulted:
++ queue_unlock(hb);
++
++ ret = fault_in_user_writeable(uaddr);
++ if (ret)
++ goto out;
++		 * pthread_cond_broadcast(), must use nr_wake=1.
++ if (!(flags & FLAGS_SHARED))
++ goto retry_private;
++
++ goto retry;
++}
++
++/*
++ * Userspace attempted a TID -> 0 atomic transition, and failed.
++ * This is the in-kernel slowpath: we look up the PI state (if any),
++ * and do the rt-mutex unlock.
++ */
++static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
++{
++ u32 curval, uval, vpid = task_pid_vnr(current);
++ union futex_key key = FUTEX_KEY_INIT;
++ struct futex_hash_bucket *hb;
++ struct futex_q *top_waiter;
++ int ret;
++
++ if (!IS_ENABLED(CONFIG_FUTEX_PI))
++ return -ENOSYS;
++
++retry:
++ if (get_user(uval, uaddr))
++ return -EFAULT;
++ /*
++ * We release only a lock we actually own:
++ */
++ if ((uval & FUTEX_TID_MASK) != vpid)
++ return -EPERM;
++
++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
++ if (ret)
++ return ret;
++
++ hb = hash_futex(&key);
++ spin_lock(&hb->lock);
++
++ /*
++ * Check waiters first. We do not trust user space values at
++ * all and we at least want to know if user space fiddled
++ * with the futex value instead of blindly unlocking.
++ */
++ top_waiter = futex_top_waiter(hb, &key);
++ if (top_waiter) {
++ struct futex_pi_state *pi_state = top_waiter->pi_state;
++
++ ret = -EINVAL;
++ if (!pi_state)
++ goto out_unlock;
++
++ /*
++ * If current does not own the pi_state then the futex is
++ * inconsistent and user space fiddled with the futex value.
++ */
++ if (pi_state->owner != current)
++ goto out_unlock;
++
++ get_pi_state(pi_state);
++ /*
++ * By taking wait_lock while still holding hb->lock, we ensure
++ * there is no point where we hold neither; and therefore
++ * wake_futex_pi() must observe a state consistent with what we
++ * observed.
++ *
++ * In particular; this forces __rt_mutex_start_proxy() to
++ * complete such that we're guaranteed to observe the
++ * rt_waiter. Also see the WARN in wake_futex_pi().
++ */
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++ spin_unlock(&hb->lock);
++
++ /* drops pi_state->pi_mutex.wait_lock */
++ ret = wake_futex_pi(uaddr, uval, pi_state);
++
++ put_pi_state(pi_state);
++
++ /*
++ * Success, we're done! No tricky corner cases.
++ */
++ if (!ret)
++ return ret;
++ /*
++ * The atomic access to the futex value generated a
++ * pagefault, so retry the user-access and the wakeup:
++ */
++ if (ret == -EFAULT)
++ goto pi_faulted;
++ /*
++		 * An unconditional UNLOCK_PI op raced against a waiter
++ * setting the FUTEX_WAITERS bit. Try again.
++ */
++ if (ret == -EAGAIN)
++ goto pi_retry;
++ /*
++ * wake_futex_pi has detected invalid state. Tell user
++ * space.
++ */
++ return ret;
++ }
++
++ /*
++ * We have no kernel internal state, i.e. no waiters in the
++ * kernel. Waiters which are about to queue themselves are stuck
++ * on hb->lock. So we can safely ignore them. We do neither
++	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
++ * owner.
++ */
++ if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
++ spin_unlock(&hb->lock);
++ switch (ret) {
++ case -EFAULT:
++ goto pi_faulted;
++
++ case -EAGAIN:
++ goto pi_retry;
++
++ default:
++ WARN_ON_ONCE(1);
++ return ret;
++ }
++ }
++
++ /*
++ * If uval has changed, let user space handle it.
++ */
++ ret = (curval == uval) ? 0 : -EAGAIN;
++
++out_unlock:
++ spin_unlock(&hb->lock);
++ return ret;
++
++pi_retry:
++ cond_resched();
++ goto retry;
++
++pi_faulted:
++
++ ret = fault_in_user_writeable(uaddr);
++ if (!ret)
++ goto retry;
++
++ return ret;
++}
++
++/**
++ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
++ * @hb: the hash_bucket futex_q was originally enqueued on
++ * @q: the futex_q woken while waiting to be requeued
++ * @timeout: the timeout associated with the wait (NULL if none)
++ *
++ * Determine the cause for the early wakeup.
++ *
++ * Return:
++ * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
++ */
++static inline
++int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
++ struct futex_q *q,
++ struct hrtimer_sleeper *timeout)
++{
++ int ret;
++
++ /*
++ * With the hb lock held, we avoid races while we process the wakeup.
++ * We only need to hold hb (and not hb2) to ensure atomicity as the
++ * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
++ * It can't be requeued from uaddr2 to something else since we don't
++ * support a PI aware source futex for requeue.
++ */
++ WARN_ON_ONCE(&hb->lock != q->lock_ptr);
++
++ /*
++ * We were woken prior to requeue by a timeout or a signal.
++ * Unqueue the futex_q and determine which it was.
++ */
++ plist_del(&q->list, &hb->chain);
++ hb_waiters_dec(hb);
++
++ /* Handle spurious wakeups gracefully */
++ ret = -EWOULDBLOCK;
++ if (timeout && !timeout->task)
++ ret = -ETIMEDOUT;
++ else if (signal_pending(current))
++ ret = -ERESTARTNOINTR;
++ return ret;
++}
++
++/**
++ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
++ * @uaddr: the futex we initially wait on (non-pi)
++ * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
++ * the same type, no requeueing from private to shared, etc.
++ * @val: the expected value of uaddr
++ * @abs_time: absolute timeout
++ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
++ * @uaddr2: the pi futex we will take prior to returning to user-space
++ *
++ * The caller will wait on uaddr and will be requeued by futex_requeue() to
++ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
++ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
++ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
++ * without one, the pi logic would not know which task to boost/deboost, if
++ * there was a need to.
++ *
++ * We call schedule in futex_wait_queue_me() when we enqueue and return there
++ * via the following--
++ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
++ * 2) wakeup on uaddr2 after a requeue
++ * 3) signal
++ * 4) timeout
++ *
++ * If 3, cleanup and return -ERESTARTNOINTR.
++ *
++ * If 2, we may then block on trying to take the rt_mutex and return via:
++ * 5) successful lock
++ * 6) signal
++ * 7) timeout
++ * 8) other lock acquisition failure
++ *
++ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
++ *
++ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
++ *
++ * Return:
++ * - 0 - On success;
++ * - <0 - On error
++ */
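++
++/*
++ * Illustrative sketch of the intended pairing (not kernel code; the
++ * cond/mutex layout and helper names are invented for the example, only
++ * the futex operations themselves are ABI):
++ *
++ *	waiter, cond_wait() style, 'm' being a PI futex based mutex:
++ *		val = cond->seq;
++ *		pi_unlock(&m->futex);
++ *		syscall(SYS_futex, &cond->seq, FUTEX_WAIT_REQUEUE_PI, val,
++ *			timeout, &m->futex, 0);
++ *		// on success the waiter owns m->futex again
++ *
++ *	waker, cond_broadcast() style (nr_wake must be 1, nr_requeue is
++ *	passed in the timeout argument slot):
++ *		cond->seq++;
++ *		syscall(SYS_futex, &cond->seq, FUTEX_CMP_REQUEUE_PI, 1,
++ *			(void *)INT_MAX, &m->futex, cond->seq);
++ *
++ * The waker wakes at most one waiter and requeues the rest onto the PI
++ * futex, where futex_unlock_pi() hands the lock over one waiter at a
++ * time.
++ */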
++static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
++ u32 val, ktime_t *abs_time, u32 bitset,
++ u32 __user *uaddr2)
++{
++ struct hrtimer_sleeper timeout, *to;
++ struct rt_mutex_waiter rt_waiter;
++ struct futex_hash_bucket *hb;
++ union futex_key key2 = FUTEX_KEY_INIT;
++ struct futex_q q = futex_q_init;
++ struct rt_mutex_base *pi_mutex;
++ int res, ret;
++
++ if (!IS_ENABLED(CONFIG_FUTEX_PI))
++ return -ENOSYS;
++
++ if (uaddr == uaddr2)
++ return -EINVAL;
++
++ if (!bitset)
++ return -EINVAL;
++
++ to = futex_setup_timer(abs_time, &timeout, flags,
++ current->timer_slack_ns);
++
++ /*
++ * The waiter is allocated on our stack, manipulated by the requeue
++ * code while we sleep on uaddr.
++ */
++ rt_mutex_init_waiter(&rt_waiter);
++
++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
++ if (unlikely(ret != 0))
++ goto out;
++
++ q.bitset = bitset;
++ q.rt_waiter = &rt_waiter;
++ q.requeue_pi_key = &key2;
++
++ /*
++ * Prepare to wait on uaddr. On success, it holds hb->lock and q
++ * is initialized.
++ */
++ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
++ if (ret)
++ goto out;
++
++ /*
++ * The check above which compares uaddrs is not sufficient for
++ * shared futexes. We need to compare the keys:
++ */
++ if (match_futex(&q.key, &key2)) {
++ queue_unlock(hb);
++ ret = -EINVAL;
++ goto out;
++ }
++
++ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
++ futex_wait_queue_me(hb, &q, to);
++
++ switch (futex_requeue_pi_wakeup_sync(&q)) {
++ case Q_REQUEUE_PI_IGNORE:
++ /* The waiter is still on uaddr1 */
++ spin_lock(&hb->lock);
++ ret = handle_early_requeue_pi_wakeup(hb, &q, to);
++ spin_unlock(&hb->lock);
++ break;
++
++ case Q_REQUEUE_PI_LOCKED:
++ /* The requeue acquired the lock */
++ if (q.pi_state && (q.pi_state->owner != current)) {
++ spin_lock(q.lock_ptr);
++ ret = fixup_owner(uaddr2, &q, true);
++ /*
++ * Drop the reference to the pi state which the
++ * requeue_pi() code acquired for us.
++ */
++ put_pi_state(q.pi_state);
++ spin_unlock(q.lock_ptr);
++ /*
++ * Adjust the return value. It's either -EFAULT or
++ * success (1) but the caller expects 0 for success.
++ */
++ ret = ret < 0 ? ret : 0;
++ }
++ break;
++
++ case Q_REQUEUE_PI_DONE:
++ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */
++ pi_mutex = &q.pi_state->pi_mutex;
++ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
++
++		/* Current is no longer pi_blocked_on */
++ spin_lock(q.lock_ptr);
++ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
++ ret = 0;
++
++ debug_rt_mutex_free_waiter(&rt_waiter);
++ /*
++ * Fixup the pi_state owner and possibly acquire the lock if we
++ * haven't already.
++ */
++ res = fixup_owner(uaddr2, &q, !ret);
++ /*
++ * If fixup_owner() returned an error, propagate that. If it
++ * acquired the lock, clear -ETIMEDOUT or -EINTR.
++ */
++ if (res)
++ ret = (res < 0) ? res : 0;
++
++ unqueue_me_pi(&q);
++ spin_unlock(q.lock_ptr);
++
++ if (ret == -EINTR) {
++ /*
++ * We've already been requeued, but cannot restart
++ * by calling futex_lock_pi() directly. We could
++ * restart this syscall, but it would detect that
++ * the user space "val" changed and return
++ * -EWOULDBLOCK. Save the overhead of the restart
++ * and return -EWOULDBLOCK directly.
++ */
++ ret = -EWOULDBLOCK;
++ }
++ break;
++ default:
++ BUG();
++ }
++
++out:
++ if (to) {
++ hrtimer_cancel(&to->timer);
++ destroy_hrtimer_on_stack(&to->timer);
++ }
++ return ret;
++}
++
++/*
++ * Support for robust futexes: the kernel cleans up held futexes at
++ * thread exit time.
++ *
++ * Implementation: user-space maintains a per-thread list of locks it
++ * is holding. Upon do_exit(), the kernel carefully walks this list,
++ * and marks all locks that are owned by this thread with the
++ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
++ * always manipulated with the lock held, so the list is private and
++ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
++ * field, to allow the kernel to clean up if the thread dies after
++ * acquiring the lock, but just before it could have added itself to
++ * the list. There can only be one such pending lock.
++ */
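++
++/*
++ * Illustrative sketch of the user space side (not kernel code). The
++ * struct robust_list_head layout comes from include/uapi/linux/futex.h;
++ * struct my_mutex and its member names are invented for the example:
++ *
++ *	struct my_mutex { struct robust_list entry; uint32_t futex_word; };
++ *
++ *	struct robust_list_head head = {
++ *		.list		 = { .next = &head.list },
++ *		.futex_offset	 = offsetof(struct my_mutex, futex_word) -
++ *				   offsetof(struct my_mutex, entry),
++ *		.list_op_pending = NULL,
++ *	};
++ *	syscall(SYS_set_robust_list, &head, sizeof(head));
++ *
++ * While a robust mutex is held, its 'entry' is linked into head.list;
++ * futex_offset tells the kernel how to get from a list entry to the
++ * futex word it protects. glibc performs this registration automatically
++ * for every thread.
++ */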
++
++/**
++ * sys_set_robust_list() - Set the robust-futex list head of a task
++ * @head: pointer to the list-head
++ * @len: length of the list-head, as userspace expects
++ */
++SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
++ size_t, len)
++{
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++ /*
++ * The kernel knows only one size for now:
++ */
++ if (unlikely(len != sizeof(*head)))
++ return -EINVAL;
++
++ current->robust_list = head;
++
++ return 0;
++}
++
++/**
++ * sys_get_robust_list() - Get the robust-futex list head of a task
++ * @pid: pid of the process [zero for current task]
++ * @head_ptr: pointer to a list-head pointer, the kernel fills it in
++ * @len_ptr: pointer to a length field, the kernel fills in the header size
++ */
++SYSCALL_DEFINE3(get_robust_list, int, pid,
++ struct robust_list_head __user * __user *, head_ptr,
++ size_t __user *, len_ptr)
++{
++ struct robust_list_head __user *head;
++ unsigned long ret;
++ struct task_struct *p;
++
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++
++ rcu_read_lock();
++
++ ret = -ESRCH;
++ if (!pid)
++ p = current;
++ else {
++ p = find_task_by_vpid(pid);
++ if (!p)
++ goto err_unlock;
++ }
++
++ ret = -EPERM;
++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++ goto err_unlock;
++
++ head = p->robust_list;
++ rcu_read_unlock();
++
++ if (put_user(sizeof(*head), len_ptr))
++ return -EFAULT;
++ return put_user(head, head_ptr);
++
++err_unlock:
++ rcu_read_unlock();
++
++ return ret;
++}
++
++/* Constants for the pending_op argument of handle_futex_death */
++#define HANDLE_DEATH_PENDING true
++#define HANDLE_DEATH_LIST false
++
++/*
++ * Process a futex-list entry, check whether it's owned by the
++ * dying task, and do notification if so:
++ */
++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
++ bool pi, bool pending_op)
++{
++ u32 uval, nval, mval;
++ pid_t owner;
++ int err;
++
++ /* Futex address must be 32bit aligned */
++ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
++ return -1;
++
++retry:
++ if (get_user(uval, uaddr))
++ return -1;
++
++ /*
++ * Special case for regular (non PI) futexes. The unlock path in
++ * user space has two race scenarios:
++ *
++ * 1. The unlock path releases the user space futex value and
++ * before it can execute the futex() syscall to wake up
++ * waiters it is killed.
++ *
++ * 2. A woken up waiter is killed before it can acquire the
++ * futex in user space.
++ *
++ * In the second case, the wake up notification could be generated
++ * by the unlock path in user space after setting the futex value
++ * to zero or by the kernel after setting the OWNER_DIED bit below.
++ *
++ * In both cases the TID validation below prevents a wakeup of
++ * potential waiters which can cause these waiters to block
++ * forever.
++ *
++ * In both cases the following conditions are met:
++ *
++ * 1) task->robust_list->list_op_pending != NULL
++ * @pending_op == true
++ * 2) The owner part of user space futex value == 0
++ * 3) Regular futex: @pi == false
++ *
++ * If these conditions are met, it is safe to attempt waking up a
++ * potential waiter without touching the user space futex value and
++ * trying to set the OWNER_DIED bit. If the futex value is zero,
++ * the rest of the user space mutex state is consistent, so a woken
++ * waiter will just take over the uncontended futex. Setting the
++ * OWNER_DIED bit would create inconsistent state and malfunction
++ * of the user space owner died handling. Otherwise, the OWNER_DIED
++ * bit is already set, and the woken waiter is expected to deal with
++ * this.
++ */
++ owner = uval & FUTEX_TID_MASK;
++
++ if (pending_op && !pi && !owner) {
++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
++ return 0;
++ }
++
++ if (owner != task_pid_vnr(curr))
++ return 0;
++
++ /*
++ * Ok, this dying thread is truly holding a futex
++ * of interest. Set the OWNER_DIED bit atomically
++ * via cmpxchg, and if the value had FUTEX_WAITERS
++ * set, wake up a waiter (if any). (We have to do a
++ * futex_wake() even if OWNER_DIED is already set -
++ * to handle the rare but possible case of recursive
++ * thread-death.) The rest of the cleanup is done in
++ * userspace.
++ */
++ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
++
++ /*
++ * We are not holding a lock here, but we want to have
++ * the pagefault_disable/enable() protection because
++ * we want to handle the fault gracefully. If the
++ * access fails we try to fault in the futex with R/W
++ * verification via get_user_pages. get_user() above
++ * does not guarantee R/W access. If that fails we
++ * give up and leave the futex locked.
++ */
++ if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) {
++ switch (err) {
++ case -EFAULT:
++ if (fault_in_user_writeable(uaddr))
++ return -1;
++ goto retry;
++
++ case -EAGAIN:
++ cond_resched();
++ goto retry;
++
++ default:
++ WARN_ON_ONCE(1);
++ return err;
++ }
++ }
++
++ if (nval != uval)
++ goto retry;
++
++ /*
++ * Wake robust non-PI futexes here. The wakeup of
++ * PI futexes happens in exit_pi_state():
++ */
++ if (!pi && (uval & FUTEX_WAITERS))
++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
++
++ return 0;
++}
++
++/*
++ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
++ */
++static inline int fetch_robust_entry(struct robust_list __user **entry,
++ struct robust_list __user * __user *head,
++ unsigned int *pi)
++{
++ unsigned long uentry;
++
++ if (get_user(uentry, (unsigned long __user *)head))
++ return -EFAULT;
++
++ *entry = (void __user *)(uentry & ~1UL);
++ *pi = uentry & 1;
++
++ return 0;
++}
++
++/*
++ * Walk curr->robust_list (very carefully, it's a userspace list!)
++ * and mark any locks found there dead, and notify any waiters.
++ *
++ * We silently return on any sign of list-walking problem.
++ */
++static void exit_robust_list(struct task_struct *curr)
++{
++ struct robust_list_head __user *head = curr->robust_list;
++ struct robust_list __user *entry, *next_entry, *pending;
++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
++ unsigned int next_pi;
++ unsigned long futex_offset;
++ int rc;
++
++ if (!futex_cmpxchg_enabled)
++ return;
++
++ /*
++ * Fetch the list head (which was registered earlier, via
++ * sys_set_robust_list()):
++ */
++ if (fetch_robust_entry(&entry, &head->list.next, &pi))
++ return;
++ /*
++ * Fetch the relative futex offset:
++ */
++ if (get_user(futex_offset, &head->futex_offset))
++ return;
++ /*
++ * Fetch any possibly pending lock-add first, and handle it
++ * if it exists:
++ */
++ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
++ return;
++
++ next_entry = NULL; /* avoid warning with gcc */
++ while (entry != &head->list) {
++ /*
++ * Fetch the next entry in the list before calling
++ * handle_futex_death:
++ */
++ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
++ /*
++ * A pending lock might already be on the list, so
++ * don't process it twice:
++ */
++ if (entry != pending) {
++ if (handle_futex_death((void __user *)entry + futex_offset,
++ curr, pi, HANDLE_DEATH_LIST))
++ return;
++ }
++ if (rc)
++ return;
++ entry = next_entry;
++ pi = next_pi;
++ /*
++ * Avoid excessively long or circular lists:
++ */
++ if (!--limit)
++ break;
++
++ cond_resched();
++ }
++
++ if (pending) {
++ handle_futex_death((void __user *)pending + futex_offset,
++ curr, pip, HANDLE_DEATH_PENDING);
++ }
++}
++
++static void futex_cleanup(struct task_struct *tsk)
++{
++ if (unlikely(tsk->robust_list)) {
++ exit_robust_list(tsk);
++ tsk->robust_list = NULL;
++ }
++
++#ifdef CONFIG_COMPAT
++ if (unlikely(tsk->compat_robust_list)) {
++ compat_exit_robust_list(tsk);
++ tsk->compat_robust_list = NULL;
++ }
++#endif
++
++ if (unlikely(!list_empty(&tsk->pi_state_list)))
++ exit_pi_state_list(tsk);
++}
++
++/**
++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
++ * @tsk: task to set the state on
++ *
++ * Set the futex exit state of the task lockless. The futex waiter code
++ * observes that state when a task is exiting and loops until the task has
++ * actually finished the futex cleanup. The worst case for this is that the
++ * waiter runs through the wait loop until the state becomes visible.
++ *
++ * This is called from the recursive fault handling path in do_exit().
++ *
++ * This is best effort. Either the futex exit code has run already or
++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
++ * take it over. If not, the problem is pushed back to user space. If the
++ * futex exit code did not run yet, then an already queued waiter might
++ * block forever, but there is nothing which can be done about that.
++ */
++void futex_exit_recursive(struct task_struct *tsk)
++{
++ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
++ if (tsk->futex_state == FUTEX_STATE_EXITING)
++ mutex_unlock(&tsk->futex_exit_mutex);
++ tsk->futex_state = FUTEX_STATE_DEAD;
++}
++
++static void futex_cleanup_begin(struct task_struct *tsk)
++{
++ /*
++ * Prevent various race issues against a concurrent incoming waiter
++ * including live locks by forcing the waiter to block on
++ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
++ * attach_to_pi_owner().
++ */
++ mutex_lock(&tsk->futex_exit_mutex);
++
++ /*
++ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
++ *
++ * This ensures that all subsequent checks of tsk->futex_state in
++ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
++ * tsk->pi_lock held.
++ *
++ * It guarantees also that a pi_state which was queued right before
++ * the state change under tsk->pi_lock by a concurrent waiter must
++ * be observed in exit_pi_state_list().
++ */
++ raw_spin_lock_irq(&tsk->pi_lock);
++ tsk->futex_state = FUTEX_STATE_EXITING;
++ raw_spin_unlock_irq(&tsk->pi_lock);
++}
++
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++ /*
++ * Lockless store. The only side effect is that an observer might
++ * take another loop until it becomes visible.
++ */
++ tsk->futex_state = state;
++ /*
++ * Drop the exit protection. This unblocks waiters which observed
++ * FUTEX_STATE_EXITING to reevaluate the state.
++ */
++ mutex_unlock(&tsk->futex_exit_mutex);
++}
++
++void futex_exec_release(struct task_struct *tsk)
++{
++ /*
++ * The state handling is done for consistency, but in the case of
++ * exec() there is no way to prevent further damage as the PID stays
++ * the same. But for the unlikely and arguably buggy case that a
++ * futex is held on exec(), this provides at least as much state
++	 * consistency protection as is possible.
++ */
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ /*
++ * Reset the state to FUTEX_STATE_OK. The task is alive and about
++	 * to exec a new binary.
++ */
++ futex_cleanup_end(tsk, FUTEX_STATE_OK);
++}
++
++void futex_exit_release(struct task_struct *tsk)
++{
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
++}
++
++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
++ u32 __user *uaddr2, u32 val2, u32 val3)
++{
++ int cmd = op & FUTEX_CMD_MASK;
++ unsigned int flags = 0;
++
++ if (!(op & FUTEX_PRIVATE_FLAG))
++ flags |= FLAGS_SHARED;
++
++ if (op & FUTEX_CLOCK_REALTIME) {
++ flags |= FLAGS_CLOCKRT;
++ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
++ cmd != FUTEX_LOCK_PI2)
++ return -ENOSYS;
++ }
++
++ switch (cmd) {
++ case FUTEX_LOCK_PI:
++ case FUTEX_LOCK_PI2:
++ case FUTEX_UNLOCK_PI:
++ case FUTEX_TRYLOCK_PI:
++ case FUTEX_WAIT_REQUEUE_PI:
++ case FUTEX_CMP_REQUEUE_PI:
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++ }
++
++ switch (cmd) {
++ case FUTEX_WAIT:
++ val3 = FUTEX_BITSET_MATCH_ANY;
++ fallthrough;
++ case FUTEX_WAIT_BITSET:
++ return futex_wait(uaddr, flags, val, timeout, val3);
++ case FUTEX_WAKE:
++ val3 = FUTEX_BITSET_MATCH_ANY;
++ fallthrough;
++ case FUTEX_WAKE_BITSET:
++ return futex_wake(uaddr, flags, val, val3);
++ case FUTEX_REQUEUE:
++ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
++ case FUTEX_CMP_REQUEUE:
++ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
++ case FUTEX_WAKE_OP:
++ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
++ case FUTEX_LOCK_PI:
++ flags |= FLAGS_CLOCKRT;
++ fallthrough;
++ case FUTEX_LOCK_PI2:
++ return futex_lock_pi(uaddr, flags, timeout, 0);
++ case FUTEX_UNLOCK_PI:
++ return futex_unlock_pi(uaddr, flags);
++ case FUTEX_TRYLOCK_PI:
++ return futex_lock_pi(uaddr, flags, NULL, 1);
++ case FUTEX_WAIT_REQUEUE_PI:
++ val3 = FUTEX_BITSET_MATCH_ANY;
++ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
++ uaddr2);
++ case FUTEX_CMP_REQUEUE_PI:
++ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
++ }
++ return -ENOSYS;
++}
++
++static __always_inline bool futex_cmd_has_timeout(u32 cmd)
++{
++ switch (cmd) {
++ case FUTEX_WAIT:
++ case FUTEX_LOCK_PI:
++ case FUTEX_LOCK_PI2:
++ case FUTEX_WAIT_BITSET:
++ case FUTEX_WAIT_REQUEUE_PI:
++ return true;
++ }
++ return false;
++}
++
++static __always_inline int
++futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
++{
++ if (!timespec64_valid(ts))
++ return -EINVAL;
++
++ *t = timespec64_to_ktime(*ts);
++ if (cmd == FUTEX_WAIT)
++ *t = ktime_add_safe(ktime_get(), *t);
++ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
++ *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
++ return 0;
++}
++
++SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
++ const struct __kernel_timespec __user *, utime,
++ u32 __user *, uaddr2, u32, val3)
++{
++ int ret, cmd = op & FUTEX_CMD_MASK;
++ ktime_t t, *tp = NULL;
++ struct timespec64 ts;
++
++ if (utime && futex_cmd_has_timeout(cmd)) {
++ if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
++ return -EFAULT;
++ if (get_timespec64(&ts, utime))
++ return -EFAULT;
++ ret = futex_init_timeout(cmd, op, &ts, &t);
++ if (ret)
++ return ret;
++ tp = &t;
++ }
++
++ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
++}
++
++#ifdef CONFIG_COMPAT
++/*
++ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
++ */
++static inline int
++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
++ compat_uptr_t __user *head, unsigned int *pi)
++{
++ if (get_user(*uentry, head))
++ return -EFAULT;
++
++ *entry = compat_ptr((*uentry) & ~1);
++ *pi = (unsigned int)(*uentry) & 1;
++
++ return 0;
++}
++
++static void __user *futex_uaddr(struct robust_list __user *entry,
++ compat_long_t futex_offset)
++{
++ compat_uptr_t base = ptr_to_compat(entry);
++ void __user *uaddr = compat_ptr(base + futex_offset);
++
++ return uaddr;
++}
++
++/*
++ * Walk curr->robust_list (very carefully, it's a userspace list!)
++ * and mark any locks found there dead, and notify any waiters.
++ *
++ * We silently return on any sign of list-walking problem.
++ */
++static void compat_exit_robust_list(struct task_struct *curr)
++{
++ struct compat_robust_list_head __user *head = curr->compat_robust_list;
++ struct robust_list __user *entry, *next_entry, *pending;
++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
++ unsigned int next_pi;
++ compat_uptr_t uentry, next_uentry, upending;
++ compat_long_t futex_offset;
++ int rc;
++
++ if (!futex_cmpxchg_enabled)
++ return;
++
++ /*
++ * Fetch the list head (which was registered earlier, via
++ * sys_set_robust_list()):
++ */
++ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
++ return;
++ /*
++ * Fetch the relative futex offset:
++ */
++ if (get_user(futex_offset, &head->futex_offset))
++ return;
++ /*
++ * Fetch any possibly pending lock-add first, and handle it
++ * if it exists:
++ */
++ if (compat_fetch_robust_entry(&upending, &pending,
++ &head->list_op_pending, &pip))
++ return;
++
++ next_entry = NULL; /* avoid warning with gcc */
++ while (entry != (struct robust_list __user *) &head->list) {
++ /*
++ * Fetch the next entry in the list before calling
++ * handle_futex_death:
++ */
++ rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
++ (compat_uptr_t __user *)&entry->next, &next_pi);
++ /*
++ * A pending lock might already be on the list, so
++		 * don't process it twice:
++ */
++ if (entry != pending) {
++ void __user *uaddr = futex_uaddr(entry, futex_offset);
++
++ if (handle_futex_death(uaddr, curr, pi,
++ HANDLE_DEATH_LIST))
++ return;
++ }
++ if (rc)
++ return;
++ uentry = next_uentry;
++ entry = next_entry;
++ pi = next_pi;
++ /*
++ * Avoid excessively long or circular lists:
++ */
++ if (!--limit)
++ break;
++
++ cond_resched();
++ }
++ if (pending) {
++ void __user *uaddr = futex_uaddr(pending, futex_offset);
++
++ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
++ }
++}
++
++COMPAT_SYSCALL_DEFINE2(set_robust_list,
++ struct compat_robust_list_head __user *, head,
++ compat_size_t, len)
++{
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++
++ if (unlikely(len != sizeof(*head)))
++ return -EINVAL;
++
++ current->compat_robust_list = head;
++
++ return 0;
++}
++
++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
++ compat_uptr_t __user *, head_ptr,
++ compat_size_t __user *, len_ptr)
++{
++ struct compat_robust_list_head __user *head;
++ unsigned long ret;
++ struct task_struct *p;
++
++ if (!futex_cmpxchg_enabled)
++ return -ENOSYS;
++
++ rcu_read_lock();
++
++ ret = -ESRCH;
++ if (!pid)
++ p = current;
++ else {
++ p = find_task_by_vpid(pid);
++ if (!p)
++ goto err_unlock;
++ }
++
++ ret = -EPERM;
++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
++ goto err_unlock;
++
++ head = p->compat_robust_list;
++ rcu_read_unlock();
++
++ if (put_user(sizeof(*head), len_ptr))
++ return -EFAULT;
++ return put_user(ptr_to_compat(head), head_ptr);
++
++err_unlock:
++ rcu_read_unlock();
++
++ return ret;
++}
++#endif /* CONFIG_COMPAT */
++
++#ifdef CONFIG_COMPAT_32BIT_TIME
++SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
++ const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
++ u32, val3)
++{
++ int ret, cmd = op & FUTEX_CMD_MASK;
++ ktime_t t, *tp = NULL;
++ struct timespec64 ts;
++
++ if (utime && futex_cmd_has_timeout(cmd)) {
++ if (get_old_timespec32(&ts, utime))
++ return -EFAULT;
++ ret = futex_init_timeout(cmd, op, &ts, &t);
++ if (ret)
++ return ret;
++ tp = &t;
++ }
++
++ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
++}
++#endif /* CONFIG_COMPAT_32BIT_TIME */
++
++static void __init futex_detect_cmpxchg(void)
++{
++#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
++ u32 curval;
++
++ /*
++ * This will fail and we want it. Some arch implementations do
++ * runtime detection of the futex_atomic_cmpxchg_inatomic()
++ * functionality. We want to know that before we call in any
++ * of the complex code paths. Also we want to prevent
++ * registration of robust lists in that case. NULL is
++ * guaranteed to fault and we get -EFAULT on functional
++ * implementation, the non-functional ones will return
++ * -ENOSYS.
++ */
++ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
++ futex_cmpxchg_enabled = 1;
++#endif
++}
++
++static int __init futex_init(void)
++{
++ unsigned int futex_shift;
++ unsigned long i;
++
++#if CONFIG_BASE_SMALL
++ futex_hashsize = 16;
++#else
++ futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
++#endif
++
++ futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
++ futex_hashsize, 0,
++ futex_hashsize < 256 ? HASH_SMALL : 0,
++ &futex_shift, NULL,
++ futex_hashsize, futex_hashsize);
++ futex_hashsize = 1UL << futex_shift;
++
++ futex_detect_cmpxchg();
++
++ for (i = 0; i < futex_hashsize; i++) {
++ atomic_set(&futex_queues[i].waiters, 0);
++ plist_head_init(&futex_queues[i].chain);
++ spin_lock_init(&futex_queues[i].lock);
++ }
++
++ return 0;
++}
++core_initcall(futex_init);
+diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c
+index cbb0bed958abd..7670a811a5657 100644
+--- a/kernel/gcov/clang.c
++++ b/kernel/gcov/clang.c
+@@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src)
+
+ for (i = 0; i < sfn_ptr->num_counters; i++)
+ dfn_ptr->counters[i] += sfn_ptr->counters[i];
++
++ sfn_ptr = list_next_entry(sfn_ptr, head);
+ }
+ }
+
+diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
+index 460c12b7dfea2..74a4ef1da9ad7 100644
+--- a/kernel/gcov/gcc_4_7.c
++++ b/kernel/gcov/gcc_4_7.c
+@@ -30,6 +30,13 @@
+
+ #define GCOV_TAG_FUNCTION_LENGTH 3
+
++/* Since GCC 12.1 sizes are in BYTES and not in WORDS (4B). */
++#if (__GNUC__ >= 12)
++#define GCOV_UNIT_SIZE 4
++#else
++#define GCOV_UNIT_SIZE 1
++#endif
++
+ static struct gcov_info *gcov_info_head;
+
+ /**
+@@ -75,6 +82,7 @@ struct gcov_fn_info {
+ * @version: gcov version magic indicating the gcc version used for compilation
+ * @next: list head for a singly-linked list
+ * @stamp: uniquifying time stamp
++ * @checksum: unique object checksum
+ * @filename: name of the associated gcov data file
+ * @merge: merge functions (null for unused counter type)
+ * @n_functions: number of instrumented functions
+@@ -87,6 +95,10 @@ struct gcov_info {
+ unsigned int version;
+ struct gcov_info *next;
+ unsigned int stamp;
++ /* Since GCC 12.1 a checksum field is added. */
++#if (__GNUC__ >= 12)
++ unsigned int checksum;
++#endif
+ const char *filename;
+ void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int);
+ unsigned int n_functions;
+@@ -383,12 +395,18 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info)
+ pos += store_gcov_u32(buffer, pos, info->version);
+ pos += store_gcov_u32(buffer, pos, info->stamp);
+
++#if (__GNUC__ >= 12)
++ /* Use zero as checksum of the compilation unit. */
++ pos += store_gcov_u32(buffer, pos, 0);
++#endif
++
+ for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) {
+ fi_ptr = info->functions[fi_idx];
+
+ /* Function record. */
+ pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION);
+- pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION_LENGTH);
++ pos += store_gcov_u32(buffer, pos,
++ GCOV_TAG_FUNCTION_LENGTH * GCOV_UNIT_SIZE);
+ pos += store_gcov_u32(buffer, pos, fi_ptr->ident);
+ pos += store_gcov_u32(buffer, pos, fi_ptr->lineno_checksum);
+ pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum);
+@@ -402,7 +420,8 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info)
+ /* Counter record. */
+ pos += store_gcov_u32(buffer, pos,
+ GCOV_TAG_FOR_COUNTER(ct_idx));
+- pos += store_gcov_u32(buffer, pos, ci_ptr->num * 2);
++ pos += store_gcov_u32(buffer, pos,
++ ci_ptr->num * 2 * GCOV_UNIT_SIZE);
+
+ for (cv_idx = 0; cv_idx < ci_ptr->num; cv_idx++) {
+ pos += store_gcov_u64(buffer, pos,
+diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
+index fbc54c2a7f239..00d58588ea95a 100644
+--- a/kernel/irq/Kconfig
++++ b/kernel/irq/Kconfig
+@@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
+ # Generic IRQ IPI support
+ config GENERIC_IRQ_IPI
+ bool
++ depends on SMP
+ select IRQ_DOMAIN_HIERARCHY
+
+ # Generic MSI interrupt support
+diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
+index f7ff8919dc9bb..fdf170404650f 100644
+--- a/kernel/irq/affinity.c
++++ b/kernel/irq/affinity.c
+@@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec,
+ */
+ if (numvecs <= nodes) {
+ for_each_node_mask(n, nodemsk) {
+- cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
+- node_to_cpumask[n]);
++ /* Ensure that only CPUs which are in both masks are set */
++ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
++ cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
+ if (++curvec == last_affv)
+ curvec = firstvec;
+ }
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index a98bcfc4be7bc..f3920374f71ce 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -1516,7 +1516,8 @@ int irq_chip_request_resources_parent(struct irq_data *data)
+ if (data->chip->irq_request_resources)
+ return data->chip->irq_request_resources(data);
+
+- return -ENOSYS;
++ /* no error on missing optional irq_chip::irq_request_resources */
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent);
+
+diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
+index 221d80c31e94c..fca637d4da1a7 100644
+--- a/kernel/irq/handle.c
++++ b/kernel/irq/handle.c
+@@ -195,7 +195,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
+
+ retval = __handle_irq_event_percpu(desc, &flags);
+
+- add_interrupt_randomness(desc->irq_data.irq, flags);
++ add_interrupt_randomness(desc->irq_data.irq);
+
+ if (!irq_settings_no_debug(desc))
+ note_interrupt(desc, retval);
+diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
+index 54363527feea4..f1d83a8b44171 100644
+--- a/kernel/irq/internals.h
++++ b/kernel/irq/internals.h
+@@ -29,12 +29,14 @@ extern struct irqaction chained_action;
+ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ * IRQTF_AFFINITY - irq thread is requested to adjust affinity
+ * IRQTF_FORCED_THREAD - irq action is force threaded
++ * IRQTF_READY - signals that irq thread is ready
+ */
+ enum {
+ IRQTF_RUNTHREAD,
+ IRQTF_WARNED,
+ IRQTF_AFFINITY,
+ IRQTF_FORCED_THREAD,
++ IRQTF_READY,
+ };
+
+ /*
+@@ -50,6 +52,7 @@ enum {
+ * IRQS_PENDING - irq is pending and replayed later
+ * IRQS_SUSPENDED - irq is suspended
+ * IRQS_NMI - irq line is used to deliver NMIs
++ * IRQS_SYSFS - descriptor has been added to sysfs
+ */
+ enum {
+ IRQS_AUTODETECT = 0x00000001,
+@@ -62,6 +65,7 @@ enum {
+ IRQS_SUSPENDED = 0x00000800,
+ IRQS_TIMINGS = 0x00001000,
+ IRQS_NMI = 0x00002000,
++ IRQS_SYSFS = 0x00004000,
+ };
+
+ #include "debug.h"
+diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
+index 4e3c29bb603c3..7a45fd5932454 100644
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -288,22 +288,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
+ if (irq_kobj_base) {
+ /*
+ * Continue even in case of failure as this is nothing
+- * crucial.
++ * crucial and failures in the late irq_sysfs_init()
++ * cannot be rolled back.
+ */
+ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
+ pr_warn("Failed to add kobject for irq %d\n", irq);
++ else
++ desc->istate |= IRQS_SYSFS;
+ }
+ }
+
+ static void irq_sysfs_del(struct irq_desc *desc)
+ {
+ /*
+- * If irq_sysfs_init() has not yet been invoked (early boot), then
+- * irq_kobj_base is NULL and the descriptor was never added.
+- * kobject_del() complains about a object with no parent, so make
+- * it conditional.
++ * Only invoke kobject_del() when kobject_add() was successfully
++ * invoked for the descriptor. This covers both early boot, where
++ * sysfs is not initialized yet, and the case of a failed
++ * kobject_add() invocation.
+ */
+- if (irq_kobj_base)
++ if (desc->istate & IRQS_SYSFS)
+ kobject_del(&desc->kobj);
+ }
+
+@@ -407,6 +410,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ mutex_init(&desc->request_mutex);
+ init_rcu_head(&desc->rcu);
++ init_waitqueue_head(&desc->wait_for_threads);
+
+ desc_set_defaults(irq, desc, node, affinity, owner);
+ irqd_set(&desc->irq_data, flags);
+@@ -575,6 +579,7 @@ int __init early_irq_init(void)
+ raw_spin_lock_init(&desc[i].lock);
+ lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
+ mutex_init(&desc[i].request_mutex);
++ init_waitqueue_head(&desc[i].wait_for_threads);
+ desc_set_defaults(i, &desc[i], node, NULL, NULL);
+ }
+ return arch_early_irq_init();
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 4d8fc65cf38f4..e0b67784ac1e0 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -25,6 +25,9 @@ static DEFINE_MUTEX(irq_domain_mutex);
+
+ static struct irq_domain *irq_default_domain;
+
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity);
+ static void irq_domain_check_hierarchy(struct irq_domain *domain);
+
+ struct irqchip_fwid {
+@@ -123,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
+ }
+ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
+
+-/**
+- * __irq_domain_add() - Allocate a new irq_domain data structure
+- * @fwnode: firmware node for the interrupt controller
+- * @size: Size of linear map; 0 for radix mapping only
+- * @hwirq_max: Maximum number of interrupts supported by controller
+- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+- * direct mapping
+- * @ops: domain callbacks
+- * @host_data: Controller private data pointer
+- *
+- * Allocates and initializes an irq_domain structure.
+- * Returns pointer to IRQ domain, or NULL on failure.
+- */
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+- irq_hw_number_t hwirq_max, int direct_max,
+- const struct irq_domain_ops *ops,
+- void *host_data)
++static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
++ unsigned int size,
++ irq_hw_number_t hwirq_max,
++ int direct_max,
++ const struct irq_domain_ops *ops,
++ void *host_data)
+ {
+ struct irqchip_fwid *fwid;
+ struct irq_domain *domain;
+@@ -227,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
+
+ irq_domain_check_hierarchy(domain);
+
++ return domain;
++}
++
++static void __irq_domain_publish(struct irq_domain *domain)
++{
+ mutex_lock(&irq_domain_mutex);
+ debugfs_add_domain_dir(domain);
+ list_add(&domain->link, &irq_domain_list);
+ mutex_unlock(&irq_domain_mutex);
+
+ pr_debug("Added domain %s\n", domain->name);
++}
++
++/**
++ * __irq_domain_add() - Allocate a new irq_domain data structure
++ * @fwnode: firmware node for the interrupt controller
++ * @size: Size of linear map; 0 for radix mapping only
++ * @hwirq_max: Maximum number of interrupts supported by controller
++ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
++ * direct mapping
++ * @ops: domain callbacks
++ * @host_data: Controller private data pointer
++ *
++ * Allocates and initializes an irq_domain structure.
++ * Returns pointer to IRQ domain, or NULL on failure.
++ */
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
++ irq_hw_number_t hwirq_max, int direct_max,
++ const struct irq_domain_ops *ops,
++ void *host_data)
++{
++ struct irq_domain *domain;
++
++ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
++ ops, host_data);
++ if (domain)
++ __irq_domain_publish(domain);
++
+ return domain;
+ }
+ EXPORT_SYMBOL_GPL(__irq_domain_add);
+@@ -538,6 +562,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
+ return;
+
+ hwirq = irq_data->hwirq;
++
++ mutex_lock(&irq_domain_mutex);
++
+ irq_set_status_flags(irq, IRQ_NOREQUEST);
+
+ /* remove chip and handler */
+@@ -557,10 +584,12 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
+
+ /* Clear reverse map for this hwirq */
+ irq_domain_clear_mapping(domain, hwirq);
++
++ mutex_unlock(&irq_domain_mutex);
+ }
+
+-int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
+- irq_hw_number_t hwirq)
++static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq,
++ irq_hw_number_t hwirq)
+ {
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+ int ret;
+@@ -573,7 +602,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
+ if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
+ return -EINVAL;
+
+- mutex_lock(&irq_domain_mutex);
+ irq_data->hwirq = hwirq;
+ irq_data->domain = domain;
+ if (domain->ops->map) {
+@@ -590,7 +618,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
+ }
+ irq_data->domain = NULL;
+ irq_data->hwirq = 0;
+- mutex_unlock(&irq_domain_mutex);
+ return ret;
+ }
+
+@@ -601,12 +628,23 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
+
+ domain->mapcount++;
+ irq_domain_set_mapping(domain, hwirq, irq_data);
+- mutex_unlock(&irq_domain_mutex);
+
+ irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
+ return 0;
+ }
++
++int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
++ irq_hw_number_t hwirq)
++{
++ int ret;
++
++ mutex_lock(&irq_domain_mutex);
++ ret = irq_domain_associate_locked(domain, virq, hwirq);
++ mutex_unlock(&irq_domain_mutex);
++
++ return ret;
++}
+ EXPORT_SYMBOL_GPL(irq_domain_associate);
+
+ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
+@@ -668,6 +706,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
+ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+ #endif
+
++static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain,
++ irq_hw_number_t hwirq,
++ const struct irq_affinity_desc *affinity)
++{
++ struct device_node *of_node = irq_domain_get_of_node(domain);
++ int virq;
++
++ pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
++
++ /* Allocate a virtual interrupt number */
++ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
++ affinity);
++ if (virq <= 0) {
++ pr_debug("-> virq allocation failed\n");
++ return 0;
++ }
++
++ if (irq_domain_associate_locked(domain, virq, hwirq)) {
++ irq_free_desc(virq);
++ return 0;
++ }
++
++ pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
++ hwirq, of_node_full_name(of_node), virq);
++
++ return virq;
++}
++
+ /**
+ * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
+ * @domain: domain owning this hardware interrupt or NULL for default domain
+@@ -680,14 +746,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+ * on the number returned from that call.
+ */
+ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+- irq_hw_number_t hwirq,
+- const struct irq_affinity_desc *affinity)
++ irq_hw_number_t hwirq,
++ const struct irq_affinity_desc *affinity)
+ {
+- struct device_node *of_node;
+ int virq;
+
+- pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+-
+ /* Look for default domain if necessary */
+ if (domain == NULL)
+ domain = irq_default_domain;
+@@ -695,32 +758,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+ WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
+ return 0;
+ }
+- pr_debug("-> using domain @%p\n", domain);
+
+- of_node = irq_domain_get_of_node(domain);
++ mutex_lock(&irq_domain_mutex);
+
+ /* Check if mapping already exists */
+ virq = irq_find_mapping(domain, hwirq);
+ if (virq) {
+- pr_debug("-> existing mapping on virq %d\n", virq);
+- return virq;
+- }
+-
+- /* Allocate a virtual interrupt number */
+- virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
+- affinity);
+- if (virq <= 0) {
+- pr_debug("-> virq allocation failed\n");
+- return 0;
++ pr_debug("existing mapping on virq %d\n", virq);
++ goto out;
+ }
+
+- if (irq_domain_associate(domain, virq, hwirq)) {
+- irq_free_desc(virq);
+- return 0;
+- }
+-
+- pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
+- hwirq, of_node_full_name(of_node), virq);
++ virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity);
++out:
++ mutex_unlock(&irq_domain_mutex);
+
+ return virq;
+ }
+@@ -789,6 +839,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
+ type &= IRQ_TYPE_SENSE_MASK;
+
++ mutex_lock(&irq_domain_mutex);
++
+ /*
+ * If we've already configured this interrupt,
+ * don't do it again, or hell will break loose.
+@@ -801,7 +853,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ * interrupt number.
+ */
+ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+- return virq;
++ goto out;
+
+ /*
+ * If the trigger type has not been set yet, then set
+@@ -809,40 +861,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ */
+ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
+ irq_data = irq_get_irq_data(virq);
+- if (!irq_data)
+- return 0;
++ if (!irq_data) {
++ virq = 0;
++ goto out;
++ }
+
+ irqd_set_trigger_type(irq_data, type);
+- return virq;
++ goto out;
+ }
+
+ pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
+ hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
+- return 0;
++ virq = 0;
++ goto out;
+ }
+
+ if (irq_domain_is_hierarchy(domain)) {
+- virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
+- if (virq <= 0)
+- return 0;
++ virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
++ fwspec, false, NULL);
++ if (virq <= 0) {
++ virq = 0;
++ goto out;
++ }
+ } else {
+ /* Create mapping */
+- virq = irq_create_mapping(domain, hwirq);
++ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL);
+ if (!virq)
+- return virq;
++ goto out;
+ }
+
+ irq_data = irq_get_irq_data(virq);
+- if (!irq_data) {
+- if (irq_domain_is_hierarchy(domain))
+- irq_domain_free_irqs(virq, 1);
+- else
+- irq_dispose_mapping(virq);
+- return 0;
++ if (WARN_ON(!irq_data)) {
++ virq = 0;
++ goto out;
+ }
+
+ /* Store trigger type */
+ irqd_set_trigger_type(irq_data, type);
++out:
++ mutex_unlock(&irq_domain_mutex);
+
+ return virq;
+ }
+@@ -910,6 +967,8 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+ data = irq_domain_get_irq_data(domain, hwirq);
+ if (data && data->hwirq == hwirq)
+ desc = irq_data_to_desc(data);
++ if (irq && desc)
++ *irq = hwirq;
+ }
+
+ return desc;
+@@ -1100,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+ struct irq_domain *domain;
+
+ if (size)
+- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
++ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
+ else
+- domain = irq_domain_create_tree(fwnode, ops, host_data);
++ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
++
+ if (domain) {
+ domain->parent = parent;
+ domain->flags |= flags;
++
++ __irq_domain_publish(domain);
+ }
+
+ return domain;
+@@ -1423,40 +1485,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
+ return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
+ }
+
+-/**
+- * __irq_domain_alloc_irqs - Allocate IRQs from domain
+- * @domain: domain to allocate from
+- * @irq_base: allocate specified IRQ number if irq_base >= 0
+- * @nr_irqs: number of IRQs to allocate
+- * @node: NUMA node id for memory allocation
+- * @arg: domain specific argument
+- * @realloc: IRQ descriptors have already been allocated if true
+- * @affinity: Optional irq affinity mask for multiqueue devices
+- *
+- * Allocate IRQ numbers and initialized all data structures to support
+- * hierarchy IRQ domains.
+- * Parameter @realloc is mainly to support legacy IRQs.
+- * Returns error code or allocated IRQ number
+- *
+- * The whole process to setup an IRQ has been split into two steps.
+- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+- * descriptor and required hardware resources. The second step,
+- * irq_domain_activate_irq(), is to program the hardware with preallocated
+- * resources. In this way, it's easier to rollback when failing to
+- * allocate resources.
+- */
+-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+- unsigned int nr_irqs, int node, void *arg,
+- bool realloc, const struct irq_affinity_desc *affinity)
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
+ {
+ int i, ret, virq;
+
+- if (domain == NULL) {
+- domain = irq_default_domain;
+- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+- return -EINVAL;
+- }
+-
+ if (realloc && irq_base >= 0) {
+ virq = irq_base;
+ } else {
+@@ -1475,24 +1509,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ goto out_free_desc;
+ }
+
+- mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
+- if (ret < 0) {
+- mutex_unlock(&irq_domain_mutex);
++ if (ret < 0)
+ goto out_free_irq_data;
+- }
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = irq_domain_trim_hierarchy(virq + i);
+- if (ret) {
+- mutex_unlock(&irq_domain_mutex);
++ if (ret)
+ goto out_free_irq_data;
+- }
+ }
+-
++
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_insert_irq(virq + i);
+- mutex_unlock(&irq_domain_mutex);
+
+ return virq;
+
+@@ -1503,6 +1531,48 @@ out_free_desc:
+ return ret;
+ }
+
++/**
++ * __irq_domain_alloc_irqs - Allocate IRQs from domain
++ * @domain: domain to allocate from
++ * @irq_base: allocate specified IRQ number if irq_base >= 0
++ * @nr_irqs: number of IRQs to allocate
++ * @node: NUMA node id for memory allocation
++ * @arg: domain specific argument
++ * @realloc: IRQ descriptors have already been allocated if true
++ * @affinity: Optional irq affinity mask for multiqueue devices
++ *
++ * Allocate IRQ numbers and initialize all data structures to support
++ * hierarchy IRQ domains.
++ * Parameter @realloc is mainly to support legacy IRQs.
++ * Returns error code or allocated IRQ number
++ *
++ * The whole process to setup an IRQ has been split into two steps.
++ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
++ * descriptor and required hardware resources. The second step,
++ * irq_domain_activate_irq(), is to program the hardware with preallocated
++ * resources. In this way, it's easier to rollback when failing to
++ * allocate resources.
++ */
++int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
++{
++ int ret;
++
++ if (domain == NULL) {
++ domain = irq_default_domain;
++ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
++ return -EINVAL;
++ }
++
++ mutex_lock(&irq_domain_mutex);
++ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
++ realloc, affinity);
++ mutex_unlock(&irq_domain_mutex);
++
++ return ret;
++}
++
+ /* The irq_data was moved, fix the revmap to refer to the new location */
+ static void irq_domain_fix_revmap(struct irq_data *d)
+ {
+@@ -1861,6 +1931,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
+ irq_set_handler_data(virq, handler_data);
+ }
+
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
++{
++ return -EINVAL;
++}
++
+ static void irq_domain_check_hierarchy(struct irq_domain *domain)
+ {
+ }
+@@ -1911,7 +1988,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d)
+
+ static void debugfs_remove_domain_dir(struct irq_domain *d)
+ {
+- debugfs_remove(debugfs_lookup(d->name, domain_dir));
++ debugfs_lookup_and_remove(d->name, domain_dir);
+ }
+
+ void __init irq_domain_debugfs_init(struct dentry *root)
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 27667e82ecc91..9862372e0f011 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -222,11 +222,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ {
+ struct irq_desc *desc = irq_data_to_desc(data);
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
++ const struct cpumask *prog_mask;
+ int ret;
+
++ static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
++ static struct cpumask tmp_mask;
++
+ if (!chip || !chip->irq_set_affinity)
+ return -EINVAL;
+
++ raw_spin_lock(&tmp_mask_lock);
+ /*
+ * If this is a managed interrupt and housekeeping is enabled on
+ * it check whether the requested affinity mask intersects with
+@@ -248,24 +253,34 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ */
+ if (irqd_affinity_is_managed(data) &&
+ housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
+- const struct cpumask *hk_mask, *prog_mask;
+-
+- static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
+- static struct cpumask tmp_mask;
++ const struct cpumask *hk_mask;
+
+ hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+
+- raw_spin_lock(&tmp_mask_lock);
+ cpumask_and(&tmp_mask, mask, hk_mask);
+ if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
+ prog_mask = mask;
+ else
+ prog_mask = &tmp_mask;
+- ret = chip->irq_set_affinity(data, prog_mask, force);
+- raw_spin_unlock(&tmp_mask_lock);
+ } else {
+- ret = chip->irq_set_affinity(data, mask, force);
++ prog_mask = mask;
+ }
++
++ /*
++ * Make sure we only provide online CPUs to the irqchip,
++ * unless we are being asked to force the affinity (in which
++ * case we do as we are told).
++ */
++ cpumask_and(&tmp_mask, prog_mask, cpu_online_mask);
++ if (!force && !cpumask_empty(&tmp_mask))
++ ret = chip->irq_set_affinity(data, &tmp_mask, force);
++ else if (force)
++ ret = chip->irq_set_affinity(data, mask, force);
++ else
++ ret = -EINVAL;
++
++ raw_spin_unlock(&tmp_mask_lock);
++
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ case IRQ_SET_MASK_OK_DONE:
+@@ -1248,6 +1263,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
+ raw_spin_unlock_irq(&desc->lock);
+ }
+
++/*
++ * Internal function to notify that an interrupt thread is ready.
++ */
++static void irq_thread_set_ready(struct irq_desc *desc,
++ struct irqaction *action)
++{
++ set_bit(IRQTF_READY, &action->thread_flags);
++ wake_up(&desc->wait_for_threads);
++}
++
++/*
++ * Internal function to wake up an interrupt thread and wait until it is
++ * ready.
++ */
++static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc,
++ struct irqaction *action)
++{
++ if (!action || !action->thread)
++ return;
++
++ wake_up_process(action->thread);
++ wait_event(desc->wait_for_threads,
++ test_bit(IRQTF_READY, &action->thread_flags));
++}
++
+ /*
+ * Interrupt handler thread
+ */
+@@ -1259,6 +1299,8 @@ static int irq_thread(void *data)
+ irqreturn_t (*handler_fn)(struct irq_desc *desc,
+ struct irqaction *action);
+
++ irq_thread_set_ready(desc, action);
++
+ if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
+ handler_fn = irq_forced_thread_fn;
+@@ -1683,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
+ }
+
+ if (!shared) {
+- init_waitqueue_head(&desc->wait_for_threads);
+-
+ /* Setup the type (level, edge polarity) if configured: */
+ if (new->flags & IRQF_TRIGGER_MASK) {
+ ret = __irq_set_trigger(desc,
+@@ -1780,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
+
+ irq_setup_timings(desc, new);
+
+- /*
+- * Strictly no need to wake it up, but hung_task complains
+- * when no hard interrupt wakes the thread up.
+- */
+- if (new->thread)
+- wake_up_process(new->thread);
+- if (new->secondary)
+- wake_up_process(new->secondary->thread);
++ wake_up_and_wait_for_irq_thread_ready(desc, new);
++ wake_up_and_wait_for_irq_thread_ready(desc, new->secondary);
+
+ register_irq_proc(irq, desc);
+ new->dir = NULL;
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index 6a5ecee6e5674..d75586dc584f8 100644
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -529,10 +529,10 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
+
+ /*
+ * Checking the first MSI descriptor is sufficient. MSIX supports
+- * masking and MSI does so when the maskbit is set.
++ * masking and MSI does so when the can_mask attribute is set.
+ */
+ desc = first_msi_entry(dev);
+- return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
++ return desc->msi_attrib.is_msix || desc->msi_attrib.can_mask;
+ }
+
+ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+@@ -596,6 +596,13 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+ irqd_clr_can_reserve(irq_data);
+ if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+ irqd_set_msi_nomask_quirk(irq_data);
++ if ((info->flags & MSI_FLAG_ACTIVATE_EARLY) &&
++ irqd_affinity_is_managed(irq_data) &&
++ !cpumask_intersects(irq_data_get_affinity_mask(irq_data),
++ cpu_online_mask)) {
++ irqd_set_managed_shutdown(irq_data);
++ continue;
++ }
+ }
+ ret = irq_domain_activate_irq(irq_data, can_reserve);
+ if (ret)
+diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
+index c2bb07f5bcc72..c4ddd189f3e07 100644
+--- a/kernel/kcsan/Makefile
++++ b/kernel/kcsan/Makefile
+@@ -13,5 +13,6 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
+ obj-y := core.o debugfs.o report.o
+ obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
+
+-CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
++CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer
++CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN)
+ obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o
+diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
+index 76e67d1e02d48..3f3b5e3ca9eb3 100644
+--- a/kernel/kcsan/core.c
++++ b/kernel/kcsan/core.c
+@@ -14,10 +14,12 @@
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
++#include <linux/minmax.h>
+ #include <linux/moduleparam.h>
+ #include <linux/percpu.h>
+ #include <linux/preempt.h>
+ #include <linux/sched.h>
++#include <linux/string.h>
+ #include <linux/uaccess.h>
+
+ #include "encoding.h"
+@@ -1048,7 +1050,9 @@ EXPORT_SYMBOL(__tsan_init);
+ DEFINE_TSAN_ATOMIC_OPS(8);
+ DEFINE_TSAN_ATOMIC_OPS(16);
+ DEFINE_TSAN_ATOMIC_OPS(32);
++#ifdef CONFIG_64BIT
+ DEFINE_TSAN_ATOMIC_OPS(64);
++#endif
+
+ void __tsan_atomic_thread_fence(int memorder);
+ void __tsan_atomic_thread_fence(int memorder)
+@@ -1060,3 +1064,51 @@ EXPORT_SYMBOL(__tsan_atomic_thread_fence);
+ void __tsan_atomic_signal_fence(int memorder);
+ void __tsan_atomic_signal_fence(int memorder) { }
+ EXPORT_SYMBOL(__tsan_atomic_signal_fence);
++
++#ifdef __HAVE_ARCH_MEMSET
++void *__tsan_memset(void *s, int c, size_t count);
++noinline void *__tsan_memset(void *s, int c, size_t count)
++{
++ /*
++ * Instead of not setting up watchpoints where accessed size is greater
++ * than MAX_ENCODABLE_SIZE, truncate checked size to MAX_ENCODABLE_SIZE.
++ */
++ size_t check_len = min_t(size_t, count, MAX_ENCODABLE_SIZE);
++
++ check_access(s, check_len, KCSAN_ACCESS_WRITE);
++ return memset(s, c, count);
++}
++#else
++void *__tsan_memset(void *s, int c, size_t count) __alias(memset);
++#endif
++EXPORT_SYMBOL(__tsan_memset);
++
++#ifdef __HAVE_ARCH_MEMMOVE
++void *__tsan_memmove(void *dst, const void *src, size_t len);
++noinline void *__tsan_memmove(void *dst, const void *src, size_t len)
++{
++ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
++
++ check_access(dst, check_len, KCSAN_ACCESS_WRITE);
++ check_access(src, check_len, 0);
++ return memmove(dst, src, len);
++}
++#else
++void *__tsan_memmove(void *dst, const void *src, size_t len) __alias(memmove);
++#endif
++EXPORT_SYMBOL(__tsan_memmove);
++
++#ifdef __HAVE_ARCH_MEMCPY
++void *__tsan_memcpy(void *dst, const void *src, size_t len);
++noinline void *__tsan_memcpy(void *dst, const void *src, size_t len)
++{
++ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
++
++ check_access(dst, check_len, KCSAN_ACCESS_WRITE);
++ check_access(src, check_len, 0);
++ return memcpy(dst, src, len);
++}
++#else
++void *__tsan_memcpy(void *dst, const void *src, size_t len) __alias(memcpy);
++#endif
++EXPORT_SYMBOL(__tsan_memcpy);
+diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
+index dc55fd5a36fcc..8b176aeab91b6 100644
+--- a/kernel/kcsan/kcsan_test.c
++++ b/kernel/kcsan/kcsan_test.c
+@@ -151,7 +151,7 @@ static bool report_matches(const struct expect_report *r)
+ const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT;
+ bool ret = false;
+ unsigned long flags;
+- typeof(observed.lines) expect;
++ typeof(*observed.lines) *expect;
+ const char *end;
+ char *cur;
+ int i;
+@@ -160,6 +160,10 @@ static bool report_matches(const struct expect_report *r)
+ if (!report_available())
+ return false;
+
++ expect = kmalloc(sizeof(observed.lines), GFP_KERNEL);
++ if (WARN_ON(!expect))
++ return false;
++
+ /* Generate expected report contents. */
+
+ /* Title */
+@@ -243,6 +247,7 @@ static bool report_matches(const struct expect_report *r)
+ strstr(observed.lines[2], expect[1])));
+ out:
+ spin_unlock_irqrestore(&observed.lock, flags);
++ kfree(expect);
+ return ret;
+ }
+
+diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
+index 21137929d4283..b88d5d5f29e48 100644
+--- a/kernel/kcsan/report.c
++++ b/kernel/kcsan/report.c
+@@ -432,8 +432,7 @@ static void print_report(enum kcsan_value_change value_change,
+ dump_stack_print_info(KERN_DEFAULT);
+ pr_err("==================================================================\n");
+
+- if (panic_on_warn)
+- panic("panic_on_warn set ...\n");
++ check_panic_on_warn("KCSAN");
+ }
+
+ static void release_report(unsigned long *flags, struct other_info *other_info)
+diff --git a/kernel/kexec.c b/kernel/kexec.c
+index b5e40f0697681..cb8e6e6f983c7 100644
+--- a/kernel/kexec.c
++++ b/kernel/kexec.c
+@@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
+
+ /*
+ * Because we write directly to the reserved memory region when loading
+- * crash kernels we need a mutex here to prevent multiple crash kernels
+- * from attempting to load simultaneously, and to prevent a crash kernel
+- * from loading over the top of a in use crash kernel.
+- *
+- * KISS: always take the mutex.
++ * crash kernels we need a serialization here to prevent multiple crash
++ * kernels from attempting to load simultaneously.
+ */
+- if (!mutex_trylock(&kexec_mutex))
++ if (!kexec_trylock())
+ return -EBUSY;
+
+ if (flags & KEXEC_ON_CRASH) {
+@@ -165,7 +162,7 @@ out:
+
+ kimage_free(image);
+ out_unlock:
+- mutex_unlock(&kexec_mutex);
++ kexec_unlock();
+ return ret;
+ }
+
+diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
+index 5a5d192a89ac3..7e735fd338586 100644
+--- a/kernel/kexec_core.c
++++ b/kernel/kexec_core.c
+@@ -46,7 +46,7 @@
+ #include <crypto/hash.h>
+ #include "kexec_internal.h"
+
+-DEFINE_MUTEX(kexec_mutex);
++atomic_t __kexec_lock = ATOMIC_INIT(0);
+
+ /* Per cpu memory for storing cpu states in case of system crash. */
+ note_buf_t __percpu *crash_notes;
+@@ -944,7 +944,7 @@ int kexec_load_disabled;
+ */
+ void __noclone __crash_kexec(struct pt_regs *regs)
+ {
+- /* Take the kexec_mutex here to prevent sys_kexec_load
++ /* Take the kexec_lock here to prevent sys_kexec_load
+ * running on one cpu from replacing the crash kernel
+ * we are using after a panic on a different cpu.
+ *
+@@ -952,7 +952,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
+ * of memory the xchg(&kexec_crash_image) would be
+ * sufficient. But since I reuse the memory...
+ */
+- if (mutex_trylock(&kexec_mutex)) {
++ if (kexec_trylock()) {
+ if (kexec_crash_image) {
+ struct pt_regs fixed_regs;
+
+@@ -961,7 +961,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
+ machine_crash_shutdown(&fixed_regs);
+ machine_kexec(kexec_crash_image);
+ }
+- mutex_unlock(&kexec_mutex);
++ kexec_unlock();
+ }
+ }
+ STACK_FRAME_NON_STANDARD(__crash_kexec);
+@@ -989,14 +989,17 @@ void crash_kexec(struct pt_regs *regs)
+ }
+ }
+
+-size_t crash_get_memory_size(void)
++ssize_t crash_get_memory_size(void)
+ {
+- size_t size = 0;
++ ssize_t size = 0;
++
++ if (!kexec_trylock())
++ return -EBUSY;
+
+- mutex_lock(&kexec_mutex);
+ if (crashk_res.end != crashk_res.start)
+ size = resource_size(&crashk_res);
+- mutex_unlock(&kexec_mutex);
++
++ kexec_unlock();
+ return size;
+ }
+
+@@ -1016,7 +1019,8 @@ int crash_shrink_memory(unsigned long new_size)
+ unsigned long old_size;
+ struct resource *ram_res;
+
+- mutex_lock(&kexec_mutex);
++ if (!kexec_trylock())
++ return -EBUSY;
+
+ if (kexec_crash_image) {
+ ret = -ENOENT;
+@@ -1025,6 +1029,7 @@ int crash_shrink_memory(unsigned long new_size)
+ start = crashk_res.start;
+ end = crashk_res.end;
+ old_size = (end == 0) ? 0 : end - start + 1;
++ new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
+ if (new_size >= old_size) {
+ ret = (new_size == old_size) ? 0 : -EINVAL;
+ goto unlock;
+@@ -1036,9 +1041,7 @@ int crash_shrink_memory(unsigned long new_size)
+ goto unlock;
+ }
+
+- start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
+- end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
+-
++ end = start + new_size;
+ crash_free_reserved_phys_range(end, crashk_res.end);
+
+ if ((start == end) && (crashk_res.parent != NULL))
+@@ -1054,7 +1057,7 @@ int crash_shrink_memory(unsigned long new_size)
+ insert_resource(&iomem_resource, ram_res);
+
+ unlock:
+- mutex_unlock(&kexec_mutex);
++ kexec_unlock();
+ return ret;
+ }
+
+@@ -1126,7 +1129,7 @@ int kernel_kexec(void)
+ {
+ int error = 0;
+
+- if (!mutex_trylock(&kexec_mutex))
++ if (!kexec_trylock())
+ return -EBUSY;
+ if (!kexec_image) {
+ error = -EINVAL;
+@@ -1202,7 +1205,7 @@ int kernel_kexec(void)
+ #endif
+
+ Unlock:
+- mutex_unlock(&kexec_mutex);
++ kexec_unlock();
+ return error;
+ }
+
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index 33400ff051a84..8148e89797c78 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -29,6 +29,15 @@
+ #include <linux/vmalloc.h>
+ #include "kexec_internal.h"
+
++#ifdef CONFIG_KEXEC_SIG
++static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
++
++void set_kexec_sig_enforced(void)
++{
++ sig_enforce = true;
++}
++#endif
++
+ static int kexec_calculate_store_digests(struct kimage *image);
+
+ /*
+@@ -108,40 +117,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+ }
+ #endif
+
+-/*
+- * arch_kexec_apply_relocations_add - apply relocations of type RELA
+- * @pi: Purgatory to be relocated.
+- * @section: Section relocations applying to.
+- * @relsec: Section containing RELAs.
+- * @symtab: Corresponding symtab.
+- *
+- * Return: 0 on success, negative errno on error.
+- */
+-int __weak
+-arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
+- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
+-{
+- pr_err("RELA relocation unsupported.\n");
+- return -ENOEXEC;
+-}
+-
+-/*
+- * arch_kexec_apply_relocations - apply relocations of type REL
+- * @pi: Purgatory to be relocated.
+- * @section: Section relocations applying to.
+- * @relsec: Section containing RELs.
+- * @symtab: Corresponding symtab.
+- *
+- * Return: 0 on success, negative errno on error.
+- */
+-int __weak
+-arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
+- const Elf_Shdr *relsec, const Elf_Shdr *symtab)
+-{
+- pr_err("REL relocation unsupported.\n");
+- return -ENOEXEC;
+-}
+-
+ /*
+ * Free up memory used by kernel, initrd, and command line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+@@ -193,7 +168,7 @@ kimage_validate_signature(struct kimage *image)
+ image->kernel_buf_len);
+ if (ret) {
+
+- if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
++ if (sig_enforce) {
+ pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
+ return ret;
+ }
+@@ -368,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+
+ image = NULL;
+
+- if (!mutex_trylock(&kexec_mutex))
++ if (!kexec_trylock())
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+@@ -440,7 +415,7 @@ out:
+ if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
+ arch_kexec_protect_crashkres();
+
+- mutex_unlock(&kexec_mutex);
++ kexec_unlock();
+ kimage_free(image);
+ return ret;
+ }
+@@ -935,10 +910,22 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
+ }
+
+ offset = ALIGN(offset, align);
++
++ /*
++ * Check if the segment contains the entry point, if so,
++ * calculate the value of image->start based on it.
++ * If the compiler has produced more than one .text section
++ * (Eg: .text.hot), they are generally after the main .text
++ * section, and they shall not be used to calculate
++ * image->start. So do not re-calculate image->start if it
++ * is not set to the initial value, and warn the user so they
++ * have a chance to fix their purgatory's linker script.
++ */
+ if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
+ pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
+ pi->ehdr->e_entry < (sechdrs[i].sh_addr
+- + sechdrs[i].sh_size)) {
++ + sechdrs[i].sh_size) &&
++ !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
+ kbuf->image->start -= sechdrs[i].sh_addr;
+ kbuf->image->start += kbuf->mem + offset;
+ }
+diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
+index 48aaf2ac0d0d1..74da1409cd14b 100644
+--- a/kernel/kexec_internal.h
++++ b/kernel/kexec_internal.h
+@@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
+ int kimage_is_destination_range(struct kimage *image,
+ unsigned long start, unsigned long end);
+
+-extern struct mutex kexec_mutex;
++/*
++ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
++ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
++ * "simple" atomic variable that is acquired with a cmpxchg().
++ */
++extern atomic_t __kexec_lock;
++static inline bool kexec_trylock(void)
++{
++ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
++}
++static inline void kexec_unlock(void)
++{
++ atomic_set_release(&__kexec_lock, 0);
++}
+
+ #ifdef CONFIG_KEXEC_FILE
+ #include <linux/purgatory.h>
+diff --git a/kernel/kheaders.c b/kernel/kheaders.c
+index 8f69772af77b4..42163c9e94e55 100644
+--- a/kernel/kheaders.c
++++ b/kernel/kheaders.c
+@@ -26,15 +26,15 @@ asm (
+ " .popsection \n"
+ );
+
+-extern char kernel_headers_data;
+-extern char kernel_headers_data_end;
++extern char kernel_headers_data[];
++extern char kernel_headers_data_end[];
+
+ static ssize_t
+ ikheaders_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+ {
+- memcpy(buf, &kernel_headers_data + off, len);
++ memcpy(buf, &kernel_headers_data[off], len);
+ return len;
+ }
+
+@@ -48,8 +48,8 @@ static struct bin_attribute kheaders_attr __ro_after_init = {
+
+ static int __init ikheaders_init(void)
+ {
+- kheaders_attr.size = (&kernel_headers_data_end -
+- &kernel_headers_data);
++ kheaders_attr.size = (kernel_headers_data_end -
++ kernel_headers_data);
+ return sysfs_create_bin_file(kernel_kobj, &kheaders_attr);
+ }
+
+diff --git a/kernel/kprobes.c b/kernel/kprobes.c
+index 790a573bbe00c..6cf561322bbe6 100644
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -18,6 +18,9 @@
+ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
+ * <prasanna@in.ibm.com> added function-return probes.
+ */
++
++#define pr_fmt(fmt) "kprobes: " fmt
++
+ #include <linux/kprobes.h>
+ #include <linux/hash.h>
+ #include <linux/init.h>
+@@ -448,8 +451,8 @@ static inline int kprobe_optready(struct kprobe *p)
+ return 0;
+ }
+
+-/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+-static inline int kprobe_disarmed(struct kprobe *p)
++/* Return true if the kprobe is disarmed. Note: p must be on hash list */
++bool kprobe_disarmed(struct kprobe *p)
+ {
+ struct optimized_kprobe *op;
+
+@@ -653,7 +656,7 @@ void wait_for_kprobe_optimizer(void)
+ mutex_unlock(&kprobe_mutex);
+ }
+
+-static bool optprobe_queued_unopt(struct optimized_kprobe *op)
++bool optprobe_queued_unopt(struct optimized_kprobe *op)
+ {
+ struct optimized_kprobe *_op;
+
+@@ -892,7 +895,7 @@ static void optimize_all_kprobes(void)
+ optimize_kprobe(p);
+ }
+ cpus_read_unlock();
+- printk(KERN_INFO "Kprobes globally optimized\n");
++ pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
+ out:
+ mutex_unlock(&kprobe_mutex);
+ }
+@@ -925,7 +928,7 @@ static void unoptimize_all_kprobes(void)
+
+ /* Wait for unoptimizing completion */
+ wait_for_kprobe_optimizer();
+- printk(KERN_INFO "Kprobes globally unoptimized\n");
++ pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
+ }
+
+ static DEFINE_MUTEX(kprobe_sysctl_mutex);
+@@ -1003,7 +1006,7 @@ static int reuse_unused_kprobe(struct kprobe *ap)
+ * unregistered.
+ * Thus there should be no chance to reuse unused kprobe.
+ */
+- printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
++ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+@@ -1049,18 +1052,13 @@ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
+ int ret = 0;
+
+ ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
+- if (ret) {
+- pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
+- p->addr, ret);
++ if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
+ return ret;
+- }
+
+ if (*cnt == 0) {
+ ret = register_ftrace_function(ops);
+- if (ret) {
+- pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
++ if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret))
+ goto err_ftrace;
+- }
+ }
+
+ (*cnt)++;
+@@ -1092,14 +1090,14 @@ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
+
+ if (*cnt == 1) {
+ ret = unregister_ftrace_function(ops);
+- if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
++ if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
+ return ret;
+ }
+
+ (*cnt)--;
+
+ ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
+- WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
++ WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
+ p->addr, ret);
+ return ret;
+ }
+@@ -1547,6 +1545,17 @@ int __weak arch_check_ftrace_location(struct kprobe *p)
+ return 0;
+ }
+
++static bool is_cfi_preamble_symbol(unsigned long addr)
++{
++ char symbuf[KSYM_NAME_LEN];
++
++ if (lookup_symbol_name(addr, symbuf))
++ return false;
++
++ return str_has_prefix("__cfi_", symbuf) ||
++ str_has_prefix("__pfx_", symbuf);
++}
++
+ static int check_kprobe_address_safe(struct kprobe *p,
+ struct module **probed_mod)
+ {
+@@ -1559,11 +1568,14 @@ static int check_kprobe_address_safe(struct kprobe *p,
+ preempt_disable();
+
+ /* Ensure it is not in reserved area nor out of text */
+- if (!kernel_text_address((unsigned long) p->addr) ||
++ if (!(core_kernel_text((unsigned long) p->addr) ||
++ is_module_text_address((unsigned long) p->addr)) ||
++ in_gate_area_no_mm((unsigned long) p->addr) ||
+ within_kprobe_blacklist((unsigned long) p->addr) ||
+ jump_label_text_reserved(p->addr, p->addr) ||
+ static_call_text_reserved(p->addr, p->addr) ||
+- find_bug((unsigned long)p->addr)) {
++ find_bug((unsigned long)p->addr) ||
++ is_cfi_preamble_symbol((unsigned long)p->addr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -1704,11 +1716,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
+ /* Try to disarm and disable this/parent probe */
+ if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+ /*
+- * If kprobes_all_disarmed is set, orig_p
+- * should have already been disarmed, so
+- * skip unneed disarming process.
++ * Don't be lazy here. Even if 'kprobes_all_disarmed'
++ * is false, 'orig_p' might not have been armed yet.
++ * Note arm_all_kprobes() __tries__ to arm all kprobes
++ * on the best effort basis.
+ */
+- if (!kprobes_all_disarmed) {
++ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
+ ret = disarm_kprobe(orig_p, true);
+ if (ret) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+@@ -1757,7 +1770,13 @@ static int __unregister_kprobe_top(struct kprobe *p)
+ if ((list_p != p) && (list_p->post_handler))
+ goto noclean;
+ }
+- ap->post_handler = NULL;
++ /*
++ * For the kprobe-on-ftrace case, we keep the
++ * post_handler setting to identify this aggrprobe
++ * armed with kprobe_ipmodify_ops.
++ */
++ if (!kprobe_ftrace(ap))
++ ap->post_handler = NULL;
+ }
+ noclean:
+ /*
+@@ -1885,7 +1904,7 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+
+ node = node->next;
+ }
+- pr_err("Oops! Kretprobe fails to find correct return address.\n");
++ pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
+ BUG_ON(1);
+
+ found:
+@@ -2006,6 +2025,9 @@ int register_kretprobe(struct kretprobe *rp)
+ }
+ }
+
++ if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
++ return -E2BIG;
++
+ rp->kp.pre_handler = pre_handler_kretprobe;
+ rp->kp.post_handler = NULL;
+
+@@ -2202,8 +2224,11 @@ int enable_kprobe(struct kprobe *kp)
+ if (!kprobes_all_disarmed && kprobe_disabled(p)) {
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ ret = arm_kprobe(p);
+- if (ret)
++ if (ret) {
+ p->flags |= KPROBE_FLAG_DISABLED;
++ if (p != kp)
++ kp->flags |= KPROBE_FLAG_DISABLED;
++ }
+ }
+ out:
+ mutex_unlock(&kprobe_mutex);
+@@ -2214,8 +2239,7 @@ EXPORT_SYMBOL_GPL(enable_kprobe);
+ /* Caller must NOT call this in usual path. This is only for critical case */
+ void dump_kprobe(struct kprobe *kp)
+ {
+- pr_err("Dumping kprobe:\n");
+- pr_err("Name: %s\nOffset: %x\nAddress: %pS\n",
++ pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
+ kp->symbol_name, kp->offset, kp->addr);
+ }
+ NOKPROBE_SYMBOL(dump_kprobe);
+@@ -2478,8 +2502,7 @@ static int __init init_kprobes(void)
+ err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+ __stop_kprobe_blacklist);
+ if (err) {
+- pr_err("kprobes: failed to populate blacklist: %d\n", err);
+- pr_err("Please take care of using kprobes.\n");
++ pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
+ }
+
+ if (kretprobe_blacklist_size) {
+@@ -2488,7 +2511,7 @@ static int __init init_kprobes(void)
+ kretprobe_blacklist[i].addr =
+ kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
+ if (!kretprobe_blacklist[i].addr)
+- printk("kretprobe: lookup failed: %s\n",
++ pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
+ kretprobe_blacklist[i].name);
+ }
+ }
+@@ -2692,7 +2715,7 @@ static int arm_all_kprobes(void)
+ }
+
+ if (errors)
+- pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
++ pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally enabled\n");
+@@ -2735,7 +2758,7 @@ static int disarm_all_kprobes(void)
+ }
+
+ if (errors)
+- pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
++ pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
+ errors, total);
+ else
+ pr_info("Kprobes globally disabled\n");
+@@ -2809,13 +2832,12 @@ static const struct file_operations fops_kp = {
+ static int __init debugfs_kprobe_init(void)
+ {
+ struct dentry *dir;
+- unsigned int value = 1;
+
+ dir = debugfs_create_dir("kprobes", NULL);
+
+ debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
+
+- debugfs_create_file("enabled", 0600, dir, &value, &fops_kp);
++ debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
+
+ debugfs_create_file("blacklist", 0400, dir, NULL,
+ &kprobe_blacklist_fops);
+diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
+index 35859da8bd4f7..e20c19e3ba49c 100644
+--- a/kernel/ksysfs.c
++++ b/kernel/ksysfs.c
+@@ -106,7 +106,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded);
+ static ssize_t kexec_crash_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+ {
+- return sprintf(buf, "%zu\n", crash_get_memory_size());
++ ssize_t size = crash_get_memory_size();
++
++ if (size < 0)
++ return size;
++
++ return sprintf(buf, "%zd\n", size);
+ }
+ static ssize_t kexec_crash_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index 5b37a8567168b..e319a1b62586e 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -523,6 +523,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+ to_kthread(p)->cpu = cpu;
+ return p;
+ }
++EXPORT_SYMBOL(kthread_create_on_cpu);
+
+ void kthread_set_per_cpu(struct task_struct *k, int cpu)
+ {
+diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
+index 335d988bd8111..c0789383807b9 100644
+--- a/kernel/livepatch/core.c
++++ b/kernel/livepatch/core.c
+@@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name,
+ return -EINVAL;
+ }
+
+-static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab,
++static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symndx, Elf_Shdr *relasec,
+ const char *sec_objname)
+ {
+@@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab,
+ relas = (Elf_Rela *) relasec->sh_addr;
+ /* For each rela in this klp relocation section */
+ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) {
+- sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info);
++ sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info);
+ if (sym->st_shndx != SHN_LIVEPATCH) {
+ pr_err("symbol %s is not marked as a livepatch symbol\n",
+ strtab + sym->st_name);
+diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
+index 291b857a6e201..dc94bb9420ffb 100644
+--- a/kernel/livepatch/transition.c
++++ b/kernel/livepatch/transition.c
+@@ -610,9 +610,23 @@ void klp_reverse_transition(void)
+ /* Called from copy_process() during fork */
+ void klp_copy_process(struct task_struct *child)
+ {
+- child->patch_state = current->patch_state;
+
+- /* TIF_PATCH_PENDING gets copied in setup_thread_stack() */
++ /*
++ * The parent process may have gone through a KLP transition since
++ * the thread flag was copied in setup_thread_stack earlier. Bring
++ * the task flag up to date with the parent here.
++ *
++ * The operation is serialized against all klp_*_transition()
++ * operations by the tasklist_lock. The only exception is
++ * klp_update_patch_state(current), but we cannot race with
++ * that because we are current.
++ */
++ if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
++ set_tsk_thread_flag(child, TIF_PATCH_PENDING);
++ else
++ clear_tsk_thread_flag(child, TIF_PATCH_PENDING);
++
++ child->patch_state = current->patch_state;
+ }
+
+ /*
+diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
+index bf1c00c881e48..e6a282bc16652 100644
+--- a/kernel/locking/lockdep.c
++++ b/kernel/locking/lockdep.c
+@@ -183,11 +183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
+ static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
+ unsigned long nr_lock_classes;
+ unsigned long nr_zapped_classes;
+-#ifndef CONFIG_DEBUG_LOCKDEP
+-static
+-#endif
++unsigned long max_lock_class_idx;
+ struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+-static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
++DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
+
+ static inline struct lock_class *hlock_class(struct held_lock *hlock)
+ {
+@@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock)
+ * elements. These elements are linked together by the lock_entry member in
+ * struct lock_class.
+ */
+-LIST_HEAD(all_lock_classes);
++static LIST_HEAD(all_lock_classes);
+ static LIST_HEAD(free_lock_classes);
+
+ /**
+@@ -888,7 +886,7 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return NULL;
+
+- hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
++ hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) {
+ if (class->key == key) {
+ /*
+ * Huh! same key, different name? Did someone trample
+@@ -1240,6 +1238,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
+ struct lockdep_subclass_key *key;
+ struct hlist_head *hash_head;
+ struct lock_class *class;
++ int idx;
+
+ DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+@@ -1305,6 +1304,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
+ * of classes.
+ */
+ list_move_tail(&class->lock_entry, &all_lock_classes);
++ idx = class - lock_classes;
++ if (idx > max_lock_class_idx)
++ max_lock_class_idx = idx;
+
+ if (verbose(class)) {
+ graph_unlock();
+@@ -1366,7 +1368,7 @@ static struct lock_list *alloc_list_entry(void)
+ */
+ static int add_lock_to_list(struct lock_class *this,
+ struct lock_class *links_to, struct list_head *head,
+- unsigned long ip, u16 distance, u8 dep,
++ u16 distance, u8 dep,
+ const struct lock_trace *trace)
+ {
+ struct lock_list *entry;
+@@ -3119,19 +3121,15 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
+ * to the previous lock's dependency list:
+ */
+ ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
+- &hlock_class(prev)->locks_after,
+- next->acquire_ip, distance,
+- calc_dep(prev, next),
+- *trace);
++ &hlock_class(prev)->locks_after, distance,
++ calc_dep(prev, next), *trace);
+
+ if (!ret)
+ return 0;
+
+ ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
+- &hlock_class(next)->locks_before,
+- next->acquire_ip, distance,
+- calc_depb(prev, next),
+- *trace);
++ &hlock_class(next)->locks_before, distance,
++ calc_depb(prev, next), *trace);
+ if (!ret)
+ return 0;
+
+@@ -3450,7 +3448,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
+ u16 chain_hlock = chain_hlocks[chain->base + i];
+ unsigned int class_idx = chain_hlock_class_idx(chain_hlock);
+
+- return lock_classes + class_idx - 1;
++ return lock_classes + class_idx;
+ }
+
+ /*
+@@ -3518,7 +3516,7 @@ static void print_chain_keys_chain(struct lock_chain *chain)
+ hlock_id = chain_hlocks[chain->base + i];
+ chain_key = print_chain_key_iteration(hlock_id, chain_key);
+
+- print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1);
++ print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id));
+ printk("\n");
+ }
+ }
+@@ -4222,14 +4220,13 @@ static void __trace_hardirqs_on_caller(void)
+
+ /**
+ * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts
+- * @ip: Caller address
+ *
+ * Invoked before a possible transition to RCU idle from exit to user or
+ * guest mode. This ensures that all RCU operations are done before RCU
+ * stops watching. After the RCU transition lockdep_hardirqs_on() has to be
+ * invoked to set the final state.
+ */
+-void lockdep_hardirqs_on_prepare(unsigned long ip)
++void lockdep_hardirqs_on_prepare(void)
+ {
+ if (unlikely(!debug_locks))
+ return;
+@@ -4826,8 +4823,7 @@ EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
+
+ static void
+ print_lock_nested_lock_not_held(struct task_struct *curr,
+- struct held_lock *hlock,
+- unsigned long ip)
++ struct held_lock *hlock)
+ {
+ if (!debug_locks_off())
+ return;
+@@ -5003,7 +4999,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ chain_key = iterate_chain_key(chain_key, hlock_id(hlock));
+
+ if (nest_lock && !__lock_is_held(nest_lock, -1)) {
+- print_lock_nested_lock_not_held(curr, hlock, ip);
++ print_lock_nested_lock_not_held(curr, hlock);
+ return 0;
+ }
+
+@@ -5200,9 +5196,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
+ return 0;
+ }
+
+- lockdep_init_map_waits(lock, name, key, 0,
+- lock->wait_type_inner,
+- lock->wait_type_outer);
++ lockdep_init_map_type(lock, name, key, 0,
++ lock->wait_type_inner,
++ lock->wait_type_outer,
++ lock->lock_type);
+ class = register_lock_class(lock, subclass, 0);
+ hlock->class_idx = class - lock_classes;
+
+@@ -5366,7 +5363,7 @@ int __lock_is_held(const struct lockdep_map *lock, int read)
+ struct held_lock *hlock = curr->held_locks + i;
+
+ if (match_held_lock(hlock, lock)) {
+- if (read == -1 || hlock->read == read)
++ if (read == -1 || !!hlock->read == read)
+ return LOCK_STATE_HELD;
+
+ return LOCK_STATE_NOT_HELD;
+@@ -5986,6 +5983,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
+ WRITE_ONCE(class->name, NULL);
+ nr_lock_classes--;
+ __clear_bit(class - lock_classes, lock_classes_in_use);
++ if (class - lock_classes == max_lock_class_idx)
++ max_lock_class_idx--;
+ } else {
+ WARN_ONCE(true, "%s() failed for class %s\n", __func__,
+ class->name);
+@@ -6276,7 +6275,13 @@ void lockdep_reset_lock(struct lockdep_map *lock)
+ lockdep_reset_lock_reg(lock);
+ }
+
+-/* Unregister a dynamically allocated key. */
++/*
++ * Unregister a dynamically allocated key.
++ *
++ * Unlike lockdep_register_key(), a search is always done to find a matching
++ * key irrespective of debug_locks to avoid potential invalid access to freed
++ * memory in lock_class entry.
++ */
+ void lockdep_unregister_key(struct lock_class_key *key)
+ {
+ struct hlist_head *hash_head = keyhashentry(key);
+@@ -6291,10 +6296,8 @@ void lockdep_unregister_key(struct lock_class_key *key)
+ return;
+
+ raw_local_irq_save(flags);
+- if (!graph_lock())
+- goto out_irq;
++ lockdep_lock();
+
+- pf = get_pending_free();
+ hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
+ if (k == key) {
+ hlist_del_rcu(&k->hash_entry);
+@@ -6302,11 +6305,13 @@ void lockdep_unregister_key(struct lock_class_key *key)
+ break;
+ }
+ }
+- WARN_ON_ONCE(!found);
+- __lockdep_free_key_range(pf, key, 1);
+- call_rcu_zapped(pf);
+- graph_unlock();
+-out_irq:
++ WARN_ON_ONCE(!found && debug_locks);
++ if (found) {
++ pf = get_pending_free();
++ __lockdep_free_key_range(pf, key, 1);
++ call_rcu_zapped(pf);
++ }
++ lockdep_unlock();
+ raw_local_irq_restore(flags);
+
+ /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
+diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
+index ecb8662e7a4ed..bbe9000260d02 100644
+--- a/kernel/locking/lockdep_internals.h
++++ b/kernel/locking/lockdep_internals.h
+@@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
+
+ #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
+
+-extern struct list_head all_lock_classes;
+ extern struct lock_chain lock_chains[];
+
+ #define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1)
+@@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks;
+
+ extern unsigned int max_lockdep_depth;
+ extern unsigned int max_bfs_queue_depth;
++extern unsigned long max_lock_class_idx;
++
++extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
++extern unsigned long lock_classes_in_use[];
+
+ #ifdef CONFIG_PROVE_LOCKING
+ extern unsigned long lockdep_count_forward_deps(struct lock_class *);
+@@ -205,7 +208,6 @@ struct lockdep_stats {
+ };
+
+ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+-extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+
+ #define __debug_atomic_inc(ptr) \
+ this_cpu_inc(lockdep_stats.ptr);
+diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
+index b8d9a050c337a..15fdc7fa5c688 100644
+--- a/kernel/locking/lockdep_proc.c
++++ b/kernel/locking/lockdep_proc.c
+@@ -24,14 +24,33 @@
+
+ #include "lockdep_internals.h"
+
++/*
++ * Since iteration of lock_classes is done without holding the lockdep lock,
++ * it is not safe to iterate all_lock_classes list directly as the iteration
++ * may branch off to free_lock_classes or the zapped list. Iteration is done
++ * directly on the lock_classes array by checking the lock_classes_in_use
++ * bitmap and max_lock_class_idx.
++ */
++#define iterate_lock_classes(idx, class) \
++ for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \
++ idx++, class++)
++
+ static void *l_next(struct seq_file *m, void *v, loff_t *pos)
+ {
+- return seq_list_next(v, &all_lock_classes, pos);
++ struct lock_class *class = v;
++
++ ++class;
++ *pos = class - lock_classes;
++ return (*pos > max_lock_class_idx) ? NULL : class;
+ }
+
+ static void *l_start(struct seq_file *m, loff_t *pos)
+ {
+- return seq_list_start_head(&all_lock_classes, *pos);
++ unsigned long idx = *pos;
++
++ if (idx > max_lock_class_idx)
++ return NULL;
++ return lock_classes + idx;
+ }
+
+ static void l_stop(struct seq_file *m, void *v)
+@@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class)
+
+ static int l_show(struct seq_file *m, void *v)
+ {
+- struct lock_class *class = list_entry(v, struct lock_class, lock_entry);
++ struct lock_class *class = v;
+ struct lock_list *entry;
+ char usage[LOCK_USAGE_CHARS];
++ int idx = class - lock_classes;
+
+- if (v == &all_lock_classes) {
++ if (v == lock_classes)
+ seq_printf(m, "all lock classes:\n");
++
++ if (!test_bit(idx, lock_classes_in_use))
+ return 0;
+- }
+
+ seq_printf(m, "%p", class->key);
+ #ifdef CONFIG_DEBUG_LOCKDEP
+@@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
+
+ #ifdef CONFIG_PROVE_LOCKING
+ struct lock_class *class;
++ unsigned long idx;
+
+- list_for_each_entry(class, &all_lock_classes, lock_entry) {
++ iterate_lock_classes(idx, class) {
++ if (!test_bit(idx, lock_classes_in_use))
++ continue;
+
+ if (class->usage_mask == 0)
+ nr_unused++;
+@@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
+
+ sum_forward_deps += lockdep_count_forward_deps(class);
+ }
++
+ #ifdef CONFIG_DEBUG_LOCKDEP
+ DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
+ #endif
+@@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, " max bfs queue depth: %11u\n",
+ max_bfs_queue_depth);
+ #endif
++ seq_printf(m, " max lock class index: %11lu\n",
++ max_lock_class_idx);
+ lockdep_stats_debug_show(m);
+ seq_printf(m, " debug_locks: %11u\n",
+ debug_locks);
+@@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file)
+ if (!res) {
+ struct lock_stat_data *iter = data->stats;
+ struct seq_file *m = file->private_data;
++ unsigned long idx;
+
+- list_for_each_entry(class, &all_lock_classes, lock_entry) {
++ iterate_lock_classes(idx, class) {
++ if (!test_bit(idx, lock_classes_in_use))
++ continue;
+ iter->class = class;
+ iter->stats = lock_stats(class);
+ iter++;
+ }
++
+ data->iter_end = iter;
+
+ sort(data->stats, data->iter_end - data->stats,
+@@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+ {
+ struct lock_class *class;
++ unsigned long idx;
+ char c;
+
+ if (count) {
+@@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf,
+ if (c != '0')
+ return count;
+
+- list_for_each_entry(class, &all_lock_classes, lock_entry)
++ iterate_lock_classes(idx, class) {
++ if (!test_bit(idx, lock_classes_in_use))
++ continue;
+ clear_lock_stats(class);
++ }
+ }
+ return count;
+ }
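The lockdep_proc.c hunks above replace list walks of all_lock_classes with direct iteration over the static lock_classes[] array, guarded by the lock_classes_in_use bitmap and the new max_lock_class_idx high-water mark. A minimal user-space sketch of that pattern follows; it is illustrative only (the array, bitmap and all names are invented for the example) and is not kernel code.

    /*
     * Illustrative only: iterate a fixed-size table guarded by an
     * "in use" bitmap plus a high-water-mark index, mirroring the
     * iterate_lock_classes() idea.  Compiles as plain user-space C.
     */
    #include <limits.h>
    #include <stdio.h>

    #define MAX_OBJS      64
    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    struct obj { const char *name; };

    static struct obj objs[MAX_OBJS];
    static unsigned long in_use[(MAX_OBJS + BITS_PER_LONG - 1) / BITS_PER_LONG];
    static unsigned long max_idx;   /* highest index ever handed out */

    static int obj_test_bit(unsigned long idx)
    {
            return (in_use[idx / BITS_PER_LONG] >> (idx % BITS_PER_LONG)) & 1UL;
    }

    static void obj_set_bit(unsigned long idx)
    {
            in_use[idx / BITS_PER_LONG] |= 1UL << (idx % BITS_PER_LONG);
    }

    int main(void)
    {
            unsigned long idx;

            /* register two objects, leaving a hole at index 1 */
            objs[0].name = "alpha"; obj_set_bit(0);
            objs[2].name = "gamma"; obj_set_bit(2); max_idx = 2;

            for (idx = 0; idx <= max_idx; idx++) {
                    if (!obj_test_bit(idx))
                            continue;       /* skip freed/unused slots */
                    printf("%lu: %s\n", idx, objs[idx].name);
            }
            return 0;
    }

Skipping unused slots via the bitmap is what lets the /proc readers tolerate classes being zapped concurrently, since they never follow list pointers that might have moved to the free or zapped lists.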
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 6bb116c559b4a..ee5be1dda0c40 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -87,15 +87,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
+ * set this bit before looking at the lock.
+ */
+
+-static __always_inline void
+-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
++static __always_inline struct task_struct *
++rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
+ {
+ unsigned long val = (unsigned long)owner;
+
+ if (rt_mutex_has_waiters(lock))
+ val |= RT_MUTEX_HAS_WAITERS;
+
+- WRITE_ONCE(lock->owner, (struct task_struct *)val);
++ return (struct task_struct *)val;
++}
++
++static __always_inline void
++rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
++{
++ /*
++ * lock->wait_lock is held but explicit acquire semantics are needed
++ * for a new lock owner so WRITE_ONCE is insufficient.
++ */
++ xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
++}
++
++static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
++{
++ /* lock->wait_lock is held so the unlock provides release semantics. */
++ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
+ }
+
+ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
+@@ -104,7 +120,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
+ ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
+ }
+
+-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
++static __always_inline void
++fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
+ {
+ unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+@@ -170,8 +187,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
+ * still set.
+ */
+ owner = READ_ONCE(*p);
+- if (owner & RT_MUTEX_HAS_WAITERS)
+- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
++ if (owner & RT_MUTEX_HAS_WAITERS) {
++ /*
++ * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
++ * why xchg_acquire() is used for updating owner for
++ * locking and WRITE_ONCE() for unlocking.
++ *
++ * WRITE_ONCE() would work for the acquire case too, but
++ * if the lock acquisition failed it might force other
++ * lockers into the slow path unnecessarily.
++ */
++ if (acquire_lock)
++ xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
++ else
++ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
++ }
+ }
+
+ /*
+@@ -206,6 +236,13 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
+ owner = *p;
+ } while (cmpxchg_relaxed(p, owner,
+ owner | RT_MUTEX_HAS_WAITERS) != owner);
++
++ /*
++ * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
++ * operations in the event of contention. Ensure the successful
++ * cmpxchg is visible.
++ */
++ smp_mb__after_atomic();
+ }
+
+ /*
+@@ -294,21 +331,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
+ return prio;
+ }
+
++/*
++ * Update the waiter->tree copy of the sort keys.
++ */
+ static __always_inline void
+ waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+ {
+- waiter->prio = __waiter_prio(task);
+- waiter->deadline = task->dl.deadline;
++ lockdep_assert_held(&waiter->lock->wait_lock);
++ lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
++
++ waiter->tree.prio = __waiter_prio(task);
++ waiter->tree.deadline = task->dl.deadline;
++}
++
++/*
++ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
++ */
++static __always_inline void
++waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
++{
++ lockdep_assert_held(&waiter->lock->wait_lock);
++ lockdep_assert_held(&task->pi_lock);
++ lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
++
++ waiter->pi_tree.prio = waiter->tree.prio;
++ waiter->pi_tree.deadline = waiter->tree.deadline;
+ }
+
+ /*
+- * Only use with rt_mutex_waiter_{less,equal}()
++ * Only use with rt_waiter_node_{less,equal}()
+ */
++#define task_to_waiter_node(p) \
++ &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+ #define task_to_waiter(p) \
+- &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
++ &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
+
+-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+- struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
++ struct rt_waiter_node *right)
+ {
+ if (left->prio < right->prio)
+ return 1;
+@@ -325,8 +384,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+ return 0;
+ }
+
+-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+- struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
++ struct rt_waiter_node *right)
+ {
+ if (left->prio != right->prio)
+ return 0;
+@@ -346,7 +405,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+ struct rt_mutex_waiter *top_waiter)
+ {
+- if (rt_mutex_waiter_less(waiter, top_waiter))
++ if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
+ return true;
+
+ #ifdef RT_MUTEX_BUILD_SPINLOCKS
+@@ -354,30 +413,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+ * Note that RT tasks are excluded from same priority (lateral)
+ * steals to prevent the introduction of an unbounded latency.
+ */
+- if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
++ if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+ return false;
+
+- return rt_mutex_waiter_equal(waiter, top_waiter);
++ return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
+ #else
+ return false;
+ #endif
+ }
+
+ #define __node_2_waiter(node) \
+- rb_entry((node), struct rt_mutex_waiter, tree_entry)
++ rb_entry((node), struct rt_mutex_waiter, tree.entry)
+
+ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+ struct rt_mutex_waiter *aw = __node_2_waiter(a);
+ struct rt_mutex_waiter *bw = __node_2_waiter(b);
+
+- if (rt_mutex_waiter_less(aw, bw))
++ if (rt_waiter_node_less(&aw->tree, &bw->tree))
+ return 1;
+
+ if (!build_ww_mutex())
+ return 0;
+
+- if (rt_mutex_waiter_less(bw, aw))
++ if (rt_waiter_node_less(&bw->tree, &aw->tree))
+ return 0;
+
+ /* NOTE: relies on waiter->ww_ctx being set before insertion */
+@@ -395,48 +454,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
+ static __always_inline void
+ rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+- rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
++ lockdep_assert_held(&lock->wait_lock);
++
++ rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
+ }
+
+ static __always_inline void
+ rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+- if (RB_EMPTY_NODE(&waiter->tree_entry))
++ lockdep_assert_held(&lock->wait_lock);
++
++ if (RB_EMPTY_NODE(&waiter->tree.entry))
+ return;
+
+- rb_erase_cached(&waiter->tree_entry, &lock->waiters);
+- RB_CLEAR_NODE(&waiter->tree_entry);
++ rb_erase_cached(&waiter->tree.entry, &lock->waiters);
++ RB_CLEAR_NODE(&waiter->tree.entry);
+ }
+
+-#define __node_2_pi_waiter(node) \
+- rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
++#define __node_2_rt_node(node) \
++ rb_entry((node), struct rt_waiter_node, entry)
+
+-static __always_inline bool
+-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
++static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+- return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
++ return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
+ }
+
+ static __always_inline void
+ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+- rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
++ lockdep_assert_held(&task->pi_lock);
++
++ rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
+ }
+
+ static __always_inline void
+ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+- if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
++ lockdep_assert_held(&task->pi_lock);
++
++ if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
+ return;
+
+- rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
+- RB_CLEAR_NODE(&waiter->pi_tree_entry);
++ rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
++ RB_CLEAR_NODE(&waiter->pi_tree.entry);
+ }
+
+-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
++static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
++ struct task_struct *p)
+ {
+ struct task_struct *pi_task = NULL;
+
++ lockdep_assert_held(&lock->wait_lock);
++ lockdep_assert(rt_mutex_owner(lock) == p);
+ lockdep_assert_held(&p->pi_lock);
+
+ if (task_has_pi_waiters(p))
+@@ -525,9 +594,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+ * Chain walk basics and protection scope
+ *
+ * [R] refcount on task
+- * [P] task->pi_lock held
++ * [Pn] task->pi_lock held
+ * [L] rtmutex->wait_lock held
+ *
++ * Normal locking order:
++ *
++ * rtmutex->wait_lock
++ * task->pi_lock
++ *
+ * Step Description Protected by
+ * function arguments:
+ * @task [R]
+@@ -542,27 +616,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+ * again:
+ * loop_sanity_check();
+ * retry:
+- * [1] lock(task->pi_lock); [R] acquire [P]
+- * [2] waiter = task->pi_blocked_on; [P]
+- * [3] check_exit_conditions_1(); [P]
+- * [4] lock = waiter->lock; [P]
+- * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
+- * unlock(task->pi_lock); release [P]
++ * [1] lock(task->pi_lock); [R] acquire [P1]
++ * [2] waiter = task->pi_blocked_on; [P1]
++ * [3] check_exit_conditions_1(); [P1]
++ * [4] lock = waiter->lock; [P1]
++ * [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L]
++ * unlock(task->pi_lock); release [P1]
+ * goto retry;
+ * }
+- * [6] check_exit_conditions_2(); [P] + [L]
+- * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
+- * [8] unlock(task->pi_lock); release [P]
++ * [6] check_exit_conditions_2(); [P1] + [L]
++ * [7] requeue_lock_waiter(lock, waiter); [P1] + [L]
++ * [8] unlock(task->pi_lock); release [P1]
+ * put_task_struct(task); release [R]
+ * [9] check_exit_conditions_3(); [L]
+ * [10] task = owner(lock); [L]
+ * get_task_struct(task); [L] acquire [R]
+- * lock(task->pi_lock); [L] acquire [P]
+- * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
+- * [12] check_exit_conditions_4(); [P] + [L]
+- * [13] unlock(task->pi_lock); release [P]
++ * lock(task->pi_lock); [L] acquire [P2]
++ * [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
++ * [12] check_exit_conditions_4(); [P2] + [L]
++ * [13] unlock(task->pi_lock); release [P2]
+ * unlock(lock->wait_lock); release [L]
+ * goto again;
++ *
++ * Where P1 is the blocking task and P2 is the lock owner; going up one step
++ * the owner becomes the next blocked task etc.
++ *
++ *
+ */
+ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ enum rtmutex_chainwalk chwalk,
+@@ -710,7 +789,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * enabled we continue, but stop the requeueing in the chain
+ * walk.
+ */
+- if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++ if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+ if (!detect_deadlock)
+ goto out_unlock_pi;
+ else
+@@ -718,13 +797,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ }
+
+ /*
+- * [4] Get the next lock
++ * [4] Get the next lock; per holding task->pi_lock we can't unblock
++ * and guarantee @lock's existence.
+ */
+ lock = waiter->lock;
+ /*
+ * [5] We need to trylock here as we are holding task->pi_lock,
+ * which is the reverse lock order versus the other rtmutex
+ * operations.
++ *
++ * Per the above, holding task->pi_lock guarantees lock exists, so
++ * inverting this lock order is infeasible from a life-time
++ * perspective.
+ */
+ if (!raw_spin_trylock(&lock->wait_lock)) {
+ raw_spin_unlock_irq(&task->pi_lock);
+@@ -828,17 +912,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * or
+ *
+ * DL CBS enforcement advancing the effective deadline.
+- *
+- * Even though pi_waiters also uses these fields, and that tree is only
+- * updated in [11], we can do this here, since we hold [L], which
+- * serializes all pi_waiters access and rb_erase() does not care about
+- * the values of the node being removed.
+ */
+ waiter_update_prio(waiter, task);
+
+ rt_mutex_enqueue(lock, waiter);
+
+- /* [8] Release the task */
++ /*
++ * [8] Release the (blocking) task in preparation for
++ * taking the owner task in [10].
++ *
++ * Since we hold lock->wait_lock, task cannot unblock, even if we
++ * release task->pi_lock.
++ */
+ raw_spin_unlock(&task->pi_lock);
+ put_task_struct(task);
+
+@@ -855,13 +940,19 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * then we need to wake the new top waiter up to try
+ * to get the lock.
+ */
+- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
+- wake_up_state(waiter->task, waiter->wake_state);
++ top_waiter = rt_mutex_top_waiter(lock);
++ if (prerequeue_top_waiter != top_waiter)
++ wake_up_state(top_waiter->task, top_waiter->wake_state);
+ raw_spin_unlock_irq(&lock->wait_lock);
+ return 0;
+ }
+
+- /* [10] Grab the next task, i.e. the owner of @lock */
++ /*
++ * [10] Grab the next task, i.e. the owner of @lock
++ *
++ * Per holding lock->wait_lock and checking for !owner above, there
++ * must be an owner and it cannot go away.
++ */
+ task = get_task_struct(rt_mutex_owner(lock));
+ raw_spin_lock(&task->pi_lock);
+
+@@ -874,8 +965,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ * and adjust the priority of the owner.
+ */
+ rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
++ waiter_clone_prio(waiter, task);
+ rt_mutex_enqueue_pi(task, waiter);
+- rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(lock, task);
+
+ } else if (prerequeue_top_waiter == waiter) {
+ /*
+@@ -890,8 +982,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+ */
+ rt_mutex_dequeue_pi(task, waiter);
+ waiter = rt_mutex_top_waiter(lock);
++ waiter_clone_prio(waiter, task);
+ rt_mutex_enqueue_pi(task, waiter);
+- rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(lock, task);
+ } else {
+ /*
+ * Nothing changed. No need to do any priority
+@@ -1104,6 +1197,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter_update_prio(waiter, task);
++ waiter_clone_prio(waiter, task);
+
+ /* Get the top priority waiter on the lock */
+ if (rt_mutex_has_waiters(lock))
+@@ -1137,7 +1231,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+ rt_mutex_dequeue_pi(owner, top_waiter);
+ rt_mutex_enqueue_pi(owner, waiter);
+
+- rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(lock, owner);
+ if (owner->pi_blocked_on)
+ chain_walk = 1;
+ } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+@@ -1184,6 +1278,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+ {
+ struct rt_mutex_waiter *waiter;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ raw_spin_lock(&current->pi_lock);
+
+ waiter = rt_mutex_top_waiter(lock);
+@@ -1196,7 +1292,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+ * task unblocks.
+ */
+ rt_mutex_dequeue_pi(current, waiter);
+- rt_mutex_adjust_prio(current);
++ rt_mutex_adjust_prio(lock, current);
+
+ /*
+ * As we are waking up the top waiter, and the waiter stays
+@@ -1231,7 +1327,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
+ * try_to_take_rt_mutex() sets the lock waiters bit
+ * unconditionally. Clean this up.
+ */
+- fixup_rt_mutex_waiters(lock);
++ fixup_rt_mutex_waiters(lock, true);
+
+ return ret;
+ }
+@@ -1373,7 +1469,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
+ * - the VCPU on which owner runs is preempted
+ */
+ if (!owner->on_cpu || need_resched() ||
+- rt_mutex_waiter_is_top_waiter(lock, waiter) ||
++ !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+ vcpu_is_preempted(task_cpu(owner))) {
+ res = false;
+ break;
+@@ -1433,7 +1529,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
+ if (rt_mutex_has_waiters(lock))
+ rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+
+- rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(lock, owner);
+
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
+@@ -1591,7 +1687,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
+ * try_to_take_rt_mutex() sets the waiter bit
+ * unconditionally. We might have to fix that up.
+ */
+- fixup_rt_mutex_waiters(lock);
++ fixup_rt_mutex_waiters(lock, true);
+ return ret;
+ }
+
+@@ -1701,7 +1797,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally.
+ * We might have to fix that up:
+ */
+- fixup_rt_mutex_waiters(lock);
++ fixup_rt_mutex_waiters(lock, true);
+ debug_rt_mutex_free_waiter(&waiter);
+ }
+
+diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
+index 5c9299aaabae1..56d1938cb52a1 100644
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -245,7 +245,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
+ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
+ {
+ debug_rt_mutex_proxy_unlock(lock);
+- rt_mutex_set_owner(lock, NULL);
++ rt_mutex_clear_owner(lock);
+ }
+
+ /**
+@@ -360,7 +360,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+ */
+- fixup_rt_mutex_waiters(lock);
++ fixup_rt_mutex_waiters(lock, true);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+@@ -416,7 +416,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+ */
+- fixup_rt_mutex_waiters(lock);
++ fixup_rt_mutex_waiters(lock, false);
+
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+@@ -437,7 +437,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++ if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index c47e8361bfb5c..1162e07cdaea1 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -17,27 +17,44 @@
+ #include <linux/rtmutex.h>
+ #include <linux/sched/wake_q.h>
+
++
++/*
++ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two
++ * separate trees and they need their own copy of the sort keys because of
++ * different locking requirements.
++ *
++ * @entry: rbtree node to enqueue into the waiters tree
++ * @prio: Priority of the waiter
++ * @deadline: Deadline of the waiter if applicable
++ *
++ * See rt_waiter_node_less() and waiter_*_prio().
++ */
++struct rt_waiter_node {
++ struct rb_node entry;
++ int prio;
++ u64 deadline;
++};
++
+ /*
+ * This is the control structure for tasks blocked on a rt_mutex,
+ * which is allocated on the kernel stack on of the blocked task.
+ *
+- * @tree_entry: pi node to enqueue into the mutex waiters tree
+- * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
++ * @tree: node to enqueue into the mutex waiters tree
++ * @pi_tree: node to enqueue into the mutex owner waiters tree
+ * @task: task reference to the blocked task
+ * @lock: Pointer to the rt_mutex on which the waiter blocks
+ * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
+- * @prio: Priority of the waiter
+- * @deadline: Deadline of the waiter if applicable
+ * @ww_ctx: WW context pointer
++ *
++ * @tree is ordered by @lock->wait_lock
++ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock
+ */
+ struct rt_mutex_waiter {
+- struct rb_node tree_entry;
+- struct rb_node pi_tree_entry;
++ struct rt_waiter_node tree;
++ struct rt_waiter_node pi_tree;
+ struct task_struct *task;
+ struct rt_mutex_base *lock;
+ unsigned int wake_state;
+- int prio;
+- u64 deadline;
+ struct ww_acquire_ctx *ww_ctx;
+ };
+
+@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+ {
+ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+
+- return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
++ return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
+ }
+
+ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
+ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+ struct rt_mutex_waiter *w = NULL;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ if (leftmost) {
+- w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
++ w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
+ BUG_ON(w->lock != lock);
+ }
+ return w;
+@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
+
+ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
+ {
++ lockdep_assert_held(&p->pi_lock);
++
+ return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
+- pi_tree_entry);
++ pi_tree.entry);
+ }
+
+ #define RT_MUTEX_HAS_WAITERS 1UL
+@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
+ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+ {
+ debug_rt_mutex_init_waiter(waiter);
+- RB_CLEAR_NODE(&waiter->pi_tree_entry);
+- RB_CLEAR_NODE(&waiter->tree_entry);
++ RB_CLEAR_NODE(&waiter->pi_tree.entry);
++ RB_CLEAR_NODE(&waiter->tree.entry);
+ waiter->wake_state = TASK_NORMAL;
+ waiter->task = NULL;
+ }
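The rtmutex_common.h hunk above splits the waiter into two rt_waiter_node copies, one per rbtree, because waiter->tree is serialized by lock->wait_lock while waiter->pi_tree is serialized by the owner's pi_lock. A toy user-space sketch of keeping duplicate sort keys and cloning one copy into the other is shown below; the field names are invented for illustration and this is not the kernel structure.

    /*
     * Illustrative only: a waiter-like object keeps two copies of its sort
     * keys, one per tree, and clones the wait_lock-protected copy into the
     * pi_lock-protected copy before (re)queueing on the owner's tree.
     */
    #include <stdio.h>

    struct node_copy {
            int prio;
            unsigned long long deadline;
    };

    struct waiter {
            struct node_copy tree;          /* ordered under the lock's wait_lock */
            struct node_copy pi_tree;       /* ordered under the owner's pi_lock */
    };

    static void clone_keys(struct waiter *w)
    {
            w->pi_tree = w->tree;           /* both locks would be held here */
    }

    int main(void)
    {
            struct waiter w = { .tree = { .prio = 10, .deadline = 100 } };

            clone_keys(&w);
            printf("pi_tree: prio=%d deadline=%llu\n",
                   w.pi_tree.prio, w.pi_tree.deadline);
            return 0;
    }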
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index 000e8d5a28841..f0287a16b4ec8 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -106,9 +106,9 @@
+ * atomic_long_cmpxchg() will be used to obtain writer lock.
+ *
+ * There are three places where the lock handoff bit may be set or cleared.
+- * 1) rwsem_mark_wake() for readers.
+- * 2) rwsem_try_write_lock() for writers.
+- * 3) Error path of rwsem_down_write_slowpath().
++ * 1) rwsem_mark_wake() for readers -- set, clear
++ * 2) rwsem_try_write_lock() for writers -- set, clear
++ * 3) rwsem_del_waiter() -- clear
+ *
+ * For all the above cases, wait_lock will be held. A writer must also
+ * be the first one in the wait_list to be eligible for setting the handoff
+@@ -335,6 +335,7 @@ struct rwsem_waiter {
+ struct task_struct *task;
+ enum rwsem_waiter_type type;
+ unsigned long timeout;
++ bool handoff_set;
+ };
+ #define rwsem_first_waiter(sem) \
+ list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
+@@ -345,12 +346,6 @@ enum rwsem_wake_type {
+ RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
+ };
+
+-enum writer_wait_state {
+- WRITER_NOT_FIRST, /* Writer is not first in wait list */
+- WRITER_FIRST, /* Writer is first in wait list */
+- WRITER_HANDOFF /* Writer is first & handoff needed */
+-};
+-
+ /*
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
+ * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
+@@ -366,6 +361,31 @@ enum writer_wait_state {
+ */
+ #define MAX_READERS_WAKEUP 0x100
+
++static inline void
++rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
++{
++ lockdep_assert_held(&sem->wait_lock);
++ list_add_tail(&waiter->list, &sem->wait_list);
++ /* caller will set RWSEM_FLAG_WAITERS */
++}
++
++/*
++ * Remove a waiter from the wait_list and clear flags.
++ *
++ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
++ * this function. Modify with care.
++ */
++static inline void
++rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
++{
++ lockdep_assert_held(&sem->wait_lock);
++ list_del(&waiter->list);
++ if (likely(!list_empty(&sem->wait_list)))
++ return;
++
++ atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
++}
++
+ /*
+ * handle the lock release when processes blocked on it that can now run
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
+@@ -377,6 +397,8 @@ enum writer_wait_state {
+ * preferably when the wait_lock is released
+ * - woken process blocks are discarded from the list after having task zeroed
+ * - writers are only marked woken if downgrading is false
++ *
++ * Implies rwsem_del_waiter() for all woken readers.
+ */
+ static void rwsem_mark_wake(struct rw_semaphore *sem,
+ enum rwsem_wake_type wake_type,
+@@ -432,10 +454,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
+ * to give up the lock), request a HANDOFF to
+ * force the issue.
+ */
+- if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
+- time_after(jiffies, waiter->timeout)) {
+- adjustment -= RWSEM_FLAG_HANDOFF;
+- lockevent_inc(rwsem_rlock_handoff);
++ if (time_after(jiffies, waiter->timeout)) {
++ if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
++ adjustment -= RWSEM_FLAG_HANDOFF;
++ lockevent_inc(rwsem_rlock_handoff);
++ }
++ waiter->handoff_set = true;
+ }
+
+ atomic_long_add(-adjustment, &sem->count);
+@@ -491,18 +515,25 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
+
+ adjustment = woken * RWSEM_READER_BIAS - adjustment;
+ lockevent_cond_inc(rwsem_wake_reader, woken);
++
++ oldcount = atomic_long_read(&sem->count);
+ if (list_empty(&sem->wait_list)) {
+- /* hit end of list above */
++ /*
++ * Combined with list_move_tail() above, this implies
++ * rwsem_del_waiter().
++ */
+ adjustment -= RWSEM_FLAG_WAITERS;
++ if (oldcount & RWSEM_FLAG_HANDOFF)
++ adjustment -= RWSEM_FLAG_HANDOFF;
++ } else if (woken) {
++ /*
++ * When we've woken a reader, we no longer need to force
++ * writers to give up the lock and we can clear HANDOFF.
++ */
++ if (oldcount & RWSEM_FLAG_HANDOFF)
++ adjustment -= RWSEM_FLAG_HANDOFF;
+ }
+
+- /*
+- * When we've woken a reader, we no longer need to force writers
+- * to give up the lock and we can clear HANDOFF.
+- */
+- if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
+- adjustment -= RWSEM_FLAG_HANDOFF;
+-
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
+
+@@ -533,12 +564,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ *
+- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
+- * bit is set or the lock is acquired with handoff bit cleared.
++ * Implies rwsem_del_waiter() on success.
+ */
+ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+- enum writer_wait_state wstate)
++ struct rwsem_waiter *waiter)
+ {
++ struct rwsem_waiter *first = rwsem_first_waiter(sem);
+ long count, new;
+
+ lockdep_assert_held(&sem->wait_lock);
+@@ -547,13 +578,26 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+ do {
+ bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
+
+- if (has_handoff && wstate == WRITER_NOT_FIRST)
+- return false;
++ if (has_handoff) {
++ /*
++ * Honor handoff bit and yield only when the first
++ * waiter is the one that set it. Otherwise, we
++ * still try to acquire the rwsem.
++ */
++ if (first->handoff_set && (waiter != first))
++ return false;
++ }
+
+ new = count;
+
+ if (count & RWSEM_LOCK_MASK) {
+- if (has_handoff || (wstate != WRITER_HANDOFF))
++ /*
++ * A waiter (first or not) can set the handoff bit
++ * if it is an RT task or has waited in the wait
++ * queue for too long.
++ */
++ if (has_handoff || (!rt_task(waiter->task) &&
++ !time_after(jiffies, waiter->timeout)))
+ return false;
+
+ new |= RWSEM_FLAG_HANDOFF;
+@@ -567,16 +611,43 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+ } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
+
+ /*
+- * We have either acquired the lock with handoff bit cleared or
+- * set the handoff bit.
++ * We have either acquired the lock with handoff bit cleared or set
++ * the handoff bit. Only the first waiter can have its handoff_set
++ * set here to enable optimistic spinning in slowpath loop.
+ */
+- if (new & RWSEM_FLAG_HANDOFF)
++ if (new & RWSEM_FLAG_HANDOFF) {
++ first->handoff_set = true;
++ lockevent_inc(rwsem_wlock_handoff);
+ return false;
++ }
+
++ /*
++ * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
++ * success.
++ */
++ list_del(&waiter->list);
+ rwsem_set_owner(sem);
+ return true;
+ }
+
++/*
++ * The rwsem_spin_on_owner() function returns the following 4 values
++ * depending on the lock owner state.
++ * OWNER_NULL : owner is currently NULL
++ * OWNER_WRITER: when owner changes and is a writer
++ * OWNER_READER: when owner changes and the new owner may be a reader.
++ * OWNER_NONSPINNABLE:
++ * when optimistic spinning has to stop because either the
++ * owner stops running, is unknown, or its timeslice has
++ * been used up.
++ */
++enum owner_state {
++ OWNER_NULL = 1 << 0,
++ OWNER_WRITER = 1 << 1,
++ OWNER_READER = 1 << 2,
++ OWNER_NONSPINNABLE = 1 << 3,
++};
++
+ #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ /*
+ * Try to acquire write lock before the writer has been put on wait queue.
+@@ -632,23 +703,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+ return ret;
+ }
+
+-/*
+- * The rwsem_spin_on_owner() function returns the following 4 values
+- * depending on the lock owner state.
+- * OWNER_NULL : owner is currently NULL
+- * OWNER_WRITER: when owner changes and is a writer
+- * OWNER_READER: when owner changes and the new owner may be a reader.
+- * OWNER_NONSPINNABLE:
+- * when optimistic spinning has to stop because either the
+- * owner stops running, is unknown, or its timeslice has
+- * been used up.
+- */
+-enum owner_state {
+- OWNER_NULL = 1 << 0,
+- OWNER_WRITER = 1 << 1,
+- OWNER_READER = 1 << 2,
+- OWNER_NONSPINNABLE = 1 << 3,
+-};
+ #define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
+
+ static inline enum owner_state
+@@ -878,12 +932,11 @@ static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+
+ static inline void clear_nonspinnable(struct rw_semaphore *sem) { }
+
+-static inline int
++static inline enum owner_state
+ rwsem_spin_on_owner(struct rw_semaphore *sem)
+ {
+- return 0;
++ return OWNER_NONSPINNABLE;
+ }
+-#define OWNER_NULL 1
+ #endif
+
+ /*
+@@ -933,6 +986,7 @@ queue:
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_READ;
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
++ waiter.handoff_set = false;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (list_empty(&sem->wait_list)) {
+@@ -953,7 +1007,7 @@ queue:
+ }
+ adjustment += RWSEM_FLAG_WAITERS;
+ }
+- list_add_tail(&waiter.list, &sem->wait_list);
++ rwsem_add_waiter(sem, &waiter);
+
+ /* we're now waiting on the lock, but no longer actively locking */
+ count = atomic_long_add_return(adjustment, &sem->count);
+@@ -990,7 +1044,7 @@ queue:
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
+ break;
+ }
+- schedule();
++ schedule_preempt_disabled();
+ lockevent_inc(rwsem_sleep_reader);
+ }
+
+@@ -999,11 +1053,7 @@ queue:
+ return sem;
+
+ out_nolock:
+- list_del(&waiter.list);
+- if (list_empty(&sem->wait_list)) {
+- atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
+- &sem->count);
+- }
++ rwsem_del_waiter(sem, &waiter);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock_fail);
+@@ -1017,9 +1067,7 @@ static struct rw_semaphore *
+ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ {
+ long count;
+- enum writer_wait_state wstate;
+ struct rwsem_waiter waiter;
+- struct rw_semaphore *ret = sem;
+ DEFINE_WAKE_Q(wake_q);
+
+ /* do optimistic spinning and steal lock if possible */
+@@ -1035,16 +1083,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_WRITE;
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
++ waiter.handoff_set = false;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+-
+- /* account for this before adding a new element to the list */
+- wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
+-
+- list_add_tail(&waiter.list, &sem->wait_list);
++ rwsem_add_waiter(sem, &waiter);
+
+ /* we're now waiting on the lock */
+- if (wstate == WRITER_NOT_FIRST) {
++ if (rwsem_first_waiter(sem) != &waiter) {
+ count = atomic_long_read(&sem->count);
+
+ /*
+@@ -1080,13 +1125,16 @@ wait:
+ /* wait until we successfully acquire the lock */
+ set_current_state(state);
+ for (;;) {
+- if (rwsem_try_write_lock(sem, wstate)) {
++ if (rwsem_try_write_lock(sem, &waiter)) {
+ /* rwsem_try_write_lock() implies ACQUIRE on success */
+ break;
+ }
+
+ raw_spin_unlock_irq(&sem->wait_lock);
+
++ if (signal_pending_state(state, current))
++ goto out_nolock;
++
+ /*
+ * After setting the handoff bit and failing to acquire
+ * the lock, attempt to spin on owner to accelerate lock
+@@ -1095,70 +1143,37 @@ wait:
+ * In this case, we attempt to acquire the lock again
+ * without sleeping.
+ */
+- if (wstate == WRITER_HANDOFF &&
+- rwsem_spin_on_owner(sem) == OWNER_NULL)
+- goto trylock_again;
+-
+- /* Block until there are no active lockers. */
+- for (;;) {
+- if (signal_pending_state(state, current))
+- goto out_nolock;
+-
+- schedule();
+- lockevent_inc(rwsem_sleep_writer);
+- set_current_state(state);
+- /*
+- * If HANDOFF bit is set, unconditionally do
+- * a trylock.
+- */
+- if (wstate == WRITER_HANDOFF)
+- break;
++ if (waiter.handoff_set) {
++ enum owner_state owner_state;
+
+- if ((wstate == WRITER_NOT_FIRST) &&
+- (rwsem_first_waiter(sem) == &waiter))
+- wstate = WRITER_FIRST;
++ preempt_disable();
++ owner_state = rwsem_spin_on_owner(sem);
++ preempt_enable();
+
+- count = atomic_long_read(&sem->count);
+- if (!(count & RWSEM_LOCK_MASK))
+- break;
+-
+- /*
+- * The setting of the handoff bit is deferred
+- * until rwsem_try_write_lock() is called.
+- */
+- if ((wstate == WRITER_FIRST) && (rt_task(current) ||
+- time_after(jiffies, waiter.timeout))) {
+- wstate = WRITER_HANDOFF;
+- lockevent_inc(rwsem_wlock_handoff);
+- break;
+- }
++ if (owner_state == OWNER_NULL)
++ goto trylock_again;
+ }
++
++ schedule();
++ lockevent_inc(rwsem_sleep_writer);
++ set_current_state(state);
+ trylock_again:
+ raw_spin_lock_irq(&sem->wait_lock);
+ }
+ __set_current_state(TASK_RUNNING);
+- list_del(&waiter.list);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ lockevent_inc(rwsem_wlock);
+-
+- return ret;
++ return sem;
+
+ out_nolock:
+ __set_current_state(TASK_RUNNING);
+ raw_spin_lock_irq(&sem->wait_lock);
+- list_del(&waiter.list);
+-
+- if (unlikely(wstate == WRITER_HANDOFF))
+- atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
+-
+- if (list_empty(&sem->wait_list))
+- atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
+- else
++ rwsem_del_waiter(sem, &waiter);
++ if (!list_empty(&sem->wait_list))
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+ lockevent_inc(rwsem_wlock_fail);
+-
+ return ERR_PTR(-EINTR);
+ }
+
+@@ -1206,51 +1221,58 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+ /*
+ * lock for reading
+ */
+-static inline int __down_read_common(struct rw_semaphore *sem, int state)
++static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
+ {
++ int ret = 0;
+ long count;
+
++ preempt_disable();
+ if (!rwsem_read_trylock(sem, &count)) {
+- if (IS_ERR(rwsem_down_read_slowpath(sem, count, state)))
+- return -EINTR;
++ if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
++ ret = -EINTR;
++ goto out;
++ }
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+ }
+- return 0;
++out:
++ preempt_enable();
++ return ret;
+ }
+
+-static inline void __down_read(struct rw_semaphore *sem)
++static __always_inline void __down_read(struct rw_semaphore *sem)
+ {
+ __down_read_common(sem, TASK_UNINTERRUPTIBLE);
+ }
+
+-static inline int __down_read_interruptible(struct rw_semaphore *sem)
++static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
+ {
+ return __down_read_common(sem, TASK_INTERRUPTIBLE);
+ }
+
+-static inline int __down_read_killable(struct rw_semaphore *sem)
++static __always_inline int __down_read_killable(struct rw_semaphore *sem)
+ {
+ return __down_read_common(sem, TASK_KILLABLE);
+ }
+
+ static inline int __down_read_trylock(struct rw_semaphore *sem)
+ {
++ int ret = 0;
+ long tmp;
+
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
+
+- /*
+- * Optimize for the case when the rwsem is not locked at all.
+- */
+- tmp = RWSEM_UNLOCKED_VALUE;
+- do {
++ preempt_disable();
++ tmp = atomic_long_read(&sem->count);
++ while (!(tmp & RWSEM_READ_FAILED_MASK)) {
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+- tmp + RWSEM_READER_BIAS)) {
++ tmp + RWSEM_READER_BIAS)) {
+ rwsem_set_reader_owned(sem);
+- return 1;
++ ret = 1;
++ break;
+ }
+- } while (!(tmp & RWSEM_READ_FAILED_MASK));
+- return 0;
++ }
++ preempt_enable();
++ return ret;
+ }
+
+ /*
+@@ -1292,6 +1314,7 @@ static inline void __up_read(struct rw_semaphore *sem)
+ DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+
++ preempt_disable();
+ rwsem_clear_reader_owned(sem);
+ tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
+ DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+@@ -1300,6 +1323,7 @@ static inline void __up_read(struct rw_semaphore *sem)
+ clear_nonspinnable(sem);
+ rwsem_wake(sem);
+ }
++ preempt_enable();
+ }
+
+ /*
+@@ -1617,6 +1641,12 @@ void down_read_non_owner(struct rw_semaphore *sem)
+ {
+ might_sleep();
+ __down_read(sem);
++ /*
++ * The owner value for a reader-owned lock is mostly for debugging
++ * purposes only and is not critical to the correct functioning of
++ * rwsem. So it is perfectly fine to set it in a preempt-enabled
++ * context here.
++ */
+ __rwsem_set_reader_owned(sem, NULL);
+ }
+ EXPORT_SYMBOL(down_read_non_owner);
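The rwsem hunks above track handoff per waiter (handoff_set) and let rwsem_try_write_lock() either take the lock or set the handoff bit within a single cmpxchg loop. The following user-space sketch, built on C11 atomics with made-up LOCKED/HANDOFF bits, only illustrates that "take it or mark it" shape and is not the kernel rwsem.

    /*
     * Illustrative only (user-space C11 atomics, invented LOCKED/HANDOFF
     * bits): either take the lock or, if the caller has been starved, set
     * a handoff flag so fast-path lockers back off.
     */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define LOCKED  0x1UL
    #define HANDOFF 0x2UL

    static _Atomic unsigned long word;

    /* Returns true if the lock was taken; may set HANDOFF instead. */
    static bool try_lock_or_handoff(bool starved)
    {
            unsigned long cur = atomic_load_explicit(&word, memory_order_relaxed);

            for (;;) {
                    unsigned long new;

                    if (cur & LOCKED) {
                            if (!starved || (cur & HANDOFF))
                                    return false;   /* give up for now */
                            new = cur | HANDOFF;    /* ask lockers to back off */
                    } else {
                            new = (cur | LOCKED) & ~HANDOFF;  /* take the lock */
                    }
                    if (atomic_compare_exchange_weak_explicit(&word, &cur, new,
                                                              memory_order_acquire,
                                                              memory_order_relaxed))
                            return !(new & HANDOFF);
            }
    }

    int main(void)
    {
            printf("first try:  %d\n", try_lock_or_handoff(false)); /* 1: lock taken */
            printf("second try: %d\n", try_lock_or_handoff(true));  /* 0: handoff set */
            return 0;
    }

Doing both outcomes in one update is what lets the kernel version fully imply rwsem_del_waiter() on success while still guaranteeing that a starved first waiter either gets the lock or leaves the handoff bit behind.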
+diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
+index 56f139201f246..3ad2cc4823e59 100644
+--- a/kernel/locking/ww_mutex.h
++++ b/kernel/locking/ww_mutex.h
+@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
+ struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
+ if (!n)
+ return NULL;
+- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+- struct rb_node *n = rb_next(&w->tree_entry);
++ struct rb_node *n = rb_next(&w->tree.entry);
+ if (!n)
+ return NULL;
+- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+- struct rb_node *n = rb_prev(&w->tree_entry);
++ struct rb_node *n = rb_prev(&w->tree.entry);
+ if (!n)
+ return NULL;
+- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+
+ static inline struct rt_mutex_waiter *
+@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
+ struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
+ if (!n)
+ return NULL;
+- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+
+ static inline void
+diff --git a/kernel/module.c b/kernel/module.c
+index 5c26a76e800b5..3c90840133c0e 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -2220,12 +2220,20 @@ void *__symbol_get(const char *symbol)
+ };
+
+ preempt_disable();
+- if (!find_symbol(&fsa) || strong_try_module_get(fsa.owner)) {
+- preempt_enable();
+- return NULL;
++ if (!find_symbol(&fsa))
++ goto fail;
++ if (fsa.license != GPL_ONLY) {
++ pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n",
++ symbol);
++ goto fail;
+ }
++ if (strong_try_module_get(fsa.owner))
++ goto fail;
+ preempt_enable();
+ return (void *)kernel_symbol_value(fsa.sym);
++fail:
++ preempt_enable();
++ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(__symbol_get);
+
+@@ -2407,7 +2415,7 @@ static long get_offset(struct module *mod, unsigned int *size,
+ return ret;
+ }
+
+-static bool module_init_layout_section(const char *sname)
++bool module_init_layout_section(const char *sname)
+ {
+ #ifndef CONFIG_MODULE_UNLOAD
+ if (module_exit_section(sname))
+@@ -2967,14 +2975,29 @@ static int elf_validity_check(struct load_info *info)
+ Elf_Shdr *shdr, *strhdr;
+ int err;
+
+- if (info->len < sizeof(*(info->hdr)))
+- return -ENOEXEC;
++ if (info->len < sizeof(*(info->hdr))) {
++ pr_err("Invalid ELF header len %lu\n", info->len);
++ goto no_exec;
++ }
+
+- if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+- || info->hdr->e_type != ET_REL
+- || !elf_check_arch(info->hdr)
+- || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+- return -ENOEXEC;
++ if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) {
++ pr_err("Invalid ELF header magic: != %s\n", ELFMAG);
++ goto no_exec;
++ }
++ if (info->hdr->e_type != ET_REL) {
++ pr_err("Invalid ELF header type: %u != %u\n",
++ info->hdr->e_type, ET_REL);
++ goto no_exec;
++ }
++ if (!elf_check_arch(info->hdr)) {
++ pr_err("Invalid architecture in ELF header: %u\n",
++ info->hdr->e_machine);
++ goto no_exec;
++ }
++ if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) {
++ pr_err("Invalid ELF section header size\n");
++ goto no_exec;
++ }
+
+ /*
+ * e_shnum is 16 bits, and sizeof(Elf_Shdr) is
+@@ -2983,8 +3006,10 @@ static int elf_validity_check(struct load_info *info)
+ */
+ if (info->hdr->e_shoff >= info->len
+ || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+- info->len - info->hdr->e_shoff))
+- return -ENOEXEC;
++ info->len - info->hdr->e_shoff)) {
++ pr_err("Invalid ELF section header overflow\n");
++ goto no_exec;
++ }
+
+ info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
+
+@@ -2992,13 +3017,19 @@ static int elf_validity_check(struct load_info *info)
+ * Verify if the section name table index is valid.
+ */
+ if (info->hdr->e_shstrndx == SHN_UNDEF
+- || info->hdr->e_shstrndx >= info->hdr->e_shnum)
+- return -ENOEXEC;
++ || info->hdr->e_shstrndx >= info->hdr->e_shnum) {
++ pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n",
++ info->hdr->e_shstrndx, info->hdr->e_shstrndx,
++ info->hdr->e_shnum);
++ goto no_exec;
++ }
+
+ strhdr = &info->sechdrs[info->hdr->e_shstrndx];
+ err = validate_section_offset(info, strhdr);
+- if (err < 0)
++ if (err < 0) {
++ pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type);
+ return err;
++ }
+
+ /*
+ * The section name table must be NUL-terminated, as required
+@@ -3006,8 +3037,14 @@ static int elf_validity_check(struct load_info *info)
+ * strings in the section safe.
+ */
+ info->secstrings = (void *)info->hdr + strhdr->sh_offset;
+- if (info->secstrings[strhdr->sh_size - 1] != '\0')
+- return -ENOEXEC;
++ if (strhdr->sh_size == 0) {
++ pr_err("empty section name table\n");
++ goto no_exec;
++ }
++ if (info->secstrings[strhdr->sh_size - 1] != '\0') {
++ pr_err("ELF Spec violation: section name table isn't null terminated\n");
++ goto no_exec;
++ }
+
+ /*
+ * The code assumes that section 0 has a length of zero and
+@@ -3015,8 +3052,11 @@ static int elf_validity_check(struct load_info *info)
+ */
+ if (info->sechdrs[0].sh_type != SHT_NULL
+ || info->sechdrs[0].sh_size != 0
+- || info->sechdrs[0].sh_addr != 0)
+- return -ENOEXEC;
++ || info->sechdrs[0].sh_addr != 0) {
++ pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n",
++ info->sechdrs[0].sh_type);
++ goto no_exec;
++ }
+
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ shdr = &info->sechdrs[i];
+@@ -3026,8 +3066,12 @@ static int elf_validity_check(struct load_info *info)
+ continue;
+ case SHT_SYMTAB:
+ if (shdr->sh_link == SHN_UNDEF
+- || shdr->sh_link >= info->hdr->e_shnum)
+- return -ENOEXEC;
++ || shdr->sh_link >= info->hdr->e_shnum) {
++ pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n",
++ shdr->sh_link, shdr->sh_link,
++ info->hdr->e_shnum);
++ goto no_exec;
++ }
+ fallthrough;
+ default:
+ err = validate_section_offset(info, shdr);
+@@ -3049,6 +3093,9 @@ static int elf_validity_check(struct load_info *info)
+ }
+
+ return 0;
++
++no_exec:
++ return -ENOEXEC;
+ }
+
+ #define COPY_CHUNK_SIZE (16*PAGE_SIZE)
+@@ -3626,7 +3673,8 @@ static bool finished_loading(const char *name)
+ sched_annotate_sleep();
+ mutex_lock(&module_mutex);
+ mod = find_module_all(name, strlen(name), true);
+- ret = !mod || mod->state == MODULE_STATE_LIVE;
++ ret = !mod || mod->state == MODULE_STATE_LIVE
++ || mod->state == MODULE_STATE_GOING;
+ mutex_unlock(&module_mutex);
+
+ return ret;
+@@ -3683,12 +3731,6 @@ static noinline int do_init_module(struct module *mod)
+ }
+ freeinit->module_init = mod->init_layout.base;
+
+- /*
+- * We want to find out whether @mod uses async during init. Clear
+- * PF_USED_ASYNC. async_schedule*() will set it.
+- */
+- current->flags &= ~PF_USED_ASYNC;
+-
+ do_mod_ctors(mod);
+ /* Start the module */
+ if (mod->init != NULL)
+@@ -3714,22 +3756,13 @@ static noinline int do_init_module(struct module *mod)
+
+ /*
+ * We need to finish all async code before the module init sequence
+- * is done. This has potential to deadlock. For example, a newly
+- * detected block device can trigger request_module() of the
+- * default iosched from async probing task. Once userland helper
+- * reaches here, async_synchronize_full() will wait on the async
+- * task waiting on request_module() and deadlock.
+- *
+- * This deadlock is avoided by perfomring async_synchronize_full()
+- * iff module init queued any async jobs. This isn't a full
+- * solution as it will deadlock the same if module loading from
+- * async jobs nests more than once; however, due to the various
+- * constraints, this hack seems to be the best option for now.
+- * Please refer to the following thread for details.
++ * is done. This has potential to deadlock if synchronous module
++ * loading is requested from async (which is not allowed!).
+ *
+- * http://thread.gmane.org/gmane.linux.kernel/1420814
++ * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous
++ * request_module() from async workers") for more details.
+ */
+- if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC))
++ if (!mod->async_probe_requested)
+ async_synchronize_full();
+
+ ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
+@@ -3811,20 +3844,35 @@ static int add_unformed_module(struct module *mod)
+
+ mod->state = MODULE_STATE_UNFORMED;
+
+-again:
+ mutex_lock(&module_mutex);
+ old = find_module_all(mod->name, strlen(mod->name), true);
+ if (old != NULL) {
+- if (old->state != MODULE_STATE_LIVE) {
++ if (old->state == MODULE_STATE_COMING
++ || old->state == MODULE_STATE_UNFORMED) {
+ /* Wait in case it fails to load. */
+ mutex_unlock(&module_mutex);
+ err = wait_event_interruptible(module_wq,
+ finished_loading(mod->name));
+ if (err)
+ goto out_unlocked;
+- goto again;
++
++ /* The module might have gone in the meantime. */
++ mutex_lock(&module_mutex);
++ old = find_module_all(mod->name, strlen(mod->name),
++ true);
+ }
+- err = -EEXIST;
++
++ /*
++ * We are here only when the same module was being loaded. Do
++ * not try to load it again right now. It prevents long delays
++ * caused by serialized module load failures. It might happen
++ * when more devices of the same type trigger load of
++ * a particular module.
++ */
++ if (old && old->state == MODULE_STATE_LIVE)
++ err = -EEXIST;
++ else
++ err = -EBUSY;
+ goto out;
+ }
+ mod_update_bounds(mod);
+@@ -3940,10 +3988,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
+ * sections.
+ */
+ err = elf_validity_check(info);
+- if (err) {
+- pr_err("Module has invalid ELF structures\n");
++ if (err)
+ goto free_copy;
+- }
+
+ /*
+ * Everything checks out, so set up the section info
+diff --git a/kernel/padata.c b/kernel/padata.c
+index 18d3a5c699d84..c17f772cc315a 100644
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -207,14 +207,16 @@ int padata_do_parallel(struct padata_shell *ps,
+ pw = padata_work_alloc();
+ spin_unlock(&padata_works_lock);
+
++ if (!pw) {
++ /* Maximum works limit exceeded, run in the current task. */
++ padata->parallel(padata);
++ }
++
+ rcu_read_unlock_bh();
+
+ if (pw) {
+ padata_work_init(pw, padata_parallel_worker, padata, 0);
+ queue_work(pinst->parallel_wq, &pw->pw_work);
+- } else {
+- /* Maximum works limit exceeded, run in the current task. */
+- padata->parallel(padata);
+ }
+
+ return 0;
+@@ -388,13 +390,16 @@ void padata_do_serial(struct padata_priv *padata)
+ int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
+ struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
+ struct padata_priv *cur;
++ struct list_head *pos;
+
+ spin_lock(&reorder->lock);
+ /* Sort in ascending order of sequence number. */
+- list_for_each_entry_reverse(cur, &reorder->list, list)
++ list_for_each_prev(pos, &reorder->list) {
++ cur = list_entry(pos, struct padata_priv, list);
+ if (cur->seq_nr < padata->seq_nr)
+ break;
+- list_add(&padata->list, &cur->list);
++ }
++ list_add(&padata->list, pos);
+ spin_unlock(&reorder->lock);
+
+ /*
+diff --git a/kernel/panic.c b/kernel/panic.c
+index cefd7d82366fb..47933d4c769b6 100644
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -32,6 +32,7 @@
+ #include <linux/bug.h>
+ #include <linux/ratelimit.h>
+ #include <linux/debugfs.h>
++#include <linux/sysfs.h>
+ #include <asm/sections.h>
+
+ #define PANIC_TIMER_STEP 100
+@@ -42,7 +43,9 @@
+ * Should we dump all CPUs backtraces in an oops event?
+ * Defaults to 0, can be changed via sysctl.
+ */
+-unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
++static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
++#else
++#define sysctl_oops_all_cpu_backtrace 0
+ #endif /* CONFIG_SMP */
+
+ int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
+@@ -55,6 +58,7 @@ bool crash_kexec_post_notifiers;
+ int panic_on_warn __read_mostly;
+ unsigned long panic_on_taint;
+ bool panic_on_taint_nousertaint = false;
++static unsigned int warn_limit __read_mostly;
+
+ int panic_timeout = CONFIG_PANIC_TIMEOUT;
+ EXPORT_SYMBOL_GPL(panic_timeout);
+@@ -71,6 +75,56 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
+
+ EXPORT_SYMBOL(panic_notifier_list);
+
++#ifdef CONFIG_SYSCTL
++static struct ctl_table kern_panic_table[] = {
++#ifdef CONFIG_SMP
++ {
++ .procname = "oops_all_cpu_backtrace",
++ .data = &sysctl_oops_all_cpu_backtrace,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE,
++ },
++#endif
++ {
++ .procname = "warn_limit",
++ .data = &warn_limit,
++ .maxlen = sizeof(warn_limit),
++ .mode = 0644,
++ .proc_handler = proc_douintvec,
++ },
++ { }
++};
++
++static __init int kernel_panic_sysctls_init(void)
++{
++ register_sysctl_init("kernel", kern_panic_table);
++ return 0;
++}
++late_initcall(kernel_panic_sysctls_init);
++#endif
++
++static atomic_t warn_count = ATOMIC_INIT(0);
++
++#ifdef CONFIG_SYSFS
++static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *page)
++{
++ return sysfs_emit(page, "%d\n", atomic_read(&warn_count));
++}
++
++static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count);
++
++static __init int kernel_panic_sysfs_init(void)
++{
++ sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL);
++ return 0;
++}
++late_initcall(kernel_panic_sysfs_init);
++#endif
++
+ static long no_blink(int state)
+ {
+ return 0;
+@@ -167,6 +221,19 @@ static void panic_print_sys_info(void)
+ ftrace_dump(DUMP_ALL);
+ }
+
++void check_panic_on_warn(const char *origin)
++{
++ unsigned int limit;
++
++ if (panic_on_warn)
++ panic("%s: panic_on_warn set ...\n", origin);
++
++ limit = READ_ONCE(warn_limit);
++ if (atomic_inc_return(&warn_count) >= limit && limit)
++ panic("%s: system warned too often (kernel.warn_limit is %d)",
++ origin, limit);
++}
++
+ /**
+ * panic - halt the system
+ * @fmt: The text string to print
+@@ -184,6 +251,16 @@ void panic(const char *fmt, ...)
+ int old_cpu, this_cpu;
+ bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
+
++ if (panic_on_warn) {
++ /*
++ * This thread may hit another WARN() in the panic path.
++ * Resetting this prevents additional WARN() from panicking the
++ * system on this thread. Other threads are blocked by the
++ * panic_mutex in panic().
++ */
++ panic_on_warn = 0;
++ }
++
+ /*
+ * Disable local interrupts. This will prevent panic_smp_self_stop
+ * from deadlocking the first cpu that invokes the panic, since
+@@ -592,16 +669,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
+ if (regs)
+ show_regs(regs);
+
+- if (panic_on_warn) {
+- /*
+- * This thread may hit another WARN() in the panic path.
+- * Resetting this prevents additional WARN() from panicking the
+- * system on this thread. Other threads are blocked by the
+- * panic_mutex in panic().
+- */
+- panic_on_warn = 0;
+- panic("panic_on_warn set ...\n");
+- }
++ check_panic_on_warn("kernel");
+
+ if (!regs)
+ dump_stack();
+diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
+index a46a3723bc662..259fc4ca0d9cc 100644
+--- a/kernel/pid_namespace.c
++++ b/kernel/pid_namespace.c
+@@ -244,7 +244,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (pid_ns->pid_allocated == init_pids)
+ break;
++ /*
++ * Release tasks_rcu_exit_srcu to avoid following deadlock:
++ *
++ * 1) TASK A unshare(CLONE_NEWPID)
++ * 2) TASK A fork() twice -> TASK B (child reaper for new ns)
++ * and TASK C
++ * 3) TASK B exits, kills TASK C, waits for TASK A to reap it
++ * 4) TASK A calls synchronize_rcu_tasks()
++ * -> synchronize_srcu(tasks_rcu_exit_srcu)
++ * 5) *DEADLOCK*
++ *
++ * It is considered safe to release tasks_rcu_exit_srcu here
++ * because we assume the current task can not be concurrently
++ * reaped at this point.
++ */
++ exit_tasks_rcu_stop();
+ schedule();
++ exit_tasks_rcu_start();
+ }
+ __set_current_state(TASK_RUNNING);
+
+diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
+index a332ccd829e24..1b902f986f91c 100644
+--- a/kernel/power/energy_model.c
++++ b/kernel/power/energy_model.c
+@@ -85,10 +85,7 @@ static void em_debug_create_pd(struct device *dev)
+
+ static void em_debug_remove_pd(struct device *dev)
+ {
+- struct dentry *debug_dir;
+-
+- debug_dir = debugfs_lookup(dev_name(dev), rootdir);
+- debugfs_remove_recursive(debug_dir);
++ debugfs_lookup_and_remove(dev_name(dev), rootdir);
+ }
+
+ static int __init em_debug_init(void)
+@@ -107,8 +104,7 @@ static void em_debug_remove_pd(struct device *dev) {}
+ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
+ int nr_states, struct em_data_callback *cb)
+ {
+- unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
+- unsigned long power, freq, prev_freq = 0;
++ unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
+ struct em_perf_state *table;
+ int i, ret;
+ u64 fmax;
+@@ -153,27 +149,21 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
+
+ table[i].power = power;
+ table[i].frequency = prev_freq = freq;
+-
+- /*
+- * The hertz/watts efficiency ratio should decrease as the
+- * frequency grows on sane platforms. But this isn't always
+- * true in practice so warn the user if a higher OPP is more
+- * power efficient than a lower one.
+- */
+- opp_eff = freq / power;
+- if (opp_eff >= prev_opp_eff)
+- dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n",
+- i, i - 1);
+- prev_opp_eff = opp_eff;
+ }
+
+ /* Compute the cost of each performance state. */
+ fmax = (u64) table[nr_states - 1].frequency;
+- for (i = 0; i < nr_states; i++) {
++ for (i = nr_states - 1; i >= 0; i--) {
+ unsigned long power_res = em_scale_power(table[i].power);
+
+ table[i].cost = div64_u64(fmax * power_res,
+ table[i].frequency);
++ if (table[i].cost >= prev_cost) {
++ dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
++ table[i].frequency);
++ } else {
++ prev_cost = table[i].cost;
++ }
+ }
+
+ pd->table = table;
+diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
+index 559acef3fddb8..9abc73d500fbf 100644
+--- a/kernel/power/hibernate.c
++++ b/kernel/power/hibernate.c
+@@ -640,7 +640,7 @@ static void power_down(void)
+ int error;
+
+ if (hibernation_mode == HIBERNATION_SUSPEND) {
+- error = suspend_devices_and_enter(PM_SUSPEND_MEM);
++ error = suspend_devices_and_enter(mem_sleep_current);
+ if (error) {
+ hibernation_mode = hibernation_ops ?
+ HIBERNATION_PLATFORM :
+@@ -691,7 +691,7 @@ static int load_image_and_restore(void)
+ goto Unlock;
+
+ error = swsusp_read(&flags);
+- swsusp_close(FMODE_READ);
++ swsusp_close(FMODE_READ | FMODE_EXCL);
+ if (!error)
+ error = hibernation_restore(flags & SF_PLATFORM_MODE);
+
+@@ -981,7 +981,7 @@ static int software_resume(void)
+ /* The snapshot device should not be opened while we're running */
+ if (!hibernate_acquire()) {
+ error = -EBUSY;
+- swsusp_close(FMODE_READ);
++ swsusp_close(FMODE_READ | FMODE_EXCL);
+ goto Unlock;
+ }
+
+@@ -1016,7 +1016,7 @@ static int software_resume(void)
+ pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
+ return error;
+ Close_Finish:
+- swsusp_close(FMODE_READ);
++ swsusp_close(FMODE_READ | FMODE_EXCL);
+ goto Finish;
+ }
+
+@@ -1326,7 +1326,7 @@ static int __init resumedelay_setup(char *str)
+ int rc = kstrtouint(str, 0, &resume_delay);
+
+ if (rc)
+- return rc;
++ pr_warn("resumedelay: bad option string '%s'\n", str);
+ return 1;
+ }
+
+diff --git a/kernel/power/main.c b/kernel/power/main.c
+index 44169f3081fdc..7e646079fbeb2 100644
+--- a/kernel/power/main.c
++++ b/kernel/power/main.c
+@@ -504,7 +504,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+ {
+- return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA;
++ if (!pm_wakeup_irq())
++ return -ENODATA;
++
++ return sprintf(buf, "%u\n", pm_wakeup_irq());
+ }
+
+ power_attr_ro(pm_wakeup_irq);
+diff --git a/kernel/power/process.c b/kernel/power/process.c
+index 37401c99b7d7d..11b570fcf0494 100644
+--- a/kernel/power/process.c
++++ b/kernel/power/process.c
+@@ -94,7 +94,7 @@ static int try_to_freeze_tasks(bool user_only)
+ todo - wq_busy, wq_busy);
+
+ if (wq_busy)
+- show_workqueue_state();
++ show_all_workqueues();
+
+ if (!wakeup || pm_debug_messages_on) {
+ read_lock(&tasklist_lock);
+@@ -134,7 +134,7 @@ int freeze_processes(void)
+ if (!pm_freezing)
+ atomic_inc(&system_freezing_cnt);
+
+- pm_wakeup_clear(true);
++ pm_wakeup_clear(0);
+ pr_info("Freezing user space processes ... ");
+ pm_freezing = true;
+ error = try_to_freeze_tasks(true);
+diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
+index f7a9860782135..475d630e650f1 100644
+--- a/kernel/power/snapshot.c
++++ b/kernel/power/snapshot.c
+@@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm)
+ * Register a range of page frames the contents of which should not be saved
+ * during hibernation (to be used in the early initialization code).
+ */
+-void __init __register_nosave_region(unsigned long start_pfn,
+- unsigned long end_pfn, int use_kmalloc)
++void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+ {
+ struct nosave_region *region;
+
+@@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn,
+ goto Report;
+ }
+ }
+- if (use_kmalloc) {
+- /* During init, this shouldn't fail */
+- region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+- BUG_ON(!region);
+- } else {
+- /* This allocation cannot fail */
+- region = memblock_alloc(sizeof(struct nosave_region),
+- SMP_CACHE_BYTES);
+- if (!region)
+- panic("%s: Failed to allocate %zu bytes\n", __func__,
+- sizeof(struct nosave_region));
+- }
++ /* This allocation cannot fail */
++ region = memblock_alloc(sizeof(struct nosave_region),
++ SMP_CACHE_BYTES);
++ if (!region)
++ panic("%s: Failed to allocate %zu bytes\n", __func__,
++ sizeof(struct nosave_region));
+ region->start_pfn = start_pfn;
+ region->end_pfn = end_pfn;
+ list_add_tail(&region->list, &nosave_regions);
+@@ -1726,8 +1719,8 @@ static unsigned long minimum_image_size(unsigned long saveable)
+ * /sys/power/reserved_size, respectively). To make this happen, we compute the
+ * total number of available page frames and allocate at least
+ *
+- * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
+- * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
++ * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2
++ * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
+ *
+ * of them, which corresponds to the maximum size of a hibernation image.
+ *
+diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
+index eb75f394a0590..13d905dd32675 100644
+--- a/kernel/power/suspend.c
++++ b/kernel/power/suspend.c
+@@ -138,8 +138,6 @@ static void s2idle_loop(void)
+ break;
+ }
+
+- pm_wakeup_clear(false);
+-
+ s2idle_enter();
+ }
+
+diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
+index d20526c5be15b..b663a97f5867a 100644
+--- a/kernel/power/suspend_test.c
++++ b/kernel/power/suspend_test.c
+@@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value)
+ value++;
+ suspend_type = strsep(&value, ",");
+ if (!suspend_type)
+- return 0;
++ return 1;
+
+ repeat = strsep(&value, ",");
+ if (repeat) {
+ if (kstrtou32(repeat, 0, &test_repeat_count_max))
+- return 0;
++ return 1;
+ }
+
+ for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+ if (!strcmp(pm_labels[i], suspend_type)) {
+ test_state_label = pm_labels[i];
+- return 0;
++ return 1;
+ }
+
+ printk(warn_bad_state, suspend_type);
+- return 0;
++ return 1;
+ }
+ __setup("test_suspend", setup_test_suspend);
+
+diff --git a/kernel/power/swap.c b/kernel/power/swap.c
+index 3cb89baebc796..f3a1086f7cdb2 100644
+--- a/kernel/power/swap.c
++++ b/kernel/power/swap.c
+@@ -299,7 +299,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
+ return error;
+ }
+
+-static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
++static int hib_wait_io(struct hib_bio_batch *hb)
+ {
+ /*
+ * We are relying on the behavior of blk_plug that a thread with
+@@ -1521,9 +1521,10 @@ end:
+ int swsusp_check(void)
+ {
+ int error;
++ void *holder;
+
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+- FMODE_READ, NULL);
++ FMODE_READ | FMODE_EXCL, &holder);
+ if (!IS_ERR(hib_resume_bdev)) {
+ set_blocksize(hib_resume_bdev, PAGE_SIZE);
+ clear_page(swsusp_header);
+@@ -1545,7 +1546,7 @@ int swsusp_check(void)
+
+ put:
+ if (error)
+- blkdev_put(hib_resume_bdev, FMODE_READ);
++ blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL);
+ else
+ pr_debug("Image signature found, resuming\n");
+ } else {
+diff --git a/kernel/power/user.c b/kernel/power/user.c
+index 740723bb38852..13cca2e2c2bc6 100644
+--- a/kernel/power/user.c
++++ b/kernel/power/user.c
+@@ -26,6 +26,7 @@
+
+ #include "power.h"
+
++static bool need_wait;
+
+ static struct snapshot_data {
+ struct snapshot_handle handle;
+@@ -78,7 +79,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
+ * Resuming. We may need to wait for the image device to
+ * appear.
+ */
+- wait_for_device_probe();
++ need_wait = true;
+
+ data->swap = -1;
+ data->mode = O_WRONLY;
+@@ -168,6 +169,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
+ ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
+
++ if (need_wait) {
++ wait_for_device_probe();
++ need_wait = false;
++ }
++
+ lock_system_sleep();
+
+ data = filp->private_data;
+@@ -244,6 +250,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
+ loff_t size;
+ sector_t offset;
+
++ if (need_wait) {
++ wait_for_device_probe();
++ need_wait = false;
++ }
++
+ if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
+ return -ENOTTY;
+ if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
+diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
+index 105df4dfc7839..52571dcad768b 100644
+--- a/kernel/power/wakelock.c
++++ b/kernel/power/wakelock.c
+@@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active)
+ {
+ struct rb_node *node;
+ struct wakelock *wl;
+- char *str = buf;
+- char *end = buf + PAGE_SIZE;
++ int len = 0;
+
+ mutex_lock(&wakelocks_lock);
+
+ for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
+ wl = rb_entry(node, struct wakelock, node);
+ if (wl->ws->active == show_active)
+- str += scnprintf(str, end - str, "%s ", wl->name);
++ len += sysfs_emit_at(buf, len, "%s ", wl->name);
+ }
+- if (str > buf)
+- str--;
+
+- str += scnprintf(str, end - str, "\n");
++ len += sysfs_emit_at(buf, len, "\n");
+
+ mutex_unlock(&wakelocks_lock);
+- return (str - buf);
++ return len;
+ }
+
+ #if CONFIG_PM_WAKELOCKS_LIMIT > 0
+diff --git a/kernel/printk/index.c b/kernel/printk/index.c
+index d3709408debe9..d23b8f8a51db5 100644
+--- a/kernel/printk/index.c
++++ b/kernel/printk/index.c
+@@ -146,7 +146,7 @@ static void pi_create_file(struct module *mod)
+ #ifdef CONFIG_MODULES
+ static void pi_remove_file(struct module *mod)
+ {
+- debugfs_remove(debugfs_lookup(pi_get_module_name(mod), dfs_index));
++ debugfs_lookup_and_remove(pi_get_module_name(mod), dfs_index);
+ }
+
+ static int pi_module_notify(struct notifier_block *nb, unsigned long op,
+diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
+index a8d0a58deebc7..8d856b7c2e5af 100644
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -146,8 +146,10 @@ static int __control_devkmsg(char *str)
+
+ static int __init control_devkmsg(char *str)
+ {
+- if (__control_devkmsg(str) < 0)
++ if (__control_devkmsg(str) < 0) {
++ pr_warn("printk.devkmsg: bad option string '%s'\n", str);
+ return 1;
++ }
+
+ /*
+ * Set sysctl string accordingly:
+@@ -166,7 +168,7 @@ static int __init control_devkmsg(char *str)
+ */
+ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
+
+- return 0;
++ return 1;
+ }
+ __setup("printk.devkmsg=", control_devkmsg);
+
+@@ -733,8 +735,19 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
+ goto out;
+ }
+
++ /*
++ * Guarantee this task is visible on the waitqueue before
++ * checking the wake condition.
++ *
++ * The full memory barrier within set_current_state() of
++ * prepare_to_wait_event() pairs with the full memory barrier
++ * within wq_has_sleeper().
++ *
++ * This pairs with __wake_up_klogd:A.
++ */
+ ret = wait_event_interruptible(log_wait,
+- prb_read_valid(prb, atomic64_read(&user->seq), r));
++ prb_read_valid(prb,
++ atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */
+ if (ret)
+ goto out;
+ }
+@@ -1500,7 +1513,18 @@ static int syslog_print(char __user *buf, int size)
+ seq = syslog_seq;
+
+ mutex_unlock(&syslog_lock);
+- len = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL));
++ /*
++ * Guarantee this task is visible on the waitqueue before
++ * checking the wake condition.
++ *
++ * The full memory barrier within set_current_state() of
++ * prepare_to_wait_event() pairs with the full memory barrier
++ * within wq_has_sleeper().
++ *
++ * This pairs with __wake_up_klogd:A.
++ */
++ len = wait_event_interruptible(log_wait,
++ prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
+ mutex_lock(&syslog_lock);
+
+ if (len)
+@@ -3213,7 +3237,7 @@ static DEFINE_PER_CPU(int, printk_pending);
+
+ static void wake_up_klogd_work_func(struct irq_work *irq_work)
+ {
+- int pending = __this_cpu_xchg(printk_pending, 0);
++ int pending = this_cpu_xchg(printk_pending, 0);
+
+ if (pending & PRINTK_PENDING_OUTPUT) {
+ /* If trylock fails, someone else is doing the printing */
+@@ -3228,28 +3252,48 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
+ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
+ IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);
+
+-void wake_up_klogd(void)
++static void __wake_up_klogd(int val)
+ {
+ if (!printk_percpu_data_ready())
+ return;
+
+ preempt_disable();
+- if (waitqueue_active(&log_wait)) {
+- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
++ /*
++ * Guarantee any new records can be seen by tasks preparing to wait
++ * before this context checks if the wait queue is empty.
++ *
++ * The full memory barrier within wq_has_sleeper() pairs with the full
++ * memory barrier within set_current_state() of
++ * prepare_to_wait_event(), which is called after ___wait_event() adds
++ * the waiter but before it has checked the wait condition.
++ *
++ * This pairs with devkmsg_read:A and syslog_print:A.
++ */
++ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
++ (val & PRINTK_PENDING_OUTPUT)) {
++ this_cpu_or(printk_pending, val);
+ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+ }
+ preempt_enable();
+ }
+
++void wake_up_klogd(void)
++{
++ __wake_up_klogd(PRINTK_PENDING_WAKEUP);
++}
++
+ void defer_console_output(void)
+ {
+- if (!printk_percpu_data_ready())
+- return;
++ /*
++ * New messages may have been added directly to the ringbuffer
++ * using vprintk_store(), so wake any waiters as well.
++ */
++ __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
++}
+
+- preempt_disable();
+- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
+- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+- preempt_enable();
++void printk_trigger_flush(void)
++{
++ defer_console_output();
+ }
+
+ int vprintk_deferred(const char *fmt, va_list args)
+diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
+index 8a7b7362c0dd4..d069e0d3768ba 100644
+--- a/kernel/printk/printk_ringbuffer.c
++++ b/kernel/printk/printk_ringbuffer.c
+@@ -1724,7 +1724,7 @@ static bool copy_data(struct prb_data_ring *data_ring,
+ if (!buf || !buf_size)
+ return true;
+
+- data_size = min_t(u16, buf_size, len);
++ data_size = min_t(unsigned int, buf_size, len);
+
+ memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
+ return true;
+diff --git a/kernel/profile.c b/kernel/profile.c
+index eb9c7f0f5ac52..0db1122855c0d 100644
+--- a/kernel/profile.c
++++ b/kernel/profile.c
+@@ -109,6 +109,13 @@ int __ref profile_init(void)
+
+ /* only text is profiled */
+ prof_len = (_etext - _stext) >> prof_shift;
++
++ if (!prof_len) {
++ pr_warn("profiling shift: %u too large\n", prof_shift);
++ prof_on = 0;
++ return -EINVAL;
++ }
++
+ buffer_bytes = prof_len*sizeof(atomic_t);
+
+ if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
+diff --git a/kernel/ptrace.c b/kernel/ptrace.c
+index f8589bf8d7dce..0cf547531ddf0 100644
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -371,6 +371,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
+ return !err;
+ }
+
++static int check_ptrace_options(unsigned long data)
++{
++ if (data & ~(unsigned long)PTRACE_O_MASK)
++ return -EINVAL;
++
++ if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
++ if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
++ !IS_ENABLED(CONFIG_SECCOMP))
++ return -EINVAL;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
++ current->ptrace & PT_SUSPEND_SECCOMP)
++ return -EPERM;
++ }
++ return 0;
++}
++
+ static int ptrace_attach(struct task_struct *task, long request,
+ unsigned long addr,
+ unsigned long flags)
+@@ -382,8 +402,16 @@ static int ptrace_attach(struct task_struct *task, long request,
+ if (seize) {
+ if (addr != 0)
+ goto out;
++ /*
++ * This duplicates the check in check_ptrace_options() because
++ * ptrace_attach() and ptrace_setoptions() have historically
++ * used different error codes for unknown ptrace options.
++ */
+ if (flags & ~(unsigned long)PTRACE_O_MASK)
+ goto out;
++ retval = check_ptrace_options(flags);
++ if (retval)
++ return retval;
+ flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
+ } else {
+ flags = PT_PTRACED;
+@@ -656,22 +684,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
+ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
+ {
+ unsigned flags;
++ int ret;
+
+- if (data & ~(unsigned long)PTRACE_O_MASK)
+- return -EINVAL;
+-
+- if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
+- if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
+- !IS_ENABLED(CONFIG_SECCOMP))
+- return -EINVAL;
+-
+- if (!capable(CAP_SYS_ADMIN))
+- return -EPERM;
+-
+- if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
+- current->ptrace & PT_SUSPEND_SECCOMP)
+- return -EPERM;
+- }
++ ret = check_ptrace_options(data);
++ if (ret)
++ return ret;
+
+ /* Avoid intermediate state when all opts are cleared */
+ flags = child->ptrace;
+@@ -1221,9 +1238,8 @@ int ptrace_request(struct task_struct *child, long request,
+ return ptrace_resume(child, request, data);
+
+ case PTRACE_KILL:
+- if (child->exit_state) /* already dead */
+- return 0;
+- return ptrace_resume(child, request, SIGKILL);
++ send_sig_info(SIGKILL, SEND_SIG_NOINFO, child);
++ return 0;
+
+ #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+ case PTRACE_GETREGSET:
+diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
+index 3128b7cf8e1fd..f73cf17fcee92 100644
+--- a/kernel/rcu/Kconfig
++++ b/kernel/rcu/Kconfig
+@@ -86,6 +86,7 @@ config TASKS_RCU
+
+ config TASKS_RUDE_RCU
+ def_bool 0
++ select IRQ_WORK
+ help
+ This option enables a task-based RCU implementation that uses
+ only context switch (including preemption) and user-mode
+diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
+index 9a19328ff2514..5d405943823ec 100644
+--- a/kernel/rcu/rcu_segcblist.h
++++ b/kernel/rcu/rcu_segcblist.h
+@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
+ static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
+ int flags)
+ {
+- rsclp->flags |= flags;
++ WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
+ }
+
+ static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
+ int flags)
+ {
+- rsclp->flags &= ~flags;
++ WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
+ }
+
+ static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
+diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
+index 2cc34a22a5060..57ec414710bbc 100644
+--- a/kernel/rcu/rcuscale.c
++++ b/kernel/rcu/rcuscale.c
+@@ -50,8 +50,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
+ pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
+ #define VERBOSE_SCALEOUT_STRING(s) \
+ do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
+-#define VERBOSE_SCALEOUT_ERRSTRING(s) \
+- do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0)
++#define SCALEOUT_ERRSTRING(s) \
++ pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s)
+
+ /*
+ * The intended use cases for the nreaders and nwriters module parameters
+@@ -500,89 +500,6 @@ rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
+ scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+ }
+
+-static void
+-rcu_scale_cleanup(void)
+-{
+- int i;
+- int j;
+- int ngps = 0;
+- u64 *wdp;
+- u64 *wdpp;
+-
+- /*
+- * Would like warning at start, but everything is expedited
+- * during the mid-boot phase, so have to wait till the end.
+- */
+- if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
+- VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+- if (rcu_gp_is_normal() && gp_exp)
+- VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+- if (gp_exp && gp_async)
+- VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+-
+- if (torture_cleanup_begin())
+- return;
+- if (!cur_ops) {
+- torture_cleanup_end();
+- return;
+- }
+-
+- if (reader_tasks) {
+- for (i = 0; i < nrealreaders; i++)
+- torture_stop_kthread(rcu_scale_reader,
+- reader_tasks[i]);
+- kfree(reader_tasks);
+- }
+-
+- if (writer_tasks) {
+- for (i = 0; i < nrealwriters; i++) {
+- torture_stop_kthread(rcu_scale_writer,
+- writer_tasks[i]);
+- if (!writer_n_durations)
+- continue;
+- j = writer_n_durations[i];
+- pr_alert("%s%s writer %d gps: %d\n",
+- scale_type, SCALE_FLAG, i, j);
+- ngps += j;
+- }
+- pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+- scale_type, SCALE_FLAG,
+- t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
+- t_rcu_scale_writer_finished -
+- t_rcu_scale_writer_started,
+- ngps,
+- rcuscale_seq_diff(b_rcu_gp_test_finished,
+- b_rcu_gp_test_started));
+- for (i = 0; i < nrealwriters; i++) {
+- if (!writer_durations)
+- break;
+- if (!writer_n_durations)
+- continue;
+- wdpp = writer_durations[i];
+- if (!wdpp)
+- continue;
+- for (j = 0; j < writer_n_durations[i]; j++) {
+- wdp = &wdpp[j];
+- pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+- scale_type, SCALE_FLAG,
+- i, j, *wdp);
+- if (j % 100 == 0)
+- schedule_timeout_uninterruptible(1);
+- }
+- kfree(writer_durations[i]);
+- }
+- kfree(writer_tasks);
+- kfree(writer_durations);
+- kfree(writer_n_durations);
+- }
+-
+- /* Do torture-type-specific cleanup operations. */
+- if (cur_ops->cleanup != NULL)
+- cur_ops->cleanup();
+-
+- torture_cleanup_end();
+-}
+-
+ /*
+ * Return the number if non-negative. If -1, the number of CPUs.
+ * If less than -1, that much less than the number of CPUs, but
+@@ -602,21 +519,6 @@ static int compute_real(int n)
+ return nr;
+ }
+
+-/*
+- * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
+- * down system.
+- */
+-static int
+-rcu_scale_shutdown(void *arg)
+-{
+- wait_event(shutdown_wq,
+- atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
+- smp_mb(); /* Wake before output. */
+- rcu_scale_cleanup();
+- kernel_power_off();
+- return -EINVAL;
+-}
+-
+ /*
+ * kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number
+ * of iterations and measure total time and number of GP for all iterations to complete.
+@@ -736,8 +638,8 @@ kfree_scale_cleanup(void)
+ static int
+ kfree_scale_shutdown(void *arg)
+ {
+- wait_event(shutdown_wq,
+- atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
++ wait_event_idle(shutdown_wq,
++ atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
+
+ smp_mb(); /* Wake before output. */
+
+@@ -791,6 +693,108 @@ unwind:
+ return firsterr;
+ }
+
++static void
++rcu_scale_cleanup(void)
++{
++ int i;
++ int j;
++ int ngps = 0;
++ u64 *wdp;
++ u64 *wdpp;
++
++ /*
++ * Would like warning at start, but everything is expedited
++ * during the mid-boot phase, so have to wait till the end.
++ */
++ if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
++ SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
++ if (rcu_gp_is_normal() && gp_exp)
++ SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
++ if (gp_exp && gp_async)
++ SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
++
++ if (kfree_rcu_test) {
++ kfree_scale_cleanup();
++ return;
++ }
++
++ if (torture_cleanup_begin())
++ return;
++ if (!cur_ops) {
++ torture_cleanup_end();
++ return;
++ }
++
++ if (reader_tasks) {
++ for (i = 0; i < nrealreaders; i++)
++ torture_stop_kthread(rcu_scale_reader,
++ reader_tasks[i]);
++ kfree(reader_tasks);
++ }
++
++ if (writer_tasks) {
++ for (i = 0; i < nrealwriters; i++) {
++ torture_stop_kthread(rcu_scale_writer,
++ writer_tasks[i]);
++ if (!writer_n_durations)
++ continue;
++ j = writer_n_durations[i];
++ pr_alert("%s%s writer %d gps: %d\n",
++ scale_type, SCALE_FLAG, i, j);
++ ngps += j;
++ }
++ pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
++ scale_type, SCALE_FLAG,
++ t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
++ t_rcu_scale_writer_finished -
++ t_rcu_scale_writer_started,
++ ngps,
++ rcuscale_seq_diff(b_rcu_gp_test_finished,
++ b_rcu_gp_test_started));
++ for (i = 0; i < nrealwriters; i++) {
++ if (!writer_durations)
++ break;
++ if (!writer_n_durations)
++ continue;
++ wdpp = writer_durations[i];
++ if (!wdpp)
++ continue;
++ for (j = 0; j < writer_n_durations[i]; j++) {
++ wdp = &wdpp[j];
++ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
++ scale_type, SCALE_FLAG,
++ i, j, *wdp);
++ if (j % 100 == 0)
++ schedule_timeout_uninterruptible(1);
++ }
++ kfree(writer_durations[i]);
++ }
++ kfree(writer_tasks);
++ kfree(writer_durations);
++ kfree(writer_n_durations);
++ }
++
++ /* Do torture-type-specific cleanup operations. */
++ if (cur_ops->cleanup != NULL)
++ cur_ops->cleanup();
++
++ torture_cleanup_end();
++}
++
++/*
++ * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
++ * down system.
++ */
++static int
++rcu_scale_shutdown(void *arg)
++{
++ wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
++ smp_mb(); /* Wake before output. */
++ rcu_scale_cleanup();
++ kernel_power_off();
++ return -EINVAL;
++}
++
+ static int __init
+ rcu_scale_init(void)
+ {
+@@ -845,7 +849,7 @@ rcu_scale_init(void)
+ reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
+ GFP_KERNEL);
+ if (reader_tasks == NULL) {
+- VERBOSE_SCALEOUT_ERRSTRING("out of memory");
++ SCALEOUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+@@ -865,7 +869,7 @@ rcu_scale_init(void)
+ kcalloc(nrealwriters, sizeof(*writer_n_durations),
+ GFP_KERNEL);
+ if (!writer_tasks || !writer_durations || !writer_n_durations) {
+- VERBOSE_SCALEOUT_ERRSTRING("out of memory");
++ SCALEOUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
+index ab4215266ebee..d820ef615475b 100644
+--- a/kernel/rcu/rcutorture.c
++++ b/kernel/rcu/rcutorture.c
+@@ -46,6 +46,7 @@
+ #include <linux/oom.h>
+ #include <linux/tick.h>
+ #include <linux/rcupdate_trace.h>
++#include <linux/nmi.h>
+
+ #include "rcu.h"
+
+@@ -109,6 +110,8 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
+ torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
+ torture_param(int, stall_cpu_holdoff, 10,
+ "Time to wait before starting stall (s).");
++torture_param(bool, stall_no_softlockup, false,
++ "Avoid softlockup warning during cpu stall.");
+ torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
+ torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
+ torture_param(int, stall_gp_kthread, 0,
+@@ -1432,28 +1435,34 @@ static void rcutorture_one_extend(int *readstate, int newstate,
+ /* First, put new protection in place to avoid critical-section gap. */
+ if (statesnew & RCUTORTURE_RDR_BH)
+ local_bh_disable();
++ if (statesnew & RCUTORTURE_RDR_RBH)
++ rcu_read_lock_bh();
+ if (statesnew & RCUTORTURE_RDR_IRQ)
+ local_irq_disable();
+ if (statesnew & RCUTORTURE_RDR_PREEMPT)
+ preempt_disable();
+- if (statesnew & RCUTORTURE_RDR_RBH)
+- rcu_read_lock_bh();
+ if (statesnew & RCUTORTURE_RDR_SCHED)
+ rcu_read_lock_sched();
+ if (statesnew & RCUTORTURE_RDR_RCU)
+ idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
+
+- /* Next, remove old protection, irq first due to bh conflict. */
++ /*
++ * Next, remove old protection, in decreasing order of strength
++ * to avoid unlock paths that aren't safe in the stronger
++ * context. Namely: BH can not be enabled with disabled interrupts.
++ * Additionally PREEMPT_RT requires that BH is enabled in preemptible
++ * context.
++ */
+ if (statesold & RCUTORTURE_RDR_IRQ)
+ local_irq_enable();
+- if (statesold & RCUTORTURE_RDR_BH)
+- local_bh_enable();
+ if (statesold & RCUTORTURE_RDR_PREEMPT)
+ preempt_enable();
+- if (statesold & RCUTORTURE_RDR_RBH)
+- rcu_read_unlock_bh();
+ if (statesold & RCUTORTURE_RDR_SCHED)
+ rcu_read_unlock_sched();
++ if (statesold & RCUTORTURE_RDR_BH)
++ local_bh_enable();
++ if (statesold & RCUTORTURE_RDR_RBH)
++ rcu_read_unlock_bh();
+ if (statesold & RCUTORTURE_RDR_RCU) {
+ bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
+
+@@ -1496,6 +1505,9 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
+ int mask = rcutorture_extend_mask_max();
+ unsigned long randmask1 = torture_random(trsp) >> 8;
+ unsigned long randmask2 = randmask1 >> 3;
++ unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
++ unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
++ unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
+
+ WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
+ /* Mostly only one bit (need preemption!), sometimes lots of bits. */
+@@ -1503,11 +1515,26 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
+ mask = mask & randmask2;
+ else
+ mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
+- /* Can't enable bh w/irq disabled. */
+- if ((mask & RCUTORTURE_RDR_IRQ) &&
+- ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) ||
+- (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH))))
+- mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
++
++ /*
++ * Can't enable bh w/irq disabled.
++ */
++ if (mask & RCUTORTURE_RDR_IRQ)
++ mask |= oldmask & bhs;
++
++ /*
++ * Ideally these sequences would be detected in debug builds
++ * (regardless of RT), but until then don't stop testing
++ * them on non-RT.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
++ /* Can't modify BH in atomic context */
++ if (oldmask & preempts_irq)
++ mask &= ~bhs;
++ if ((oldmask | mask) & preempts_irq)
++ mask |= oldmask & bhs;
++ }
++
+ return mask ?: RCUTORTURE_RDR_RCU;
+ }
+
+@@ -1964,6 +1991,19 @@ static int rcutorture_booster_init(unsigned int cpu)
+ if (boost_tasks[cpu] != NULL)
+ return 0; /* Already created, nothing more to do. */
+
++ // Testing RCU priority boosting requires rcutorture do
++ // some serious abuse. Counter this by running ksoftirqd
++ // at higher priority.
++ if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
++ struct sched_param sp;
++ struct task_struct *t;
++
++ t = per_cpu(ksoftirqd, cpu);
++ WARN_ON_ONCE(!t);
++ sp.sched_priority = 2;
++ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
++ }
++
+ /* Don't allow time recalculation while creating a new task. */
+ mutex_lock(&boost_mutex);
+ rcu_torture_disable_rt_throttle();
+@@ -2028,6 +2068,8 @@ static int rcu_torture_stall(void *args)
+ #else
+ schedule_timeout_uninterruptible(HZ);
+ #endif
++ } else if (stall_no_softlockup) {
++ touch_softlockup_watchdog();
+ }
+ if (stall_cpu_irqsoff)
+ local_irq_enable();
+@@ -2819,7 +2861,7 @@ rcu_torture_cleanup(void)
+ rcutorture_seq_diff(gp_seq, start_gp_seq));
+ torture_stop_kthread(rcu_torture_stats, stats_task);
+ torture_stop_kthread(rcu_torture_fqs, fqs_task);
+- if (rcu_torture_can_boost())
++ if (rcu_torture_can_boost() && rcutor_hp >= 0)
+ cpuhp_remove_state(rcutor_hp);
+
+ /*
+@@ -3037,7 +3079,7 @@ rcu_torture_init(void)
+ rcu_torture_write_types();
+ firsterr = torture_create_kthread(rcu_torture_writer, NULL,
+ writer_task);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ if (nfakewriters > 0) {
+ fakewriter_tasks = kcalloc(nfakewriters,
+@@ -3052,7 +3094,7 @@ rcu_torture_init(void)
+ for (i = 0; i < nfakewriters; i++) {
+ firsterr = torture_create_kthread(rcu_torture_fakewriter,
+ NULL, fakewriter_tasks[i]);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
+@@ -3068,7 +3110,7 @@ rcu_torture_init(void)
+ rcu_torture_reader_mbchk[i].rtc_chkrdr = -1;
+ firsterr = torture_create_kthread(rcu_torture_reader, (void *)i,
+ reader_tasks[i]);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ nrealnocbers = nocbs_nthreads;
+@@ -3088,18 +3130,18 @@ rcu_torture_init(void)
+ }
+ for (i = 0; i < nrealnocbers; i++) {
+ firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ if (stat_interval > 0) {
+ firsterr = torture_create_kthread(rcu_torture_stats, NULL,
+ stats_task);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ if (test_no_idle_hz && shuffle_interval > 0) {
+ firsterr = torture_shuffle_init(shuffle_interval * HZ);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ if (stutter < 0)
+@@ -3109,7 +3151,7 @@ rcu_torture_init(void)
+
+ t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
+ firsterr = torture_stutter_init(stutter * HZ, t);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ if (fqs_duration < 0)
+@@ -3118,7 +3160,7 @@ rcu_torture_init(void)
+ /* Create the fqs thread */
+ firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
+ fqs_task);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+ if (test_boost_interval < 1)
+@@ -3132,44 +3174,29 @@ rcu_torture_init(void)
+ firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
+ rcutorture_booster_init,
+ rcutorture_booster_cleanup);
+- if (firsterr < 0)
+- goto unwind;
+ rcutor_hp = firsterr;
+-
+- // Testing RCU priority boosting requires rcutorture do
+- // some serious abuse. Counter this by running ksoftirqd
+- // at higher priority.
+- if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
+- for_each_online_cpu(cpu) {
+- struct sched_param sp;
+- struct task_struct *t;
+-
+- t = per_cpu(ksoftirqd, cpu);
+- WARN_ON_ONCE(!t);
+- sp.sched_priority = 2;
+- sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+- }
+- }
++ if (torture_init_error(firsterr))
++ goto unwind;
+ }
+ shutdown_jiffies = jiffies + shutdown_secs * HZ;
+ firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval,
+ rcutorture_sync);
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ firsterr = rcu_torture_stall_init();
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ firsterr = rcu_torture_fwd_prog_init();
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ firsterr = rcu_torture_barrier_init();
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ firsterr = rcu_torture_read_exit_init();
+- if (firsterr)
++ if (torture_init_error(firsterr))
+ goto unwind;
+ if (object_debug)
+ rcu_test_debug_objects();
+diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
+index 66dc14cf5687e..fda220e2c0426 100644
+--- a/kernel/rcu/refscale.c
++++ b/kernel/rcu/refscale.c
+@@ -777,7 +777,7 @@ ref_scale_cleanup(void)
+ static int
+ ref_scale_shutdown(void *arg)
+ {
+- wait_event(shutdown_wq, shutdown_start);
++ wait_event_idle(shutdown_wq, shutdown_start);
+
+ smp_mb(); // Wake before output.
+ ref_scale_cleanup();
+@@ -849,12 +849,11 @@ ref_scale_init(void)
+ VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
+
+ for (i = 0; i < nreaders; i++) {
++ init_waitqueue_head(&reader_tasks[i].wq);
+ firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
+ reader_tasks[i].task);
+ if (firsterr)
+ goto unwind;
+-
+- init_waitqueue_head(&(reader_tasks[i].wq));
+ }
+
+ // Main Task
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index 806160c44b172..28f628c702452 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -171,8 +171,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
+ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
+ {
+ /* Complain if the scheduler has not started. */
+- RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
+- "synchronize_rcu_tasks called too soon");
++ if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
++ "synchronize_%s() called too soon", rtp->name))
++ return;
+
+ /* Wait for the grace period. */
+ wait_rcu_gp(rtp->call_func);
+@@ -197,6 +198,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
+ * This loop is terminated by the system going down. ;-)
+ */
+ for (;;) {
++ set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+
+ /* Pick up any new callbacks. */
+ raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
+@@ -236,8 +238,6 @@ static int __noreturn rcu_tasks_kthread(void *arg)
+ }
+ /* Paranoid sleep to keep this from entering a tight loop */
+ schedule_timeout_idle(rtp->gp_sleep);
+-
+- set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+ }
+ }
+
+@@ -452,11 +452,21 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
+ static void rcu_tasks_postscan(struct list_head *hop)
+ {
+ /*
+- * Wait for tasks that are in the process of exiting. This
+- * does only part of the job, ensuring that all tasks that were
+- * previously exiting reach the point where they have disabled
+- * preemption, allowing the later synchronize_rcu() to finish
+- * the job.
++ * Exiting tasks may escape the tasklist scan. Those are vulnerable
++ * until their final schedule() with TASK_DEAD state. To cope with
++ * this, divide the fragile exit path part in two intersecting
++ * read side critical sections:
++ *
++ * 1) An _SRCU_ read side starting before calling exit_notify(),
++ * which may remove the task from the tasklist, and ending after
++ * the final preempt_disable() call in do_exit().
++ *
++ * 2) An _RCU_ read side starting with the final preempt_disable()
++ * call in do_exit() and ending with the final call to schedule()
++ * with TASK_DEAD state.
++ *
++ * This handles the part 1). And postgp will handle part 2) with a
++ * call to synchronize_rcu().
+ */
+ synchronize_srcu(&tasks_rcu_exit_srcu);
+ }
+@@ -523,7 +533,10 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
+ *
+ * In addition, this synchronize_rcu() waits for exiting tasks
+ * to complete their final preempt_disable() region of execution,
+- * cleaning up after the synchronize_srcu() above.
++ * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
++ * enforcing the whole region before tasklist removal until
++ * the final schedule() with TASK_DEAD state to be an RCU TASKS
++ * read side critical section.
+ */
+ synchronize_rcu();
+ }
+@@ -613,27 +626,42 @@ void show_rcu_tasks_classic_gp_kthread(void)
+ EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
+ #endif // !defined(CONFIG_TINY_RCU)
+
+-/* Do the srcu_read_lock() for the above synchronize_srcu(). */
++/*
++ * Contribute to protect against tasklist scan blind spot while the
++ * task is exiting and may be removed from the tasklist. See
++ * corresponding synchronize_srcu() for further details.
++ */
+ void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
+ {
+- preempt_disable();
+ current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
+- preempt_enable();
+ }
+
+-/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
+-void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
++/*
++ * Contribute to protect against tasklist scan blind spot while the
++ * task is exiting and may be removed from the tasklist. See
++ * corresponding synchronize_srcu() for further details.
++ */
++void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
+ {
+ struct task_struct *t = current;
+
+- preempt_disable();
+ __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
+- preempt_enable();
+- exit_tasks_rcu_finish_trace(t);
++}
++
++/*
++ * Contribute to protect against tasklist scan blind spot while the
++ * task is exiting and may be removed from the tasklist. See
++ * corresponding synchronize_srcu() for further details.
++ */
++void exit_tasks_rcu_finish(void)
++{
++ exit_tasks_rcu_stop();
++ exit_tasks_rcu_finish_trace(current);
+ }
+
+ #else /* #ifdef CONFIG_TASKS_RCU */
+ void exit_tasks_rcu_start(void) { }
++void exit_tasks_rcu_stop(void) { }
+ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
+ #endif /* #else #ifdef CONFIG_TASKS_RCU */
+
+@@ -890,32 +918,24 @@ static void trc_read_check_handler(void *t_in)
+
+ // If the task is no longer running on this CPU, leave.
+ if (unlikely(texp != t)) {
+- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+- wake_up(&trc_wait);
+ goto reset_ipi; // Already on holdout list, so will check later.
+ }
+
+ // If the task is not in a read-side critical section, and
+ // if this is the last reader, awaken the grace-period kthread.
+ if (likely(!READ_ONCE(t->trc_reader_nesting))) {
+- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+- wake_up(&trc_wait);
+- // Mark as checked after decrement to avoid false
+- // positives on the above WARN_ON_ONCE().
+ WRITE_ONCE(t->trc_reader_checked, true);
+ goto reset_ipi;
+ }
+ // If we are racing with an rcu_read_unlock_trace(), try again later.
+- if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) {
+- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
+- wake_up(&trc_wait);
++ if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0))
+ goto reset_ipi;
+- }
+ WRITE_ONCE(t->trc_reader_checked, true);
+
+ // Get here if the task is in a read-side critical section. Set
+ // its state so that it will awaken the grace-period kthread upon
+ // exit from that critical section.
++ atomic_inc(&trc_n_readers_need_end); // One more to wait on.
+ WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
+ WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
+
+@@ -931,7 +951,7 @@ reset_ipi:
+ static bool trc_inspect_reader(struct task_struct *t, void *arg)
+ {
+ int cpu = task_cpu(t);
+- bool in_qs = false;
++ int nesting;
+ bool ofl = cpu_is_offline(cpu);
+
+ if (task_curr(t)) {
+@@ -951,18 +971,18 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
+ n_heavy_reader_updates++;
+ if (ofl)
+ n_heavy_reader_ofl_updates++;
+- in_qs = true;
++ nesting = 0;
+ } else {
+ // The task is not running, so C-language access is safe.
+- in_qs = likely(!t->trc_reader_nesting);
++ nesting = t->trc_reader_nesting;
+ }
+
+- // Mark as checked so that the grace-period kthread will
+- // remove it from the holdout list.
+- t->trc_reader_checked = true;
+-
+- if (in_qs)
+- return true; // Already in quiescent state, done!!!
++ // If not exiting a read-side critical section, mark as checked
++ // so that the grace-period kthread will remove it from the
++ // holdout list.
++ t->trc_reader_checked = nesting >= 0;
++ if (nesting <= 0)
++ return !nesting; // If in QS, done, otherwise try again later.
+
+ // The task is in a read-side critical section, so set up its
+ // state so that it will awaken the grace-period kthread upon exit
+@@ -1015,21 +1035,17 @@ static void trc_wait_for_one_reader(struct task_struct *t,
+ if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
+ return;
+
+- atomic_inc(&trc_n_readers_need_end);
+ per_cpu(trc_ipi_to_cpu, cpu) = true;
+ t->trc_ipi_to_cpu = cpu;
+ rcu_tasks_trace.n_ipis++;
+- if (smp_call_function_single(cpu,
+- trc_read_check_handler, t, 0)) {
++ if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
+ // Just in case there is some other reason for
+ // failure than the target CPU being offline.
++ WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n",
++ __func__, cpu);
+ rcu_tasks_trace.n_ipis_fails++;
+ per_cpu(trc_ipi_to_cpu, cpu) = false;
+- t->trc_ipi_to_cpu = cpu;
+- if (atomic_dec_and_test(&trc_n_readers_need_end)) {
+- WARN_ON_ONCE(1);
+- wake_up(&trc_wait);
+- }
++ t->trc_ipi_to_cpu = -1;
+ }
+ }
+ }
+@@ -1150,14 +1166,28 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
+ }
+ }
+
++static void rcu_tasks_trace_empty_fn(void *unused)
++{
++}
++
+ /* Wait for grace period to complete and provide ordering. */
+ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
+ {
++ int cpu;
+ bool firstreport;
+ struct task_struct *g, *t;
+ LIST_HEAD(holdouts);
+ long ret;
+
++ // Wait for any lingering IPI handlers to complete. Note that
++ // if a CPU has gone offline or transitioned to userspace in the
++ // meantime, all IPI handlers should have been drained beforehand.
++ // Yes, this assumes that CPUs process IPIs in order. If that ever
++ // changes, there will need to be a recheck and/or timed wait.
++ for_each_online_cpu(cpu)
++ if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
++ smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
++
+ // Remove the safety count.
+ smp_mb__before_atomic(); // Order vs. earlier atomics
+ atomic_dec(&trc_n_readers_need_end);
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index bce848e50512e..df016f6d0662c 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -327,7 +327,7 @@ static void rcu_dynticks_eqs_online(void)
+ */
+ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
+ {
+- return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
++ return !(arch_atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
+ }
+
+ /*
+@@ -975,6 +975,7 @@ void __rcu_irq_enter_check_tick(void)
+ }
+ raw_spin_unlock_rcu_node(rdp->mynode);
+ }
++NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
+ #endif /* CONFIG_NO_HZ_FULL */
+
+ /**
+@@ -1594,10 +1595,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
+ struct rcu_data *rdp)
+ {
+ rcu_lockdep_assert_cblist_protected(rdp);
+- if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
+- !raw_spin_trylock_rcu_node(rnp))
++ if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
+ return;
+- WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
++ // The grace period cannot end while we hold the rcu_node lock.
++ if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
++ WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
+ raw_spin_unlock_rcu_node(rnp);
+ }
+
+@@ -1907,7 +1909,7 @@ static void rcu_gp_fqs(bool first_time)
+ struct rcu_node *rnp = rcu_get_root();
+
+ WRITE_ONCE(rcu_state.gp_activity, jiffies);
+- rcu_state.n_force_qs++;
++ WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
+ if (first_time) {
+ /* Collect dyntick-idle snapshots. */
+ force_qs_rnp(dyntick_save_progress_counter);
+@@ -2475,7 +2477,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+ div = READ_ONCE(rcu_divisor);
+ div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
+ bl = max(rdp->blimit, pending >> div);
+- if (unlikely(bl > 100)) {
++ if (in_serving_softirq() && unlikely(bl > 100)) {
+ long rrn = READ_ONCE(rcu_resched_ns);
+
+ rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
+@@ -2512,18 +2514,23 @@ static void rcu_do_batch(struct rcu_data *rdp)
+ /*
+ * Stop only if limit reached and CPU has something to do.
+ */
+- if (count >= bl && !offloaded &&
+- (need_resched() ||
+- (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
+- break;
+- if (unlikely(tlimit)) {
+- /* only call local_clock() every 32 callbacks */
+- if (likely((count & 31) || local_clock() < tlimit))
+- continue;
+- /* Exceeded the time limit, so leave. */
+- break;
+- }
+- if (!in_serving_softirq()) {
++ if (in_serving_softirq()) {
++ if (count >= bl && (need_resched() ||
++ (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
++ break;
++
++ /*
++ * Make sure we don't spend too much time here and deprive other
++ * softirq vectors of CPU cycles.
++ */
++ if (unlikely(tlimit)) {
++ /* only call local_clock() every 32 callbacks */
++ if (likely((count & 31) || local_clock() < tlimit))
++ continue;
++ /* Exceeded the time limit, so leave. */
++ break;
++ }
++ } else {
+ local_bh_enable();
+ lockdep_assert_irqs_enabled();
+ cond_resched_tasks_rcu_qs();
+@@ -2550,7 +2557,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+ /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
+ if (count == 0 && rdp->qlen_last_fqs_check != 0) {
+ rdp->qlen_last_fqs_check = 0;
+- rdp->n_force_qs_snap = rcu_state.n_force_qs;
++ rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
+ } else if (count < rdp->qlen_last_fqs_check - qhimark)
+ rdp->qlen_last_fqs_check = count;
+
+@@ -2668,7 +2675,7 @@ void rcu_force_quiescent_state(void)
+ struct rcu_node *rnp_old = NULL;
+
+ /* Funnel through hierarchy to reduce memory contention. */
+- rnp = __this_cpu_read(rcu_data.mynode);
++ rnp = raw_cpu_read(rcu_data.mynode);
+ for (; rnp != NULL; rnp = rnp->parent) {
+ ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
+ !raw_spin_trylock(&rnp->fqslock);
+@@ -2898,10 +2905,10 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
+ } else {
+ /* Give the grace period a kick. */
+ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
+- if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
++ if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap &&
+ rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
+ rcu_force_quiescent_state();
+- rdp->n_force_qs_snap = rcu_state.n_force_qs;
++ rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
+ rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
+ }
+ }
+@@ -3321,6 +3328,30 @@ static void kfree_rcu_work(struct work_struct *work)
+ }
+ }
+
++static bool
++need_offload_krc(struct kfree_rcu_cpu *krcp)
++{
++ int i;
++
++ for (i = 0; i < FREE_N_CHANNELS; i++)
++ if (krcp->bkvhead[i])
++ return true;
++
++ return !!krcp->head;
++}
++
++static bool
++need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
++{
++ int i;
++
++ for (i = 0; i < FREE_N_CHANNELS; i++)
++ if (krwp->bkvhead_free[i])
++ return true;
++
++ return !!krwp->head_free;
++}
++
+ /*
+ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ */
+@@ -3337,14 +3368,13 @@ static void kfree_rcu_monitor(struct work_struct *work)
+ for (i = 0; i < KFREE_N_BATCHES; i++) {
+ struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
+
+- // Try to detach bkvhead or head and attach it over any
+- // available corresponding free channel. It can be that
+- // a previous RCU batch is in progress, it means that
+- // immediately to queue another one is not possible so
+- // in that case the monitor work is rearmed.
+- if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
+- (krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
+- (krcp->head && !krwp->head_free)) {
++		// Try to detach bulk_head or head and attach it, but only when
++		// all channels are free. If any channel is still busy, krwp has
++		// on-going RCU work handling a previous batch of frees.
++ if (need_wait_for_krwp_work(krwp))
++ continue;
++
++ if (need_offload_krc(krcp)) {
+ // Channel 1 corresponds to the SLAB-pointer bulk path.
+ // Channel 2 corresponds to vmalloc-pointer bulk path.
+ for (j = 0; j < FREE_N_CHANNELS; j++) {
+@@ -3413,15 +3443,16 @@ static void fill_page_cache_func(struct work_struct *work)
+ bnode = (struct kvfree_rcu_bulk_data *)
+ __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+- if (bnode) {
+- raw_spin_lock_irqsave(&krcp->lock, flags);
+- pushed = put_cached_bnode(krcp, bnode);
+- raw_spin_unlock_irqrestore(&krcp->lock, flags);
++ if (!bnode)
++ break;
+
+- if (!pushed) {
+- free_page((unsigned long) bnode);
+- break;
+- }
++ raw_spin_lock_irqsave(&krcp->lock, flags);
++ pushed = put_cached_bnode(krcp, bnode);
++ raw_spin_unlock_irqrestore(&krcp->lock, flags);
++
++ if (!pushed) {
++ free_page((unsigned long) bnode);
++ break;
+ }
+ }
+
+@@ -4128,7 +4159,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
+ /* Set up local state, ensuring consistent view of global state. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rdp->qlen_last_fqs_check = 0;
+- rdp->n_force_qs_snap = rcu_state.n_force_qs;
++ rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);
+ rdp->blimit = blimit;
+ rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */
+ rcu_dynticks_eqs_online();
+diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
+index 2796084ef85a5..407941a2903bc 100644
+--- a/kernel/rcu/tree_exp.h
++++ b/kernel/rcu/tree_exp.h
+@@ -387,6 +387,7 @@ retry_ipi:
+ continue;
+ }
+ if (get_cpu() == cpu) {
++ mask_ofl_test |= mask;
+ put_cpu();
+ continue;
+ }
+@@ -506,7 +507,10 @@ static void synchronize_rcu_expedited_wait(void)
+ if (rdp->rcu_forced_tick_exp)
+ continue;
+ rdp->rcu_forced_tick_exp = true;
+- tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
++ preempt_disable();
++ if (cpu_online(cpu))
++ tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
++ preempt_enable();
+ }
+ }
+ j = READ_ONCE(jiffies_till_first_fqs);
+@@ -564,7 +568,9 @@ static void synchronize_rcu_expedited_wait(void)
+ mask = leaf_node_cpu_bit(rnp, cpu);
+ if (!(READ_ONCE(rnp->expmask) & mask))
+ continue;
++ preempt_disable(); // For smp_processor_id() in dump_cpu_task().
+ dump_cpu_task(cpu);
++ preempt_enable();
+ }
+ }
+ jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+@@ -705,9 +711,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
+ int ndetected = 0;
+ struct task_struct *t;
+
+- if (!READ_ONCE(rnp->exp_tasks))
+- return 0;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
++ if (!rnp->exp_tasks) {
++ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
++ return 0;
++ }
+ t = list_entry(rnp->exp_tasks->prev,
+ struct task_struct, rcu_node_entry);
+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+@@ -760,7 +768,7 @@ static void sync_sched_exp_online_cleanup(int cpu)
+ my_cpu = get_cpu();
+ /* Quiescent state either not needed or already requested, leave. */
+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+- __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
++ rdp->cpu_no_qs.b.exp) {
+ put_cpu();
+ return;
+ }
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index d070059163d70..f1a73a1f8472e 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -554,16 +554,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+
+- /* Unboost if we were boosted. */
+- if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
+- rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
+-
+ /*
+ * If this was the last task on the expedited lists,
+ * then we need to report up the rcu_node hierarchy.
+ */
+ if (!empty_exp && empty_exp_now)
+ rcu_report_exp_rnp(rnp, true);
++
++ /* Unboost if we were boosted. */
++ if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
++ rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
+ } else {
+ local_irq_restore(flags);
+ }
+@@ -638,7 +638,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
+
+ expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
+ (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
+- IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
++ (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
++ ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node)) ||
+ (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
+ t->rcu_blocked_node);
+ // Need to defer quiescent state until everything is enabled.
+@@ -1480,7 +1481,7 @@ static void rcu_bind_gp_kthread(void)
+ }
+
+ /* Record the current task on dyntick-idle entry. */
+-static void noinstr rcu_dynticks_task_enter(void)
++static __always_inline void rcu_dynticks_task_enter(void)
+ {
+ #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
+@@ -1488,7 +1489,7 @@ static void noinstr rcu_dynticks_task_enter(void)
+ }
+
+ /* Record no current task on dyntick-idle exit. */
+-static void noinstr rcu_dynticks_task_exit(void)
++static __always_inline void rcu_dynticks_task_exit(void)
+ {
+ #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
+@@ -1496,7 +1497,7 @@ static void noinstr rcu_dynticks_task_exit(void)
+ }
+
+ /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
+-static void rcu_dynticks_task_trace_enter(void)
++static __always_inline void rcu_dynticks_task_trace_enter(void)
+ {
+ #ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+@@ -1505,7 +1506,7 @@ static void rcu_dynticks_task_trace_enter(void)
+ }
+
+ /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
+-static void rcu_dynticks_task_trace_exit(void)
++static __always_inline void rcu_dynticks_task_trace_exit(void)
+ {
+ #ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+diff --git a/kernel/relay.c b/kernel/relay.c
+index d1a67fbb819d3..a4d6889af94f9 100644
+--- a/kernel/relay.c
++++ b/kernel/relay.c
+@@ -151,13 +151,13 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
+ {
+ struct rchan_buf *buf;
+
+- if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
++ if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t))
+ return NULL;
+
+ buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
+ if (!buf)
+ return NULL;
+- buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t *),
++ buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t),
+ GFP_KERNEL);
+ if (!buf->padding)
+ goto free_buf;
+@@ -992,7 +992,8 @@ static size_t relay_file_read_start_pos(struct rchan_buf *buf)
+ size_t subbuf_size = buf->chan->subbuf_size;
+ size_t n_subbufs = buf->chan->n_subbufs;
+ size_t consumed = buf->subbufs_consumed % n_subbufs;
+- size_t read_pos = consumed * subbuf_size + buf->bytes_consumed;
++ size_t read_pos = (consumed * subbuf_size + buf->bytes_consumed)
++ % (n_subbufs * subbuf_size);
+
+ read_subbuf = read_pos / subbuf_size;
+ padding = buf->padding[read_subbuf];
+diff --git a/kernel/resource.c b/kernel/resource.c
+index ca9f5198a01ff..cb441e3e7670c 100644
+--- a/kernel/resource.c
++++ b/kernel/resource.c
+@@ -56,14 +56,6 @@ struct resource_constraint {
+
+ static DEFINE_RWLOCK(resource_lock);
+
+-/*
+- * For memory hotplug, there is no way to free resource entries allocated
+- * by boot mem after the system is up. So for reusing the resource entry
+- * we need to remember the resource.
+- */
+-static struct resource *bootmem_resource_free;
+-static DEFINE_SPINLOCK(bootmem_resource_lock);
+-
+ static struct resource *next_resource(struct resource *p)
+ {
+ if (p->child)
+@@ -148,36 +140,19 @@ __initcall(ioresources_init);
+
+ static void free_resource(struct resource *res)
+ {
+- if (!res)
+- return;
+-
+- if (!PageSlab(virt_to_head_page(res))) {
+- spin_lock(&bootmem_resource_lock);
+- res->sibling = bootmem_resource_free;
+- bootmem_resource_free = res;
+- spin_unlock(&bootmem_resource_lock);
+- } else {
++ /**
++ * If the resource was allocated using memblock early during boot
++ * we'll leak it here: we can only return full pages back to the
++ * buddy and trying to be smart and reusing them eventually in
++ * alloc_resource() overcomplicates resource handling.
++ */
++ if (res && PageSlab(virt_to_head_page(res)))
+ kfree(res);
+- }
+ }
+
+ static struct resource *alloc_resource(gfp_t flags)
+ {
+- struct resource *res = NULL;
+-
+- spin_lock(&bootmem_resource_lock);
+- if (bootmem_resource_free) {
+- res = bootmem_resource_free;
+- bootmem_resource_free = res->sibling;
+- }
+- spin_unlock(&bootmem_resource_lock);
+-
+- if (res)
+- memset(res, 0, sizeof(struct resource));
+- else
+- res = kzalloc(sizeof(struct resource), flags);
+-
+- return res;
++ return kzalloc(sizeof(struct resource), flags);
+ }
+
+ /* Return the conflict entry if you can't request it */
+@@ -1350,20 +1325,6 @@ retry:
+ continue;
+ }
+
+- /*
+- * All memory regions added from memory-hotplug path have the
+- * flag IORESOURCE_SYSTEM_RAM. If the resource does not have
+- * this flag, we know that we are dealing with a resource coming
+- * from HMM/devm. HMM/devm use another mechanism to add/release
+- * a resource. This goes via devm_request_mem_region and
+- * devm_release_mem_region.
+- * HMM/devm take care to release their resources when they want,
+- * so if we are dealing with them, let us just back off here.
+- */
+- if (!(res->flags & IORESOURCE_SYSRAM)) {
+- break;
+- }
+-
+ if (!(res->flags & IORESOURCE_MEM))
+ break;
+
+diff --git a/kernel/rseq.c b/kernel/rseq.c
+index 6d45ac3dae7fb..97ac20b4f7387 100644
+--- a/kernel/rseq.c
++++ b/kernel/rseq.c
+@@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
+ int ret;
+
+ #ifdef CONFIG_64BIT
+- if (get_user(ptr, &t->rseq->rseq_cs.ptr64))
++ if (get_user(ptr, &t->rseq->rseq_cs))
+ return -EFAULT;
+ #else
+- if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
++ if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
+ return -EFAULT;
+ #endif
+ if (!ptr) {
+@@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t)
+ * Set rseq_cs to NULL.
+ */
+ #ifdef CONFIG_64BIT
+- return put_user(0UL, &t->rseq->rseq_cs.ptr64);
++ return put_user(0UL, &t->rseq->rseq_cs);
+ #else
+- if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
++ if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
+ return -EFAULT;
+ return 0;
+ #endif
+diff --git a/kernel/scftorture.c b/kernel/scftorture.c
+index 64a08288b1a6d..27286d99e0c28 100644
+--- a/kernel/scftorture.c
++++ b/kernel/scftorture.c
+@@ -271,9 +271,10 @@ static void scf_handler(void *scfc_in)
+ }
+ this_cpu_inc(scf_invoked_count);
+ if (longwait <= 0) {
+- if (!(r & 0xffc0))
++ if (!(r & 0xffc0)) {
+ udelay(r & 0x3f);
+- goto out;
++ goto out;
++ }
+ }
+ if (r & 0xfff)
+ goto out;
+diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
+index 2067080bb2358..8629b37d118e7 100644
+--- a/kernel/sched/autogroup.c
++++ b/kernel/sched/autogroup.c
+@@ -31,7 +31,7 @@ static inline void autogroup_destroy(struct kref *kref)
+ ag->tg->rt_se = NULL;
+ ag->tg->rt_rq = NULL;
+ #endif
+- sched_offline_group(ag->tg);
++ sched_release_group(ag->tg);
+ sched_destroy_group(ag->tg);
+ }
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index f21714ea3db85..2324b7055260a 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -21,7 +21,7 @@
+ #include <asm/tlb.h>
+
+ #include "../workqueue_internal.h"
+-#include "../../fs/io-wq.h"
++#include "../../io_uring/io-wq.h"
+ #include "../smpboot.h"
+
+ #include "pelt.h"
+@@ -36,6 +36,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
++EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
+@@ -530,10 +531,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ swap(rq1, rq2);
+
+ raw_spin_rq_lock(rq1);
+- if (__rq_lockp(rq1) == __rq_lockp(rq2))
+- return;
++ if (__rq_lockp(rq1) != __rq_lockp(rq2))
++ raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+
+- raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
++ double_rq_clock_clear_update(rq1, rq2);
+ }
+ #endif
+
+@@ -1334,7 +1335,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
+ if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
+ return;
+
+- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
++ uclamp_rq_set(rq, clamp_id, clamp_value);
+ }
+
+ static inline
+@@ -1512,8 +1513,8 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
+ if (bucket->tasks == 1 || uc_se->value > bucket->value)
+ bucket->value = uc_se->value;
+
+- if (uc_se->value > READ_ONCE(uc_rq->value))
+- WRITE_ONCE(uc_rq->value, uc_se->value);
++ if (uc_se->value > uclamp_rq_get(rq, clamp_id))
++ uclamp_rq_set(rq, clamp_id, uc_se->value);
+ }
+
+ /*
+@@ -1579,7 +1580,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
+ if (likely(bucket->tasks))
+ return;
+
+- rq_clamp = READ_ONCE(uc_rq->value);
++ rq_clamp = uclamp_rq_get(rq, clamp_id);
+ /*
+ * Defensive programming: this should never happen. If it happens,
+ * e.g. due to future modification, warn and fixup the expected value.
+@@ -1587,7 +1588,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
+ SCHED_WARN_ON(bucket->value > rq_clamp);
+ if (bucket->value >= rq_clamp) {
+ bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
+- WRITE_ONCE(uc_rq->value, bkt_clamp);
++ uclamp_rq_set(rq, clamp_id, bkt_clamp);
+ }
+ }
+
+@@ -1914,7 +1915,7 @@ static void __init init_uclamp_rq(struct rq *rq)
+ };
+ }
+
+- rq->uclamp_flags = 0;
++ rq->uclamp_flags = UCLAMP_FLAG_IDLE;
+ }
+
+ static void __init init_uclamp(void)
+@@ -1998,6 +1999,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+
+ void activate_task(struct rq *rq, struct task_struct *p, int flags)
+ {
++ if (task_on_rq_migrating(p))
++ flags |= ENQUEUE_MIGRATED;
++
+ enqueue_task(rq, p, flags);
+
+ p->on_rq = TASK_ON_RQ_QUEUED;
+@@ -2500,14 +2504,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
+ int node)
+ {
+- if (!src->user_cpus_ptr)
++ cpumask_t *user_mask;
++ unsigned long flags;
++
++ /*
++ * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's
++ * may differ by now due to racing.
++ */
++ dst->user_cpus_ptr = NULL;
++
++ /*
++ * This check is racy and losing the race is a valid situation.
++ * It is not worth the extra overhead of taking the pi_lock on
++ * every fork/clone.
++ */
++ if (data_race(!src->user_cpus_ptr))
+ return 0;
+
+- dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
+- if (!dst->user_cpus_ptr)
++ user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
++ if (!user_mask)
+ return -ENOMEM;
+
+- cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
++ /*
++ * Use pi_lock to protect content of user_cpus_ptr
++ *
++ * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent
++ * do_set_cpus_allowed().
++ */
++ raw_spin_lock_irqsave(&src->pi_lock, flags);
++ if (src->user_cpus_ptr) {
++ swap(dst->user_cpus_ptr, user_mask);
++ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
++ }
++ raw_spin_unlock_irqrestore(&src->pi_lock, flags);
++
++ if (unlikely(user_mask))
++ kfree(user_mask);
++
+ return 0;
+ }
+
+@@ -3489,11 +3522,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
+ #ifdef CONFIG_SMP
+ if (cpu == rq->cpu) {
+ __schedstat_inc(rq->ttwu_local);
+- __schedstat_inc(p->se.statistics.nr_wakeups_local);
++ __schedstat_inc(p->stats.nr_wakeups_local);
+ } else {
+ struct sched_domain *sd;
+
+- __schedstat_inc(p->se.statistics.nr_wakeups_remote);
++ __schedstat_inc(p->stats.nr_wakeups_remote);
+ rcu_read_lock();
+ for_each_domain(rq->cpu, sd) {
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+@@ -3505,14 +3538,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
+ }
+
+ if (wake_flags & WF_MIGRATED)
+- __schedstat_inc(p->se.statistics.nr_wakeups_migrate);
++ __schedstat_inc(p->stats.nr_wakeups_migrate);
+ #endif /* CONFIG_SMP */
+
+ __schedstat_inc(rq->ttwu_count);
+- __schedstat_inc(p->se.statistics.nr_wakeups);
++ __schedstat_inc(p->stats.nr_wakeups);
+
+ if (wake_flags & WF_SYNC)
+- __schedstat_inc(p->se.statistics.nr_wakeups_sync);
++ __schedstat_inc(p->stats.nr_wakeups_sync);
+ }
+
+ /*
+@@ -3707,10 +3740,13 @@ out:
+
+ bool cpus_share_cache(int this_cpu, int that_cpu)
+ {
++ if (this_cpu == that_cpu)
++ return true;
++
+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
+ }
+
+-static inline bool ttwu_queue_cond(int cpu, int wake_flags)
++static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
+ {
+ /*
+ * Do not complicate things with the async wake_list while the CPU is
+@@ -3719,6 +3755,10 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+ if (!cpu_active(cpu))
+ return false;
+
++ /* Ensure the task will still be allowed to run on the CPU. */
++ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
++ return false;
++
+ /*
+ * If the CPU does not share cache, then queue the task on the
+ * remote rqs wakelist to avoid accessing remote data.
+@@ -3726,13 +3766,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+ if (!cpus_share_cache(smp_processor_id(), cpu))
+ return true;
+
++ if (cpu == smp_processor_id())
++ return false;
++
+ /*
+- * If the task is descheduling and the only running task on the
+- * CPU then use the wakelist to offload the task activation to
+- * the soon-to-be-idle CPU as the current CPU is likely busy.
+- * nr_running is checked to avoid unnecessary task stacking.
++ * If the wakee cpu is idle, or the task is descheduling and the
++ * only running task on the CPU, then use the wakelist to offload
++ * the task activation to the idle (or soon-to-be-idle) CPU as
++ * the current CPU is likely busy. nr_running is checked to
++ * avoid unnecessary task stacking.
++ *
++ * Note that we can only get here with (wakee) p->on_rq=0,
++ * p->on_cpu can be whatever, we've done the dequeue, so
++ * the wakee has been accounted out of ->nr_running.
+ */
+- if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
++ if (!cpu_rq(cpu)->nr_running)
+ return true;
+
+ return false;
+@@ -3740,10 +3788,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+
+ static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
+ {
+- if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+- if (WARN_ON_ONCE(cpu == smp_processor_id()))
+- return false;
+-
++ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+ __ttwu_queue_wakelist(p, cpu, wake_flags);
+ return true;
+@@ -4065,7 +4110,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+ * scheduling.
+ */
+ if (smp_load_acquire(&p->on_cpu) &&
+- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+ goto unlock;
+
+ /*
+@@ -4196,7 +4241,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+
+ #ifdef CONFIG_SCHEDSTATS
+ /* Even if schedstat is disabled, there should not be garbage */
+- memset(&p->se.statistics, 0, sizeof(p->se.statistics));
++ memset(&p->stats, 0, sizeof(p->stats));
+ #endif
+
+ RB_CLEAR_NODE(&p->dl.rb_node);
+@@ -4328,8 +4373,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
+ */
+ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ {
+- unsigned long flags;
+-
+ __sched_fork(clone_flags, p);
+ /*
+ * We mark the process as NEW here. This guarantees that
+@@ -4375,23 +4418,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+
+ init_entity_runnable_average(&p->se);
+
+- /*
+- * The child is not yet in the pid-hash so no cgroup attach races,
+- * and the cgroup is pinned to this child due to cgroup_fork()
+- * is ran before sched_fork().
+- *
+- * Silence PROVE_RCU.
+- */
+- raw_spin_lock_irqsave(&p->pi_lock, flags);
+- rseq_migrate(p);
+- /*
+- * We're setting the CPU for the first time, we don't migrate,
+- * so use __set_task_cpu().
+- */
+- __set_task_cpu(p, smp_processor_id());
+- if (p->sched_class->task_fork)
+- p->sched_class->task_fork(p);
+- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+ #ifdef CONFIG_SCHED_INFO
+ if (likely(sched_info_on()))
+@@ -4408,6 +4434,35 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ return 0;
+ }
+
++void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
++{
++ unsigned long flags;
++
++ /*
++ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
++ * required yet, but lockdep gets upset if rules are violated.
++ */
++ raw_spin_lock_irqsave(&p->pi_lock, flags);
++#ifdef CONFIG_CGROUP_SCHED
++ if (1) {
++ struct task_group *tg;
++ tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
++ struct task_group, css);
++ tg = autogroup_task_group(p, tg);
++ p->sched_task_group = tg;
++ }
++#endif
++ rseq_migrate(p);
++ /*
++ * We're setting the CPU for the first time, we don't migrate,
++ * so use __set_task_cpu().
++ */
++ __set_task_cpu(p, smp_processor_id());
++ if (p->sched_class->task_fork)
++ p->sched_class->task_fork(p);
++ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
++}
++
+ void sched_post_fork(struct task_struct *p)
+ {
+ uclamp_post_fork(p);
+@@ -4571,7 +4626,8 @@ static inline void prepare_task(struct task_struct *next)
+ * Claim the task as running, we do this before switching to it
+ * such that any running task will have this set.
+ *
+- * See the ttwu() WF_ON_CPU case and its ordering comment.
++ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
++ * its ordering comment.
+ */
+ WRITE_ONCE(next->on_cpu, 1);
+ #endif
+@@ -4616,25 +4672,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+
+ static void balance_push(struct rq *rq);
+
++/*
++ * balance_push_callback is a right abuse of the callback interface and plays
++ * by significantly different rules.
++ *
++ * Where the normal balance_callback's purpose is to be run in the same context
++ * that queued it (only later, when it's safe to drop rq->lock again),
++ * balance_push_callback is specifically targeted at __schedule().
++ *
++ * This abuse is tolerated because it places all the unlikely/odd cases behind
++ * a single test, namely: rq->balance_callback == NULL.
++ */
+ struct callback_head balance_push_callback = {
+ .next = NULL,
+ .func = (void (*)(struct callback_head *))balance_push,
+ };
+
+-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
++static inline struct callback_head *
++__splice_balance_callbacks(struct rq *rq, bool split)
+ {
+ struct callback_head *head = rq->balance_callback;
+
++ if (likely(!head))
++ return NULL;
++
+ lockdep_assert_rq_held(rq);
+- if (head)
++ /*
++ * Must not take balance_push_callback off the list when
++ * splice_balance_callbacks() and balance_callbacks() are not
++ * in the same rq->lock section.
++ *
++ * In that case it would be possible for __schedule() to interleave
++ * and observe the list empty.
++ */
++ if (split && head == &balance_push_callback)
++ head = NULL;
++ else
+ rq->balance_callback = NULL;
+
+ return head;
+ }
+
++static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
++{
++ return __splice_balance_callbacks(rq, true);
++}
++
+ static void __balance_callbacks(struct rq *rq)
+ {
+- do_balance_callbacks(rq, splice_balance_callbacks(rq));
++ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
+ }
+
+ static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+@@ -5477,8 +5563,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
+ pr_err("Preemption disabled at:");
+ print_ip_sym(KERN_ERR, preempt_disable_ip);
+ }
+- if (panic_on_warn)
+- panic("scheduling while atomic\n");
++ check_panic_on_warn("scheduling while atomic");
+
+ dump_stack();
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+@@ -5913,7 +5998,7 @@ static bool try_steal_cookie(int this, int that)
+ if (p == src->core_pick || p == src->curr)
+ goto next;
+
+- if (!cpumask_test_cpu(this, &p->cpus_mask))
++ if (!is_cpu_allowed(p, this))
+ goto next;
+
+ if (p->core_occupation > dst->idle->core_occupation)
+@@ -6335,8 +6420,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
+ preempt_enable_no_resched();
+ }
+
+- if (tsk_is_pi_blocked(tsk))
+- return;
++ /*
++ * spinlock and rwlock must not flush block requests. This will
++ * deadlock if the callback attempts to acquire a lock which is
++ * already acquired.
++ */
++ SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT);
+
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+@@ -6656,11 +6745,11 @@ static int __init setup_preempt_mode(char *str)
+ int mode = sched_dynamic_mode(str);
+ if (mode < 0) {
+ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
+- return 1;
++ return 0;
+ }
+
+ sched_dynamic_update(mode);
+- return 0;
++ return 1;
+ }
+ __setup("preempt=", setup_preempt_mode);
+
+@@ -7220,6 +7309,7 @@ static int __sched_setscheduler(struct task_struct *p,
+ int reset_on_fork;
+ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ struct rq *rq;
++ bool cpuset_locked = false;
+
+ /* The pi code expects interrupts enabled */
+ BUG_ON(pi && in_interrupt());
+@@ -7316,8 +7406,14 @@ recheck:
+ return retval;
+ }
+
+- if (pi)
+- cpuset_read_lock();
++ /*
++ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
++ * information.
++ */
++ if (dl_policy(policy) || dl_policy(p->policy)) {
++ cpuset_locked = true;
++ cpuset_lock();
++ }
+
+ /*
+ * Make sure no PI-waiters arrive (or leave) while we are
+@@ -7393,8 +7489,8 @@ change:
+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
+ policy = oldpolicy = -1;
+ task_rq_unlock(rq, p, &rf);
+- if (pi)
+- cpuset_read_unlock();
++ if (cpuset_locked)
++ cpuset_unlock();
+ goto recheck;
+ }
+
+@@ -7461,7 +7557,8 @@ change:
+ task_rq_unlock(rq, p, &rf);
+
+ if (pi) {
+- cpuset_read_unlock();
++ if (cpuset_locked)
++ cpuset_unlock();
+ rt_mutex_adjust_pi(p);
+ }
+
+@@ -7473,8 +7570,8 @@ change:
+
+ unlock:
+ task_rq_unlock(rq, p, &rf);
+- if (pi)
+- cpuset_read_unlock();
++ if (cpuset_locked)
++ cpuset_unlock();
+ return retval;
+ }
+
+@@ -8096,14 +8193,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ if (len & (sizeof(unsigned long)-1))
+ return -EINVAL;
+
+- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
++ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = sched_getaffinity(pid, mask);
+ if (ret == 0) {
+ unsigned int retlen = min(len, cpumask_size());
+
+- if (copy_to_user(user_mask_ptr, mask, retlen))
++ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
+ ret = -EFAULT;
+ else
+ ret = retlen;
+@@ -8195,9 +8292,7 @@ int __cond_resched_lock(spinlock_t *lock)
+
+ if (spin_needbreak(lock) || resched) {
+ spin_unlock(lock);
+- if (resched)
+- preempt_schedule_common();
+- else
++ if (!_cond_resched())
+ cpu_relax();
+ ret = 1;
+ spin_lock(lock);
+@@ -8215,9 +8310,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock)
+
+ if (rwlock_needbreak(lock) || resched) {
+ read_unlock(lock);
+- if (resched)
+- preempt_schedule_common();
+- else
++ if (!_cond_resched())
+ cpu_relax();
+ ret = 1;
+ read_lock(lock);
+@@ -8235,9 +8328,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
+
+ if (rwlock_needbreak(lock) || resched) {
+ write_unlock(lock);
+- if (resched)
+- preempt_schedule_common();
+- else
++ if (!_cond_resched())
+ cpu_relax();
+ ret = 1;
+ write_lock(lock);
+@@ -8637,9 +8728,6 @@ void __init init_idle(struct task_struct *idle, int cpu)
+ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY;
+ kthread_set_per_cpu(idle, cpu);
+
+- scs_task_reset(idle);
+- kasan_unpoison_task_stack(idle);
+-
+ #ifdef CONFIG_SMP
+ /*
+ * It's possible that init_idle() gets called multiple times on a task,
+@@ -8701,8 +8789,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ return ret;
+ }
+
+-int task_can_attach(struct task_struct *p,
+- const struct cpumask *cs_cpus_allowed)
++int task_can_attach(struct task_struct *p)
+ {
+ int ret = 0;
+
+@@ -8715,16 +8802,9 @@ int task_can_attach(struct task_struct *p,
+ * success of set_cpus_allowed_ptr() on all attached tasks
+ * before cpus_mask may be changed.
+ */
+- if (p->flags & PF_NO_SETAFFINITY) {
++ if (p->flags & PF_NO_SETAFFINITY)
+ ret = -EINVAL;
+- goto out;
+- }
+-
+- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
+- cs_cpus_allowed))
+- ret = dl_task_can_attach(p, cs_cpus_allowed);
+
+-out:
+ return ret;
+ }
+
+@@ -8795,7 +8875,6 @@ void idle_task_exit(void)
+ finish_arch_post_lock_switch();
+ }
+
+- scs_task_reset(current);
+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
+ }
+
+@@ -9007,8 +9086,10 @@ static void cpuset_cpu_active(void)
+ static int cpuset_cpu_inactive(unsigned int cpu)
+ {
+ if (!cpuhp_tasks_frozen) {
+- if (dl_cpu_busy(cpu))
+- return -EBUSY;
++ int ret = dl_bw_check_overflow(cpu);
++
++ if (ret)
++ return ret;
+ cpuset_update_active_cpus();
+ } else {
+ num_cpus_frozen++;
+@@ -9620,9 +9701,9 @@ void normalize_rt_tasks(void)
+ continue;
+
+ p->se.exec_start = 0;
+- schedstat_set(p->se.statistics.wait_start, 0);
+- schedstat_set(p->se.statistics.sleep_start, 0);
+- schedstat_set(p->se.statistics.block_start, 0);
++ schedstat_set(p->stats.wait_start, 0);
++ schedstat_set(p->stats.sleep_start, 0);
++ schedstat_set(p->stats.block_start, 0);
+
+ if (!dl_task(p) && !rt_task(p)) {
+ /*
+@@ -9716,6 +9797,22 @@ static void sched_free_group(struct task_group *tg)
+ kmem_cache_free(task_group_cache, tg);
+ }
+
++static void sched_free_group_rcu(struct rcu_head *rcu)
++{
++ sched_free_group(container_of(rcu, struct task_group, rcu));
++}
++
++static void sched_unregister_group(struct task_group *tg)
++{
++ unregister_fair_sched_group(tg);
++ unregister_rt_sched_group(tg);
++ /*
++ * We have to wait for yet another RCU grace period to expire, as
++ * print_cfs_stats() might run concurrently.
++ */
++ call_rcu(&tg->rcu, sched_free_group_rcu);
++}
++
+ /* allocate runqueue etc for a new task group */
+ struct task_group *sched_create_group(struct task_group *parent)
+ {
+@@ -9759,25 +9856,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
+ }
+
+ /* rcu callback to free various structures associated with a task group */
+-static void sched_free_group_rcu(struct rcu_head *rhp)
++static void sched_unregister_group_rcu(struct rcu_head *rhp)
+ {
+ /* Now it should be safe to free those cfs_rqs: */
+- sched_free_group(container_of(rhp, struct task_group, rcu));
++ sched_unregister_group(container_of(rhp, struct task_group, rcu));
+ }
+
+ void sched_destroy_group(struct task_group *tg)
+ {
+ /* Wait for possible concurrent references to cfs_rqs complete: */
+- call_rcu(&tg->rcu, sched_free_group_rcu);
++ call_rcu(&tg->rcu, sched_unregister_group_rcu);
+ }
+
+-void sched_offline_group(struct task_group *tg)
++void sched_release_group(struct task_group *tg)
+ {
+ unsigned long flags;
+
+- /* End participation in shares distribution: */
+- unregister_fair_sched_group(tg);
+-
++ /*
++ * Unlink first, to avoid walk_tg_tree_from() from finding us (via
++ * sched_cfs_period_timer()).
++ *
++ * For this to be effective, we have to wait for all pending users of
++ * this task group to leave their RCU critical section to ensure no new
++ * user will see our dying task group any more. Specifically ensure
++ * that tg_unthrottle_up() won't add decayed cfs_rq's to it.
++ *
++ * We therefore defer calling unregister_fair_sched_group() to
++ * sched_unregister_group() which is guaranteed to get called only after the
++ * current RCU grace period has expired.
++ */
+ spin_lock_irqsave(&task_group_lock, flags);
+ list_del_rcu(&tg->list);
+ list_del_rcu(&tg->siblings);
+@@ -9896,7 +10003,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
+ {
+ struct task_group *tg = css_tg(css);
+
+- sched_offline_group(tg);
++ sched_release_group(tg);
+ }
+
+ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
+@@ -9906,7 +10013,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
+ /*
+ * Relies on the RCU grace period between css_released() and this.
+ */
+- sched_free_group(tg);
++ sched_unregister_group(tg);
+ }
+
+ /*
+@@ -10464,11 +10571,14 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
+ seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
+
+ if (schedstat_enabled() && tg != &root_task_group) {
++ struct sched_statistics *stats;
+ u64 ws = 0;
+ int i;
+
+- for_each_possible_cpu(i)
+- ws += schedstat_val(tg->se[i]->statistics.wait_sum);
++ for_each_possible_cpu(i) {
++ stats = __schedstats_from_se(tg->se[i]);
++ ws += schedstat_val(stats->wait_sum);
++ }
+
+ seq_printf(sf, "wait_sum %llu\n", ws);
+ }
+diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
+index 893eece65bfda..cacc2076ad214 100644
+--- a/kernel/sched/cpuacct.c
++++ b/kernel/sched/cpuacct.c
+@@ -21,15 +21,11 @@ static const char * const cpuacct_stat_desc[] = {
+ [CPUACCT_STAT_SYSTEM] = "system",
+ };
+
+-struct cpuacct_usage {
+- u64 usages[CPUACCT_STAT_NSTATS];
+-};
+-
+ /* track CPU usage of a group of tasks and its child groups */
+ struct cpuacct {
+ struct cgroup_subsys_state css;
+ /* cpuusage holds pointer to a u64-type object on every CPU */
+- struct cpuacct_usage __percpu *cpuusage;
++ u64 __percpu *cpuusage;
+ struct kernel_cpustat __percpu *cpustat;
+ };
+
+@@ -49,7 +45,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
+ return css_ca(ca->css.parent);
+ }
+
+-static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
++static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
+ static struct cpuacct root_cpuacct = {
+ .cpustat = &kernel_cpustat,
+ .cpuusage = &root_cpuacct_cpuusage,
+@@ -68,7 +64,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
+ if (!ca)
+ goto out;
+
+- ca->cpuusage = alloc_percpu(struct cpuacct_usage);
++ ca->cpuusage = alloc_percpu(u64);
+ if (!ca->cpuusage)
+ goto out_free_ca;
+
+@@ -99,7 +95,8 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
+ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+ enum cpuacct_stat_index index)
+ {
+- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
++ u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
+ u64 data;
+
+ /*
+@@ -115,14 +112,17 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+ raw_spin_rq_lock_irq(cpu_rq(cpu));
+ #endif
+
+- if (index == CPUACCT_STAT_NSTATS) {
+- int i = 0;
+-
+- data = 0;
+- for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+- data += cpuusage->usages[i];
+- } else {
+- data = cpuusage->usages[index];
++ switch (index) {
++ case CPUACCT_STAT_USER:
++ data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
++ break;
++ case CPUACCT_STAT_SYSTEM:
++ data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
++ cpustat[CPUTIME_SOFTIRQ];
++ break;
++ case CPUACCT_STAT_NSTATS:
++ data = *cpuusage;
++ break;
+ }
+
+ #ifndef CONFIG_64BIT
+@@ -132,10 +132,14 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+ return data;
+ }
+
+-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
++static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
+ {
+- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+- int i;
++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
++ u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
++
++	/* Don't allow resetting the global kernel_cpustat */
++ if (ca == &root_cpuacct)
++ return;
+
+ #ifndef CONFIG_64BIT
+ /*
+@@ -143,9 +147,10 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+ */
+ raw_spin_rq_lock_irq(cpu_rq(cpu));
+ #endif
+-
+- for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+- cpuusage->usages[i] = val;
++ *cpuusage = 0;
++ cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
++ cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
++ cpustat[CPUTIME_SOFTIRQ] = 0;
+
+ #ifndef CONFIG_64BIT
+ raw_spin_rq_unlock_irq(cpu_rq(cpu));
+@@ -196,7 +201,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ return -EINVAL;
+
+ for_each_possible_cpu(cpu)
+- cpuacct_cpuusage_write(ca, cpu, 0);
++ cpuacct_cpuusage_write(ca, cpu);
+
+ return 0;
+ }
+@@ -243,25 +248,10 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
+ seq_puts(m, "\n");
+
+ for_each_possible_cpu(cpu) {
+- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+-
+ seq_printf(m, "%d", cpu);
+-
+- for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
+-#ifndef CONFIG_64BIT
+- /*
+- * Take rq->lock to make 64-bit read safe on 32-bit
+- * platforms.
+- */
+- raw_spin_rq_lock_irq(cpu_rq(cpu));
+-#endif
+-
+- seq_printf(m, " %llu", cpuusage->usages[index]);
+-
+-#ifndef CONFIG_64BIT
+- raw_spin_rq_unlock_irq(cpu_rq(cpu));
+-#endif
+- }
++ for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
++ seq_printf(m, " %llu",
++ cpuacct_cpuusage_read(ca, cpu, index));
+ seq_puts(m, "\n");
+ }
+ return 0;
+@@ -338,17 +328,13 @@ static struct cftype files[] = {
+ */
+ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+ {
++ unsigned int cpu = task_cpu(tsk);
+ struct cpuacct *ca;
+- int index = CPUACCT_STAT_SYSTEM;
+- struct pt_regs *regs = get_irq_regs() ? : task_pt_regs(tsk);
+-
+- if (regs && user_mode(regs))
+- index = CPUACCT_STAT_USER;
+
+ rcu_read_lock();
+
+ for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
+- __this_cpu_add(ca->cpuusage->usages[index], cputime);
++ *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
+
+ rcu_read_unlock();
+ }
+diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
+index ceb03d76c0ccd..221ca10505738 100644
+--- a/kernel/sched/cpudeadline.c
++++ b/kernel/sched/cpudeadline.c
+@@ -124,7 +124,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
+ unsigned long cap, max_cap = 0;
+ int cpu, max_cpu = -1;
+
+- if (!static_branch_unlikely(&sched_asym_cpucapacity))
++ if (!sched_asym_cpucap_active())
+ return 1;
+
+ /* Ensure the capacity of the CPUs fits the task. */
+diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
+index e7af18857371e..7f6bb37d3a2f7 100644
+--- a/kernel/sched/cpufreq_schedutil.c
++++ b/kernel/sched/cpufreq_schedutil.c
+@@ -289,6 +289,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
+ * into the same scale so we can compare.
+ */
+ boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
++ boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
+ if (sg_cpu->util < boost)
+ sg_cpu->util = boost;
+ }
+diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
+index 872e481d5098c..042a6dbce8f32 100644
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -148,10 +148,10 @@ void account_guest_time(struct task_struct *p, u64 cputime)
+
+ /* Add guest time to cpustat. */
+ if (task_nice(p) > 0) {
+- cpustat[CPUTIME_NICE] += cputime;
++ task_group_account_field(p, CPUTIME_NICE, cputime);
+ cpustat[CPUTIME_GUEST_NICE] += cputime;
+ } else {
+- cpustat[CPUTIME_USER] += cputime;
++ task_group_account_field(p, CPUTIME_USER, cputime);
+ cpustat[CPUTIME_GUEST] += cputime;
+ }
+ }
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index e94314633b39d..de45e4d2c61fa 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -17,6 +17,7 @@
+ */
+ #include "sched.h"
+ #include "pelt.h"
++#include <linux/cpuset.h>
+
+ struct dl_bandwidth def_dl_bandwidth;
+
+@@ -112,7 +113,7 @@ static inline unsigned long __dl_bw_capacity(int i)
+ */
+ static inline unsigned long dl_bw_capacity(int i)
+ {
+- if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
++ if (!sched_asym_cpucap_active() &&
+ capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+ return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
+ } else {
+@@ -1265,8 +1266,8 @@ static void update_curr_dl(struct rq *rq)
+ return;
+ }
+
+- schedstat_set(curr->se.statistics.exec_max,
+- max(curr->se.statistics.exec_max, delta_exec));
++ schedstat_set(curr->stats.exec_max,
++ max(curr->stats.exec_max, delta_exec));
+
+ curr->se.sum_exec_runtime += delta_exec;
+ account_group_exec_runtime(curr, delta_exec);
+@@ -1561,7 +1562,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ * the throttle.
+ */
+ p->dl.dl_throttled = 0;
+- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
++ if (!(flags & ENQUEUE_REPLENISH))
++ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
++ task_pid_nr(p));
++
+ return;
+ }
+
+@@ -1700,7 +1704,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
+ * Take the capacity of the CPU into account to
+ * ensure it fits the requirement of the task.
+ */
+- if (static_branch_unlikely(&sched_asym_cpucapacity))
++ if (sched_asym_cpucap_active())
+ select_rq |= !dl_task_fits_capacity(p, cpu);
+
+ if (select_rq) {
+@@ -1720,6 +1724,7 @@ out:
+
+ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
+ {
++ struct rq_flags rf;
+ struct rq *rq;
+
+ if (READ_ONCE(p->__state) != TASK_WAKING)
+@@ -1731,7 +1736,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
+ * from try_to_wake_up(). Hence, p->pi_lock is locked, but
+ * rq->lock is not... So, lock it
+ */
+- raw_spin_rq_lock(rq);
++ rq_lock(rq, &rf);
+ if (p->dl.dl_non_contending) {
+ update_rq_clock(rq);
+ sub_running_bw(&p->dl, &rq->dl);
+@@ -1747,7 +1752,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
+ put_task_struct(p);
+ }
+ sub_rq_bw(&p->dl, &rq->dl);
+- raw_spin_rq_unlock(rq);
++ rq_unlock(rq, &rf);
+ }
+
+ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
+@@ -1842,8 +1847,7 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
+ deadline_queue_push_tasks(rq);
+ }
+
+-static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
+- struct dl_rq *dl_rq)
++static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
+ {
+ struct rb_node *left = rb_first_cached(&dl_rq->root);
+
+@@ -1862,7 +1866,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
+ if (!sched_dl_runnable(rq))
+ return NULL;
+
+- dl_se = pick_next_dl_entity(rq, dl_rq);
++ dl_se = pick_next_dl_entity(dl_rq);
+ BUG_ON(!dl_se);
+ p = dl_task_of(dl_se);
+
+@@ -2081,6 +2085,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
+ !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
+ task_running(rq, task) ||
+ !dl_task(task) ||
++ is_migration_disabled(task) ||
+ !task_on_rq_queued(task))) {
+ double_unlock_balance(rq, later_rq);
+ later_rq = NULL;
+@@ -2145,12 +2150,6 @@ static int push_dl_task(struct rq *rq)
+ return 0;
+
+ retry:
+- if (is_migration_disabled(next_task))
+- return 0;
+-
+- if (WARN_ON(next_task == rq->curr))
+- return 0;
+-
+ /*
+ * If next_task preempts rq->curr, and rq->curr
+ * can move away, it makes sense to just reschedule
+@@ -2163,6 +2162,12 @@ retry:
+ return 0;
+ }
+
++ if (is_migration_disabled(next_task))
++ return 0;
++
++ if (WARN_ON(next_task == rq->curr))
++ return 0;
++
+ /* We might release rq lock */
+ get_task_struct(next_task);
+
+@@ -2442,6 +2447,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
+ if (task_on_rq_queued(p) && p->dl.dl_runtime)
+ task_non_contending(p);
+
++ /*
++ * In case a task is setscheduled out from SCHED_DEADLINE we need to
++ * keep track of that on its cpuset (for correct bandwidth tracking).
++ */
++ dec_dl_tasks_cs(p);
++
+ if (!task_on_rq_queued(p)) {
+ /*
+ * Inactive timer is armed. However, p is leaving DEADLINE and
+@@ -2482,6 +2493,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
+ if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+ put_task_struct(p);
+
++ /*
++ * In case a task is setscheduled to SCHED_DEADLINE we need to keep
++ * track of that on its cpuset (for correct bandwidth tracking).
++ */
++ inc_dl_tasks_cs(p);
++
+ /* If p is not queued we will update its parameters at next wakeup. */
+ if (!task_on_rq_queued(p)) {
+ add_rq_bw(&p->dl, &rq->dl);
+@@ -2860,41 +2877,6 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
+ }
+
+ #ifdef CONFIG_SMP
+-int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
+-{
+- unsigned long flags, cap;
+- unsigned int dest_cpu;
+- struct dl_bw *dl_b;
+- bool overflow;
+- int ret;
+-
+- dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
+-
+- rcu_read_lock_sched();
+- dl_b = dl_bw_of(dest_cpu);
+- raw_spin_lock_irqsave(&dl_b->lock, flags);
+- cap = dl_bw_capacity(dest_cpu);
+- overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
+- if (overflow) {
+- ret = -EBUSY;
+- } else {
+- /*
+- * We reserve space for this task in the destination
+- * root_domain, as we can't fail after this point.
+- * We will free resources in the source root_domain
+- * later on (see set_cpus_allowed_dl()).
+- */
+- int cpus = dl_bw_cpus(dest_cpu);
+-
+- __dl_add(dl_b, p->dl.dl_bw, cpus);
+- ret = 0;
+- }
+- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+- rcu_read_unlock_sched();
+-
+- return ret;
+-}
+-
+ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ const struct cpumask *trial)
+ {
+@@ -2916,21 +2898,59 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ return ret;
+ }
+
+-bool dl_cpu_busy(unsigned int cpu)
++enum dl_bw_request {
++ dl_bw_req_check_overflow = 0,
++ dl_bw_req_alloc,
++ dl_bw_req_free
++};
++
++static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
+ {
+- unsigned long flags, cap;
++ unsigned long flags;
+ struct dl_bw *dl_b;
+- bool overflow;
++ bool overflow = 0;
+
+ rcu_read_lock_sched();
+ dl_b = dl_bw_of(cpu);
+ raw_spin_lock_irqsave(&dl_b->lock, flags);
+- cap = dl_bw_capacity(cpu);
+- overflow = __dl_overflow(dl_b, cap, 0, 0);
++
++ if (req == dl_bw_req_free) {
++ __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
++ } else {
++ unsigned long cap = dl_bw_capacity(cpu);
++
++ overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
++
++ if (req == dl_bw_req_alloc && !overflow) {
++ /*
++ * We reserve space in the destination
++ * root_domain, as we can't fail after this point.
++ * We will free resources in the source root_domain
++ * later on (see set_cpus_allowed_dl()).
++ */
++ __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
++ }
++ }
++
+ raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+ rcu_read_unlock_sched();
+
+- return overflow;
++ return overflow ? -EBUSY : 0;
++}
++
++int dl_bw_check_overflow(int cpu)
++{
++ return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
++}
++
++int dl_bw_alloc(int cpu, u64 dl_bw)
++{
++ return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
++}
++
++void dl_bw_free(int cpu, u64 dl_bw)
++{
++ dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
+ }
+ #endif
+
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index 17a653b67006a..5ffe16ff039fa 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -416,7 +416,7 @@ void update_sched_domain_debugfs(void)
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "cpu%d", cpu);
+- debugfs_remove(debugfs_lookup(buf, sd_dentry));
++ debugfs_lookup_and_remove(buf, sd_dentry);
+ d_cpu = debugfs_create_dir(buf, sd_dentry);
+
+ i = 0;
+@@ -448,9 +448,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
+ struct sched_entity *se = tg->se[cpu];
+
+ #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
+-#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
++#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \
++ #F, (long long)schedstat_val(stats->F))
+ #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+-#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
++#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \
++ #F, SPLIT_NS((long long)schedstat_val(stats->F)))
+
+ if (!se)
+ return;
+@@ -460,16 +462,19 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
+ PN(se->sum_exec_runtime);
+
+ if (schedstat_enabled()) {
+- PN_SCHEDSTAT(se->statistics.wait_start);
+- PN_SCHEDSTAT(se->statistics.sleep_start);
+- PN_SCHEDSTAT(se->statistics.block_start);
+- PN_SCHEDSTAT(se->statistics.sleep_max);
+- PN_SCHEDSTAT(se->statistics.block_max);
+- PN_SCHEDSTAT(se->statistics.exec_max);
+- PN_SCHEDSTAT(se->statistics.slice_max);
+- PN_SCHEDSTAT(se->statistics.wait_max);
+- PN_SCHEDSTAT(se->statistics.wait_sum);
+- P_SCHEDSTAT(se->statistics.wait_count);
++ struct sched_statistics *stats;
++ stats = __schedstats_from_se(se);
++
++ PN_SCHEDSTAT(wait_start);
++ PN_SCHEDSTAT(sleep_start);
++ PN_SCHEDSTAT(block_start);
++ PN_SCHEDSTAT(sleep_max);
++ PN_SCHEDSTAT(block_max);
++ PN_SCHEDSTAT(exec_max);
++ PN_SCHEDSTAT(slice_max);
++ PN_SCHEDSTAT(wait_max);
++ PN_SCHEDSTAT(wait_sum);
++ P_SCHEDSTAT(wait_count);
+ }
+
+ P(se->load.weight);
+@@ -536,9 +541,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
+ p->prio);
+
+ SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+- SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
++ SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
+ SPLIT_NS(p->se.sum_exec_runtime),
+- SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
++ SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
+
+ #ifdef CONFIG_NUMA_BALANCING
+ SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
+@@ -921,25 +926,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
+ {
+ #ifdef CONFIG_NUMA_BALANCING
+- struct mempolicy *pol;
+-
+ if (p->mm)
+ P(mm->numa_scan_seq);
+
+- task_lock(p);
+- pol = p->mempolicy;
+- if (pol && !(pol->flags & MPOL_F_MORON))
+- pol = NULL;
+- mpol_get(pol);
+- task_unlock(p);
+-
+ P(numa_pages_migrated);
+ P(numa_preferred_nid);
+ P(total_numa_faults);
+ SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
+ task_node(p), task_numa_group_id(p));
+ show_numa_stats(p, m);
+- mpol_put(pol);
+ #endif
+ }
+
+@@ -954,8 +949,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ "---------------------------------------------------------"
+ "----------\n");
+
+-#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
+-#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
++#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F))
++#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
+
+ PN(se.exec_start);
+ PN(se.vruntime);
+@@ -968,33 +963,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ if (schedstat_enabled()) {
+ u64 avg_atom, avg_per_cpu;
+
+- PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+- PN_SCHEDSTAT(se.statistics.wait_start);
+- PN_SCHEDSTAT(se.statistics.sleep_start);
+- PN_SCHEDSTAT(se.statistics.block_start);
+- PN_SCHEDSTAT(se.statistics.sleep_max);
+- PN_SCHEDSTAT(se.statistics.block_max);
+- PN_SCHEDSTAT(se.statistics.exec_max);
+- PN_SCHEDSTAT(se.statistics.slice_max);
+- PN_SCHEDSTAT(se.statistics.wait_max);
+- PN_SCHEDSTAT(se.statistics.wait_sum);
+- P_SCHEDSTAT(se.statistics.wait_count);
+- PN_SCHEDSTAT(se.statistics.iowait_sum);
+- P_SCHEDSTAT(se.statistics.iowait_count);
+- P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+- P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+- P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+- P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+- P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+- P_SCHEDSTAT(se.statistics.nr_wakeups);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+- P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
++ PN_SCHEDSTAT(sum_sleep_runtime);
++ PN_SCHEDSTAT(wait_start);
++ PN_SCHEDSTAT(sleep_start);
++ PN_SCHEDSTAT(block_start);
++ PN_SCHEDSTAT(sleep_max);
++ PN_SCHEDSTAT(block_max);
++ PN_SCHEDSTAT(exec_max);
++ PN_SCHEDSTAT(slice_max);
++ PN_SCHEDSTAT(wait_max);
++ PN_SCHEDSTAT(wait_sum);
++ P_SCHEDSTAT(wait_count);
++ PN_SCHEDSTAT(iowait_sum);
++ P_SCHEDSTAT(iowait_count);
++ P_SCHEDSTAT(nr_migrations_cold);
++ P_SCHEDSTAT(nr_failed_migrations_affine);
++ P_SCHEDSTAT(nr_failed_migrations_running);
++ P_SCHEDSTAT(nr_failed_migrations_hot);
++ P_SCHEDSTAT(nr_forced_migrations);
++ P_SCHEDSTAT(nr_wakeups);
++ P_SCHEDSTAT(nr_wakeups_sync);
++ P_SCHEDSTAT(nr_wakeups_migrate);
++ P_SCHEDSTAT(nr_wakeups_local);
++ P_SCHEDSTAT(nr_wakeups_remote);
++ P_SCHEDSTAT(nr_wakeups_affine);
++ P_SCHEDSTAT(nr_wakeups_affine_attempts);
++ P_SCHEDSTAT(nr_wakeups_passive);
++ P_SCHEDSTAT(nr_wakeups_idle);
+
+ avg_atom = p->se.sum_exec_runtime;
+ if (nr_switches)
+@@ -1060,7 +1055,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ void proc_sched_set_task(struct task_struct *p)
+ {
+ #ifdef CONFIG_SCHEDSTATS
+- memset(&p->se.statistics, 0, sizeof(p->se.statistics));
++ memset(&p->stats, 0, sizeof(p->stats));
+ #endif
+ }
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index f6a05d9b54436..646a6ae4b2509 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -837,8 +837,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
+
+ curr->exec_start = now;
+
+- schedstat_set(curr->statistics.exec_max,
+- max(delta_exec, curr->statistics.exec_max));
++ if (schedstat_enabled()) {
++ struct sched_statistics *stats;
++
++ stats = __schedstats_from_se(curr);
++ __schedstat_set(stats->exec_max,
++ max(delta_exec, stats->exec_max));
++ }
+
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq->exec_clock, delta_exec);
+@@ -866,39 +871,45 @@ static inline void
+ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ u64 wait_start, prev_wait_start;
++ struct sched_statistics *stats;
+
+ if (!schedstat_enabled())
+ return;
+
++ stats = __schedstats_from_se(se);
++
+ wait_start = rq_clock(rq_of(cfs_rq));
+- prev_wait_start = schedstat_val(se->statistics.wait_start);
++ prev_wait_start = schedstat_val(stats->wait_start);
+
+ if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+ likely(wait_start > prev_wait_start))
+ wait_start -= prev_wait_start;
+
+- __schedstat_set(se->statistics.wait_start, wait_start);
++ __schedstat_set(stats->wait_start, wait_start);
+ }
+
+ static inline void
+ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+- struct task_struct *p;
++ struct sched_statistics *stats;
++ struct task_struct *p = NULL;
+ u64 delta;
+
+ if (!schedstat_enabled())
+ return;
+
++ stats = __schedstats_from_se(se);
++
+ /*
+	 * When sched_schedstat changes from 0 to 1, some sched entities
+	 * may already be in the runqueue; their se->statistics.wait_start
+	 * will be 0, which would make the delta wrong. We need to avoid this
+ * scenario.
+ */
+- if (unlikely(!schedstat_val(se->statistics.wait_start)))
++ if (unlikely(!schedstat_val(stats->wait_start)))
+ return;
+
+- delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
++ delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(stats->wait_start);
+
+ if (entity_is_task(se)) {
+ p = task_of(se);
+@@ -908,30 +919,33 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ * time stamp can be adjusted to accumulate wait time
+ * prior to migration.
+ */
+- __schedstat_set(se->statistics.wait_start, delta);
++ __schedstat_set(stats->wait_start, delta);
+ return;
+ }
+ trace_sched_stat_wait(p, delta);
+ }
+
+- __schedstat_set(se->statistics.wait_max,
+- max(schedstat_val(se->statistics.wait_max), delta));
+- __schedstat_inc(se->statistics.wait_count);
+- __schedstat_add(se->statistics.wait_sum, delta);
+- __schedstat_set(se->statistics.wait_start, 0);
++ __schedstat_set(stats->wait_max,
++ max(schedstat_val(stats->wait_max), delta));
++ __schedstat_inc(stats->wait_count);
++ __schedstat_add(stats->wait_sum, delta);
++ __schedstat_set(stats->wait_start, 0);
+ }
+
+ static inline void
+ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
++ struct sched_statistics *stats;
+ struct task_struct *tsk = NULL;
+ u64 sleep_start, block_start;
+
+ if (!schedstat_enabled())
+ return;
+
+- sleep_start = schedstat_val(se->statistics.sleep_start);
+- block_start = schedstat_val(se->statistics.block_start);
++ stats = __schedstats_from_se(se);
++
++ sleep_start = schedstat_val(stats->sleep_start);
++ block_start = schedstat_val(stats->block_start);
+
+ if (entity_is_task(se))
+ tsk = task_of(se);
+@@ -942,11 +956,11 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ if ((s64)delta < 0)
+ delta = 0;
+
+- if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+- __schedstat_set(se->statistics.sleep_max, delta);
++ if (unlikely(delta > schedstat_val(stats->sleep_max)))
++ __schedstat_set(stats->sleep_max, delta);
+
+- __schedstat_set(se->statistics.sleep_start, 0);
+- __schedstat_add(se->statistics.sum_sleep_runtime, delta);
++ __schedstat_set(stats->sleep_start, 0);
++ __schedstat_add(stats->sum_sleep_runtime, delta);
+
+ if (tsk) {
+ account_scheduler_latency(tsk, delta >> 10, 1);
+@@ -959,16 +973,16 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ if ((s64)delta < 0)
+ delta = 0;
+
+- if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+- __schedstat_set(se->statistics.block_max, delta);
++ if (unlikely(delta > schedstat_val(stats->block_max)))
++ __schedstat_set(stats->block_max, delta);
+
+- __schedstat_set(se->statistics.block_start, 0);
+- __schedstat_add(se->statistics.sum_sleep_runtime, delta);
++ __schedstat_set(stats->block_start, 0);
++ __schedstat_add(stats->sum_sleep_runtime, delta);
+
+ if (tsk) {
+ if (tsk->in_iowait) {
+- __schedstat_add(se->statistics.iowait_sum, delta);
+- __schedstat_inc(se->statistics.iowait_count);
++ __schedstat_add(stats->iowait_sum, delta);
++ __schedstat_inc(stats->iowait_count);
+ trace_sched_stat_iowait(tsk, delta);
+ }
+
+@@ -1030,10 +1044,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ /* XXX racy against TTWU */
+ state = READ_ONCE(tsk->__state);
+ if (state & TASK_INTERRUPTIBLE)
+- __schedstat_set(se->statistics.sleep_start,
++ __schedstat_set(tsk->stats.sleep_start,
+ rq_clock(rq_of(cfs_rq)));
+ if (state & TASK_UNINTERRUPTIBLE)
+- __schedstat_set(se->statistics.block_start,
++ __schedstat_set(tsk->stats.block_start,
+ rq_clock(rq_of(cfs_rq)));
+ }
+ }
+@@ -3422,7 +3436,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ se->avg.last_update_time = n_last_update_time;
+ }
+
+-
+ /*
+ * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
+ * propagate its contribution. The key to this propagation is the invariant
+@@ -3490,7 +3503,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ * XXX: only do this for the part of runnable > running ?
+ *
+ */
+-
+ static inline void
+ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
+ {
+@@ -3722,7 +3734,19 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+
+ r = removed_util;
+ sub_positive(&sa->util_avg, r);
+- sa->util_sum = sa->util_avg * divider;
++ sub_positive(&sa->util_sum, r * divider);
++ /*
++		 * Because of rounding, se->util_sum might end up being +1 more than
++		 * cfs->util_sum. Although this is not a problem by itself, detaching
++		 * a lot of tasks with the rounding problem between 2 updates of
++		 * util_avg (~1ms) can make cfs->util_sum become zero whereas
++		 * cfs->util_avg is not.
++ * Check that util_sum is still above its lower bound for the new
++ * util_avg. Given that period_contrib might have moved since the last
++ * sync, we are only sure that util_sum must be above or equal to
++ * util_avg * minimum possible divider
++ */
++ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
+
+ r = removed_runnable;
+ sub_positive(&sa->runnable_avg, r);
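As a rough, standalone illustration of the util_sum floor introduced above: the sketch below is plain userspace C, with LOAD_AVG_MAX and all sample values assumed only for the example, not taken from the patch. It detaches a task whose rounding-eroded contribution would drive the cfs_rq util_sum to zero while util_avg stays positive, and shows how clamping to util_avg * PELT_MIN_DIVIDER keeps the two fields consistent.

/* Standalone sketch (not part of the patch) of why util_sum needs a floor. */
#include <stdio.h>

#define LOAD_AVG_MAX		47742
#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)

int main(void)
{
	/* Made-up cfs_rq state: rounding left util_sum one contribution short. */
	unsigned int util_avg = 10;
	unsigned int util_sum = 9 * LOAD_AVG_MAX;
	unsigned int divider  = LOAD_AVG_MAX;	/* period_contrib == 1024 here */

	/* Detach a task contributing 9: avg stays positive, raw sum hits 0. */
	util_avg -= 9;
	util_sum -= 9 * divider;

	/* The clamp keeps util_sum >= util_avg * minimum possible divider. */
	if (util_sum < util_avg * PELT_MIN_DIVIDER)
		util_sum = util_avg * PELT_MIN_DIVIDER;

	printf("util_avg=%u util_sum=%u floor=%u\n",
	       util_avg, util_sum, util_avg * PELT_MIN_DIVIDER);
	return 0;
}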
+@@ -3784,11 +3808,11 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
+
+ se->avg.runnable_sum = se->avg.runnable_avg * divider;
+
+- se->avg.load_sum = divider;
+- if (se_weight(se)) {
+- se->avg.load_sum =
+- div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
+- }
++ se->avg.load_sum = se->avg.load_avg * divider;
++ if (se_weight(se) < se->avg.load_sum)
++ se->avg.load_sum = div_u64(se->avg.load_sum, se_weight(se));
++ else
++ se->avg.load_sum = 1;
+
+ enqueue_load_avg(cfs_rq, se);
+ cfs_rq->avg.util_avg += se->avg.util_avg;
+@@ -3964,14 +3988,16 @@ static inline unsigned long task_util_est(struct task_struct *p)
+ }
+
+ #ifdef CONFIG_UCLAMP_TASK
+-static inline unsigned long uclamp_task_util(struct task_struct *p)
++static inline unsigned long uclamp_task_util(struct task_struct *p,
++ unsigned long uclamp_min,
++ unsigned long uclamp_max)
+ {
+- return clamp(task_util_est(p),
+- uclamp_eff_value(p, UCLAMP_MIN),
+- uclamp_eff_value(p, UCLAMP_MAX));
++ return clamp(task_util_est(p), uclamp_min, uclamp_max);
+ }
+ #else
+-static inline unsigned long uclamp_task_util(struct task_struct *p)
++static inline unsigned long uclamp_task_util(struct task_struct *p,
++ unsigned long uclamp_min,
++ unsigned long uclamp_max)
+ {
+ return task_util_est(p);
+ }
+@@ -4110,14 +4136,144 @@ done:
+ trace_sched_util_est_se_tp(&p->se);
+ }
+
+-static inline int task_fits_capacity(struct task_struct *p, long capacity)
++static inline int util_fits_cpu(unsigned long util,
++ unsigned long uclamp_min,
++ unsigned long uclamp_max,
++ int cpu)
++{
++ unsigned long capacity_orig, capacity_orig_thermal;
++ unsigned long capacity = capacity_of(cpu);
++ bool fits, uclamp_max_fits;
++
++ /*
++ * Check if the real util fits without any uclamp boost/cap applied.
++ */
++ fits = fits_capacity(util, capacity);
++
++ if (!uclamp_is_used())
++ return fits;
++
++ /*
++ * We must use capacity_orig_of() for comparing against uclamp_min and
++ * uclamp_max. We only care about capacity pressure (by using
++ * capacity_of()) for comparing against the real util.
++ *
++ * If a task is boosted to 1024 for example, we don't want a tiny
++ * pressure to skew the check whether it fits a CPU or not.
++ *
++ * Similarly if a task is capped to capacity_orig_of(little_cpu), it
++ * should fit a little cpu even if there's some pressure.
++ *
++ * Only exception is for thermal pressure since it has a direct impact
++ * on available OPP of the system.
++ *
++ * We honour it for uclamp_min only as a drop in performance level
++ * could result in not getting the requested minimum performance level.
++ *
++ * For uclamp_max, we can tolerate a drop in performance level as the
++ * goal is to cap the task. So it's okay if it's getting less.
++ *
++ * In case of capacity inversion we should honour the inverted capacity
++ * for both uclamp_min and uclamp_max all the time.
++ */
++ capacity_orig = cpu_in_capacity_inversion(cpu);
++ if (capacity_orig) {
++ capacity_orig_thermal = capacity_orig;
++ } else {
++ capacity_orig = capacity_orig_of(cpu);
++ capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
++ }
++
++ /*
++ * We want to force a task to fit a cpu as implied by uclamp_max.
++ * But we do have some corner cases to cater for..
++ *
++ *
++ * C=z
++ * | ___
++ * | C=y | |
++ * |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _ uclamp_max
++ * | C=x | | | |
++ * | ___ | | | |
++ * | | | | | | | (util somewhere in this region)
++ * | | | | | | |
++ * | | | | | | |
++ * +----------------------------------------
++ * cpu0 cpu1 cpu2
++ *
++ * In the above example if a task is capped to a specific performance
++ * point, y, then when:
++ *
++ * * util = 80% of x then it does not fit on cpu0 and should migrate
++ * to cpu1
++ * * util = 80% of y then it is forced to fit on cpu1 to honour
++ * uclamp_max request.
++ *
++ * which is what we're enforcing here. A task always fits if
++ * uclamp_max <= capacity_orig. But when uclamp_max > capacity_orig,
++	 * the normal upmigration rules should still apply.
++ *
++ * Only exception is when we are on max capacity, then we need to be
++ * careful not to block overutilized state. This is so because:
++ *
++ * 1. There's no concept of capping at max_capacity! We can't go
++ * beyond this performance level anyway.
++ * 2. The system is being saturated when we're operating near
++ * max capacity, it doesn't make sense to block overutilized.
++ */
++ uclamp_max_fits = (capacity_orig == SCHED_CAPACITY_SCALE) && (uclamp_max == SCHED_CAPACITY_SCALE);
++ uclamp_max_fits = !uclamp_max_fits && (uclamp_max <= capacity_orig);
++ fits = fits || uclamp_max_fits;
++
++ /*
++ *
++ * C=z
++ * | ___ (region a, capped, util >= uclamp_max)
++ * | C=y | |
++ * |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _ uclamp_max
++ * | C=x | | | |
++ * | ___ | | | | (region b, uclamp_min <= util <= uclamp_max)
++ * |_ _ _|_ _|_ _ _ _| _ | _ _ _| _ | _ _ _ _ _ uclamp_min
++ * | | | | | | |
++ * | | | | | | | (region c, boosted, util < uclamp_min)
++ * +----------------------------------------
++ * cpu0 cpu1 cpu2
++ *
++ * a) If util > uclamp_max, then we're capped, we don't care about
++ * actual fitness value here. We only care if uclamp_max fits
++ * capacity without taking margin/pressure into account.
++ * See comment above.
++ *
++ * b) If uclamp_min <= util <= uclamp_max, then the normal
++ * fits_capacity() rules apply. Except we need to ensure that we
++	 *    fits_capacity() rules apply. Except we need to ensure that we
++	 *    remain within uclamp_max, see comment above.
++ * c) If util < uclamp_min, then we are boosted. Same as (b) but we
++ * need to take into account the boosted value fits the CPU without
++ * taking margin/pressure into account.
++ *
++ * Cases (a) and (b) are handled in the 'fits' variable already. We
++ * just need to consider an extra check for case (c) after ensuring we
++ * handle the case uclamp_min > uclamp_max.
++ */
++ uclamp_min = min(uclamp_min, uclamp_max);
++ if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE)
++ fits = fits && (uclamp_min <= capacity_orig_thermal);
++
++ return fits;
++}
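A minimal userspace sketch of the check above, assuming a made-up little-CPU capacity_orig of 440 and ignoring RT/thermal pressure and capacity inversion; it only illustrates how the uclamp_max/uclamp_min corrections can flip the plain fits_capacity() verdict, and is not part of the patch.

/*
 * Simplified, standalone model of util_fits_cpu(); ignores RT/thermal
 * pressure and capacity inversion. Capacities and sample values are made up.
 */
#include <stdbool.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024

/* Same ~20% headroom rule as the kernel's fits_capacity() macro. */
static bool fits_capacity(unsigned long util, unsigned long cap)
{
	return util * 1280 < cap * 1024;
}

static bool util_fits_cpu_sketch(unsigned long util, unsigned long uclamp_min,
				 unsigned long uclamp_max, unsigned long cap_orig)
{
	bool fits = fits_capacity(util, cap_orig);
	bool uclamp_max_fits;

	/* A capped task is forced to fit when uclamp_max <= capacity_orig. */
	uclamp_max_fits = (cap_orig == SCHED_CAPACITY_SCALE) &&
			  (uclamp_max == SCHED_CAPACITY_SCALE);
	uclamp_max_fits = !uclamp_max_fits && (uclamp_max <= cap_orig);
	fits = fits || uclamp_max_fits;

	/* A boosted task must also get uclamp_min worth of capacity. */
	uclamp_min = uclamp_min < uclamp_max ? uclamp_min : uclamp_max;
	if (util < uclamp_min && cap_orig != SCHED_CAPACITY_SCALE)
		fits = fits && (uclamp_min <= cap_orig);

	return fits;
}

int main(void)
{
	unsigned long little = 440;	/* hypothetical little-CPU capacity_orig */

	/* Heavy task capped to 300: forced to fit the little CPU -> 1 */
	printf("%d\n", util_fits_cpu_sketch(800, 0, 300, little));
	/* Unclamped util of 400: fails the 20%-headroom check -> 0 */
	printf("%d\n", util_fits_cpu_sketch(400, 0, 1024, little));
	/* Tiny util boosted to 600: the little CPU can't honour it -> 0 */
	printf("%d\n", util_fits_cpu_sketch(50, 600, 1024, little));
	return 0;
}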
++
++static inline int task_fits_cpu(struct task_struct *p, int cpu)
+ {
+- return fits_capacity(uclamp_task_util(p), capacity);
++ unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN);
++ unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX);
++ unsigned long util = task_util_est(p);
++ return util_fits_cpu(util, uclamp_min, uclamp_max, cpu);
+ }
+
+ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+ {
+- if (!static_branch_unlikely(&sched_asym_cpucapacity))
++ if (!sched_asym_cpucap_active())
+ return;
+
+ if (!p || p->nr_cpus_allowed == 1) {
+@@ -4125,7 +4281,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+ return;
+ }
+
+- if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
++ if (task_fits_cpu(p, cpu_of(rq))) {
+ rq->misfit_task_load = 0;
+ return;
+ }
+@@ -4191,6 +4347,29 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ #endif
+ }
+
++static inline bool entity_is_long_sleeper(struct sched_entity *se)
++{
++ struct cfs_rq *cfs_rq;
++ u64 sleep_time;
++
++ if (se->exec_start == 0)
++ return false;
++
++ cfs_rq = cfs_rq_of(se);
++
++ sleep_time = rq_clock_task(rq_of(cfs_rq));
++
++	/* Can happen while migrating because of clock task divergence */
++ if (sleep_time <= se->exec_start)
++ return false;
++
++ sleep_time -= se->exec_start;
++ if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
++ return true;
++
++ return false;
++}
++
+ static void
+ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ {
+@@ -4219,8 +4398,29 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ vruntime -= thresh;
+ }
+
+- /* ensure we never gain time by being placed backwards. */
+- se->vruntime = max_vruntime(se->vruntime, vruntime);
++ /*
++ * Pull vruntime of the entity being placed to the base level of
++ * cfs_rq, to prevent boosting it if placed backwards.
++ * However, min_vruntime can advance much faster than real time, with
++ * the extreme being when an entity with the minimal weight always runs
++ * on the cfs_rq. If the waking entity slept for a long time, its
++ * vruntime difference from min_vruntime may overflow s64 and their
++ * comparison may get inversed, so ignore the entity's original
++	 * comparison may get inverted, so ignore the entity's original
++ * The maximal vruntime speedup is given by the ratio of normal to
++ * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
++ * When placing a migrated waking entity, its exec_start has been set
++ * from a different rq. In order to take into account a possible
++ * divergence between new and prev rq's clocks task because of irq and
++ * stolen time, we take an additional margin.
++ * So, cutting off on the sleep time of
++ * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
++ * should be safe.
++ */
++ if (entity_is_long_sleeper(se))
++ se->vruntime = vruntime;
++ else
++ se->vruntime = max_vruntime(se->vruntime, vruntime);
+ }
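A quick sanity check of the "~104 days" cutoff quoted in the comment above, assuming scale_load_down(NICE_0_LOAD) == 1024 as on 64-bit kernels; the snippet is only an illustration, not part of the patch.

/* Quick check (not part of the patch) of the "~104 days" figure above. */
#include <stdio.h>

int main(void)
{
	/* Assumes scale_load_down(NICE_0_LOAD) == 1024, as on 64-bit kernels. */
	unsigned long long cutoff_ns = (1ULL << 63) / 1024;
	unsigned long long ns_per_day = 1000ULL * 1000 * 1000 * 60 * 60 * 24;

	printf("cutoff = %llu ns ~= %llu days\n",
	       cutoff_ns, cutoff_ns / ns_per_day);	/* prints ~104 days */
	return 0;
}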
+
+ static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+@@ -4316,6 +4516,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ if (flags & ENQUEUE_WAKEUP)
+ place_entity(cfs_rq, se, 0);
++ /* Entity has migrated, no longer consider this task hot */
++ if (flags & ENQUEUE_MIGRATED)
++ se->exec_start = 0;
+
+ check_schedstat_required();
+ update_stats_enqueue(cfs_rq, se, flags);
+@@ -4502,9 +4705,12 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ */
+ if (schedstat_enabled() &&
+ rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
+- schedstat_set(se->statistics.slice_max,
+- max((u64)schedstat_val(se->statistics.slice_max),
+- se->sum_exec_runtime - se->prev_sum_exec_runtime));
++ struct sched_statistics *stats;
++
++ stats = __schedstats_from_se(se);
++ __schedstat_set(stats->slice_max,
++ max((u64)stats->slice_max,
++ se->sum_exec_runtime - se->prev_sum_exec_runtime));
+ }
+
+ se->prev_sum_exec_runtime = se->sum_exec_runtime;
+@@ -4802,8 +5008,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
+
+ cfs_rq->throttle_count--;
+ if (!cfs_rq->throttle_count) {
+- cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
+- cfs_rq->throttled_clock_task;
++ cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
++ cfs_rq->throttled_clock_pelt;
+
+ /* Add cfs_rq with load or one or more already running entities to the list */
+ if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
+@@ -4820,7 +5026,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
+
+ /* group is entering throttled state, stop time */
+ if (!cfs_rq->throttle_count) {
+- cfs_rq->throttled_clock_task = rq_clock_task(rq);
++ cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
+ list_del_leaf_cfs_rq(cfs_rq);
+ }
+ cfs_rq->throttle_count++;
+@@ -5264,7 +5470,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
+ pcfs_rq = tg->parent->cfs_rq[cpu];
+
+ cfs_rq->throttle_count = pcfs_rq->throttle_count;
+- cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
++ cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu));
+ }
+
+ /* conditionally throttle active cfs_rq's from put_prev_entity() */
+@@ -5556,7 +5762,10 @@ static inline unsigned long cpu_util(int cpu);
+
+ static inline bool cpu_overutilized(int cpu)
+ {
+- return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
++ unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
++ unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
++
++ return !util_fits_cpu(cpu_util(cpu), rq_util_min, rq_util_max, cpu);
+ }
+
+ static inline void update_overutilized_status(struct rq *rq)
+@@ -5997,12 +6206,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+ if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
+ target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+
+- schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+- if (target == nr_cpumask_bits)
++ schedstat_inc(p->stats.nr_wakeups_affine_attempts);
++ if (target != this_cpu)
+ return prev_cpu;
+
+ schedstat_inc(sd->ttwu_move_affine);
+- schedstat_inc(p->se.statistics.nr_wakeups_affine);
++ schedstat_inc(p->stats.nr_wakeups_affine);
+ return target;
+ }
+
+@@ -6270,6 +6479,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
+ {
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ int i, cpu, idle_cpu = -1, nr = INT_MAX;
++ struct sched_domain_shared *sd_share;
+ struct rq *this_rq = this_rq();
+ int this = smp_processor_id();
+ struct sched_domain *this_sd;
+@@ -6309,6 +6519,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
+ time = cpu_clock(this);
+ }
+
++ if (sched_feat(SIS_UTIL)) {
++ sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
++ if (sd_share) {
++ /* because !--nr is the condition to stop scan */
++ nr = READ_ONCE(sd_share->nr_idle_scan) + 1;
++ /* overloaded LLC is unlikely to have idle cpu/core */
++ if (nr == 1)
++ return -1;
++ }
++ }
++
+ for_each_cpu_wrap(cpu, cpus, target + 1) {
+ if (has_idle_core) {
+ i = select_idle_core(p, cpu, cpus, &idle_cpu);
+@@ -6350,21 +6571,23 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
+ static int
+ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
+ {
+- unsigned long task_util, best_cap = 0;
++ unsigned long task_util, util_min, util_max, best_cap = 0;
+ int cpu, best_cpu = -1;
+ struct cpumask *cpus;
+
+ cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
+- task_util = uclamp_task_util(p);
++ task_util = task_util_est(p);
++ util_min = uclamp_eff_value(p, UCLAMP_MIN);
++ util_max = uclamp_eff_value(p, UCLAMP_MAX);
+
+ for_each_cpu_wrap(cpu, cpus, target) {
+ unsigned long cpu_cap = capacity_of(cpu);
+
+ if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
+ continue;
+- if (fits_capacity(task_util, cpu_cap))
++ if (util_fits_cpu(task_util, util_min, util_max, cpu))
+ return cpu;
+
+ if (cpu_cap > best_cap) {
+@@ -6376,10 +6599,13 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
+ return best_cpu;
+ }
+
+-static inline bool asym_fits_capacity(int task_util, int cpu)
++static inline bool asym_fits_cpu(unsigned long util,
++ unsigned long util_min,
++ unsigned long util_max,
++ int cpu)
+ {
+- if (static_branch_unlikely(&sched_asym_cpucapacity))
+- return fits_capacity(task_util, capacity_of(cpu));
++ if (sched_asym_cpucap_active())
++ return util_fits_cpu(util, util_min, util_max, cpu);
+
+ return true;
+ }
+@@ -6391,16 +6617,18 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ {
+ bool has_idle_core = false;
+ struct sched_domain *sd;
+- unsigned long task_util;
++ unsigned long task_util, util_min, util_max;
+ int i, recent_used_cpu;
+
+ /*
+ * On asymmetric system, update task utilization because we will check
+ * that the task fits with cpu's capacity.
+ */
+- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++ if (sched_asym_cpucap_active()) {
+ sync_entity_load_avg(&p->se);
+- task_util = uclamp_task_util(p);
++ task_util = task_util_est(p);
++ util_min = uclamp_eff_value(p, UCLAMP_MIN);
++ util_max = uclamp_eff_value(p, UCLAMP_MAX);
+ }
+
+ /*
+@@ -6409,7 +6637,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ lockdep_assert_irqs_disabled();
+
+ if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+- asym_fits_capacity(task_util, target))
++ asym_fits_cpu(task_util, util_min, util_max, target))
+ return target;
+
+ /*
+@@ -6417,7 +6645,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ */
+ if (prev != target && cpus_share_cache(prev, target) &&
+ (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+- asym_fits_capacity(task_util, prev))
++ asym_fits_cpu(task_util, util_min, util_max, prev))
+ return prev;
+
+ /*
+@@ -6429,8 +6657,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ * pattern is IO completions.
+ */
+ if (is_per_cpu_kthread(current) &&
++ in_task() &&
+ prev == smp_processor_id() &&
+- this_rq()->nr_running <= 1) {
++ this_rq()->nr_running <= 1 &&
++ asym_fits_cpu(task_util, util_min, util_max, prev)) {
+ return prev;
+ }
+
+@@ -6442,12 +6672,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ cpus_share_cache(recent_used_cpu, target) &&
+ (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+- asym_fits_capacity(task_util, recent_used_cpu)) {
+- /*
+- * Replace recent_used_cpu with prev as it is a potential
+- * candidate for the next wake:
+- */
+- p->recent_used_cpu = prev;
++ asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
+ return recent_used_cpu;
+ }
+
+@@ -6455,7 +6680,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ * For asymmetric CPU capacity systems, our domain of interest is
+ * sd_asym_cpucapacity rather than sd_llc.
+ */
+- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++ if (sched_asym_cpucap_active()) {
+ sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+ /*
+ * On an asymmetric CPU capacity system where an exclusive
+@@ -6786,6 +7011,8 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
+ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+ {
+ unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
++ unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
++ unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
+ struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ int cpu, best_energy_cpu = prev_cpu, target = -1;
+ unsigned long cpu_cap, util, base_energy = 0;
+@@ -6810,16 +7037,20 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+ target = prev_cpu;
+
+ sync_entity_load_avg(&p->se);
+- if (!task_util_est(p))
++ if (!uclamp_task_util(p, p_util_min, p_util_max))
+ goto unlock;
+
+ for (; pd; pd = pd->next) {
++ unsigned long util_min = p_util_min, util_max = p_util_max;
+ unsigned long cur_delta, spare_cap, max_spare_cap = 0;
++ unsigned long rq_util_min, rq_util_max;
+ bool compute_prev_delta = false;
+ unsigned long base_energy_pd;
+ int max_spare_cap_cpu = -1;
+
+ for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
++ struct rq *rq = cpu_rq(cpu);
++
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
+ continue;
+
+@@ -6835,8 +7066,21 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+ * much capacity we can get out of the CPU; this is
+ * aligned with sched_cpu_util().
+ */
+- util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
+- if (!fits_capacity(util, cpu_cap))
++ if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
++ /*
++ * Open code uclamp_rq_util_with() except for
++ * the clamp() part. Ie: apply max aggregation
++ * only. util_fits_cpu() logic requires to
++ * operate on non clamped util but must use the
++ * max-aggregated uclamp_{min, max}.
++ */
++ rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
++ rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
++
++ util_min = max(rq_util_min, p_util_min);
++ util_max = max(rq_util_max, p_util_max);
++ }
++ if (!util_fits_cpu(util, util_min, util_max, cpu))
+ continue;
+
+ if (cpu == prev_cpu) {
+@@ -7027,9 +7271,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
+ /* Tell new CPU we are migrated */
+ p->se.avg.last_update_time = 0;
+
+- /* We have migrated, no longer consider this task hot */
+- p->se.exec_start = 0;
+-
+ update_scan_period(p, new_cpu);
+ }
+
+@@ -7806,7 +8047,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
+ if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
+ int cpu;
+
+- schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
++ schedstat_inc(p->stats.nr_failed_migrations_affine);
+
+ env->flags |= LBF_SOME_PINNED;
+
+@@ -7840,7 +8081,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
+ env->flags &= ~LBF_ALL_PINNED;
+
+ if (task_running(env->src_rq, p)) {
+- schedstat_inc(p->se.statistics.nr_failed_migrations_running);
++ schedstat_inc(p->stats.nr_failed_migrations_running);
+ return 0;
+ }
+
+@@ -7862,12 +8103,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
+ env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
+ if (tsk_cache_hot == 1) {
+ schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+- schedstat_inc(p->se.statistics.nr_forced_migrations);
++ schedstat_inc(p->stats.nr_forced_migrations);
+ }
+ return 1;
+ }
+
+- schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
++ schedstat_inc(p->stats.nr_failed_migrations_hot);
+ return 0;
+ }
+
+@@ -8009,7 +8250,7 @@ static int detach_tasks(struct lb_env *env)
+
+ case migrate_misfit:
+ /* This is not a misfit task */
+- if (task_fits_capacity(p, capacity_of(env->src_cpu)))
++ if (task_fits_cpu(p, env->src_cpu))
+ goto next;
+
+ env->imbalance = 0;
+@@ -8395,16 +8636,82 @@ static unsigned long scale_rt_capacity(int cpu)
+
+ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
+ {
++ unsigned long capacity_orig = arch_scale_cpu_capacity(cpu);
+ unsigned long capacity = scale_rt_capacity(cpu);
+ struct sched_group *sdg = sd->groups;
++ struct rq *rq = cpu_rq(cpu);
+
+- cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
++ rq->cpu_capacity_orig = capacity_orig;
+
+ if (!capacity)
+ capacity = 1;
+
+- cpu_rq(cpu)->cpu_capacity = capacity;
+- trace_sched_cpu_capacity_tp(cpu_rq(cpu));
++ rq->cpu_capacity = capacity;
++
++ /*
++ * Detect if the performance domain is in capacity inversion state.
++ *
++ * Capacity inversion happens when another perf domain with equal or
++ * lower capacity_orig_of() ends up having higher capacity than this
++ * domain after subtracting thermal pressure.
++ *
++ * We only take into account thermal pressure in this detection as it's
++ * the only metric that actually results in *real* reduction of
++ * capacity due to performance points (OPPs) being dropped/become
++	 * capacity due to performance points (OPPs) being dropped/becoming
++ *
++ * We assume:
++ * * That all cpus in a perf domain have the same capacity_orig
++ * (same uArch).
++ * * Thermal pressure will impact all cpus in this perf domain
++ * equally.
++ */
++ if (sched_energy_enabled()) {
++ unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
++ struct perf_domain *pd;
++
++ rcu_read_lock();
++
++ pd = rcu_dereference(rq->rd->pd);
++ rq->cpu_capacity_inverted = 0;
++
++ for (; pd; pd = pd->next) {
++ struct cpumask *pd_span = perf_domain_span(pd);
++ unsigned long pd_cap_orig, pd_cap;
++
++ /* We can't be inverted against our own pd */
++ if (cpumask_test_cpu(cpu_of(rq), pd_span))
++ continue;
++
++ cpu = cpumask_any(pd_span);
++ pd_cap_orig = arch_scale_cpu_capacity(cpu);
++
++ if (capacity_orig < pd_cap_orig)
++ continue;
++
++ /*
++			 * Handle the case where multiple perf domains have the
++			 * same capacity_orig but one of them is under higher
++ * thermal pressure. We record it as capacity
++ * inversion.
++ */
++ if (capacity_orig == pd_cap_orig) {
++ pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu));
++
++ if (pd_cap > inv_cap) {
++ rq->cpu_capacity_inverted = inv_cap;
++ break;
++ }
++ } else if (pd_cap_orig > inv_cap) {
++ rq->cpu_capacity_inverted = inv_cap;
++ break;
++ }
++ }
++
++ rcu_read_unlock();
++ }
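Toy numbers (assumed, not taken from the patch) for the detection loop above: a big CPU whose thermal pressure drops its usable capacity below the capacity_orig of a smaller, unpressured perf domain gets flagged as capacity inverted.

/* Toy illustration (not part of the patch) of the inversion check above. */
#include <stdio.h>

int main(void)
{
	unsigned long big_cap_orig = 1024, big_thermal_pressure = 300;
	unsigned long little_cap_orig = 768;	/* a lower-capacity perf domain */
	unsigned long inv_cap = big_cap_orig - big_thermal_pressure;	/* 724 */

	/* Mirrors the "pd_cap_orig > inv_cap" branch: 768 > 724 -> inverted. */
	if (little_cap_orig > inv_cap)
		printf("big CPUs are capacity inverted, inv_cap=%lu\n", inv_cap);
	return 0;
}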
++
++ trace_sched_cpu_capacity_tp(rq);
+
+ sdg->sgc->capacity = capacity;
+ sdg->sgc->min_capacity = capacity;
+@@ -8894,6 +9201,10 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
+
+ memset(sgs, 0, sizeof(*sgs));
+
++ /* Assume that task can't fit any CPU of the group */
++ if (sd->flags & SD_ASYM_CPUCAPACITY)
++ sgs->group_misfit_task_load = 1;
++
+ for_each_cpu(i, sched_group_span(group)) {
+ struct rq *rq = cpu_rq(i);
+ unsigned int local;
+@@ -8913,12 +9224,12 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
+ if (!nr_running && idle_cpu_without(i, p))
+ sgs->idle_cpus++;
+
+- }
++ /* Check if task fits in the CPU */
++ if (sd->flags & SD_ASYM_CPUCAPACITY &&
++ sgs->group_misfit_task_load &&
++ task_fits_cpu(p, i))
++ sgs->group_misfit_task_load = 0;
+
+- /* Check if task fits in the group */
+- if (sd->flags & SD_ASYM_CPUCAPACITY &&
+- !task_fits_capacity(p, group->sgc->max_capacity)) {
+- sgs->group_misfit_task_load = 1;
+ }
+
+ sgs->group_capacity = group->sgc->capacity;
+@@ -8993,9 +9304,10 @@ static bool update_pick_idlest(struct sched_group *idlest,
+ * This is an approximation as the number of running tasks may not be
+ * related to the number of busy CPUs due to sched_setaffinity.
+ */
+-static inline bool allow_numa_imbalance(int dst_running, int dst_weight)
++static inline bool
++allow_numa_imbalance(unsigned int running, unsigned int weight)
+ {
+- return (dst_running < (dst_weight >> 2));
++ return (running < (weight >> 2));
+ }
+
+ /*
+@@ -9129,12 +9441,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+ return idlest;
+ #endif
+ /*
+- * Otherwise, keep the task on this node to stay close
+- * its wakeup source and improve locality. If there is
+- * a real need of migration, periodic load balance will
+- * take care of it.
++ * Otherwise, keep the task close to the wakeup source
++ * and improve locality if the number of running tasks
++ * would remain below threshold where an imbalance is
++ * allowed. If there is a real need of migration,
++ * periodic load balance will take care of it.
+ */
+- if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight))
++ if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, local_sgs.group_weight))
+ return NULL;
+ }
+
+@@ -9152,6 +9465,77 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+ return idlest;
+ }
+
++static void update_idle_cpu_scan(struct lb_env *env,
++ unsigned long sum_util)
++{
++ struct sched_domain_shared *sd_share;
++ int llc_weight, pct;
++ u64 x, y, tmp;
++ /*
++ * Update the number of CPUs to scan in LLC domain, which could
++ * be used as a hint in select_idle_cpu(). The update of sd_share
++ * could be expensive because it is within a shared cache line.
++ * So the write of this hint only occurs during periodic load
++ * balancing, rather than CPU_NEWLY_IDLE, because the latter
++ * can fire way more frequently than the former.
++ */
++ if (!sched_feat(SIS_UTIL) || env->idle == CPU_NEWLY_IDLE)
++ return;
++
++ llc_weight = per_cpu(sd_llc_size, env->dst_cpu);
++ if (env->sd->span_weight != llc_weight)
++ return;
++
++ sd_share = rcu_dereference(per_cpu(sd_llc_shared, env->dst_cpu));
++ if (!sd_share)
++ return;
++
++ /*
++	 * The number of CPUs to search drops as sum_util increases; when
++	 * sum_util hits 85% or above, the scan stops.
++	 * 85% is chosen as the threshold because it corresponds to
++	 * imbalance_pct (117) when an LLC sched group is overloaded.
++ *
++ * let y = SCHED_CAPACITY_SCALE - p * x^2 [1]
++ * and y'= y / SCHED_CAPACITY_SCALE
++ *
++ * x is the ratio of sum_util compared to the CPU capacity:
++ * x = sum_util / (llc_weight * SCHED_CAPACITY_SCALE)
++ * y' is the ratio of CPUs to be scanned in the LLC domain,
++ * and the number of CPUs to scan is calculated by:
++ *
++ * nr_scan = llc_weight * y' [2]
++ *
++ * When x hits the threshold of overloaded, AKA, when
++ * x = 100 / pct, y drops to 0. According to [1],
++ * p should be SCHED_CAPACITY_SCALE * pct^2 / 10000
++ *
++ * Scale x by SCHED_CAPACITY_SCALE:
++ * x' = sum_util / llc_weight; [3]
++ *
++ * and finally [1] becomes:
++ * y = SCHED_CAPACITY_SCALE -
++ * x'^2 * pct^2 / (10000 * SCHED_CAPACITY_SCALE) [4]
++ *
++ */
++ /* equation [3] */
++ x = sum_util;
++ do_div(x, llc_weight);
++
++ /* equation [4] */
++ pct = env->sd->imbalance_pct;
++ tmp = x * x * pct * pct;
++ do_div(tmp, 10000 * SCHED_CAPACITY_SCALE);
++ tmp = min_t(long, tmp, SCHED_CAPACITY_SCALE);
++ y = SCHED_CAPACITY_SCALE - tmp;
++
++ /* equation [2] */
++ y *= llc_weight;
++ do_div(y, SCHED_CAPACITY_SCALE);
++ if ((int)y != sd_share->nr_idle_scan)
++ WRITE_ONCE(sd_share->nr_idle_scan, (int)y);
++}
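A worked example of equations [2]-[4] above, for a hypothetical 16-CPU LLC running at roughly half utilization with imbalance_pct = 117; all values are assumptions chosen only to show the arithmetic, not part of the patch.

/*
 * Worked example (not part of the patch) of equations [2]-[4] above, for a
 * hypothetical 16-CPU LLC at ~50% utilization with imbalance_pct = 117.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024

int main(void)
{
	unsigned long llc_weight = 16, pct = 117;
	unsigned long long sum_util = 16 * 512;		/* half-busy LLC */
	unsigned long long x, y, tmp;

	x = sum_util / llc_weight;			/* [3]: x' = 512 */
	tmp = x * x * pct * pct;
	tmp /= 10000ULL * SCHED_CAPACITY_SCALE;		/* 350 */
	if (tmp > SCHED_CAPACITY_SCALE)
		tmp = SCHED_CAPACITY_SCALE;
	y = SCHED_CAPACITY_SCALE - tmp;			/* [4]: y = 674 */

	y = y * llc_weight / SCHED_CAPACITY_SCALE;	/* [2]: nr_scan = 10 */
	printf("nr_idle_scan = %llu of %lu CPUs\n", y, llc_weight);
	return 0;
}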
++
+ /**
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
+ * @env: The load balancing environment.
+@@ -9164,6 +9548,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
+ struct sched_group *sg = env->sd->groups;
+ struct sg_lb_stats *local = &sds->local_stat;
+ struct sg_lb_stats tmp_sgs;
++ unsigned long sum_util = 0;
+ int sg_status = 0;
+
+ do {
+@@ -9196,6 +9581,7 @@ next_group:
+ sds->total_load += sgs->group_load;
+ sds->total_capacity += sgs->group_capacity;
+
++ sum_util += sgs->group_util;
+ sg = sg->next;
+ } while (sg != env->sd->groups);
+
+@@ -9221,6 +9607,8 @@ next_group:
+ WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
+ trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
+ }
++
++ update_idle_cpu_scan(env, sum_util);
+ }
+
+ #define NUMA_IMBALANCE_MIN 2
+@@ -9340,7 +9728,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+ /* Consider allowing a small imbalance between NUMA groups */
+ if (env->sd->flags & SD_NUMA) {
+ env->imbalance = adjust_numa_imbalance(env->imbalance,
+- busiest->sum_nr_running, busiest->group_weight);
++ local->sum_nr_running + 1, local->group_weight);
+ }
+
+ return;
+@@ -9359,8 +9747,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+ local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
+ local->group_capacity;
+
+- sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
+- sds->total_capacity;
+ /*
+ * If the local group is more loaded than the selected
+ * busiest group don't try to pull any tasks.
+@@ -9369,6 +9755,19 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+ env->imbalance = 0;
+ return;
+ }
++
++ sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
++ sds->total_capacity;
++
++ /*
++ * If the local group is more loaded than the average system
++ * load, don't try to pull any tasks.
++ */
++ if (local->avg_load >= sds->avg_load) {
++ env->imbalance = 0;
++ return;
++ }
++
+ }
+
+ /*
+@@ -11358,8 +11757,6 @@ void free_fair_sched_group(struct task_group *tg)
+ {
+ int i;
+
+- destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+-
+ for_each_possible_cpu(i) {
+ if (tg->cfs_rq)
+ kfree(tg->cfs_rq[i]);
+@@ -11394,7 +11791,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
+ if (!cfs_rq)
+ goto err;
+
+- se = kzalloc_node(sizeof(struct sched_entity),
++ se = kzalloc_node(sizeof(struct sched_entity_stats),
+ GFP_KERNEL, cpu_to_node(i));
+ if (!se)
+ goto err_free_rq;
+@@ -11436,6 +11833,8 @@ void unregister_fair_sched_group(struct task_group *tg)
+ struct rq *rq;
+ int cpu;
+
++ destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
++
+ for_each_possible_cpu(cpu) {
+ if (tg->se[cpu])
+ remove_entity_load_avg(tg->se[cpu]);
+diff --git a/kernel/sched/features.h b/kernel/sched/features.h
+index 7f8dace0964c2..c4947c1b5edbe 100644
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -55,7 +55,8 @@ SCHED_FEAT(TTWU_QUEUE, true)
+ /*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+-SCHED_FEAT(SIS_PROP, true)
++SCHED_FEAT(SIS_PROP, false)
++SCHED_FEAT(SIS_UTIL, true)
+
+ /*
+ * Issue a WARN when we do multiple update_rq_clock() calls
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index d17b0a5ce6ac3..499a3e286cd05 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -105,7 +105,7 @@ void __cpuidle default_idle_call(void)
+ * last -- this is very similar to the entry code.
+ */
+ trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(_THIS_IP_);
++ lockdep_hardirqs_on_prepare();
+ rcu_idle_enter();
+ lockdep_hardirqs_on(_THIS_IP_);
+
+diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
+index b5add64d9698c..3d2825408e3a2 100644
+--- a/kernel/sched/membarrier.c
++++ b/kernel/sched/membarrier.c
+@@ -147,11 +147,11 @@
+ #endif
+
+ #ifdef CONFIG_RSEQ
+-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \
++#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \
+ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \
+- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
++ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
+ #else
+-#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
++#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0
+ #endif
+
+ #define MEMBARRIER_CMD_BITMASK \
+@@ -159,7 +159,8 @@
+ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
+ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
+- | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
++ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
++ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK)
+
+ static void ipi_mb(void *info)
+ {
+diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
+index e06071bf3472c..4ff2ed4f8fa15 100644
+--- a/kernel/sched/pelt.h
++++ b/kernel/sched/pelt.h
+@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
+ }
+ #endif
+
++#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
++
+ static inline u32 get_pelt_divider(struct sched_avg *avg)
+ {
+- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
++ return PELT_MIN_DIVIDER + avg->period_contrib;
+ }
+
+ static inline void cfs_se_util_change(struct sched_avg *avg)
+@@ -143,9 +145,9 @@ static inline u64 rq_clock_pelt(struct rq *rq)
+ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+ {
+ if (unlikely(cfs_rq->throttle_count))
+- return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
++ return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;
+
+- return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
++ return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
+ }
+ #else
+ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index 1652f2bb54b79..fa88bf6ccce02 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -34,13 +34,19 @@
+ * delayed on that resource such that nobody is advancing and the CPU
+ * goes idle. This leaves both workload and CPU unproductive.
+ *
+- * Naturally, the FULL state doesn't exist for the CPU resource at the
+- * system level, but exist at the cgroup level, means all non-idle tasks
+- * in a cgroup are delayed on the CPU resource which used by others outside
+- * of the cgroup or throttled by the cgroup cpu.max configuration.
+- *
+ * SOME = nr_delayed_tasks != 0
+- * FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0
++ * FULL = nr_delayed_tasks != 0 && nr_productive_tasks == 0
++ *
++ * What it means for a task to be productive is defined differently
++ * for each resource. For IO, productive means a running task. For
++ * memory, productive means a running task that isn't a reclaimer. For
++ * CPU, productive means an oncpu task.
++ *
++ * Naturally, the FULL state doesn't exist for the CPU resource at the
++ * system level, but exists at the cgroup level. At the cgroup level,
++ * FULL means all non-idle tasks in the cgroup are delayed on the CPU
++ * resource which is being used by others outside of the cgroup or
++ * throttled by the cgroup cpu.max configuration.
+ *
+ * The percentage of wallclock time spent in those compound stall
+ * states gives pressure numbers between 0 and 100 for each resource,
+@@ -81,13 +87,13 @@
+ *
+ * threads = min(nr_nonidle_tasks, nr_cpus)
+ * SOME = min(nr_delayed_tasks / threads, 1)
+- * FULL = (threads - min(nr_running_tasks, threads)) / threads
++ * FULL = (threads - min(nr_productive_tasks, threads)) / threads
+ *
+ * For the 257 number crunchers on 256 CPUs, this yields:
+ *
+ * threads = min(257, 256)
+ * SOME = min(1 / 256, 1) = 0.4%
+- * FULL = (256 - min(257, 256)) / 256 = 0%
++ * FULL = (256 - min(256, 256)) / 256 = 0%
+ *
+ * For the 1 out of 4 memory-delayed tasks, this yields:
+ *
+@@ -112,7 +118,7 @@
+ * For each runqueue, we track:
+ *
+ * tSOME[cpu] = time(nr_delayed_tasks[cpu] != 0)
+- * tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_running_tasks[cpu])
++ * tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_productive_tasks[cpu])
+ * tNONIDLE[cpu] = time(nr_nonidle_tasks[cpu] != 0)
+ *
+ * and then periodically aggregate:
+@@ -233,7 +239,8 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
+ case PSI_MEM_SOME:
+ return unlikely(tasks[NR_MEMSTALL]);
+ case PSI_MEM_FULL:
+- return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]);
++ return unlikely(tasks[NR_MEMSTALL] &&
++ tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
+ case PSI_CPU_SOME:
+ return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]);
+ case PSI_CPU_FULL:
+@@ -710,10 +717,11 @@ static void psi_group_change(struct psi_group *group, int cpu,
+ if (groupc->tasks[t]) {
+ groupc->tasks[t]--;
+ } else if (!psi_bug) {
+- printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
++ printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u %u] clear=%x set=%x\n",
+ cpu, t, groupc->tasks[0],
+ groupc->tasks[1], groupc->tasks[2],
+- groupc->tasks[3], clear, set);
++ groupc->tasks[3], groupc->tasks[4],
++ clear, set);
+ psi_bug = 1;
+ }
+ }
+@@ -854,12 +862,15 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
+ int clear = TSK_ONCPU, set = 0;
+
+ /*
+- * When we're going to sleep, psi_dequeue() lets us handle
+- * TSK_RUNNING and TSK_IOWAIT here, where we can combine it
+- * with TSK_ONCPU and save walking common ancestors twice.
++ * When we're going to sleep, psi_dequeue() lets us
++ * handle TSK_RUNNING, TSK_MEMSTALL_RUNNING and
++ * TSK_IOWAIT here, where we can combine it with
++ * TSK_ONCPU and save walking common ancestors twice.
+ */
+ if (sleep) {
+ clear |= TSK_RUNNING;
++ if (prev->in_memstall)
++ clear |= TSK_MEMSTALL_RUNNING;
+ if (prev->in_iowait)
+ set |= TSK_IOWAIT;
+ }
+@@ -908,7 +919,7 @@ void psi_memstall_enter(unsigned long *flags)
+ rq = this_rq_lock_irq(&rf);
+
+ current->in_memstall = 1;
+- psi_task_change(current, 0, TSK_MEMSTALL);
++ psi_task_change(current, 0, TSK_MEMSTALL | TSK_MEMSTALL_RUNNING);
+
+ rq_unlock_irq(rq, &rf);
+ }
+@@ -937,7 +948,7 @@ void psi_memstall_leave(unsigned long *flags)
+ rq = this_rq_lock_irq(&rf);
+
+ current->in_memstall = 0;
+- psi_task_change(current, TSK_MEMSTALL, 0);
++ psi_task_change(current, TSK_MEMSTALL | TSK_MEMSTALL_RUNNING, 0);
+
+ rq_unlock_irq(rq, &rf);
+ }
+@@ -1051,14 +1062,17 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ mutex_unlock(&group->avgs_lock);
+
+ for (full = 0; full < 2; full++) {
+- unsigned long avg[3];
+- u64 total;
++ unsigned long avg[3] = { 0, };
++ u64 total = 0;
+ int w;
+
+- for (w = 0; w < 3; w++)
+- avg[w] = group->avg[res * 2 + full][w];
+- total = div_u64(group->total[PSI_AVGS][res * 2 + full],
+- NSEC_PER_USEC);
++ /* CPU FULL is undefined at the system level */
++ if (!(group == &psi_system && res == PSI_CPU && full)) {
++ for (w = 0; w < 3; w++)
++ avg[w] = group->avg[res * 2 + full][w];
++ total = div_u64(group->total[PSI_AVGS][res * 2 + full],
++ NSEC_PER_USEC);
++ }
+
+ seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
+ full ? "full" : "some",
+@@ -1071,44 +1085,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ return 0;
+ }
+
+-static int psi_io_show(struct seq_file *m, void *v)
+-{
+- return psi_show(m, &psi_system, PSI_IO);
+-}
+-
+-static int psi_memory_show(struct seq_file *m, void *v)
+-{
+- return psi_show(m, &psi_system, PSI_MEM);
+-}
+-
+-static int psi_cpu_show(struct seq_file *m, void *v)
+-{
+- return psi_show(m, &psi_system, PSI_CPU);
+-}
+-
+-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
+-{
+- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+- return -EPERM;
+-
+- return single_open(file, psi_show, NULL);
+-}
+-
+-static int psi_io_open(struct inode *inode, struct file *file)
+-{
+- return psi_open(file, psi_io_show);
+-}
+-
+-static int psi_memory_open(struct inode *inode, struct file *file)
+-{
+- return psi_open(file, psi_memory_show);
+-}
+-
+-static int psi_cpu_open(struct inode *inode, struct file *file)
+-{
+- return psi_open(file, psi_cpu_show);
+-}
+-
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ char *buf, size_t nbytes, enum psi_res res)
+ {
+@@ -1151,7 +1127,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ t->event = 0;
+ t->last_event_time = 0;
+ init_waitqueue_head(&t->event_wait);
+- kref_init(&t->refcount);
+
+ mutex_lock(&group->trigger_lock);
+
+@@ -1180,20 +1155,25 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ return t;
+ }
+
+-static void psi_trigger_destroy(struct kref *ref)
++void psi_trigger_destroy(struct psi_trigger *t)
+ {
+- struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
+- struct psi_group *group = t->group;
++ struct psi_group *group;
+ struct task_struct *task_to_destroy = NULL;
+
+- if (static_branch_likely(&psi_disabled))
++ /*
++ * We do not check psi_disabled since it might have been disabled after
++ * the trigger got created.
++ */
++ if (!t)
+ return;
+
++ group = t->group;
+ /*
+- * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+- * from under a polling process.
++ * Wakeup waiters to stop polling and clear the queue to prevent it from
++ * being accessed later. Can happen if cgroup is deleted from under a
++ * polling process.
+ */
+- wake_up_interruptible(&t->event_wait);
++ wake_up_pollfree(&t->event_wait);
+
+ mutex_lock(&group->trigger_lock);
+
+@@ -1224,9 +1204,9 @@ static void psi_trigger_destroy(struct kref *ref)
+ mutex_unlock(&group->trigger_lock);
+
+ /*
+- * Wait for both *trigger_ptr from psi_trigger_replace and
+- * poll_task RCUs to complete their read-side critical sections
+- * before destroying the trigger and optionally the poll_task
++ * Wait for psi_schedule_poll_work RCU to complete its read-side
++ * critical section before destroying the trigger and optionally the
++ * poll_task.
+ */
+ synchronize_rcu();
+ /*
+@@ -1243,18 +1223,6 @@ static void psi_trigger_destroy(struct kref *ref)
+ kfree(t);
+ }
+
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
+-{
+- struct psi_trigger *old = *trigger_ptr;
+-
+- if (static_branch_likely(&psi_disabled))
+- return;
+-
+- rcu_assign_pointer(*trigger_ptr, new);
+- if (old)
+- kref_put(&old->refcount, psi_trigger_destroy);
+-}
+-
+ __poll_t psi_trigger_poll(void **trigger_ptr,
+ struct file *file, poll_table *wait)
+ {
+@@ -1264,27 +1232,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
+ if (static_branch_likely(&psi_disabled))
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+
+- rcu_read_lock();
+-
+- t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
+- if (!t) {
+- rcu_read_unlock();
++ t = smp_load_acquire(trigger_ptr);
++ if (!t)
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+- }
+- kref_get(&t->refcount);
+-
+- rcu_read_unlock();
+
+ poll_wait(file, &t->event_wait, wait);
+
+ if (cmpxchg(&t->event, 1, 0) == 1)
+ ret |= EPOLLPRI;
+
+- kref_put(&t->refcount, psi_trigger_destroy);
+-
+ return ret;
+ }
+
++#ifdef CONFIG_PROC_FS
++static int psi_io_show(struct seq_file *m, void *v)
++{
++ return psi_show(m, &psi_system, PSI_IO);
++}
++
++static int psi_memory_show(struct seq_file *m, void *v)
++{
++ return psi_show(m, &psi_system, PSI_MEM);
++}
++
++static int psi_cpu_show(struct seq_file *m, void *v)
++{
++ return psi_show(m, &psi_system, PSI_CPU);
++}
++
++static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
++{
++ if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
++ return -EPERM;
++
++ return single_open(file, psi_show, NULL);
++}
++
++static int psi_io_open(struct inode *inode, struct file *file)
++{
++ return psi_open(file, psi_io_show);
++}
++
++static int psi_memory_open(struct inode *inode, struct file *file)
++{
++ return psi_open(file, psi_memory_show);
++}
++
++static int psi_cpu_open(struct inode *inode, struct file *file)
++{
++ return psi_open(file, psi_cpu_show);
++}
++
+ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ size_t nbytes, enum psi_res res)
+ {
+@@ -1305,14 +1303,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+
+ buf[buf_size - 1] = '\0';
+
+- new = psi_trigger_create(&psi_system, buf, nbytes, res);
+- if (IS_ERR(new))
+- return PTR_ERR(new);
+-
+ seq = file->private_data;
++
+ /* Take seq->lock to protect seq->private from concurrent writes */
+ mutex_lock(&seq->lock);
+- psi_trigger_replace(&seq->private, new);
++
++ /* Allow only one trigger per file descriptor */
++ if (seq->private) {
++ mutex_unlock(&seq->lock);
++ return -EBUSY;
++ }
++
++ new = psi_trigger_create(&psi_system, buf, nbytes, res);
++ if (IS_ERR(new)) {
++ mutex_unlock(&seq->lock);
++ return PTR_ERR(new);
++ }
++
++ smp_store_release(&seq->private, new);
+ mutex_unlock(&seq->lock);
+
+ return nbytes;
+@@ -1347,7 +1355,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
+ {
+ struct seq_file *seq = file->private_data;
+
+- psi_trigger_replace(&seq->private, NULL);
++ psi_trigger_destroy(seq->private);
+ return single_release(inode, file);
+ }
+
+@@ -1389,3 +1397,5 @@ static int __init psi_proc_init(void)
+ return 0;
+ }
+ module_init(psi_proc_init);
++
++#endif /* CONFIG_PROC_FS */
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index 3daf42a0f4623..4b9281e6b1ccd 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -52,11 +52,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
+ rt_b->rt_period_timer.function = sched_rt_period_timer;
+ }
+
+-static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
++static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
+ {
+- if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
+- return;
+-
+ raw_spin_lock(&rt_b->rt_runtime_lock);
+ if (!rt_b->rt_period_active) {
+ rt_b->rt_period_active = 1;
+@@ -75,6 +72,14 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+ raw_spin_unlock(&rt_b->rt_runtime_lock);
+ }
+
++static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
++{
++ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
++ return;
++
++ do_start_rt_bandwidth(rt_b);
++}
++
+ void init_rt_rq(struct rt_rq *rt_rq)
+ {
+ struct rt_prio_array *array;
+@@ -137,13 +142,17 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
+ return rt_rq->rq;
+ }
+
+-void free_rt_sched_group(struct task_group *tg)
++void unregister_rt_sched_group(struct task_group *tg)
+ {
+- int i;
+-
+ if (tg->rt_se)
+ destroy_rt_bandwidth(&tg->rt_bandwidth);
+
++}
++
++void free_rt_sched_group(struct task_group *tg)
++{
++ int i;
++
+ for_each_possible_cpu(i) {
+ if (tg->rt_rq)
+ kfree(tg->rt_rq[i]);
+@@ -250,6 +259,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
+ return &rq->rt;
+ }
+
++void unregister_rt_sched_group(struct task_group *tg) { }
++
+ void free_rt_sched_group(struct task_group *tg) { }
+
+ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+@@ -433,7 +444,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
+ #endif /* CONFIG_SMP */
+
+ static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
+-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
++static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
+
+ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
+ {
+@@ -462,7 +473,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
+ unsigned int cpu_cap;
+
+ /* Only heterogeneous systems can benefit from this check */
+- if (!static_branch_unlikely(&sched_asym_cpucapacity))
++ if (!sched_asym_cpucap_active())
+ return true;
+
+ min_cap = uclamp_eff_value(p, UCLAMP_MIN);
+@@ -554,7 +565,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
+ rt_se = rt_rq->tg->rt_se[cpu];
+
+ if (!rt_se) {
+- dequeue_top_rt_rq(rt_rq);
++ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
+ /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
+ }
+@@ -640,7 +651,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+
+ static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
+ {
+- dequeue_top_rt_rq(rt_rq);
++ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
+ }
+
+ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+@@ -874,6 +885,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+ int enqueue = 0;
+ struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
+ struct rq *rq = rq_of_rt_rq(rt_rq);
++ struct rq_flags rf;
+ int skip;
+
+ /*
+@@ -888,7 +900,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+ if (skip)
+ continue;
+
+- raw_spin_rq_lock(rq);
++ rq_lock(rq, &rf);
+ update_rq_clock(rq);
+
+ if (rt_rq->rt_time) {
+@@ -926,7 +938,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+
+ if (enqueue)
+ sched_rt_rq_enqueue(rt_rq);
+- raw_spin_rq_unlock(rq);
++ rq_unlock(rq, &rf);
+ }
+
+ if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
+@@ -1009,8 +1021,8 @@ static void update_curr_rt(struct rq *rq)
+ if (unlikely((s64)delta_exec <= 0))
+ return;
+
+- schedstat_set(curr->se.statistics.exec_max,
+- max(curr->se.statistics.exec_max, delta_exec));
++ schedstat_set(curr->stats.exec_max,
++ max(curr->stats.exec_max, delta_exec));
+
+ curr->se.sum_exec_runtime += delta_exec;
+ account_group_exec_runtime(curr, delta_exec);
+@@ -1023,19 +1035,23 @@ static void update_curr_rt(struct rq *rq)
+
+ for_each_sched_rt_entity(rt_se) {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
++ int exceeded;
+
+ if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+ raw_spin_lock(&rt_rq->rt_runtime_lock);
+ rt_rq->rt_time += delta_exec;
+- if (sched_rt_runtime_exceeded(rt_rq))
++ exceeded = sched_rt_runtime_exceeded(rt_rq);
++ if (exceeded)
+ resched_curr(rq);
+ raw_spin_unlock(&rt_rq->rt_runtime_lock);
++ if (exceeded)
++ do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
+ }
+ }
+ }
+
+ static void
+-dequeue_top_rt_rq(struct rt_rq *rt_rq)
++dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
+ {
+ struct rq *rq = rq_of_rt_rq(rt_rq);
+
+@@ -1046,7 +1062,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
+
+ BUG_ON(!rq->nr_running);
+
+- sub_nr_running(rq, rt_rq->rt_nr_running);
++ sub_nr_running(rq, count);
+ rt_rq->rt_queued = 0;
+
+ }
+@@ -1326,18 +1342,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
+ static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+ {
+ struct sched_rt_entity *back = NULL;
++ unsigned int rt_nr_running;
+
+ for_each_sched_rt_entity(rt_se) {
+ rt_se->back = back;
+ back = rt_se;
+ }
+
+- dequeue_top_rt_rq(rt_rq_of_se(back));
++ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+
+ for (rt_se = back; rt_se; rt_se = rt_se->back) {
+ if (on_rt_rq(rt_se))
+ __dequeue_rt_entity(rt_se, flags);
+ }
++
++ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ }
+
+ static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+@@ -1595,8 +1614,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
+ rt_queue_push_tasks(rq);
+ }
+
+-static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
+- struct rt_rq *rt_rq)
++static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
+ {
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *next = NULL;
+@@ -1607,6 +1625,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
+ BUG_ON(idx >= MAX_RT_PRIO);
+
+ queue = array->queue + idx;
++ if (SCHED_WARN_ON(list_empty(queue)))
++ return NULL;
+ next = list_entry(queue->next, struct sched_rt_entity, run_list);
+
+ return next;
+@@ -1618,8 +1638,9 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
+ struct rt_rq *rt_rq = &rq->rt;
+
+ do {
+- rt_se = pick_next_rt_entity(rq, rt_rq);
+- BUG_ON(!rt_se);
++ rt_se = pick_next_rt_entity(rt_rq);
++ if (unlikely(!rt_se))
++ return NULL;
+ rt_rq = group_rt_rq(rt_se);
+ } while (rt_rq);
+
+@@ -1717,7 +1738,7 @@ static int find_lowest_rq(struct task_struct *task)
+ * If we're on asym system ensure we consider the different capacities
+ * of the CPUs when searching for the lowest_mask.
+ */
+- if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++ if (sched_asym_cpucap_active()) {
+
+ ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
+ task, lowest_mask,
+@@ -1821,11 +1842,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
+ * the mean time, task could have
+ * migrated already or had its affinity changed.
+ * Also make sure that it wasn't scheduled on its rq.
++ * It is possible the task was scheduled, set
++ * "migrate_disabled" and then got preempted, so we must
++ * check the task migration disable flag here too.
+ */
+ if (unlikely(task_rq(task) != rq ||
+ !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
+ task_running(rq, task) ||
+ !rt_task(task) ||
++ is_migration_disabled(task) ||
+ !task_on_rq_queued(task))) {
+
+ double_unlock_balance(rq, lowest_rq);
+@@ -1885,6 +1910,16 @@ static int push_rt_task(struct rq *rq, bool pull)
+ return 0;
+
+ retry:
++ /*
++ * It's possible that the next_task slipped in of
++ * higher priority than current. If that's the case
++ * just reschedule current.
++ */
++ if (unlikely(next_task->prio < rq->curr->prio)) {
++ resched_curr(rq);
++ return 0;
++ }
++
+ if (is_migration_disabled(next_task)) {
+ struct task_struct *push_task = NULL;
+ int cpu;
+@@ -1892,6 +1927,18 @@ retry:
+ if (!pull || rq->push_busy)
+ return 0;
+
++ /*
++ * Invoking find_lowest_rq() on anything but an RT task doesn't
++ * make sense. Per the above priority check, curr has to
++ * be of higher priority than next_task, so no need to
++ * reschedule when bailing out.
++ *
++ * Note that the stoppers are masqueraded as SCHED_FIFO
++ * (cf. sched_set_stop_task()), so we can't rely on rt_task().
++ */
++ if (rq->curr->sched_class != &rt_sched_class)
++ return 0;
++
+ cpu = find_lowest_rq(rq->curr);
+ if (cpu == -1 || cpu == rq->cpu)
+ return 0;
+@@ -1916,16 +1963,6 @@ retry:
+ if (WARN_ON(next_task == rq->curr))
+ return 0;
+
+- /*
+- * It's possible that the next_task slipped in of
+- * higher priority than current. If that's the case
+- * just reschedule current.
+- */
+- if (unlikely(next_task->prio < rq->curr->prio)) {
+- resched_curr(rq);
+- return 0;
+- }
+-
+ /* We might release rq lock */
+ get_task_struct(next_task);
+
+@@ -2779,8 +2816,12 @@ static int sched_rt_global_validate(void)
+
+ static void sched_rt_do_global(void)
+ {
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+ def_rt_bandwidth.rt_runtime = global_rt_runtime();
+ def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
++ raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
+ }
+
+ int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 3d3e5793e1172..5061093d9baae 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -348,9 +348,8 @@ extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
+ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
+ extern bool __checkparam_dl(const struct sched_attr *attr);
+ extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
+-extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
+ extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
+-extern bool dl_cpu_busy(unsigned int cpu);
++extern int dl_bw_check_overflow(int cpu);
+
+ #ifdef CONFIG_CGROUP_SCHED
+
+@@ -486,6 +485,7 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
+ extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+ extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+
++extern void unregister_rt_sched_group(struct task_group *tg);
+ extern void free_rt_sched_group(struct task_group *tg);
+ extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+ extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+@@ -501,7 +501,7 @@ extern struct task_group *sched_create_group(struct task_group *parent);
+ extern void sched_online_group(struct task_group *tg,
+ struct task_group *parent);
+ extern void sched_destroy_group(struct task_group *tg);
+-extern void sched_offline_group(struct task_group *tg);
++extern void sched_release_group(struct task_group *tg);
+
+ extern void sched_move_task(struct task_struct *tsk);
+
+@@ -614,8 +614,8 @@ struct cfs_rq {
+ s64 runtime_remaining;
+
+ u64 throttled_clock;
+- u64 throttled_clock_task;
+- u64 throttled_clock_task_time;
++ u64 throttled_clock_pelt;
++ u64 throttled_clock_pelt_time;
+ int throttled;
+ int throttle_count;
+ struct list_head throttled_list;
+@@ -1003,6 +1003,7 @@ struct rq {
+
+ unsigned long cpu_capacity;
+ unsigned long cpu_capacity_orig;
++ unsigned long cpu_capacity_inverted;
+
+ struct callback_head *balance_callback;
+
+@@ -1148,6 +1149,14 @@ static inline bool is_migration_disabled(struct task_struct *p)
+ #endif
+ }
+
++DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
++
++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
++#define this_rq() this_cpu_ptr(&runqueues)
++#define task_rq(p) cpu_rq(task_cpu(p))
++#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
++#define raw_rq() raw_cpu_ptr(&runqueues)
++
+ struct sched_group;
+ #ifdef CONFIG_SCHED_CORE
+ static inline struct cpumask *sched_group_span(struct sched_group *sg);
+@@ -1235,7 +1244,7 @@ static inline bool sched_group_cookie_match(struct rq *rq,
+ return true;
+
+ for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
+- if (sched_core_cookie_match(rq, p))
++ if (sched_core_cookie_match(cpu_rq(cpu), p))
+ return true;
+ }
+ return false;
+@@ -1361,14 +1370,6 @@ static inline void update_idle_core(struct rq *rq)
+ static inline void update_idle_core(struct rq *rq) { }
+ #endif
+
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+-
+-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+-#define this_rq() this_cpu_ptr(&runqueues)
+-#define task_rq(p) cpu_rq(task_cpu(p))
+-#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+-#define raw_rq() raw_cpu_ptr(&runqueues)
+-
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ static inline struct task_struct *task_of(struct sched_entity *se)
+ {
+@@ -1717,6 +1718,11 @@ queue_balance_callback(struct rq *rq,
+ {
+ lockdep_assert_rq_held(rq);
+
++ /*
++ * Don't (re)queue an already queued item; nor queue anything when
++ * balance_push() is active, see the comment with
++ * balance_push_callback.
++ */
+ if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
+ return;
+
+@@ -1783,6 +1789,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
+ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
+ extern struct static_key_false sched_asym_cpucapacity;
+
++static __always_inline bool sched_asym_cpucap_active(void)
++{
++ return static_branch_unlikely(&sched_asym_cpucapacity);
++}
++
+ struct sched_group_capacity {
+ atomic_t ref;
+ /*
+@@ -2047,7 +2058,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
+
+ #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
+ #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+-#define WF_ON_CPU 0x40 /* Wakee is on_cpu */
+
+ #ifdef CONFIG_SMP
+ static_assert(WF_EXEC == SD_BALANCE_EXEC);
+@@ -2488,6 +2498,24 @@ unsigned long arch_scale_freq_capacity(int cpu)
+ }
+ #endif
+
++#ifdef CONFIG_SCHED_DEBUG
++/*
++ * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
++ * acquire rq lock instead of rq_lock(). So at the end of these two functions
++ * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
++ * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
++ */
++static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
++{
++ rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
++ /* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
++#ifdef CONFIG_SMP
++ rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
++#endif
++}
++#else
++static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
++#endif
+
+ #ifdef CONFIG_SMP
+
+@@ -2553,14 +2581,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+ __acquires(busiest->lock)
+ __acquires(this_rq->lock)
+ {
+- if (__rq_lockp(this_rq) == __rq_lockp(busiest))
+- return 0;
+-
+- if (likely(raw_spin_rq_trylock(busiest)))
++ if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
++ likely(raw_spin_rq_trylock(busiest))) {
++ double_rq_clock_clear_update(this_rq, busiest);
+ return 0;
++ }
+
+ if (rq_order_less(this_rq, busiest)) {
+ raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
++ double_rq_clock_clear_update(this_rq, busiest);
+ return 0;
+ }
+
+@@ -2654,6 +2683,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ BUG_ON(rq1 != rq2);
+ raw_spin_rq_lock(rq1);
+ __acquire(rq2->lock); /* Fake it out ;) */
++ double_rq_clock_clear_update(rq1, rq2);
+ }
+
+ /*
+@@ -2826,6 +2856,23 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+ #ifdef CONFIG_UCLAMP_TASK
+ unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+
++static inline unsigned long uclamp_rq_get(struct rq *rq,
++ enum uclamp_id clamp_id)
++{
++ return READ_ONCE(rq->uclamp[clamp_id].value);
++}
++
++static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
++ unsigned int value)
++{
++ WRITE_ONCE(rq->uclamp[clamp_id].value, value);
++}
++
++static inline bool uclamp_rq_is_idle(struct rq *rq)
++{
++ return rq->uclamp_flags & UCLAMP_FLAG_IDLE;
++}
++
+ /**
+ * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
+ * @rq: The rq to clamp against. Must not be NULL.
+@@ -2861,12 +2908,12 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ * Ignore last runnable task's max clamp, as this task will
+ * reset it. Similarly, no need to read the rq's min clamp.
+ */
+- if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
++ if (uclamp_rq_is_idle(rq))
+ goto out;
+ }
+
+- min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
+- max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
++ min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN));
++ max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX));
+ out:
+ /*
+ * Since CPU's {min,max}_util clamps are MAX aggregated considering
+@@ -2892,6 +2939,15 @@ static inline bool uclamp_is_used(void)
+ return static_branch_likely(&sched_uclamp_used);
+ }
+ #else /* CONFIG_UCLAMP_TASK */
++static inline unsigned long uclamp_eff_value(struct task_struct *p,
++ enum uclamp_id clamp_id)
++{
++ if (clamp_id == UCLAMP_MIN)
++ return 0;
++
++ return SCHED_CAPACITY_SCALE;
++}
++
+ static inline
+ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+ struct task_struct *p)
+@@ -2903,6 +2959,25 @@ static inline bool uclamp_is_used(void)
+ {
+ return false;
+ }
++
++static inline unsigned long uclamp_rq_get(struct rq *rq,
++ enum uclamp_id clamp_id)
++{
++ if (clamp_id == UCLAMP_MIN)
++ return 0;
++
++ return SCHED_CAPACITY_SCALE;
++}
++
++static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
++ unsigned int value)
++{
++}
++
++static inline bool uclamp_rq_is_idle(struct rq *rq)
++{
++ return false;
++}
+ #endif /* CONFIG_UCLAMP_TASK */
+
+ #ifdef arch_scale_freq_capacity
+@@ -2919,6 +2994,24 @@ static inline unsigned long capacity_orig_of(int cpu)
+ return cpu_rq(cpu)->cpu_capacity_orig;
+ }
+
++/*
++ * Returns inverted capacity if the CPU is in capacity inversion state.
++ * 0 otherwise.
++ *
++ * Capacity inversion detection only considers thermal impact where actual
++ * performance points (OPPs) gets dropped.
++ *
++ * Capacity inversion state happens when another performance domain that has
++ * equal or lower capacity_orig_of() becomes effectively larger than the perf
++ * domain this CPU belongs to due to thermal pressure throttling it hard.
++ *
++ * See comment in update_cpu_capacity().
++ */
++static inline unsigned long cpu_in_capacity_inversion(int cpu)
++{
++ return cpu_rq(cpu)->cpu_capacity_inverted;
++}
++
+ /**
+ * enum cpu_util_type - CPU utilization type
+ * @FREQUENCY_UTIL: Utilization used to select frequency
+diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
+index d8f8eb0c655ba..975703572bc0d 100644
+--- a/kernel/sched/stats.h
++++ b/kernel/sched/stats.h
+@@ -41,6 +41,7 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
+ #define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
+
+ #else /* !CONFIG_SCHEDSTATS: */
++
+ static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
+ static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
+ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
+@@ -53,8 +54,26 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt
+ # define schedstat_set(var, val) do { } while (0)
+ # define schedstat_val(var) 0
+ # define schedstat_val_or_zero(var) 0
++
+ #endif /* CONFIG_SCHEDSTATS */
+
++#ifdef CONFIG_FAIR_GROUP_SCHED
++struct sched_entity_stats {
++ struct sched_entity se;
++ struct sched_statistics stats;
++} __no_randomize_layout;
++#endif
++
++static inline struct sched_statistics *
++__schedstats_from_se(struct sched_entity *se)
++{
++#ifdef CONFIG_FAIR_GROUP_SCHED
++ if (!entity_is_task(se))
++ return &container_of(se, struct sched_entity_stats, se)->stats;
++#endif
++ return &task_of(se)->stats;
++}
++
+ #ifdef CONFIG_PSI
+ /*
+ * PSI tracks state that persists across sleeps, such as iowaits and
+@@ -69,6 +88,9 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
+ if (static_branch_likely(&psi_disabled))
+ return;
+
++ if (p->in_memstall)
++ set |= TSK_MEMSTALL_RUNNING;
++
+ if (!wakeup || p->sched_psi_wake_requeue) {
+ if (p->in_memstall)
+ set |= TSK_MEMSTALL;
+@@ -99,7 +121,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
+ return;
+
+ if (p->in_memstall)
+- clear |= TSK_MEMSTALL;
++ clear |= (TSK_MEMSTALL | TSK_MEMSTALL_RUNNING);
+
+ psi_task_change(p, clear, 0);
+ }
+diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
+index f988ebe3febb9..0b165a25f22f8 100644
+--- a/kernel/sched/stop_task.c
++++ b/kernel/sched/stop_task.c
+@@ -78,8 +78,8 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
+ if (unlikely((s64)delta_exec < 0))
+ delta_exec = 0;
+
+- schedstat_set(curr->se.statistics.exec_max,
+- max(curr->se.statistics.exec_max, delta_exec));
++ schedstat_set(curr->stats.exec_max,
++ max(curr->stats.exec_max, delta_exec));
+
+ curr->se.sum_exec_runtime += delta_exec;
+ account_group_exec_runtime(curr, delta_exec);
+diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
+index 76577d1642a5d..eca38107b32f1 100644
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -238,6 +238,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
+ }
+ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
+
++void __wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
++ /* POLLFREE must have cleared the queue. */
++ WARN_ON_ONCE(waitqueue_active(wq_head));
++}
++
+ /*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any
+diff --git a/kernel/scs.c b/kernel/scs.c
+index e2a71fc82fa06..579841be88646 100644
+--- a/kernel/scs.c
++++ b/kernel/scs.c
+@@ -78,6 +78,7 @@ void scs_free(void *s)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
++ kasan_unpoison_vmalloc(s, SCS_SIZE);
+ vfree_atomic(s);
+ }
+
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index 4d8f44a177274..db10e73d06e02 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -29,6 +29,9 @@
+ #include <linux/syscalls.h>
+ #include <linux/sysctl.h>
+
++/* Not exposed in headers: strictly internal use only. */
++#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
++
+ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
+ #include <asm/syscall.h>
+ #endif
+@@ -1010,6 +1013,7 @@ static void __secure_computing_strict(int this_syscall)
+ #ifdef SECCOMP_DEBUG
+ dump_stack();
+ #endif
++ current->seccomp.mode = SECCOMP_MODE_DEAD;
+ seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
+ do_exit(SIGKILL);
+ }
+@@ -1261,6 +1265,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
+ case SECCOMP_RET_KILL_THREAD:
+ case SECCOMP_RET_KILL_PROCESS:
+ default:
++ current->seccomp.mode = SECCOMP_MODE_DEAD;
+ seccomp_log(this_syscall, SIGSYS, action, true);
+ /* Dump core only if this is the last remaining thread. */
+ if (action != SECCOMP_RET_KILL_THREAD ||
+@@ -1309,6 +1314,11 @@ int __secure_computing(const struct seccomp_data *sd)
+ return 0;
+ case SECCOMP_MODE_FILTER:
+ return __seccomp_filter(this_syscall, sd, false);
++ /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
++ case SECCOMP_MODE_DEAD:
++ WARN_ON_ONCE(1);
++ do_exit(SIGKILL);
++ return -1;
+ default:
+ BUG();
+ }
+diff --git a/kernel/signal.c b/kernel/signal.c
+index 487bf4f5dadf4..c7dbb19219b9a 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -1298,6 +1298,12 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
+ return ret;
+ }
+
++enum sig_handler {
++ HANDLER_CURRENT, /* If reachable use the current handler */
++ HANDLER_SIG_DFL, /* Always use SIG_DFL handler semantics */
++ HANDLER_EXIT, /* Only visible as the process exit code */
++};
++
+ /*
+ * Force a signal that the process can't ignore: if necessary
+ * we unblock the signal and change any SIG_IGN to SIG_DFL.
+@@ -1310,7 +1316,8 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
+ * that is why we also clear SIGNAL_UNKILLABLE.
+ */
+ static int
+-force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool sigdfl)
++force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
++ enum sig_handler handler)
+ {
+ unsigned long int flags;
+ int ret, blocked, ignored;
+@@ -1321,8 +1328,10 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
+ action = &t->sighand->action[sig-1];
+ ignored = action->sa.sa_handler == SIG_IGN;
+ blocked = sigismember(&t->blocked, sig);
+- if (blocked || ignored || sigdfl) {
++ if (blocked || ignored || (handler != HANDLER_CURRENT)) {
+ action->sa.sa_handler = SIG_DFL;
++ if (handler == HANDLER_EXIT)
++ action->sa.sa_flags |= SA_IMMUTABLE;
+ if (blocked) {
+ sigdelset(&t->blocked, sig);
+ recalc_sigpending_and_wake(t);
+@@ -1330,9 +1339,10 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
+ }
+ /*
+ * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
+- * debugging to leave init killable.
++ * debugging to leave init killable. But HANDLER_EXIT is always fatal.
+ */
+- if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
++ if (action->sa.sa_handler == SIG_DFL &&
++ (!t->ptrace || (handler == HANDLER_EXIT)))
+ t->signal->flags &= ~SIGNAL_UNKILLABLE;
+ ret = send_signal(sig, info, t, PIDTYPE_PID);
+ spin_unlock_irqrestore(&t->sighand->siglock, flags);
+@@ -1342,7 +1352,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
+
+ int force_sig_info(struct kernel_siginfo *info)
+ {
+- return force_sig_info_to_task(info, current, false);
++ return force_sig_info_to_task(info, current, HANDLER_CURRENT);
+ }
+
+ /*
+@@ -1649,6 +1659,32 @@ void force_sig(int sig)
+ }
+ EXPORT_SYMBOL(force_sig);
+
++void force_fatal_sig(int sig)
++{
++ struct kernel_siginfo info;
++
++ clear_siginfo(&info);
++ info.si_signo = sig;
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = 0;
++ info.si_uid = 0;
++ force_sig_info_to_task(&info, current, HANDLER_SIG_DFL);
++}
++
++void force_exit_sig(int sig)
++{
++ struct kernel_siginfo info;
++
++ clear_siginfo(&info);
++ info.si_signo = sig;
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = 0;
++ info.si_uid = 0;
++ force_sig_info_to_task(&info, current, HANDLER_EXIT);
++}
++
+ /*
+ * When things go south during signal handling, we
+ * will force a SIGSEGV. And if the signal that caused
+@@ -1657,15 +1693,10 @@ EXPORT_SYMBOL(force_sig);
+ */
+ void force_sigsegv(int sig)
+ {
+- struct task_struct *p = current;
+-
+- if (sig == SIGSEGV) {
+- unsigned long flags;
+- spin_lock_irqsave(&p->sighand->siglock, flags);
+- p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
+- spin_unlock_irqrestore(&p->sighand->siglock, flags);
+- }
+- force_sig(SIGSEGV);
++ if (sig == SIGSEGV)
++ force_fatal_sig(SIGSEGV);
++ else
++ force_sig(SIGSEGV);
+ }
+
+ int force_sig_fault_to_task(int sig, int code, void __user *addr
+@@ -1684,7 +1715,7 @@ int force_sig_fault_to_task(int sig, int code, void __user *addr
+ info.si_flags = flags;
+ info.si_isr = isr;
+ #endif
+- return force_sig_info_to_task(&info, t, false);
++ return force_sig_info_to_task(&info, t, HANDLER_CURRENT);
+ }
+
+ int force_sig_fault(int sig, int code, void __user *addr
+@@ -1771,7 +1802,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
+ }
+ #endif
+
+-int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
++int send_sig_perf(void __user *addr, u32 type, u64 sig_data)
+ {
+ struct kernel_siginfo info;
+
+@@ -1783,7 +1814,18 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
+ info.si_perf_data = sig_data;
+ info.si_perf_type = type;
+
+- return force_sig_info(&info);
++ /*
++ * Signals generated by perf events should not terminate the whole
++ * process if SIGTRAP is blocked, however, delivering the signal
++ * asynchronously is better than not delivering at all. But tell user
++ * space if the signal was asynchronous, so it can clearly be
++ * distinguished from normal synchronous ones.
++ */
++ info.si_perf_flags = sigismember(&current->blocked, info.si_signo) ?
++ TRAP_PERF_FLAG_ASYNC :
++ 0;
++
++ return send_sig_info(info.si_signo, &info, current);
+ }
+
+ /**
+@@ -1804,7 +1846,8 @@ int force_sig_seccomp(int syscall, int reason, bool force_coredump)
+ info.si_errno = reason;
+ info.si_arch = syscall_get_arch(current);
+ info.si_syscall = syscall;
+- return force_sig_info_to_task(&info, current, force_coredump);
++ return force_sig_info_to_task(&info, current,
++ force_coredump ? HANDLER_EXIT : HANDLER_CURRENT);
+ }
+
+ /* For the crazy architectures that include trap information in
+@@ -1984,12 +2027,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
+ bool autoreap = false;
+ u64 utime, stime;
+
+- BUG_ON(sig == -1);
++ WARN_ON_ONCE(sig == -1);
+
+- /* do_notify_parent_cldstop should have been called instead. */
+- BUG_ON(task_is_stopped_or_traced(tsk));
++ /* do_notify_parent_cldstop should have been called instead. */
++ WARN_ON_ONCE(task_is_stopped_or_traced(tsk));
+
+- BUG_ON(!tsk->ptrace &&
++ WARN_ON_ONCE(!tsk->ptrace &&
+ (tsk->group_leader != tsk || !thread_group_empty(tsk)));
+
+ /* Wake up all pidfd waiters */
+@@ -2169,15 +2212,6 @@ static inline bool may_ptrace_stop(void)
+ return true;
+ }
+
+-/*
+- * Return non-zero if there is a SIGKILL that should be waking us up.
+- * Called with the siglock held.
+- */
+-static bool sigkill_pending(struct task_struct *tsk)
+-{
+- return sigismember(&tsk->pending.signal, SIGKILL) ||
+- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+-}
+
+ /*
+ * This must be called with current->sighand->siglock held.
+@@ -2204,17 +2238,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
+ * calling arch_ptrace_stop, so we must release it now.
+ * To preserve proper semantics, we must do this before
+ * any signal bookkeeping like checking group_stop_count.
+- * Meanwhile, a SIGKILL could come in before we retake the
+- * siglock. That must prevent us from sleeping in TASK_TRACED.
+- * So after regaining the lock, we must check for SIGKILL.
+ */
+ spin_unlock_irq(&current->sighand->siglock);
+ arch_ptrace_stop(exit_code, info);
+ spin_lock_irq(&current->sighand->siglock);
+- if (sigkill_pending(current))
+- return;
+ }
+
++ /*
++ * schedule() will not sleep if there is a pending signal that
++ * can awaken the task.
++ */
+ set_special_state(TASK_TRACED);
+
+ /*
+@@ -2688,19 +2721,19 @@ relock:
+ goto relock;
+ }
+
+- /* Has this task already been marked for death? */
+- if (signal_group_exit(signal)) {
+- ksig->info.si_signo = signr = SIGKILL;
+- sigdelset(&current->pending.signal, SIGKILL);
+- trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
+- &sighand->action[SIGKILL - 1]);
+- recalc_sigpending();
+- goto fatal;
+- }
+-
+ for (;;) {
+ struct k_sigaction *ka;
+
++ /* Has this task already been marked for death? */
++ if (signal_group_exit(signal)) {
++ ksig->info.si_signo = signr = SIGKILL;
++ sigdelset(&current->pending.signal, SIGKILL);
++ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
++ &sighand->action[SIGKILL - 1]);
++ recalc_sigpending();
++ goto fatal;
++ }
++
+ if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) &&
+ do_signal_stop(0))
+ goto relock;
+@@ -2739,7 +2772,8 @@ relock:
+ if (!signr)
+ break; /* will return 0 */
+
+- if (unlikely(current->ptrace) && signr != SIGKILL) {
++ if (unlikely(current->ptrace) && (signr != SIGKILL) &&
++ !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) {
+ signr = ptrace_signal(signr, &ksig->info);
+ if (!signr)
+ continue;
+@@ -3422,6 +3456,7 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
+ to->si_addr = ptr_to_compat(from->si_addr);
+ to->si_perf_data = from->si_perf_data;
+ to->si_perf_type = from->si_perf_type;
++ to->si_perf_flags = from->si_perf_flags;
+ break;
+ case SIL_CHLD:
+ to->si_pid = from->si_pid;
+@@ -3499,6 +3534,7 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
+ to->si_addr = compat_ptr(from->si_addr);
+ to->si_perf_data = from->si_perf_data;
+ to->si_perf_type = from->si_perf_type;
++ to->si_perf_flags = from->si_perf_flags;
+ break;
+ case SIL_CHLD:
+ to->si_pid = from->si_pid;
+@@ -4089,6 +4125,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
+ k = &p->sighand->action[sig-1];
+
+ spin_lock_irq(&p->sighand->siglock);
++ if (k->sa.sa_flags & SA_IMMUTABLE) {
++ spin_unlock_irq(&p->sighand->siglock);
++ return -EINVAL;
++ }
+ if (oact)
+ *oact = *k;
+
+@@ -4675,6 +4715,7 @@ static inline void siginfo_buildtime_checks(void)
+ CHECK_OFFSET(si_pkey);
+ CHECK_OFFSET(si_perf_data);
+ CHECK_OFFSET(si_perf_type);
++ CHECK_OFFSET(si_perf_flags);
+
+ /* sigpoll */
+ CHECK_OFFSET(si_band);
+diff --git a/kernel/smp.c b/kernel/smp.c
+index f43ede0ab183a..82825345432c5 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str)
+ if (val)
+ static_branch_enable(&csdlock_debug_enabled);
+
+- return 0;
++ return 1;
+ }
+-early_param("csdlock_debug", csdlock_debug);
++__setup("csdlock_debug=", csdlock_debug);
+
+ static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+@@ -579,7 +579,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
+
+ /* There shouldn't be any pending callbacks on an offline CPU. */
+ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
+- !warned && !llist_empty(head))) {
++ !warned && entry != NULL)) {
+ warned = true;
+ WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
+
+diff --git a/kernel/softirq.c b/kernel/softirq.c
+index 322b65d456767..41f470929e991 100644
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -595,7 +595,8 @@ void irq_enter_rcu(void)
+ {
+ __irq_enter_raw();
+
+- if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))
++ if (tick_nohz_full_cpu(smp_processor_id()) ||
++ (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
+ tick_irq_enter();
+
+ account_hardirq_enter(current);
+diff --git a/kernel/stackleak.c b/kernel/stackleak.c
+index ce161a8e8d975..dd07239ddff9f 100644
+--- a/kernel/stackleak.c
++++ b/kernel/stackleak.c
+@@ -48,7 +48,7 @@ int stack_erasing_sysctl(struct ctl_table *table, int write,
+ #define skip_erasing() false
+ #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
+-asmlinkage void notrace stackleak_erase(void)
++asmlinkage void noinstr stackleak_erase(void)
+ {
+ /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
+ unsigned long kstack_ptr = current->lowest_stack;
+@@ -102,9 +102,8 @@ asmlinkage void notrace stackleak_erase(void)
+ /* Reset the 'lowest_stack' value for the next syscall */
+ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+ }
+-NOKPROBE_SYMBOL(stackleak_erase);
+
+-void __used __no_caller_saved_registers notrace stackleak_track_stack(void)
++void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
+ {
+ unsigned long sp = current_stack_pointer;
+
+diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
+index 9f8117c7cfdde..9c625257023d2 100644
+--- a/kernel/stacktrace.c
++++ b/kernel/stacktrace.c
+@@ -13,6 +13,7 @@
+ #include <linux/export.h>
+ #include <linux/kallsyms.h>
+ #include <linux/stacktrace.h>
++#include <linux/interrupt.h>
+
+ /**
+ * stack_trace_print - Print the entries in the stack trace
+@@ -373,3 +374,32 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
+ #endif /* CONFIG_USER_STACKTRACE_SUPPORT */
+
+ #endif /* !CONFIG_ARCH_STACKWALK */
++
++static inline bool in_irqentry_text(unsigned long ptr)
++{
++ return (ptr >= (unsigned long)&__irqentry_text_start &&
++ ptr < (unsigned long)&__irqentry_text_end) ||
++ (ptr >= (unsigned long)&__softirqentry_text_start &&
++ ptr < (unsigned long)&__softirqentry_text_end);
++}
++
++/**
++ * filter_irq_stacks - Find first IRQ stack entry in trace
++ * @entries: Pointer to stack trace array
++ * @nr_entries: Number of entries in the storage array
++ *
++ * Return: Number of trace entries until IRQ stack starts.
++ */
++unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries)
++{
++ unsigned int i;
++
++ for (i = 0; i < nr_entries; i++) {
++ if (in_irqentry_text(entries[i])) {
++ /* Include the irqentry function into the stack. */
++ return i + 1;
++ }
++ }
++ return nr_entries;
++}
++EXPORT_SYMBOL_GPL(filter_irq_stacks);
+diff --git a/kernel/static_call.c b/kernel/static_call.c
+index 43ba0b1e0edbb..e9c3e69f38379 100644
+--- a/kernel/static_call.c
++++ b/kernel/static_call.c
+@@ -1,548 +1,8 @@
+ // SPDX-License-Identifier: GPL-2.0
+-#include <linux/init.h>
+ #include <linux/static_call.h>
+-#include <linux/bug.h>
+-#include <linux/smp.h>
+-#include <linux/sort.h>
+-#include <linux/slab.h>
+-#include <linux/module.h>
+-#include <linux/cpu.h>
+-#include <linux/processor.h>
+-#include <asm/sections.h>
+-
+-extern struct static_call_site __start_static_call_sites[],
+- __stop_static_call_sites[];
+-extern struct static_call_tramp_key __start_static_call_tramp_key[],
+- __stop_static_call_tramp_key[];
+-
+-static bool static_call_initialized;
+-
+-/* mutex to protect key modules/sites */
+-static DEFINE_MUTEX(static_call_mutex);
+-
+-static void static_call_lock(void)
+-{
+- mutex_lock(&static_call_mutex);
+-}
+-
+-static void static_call_unlock(void)
+-{
+- mutex_unlock(&static_call_mutex);
+-}
+-
+-static inline void *static_call_addr(struct static_call_site *site)
+-{
+- return (void *)((long)site->addr + (long)&site->addr);
+-}
+-
+-static inline unsigned long __static_call_key(const struct static_call_site *site)
+-{
+- return (long)site->key + (long)&site->key;
+-}
+-
+-static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+-{
+- return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
+-}
+-
+-/* These assume the key is word-aligned. */
+-static inline bool static_call_is_init(struct static_call_site *site)
+-{
+- return __static_call_key(site) & STATIC_CALL_SITE_INIT;
+-}
+-
+-static inline bool static_call_is_tail(struct static_call_site *site)
+-{
+- return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
+-}
+-
+-static inline void static_call_set_init(struct static_call_site *site)
+-{
+- site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
+- (long)&site->key;
+-}
+-
+-static int static_call_site_cmp(const void *_a, const void *_b)
+-{
+- const struct static_call_site *a = _a;
+- const struct static_call_site *b = _b;
+- const struct static_call_key *key_a = static_call_key(a);
+- const struct static_call_key *key_b = static_call_key(b);
+-
+- if (key_a < key_b)
+- return -1;
+-
+- if (key_a > key_b)
+- return 1;
+-
+- return 0;
+-}
+-
+-static void static_call_site_swap(void *_a, void *_b, int size)
+-{
+- long delta = (unsigned long)_a - (unsigned long)_b;
+- struct static_call_site *a = _a;
+- struct static_call_site *b = _b;
+- struct static_call_site tmp = *a;
+-
+- a->addr = b->addr - delta;
+- a->key = b->key - delta;
+-
+- b->addr = tmp.addr + delta;
+- b->key = tmp.key + delta;
+-}
+-
+-static inline void static_call_sort_entries(struct static_call_site *start,
+- struct static_call_site *stop)
+-{
+- sort(start, stop - start, sizeof(struct static_call_site),
+- static_call_site_cmp, static_call_site_swap);
+-}
+-
+-static inline bool static_call_key_has_mods(struct static_call_key *key)
+-{
+- return !(key->type & 1);
+-}
+-
+-static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
+-{
+- if (!static_call_key_has_mods(key))
+- return NULL;
+-
+- return key->mods;
+-}
+-
+-static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
+-{
+- if (static_call_key_has_mods(key))
+- return NULL;
+-
+- return (struct static_call_site *)(key->type & ~1);
+-}
+-
+-void __static_call_update(struct static_call_key *key, void *tramp, void *func)
+-{
+- struct static_call_site *site, *stop;
+- struct static_call_mod *site_mod, first;
+-
+- cpus_read_lock();
+- static_call_lock();
+-
+- if (key->func == func)
+- goto done;
+-
+- key->func = func;
+-
+- arch_static_call_transform(NULL, tramp, func, false);
+-
+- /*
+- * If uninitialized, we'll not update the callsites, but they still
+- * point to the trampoline and we just patched that.
+- */
+- if (WARN_ON_ONCE(!static_call_initialized))
+- goto done;
+-
+- first = (struct static_call_mod){
+- .next = static_call_key_next(key),
+- .mod = NULL,
+- .sites = static_call_key_sites(key),
+- };
+-
+- for (site_mod = &first; site_mod; site_mod = site_mod->next) {
+- bool init = system_state < SYSTEM_RUNNING;
+- struct module *mod = site_mod->mod;
+-
+- if (!site_mod->sites) {
+- /*
+- * This can happen if the static call key is defined in
+- * a module which doesn't use it.
+- *
+- * It also happens in the has_mods case, where the
+- * 'first' entry has no sites associated with it.
+- */
+- continue;
+- }
+-
+- stop = __stop_static_call_sites;
+-
+- if (mod) {
+-#ifdef CONFIG_MODULES
+- stop = mod->static_call_sites +
+- mod->num_static_call_sites;
+- init = mod->state == MODULE_STATE_COMING;
+-#endif
+- }
+-
+- for (site = site_mod->sites;
+- site < stop && static_call_key(site) == key; site++) {
+- void *site_addr = static_call_addr(site);
+-
+- if (!init && static_call_is_init(site))
+- continue;
+-
+- if (!kernel_text_address((unsigned long)site_addr)) {
+- /*
+- * This skips patching built-in __exit, which
+- * is part of init_section_contains() but is
+- * not part of kernel_text_address().
+- *
+- * Skipping built-in __exit is fine since it
+- * will never be executed.
+- */
+- WARN_ONCE(!static_call_is_init(site),
+- "can't patch static call site at %pS",
+- site_addr);
+- continue;
+- }
+-
+- arch_static_call_transform(site_addr, NULL, func,
+- static_call_is_tail(site));
+- }
+- }
+-
+-done:
+- static_call_unlock();
+- cpus_read_unlock();
+-}
+-EXPORT_SYMBOL_GPL(__static_call_update);
+-
+-static int __static_call_init(struct module *mod,
+- struct static_call_site *start,
+- struct static_call_site *stop)
+-{
+- struct static_call_site *site;
+- struct static_call_key *key, *prev_key = NULL;
+- struct static_call_mod *site_mod;
+-
+- if (start == stop)
+- return 0;
+-
+- static_call_sort_entries(start, stop);
+-
+- for (site = start; site < stop; site++) {
+- void *site_addr = static_call_addr(site);
+-
+- if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
+- (!mod && init_section_contains(site_addr, 1)))
+- static_call_set_init(site);
+-
+- key = static_call_key(site);
+- if (key != prev_key) {
+- prev_key = key;
+-
+- /*
+- * For vmlinux (!mod) avoid the allocation by storing
+- * the sites pointer in the key itself. Also see
+- * __static_call_update()'s @first.
+- *
+- * This allows architectures (eg. x86) to call
+- * static_call_init() before memory allocation works.
+- */
+- if (!mod) {
+- key->sites = site;
+- key->type |= 1;
+- goto do_transform;
+- }
+-
+- site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+- if (!site_mod)
+- return -ENOMEM;
+-
+- /*
+- * When the key has a direct sites pointer, extract
+- * that into an explicit struct static_call_mod, so we
+- * can have a list of modules.
+- */
+- if (static_call_key_sites(key)) {
+- site_mod->mod = NULL;
+- site_mod->next = NULL;
+- site_mod->sites = static_call_key_sites(key);
+-
+- key->mods = site_mod;
+-
+- site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+- if (!site_mod)
+- return -ENOMEM;
+- }
+-
+- site_mod->mod = mod;
+- site_mod->sites = site;
+- site_mod->next = static_call_key_next(key);
+- key->mods = site_mod;
+- }
+-
+-do_transform:
+- arch_static_call_transform(site_addr, NULL, key->func,
+- static_call_is_tail(site));
+- }
+-
+- return 0;
+-}
+-
+-static int addr_conflict(struct static_call_site *site, void *start, void *end)
+-{
+- unsigned long addr = (unsigned long)static_call_addr(site);
+-
+- if (addr <= (unsigned long)end &&
+- addr + CALL_INSN_SIZE > (unsigned long)start)
+- return 1;
+-
+- return 0;
+-}
+-
+-static int __static_call_text_reserved(struct static_call_site *iter_start,
+- struct static_call_site *iter_stop,
+- void *start, void *end, bool init)
+-{
+- struct static_call_site *iter = iter_start;
+-
+- while (iter < iter_stop) {
+- if (init || !static_call_is_init(iter)) {
+- if (addr_conflict(iter, start, end))
+- return 1;
+- }
+- iter++;
+- }
+-
+- return 0;
+-}
+-
+-#ifdef CONFIG_MODULES
+-
+-static int __static_call_mod_text_reserved(void *start, void *end)
+-{
+- struct module *mod;
+- int ret;
+-
+- preempt_disable();
+- mod = __module_text_address((unsigned long)start);
+- WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
+- if (!try_module_get(mod))
+- mod = NULL;
+- preempt_enable();
+-
+- if (!mod)
+- return 0;
+-
+- ret = __static_call_text_reserved(mod->static_call_sites,
+- mod->static_call_sites + mod->num_static_call_sites,
+- start, end, mod->state == MODULE_STATE_COMING);
+-
+- module_put(mod);
+-
+- return ret;
+-}
+-
+-static unsigned long tramp_key_lookup(unsigned long addr)
+-{
+- struct static_call_tramp_key *start = __start_static_call_tramp_key;
+- struct static_call_tramp_key *stop = __stop_static_call_tramp_key;
+- struct static_call_tramp_key *tramp_key;
+-
+- for (tramp_key = start; tramp_key != stop; tramp_key++) {
+- unsigned long tramp;
+-
+- tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp;
+- if (tramp == addr)
+- return (long)tramp_key->key + (long)&tramp_key->key;
+- }
+-
+- return 0;
+-}
+-
+-static int static_call_add_module(struct module *mod)
+-{
+- struct static_call_site *start = mod->static_call_sites;
+- struct static_call_site *stop = start + mod->num_static_call_sites;
+- struct static_call_site *site;
+-
+- for (site = start; site != stop; site++) {
+- unsigned long s_key = __static_call_key(site);
+- unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
+- unsigned long key;
+-
+- /*
+- * Is the key is exported, 'addr' points to the key, which
+- * means modules are allowed to call static_call_update() on
+- * it.
+- *
+- * Otherwise, the key isn't exported, and 'addr' points to the
+- * trampoline so we need to lookup the key.
+- *
+- * We go through this dance to prevent crazy modules from
+- * abusing sensitive static calls.
+- */
+- if (!kernel_text_address(addr))
+- continue;
+-
+- key = tramp_key_lookup(addr);
+- if (!key) {
+- pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n",
+- static_call_addr(site));
+- return -EINVAL;
+- }
+-
+- key |= s_key & STATIC_CALL_SITE_FLAGS;
+- site->key = key - (long)&site->key;
+- }
+-
+- return __static_call_init(mod, start, stop);
+-}
+-
+-static void static_call_del_module(struct module *mod)
+-{
+- struct static_call_site *start = mod->static_call_sites;
+- struct static_call_site *stop = mod->static_call_sites +
+- mod->num_static_call_sites;
+- struct static_call_key *key, *prev_key = NULL;
+- struct static_call_mod *site_mod, **prev;
+- struct static_call_site *site;
+-
+- for (site = start; site < stop; site++) {
+- key = static_call_key(site);
+- if (key == prev_key)
+- continue;
+-
+- prev_key = key;
+-
+- for (prev = &key->mods, site_mod = key->mods;
+- site_mod && site_mod->mod != mod;
+- prev = &site_mod->next, site_mod = site_mod->next)
+- ;
+-
+- if (!site_mod)
+- continue;
+-
+- *prev = site_mod->next;
+- kfree(site_mod);
+- }
+-}
+-
+-static int static_call_module_notify(struct notifier_block *nb,
+- unsigned long val, void *data)
+-{
+- struct module *mod = data;
+- int ret = 0;
+-
+- cpus_read_lock();
+- static_call_lock();
+-
+- switch (val) {
+- case MODULE_STATE_COMING:
+- ret = static_call_add_module(mod);
+- if (ret) {
+- WARN(1, "Failed to allocate memory for static calls");
+- static_call_del_module(mod);
+- }
+- break;
+- case MODULE_STATE_GOING:
+- static_call_del_module(mod);
+- break;
+- }
+-
+- static_call_unlock();
+- cpus_read_unlock();
+-
+- return notifier_from_errno(ret);
+-}
+-
+-static struct notifier_block static_call_module_nb = {
+- .notifier_call = static_call_module_notify,
+-};
+-
+-#else
+-
+-static inline int __static_call_mod_text_reserved(void *start, void *end)
+-{
+- return 0;
+-}
+-
+-#endif /* CONFIG_MODULES */
+-
+-int static_call_text_reserved(void *start, void *end)
+-{
+- bool init = system_state < SYSTEM_RUNNING;
+- int ret = __static_call_text_reserved(__start_static_call_sites,
+- __stop_static_call_sites, start, end, init);
+-
+- if (ret)
+- return ret;
+-
+- return __static_call_mod_text_reserved(start, end);
+-}
+-
+-int __init static_call_init(void)
+-{
+- int ret;
+-
+- if (static_call_initialized)
+- return 0;
+-
+- cpus_read_lock();
+- static_call_lock();
+- ret = __static_call_init(NULL, __start_static_call_sites,
+- __stop_static_call_sites);
+- static_call_unlock();
+- cpus_read_unlock();
+-
+- if (ret) {
+- pr_err("Failed to allocate memory for static_call!\n");
+- BUG();
+- }
+-
+- static_call_initialized = true;
+-
+-#ifdef CONFIG_MODULES
+- register_module_notifier(&static_call_module_nb);
+-#endif
+- return 0;
+-}
+-early_initcall(static_call_init);
+
+ long __static_call_return0(void)
+ {
+ return 0;
+ }
+-
+-#ifdef CONFIG_STATIC_CALL_SELFTEST
+-
+-static int func_a(int x)
+-{
+- return x+1;
+-}
+-
+-static int func_b(int x)
+-{
+- return x+2;
+-}
+-
+-DEFINE_STATIC_CALL(sc_selftest, func_a);
+-
+-static struct static_call_data {
+- int (*func)(int);
+- int val;
+- int expect;
+-} static_call_data [] __initdata = {
+- { NULL, 2, 3 },
+- { func_b, 2, 4 },
+- { func_a, 2, 3 }
+-};
+-
+-static int __init test_static_call_init(void)
+-{
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) {
+- struct static_call_data *scd = &static_call_data[i];
+-
+- if (scd->func)
+- static_call_update(sc_selftest, scd->func);
+-
+- WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
+- }
+-
+- return 0;
+-}
+-early_initcall(test_static_call_init);
+-
+-#endif /* CONFIG_STATIC_CALL_SELFTEST */
++EXPORT_SYMBOL_GPL(__static_call_return0);
+diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
+new file mode 100644
+index 0000000000000..dc5665b628140
+--- /dev/null
++++ b/kernel/static_call_inline.c
+@@ -0,0 +1,543 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/init.h>
++#include <linux/static_call.h>
++#include <linux/bug.h>
++#include <linux/smp.h>
++#include <linux/sort.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/cpu.h>
++#include <linux/processor.h>
++#include <asm/sections.h>
++
++extern struct static_call_site __start_static_call_sites[],
++ __stop_static_call_sites[];
++extern struct static_call_tramp_key __start_static_call_tramp_key[],
++ __stop_static_call_tramp_key[];
++
++static bool static_call_initialized;
++
++/* mutex to protect key modules/sites */
++static DEFINE_MUTEX(static_call_mutex);
++
++static void static_call_lock(void)
++{
++ mutex_lock(&static_call_mutex);
++}
++
++static void static_call_unlock(void)
++{
++ mutex_unlock(&static_call_mutex);
++}
++
++static inline void *static_call_addr(struct static_call_site *site)
++{
++ return (void *)((long)site->addr + (long)&site->addr);
++}
++
++static inline unsigned long __static_call_key(const struct static_call_site *site)
++{
++ return (long)site->key + (long)&site->key;
++}
++
++static inline struct static_call_key *static_call_key(const struct static_call_site *site)
++{
++ return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
++}
++
++/* These assume the key is word-aligned. */
++static inline bool static_call_is_init(struct static_call_site *site)
++{
++ return __static_call_key(site) & STATIC_CALL_SITE_INIT;
++}
++
++static inline bool static_call_is_tail(struct static_call_site *site)
++{
++ return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
++}
++
++static inline void static_call_set_init(struct static_call_site *site)
++{
++ site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
++ (long)&site->key;
++}
++
++static int static_call_site_cmp(const void *_a, const void *_b)
++{
++ const struct static_call_site *a = _a;
++ const struct static_call_site *b = _b;
++ const struct static_call_key *key_a = static_call_key(a);
++ const struct static_call_key *key_b = static_call_key(b);
++
++ if (key_a < key_b)
++ return -1;
++
++ if (key_a > key_b)
++ return 1;
++
++ return 0;
++}
++
++static void static_call_site_swap(void *_a, void *_b, int size)
++{
++ long delta = (unsigned long)_a - (unsigned long)_b;
++ struct static_call_site *a = _a;
++ struct static_call_site *b = _b;
++ struct static_call_site tmp = *a;
++
++ a->addr = b->addr - delta;
++ a->key = b->key - delta;
++
++ b->addr = tmp.addr + delta;
++ b->key = tmp.key + delta;
++}
++
++static inline void static_call_sort_entries(struct static_call_site *start,
++ struct static_call_site *stop)
++{
++ sort(start, stop - start, sizeof(struct static_call_site),
++ static_call_site_cmp, static_call_site_swap);
++}
++
++static inline bool static_call_key_has_mods(struct static_call_key *key)
++{
++ return !(key->type & 1);
++}
++
++static inline struct static_call_mod *static_call_key_next(struct static_call_key *key)
++{
++ if (!static_call_key_has_mods(key))
++ return NULL;
++
++ return key->mods;
++}
++
++static inline struct static_call_site *static_call_key_sites(struct static_call_key *key)
++{
++ if (static_call_key_has_mods(key))
++ return NULL;
++
++ return (struct static_call_site *)(key->type & ~1);
++}
++
++void __static_call_update(struct static_call_key *key, void *tramp, void *func)
++{
++ struct static_call_site *site, *stop;
++ struct static_call_mod *site_mod, first;
++
++ cpus_read_lock();
++ static_call_lock();
++
++ if (key->func == func)
++ goto done;
++
++ key->func = func;
++
++ arch_static_call_transform(NULL, tramp, func, false);
++
++ /*
++ * If uninitialized, we'll not update the callsites, but they still
++ * point to the trampoline and we just patched that.
++ */
++ if (WARN_ON_ONCE(!static_call_initialized))
++ goto done;
++
++ first = (struct static_call_mod){
++ .next = static_call_key_next(key),
++ .mod = NULL,
++ .sites = static_call_key_sites(key),
++ };
++
++ for (site_mod = &first; site_mod; site_mod = site_mod->next) {
++ bool init = system_state < SYSTEM_RUNNING;
++ struct module *mod = site_mod->mod;
++
++ if (!site_mod->sites) {
++ /*
++ * This can happen if the static call key is defined in
++ * a module which doesn't use it.
++ *
++ * It also happens in the has_mods case, where the
++ * 'first' entry has no sites associated with it.
++ */
++ continue;
++ }
++
++ stop = __stop_static_call_sites;
++
++ if (mod) {
++#ifdef CONFIG_MODULES
++ stop = mod->static_call_sites +
++ mod->num_static_call_sites;
++ init = mod->state == MODULE_STATE_COMING;
++#endif
++ }
++
++ for (site = site_mod->sites;
++ site < stop && static_call_key(site) == key; site++) {
++ void *site_addr = static_call_addr(site);
++
++ if (!init && static_call_is_init(site))
++ continue;
++
++ if (!kernel_text_address((unsigned long)site_addr)) {
++ /*
++ * This skips patching built-in __exit, which
++ * is part of init_section_contains() but is
++ * not part of kernel_text_address().
++ *
++ * Skipping built-in __exit is fine since it
++ * will never be executed.
++ */
++ WARN_ONCE(!static_call_is_init(site),
++ "can't patch static call site at %pS",
++ site_addr);
++ continue;
++ }
++
++ arch_static_call_transform(site_addr, NULL, func,
++ static_call_is_tail(site));
++ }
++ }
++
++done:
++ static_call_unlock();
++ cpus_read_unlock();
++}
++EXPORT_SYMBOL_GPL(__static_call_update);
++
++static int __static_call_init(struct module *mod,
++ struct static_call_site *start,
++ struct static_call_site *stop)
++{
++ struct static_call_site *site;
++ struct static_call_key *key, *prev_key = NULL;
++ struct static_call_mod *site_mod;
++
++ if (start == stop)
++ return 0;
++
++ static_call_sort_entries(start, stop);
++
++ for (site = start; site < stop; site++) {
++ void *site_addr = static_call_addr(site);
++
++ if ((mod && within_module_init((unsigned long)site_addr, mod)) ||
++ (!mod && init_section_contains(site_addr, 1)))
++ static_call_set_init(site);
++
++ key = static_call_key(site);
++ if (key != prev_key) {
++ prev_key = key;
++
++ /*
++ * For vmlinux (!mod) avoid the allocation by storing
++ * the sites pointer in the key itself. Also see
++ * __static_call_update()'s @first.
++ *
++ * This allows architectures (eg. x86) to call
++ * static_call_init() before memory allocation works.
++ */
++ if (!mod) {
++ key->sites = site;
++ key->type |= 1;
++ goto do_transform;
++ }
++
++ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
++ if (!site_mod)
++ return -ENOMEM;
++
++ /*
++ * When the key has a direct sites pointer, extract
++ * that into an explicit struct static_call_mod, so we
++ * can have a list of modules.
++ */
++ if (static_call_key_sites(key)) {
++ site_mod->mod = NULL;
++ site_mod->next = NULL;
++ site_mod->sites = static_call_key_sites(key);
++
++ key->mods = site_mod;
++
++ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
++ if (!site_mod)
++ return -ENOMEM;
++ }
++
++ site_mod->mod = mod;
++ site_mod->sites = site;
++ site_mod->next = static_call_key_next(key);
++ key->mods = site_mod;
++ }
++
++do_transform:
++ arch_static_call_transform(site_addr, NULL, key->func,
++ static_call_is_tail(site));
++ }
++
++ return 0;
++}
++
++static int addr_conflict(struct static_call_site *site, void *start, void *end)
++{
++ unsigned long addr = (unsigned long)static_call_addr(site);
++
++ if (addr <= (unsigned long)end &&
++ addr + CALL_INSN_SIZE > (unsigned long)start)
++ return 1;
++
++ return 0;
++}
++
++static int __static_call_text_reserved(struct static_call_site *iter_start,
++ struct static_call_site *iter_stop,
++ void *start, void *end, bool init)
++{
++ struct static_call_site *iter = iter_start;
++
++ while (iter < iter_stop) {
++ if (init || !static_call_is_init(iter)) {
++ if (addr_conflict(iter, start, end))
++ return 1;
++ }
++ iter++;
++ }
++
++ return 0;
++}
++
++#ifdef CONFIG_MODULES
++
++static int __static_call_mod_text_reserved(void *start, void *end)
++{
++ struct module *mod;
++ int ret;
++
++ preempt_disable();
++ mod = __module_text_address((unsigned long)start);
++ WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
++ if (!try_module_get(mod))
++ mod = NULL;
++ preempt_enable();
++
++ if (!mod)
++ return 0;
++
++ ret = __static_call_text_reserved(mod->static_call_sites,
++ mod->static_call_sites + mod->num_static_call_sites,
++ start, end, mod->state == MODULE_STATE_COMING);
++
++ module_put(mod);
++
++ return ret;
++}
++
++static unsigned long tramp_key_lookup(unsigned long addr)
++{
++ struct static_call_tramp_key *start = __start_static_call_tramp_key;
++ struct static_call_tramp_key *stop = __stop_static_call_tramp_key;
++ struct static_call_tramp_key *tramp_key;
++
++ for (tramp_key = start; tramp_key != stop; tramp_key++) {
++ unsigned long tramp;
++
++ tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp;
++ if (tramp == addr)
++ return (long)tramp_key->key + (long)&tramp_key->key;
++ }
++
++ return 0;
++}
++
++static int static_call_add_module(struct module *mod)
++{
++ struct static_call_site *start = mod->static_call_sites;
++ struct static_call_site *stop = start + mod->num_static_call_sites;
++ struct static_call_site *site;
++
++ for (site = start; site != stop; site++) {
++ unsigned long s_key = __static_call_key(site);
++ unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
++ unsigned long key;
++
++ /*
++ * If the key is exported, 'addr' points to the key, which
++ * means modules are allowed to call static_call_update() on
++ * it.
++ *
++ * Otherwise, the key isn't exported, and 'addr' points to the
++ * trampoline so we need to lookup the key.
++ *
++ * We go through this dance to prevent crazy modules from
++ * abusing sensitive static calls.
++ */
++ if (!kernel_text_address(addr))
++ continue;
++
++ key = tramp_key_lookup(addr);
++ if (!key) {
++ pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n",
++ static_call_addr(site));
++ return -EINVAL;
++ }
++
++ key |= s_key & STATIC_CALL_SITE_FLAGS;
++ site->key = key - (long)&site->key;
++ }
++
++ return __static_call_init(mod, start, stop);
++}
++
++static void static_call_del_module(struct module *mod)
++{
++ struct static_call_site *start = mod->static_call_sites;
++ struct static_call_site *stop = mod->static_call_sites +
++ mod->num_static_call_sites;
++ struct static_call_key *key, *prev_key = NULL;
++ struct static_call_mod *site_mod, **prev;
++ struct static_call_site *site;
++
++ for (site = start; site < stop; site++) {
++ key = static_call_key(site);
++ if (key == prev_key)
++ continue;
++
++ prev_key = key;
++
++ for (prev = &key->mods, site_mod = key->mods;
++ site_mod && site_mod->mod != mod;
++ prev = &site_mod->next, site_mod = site_mod->next)
++ ;
++
++ if (!site_mod)
++ continue;
++
++ *prev = site_mod->next;
++ kfree(site_mod);
++ }
++}
++
++static int static_call_module_notify(struct notifier_block *nb,
++ unsigned long val, void *data)
++{
++ struct module *mod = data;
++ int ret = 0;
++
++ cpus_read_lock();
++ static_call_lock();
++
++ switch (val) {
++ case MODULE_STATE_COMING:
++ ret = static_call_add_module(mod);
++ if (ret) {
++ WARN(1, "Failed to allocate memory for static calls");
++ static_call_del_module(mod);
++ }
++ break;
++ case MODULE_STATE_GOING:
++ static_call_del_module(mod);
++ break;
++ }
++
++ static_call_unlock();
++ cpus_read_unlock();
++
++ return notifier_from_errno(ret);
++}
++
++static struct notifier_block static_call_module_nb = {
++ .notifier_call = static_call_module_notify,
++};
++
++#else
++
++static inline int __static_call_mod_text_reserved(void *start, void *end)
++{
++ return 0;
++}
++
++#endif /* CONFIG_MODULES */
++
++int static_call_text_reserved(void *start, void *end)
++{
++ bool init = system_state < SYSTEM_RUNNING;
++ int ret = __static_call_text_reserved(__start_static_call_sites,
++ __stop_static_call_sites, start, end, init);
++
++ if (ret)
++ return ret;
++
++ return __static_call_mod_text_reserved(start, end);
++}
++
++int __init static_call_init(void)
++{
++ int ret;
++
++ if (static_call_initialized)
++ return 0;
++
++ cpus_read_lock();
++ static_call_lock();
++ ret = __static_call_init(NULL, __start_static_call_sites,
++ __stop_static_call_sites);
++ static_call_unlock();
++ cpus_read_unlock();
++
++ if (ret) {
++ pr_err("Failed to allocate memory for static_call!\n");
++ BUG();
++ }
++
++ static_call_initialized = true;
++
++#ifdef CONFIG_MODULES
++ register_module_notifier(&static_call_module_nb);
++#endif
++ return 0;
++}
++early_initcall(static_call_init);
++
++#ifdef CONFIG_STATIC_CALL_SELFTEST
++
++static int func_a(int x)
++{
++ return x+1;
++}
++
++static int func_b(int x)
++{
++ return x+2;
++}
++
++DEFINE_STATIC_CALL(sc_selftest, func_a);
++
++static struct static_call_data {
++ int (*func)(int);
++ int val;
++ int expect;
++} static_call_data [] __initdata = {
++ { NULL, 2, 3 },
++ { func_b, 2, 4 },
++ { func_a, 2, 3 }
++};
++
++static int __init test_static_call_init(void)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) {
++ struct static_call_data *scd = &static_call_data[i];
++
++ if (scd->func)
++ static_call_update(sc_selftest, scd->func);
++
++ WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
++ }
++
++ return 0;
++}
++early_initcall(test_static_call_init);
++
++#endif /* CONFIG_STATIC_CALL_SELFTEST */
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 8fdac0d90504a..2efab44746356 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -472,6 +472,16 @@ static int set_user(struct cred *new)
+ if (!new_user)
+ return -EAGAIN;
+
++ free_uid(new->user);
++ new->user = new_user;
++ return 0;
++}
++
++static void flag_nproc_exceeded(struct cred *new)
++{
++ if (new->ucounts == current_ucounts())
++ return;
++
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+@@ -480,15 +490,10 @@ static int set_user(struct cred *new)
+ * failure to the execve() stage.
+ */
+ if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
+- new_user != INIT_USER &&
+- !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
++ new->user != INIT_USER)
+ current->flags |= PF_NPROC_EXCEEDED;
+ else
+ current->flags &= ~PF_NPROC_EXCEEDED;
+-
+- free_uid(new->user);
+- new->user = new_user;
+- return 0;
+ }
+
+ /*
+@@ -563,6 +568,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
+ if (retval < 0)
+ goto error;
+
++ flag_nproc_exceeded(new);
+ return commit_creds(new);
+
+ error:
+@@ -625,6 +631,7 @@ long __sys_setuid(uid_t uid)
+ if (retval < 0)
+ goto error;
+
++ flag_nproc_exceeded(new);
+ return commit_creds(new);
+
+ error:
+@@ -649,6 +656,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+ struct cred *new;
+ int retval;
+ kuid_t kruid, keuid, ksuid;
++ bool ruid_new, euid_new, suid_new;
+
+ kruid = make_kuid(ns, ruid);
+ keuid = make_kuid(ns, euid);
+@@ -663,25 +671,29 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+ if ((suid != (uid_t) -1) && !uid_valid(ksuid))
+ return -EINVAL;
+
++ old = current_cred();
++
++ /* check for no-op */
++ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
++ (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
++ uid_eq(keuid, old->fsuid))) &&
++ (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
++ return 0;
++
++ ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
++ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
++ euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
++ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
++ suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
++ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
++ if ((ruid_new || euid_new || suid_new) &&
++ !ns_capable_setid(old->user_ns, CAP_SETUID))
++ return -EPERM;
++
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+- old = current_cred();
+-
+- retval = -EPERM;
+- if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
+- if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
+- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
+- goto error;
+- if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
+- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
+- goto error;
+- if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
+- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
+- goto error;
+- }
+-
+ if (ruid != (uid_t) -1) {
+ new->uid = kruid;
+ if (!uid_eq(kruid, old->uid)) {
+@@ -704,6 +716,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+ if (retval < 0)
+ goto error;
+
++ flag_nproc_exceeded(new);
+ return commit_creds(new);
+
+ error:
+@@ -745,6 +758,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
+ struct cred *new;
+ int retval;
+ kgid_t krgid, kegid, ksgid;
++ bool rgid_new, egid_new, sgid_new;
+
+ krgid = make_kgid(ns, rgid);
+ kegid = make_kgid(ns, egid);
+@@ -757,23 +771,28 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
+ if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
+ return -EINVAL;
+
++ old = current_cred();
++
++ /* check for no-op */
++ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
++ (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
++ gid_eq(kegid, old->fsgid))) &&
++ (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
++ return 0;
++
++ rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
++ !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
++ egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
++ !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
++ sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
++ !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
++ if ((rgid_new || egid_new || sgid_new) &&
++ !ns_capable_setid(old->user_ns, CAP_SETGID))
++ return -EPERM;
++
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+- old = current_cred();
+-
+- retval = -EPERM;
+- if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
+- if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
+- !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
+- goto error;
+- if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
+- !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
+- goto error;
+- if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
+- !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
+- goto error;
+- }
+
+ if (rgid != (gid_t) -1)
+ new->gid = krgid;
+@@ -1567,6 +1586,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
+
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
++ resource = array_index_nospec(resource, RLIM_NLIMITS);
++
+ if (new_rlim) {
+ if (new_rlim->rlim_cur > new_rlim->rlim_max)
+ return -EINVAL;
+diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
+index f43d89d92860d..126380696f9c5 100644
+--- a/kernel/sys_ni.c
++++ b/kernel/sys_ni.c
+@@ -276,6 +276,7 @@ COND_SYSCALL(landlock_restrict_self);
+
+ /* mm/fadvise.c */
+ COND_SYSCALL(fadvise64_64);
++COND_SYSCALL_COMPAT(fadvise64_64);
+
+ /* mm/, CONFIG_MMU only */
+ COND_SYSCALL(swapon);
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 083be6af29d70..928798f89ca1d 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -113,15 +113,9 @@
+ static int sixty = 60;
+ #endif
+
+-static int __maybe_unused neg_one = -1;
+-static int __maybe_unused two = 2;
+-static int __maybe_unused four = 4;
+ static unsigned long zero_ul;
+ static unsigned long one_ul = 1;
+ static unsigned long long_max = LONG_MAX;
+-static int one_hundred = 100;
+-static int two_hundred = 200;
+-static int one_thousand = 1000;
+ #ifdef CONFIG_PRINTK
+ static int ten_thousand = 10000;
+ #endif
+@@ -228,6 +222,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
+ return ret;
+ }
+
++void __weak unpriv_ebpf_notify(int new_state)
++{
++}
++
+ static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -245,6 +243,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
++
++ unpriv_ebpf_notify(unpriv_enable);
++
+ return ret;
+ }
+ #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
+@@ -378,13 +379,14 @@ int proc_dostring(struct ctl_table *table, int write,
+ ppos);
+ }
+
+-static size_t proc_skip_spaces(char **buf)
++static void proc_skip_spaces(char **buf, size_t *size)
+ {
+- size_t ret;
+- char *tmp = skip_spaces(*buf);
+- ret = tmp - *buf;
+- *buf = tmp;
+- return ret;
++ while (*size) {
++ if (!isspace(**buf))
++ break;
++ (*size)--;
++ (*buf)++;
++ }
+ }
+
+ static void proc_skip_char(char **buf, size_t *size, const char v)
+@@ -453,13 +455,12 @@ static int proc_get_long(char **buf, size_t *size,
+ unsigned long *val, bool *neg,
+ const char *perm_tr, unsigned perm_tr_len, char *tr)
+ {
+- int len;
+ char *p, tmp[TMPBUFLEN];
++ ssize_t len = *size;
+
+- if (!*size)
++ if (len <= 0)
+ return -EINVAL;
+
+- len = *size;
+ if (len > TMPBUFLEN - 1)
+ len = TMPBUFLEN - 1;
+
+@@ -559,14 +560,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
+ if (*negp) {
+ if (*lvalp > (unsigned long) INT_MAX + 1)
+ return -EINVAL;
+- *valp = -*lvalp;
++ WRITE_ONCE(*valp, -*lvalp);
+ } else {
+ if (*lvalp > (unsigned long) INT_MAX)
+ return -EINVAL;
+- *valp = *lvalp;
++ WRITE_ONCE(*valp, *lvalp);
+ }
+ } else {
+- int val = *valp;
++ int val = READ_ONCE(*valp);
+ if (val < 0) {
+ *negp = true;
+ *lvalp = -(unsigned long)val;
+@@ -585,9 +586,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
+ if (write) {
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
+- *valp = *lvalp;
++ WRITE_ONCE(*valp, *lvalp);
+ } else {
+- unsigned int val = *valp;
++ unsigned int val = READ_ONCE(*valp);
+ *lvalp = (unsigned long)val;
+ }
+ return 0;
+@@ -632,7 +633,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
+ bool neg;
+
+ if (write) {
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+
+ if (!left)
+ break;
+@@ -659,7 +660,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
+ if (!write && !first && left && !err)
+ proc_put_char(&buffer, &left, '\n');
+ if (write && !err && left)
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+ if (write && first)
+ return err ? : -EINVAL;
+ *lenp -= left;
+@@ -701,7 +702,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+ if (!left) {
+ err = -EINVAL;
+ goto out_free;
+@@ -721,7 +722,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
+ }
+
+ if (!err && left)
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+
+ out_free:
+ if (err)
+@@ -981,7 +982,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
+ if ((param->min && *param->min > tmp) ||
+ (param->max && *param->max < tmp))
+ return -EINVAL;
+- *valp = tmp;
++ WRITE_ONCE(*valp, tmp);
+ }
+
+ return 0;
+@@ -1047,7 +1048,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+ (param->max && *param->max < tmp))
+ return -ERANGE;
+
+- *valp = tmp;
++ WRITE_ONCE(*valp, tmp);
+ }
+
+ return 0;
+@@ -1131,13 +1132,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,
+
+ tmp.maxlen = sizeof(val);
+ tmp.data = &val;
+- val = *data;
++ val = READ_ONCE(*data);
+ res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
+ do_proc_douintvec_minmax_conv, &param);
+ if (res)
+ return res;
+ if (write)
+- *data = val;
++ WRITE_ONCE(*data, val);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
+@@ -1258,7 +1259,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
+ if (write) {
+ bool neg;
+
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+ if (!left)
+ break;
+
+@@ -1274,9 +1275,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
+ err = -EINVAL;
+ break;
+ }
+- *i = val;
++ WRITE_ONCE(*i, val);
+ } else {
+- val = convdiv * (*i) / convmul;
++ val = convdiv * READ_ONCE(*i) / convmul;
+ if (!first)
+ proc_put_char(&buffer, &left, '\t');
+ proc_put_long(&buffer, &left, val, false);
+@@ -1286,7 +1287,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
+ if (!write && !first && left && !err)
+ proc_put_char(&buffer, &left, '\n');
+ if (write && !err)
+- left -= proc_skip_spaces(&p);
++ proc_skip_spaces(&p, &left);
+ if (write && first)
+ return err ? : -EINVAL;
+ *lenp -= left;
+@@ -1357,9 +1358,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
+ if (write) {
+ if (*lvalp > INT_MAX / HZ)
+ return 1;
+- *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
++ if (*negp)
++ WRITE_ONCE(*valp, -*lvalp * HZ);
++ else
++ WRITE_ONCE(*valp, *lvalp * HZ);
+ } else {
+- int val = *valp;
++ int val = READ_ONCE(*valp);
+ unsigned long lval;
+ if (val < 0) {
+ *negp = true;
+@@ -1405,9 +1409,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
+
+ if (jif > INT_MAX)
+ return 1;
+- *valp = (int)jif;
++ WRITE_ONCE(*valp, (int)jif);
+ } else {
+- int val = *valp;
++ int val = READ_ONCE(*valp);
+ unsigned long lval;
+ if (val < 0) {
+ *negp = true;
+@@ -1475,8 +1479,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+ * @ppos: the current position in the file
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+- * values from/to the user buffer, treated as an ASCII string.
+- * The values read are assumed to be in 1/1000 seconds, and
++ * values from/to the user buffer, treated as an ASCII string.
++ * The values read are assumed to be in 1/1000 seconds, and
+ * are converted into jiffies.
+ *
+ * Returns 0 on success.
+@@ -1962,7 +1966,7 @@ static struct ctl_table kern_table[] = {
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+- .extra1 = &neg_one,
++ .extra1 = SYSCTL_NEG_ONE,
+ .extra2 = SYSCTL_ONE,
+ },
+ #endif
+@@ -2216,17 +2220,6 @@ static struct ctl_table kern_table[] = {
+ .proc_handler = proc_dointvec,
+ },
+ #endif
+-#ifdef CONFIG_SMP
+- {
+- .procname = "oops_all_cpu_backtrace",
+- .data = &sysctl_oops_all_cpu_backtrace,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = SYSCTL_ZERO,
+- .extra2 = SYSCTL_ONE,
+- },
+-#endif /* CONFIG_SMP */
+ {
+ .procname = "pid_max",
+ .data = &pid_max,
+@@ -2304,7 +2297,7 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_sysadmin,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ #endif
+ {
+@@ -2564,7 +2557,7 @@ static struct ctl_table kern_table[] = {
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+- .extra1 = &neg_one,
++ .extra1 = SYSCTL_NEG_ONE,
+ },
+ #endif
+ #ifdef CONFIG_RT_MUTEXES
+@@ -2626,7 +2619,7 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = perf_cpu_time_max_percent_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "perf_event_max_stack",
+@@ -2644,7 +2637,7 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = perf_event_max_stack_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_thousand,
++ .extra2 = SYSCTL_ONE_THOUSAND,
+ },
+ #endif
+ {
+@@ -2675,7 +2668,7 @@ static struct ctl_table kern_table[] = {
+ .mode = 0644,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "bpf_stats_enabled",
+@@ -2729,7 +2722,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = overcommit_policy_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "panic_on_oom",
+@@ -2738,7 +2731,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "oom_kill_allocating_task",
+@@ -2783,7 +2776,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = dirty_background_ratio_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "dirty_background_bytes",
+@@ -2800,7 +2793,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = dirty_ratio_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "dirty_bytes",
+@@ -2840,8 +2833,19 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two_hundred,
++ .extra2 = SYSCTL_TWO_HUNDRED,
++ },
++#ifdef CONFIG_NUMA
++ {
++ .procname = "numa_stat",
++ .data = &sysctl_vm_numa_stat,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = sysctl_vm_numa_stat_handler,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE,
+ },
++#endif
+ #ifdef CONFIG_HUGETLB_PAGE
+ {
+ .procname = "nr_hugepages",
+@@ -2858,15 +2862,6 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ },
+- {
+- .procname = "numa_stat",
+- .data = &sysctl_vm_numa_stat,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = sysctl_vm_numa_stat_handler,
+- .extra1 = SYSCTL_ZERO,
+- .extra2 = SYSCTL_ONE,
+- },
+ #endif
+ {
+ .procname = "hugetlb_shm_group",
+@@ -2897,7 +2892,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0200,
+ .proc_handler = drop_caches_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+- .extra2 = &four,
++ .extra2 = SYSCTL_FOUR,
+ },
+ #ifdef CONFIG_COMPACTION
+ {
+@@ -2914,7 +2909,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = compaction_proactiveness_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "extfrag_threshold",
+@@ -2959,7 +2954,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = watermark_scale_factor_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+- .extra2 = &one_thousand,
++ .extra2 = SYSCTL_THREE_THOUSAND,
+ },
+ {
+ .procname = "percpu_pagelist_high_fraction",
+@@ -3038,7 +3033,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "min_slab_ratio",
+@@ -3047,7 +3042,7 @@ static struct ctl_table vm_table[] = {
+ .mode = 0644,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &one_hundred,
++ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ #endif
+ #ifdef CONFIG_SMP
+@@ -3337,7 +3332,7 @@ static struct ctl_table fs_table[] = {
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "protected_regular",
+@@ -3346,7 +3341,7 @@ static struct ctl_table fs_table[] = {
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "suid_dumpable",
+@@ -3355,7 +3350,7 @@ static struct ctl_table fs_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+- .extra2 = &two,
++ .extra2 = SYSCTL_TWO,
+ },
+ #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+ {
+diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
+index 5897828b9d7ed..7e5dff602585d 100644
+--- a/kernel/time/alarmtimer.c
++++ b/kernel/time/alarmtimer.c
+@@ -470,11 +470,35 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
+ }
+ EXPORT_SYMBOL_GPL(alarm_forward);
+
+-u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
++static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle)
+ {
+ struct alarm_base *base = &alarm_bases[alarm->type];
++ ktime_t now = base->get_ktime();
++
++ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) {
++ /*
++ * Same issue as with posix_timer_fn(). Timers which are
++		 * Same issue as with posix_timer_fn(). Periodic timers
++		 * whose signal is ignored can starve the system
++ * promised in the context of posix_timer_fn() never
++ * materialized, but someone should really work on it.
++ *
++		 * To prevent DOS, fake @now to be 1 jiffie out which keeps
++ * the overrun accounting correct but creates an
++ * inconsistency vs. timer_gettime(2).
++ */
++ ktime_t kj = NSEC_PER_SEC / HZ;
++
++ if (interval < kj)
++ now = ktime_add(now, kj);
++ }
++
++ return alarm_forward(alarm, now, interval);
++}
+
+- return alarm_forward(alarm, base->get_ktime(), interval);
++u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
++{
++ return __alarm_forward_now(alarm, interval, false);
+ }
+ EXPORT_SYMBOL_GPL(alarm_forward_now);
+
+@@ -551,9 +575,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
+ if (posix_timer_event(ptr, si_private) && ptr->it_interval) {
+ /*
+ * Handle ignored signals and rearm the timer. This will go
+- * away once we handle ignored signals proper.
++		 * away once we handle ignored signals properly. Ensure that
++ * small intervals cannot starve the system.
+ */
+- ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval);
++ ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true);
+ ++ptr->it_requeue_pending;
+ ptr->it_active = 1;
+ result = ALARMTIMER_RESTART;
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index b8a14d2fb5ba6..97ec98041f926 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -107,7 +107,7 @@ static u64 suspend_start;
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
+ * a lower bound for cs->uncertainty_margin values when registering clocks.
+ */
+-#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC)
++#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
+ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
+ static void clocksource_watchdog_work(struct work_struct *work);
+@@ -205,17 +205,24 @@ EXPORT_SYMBOL_GPL(max_cswd_read_retries);
+ static int verify_n_cpus = 8;
+ module_param(verify_n_cpus, int, 0644);
+
+-static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
++enum wd_read_status {
++ WD_READ_SUCCESS,
++ WD_READ_UNSTABLE,
++ WD_READ_SKIP
++};
++
++static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+ {
+ unsigned int nretries;
+- u64 wd_end, wd_delta;
+- int64_t wd_delay;
++ u64 wd_end, wd_end2, wd_delta;
++ int64_t wd_delay, wd_seq_delay;
+
+ for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+ local_irq_disable();
+ *wdnow = watchdog->read(watchdog);
+ *csnow = cs->read(cs);
+ wd_end = watchdog->read(watchdog);
++ wd_end2 = watchdog->read(watchdog);
+ local_irq_enable();
+
+ wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+@@ -226,13 +233,34 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+ pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+ smp_processor_id(), watchdog->name, nretries);
+ }
+- return true;
++ return WD_READ_SUCCESS;
+ }
++
++ /*
++ * Now compute delay in consecutive watchdog read to see if
++		 * there is too much external interference that causes
++ * significant delay in reading both clocksource and watchdog.
++ *
++ * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
++ * report system busy, reinit the watchdog and skip the current
++ * watchdog test.
++ */
++ wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
++ wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
++ if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
++ goto skip_test;
+ }
+
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
+ smp_processor_id(), watchdog->name, wd_delay, nretries);
+- return false;
++ return WD_READ_UNSTABLE;
++
++skip_test:
++ pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
++ smp_processor_id(), watchdog->name, wd_seq_delay);
++ pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
++ cs->name, wd_delay);
++ return WD_READ_SKIP;
+ }
+
+ static u64 csnow_mid;
+@@ -350,12 +378,23 @@ void clocksource_verify_percpu(struct clocksource *cs)
+ }
+ EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
+
++static inline void clocksource_reset_watchdog(void)
++{
++ struct clocksource *cs;
++
++ list_for_each_entry(cs, &watchdog_list, wd_list)
++ cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
++}
++
++
+ static void clocksource_watchdog(struct timer_list *unused)
+ {
+ u64 csnow, wdnow, cslast, wdlast, delta;
+ int next_cpu, reset_pending;
+ int64_t wd_nsec, cs_nsec;
+ struct clocksource *cs;
++ enum wd_read_status read_ret;
++ unsigned long extra_wait = 0;
+ u32 md;
+
+ spin_lock(&watchdog_lock);
+@@ -373,12 +412,32 @@ static void clocksource_watchdog(struct timer_list *unused)
+ continue;
+ }
+
+- if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
++ read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
++
++ if (read_ret == WD_READ_UNSTABLE) {
+ /* Clock readout unreliable, so give it up. */
+ __clocksource_unstable(cs);
+ continue;
+ }
+
++ /*
++ * When WD_READ_SKIP is returned, it means the system is likely
++ * under very heavy load, where the latency of reading
++ * watchdog/clocksource is very big, and affect the accuracy of
++		 * watchdog/clocksource is very high and affects the accuracy of the
++		 * watchdog check. So give the system some space and suspend the
++ */
++ if (read_ret == WD_READ_SKIP) {
++ /*
++ * As the watchdog timer will be suspended, and
++			 * cs->last could stay unchanged for 5 minutes, reset
++ * the counters.
++ */
++ clocksource_reset_watchdog();
++ extra_wait = HZ * 300;
++ break;
++ }
++
+ /* Clocksource initialized ? */
+ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+ atomic_read(&watchdog_reset_pending)) {
+@@ -474,7 +533,7 @@ static void clocksource_watchdog(struct timer_list *unused)
+ * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+ */
+ if (!timer_pending(&watchdog_timer)) {
+- watchdog_timer.expires += WATCHDOG_INTERVAL;
++ watchdog_timer.expires += WATCHDOG_INTERVAL + extra_wait;
+ add_timer_on(&watchdog_timer, next_cpu);
+ }
+ out:
+@@ -499,14 +558,6 @@ static inline void clocksource_stop_watchdog(void)
+ watchdog_running = 0;
+ }
+
+-static inline void clocksource_reset_watchdog(void)
+-{
+- struct clocksource *cs;
+-
+- list_for_each_entry(cs, &watchdog_list, wd_list)
+- cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
+-}
+-
+ static void clocksource_resume_watchdog(void)
+ {
+ atomic_inc(&watchdog_reset_pending);
+diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
+index 0ea8702eb5163..97409581e9dac 100644
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -2126,6 +2126,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
+ if (!timespec64_valid(&tu))
+ return -EINVAL;
+
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
+ current->restart_block.nanosleep.rmtp = rmtp;
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+@@ -2147,6 +2148,7 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
+ if (!timespec64_valid(&tu))
+ return -EINVAL;
+
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+ current->restart_block.nanosleep.compat_rmtp = rmtp;
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+@@ -2311,6 +2313,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
+
+ return !t.task ? 0 : -EINTR;
+ }
++EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
+
+ /**
+ * schedule_hrtimeout_range - sleep until timeout
+diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
+index 643d412ac6235..6b6271387de89 100644
+--- a/kernel/time/posix-cpu-timers.c
++++ b/kernel/time/posix-cpu-timers.c
+@@ -840,6 +840,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
+ return expires;
+
+ ctmr->firing = 1;
++ /* See posix_cpu_timer_wait_running() */
++ rcu_assign_pointer(ctmr->handling, current);
+ cpu_timer_dequeue(ctmr);
+ list_add_tail(&ctmr->elist, firing);
+ }
+@@ -1155,7 +1157,66 @@ static void handle_posix_cpu_timers(struct task_struct *tsk);
+ #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+ static void posix_cpu_timers_work(struct callback_head *work)
+ {
++ struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
++
++ mutex_lock(&cw->mutex);
+ handle_posix_cpu_timers(current);
++ mutex_unlock(&cw->mutex);
++}
++
++/*
++ * Invoked from the posix-timer core when a cancel operation failed because
++ * the timer is marked firing. The caller holds rcu_read_lock(), which
++ * protects the timer and the task which is expiring it from being freed.
++ */
++static void posix_cpu_timer_wait_running(struct k_itimer *timr)
++{
++ struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);
++
++ /* Has the handling task completed expiry already? */
++ if (!tsk)
++ return;
++
++ /* Ensure that the task cannot go away */
++ get_task_struct(tsk);
++ /* Now drop the RCU protection so the mutex can be locked */
++ rcu_read_unlock();
++ /* Wait on the expiry mutex */
++ mutex_lock(&tsk->posix_cputimers_work.mutex);
++ /* Release it immediately again. */
++ mutex_unlock(&tsk->posix_cputimers_work.mutex);
++ /* Drop the task reference. */
++ put_task_struct(tsk);
++ /* Relock RCU so the callsite is balanced */
++ rcu_read_lock();
++}
++
++static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
++{
++ /* Ensure that timr->it.cpu.handling task cannot go away */
++ rcu_read_lock();
++ spin_unlock_irq(&timr->it_lock);
++ posix_cpu_timer_wait_running(timr);
++ rcu_read_unlock();
++ /* @timr is on stack and is valid */
++ spin_lock_irq(&timr->it_lock);
++}
++
++/*
++ * Clear existing posix CPU timers task work.
++ */
++void clear_posix_cputimers_work(struct task_struct *p)
++{
++ /*
++ * A copied work entry from the old task is not meaningful, clear it.
++ * N.B. init_task_work will not do this.
++ */
++ memset(&p->posix_cputimers_work.work, 0,
++ sizeof(p->posix_cputimers_work.work));
++ init_task_work(&p->posix_cputimers_work.work,
++ posix_cpu_timers_work);
++ mutex_init(&p->posix_cputimers_work.mutex);
++ p->posix_cputimers_work.scheduled = false;
+ }
+
+ /*
+@@ -1164,8 +1225,7 @@ static void posix_cpu_timers_work(struct callback_head *work)
+ */
+ void __init posix_cputimers_init_work(void)
+ {
+- init_task_work(&current->posix_cputimers_work.work,
+- posix_cpu_timers_work);
++ clear_posix_cputimers_work(current);
+ }
+
+ /*
+@@ -1234,6 +1294,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+ lockdep_posixtimer_exit();
+ }
+
++static void posix_cpu_timer_wait_running(struct k_itimer *timr)
++{
++ cpu_relax();
++}
++
++static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
++{
++ spin_unlock_irq(&timr->it_lock);
++ cpu_relax();
++ spin_lock_irq(&timr->it_lock);
++}
++
+ static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+ {
+ return false;
+@@ -1342,6 +1414,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
+ */
+ if (likely(cpu_firing >= 0))
+ cpu_timer_fire(timer);
++ /* See posix_cpu_timer_wait_running() */
++ rcu_assign_pointer(timer->it.cpu.handling, NULL);
+ spin_unlock(&timer->it_lock);
+ }
+ }
+@@ -1476,23 +1550,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
+ expires = cpu_timer_getexpires(&timer.it.cpu);
+ error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
+ if (!error) {
+- /*
+- * Timer is now unarmed, deletion can not fail.
+- */
++ /* Timer is now unarmed, deletion can not fail. */
+ posix_cpu_timer_del(&timer);
++ } else {
++ while (error == TIMER_RETRY) {
++ posix_cpu_timer_wait_running_nsleep(&timer);
++ error = posix_cpu_timer_del(&timer);
++ }
+ }
+- spin_unlock_irq(&timer.it_lock);
+
+- while (error == TIMER_RETRY) {
+- /*
+- * We need to handle case when timer was or is in the
+- * middle of firing. In other cases we already freed
+- * resources.
+- */
+- spin_lock_irq(&timer.it_lock);
+- error = posix_cpu_timer_del(&timer);
+- spin_unlock_irq(&timer.it_lock);
+- }
++ spin_unlock_irq(&timer.it_lock);
+
+ if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
+ /*
+@@ -1602,6 +1669,7 @@ const struct k_clock clock_posix_cpu = {
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+ .timer_rearm = posix_cpu_timer_rearm,
++ .timer_wait_running = posix_cpu_timer_wait_running,
+ };
+
+ const struct k_clock clock_process = {
+diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
+index fcb3b21d8bdcd..3783d07d60ba0 100644
+--- a/kernel/time/posix-stubs.c
++++ b/kernel/time/posix-stubs.c
+@@ -146,6 +146,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
+ return -EINVAL;
+ if (flags & TIMER_ABSTIME)
+ rmtp = NULL;
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
+ current->restart_block.nanosleep.rmtp = rmtp;
+ texp = timespec64_to_ktime(t);
+@@ -239,6 +240,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+ return -EINVAL;
+ if (flags & TIMER_ABSTIME)
+ rmtp = NULL;
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+ current->restart_block.nanosleep.compat_rmtp = rmtp;
+ texp = timespec64_to_ktime(t);
+diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
+index 1cd10b102c51c..ed3c4a9543982 100644
+--- a/kernel/time/posix-timers.c
++++ b/kernel/time/posix-timers.c
+@@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+ rcu_read_lock();
+ unlock_timer(timer, *flags);
+
++ /*
++ * kc->timer_wait_running() might drop RCU lock. So @timer
++ * cannot be touched anymore after the function returns!
++ */
+ if (!WARN_ON_ONCE(!kc->timer_wait_running))
+ kc->timer_wait_running(timer);
+
+@@ -1033,33 +1037,69 @@ retry_delete:
+ }
+
+ /*
+- * return timer owned by the process, used by exit_itimers
++ * Delete a timer if it is armed, remove it from the hash and schedule it
++ * for RCU freeing.
+ */
+ static void itimer_delete(struct k_itimer *timer)
+ {
+-retry_delete:
+- spin_lock_irq(&timer->it_lock);
++ unsigned long flags;
+
++ /*
++ * irqsave is required to make timer_wait_running() work.
++ */
++ spin_lock_irqsave(&timer->it_lock, flags);
++
++retry_delete:
++ /*
++	 * Even if the timer is no longer accessible from other tasks,
++ * it still might be armed and queued in the underlying timer
++ * mechanism. Worse, that timer mechanism might run the expiry
++ * function concurrently.
++ */
+ if (timer_delete_hook(timer) == TIMER_RETRY) {
+- spin_unlock_irq(&timer->it_lock);
++ /*
++		 * The timer is being expired concurrently; prevent livelocks
++ * and pointless spinning on RT.
++ *
++ * timer_wait_running() drops timer::it_lock, which opens
++ * the possibility for another task to delete the timer.
++ *
++ * That's not possible here because this is invoked from
++ * do_exit() only for the last thread of the thread group.
++ * So no other task can access and delete that timer.
++ */
++ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
++ return;
++
+ goto retry_delete;
+ }
+ list_del(&timer->list);
+
+- spin_unlock_irq(&timer->it_lock);
++ spin_unlock_irqrestore(&timer->it_lock, flags);
+ release_posix_timer(timer, IT_ID_SET);
+ }
+
+ /*
+- * This is called by do_exit or de_thread, only when there are no more
+- * references to the shared signal_struct.
++ * Invoked from do_exit() when the last thread of a thread group exits.
++ * At that point no other task can access the timers of the dying
++ * task anymore.
+ */
+-void exit_itimers(struct signal_struct *sig)
++void exit_itimers(struct task_struct *tsk)
+ {
++ struct list_head timers;
+ struct k_itimer *tmr;
+
+- while (!list_empty(&sig->posix_timers)) {
+- tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
++ if (list_empty(&tsk->signal->posix_timers))
++ return;
++
++ /* Protect against concurrent read via /proc/$PID/timers */
++ spin_lock_irq(&tsk->sighand->siglock);
++ list_replace_init(&tsk->signal->posix_timers, &timers);
++ spin_unlock_irq(&tsk->sighand->siglock);
++
++	/* The timers are no longer accessible via tsk::signal */
++ while (!list_empty(&timers)) {
++ tmr = list_first_entry(&timers, struct k_itimer, list);
+ itimer_delete(tmr);
+ }
+ }
+@@ -1261,6 +1301,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
+ return -EINVAL;
+ if (flags & TIMER_ABSTIME)
+ rmtp = NULL;
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
+ current->restart_block.nanosleep.rmtp = rmtp;
+
+@@ -1288,6 +1329,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+ return -EINVAL;
+ if (flags & TIMER_ABSTIME)
+ rmtp = NULL;
++ current->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
+ current->restart_block.nanosleep.compat_rmtp = rmtp;
+
+diff --git a/kernel/time/test_udelay.c b/kernel/time/test_udelay.c
+index 13b11eb62685e..20d5df631570e 100644
+--- a/kernel/time/test_udelay.c
++++ b/kernel/time/test_udelay.c
+@@ -149,7 +149,7 @@ module_init(udelay_test_init);
+ static void __exit udelay_test_exit(void)
+ {
+ mutex_lock(&udelay_test_lock);
+- debugfs_remove(debugfs_lookup(DEBUGFS_FILENAME, NULL));
++ debugfs_lookup_and_remove(DEBUGFS_FILENAME, NULL);
+ mutex_unlock(&udelay_test_lock);
+ }
+
+diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
+index f7fe6fe361731..0916cc9adb828 100644
+--- a/kernel/time/tick-broadcast.c
++++ b/kernel/time/tick-broadcast.c
+@@ -35,14 +35,15 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
+ #ifdef CONFIG_TICK_ONESHOT
+ static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
+
+-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
++static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
+ static void tick_broadcast_clear_oneshot(int cpu);
+ static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+ # ifdef CONFIG_HOTPLUG_CPU
+ static void tick_broadcast_oneshot_offline(unsigned int cpu);
+ # endif
+ #else
+-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
++static inline void
++tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
+ static inline void tick_broadcast_clear_oneshot(int cpu) { }
+ static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
+ # ifdef CONFIG_HOTPLUG_CPU
+@@ -264,7 +265,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ tick_broadcast_start_periodic(bc);
+ else
+- tick_broadcast_setup_oneshot(bc);
++ tick_broadcast_setup_oneshot(bc, false);
+ ret = 1;
+ } else {
+ /*
+@@ -500,7 +501,7 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ tick_broadcast_start_periodic(bc);
+ else
+- tick_broadcast_setup_oneshot(bc);
++ tick_broadcast_setup_oneshot(bc, false);
+ }
+ }
+ out:
+@@ -1016,48 +1017,101 @@ static inline ktime_t tick_get_next_period(void)
+ /**
+ * tick_broadcast_setup_oneshot - setup the broadcast device
+ */
+-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
++static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
++ bool from_periodic)
+ {
+ int cpu = smp_processor_id();
++ ktime_t nexttick = 0;
+
+ if (!bc)
+ return;
+
+- /* Set it up only once ! */
+- if (bc->event_handler != tick_handle_oneshot_broadcast) {
+- int was_periodic = clockevent_state_periodic(bc);
+-
+- bc->event_handler = tick_handle_oneshot_broadcast;
+-
++ /*
++ * When the broadcast device was switched to oneshot by the first
++ * CPU handling the NOHZ change, the other CPUs will reach this
++ * code via hrtimer_run_queues() -> tick_check_oneshot_change()
++ * too. Set up the broadcast device only once!
++ */
++ if (bc->event_handler == tick_handle_oneshot_broadcast) {
+ /*
+- * We must be careful here. There might be other CPUs
+- * waiting for periodic broadcast. We need to set the
+- * oneshot_mask bits for those and program the
+- * broadcast device to fire.
++ * The CPU which switched from periodic to oneshot mode
++ * set the broadcast oneshot bit for all other CPUs which
++ * are in the general (periodic) broadcast mask to ensure
++ * that CPUs which wait for the periodic broadcast are
++ * woken up.
++ *
++ * Clear the bit for the local CPU as the set bit would
++ * prevent the first tick_broadcast_enter() after this CPU
++ * switched to oneshot state to program the broadcast
++ * device.
++ *
++ * This code can also be reached via tick_broadcast_control(),
++ * but this cannot avoid the tick_broadcast_clear_oneshot()
++ * as that would break the periodic to oneshot transition of
++ * secondary CPUs. But that's harmless as the below only
++ * clears already cleared bits.
+ */
++ tick_broadcast_clear_oneshot(cpu);
++ return;
++ }
++
++
++ bc->event_handler = tick_handle_oneshot_broadcast;
++ bc->next_event = KTIME_MAX;
++
++ /*
++ * When the tick mode is switched from periodic to oneshot it must
++ * be ensured that CPUs which are waiting for periodic broadcast
++ * get their wake-up at the next tick. This is achieved by ORing
++ * tick_broadcast_mask into tick_broadcast_oneshot_mask.
++ *
++ * For other callers, e.g. broadcast device replacement,
++ * tick_broadcast_oneshot_mask must not be touched as this would
++ * set bits for CPUs which are already NOHZ, but not idle. Their
++ * next tick_broadcast_enter() would observe the bit set and fail
++ * to update the expiry time and the broadcast event device.
++ */
++ if (from_periodic) {
+ cpumask_copy(tmpmask, tick_broadcast_mask);
++ /* Remove the local CPU as it is obviously not idle */
+ cpumask_clear_cpu(cpu, tmpmask);
+- cpumask_or(tick_broadcast_oneshot_mask,
+- tick_broadcast_oneshot_mask, tmpmask);
++ cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);
+
+- if (was_periodic && !cpumask_empty(tmpmask)) {
+- ktime_t nextevt = tick_get_next_period();
++ /*
++ * Ensure that the oneshot broadcast handler will wake the
++ * CPUs which are still waiting for periodic broadcast.
++ */
++ nexttick = tick_get_next_period();
++ tick_broadcast_init_next_event(tmpmask, nexttick);
+
+- clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
+- tick_broadcast_init_next_event(tmpmask, nextevt);
+- tick_broadcast_set_event(bc, cpu, nextevt);
+- } else
+- bc->next_event = KTIME_MAX;
+- } else {
+ /*
+- * The first cpu which switches to oneshot mode sets
+- * the bit for all other cpus which are in the general
+- * (periodic) broadcast mask. So the bit is set and
+- * would prevent the first broadcast enter after this
+- * to program the bc device.
++ * If the underlying broadcast clock event device is
++ * already in oneshot state, then there is nothing to do.
++ * The device was already armed for the next tick
++ * in tick_handle_broadcast_periodic()
+ */
+- tick_broadcast_clear_oneshot(cpu);
++ if (clockevent_state_oneshot(bc))
++ return;
+ }
++
++ /*
++ * When switching from periodic to oneshot mode arm the broadcast
++ * device for the next tick.
++ *
++ * If the broadcast device has been replaced in oneshot mode and
++ * the oneshot broadcast mask is not empty, then arm it to expire
++ * immediately in order to reevaluate the next expiring timer.
++ * @nexttick is 0 and therefore in the past which will cause the
++ * clockevent code to force an event.
++ *
++ * For both cases the programming can be avoided when the oneshot
++ * broadcast mask is empty.
++ *
++ * tick_broadcast_set_event() implicitly switches the broadcast
++ * device to oneshot state.
++ */
++ if (!cpumask_empty(tick_broadcast_oneshot_mask))
++ tick_broadcast_set_event(bc, cpu, nexttick);
+ }
+
+ /*
+@@ -1066,14 +1120,16 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+ void tick_broadcast_switch_to_oneshot(void)
+ {
+ struct clock_event_device *bc;
++ enum tick_device_mode oldmode;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+
++ oldmode = tick_broadcast_device.mode;
+ tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
+ bc = tick_broadcast_device.evtdev;
+ if (bc)
+- tick_broadcast_setup_oneshot(bc);
++ tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);
+
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ }
+diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
+index 46789356f856e..e9138cd7a0f52 100644
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -219,7 +219,6 @@ static void tick_setup_device(struct tick_device *td,
+ */
+ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
+ tick_do_timer_cpu = cpu;
+-
+ tick_next_period = ktime_get();
+ #ifdef CONFIG_NO_HZ_FULL
+ /*
+diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
+index 6bffe5af8cb11..7f5310d1a4d6a 100644
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -161,14 +161,27 @@ static ktime_t tick_init_jiffy_update(void)
+ raw_spin_lock(&jiffies_lock);
+ write_seqcount_begin(&jiffies_seq);
+ /* Did we start the jiffies update yet ? */
+- if (last_jiffies_update == 0)
++ if (last_jiffies_update == 0) {
++ u32 rem;
++
++ /*
++ * Ensure that the tick is aligned to a multiple of
++ * TICK_NSEC.
++ */
++ div_u64_rem(tick_next_period, TICK_NSEC, &rem);
++ if (rem)
++ tick_next_period += TICK_NSEC - rem;
++
+ last_jiffies_update = tick_next_period;
++ }
+ period = last_jiffies_update;
+ write_seqcount_end(&jiffies_seq);
+ raw_spin_unlock(&jiffies_lock);
+ return period;
+ }
+
++#define MAX_STALLED_JIFFIES 5
++
+ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
+ {
+ int cpu = smp_processor_id();
+@@ -186,7 +199,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
+ */
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ #ifdef CONFIG_NO_HZ_FULL
+- WARN_ON(tick_nohz_full_running);
++ WARN_ON_ONCE(tick_nohz_full_running);
+ #endif
+ tick_do_timer_cpu = cpu;
+ }
+@@ -196,6 +209,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
+ if (tick_do_timer_cpu == cpu)
+ tick_do_update_jiffies64(now);
+
++ /*
++ * If jiffies update stalled for too long (timekeeper in stop_machine()
++ * or VMEXIT'ed for several msecs), force an update.
++ */
++ if (ts->last_tick_jiffies != jiffies) {
++ ts->stalled_jiffies = 0;
++ ts->last_tick_jiffies = READ_ONCE(jiffies);
++ } else {
++ if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
++ tick_do_update_jiffies64(now);
++ ts->stalled_jiffies = 0;
++ ts->last_tick_jiffies = READ_ONCE(jiffies);
++ }
++ }
++
+ if (ts->inidle)
+ ts->got_idle_tick = 1;
+ }
+@@ -264,6 +292,11 @@ static bool check_tick_dependency(atomic_t *dep)
+ return true;
+ }
+
++ if (val & TICK_DEP_MASK_RCU_EXP) {
++ trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
++ return true;
++ }
++
+ return false;
+ }
+
+@@ -509,9 +542,8 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
+ cpumask_copy(tick_nohz_full_mask, cpumask);
+ tick_nohz_full_running = true;
+ }
+-EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
+
+-static int tick_nohz_cpu_down(unsigned int cpu)
++bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
+ {
+ /*
+ * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+@@ -519,8 +551,13 @@ static int tick_nohz_cpu_down(unsigned int cpu)
+ * CPUs. It must remain online when nohz full is enabled.
+ */
+ if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+- return -EBUSY;
+- return 0;
++ return false;
++ return true;
++}
++
++static int tick_nohz_cpu_down(unsigned int cpu)
++{
++ return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
+ }
+
+ void __init tick_nohz_init(void)
+@@ -913,6 +950,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
+ if (unlikely(expires == KTIME_MAX)) {
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_cancel(&ts->sched_timer);
++ else
++ tick_program_event(KTIME_MAX, 1);
+ return;
+ }
+
+@@ -1319,9 +1358,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
+ tick_sched_do_timer(ts, now);
+ tick_sched_handle(ts, regs);
+
+- /* No need to reprogram if we are running tickless */
+- if (unlikely(ts->tick_stopped))
++ if (unlikely(ts->tick_stopped)) {
++ /*
++ * The clockevent device is not reprogrammed, so change the
++ * clock event device to ONESHOT_STOPPED to avoid spurious
++ * interrupts on devices which might not be truly one shot.
++ */
++ tick_program_event(KTIME_MAX, 1);
+ return;
++ }
+
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+@@ -1375,6 +1420,13 @@ static inline void tick_nohz_irq_enter(void)
+ now = ktime_get();
+ if (ts->idle_active)
+ tick_nohz_stop_idle(ts, now);
++ /*
++	 * If all CPUs are idle, we may need to update a stale jiffies value.
++ * Note nohz_full is a special case: a timekeeper is guaranteed to stay
++ * alive but it might be busy looping with interrupts disabled in some
++ * rare case (typically stop machine). So we must make sure we have a
++ * last resort.
++ */
+ if (ts->tick_stopped)
+ tick_nohz_update_jiffies(now);
+ }
+diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
+index d952ae3934236..504649513399b 100644
+--- a/kernel/time/tick-sched.h
++++ b/kernel/time/tick-sched.h
+@@ -49,6 +49,8 @@ enum tick_nohz_mode {
+ * @timer_expires_base: Base time clock monotonic for @timer_expires
+ * @next_timer: Expiry time of next expiring timer for debugging purpose only
+ * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
++ * @last_tick_jiffies: Value of jiffies seen on last tick
++ * @stalled_jiffies: Number of stalled jiffies detected across ticks
+ */
+ struct tick_sched {
+ struct hrtimer sched_timer;
+@@ -77,6 +79,8 @@ struct tick_sched {
+ u64 next_timer;
+ ktime_t idle_expires;
+ atomic_t tick_dep_mask;
++ unsigned long last_tick_jiffies;
++ unsigned int stalled_jiffies;
+ };
+
+ extern struct tick_sched *tick_get_tick_sched(int cpu);
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index b348749a9fc62..d921c1b256cf5 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -17,11 +17,13 @@
+ #include <linux/clocksource.h>
+ #include <linux/jiffies.h>
+ #include <linux/time.h>
++#include <linux/timex.h>
+ #include <linux/tick.h>
+ #include <linux/stop_machine.h>
+ #include <linux/pvclock_gtod.h>
+ #include <linux/compiler.h>
+ #include <linux/audit.h>
++#include <linux/random.h>
+
+ #include "tick-internal.h"
+ #include "ntp_internal.h"
+@@ -482,7 +484,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
+ * of the following timestamps. Callers need to be aware of that and
+ * deal with it.
+ */
+-u64 ktime_get_mono_fast_ns(void)
++u64 notrace ktime_get_mono_fast_ns(void)
+ {
+ return __ktime_get_fast_ns(&tk_fast_mono);
+ }
+@@ -494,7 +496,7 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+ * Contrary to ktime_get_mono_fast_ns() this is always correct because the
+ * conversion factor is not affected by NTP/PTP correction.
+ */
+-u64 ktime_get_raw_fast_ns(void)
++u64 notrace ktime_get_raw_fast_ns(void)
+ {
+ return __ktime_get_fast_ns(&tk_fast_raw);
+ }
+@@ -521,7 +523,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+ * partially updated. Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ *
+- * The caveats vs. timestamp ordering as documented for ktime_get_fast_ns()
++ * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
+ * apply as well.
+ */
+ u64 notrace ktime_get_boot_fast_ns(void)
+@@ -557,7 +559,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
+ /**
+ * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
+ *
+- * See ktime_get_fast_ns() for documentation of the time stamp ordering.
++ * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
+ */
+ u64 ktime_get_real_fast_ns(void)
+ {
+@@ -1306,8 +1308,7 @@ int do_settimeofday64(const struct timespec64 *ts)
+ timekeeping_forward_now(tk);
+
+ xt = tk_xtime(tk);
+- ts_delta.tv_sec = ts->tv_sec - xt.tv_sec;
+- ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec;
++ ts_delta = timespec64_sub(*ts, xt);
+
+ if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
+ ret = -EINVAL;
+@@ -1326,8 +1327,10 @@ out:
+ /* Signal hrtimers about time change */
+ clock_was_set(CLOCK_SET_WALL);
+
+- if (!ret)
++ if (!ret) {
+ audit_tk_injoffset(ts_delta);
++ add_device_randomness(ts, sizeof(*ts));
++ }
+
+ return ret;
+ }
+@@ -2381,6 +2384,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
+ return 0;
+ }
+
++/**
++ * random_get_entropy_fallback - Returns the raw clock source value,
++ * used by random.c for platforms with no valid random_get_entropy().
++ */
++unsigned long random_get_entropy_fallback(void)
++{
++ struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
++ struct clocksource *clock = READ_ONCE(tkr->clock);
++
++ if (unlikely(timekeeping_suspended || !clock))
++ return 0;
++ return clock->read(clock);
++}
++EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
+
+ /**
+ * do_adjtimex() - Accessor function to NTP __do_adjtimex function
+@@ -2399,6 +2416,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+ ret = timekeeping_validate_timex(txc);
+ if (ret)
+ return ret;
++ add_device_randomness(txc, sizeof(*txc));
+
+ if (txc->modes & ADJ_SETOFFSET) {
+ struct timespec64 delta;
+@@ -2416,6 +2434,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+ audit_ntp_init(&ad);
+
+ ktime_get_real_ts64(&ts);
++ add_device_randomness(&ts, sizeof(ts));
+
+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
+ write_seqcount_begin(&tk_core.seq);
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index e3d2c23c413d4..9dd2a39cb3b00 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1722,11 +1722,14 @@ static inline void __run_timers(struct timer_base *base)
+ time_after_eq(jiffies, base->next_expiry)) {
+ levels = collect_expired_timers(base, heads);
+ /*
+- * The only possible reason for not finding any expired
+- * timer at this clk is that all matching timers have been
+- * dequeued.
++ * The two possible reasons for not finding any expired
++ * timer at this clk are that all matching timers have been
++ * dequeued or no timer has been queued since
++ * base::next_expiry was set to base::clk +
++ * NEXT_TIMER_MAX_DELTA.
+ */
+- WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
++ WARN_ON_ONCE(!levels && !base->next_expiry_recalc
++ && base->timers_pending);
+ base->clk++;
+ base->next_expiry = __next_timer_interrupt(base);
+
+@@ -2054,26 +2057,28 @@ unsigned long msleep_interruptible(unsigned int msecs)
+ EXPORT_SYMBOL(msleep_interruptible);
+
+ /**
+- * usleep_range - Sleep for an approximate time
+- * @min: Minimum time in usecs to sleep
+- * @max: Maximum time in usecs to sleep
++ * usleep_range_state - Sleep for an approximate time in a given state
++ * @min: Minimum time in usecs to sleep
++ * @max: Maximum time in usecs to sleep
++ * @state: State the current task will be in while sleeping
+ *
+ * In non-atomic context where the exact wakeup time is flexible, use
+- * usleep_range() instead of udelay(). The sleep improves responsiveness
++ * usleep_range_state() instead of udelay(). The sleep improves responsiveness
+ * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
+ * power usage by allowing hrtimers to take advantage of an already-
+ * scheduled interrupt instead of scheduling a new one just for this sleep.
+ */
+-void __sched usleep_range(unsigned long min, unsigned long max)
++void __sched usleep_range_state(unsigned long min, unsigned long max,
++ unsigned int state)
+ {
+ ktime_t exp = ktime_add_us(ktime_get(), min);
+ u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+ for (;;) {
+- __set_current_state(TASK_UNINTERRUPTIBLE);
++ __set_current_state(state);
+ /* Do not return before the requested sleep time has elapsed */
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
+ }
+-EXPORT_SYMBOL(usleep_range);
++EXPORT_SYMBOL(usleep_range_state);
+diff --git a/kernel/torture.c b/kernel/torture.c
+index bb8f411c974b8..7233b847737fd 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -915,7 +915,7 @@ void torture_kthread_stopping(char *title)
+ VERBOSE_TOROUT_STRING(buf);
+ while (!kthread_should_stop()) {
+ torture_shutdown_absorb(title);
+- schedule_timeout_uninterruptible(1);
++ schedule_timeout_uninterruptible(HZ / 20);
+ }
+ }
+ EXPORT_SYMBOL_GPL(torture_kthread_stopping);
+diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
+index 420ff4bc67fd7..4265d125d50f3 100644
+--- a/kernel/trace/Kconfig
++++ b/kernel/trace/Kconfig
+@@ -328,6 +328,7 @@ config SCHED_TRACER
+ config HWLAT_TRACER
+ bool "Tracer to detect hardware latencies (like SMIs)"
+ select GENERIC_TRACER
++ select TRACER_MAX_TRACE
+ help
+ This tracer, when enabled will create one or more kernel threads,
+ depending on what the cpumask file is set to, which each thread
+@@ -363,6 +364,7 @@ config HWLAT_TRACER
+ config OSNOISE_TRACER
+ bool "OS Noise tracer"
+ select GENERIC_TRACER
++ select TRACER_MAX_TRACE
+ help
+ In the context of high-performance computing (HPC), the Operating
+ System Noise (osnoise) refers to the interference experienced by an
+diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
+index 6de5d4d631658..bedc5caceec70 100644
+--- a/kernel/trace/Makefile
++++ b/kernel/trace/Makefile
+@@ -47,6 +47,7 @@ obj-$(CONFIG_TRACING) += trace_output.o
+ obj-$(CONFIG_TRACING) += trace_seq.o
+ obj-$(CONFIG_TRACING) += trace_stat.o
+ obj-$(CONFIG_TRACING) += trace_printk.o
++obj-$(CONFIG_TRACING) += pid_list.o
+ obj-$(CONFIG_TRACING_MAP) += tracing_map.o
+ obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o
+ obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index fa91f398f28b7..e6d03cf148597 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -310,10 +310,20 @@ record_it:
+ local_irq_restore(flags);
+ }
+
+-static void blk_trace_free(struct blk_trace *bt)
++static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
+ {
+ relay_close(bt->rchan);
+- debugfs_remove(bt->dir);
++
++ /*
++ * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
++ * under 'q->debugfs_dir', thus lookup and remove them.
++ */
++ if (!bt->dir) {
++ debugfs_lookup_and_remove("dropped", q->debugfs_dir);
++ debugfs_lookup_and_remove("msg", q->debugfs_dir);
++ } else {
++ debugfs_remove(bt->dir);
++ }
+ free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
+ kfree(bt);
+@@ -335,10 +345,10 @@ static void put_probe_ref(void)
+ mutex_unlock(&blk_probe_mutex);
+ }
+
+-static void blk_trace_cleanup(struct blk_trace *bt)
++static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
+ {
+ synchronize_rcu();
+- blk_trace_free(bt);
++ blk_trace_free(q, bt);
+ put_probe_ref();
+ }
+
+@@ -352,7 +362,7 @@ static int __blk_trace_remove(struct request_queue *q)
+ return -EINVAL;
+
+ if (bt->trace_state != Blktrace_running)
+- blk_trace_cleanup(bt);
++ blk_trace_cleanup(q, bt);
+
+ return 0;
+ }
+@@ -572,7 +582,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+ ret = 0;
+ err:
+ if (ret)
+- blk_trace_free(bt);
++ blk_trace_free(q, bt);
+ return ret;
+ }
+
+@@ -1048,7 +1058,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
+ r.sector_from = cpu_to_be64(from);
+
+ __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+- rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
++ req_op(rq), rq->cmd_flags, BLK_TA_REMAP, 0,
+ sizeof(r), &r, blk_trace_request_get_cgid(rq));
+ rcu_read_unlock();
+ }
+@@ -1537,7 +1547,8 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
+
+ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
+ {
+- if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
++ if ((iter->ent->type != TRACE_BLK) ||
++ !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
+ return TRACE_TYPE_UNHANDLED;
+
+ return print_one_line(iter, true);
+@@ -1615,7 +1626,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
+
+ put_probe_ref();
+ synchronize_rcu();
+- blk_trace_free(bt);
++ blk_trace_free(q, bt);
+ return 0;
+ }
+
+@@ -1646,7 +1657,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
+ return 0;
+
+ free_bt:
+- blk_trace_free(bt);
++ blk_trace_free(q, bt);
+ return ret;
+ }
+
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index 8e2eb950aa829..85a36b19c2b80 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -345,7 +345,7 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ };
+
+@@ -394,7 +394,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
+ .func = bpf_trace_printk,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+- .arg1_type = ARG_PTR_TO_MEM,
++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE,
+ };
+
+@@ -446,9 +446,9 @@ static const struct bpf_func_proto bpf_seq_printf_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+- .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
++ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -463,7 +463,7 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -487,7 +487,7 @@ static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -648,7 +648,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -662,7 +662,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
+ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
+ {
+- int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+ struct perf_raw_frag frag = {
+ .copy = ctx_copy,
+ .size = ctx_size,
+@@ -679,8 +678,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+ };
+ struct perf_sample_data *sd;
+ struct pt_regs *regs;
++ int nest_level;
+ u64 ret;
+
++ preempt_disable();
++ nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
++
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+ ret = -EBUSY;
+ goto out;
+@@ -695,6 +698,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+ ret = __bpf_perf_event_output(regs, map, flags, sd);
+ out:
+ this_cpu_dec(bpf_event_output_nest_level);
++ preempt_enable();
+ return ret;
+ }
+
+@@ -776,6 +780,7 @@ static void do_bpf_send_signal(struct irq_work *entry)
+
+ work = container_of(entry, struct send_signal_irq_work, irq_work);
+ group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
++ put_task_struct(work->task);
+ }
+
+ static int bpf_send_signal_common(u32 sig, enum pid_type type)
+@@ -793,6 +798,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
+ return -EPERM;
+ if (unlikely(!nmi_uaccess_okay()))
+ return -EPERM;
++ /* Task should not be pid=1 to avoid kernel panic. */
++ if (unlikely(is_global_init(current)))
++ return -EPERM;
+
+ if (irqs_disabled()) {
+ /* Do an early check on signal validity. Otherwise,
+@@ -809,7 +817,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
+ * to the irq_work. The current task may change when queued
+ * irq works get executed.
+ */
+- work->task = current;
++ work->task = get_task_struct(current);
+ work->sig = sig;
+ work->type = type;
+ irq_work_queue(&work->irq_work);
+@@ -845,13 +853,23 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+
+ BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+ {
++ struct path copy;
+ long len;
+ char *p;
+
+ if (!sz)
+ return 0;
+
+- p = d_path(path, buf, sz);
++ /*
++ * The path pointer is verified as trusted and safe to use,
++ * but let's double check it's valid anyway to workaround
++ * potentially broken verifier.
++ */
++ len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
++ if (len < 0)
++ return len;
++
++ p = d_path(&copy, buf, sz);
+ if (IS_ERR(p)) {
+ len = PTR_ERR(p);
+ } else {
+@@ -958,7 +976,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_ANYTHING,
+ };
+@@ -1037,8 +1055,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_ktime_get_boot_ns:
+ return &bpf_ktime_get_boot_ns_proto;
+- case BPF_FUNC_ktime_get_coarse_ns:
+- return &bpf_ktime_get_coarse_ns_proto;
+ case BPF_FUNC_tail_call:
+ return &bpf_tail_call_proto;
+ case BPF_FUNC_get_current_pid_tgid:
+@@ -1209,7 +1225,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -1324,9 +1340,6 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
+ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
+ void *, buf, u32, size, u64, flags)
+ {
+-#ifndef CONFIG_X86
+- return -ENOENT;
+-#else
+ static const u32 br_entry_size = sizeof(struct perf_branch_entry);
+ struct perf_branch_stack *br_stack = ctx->data->br_stack;
+ u32 to_copy;
+@@ -1335,7 +1348,7 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
+ return -EINVAL;
+
+ if (unlikely(!br_stack))
+- return -EINVAL;
++ return -ENOENT;
+
+ if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
+ return br_stack->nr * br_entry_size;
+@@ -1347,7 +1360,6 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
+ memcpy(buf, br_stack->entries, to_copy);
+
+ return to_copy;
+-#endif
+ }
+
+ static const struct bpf_func_proto bpf_read_branch_records_proto = {
+@@ -1435,7 +1447,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -1489,7 +1501,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -1984,7 +1996,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ #ifdef CONFIG_UPROBE_EVENTS
+ if (flags & TRACE_EVENT_FL_UPROBE)
+ err = bpf_get_uprobe_info(event, fd_type, buf,
+- probe_offset,
++ probe_offset, probe_addr,
+ event->attr.type == PERF_TYPE_TRACEPOINT);
+ #endif
+ }
+diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
+index feebf57c64588..157a1d2d9802f 100644
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -988,8 +988,9 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
+ }
+ }
+
+- entry = tracefs_create_file("function_profile_enabled", 0644,
+- d_tracer, NULL, &ftrace_profile_fops);
++ entry = tracefs_create_file("function_profile_enabled",
++ TRACE_MODE_WRITE, d_tracer, NULL,
++ &ftrace_profile_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'function_profile_enabled' entry\n");
+ }
+@@ -1294,6 +1295,7 @@ static int ftrace_add_mod(struct trace_array *tr,
+ if (!ftrace_mod)
+ return -ENOMEM;
+
++ INIT_LIST_HEAD(&ftrace_mod->list);
+ ftrace_mod->func = kstrdup(func, GFP_KERNEL);
+ ftrace_mod->module = kstrdup(module, GFP_KERNEL);
+ ftrace_mod->enable = enable;
+@@ -1536,7 +1538,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
+ key.flags = end; /* overload flags, as it is unsigned long */
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+- if (end < pg->records[0].ip ||
++ if (pg->index == 0 ||
++ end < pg->records[0].ip ||
+ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+ continue;
+ rec = bsearch(&key, pg->records, pg->index,
+@@ -2900,6 +2903,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
+
+ ftrace_startup_enable(command);
+
++ /*
++ * If ftrace is in an undefined state, we just remove ops from list
++ * to prevent the NULL pointer, instead of totally rolling it back and
++ * free trampoline, because those actions could cause further damage.
++ */
++ if (unlikely(ftrace_disabled)) {
++ __unregister_ftrace_function(ops);
++ return -ENODEV;
++ }
++
+ ops->flags &= ~FTRACE_OPS_FL_ADDING;
+
+ return 0;
+@@ -2937,18 +2950,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
+ command |= FTRACE_UPDATE_TRACE_FUNC;
+ }
+
+- if (!command || !ftrace_enabled) {
+- /*
+- * If these are dynamic or per_cpu ops, they still
+- * need their data freed. Since, function tracing is
+- * not currently active, we can just free them
+- * without synchronizing all CPUs.
+- */
+- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
+- goto free_ops;
+-
+- return 0;
+- }
++ if (!command || !ftrace_enabled)
++ goto out;
+
+ /*
+ * If the ops uses a trampoline, then it needs to be
+@@ -2985,6 +2988,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
+ removed_ops = NULL;
+ ops->flags &= ~FTRACE_OPS_FL_REMOVING;
+
++out:
+ /*
+ * Dynamic ops may be freed, we must make sure that all
+ * callers are done before leaving this function.
+@@ -3012,7 +3016,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ synchronize_rcu_tasks();
+
+- free_ops:
+ ftrace_trampoline_free(ops);
+ }
+
+@@ -3173,7 +3176,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
+ /* if we can't allocate this size, try something smaller */
+ if (!order)
+ return -ENOMEM;
+- order >>= 1;
++ order--;
+ goto again;
+ }
+
+@@ -3189,6 +3192,22 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
+ return cnt;
+ }
+
++static void ftrace_free_pages(struct ftrace_page *pages)
++{
++ struct ftrace_page *pg = pages;
++
++ while (pg) {
++ if (pg->records) {
++ free_pages((unsigned long)pg->records, pg->order);
++ ftrace_number_of_pages -= 1 << pg->order;
++ }
++ pages = pg->next;
++ kfree(pg);
++ pg = pages;
++ ftrace_number_of_groups--;
++ }
++}
++
+ static struct ftrace_page *
+ ftrace_allocate_pages(unsigned long num_to_init)
+ {
+@@ -3227,17 +3246,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
+ return start_pg;
+
+ free_pages:
+- pg = start_pg;
+- while (pg) {
+- if (pg->records) {
+- free_pages((unsigned long)pg->records, pg->order);
+- ftrace_number_of_pages -= 1 << pg->order;
+- }
+- start_pg = pg->next;
+- kfree(pg);
+- pg = start_pg;
+- ftrace_number_of_groups--;
+- }
++ ftrace_free_pages(start_pg);
+ pr_info("ftrace: FAILED to allocate memory for functions\n");
+ return NULL;
+ }
+@@ -4419,7 +4428,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
+ * @ip: The instruction pointer address to remove the data from
+ *
+ * Returns the data if it is found, otherwise NULL.
+- * Note, if the data pointer is used as the data itself, (see
++ * Note, if the data pointer is used as the data itself, (see
+ * ftrace_func_mapper_find_ip(), then the return value may be meaningless,
+ * if the data pointer was set to zero.
+ */
+@@ -5145,8 +5154,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
+ __add_hash_entry(direct_functions, entry);
+
+ ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
+- if (ret)
+- remove_hash_entry(direct_functions, entry);
+
+ if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
+ ret = register_ftrace_function(&direct_ops);
+@@ -5155,6 +5162,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
+ }
+
+ if (ret) {
++ remove_hash_entry(direct_functions, entry);
+ kfree(entry);
+ if (!direct->count) {
+ list_del_rcu(&direct->next);
+@@ -5381,12 +5389,15 @@ int modify_ftrace_direct(unsigned long ip,
+ ret = 0;
+ }
+
+- if (unlikely(ret && new_direct)) {
+- direct->count++;
+- list_del_rcu(&new_direct->next);
+- synchronize_rcu_tasks();
+- kfree(new_direct);
+- ftrace_direct_func_count--;
++ if (ret) {
++ direct->addr = old_addr;
++ if (unlikely(new_direct)) {
++ direct->count++;
++ list_del_rcu(&new_direct->next);
++ synchronize_rcu_tasks();
++ kfree(new_direct);
++ ftrace_direct_func_count--;
++ }
+ }
+
+ out_unlock:
+@@ -5644,8 +5655,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
+
+ if (filter_hash) {
+ orig_hash = &iter->ops->func_hash->filter_hash;
+- if (iter->tr && !list_empty(&iter->tr->mod_trace))
+- iter->hash->flags |= FTRACE_HASH_FL_MOD;
++ if (iter->tr) {
++ if (list_empty(&iter->tr->mod_trace))
++ iter->hash->flags &= ~FTRACE_HASH_FL_MOD;
++ else
++ iter->hash->flags |= FTRACE_HASH_FL_MOD;
++ }
+ } else
+ orig_hash = &iter->ops->func_hash->notrace_hash;
+
+@@ -6109,10 +6124,10 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
+ struct dentry *parent)
+ {
+
+- trace_create_file("set_ftrace_filter", 0644, parent,
++ trace_create_file("set_ftrace_filter", TRACE_MODE_WRITE, parent,
+ ops, &ftrace_filter_fops);
+
+- trace_create_file("set_ftrace_notrace", 0644, parent,
++ trace_create_file("set_ftrace_notrace", TRACE_MODE_WRITE, parent,
+ ops, &ftrace_notrace_fops);
+ }
+
+@@ -6139,19 +6154,19 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
+ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
+ {
+
+- trace_create_file("available_filter_functions", 0444,
++ trace_create_file("available_filter_functions", TRACE_MODE_READ,
+ d_tracer, NULL, &ftrace_avail_fops);
+
+- trace_create_file("enabled_functions", 0444,
++ trace_create_file("enabled_functions", TRACE_MODE_READ,
+ d_tracer, NULL, &ftrace_enabled_fops);
+
+ ftrace_create_filter_files(&global_ops, d_tracer);
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- trace_create_file("set_graph_function", 0644, d_tracer,
++ trace_create_file("set_graph_function", TRACE_MODE_WRITE, d_tracer,
+ NULL,
+ &ftrace_graph_fops);
+- trace_create_file("set_graph_notrace", 0644, d_tracer,
++ trace_create_file("set_graph_notrace", TRACE_MODE_WRITE, d_tracer,
+ NULL,
+ &ftrace_graph_notrace_fops);
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+@@ -6175,9 +6190,11 @@ static int ftrace_process_locs(struct module *mod,
+ unsigned long *start,
+ unsigned long *end)
+ {
++ struct ftrace_page *pg_unuse = NULL;
+ struct ftrace_page *start_pg;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
++ unsigned long skipped = 0;
+ unsigned long count;
+ unsigned long *p;
+ unsigned long addr;
+@@ -6231,8 +6248,10 @@ static int ftrace_process_locs(struct module *mod,
+ * object files to satisfy alignments.
+ * Skip any NULL pointers.
+ */
+- if (!addr)
++ if (!addr) {
++ skipped++;
+ continue;
++ }
+
+ end_offset = (pg->index+1) * sizeof(pg->records[0]);
+ if (end_offset > PAGE_SIZE << pg->order) {
+@@ -6246,8 +6265,10 @@ static int ftrace_process_locs(struct module *mod,
+ rec->ip = addr;
+ }
+
+- /* We should have used all pages */
+- WARN_ON(pg->next);
++ if (pg->next) {
++ pg_unuse = pg->next;
++ pg->next = NULL;
++ }
+
+ /* Assign the last page to ftrace_pages */
+ ftrace_pages = pg;
+@@ -6269,6 +6290,11 @@ static int ftrace_process_locs(struct module *mod,
+ out:
+ mutex_unlock(&ftrace_lock);
+
++ /* We should have used all pages unless we skipped some */
++ if (pg_unuse) {
++ WARN_ON(!skipped);
++ ftrace_free_pages(pg_unuse);
++ }
+ return ret;
+ }
+
+@@ -6866,7 +6892,7 @@ void __init ftrace_init(void)
+ }
+
+ pr_info("ftrace: allocating %ld entries in %ld pages\n",
+- count, count / ENTRIES_PER_PAGE + 1);
++ count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
+
+ last_ftrace_enabled = ftrace_enabled = 1;
+
+@@ -7184,10 +7210,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type)
+ synchronize_rcu();
+
+ if ((type & TRACE_PIDS) && pid_list)
+- trace_free_pid_list(pid_list);
++ trace_pid_list_free(pid_list);
+
+ if ((type & TRACE_NO_PIDS) && no_pid_list)
+- trace_free_pid_list(no_pid_list);
++ trace_pid_list_free(no_pid_list);
+ }
+
+ void ftrace_clear_pids(struct trace_array *tr)
+@@ -7428,7 +7454,7 @@ pid_write(struct file *filp, const char __user *ubuf,
+
+ if (filtered_pids) {
+ synchronize_rcu();
+- trace_free_pid_list(filtered_pids);
++ trace_pid_list_free(filtered_pids);
+ } else if (pid_list && !other_pids) {
+ /* Register a probe to set whether to ignore the tracing of a task */
+ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+@@ -7494,10 +7520,10 @@ static const struct file_operations ftrace_no_pid_fops = {
+
+ void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
+ {
+- trace_create_file("set_ftrace_pid", 0644, d_tracer,
++ trace_create_file("set_ftrace_pid", TRACE_MODE_WRITE, d_tracer,
+ tr, &ftrace_pid_fops);
+- trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer,
+- tr, &ftrace_no_pid_fops);
++ trace_create_file("set_ftrace_notrace_pid", TRACE_MODE_WRITE,
++ d_tracer, tr, &ftrace_no_pid_fops);
+ }
+
+ void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,
+diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
+index 18b0f1cbb947f..e0c420eb0b2b4 100644
+--- a/kernel/trace/kprobe_event_gen_test.c
++++ b/kernel/trace/kprobe_event_gen_test.c
+@@ -35,6 +35,49 @@
+ static struct trace_event_file *gen_kprobe_test;
+ static struct trace_event_file *gen_kretprobe_test;
+
++#define KPROBE_GEN_TEST_FUNC "do_sys_open"
++
++/* X86 */
++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
++#define KPROBE_GEN_TEST_ARG0 "dfd=%ax"
++#define KPROBE_GEN_TEST_ARG1 "filename=%dx"
++#define KPROBE_GEN_TEST_ARG2 "flags=%cx"
++#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)"
++
++/* ARM64 */
++#elif defined(CONFIG_ARM64)
++#define KPROBE_GEN_TEST_ARG0 "dfd=%x0"
++#define KPROBE_GEN_TEST_ARG1 "filename=%x1"
++#define KPROBE_GEN_TEST_ARG2 "flags=%x2"
++#define KPROBE_GEN_TEST_ARG3 "mode=%x3"
++
++/* ARM */
++#elif defined(CONFIG_ARM)
++#define KPROBE_GEN_TEST_ARG0 "dfd=%r0"
++#define KPROBE_GEN_TEST_ARG1 "filename=%r1"
++#define KPROBE_GEN_TEST_ARG2 "flags=%r2"
++#define KPROBE_GEN_TEST_ARG3 "mode=%r3"
++
++/* RISCV */
++#elif defined(CONFIG_RISCV)
++#define KPROBE_GEN_TEST_ARG0 "dfd=%a0"
++#define KPROBE_GEN_TEST_ARG1 "filename=%a1"
++#define KPROBE_GEN_TEST_ARG2 "flags=%a2"
++#define KPROBE_GEN_TEST_ARG3 "mode=%a3"
++
++/* others */
++#else
++#define KPROBE_GEN_TEST_ARG0 NULL
++#define KPROBE_GEN_TEST_ARG1 NULL
++#define KPROBE_GEN_TEST_ARG2 NULL
++#define KPROBE_GEN_TEST_ARG3 NULL
++#endif
++
++static bool trace_event_file_is_valid(struct trace_event_file *input)
++{
++ return input && !IS_ERR(input);
++}
++
+ /*
+ * Test to make sure we can create a kprobe event, then add more
+ * fields.
+@@ -58,23 +101,23 @@ static int __init test_gen_kprobe_cmd(void)
+ * fields.
+ */
+ ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test",
+- "do_sys_open",
+- "dfd=%ax", "filename=%dx");
++ KPROBE_GEN_TEST_FUNC,
++ KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
+ if (ret)
+- goto free;
++ goto out;
+
+ /* Use kprobe_event_add_fields to add the rest of the fields */
+
+- ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
++ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
+ if (ret)
+- goto free;
++ goto out;
+
+ /*
+ * This actually creates the event.
+ */
+ ret = kprobe_event_gen_cmd_end(&cmd);
+ if (ret)
+- goto free;
++ goto out;
+
+ /*
+ * Now get the gen_kprobe_test event file. We need to prevent
+@@ -97,13 +140,13 @@ static int __init test_gen_kprobe_cmd(void)
+ goto delete;
+ }
+ out:
++ kfree(buf);
+ return ret;
+ delete:
++ if (trace_event_file_is_valid(gen_kprobe_test))
++ gen_kprobe_test = NULL;
+ /* We got an error after creating the event, delete it */
+- ret = kprobe_event_delete("gen_kprobe_test");
+- free:
+- kfree(buf);
+-
++ kprobe_event_delete("gen_kprobe_test");
+ goto out;
+ }
+
+@@ -128,17 +171,17 @@ static int __init test_gen_kretprobe_cmd(void)
+ * Define the kretprobe event.
+ */
+ ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
+- "do_sys_open",
++ KPROBE_GEN_TEST_FUNC,
+ "$retval");
+ if (ret)
+- goto free;
++ goto out;
+
+ /*
+ * This actually creates the event.
+ */
+ ret = kretprobe_event_gen_cmd_end(&cmd);
+ if (ret)
+- goto free;
++ goto out;
+
+ /*
+ * Now get the gen_kretprobe_test event file. We need to
+@@ -162,13 +205,13 @@ static int __init test_gen_kretprobe_cmd(void)
+ goto delete;
+ }
+ out:
++ kfree(buf);
+ return ret;
+ delete:
++ if (trace_event_file_is_valid(gen_kretprobe_test))
++ gen_kretprobe_test = NULL;
+ /* We got an error after creating the event, delete it */
+- ret = kprobe_event_delete("gen_kretprobe_test");
+- free:
+- kfree(buf);
+-
++ kprobe_event_delete("gen_kretprobe_test");
+ goto out;
+ }
+
+@@ -182,10 +225,12 @@ static int __init kprobe_event_gen_test_init(void)
+
+ ret = test_gen_kretprobe_cmd();
+ if (ret) {
+- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
+- "kprobes",
+- "gen_kretprobe_test", false));
+- trace_put_event_file(gen_kretprobe_test);
++ if (trace_event_file_is_valid(gen_kretprobe_test)) {
++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
++ "kprobes",
++ "gen_kretprobe_test", false));
++ trace_put_event_file(gen_kretprobe_test);
++ }
+ WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
+ }
+
+@@ -194,24 +239,30 @@ static int __init kprobe_event_gen_test_init(void)
+
+ static void __exit kprobe_event_gen_test_exit(void)
+ {
+- /* Disable the event or you can't remove it */
+- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+- "kprobes",
+- "gen_kprobe_test", false));
++ if (trace_event_file_is_valid(gen_kprobe_test)) {
++ /* Disable the event or you can't remove it */
++ WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
++ "kprobes",
++ "gen_kprobe_test", false));
++
++ /* Now give the file and instance back */
++ trace_put_event_file(gen_kprobe_test);
++ }
+
+- /* Now give the file and instance back */
+- trace_put_event_file(gen_kprobe_test);
+
+ /* Now unregister and free the event */
+ WARN_ON(kprobe_event_delete("gen_kprobe_test"));
+
+- /* Disable the event or you can't remove it */
+- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+- "kprobes",
+- "gen_kretprobe_test", false));
++ if (trace_event_file_is_valid(gen_kretprobe_test)) {
++ /* Disable the event or you can't remove it */
++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
++ "kprobes",
++ "gen_kretprobe_test", false));
++
++ /* Now give the file and instance back */
++ trace_put_event_file(gen_kretprobe_test);
++ }
+
+- /* Now give the file and instance back */
+- trace_put_event_file(gen_kretprobe_test);
+
+ /* Now unregister and free the event */
+ WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
+diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
+new file mode 100644
+index 0000000000000..4483ef70b5626
+--- /dev/null
++++ b/kernel/trace/pid_list.c
+@@ -0,0 +1,160 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
++ */
++#include <linux/vmalloc.h>
++#include <linux/slab.h>
++#include "trace.h"
++
++/**
++ * trace_pid_list_is_set - test if the pid is set in the list
++ * @pid_list: The pid list to test
++ * @pid: The pid to see if set in the list.
++ *
++ * Tests if @pid is set in the @pid_list. This is usually called
++ * from the scheduler when a task is scheduled. Its pid is checked
++ * if it should be traced or not.
++ *
++ * Return true if the pid is in the list, false otherwise.
++ */
++bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
++{
++ /*
++ * If pid_max changed after filtered_pids was created, we
++ * by default ignore all pids greater than the previous pid_max.
++ */
++ if (pid >= pid_list->pid_max)
++ return false;
++
++ return test_bit(pid, pid_list->pids);
++}
++
++/**
++ * trace_pid_list_set - add a pid to the list
++ * @pid_list: The pid list to add the @pid to.
++ * @pid: The pid to add.
++ *
++ * Adds @pid to @pid_list. This is usually done explicitly by a user
++ * adding a task to be traced, or indirectly by the fork function
++ * when children should be traced and a task's pid is in the list.
++ *
++ * Return 0 on success, negative otherwise.
++ */
++int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
++{
++ /* Sorry, but we don't support pid_max changing after setting */
++ if (pid >= pid_list->pid_max)
++ return -EINVAL;
++
++ set_bit(pid, pid_list->pids);
++
++ return 0;
++}
++
++/**
++ * trace_pid_list_clear - remove a pid from the list
++ * @pid_list: The pid list to remove the @pid from.
++ * @pid: The pid to remove.
++ *
++ * Removes @pid from @pid_list. This is usually done explicitly by a user
++ * removing tasks from tracing, or indirectly by the exit function
++ * when a task that is set to be traced exits.
++ *
++ * Return 0 on success, negative otherwise.
++ */
++int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
++{
++ /* Sorry, but we don't support pid_max changing after setting */
++ if (pid >= pid_list->pid_max)
++ return -EINVAL;
++
++ clear_bit(pid, pid_list->pids);
++
++ return 0;
++}
++
++/**
++ * trace_pid_list_next - return the next pid in the list
++ * @pid_list: The pid list to examine.
++ * @pid: The pid to start from
++ * @next: The pointer to place the pid that is set starting from @pid.
++ *
++ * Looks for the next consecutive pid that is in @pid_list starting
++ * at the pid specified by @pid. If one is set (including @pid), then
++ * that pid is placed into @next.
++ *
++ * Return 0 when a pid is found, -1 if there are no more pids included.
++ */
++int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
++ unsigned int *next)
++{
++ pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
++
++ if (pid < pid_list->pid_max) {
++ *next = pid;
++ return 0;
++ }
++ return -1;
++}
++
++/**
++ * trace_pid_list_first - return the first pid in the list
++ * @pid_list: The pid list to examine.
++ * @pid: The pointer to place the first found pid that is set.
++ *
++ * Looks for the first pid that is set in @pid_list, and places it
++ * into @pid if found.
++ *
++ * Return 0 when a pid is found, -1 if there are no pids set.
++ */
++int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
++{
++ unsigned int first;
++
++ first = find_first_bit(pid_list->pids, pid_list->pid_max);
++
++ if (first < pid_list->pid_max) {
++ *pid = first;
++ return 0;
++ }
++ return -1;
++}
++
++/**
++ * trace_pid_list_alloc - create a new pid_list
++ *
++ * Allocates a new pid_list to store pids into.
++ *
++ * Returns the pid_list on success, NULL otherwise.
++ */
++struct trace_pid_list *trace_pid_list_alloc(void)
++{
++ struct trace_pid_list *pid_list;
++
++ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
++ if (!pid_list)
++ return NULL;
++
++ pid_list->pid_max = READ_ONCE(pid_max);
++
++ pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
++ if (!pid_list->pids) {
++ kfree(pid_list);
++ return NULL;
++ }
++ return pid_list;
++}
++
++/**
++ * trace_pid_list_free - Frees an allocated pid_list.
++ *
++ * Frees the memory for a pid_list that was allocated.
++ */
++void trace_pid_list_free(struct trace_pid_list *pid_list)
++{
++ if (!pid_list)
++ return;
++
++ vfree(pid_list->pids);
++ kfree(pid_list);
++}
+diff --git a/kernel/trace/pid_list.h b/kernel/trace/pid_list.h
+new file mode 100644
+index 0000000000000..80d0ecfe1536e
+--- /dev/null
++++ b/kernel/trace/pid_list.h
+@@ -0,0 +1,13 @@
++// SPDX-License-Identifier: GPL-2.0
++
++/* Do not include this file directly. */
++
++#ifndef _TRACE_INTERNAL_PID_LIST_H
++#define _TRACE_INTERNAL_PID_LIST_H
++
++struct trace_pid_list {
++ int pid_max;
++ unsigned long *pids;
++};
++
++#endif /* _TRACE_INTERNAL_PID_LIST_H */
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index c5a3fbf19617e..db7cefd196cec 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -405,6 +405,7 @@ struct rb_irq_work {
+ struct irq_work work;
+ wait_queue_head_t waiters;
+ wait_queue_head_t full_waiters;
++ long wait_index;
+ bool waiters_pending;
+ bool full_waiters_pending;
+ bool wakeup_full;
+@@ -509,6 +510,7 @@ struct ring_buffer_per_cpu {
+ local_t committing;
+ local_t commits;
+ local_t pages_touched;
++ local_t pages_lost;
+ local_t pages_read;
+ long last_pages_touch;
+ size_t shortest_full;
+@@ -518,6 +520,8 @@ struct ring_buffer_per_cpu {
+ rb_time_t before_stamp;
+ u64 event_stamp[MAX_NEST];
+ u64 read_stamp;
++ /* pages removed since last reset */
++ unsigned long pages_removed;
+ /* ring buffer pages to update, > 0 to add, < 0 to remove */
+ long nr_pages_to_update;
+ struct list_head new_pages; /* new pages to add */
+@@ -531,6 +535,7 @@ struct trace_buffer {
+ unsigned flags;
+ int cpus;
+ atomic_t record_disabled;
++ atomic_t resizing;
+ cpumask_var_t cpumask;
+
+ struct lock_class_key *reader_lock_key;
+@@ -553,6 +558,7 @@ struct ring_buffer_iter {
+ struct buffer_page *head_page;
+ struct buffer_page *cache_reader_page;
+ unsigned long cache_read;
++ unsigned long cache_pages_removed;
+ u64 read_stamp;
+ u64 page_stamp;
+ struct ring_buffer_event *event;
+@@ -857,10 +863,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
+ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
+ {
+ size_t read;
++ size_t lost;
+ size_t cnt;
+
+ read = local_read(&buffer->buffers[cpu]->pages_read);
++ lost = local_read(&buffer->buffers[cpu]->pages_lost);
+ cnt = local_read(&buffer->buffers[cpu]->pages_touched);
++
++ if (WARN_ON_ONCE(cnt < lost))
++ return 0;
++
++ cnt -= lost;
++
+ /* The reader can read an empty page, but not more than that */
+ if (cnt < read) {
+ WARN_ON_ONCE(read > cnt + 1);
+@@ -870,6 +884,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
+ return cnt - read;
+ }
+
++static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
++{
++ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
++ size_t nr_pages;
++ size_t dirty;
++
++ nr_pages = cpu_buffer->nr_pages;
++ if (!nr_pages || !full)
++ return true;
++
++ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
++
++ return (dirty * 100) > (full * nr_pages);
++}
++
+ /*
+ * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+ *
+@@ -881,12 +910,55 @@ static void rb_wake_up_waiters(struct irq_work *work)
+ struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
+ wake_up_all(&rbwork->waiters);
+- if (rbwork->wakeup_full) {
++ if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+ rbwork->wakeup_full = false;
++ rbwork->full_waiters_pending = false;
+ wake_up_all(&rbwork->full_waiters);
+ }
+ }
+
++/**
++ * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
++ * @buffer: The ring buffer to wake waiters on
++ *
++ * When a file that represents a ring buffer is being closed,
++ * it is prudent to wake up any waiters that are on it.
++ */
++void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
++{
++ struct ring_buffer_per_cpu *cpu_buffer;
++ struct rb_irq_work *rbwork;
++
++ if (!buffer)
++ return;
++
++ if (cpu == RING_BUFFER_ALL_CPUS) {
++
++ /* Wake up individual ones too. One level recursion */
++ for_each_buffer_cpu(buffer, cpu)
++ ring_buffer_wake_waiters(buffer, cpu);
++
++ rbwork = &buffer->irq_work;
++ } else {
++ if (WARN_ON_ONCE(!buffer->buffers))
++ return;
++ if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
++ return;
++
++ cpu_buffer = buffer->buffers[cpu];
++ /* The CPU buffer may not have been initialized yet */
++ if (!cpu_buffer)
++ return;
++ rbwork = &cpu_buffer->irq_work;
++ }
++
++ rbwork->wait_index++;
++ /* make sure the waiters see the new index */
++ smp_wmb();
++
++ rb_wake_up_waiters(&rbwork->work);
++}
++
+ /**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+@@ -902,6 +974,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+ struct ring_buffer_per_cpu *cpu_buffer;
+ DEFINE_WAIT(wait);
+ struct rb_irq_work *work;
++ long wait_index;
+ int ret = 0;
+
+ /*
+@@ -920,6 +993,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+ work = &cpu_buffer->irq_work;
+ }
+
++ wait_index = READ_ONCE(work->wait_index);
+
+ while (true) {
+ if (full)
+@@ -964,26 +1038,29 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+ !ring_buffer_empty_cpu(buffer, cpu)) {
+ unsigned long flags;
+ bool pagebusy;
+- size_t nr_pages;
+- size_t dirty;
++ bool done;
+
+ if (!full)
+ break;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+- nr_pages = cpu_buffer->nr_pages;
+- dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
++ done = !pagebusy && full_hit(buffer, cpu, full);
++
+ if (!cpu_buffer->shortest_full ||
+- cpu_buffer->shortest_full < full)
++ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+- if (!pagebusy &&
+- (!nr_pages || (dirty * 100) > full * nr_pages))
++ if (done)
+ break;
+ }
+
+ schedule();
++
++ /* Make sure to see the new wait index */
++ smp_rmb();
++ if (wait_index != work->wait_index)
++ break;
+ }
+
+ if (full)
+@@ -1000,6 +1077,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+ * @cpu: the cpu buffer to wait on
+ * @filp: the file descriptor
+ * @poll_table: The poll descriptor
++ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+@@ -1009,14 +1087,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+ * zero otherwise.
+ */
+ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
+- struct file *filp, poll_table *poll_table)
++ struct file *filp, poll_table *poll_table, int full)
+ {
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct rb_irq_work *work;
+
+- if (cpu == RING_BUFFER_ALL_CPUS)
++ if (cpu == RING_BUFFER_ALL_CPUS) {
+ work = &buffer->irq_work;
+- else {
++ full = 0;
++ } else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -EINVAL;
+
+@@ -1024,8 +1103,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
+ work = &cpu_buffer->irq_work;
+ }
+
+- poll_wait(filp, &work->waiters, poll_table);
+- work->waiters_pending = true;
++ if (full) {
++ poll_wait(filp, &work->full_waiters, poll_table);
++ work->full_waiters_pending = true;
++ } else {
++ poll_wait(filp, &work->waiters, poll_table);
++ work->waiters_pending = true;
++ }
++
+ /*
+ * There's a tight race between setting the waiters_pending and
+ * checking if the ring buffer is empty. Once the waiters_pending bit
+@@ -1041,6 +1126,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
+ */
+ smp_mb();
+
++ if (full)
++ return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
++
+ if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+ (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+ return EPOLLIN | EPOLLRDNORM;
+@@ -1460,19 +1548,6 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
+ return 0;
+ }
+
+-/**
+- * rb_check_list - make sure a pointer to a list has the last bits zero
+- */
+-static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
+- struct list_head *list)
+-{
+- if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
+- return 1;
+- if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
+- return 1;
+- return 0;
+-}
+-
+ /**
+ * rb_check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+@@ -1482,36 +1557,27 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
+ */
+ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+ {
+- struct list_head *head = cpu_buffer->pages;
+- struct buffer_page *bpage, *tmp;
+-
+- /* Reset the head page if it exists */
+- if (cpu_buffer->head_page)
+- rb_set_head_page(cpu_buffer);
+-
+- rb_head_page_deactivate(cpu_buffer);
++ struct list_head *head = rb_list_head(cpu_buffer->pages);
++ struct list_head *tmp;
+
+- if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+- return -1;
+- if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
++ if (RB_WARN_ON(cpu_buffer,
++ rb_list_head(rb_list_head(head->next)->prev) != head))
+ return -1;
+
+- if (rb_check_list(cpu_buffer, head))
++ if (RB_WARN_ON(cpu_buffer,
++ rb_list_head(rb_list_head(head->prev)->next) != head))
+ return -1;
+
+- list_for_each_entry_safe(bpage, tmp, head, list) {
++ for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
+ if (RB_WARN_ON(cpu_buffer,
+- bpage->list.next->prev != &bpage->list))
++ rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
+ return -1;
++
+ if (RB_WARN_ON(cpu_buffer,
+- bpage->list.prev->next != &bpage->list))
+- return -1;
+- if (rb_check_list(cpu_buffer, &bpage->list))
++ rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
+ return -1;
+ }
+
+- rb_head_page_activate(cpu_buffer);
+-
+ return 0;
+ }
+
+@@ -1680,11 +1746,13 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+ struct list_head *head = cpu_buffer->pages;
+ struct buffer_page *bpage, *tmp;
+
+- free_buffer_page(cpu_buffer->reader_page);
++ irq_work_sync(&cpu_buffer->irq_work.work);
+
+- rb_head_page_deactivate(cpu_buffer);
++ free_buffer_page(cpu_buffer->reader_page);
+
+ if (head) {
++ rb_head_page_deactivate(cpu_buffer);
++
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+@@ -1786,6 +1854,8 @@ ring_buffer_free(struct trace_buffer *buffer)
+
+ cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
+
++ irq_work_sync(&buffer->irq_work.work);
++
+ for_each_buffer_cpu(buffer, cpu)
+ rb_free_cpu_buffer(buffer->buffers[cpu]);
+
+@@ -1865,6 +1935,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+ to_remove = rb_list_head(to_remove)->next;
+ head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
+ }
++ /* Read iterators need to reset themselves when some pages removed */
++ cpu_buffer->pages_removed += nr_removed;
+
+ next_page = rb_list_head(to_remove)->next;
+
+@@ -1886,12 +1958,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+ cpu_buffer->head_page = list_entry(next_page,
+ struct buffer_page, list);
+
+- /*
+- * change read pointer to make sure any read iterators reset
+- * themselves
+- */
+- cpu_buffer->read = 0;
+-
+ /* pages are removed, resume tracing and then free the pages */
+ atomic_dec(&cpu_buffer->record_disabled);
+ raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+@@ -1920,6 +1986,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+ */
+ local_add(page_entries, &cpu_buffer->overrun);
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++ local_inc(&cpu_buffer->pages_lost);
+ }
+
+ /*
+@@ -2071,7 +2138,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
+
+ /* prevent another thread from changing buffer sizes */
+ mutex_lock(&buffer->mutex);
+-
++ atomic_inc(&buffer->resizing);
+
+ if (cpu_id == RING_BUFFER_ALL_CPUS) {
+ /*
+@@ -2210,6 +2277,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
+ atomic_dec(&buffer->record_disabled);
+ }
+
++ atomic_dec(&buffer->resizing);
+ mutex_unlock(&buffer->mutex);
+ return 0;
+
+@@ -2230,6 +2298,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
+ }
+ }
+ out_err_unlock:
++ atomic_dec(&buffer->resizing);
+ mutex_unlock(&buffer->mutex);
+ return err;
+ }
+@@ -2404,6 +2473,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
+ */
+ local_add(entries, &cpu_buffer->overrun);
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++ local_inc(&cpu_buffer->pages_lost);
+
+ /*
+ * The entries will be zeroed out when we move the
+@@ -2572,6 +2642,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+ /* Mark the rest of the page with padding */
+ rb_event_set_padding(event);
+
++ /* Make sure the padding is visible before the write update */
++ smp_wmb();
++
+ /* Set the write back to the previous setting */
+ local_sub(length, &tail_page->write);
+ return;
+@@ -2583,6 +2656,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
+ /* time delta must be non zero */
+ event->time_delta = 1;
+
++ /* Make sure the padding is visible before the tail_page->write update */
++ smp_wmb();
++
+ /* Set write to end of buffer */
+ length = (tail + length) - BUF_PAGE_SIZE;
+ local_sub(length, &tail_page->write);
+@@ -2971,6 +3047,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+ if (RB_WARN_ON(cpu_buffer,
+ rb_is_reader_page(cpu_buffer->tail_page)))
+ return;
++ /*
++ * No need for a memory barrier here, as the update
++ * of the tail_page did it for this page.
++ */
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ rb_inc_page(&cpu_buffer->commit_page);
+@@ -2980,6 +3060,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+ while (rb_commit_index(cpu_buffer) !=
+ rb_page_write(cpu_buffer->commit_page)) {
+
++ /* Make sure the readers see the content of what is committed. */
++ smp_wmb();
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ RB_WARN_ON(cpu_buffer,
+@@ -3055,10 +3137,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ static __always_inline void
+ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+ {
+- size_t nr_pages;
+- size_t dirty;
+- size_t full;
+-
+ if (buffer->irq_work.waiters_pending) {
+ buffer->irq_work.waiters_pending = false;
+ /* irq_work_queue() supplies it's own memory barriers */
+@@ -3082,10 +3160,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+
+ cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
+
+- full = cpu_buffer->shortest_full;
+- nr_pages = cpu_buffer->nr_pages;
+- dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
+- if (full && nr_pages && (dirty * 100) <= full * nr_pages)
++ if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
+ return;
+
+ cpu_buffer->irq_work.wakeup_full = true;
+@@ -4274,6 +4349,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
+
+ iter->cache_reader_page = iter->head_page;
+ iter->cache_read = cpu_buffer->read;
++ iter->cache_pages_removed = cpu_buffer->pages_removed;
+
+ if (iter->head) {
+ iter->read_stamp = cpu_buffer->read_stamp;
+@@ -4547,6 +4623,38 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
+ arch_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+
++ /*
++ * The writer has preemption disabled; wait for it. But not forever.
++ * Although, 1 second is pretty much "forever"
++ */
++#define USECS_WAIT 1000000
++ for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
++ /* If the write is past the end of page, a writer is still updating it */
++ if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
++ break;
++
++ udelay(1);
++
++ /* Get the latest version of the reader write value */
++ smp_rmb();
++ }
++
++ /* The writer is not moving forward? Something is wrong */
++ if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT))
++ reader = NULL;
++
++ /*
++ * Make sure we see any padding after the write update
++ * (see rb_reset_tail()).
++ *
++ * In addition, a writer may be writing on the reader page
++ * if the page has not been fully filled, so the read barrier
++ * is also needed to make sure we see the content of what is
++ * committed by the writer (see rb_set_commit_to_write()).
++ */
++ smp_rmb();
++
++
+ return reader;
+ }
+
+@@ -4695,12 +4803,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+ buffer = cpu_buffer->buffer;
+
+ /*
+- * Check if someone performed a consuming read to
+- * the buffer. A consuming read invalidates the iterator
+- * and we need to reset the iterator in this case.
++ * Check if someone performed a consuming read to the buffer
++ * or removed some pages from the buffer. In these cases,
++ * iterator was invalidated and we need to reset it.
+ */
+ if (unlikely(iter->cache_read != cpu_buffer->read ||
+- iter->cache_reader_page != cpu_buffer->reader_page))
++ iter->cache_reader_page != cpu_buffer->reader_page ||
++ iter->cache_pages_removed != cpu_buffer->pages_removed))
+ rb_iter_reset(iter);
+
+ again:
+@@ -5091,28 +5200,34 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_size);
+
++static void rb_clear_buffer_page(struct buffer_page *page)
++{
++ local_set(&page->write, 0);
++ local_set(&page->entries, 0);
++ rb_init_page(page->page);
++ page->read = 0;
++}
++
+ static void
+ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+ {
++ struct buffer_page *page;
++
+ rb_head_page_deactivate(cpu_buffer);
+
+ cpu_buffer->head_page
+ = list_entry(cpu_buffer->pages, struct buffer_page, list);
+- local_set(&cpu_buffer->head_page->write, 0);
+- local_set(&cpu_buffer->head_page->entries, 0);
+- local_set(&cpu_buffer->head_page->page->commit, 0);
+-
+- cpu_buffer->head_page->read = 0;
++ rb_clear_buffer_page(cpu_buffer->head_page);
++ list_for_each_entry(page, cpu_buffer->pages, list) {
++ rb_clear_buffer_page(page);
++ }
+
+ cpu_buffer->tail_page = cpu_buffer->head_page;
+ cpu_buffer->commit_page = cpu_buffer->head_page;
+
+ INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
+- local_set(&cpu_buffer->reader_page->write, 0);
+- local_set(&cpu_buffer->reader_page->entries, 0);
+- local_set(&cpu_buffer->reader_page->page->commit, 0);
+- cpu_buffer->reader_page->read = 0;
++ rb_clear_buffer_page(cpu_buffer->reader_page);
+
+ local_set(&cpu_buffer->entries_bytes, 0);
+ local_set(&cpu_buffer->overrun, 0);
+@@ -5122,6 +5237,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+ local_set(&cpu_buffer->committing, 0);
+ local_set(&cpu_buffer->commits, 0);
+ local_set(&cpu_buffer->pages_touched, 0);
++ local_set(&cpu_buffer->pages_lost, 0);
+ local_set(&cpu_buffer->pages_read, 0);
+ cpu_buffer->last_pages_touch = 0;
+ cpu_buffer->shortest_full = 0;
+@@ -5137,6 +5253,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+ cpu_buffer->last_overrun = 0;
+
+ rb_head_page_activate(cpu_buffer);
++ cpu_buffer->pages_removed = 0;
+ }
+
+ /* Must have disabled the cpu buffer then done a synchronize_rcu */
+@@ -5189,6 +5306,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
+
++/* Flag to ensure proper resetting of atomic variables */
++#define RESET_BIT (1 << 30)
++
+ /**
+ * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
+ * @buffer: The ring buffer to reset a per cpu buffer of
+@@ -5205,20 +5325,27 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
+ for_each_online_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+- atomic_inc(&cpu_buffer->resize_disabled);
++ atomic_add(RESET_BIT, &cpu_buffer->resize_disabled);
+ atomic_inc(&cpu_buffer->record_disabled);
+ }
+
+ /* Make sure all commits have finished */
+ synchronize_rcu();
+
+- for_each_online_buffer_cpu(buffer, cpu) {
++ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
++ /*
++ * If a CPU came online during the synchronize_rcu(), then
++ * ignore it.
++ */
++ if (!(atomic_read(&cpu_buffer->resize_disabled) & RESET_BIT))
++ continue;
++
+ reset_disabled_cpu_buffer(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+- atomic_dec(&cpu_buffer->resize_disabled);
++ atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled);
+ }
+
+ mutex_unlock(&buffer->mutex);
+@@ -5233,6 +5360,9 @@ void ring_buffer_reset(struct trace_buffer *buffer)
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
++ /* prevent another thread from changing buffer sizes */
++ mutex_lock(&buffer->mutex);
++
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+@@ -5251,6 +5381,8 @@ void ring_buffer_reset(struct trace_buffer *buffer)
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
++
++ mutex_unlock(&buffer->mutex);
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset);
+
+@@ -5368,6 +5500,15 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
+ if (local_read(&cpu_buffer_b->committing))
+ goto out_dec;
+
++ /*
++ * When resize is in progress, we cannot swap it because
++ * it will mess the state of the cpu buffer.
++ */
++ if (atomic_read(&buffer_a->resizing))
++ goto out_dec;
++ if (atomic_read(&buffer_b->resizing))
++ goto out_dec;
++
+ buffer_a->buffers[cpu] = cpu_buffer_b;
+ buffer_b->buffers[cpu] = cpu_buffer_a;
+
+@@ -5450,11 +5591,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
+ */
+ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data)
+ {
+- struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
++ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_data_page *bpage = data;
+ struct page *page = virt_to_page(bpage);
+ unsigned long flags;
+
++ if (!buffer || !buffer->buffers || !buffer->buffers[cpu])
++ return;
++
++ cpu_buffer = buffer->buffers[cpu];
++
+ /* If the page is still in use someplace else, we can't reuse it */
+ if (page_ref_count(page) > 1)
+ goto out;
+@@ -5569,7 +5715,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
+ unsigned int pos = 0;
+ unsigned int size;
+
+- if (full)
++ /*
++ * If a full page is expected, this can still be returned
++ * if there's been a previous partial read and the
++ * rest of the page can be read and the commit page is off
++ * the reader page.
++ */
++ if (full &&
++ (!read || (len < (commit - read)) ||
++ cpu_buffer->reader_page == cpu_buffer->commit_page))
+ goto out_unlock;
+
+ if (len > (commit - read))
+diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
+index 0b15e975d2c2c..8d77526892f45 100644
+--- a/kernel/trace/synth_event_gen_test.c
++++ b/kernel/trace/synth_event_gen_test.c
+@@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void)
+
+ /* Now generate a gen_synth_test event */
+ ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
+- out:
++ free:
++ kfree(buf);
+ return ret;
+ delete:
+ /* We got an error after creating the event, delete it */
+ synth_event_delete("gen_synth_test");
+- free:
+- kfree(buf);
+-
+- goto out;
++ goto free;
+ }
+
+ /*
+@@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void)
+
+ /* Now trace an empty_synth_test event */
+ ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals));
+- out:
++ free:
++ kfree(buf);
+ return ret;
+ delete:
+ /* We got an error after creating the event, delete it */
+ synth_event_delete("empty_synth_test");
+- free:
+- kfree(buf);
+-
+- goto out;
++ goto free;
+ }
+
+ static struct synth_field_desc create_synth_test_fields[] = {
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index bc677cd642240..f3f1e3c2f421c 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -235,7 +235,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
+ static int __init set_trace_boot_options(char *str)
+ {
+ strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+- return 0;
++ return 1;
+ }
+ __setup("trace_options=", set_trace_boot_options);
+
+@@ -246,12 +246,16 @@ static int __init set_trace_boot_clock(char *str)
+ {
+ strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
+ trace_boot_clock = trace_boot_clock_buf;
+- return 0;
++ return 1;
+ }
+ __setup("trace_clock=", set_trace_boot_clock);
+
+ static int __init set_tracepoint_printk(char *str)
+ {
++ /* Ignore the "tp_printk_stop_on_boot" param */
++ if (*str == '_')
++ return 0;
++
+ if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
+ tracepoint_printk = 1;
+ return 1;
+@@ -512,12 +516,6 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
+ return 0;
+ }
+
+-void trace_free_pid_list(struct trace_pid_list *pid_list)
+-{
+- vfree(pid_list->pids);
+- kfree(pid_list);
+-}
+-
+ /**
+ * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ * @filtered_pids: The list of pids to check
+@@ -528,14 +526,7 @@ void trace_free_pid_list(struct trace_pid_list *pid_list)
+ bool
+ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+ {
+- /*
+- * If pid_max changed after filtered_pids was created, we
+- * by default ignore all pids greater than the previous pid_max.
+- */
+- if (search_pid >= filtered_pids->pid_max)
+- return false;
+-
+- return test_bit(search_pid, filtered_pids->pids);
++ return trace_pid_list_is_set(filtered_pids, search_pid);
+ }
+
+ /**
+@@ -592,15 +583,11 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+ return;
+ }
+
+- /* Sorry, but we don't support pid_max changing after setting */
+- if (task->pid >= pid_list->pid_max)
+- return;
+-
+ /* "self" is set for forks, and NULL for exits */
+ if (self)
+- set_bit(task->pid, pid_list->pids);
++ trace_pid_list_set(pid_list, task->pid);
+ else
+- clear_bit(task->pid, pid_list->pids);
++ trace_pid_list_clear(pid_list, task->pid);
+ }
+
+ /**
+@@ -617,18 +604,19 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+ */
+ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+ {
+- unsigned long pid = (unsigned long)v;
++ long pid = (unsigned long)v;
++ unsigned int next;
+
+ (*pos)++;
+
+ /* pid already is +1 of the actual previous bit */
+- pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
++ if (trace_pid_list_next(pid_list, pid, &next) < 0)
++ return NULL;
+
+- /* Return pid + 1 to allow zero to be represented */
+- if (pid < pid_list->pid_max)
+- return (void *)(pid + 1);
++ pid = next;
+
+- return NULL;
++ /* Return pid + 1 to allow zero to be represented */
++ return (void *)(pid + 1);
+ }
+
+ /**
+@@ -645,12 +633,14 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+ void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+ {
+ unsigned long pid;
++ unsigned int first;
+ loff_t l = 0;
+
+- pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+- if (pid >= pid_list->pid_max)
++ if (trace_pid_list_first(pid_list, &first) < 0)
+ return NULL;
+
++ pid = first;
++
+ /* Return pid + 1 so that zero can be the exit value */
+ for (pid++; pid && l < *pos;
+ pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+@@ -686,7 +676,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
+ unsigned long val;
+ int nr_pids = 0;
+ ssize_t read = 0;
+- ssize_t ret = 0;
++ ssize_t ret;
+ loff_t pos;
+ pid_t pid;
+
+@@ -699,55 +689,48 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
+ * the user. If the operation fails, then the current list is
+ * not modified.
+ */
+- pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
++ pid_list = trace_pid_list_alloc();
+ if (!pid_list) {
+ trace_parser_put(&parser);
+ return -ENOMEM;
+ }
+
+- pid_list->pid_max = READ_ONCE(pid_max);
+-
+- /* Only truncating will shrink pid_max */
+- if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+- pid_list->pid_max = filtered_pids->pid_max;
+-
+- pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+- if (!pid_list->pids) {
+- trace_parser_put(&parser);
+- kfree(pid_list);
+- return -ENOMEM;
+- }
+-
+ if (filtered_pids) {
+ /* copy the current bits to the new max */
+- for_each_set_bit(pid, filtered_pids->pids,
+- filtered_pids->pid_max) {
+- set_bit(pid, pid_list->pids);
++ ret = trace_pid_list_first(filtered_pids, &pid);
++ while (!ret) {
++ trace_pid_list_set(pid_list, pid);
++ ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
+ nr_pids++;
+ }
+ }
+
++ ret = 0;
+ while (cnt > 0) {
+
+ pos = 0;
+
+ ret = trace_get_user(&parser, ubuf, cnt, &pos);
+- if (ret < 0 || !trace_parser_loaded(&parser))
++ if (ret < 0)
+ break;
+
+ read += ret;
+ ubuf += ret;
+ cnt -= ret;
+
++ if (!trace_parser_loaded(&parser))
++ break;
++
+ ret = -EINVAL;
+ if (kstrtoul(parser.buffer, 0, &val))
+ break;
+- if (val >= pid_list->pid_max)
+- break;
+
+ pid = (pid_t)val;
+
+- set_bit(pid, pid_list->pids);
++ if (trace_pid_list_set(pid_list, pid) < 0) {
++ ret = -1;
++ break;
++ }
+ nr_pids++;
+
+ trace_parser_clear(&parser);
+@@ -756,14 +739,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
+ trace_parser_put(&parser);
+
+ if (ret < 0) {
+- trace_free_pid_list(pid_list);
++ trace_pid_list_free(pid_list);
+ return ret;
+ }
+
+ if (!nr_pids) {
+ /* Cleared the list of pids */
+- trace_free_pid_list(pid_list);
+- read = ret;
++ trace_pid_list_free(pid_list);
+ pid_list = NULL;
+ }
+
+@@ -1008,13 +990,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
+ ring_buffer_unlock_commit(buffer, event);
+ }
+
+-/**
+- * __trace_puts - write a constant string into the trace buffer.
+- * @ip: The address of the caller
+- * @str: The constant string to write
+- * @size: The size of the string.
+- */
+-int __trace_puts(unsigned long ip, const char *str, int size)
++int __trace_array_puts(struct trace_array *tr, unsigned long ip,
++ const char *str, int size)
+ {
+ struct ring_buffer_event *event;
+ struct trace_buffer *buffer;
+@@ -1022,7 +999,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
+ unsigned int trace_ctx;
+ int alloc;
+
+- if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
++ if (!(tr->trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ if (unlikely(tracing_selftest_running || tracing_disabled))
+@@ -1031,7 +1008,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
+ alloc = sizeof(*entry) + size + 2; /* possible \n added */
+
+ trace_ctx = tracing_gen_ctx();
+- buffer = global_trace.array_buffer.buffer;
++ buffer = tr->array_buffer.buffer;
+ ring_buffer_nest_start(buffer);
+ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
+ trace_ctx);
+@@ -1053,11 +1030,23 @@ int __trace_puts(unsigned long ip, const char *str, int size)
+ entry->buf[size] = '\0';
+
+ __buffer_unlock_commit(buffer, event);
+- ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
++ ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
+ out:
+ ring_buffer_nest_end(buffer);
+ return size;
+ }
++EXPORT_SYMBOL_GPL(__trace_array_puts);
++
++/**
++ * __trace_puts - write a constant string into the trace buffer.
++ * @ip: The address of the caller
++ * @str: The constant string to write
++ * @size: The size of the string.
++ */
++int __trace_puts(unsigned long ip, const char *str, int size)
++{
++ return __trace_array_puts(&global_trace, ip, str, size);
++}
+ EXPORT_SYMBOL_GPL(__trace_puts);
+
+ /**
+@@ -1111,22 +1100,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr,
+ unsigned long flags;
+
+ if (in_nmi()) {
+- internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
+- internal_trace_puts("*** snapshot is being ignored ***\n");
++ trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
++ trace_array_puts(tr, "*** snapshot is being ignored ***\n");
+ return;
+ }
+
+ if (!tr->allocated_snapshot) {
+- internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
+- internal_trace_puts("*** stopping trace here! ***\n");
+- tracing_off();
++ trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
++ trace_array_puts(tr, "*** stopping trace here! ***\n");
++ tracer_tracing_off(tr);
+ return;
+ }
+
+ /* Note, snapshot can not be used when the tracer uses it */
+ if (tracer->use_max_tr) {
+- internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
+- internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
++ trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
++ trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
+ return;
+ }
+
+@@ -1199,12 +1188,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr)
+ {
+ void *cond_data = NULL;
+
++ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+
+ if (tr->cond_snapshot)
+ cond_data = tr->cond_snapshot->cond_data;
+
+ arch_spin_unlock(&tr->max_lock);
++ local_irq_enable();
+
+ return cond_data;
+ }
+@@ -1340,9 +1331,11 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
+ goto fail_unlock;
+ }
+
++ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+ tr->cond_snapshot = cond_snapshot;
+ arch_spin_unlock(&tr->max_lock);
++ local_irq_enable();
+
+ mutex_unlock(&trace_types_lock);
+
+@@ -1369,6 +1362,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
+ {
+ int ret = 0;
+
++ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+
+ if (!tr->cond_snapshot)
+@@ -1379,6 +1373,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
+ }
+
+ arch_spin_unlock(&tr->max_lock);
++ local_irq_enable();
+
+ return ret;
+ }
+@@ -1421,6 +1416,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
+ return false;
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
++#define free_snapshot(tr) do { } while (0)
+ #endif /* CONFIG_TRACER_SNAPSHOT */
+
+ void tracer_tracing_off(struct trace_array *tr)
+@@ -1492,10 +1488,12 @@ static int __init set_buf_size(char *str)
+ if (!str)
+ return 0;
+ buf_size = memparse(str, &str);
+- /* nr_entries can not be zero */
+- if (buf_size == 0)
+- return 0;
+- trace_buf_size = buf_size;
++ /*
++ * nr_entries can not be zero and the startup
++ * tests require some buffer space. Therefore
++ * ensure we have at least 4096 bytes of buffer.
++ */
++ trace_buf_size = max(4096UL, buf_size);
+ return 1;
+ }
+ __setup("trace_buf_size=", set_buf_size);
+@@ -1689,6 +1687,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
+ }
+
+ unsigned long __read_mostly tracing_thresh;
++
++#ifdef CONFIG_TRACER_MAX_TRACE
+ static const struct file_operations tracing_max_lat_fops;
+
+ #ifdef LATENCY_FS_NOTIFY
+@@ -1714,7 +1714,8 @@ static void trace_create_maxlat_file(struct trace_array *tr,
+ {
+ INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
+ init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
+- tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
++ tr->d_max_latency = trace_create_file("tracing_max_latency",
++ TRACE_MODE_WRITE,
+ d_tracer, &tr->max_latency,
+ &tracing_max_lat_fops);
+ }
+@@ -1744,18 +1745,14 @@ void latency_fsnotify(struct trace_array *tr)
+ irq_work_queue(&tr->fsnotify_irqwork);
+ }
+
+-#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
+- || defined(CONFIG_OSNOISE_TRACER)
++#else /* !LATENCY_FS_NOTIFY */
+
+ #define trace_create_maxlat_file(tr, d_tracer) \
+- trace_create_file("tracing_max_latency", 0644, d_tracer, \
+- &tr->max_latency, &tracing_max_lat_fops)
++ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
++ d_tracer, &tr->max_latency, &tracing_max_lat_fops)
+
+-#else
+-#define trace_create_maxlat_file(tr, d_tracer) do { } while (0)
+ #endif
+
+-#ifdef CONFIG_TRACER_MAX_TRACE
+ /*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+@@ -1830,14 +1827,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
+ ring_buffer_record_off(tr->max_buffer.buffer);
+
+ #ifdef CONFIG_TRACER_SNAPSHOT
+- if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
+- goto out_unlock;
++ if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
++ arch_spin_unlock(&tr->max_lock);
++ return;
++ }
+ #endif
+ swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
+
+ __update_max_tr(tr, tsk, cpu);
+
+- out_unlock:
+ arch_spin_unlock(&tr->max_lock);
+ }
+
+@@ -1874,9 +1872,10 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
+ * place on this CPU. We fail to record, but we reset
+ * the max trace buffer (no one writes directly to it)
+ * and flag that it failed.
++		 * Another reason is that a resize is in progress.
+ */
+ trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
+- "Failed to swap buffers due to commit in progress\n");
++ "Failed to swap buffers due to commit or resize in progress\n");
+ }
+
+ WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
+@@ -1884,6 +1883,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
+ __update_max_tr(tr, tsk, cpu);
+ arch_spin_unlock(&tr->max_lock);
+ }
++
+ #endif /* CONFIG_TRACER_MAX_TRACE */
+
+ static int wait_on_pipe(struct trace_iterator *iter, int full)
+@@ -2176,10 +2176,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
+ }
+
+ /* Must have trace_types_lock held */
+-void tracing_reset_all_online_cpus(void)
++void tracing_reset_all_online_cpus_unlocked(void)
+ {
+ struct trace_array *tr;
+
++ lockdep_assert_held(&trace_types_lock);
++
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (!tr->clear_trace)
+ continue;
+@@ -2191,6 +2193,13 @@ void tracing_reset_all_online_cpus(void)
+ }
+ }
+
++void tracing_reset_all_online_cpus(void)
++{
++ mutex_lock(&trace_types_lock);
++ tracing_reset_all_online_cpus_unlocked();
++ mutex_unlock(&trace_types_lock);
++}
++
+ /*
+ * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
+ * is the tgid last observed corresponding to pid=i.
+@@ -2202,6 +2211,11 @@ static size_t tgid_map_max;
+
+ #define SAVED_CMDLINES_DEFAULT 128
+ #define NO_CMDLINE_MAP UINT_MAX
++/*
++ * Preemption must be disabled before acquiring trace_cmdline_lock.
++ * The various trace_arrays' max_lock must be acquired in a context
++ * where interrupts are disabled.
++ */
+ static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ struct saved_cmdlines_buffer {
+ unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+@@ -2414,7 +2428,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
+ * the lock, but we also don't want to spin
+ * nor do we want to disable interrupts,
+ * so if we miss here, then better luck next time.
++ *
++	 * This is called from within the scheduler and wakeup paths, so
++	 * interrupts had better be disabled and the run queue lock held.
+ */
++ lockdep_assert_preemption_disabled();
+ if (!arch_spin_trylock(&trace_cmdline_lock))
+ return 0;
+
+@@ -2833,7 +2851,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
+ }
+ EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
+
+-static DEFINE_SPINLOCK(tracepoint_iter_lock);
++static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
+ static DEFINE_MUTEX(tracepoint_printk_mutex);
+
+ static void output_printk(struct trace_event_buffer *fbuffer)
+@@ -2861,14 +2879,14 @@ static void output_printk(struct trace_event_buffer *fbuffer)
+
+ event = &fbuffer->trace_file->event_call->event;
+
+- spin_lock_irqsave(&tracepoint_iter_lock, flags);
++ raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
+ trace_seq_init(&iter->seq);
+ iter->ent = fbuffer->entry;
+ event_call->event.funcs->trace(iter, 0, event);
+ trace_seq_putc(&iter->seq, 0);
+ printk("%s", iter->seq.buffer);
+
+- spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
++ raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+ }
+
+ int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+@@ -3230,7 +3248,7 @@ struct trace_buffer_struct {
+ char buffer[4][TRACE_BUF_SIZE];
+ };
+
+-static struct trace_buffer_struct *trace_percpu_buffer;
++static struct trace_buffer_struct __percpu *trace_percpu_buffer;
+
+ /*
+ * This allows for lockless recording. If we're nested too deeply, then
+@@ -3240,7 +3258,7 @@ static char *get_trace_buf(void)
+ {
+ struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
+
+- if (!buffer || buffer->nesting >= 4)
++ if (!trace_percpu_buffer || buffer->nesting >= 4)
+ return NULL;
+
+ buffer->nesting++;
+@@ -3259,7 +3277,7 @@ static void put_trace_buf(void)
+
+ static int alloc_percpu_trace_buffer(void)
+ {
+- struct trace_buffer_struct *buffers;
++ struct trace_buffer_struct __percpu *buffers;
+
+ if (trace_percpu_buffer)
+ return 0;
+@@ -3671,12 +3689,17 @@ static char *trace_iter_expand_format(struct trace_iterator *iter)
+ }
+
+ /* Returns true if the string is safe to dereference from an event */
+-static bool trace_safe_str(struct trace_iterator *iter, const char *str)
++static bool trace_safe_str(struct trace_iterator *iter, const char *str,
++ bool star, int len)
+ {
+ unsigned long addr = (unsigned long)str;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+
++ /* Ignore strings with no length */
++ if (star && !len)
++ return true;
++
+ /* OK if part of the event data */
+ if ((addr >= (unsigned long)iter->ent) &&
+ (addr < (unsigned long)iter->ent + iter->ent_size))
+@@ -3835,6 +3858,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
+ iter->fmt[i] = '\0';
+ trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+
++ /*
++ * If iter->seq is full, the above call no longer guarantees
++ * that ap is in sync with fmt processing, and further calls
++ * to va_arg() can return wrong positional arguments.
++ *
++ * Ensure that ap is no longer used in this case.
++ */
++ if (iter->seq.full) {
++ p = "";
++ break;
++ }
++
+ if (star)
+ len = va_arg(ap, int);
+
+@@ -3850,7 +3885,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
+ * instead. See samples/trace_events/trace-events-sample.h
+ * for reference.
+ */
+- if (WARN_ONCE(!trace_safe_str(iter, str),
++ if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
+ "fmt: '%s' current_buffer: '%s'",
+ fmt, show_buffer(&iter->seq))) {
+ int ret;
+@@ -4073,8 +4108,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
+ * will point to the same string as current_trace->name.
+ */
+ mutex_lock(&trace_types_lock);
+- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
++ if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
++ /* Close iter->trace before switching to the new current tracer */
++ if (iter->trace->close)
++ iter->trace->close(iter);
+ *iter->trace = *tr->current_trace;
++ /* Reopen the new current tracer */
++ if (iter->trace->open)
++ iter->trace->open(iter);
++ }
+ mutex_unlock(&trace_types_lock);
+
+ #ifdef CONFIG_TRACER_MAX_TRACE
+@@ -5075,6 +5117,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
+ static const struct file_operations tracing_fops = {
+ .open = tracing_open,
+ .read = seq_read,
++ .read_iter = seq_read_iter,
++ .splice_read = generic_file_splice_read,
+ .write = tracing_write_stub,
+ .llseek = tracing_lseek,
+ .release = tracing_release,
+@@ -5134,11 +5178,17 @@ int tracing_set_cpumask(struct trace_array *tr,
+ !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
+ atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
+ ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
++#ifdef CONFIG_TRACER_MAX_TRACE
++ ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
++#endif
+ }
+ if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
+ cpumask_test_cpu(cpu, tracing_cpumask_new)) {
+ atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
+ ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
++#ifdef CONFIG_TRACER_MAX_TRACE
++ ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
++#endif
+ }
+ }
+ arch_spin_unlock(&tr->max_lock);
+@@ -5573,7 +5623,7 @@ static const char readme_msg[] =
+ "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
+- "\t <type>\\[<array-size>\\]\n"
++ "\t symstr, <type>\\[<array-size>\\]\n"
+ #ifdef CONFIG_HIST_TRIGGERS
+ "\t field: <stype> <name>;\n"
+ "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
+@@ -5859,9 +5909,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
+ char buf[64];
+ int r;
+
++ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
+ arch_spin_unlock(&trace_cmdline_lock);
++ preempt_enable();
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ }
+@@ -5886,10 +5938,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val)
+ return -ENOMEM;
+ }
+
++ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ savedcmd_temp = savedcmd;
+ savedcmd = s;
+ arch_spin_unlock(&trace_cmdline_lock);
++ preempt_enable();
+ free_saved_cmdlines_buffer(savedcmd_temp);
+
+ return 0;
+@@ -6077,7 +6131,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
+
+ static void trace_create_eval_file(struct dentry *d_tracer)
+ {
+- trace_create_file("eval_map", 0444, d_tracer,
++ trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
+ NULL, &tracing_eval_map_fops);
+ }
+
+@@ -6296,12 +6350,18 @@ static void tracing_set_nop(struct trace_array *tr)
+ tr->current_trace = &nop_trace;
+ }
+
++static bool tracer_options_updated;
++
+ static void add_tracer_options(struct trace_array *tr, struct tracer *t)
+ {
+ /* Only enable if the directory has been created already. */
+ if (!tr->dir)
+ return;
+
++ /* Only create trace option files after update_tracer_options finish */
++ if (!tracer_options_updated)
++ return;
++
+ create_trace_option_files(tr, t);
+ }
+
+@@ -6336,10 +6396,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+
+ #ifdef CONFIG_TRACER_SNAPSHOT
+ if (t->use_max_tr) {
++ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+ if (tr->cond_snapshot)
+ ret = -EBUSY;
+ arch_spin_unlock(&tr->max_lock);
++ local_irq_enable();
+ if (ret)
+ goto out;
+ }
+@@ -6370,12 +6432,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ if (tr->current_trace->reset)
+ tr->current_trace->reset(tr);
+
++#ifdef CONFIG_TRACER_MAX_TRACE
++ had_max_tr = tr->current_trace->use_max_tr;
++
+ /* Current trace needs to be nop_trace before synchronize_rcu */
+ tr->current_trace = &nop_trace;
+
+-#ifdef CONFIG_TRACER_MAX_TRACE
+- had_max_tr = tr->allocated_snapshot;
+-
+ if (had_max_tr && !t->use_max_tr) {
+ /*
+ * We need to make sure that the update_max_tr sees that
+@@ -6387,14 +6449,14 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ synchronize_rcu();
+ free_snapshot(tr);
+ }
+-#endif
+
+-#ifdef CONFIG_TRACER_MAX_TRACE
+- if (t->use_max_tr && !had_max_tr) {
++ if (t->use_max_tr && !tr->allocated_snapshot) {
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret < 0)
+ goto out;
+ }
++#else
++ tr->current_trace = &nop_trace;
+ #endif
+
+ if (t->init) {
+@@ -6507,7 +6569,7 @@ out:
+ return ret;
+ }
+
+-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
++#ifdef CONFIG_TRACER_MAX_TRACE
+
+ static ssize_t
+ tracing_max_lat_read(struct file *filp, char __user *ubuf,
+@@ -6525,10 +6587,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
+
+ #endif
+
++static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
++{
++ if (cpu == RING_BUFFER_ALL_CPUS) {
++ if (cpumask_empty(tr->pipe_cpumask)) {
++ cpumask_setall(tr->pipe_cpumask);
++ return 0;
++ }
++ } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
++ cpumask_set_cpu(cpu, tr->pipe_cpumask);
++ return 0;
++ }
++ return -EBUSY;
++}
++
++static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
++{
++ if (cpu == RING_BUFFER_ALL_CPUS) {
++ WARN_ON(!cpumask_full(tr->pipe_cpumask));
++ cpumask_clear(tr->pipe_cpumask);
++ } else {
++ WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
++ cpumask_clear_cpu(cpu, tr->pipe_cpumask);
++ }
++}
++
+ static int tracing_open_pipe(struct inode *inode, struct file *filp)
+ {
+ struct trace_array *tr = inode->i_private;
+ struct trace_iterator *iter;
++ int cpu;
+ int ret;
+
+ ret = tracing_check_open_get_tr(tr);
+@@ -6536,13 +6624,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
+ return ret;
+
+ mutex_lock(&trace_types_lock);
++ cpu = tracing_get_cpu(inode);
++ ret = open_pipe_on_cpu(tr, cpu);
++ if (ret)
++ goto fail_pipe_on_cpu;
+
+ /* create a buffer to store the information to pass to userspace */
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+ ret = -ENOMEM;
+- __trace_array_put(tr);
+- goto out;
++ goto fail_alloc_iter;
+ }
+
+ trace_seq_init(&iter->seq);
+@@ -6565,7 +6656,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
+
+ iter->tr = tr;
+ iter->array_buffer = &tr->array_buffer;
+- iter->cpu_file = tracing_get_cpu(inode);
++ iter->cpu_file = cpu;
+ mutex_init(&iter->mutex);
+ filp->private_data = iter;
+
+@@ -6575,12 +6666,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
+ nonseekable_open(inode, filp);
+
+ tr->trace_ref++;
+-out:
++
+ mutex_unlock(&trace_types_lock);
+ return ret;
+
+ fail:
+ kfree(iter);
++fail_alloc_iter:
++ close_pipe_on_cpu(tr, cpu);
++fail_pipe_on_cpu:
+ __trace_array_put(tr);
+ mutex_unlock(&trace_types_lock);
+ return ret;
+@@ -6597,10 +6691,12 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
+
+ if (iter->trace->pipe_close)
+ iter->trace->pipe_close(iter);
+-
++ close_pipe_on_cpu(tr, iter->cpu_file);
+ mutex_unlock(&trace_types_lock);
+
+ free_cpumask_var(iter->started);
++ kfree(iter->fmt);
++ kfree(iter->temp);
+ mutex_destroy(&iter->mutex);
+ kfree(iter);
+
+@@ -6625,7 +6721,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
+ return EPOLLIN | EPOLLRDNORM;
+ else
+ return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
+- filp, poll_table);
++ filp, poll_table, iter->tr->buffer_percent);
+ }
+
+ static __poll_t
+@@ -6733,7 +6829,20 @@ waitagain:
+
+ ret = print_trace_line(iter);
+ if (ret == TRACE_TYPE_PARTIAL_LINE) {
+- /* don't print partial lines */
++ /*
++			 * If one print_trace_line() fills the entire trace_seq in one shot,
++			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
++			 * In this case, we need to consume it; otherwise, the loop will peek
++ * this event next time, resulting in an infinite loop.
++ */
++ if (save_len == 0) {
++ iter->seq.full = 0;
++ trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
++ trace_consume(iter);
++ break;
++ }
++
++ /* In other cases, don't print partial lines */
+ iter->seq.seq.len = save_len;
+ break;
+ }
+@@ -7387,6 +7496,11 @@ out:
+ return ret;
+ }
+
++static void tracing_swap_cpu_buffer(void *tr)
++{
++ update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
++}
++
+ static ssize_t
+ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+@@ -7412,10 +7526,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ goto out;
+ }
+
++ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+ if (tr->cond_snapshot)
+ ret = -EBUSY;
+ arch_spin_unlock(&tr->max_lock);
++ local_irq_enable();
+ if (ret)
+ goto out;
+
+@@ -7443,13 +7559,15 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret < 0)
+ break;
+- local_irq_disable();
+ /* Now, we're going to swap */
+- if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
++ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
++ local_irq_disable();
+ update_max_tr(tr, current, smp_processor_id(), NULL);
+- else
+- update_max_tr_single(tr, current, iter->cpu_file);
+- local_irq_enable();
++ local_irq_enable();
++ } else {
++ smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
++ (void *)tr, 1);
++ }
+ break;
+ default:
+ if (tr->allocated_snapshot) {
+@@ -7528,7 +7646,7 @@ static const struct file_operations tracing_thresh_fops = {
+ .llseek = generic_file_llseek,
+ };
+
+-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
++#ifdef CONFIG_TRACER_MAX_TRACE
+ static const struct file_operations tracing_max_lat_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_max_lat_read,
+@@ -7736,7 +7854,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
+ err = kzalloc(sizeof(*err), GFP_KERNEL);
+ if (!err)
+ err = ERR_PTR(-ENOMEM);
+- tr->n_err_log_entries++;
++ else
++ tr->n_err_log_entries++;
+
+ return err;
+ }
+@@ -7948,7 +8067,7 @@ static const struct file_operations tracing_err_log_fops = {
+ .open = tracing_err_log_open,
+ .write = tracing_err_log_write,
+ .read = seq_read,
+- .llseek = seq_lseek,
++ .llseek = tracing_lseek,
+ .release = tracing_err_log_release,
+ };
+
+@@ -8085,6 +8204,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
+
+ __trace_array_put(iter->tr);
+
++ iter->wait_index++;
++ /* Make sure the waiters see the new wait_index */
++ smp_wmb();
++
++ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
++
+ if (info->spare)
+ ring_buffer_free_read_page(iter->array_buffer->buffer,
+ info->spare_cpu, info->spare);
+@@ -8238,6 +8363,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+
+ /* did we read anything? */
+ if (!spd.nr_pages) {
++ long wait_index;
++
+ if (ret)
+ goto out;
+
+@@ -8245,10 +8372,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+ if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
+ goto out;
+
++ wait_index = READ_ONCE(iter->wait_index);
++
+ ret = wait_on_pipe(iter, iter->tr->buffer_percent);
+ if (ret)
+ goto out;
+
++ /* No need to wait after waking up when tracing is off */
++ if (!tracer_tracing_is_on(iter->tr))
++ goto out;
++
++ /* Make sure we see the new wait_index */
++ smp_rmb();
++ if (wait_index != iter->wait_index)
++ goto out;
++
+ goto again;
+ }
+
+@@ -8259,12 +8397,34 @@ out:
+ return ret;
+ }
+
++/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
++static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ struct ftrace_buffer_info *info = file->private_data;
++ struct trace_iterator *iter = &info->iter;
++
++ if (cmd)
++ return -ENOIOCTLCMD;
++
++ mutex_lock(&trace_types_lock);
++
++ iter->wait_index++;
++ /* Make sure the waiters see the new wait_index */
++ smp_wmb();
++
++ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
++
++ mutex_unlock(&trace_types_lock);
++ return 0;
++}
++
+ static const struct file_operations tracing_buffers_fops = {
+ .open = tracing_buffers_open,
+ .read = tracing_buffers_read,
+ .poll = tracing_buffers_poll,
+ .release = tracing_buffers_release,
+ .splice_read = tracing_buffers_splice_read,
++ .unlocked_ioctl = tracing_buffers_ioctl,
+ .llseek = no_llseek,
+ };
+
+@@ -8590,27 +8750,27 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
+ }
+
+ /* per cpu trace_pipe */
+- trace_create_cpu_file("trace_pipe", 0444, d_cpu,
++ trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
+ tr, cpu, &tracing_pipe_fops);
+
+ /* per cpu trace */
+- trace_create_cpu_file("trace", 0644, d_cpu,
++ trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
+ tr, cpu, &tracing_fops);
+
+- trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
++ trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
+ tr, cpu, &tracing_buffers_fops);
+
+- trace_create_cpu_file("stats", 0444, d_cpu,
++ trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
+ tr, cpu, &tracing_stats_fops);
+
+- trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
++ trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
+ tr, cpu, &tracing_entries_fops);
+
+ #ifdef CONFIG_TRACER_SNAPSHOT
+- trace_create_cpu_file("snapshot", 0644, d_cpu,
++ trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
+ tr, cpu, &snapshot_fops);
+
+- trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
++ trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
+ tr, cpu, &snapshot_raw_fops);
+ #endif
+ }
+@@ -8816,8 +8976,8 @@ create_trace_option_file(struct trace_array *tr,
+ topt->opt = opt;
+ topt->tr = tr;
+
+- topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
+- &trace_options_fops);
++ topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
++ t_options, topt, &trace_options_fops);
+
+ }
+
+@@ -8892,7 +9052,7 @@ create_trace_option_core_file(struct trace_array *tr,
+ if (!t_options)
+ return NULL;
+
+- return trace_create_file(option, 0644, t_options,
++ return trace_create_file(option, TRACE_MODE_WRITE, t_options,
+ (void *)&tr->trace_flags_index[index],
+ &trace_options_core_fops);
+ }
+@@ -8953,6 +9113,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
+ tracer_tracing_off(tr);
+ if (tr->current_trace->stop)
+ tr->current_trace->stop(tr);
++ /* Wake up any waiters */
++ ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
+ }
+ mutex_unlock(&trace_types_lock);
+ }
+@@ -8999,9 +9161,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf,
+ if (val > 100)
+ return -EINVAL;
+
+- if (!val)
+- val = 1;
+-
+ tr->buffer_percent = val;
+
+ (*ppos)++;
+@@ -9121,6 +9280,7 @@ static void __update_tracer_options(struct trace_array *tr)
+ static void update_tracer_options(struct trace_array *tr)
+ {
+ mutex_lock(&trace_types_lock);
++ tracer_options_updated = true;
+ __update_tracer_options(tr);
+ mutex_unlock(&trace_types_lock);
+ }
+@@ -9190,6 +9350,9 @@ static struct trace_array *trace_array_create(const char *name)
+ if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
+ goto out_free_tr;
+
++ if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
++ goto out_free_tr;
++
+ tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
+
+ cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
+@@ -9231,6 +9394,7 @@ static struct trace_array *trace_array_create(const char *name)
+ out_free_tr:
+ ftrace_free_ftrace_ops(tr);
+ free_trace_buffers(tr);
++ free_cpumask_var(tr->pipe_cpumask);
+ free_cpumask_var(tr->tracing_cpumask);
+ kfree(tr->name);
+ kfree(tr);
+@@ -9326,12 +9490,14 @@ static int __remove_instance(struct trace_array *tr)
+ tracefs_remove(tr->dir);
+ free_percpu(tr->last_func_repeats);
+ free_trace_buffers(tr);
++ clear_tracing_err_log(tr);
+
+ for (i = 0; i < tr->nr_topts; i++) {
+ kfree(tr->topts[i].topts);
+ }
+ kfree(tr->topts);
+
++ free_cpumask_var(tr->pipe_cpumask);
+ free_cpumask_var(tr->tracing_cpumask);
+ kfree(tr->name);
+ kfree(tr);
+@@ -9417,28 +9583,28 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
+ struct trace_event_file *file;
+ int cpu;
+
+- trace_create_file("available_tracers", 0444, d_tracer,
++ trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
+ tr, &show_traces_fops);
+
+- trace_create_file("current_tracer", 0644, d_tracer,
++ trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
+ tr, &set_tracer_fops);
+
+- trace_create_file("tracing_cpumask", 0644, d_tracer,
++ trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_cpumask_fops);
+
+- trace_create_file("trace_options", 0644, d_tracer,
++ trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_iter_fops);
+
+- trace_create_file("trace", 0644, d_tracer,
++ trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_fops);
+
+- trace_create_file("trace_pipe", 0444, d_tracer,
++ trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
+ tr, &tracing_pipe_fops);
+
+- trace_create_file("buffer_size_kb", 0644, d_tracer,
++ trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_entries_fops);
+
+- trace_create_file("buffer_total_size_kb", 0444, d_tracer,
++ trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
+ tr, &tracing_total_entries_fops);
+
+ trace_create_file("free_buffer", 0200, d_tracer,
+@@ -9449,40 +9615,42 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
+
+ file = __find_event_file(tr, "ftrace", "print");
+ if (file && file->dir)
+- trace_create_file("trigger", 0644, file->dir, file,
+- &event_trigger_fops);
++ trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
++ file, &event_trigger_fops);
+ tr->trace_marker_file = file;
+
+ trace_create_file("trace_marker_raw", 0220, d_tracer,
+ tr, &tracing_mark_raw_fops);
+
+- trace_create_file("trace_clock", 0644, d_tracer, tr,
++ trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
+ &trace_clock_fops);
+
+- trace_create_file("tracing_on", 0644, d_tracer,
++ trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
+ tr, &rb_simple_fops);
+
+- trace_create_file("timestamp_mode", 0444, d_tracer, tr,
++ trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
+ &trace_time_stamp_mode_fops);
+
+ tr->buffer_percent = 50;
+
+- trace_create_file("buffer_percent", 0444, d_tracer,
++ trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
+ tr, &buffer_percent_fops);
+
+ create_trace_options_dir(tr);
+
++#ifdef CONFIG_TRACER_MAX_TRACE
+ trace_create_maxlat_file(tr, d_tracer);
++#endif
+
+ if (ftrace_create_function_files(tr, d_tracer))
+ MEM_FAIL(1, "Could not allocate function filter files");
+
+ #ifdef CONFIG_TRACER_SNAPSHOT
+- trace_create_file("snapshot", 0644, d_tracer,
++ trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
+ tr, &snapshot_fops);
+ #endif
+
+- trace_create_file("error_log", 0644, d_tracer,
++ trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
+ tr, &tracing_err_log_fops);
+
+ for_each_tracing_cpu(cpu)
+@@ -9675,19 +9843,19 @@ static __init int tracer_init_tracefs(void)
+ init_tracer_tracefs(&global_trace, NULL);
+ ftrace_init_tracefs_toplevel(&global_trace, NULL);
+
+- trace_create_file("tracing_thresh", 0644, NULL,
++ trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
+ &global_trace, &tracing_thresh_fops);
+
+- trace_create_file("README", 0444, NULL,
++ trace_create_file("README", TRACE_MODE_READ, NULL,
+ NULL, &tracing_readme_fops);
+
+- trace_create_file("saved_cmdlines", 0444, NULL,
++ trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
+ NULL, &tracing_saved_cmdlines_fops);
+
+- trace_create_file("saved_cmdlines_size", 0644, NULL,
++ trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
+ NULL, &tracing_saved_cmdlines_size_fops);
+
+- trace_create_file("saved_tgids", 0444, NULL,
++ trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
+ NULL, &tracing_saved_tgids_fops);
+
+ trace_eval_init();
+@@ -9699,7 +9867,7 @@ static __init int tracer_init_tracefs(void)
+ #endif
+
+ #ifdef CONFIG_DYNAMIC_FTRACE
+- trace_create_file("dyn_ftrace_total_info", 0444, NULL,
++ trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
+ NULL, &tracing_dyn_info_fops);
+ #endif
+
+@@ -9799,6 +9967,12 @@ void trace_init_global_iter(struct trace_iterator *iter)
+ /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+ if (trace_clocks[iter->tr->clock_id].in_ns)
+ iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
++
++ /* Can not use kmalloc for iter.temp and iter.fmt */
++ iter->temp = static_temp_buf;
++ iter->temp_size = STATIC_TEMP_BUF_SIZE;
++ iter->fmt = static_fmt_buf;
++ iter->fmt_size = STATIC_FMT_BUF_SIZE;
+ }
+
+ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
+@@ -9831,11 +10005,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
+
+ /* Simulate the iterator */
+ trace_init_global_iter(&iter);
+- /* Can not use kmalloc for iter.temp and iter.fmt */
+- iter.temp = static_temp_buf;
+- iter.temp_size = STATIC_TEMP_BUF_SIZE;
+- iter.fmt = static_fmt_buf;
+- iter.fmt_size = STATIC_FMT_BUF_SIZE;
+
+ for_each_tracing_cpu(cpu) {
+ atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
+@@ -10039,12 +10208,14 @@ __init static int tracer_alloc_buffers(void)
+ if (trace_create_savedcmd() < 0)
+ goto out_free_temp_buffer;
+
++ if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
++ goto out_free_savedcmd;
++
+ /* TODO: make the number of buffers hot pluggable with CPUS */
+ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
+ MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
+- goto out_free_savedcmd;
++ goto out_free_pipe_cpumask;
+ }
+-
+ if (global_trace.buffer_disabled)
+ tracing_off();
+
+@@ -10097,6 +10268,8 @@ __init static int tracer_alloc_buffers(void)
+
+ return 0;
+
++out_free_pipe_cpumask:
++ free_cpumask_var(global_trace.pipe_cpumask);
+ out_free_savedcmd:
+ free_saved_cmdlines_buffer(savedcmd);
+ out_free_temp_buffer:
+@@ -10123,6 +10296,8 @@ void __init early_trace_init(void)
+ static_key_enable(&tracepoint_printk_key.key);
+ }
+ tracer_alloc_buffers();
++
++ init_events();
+ }
+
+ void __init trace_init(void)
+diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
+index b7c0f8e160fb4..90ab921884b10 100644
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -22,11 +22,16 @@
+ #include <linux/ctype.h>
+ #include <linux/once_lite.h>
+
++#include "pid_list.h"
++
+ #ifdef CONFIG_FTRACE_SYSCALLS
+ #include <asm/unistd.h> /* For NR_SYSCALLS */
+ #include <asm/syscall.h> /* some archs define it here */
+ #endif
+
++#define TRACE_MODE_WRITE 0640
++#define TRACE_MODE_READ 0440
++
+ enum trace_type {
+ __TRACE_FIRST_TYPE = 0,
+
+@@ -78,6 +83,9 @@ enum trace_type {
+ #undef __dynamic_array
+ #define __dynamic_array(type, item) type item[];
+
++#undef __rel_dynamic_array
++#define __rel_dynamic_array(type, item) type item[];
++
+ #undef F_STRUCT
+ #define F_STRUCT(args...) args
+
+@@ -105,6 +113,12 @@ enum trace_type {
+ #define MEM_FAIL(condition, fmt, ...) \
+ DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
+
++#define FAULT_STRING "(fault)"
++
++#define HIST_STACKTRACE_DEPTH 16
++#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
++#define HIST_STACKTRACE_SKIP 5
++
+ /*
+ * syscalls are special, and need special handling, this is why
+ * they are not included in trace_entries.h
+@@ -188,10 +202,14 @@ struct trace_options {
+ struct trace_option_dentry *topts;
+ };
+
+-struct trace_pid_list {
+- int pid_max;
+- unsigned long *pids;
+-};
++struct trace_pid_list *trace_pid_list_alloc(void);
++void trace_pid_list_free(struct trace_pid_list *pid_list);
++bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid);
++int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid);
++int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid);
++int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid);
++int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
++ unsigned int *next);
+
+ enum {
+ TRACE_PIDS = BIT(0),
+@@ -297,8 +315,7 @@ struct trace_array {
+ struct array_buffer max_buffer;
+ bool allocated_snapshot;
+ #endif
+-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
+- || defined(CONFIG_OSNOISE_TRACER)
++#ifdef CONFIG_TRACER_MAX_TRACE
+ unsigned long max_latency;
+ #ifdef CONFIG_FSNOTIFY
+ struct dentry *d_max_latency;
+@@ -350,6 +367,8 @@ struct trace_array {
+ struct list_head events;
+ struct trace_event_file *trace_marker_file;
+ cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
++ /* one per_cpu trace_pipe can be opened by only one user */
++ cpumask_var_t pipe_cpumask;
+ int ref;
+ int trace_ref;
+ #ifdef CONFIG_FUNCTION_TRACER
+@@ -569,6 +588,7 @@ int tracing_is_enabled(void);
+ void tracing_reset_online_cpus(struct array_buffer *buf);
+ void tracing_reset_current(int cpu);
+ void tracing_reset_all_online_cpus(void);
++void tracing_reset_all_online_cpus_unlocked(void);
+ int tracing_open_generic(struct inode *inode, struct file *filp);
+ int tracing_open_generic_tr(struct inode *inode, struct file *filp);
+ bool tracing_is_disabled(void);
+@@ -676,12 +696,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
+ void *cond_data);
+ void update_max_tr_single(struct trace_array *tr,
+ struct task_struct *tsk, int cpu);
+-#endif /* CONFIG_TRACER_MAX_TRACE */
+
+-#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
+- || defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY)
++#ifdef CONFIG_FSNOTIFY
+ #define LATENCY_FS_NOTIFY
+ #endif
++#endif /* CONFIG_TRACER_MAX_TRACE */
+
+ #ifdef LATENCY_FS_NOTIFY
+ void latency_fsnotify(struct trace_array *tr);
+@@ -1357,14 +1376,26 @@ __event_trigger_test_discard(struct trace_event_file *file,
+ if (eflags & EVENT_FILE_FL_TRIGGER_COND)
+ *tt = event_triggers_call(file, buffer, entry, event);
+
+- if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
+- (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
+- !filter_match_preds(file->filter, entry))) {
+- __trace_event_discard_commit(buffer, event);
+- return true;
+- }
++ if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED |
++ EVENT_FILE_FL_FILTERED |
++ EVENT_FILE_FL_PID_FILTER))))
++ return false;
++
++ if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
++ goto discard;
++
++ if (file->flags & EVENT_FILE_FL_FILTERED &&
++ !filter_match_preds(file->filter, entry))
++ goto discard;
++
++ if ((file->flags & EVENT_FILE_FL_PID_FILTER) &&
++ trace_event_ignore_this_pid(file))
++ goto discard;
+
+ return false;
++ discard:
++ __trace_event_discard_commit(buffer, event);
++ return true;
+ }
+
+ /**
+@@ -1478,6 +1509,7 @@ extern void trace_event_enable_cmd_record(bool enable);
+ extern void trace_event_enable_tgid_record(bool enable);
+
+ extern int event_trace_init(void);
++extern int init_events(void);
+ extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
+ extern int event_trace_del_tracer(struct trace_array *tr);
+ extern void __trace_early_add_events(struct trace_array *tr);
+@@ -1917,17 +1949,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
+ }
+
+ /* Check the name is good for event/group/fields */
+-static inline bool is_good_name(const char *name)
++static inline bool __is_good_name(const char *name, bool hash_ok)
+ {
+- if (!isalpha(*name) && *name != '_')
++ if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-'))
+ return false;
+ while (*++name != '\0') {
+- if (!isalpha(*name) && !isdigit(*name) && *name != '_')
++ if (!isalpha(*name) && !isdigit(*name) && *name != '_' &&
++ (!hash_ok || *name != '-'))
+ return false;
+ }
+ return true;
+ }
+
++/* Check the name is good for event/group/fields */
++static inline bool is_good_name(const char *name)
++{
++ return __is_good_name(name, false);
++}
++
++/* Check the name is good for system */
++static inline bool is_good_system_name(const char *name)
++{
++ return __is_good_name(name, true);
++}
++
+ /* Convert certain expected symbols into '_' when generating event names */
+ static inline void sanitize_event_name(char *name)
+ {
+diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
+index 8d252f63cd784..778200dd8edea 100644
+--- a/kernel/trace/trace_boot.c
++++ b/kernel/trace/trace_boot.c
+@@ -300,7 +300,7 @@ trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
+ {
+ struct xbc_node *node;
+ const char *p, *handler;
+- int ret;
++ int ret = 0;
+
+ handler = xbc_node_get_data(hnode);
+
+@@ -430,6 +430,8 @@ trace_boot_init_histograms(struct trace_event_file *file,
+ /* All digit started node should be instances. */
+ if (trace_boot_compose_hist_cmd(node, buf, size) == 0) {
+ tmp = kstrdup(buf, GFP_KERNEL);
++ if (!tmp)
++ return;
+ if (trigger_process_regex(file, buf) < 0)
+ pr_err("Failed to apply hist trigger: %s\n", tmp);
+ kfree(tmp);
+@@ -439,6 +441,8 @@ trace_boot_init_histograms(struct trace_event_file *file,
+ if (xbc_node_find_subkey(hnode, "keys")) {
+ if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
+ tmp = kstrdup(buf, GFP_KERNEL);
++ if (!tmp)
++ return;
+ if (trigger_process_regex(file, buf) < 0)
+ pr_err("Failed to apply hist trigger: %s\n", tmp);
+ kfree(tmp);
+diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
+index 1110112e55bd7..d4f7137233234 100644
+--- a/kernel/trace/trace_dynevent.c
++++ b/kernel/trace/trace_dynevent.c
+@@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
+ if (ret)
+ break;
+ }
++ tracing_reset_all_online_cpus();
+ mutex_unlock(&event_mutex);
+ out:
+ argv_free(argv);
+@@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type)
+ break;
+ }
+ out:
++ tracing_reset_all_online_cpus();
+ mutex_unlock(&event_mutex);
+
+ return ret;
+@@ -262,7 +264,7 @@ static __init int init_dynamic_event(void)
+ if (ret)
+ return 0;
+
+- entry = tracefs_create_file("dynamic_events", 0644, NULL,
++ entry = tracefs_create_file("dynamic_events", TRACE_MODE_WRITE, NULL,
+ NULL, &dynamic_events_ops);
+
+ /* Event list interface */
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 928867f527e70..085f056e66f19 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -16,6 +16,7 @@
+ #include "trace_dynevent.h"
+ #include "trace_probe.h"
+ #include "trace_probe_tmpl.h"
++#include "trace_probe_kernel.h"
+
+ #define EPROBE_EVENT_SYSTEM "eprobes"
+
+@@ -226,6 +227,7 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
+ struct probe_arg *parg = &ep->tp.args[i];
+ struct ftrace_event_field *field;
+ struct list_head *head;
++ int ret = -ENOENT;
+
+ head = trace_get_fields(ep->event);
+ list_for_each_entry(field, head, link) {
+@@ -235,9 +237,20 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
+ return 0;
+ }
+ }
++
++ /*
++ * Argument not found on event. But allow for comm and COMM
++ * to be used to get the current->comm.
++ */
++ if (strcmp(parg->code->data, "COMM") == 0 ||
++ strcmp(parg->code->data, "comm") == 0) {
++ parg->code->op = FETCH_OP_COMM;
++ ret = 0;
++ }
++
+ kfree(parg->code->data);
+ parg->code->data = NULL;
+- return -ENOENT;
++ return ret;
+ }
+
+ static int eprobe_event_define_fields(struct trace_event_call *event_call)
+@@ -308,6 +321,24 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
+
+ addr = rec + field->offset;
+
++ if (is_string_field(field)) {
++ switch (field->filter_type) {
++ case FILTER_DYN_STRING:
++ val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
++ break;
++ case FILTER_STATIC_STRING:
++ val = (unsigned long)addr;
++ break;
++ case FILTER_PTR_STRING:
++ val = (unsigned long)(*(char *)addr);
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ return 0;
++ }
++ return val;
++ }
++
+ switch (field->size) {
+ case 1:
+ if (field->is_signed)
+@@ -339,16 +370,38 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
+
+ static int get_eprobe_size(struct trace_probe *tp, void *rec)
+ {
++ struct fetch_insn *code;
+ struct probe_arg *arg;
+ int i, len, ret = 0;
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+- if (unlikely(arg->dynamic)) {
++ if (arg->dynamic) {
+ unsigned long val;
+
+- val = get_event_field(arg->code, rec);
+- len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
++ code = arg->code;
++ retry:
++ switch (code->op) {
++ case FETCH_OP_TP_ARG:
++ val = get_event_field(code, rec);
++ break;
++ case FETCH_OP_IMM:
++ val = code->immediate;
++ break;
++ case FETCH_OP_COMM:
++ val = (unsigned long)current->comm;
++ break;
++ case FETCH_OP_DATA:
++ val = (unsigned long)code->data;
++ break;
++ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
++ code++;
++ goto retry;
++ default:
++ continue;
++ }
++ code++;
++ len = process_fetch_insn_bottom(code, val, NULL, NULL);
+ if (len > 0)
+ ret += len;
+ }
+@@ -366,8 +419,28 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+ {
+ unsigned long val;
+
+- val = get_event_field(code, rec);
+- return process_fetch_insn_bottom(code + 1, val, dest, base);
++ retry:
++ switch (code->op) {
++ case FETCH_OP_TP_ARG:
++ val = get_event_field(code, rec);
++ break;
++ case FETCH_OP_IMM:
++ val = code->immediate;
++ break;
++ case FETCH_OP_COMM:
++ val = (unsigned long)current->comm;
++ break;
++ case FETCH_OP_DATA:
++ val = (unsigned long)code->data;
++ break;
++ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
++ code++;
++ goto retry;
++ default:
++ return -EILSEQ;
++ }
++ code++;
++ return process_fetch_insn_bottom(code, val, dest, base);
+ }
+ NOKPROBE_SYMBOL(process_fetch_insn)
+
+@@ -375,29 +448,14 @@ NOKPROBE_SYMBOL(process_fetch_insn)
+ static nokprobe_inline int
+ fetch_store_strlen_user(unsigned long addr)
+ {
+- const void __user *uaddr = (__force const void __user *)addr;
+-
+- return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
++ return kern_fetch_store_strlen_user(addr);
+ }
+
+ /* Return the length of string -- including null terminal byte */
+ static nokprobe_inline int
+ fetch_store_strlen(unsigned long addr)
+ {
+- int ret, len = 0;
+- u8 c;
+-
+-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+- if (addr < TASK_SIZE)
+- return fetch_store_strlen_user(addr);
+-#endif
+-
+- do {
+- ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
+- len++;
+- } while (c && ret == 0 && len < MAX_STRING_SIZE);
+-
+- return (ret < 0) ? ret : len;
++ return kern_fetch_store_strlen(addr);
+ }
+
+ /*
+@@ -407,21 +465,7 @@ fetch_store_strlen(unsigned long addr)
+ static nokprobe_inline int
+ fetch_store_string_user(unsigned long addr, void *dest, void *base)
+ {
+- const void __user *uaddr = (__force const void __user *)addr;
+- int maxlen = get_loc_len(*(u32 *)dest);
+- void *__dest;
+- long ret;
+-
+- if (unlikely(!maxlen))
+- return -ENOMEM;
+-
+- __dest = get_loc_data(dest, base);
+-
+- ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
+- if (ret >= 0)
+- *(u32 *)dest = make_data_loc(ret, __dest - base);
+-
+- return ret;
++ return kern_fetch_store_string_user(addr, dest, base);
+ }
+
+ /*
+@@ -431,29 +475,7 @@ fetch_store_string_user(unsigned long addr, void *dest, void *base)
+ static nokprobe_inline int
+ fetch_store_string(unsigned long addr, void *dest, void *base)
+ {
+- int maxlen = get_loc_len(*(u32 *)dest);
+- void *__dest;
+- long ret;
+-
+-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+- if ((unsigned long)addr < TASK_SIZE)
+- return fetch_store_string_user(addr, dest, base);
+-#endif
+-
+- if (unlikely(!maxlen))
+- return -ENOMEM;
+-
+- __dest = get_loc_data(dest, base);
+-
+- /*
+- * Try to get string again, since the string can be changed while
+- * probing.
+- */
+- ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
+- if (ret >= 0)
+- *(u32 *)dest = make_data_loc(ret, __dest - base);
+-
+- return ret;
++ return kern_fetch_store_string(addr, dest, base);
+ }
+
+ static nokprobe_inline int
+@@ -545,6 +567,12 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
+ {
+ struct eprobe_data *edata = data->private_data;
+
++ if (unlikely(!rec))
++ return;
++
++ if (unlikely(!rec))
++ return;
++
+ __eprobe_trace_func(edata, rec);
+ }
+
+@@ -697,6 +725,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
+ struct trace_eprobe *ep;
+ bool enabled;
+ int ret = 0;
++ int cnt = 0;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+@@ -720,12 +749,25 @@ static int enable_trace_eprobe(struct trace_event_call *call,
+ if (ret)
+ break;
+ enabled = true;
++ cnt++;
+ }
+
+ if (ret) {
+ /* Failed to enable one of them. Roll back all */
+- if (enabled)
+- disable_eprobe(ep, file->tr);
++ if (enabled) {
++ /*
++ * It's a bug if one failed for something other than memory
++ * not being available but another eprobe succeeded.
++ */
++ WARN_ON_ONCE(ret != -ENOMEM);
++
++ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
++ ep = container_of(pos, struct trace_eprobe, tp);
++ disable_eprobe(ep, file->tr);
++ if (!--cnt)
++ break;
++ }
++ }
+ if (file)
+ trace_probe_remove_file(tp, file);
+ else
+@@ -849,6 +891,10 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
+ if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG)
+ ret = trace_eprobe_tp_arg_update(ep, i);
+
++ /* Handle symbols "@" */
++ if (!ret)
++ ret = traceprobe_update_arg(&ep->tp.args[i]);
++
+ return ret;
+ }
+
+diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
+index 6aed10e2f7ce0..083f648e32650 100644
+--- a/kernel/trace/trace_event_perf.c
++++ b/kernel/trace/trace_event_perf.c
+@@ -157,7 +157,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
+ int i;
+
+ if (--tp_event->perf_refcount > 0)
+- goto out;
++ return;
+
+ tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
+
+@@ -176,8 +176,6 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
+ perf_trace_buf[i] = NULL;
+ }
+ }
+-out:
+- trace_event_put_ref(tp_event);
+ }
+
+ static int perf_trace_event_open(struct perf_event *p_event)
+@@ -241,6 +239,7 @@ void perf_trace_destroy(struct perf_event *p_event)
+ mutex_lock(&event_mutex);
+ perf_trace_event_close(p_event);
+ perf_trace_event_unreg(p_event);
++ trace_event_put_ref(p_event->tp_event);
+ mutex_unlock(&event_mutex);
+ }
+
+@@ -292,6 +291,7 @@ void perf_kprobe_destroy(struct perf_event *p_event)
+ mutex_lock(&event_mutex);
+ perf_trace_event_close(p_event);
+ perf_trace_event_unreg(p_event);
++ trace_event_put_ref(p_event->tp_event);
+ mutex_unlock(&event_mutex);
+
+ destroy_local_trace_kprobe(p_event->tp_event);
+@@ -347,6 +347,7 @@ void perf_uprobe_destroy(struct perf_event *p_event)
+ mutex_lock(&event_mutex);
+ perf_trace_event_close(p_event);
+ perf_trace_event_unreg(p_event);
++ trace_event_put_ref(p_event->tp_event);
+ mutex_unlock(&event_mutex);
+ destroy_local_trace_uprobe(p_event->tp_event);
+ }
+@@ -441,13 +442,13 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
+ if (!rcu_is_watching())
+ return;
+
+- if ((unsigned long)ops->private != smp_processor_id())
+- return;
+-
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
++ if ((unsigned long)ops->private != smp_processor_id())
++ goto out;
++
+ event = container_of(ops, struct perf_event, ftrace_ops);
+
+ /*
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 830b3b9940f4c..2a2a599997671 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -40,6 +40,14 @@ static LIST_HEAD(ftrace_generic_fields);
+ static LIST_HEAD(ftrace_common_fields);
+ static bool eventdir_initialized;
+
++static LIST_HEAD(module_strings);
++
++struct module_string {
++ struct list_head next;
++ struct module *module;
++ char *str;
++};
++
+ #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
+
+ static struct kmem_cache *field_cachep;
+@@ -168,6 +176,7 @@ static int trace_define_generic_fields(void)
+
+ __generic_field(int, CPU, FILTER_CPU);
+ __generic_field(int, cpu, FILTER_CPU);
++ __generic_field(int, common_cpu, FILTER_CPU);
+ __generic_field(char *, COMM, FILTER_COMM);
+ __generic_field(char *, comm, FILTER_COMM);
+
+@@ -399,7 +408,14 @@ static void test_event_printk(struct trace_event_call *call)
+ a = strchr(fmt + i, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ dereference_flags &= ~(1ULL << arg);
++ } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
++ (!c || r < c)) {
++ dereference_flags &= ~(1ULL << arg);
++ } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
++ (!c || r < c)) {
++ dereference_flags &= ~(1ULL << arg);
+ }
++
+ next_arg:
+ i--;
+ arg++;
+@@ -578,7 +594,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
+ {
+ struct trace_event_call *call = file->event_call;
+ struct trace_array *tr = file->tr;
+- unsigned long file_flags = file->flags;
+ int ret = 0;
+ int disable;
+
+@@ -602,6 +617,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
+ break;
+ disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
+ clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
++ /* Disable use of trace_buffered_event */
++ trace_buffered_event_disable();
+ } else
+ disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
+
+@@ -640,6 +657,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
+ if (atomic_inc_return(&file->sm_ref) > 1)
+ break;
+ set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
++ /* Enable use of trace_buffered_event */
++ trace_buffered_event_enable();
+ }
+
+ if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
+@@ -679,15 +698,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
+ break;
+ }
+
+- /* Enable or disable use of trace_buffered_event */
+- if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
+- (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
+- if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+- trace_buffered_event_enable();
+- else
+- trace_buffered_event_disable();
+- }
+-
+ return ret;
+ }
+
+@@ -885,10 +895,10 @@ static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
+ tracepoint_synchronize_unregister();
+
+ if ((type & TRACE_PIDS) && pid_list)
+- trace_free_pid_list(pid_list);
++ trace_pid_list_free(pid_list);
+
+ if ((type & TRACE_NO_PIDS) && no_pid_list)
+- trace_free_pid_list(no_pid_list);
++ trace_pid_list_free(no_pid_list);
+ }
+
+ static void ftrace_clear_event_pids(struct trace_array *tr, int type)
+@@ -1967,7 +1977,7 @@ event_pid_write(struct file *filp, const char __user *ubuf,
+
+ if (filtered_pids) {
+ tracepoint_synchronize_unregister();
+- trace_free_pid_list(filtered_pids);
++ trace_pid_list_free(filtered_pids);
+ } else if (pid_list && !other_pids) {
+ register_pid_events(tr);
+ }
+@@ -2312,7 +2322,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
+ /* the ftrace system is special, do not create enable or filter files */
+ if (strcmp(name, "ftrace") != 0) {
+
+- entry = tracefs_create_file("filter", 0644, dir->entry, dir,
++ entry = tracefs_create_file("filter", TRACE_MODE_WRITE,
++ dir->entry, dir,
+ &ftrace_subsystem_filter_fops);
+ if (!entry) {
+ kfree(system->filter);
+@@ -2320,7 +2331,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
+ pr_warn("Could not create tracefs '%s/filter' entry\n", name);
+ }
+
+- trace_create_file("enable", 0644, dir->entry, dir,
++ trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir,
+ &ftrace_system_enable_fops);
+ }
+
+@@ -2402,12 +2413,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
+ }
+
+ if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+- trace_create_file("enable", 0644, file->dir, file,
++ trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file,
+ &ftrace_enable_fops);
+
+ #ifdef CONFIG_PERF_EVENTS
+ if (call->event.type && call->class->reg)
+- trace_create_file("id", 0444, file->dir,
++ trace_create_file("id", TRACE_MODE_READ, file->dir,
+ (void *)(long)call->event.type,
+ &ftrace_event_id_fops);
+ #endif
+@@ -2423,22 +2434,22 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
+ * triggers or filters.
+ */
+ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+- trace_create_file("filter", 0644, file->dir, file,
+- &ftrace_event_filter_fops);
++ trace_create_file("filter", TRACE_MODE_WRITE, file->dir,
++ file, &ftrace_event_filter_fops);
+
+- trace_create_file("trigger", 0644, file->dir, file,
+- &event_trigger_fops);
++ trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
++ file, &event_trigger_fops);
+ }
+
+ #ifdef CONFIG_HIST_TRIGGERS
+- trace_create_file("hist", 0444, file->dir, file,
++ trace_create_file("hist", TRACE_MODE_READ, file->dir, file,
+ &event_hist_fops);
+ #endif
+ #ifdef CONFIG_HIST_TRIGGERS_DEBUG
+- trace_create_file("hist_debug", 0444, file->dir, file,
++ trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file,
+ &event_hist_debug_fops);
+ #endif
+- trace_create_file("format", 0444, file->dir, call,
++ trace_create_file("format", TRACE_MODE_READ, file->dir, call,
+ &ftrace_event_format_fops);
+
+ #ifdef CONFIG_TRACE_EVENT_INJECT
+@@ -2632,6 +2643,76 @@ static void update_event_printk(struct trace_event_call *call,
+ }
+ }
+
++static void add_str_to_module(struct module *module, char *str)
++{
++ struct module_string *modstr;
++
++ modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
++
++ /*
++ * If we failed to allocate memory here, then we'll just
++ * let the str memory leak when the module is removed.
++ * If this fails to allocate, there's worse problems than
++ * a leaked string on module removal.
++ */
++ if (WARN_ON_ONCE(!modstr))
++ return;
++
++ modstr->module = module;
++ modstr->str = str;
++
++ list_add(&modstr->next, &module_strings);
++}
++
++static void update_event_fields(struct trace_event_call *call,
++ struct trace_eval_map *map)
++{
++ struct ftrace_event_field *field;
++ struct list_head *head;
++ char *ptr;
++ char *str;
++ int len = strlen(map->eval_string);
++
++ /* Dynamic events should never have field maps */
++ if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
++ return;
++
++ head = trace_get_fields(call);
++ list_for_each_entry(field, head, link) {
++ ptr = strchr(field->type, '[');
++ if (!ptr)
++ continue;
++ ptr++;
++
++ if (!isalpha(*ptr) && *ptr != '_')
++ continue;
++
++ if (strncmp(map->eval_string, ptr, len) != 0)
++ continue;
++
++ str = kstrdup(field->type, GFP_KERNEL);
++ if (WARN_ON_ONCE(!str))
++ return;
++ ptr = str + (ptr - field->type);
++ ptr = eval_replace(ptr, map, len);
++ /* enum/sizeof string smaller than value */
++ if (WARN_ON_ONCE(!ptr)) {
++ kfree(str);
++ continue;
++ }
++
++ /*
++ * If the event is part of a module, then we need to free the string
++ * when the module is removed. Otherwise, it will stay allocated
++ * until a reboot.
++ */
++ if (call->module)
++ add_str_to_module(call->module, str);
++
++ field->type = str;
++ }
++}
++
+ void trace_event_eval_update(struct trace_eval_map **map, int len)
+ {
+ struct trace_event_call *call, *p;
+@@ -2667,6 +2748,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
+ first = false;
+ }
+ update_event_printk(call, map[i]);
++ update_event_fields(call, map[i]);
+ }
+ }
+ }
+@@ -2677,12 +2759,22 @@ static struct trace_event_file *
+ trace_create_new_event(struct trace_event_call *call,
+ struct trace_array *tr)
+ {
++ struct trace_pid_list *no_pid_list;
++ struct trace_pid_list *pid_list;
+ struct trace_event_file *file;
+
+ file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+ if (!file)
+ return NULL;
+
++ pid_list = rcu_dereference_protected(tr->filtered_pids,
++ lockdep_is_held(&event_mutex));
++ no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
++ lockdep_is_held(&event_mutex));
++
++ if (pid_list || no_pid_list)
++ file->flags |= EVENT_FILE_FL_PID_FILTER;
++
+ file->event_call = call;
+ file->tr = tr;
+ atomic_set(&file->sm_ref, 0);
+@@ -2776,7 +2868,10 @@ static int probe_remove_event_call(struct trace_event_call *call)
+ * TRACE_REG_UNREGISTER.
+ */
+ if (file->flags & EVENT_FILE_FL_ENABLED)
+- return -EBUSY;
++ goto busy;
++
++ if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
++ tr->clear_trace = true;
+ /*
+ * The do_for_each_event_file_safe() is
+ * a double loop. After finding the call for this
+@@ -2789,6 +2884,12 @@ static int probe_remove_event_call(struct trace_event_call *call)
+ __trace_remove_event_call(call);
+
+ return 0;
++ busy:
++ /* No need to clear the trace now */
++ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
++ tr->clear_trace = false;
++ }
++ return -EBUSY;
+ }
+
+ /* Remove an event_call */
+@@ -2840,6 +2941,7 @@ static void trace_module_add_events(struct module *mod)
+ static void trace_module_remove_events(struct module *mod)
+ {
+ struct trace_event_call *call, *p;
++ struct module_string *modstr, *m;
+
+ down_write(&trace_event_sem);
+ list_for_each_entry_safe(call, p, &ftrace_events, list) {
+@@ -2848,6 +2950,14 @@ static void trace_module_remove_events(struct module *mod)
+ if (call->module == mod)
+ __trace_remove_event_call(call);
+ }
++ /* Check for any strings allocated for this module */
++ list_for_each_entry_safe(modstr, m, &module_strings, next) {
++ if (modstr->module != mod)
++ continue;
++ list_del(&modstr->next);
++ kfree(modstr->str);
++ kfree(modstr);
++ }
+ up_write(&trace_event_sem);
+
+ /*
+@@ -2858,7 +2968,7 @@ static void trace_module_remove_events(struct module *mod)
+ * over from this module may be passed to the new module events and
+ * unexpected results may occur.
+ */
+- tracing_reset_all_online_cpus();
++ tracing_reset_all_online_cpus_unlocked();
+ }
+
+ static int trace_module_notify(struct notifier_block *self,
+@@ -3433,7 +3543,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+ struct dentry *d_events;
+ struct dentry *entry;
+
+- entry = tracefs_create_file("set_event", 0644, parent,
++ entry = tracefs_create_file("set_event", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_fops);
+ if (!entry) {
+ pr_warn("Could not create tracefs 'set_event' entry\n");
+@@ -3446,7 +3556,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+ return -ENOMEM;
+ }
+
+- entry = trace_create_file("enable", 0644, d_events,
++ entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
+ tr, &ftrace_tr_enable_fops);
+ if (!entry) {
+ pr_warn("Could not create tracefs 'enable' entry\n");
+@@ -3455,24 +3565,25 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+
+ /* There are not as crucial, just warn if they are not created */
+
+- entry = tracefs_create_file("set_event_pid", 0644, parent,
++ entry = tracefs_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_pid_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'set_event_pid' entry\n");
+
+- entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
+- tr, &ftrace_set_event_notrace_pid_fops);
++ entry = tracefs_create_file("set_event_notrace_pid",
++ TRACE_MODE_WRITE, parent, tr,
++ &ftrace_set_event_notrace_pid_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
+
+ /* ring buffer internal formats */
+- entry = trace_create_file("header_page", 0444, d_events,
++ entry = trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ ring_buffer_print_page_header,
+ &ftrace_show_header_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'header_page' entry\n");
+
+- entry = trace_create_file("header_event", 0444, d_events,
++ entry = trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ ring_buffer_print_entry_header,
+ &ftrace_show_header_fops);
+ if (!entry)
+@@ -3689,8 +3800,8 @@ __init int event_trace_init(void)
+ if (!tr)
+ return -ENODEV;
+
+- entry = tracefs_create_file("available_events", 0444, NULL,
+- tr, &ftrace_avail_fops);
++ entry = tracefs_create_file("available_events", TRACE_MODE_READ,
++ NULL, tr, &ftrace_avail_fops);
+ if (!entry)
+ pr_warn("Could not create tracefs 'available_events' entry\n");
+
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index c9124038b140f..06d6318ee5377 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ */
+
++#include <linux/uaccess.h>
+ #include <linux/module.h>
+ #include <linux/ctype.h>
+ #include <linux/mutex.h>
+@@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32);
+ DEFINE_EQUALITY_PRED(16);
+ DEFINE_EQUALITY_PRED(8);
+
++/* user space strings temp buffer */
++#define USTRING_BUF_SIZE 1024
++
++struct ustring_buffer {
++ char buffer[USTRING_BUF_SIZE];
++};
++
++static __percpu struct ustring_buffer *ustring_per_cpu;
++
++static __always_inline char *test_string(char *str)
++{
++ struct ustring_buffer *ubuf;
++ char *kstr;
++
++ if (!ustring_per_cpu)
++ return NULL;
++
++ ubuf = this_cpu_ptr(ustring_per_cpu);
++ kstr = ubuf->buffer;
++
++ /* For safety, do not trust the string pointer */
++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
++ return NULL;
++ return kstr;
++}
++
++static __always_inline char *test_ustring(char *str)
++{
++ struct ustring_buffer *ubuf;
++ char __user *ustr;
++ char *kstr;
++
++ if (!ustring_per_cpu)
++ return NULL;
++
++ ubuf = this_cpu_ptr(ustring_per_cpu);
++ kstr = ubuf->buffer;
++
++ /* user space address? */
++ ustr = (char __user *)str;
++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
++ return NULL;
++
++ return kstr;
++}
++
+ /* Filter predicate for fixed sized arrays of characters */
+ static int filter_pred_string(struct filter_pred *pred, void *event)
+ {
+@@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
+ return match;
+ }
+
+-/* Filter predicate for char * pointers */
+-static int filter_pred_pchar(struct filter_pred *pred, void *event)
++static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
+ {
+- char **addr = (char **)(event + pred->offset);
+ int cmp, match;
+- int len = strlen(*addr) + 1; /* including tailing '\0' */
++ int len;
+
+- cmp = pred->regex.match(*addr, &pred->regex, len);
++ len = strlen(str) + 1; /* including tailing '\0' */
++ cmp = pred->regex.match(str, &pred->regex, len);
+
+ match = cmp ^ pred->not;
+
+ return match;
+ }
++/* Filter predicate for char * pointers */
++static int filter_pred_pchar(struct filter_pred *pred, void *event)
++{
++ char **addr = (char **)(event + pred->offset);
++ char *str;
++
++ str = test_string(*addr);
++ if (!str)
++ return 0;
++
++ return filter_pchar(pred, str);
++}
++
++/* Filter predicate for char * pointers in user space */
++static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
++{
++ char **addr = (char **)(event + pred->offset);
++ char *str;
++
++ str = test_ustring(*addr);
++ if (!str)
++ return 0;
++
++ return filter_pchar(pred, str);
++}
+
+ /*
+ * Filter predicate for dynamic sized arrays of characters.
+@@ -1158,6 +1229,7 @@ static int parse_pred(const char *str, void *data,
+ struct filter_pred *pred = NULL;
+ char num_buf[24]; /* Big enough to hold an address */
+ char *field_name;
++ bool ustring = false;
+ char q;
+ u64 val;
+ int len;
+@@ -1192,6 +1264,12 @@ static int parse_pred(const char *str, void *data,
+ return -EINVAL;
+ }
+
++ /* See if the field is a user space string */
++ if ((len = str_has_prefix(str + i, ".ustring"))) {
++ ustring = true;
++ i += len;
++ }
++
+ while (isspace(str[i]))
+ i++;
+
+@@ -1320,8 +1398,20 @@ static int parse_pred(const char *str, void *data,
+
+ } else if (field->filter_type == FILTER_DYN_STRING)
+ pred->fn = filter_pred_strloc;
+- else
+- pred->fn = filter_pred_pchar;
++ else {
++
++ if (!ustring_per_cpu) {
++ /* Once allocated, keep it around for good */
++ ustring_per_cpu = alloc_percpu(struct ustring_buffer);
++ if (!ustring_per_cpu)
++ goto err_mem;
++ }
++
++ if (ustring)
++ pred->fn = filter_pred_pchar_user;
++ else
++ pred->fn = filter_pred_pchar;
++ }
+ /* go past the last quote */
+ i++;
+
+@@ -1387,6 +1477,9 @@ static int parse_pred(const char *str, void *data,
+ err_free:
+ kfree(pred);
+ return -EINVAL;
++err_mem:
++ kfree(pred);
++ return -ENOMEM;
+ }
+
+ enum {
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index f01e442716e2f..c32a53f089229 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -315,10 +315,6 @@ DEFINE_HIST_FIELD_FN(u8);
+ #define for_each_hist_key_field(i, hist_data) \
+ for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++)
+
+-#define HIST_STACKTRACE_DEPTH 16
+-#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
+-#define HIST_STACKTRACE_SKIP 5
+-
+ #define HITCOUNT_IDX 0
+ #define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
+
+@@ -452,7 +448,7 @@ struct action_data {
+ * event param, and is passed to the synthetic event
+ * invocation.
+ */
+- unsigned int var_ref_idx[TRACING_MAP_VARS_MAX];
++ unsigned int var_ref_idx[SYNTH_FIELDS_MAX];
+ struct synth_event *synth_event;
+ bool use_trace_keyword;
+ char *synth_event_name;
+@@ -1127,6 +1123,9 @@ static const char *hist_field_name(struct hist_field *field,
+ {
+ const char *field_name = "";
+
++ if (WARN_ON_ONCE(!field))
++ return field_name;
++
+ if (level > 1)
+ return field_name;
+
+@@ -1699,6 +1698,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+ hist_field->fn = flags & HIST_FIELD_FL_LOG2 ? hist_field_log2 :
+ hist_field_bucket;
+ hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
++ if (!hist_field->operands[0])
++ goto free;
+ hist_field->size = hist_field->operands[0]->size;
+ hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL);
+ if (!hist_field->type)
+@@ -1733,9 +1734,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+ if (!hist_field->type)
+ goto free;
+
+- if (field->filter_type == FILTER_STATIC_STRING)
++ if (field->filter_type == FILTER_STATIC_STRING) {
+ hist_field->fn = hist_field_string;
+- else if (field->filter_type == FILTER_DYN_STRING)
++ hist_field->size = field->size;
++ } else if (field->filter_type == FILTER_DYN_STRING)
+ hist_field->fn = hist_field_dynstring;
+ else
+ hist_field->fn = hist_field_pstring;
+@@ -1837,8 +1839,11 @@ static int init_var_ref(struct hist_field *ref_field,
+ return err;
+ free:
+ kfree(ref_field->system);
++ ref_field->system = NULL;
+ kfree(ref_field->event_name);
++ ref_field->event_name = NULL;
+ kfree(ref_field->name);
++ ref_field->name = NULL;
+
+ goto out;
+ }
+@@ -1891,7 +1896,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data,
+ return ref_field;
+ }
+ }
+-
++ /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
++ if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
++ return NULL;
+ ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+ if (ref_field) {
+ if (init_var_ref(ref_field, var_field, system, event_name)) {
+@@ -2048,9 +2055,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+ /*
+ * For backward compatibility, if field_name
+ * was "cpu", then we treat this the same as
+- * common_cpu.
++ * common_cpu. This also works for "CPU".
+ */
+- if (strcmp(field_name, "cpu") == 0) {
++ if (field && field->filter_type == FILTER_CPU) {
+ *flags |= HIST_FIELD_FL_CPU;
+ } else {
+ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
+@@ -2219,6 +2226,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
+ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+ expr->fn = hist_field_unary_minus;
+ expr->operands[0] = operand1;
++ expr->size = operand1->size;
++ expr->is_signed = operand1->is_signed;
+ expr->operator = FIELD_OP_UNARY_MINUS;
+ expr->name = expr_str(expr, 0);
+ expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
+@@ -2358,6 +2367,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+
+ /* The operand sizes should be the same, so just pick one */
+ expr->size = operand1->size;
++ expr->is_signed = operand1->is_signed;
+
+ expr->operator = field_op;
+ expr->name = expr_str(expr, 0);
+@@ -2690,8 +2700,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
+ if (val->flags & HIST_FIELD_FL_STRING) {
+ char *str = elt_data->field_var_str[j++];
+ char *val_str = (char *)(uintptr_t)var_val;
++ unsigned int size;
+
+- strscpy(str, val_str, STR_VAR_LEN_MAX);
++ size = min(val->size, STR_VAR_LEN_MAX);
++ strscpy(str, val_str, size);
+ var_val = (u64)(uintptr_t)str;
+ }
+ tracing_map_set_var(elt, var_idx, var_val);
+@@ -3179,6 +3191,7 @@ static int parse_action_params(struct trace_array *tr, char *params,
+ while (params) {
+ if (data->n_params >= SYNTH_FIELDS_MAX) {
+ hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0);
++ ret = -EINVAL;
+ goto out;
+ }
+
+@@ -3414,9 +3427,12 @@ static int check_synth_field(struct synth_event *event,
+ && field->is_dynamic)
+ return 0;
+
++ if (strstr(hist_field->type, "long[") && field->is_stack)
++ return 0;
++
+ if (strcmp(field->type, hist_field->type) != 0) {
+ if (field->size != hist_field->size ||
+- field->is_signed != hist_field->is_signed)
++ (!field->is_string && field->is_signed != hist_field->is_signed))
+ return -EINVAL;
+ }
+
+@@ -3515,6 +3531,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
+
+ lockdep_assert_held(&event_mutex);
+
++ /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
++ if (data->n_params > SYNTH_FIELDS_MAX)
++ return -EINVAL;
++
+ if (data->use_trace_keyword)
+ synth_event_name = data->synth_event_name;
+ else
+@@ -3578,6 +3598,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
+
+ var_ref_idx = find_var_ref_idx(hist_data, var_ref);
+ if (WARN_ON(var_ref_idx < 0)) {
++ kfree(p);
+ ret = var_ref_idx;
+ goto err;
+ }
+@@ -4046,6 +4067,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data)
+
+ s = kstrdup(field_str, GFP_KERNEL);
+ if (!s) {
++ kfree(hist_data->attrs->var_defs.name[n_vars]);
++ hist_data->attrs->var_defs.name[n_vars] = NULL;
+ ret = -ENOMEM;
+ goto free;
+ }
+@@ -4471,7 +4494,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
+
+ if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
+ cmp_fn = tracing_map_cmp_none;
+- else if (!field)
++ else if (!field || hist_field->flags & HIST_FIELD_FL_CPU)
+ cmp_fn = tracing_map_cmp_num(hist_field->size,
+ hist_field->is_signed);
+ else if (is_string_field(field))
+@@ -4578,6 +4601,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
+ if (hist_field->flags & HIST_FIELD_FL_STRING) {
+ unsigned int str_start, var_str_idx, idx;
+ char *str, *val_str;
++ unsigned int size;
+
+ str_start = hist_data->n_field_var_str +
+ hist_data->n_save_var_str;
+@@ -4586,7 +4610,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
+
+ str = elt_data->field_var_str[idx];
+ val_str = (char *)(uintptr_t)hist_val;
+- strscpy(str, val_str, STR_VAR_LEN_MAX);
++
++ size = min(hist_field->size, STR_VAR_LEN_MAX);
++ strscpy(str, val_str, size);
+
+ hist_val = (u64)(uintptr_t)str;
+ }
+@@ -4662,6 +4688,9 @@ static void event_hist_trigger(struct event_trigger_data *data,
+ void *key = NULL;
+ unsigned int i;
+
++ if (unlikely(!rbe))
++ return;
++
+ memset(compound_key, 0, hist_data->key_size);
+
+ for_each_hist_key_field(i, hist_data) {
+@@ -5914,13 +5943,16 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
+ if (get_named_trigger_data(trigger_data))
+ goto enable;
+
+- if (has_hist_vars(hist_data))
+- save_hist_vars(hist_data);
+-
+ ret = create_actions(hist_data);
+ if (ret)
+ goto out_unreg;
+
++ if (has_hist_vars(hist_data) || hist_data->n_var_refs) {
++ ret = save_hist_vars(hist_data);
++ if (ret)
++ goto out_unreg;
++ }
++
+ ret = tracing_map_init(hist_data->map);
+ if (ret)
+ goto out_unreg;
+@@ -5936,7 +5968,7 @@ enable:
+ /* Just return zero, not the number of registered triggers */
+ ret = 0;
+ out:
+- if (ret == 0)
++ if (ret == 0 && glob[0])
+ hist_err_clear();
+
+ return ret;
+diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
+index d54094b7a9d75..1e02bb431dcb5 100644
+--- a/kernel/trace/trace_events_synth.c
++++ b/kernel/trace/trace_events_synth.c
+@@ -17,6 +17,8 @@
+ /* for gfp flag names */
+ #include <linux/trace_events.h>
+ #include <trace/events/mmflags.h>
++#include "trace_probe.h"
++#include "trace_probe_kernel.h"
+
+ #include "trace_synth.h"
+
+@@ -163,6 +165,14 @@ static int synth_field_is_string(char *type)
+ return false;
+ }
+
++static int synth_field_is_stack(char *type)
++{
++ if (strstr(type, "long[") != NULL)
++ return true;
++
++ return false;
++}
++
+ static int synth_field_string_size(char *type)
+ {
+ char buf[4], *end, *start;
+@@ -238,6 +248,8 @@ static int synth_field_size(char *type)
+ size = sizeof(gfp_t);
+ else if (synth_field_is_string(type))
+ size = synth_field_string_size(type);
++ else if (synth_field_is_stack(type))
++ size = 0;
+
+ return size;
+ }
+@@ -282,6 +294,8 @@ static const char *synth_field_fmt(char *type)
+ fmt = "%x";
+ else if (synth_field_is_string(type))
+ fmt = "%.*s";
++ else if (synth_field_is_stack(type))
++ fmt = "%s";
+
+ return fmt;
+ }
+@@ -361,6 +375,23 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
+ i == se->n_fields - 1 ? "" : " ");
+ n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ }
++ } else if (se->fields[i]->is_stack) {
++ u32 offset, data_offset, len;
++ unsigned long *p, *end;
++
++ offset = (u32)entry->fields[n_u64];
++ data_offset = offset & 0xffff;
++ len = offset >> 16;
++
++ p = (void *)entry + data_offset;
++ end = (void *)p + len - (sizeof(long) - 1);
++
++ trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
++
++ for (; *p && p < end; p++)
++ trace_seq_printf(s, "=> %pS\n", (void *)*p);
++ n_u64++;
++
+ } else {
+ struct trace_print_flags __flags[] = {
+ __def_gfpflag_names, {-1, NULL} };
+@@ -401,6 +432,7 @@ static unsigned int trace_string(struct synth_trace_event *entry,
+ {
+ unsigned int len = 0;
+ char *str_field;
++ int ret;
+
+ if (is_dynamic) {
+ u32 data_offset;
+@@ -409,25 +441,70 @@ static unsigned int trace_string(struct synth_trace_event *entry,
+ data_offset += event->n_u64 * sizeof(u64);
+ data_offset += data_size;
+
+- str_field = (char *)entry + data_offset;
+-
+- len = strlen(str_val) + 1;
+- strscpy(str_field, str_val, len);
++ len = kern_fetch_store_strlen((unsigned long)str_val);
+
+ data_offset |= len << 16;
+ *(u32 *)&entry->fields[*n_u64] = data_offset;
+
++ ret = kern_fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
++
+ (*n_u64)++;
+ } else {
+ str_field = (char *)&entry->fields[*n_u64];
+
+- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
++#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
++ if ((unsigned long)str_val < TASK_SIZE)
++ ret = strncpy_from_user_nofault(str_field, str_val, STR_VAR_LEN_MAX);
++ else
++#endif
++ ret = strncpy_from_kernel_nofault(str_field, str_val, STR_VAR_LEN_MAX);
++
++ if (ret < 0)
++ strcpy(str_field, FAULT_STRING);
++
+ (*n_u64) += STR_VAR_LEN_MAX / sizeof(u64);
+ }
+
+ return len;
+ }
+
++static unsigned int trace_stack(struct synth_trace_event *entry,
++ struct synth_event *event,
++ long *stack,
++ unsigned int data_size,
++ unsigned int *n_u64)
++{
++ unsigned int len;
++ u32 data_offset;
++ void *data_loc;
++
++ data_offset = struct_size(entry, fields, event->n_u64);
++ data_offset += data_size;
++
++ for (len = 0; len < HIST_STACKTRACE_DEPTH; len++) {
++ if (!stack[len])
++ break;
++ }
++
++ /* Include the zero'd element if it fits */
++ if (len < HIST_STACKTRACE_DEPTH)
++ len++;
++
++ len *= sizeof(long);
++
++ /* Find the dynamic section to copy the stack into. */
++ data_loc = (void *)entry + data_offset;
++ memcpy(data_loc, stack, len);
++
++ /* Fill in the field that holds the offset/len combo */
++ data_offset |= len << 16;
++ *(u32 *)&entry->fields[*n_u64] = data_offset;
++
++ (*n_u64)++;
++
++ return len;
++}
++
+ static notrace void trace_event_raw_event_synth(void *__data,
+ u64 *var_ref_vals,
+ unsigned int *var_ref_idx)
+@@ -454,7 +531,7 @@ static notrace void trace_event_raw_event_synth(void *__data,
+ val_idx = var_ref_idx[field_pos];
+ str_val = (char *)(long)var_ref_vals[val_idx];
+
+- len = strlen(str_val) + 1;
++ len = kern_fetch_store_strlen((unsigned long)str_val);
+
+ fields_size += len;
+ }
+@@ -480,6 +557,12 @@ static notrace void trace_event_raw_event_synth(void *__data,
+ event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
++ } else if (event->fields[i]->is_stack) {
++ long *stack = (long *)(long)var_ref_vals[val_idx];
++
++ len = trace_stack(entry, event, stack,
++ data_size, &n_u64);
++ data_size += len;
+ } else {
+ struct synth_field *field = event->fields[i];
+ u64 val = var_ref_vals[val_idx];
+@@ -542,6 +625,9 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
+ event->fields[i]->is_dynamic)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", __get_str(%s)", event->fields[i]->name);
++ else if (event->fields[i]->is_stack)
++ pos += snprintf(buf + pos, LEN_OR_ZERO,
++ ", __get_stacktrace(%s)", event->fields[i]->name);
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", REC->%s", event->fields[i]->name);
+@@ -678,7 +764,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
+ ret = -EINVAL;
+ goto free;
+ } else if (size == 0) {
+- if (synth_field_is_string(field->type)) {
++ if (synth_field_is_string(field->type) ||
++ synth_field_is_stack(field->type)) {
+ char *type;
+
+ len = sizeof("__data_loc ") + strlen(field->type) + 1;
+@@ -709,6 +796,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
+
+ if (synth_field_is_string(field->type))
+ field->is_string = true;
++ else if (synth_field_is_stack(field->type))
++ field->is_stack = true;
+
+ field->is_signed = synth_field_signed(field->type);
+ out:
+@@ -809,10 +898,9 @@ static int register_synth_event(struct synth_event *event)
+ }
+
+ ret = set_synth_event_print_fmt(call);
+- if (ret < 0) {
++ /* unregister_trace_event() will be called inside */
++ if (ret < 0)
+ trace_remove_event_call(call);
+- goto err;
+- }
+ out:
+ return ret;
+ err:
+@@ -1265,12 +1353,12 @@ static int __create_synth_event(const char *name, const char *raw_fields)
+ goto err;
+ }
+
+- fields[n_fields++] = field;
+ if (n_fields == SYNTH_FIELDS_MAX) {
+ synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
+ ret = -EINVAL;
+ goto err;
+ }
++ fields[n_fields++] = field;
+
+ n_fields_this_loop++;
+ }
+@@ -1406,7 +1494,6 @@ int synth_event_delete(const char *event_name)
+ mutex_unlock(&event_mutex);
+
+ if (mod) {
+- mutex_lock(&trace_types_lock);
+ /*
+ * It is safest to reset the ring buffer if the module
+ * being unloaded registered any events that were
+@@ -1418,7 +1505,6 @@ int synth_event_delete(const char *event_name)
+ * occur.
+ */
+ tracing_reset_all_online_cpus();
+- mutex_unlock(&trace_types_lock);
+ }
+
+ return ret;
+@@ -2053,6 +2139,13 @@ static int create_synth_event(const char *raw_command)
+
+ last_cmd_set(raw_command);
+
++ name = raw_command;
++
++ /* Don't try to process if not our system */
++ if (name[0] != 's' || name[1] != ':')
++ return -ECANCELED;
++ name += 2;
++
+ p = strpbrk(raw_command, " \t");
+ if (!p) {
+ synth_err(SYNTH_ERR_INVALID_CMD, 0);
+@@ -2061,12 +2154,6 @@ static int create_synth_event(const char *raw_command)
+
+ fields = skip_spaces(p);
+
+- name = raw_command;
+-
+- if (name[0] != 's' || name[1] != ':')
+- return -ECANCELED;
+- name += 2;
+-
+ /* This interface accepts group name prefix */
+ if (strchr(name, '/')) {
+ len = str_has_prefix(name, SYNTH_SYSTEM "/");
+@@ -2227,8 +2314,8 @@ static __init int trace_events_synth_init(void)
+ if (err)
+ goto err;
+
+- entry = tracefs_create_file("synthetic_events", 0644, NULL,
+- NULL, &synth_events_fops);
++ entry = tracefs_create_file("synthetic_events", TRACE_MODE_WRITE,
++ NULL, NULL, &synth_events_fops);
+ if (!entry) {
+ err = -ENODEV;
+ goto err;
+diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
+index 3d5c07239a2a8..106f9813841a1 100644
+--- a/kernel/trace/trace_events_trigger.c
++++ b/kernel/trace/trace_events_trigger.c
+@@ -128,7 +128,8 @@ static bool check_user_trigger(struct trace_event_file *file)
+ {
+ struct event_trigger_data *data;
+
+- list_for_each_entry_rcu(data, &file->triggers, list) {
++ list_for_each_entry_rcu(data, &file->triggers, list,
++ lockdep_is_held(&event_mutex)) {
+ if (data->flags & EVENT_TRIGGER_FL_PROBE)
+ continue;
+ return true;
+@@ -955,6 +956,16 @@ traceon_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+ {
++ struct trace_event_file *file = data->private_data;
++
++ if (file) {
++ if (tracer_tracing_is_on(file->tr))
++ return;
++
++ tracer_tracing_on(file->tr);
++ return;
++ }
++
+ if (tracing_is_on())
+ return;
+
+@@ -966,8 +977,15 @@ traceon_count_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+ {
+- if (tracing_is_on())
+- return;
++ struct trace_event_file *file = data->private_data;
++
++ if (file) {
++ if (tracer_tracing_is_on(file->tr))
++ return;
++ } else {
++ if (tracing_is_on())
++ return;
++ }
+
+ if (!data->count)
+ return;
+@@ -975,7 +993,10 @@ traceon_count_trigger(struct event_trigger_data *data,
+ if (data->count != -1)
+ (data->count)--;
+
+- tracing_on();
++ if (file)
++ tracer_tracing_on(file->tr);
++ else
++ tracing_on();
+ }
+
+ static void
+@@ -983,6 +1004,16 @@ traceoff_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+ {
++ struct trace_event_file *file = data->private_data;
++
++ if (file) {
++ if (!tracer_tracing_is_on(file->tr))
++ return;
++
++ tracer_tracing_off(file->tr);
++ return;
++ }
++
+ if (!tracing_is_on())
+ return;
+
+@@ -994,8 +1025,15 @@ traceoff_count_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+ {
+- if (!tracing_is_on())
+- return;
++ struct trace_event_file *file = data->private_data;
++
++ if (file) {
++ if (!tracer_tracing_is_on(file->tr))
++ return;
++ } else {
++ if (!tracing_is_on())
++ return;
++ }
+
+ if (!data->count)
+ return;
+@@ -1003,7 +1041,10 @@ traceoff_count_trigger(struct event_trigger_data *data,
+ if (data->count != -1)
+ (data->count)--;
+
+- tracing_off();
++ if (file)
++ tracer_tracing_off(file->tr);
++ else
++ tracing_off();
+ }
+
+ static int
+@@ -1200,7 +1241,12 @@ stacktrace_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+ {
+- trace_dump_stack(STACK_SKIP);
++ struct trace_event_file *file = data->private_data;
++
++ if (file)
++ __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP);
++ else
++ trace_dump_stack(STACK_SKIP);
+ }
+
+ static void
+diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
+index 0de6837722da5..6b5ff3ba4251f 100644
+--- a/kernel/trace/trace_functions_graph.c
++++ b/kernel/trace/trace_functions_graph.c
+@@ -1340,7 +1340,7 @@ static __init int init_graph_tracefs(void)
+ if (ret)
+ return 0;
+
+- trace_create_file("max_graph_depth", 0644, NULL,
++ trace_create_file("max_graph_depth", TRACE_MODE_WRITE, NULL,
+ NULL, &graph_depth_fops);
+
+ return 0;
+diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
+index 1b83d75eb103b..3a994bd8520ca 100644
+--- a/kernel/trace/trace_hwlat.c
++++ b/kernel/trace/trace_hwlat.c
+@@ -339,7 +339,7 @@ static void move_to_next_cpu(void)
+ cpumask_clear(current_mask);
+ cpumask_set_cpu(next_cpu, current_mask);
+
+- sched_setaffinity(0, current_mask);
++ set_cpus_allowed_ptr(current, current_mask);
+ return;
+
+ change_mode:
+@@ -446,7 +446,7 @@ static int start_single_kthread(struct trace_array *tr)
+
+ }
+
+- sched_setaffinity(kthread->pid, current_mask);
++ set_cpus_allowed_ptr(kthread, current_mask);
+
+ kdata->kthread = kthread;
+ wake_up_process(kthread);
+@@ -491,18 +491,18 @@ static void stop_per_cpu_kthreads(void)
+ static int start_cpu_kthread(unsigned int cpu)
+ {
+ struct task_struct *kthread;
+- char comm[24];
+
+- snprintf(comm, 24, "hwlatd/%d", cpu);
++ /* Do not start a new hwlatd thread if it is already running */
++ if (per_cpu(hwlat_per_cpu_data, cpu).kthread)
++ return 0;
+
+- kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
++ kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
+ if (IS_ERR(kthread)) {
+ pr_err(BANNER "could not start sampling thread\n");
+ return -ENOMEM;
+ }
+
+ per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
+- wake_up_process(kthread);
+
+ return 0;
+ }
+@@ -588,9 +588,6 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
+ */
+ cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
+
+- for_each_online_cpu(cpu)
+- per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
+-
+ for_each_cpu(cpu, current_mask) {
+ retval = start_cpu_kthread(cpu);
+ if (retval)
+@@ -638,7 +635,7 @@ static int s_mode_show(struct seq_file *s, void *v)
+ else
+ seq_printf(s, "%s", thread_mode_str[mode]);
+
+- if (mode != MODE_MAX)
++ if (mode < MODE_MAX - 1) /* if mode is any but last */
+ seq_puts(s, " ");
+
+ return 0;
+@@ -782,21 +779,21 @@ static int init_tracefs(void)
+ if (!top_dir)
+ return -ENOMEM;
+
+- hwlat_sample_window = tracefs_create_file("window", 0640,
++ hwlat_sample_window = tracefs_create_file("window", TRACE_MODE_WRITE,
+ top_dir,
+ &hwlat_window,
+ &trace_min_max_fops);
+ if (!hwlat_sample_window)
+ goto err;
+
+- hwlat_sample_width = tracefs_create_file("width", 0644,
++ hwlat_sample_width = tracefs_create_file("width", TRACE_MODE_WRITE,
+ top_dir,
+ &hwlat_width,
+ &trace_min_max_fops);
+ if (!hwlat_sample_width)
+ goto err;
+
+- hwlat_thread_mode = trace_create_file("mode", 0644,
++ hwlat_thread_mode = trace_create_file("mode", TRACE_MODE_WRITE,
+ top_dir,
+ NULL,
+ &thread_mode_fops);
+diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
+index 590b3d51afae9..ba37f768e2f27 100644
+--- a/kernel/trace/trace_irqsoff.c
++++ b/kernel/trace/trace_irqsoff.c
+@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
+ {
+ if (is_graph(iter->tr))
+ graph_trace_open(iter);
+-
++ else
++ iter->private = NULL;
+ }
+
+ static void irqsoff_trace_close(struct trace_iterator *iter)
+diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
+index 3a64ba4bbad6f..e755e09805ab1 100644
+--- a/kernel/trace/trace_kprobe.c
++++ b/kernel/trace/trace_kprobe.c
+@@ -19,6 +19,7 @@
+ #include "trace_kprobe_selftest.h"
+ #include "trace_probe.h"
+ #include "trace_probe_tmpl.h"
++#include "trace_probe_kernel.h"
+
+ #define KPROBE_EVENT_SYSTEM "kprobes"
+ #define KRETPROBE_MAXACTIVE_MAX 4096
+@@ -31,7 +32,7 @@ static int __init set_kprobe_boot_events(char *str)
+ strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ disable_tracing_selftest("running kprobe events");
+
+- return 0;
++ return 1;
+ }
+ __setup("kprobe_event=", set_kprobe_boot_events);
+
+@@ -1175,15 +1176,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
+ {
+ struct dyn_event *ev = v;
+ struct trace_kprobe *tk;
++ unsigned long nmissed;
+
+ if (!is_trace_kprobe(ev))
+ return 0;
+
+ tk = to_trace_kprobe(ev);
++ nmissed = trace_kprobe_is_return(tk) ?
++ tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+ seq_printf(m, " %-44s %15lu %15lu\n",
+ trace_probe_name(&tk->tp),
+ trace_kprobe_nhit(tk),
+- tk->rp.kp.nmissed);
++ nmissed);
+
+ return 0;
+ }
+@@ -1220,29 +1224,14 @@ static const struct file_operations kprobe_profile_ops = {
+ static nokprobe_inline int
+ fetch_store_strlen_user(unsigned long addr)
+ {
+- const void __user *uaddr = (__force const void __user *)addr;
+-
+- return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
++ return kern_fetch_store_strlen_user(addr);
+ }
+
+ /* Return the length of string -- including null terminal byte */
+ static nokprobe_inline int
+ fetch_store_strlen(unsigned long addr)
+ {
+- int ret, len = 0;
+- u8 c;
+-
+-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+- if (addr < TASK_SIZE)
+- return fetch_store_strlen_user(addr);
+-#endif
+-
+- do {
+- ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
+- len++;
+- } while (c && ret == 0 && len < MAX_STRING_SIZE);
+-
+- return (ret < 0) ? ret : len;
++ return kern_fetch_store_strlen(addr);
+ }
+
+ /*
+@@ -1252,21 +1241,7 @@ fetch_store_strlen(unsigned long addr)
+ static nokprobe_inline int
+ fetch_store_string_user(unsigned long addr, void *dest, void *base)
+ {
+- const void __user *uaddr = (__force const void __user *)addr;
+- int maxlen = get_loc_len(*(u32 *)dest);
+- void *__dest;
+- long ret;
+-
+- if (unlikely(!maxlen))
+- return -ENOMEM;
+-
+- __dest = get_loc_data(dest, base);
+-
+- ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
+- if (ret >= 0)
+- *(u32 *)dest = make_data_loc(ret, __dest - base);
+-
+- return ret;
++ return kern_fetch_store_string_user(addr, dest, base);
+ }
+
+ /*
+@@ -1276,29 +1251,7 @@ fetch_store_string_user(unsigned long addr, void *dest, void *base)
+ static nokprobe_inline int
+ fetch_store_string(unsigned long addr, void *dest, void *base)
+ {
+- int maxlen = get_loc_len(*(u32 *)dest);
+- void *__dest;
+- long ret;
+-
+-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+- if ((unsigned long)addr < TASK_SIZE)
+- return fetch_store_string_user(addr, dest, base);
+-#endif
+-
+- if (unlikely(!maxlen))
+- return -ENOMEM;
+-
+- __dest = get_loc_data(dest, base);
+-
+- /*
+- * Try to get string again, since the string can be changed while
+- * probing.
+- */
+- ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
+- if (ret >= 0)
+- *(u32 *)dest = make_data_loc(ret, __dest - base);
+-
+- return ret;
++ return kern_fetch_store_string(addr, dest, base);
+ }
+
+ static nokprobe_inline int
+@@ -1730,8 +1683,17 @@ static int
+ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+ {
+ struct kretprobe *rp = get_kretprobe(ri);
+- struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
++ struct trace_kprobe *tk;
++
++ /*
++ * There is a small chance that get_kretprobe(ri) returns NULL when
++ * the kretprobe is unregistered on another CPU between kretprobe's
++ * trampoline_handler and this function.
++ */
++ if (unlikely(!rp))
++ return 0;
+
++ tk = container_of(rp, struct trace_kprobe, rp);
+ raw_cpu_inc(*tk->nhit);
+
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
+@@ -1925,16 +1887,16 @@ static __init int init_kprobe_trace(void)
+ if (ret)
+ return 0;
+
+- entry = tracefs_create_file("kprobe_events", 0644, NULL,
+- NULL, &kprobe_events_ops);
++ entry = tracefs_create_file("kprobe_events", TRACE_MODE_WRITE,
++ NULL, NULL, &kprobe_events_ops);
+
+ /* Event list interface */
+ if (!entry)
+ pr_warn("Could not create tracefs 'kprobe_events' entry\n");
+
+ /* Profile interface */
+- entry = tracefs_create_file("kprobe_profile", 0444, NULL,
+- NULL, &kprobe_profile_ops);
++ entry = tracefs_create_file("kprobe_profile", TRACE_MODE_READ,
++ NULL, NULL, &kprobe_profile_ops);
+
+ if (!entry)
+ pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
+index ce053619f289e..90c4f70dc9fdf 100644
+--- a/kernel/trace/trace_osnoise.c
++++ b/kernel/trace/trace_osnoise.c
+@@ -730,7 +730,7 @@ void osnoise_trace_irq_entry(int id)
+ void osnoise_trace_irq_exit(int id, const char *desc)
+ {
+ struct osnoise_variables *osn_var = this_cpu_osn_var();
+- int duration;
++ s64 duration;
+
+ if (!osn_var->sampling)
+ return;
+@@ -861,7 +861,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
+ static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
+ {
+ struct osnoise_variables *osn_var = this_cpu_osn_var();
+- int duration;
++ s64 duration;
+
+ if (!osn_var->sampling)
+ return;
+@@ -969,7 +969,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
+ static void
+ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
+ {
+- int duration;
++ s64 duration;
+
+ if (!osn_var->sampling)
+ return;
+@@ -1195,6 +1195,26 @@ static int run_osnoise(void)
+ osnoise_stop_tracing();
+ }
+
++ /*
++ * In some cases, notably when running on a nohz_full CPU with
++ * a stopped tick PREEMPT_RCU has no way to account for QSs.
++ * This will eventually cause unwarranted noise as PREEMPT_RCU
++ * will force preemption as the means of ending the current
++ * grace period. We avoid this problem by calling
++ * rcu_momentary_dyntick_idle(), which performs a zero duration
++ * EQS allowing PREEMPT_RCU to end the current grace period.
++ * This call shouldn't be wrapped inside an RCU critical
++ * section.
++ *
++ * Note that in non PREEMPT_RCU kernels QSs are handled through
++ * cond_resched()
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
++ local_irq_disable();
++ rcu_momentary_dyntick_idle();
++ local_irq_enable();
++ }
++
+ /*
+ * For the non-preemptive kernel config: let threads runs, if
+ * they so wish.
+@@ -1249,6 +1269,37 @@ out:
+ static struct cpumask osnoise_cpumask;
+ static struct cpumask save_cpumask;
+
++/*
++ * osnoise_sleep - sleep until the next period
++ */
++static void osnoise_sleep(void)
++{
++ u64 interval;
++ ktime_t wake_time;
++
++ mutex_lock(&interface_lock);
++ interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
++ mutex_unlock(&interface_lock);
++
++ /*
++ * differently from hwlat_detector, the osnoise tracer can run
++ * without a pause because preemption is on.
++ */
++ if (!interval) {
++ /* Let synchronize_rcu_tasks() make progress */
++ cond_resched_tasks_rcu_qs();
++ return;
++ }
++
++ wake_time = ktime_add_us(ktime_get(), interval);
++ __set_current_state(TASK_INTERRUPTIBLE);
++
++ while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
++ if (kthread_should_stop())
++ break;
++ }
++}
++
+ /*
+ * osnoise_main - The osnoise detection kernel thread
+ *
+@@ -1257,30 +1308,10 @@ static struct cpumask save_cpumask;
+ */
+ static int osnoise_main(void *data)
+ {
+- u64 interval;
+
+ while (!kthread_should_stop()) {
+-
+ run_osnoise();
+-
+- mutex_lock(&interface_lock);
+- interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
+- mutex_unlock(&interface_lock);
+-
+- do_div(interval, USEC_PER_MSEC);
+-
+- /*
+- * differently from hwlat_detector, the osnoise tracer can run
+- * without a pause because preemption is on.
+- */
+- if (interval < 1) {
+- /* Let synchronize_rcu_tasks() make progress */
+- cond_resched_tasks_rcu_qs();
+- continue;
+- }
+-
+- if (msleep_interruptible(interval))
+- break;
++ osnoise_sleep();
+ }
+
+ return 0;
+@@ -1567,8 +1598,9 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
+ for_each_cpu(cpu, current_mask) {
+ retval = start_kthread(cpu);
+ if (retval) {
++ cpus_read_unlock();
+ stop_per_cpu_kthreads();
+- break;
++ return retval;
+ }
+ }
+
+@@ -1856,38 +1888,38 @@ static int init_tracefs(void)
+ if (!top_dir)
+ return 0;
+
+- tmp = tracefs_create_file("period_us", 0640, top_dir,
++ tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
+ &osnoise_period, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+
+- tmp = tracefs_create_file("runtime_us", 0644, top_dir,
++ tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
+ &osnoise_runtime, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+
+- tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir,
++ tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
+ &osnoise_stop_tracing_in, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+
+- tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir,
++ tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
+ &osnoise_stop_tracing_total, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+
+- tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops);
++ tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
+ if (!tmp)
+ goto err;
+ #ifdef CONFIG_TIMERLAT_TRACER
+ #ifdef CONFIG_STACKTRACE
+- tmp = tracefs_create_file("print_stack", 0640, top_dir,
++ tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
+ &osnoise_print_stack, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+ #endif
+
+- tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir,
++ tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
+ &timerlat_period, &trace_min_max_fops);
+ if (!tmp)
+ goto err;
+@@ -1932,6 +1964,13 @@ out_unhook_irq:
+ return -EINVAL;
+ }
+
++static void osnoise_unhook_events(void)
++{
++ unhook_thread_events();
++ unhook_softirq_events();
++ unhook_irq_events();
++}
++
+ static int __osnoise_tracer_start(struct trace_array *tr)
+ {
+ int retval;
+@@ -1949,7 +1988,14 @@ static int __osnoise_tracer_start(struct trace_array *tr)
+
+ retval = start_per_cpu_kthreads(tr);
+ if (retval) {
+- unhook_irq_events();
++ trace_osnoise_callback_enabled = false;
++ /*
++ * Make sure that ftrace_nmi_enter/exit() see
++ * trace_osnoise_callback_enabled as false before continuing.
++ */
++ barrier();
++
++ osnoise_unhook_events();
+ return retval;
+ }
+
+@@ -1981,9 +2027,7 @@ static void osnoise_tracer_stop(struct trace_array *tr)
+
+ stop_per_cpu_kthreads();
+
+- unhook_irq_events();
+- unhook_softirq_events();
+- unhook_thread_events();
++ osnoise_unhook_events();
+
+ osnoise_busy = false;
+ }
+diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
+index c2ca40e8595be..6b4d3f3abdae2 100644
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -1569,7 +1569,7 @@ static struct trace_event *events[] __initdata = {
+ NULL
+ };
+
+-__init static int init_events(void)
++__init int init_events(void)
+ {
+ struct trace_event *event;
+ int i, ret;
+@@ -1587,4 +1587,3 @@ __init static int init_events(void)
+
+ return 0;
+ }
+-early_initcall(init_events);
+diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
+index f4938040c2286..1e130da1b742c 100644
+--- a/kernel/trace/trace_preemptirq.c
++++ b/kernel/trace/trace_preemptirq.c
+@@ -46,7 +46,7 @@ void trace_hardirqs_on(void)
+ this_cpu_write(tracing_irq_cpu, 0);
+ }
+
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_on);
+@@ -94,15 +94,15 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+ this_cpu_write(tracing_irq_cpu, 0);
+ }
+
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+- lockdep_hardirqs_on(CALLER_ADDR0);
++ lockdep_hardirqs_on_prepare();
++ lockdep_hardirqs_on(caller_addr);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_on_caller);
+ NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
+
+ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+ {
+- lockdep_hardirqs_off(CALLER_ADDR0);
++ lockdep_hardirqs_off(caller_addr);
+
+ if (!this_cpu_read(tracing_irq_cpu)) {
+ this_cpu_write(tracing_irq_cpu, 1);
+diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
+index 4b320fe7df704..29f6e95439b67 100644
+--- a/kernel/trace/trace_printk.c
++++ b/kernel/trace/trace_printk.c
+@@ -384,7 +384,7 @@ static __init int init_trace_printk_function_export(void)
+ if (ret)
+ return 0;
+
+- trace_create_file("printk_formats", 0444, NULL,
++ trace_create_file("printk_formats", TRACE_MODE_READ, NULL,
+ NULL, &ftrace_formats_fops);
+
+ return 0;
+diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
+index 3ed2a3f372972..0888f0644d257 100644
+--- a/kernel/trace/trace_probe.c
++++ b/kernel/trace/trace_probe.c
+@@ -64,7 +64,7 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent)
+ int len = *(u32 *)data >> 16;
+
+ if (!len)
+- trace_seq_puts(s, "(fault)");
++ trace_seq_puts(s, FAULT_STRING);
+ else
+ trace_seq_printf(s, "\"%s\"",
+ (const char *)get_loc_data(data, ent));
+@@ -76,9 +76,11 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
+ /* Fetch type information table */
+ static const struct fetch_type probe_fetch_types[] = {
+ /* Special types */
+- __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
++ __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, 1,
+ "__data_loc char[]"),
+- __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
++ __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1, 1,
++ "__data_loc char[]"),
++ __ASSIGN_FETCH_TYPE("symstr", string, string, sizeof(u32), 1, 1,
+ "__data_loc char[]"),
+ /* Basic types */
+ ASSIGN_FETCH_TYPE(u8, u8, 0),
+@@ -246,7 +248,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
+ return -EINVAL;
+ }
+ strlcpy(buf, event, slash - event + 1);
+- if (!is_good_name(buf)) {
++ if (!is_good_system_name(buf)) {
+ trace_probe_log_err(offset, BAD_GROUP_NAME);
+ return -EINVAL;
+ }
+@@ -279,7 +281,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
+ int ret = 0;
+ int len;
+
+- if (strcmp(arg, "retval") == 0) {
++ if (flags & TPARG_FL_TPOINT) {
++ if (code->data)
++ return -EFAULT;
++ code->data = kstrdup(arg, GFP_KERNEL);
++ if (!code->data)
++ return -ENOMEM;
++ code->op = FETCH_OP_TP_ARG;
++ } else if (strcmp(arg, "retval") == 0) {
+ if (flags & TPARG_FL_RETURN) {
+ code->op = FETCH_OP_RETVAL;
+ } else {
+@@ -303,7 +312,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
+ }
+ } else
+ goto inval_var;
+- } else if (strcmp(arg, "comm") == 0) {
++ } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
+ code->op = FETCH_OP_COMM;
+ #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ } else if (((flags & TPARG_FL_MASK) ==
+@@ -319,13 +328,6 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
+ code->op = FETCH_OP_ARG;
+ code->param = (unsigned int)param - 1;
+ #endif
+- } else if (flags & TPARG_FL_TPOINT) {
+- if (code->data)
+- return -EFAULT;
+- code->data = kstrdup(arg, GFP_KERNEL);
+- if (!code->data)
+- return -ENOMEM;
+- code->op = FETCH_OP_TP_ARG;
+ } else
+ goto inval_var;
+
+@@ -356,6 +358,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs)
+ return -EINVAL;
+ }
+ *pbuf = kstrndup(str, len - 1, GFP_KERNEL);
++ if (!*pbuf)
++ return -ENOMEM;
+ return 0;
+ }
+
+@@ -378,6 +382,11 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
+ break;
+
+ case '%': /* named register */
++ if (flags & TPARG_FL_TPOINT) {
++ /* eprobes do not handle registers */
++ trace_probe_log_err(offs, BAD_VAR);
++ break;
++ }
+ ret = regs_query_register_offset(arg + 1);
+ if (ret >= 0) {
+ code->op = FETCH_OP_REG;
+@@ -611,9 +620,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
+
+ /*
+ * Since $comm and immediate string can not be dereferenced,
+- * we can find those by strcmp.
++ * we can find those by strcmp. But ignore for eprobes.
+ */
+- if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
++ if (!(flags & TPARG_FL_TPOINT) &&
++ (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
++ strncmp(arg, "\\\"", 2) == 0)) {
+ /* The type of $comm must be "string", and not an array. */
+ if (parg->count || (t && strcmp(t, "string")))
+ goto out;
+@@ -649,16 +660,26 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
+
+ ret = -EINVAL;
+ /* Store operation */
+- if (!strcmp(parg->type->name, "string") ||
+- !strcmp(parg->type->name, "ustring")) {
+- if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+- code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
+- code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
+- trace_probe_log_err(offset + (t ? (t - arg) : 0),
+- BAD_STRING);
+- goto fail;
++ if (parg->type->is_string) {
++ if (!strcmp(parg->type->name, "symstr")) {
++ if (code->op != FETCH_OP_REG && code->op != FETCH_OP_STACK &&
++ code->op != FETCH_OP_RETVAL && code->op != FETCH_OP_ARG &&
++ code->op != FETCH_OP_DEREF && code->op != FETCH_OP_TP_ARG) {
++ trace_probe_log_err(offset + (t ? (t - arg) : 0),
++ BAD_SYMSTRING);
++ goto fail;
++ }
++ } else {
++ if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
++ code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
++ code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
++ trace_probe_log_err(offset + (t ? (t - arg) : 0),
++ BAD_STRING);
++ goto fail;
++ }
+ }
+- if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
++ if (!strcmp(parg->type->name, "symstr") ||
++ (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
+ code->op == FETCH_OP_DATA) || code->op == FETCH_OP_TP_ARG ||
+ parg->count) {
+ /*
+@@ -666,6 +687,8 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
+ * must be kept, and if parg->count != 0, this is an
+ * array of string pointers instead of string address
+ * itself.
++ * For the symstr, it doesn't need to dereference, thus
++ * it just get the value.
+ */
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+@@ -677,6 +700,8 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
+ if (!strcmp(parg->type->name, "ustring") ||
+ code->op == FETCH_OP_UDEREF)
+ code->op = FETCH_OP_ST_USTRING;
++ else if (!strcmp(parg->type->name, "symstr"))
++ code->op = FETCH_OP_ST_SYMSTR;
+ else
+ code->op = FETCH_OP_ST_STRING;
+ code->size = parg->type->size;
+@@ -906,8 +931,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
+ for (i = 0; i < tp->nr_args; i++) {
+ parg = tp->args + i;
+ if (parg->count) {
+- if ((strcmp(parg->type->name, "string") == 0) ||
+- (strcmp(parg->type->name, "ustring") == 0))
++ if (parg->type->is_string)
+ fmt = ", __get_str(%s[%d])";
+ else
+ fmt = ", REC->%s[%d]";
+@@ -915,8 +939,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ fmt, parg->name, j);
+ } else {
+- if ((strcmp(parg->type->name, "string") == 0) ||
+- (strcmp(parg->type->name, "ustring") == 0))
++ if (parg->type->is_string)
+ fmt = ", __get_str(%s)";
+ else
+ fmt = ", REC->%s";
+diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
+index 99e7a5df025e2..0f0e5005b97a0 100644
+--- a/kernel/trace/trace_probe.h
++++ b/kernel/trace/trace_probe.h
+@@ -99,6 +99,7 @@ enum fetch_op {
+ FETCH_OP_ST_UMEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_STRING, /* String: .offset, .size */
+ FETCH_OP_ST_USTRING, /* User String: .offset, .size */
++ FETCH_OP_ST_SYMSTR, /* Kernel Symbol String: .offset, .size */
+ // Stage 4 (modify) op
+ FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
+ // Stage 5 (loop) op
+@@ -134,7 +135,8 @@ struct fetch_insn {
+ struct fetch_type {
+ const char *name; /* Name of type */
+ size_t size; /* Byte size of type */
+- int is_signed; /* Signed flag */
++ bool is_signed; /* Signed flag */
++ bool is_string; /* String flag */
+ print_type_func_t print; /* Print functions */
+ const char *fmt; /* Format string */
+ const char *fmttype; /* Name in format file */
+@@ -178,16 +180,19 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
+ #define _ADDR_FETCH_TYPE(t) __ADDR_FETCH_TYPE(t)
+ #define ADDR_FETCH_TYPE _ADDR_FETCH_TYPE(BITS_PER_LONG)
+
+-#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+- {.name = _name, \
++#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, str, _fmttype) \
++ {.name = _name, \
+ .size = _size, \
+- .is_signed = sign, \
++ .is_signed = (bool)sign, \
++ .is_string = (bool)str, \
+ .print = PRINT_TYPE_FUNC_NAME(ptype), \
+ .fmt = PRINT_TYPE_FMT_NAME(ptype), \
+ .fmttype = _fmttype, \
+ }
++
++/* Non string types can use these macros */
+ #define _ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+- __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, #_fmttype)
++ __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, 0, #_fmttype)
+ #define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
+ _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, ptype)
+
+@@ -303,7 +308,7 @@ trace_probe_primary_from_call(struct trace_event_call *call)
+ {
+ struct trace_probe_event *tpe = trace_probe_event_from_call(call);
+
+- return list_first_entry(&tpe->probes, struct trace_probe, list);
++ return list_first_entry_or_null(&tpe->probes, struct trace_probe, list);
+ }
+
+ static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp)
+@@ -432,6 +437,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+ C(ARRAY_TOO_BIG, "Array number is too big"), \
+ C(BAD_TYPE, "Unknown type is specified"), \
+ C(BAD_STRING, "String accepts only memory argument"), \
++ C(BAD_SYMSTRING, "Symbol String doesn't accept data/userdata"), \
+ C(BAD_BITFIELD, "Invalid bitfield"), \
+ C(ARG_NAME_TOO_LONG, "Argument name is too long"), \
+ C(NO_ARG_NAME, "Argument name is not specified"), \
+diff --git a/kernel/trace/trace_probe_kernel.h b/kernel/trace/trace_probe_kernel.h
+new file mode 100644
+index 0000000000000..2da70be83831c
+--- /dev/null
++++ b/kernel/trace/trace_probe_kernel.h
+@@ -0,0 +1,101 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __TRACE_PROBE_KERNEL_H_
++#define __TRACE_PROBE_KERNEL_H_
++
++/*
++ * This depends on trace_probe.h, but can not include it due to
++ * the way trace_probe_tmpl.h is used by trace_kprobe.c and trace_eprobe.c.
++ * Which means that any other user must include trace_probe.h before including
++ * this file.
++ */
++/* Return the length of string -- including null terminal byte */
++static nokprobe_inline int
++kern_fetch_store_strlen_user(unsigned long addr)
++{
++ const void __user *uaddr = (__force const void __user *)addr;
++
++ return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
++}
++
++/* Return the length of string -- including null terminal byte */
++static nokprobe_inline int
++kern_fetch_store_strlen(unsigned long addr)
++{
++ int ret, len = 0;
++ u8 c;
++
++#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
++ if (addr < TASK_SIZE)
++ return kern_fetch_store_strlen_user(addr);
++#endif
++
++ do {
++ ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
++ len++;
++ } while (c && ret == 0 && len < MAX_STRING_SIZE);
++
++ return (ret < 0) ? ret : len;
++}
++
++static nokprobe_inline void set_data_loc(int ret, void *dest, void *__dest, void *base)
++{
++ if (ret < 0)
++ ret = 0;
++ *(u32 *)dest = make_data_loc(ret, __dest - base);
++}
++
++/*
++ * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
++ * with max length and relative data location.
++ */
++static nokprobe_inline int
++kern_fetch_store_string_user(unsigned long addr, void *dest, void *base)
++{
++ const void __user *uaddr = (__force const void __user *)addr;
++ int maxlen = get_loc_len(*(u32 *)dest);
++ void *__dest;
++ long ret;
++
++ if (unlikely(!maxlen))
++ return -ENOMEM;
++
++ __dest = get_loc_data(dest, base);
++
++ ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
++ set_data_loc(ret, dest, __dest, base);
++
++ return ret;
++}
++
++/*
++ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
++ * length and relative data location.
++ */
++static nokprobe_inline int
++kern_fetch_store_string(unsigned long addr, void *dest, void *base)
++{
++ int maxlen = get_loc_len(*(u32 *)dest);
++ void *__dest;
++ long ret;
++
++#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
++ if ((unsigned long)addr < TASK_SIZE)
++ return kern_fetch_store_string_user(addr, dest, base);
++#endif
++
++ if (unlikely(!maxlen))
++ return -ENOMEM;
++
++ __dest = get_loc_data(dest, base);
++
++ /*
++ * Try to get string again, since the string can be changed while
++ * probing.
++ */
++ ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
++ set_data_loc(ret, dest, __dest, base);
++
++ return ret;
++}
++
++#endif /* __TRACE_PROBE_KERNEL_H_ */
+diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
+index b3bdb8ddb8622..3e2f5a43b974c 100644
+--- a/kernel/trace/trace_probe_tmpl.h
++++ b/kernel/trace/trace_probe_tmpl.h
+@@ -67,6 +67,37 @@ probe_mem_read(void *dest, void *src, size_t size);
+ static nokprobe_inline int
+ probe_mem_read_user(void *dest, void *src, size_t size);
+
++static nokprobe_inline int
++fetch_store_symstrlen(unsigned long addr)
++{
++ char namebuf[KSYM_SYMBOL_LEN];
++ int ret;
++
++ ret = sprint_symbol(namebuf, addr);
++ if (ret < 0)
++ return 0;
++
++ return ret + 1;
++}
++
++/*
++ * Fetch a null-terminated symbol string + offset. Caller MUST set *(u32 *)buf
++ * with max length and relative data location.
++ */
++static nokprobe_inline int
++fetch_store_symstring(unsigned long addr, void *dest, void *base)
++{
++ int maxlen = get_loc_len(*(u32 *)dest);
++ void *__dest;
++
++ if (unlikely(!maxlen))
++ return -ENOMEM;
++
++ __dest = get_loc_data(dest, base);
++
++ return sprint_symbol(__dest, addr);
++}
++
+ /* From the 2nd stage, routine is same */
+ static nokprobe_inline int
+ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
+@@ -99,16 +130,22 @@ stage2:
+ stage3:
+ /* 3rd stage: store value to buffer */
+ if (unlikely(!dest)) {
+- if (code->op == FETCH_OP_ST_STRING) {
++ switch (code->op) {
++ case FETCH_OP_ST_STRING:
+ ret = fetch_store_strlen(val + code->offset);
+ code++;
+ goto array;
+- } else if (code->op == FETCH_OP_ST_USTRING) {
+- ret += fetch_store_strlen_user(val + code->offset);
++ case FETCH_OP_ST_USTRING:
++ ret = fetch_store_strlen_user(val + code->offset);
+ code++;
+ goto array;
+- } else
++ case FETCH_OP_ST_SYMSTR:
++ ret = fetch_store_symstrlen(val + code->offset);
++ code++;
++ goto array;
++ default:
+ return -EILSEQ;
++ }
+ }
+
+ switch (code->op) {
+@@ -129,6 +166,10 @@ stage3:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string_user(val + code->offset, dest, base);
+ break;
++ case FETCH_OP_ST_SYMSTR:
++ loc = *(u32 *)dest;
++ ret = fetch_store_symstring(val + code->offset, dest, base);
++ break;
+ default:
+ return -EILSEQ;
+ }
+@@ -143,6 +184,8 @@ stage3:
+ array:
+ /* the last stage: Loop on array */
+ if (code->op == FETCH_OP_LP_ARRAY) {
++ if (ret < 0)
++ ret = 0;
+ total += ret;
+ if (++i < code->param) {
+ code = s3;
+@@ -204,9 +247,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec,
+ if (unlikely(arg->dynamic))
+ *dl = make_data_loc(maxlen, dyndata - base);
+ ret = process_fetch_insn(arg->code, rec, dl, base);
+- if (unlikely(ret < 0 && arg->dynamic)) {
+- *dl = make_data_loc(0, dyndata - base);
+- } else {
++ if (arg->dynamic && likely(ret > 0)) {
+ dyndata += ret;
+ maxlen -= ret;
+ }
+diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c
+index b2edac1fe156e..4d4b78c8ca257 100644
+--- a/kernel/trace/trace_recursion_record.c
++++ b/kernel/trace/trace_recursion_record.c
+@@ -226,8 +226,8 @@ __init static int create_recursed_functions(void)
+ {
+ struct dentry *dentry;
+
+- dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
+- &recursed_functions_fops);
++ dentry = trace_create_file("recursed_functions", TRACE_MODE_WRITE,
++ NULL, NULL, &recursed_functions_fops);
+ if (!dentry)
+ pr_warn("WARNING: Failed to create recursed_functions\n");
+ return 0;
+diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
+index 2402de520eca7..b239bfaa51ae8 100644
+--- a/kernel/trace/trace_sched_wakeup.c
++++ b/kernel/trace/trace_sched_wakeup.c
+@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
+ {
+ if (is_graph(iter->tr))
+ graph_trace_open(iter);
++ else
++ iter->private = NULL;
+ }
+
+ static void wakeup_trace_close(struct trace_iterator *iter)
+diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
+index 63c2850420516..5a48dba912eae 100644
+--- a/kernel/trace/trace_stack.c
++++ b/kernel/trace/trace_stack.c
+@@ -559,14 +559,14 @@ static __init int stack_trace_init(void)
+ if (ret)
+ return 0;
+
+- trace_create_file("stack_max_size", 0644, NULL,
++ trace_create_file("stack_max_size", TRACE_MODE_WRITE, NULL,
+ &stack_trace_max_size, &stack_max_size_fops);
+
+- trace_create_file("stack_trace", 0444, NULL,
++ trace_create_file("stack_trace", TRACE_MODE_READ, NULL,
+ NULL, &stack_trace_fops);
+
+ #ifdef CONFIG_DYNAMIC_FTRACE
+- trace_create_file("stack_trace_filter", 0644, NULL,
++ trace_create_file("stack_trace_filter", TRACE_MODE_WRITE, NULL,
+ &trace_ops, &stack_trace_filter_fops);
+ #endif
+
+diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
+index 8d141c3825a94..bb247beec4470 100644
+--- a/kernel/trace/trace_stat.c
++++ b/kernel/trace/trace_stat.c
+@@ -297,9 +297,9 @@ static int init_stat_file(struct stat_session *session)
+ if (!stat_dir && (ret = tracing_stat_init()))
+ return ret;
+
+- session->file = tracefs_create_file(session->ts->name, 0644,
+- stat_dir,
+- session, &tracing_stat_fops);
++ session->file = tracefs_create_file(session->ts->name, TRACE_MODE_WRITE,
++ stat_dir, session,
++ &tracing_stat_fops);
+ if (!session->file)
+ return -ENOMEM;
+ return 0;
+diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
+index b29595fe3ac5a..43f6fb6078dbf 100644
+--- a/kernel/trace/trace_synth.h
++++ b/kernel/trace/trace_synth.h
+@@ -18,6 +18,7 @@ struct synth_field {
+ bool is_signed;
+ bool is_string;
+ bool is_dynamic;
++ bool is_stack;
+ };
+
+ struct synth_event {
+diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
+index 8bfcd3b094226..b69e207012c99 100644
+--- a/kernel/trace/trace_syscalls.c
++++ b/kernel/trace/trace_syscalls.c
+@@ -154,7 +154,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
+ goto end;
+
+ /* parameter types */
+- if (tr->trace_flags & TRACE_ITER_VERBOSE)
++ if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
+ trace_seq_printf(s, "%s ", entry->types[i]);
+
+ /* parameter values */
+@@ -296,9 +296,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
+ struct trace_event_file *trace_file;
+ struct syscall_trace_enter *entry;
+ struct syscall_metadata *sys_data;
+- struct ring_buffer_event *event;
+- struct trace_buffer *buffer;
+- unsigned int trace_ctx;
++ struct trace_event_buffer fbuffer;
+ unsigned long args[6];
+ int syscall_nr;
+ int size;
+@@ -321,21 +319,16 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
+
+ size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+
+- trace_ctx = tracing_gen_ctx();
+-
+- buffer = tr->array_buffer.buffer;
+- event = trace_buffer_lock_reserve(buffer,
+- sys_data->enter_event->event.type, size, trace_ctx);
+- if (!event)
++ entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
++ if (!entry)
+ return;
+
+- entry = ring_buffer_event_data(event);
++ entry = ring_buffer_event_data(fbuffer.event);
+ entry->nr = syscall_nr;
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
+
+- event_trigger_unlock_commit(trace_file, buffer, event, entry,
+- trace_ctx);
++ trace_event_buffer_commit(&fbuffer);
+ }
+
+ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
+@@ -344,9 +337,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
+ struct trace_event_file *trace_file;
+ struct syscall_trace_exit *entry;
+ struct syscall_metadata *sys_data;
+- struct ring_buffer_event *event;
+- struct trace_buffer *buffer;
+- unsigned int trace_ctx;
++ struct trace_event_buffer fbuffer;
+ int syscall_nr;
+
+ syscall_nr = trace_get_syscall_nr(current, regs);
+@@ -365,21 +356,15 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
+ if (!sys_data)
+ return;
+
+- trace_ctx = tracing_gen_ctx();
+-
+- buffer = tr->array_buffer.buffer;
+- event = trace_buffer_lock_reserve(buffer,
+- sys_data->exit_event->event.type, sizeof(*entry),
+- trace_ctx);
+- if (!event)
++ entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
++ if (!entry)
+ return;
+
+- entry = ring_buffer_event_data(event);
++ entry = ring_buffer_event_data(fbuffer.event);
+ entry->nr = syscall_nr;
+ entry->ret = syscall_get_return_value(current, regs);
+
+- event_trigger_unlock_commit(trace_file, buffer, event, entry,
+- trace_ctx);
++ trace_event_buffer_commit(&fbuffer);
+ }
+
+ static int reg_event_syscall_enter(struct trace_event_file *file,
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 225ce569bf8f8..720b46b34ab94 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -168,7 +168,8 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
+ */
+ ret++;
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+- }
++ } else
++ *(u32 *)dest = make_data_loc(0, (void *)dst - base);
+
+ return ret;
+ }
+@@ -1313,6 +1314,7 @@ static int uprobe_perf_open(struct trace_event_call *call,
+ return 0;
+
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
++ tu = container_of(pos, struct trace_uprobe, tp);
+ err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+ if (err) {
+ uprobe_perf_close(call, event);
+@@ -1421,7 +1423,7 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+
+ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+ const char **filename, u64 *probe_offset,
+- bool perf_type_tracepoint)
++ u64 *probe_addr, bool perf_type_tracepoint)
+ {
+ const char *pevent = trace_event_name(event->tp_event);
+ const char *group = event->tp_event->class->system;
+@@ -1438,6 +1440,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+ : BPF_FD_TYPE_UPROBE;
+ *filename = tu->filename;
+ *probe_offset = tu->offset;
++ *probe_addr = 0;
+ return 0;
+ }
+ #endif /* CONFIG_PERF_EVENTS */
+@@ -1618,6 +1621,11 @@ create_local_trace_uprobe(char *name, unsigned long offs,
+ tu->path = path;
+ tu->ref_ctr_offset = ref_ctr_offset;
+ tu->filename = kstrdup(name, GFP_KERNEL);
++ if (!tu->filename) {
++ ret = -ENOMEM;
++ goto error;
++ }
++
+ init_trace_event_call(tu);
+
+ ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+@@ -1655,10 +1663,10 @@ static __init int init_uprobe_trace(void)
+ if (ret)
+ return 0;
+
+- trace_create_file("uprobe_events", 0644, NULL,
++ trace_create_file("uprobe_events", TRACE_MODE_WRITE, NULL,
+ NULL, &uprobe_events_ops);
+ /* Profile interface */
+- trace_create_file("uprobe_profile", 0444, NULL,
++ trace_create_file("uprobe_profile", TRACE_MODE_READ, NULL,
+ NULL, &uprobe_profile_ops);
+ return 0;
+ }
+diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
+index d6bddb157ef20..9628b55718468 100644
+--- a/kernel/trace/tracing_map.c
++++ b/kernel/trace/tracing_map.c
+@@ -15,6 +15,7 @@
+ #include <linux/jhash.h>
+ #include <linux/slab.h>
+ #include <linux/sort.h>
++#include <linux/kmemleak.h>
+
+ #include "tracing_map.h"
+ #include "trace.h"
+@@ -307,6 +308,7 @@ static void tracing_map_array_free(struct tracing_map_array *a)
+ for (i = 0; i < a->n_pages; i++) {
+ if (!a->pages[i])
+ break;
++ kmemleak_free(a->pages[i]);
+ free_page((unsigned long)a->pages[i]);
+ }
+
+@@ -342,6 +344,7 @@ static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
+ a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!a->pages[i])
+ goto free;
++ kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL);
+ }
+ out:
+ return a;
+@@ -834,29 +837,35 @@ int tracing_map_init(struct tracing_map *map)
+ return err;
+ }
+
+-static int cmp_entries_dup(const struct tracing_map_sort_entry **a,
+- const struct tracing_map_sort_entry **b)
++static int cmp_entries_dup(const void *A, const void *B)
+ {
++ const struct tracing_map_sort_entry *a, *b;
+ int ret = 0;
+
+- if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size))
++ a = *(const struct tracing_map_sort_entry **)A;
++ b = *(const struct tracing_map_sort_entry **)B;
++
++ if (memcmp(a->key, b->key, a->elt->map->key_size))
+ ret = 1;
+
+ return ret;
+ }
+
+-static int cmp_entries_sum(const struct tracing_map_sort_entry **a,
+- const struct tracing_map_sort_entry **b)
++static int cmp_entries_sum(const void *A, const void *B)
+ {
+ const struct tracing_map_elt *elt_a, *elt_b;
++ const struct tracing_map_sort_entry *a, *b;
+ struct tracing_map_sort_key *sort_key;
+ struct tracing_map_field *field;
+ tracing_map_cmp_fn_t cmp_fn;
+ void *val_a, *val_b;
+ int ret = 0;
+
+- elt_a = (*a)->elt;
+- elt_b = (*b)->elt;
++ a = *(const struct tracing_map_sort_entry **)A;
++ b = *(const struct tracing_map_sort_entry **)B;
++
++ elt_a = a->elt;
++ elt_b = b->elt;
+
+ sort_key = &elt_a->map->sort_key;
+
+@@ -873,18 +882,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a,
+ return ret;
+ }
+
+-static int cmp_entries_key(const struct tracing_map_sort_entry **a,
+- const struct tracing_map_sort_entry **b)
++static int cmp_entries_key(const void *A, const void *B)
+ {
+ const struct tracing_map_elt *elt_a, *elt_b;
++ const struct tracing_map_sort_entry *a, *b;
+ struct tracing_map_sort_key *sort_key;
+ struct tracing_map_field *field;
+ tracing_map_cmp_fn_t cmp_fn;
+ void *val_a, *val_b;
+ int ret = 0;
+
+- elt_a = (*a)->elt;
+- elt_b = (*b)->elt;
++ a = *(const struct tracing_map_sort_entry **)A;
++ b = *(const struct tracing_map_sort_entry **)B;
++
++ elt_a = a->elt;
++ elt_b = b->elt;
+
+ sort_key = &elt_a->map->sort_key;
+
+@@ -989,10 +1001,8 @@ static void sort_secondary(struct tracing_map *map,
+ struct tracing_map_sort_key *primary_key,
+ struct tracing_map_sort_key *secondary_key)
+ {
+- int (*primary_fn)(const struct tracing_map_sort_entry **,
+- const struct tracing_map_sort_entry **);
+- int (*secondary_fn)(const struct tracing_map_sort_entry **,
+- const struct tracing_map_sort_entry **);
++ int (*primary_fn)(const void *, const void *);
++ int (*secondary_fn)(const void *, const void *);
+ unsigned i, start = 0, n_sub = 1;
+
+ if (is_key(map, primary_key->field_idx))
+@@ -1061,8 +1071,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
+ unsigned int n_sort_keys,
+ struct tracing_map_sort_entry ***sort_entries)
+ {
+- int (*cmp_entries_fn)(const struct tracing_map_sort_entry **,
+- const struct tracing_map_sort_entry **);
++ int (*cmp_entries_fn)(const void *, const void *);
+ struct tracing_map_sort_entry *sort_entry, **entries;
+ int i, n_entries, ret;
+
+diff --git a/kernel/tsacct.c b/kernel/tsacct.c
+index 257ffb993ea23..fd2f7a052fdd9 100644
+--- a/kernel/tsacct.c
++++ b/kernel/tsacct.c
+@@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns,
+ stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
+ stats->ac_btime64 = btime;
+
+- if (thread_group_leader(tsk)) {
++ if (tsk->flags & PF_EXITING)
+ stats->ac_exitcode = tsk->exit_code;
+- if (tsk->flags & PF_FORKNOEXEC)
+- stats->ac_flag |= AFORK;
+- }
++ if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC))
++ stats->ac_flag |= AFORK;
+ if (tsk->flags & PF_SUPERPRIV)
+ stats->ac_flag |= ASU;
+ if (tsk->flags & PF_DUMPCORE)
+diff --git a/kernel/ucount.c b/kernel/ucount.c
+index eb03f3c68375d..a1d67261501a6 100644
+--- a/kernel/ucount.c
++++ b/kernel/ucount.c
+@@ -184,6 +184,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
+ kfree(new);
+ } else {
+ hlist_add_head(&new->node, hashent);
++ get_user_ns(new->ns);
+ spin_unlock_irq(&ucounts_lock);
+ return new;
+ }
+@@ -204,6 +205,7 @@ void put_ucounts(struct ucounts *ucounts)
+ if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
+ hlist_del_init(&ucounts->node);
+ spin_unlock_irqrestore(&ucounts_lock, flags);
++ put_user_ns(ucounts->ns);
+ kfree(ucounts);
+ }
+ }
+@@ -258,15 +260,16 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
+ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
+ {
+ struct ucounts *iter;
++ long max = LONG_MAX;
+ long ret = 0;
+
+ for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+- long max = READ_ONCE(iter->ns->ucount_max[type]);
+ long new = atomic_long_add_return(v, &iter->ucount[type]);
+ if (new < 0 || new > max)
+ ret = LONG_MAX;
+ else if (iter == ucounts)
+ ret = new;
++ max = READ_ONCE(iter->ns->ucount_max[type]);
+ }
+ return ret;
+ }
+@@ -306,15 +309,16 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
+ {
+ /* Caller must hold a reference to ucounts */
+ struct ucounts *iter;
++ long max = LONG_MAX;
+ long dec, ret = 0;
+
+ for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+- long max = READ_ONCE(iter->ns->ucount_max[type]);
+ long new = atomic_long_add_return(1, &iter->ucount[type]);
+ if (new < 0 || new > max)
+ goto unwind;
+ if (iter == ucounts)
+ ret = new;
++ max = READ_ONCE(iter->ns->ucount_max[type]);
+ /*
+ * Grab an extra ucount reference for the caller when
+ * the rlimit count was previously 0.
+@@ -333,15 +337,17 @@ unwind:
+ return 0;
+ }
+
+-bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
++bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit)
+ {
+ struct ucounts *iter;
+- if (get_ucounts_value(ucounts, type) > max)
+- return true;
++ long max = rlimit;
++ if (rlimit > LONG_MAX)
++ max = LONG_MAX;
+ for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+- max = READ_ONCE(iter->ns->ucount_max[type]);
+- if (get_ucounts_value(iter, type) > max)
++ long val = get_ucounts_value(iter, type);
++ if (val < 0 || val > max)
+ return true;
++ max = READ_ONCE(iter->ns->ucount_max[type]);
+ }
+ return false;
+ }
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
+index 6b2e3ca7ee993..5481ba44a8d68 100644
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -58,6 +58,18 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+ cred->user_ns = user_ns;
+ }
+
++static unsigned long enforced_nproc_rlimit(void)
++{
++ unsigned long limit = RLIM_INFINITY;
++
++ /* Is RLIMIT_NPROC currently enforced? */
++ if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
++ (current_user_ns() != &init_user_ns))
++ limit = rlimit(RLIMIT_NPROC);
++
++ return limit;
++}
++
+ /*
+ * Create a new user namespace, deriving the creator from the user in the
+ * passed credentials, and replacing that user with the new root user for the
+@@ -122,7 +134,7 @@ int create_user_ns(struct cred *new)
+ for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
+ ns->ucount_max[i] = INT_MAX;
+ }
+- set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC));
++ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
+ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
+ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
+ set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index 9c9eb20dd2c50..54cbaa9711398 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -34,6 +34,27 @@ MODULE_LICENSE("GPL");
+ #define WATCH_QUEUE_NOTE_SIZE 128
+ #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)
+
++/*
++ * This must be called under the RCU read-lock, which makes
++ * sure that the wqueue still exists. It can then take the lock,
++ * and check that the wqueue hasn't been destroyed, which in
++ * turn makes sure that the notification pipe still exists.
++ */
++static inline bool lock_wqueue(struct watch_queue *wqueue)
++{
++ spin_lock_bh(&wqueue->lock);
++ if (unlikely(wqueue->defunct)) {
++ spin_unlock_bh(&wqueue->lock);
++ return false;
++ }
++ return true;
++}
++
++static inline void unlock_wqueue(struct watch_queue *wqueue)
++{
++ spin_unlock_bh(&wqueue->lock);
++}
++
+ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+ {
+@@ -54,6 +75,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
+ bit += page->index;
+
+ set_bit(bit, wqueue->notes_bitmap);
++ generic_pipe_buf_release(pipe, buf);
+ }
+
+ // No try_steal function => no stealing
+@@ -68,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {
+
+ /*
+ * Post a notification to a watch queue.
++ *
++ * Must be called with the RCU lock for reading, and the
++ * watch_queue lock held, which guarantees that the pipe
++ * hasn't been released.
+ */
+ static bool post_one_notification(struct watch_queue *wqueue,
+ struct watch_notification *n)
+@@ -84,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
+
+ spin_lock_irq(&pipe->rd_wait.lock);
+
+- if (wqueue->defunct)
+- goto out;
+-
+ mask = pipe->ring_size - 1;
+ head = pipe->head;
+ tail = pipe->tail;
+@@ -112,7 +135,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
+ buf->offset = offset;
+ buf->len = len;
+ buf->flags = PIPE_BUF_FLAG_WHOLE;
+- pipe->head = head + 1;
++ smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+
+ if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
+ spin_unlock_irq(&pipe->rd_wait.lock);
+@@ -202,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist,
+ if (security_post_notification(watch->cred, cred, n) < 0)
+ continue;
+
+- post_one_notification(wqueue, n);
++ if (lock_wqueue(wqueue)) {
++ post_one_notification(wqueue, n);
++ unlock_wqueue(wqueue);
++ }
+ }
+
+ rcu_read_unlock();
+@@ -243,10 +269,12 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
+ goto error;
+ }
+
+- ret = pipe_resize_ring(pipe, nr_notes);
++ nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
++ ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
+ if (ret < 0)
+ goto error;
+
++ ret = -ENOMEM;
+ pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL);
+ if (!pages)
+ goto error;
+@@ -268,11 +296,11 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
+ wqueue->notes = pages;
+ wqueue->notes_bitmap = bitmap;
+ wqueue->nr_pages = nr_pages;
+- wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
++ wqueue->nr_notes = nr_notes;
+ return 0;
+
+ error_p:
+- for (i = 0; i < nr_pages; i++)
++ while (--i >= 0)
+ __free_page(pages[i]);
+ kfree(pages);
+ error:
+@@ -320,7 +348,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
+ tf[i].info_mask & WATCH_INFO_LENGTH)
+ goto err_filter;
+ /* Ignore any unknown types */
+- if (tf[i].type >= sizeof(wfilter->type_filter) * 8)
++ if (tf[i].type >= WATCH_TYPE__NR)
+ continue;
+ nr_filter++;
+ }
+@@ -336,7 +364,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe,
+
+ q = wfilter->filters;
+ for (i = 0; i < filter.nr_filters; i++) {
+- if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG)
++ if (tf[i].type >= WATCH_TYPE__NR)
+ continue;
+
+ q->type = tf[i].type;
+@@ -371,6 +399,8 @@ static void __put_watch_queue(struct kref *kref)
+
+ for (i = 0; i < wqueue->nr_pages; i++)
+ __free_page(wqueue->notes[i]);
++ kfree(wqueue->notes);
++ bitmap_free(wqueue->notes_bitmap);
+
+ wfilter = rcu_access_pointer(wqueue->filter);
+ if (wfilter)
+@@ -395,6 +425,7 @@ static void free_watch(struct rcu_head *rcu)
+ put_watch_queue(rcu_access_pointer(watch->queue));
+ atomic_dec(&watch->cred->user->nr_watches);
+ put_cred(watch->cred);
++ kfree(watch);
+ }
+
+ static void __put_watch(struct kref *kref)
+@@ -427,6 +458,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ rcu_assign_pointer(watch->queue, wqueue);
+ }
+
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++ const struct cred *cred;
++ struct watch *w;
++
++ hlist_for_each_entry(w, &wlist->watchers, list_node) {
++ struct watch_queue *wq = rcu_access_pointer(w->queue);
++ if (wqueue == wq && watch->id == w->id)
++ return -EBUSY;
++ }
++
++ cred = current_cred();
++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++ atomic_dec(&cred->user->nr_watches);
++ return -EAGAIN;
++ }
++
++ watch->cred = get_cred(cred);
++ rcu_assign_pointer(watch->watch_list, wlist);
++
++ kref_get(&wqueue->usage);
++ kref_get(&watch->usage);
++ hlist_add_head(&watch->queue_node, &wqueue->watches);
++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++ return 0;
++}
++
+ /**
+ * add_watch_to_object - Add a watch on an object to a watch list
+ * @watch: The watch to add
+@@ -441,33 +499,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+- struct watch *w;
+-
+- hlist_for_each_entry(w, &wlist->watchers, list_node) {
+- struct watch_queue *wq = rcu_access_pointer(w->queue);
+- if (wqueue == wq && watch->id == w->id)
+- return -EBUSY;
+- }
++ struct watch_queue *wqueue;
++ int ret = -ENOENT;
+
+- watch->cred = get_current_cred();
+- rcu_assign_pointer(watch->watch_list, wlist);
++ rcu_read_lock();
+
+- if (atomic_inc_return(&watch->cred->user->nr_watches) >
+- task_rlimit(current, RLIMIT_NOFILE)) {
+- atomic_dec(&watch->cred->user->nr_watches);
+- put_cred(watch->cred);
+- return -EAGAIN;
++ wqueue = rcu_access_pointer(watch->queue);
++ if (lock_wqueue(wqueue)) {
++ spin_lock(&wlist->lock);
++ ret = add_one_watch(watch, wlist, wqueue);
++ spin_unlock(&wlist->lock);
++ unlock_wqueue(wqueue);
+ }
+
+- spin_lock_bh(&wqueue->lock);
+- kref_get(&wqueue->usage);
+- kref_get(&watch->usage);
+- hlist_add_head(&watch->queue_node, &wqueue->watches);
+- spin_unlock_bh(&wqueue->lock);
+-
+- hlist_add_head(&watch->list_node, &wlist->watchers);
+- return 0;
++ rcu_read_unlock();
++ return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+
+@@ -518,20 +564,15 @@ found:
+
+ wqueue = rcu_dereference(watch->queue);
+
+- /* We don't need the watch list lock for the next bit as RCU is
+- * protecting *wqueue from deallocation.
+- */
+- if (wqueue) {
++ if (lock_wqueue(wqueue)) {
+ post_one_notification(wqueue, &n.watch);
+
+- spin_lock_bh(&wqueue->lock);
+-
+ if (!hlist_unhashed(&watch->queue_node)) {
+ hlist_del_init_rcu(&watch->queue_node);
+ put_watch(watch);
+ }
+
+- spin_unlock_bh(&wqueue->lock);
++ unlock_wqueue(wqueue);
+ }
+
+ if (wlist->release_watch) {
+@@ -566,7 +607,7 @@ void watch_queue_clear(struct watch_queue *wqueue)
+ rcu_read_lock();
+ spin_lock_bh(&wqueue->lock);
+
+- /* Prevent new additions and prevent notifications from happening */
++ /* Prevent new notifications from being stored. */
+ wqueue->defunct = true;
+
+ while (!hlist_empty(&wqueue->watches)) {
+diff --git a/kernel/watchdog.c b/kernel/watchdog.c
+index ad912511a0c08..1cfa269bd4488 100644
+--- a/kernel/watchdog.c
++++ b/kernel/watchdog.c
+@@ -537,7 +537,7 @@ int lockup_detector_offline_cpu(unsigned int cpu)
+ return 0;
+ }
+
+-static void lockup_detector_reconfigure(void)
++static void __lockup_detector_reconfigure(void)
+ {
+ cpus_read_lock();
+ watchdog_nmi_stop();
+@@ -557,6 +557,13 @@ static void lockup_detector_reconfigure(void)
+ __lockup_detector_cleanup();
+ }
+
++void lockup_detector_reconfigure(void)
++{
++ mutex_lock(&watchdog_mutex);
++ __lockup_detector_reconfigure();
++ mutex_unlock(&watchdog_mutex);
++}
++
+ /*
+ * Create the watchdog infrastructure and configure the detector(s).
+ */
+@@ -573,13 +580,13 @@ static __init void lockup_detector_setup(void)
+ return;
+
+ mutex_lock(&watchdog_mutex);
+- lockup_detector_reconfigure();
++ __lockup_detector_reconfigure();
+ softlockup_initialized = true;
+ mutex_unlock(&watchdog_mutex);
+ }
+
+ #else /* CONFIG_SOFTLOCKUP_DETECTOR */
+-static void lockup_detector_reconfigure(void)
++static void __lockup_detector_reconfigure(void)
+ {
+ cpus_read_lock();
+ watchdog_nmi_stop();
+@@ -587,9 +594,13 @@ static void lockup_detector_reconfigure(void)
+ watchdog_nmi_start();
+ cpus_read_unlock();
+ }
++void lockup_detector_reconfigure(void)
++{
++ __lockup_detector_reconfigure();
++}
+ static inline void lockup_detector_setup(void)
+ {
+- lockup_detector_reconfigure();
++ __lockup_detector_reconfigure();
+ }
+ #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
+
+@@ -629,7 +640,7 @@ static void proc_watchdog_update(void)
+ {
+ /* Remove impossible cpus to keep sysctl output clean. */
+ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+- lockup_detector_reconfigure();
++ __lockup_detector_reconfigure();
+ }
+
+ /*
+diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
+index 247bf0b1582ca..1e8a49dc956e2 100644
+--- a/kernel/watchdog_hld.c
++++ b/kernel/watchdog_hld.c
+@@ -114,14 +114,14 @@ static void watchdog_overflow_callback(struct perf_event *event,
+ /* Ensure the watchdog never gets throttled */
+ event->hw.interrupts = 0;
+
++ if (!watchdog_check_timestamp())
++ return;
++
+ if (__this_cpu_read(watchdog_nmi_touch) == true) {
+ __this_cpu_write(watchdog_nmi_touch, false);
+ return;
+ }
+
+- if (!watchdog_check_timestamp())
+- return;
+-
+ /* check for a hardlockup
+ * This is done by making sure our timer interrupt
+ * is incrementing. The timer interrupt should have
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 1b3eb1e9531f4..8e108c040cc35 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -375,6 +375,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
+ static int worker_thread(void *__worker);
+ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+ static void show_pwq(struct pool_workqueue *pwq);
++static void show_one_worker_pool(struct worker_pool *pool);
+
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/workqueue.h>
+@@ -696,12 +697,17 @@ static void clear_work_data(struct work_struct *work)
+ set_work_data(work, WORK_STRUCT_NO_POOL, 0);
+ }
+
++static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
++{
++ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
++}
++
+ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
+ {
+ unsigned long data = atomic_long_read(&work->data);
+
+ if (data & WORK_STRUCT_PWQ)
+- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
++ return work_struct_pwq(data);
+ else
+ return NULL;
+ }
+@@ -729,8 +735,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
+ assert_rcu_or_pool_mutex();
+
+ if (data & WORK_STRUCT_PWQ)
+- return ((struct pool_workqueue *)
+- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
++ return work_struct_pwq(data)->pool;
+
+ pool_id = data >> WORK_OFFQ_POOL_SHIFT;
+ if (pool_id == WORK_OFFQ_POOL_NONE)
+@@ -751,8 +756,7 @@ static int get_work_pool_id(struct work_struct *work)
+ unsigned long data = atomic_long_read(&work->data);
+
+ if (data & WORK_STRUCT_PWQ)
+- return ((struct pool_workqueue *)
+- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
++ return work_struct_pwq(data)->pool->id;
+
+ return data >> WORK_OFFQ_POOL_SHIFT;
+ }
+@@ -867,8 +871,17 @@ void wq_worker_running(struct task_struct *task)
+
+ if (!worker->sleeping)
+ return;
++
++ /*
++ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
++ * and the nr_running increment below, we may ruin the nr_running reset
++ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
++ * pool. Protect against such race.
++ */
++ preempt_disable();
+ if (!(worker->flags & WORKER_NOT_RUNNING))
+ atomic_inc(&worker->pool->nr_running);
++ preempt_enable();
+ worker->sleeping = 0;
+ }
+
+@@ -3074,10 +3087,8 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
+ if (WARN_ON(!work->func))
+ return false;
+
+- if (!from_cancel) {
+- lock_map_acquire(&work->lockdep_map);
+- lock_map_release(&work->lockdep_map);
+- }
++ lock_map_acquire(&work->lockdep_map);
++ lock_map_release(&work->lockdep_map);
+
+ if (start_flush_work(work, &barr, from_cancel)) {
+ wait_for_completion(&barr.done);
+@@ -4447,7 +4458,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
+ raw_spin_unlock_irq(&pwq->pool->lock);
+ mutex_unlock(&wq->mutex);
+ mutex_unlock(&wq_pool_mutex);
+- show_workqueue_state();
++ show_one_workqueue(wq);
+ return;
+ }
+ raw_spin_unlock_irq(&pwq->pool->lock);
+@@ -4797,97 +4808,120 @@ static void show_pwq(struct pool_workqueue *pwq)
+ }
+
+ /**
+- * show_workqueue_state - dump workqueue state
+- *
+- * Called from a sysrq handler or try_to_freeze_tasks() and prints out
+- * all busy workqueues and pools.
++ * show_one_workqueue - dump state of specified workqueue
++ * @wq: workqueue whose state will be printed
+ */
+-void show_workqueue_state(void)
++void show_one_workqueue(struct workqueue_struct *wq)
+ {
+- struct workqueue_struct *wq;
+- struct worker_pool *pool;
++ struct pool_workqueue *pwq;
++ bool idle = true;
+ unsigned long flags;
+- int pi;
+-
+- rcu_read_lock();
+
+- pr_info("Showing busy workqueues and worker pools:\n");
+-
+- list_for_each_entry_rcu(wq, &workqueues, list) {
+- struct pool_workqueue *pwq;
+- bool idle = true;
+-
+- for_each_pwq(pwq, wq) {
+- if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+- idle = false;
+- break;
+- }
++ for_each_pwq(pwq, wq) {
++ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
++ idle = false;
++ break;
+ }
+- if (idle)
+- continue;
++ }
++ if (idle) /* Nothing to print for idle workqueue */
++ return;
+
+- pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
++ pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+- for_each_pwq(pwq, wq) {
+- raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+- if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+- /*
+- * Defer printing to avoid deadlocks in console
+- * drivers that queue work while holding locks
+- * also taken in their write paths.
+- */
+- printk_deferred_enter();
+- show_pwq(pwq);
+- printk_deferred_exit();
+- }
+- raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
++ for_each_pwq(pwq, wq) {
++ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
++ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+ /*
+- * We could be printing a lot from atomic context, e.g.
+- * sysrq-t -> show_workqueue_state(). Avoid triggering
+- * hard lockup.
++ * Defer printing to avoid deadlocks in console
++ * drivers that queue work while holding locks
++ * also taken in their write paths.
+ */
+- touch_nmi_watchdog();
+- }
+- }
+-
+- for_each_pool(pool, pi) {
+- struct worker *worker;
+- bool first = true;
+-
+- raw_spin_lock_irqsave(&pool->lock, flags);
+- if (pool->nr_workers == pool->nr_idle)
+- goto next_pool;
+- /*
+- * Defer printing to avoid deadlocks in console drivers that
+- * queue work while holding locks also taken in their write
+- * paths.
+- */
+- printk_deferred_enter();
+- pr_info("pool %d:", pool->id);
+- pr_cont_pool_info(pool);
+- pr_cont(" hung=%us workers=%d",
+- jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+- pool->nr_workers);
+- if (pool->manager)
+- pr_cont(" manager: %d",
+- task_pid_nr(pool->manager->task));
+- list_for_each_entry(worker, &pool->idle_list, entry) {
+- pr_cont(" %s%d", first ? "idle: " : "",
+- task_pid_nr(worker->task));
+- first = false;
++ printk_deferred_enter();
++ show_pwq(pwq);
++ printk_deferred_exit();
+ }
+- pr_cont("\n");
+- printk_deferred_exit();
+- next_pool:
+- raw_spin_unlock_irqrestore(&pool->lock, flags);
++ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+- * sysrq-t -> show_workqueue_state(). Avoid triggering
++ * sysrq-t -> show_all_workqueues(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
+ }
+
++}
++
++/**
++ * show_one_worker_pool - dump state of specified worker pool
++ * @pool: worker pool whose state will be printed
++ */
++static void show_one_worker_pool(struct worker_pool *pool)
++{
++ struct worker *worker;
++ bool first = true;
++ unsigned long flags;
++ unsigned long hung = 0;
++
++ raw_spin_lock_irqsave(&pool->lock, flags);
++ if (pool->nr_workers == pool->nr_idle)
++ goto next_pool;
++
++ /* How long the first pending work is waiting for a worker. */
++ if (!list_empty(&pool->worklist))
++ hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
++
++ /*
++ * Defer printing to avoid deadlocks in console drivers that
++ * queue work while holding locks also taken in their write
++ * paths.
++ */
++ printk_deferred_enter();
++ pr_info("pool %d:", pool->id);
++ pr_cont_pool_info(pool);
++ pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers);
++ if (pool->manager)
++ pr_cont(" manager: %d",
++ task_pid_nr(pool->manager->task));
++ list_for_each_entry(worker, &pool->idle_list, entry) {
++ pr_cont(" %s%d", first ? "idle: " : "",
++ task_pid_nr(worker->task));
++ first = false;
++ }
++ pr_cont("\n");
++ printk_deferred_exit();
++next_pool:
++ raw_spin_unlock_irqrestore(&pool->lock, flags);
++ /*
++ * We could be printing a lot from atomic context, e.g.
++ * sysrq-t -> show_all_workqueues(). Avoid triggering
++ * hard lockup.
++ */
++ touch_nmi_watchdog();
++
++}
++
++/**
++ * show_all_workqueues - dump workqueue state
++ *
++ * Called from a sysrq handler or try_to_freeze_tasks() and prints out
++ * all busy workqueues and pools.
++ */
++void show_all_workqueues(void)
++{
++ struct workqueue_struct *wq;
++ struct worker_pool *pool;
++ int pi;
++
++ rcu_read_lock();
++
++ pr_info("Showing busy workqueues and worker pools:\n");
++
++ list_for_each_entry_rcu(wq, &workqueues, list)
++ show_one_workqueue(wq);
++
++ for_each_pool(pool, pi)
++ show_one_worker_pool(pool);
++
+ rcu_read_unlock();
+ }
+
+@@ -5384,9 +5418,6 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+ int ret = -EINVAL;
+ cpumask_var_t saved_cpumask;
+
+- if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
+- return -ENOMEM;
+-
+ /*
+ * Not excluding isolated cpus on purpose.
+ * If the user wishes to include them, we allow that.
+@@ -5394,6 +5425,15 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+ cpumask_and(cpumask, cpumask, cpu_possible_mask);
+ if (!cpumask_empty(cpumask)) {
+ apply_wqattrs_lock();
++ if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
++ ret = 0;
++ goto out_unlock;
++ }
++
++ if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) {
++ ret = -ENOMEM;
++ goto out_unlock;
++ }
+
+ /* save the old wq_unbound_cpumask. */
+ cpumask_copy(saved_cpumask, wq_unbound_cpumask);
+@@ -5406,10 +5446,11 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+ if (ret < 0)
+ cpumask_copy(wq_unbound_cpumask, saved_cpumask);
+
++ free_cpumask_var(saved_cpumask);
++out_unlock:
+ apply_wqattrs_unlock();
+ }
+
+- free_cpumask_var(saved_cpumask);
+ return ret;
+ }
+
+@@ -5869,7 +5910,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
+ rcu_read_unlock();
+
+ if (lockup_detected)
+- show_workqueue_state();
++ show_all_workqueues();
+
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh);
+diff --git a/lib/Kconfig b/lib/Kconfig
+index 5e7165e6a346c..baa977e003b76 100644
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -45,7 +45,6 @@ config BITREVERSE
+ config HAVE_ARCH_BITREVERSE
+ bool
+ default n
+- depends on BITREVERSE
+ help
+ This option enables the use of hardware bit-reversal instructions on
+ architectures which support such operations.
+@@ -122,6 +121,11 @@ config INDIRECT_IOMEM_FALLBACK
+ mmio accesses when the IO memory address is not a registered
+ emulated region.
+
++source "lib/crypto/Kconfig"
++
++config LIB_MEMNEQ
++ bool
++
+ config CRC_CCITT
+ tristate "CRC-CCITT functions"
+ help
+diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
+index 2a9b6dcdac4ff..dbbd243c865f0 100644
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -208,6 +208,11 @@ config DEBUG_BUGVERBOSE
+
+ endmenu # "printk and dmesg options"
+
++# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which
++# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
++config AS_HAS_NON_CONST_LEB128
++ def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:)
++
+ menu "Compile-time checks and compiler options"
+
+ config DEBUG_INFO
+@@ -274,6 +279,7 @@ choice
+
+ config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
+ bool "Rely on the toolchain's implicit default DWARF version"
++ depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
+ help
+ The implicit default version of DWARF debug info produced by a
+ toolchain changes over time.
+@@ -295,8 +301,8 @@ config DEBUG_INFO_DWARF4
+
+ config DEBUG_INFO_DWARF5
+ bool "Generate DWARF Version 5 debuginfo"
+- depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502)))
+- depends on !DEBUG_INFO_BTF
++ depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
++ depends on !DEBUG_INFO_BTF || PAHOLE_VERSION >= 121
+ help
+ Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
+ 5.0+ accepts the -gdwarf-5 flag but only had partial support for some
+@@ -322,7 +328,7 @@ config DEBUG_INFO_BTF
+ DWARF type info into equivalent deduplicated BTF type info.
+
+ config PAHOLE_HAS_SPLIT_BTF
+- def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
++ def_bool PAHOLE_VERSION >= 119
+
+ config DEBUG_INFO_BTF_MODULES
+ def_bool y
+@@ -346,8 +352,10 @@ config FRAME_WARN
+ int "Warn for stack frames larger than"
+ range 0 8192
+ default 2048 if GCC_PLUGIN_LATENT_ENTROPY
+- default 1536 if (!64BIT && (PARISC || XTENSA))
+- default 1024 if (!64BIT && !PARISC)
++ default 2048 if PARISC
++ default 1536 if (!64BIT && XTENSA)
++ default 1280 if KASAN && !64BIT
++ default 1024 if !64BIT
+ default 2048 if 64BIT
+ help
+ Tell gcc to warn at build time for stack frames larger than this.
+@@ -414,7 +422,8 @@ config SECTION_MISMATCH_WARN_ONLY
+ If unsure, say Y.
+
+ config DEBUG_FORCE_FUNCTION_ALIGN_64B
+- bool "Force all function address 64B aligned" if EXPERT
++ bool "Force all function address 64B aligned"
++ depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC)
+ help
+ There are cases that a commit from one domain changes the function
+ address alignment of other domains, and cause magic performance
+@@ -1558,8 +1567,7 @@ config WARN_ALL_UNSEEDED_RANDOM
+ so architecture maintainers really need to do what they can
+ to get the CRNG seeded sooner after the system is booted.
+ However, since users cannot do anything actionable to
+- address this, by default the kernel will issue only a single
+- warning for the first use of unseeded randomness.
++ address this, by default this option is disabled.
+
+ Say Y here if you want to receive warnings for all uses of
+ unseeded randomness. This will be of use primarily for
+@@ -1866,8 +1874,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT
+ If unsure, say N.
+
+ config FUNCTION_ERROR_INJECTION
+- def_bool y
++ bool "Fault-injections of functions"
+ depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
++ help
++ Add fault injections into various functions that are annotated with
++ ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
++ value of theses functions. This is useful to test error paths of code.
++
++ If unsure, say N
+
+ config FAULT_INJECTION
+ bool "Fault-injection framework"
+diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence
+index e641add339475..912f252a41fc6 100644
+--- a/lib/Kconfig.kfence
++++ b/lib/Kconfig.kfence
+@@ -25,17 +25,6 @@ menuconfig KFENCE
+
+ if KFENCE
+
+-config KFENCE_STATIC_KEYS
+- bool "Use static keys to set up allocations"
+- default y
+- depends on JUMP_LABEL # To ensure performance, require jump labels
+- help
+- Use static keys (static branches) to set up KFENCE allocations. Using
+- static keys is normally recommended, because it avoids a dynamic
+- branch in the allocator's fast path. However, with very low sample
+- intervals, or on systems that do not support jump labels, a dynamic
+- branch may still be an acceptable performance trade-off.
+-
+ config KFENCE_SAMPLE_INTERVAL
+ int "Default sample interval in milliseconds"
+ default 100
+@@ -56,6 +45,21 @@ config KFENCE_NUM_OBJECTS
+ pages are required; with one containing the object and two adjacent
+ ones used as guard pages.
+
++config KFENCE_STATIC_KEYS
++ bool "Use static keys to set up allocations" if EXPERT
++ depends on JUMP_LABEL
++ help
++ Use static keys (static branches) to set up KFENCE allocations. This
++ option is only recommended when using very large sample intervals, or
++ performance has carefully been evaluated with this option.
++
++ Using static keys comes with trade-offs that need to be carefully
++ evaluated given target workloads and system architectures. Notably,
++ enabling and disabling static keys invoke IPI broadcasts, the latency
++ and impact of which is much harder to predict than a dynamic branch.
++
++ Say N if you are unsure.
++
+ config KFENCE_STRESS_TEST_FAULTS
+ int "Stress testing of fault handling and error reporting" if EXPERT
+ default 0
+diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
+index e5372a13511df..236c5cefc4cc5 100644
+--- a/lib/Kconfig.ubsan
++++ b/lib/Kconfig.ubsan
+@@ -112,19 +112,6 @@ config UBSAN_UNREACHABLE
+ This option enables -fsanitize=unreachable which checks for control
+ flow reaching an expected-to-be-unreachable position.
+
+-config UBSAN_OBJECT_SIZE
+- bool "Perform checking for accesses beyond the end of objects"
+- default UBSAN
+- # gcc hugely expands stack usage with -fsanitize=object-size
+- # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/
+- depends on !CC_IS_GCC
+- depends on $(cc-option,-fsanitize=object-size)
+- help
+- This option enables -fsanitize=object-size which checks for accesses
+- beyond the end of objects where the optimizer can determine both the
+- object being operated on and its size, usually seen with bad downcasts,
+- or access to struct members from NULL pointers.
+-
+ config UBSAN_BOOL
+ bool "Perform checking for non-boolean values used as boolean"
+ default UBSAN
+diff --git a/lib/Makefile b/lib/Makefile
+index a841be5244ac6..0868cb67e5b0e 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -249,6 +249,7 @@ obj-$(CONFIG_DIMLIB) += dim/
+ obj-$(CONFIG_SIGNATURE) += digsig.o
+
+ lib-$(CONFIG_CLZ_TAB) += clz_tab.o
++lib-$(CONFIG_LIB_MEMNEQ) += memneq.o
+
+ obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
+ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
+@@ -275,7 +276,7 @@ $(foreach file, $(libfdt_files), \
+ $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt))
+ lib-$(CONFIG_LIBFDT) += $(libfdt_files)
+
+-lib-$(CONFIG_BOOT_CONFIG) += bootconfig.o
++obj-$(CONFIG_BOOT_CONFIG) += bootconfig.o
+
+ obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
+ obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
+diff --git a/lib/assoc_array.c b/lib/assoc_array.c
+index 04c98799c3baf..70304b8f15ace 100644
+--- a/lib/assoc_array.c
++++ b/lib/assoc_array.c
+@@ -1462,6 +1462,7 @@ int assoc_array_gc(struct assoc_array *array,
+ struct assoc_array_ptr *cursor, *ptr;
+ struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+ unsigned long nr_leaves_on_tree;
++ bool retained;
+ int keylen, slot, nr_free, next_slot, i;
+
+ pr_devel("-->%s()\n", __func__);
+@@ -1538,6 +1539,7 @@ continue_node:
+ goto descend;
+ }
+
++retry_compress:
+ pr_devel("-- compress node %p --\n", new_n);
+
+ /* Count up the number of empty slots in this node and work out the
+@@ -1555,6 +1557,7 @@ continue_node:
+ pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+ /* See what we can fold in */
++ retained = false;
+ next_slot = 0;
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ struct assoc_array_shortcut *s;
+@@ -1604,9 +1607,14 @@ continue_node:
+ pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+ slot, child->nr_leaves_on_branch, nr_free + 1,
+ next_slot);
++ retained = true;
+ }
+ }
+
++ if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
++ pr_devel("internal nodes remain despite enough space, retrying\n");
++ goto retry_compress;
++ }
+ pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+ nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c
+index 0d3a686b5ba29..fb8c0c5c2bd27 100644
+--- a/lib/clz_ctz.c
++++ b/lib/clz_ctz.c
+@@ -28,36 +28,16 @@ int __weak __clzsi2(int val)
+ }
+ EXPORT_SYMBOL(__clzsi2);
+
+-int __weak __clzdi2(long val);
+-int __weak __ctzdi2(long val);
+-#if BITS_PER_LONG == 32
+-
+-int __weak __clzdi2(long val)
++int __weak __clzdi2(u64 val);
++int __weak __clzdi2(u64 val)
+ {
+- return 32 - fls((int)val);
++ return 64 - fls64(val);
+ }
+ EXPORT_SYMBOL(__clzdi2);
+
+-int __weak __ctzdi2(long val)
++int __weak __ctzdi2(u64 val);
++int __weak __ctzdi2(u64 val)
+ {
+- return __ffs((u32)val);
++ return __ffs64(val);
+ }
+ EXPORT_SYMBOL(__ctzdi2);
+-
+-#elif BITS_PER_LONG == 64
+-
+-int __weak __clzdi2(long val)
+-{
+- return 64 - fls64((u64)val);
+-}
+-EXPORT_SYMBOL(__clzdi2);
+-
+-int __weak __ctzdi2(long val)
+-{
+- return __ffs64((u64)val);
+-}
+-EXPORT_SYMBOL(__ctzdi2);
+-
+-#else
+-#error BITS_PER_LONG not 32 or 64
+-#endif
+diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
+index f08d9c56f712e..1833ad73de6fc 100644
+--- a/lib/cpu_rmap.c
++++ b/lib/cpu_rmap.c
+@@ -232,7 +232,8 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
+
+ for (index = 0; index < rmap->used; index++) {
+ glue = rmap->obj[index];
+- irq_set_affinity_notifier(glue->notify.irq, NULL);
++ if (glue)
++ irq_set_affinity_notifier(glue->notify.irq, NULL);
+ }
+
+ cpu_rmap_put(rmap);
+@@ -267,6 +268,7 @@ static void irq_cpu_rmap_release(struct kref *ref)
+ struct irq_glue *glue =
+ container_of(ref, struct irq_glue, notify.kref);
+
++ glue->rmap->obj[glue->index] = NULL;
+ cpu_rmap_put(glue->rmap);
+ kfree(glue);
+ }
+@@ -297,6 +299,7 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
+ rc = irq_set_affinity_notifier(irq, &glue->notify);
+ if (rc) {
+ cpu_rmap_put(glue->rmap);
++ rmap->obj[glue->index] = NULL;
+ kfree(glue);
+ }
+ return rc;
+diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
+index 545ccbddf6a1d..a29eff4f969e3 100644
+--- a/lib/crypto/Kconfig
++++ b/lib/crypto/Kconfig
+@@ -1,6 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0
+
+-comment "Crypto library routines"
++menu "Crypto library routines"
+
+ config CRYPTO_LIB_AES
+ tristate
+@@ -9,14 +9,14 @@ config CRYPTO_LIB_ARC4
+ tristate
+
+ config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+- tristate
++ bool
+ help
+ Declares whether the architecture provides an arch-specific
+ accelerated implementation of the Blake2s library interface,
+ either builtin or as a module.
+
+ config CRYPTO_LIB_BLAKE2S_GENERIC
+- tristate
++ def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ help
+ This symbol can be depended upon by arch implementations of the
+ Blake2s library interface that require the generic code as a
+@@ -24,15 +24,6 @@ config CRYPTO_LIB_BLAKE2S_GENERIC
+ implementation is enabled, this implementation serves the users
+ of CRYPTO_LIB_BLAKE2S.
+
+-config CRYPTO_LIB_BLAKE2S
+- tristate "BLAKE2s hash function library"
+- depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+- select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n
+- help
+- Enable the Blake2s library interface. This interface may be fulfilled
+- by either the generic implementation or an arch-specific one, if one
+- is available and enabled.
+-
+ config CRYPTO_ARCH_HAVE_LIB_CHACHA
+ tristate
+ help
+@@ -42,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+ config CRYPTO_LIB_CHACHA_GENERIC
+ tristate
+- select CRYPTO_ALGAPI
+ help
+ This symbol can be depended upon by arch implementations of the
+ ChaCha library interface that require the generic code as a
+@@ -52,6 +42,7 @@ config CRYPTO_LIB_CHACHA_GENERIC
+
+ config CRYPTO_LIB_CHACHA
+ tristate "ChaCha library interface"
++ depends on CRYPTO
+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
+ help
+@@ -79,6 +70,7 @@ config CRYPTO_LIB_CURVE25519
+ tristate "Curve25519 scalar multiplication library"
+ depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
++ select LIB_MEMNEQ
+ help
+ Enable the Curve25519 library interface. This interface may be
+ fulfilled by either the generic implementation or an arch-specific
+@@ -123,11 +115,15 @@ config CRYPTO_LIB_CHACHA20POLY1305
+ tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)"
+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
++ depends on CRYPTO
+ select CRYPTO_LIB_CHACHA
+ select CRYPTO_LIB_POLY1305
++ select CRYPTO_ALGAPI
+
+ config CRYPTO_LIB_SHA256
+ tristate
+
+ config CRYPTO_LIB_SM4
+ tristate
++
++endmenu
+diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
+index 73205ed269bad..ed43a41f2dcc8 100644
+--- a/lib/crypto/Makefile
++++ b/lib/crypto/Makefile
+@@ -10,11 +10,10 @@ libaes-y := aes.o
+ obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
+ libarc4-y := arc4.o
+
+-obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o
+-libblake2s-generic-y += blake2s-generic.o
+-
+-obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
+-libblake2s-y += blake2s.o
++# blake2s is used by the /dev/random driver which is always builtin
++obj-y += libblake2s.o
++libblake2s-y := blake2s.o
++libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o
+
+ obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
+ libchacha20poly1305-y += chacha20poly1305.o
+diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c
+index 04ff8df245136..75ccb3e633e65 100644
+--- a/lib/crypto/blake2s-generic.c
++++ b/lib/crypto/blake2s-generic.c
+@@ -37,7 +37,11 @@ static inline void blake2s_increment_counter(struct blake2s_state *state,
+ state->t[1] += (state->t[0] < inc);
+ }
+
+-void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
++void blake2s_compress(struct blake2s_state *state, const u8 *block,
++ size_t nblocks, const u32 inc)
++ __weak __alias(blake2s_compress_generic);
++
++void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
+ size_t nblocks, const u32 inc)
+ {
+ u32 m[16];
+diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
+index 5d9ea53be9736..7d77dea155873 100644
+--- a/lib/crypto/blake2s-selftest.c
++++ b/lib/crypto/blake2s-selftest.c
+@@ -4,6 +4,8 @@
+ */
+
+ #include <crypto/internal/blake2s.h>
++#include <linux/kernel.h>
++#include <linux/random.h>
+ #include <linux/string.h>
+
+ /*
+@@ -15,7 +17,6 @@
+ * #include <stdio.h>
+ *
+ * #include <openssl/evp.h>
+- * #include <openssl/hmac.h>
+ *
+ * #define BLAKE2S_TESTVEC_COUNT 256
+ *
+@@ -58,16 +59,6 @@
+ * }
+ * printf("};\n\n");
+ *
+- * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
+- *
+- * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL);
+- * print_vec(hash, BLAKE2S_OUTBYTES);
+- *
+- * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL);
+- * print_vec(hash, BLAKE2S_OUTBYTES);
+- *
+- * printf("};\n");
+- *
+ * return 0;
+ *}
+ */
+@@ -554,15 +545,6 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
+ 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
+ };
+
+-static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
+- { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70,
+- 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79,
+- 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, },
+- { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9,
+- 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f,
+- 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, },
+-};
+-
+ bool __init blake2s_selftest(void)
+ {
+ u8 key[BLAKE2S_KEY_SIZE];
+@@ -607,15 +589,43 @@ bool __init blake2s_selftest(void)
+ }
+ }
+
+- if (success) {
+- blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key));
+- success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE);
++ for (i = 0; i < 32; ++i) {
++ enum { TEST_ALIGNMENT = 16 };
++ u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
++ __aligned(TEST_ALIGNMENT);
++ u8 blocks[BLAKE2S_BLOCK_SIZE * 2];
++ struct blake2s_state state1, state2;
+
+- blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf));
+- success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE);
++ get_random_bytes(blocks, sizeof(blocks));
++ get_random_bytes(&state, sizeof(state));
+
+- if (!success)
+- pr_err("blake2s256_hmac self-test: FAIL\n");
++#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
++ defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
++ memcpy(&state1, &state, sizeof(state1));
++ memcpy(&state2, &state, sizeof(state2));
++ blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE);
++ blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE);
++ if (memcmp(&state1, &state2, sizeof(state1))) {
++ pr_err("blake2s random compress self-test %d: FAIL\n",
++ i + 1);
++ success = false;
++ }
++#endif
++
++ memcpy(&state1, &state, sizeof(state1));
++ blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
++ for (l = 1; l < TEST_ALIGNMENT; ++l) {
++ memcpy(unaligned_block + l, blocks,
++ BLAKE2S_BLOCK_SIZE);
++ memcpy(&state2, &state, sizeof(state2));
++ blake2s_compress(&state2, unaligned_block + l, 1,
++ BLAKE2S_BLOCK_SIZE);
++ if (memcmp(&state1, &state2, sizeof(state1))) {
++ pr_err("blake2s random compress align %d self-test %d: FAIL\n",
++ l, i + 1);
++ success = false;
++ }
++ }
+ }
+
+ return success;
+diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
+index 4055aa593ec49..98e688c6d8910 100644
+--- a/lib/crypto/blake2s.c
++++ b/lib/crypto/blake2s.c
+@@ -16,63 +16,48 @@
+ #include <linux/init.h>
+ #include <linux/bug.h>
+
+-#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
+-# define blake2s_compress blake2s_compress_arch
+-#else
+-# define blake2s_compress blake2s_compress_generic
+-#endif
++static inline void blake2s_set_lastblock(struct blake2s_state *state)
++{
++ state->f[0] = -1;
++}
+
+ void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+ {
+- __blake2s_update(state, in, inlen, blake2s_compress);
++ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
++
++ if (unlikely(!inlen))
++ return;
++ if (inlen > fill) {
++ memcpy(state->buf + state->buflen, in, fill);
++ blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
++ state->buflen = 0;
++ in += fill;
++ inlen -= fill;
++ }
++ if (inlen > BLAKE2S_BLOCK_SIZE) {
++ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
++ blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
++ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++ }
++ memcpy(state->buf + state->buflen, in, inlen);
++ state->buflen += inlen;
+ }
+ EXPORT_SYMBOL(blake2s_update);
+
+ void blake2s_final(struct blake2s_state *state, u8 *out)
+ {
+ WARN_ON(IS_ENABLED(DEBUG) && !out);
+- __blake2s_final(state, out, blake2s_compress);
++ blake2s_set_lastblock(state);
++ memset(state->buf + state->buflen, 0,
++ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
++ blake2s_compress(state, state->buf, 1, state->buflen);
++ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
++ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+ }
+ EXPORT_SYMBOL(blake2s_final);
+
+-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
+- const size_t keylen)
+-{
+- struct blake2s_state state;
+- u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
+- u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
+- int i;
+-
+- if (keylen > BLAKE2S_BLOCK_SIZE) {
+- blake2s_init(&state, BLAKE2S_HASH_SIZE);
+- blake2s_update(&state, key, keylen);
+- blake2s_final(&state, x_key);
+- } else
+- memcpy(x_key, key, keylen);
+-
+- for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+- x_key[i] ^= 0x36;
+-
+- blake2s_init(&state, BLAKE2S_HASH_SIZE);
+- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+- blake2s_update(&state, in, inlen);
+- blake2s_final(&state, i_hash);
+-
+- for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
+- x_key[i] ^= 0x5c ^ 0x36;
+-
+- blake2s_init(&state, BLAKE2S_HASH_SIZE);
+- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
+- blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
+- blake2s_final(&state, i_hash);
+-
+- memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
+- memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
+- memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
+-}
+-EXPORT_SYMBOL(blake2s256_hmac);
+-
+ static int __init blake2s_mod_init(void)
+ {
+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
+@@ -81,12 +66,7 @@ static int __init blake2s_mod_init(void)
+ return 0;
+ }
+
+-static void __exit blake2s_mod_exit(void)
+-{
+-}
+-
+ module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+ MODULE_LICENSE("GPL v2");
+ MODULE_DESCRIPTION("BLAKE2s hash function");
+ MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
+index 633b59fed9db8..284e62576d0c6 100644
+--- a/lib/crypto/sm4.c
++++ b/lib/crypto/sm4.c
+@@ -15,7 +15,7 @@ static const u32 fk[4] = {
+ 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+ };
+
+-static const u32 __cacheline_aligned ck[32] = {
++static const u32 ____cacheline_aligned ck[32] = {
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+@@ -26,7 +26,7 @@ static const u32 __cacheline_aligned ck[32] = {
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+ };
+
+-static const u8 __cacheline_aligned sbox[256] = {
++static const u8 ____cacheline_aligned sbox[256] = {
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 6946f8e204e39..579406c1e9ed9 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -129,7 +129,7 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
+
+ static void fill_pool(void)
+ {
+- gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
++ gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+@@ -219,10 +219,6 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
+ return obj;
+ }
+
+-/*
+- * Allocate a new object. If the pool is empty, switch off the debugger.
+- * Must be called with interrupts disabled.
+- */
+ static struct debug_obj *
+ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
+ {
+@@ -440,6 +436,7 @@ static int object_cpu_offline(unsigned int cpu)
+ struct debug_percpu_free *percpu_pool;
+ struct hlist_node *tmp;
+ struct debug_obj *obj;
++ unsigned long flags;
+
+ /* Remote access is safe as the CPU is dead already */
+ percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu);
+@@ -447,6 +444,12 @@ static int object_cpu_offline(unsigned int cpu)
+ hlist_del(&obj->node);
+ kmem_cache_free(obj_cache, obj);
+ }
++
++ raw_spin_lock_irqsave(&pool_lock, flags);
++ obj_pool_used -= percpu_pool->obj_free;
++ debug_objects_freed += percpu_pool->obj_free;
++ raw_spin_unlock_irqrestore(&pool_lock, flags);
++
+ percpu_pool->obj_free = 0;
+
+ return 0;
+@@ -548,11 +551,49 @@ static void debug_object_is_on_stack(void *addr, int onstack)
+ WARN_ON(1);
+ }
+
++static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
++ const struct debug_obj_descr *descr,
++ bool onstack, bool alloc_ifstatic)
++{
++ struct debug_obj *obj = lookup_object(addr, b);
++ enum debug_obj_state state = ODEBUG_STATE_NONE;
++
++ if (likely(obj))
++ return obj;
++
++ /*
++ * debug_object_init() unconditionally allocates untracked
++ * objects. It does not matter whether it is a static object or
++ * not.
++ *
++ * debug_object_assert_init() and debug_object_activate() allow
++ * allocation only if the descriptor callback confirms that the
++ * object is static and considered initialized. For non-static
++ * objects the allocation needs to be done from the fixup callback.
++ */
++ if (unlikely(alloc_ifstatic)) {
++ if (!descr->is_static_object || !descr->is_static_object(addr))
++ return ERR_PTR(-ENOENT);
++ /* Statically allocated objects are considered initialized */
++ state = ODEBUG_STATE_INIT;
++ }
++
++ obj = alloc_object(addr, b, descr);
++ if (likely(obj)) {
++ obj->state = state;
++ debug_object_is_on_stack(addr, onstack);
++ return obj;
++ }
++
++ /* Out of memory. Do the cleanup outside of the locked region */
++ debug_objects_enabled = 0;
++ return NULL;
++}
++
+ static void
+ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
+ {
+ enum debug_obj_state state;
+- bool check_stack = false;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+@@ -568,16 +609,11 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+- obj = lookup_object(addr, db);
+- if (!obj) {
+- obj = alloc_object(addr, db, descr);
+- if (!obj) {
+- debug_objects_enabled = 0;
+- raw_spin_unlock_irqrestore(&db->lock, flags);
+- debug_objects_oom();
+- return;
+- }
+- check_stack = true;
++ obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
++ if (unlikely(!obj)) {
++ raw_spin_unlock_irqrestore(&db->lock, flags);
++ debug_objects_oom();
++ return;
+ }
+
+ switch (obj->state) {
+@@ -603,8 +639,6 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+- if (check_stack)
+- debug_object_is_on_stack(addr, onstack);
+ }
+
+ /**
+@@ -644,14 +678,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
+ */
+ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
+ {
++ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+ int ret;
+- struct debug_obj o = { .object = addr,
+- .state = ODEBUG_STATE_NOTAVAILABLE,
+- .descr = descr };
+
+ if (!debug_objects_enabled)
+ return 0;
+@@ -660,8 +692,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+- obj = lookup_object(addr, db);
+- if (obj) {
++ obj = lookup_object_or_alloc(addr, db, descr, false, true);
++ if (likely(!IS_ERR_OR_NULL(obj))) {
+ bool print_object = false;
+
+ switch (obj->state) {
+@@ -694,24 +726,16 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+
+- /*
+- * We are here when a static object is activated. We
+- * let the type specific code confirm whether this is
+- * true or not. if true, we just make sure that the
+- * static object is tracked in the object tracker. If
+- * not, this must be a bug, so we try to fix it up.
+- */
+- if (descr->is_static_object && descr->is_static_object(addr)) {
+- /* track this static object */
+- debug_object_init(addr, descr);
+- debug_object_activate(addr, descr);
+- } else {
+- debug_print_object(&o, "activate");
+- ret = debug_object_fixup(descr->fixup_activate, addr,
+- ODEBUG_STATE_NOTAVAILABLE);
+- return ret ? 0 : -EINVAL;
++ /* If NULL the allocation has hit OOM */
++ if (!obj) {
++ debug_objects_oom();
++ return 0;
+ }
+- return 0;
++
++ /* Object is neither static nor tracked. It's not initialized */
++ debug_print_object(&o, "activate");
++ ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
++ return ret ? 0 : -EINVAL;
+ }
+ EXPORT_SYMBOL_GPL(debug_object_activate);
+
+@@ -865,6 +889,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
+ */
+ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
+ {
++ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+@@ -875,31 +900,20 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
++ obj = lookup_object_or_alloc(addr, db, descr, false, true);
++ raw_spin_unlock_irqrestore(&db->lock, flags);
++ if (likely(!IS_ERR_OR_NULL(obj)))
++ return;
+
+- obj = lookup_object(addr, db);
++ /* If NULL the allocation has hit OOM */
+ if (!obj) {
+- struct debug_obj o = { .object = addr,
+- .state = ODEBUG_STATE_NOTAVAILABLE,
+- .descr = descr };
+-
+- raw_spin_unlock_irqrestore(&db->lock, flags);
+- /*
+- * Maybe the object is static, and we let the type specific
+- * code confirm. Track this static object if true, else invoke
+- * fixup.
+- */
+- if (descr->is_static_object && descr->is_static_object(addr)) {
+- /* Track this static object */
+- debug_object_init(addr, descr);
+- } else {
+- debug_print_object(&o, "assert_init");
+- debug_object_fixup(descr->fixup_assert_init, addr,
+- ODEBUG_STATE_NOTAVAILABLE);
+- }
++ debug_objects_oom();
+ return;
+ }
+
+- raw_spin_unlock_irqrestore(&db->lock, flags);
++ /* Object is neither tracked nor static. It's not initialized. */
++ debug_print_object(&o, "assert_init");
++ debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
+ }
+ EXPORT_SYMBOL_GPL(debug_object_assert_init);
+
+@@ -1321,6 +1335,8 @@ static int __init debug_objects_replace_static_objects(void)
+ hlist_add_head(&obj->node, &objects);
+ }
+
++ debug_objects_allocated += i;
++
+ /*
+ * debug_objects_mem_init() is now called early that only one CPU is up
+ * and interrupts have been disabled, so it is safe to replace the
+@@ -1389,6 +1405,7 @@ void __init debug_objects_mem_init(void)
+ debug_objects_enabled = 0;
+ kmem_cache_destroy(obj_cache);
+ pr_warn("out of memory.\n");
++ return;
+ } else
+ debug_objects_selftest();
+
+diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
+index a2f38e23004aa..f7a3dc13316a3 100644
+--- a/lib/decompress_unxz.c
++++ b/lib/decompress_unxz.c
+@@ -167,7 +167,7 @@
+ * memeq and memzero are not used much and any remotely sane implementation
+ * is fast enough. memcpy/memmove speed matters in multi-call mode, but
+ * the kernel image is decompressed in single-call mode, in which only
+- * memcpy speed can matter and only if there is a lot of uncompressible data
++ * memmove speed can matter and only if there is a lot of uncompressible data
+ * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
+ * functions below should just be kept small; it's probably not worth
+ * optimizing for speed.
+diff --git a/lib/dim/dim.c b/lib/dim/dim.c
+index 38045d6d05381..e89aaf07bde50 100644
+--- a/lib/dim/dim.c
++++ b/lib/dim/dim.c
+@@ -54,7 +54,7 @@ void dim_park_tired(struct dim *dim)
+ }
+ EXPORT_SYMBOL(dim_park_tired);
+
+-void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
++bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ struct dim_stats *curr_stats)
+ {
+ /* u32 holds up to 71 minutes, should be enough */
+@@ -66,7 +66,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ start->comp_ctr);
+
+ if (!delta_us)
+- return;
++ return false;
+
+ curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+ curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+@@ -79,5 +79,6 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ else
+ curr_stats->cpe_ratio = 0;
+
++ return true;
+ }
+ EXPORT_SYMBOL(dim_calc_stats);
+diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
+index 06811d866775c..4e32f7aaac86c 100644
+--- a/lib/dim/net_dim.c
++++ b/lib/dim/net_dim.c
+@@ -12,41 +12,41 @@
+ * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
+ */
+ #define NET_DIM_PARAMS_NUM_PROFILES 5
+-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
++#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
++#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
+ #define NET_DIM_DEF_PROFILE_CQE 1
+ #define NET_DIM_DEF_PROFILE_EQE 1
+
+ #define NET_DIM_RX_EQE_PROFILES { \
+- {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
++ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
+ }
+
+ #define NET_DIM_RX_CQE_PROFILES { \
+- {2, 256}, \
+- {8, 128}, \
+- {16, 64}, \
+- {32, 64}, \
+- {64, 64} \
++ {.usec = 2, .pkts = 256,}, \
++ {.usec = 8, .pkts = 128,}, \
++ {.usec = 16, .pkts = 64,}, \
++ {.usec = 32, .pkts = 64,}, \
++ {.usec = 64, .pkts = 64,} \
+ }
+
+ #define NET_DIM_TX_EQE_PROFILES { \
+- {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+- {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
++ {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
++ {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
+ }
+
+ #define NET_DIM_TX_CQE_PROFILES { \
+- {5, 128}, \
+- {8, 64}, \
+- {16, 32}, \
+- {32, 32}, \
+- {64, 32} \
++ {.usec = 5, .pkts = 128,}, \
++ {.usec = 8, .pkts = 64,}, \
++ {.usec = 16, .pkts = 32,}, \
++ {.usec = 32, .pkts = 32,}, \
++ {.usec = 64, .pkts = 32,} \
+ }
+
+ static const struct dim_cq_moder
+@@ -227,7 +227,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample)
+ dim->start_sample.event_ctr);
+ if (nevents < DIM_NEVENTS)
+ break;
+- dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
++ if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
++ break;
+ if (net_dim_decision(&curr_stats, dim)) {
+ dim->state = DIM_APPLY_NEW_PROFILE;
+ schedule_work(&dim->work);
+diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c
+index 15462d54758d3..88f7794867078 100644
+--- a/lib/dim/rdma_dim.c
++++ b/lib/dim/rdma_dim.c
+@@ -88,7 +88,8 @@ void rdma_dim(struct dim *dim, u64 completions)
+ nevents = curr_sample->event_ctr - dim->start_sample.event_ctr;
+ if (nevents < DIM_NEVENTS)
+ break;
+- dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats);
++ if (!dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats))
++ break;
+ if (rdma_dim_decision(&curr_stats, dim)) {
+ dim->state = DIM_APPLY_NEW_PROFILE;
+ schedule_work(&dim->work);
+diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
+index cb5abb42c16a2..2ca56c22a169e 100644
+--- a/lib/dynamic_debug.c
++++ b/lib/dynamic_debug.c
+@@ -207,10 +207,11 @@ static int ddebug_change(const struct ddebug_query *query,
+ continue;
+ #ifdef CONFIG_JUMP_LABEL
+ if (dp->flags & _DPRINTK_FLAGS_PRINT) {
+- if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT))
++ if (!(newflags & _DPRINTK_FLAGS_PRINT))
+ static_branch_disable(&dp->key.dd_key_true);
+- } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT)
++ } else if (newflags & _DPRINTK_FLAGS_PRINT) {
+ static_branch_enable(&dp->key.dd_key_true);
++ }
+ #endif
+ dp->flags = newflags;
+ v2pr_info("changed %s:%d [%s]%s =%s\n",
+@@ -379,10 +380,6 @@ static int ddebug_parse_query(char *words[], int nwords,
+ return -EINVAL;
+ }
+
+- if (modname)
+- /* support $modname.dyndbg=<multiple queries> */
+- query->module = modname;
+-
+ for (i = 0; i < nwords; i += 2) {
+ char *keyword = words[i];
+ char *arg = words[i+1];
+@@ -423,6 +420,13 @@ static int ddebug_parse_query(char *words[], int nwords,
+ if (rc)
+ return rc;
+ }
++ if (!query->module && modname)
++ /*
++ * support $modname.dyndbg=<multiple queries>, when
++ * not given in the query itself
++ */
++ query->module = modname;
++
+ vpr_info_dq(query, "parsed");
+ return 0;
+ }
+@@ -548,35 +552,6 @@ static int ddebug_exec_queries(char *query, const char *modname)
+ return nfound;
+ }
+
+-/**
+- * dynamic_debug_exec_queries - select and change dynamic-debug prints
+- * @query: query-string described in admin-guide/dynamic-debug-howto
+- * @modname: string containing module name, usually &module.mod_name
+- *
+- * This uses the >/proc/dynamic_debug/control reader, allowing module
+- * authors to modify their dynamic-debug callsites. The modname is
+- * canonically struct module.mod_name, but can also be null or a
+- * module-wildcard, for example: "drm*".
+- */
+-int dynamic_debug_exec_queries(const char *query, const char *modname)
+-{
+- int rc;
+- char *qry; /* writable copy of query */
+-
+- if (!query) {
+- pr_err("non-null query/command string expected\n");
+- return -EINVAL;
+- }
+- qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL);
+- if (!qry)
+- return -ENOMEM;
+-
+- rc = ddebug_exec_queries(qry, modname);
+- kfree(qry);
+- return rc;
+-}
+-EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries);
+-
+ #define PREFIX_SIZE 64
+
+ static int remaining(int wrote)
+@@ -761,6 +736,18 @@ static __init int ddebug_setup_query(char *str)
+
+ __setup("ddebug_query=", ddebug_setup_query);
+
++/*
++ * Install a noop handler to make dyndbg look like a normal kernel cli param.
++ * This avoids warnings about dyndbg being an unknown cli param when supplied
++ * by a user.
++ */
++static __init int dyndbg_setup(char *str)
++{
++ return 1;
++}
++
++__setup("dyndbg=", dyndbg_setup);
++
+ /*
+ * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the
+ * command text from userspace, parses and executes it.
+diff --git a/lib/errname.c b/lib/errname.c
+index 05cbf731545f0..67739b174a8cc 100644
+--- a/lib/errname.c
++++ b/lib/errname.c
+@@ -21,6 +21,7 @@ static const char *names_0[] = {
+ E(EADDRNOTAVAIL),
+ E(EADV),
+ E(EAFNOSUPPORT),
++ E(EAGAIN), /* EWOULDBLOCK */
+ E(EALREADY),
+ E(EBADE),
+ E(EBADF),
+@@ -31,15 +32,17 @@ static const char *names_0[] = {
+ E(EBADSLT),
+ E(EBFONT),
+ E(EBUSY),
+-#ifdef ECANCELLED
+- E(ECANCELLED),
+-#endif
++ E(ECANCELED), /* ECANCELLED */
+ E(ECHILD),
+ E(ECHRNG),
+ E(ECOMM),
+ E(ECONNABORTED),
++ E(ECONNREFUSED), /* EREFUSED */
+ E(ECONNRESET),
++ E(EDEADLK), /* EDEADLOCK */
++#if EDEADLK != EDEADLOCK /* mips, sparc, powerpc */
+ E(EDEADLOCK),
++#endif
+ E(EDESTADDRREQ),
+ E(EDOM),
+ E(EDOTDOT),
+@@ -166,14 +169,17 @@ static const char *names_0[] = {
+ E(EUSERS),
+ E(EXDEV),
+ E(EXFULL),
+-
+- E(ECANCELED), /* ECANCELLED */
+- E(EAGAIN), /* EWOULDBLOCK */
+- E(ECONNREFUSED), /* EREFUSED */
+- E(EDEADLK), /* EDEADLOCK */
+ };
+ #undef E
+
++#ifdef EREFUSED /* parisc */
++static_assert(EREFUSED == ECONNREFUSED);
++#endif
++#ifdef ECANCELLED /* parisc */
++static_assert(ECANCELLED == ECANCELED);
++#endif
++static_assert(EAGAIN == EWOULDBLOCK); /* everywhere */
++
+ #define E(err) [err - 512 + BUILD_BUG_ON_ZERO(err < 512 || err > 550)] = "-" #err
+ static const char *names_512[] = {
+ E(ERESTARTSYS),
+diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
+index 5f4b07b56cd9c..9738664386088 100644
+--- a/lib/fonts/fonts.c
++++ b/lib/fonts/fonts.c
+@@ -135,8 +135,8 @@ const struct font_desc *get_default_font(int xres, int yres, u32 font_w,
+ if (res > 20)
+ c += 20 - res;
+
+- if ((font_w & (1 << (f->width - 1))) &&
+- (font_h & (1 << (f->height - 1))))
++ if ((font_w & (1U << (f->width - 1))) &&
++ (font_h & (1U << (f->height - 1))))
+ c += 1000;
+
+ if (c > cc) {
+diff --git a/lib/hexdump.c b/lib/hexdump.c
+index 9301578f98e8c..06833d404398d 100644
+--- a/lib/hexdump.c
++++ b/lib/hexdump.c
+@@ -22,15 +22,33 @@ EXPORT_SYMBOL(hex_asc_upper);
+ *
+ * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
+ * input.
++ *
++ * This function is used to load cryptographic keys, so it is coded in such a
++ * way that there are no conditions or memory accesses that depend on data.
++ *
++ * Explanation of the logic:
++ * (ch - '9' - 1) is negative if ch <= '9'
++ * ('0' - 1 - ch) is negative if ch >= '0'
++ * we "and" these two values, so the result is negative if ch is in the range
++ * '0' ... '9'
++ * we are only interested in the sign, so we do a shift ">> 8"; note that right
++ * shift of a negative value is implementation-defined, so we cast the
++ * value to (unsigned) before the shift --- we have 0xffffff if ch is in
++ * the range '0' ... '9', 0 otherwise
++ * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is
++ * in the range '0' ... '9', 0 otherwise
++ * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0'
++ * ... '9', -1 otherwise
++ * the next line is similar to the previous one, but we need to decode both
++ * uppercase and lowercase letters, so we use (ch & 0xdf), which converts
++ * lowercase to uppercase
+ */
+-int hex_to_bin(char ch)
++int hex_to_bin(unsigned char ch)
+ {
+- if ((ch >= '0') && (ch <= '9'))
+- return ch - '0';
+- ch = tolower(ch);
+- if ((ch >= 'a') && (ch <= 'f'))
+- return ch - 'a' + 10;
+- return -1;
++ unsigned char cu = ch & 0xdf;
++ return -1 +
++ ((ch - '0' + 1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) +
++ ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8);
+ }
+ EXPORT_SYMBOL(hex_to_bin);
+
+@@ -45,10 +63,13 @@ EXPORT_SYMBOL(hex_to_bin);
+ int hex2bin(u8 *dst, const char *src, size_t count)
+ {
+ while (count--) {
+- int hi = hex_to_bin(*src++);
+- int lo = hex_to_bin(*src++);
++ int hi, lo;
+
+- if ((hi < 0) || (lo < 0))
++ hi = hex_to_bin(*src++);
++ if (unlikely(hi < 0))
++ return -EINVAL;
++ lo = hex_to_bin(*src++);
++ if (unlikely(lo < 0))
+ return -EINVAL;
+
+ *dst++ = (hi << 4) | lo;
+diff --git a/lib/idr.c b/lib/idr.c
+index f4ab4f4aa3c7f..13f2758c23773 100644
+--- a/lib/idr.c
++++ b/lib/idr.c
+@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc);
+ * @end: The maximum ID (exclusive).
+ * @gfp: Memory allocation flags.
+ *
+- * Allocates an unused ID in the range specified by @nextid and @end. If
++ * Allocates an unused ID in the range specified by @start and @end. If
+ * @end is <= 0, it is treated as one larger than %INT_MAX. This allows
+ * callers to use @start + N as @end as long as N is within integer range.
+ * The search for an unused ID will start at the last ID allocated and will
+@@ -491,7 +491,8 @@ void ida_free(struct ida *ida, unsigned int id)
+ struct ida_bitmap *bitmap;
+ unsigned long flags;
+
+- BUG_ON((int)id < 0);
++ if ((int)id < 0)
++ return;
+
+ xas_lock_irqsave(&xas, flags);
+ bitmap = xas_load(&xas);
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 755c10c5138cd..d0c3e939ee601 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
+ buf = iov->iov_base + skip;
+ copy = min(bytes, iov->iov_len - skip);
+
+- if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
++ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
+ kaddr = kmap_atomic(page);
+ from = kaddr + offset;
+
+@@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
+ buf = iov->iov_base + skip;
+ copy = min(bytes, iov->iov_len - skip);
+
+- if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
++ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
+ kaddr = kmap_atomic(page);
+ to = kaddr + offset;
+
+@@ -416,6 +416,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
+ return 0;
+
+ buf->ops = &page_cache_pipe_buf_ops;
++ buf->flags = 0;
+ get_page(page);
+ buf->page = page;
+ buf->offset = offset;
+@@ -430,35 +431,81 @@ out:
+ }
+
+ /*
++ * fault_in_iov_iter_readable - fault in iov iterator for reading
++ * @i: iterator
++ * @size: maximum length
++ *
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+- * bytes. For each iovec, fault in each page that constitutes the iovec.
++ * @size. For each iovec, fault in each page that constitutes the iovec.
++ *
++ * Returns the number of bytes not faulted in (like copy_to_user() and
++ * copy_from_user()).
+ *
+- * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+- * because it is an invalid address).
++ * Always returns 0 for non-userspace iterators.
+ */
+-int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
++size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
+ {
+ if (iter_is_iovec(i)) {
++ size_t count = min(size, iov_iter_count(i));
+ const struct iovec *p;
+ size_t skip;
+
+- if (bytes > i->count)
+- bytes = i->count;
+- for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
+- size_t len = min(bytes, p->iov_len - skip);
+- int err;
++ size -= count;
++ for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
++ size_t len = min(count, p->iov_len - skip);
++ size_t ret;
+
+ if (unlikely(!len))
+ continue;
+- err = fault_in_pages_readable(p->iov_base + skip, len);
+- if (unlikely(err))
+- return err;
+- bytes -= len;
++ ret = fault_in_readable(p->iov_base + skip, len);
++ count -= len - ret;
++ if (ret)
++ break;
+ }
++ return count + size;
+ }
+ return 0;
+ }
+-EXPORT_SYMBOL(iov_iter_fault_in_readable);
++EXPORT_SYMBOL(fault_in_iov_iter_readable);
++
++/*
++ * fault_in_iov_iter_writeable - fault in iov iterator for writing
++ * @i: iterator
++ * @size: maximum length
++ *
++ * Faults in the iterator using get_user_pages(), i.e., without triggering
++ * hardware page faults. This is primarily useful when we already know that
++ * some or all of the pages in @i aren't in memory.
++ *
++ * Returns the number of bytes not faulted in, like copy_to_user() and
++ * copy_from_user().
++ *
++ * Always returns 0 for non-user-space iterators.
++ */
++size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
++{
++ if (iter_is_iovec(i)) {
++ size_t count = min(size, iov_iter_count(i));
++ const struct iovec *p;
++ size_t skip;
++
++ size -= count;
++ for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
++ size_t len = min(count, p->iov_len - skip);
++ size_t ret;
++
++ if (unlikely(!len))
++ continue;
++ ret = fault_in_safe_writeable(p->iov_base + skip, len);
++ count -= len - ret;
++ if (ret)
++ break;
++ }
++ return count + size;
++ }
++ return 0;
++}
++EXPORT_SYMBOL(fault_in_iov_iter_writeable);
+
+ void iov_iter_init(struct iov_iter *i, unsigned int direction,
+ const struct iovec *iov, unsigned long nr_segs,
+@@ -467,6 +514,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
+ WARN_ON(direction & ~(READ | WRITE));
+ *i = (struct iov_iter) {
+ .iter_type = ITER_IOVEC,
++ .nofault = false,
+ .data_source = direction,
+ .iov = iov,
+ .nr_segs = nr_segs,
+@@ -532,6 +580,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size,
+ break;
+
+ buf->ops = &default_pipe_buf_ops;
++ buf->flags = 0;
+ buf->page = page;
+ buf->offset = 0;
+ buf->len = min_t(ssize_t, left, PAGE_SIZE);
+@@ -642,6 +691,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
+ struct pipe_inode_info *pipe = i->pipe;
+ unsigned int p_mask = pipe->ring_size - 1;
+ unsigned int i_head;
++ unsigned int valid = pipe->head;
+ size_t n, off, xfer = 0;
+
+ if (!sanity(i))
+@@ -655,11 +705,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
+ rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
+ chunk -= rem;
+ kunmap_local(p);
+- i->head = i_head;
+- i->iov_offset = off + chunk;
+- xfer += chunk;
+- if (rem)
++ if (chunk) {
++ i->head = i_head;
++ i->iov_offset = off + chunk;
++ xfer += chunk;
++ valid = i_head + 1;
++ }
++ if (rem) {
++ pipe->bufs[i_head & p_mask].len -= rem;
++ pipe_discard_from(pipe, valid);
+ break;
++ }
+ n -= chunk;
+ off = 0;
+ i_head++;
+@@ -1387,7 +1443,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
+ {
+ unsigned nr, offset;
+ pgoff_t index, count;
+- size_t size = maxsize, actual;
++ size_t size = maxsize;
+ loff_t pos;
+
+ if (!size || !maxpages)
+@@ -1414,13 +1470,7 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
+ if (nr == 0)
+ return 0;
+
+- actual = PAGE_SIZE * nr;
+- actual -= offset;
+- if (nr == count && size > 0) {
+- unsigned last_offset = (nr > 1) ? 0 : offset;
+- actual -= PAGE_SIZE - (last_offset + size);
+- }
+- return actual;
++ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
+ }
+
+ /* must be done on non-empty ITER_IOVEC one */
+@@ -1481,14 +1531,18 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
+ return 0;
+
+ if (likely(iter_is_iovec(i))) {
++ unsigned int gup_flags = 0;
+ unsigned long addr;
+
++ if (iov_iter_rw(i) != WRITE)
++ gup_flags |= FOLL_WRITE;
++ if (i->nofault)
++ gup_flags |= FOLL_NOFAULT;
++
+ addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+- res = get_user_pages_fast(addr, n,
+- iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
+- pages);
+- if (unlikely(res < 0))
++ res = get_user_pages_fast(addr, n, gup_flags, pages);
++ if (unlikely(res <= 0))
+ return res;
+ return (res == n ? len : res * PAGE_SIZE) - *start;
+ }
+@@ -1551,7 +1605,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
+ struct page **p;
+ unsigned nr, offset;
+ pgoff_t index, count;
+- size_t size = maxsize, actual;
++ size_t size = maxsize;
+ loff_t pos;
+
+ if (!size)
+@@ -1580,13 +1634,7 @@ static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
+ if (nr == 0)
+ return 0;
+
+- actual = PAGE_SIZE * nr;
+- actual -= offset;
+- if (nr == count && size > 0) {
+- unsigned last_offset = (nr > 1) ? 0 : offset;
+- actual -= PAGE_SIZE - (last_offset + size);
+- }
+- return actual;
++ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
+ }
+
+ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+@@ -1603,17 +1651,23 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+ return 0;
+
+ if (likely(iter_is_iovec(i))) {
++ unsigned int gup_flags = 0;
+ unsigned long addr;
+
++ if (iov_iter_rw(i) != WRITE)
++ gup_flags |= FOLL_WRITE;
++ if (i->nofault)
++ gup_flags |= FOLL_NOFAULT;
++
+ addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+ p = get_pages_array(n);
+ if (!p)
+ return -ENOMEM;
+- res = get_user_pages_fast(addr, n,
+- iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
+- if (unlikely(res < 0)) {
++ res = get_user_pages_fast(addr, n, gup_flags, p);
++ if (unlikely(res <= 0)) {
+ kvfree(p);
++ *pages = NULL;
+ return res;
+ }
+ *pages = p;
+diff --git a/lib/kobject.c b/lib/kobject.c
+index ea53b30cf4837..184a3dab26991 100644
+--- a/lib/kobject.c
++++ b/lib/kobject.c
+@@ -126,10 +126,10 @@ static int create_dir(struct kobject *kobj)
+ return 0;
+ }
+
+-static int get_kobj_path_length(struct kobject *kobj)
++static int get_kobj_path_length(const struct kobject *kobj)
+ {
+ int length = 1;
+- struct kobject *parent = kobj;
++ const struct kobject *parent = kobj;
+
+ /* walk up the ancestors until we hit the one pointing to the
+ * root.
+@@ -144,21 +144,25 @@ static int get_kobj_path_length(struct kobject *kobj)
+ return length;
+ }
+
+-static void fill_kobj_path(struct kobject *kobj, char *path, int length)
++static int fill_kobj_path(const struct kobject *kobj, char *path, int length)
+ {
+- struct kobject *parent;
++ const struct kobject *parent;
+
+ --length;
+ for (parent = kobj; parent; parent = parent->parent) {
+ int cur = strlen(kobject_name(parent));
+ /* back up enough to print this name with '/' */
+ length -= cur;
++ if (length <= 0)
++ return -EINVAL;
+ memcpy(path + length, kobject_name(parent), cur);
+ *(path + --length) = '/';
+ }
+
+ pr_debug("kobject: '%s' (%p): %s: path = '%s'\n", kobject_name(kobj),
+ kobj, __func__, path);
++
++ return 0;
+ }
+
+ /**
+@@ -168,18 +172,22 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length)
+ *
+ * Return: The newly allocated memory, caller must free with kfree().
+ */
+-char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
++char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask)
+ {
+ char *path;
+ int len;
+
++retry:
+ len = get_kobj_path_length(kobj);
+ if (len == 0)
+ return NULL;
+ path = kzalloc(len, gfp_mask);
+ if (!path)
+ return NULL;
+- fill_kobj_path(kobj, path, len);
++ if (fill_kobj_path(kobj, path, len)) {
++ kfree(path);
++ goto retry;
++ }
+
+ return path;
+ }
+diff --git a/lib/kunit/debugfs.c b/lib/kunit/debugfs.c
+index b71db0abc12bf..1048ef1b8d6ec 100644
+--- a/lib/kunit/debugfs.c
++++ b/lib/kunit/debugfs.c
+@@ -52,7 +52,7 @@ static void debugfs_print_result(struct seq_file *seq,
+ static int debugfs_print_results(struct seq_file *seq, void *v)
+ {
+ struct kunit_suite *suite = (struct kunit_suite *)seq->private;
+- bool success = kunit_suite_has_succeeded(suite);
++ enum kunit_status success = kunit_suite_has_succeeded(suite);
+ struct kunit_case *test_case;
+
+ if (!suite || !suite->log)
+diff --git a/lib/kunit/test.c b/lib/kunit/test.c
+index f246b847024e3..9aef816e573c1 100644
+--- a/lib/kunit/test.c
++++ b/lib/kunit/test.c
+@@ -504,16 +504,18 @@ int kunit_run_tests(struct kunit_suite *suite)
+ struct kunit_result_stats param_stats = { 0 };
+ test_case->status = KUNIT_SKIPPED;
+
+- if (test_case->generate_params) {
++ if (!test_case->generate_params) {
++ /* Non-parameterised test. */
++ kunit_run_case_catch_errors(suite, test_case, &test);
++ kunit_update_stats(&param_stats, test.status);
++ } else {
+ /* Get initial param. */
+ param_desc[0] = '\0';
+ test.param_value = test_case->generate_params(NULL, param_desc);
+- }
+
+- do {
+- kunit_run_case_catch_errors(suite, test_case, &test);
++ while (test.param_value) {
++ kunit_run_case_catch_errors(suite, test_case, &test);
+
+- if (test_case->generate_params) {
+ if (param_desc[0] == '\0') {
+ snprintf(param_desc, sizeof(param_desc),
+ "param-%d", test.param_index);
+@@ -530,11 +532,11 @@ int kunit_run_tests(struct kunit_suite *suite)
+ param_desc[0] = '\0';
+ test.param_value = test_case->generate_params(test.param_value, param_desc);
+ test.param_index++;
+- }
+
+- kunit_update_stats(&param_stats, test.status);
++ kunit_update_stats(&param_stats, test.status);
++ }
++ }
+
+- } while (test.param_value);
+
+ kunit_print_test_stats(&test, param_stats);
+
+diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
+index 0dd434e40487c..71e5c58530996 100644
+--- a/lib/kunit/try-catch.c
++++ b/lib/kunit/try-catch.c
+@@ -52,7 +52,7 @@ static unsigned long kunit_test_timeout(void)
+ * If tests timeout due to exceeding sysctl_hung_task_timeout_secs,
+ * the task will be killed and an oops generated.
+ */
+- return 300 * MSEC_PER_SEC; /* 5 min */
++ return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
+ }
+
+ void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
+diff --git a/lib/list-test.c b/lib/list-test.c
+index ee09505df16f1..994ea4e3fc1b9 100644
+--- a/lib/list-test.c
++++ b/lib/list-test.c
+@@ -234,6 +234,24 @@ static void list_test_list_bulk_move_tail(struct kunit *test)
+ KUNIT_EXPECT_EQ(test, i, 2);
+ }
+
++static void list_test_list_is_head(struct kunit *test)
++{
++ struct list_head a, b, c;
++
++ /* Two lists: [a] -> b, [c] */
++ INIT_LIST_HEAD(&a);
++ INIT_LIST_HEAD(&c);
++ list_add_tail(&b, &a);
++
++ KUNIT_EXPECT_TRUE_MSG(test, list_is_head(&a, &a),
++ "Head element of same list");
++ KUNIT_EXPECT_FALSE_MSG(test, list_is_head(&a, &b),
++ "Non-head element of same list");
++ KUNIT_EXPECT_FALSE_MSG(test, list_is_head(&a, &c),
++ "Head element of different list");
++}
++
++
+ static void list_test_list_is_first(struct kunit *test)
+ {
+ struct list_head a, b;
+@@ -710,6 +728,7 @@ static struct kunit_case list_test_cases[] = {
+ KUNIT_CASE(list_test_list_move),
+ KUNIT_CASE(list_test_list_move_tail),
+ KUNIT_CASE(list_test_list_bulk_move_tail),
++ KUNIT_CASE(list_test_list_is_head),
+ KUNIT_CASE(list_test_list_is_first),
+ KUNIT_CASE(list_test_list_is_last),
+ KUNIT_CASE(list_test_list_empty),
+diff --git a/lib/list_debug.c b/lib/list_debug.c
+index 5d5424b51b746..413daa72a3d83 100644
+--- a/lib/list_debug.c
++++ b/lib/list_debug.c
+@@ -20,7 +20,11 @@
+ bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
+ {
+- if (CHECK_DATA_CORRUPTION(next->prev != prev,
++ if (CHECK_DATA_CORRUPTION(prev == NULL,
++ "list_add corruption. prev is NULL.\n") ||
++ CHECK_DATA_CORRUPTION(next == NULL,
++ "list_add corruption. next is NULL.\n") ||
++ CHECK_DATA_CORRUPTION(next->prev != prev,
+ "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
+ prev, next->prev, next) ||
+ CHECK_DATA_CORRUPTION(prev->next != next,
+@@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)
+ prev = entry->prev;
+ next = entry->next;
+
+- if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
++ if (CHECK_DATA_CORRUPTION(next == NULL,
++ "list_del corruption, %px->next is NULL\n", entry) ||
++ CHECK_DATA_CORRUPTION(prev == NULL,
++ "list_del corruption, %px->prev is NULL\n", entry) ||
++ CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ "list_del corruption, %px->next is LIST_POISON1 (%px)\n",
+ entry, LIST_POISON1) ||
+ CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c
+index 7ac845f65be56..133929e0ce8ff 100644
+--- a/lib/livepatch/test_klp_callbacks_busy.c
++++ b/lib/livepatch/test_klp_callbacks_busy.c
+@@ -16,10 +16,12 @@ MODULE_PARM_DESC(block_transition, "block_transition (default=false)");
+
+ static void busymod_work_func(struct work_struct *work);
+ static DECLARE_WORK(work, busymod_work_func);
++static DECLARE_COMPLETION(busymod_work_started);
+
+ static void busymod_work_func(struct work_struct *work)
+ {
+ pr_info("%s enter\n", __func__);
++ complete(&busymod_work_started);
+
+ while (READ_ONCE(block_transition)) {
+ /*
+@@ -37,6 +39,12 @@ static int test_klp_callbacks_busy_init(void)
+ pr_info("%s\n", __func__);
+ schedule_work(&work);
+
++ /*
++ * To synchronize kernel messages, hold the init function from
++ * exiting until the work function's entry message has printed.
++ */
++ wait_for_completion(&busymod_work_started);
++
+ if (!block_transition) {
+ /*
+ * Serialize output: print all messages from the work
+diff --git a/lib/lockref.c b/lib/lockref.c
+index 5b34bbd3eba81..81ac5f3552428 100644
+--- a/lib/lockref.c
++++ b/lib/lockref.c
+@@ -24,7 +24,6 @@
+ } \
+ if (!--retry) \
+ break; \
+- cpu_relax(); \
+ } \
+ } while (0)
+
+diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c
+index 9bdfde0c0f86d..e7ea9b28d8db5 100644
+--- a/lib/logic_iomem.c
++++ b/lib/logic_iomem.c
+@@ -21,15 +21,15 @@ struct logic_iomem_area {
+
+ #define AREA_SHIFT 24
+ #define MAX_AREA_SIZE (1 << AREA_SHIFT)
+-#define MAX_AREAS ((1ULL<<32) / MAX_AREA_SIZE)
++#define MAX_AREAS ((1U << 31) / MAX_AREA_SIZE)
+ #define AREA_BITS ((MAX_AREAS - 1) << AREA_SHIFT)
+ #define AREA_MASK (MAX_AREA_SIZE - 1)
+ #ifdef CONFIG_64BIT
+ #define IOREMAP_BIAS 0xDEAD000000000000UL
+ #define IOREMAP_MASK 0xFFFFFFFF00000000UL
+ #else
+-#define IOREMAP_BIAS 0
+-#define IOREMAP_MASK 0
++#define IOREMAP_BIAS 0x80000000UL
++#define IOREMAP_MASK 0x80000000UL
+ #endif
+
+ static DEFINE_MUTEX(regions_mtx);
+@@ -68,7 +68,7 @@ int logic_iomem_add_region(struct resource *resource,
+ }
+ EXPORT_SYMBOL(logic_iomem_add_region);
+
+-#ifndef CONFIG_LOGIC_IOMEM_FALLBACK
++#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK
+ static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
+ {
+ WARN(1, "invalid ioremap(0x%llx, 0x%zx)\n",
+@@ -79,9 +79,9 @@ static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
+ static void real_iounmap(void __iomem *addr)
+ {
+ WARN(1, "invalid iounmap for addr 0x%llx\n",
+- (unsigned long long __force)addr);
++ (unsigned long long)(uintptr_t __force)addr);
+ }
+-#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
++#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
+
+ void __iomem *ioremap(phys_addr_t offset, size_t size)
+ {
+@@ -168,12 +168,12 @@ void iounmap(void __iomem *addr)
+ }
+ EXPORT_SYMBOL(iounmap);
+
+-#ifndef CONFIG_LOGIC_IOMEM_FALLBACK
++#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK
+ #define MAKE_FALLBACK(op, sz) \
+ static u##sz real_raw_read ## op(const volatile void __iomem *addr) \
+ { \
+ WARN(1, "Invalid read" #op " at address %llx\n", \
+- (unsigned long long __force)addr); \
++ (unsigned long long)(uintptr_t __force)addr); \
+ return (u ## sz)~0ULL; \
+ } \
+ \
+@@ -181,7 +181,8 @@ static void real_raw_write ## op(u ## sz val, \
+ volatile void __iomem *addr) \
+ { \
+ WARN(1, "Invalid writeq" #op " of 0x%llx at address %llx\n", \
+- (unsigned long long)val, (unsigned long long __force)addr);\
++ (unsigned long long)val, \
++ (unsigned long long)(uintptr_t __force)addr);\
+ } \
+
+ MAKE_FALLBACK(b, 8);
+@@ -194,14 +195,14 @@ MAKE_FALLBACK(q, 64);
+ static void real_memset_io(volatile void __iomem *addr, int value, size_t size)
+ {
+ WARN(1, "Invalid memset_io at address 0x%llx\n",
+- (unsigned long long __force)addr);
++ (unsigned long long)(uintptr_t __force)addr);
+ }
+
+ static void real_memcpy_fromio(void *buffer, const volatile void __iomem *addr,
+ size_t size)
+ {
+ WARN(1, "Invalid memcpy_fromio at address 0x%llx\n",
+- (unsigned long long __force)addr);
++ (unsigned long long)(uintptr_t __force)addr);
+
+ memset(buffer, 0xff, size);
+ }
+@@ -210,9 +211,9 @@ static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer,
+ size_t size)
+ {
+ WARN(1, "Invalid memcpy_toio at address 0x%llx\n",
+- (unsigned long long __force)addr);
++ (unsigned long long)(uintptr_t __force)addr);
+ }
+-#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
++#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
+
+ #define MAKE_OP(op, sz) \
+ u##sz __raw_read ## op(const volatile void __iomem *addr) \
+diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
+index 926f4823d5eac..fd1728d94babb 100644
+--- a/lib/lz4/lz4_decompress.c
++++ b/lib/lz4/lz4_decompress.c
+@@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic(
+ ip += length;
+ op += length;
+
+- /* Necessarily EOF, due to parsing restrictions */
+- if (!partialDecoding || (cpy == oend))
++ /* Necessarily EOF when !partialDecoding.
++ * When partialDecoding, it is EOF if we've either
++ * filled the output buffer or
++ * can't proceed with reading an offset for following match.
++ */
++ if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2)))
+ break;
+ } else {
+ /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+diff --git a/lib/memneq.c b/lib/memneq.c
+new file mode 100644
+index 0000000000000..afed1bd16aee0
+--- /dev/null
++++ b/lib/memneq.c
+@@ -0,0 +1,168 @@
++/*
++ * Constant-time equality testing of memory regions.
++ *
++ * Authors:
++ *
++ * James Yonan <james@openvpn.net>
++ * Daniel Borkmann <dborkman@redhat.com>
++ *
++ * This file is provided under a dual BSD/GPLv2 license. When using or
++ * redistributing this file, you may do so under either license.
++ *
++ * GPL LICENSE SUMMARY
++ *
++ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ * The full GNU General Public License is included in this distribution
++ * in the file called LICENSE.GPL.
++ *
++ * BSD LICENSE
++ *
++ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * * Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * * Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in
++ * the documentation and/or other materials provided with the
++ * distribution.
++ * * Neither the name of OpenVPN Technologies nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <crypto/algapi.h>
++
++#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
++
++/* Generic path for arbitrary size */
++static inline unsigned long
++__crypto_memneq_generic(const void *a, const void *b, size_t size)
++{
++ unsigned long neq = 0;
++
++#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
++ while (size >= sizeof(unsigned long)) {
++ neq |= *(unsigned long *)a ^ *(unsigned long *)b;
++ OPTIMIZER_HIDE_VAR(neq);
++ a += sizeof(unsigned long);
++ b += sizeof(unsigned long);
++ size -= sizeof(unsigned long);
++ }
++#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
++ while (size > 0) {
++ neq |= *(unsigned char *)a ^ *(unsigned char *)b;
++ OPTIMIZER_HIDE_VAR(neq);
++ a += 1;
++ b += 1;
++ size -= 1;
++ }
++ return neq;
++}
++
++/* Loop-free fast-path for frequently used 16-byte size */
++static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
++{
++ unsigned long neq = 0;
++
++#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
++ if (sizeof(unsigned long) == 8) {
++ neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
++ OPTIMIZER_HIDE_VAR(neq);
++ } else if (sizeof(unsigned int) == 4) {
++ neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
++ OPTIMIZER_HIDE_VAR(neq);
++ } else
++#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
++ {
++ neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
++ OPTIMIZER_HIDE_VAR(neq);
++ neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
++ OPTIMIZER_HIDE_VAR(neq);
++ }
++
++ return neq;
++}
++
++/* Compare two areas of memory without leaking timing information,
++ * and with special optimizations for common sizes. Users should
++ * not call this function directly, but should instead use
++ * crypto_memneq defined in crypto/algapi.h.
++ */
++noinline unsigned long __crypto_memneq(const void *a, const void *b,
++ size_t size)
++{
++ switch (size) {
++ case 16:
++ return __crypto_memneq_16(a, b);
++ default:
++ return __crypto_memneq_generic(a, b, size);
++ }
++}
++EXPORT_SYMBOL(__crypto_memneq);
++
++#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
+diff --git a/lib/mpi/mpi-mod.c b/lib/mpi/mpi-mod.c
+index 47bc59edd4ff9..54fcc01564d9d 100644
+--- a/lib/mpi/mpi-mod.c
++++ b/lib/mpi/mpi-mod.c
+@@ -40,6 +40,8 @@ mpi_barrett_t mpi_barrett_init(MPI m, int copy)
+
+ mpi_normalize(m);
+ ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL);
++ if (!ctx)
++ return NULL;
+
+ if (copy) {
+ ctx->m = mpi_copy(m);
+diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
+index 39c4c67310946..3cb6bd148fa9e 100644
+--- a/lib/mpi/mpicoder.c
++++ b/lib/mpi/mpicoder.c
+@@ -504,7 +504,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
+
+ while (sg_miter_next(&miter)) {
+ buff = miter.addr;
+- len = miter.length;
++ len = min_t(unsigned, miter.length, nbytes);
++ nbytes -= len;
+
+ for (x = 0; x < len; x++) {
+ a <<= 8;
+diff --git a/lib/nlattr.c b/lib/nlattr.c
+index 86029ad5ead4f..73635bdb00620 100644
+--- a/lib/nlattr.c
++++ b/lib/nlattr.c
+@@ -10,6 +10,7 @@
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/jiffies.h>
++#include <linux/nospec.h>
+ #include <linux/skbuff.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+@@ -369,6 +370,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
+ if (type <= 0 || type > maxtype)
+ return 0;
+
++ type = array_index_nospec(type, maxtype + 1);
+ pt = &policy[type];
+
+ BUG_ON(pt->type > NLA_TYPE_MAX);
+@@ -584,6 +586,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
+ }
+ continue;
+ }
++ type = array_index_nospec(type, maxtype + 1);
+ if (policy) {
+ int err = validate_nla(nla, maxtype, policy,
+ validate, extack, depth);
+diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
+index f9e89001b52eb..199ab201d5019 100644
+--- a/lib/nmi_backtrace.c
++++ b/lib/nmi_backtrace.c
+@@ -75,6 +75,12 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
+ touch_softlockup_watchdog();
+ }
+
++ /*
++ * Force flush any remote buffers that might be stuck in IRQ context
++ * and therefore could not run their irq_work.
++ */
++ printk_trigger_flush();
++
+ clear_bit_unlock(0, &backtrace_flag);
+ put_cpu();
+ }
+diff --git a/lib/nodemask.c b/lib/nodemask.c
+index 3aa454c54c0de..e22647f5181b3 100644
+--- a/lib/nodemask.c
++++ b/lib/nodemask.c
+@@ -3,9 +3,9 @@
+ #include <linux/module.h>
+ #include <linux/random.h>
+
+-int __next_node_in(int node, const nodemask_t *srcp)
++unsigned int __next_node_in(int node, const nodemask_t *srcp)
+ {
+- int ret = __next_node(node, srcp);
++ unsigned int ret = __next_node(node, srcp);
+
+ if (ret == MAX_NUMNODES)
+ ret = __first_node(srcp);
+diff --git a/lib/notifier-error-inject.c b/lib/notifier-error-inject.c
+index 21016b32d3131..2b24ea6c94979 100644
+--- a/lib/notifier-error-inject.c
++++ b/lib/notifier-error-inject.c
+@@ -15,7 +15,7 @@ static int debugfs_errno_get(void *data, u64 *val)
+ return 0;
+ }
+
+-DEFINE_SIMPLE_ATTRIBUTE(fops_errno, debugfs_errno_get, debugfs_errno_set,
++DEFINE_SIMPLE_ATTRIBUTE_SIGNED(fops_errno, debugfs_errno_get, debugfs_errno_set,
+ "%lld\n");
+
+ static struct dentry *debugfs_create_errno(const char *name, umode_t mode,
+diff --git a/lib/once.c b/lib/once.c
+index 59149bf3bfb4a..351f66aad310a 100644
+--- a/lib/once.c
++++ b/lib/once.c
+@@ -66,3 +66,33 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
+ once_disable_jump(once_key, mod);
+ }
+ EXPORT_SYMBOL(__do_once_done);
++
++static DEFINE_MUTEX(once_mutex);
++
++bool __do_once_slow_start(bool *done)
++ __acquires(once_mutex)
++{
++ mutex_lock(&once_mutex);
++ if (*done) {
++ mutex_unlock(&once_mutex);
++ /* Keep sparse happy by restoring an even lock count on
++ * this mutex. In case we return here, we don't call into
++ * __do_once_done but return early in the DO_ONCE_SLOW() macro.
++ */
++ __acquire(once_mutex);
++ return false;
++ }
++
++ return true;
++}
++EXPORT_SYMBOL(__do_once_slow_start);
++
++void __do_once_slow_done(bool *done, struct static_key_true *once_key,
++ struct module *mod)
++ __releases(once_mutex)
++{
++ *done = true;
++ mutex_unlock(&once_mutex);
++ once_disable_jump(once_key, mod);
++}
++EXPORT_SYMBOL(__do_once_slow_done);
+diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
+index af9302141bcf6..e5c5315da2741 100644
+--- a/lib/percpu-refcount.c
++++ b/lib/percpu-refcount.c
+@@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
+ data = kzalloc(sizeof(*ref->data), gfp);
+ if (!data) {
+ free_percpu((void __percpu *)ref->percpu_count_ptr);
++ ref->percpu_count_ptr = 0;
+ return -ENOMEM;
+ }
+
+diff --git a/lib/radix-tree.c b/lib/radix-tree.c
+index b3afafe46fffb..c7918b7b8a23c 100644
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -1134,7 +1134,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
+ void __rcu **radix_tree_iter_resume(void __rcu **slot,
+ struct radix_tree_iter *iter)
+ {
+- slot++;
+ iter->index = __radix_tree_iter_add(iter, 1);
+ iter->next_index = iter->index;
+ iter->tags = 0;
+diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
+index a4c7cd74cff58..4fb7700a741bd 100644
+--- a/lib/raid6/test/Makefile
++++ b/lib/raid6/test/Makefile
+@@ -4,6 +4,8 @@
+ # from userspace.
+ #
+
++pound := \#
++
+ CC = gcc
+ OPTFLAGS = -O2 # Adjust as desired
+ CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
+@@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes)
+ OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
+ CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
+ else
+- HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
++ HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
+ ifeq ($(HAS_ALTIVEC),yes)
+ CFLAGS += -I../../../arch/powerpc/include
+diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
+index a3cf071941ab4..841a55242abaa 100644
+--- a/lib/raid6/test/test.c
++++ b/lib/raid6/test/test.c
+@@ -19,7 +19,6 @@
+ #define NDISKS 16 /* Including P and Q */
+
+ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+-struct raid6_calls raid6_call;
+
+ char *dataptrs[NDISKS];
+ char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+diff --git a/lib/random32.c b/lib/random32.c
+index 4d0e05e471d72..f0ab17c2244be 100644
+--- a/lib/random32.c
++++ b/lib/random32.c
+@@ -39,8 +39,9 @@
+ #include <linux/random.h>
+ #include <linux/sched.h>
+ #include <linux/bitops.h>
++#include <linux/slab.h>
++#include <linux/notifier.h>
+ #include <asm/unaligned.h>
+-#include <trace/events/random.h>
+
+ /**
+ * prandom_u32_state - seeded pseudo-random number generator.
+@@ -386,7 +387,6 @@ u32 prandom_u32(void)
+ struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+ u32 res = siprand_u32(state);
+
+- trace_prandom_u32(res);
+ put_cpu_ptr(&net_rand_state);
+ return res;
+ }
+@@ -552,9 +552,11 @@ static void prandom_reseed(struct timer_list *unused)
+ * To avoid worrying about whether it's safe to delay that interrupt
+ * long enough to seed all CPUs, just schedule an immediate timer event.
+ */
+-static void prandom_timer_start(struct random_ready_callback *unused)
++static int prandom_timer_start(struct notifier_block *nb,
++ unsigned long action, void *data)
+ {
+ mod_timer(&seed_timer, jiffies);
++ return 0;
+ }
+
+ #ifdef CONFIG_RANDOM32_SELFTEST
+@@ -618,13 +620,13 @@ core_initcall(prandom32_state_selftest);
+ */
+ static int __init prandom_init_late(void)
+ {
+- static struct random_ready_callback random_ready = {
+- .func = prandom_timer_start
++ static struct notifier_block random_ready = {
++ .notifier_call = prandom_timer_start
+ };
+- int ret = add_random_ready_callback(&random_ready);
++ int ret = register_random_ready_notifier(&random_ready);
+
+ if (ret == -EALREADY) {
+- prandom_timer_start(&random_ready);
++ prandom_timer_start(&random_ready, 0, NULL);
+ ret = 0;
+ }
+ return ret;
+diff --git a/lib/ratelimit.c b/lib/ratelimit.c
+index e01a93f46f833..ce945c17980b9 100644
+--- a/lib/ratelimit.c
++++ b/lib/ratelimit.c
+@@ -26,10 +26,16 @@
+ */
+ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+ {
++ /* Paired with WRITE_ONCE() in .proc_handler().
+	 * Changing two values separately could be inconsistent
+	 * and some messages could be lost. (See: net_ratelimit_state).
++ */
++ int interval = READ_ONCE(rs->interval);
++ int burst = READ_ONCE(rs->burst);
+ unsigned long flags;
+ int ret;
+
+- if (!rs->interval)
++ if (!interval)
+ return 1;
+
+ /*
+@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+ if (!rs->begin)
+ rs->begin = jiffies;
+
+- if (time_is_before_jiffies(rs->begin + rs->interval)) {
++ if (time_is_before_jiffies(rs->begin + interval)) {
+ if (rs->missed) {
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ printk_deferred(KERN_WARNING
+@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+ rs->begin = jiffies;
+ rs->printed = 0;
+ }
+- if (rs->burst && rs->burst > rs->printed) {
++ if (burst && burst > rs->printed) {
+ rs->printed++;
+ ret = 1;
+ } else {
+diff --git a/lib/sha1.c b/lib/sha1.c
+index 9bd1935a14727..0494766fc574e 100644
+--- a/lib/sha1.c
++++ b/lib/sha1.c
+@@ -9,6 +9,7 @@
+ #include <linux/kernel.h>
+ #include <linux/export.h>
+ #include <linux/bitops.h>
++#include <linux/string.h>
+ #include <crypto/sha1.h>
+ #include <asm/unaligned.h>
+
+@@ -55,7 +56,8 @@
+ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
+ __u32 TEMP = input(t); setW(t, TEMP); \
+ E += TEMP + rol32(A,5) + (fn) + (constant); \
+- B = ror32(B, 2); } while (0)
++ B = ror32(B, 2); \
++ TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)
+
+ #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+ #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+@@ -84,6 +86,7 @@
+ void sha1_transform(__u32 *digest, const char *data, __u32 *array)
+ {
+ __u32 A, B, C, D, E;
++ unsigned int i = 0;
+
+ A = digest[0];
+ B = digest[1];
+@@ -92,94 +95,24 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array)
+ E = digest[4];
+
+ /* Round 1 - iterations 0-16 take their input from 'data' */
+- T_0_15( 0, A, B, C, D, E);
+- T_0_15( 1, E, A, B, C, D);
+- T_0_15( 2, D, E, A, B, C);
+- T_0_15( 3, C, D, E, A, B);
+- T_0_15( 4, B, C, D, E, A);
+- T_0_15( 5, A, B, C, D, E);
+- T_0_15( 6, E, A, B, C, D);
+- T_0_15( 7, D, E, A, B, C);
+- T_0_15( 8, C, D, E, A, B);
+- T_0_15( 9, B, C, D, E, A);
+- T_0_15(10, A, B, C, D, E);
+- T_0_15(11, E, A, B, C, D);
+- T_0_15(12, D, E, A, B, C);
+- T_0_15(13, C, D, E, A, B);
+- T_0_15(14, B, C, D, E, A);
+- T_0_15(15, A, B, C, D, E);
++ for (; i < 16; ++i)
++ T_0_15(i, A, B, C, D, E);
+
+ /* Round 1 - tail. Input from 512-bit mixing array */
+- T_16_19(16, E, A, B, C, D);
+- T_16_19(17, D, E, A, B, C);
+- T_16_19(18, C, D, E, A, B);
+- T_16_19(19, B, C, D, E, A);
++ for (; i < 20; ++i)
++ T_16_19(i, A, B, C, D, E);
+
+ /* Round 2 */
+- T_20_39(20, A, B, C, D, E);
+- T_20_39(21, E, A, B, C, D);
+- T_20_39(22, D, E, A, B, C);
+- T_20_39(23, C, D, E, A, B);
+- T_20_39(24, B, C, D, E, A);
+- T_20_39(25, A, B, C, D, E);
+- T_20_39(26, E, A, B, C, D);
+- T_20_39(27, D, E, A, B, C);
+- T_20_39(28, C, D, E, A, B);
+- T_20_39(29, B, C, D, E, A);
+- T_20_39(30, A, B, C, D, E);
+- T_20_39(31, E, A, B, C, D);
+- T_20_39(32, D, E, A, B, C);
+- T_20_39(33, C, D, E, A, B);
+- T_20_39(34, B, C, D, E, A);
+- T_20_39(35, A, B, C, D, E);
+- T_20_39(36, E, A, B, C, D);
+- T_20_39(37, D, E, A, B, C);
+- T_20_39(38, C, D, E, A, B);
+- T_20_39(39, B, C, D, E, A);
++ for (; i < 40; ++i)
++ T_20_39(i, A, B, C, D, E);
+
+ /* Round 3 */
+- T_40_59(40, A, B, C, D, E);
+- T_40_59(41, E, A, B, C, D);
+- T_40_59(42, D, E, A, B, C);
+- T_40_59(43, C, D, E, A, B);
+- T_40_59(44, B, C, D, E, A);
+- T_40_59(45, A, B, C, D, E);
+- T_40_59(46, E, A, B, C, D);
+- T_40_59(47, D, E, A, B, C);
+- T_40_59(48, C, D, E, A, B);
+- T_40_59(49, B, C, D, E, A);
+- T_40_59(50, A, B, C, D, E);
+- T_40_59(51, E, A, B, C, D);
+- T_40_59(52, D, E, A, B, C);
+- T_40_59(53, C, D, E, A, B);
+- T_40_59(54, B, C, D, E, A);
+- T_40_59(55, A, B, C, D, E);
+- T_40_59(56, E, A, B, C, D);
+- T_40_59(57, D, E, A, B, C);
+- T_40_59(58, C, D, E, A, B);
+- T_40_59(59, B, C, D, E, A);
++ for (; i < 60; ++i)
++ T_40_59(i, A, B, C, D, E);
+
+ /* Round 4 */
+- T_60_79(60, A, B, C, D, E);
+- T_60_79(61, E, A, B, C, D);
+- T_60_79(62, D, E, A, B, C);
+- T_60_79(63, C, D, E, A, B);
+- T_60_79(64, B, C, D, E, A);
+- T_60_79(65, A, B, C, D, E);
+- T_60_79(66, E, A, B, C, D);
+- T_60_79(67, D, E, A, B, C);
+- T_60_79(68, C, D, E, A, B);
+- T_60_79(69, B, C, D, E, A);
+- T_60_79(70, A, B, C, D, E);
+- T_60_79(71, E, A, B, C, D);
+- T_60_79(72, D, E, A, B, C);
+- T_60_79(73, C, D, E, A, B);
+- T_60_79(74, B, C, D, E, A);
+- T_60_79(75, A, B, C, D, E);
+- T_60_79(76, E, A, B, C, D);
+- T_60_79(77, D, E, A, B, C);
+- T_60_79(78, C, D, E, A, B);
+- T_60_79(79, B, C, D, E, A);
++ for (; i < 80; ++i)
++ T_60_79(i, A, B, C, D, E);
+
+ digest[0] += A;
+ digest[1] += B;
+diff --git a/lib/siphash.c b/lib/siphash.c
+index a90112ee72a1f..71d315a6ad623 100644
+--- a/lib/siphash.c
++++ b/lib/siphash.c
+@@ -18,19 +18,13 @@
+ #include <asm/word-at-a-time.h>
+ #endif
+
+-#define SIPROUND \
+- do { \
+- v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+- v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+- v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+- v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+- } while (0)
++#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+ #define PREAMBLE(len) \
+- u64 v0 = 0x736f6d6570736575ULL; \
+- u64 v1 = 0x646f72616e646f6dULL; \
+- u64 v2 = 0x6c7967656e657261ULL; \
+- u64 v3 = 0x7465646279746573ULL; \
++ u64 v0 = SIPHASH_CONST_0; \
++ u64 v1 = SIPHASH_CONST_1; \
++ u64 v2 = SIPHASH_CONST_2; \
++ u64 v3 = SIPHASH_CONST_3; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+@@ -49,6 +43,7 @@
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -80,8 +75,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+ POSTAMBLE
+ }
+ EXPORT_SYMBOL(__siphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -113,7 +108,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+ POSTAMBLE
+ }
+ EXPORT_SYMBOL(__siphash_unaligned);
+-#endif
+
+ /**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+@@ -250,6 +244,7 @@ EXPORT_SYMBOL(siphash_3u32);
+ HSIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u64));
+@@ -280,8 +275,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+@@ -313,7 +308,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_unaligned);
+-#endif
+
+ /**
+ * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
+@@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ }
+ EXPORT_SYMBOL(hsiphash_4u32);
+ #else
+-#define HSIPROUND \
+- do { \
+- v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+- v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+- v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+- v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+- } while (0)
++#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+ #define HPREAMBLE(len) \
+- u32 v0 = 0; \
+- u32 v1 = 0; \
+- u32 v2 = 0x6c796765U; \
+- u32 v3 = 0x74656462U; \
++ u32 v0 = HSIPHASH_CONST_0; \
++ u32 v1 = HSIPHASH_CONST_1; \
++ u32 v2 = HSIPHASH_CONST_2; \
++ u32 v3 = HSIPHASH_CONST_3; \
+ u32 b = ((u32)(len)) << 24; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+@@ -418,6 +406,7 @@ EXPORT_SYMBOL(hsiphash_4u32);
+ HSIPROUND; \
+ return v1 ^ v3;
+
++#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ {
+ const u8 *end = data + len - (len % sizeof(u32));
+@@ -438,8 +427,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_aligned);
++#endif
+
+-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+ {
+@@ -461,7 +450,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
+ HPOSTAMBLE
+ }
+ EXPORT_SYMBOL(__hsiphash_unaligned);
+-#endif
+
+ /**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
+index 046ac6297c781..a2bb7738c373c 100644
+--- a/lib/smp_processor_id.c
++++ b/lib/smp_processor_id.c
+@@ -47,9 +47,9 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
+
+ printk("caller is %pS\n", __builtin_return_address(0));
+ dump_stack();
+- instrumentation_end();
+
+ out_enable:
++ instrumentation_end();
+ preempt_enable_no_resched_notrace();
+ out:
+ return this_cpu;
+diff --git a/lib/stackdepot.c b/lib/stackdepot.c
+index 0a2e417f83cba..e90f0f19e77f9 100644
+--- a/lib/stackdepot.c
++++ b/lib/stackdepot.c
+@@ -20,7 +20,6 @@
+ */
+
+ #include <linux/gfp.h>
+-#include <linux/interrupt.h>
+ #include <linux/jhash.h>
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+@@ -341,26 +340,3 @@ fast_exit:
+ return retval;
+ }
+ EXPORT_SYMBOL_GPL(stack_depot_save);
+-
+-static inline int in_irqentry_text(unsigned long ptr)
+-{
+- return (ptr >= (unsigned long)&__irqentry_text_start &&
+- ptr < (unsigned long)&__irqentry_text_end) ||
+- (ptr >= (unsigned long)&__softirqentry_text_start &&
+- ptr < (unsigned long)&__softirqentry_text_end);
+-}
+-
+-unsigned int filter_irq_stacks(unsigned long *entries,
+- unsigned int nr_entries)
+-{
+- unsigned int i;
+-
+- for (i = 0; i < nr_entries; i++) {
+- if (in_irqentry_text(entries[i])) {
+- /* Include the irqentry function into the stack. */
+- return i + 1;
+- }
+- }
+- return nr_entries;
+-}
+-EXPORT_SYMBOL_GPL(filter_irq_stacks);
+diff --git a/lib/string_helpers.c b/lib/string_helpers.c
+index 3806a52ce697a..2ddc10bd9add6 100644
+--- a/lib/string_helpers.c
++++ b/lib/string_helpers.c
+@@ -696,3 +696,23 @@ void kfree_strarray(char **array, size_t n)
+ kfree(array);
+ }
+ EXPORT_SYMBOL_GPL(kfree_strarray);
++
++/**
++ * memcpy_and_pad - Copy one buffer to another with padding
++ * @dest: Where to copy to
++ * @dest_len: The destination buffer size
++ * @src: Where to copy from
++ * @count: The number of bytes to copy
++ * @pad: Character to use for padding if space is left in destination.
++ */
++void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
++ int pad)
++{
++ if (dest_len > count) {
++ memcpy(dest, src, count);
++ memset(dest + count, pad, dest_len - count);
++ } else {
++ memcpy(dest, src, dest_len);
++ }
++}
++EXPORT_SYMBOL(memcpy_and_pad);
+diff --git a/lib/test_bpf.c b/lib/test_bpf.c
+index 830a18ecffc88..84f5dd3b0fc7b 100644
+--- a/lib/test_bpf.c
++++ b/lib/test_bpf.c
+@@ -8890,9 +8890,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = {
+ .build_skb = build_test_skb_linear_no_head_frag,
+ .features = NETIF_F_SG | NETIF_F_FRAGLIST |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
+- NETIF_F_LLTX_BIT | NETIF_F_GRO |
++ NETIF_F_LLTX | NETIF_F_GRO |
+ NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+- NETIF_F_HW_VLAN_STAG_TX_BIT
++ NETIF_F_HW_VLAN_STAG_TX
+ }
+ };
+
+@@ -8992,10 +8992,15 @@ static __init int test_bpf(void)
+ struct tail_call_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
++ int flags;
+ int result;
+ int stack_depth;
+ };
+
++/* Flags that can be passed to tail call test cases */
++#define FLAG_NEED_STATE BIT(0)
++#define FLAG_RESULT_IN_STATE BIT(1)
++
+ /*
+ * Magic marker used in test snippets for tail calls below.
+ * BPF_LD/MOV to R2 and R2 with this immediate value is replaced
+@@ -9065,32 +9070,38 @@ static struct tail_call_test tail_call_tests[] = {
+ {
+ "Tail call error path, max count reached",
+ .insns = {
+- BPF_ALU64_IMM(BPF_ADD, R1, 1),
+- BPF_ALU64_REG(BPF_MOV, R0, R1),
++ BPF_LDX_MEM(BPF_W, R2, R1, 0),
++ BPF_ALU64_IMM(BPF_ADD, R2, 1),
++ BPF_STX_MEM(BPF_W, R1, R2, 0),
+ TAIL_CALL(0),
+ BPF_EXIT_INSN(),
+ },
+- .result = MAX_TAIL_CALL_CNT + 1,
++ .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
++ .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS,
+ },
+ {
+ "Tail call error path, NULL target",
+ .insns = {
+- BPF_ALU64_IMM(BPF_MOV, R0, -1),
++ BPF_LDX_MEM(BPF_W, R2, R1, 0),
++ BPF_ALU64_IMM(BPF_ADD, R2, 1),
++ BPF_STX_MEM(BPF_W, R1, R2, 0),
+ TAIL_CALL(TAIL_CALL_NULL),
+- BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+- .result = 1,
++ .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
++ .result = MAX_TESTRUNS,
+ },
+ {
+ "Tail call error path, index out of range",
+ .insns = {
+- BPF_ALU64_IMM(BPF_MOV, R0, -1),
++ BPF_LDX_MEM(BPF_W, R2, R1, 0),
++ BPF_ALU64_IMM(BPF_ADD, R2, 1),
++ BPF_STX_MEM(BPF_W, R1, R2, 0),
+ TAIL_CALL(TAIL_CALL_INVALID),
+- BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+- .result = 1,
++ .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
++ .result = MAX_TESTRUNS,
+ },
+ };
+
+@@ -9196,6 +9207,8 @@ static __init int test_tail_calls(struct bpf_array *progs)
+ for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+ struct tail_call_test *test = &tail_call_tests[i];
+ struct bpf_prog *fp = progs->ptrs[i];
++ int *data = NULL;
++ int state = 0;
+ u64 duration;
+ int ret;
+
+@@ -9212,7 +9225,11 @@ static __init int test_tail_calls(struct bpf_array *progs)
+ if (fp->jited)
+ jit_cnt++;
+
+- ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
++ if (test->flags & FLAG_NEED_STATE)
++ data = &state;
++ ret = __run_one(fp, data, MAX_TESTRUNS, &duration);
++ if (test->flags & FLAG_RESULT_IN_STATE)
++ ret = state;
+ if (ret == test->result) {
+ pr_cont("%lld PASS", duration);
+ pass_cnt++;
+diff --git a/lib/test_firmware.c b/lib/test_firmware.c
+index 1bccd6cd5f482..b64f87f4f2284 100644
+--- a/lib/test_firmware.c
++++ b/lib/test_firmware.c
+@@ -22,6 +22,7 @@
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
+ #include <linux/delay.h>
++#include <linux/kstrtox.h>
+ #include <linux/kthread.h>
+ #include <linux/vmalloc.h>
+ #include <linux/efi_embedded_fw.h>
+@@ -41,6 +42,7 @@ struct test_batched_req {
+ bool sent;
+ const struct firmware *fw;
+ const char *name;
++ const char *fw_buf;
+ struct completion completion;
+ struct task_struct *task;
+ struct device *dev;
+@@ -143,8 +145,14 @@ static void __test_release_all_firmware(void)
+
+ for (i = 0; i < test_fw_config->num_requests; i++) {
+ req = &test_fw_config->reqs[i];
+- if (req->fw)
++ if (req->fw) {
++ if (req->fw_buf) {
++ kfree_const(req->fw_buf);
++ req->fw_buf = NULL;
++ }
+ release_firmware(req->fw);
++ req->fw = NULL;
++ }
+ }
+
+ vfree(test_fw_config->reqs);
+@@ -175,7 +183,7 @@ static int __kstrncpy(char **dst, const char *name, size_t count, gfp_t gfp)
+ {
+ *dst = kstrndup(name, count, gfp);
+ if (!*dst)
+- return -ENOSPC;
++ return -ENOMEM;
+ return count;
+ }
+
+@@ -313,16 +321,26 @@ static ssize_t config_test_show_str(char *dst,
+ return len;
+ }
+
+-static int test_dev_config_update_bool(const char *buf, size_t size,
++static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+ {
+ int ret;
+
+- mutex_lock(&test_fw_mutex);
+- if (strtobool(buf, cfg) < 0)
++ if (kstrtobool(buf, cfg) < 0)
+ ret = -EINVAL;
+ else
+ ret = size;
++
++ return ret;
++}
++
++static int test_dev_config_update_bool(const char *buf, size_t size,
++ bool *cfg)
++{
++ int ret;
++
++ mutex_lock(&test_fw_mutex);
++ ret = __test_dev_config_update_bool(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+@@ -333,7 +351,8 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val)
+ return snprintf(buf, PAGE_SIZE, "%d\n", val);
+ }
+
+-static int test_dev_config_update_size_t(const char *buf,
++static int __test_dev_config_update_size_t(
++ const char *buf,
+ size_t size,
+ size_t *cfg)
+ {
+@@ -344,9 +363,7 @@ static int test_dev_config_update_size_t(const char *buf,
+ if (ret)
+ return ret;
+
+- mutex_lock(&test_fw_mutex);
+ *(size_t *)cfg = new;
+- mutex_unlock(&test_fw_mutex);
+
+ /* Always return full write size even if we didn't consume all */
+ return size;
+@@ -362,7 +379,7 @@ static ssize_t test_dev_config_show_int(char *buf, int val)
+ return snprintf(buf, PAGE_SIZE, "%d\n", val);
+ }
+
+-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
++static int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+ {
+ u8 val;
+ int ret;
+@@ -371,14 +388,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+ if (ret)
+ return ret;
+
+- mutex_lock(&test_fw_mutex);
+ *(u8 *)cfg = val;
+- mutex_unlock(&test_fw_mutex);
+
+ /* Always return full write size even if we didn't consume all */
+ return size;
+ }
+
++static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
++{
++ int ret;
++
++ mutex_lock(&test_fw_mutex);
++ ret = __test_dev_config_update_u8(buf, size, cfg);
++ mutex_unlock(&test_fw_mutex);
++
++ return ret;
++}
++
+ static ssize_t test_dev_config_show_u8(char *buf, u8 val)
+ {
+ return snprintf(buf, PAGE_SIZE, "%u\n", val);
+@@ -405,10 +431,10 @@ static ssize_t config_num_requests_store(struct device *dev,
+ mutex_unlock(&test_fw_mutex);
+ goto out;
+ }
+- mutex_unlock(&test_fw_mutex);
+
+- rc = test_dev_config_update_u8(buf, count,
+- &test_fw_config->num_requests);
++ rc = __test_dev_config_update_u8(buf, count,
++ &test_fw_config->num_requests);
++ mutex_unlock(&test_fw_mutex);
+
+ out:
+ return rc;
+@@ -452,10 +478,10 @@ static ssize_t config_buf_size_store(struct device *dev,
+ mutex_unlock(&test_fw_mutex);
+ goto out;
+ }
+- mutex_unlock(&test_fw_mutex);
+
+- rc = test_dev_config_update_size_t(buf, count,
+- &test_fw_config->buf_size);
++ rc = __test_dev_config_update_size_t(buf, count,
++ &test_fw_config->buf_size);
++ mutex_unlock(&test_fw_mutex);
+
+ out:
+ return rc;
+@@ -482,10 +508,10 @@ static ssize_t config_file_offset_store(struct device *dev,
+ mutex_unlock(&test_fw_mutex);
+ goto out;
+ }
+- mutex_unlock(&test_fw_mutex);
+
+- rc = test_dev_config_update_size_t(buf, count,
+- &test_fw_config->file_offset);
++ rc = __test_dev_config_update_size_t(buf, count,
++ &test_fw_config->file_offset);
++ mutex_unlock(&test_fw_mutex);
+
+ out:
+ return rc;
+@@ -580,12 +606,14 @@ static ssize_t trigger_request_store(struct device *dev,
+
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+- return -ENOSPC;
++ return -ENOMEM;
+
+ pr_info("loading '%s'\n", name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
++ if (test_fw_config->reqs)
++ __test_release_all_firmware();
+ test_firmware = NULL;
+ rc = request_firmware(&test_firmware, name, dev);
+ if (rc) {
+@@ -626,7 +654,7 @@ static ssize_t trigger_request_platform_store(struct device *dev,
+
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+- return -ENOSPC;
++ return -ENOMEM;
+
+ pr_info("inserting test platform fw '%s'\n", name);
+ efi_embedded_fw.name = name;
+@@ -679,13 +707,15 @@ static ssize_t trigger_async_request_store(struct device *dev,
+
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+- return -ENOSPC;
++ return -ENOMEM;
+
+ pr_info("loading '%s'\n", name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
+ test_firmware = NULL;
++ if (test_fw_config->reqs)
++ __test_release_all_firmware();
+ rc = request_firmware_nowait(THIS_MODULE, 1, name, dev, GFP_KERNEL,
+ NULL, trigger_async_request_cb);
+ if (rc) {
+@@ -722,12 +752,14 @@ static ssize_t trigger_custom_fallback_store(struct device *dev,
+
+ name = kstrndup(buf, count, GFP_KERNEL);
+ if (!name)
+- return -ENOSPC;
++ return -ENOMEM;
+
+ pr_info("loading '%s' using custom fallback mechanism\n", name);
+
+ mutex_lock(&test_fw_mutex);
+ release_firmware(test_firmware);
++ if (test_fw_config->reqs)
++ __test_release_all_firmware();
+ test_firmware = NULL;
+ rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOUEVENT, name,
+ dev, GFP_KERNEL, NULL,
+@@ -771,7 +803,7 @@ static int test_fw_run_batch_request(void *data)
+
+ test_buf = kzalloc(TEST_FIRMWARE_BUF_SIZE, GFP_KERNEL);
+ if (!test_buf)
+- return -ENOSPC;
++ return -ENOMEM;
+
+ if (test_fw_config->partial)
+ req->rc = request_partial_firmware_into_buf
+@@ -790,6 +822,8 @@ static int test_fw_run_batch_request(void *data)
+ test_fw_config->buf_size);
+ if (!req->fw)
+ kfree(test_buf);
++ else
++ req->fw_buf = test_buf;
+ } else {
+ req->rc = test_fw_config->req_firmware(&req->fw,
+ req->name,
+@@ -829,6 +863,11 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
+
+ mutex_lock(&test_fw_mutex);
+
++ if (test_fw_config->reqs) {
++ rc = -EBUSY;
++ goto out_bail;
++ }
++
+ test_fw_config->reqs =
+ vzalloc(array3_size(sizeof(struct test_batched_req),
+ test_fw_config->num_requests, 2));
+@@ -845,6 +884,7 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
+ req->fw = NULL;
+ req->idx = i;
+ req->name = test_fw_config->name;
++ req->fw_buf = NULL;
+ req->dev = dev;
+ init_completion(&req->completion);
+ req->task = kthread_run(test_fw_run_batch_request, req,
+@@ -927,6 +967,11 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
+
+ mutex_lock(&test_fw_mutex);
+
++ if (test_fw_config->reqs) {
++ rc = -EBUSY;
++ goto out_bail;
++ }
++
+ test_fw_config->reqs =
+ vzalloc(array3_size(sizeof(struct test_batched_req),
+ test_fw_config->num_requests, 2));
+@@ -944,6 +989,7 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
+ for (i = 0; i < test_fw_config->num_requests; i++) {
+ req = &test_fw_config->reqs[i];
+ req->name = test_fw_config->name;
++ req->fw_buf = NULL;
+ req->fw = NULL;
+ req->idx = i;
+ init_completion(&req->completion);
+@@ -1111,6 +1157,7 @@ static int __init test_firmware_init(void)
+
+ rc = misc_register(&test_fw_misc_device);
+ if (rc) {
++ __test_firmware_config_free();
+ kfree(test_fw_config);
+ pr_err("could not register misc device: %d\n", rc);
+ return rc;
+diff --git a/lib/test_hmm.c b/lib/test_hmm.c
+index c259842f6d443..a89cb4281c9dc 100644
+--- a/lib/test_hmm.c
++++ b/lib/test_hmm.c
+@@ -731,7 +731,7 @@ static int dmirror_exclusive(struct dmirror *dmirror,
+
+ mmap_read_lock(mm);
+ for (addr = start; addr < end; addr = next) {
+- unsigned long mapped;
++ unsigned long mapped = 0;
+ int i;
+
+ if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
+@@ -740,7 +740,13 @@ static int dmirror_exclusive(struct dmirror *dmirror,
+ next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
+
+ ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
+- mapped = dmirror_atomic_map(addr, next, pages, dmirror);
++ /*
++ * Do dmirror_atomic_map() iff all pages are marked for
++ * exclusive access to avoid accessing uninitialized
++ * fields of pages.
++ */
++ if (ret == (next - addr) >> PAGE_SHIFT)
++ mapped = dmirror_atomic_map(addr, next, pages, dmirror);
+ for (i = 0; i < ret; i++) {
+ if (pages[i]) {
+ unlock_page(pages[i]);
+@@ -1087,9 +1093,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
+ return 0;
+ }
+
++static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ unsigned long addr;
++
++ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
++ struct page *page;
++ int ret;
++
++ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
++ if (!page)
++ return -ENOMEM;
++
++ ret = vm_insert_page(vma, addr, page);
++ if (ret) {
++ __free_page(page);
++ return ret;
++ }
++ put_page(page);
++ }
++
++ return 0;
++}
++
+ static const struct file_operations dmirror_fops = {
+ .open = dmirror_fops_open,
+ .release = dmirror_fops_release,
++ .mmap = dmirror_fops_mmap,
+ .unlocked_ioctl = dmirror_fops_unlocked_ioctl,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE,
+diff --git a/lib/test_kasan.c b/lib/test_kasan.c
+index 8835e07845785..89f444cabd4a8 100644
+--- a/lib/test_kasan.c
++++ b/lib/test_kasan.c
+@@ -125,6 +125,7 @@ static void kmalloc_oob_right(struct kunit *test)
+ ptr = kmalloc(size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ /*
+ * An unaligned access past the requested kmalloc size.
+ * Only generic KASAN can precisely detect these.
+@@ -153,6 +154,7 @@ static void kmalloc_oob_left(struct kunit *test)
+ ptr = kmalloc(size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1));
+ kfree(ptr);
+ }
+@@ -165,6 +167,7 @@ static void kmalloc_node_oob_right(struct kunit *test)
+ ptr = kmalloc_node(size, GFP_KERNEL, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
+ kfree(ptr);
+ }
+@@ -185,6 +188,7 @@ static void kmalloc_pagealloc_oob_right(struct kunit *test)
+ ptr = kmalloc(size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0);
+
+ kfree(ptr);
+@@ -265,6 +269,7 @@ static void kmalloc_large_oob_right(struct kunit *test)
+ ptr = kmalloc(size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+ kfree(ptr);
+ }
+@@ -404,6 +409,8 @@ static void kmalloc_oob_16(struct kunit *test)
+ ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
+
++ OPTIMIZER_HIDE_VAR(ptr1);
++ OPTIMIZER_HIDE_VAR(ptr2);
+ KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);
+ kfree(ptr1);
+ kfree(ptr2);
+@@ -712,6 +719,8 @@ static void ksize_unpoisons_memory(struct kunit *test)
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ real_size = ksize(ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
++
+ /* This access shouldn't trigger a KASAN report. */
+ ptr[size] = 'x';
+
+@@ -734,6 +743,7 @@ static void ksize_uaf(struct kunit *test)
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+ kfree(ptr);
+
++ OPTIMIZER_HIDE_VAR(ptr);
+ KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));
+ KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
+ KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]);
+diff --git a/lib/test_kmod.c b/lib/test_kmod.c
+index ce15893914131..cb800b1d0d99c 100644
+--- a/lib/test_kmod.c
++++ b/lib/test_kmod.c
+@@ -1149,6 +1149,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
+ if (ret) {
+ pr_err("could not register misc device: %d\n", ret);
+ free_test_dev_kmod(test_dev);
++ test_dev = NULL;
+ goto out;
+ }
+
+diff --git a/lib/test_lockup.c b/lib/test_lockup.c
+index 906b598740a7b..c3fd87d6c2dd0 100644
+--- a/lib/test_lockup.c
++++ b/lib/test_lockup.c
+@@ -417,9 +417,14 @@ static bool test_kernel_ptr(unsigned long addr, int size)
+ return false;
+
+ /* should be at least readable kernel address */
+- if (access_ok(ptr, 1) ||
+- access_ok(ptr + size - 1, 1) ||
+- get_kernel_nofault(buf, ptr) ||
++ if (!IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) &&
++ (access_ok((void __user *)ptr, 1) ||
++ access_ok((void __user *)ptr + size - 1, 1))) {
++ pr_err("user space ptr invalid in kernel: %#lx\n", addr);
++ return true;
++ }
++
++ if (get_kernel_nofault(buf, ptr) ||
+ get_kernel_nofault(buf, ptr + size - 1)) {
+ pr_err("invalid kernel ptr: %#lx\n", addr);
+ return true;
+diff --git a/lib/test_meminit.c b/lib/test_meminit.c
+index e4f706a404b3a..75638404ed573 100644
+--- a/lib/test_meminit.c
++++ b/lib/test_meminit.c
+@@ -86,7 +86,7 @@ static int __init test_pages(int *total_failures)
+ int failures = 0, num_tests = 0;
+ int i;
+
+- for (i = 0; i < 10; i++)
++ for (i = 0; i <= MAX_ORDER; i++)
+ num_tests += do_alloc_pages_order(i, &failures);
+
+ REPORT_FAILURES_IN_FN();
+@@ -337,6 +337,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+ if (num)
+ kmem_cache_free_bulk(c, num, objects);
+ }
++ kmem_cache_destroy(c);
+ *total_failures += fail;
+ return 1;
+ }
+diff --git a/lib/test_overflow.c b/lib/test_overflow.c
+index 7a4b6f6c5473c..7a5a5738d2d21 100644
+--- a/lib/test_overflow.c
++++ b/lib/test_overflow.c
+@@ -588,12 +588,110 @@ static int __init test_overflow_allocation(void)
+ return err;
+ }
+
++struct __test_flex_array {
++ unsigned long flags;
++ size_t count;
++ unsigned long data[];
++};
++
++static int __init test_overflow_size_helpers(void)
++{
++ struct __test_flex_array *obj;
++ int count = 0;
++ int err = 0;
++ int var;
++
++#define check_one_size_helper(expected, func, args...) ({ \
++ bool __failure = false; \
++ size_t _r; \
++ \
++ _r = func(args); \
++ if (_r != (expected)) { \
++ pr_warn("expected " #func "(" #args ") " \
++ "to return %zu but got %zu instead\n", \
++ (size_t)(expected), _r); \
++ __failure = true; \
++ } \
++ count++; \
++ __failure; \
++})
++
++ var = 4;
++ err |= check_one_size_helper(20, size_mul, var++, 5);
++ err |= check_one_size_helper(20, size_mul, 4, var++);
++ err |= check_one_size_helper(0, size_mul, 0, 3);
++ err |= check_one_size_helper(0, size_mul, 3, 0);
++ err |= check_one_size_helper(6, size_mul, 2, 3);
++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1);
++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3);
++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3);
++
++ var = 4;
++ err |= check_one_size_helper(9, size_add, var++, 5);
++ err |= check_one_size_helper(9, size_add, 4, var++);
++ err |= check_one_size_helper(9, size_add, 9, 0);
++ err |= check_one_size_helper(9, size_add, 0, 9);
++ err |= check_one_size_helper(5, size_add, 2, 3);
++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1);
++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3);
++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3);
++
++ var = 4;
++ err |= check_one_size_helper(1, size_sub, var--, 3);
++ err |= check_one_size_helper(1, size_sub, 4, var--);
++ err |= check_one_size_helper(1, size_sub, 3, 2);
++ err |= check_one_size_helper(9, size_sub, 9, 0);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX);
++ err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX);
++ err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1, 1);
++ err |= check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1, 3);
++ err |= check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3);
++
++ var = 4;
++ err |= check_one_size_helper(4 * sizeof(*obj->data),
++ flex_array_size, obj, data, var++);
++ err |= check_one_size_helper(5 * sizeof(*obj->data),
++ flex_array_size, obj, data, var++);
++ err |= check_one_size_helper(0, flex_array_size, obj, data, 0);
++ err |= check_one_size_helper(sizeof(*obj->data),
++ flex_array_size, obj, data, 1);
++ err |= check_one_size_helper(7 * sizeof(*obj->data),
++ flex_array_size, obj, data, 7);
++ err |= check_one_size_helper(SIZE_MAX,
++ flex_array_size, obj, data, -1);
++ err |= check_one_size_helper(SIZE_MAX,
++ flex_array_size, obj, data, SIZE_MAX - 4);
++
++ var = 4;
++ err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
++ struct_size, obj, data, var++);
++ err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
++ struct_size, obj, data, var++);
++ err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0);
++ err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
++ struct_size, obj, data, 1);
++ err |= check_one_size_helper(SIZE_MAX,
++ struct_size, obj, data, -3);
++ err |= check_one_size_helper(SIZE_MAX,
++ struct_size, obj, data, SIZE_MAX - 3);
++
++ pr_info("%d overflow size helper tests finished\n", count);
++
++ return err;
++}
++
+ static int __init test_module_init(void)
+ {
+ int err = 0;
+
+ err |= test_overflow_calculation();
+ err |= test_overflow_shift();
++ err |= test_overflow_size_helpers();
+ err |= test_overflow_allocation();
+
+ if (err) {
+diff --git a/lib/test_scanf.c b/lib/test_scanf.c
+index b620cf7de5035..a2707af2951ab 100644
+--- a/lib/test_scanf.c
++++ b/lib/test_scanf.c
+@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
+ #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \
+ do { \
+ const T expect[2] = { expect0, expect1 }; \
+- T result[2] = {~expect[0], ~expect[1]}; \
++ T result[2] = { (T)~expect[0], (T)~expect[1] }; \
+ \
+ _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \
+ } while (0)
+diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
+index 7e7bbd0f3fd27..2062be1f2e80f 100644
+--- a/lib/test_ubsan.c
++++ b/lib/test_ubsan.c
+@@ -79,15 +79,6 @@ static void test_ubsan_load_invalid_value(void)
+ eval2 = eval;
+ }
+
+-static void test_ubsan_null_ptr_deref(void)
+-{
+- volatile int *ptr = NULL;
+- int val;
+-
+- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
+- val = *ptr;
+-}
+-
+ static void test_ubsan_misaligned_access(void)
+ {
+ volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
+@@ -98,29 +89,16 @@ static void test_ubsan_misaligned_access(void)
+ *ptr = val;
+ }
+
+-static void test_ubsan_object_size_mismatch(void)
+-{
+- /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
+- volatile int val __aligned(8) = 4;
+- volatile long long *ptr, val2;
+-
+- UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
+- ptr = (long long *)&val;
+- val2 = *ptr;
+-}
+-
+ static const test_ubsan_fp test_ubsan_array[] = {
+ test_ubsan_shift_out_of_bounds,
+ test_ubsan_out_of_bounds,
+ test_ubsan_load_invalid_value,
+ test_ubsan_misaligned_access,
+- test_ubsan_object_size_mismatch,
+ };
+
+ /* Excluded because they Oops the module. */
+ static const test_ubsan_fp skip_ubsan_array[] = {
+ test_ubsan_divrem_overflow,
+- test_ubsan_null_ptr_deref,
+ };
+
+ static int __init test_ubsan_init(void)
+diff --git a/lib/test_xarray.c b/lib/test_xarray.c
+index 8b1c318189ce8..e77d4856442c3 100644
+--- a/lib/test_xarray.c
++++ b/lib/test_xarray.c
+@@ -1463,6 +1463,25 @@ unlock:
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+
++static noinline void check_create_range_5(struct xarray *xa,
++ unsigned long index, unsigned int order)
++{
++ XA_STATE_ORDER(xas, xa, index, order);
++ unsigned int i;
++
++ xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL);
++
++ for (i = 0; i < order + 10; i++) {
++ do {
++ xas_lock(&xas);
++ xas_create_range(&xas);
++ xas_unlock(&xas);
++ } while (xas_nomem(&xas, GFP_KERNEL));
++ }
++
++ xa_destroy(xa);
++}
++
+ static noinline void check_create_range(struct xarray *xa)
+ {
+ unsigned int order;
+@@ -1490,6 +1509,9 @@ static noinline void check_create_range(struct xarray *xa)
+ check_create_range_4(xa, (3U << order) + 1, order);
+ check_create_range_4(xa, (3U << order) - 1, order);
+ check_create_range_4(xa, (1U << 24) + 1, order);
++
++ check_create_range_5(xa, 0, order);
++ check_create_range_5(xa, (1U << order), order);
+ }
+
+ check_create_range_3();
+diff --git a/lib/ts_bm.c b/lib/ts_bm.c
+index 4cf250031f0f0..352ae837e0317 100644
+--- a/lib/ts_bm.c
++++ b/lib/ts_bm.c
+@@ -60,10 +60,12 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
+ struct ts_bm *bm = ts_config_priv(conf);
+ unsigned int i, text_len, consumed = state->offset;
+ const u8 *text;
+- int shift = bm->patlen - 1, bs;
++ int bs;
+ const u8 icase = conf->flags & TS_IGNORECASE;
+
+ for (;;) {
++ int shift = bm->patlen - 1;
++
+ text_len = conf->get_next_block(consumed, &text, conf, state);
+
+ if (unlikely(text_len == 0))
+diff --git a/lib/ubsan.c b/lib/ubsan.c
+index bdc380ff5d5c7..60c7099857a05 100644
+--- a/lib/ubsan.c
++++ b/lib/ubsan.c
+@@ -154,16 +154,7 @@ static void ubsan_epilogue(void)
+
+ current->in_ubsan--;
+
+- if (panic_on_warn) {
+- /*
+- * This thread may hit another WARN() in the panic path.
+- * Resetting this prevents additional WARN() from panicking the
+- * system on this thread. Other threads are blocked by the
+- * panic_mutex in panic().
+- */
+- panic_on_warn = 0;
+- panic("panic_on_warn set ...\n");
+- }
++ check_panic_on_warn("UBSAN");
+ }
+
+ void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs)
+diff --git a/lib/usercopy.c b/lib/usercopy.c
+index 7413dd300516e..7ee63df042d7e 100644
+--- a/lib/usercopy.c
++++ b/lib/usercopy.c
+@@ -3,6 +3,7 @@
+ #include <linux/fault-inject-usercopy.h>
+ #include <linux/instrumented.h>
+ #include <linux/uaccess.h>
++#include <linux/nospec.h>
+
+ /* out-of-line parts */
+
+@@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
+ unsigned long res = n;
+ might_fault();
+ if (!should_fail_usercopy() && likely(access_ok(from, n))) {
++ /*
++ * Ensure that bad access_ok() speculation will not
++ * lead to nasty side effects *after* the copy is
++ * finished:
++ */
++ barrier_nospec();
+ instrument_copy_from_user(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ }
+diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile
+index c415a685d61bb..e814061d6aa01 100644
+--- a/lib/vdso/Makefile
++++ b/lib/vdso/Makefile
+@@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set)
+ endif
+
+ quiet_cmd_vdso_check = VDSOCHK $@
+- cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \
++ cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \
+ then (echo >&2 "$@: dynamic relocations are not supported"; \
+ rm -f $@; /bin/false); fi
+diff --git a/lib/vsprintf.c b/lib/vsprintf.c
+index d7ad44f2c8f57..a60f0bb2ea902 100644
+--- a/lib/vsprintf.c
++++ b/lib/vsprintf.c
+@@ -49,12 +49,16 @@
+
+ #include <asm/page.h> /* for PAGE_SIZE */
+ #include <asm/byteorder.h> /* cpu_to_le16 */
++#include <asm/unaligned.h>
+
+ #include <linux/string_helpers.h>
+ #include "kstrtox.h"
+
+-static unsigned long long simple_strntoull(const char *startp, size_t max_chars,
+- char **endp, unsigned int base)
++/* Disable pointer hashing if requested */
++bool no_hash_pointers __ro_after_init;
++EXPORT_SYMBOL_GPL(no_hash_pointers);
++
++static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base)
+ {
+ const char *cp;
+ unsigned long long result = 0ULL;
+@@ -757,14 +761,16 @@ static void enable_ptr_key_workfn(struct work_struct *work)
+
+ static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+-static void fill_random_ptr_key(struct random_ready_callback *unused)
++static int fill_random_ptr_key(struct notifier_block *nb,
++ unsigned long action, void *data)
+ {
+ /* This may be in an interrupt handler. */
+ queue_work(system_unbound_wq, &enable_ptr_key_work);
++ return 0;
+ }
+
+-static struct random_ready_callback random_ready = {
+- .func = fill_random_ptr_key
++static struct notifier_block random_ready = {
++ .notifier_call = fill_random_ptr_key
+ };
+
+ static int __init initialize_ptr_random(void)
+@@ -778,7 +784,7 @@ static int __init initialize_ptr_random(void)
+ return 0;
+ }
+
+- ret = add_random_ready_callback(&random_ready);
++ ret = register_random_ready_notifier(&random_ready);
+ if (!ret) {
+ return 0;
+ } else if (ret == -EALREADY) {
+@@ -848,6 +854,19 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr,
+ return pointer_string(buf, end, (const void *)hashval, spec);
+ }
+
++static char *default_pointer(char *buf, char *end, const void *ptr,
++ struct printf_spec spec)
++{
++ /*
++ * default is to _not_ leak addresses, so hash before printing,
++ * unless no_hash_pointers is specified on the command line.
++ */
++ if (unlikely(no_hash_pointers))
++ return pointer_string(buf, end, ptr, spec);
++
++ return ptr_to_id(buf, end, ptr, spec);
++}
++
+ int kptr_restrict __read_mostly;
+
+ static noinline_for_stack
+@@ -857,7 +876,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
+ switch (kptr_restrict) {
+ case 0:
+ /* Handle as %p, hash and do _not_ leak addresses. */
+- return ptr_to_id(buf, end, ptr, spec);
++ return default_pointer(buf, end, ptr, spec);
+ case 1: {
+ const struct cred *cred;
+
+@@ -1771,7 +1790,7 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
+ char output[sizeof("0123 little-endian (0x01234567)")];
+ char *p = output;
+ unsigned int i;
+- u32 val;
++ u32 orig, val;
+
+ if (fmt[1] != 'c' || fmt[2] != 'c')
+ return error_string(buf, end, "(%p4?)", spec);
+@@ -1779,21 +1798,22 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc,
+ if (check_pointer(&buf, end, fourcc, spec))
+ return buf;
+
+- val = *fourcc & ~BIT(31);
++ orig = get_unaligned(fourcc);
++ val = orig & ~BIT(31);
+
+- for (i = 0; i < sizeof(*fourcc); i++) {
++ for (i = 0; i < sizeof(u32); i++) {
+ unsigned char c = val >> (i * 8);
+
+ /* Print non-control ASCII characters as-is, dot otherwise */
+ *p++ = isascii(c) && isprint(c) ? c : '.';
+ }
+
+- strcpy(p, *fourcc & BIT(31) ? " big-endian" : " little-endian");
++ strcpy(p, orig & BIT(31) ? " big-endian" : " little-endian");
+ p += strlen(p);
+
+ *p++ = ' ';
+ *p++ = '(';
+- p = special_hex_number(p, output + sizeof(output) - 2, *fourcc, sizeof(u32));
++ p = special_hex_number(p, output + sizeof(output) - 2, orig, sizeof(u32));
+ *p++ = ')';
+ *p = '\0';
+
+@@ -2225,10 +2245,6 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode,
+ return widen_string(buf, buf - buf_start, end, spec);
+ }
+
+-/* Disable pointer hashing if requested */
+-bool no_hash_pointers __ro_after_init;
+-EXPORT_SYMBOL_GPL(no_hash_pointers);
+-
+ int __init no_hash_pointers_enable(char *str)
+ {
+ if (no_hash_pointers)
+@@ -2457,7 +2473,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+ case 'e':
+ /* %pe with a non-ERR_PTR gets treated as plain %p */
+ if (!IS_ERR(ptr))
+- break;
++ return default_pointer(buf, end, ptr, spec);
+ return err_ptr(buf, end, ptr, spec);
+ case 'u':
+ case 'k':
+@@ -2467,16 +2483,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+ default:
+ return error_string(buf, end, "(einval)", spec);
+ }
++ default:
++ return default_pointer(buf, end, ptr, spec);
+ }
+-
+- /*
+- * default is to _not_ leak addresses, so hash before printing,
+- * unless no_hash_pointers is specified on the command line.
+- */
+- if (unlikely(no_hash_pointers))
+- return pointer_string(buf, end, ptr, spec);
+- else
+- return ptr_to_id(buf, end, ptr, spec);
+ }
+
+ /*
+diff --git a/lib/xarray.c b/lib/xarray.c
+index f5d8f54907b4f..96e2d7748e5aa 100644
+--- a/lib/xarray.c
++++ b/lib/xarray.c
+@@ -722,6 +722,8 @@ void xas_create_range(struct xa_state *xas)
+
+ for (;;) {
+ struct xa_node *node = xas->xa_node;
++ if (node->shift >= shift)
++ break;
+ xas->xa_node = xa_parent_locked(xas->xa, node);
+ xas->xa_offset = node->offset - 1;
+ if (node->offset != 0)
+@@ -1079,6 +1081,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+ xa_mk_node(child));
+ if (xa_is_value(curr))
+ values--;
++ xas_update(xas, child);
+ } else {
+ unsigned int canon = offset - xas->xa_sibs;
+
+@@ -1093,6 +1096,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+ } while (offset-- > xas->xa_offset);
+
+ node->nr_values += values;
++ xas_update(xas, node);
+ }
+ EXPORT_SYMBOL_GPL(xas_split);
+ #endif
+diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
+index 7a6781e3f47b6..d548cf0e59fe6 100644
+--- a/lib/xz/xz_dec_lzma2.c
++++ b/lib/xz/xz_dec_lzma2.c
+@@ -387,7 +387,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
+
+ *left -= copy_size;
+
+- memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
++ /*
++ * If doing in-place decompression in single-call mode and the
++ * uncompressed size of the file is larger than the caller
++ * thought (i.e. it is invalid input!), the buffers below may
++ * overlap and cause undefined behavior with memcpy().
++ * With valid inputs memcpy() would be fine here.
++ */
++ memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
+ dict->pos += copy_size;
+
+ if (dict->full < dict->pos)
+@@ -397,7 +404,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
+ if (dict->pos == dict->end)
+ dict->pos = 0;
+
+- memcpy(b->out + b->out_pos, b->in + b->in_pos,
++ /*
++ * Like above but for multi-call mode: use memmove()
++ * to avoid undefined behavior with invalid input.
++ */
++ memmove(b->out + b->out_pos, b->in + b->in_pos,
+ copy_size);
+ }
+
+@@ -421,6 +432,12 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
+ if (dict->pos == dict->end)
+ dict->pos = 0;
+
++ /*
++ * These buffers cannot overlap even if doing in-place
++ * decompression because in multi-call mode dict->buf
++ * has been allocated by us in this file; it's not
++ * provided by the caller like in single-call mode.
++ */
+ memcpy(b->out + b->out_pos, dict->buf + dict->start,
+ copy_size);
+ }
+diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c
+index fea86deaaa01d..683570b93a8c4 100644
+--- a/lib/xz/xz_dec_stream.c
++++ b/lib/xz/xz_dec_stream.c
+@@ -402,12 +402,12 @@ static enum xz_ret dec_stream_header(struct xz_dec *s)
+ * we will accept other check types too, but then the check won't
+ * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
+ */
++ if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX)
++ return XZ_OPTIONS_ERROR;
++
+ s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
+
+ #ifdef XZ_DEC_ANY_CHECK
+- if (s->check_type > XZ_CHECK_MAX)
+- return XZ_OPTIONS_ERROR;
+-
+ if (s->check_type > XZ_CHECK_CRC32)
+ return XZ_UNSUPPORTED_CHECK;
+ #else
+diff --git a/mm/Kconfig b/mm/Kconfig
+index d16ba9249bc53..c048dea7e3420 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -887,6 +887,9 @@ config MAPPING_DIRTY_HELPERS
+ config KMAP_LOCAL
+ bool
+
++config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
++ bool
++
+ # struct io_mapping based helper. Selected by drivers that need them
+ config IO_MAPPING
+ bool
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index 4a9d4e27d0d9b..afdd132768455 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -229,20 +229,13 @@ static __init int bdi_class_init(void)
+ }
+ postcore_initcall(bdi_class_init);
+
+-static int bdi_init(struct backing_dev_info *bdi);
+-
+ static int __init default_bdi_init(void)
+ {
+- int err;
+-
+ bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
+ WQ_SYSFS, 0);
+ if (!bdi_wq)
+ return -ENOMEM;
+-
+- err = bdi_init(&noop_backing_dev_info);
+-
+- return err;
++ return 0;
+ }
+ subsys_initcall(default_bdi_init);
+
+@@ -265,10 +258,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
+ unsigned long timeout;
+
+ timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->dwork, timeout);
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ }
+
+ static void wb_update_bandwidth_workfn(struct work_struct *work)
+@@ -344,12 +337,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
+ static void wb_shutdown(struct bdi_writeback *wb)
+ {
+ /* Make sure nobody queues further work */
+- spin_lock_bh(&wb->work_lock);
++ spin_lock_irq(&wb->work_lock);
+ if (!test_and_clear_bit(WB_registered, &wb->state)) {
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+ return;
+ }
+- spin_unlock_bh(&wb->work_lock);
++ spin_unlock_irq(&wb->work_lock);
+
+ cgwb_remove_from_bdi_list(wb);
+ /*
+@@ -392,6 +385,15 @@ static LIST_HEAD(offline_cgwbs);
+ static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+ static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+
++static void cgwb_free_rcu(struct rcu_head *rcu_head)
++{
++ struct bdi_writeback *wb = container_of(rcu_head,
++ struct bdi_writeback, rcu);
++
++ percpu_ref_exit(&wb->refcnt);
++ kfree(wb);
++}
++
+ static void cgwb_release_workfn(struct work_struct *work)
+ {
+ struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
+@@ -414,10 +416,9 @@ static void cgwb_release_workfn(struct work_struct *work)
+ list_del(&wb->offline_node);
+ spin_unlock_irq(&cgwb_lock);
+
+- percpu_ref_exit(&wb->refcnt);
+ wb_exit(wb);
+ WARN_ON_ONCE(!list_empty(&wb->b_attached));
+- kfree_rcu(wb, rcu);
++ call_rcu(&wb->rcu, cgwb_free_rcu);
+ }
+
+ static void cgwb_release(struct percpu_ref *refcnt)
+@@ -784,7 +785,7 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+
+ #endif /* CONFIG_CGROUP_WRITEBACK */
+
+-static int bdi_init(struct backing_dev_info *bdi)
++int bdi_init(struct backing_dev_info *bdi)
+ {
+ int ret;
+
+@@ -947,6 +948,13 @@ void bdi_unregister(struct backing_dev_info *bdi)
+ wb_shutdown(&bdi->wb);
+ cgwb_bdi_unregister(bdi);
+
++ /*
++ * If this BDI's min ratio has been set, use bdi_set_min_ratio() to
++ * update the global bdi_min_ratio.
++ */
++ if (bdi->min_ratio)
++ bdi_set_min_ratio(bdi, 0);
++
+ if (bdi->dev) {
+ bdi_debug_unregister(bdi);
+ device_unregister(bdi->dev);
+diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
+index f03f42f426f69..8655492159a5f 100644
+--- a/mm/bootmem_info.c
++++ b/mm/bootmem_info.c
+@@ -12,6 +12,7 @@
+ #include <linux/memblock.h>
+ #include <linux/bootmem_info.h>
+ #include <linux/memory_hotplug.h>
++#include <linux/kmemleak.h>
+
+ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
+ {
+@@ -34,6 +35,7 @@ void put_page_bootmem(struct page *page)
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ INIT_LIST_HEAD(&page->lru);
++ kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+ free_reserved_page(page);
+ }
+ }
+diff --git a/mm/cma.c b/mm/cma.c
+index 995e15480937f..a972c3440c404 100644
+--- a/mm/cma.c
++++ b/mm/cma.c
+@@ -37,6 +37,7 @@
+
+ struct cma cma_areas[MAX_CMA_AREAS];
+ unsigned cma_area_count;
++static DEFINE_MUTEX(cma_mutex);
+
+ phys_addr_t cma_get_base(const struct cma *cma)
+ {
+@@ -471,9 +472,10 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
+ spin_unlock_irq(&cma->lock);
+
+ pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
++ mutex_lock(&cma_mutex);
+ ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
+ GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
+-
++ mutex_unlock(&cma_mutex);
+ if (ret == 0) {
+ page = pfn_to_page(pfn);
+ break;
+diff --git a/mm/compaction.c b/mm/compaction.c
+index bfc93da1c2c7c..89517ad5d6a0b 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -779,7 +779,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+ * @cc: Compaction control structure.
+ * @low_pfn: The first PFN to isolate
+ * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock
+- * @isolate_mode: Isolation mode to be used.
++ * @mode: Isolation mode to be used.
+ *
+ * Isolate all pages that can be migrated from the range specified by
+ * [low_pfn, end_pfn). The range is expected to be within same pageblock.
+@@ -792,7 +792,7 @@ static bool too_many_isolated(pg_data_t *pgdat)
+ */
+ static int
+ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+- unsigned long end_pfn, isolate_mode_t isolate_mode)
++ unsigned long end_pfn, isolate_mode_t mode)
+ {
+ pg_data_t *pgdat = cc->zone->zone_pgdat;
+ unsigned long nr_scanned = 0, nr_isolated = 0;
+@@ -800,6 +800,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ unsigned long flags = 0;
+ struct lruvec *locked = NULL;
+ struct page *page = NULL, *valid_page = NULL;
++ struct address_space *mapping;
+ unsigned long start_pfn = low_pfn;
+ bool skip_on_failure = false;
+ unsigned long next_skip_pfn = 0;
+@@ -984,40 +985,76 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
+ locked = NULL;
+ }
+
+- if (!isolate_movable_page(page, isolate_mode))
++ if (!isolate_movable_page(page, mode))
+ goto isolate_success;
+ }
+
+ goto isolate_fail;
+ }
+
++ /*
++ * Be careful not to clear PageLRU until after we're
++ * sure the page is not being freed elsewhere -- the
++ * page release code relies on it.
++ */
++ if (unlikely(!get_page_unless_zero(page)))
++ goto isolate_fail;
++
+ /*
+ * Migration will fail if an anonymous page is pinned in memory,
+ * so avoid taking lru_lock and isolating it unnecessarily in an
+ * admittedly racy check.
+ */
+- if (!page_mapping(page) &&
+- page_count(page) > page_mapcount(page))
+- goto isolate_fail;
++ mapping = page_mapping(page);
++ if (!mapping && (page_count(page) - 1) > total_mapcount(page))
++ goto isolate_fail_put;
+
+ /*
+ * Only allow to migrate anonymous pages in GFP_NOFS context
+ * because those do not depend on fs locks.
+ */
+- if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
+- goto isolate_fail;
++ if (!(cc->gfp_mask & __GFP_FS) && mapping)
++ goto isolate_fail_put;
++
++ /* Only take pages on LRU: a check now makes later tests safe */
++ if (!PageLRU(page))
++ goto isolate_fail_put;
++
++ /* Compaction might skip unevictable pages but CMA takes them */
++ if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page))
++ goto isolate_fail_put;
+
+ /*
+- * Be careful not to clear PageLRU until after we're
+- * sure the page is not being freed elsewhere -- the
+- * page release code relies on it.
++ * To minimise LRU disruption, the caller can indicate with
++ * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages
++ * it will be able to migrate without blocking - clean pages
++ * for the most part. PageWriteback would require blocking.
+ */
+- if (unlikely(!get_page_unless_zero(page)))
+- goto isolate_fail;
+-
+- if (!__isolate_lru_page_prepare(page, isolate_mode))
++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page))
+ goto isolate_fail_put;
+
++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) {
++ bool migrate_dirty;
++
++ /*
++ * Only pages without mappings or that have a
++ * ->migratepage callback are possible to migrate
++ * without blocking. However, we can be racing with
++ * truncation so it's necessary to lock the page
++ * to stabilise the mapping as truncation holds
++ * the page lock until after the page is removed
++ * from the page cache.
++ */
++ if (!trylock_page(page))
++ goto isolate_fail_put;
++
++ mapping = page_mapping(page);
++ migrate_dirty = !mapping || mapping->a_ops->migratepage;
++ unlock_page(page);
++ if (!migrate_dirty)
++ goto isolate_fail_put;
++ }
++
+ /* Try isolate the page */
+ if (!TestClearPageLRU(page))
+ goto isolate_fail_put;
+@@ -1313,7 +1350,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
+ }
+
+ static void
+-fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
++fast_isolate_around(struct compact_control *cc, unsigned long pfn)
+ {
+ unsigned long start_pfn, end_pfn;
+ struct page *page;
+@@ -1334,21 +1371,13 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
+ if (!page)
+ return;
+
+- /* Scan before */
+- if (start_pfn != pfn) {
+- isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
+- if (cc->nr_freepages >= cc->nr_migratepages)
+- return;
+- }
+-
+- /* Scan after */
+- start_pfn = pfn + nr_isolated;
+- if (start_pfn < end_pfn)
+- isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
++ isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
+
+ /* Skip this pageblock in the future as it's full or nearly full */
+ if (cc->nr_freepages < cc->nr_migratepages)
+ set_pageblock_skip(page);
++
++ return;
+ }
+
+ /* Search orders in round-robin fashion */
+@@ -1524,7 +1553,7 @@ fast_isolate_freepages(struct compact_control *cc)
+ return cc->free_pfn;
+
+ low_pfn = page_to_pfn(page);
+- fast_isolate_around(cc, low_pfn, nr_isolated);
++ fast_isolate_around(cc, low_pfn);
+ return low_pfn;
+ }
+
+@@ -1815,6 +1844,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
+
+ update_fast_start_pfn(cc, free_pfn);
+ pfn = pageblock_start_pfn(free_pfn);
++ if (pfn < cc->zone->zone_start_pfn)
++ pfn = cc->zone->zone_start_pfn;
+ cc->fast_search_fail = 0;
+ found_block = true;
+ set_pageblock_skip(freepage);
+diff --git a/mm/damon/core.c b/mm/damon/core.c
+index 30e9211f494a7..7a4912d6e65f2 100644
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -357,6 +357,15 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
+ return err;
+ }
+
++static void kdamond_usleep(unsigned long usecs)
++{
++ /* See Documentation/timers/timers-howto.rst for the thresholds */
++ if (usecs > 20 * 1000)
++ schedule_timeout_idle(usecs_to_jiffies(usecs));
++ else
++ usleep_idle_range(usecs, usecs + 1);
++}
++
+ /*
+ * __damon_stop() - Stops monitoring of given context.
+ * @ctx: monitoring context
+@@ -370,8 +379,7 @@ static int __damon_stop(struct damon_ctx *ctx)
+ ctx->kdamond_stop = true;
+ mutex_unlock(&ctx->kdamond_lock);
+ while (damon_kdamond_running(ctx))
+- usleep_range(ctx->sample_interval,
+- ctx->sample_interval * 2);
++ kdamond_usleep(ctx->sample_interval);
+ return 0;
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+@@ -670,7 +678,7 @@ static int kdamond_fn(void *data)
+ ctx->callback.after_sampling(ctx))
+ set_kdamond_stop(ctx);
+
+- usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
++ kdamond_usleep(ctx->sample_interval);
+
+ if (ctx->primitive.check_accesses)
+ max_nr_accesses = ctx->primitive.check_accesses(ctx);
+diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
+index faee070977d80..b039fd1f8a1db 100644
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -32,7 +32,7 @@ static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos)
+ if (*ppos)
+ return ERR_PTR(-EINVAL);
+
+- kbuf = kmalloc(count + 1, GFP_KERNEL);
++ kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return ERR_PTR(-ENOMEM);
+
+@@ -185,6 +185,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+ {
+ struct damon_ctx *ctx = file->private_data;
++ struct damon_target *t, *next_t;
+ char *kbuf, *nrs;
+ unsigned long *targets;
+ ssize_t nr_targets;
+@@ -224,6 +225,13 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
+ goto unlock_out;
+ }
+
++ /* remove previously set targets */
++ damon_for_each_target_safe(t, next_t, ctx) {
++ if (targetid_is_pid(ctx))
++ put_pid((struct pid *)t->id);
++ damon_destroy_target(t);
++ }
++
+ err = damon_set_targets(ctx, targets, nr_targets);
+ if (err) {
+ if (targetid_is_pid(ctx))
+@@ -247,7 +255,7 @@ static ssize_t dbgfs_kdamond_pid_read(struct file *file,
+ char *kbuf;
+ ssize_t len;
+
+- kbuf = kmalloc(count, GFP_KERNEL);
++ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return -ENOMEM;
+
+@@ -309,10 +317,12 @@ static int dbgfs_before_terminate(struct damon_ctx *ctx)
+ if (!targetid_is_pid(ctx))
+ return 0;
+
++ mutex_lock(&ctx->kdamond_lock);
+ damon_for_each_target_safe(t, next, ctx) {
+ put_pid((struct pid *)t->id);
+ damon_destroy_target(t);
+ }
++ mutex_unlock(&ctx->kdamond_lock);
+ return 0;
+ }
+
+@@ -366,6 +376,9 @@ static int dbgfs_mk_context(char *name)
+ return -ENOENT;
+
+ new_dir = debugfs_create_dir(name, root);
++ /* Below check is required for a potential duplicated name case */
++ if (IS_ERR(new_dir))
++ return PTR_ERR(new_dir);
+ dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+
+ new_ctx = dbgfs_new_ctx();
+@@ -428,8 +441,10 @@ out:
+ static int dbgfs_rm_context(char *name)
+ {
+ struct dentry *root, *dir, **new_dirs;
++ struct inode *inode;
+ struct damon_ctx **new_ctxs;
+ int i, j;
++ int ret = 0;
+
+ if (damon_nr_running_ctxs())
+ return -EBUSY;
+@@ -442,16 +457,24 @@ static int dbgfs_rm_context(char *name)
+ if (!dir)
+ return -ENOENT;
+
++ inode = d_inode(dir);
++ if (!S_ISDIR(inode->i_mode)) {
++ ret = -EINVAL;
++ goto out_dput;
++ }
++
+ new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs),
+ GFP_KERNEL);
+- if (!new_dirs)
+- return -ENOMEM;
++ if (!new_dirs) {
++ ret = -ENOMEM;
++ goto out_dput;
++ }
+
+ new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs),
+ GFP_KERNEL);
+ if (!new_ctxs) {
+- kfree(new_dirs);
+- return -ENOMEM;
++ ret = -ENOMEM;
++ goto out_new_dirs;
+ }
+
+ for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) {
+@@ -471,7 +494,13 @@ static int dbgfs_rm_context(char *name)
+ dbgfs_ctxs = new_ctxs;
+ dbgfs_nr_ctxs--;
+
+- return 0;
++ goto out_dput;
++
++out_new_dirs:
++ kfree(new_dirs);
++out_dput:
++ dput(dir);
++ return ret;
+ }
+
+ static ssize_t dbgfs_rm_context_write(struct file *file,
+@@ -538,12 +567,14 @@ static ssize_t dbgfs_monitor_on_write(struct file *file,
+ return -EINVAL;
+ }
+
++ mutex_lock(&damon_dbgfs_lock);
+ if (!strncmp(kbuf, "on", count))
+ err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+ else if (!strncmp(kbuf, "off", count))
+ err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+ else
+ err = -EINVAL;
++ mutex_unlock(&damon_dbgfs_lock);
+
+ if (err)
+ ret = err;
+@@ -596,15 +627,16 @@ static int __init __damon_dbgfs_init(void)
+
+ static int __init damon_dbgfs_init(void)
+ {
+- int rc;
++ int rc = -ENOMEM;
+
++ mutex_lock(&damon_dbgfs_lock);
+ dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
+ if (!dbgfs_ctxs)
+- return -ENOMEM;
++ goto out;
+ dbgfs_ctxs[0] = dbgfs_new_ctx();
+ if (!dbgfs_ctxs[0]) {
+ kfree(dbgfs_ctxs);
+- return -ENOMEM;
++ goto out;
+ }
+ dbgfs_nr_ctxs = 1;
+
+@@ -615,6 +647,8 @@ static int __init damon_dbgfs_init(void)
+ pr_err("%s: dbgfs init failed\n", __func__);
+ }
+
++out:
++ mutex_unlock(&damon_dbgfs_lock);
+ return rc;
+ }
+
+diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
+index 58c1fb2aafa91..6ad96da15081f 100644
+--- a/mm/damon/vaddr.c
++++ b/mm/damon/vaddr.c
+@@ -393,7 +393,7 @@ static struct page *damon_get_page(unsigned long pfn)
+ return page;
+ }
+
+-static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm,
++static void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma,
+ unsigned long addr)
+ {
+ bool referenced = false;
+@@ -402,13 +402,11 @@ static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm,
+ if (!page)
+ return;
+
+- if (pte_young(*pte)) {
++ if (ptep_test_and_clear_young(vma, addr, pte))
+ referenced = true;
+- *pte = pte_mkold(*pte);
+- }
+
+ #ifdef CONFIG_MMU_NOTIFIER
+- if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE))
++ if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE))
+ referenced = true;
+ #endif /* CONFIG_MMU_NOTIFIER */
+
+@@ -419,7 +417,7 @@ static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm,
+ put_page(page);
+ }
+
+-static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm,
++static void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma,
+ unsigned long addr)
+ {
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -429,13 +427,11 @@ static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm,
+ if (!page)
+ return;
+
+- if (pmd_young(*pmd)) {
++ if (pmdp_test_and_clear_young(vma, addr, pmd))
+ referenced = true;
+- *pmd = pmd_mkold(*pmd);
+- }
+
+ #ifdef CONFIG_MMU_NOTIFIER
+- if (mmu_notifier_clear_young(mm, addr,
++ if (mmu_notifier_clear_young(vma->vm_mm, addr,
+ addr + ((1UL) << HPAGE_PMD_SHIFT)))
+ referenced = true;
+ #endif /* CONFIG_MMU_NOTIFIER */
+@@ -456,8 +452,13 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
++ if (!pmd_present(*pmd)) {
++ spin_unlock(ptl);
++ return 0;
++ }
++
+ if (pmd_huge(*pmd)) {
+- damon_pmdp_mkold(pmd, walk->mm, addr);
++ damon_pmdp_mkold(pmd, walk->vma, addr);
+ spin_unlock(ptl);
+ return 0;
+ }
+@@ -469,7 +470,7 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte_present(*pte))
+ goto out;
+- damon_ptep_mkold(pte, walk->mm, addr);
++ damon_ptep_mkold(pte, walk->vma, addr);
+ out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+@@ -530,6 +531,11 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
++ if (!pmd_present(*pmd)) {
++ spin_unlock(ptl);
++ return 0;
++ }
++
+ if (!pmd_huge(*pmd)) {
+ spin_unlock(ptl);
+ goto regular_page;
+diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
+index 1403639302e48..718d0d3ad8c4e 100644
+--- a/mm/debug_vm_pgtable.c
++++ b/mm/debug_vm_pgtable.c
+@@ -171,6 +171,8 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
+ ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
+ pte = ptep_get(args->ptep);
+ WARN_ON(pte_young(pte));
++
++ ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
+ }
+
+ static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
+diff --git a/mm/filemap.c b/mm/filemap.c
+index dae481293b5d9..81e28722edfaf 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -90,7 +90,7 @@
+ * ->lock_page (filemap_fault, access_process_vm)
+ *
+ * ->i_rwsem (generic_perform_write)
+- * ->mmap_lock (fault_in_pages_readable->do_page_fault)
++ * ->mmap_lock (fault_in_readable->do_page_fault)
+ *
+ * bdi->wb.list_lock
+ * sb_lock (fs/fs-writeback.c)
+@@ -2090,10 +2090,13 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+
+ rcu_read_lock();
+ while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
++ unsigned long next_idx = xas.xa_index + 1;
++
+ if (!xa_is_value(page)) {
++ if (PageTransHuge(page))
++ next_idx = page->index + thp_nr_pages(page);
+ if (page->index < start)
+ goto put;
+- VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
+ if (page->index + thp_nr_pages(page) - 1 > end)
+ goto put;
+ if (!trylock_page(page))
+@@ -2112,13 +2115,11 @@ unlock:
+ put:
+ put_page(page);
+ next:
+- if (!xa_is_value(page) && PageTransHuge(page)) {
+- unsigned int nr_pages = thp_nr_pages(page);
+-
++ if (next_idx != xas.xa_index + 1) {
+ /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
+- xas_set(&xas, page->index + nr_pages);
+- if (xas.xa_index < nr_pages)
++ if (next_idx < xas.xa_index)
+ break;
++ xas_set(&xas, next_idx);
+ }
+ }
+ rcu_read_unlock();
+@@ -2355,8 +2356,12 @@ static void filemap_get_read_batch(struct address_space *mapping,
+ break;
+ if (PageReadahead(head))
+ break;
+- xas.xa_index = head->index + thp_nr_pages(head) - 1;
+- xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
++ if (PageHead(head)) {
++ xas_set(&xas, head->index + thp_nr_pages(head));
++ /* Handle wrap correctly */
++ if (xas.xa_index - 1 >= max)
++ break;
++ }
+ continue;
+ put_page:
+ put_page(head);
+@@ -2533,18 +2538,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
+ struct page *page;
+ int err = 0;
+
++ /* "last_index" is the index of the page beyond the end of the read */
+ last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
+ retry:
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+- filemap_get_read_batch(mapping, index, last_index, pvec);
++ filemap_get_read_batch(mapping, index, last_index - 1, pvec);
+ if (!pagevec_count(pvec)) {
+ if (iocb->ki_flags & IOCB_NOIO)
+ return -EAGAIN;
+ page_cache_sync_readahead(mapping, ra, filp, index,
+ last_index - index);
+- filemap_get_read_batch(mapping, index, last_index, pvec);
++ filemap_get_read_batch(mapping, index, last_index - 1, pvec);
+ }
+ if (!pagevec_count(pvec)) {
+ if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
+@@ -3744,7 +3750,7 @@ ssize_t generic_perform_write(struct file *file,
+ unsigned long offset; /* Offset into pagecache page */
+ unsigned long bytes; /* Bytes to write to page */
+ size_t copied; /* Bytes copied from user */
+- void *fsdata;
++ void *fsdata = NULL;
+
+ offset = (pos & (PAGE_SIZE - 1));
+ bytes = min_t(unsigned long, PAGE_SIZE - offset,
+@@ -3757,7 +3763,7 @@ again:
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ */
+- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
++ if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+diff --git a/mm/gup.c b/mm/gup.c
+index 886d6148d3d03..0a1839b325747 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -124,8 +124,8 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
+ * considered failure, and furthermore, a likely bug in the caller, so a warning
+ * is also emitted.
+ */
+-struct page *try_grab_compound_head(struct page *page,
+- int refs, unsigned int flags)
++__maybe_unused struct page *try_grab_compound_head(struct page *page,
++ int refs, unsigned int flags)
+ {
+ if (flags & FOLL_GET)
+ return try_get_compound_head(page, refs);
+@@ -208,10 +208,35 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
+ */
+ bool __must_check try_grab_page(struct page *page, unsigned int flags)
+ {
+- if (!(flags & (FOLL_GET | FOLL_PIN)))
+- return true;
++ WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+
+- return try_grab_compound_head(page, 1, flags);
++ if (flags & FOLL_GET)
++ return try_get_page(page);
++ else if (flags & FOLL_PIN) {
++ int refs = 1;
++
++ page = compound_head(page);
++
++ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
++ return false;
++
++ if (hpage_pincount_available(page))
++ hpage_pincount_add(page, 1);
++ else
++ refs = GUP_PIN_COUNTING_BIAS;
++
++ /*
++ * Similar to try_grab_compound_head(): even if using the
++ * hpage_pincount_add/_sub() routines, be sure to
++ * *also* increment the normal page refcount field at least
++ * once, so that the page really is pinned.
++ */
++ page_ref_add(page, refs);
++
++ mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
++ }
++
++ return true;
+ }
+
+ /**
+@@ -440,7 +465,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte, unsigned int flags)
+ {
+ /* No page to get reference */
+- if (flags & FOLL_GET)
++ if (flags & (FOLL_GET | FOLL_PIN))
+ return -EFAULT;
+
+ if (flags & FOLL_TOUCH) {
+@@ -484,6 +509,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
+ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
+ (FOLL_PIN | FOLL_GET)))
+ return ERR_PTR(-EINVAL);
++
++ /*
++ * Considering PTE level hugetlb, like continuous-PTE hugetlb on
++ * ARM64 architecture.
++ */
++ if (is_vm_hugetlb_page(vma)) {
++ page = follow_huge_pmd_pte(vma, address, flags);
++ if (page)
++ return page;
++ return no_page_table(vma, flags);
++ }
++
+ retry:
+ if (unlikely(pmd_bad(*pmd)))
+ return no_page_table(vma, flags);
+@@ -627,7 +664,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
+ if (pmd_none(pmdval))
+ return no_page_table(vma, flags);
+ if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
+- page = follow_huge_pmd(mm, address, pmd, flags);
++ page = follow_huge_pmd_pte(vma, address, flags);
+ if (page)
+ return page;
+ return no_page_table(vma, flags);
+@@ -918,6 +955,8 @@ static int faultin_page(struct vm_area_struct *vma,
+ /* mlock all present pages, but do not fault in new pages */
+ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ return -ENOENT;
++ if (*flags & FOLL_NOFAULT)
++ return -EFAULT;
+ if (*flags & FOLL_WRITE)
+ fault_flags |= FAULT_FLAG_WRITE;
+ if (*flags & FOLL_REMOTE)
+@@ -1656,6 +1695,122 @@ finish_or_fault:
+ }
+ #endif /* !CONFIG_MMU */
+
++/**
++ * fault_in_writeable - fault in userspace address range for writing
++ * @uaddr: start of address range
++ * @size: size of address range
++ *
++ * Returns the number of bytes not faulted in (like copy_to_user() and
++ * copy_from_user()).
++ */
++size_t fault_in_writeable(char __user *uaddr, size_t size)
++{
++ char __user *start = uaddr, *end;
++
++ if (unlikely(size == 0))
++ return 0;
++ if (!PAGE_ALIGNED(uaddr)) {
++ if (unlikely(__put_user(0, uaddr) != 0))
++ return size;
++ uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
++ }
++ end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
++ if (unlikely(end < start))
++ end = NULL;
++ while (uaddr != end) {
++ if (unlikely(__put_user(0, uaddr) != 0))
++ goto out;
++ uaddr += PAGE_SIZE;
++ }
++
++out:
++ if (size > uaddr - start)
++ return size - (uaddr - start);
++ return 0;
++}
++EXPORT_SYMBOL(fault_in_writeable);
++
++/*
++ * fault_in_safe_writeable - fault in an address range for writing
++ * @uaddr: start of address range
++ * @size: length of address range
++ *
++ * Faults in an address range for writing. This is primarily useful when we
++ * already know that some or all of the pages in the address range aren't in
++ * memory.
++ *
++ * Unlike fault_in_writeable(), this function is non-destructive.
++ *
++ * Note that we don't pin or otherwise hold the pages referenced that we fault
++ * in. There's no guarantee that they'll stay in memory for any duration of
++ * time.
++ *
++ * Returns the number of bytes not faulted in, like copy_to_user() and
++ * copy_from_user().
++ */
++size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
++{
++ unsigned long start = (unsigned long)uaddr, end;
++ struct mm_struct *mm = current->mm;
++ bool unlocked = false;
++
++ if (unlikely(size == 0))
++ return 0;
++ end = PAGE_ALIGN(start + size);
++ if (end < start)
++ end = 0;
++
++ mmap_read_lock(mm);
++ do {
++ if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
++ break;
++ start = (start + PAGE_SIZE) & PAGE_MASK;
++ } while (start != end);
++ mmap_read_unlock(mm);
++
++ if (size > (unsigned long)uaddr - start)
++ return size - ((unsigned long)uaddr - start);
++ return 0;
++}
++EXPORT_SYMBOL(fault_in_safe_writeable);
++
++/**
++ * fault_in_readable - fault in userspace address range for reading
++ * @uaddr: start of user address range
++ * @size: size of user address range
++ *
++ * Returns the number of bytes not faulted in (like copy_to_user() and
++ * copy_from_user()).
++ */
++size_t fault_in_readable(const char __user *uaddr, size_t size)
++{
++ const char __user *start = uaddr, *end;
++ volatile char c;
++
++ if (unlikely(size == 0))
++ return 0;
++ if (!PAGE_ALIGNED(uaddr)) {
++ if (unlikely(__get_user(c, uaddr) != 0))
++ return size;
++ uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
++ }
++ end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
++ if (unlikely(end < start))
++ end = NULL;
++ while (uaddr != end) {
++ if (unlikely(__get_user(c, uaddr) != 0))
++ goto out;
++ uaddr += PAGE_SIZE;
++ }
++
++out:
++ (void)c;
++ if (size > uaddr - start)
++ return size - (uaddr - start);
++ return 0;
++}
++EXPORT_SYMBOL(fault_in_readable);
++
+ /**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+@@ -1722,7 +1877,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ */
+ if (!is_pinnable_page(head)) {
+ if (PageHuge(head)) {
+- if (!isolate_huge_page(head, &movable_page_list))
++ if (isolate_hugetlb(head, &movable_page_list))
+ isolation_error_count++;
+ } else {
+ if (!PageLRU(head) && drain_allow) {
+@@ -2123,8 +2278,28 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
+ }
+
+ #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+-static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+- unsigned int flags, struct page **pages, int *nr)
++/*
++ * Fast-gup relies on pte change detection to avoid concurrent pgtable
++ * operations.
++ *
++ * To pin the page, fast-gup needs to do below in order:
++ * (1) pin the page (by prefetching pte), then (2) check pte not changed.
++ *
++ * For the rest of pgtable operations where pgtable updates can be racy
++ * with fast-gup, we need to do (1) clear pte, then (2) check whether page
++ * is pinned.
++ *
++ * Above will work for all pte-level operations, including THP split.
++ *
++ * For THP collapse, it's a bit more complicated because fast-gup may be
++ * walking a pgtable page that is being freed (pte is still valid but pmd
++ * can be cleared already). To avoid race in such condition, we need to
++ * also check pmd here to make sure pmd doesn't change (corresponds to
++ * pmdp_collapse_flush() in the THP collapse code path).
++ */
++static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
++ unsigned long end, unsigned int flags,
++ struct page **pages, int *nr)
+ {
+ struct dev_pagemap *pgmap = NULL;
+ int nr_start = *nr, ret = 0;
+@@ -2169,7 +2344,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+ goto pte_unmap;
+ }
+
+- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
++ if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
++ unlikely(pte_val(pte) != pte_val(*ptep))) {
+ put_compound_head(head, 1, flags);
+ goto pte_unmap;
+ }
+@@ -2214,8 +2390,9 @@ pte_unmap:
+ * get_user_pages_fast_only implementation that can pin pages. Thus it's still
+ * useful to have gup_huge_pmd even if we can't operate on ptes.
+ */
+-static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+- unsigned int flags, struct page **pages, int *nr)
++static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
++ unsigned long end, unsigned int flags,
++ struct page **pages, int *nr)
+ {
+ return 0;
+ }
+@@ -2524,7 +2701,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
+ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
+ PMD_SHIFT, next, flags, pages, nr))
+ return 0;
+- } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
++ } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr))
+ return 0;
+ } while (pmdp++, addr = next, addr != end);
+
+@@ -2544,7 +2721,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo
+ next = pud_addr_end(addr, end);
+ if (unlikely(!pud_present(pud)))
+ return 0;
+- if (unlikely(pud_huge(pud))) {
++ if (unlikely(pud_huge(pud) || pud_devmap(pud))) {
+ if (!gup_huge_pud(pud, pudp, addr, next, flags,
+ pages, nr))
+ return 0;
+@@ -2708,7 +2885,7 @@ static int internal_get_user_pages_fast(unsigned long start,
+
+ if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
+ FOLL_FORCE | FOLL_PIN | FOLL_GET |
+- FOLL_FAST_ONLY)))
++ FOLL_FAST_ONLY | FOLL_NOFAULT)))
+ return -EINVAL;
+
+ if (gup_flags & FOLL_PIN)
+diff --git a/mm/highmem.c b/mm/highmem.c
+index 4212ad0e4a195..4f942678e9da2 100644
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -504,16 +504,22 @@ static inline int kmap_local_calc_idx(int idx)
+
+ static pte_t *__kmap_pte;
+
+-static pte_t *kmap_get_pte(void)
++static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
+ {
++ if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
++ /*
++ * Set by the arch if __kmap_pte[-idx] does not produce
++ * the correct entry.
++ */
++ return virt_to_kpte(vaddr);
+ if (!__kmap_pte)
+ __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
+- return __kmap_pte;
++ return &__kmap_pte[-idx];
+ }
+
+ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ {
+- pte_t pteval, *kmap_pte = kmap_get_pte();
++ pte_t pteval, *kmap_pte;
+ unsigned long vaddr;
+ int idx;
+
+@@ -525,9 +531,10 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ preempt_disable();
+ idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+- BUG_ON(!pte_none(*(kmap_pte - idx)));
++ kmap_pte = kmap_get_pte(vaddr, idx);
++ BUG_ON(!pte_none(*kmap_pte));
+ pteval = pfn_pte(pfn, prot);
+- arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
++ arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
+ arch_kmap_local_post_map(vaddr, pteval);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+ preempt_enable();
+@@ -560,7 +567,7 @@ EXPORT_SYMBOL(__kmap_local_page_prot);
+ void kunmap_local_indexed(void *vaddr)
+ {
+ unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int idx;
+
+ if (addr < __fix_to_virt(FIX_KMAP_END) ||
+@@ -585,8 +592,9 @@ void kunmap_local_indexed(void *vaddr)
+ idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
+ WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+
++ kmap_pte = kmap_get_pte(addr, idx);
+ arch_kmap_local_pre_unmap(addr);
+- pte_clear(&init_mm, addr, kmap_pte - idx);
++ pte_clear(&init_mm, addr, kmap_pte);
+ arch_kmap_local_post_unmap(addr);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+ kmap_local_idx_pop();
+@@ -608,7 +616,7 @@ EXPORT_SYMBOL(kunmap_local_indexed);
+ void __kmap_local_sched_out(void)
+ {
+ struct task_struct *tsk = current;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int i;
+
+ /* Clear kmaps */
+@@ -619,7 +627,7 @@ void __kmap_local_sched_out(void)
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
+- WARN_ON_ONCE(!pte_none(pteval));
++ WARN_ON_ONCE(pte_val(pteval) != 0);
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+@@ -635,8 +643,9 @@ void __kmap_local_sched_out(void)
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ kmap_pte = kmap_get_pte(addr, idx);
+ arch_kmap_local_pre_unmap(addr);
+- pte_clear(&init_mm, addr, kmap_pte - idx);
++ pte_clear(&init_mm, addr, kmap_pte);
+ arch_kmap_local_post_unmap(addr);
+ }
+ }
+@@ -644,7 +653,7 @@ void __kmap_local_sched_out(void)
+ void __kmap_local_sched_in(void)
+ {
+ struct task_struct *tsk = current;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int i;
+
+ /* Restore kmaps */
+@@ -655,7 +664,7 @@ void __kmap_local_sched_in(void)
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
+- WARN_ON_ONCE(!pte_none(pteval));
++ WARN_ON_ONCE(pte_val(pteval) != 0);
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+@@ -664,7 +673,8 @@ void __kmap_local_sched_in(void)
+ /* See comment in __kmap_local_sched_out() */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+- set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
++ kmap_pte = kmap_get_pte(addr, idx);
++ set_pte_at(&init_mm, addr, kmap_pte, pteval);
+ arch_kmap_local_post_map(addr, pteval);
+ }
+ }
+diff --git a/mm/hmm.c b/mm/hmm.c
+index 842e265992380..3af995c814a66 100644
+--- a/mm/hmm.c
++++ b/mm/hmm.c
+@@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
+ unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
+- swp_entry_t entry)
+-{
+- return is_device_private_entry(entry) &&
+- pfn_swap_entry_to_page(entry)->pgmap->owner ==
+- range->dev_private_owner;
+-}
+-
+ static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
+ pte_t pte)
+ {
+@@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ /*
+- * Never fault in device private pages, but just report
+- * the PFN even if not present.
++ * Don't fault in device private pages owned by the caller,
++ * just report the PFN.
+ */
+- if (hmm_is_device_private_entry(range, entry)) {
++ if (is_device_private_entry(entry) &&
++ pfn_swap_entry_to_page(entry)->pgmap->owner ==
++ range->dev_private_owner) {
+ cpu_flags = HMM_PFN_VALID;
+ if (is_writable_device_private_entry(entry))
+ cpu_flags |= HMM_PFN_WRITE;
+@@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+ if (!non_swap_entry(entry))
+ goto fault;
+
++ if (is_device_private_entry(entry))
++ goto fault;
++
+ if (is_device_exclusive_entry(entry))
+ goto fault;
+
+@@ -300,7 +297,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+ * Since each architecture defines a struct page for the zero page, just
+ * fall through and treat it like a normal page.
+ */
+- if (pte_special(pte) && !pte_devmap(pte) &&
++ if (!vm_normal_page(walk->vma, addr, pte) &&
++ !pte_devmap(pte) &&
+ !is_zero_pfn(pte_pfn(pte))) {
+ if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
+ pte_unmap(ptep);
+@@ -518,7 +516,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+ struct hmm_range *range = hmm_vma_walk->range;
+ struct vm_area_struct *vma = walk->vma;
+
+- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
+ vma->vm_flags & VM_READ)
+ return 0;
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index c5142d237e482..98ff57c8eda69 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1926,7 +1926,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ {
+ struct mm_struct *mm = vma->vm_mm;
+ pgtable_t pgtable;
+- pmd_t _pmd;
++ pmd_t _pmd, old_pmd;
+ int i;
+
+ /*
+@@ -1937,7 +1937,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ *
+ * See Documentation/vm/mmu_notifier.rst
+ */
+- pmdp_huge_clear_flush(vma, haddr, pmd);
++ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pmd_populate(mm, &_pmd, pgtable);
+@@ -1946,6 +1946,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ pte_t *pte, entry;
+ entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+ entry = pte_mkspecial(entry);
++ if (pmd_uffd_wp(old_pmd))
++ entry = pte_mkuffd_wp(entry);
+ pte = pte_offset_map(&_pmd, haddr);
+ VM_BUG_ON(!pte_none(*pte));
+ set_pte_at(mm, haddr, pte, entry);
+@@ -2617,11 +2619,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ struct address_space *mapping = NULL;
+ int extra_pins, ret;
+ pgoff_t end;
++ bool is_hzp;
+
+- VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
+ VM_BUG_ON_PAGE(!PageLocked(head), head);
+ VM_BUG_ON_PAGE(!PageCompound(head), head);
+
++ is_hzp = is_huge_zero_page(head);
++ VM_WARN_ON_ONCE_PAGE(is_hzp, head);
++ if (is_hzp)
++ return -EBUSY;
++
+ if (PageWriteback(head))
+ return -EBUSY;
+
+@@ -2773,6 +2780,9 @@ void deferred_split_huge_page(struct page *page)
+ if (PageSwapCache(page))
+ return;
+
++ if (!list_empty(page_deferred_list(page)))
++ return;
++
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ if (list_empty(page_deferred_list(page))) {
+ count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+@@ -2874,14 +2884,15 @@ static void split_huge_pages_all(void)
+ unsigned long total = 0, split = 0;
+
+ pr_debug("Split all THPs\n");
+- for_each_populated_zone(zone) {
++ for_each_zone(zone) {
++ if (!managed_zone(zone))
++ continue;
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+- if (!pfn_valid(pfn))
+- continue;
++ int nr_pages;
+
+- page = pfn_to_page(pfn);
+- if (!get_page_unless_zero(page))
++ page = pfn_to_online_page(pfn);
++ if (!page || !get_page_unless_zero(page))
+ continue;
+
+ if (zone != page_zone(page))
+@@ -2892,8 +2903,10 @@ static void split_huge_pages_all(void)
+
+ total++;
+ lock_page(page);
++ nr_pages = thp_nr_pages(page);
+ if (!split_huge_page(page))
+ split++;
++ pfn += nr_pages - 1;
+ unlock_page(page);
+ next:
+ put_page(page);
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 95dc7b83381f9..2f5c1b2456ef2 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -82,6 +82,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
+
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++ unsigned long start, unsigned long end);
+
+ static inline bool subpool_is_free(struct hugepage_subpool *spool)
+ {
+@@ -2654,8 +2656,7 @@ retry:
+ * Fail with -EBUSY if not possible.
+ */
+ spin_unlock_irq(&hugetlb_lock);
+- if (!isolate_huge_page(old_page, list))
+- ret = -EBUSY;
++ ret = isolate_hugetlb(old_page, list);
+ spin_lock_irq(&hugetlb_lock);
+ goto free_new;
+ } else if (!HPageFreed(old_page)) {
+@@ -2731,7 +2732,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
+ if (hstate_is_gigantic(h))
+ return -ENOMEM;
+
+- if (page_count(head) && isolate_huge_page(head, list))
++ if (page_count(head) && !isolate_hugetlb(head, list))
+ ret = 0;
+ else if (!page_count(head))
+ ret = alloc_and_dissolve_huge_page(h, head, list);
+@@ -2813,11 +2814,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
+ page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
+ if (!page)
+ goto out_uncharge_cgroup;
++ spin_lock_irq(&hugetlb_lock);
+ if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+ SetHPageRestoreReserve(page);
+ h->resv_huge_pages--;
+ }
+- spin_lock_irq(&hugetlb_lock);
+ list_add(&page->lru, &h->hugepage_activelist);
+ /* Fall through */
+ }
+@@ -4164,6 +4165,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+ {
+ if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ return -EINVAL;
++
++ /*
++ * PMD sharing is only possible for PUD_SIZE-aligned address ranges
++ * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
++ * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++ */
++ if (addr & ~PUD_MASK) {
++ /*
++ * hugetlb_vm_op_split is called right before we attempt to
++ * split the VMA. We will need to unshare PMDs in the old and
++ * new VMAs, so let's unshare before we split.
++ */
++ unsigned long floor = addr & PUD_MASK;
++ unsigned long ceil = floor + PUD_SIZE;
++
++ if (floor >= vma->vm_start && ceil <= vma->vm_end)
++ hugetlb_unshare_pmds(vma, floor, ceil);
++ }
++
+ return 0;
+ }
+
+@@ -4439,6 +4459,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ struct hstate *h = hstate_vma(vma);
+ unsigned long sz = huge_page_size(h);
+ struct mmu_notifier_range range;
++ bool force_flush = false;
+
+ WARN_ON(!is_vm_hugetlb_page(vma));
+ BUG_ON(start & ~huge_page_mask(h));
+@@ -4467,10 +4488,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ ptl = huge_pte_lock(h, mm, ptep);
+ if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+ spin_unlock(ptl);
+- /*
+- * We just unmapped a page of PMDs by clearing a PUD.
+- * The caller's TLB flush range should cover this area.
+- */
++ tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
++ force_flush = true;
+ continue;
+ }
+
+@@ -4527,6 +4546,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ }
+ mmu_notifier_invalidate_range_end(&range);
+ tlb_end_vma(tlb, vma);
++
++ /*
++ * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
++ * could defer the flush until now, since by holding i_mmap_rwsem we
++ * guaranteed that the last refernece would not be dropped. But we must
++ * do the flushing before we return, as otherwise i_mmap_rwsem will be
++ * dropped and the last reference to the shared PMDs page might be
++ * dropped as well.
++ *
++ * In theory we could defer the freeing of the PMD pages as well, but
++ * huge_pmd_unshare() relies on the exact page_count for the PMD page to
++ * detect sharing, so we cannot defer the release of the page either.
++ * Instead, do flush now.
++ */
++ if (force_flush)
++ tlb_flush_mmu_tlbonly(tlb);
+ }
+
+ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+@@ -4829,7 +4864,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
+ unsigned long haddr,
+ unsigned long reason)
+ {
+- vm_fault_t ret;
+ u32 hash;
+ struct vm_fault vmf = {
+ .vma = vma,
+@@ -4846,18 +4880,14 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
+ };
+
+ /*
+- * hugetlb_fault_mutex and i_mmap_rwsem must be
+- * dropped before handling userfault. Reacquire
+- * after handling fault to make calling code simpler.
++ * vma_lock and hugetlb_fault_mutex must be dropped before handling
++ * userfault. Also mmap_lock will be dropped during handling
++ * userfault, any vma operation should be careful from here.
+ */
+ hash = hugetlb_fault_mutex_hash(mapping, idx);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
+- ret = handle_userfault(&vmf, reason);
+- i_mmap_lock_read(mapping);
+- mutex_lock(&hugetlb_fault_mutex_table[hash]);
+-
+- return ret;
++ return handle_userfault(&vmf, reason);
+ }
+
+ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
+@@ -4874,6 +4904,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
+ spinlock_t *ptl;
+ unsigned long haddr = address & huge_page_mask(h);
+ bool new_page, new_pagecache_page = false;
++ u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
+
+ /*
+ * Currently, we are forced to kill the process in the event the
+@@ -4883,7 +4914,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
+ if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
+ pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
+ current->pid);
+- return ret;
++ goto out;
+ }
+
+ /*
+@@ -4900,12 +4931,10 @@ retry:
+ page = find_lock_page(mapping, idx);
+ if (!page) {
+ /* Check for page in userfault range */
+- if (userfaultfd_missing(vma)) {
+- ret = hugetlb_handle_userfault(vma, mapping, idx,
++ if (userfaultfd_missing(vma))
++ return hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MISSING);
+- goto out;
+- }
+
+ page = alloc_huge_page(vma, haddr, 0);
+ if (IS_ERR(page)) {
+@@ -4965,10 +4994,9 @@ retry:
+ if (userfaultfd_minor(vma)) {
+ unlock_page(page);
+ put_page(page);
+- ret = hugetlb_handle_userfault(vma, mapping, idx,
++ return hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MINOR);
+- goto out;
+ }
+ }
+
+@@ -5019,6 +5047,8 @@ retry:
+
+ unlock_page(page);
+ out:
++ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
++ i_mmap_unlock_read(mapping);
+ return ret;
+
+ backout:
+@@ -5116,10 +5146,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+ entry = huge_ptep_get(ptep);
+- if (huge_pte_none(entry)) {
+- ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
+- goto out_mutex;
+- }
++ if (huge_pte_none(entry))
++ /*
++ * hugetlb_no_page will drop vma lock and hugetlb fault
++ * mutex internally, which make us return immediately.
++ */
++ return hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
+
+ ret = 0;
+
+@@ -5236,13 +5268,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ int ret = -ENOMEM;
+ struct page *page;
+ int writable;
+- bool new_pagecache_page = false;
++ bool page_in_pagecache = false;
+
+ if (is_continue) {
+ ret = -EFAULT;
+ page = find_lock_page(mapping, idx);
+ if (!page)
+ goto out;
++ page_in_pagecache = true;
+ } else if (!*pagep) {
+ /* If a page already exists, then it's UFFDIO_COPY for
+ * a non-missing case. Return -EEXIST.
+@@ -5298,6 +5331,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+
+ page = alloc_huge_page(dst_vma, dst_addr, 0);
+ if (IS_ERR(page)) {
++ put_page(*pagep);
+ ret = -ENOMEM;
+ *pagep = NULL;
+ goto out;
+@@ -5330,12 +5364,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ ret = huge_add_to_page_cache(page, mapping, idx);
+ if (ret)
+ goto out_release_nounlock;
+- new_pagecache_page = true;
++ page_in_pagecache = true;
+ }
+
+ ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
+ spin_lock(ptl);
+
++ ret = -EIO;
++ if (PageHWPoison(page))
++ goto out_release_unlock;
++
+ /*
+ * Recheck the i_size after holding PT lock to make sure not
+ * to leave any page mapped (as page_mapped()) beyond the end
+@@ -5354,7 +5392,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ if (!huge_pte_none(huge_ptep_get(dst_pte)))
+ goto out_release_unlock;
+
+- if (vm_shared) {
++ if (page_in_pagecache) {
+ page_dup_rmap(page, true);
+ } else {
+ ClearHPageRestoreReserve(page);
+@@ -5394,7 +5432,7 @@ out_release_unlock:
+ if (vm_shared || is_continue)
+ unlock_page(page);
+ out_release_nounlock:
+- if (!new_pagecache_page)
++ if (!page_in_pagecache)
+ restore_reserve_on_error(h, dst_vma, dst_addr, page);
+ put_page(page);
+ goto out;
+@@ -6044,7 +6082,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+ pud_clear(pud);
+ put_page(virt_to_page(ptep));
+ mm_dec_nr_pmds(mm);
+- *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
++ /*
++ * This update of passed address optimizes loops sequentially
++ * processing addresses in increments of huge page size (PMD_SIZE
++ * in this case). By clearing the pud, a PUD_SIZE area is unmapped.
++ * Update address to the 'last page' in the cleared area so that
++ * calling loop can move to first page past this area.
++ */
++ *addr |= PUD_SIZE - PMD_SIZE;
+ return 1;
+ }
+
+@@ -6161,12 +6206,13 @@ follow_huge_pd(struct vm_area_struct *vma,
+ }
+
+ struct page * __weak
+-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+- pmd_t *pmd, int flags)
++follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
+ {
++ struct hstate *h = hstate_vma(vma);
++ struct mm_struct *mm = vma->vm_mm;
+ struct page *page = NULL;
+ spinlock_t *ptl;
+- pte_t pte;
++ pte_t *ptep, pte;
+
+ /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
+@@ -6174,17 +6220,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ return NULL;
+
+ retry:
+- ptl = pmd_lockptr(mm, pmd);
+- spin_lock(ptl);
+- /*
+- * make sure that the address range covered by this pmd is not
+- * unmapped from other threads.
+- */
+- if (!pmd_huge(*pmd))
+- goto out;
+- pte = huge_ptep_get((pte_t *)pmd);
++ ptep = huge_pte_offset(mm, address, huge_page_size(h));
++ if (!ptep)
++ return NULL;
++
++ ptl = huge_pte_lock(h, mm, ptep);
++ pte = huge_ptep_get(ptep);
+ if (pte_present(pte)) {
+- page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
++ page = pte_page(pte) +
++ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+ /*
+ * try_grab_page() should always succeed here, because: a) we
+ * hold the pmd (ptl) lock, and b) we've just checked that the
+@@ -6200,7 +6244,7 @@ retry:
+ } else {
+ if (is_hugetlb_entry_migration(pte)) {
+ spin_unlock(ptl);
+- __migration_entry_wait(mm, (pte_t *)pmd, ptl);
++ __migration_entry_wait(mm, ptep, ptl);
+ goto retry;
+ }
+ /*
+@@ -6232,15 +6276,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
+ return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
+ }
+
+-bool isolate_huge_page(struct page *page, struct list_head *list)
++int isolate_hugetlb(struct page *page, struct list_head *list)
+ {
+- bool ret = true;
++ int ret = 0;
+
+ spin_lock_irq(&hugetlb_lock);
+ if (!PageHeadHuge(page) ||
+ !HPageMigratable(page) ||
+ !get_page_unless_zero(page)) {
+- ret = false;
++ ret = -EBUSY;
+ goto unlock;
+ }
+ ClearHPageMigratable(page);
+@@ -6267,6 +6311,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+ return ret;
+ }
+
++int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
++{
++ int ret;
++
++ spin_lock_irq(&hugetlb_lock);
++ ret = __get_huge_page_for_hwpoison(pfn, flags);
++ spin_unlock_irq(&hugetlb_lock);
++ return ret;
++}
++
+ void putback_active_hugepage(struct page *page)
+ {
+ spin_lock_irq(&hugetlb_lock);
+@@ -6315,26 +6369,21 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+ }
+ }
+
+-/*
+- * This function will unconditionally remove all the shared pmd pgtable entries
+- * within the specific vma for a hugetlbfs memory range.
+- */
+-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++ unsigned long start,
++ unsigned long end)
+ {
+ struct hstate *h = hstate_vma(vma);
+ unsigned long sz = huge_page_size(h);
+ struct mm_struct *mm = vma->vm_mm;
+ struct mmu_notifier_range range;
+- unsigned long address, start, end;
++ unsigned long address;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+- start = ALIGN(vma->vm_start, PUD_SIZE);
+- end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+-
+ if (start >= end)
+ return;
+
+@@ -6366,6 +6415,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+ mmu_notifier_invalidate_range_end(&range);
+ }
+
++/*
++ * This function will unconditionally remove all the shared pmd pgtable entries
++ * within the specific vma for a hugetlbfs memory range.
++ */
++void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
++{
++ hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
++ ALIGN_DOWN(vma->vm_end, PUD_SIZE));
++}
++
+ #ifdef CONFIG_CMA
+ static bool cma_reserve_called __initdata;
+
+diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
+index aff4d27ec2352..a1d6fc3c78b9c 100644
+--- a/mm/hwpoison-inject.c
++++ b/mm/hwpoison-inject.c
+@@ -48,7 +48,8 @@ static int hwpoison_inject(void *data, u64 val)
+
+ inject:
+ pr_info("Injecting memory failure at pfn %#lx\n", pfn);
+- return memory_failure(pfn, 0);
++ err = memory_failure(pfn, 0);
++ return (err == -EOPNOTSUPP) ? 0 : err;
+ }
+
+ static int hwpoison_unpoison(void *data, u64 val)
+diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
+index d8ccff4c1275e..1bd6a3f13467b 100644
+--- a/mm/kasan/quarantine.c
++++ b/mm/kasan/quarantine.c
+@@ -132,11 +132,22 @@ static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
+ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
+ {
+ void *object = qlink_to_object(qlink, cache);
++ struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
+ unsigned long flags;
+
+ if (IS_ENABLED(CONFIG_SLAB))
+ local_irq_save(flags);
+
++ /*
++ * If init_on_free is enabled and KASAN's free metadata is stored in
++ * the object, zero the metadata. Otherwise, the object's memory will
++ * not be properly zeroed, as KASAN saves the metadata after the slab
++ * allocator zeroes the object.
++ */
++ if (slab_want_init_on_free(cache) &&
++ cache->kasan_info.free_meta_offset == 0)
++ memzero_explicit(meta, sizeof(*meta));
++
+ /*
+ * As the object now gets freed from the quarantine, assume that its
+ * free track is no longer valid.
+@@ -304,6 +315,13 @@ static void per_cpu_remove_cache(void *arg)
+ struct qlist_head *q;
+
+ q = this_cpu_ptr(&cpu_quarantine);
++ /*
++ * Ensure ordering between the write to q->offline and
++ * per_cpu_remove_cache(), and prevent cpu_quarantine from being
++ * corrupted by an interrupt.
++ */
++ if (READ_ONCE(q->offline))
++ return;
+ qlist_move_cache(q, &to_free, cache);
+ qlist_free_all(&to_free, cache);
+ }
+diff --git a/mm/kasan/report.c b/mm/kasan/report.c
+index 884a950c70265..887af873733bc 100644
+--- a/mm/kasan/report.c
++++ b/mm/kasan/report.c
+@@ -117,16 +117,8 @@ static void end_report(unsigned long *flags, unsigned long addr)
+ pr_err("==================================================================\n");
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irqrestore(&report_lock, *flags);
+- if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) {
+- /*
+- * This thread may hit another WARN() in the panic path.
+- * Resetting this prevents additional WARN() from panicking the
+- * system on this thread. Other threads are blocked by the
+- * panic_mutex in panic().
+- */
+- panic_on_warn = 0;
+- panic("panic_on_warn set ...\n");
+- }
++ if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
++ check_panic_on_warn("KASAN");
+ if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)
+ panic("kasan.fault=panic set ...\n");
+ kasan_enable_current();
+diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
+index 8d95ee52d0194..dd79840e60964 100644
+--- a/mm/kasan/shadow.c
++++ b/mm/kasan/shadow.c
+@@ -493,7 +493,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
+
+ #else /* CONFIG_KASAN_VMALLOC */
+
+-int kasan_module_alloc(void *addr, size_t size)
++int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
+ {
+ void *ret;
+ size_t scaled_size;
+@@ -515,9 +515,14 @@ int kasan_module_alloc(void *addr, size_t size)
+ __builtin_return_address(0));
+
+ if (ret) {
++ struct vm_struct *vm = find_vm_area(addr);
+ __memset(ret, KASAN_SHADOW_INIT, shadow_size);
+- find_vm_area(addr)->flags |= VM_KASAN;
++ vm->flags |= VM_KASAN;
+ kmemleak_ignore(ret);
++
++ if (vm->flags & VM_DEFER_KMEMLEAK)
++ kmemleak_vmalloc(vm, size, gfp_mask);
++
+ return 0;
+ }
+
+diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
+index 6872cd5e53907..cb2bcf7730833 100644
+--- a/mm/kfence/Makefile
++++ b/mm/kfence/Makefile
+@@ -2,5 +2,5 @@
+
+ obj-$(CONFIG_KFENCE) := core.o report.o
+
+-CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
++CFLAGS_kfence_test.o := -fno-omit-frame-pointer -fno-optimize-sibling-calls
+ obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o
+diff --git a/mm/kfence/core.c b/mm/kfence/core.c
+index 7a97db8bc8e75..3eab72fb3d8c9 100644
+--- a/mm/kfence/core.c
++++ b/mm/kfence/core.c
+@@ -10,12 +10,15 @@
+ #include <linux/atomic.h>
+ #include <linux/bug.h>
+ #include <linux/debugfs.h>
++#include <linux/hash.h>
+ #include <linux/irq_work.h>
++#include <linux/jhash.h>
+ #include <linux/kcsan-checks.h>
+ #include <linux/kfence.h>
+ #include <linux/kmemleak.h>
+ #include <linux/list.h>
+ #include <linux/lockdep.h>
++#include <linux/log2.h>
+ #include <linux/memblock.h>
+ #include <linux/moduleparam.h>
+ #include <linux/random.h>
+@@ -82,6 +85,10 @@ static const struct kernel_param_ops sample_interval_param_ops = {
+ };
+ module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
+
++/* Pool usage% threshold when currently covered allocations are skipped. */
++static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
++module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
++
+ /* The pool of pages used for guard pages and objects. */
+ char *__kfence_pool __ro_after_init;
+ EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
+@@ -97,14 +104,41 @@ struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+ static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
+ static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+
+-#ifdef CONFIG_KFENCE_STATIC_KEYS
+-/* The static key to set up a KFENCE allocation. */
++/*
++ * The static key to set up a KFENCE allocation; or if static keys are not used
++ * to gate allocations, to avoid a load and compare if KFENCE is disabled.
++ */
+ DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
+-#endif
+
+ /* Gates the allocation, ensuring only one succeeds in a given period. */
+ atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
+
++/*
++ * A Counting Bloom filter of allocation coverage: limits currently covered
++ * allocations of the same source filling up the pool.
++ *
++ * Assuming a range of 15%-85% unique allocations in the pool at any point in
++ * time, the below parameters provide a probability of 0.02-0.33 for false
++ * positive hits respectively:
++ *
++ * P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)))^HNUM
++ */
++#define ALLOC_COVERED_HNUM 2
++#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
++#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER)
++#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER)
++#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1)
++static atomic_t alloc_covered[ALLOC_COVERED_SIZE];
++
++/* Stack depth used to determine uniqueness of an allocation. */
++#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)
++
++/*
++ * Randomness for stack hashes, making the same collisions across reboots and
++ * different machines less likely.
++ */
++static u32 stack_hash_seed __ro_after_init;
++
+ /* Statistics counters for debugfs. */
+ enum kfence_counter_id {
+ KFENCE_COUNTER_ALLOCATED,
+@@ -112,6 +146,9 @@ enum kfence_counter_id {
+ KFENCE_COUNTER_FREES,
+ KFENCE_COUNTER_ZOMBIES,
+ KFENCE_COUNTER_BUGS,
++ KFENCE_COUNTER_SKIP_INCOMPAT,
++ KFENCE_COUNTER_SKIP_CAPACITY,
++ KFENCE_COUNTER_SKIP_COVERED,
+ KFENCE_COUNTER_COUNT,
+ };
+ static atomic_long_t counters[KFENCE_COUNTER_COUNT];
+@@ -121,40 +158,67 @@ static const char *const counter_names[] = {
+ [KFENCE_COUNTER_FREES] = "total frees",
+ [KFENCE_COUNTER_ZOMBIES] = "zombie allocations",
+ [KFENCE_COUNTER_BUGS] = "total bugs",
++ [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)",
++ [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)",
++ [KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)",
+ };
+ static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
+
+ /* === Internals ============================================================ */
+
+-static bool kfence_protect(unsigned long addr)
++static inline bool should_skip_covered(void)
+ {
+- return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
++ unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;
++
++ return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
+ }
+
+-static bool kfence_unprotect(unsigned long addr)
++static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
+ {
+- return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
++ num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
++ num_entries = filter_irq_stacks(stack_entries, num_entries);
++ return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
+ }
+
+-static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
++/*
++ * Adds (or subtracts) count @val for allocation stack trace hash
++ * @alloc_stack_hash from Counting Bloom filter.
++ */
++static void alloc_covered_add(u32 alloc_stack_hash, int val)
+ {
+- long index;
++ int i;
+
+- /* The checks do not affect performance; only called from slow-paths. */
++ for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
++ atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
++ alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
++ }
++}
+
+- if (!is_kfence_address((void *)addr))
+- return NULL;
++/*
++ * Returns true if the allocation stack trace hash @alloc_stack_hash is
++ * currently contained (non-zero count) in Counting Bloom filter.
++ */
++static bool alloc_covered_contains(u32 alloc_stack_hash)
++{
++ int i;
+
+- /*
+- * May be an invalid index if called with an address at the edge of
+- * __kfence_pool, in which case we would report an "invalid access"
+- * error.
+- */
+- index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
+- if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
+- return NULL;
++ for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
++ if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
++ return false;
++ alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
++ }
++
++ return true;
++}
++
++static bool kfence_protect(unsigned long addr)
++{
++ return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
++}
+
+- return &kfence_metadata[index];
++static bool kfence_unprotect(unsigned long addr)
++{
++ return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
+ }
+
+ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+@@ -183,19 +247,26 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m
+ * Update the object's metadata state, including updating the alloc/free stacks
+ * depending on the state transition.
+ */
+-static noinline void metadata_update_state(struct kfence_metadata *meta,
+- enum kfence_object_state next)
++static noinline void
++metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
++ unsigned long *stack_entries, size_t num_stack_entries)
+ {
+ struct kfence_track *track =
+ next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
+
+ lockdep_assert_held(&meta->lock);
+
+- /*
+- * Skip over 1 (this) functions; noinline ensures we do not accidentally
+- * skip over the caller by never inlining.
+- */
+- track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
++ if (stack_entries) {
++ memcpy(track->stack_entries, stack_entries,
++ num_stack_entries * sizeof(stack_entries[0]));
++ } else {
++ /*
++ * Skip over 1 frame (this function); noinline ensures we do not
++ * accidentally skip over the caller by never inlining.
++ */
++ num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
++ }
++ track->num_stack_entries = num_stack_entries;
+ track->pid = task_pid_nr(current);
+ track->cpu = raw_smp_processor_id();
+ track->ts_nsec = local_clock(); /* Same source as printk timestamps. */
+@@ -257,7 +328,9 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta,
+ }
+ }
+
+-static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp)
++static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
++ unsigned long *stack_entries, size_t num_stack_entries,
++ u32 alloc_stack_hash)
+ {
+ struct kfence_metadata *meta = NULL;
+ unsigned long flags;
+@@ -271,8 +344,10 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
+ list_del_init(&meta->list);
+ }
+ raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+- if (!meta)
++ if (!meta) {
++ atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
+ return NULL;
++ }
+
+ if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
+ /*
+@@ -314,10 +389,12 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
+ addr = (void *)meta->addr;
+
+ /* Update remaining metadata. */
+- metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED);
++ metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
+ /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
+ WRITE_ONCE(meta->cache, cache);
+ meta->size = size;
++ meta->alloc_stack_hash = alloc_stack_hash;
++
+ for_each_canary(meta, set_canary_byte);
+
+ /* Set required struct page fields. */
+@@ -330,6 +407,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
+
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+
++ alloc_covered_add(alloc_stack_hash, 1);
++
+ /* Memory initialization. */
+
+ /*
+@@ -394,10 +473,12 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
+ memzero_explicit(addr, meta->size);
+
+ /* Mark the object as freed. */
+- metadata_update_state(meta, KFENCE_OBJECT_FREED);
++ metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
+
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+
++ alloc_covered_add(meta->alloc_stack_hash, -1);
++
+ /* Protect to detect use-after-frees. */
+ kfence_protect((unsigned long)addr);
+
+@@ -429,6 +510,7 @@ static bool __init kfence_init_pool(void)
+ unsigned long addr = (unsigned long)__kfence_pool;
+ struct page *pages;
+ int i;
++ char *p;
+
+ if (!__kfence_pool)
+ return false;
+@@ -447,6 +529,8 @@ static bool __init kfence_init_pool(void)
+ * enters __slab_free() slow-path.
+ */
+ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
++ struct page *page = &pages[i];
++
+ if (!i || (i % 2))
+ continue;
+
+@@ -454,7 +538,11 @@ static bool __init kfence_init_pool(void)
+ if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
+ goto err;
+
+- __SetPageSlab(&pages[i]);
++ __SetPageSlab(page);
++#ifdef CONFIG_MEMCG
++ page->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
++ MEMCG_DATA_OBJCGS;
++#endif
+ }
+
+ /*
+@@ -505,6 +593,16 @@ err:
+ * fails for the first page, and therefore expect addr==__kfence_pool in
+ * most failure cases.
+ */
++ for (p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
++ struct page *page = virt_to_page(p);
++
++ if (!PageSlab(page))
++ continue;
++#ifdef CONFIG_MEMCG
++ page->memcg_data = 0;
++#endif
++ __ClearPageSlab(page);
++ }
+ memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
+ __kfence_pool = NULL;
+ return false;
+@@ -577,12 +675,17 @@ static const struct file_operations objects_fops = {
+ .open = open_objects,
+ .read = seq_read,
+ .llseek = seq_lseek,
++ .release = seq_release,
+ };
+
+-static int __init kfence_debugfs_init(void)
++static int kfence_debugfs_init(void)
+ {
+- struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL);
++ struct dentry *kfence_dir;
++
++ if (!READ_ONCE(kfence_enabled))
++ return 0;
+
++ kfence_dir = debugfs_create_dir("kfence", NULL);
+ debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
+ debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
+ return 0;
+@@ -663,11 +766,14 @@ void __init kfence_init(void)
+ if (!kfence_sample_interval)
+ return;
+
++ stack_hash_seed = (u32)random_get_entropy();
+ if (!kfence_init_pool()) {
+ pr_err("%s failed\n", __func__);
+ return;
+ }
+
++ if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
++ static_branch_enable(&kfence_allocation_key);
+ WRITE_ONCE(kfence_enabled, true);
+ queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
+ pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+@@ -736,12 +842,18 @@ void kfence_shutdown_cache(struct kmem_cache *s)
+
+ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+ {
++ unsigned long stack_entries[KFENCE_STACK_DEPTH];
++ size_t num_stack_entries;
++ u32 alloc_stack_hash;
++
+ /*
+ * Perform size check before switching kfence_allocation_gate, so that
+ * we don't disable KFENCE without making an allocation.
+ */
+- if (size > PAGE_SIZE)
++ if (size > PAGE_SIZE) {
++ atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
+ return NULL;
++ }
+
+ /*
+ * Skip allocations from non-default zones, including DMA. We cannot
+@@ -749,15 +861,12 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+ * properties (e.g. reside in DMAable memory).
+ */
+ if ((flags & GFP_ZONEMASK) ||
+- (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
++ (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
++ atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
+ return NULL;
++ }
+
+- /*
+- * allocation_gate only needs to become non-zero, so it doesn't make
+- * sense to continue writing to it and pay the associated contention
+- * cost, in case we have a large number of concurrent allocations.
+- */
+- if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1)
++ if (atomic_inc_return(&kfence_allocation_gate) > 1)
+ return NULL;
+ #ifdef CONFIG_KFENCE_STATIC_KEYS
+ /*
+@@ -776,7 +885,25 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+ if (!READ_ONCE(kfence_enabled))
+ return NULL;
+
+- return kfence_guarded_alloc(s, size, flags);
++ num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);
++
++ /*
++ * Do expensive check for coverage of allocation in slow-path after
++ * allocation_gate has already become non-zero, even though it might
++ * mean not making any allocation within a given sample interval.
++ *
++ * This ensures reasonable allocation coverage when the pool is almost
++ * full, including avoiding long-lived allocations of the same source
++ * filling up the pool (e.g. pagecache allocations).
++ */
++ alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
++ if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
++ atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
++ return NULL;
++ }
++
++ return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
++ alloc_stack_hash);
+ }
+
+ size_t kfence_ksize(const void *addr)
+@@ -805,6 +932,9 @@ void __kfence_free(void *addr)
+ {
+ struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
++#ifdef CONFIG_MEMCG
++ KFENCE_WARN_ON(meta->objcg);
++#endif
+ /*
+ * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
+ * the object, as the object page may be recycled for other-typed
+diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
+index c1f23c61e5f91..600f2e2431d6d 100644
+--- a/mm/kfence/kfence.h
++++ b/mm/kfence/kfence.h
+@@ -87,10 +87,36 @@ struct kfence_metadata {
+ /* Allocation and free stack information. */
+ struct kfence_track alloc_track;
+ struct kfence_track free_track;
++ /* For updating alloc_covered on frees. */
++ u32 alloc_stack_hash;
++#ifdef CONFIG_MEMCG
++ struct obj_cgroup *objcg;
++#endif
+ };
+
+ extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
++static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
++{
++ long index;
++
++ /* The checks do not affect performance; only called from slow-paths. */
++
++ if (!is_kfence_address((void *)addr))
++ return NULL;
++
++ /*
++ * May be an invalid index if called with an address at the edge of
++ * __kfence_pool, in which case we would report an "invalid access"
++ * error.
++ */
++ index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
++ if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
++ return NULL;
++
++ return &kfence_metadata[index];
++}
++
+ /* KFENCE error types for report generation. */
+ enum kfence_error_type {
+ KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. */
+diff --git a/mm/kfence/report.c b/mm/kfence/report.c
+index f93a7b2a338be..cbd9456359b96 100644
+--- a/mm/kfence/report.c
++++ b/mm/kfence/report.c
+@@ -267,9 +267,55 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
+
+ lockdep_on();
+
+- if (panic_on_warn)
+- panic("panic_on_warn set ...\n");
++ check_panic_on_warn("KFENCE");
+
+ /* We encountered a memory safety error, taint the kernel! */
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
+ }
++
++#ifdef CONFIG_PRINTK
++static void kfence_to_kp_stack(const struct kfence_track *track, void **kp_stack)
++{
++ int i, j;
++
++ i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
++ for (j = 0; i < track->num_stack_entries && j < KS_ADDRS_COUNT; ++i, ++j)
++ kp_stack[j] = (void *)track->stack_entries[i];
++ if (j < KS_ADDRS_COUNT)
++ kp_stack[j] = NULL;
++}
++
++bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++{
++ struct kfence_metadata *meta = addr_to_metadata((unsigned long)object);
++ unsigned long flags;
++
++ if (!meta)
++ return false;
++
++ /*
++ * If state is UNUSED at least show the pointer requested; the rest
++ * would be garbage data.
++ */
++ kpp->kp_ptr = object;
++
++ /* Requesting info on a never-used object is almost certainly a bug. */
++ if (WARN_ON(meta->state == KFENCE_OBJECT_UNUSED))
++ return true;
++
++ raw_spin_lock_irqsave(&meta->lock, flags);
++
++ kpp->kp_page = page;
++ kpp->kp_slab_cache = meta->cache;
++ kpp->kp_objp = (void *)meta->addr;
++ kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack);
++ if (meta->state == KFENCE_OBJECT_FREED)
++ kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack);
++ /* get_stack_skipnr() ensures the first entry is outside allocator. */
++ kpp->kp_ret = kpp->kp_stack[0];
++
++ raw_spin_unlock_irqrestore(&meta->lock, flags);
++
++ return true;
++}
++#endif
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 8a8b3aa929370..203792e70ac1c 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -625,6 +625,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
+ result = SCAN_PTE_NON_PRESENT;
+ goto out;
+ }
++ if (pte_uffd_wp(pteval)) {
++ result = SCAN_PTE_UFFD_WP;
++ goto out;
++ }
+ page = vm_normal_page(vma, address, pteval);
+ if (unlikely(!page)) {
+ result = SCAN_PAGE_NULL;
+@@ -1146,14 +1150,17 @@ static void collapse_huge_page(struct mm_struct *mm,
+
+ pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
+ /*
+- * After this gup_fast can't run anymore. This also removes
+- * any huge TLB entry from the CPU so we won't allow
+- * huge and small TLB entries for the same virtual address
+- * to avoid the risk of CPU bugs in that area.
++ * This removes any huge TLB entry from the CPU so we won't allow
++ * huge and small TLB entries for the same virtual address to
++ * avoid the risk of CPU bugs in that area.
++ *
++ * Parallel fast GUP is fine since fast GUP will back off when
++ * it detects PMD is changed.
+ */
+ _pmd = pmdp_collapse_flush(vma, address, pmd);
+ spin_unlock(pmd_ptl);
+ mmu_notifier_invalidate_range_end(&range);
++ tlb_remove_table_sync_one();
+
+ spin_lock(pte_ptl);
+ isolated = __collapse_huge_page_isolate(vma, address, pte,
+@@ -1440,6 +1447,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ spinlock_t *ptl;
+ int count = 0;
+ int i;
++ struct mmu_notifier_range range;
+
+ if (!vma || !vma->vm_file ||
+ !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE))
+@@ -1466,6 +1474,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ if (!pmd)
+ goto drop_hpage;
+
++ /*
++ * We need to lock the mapping so that from here on, only GUP-fast and
++ * hardware page walks can access the parts of the page tables that
++ * we're operating on.
++ */
++ i_mmap_lock_write(vma->vm_file->f_mapping);
++
++ /*
++ * This spinlock should be unnecessary: Nobody else should be accessing
++ * the page tables under spinlock protection here, only
++ * lockless_pages_from_mm() and the hardware page walker can access page
++ * tables while all the high-level locks are held in write mode.
++ */
+ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+ /* step 1: check all mapped PTEs are to the right huge page */
+@@ -1512,12 +1533,23 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+ }
+
+ /* step 4: collapse pmd */
+- ptl = pmd_lock(vma->vm_mm, pmd);
++ /* we make no change to anon, but protect concurrent anon page lookup */
++ if (vma->anon_vma)
++ anon_vma_lock_write(vma->anon_vma);
++
++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr,
++ haddr + HPAGE_PMD_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
+ _pmd = pmdp_collapse_flush(vma, haddr, pmd);
+- spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
++ tlb_remove_table_sync_one();
++ mmu_notifier_invalidate_range_end(&range);
+ pte_free(mm, pmd_pgtable(_pmd));
+
++ if (vma->anon_vma)
++ anon_vma_unlock_write(vma->anon_vma);
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
++
+ drop_hpage:
+ unlock_page(hpage);
+ put_page(hpage);
+@@ -1525,6 +1557,7 @@ drop_hpage:
+
+ abort:
+ pte_unmap_unlock(start_pte, ptl);
++ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ goto drop_hpage;
+ }
+
+@@ -1573,7 +1606,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ * An alternative would be drop the check, but check that page
+ * table is clear before calling pmdp_collapse_flush() under
+ * ptl. It has higher chance to recover THP for the VMA, but
+- * has higher cost too.
++ * has higher cost too. It would also probably require locking
++ * the anon_vma.
+ */
+ if (vma->anon_vma)
+ continue;
+@@ -1595,12 +1629,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ */
+ if (mmap_write_trylock(mm)) {
+ if (!khugepaged_test_exit(mm)) {
+- spinlock_t *ptl = pmd_lock(mm, pmd);
++ struct mmu_notifier_range range;
++
++ mmu_notifier_range_init(&range,
++ MMU_NOTIFY_CLEAR, 0,
++ NULL, mm, addr,
++ addr + HPAGE_PMD_SIZE);
++ mmu_notifier_invalidate_range_start(&range);
+ /* assume page table is clear */
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+- spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
++ tlb_remove_table_sync_one();
+ pte_free(mm, pmd_pgtable(_pmd));
++ mmu_notifier_invalidate_range_end(&range);
+ }
+ mmap_write_unlock(mm);
+ } else {
+diff --git a/mm/kmemleak.c b/mm/kmemleak.c
+index b57383c17cf60..b78861b8e0139 100644
+--- a/mm/kmemleak.c
++++ b/mm/kmemleak.c
+@@ -789,6 +789,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
+ unsigned long flags;
+ struct kmemleak_object *object;
+ struct kmemleak_scan_area *area = NULL;
++ unsigned long untagged_ptr;
++ unsigned long untagged_objp;
+
+ object = find_and_get_object(ptr, 1);
+ if (!object) {
+@@ -797,6 +799,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
+ return;
+ }
+
++ untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
++ untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer);
++
+ if (scan_area_cache)
+ area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
+
+@@ -808,8 +813,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
+ goto out_unlock;
+ }
+ if (size == SIZE_MAX) {
+- size = object->pointer + object->size - ptr;
+- } else if (ptr + size > object->pointer + object->size) {
++ size = untagged_objp + object->size - untagged_ptr;
++ } else if (untagged_ptr + size > untagged_objp + object->size) {
+ kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
+ dump_object_info(object);
+ kmem_cache_free(scan_area_cache, area);
+@@ -1403,7 +1408,8 @@ static void kmemleak_scan(void)
+ {
+ unsigned long flags;
+ struct kmemleak_object *object;
+- int i;
++ struct zone *zone;
++ int __maybe_unused i;
+ int new_leaks = 0;
+
+ jiffies_last_scan = jiffies;
+@@ -1443,9 +1449,9 @@ static void kmemleak_scan(void)
+ * Struct page scanning for each node.
+ */
+ get_online_mems();
+- for_each_online_node(i) {
+- unsigned long start_pfn = node_start_pfn(i);
+- unsigned long end_pfn = node_end_pfn(i);
++ for_each_populated_zone(zone) {
++ unsigned long start_pfn = zone->zone_start_pfn;
++ unsigned long end_pfn = zone_end_pfn(zone);
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+@@ -1454,8 +1460,8 @@ static void kmemleak_scan(void)
+ if (!page)
+ continue;
+
+- /* only scan pages belonging to this node */
+- if (page_to_nid(page) != i)
++ /* only scan pages belonging to this zone */
++ if (page_zone(page) != zone)
+ continue;
+ /* only scan if page is in use */
+ if (page_count(page) == 0)
+diff --git a/mm/maccess.c b/mm/maccess.c
+index d3f1a1f0b1c1a..ded4bfaba7f37 100644
+--- a/mm/maccess.c
++++ b/mm/maccess.c
+@@ -99,7 +99,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
+ return src - unsafe_addr;
+ Efault:
+ pagefault_enable();
+- dst[-1] = '\0';
++ dst[0] = '\0';
+ return -EFAULT;
+ }
+ #else /* HAVE_GET_KERNEL_NOFAULT */
+diff --git a/mm/madvise.c b/mm/madvise.c
+index 0734db8d53a7a..6c099f8bb8e69 100644
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -436,8 +436,11 @@ regular_page:
+ continue;
+ }
+
+- /* Do not interfere with other mappings of this page */
+- if (page_mapcount(page) != 1)
++ /*
++ * Do not interfere with other mappings of this page, and
++ * skip non-LRU pages.
++ */
++ if (!PageLRU(page) || page_mapcount(page) != 1)
+ continue;
+
+ VM_BUG_ON_PAGE(PageTransCompound(page), page);
+@@ -968,6 +971,8 @@ static int madvise_inject_error(int behavior,
+ pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
+ pfn, start);
+ ret = memory_failure(pfn, MF_COUNT_INCREASED);
++ if (ret == -EOPNOTSUPP)
++ ret = 0;
+ }
+
+ if (ret)
+@@ -1294,8 +1299,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
+ iov_iter_advance(&iter, iovec.iov_len);
+ }
+
+- if (ret == 0)
+- ret = total_len - iov_iter_count(&iter);
++ ret = (total_len - iov_iter_count(&iter)) ? : ret;
+
+ release_mm:
+ mmput(mm);
+diff --git a/mm/memblock.c b/mm/memblock.c
+index 5096500b26473..2b7397781c99a 100644
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -366,14 +366,20 @@ void __init memblock_discard(void)
+ addr = __pa(memblock.reserved.regions);
+ size = PAGE_ALIGN(sizeof(struct memblock_region) *
+ memblock.reserved.max);
+- __memblock_free_late(addr, size);
++ if (memblock_reserved_in_slab)
++ kfree(memblock.reserved.regions);
++ else
++ __memblock_free_late(addr, size);
+ }
+
+ if (memblock.memory.regions != memblock_memory_init_regions) {
+ addr = __pa(memblock.memory.regions);
+ size = PAGE_ALIGN(sizeof(struct memblock_region) *
+ memblock.memory.max);
+- __memblock_free_late(addr, size);
++ if (memblock_memory_in_slab)
++ kfree(memblock.memory.regions);
++ else
++ __memblock_free_late(addr, size);
+ }
+
+ memblock_memory = NULL;
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 6da5020a8656d..b68b2fe639fdd 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -103,11 +103,6 @@ static bool do_memsw_account(void)
+ return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
+ }
+
+-/* memcg and lruvec stats flushing */
+-static void flush_memcg_stats_dwork(struct work_struct *w);
+-static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
+-static DEFINE_SPINLOCK(stats_flush_lock);
+-
+ #define THRESHOLDS_EVENTS_TARGET 128
+ #define SOFTLIMIT_EVENTS_TARGET 1024
+
+@@ -239,7 +234,7 @@ enum res_type {
+ iter != NULL; \
+ iter = mem_cgroup_iter(NULL, iter, NULL))
+
+-static inline bool should_force_charge(void)
++static inline bool task_is_dying(void)
+ {
+ return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
+ (current->flags & PF_EXITING);
+@@ -259,7 +254,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
+ }
+
+ #ifdef CONFIG_MEMCG_KMEM
+-extern spinlock_t css_set_lock;
++static DEFINE_SPINLOCK(objcg_lock);
+
+ bool mem_cgroup_kmem_disabled(void)
+ {
+@@ -303,9 +298,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
+ if (nr_pages)
+ obj_cgroup_uncharge_pages(objcg, nr_pages);
+
+- spin_lock_irqsave(&css_set_lock, flags);
++ spin_lock_irqsave(&objcg_lock, flags);
+ list_del(&objcg->list);
+- spin_unlock_irqrestore(&css_set_lock, flags);
++ spin_unlock_irqrestore(&objcg_lock, flags);
+
+ percpu_ref_exit(ref);
+ kfree_rcu(objcg, rcu);
+@@ -337,7 +332,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
+
+ objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
+
+- spin_lock_irq(&css_set_lock);
++ spin_lock_irq(&objcg_lock);
+
+ /* 1) Ready to reparent active objcg. */
+ list_add(&objcg->list, &memcg->objcg_list);
+@@ -347,7 +342,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
+ /* 3) Move already reparented objcgs to the parent's list */
+ list_splice(&memcg->objcg_list, &parent->objcg_list);
+
+- spin_unlock_irq(&css_set_lock);
++ spin_unlock_irq(&objcg_lock);
+
+ percpu_ref_kill(&objcg->refcnt);
+ }
+@@ -635,6 +630,74 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
+ return mz;
+ }
+
++/*
++ * memcg and lruvec stats flushing
++ *
++ * Many codepaths leading to stats update or read are performance sensitive and
++ * adding stats flushing in such codepaths is not desirable. So, to optimize
++ * the flushing, the kernel does:
++ *
++ * 1) Periodically and asynchronously flush the stats every 2 seconds to not let
++ * rstat update tree grow unbounded.
++ *
++ * 2) Flush the stats synchronously on reader side only when there are more than
++ * (MEMCG_CHARGE_BATCH * nr_cpus) update events. Though this optimization
++ * lets the stats be out of sync by at most (MEMCG_CHARGE_BATCH * nr_cpus),
++ * the skew lasts only 2 seconds due to (1).
++ */
++static void flush_memcg_stats_dwork(struct work_struct *w);
++static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
++static DEFINE_SPINLOCK(stats_flush_lock);
++static DEFINE_PER_CPU(unsigned int, stats_updates);
++static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
++static u64 flush_next_time;
++
++#define FLUSH_TIME (2UL*HZ)
++
++static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
++{
++ unsigned int x;
++
++ cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
++
++ x = __this_cpu_add_return(stats_updates, abs(val));
++ if (x > MEMCG_CHARGE_BATCH) {
++ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
++ __this_cpu_write(stats_updates, 0);
++ }
++}
++
++static void __mem_cgroup_flush_stats(void)
++{
++ unsigned long flag;
++
++ if (!spin_trylock_irqsave(&stats_flush_lock, flag))
++ return;
++
++ flush_next_time = jiffies_64 + 2*FLUSH_TIME;
++ cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
++ atomic_set(&stats_flush_threshold, 0);
++ spin_unlock_irqrestore(&stats_flush_lock, flag);
++}
++
++void mem_cgroup_flush_stats(void)
++{
++ if (atomic_read(&stats_flush_threshold) > num_online_cpus())
++ __mem_cgroup_flush_stats();
++}
++
++void mem_cgroup_flush_stats_delayed(void)
++{
++ if (time_after64(jiffies_64, flush_next_time))
++ mem_cgroup_flush_stats();
++}
++
++static void flush_memcg_stats_dwork(struct work_struct *w)
++{
++ __mem_cgroup_flush_stats();
++ queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
++}
++
+ /**
+ * __mod_memcg_state - update cgroup memory statistics
+ * @memcg: the memory cgroup
+@@ -647,7 +710,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
+ return;
+
+ __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+- cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
++ memcg_rstat_updated(memcg, val);
+ }
+
+ /* idx can be of type enum memcg_stat_item or node_stat_item. */
+@@ -675,10 +738,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ memcg = pn->memcg;
+
+ /* Update memcg */
+- __mod_memcg_state(memcg, idx, val);
++ __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+
+ /* Update lruvec */
+ __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
++
++ memcg_rstat_updated(memcg, val);
+ }
+
+ /**
+@@ -780,7 +845,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ return;
+
+ __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+- cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
++ memcg_rstat_updated(memcg, count);
+ }
+
+ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
+@@ -1414,7 +1479,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
+ *
+ * Current memory state:
+ */
+- cgroup_rstat_flush(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+
+ for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
+ u64 size;
+@@ -1575,7 +1640,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ * A few threads which were not waiting at mutex_lock_killable() can
+ * fail to bail out. Therefore, check again after holding oom_lock.
+ */
+- ret = should_force_charge() || out_of_memory(&oc);
++ ret = task_is_dying() || out_of_memory(&oc);
+
+ unlock:
+ mutex_unlock(&oom_lock);
+@@ -2530,6 +2595,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ struct page_counter *counter;
+ enum oom_status oom_status;
+ unsigned long nr_reclaimed;
++ bool passed_oom = false;
+ bool may_swap = true;
+ bool drained = false;
+ unsigned long pflags;
+@@ -2564,15 +2630,6 @@ retry:
+ if (gfp_mask & __GFP_ATOMIC)
+ goto force;
+
+- /*
+- * Unlike in global OOM situations, memcg is not in a physical
+- * memory shortage. Allow dying and OOM-killed tasks to
+- * bypass the last charges so that they can exit quickly and
+- * free their memory.
+- */
+- if (unlikely(should_force_charge()))
+- goto force;
+-
+ /*
+ * Prevent unbounded recursion when reclaim operations need to
+ * allocate memory. This might exceed the limits temporarily,
+@@ -2630,8 +2687,9 @@ retry:
+ if (gfp_mask & __GFP_RETRY_MAYFAIL)
+ goto nomem;
+
+- if (fatal_signal_pending(current))
+- goto force;
++ /* Avoid endless loop for tasks bypassed by the oom killer */
++ if (passed_oom && task_is_dying())
++ goto nomem;
+
+ /*
+ * keep retrying as long as the memcg oom killer is able to make
+@@ -2640,14 +2698,10 @@ retry:
+ */
+ oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
+ get_order(nr_pages * PAGE_SIZE));
+- switch (oom_status) {
+- case OOM_SUCCESS:
++ if (oom_status == OOM_SUCCESS) {
++ passed_oom = true;
+ nr_retries = MAX_RECLAIM_RETRIES;
+ goto retry;
+- case OOM_FAILED:
+- goto force;
+- default:
+- goto nomem;
+ }
+ nomem:
+ if (!(gfp_mask & __GFP_NOFAIL))
+@@ -3518,8 +3572,7 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+ unsigned long val;
+
+ if (mem_cgroup_is_root(memcg)) {
+- /* mem_cgroup_threshold() calls here from irqsafe context */
+- cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+ val = memcg_page_state(memcg, NR_FILE_PAGES) +
+ memcg_page_state(memcg, NR_ANON_MAPPED);
+ if (swap)
+@@ -3819,6 +3872,10 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
+ {
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
++ pr_warn_once("Cgroup memory moving (move_charge_at_immigrate) is deprecated. "
++ "Please report your usecase to linux-mm@kvack.org if you "
++ "depend on this functionality.\n");
++
+ if (val & ~MOVE_MASK)
+ return -EINVAL;
+
+@@ -3900,7 +3957,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
+ int nid;
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+- cgroup_rstat_flush(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+
+ for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+ seq_printf(m, "%s=%lu", stat->name,
+@@ -3972,7 +4029,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
+
+ BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
+
+- cgroup_rstat_flush(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+
+ for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
+ unsigned long nr;
+@@ -4475,7 +4532,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
+ struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+ struct mem_cgroup *parent;
+
+- cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+
+ *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+@@ -4736,6 +4793,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ unsigned int efd, cfd;
+ struct fd efile;
+ struct fd cfile;
++ struct dentry *cdentry;
+ const char *name;
+ char *endp;
+ int ret;
+@@ -4786,6 +4844,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ if (ret < 0)
+ goto out_put_cfile;
+
++ /*
++ * The control file must be a regular cgroup1 file. As a regular cgroup
++ * file can't be renamed, it's safe to access its name afterwards.
++ */
++ cdentry = cfile.file->f_path.dentry;
++ if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) {
++ ret = -EINVAL;
++ goto out_put_cfile;
++ }
++
+ /*
+ * Determine the event callbacks and set them in @event. This used
+ * to be done via struct cftype but cgroup core no longer knows
+@@ -4794,7 +4862,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ *
+ * DO NOT ADD NEW FILES.
+ */
+- name = cfile.file->f_path.dentry->d_name.name;
++ name = cdentry->d_name.name;
+
+ if (!strcmp(name, "memory.usage_in_bytes")) {
+ event->register_event = mem_cgroup_usage_register_event;
+@@ -4818,7 +4886,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ * automatically removed on cgroup destruction but the removal is
+ * asynchronous, so take an extra ref on @css.
+ */
+- cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
++ cfile_css = css_tryget_online_from_dir(cdentry->d_parent,
+ &memory_cgrp_subsys);
+ ret = -EINVAL;
+ if (IS_ERR(cfile_css))
+@@ -5341,21 +5409,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
+ memcg_wb_domain_size_changed(memcg);
+ }
+
+-void mem_cgroup_flush_stats(void)
+-{
+- if (!spin_trylock(&stats_flush_lock))
+- return;
+-
+- cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
+- spin_unlock(&stats_flush_lock);
+-}
+-
+-static void flush_memcg_stats_dwork(struct work_struct *w)
+-{
+- mem_cgroup_flush_stats();
+- queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+-}
+-
+ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+ {
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+@@ -6373,7 +6426,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
+ int i;
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+- cgroup_rstat_flush(memcg->css.cgroup);
++ mem_cgroup_flush_stats();
+
+ for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
+ int nid;
+@@ -7077,7 +7130,7 @@ static int __init cgroup_memory(char *s)
+ if (!strcmp(token, "nokmem"))
+ cgroup_memory_nokmem = true;
+ }
+- return 0;
++ return 1;
+ }
+ __setup("cgroup.memory=", cgroup_memory);
+
+diff --git a/mm/memfd.c b/mm/memfd.c
+index 081dd33e6a61b..a73af8be9c285 100644
+--- a/mm/memfd.c
++++ b/mm/memfd.c
+@@ -31,20 +31,28 @@
+ static void memfd_tag_pins(struct xa_state *xas)
+ {
+ struct page *page;
+- unsigned int tagged = 0;
++ int latency = 0;
++ int cache_count;
+
+ lru_add_drain();
+
+ xas_lock_irq(xas);
+ xas_for_each(xas, page, ULONG_MAX) {
+- if (xa_is_value(page))
+- continue;
+- page = find_subpage(page, xas->xa_index);
+- if (page_count(page) - page_mapcount(page) > 1)
++ cache_count = 1;
++ if (!xa_is_value(page) &&
++ PageTransHuge(page) && !PageHuge(page))
++ cache_count = HPAGE_PMD_NR;
++
++ if (!xa_is_value(page) &&
++ page_count(page) - total_mapcount(page) != cache_count)
+ xas_set_mark(xas, MEMFD_TAG_PINNED);
++ if (cache_count != 1)
++ xas_set(xas, page->index + cache_count);
+
+- if (++tagged % XA_CHECK_SCHED)
++ latency += cache_count;
++ if (latency < XA_CHECK_SCHED)
+ continue;
++ latency = 0;
+
+ xas_pause(xas);
+ xas_unlock_irq(xas);
+@@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping)
+
+ error = 0;
+ for (scan = 0; scan <= LAST_SCAN; scan++) {
+- unsigned int tagged = 0;
++ int latency = 0;
++ int cache_count;
+
+ if (!xas_marked(&xas, MEMFD_TAG_PINNED))
+ break;
+@@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping)
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+ bool clear = true;
+- if (xa_is_value(page))
+- continue;
+- page = find_subpage(page, xas.xa_index);
+- if (page_count(page) - page_mapcount(page) != 1) {
++
++ cache_count = 1;
++ if (!xa_is_value(page) &&
++ PageTransHuge(page) && !PageHuge(page))
++ cache_count = HPAGE_PMD_NR;
++
++ if (!xa_is_value(page) && cache_count !=
++ page_count(page) - total_mapcount(page)) {
+ /*
+ * On the last scan, we clean up all those tags
+ * we inserted; but make a note that we still
+@@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping)
+ }
+ if (clear)
+ xas_clear_mark(&xas, MEMFD_TAG_PINNED);
+- if (++tagged % XA_CHECK_SCHED)
++
++ latency += cache_count;
++ if (latency < XA_CHECK_SCHED)
+ continue;
++ latency = 0;
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+@@ -314,7 +330,8 @@ SYSCALL_DEFINE2(memfd_create,
+
+ if (flags & MFD_ALLOW_SEALING) {
+ file_seals = memfd_file_seals_ptr(file);
+- *file_seals &= ~F_SEAL_SEAL;
++ if (file_seals)
++ *file_seals &= ~F_SEAL_SEAL;
+ }
+
+ fd_install(fd, file);
+diff --git a/mm/memory-failure.c b/mm/memory-failure.c
+index bdbbb32211a58..bcd71d8736be5 100644
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -57,6 +57,7 @@
+ #include <linux/ratelimit.h>
+ #include <linux/page-isolation.h>
+ #include <linux/pagewalk.h>
++#include <linux/shmem_fs.h>
+ #include "internal.h"
+ #include "ras/ras_event.h"
+
+@@ -700,13 +701,18 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
+ };
+ priv.tk.tsk = p;
+
++ if (!p->mm)
++ return -EFAULT;
++
+ mmap_read_lock(p->mm);
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ (void *)&priv);
+ if (ret == 1 && priv.tk.addr)
+ kill_proc(&priv.tk, pfn, flags);
++ else
++ ret = 0;
+ mmap_read_unlock(p->mm);
+- return ret ? -EFAULT : -EHWPOISON;
++ return ret > 0 ? -EHWPOISON : -EFAULT;
+ }
+
+ static const char *action_name[] = {
+@@ -806,12 +812,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
+ return ret;
+ }
+
++struct page_state {
++ unsigned long mask;
++ unsigned long res;
++ enum mf_action_page_type type;
++
++ /* Callback ->action() has to unlock the relevant page inside it. */
++ int (*action)(struct page_state *ps, struct page *p);
++};
++
++/*
++ * Return true if page is still referenced by others, otherwise return
++ * false.
++ *
++ * The extra_pins is true when one extra refcount is expected.
++ */
++static bool has_extra_refcount(struct page_state *ps, struct page *p,
++ bool extra_pins)
++{
++ int count = page_count(p) - 1;
++
++ if (extra_pins)
++ count -= 1;
++
++ if (count > 0) {
++ pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
++ page_to_pfn(p), action_page_types[ps->type], count);
++ return true;
++ }
++
++ return false;
++}
++
+ /*
+ * Error hit kernel page.
+ * Do nothing, try to be lucky and not touch this instead. For a few cases we
+ * could be more sophisticated.
+ */
+-static int me_kernel(struct page *p, unsigned long pfn)
++static int me_kernel(struct page_state *ps, struct page *p)
+ {
+ unlock_page(p);
+ return MF_IGNORED;
+@@ -820,9 +858,9 @@ static int me_kernel(struct page *p, unsigned long pfn)
+ /*
+ * Page in unknown state. Do nothing.
+ */
+-static int me_unknown(struct page *p, unsigned long pfn)
++static int me_unknown(struct page_state *ps, struct page *p)
+ {
+- pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
++ pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
+ unlock_page(p);
+ return MF_FAILED;
+ }
+@@ -830,10 +868,11 @@ static int me_unknown(struct page *p, unsigned long pfn)
+ /*
+ * Clean (or cleaned) page cache page.
+ */
+-static int me_pagecache_clean(struct page *p, unsigned long pfn)
++static int me_pagecache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+ struct address_space *mapping;
++ bool extra_pins;
+
+ delete_from_lru_cache(p);
+
+@@ -862,14 +901,24 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
+ goto out;
+ }
+
++ /*
++ * The shmem page is kept in the page cache instead of being truncated,
++ * so it is expected to have an extra refcount after error handling.
++ */
++ extra_pins = shmem_mapping(mapping);
++
+ /*
+ * Truncation is a bit tricky. Enable it per file system for now.
+ *
+ * Open: to take i_rwsem or not for this? Right now we don't.
+ */
+- ret = truncate_error_page(p, pfn, mapping);
++ ret = truncate_error_page(p, page_to_pfn(p), mapping);
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ out:
+ unlock_page(p);
++
+ return ret;
+ }
+
+@@ -878,7 +927,7 @@ out:
+ * Issues: when the error hit a hole page the error is not properly
+ * propagated.
+ */
+-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
++static int me_pagecache_dirty(struct page_state *ps, struct page *p)
+ {
+ struct address_space *mapping = page_mapping(p);
+
+@@ -922,7 +971,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+ mapping_set_error(mapping, -EIO);
+ }
+
+- return me_pagecache_clean(p, pfn);
++ return me_pagecache_clean(ps, p);
+ }
+
+ /*
+@@ -944,9 +993,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
++static int me_swapcache_dirty(struct page_state *ps, struct page *p)
+ {
+ int ret;
++ bool extra_pins = false;
+
+ ClearPageDirty(p);
+ /* Trigger EIO in shmem: */
+@@ -954,10 +1004,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+ unlock_page(p);
++
++ if (ret == MF_DELAYED)
++ extra_pins = true;
++
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+-static int me_swapcache_clean(struct page *p, unsigned long pfn)
++static int me_swapcache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+
+@@ -965,6 +1022,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+ unlock_page(p);
++
++ if (has_extra_refcount(ps, p, false))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+@@ -974,18 +1035,21 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
+ * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
+ * To narrow down kill region to one page, we need to break up pmd.
+ */
+-static int me_huge_page(struct page *p, unsigned long pfn)
++static int me_huge_page(struct page_state *ps, struct page *p)
+ {
+ int res;
+ struct page *hpage = compound_head(p);
+ struct address_space *mapping;
++ bool extra_pins = false;
+
+ if (!PageHuge(hpage))
+ return MF_DELAYED;
+
+ mapping = page_mapping(hpage);
+ if (mapping) {
+- res = truncate_error_page(hpage, pfn, mapping);
++ res = truncate_error_page(hpage, page_to_pfn(p), mapping);
++ /* The page is kept in page cache. */
++ extra_pins = true;
+ unlock_page(hpage);
+ } else {
+ res = MF_FAILED;
+@@ -1003,6 +1067,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
+ }
+ }
+
++ if (has_extra_refcount(ps, p, extra_pins))
++ res = MF_FAILED;
++
+ return res;
+ }
+
+@@ -1028,14 +1095,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
+ #define slab (1UL << PG_slab)
+ #define reserved (1UL << PG_reserved)
+
+-static struct page_state {
+- unsigned long mask;
+- unsigned long res;
+- enum mf_action_page_type type;
+-
+- /* Callback ->action() has to unlock the relevant page inside it. */
+- int (*action)(struct page *p, unsigned long pfn);
+-} error_states[] = {
++static struct page_state error_states[] = {
+ { reserved, reserved, MF_MSG_KERNEL, me_kernel },
+ /*
+ * free pages are specially detected outside this table:
+@@ -1095,19 +1155,10 @@ static int page_action(struct page_state *ps, struct page *p,
+ unsigned long pfn)
+ {
+ int result;
+- int count;
+
+ /* page p should be unlocked after returning from ps->action(). */
+- result = ps->action(p, pfn);
++ result = ps->action(ps, p);
+
+- count = page_count(p) - 1;
+- if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
+- count--;
+- if (count > 0) {
+- pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+- pfn, action_page_types[ps->type], count);
+- result = MF_FAILED;
+- }
+ action_result(pfn, ps->type, result);
+
+ /* Could do more checks here if page looks ok */
+@@ -1215,7 +1266,7 @@ try_again:
+ }
+ out:
+ if (ret == -EIO)
+- dump_page(p, "hwpoison: unhandlable page");
++ pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p));
+
+ return ret;
+ }
+@@ -1400,14 +1451,11 @@ static int identify_page_state(unsigned long pfn, struct page *p,
+ static int try_to_split_thp_page(struct page *page, const char *msg)
+ {
+ lock_page(page);
+- if (!PageAnon(page) || unlikely(split_huge_page(page))) {
++ if (unlikely(split_huge_page(page))) {
+ unsigned long pfn = page_to_pfn(page);
+
+ unlock_page(page);
+- if (!PageAnon(page))
+- pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+- else
+- pr_info("%s: %#lx: thp split failed\n", msg, pfn);
++ pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+ put_page(page);
+ return -EBUSY;
+ }
+@@ -1416,64 +1464,115 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
+ return 0;
+ }
+
+-static int memory_failure_hugetlb(unsigned long pfn, int flags)
++/*
++ * Called from hugetlb code with hugetlb_lock held.
++ *
++ * Return values:
++ * 0 - free hugepage
++ * 1 - in-use hugepage
++ * 2 - not a hugepage
++ * -EBUSY - the hugepage is busy (try to retry)
++ * -EHWPOISON - the hugepage is already hwpoisoned
++ */
++int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
++{
++ struct page *page = pfn_to_page(pfn);
++ struct page *head = compound_head(page);
++ int ret = 2; /* fallback to normal page handling */
++ bool count_increased = false;
++
++ if (!PageHeadHuge(head))
++ goto out;
++
++ if (flags & MF_COUNT_INCREASED) {
++ ret = 1;
++ count_increased = true;
++ } else if (HPageFreed(head) || HPageMigratable(head)) {
++ ret = get_page_unless_zero(head);
++ if (ret)
++ count_increased = true;
++ } else {
++ ret = -EBUSY;
++ goto out;
++ }
++
++ if (TestSetPageHWPoison(head)) {
++ ret = -EHWPOISON;
++ goto out;
++ }
++
++ return ret;
++out:
++ if (count_increased)
++ put_page(head);
++ return ret;
++}
++
++#ifdef CONFIG_HUGETLB_PAGE
++/*
++ * Taking refcount of hugetlb pages needs extra care about race conditions
++ * with basic operations like hugepage allocation/free/demotion.
++ * So some of prechecks for hwpoison (pinning, and testing/setting
++ * PageHWPoison) should be done in single hugetlb_lock range.
++ */
++static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+ {
+- struct page *p = pfn_to_page(pfn);
+- struct page *head = compound_head(p);
+ int res;
++ struct page *p = pfn_to_page(pfn);
++ struct page *head;
+ unsigned long page_flags;
++ bool retry = true;
+
+- if (TestSetPageHWPoison(head)) {
+- pr_err("Memory failure: %#lx: already hardware poisoned\n",
+- pfn);
+- res = -EHWPOISON;
+- if (flags & MF_ACTION_REQUIRED)
++ *hugetlb = 1;
++retry:
++ res = get_huge_page_for_hwpoison(pfn, flags);
++ if (res == 2) { /* fallback to normal page handling */
++ *hugetlb = 0;
++ return 0;
++ } else if (res == -EHWPOISON) {
++ pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
++ if (flags & MF_ACTION_REQUIRED) {
++ head = compound_head(p);
+ res = kill_accessing_process(current, page_to_pfn(head), flags);
++ }
++ return res;
++ } else if (res == -EBUSY) {
++ if (retry) {
++ retry = false;
++ goto retry;
++ }
++ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+ return res;
+ }
+
+- num_poisoned_pages_inc();
++ head = compound_head(p);
++ lock_page(head);
+
+- if (!(flags & MF_COUNT_INCREASED)) {
+- res = get_hwpoison_page(p, flags);
+- if (!res) {
+- /*
+- * Check "filter hit" and "race with other subpage."
+- */
+- lock_page(head);
+- if (PageHWPoison(head)) {
+- if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+- || (p != head && TestSetPageHWPoison(head))) {
+- num_poisoned_pages_dec();
+- unlock_page(head);
+- return 0;
+- }
+- }
+- unlock_page(head);
+- res = MF_FAILED;
+- if (__page_handle_poison(p)) {
+- page_ref_inc(p);
+- res = MF_RECOVERED;
+- }
+- action_result(pfn, MF_MSG_FREE_HUGE, res);
+- return res == MF_RECOVERED ? 0 : -EBUSY;
+- } else if (res < 0) {
+- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+- return -EBUSY;
+- }
++ if (hwpoison_filter(p)) {
++ ClearPageHWPoison(head);
++ res = -EOPNOTSUPP;
++ goto out;
+ }
+
+- lock_page(head);
+- page_flags = head->flags;
++ num_poisoned_pages_inc();
+
+- if (!PageHWPoison(head)) {
+- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
+- num_poisoned_pages_dec();
++ /*
++ * Handling free hugepage. The possible race with hugepage allocation
++ * or demotion can be prevented by PageHWPoison flag.
++ */
++ if (res == 0) {
+ unlock_page(head);
+- put_page(head);
+- return 0;
++ res = MF_FAILED;
++ if (__page_handle_poison(p)) {
++ page_ref_inc(p);
++ res = MF_RECOVERED;
++ }
++ action_result(pfn, MF_MSG_FREE_HUGE, res);
++ return res == MF_RECOVERED ? 0 : -EBUSY;
+ }
+
++ page_flags = head->flags;
++
+ /*
+ * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
+ * simply disable it. In order to make it work properly, we need
+@@ -1500,6 +1599,12 @@ out:
+ unlock_page(head);
+ return res;
+ }
++#else
++static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
++{
++ return 0;
++}
++#endif
+
+ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
+ struct dev_pagemap *pgmap)
+@@ -1536,7 +1641,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
+ goto out;
+
+ if (hwpoison_filter(page)) {
+- rc = 0;
++ rc = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+@@ -1587,6 +1692,8 @@ out:
+ return rc;
+ }
+
++static DEFINE_MUTEX(mf_mutex);
++
+ /**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+@@ -1603,6 +1710,10 @@ out:
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
++ *
++ * Return: 0 for successfully handled the memory error,
++ * -EOPNOTSUPP for memory_filter() filtered the error event,
++ * < 0(except -EOPNOTSUPP) on failure.
+ */
+ int memory_failure(unsigned long pfn, int flags)
+ {
+@@ -1613,7 +1724,7 @@ int memory_failure(unsigned long pfn, int flags)
+ int res = 0;
+ unsigned long page_flags;
+ bool retry = true;
+- static DEFINE_MUTEX(mf_mutex);
++ int hugetlb = 0;
+
+ if (!sysctl_memory_failure_recovery)
+ panic("Memory failure on page %lx", pfn);
+@@ -1634,10 +1745,9 @@ int memory_failure(unsigned long pfn, int flags)
+ mutex_lock(&mf_mutex);
+
+ try_again:
+- if (PageHuge(p)) {
+- res = memory_failure_hugetlb(pfn, flags);
++ res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
++ if (hugetlb)
+ goto unlock_mutex;
+- }
+
+ if (TestSetPageHWPoison(p)) {
+ pr_err("Memory failure: %#lx: already hardware poisoned\n",
+@@ -1747,21 +1857,12 @@ try_again:
+ */
+ page_flags = p->flags;
+
+- /*
+- * unpoison always clear PG_hwpoison inside page lock
+- */
+- if (!PageHWPoison(p)) {
+- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
+- num_poisoned_pages_dec();
+- unlock_page(p);
+- put_page(p);
+- goto unlock_mutex;
+- }
+ if (hwpoison_filter(p)) {
+ if (TestClearPageHWPoison(p))
+ num_poisoned_pages_dec();
+ unlock_page(p);
+ put_page(p);
++ res = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
+@@ -1937,6 +2038,7 @@ int unpoison_memory(unsigned long pfn)
+ struct page *page;
+ struct page *p;
+ int freeit = 0;
++ int ret = 0;
+ unsigned long flags = 0;
+ static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+@@ -1947,39 +2049,30 @@ int unpoison_memory(unsigned long pfn)
+ p = pfn_to_page(pfn);
+ page = compound_head(p);
+
++ mutex_lock(&mf_mutex);
++
+ if (!PageHWPoison(p)) {
+ unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
+ pfn, &unpoison_rs);
+- return 0;
++ goto unlock_mutex;
+ }
+
+ if (page_count(page) > 1) {
+ unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
+ pfn, &unpoison_rs);
+- return 0;
++ goto unlock_mutex;
+ }
+
+ if (page_mapped(page)) {
+ unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
+ pfn, &unpoison_rs);
+- return 0;
++ goto unlock_mutex;
+ }
+
+ if (page_mapping(page)) {
+ unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
+ pfn, &unpoison_rs);
+- return 0;
+- }
+-
+- /*
+- * unpoison_memory() can encounter thp only when the thp is being
+- * worked by memory_failure() and the page lock is not held yet.
+- * In such case, we yield to memory_failure() and make unpoison fail.
+- */
+- if (!PageHuge(page) && PageTransHuge(page)) {
+- unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
+- pfn, &unpoison_rs);
+- return 0;
++ goto unlock_mutex;
+ }
+
+ if (!get_hwpoison_page(p, flags)) {
+@@ -1987,29 +2080,23 @@ int unpoison_memory(unsigned long pfn)
+ num_poisoned_pages_dec();
+ unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
+ pfn, &unpoison_rs);
+- return 0;
++ goto unlock_mutex;
+ }
+
+- lock_page(page);
+- /*
+- * This test is racy because PG_hwpoison is set outside of page lock.
+- * That's acceptable because that won't trigger kernel panic. Instead,
+- * the PG_hwpoison page will be caught and isolated on the entrance to
+- * the free buddy page pool.
+- */
+ if (TestClearPageHWPoison(page)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ pfn, &unpoison_rs);
+ num_poisoned_pages_dec();
+ freeit = 1;
+ }
+- unlock_page(page);
+
+ put_page(page);
+ if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
+ put_page(page);
+
+- return 0;
++unlock_mutex:
++ mutex_unlock(&mf_mutex);
++ return ret;
+ }
+ EXPORT_SYMBOL(unpoison_memory);
+
+@@ -2019,7 +2106,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
+ bool lru = PageLRU(page);
+
+ if (PageHuge(page)) {
+- isolated = isolate_huge_page(page, pagelist);
++ isolated = !isolate_hugetlb(page, pagelist);
+ } else {
+ if (lru)
+ isolated = !isolate_lru_page(page);
+@@ -2132,16 +2219,6 @@ static int soft_offline_in_use_page(struct page *page)
+ return __soft_offline_page(page);
+ }
+
+-static int soft_offline_free_page(struct page *page)
+-{
+- int rc = 0;
+-
+- if (!page_handle_poison(page, true, false))
+- rc = -EBUSY;
+-
+- return rc;
+-}
+-
+ static void put_ref_page(struct page *page)
+ {
+ if (page)
+@@ -2190,9 +2267,12 @@ int soft_offline_page(unsigned long pfn, int flags)
+ return -EIO;
+ }
+
++ mutex_lock(&mf_mutex);
++
+ if (PageHWPoison(page)) {
+ pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
+ put_ref_page(ref_page);
++ mutex_unlock(&mf_mutex);
+ return 0;
+ }
+
+@@ -2204,11 +2284,17 @@ retry:
+ if (ret > 0) {
+ ret = soft_offline_in_use_page(page);
+ } else if (ret == 0) {
+- if (soft_offline_free_page(page) && try_again) {
+- try_again = false;
+- goto retry;
++ if (!page_handle_poison(page, true, false)) {
++ if (try_again) {
++ try_again = false;
++ flags &= ~MF_COUNT_INCREASED;
++ goto retry;
++ }
++ ret = -EBUSY;
+ }
+ }
+
++ mutex_unlock(&mf_mutex);
++
+ return ret;
+ }
+diff --git a/mm/memory.c b/mm/memory.c
+index c52be6d6b6055..1bb01b12db532 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1301,6 +1301,17 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+ return ret;
+ }
+
++/* Whether we should zap all COWed (private) pages too */
++static inline bool should_zap_cows(struct zap_details *details)
++{
++ /* By default, zap all pages */
++ if (!details)
++ return true;
++
++ /* Or, we zap COWed pages only if the caller wants to */
++ return !details->check_mapping;
++}
++
+ static unsigned long zap_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+@@ -1396,16 +1407,18 @@ again:
+ continue;
+ }
+
+- /* If details->check_mapping, we leave swap entries. */
+- if (unlikely(details))
+- continue;
+-
+- if (!non_swap_entry(entry))
++ if (!non_swap_entry(entry)) {
++ /* Genuine swap entry, hence a private anon page */
++ if (!should_zap_cows(details))
++ continue;
+ rss[MM_SWAPENTS]--;
+- else if (is_migration_entry(entry)) {
++ } else if (is_migration_entry(entry)) {
+ struct page *page;
+
+ page = pfn_swap_entry_to_page(entry);
++ if (details && details->check_mapping &&
++ details->check_mapping != page_rmapping(page))
++ continue;
+ rss[mm_counter(page)]--;
+ }
+ if (unlikely(!free_swap_and_cache(entry)))
+@@ -2740,10 +2753,16 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+ return same;
+ }
+
+-static inline bool cow_user_page(struct page *dst, struct page *src,
+- struct vm_fault *vmf)
++/*
++ * Return:
++ * 0: copied succeeded
++ * -EHWPOISON: copy failed due to hwpoison in source page
++ * -EAGAIN: copied failed (some other reason)
++ */
++static inline int cow_user_page(struct page *dst, struct page *src,
++ struct vm_fault *vmf)
+ {
+- bool ret;
++ int ret;
+ void *kaddr;
+ void __user *uaddr;
+ bool locked = false;
+@@ -2752,8 +2771,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
+ unsigned long addr = vmf->address;
+
+ if (likely(src)) {
+- copy_user_highpage(dst, src, addr, vma);
+- return true;
++ if (copy_mc_user_highpage(dst, src, addr, vma)) {
++ memory_failure_queue(page_to_pfn(src), 0);
++ return -EHWPOISON;
++ }
++ return 0;
+ }
+
+ /*
+@@ -2780,7 +2802,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
+ * and update local tlb only
+ */
+ update_mmu_tlb(vma, addr, vmf->pte);
+- ret = false;
++ ret = -EAGAIN;
+ goto pte_unlock;
+ }
+
+@@ -2805,7 +2827,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
+ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+ /* The PTE changed under us, update local tlb */
+ update_mmu_tlb(vma, addr, vmf->pte);
+- ret = false;
++ ret = -EAGAIN;
+ goto pte_unlock;
+ }
+
+@@ -2824,7 +2846,7 @@ warn:
+ }
+ }
+
+- ret = true;
++ ret = 0;
+
+ pte_unlock:
+ if (locked)
+@@ -2990,6 +3012,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ pte_t entry;
+ int page_copied = 0;
+ struct mmu_notifier_range range;
++ int ret;
+
+ if (unlikely(anon_vma_prepare(vma)))
+ goto oom;
+@@ -3005,17 +3028,20 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
+ if (!new_page)
+ goto oom;
+
+- if (!cow_user_page(new_page, old_page, vmf)) {
++ ret = cow_user_page(new_page, old_page, vmf);
++ if (ret) {
+ /*
+ * COW failed, if the fault was solved by other,
+ * it's fine. If not, userspace would re-fault on
+ * the same address and we will handle the fault
+ * from the second attempt.
++ * The -EHWPOISON case will not be retried.
+ */
+ put_page(new_page);
+ if (old_page)
+ put_page(old_page);
+- return 0;
++
++ return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
+ }
+ }
+
+@@ -3449,8 +3475,21 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
+ struct vm_area_struct *vma = vmf->vma;
+ struct mmu_notifier_range range;
+
+- if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags))
++ /*
++ * We need a reference to lock the page because we don't hold
++ * the PTL so a racing thread can remove the device-exclusive
++ * entry and unmap it. If the page is free the entry must
++ * have been removed already. If it happens to have already
++ * been re-allocated after being freed all we do is lock and
++ * unlock it.
++ */
++ if (!get_page_unless_zero(page))
++ return 0;
++
++ if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
++ put_page(page);
+ return VM_FAULT_RETRY;
++ }
+ mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
+ vma->vm_mm, vmf->address & PAGE_MASK,
+ (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL);
+@@ -3463,6 +3502,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
+
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ unlock_page(page);
++ put_page(page);
+
+ mmu_notifier_invalidate_range_end(&range);
+ return 0;
+@@ -3861,11 +3901,20 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
+ return ret;
+
+ if (unlikely(PageHWPoison(vmf->page))) {
+- if (ret & VM_FAULT_LOCKED)
+- unlock_page(vmf->page);
+- put_page(vmf->page);
++ struct page *page = vmf->page;
++ vm_fault_t poisonret = VM_FAULT_HWPOISON;
++ if (ret & VM_FAULT_LOCKED) {
++ if (page_mapped(page))
++ unmap_mapping_pages(page_mapping(page),
++ page->index, 1, false);
++ /* Retry if a clean page was removed from the cache. */
++ if (invalidate_inode_page(page))
++ poisonret = VM_FAULT_NOPAGE;
++ unlock_page(page);
++ }
++ put_page(page);
+ vmf->page = NULL;
+- return VM_FAULT_HWPOISON;
++ return poisonret;
+ }
+
+ if (unlikely(!(ret & VM_FAULT_LOCKED)))
+@@ -4049,9 +4098,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
+ }
+ }
+
+- /* See comment in handle_pte_fault() */
++ /*
++ * See comment in handle_pte_fault() for how this scenario happens, we
++ * need to return NOPAGE so that we drop this page.
++ */
+ if (pmd_devmap_trans_unstable(vmf->pmd))
+- return 0;
++ return VM_FAULT_NOPAGE;
+
+ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+ vmf->address, &vmf->ptl);
+@@ -4465,6 +4517,19 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
+
+ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
+ {
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
++ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
++ /* No support for anonymous transparent PUD pages yet */
++ if (vma_is_anonymous(vmf->vma))
++ return VM_FAULT_FALLBACK;
++ if (vmf->vma->vm_ops->huge_fault)
++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
++ return VM_FAULT_FALLBACK;
++}
++
++static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
++{
+ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ /* No support for anonymous transparent PUD pages yet */
+@@ -4479,19 +4544,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
+ split:
+ /* COW or write-notify not handled on PUD level: split pud.*/
+ __split_huge_pud(vmf->vma, vmf->pud, vmf->address);
+-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+- return VM_FAULT_FALLBACK;
+-}
+-
+-static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+-{
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- /* No support for anonymous transparent PUD pages yet */
+- if (vma_is_anonymous(vmf->vma))
+- return VM_FAULT_FALLBACK;
+- if (vmf->vma->vm_ops->huge_fault)
+- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
++#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+ return VM_FAULT_FALLBACK;
+ }
+
+@@ -5445,6 +5498,8 @@ long copy_huge_page_from_user(struct page *dst_page,
+ if (rc)
+ break;
+
++ flush_dcache_page(subpage);
++
+ cond_resched();
+ }
+ return ret_val;
+diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
+index 9fd0be32a281e..81f2a97c886c9 100644
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1704,7 +1704,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
+
+ if (PageHuge(page)) {
+ pfn = page_to_pfn(head) + compound_nr(head) - 1;
+- isolate_huge_page(head, &source);
++ isolate_hugetlb(head, &source);
+ continue;
+ } else if (PageTransHuge(page))
+ pfn = page_to_pfn(head) + thp_nr_pages(page) - 1;
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index d12e0608fced2..818753635e427 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -347,7 +347,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
+ */
+ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+ {
+- if (!pol)
++ if (!pol || pol->mode == MPOL_LOCAL)
+ return;
+ if (!mpol_store_user_nodemask(pol) &&
+ nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
+@@ -603,8 +603,9 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+
+ /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
+ if (flags & (MPOL_MF_MOVE_ALL) ||
+- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+- if (!isolate_huge_page(page, qp->pagelist) &&
++ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
++ !hugetlb_pmd_shared(pte))) {
++ if (isolate_hugetlb(page, qp->pagelist) &&
+ (flags & MPOL_MF_STRICT))
+ /*
+ * Failed to isolate page but allow migrating pages
+@@ -783,7 +784,6 @@ static int vma_replace_policy(struct vm_area_struct *vma,
+ static int mbind_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct mempolicy *new_pol)
+ {
+- struct vm_area_struct *next;
+ struct vm_area_struct *prev;
+ struct vm_area_struct *vma;
+ int err = 0;
+@@ -798,8 +798,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
+ if (start > vma->vm_start)
+ prev = vma;
+
+- for (; vma && vma->vm_start < end; prev = vma, vma = next) {
+- next = vma->vm_next;
++ for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
+ vmstart = max(start, vma->vm_start);
+ vmend = min(end, vma->vm_end);
+
+@@ -813,10 +812,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
+ new_pol, vma->vm_userfaultfd_ctx);
+ if (prev) {
+ vma = prev;
+- next = vma->vm_next;
+- if (mpol_equal(vma_policy(vma), new_pol))
+- continue;
+- /* vma_merge() joined vma && vma->next, case 8 */
+ goto replace;
+ }
+ if (vma->vm_start != vmstart) {
+@@ -1395,7 +1390,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
+ unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG);
+ unsigned long t;
+
+- if (get_bitmap(&t, &nmask[maxnode / BITS_PER_LONG], bits))
++ if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits))
+ return -EFAULT;
+
+ if (maxnode - bits >= MAX_NUMNODES) {
+@@ -2140,8 +2135,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
+ * memory with both reclaim and compact as well.
+ */
+ if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+- page = __alloc_pages_node(hpage_node,
+- gfp, order);
++ page = __alloc_pages(gfp, order, hpage_node, nmask);
+
+ goto out;
+ }
+@@ -2568,6 +2562,7 @@ alloc_new:
+ mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+ if (!mpol_new)
+ goto err_out;
++ atomic_set(&mpol_new->refcnt, 1);
+ goto restart;
+ }
+
+diff --git a/mm/memremap.c b/mm/memremap.c
+index ed593bf87109a..1a7539502bbc0 100644
+--- a/mm/memremap.c
++++ b/mm/memremap.c
+@@ -112,30 +112,6 @@ static unsigned long pfn_next(unsigned long pfn)
+ #define for_each_device_pfn(pfn, map, i) \
+ for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
+
+-static void dev_pagemap_kill(struct dev_pagemap *pgmap)
+-{
+- if (pgmap->ops && pgmap->ops->kill)
+- pgmap->ops->kill(pgmap);
+- else
+- percpu_ref_kill(pgmap->ref);
+-}
+-
+-static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
+-{
+- if (pgmap->ops && pgmap->ops->cleanup) {
+- pgmap->ops->cleanup(pgmap);
+- } else {
+- wait_for_completion(&pgmap->done);
+- percpu_ref_exit(pgmap->ref);
+- }
+- /*
+- * Undo the pgmap ref assignment for the internal case as the
+- * caller may re-enable the same pgmap.
+- */
+- if (pgmap->ref == &pgmap->internal_ref)
+- pgmap->ref = NULL;
+-}
+-
+ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
+ {
+ struct range *range = &pgmap->ranges[range_id];
+@@ -167,14 +143,15 @@ void memunmap_pages(struct dev_pagemap *pgmap)
+ unsigned long pfn;
+ int i;
+
+- dev_pagemap_kill(pgmap);
++ percpu_ref_kill(&pgmap->ref);
+ for (i = 0; i < pgmap->nr_range; i++)
+ for_each_device_pfn(pfn, pgmap, i)
+ put_page(pfn_to_page(pfn));
+- dev_pagemap_cleanup(pgmap);
++ wait_for_completion(&pgmap->done);
+
+ for (i = 0; i < pgmap->nr_range; i++)
+ pageunmap_range(pgmap, i);
++ percpu_ref_exit(&pgmap->ref);
+
+ WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
+ devmap_managed_enable_put(pgmap);
+@@ -188,8 +165,7 @@ static void devm_memremap_pages_release(void *data)
+
+ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+ {
+- struct dev_pagemap *pgmap =
+- container_of(ref, struct dev_pagemap, internal_ref);
++ struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
+
+ complete(&pgmap->done);
+ }
+@@ -245,7 +221,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
+
+ if (!mhp_range_allowed(range->start, range_len(range), !is_private)) {
+ error = -EINVAL;
+- goto err_pfn_remap;
++ goto err_kasan;
+ }
+
+ mem_hotplug_begin();
+@@ -295,8 +271,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
+ memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+ PHYS_PFN(range->start),
+ PHYS_PFN(range_len(range)), pgmap);
+- percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
+- - pfn_first(pgmap, range_id));
++ percpu_ref_get_many(&pgmap->ref,
++ pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
+ return 0;
+
+ err_add_memory:
+@@ -351,6 +327,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
+ WARN(1, "File system DAX not supported\n");
+ return ERR_PTR(-EINVAL);
+ }
++ params.pgprot = pgprot_decrypted(params.pgprot);
+ break;
+ case MEMORY_DEVICE_GENERIC:
+ break;
+@@ -362,22 +339,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
+ break;
+ }
+
+- if (!pgmap->ref) {
+- if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+- return ERR_PTR(-EINVAL);
+-
+- init_completion(&pgmap->done);
+- error = percpu_ref_init(&pgmap->internal_ref,
+- dev_pagemap_percpu_release, 0, GFP_KERNEL);
+- if (error)
+- return ERR_PTR(error);
+- pgmap->ref = &pgmap->internal_ref;
+- } else {
+- if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+- WARN(1, "Missing reference count teardown definition\n");
+- return ERR_PTR(-EINVAL);
+- }
+- }
++ init_completion(&pgmap->done);
++ error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
++ GFP_KERNEL);
++ if (error)
++ return ERR_PTR(error);
+
+ devmap_managed_enable_get(pgmap);
+
+@@ -486,7 +452,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ /* fall back to slow path lookup */
+ rcu_read_lock();
+ pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
+- if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
++ if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
+ pgmap = NULL;
+ rcu_read_unlock();
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 1852d787e6ab6..dd50b1cc089e0 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -134,7 +134,7 @@ static void putback_movable_page(struct page *page)
+ *
+ * This function shall be used whenever the isolated pageset has been
+ * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
+- * and isolate_huge_page().
++ * and isolate_hugetlb().
+ */
+ void putback_movable_pages(struct list_head *l)
+ {
+@@ -948,9 +948,12 @@ static int move_to_new_page(struct page *newpage, struct page *page,
+ if (!PageMappingFlags(page))
+ page->mapping = NULL;
+
+- if (likely(!is_zone_device_page(newpage)))
+- flush_dcache_page(newpage);
++ if (likely(!is_zone_device_page(newpage))) {
++ int i, nr = compound_nr(newpage);
+
++ for (i = 0; i < nr; i++)
++ flush_dcache_page(newpage + i);
++ }
+ }
+ out:
+ return rc;
+@@ -1719,8 +1722,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+
+ if (PageHuge(page)) {
+ if (PageHead(page)) {
+- isolate_huge_page(page, pagelist);
+- err = 1;
++ err = isolate_hugetlb(page, pagelist);
++ if (!err)
++ err = 1;
+ }
+ } else {
+ struct page *head;
+@@ -2419,13 +2423,14 @@ next:
+ migrate->dst[migrate->npages] = 0;
+ migrate->src[migrate->npages++] = mpfn;
+ }
+- arch_leave_lazy_mmu_mode();
+- pte_unmap_unlock(ptep - 1, ptl);
+
+ /* Only flush the TLB if we actually modified any entries */
+ if (unmapped)
+ flush_tlb_range(walk->vma, start, end);
+
++ arch_leave_lazy_mmu_mode();
++ pte_unmap_unlock(ptep - 1, ptl);
++
+ return 0;
+ }
+
+diff --git a/mm/mlock.c b/mm/mlock.c
+index 16d2ee160d43c..0cee3f97d3df7 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -826,18 +826,18 @@ int user_shm_lock(size_t size, struct ucounts *ucounts)
+
+ locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
+- if (lock_limit == RLIM_INFINITY)
+- allowed = 1;
+- lock_limit >>= PAGE_SHIFT;
++ if (lock_limit != RLIM_INFINITY)
++ lock_limit >>= PAGE_SHIFT;
+ spin_lock(&shmlock_user_lock);
+ memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+
+- if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
++ if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+ goto out;
+ }
+ if (!get_ucounts(ucounts)) {
+ dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
++ allowed = 0;
+ goto out;
+ }
+ allowed = 1;
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 88dcc5c252255..a0a4eadc8779d 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1684,8 +1684,12 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
+ pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
+ return 0;
+
+- /* Do we need to track softdirty? */
+- if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
++ /*
++ * Do we need to track softdirty? hugetlb does not support softdirty
++ * tracking yet.
++ */
++ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY) &&
++ !is_vm_hugetlb_page(vma))
+ return 1;
+
+ /* Specialty mapping? */
+@@ -1832,7 +1836,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+ if (!arch_validate_flags(vma->vm_flags)) {
+ error = -EINVAL;
+ if (file)
+- goto unmap_and_free_vma;
++ goto close_and_free_vma;
+ else
+ goto free_vma;
+ }
+@@ -1872,13 +1876,15 @@ out:
+
+ return addr;
+
++close_and_free_vma:
++ if (vma->vm_ops && vma->vm_ops->close)
++ vma->vm_ops->close(vma);
+ unmap_and_free_vma:
+ fput(vma->vm_file);
+ vma->vm_file = NULL;
+
+ /* Undo any partial mapping done by a device driver. */
+ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+- charged = 0;
+ if (vm_flags & VM_SHARED)
+ mapping_unmap_writable(file->f_mapping);
+ free_vma:
+@@ -2113,14 +2119,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
+ return addr;
+ }
+
+-#ifndef arch_get_mmap_end
+-#define arch_get_mmap_end(addr) (TASK_SIZE)
+-#endif
+-
+-#ifndef arch_get_mmap_base
+-#define arch_get_mmap_base(addr, base) (base)
+-#endif
+-
+ /* Get an address range which is currently unmapped.
+ * For shmat() with addr=0.
+ *
+@@ -2551,7 +2549,7 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
+ if (!*endptr)
+ stack_guard_gap = val << PAGE_SHIFT;
+
+- return 0;
++ return 1;
+ }
+ __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
+@@ -2643,11 +2641,28 @@ static void unmap_region(struct mm_struct *mm,
+ {
+ struct vm_area_struct *next = vma_next(mm, prev);
+ struct mmu_gather tlb;
++ struct vm_area_struct *cur_vma;
+
+ lru_add_drain();
+ tlb_gather_mmu(&tlb, mm);
+ update_hiwater_rss(mm);
+ unmap_vmas(&tlb, vma, start, end);
++
++ /*
++ * Ensure we have no stale TLB entries by the time this mapping is
++ * removed from the rmap.
++ * Note that we don't have to worry about nested flushes here because
++ * we're holding the mm semaphore for removing the mapping - so any
++ * concurrent flush in this region has to be coming through the rmap,
++ * and we synchronize against that using the rmap lock.
++ */
++ for (cur_vma = vma; cur_vma; cur_vma = cur_vma->vm_next) {
++ if ((cur_vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) {
++ tlb_flush_mmu(&tlb);
++ break;
++ }
++ }
++
+ free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+ next ? next->vm_start : USER_PGTABLES_CEILING);
+ tlb_finish_mmu(&tlb);
+diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
+index 1b9837419bf9c..8be26c7ddb47f 100644
+--- a/mm/mmu_gather.c
++++ b/mm/mmu_gather.c
+@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg)
+ /* Simply deliver the interrupt */
+ }
+
+-static void tlb_remove_table_sync_one(void)
++void tlb_remove_table_sync_one(void)
+ {
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+
+ #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
+-static void tlb_remove_table_sync_one(void) { }
+-
+ static void tlb_remove_table_free(struct mmu_table_batch *batch)
+ {
+ __tlb_remove_table_free(batch);
+diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
+index 459d195d2ff64..f45ff1b7626a6 100644
+--- a/mm/mmu_notifier.c
++++ b/mm/mmu_notifier.c
+@@ -1036,6 +1036,18 @@ int mmu_interval_notifier_insert_locked(
+ }
+ EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);
+
++static bool
++mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
++ unsigned long seq)
++{
++ bool ret;
++
++ spin_lock(&subscriptions->lock);
++ ret = subscriptions->invalidate_seq != seq;
++ spin_unlock(&subscriptions->lock);
++ return ret;
++}
++
+ /**
+ * mmu_interval_notifier_remove - Remove a interval notifier
+ * @interval_sub: Interval subscription to unregister
+@@ -1083,7 +1095,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
+ lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+ if (seq)
+ wait_event(subscriptions->wq,
+- READ_ONCE(subscriptions->invalidate_seq) != seq);
++ mmu_interval_seq_released(subscriptions, seq));
+
+ /* pairs with mmgrab in mmu_interval_notifier_insert() */
+ mmdrop(mm);
+diff --git a/mm/mprotect.c b/mm/mprotect.c
+index 883e2cc85cad8..ed18dc49533f6 100644
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -94,7 +94,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+
+ /* Also skip shared copy-on-write pages */
+ if (is_cow_mapping(vma->vm_flags) &&
+- page_mapcount(page) != 1)
++ page_count(page) != 1)
+ continue;
+
+ /*
+diff --git a/mm/mremap.c b/mm/mremap.c
+index badfe17ade1f0..3a3cf4cc2c632 100644
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -486,6 +486,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
+ pmd_t *old_pmd, *new_pmd;
+ pud_t *old_pud, *new_pud;
+
++ if (!len)
++ return 0;
++
+ old_end = old_addr + len;
+ flush_cache_range(vma, old_addr, old_end);
+
+diff --git a/mm/oom_kill.c b/mm/oom_kill.c
+index 989f35a2bbb1d..262f752d3d516 100644
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -635,7 +635,7 @@ done:
+ */
+ set_bit(MMF_OOM_SKIP, &mm->flags);
+
+- /* Drop a reference taken by wake_oom_reaper */
++ /* Drop a reference taken by queue_oom_reaper */
+ put_task_struct(tsk);
+ }
+
+@@ -645,12 +645,12 @@ static int oom_reaper(void *unused)
+ struct task_struct *tsk = NULL;
+
+ wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
+- spin_lock(&oom_reaper_lock);
++ spin_lock_irq(&oom_reaper_lock);
+ if (oom_reaper_list != NULL) {
+ tsk = oom_reaper_list;
+ oom_reaper_list = tsk->oom_reaper_list;
+ }
+- spin_unlock(&oom_reaper_lock);
++ spin_unlock_irq(&oom_reaper_lock);
+
+ if (tsk)
+ oom_reap_task(tsk);
+@@ -659,22 +659,48 @@ static int oom_reaper(void *unused)
+ return 0;
+ }
+
+-static void wake_oom_reaper(struct task_struct *tsk)
++static void wake_oom_reaper(struct timer_list *timer)
+ {
+- /* mm is already queued? */
+- if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+- return;
++ struct task_struct *tsk = container_of(timer, struct task_struct,
++ oom_reaper_timer);
++ struct mm_struct *mm = tsk->signal->oom_mm;
++ unsigned long flags;
+
+- get_task_struct(tsk);
++ /* The victim managed to terminate on its own - see exit_mmap */
++ if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
++ put_task_struct(tsk);
++ return;
++ }
+
+- spin_lock(&oom_reaper_lock);
++ spin_lock_irqsave(&oom_reaper_lock, flags);
+ tsk->oom_reaper_list = oom_reaper_list;
+ oom_reaper_list = tsk;
+- spin_unlock(&oom_reaper_lock);
++ spin_unlock_irqrestore(&oom_reaper_lock, flags);
+ trace_wake_reaper(tsk->pid);
+ wake_up(&oom_reaper_wait);
+ }
+
++/*
++ * Give the OOM victim time to exit naturally before invoking the oom_reaping.
++ * The timers timeout is arbitrary... the longer it is, the longer the worst
++ * case scenario for the OOM can take. If it is too small, the oom_reaper can
++ * get in the way and release resources needed by the process exit path.
++ * e.g. The futex robust list can sit in Anon|Private memory that gets reaped
++ * before the exit path is able to wake the futex waiters.
++ */
++#define OOM_REAPER_DELAY (2*HZ)
++static void queue_oom_reaper(struct task_struct *tsk)
++{
++ /* mm is already queued? */
++ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
++ return;
++
++ get_task_struct(tsk);
++ timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
++ tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
++ add_timer(&tsk->oom_reaper_timer);
++}
++
+ static int __init oom_init(void)
+ {
+ oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
+@@ -682,7 +708,7 @@ static int __init oom_init(void)
+ }
+ subsys_initcall(oom_init)
+ #else
+-static inline void wake_oom_reaper(struct task_struct *tsk)
++static inline void queue_oom_reaper(struct task_struct *tsk)
+ {
+ }
+ #endif /* CONFIG_MMU */
+@@ -933,7 +959,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
+ rcu_read_unlock();
+
+ if (can_oom_reap)
+- wake_oom_reaper(victim);
++ queue_oom_reaper(victim);
+
+ mmdrop(mm);
+ put_task_struct(victim);
+@@ -969,7 +995,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
+ task_lock(victim);
+ if (task_will_free_mem(victim)) {
+ mark_oom_victim(victim);
+- wake_oom_reaper(victim);
++ queue_oom_reaper(victim);
+ task_unlock(victim);
+ put_task_struct(victim);
+ return;
+@@ -1067,7 +1093,7 @@ bool out_of_memory(struct oom_control *oc)
+ */
+ if (task_will_free_mem(current)) {
+ mark_oom_victim(current);
+- wake_oom_reaper(current);
++ queue_oom_reaper(current);
+ return true;
+ }
+
+@@ -1120,27 +1146,24 @@ bool out_of_memory(struct oom_control *oc)
+ }
+
+ /*
+- * The pagefault handler calls here because it is out of memory, so kill a
+- * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
+- * killing is already in progress so do nothing.
++ * The pagefault handler calls here because some allocation has failed. We have
++ * to take care of the memcg OOM here because this is the only safe context without
++ * any locks held but let the oom killer triggered from the allocation context care
++ * about the global OOM.
+ */
+ void pagefault_out_of_memory(void)
+ {
+- struct oom_control oc = {
+- .zonelist = NULL,
+- .nodemask = NULL,
+- .memcg = NULL,
+- .gfp_mask = 0,
+- .order = 0,
+- };
++ static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
++ DEFAULT_RATELIMIT_BURST);
+
+ if (mem_cgroup_oom_synchronize(true))
+ return;
+
+- if (!mutex_trylock(&oom_lock))
++ if (fatal_signal_pending(current))
+ return;
+- out_of_memory(&oc);
+- mutex_unlock(&oom_lock);
++
++ if (__ratelimit(&pfoom_rs))
++ pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
+ }
+
+ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index 4812a17b288c5..8ca6617b2a723 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2755,6 +2755,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
+
+ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ {
++ unsigned long flags;
+ atomic_dec(&wb->writeback_inodes);
+ /*
+ * Make sure estimate of writeback throughput gets updated after
+@@ -2763,7 +2764,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ * that if multiple inodes end writeback at a similar time, they get
+ * batched into one bandwidth update.
+ */
+- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++ spin_lock_irqsave(&wb->work_lock, flags);
++ if (test_bit(WB_registered, &wb->state))
++ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
++ spin_unlock_irqrestore(&wb->work_lock, flags);
+ }
+
+ int test_clear_page_writeback(struct page *page)
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 23d3339ac4e8e..f320ee2bd34a7 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3928,11 +3928,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
+ * need to be calculated.
+ */
+ if (!order) {
+- long fast_free;
++ long usable_free;
++ long reserved;
+
+- fast_free = free_pages;
+- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
+- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
++ usable_free = free_pages;
++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
++
++ /* reserved may over estimate high-atomic reserves. */
++ usable_free -= min(usable_free, reserved);
++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
+ return true;
+ }
+
+@@ -4210,7 +4214,9 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
+ va_list args;
+ static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
+
+- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
++ if ((gfp_mask & __GFP_NOWARN) ||
++ !__ratelimit(&nopage_rs) ||
++ ((gfp_mask & __GFP_DMA) && !has_managed_dma()))
+ return;
+
+ va_start(args, fmt);
+@@ -4575,6 +4581,30 @@ void fs_reclaim_release(gfp_t gfp_mask)
+ EXPORT_SYMBOL_GPL(fs_reclaim_release);
+ #endif
+
++/*
++ * Zonelists may change due to hotplug during allocation. Detect when zonelists
++ * have been rebuilt so allocation retries. Reader side does not lock and
++ * retries the allocation if zonelist changes. Writer side is protected by the
++ * embedded spin_lock.
++ */
++static DEFINE_SEQLOCK(zonelist_update_seq);
++
++static unsigned int zonelist_iter_begin(void)
++{
++ if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
++ return read_seqbegin(&zonelist_update_seq);
++
++ return 0;
++}
++
++static unsigned int check_retry_zonelist(unsigned int seq)
++{
++ if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
++ return read_seqretry(&zonelist_update_seq, seq);
++
++ return seq;
++}
++
+ /* Perform direct synchronous page reclaim */
+ static unsigned long
+ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
+@@ -4882,6 +4912,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ int compaction_retries;
+ int no_progress_loops;
+ unsigned int cpuset_mems_cookie;
++ unsigned int zonelist_iter_cookie;
+ int reserve_flags;
+
+ /*
+@@ -4892,11 +4923,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+ gfp_mask &= ~__GFP_ATOMIC;
+
+-retry_cpuset:
++restart:
+ compaction_retries = 0;
+ no_progress_loops = 0;
+ compact_priority = DEF_COMPACT_PRIORITY;
+ cpuset_mems_cookie = read_mems_allowed_begin();
++ zonelist_iter_cookie = zonelist_iter_begin();
+
+ /*
+ * The fast path uses conservative alloc_flags to succeed only until
+@@ -5055,9 +5087,13 @@ retry:
+ goto retry;
+
+
+- /* Deal with possible cpuset update races before we start OOM killing */
+- if (check_retry_cpuset(cpuset_mems_cookie, ac))
+- goto retry_cpuset;
++ /*
++ * Deal with possible cpuset update races or zonelist updates to avoid
++ * a unnecessary OOM kill.
++ */
++ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
++ check_retry_zonelist(zonelist_iter_cookie))
++ goto restart;
+
+ /* Reclaim has failed us, start killing things */
+ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
+@@ -5077,9 +5113,13 @@ retry:
+ }
+
+ nopage:
+- /* Deal with possible cpuset update races before we fail */
+- if (check_retry_cpuset(cpuset_mems_cookie, ac))
+- goto retry_cpuset;
++ /*
++ * Deal with possible cpuset update races or zonelist updates to avoid
++ * a unnecessary OOM kill.
++ */
++ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
++ check_retry_zonelist(zonelist_iter_cookie))
++ goto restart;
+
+ /*
+ * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
+@@ -5297,8 +5337,8 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+ page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
+ pcp, pcp_list);
+ if (unlikely(!page)) {
+- /* Try and get at least one page */
+- if (!nr_populated)
++ /* Try and allocate at least one page */
++ if (!nr_account)
+ goto failed_irq;
+ break;
+ }
+@@ -5450,9 +5490,12 @@ EXPORT_SYMBOL(get_zeroed_page);
+ */
+ void __free_pages(struct page *page, unsigned int order)
+ {
++ /* get PageHead before we drop reference */
++ int head = PageHead(page);
++
+ if (put_page_testzero(page))
+ free_the_page(page, order);
+- else if (!PageHead(page))
++ else if (!head)
+ while (order-- > 0)
+ free_the_page(page + (1 << order), order);
+ }
+@@ -5560,6 +5603,18 @@ refill:
+ /* reset page count bias and offset to start of new frag */
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+ offset = size - fragsz;
++ if (unlikely(offset < 0)) {
++ /*
++ * The caller is trying to allocate a fragment
++ * with fragsz > PAGE_SIZE but the cache isn't big
++ * enough to satisfy the request, this may
++ * happen in low memory conditions.
++ * We don't release the cache page because
++ * it could make memory pressure worse
++ * so we simply return NULL here.
++ */
++ return NULL;
++ }
+ }
+
+ nc->pagecnt_bias--;
+@@ -6090,7 +6145,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
+ do {
+ zone_type--;
+ zone = pgdat->node_zones + zone_type;
+- if (managed_zone(zone)) {
++ if (populated_zone(zone)) {
+ zoneref_set_zone(zone, &zonerefs[nr_zones++]);
+ check_highest_zone(zone_type);
+ }
+@@ -6361,9 +6416,22 @@ static void __build_all_zonelists(void *data)
+ int nid;
+ int __maybe_unused cpu;
+ pg_data_t *self = data;
+- static DEFINE_SPINLOCK(lock);
++ unsigned long flags;
+
+- spin_lock(&lock);
++ /*
++ * Explicitly disable this CPU's interrupts before taking seqlock
++ * to prevent any IRQ handler from calling into the page allocator
++ * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
++ */
++ local_irq_save(flags);
++ /*
++ * Explicitly disable this CPU's synchronous printk() before taking
++ * seqlock to prevent any printk() from trying to hold port->lock, for
++ * tty_insert_flip_string_and_push_buffer() on other CPU might be
++ * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
++ */
++ printk_deferred_enter();
++ write_seqlock(&zonelist_update_seq);
+
+ #ifdef CONFIG_NUMA
+ memset(node_load, 0, sizeof(node_load));
+@@ -6396,7 +6464,9 @@ static void __build_all_zonelists(void *data)
+ #endif
+ }
+
+- spin_unlock(&lock);
++ write_sequnlock(&zonelist_update_seq);
++ printk_deferred_exit();
++ local_irq_restore(flags);
+ }
+
+ static noinline void __init
+@@ -7897,10 +7967,17 @@ restart:
+
+ out2:
+ /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
+- for (nid = 0; nid < MAX_NUMNODES; nid++)
++ for (nid = 0; nid < MAX_NUMNODES; nid++) {
++ unsigned long start_pfn, end_pfn;
++
+ zone_movable_pfn[nid] =
+ roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
++ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
++ if (zone_movable_pfn[nid] >= end_pfn)
++ zone_movable_pfn[nid] = 0;
++ }
++
+ out:
+ /* restore the node_state */
+ node_states[N_MEMORY] = saved_node_state;
+@@ -8160,7 +8237,7 @@ void __init mem_init_print_info(void)
+ */
+ #define adj_init_size(start, end, size, pos, adj) \
+ do { \
+- if (start <= pos && pos < end && size > adj) \
++ if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \
+ size -= adj; \
+ } while (0)
+
+@@ -9178,6 +9255,9 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
+
+ if (PageReserved(page))
+ return false;
++
++ if (PageHuge(page))
++ return false;
+ }
+ return true;
+ }
+@@ -9449,3 +9529,18 @@ bool take_page_off_buddy(struct page *page)
+ return ret;
+ }
+ #endif
++
++#ifdef CONFIG_ZONE_DMA
++bool has_managed_dma(void)
++{
++ struct pglist_data *pgdat;
++
++ for_each_online_pgdat(pgdat) {
++ struct zone *zone = &pgdat->node_zones[ZONE_DMA];
++
++ if (managed_zone(zone))
++ return true;
++ }
++ return false;
++}
++#endif /* CONFIG_ZONE_DMA */
+diff --git a/mm/page_io.c b/mm/page_io.c
+index c493ce9ebcf50..66c6fbb07bc4c 100644
+--- a/mm/page_io.c
++++ b/mm/page_io.c
+@@ -50,54 +50,6 @@ void end_swap_bio_write(struct bio *bio)
+ bio_put(bio);
+ }
+
+-static void swap_slot_free_notify(struct page *page)
+-{
+- struct swap_info_struct *sis;
+- struct gendisk *disk;
+- swp_entry_t entry;
+-
+- /*
+- * There is no guarantee that the page is in swap cache - the software
+- * suspend code (at least) uses end_swap_bio_read() against a non-
+- * swapcache page. So we must check PG_swapcache before proceeding with
+- * this optimization.
+- */
+- if (unlikely(!PageSwapCache(page)))
+- return;
+-
+- sis = page_swap_info(page);
+- if (data_race(!(sis->flags & SWP_BLKDEV)))
+- return;
+-
+- /*
+- * The swap subsystem performs lazy swap slot freeing,
+- * expecting that the page will be swapped out again.
+- * So we can avoid an unnecessary write if the page
+- * isn't redirtied.
+- * This is good for real swap storage because we can
+- * reduce unnecessary I/O and enhance wear-leveling
+- * if an SSD is used as the as swap device.
+- * But if in-memory swap device (eg zram) is used,
+- * this causes a duplicated copy between uncompressed
+- * data in VM-owned memory and compressed data in
+- * zram-owned memory. So let's free zram-owned memory
+- * and make the VM-owned decompressed page *dirty*,
+- * so the page should be swapped out somewhere again if
+- * we again wish to reclaim it.
+- */
+- disk = sis->bdev->bd_disk;
+- entry.val = page_private(page);
+- if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
+- unsigned long offset;
+-
+- offset = swp_offset(entry);
+-
+- SetPageDirty(page);
+- disk->fops->swap_slot_free_notify(sis->bdev,
+- offset);
+- }
+-}
+-
+ static void end_swap_bio_read(struct bio *bio)
+ {
+ struct page *page = bio_first_page_all(bio);
+@@ -113,7 +65,6 @@ static void end_swap_bio_read(struct bio *bio)
+ }
+
+ SetPageUptodate(page);
+- swap_slot_free_notify(page);
+ out:
+ unlock_page(page);
+ WRITE_ONCE(bio->bi_private, NULL);
+@@ -392,11 +343,6 @@ int swap_readpage(struct page *page, bool synchronous)
+ if (sis->flags & SWP_SYNCHRONOUS_IO) {
+ ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+ if (!ret) {
+- if (trylock_page(page)) {
+- swap_slot_free_notify(page);
+- unlock_page(page);
+- }
+-
+ count_vm_event(PSWPIN);
+ goto out;
+ }
+diff --git a/mm/pagewalk.c b/mm/pagewalk.c
+index 9b3db11a4d1db..fa7a3d21a7518 100644
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -110,7 +110,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ do {
+ again:
+ next = pmd_addr_end(addr, end);
+- if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
++ if (pmd_none(*pmd)) {
+ if (ops->pte_hole)
+ err = ops->pte_hole(addr, next, depth, walk);
+ if (err)
+@@ -171,7 +171,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+ do {
+ again:
+ next = pud_addr_end(addr, end);
+- if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
++ if (pud_none(*pud)) {
+ if (ops->pte_hole)
+ err = ops->pte_hole(addr, next, depth, walk);
+ if (err)
+@@ -366,19 +366,19 @@ static int __walk_page_range(unsigned long start, unsigned long end,
+ struct vm_area_struct *vma = walk->vma;
+ const struct mm_walk_ops *ops = walk->ops;
+
+- if (vma && ops->pre_vma) {
++ if (ops->pre_vma) {
+ err = ops->pre_vma(start, end, walk);
+ if (err)
+ return err;
+ }
+
+- if (vma && is_vm_hugetlb_page(vma)) {
++ if (is_vm_hugetlb_page(vma)) {
+ if (ops->hugetlb_entry)
+ err = walk_hugetlb_range(start, end, walk);
+ } else
+ err = walk_pgd_range(start, end, walk);
+
+- if (vma && ops->post_vma)
++ if (ops->post_vma)
+ ops->post_vma(walk);
+
+ return err;
+@@ -450,9 +450,13 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
+ if (!vma) { /* after the last vma */
+ walk.vma = NULL;
+ next = end;
++ if (ops->pte_hole)
++ err = ops->pte_hole(start, next, -1, &walk);
+ } else if (start < vma->vm_start) { /* outside vma */
+ walk.vma = NULL;
+ next = min(end, vma->vm_start);
++ if (ops->pte_hole)
++ err = ops->pte_hole(start, next, -1, &walk);
+ } else { /* inside vma */
+ walk.vma = vma;
+ next = min(end, vma->vm_end);
+@@ -470,9 +474,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
+ }
+ if (err < 0)
+ break;
+- }
+- if (walk.vma || walk.ops->pte_hole)
+ err = __walk_page_range(start, next, &walk);
++ }
+ if (err)
+ break;
+ } while (start = next, start < end);
+@@ -501,9 +504,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+ if (start >= end || !walk.mm)
+ return -EINVAL;
+
+- mmap_assert_locked(walk.mm);
++ mmap_assert_write_locked(walk.mm);
+
+- return __walk_page_range(start, end, &walk);
++ return walk_pgd_range(start, end, &walk);
+ }
+
+ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+diff --git a/mm/ptdump.c b/mm/ptdump.c
+index da751448d0e4e..f84ea700662fc 100644
+--- a/mm/ptdump.c
++++ b/mm/ptdump.c
+@@ -144,13 +144,13 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
+ {
+ const struct ptdump_range *range = st->range;
+
+- mmap_read_lock(mm);
++ mmap_write_lock(mm);
+ while (range->start != range->end) {
+ walk_page_range_novma(mm, range->start, range->end,
+ &ptdump_ops, pgd, st);
+ range++;
+ }
+- mmap_read_unlock(mm);
++ mmap_write_unlock(mm);
+
+ /* Flush out the last page */
+ st->note_page(st, 0, -1, 0);
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 6aebd17472512..330b361a460ea 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -90,7 +90,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
+ anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+ if (anon_vma) {
+ atomic_set(&anon_vma->refcount, 1);
+- anon_vma->degree = 1; /* Reference for first vma */
++ anon_vma->num_children = 0;
++ anon_vma->num_active_vmas = 0;
+ anon_vma->parent = anon_vma;
+ /*
+ * Initialise the anon_vma root to point to itself. If called
+@@ -198,6 +199,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
+ anon_vma = anon_vma_alloc();
+ if (unlikely(!anon_vma))
+ goto out_enomem_free_avc;
++ anon_vma->num_children++; /* self-parent link for new root */
+ allocated = anon_vma;
+ }
+
+@@ -207,8 +209,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
+ if (likely(!vma->anon_vma)) {
+ vma->anon_vma = anon_vma;
+ anon_vma_chain_link(vma, avc, anon_vma);
+- /* vma reference or self-parent link for new root */
+- anon_vma->degree++;
++ anon_vma->num_active_vmas++;
+ allocated = NULL;
+ avc = NULL;
+ }
+@@ -293,19 +294,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
+ anon_vma_chain_link(dst, avc, anon_vma);
+
+ /*
+- * Reuse existing anon_vma if its degree lower than two,
+- * that means it has no vma and only one anon_vma child.
++ * Reuse existing anon_vma if it has no vma and only one
++ * anon_vma child.
+ *
+- * Do not chose parent anon_vma, otherwise first child
+- * will always reuse it. Root anon_vma is never reused:
++ * Root anon_vma is never reused:
+ * it has self-parent reference and at least one child.
+ */
+ if (!dst->anon_vma && src->anon_vma &&
+- anon_vma != src->anon_vma && anon_vma->degree < 2)
++ anon_vma->num_children < 2 &&
++ anon_vma->num_active_vmas == 0)
+ dst->anon_vma = anon_vma;
+ }
+ if (dst->anon_vma)
+- dst->anon_vma->degree++;
++ dst->anon_vma->num_active_vmas++;
+ unlock_anon_vma_root(root);
+ return 0;
+
+@@ -355,6 +356,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
+ anon_vma = anon_vma_alloc();
+ if (!anon_vma)
+ goto out_error;
++ anon_vma->num_active_vmas++;
+ avc = anon_vma_chain_alloc(GFP_KERNEL);
+ if (!avc)
+ goto out_error_free_anon_vma;
+@@ -375,7 +377,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
+ vma->anon_vma = anon_vma;
+ anon_vma_lock_write(anon_vma);
+ anon_vma_chain_link(vma, avc, anon_vma);
+- anon_vma->parent->degree++;
++ anon_vma->parent->num_children++;
+ anon_vma_unlock_write(anon_vma);
+
+ return 0;
+@@ -407,7 +409,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
+ * to free them outside the lock.
+ */
+ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
+- anon_vma->parent->degree--;
++ anon_vma->parent->num_children--;
+ continue;
+ }
+
+@@ -415,7 +417,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
+ anon_vma_chain_free(avc);
+ }
+ if (vma->anon_vma) {
+- vma->anon_vma->degree--;
++ vma->anon_vma->num_active_vmas--;
+
+ /*
+ * vma would still be needed after unlink, and anon_vma will be prepared
+@@ -433,7 +435,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
+ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+ struct anon_vma *anon_vma = avc->anon_vma;
+
+- VM_WARN_ON(anon_vma->degree);
++ VM_WARN_ON(anon_vma->num_children);
++ VM_WARN_ON(anon_vma->num_active_vmas);
+ put_anon_vma(anon_vma);
+
+ list_del(&avc->same_vma);
+@@ -1570,7 +1573,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+
+ /* MADV_FREE page check */
+ if (!PageSwapBacked(page)) {
+- if (!PageDirty(page)) {
++ int ref_count, map_count;
++
++ /*
++ * Synchronize with gup_pte_range():
++ * - clear PTE; barrier; read refcount
++ * - inc refcount; barrier; read PTE
++ */
++ smp_mb();
++
++ ref_count = page_ref_count(page);
++ map_count = page_mapcount(page);
++
++ /*
++ * Order reads for page refcount and dirty flag
++ * (see comments in __remove_mapping()).
++ */
++ smp_rmb();
++
++ /*
++ * The only page refs must be one from isolation
++ * plus the rmap(s) (dropped by discard:).
++ */
++ if (ref_count == 1 + map_count &&
++ !PageDirty(page)) {
+ /* Invalidate as we cleared the pte */
+ mmu_notifier_invalidate_range(mm,
+ address, address + PAGE_SIZE);
+diff --git a/mm/secretmem.c b/mm/secretmem.c
+index 22b310adb53d9..d1986ce2e7c77 100644
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
+ gfp_t gfp = vmf->gfp_mask;
+ unsigned long addr;
+ struct page *page;
++ vm_fault_t ret;
+ int err;
+
+ if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+ return vmf_error(-EINVAL);
+
++ filemap_invalidate_lock_shared(mapping);
++
+ retry:
+ page = find_lock_page(mapping, offset);
+ if (!page) {
+ page = alloc_page(gfp | __GFP_ZERO);
+- if (!page)
+- return VM_FAULT_OOM;
++ if (!page) {
++ ret = VM_FAULT_OOM;
++ goto out;
++ }
+
+ err = set_direct_map_invalid_noflush(page);
+ if (err) {
+ put_page(page);
+- return vmf_error(err);
++ ret = vmf_error(err);
++ goto out;
+ }
+
+ __SetPageUptodate(page);
+@@ -86,7 +92,8 @@ retry:
+ if (err == -EEXIST)
+ goto retry;
+
+- return vmf_error(err);
++ ret = vmf_error(err);
++ goto out;
+ }
+
+ addr = (unsigned long)page_address(page);
+@@ -94,7 +101,11 @@ retry:
+ }
+
+ vmf->page = page;
+- return VM_FAULT_LOCKED;
++ ret = VM_FAULT_LOCKED;
++
++out:
++ filemap_invalidate_unlock_shared(mapping);
++ return ret;
+ }
+
+ static const struct vm_operations_struct secretmem_vm_ops = {
+@@ -158,6 +169,30 @@ const struct address_space_operations secretmem_aops = {
+ .isolate_page = secretmem_isolate_page,
+ };
+
++static int secretmem_setattr(struct user_namespace *mnt_userns,
++ struct dentry *dentry, struct iattr *iattr)
++{
++ struct inode *inode = d_inode(dentry);
++ struct address_space *mapping = inode->i_mapping;
++ unsigned int ia_valid = iattr->ia_valid;
++ int ret;
++
++ filemap_invalidate_lock(mapping);
++
++ if ((ia_valid & ATTR_SIZE) && inode->i_size)
++ ret = -EINVAL;
++ else
++ ret = simple_setattr(mnt_userns, dentry, iattr);
++
++ filemap_invalidate_unlock(mapping);
++
++ return ret;
++}
++
++static const struct inode_operations secretmem_iops = {
++ .setattr = secretmem_setattr,
++};
++
+ static struct vfsmount *secretmem_mnt;
+
+ static struct file *secretmem_file_create(unsigned long flags)
+@@ -177,6 +212,7 @@ static struct file *secretmem_file_create(unsigned long flags)
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ mapping_set_unevictable(inode->i_mapping);
+
++ inode->i_op = &secretmem_iops;
+ inode->i_mapping->a_ops = &secretmem_aops;
+
+ /* pretend we are a normal file with zero size */
+@@ -247,7 +283,7 @@ static int secretmem_init(void)
+
+ secretmem_mnt = kern_mount(&secretmem_fs);
+ if (IS_ERR(secretmem_mnt))
+- ret = PTR_ERR(secretmem_mnt);
++ return PTR_ERR(secretmem_mnt);
+
+ /* prevent secretmem mappings from ever getting PROT_EXEC */
+ secretmem_mnt->mnt_flags |= MNT_NOEXEC;
+diff --git a/mm/shmem.c b/mm/shmem.c
+index b5860f4a2738e..663fb117cd877 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -555,7 +555,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+ struct shmem_inode_info *info;
+ struct page *page;
+ unsigned long batch = sc ? sc->nr_to_scan : 128;
+- int removed = 0, split = 0;
++ int split = 0;
+
+ if (list_empty(&sbinfo->shrinklist))
+ return SHRINK_STOP;
+@@ -570,7 +570,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+ /* inode is about to be evicted */
+ if (!inode) {
+ list_del_init(&info->shrinklist);
+- removed++;
+ goto next;
+ }
+
+@@ -578,12 +577,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+ if (round_up(inode->i_size, PAGE_SIZE) ==
+ round_up(inode->i_size, HPAGE_PMD_SIZE)) {
+ list_move(&info->shrinklist, &to_remove);
+- removed++;
+ goto next;
+ }
+
+ list_move(&info->shrinklist, &list);
+ next:
++ sbinfo->shrinklist_len--;
+ if (!--batch)
+ break;
+ }
+@@ -603,7 +602,7 @@ next:
+ inode = &info->vfs_inode;
+
+ if (nr_to_split && split >= nr_to_split)
+- goto leave;
++ goto move_back;
+
+ page = find_get_page(inode->i_mapping,
+ (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
+@@ -617,38 +616,44 @@ next:
+ }
+
+ /*
+- * Leave the inode on the list if we failed to lock
+- * the page at this time.
++ * Move the inode on the list back to shrinklist if we failed
++ * to lock the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+- goto leave;
++ goto move_back;
+ }
+
+ ret = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+
+- /* If split failed leave the inode on the list */
++ /* If split failed move the inode on the list back to shrinklist */
+ if (ret)
+- goto leave;
++ goto move_back;
+
+ split++;
+ drop:
+ list_del_init(&info->shrinklist);
+- removed++;
+-leave:
++ goto put;
++move_back:
++ /*
++ * Make sure the inode is either on the global list or deleted
++ * from any local list before iput() since it could be deleted
++ * in another thread once we put the inode (then the local list
++ * is corrupted).
++ */
++ spin_lock(&sbinfo->shrinklist_lock);
++ list_move(&info->shrinklist, &sbinfo->shrinklist);
++ sbinfo->shrinklist_len++;
++ spin_unlock(&sbinfo->shrinklist_lock);
++put:
+ iput(inode);
+ }
+
+- spin_lock(&sbinfo->shrinklist_lock);
+- list_splice_tail(&list, &sbinfo->shrinklist);
+- sbinfo->shrinklist_len -= removed;
+- spin_unlock(&sbinfo->shrinklist_lock);
+-
+ return split;
+ }
+
+@@ -2389,8 +2394,10 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
+ /* don't free the page */
+ goto out_unacct_blocks;
+ }
++
++ flush_dcache_page(page);
+ } else { /* ZEROPAGE */
+- clear_highpage(page);
++ clear_user_highpage(page, dst_addr);
+ }
+ } else {
+ page = *pagep;
+@@ -2456,6 +2463,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
+ struct inode *inode = mapping->host;
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ pgoff_t index = pos >> PAGE_SHIFT;
++ int ret = 0;
+
+ /* i_rwsem is held by caller */
+ if (unlikely(info->seals & (F_SEAL_GROW |
+@@ -2466,7 +2474,19 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
+ return -EPERM;
+ }
+
+- return shmem_getpage(inode, index, pagep, SGP_WRITE);
++ ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
++
++ if (ret)
++ return ret;
++
++ if (PageHWPoison(*pagep)) {
++ unlock_page(*pagep);
++ put_page(*pagep);
++ *pagep = NULL;
++ return -EIO;
++ }
++
++ return 0;
+ }
+
+ static int
+@@ -2553,6 +2573,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ if (sgp == SGP_CACHE)
+ set_page_dirty(page);
+ unlock_page(page);
++
++ if (PageHWPoison(page)) {
++ put_page(page);
++ error = -EIO;
++ break;
++ }
+ }
+
+ /*
+@@ -3114,7 +3140,8 @@ static const char *shmem_get_link(struct dentry *dentry,
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+- if (!PageUptodate(page)) {
++ if (PageHWPoison(page) ||
++ !PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+@@ -3122,6 +3149,13 @@ static const char *shmem_get_link(struct dentry *dentry,
+ error = shmem_getpage(inode, 0, &page, SGP_READ);
+ if (error)
+ return ERR_PTR(error);
++ if (!page)
++ return ERR_PTR(-ECHILD);
++ if (PageHWPoison(page)) {
++ unlock_page(page);
++ put_page(page);
++ return ERR_PTR(-ECHILD);
++ }
+ unlock_page(page);
+ }
+ set_delayed_call(done, shmem_put_link, page);
+@@ -3360,6 +3394,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
+ unsigned long long size;
+ char *rest;
+ int opt;
++ kuid_t kuid;
++ kgid_t kgid;
+
+ opt = fs_parse(fc, shmem_fs_parameters, param, &result);
+ if (opt < 0)
+@@ -3395,14 +3431,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
+ ctx->mode = result.uint_32 & 07777;
+ break;
+ case Opt_uid:
+- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
+- if (!uid_valid(ctx->uid))
++ kuid = make_kuid(current_user_ns(), result.uint_32);
++ if (!uid_valid(kuid))
++ goto bad_value;
++
++ /*
++ * The requested uid must be representable in the
++ * filesystem's idmapping.
++ */
++ if (!kuid_has_mapping(fc->user_ns, kuid))
+ goto bad_value;
++
++ ctx->uid = kuid;
+ break;
+ case Opt_gid:
+- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
+- if (!gid_valid(ctx->gid))
++ kgid = make_kgid(current_user_ns(), result.uint_32);
++ if (!gid_valid(kgid))
++ goto bad_value;
++
++ /*
++ * The requested gid must be representable in the
++ * filesystem's idmapping.
++ */
++ if (!kgid_has_mapping(fc->user_ns, kgid))
+ goto bad_value;
++
++ ctx->gid = kgid;
+ break;
+ case Opt_huge:
+ ctx->huge = result.uint_32;
+@@ -3772,6 +3826,13 @@ static void shmem_destroy_inodecache(void)
+ kmem_cache_destroy(shmem_inode_cachep);
+ }
+
++/* Keep the page in page cache instead of truncating it */
++static int shmem_error_remove_page(struct address_space *mapping,
++ struct page *page)
++{
++ return 0;
++}
++
+ const struct address_space_operations shmem_aops = {
+ .writepage = shmem_writepage,
+ .set_page_dirty = __set_page_dirty_no_writeback,
+@@ -3782,7 +3843,7 @@ const struct address_space_operations shmem_aops = {
+ #ifdef CONFIG_MIGRATION
+ .migratepage = migrate_page,
+ #endif
+- .error_remove_page = generic_error_remove_page,
++ .error_remove_page = shmem_error_remove_page,
+ };
+ EXPORT_SYMBOL(shmem_aops);
+
+@@ -4002,7 +4063,7 @@ static struct file_system_type shmem_fs_type = {
+ .name = "tmpfs",
+ .init_fs_context = ramfs_init_fs_context,
+ .parameters = ramfs_fs_parameters,
+- .kill_sb = kill_litter_super,
++ .kill_sb = ramfs_kill_sb,
+ .fs_flags = FS_USERNS_MOUNT,
+ };
+
+@@ -4190,9 +4251,14 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+ error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+ gfp, NULL, NULL, NULL);
+ if (error)
+- page = ERR_PTR(error);
+- else
+- unlock_page(page);
++ return ERR_PTR(error);
++
++ unlock_page(page);
++ if (PageHWPoison(page)) {
++ put_page(page);
++ return ERR_PTR(-EIO);
++ }
++
+ return page;
+ #else
+ /*
+diff --git a/mm/slab.c b/mm/slab.c
+index 874b3f8fe80da..f5b2246f832da 100644
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -855,7 +855,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+ return 0;
+ }
+
+-#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
++#if defined(CONFIG_NUMA) || defined(CONFIG_SMP)
+ /*
+ * Allocates and initializes node for a node on each slab cache, used for
+ * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
+@@ -3429,6 +3429,7 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
+
+ if (is_kfence_address(objp)) {
+ kmemleak_free_recursive(objp, cachep->flags);
++ memcg_slab_free_hook(cachep, &objp, 1);
+ __kfence_free(objp);
+ return;
+ }
+@@ -3657,7 +3658,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
+ #endif /* CONFIG_NUMA */
+
+ #ifdef CONFIG_PRINTK
+-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+ {
+ struct kmem_cache *cachep;
+ unsigned int objnr;
+diff --git a/mm/slab.h b/mm/slab.h
+index 58c01a34e5b86..1ae1bdd485c17 100644
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -147,7 +147,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
+ #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+ SLAB_TEMPORARY | SLAB_ACCOUNT)
+ #else
+-#define SLAB_CACHE_FLAGS (0)
++#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
+ #endif
+
+ /* Common flags available with current configuration */
+@@ -643,7 +643,7 @@ struct kmem_obj_info {
+ void *kp_stack[KS_ADDRS_COUNT];
+ void *kp_free_stack[KS_ADDRS_COUNT];
+ };
+-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
++void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
+ #endif
+
+ #endif /* MM_SLAB_H */
+diff --git a/mm/slab_common.c b/mm/slab_common.c
+index ec2bb0beed757..022319e7deaf7 100644
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -568,6 +568,13 @@ bool kmem_valid_obj(void *object)
+ }
+ EXPORT_SYMBOL_GPL(kmem_valid_obj);
+
++static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++{
++ if (__kfence_obj_info(kpp, object, page))
++ return;
++ __kmem_obj_info(kpp, object, page);
++}
++
+ /**
+ * kmem_dump_obj - Print available slab provenance information
+ * @object: slab object for which to find provenance information.
+@@ -603,6 +610,8 @@ void kmem_dump_obj(void *object)
+ pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
+ else
+ pr_cont(" slab%s", cp);
++ if (is_kfence_address(object))
++ pr_cont(" (kfence)");
+ if (kp.kp_objp)
+ pr_cont(" start %px", kp.kp_objp);
+ if (kp.kp_data_offset)
+diff --git a/mm/slob.c b/mm/slob.c
+index 74d3f6e60666e..f3fc15df971af 100644
+--- a/mm/slob.c
++++ b/mm/slob.c
+@@ -462,7 +462,7 @@ out:
+ }
+
+ #ifdef CONFIG_PRINTK
+-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+ {
+ kpp->kp_ptr = object;
+ kpp->kp_page = page;
+diff --git a/mm/slub.c b/mm/slub.c
+index d8f77346376d8..f95ae136a0698 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -308,6 +308,11 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
+ */
+ static nodemask_t slab_nodes;
+
++/*
++ * Workqueue used for flush_cpu_slab().
++ */
++static struct workqueue_struct *flushwq;
++
+ /********************************************************************
+ * Core slab cache functions
+ *******************************************************************/
+@@ -2688,7 +2693,7 @@ static void flush_all_cpus_locked(struct kmem_cache *s)
+ INIT_WORK(&sfw->work, flush_cpu_slab);
+ sfw->skip = false;
+ sfw->s = s;
+- schedule_work_on(cpu, &sfw->work);
++ queue_work_on(cpu, flushwq, &sfw->work);
+ }
+
+ for_each_online_cpu(cpu) {
+@@ -2935,6 +2940,7 @@ redo:
+
+ if (!freelist) {
+ c->page = NULL;
++ c->tid = next_tid(c->tid);
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+ stat(s, DEACTIVATE_BYPASS);
+ goto new_slab;
+@@ -2967,6 +2973,7 @@ deactivate_slab:
+ freelist = c->freelist;
+ c->page = NULL;
+ c->freelist = NULL;
++ c->tid = next_tid(c->tid);
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+ deactivate_slab(s, page, freelist);
+
+@@ -4299,7 +4306,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
+ }
+
+ #ifdef CONFIG_PRINTK
+-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
++void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+ {
+ void *base;
+ int __maybe_unused i;
+@@ -4848,6 +4855,8 @@ void __init kmem_cache_init(void)
+
+ void __init kmem_cache_init_late(void)
+ {
++ flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
++ WARN_ON(!flushwq);
+ }
+
+ struct kmem_cache *
+@@ -4918,6 +4927,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
+ /* Honor the call site pointer we received. */
+ trace_kmalloc(caller, ret, size, s->size, gfpflags);
+
++ ret = kasan_kmalloc(s, ret, size, gfpflags);
++
+ return ret;
+ }
+ EXPORT_SYMBOL(__kmalloc_track_caller);
+@@ -4949,6 +4960,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
+ /* Honor the call site pointer we received. */
+ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+
++ ret = kasan_kmalloc(s, ret, size, gfpflags);
++
+ return ret;
+ }
+ EXPORT_SYMBOL(__kmalloc_node_track_caller);
+@@ -5072,6 +5085,7 @@ struct loc_track {
+ unsigned long max;
+ unsigned long count;
+ struct location *loc;
++ loff_t idx;
+ };
+
+ static struct dentry *slab_debugfs_root;
+@@ -5862,7 +5876,8 @@ static char *create_unique_id(struct kmem_cache *s)
+ char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
+ char *p = name;
+
+- BUG_ON(!name);
++ if (!name)
++ return ERR_PTR(-ENOMEM);
+
+ *p++ = ':';
+ /*
+@@ -5920,6 +5935,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
+ * for the symlinks.
+ */
+ name = create_unique_id(s);
++ if (IS_ERR(name))
++ return PTR_ERR(name);
+ }
+
+ s->kobj.kset = kset;
+@@ -6035,11 +6052,11 @@ __initcall(slab_sysfs_init);
+ #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
+ static int slab_debugfs_show(struct seq_file *seq, void *v)
+ {
+-
+- struct location *l;
+- unsigned int idx = *(unsigned int *)v;
+ struct loc_track *t = seq->private;
++ struct location *l;
++ unsigned long idx;
+
++ idx = (unsigned long) t->idx;
+ if (idx < t->count) {
+ l = &t->loc[idx];
+
+@@ -6088,16 +6105,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
+ {
+ struct loc_track *t = seq->private;
+
+- v = ppos;
+- ++*ppos;
++ t->idx = ++(*ppos);
+ if (*ppos <= t->count)
+- return v;
++ return ppos;
+
+ return NULL;
+ }
+
+ static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
+ {
++ struct loc_track *t = seq->private;
++
++ t->idx = *ppos;
+ return ppos;
+ }
+
+diff --git a/mm/swap_state.c b/mm/swap_state.c
+index bc7cee6b2ec54..122a37cbc081f 100644
+--- a/mm/swap_state.c
++++ b/mm/swap_state.c
+@@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
+ * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
+ * in swap_map, but not yet added its page to swap cache.
+ */
+- cond_resched();
++ schedule_timeout_uninterruptible(1);
+ }
+
+ /*
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 22d10f7138487..b7e1620adee62 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -673,6 +673,7 @@ static void __del_from_avail_list(struct swap_info_struct *p)
+ {
+ int nid;
+
++ assert_spin_locked(&p->lock);
+ for_each_node(nid)
+ plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]);
+ }
+@@ -1093,6 +1094,7 @@ start_over:
+ goto check_out;
+ pr_debug("scan_swap_map of si %d failed to find offset\n",
+ si->type);
++ cond_resched();
+
+ spin_lock(&swap_avail_lock);
+ nextsi:
+@@ -2564,8 +2566,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
+ spin_unlock(&swap_lock);
+ goto out_dput;
+ }
+- del_from_avail_list(p);
+ spin_lock(&p->lock);
++ del_from_avail_list(p);
+ if (p->prio < 0) {
+ struct swap_info_struct *si = p;
+ int nid;
+diff --git a/mm/usercopy.c b/mm/usercopy.c
+index b3de3c4eefba7..540968b481e7e 100644
+--- a/mm/usercopy.c
++++ b/mm/usercopy.c
+@@ -294,7 +294,10 @@ static bool enable_checks __initdata = true;
+
+ static int __init parse_hardened_usercopy(char *str)
+ {
+- return strtobool(str, &enable_checks);
++ if (strtobool(str, &enable_checks))
++ pr_warn("Invalid option string for hardened_usercopy: '%s'\n",
++ str);
++ return 1;
+ }
+
+ __setup("hardened_usercopy=", parse_hardened_usercopy);
+diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
+index 7a90084155343..caa13abe0c56b 100644
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -63,7 +63,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+ pte_t _dst_pte, *dst_pte;
+ bool writable = dst_vma->vm_flags & VM_WRITE;
+ bool vm_shared = dst_vma->vm_flags & VM_SHARED;
+- bool page_in_cache = page->mapping;
++ bool page_in_cache = page_mapping(page);
+ spinlock_t *ptl;
+ struct inode *inode;
+ pgoff_t offset, max_off;
+@@ -151,6 +151,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
+ /* don't free the page */
+ goto out;
+ }
++
++ flush_dcache_page(page);
+ } else {
+ page = *pagep;
+ *pagep = NULL;
+@@ -225,7 +227,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
+ struct page *page;
+ int ret;
+
+- ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
++ ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
++ /* Our caller expects us to return -EFAULT if we failed to find page. */
++ if (ret == -ENOENT)
++ ret = -EFAULT;
+ if (ret)
+ goto out;
+ if (!page) {
+@@ -233,6 +238,11 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
+ goto out;
+ }
+
++ if (PageHWPoison(page)) {
++ ret = -EIO;
++ goto out_release;
++ }
++
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
+ page, false, wp_copy);
+ if (ret)
+@@ -621,6 +631,7 @@ retry:
+ err = -EFAULT;
+ goto out;
+ }
++ flush_dcache_page(page);
+ goto retry;
+ } else
+ BUG_ON(page);
+diff --git a/mm/util.c b/mm/util.c
+index bacabe4469065..e89ef15085de7 100644
+--- a/mm/util.c
++++ b/mm/util.c
+@@ -343,6 +343,38 @@ unsigned long randomize_stack_top(unsigned long stack_top)
+ #endif
+ }
+
++/**
++ * randomize_page - Generate a random, page aligned address
++ * @start: The smallest acceptable address the caller will take.
++ * @range: The size of the area, starting at @start, within which the
++ * random address must fall.
++ *
++ * If @start + @range would overflow, @range is capped.
++ *
++ * NOTE: Historical use of randomize_range, which this replaces, presumed that
++ * @start was already page aligned. We now align it regardless.
++ *
++ * Return: A page aligned address within [start, start + range). On error,
++ * @start is returned.
++ */
++unsigned long randomize_page(unsigned long start, unsigned long range)
++{
++ if (!PAGE_ALIGNED(start)) {
++ range -= PAGE_ALIGN(start) - start;
++ start = PAGE_ALIGN(start);
++ }
++
++ if (start > ULONG_MAX - range)
++ range = ULONG_MAX - start;
++
++ range >>= PAGE_SHIFT;
++
++ if (range == 0)
++ return start;
++
++ return start + (get_random_long() % range << PAGE_SHIFT);
++}
++
+ #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+ unsigned long arch_randomize_brk(struct mm_struct *mm)
+ {
+@@ -594,8 +626,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
+ return ret;
+
+ /* Don't even allow crazy sizes */
+- if (WARN_ON_ONCE(size > INT_MAX))
++ if (unlikely(size > INT_MAX)) {
++ WARN_ON_ONCE(!(flags & __GFP_NOWARN));
+ return NULL;
++ }
+
+ return __vmalloc_node(size, 1, flags, node,
+ __builtin_return_address(0));
+@@ -664,6 +698,56 @@ static inline void *__page_rmapping(struct page *page)
+ return (void *)mapping;
+ }
+
++/**
++ * __vmalloc_array - allocate memory for a virtually contiguous array.
++ * @n: number of elements.
++ * @size: element size.
++ * @flags: the type of memory to allocate (see kmalloc).
++ */
++void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
++{
++ size_t bytes;
++
++ if (unlikely(check_mul_overflow(n, size, &bytes)))
++ return NULL;
++ return __vmalloc(bytes, flags);
++}
++EXPORT_SYMBOL(__vmalloc_array);
++
++/**
++ * vmalloc_array - allocate memory for a virtually contiguous array.
++ * @n: number of elements.
++ * @size: element size.
++ */
++void *vmalloc_array(size_t n, size_t size)
++{
++ return __vmalloc_array(n, size, GFP_KERNEL);
++}
++EXPORT_SYMBOL(vmalloc_array);
++
++/**
++ * __vcalloc - allocate and zero memory for a virtually contiguous array.
++ * @n: number of elements.
++ * @size: element size.
++ * @flags: the type of memory to allocate (see kmalloc).
++ */
++void *__vcalloc(size_t n, size_t size, gfp_t flags)
++{
++ return __vmalloc_array(n, size, flags | __GFP_ZERO);
++}
++EXPORT_SYMBOL(__vcalloc);
++
++/**
++ * vcalloc - allocate and zero memory for a virtually contiguous array.
++ * @n: number of elements.
++ * @size: element size.
++ */
++void *vcalloc(size_t n, size_t size)
++{
++ return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
++}
++EXPORT_SYMBOL(vcalloc);
++
+ /* Neutral page->mapping pointer to address_space or anon_vma or other */
+ void *page_rmapping(struct page *page)
+ {
+@@ -1026,7 +1110,9 @@ void mem_dump_obj(void *object)
+ if (vmalloc_dump_obj(object))
+ return;
+
+- if (virt_addr_valid(object))
++ if (is_vmalloc_addr(object))
++ type = "vmalloc memory";
++ else if (virt_addr_valid(object))
+ type = "non-slab/vmalloc memory";
+ else if (object == NULL)
+ type = "NULL pointer";
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index e8a807c781107..cd434f0ec47f2 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2806,6 +2806,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
+ free_vm_area(area);
+ return NULL;
+ }
++
++ flush_cache_vmap((unsigned long)area->addr,
++ (unsigned long)area->addr + count * PAGE_SIZE);
++
+ return area->addr;
+ }
+ EXPORT_SYMBOL_GPL(vmap_pfn);
+@@ -2927,9 +2931,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
+ * allocation request, free them via __vfree() if any.
+ */
+ if (area->nr_pages != nr_small_pages) {
+- warn_alloc(gfp_mask, NULL,
+- "vmalloc error: size %lu, page order %u, failed to allocate pages",
+- area->nr_pages * PAGE_SIZE, page_order);
++ /* vm_area_alloc_pages() can also fail due to a fatal signal */
++ if (!fatal_signal_pending(current))
++ warn_alloc(gfp_mask, NULL,
++ "vmalloc error: size %lu, page order %u, failed to allocate pages",
++ area->nr_pages * PAGE_SIZE, page_order);
+ goto fail;
+ }
+
+@@ -3032,7 +3038,8 @@ again:
+ clear_vm_uninitialized_flag(area);
+
+ size = PAGE_ALIGN(size);
+- kmemleak_vmalloc(area, size, gfp_mask);
++ if (!(vm_flags & VM_DEFER_KMEMLEAK))
++ kmemleak_vmalloc(area, size, gfp_mask);
+
+ return addr;
+
+@@ -3816,14 +3823,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
+ #ifdef CONFIG_PRINTK
+ bool vmalloc_dump_obj(void *object)
+ {
+- struct vm_struct *vm;
+ void *objp = (void *)PAGE_ALIGN((unsigned long)object);
++ const void *caller;
++ struct vm_struct *vm;
++ struct vmap_area *va;
++ unsigned long addr;
++ unsigned int nr_pages;
+
+- vm = find_vm_area(objp);
+- if (!vm)
++ if (!spin_trylock(&vmap_area_lock))
+ return false;
++ va = __find_vmap_area((unsigned long)objp);
++ if (!va) {
++ spin_unlock(&vmap_area_lock);
++ return false;
++ }
++
++ vm = va->vm;
++ if (!vm) {
++ spin_unlock(&vmap_area_lock);
++ return false;
++ }
++ addr = (unsigned long)vm->addr;
++ caller = vm->caller;
++ nr_pages = vm->nr_pages;
++ spin_unlock(&vmap_area_lock);
+ pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
+- vm->nr_pages, (unsigned long)vm->addr, vm->caller);
++ nr_pages, addr, caller);
+ return true;
+ }
+ #endif
+diff --git a/mm/vmpressure.c b/mm/vmpressure.c
+index 76518e4166dc9..383e0463c0258 100644
+--- a/mm/vmpressure.c
++++ b/mm/vmpressure.c
+@@ -244,6 +244,14 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
+ if (mem_cgroup_disabled())
+ return;
+
++ /*
++ * The in-kernel users only care about the reclaim efficiency
++ * for this @memcg rather than the whole subtree, and there
++ * isn't and won't be any in-kernel user in a legacy cgroup.
++ */
++ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !tree)
++ return;
++
+ vmpr = memcg_to_vmpressure(memcg);
+
+ /*
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 74296c2d1fed2..201acea818040 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1865,69 +1865,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ return nr_reclaimed;
+ }
+
+-/*
+- * Attempt to remove the specified page from its LRU. Only take this page
+- * if it is of the appropriate PageActive status. Pages which are being
+- * freed elsewhere are also ignored.
+- *
+- * page: page to consider
+- * mode: one of the LRU isolation modes defined above
+- *
+- * returns true on success, false on failure.
+- */
+-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+-{
+- /* Only take pages on the LRU. */
+- if (!PageLRU(page))
+- return false;
+-
+- /* Compaction should not handle unevictable pages but CMA can do so */
+- if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
+- return false;
+-
+- /*
+- * To minimise LRU disruption, the caller can indicate that it only
+- * wants to isolate pages it will be able to operate on without
+- * blocking - clean pages for the most part.
+- *
+- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+- * that it is possible to migrate without blocking
+- */
+- if (mode & ISOLATE_ASYNC_MIGRATE) {
+- /* All the caller can do on PageWriteback is block */
+- if (PageWriteback(page))
+- return false;
+-
+- if (PageDirty(page)) {
+- struct address_space *mapping;
+- bool migrate_dirty;
+-
+- /*
+- * Only pages without mappings or that have a
+- * ->migratepage callback are possible to migrate
+- * without blocking. However, we can be racing with
+- * truncation so it's necessary to lock the page
+- * to stabilise the mapping as truncation holds
+- * the page lock until after the page is removed
+- * from the page cache.
+- */
+- if (!trylock_page(page))
+- return false;
+-
+- mapping = page_mapping(page);
+- migrate_dirty = !mapping || mapping->a_ops->migratepage;
+- unlock_page(page);
+- if (!migrate_dirty)
+- return false;
+- }
+- }
+-
+- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
+- return false;
+-
+- return true;
+-}
+-
+ /*
+ * Update LRU sizes after isolating pages. The LRU size updates must
+ * be complete before mem_cgroup_update_lru_size due to a sanity check.
+@@ -1979,11 +1916,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ unsigned long skipped = 0;
+ unsigned long scan, total_scan, nr_pages;
+ LIST_HEAD(pages_skipped);
+- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
+
+ total_scan = 0;
+ scan = 0;
+ while (scan < nr_to_scan && !list_empty(src)) {
++ struct list_head *move_to = src;
+ struct page *page;
+
+ page = lru_to_page(src);
+@@ -1993,9 +1930,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ total_scan += nr_pages;
+
+ if (page_zonenum(page) > sc->reclaim_idx) {
+- list_move(&page->lru, &pages_skipped);
+ nr_skipped[page_zonenum(page)] += nr_pages;
+- continue;
++ move_to = &pages_skipped;
++ goto move;
+ }
+
+ /*
+@@ -2003,37 +1940,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ * return with no isolated pages if the LRU mostly contains
+ * ineligible pages. This causes the VM to not reclaim any
+ * pages, triggering a premature OOM.
+- *
+- * Account all tail pages of THP. This would not cause
+- * premature OOM since __isolate_lru_page() returns -EBUSY
+- * only when the page is being freed somewhere else.
++ * Account all tail pages of THP.
+ */
+ scan += nr_pages;
+- if (!__isolate_lru_page_prepare(page, mode)) {
+- /* It is being freed elsewhere */
+- list_move(&page->lru, src);
+- continue;
+- }
++
++ if (!PageLRU(page))
++ goto move;
++ if (!sc->may_unmap && page_mapped(page))
++ goto move;
++
+ /*
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+- if (unlikely(!get_page_unless_zero(page))) {
+- list_move(&page->lru, src);
+- continue;
+- }
++ if (unlikely(!get_page_unless_zero(page)))
++ goto move;
+
+ if (!TestClearPageLRU(page)) {
+ /* Another thread is already isolating this page */
+ put_page(page);
+- list_move(&page->lru, src);
+- continue;
++ goto move;
+ }
+
+ nr_taken += nr_pages;
+ nr_zone_taken[page_zonenum(page)] += nr_pages;
+- list_move(&page->lru, dst);
++ move_to = dst;
++move:
++ list_move(&page->lru, move_to);
+ }
+
+ /*
+@@ -2057,7 +1991,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ }
+ *nr_scanned = total_scan;
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+- total_scan, skipped, nr_taken, mode, lru);
++ total_scan, skipped, nr_taken,
++ sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
+ update_lru_sizes(lruvec, lru, nr_zone_taken);
+ return nr_taken;
+ }
+@@ -2791,8 +2726,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ enum lru_list lru;
+ unsigned long nr_reclaimed = 0;
+ unsigned long nr_to_reclaim = sc->nr_to_reclaim;
++ bool proportional_reclaim;
+ struct blk_plug plug;
+- bool scan_adjusted;
+
+ get_scan_count(lruvec, sc, nr);
+
+@@ -2810,8 +2745,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ * abort proportional reclaim if either the file or anon lru has already
+ * dropped to zero at the first pass.
+ */
+- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+- sc->priority == DEF_PRIORITY);
++ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
++ sc->priority == DEF_PRIORITY);
+
+ blk_start_plug(&plug);
+ while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+@@ -2831,7 +2766,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+
+ cond_resched();
+
+- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
++ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
+ continue;
+
+ /*
+@@ -2882,8 +2817,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ nr_scanned = targets[lru] - nr[lru];
+ nr[lru] = targets[lru] * (100 - percentage) / 100;
+ nr[lru] -= min(nr[lru], nr_scanned);
+-
+- scan_adjusted = true;
+ }
+ blk_finish_plug(&plug);
+ sc->nr_reclaimed += nr_reclaimed;
+diff --git a/mm/workingset.c b/mm/workingset.c
+index d5b81e4f4cbe8..880d882f3325f 100644
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -352,7 +352,7 @@ void workingset_refault(struct page *page, void *shadow)
+
+ inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);
+
+- mem_cgroup_flush_stats();
++ mem_cgroup_flush_stats_delayed();
+ /*
+ * Compare the distance to the existing workingset size. We
+ * don't activate pages that couldn't stay resident even if
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index 68e8831068f4b..439deb8decbcc 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -1743,11 +1743,40 @@ static enum fullness_group putback_zspage(struct size_class *class,
+ */
+ static void lock_zspage(struct zspage *zspage)
+ {
+- struct page *page = get_first_page(zspage);
++ struct page *curr_page, *page;
+
+- do {
+- lock_page(page);
+- } while ((page = get_next_page(page)) != NULL);
++ /*
++ * Pages we haven't locked yet can be migrated off the list while we're
++ * trying to lock them, so we need to be careful and only attempt to
++ * lock each page under migrate_read_lock(). Otherwise, the page we lock
++ * may no longer belong to the zspage. This means that we may wait for
++ * the wrong page to unlock, so we must take a reference to the page
++ * prior to waiting for it to unlock outside migrate_read_lock().
++ */
++ while (1) {
++ migrate_read_lock(zspage);
++ page = get_first_page(zspage);
++ if (trylock_page(page))
++ break;
++ get_page(page);
++ migrate_read_unlock(zspage);
++ wait_on_page_locked(page);
++ put_page(page);
++ }
++
++ curr_page = page;
++ while ((page = get_next_page(curr_page))) {
++ if (trylock_page(page)) {
++ curr_page = page;
++ } else {
++ get_page(page);
++ migrate_read_unlock(zspage);
++ wait_on_page_locked(page);
++ put_page(page);
++ migrate_read_lock(zspage);
++ }
++ }
++ migrate_read_unlock(zspage);
+ }
+
+ static int zs_init_fs_context(struct fs_context *fc)
+@@ -1830,10 +1859,11 @@ static inline void zs_pool_dec_isolated(struct zs_pool *pool)
+ VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
+ atomic_long_dec(&pool->isolated_pages);
+ /*
+- * There's no possibility of racing, since wait_for_isolated_drain()
+- * checks the isolated count under &class->lock after enqueuing
+- * on migration_wait.
++ * Checking pool->destroying must happen after atomic_long_dec()
++ * for pool->isolated_pages above. Paired with the smp_mb() in
++ * zs_unregister_migration().
+ */
++ smp_mb__after_atomic();
+ if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
+ wake_up_all(&pool->migration_wait);
+ }
+diff --git a/net/802/mrp.c b/net/802/mrp.c
+index 35e04cc5390c4..c10a432a5b435 100644
+--- a/net/802/mrp.c
++++ b/net/802/mrp.c
+@@ -606,7 +606,10 @@ static void mrp_join_timer(struct timer_list *t)
+ spin_unlock(&app->lock);
+
+ mrp_queue_xmit(app);
+- mrp_join_timer_arm(app);
++ spin_lock(&app->lock);
++ if (likely(app->active))
++ mrp_join_timer_arm(app);
++ spin_unlock(&app->lock);
+ }
+
+ static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+@@ -620,11 +623,12 @@ static void mrp_periodic_timer(struct timer_list *t)
+ struct mrp_applicant *app = from_timer(app, t, periodic_timer);
+
+ spin_lock(&app->lock);
+- mrp_mad_event(app, MRP_EVENT_PERIODIC);
+- mrp_pdu_queue(app);
++ if (likely(app->active)) {
++ mrp_mad_event(app, MRP_EVENT_PERIODIC);
++ mrp_pdu_queue(app);
++ mrp_periodic_timer_arm(app);
++ }
+ spin_unlock(&app->lock);
+-
+- mrp_periodic_timer_arm(app);
+ }
+
+ static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
+@@ -872,6 +876,7 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
+ app->dev = dev;
+ app->app = appl;
+ app->mad = RB_ROOT;
++ app->active = true;
+ spin_lock_init(&app->lock);
+ skb_queue_head_init(&app->queue);
+ rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
+@@ -900,6 +905,9 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
+
+ RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+
++ spin_lock_bh(&app->lock);
++ app->active = false;
++ spin_unlock_bh(&app->lock);
+ /* Delete timer and generate a final TX event to flush out
+ * all pending messages before the applicant is gone.
+ */
+diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
+index 55275ef9a31a7..abaa5d96ded24 100644
+--- a/net/8021q/vlan.c
++++ b/net/8021q/vlan.c
+@@ -123,9 +123,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
+ }
+
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+-
+- /* Get rid of the vlan's reference to real_dev */
+- dev_put(real_dev);
+ }
+
+ int vlan_check_real_dev(struct net_device *real_dev,
+@@ -187,9 +184,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
+ if (err)
+ goto out_unregister_netdev;
+
+- /* Account for reference in struct vlan_dev_priv */
+- dev_hold(real_dev);
+-
+ vlan_stacked_transfer_operstate(real_dev, dev, vlan);
+ linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
+
+diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
+index 0c21d1fec8522..3d0f0d0a323b5 100644
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -108,8 +108,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
+ * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
+ * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
+ */
+- if (veth->h_vlan_proto != vlan->vlan_proto ||
+- vlan->flags & VLAN_FLAG_REORDER_HDR) {
++ if (vlan->flags & VLAN_FLAG_REORDER_HDR ||
++ veth->h_vlan_proto != vlan->vlan_proto) {
+ u16 vlan_tci;
+ vlan_tci = vlan->vlan_id;
+ vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
+@@ -250,7 +250,7 @@ bool vlan_dev_inherit_address(struct net_device *dev,
+ if (dev->addr_assign_type != NET_ADDR_STOLEN)
+ return false;
+
+- ether_addr_copy(dev->dev_addr, real_dev->dev_addr);
++ eth_hw_addr_set(dev, real_dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return true;
+ }
+@@ -349,7 +349,7 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
+ dev_uc_del(real_dev, dev->dev_addr);
+
+ out:
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+ return 0;
+ }
+
+@@ -365,7 +365,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+- if (!net_eq(dev_net(dev), &init_net))
++ if (!net_eq(dev_net(dev), dev_net(real_dev)))
+ break;
+ fallthrough;
+ case SIOCGMIIPHY:
+@@ -586,7 +586,7 @@ static int vlan_dev_init(struct net_device *dev)
+ dev->dev_id = real_dev->dev_id;
+
+ if (is_zero_ether_addr(dev->dev_addr)) {
+- ether_addr_copy(dev->dev_addr, real_dev->dev_addr);
++ eth_hw_addr_set(dev, real_dev->dev_addr);
+ dev->addr_assign_type = NET_ADDR_STOLEN;
+ }
+ if (is_zero_ether_addr(dev->broadcast))
+@@ -615,6 +615,9 @@ static int vlan_dev_init(struct net_device *dev)
+ if (!vlan->vlan_pcpu_stats)
+ return -ENOMEM;
+
++ /* Get vlan's reference to real_dev */
++ dev_hold(real_dev);
++
+ return 0;
+ }
+
+@@ -843,6 +846,9 @@ static void vlan_dev_free(struct net_device *dev)
+
+ free_percpu(vlan->vlan_pcpu_stats);
+ vlan->vlan_pcpu_stats = NULL;
++
++ /* Get rid of the vlan's reference to real_dev */
++ dev_put(vlan->real_dev);
+ }
+
+ void vlan_setup(struct net_device *dev)
+diff --git a/net/9p/client.c b/net/9p/client.c
+index 213f12ed76cd8..c4c1e44cd7ca3 100644
+--- a/net/9p/client.c
++++ b/net/9p/client.c
+@@ -32,10 +32,9 @@
+
+ #define DEFAULT_MSIZE (128 * 1024)
+
+-/*
+- * Client Option Parsing (code inspired by NFS code)
+- * - a little lazy - parse all client options
+- */
++/* Client Option Parsing (code inspired by NFS code)
++ * - a little lazy - parse all client options
++ */
+
+ enum {
+ Opt_msize,
+@@ -89,20 +88,18 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
+ }
+ EXPORT_SYMBOL(p9_show_client_options);
+
+-/*
+- * Some error codes are taken directly from the server replies,
++/* Some error codes are taken directly from the server replies,
+ * make sure they are valid.
+ */
+ static int safe_errno(int err)
+ {
+- if ((err > 0) || (err < -MAX_ERRNO)) {
++ if (err > 0 || err < -MAX_ERRNO) {
+ p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
+ return -EPROTO;
+ }
+ return err;
+ }
+
+-
+ /* Interpret mount option for protocol version */
+ static int get_protocol_version(char *s)
+ {
+@@ -117,8 +114,9 @@ static int get_protocol_version(char *s)
+ } else if (!strcmp(s, "9p2000.L")) {
+ version = p9_proto_2000L;
+ p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
+- } else
++ } else {
+ pr_info("Unknown protocol version %s\n", s);
++ }
+
+ return version;
+ }
+@@ -147,15 +145,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
+ return 0;
+
+ tmp_options = kstrdup(opts, GFP_KERNEL);
+- if (!tmp_options) {
+- p9_debug(P9_DEBUG_ERROR,
+- "failed to allocate copy of option string\n");
++ if (!tmp_options)
+ return -ENOMEM;
+- }
+ options = tmp_options;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token, r;
++
+ if (!*p)
+ continue;
+ token = match_token(p, tokens, args);
+@@ -187,7 +183,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
+
+ v9fs_put_trans(clnt->trans_mod);
+ clnt->trans_mod = v9fs_get_trans_by_name(s);
+- if (clnt->trans_mod == NULL) {
++ if (!clnt->trans_mod) {
+ pr_info("Could not find request transport: %s\n",
+ s);
+ ret = -EINVAL;
+@@ -285,6 +281,11 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
+ p9pdu_reset(&req->rc);
+ req->t_err = 0;
+ req->status = REQ_STATUS_ALLOC;
++ /* refcount needs to be set to 0 before inserting into the idr
++ * so p9_tag_lookup does not accept a request that is not fully
++ * initialized. refcount_set to 2 below will mark request ready.
++ */
++ refcount_set(&req->refcount, 0);
+ init_waitqueue_head(&req->wq);
+ INIT_LIST_HEAD(&req->req_list);
+
+@@ -311,7 +312,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
+ * callback), so p9_client_cb eats the second ref there
+ * as the pointer is duplicated directly by virtqueue_add_sgs()
+ */
+- refcount_set(&req->refcount.refcount, 2);
++ refcount_set(&req->refcount, 2);
+
+ return req;
+
+@@ -347,7 +348,7 @@ again:
+ if (!p9_req_try_get(req))
+ goto again;
+ if (req->tc.tag != tag) {
+- p9_req_put(req);
++ p9_req_put(c, req);
+ goto again;
+ }
+ }
+@@ -373,20 +374,18 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+ spin_lock_irqsave(&c->lock, flags);
+ idr_remove(&c->reqs, tag);
+ spin_unlock_irqrestore(&c->lock, flags);
+- return p9_req_put(r);
+-}
+-
+-static void p9_req_free(struct kref *ref)
+-{
+- struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
+- p9_fcall_fini(&r->tc);
+- p9_fcall_fini(&r->rc);
+- kmem_cache_free(p9_req_cache, r);
++ return p9_req_put(c, r);
+ }
+
+-int p9_req_put(struct p9_req_t *r)
++int p9_req_put(struct p9_client *c, struct p9_req_t *r)
+ {
+- return kref_put(&r->refcount, p9_req_free);
++ if (refcount_dec_and_test(&r->refcount)) {
++ p9_fcall_fini(&r->tc);
++ p9_fcall_fini(&r->rc);
++ kmem_cache_free(p9_req_cache, r);
++ return 1;
++ }
++ return 0;
+ }
+ EXPORT_SYMBOL(p9_req_put);
+
+@@ -423,8 +422,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
+ {
+ p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
+
+- /*
+- * This barrier is needed to make sure any change made to req before
++ /* This barrier is needed to make sure any change made to req before
+ * the status change is visible to another thread
+ */
+ smp_wmb();
+@@ -432,7 +430,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
+
+ wake_up(&req->wq);
+ p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+- p9_req_put(req);
++ p9_req_put(c, req);
+ }
+ EXPORT_SYMBOL(p9_client_cb);
+
+@@ -446,12 +444,12 @@ EXPORT_SYMBOL(p9_client_cb);
+ */
+
+ int
+-p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,
+- int rewind)
++p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type,
++ int16_t *tag, int rewind)
+ {
+- int8_t r_type;
+- int16_t r_tag;
+- int32_t r_size;
++ s8 r_type;
++ s16 r_tag;
++ s32 r_size;
+ int offset = pdu->offset;
+ int err;
+
+@@ -499,7 +497,7 @@ EXPORT_SYMBOL(p9_parse_header);
+
+ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+ {
+- int8_t type;
++ s8 type;
+ int err;
+ int ecode;
+
+@@ -510,8 +508,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+ req->rc.size);
+ return -EIO;
+ }
+- /*
+- * dump the response from server
++ /* dump the response from server
+ * This should be after check errors which poplulate pdu_fcall.
+ */
+ trace_9p_protocol_dump(c, &req->rc);
+@@ -524,6 +521,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+
+ if (!p9_is_proto_dotl(c)) {
+ char *ename;
++
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
+ if (err)
+@@ -541,6 +539,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
+ kfree(ename);
+ } else {
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
++ if (err)
++ goto out_err;
+ err = -ecode;
+
+ p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+@@ -572,12 +572,11 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
+ {
+ int err;
+ int ecode;
+- int8_t type;
++ s8 type;
+ char *ename = NULL;
+
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+- /*
+- * dump the response from server
++ /* dump the response from server
+ * This should be after parse_header which poplulate pdu_fcall.
+ */
+ trace_9p_protocol_dump(c, &req->rc);
+@@ -605,7 +604,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
+ if (len > inline_len) {
+ /* We have error in external buffer */
+ if (!copy_from_iter_full(ename + inline_len,
+- len - inline_len, uidata)) {
++ len - inline_len, uidata)) {
+ err = -EFAULT;
+ goto out_err;
+ }
+@@ -657,7 +656,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
+ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+ {
+ struct p9_req_t *req;
+- int16_t oldtag;
++ s16 oldtag;
+ int err;
+
+ err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
+@@ -670,8 +669,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+- /*
+- * if we haven't received a response for oldreq,
++ /* if we haven't received a response for oldreq,
+ * remove it from the list
+ */
+ if (oldreq->status == REQ_STATUS_SENT) {
+@@ -697,7 +695,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
+ return ERR_PTR(-EIO);
+
+ /* if status is begin_disconnected we allow only clunk request */
+- if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
++ if (c->status == BeginDisconnect && type != P9_TCLUNK)
+ return ERR_PTR(-EIO);
+
+ req = p9_tag_alloc(c, type, req_size);
+@@ -715,7 +713,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
+ reterr:
+ p9_tag_remove(c, req);
+ /* We have to put also the 2nd reference as it won't be used */
+- p9_req_put(req);
++ p9_req_put(c, req);
+ return ERR_PTR(err);
+ }
+
+@@ -745,13 +743,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+ if (signal_pending(current)) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+- } else
++ } else {
+ sigpending = 0;
++ }
+
+ err = c->trans_mod->request(c, req);
+ if (err < 0) {
+ /* write won't happen */
+- p9_req_put(req);
++ p9_req_put(c, req);
+ if (err != -ERESTARTSYS && err != -EFAULT)
+ c->status = Disconnected;
+ goto recalc_sigpending;
+@@ -760,14 +759,13 @@ again:
+ /* Wait for the response */
+ err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
+
+- /*
+- * Make sure our req is coherent with regard to updates in other
++ /* Make sure our req is coherent with regard to updates in other
+ * threads - echoes to wmb() in the callback
+ */
+ smp_rmb();
+
+- if ((err == -ERESTARTSYS) && (c->status == Connected)
+- && (type == P9_TFLUSH)) {
++ if (err == -ERESTARTSYS && c->status == Connected &&
++ type == P9_TFLUSH) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+ goto again;
+@@ -777,7 +775,7 @@ again:
+ p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+ err = req->t_err;
+ }
+- if ((err == -ERESTARTSYS) && (c->status == Connected)) {
++ if (err == -ERESTARTSYS && c->status == Connected) {
+ p9_debug(P9_DEBUG_MUX, "flushing\n");
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+@@ -832,8 +830,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+ struct p9_req_t *req;
+
+ va_start(ap, fmt);
+- /*
+- * We allocate a inline protocol data of only 4k bytes.
++ /* We allocate a inline protocol data of only 4k bytes.
+ * The actual content is passed in zero-copy fashion.
+ */
+ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
+@@ -844,8 +841,9 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+ if (signal_pending(current)) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+- } else
++ } else {
+ sigpending = 0;
++ }
+
+ err = c->trans_mod->zc_request(c, req, uidata, uodata,
+ inlen, olen, in_hdrlen);
+@@ -859,7 +857,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+ p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+ err = req->t_err;
+ }
+- if ((err == -ERESTARTSYS) && (c->status == Connected)) {
++ if (err == -ERESTARTSYS && c->status == Connected) {
+ p9_debug(P9_DEBUG_MUX, "flushing\n");
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+@@ -895,16 +893,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+ struct p9_fid *fid;
+
+ p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
+- fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
++ fid = kzalloc(sizeof(*fid), GFP_KERNEL);
+ if (!fid)
+ return NULL;
+
+- memset(&fid->qid, 0, sizeof(struct p9_qid));
+ fid->mode = -1;
+ fid->uid = current_fsuid();
+ fid->clnt = clnt;
+- fid->rdir = NULL;
+- fid->fid = 0;
+ refcount_set(&fid->count, 1);
+
+ idr_preload(GFP_KERNEL);
+@@ -947,15 +942,15 @@ static int p9_client_version(struct p9_client *c)
+ switch (c->proto_version) {
+ case p9_proto_2000L:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+- c->msize, "9P2000.L");
++ c->msize, "9P2000.L");
+ break;
+ case p9_proto_2000u:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+- c->msize, "9P2000.u");
++ c->msize, "9P2000.u");
+ break;
+ case p9_proto_legacy:
+ req = p9_client_rpc(c, P9_TVERSION, "ds",
+- c->msize, "9P2000");
++ c->msize, "9P2000");
+ break;
+ default:
+ return -EINVAL;
+@@ -972,13 +967,13 @@ static int p9_client_version(struct p9_client *c)
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
+- if (!strncmp(version, "9P2000.L", 8))
++ if (!strncmp(version, "9P2000.L", 8)) {
+ c->proto_version = p9_proto_2000L;
+- else if (!strncmp(version, "9P2000.u", 8))
++ } else if (!strncmp(version, "9P2000.u", 8)) {
+ c->proto_version = p9_proto_2000u;
+- else if (!strncmp(version, "9P2000", 6))
++ } else if (!strncmp(version, "9P2000", 6)) {
+ c->proto_version = p9_proto_legacy;
+- else {
++ } else {
+ p9_debug(P9_DEBUG_ERROR,
+ "server returned an unknown version: %s\n", version);
+ err = -EREMOTEIO;
+@@ -1008,7 +1003,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
+ char *client_id;
+
+ err = 0;
+- clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
++ clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
+ if (!clnt)
+ return ERR_PTR(-ENOMEM);
+
+@@ -1030,7 +1025,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
+ if (!clnt->trans_mod)
+ clnt->trans_mod = v9fs_get_default_trans();
+
+- if (clnt->trans_mod == NULL) {
++ if (!clnt->trans_mod) {
+ err = -EPROTONOSUPPORT;
+ p9_debug(P9_DEBUG_ERROR,
+ "No transport defined or default transport\n");
+@@ -1118,14 +1113,14 @@ void p9_client_begin_disconnect(struct p9_client *clnt)
+ EXPORT_SYMBOL(p9_client_begin_disconnect);
+
+ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
+- const char *uname, kuid_t n_uname, const char *aname)
++ const char *uname, kuid_t n_uname,
++ const char *aname)
+ {
+ int err = 0;
+ struct p9_req_t *req;
+ struct p9_fid *fid;
+ struct p9_qid qid;
+
+-
+ p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+ afid ? afid->fid : -1, uname, aname);
+ fid = p9_fid_create(clnt);
+@@ -1136,7 +1131,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
+ fid->uid = n_uname;
+
+ req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,
+- afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
++ afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1150,7 +1145,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
+- qid.type, (unsigned long long)qid.path, qid.version);
++ qid.type, qid.path, qid.version);
+
+ memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+
+@@ -1165,14 +1160,14 @@ error:
+ EXPORT_SYMBOL(p9_client_attach);
+
+ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+- const unsigned char * const *wnames, int clone)
++ const unsigned char * const *wnames, int clone)
+ {
+ int err;
+ struct p9_client *clnt;
+ struct p9_fid *fid;
+ struct p9_qid *wqids;
+ struct p9_req_t *req;
+- uint16_t nwqids, count;
++ u16 nwqids, count;
+
+ err = 0;
+ wqids = NULL;
+@@ -1185,14 +1180,14 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+ }
+
+ fid->uid = oldfid->uid;
+- } else
++ } else {
+ fid = oldfid;
+-
++ }
+
+ p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
+ oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
+ req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
+- nwname, wnames);
++ nwname, wnames);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1215,9 +1210,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+
+ for (count = 0; count < nwqids; count++)
+ p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n",
+- count, wqids[count].type,
+- (unsigned long long)wqids[count].path,
+- wqids[count].version);
++ count, wqids[count].type,
++ wqids[count].path,
++ wqids[count].version);
+
+ if (nwname)
+ memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
+@@ -1233,7 +1228,7 @@ clunk_fid:
+ fid = NULL;
+
+ error:
+- if (fid && (fid != oldfid))
++ if (fid && fid != oldfid)
+ p9_fid_destroy(fid);
+
+ return ERR_PTR(err);
+@@ -1250,7 +1245,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
+
+ clnt = fid->clnt;
+ p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
+- p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
++ p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
+ err = 0;
+
+ if (fid->mode != -1)
+@@ -1272,8 +1267,8 @@ int p9_client_open(struct p9_fid *fid, int mode)
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
+- p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
+- (unsigned long long)qid.path, qid.version, iounit);
++ p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
++ qid.path, qid.version, iounit);
+
+ memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+ fid->mode = mode;
+@@ -1286,8 +1281,8 @@ error:
+ }
+ EXPORT_SYMBOL(p9_client_open);
+
+-int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode,
+- kgid_t gid, struct p9_qid *qid)
++int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
++ u32 mode, kgid_t gid, struct p9_qid *qid)
+ {
+ int err = 0;
+ struct p9_client *clnt;
+@@ -1295,16 +1290,16 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
+ int iounit;
+
+ p9_debug(P9_DEBUG_9P,
+- ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
+- ofid->fid, name, flags, mode,
+- from_kgid(&init_user_ns, gid));
++ ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
++ ofid->fid, name, flags, mode,
++ from_kgid(&init_user_ns, gid));
+ clnt = ofid->clnt;
+
+ if (ofid->mode != -1)
+ return -EINVAL;
+
+ req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,
+- mode, gid);
++ mode, gid);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1317,12 +1312,10 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
+- qid->type,
+- (unsigned long long)qid->path,
+- qid->version, iounit);
++ qid->type, qid->path, qid->version, iounit);
+
+ memmove(&ofid->qid, qid, sizeof(struct p9_qid));
+- ofid->mode = mode;
++ ofid->mode = flags;
+ ofid->iounit = iounit;
+
+ free_and_error:
+@@ -1342,7 +1335,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
+ int iounit;
+
+ p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
+- fid->fid, name, perm, mode);
++ fid->fid, name, perm, mode);
+ err = 0;
+ clnt = fid->clnt;
+
+@@ -1350,7 +1343,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
+ return -EINVAL;
+
+ req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm,
+- mode, extension);
++ mode, extension);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1363,9 +1356,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
+- qid.type,
+- (unsigned long long)qid.path,
+- qid.version, iounit);
++ qid.type, qid.path, qid.version, iounit);
+
+ memmove(&fid->qid, &qid, sizeof(struct p9_qid));
+ fid->mode = mode;
+@@ -1379,18 +1370,18 @@ error:
+ EXPORT_SYMBOL(p9_client_fcreate);
+
+ int p9_client_symlink(struct p9_fid *dfid, const char *name,
+- const char *symtgt, kgid_t gid, struct p9_qid *qid)
++ const char *symtgt, kgid_t gid, struct p9_qid *qid)
+ {
+ int err = 0;
+ struct p9_client *clnt;
+ struct p9_req_t *req;
+
+ p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n",
+- dfid->fid, name, symtgt);
++ dfid->fid, name, symtgt);
+ clnt = dfid->clnt;
+
+ req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,
+- gid);
++ gid);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1403,7 +1394,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
+- qid->type, (unsigned long long)qid->path, qid->version);
++ qid->type, qid->path, qid->version);
+
+ free_and_error:
+ p9_tag_remove(clnt, req);
+@@ -1418,10 +1409,10 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
+ struct p9_req_t *req;
+
+ p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
+- dfid->fid, oldfid->fid, newname);
++ dfid->fid, oldfid->fid, newname);
+ clnt = dfid->clnt;
+ req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
+- newname);
++ newname);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+@@ -1438,7 +1429,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
+ struct p9_req_t *req;
+
+ p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
+- fid->fid, datasync);
++ fid->fid, datasync);
+ err = 0;
+ clnt = fid->clnt;
+
+@@ -1474,8 +1465,8 @@ int p9_client_clunk(struct p9_fid *fid)
+ return 0;
+
+ again:
+- p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,
+- retries);
++ p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
++ fid->fid, retries);
+ err = 0;
+ clnt = fid->clnt;
+
+@@ -1489,16 +1480,16 @@ again:
+
+ p9_tag_remove(clnt, req);
+ error:
+- /*
+- * Fid is not valid even after a failed clunk
++ /* Fid is not valid even after a failed clunk
+ * If interrupted, retry once then give up and
+ * leak fid until umount.
+ */
+ if (err == -ERESTARTSYS) {
+ if (retries++ == 0)
+ goto again;
+- } else
++ } else {
+ p9_fid_destroy(fid);
++ }
+ return err;
+ }
+ EXPORT_SYMBOL(p9_client_clunk);
+@@ -1538,7 +1529,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
+ struct p9_client *clnt;
+
+ p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n",
+- dfid->fid, name, flags);
++ dfid->fid, name, flags);
+
+ clnt = dfid->clnt;
+ req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags);
+@@ -1584,8 +1575,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
+ char *dataptr;
+
+ *err = 0;
+- p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
+- fid->fid, (unsigned long long) offset, (int)iov_iter_count(to));
++ p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %zu\n",
++ fid->fid, offset, iov_iter_count(to));
+
+ rsize = fid->iounit;
+ if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
+@@ -1651,13 +1642,13 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+ *err = 0;
+
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
+- fid->fid, (unsigned long long) offset,
+- iov_iter_count(from));
++ fid->fid, offset, iov_iter_count(from));
+
+ while (iov_iter_count(from)) {
+ int count = iov_iter_count(from);
+ int rsize = fid->iounit;
+- if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
++
++ if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
+ rsize = clnt->msize - P9_IOHDRSZ;
+
+ if (count < rsize)
+@@ -1670,7 +1661,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+ fid->fid, offset, rsize);
+ } else {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+- offset, rsize, from);
++ offset, rsize, from);
+ }
+ if (IS_ERR(req)) {
+ *err = PTR_ERR(req);
+@@ -1703,12 +1694,13 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
+ {
+ int err;
+ struct p9_client *clnt;
+- struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL);
++ struct p9_wstat *ret;
+ struct p9_req_t *req;
+ u16 ignored;
+
+ p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);
+
++ ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+@@ -1729,17 +1721,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
+ }
+
+ p9_debug(P9_DEBUG_9P,
+- "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+- "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+- "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+- "<<< uid=%d gid=%d n_muid=%d\n",
+- ret->size, ret->type, ret->dev, ret->qid.type,
+- (unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
+- ret->atime, ret->mtime, (unsigned long long)ret->length,
+- ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
+- from_kuid(&init_user_ns, ret->n_uid),
+- from_kgid(&init_user_ns, ret->n_gid),
+- from_kuid(&init_user_ns, ret->n_muid));
++ "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
++ "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
++ "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
++ "<<< uid=%d gid=%d n_muid=%d\n",
++ ret->size, ret->type, ret->dev, ret->qid.type, ret->qid.path,
++ ret->qid.version, ret->mode,
++ ret->atime, ret->mtime, ret->length,
++ ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
++ from_kuid(&init_user_ns, ret->n_uid),
++ from_kgid(&init_user_ns, ret->n_gid),
++ from_kuid(&init_user_ns, ret->n_muid));
+
+ p9_tag_remove(clnt, req);
+ return ret;
+@@ -1751,17 +1743,17 @@ error:
+ EXPORT_SYMBOL(p9_client_stat);
+
+ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+- u64 request_mask)
++ u64 request_mask)
+ {
+ int err;
+ struct p9_client *clnt;
+- struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl),
+- GFP_KERNEL);
++ struct p9_stat_dotl *ret;
+ struct p9_req_t *req;
+
+ p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
+- fid->fid, request_mask);
++ fid->fid, request_mask);
+
++ ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+@@ -1781,26 +1773,27 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+ goto error;
+ }
+
+- p9_debug(P9_DEBUG_9P,
+- "<<< RGETATTR st_result_mask=%lld\n"
+- "<<< qid=%x.%llx.%x\n"
+- "<<< st_mode=%8.8x st_nlink=%llu\n"
+- "<<< st_uid=%d st_gid=%d\n"
+- "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
+- "<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
+- "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
+- "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
+- "<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
+- "<<< st_gen=%lld st_data_version=%lld\n",
+- ret->st_result_mask, ret->qid.type, ret->qid.path,
+- ret->qid.version, ret->st_mode, ret->st_nlink,
+- from_kuid(&init_user_ns, ret->st_uid),
+- from_kgid(&init_user_ns, ret->st_gid),
+- ret->st_rdev, ret->st_size, ret->st_blksize,
+- ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
+- ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
+- ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
+- ret->st_gen, ret->st_data_version);
++ p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n"
++ "<<< qid=%x.%llx.%x\n"
++ "<<< st_mode=%8.8x st_nlink=%llu\n"
++ "<<< st_uid=%d st_gid=%d\n"
++ "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
++ "<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
++ "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
++ "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
++ "<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
++ "<<< st_gen=%lld st_data_version=%lld\n",
++ ret->st_result_mask,
++ ret->qid.type, ret->qid.path, ret->qid.version,
++ ret->st_mode, ret->st_nlink,
++ from_kuid(&init_user_ns, ret->st_uid),
++ from_kgid(&init_user_ns, ret->st_gid),
++ ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks,
++ ret->st_atime_sec, ret->st_atime_nsec,
++ ret->st_mtime_sec, ret->st_mtime_nsec,
++ ret->st_ctime_sec, ret->st_ctime_nsec,
++ ret->st_btime_sec, ret->st_btime_nsec,
++ ret->st_gen, ret->st_data_version);
+
+ p9_tag_remove(clnt, req);
+ return ret;
+@@ -1819,7 +1812,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
+ /* size[2] type[2] dev[4] qid[13] */
+ /* mode[4] atime[4] mtime[4] length[8]*/
+ /* name[s] uid[s] gid[s] muid[s] */
+- ret = 2+4+13+4+4+4+8+2+2+2+2;
++ ret = 2 + 4 + 13 + 4 + 4 + 4 + 8 + 2 + 2 + 2 + 2;
+
+ if (wst->name)
+ ret += strlen(wst->name);
+@@ -1830,9 +1823,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
+ if (wst->muid)
+ ret += strlen(wst->muid);
+
+- if ((proto_version == p9_proto_2000u) ||
+- (proto_version == p9_proto_2000L)) {
+- ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
++ if (proto_version == p9_proto_2000u ||
++ proto_version == p9_proto_2000L) {
++ /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
++ ret += 2 + 4 + 4 + 4;
+ if (wst->extension)
+ ret += strlen(wst->extension);
+ }
+@@ -1849,21 +1843,23 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
+ err = 0;
+ clnt = fid->clnt;
+ wst->size = p9_client_statsize(wst, clnt->proto_version);
+- p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
++ p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
++ fid->fid);
+ p9_debug(P9_DEBUG_9P,
+- " sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
+- " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
+- " name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
+- " uid=%d gid=%d n_muid=%d\n",
+- wst->size, wst->type, wst->dev, wst->qid.type,
+- (unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
+- wst->atime, wst->mtime, (unsigned long long)wst->length,
+- wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
+- from_kuid(&init_user_ns, wst->n_uid),
+- from_kgid(&init_user_ns, wst->n_gid),
+- from_kuid(&init_user_ns, wst->n_muid));
+-
+- req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);
++ " sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
++ " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"
++ " name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
++ " uid=%d gid=%d n_muid=%d\n",
++ wst->size, wst->type, wst->dev, wst->qid.type,
++ wst->qid.path, wst->qid.version,
++ wst->mode, wst->atime, wst->mtime, wst->length,
++ wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
++ from_kuid(&init_user_ns, wst->n_uid),
++ from_kgid(&init_user_ns, wst->n_gid),
++ from_kuid(&init_user_ns, wst->n_muid));
++
++ req = p9_client_rpc(clnt, P9_TWSTAT, "dwS",
++ fid->fid, wst->size + 2, wst);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1886,15 +1882,15 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
+ err = 0;
+ clnt = fid->clnt;
+ p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
+- p9_debug(P9_DEBUG_9P,
+- " valid=%x mode=%x uid=%d gid=%d size=%lld\n"
+- " atime_sec=%lld atime_nsec=%lld\n"
+- " mtime_sec=%lld mtime_nsec=%lld\n",
+- p9attr->valid, p9attr->mode,
+- from_kuid(&init_user_ns, p9attr->uid),
+- from_kgid(&init_user_ns, p9attr->gid),
+- p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
+- p9attr->mtime_sec, p9attr->mtime_nsec);
++ p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
++ p9attr->valid, p9attr->mode,
++ from_kuid(&init_user_ns, p9attr->uid),
++ from_kgid(&init_user_ns, p9attr->gid),
++ p9attr->size);
++ p9_debug(P9_DEBUG_9P, " atime_sec=%lld atime_nsec=%lld\n",
++ p9attr->atime_sec, p9attr->atime_nsec);
++ p9_debug(P9_DEBUG_9P, " mtime_sec=%lld mtime_nsec=%lld\n",
++ p9attr->mtime_sec, p9attr->mtime_nsec);
+
+ req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr);
+
+@@ -1935,12 +1931,10 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
+ goto error;
+ }
+
+- p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
+- "blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
+- "fsid %llu namelen %ld\n",
+- fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
+- sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
+- sb->fsid, (long int)sb->namelen);
++ p9_debug(P9_DEBUG_9P,
++ "<<< RSTATFS fid %d type 0x%x bsize %u blocks %llu bfree %llu bavail %llu files %llu ffree %llu fsid %llu namelen %u\n",
++ fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree,
++ sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen);
+
+ p9_tag_remove(clnt, req);
+ error:
+@@ -1959,10 +1953,10 @@ int p9_client_rename(struct p9_fid *fid,
+ clnt = fid->clnt;
+
+ p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
+- fid->fid, newdirfid->fid, name);
++ fid->fid, newdirfid->fid, name);
+
+ req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
+- newdirfid->fid, name);
++ newdirfid->fid, name);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -1986,9 +1980,9 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
+ err = 0;
+ clnt = olddirfid->clnt;
+
+- p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s"
+- " newdirfid %d new name %s\n", olddirfid->fid, old_name,
+- newdirfid->fid, new_name);
++ p9_debug(P9_DEBUG_9P,
++ ">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n",
++ olddirfid->fid, old_name, newdirfid->fid, new_name);
+
+ req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid,
+ old_name, newdirfid->fid, new_name);
+@@ -1998,7 +1992,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
+- newdirfid->fid, new_name);
++ newdirfid->fid, new_name);
+
+ p9_tag_remove(clnt, req);
+ error:
+@@ -2006,11 +2000,10 @@ error:
+ }
+ EXPORT_SYMBOL(p9_client_renameat);
+
+-/*
+- * An xattrwalk without @attr_name gives the fid for the lisxattr namespace
++/* An xattrwalk without @attr_name gives the fid for the lisxattr namespace
+ */
+ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+- const char *attr_name, u64 *attr_size)
++ const char *attr_name, u64 *attr_size)
+ {
+ int err;
+ struct p9_req_t *req;
+@@ -2025,11 +2018,11 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+ goto error;
+ }
+ p9_debug(P9_DEBUG_9P,
+- ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
+- file_fid->fid, attr_fid->fid, attr_name);
++ ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
++ file_fid->fid, attr_fid->fid, attr_name);
+
+ req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
+- file_fid->fid, attr_fid->fid, attr_name);
++ file_fid->fid, attr_fid->fid, attr_name);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -2042,13 +2035,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+ }
+ p9_tag_remove(clnt, req);
+ p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
+- attr_fid->fid, *attr_size);
++ attr_fid->fid, *attr_size);
+ return attr_fid;
+ clunk_fid:
+ p9_client_clunk(attr_fid);
+ attr_fid = NULL;
+ error:
+- if (attr_fid && (attr_fid != file_fid))
++ if (attr_fid && attr_fid != file_fid)
+ p9_fid_destroy(attr_fid);
+
+ return ERR_PTR(err);
+@@ -2056,19 +2049,19 @@ error:
+ EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
+
+ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
+- u64 attr_size, int flags)
++ u64 attr_size, int flags)
+ {
+ int err;
+ struct p9_req_t *req;
+ struct p9_client *clnt;
+
+ p9_debug(P9_DEBUG_9P,
+- ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n",
+- fid->fid, name, (long long)attr_size, flags);
++ ">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
++ fid->fid, name, attr_size, flags);
+ err = 0;
+ clnt = fid->clnt;
+ req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
+- fid->fid, name, attr_size, flags);
++ fid->fid, name, attr_size, flags);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+@@ -2092,13 +2085,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
+ iov_iter_kvec(&to, READ, &kv, 1, count);
+
+ p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
+- fid->fid, (unsigned long long) offset, count);
++ fid->fid, offset, count);
+
+ err = 0;
+ clnt = fid->clnt;
+
+ rsize = fid->iounit;
+- if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ)
++ if (!rsize || rsize > clnt->msize - P9_READDIRHDRSZ)
+ rsize = clnt->msize - P9_READDIRHDRSZ;
+
+ if (count < rsize)
+@@ -2106,8 +2099,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
+
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && rsize > 1024) {
+- /*
+- * response header len is 11
++ /* response header len is 11
+ * PDU Header(7) + IO Size (4)
+ */
+ req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0,
+@@ -2148,7 +2140,7 @@ error:
+ EXPORT_SYMBOL(p9_client_readdir);
+
+ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
+- dev_t rdev, kgid_t gid, struct p9_qid *qid)
++ dev_t rdev, kgid_t gid, struct p9_qid *qid)
+ {
+ int err;
+ struct p9_client *clnt;
+@@ -2156,10 +2148,11 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
+
+ err = 0;
+ clnt = fid->clnt;
+- p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
+- "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
++ p9_debug(P9_DEBUG_9P,
++ ">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
++ fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
+ req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,
+- MAJOR(rdev), MINOR(rdev), gid);
++ MAJOR(rdev), MINOR(rdev), gid);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+@@ -2168,18 +2161,17 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
+ trace_9p_protocol_dump(clnt, &req->rc);
+ goto error;
+ }
+- p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
+- (unsigned long long)qid->path, qid->version);
++ p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n",
++ qid->type, qid->path, qid->version);
+
+ error:
+ p9_tag_remove(clnt, req);
+ return err;
+-
+ }
+ EXPORT_SYMBOL(p9_client_mknod_dotl);
+
+ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
+- kgid_t gid, struct p9_qid *qid)
++ kgid_t gid, struct p9_qid *qid)
+ {
+ int err;
+ struct p9_client *clnt;
+@@ -2189,8 +2181,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
+ clnt = fid->clnt;
+ p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
+ fid->fid, name, mode, from_kgid(&init_user_ns, gid));
+- req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode,
+- gid);
++ req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg",
++ fid->fid, name, mode, gid);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+@@ -2200,12 +2192,11 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
+ goto error;
+ }
+ p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
+- (unsigned long long)qid->path, qid->version);
++ qid->path, qid->version);
+
+ error:
+ p9_tag_remove(clnt, req);
+ return err;
+-
+ }
+ EXPORT_SYMBOL(p9_client_mkdir_dotl);
+
+@@ -2217,14 +2208,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
+
+ err = 0;
+ clnt = fid->clnt;
+- p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
+- "start %lld length %lld proc_id %d client_id %s\n",
+- fid->fid, flock->type, flock->flags, flock->start,
+- flock->length, flock->proc_id, flock->client_id);
++ p9_debug(P9_DEBUG_9P,
++ ">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
++ fid->fid, flock->type, flock->flags, flock->start,
++ flock->length, flock->proc_id, flock->client_id);
+
+ req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
+- flock->flags, flock->start, flock->length,
+- flock->proc_id, flock->client_id);
++ flock->flags, flock->start, flock->length,
++ flock->proc_id, flock->client_id);
+
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+@@ -2238,7 +2229,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
+ error:
+ p9_tag_remove(clnt, req);
+ return err;
+-
+ }
+ EXPORT_SYMBOL(p9_client_lock_dotl);
+
+@@ -2250,12 +2240,14 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
+
+ err = 0;
+ clnt = fid->clnt;
+- p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
+- "length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
+- glock->start, glock->length, glock->proc_id, glock->client_id);
++ p9_debug(P9_DEBUG_9P,
++ ">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
++ fid->fid, glock->type, glock->start, glock->length,
++ glock->proc_id, glock->client_id);
+
+- req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
+- glock->start, glock->length, glock->proc_id, glock->client_id);
++ req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid,
++ glock->type, glock->start, glock->length,
++ glock->proc_id, glock->client_id);
+
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+@@ -2267,9 +2259,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
+ trace_9p_protocol_dump(clnt, &req->rc);
+ goto error;
+ }
+- p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
+- "proc_id %d client_id %s\n", glock->type, glock->start,
+- glock->length, glock->proc_id, glock->client_id);
++ p9_debug(P9_DEBUG_9P,
++ "<<< RGETLOCK type %i start %lld length %lld proc_id %d client_id %s\n",
++ glock->type, glock->start, glock->length,
++ glock->proc_id, glock->client_id);
+ error:
+ p9_tag_remove(clnt, req);
+ return err;
+diff --git a/net/9p/error.c b/net/9p/error.c
+index 61c18daf3050a..ff935746754e0 100644
+--- a/net/9p/error.c
++++ b/net/9p/error.c
+@@ -185,7 +185,7 @@ int p9_error_init(void)
+ INIT_HLIST_HEAD(&hash_errmap[bucket]);
+
+ /* load initial error map into hash table */
+- for (c = errmap; c->name != NULL; c++) {
++ for (c = errmap; c->name; c++) {
+ c->namelen = strlen(c->name);
+ bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
+ INIT_HLIST_NODE(&c->list);
+diff --git a/net/9p/mod.c b/net/9p/mod.c
+index 5126566850bd0..535cf016633c7 100644
+--- a/net/9p/mod.c
++++ b/net/9p/mod.c
+@@ -24,13 +24,13 @@
+ #include <linux/spinlock.h>
+
+ #ifdef CONFIG_NET_9P_DEBUG
+-unsigned int p9_debug_level = 0; /* feature-rific global debug level */
++unsigned int p9_debug_level; /* feature-rific global debug level */
+ EXPORT_SYMBOL(p9_debug_level);
+ module_param_named(debug, p9_debug_level, uint, 0);
+ MODULE_PARM_DESC(debug, "9P debugging level");
+
+ void _p9_debug(enum p9_debug_flags level, const char *func,
+- const char *fmt, ...)
++ const char *fmt, ...)
+ {
+ struct va_format vaf;
+ va_list args;
+@@ -53,10 +53,7 @@ void _p9_debug(enum p9_debug_flags level, const char *func,
+ EXPORT_SYMBOL(_p9_debug);
+ #endif
+
+-/*
+- * Dynamic Transport Registration Routines
+- *
+- */
++/* Dynamic Transport Registration Routines */
+
+ static DEFINE_SPINLOCK(v9fs_trans_lock);
+ static LIST_HEAD(v9fs_trans_list);
+diff --git a/net/9p/protocol.c b/net/9p/protocol.c
+index 03593eb240d87..59eb71f357fa7 100644
+--- a/net/9p/protocol.c
++++ b/net/9p/protocol.c
+@@ -46,6 +46,7 @@ EXPORT_SYMBOL(p9stat_free);
+ size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+ {
+ size_t len = min(pdu->size - pdu->offset, size);
++
+ memcpy(data, &pdu->sdata[pdu->offset], len);
+ pdu->offset += len;
+ return size - len;
+@@ -54,6 +55,7 @@ size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
+ {
+ size_t len = min(pdu->capacity - pdu->size, size);
++
+ memcpy(&pdu->sdata[pdu->size], data, len);
+ pdu->size += len;
+ return size - len;
+@@ -64,6 +66,7 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
+ {
+ size_t len = min(pdu->capacity - pdu->size, size);
+ struct iov_iter i = *from;
++
+ if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i))
+ len = 0;
+
+@@ -71,26 +74,25 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
+ return size - len;
+ }
+
+-/*
+- b - int8_t
+- w - int16_t
+- d - int32_t
+- q - int64_t
+- s - string
+- u - numeric uid
+- g - numeric gid
+- S - stat
+- Q - qid
+- D - data blob (int32_t size followed by void *, results are not freed)
+- T - array of strings (int16_t count, followed by strings)
+- R - array of qids (int16_t count, followed by qids)
+- A - stat for 9p2000.L (p9_stat_dotl)
+- ? - if optional = 1, continue parsing
+-*/
++/* b - int8_t
++ * w - int16_t
++ * d - int32_t
++ * q - int64_t
++ * s - string
++ * u - numeric uid
++ * g - numeric gid
++ * S - stat
++ * Q - qid
++ * D - data blob (int32_t size followed by void *, results are not freed)
++ * T - array of strings (int16_t count, followed by strings)
++ * R - array of qids (int16_t count, followed by qids)
++ * A - stat for 9p2000.L (p9_stat_dotl)
++ * ? - if optional = 1, continue parsing
++ */
+
+ static int
+ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
+- va_list ap)
++ va_list ap)
+ {
+ const char *ptr;
+ int errcode = 0;
+diff --git a/net/9p/protocol.h b/net/9p/protocol.h
+index 6835f91cfda59..4a2f686510371 100644
+--- a/net/9p/protocol.h
++++ b/net/9p/protocol.h
+@@ -11,7 +11,7 @@
+ */
+
+ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
+- va_list ap);
++ va_list ap);
+ int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
+ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
+ int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);
+diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
+index c43babb3f6354..65c094c321a29 100644
+--- a/net/9p/trans_common.h
++++ b/net/9p/trans_common.h
+@@ -12,4 +12,4 @@
+ *
+ */
+
+-void p9_release_pages(struct page **, int);
++void p9_release_pages(struct page **pages, int nr_pages);
+diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
+index 007bbcc68010b..f359cfdc1858f 100644
+--- a/net/9p/trans_fd.c
++++ b/net/9p/trans_fd.c
+@@ -93,6 +93,7 @@ struct p9_poll_wait {
+ * @mux_list: list link for mux to manage multiple connections (?)
+ * @client: reference to client instance for this connection
+ * @err: error state
++ * @req_lock: lock protecting req_list and requests statuses
+ * @req_list: accounting for requests which have been sent
+ * @unsent_req_list: accounting for requests that haven't been sent
+ * @rreq: read request
+@@ -116,11 +117,12 @@ struct p9_conn {
+ struct list_head mux_list;
+ struct p9_client *client;
+ int err;
++ spinlock_t req_lock;
+ struct list_head req_list;
+ struct list_head unsent_req_list;
+ struct p9_req_t *rreq;
+ struct p9_req_t *wreq;
+- char tmp_buf[7];
++ char tmp_buf[P9_HDRSZ];
+ struct p9_fcall rc;
+ int wpos;
+ int wsize;
+@@ -191,10 +193,10 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
+
+ p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
+
+- spin_lock(&m->client->lock);
++ spin_lock(&m->req_lock);
+
+ if (m->err) {
+- spin_unlock(&m->client->lock);
++ spin_unlock(&m->req_lock);
+ return;
+ }
+
+@@ -202,11 +204,15 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
+
+ list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+ list_move(&req->req_list, &cancel_list);
++ req->status = REQ_STATUS_ERROR;
+ }
+ list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
+ list_move(&req->req_list, &cancel_list);
++ req->status = REQ_STATUS_ERROR;
+ }
+
++ spin_unlock(&m->req_lock);
++
+ list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+ p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
+ list_del(&req->req_list);
+@@ -214,7 +220,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
+ req->t_err = err;
+ p9_client_cb(m->client, req, REQ_STATUS_ERROR);
+ }
+- spin_unlock(&m->client->lock);
+ }
+
+ static __poll_t
+@@ -290,7 +295,7 @@ static void p9_read_work(struct work_struct *work)
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+- m->rc.capacity = 7; /* start by reading header */
++ m->rc.capacity = P9_HDRSZ; /* start by reading header */
+ }
+
+ clear_bit(Rpending, &m->wsched);
+@@ -313,7 +318,7 @@ static void p9_read_work(struct work_struct *work)
+ p9_debug(P9_DEBUG_TRANS, "got new header\n");
+
+ /* Header size */
+- m->rc.size = 7;
++ m->rc.size = P9_HDRSZ;
+ err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+@@ -345,6 +350,7 @@ static void p9_read_work(struct work_struct *work)
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ m->rc.tag, m->rreq);
++ p9_req_put(m->client, m->rreq);
+ m->rreq = NULL;
+ err = -EIO;
+ goto error;
+@@ -360,7 +366,7 @@ static void p9_read_work(struct work_struct *work)
+ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
+ p9_debug(P9_DEBUG_TRANS, "got new packet\n");
+ m->rreq->rc.size = m->rc.offset;
+- spin_lock(&m->client->lock);
++ spin_lock(&m->req_lock);
+ if (m->rreq->status == REQ_STATUS_SENT) {
+ list_del(&m->rreq->req_list);
+ p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+@@ -369,18 +375,18 @@ static void p9_read_work(struct work_struct *work)
+ p9_debug(P9_DEBUG_TRANS,
+ "Ignore replies associated with a cancelled request\n");
+ } else {
+- spin_unlock(&m->client->lock);
++ spin_unlock(&m->req_lock);
+ p9_debug(P9_DEBUG_ERROR,
+ "Request tag %d errored out while we were reading the reply\n",
+ m->rc.tag);
+ err = -EIO;
+ goto error;
+ }
+- spin_unlock(&m->client->lock);
++ spin_unlock(&m->req_lock);
+ m->rc.sdata = NULL;
+ m->rc.offset = 0;
+ m->rc.capacity = 0;
+- p9_req_put(m->rreq);
++ p9_req_put(m->client, m->rreq);
+ m->rreq = NULL;
+ }
+
+@@ -454,10 +460,10 @@ static void p9_write_work(struct work_struct *work)
+ }
+
+ if (!m->wsize) {
+- spin_lock(&m->client->lock);
++ spin_lock(&m->req_lock);
+ if (list_empty(&m->unsent_req_list)) {
+ clear_bit(Wworksched, &m->wsched);
+- spin_unlock(&m->client->lock);
++ spin_unlock(&m->req_lock);
+ return;
+ }
+
+@@ -472,7 +478,7 @@ static void p9_write_work(struct work_struct *work)
+ m->wpos = 0;
+ p9_req_get(req);
+ m->wreq = req;
+- spin_unlock(&m->client->lock);
++ spin_unlock(&m->req_lock);
+ }
+
+ p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
+@@ -494,7 +500,7 @@ static void p9_write_work(struct work_struct *work)
+ m->wpos += err;
+ if (m->wpos == m->wsize) {
+ m->wpos = m->wsize = 0;
+- p9_req_put(m->wreq);
++ p9_req_put(m->client, m->wreq);
+ m->wreq = NULL;
+ }
+
+@@ -589,6 +595,7 @@ static void p9_conn_create(struct p9_client *client)
+ INIT_LIST_HEAD(&m->mux_list);
+ m->client = client;
+
++ spin_lock_init(&m->req_lock);
+ INIT_LIST_HEAD(&m->req_list);
+ INIT_LIST_HEAD(&m->unsent_req_list);
+ INIT_WORK(&m->rq, p9_read_work);
+@@ -670,10 +677,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
+ if (m->err < 0)
+ return m->err;
+
+- spin_lock(&client->lock);
++ spin_lock(&m->req_lock);
+ req->status = REQ_STATUS_UNSENT;
+ list_add_tail(&req->req_list, &m->unsent_req_list);
+- spin_unlock(&client->lock);
++ spin_unlock(&m->req_lock);
+
+ if (test_and_clear_bit(Wpending, &m->wsched))
+ n = EPOLLOUT;
+@@ -688,33 +695,38 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
+
+ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
+ {
++ struct p9_trans_fd *ts = client->trans;
++ struct p9_conn *m = &ts->conn;
+ int ret = 1;
+
+ p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+- spin_lock(&client->lock);
++ spin_lock(&m->req_lock);
+
+ if (req->status == REQ_STATUS_UNSENT) {
+ list_del(&req->req_list);
+ req->status = REQ_STATUS_FLSHD;
+- p9_req_put(req);
++ p9_req_put(client, req);
+ ret = 0;
+ }
+- spin_unlock(&client->lock);
++ spin_unlock(&m->req_lock);
+
+ return ret;
+ }
+
+ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+ {
++ struct p9_trans_fd *ts = client->trans;
++ struct p9_conn *m = &ts->conn;
++
+ p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+- spin_lock(&client->lock);
++ spin_lock(&m->req_lock);
+ /* Ignore cancelled request if message has been received
+ * before lock.
+ */
+ if (req->status == REQ_STATUS_RCVD) {
+- spin_unlock(&client->lock);
++ spin_unlock(&m->req_lock);
+ return 0;
+ }
+
+@@ -723,8 +735,9 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+ */
+ list_del(&req->req_list);
+ req->status = REQ_STATUS_FLSHD;
+- spin_unlock(&client->lock);
+- p9_req_put(req);
++ spin_unlock(&m->req_lock);
++
++ p9_req_put(client, req);
+
+ return 0;
+ }
+@@ -822,11 +835,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
+ goto out_free_ts;
+ if (!(ts->rd->f_mode & FMODE_READ))
+ goto out_put_rd;
++ /* prevent workers from hanging on IO when fd is a pipe */
++ ts->rd->f_flags |= O_NONBLOCK;
+ ts->wr = fget(wfd);
+ if (!ts->wr)
+ goto out_put_rd;
+ if (!(ts->wr->f_mode & FMODE_WRITE))
+ goto out_put_wr;
++ ts->wr->f_flags |= O_NONBLOCK;
+
+ client->trans = ts;
+ client->status = Connected;
+@@ -848,8 +864,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
+ struct file *file;
+
+ p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
+- if (!p)
++ if (!p) {
++ sock_release(csocket);
+ return -ENOMEM;
++ }
+
+ csocket->sk->sk_allocation = GFP_NOIO;
+ file = sock_alloc_file(csocket, 0, NULL);
+@@ -885,12 +903,12 @@ static void p9_conn_destroy(struct p9_conn *m)
+ p9_mux_poll_stop(m);
+ cancel_work_sync(&m->rq);
+ if (m->rreq) {
+- p9_req_put(m->rreq);
++ p9_req_put(m->client, m->rreq);
+ m->rreq = NULL;
+ }
+ cancel_work_sync(&m->wq);
+ if (m->wreq) {
+- p9_req_put(m->wreq);
++ p9_req_put(m->client, m->wreq);
+ m->wreq = NULL;
+ }
+
+diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
+index af0a8a6cd3fd8..e5bfe8d7ef449 100644
+--- a/net/9p/trans_rdma.c
++++ b/net/9p/trans_rdma.c
+@@ -352,7 +352,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
+ c->busa, c->req->tc.size,
+ DMA_TO_DEVICE);
+ up(&rdma->sq_sem);
+- p9_req_put(c->req);
++ p9_req_put(client, c->req);
+ kfree(c);
+ }
+
+@@ -388,6 +388,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
+ struct p9_trans_rdma *rdma = client->trans;
+ struct ib_recv_wr wr;
+ struct ib_sge sge;
++ int ret;
+
+ c->busa = ib_dma_map_single(rdma->cm_id->device,
+ c->rc.sdata, client->msize,
+@@ -405,7 +406,12 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
+ wr.wr_cqe = &c->cqe;
+ wr.sg_list = &sge;
+ wr.num_sge = 1;
+- return ib_post_recv(rdma->qp, &wr, NULL);
++
++ ret = ib_post_recv(rdma->qp, &wr, NULL);
++ if (ret)
++ ib_dma_unmap_single(rdma->cm_id->device, c->busa,
++ client->msize, DMA_FROM_DEVICE);
++ return ret;
+
+ error:
+ p9_debug(P9_DEBUG_ERROR, "EIO\n");
+@@ -502,7 +508,7 @@ dont_need_post_recv:
+
+ if (down_interruptible(&rdma->sq_sem)) {
+ err = -EINTR;
+- goto send_error;
++ goto dma_unmap;
+ }
+
+ /* Mark request as `sent' *before* we actually send it,
+@@ -512,11 +518,14 @@ dont_need_post_recv:
+ req->status = REQ_STATUS_SENT;
+ err = ib_post_send(rdma->qp, &wr, NULL);
+ if (err)
+- goto send_error;
++ goto dma_unmap;
+
+ /* Success */
+ return 0;
+
++dma_unmap:
++ ib_dma_unmap_single(rdma->cm_id->device, c->busa,
++ c->req->tc.size, DMA_TO_DEVICE);
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
+ req->status = REQ_STATUS_ERROR;
+diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
+index 490a4c9003395..96eecc2dcaa36 100644
+--- a/net/9p/trans_virtio.c
++++ b/net/9p/trans_virtio.c
+@@ -199,7 +199,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
+ /* Reply won't come, so drop req ref */
+ static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
+ {
+- p9_req_put(req);
++ p9_req_put(client, req);
+ return 0;
+ }
+
+@@ -399,7 +399,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+ struct page **in_pages = NULL, **out_pages = NULL;
+ struct virtio_chan *chan = client->trans;
+ struct scatterlist *sgs[4];
+- size_t offs;
++ size_t offs = 0;
+ int need_drop = 0;
+ int kicked = 0;
+
+@@ -523,7 +523,7 @@ err_out:
+ kvfree(out_pages);
+ if (!kicked) {
+ /* reply won't come */
+- p9_req_put(req);
++ p9_req_put(client, req);
+ }
+ return err;
+ }
+diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
+index 3ec1a51a6944e..99e6b2483311c 100644
+--- a/net/9p/trans_xen.c
++++ b/net/9p/trans_xen.c
+@@ -186,7 +186,7 @@ again:
+ ring->intf->out_prod = prod;
+ spin_unlock_irqrestore(&ring->lock, flags);
+ notify_remote_via_irq(ring->irq);
+- p9_req_put(p9_req);
++ p9_req_put(client, p9_req);
+
+ return 0;
+ }
+@@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work)
+ continue;
+ }
+
++ if (h.size > req->rc.capacity) {
++ dev_warn(&priv->dev->dev,
++ "requested packet size too big: %d for tag %d with capacity %zd\n",
++ h.size, h.tag, req->rc.capacity);
++ req->status = REQ_STATUS_ERROR;
++ goto recv_error;
++ }
++
+ memcpy(&req->rc, &h, sizeof(h));
+ req->rc.offset = 0;
+
+@@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work)
+ masked_prod, &masked_cons,
+ XEN_9PFS_RING_SIZE(ring));
+
++recv_error:
+ virt_mb();
+ cons += h.size;
+ ring->intf->in_cons = cons;
+@@ -291,6 +300,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
+ write_unlock(&xen_9pfs_lock);
+
+ for (i = 0; i < priv->num_rings; i++) {
++ struct xen_9pfs_dataring *ring = &priv->rings[i];
++
++ cancel_work_sync(&ring->work);
++
+ if (!priv->rings[i].intf)
+ break;
+ if (priv->rings[i].irq > 0)
+@@ -304,9 +317,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
+ ref = priv->rings[i].intf->ref[j];
+ gnttab_end_foreign_access(ref, 0, 0);
+ }
+- free_pages((unsigned long)priv->rings[i].data.in,
+- priv->rings[i].intf->ring_order -
+- (PAGE_SHIFT - XEN_PAGE_SHIFT));
++ free_pages_exact(priv->rings[i].data.in,
++ 1UL << (priv->rings[i].intf->ring_order +
++ XEN_PAGE_SHIFT));
+ }
+ gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
+ free_page((unsigned long)priv->rings[i].intf);
+@@ -345,8 +358,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
+ if (ret < 0)
+ goto out;
+ ring->ref = ret;
+- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+- order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
++ bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
++ GFP_KERNEL | __GFP_ZERO);
+ if (!bytes) {
+ ret = -ENOMEM;
+ goto out;
+@@ -377,28 +390,31 @@ out:
+ if (bytes) {
+ for (i--; i >= 0; i--)
+ gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
+- free_pages((unsigned long)bytes,
+- ring->intf->ring_order -
+- (PAGE_SHIFT - XEN_PAGE_SHIFT));
++ free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
+ }
+ gnttab_end_foreign_access(ring->ref, 0, 0);
+ free_page((unsigned long)ring->intf);
+ return ret;
+ }
+
+-static int xen_9pfs_front_probe(struct xenbus_device *dev,
+- const struct xenbus_device_id *id)
++static int xen_9pfs_front_init(struct xenbus_device *dev)
+ {
+ int ret, i;
+ struct xenbus_transaction xbt;
+- struct xen_9pfs_front_priv *priv = NULL;
+- char *versions;
++ struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
++ char *versions, *v;
+ unsigned int max_rings, max_ring_order, len = 0;
+
+ versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
+ if (IS_ERR(versions))
+ return PTR_ERR(versions);
+- if (strcmp(versions, "1")) {
++ for (v = versions; *v; v++) {
++ if (simple_strtoul(v, &v, 10) == 1) {
++ v = NULL;
++ break;
++ }
++ }
++ if (v) {
+ kfree(versions);
+ return -EINVAL;
+ }
+@@ -413,11 +429,6 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
+ if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order))
+ p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2;
+
+- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+- if (!priv)
+- return -ENOMEM;
+-
+- priv->dev = dev;
+ priv->num_rings = XEN_9PFS_NUM_RINGS;
+ priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
+ GFP_KERNEL);
+@@ -476,23 +487,35 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
+ goto error;
+ }
+
+- write_lock(&xen_9pfs_lock);
+- list_add_tail(&priv->list, &xen_9pfs_devs);
+- write_unlock(&xen_9pfs_lock);
+- dev_set_drvdata(&dev->dev, priv);
+- xenbus_switch_state(dev, XenbusStateInitialised);
+-
+ return 0;
+
+ error_xenbus:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, ret, "writing xenstore");
+ error:
+- dev_set_drvdata(&dev->dev, NULL);
+ xen_9pfs_front_free(priv);
+ return ret;
+ }
+
++static int xen_9pfs_front_probe(struct xenbus_device *dev,
++ const struct xenbus_device_id *id)
++{
++ struct xen_9pfs_front_priv *priv = NULL;
++
++ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
++ if (!priv)
++ return -ENOMEM;
++
++ priv->dev = dev;
++ dev_set_drvdata(&dev->dev, priv);
++
++ write_lock(&xen_9pfs_lock);
++ list_add_tail(&priv->list, &xen_9pfs_devs);
++ write_unlock(&xen_9pfs_lock);
++
++ return 0;
++}
++
+ static int xen_9pfs_front_resume(struct xenbus_device *dev)
+ {
+ dev_warn(&dev->dev, "suspend/resume unsupported\n");
+@@ -511,6 +534,8 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
+ break;
+
+ case XenbusStateInitWait:
++ if (!xen_9pfs_front_init(dev))
++ xenbus_switch_state(dev, XenbusStateInitialised);
+ break;
+
+ case XenbusStateConnected:
+diff --git a/net/Kconfig b/net/Kconfig
+index fb13460c6dab3..76a3385943e50 100644
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -204,7 +204,6 @@ config BRIDGE_NETFILTER
+ source "net/netfilter/Kconfig"
+ source "net/ipv4/netfilter/Kconfig"
+ source "net/ipv6/netfilter/Kconfig"
+-source "net/decnet/netfilter/Kconfig"
+ source "net/bridge/netfilter/Kconfig"
+
+ endif
+@@ -221,7 +220,6 @@ source "net/802/Kconfig"
+ source "net/bridge/Kconfig"
+ source "net/dsa/Kconfig"
+ source "net/8021q/Kconfig"
+-source "net/decnet/Kconfig"
+ source "net/llc/Kconfig"
+ source "drivers/net/appletalk/Kconfig"
+ source "net/x25/Kconfig"
+diff --git a/net/Makefile b/net/Makefile
+index fbfeb8a0bb379..6a62e5b273781 100644
+--- a/net/Makefile
++++ b/net/Makefile
+@@ -38,7 +38,6 @@ obj-$(CONFIG_AF_KCM) += kcm/
+ obj-$(CONFIG_STREAM_PARSER) += strparser/
+ obj-$(CONFIG_ATM) += atm/
+ obj-$(CONFIG_L2TP) += l2tp/
+-obj-$(CONFIG_DECNET) += decnet/
+ obj-$(CONFIG_PHONET) += phonet/
+ ifneq ($(CONFIG_VLAN_8021Q),)
+ obj-y += 8021q/
+diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
+index 829db9eba0cb9..aaf64b9539150 100644
+--- a/net/atm/mpoa_proc.c
++++ b/net/atm/mpoa_proc.c
+@@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
+ if (!page)
+ return -ENOMEM;
+
+- for (p = page, len = 0; len < nbytes; p++, len++) {
++ for (p = page, len = 0; len < nbytes; p++) {
+ if (get_user(*p, buff++)) {
+ free_page((unsigned long)page);
+ return -EFAULT;
+ }
++ len += 1;
+ if (*p == '\0' || *p == '\n')
+ break;
+ }
+diff --git a/net/atm/resources.c b/net/atm/resources.c
+index 2b2d33eeaf200..995d29e7fb138 100644
+--- a/net/atm/resources.c
++++ b/net/atm/resources.c
+@@ -400,6 +400,7 @@ done:
+ return error;
+ }
+
++#ifdef CONFIG_PROC_FS
+ void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ mutex_lock(&atm_dev_mutex);
+@@ -415,3 +416,4 @@ void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ return seq_list_next(v, &atm_devs, pos);
+ }
++#endif
+diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
+index 2631efc6e359f..f99ed1eddf5e5 100644
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev)
+ {
+ ax25_dev *ax25_dev;
+ ax25_cb *s;
++ struct sock *sk;
+
+ if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+ return;
+@@ -85,11 +86,26 @@ static void ax25_kill_by_device(struct net_device *dev)
+ again:
+ ax25_for_each(s, &ax25_list) {
+ if (s->ax25_dev == ax25_dev) {
+- s->ax25_dev = NULL;
++ sk = s->sk;
++ if (!sk) {
++ spin_unlock_bh(&ax25_list_lock);
++ ax25_disconnect(s, ENETUNREACH);
++ s->ax25_dev = NULL;
++ spin_lock_bh(&ax25_list_lock);
++ goto again;
++ }
++ sock_hold(sk);
+ spin_unlock_bh(&ax25_list_lock);
++ lock_sock(sk);
+ ax25_disconnect(s, ENETUNREACH);
++ s->ax25_dev = NULL;
++ if (sk->sk_socket) {
++ dev_put(ax25_dev->dev);
++ ax25_dev_put(ax25_dev);
++ }
++ release_sock(sk);
+ spin_lock_bh(&ax25_list_lock);
+-
++ sock_put(sk);
+ /* The entry could have been deleted from the
+ * list meanwhile and thus the next pointer is
+ * no longer valid. Play it safe and restart
+@@ -353,21 +369,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
+ if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
+ return -EFAULT;
+
+- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
+- return -ENODEV;
+-
+ if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
+ return -EINVAL;
+
+ if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
+ return -EINVAL;
+
++ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
++ if (!ax25_dev)
++ return -ENODEV;
++
+ digi.ndigi = ax25_ctl.digi_count;
+ for (k = 0; k < digi.ndigi; k++)
+ digi.calls[k] = ax25_ctl.digi_addr[k];
+
+- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
++ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
++ if (!ax25) {
++ ax25_dev_put(ax25_dev);
+ return -ENOTCONN;
++ }
+
+ switch (ax25_ctl.cmd) {
+ case AX25_KILL:
+@@ -434,6 +454,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
+ }
+
+ out_put:
++ ax25_dev_put(ax25_dev);
+ ax25_cb_put(ax25);
+ return ret;
+
+@@ -534,7 +555,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
+ ax25_cb *ax25;
+ struct net_device *dev;
+ char devname[IFNAMSIZ];
+- unsigned long opt;
++ unsigned int opt;
+ int res = 0;
+
+ if (level != SOL_AX25)
+@@ -566,7 +587,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case AX25_T1:
+- if (opt < 1 || opt > ULONG_MAX / HZ) {
++ if (opt < 1 || opt > UINT_MAX / HZ) {
+ res = -EINVAL;
+ break;
+ }
+@@ -575,7 +596,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case AX25_T2:
+- if (opt < 1 || opt > ULONG_MAX / HZ) {
++ if (opt < 1 || opt > UINT_MAX / HZ) {
+ res = -EINVAL;
+ break;
+ }
+@@ -591,7 +612,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case AX25_T3:
+- if (opt < 1 || opt > ULONG_MAX / HZ) {
++ if (opt < 1 || opt > UINT_MAX / HZ) {
+ res = -EINVAL;
+ break;
+ }
+@@ -599,7 +620,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case AX25_IDLE:
+- if (opt > ULONG_MAX / (60 * HZ)) {
++ if (opt > UINT_MAX / (60 * HZ)) {
+ res = -EINVAL;
+ break;
+ }
+@@ -960,14 +981,16 @@ static int ax25_release(struct socket *sock)
+ {
+ struct sock *sk = sock->sk;
+ ax25_cb *ax25;
++ ax25_dev *ax25_dev;
+
+ if (sk == NULL)
+ return 0;
+
+ sock_hold(sk);
+- sock_orphan(sk);
+ lock_sock(sk);
++ sock_orphan(sk);
+ ax25 = sk_to_ax25(sk);
++ ax25_dev = ax25->ax25_dev;
+
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ switch (ax25->state) {
+@@ -1029,6 +1052,15 @@ static int ax25_release(struct socket *sock)
+ sk->sk_state_change(sk);
+ ax25_destroy_socket(ax25);
+ }
++ if (ax25_dev) {
++ del_timer_sync(&ax25->timer);
++ del_timer_sync(&ax25->t1timer);
++ del_timer_sync(&ax25->t2timer);
++ del_timer_sync(&ax25->t3timer);
++ del_timer_sync(&ax25->idletimer);
++ dev_put(ax25_dev->dev);
++ ax25_dev_put(ax25_dev);
++ }
+
+ sock->sk = NULL;
+ release_sock(sk);
+@@ -1105,8 +1137,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ }
+ }
+
+- if (ax25_dev != NULL)
++ if (ax25_dev) {
+ ax25_fillin_cb(ax25, ax25_dev);
++ dev_hold(ax25_dev->dev);
++ }
+
+ done:
+ ax25_cb_add(ax25);
+@@ -1620,9 +1654,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+ {
+ struct sock *sk = sock->sk;
+- struct sk_buff *skb;
++ struct sk_buff *skb, *last;
++ struct sk_buff_head *sk_queue;
+ int copied;
+ int err = 0;
++ int off = 0;
++ long timeo;
+
+ lock_sock(sk);
+ /*
+@@ -1634,11 +1671,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ goto out;
+ }
+
+- /* Now we can treat all alike */
+- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+- flags & MSG_DONTWAIT, &err);
+- if (skb == NULL)
+- goto out;
++ /* We need support for non-blocking reads. */
++ sk_queue = &sk->sk_receive_queue;
++ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
++ /* If no packet is available, release_sock(sk) and try again. */
++ if (!skb) {
++ if (err != -EAGAIN)
++ goto out;
++ release_sock(sk);
++ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
++ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
++ &timeo, last)) {
++ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
++ &err, &last);
++ if (skb)
++ break;
++
++ if (err != -EAGAIN)
++ goto done;
++ }
++ if (!skb)
++ goto done;
++ lock_sock(sk);
++ }
+
+ if (!sk_to_ax25(sk)->pidincl)
+ skb_pull(skb, 1); /* Remove PID */
+@@ -1685,6 +1740,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ out:
+ release_sock(sk);
+
++done:
+ return err;
+ }
+
+diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
+index 4ac2e0847652a..d2e0cc67d91a7 100644
+--- a/net/ax25/ax25_dev.c
++++ b/net/ax25/ax25_dev.c
+@@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
+ for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+ if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) {
+ res = ax25_dev;
++ ax25_dev_hold(ax25_dev);
+ }
+ spin_unlock_bh(&ax25_dev_lock);
+
+@@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev)
+ return;
+ }
+
++ refcount_set(&ax25_dev->refcount, 1);
+ dev->ax25_ptr = ax25_dev;
+ ax25_dev->dev = dev;
+ dev_hold(dev);
+@@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev)
+ ax25_dev->next = ax25_dev_list;
+ ax25_dev_list = ax25_dev;
+ spin_unlock_bh(&ax25_dev_lock);
++ ax25_dev_hold(ax25_dev);
+
+ ax25_register_dev_sysctl(ax25_dev);
+ }
+@@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev)
+ if ((s = ax25_dev_list) == ax25_dev) {
+ ax25_dev_list = s->next;
+ spin_unlock_bh(&ax25_dev_lock);
++ ax25_dev_put(ax25_dev);
+ dev->ax25_ptr = NULL;
+ dev_put(dev);
+- kfree(ax25_dev);
++ ax25_dev_put(ax25_dev);
+ return;
+ }
+
+@@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev)
+ if (s->next == ax25_dev) {
+ s->next = ax25_dev->next;
+ spin_unlock_bh(&ax25_dev_lock);
++ ax25_dev_put(ax25_dev);
+ dev->ax25_ptr = NULL;
+ dev_put(dev);
+- kfree(ax25_dev);
++ ax25_dev_put(ax25_dev);
+ return;
+ }
+
+@@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev)
+ }
+ spin_unlock_bh(&ax25_dev_lock);
+ dev->ax25_ptr = NULL;
++ ax25_dev_put(ax25_dev);
+ }
+
+ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
+@@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
+
+ switch (cmd) {
+ case SIOCAX25ADDFWD:
+- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
++ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
++ if (!fwd_dev) {
++ ax25_dev_put(ax25_dev);
+ return -EINVAL;
+- if (ax25_dev->forward != NULL)
++ }
++ if (ax25_dev->forward) {
++ ax25_dev_put(fwd_dev);
++ ax25_dev_put(ax25_dev);
+ return -EINVAL;
++ }
+ ax25_dev->forward = fwd_dev->dev;
++ ax25_dev_put(fwd_dev);
++ ax25_dev_put(ax25_dev);
+ break;
+
+ case SIOCAX25DELFWD:
+- if (ax25_dev->forward == NULL)
++ if (!ax25_dev->forward) {
++ ax25_dev_put(ax25_dev);
+ return -EINVAL;
++ }
+ ax25_dev->forward = NULL;
++ ax25_dev_put(ax25_dev);
+ break;
+
+ default:
++ ax25_dev_put(ax25_dev);
+ return -EINVAL;
+ }
+
+diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
+index d0b2e094bd552..9751207f77572 100644
+--- a/net/ax25/ax25_route.c
++++ b/net/ax25/ax25_route.c
+@@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+ ax25_dev *ax25_dev;
+ int i;
+
+- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
+- return -EINVAL;
+ if (route->digi_count > AX25_MAX_DIGIS)
+ return -EINVAL;
+
++ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
++ if (!ax25_dev)
++ return -EINVAL;
++
+ write_lock_bh(&ax25_route_lock);
+
+ ax25_rt = ax25_route_list;
+@@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+ if (route->digi_count != 0) {
+ if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+ return -ENOMEM;
+ }
+ ax25_rt->digipeat->lastrepeat = -1;
+@@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+ }
+ }
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+ return 0;
+ }
+ ax25_rt = ax25_rt->next;
+@@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+
+ if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+ return -ENOMEM;
+ }
+
+@@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+ if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
+ write_unlock_bh(&ax25_route_lock);
+ kfree(ax25_rt);
++ ax25_dev_put(ax25_dev);
+ return -ENOMEM;
+ }
+ ax25_rt->digipeat->lastrepeat = -1;
+@@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
+ ax25_rt->next = ax25_route_list;
+ ax25_route_list = ax25_rt;
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+
+ return 0;
+ }
+@@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
+ }
+ }
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+
+ return 0;
+ }
+@@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
+
+ out:
+ write_unlock_bh(&ax25_route_lock);
++ ax25_dev_put(ax25_dev);
+ return err;
+ }
+
+diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
+index 15ab812c4fe4b..3a476e4f6cd0b 100644
+--- a/net/ax25/ax25_subr.c
++++ b/net/ax25/ax25_subr.c
+@@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
+ {
+ ax25_clear_queues(ax25);
+
+- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
+- ax25_stop_heartbeat(ax25);
+- ax25_stop_t1timer(ax25);
+- ax25_stop_t2timer(ax25);
+- ax25_stop_t3timer(ax25);
+- ax25_stop_idletimer(ax25);
++ if (reason == ENETUNREACH) {
++ del_timer_sync(&ax25->timer);
++ del_timer_sync(&ax25->t1timer);
++ del_timer_sync(&ax25->t2timer);
++ del_timer_sync(&ax25->t3timer);
++ del_timer_sync(&ax25->idletimer);
++ } else {
++ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
++ ax25_stop_heartbeat(ax25);
++ ax25_stop_t1timer(ax25);
++ ax25_stop_t2timer(ax25);
++ ax25_stop_t3timer(ax25);
++ ax25_stop_idletimer(ax25);
++ }
+
+ ax25->state = AX25_STATE_0;
+
+diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
+index 71999e13f7293..5c5ddacd81cb7 100644
+--- a/net/batman-adv/bat_v_elp.c
++++ b/net/batman-adv/bat_v_elp.c
+@@ -507,7 +507,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
+ struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct batadv_elp_packet *elp_packet;
+ struct batadv_hard_iface *primary_if;
+- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
++ struct ethhdr *ethhdr;
+ bool res;
+ int ret = NET_RX_DROP;
+
+@@ -515,6 +515,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
+ if (!res)
+ goto free_skb;
+
++ ethhdr = eth_hdr(skb);
+ if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
+ goto free_skb;
+
+diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
+index 1d750f3cb2e4a..4fe6df68dfcb7 100644
+--- a/net/batman-adv/bat_v_ogm.c
++++ b/net/batman-adv/bat_v_ogm.c
+@@ -124,8 +124,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
+ {
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+
+- if (hard_iface->if_status != BATADV_IF_ACTIVE)
++ if (hard_iface->if_status != BATADV_IF_ACTIVE) {
++ kfree_skb(skb);
+ return;
++ }
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
+@@ -986,7 +988,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
+ {
+ struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct batadv_ogm2_packet *ogm_packet;
+- struct ethhdr *ethhdr = eth_hdr(skb);
++ struct ethhdr *ethhdr;
+ int ogm_offset;
+ u8 *packet_pos;
+ int ret = NET_RX_DROP;
+@@ -1000,6 +1002,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
+ if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
+ goto free_skb;
+
++ ethhdr = eth_hdr(skb);
+ if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
+ goto free_skb;
+
+diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
+index 2f008e3290079..42dcdf5fd76a1 100644
+--- a/net/batman-adv/distributed-arp-table.c
++++ b/net/batman-adv/distributed-arp-table.c
+@@ -101,7 +101,6 @@ static void batadv_dat_purge(struct work_struct *work);
+ */
+ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
+ {
+- INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
+ msecs_to_jiffies(10000));
+ }
+@@ -819,6 +818,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
+ if (!bat_priv->dat.hash)
+ return -ENOMEM;
+
++ INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
+ batadv_dat_start_timer(bat_priv);
+
+ batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
+diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
+index 0899a729a23f4..c120c7c6d25fc 100644
+--- a/net/batman-adv/fragmentation.c
++++ b/net/batman-adv/fragmentation.c
+@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
+ goto free_skb;
+ }
+
++ /* GRO might have added fragments to the fragment list instead of
++ * frags[]. But this is not handled by skb_split and must be
++ * linearized to avoid incorrect length information after all
++ * batman-adv fragments were created and submitted to the
++ * hard-interface
++ */
++ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
++ ret = -ENOMEM;
++ goto free_skb;
++ }
++
+ /* Create one header to be copied to all fragments */
+ frag_header.packet_type = BATADV_UNICAST_FRAG;
+ frag_header.version = BATADV_COMPAT_VERSION;
+diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
+index 8a2b78f9c4b2c..44cf612c0831e 100644
+--- a/net/batman-adv/hard-interface.c
++++ b/net/batman-adv/hard-interface.c
+@@ -149,22 +149,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
+ struct net *net = dev_net(net_dev);
+ struct net_device *parent_dev;
+ struct net *parent_net;
++ int iflink;
+ bool ret;
+
+ /* check if this is a batman-adv mesh interface */
+ if (batadv_softif_is_valid(net_dev))
+ return true;
+
+- /* no more parents..stop recursion */
+- if (dev_get_iflink(net_dev) == 0 ||
+- dev_get_iflink(net_dev) == net_dev->ifindex)
++ iflink = dev_get_iflink(net_dev);
++ if (iflink == 0)
+ return false;
+
+ parent_net = batadv_getlink_net(net_dev, net);
+
++ /* iflink to itself, most likely physical device */
++ if (net == parent_net && iflink == net_dev->ifindex)
++ return false;
++
+ /* recurse over the parent device */
+- parent_dev = __dev_get_by_index((struct net *)parent_net,
+- dev_get_iflink(net_dev));
++ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
+ /* if we got a NULL parent_dev there is something broken.. */
+ if (!parent_dev) {
+ pr_err("Cannot find parent device\n");
+@@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+ struct net_device *real_netdev = NULL;
+ struct net *real_net;
+ struct net *net;
+- int ifindex;
++ int iflink;
+
+ ASSERT_RTNL();
+
+ if (!netdev)
+ return NULL;
+
+- if (netdev->ifindex == dev_get_iflink(netdev)) {
++ iflink = dev_get_iflink(netdev);
++ if (iflink == 0) {
+ dev_hold(netdev);
+ return netdev;
+ }
+@@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+ goto out;
+
+ net = dev_net(hard_iface->soft_iface);
+- ifindex = dev_get_iflink(netdev);
+ real_net = batadv_getlink_net(netdev, net);
+- real_netdev = dev_get_by_index(real_net, ifindex);
++
++ /* iflink to itself, most likely physical device */
++ if (net == real_net && netdev->ifindex == iflink) {
++ real_netdev = netdev;
++ dev_hold(real_netdev);
++ goto out;
++ }
++
++ real_netdev = dev_get_by_index(real_net, iflink);
+
+ out:
+ batadv_hardif_put(hard_iface);
+@@ -616,7 +627,19 @@ out:
+ */
+ void batadv_update_min_mtu(struct net_device *soft_iface)
+ {
+- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
++ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
++ int limit_mtu;
++ int mtu;
++
++ mtu = batadv_hardif_min_mtu(soft_iface);
++
++ if (bat_priv->mtu_set_by_user)
++ limit_mtu = bat_priv->mtu_set_by_user;
++ else
++ limit_mtu = ETH_DATA_LEN;
++
++ mtu = min(mtu, limit_mtu);
++ dev_set_mtu(soft_iface, mtu);
+
+ /* Check if the local translate table should be cleaned up to match a
+ * new (and smaller) MTU.
+diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
+index a3b6658ed7899..2853634a39790 100644
+--- a/net/batman-adv/multicast.c
++++ b/net/batman-adv/multicast.c
+@@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
+ {
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+
+- if (in6_dev && in6_dev->cnf.mc_forwarding)
++ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
+ return BATADV_NO_FLAGS;
+ else
+ return BATADV_MCAST_WANT_NO_RTR6;
+@@ -1339,6 +1339,7 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: The multicast packet to check
+ * @orig: an originator to be set to forward the skb to
++ * @is_routable: stores whether the destination is routable
+ *
+ * Return: the forwarding mode as enum batadv_forw_mode and in case of
+ * BATADV_FORW_SINGLE set the orig to the single originator the skb
+@@ -1346,17 +1347,16 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
+ */
+ enum batadv_forw_mode
+ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- struct batadv_orig_node **orig)
++ struct batadv_orig_node **orig, int *is_routable)
+ {
+ int ret, tt_count, ip_count, unsnoop_count, total_count;
+ bool is_unsnoopable = false;
+ unsigned int mcast_fanout;
+ struct ethhdr *ethhdr;
+- int is_routable = 0;
+ int rtr_count = 0;
+
+ ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable,
+- &is_routable);
++ is_routable);
+ if (ret == -ENOMEM)
+ return BATADV_FORW_NONE;
+ else if (ret < 0)
+@@ -1369,7 +1369,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr);
+ unsnoop_count = !is_unsnoopable ? 0 :
+ atomic_read(&bat_priv->mcast.num_want_all_unsnoopables);
+- rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable);
++ rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable);
+
+ total_count = tt_count + ip_count + unsnoop_count + rtr_count;
+
+@@ -1689,6 +1689,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
++ * @is_routable: stores whether the destination is routable
+ *
+ * Sends copies of a frame with multicast destination to any node that signaled
+ * interest in it, that is either via the translation table or the according
+@@ -1701,7 +1702,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
+ * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise.
+ */
+ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- unsigned short vid)
++ unsigned short vid, int is_routable)
+ {
+ int ret;
+
+@@ -1717,12 +1718,16 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ return ret;
+ }
+
++ if (!is_routable)
++ goto skip_mc_router;
++
+ ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid);
+ if (ret != NET_XMIT_SUCCESS) {
+ kfree_skb(skb);
+ return ret;
+ }
+
++skip_mc_router:
+ consume_skb(skb);
+ return ret;
+ }
+diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
+index 9fee5da083113..8aec818d0bf63 100644
+--- a/net/batman-adv/multicast.h
++++ b/net/batman-adv/multicast.h
+@@ -43,7 +43,8 @@ enum batadv_forw_mode {
+
+ enum batadv_forw_mode
+ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- struct batadv_orig_node **mcast_single_orig);
++ struct batadv_orig_node **mcast_single_orig,
++ int *is_routable);
+
+ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+@@ -51,7 +52,7 @@ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node);
+
+ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- unsigned short vid);
++ unsigned short vid, int is_routable);
+
+ void batadv_mcast_init(struct batadv_priv *bat_priv);
+
+@@ -68,7 +69,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+
+ static inline enum batadv_forw_mode
+ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- struct batadv_orig_node **mcast_single_orig)
++ struct batadv_orig_node **mcast_single_orig,
++ int *is_routable)
+ {
+ return BATADV_FORW_ALL;
+ }
+@@ -85,7 +87,7 @@ batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+
+ static inline int
+ batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
+- unsigned short vid)
++ unsigned short vid, int is_routable)
+ {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
+index 29276284d281c..bbd6ecf1678c9 100644
+--- a/net/batman-adv/netlink.c
++++ b/net/batman-adv/netlink.c
+@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
+ attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+ atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
++
++ rtnl_lock();
+ batadv_update_min_mtu(bat_priv->soft_iface);
++ rtnl_unlock();
+ }
+
+ if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+@@ -1368,21 +1371,21 @@ static const struct genl_small_ops batadv_netlink_ops[] = {
+ {
+ .cmd = BATADV_CMD_TP_METER,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = batadv_netlink_tp_meter_start,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
+ },
+ {
+ .cmd = BATADV_CMD_TP_METER_CANCEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = batadv_netlink_tp_meter_cancel,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
+ },
+ {
+ .cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_algo_dump,
+ },
+ {
+@@ -1397,68 +1400,68 @@ static const struct genl_small_ops batadv_netlink_ops[] = {
+ {
+ .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_tt_local_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_tt_global_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_ORIGINATORS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_orig_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_NEIGHBORS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_hardif_neigh_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_GATEWAYS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_gw_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_BLA_CLAIM,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_bla_claim_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_BLA_BACKBONE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_bla_backbone_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .dumpit = batadv_mcast_flags_dump,
+ },
+ {
+ .cmd = BATADV_CMD_SET_MESH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = batadv_netlink_set_mesh,
+ .internal_flags = BATADV_FLAG_NEED_MESH,
+ },
+ {
+ .cmd = BATADV_CMD_SET_HARDIF,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = batadv_netlink_set_hardif,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_HARDIF,
+@@ -1474,7 +1477,7 @@ static const struct genl_small_ops batadv_netlink_ops[] = {
+ {
+ .cmd = BATADV_CMD_SET_VLAN,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+- .flags = GENL_ADMIN_PERM,
++ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = batadv_netlink_set_vlan,
+ .internal_flags = BATADV_FLAG_NEED_MESH |
+ BATADV_FLAG_NEED_VLAN,
+diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
+index 0604b02795731..99cd8aef07354 100644
+--- a/net/batman-adv/soft-interface.c
++++ b/net/batman-adv/soft-interface.c
+@@ -154,11 +154,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
+
+ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
+ {
++ struct batadv_priv *bat_priv = netdev_priv(dev);
++
+ /* check ranges */
+ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
++ bat_priv->mtu_set_by_user = new_mtu;
+
+ return 0;
+ }
+@@ -198,6 +201,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
+ int gw_mode;
+ enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE;
+ struct batadv_orig_node *mcast_single_orig = NULL;
++ int mcast_is_routable = 0;
+ int network_offset = ETH_HLEN;
+ __be16 proto;
+
+@@ -300,7 +304,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
+ send:
+ if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+- &mcast_single_orig);
++ &mcast_single_orig,
++ &mcast_is_routable);
+ if (forw_mode == BATADV_FORW_NONE)
+ goto dropped;
+
+@@ -359,7 +364,8 @@ send:
+ ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
+ mcast_single_orig);
+ } else if (forw_mode == BATADV_FORW_SOME) {
+- ret = batadv_mcast_forw_send(bat_priv, skb, vid);
++ ret = batadv_mcast_forw_send(bat_priv, skb, vid,
++ mcast_is_routable);
+ } else {
+ if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
+ skb))
+diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
+index 4b7ad6684bc40..1e1cf0e8a1427 100644
+--- a/net/batman-adv/translation-table.c
++++ b/net/batman-adv/translation-table.c
+@@ -774,7 +774,6 @@ check_roaming:
+ if (roamed_back) {
+ batadv_tt_global_free(bat_priv, tt_global,
+ "Roaming canceled");
+- tt_global = NULL;
+ } else {
+ /* The global entry has to be marked as ROAMING and
+ * has to be kept for consistency purpose
+diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
+index 2be5d4a712c5c..2635763bbd67a 100644
+--- a/net/batman-adv/types.h
++++ b/net/batman-adv/types.h
+@@ -1546,6 +1546,12 @@ struct batadv_priv {
+ /** @soft_iface: net device which holds this struct as private data */
+ struct net_device *soft_iface;
+
++ /**
++ * @mtu_set_by_user: MTU was set once by user
++ * protected by rtnl_lock
++ */
++ int mtu_set_by_user;
++
+ /**
+ * @bat_counters: mesh internal traffic statistic counters (see
+ * batadv_counters)
+diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
+index fd164a248569c..580b0940f067a 100644
+--- a/net/bluetooth/6lowpan.c
++++ b/net/bluetooth/6lowpan.c
+@@ -971,6 +971,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
+ hci_dev_lock(hdev);
+ hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
+ hci_dev_unlock(hdev);
++ hci_dev_put(hdev);
+
+ if (!hcon)
+ return -ENOENT;
+diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
+index 1661979b6a6e8..ce744b14d1a98 100644
+--- a/net/bluetooth/af_bluetooth.c
++++ b/net/bluetooth/af_bluetooth.c
+@@ -736,7 +736,7 @@ static int __init bt_init(void)
+
+ err = bt_sysfs_init();
+ if (err < 0)
+- return err;
++ goto cleanup_led;
+
+ err = sock_register(&bt_sock_family_ops);
+ if (err)
+@@ -772,6 +772,8 @@ unregister_socket:
+ sock_unregister(PF_BLUETOOTH);
+ cleanup_sysfs:
+ bt_sysfs_cleanup();
++cleanup_led:
++ bt_leds_cleanup();
+ return err;
+ }
+
+diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
+index 0a2d78e811cf5..83eb84e8e688f 100644
+--- a/net/bluetooth/cmtp/core.c
++++ b/net/bluetooth/cmtp/core.c
+@@ -501,9 +501,7 @@ static int __init cmtp_init(void)
+ {
+ BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
+
+- cmtp_init_sockets();
+-
+- return 0;
++ return cmtp_init_sockets();
+ }
+
+ static void __exit cmtp_exit(void)
+diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
+index 2b5059a56cdaa..7a7e92be1652c 100644
+--- a/net/bluetooth/hci_conn.c
++++ b/net/bluetooth/hci_conn.c
+@@ -541,7 +541,9 @@ static void le_conn_timeout(struct work_struct *work)
+ if (conn->role == HCI_ROLE_SLAVE) {
+ /* Disable LE Advertising */
+ le_disable_advertising(hdev);
++ hci_dev_lock(hdev);
+ hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
++ hci_dev_unlock(hdev);
+ return;
+ }
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index 8a47a3017d61d..e777ccf76b2b7 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -742,7 +742,8 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
+ hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
+ }
+
+- if (hdev->commands[38] & 0x80) {
++ if ((hdev->commands[38] & 0x80) &&
++ !test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) {
+ /* Read LE Min/Max Tx Power*/
+ hci_req_add(req, HCI_OP_LE_READ_TRANSMIT_POWER,
+ 0, NULL);
+@@ -1631,6 +1632,7 @@ setup_failed:
+ hdev->flush(hdev);
+
+ if (hdev->sent_cmd) {
++ cancel_delayed_work_sync(&hdev->cmd_timer);
+ kfree_skb(hdev->sent_cmd);
+ hdev->sent_cmd = NULL;
+ }
+@@ -2353,9 +2355,9 @@ void hci_uuids_clear(struct hci_dev *hdev)
+
+ void hci_link_keys_clear(struct hci_dev *hdev)
+ {
+- struct link_key *key;
++ struct link_key *key, *tmp;
+
+- list_for_each_entry(key, &hdev->link_keys, list) {
++ list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) {
+ list_del_rcu(&key->list);
+ kfree_rcu(key, rcu);
+ }
+@@ -2363,9 +2365,9 @@ void hci_link_keys_clear(struct hci_dev *hdev)
+
+ void hci_smp_ltks_clear(struct hci_dev *hdev)
+ {
+- struct smp_ltk *k;
++ struct smp_ltk *k, *tmp;
+
+- list_for_each_entry(k, &hdev->long_term_keys, list) {
++ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
+ list_del_rcu(&k->list);
+ kfree_rcu(k, rcu);
+ }
+@@ -2373,9 +2375,9 @@ void hci_smp_ltks_clear(struct hci_dev *hdev)
+
+ void hci_smp_irks_clear(struct hci_dev *hdev)
+ {
+- struct smp_irk *k;
++ struct smp_irk *k, *tmp;
+
+- list_for_each_entry(k, &hdev->identity_resolving_keys, list) {
++ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
+ list_del_rcu(&k->list);
+ kfree_rcu(k, rcu);
+ }
+@@ -2383,9 +2385,9 @@ void hci_smp_irks_clear(struct hci_dev *hdev)
+
+ void hci_blocked_keys_clear(struct hci_dev *hdev)
+ {
+- struct blocked_key *b;
++ struct blocked_key *b, *tmp;
+
+- list_for_each_entry(b, &hdev->blocked_keys, list) {
++ list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
+ list_del_rcu(&b->list);
+ kfree_rcu(b, rcu);
+ }
+@@ -2695,10 +2697,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
+
+ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
+ {
+- struct smp_ltk *k;
++ struct smp_ltk *k, *tmp;
+ int removed = 0;
+
+- list_for_each_entry_rcu(k, &hdev->long_term_keys, list) {
++ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
+ if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type)
+ continue;
+
+@@ -2714,9 +2716,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
+
+ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
+ {
+- struct smp_irk *k;
++ struct smp_irk *k, *tmp;
+
+- list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) {
++ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
+ if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type)
+ continue;
+
+@@ -3906,10 +3908,10 @@ int hci_register_dev(struct hci_dev *hdev)
+ */
+ switch (hdev->dev_type) {
+ case HCI_PRIMARY:
+- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
++ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
+ break;
+ case HCI_AMP:
+- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
++ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
+ break;
+ default:
+ return -EINVAL;
+@@ -3918,7 +3920,7 @@ int hci_register_dev(struct hci_dev *hdev)
+ if (id < 0)
+ return id;
+
+- sprintf(hdev->name, "hci%d", id);
++ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ hdev->id = id;
+
+ BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
+@@ -3984,7 +3986,8 @@ int hci_register_dev(struct hci_dev *hdev)
+ hci_sock_dev_event(hdev, HCI_DEV_REG);
+ hci_dev_hold(hdev);
+
+- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
++ if (!hdev->suspend_notifier.notifier_call &&
++ !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
+ error = register_pm_notifier(&hdev->suspend_notifier);
+ if (error)
+@@ -3998,6 +4001,7 @@ int hci_register_dev(struct hci_dev *hdev)
+ return id;
+
+ err_wqueue:
++ debugfs_remove_recursive(hdev->debugfs);
+ destroy_workqueue(hdev->workqueue);
+ destroy_workqueue(hdev->req_workqueue);
+ err:
+@@ -4081,6 +4085,7 @@ void hci_release_dev(struct hci_dev *hdev)
+ hci_dev_unlock(hdev);
+
+ ida_simple_remove(&hci_index_ida, hdev->id);
++ kfree_skb(hdev->sent_cmd);
+ kfree(hdev);
+ }
+ EXPORT_SYMBOL(hci_release_dev);
+@@ -4670,15 +4675,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
+ return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
+ }
+
+-static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
++static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
+ {
+- if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+- /* ACL tx timeout must be longer than maximum
+- * link supervision timeout (40.9 seconds) */
+- if (!cnt && time_after(jiffies, hdev->acl_last_tx +
+- HCI_ACL_TX_TIMEOUT))
+- hci_link_tx_to(hdev, ACL_LINK);
++ unsigned long last_tx;
++
++ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
++ return;
++
++ switch (type) {
++ case LE_LINK:
++ last_tx = hdev->le_last_tx;
++ break;
++ default:
++ last_tx = hdev->acl_last_tx;
++ break;
+ }
++
++ /* tx timeout must be longer than maximum link supervision timeout
++ * (40.9 seconds)
++ */
++ if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT))
++ hci_link_tx_to(hdev, type);
+ }
+
+ /* Schedule SCO */
+@@ -4736,7 +4753,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
+ struct sk_buff *skb;
+ int quote;
+
+- __check_timeout(hdev, cnt);
++ __check_timeout(hdev, cnt, ACL_LINK);
+
+ while (hdev->acl_cnt &&
+ (chan = hci_chan_sent(hdev, ACL_LINK, &quote))) {
+@@ -4779,8 +4796,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev)
+ int quote;
+ u8 type;
+
+- __check_timeout(hdev, cnt);
+-
+ BT_DBG("%s", hdev->name);
+
+ if (hdev->dev_type == HCI_AMP)
+@@ -4788,6 +4803,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev)
+ else
+ type = ACL_LINK;
+
++ __check_timeout(hdev, cnt, type);
++
+ while (hdev->block_cnt > 0 &&
+ (chan = hci_chan_sent(hdev, type, &quote))) {
+ u32 priority = (skb_peek(&chan->data_q))->priority;
+@@ -4861,7 +4878,7 @@ static void hci_sched_le(struct hci_dev *hdev)
+
+ cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
+
+- __check_timeout(hdev, cnt);
++ __check_timeout(hdev, cnt, LE_LINK);
+
+ tmp = cnt;
+ while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
+@@ -5085,7 +5102,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
+ *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+ else
+ *req_complete = bt_cb(skb)->hci.req_complete;
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ }
+ spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
+ }
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 0bca035bf2dcc..9f82fe0e62708 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -1325,8 +1325,10 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
+ &conn->le_conn_timeout,
+ conn->conn_timeout);
+ } else {
+- if (adv) {
+- adv->enabled = false;
++ if (cp->num_of_sets) {
++ if (adv)
++ adv->enabled = false;
++
+ /* If just one instance was disabled check if there are
+ * any other instance enabled before clearing HCI_LE_ADV
+ */
+@@ -1557,7 +1559,9 @@ static void hci_cc_le_clear_accept_list(struct hci_dev *hdev,
+ if (status)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev,
+@@ -1575,8 +1579,10 @@ static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev,
+ if (!sent)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
+ sent->bdaddr_type);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev,
+@@ -1594,8 +1600,10 @@ static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev,
+ if (!sent)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
+ sent->bdaddr_type);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
+@@ -1659,9 +1667,11 @@ static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev,
+ if (!sent)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+ sent->bdaddr_type, sent->peer_irk,
+ sent->local_irk);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev,
+@@ -1679,8 +1689,10 @@ static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev,
+ if (!sent)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+ sent->bdaddr_type);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev,
+@@ -1693,7 +1705,9 @@ static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev,
+ if (status)
+ return;
+
++ hci_dev_lock(hdev);
+ hci_bdaddr_list_clear(&hdev->le_resolv_list);
++ hci_dev_unlock(hdev);
+ }
+
+ static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev,
+@@ -2788,10 +2802,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+ return;
+ }
+
++ hci_dev_lock(hdev);
++
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
+ BDADDR_BREDR)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+- return;
++ goto unlock;
+ }
+
+ /* Require HCI_CONNECTABLE or an accept list entry to accept the
+@@ -2803,13 +2819,11 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+ !hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
+ BDADDR_BREDR)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+- return;
++ goto unlock;
+ }
+
+ /* Connection accepted */
+
+- hci_dev_lock(hdev);
+-
+ ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
+ if (ie)
+ memcpy(ie->data.dev_class, ev->dev_class, 3);
+@@ -2821,8 +2835,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+ HCI_ROLE_SLAVE);
+ if (!conn) {
+ bt_dev_err(hdev, "no memory for new connection");
+- hci_dev_unlock(hdev);
+- return;
++ goto unlock;
+ }
+ }
+
+@@ -2862,6 +2875,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
+ conn->state = BT_CONNECT2;
+ hci_connect_cfm(conn, 0);
+ }
++
++ return;
++unlock:
++ hci_dev_unlock(hdev);
+ }
+
+ static u8 hci_to_mgmt_reason(u8 err)
+@@ -4398,6 +4415,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
+ struct hci_ev_sync_conn_complete *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
++ switch (ev->link_type) {
++ case SCO_LINK:
++ case ESCO_LINK:
++ break;
++ default:
++ /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type
++ * for HCI_Synchronous_Connection_Complete is limited to
++ * either SCO or eSCO
++ */
++ bt_dev_err(hdev, "Ignoring connect complete event for invalid link type");
++ return;
++ }
++
+ BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+ hci_dev_lock(hdev);
+@@ -5151,8 +5181,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
+ hci_dev_lock(hdev);
+
+ hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+- if (hcon) {
++ if (hcon && hcon->type == AMP_LINK) {
+ hcon->state = BT_CLOSED;
++ hci_disconn_cfm(hcon, ev->reason);
+ hci_conn_del(hcon);
+ }
+
+@@ -5780,7 +5811,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
+ struct hci_ev_le_advertising_info *ev = ptr;
+ s8 rssi;
+
+- if (ev->length <= HCI_MAX_AD_LENGTH) {
++ if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) {
++ bt_dev_err(hdev, "Malicious advertising data.");
++ break;
++ }
++
++ if (ev->length <= HCI_MAX_AD_LENGTH &&
++ ev->data + ev->length <= skb_tail_pointer(skb)) {
+ rssi = ev->data[ev->length];
+ process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
+ ev->bdaddr_type, NULL, 0, rssi,
+diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
+index f15626607b2d6..c2db60ad0f1d2 100644
+--- a/net/bluetooth/hci_request.c
++++ b/net/bluetooth/hci_request.c
+@@ -2318,7 +2318,7 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance)
+ /* Set duration per instance since controller is responsible for
+ * scheduling it.
+ */
+- if (adv_instance && adv_instance->duration) {
++ if (adv_instance && adv_instance->timeout) {
+ u16 duration = adv_instance->timeout * MSEC_PER_SEC;
+
+ /* Time = N * 10 ms */
+@@ -3174,6 +3174,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
+ if (err < 0)
+ own_addr_type = ADDR_LE_DEV_PUBLIC;
+
++ hci_dev_lock(hdev);
+ if (hci_is_adv_monitoring(hdev)) {
+ /* Duplicate filter should be disabled when some advertisement
+ * monitor is activated, otherwise AdvMon can only receive one
+@@ -3190,6 +3191,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
+ */
+ filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
+ }
++ hci_dev_unlock(hdev);
+
+ hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
+ hdev->le_scan_window_discovery, own_addr_type,
+diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
+index f1128c2134f02..315f9ad3dc4d4 100644
+--- a/net/bluetooth/hci_sock.c
++++ b/net/bluetooth/hci_sock.c
+@@ -888,10 +888,6 @@ static int hci_sock_release(struct socket *sock)
+ }
+
+ sock_orphan(sk);
+-
+- skb_queue_purge(&sk->sk_receive_queue);
+- skb_queue_purge(&sk->sk_write_queue);
+-
+ release_sock(sk);
+ sock_put(sk);
+ return 0;
+@@ -984,6 +980,34 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
+
+ BT_DBG("cmd %x arg %lx", cmd, arg);
+
++ /* Make sure the cmd is valid before doing anything */
++ switch (cmd) {
++ case HCIGETDEVLIST:
++ case HCIGETDEVINFO:
++ case HCIGETCONNLIST:
++ case HCIDEVUP:
++ case HCIDEVDOWN:
++ case HCIDEVRESET:
++ case HCIDEVRESTAT:
++ case HCISETSCAN:
++ case HCISETAUTH:
++ case HCISETENCRYPT:
++ case HCISETPTYPE:
++ case HCISETLINKPOL:
++ case HCISETLINKMODE:
++ case HCISETACLMTU:
++ case HCISETSCOMTU:
++ case HCIINQUIRY:
++ case HCISETRAW:
++ case HCIGETCONNINFO:
++ case HCIGETAUTHINFO:
++ case HCIBLOCKADDR:
++ case HCIUNBLOCKADDR:
++ break;
++ default:
++ return -ENOIOCTLCMD;
++ }
++
+ lock_sock(sk);
+
+ if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
+@@ -1000,7 +1024,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
+ if (hci_sock_gen_cookie(sk)) {
+ struct sk_buff *skb;
+
+- if (capable(CAP_NET_ADMIN))
++ /* Perform careful checks before setting the HCI_SOCK_TRUSTED
++ * flag. Make sure that not only the current task but also
++ * the socket opener has the required capability, since
++ * privileged programs can be tricked into making ioctl calls
++ * on HCI sockets, and the socket should not be marked as
++ * trusted simply because the ioctl caller is privileged.
++ */
++ if (sk_capable(sk, CAP_NET_ADMIN))
+ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+
+ /* Send event to monitor */
+@@ -2012,6 +2043,12 @@ done:
+ return err;
+ }
+
++static void hci_sock_destruct(struct sock *sk)
++{
++ skb_queue_purge(&sk->sk_receive_queue);
++ skb_queue_purge(&sk->sk_write_queue);
++}
++
+ static const struct proto_ops hci_sock_ops = {
+ .family = PF_BLUETOOTH,
+ .owner = THIS_MODULE,
+@@ -2065,6 +2102,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
+
+ sock->state = SS_UNCONNECTED;
+ sk->sk_state = BT_OPEN;
++ sk->sk_destruct = hci_sock_destruct;
+
+ bt_sock_link(&hci_sk_list, sk);
+ return 0;
+diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
+index 7827639ecf5c3..08542dfc2dc53 100644
+--- a/net/bluetooth/hci_sysfs.c
++++ b/net/bluetooth/hci_sysfs.c
+@@ -48,6 +48,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
+
+ BT_DBG("conn %p", conn);
+
++ if (device_is_registered(&conn->dev))
++ return;
++
+ dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
+
+ if (device_add(&conn->dev) < 0) {
+@@ -86,6 +89,8 @@ static void bt_host_release(struct device *dev)
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_release_dev(hdev);
++ else
++ kfree(hdev);
+ module_put(THIS_MODULE);
+ }
+
+diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
+index 80848dfc01db5..021ab957a5c4d 100644
+--- a/net/bluetooth/hidp/core.c
++++ b/net/bluetooth/hidp/core.c
+@@ -433,7 +433,7 @@ static void hidp_set_timer(struct hidp_session *session)
+ static void hidp_del_timer(struct hidp_session *session)
+ {
+ if (session->idle_to > 0)
+- del_timer(&session->timer);
++ del_timer_sync(&session->timer);
+ }
+
+ static void hidp_process_report(struct hidp_session *session, int type,
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 77ba68209dbd8..0770286ecf0bc 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -61,6 +61,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
+
+ static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
+ struct sk_buff_head *skbs, u8 event);
++static void l2cap_retrans_timeout(struct work_struct *work);
++static void l2cap_monitor_timeout(struct work_struct *work);
++static void l2cap_ack_timeout(struct work_struct *work);
+
+ static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type)
+ {
+@@ -111,7 +114,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ }
+
+ /* Find channel with given SCID.
+- * Returns locked channel. */
++ * Returns a reference locked channel.
++ */
+ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ u16 cid)
+ {
+@@ -119,15 +123,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_scid(conn, cid);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+ }
+
+ /* Find channel with given DCID.
+- * Returns locked channel.
++ * Returns a reference locked channel.
+ */
+ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+ u16 cid)
+@@ -136,8 +144,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_dcid(conn, cid);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+@@ -162,8 +174,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_ident(conn, ident);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+@@ -463,6 +479,9 @@ struct l2cap_chan *l2cap_chan_create(void)
+ write_unlock(&chan_list_lock);
+
+ INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout);
++ INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
++ INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
++ INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
+
+ chan->state = BT_OPEN;
+
+@@ -497,6 +516,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
+ kref_get(&c->kref);
+ }
+
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
++{
++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
++
++ if (!kref_get_unless_zero(&c->kref))
++ return NULL;
++
++ return c;
++}
++
+ void l2cap_chan_put(struct l2cap_chan *c)
+ {
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+@@ -679,6 +708,17 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
+ }
+ EXPORT_SYMBOL_GPL(l2cap_chan_del);
+
++static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id,
++ l2cap_chan_func_t func, void *data)
++{
++ struct l2cap_chan *chan, *l;
++
++ list_for_each_entry_safe(chan, l, &conn->chan_l, list) {
++ if (chan->ident == id)
++ func(chan, data);
++ }
++}
++
+ static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func,
+ void *data)
+ {
+@@ -746,23 +786,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan)
+
+ static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan)
+ {
+- struct l2cap_conn *conn = chan->conn;
+- struct l2cap_ecred_conn_rsp rsp;
+- u16 result;
+-
+- if (test_bit(FLAG_DEFER_SETUP, &chan->flags))
+- result = L2CAP_CR_LE_AUTHORIZATION;
+- else
+- result = L2CAP_CR_LE_BAD_PSM;
+-
+ l2cap_state_change(chan, BT_DISCONN);
+
+- memset(&rsp, 0, sizeof(rsp));
+-
+- rsp.result = cpu_to_le16(result);
+-
+- l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp),
+- &rsp);
++ __l2cap_ecred_conn_rsp_defer(chan);
+ }
+
+ static void l2cap_chan_connect_reject(struct l2cap_chan *chan)
+@@ -817,7 +843,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason)
+ break;
+ case L2CAP_MODE_EXT_FLOWCTL:
+ l2cap_chan_ecred_connect_reject(chan);
+- break;
++ return;
+ }
+ }
+ }
+@@ -1436,6 +1462,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
+
+ l2cap_ecred_init(chan, 0);
+
++ memset(&data, 0, sizeof(data));
+ data.pdu.req.psm = chan->psm;
+ data.pdu.req.mtu = cpu_to_le16(chan->imtu);
+ data.pdu.req.mps = cpu_to_le16(chan->mps);
+@@ -1946,11 +1973,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ bdaddr_t *dst,
+ u8 link_type)
+ {
+- struct l2cap_chan *c, *c1 = NULL;
++ struct l2cap_chan *c, *tmp, *c1 = NULL;
+
+ read_lock(&chan_list_lock);
+
+- list_for_each_entry(c, &chan_list, global_l) {
++ list_for_each_entry_safe(c, tmp, &chan_list, global_l) {
+ if (state && c->state != state)
+ continue;
+
+@@ -1960,7 +1987,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ if (link_type == LE_LINK && c->src_type == BDADDR_BREDR)
+ continue;
+
+- if (c->psm == psm) {
++ if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) {
+ int src_match, dst_match;
+ int src_any, dst_any;
+
+@@ -1968,7 +1995,9 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ src_match = !bacmp(&c->src, src);
+ dst_match = !bacmp(&c->dst, dst);
+ if (src_match && dst_match) {
+- l2cap_chan_hold(c);
++ if (!l2cap_chan_hold_unless_zero(c))
++ continue;
++
+ read_unlock(&chan_list_lock);
+ return c;
+ }
+@@ -1983,7 +2012,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ }
+
+ if (c1)
+- l2cap_chan_hold(c1);
++ c1 = l2cap_chan_hold_unless_zero(c1);
+
+ read_unlock(&chan_list_lock);
+
+@@ -2651,14 +2680,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+- /* Channel lock is released before requesting new skb and then
+- * reacquired thus we need to recheck channel state.
+- */
+- if (chan->state != BT_CONNECTED) {
+- kfree_skb(skb);
+- return -ENOTCONN;
+- }
+-
+ l2cap_do_send(chan, skb);
+ return len;
+ }
+@@ -2703,14 +2724,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+- /* Channel lock is released before requesting new skb and then
+- * reacquired thus we need to recheck channel state.
+- */
+- if (chan->state != BT_CONNECTED) {
+- kfree_skb(skb);
+- return -ENOTCONN;
+- }
+-
+ l2cap_do_send(chan, skb);
+ err = len;
+ break;
+@@ -2731,14 +2744,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
+ */
+ err = l2cap_segment_sdu(chan, &seg_queue, msg, len);
+
+- /* The channel could have been closed while segmenting,
+- * check that it is still connected.
+- */
+- if (chan->state != BT_CONNECTED) {
+- __skb_queue_purge(&seg_queue);
+- err = -ENOTCONN;
+- }
+-
+ if (err)
+ break;
+
+@@ -3294,10 +3299,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan)
+ chan->rx_state = L2CAP_RX_STATE_RECV;
+ chan->tx_state = L2CAP_TX_STATE_XMIT;
+
+- INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
+- INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
+- INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
+-
+ skb_queue_head_init(&chan->srej_q);
+
+ err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win);
+@@ -3736,7 +3737,8 @@ done:
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
+ sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
+
+- if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
++ if (remote_efs &&
++ test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
+ chan->remote_id = efs.id;
+ chan->remote_stype = efs.stype;
+ chan->remote_msdu = le16_to_cpu(efs.msdu);
+@@ -3933,43 +3935,86 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan)
+ &rsp);
+ }
+
+-void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan)
++static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data)
+ {
++ int *result = data;
++
++ if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
++ return;
++
++ switch (chan->state) {
++ case BT_CONNECT2:
++ /* If channel still pending accept add to result */
++ (*result)++;
++ return;
++ case BT_CONNECTED:
++ return;
++ default:
++ /* If not connected or pending accept it has been refused */
++ *result = -ECONNREFUSED;
++ return;
++ }
++}
++
++struct l2cap_ecred_rsp_data {
+ struct {
+ struct l2cap_ecred_conn_rsp rsp;
+- __le16 dcid[5];
++ __le16 scid[L2CAP_ECRED_MAX_CID];
+ } __packed pdu;
++ int count;
++};
++
++static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
++{
++ struct l2cap_ecred_rsp_data *rsp = data;
++
++ if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
++ return;
++
++ /* Reset ident so only one response is sent */
++ chan->ident = 0;
++
++ /* Include all channels pending with the same ident */
++ if (!rsp->pdu.rsp.result)
++ rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid);
++ else
++ l2cap_chan_del(chan, ECONNRESET);
++}
++
++void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan)
++{
+ struct l2cap_conn *conn = chan->conn;
+- u16 ident = chan->ident;
+- int i = 0;
++ struct l2cap_ecred_rsp_data data;
++ u16 id = chan->ident;
++ int result = 0;
+
+- if (!ident)
++ if (!id)
+ return;
+
+- BT_DBG("chan %p ident %d", chan, ident);
++ BT_DBG("chan %p id %d", chan, id);
+
+- pdu.rsp.mtu = cpu_to_le16(chan->imtu);
+- pdu.rsp.mps = cpu_to_le16(chan->mps);
+- pdu.rsp.credits = cpu_to_le16(chan->rx_credits);
+- pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS);
++ memset(&data, 0, sizeof(data));
+
+- mutex_lock(&conn->chan_lock);
++ data.pdu.rsp.mtu = cpu_to_le16(chan->imtu);
++ data.pdu.rsp.mps = cpu_to_le16(chan->mps);
++ data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits);
++ data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS);
+
+- list_for_each_entry(chan, &conn->chan_l, list) {
+- if (chan->ident != ident)
+- continue;
++ /* Verify that all channels are ready */
++ __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result);
+
+- /* Reset ident so only one response is sent */
+- chan->ident = 0;
++ if (result > 0)
++ return;
+
+- /* Include all channels pending with the same ident */
+- pdu.dcid[i++] = cpu_to_le16(chan->scid);
+- }
++ if (result < 0)
++ data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION);
+
+- mutex_unlock(&conn->chan_lock);
++ /* Build response */
++ __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data);
+
+- l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP,
+- sizeof(pdu.rsp) + i * sizeof(__le16), &pdu);
++ l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP,
++ sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)),
++ &data.pdu);
+ }
+
+ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
+@@ -4262,6 +4307,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
+ result = __le16_to_cpu(rsp->result);
+ status = __le16_to_cpu(rsp->status);
+
++ if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START ||
++ dcid > L2CAP_CID_DYN_END))
++ return -EPROTO;
++
+ BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x",
+ dcid, scid, result, status);
+
+@@ -4281,12 +4330,23 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
+ }
+ }
+
++ chan = l2cap_chan_hold_unless_zero(chan);
++ if (!chan) {
++ err = -EBADSLT;
++ goto unlock;
++ }
++
+ err = 0;
+
+ l2cap_chan_lock(chan);
+
+ switch (result) {
+ case L2CAP_CR_SUCCESS:
++ if (__l2cap_get_chan_by_dcid(conn, dcid)) {
++ err = -EBADSLT;
++ break;
++ }
++
+ l2cap_state_change(chan, BT_CONFIG);
+ chan->ident = 0;
+ chan->dcid = dcid;
+@@ -4310,6 +4370,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
+ }
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ unlock:
+ mutex_unlock(&conn->chan_lock);
+@@ -4417,7 +4478,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
+
+ chan->ident = cmd->ident;
+ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
+- chan->num_conf_rsp++;
++ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP)
++ chan->num_conf_rsp++;
+
+ /* Reset config buffer. */
+ chan->conf_len = 0;
+@@ -4463,6 +4525,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
+
+ unlock:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ return err;
+ }
+
+@@ -4577,6 +4640,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
+
+ done:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ return err;
+ }
+
+@@ -4597,33 +4661,29 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
+
+ BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
+
+- mutex_lock(&conn->chan_lock);
+-
+- chan = __l2cap_get_chan_by_scid(conn, dcid);
++ chan = l2cap_get_chan_by_scid(conn, dcid);
+ if (!chan) {
+- mutex_unlock(&conn->chan_lock);
+ cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid);
+ return 0;
+ }
+
+- l2cap_chan_hold(chan);
+- l2cap_chan_lock(chan);
+-
+ rsp.dcid = cpu_to_le16(chan->scid);
+ rsp.scid = cpu_to_le16(chan->dcid);
+ l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
+
+ chan->ops->set_shutdown(chan);
+
++ l2cap_chan_unlock(chan);
++ mutex_lock(&conn->chan_lock);
++ l2cap_chan_lock(chan);
+ l2cap_chan_del(chan, ECONNRESET);
++ mutex_unlock(&conn->chan_lock);
+
+ chan->ops->close(chan);
+
+ l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
+
+- mutex_unlock(&conn->chan_lock);
+-
+ return 0;
+ }
+
+@@ -4643,33 +4703,28 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
+
+ BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
+
+- mutex_lock(&conn->chan_lock);
+-
+- chan = __l2cap_get_chan_by_scid(conn, scid);
++ chan = l2cap_get_chan_by_scid(conn, scid);
+ if (!chan) {
+- mutex_unlock(&conn->chan_lock);
+ return 0;
+ }
+
+- l2cap_chan_hold(chan);
+- l2cap_chan_lock(chan);
+-
+ if (chan->state != BT_DISCONN) {
+ l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
+- mutex_unlock(&conn->chan_lock);
+ return 0;
+ }
+
++ l2cap_chan_unlock(chan);
++ mutex_lock(&conn->chan_lock);
++ l2cap_chan_lock(chan);
+ l2cap_chan_del(chan, 0);
++ mutex_unlock(&conn->chan_lock);
+
+ chan->ops->close(chan);
+
+ l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
+
+- mutex_unlock(&conn->chan_lock);
+-
+ return 0;
+ }
+
+@@ -5304,6 +5359,7 @@ send_move_response:
+ l2cap_send_move_chan_rsp(chan, result);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5396,6 +5452,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
+ }
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+@@ -5425,6 +5482,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+ l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
+@@ -5488,6 +5546,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
+ l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5523,6 +5582,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
+ }
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5771,6 +5831,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
+ BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm),
+ scid, mtu, mps);
+
++ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
++ * page 1059:
++ *
++ * Valid range: 0x0001-0x00ff
++ *
++ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
++ */
++ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
++ result = L2CAP_CR_LE_BAD_PSM;
++ chan = NULL;
++ goto response;
++ }
++
+ /* Check if we have socket listening on psm */
+ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
+ &conn->hcon->dst, LE_LINK);
+@@ -5895,12 +5968,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+ if (credits > max_credits) {
+ BT_ERR("LE credits overflow");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+- l2cap_chan_unlock(chan);
+
+ /* Return 0 so that we don't trigger an unnecessary
+ * command reject packet.
+ */
+- return 0;
++ goto unlock;
+ }
+
+ chan->tx_credits += credits;
+@@ -5911,7 +5983,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+ if (chan->tx_credits)
+ chan->ops->resume(chan);
+
++unlock:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5958,6 +6032,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
+
+ psm = req->psm;
+
++ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
++ * page 1059:
++ *
++ * Valid range: 0x0001-0x00ff
++ *
++ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
++ */
++ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
++ result = L2CAP_CR_LE_BAD_PSM;
++ goto response;
++ }
++
+ BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
+
+ memset(&pdu, 0, sizeof(pdu));
+@@ -6032,6 +6118,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
+ __set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
+
+ chan->ident = cmd->ident;
++ chan->mode = L2CAP_MODE_EXT_FLOWCTL;
+
+ if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
+ l2cap_state_change(chan, BT_CONNECT2);
+@@ -6288,9 +6375,14 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn,
+ if (!chan)
+ goto done;
+
++ chan = l2cap_chan_hold_unless_zero(chan);
++ if (!chan)
++ goto done;
++
+ l2cap_chan_lock(chan);
+ l2cap_chan_del(chan, ECONNREFUSED);
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ done:
+ mutex_unlock(&conn->chan_lock);
+@@ -6842,6 +6934,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
+ struct l2cap_ctrl *control,
+ struct sk_buff *skb, u8 event)
+ {
++ struct l2cap_ctrl local_control;
+ int err = 0;
+ bool skb_in_use = false;
+
+@@ -6866,15 +6959,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
+ chan->buffer_seq = chan->expected_tx_seq;
+ skb_in_use = true;
+
++ /* l2cap_reassemble_sdu may free skb, hence invalidate
++ * control, so make a copy in advance to use it after
++ * l2cap_reassemble_sdu returns and to avoid the race
++ * condition, for example:
++ *
++ * The current thread calls:
++ * l2cap_reassemble_sdu
++ * chan->ops->recv == l2cap_sock_recv_cb
++ * __sock_queue_rcv_skb
++ * Another thread calls:
++ * bt_sock_recvmsg
++ * skb_recv_datagram
++ * skb_free_datagram
++ * Then the current thread tries to access control, but
++ * it was freed by skb_free_datagram.
++ */
++ local_control = *control;
+ err = l2cap_reassemble_sdu(chan, skb, control);
+ if (err)
+ break;
+
+- if (control->final) {
++ if (local_control.final) {
+ if (!test_and_clear_bit(CONN_REJ_ACT,
+ &chan->conn_state)) {
+- control->final = 0;
+- l2cap_retransmit_all(chan, control);
++ local_control.final = 0;
++ l2cap_retransmit_all(chan, &local_control);
+ l2cap_ertm_send(chan);
+ }
+ }
+@@ -7254,11 +7364,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
+ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
+ struct sk_buff *skb)
+ {
++ /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store
++ * the txseq field in advance to use it after l2cap_reassemble_sdu
++ * returns and to avoid the race condition, for example:
++ *
++ * The current thread calls:
++ * l2cap_reassemble_sdu
++ * chan->ops->recv == l2cap_sock_recv_cb
++ * __sock_queue_rcv_skb
++ * Another thread calls:
++ * bt_sock_recvmsg
++ * skb_recv_datagram
++ * skb_free_datagram
++ * Then the current thread tries to access control, but it was freed by
++ * skb_free_datagram.
++ */
++ u16 txseq = control->txseq;
++
+ BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb,
+ chan->rx_state);
+
+- if (l2cap_classify_txseq(chan, control->txseq) ==
+- L2CAP_TXSEQ_EXPECTED) {
++ if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) {
+ l2cap_pass_to_tx(chan, control);
+
+ BT_DBG("buffer_seq %u->%u", chan->buffer_seq,
+@@ -7281,8 +7407,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
+ }
+ }
+
+- chan->last_acked_seq = control->txseq;
+- chan->expected_tx_seq = __next_seq(chan, control->txseq);
++ chan->last_acked_seq = txseq;
++ chan->expected_tx_seq = __next_seq(chan, txseq);
+
+ return 0;
+ }
+@@ -7538,6 +7664,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
+ return;
+ }
+
++ l2cap_chan_hold(chan);
+ l2cap_chan_lock(chan);
+ } else {
+ BT_DBG("unknown cid 0x%4.4x", cid);
+@@ -7597,6 +7724,7 @@ drop:
+
+ done:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
+@@ -8085,7 +8213,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
+ if (src_type != c->src_type)
+ continue;
+
+- l2cap_chan_hold(c);
++ c = l2cap_chan_hold_unless_zero(c);
+ read_unlock(&chan_list_lock);
+ return c;
+ }
+@@ -8382,9 +8510,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+ * expected length.
+ */
+ if (skb->len < L2CAP_LEN_SIZE) {
+- if (l2cap_recv_frag(conn, skb, conn->mtu) < 0)
+- goto drop;
+- return;
++ l2cap_recv_frag(conn, skb, conn->mtu);
++ break;
+ }
+
+ len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE;
+@@ -8428,7 +8555,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+
+ /* Header still could not be read just continue */
+ if (conn->rx_skb->len < L2CAP_LEN_SIZE)
+- return;
++ break;
+ }
+
+ if (skb->len > conn->rx_len) {
+diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
+index c99d65ef13b1e..756523e5402a8 100644
+--- a/net/bluetooth/l2cap_sock.c
++++ b/net/bluetooth/l2cap_sock.c
+@@ -45,6 +45,7 @@ static const struct proto_ops l2cap_sock_ops;
+ static void l2cap_sock_init(struct sock *sk, struct sock *parent);
+ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern);
++static void l2cap_sock_cleanup_listen(struct sock *parent);
+
+ bool l2cap_is_socket(struct socket *sock)
+ {
+@@ -161,7 +162,11 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
+ break;
+ }
+
+- if (chan->psm && bdaddr_type_is_le(chan->src_type))
++ /* Use L2CAP_MODE_LE_FLOWCTL (CoC) in case of LE address and
++ * L2CAP_MODE_EXT_FLOWCTL (ECRED) has not been set.
++ */
++ if (chan->psm && bdaddr_type_is_le(chan->src_type) &&
++ chan->mode != L2CAP_MODE_EXT_FLOWCTL)
+ chan->mode = L2CAP_MODE_LE_FLOWCTL;
+
+ chan->state = BT_BOUND;
+@@ -172,6 +177,21 @@ done:
+ return err;
+ }
+
++static void l2cap_sock_init_pid(struct sock *sk)
++{
++ struct l2cap_chan *chan = l2cap_pi(sk)->chan;
++
++ /* Only L2CAP_MODE_EXT_FLOWCTL ever need to access the PID in order to
++ * group the channels being requested.
++ */
++ if (chan->mode != L2CAP_MODE_EXT_FLOWCTL)
++ return;
++
++ spin_lock(&sk->sk_peer_lock);
++ sk->sk_peer_pid = get_pid(task_tgid(current));
++ spin_unlock(&sk->sk_peer_lock);
++}
++
+ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
+ int alen, int flags)
+ {
+@@ -240,9 +260,15 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
+ return -EINVAL;
+ }
+
+- if (chan->psm && bdaddr_type_is_le(chan->src_type) && !chan->mode)
++ /* Use L2CAP_MODE_LE_FLOWCTL (CoC) in case of LE address and
++ * L2CAP_MODE_EXT_FLOWCTL (ECRED) has not been set.
++ */
++ if (chan->psm && bdaddr_type_is_le(chan->src_type) &&
++ chan->mode != L2CAP_MODE_EXT_FLOWCTL)
+ chan->mode = L2CAP_MODE_LE_FLOWCTL;
+
++ l2cap_sock_init_pid(sk);
++
+ err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
+ &la.l2_bdaddr, la.l2_bdaddr_type);
+ if (err)
+@@ -298,6 +324,8 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
+ goto done;
+ }
+
++ l2cap_sock_init_pid(sk);
++
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_ack_backlog = 0;
+
+@@ -876,6 +904,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
+ struct l2cap_conn *conn;
+ int len, err = 0;
+ u32 opt;
++ u16 mtu;
++ u8 mode;
+
+ BT_DBG("sk %p", sk);
+
+@@ -1058,16 +1088,16 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (copy_from_sockptr(&opt, optval, sizeof(u16))) {
++ if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
+ sk->sk_state == BT_CONNECTED)
+- err = l2cap_chan_reconfigure(chan, opt);
++ err = l2cap_chan_reconfigure(chan, mtu);
+ else
+- chan->imtu = opt;
++ chan->imtu = mtu;
+
+ break;
+
+@@ -1089,14 +1119,14 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (copy_from_sockptr(&opt, optval, sizeof(u8))) {
++ if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
+ err = -EFAULT;
+ break;
+ }
+
+- BT_DBG("opt %u", opt);
++ BT_DBG("mode %u", mode);
+
+- err = l2cap_set_mode(chan, opt);
++ err = l2cap_set_mode(chan, mode);
+ if (err)
+ break;
+
+@@ -1385,6 +1415,7 @@ static int l2cap_sock_release(struct socket *sock)
+ if (!sk)
+ return 0;
+
++ l2cap_sock_cleanup_listen(sk);
+ bt_sock_unlink(&l2cap_sk_list, sk);
+
+ err = l2cap_sock_shutdown(sock, SHUT_RDWR);
+@@ -1508,6 +1539,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
+ {
+ struct sock *sk = chan->data;
+
++ if (!sk)
++ return;
++
+ l2cap_sock_kill(sk);
+ }
+
+@@ -1516,6 +1550,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
+ struct sock *sk = chan->data;
+ struct sock *parent;
+
++ if (!sk)
++ return;
++
+ BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+
+ /* This callback can be called both for server (BT_LISTEN)
+@@ -1588,6 +1625,14 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
+ if (!skb)
+ return ERR_PTR(err);
+
++ /* Channel lock is released before requesting new skb and then
++ * reacquired thus we need to recheck channel state.
++ */
++ if (chan->state != BT_CONNECTED) {
++ kfree_skb(skb);
++ return ERR_PTR(-ENOTCONN);
++ }
++
+ skb->priority = sk->sk_priority;
+
+ bt_cb(skb)->l2cap.chan = chan;
+@@ -1707,8 +1752,10 @@ static void l2cap_sock_destruct(struct sock *sk)
+ {
+ BT_DBG("sk %p", sk);
+
+- if (l2cap_pi(sk)->chan)
++ if (l2cap_pi(sk)->chan) {
++ l2cap_pi(sk)->chan->data = NULL;
+ l2cap_chan_put(l2cap_pi(sk)->chan);
++ }
+
+ if (l2cap_pi(sk)->rx_busy_skb) {
+ kfree_skb(l2cap_pi(sk)->rx_busy_skb);
+diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
+index cea01e275f1ea..04000499f4a21 100644
+--- a/net/bluetooth/mgmt.c
++++ b/net/bluetooth/mgmt.c
+@@ -3806,7 +3806,7 @@ static const u8 rpa_resolution_uuid[16] = {
+ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 data_len)
+ {
+- char buf[62]; /* Enough space for 3 features */
++ char buf[62]; /* Enough space for 3 features */
+ struct mgmt_rp_read_exp_features_info *rp = (void *)buf;
+ u16 idx = 0;
+ u32 flags;
+@@ -3892,150 +3892,186 @@ static int exp_debug_feature_changed(bool enabled, struct sock *skip)
+ }
+ #endif
+
+-static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
+- void *data, u16 data_len)
++#define EXP_FEAT(_uuid, _set_func) \
++{ \
++ .uuid = _uuid, \
++ .set_func = _set_func, \
++}
++
++/* The zero key uuid is special. Multiple exp features are set through it. */
++static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev,
++ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
+ {
+- struct mgmt_cp_set_exp_feature *cp = data;
+ struct mgmt_rp_set_exp_feature rp;
+
+- bt_dev_dbg(hdev, "sock %p", sk);
+-
+- if (!memcmp(cp->uuid, ZERO_KEY, 16)) {
+- memset(rp.uuid, 0, 16);
+- rp.flags = cpu_to_le32(0);
++ memset(rp.uuid, 0, 16);
++ rp.flags = cpu_to_le32(0);
+
+ #ifdef CONFIG_BT_FEATURE_DEBUG
+- if (!hdev) {
+- bool changed = bt_dbg_get();
++ if (!hdev) {
++ bool changed = bt_dbg_get();
+
+- bt_dbg_set(false);
++ bt_dbg_set(false);
+
+- if (changed)
+- exp_debug_feature_changed(false, sk);
+- }
++ if (changed)
++ exp_debug_feature_changed(false, sk);
++ }
+ #endif
+
+- if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) {
+- bool changed = hci_dev_test_flag(hdev,
+- HCI_ENABLE_LL_PRIVACY);
+-
+- hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
++ if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) {
++ bool changed;
+
+- if (changed)
+- exp_ll_privacy_feature_changed(false, hdev, sk);
+- }
++ changed = hci_dev_test_and_clear_flag(hdev,
++ HCI_ENABLE_LL_PRIVACY);
++ if (changed)
++ exp_ll_privacy_feature_changed(false, hdev, sk);
++ }
+
+- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
++ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+- return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+- MGMT_OP_SET_EXP_FEATURE, 0,
+- &rp, sizeof(rp));
+- }
++ return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
++ MGMT_OP_SET_EXP_FEATURE, 0,
++ &rp, sizeof(rp));
++}
+
+ #ifdef CONFIG_BT_FEATURE_DEBUG
+- if (!memcmp(cp->uuid, debug_uuid, 16)) {
+- bool val, changed;
+- int err;
++static int set_debug_func(struct sock *sk, struct hci_dev *hdev,
++ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
++{
++ struct mgmt_rp_set_exp_feature rp;
+
+- /* Command requires to use the non-controller index */
+- if (hdev)
+- return mgmt_cmd_status(sk, hdev->id,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_INDEX);
++ bool val, changed;
++ int err;
+
+- /* Parameters are limited to a single octet */
+- if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_PARAMS);
++ /* Command requires to use the non-controller index */
++ if (hdev)
++ return mgmt_cmd_status(sk, hdev->id,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_INDEX);
+
+- /* Only boolean on/off is supported */
+- if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_PARAMS);
++ /* Parameters are limited to a single octet */
++ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
++ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_PARAMS);
+
+- val = !!cp->param[0];
+- changed = val ? !bt_dbg_get() : bt_dbg_get();
+- bt_dbg_set(val);
++ /* Only boolean on/off is supported */
++ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
++ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_PARAMS);
+
+- memcpy(rp.uuid, debug_uuid, 16);
+- rp.flags = cpu_to_le32(val ? BIT(0) : 0);
++ val = !!cp->param[0];
++ changed = val ? !bt_dbg_get() : bt_dbg_get();
++ bt_dbg_set(val);
+
+- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
++ memcpy(rp.uuid, debug_uuid, 16);
++ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+
+- err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
+- MGMT_OP_SET_EXP_FEATURE, 0,
+- &rp, sizeof(rp));
++ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+- if (changed)
+- exp_debug_feature_changed(val, sk);
++ err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
++ MGMT_OP_SET_EXP_FEATURE, 0,
++ &rp, sizeof(rp));
+
+- return err;
+- }
++ if (changed)
++ exp_debug_feature_changed(val, sk);
++
++ return err;
++}
+ #endif
+
+- if (!memcmp(cp->uuid, rpa_resolution_uuid, 16)) {
+- bool val, changed;
+- int err;
+- u32 flags;
++static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev,
++ struct mgmt_cp_set_exp_feature *cp,
++ u16 data_len)
++{
++ struct mgmt_rp_set_exp_feature rp;
++ bool val, changed;
++ int err;
++ u32 flags;
+
+- /* Command requires to use the controller index */
+- if (!hdev)
+- return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_INDEX);
++ /* Command requires to use the controller index */
++ if (!hdev)
++ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_INDEX);
+
+- /* Changes can only be made when controller is powered down */
+- if (hdev_is_powered(hdev))
+- return mgmt_cmd_status(sk, hdev->id,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_REJECTED);
++ /* Changes can only be made when controller is powered down */
++ if (hdev_is_powered(hdev))
++ return mgmt_cmd_status(sk, hdev->id,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_REJECTED);
+
+- /* Parameters are limited to a single octet */
+- if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+- return mgmt_cmd_status(sk, hdev->id,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_PARAMS);
++ /* Parameters are limited to a single octet */
++ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
++ return mgmt_cmd_status(sk, hdev->id,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_PARAMS);
+
+- /* Only boolean on/off is supported */
+- if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+- return mgmt_cmd_status(sk, hdev->id,
+- MGMT_OP_SET_EXP_FEATURE,
+- MGMT_STATUS_INVALID_PARAMS);
++ /* Only boolean on/off is supported */
++ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
++ return mgmt_cmd_status(sk, hdev->id,
++ MGMT_OP_SET_EXP_FEATURE,
++ MGMT_STATUS_INVALID_PARAMS);
+
+- val = !!cp->param[0];
++ val = !!cp->param[0];
+
+- if (val) {
+- changed = !hci_dev_test_flag(hdev,
++ if (val) {
++ changed = !hci_dev_test_and_set_flag(hdev,
+ HCI_ENABLE_LL_PRIVACY);
+- hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+- hci_dev_clear_flag(hdev, HCI_ADVERTISING);
++ hci_dev_clear_flag(hdev, HCI_ADVERTISING);
+
+- /* Enable LL privacy + supported settings changed */
+- flags = BIT(0) | BIT(1);
+- } else {
+- changed = hci_dev_test_flag(hdev,
+- HCI_ENABLE_LL_PRIVACY);
+- hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
++ /* Enable LL privacy + supported settings changed */
++ flags = BIT(0) | BIT(1);
++ } else {
++ changed = hci_dev_test_and_clear_flag(hdev,
++ HCI_ENABLE_LL_PRIVACY);
+
+- /* Disable LL privacy + supported settings changed */
+- flags = BIT(1);
+- }
++ /* Disable LL privacy + supported settings changed */
++ flags = BIT(1);
++ }
+
+- memcpy(rp.uuid, rpa_resolution_uuid, 16);
+- rp.flags = cpu_to_le32(flags);
++ memcpy(rp.uuid, rpa_resolution_uuid, 16);
++ rp.flags = cpu_to_le32(flags);
+
+- hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
++ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+- err = mgmt_cmd_complete(sk, hdev->id,
+- MGMT_OP_SET_EXP_FEATURE, 0,
+- &rp, sizeof(rp));
++ err = mgmt_cmd_complete(sk, hdev->id,
++ MGMT_OP_SET_EXP_FEATURE, 0,
++ &rp, sizeof(rp));
+
+- if (changed)
+- exp_ll_privacy_feature_changed(val, hdev, sk);
++ if (changed)
++ exp_ll_privacy_feature_changed(val, hdev, sk);
+
+- return err;
++ return err;
++}
++
++static const struct mgmt_exp_feature {
++ const u8 *uuid;
++ int (*set_func)(struct sock *sk, struct hci_dev *hdev,
++ struct mgmt_cp_set_exp_feature *cp, u16 data_len);
++} exp_features[] = {
++ EXP_FEAT(ZERO_KEY, set_zero_key_func),
++#ifdef CONFIG_BT_FEATURE_DEBUG
++ EXP_FEAT(debug_uuid, set_debug_func),
++#endif
++ EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func),
++
++ /* end with a null feature */
++ EXP_FEAT(NULL, NULL)
++};
++
++static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
++ void *data, u16 data_len)
++{
++ struct mgmt_cp_set_exp_feature *cp = data;
++ size_t i = 0;
++
++ bt_dev_dbg(hdev, "sock %p", sk);
++
++ for (i = 0; exp_features[i].uuid; i++) {
++ if (!memcmp(cp->uuid, exp_features[i].uuid, 16))
++ return exp_features[i].set_func(sk, hdev, cp, data_len);
+ }
+
+ return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+@@ -7935,7 +7971,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev,
+ * extra parameters we don't know about will be ignored in this request.
+ */
+ if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE)
+- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
++ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ flags = __le32_to_cpu(cp->flags);
+diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
+index f2bacb464ccf3..8d6fce9005bdd 100644
+--- a/net/bluetooth/rfcomm/core.c
++++ b/net/bluetooth/rfcomm/core.c
+@@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel)
+ return dlc;
+ }
+
++static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag)
++{
++ int len = frag->len;
++
++ BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
++
++ if (len > d->mtu)
++ return -EINVAL;
++
++ rfcomm_make_uih(frag, d->addr);
++ __skb_queue_tail(&d->tx_queue, frag);
++
++ return len;
++}
++
+ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
+ {
+- int len = skb->len;
++ unsigned long flags;
++ struct sk_buff *frag, *next;
++ int len;
+
+ if (d->state != BT_CONNECTED)
+ return -ENOTCONN;
+
+- BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
++ frag = skb_shinfo(skb)->frag_list;
++ skb_shinfo(skb)->frag_list = NULL;
+
+- if (len > d->mtu)
+- return -EINVAL;
++ /* Queue all fragments atomically. */
++ spin_lock_irqsave(&d->tx_queue.lock, flags);
+
+- rfcomm_make_uih(skb, d->addr);
+- skb_queue_tail(&d->tx_queue, skb);
++ len = rfcomm_dlc_send_frag(d, skb);
++ if (len < 0 || !frag)
++ goto unlock;
++
++ for (; frag; frag = next) {
++ int ret;
++
++ next = frag->next;
++
++ ret = rfcomm_dlc_send_frag(d, frag);
++ if (ret < 0) {
++ dev_kfree_skb_irq(frag);
++ goto unlock;
++ }
++
++ len += ret;
++ }
++
++unlock:
++ spin_unlock_irqrestore(&d->tx_queue.lock, flags);
+
+- if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
++ if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+ rfcomm_schedule();
+ return len;
+ }
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index 2c95bb58f901a..4397e14ff560f 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -391,6 +391,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
+ addr->sa_family != AF_BLUETOOTH)
+ return -EINVAL;
+
++ sock_hold(sk);
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+@@ -410,14 +411,18 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
+ d->sec_level = rfcomm_pi(sk)->sec_level;
+ d->role_switch = rfcomm_pi(sk)->role_switch;
+
++ /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */
++ release_sock(sk);
+ err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr,
+ sa->rc_channel);
+- if (!err)
++ lock_sock(sk);
++ if (!err && !sock_flag(sk, SOCK_ZAPPED))
+ err = bt_sock_wait_state(sk, BT_CONNECTED,
+ sock_sndtimeo(sk, flags & O_NONBLOCK));
+
+ done:
+ release_sock(sk);
++ sock_put(sk);
+ return err;
+ }
+
+@@ -575,46 +580,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ lock_sock(sk);
+
+ sent = bt_sock_wait_ready(sk, msg->msg_flags);
+- if (sent)
+- goto done;
+-
+- while (len) {
+- size_t size = min_t(size_t, len, d->mtu);
+- int err;
+-
+- skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
+- msg->msg_flags & MSG_DONTWAIT, &err);
+- if (!skb) {
+- if (sent == 0)
+- sent = err;
+- break;
+- }
+- skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
+-
+- err = memcpy_from_msg(skb_put(skb, size), msg, size);
+- if (err) {
+- kfree_skb(skb);
+- if (sent == 0)
+- sent = err;
+- break;
+- }
+
+- skb->priority = sk->sk_priority;
++ release_sock(sk);
+
+- err = rfcomm_dlc_send(d, skb);
+- if (err < 0) {
+- kfree_skb(skb);
+- if (sent == 0)
+- sent = err;
+- break;
+- }
++ if (sent)
++ return sent;
+
+- sent += size;
+- len -= size;
+- }
++ skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE,
++ RFCOMM_SKB_TAIL_RESERVE);
++ if (IS_ERR(skb))
++ return PTR_ERR(skb);
+
+-done:
+- release_sock(sk);
++ sent = rfcomm_dlc_send(d, skb);
++ if (sent < 0)
++ kfree_skb(skb);
+
+ return sent;
+ }
+@@ -928,7 +907,10 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
+ lock_sock(sk);
+ if (!sk->sk_shutdown) {
+ sk->sk_shutdown = SHUTDOWN_MASK;
++
++ release_sock(sk);
+ __rfcomm_sock_close(sk);
++ lock_sock(sk);
+
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
+diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
+index 98a8815865128..9a8814d4565a0 100644
+--- a/net/bluetooth/sco.c
++++ b/net/bluetooth/sco.c
+@@ -133,6 +133,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
+ return NULL;
+
+ spin_lock_init(&conn->lock);
++ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+
+ hcon->sco_data = conn;
+ conn->hcon = hcon;
+@@ -187,20 +188,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
+ /* Kill socket */
+ sco_conn_lock(conn);
+ sk = conn->sk;
++ if (sk)
++ sock_hold(sk);
+ sco_conn_unlock(conn);
+
+ if (sk) {
+- sock_hold(sk);
+ lock_sock(sk);
+ sco_sock_clear_timer(sk);
+ sco_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
+-
+- /* Ensure no more work items will run before freeing conn. */
+- cancel_delayed_work_sync(&conn->timeout_work);
+ }
+
++ /* Ensure no more work items will run before freeing conn. */
++ cancel_delayed_work_sync(&conn->timeout_work);
++
+ hcon->sco_data = NULL;
+ kfree(conn);
+ }
+@@ -213,8 +215,6 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
+ sco_pi(sk)->conn = conn;
+ conn->sk = sk;
+
+- INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+-
+ if (parent)
+ bt_accept_enqueue(parent, sk, true);
+ }
+@@ -280,11 +280,10 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
+ return err;
+ }
+
+-static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
++static int sco_send_frame(struct sock *sk, struct sk_buff *skb)
+ {
+ struct sco_conn *conn = sco_pi(sk)->conn;
+- struct sk_buff *skb;
+- int err;
++ int len = skb->len;
+
+ /* Check outgoing MTU */
+ if (len > conn->mtu)
+@@ -292,15 +291,6 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+
+ BT_DBG("sk %p len %d", sk, len);
+
+- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
+- if (!skb)
+- return err;
+-
+- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
+- kfree_skb(skb);
+- return -EFAULT;
+- }
+-
+ hci_send_sco(conn->hcon, skb);
+
+ return len;
+@@ -578,19 +568,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
+ addr->sa_family != AF_BLUETOOTH)
+ return -EINVAL;
+
+- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+- return -EBADFD;
++ lock_sock(sk);
++ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
++ err = -EBADFD;
++ goto done;
++ }
+
+- if (sk->sk_type != SOCK_SEQPACKET)
+- return -EINVAL;
++ if (sk->sk_type != SOCK_SEQPACKET) {
++ err = -EINVAL;
++ goto done;
++ }
+
+ hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
+- if (!hdev)
+- return -EHOSTUNREACH;
++ if (!hdev) {
++ err = -EHOSTUNREACH;
++ goto done;
++ }
+ hci_dev_lock(hdev);
+
+- lock_sock(sk);
+-
+ /* Set destination address and psm */
+ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
+
+@@ -725,6 +720,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
+ {
+ struct sock *sk = sock->sk;
++ struct sk_buff *skb;
+ int err;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+@@ -736,14 +732,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
++ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
++ if (IS_ERR(skb))
++ return PTR_ERR(skb);
++
+ lock_sock(sk);
+
+ if (sk->sk_state == BT_CONNECTED)
+- err = sco_send_frame(sk, msg, len);
++ err = sco_send_frame(sk, skb);
+ else
+ err = -ENOTCONN;
+
+ release_sock(sk);
++
++ if (err < 0)
++ kfree_skb(skb);
+ return err;
+ }
+
+diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
+index b5f4ef35357c8..11d254ce3581c 100644
+--- a/net/bpf/test_run.c
++++ b/net/bpf/test_run.c
+@@ -259,6 +259,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
+ if (user_size > size)
+ return ERR_PTR(-EMSGSIZE);
+
++ size = SKB_DATA_ALIGN(size);
+ data = kzalloc(size + headroom + tailroom, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+@@ -954,7 +955,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
+ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
+ goto out;
+
+- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
++ if (user_ctx->local_port > U16_MAX) {
+ ret = -ERANGE;
+ goto out;
+ }
+@@ -962,7 +963,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
+ ctx.family = (u16)user_ctx->family;
+ ctx.protocol = (u16)user_ctx->protocol;
+ ctx.dport = (u16)user_ctx->local_port;
+- ctx.sport = (__force __be16)user_ctx->remote_port;
++ ctx.sport = user_ctx->remote_port;
+
+ switch (ctx.family) {
+ case AF_INET:
+diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
+index ec646656dbf14..3d69ad5463a9f 100644
+--- a/net/bridge/br_forward.c
++++ b/net/bridge/br_forward.c
+@@ -42,7 +42,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
+ eth_type_vlan(skb->protocol)) {
+ int depth;
+
+- if (!__vlan_get_protocol(skb, skb->protocol, &depth))
++ if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth))
+ goto drop;
+
+ skb_set_network_header(skb, depth);
+diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
+index 4a02f8bb278a1..1f70441cbada8 100644
+--- a/net/bridge/br_if.c
++++ b/net/bridge/br_if.c
+@@ -157,8 +157,9 @@ void br_manage_promisc(struct net_bridge *br)
+ * This lets us disable promiscuous mode and write
+ * this config to hw.
+ */
+- if (br->auto_cnt == 0 ||
+- (br->auto_cnt == 1 && br_auto_port(p)))
++ if ((p->dev->priv_flags & IFF_UNICAST_FLT) &&
++ (br->auto_cnt == 0 ||
++ (br->auto_cnt == 1 && br_auto_port(p))))
+ br_port_clear_promisc(p);
+ else
+ br_port_set_promisc(p);
+diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
+index b50382f957c12..6743c8a0fe8e1 100644
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
+ dev_sw_netstats_rx_add(brdev, skb->len);
+
+ vg = br_vlan_group_rcu(br);
++
++ /* Reset the offload_fwd_mark because there could be a stacked
++ * bridge above, and it should not think this bridge it doing
++ * that bridge's work forwarding out its ports.
++ */
++ br_switchdev_frame_unmark(skb);
++
+ /* Bridge is just like any other port. Make sure the
+ * packet is allowed except in promisc mode when someone
+ * may be running packet capture.
+diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
+index 793b0db9d9a36..9922497e59f8c 100644
+--- a/net/bridge/br_ioctl.c
++++ b/net/bridge/br_ioctl.c
+@@ -71,7 +71,8 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf,
+
+ num = br_fdb_fillbuf(br, buf, maxnum, offset);
+ if (num > 0) {
+- if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry)))
++ if (copy_to_user(userbuf, buf,
++ array_size(num, sizeof(struct __fdb_entry))))
+ num = -EFAULT;
+ }
+ kfree(buf);
+@@ -188,7 +189,7 @@ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user
+ return -ENOMEM;
+
+ get_port_ifindices(br, indices, num);
+- if (copy_to_user(argp, indices, num * sizeof(int)))
++ if (copy_to_user(argp, indices, array_size(num, sizeof(int))))
+ num = -EFAULT;
+ kfree(indices);
+ return num;
+@@ -336,7 +337,8 @@ static int old_deviceless(struct net *net, void __user *uarg)
+
+ args[2] = get_bridge_ifindices(net, indices, args[2]);
+
+- ret = copy_to_user(uarg, indices, args[2]*sizeof(int))
++ ret = copy_to_user((void __user *)args[1], indices,
++ array_size(args[2], sizeof(int)))
+ ? -EFAULT : args[2];
+
+ kfree(indices);
+diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
+index f3d751105343c..db4f2641d1cd1 100644
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
+ struct net_bridge_port_group *pg);
+ static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+
++static int br_mc_disabled_update(struct net_device *dev, bool value,
++ struct netlink_ext_ack *extack);
++
+ static struct net_bridge_port_group *
+ br_sg_port_find(struct net_bridge *br,
+ struct net_bridge_port_group_sg_key *sg_p)
+@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
+ return mp;
+
+ if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
++ br_mc_disabled_update(br->dev, false, NULL);
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
+ return ERR_PTR(-E2BIG);
+ }
+@@ -4522,6 +4526,38 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ }
+ #endif
+
++void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
++ unsigned long val)
++{
++ unsigned long intvl_jiffies = clock_t_to_jiffies(val);
++
++ if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) {
++ br_info(brmctx->br,
++ "trying to set multicast query interval below minimum, setting to %lu (%ums)\n",
++ jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN),
++ jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN));
++ intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN;
++ }
++
++ brmctx->multicast_query_interval = intvl_jiffies;
++}
++
++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
++ unsigned long val)
++{
++ unsigned long intvl_jiffies = clock_t_to_jiffies(val);
++
++ if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) {
++ br_info(brmctx->br,
++ "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n",
++ jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN),
++ jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN));
++ intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN;
++ }
++
++ brmctx->multicast_startup_query_interval = intvl_jiffies;
++}
++
+ /**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev: The bridge port adjacent to which to retrieve addresses
+diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
+index 8edfb98ae1d58..f14beb9a62edb 100644
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -384,6 +384,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
+ /* - Bridged-and-DNAT'ed traffic doesn't
+ * require ip_forwarding. */
+ if (rt->dst.dev == dev) {
++ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
+ goto bridged_dnat;
+ }
+@@ -413,6 +414,7 @@ bridged_dnat:
+ kfree_skb(skb);
+ return 0;
+ }
++ skb_dst_drop(skb);
+ skb_dst_set_noref(skb, &rt->dst);
+ }
+
+@@ -743,6 +745,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
+ if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+ mtu = nf_bridge->frag_max_size;
+
++ nf_bridge_update_protocol(skb);
++ nf_bridge_push_encap_header(skb);
++
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
+ nf_bridge_info_free(skb);
+ return br_dev_queue_push_xmit(net, sk, skb);
+@@ -760,8 +765,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
+
+ IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
+
+- nf_bridge_update_protocol(skb);
+-
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+
+ if (skb_vlan_tag_present(skb)) {
+@@ -789,8 +792,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
+
+ IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+
+- nf_bridge_update_protocol(skb);
+-
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+@@ -867,11 +868,17 @@ static unsigned int ip_sabotage_in(void *priv,
+ {
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+- if (nf_bridge && !nf_bridge->in_prerouting &&
+- !netif_is_l3_master(skb->dev) &&
+- !netif_is_l3_slave(skb->dev)) {
+- state->okfn(state->net, state->sk, skb);
+- return NF_STOLEN;
++ if (nf_bridge) {
++ if (nf_bridge->sabotage_in_done)
++ return NF_ACCEPT;
++
++ if (!nf_bridge->in_prerouting &&
++ !netif_is_l3_master(skb->dev) &&
++ !netif_is_l3_slave(skb->dev)) {
++ nf_bridge->sabotage_in_done = 1;
++ state->okfn(state->net, state->sk, skb);
++ return NF_STOLEN;
++ }
+ }
+
+ return NF_ACCEPT;
+@@ -1013,9 +1020,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
+ return okfn(net, sk, skb);
+
+ ops = nf_hook_entries_get_hook_ops(e);
+- for (i = 0; i < e->num_hook_entries &&
+- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+- ;
++ for (i = 0; i < e->num_hook_entries; i++) {
++ /* These hooks have already been called */
++ if (ops[i]->priority < NF_BR_PRI_BRNF)
++ continue;
++
++ /* These hooks have not been called yet, run them. */
++ if (ops[i]->priority > NF_BR_PRI_BRNF)
++ break;
++
++ /* take a closer look at NF_BR_PRI_BRNF. */
++ if (ops[i]->hook == br_nf_pre_routing) {
++ /* This hook diverted the skb to this function,
++ * hooks after this have not been run yet.
++ */
++ i++;
++ break;
++ }
++ }
+
+ nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
+ sk, net, okfn);
+diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
+index e4e0c836c3f51..6b07f30675bb0 100644
+--- a/net/bridge/br_netfilter_ipv6.c
++++ b/net/bridge/br_netfilter_ipv6.c
+@@ -197,6 +197,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
+ kfree_skb(skb);
+ return 0;
+ }
++ skb_dst_drop(skb);
+ skb_dst_set_noref(skb, &rt->dst);
+ }
+
+diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
+index 5c6c4305ed235..e365cf82f0615 100644
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
+ if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
+
+- br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_query_intvl(&br->multicast_ctx, val);
+ }
+
+ if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
+@@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
+ if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
+
+- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
+ }
+
+ if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index 37ca76406f1e8..bd218c2b2cd97 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -28,6 +28,8 @@
+ #define BR_MAX_PORTS (1<<BR_PORT_BITS)
+
+ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096
++#define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000)
++#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN
+
+ #define BR_HWDOM_MAX BITS_PER_LONG
+
+@@ -968,6 +970,10 @@ int br_multicast_dump_querier_state(struct sk_buff *skb,
+ int nest_attr);
+ size_t br_multicast_querier_state_size(void);
+ size_t br_rports_size(const struct net_bridge_mcast *brmctx);
++void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
++ unsigned long val);
++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
++ unsigned long val);
+
+ static inline bool br_group_is_l2(const struct br_ip *group)
+ {
+@@ -1152,9 +1158,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+ static inline bool
+ br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+ {
+- return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+- br_multicast_ctx_is_vlan(brmctx) &&
+- !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
++ return br_multicast_ctx_is_vlan(brmctx) &&
++ (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) ||
++ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED));
+ }
+
+ static inline bool
+@@ -1911,11 +1917,13 @@ static inline int br_cfm_status_fill_info(struct sk_buff *skb,
+
+ static inline int br_cfm_mep_count(struct net_bridge *br, u32 *count)
+ {
++ *count = 0;
+ return -EOPNOTSUPP;
+ }
+
+ static inline int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count)
+ {
++ *count = 0;
+ return -EOPNOTSUPP;
+ }
+ #endif
+diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
+index 2b053289f0166..efb096025151a 100644
+--- a/net/bridge/br_private_tunnel.h
++++ b/net/bridge/br_private_tunnel.h
+@@ -27,6 +27,10 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
+ int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
+ int br_fill_vlan_tunnel_info(struct sk_buff *skb,
+ struct net_bridge_vlan_group *vg);
++bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
++ const struct net_bridge_vlan *v_last);
++int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd,
++ u16 vid, u32 tun_id, bool *changed);
+
+ #ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ /* br_vlan_tunnel.c */
+@@ -43,10 +47,6 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_vlan_group *vg);
+ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_vlan *vlan);
+-bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
+- const struct net_bridge_vlan *v_last);
+-int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd,
+- u16 vid, u32 tun_id, bool *changed);
+ #else
+ static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg)
+ {
+diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
+index ba55851fe132c..3326dfced68ab 100644
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -201,6 +201,9 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+ {
+ ASSERT_RTNL();
+
++ if (!net_eq(dev_net(br->dev), &init_net))
++ NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns");
++
+ if (br_mrp_enabled(br)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "STP can't be enabled if MRP is already enabled");
+diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
+index d9a89ddd03310..7b0c19772111c 100644
+--- a/net/bridge/br_sysfs_br.c
++++ b/net/bridge/br_sysfs_br.c
+@@ -658,7 +658,7 @@ static ssize_t multicast_query_interval_show(struct device *d,
+ static int set_query_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+ {
+- br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_query_intvl(&br->multicast_ctx, val);
+ return 0;
+ }
+
+@@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show(
+ static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+ {
+- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
+ return 0;
+ }
+
+diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
+index 19f65ab91a027..86441ff78a0f8 100644
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
+ !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+ if (*state == BR_STATE_FORWARDING) {
+ *state = br_vlan_get_pvid_state(vg);
+- return br_vlan_state_allowed(*state, true);
+- } else {
+- return true;
++ if (!br_vlan_state_allowed(*state, true))
++ goto drop;
+ }
++ return true;
+ }
+ }
+ v = br_vlan_find(vg, *vid);
+@@ -904,6 +904,8 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto,
+ list_for_each_entry(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++ continue;
+ err = vlan_vid_add(p->dev, proto, vlan->vid);
+ if (err)
+ goto err_filt;
+@@ -918,8 +920,11 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto,
+ /* Delete VLANs for the old proto from the device filter. */
+ list_for_each_entry(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+- list_for_each_entry(vlan, &vg->vlan_list, vlist)
++ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++ continue;
+ vlan_vid_del(p->dev, oldproto, vlan->vid);
++ }
+ }
+
+ return 0;
+@@ -928,13 +933,19 @@ err_filt:
+ attr.u.vlan_protocol = ntohs(oldproto);
+ switchdev_port_attr_set(br->dev, &attr, NULL);
+
+- list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist)
++ list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) {
++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++ continue;
+ vlan_vid_del(p->dev, proto, vlan->vid);
++ }
+
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+- list_for_each_entry(vlan, &vg->vlan_list, vlist)
++ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++ continue;
+ vlan_vid_del(p->dev, proto, vlan->vid);
++ }
+ }
+
+ return err;
+@@ -2105,7 +2116,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ goto out_err;
+ }
+ err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
+- if (err && err != -EMSGSIZE)
++ /* if the dump completed without an error we return 0 here */
++ if (err != -EMSGSIZE)
+ goto out_err;
+ } else {
+ for_each_netdev_rcu(net, dev) {
+diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
+index 8ffd4ed2563c6..a6382973b3e70 100644
+--- a/net/bridge/br_vlan_options.c
++++ b/net/bridge/br_vlan_options.c
+@@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
+- v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_query_intvl(&v->br_mcast_ctx, val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
+@@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
+- v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++ br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
+diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
+index a7af4eaff17d3..3d4ea774d7e8f 100644
+--- a/net/bridge/netfilter/ebtable_broute.c
++++ b/net/bridge/netfilter/ebtable_broute.c
+@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
+ .entries = (char *)&initial_chain,
+ };
+
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+- if (valid_hooks & ~(1 << NF_BR_BROUTING))
+- return -EINVAL;
+- return 0;
+-}
+-
+ static const struct ebt_table broute_table = {
+ .name = "broute",
+ .table = &initial_table,
+ .valid_hooks = 1 << NF_BR_BROUTING,
+- .check = check,
+ .me = THIS_MODULE,
+ };
+
+diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
+index c0b121df4a9af..257d63b5dec16 100644
+--- a/net/bridge/netfilter/ebtable_filter.c
++++ b/net/bridge/netfilter/ebtable_filter.c
+@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
+ .entries = (char *)initial_chains,
+ };
+
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+- if (valid_hooks & ~FILTER_VALID_HOOKS)
+- return -EINVAL;
+- return 0;
+-}
+-
+ static const struct ebt_table frame_filter = {
+ .name = "filter",
+ .table = &initial_table,
+ .valid_hooks = FILTER_VALID_HOOKS,
+- .check = check,
+ .me = THIS_MODULE,
+ };
+
+diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
+index 4078151c224fb..39179c2cf87d2 100644
+--- a/net/bridge/netfilter/ebtable_nat.c
++++ b/net/bridge/netfilter/ebtable_nat.c
+@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
+ .entries = (char *)initial_chains,
+ };
+
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+- if (valid_hooks & ~NAT_VALID_HOOKS)
+- return -EINVAL;
+- return 0;
+-}
+-
+ static const struct ebt_table frame_nat = {
+ .name = "nat",
+ .table = &initial_table,
+ .valid_hooks = NAT_VALID_HOOKS,
+- .check = check,
+ .me = THIS_MODULE,
+ };
+
+diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
+index ba045f35114dd..a09b2fc11c80e 100644
+--- a/net/bridge/netfilter/ebtables.c
++++ b/net/bridge/netfilter/ebtables.c
+@@ -1040,9 +1040,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
+ goto free_iterate;
+ }
+
+- /* the table doesn't like it */
+- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
++ if (repl->valid_hooks != t->valid_hooks) {
++ ret = -EINVAL;
+ goto free_unlock;
++ }
+
+ if (repl->num_counters && repl->num_counters != t->private->nentries) {
+ ret = -EINVAL;
+@@ -1089,7 +1090,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
+
+ audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
+ AUDIT_XT_OP_REPLACE, GFP_KERNEL);
+- return ret;
++ return 0;
+
+ free_unlock:
+ mutex_unlock(&ebt_mutex);
+@@ -1231,11 +1232,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
+ if (ret != 0)
+ goto free_chainstack;
+
+- if (table->check && table->check(newinfo, table->valid_hooks)) {
+- ret = -EINVAL;
+- goto free_chainstack;
+- }
+-
+ table->private = newinfo;
+ rwlock_init(&table->lock);
+ mutex_lock(&ebt_mutex);
+diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
+index eba0efe64d05a..fbf858ddec352 100644
+--- a/net/bridge/netfilter/nft_reject_bridge.c
++++ b/net/bridge/netfilter/nft_reject_bridge.c
+@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
+ {
+ struct sk_buff *nskb;
+
+- nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
++ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
+ if (!nskb)
+ return;
+
+@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
+ {
+ struct sk_buff *nskb;
+
+- nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
++ nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
+ if (!nskb)
+ return;
+
+@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
+ {
+ struct sk_buff *nskb;
+
+- nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
++ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
+ if (!nskb)
+ return;
+
+@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
+ {
+ struct sk_buff *nskb;
+
+- nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
++ nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
+ if (!nskb)
+ return;
+
+diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
+index e12fd3cad6194..997c4ebdce6f6 100644
+--- a/net/caif/caif_socket.c
++++ b/net/caif/caif_socket.c
+@@ -1020,6 +1020,7 @@ static void caif_sock_destructor(struct sock *sk)
+ return;
+ }
+ sk_stream_kill_queues(&cf_sk->sk);
++ WARN_ON(sk->sk_forward_alloc);
+ caif_free_client(&cf_sk->layer);
+ }
+
+diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
+index b02e1292f7f19..24488a4e2d26e 100644
+--- a/net/caif/caif_usb.c
++++ b/net/caif/caif_usb.c
+@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
+ struct usb_device *usbdev;
+ int res;
+
++ if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
++ return 0;
++
+ /* Check whether we have a NCM device, and find its VID/PID. */
+ if (!(dev->dev.parent && dev->dev.parent->driver &&
+ strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0))
+diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
+index 2809cbd6b7f74..d8cb4b2a076b4 100644
+--- a/net/caif/cfctrl.c
++++ b/net/caif/cfctrl.c
+@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
+ default:
+ pr_warn("Request setup of bad link type = %d\n",
+ param->linktype);
++ cfpkt_destroy(pkt);
+ return -EINVAL;
+ }
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+- if (!req)
++ if (!req) {
++ cfpkt_destroy(pkt);
+ return -ENOMEM;
++ }
++
+ req->client_layer = user_layer;
+ req->cmd = CFCTRL_CMD_LINK_SETUP;
+ req->param = *param;
+diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
+index 414dc5671c45e..2de6b44deb2c4 100644
+--- a/net/caif/chnl_net.c
++++ b/net/caif/chnl_net.c
+@@ -310,9 +310,6 @@ static int chnl_net_open(struct net_device *dev)
+
+ if (result == 0) {
+ pr_debug("connect timeout\n");
+- caif_disconnect_client(dev_net(dev), &priv->chnl);
+- priv->state = CAIF_DISCONNECTED;
+- pr_debug("state disconnected\n");
+ result = -ETIMEDOUT;
+ goto error;
+ }
+diff --git a/net/can/af_can.c b/net/can/af_can.c
+index cce2af10eb3ea..20d2dcb7c97ae 100644
+--- a/net/can/af_can.c
++++ b/net/can/af_can.c
+@@ -451,7 +451,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
+
+ /* insert new receiver (dev,canid,mask) -> (func,data) */
+
+- if (dev && dev->type != ARPHRD_CAN)
++ if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev)))
+ return -ENODEV;
+
+ if (dev && !net_eq(net, dev_net(dev)))
+@@ -680,7 +680,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
+ {
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+
+- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) {
++ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CAN_MTU)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
+ dev->type, skb->len);
+ goto free_skb;
+@@ -706,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
+ {
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+
+- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) {
++ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CANFD_MTU)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
+ dev->type, skb->len);
+ goto free_skb;
+diff --git a/net/can/bcm.c b/net/can/bcm.c
+index 508f67de0b801..a2fd68d1149b1 100644
+--- a/net/can/bcm.c
++++ b/net/can/bcm.c
+@@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
+
+ struct bcm_op {
+ struct list_head list;
++ struct rcu_head rcu;
+ int ifindex;
+ canid_t can_id;
+ u32 flags;
+@@ -273,6 +274,7 @@ static void bcm_can_tx(struct bcm_op *op)
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
++ int err;
+
+ /* no target device? => exit */
+ if (!op->ifindex)
+@@ -297,11 +299,11 @@ static void bcm_can_tx(struct bcm_op *op)
+ /* send with loopback */
+ skb->dev = dev;
+ can_skb_set_owner(skb, op->sk);
+- can_send(skb, 1);
++ err = can_send(skb, 1);
++ if (!err)
++ op->frames_abs++;
+
+- /* update statistics */
+ op->currframe++;
+- op->frames_abs++;
+
+ /* reached last frame? */
+ if (op->currframe >= op->nframes)
+@@ -718,10 +720,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
+ return NULL;
+ }
+
+-static void bcm_remove_op(struct bcm_op *op)
++static void bcm_free_op_rcu(struct rcu_head *rcu_head)
+ {
+- hrtimer_cancel(&op->timer);
+- hrtimer_cancel(&op->thrtimer);
++ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
+
+ if ((op->frames) && (op->frames != &op->sframe))
+ kfree(op->frames);
+@@ -732,6 +733,14 @@ static void bcm_remove_op(struct bcm_op *op)
+ kfree(op);
+ }
+
++static void bcm_remove_op(struct bcm_op *op)
++{
++ hrtimer_cancel(&op->timer);
++ hrtimer_cancel(&op->thrtimer);
++
++ call_rcu(&op->rcu, bcm_free_op_rcu);
++}
++
+ static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
+ {
+ if (op->rx_reg_dev == dev) {
+@@ -757,6 +766,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
+ if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
+ (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+
++ /* disable automatic timer on frame reception */
++ op->flags |= RX_NO_AUTOTIMER;
++
+ /*
+ * Don't care if we're bound or not (due to netdev
+ * problems) can_rx_unregister() is always a save
+@@ -785,7 +797,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
+ bcm_rx_handler, op);
+
+ list_del(&op->list);
+- synchronize_rcu();
+ bcm_remove_op(op);
+ return 1; /* done */
+ }
+@@ -925,6 +936,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+
+ cf = op->frames + op->cfsiz * i;
+ err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
++ if (err < 0)
++ goto free_op;
+
+ if (op->flags & CAN_FD_FRAME) {
+ if (cf->len > 64)
+@@ -934,12 +947,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+ err = -EINVAL;
+ }
+
+- if (err < 0) {
+- if (op->frames != &op->sframe)
+- kfree(op->frames);
+- kfree(op);
+- return err;
+- }
++ if (err < 0)
++ goto free_op;
+
+ if (msg_head->flags & TX_CP_CAN_ID) {
+ /* copy can_id into frame */
+@@ -1010,6 +1019,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+ bcm_tx_start_timer(op);
+
+ return msg_head->nframes * op->cfsiz + MHSIZ;
++
++free_op:
++ if (op->frames != &op->sframe)
++ kfree(op->frames);
++ kfree(op);
++ return err;
+ }
+
+ /*
+@@ -1506,6 +1521,12 @@ static int bcm_release(struct socket *sock)
+
+ lock_sock(sk);
+
++#if IS_ENABLED(CONFIG_PROC_FS)
++ /* remove procfs entry */
++ if (net->can.bcmproc_dir && bo->bcm_proc_read)
++ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
++#endif /* CONFIG_PROC_FS */
++
+ list_for_each_entry_safe(op, next, &bo->tx_ops, list)
+ bcm_remove_op(op);
+
+@@ -1541,12 +1562,6 @@ static int bcm_release(struct socket *sock)
+ list_for_each_entry_safe(op, next, &bo->rx_ops, list)
+ bcm_remove_op(op);
+
+-#if IS_ENABLED(CONFIG_PROC_FS)
+- /* remove procfs entry */
+- if (net->can.bcmproc_dir && bo->bcm_proc_read)
+- remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+-#endif /* CONFIG_PROC_FS */
+-
+ /* remove device reference */
+ if (bo->bound) {
+ bo->bound = 0;
+diff --git a/net/can/isotp.c b/net/can/isotp.c
+index df6968b28bf41..4dccf7b4b88d1 100644
+--- a/net/can/isotp.c
++++ b/net/can/isotp.c
+@@ -56,6 +56,7 @@
+ #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
++#include <linux/spinlock.h>
+ #include <linux/hrtimer.h>
+ #include <linux/wait.h>
+ #include <linux/uio.h>
+@@ -119,8 +120,8 @@ enum {
+ };
+
+ struct tpcon {
+- int idx;
+- int len;
++ unsigned int idx;
++ unsigned int len;
+ u32 state;
+ u8 bs;
+ u8 sn;
+@@ -140,11 +141,13 @@ struct isotp_sock {
+ struct can_isotp_options opt;
+ struct can_isotp_fc_options rxfc, txfc;
+ struct can_isotp_ll_options ll;
++ u32 frame_txtime;
+ u32 force_tx_stmin;
+ u32 force_rx_stmin;
+ struct tpcon rx, tx;
+ struct list_head notifier;
+ wait_queue_head_t wait;
++ spinlock_t rx_lock; /* protect single thread state machine */
+ };
+
+ static LIST_HEAD(isotp_notifier_list);
+@@ -358,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
+
+ so->tx_gap = ktime_set(0, 0);
+ /* add transmission time for CAN frame N_As */
+- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
++ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
+ /* add waiting time for consecutive frames N_Cs */
+ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+ so->tx_gap = ktime_add_ns(so->tx_gap,
+@@ -615,11 +618,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
+
+ n_pci_type = cf->data[ae] & 0xF0;
+
++ /* Make sure the state changes and data structures stay consistent at
++ * CAN frame reception time. This locking is not needed in real world
++ * use cases but the inconsistency can be triggered with syzkaller.
++ */
++ spin_lock(&so->rx_lock);
++
+ if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
+ /* check rx/tx path half duplex expectations */
+ if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
+ (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
+- return;
++ goto out_unlock;
+ }
+
+ switch (n_pci_type) {
+@@ -668,6 +677,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
+ isotp_rcv_cf(sk, cf, ae, skb);
+ break;
+ }
++
++out_unlock:
++ spin_unlock(&so->rx_lock);
+ }
+
+ static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
+@@ -854,6 +866,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ struct canfd_frame *cf;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+ int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
++ s64 hrtimer_sec = 0;
+ int off;
+ int err;
+
+@@ -876,7 +889,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+
+ if (!size || size > MAX_MSG_LENGTH) {
+ err = -EINVAL;
+- goto err_out;
++ goto err_out_drop;
+ }
+
+ /* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+@@ -886,24 +899,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+ (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
+ err = -EINVAL;
+- goto err_out;
++ goto err_out_drop;
+ }
+
+ err = memcpy_from_msg(so->tx.buf, msg, size);
+ if (err < 0)
+- goto err_out;
++ goto err_out_drop;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev) {
+ err = -ENXIO;
+- goto err_out;
++ goto err_out_drop;
+ }
+
+ skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
+ msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb) {
+ dev_put(dev);
+- goto err_out;
++ goto err_out_drop;
+ }
+
+ can_skb_reserve(skb);
+@@ -952,7 +965,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ isotp_create_fframe(cf, so, ae);
+
+ /* start timeout for FC */
+- hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
++ hrtimer_sec = 1;
++ hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
++ HRTIMER_MODE_REL_SOFT);
+ }
+
+ /* send the first or only CAN frame */
+@@ -965,19 +980,28 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ if (err) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(err));
+- goto err_out;
++
++ /* no transmission -> no timeout monitoring */
++ if (hrtimer_sec)
++ hrtimer_cancel(&so->txtimer);
++
++ goto err_out_drop;
+ }
+
+ if (wait_tx_done) {
+ /* wait for complete transmission of current pdu */
+ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+
+- if (sk->sk_err)
+- return -sk->sk_err;
++ err = sock_error(sk);
++ if (err)
++ return err;
+ }
+
+ return size;
+
++err_out_drop:
++ /* drop this PDU and unlock a potential wait queue */
++ old_state = ISOTP_IDLE;
+ err_out:
+ so->tx.state = old_state;
+ if (so->tx.state == ISOTP_IDLE)
+@@ -991,26 +1015,29 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ {
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+- int err = 0;
+- int noblock;
++ struct isotp_sock *so = isotp_sk(sk);
++ int noblock = flags & MSG_DONTWAIT;
++ int ret = 0;
+
+- noblock = flags & MSG_DONTWAIT;
+- flags &= ~MSG_DONTWAIT;
++ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK | MSG_CMSG_COMPAT))
++ return -EINVAL;
++
++ if (!so->bound)
++ return -EADDRNOTAVAIL;
+
+- skb = skb_recv_datagram(sk, flags, noblock, &err);
++ flags &= ~MSG_DONTWAIT;
++ skb = skb_recv_datagram(sk, flags, noblock, &ret);
+ if (!skb)
+- return err;
++ return ret;
+
+ if (size < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ size = skb->len;
+
+- err = memcpy_to_msg(msg, skb->data, size);
+- if (err < 0) {
+- skb_free_datagram(sk, skb);
+- return err;
+- }
++ ret = memcpy_to_msg(msg, skb->data, size);
++ if (ret < 0)
++ goto out_err;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+@@ -1020,9 +1047,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+ }
+
++ /* set length of return value */
++ ret = (flags & MSG_TRUNC) ? skb->len : size;
++
++out_err:
+ skb_free_datagram(sk, skb);
+
+- return size;
++ return ret;
+ }
+
+ static int isotp_release(struct socket *sock)
+@@ -1090,6 +1121,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ struct net *net = sock_net(sk);
+ int ifindex;
+ struct net_device *dev;
++ canid_t tx_id, rx_id;
+ int err = 0;
+ int notify_enetdown = 0;
+ int do_rx_reg = 1;
+@@ -1097,35 +1129,38 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ if (len < ISOTP_MIN_NAMELEN)
+ return -EINVAL;
+
+- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
+- return -EADDRNOTAVAIL;
++ /* sanitize tx/rx CAN identifiers */
++ tx_id = addr->can_addr.tp.tx_id;
++ if (tx_id & CAN_EFF_FLAG)
++ tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
++ else
++ tx_id &= CAN_SFF_MASK;
++
++ rx_id = addr->can_addr.tp.rx_id;
++ if (rx_id & CAN_EFF_FLAG)
++ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
++ else
++ rx_id &= CAN_SFF_MASK;
+
+ if (!addr->can_ifindex)
+ return -ENODEV;
+
+ lock_sock(sk);
+
++ if (so->bound) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ /* do not register frame reception for functional addressing */
+ if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
+ do_rx_reg = 0;
+
+ /* do not validate rx address for functional addressing */
+- if (do_rx_reg) {
+- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
+- err = -EADDRNOTAVAIL;
+- goto out;
+- }
+-
+- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
+- err = -EADDRNOTAVAIL;
+- goto out;
+- }
+- }
+-
+- if (so->bound && addr->can_ifindex == so->ifindex &&
+- addr->can_addr.tp.rx_id == so->rxid &&
+- addr->can_addr.tp.tx_id == so->txid)
++ if (do_rx_reg && rx_id == tx_id) {
++ err = -EADDRNOTAVAIL;
+ goto out;
++ }
+
+ dev = dev_get_by_index(net, addr->can_ifindex);
+ if (!dev) {
+@@ -1148,29 +1183,15 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ ifindex = dev->ifindex;
+
+ if (do_rx_reg)
+- can_rx_register(net, dev, addr->can_addr.tp.rx_id,
+- SINGLE_MASK(addr->can_addr.tp.rx_id),
++ can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
+ isotp_rcv, sk, "isotp", sk);
+
+ dev_put(dev);
+
+- if (so->bound && do_rx_reg) {
+- /* unregister old filter */
+- if (so->ifindex) {
+- dev = dev_get_by_index(net, so->ifindex);
+- if (dev) {
+- can_rx_unregister(net, dev, so->rxid,
+- SINGLE_MASK(so->rxid),
+- isotp_rcv, sk);
+- dev_put(dev);
+- }
+- }
+- }
+-
+ /* switch to new settings */
+ so->ifindex = ifindex;
+- so->rxid = addr->can_addr.tp.rx_id;
+- so->txid = addr->can_addr.tp.tx_id;
++ so->rxid = rx_id;
++ so->txid = tx_id;
+ so->bound = 1;
+
+ out:
+@@ -1224,6 +1245,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
+ /* no separate rx_ext_address is given => use ext_address */
+ if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
+ so->opt.rx_ext_address = so->opt.ext_address;
++
++ /* check for frame_txtime changes (0 => no changes) */
++ if (so->opt.frame_txtime) {
++ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
++ so->frame_txtime = 0;
++ else
++ so->frame_txtime = so->opt.frame_txtime;
++ }
+ break;
+
+ case CAN_ISOTP_RECV_FC:
+@@ -1425,6 +1454,7 @@ static int isotp_init(struct sock *sk)
+ so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+ so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+ so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
++ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
+ so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
+ so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
+@@ -1444,6 +1474,7 @@ static int isotp_init(struct sock *sk)
+ so->txtimer.function = isotp_tx_timer_handler;
+
+ init_waitqueue_head(&so->wait);
++ spin_lock_init(&so->rx_lock);
+
+ spin_lock(&isotp_notifier_lock);
+ list_add_tail(&so->notifier, &isotp_notifier_list);
+@@ -1452,6 +1483,21 @@ static int isotp_init(struct sock *sk)
+ return 0;
+ }
+
++static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait)
++{
++ struct sock *sk = sock->sk;
++ struct isotp_sock *so = isotp_sk(sk);
++
++ __poll_t mask = datagram_poll(file, sock, wait);
++ poll_wait(file, &so->wait, wait);
++
++ /* Check for false positives due to TX state */
++ if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE))
++ mask &= ~(EPOLLOUT | EPOLLWRNORM);
++
++ return mask;
++}
++
+ static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+ {
+@@ -1467,7 +1513,7 @@ static const struct proto_ops isotp_ops = {
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = isotp_getname,
+- .poll = datagram_poll,
++ .poll = isotp_poll,
+ .ioctl = isotp_sock_no_ioctlcmd,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
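The isotp hunks above do three related things: take a per-socket rx_lock around the receive state machine, track the effective frame_txtime separately from the user-supplied option, and replace datagram_poll() with isotp_poll() so EPOLLOUT is withheld while a previous PDU is still being segmented. A minimal user-space sketch of how that last change is observed follows; the interface name "can0" and the CAN IDs are illustrative assumptions, and error handling is trimmed.

/* poll() on a CAN_ISOTP socket; with isotp_poll() above, POLLOUT is only
 * reported once the previous PDU has fully left the tx state machine. */
#include <string.h>
#include <unistd.h>
#include <poll.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/isotp.h>

int main(void)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct ifreq ifr = { 0 };
	struct pollfd pfd;
	int s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);

	if (s < 0)
		return 1;

	strcpy(ifr.ifr_name, "can0");		/* assumed interface name */
	ioctl(s, SIOCGIFINDEX, &ifr);
	addr.can_ifindex = ifr.ifr_ifindex;
	addr.can_addr.tp.tx_id = 0x712;		/* illustrative addressing */
	addr.can_addr.tp.rx_id = 0x77a;
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	pfd.fd = s;
	pfd.events = POLLOUT;
	poll(&pfd, 1, -1);	/* blocks until tx.state is back to ISOTP_IDLE */
	close(s);
	return 0;
}

Before this change the generic datagram_poll() could report the socket writable while a multi-frame transfer was still in flight, which is exactly the false positive the new handler masks out.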
+diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
+index f33c473279278..ca4ad6cdd5cbf 100644
+--- a/net/can/j1939/address-claim.c
++++ b/net/can/j1939/address-claim.c
+@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
+ * leaving this function.
+ */
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
++
++ if (ecu && ecu->addr == skcb->addr.sa) {
++ /* The ISO 11783-5 standard, in "4.5.2 - Address claim
++ * requirements", states:
++ * d) No CF shall begin, or resume, transmission on the
++ * network until 250 ms after it has successfully claimed
++ * an address except when responding to a request for
++ * address-claimed.
++ *
++ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim
++ * prioritization" show that the CF begins the transmission
++ * after 250 ms from the first AC (address-claimed) message
++ * even if it sends another AC message during that time window
++ * to resolve the address contention with another CF.
++ *
++ * As stated in "4.4.2.3 - Address-claimed message":
++ * In order to successfully claim an address, the CF sending
++ * an address claimed message shall not receive a contending
++ * claim from another CF for at least 250 ms.
++ *
++ * As stated in "4.4.3.2 - NAME management (NM) message":
++ * 1) A commanding CF can
++ * d) request that a CF with a specified NAME transmit
++ * the address-claimed message with its current NAME.
++ * 2) A target CF shall
++ * d) send an address-claimed message in response to a
++ * request for a matching NAME
++ *
++ * Taking the above arguments into account, the 250 ms wait is
++ * requested only during network initialization.
++ *
++ * Do not restart the timer on AC message if both the NAME and
++ * the address match and so if the address has already been
++ * claimed (timer has expired) or the AC message has been sent
++ * to resolve the contention with another CF (timer is still
++ * running).
++ */
++ goto out_ecu_put;
++ }
++
+ if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
+ ecu = j1939_ecu_create_locked(priv, name);
+
+diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
+index 9bc55ecb37f9f..e82b915092581 100644
+--- a/net/can/j1939/main.c
++++ b/net/can/j1939/main.c
+@@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
+ skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+ /* set default message type */
+ skcb->addr.type = J1939_TP;
++
++ if (!j1939_address_is_valid(skcb->addr.sa)) {
++ netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n",
++ __func__);
++ goto done;
++ }
++
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+ /* Type 1: with destination address */
+ skcb->addr.da = skcb->addr.pgn;
+@@ -115,7 +122,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
+ #define J1939_CAN_ID CAN_EFF_FLAG
+ #define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+-static DEFINE_SPINLOCK(j1939_netdev_lock);
++static DEFINE_MUTEX(j1939_netdev_lock);
+
+ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
+ {
+@@ -209,7 +216,7 @@ static void __j1939_rx_release(struct kref *kref)
+ j1939_can_rx_unregister(priv);
+ j1939_ecu_unmap_all(priv);
+ j1939_priv_set(priv->ndev, NULL);
+- spin_unlock(&j1939_netdev_lock);
++ mutex_unlock(&j1939_netdev_lock);
+ }
+
+ /* get pointer to priv without increasing ref counter */
+@@ -237,9 +244,9 @@ static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
+ {
+ struct j1939_priv *priv;
+
+- spin_lock(&j1939_netdev_lock);
++ mutex_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
+- spin_unlock(&j1939_netdev_lock);
++ mutex_unlock(&j1939_netdev_lock);
+
+ return priv;
+ }
+@@ -249,14 +256,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+ struct j1939_priv *priv, *priv_new;
+ int ret;
+
+- spin_lock(&j1939_netdev_lock);
++ mutex_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
+ if (priv) {
+ kref_get(&priv->rx_kref);
+- spin_unlock(&j1939_netdev_lock);
++ mutex_unlock(&j1939_netdev_lock);
+ return priv;
+ }
+- spin_unlock(&j1939_netdev_lock);
++ mutex_unlock(&j1939_netdev_lock);
+
+ priv = j1939_priv_create(ndev);
+ if (!priv)
+@@ -266,29 +273,31 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+ spin_lock_init(&priv->j1939_socks_lock);
+ INIT_LIST_HEAD(&priv->j1939_socks);
+
+- spin_lock(&j1939_netdev_lock);
++ mutex_lock(&j1939_netdev_lock);
+ priv_new = j1939_priv_get_by_ndev_locked(ndev);
+ if (priv_new) {
+ /* Someone was faster than us, use their priv and roll
+ * back our's.
+ */
+ kref_get(&priv_new->rx_kref);
+- spin_unlock(&j1939_netdev_lock);
++ mutex_unlock(&j1939_netdev_lock);
+ dev_put(ndev);
+ kfree(priv);
+ return priv_new;
+ }
+ j1939_priv_set(ndev, priv);
+- spin_unlock(&j1939_netdev_lock);
+
+ ret = j1939_can_rx_register(priv);
+ if (ret < 0)
+ goto out_priv_put;
+
++ mutex_unlock(&j1939_netdev_lock);
+ return priv;
+
+ out_priv_put:
+ j1939_priv_set(ndev, NULL);
++ mutex_unlock(&j1939_netdev_lock);
++
+ dev_put(ndev);
+ kfree(priv);
+
+@@ -297,7 +306,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+
+ void j1939_netdev_stop(struct j1939_priv *priv)
+ {
+- kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
++ kref_put_mutex(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+ j1939_priv_put(priv);
+ }
+
+@@ -325,6 +334,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
+ /* re-claim the CAN_HDR from the SKB */
+ cf = skb_push(skb, J1939_CAN_HDR);
+
++ /* initialize header structure */
++ memset(cf, 0, J1939_CAN_HDR);
++
+ /* make it a full can frame again */
+ skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+
+diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
+index 6dff4510687a4..dfce84f2349f8 100644
+--- a/net/can/j1939/socket.c
++++ b/net/can/j1939/socket.c
+@@ -178,7 +178,10 @@ activate_next:
+ if (!first)
+ return;
+
+- if (WARN_ON_ONCE(j1939_session_activate(first))) {
++ if (j1939_session_activate(first)) {
++ netdev_warn_once(first->priv->ndev,
++ "%s: 0x%p: Identical session is already activated.\n",
++ __func__, first);
+ first->err = -EBUSY;
+ goto activate_next;
+ } else {
+@@ -795,7 +798,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
+ struct j1939_sk_buff_cb *skcb;
+ int ret = 0;
+
+- if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
++ if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE | MSG_CMSG_COMPAT))
+ return -EINVAL;
+
+ if (flags & MSG_ERRQUEUE)
+@@ -1085,6 +1088,11 @@ void j1939_sk_errqueue(struct j1939_session *session,
+
+ void j1939_sk_send_loop_abort(struct sock *sk, int err)
+ {
++ struct j1939_sock *jsk = j1939_sk(sk);
++
++ if (jsk->state & J1939_SOCK_ERRQUEUE)
++ return;
++
+ sk->sk_err = err;
+
+ sk_error_report(sk);
+diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
+index 6c0a0ebdd024c..bd8ec24338324 100644
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -260,6 +260,8 @@ static void __j1939_session_drop(struct j1939_session *session)
+
+ static void j1939_session_destroy(struct j1939_session *session)
+ {
++ struct sk_buff *skb;
++
+ if (session->transmission) {
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+@@ -274,7 +276,11 @@ static void j1939_session_destroy(struct j1939_session *session)
+ WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
+ WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
+
+- skb_queue_purge(&session->skb_queue);
++ while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
++ /* drop ref taken in j1939_session_skb_queue() */
++ skb_unref(skb);
++ kfree_skb(skb);
++ }
+ __j1939_session_drop(session);
+ j1939_priv_put(session->priv);
+ kfree(session);
+@@ -336,10 +342,12 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
+ __skb_unlink(do_skb, &session->skb_queue);
+ /* drop ref taken in j1939_session_skb_queue() */
+ skb_unref(do_skb);
++ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+
+ kfree_skb(do_skb);
++ } else {
++ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+ }
+- spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+ }
+
+ void j1939_session_skb_queue(struct j1939_session *session,
+@@ -596,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
+ /* reserve CAN header */
+ skb_reserve(skb, offsetof(struct can_frame, data));
+
+- memcpy(skb->cb, re_skcb, sizeof(skb->cb));
++ /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */
++ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb));
++
++ memcpy(skb->cb, re_skcb, sizeof(*re_skcb));
+ skcb = j1939_skb_to_cb(skb);
+ if (swap_src_dst)
+ j1939_skbcb_swap(skcb);
+@@ -1084,10 +1095,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
+ bool active;
+
+ j1939_session_list_lock(priv);
+- /* This function should be called with a session ref-count of at
+- * least 2.
+- */
+- WARN_ON_ONCE(kref_read(&session->kref) < 2);
+ active = j1939_session_deactivate_locked(session);
+ j1939_session_list_unlock(priv);
+
+@@ -1120,8 +1127,6 @@ static void __j1939_session_cancel(struct j1939_session *session,
+
+ if (session->sk)
+ j1939_sk_send_loop_abort(session->sk, session->err);
+- else
+- j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
+
+ static void j1939_session_cancel(struct j1939_session *session,
+@@ -1136,6 +1141,9 @@ static void j1939_session_cancel(struct j1939_session *session,
+ }
+
+ j1939_session_list_unlock(session->priv);
++
++ if (!session->sk)
++ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
+
+ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+@@ -1249,6 +1257,9 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+ __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ }
+ j1939_session_list_unlock(session->priv);
++
++ if (!session->sk)
++ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
+
+ j1939_session_put(session);
+@@ -2006,7 +2017,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+ /* set the end-packet for broadcast */
+ session->pkt.last = session->pkt.total;
+
+- skcb->tskey = session->sk->sk_tskey++;
++ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
+ session->tskey = skcb->tskey;
+
+ return session;
+@@ -2023,6 +2034,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+ extd = J1939_ETP;
+ fallthrough;
+ case J1939_TP_CMD_BAM:
++ if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) {
++ netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n",
++ __func__, skcb->addr.sa);
++ return;
++ }
+ fallthrough;
+ case J1939_TP_CMD_RTS:
+ if (skcb->addr.type != extd)
+@@ -2085,6 +2101,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
++ if (j1939_cb_is_broadcast(skcb)) {
++ netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n",
++ __func__, skcb->addr.sa);
++ return;
++ }
++
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_abort(priv, skb, true);
+
+diff --git a/net/can/raw.c b/net/can/raw.c
+index 7105fa4824e4b..8877d22da67ee 100644
+--- a/net/can/raw.c
++++ b/net/can/raw.c
+@@ -83,6 +83,7 @@ struct raw_sock {
+ struct sock sk;
+ int bound;
+ int ifindex;
++ struct net_device *dev;
+ struct list_head notifier;
+ int loopback;
+ int recv_own_msgs;
+@@ -275,21 +276,24 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return;
+
+- if (ro->ifindex != dev->ifindex)
++ if (ro->dev != dev)
+ return;
+
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ lock_sock(sk);
+ /* remove current filters & unregister */
+- if (ro->bound)
++ if (ro->bound) {
+ raw_disable_allfilters(dev_net(dev), dev, sk);
++ dev_put(dev);
++ }
+
+ if (ro->count > 1)
+ kfree(ro->filter);
+
+ ro->ifindex = 0;
+ ro->bound = 0;
++ ro->dev = NULL;
+ ro->count = 0;
+ release_sock(sk);
+
+@@ -335,6 +339,7 @@ static int raw_init(struct sock *sk)
+
+ ro->bound = 0;
+ ro->ifindex = 0;
++ ro->dev = NULL;
+
+ /* set default filter to single entry dfilter */
+ ro->dfilter.can_id = 0;
+@@ -380,18 +385,14 @@ static int raw_release(struct socket *sock)
+ list_del(&ro->notifier);
+ spin_unlock(&raw_notifier_lock);
+
++ rtnl_lock();
+ lock_sock(sk);
+
+ /* remove current filters & unregister */
+ if (ro->bound) {
+- if (ro->ifindex) {
+- struct net_device *dev;
+-
+- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+- if (dev) {
+- raw_disable_allfilters(dev_net(dev), dev, sk);
+- dev_put(dev);
+- }
++ if (ro->dev) {
++ raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
++ dev_put(ro->dev);
+ } else {
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
+@@ -402,6 +403,7 @@ static int raw_release(struct socket *sock)
+
+ ro->ifindex = 0;
+ ro->bound = 0;
++ ro->dev = NULL;
+ ro->count = 0;
+ free_percpu(ro->uniq);
+
+@@ -409,6 +411,8 @@ static int raw_release(struct socket *sock)
+ sock->sk = NULL;
+
+ release_sock(sk);
++ rtnl_unlock();
++
+ sock_put(sk);
+
+ return 0;
+@@ -419,6 +423,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct sock *sk = sock->sk;
+ struct raw_sock *ro = raw_sk(sk);
++ struct net_device *dev = NULL;
+ int ifindex;
+ int err = 0;
+ int notify_enetdown = 0;
+@@ -428,24 +433,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+
++ rtnl_lock();
+ lock_sock(sk);
+
+ if (ro->bound && addr->can_ifindex == ro->ifindex)
+ goto out;
+
+ if (addr->can_ifindex) {
+- struct net_device *dev;
+-
+ dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (dev->type != ARPHRD_CAN) {
+- dev_put(dev);
+ err = -ENODEV;
+- goto out;
++ goto out_put_dev;
+ }
++
+ if (!(dev->flags & IFF_UP))
+ notify_enetdown = 1;
+
+@@ -453,7 +457,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+
+ /* filters set by default/setsockopt */
+ err = raw_enable_allfilters(sock_net(sk), dev, sk);
+- dev_put(dev);
++ if (err)
++ goto out_put_dev;
++
+ } else {
+ ifindex = 0;
+
+@@ -464,26 +470,30 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+ if (!err) {
+ if (ro->bound) {
+ /* unregister old filters */
+- if (ro->ifindex) {
+- struct net_device *dev;
+-
+- dev = dev_get_by_index(sock_net(sk),
+- ro->ifindex);
+- if (dev) {
+- raw_disable_allfilters(dev_net(dev),
+- dev, sk);
+- dev_put(dev);
+- }
++ if (ro->dev) {
++ raw_disable_allfilters(dev_net(ro->dev),
++ ro->dev, sk);
++ /* drop reference to old ro->dev */
++ dev_put(ro->dev);
+ } else {
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
+ }
+ ro->ifindex = ifindex;
+ ro->bound = 1;
++ /* bind() ok -> hold a reference for new ro->dev */
++ ro->dev = dev;
++ if (ro->dev)
++ dev_hold(ro->dev);
+ }
+
+- out:
++out_put_dev:
++ /* remove potential reference from dev_get_by_index() */
++ if (dev)
++ dev_put(dev);
++out:
+ release_sock(sk);
++ rtnl_unlock();
+
+ if (notify_enetdown) {
+ sk->sk_err = ENETDOWN;
+@@ -549,9 +559,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
+ rtnl_lock();
+ lock_sock(sk);
+
+- if (ro->bound && ro->ifindex) {
+- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+- if (!dev) {
++ dev = ro->dev;
++ if (ro->bound && dev) {
++ if (dev->reg_state != NETREG_REGISTERED) {
+ if (count > 1)
+ kfree(filter);
+ err = -ENODEV;
+@@ -592,7 +602,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
+ ro->count = count;
+
+ out_fil:
+- dev_put(dev);
+ release_sock(sk);
+ rtnl_unlock();
+
+@@ -610,9 +619,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
+ rtnl_lock();
+ lock_sock(sk);
+
+- if (ro->bound && ro->ifindex) {
+- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+- if (!dev) {
++ dev = ro->dev;
++ if (ro->bound && dev) {
++ if (dev->reg_state != NETREG_REGISTERED) {
+ err = -ENODEV;
+ goto out_err;
+ }
+@@ -636,7 +645,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
+ ro->err_mask = err_mask;
+
+ out_err:
+- dev_put(dev);
+ release_sock(sk);
+ rtnl_unlock();
+
+diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
+index 57d043b382ed0..9bf085ddbe51f 100644
+--- a/net/ceph/messenger.c
++++ b/net/ceph/messenger.c
+@@ -1144,6 +1144,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
+ return true;
+ }
+ }
++EXPORT_SYMBOL(ceph_addr_is_blank);
+
+ int ceph_addr_port(const struct ceph_entity_addr *addr)
+ {
+diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
+index cc40ce4e02fbc..c3d105e59d251 100644
+--- a/net/ceph/messenger_v2.c
++++ b/net/ceph/messenger_v2.c
+@@ -391,6 +391,8 @@ static int head_onwire_len(int ctrl_len, bool secure)
+ int head_len;
+ int rem_len;
+
++ BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
++
+ if (secure) {
+ head_len = CEPH_PREAMBLE_SECURE_LEN;
+ if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
+@@ -409,6 +411,10 @@ static int head_onwire_len(int ctrl_len, bool secure)
+ static int __tail_onwire_len(int front_len, int middle_len, int data_len,
+ bool secure)
+ {
++ BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
++ middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
++ data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
++
+ if (!front_len && !middle_len && !data_len)
+ return 0;
+
+@@ -521,29 +527,34 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
+ desc->fd_aligns[i] = ceph_decode_16(&p);
+ }
+
+- /*
+- * This would fire for FRAME_TAG_WAIT (it has one empty
+- * segment), but we should never get it as client.
+- */
+- if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
+- pr_err("last segment empty\n");
++ if (desc->fd_lens[0] < 0 ||
++ desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
++ pr_err("bad control segment length %d\n", desc->fd_lens[0]);
+ return -EINVAL;
+ }
+-
+- if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
+- pr_err("control segment too big %d\n", desc->fd_lens[0]);
++ if (desc->fd_lens[1] < 0 ||
++ desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
++ pr_err("bad front segment length %d\n", desc->fd_lens[1]);
+ return -EINVAL;
+ }
+- if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
+- pr_err("front segment too big %d\n", desc->fd_lens[1]);
++ if (desc->fd_lens[2] < 0 ||
++ desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
++ pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
+ return -EINVAL;
+ }
+- if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
+- pr_err("middle segment too big %d\n", desc->fd_lens[2]);
++ if (desc->fd_lens[3] < 0 ||
++ desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
++ pr_err("bad data segment length %d\n", desc->fd_lens[3]);
+ return -EINVAL;
+ }
+- if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
+- pr_err("data segment too big %d\n", desc->fd_lens[3]);
++
++ /*
++ * This would fire for FRAME_TAG_WAIT (it has one empty
++ * segment), but we should never get it as client.
++ */
++ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
++ pr_err("last segment empty, segment count %d\n",
++ desc->fd_seg_cnt);
+ return -EINVAL;
+ }
+
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index ff8624a7c9643..0c5e0d2c609e3 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
+ target_init(&req->r_t);
+ }
+
+-/*
+- * This is ugly, but it allows us to reuse linger registration and ping
+- * requests, keeping the structure of the code around send_linger{_ping}()
+- * reasonable. Setting up a min_nr=2 mempool for each linger request
+- * and dealing with copying ops (this blasts req only, watch op remains
+- * intact) isn't any better.
+- */
+-static void request_reinit(struct ceph_osd_request *req)
+-{
+- struct ceph_osd_client *osdc = req->r_osdc;
+- bool mempool = req->r_mempool;
+- unsigned int num_ops = req->r_num_ops;
+- u64 snapid = req->r_snapid;
+- struct ceph_snap_context *snapc = req->r_snapc;
+- bool linger = req->r_linger;
+- struct ceph_msg *request_msg = req->r_request;
+- struct ceph_msg *reply_msg = req->r_reply;
+-
+- dout("%s req %p\n", __func__, req);
+- WARN_ON(kref_read(&req->r_kref) != 1);
+- request_release_checks(req);
+-
+- WARN_ON(kref_read(&request_msg->kref) != 1);
+- WARN_ON(kref_read(&reply_msg->kref) != 1);
+- target_destroy(&req->r_t);
+-
+- request_init(req);
+- req->r_osdc = osdc;
+- req->r_mempool = mempool;
+- req->r_num_ops = num_ops;
+- req->r_snapid = snapid;
+- req->r_snapc = snapc;
+- req->r_linger = linger;
+- req->r_request = request_msg;
+- req->r_reply = reply_msg;
+-}
+-
+ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+ struct ceph_snap_context *snapc,
+ unsigned int num_ops,
+@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
+ * @watch_opcode: CEPH_OSD_WATCH_OP_*
+ */
+ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
+- u64 cookie, u8 watch_opcode)
++ u8 watch_opcode, u64 cookie, u32 gen)
+ {
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
+ op->watch.cookie = cookie;
+ op->watch.op = watch_opcode;
+- op->watch.gen = 0;
++ op->watch.gen = gen;
++}
++
++/*
++ * prot_ver, timeout and notify payload (may be empty) should already be
++ * encoded in @request_pl
++ */
++static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
++ u64 cookie, struct ceph_pagelist *request_pl)
++{
++ struct ceph_osd_req_op *op;
++
++ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
++ op->notify.cookie = cookie;
++
++ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
++ op->indata_len = request_pl->length;
+ }
+
+ /*
+@@ -2727,10 +2706,13 @@ static void linger_release(struct kref *kref)
+ WARN_ON(!list_empty(&lreq->pending_lworks));
+ WARN_ON(lreq->osd);
+
+- if (lreq->reg_req)
+- ceph_osdc_put_request(lreq->reg_req);
+- if (lreq->ping_req)
+- ceph_osdc_put_request(lreq->ping_req);
++ if (lreq->request_pl)
++ ceph_pagelist_release(lreq->request_pl);
++ if (lreq->notify_id_pages)
++ ceph_release_page_vector(lreq->notify_id_pages, 1);
++
++ ceph_osdc_put_request(lreq->reg_req);
++ ceph_osdc_put_request(lreq->ping_req);
+ target_destroy(&lreq->t);
+ kfree(lreq);
+ }
+@@ -2999,6 +2981,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
+ struct ceph_osd_linger_request *lreq = req->r_priv;
+
+ mutex_lock(&lreq->lock);
++ if (req != lreq->reg_req) {
++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
++ goto out;
++ }
++
+ dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
+ lreq->linger_id, req->r_result);
+ linger_reg_commit_complete(lreq, req->r_result);
+@@ -3022,6 +3010,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
+ }
+ }
+
++out:
+ mutex_unlock(&lreq->lock);
+ linger_put(lreq);
+ }
+@@ -3044,6 +3033,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
+ struct ceph_osd_linger_request *lreq = req->r_priv;
+
+ mutex_lock(&lreq->lock);
++ if (req != lreq->reg_req) {
++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
++ goto out;
++ }
++
+ dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
+ lreq, lreq->linger_id, req->r_result, lreq->last_error);
+ if (req->r_result < 0) {
+@@ -3053,46 +3048,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
+ }
+ }
+
++out:
+ mutex_unlock(&lreq->lock);
+ linger_put(lreq);
+ }
+
+ static void send_linger(struct ceph_osd_linger_request *lreq)
+ {
+- struct ceph_osd_request *req = lreq->reg_req;
+- struct ceph_osd_req_op *op = &req->r_ops[0];
++ struct ceph_osd_client *osdc = lreq->osdc;
++ struct ceph_osd_request *req;
++ int ret;
+
+- verify_osdc_wrlocked(req->r_osdc);
++ verify_osdc_wrlocked(osdc);
++ mutex_lock(&lreq->lock);
+ dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+
+- if (req->r_osd)
+- cancel_linger_request(req);
++ if (lreq->reg_req) {
++ if (lreq->reg_req->r_osd)
++ cancel_linger_request(lreq->reg_req);
++ ceph_osdc_put_request(lreq->reg_req);
++ }
++
++ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
++ BUG_ON(!req);
+
+- request_reinit(req);
+ target_copy(&req->r_t, &lreq->t);
+ req->r_mtime = lreq->mtime;
+
+- mutex_lock(&lreq->lock);
+ if (lreq->is_watch && lreq->committed) {
+- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
+- op->watch.cookie != lreq->linger_id);
+- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
+- op->watch.gen = ++lreq->register_gen;
++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
++ lreq->linger_id, ++lreq->register_gen);
+ dout("lreq %p reconnect register_gen %u\n", lreq,
+- op->watch.gen);
++ req->r_ops[0].watch.gen);
+ req->r_callback = linger_reconnect_cb;
+ } else {
+- if (!lreq->is_watch)
++ if (lreq->is_watch) {
++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
++ lreq->linger_id, 0);
++ } else {
+ lreq->notify_id = 0;
+- else
+- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
++
++ refcount_inc(&lreq->request_pl->refcnt);
++ osd_req_op_notify_init(req, 0, lreq->linger_id,
++ lreq->request_pl);
++ ceph_osd_data_pages_init(
++ osd_req_op_data(req, 0, notify, response_data),
++ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
++ }
+ dout("lreq %p register\n", lreq);
+ req->r_callback = linger_commit_cb;
+ }
+- mutex_unlock(&lreq->lock);
++
++ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
++ BUG_ON(ret);
+
+ req->r_priv = linger_get(lreq);
+ req->r_linger = true;
++ lreq->reg_req = req;
++ mutex_unlock(&lreq->lock);
+
+ submit_request(req, true);
+ }
+@@ -3102,6 +3115,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
+ struct ceph_osd_linger_request *lreq = req->r_priv;
+
+ mutex_lock(&lreq->lock);
++ if (req != lreq->ping_req) {
++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
++ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
++ goto out;
++ }
++
+ dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
+ __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
+ lreq->last_error);
+@@ -3117,6 +3136,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
+ lreq->register_gen, req->r_ops[0].watch.gen);
+ }
+
++out:
+ mutex_unlock(&lreq->lock);
+ linger_put(lreq);
+ }
+@@ -3124,8 +3144,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
+ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
+ {
+ struct ceph_osd_client *osdc = lreq->osdc;
+- struct ceph_osd_request *req = lreq->ping_req;
+- struct ceph_osd_req_op *op = &req->r_ops[0];
++ struct ceph_osd_request *req;
++ int ret;
+
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
+ dout("%s PAUSERD\n", __func__);
+@@ -3137,19 +3157,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
+ __func__, lreq, lreq->linger_id, lreq->ping_sent,
+ lreq->register_gen);
+
+- if (req->r_osd)
+- cancel_linger_request(req);
++ if (lreq->ping_req) {
++ if (lreq->ping_req->r_osd)
++ cancel_linger_request(lreq->ping_req);
++ ceph_osdc_put_request(lreq->ping_req);
++ }
+
+- request_reinit(req);
+- target_copy(&req->r_t, &lreq->t);
++ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
++ BUG_ON(!req);
+
+- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
+- op->watch.cookie != lreq->linger_id ||
+- op->watch.op != CEPH_OSD_WATCH_OP_PING);
+- op->watch.gen = lreq->register_gen;
++ target_copy(&req->r_t, &lreq->t);
++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
++ lreq->register_gen);
+ req->r_callback = linger_ping_cb;
++
++ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
++ BUG_ON(ret);
++
+ req->r_priv = linger_get(lreq);
+ req->r_linger = true;
++ lreq->ping_req = req;
+
+ ceph_osdc_get_request(req);
+ account_request(req);
+@@ -3165,12 +3192,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
+
+ down_write(&osdc->lock);
+ linger_register(lreq);
+- if (lreq->is_watch) {
+- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
+- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
+- } else {
+- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
+- }
+
+ calc_target(osdc, &lreq->t, false);
+ osd = lookup_create_osd(osdc, lreq->t.osd, true);
+@@ -3202,9 +3223,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
+ */
+ static void __linger_cancel(struct ceph_osd_linger_request *lreq)
+ {
+- if (lreq->is_watch && lreq->ping_req->r_osd)
++ if (lreq->ping_req && lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+- if (lreq->reg_req->r_osd)
++ if (lreq->reg_req && lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ cancel_linger_map_check(lreq);
+ unlink_linger(lreq->osd, lreq);
+@@ -3309,17 +3330,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
+ int ret;
+
+ dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
++ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
+ return ret ?: lreq->reg_commit_error;
+ }
+
+-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
++static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
++ unsigned long timeout)
+ {
+- int ret;
++ long left;
+
+ dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
+- return ret ?: lreq->notify_finish_error;
++ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
++ ceph_timeout_jiffies(timeout));
++ if (left <= 0)
++ left = left ?: -ETIMEDOUT;
++ else
++ left = lreq->notify_finish_error; /* completed */
++
++ return left;
+ }
+
+ /*
+@@ -4653,43 +4681,6 @@ again:
+ }
+ EXPORT_SYMBOL(ceph_osdc_sync);
+
+-static struct ceph_osd_request *
+-alloc_linger_request(struct ceph_osd_linger_request *lreq)
+-{
+- struct ceph_osd_request *req;
+-
+- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
+- if (!req)
+- return NULL;
+-
+- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
+- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+- return req;
+-}
+-
+-static struct ceph_osd_request *
+-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
+-{
+- struct ceph_osd_request *req;
+-
+- req = alloc_linger_request(lreq);
+- if (!req)
+- return NULL;
+-
+- /*
+- * Pass 0 for cookie because we don't know it yet, it will be
+- * filled in by linger_submit().
+- */
+- osd_req_op_watch_init(req, 0, 0, watch_opcode);
+-
+- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
+- ceph_osdc_put_request(req);
+- return NULL;
+- }
+-
+- return req;
+-}
+-
+ /*
+ * Returns a handle, caller owns a ref.
+ */
+@@ -4719,18 +4710,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
+ lreq->t.flags = CEPH_OSD_FLAG_WRITE;
+ ktime_get_real_ts64(&lreq->mtime);
+
+- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
+- if (!lreq->reg_req) {
+- ret = -ENOMEM;
+- goto err_put_lreq;
+- }
+-
+- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
+- if (!lreq->ping_req) {
+- ret = -ENOMEM;
+- goto err_put_lreq;
+- }
+-
+ linger_submit(lreq);
+ ret = linger_reg_commit_wait(lreq);
+ if (ret) {
+@@ -4768,8 +4747,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
+ ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+ req->r_flags = CEPH_OSD_FLAG_WRITE;
+ ktime_get_real_ts64(&req->r_mtime);
+- osd_req_op_watch_init(req, 0, lreq->linger_id,
+- CEPH_OSD_WATCH_OP_UNWATCH);
++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
++ lreq->linger_id, 0);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+@@ -4855,35 +4834,6 @@ out_put_req:
+ }
+ EXPORT_SYMBOL(ceph_osdc_notify_ack);
+
+-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+- u64 cookie, u32 prot_ver, u32 timeout,
+- void *payload, u32 payload_len)
+-{
+- struct ceph_osd_req_op *op;
+- struct ceph_pagelist *pl;
+- int ret;
+-
+- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+- op->notify.cookie = cookie;
+-
+- pl = ceph_pagelist_alloc(GFP_NOIO);
+- if (!pl)
+- return -ENOMEM;
+-
+- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
+- ret |= ceph_pagelist_encode_32(pl, timeout);
+- ret |= ceph_pagelist_encode_32(pl, payload_len);
+- ret |= ceph_pagelist_append(pl, payload, payload_len);
+- if (ret) {
+- ceph_pagelist_release(pl);
+- return -ENOMEM;
+- }
+-
+- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
+- op->indata_len = pl->length;
+- return 0;
+-}
+-
+ /*
+ * @timeout: in seconds
+ *
+@@ -4902,7 +4852,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
+ size_t *preply_len)
+ {
+ struct ceph_osd_linger_request *lreq;
+- struct page **pages;
+ int ret;
+
+ WARN_ON(!timeout);
+@@ -4915,46 +4864,41 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
+ if (!lreq)
+ return -ENOMEM;
+
+- lreq->preply_pages = preply_pages;
+- lreq->preply_len = preply_len;
+-
+- ceph_oid_copy(&lreq->t.base_oid, oid);
+- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+- lreq->t.flags = CEPH_OSD_FLAG_READ;
+-
+- lreq->reg_req = alloc_linger_request(lreq);
+- if (!lreq->reg_req) {
++ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
++ if (!lreq->request_pl) {
+ ret = -ENOMEM;
+ goto out_put_lreq;
+ }
+
+- /*
+- * Pass 0 for cookie because we don't know it yet, it will be
+- * filled in by linger_submit().
+- */
+- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
+- payload, payload_len);
+- if (ret)
++ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
++ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
++ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
++ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
++ if (ret) {
++ ret = -ENOMEM;
+ goto out_put_lreq;
++ }
+
+ /* for notify_id */
+- pages = ceph_alloc_page_vector(1, GFP_NOIO);
+- if (IS_ERR(pages)) {
+- ret = PTR_ERR(pages);
++ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
++ if (IS_ERR(lreq->notify_id_pages)) {
++ ret = PTR_ERR(lreq->notify_id_pages);
++ lreq->notify_id_pages = NULL;
+ goto out_put_lreq;
+ }
+- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
+- response_data),
+- pages, PAGE_SIZE, 0, false, true);
+
+- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
+- if (ret)
+- goto out_put_lreq;
++ lreq->preply_pages = preply_pages;
++ lreq->preply_len = preply_len;
++
++ ceph_oid_copy(&lreq->t.base_oid, oid);
++ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
++ lreq->t.flags = CEPH_OSD_FLAG_READ;
+
+ linger_submit(lreq);
+ ret = linger_reg_commit_wait(lreq);
+ if (!ret)
+- ret = linger_notify_finish_wait(lreq);
++ ret = linger_notify_finish_wait(lreq,
++ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
+ else
+ dout("lreq %p failed to initiate notify %d\n", lreq, ret);
+
+diff --git a/net/core/Makefile b/net/core/Makefile
+index 35ced6201814c..4268846f2f475 100644
+--- a/net/core/Makefile
++++ b/net/core/Makefile
+@@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o
+ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
+ obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
+ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
++obj-$(CONFIG_OF) += of_net.o
+diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
+index 68d2cbf8331ac..4953abee79fea 100644
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -305,11 +305,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
+ static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+ {
++ int optmem_max = READ_ONCE(sysctl_optmem_max);
+ struct sock *sk = (struct sock *)owner;
+
+ /* same check as in sock_kmalloc() */
+- if (size <= sysctl_optmem_max &&
+- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
++ if (size <= optmem_max &&
++ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+ atomic_add(size, &sk->sk_omem_alloc);
+ return 0;
+ }
+@@ -520,8 +521,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
+ return ERR_PTR(-EPERM);
+
+ nla_for_each_nested(nla, nla_stgs, rem) {
+- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
++ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
++ if (nla_len(nla) != sizeof(u32))
++ return ERR_PTR(-EINVAL);
+ nr_maps++;
++ }
+ }
+
+ diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
+@@ -865,10 +869,18 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
+ {
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
++ bpf_map_inc_with_uref(aux->map);
+ seq_info->map = aux->map;
+ return 0;
+ }
+
++static void bpf_iter_fini_sk_storage_map(void *priv_data)
++{
++ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
++
++ bpf_map_put_with_uref(seq_info->map);
++}
++
+ static int bpf_iter_attach_map(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+@@ -886,7 +898,7 @@ static int bpf_iter_attach_map(struct bpf_prog *prog,
+ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+ goto put_map;
+
+- if (prog->aux->max_rdonly_access > map->value_size) {
++ if (prog->aux->max_rdwr_access > map->value_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+@@ -914,7 +926,7 @@ static const struct seq_operations bpf_sk_storage_map_seq_ops = {
+ static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_sk_storage_map_seq_ops,
+ .init_seq_private = bpf_iter_init_sk_storage_map,
+- .fini_seq_private = NULL,
++ .fini_seq_private = bpf_iter_fini_sk_storage_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
+ };
+
+@@ -929,7 +941,7 @@ static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
+- PTR_TO_RDWR_BUF_OR_NULL },
++ PTR_TO_BUF | PTR_MAYBE_NULL },
+ },
+ .seq_info = &iter_seq_info,
+ };
+diff --git a/net/core/datagram.c b/net/core/datagram.c
+index 15ab9ffb27fe9..1ff8241217a9c 100644
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -677,7 +677,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ page_ref_sub(last_head, refs);
+ refs = 0;
+ }
+- skb_fill_page_desc(skb, frag++, head, start, size);
++ skb_fill_page_desc_noacc(skb, frag++, head, start, size);
+ }
+ if (refs)
+ page_ref_sub(last_head, refs);
+@@ -799,18 +799,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
+ {
+ struct sock *sk = sock->sk;
+ __poll_t mask;
++ u8 shutdown;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
+
+ /* exceptional events? */
+- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
++ if (READ_ONCE(sk->sk_err) ||
++ !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+- if (sk->sk_shutdown & RCV_SHUTDOWN)
++ shutdown = READ_ONCE(sk->sk_shutdown);
++ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (shutdown == SHUTDOWN_MASK)
+ mask |= EPOLLHUP;
+
+ /* readable? */
+@@ -819,10 +822,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
+
+ /* Connection-based need to check for termination and startup */
+ if (connection_based(sk)) {
+- if (sk->sk_state == TCP_CLOSE)
++ int state = READ_ONCE(sk->sk_state);
++
++ if (state == TCP_CLOSE)
+ mask |= EPOLLHUP;
+ /* connection hasn't started yet? */
+- if (sk->sk_state == TCP_SYN_SENT)
++ if (state == TCP_SYN_SENT)
+ return mask;
+ }
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index eb3a366bf212c..4d698ccf41726 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -365,12 +365,12 @@ static void list_netdevice(struct net_device *dev)
+
+ ASSERT_RTNL();
+
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+ netdev_name_node_add(net, dev->name_node);
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+
+ dev_base_seq_inc(net);
+ }
+@@ -378,16 +378,18 @@ static void list_netdevice(struct net_device *dev)
+ /* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
+-static void unlist_netdevice(struct net_device *dev)
++static void unlist_netdevice(struct net_device *dev, bool lock)
+ {
+ ASSERT_RTNL();
+
+ /* Unlink dev from the device chain */
+- write_lock_bh(&dev_base_lock);
++ if (lock)
++ write_lock(&dev_base_lock);
+ list_del_rcu(&dev->dev_list);
+ netdev_name_node_del(dev->name_node);
+ hlist_del_rcu(&dev->index_hlist);
+- write_unlock_bh(&dev_base_lock);
++ if (lock)
++ write_unlock(&dev_base_lock);
+
+ dev_base_seq_inc(dev_net(dev));
+ }
+@@ -741,11 +743,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ const struct net_device *last_dev;
+ struct net_device_path_ctx ctx = {
+ .dev = dev,
+- .daddr = daddr,
+ };
+ struct net_device_path *path;
+ int ret = 0;
+
++ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
+ stack->num_paths = 0;
+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+ last_dev = ctx.dev;
+@@ -1266,15 +1268,15 @@ rollback:
+
+ netdev_adjacent_rename_links(dev, oldname);
+
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ netdev_name_node_del(dev->name_node);
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+
+ synchronize_rcu();
+
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ netdev_name_node_add(net, dev->name_node);
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+
+ ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
+ ret = notifier_to_errno(ret);
+@@ -2572,6 +2574,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+ struct xps_map *map, *new_map;
+ unsigned int nr_ids;
+
++ WARN_ON_ONCE(index >= dev->num_tx_queues);
++
+ if (dev->num_tc) {
+ /* Do not allow XPS on subordinate device directly */
+ num_tc = dev->num_tc;
+@@ -2921,6 +2925,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+ if (dev->num_tc)
+ netif_setup_tc(dev, txq);
+
++ dev_qdisc_change_real_num_tx(dev, txq);
++
+ dev->real_num_tx_queues = txq;
+
+ if (disabling) {
+@@ -3109,8 +3115,10 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
+ {
+ if (in_hardirq() || irqs_disabled())
+ __dev_kfree_skb_irq(skb, reason);
++ else if (unlikely(reason == SKB_REASON_DROPPED))
++ kfree_skb(skb);
+ else
+- dev_kfree_skb(skb);
++ consume_skb(skb);
+ }
+ EXPORT_SYMBOL(__dev_kfree_skb_any);
+
+@@ -3231,11 +3239,15 @@ int skb_checksum_help(struct sk_buff *skb)
+ }
+
+ offset = skb_checksum_start_offset(skb);
+- BUG_ON(offset >= skb_headlen(skb));
++ ret = -EINVAL;
++ if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
++ goto out;
++
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ offset += skb->csum_offset;
+- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
++ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb)))
++ goto out;
+
+ ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
+ if (ret)
+@@ -3304,7 +3316,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
+ type = eth->h_proto;
+ }
+
+- return __vlan_get_protocol(skb, type, depth);
++ return vlan_get_protocol_and_depth(skb, type, depth);
+ }
+
+ /**
+@@ -3932,8 +3944,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
+- qdisc_skb_cb(skb)->mru = 0;
+- qdisc_skb_cb(skb)->post_ct = false;
++ tc_skb_cb(skb)->mru = 0;
++ tc_skb_cb(skb)->post_ct = false;
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+
+ switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+@@ -4139,6 +4151,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+ bool again = false;
+
+ skb_reset_mac_header(skb);
++ skb_assert_len(skb);
+
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
+ __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
+@@ -4193,7 +4206,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+ if (dev->flags & IFF_UP) {
+ int cpu = smp_processor_id(); /* ok because BHs are off */
+
+- if (txq->xmit_lock_owner != cpu) {
++ /* Other cpus might concurrently change txq->xmit_lock_owner
++ * to -1 or to their cpu id, but not to our id.
++ */
++ if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+ if (dev_xmit_recursion())
+ goto recursion_alert;
+
+@@ -4442,8 +4458,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ u32 next_cpu;
+ u32 ident;
+
+- /* First check into global flow table if there is a match */
+- ident = sock_flow_table->ents[hash & sock_flow_table->mask];
++ /* First check into global flow table if there is a match.
++ * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
++ */
++ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
+ if ((ident ^ hash) & ~rps_cpu_mask)
+ goto try_rps;
+
+@@ -4578,7 +4596,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+ struct softnet_data *sd;
+ unsigned int old_flow, new_flow;
+
+- if (qlen < (netdev_max_backlog >> 1))
++ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
+ return false;
+
+ sd = this_cpu_ptr(&softnet_data);
+@@ -4626,7 +4644,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ if (!netif_running(skb->dev))
+ goto drop;
+ qlen = skb_queue_len(&sd->input_pkt_queue);
+- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
++ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
+ if (qlen) {
+ enqueue:
+ __skb_queue_tail(&sd->input_pkt_queue, skb);
+@@ -4882,7 +4900,7 @@ static int netif_rx_internal(struct sk_buff *skb)
+ {
+ int ret;
+
+- net_timestamp_check(netdev_tstamp_prequeue, skb);
++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+
+ trace_netif_rx(skb);
+
+@@ -4994,7 +5012,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
+ if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+ trace_consume_skb(skb);
+ else
+- trace_kfree_skb(skb, net_tx_action);
++ trace_kfree_skb(skb, net_tx_action,
++ SKB_DROP_REASON_NOT_SPECIFIED);
+
+ if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
+@@ -5083,8 +5102,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+- qdisc_skb_cb(skb)->mru = 0;
+- qdisc_skb_cb(skb)->post_ct = false;
++ tc_skb_cb(skb)->mru = 0;
++ tc_skb_cb(skb)->post_ct = false;
+ skb->tc_at_ingress = 1;
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+
+@@ -5241,7 +5260,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
+ int ret = NET_RX_DROP;
+ __be16 type;
+
+- net_timestamp_check(!netdev_tstamp_prequeue, skb);
++ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
+
+ trace_netif_receive_skb(skb);
+
+@@ -5622,7 +5641,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
+ {
+ int ret;
+
+- net_timestamp_check(netdev_tstamp_prequeue, skb);
++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+
+ if (skb_defer_rx_timestamp(skb))
+ return NET_RX_SUCCESS;
+@@ -5652,7 +5671,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
+
+ INIT_LIST_HEAD(&sublist);
+ list_for_each_entry_safe(skb, next, head, list) {
+- net_timestamp_check(netdev_tstamp_prequeue, skb);
++ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+ skb_list_del_init(skb);
+ if (!skb_defer_rx_timestamp(skb))
+ list_add_tail(&skb->list, &sublist);
+@@ -6425,7 +6444,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
+ net_rps_action_and_irq_enable(sd);
+ }
+
+- napi->weight = dev_rx_weight;
++ napi->weight = READ_ONCE(dev_rx_weight);
+ while (again) {
+ struct sk_buff *skb;
+
+@@ -7125,8 +7144,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
+ {
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ unsigned long time_limit = jiffies +
+- usecs_to_jiffies(netdev_budget_usecs);
+- int budget = netdev_budget;
++ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
++ int budget = READ_ONCE(netdev_budget);
+ LIST_HEAD(list);
+ LIST_HEAD(repoll);
+
+@@ -9631,6 +9650,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ goto out_unlock;
+ }
+ old_prog = link->prog;
++ if (old_prog->type != new_prog->type ||
++ old_prog->expected_attach_type != new_prog->expected_attach_type) {
++ err = -EINVAL;
++ goto out_unlock;
++ }
++
+ if (old_prog == new_prog) {
+ /* no-op, don't disturb drivers */
+ bpf_prog_put(new_prog);
+@@ -10304,11 +10329,11 @@ int register_netdevice(struct net_device *dev)
+ goto err_uninit;
+
+ ret = netdev_register_kobject(dev);
+- if (ret) {
+- dev->reg_state = NETREG_UNREGISTERED;
++ write_lock(&dev_base_lock);
++ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
++ write_unlock(&dev_base_lock);
++ if (ret)
+ goto err_uninit;
+- }
+- dev->reg_state = NETREG_REGISTERED;
+
+ __netdev_update_features(dev);
+
+@@ -10468,8 +10493,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
+ unsigned long rebroadcast_time, warning_time;
+ int wait = 0, refcnt;
+
+- linkwatch_forget_dev(dev);
+-
+ rebroadcast_time = warning_time = jiffies;
+ refcnt = netdev_refcnt_read(dev);
+
+@@ -10583,7 +10606,10 @@ void netdev_run_todo(void)
+ continue;
+ }
+
++ write_lock(&dev_base_lock);
+ dev->reg_state = NETREG_UNREGISTERED;
++ write_unlock(&dev_base_lock);
++ linkwatch_forget_dev(dev);
+
+ netdev_wait_allrefs(dev);
+
+@@ -10593,9 +10619,7 @@ void netdev_run_todo(void)
+ BUG_ON(!list_empty(&dev->ptype_specific));
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+-#if IS_ENABLED(CONFIG_DECNET)
+- WARN_ON(dev->dn_ptr);
+-#endif
++
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
+@@ -10620,24 +10644,16 @@ void netdev_run_todo(void)
+ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+ const struct net_device_stats *netdev_stats)
+ {
+-#if BITS_PER_LONG == 64
+- BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
+- memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
+- /* zero out counters that only exist in rtnl_link_stats64 */
+- memset((char *)stats64 + sizeof(*netdev_stats), 0,
+- sizeof(*stats64) - sizeof(*netdev_stats));
+-#else
+- size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
+- const unsigned long *src = (const unsigned long *)netdev_stats;
++ size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t);
++ const atomic_long_t *src = (atomic_long_t *)netdev_stats;
+ u64 *dst = (u64 *)stats64;
+
+ BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
+ for (i = 0; i < n; i++)
+- dst[i] = src[i];
++ dst[i] = (unsigned long)atomic_long_read(&src[i]);
+ /* zero out counters that only exist in rtnl_link_stats64 */
+ memset((char *)stats64 + n * sizeof(u64), 0,
+ sizeof(*stats64) - n * sizeof(u64));
+-#endif
+ }
+ EXPORT_SYMBOL(netdev_stats_to_stats64);
+
+@@ -11028,9 +11044,10 @@ void unregister_netdevice_many(struct list_head *head)
+
+ list_for_each_entry(dev, head, unreg_list) {
+ /* And unlink it from device chain. */
+- unlist_netdevice(dev);
+-
++ write_lock(&dev_base_lock);
++ unlist_netdevice(dev, false);
+ dev->reg_state = NETREG_UNREGISTERING;
++ write_unlock(&dev_base_lock);
+ }
+ flush_all_backlogs();
+
+@@ -11175,7 +11192,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ dev_close(dev);
+
+ /* And unlink it from device chain */
+- unlist_netdevice(dev);
++ unlist_netdevice(dev, true);
+
+ synchronize_net();
+
+@@ -11367,8 +11384,7 @@ static int __net_init netdev_init(struct net *net)
+ BUILD_BUG_ON(GRO_HASH_BUCKETS >
+ 8 * sizeof_field(struct napi_struct, gro_bitmask));
+
+- if (net != &init_net)
+- INIT_LIST_HEAD(&net->dev_base_head);
++ INIT_LIST_HEAD(&net->dev_base_head);
+
+ net->dev_name_head = netdev_create_hash();
+ if (net->dev_name_head == NULL)
+diff --git a/net/core/devlink.c b/net/core/devlink.c
+index a856ae401ea5c..b4d7a7f749c18 100644
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -4031,14 +4031,6 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+ return err;
+ }
+
+- if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+- info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+- info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+- dest_net = devlink_netns_get(skb, info);
+- if (IS_ERR(dest_net))
+- return PTR_ERR(dest_net);
+- }
+-
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+@@ -4081,6 +4073,14 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+ return -EINVAL;
+ }
+ }
++ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
++ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
++ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
++ dest_net = devlink_netns_get(skb, info);
++ if (IS_ERR(dest_net))
++ return PTR_ERR(dest_net);
++ }
++
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+
+ if (dest_net)
+@@ -4413,7 +4413,7 @@ static int devlink_param_get(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+ {
+- if (!param->get)
++ if (!param->get || devlink->reload_failed)
+ return -EOPNOTSUPP;
+ return param->get(devlink, param->id, ctx);
+ }
+@@ -4422,7 +4422,7 @@ static int devlink_param_set(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+ {
+- if (!param->set)
++ if (!param->set || devlink->reload_failed)
+ return -EOPNOTSUPP;
+ return param->set(devlink, param->id, ctx);
+ }
+@@ -8795,8 +8795,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
+ .flags = GENL_ADMIN_PERM,
+- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
+- DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
+index 49442cae6f69d..78202141930f4 100644
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000;
+
+ struct net_dm_alert_ops {
+ void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+- void *location);
++ void *location,
++ enum skb_drop_reason reason);
+ void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+ int work, int budget);
+ void (*work_item_func)(struct work_struct *work);
+@@ -262,7 +263,9 @@ out:
+ spin_unlock_irqrestore(&data->lock, flags);
+ }
+
+-static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
++static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
++ void *location,
++ enum skb_drop_reason reason)
+ {
+ trace_drop_common(skb, location);
+ }
+@@ -280,13 +283,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
++ struct net_device *dev;
++
+ /*
+ * only add a note to our monitor buffer if:
+ * 1) this is the dev we received on
+ * 2) its after the last_rx delta
+ * 3) our rx_dropped count has gone up
+ */
+- if ((new_stat->dev == napi->dev) &&
++ /* Paired with WRITE_ONCE() in dropmon_net_event() */
++ dev = READ_ONCE(new_stat->dev);
++ if ((dev == napi->dev) &&
+ (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
+ (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+ trace_drop_common(NULL, NULL);
+@@ -490,7 +497,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+
+ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+ struct sk_buff *skb,
+- void *location)
++ void *location,
++ enum skb_drop_reason reason)
+ {
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *data;
+@@ -1572,7 +1580,10 @@ static int dropmon_net_event(struct notifier_block *ev_block,
+ mutex_lock(&net_dm_mutex);
+ list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+ if (new_stat->dev == dev) {
+- new_stat->dev = NULL;
++
++ /* Paired with READ_ONCE() in trace_napi_poll_hit() */
++ WRITE_ONCE(new_stat->dev, NULL);
++
+ if (trace_state == TRACE_OFF) {
+ list_del_rcu(&new_stat->list);
+ kfree_rcu(new_stat, rcu);
+diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
+index be74ab4551c20..0ccfd5fa5cb9b 100644
+--- a/net/core/dst_cache.c
++++ b/net/core/dst_cache.c
+@@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache)
+ free_percpu(dst_cache->cache);
+ }
+ EXPORT_SYMBOL_GPL(dst_cache_destroy);
++
++void dst_cache_reset_now(struct dst_cache *dst_cache)
++{
++ int i;
++
++ if (!dst_cache->cache)
++ return;
++
++ dst_cache->reset_ts = jiffies;
++ for_each_possible_cpu(i) {
++ struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
++ struct dst_entry *dst = idst->dst;
++
++ idst->cookie = 0;
++ idst->dst = NULL;
++ dst_release(dst);
++ }
++}
++EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
+index 79df7cd9dbc16..1bb567a3b329c 100644
+--- a/net/core/fib_rules.c
++++ b/net/core/fib_rules.c
+@@ -323,7 +323,7 @@ jumped:
+ if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
+ fib6_rule_suppress,
+ fib4_rule_suppress,
+- rule, arg))
++ rule, flags, arg))
+ continue;
+
+ if (err != -EAGAIN) {
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 2e32cee2c4690..76432aa3b717c 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1213,10 +1213,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
+ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+ {
+ u32 filter_size = bpf_prog_size(fp->prog->len);
++ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ /* same check as in sock_kmalloc() */
+- if (filter_size <= sysctl_optmem_max &&
+- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
++ if (filter_size <= optmem_max &&
++ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
+ atomic_add(filter_size, &sk->sk_omem_alloc);
+ return true;
+ }
+@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
++ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+ err = -ENOMEM;
+ else
+ err = reuseport_attach_prog(sk, prog);
+@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
+ }
+ } else {
+ /* BPF_PROG_TYPE_SOCKET_FILTER */
+- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
++ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+ err = -ENOMEM;
+ goto err_prog_put;
+ }
+@@ -1688,7 +1689,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
+ return -EINVAL;
+- if (unlikely(offset > 0xffff))
++ if (unlikely(offset > INT_MAX))
+ return -EFAULT;
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
+ return -EFAULT;
+@@ -1713,7 +1714,7 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_ANYTHING,
+ };
+@@ -1723,7 +1724,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+ {
+ void *ptr;
+
+- if (unlikely(offset > 0xffff))
++ if (unlikely(offset > INT_MAX))
+ goto err_clear;
+
+ ptr = skb_header_pointer(skb, offset, len, to);
+@@ -2018,9 +2019,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+- .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
++ .arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+- .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
++ .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg5_type = ARG_ANYTHING,
+ };
+@@ -2122,8 +2123,17 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+ {
+ unsigned int mlen = skb_network_offset(skb);
+
++ if (unlikely(skb->len <= mlen)) {
++ kfree_skb(skb);
++ return -ERANGE;
++ }
++
+ if (mlen) {
+ __skb_pull(skb, mlen);
++ if (unlikely(!skb->len)) {
++ kfree_skb(skb);
++ return -ERANGE;
++ }
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+@@ -2143,7 +2153,7 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+ {
+ /* Verify that a link layer header is carried */
+- if (unlikely(skb->mac_header >= skb->network_header)) {
++ if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
+@@ -2541,7 +2551,7 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+- .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
++ .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -2711,6 +2721,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
+ if (unlikely(flags))
+ return -EINVAL;
+
++ if (unlikely(len == 0))
++ return 0;
++
+ /* First find the starting scatterlist element */
+ i = msg->sg.start;
+ do {
+@@ -3169,15 +3182,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+
+ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+ {
++ void *old_data;
++
+ /* skb_ensure_writable() is not needed here, as we're
+ * already working on an uncloned skb.
+ */
+ if (unlikely(!pskb_may_pull(skb, off + len)))
+ return -ENOMEM;
+
+- skb_postpull_rcsum(skb, skb->data + off, len);
+- memmove(skb->data + len, skb->data, off);
++ old_data = skb->data;
+ __skb_pull(skb, len);
++ skb_postpull_rcsum(skb, old_data + off, len);
++ memmove(skb->data, old_data, off);
+
+ return 0;
+ }
+@@ -3827,12 +3843,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+ if (unlikely(data_end > data_hard_end))
+ return -EINVAL;
+
+- /* ALL drivers MUST init xdp->frame_sz, chicken check below */
+- if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
+- WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
+- return -EINVAL;
+- }
+-
+ if (unlikely(data_end < xdp->data + ETH_HLEN))
+ return -EINVAL;
+
+@@ -4174,7 +4184,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -4188,7 +4198,7 @@ const struct bpf_func_proto bpf_skb_output_proto = {
+ .arg1_btf_id = &bpf_skb_output_btf_ids[0],
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -4371,7 +4381,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -4397,7 +4407,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ };
+
+@@ -4567,7 +4577,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -4581,7 +4591,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = {
+ .arg1_btf_id = &bpf_xdp_output_btf_ids[0],
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ };
+
+@@ -4741,13 +4751,15 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
+ /* Only some socketops are supported */
+ switch (optname) {
+ case SO_RCVBUF:
+- val = min_t(u32, val, sysctl_rmem_max);
++ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
++ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
+ break;
+ case SO_SNDBUF:
+- val = min_t(u32, val, sysctl_wmem_max);
++ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
++ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
+@@ -4880,7 +4892,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
+ if (val <= 0 || tp->data_segs_out > tp->syn_data)
+ ret = -EINVAL;
+ else
+- tp->snd_cwnd = val;
++ tcp_snd_cwnd_set(tp, val);
+ break;
+ case TCP_BPF_SNDCWND_CLAMP:
+ if (val <= 0) {
+@@ -5067,7 +5079,7 @@ const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+ };
+
+@@ -5101,7 +5113,7 @@ static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+ };
+
+@@ -5135,7 +5147,7 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+ };
+
+@@ -5310,7 +5322,7 @@ static const struct bpf_func_proto bpf_bind_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ };
+
+@@ -5488,7 +5500,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+ }
+
+- if (!neigh)
++ if (!neigh || !(neigh->nud_state & NUD_VALID))
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+@@ -5603,7 +5615,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ * not needed here.
+ */
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+- if (!neigh)
++ if (!neigh || !(neigh->nud_state & NUD_VALID))
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+@@ -5846,7 +5858,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
+ if (err)
+ return err;
+
+- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ return seg6_lookup_nexthop(skb, NULL, 0);
+@@ -5898,7 +5909,7 @@ static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE
+ };
+
+@@ -5908,7 +5919,7 @@ static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE
+ };
+
+@@ -5951,7 +5962,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE
+ };
+
+@@ -6039,7 +6050,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+- .arg3_type = ARG_PTR_TO_MEM,
++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE
+ };
+
+@@ -6157,12 +6168,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
+ static struct sock *
+ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+- u64 flags)
++ u64 flags, int sdif)
+ {
+ struct sock *sk = NULL;
+- u8 family = AF_UNSPEC;
+ struct net *net;
+- int sdif;
++ u8 family;
+
+ if (len == sizeof(tuple->ipv4))
+ family = AF_INET;
+@@ -6171,14 +6181,15 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ else
+ return NULL;
+
+- if (unlikely(family == AF_UNSPEC || flags ||
+- !((s32)netns_id < 0 || netns_id <= S32_MAX)))
++ if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+ goto out;
+
+- if (family == AF_INET)
+- sdif = inet_sdif(skb);
+- else
+- sdif = inet6_sdif(skb);
++ if (sdif < 0) {
++ if (family == AF_INET)
++ sdif = inet_sdif(skb);
++ else
++ sdif = inet6_sdif(skb);
++ }
+
+ if ((s32)netns_id < 0) {
+ net = caller_net;
+@@ -6198,16 +6209,28 @@ out:
+ static struct sock *
+ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+- u64 flags)
++ u64 flags, int sdif)
+ {
+ struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+- ifindex, proto, netns_id, flags);
++ ifindex, proto, netns_id, flags,
++ sdif);
+
+ if (sk) {
+- sk = sk_to_full_sk(sk);
+- if (!sk_fullsock(sk)) {
++ struct sock *sk2 = sk_to_full_sk(sk);
++
++ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
++ * sock refcnt is decremented to prevent a request_sock leak.
++ */
++ if (!sk_fullsock(sk2))
++ sk2 = NULL;
++ if (sk2 != sk) {
+ sock_gen_put(sk);
+- return NULL;
++ /* Ensure there is no need to bump sk2 refcnt */
++ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
++ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
++ return NULL;
++ }
++ sk = sk2;
+ }
+ }
+
+@@ -6230,7 +6253,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ }
+
+ return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+- netns_id, flags);
++ netns_id, flags, -1);
+ }
+
+ static struct sock *
+@@ -6241,10 +6264,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ flags);
+
+ if (sk) {
+- sk = sk_to_full_sk(sk);
+- if (!sk_fullsock(sk)) {
++ struct sock *sk2 = sk_to_full_sk(sk);
++
++ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
++ * sock refcnt is decremented to prevent a request_sock leak.
++ */
++ if (!sk_fullsock(sk2))
++ sk2 = NULL;
++ if (sk2 != sk) {
+ sock_gen_put(sk);
+- return NULL;
++ /* Ensure there is no need to bump sk2 refcnt */
++ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
++ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
++ return NULL;
++ }
++ sk = sk2;
+ }
+ }
+
+@@ -6264,7 +6298,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6283,7 +6317,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6302,7 +6336,79 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
++ .arg3_type = ARG_CONST_SIZE,
++ .arg4_type = ARG_ANYTHING,
++ .arg5_type = ARG_ANYTHING,
++};
++
++BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
++ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
++{
++ struct net_device *dev = skb->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
++
++ return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
++ ifindex, IPPROTO_TCP, netns_id,
++ flags, sdif);
++}
++
++static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
++ .func = bpf_tc_skc_lookup_tcp,
++ .gpl_only = false,
++ .pkt_access = true,
++ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
++ .arg1_type = ARG_PTR_TO_CTX,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
++ .arg3_type = ARG_CONST_SIZE,
++ .arg4_type = ARG_ANYTHING,
++ .arg5_type = ARG_ANYTHING,
++};
++
++BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
++ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
++{
++ struct net_device *dev = skb->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
++
++ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
++ ifindex, IPPROTO_TCP, netns_id,
++ flags, sdif);
++}
++
++static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
++ .func = bpf_tc_sk_lookup_tcp,
++ .gpl_only = false,
++ .pkt_access = true,
++ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
++ .arg1_type = ARG_PTR_TO_CTX,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
++ .arg3_type = ARG_CONST_SIZE,
++ .arg4_type = ARG_ANYTHING,
++ .arg5_type = ARG_ANYTHING,
++};
++
++BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
++ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
++{
++ struct net_device *dev = skb->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
++
++ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
++ ifindex, IPPROTO_UDP, netns_id,
++ flags, sdif);
++}
++
++static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
++ .func = bpf_tc_sk_lookup_udp,
++ .gpl_only = false,
++ .pkt_access = true,
++ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
++ .arg1_type = ARG_PTR_TO_CTX,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6325,12 +6431,13 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
+ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+ {
+- struct net *caller_net = dev_net(ctx->rxq->dev);
+- int ifindex = ctx->rxq->dev->ifindex;
++ struct net_device *dev = ctx->rxq->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
+
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_UDP, netns_id,
+- flags);
++ flags, sdif);
+ }
+
+ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+@@ -6339,7 +6446,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6348,12 +6455,13 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+ BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+ {
+- struct net *caller_net = dev_net(ctx->rxq->dev);
+- int ifindex = ctx->rxq->dev->ifindex;
++ struct net_device *dev = ctx->rxq->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
+
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+- flags);
++ flags, sdif);
+ }
+
+ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+@@ -6362,7 +6470,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6371,12 +6479,13 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+ BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+ {
+- struct net *caller_net = dev_net(ctx->rxq->dev);
+- int ifindex = ctx->rxq->dev->ifindex;
++ struct net_device *dev = ctx->rxq->dev;
++ int ifindex = dev->ifindex, sdif = dev_sdif(dev);
++ struct net *caller_net = dev_net(dev);
+
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+- flags);
++ flags, sdif);
+ }
+
+ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+@@ -6385,7 +6494,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6396,7 +6505,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ {
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0,
+- IPPROTO_TCP, netns_id, flags);
++ IPPROTO_TCP, netns_id, flags,
++ -1);
+ }
+
+ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+@@ -6404,7 +6514,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6415,7 +6525,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ {
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_TCP,
+- netns_id, flags);
++ netns_id, flags, -1);
+ }
+
+ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+@@ -6423,7 +6533,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6434,7 +6544,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
+ {
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_UDP,
+- netns_id, flags);
++ netns_id, flags, -1);
+ }
+
+ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+@@ -6442,7 +6552,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+@@ -6708,30 +6818,39 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
+ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
+ return -EINVAL;
+
+ if (!th->ack || th->rst || th->syn)
+ return -ENOENT;
+
++ if (unlikely(iph_len < sizeof(struct iphdr)))
++ return -EINVAL;
++
+ if (tcp_synq_no_recent_overflow(sk))
+ return -ENOENT;
+
+ cookie = ntohl(th->ack_seq) - 1;
+
+- switch (sk->sk_family) {
+- case AF_INET:
+- if (unlikely(iph_len < sizeof(struct iphdr)))
++ /* Both struct iphdr and struct ipv6hdr have the version field at the
++ * same offset so we can cast to the shorter header (struct iphdr).
++ */
++ switch (((struct iphdr *)iph)->version) {
++ case 4:
++ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
+ return -EINVAL;
+
+ ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ break;
+
+ #if IS_BUILTIN(CONFIG_IPV6)
+- case AF_INET6:
++ case 6:
+ if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
++ if (sk->sk_family != AF_INET6)
++ return -EINVAL;
++
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ break;
+ #endif /* CONFIG_IPV6 */
+@@ -6755,9 +6874,9 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+ };
+
+@@ -6774,7 +6893,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
+ return -ENOENT;
+
+ if (!th->syn || th->ack || th->fin || th->rst)
+@@ -6824,9 +6943,9 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+- .arg4_type = ARG_PTR_TO_MEM,
++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+ };
+
+@@ -6840,6 +6959,8 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
+ return -ENETUNREACH;
+ if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
+ return -ESOCKTNOSUPPORT;
++ if (sk_unhashed(sk))
++ return -EOPNOTSUPP;
+ if (sk_is_refcounted(sk) &&
+ unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+ return -ENOENT;
+@@ -7055,7 +7176,7 @@ static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+- .arg2_type = ARG_PTR_TO_MEM,
++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -7162,6 +7283,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ #endif
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_cg_sock_proto;
++ case BPF_FUNC_ktime_get_coarse_ns:
++ return &bpf_ktime_get_coarse_ns_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+@@ -7426,9 +7549,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ #endif
+ #ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+- return &bpf_sk_lookup_tcp_proto;
++ return &bpf_tc_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+- return &bpf_sk_lookup_udp_proto;
++ return &bpf_tc_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ case BPF_FUNC_tcp_sock:
+@@ -7436,7 +7559,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ case BPF_FUNC_get_listener_sock:
+ return &bpf_get_listener_sock_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+- return &bpf_skc_lookup_tcp_proto;
++ return &bpf_tc_skc_lookup_tcp_proto;
+ case BPF_FUNC_tcp_check_syncookie:
+ return &bpf_tcp_check_syncookie_proto;
+ case BPF_FUNC_skb_ecn_set_ce:
+@@ -7959,6 +8082,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+ {
+ const int size_default = sizeof(__u32);
++ int field_size;
+
+ if (off < 0 || off >= sizeof(struct bpf_sock))
+ return false;
+@@ -7970,7 +8094,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ case offsetof(struct bpf_sock, family):
+ case offsetof(struct bpf_sock, type):
+ case offsetof(struct bpf_sock, protocol):
+- case offsetof(struct bpf_sock, dst_port):
+ case offsetof(struct bpf_sock, src_port):
+ case offsetof(struct bpf_sock, rx_queue_mapping):
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+@@ -7979,6 +8102,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
++ case bpf_ctx_range(struct bpf_sock, dst_port):
++ field_size = size == size_default ?
++ size_default : sizeof_field(struct bpf_sock, dst_port);
++ bpf_ctx_record_field_size(info, field_size);
++ return bpf_ctx_narrow_access_ok(off, size, field_size);
++ case offsetofend(struct bpf_sock, dst_port) ...
++ offsetof(struct bpf_sock, dst_ip4) - 1:
++ return false;
+ }
+
+ return size == size_default;
+@@ -8174,9 +8305,9 @@ void bpf_warn_invalid_xdp_action(u32 act)
+ {
+ const u32 act_max = XDP_REDIRECT;
+
+- WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
+- act > act_max ? "Illegal" : "Driver unsupported",
+- act);
++ pr_warn_once("%s XDP return value %u, expect packet loss!\n",
++ act > act_max ? "Illegal" : "Driver unsupported",
++ act);
+ }
+ EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+
+@@ -9735,22 +9866,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
+ static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
+ struct bpf_insn *insn)
+ {
+- /* si->dst_reg = skb->data */
++ int reg;
++ int temp_reg_off = offsetof(struct sk_buff, cb) +
++ offsetof(struct sk_skb_cb, temp_reg);
++
++ if (si->src_reg == si->dst_reg) {
++ /* We need an extra register, choose and save a register. */
++ reg = BPF_REG_9;
++ if (si->src_reg == reg || si->dst_reg == reg)
++ reg--;
++ if (si->src_reg == reg || si->dst_reg == reg)
++ reg--;
++ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
++ } else {
++ reg = si->dst_reg;
++ }
++
++ /* reg = skb->data */
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+- si->dst_reg, si->src_reg,
++ reg, si->src_reg,
+ offsetof(struct sk_buff, data));
+ /* AX = skb->len */
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+ BPF_REG_AX, si->src_reg,
+ offsetof(struct sk_buff, len));
+- /* si->dst_reg = skb->data + skb->len */
+- *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
++ /* reg = skb->data + skb->len */
++ *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
+ /* AX = skb->data_len */
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
+ BPF_REG_AX, si->src_reg,
+ offsetof(struct sk_buff, data_len));
+- /* si->dst_reg = skb->data + skb->len - skb->data_len */
+- *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
++
++ /* reg = skb->data + skb->len - skb->data_len */
++ *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
++
++ if (si->src_reg == si->dst_reg) {
++ /* Restore the saved register */
++ *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
++ *insn++ = BPF_MOV64_REG(si->dst_reg, reg);
++ *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
++ }
+
+ return insn;
+ }
+@@ -9761,11 +9916,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+ struct bpf_prog *prog, u32 *target_size)
+ {
+ struct bpf_insn *insn = insn_buf;
++ int off;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data_end):
+ insn = bpf_convert_data_end_access(si, insn);
+ break;
++ case offsetof(struct __sk_buff, cb[0]) ...
++ offsetofend(struct __sk_buff, cb[4]) - 1:
++ BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
++ BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
++ offsetof(struct sk_skb_cb, data)) %
++ sizeof(__u64));
++
++ prog->cb_access = 1;
++ off = si->off;
++ off -= offsetof(struct __sk_buff, cb[0]);
++ off += offsetof(struct sk_buff, cb);
++ off += offsetof(struct sk_skb_cb, data);
++ if (type == BPF_WRITE)
++ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
++ si->src_reg, off);
++ else
++ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
++ si->src_reg, off);
++ break;
++
++
+ default:
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
+@@ -10260,6 +10437,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
+ return &sk_reuseport_load_bytes_relative_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_ptr_cookie_proto;
++ case BPF_FUNC_ktime_get_coarse_ns:
++ return &bpf_ktime_get_coarse_ns_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+@@ -10468,7 +10647,8 @@ static bool sk_lookup_is_valid_access(int off, int size,
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
++ case offsetof(struct bpf_sk_lookup, remote_port) ...
++ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+@@ -10741,6 +10921,8 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
+ case BPF_FUNC_skc_to_udp6_sock:
+ func = &bpf_skc_to_udp6_sock_proto;
+ break;
++ case BPF_FUNC_ktime_get_coarse_ns:
++ return &bpf_ktime_get_coarse_ns_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index bac0184cf3de7..2596a54c2fe71 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -238,7 +238,7 @@ void
+ skb_flow_dissect_ct(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, u16 *ctinfo_map,
+- size_t mapsize, bool post_ct)
++ size_t mapsize, bool post_ct, u16 zone)
+ {
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ struct flow_dissector_key_ct *key;
+@@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
+ if (!ct) {
+ key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID;
++ key->ct_zone = zone;
+ return;
+ }
+
+@@ -269,7 +270,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
+ key->ct_zone = ct->zone.id;
+ #endif
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+- key->ct_mark = ct->mark;
++ key->ct_mark = READ_ONCE(ct->mark);
+ #endif
+
+ cl = nf_ct_labels_find(ct);
+@@ -1180,6 +1181,7 @@ proto_again:
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
+ key_vlan->vlan_tpid = saved_vlan_tpid;
++ key_vlan->vlan_eth_type = proto;
+ }
+
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+@@ -1517,9 +1519,8 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
+
+ switch (keys->control.addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+- addr_diff = (__force u32)keys->addrs.v4addrs.dst -
+- (__force u32)keys->addrs.v4addrs.src;
+- if (addr_diff < 0)
++ if ((__force u32)keys->addrs.v4addrs.dst <
++ (__force u32)keys->addrs.v4addrs.src)
+ swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+ if ((__force u16)keys->ports.dst <
+@@ -1613,8 +1614,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
+
+ memset(&keys, 0, sizeof(keys));
+ __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
+- &keys, NULL, 0, 0, 0,
+- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
++ &keys, NULL, 0, 0, 0, 0);
+
+ return __flow_hash_from_keys(&keys, &hashrnd);
+ }
+diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
+index 6beaea13564a8..fb11103fa8afc 100644
+--- a/net/core/flow_offload.c
++++ b/net/core/flow_offload.c
+@@ -565,3 +565,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+ return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
+ }
+ EXPORT_SYMBOL(flow_indr_dev_setup_offload);
++
++bool flow_indr_dev_exists(void)
++{
++ return !list_empty(&flow_block_indr_dev_list);
++}
++EXPORT_SYMBOL(flow_indr_dev_exists);
+diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
+index 6eb2e5ec2c506..2f66f3f295630 100644
+--- a/net/core/gro_cells.c
++++ b/net/core/gro_cells.c
+@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
+
+ cell = this_cpu_ptr(gcells->cells);
+
+- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
++ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
+ drop:
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+diff --git a/net/core/link_watch.c b/net/core/link_watch.c
+index 1a455847da54f..9599afd0862da 100644
+--- a/net/core/link_watch.c
++++ b/net/core/link_watch.c
+@@ -55,7 +55,7 @@ static void rfc2863_policy(struct net_device *dev)
+ if (operstate == dev->operstate)
+ return;
+
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+
+ switch(dev->link_mode) {
+ case IF_LINK_MODE_TESTING:
+@@ -74,7 +74,7 @@ static void rfc2863_policy(struct net_device *dev)
+
+ dev->operstate = operstate;
+
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+ }
+
+
+diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
+index 2f7940bcf7151..f6c327c7badb4 100644
+--- a/net/core/lwt_bpf.c
++++ b/net/core/lwt_bpf.c
+@@ -59,9 +59,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
+ ret = BPF_OK;
+ } else {
+ skb_reset_mac_header(skb);
+- ret = skb_do_redirect(skb);
+- if (ret == 0)
+- ret = BPF_REDIRECT;
++ skb_do_redirect(skb);
++ ret = BPF_REDIRECT;
+ }
+ break;
+
+@@ -158,10 +157,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ return dst->lwtstate->orig_output(net, sk, skb);
+ }
+
+-static int xmit_check_hhlen(struct sk_buff *skb)
++static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
+ {
+- int hh_len = skb_dst(skb)->dev->hard_header_len;
+-
+ if (skb_headroom(skb) < hh_len) {
+ int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+@@ -256,7 +253,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+
+ err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ if (unlikely(err))
+- return err;
++ return net_xmit_errno(err);
+
+ /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+ return LWTUNNEL_XMIT_DONE;
+@@ -273,6 +270,7 @@ static int bpf_xmit(struct sk_buff *skb)
+
+ bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+ if (bpf->xmit.prog) {
++ int hh_len = dst->dev->hard_header_len;
+ __be16 proto = skb->protocol;
+ int ret;
+
+@@ -290,7 +288,7 @@ static int bpf_xmit(struct sk_buff *skb)
+ /* If the header was expanded, headroom might be too
+ * small for L2 header to come, expand as needed.
+ */
+- ret = xmit_check_hhlen(skb);
++ ret = xmit_check_hhlen(skb, hh_len);
+ if (unlikely(ret))
+ return ret;
+
+diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
+index 2820aca2173a8..9ccd64e8a666a 100644
+--- a/net/core/lwtunnel.c
++++ b/net/core/lwtunnel.c
+@@ -197,6 +197,10 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
+ nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+
+ if (nla_entype) {
++ if (nla_len(nla_entype) < sizeof(u16)) {
++ NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
++ return -EINVAL;
++ }
+ encap_type = nla_get_u16(nla_entype);
+
+ if (lwtunnel_valid_encap_type(encap_type,
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 2d5bc3a75faec..af022db48b7a9 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -241,7 +241,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
+ (n->nud_state == NUD_NOARP) ||
+ (tbl->is_multicast &&
+ tbl->is_multicast(n->primary_key)) ||
+- time_after(tref, n->updated))
++ !time_in_range(n->updated, tref, jiffies))
+ remove = true;
+ write_unlock(&n->lock);
+
+@@ -261,7 +261,17 @@ static int neigh_forced_gc(struct neigh_table *tbl)
+
+ static void neigh_add_timer(struct neighbour *n, unsigned long when)
+ {
++ /* Use safe distance from the jiffies - LONG_MAX point while timer
++ * is running in DELAY/PROBE state but still show to user space
++ * large times in the past.
++ */
++ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
++
+ neigh_hold(n);
++ if (!time_in_range(n->confirmed, mint, jiffies))
++ n->confirmed = mint;
++ if (time_before(n->used, n->confirmed))
++ n->used = n->confirmed;
+ if (unlikely(mod_timer(&n->timer, when))) {
+ printk("NEIGH: BUG, double timer add, state is %x\n",
+ n->nud_state);
+@@ -279,11 +289,26 @@ static int neigh_del_timer(struct neighbour *n)
+ return 0;
+ }
+
+-static void pneigh_queue_purge(struct sk_buff_head *list)
++static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
+ {
++ struct sk_buff_head tmp;
++ unsigned long flags;
+ struct sk_buff *skb;
+
+- while ((skb = skb_dequeue(list)) != NULL) {
++ skb_queue_head_init(&tmp);
++ spin_lock_irqsave(&list->lock, flags);
++ skb = skb_peek(list);
++ while (skb != NULL) {
++ struct sk_buff *skb_next = skb_peek_next(skb, list);
++ if (net == NULL || net_eq(dev_net(skb->dev), net)) {
++ __skb_unlink(skb, list);
++ __skb_queue_tail(&tmp, skb);
++ }
++ skb = skb_next;
++ }
++ spin_unlock_irqrestore(&list->lock, flags);
++
++ while ((skb = __skb_dequeue(&tmp))) {
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ }
+@@ -357,9 +382,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ write_lock_bh(&tbl->lock);
+ neigh_flush_dev(tbl, dev, skip_perm);
+ pneigh_ifdown_and_unlock(tbl, dev);
+-
+- del_timer_sync(&tbl->proxy_timer);
+- pneigh_queue_purge(&tbl->proxy_queue);
++ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
++ if (skb_queue_empty_lockless(&tbl->proxy_queue))
++ del_timer_sync(&tbl->proxy_timer);
+ return 0;
+ }
+
+@@ -379,7 +404,7 @@ EXPORT_SYMBOL(neigh_ifdown);
+
+ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
+ struct net_device *dev,
+- bool exempt_from_gc)
++ u8 flags, bool exempt_from_gc)
+ {
+ struct neighbour *n = NULL;
+ unsigned long now = jiffies;
+@@ -412,6 +437,7 @@ do_alloc:
+ n->updated = n->used = now;
+ n->nud_state = NUD_NONE;
+ n->output = neigh_blackhole;
++ n->flags = flags;
+ seqlock_init(&n->hh.hh_lock);
+ n->parms = neigh_parms_clone(&tbl->parms);
+ timer_setup(&n->timer, neigh_timer_handler, 0);
+@@ -544,50 +570,18 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+ }
+ EXPORT_SYMBOL(neigh_lookup);
+
+-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+- const void *pkey)
++static struct neighbour *
++___neigh_create(struct neigh_table *tbl, const void *pkey,
++ struct net_device *dev, u8 flags,
++ bool exempt_from_gc, bool want_ref)
+ {
+- struct neighbour *n;
+- unsigned int key_len = tbl->key_len;
+- u32 hash_val;
++ u32 hash_val, key_len = tbl->key_len;
++ struct neighbour *n1, *rc, *n;
+ struct neigh_hash_table *nht;
+-
+- NEIGH_CACHE_STAT_INC(tbl, lookups);
+-
+- rcu_read_lock_bh();
+- nht = rcu_dereference_bh(tbl->nht);
+- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
+-
+- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+- n != NULL;
+- n = rcu_dereference_bh(n->next)) {
+- if (!memcmp(n->primary_key, pkey, key_len) &&
+- net_eq(dev_net(n->dev), net)) {
+- if (!refcount_inc_not_zero(&n->refcnt))
+- n = NULL;
+- NEIGH_CACHE_STAT_INC(tbl, hits);
+- break;
+- }
+- }
+-
+- rcu_read_unlock_bh();
+- return n;
+-}
+-EXPORT_SYMBOL(neigh_lookup_nodev);
+-
+-static struct neighbour *___neigh_create(struct neigh_table *tbl,
+- const void *pkey,
+- struct net_device *dev,
+- bool exempt_from_gc, bool want_ref)
+-{
+- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
+- u32 hash_val;
+- unsigned int key_len = tbl->key_len;
+ int error;
+- struct neigh_hash_table *nht;
+
++ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
+ trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
+-
+ if (!n) {
+ rc = ERR_PTR(-ENOBUFS);
+ goto out;
+@@ -674,7 +668,7 @@ out_neigh_release:
+ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, bool want_ref)
+ {
+- return ___neigh_create(tbl, pkey, dev, false, want_ref);
++ return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
+ }
+ EXPORT_SYMBOL(__neigh_create);
+
+@@ -733,11 +727,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
+
+ ASSERT_RTNL();
+
+- n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
++ n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (!n)
+ goto out;
+
+- n->protocol = 0;
+ write_pnet(&n->net, net);
+ memcpy(n->key, pkey, key_len);
+ n->dev = dev;
+@@ -929,12 +922,14 @@ static void neigh_periodic_work(struct work_struct *work)
+ goto next_elt;
+ }
+
+- if (time_before(n->used, n->confirmed))
++ if (time_before(n->used, n->confirmed) &&
++ time_is_before_eq_jiffies(n->confirmed))
+ n->used = n->confirmed;
+
+ if (refcount_read(&n->refcnt) == 1 &&
+ (state == NUD_FAILED ||
+- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
++ !time_in_range_open(jiffies, n->used,
++ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ *np = n->next;
+ neigh_mark_dead(n);
+ write_unlock(&n->lock);
+@@ -1217,7 +1212,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
+ lladdr instead of overriding it
+ if it is different.
+ NEIGH_UPDATE_F_ADMIN means that the change is administrative.
+-
++ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
+ NTF_ROUTER flag.
+ NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
+@@ -1255,6 +1250,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
+ goto out;
+
+ ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
++ if (flags & NEIGH_UPDATE_F_USE) {
++ new = old & ~NUD_PERMANENT;
++ neigh->nud_state = new;
++ err = 0;
++ goto out;
++ }
+
+ if (!(new & NUD_VALID)) {
+ neigh_del_timer(neigh);
+@@ -1730,7 +1731,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
+ /* It is not clean... Fix it to unload IPv6 module safely */
+ cancel_delayed_work_sync(&tbl->gc_work);
+ del_timer_sync(&tbl->proxy_timer);
+- pneigh_queue_purge(&tbl->proxy_queue);
++ pneigh_queue_purge(&tbl->proxy_queue, NULL);
+ neigh_ifdown(tbl, NULL);
+ if (atomic_read(&tbl->entries))
+ pr_crit("neighbour leakage\n");
+@@ -1762,9 +1763,6 @@ static struct neigh_table *neigh_find_table(int family)
+ case AF_INET6:
+ tbl = neigh_tables[NEIGH_ND_TABLE];
+ break;
+- case AF_DECnet:
+- tbl = neigh_tables[NEIGH_DN_TABLE];
+- break;
+ }
+
+ return tbl;
+@@ -1942,7 +1940,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+
+ exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
+ ndm->ndm_flags & NTF_EXT_LEARNED;
+- neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
++ neigh = ___neigh_create(tbl, dst, dev,
++ ndm->ndm_flags & NTF_EXT_LEARNED,
++ exempt_from_gc, true);
+ if (IS_ERR(neigh)) {
+ err = PTR_ERR(neigh);
+ goto out;
+@@ -1961,22 +1961,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+
+ if (protocol)
+ neigh->protocol = protocol;
+-
+ if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+-
+ if (ndm->ndm_flags & NTF_ROUTER)
+ flags |= NEIGH_UPDATE_F_ISROUTER;
++ if (ndm->ndm_flags & NTF_USE)
++ flags |= NEIGH_UPDATE_F_USE;
+
+- if (ndm->ndm_flags & NTF_USE) {
++ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
++ NETLINK_CB(skb).portid, extack);
++ if (!err && ndm->ndm_flags & NTF_USE) {
+ neigh_event_send(neigh, NULL);
+ err = 0;
+- } else
+- err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+- NETLINK_CB(skb).portid, extack);
+-
++ }
+ neigh_release(neigh);
+-
+ out:
+ return err;
+ }
+diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
+index d8b9dbabd4a43..88cc0ad7d386e 100644
+--- a/net/core/net-procfs.c
++++ b/net/core/net-procfs.c
+@@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = {
+ .show = softnet_seq_show,
+ };
+
+-static void *ptype_get_idx(loff_t pos)
++static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
+ {
++ struct list_head *ptype_list = NULL;
+ struct packet_type *pt = NULL;
++ struct net_device *dev;
+ loff_t i = 0;
+ int t;
+
++ for_each_netdev_rcu(seq_file_net(seq), dev) {
++ ptype_list = &dev->ptype_all;
++ list_for_each_entry_rcu(pt, ptype_list, list) {
++ if (i == pos)
++ return pt;
++ ++i;
++ }
++ }
++
+ list_for_each_entry_rcu(pt, &ptype_all, list) {
+ if (i == pos)
+ return pt;
+@@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+ {
+ rcu_read_lock();
+- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
++ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ }
+
+ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++ struct net_device *dev;
+ struct packet_type *pt;
+ struct list_head *nxt;
+ int hash;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+- return ptype_get_idx(0);
++ return ptype_get_idx(seq, 0);
+
+ pt = v;
+ nxt = pt->list.next;
++ if (pt->dev) {
++ if (nxt != &pt->dev->ptype_all)
++ goto found;
++
++ dev = pt->dev;
++ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
++ if (!list_empty(&dev->ptype_all)) {
++ nxt = dev->ptype_all.next;
++ goto found;
++ }
++ }
++
++ nxt = ptype_all.next;
++ goto ptype_all;
++ }
++
+ if (pt->type == htons(ETH_P_ALL)) {
++ptype_all:
+ if (nxt != &ptype_all)
+ goto found;
+ hash = 0;
+@@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "Type Device Function\n");
+- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
++ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
++ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
+ if (pt->type == htons(ETH_P_ALL))
+ seq_puts(seq, "ALL ");
+ else
+diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
+index b2e49eb7001d6..e9ea0695efb42 100644
+--- a/net/core/net-sysfs.c
++++ b/net/core/net-sysfs.c
+@@ -32,6 +32,7 @@ static const char fmt_dec[] = "%d\n";
+ static const char fmt_ulong[] = "%lu\n";
+ static const char fmt_u64[] = "%llu\n";
+
++/* Caller holds RTNL or dev_base_lock */
+ static inline int dev_isalive(const struct net_device *dev)
+ {
+ return dev->reg_state <= NETREG_REGISTERED;
+@@ -175,6 +176,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
+ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
++ struct net_device *netdev = to_net_dev(dev);
++
++ /* The check is also done in change_carrier; this helps returning early
++ * without hitting the trylock/restart in netdev_store.
++ */
++ if (!netdev->netdev_ops->ndo_change_carrier)
++ return -EOPNOTSUPP;
++
+ return netdev_store(dev, attr, buf, len, change_carrier);
+ }
+
+@@ -196,10 +205,16 @@ static ssize_t speed_show(struct device *dev,
+ struct net_device *netdev = to_net_dev(dev);
+ int ret = -EINVAL;
+
++ /* The check is also done in __ethtool_get_link_ksettings; this helps
++ * returning early without hitting the trylock/restart below.
++ */
++ if (!netdev->ethtool_ops->get_link_ksettings)
++ return ret;
++
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+- if (netif_running(netdev)) {
++ if (netif_running(netdev) && netif_device_present(netdev)) {
+ struct ethtool_link_ksettings cmd;
+
+ if (!__ethtool_get_link_ksettings(netdev, &cmd))
+@@ -216,6 +231,12 @@ static ssize_t duplex_show(struct device *dev,
+ struct net_device *netdev = to_net_dev(dev);
+ int ret = -EINVAL;
+
++ /* The check is also done in __ethtool_get_link_ksettings; this helps
++ * returning early without hitting the trylock/restart below.
++ */
++ if (!netdev->ethtool_ops->get_link_ksettings)
++ return ret;
++
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+@@ -468,6 +489,14 @@ static ssize_t proto_down_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
++ struct net_device *netdev = to_net_dev(dev);
++
++ /* The check is also done in change_proto_down; this helps returning
++ * early without hitting the trylock/restart in netdev_store.
++ */
++ if (!netdev->netdev_ops->ndo_change_proto_down)
++ return -EOPNOTSUPP;
++
+ return netdev_store(dev, attr, buf, len, change_proto_down);
+ }
+ NETDEVICE_SHOW_RW(proto_down, fmt_dec);
+@@ -478,6 +507,12 @@ static ssize_t phys_port_id_show(struct device *dev,
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
++ /* The check is also done in dev_get_phys_port_id; this helps returning
++ * early without hitting the trylock/restart below.
++ */
++ if (!netdev->netdev_ops->ndo_get_phys_port_id)
++ return -EOPNOTSUPP;
++
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+@@ -500,6 +535,13 @@ static ssize_t phys_port_name_show(struct device *dev,
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
++ /* The checks are also done in dev_get_phys_port_name; this helps
++ * returning early without hitting the trylock/restart below.
++ */
++ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
++ !netdev->netdev_ops->ndo_get_devlink_port)
++ return -EOPNOTSUPP;
++
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+@@ -522,6 +564,14 @@ static ssize_t phys_switch_id_show(struct device *dev,
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
++ /* The checks are also done in dev_get_phys_port_name; this helps
++ * returning early without hitting the trylock/restart below. This works
++ * because recurse is false when calling dev_get_port_parent_id.
++ */
++ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
++ !netdev->netdev_ops->ndo_get_devlink_port)
++ return -EOPNOTSUPP;
++
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+@@ -1226,6 +1276,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
++ /* The check is also done later; this helps returning early without
++ * hitting the trylock/restart below.
++ */
++ if (!dev->netdev_ops->ndo_set_tx_maxrate)
++ return -EOPNOTSUPP;
++
+ err = kstrtou32(buf, 10, &rate);
+ if (err < 0)
+ return err;
+@@ -1765,6 +1821,9 @@ static void remove_queue_kobjects(struct net_device *dev)
+
+ net_rx_queue_update_kobjects(dev, real_rx, 0);
+ netdev_queue_update_kobjects(dev, real_tx, 0);
++
++ dev->real_num_rx_queues = 0;
++ dev->real_num_tx_queues = 0;
+ #ifdef CONFIG_SYSFS
+ kset_unregister(dev->queues_kset);
+ #endif
+@@ -1869,7 +1928,7 @@ static struct class net_class __ro_after_init = {
+ .get_ownership = net_get_ownership,
+ };
+
+-#ifdef CONFIG_OF_NET
++#ifdef CONFIG_OF
+ static int of_dev_node_match(struct device *dev, const void *data)
+ {
+ for (; dev; dev = dev->parent) {
+diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
+index a448a9b5bb2d6..dcddc54d08409 100644
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem);
+ static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
+ #endif
+
+-struct net init_net = {
+- .ns.count = REFCOUNT_INIT(1),
+- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+-#ifdef CONFIG_KEYS
+- .key_domain = &init_net_key_domain,
+-#endif
+-};
++struct net init_net;
+ EXPORT_SYMBOL(init_net);
+
+ static bool init_net_initialized;
+@@ -123,6 +117,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
+
+ static int ops_init(const struct pernet_operations *ops, struct net *net)
+ {
++ struct net_generic *ng;
+ int err = -ENOMEM;
+ void *data = NULL;
+
+@@ -141,6 +136,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
+ if (!err)
+ return 0;
+
++ if (ops->id && ops->size) {
++ ng = rcu_dereference_protected(net->gen,
++ lockdep_is_held(&pernet_ops_rwsem));
++ ng->ptr[*ops->id] = NULL;
++ }
++
+ cleanup:
+ kfree(data);
+
+@@ -164,8 +165,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
+ {
+ struct net *net;
+ if (ops->exit) {
+- list_for_each_entry(net, net_exit_list, exit_list)
++ list_for_each_entry(net, net_exit_list, exit_list) {
+ ops->exit(net);
++ cond_resched();
++ }
+ }
+ if (ops->exit_batch)
+ ops->exit_batch(net_exit_list);
+@@ -473,7 +476,9 @@ struct net *copy_net_ns(unsigned long flags,
+
+ if (rv < 0) {
+ put_userns:
++#ifdef CONFIG_KEYS
+ key_remove_domain(net->key_domain);
++#endif
+ put_user_ns(user_ns);
+ net_free(net);
+ dec_ucounts:
+@@ -605,7 +610,9 @@ static void cleanup_net(struct work_struct *work)
+ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
+ list_del_init(&net->exit_list);
+ dec_net_namespaces(net->ucounts);
++#ifdef CONFIG_KEYS
+ key_remove_domain(net->key_domain);
++#endif
+ put_user_ns(net->user_ns);
+ net_free(net);
+ }
+@@ -1075,7 +1082,7 @@ out:
+ rtnl_set_sk_err(net, RTNLGRP_NSID, err);
+ }
+
+-static int __init net_ns_init(void)
++void __init net_ns_init(void)
+ {
+ struct net_generic *ng;
+
+@@ -1096,6 +1103,9 @@ static int __init net_ns_init(void)
+
+ rcu_assign_pointer(init_net.gen, ng);
+
++#ifdef CONFIG_KEYS
++ init_net.key_domain = &init_net_key_domain;
++#endif
+ down_write(&pernet_ops_rwsem);
+ if (setup_net(&init_net, &init_user_ns))
+ panic("Could not setup the initial network namespace");
+@@ -1110,12 +1120,8 @@ static int __init net_ns_init(void)
+ RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
+ RTNL_FLAG_DOIT_UNLOCKED);
+-
+- return 0;
+ }
+
+-pure_initcall(net_ns_init);
+-
+ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
+ {
+ ops_pre_exit_list(ops, net_exit_list);
+diff --git a/net/core/netpoll.c b/net/core/netpoll.c
+index edfc0f8011f88..bd750863959f2 100644
+--- a/net/core/netpoll.c
++++ b/net/core/netpoll.c
+@@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work)
+ }
+ }
+
++static int netif_local_xmit_active(struct net_device *dev)
++{
++ int i;
++
++ for (i = 0; i < dev->num_tx_queues; i++) {
++ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
++
++ if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
++ return 1;
++ }
++
++ return 0;
++}
++
+ static void poll_one_napi(struct napi_struct *napi)
+ {
+ int work;
+@@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev)
+ if (!ni || down_trylock(&ni->dev_lock))
+ return;
+
+- if (!netif_running(dev)) {
++ /* Some drivers will take the same locks in poll and xmit,
++ * we can't poll if local CPU is already in xmit.
++ */
++ if (!netif_running(dev) || netif_local_xmit_active(dev)) {
+ up(&ni->dev_lock);
+ return;
+ }
+diff --git a/net/core/of_net.c b/net/core/of_net.c
+new file mode 100644
+index 0000000000000..f1a9bf7578e7a
+--- /dev/null
++++ b/net/core/of_net.c
+@@ -0,0 +1,170 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * OF helpers for network devices.
++ *
++ * Initially copied out of arch/powerpc/kernel/prom_parse.c
++ */
++#include <linux/etherdevice.h>
++#include <linux/kernel.h>
++#include <linux/of_net.h>
++#include <linux/of_platform.h>
++#include <linux/phy.h>
++#include <linux/export.h>
++#include <linux/device.h>
++#include <linux/nvmem-consumer.h>
++
++/**
++ * of_get_phy_mode - Get phy mode for given device_node
++ * @np: Pointer to the given device_node
++ * @interface: Pointer to the result
++ *
++ * The function gets phy interface string from property 'phy-mode' or
++ * 'phy-connection-type'. The index in phy_modes table is set in
++ * interface and 0 returned. In case of error interface is set to
++ * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
++ */
++int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
++{
++ const char *pm;
++ int err, i;
++
++ *interface = PHY_INTERFACE_MODE_NA;
++
++ err = of_property_read_string(np, "phy-mode", &pm);
++ if (err < 0)
++ err = of_property_read_string(np, "phy-connection-type", &pm);
++ if (err < 0)
++ return err;
++
++ for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
++ if (!strcasecmp(pm, phy_modes(i))) {
++ *interface = i;
++ return 0;
++ }
++
++ return -ENODEV;
++}
++EXPORT_SYMBOL_GPL(of_get_phy_mode);
++
++static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
++{
++ struct property *pp = of_find_property(np, name, NULL);
++
++ if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
++ memcpy(addr, pp->value, ETH_ALEN);
++ return 0;
++ }
++ return -ENODEV;
++}
++
++static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
++{
++ struct platform_device *pdev = of_find_device_by_node(np);
++ struct nvmem_cell *cell;
++ const void *mac;
++ size_t len;
++ int ret;
++
++ /* Try lookup by device first, there might be a nvmem_cell_lookup
++ * associated with a given device.
++ */
++ if (pdev) {
++ ret = nvmem_get_mac_address(&pdev->dev, addr);
++ put_device(&pdev->dev);
++ return ret;
++ }
++
++ cell = of_nvmem_cell_get(np, "mac-address");
++ if (IS_ERR(cell))
++ return PTR_ERR(cell);
++
++ mac = nvmem_cell_read(cell, &len);
++ nvmem_cell_put(cell);
++
++ if (IS_ERR(mac))
++ return PTR_ERR(mac);
++
++ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
++ kfree(mac);
++ return -EINVAL;
++ }
++
++ memcpy(addr, mac, ETH_ALEN);
++ kfree(mac);
++
++ return 0;
++}
++
++/**
++ * of_get_mac_address()
++ * @np: Caller's Device Node
++ * @addr: Pointer to a six-byte array for the result
++ *
++ * Search the device tree for the best MAC address to use. 'mac-address' is
++ * checked first, because that is supposed to contain to "most recent" MAC
++ * address. If that isn't set, then 'local-mac-address' is checked next,
++ * because that is the default address. If that isn't set, then the obsolete
++ * 'address' is checked, just in case we're using an old device tree. If any
++ * of the above isn't set, then try to get MAC address from nvmem cell named
++ * 'mac-address'.
++ *
++ * Note that the 'address' property is supposed to contain a virtual address of
++ * the register set, but some DTS files have redefined that property to be the
++ * MAC address.
++ *
++ * All-zero MAC addresses are rejected, because those could be properties that
++ * exist in the device tree, but were not set by U-Boot. For example, the
++ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
++ * addresses. Some older U-Boots only initialized 'local-mac-address'. In
++ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
++ * but is all zeros.
++ *
++ * Return: 0 on success and errno in case of error.
++*/
++int of_get_mac_address(struct device_node *np, u8 *addr)
++{
++ int ret;
++
++ if (!np)
++ return -ENODEV;
++
++ ret = of_get_mac_addr(np, "mac-address", addr);
++ if (!ret)
++ return 0;
++
++ ret = of_get_mac_addr(np, "local-mac-address", addr);
++ if (!ret)
++ return 0;
++
++ ret = of_get_mac_addr(np, "address", addr);
++ if (!ret)
++ return 0;
++
++ return of_get_mac_addr_nvmem(np, addr);
++}
++EXPORT_SYMBOL(of_get_mac_address);
++
++/**
++ * of_get_ethdev_address()
++ * @np: Caller's Device Node
++ * @dev: Pointer to netdevice which address will be updated
++ *
++ * Search the device tree for the best MAC address to use.
++ * If found set @dev->dev_addr to that address.
++ *
++ * See documentation of of_get_mac_address() for more information on how
++ * the best address is determined.
++ *
++ * Return: 0 on success and errno in case of error.
++ */
++int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
++{
++ u8 addr[ETH_ALEN];
++ int ret;
++
++ ret = of_get_mac_address(np, addr);
++ if (!ret)
++ eth_hw_addr_set(dev, addr);
++ return ret;
++}
++EXPORT_SYMBOL(of_get_ethdev_address);
+diff --git a/net/core/page_pool.c b/net/core/page_pool.c
+index 1a6978427d6c8..069d6ba0e33fb 100644
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -26,6 +26,29 @@
+
+ #define BIAS_MAX LONG_MAX
+
++static bool page_pool_producer_lock(struct page_pool *pool)
++ __acquires(&pool->ring.producer_lock)
++{
++ bool in_softirq = in_softirq();
++
++ if (in_softirq)
++ spin_lock(&pool->ring.producer_lock);
++ else
++ spin_lock_bh(&pool->ring.producer_lock);
++
++ return in_softirq;
++}
++
++static void page_pool_producer_unlock(struct page_pool *pool,
++ bool in_softirq)
++ __releases(&pool->ring.producer_lock)
++{
++ if (in_softirq)
++ spin_unlock(&pool->ring.producer_lock);
++ else
++ spin_unlock_bh(&pool->ring.producer_lock);
++}
++
+ static int page_pool_init(struct page_pool *pool,
+ const struct page_pool_params *params)
+ {
+@@ -390,8 +413,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
+ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
+ {
+ int ret;
+- /* BH protection not needed if current is serving softirq */
+- if (in_serving_softirq())
++ /* BH protection not needed if current is softirq */
++ if (in_softirq())
+ ret = ptr_ring_produce(&pool->ring, page);
+ else
+ ret = ptr_ring_produce_bh(&pool->ring, page);
+@@ -446,7 +469,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
+ page_pool_dma_sync_for_device(pool, page,
+ dma_sync_size);
+
+- if (allow_direct && in_serving_softirq() &&
++ if (allow_direct && in_softirq() &&
+ page_pool_recycle_in_cache(page, pool))
+ return NULL;
+
+@@ -489,6 +512,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count)
+ {
+ int i, bulk_len = 0;
++ bool in_softirq;
+
+ for (i = 0; i < count; i++) {
+ struct page *page = virt_to_head_page(data[i]);
+@@ -503,12 +527,12 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ return;
+
+ /* Bulk producer into ptr_ring page_pool cache */
+- page_pool_ring_lock(pool);
++ in_softirq = page_pool_producer_lock(pool);
+ for (i = 0; i < bulk_len; i++) {
+ if (__ptr_ring_produce(&pool->ring, data[i]))
+ break; /* ring full */
+ }
+- page_pool_ring_unlock(pool);
++ page_pool_producer_unlock(pool, in_softirq);
+
+ /* Hopefully all pages was return into ptr_ring */
+ if (likely(i == bulk_len))
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 8ccce85562a1d..1b71e5c582bbc 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -842,9 +842,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
+ }
+
+ if (dev->operstate != operstate) {
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ dev->operstate = operstate;
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+ netdev_state_change(dev);
+ }
+ }
+@@ -922,24 +922,27 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
+ nla_total_size(sizeof(struct ifla_vf_rate)) +
+ nla_total_size(sizeof(struct ifla_vf_link_state)) +
+ nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+- nla_total_size(0) + /* nest IFLA_VF_STATS */
+- /* IFLA_VF_STATS_RX_PACKETS */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_TX_PACKETS */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_RX_BYTES */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_TX_BYTES */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_BROADCAST */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_MULTICAST */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_RX_DROPPED */
+- nla_total_size_64bit(sizeof(__u64)) +
+- /* IFLA_VF_STATS_TX_DROPPED */
+- nla_total_size_64bit(sizeof(__u64)) +
+ nla_total_size(sizeof(struct ifla_vf_trust)));
++ if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) {
++ size += num_vfs *
++ (nla_total_size(0) + /* nest IFLA_VF_STATS */
++ /* IFLA_VF_STATS_RX_PACKETS */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_TX_PACKETS */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_RX_BYTES */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_TX_BYTES */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_BROADCAST */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_MULTICAST */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_RX_DROPPED */
++ nla_total_size_64bit(sizeof(__u64)) +
++ /* IFLA_VF_STATS_TX_DROPPED */
++ nla_total_size_64bit(sizeof(__u64)));
++ }
+ return size;
+ } else
+ return 0;
+@@ -1214,7 +1217,8 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+ struct net_device *dev,
+ int vfs_num,
+- struct nlattr *vfinfo)
++ struct nlattr *vfinfo,
++ u32 ext_filter_mask)
+ {
+ struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct nlattr *vf, *vfstats, *vfvlanlist;
+@@ -1320,33 +1324,35 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+ goto nla_put_vf_failure;
+ }
+ nla_nest_end(skb, vfvlanlist);
+- memset(&vf_stats, 0, sizeof(vf_stats));
+- if (dev->netdev_ops->ndo_get_vf_stats)
+- dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+- &vf_stats);
+- vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS);
+- if (!vfstats)
+- goto nla_put_vf_failure;
+- if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
+- vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
+- vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
+- vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
+- vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
+- vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
+- vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+- vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+- vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
+- nla_nest_cancel(skb, vfstats);
+- goto nla_put_vf_failure;
++ if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) {
++ memset(&vf_stats, 0, sizeof(vf_stats));
++ if (dev->netdev_ops->ndo_get_vf_stats)
++ dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
++ &vf_stats);
++ vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS);
++ if (!vfstats)
++ goto nla_put_vf_failure;
++ if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
++ vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
++ vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
++ vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
++ vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
++ vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
++ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
++ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
++ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
++ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
++ nla_nest_cancel(skb, vfstats);
++ goto nla_put_vf_failure;
++ }
++ nla_nest_end(skb, vfstats);
+ }
+- nla_nest_end(skb, vfstats);
+ nla_nest_end(skb, vf);
+ return 0;
+
+@@ -1379,7 +1385,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
++ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
+ return -EMSGSIZE;
+ }
+
+@@ -1698,6 +1704,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ {
+ struct ifinfomsg *ifm;
+ struct nlmsghdr *nlh;
++ struct Qdisc *qdisc;
+
+ ASSERT_RTNL();
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+@@ -1715,6 +1722,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
+ goto nla_put_failure;
+
++ qdisc = rtnl_dereference(dev->qdisc);
+ if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
+ nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
+ nla_put_u8(skb, IFLA_OPERSTATE,
+@@ -1733,8 +1741,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ #endif
+ put_master_ifindex(skb, dev) ||
+ nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
+- (dev->qdisc &&
+- nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
++ (qdisc &&
++ nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
+ nla_put_ifalias(skb, dev) ||
+ nla_put_u32(skb, IFLA_CARRIER_CHANGES,
+ atomic_read(&dev->carrier_up_count) +
+@@ -2165,13 +2173,27 @@ out_err:
+ return err;
+ }
+
+-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
+- struct netlink_ext_ack *exterr)
++int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
++ struct netlink_ext_ack *exterr)
+ {
+- return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
++ const struct ifinfomsg *ifmp;
++ const struct nlattr *attrs;
++ size_t len;
++
++ ifmp = nla_data(nla_peer);
++ attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
++ len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
++
++ if (ifmp->ifi_index < 0) {
++ NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
++ "ifindex can't be negative");
++ return -EINVAL;
++ }
++
++ return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
+ exterr);
+ }
+-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
++EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
+
+ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+ {
+@@ -2779,11 +2801,11 @@ static int do_setlink(const struct sk_buff *skb,
+ if (tb[IFLA_LINKMODE]) {
+ unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
+
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ if (dev->link_mode ^ value)
+ status |= DO_SETLINK_NOTIFY;
+ dev->link_mode = value;
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+ }
+
+ if (tb[IFLA_VFINFO_LIST]) {
+@@ -3153,6 +3175,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
+ struct net_device *dev;
+ unsigned int num_tx_queues = 1;
+ unsigned int num_rx_queues = 1;
++ int err;
+
+ if (tb[IFLA_NUM_TX_QUEUES])
+ num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
+@@ -3188,13 +3211,18 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
++ err = validate_linkmsg(dev, tb, extack);
++ if (err < 0) {
++ free_netdev(dev);
++ return ERR_PTR(err);
++ }
++
+ dev_net_set(dev, net);
+ dev->rtnl_link_ops = ops;
+ dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
+
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+- int err;
+
+ err = dev_validate_mtu(dev, mtu, extack);
+ if (err) {
+@@ -3254,8 +3282,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+- const struct rtnl_link_ops *m_ops = NULL;
+- struct net_device *master_dev = NULL;
++ const struct rtnl_link_ops *m_ops;
++ struct net_device *master_dev;
+ struct net *net = sock_net(skb->sk);
+ const struct rtnl_link_ops *ops;
+ struct nlattr *tb[IFLA_MAX + 1];
+@@ -3266,6 +3294,7 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct ifinfomsg *ifm;
+ char ifname[IFNAMSIZ];
+ struct nlattr **data;
++ bool link_specified;
+ int err;
+
+ #ifdef CONFIG_MODULES
+@@ -3286,13 +3315,22 @@ replay:
+ ifname[0] = '\0';
+
+ ifm = nlmsg_data(nlh);
+- if (ifm->ifi_index > 0)
++ if (ifm->ifi_index > 0) {
++ link_specified = true;
+ dev = __dev_get_by_index(net, ifm->ifi_index);
+- else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
++ } else if (ifm->ifi_index < 0) {
++ NL_SET_ERR_MSG(extack, "ifindex can't be negative");
++ return -EINVAL;
++ } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
++ link_specified = true;
+ dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+- else
++ } else {
++ link_specified = false;
+ dev = NULL;
++ }
+
++ master_dev = NULL;
++ m_ops = NULL;
+ if (dev) {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
+@@ -3392,7 +3430,12 @@ replay:
+ }
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+- if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
++ /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
++ * or it's for a group
++ */
++ if (link_specified)
++ return -ENODEV;
++ if (tb[IFLA_GROUP])
+ return rtnl_group_changelink(skb, net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, extack, tb);
+@@ -3627,13 +3670,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
+ bool *changed, struct netlink_ext_ack *extack)
+ {
+ char *alt_ifname;
++ size_t size;
+ int err;
+
+ err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
+ if (err)
+ return err;
+
+- alt_ifname = nla_strdup(attr, GFP_KERNEL);
++ if (cmd == RTM_NEWLINKPROP) {
++ size = rtnl_prop_list_size(dev);
++ size += nla_total_size(ALTIFNAMSIZ);
++ if (size >= U16_MAX) {
++ NL_SET_ERR_MSG(extack,
++ "effective property list too long");
++ return -EINVAL;
++ }
++ }
++
++ alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
+ if (!alt_ifname)
+ return -ENOMEM;
+
+@@ -3884,7 +3938,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
+ ndm->ndm_ifindex = dev->ifindex;
+ ndm->ndm_state = ndm_state;
+
+- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
++ if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr))
+ goto nla_put_failure;
+ if (vid)
+ if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid))
+@@ -3898,10 +3952,10 @@ nla_put_failure:
+ return -EMSGSIZE;
+ }
+
+-static inline size_t rtnl_fdb_nlmsg_size(void)
++static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev)
+ {
+ return NLMSG_ALIGN(sizeof(struct ndmsg)) +
+- nla_total_size(ETH_ALEN) + /* NDA_LLADDR */
++ nla_total_size(dev->addr_len) + /* NDA_LLADDR */
+ nla_total_size(sizeof(u16)) + /* NDA_VLAN */
+ 0;
+ }
+@@ -3913,7 +3967,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+- skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC);
++ skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC);
+ if (!skb)
+ goto errout;
+
+@@ -4892,13 +4946,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (br_spec) {
+ nla_for_each_nested(attr, br_spec, rem) {
+- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_len(attr) < sizeof(flags))
+ return -EINVAL;
+
+ have_flags = true;
+ flags = nla_get_u16(attr);
+- break;
++ }
++
++ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
++ if (nla_len(attr) < sizeof(u16))
++ return -EINVAL;
+ }
+ }
+ }
+diff --git a/net/core/scm.c b/net/core/scm.c
+index 5c356f0dee30c..acb7d776fa6ec 100644
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -229,6 +229,8 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+ if (msg->msg_control_is_user) {
+ struct cmsghdr __user *cm = msg->msg_control_user;
+
++ check_object_size(data, cmlen - sizeof(*cm), true);
++
+ if (!user_write_access_begin(cm, cmlen))
+ goto efault;
+
+diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
+index b5bc680d47553..189eea1372d5d 100644
+--- a/net/core/secure_seq.c
++++ b/net/core/secure_seq.c
+@@ -22,6 +22,8 @@
+ static siphash_key_t net_secret __read_mostly;
+ static siphash_key_t ts_secret __read_mostly;
+
++#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
++
+ static __always_inline void net_secret_init(void)
+ {
+ net_get_random_once(&net_secret, sizeof(net_secret));
+@@ -62,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
+ .daddr = *(struct in6_addr *)daddr,
+ };
+
+- if (net->ipv4.sysctl_tcp_timestamps != 1)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ return 0;
+
+ ts_secret_init();
+@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+ }
+ EXPORT_SYMBOL(secure_tcpv6_seq);
+
+-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
++u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+ __be16 dport)
+ {
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
++ unsigned int timeseed;
+ __be16 dport;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+- .dport = dport
++ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
++ .dport = dport,
+ };
+ net_secret_init();
+ return siphash(&combined, offsetofend(typeof(combined), dport),
+@@ -116,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
+ #ifdef CONFIG_INET
+ u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
+ {
+- if (net->ipv4.sysctl_tcp_timestamps != 1)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+ return 0;
+
+ ts_secret_init();
+@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+ }
+ EXPORT_SYMBOL_GPL(secure_tcp_seq);
+
+-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
++u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+ {
+ net_secret_init();
+- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
+- (__force u16)dport, &net_secret);
++ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
++ (__force u16)dport,
++ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
++ &net_secret);
+ }
+ EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
+ #endif
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index fe9358437380c..7090844af4991 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -203,7 +203,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+- skb->end = skb->tail + size;
++ skb_set_end_offset(skb, size);
+ skb->mac_header = (typeof(skb->mac_header))~0U;
+ skb->transport_header = (typeof(skb->transport_header))~0U;
+
+@@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb)
+ EXPORT_SYMBOL(__kfree_skb);
+
+ /**
+- * kfree_skb - free an sk_buff
++ * kfree_skb_reason - free an sk_buff with special reason
+ * @skb: buffer to free
++ * @reason: reason why this skb is dropped
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+- * hit zero.
++ * hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
++ * tracepoint.
+ */
+-void kfree_skb(struct sk_buff *skb)
++void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+ {
+ if (!skb_unref(skb))
+ return;
+
+- trace_kfree_skb(skb, __builtin_return_address(0));
++ trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ __kfree_skb(skb);
+ }
+-EXPORT_SYMBOL(kfree_skb);
++EXPORT_SYMBOL(kfree_skb_reason);
+
+ void kfree_skb_list(struct sk_buff *segs)
+ {
+@@ -832,7 +834,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+
+ if (dev)
+- printk("%sdev name=%s feat=0x%pNF\n",
++ printk("%sdev name=%s feat=%pNF\n",
+ level, dev->name, &dev->features);
+ if (sk)
+ printk("%ssk family=%hu type=%u proto=%u\n",
+@@ -1738,11 +1740,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+ skb->head = data;
+ skb->head_frag = 0;
+ skb->data += off;
++
++ skb_set_end_offset(skb, size);
+ #ifdef NET_SKBUFF_DATA_USES_OFFSET
+- skb->end = size;
+ off = nhead;
+-#else
+- skb->end = skb->head + size;
+ #endif
+ skb->tail += off;
+ skb_headers_offset_update(skb, nhead);
+@@ -1790,6 +1791,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+ }
+ EXPORT_SYMBOL(skb_realloc_headroom);
+
++int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
++{
++ unsigned int saved_end_offset, saved_truesize;
++ struct skb_shared_info *shinfo;
++ int res;
++
++ saved_end_offset = skb_end_offset(skb);
++ saved_truesize = skb->truesize;
++
++ res = pskb_expand_head(skb, 0, 0, pri);
++ if (res)
++ return res;
++
++ skb->truesize = saved_truesize;
++
++ if (likely(skb_end_offset(skb) == saved_end_offset))
++ return 0;
++
++ shinfo = skb_shinfo(skb);
++
++ /* We are about to change back skb->end,
++ * we need to move skb_shinfo() to its new location.
++ */
++ memmove(skb->head + saved_end_offset,
++ shinfo,
++ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
++
++ skb_set_end_offset(skb, saved_end_offset);
++
++ return 0;
++}
++
+ /**
+ * skb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+@@ -2230,6 +2263,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
+ insp = list;
+ } else {
+ /* Eaten partially. */
++ if (skb_is_gso(skb) && !list->head_frag &&
++ skb_headlen(list))
++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+
+ if (skb_shared(list)) {
+ /* Sucks! We need to fork list. :-( */
+@@ -2254,7 +2290,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
+ /* Free pulled out fragments. */
+ while ((list = skb_shinfo(skb)->frag_list) != insp) {
+ skb_shinfo(skb)->frag_list = list->next;
+- kfree_skb(list);
++ consume_skb(list);
+ }
+ /* And insert new clone at head. */
+ if (clone) {
+@@ -3449,19 +3485,7 @@ EXPORT_SYMBOL(skb_split);
+ */
+ static int skb_prepare_for_shift(struct sk_buff *skb)
+ {
+- int ret = 0;
+-
+- if (skb_cloned(skb)) {
+- /* Save and restore truesize: pskb_expand_head() may reallocate
+- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
+- * cannot change truesize at this point.
+- */
+- unsigned int save_truesize = skb->truesize;
+-
+- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+- skb->truesize = save_truesize;
+- }
+- return ret;
++ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
+ }
+
+ /**
+@@ -3800,7 +3824,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
+ } else if (i < MAX_SKB_FRAGS) {
+ get_page(page);
+- skb_fill_page_desc(skb, i, page, offset, size);
++ skb_fill_page_desc_noacc(skb, i, page, offset, size);
+ } else {
+ return -EMSGSIZE;
+ }
+@@ -3854,17 +3878,23 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ unsigned int delta_len = 0;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *nskb, *tmp;
+- int err;
++ int len_diff, err;
+
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
++ /* Ensure the head is writeable before touching the shared info */
++ err = skb_unclone(skb, GFP_ATOMIC);
++ if (err)
++ goto err_linearize;
++
+ skb_shinfo(skb)->frag_list = NULL;
+
+- do {
++ while (list_skb) {
+ nskb = list_skb;
+ list_skb = list_skb->next;
+
+ err = 0;
++ delta_truesize += nskb->truesize;
+ if (skb_shared(nskb)) {
+ tmp = skb_clone(nskb, GFP_ATOMIC);
+ if (tmp) {
+@@ -3889,14 +3919,15 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ tail = nskb;
+
+ delta_len += nskb->len;
+- delta_truesize += nskb->truesize;
+
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
+
+ skb_release_head_state(nskb);
++ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
+ __copy_skb_header(nskb, skb);
+
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
++ nskb->transport_header += len_diff;
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ offset + tnl_hlen);
+@@ -3904,8 +3935,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ if (skb_needs_linearize(nskb, features) &&
+ __skb_linearize(nskb))
+ goto err_linearize;
+-
+- } while (list_skb);
++ }
+
+ skb->truesize = skb->truesize - delta_truesize;
+ skb->data_len = skb->data_len - delta_len;
+@@ -3971,39 +4001,40 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
+ struct sk_buff *segs = NULL;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
+- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ unsigned int mss = skb_shinfo(head_skb)->gso_size;
+ unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+- struct sk_buff *frag_skb = head_skb;
+ unsigned int offset = doffset;
+ unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
+ unsigned int partial_segs = 0;
+ unsigned int headroom;
+ unsigned int len = head_skb->len;
++ struct sk_buff *frag_skb;
++ skb_frag_t *frag;
+ __be16 proto;
+ bool csum, sg;
+- int nfrags = skb_shinfo(head_skb)->nr_frags;
+ int err = -ENOMEM;
+ int i = 0;
+- int pos;
+-
+- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+- /* gso_size is untrusted, and we have a frag_list with a linear
+- * non head_frag head.
+- *
+- * (we assume checking the first list_skb member suffices;
+- * i.e if either of the list_skb members have non head_frag
+- * head, then the first one has too).
+- *
+- * If head_skb's headlen does not fit requested gso_size, it
+- * means that the frag_list members do NOT terminate on exact
+- * gso_size boundaries. Hence we cannot perform skb_frag_t page
+- * sharing. Therefore we must fallback to copying the frag_list
+- * skbs; we do so by disabling SG.
+- */
+- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+- features &= ~NETIF_F_SG;
++ int nfrags, pos;
++
++ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
++ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
++ struct sk_buff *check_skb;
++
++ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
++ if (skb_headlen(check_skb) && !check_skb->head_frag) {
++ /* gso_size is untrusted, and we have a frag_list with
++ * a linear non head_frag item.
++ *
++ * If head_skb's headlen does not fit requested gso_size,
++ * it means that the frag_list members do NOT terminate
++ * on exact gso_size boundaries. Hence we cannot perform
++ * skb_frag_t page sharing. Therefore we must fallback to
++ * copying the frag_list skbs; we do so by disabling SG.
++ */
++ features &= ~NETIF_F_SG;
++ break;
++ }
++ }
+ }
+
+ __skb_push(head_skb, doffset);
+@@ -4060,6 +4091,13 @@ normal:
+ headroom = skb_headroom(head_skb);
+ pos = skb_headlen(head_skb);
+
++ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
++ return ERR_PTR(-ENOMEM);
++
++ nfrags = skb_shinfo(head_skb)->nr_frags;
++ frag = skb_shinfo(head_skb)->frags;
++ frag_skb = head_skb;
++
+ do {
+ struct sk_buff *nskb;
+ skb_frag_t *nskb_frag;
+@@ -4080,6 +4118,10 @@ normal:
+ (skb_headlen(list_skb) == len || sg)) {
+ BUG_ON(skb_headlen(list_skb) > len);
+
++ nskb = skb_clone(list_skb, GFP_ATOMIC);
++ if (unlikely(!nskb))
++ goto err;
++
+ i = 0;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+@@ -4098,12 +4140,8 @@ normal:
+ frag++;
+ }
+
+- nskb = skb_clone(list_skb, GFP_ATOMIC);
+ list_skb = list_skb->next;
+
+- if (unlikely(!nskb))
+- goto err;
+-
+ if (unlikely(pskb_trim(nskb, len))) {
+ kfree_skb(nskb);
+ goto err;
+@@ -4165,9 +4203,8 @@ normal:
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
+ } else {
+- skb_copy_bits(head_skb, offset,
+- skb_put(nskb, len),
+- len);
++ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
++ goto err;
+ }
+ continue;
+ }
+@@ -4180,12 +4217,16 @@ normal:
+ skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
+ SKBFL_SHARED_FRAG;
+
+- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
++ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ goto err;
+
+ while (pos < offset + len) {
+ if (i >= nfrags) {
++ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
++ skb_zerocopy_clone(nskb, list_skb,
++ GFP_ATOMIC))
++ goto err;
++
+ i = 0;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+@@ -4199,10 +4240,6 @@ normal:
+ i--;
+ frag--;
+ }
+- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+- skb_zerocopy_clone(nskb, frag_skb,
+- GFP_ATOMIC))
+- goto err;
+
+ list_skb = list_skb->next;
+ }
+@@ -4856,7 +4893,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
+ serr->ee.ee_data = skb_shinfo(skb)->tskey;
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
+- serr->ee.ee_data -= sk->sk_tskey;
++ serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
+ }
+
+ err = sock_queue_err_skb(sk, skb);
+@@ -4869,7 +4906,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
+ {
+ bool ret;
+
+- if (likely(sysctl_tstamp_allow_data || tsonly))
++ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
+ return true;
+
+ read_lock_bh(&sk->sk_callback_lock);
+@@ -4934,6 +4971,11 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ skb = alloc_skb(0, GFP_ATOMIC);
+ } else {
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
++
++ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
++ kfree_skb(skb);
++ return;
++ }
+ }
+ if (!skb)
+ return;
+@@ -5371,11 +5413,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
+ if (skb_cloned(to))
+ return false;
+
+- /* The page pool signature of struct page will eventually figure out
+- * which pages can be recycled or not but for now let's prohibit slab
+- * allocated and page_pool allocated SKBs from being coalesced.
++ /* In general, avoid mixing page_pool and non-page_pool allocated
++ * pages within the same SKB. Additionally avoid dealing with clones
++ * with page_pool pages, in case the SKB is using page_pool fragment
++ * references (PP_FLAG_PAGE_FRAG). Since we only take full page
++ * references for cloned SKBs at the moment that would result in
++ * inconsistent reference counts.
++ * In theory we could take full references if @from is cloned and
++ * !@to->pp_recycle but its tricky (due to potential race with
++ * the clone disappearing) and rare, so not worth dealing with.
+ */
+- if (to->pp_recycle != from->pp_recycle)
++ if (to->pp_recycle != from->pp_recycle ||
++ (from->pp_recycle && skb_cloned(from)))
+ return false;
+
+ if (len <= skb_tailroom(to)) {
+@@ -6171,11 +6220,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
+ skb->head = data;
+ skb->data = data;
+ skb->head_frag = 0;
+-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+- skb->end = size;
+-#else
+- skb->end = skb->head + size;
+-#endif
++ skb_set_end_offset(skb, size);
+ skb_set_tail_pointer(skb, skb_headlen(skb));
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+@@ -6232,7 +6277,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
+ /* Free pulled out fragments. */
+ while ((list = shinfo->frag_list) != insp) {
+ shinfo->frag_list = list->next;
+- kfree_skb(list);
++ consume_skb(list);
+ }
+ /* And insert new clone at head. */
+ if (clone) {
+@@ -6313,11 +6358,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
+ skb->head = data;
+ skb->head_frag = 0;
+ skb->data = data;
+-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+- skb->end = size;
+-#else
+- skb->end = skb->head + size;
+-#endif
++ skb_set_end_offset(skb, size);
+ skb_reset_tail_pointer(skb);
+ skb_headers_offset_update(skb, 0);
+ skb->cloned = 0;
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index a86ef7e844f8c..9cd14212dcd0b 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+ int elem_first_coalesce)
+ {
+ struct page_frag *pfrag = sk_page_frag(sk);
++ u32 osize = msg->sg.size;
+ int ret = 0;
+
+ len -= msg->sg.size;
+@@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+ u32 orig_offset;
+ int use, i;
+
+- if (!sk_page_frag_refill(sk, pfrag))
+- return -ENOMEM;
++ if (!sk_page_frag_refill(sk, pfrag)) {
++ ret = -ENOMEM;
++ goto msg_trim;
++ }
+
+ orig_offset = pfrag->offset;
+ use = min_t(int, len, pfrag->size - orig_offset);
+- if (!sk_wmem_schedule(sk, use))
+- return -ENOMEM;
++ if (!sk_wmem_schedule(sk, use)) {
++ ret = -ENOMEM;
++ goto msg_trim;
++ }
+
+ i = msg->sg.end;
+ sk_msg_iter_var_prev(i);
+@@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+ }
+
+ return ret;
++
++msg_trim:
++ sk_msg_trim(sk, msg, osize);
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(sk_msg_alloc);
+
+@@ -426,8 +435,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ if (copied + copy > len)
+ copy = len - copied;
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
+- if (!copy)
+- return copied ? copied : -EFAULT;
++ if (!copy) {
++ copied = copied ? copied : -EFAULT;
++ goto out;
++ }
+
+ copied += copy;
+ if (likely(!peek)) {
+@@ -447,13 +458,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ * didn't copy the entire length lets just break.
+ */
+ if (copy != sge->length)
+- return copied;
++ goto out;
+ sk_msg_iter_var_next(i);
+ }
+
+ if (copied == len)
+ break;
+- } while (i != msg_rx->sg.end);
++ } while ((i != msg_rx->sg.end) && !sg_is_last(sge));
+
+ if (unlikely(peek)) {
+ msg_rx = sk_psock_next_msg(psock, msg_rx);
+@@ -463,13 +474,15 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ }
+
+ msg_rx->sg.start = i;
+- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
++ if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
+ msg_rx = sk_psock_dequeue_msg(psock);
+ kfree_sk_msg(msg_rx);
+ }
+ msg_rx = sk_psock_peek_msg(psock);
+ }
+-
++out:
++ if (psock->work_state.skb && copied > 0)
++ schedule_work(&psock->work);
+ return copied;
+ }
+ EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+@@ -508,6 +521,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ }
+
+ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
++ u32 off, u32 len,
+ struct sk_psock *psock,
+ struct sock *sk,
+ struct sk_msg *msg)
+@@ -521,11 +535,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+ */
+ if (skb_linearize(skb))
+ return -EAGAIN;
+- num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
++ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
+ if (unlikely(num_sge < 0))
+ return num_sge;
+
+- copied = skb->len;
++ copied = len;
+ msg->sg.start = 0;
+ msg->sg.size = copied;
+ msg->sg.end = num_sge;
+@@ -536,9 +550,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+ return copied;
+ }
+
+-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
++static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
++ u32 off, u32 len);
+
+-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
++static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
++ u32 off, u32 len)
+ {
+ struct sock *sk = psock->sk;
+ struct sk_msg *msg;
+@@ -549,7 +565,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+ * correctly.
+ */
+ if (unlikely(skb->sk == sk))
+- return sk_psock_skb_ingress_self(psock, skb);
++ return sk_psock_skb_ingress_self(psock, skb, off, len);
+ msg = sk_psock_create_ingress_msg(sk, skb);
+ if (!msg)
+ return -EAGAIN;
+@@ -561,7 +577,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+ * into user buffers.
+ */
+ skb_set_owner_r(skb, sk);
+- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
++ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
+@@ -571,7 +587,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+ * skb. In this case we do not need to check memory limits or skb_set_owner_r
+ * because the skb is already accounted for here.
+ */
+-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
++static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
++ u32 off, u32 len)
+ {
+ struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sock *sk = psock->sk;
+@@ -581,7 +598,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
+ return -EAGAIN;
+ sk_msg_init(msg);
+ skb_set_owner_r(skb, sk);
+- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
++ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
+@@ -595,7 +612,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+ return -EAGAIN;
+ return skb_send_sock(psock->sk, skb, off, len);
+ }
+- return sk_psock_skb_ingress(psock, skb);
++ return sk_psock_skb_ingress(psock, skb, off, len);
+ }
+
+ static void sk_psock_skb_state(struct sk_psock *psock,
+@@ -638,6 +655,12 @@ static void sk_psock_backlog(struct work_struct *work)
+ while ((skb = skb_dequeue(&psock->ingress_skb))) {
+ len = skb->len;
+ off = 0;
++ if (skb_bpf_strparser(skb)) {
++ struct strp_msg *stm = strp_msg(skb);
++
++ off = stm->offset;
++ len = stm->full_len;
++ }
+ start:
+ ingress = skb_bpf_ingress(skb);
+ skb_bpf_redirect_clear(skb);
+@@ -676,6 +699,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
+
+ write_lock_bh(&sk->sk_callback_lock);
+
++ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
++ psock = ERR_PTR(-EINVAL);
++ goto out;
++ }
++
+ if (sk->sk_user_data) {
+ psock = ERR_PTR(-EBUSY);
+ goto out;
+@@ -692,6 +720,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
+ psock->eval = __SK_NONE;
+ psock->sk_proto = prot;
+ psock->saved_unhash = prot->unhash;
++ psock->saved_destroy = prot->destroy;
+ psock->saved_close = prot->close;
+ psock->saved_write_space = sk->sk_write_space;
+
+@@ -707,7 +736,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
+ sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
+ refcount_set(&psock->refcnt, 1);
+
+- rcu_assign_sk_user_data_nocopy(sk, psock);
++ __rcu_assign_sk_user_data_with_flags(sk, psock,
++ SK_USER_DATA_NOCOPY |
++ SK_USER_DATA_PSOCK);
+ sock_hold(sk);
+
+ out:
+@@ -766,16 +797,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
+ }
+ }
+
+-void sk_psock_stop(struct sk_psock *psock, bool wait)
++void sk_psock_stop(struct sk_psock *psock)
+ {
+ spin_lock_bh(&psock->ingress_lock);
+ sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+ sk_psock_cork_free(psock);
+ __sk_psock_zap_ingress(psock);
+ spin_unlock_bh(&psock->ingress_lock);
+-
+- if (wait)
+- cancel_work_sync(&psock->work);
+ }
+
+ static void sk_psock_done_strp(struct sk_psock *psock);
+@@ -813,7 +841,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
+ sk_psock_stop_verdict(sk, psock);
+ write_unlock_bh(&sk->sk_callback_lock);
+
+- sk_psock_stop(psock, false);
++ sk_psock_stop(psock);
+
+ INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
+ queue_rcu_work(system_wq, &psock->rwork);
+@@ -852,13 +880,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+ ret = sk_psock_map_verd(ret, msg->sk_redir);
+ psock->apply_bytes = msg->apply_bytes;
+ if (ret == __SK_REDIRECT) {
+- if (psock->sk_redir)
++ if (psock->sk_redir) {
+ sock_put(psock->sk_redir);
+- psock->sk_redir = msg->sk_redir;
+- if (!psock->sk_redir) {
++ psock->sk_redir = NULL;
++ }
++ if (!msg->sk_redir) {
+ ret = __SK_DROP;
+ goto out;
+ }
++ psock->redir_ingress = sk_msg_to_ingress(msg);
++ psock->sk_redir = msg->sk_redir;
+ sock_hold(psock->sk_redir);
+ }
+ out:
+@@ -877,6 +908,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
+ * return code, but then didn't set a redirect interface.
+ */
+ if (unlikely(!sk_other)) {
++ skb_bpf_redirect_clear(skb);
+ sock_drop(from->sk, skb);
+ return -EIO;
+ }
+@@ -944,6 +976,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+ {
+ struct sock *sk_other;
+ int err = 0;
++ u32 len, off;
+
+ switch (verdict) {
+ case __SK_PASS:
+@@ -951,6 +984,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+ sk_other = psock->sk;
+ if (sock_flag(sk_other, SOCK_DEAD) ||
+ !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
++ skb_bpf_redirect_clear(skb);
+ goto out_free;
+ }
+
+@@ -963,7 +997,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+ * retrying later from workqueue.
+ */
+ if (skb_queue_empty(&psock->ingress_skb)) {
+- err = sk_psock_skb_ingress_self(psock, skb);
++ len = skb->len;
++ off = 0;
++ if (skb_bpf_strparser(skb)) {
++ struct strp_msg *stm = strp_msg(skb);
++
++ off = stm->offset;
++ len = stm->full_len;
++ }
++ err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ }
+ if (err < 0) {
+ spin_lock_bh(&psock->ingress_lock);
+@@ -1029,6 +1071,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
+ skb_dst_drop(skb);
+ skb_bpf_redirect_clear(skb);
+ ret = bpf_prog_run_pin_on_cpu(prog, skb);
++ if (ret == SK_PASS)
++ skb_bpf_set_strparser(skb);
+ ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
+ skb->sk = NULL;
+ }
+@@ -1080,13 +1124,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
+
+ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+ {
++ int ret;
++
+ static const struct strp_callbacks cb = {
+ .rcv_msg = sk_psock_strp_read,
+ .read_sock_done = sk_psock_strp_read_done,
+ .parse_msg = sk_psock_strp_parse,
+ };
+
+- return strp_init(&psock->strp, sk, &cb);
++ ret = strp_init(&psock->strp, sk, &cb);
++ if (!ret)
++ sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
++
++ return ret;
+ }
+
+ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+@@ -1101,6 +1151,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+
+ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+ {
++ psock_set_prog(&psock->progs.stream_parser, NULL);
++
+ if (!psock->saved_data_ready)
+ return;
+
+@@ -1112,7 +1164,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+ static void sk_psock_done_strp(struct sk_psock *psock)
+ {
+ /* Parser has been stopped */
+- if (psock->progs.stream_parser)
++ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
+ strp_done(&psock->strp);
+ }
+ #else
+@@ -1128,7 +1180,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
+ struct sk_psock *psock;
+ struct bpf_prog *prog;
+ int ret = __SK_DROP;
+- int len = skb->len;
++ int len = orig_len;
+
+ /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
+ skb = skb_clone(skb, GFP_ATOMIC);
+@@ -1189,6 +1241,9 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
+
+ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
+ {
++ psock_set_prog(&psock->progs.stream_verdict, NULL);
++ psock_set_prog(&psock->progs.skb_verdict, NULL);
++
+ if (!psock->saved_data_ready)
+ return;
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index c1601f75ec4b3..8faa0f9cc0839 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -717,7 +717,8 @@ bool sk_mc_loop(struct sock *sk)
+ return false;
+ if (!sk)
+ return true;
+- switch (sk->sk_family) {
++ /* IPV6_ADDRFORM can change sk->sk_family under us. */
++ switch (READ_ONCE(sk->sk_family)) {
+ case AF_INET:
+ return inet_sk(sk)->mc_loop;
+ #if IS_ENABLED(CONFIG_IPV6)
+@@ -830,6 +831,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+ }
+
+ num = ethtool_get_phc_vclocks(dev, &vclock_index);
++ dev_put(dev);
++
+ for (i = 0; i < num; i++) {
+ if (*(vclock_index + i) == phc_index) {
+ match = true;
+@@ -864,9 +867,9 @@ int sock_set_timestamping(struct sock *sk, int optname,
+ if ((1 << sk->sk_state) &
+ (TCPF_CLOSE | TCPF_LISTEN))
+ return -EINVAL;
+- sk->sk_tskey = tcp_sk(sk)->snd_una;
++ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
+ } else {
+- sk->sk_tskey = 0;
++ atomic_set(&sk->sk_tskey, 0);
+ }
+ }
+
+@@ -1012,7 +1015,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
+ * play 'guess the biggest size' games. RCVBUF/SNDBUF
+ * are treated in BSD as hints
+ */
+- val = min_t(u32, val, sysctl_wmem_max);
++ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
+ set_sndbuf:
+ /* Ensure val * 2 fits into an int, to prevent max_t()
+ * from treating it as a negative value.
+@@ -1044,7 +1047,7 @@ set_sndbuf:
+ * play 'guess the biggest size' games. RCVBUF/SNDBUF
+ * are treated in BSD as hints
+ */
+- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
++ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
+ break;
+
+ case SO_RCVBUFFORCE:
+@@ -1295,12 +1298,13 @@ set_sndbuf:
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
+- sk->sk_max_pacing_rate = ulval;
++ /* Pairs with READ_ONCE() from sk_getsockopt() */
++ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+ sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
+ break;
+ }
+ case SO_INCOMING_CPU:
+- WRITE_ONCE(sk->sk_incoming_cpu, val);
++ reuseport_update_incoming_cpu(sk, val);
+ break;
+
+ case SO_CNX_ADVICE:
+@@ -1453,11 +1457,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case SO_SNDBUF:
+- v.val = sk->sk_sndbuf;
++ v.val = READ_ONCE(sk->sk_sndbuf);
+ break;
+
+ case SO_RCVBUF:
+- v.val = sk->sk_rcvbuf;
++ v.val = READ_ONCE(sk->sk_rcvbuf);
+ break;
+
+ case SO_REUSEADDR:
+@@ -1546,7 +1550,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case SO_RCVLOWAT:
+- v.val = sk->sk_rcvlowat;
++ v.val = READ_ONCE(sk->sk_rcvlowat);
+ break;
+
+ case SO_SNDLOWAT:
+@@ -1640,7 +1644,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ if (!sock->ops->set_peek_off)
+ return -EOPNOTSUPP;
+
+- v.val = sk->sk_peek_off;
++ v.val = READ_ONCE(sk->sk_peek_off);
+ break;
+ case SO_NOFCS:
+ v.val = sock_flag(sk, SOCK_NOFCS);
+@@ -1670,7 +1674,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+- v.val = sk->sk_ll_usec;
++ v.val = READ_ONCE(sk->sk_ll_usec);
+ break;
+ case SO_PREFER_BUSY_POLL:
+ v.val = READ_ONCE(sk->sk_prefer_busy_poll);
+@@ -1678,12 +1682,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ #endif
+
+ case SO_MAX_PACING_RATE:
++ /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+ lv = sizeof(v.ulval);
+- v.ulval = sk->sk_max_pacing_rate;
++ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+ } else {
+ /* 32bit version */
+- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
++ v.val = min_t(unsigned long, ~0U,
++ READ_ONCE(sk->sk_max_pacing_rate));
+ }
+ break;
+
+@@ -2043,8 +2049,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+ newsk->sk_prot_creator = prot;
+
+ /* SANITY */
+- if (likely(newsk->sk_net_refcnt))
++ if (likely(newsk->sk_net_refcnt)) {
+ get_net(sock_net(newsk));
++ sock_inuse_add(sock_net(newsk), 1);
++ }
+ sk_node_init(&newsk->sk_node);
+ sock_lock_init(newsk);
+ bh_lock_sock(newsk);
+@@ -2115,8 +2123,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+ newsk->sk_err_soft = 0;
+ newsk->sk_priority = 0;
+ newsk->sk_incoming_cpu = raw_smp_processor_id();
+- if (likely(newsk->sk_net_refcnt))
+- sock_inuse_add(sock_net(newsk), 1);
+
+ /* Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.rst for details)
+@@ -2163,7 +2169,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+ {
+ u32 max_segs = 1;
+
+- sk_dst_set(sk, dst);
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
+ if (sk->sk_route_caps & NETIF_F_GSO)
+ sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+@@ -2178,6 +2183,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+ }
+ }
+ sk->sk_gso_max_segs = max_segs;
++ sk_dst_set(sk, dst);
+ }
+ EXPORT_SYMBOL_GPL(sk_setup_caps);
+
+@@ -2322,13 +2328,24 @@ kuid_t sock_i_uid(struct sock *sk)
+ }
+ EXPORT_SYMBOL(sock_i_uid);
+
+-unsigned long sock_i_ino(struct sock *sk)
++unsigned long __sock_i_ino(struct sock *sk)
+ {
+ unsigned long ino;
+
+- read_lock_bh(&sk->sk_callback_lock);
++ read_lock(&sk->sk_callback_lock);
+ ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
+- read_unlock_bh(&sk->sk_callback_lock);
++ read_unlock(&sk->sk_callback_lock);
++ return ino;
++}
++EXPORT_SYMBOL(__sock_i_ino);
++
++unsigned long sock_i_ino(struct sock *sk)
++{
++ unsigned long ino;
++
++ local_bh_disable();
++ ino = __sock_i_ino(sk);
++ local_bh_enable();
+ return ino;
+ }
+ EXPORT_SYMBOL(sock_i_ino);
+@@ -2366,7 +2383,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+
+ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+- sysctl_optmem_max)
++ READ_ONCE(sysctl_optmem_max))
+ return NULL;
+
+ skb = alloc_skb(size, priority);
+@@ -2384,8 +2401,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ */
+ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
+ {
+- if ((unsigned int)size <= sysctl_optmem_max &&
+- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
++ int optmem_max = READ_ONCE(sysctl_optmem_max);
++
++ if ((unsigned int)size <= optmem_max &&
++ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+ void *mem;
+ /* First do the add, to avoid the race if kmalloc
+ * might sleep.
+@@ -2445,9 +2464,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
+ break;
+- if (sk->sk_shutdown & SEND_SHUTDOWN)
++ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
+ break;
+- if (sk->sk_err)
++ if (READ_ONCE(sk->sk_err))
+ break;
+ timeo = schedule_timeout(timeo);
+ }
+@@ -2475,7 +2494,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+ goto failure;
+
+ err = -EPIPE;
+- if (sk->sk_shutdown & SEND_SHUTDOWN)
++ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
+ goto failure;
+
+ if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
+@@ -2862,7 +2881,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
+
+- if (sk_under_memory_pressure(sk) &&
++ if (sk_under_global_memory_pressure(sk) &&
+ (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+ sk_leave_memory_pressure(sk);
+ }
+@@ -2883,7 +2902,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
+
+ int sk_set_peek_off(struct sock *sk, int val)
+ {
+- sk->sk_peek_off = val;
++ WRITE_ONCE(sk->sk_peek_off, val);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(sk_set_peek_off);
+@@ -3114,7 +3133,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
+ }
+ EXPORT_SYMBOL(sk_stop_timer_sync);
+
+-void sock_init_data(struct socket *sock, struct sock *sk)
++void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
+ {
+ sk_init_common(sk);
+ sk->sk_send_head = NULL;
+@@ -3122,8 +3141,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+ timer_setup(&sk->sk_timer, NULL, 0);
+
+ sk->sk_allocation = GFP_KERNEL;
+- sk->sk_rcvbuf = sysctl_rmem_default;
+- sk->sk_sndbuf = sysctl_wmem_default;
++ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
++ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
+ sk->sk_state = TCP_CLOSE;
+ sk_set_socket(sk, sock);
+
+@@ -3133,11 +3152,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+ sk->sk_type = sock->type;
+ RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
+ sock->sk = sk;
+- sk->sk_uid = SOCK_INODE(sock)->i_uid;
+ } else {
+ RCU_INIT_POINTER(sk->sk_wq, NULL);
+- sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
+ }
++ sk->sk_uid = uid;
+
+ rwlock_init(&sk->sk_callback_lock);
+ if (sk->sk_kern_sock)
+@@ -3178,7 +3196,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+ sk->sk_napi_id = 0;
+- sk->sk_ll_usec = sysctl_net_busy_read;
++ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
+ #endif
+
+ sk->sk_max_pacing_rate = ~0UL;
+@@ -3195,6 +3213,16 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+ refcount_set(&sk->sk_refcnt, 1);
+ atomic_set(&sk->sk_drops, 0);
+ }
++EXPORT_SYMBOL(sock_init_data_uid);
++
++void sock_init_data(struct socket *sock, struct sock *sk)
++{
++ kuid_t uid = sock ?
++ SOCK_INODE(sock)->i_uid :
++ make_kuid(sock_net(sk)->user_ns, 0);
++
++ sock_init_data_uid(sock, sk, uid);
++}
+ EXPORT_SYMBOL(sock_init_data);
+
+ void lock_sock_nested(struct sock *sk, int subclass)
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index e252b8ec2b85e..caae43e66353d 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -117,7 +117,6 @@ static void sock_map_sk_acquire(struct sock *sk)
+ __acquires(&sk->sk_lock.slock)
+ {
+ lock_sock(sk);
+- preempt_disable();
+ rcu_read_lock();
+ }
+
+@@ -125,7 +124,6 @@ static void sock_map_sk_release(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ {
+ rcu_read_unlock();
+- preempt_enable();
+ release_sock(sk);
+ }
+
+@@ -150,13 +148,13 @@ static void sock_map_del_link(struct sock *sk,
+ list_for_each_entry_safe(link, tmp, &psock->link, list) {
+ if (link->link_raw == link_raw) {
+ struct bpf_map *map = link->map;
+- struct bpf_stab *stab = container_of(map, struct bpf_stab,
+- map);
+- if (psock->saved_data_ready && stab->progs.stream_parser)
++ struct sk_psock_progs *progs = sock_map_progs(map);
++
++ if (psock->saved_data_ready && progs->stream_parser)
+ strp_stop = true;
+- if (psock->saved_data_ready && stab->progs.stream_verdict)
++ if (psock->saved_data_ready && progs->stream_verdict)
+ verdict_stop = true;
+- if (psock->saved_data_ready && stab->progs.skb_verdict)
++ if (psock->saved_data_ready && progs->skb_verdict)
+ verdict_stop = true;
+ list_del(&link->list);
+ sk_psock_free_link(link);
+@@ -167,8 +165,11 @@ static void sock_map_del_link(struct sock *sk,
+ write_lock_bh(&sk->sk_callback_lock);
+ if (strp_stop)
+ sk_psock_stop_strp(sk, psock);
+- else
++ if (verdict_stop)
+ sk_psock_stop_verdict(sk, psock);
++
++ if (psock->psock_update_sk_prot)
++ psock->psock_update_sk_prot(sk, psock, false);
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+ }
+@@ -282,32 +283,38 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
+
+ if (msg_parser)
+ psock_set_prog(&psock->progs.msg_parser, msg_parser);
++ if (stream_parser)
++ psock_set_prog(&psock->progs.stream_parser, stream_parser);
++ if (stream_verdict)
++ psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
++ if (skb_verdict)
++ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+
++ /* msg_* and stream_* programs references tracked in psock after this
++ * point. Reference dec and cleanup will occur through psock destructor
++ */
+ ret = sock_map_init_proto(sk, psock);
+- if (ret < 0)
+- goto out_drop;
++ if (ret < 0) {
++ sk_psock_put(sk, psock);
++ goto out;
++ }
+
+ write_lock_bh(&sk->sk_callback_lock);
+ if (stream_parser && stream_verdict && !psock->saved_data_ready) {
+ ret = sk_psock_init_strp(sk, psock);
+- if (ret)
+- goto out_unlock_drop;
+- psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
+- psock_set_prog(&psock->progs.stream_parser, stream_parser);
++ if (ret) {
++ write_unlock_bh(&sk->sk_callback_lock);
++ sk_psock_put(sk, psock);
++ goto out;
++ }
+ sk_psock_start_strp(sk, psock);
+ } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
+- psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
+ sk_psock_start_verdict(sk,psock);
+ } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) {
+- psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ sk_psock_start_verdict(sk, psock);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ return 0;
+-out_unlock_drop:
+- write_unlock_bh(&sk->sk_callback_lock);
+-out_drop:
+- sk_psock_put(sk, psock);
+ out_progs:
+ if (skb_verdict)
+ bpf_prog_put(skb_verdict);
+@@ -320,6 +327,7 @@ out_put_stream_parser:
+ out_put_stream_verdict:
+ if (stream_verdict)
+ bpf_prog_put(stream_verdict);
++out:
+ return ret;
+ }
+
+@@ -339,11 +347,13 @@ static void sock_map_free(struct bpf_map *map)
+
+ sk = xchg(psk, NULL);
+ if (sk) {
++ sock_hold(sk);
+ lock_sock(sk);
+ rcu_read_lock();
+ sock_map_unref(sk, psk);
+ rcu_read_unlock();
+ release_sock(sk);
++ sock_put(sk);
+ }
+ }
+
+@@ -779,13 +789,22 @@ static int sock_map_init_seq_private(void *priv_data,
+ {
+ struct sock_map_seq_info *info = priv_data;
+
++ bpf_map_inc_with_uref(aux->map);
+ info->map = aux->map;
+ return 0;
+ }
+
++static void sock_map_fini_seq_private(void *priv_data)
++{
++ struct sock_map_seq_info *info = priv_data;
++
++ bpf_map_put_with_uref(info->map);
++}
++
+ static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+ .seq_ops = &sock_map_seq_ops,
+ .init_seq_private = sock_map_init_seq_private,
++ .fini_seq_private = sock_map_fini_seq_private,
+ .seq_priv_size = sizeof(struct sock_map_seq_info),
+ };
+
+@@ -1366,18 +1385,27 @@ static const struct seq_operations sock_hash_seq_ops = {
+ };
+
+ static int sock_hash_init_seq_private(void *priv_data,
+- struct bpf_iter_aux_info *aux)
++ struct bpf_iter_aux_info *aux)
+ {
+ struct sock_hash_seq_info *info = priv_data;
+
++ bpf_map_inc_with_uref(aux->map);
+ info->map = aux->map;
+ info->htab = container_of(aux->map, struct bpf_shtab, map);
+ return 0;
+ }
+
++static void sock_hash_fini_seq_private(void *priv_data)
++{
++ struct sock_hash_seq_info *info = priv_data;
++
++ bpf_map_put_with_uref(info->map);
++}
++
+ static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+ .seq_ops = &sock_hash_seq_ops,
+ .init_seq_private = sock_hash_init_seq_private,
++ .fini_seq_private = sock_hash_fini_seq_private,
+ .seq_priv_size = sizeof(struct sock_hash_seq_info),
+ };
+
+@@ -1484,18 +1512,43 @@ void sock_map_unhash(struct sock *sk)
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+- if (sk->sk_prot->unhash)
+- sk->sk_prot->unhash(sk);
+- return;
++ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
++ } else {
++ saved_unhash = psock->saved_unhash;
++ sock_map_remove_links(sk, psock);
++ rcu_read_unlock();
+ }
+-
+- saved_unhash = psock->saved_unhash;
+- sock_map_remove_links(sk, psock);
+- rcu_read_unlock();
+- saved_unhash(sk);
++ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
++ return;
++ if (saved_unhash)
++ saved_unhash(sk);
+ }
+ EXPORT_SYMBOL_GPL(sock_map_unhash);
+
++void sock_map_destroy(struct sock *sk)
++{
++ void (*saved_destroy)(struct sock *sk);
++ struct sk_psock *psock;
++
++ rcu_read_lock();
++ psock = sk_psock_get(sk);
++ if (unlikely(!psock)) {
++ rcu_read_unlock();
++ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
++ } else {
++ saved_destroy = psock->saved_destroy;
++ sock_map_remove_links(sk, psock);
++ rcu_read_unlock();
++ sk_psock_stop(psock);
++ sk_psock_put(sk, psock);
++ }
++ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
++ return;
++ if (saved_destroy)
++ saved_destroy(sk);
++}
++EXPORT_SYMBOL_GPL(sock_map_destroy);
++
+ void sock_map_close(struct sock *sk, long timeout)
+ {
+ void (*saved_close)(struct sock *sk, long timeout);
+@@ -1507,15 +1560,21 @@ void sock_map_close(struct sock *sk, long timeout)
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ release_sock(sk);
+- return sk->sk_prot->close(sk, timeout);
++ saved_close = READ_ONCE(sk->sk_prot)->close;
++ } else {
++ saved_close = psock->saved_close;
++ sock_map_remove_links(sk, psock);
++ rcu_read_unlock();
++ sk_psock_stop(psock);
++ release_sock(sk);
++ cancel_work_sync(&psock->work);
++ sk_psock_put(sk, psock);
+ }
+-
+- saved_close = psock->saved_close;
+- sock_map_remove_links(sk, psock);
+- rcu_read_unlock();
+- sk_psock_stop(psock, true);
+- sk_psock_put(sk, psock);
+- release_sock(sk);
++ /* Make sure we do not recurse. This is a bug.
++ * Leak the socket instead of crashing on a stack overflow.
++ */
++ if (WARN_ON_ONCE(saved_close == sock_map_close))
++ return;
+ saved_close(sk, timeout);
+ }
+ EXPORT_SYMBOL_GPL(sock_map_close);
+@@ -1565,7 +1624,7 @@ static struct bpf_iter_reg sock_map_iter_reg = {
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+- PTR_TO_RDONLY_BUF_OR_NULL },
++ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
+index 3f00a28fe762a..5a165286e4d8e 100644
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -21,6 +21,86 @@ static DEFINE_IDA(reuseport_ida);
+ static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+ struct sock_reuseport *reuse, bool bind_inany);
+
++void reuseport_has_conns_set(struct sock *sk)
++{
++ struct sock_reuseport *reuse;
++
++ if (!rcu_access_pointer(sk->sk_reuseport_cb))
++ return;
++
++ spin_lock_bh(&reuseport_lock);
++ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
++ lockdep_is_held(&reuseport_lock));
++ if (likely(reuse))
++ reuse->has_conns = 1;
++ spin_unlock_bh(&reuseport_lock);
++}
++EXPORT_SYMBOL(reuseport_has_conns_set);
++
++static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse)
++{
++ /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
++ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1);
++}
++
++static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse)
++{
++ /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */
++ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1);
++}
++
++static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
++{
++ if (sk->sk_incoming_cpu >= 0)
++ __reuseport_get_incoming_cpu(reuse);
++}
++
++static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse)
++{
++ if (sk->sk_incoming_cpu >= 0)
++ __reuseport_put_incoming_cpu(reuse);
++}
++
++void reuseport_update_incoming_cpu(struct sock *sk, int val)
++{
++ struct sock_reuseport *reuse;
++ int old_sk_incoming_cpu;
++
++ if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) {
++ /* Paired with REAE_ONCE() in sk_incoming_cpu_update()
++ * and compute_score().
++ */
++ WRITE_ONCE(sk->sk_incoming_cpu, val);
++ return;
++ }
++
++ spin_lock_bh(&reuseport_lock);
++
++ /* This must be done under reuseport_lock to avoid a race with
++ * reuseport_grow(), which accesses sk->sk_incoming_cpu without
++ * lock_sock() when detaching a shutdown()ed sk.
++ *
++ * Paired with READ_ONCE() in reuseport_select_sock_by_hash().
++ */
++ old_sk_incoming_cpu = sk->sk_incoming_cpu;
++ WRITE_ONCE(sk->sk_incoming_cpu, val);
++
++ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
++ lockdep_is_held(&reuseport_lock));
++
++ /* reuseport_grow() has detached a closed sk. */
++ if (!reuse)
++ goto out;
++
++ if (old_sk_incoming_cpu < 0 && val >= 0)
++ __reuseport_get_incoming_cpu(reuse);
++ else if (old_sk_incoming_cpu >= 0 && val < 0)
++ __reuseport_put_incoming_cpu(reuse);
++
++out:
++ spin_unlock_bh(&reuseport_lock);
++}
++
+ static int reuseport_sock_index(struct sock *sk,
+ const struct sock_reuseport *reuse,
+ bool closed)
+@@ -48,6 +128,7 @@ static void __reuseport_add_sock(struct sock *sk,
+ /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
+ smp_wmb();
+ reuse->num_socks++;
++ reuseport_get_incoming_cpu(sk, reuse);
+ }
+
+ static bool __reuseport_detach_sock(struct sock *sk,
+@@ -60,6 +141,7 @@ static bool __reuseport_detach_sock(struct sock *sk,
+
+ reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+ reuse->num_socks--;
++ reuseport_put_incoming_cpu(sk, reuse);
+
+ return true;
+ }
+@@ -70,6 +152,7 @@ static void __reuseport_add_closed_sock(struct sock *sk,
+ reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
+ /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
++ reuseport_get_incoming_cpu(sk, reuse);
+ }
+
+ static bool __reuseport_detach_closed_sock(struct sock *sk,
+@@ -83,6 +166,7 @@ static bool __reuseport_detach_closed_sock(struct sock *sk,
+ reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+ /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
++ reuseport_put_incoming_cpu(sk, reuse);
+
+ return true;
+ }
+@@ -150,6 +234,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
+ reuse->bind_inany = bind_inany;
+ reuse->socks[0] = sk;
+ reuse->num_socks = 1;
++ reuseport_get_incoming_cpu(sk, reuse);
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ out:
+@@ -193,6 +278,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
+ more_reuse->reuseport_id = reuse->reuseport_id;
+ more_reuse->bind_inany = reuse->bind_inany;
+ more_reuse->has_conns = reuse->has_conns;
++ more_reuse->incoming_cpu = reuse->incoming_cpu;
+
+ memcpy(more_reuse->socks, reuse->socks,
+ reuse->num_socks * sizeof(struct sock *));
+@@ -387,7 +473,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
+ prog = rcu_dereference_protected(reuse->prog,
+ lockdep_is_held(&reuseport_lock));
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
+ (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
+ /* Migration capable, move sk from the listening section
+ * to the closed section.
+@@ -442,18 +528,32 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
+ static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
+ u32 hash, u16 num_socks)
+ {
++ struct sock *first_valid_sk = NULL;
+ int i, j;
+
+ i = j = reciprocal_scale(hash, num_socks);
+- while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
++ do {
++ struct sock *sk = reuse->socks[i];
++
++ if (sk->sk_state != TCP_ESTABLISHED) {
++ /* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */
++ if (!READ_ONCE(reuse->incoming_cpu))
++ return sk;
++
++ /* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). */
++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
++ return sk;
++
++ if (!first_valid_sk)
++ first_valid_sk = sk;
++ }
++
+ i++;
+ if (i >= num_socks)
+ i = 0;
+- if (i == j)
+- return NULL;
+- }
++ } while (i != j);
+
+- return reuse->socks[i];
++ return first_valid_sk;
+ }
+
+ /**
+@@ -545,7 +645,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
+ hash = migrating_sk->sk_hash;
+ prog = rcu_dereference(reuse->prog);
+ if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
+- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
+ goto select_by_hash;
+ goto failure;
+ }
+diff --git a/net/core/stream.c b/net/core/stream.c
+index 4f1d4aa5fb38d..422ee97e4f2be 100644
+--- a/net/core/stream.c
++++ b/net/core/stream.c
+@@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending++;
+ done = sk_wait_event(sk, timeo_p,
+- !sk->sk_err &&
+- !((1 << sk->sk_state) &
++ !READ_ONCE(sk->sk_err) &&
++ !((1 << READ_ONCE(sk->sk_state)) &
+ ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending--;
+@@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect);
+ * sk_stream_closing - Return 1 if we still have things to send in our buffers.
+ * @sk: socket to verify
+ */
+-static inline int sk_stream_closing(struct sock *sk)
++static int sk_stream_closing(const struct sock *sk)
+ {
+- return (1 << sk->sk_state) &
++ return (1 << READ_ONCE(sk->sk_state)) &
+ (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
+ }
+
+@@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+- sk_wait_event(sk, &current_timeo, sk->sk_err ||
+- (sk->sk_shutdown & SEND_SHUTDOWN) ||
++ sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
+ (sk_stream_memory_free(sk) &&
+ !vm_wait), &wait);
+ sk->sk_write_pending--;
+@@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+ *timeo_p = current_timeo;
+ }
+ out:
+- remove_wait_queue(sk_sleep(sk), &wait);
++ if (!sock_flag(sk, SOCK_DEAD))
++ remove_wait_queue(sk_sleep(sk), &wait);
+ return err;
+
+ do_error:
+@@ -195,8 +196,11 @@ void sk_stream_kill_queues(struct sock *sk)
+ /* First the read buffer. */
+ __skb_queue_purge(&sk->sk_receive_queue);
+
+- /* Next, the error queue. */
+- __skb_queue_purge(&sk->sk_error_queue);
++ /* Next, the error queue.
++ * We need to use queue lock, because other threads might
++ * add packets to the queue without socket lock being held.
++ */
++ skb_queue_purge(&sk->sk_error_queue);
+
+ /* Next, the write queue. */
+ WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
+@@ -205,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
+ sk_mem_reclaim(sk);
+
+ WARN_ON(sk->sk_wmem_queued);
+- WARN_ON(sk->sk_forward_alloc);
+
+ /* It is _impossible_ for the backlog to contain anything
+ * when we get here. All user references to this socket
+diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
+index 5f88526ad61cc..ed20cbdd19315 100644
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -236,14 +236,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
+ static int proc_do_dev_weight(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+ {
+- int ret;
++ static DEFINE_MUTEX(dev_weight_mutex);
++ int ret, weight;
+
++ mutex_lock(&dev_weight_mutex);
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+- if (ret != 0)
+- return ret;
+-
+- dev_rx_weight = weight_p * dev_weight_rx_bias;
+- dev_tx_weight = weight_p * dev_weight_tx_bias;
++ if (!ret && write) {
++ weight = READ_ONCE(weight_p);
++ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
++ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
++ }
++ mutex_unlock(&dev_weight_mutex);
+
+ return ret;
+ }
+diff --git a/net/core/xdp.c b/net/core/xdp.c
+index cc92ccb384325..a3e3d2538a3a8 100644
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator)
+ mutex_unlock(&mem_id_lock);
+ }
+
+-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
++void xdp_unreg_mem_model(struct xdp_mem_info *mem)
+ {
+ struct xdp_mem_allocator *xa;
+- int type = xdp_rxq->mem.type;
+- int id = xdp_rxq->mem.id;
++ int type = mem->type;
++ int id = mem->id;
+
+ /* Reset mem info to defaults */
+- xdp_rxq->mem.id = 0;
+- xdp_rxq->mem.type = 0;
+-
+- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+- WARN(1, "Missing register, driver bug");
+- return;
+- }
++ mem->id = 0;
++ mem->type = 0;
+
+ if (id == 0)
+ return;
+@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+ rcu_read_unlock();
+ }
+ }
++EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
++
++void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
++{
++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
++ WARN(1, "Missing register, driver bug");
++ return;
++ }
++
++ xdp_unreg_mem_model(&xdp_rxq->mem);
++}
+ EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
+
+ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+@@ -261,28 +267,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
+ return true;
+ }
+
+-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+- enum xdp_mem_type type, void *allocator)
++static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
++ enum xdp_mem_type type,
++ void *allocator)
+ {
+ struct xdp_mem_allocator *xdp_alloc;
+ gfp_t gfp = GFP_KERNEL;
+ int id, errno, ret;
+ void *ptr;
+
+- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+- WARN(1, "Missing register, driver bug");
+- return -EFAULT;
+- }
+-
+ if (!__is_supported_mem_type(type))
+- return -EOPNOTSUPP;
++ return ERR_PTR(-EOPNOTSUPP);
+
+- xdp_rxq->mem.type = type;
++ mem->type = type;
+
+ if (!allocator) {
+ if (type == MEM_TYPE_PAGE_POOL)
+- return -EINVAL; /* Setup time check page_pool req */
+- return 0;
++ return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
++ return NULL;
+ }
+
+ /* Delay init of rhashtable to save memory if feature isn't used */
+@@ -292,13 +294,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ mutex_unlock(&mem_id_lock);
+ if (ret < 0) {
+ WARN_ON(1);
+- return ret;
++ return ERR_PTR(ret);
+ }
+ }
+
+ xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+ if (!xdp_alloc)
+- return -ENOMEM;
++ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&mem_id_lock);
+ id = __mem_id_cyclic_get(gfp);
+@@ -306,15 +308,15 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+ errno = id;
+ goto err;
+ }
+- xdp_rxq->mem.id = id;
+- xdp_alloc->mem = xdp_rxq->mem;
++ mem->id = id;
++ xdp_alloc->mem = *mem;
+ xdp_alloc->allocator = allocator;
+
+ /* Insert allocator into ID lookup table */
+ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
+ if (IS_ERR(ptr)) {
+- ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
+- xdp_rxq->mem.id = 0;
++ ida_simple_remove(&mem_id_pool, mem->id);
++ mem->id = 0;
+ errno = PTR_ERR(ptr);
+ goto err;
+ }
+@@ -324,13 +326,44 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+
+ mutex_unlock(&mem_id_lock);
+
+- trace_mem_connect(xdp_alloc, xdp_rxq);
+- return 0;
++ return xdp_alloc;
+ err:
+ mutex_unlock(&mem_id_lock);
+ kfree(xdp_alloc);
+- return errno;
++ return ERR_PTR(errno);
++}
++
++int xdp_reg_mem_model(struct xdp_mem_info *mem,
++ enum xdp_mem_type type, void *allocator)
++{
++ struct xdp_mem_allocator *xdp_alloc;
++
++ xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
++ if (IS_ERR(xdp_alloc))
++ return PTR_ERR(xdp_alloc);
++ return 0;
+ }
++EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
++
++int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
++ enum xdp_mem_type type, void *allocator)
++{
++ struct xdp_mem_allocator *xdp_alloc;
++
++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
++ WARN(1, "Missing register, driver bug");
++ return -EFAULT;
++ }
++
++ xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
++ if (IS_ERR(xdp_alloc))
++ return PTR_ERR(xdp_alloc);
++
++ if (trace_mem_connect_enabled() && xdp_alloc)
++ trace_mem_connect(xdp_alloc, xdp_rxq);
++ return 0;
++}
++
+ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+
+ /* XDP RX runs under NAPI protection, and in different delivery error
+diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
+index b441ab330fd34..d2981e89d3638 100644
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
+ return -EOPNOTSUPP;
+
+ ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
+- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
++ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
+ NULL);
+ if (ret)
+ return ret;
+@@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
+ }
+ EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+
++static void dcbnl_flush_dev(struct net_device *dev)
++{
++ struct dcb_app_type *itr, *tmp;
++
++ spin_lock_bh(&dcb_lock);
++
++ list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
++ if (itr->ifindex == dev->ifindex) {
++ list_del(&itr->list);
++ kfree(itr);
++ }
++ }
++
++ spin_unlock_bh(&dcb_lock);
++}
++
++static int dcbnl_netdevice_event(struct notifier_block *nb,
++ unsigned long event, void *ptr)
++{
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++
++ switch (event) {
++ case NETDEV_UNREGISTER:
++ if (!dev->dcbnl_ops)
++ return NOTIFY_DONE;
++
++ dcbnl_flush_dev(dev);
++
++ return NOTIFY_OK;
++ default:
++ return NOTIFY_DONE;
++ }
++}
++
++static struct notifier_block dcbnl_nb __read_mostly = {
++ .notifier_call = dcbnl_netdevice_event,
++};
++
+ static int __init dcbnl_init(void)
+ {
++ int err;
++
++ err = register_netdevice_notifier(&dcbnl_nb);
++ if (err)
++ return err;
++
+ rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
+
+diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
+index c5c1d2b8045e8..0218eb169891c 100644
+--- a/net/dccp/dccp.h
++++ b/net/dccp/dccp.h
+@@ -48,7 +48,7 @@ extern bool dccp_debug;
+
+ extern struct inet_hashinfo dccp_hashinfo;
+
+-extern struct percpu_counter dccp_orphan_count;
++DECLARE_PER_CPU(unsigned int, dccp_orphan_count);
+
+ void dccp_time_wait(struct sock *sk, int state, int timeo);
+
+@@ -283,6 +283,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned int len);
+
++void dccp_destruct_common(struct sock *sk);
+ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
+ void dccp_destroy_sock(struct sock *sk);
+
+diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
+index 0ea29270d7e53..1490ba960365e 100644
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -137,6 +137,8 @@ failure:
+ * This unhashes the socket and releases the local port, if necessary.
+ */
+ dccp_set_state(sk, DCCP_CLOSED);
++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
++ inet_reset_saddr(sk);
+ ip_rt_put(rt);
+ sk->sk_route_caps = 0;
+ inet->inet_dport = 0;
+@@ -248,12 +250,17 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
+ int err;
+ struct net *net = dev_net(skb->dev);
+
+- /* Only need dccph_dport & dccph_sport which are the first
+- * 4 bytes in dccp header.
++ /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
++ * which is in byte 7 of the dccp header.
+ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
++ *
++ * Later on, we want to access the sequence number fields, which are
++ * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
+ */
+- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++ dh = (struct dccp_hdr *)(skb->data + offset);
++ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
++ return -EINVAL;
++ iph = (struct iphdr *)skb->data;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+
+ sk = __inet_lookup_established(net, &dccp_hashinfo,
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index fa663518fa0e4..c9f11f86266c0 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -74,7 +74,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
+ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+ {
+- const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
++ const struct ipv6hdr *hdr;
+ const struct dccp_hdr *dh;
+ struct dccp_sock *dp;
+ struct ipv6_pinfo *np;
+@@ -83,12 +83,17 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ __u64 seq;
+ struct net *net = dev_net(skb->dev);
+
+- /* Only need dccph_dport & dccph_sport which are the first
+- * 4 bytes in dccp header.
++ /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
++ * which is in byte 7 of the dccp header.
+ * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
++ *
++ * Later on, we want to access the sequence number fields, which are
++ * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
+ */
+- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++ dh = (struct dccp_hdr *)(skb->data + offset);
++ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
++ return -EINVAL;
++ hdr = (const struct ipv6hdr *)skb->data;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+
+ sk = __inet6_lookup_established(net, &dccp_hashinfo,
+@@ -551,11 +556,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (*own_req && ireq->pktopts) {
+- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
++ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+- if (newnp->pktoptions)
+- skb_set_owner_r(newnp->pktoptions, newsk);
+ }
+
+ return newsk;
+@@ -615,7 +618,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+ --ANK (980728)
+ */
+ if (np->rxopt.all)
+- opt_skb = skb_clone(skb, GFP_ATOMIC);
++ opt_skb = skb_clone_and_charge_r(skb, sk);
+
+ if (sk->sk_state == DCCP_OPEN) { /* Fast path */
+ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
+@@ -679,7 +682,6 @@ ipv6_pktoptions:
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
+ if (ipv6_opt_accepted(sk, opt_skb,
+ &DCCP_SKB_CB(opt_skb)->header.h6)) {
+- skb_set_owner_r(opt_skb, sk);
+ memmove(IP6CB(opt_skb),
+ &DCCP_SKB_CB(opt_skb)->header.h6,
+ sizeof(struct inet6_skb_parm));
+@@ -967,6 +969,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+
+ late_failure:
+ dccp_set_state(sk, DCCP_CLOSED);
++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
++ inet_reset_saddr(sk);
+ __sk_dst_reset(sk);
+ failure:
+ inet->inet_dport = 0;
+@@ -1003,6 +1007,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
+ .sockaddr_len = sizeof(struct sockaddr_in6),
+ };
+
++static void dccp_v6_sk_destruct(struct sock *sk)
++{
++ dccp_destruct_common(sk);
++ inet6_sock_destruct(sk);
++}
++
+ /* NOTE: A lot of things set to zero explicitly by call to
+ * sk_alloc() so need not be done here.
+ */
+@@ -1015,17 +1025,12 @@ static int dccp_v6_init_sock(struct sock *sk)
+ if (unlikely(!dccp_v6_ctl_sock_initialized))
+ dccp_v6_ctl_sock_initialized = 1;
+ inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
++ sk->sk_destruct = dccp_v6_sk_destruct;
+ }
+
+ return err;
+ }
+
+-static void dccp_v6_destroy_sock(struct sock *sk)
+-{
+- dccp_destroy_sock(sk);
+- inet6_destroy_sock(sk);
+-}
+-
+ static struct timewait_sock_ops dccp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
+ };
+@@ -1048,7 +1053,7 @@ static struct proto dccp_v6_prot = {
+ .accept = inet_csk_accept,
+ .get_port = inet_csk_get_port,
+ .shutdown = dccp_shutdown,
+- .destroy = dccp_v6_destroy_sock,
++ .destroy = dccp_destroy_sock,
+ .orphan_count = &dccp_orphan_count,
+ .max_header = MAX_DCCP_HEADER,
+ .obj_size = sizeof(struct dccp6_sock),
+diff --git a/net/dccp/output.c b/net/dccp/output.c
+index b8a24734385ef..fd2eb148d24de 100644
+--- a/net/dccp/output.c
++++ b/net/dccp/output.c
+@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
+
+ /* And store cached results */
+ icsk->icsk_pmtu_cookie = pmtu;
+- dp->dccps_mss_cache = cur_mps;
++ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
+
+ return cur_mps;
+ }
+diff --git a/net/dccp/proto.c b/net/dccp/proto.c
+index abb5c596a8176..0b0567a692a8f 100644
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -42,8 +42,8 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
+
+ EXPORT_SYMBOL_GPL(dccp_statistics);
+
+-struct percpu_counter dccp_orphan_count;
+-EXPORT_SYMBOL_GPL(dccp_orphan_count);
++DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
++EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);
+
+ struct inet_hashinfo dccp_hashinfo;
+ EXPORT_SYMBOL_GPL(dccp_hashinfo);
+@@ -171,12 +171,18 @@ const char *dccp_packet_name(const int type)
+
+ EXPORT_SYMBOL_GPL(dccp_packet_name);
+
+-static void dccp_sk_destruct(struct sock *sk)
++void dccp_destruct_common(struct sock *sk)
+ {
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_tx_ccid = NULL;
++}
++EXPORT_SYMBOL_GPL(dccp_destruct_common);
++
++static void dccp_sk_destruct(struct sock *sk)
++{
++ dccp_destruct_common(sk);
+ inet_sock_destruct(sk);
+ }
+
+@@ -318,11 +324,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
+ __poll_t dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+ {
+- __poll_t mask;
+ struct sock *sk = sock->sk;
++ __poll_t mask;
++ u8 shutdown;
++ int state;
+
+ sock_poll_wait(file, sock, wait);
+- if (sk->sk_state == DCCP_LISTEN)
++
++ state = inet_sk_state_load(sk);
++ if (state == DCCP_LISTEN)
+ return inet_csk_listen_poll(sk);
+
+ /* Socket is not locked. We are protected from async events
+@@ -331,20 +341,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
+ */
+
+ mask = 0;
+- if (sk->sk_err)
++ if (READ_ONCE(sk->sk_err))
+ mask = EPOLLERR;
++ shutdown = READ_ONCE(sk->sk_shutdown);
+
+- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
++ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
+ mask |= EPOLLHUP;
+- if (sk->sk_shutdown & RCV_SHUTDOWN)
++ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+
+ /* Connected? */
+- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
++ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ if (atomic_read(&sk->sk_rmem_alloc) > 0)
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
++ if (!(shutdown & SEND_SHUTDOWN)) {
+ if (sk_stream_is_writeable(sk)) {
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ } else { /* send SIGIO later */
+@@ -362,7 +373,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
+ }
+ return mask;
+ }
+-
+ EXPORT_SYMBOL_GPL(dccp_poll);
+
+ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+@@ -633,7 +643,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
+ return dccp_getsockopt_service(sk, len,
+ (__be32 __user *)optval, optlen);
+ case DCCP_SOCKOPT_GET_CUR_MPS:
+- val = dp->dccps_mss_cache;
++ val = READ_ONCE(dp->dccps_mss_cache);
+ break;
+ case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+ return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+@@ -742,16 +752,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+
+ trace_dccp_probe(sk, len);
+
+- if (len > dp->dccps_mss_cache)
++ if (len > READ_ONCE(dp->dccps_mss_cache))
+ return -EMSGSIZE;
+
+ lock_sock(sk);
+
+- if (dccp_qpolicy_full(sk)) {
+- rc = -EAGAIN;
+- goto out_release;
+- }
+-
+ timeo = sock_sndtimeo(sk, noblock);
+
+ /*
+@@ -770,11 +775,22 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ if (skb == NULL)
+ goto out_release;
+
++ if (dccp_qpolicy_full(sk)) {
++ rc = -EAGAIN;
++ goto out_discard;
++ }
++
+ if (sk->sk_state == DCCP_CLOSED) {
+ rc = -ENOTCONN;
+ goto out_discard;
+ }
+
++ /* We need to check dccps_mss_cache after socket is locked. */
++ if (len > dp->dccps_mss_cache) {
++ rc = -EMSGSIZE;
++ goto out_discard;
++ }
++
+ skb_reserve(skb, sk->sk_prot->max_header);
+ rc = memcpy_from_msg(skb_put(skb, len), msg, len);
+ if (rc != 0)
+@@ -1055,7 +1071,7 @@ adjudge_to_death:
+ bh_lock_sock(sk);
+ WARN_ON(sock_owned_by_user(sk));
+
+- percpu_counter_inc(sk->sk_prot->orphan_count);
++ this_cpu_inc(dccp_orphan_count);
+
+ /* Have we already been destroyed by a softirq or backlog? */
+ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
+@@ -1115,13 +1131,10 @@ static int __init dccp_init(void)
+
+ BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
+ sizeof_field(struct sk_buff, cb));
+- rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
+- if (rc)
+- goto out_fail;
+ inet_hashinfo_init(&dccp_hashinfo);
+ rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
+ if (rc)
+- goto out_free_percpu;
++ goto out_fail;
+ rc = -ENOBUFS;
+ dccp_hashinfo.bind_bucket_cachep =
+ kmem_cache_create("dccp_bind_bucket",
+@@ -1226,8 +1239,6 @@ out_free_bind_bucket_cachep:
+ kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+ out_free_hashinfo2:
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
+-out_free_percpu:
+- percpu_counter_destroy(&dccp_orphan_count);
+ out_fail:
+ dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.ehash = NULL;
+@@ -1250,7 +1261,6 @@ static void __exit dccp_fini(void)
+ dccp_ackvec_exit();
+ dccp_sysctl_exit();
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
+- percpu_counter_destroy(&dccp_orphan_count);
+ }
+
+ module_init(dccp_init);
+diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
+deleted file mode 100644
+index 24336bdb10546..0000000000000
+--- a/net/decnet/Kconfig
++++ /dev/null
+@@ -1,43 +0,0 @@
+-# SPDX-License-Identifier: GPL-2.0-only
+-#
+-# DECnet configuration
+-#
+-config DECNET
+- tristate "DECnet Support"
+- help
+- The DECnet networking protocol was used in many products made by
+- Digital (now Compaq). It provides reliable stream and sequenced
+- packet communications over which run a variety of services similar
+- to those which run over TCP/IP.
+-
+- To find some tools to use with the kernel layer support, please
+- look at Patrick Caulfield's web site:
+- <http://linux-decnet.sourceforge.net/>.
+-
+- More detailed documentation is available in
+- <file:Documentation/networking/decnet.rst>.
+-
+- Be sure to say Y to "/proc file system support" and "Sysctl support"
+- below when using DECnet, since you will need sysctl support to aid
+- in configuration at run time.
+-
+- The DECnet code is also available as a module ( = code which can be
+- inserted in and removed from the running kernel whenever you want).
+- The module is called decnet.
+-
+-config DECNET_ROUTER
+- bool "DECnet: router support"
+- depends on DECNET
+- select FIB_RULES
+- help
+- Add support for turning your DECnet Endnode into a level 1 or 2
+- router. This is an experimental, but functional option. If you
+- do say Y here, then make sure that you also say Y to "Kernel/User
+- network link driver", "Routing messages" and "Network packet
+- filtering". The first two are required to allow configuration via
+- rtnetlink (you will need Alexey Kuznetsov's iproute2 package
+- from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet
+- filtering" option will be required for the forthcoming routing daemon
+- to work.
+-
+- See <file:Documentation/networking/decnet.rst> for more information.
+diff --git a/net/decnet/Makefile b/net/decnet/Makefile
+deleted file mode 100644
+index 07b38e441b2d0..0000000000000
+--- a/net/decnet/Makefile
++++ /dev/null
+@@ -1,10 +0,0 @@
+-# SPDX-License-Identifier: GPL-2.0
+-
+-obj-$(CONFIG_DECNET) += decnet.o
+-
+-decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \
+- dn_route.o dn_dev.o dn_neigh.o dn_timer.o
+-decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
+-decnet-y += sysctl_net_decnet.o
+-
+-obj-$(CONFIG_NETFILTER) += netfilter/
+diff --git a/net/decnet/README b/net/decnet/README
+deleted file mode 100644
+index 60e7ec88c81fd..0000000000000
+--- a/net/decnet/README
++++ /dev/null
+@@ -1,8 +0,0 @@
+- Linux DECnet Project
+- ======================
+-
+-The documentation for this kernel subsystem is available in the
+-Documentation/networking subdirectory of this distribution and also
+-on line at http://www.chygwyn.com/DECnet/
+-
+-Steve Whitehouse <SteveW@ACM.org>
+diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
+deleted file mode 100644
+index dc92a67baea39..0000000000000
+--- a/net/decnet/af_decnet.c
++++ /dev/null
+@@ -1,2400 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Socket Layer Interface
+- *
+- * Authors: Eduardo Marcelo Serrat <emserrat@geocities.com>
+- * Patrick Caulfield <patrick@pandh.demon.co.uk>
+- *
+- * Changes:
+- * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's
+- * version of the code. Original copyright preserved
+- * below.
+- * Steve Whitehouse: Some bug fixes, cleaning up some code to make it
+- * compatible with my routing layer.
+- * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick
+- * Caulfield.
+- * Steve Whitehouse: Further bug fixes, checking module code still works
+- * with new routing layer.
+- * Steve Whitehouse: Additional set/get_sockopt() calls.
+- * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new
+- * code.
+- * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like
+- * way. Didn't manage it entirely, but its better.
+- * Steve Whitehouse: ditto for sendmsg().
+- * Steve Whitehouse: A selection of bug fixes to various things.
+- * Steve Whitehouse: Added TIOCOUTQ ioctl.
+- * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username.
+- * Steve Whitehouse: Fixes to connect() error returns.
+- * Patrick Caulfield: Fixes to delayed acceptance logic.
+- * David S. Miller: New socket locking
+- * Steve Whitehouse: Socket list hashing/locking
+- * Arnaldo C. Melo: use capable, not suser
+- * Steve Whitehouse: Removed unused code. Fix to use sk->allocation
+- * when required.
+- * Patrick Caulfield: /proc/net/decnet now has object name/number
+- * Steve Whitehouse: Fixed local port allocation, hashed sk list
+- * Matthew Wilcox: Fixes for dn_ioctl()
+- * Steve Whitehouse: New connect/accept logic to allow timeouts and
+- * prepare for sendpage etc.
+- */
+-
+-
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-
+-HISTORY:
+-
+-Version Kernel Date Author/Comments
+-------- ------ ---- ---------------
+-Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat
+- (emserrat@geocities.com)
+-
+- First Development of DECnet Socket La-
+- yer for Linux. Only supports outgoing
+- connections.
+-
+-Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield
+- (patrick@pandh.demon.co.uk)
+-
+- Port to new kernel development version.
+-
+-Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat
+- (emserrat@geocities.com)
+- _
+- Added support for incoming connections
+- so we can start developing server apps
+- on Linux.
+- -
+- Module Support
+-Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat
+- (emserrat@geocities.com)
+- _
+- Added support for X11R6.4. Now we can
+- use DECnet transport for X on Linux!!!
+- -
+-Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat
+- (emserrat@geocities.com)
+- Removed bugs on flow control
+- Removed bugs on incoming accessdata
+- order
+- -
+-Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
+- dn_recvmsg fixes
+-
+- Patrick J. Caulfield
+- dn_bind fixes
+-*******************************************************************************/
+-
+-#include <linux/module.h>
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/slab.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/kernel.h>
+-#include <linux/sched/signal.h>
+-#include <linux/timer.h>
+-#include <linux/string.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/netdevice.h>
+-#include <linux/inet.h>
+-#include <linux/route.h>
+-#include <linux/netfilter.h>
+-#include <linux/seq_file.h>
+-#include <net/sock.h>
+-#include <net/tcp_states.h>
+-#include <net/flow.h>
+-#include <asm/ioctls.h>
+-#include <linux/capability.h>
+-#include <linux/mm.h>
+-#include <linux/interrupt.h>
+-#include <linux/proc_fs.h>
+-#include <linux/stat.h>
+-#include <linux/init.h>
+-#include <linux/poll.h>
+-#include <linux/jiffies.h>
+-#include <net/net_namespace.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/fib_rules.h>
+-#include <net/tcp.h>
+-#include <net/dn.h>
+-#include <net/dn_nsp.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-#include <net/dn_fib.h>
+-#include <net/dn_neigh.h>
+-
+-struct dn_sock {
+- struct sock sk;
+- struct dn_scp scp;
+-};
+-
+-static void dn_keepalive(struct sock *sk);
+-
+-#define DN_SK_HASH_SHIFT 8
+-#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT)
+-#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
+-
+-
+-static const struct proto_ops dn_proto_ops;
+-static DEFINE_RWLOCK(dn_hash_lock);
+-static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
+-static struct hlist_head dn_wild_sk;
+-static atomic_long_t decnet_memory_allocated;
+-
+-static int __dn_setsockopt(struct socket *sock, int level, int optname,
+- sockptr_t optval, unsigned int optlen, int flags);
+-static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
+-
+-static struct hlist_head *dn_find_list(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->addr.sdn_flags & SDF_WILD)
+- return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
+-
+- return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK];
+-}
+-
+-/*
+- * Valid ports are those greater than zero and not already in use.
+- */
+-static int check_port(__le16 port)
+-{
+- struct sock *sk;
+-
+- if (port == 0)
+- return -1;
+-
+- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
+- struct dn_scp *scp = DN_SK(sk);
+- if (scp->addrloc == port)
+- return -1;
+- }
+- return 0;
+-}
+-
+-static unsigned short port_alloc(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- static unsigned short port = 0x2000;
+- unsigned short i_port = port;
+-
+- while(check_port(cpu_to_le16(++port)) != 0) {
+- if (port == i_port)
+- return 0;
+- }
+-
+- scp->addrloc = cpu_to_le16(port);
+-
+- return 1;
+-}
+-
+-/*
+- * Since this is only ever called from user
+- * level, we don't need a write_lock() version
+- * of this.
+- */
+-static int dn_hash_sock(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct hlist_head *list;
+- int rv = -EUSERS;
+-
+- BUG_ON(sk_hashed(sk));
+-
+- write_lock_bh(&dn_hash_lock);
+-
+- if (!scp->addrloc && !port_alloc(sk))
+- goto out;
+-
+- rv = -EADDRINUSE;
+- if ((list = dn_find_list(sk)) == NULL)
+- goto out;
+-
+- sk_add_node(sk, list);
+- rv = 0;
+-out:
+- write_unlock_bh(&dn_hash_lock);
+- return rv;
+-}
+-
+-static void dn_unhash_sock(struct sock *sk)
+-{
+- write_lock(&dn_hash_lock);
+- sk_del_node_init(sk);
+- write_unlock(&dn_hash_lock);
+-}
+-
+-static void dn_unhash_sock_bh(struct sock *sk)
+-{
+- write_lock_bh(&dn_hash_lock);
+- sk_del_node_init(sk);
+- write_unlock_bh(&dn_hash_lock);
+-}
+-
+-static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
+-{
+- int i;
+- unsigned int hash = addr->sdn_objnum;
+-
+- if (hash == 0) {
+- hash = addr->sdn_objnamel;
+- for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) {
+- hash ^= addr->sdn_objname[i];
+- hash ^= (hash << 3);
+- }
+- }
+-
+- return &dn_sk_hash[hash & DN_SK_HASH_MASK];
+-}
+-
+-/*
+- * Called to transform a socket from bound (i.e. with a local address)
+- * into a listening socket (doesn't need a local port number) and rehashes
+- * based upon the object name/number.
+- */
+-static void dn_rehash_sock(struct sock *sk)
+-{
+- struct hlist_head *list;
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->addr.sdn_flags & SDF_WILD)
+- return;
+-
+- write_lock_bh(&dn_hash_lock);
+- sk_del_node_init(sk);
+- DN_SK(sk)->addrloc = 0;
+- list = listen_hash(&DN_SK(sk)->addr);
+- sk_add_node(sk, list);
+- write_unlock_bh(&dn_hash_lock);
+-}
+-
+-int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type)
+-{
+- int len = 2;
+-
+- *buf++ = type;
+-
+- switch (type) {
+- case 0:
+- *buf++ = sdn->sdn_objnum;
+- break;
+- case 1:
+- *buf++ = 0;
+- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
+- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
+- len = 3 + le16_to_cpu(sdn->sdn_objnamel);
+- break;
+- case 2:
+- memset(buf, 0, 5);
+- buf += 5;
+- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
+- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
+- len = 7 + le16_to_cpu(sdn->sdn_objnamel);
+- break;
+- }
+-
+- return len;
+-}
+-
+-/*
+- * On reception of usernames, we handle types 1 and 0 for destination
+- * addresses only. Types 2 and 4 are used for source addresses, but the
+- * UIC, GIC are ignored and they are both treated the same way. Type 3
+- * is never used as I've no idea what its purpose might be or what its
+- * format is.
+- */
+-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt)
+-{
+- unsigned char type;
+- int size = len;
+- int namel = 12;
+-
+- sdn->sdn_objnum = 0;
+- sdn->sdn_objnamel = cpu_to_le16(0);
+- memset(sdn->sdn_objname, 0, DN_MAXOBJL);
+-
+- if (len < 2)
+- return -1;
+-
+- len -= 2;
+- *fmt = *data++;
+- type = *data++;
+-
+- switch (*fmt) {
+- case 0:
+- sdn->sdn_objnum = type;
+- return 2;
+- case 1:
+- namel = 16;
+- break;
+- case 2:
+- len -= 4;
+- data += 4;
+- break;
+- case 4:
+- len -= 8;
+- data += 8;
+- break;
+- default:
+- return -1;
+- }
+-
+- len -= 1;
+-
+- if (len < 0)
+- return -1;
+-
+- sdn->sdn_objnamel = cpu_to_le16(*data++);
+- len -= le16_to_cpu(sdn->sdn_objnamel);
+-
+- if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel))
+- return -1;
+-
+- memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel));
+-
+- return size - len;
+-}
+-
+-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
+-{
+- struct hlist_head *list = listen_hash(addr);
+- struct sock *sk;
+-
+- read_lock(&dn_hash_lock);
+- sk_for_each(sk, list) {
+- struct dn_scp *scp = DN_SK(sk);
+- if (sk->sk_state != TCP_LISTEN)
+- continue;
+- if (scp->addr.sdn_objnum) {
+- if (scp->addr.sdn_objnum != addr->sdn_objnum)
+- continue;
+- } else {
+- if (addr->sdn_objnum)
+- continue;
+- if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
+- continue;
+- if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0)
+- continue;
+- }
+- sock_hold(sk);
+- read_unlock(&dn_hash_lock);
+- return sk;
+- }
+-
+- sk = sk_head(&dn_wild_sk);
+- if (sk) {
+- if (sk->sk_state == TCP_LISTEN)
+- sock_hold(sk);
+- else
+- sk = NULL;
+- }
+-
+- read_unlock(&dn_hash_lock);
+- return sk;
+-}
+-
+-struct sock *dn_find_by_skb(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct sock *sk;
+- struct dn_scp *scp;
+-
+- read_lock(&dn_hash_lock);
+- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
+- scp = DN_SK(sk);
+- if (cb->src != dn_saddr2dn(&scp->peer))
+- continue;
+- if (cb->dst_port != scp->addrloc)
+- continue;
+- if (scp->addrrem && (cb->src_port != scp->addrrem))
+- continue;
+- sock_hold(sk);
+- goto found;
+- }
+- sk = NULL;
+-found:
+- read_unlock(&dn_hash_lock);
+- return sk;
+-}
+-
+-
+-
+-static void dn_destruct(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- skb_queue_purge(&scp->data_xmit_queue);
+- skb_queue_purge(&scp->other_xmit_queue);
+- skb_queue_purge(&scp->other_receive_queue);
+-
+- dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
+-}
+-
+-static unsigned long dn_memory_pressure;
+-
+-static void dn_enter_memory_pressure(struct sock *sk)
+-{
+- if (!dn_memory_pressure) {
+- dn_memory_pressure = 1;
+- }
+-}
+-
+-static struct proto dn_proto = {
+- .name = "NSP",
+- .owner = THIS_MODULE,
+- .enter_memory_pressure = dn_enter_memory_pressure,
+- .memory_pressure = &dn_memory_pressure,
+- .memory_allocated = &decnet_memory_allocated,
+- .sysctl_mem = sysctl_decnet_mem,
+- .sysctl_wmem = sysctl_decnet_wmem,
+- .sysctl_rmem = sysctl_decnet_rmem,
+- .max_header = DN_MAX_NSP_DATA_HEADER + 64,
+- .obj_size = sizeof(struct dn_sock),
+-};
+-
+-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
+-{
+- struct dn_scp *scp;
+- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
+-
+- if (!sk)
+- goto out;
+-
+- if (sock)
+- sock->ops = &dn_proto_ops;
+- sock_init_data(sock, sk);
+-
+- sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
+- sk->sk_destruct = dn_destruct;
+- sk->sk_no_check_tx = 1;
+- sk->sk_family = PF_DECnet;
+- sk->sk_protocol = 0;
+- sk->sk_allocation = gfp;
+- sk->sk_sndbuf = sysctl_decnet_wmem[1];
+- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
+-
+- /* Initialization of DECnet Session Control Port */
+- scp = DN_SK(sk);
+- scp->state = DN_O; /* Open */
+- scp->numdat = 1; /* Next data seg to tx */
+- scp->numoth = 1; /* Next oth data to tx */
+- scp->ackxmt_dat = 0; /* Last data seg ack'ed */
+- scp->ackxmt_oth = 0; /* Last oth data ack'ed */
+- scp->ackrcv_dat = 0; /* Highest data ack recv*/
+- scp->ackrcv_oth = 0; /* Last oth data ack rec*/
+- scp->flowrem_sw = DN_SEND;
+- scp->flowloc_sw = DN_SEND;
+- scp->flowrem_dat = 0;
+- scp->flowrem_oth = 1;
+- scp->flowloc_dat = 0;
+- scp->flowloc_oth = 1;
+- scp->services_rem = 0;
+- scp->services_loc = 1 | NSP_FC_NONE;
+- scp->info_rem = 0;
+- scp->info_loc = 0x03; /* NSP version 4.1 */
+- scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */
+- scp->nonagle = 0;
+- scp->multi_ireq = 1;
+- scp->accept_mode = ACC_IMMED;
+- scp->addr.sdn_family = AF_DECnet;
+- scp->peer.sdn_family = AF_DECnet;
+- scp->accessdata.acc_accl = 5;
+- memcpy(scp->accessdata.acc_acc, "LINUX", 5);
+-
+- scp->max_window = NSP_MAX_WINDOW;
+- scp->snd_window = NSP_MIN_WINDOW;
+- scp->nsp_srtt = NSP_INITIAL_SRTT;
+- scp->nsp_rttvar = NSP_INITIAL_RTTVAR;
+- scp->nsp_rxtshift = 0;
+-
+- skb_queue_head_init(&scp->data_xmit_queue);
+- skb_queue_head_init(&scp->other_xmit_queue);
+- skb_queue_head_init(&scp->other_receive_queue);
+-
+- scp->persist = 0;
+- scp->persist_fxn = NULL;
+- scp->keepalive = 10 * HZ;
+- scp->keepalive_fxn = dn_keepalive;
+-
+- dn_start_slow_timer(sk);
+-out:
+- return sk;
+-}
+-
+-/*
+- * Keepalive timer.
+- * FIXME: Should respond to SO_KEEPALIVE etc.
+- */
+-static void dn_keepalive(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- /*
+- * By checking the other_data transmit queue is empty
+- * we are double checking that we are not sending too
+- * many of these keepalive frames.
+- */
+- if (skb_queue_empty(&scp->other_xmit_queue))
+- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
+-}
+-
+-
+-/*
+- * Timer for shutdown/destroyed sockets.
+- * When socket is dead & no packets have been sent for a
+- * certain amount of time, they are removed by this
+- * routine. Also takes care of sending out DI & DC
+- * frames at correct times.
+- */
+-int dn_destroy_timer(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- scp->persist = dn_nsp_persist(sk);
+-
+- switch (scp->state) {
+- case DN_DI:
+- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
+- if (scp->nsp_rxtshift >= decnet_di_count)
+- scp->state = DN_CN;
+- return 0;
+-
+- case DN_DR:
+- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
+- if (scp->nsp_rxtshift >= decnet_dr_count)
+- scp->state = DN_DRC;
+- return 0;
+-
+- case DN_DN:
+- if (scp->nsp_rxtshift < decnet_dn_count) {
+- /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */
+- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
+- GFP_ATOMIC);
+- return 0;
+- }
+- }
+-
+- scp->persist = (HZ * decnet_time_wait);
+-
+- if (sk->sk_socket)
+- return 0;
+-
+- if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) {
+- dn_unhash_sock(sk);
+- sock_put(sk);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-static void dn_destroy_sock(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- scp->nsp_rxtshift = 0; /* reset back off */
+-
+- if (sk->sk_socket) {
+- if (sk->sk_socket->state != SS_UNCONNECTED)
+- sk->sk_socket->state = SS_DISCONNECTING;
+- }
+-
+- sk->sk_state = TCP_CLOSE;
+-
+- switch (scp->state) {
+- case DN_DN:
+- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
+- sk->sk_allocation);
+- scp->persist_fxn = dn_destroy_timer;
+- scp->persist = dn_nsp_persist(sk);
+- break;
+- case DN_CR:
+- scp->state = DN_DR;
+- goto disc_reject;
+- case DN_RUN:
+- scp->state = DN_DI;
+- fallthrough;
+- case DN_DI:
+- case DN_DR:
+-disc_reject:
+- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+- fallthrough;
+- case DN_NC:
+- case DN_NR:
+- case DN_RJ:
+- case DN_DIC:
+- case DN_CN:
+- case DN_DRC:
+- case DN_CI:
+- case DN_CD:
+- scp->persist_fxn = dn_destroy_timer;
+- scp->persist = dn_nsp_persist(sk);
+- break;
+- default:
+- printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+- fallthrough;
+- case DN_O:
+- dn_stop_slow_timer(sk);
+-
+- dn_unhash_sock_bh(sk);
+- sock_put(sk);
+-
+- break;
+- }
+-}
+-
+-char *dn_addr2asc(__u16 addr, char *buf)
+-{
+- unsigned short node, area;
+-
+- node = addr & 0x03ff;
+- area = addr >> 10;
+- sprintf(buf, "%hd.%hd", area, node);
+-
+- return buf;
+-}
+-
+-
+-
+-static int dn_create(struct net *net, struct socket *sock, int protocol,
+- int kern)
+-{
+- struct sock *sk;
+-
+- if (protocol < 0 || protocol > U8_MAX)
+- return -EINVAL;
+-
+- if (!net_eq(net, &init_net))
+- return -EAFNOSUPPORT;
+-
+- switch (sock->type) {
+- case SOCK_SEQPACKET:
+- if (protocol != DNPROTO_NSP)
+- return -EPROTONOSUPPORT;
+- break;
+- case SOCK_STREAM:
+- break;
+- default:
+- return -ESOCKTNOSUPPORT;
+- }
+-
+-
+- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
+- return -ENOBUFS;
+-
+- sk->sk_protocol = protocol;
+-
+- return 0;
+-}
+-
+-
+-static int
+-dn_release(struct socket *sock)
+-{
+- struct sock *sk = sock->sk;
+-
+- if (sk) {
+- sock_orphan(sk);
+- sock_hold(sk);
+- lock_sock(sk);
+- dn_destroy_sock(sk);
+- release_sock(sk);
+- sock_put(sk);
+- }
+-
+- return 0;
+-}
+-
+-static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
+- struct net_device *dev, *ldev;
+- int rv;
+-
+- if (addr_len != sizeof(struct sockaddr_dn))
+- return -EINVAL;
+-
+- if (saddr->sdn_family != AF_DECnet)
+- return -EINVAL;
+-
+- if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2))
+- return -EINVAL;
+-
+- if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL)
+- return -EINVAL;
+-
+- if (saddr->sdn_flags & ~SDF_WILD)
+- return -EINVAL;
+-
+- if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
+- (saddr->sdn_flags & SDF_WILD)))
+- return -EACCES;
+-
+- if (!(saddr->sdn_flags & SDF_WILD)) {
+- if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
+- rcu_read_lock();
+- ldev = NULL;
+- for_each_netdev_rcu(&init_net, dev) {
+- if (!dev->dn_ptr)
+- continue;
+- if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
+- ldev = dev;
+- break;
+- }
+- }
+- rcu_read_unlock();
+- if (ldev == NULL)
+- return -EADDRNOTAVAIL;
+- }
+- }
+-
+- rv = -EINVAL;
+- lock_sock(sk);
+- if (sock_flag(sk, SOCK_ZAPPED)) {
+- memcpy(&scp->addr, saddr, addr_len);
+- sock_reset_flag(sk, SOCK_ZAPPED);
+-
+- rv = dn_hash_sock(sk);
+- if (rv)
+- sock_set_flag(sk, SOCK_ZAPPED);
+- }
+- release_sock(sk);
+-
+- return rv;
+-}
+-
+-
+-static int dn_auto_bind(struct socket *sock)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- int rv;
+-
+- sock_reset_flag(sk, SOCK_ZAPPED);
+-
+- scp->addr.sdn_flags = 0;
+- scp->addr.sdn_objnum = 0;
+-
+- /*
+- * This stuff is to keep compatibility with Eduardo's
+- * patch. I hope I can dispense with it shortly...
+- */
+- if ((scp->accessdata.acc_accl != 0) &&
+- (scp->accessdata.acc_accl <= 12)) {
+-
+- scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl);
+- memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel));
+-
+- scp->accessdata.acc_accl = 0;
+- memset(scp->accessdata.acc_acc, 0, 40);
+- }
+- /* End of compatibility stuff */
+-
+- scp->addr.sdn_add.a_len = cpu_to_le16(2);
+- rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr);
+- if (rv == 0) {
+- rv = dn_hash_sock(sk);
+- if (rv)
+- sock_set_flag(sk, SOCK_ZAPPED);
+- }
+-
+- return rv;
+-}
+-
+-static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- DEFINE_WAIT_FUNC(wait, woken_wake_function);
+- int err;
+-
+- if (scp->state != DN_CR)
+- return -EINVAL;
+-
+- scp->state = DN_CC;
+- scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
+- dn_send_conn_conf(sk, allocation);
+-
+- add_wait_queue(sk_sleep(sk), &wait);
+- for(;;) {
+- release_sock(sk);
+- if (scp->state == DN_CC)
+- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
+- lock_sock(sk);
+- err = 0;
+- if (scp->state == DN_RUN)
+- break;
+- err = sock_error(sk);
+- if (err)
+- break;
+- err = sock_intr_errno(*timeo);
+- if (signal_pending(current))
+- break;
+- err = -EAGAIN;
+- if (!*timeo)
+- break;
+- }
+- remove_wait_queue(sk_sleep(sk), &wait);
+- if (err == 0) {
+- sk->sk_socket->state = SS_CONNECTED;
+- } else if (scp->state != DN_CC) {
+- sk->sk_socket->state = SS_UNCONNECTED;
+- }
+- return err;
+-}
+-
+-static int dn_wait_run(struct sock *sk, long *timeo)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- DEFINE_WAIT_FUNC(wait, woken_wake_function);
+- int err = 0;
+-
+- if (scp->state == DN_RUN)
+- goto out;
+-
+- if (!*timeo)
+- return -EALREADY;
+-
+- add_wait_queue(sk_sleep(sk), &wait);
+- for(;;) {
+- release_sock(sk);
+- if (scp->state == DN_CI || scp->state == DN_CC)
+- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
+- lock_sock(sk);
+- err = 0;
+- if (scp->state == DN_RUN)
+- break;
+- err = sock_error(sk);
+- if (err)
+- break;
+- err = sock_intr_errno(*timeo);
+- if (signal_pending(current))
+- break;
+- err = -ETIMEDOUT;
+- if (!*timeo)
+- break;
+- }
+- remove_wait_queue(sk_sleep(sk), &wait);
+-out:
+- if (err == 0) {
+- sk->sk_socket->state = SS_CONNECTED;
+- } else if (scp->state != DN_CI && scp->state != DN_CC) {
+- sk->sk_socket->state = SS_UNCONNECTED;
+- }
+- return err;
+-}
+-
+-static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
+-{
+- struct socket *sock = sk->sk_socket;
+- struct dn_scp *scp = DN_SK(sk);
+- int err = -EISCONN;
+- struct flowidn fld;
+- struct dst_entry *dst;
+-
+- if (sock->state == SS_CONNECTED)
+- goto out;
+-
+- if (sock->state == SS_CONNECTING) {
+- err = 0;
+- if (scp->state == DN_RUN) {
+- sock->state = SS_CONNECTED;
+- goto out;
+- }
+- err = -ECONNREFUSED;
+- if (scp->state != DN_CI && scp->state != DN_CC) {
+- sock->state = SS_UNCONNECTED;
+- goto out;
+- }
+- return dn_wait_run(sk, timeo);
+- }
+-
+- err = -EINVAL;
+- if (scp->state != DN_O)
+- goto out;
+-
+- if (addr == NULL || addrlen != sizeof(struct sockaddr_dn))
+- goto out;
+- if (addr->sdn_family != AF_DECnet)
+- goto out;
+- if (addr->sdn_flags & SDF_WILD)
+- goto out;
+-
+- if (sock_flag(sk, SOCK_ZAPPED)) {
+- err = dn_auto_bind(sk->sk_socket);
+- if (err)
+- goto out;
+- }
+-
+- memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
+-
+- err = -EHOSTUNREACH;
+- memset(&fld, 0, sizeof(fld));
+- fld.flowidn_oif = sk->sk_bound_dev_if;
+- fld.daddr = dn_saddr2dn(&scp->peer);
+- fld.saddr = dn_saddr2dn(&scp->addr);
+- dn_sk_ports_copy(&fld, scp);
+- fld.flowidn_proto = DNPROTO_NSP;
+- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
+- goto out;
+- dst = __sk_dst_get(sk);
+- sk->sk_route_caps = dst->dev->features;
+- sock->state = SS_CONNECTING;
+- scp->state = DN_CI;
+- scp->segsize_loc = dst_metric_advmss(dst);
+-
+- dn_nsp_send_conninit(sk, NSP_CI);
+- err = -EINPROGRESS;
+- if (*timeo) {
+- err = dn_wait_run(sk, timeo);
+- }
+-out:
+- return err;
+-}
+-
+-static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags)
+-{
+- struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr;
+- struct sock *sk = sock->sk;
+- int err;
+- long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+-
+- lock_sock(sk);
+- err = __dn_connect(sk, addr, addrlen, &timeo, 0);
+- release_sock(sk);
+-
+- return err;
+-}
+-
+-static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- switch (scp->state) {
+- case DN_RUN:
+- return 0;
+- case DN_CR:
+- return dn_confirm_accept(sk, timeo, sk->sk_allocation);
+- case DN_CI:
+- case DN_CC:
+- return dn_wait_run(sk, timeo);
+- case DN_O:
+- return __dn_connect(sk, addr, addrlen, timeo, flags);
+- }
+-
+- return -EINVAL;
+-}
+-
+-
+-static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
+-{
+- unsigned char *ptr = skb->data;
+-
+- acc->acc_userl = *ptr++;
+- memcpy(&acc->acc_user, ptr, acc->acc_userl);
+- ptr += acc->acc_userl;
+-
+- acc->acc_passl = *ptr++;
+- memcpy(&acc->acc_pass, ptr, acc->acc_passl);
+- ptr += acc->acc_passl;
+-
+- acc->acc_accl = *ptr++;
+- memcpy(&acc->acc_acc, ptr, acc->acc_accl);
+-
+- skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3);
+-
+-}
+-
+-static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
+-{
+- unsigned char *ptr = skb->data;
+- u16 len = *ptr++; /* yes, it's 8bit on the wire */
+-
+- BUG_ON(len > 16); /* we've checked the contents earlier */
+- opt->opt_optl = cpu_to_le16(len);
+- opt->opt_status = 0;
+- memcpy(opt->opt_data, ptr, len);
+- skb_pull(skb, len + 1);
+-}
+-
+-static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
+-{
+- DEFINE_WAIT_FUNC(wait, woken_wake_function);
+- struct sk_buff *skb = NULL;
+- int err = 0;
+-
+- add_wait_queue(sk_sleep(sk), &wait);
+- for(;;) {
+- release_sock(sk);
+- skb = skb_dequeue(&sk->sk_receive_queue);
+- if (skb == NULL) {
+- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
+- skb = skb_dequeue(&sk->sk_receive_queue);
+- }
+- lock_sock(sk);
+- if (skb != NULL)
+- break;
+- err = -EINVAL;
+- if (sk->sk_state != TCP_LISTEN)
+- break;
+- err = sock_intr_errno(*timeo);
+- if (signal_pending(current))
+- break;
+- err = -EAGAIN;
+- if (!*timeo)
+- break;
+- }
+- remove_wait_queue(sk_sleep(sk), &wait);
+-
+- return skb == NULL ? ERR_PTR(err) : skb;
+-}
+-
+-static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+- bool kern)
+-{
+- struct sock *sk = sock->sk, *newsk;
+- struct sk_buff *skb = NULL;
+- struct dn_skb_cb *cb;
+- unsigned char menuver;
+- int err = 0;
+- unsigned char type;
+- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+- struct dst_entry *dst;
+-
+- lock_sock(sk);
+-
+- if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) {
+- release_sock(sk);
+- return -EINVAL;
+- }
+-
+- skb = skb_dequeue(&sk->sk_receive_queue);
+- if (skb == NULL) {
+- skb = dn_wait_for_connect(sk, &timeo);
+- if (IS_ERR(skb)) {
+- release_sock(sk);
+- return PTR_ERR(skb);
+- }
+- }
+-
+- cb = DN_SKB_CB(skb);
+- sk_acceptq_removed(sk);
+- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
+- if (newsk == NULL) {
+- release_sock(sk);
+- kfree_skb(skb);
+- return -ENOBUFS;
+- }
+- release_sock(sk);
+-
+- dst = skb_dst(skb);
+- sk_dst_set(newsk, dst);
+- skb_dst_set(skb, NULL);
+-
+- DN_SK(newsk)->state = DN_CR;
+- DN_SK(newsk)->addrrem = cb->src_port;
+- DN_SK(newsk)->services_rem = cb->services;
+- DN_SK(newsk)->info_rem = cb->info;
+- DN_SK(newsk)->segsize_rem = cb->segsize;
+- DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode;
+-
+- if (DN_SK(newsk)->segsize_rem < 230)
+- DN_SK(newsk)->segsize_rem = 230;
+-
+- if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
+- DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd;
+-
+- newsk->sk_state = TCP_LISTEN;
+- memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn));
+-
+- /*
+- * If we are listening on a wild socket, we don't want
+- * the newly created socket on the wrong hash queue.
+- */
+- DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD;
+-
+- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type));
+- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type));
+- *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src;
+- *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst;
+-
+- menuver = *skb->data;
+- skb_pull(skb, 1);
+-
+- if (menuver & DN_MENUVER_ACC)
+- dn_access_copy(skb, &(DN_SK(newsk)->accessdata));
+-
+- if (menuver & DN_MENUVER_USR)
+- dn_user_copy(skb, &(DN_SK(newsk)->conndata_in));
+-
+- if (menuver & DN_MENUVER_PRX)
+- DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY;
+-
+- if (menuver & DN_MENUVER_UIC)
+- DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY;
+-
+- kfree_skb(skb);
+-
+- memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out),
+- sizeof(struct optdata_dn));
+- memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out),
+- sizeof(struct optdata_dn));
+-
+- lock_sock(newsk);
+- err = dn_hash_sock(newsk);
+- if (err == 0) {
+- sock_reset_flag(newsk, SOCK_ZAPPED);
+- dn_send_conn_ack(newsk);
+-
+- /*
+- * Here we use sk->sk_allocation since although the conn conf is
+- * for the newsk, the context is the old socket.
+- */
+- if (DN_SK(newsk)->accept_mode == ACC_IMMED)
+- err = dn_confirm_accept(newsk, &timeo,
+- sk->sk_allocation);
+- }
+- release_sock(newsk);
+- return err;
+-}
+-
+-
+-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
+-{
+- struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+-
+- lock_sock(sk);
+-
+- if (peer) {
+- if ((sock->state != SS_CONNECTED &&
+- sock->state != SS_CONNECTING) &&
+- scp->accept_mode == ACC_IMMED) {
+- release_sock(sk);
+- return -ENOTCONN;
+- }
+-
+- memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn));
+- } else {
+- memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn));
+- }
+-
+- release_sock(sk);
+-
+- return sizeof(struct sockaddr_dn);
+-}
+-
+-
+-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- __poll_t mask = datagram_poll(file, sock, wait);
+-
+- if (!skb_queue_empty_lockless(&scp->other_receive_queue))
+- mask |= EPOLLRDBAND;
+-
+- return mask;
+-}
+-
+-static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- int err = -EOPNOTSUPP;
+- long amount = 0;
+- struct sk_buff *skb;
+- int val;
+-
+- switch(cmd)
+- {
+- case SIOCGIFADDR:
+- case SIOCSIFADDR:
+- return dn_dev_ioctl(cmd, (void __user *)arg);
+-
+- case SIOCATMARK:
+- lock_sock(sk);
+- val = !skb_queue_empty(&scp->other_receive_queue);
+- if (scp->state != DN_RUN)
+- val = -ENOTCONN;
+- release_sock(sk);
+- return val;
+-
+- case TIOCOUTQ:
+- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+- if (amount < 0)
+- amount = 0;
+- err = put_user(amount, (int __user *)arg);
+- break;
+-
+- case TIOCINQ:
+- lock_sock(sk);
+- skb = skb_peek(&scp->other_receive_queue);
+- if (skb) {
+- amount = skb->len;
+- } else {
+- skb_queue_walk(&sk->sk_receive_queue, skb)
+- amount += skb->len;
+- }
+- release_sock(sk);
+- err = put_user(amount, (int __user *)arg);
+- break;
+-
+- default:
+- err = -ENOIOCTLCMD;
+- break;
+- }
+-
+- return err;
+-}
+-
+-static int dn_listen(struct socket *sock, int backlog)
+-{
+- struct sock *sk = sock->sk;
+- int err = -EINVAL;
+-
+- lock_sock(sk);
+-
+- if (sock_flag(sk, SOCK_ZAPPED))
+- goto out;
+-
+- if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN))
+- goto out;
+-
+- sk->sk_max_ack_backlog = backlog;
+- sk->sk_ack_backlog = 0;
+- sk->sk_state = TCP_LISTEN;
+- err = 0;
+- dn_rehash_sock(sk);
+-
+-out:
+- release_sock(sk);
+-
+- return err;
+-}
+-
+-
+-static int dn_shutdown(struct socket *sock, int how)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- int err = -ENOTCONN;
+-
+- lock_sock(sk);
+-
+- if (sock->state == SS_UNCONNECTED)
+- goto out;
+-
+- err = 0;
+- if (sock->state == SS_DISCONNECTING)
+- goto out;
+-
+- err = -EINVAL;
+- if (scp->state == DN_O)
+- goto out;
+-
+- if (how != SHUT_RDWR)
+- goto out;
+-
+- sk->sk_shutdown = SHUTDOWN_MASK;
+- dn_destroy_sock(sk);
+- err = 0;
+-
+-out:
+- release_sock(sk);
+-
+- return err;
+-}
+-
+-static int dn_setsockopt(struct socket *sock, int level, int optname,
+- sockptr_t optval, unsigned int optlen)
+-{
+- struct sock *sk = sock->sk;
+- int err;
+-
+- lock_sock(sk);
+- err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
+- release_sock(sk);
+-#ifdef CONFIG_NETFILTER
+- /* we need to exclude all possible ENOPROTOOPTs except default case */
+- if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
+- optname != DSO_STREAM && optname != DSO_SEQPACKET)
+- err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
+-#endif
+-
+- return err;
+-}
+-
+-static int __dn_setsockopt(struct socket *sock, int level, int optname,
+- sockptr_t optval, unsigned int optlen, int flags)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- long timeo;
+- union {
+- struct optdata_dn opt;
+- struct accessdata_dn acc;
+- int mode;
+- unsigned long win;
+- int val;
+- unsigned char services;
+- unsigned char info;
+- } u;
+- int err;
+-
+- if (optlen && sockptr_is_null(optval))
+- return -EINVAL;
+-
+- if (optlen > sizeof(u))
+- return -EINVAL;
+-
+- if (copy_from_sockptr(&u, optval, optlen))
+- return -EFAULT;
+-
+- switch (optname) {
+- case DSO_CONDATA:
+- if (sock->state == SS_CONNECTED)
+- return -EISCONN;
+- if ((scp->state != DN_O) && (scp->state != DN_CR))
+- return -EINVAL;
+-
+- if (optlen != sizeof(struct optdata_dn))
+- return -EINVAL;
+-
+- if (le16_to_cpu(u.opt.opt_optl) > 16)
+- return -EINVAL;
+-
+- memcpy(&scp->conndata_out, &u.opt, optlen);
+- break;
+-
+- case DSO_DISDATA:
+- if (sock->state != SS_CONNECTED &&
+- scp->accept_mode == ACC_IMMED)
+- return -ENOTCONN;
+-
+- if (optlen != sizeof(struct optdata_dn))
+- return -EINVAL;
+-
+- if (le16_to_cpu(u.opt.opt_optl) > 16)
+- return -EINVAL;
+-
+- memcpy(&scp->discdata_out, &u.opt, optlen);
+- break;
+-
+- case DSO_CONACCESS:
+- if (sock->state == SS_CONNECTED)
+- return -EISCONN;
+- if (scp->state != DN_O)
+- return -EINVAL;
+-
+- if (optlen != sizeof(struct accessdata_dn))
+- return -EINVAL;
+-
+- if ((u.acc.acc_accl > DN_MAXACCL) ||
+- (u.acc.acc_passl > DN_MAXACCL) ||
+- (u.acc.acc_userl > DN_MAXACCL))
+- return -EINVAL;
+-
+- memcpy(&scp->accessdata, &u.acc, optlen);
+- break;
+-
+- case DSO_ACCEPTMODE:
+- if (sock->state == SS_CONNECTED)
+- return -EISCONN;
+- if (scp->state != DN_O)
+- return -EINVAL;
+-
+- if (optlen != sizeof(int))
+- return -EINVAL;
+-
+- if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER))
+- return -EINVAL;
+-
+- scp->accept_mode = (unsigned char)u.mode;
+- break;
+-
+- case DSO_CONACCEPT:
+- if (scp->state != DN_CR)
+- return -EINVAL;
+- timeo = sock_rcvtimeo(sk, 0);
+- err = dn_confirm_accept(sk, &timeo, sk->sk_allocation);
+- return err;
+-
+- case DSO_CONREJECT:
+- if (scp->state != DN_CR)
+- return -EINVAL;
+-
+- scp->state = DN_DR;
+- sk->sk_shutdown = SHUTDOWN_MASK;
+- dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
+- break;
+-
+- case DSO_MAXWINDOW:
+- if (optlen != sizeof(unsigned long))
+- return -EINVAL;
+- if (u.win > NSP_MAX_WINDOW)
+- u.win = NSP_MAX_WINDOW;
+- if (u.win == 0)
+- return -EINVAL;
+- scp->max_window = u.win;
+- if (scp->snd_window > u.win)
+- scp->snd_window = u.win;
+- break;
+-
+- case DSO_NODELAY:
+- if (optlen != sizeof(int))
+- return -EINVAL;
+- if (scp->nonagle == TCP_NAGLE_CORK)
+- return -EINVAL;
+- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
+- /* if (scp->nonagle == 1) { Push pending frames } */
+- break;
+-
+- case DSO_CORK:
+- if (optlen != sizeof(int))
+- return -EINVAL;
+- if (scp->nonagle == TCP_NAGLE_OFF)
+- return -EINVAL;
+- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
+- /* if (scp->nonagle == 0) { Push pending frames } */
+- break;
+-
+- case DSO_SERVICES:
+- if (optlen != sizeof(unsigned char))
+- return -EINVAL;
+- if ((u.services & ~NSP_FC_MASK) != 0x01)
+- return -EINVAL;
+- if ((u.services & NSP_FC_MASK) == NSP_FC_MASK)
+- return -EINVAL;
+- scp->services_loc = u.services;
+- break;
+-
+- case DSO_INFO:
+- if (optlen != sizeof(unsigned char))
+- return -EINVAL;
+- if (u.info & 0xfc)
+- return -EINVAL;
+- scp->info_loc = u.info;
+- break;
+-
+- case DSO_LINKINFO:
+- case DSO_STREAM:
+- case DSO_SEQPACKET:
+- default:
+- return -ENOPROTOOPT;
+- }
+-
+- return 0;
+-}
+-
+-static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+-{
+- struct sock *sk = sock->sk;
+- int err;
+-
+- lock_sock(sk);
+- err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
+- release_sock(sk);
+-#ifdef CONFIG_NETFILTER
+- if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
+- optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
+- optname != DSO_CONREJECT) {
+- int len;
+-
+- if (get_user(len, optlen))
+- return -EFAULT;
+-
+- err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
+- if (err >= 0)
+- err = put_user(len, optlen);
+- }
+-#endif
+-
+- return err;
+-}
+-
+-static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- struct linkinfo_dn link;
+- unsigned int r_len;
+- void *r_data = NULL;
+- unsigned int val;
+-
+- if(get_user(r_len , optlen))
+- return -EFAULT;
+-
+- switch (optname) {
+- case DSO_CONDATA:
+- if (r_len > sizeof(struct optdata_dn))
+- r_len = sizeof(struct optdata_dn);
+- r_data = &scp->conndata_in;
+- break;
+-
+- case DSO_DISDATA:
+- if (r_len > sizeof(struct optdata_dn))
+- r_len = sizeof(struct optdata_dn);
+- r_data = &scp->discdata_in;
+- break;
+-
+- case DSO_CONACCESS:
+- if (r_len > sizeof(struct accessdata_dn))
+- r_len = sizeof(struct accessdata_dn);
+- r_data = &scp->accessdata;
+- break;
+-
+- case DSO_ACCEPTMODE:
+- if (r_len > sizeof(unsigned char))
+- r_len = sizeof(unsigned char);
+- r_data = &scp->accept_mode;
+- break;
+-
+- case DSO_LINKINFO:
+- if (r_len > sizeof(struct linkinfo_dn))
+- r_len = sizeof(struct linkinfo_dn);
+-
+- memset(&link, 0, sizeof(link));
+-
+- switch (sock->state) {
+- case SS_CONNECTING:
+- link.idn_linkstate = LL_CONNECTING;
+- break;
+- case SS_DISCONNECTING:
+- link.idn_linkstate = LL_DISCONNECTING;
+- break;
+- case SS_CONNECTED:
+- link.idn_linkstate = LL_RUNNING;
+- break;
+- default:
+- link.idn_linkstate = LL_INACTIVE;
+- }
+-
+- link.idn_segsize = scp->segsize_rem;
+- r_data = &link;
+- break;
+-
+- case DSO_MAXWINDOW:
+- if (r_len > sizeof(unsigned long))
+- r_len = sizeof(unsigned long);
+- r_data = &scp->max_window;
+- break;
+-
+- case DSO_NODELAY:
+- if (r_len > sizeof(int))
+- r_len = sizeof(int);
+- val = (scp->nonagle == TCP_NAGLE_OFF);
+- r_data = &val;
+- break;
+-
+- case DSO_CORK:
+- if (r_len > sizeof(int))
+- r_len = sizeof(int);
+- val = (scp->nonagle == TCP_NAGLE_CORK);
+- r_data = &val;
+- break;
+-
+- case DSO_SERVICES:
+- if (r_len > sizeof(unsigned char))
+- r_len = sizeof(unsigned char);
+- r_data = &scp->services_rem;
+- break;
+-
+- case DSO_INFO:
+- if (r_len > sizeof(unsigned char))
+- r_len = sizeof(unsigned char);
+- r_data = &scp->info_rem;
+- break;
+-
+- case DSO_STREAM:
+- case DSO_SEQPACKET:
+- case DSO_CONACCEPT:
+- case DSO_CONREJECT:
+- default:
+- return -ENOPROTOOPT;
+- }
+-
+- if (r_data) {
+- if (copy_to_user(optval, r_data, r_len))
+- return -EFAULT;
+- if (put_user(r_len, optlen))
+- return -EFAULT;
+- }
+-
+- return 0;
+-}
+-
+-
+-static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target)
+-{
+- struct sk_buff *skb;
+- int len = 0;
+-
+- if (flags & MSG_OOB)
+- return !skb_queue_empty(q) ? 1 : 0;
+-
+- skb_queue_walk(q, skb) {
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- len += skb->len;
+-
+- if (cb->nsp_flags & 0x40) {
+- /* SOCK_SEQPACKET reads to EOM */
+- if (sk->sk_type == SOCK_SEQPACKET)
+- return 1;
+- /* so does SOCK_STREAM unless WAITALL is specified */
+- if (!(flags & MSG_WAITALL))
+- return 1;
+- }
+-
+- /* minimum data length for read exceeded */
+- if (len >= target)
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-
+-static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+- int flags)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff_head *queue = &sk->sk_receive_queue;
+- size_t target = size > 1 ? 1 : 0;
+- size_t copied = 0;
+- int rv = 0;
+- struct sk_buff *skb, *n;
+- struct dn_skb_cb *cb = NULL;
+- unsigned char eor = 0;
+- long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+-
+- lock_sock(sk);
+-
+- if (sock_flag(sk, SOCK_ZAPPED)) {
+- rv = -EADDRNOTAVAIL;
+- goto out;
+- }
+-
+- if (sk->sk_shutdown & RCV_SHUTDOWN) {
+- rv = 0;
+- goto out;
+- }
+-
+- rv = dn_check_state(sk, NULL, 0, &timeo, flags);
+- if (rv)
+- goto out;
+-
+- if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
+- rv = -EOPNOTSUPP;
+- goto out;
+- }
+-
+- if (flags & MSG_OOB)
+- queue = &scp->other_receive_queue;
+-
+- if (flags & MSG_WAITALL)
+- target = size;
+-
+-
+- /*
+- * See if there is data ready to read, sleep if there isn't
+- */
+- for(;;) {
+- DEFINE_WAIT_FUNC(wait, woken_wake_function);
+-
+- if (sk->sk_err)
+- goto out;
+-
+- if (!skb_queue_empty(&scp->other_receive_queue)) {
+- if (!(flags & MSG_OOB)) {
+- msg->msg_flags |= MSG_OOB;
+- if (!scp->other_report) {
+- scp->other_report = 1;
+- goto out;
+- }
+- }
+- }
+-
+- if (scp->state != DN_RUN)
+- goto out;
+-
+- if (signal_pending(current)) {
+- rv = sock_intr_errno(timeo);
+- goto out;
+- }
+-
+- if (dn_data_ready(sk, queue, flags, target))
+- break;
+-
+- if (flags & MSG_DONTWAIT) {
+- rv = -EWOULDBLOCK;
+- goto out;
+- }
+-
+- add_wait_queue(sk_sleep(sk), &wait);
+- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+- sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
+- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+- remove_wait_queue(sk_sleep(sk), &wait);
+- }
+-
+- skb_queue_walk_safe(queue, skb, n) {
+- unsigned int chunk = skb->len;
+- cb = DN_SKB_CB(skb);
+-
+- if ((chunk + copied) > size)
+- chunk = size - copied;
+-
+- if (memcpy_to_msg(msg, skb->data, chunk)) {
+- rv = -EFAULT;
+- break;
+- }
+- copied += chunk;
+-
+- if (!(flags & MSG_PEEK))
+- skb_pull(skb, chunk);
+-
+- eor = cb->nsp_flags & 0x40;
+-
+- if (skb->len == 0) {
+- skb_unlink(skb, queue);
+- kfree_skb(skb);
+- /*
+- * N.B. Don't refer to skb or cb after this point
+- * in loop.
+- */
+- if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) {
+- scp->flowloc_sw = DN_SEND;
+- dn_nsp_send_link(sk, DN_SEND, 0);
+- }
+- }
+-
+- if (eor) {
+- if (sk->sk_type == SOCK_SEQPACKET)
+- break;
+- if (!(flags & MSG_WAITALL))
+- break;
+- }
+-
+- if (flags & MSG_OOB)
+- break;
+-
+- if (copied >= target)
+- break;
+- }
+-
+- rv = copied;
+-
+-
+- if (eor && (sk->sk_type == SOCK_SEQPACKET))
+- msg->msg_flags |= MSG_EOR;
+-
+-out:
+- if (rv == 0)
+- rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk);
+-
+- if ((rv >= 0) && msg->msg_name) {
+- __sockaddr_check_size(sizeof(struct sockaddr_dn));
+- memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn));
+- msg->msg_namelen = sizeof(struct sockaddr_dn);
+- }
+-
+- release_sock(sk);
+-
+- return rv;
+-}
+-
+-
+-static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags)
+-{
+- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
+- if (skb_queue_len(queue) >= scp->snd_window)
+- return 1;
+- if (fctype != NSP_FC_NONE) {
+- if (flags & MSG_OOB) {
+- if (scp->flowrem_oth == 0)
+- return 1;
+- } else {
+- if (scp->flowrem_dat == 0)
+- return 1;
+- }
+- }
+- return 0;
+-}
+-
+-/*
+- * The DECnet spec requires that the "routing layer" accepts packets which
+- * are at least 230 bytes in size. This excludes any headers which the NSP
+- * layer might add, so we always assume that we'll be using the maximal
+- * length header on data packets. The variation in length is due to the
+- * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't
+- * make much practical difference.
+- */
+-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu)
+-{
+- unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER;
+- if (dev) {
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+- mtu -= LL_RESERVED_SPACE(dev);
+- if (dn_db->use_long)
+- mtu -= 21;
+- else
+- mtu -= 6;
+- mtu -= DN_MAX_NSP_DATA_HEADER;
+- } else {
+- /*
+- * 21 = long header, 16 = guess at MAC header length
+- */
+- mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16);
+- }
+- if (mtu > mss)
+- mss = mtu;
+- return mss;
+-}
+-
+-static inline unsigned int dn_current_mss(struct sock *sk, int flags)
+-{
+- struct dst_entry *dst = __sk_dst_get(sk);
+- struct dn_scp *scp = DN_SK(sk);
+- int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem);
+-
+- /* Other data messages are limited to 16 bytes per packet */
+- if (flags & MSG_OOB)
+- return 16;
+-
+- /* This works out the maximum size of segment we can send out */
+- if (dst) {
+- u32 mtu = dst_mtu(dst);
+- mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now);
+- }
+-
+- return mss_now;
+-}
+-
+-/*
+- * N.B. We get the timeout wrong here, but then we always did get it
+- * wrong before and this is another step along the road to correcting
+- * it. It ought to get updated each time we pass through the routine,
+- * but in practise it probably doesn't matter too much for now.
+- */
+-static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
+- unsigned long datalen, int noblock,
+- int *errcode)
+-{
+- struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
+- noblock, errcode);
+- if (skb) {
+- skb->protocol = htons(ETH_P_DNA_RT);
+- skb->pkt_type = PACKET_OUTGOING;
+- }
+- return skb;
+-}
+-
+-static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+-{
+- struct sock *sk = sock->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- size_t mss;
+- struct sk_buff_head *queue = &scp->data_xmit_queue;
+- int flags = msg->msg_flags;
+- int err = 0;
+- size_t sent = 0;
+- int addr_len = msg->msg_namelen;
+- DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name);
+- struct sk_buff *skb = NULL;
+- struct dn_skb_cb *cb;
+- size_t len;
+- unsigned char fctype;
+- long timeo;
+-
+- if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
+- return -EOPNOTSUPP;
+-
+- if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
+- return -EINVAL;
+-
+- lock_sock(sk);
+- timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+- /*
+- * The only difference between stream sockets and sequenced packet
+- * sockets is that the stream sockets always behave as if MSG_EOR
+- * has been set.
+- */
+- if (sock->type == SOCK_STREAM) {
+- if (flags & MSG_EOR) {
+- err = -EINVAL;
+- goto out;
+- }
+- flags |= MSG_EOR;
+- }
+-
+-
+- err = dn_check_state(sk, addr, addr_len, &timeo, flags);
+- if (err)
+- goto out_err;
+-
+- if (sk->sk_shutdown & SEND_SHUTDOWN) {
+- err = -EPIPE;
+- if (!(flags & MSG_NOSIGNAL))
+- send_sig(SIGPIPE, current, 0);
+- goto out_err;
+- }
+-
+- if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
+- dst_negative_advice(sk);
+-
+- mss = scp->segsize_rem;
+- fctype = scp->services_rem & NSP_FC_MASK;
+-
+- mss = dn_current_mss(sk, flags);
+-
+- if (flags & MSG_OOB) {
+- queue = &scp->other_xmit_queue;
+- if (size > mss) {
+- err = -EMSGSIZE;
+- goto out;
+- }
+- }
+-
+- scp->persist_fxn = dn_nsp_xmit_timeout;
+-
+- while(sent < size) {
+- err = sock_error(sk);
+- if (err)
+- goto out;
+-
+- if (signal_pending(current)) {
+- err = sock_intr_errno(timeo);
+- goto out;
+- }
+-
+- /*
+- * Calculate size that we wish to send.
+- */
+- len = size - sent;
+-
+- if (len > mss)
+- len = mss;
+-
+- /*
+- * Wait for queue size to go down below the window
+- * size.
+- */
+- if (dn_queue_too_long(scp, queue, flags)) {
+- DEFINE_WAIT_FUNC(wait, woken_wake_function);
+-
+- if (flags & MSG_DONTWAIT) {
+- err = -EWOULDBLOCK;
+- goto out;
+- }
+-
+- add_wait_queue(sk_sleep(sk), &wait);
+- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+- sk_wait_event(sk, &timeo,
+- !dn_queue_too_long(scp, queue, flags), &wait);
+- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+- remove_wait_queue(sk_sleep(sk), &wait);
+- continue;
+- }
+-
+- /*
+- * Get a suitably sized skb.
+- * 64 is a bit of a hack really, but its larger than any
+- * link-layer headers and has served us well as a good
+- * guess as to their real length.
+- */
+- skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
+- flags & MSG_DONTWAIT, &err);
+-
+- if (err)
+- break;
+-
+- if (!skb)
+- continue;
+-
+- cb = DN_SKB_CB(skb);
+-
+- skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
+-
+- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
+- err = -EFAULT;
+- goto out;
+- }
+-
+- if (flags & MSG_OOB) {
+- cb->nsp_flags = 0x30;
+- if (fctype != NSP_FC_NONE)
+- scp->flowrem_oth--;
+- } else {
+- cb->nsp_flags = 0x00;
+- if (scp->seg_total == 0)
+- cb->nsp_flags |= 0x20;
+-
+- scp->seg_total += len;
+-
+- if (((sent + len) == size) && (flags & MSG_EOR)) {
+- cb->nsp_flags |= 0x40;
+- scp->seg_total = 0;
+- if (fctype == NSP_FC_SCMC)
+- scp->flowrem_dat--;
+- }
+- if (fctype == NSP_FC_SRC)
+- scp->flowrem_dat--;
+- }
+-
+- sent += len;
+- dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB);
+- skb = NULL;
+-
+- scp->persist = dn_nsp_persist(sk);
+-
+- }
+-out:
+-
+- kfree_skb(skb);
+-
+- release_sock(sk);
+-
+- return sent ? sent : err;
+-
+-out_err:
+- err = sk_stream_error(sk, flags, err);
+- release_sock(sk);
+- return err;
+-}
+-
+-static int dn_device_event(struct notifier_block *this, unsigned long event,
+- void *ptr)
+-{
+- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+-
+- if (!net_eq(dev_net(dev), &init_net))
+- return NOTIFY_DONE;
+-
+- switch (event) {
+- case NETDEV_UP:
+- dn_dev_up(dev);
+- break;
+- case NETDEV_DOWN:
+- dn_dev_down(dev);
+- break;
+- default:
+- break;
+- }
+-
+- return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block dn_dev_notifier = {
+- .notifier_call = dn_device_event,
+-};
+-
+-static struct packet_type dn_dix_packet_type __read_mostly = {
+- .type = cpu_to_be16(ETH_P_DNA_RT),
+- .func = dn_route_rcv,
+-};
+-
+-#ifdef CONFIG_PROC_FS
+-struct dn_iter_state {
+- int bucket;
+-};
+-
+-static struct sock *dn_socket_get_first(struct seq_file *seq)
+-{
+- struct dn_iter_state *state = seq->private;
+- struct sock *n = NULL;
+-
+- for(state->bucket = 0;
+- state->bucket < DN_SK_HASH_SIZE;
+- ++state->bucket) {
+- n = sk_head(&dn_sk_hash[state->bucket]);
+- if (n)
+- break;
+- }
+-
+- return n;
+-}
+-
+-static struct sock *dn_socket_get_next(struct seq_file *seq,
+- struct sock *n)
+-{
+- struct dn_iter_state *state = seq->private;
+-
+- n = sk_next(n);
+- while (!n) {
+- if (++state->bucket >= DN_SK_HASH_SIZE)
+- break;
+- n = sk_head(&dn_sk_hash[state->bucket]);
+- }
+- return n;
+-}
+-
+-static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos)
+-{
+- struct sock *sk = dn_socket_get_first(seq);
+-
+- if (sk) {
+- while(*pos && (sk = dn_socket_get_next(seq, sk)))
+- --*pos;
+- }
+- return *pos ? NULL : sk;
+-}
+-
+-static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos)
+-{
+- void *rc;
+- read_lock_bh(&dn_hash_lock);
+- rc = socket_get_idx(seq, &pos);
+- if (!rc) {
+- read_unlock_bh(&dn_hash_lock);
+- }
+- return rc;
+-}
+-
+-static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos)
+-{
+- return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+-}
+-
+-static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+-{
+- void *rc;
+-
+- if (v == SEQ_START_TOKEN) {
+- rc = dn_socket_get_idx(seq, 0);
+- goto out;
+- }
+-
+- rc = dn_socket_get_next(seq, v);
+- if (rc)
+- goto out;
+- read_unlock_bh(&dn_hash_lock);
+-out:
+- ++*pos;
+- return rc;
+-}
+-
+-static void dn_socket_seq_stop(struct seq_file *seq, void *v)
+-{
+- if (v && v != SEQ_START_TOKEN)
+- read_unlock_bh(&dn_hash_lock);
+-}
+-
+-#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126)
+-
+-static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
+-{
+- int i;
+-
+- switch (le16_to_cpu(dn->sdn_objnamel)) {
+- case 0:
+- sprintf(buf, "%d", dn->sdn_objnum);
+- break;
+- default:
+- for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) {
+- buf[i] = dn->sdn_objname[i];
+- if (IS_NOT_PRINTABLE(buf[i]))
+- buf[i] = '.';
+- }
+- buf[i] = 0;
+- }
+-}
+-
+-static char *dn_state2asc(unsigned char state)
+-{
+- switch (state) {
+- case DN_O:
+- return "OPEN";
+- case DN_CR:
+- return " CR";
+- case DN_DR:
+- return " DR";
+- case DN_DRC:
+- return " DRC";
+- case DN_CC:
+- return " CC";
+- case DN_CI:
+- return " CI";
+- case DN_NR:
+- return " NR";
+- case DN_NC:
+- return " NC";
+- case DN_CD:
+- return " CD";
+- case DN_RJ:
+- return " RJ";
+- case DN_RUN:
+- return " RUN";
+- case DN_DI:
+- return " DI";
+- case DN_DIC:
+- return " DIC";
+- case DN_DN:
+- return " DN";
+- case DN_CL:
+- return " CL";
+- case DN_CN:
+- return " CN";
+- }
+-
+- return "????";
+-}
+-
+-static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- char buf1[DN_ASCBUF_LEN];
+- char buf2[DN_ASCBUF_LEN];
+- char local_object[DN_MAXOBJL+3];
+- char remote_object[DN_MAXOBJL+3];
+-
+- dn_printable_object(&scp->addr, local_object);
+- dn_printable_object(&scp->peer, remote_object);
+-
+- seq_printf(seq,
+- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
+- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
+- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1),
+- scp->addrloc,
+- scp->numdat,
+- scp->numoth,
+- scp->ackxmt_dat,
+- scp->ackxmt_oth,
+- scp->flowloc_sw,
+- local_object,
+- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2),
+- scp->addrrem,
+- scp->numdat_rcv,
+- scp->numoth_rcv,
+- scp->ackrcv_dat,
+- scp->ackrcv_oth,
+- scp->flowrem_sw,
+- remote_object,
+- dn_state2asc(scp->state),
+- ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER"));
+-}
+-
+-static int dn_socket_seq_show(struct seq_file *seq, void *v)
+-{
+- if (v == SEQ_START_TOKEN) {
+- seq_puts(seq, "Local Remote\n");
+- } else {
+- dn_socket_format_entry(seq, v);
+- }
+- return 0;
+-}
+-
+-static const struct seq_operations dn_socket_seq_ops = {
+- .start = dn_socket_seq_start,
+- .next = dn_socket_seq_next,
+- .stop = dn_socket_seq_stop,
+- .show = dn_socket_seq_show,
+-};
+-#endif
+-
+-static const struct net_proto_family dn_family_ops = {
+- .family = AF_DECnet,
+- .create = dn_create,
+- .owner = THIS_MODULE,
+-};
+-
+-static const struct proto_ops dn_proto_ops = {
+- .family = AF_DECnet,
+- .owner = THIS_MODULE,
+- .release = dn_release,
+- .bind = dn_bind,
+- .connect = dn_connect,
+- .socketpair = sock_no_socketpair,
+- .accept = dn_accept,
+- .getname = dn_getname,
+- .poll = dn_poll,
+- .ioctl = dn_ioctl,
+- .listen = dn_listen,
+- .shutdown = dn_shutdown,
+- .setsockopt = dn_setsockopt,
+- .getsockopt = dn_getsockopt,
+- .sendmsg = dn_sendmsg,
+- .recvmsg = dn_recvmsg,
+- .mmap = sock_no_mmap,
+- .sendpage = sock_no_sendpage,
+-};
+-
+-MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
+-MODULE_AUTHOR("Linux DECnet Project Team");
+-MODULE_LICENSE("GPL");
+-MODULE_ALIAS_NETPROTO(PF_DECnet);
+-
+-static const char banner[] __initconst = KERN_INFO
+-"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
+-
+-static int __init decnet_init(void)
+-{
+- int rc;
+-
+- printk(banner);
+-
+- rc = proto_register(&dn_proto, 1);
+- if (rc != 0)
+- goto out;
+-
+- dn_neigh_init();
+- dn_dev_init();
+- dn_route_init();
+- dn_fib_init();
+-
+- sock_register(&dn_family_ops);
+- dev_add_pack(&dn_dix_packet_type);
+- register_netdevice_notifier(&dn_dev_notifier);
+-
+- proc_create_seq_private("decnet", 0444, init_net.proc_net,
+- &dn_socket_seq_ops, sizeof(struct dn_iter_state),
+- NULL);
+- dn_register_sysctl();
+-out:
+- return rc;
+-
+-}
+-module_init(decnet_init);
+-
+-/*
+- * Prevent DECnet module unloading until its fixed properly.
+- * Requires an audit of the code to check for memory leaks and
+- * initialisation problems etc.
+- */
+-#if 0
+-static void __exit decnet_exit(void)
+-{
+- sock_unregister(AF_DECnet);
+- rtnl_unregister_all(PF_DECnet);
+- dev_remove_pack(&dn_dix_packet_type);
+-
+- dn_unregister_sysctl();
+-
+- unregister_netdevice_notifier(&dn_dev_notifier);
+-
+- dn_route_cleanup();
+- dn_dev_cleanup();
+- dn_neigh_cleanup();
+- dn_fib_cleanup();
+-
+- remove_proc_entry("decnet", init_net.proc_net);
+-
+- proto_unregister(&dn_proto);
+-
+- rcu_barrier(); /* Wait for completion of call_rcu()'s */
+-}
+-module_exit(decnet_exit);
+-#endif
+diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
+deleted file mode 100644
+index 0ee7d4c0c9554..0000000000000
+--- a/net/decnet/dn_dev.c
++++ /dev/null
+@@ -1,1433 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Device Layer
+- *
+- * Authors: Steve Whitehouse <SteveW@ACM.org>
+- * Eduardo Marcelo Serrat <emserrat@geocities.com>
+- *
+- * Changes:
+- * Steve Whitehouse : Devices now see incoming frames so they
+- * can mark on who it came from.
+- * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour
+- * can now have a device specific setup func.
+- * Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/
+- * Steve Whitehouse : Fixed bug which sometimes killed timer
+- * Steve Whitehouse : Multiple ifaddr support
+- * Steve Whitehouse : SIOCGIFCONF is now a compile time option
+- * Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding
+- * Steve Whitehouse : Removed timer1 - it's a user space issue now
+- * Patrick Caulfield : Fixed router hello message format
+- * Steve Whitehouse : Got rid of constant sizes for blksize for
+- * devices. All mtu based now.
+- */
+-
+-#include <linux/capability.h>
+-#include <linux/module.h>
+-#include <linux/moduleparam.h>
+-#include <linux/init.h>
+-#include <linux/net.h>
+-#include <linux/netdevice.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/timer.h>
+-#include <linux/string.h>
+-#include <linux/if_addr.h>
+-#include <linux/if_arp.h>
+-#include <linux/if_ether.h>
+-#include <linux/skbuff.h>
+-#include <linux/sysctl.h>
+-#include <linux/notifier.h>
+-#include <linux/slab.h>
+-#include <linux/jiffies.h>
+-#include <linux/uaccess.h>
+-#include <net/net_namespace.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/fib_rules.h>
+-#include <net/netlink.h>
+-#include <net/dn.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_fib.h>
+-
+-#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
+-
+-static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
+-static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
+-static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00};
+-static unsigned char dn_eco_version[3] = {0x02,0x00,0x00};
+-
+-extern struct neigh_table dn_neigh_table;
+-
+-/*
+- * decnet_address is kept in network order.
+- */
+-__le16 decnet_address = 0;
+-
+-static DEFINE_SPINLOCK(dndev_lock);
+-static struct net_device *decnet_default_device;
+-static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
+-
+-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
+-static void dn_dev_delete(struct net_device *dev);
+-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa);
+-
+-static int dn_eth_up(struct net_device *);
+-static void dn_eth_down(struct net_device *);
+-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa);
+-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa);
+-
+-static struct dn_dev_parms dn_dev_list[] = {
+-{
+- .type = ARPHRD_ETHER, /* Ethernet */
+- .mode = DN_DEV_BCAST,
+- .state = DN_DEV_S_RU,
+- .t2 = 1,
+- .t3 = 10,
+- .name = "ethernet",
+- .up = dn_eth_up,
+- .down = dn_eth_down,
+- .timer3 = dn_send_brd_hello,
+-},
+-{
+- .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */
+- .mode = DN_DEV_BCAST,
+- .state = DN_DEV_S_RU,
+- .t2 = 1,
+- .t3 = 10,
+- .name = "ipgre",
+- .timer3 = dn_send_brd_hello,
+-},
+-#if 0
+-{
+- .type = ARPHRD_X25, /* Bog standard X.25 */
+- .mode = DN_DEV_UCAST,
+- .state = DN_DEV_S_DS,
+- .t2 = 1,
+- .t3 = 120,
+- .name = "x25",
+- .timer3 = dn_send_ptp_hello,
+-},
+-#endif
+-#if 0
+-{
+- .type = ARPHRD_PPP, /* DECnet over PPP */
+- .mode = DN_DEV_BCAST,
+- .state = DN_DEV_S_RU,
+- .t2 = 1,
+- .t3 = 10,
+- .name = "ppp",
+- .timer3 = dn_send_brd_hello,
+-},
+-#endif
+-{
+- .type = ARPHRD_DDCMP, /* DECnet over DDCMP */
+- .mode = DN_DEV_UCAST,
+- .state = DN_DEV_S_DS,
+- .t2 = 1,
+- .t3 = 120,
+- .name = "ddcmp",
+- .timer3 = dn_send_ptp_hello,
+-},
+-{
+- .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */
+- .mode = DN_DEV_BCAST,
+- .state = DN_DEV_S_RU,
+- .t2 = 1,
+- .t3 = 10,
+- .name = "loopback",
+- .timer3 = dn_send_brd_hello,
+-}
+-};
+-
+-#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
+-
+-#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
+-
+-#ifdef CONFIG_SYSCTL
+-
+-static int min_t2[] = { 1 };
+-static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */
+-static int min_t3[] = { 1 };
+-static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */
+-
+-static int min_priority[1];
+-static int max_priority[] = { 127 }; /* From DECnet spec */
+-
+-static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *,
+- loff_t *);
+-static struct dn_dev_sysctl_table {
+- struct ctl_table_header *sysctl_header;
+- struct ctl_table dn_dev_vars[5];
+-} dn_dev_sysctl = {
+- NULL,
+- {
+- {
+- .procname = "forwarding",
+- .data = (void *)DN_DEV_PARMS_OFFSET(forwarding),
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = dn_forwarding_proc,
+- },
+- {
+- .procname = "priority",
+- .data = (void *)DN_DEV_PARMS_OFFSET(priority),
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_priority,
+- .extra2 = &max_priority
+- },
+- {
+- .procname = "t2",
+- .data = (void *)DN_DEV_PARMS_OFFSET(t2),
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_t2,
+- .extra2 = &max_t2
+- },
+- {
+- .procname = "t3",
+- .data = (void *)DN_DEV_PARMS_OFFSET(t3),
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_t3,
+- .extra2 = &max_t3
+- },
+- { }
+- },
+-};
+-
+-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
+-{
+- struct dn_dev_sysctl_table *t;
+- int i;
+-
+- char path[sizeof("net/decnet/conf/") + IFNAMSIZ];
+-
+- t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
+- if (t == NULL)
+- return;
+-
+- for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
+- long offset = (long)t->dn_dev_vars[i].data;
+- t->dn_dev_vars[i].data = ((char *)parms) + offset;
+- }
+-
+- snprintf(path, sizeof(path), "net/decnet/conf/%s",
+- dev? dev->name : parms->name);
+-
+- t->dn_dev_vars[0].extra1 = (void *)dev;
+-
+- t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars);
+- if (t->sysctl_header == NULL)
+- kfree(t);
+- else
+- parms->sysctl = t;
+-}
+-
+-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
+-{
+- if (parms->sysctl) {
+- struct dn_dev_sysctl_table *t = parms->sysctl;
+- parms->sysctl = NULL;
+- unregister_net_sysctl_table(t->sysctl_header);
+- kfree(t);
+- }
+-}
+-
+-static int dn_forwarding_proc(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-#ifdef CONFIG_DECNET_ROUTER
+- struct net_device *dev = table->extra1;
+- struct dn_dev *dn_db;
+- int err;
+- int tmp, old;
+-
+- if (table->extra1 == NULL)
+- return -EINVAL;
+-
+- dn_db = rcu_dereference_raw(dev->dn_ptr);
+- old = dn_db->parms.forwarding;
+-
+- err = proc_dointvec(table, write, buffer, lenp, ppos);
+-
+- if ((err >= 0) && write) {
+- if (dn_db->parms.forwarding < 0)
+- dn_db->parms.forwarding = 0;
+- if (dn_db->parms.forwarding > 2)
+- dn_db->parms.forwarding = 2;
+- /*
+- * What an ugly hack this is... its works, just. It
+- * would be nice if sysctl/proc were just that little
+- * bit more flexible so I don't have to write a special
+- * routine, or suffer hacks like this - SJW
+- */
+- tmp = dn_db->parms.forwarding;
+- dn_db->parms.forwarding = old;
+- if (dn_db->parms.down)
+- dn_db->parms.down(dev);
+- dn_db->parms.forwarding = tmp;
+- if (dn_db->parms.up)
+- dn_db->parms.up(dev);
+- }
+-
+- return err;
+-#else
+- return -EINVAL;
+-#endif
+-}
+-
+-#else /* CONFIG_SYSCTL */
+-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
+-{
+-}
+-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
+-{
+-}
+-
+-#endif /* CONFIG_SYSCTL */
+-
+-static inline __u16 mtu2blksize(struct net_device *dev)
+-{
+- u32 blksize = dev->mtu;
+- if (blksize > 0xffff)
+- blksize = 0xffff;
+-
+- if (dev->type == ARPHRD_ETHER ||
+- dev->type == ARPHRD_PPP ||
+- dev->type == ARPHRD_IPGRE ||
+- dev->type == ARPHRD_LOOPBACK)
+- blksize -= 2;
+-
+- return (__u16)blksize;
+-}
+-
+-static struct dn_ifaddr *dn_dev_alloc_ifa(void)
+-{
+- struct dn_ifaddr *ifa;
+-
+- ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
+-
+- return ifa;
+-}
+-
+-static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
+-{
+- kfree_rcu(ifa, rcu);
+-}
+-
+-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
+-{
+- struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
+- unsigned char mac_addr[6];
+- struct net_device *dev = dn_db->dev;
+-
+- ASSERT_RTNL();
+-
+- *ifap = ifa1->ifa_next;
+-
+- if (dn_db->dev->type == ARPHRD_ETHER) {
+- if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
+- dn_dn2eth(mac_addr, ifa1->ifa_local);
+- dev_mc_del(dev, mac_addr);
+- }
+- }
+-
+- dn_ifaddr_notify(RTM_DELADDR, ifa1);
+- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
+- if (destroy) {
+- dn_dev_free_ifa(ifa1);
+-
+- if (dn_db->ifa_list == NULL)
+- dn_dev_delete(dn_db->dev);
+- }
+-}
+-
+-static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
+-{
+- struct net_device *dev = dn_db->dev;
+- struct dn_ifaddr *ifa1;
+- unsigned char mac_addr[6];
+-
+- ASSERT_RTNL();
+-
+- /* Check for duplicates */
+- for (ifa1 = rtnl_dereference(dn_db->ifa_list);
+- ifa1 != NULL;
+- ifa1 = rtnl_dereference(ifa1->ifa_next)) {
+- if (ifa1->ifa_local == ifa->ifa_local)
+- return -EEXIST;
+- }
+-
+- if (dev->type == ARPHRD_ETHER) {
+- if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
+- dn_dn2eth(mac_addr, ifa->ifa_local);
+- dev_mc_add(dev, mac_addr);
+- }
+- }
+-
+- ifa->ifa_next = dn_db->ifa_list;
+- rcu_assign_pointer(dn_db->ifa_list, ifa);
+-
+- dn_ifaddr_notify(RTM_NEWADDR, ifa);
+- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
+-
+- return 0;
+-}
+-
+-static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
+-{
+- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
+- int rv;
+-
+- if (dn_db == NULL) {
+- int err;
+- dn_db = dn_dev_create(dev, &err);
+- if (dn_db == NULL)
+- return err;
+- }
+-
+- ifa->ifa_dev = dn_db;
+-
+- if (dev->flags & IFF_LOOPBACK)
+- ifa->ifa_scope = RT_SCOPE_HOST;
+-
+- rv = dn_dev_insert_ifa(dn_db, ifa);
+- if (rv)
+- dn_dev_free_ifa(ifa);
+- return rv;
+-}
+-
+-
+-int dn_dev_ioctl(unsigned int cmd, void __user *arg)
+-{
+- char buffer[DN_IFREQ_SIZE];
+- struct ifreq *ifr = (struct ifreq *)buffer;
+- struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
+- struct dn_dev *dn_db;
+- struct net_device *dev;
+- struct dn_ifaddr *ifa = NULL;
+- struct dn_ifaddr __rcu **ifap = NULL;
+- int ret = 0;
+-
+- if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
+- return -EFAULT;
+- ifr->ifr_name[IFNAMSIZ-1] = 0;
+-
+- dev_load(&init_net, ifr->ifr_name);
+-
+- switch (cmd) {
+- case SIOCGIFADDR:
+- break;
+- case SIOCSIFADDR:
+- if (!capable(CAP_NET_ADMIN))
+- return -EACCES;
+- if (sdn->sdn_family != AF_DECnet)
+- return -EINVAL;
+- break;
+- default:
+- return -EINVAL;
+- }
+-
+- rtnl_lock();
+-
+- if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
+- ret = -ENODEV;
+- goto done;
+- }
+-
+- if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
+- for (ifap = &dn_db->ifa_list;
+- (ifa = rtnl_dereference(*ifap)) != NULL;
+- ifap = &ifa->ifa_next)
+- if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
+- break;
+- }
+-
+- if (ifa == NULL && cmd != SIOCSIFADDR) {
+- ret = -EADDRNOTAVAIL;
+- goto done;
+- }
+-
+- switch (cmd) {
+- case SIOCGIFADDR:
+- *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local;
+- if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
+- ret = -EFAULT;
+- break;
+-
+- case SIOCSIFADDR:
+- if (!ifa) {
+- if ((ifa = dn_dev_alloc_ifa()) == NULL) {
+- ret = -ENOBUFS;
+- break;
+- }
+- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+- } else {
+- if (ifa->ifa_local == dn_saddr2dn(sdn))
+- break;
+- dn_dev_del_ifa(dn_db, ifap, 0);
+- }
+-
+- ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn);
+-
+- ret = dn_dev_set_ifa(dev, ifa);
+- }
+-done:
+- rtnl_unlock();
+-
+- return ret;
+-}
+-
+-struct net_device *dn_dev_get_default(void)
+-{
+- struct net_device *dev;
+-
+- spin_lock(&dndev_lock);
+- dev = decnet_default_device;
+- if (dev) {
+- if (dev->dn_ptr)
+- dev_hold(dev);
+- else
+- dev = NULL;
+- }
+- spin_unlock(&dndev_lock);
+-
+- return dev;
+-}
+-
+-int dn_dev_set_default(struct net_device *dev, int force)
+-{
+- struct net_device *old = NULL;
+- int rv = -EBUSY;
+- if (!dev->dn_ptr)
+- return -ENODEV;
+-
+- spin_lock(&dndev_lock);
+- if (force || decnet_default_device == NULL) {
+- old = decnet_default_device;
+- decnet_default_device = dev;
+- rv = 0;
+- }
+- spin_unlock(&dndev_lock);
+-
+- dev_put(old);
+- return rv;
+-}
+-
+-static void dn_dev_check_default(struct net_device *dev)
+-{
+- spin_lock(&dndev_lock);
+- if (dev == decnet_default_device) {
+- decnet_default_device = NULL;
+- } else {
+- dev = NULL;
+- }
+- spin_unlock(&dndev_lock);
+-
+- dev_put(dev);
+-}
+-
+-/*
+- * Called with RTNL
+- */
+-static struct dn_dev *dn_dev_by_index(int ifindex)
+-{
+- struct net_device *dev;
+- struct dn_dev *dn_dev = NULL;
+-
+- dev = __dev_get_by_index(&init_net, ifindex);
+- if (dev)
+- dn_dev = rtnl_dereference(dev->dn_ptr);
+-
+- return dn_dev;
+-}
+-
+-static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
+- [IFA_ADDRESS] = { .type = NLA_U16 },
+- [IFA_LOCAL] = { .type = NLA_U16 },
+- [IFA_LABEL] = { .type = NLA_STRING,
+- .len = IFNAMSIZ - 1 },
+- [IFA_FLAGS] = { .type = NLA_U32 },
+-};
+-
+-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = sock_net(skb->sk);
+- struct nlattr *tb[IFA_MAX+1];
+- struct dn_dev *dn_db;
+- struct ifaddrmsg *ifm;
+- struct dn_ifaddr *ifa;
+- struct dn_ifaddr __rcu **ifap;
+- int err = -EINVAL;
+-
+- if (!netlink_capable(skb, CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (!net_eq(net, &init_net))
+- goto errout;
+-
+- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+- dn_ifa_policy, extack);
+- if (err < 0)
+- goto errout;
+-
+- err = -ENODEV;
+- ifm = nlmsg_data(nlh);
+- if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
+- goto errout;
+-
+- err = -EADDRNOTAVAIL;
+- for (ifap = &dn_db->ifa_list;
+- (ifa = rtnl_dereference(*ifap)) != NULL;
+- ifap = &ifa->ifa_next) {
+- if (tb[IFA_LOCAL] &&
+- nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
+- continue;
+-
+- if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+- continue;
+-
+- dn_dev_del_ifa(dn_db, ifap, 1);
+- return 0;
+- }
+-
+-errout:
+- return err;
+-}
+-
+-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = sock_net(skb->sk);
+- struct nlattr *tb[IFA_MAX+1];
+- struct net_device *dev;
+- struct dn_dev *dn_db;
+- struct ifaddrmsg *ifm;
+- struct dn_ifaddr *ifa;
+- int err;
+-
+- if (!netlink_capable(skb, CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (!net_eq(net, &init_net))
+- return -EINVAL;
+-
+- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+- dn_ifa_policy, extack);
+- if (err < 0)
+- return err;
+-
+- if (tb[IFA_LOCAL] == NULL)
+- return -EINVAL;
+-
+- ifm = nlmsg_data(nlh);
+- if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
+- return -ENODEV;
+-
+- if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
+- dn_db = dn_dev_create(dev, &err);
+- if (!dn_db)
+- return err;
+- }
+-
+- if ((ifa = dn_dev_alloc_ifa()) == NULL)
+- return -ENOBUFS;
+-
+- if (tb[IFA_ADDRESS] == NULL)
+- tb[IFA_ADDRESS] = tb[IFA_LOCAL];
+-
+- ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]);
+- ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]);
+- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
+- ifm->ifa_flags;
+- ifa->ifa_scope = ifm->ifa_scope;
+- ifa->ifa_dev = dn_db;
+-
+- if (tb[IFA_LABEL])
+- nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
+- else
+- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+-
+- err = dn_dev_insert_ifa(dn_db, ifa);
+- if (err)
+- dn_dev_free_ifa(ifa);
+-
+- return err;
+-}
+-
+-static inline size_t dn_ifaddr_nlmsg_size(void)
+-{
+- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+- + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+- + nla_total_size(2) /* IFA_ADDRESS */
+- + nla_total_size(2) /* IFA_LOCAL */
+- + nla_total_size(4); /* IFA_FLAGS */
+-}
+-
+-static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
+- u32 portid, u32 seq, int event, unsigned int flags)
+-{
+- struct ifaddrmsg *ifm;
+- struct nlmsghdr *nlh;
+- u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
+-
+- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- ifm = nlmsg_data(nlh);
+- ifm->ifa_family = AF_DECnet;
+- ifm->ifa_prefixlen = 16;
+- ifm->ifa_flags = ifa_flags;
+- ifm->ifa_scope = ifa->ifa_scope;
+- ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+-
+- if ((ifa->ifa_address &&
+- nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) ||
+- (ifa->ifa_local &&
+- nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) ||
+- (ifa->ifa_label[0] &&
+- nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+- nla_put_u32(skb, IFA_FLAGS, ifa_flags))
+- goto nla_put_failure;
+- nlmsg_end(skb, nlh);
+- return 0;
+-
+-nla_put_failure:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
+-{
+- struct sk_buff *skb;
+- int err = -ENOBUFS;
+-
+- skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL);
+- if (skb == NULL)
+- goto errout;
+-
+- err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+- rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+- return;
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
+-}
+-
+-static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- struct net *net = sock_net(skb->sk);
+- int idx, dn_idx = 0, skip_ndevs, skip_naddr;
+- struct net_device *dev;
+- struct dn_dev *dn_db;
+- struct dn_ifaddr *ifa;
+-
+- if (!net_eq(net, &init_net))
+- return 0;
+-
+- skip_ndevs = cb->args[0];
+- skip_naddr = cb->args[1];
+-
+- idx = 0;
+- rcu_read_lock();
+- for_each_netdev_rcu(&init_net, dev) {
+- if (idx < skip_ndevs)
+- goto cont;
+- else if (idx > skip_ndevs) {
+- /* Only skip over addresses for first dev dumped
+- * in this iteration (idx == skip_ndevs) */
+- skip_naddr = 0;
+- }
+-
+- if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
+- goto cont;
+-
+- for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
+- ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
+- if (dn_idx < skip_naddr)
+- continue;
+-
+- if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq, RTM_NEWADDR,
+- NLM_F_MULTI) < 0)
+- goto done;
+- }
+-cont:
+- idx++;
+- }
+-done:
+- rcu_read_unlock();
+- cb->args[0] = idx;
+- cb->args[1] = dn_idx;
+-
+- return skb->len;
+-}
+-
+-static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
+-{
+- struct dn_dev *dn_db;
+- struct dn_ifaddr *ifa;
+- int rv = -ENODEV;
+-
+- rcu_read_lock();
+- dn_db = rcu_dereference(dev->dn_ptr);
+- if (dn_db == NULL)
+- goto out;
+-
+- ifa = rcu_dereference(dn_db->ifa_list);
+- if (ifa != NULL) {
+- *addr = ifa->ifa_local;
+- rv = 0;
+- }
+-out:
+- rcu_read_unlock();
+- return rv;
+-}
+-
+-/*
+- * Find a default address to bind to.
+- *
+- * This is one of those areas where the initial VMS concepts don't really
+- * map onto the Linux concepts, and since we introduced multiple addresses
+- * per interface we have to cope with slightly odd ways of finding out what
+- * "our address" really is. Mostly it's not a problem; for this we just guess
+- * a sensible default. Eventually the routing code will take care of all the
+- * nasties for us I hope.
+- */
+-int dn_dev_bind_default(__le16 *addr)
+-{
+- struct net_device *dev;
+- int rv;
+- dev = dn_dev_get_default();
+-last_chance:
+- if (dev) {
+- rv = dn_dev_get_first(dev, addr);
+- dev_put(dev);
+- if (rv == 0 || dev == init_net.loopback_dev)
+- return rv;
+- }
+- dev = init_net.loopback_dev;
+- dev_hold(dev);
+- goto last_chance;
+-}
+-
+-static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+-{
+- struct endnode_hello_message *msg;
+- struct sk_buff *skb = NULL;
+- __le16 *pktlen;
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+-
+- if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
+- return;
+-
+- skb->dev = dev;
+-
+- msg = skb_put(skb, sizeof(*msg));
+-
+- msg->msgflg = 0x0D;
+- memcpy(msg->tiver, dn_eco_version, 3);
+- dn_dn2eth(msg->id, ifa->ifa_local);
+- msg->iinfo = DN_RT_INFO_ENDN;
+- msg->blksize = cpu_to_le16(mtu2blksize(dev));
+- msg->area = 0x00;
+- memset(msg->seed, 0, 8);
+- memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
+-
+- if (dn_db->router) {
+- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+- dn_dn2eth(msg->neighbor, dn->addr);
+- }
+-
+- msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3);
+- msg->mpd = 0x00;
+- msg->datalen = 0x02;
+- memset(msg->data, 0xAA, 2);
+-
+- pktlen = skb_push(skb, 2);
+- *pktlen = cpu_to_le16(skb->len - 2);
+-
+- skb_reset_network_header(skb);
+-
+- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
+-}
+-
+-
+-#define DRDELAY (5 * HZ)
+-
+-static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
+-{
+- /* First check time since device went up */
+- if (time_before(jiffies, dn_db->uptime + DRDELAY))
+- return 0;
+-
+- /* If there is no router, then yes... */
+- if (!dn_db->router)
+- return 1;
+-
+- /* otherwise only if we have a higher priority or.. */
+- if (dn->priority < dn_db->parms.priority)
+- return 1;
+-
+- /* if we have equal priority and a higher node number */
+- if (dn->priority != dn_db->parms.priority)
+- return 0;
+-
+- if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local))
+- return 1;
+-
+- return 0;
+-}
+-
+-static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+-{
+- int n;
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+- struct sk_buff *skb;
+- size_t size;
+- unsigned char *ptr;
+- unsigned char *i1, *i2;
+- __le16 *pktlen;
+- char *src;
+-
+- if (mtu2blksize(dev) < (26 + 7))
+- return;
+-
+- n = mtu2blksize(dev) - 26;
+- n /= 7;
+-
+- if (n > 32)
+- n = 32;
+-
+- size = 2 + 26 + 7 * n;
+-
+- if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL)
+- return;
+-
+- skb->dev = dev;
+- ptr = skb_put(skb, size);
+-
+- *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH;
+- *ptr++ = 2; /* ECO */
+- *ptr++ = 0;
+- *ptr++ = 0;
+- dn_dn2eth(ptr, ifa->ifa_local);
+- src = ptr;
+- ptr += ETH_ALEN;
+- *ptr++ = dn_db->parms.forwarding == 1 ?
+- DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
+- *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev));
+- ptr += 2;
+- *ptr++ = dn_db->parms.priority; /* Priority */
+- *ptr++ = 0; /* Area: Reserved */
+- *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3);
+- ptr += 2;
+- *ptr++ = 0; /* MPD: Reserved */
+- i1 = ptr++;
+- memset(ptr, 0, 7); /* Name: Reserved */
+- ptr += 7;
+- i2 = ptr++;
+-
+- n = dn_neigh_elist(dev, ptr, n);
+-
+- *i2 = 7 * n;
+- *i1 = 8 + *i2;
+-
+- skb_trim(skb, (27 + *i2));
+-
+- pktlen = skb_push(skb, 2);
+- *pktlen = cpu_to_le16(skb->len - 2);
+-
+- skb_reset_network_header(skb);
+-
+- if (dn_am_i_a_router(dn, dn_db, ifa)) {
+- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+- if (skb2) {
+- dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src);
+- }
+- }
+-
+- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
+-}
+-
+-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+-{
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+-
+- if (dn_db->parms.forwarding == 0)
+- dn_send_endnode_hello(dev, ifa);
+- else
+- dn_send_router_hello(dev, ifa);
+-}
+-
+-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
+-{
+- int tdlen = 16;
+- int size = dev->hard_header_len + 2 + 4 + tdlen;
+- struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC);
+- int i;
+- unsigned char *ptr;
+- char src[ETH_ALEN];
+-
+- if (skb == NULL)
+- return ;
+-
+- skb->dev = dev;
+- skb_push(skb, dev->hard_header_len);
+- ptr = skb_put(skb, 2 + 4 + tdlen);
+-
+- *ptr++ = DN_RT_PKT_HELO;
+- *((__le16 *)ptr) = ifa->ifa_local;
+- ptr += 2;
+- *ptr++ = tdlen;
+-
+- for(i = 0; i < tdlen; i++)
+- *ptr++ = 0252;
+-
+- dn_dn2eth(src, ifa->ifa_local);
+- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
+-}
+-
+-static int dn_eth_up(struct net_device *dev)
+-{
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+-
+- if (dn_db->parms.forwarding == 0)
+- dev_mc_add(dev, dn_rt_all_end_mcast);
+- else
+- dev_mc_add(dev, dn_rt_all_rt_mcast);
+-
+- dn_db->use_long = 1;
+-
+- return 0;
+-}
+-
+-static void dn_eth_down(struct net_device *dev)
+-{
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+-
+- if (dn_db->parms.forwarding == 0)
+- dev_mc_del(dev, dn_rt_all_end_mcast);
+- else
+- dev_mc_del(dev, dn_rt_all_rt_mcast);
+-}
+-
+-static void dn_dev_set_timer(struct net_device *dev);
+-
+-static void dn_dev_timer_func(struct timer_list *t)
+-{
+- struct dn_dev *dn_db = from_timer(dn_db, t, timer);
+- struct net_device *dev;
+- struct dn_ifaddr *ifa;
+-
+- rcu_read_lock();
+- dev = dn_db->dev;
+- if (dn_db->t3 <= dn_db->parms.t2) {
+- if (dn_db->parms.timer3) {
+- for (ifa = rcu_dereference(dn_db->ifa_list);
+- ifa;
+- ifa = rcu_dereference(ifa->ifa_next)) {
+- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
+- dn_db->parms.timer3(dev, ifa);
+- }
+- }
+- dn_db->t3 = dn_db->parms.t3;
+- } else {
+- dn_db->t3 -= dn_db->parms.t2;
+- }
+- rcu_read_unlock();
+- dn_dev_set_timer(dev);
+-}
+-
+-static void dn_dev_set_timer(struct net_device *dev)
+-{
+- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
+-
+- if (dn_db->parms.t2 > dn_db->parms.t3)
+- dn_db->parms.t2 = dn_db->parms.t3;
+-
+- dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
+-
+- add_timer(&dn_db->timer);
+-}
+-
+-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
+-{
+- int i;
+- struct dn_dev_parms *p = dn_dev_list;
+- struct dn_dev *dn_db;
+-
+- for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) {
+- if (p->type == dev->type)
+- break;
+- }
+-
+- *err = -ENODEV;
+- if (i == DN_DEV_LIST_SIZE)
+- return NULL;
+-
+- *err = -ENOBUFS;
+- if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
+- return NULL;
+-
+- memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
+-
+- rcu_assign_pointer(dev->dn_ptr, dn_db);
+- dn_db->dev = dev;
+- timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
+-
+- dn_db->uptime = jiffies;
+-
+- dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
+- if (!dn_db->neigh_parms) {
+- RCU_INIT_POINTER(dev->dn_ptr, NULL);
+- kfree(dn_db);
+- return NULL;
+- }
+-
+- if (dn_db->parms.up) {
+- if (dn_db->parms.up(dev) < 0) {
+- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
+- dev->dn_ptr = NULL;
+- kfree(dn_db);
+- return NULL;
+- }
+- }
+-
+- dn_dev_sysctl_register(dev, &dn_db->parms);
+-
+- dn_dev_set_timer(dev);
+-
+- *err = 0;
+- return dn_db;
+-}
+-
+-
+-/*
+- * This processes a device up event. We only start up
+- * the loopback device & ethernet devices with correct
+- * MAC addresses automatically. Others must be started
+- * specifically.
+- *
+- * FIXME: How should we configure the loopback address ? If we could dispense
+- * with using decnet_address here and for autobind, it will be one less thing
+- * for users to worry about setting up.
+- */
+-
+-void dn_dev_up(struct net_device *dev)
+-{
+- struct dn_ifaddr *ifa;
+- __le16 addr = decnet_address;
+- int maybe_default = 0;
+- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
+-
+- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+- return;
+-
+- /*
+- * Need to ensure that loopback device has a dn_db attached to it
+- * to allow creation of neighbours against it, even though it might
+- * not have a local address of its own. Might as well do the same for
+- * all autoconfigured interfaces.
+- */
+- if (dn_db == NULL) {
+- int err;
+- dn_db = dn_dev_create(dev, &err);
+- if (dn_db == NULL)
+- return;
+- }
+-
+- if (dev->type == ARPHRD_ETHER) {
+- if (memcmp(dev->dev_addr, dn_hiord, 4) != 0)
+- return;
+- addr = dn_eth2dn(dev->dev_addr);
+- maybe_default = 1;
+- }
+-
+- if (addr == 0)
+- return;
+-
+- if ((ifa = dn_dev_alloc_ifa()) == NULL)
+- return;
+-
+- ifa->ifa_local = ifa->ifa_address = addr;
+- ifa->ifa_flags = 0;
+- ifa->ifa_scope = RT_SCOPE_UNIVERSE;
+- strcpy(ifa->ifa_label, dev->name);
+-
+- dn_dev_set_ifa(dev, ifa);
+-
+- /*
+- * Automagically set the default device to the first automatically
+- * configured ethernet card in the system.
+- */
+- if (maybe_default) {
+- dev_hold(dev);
+- if (dn_dev_set_default(dev, 0))
+- dev_put(dev);
+- }
+-}
+-
+-static void dn_dev_delete(struct net_device *dev)
+-{
+- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
+-
+- if (dn_db == NULL)
+- return;
+-
+- del_timer_sync(&dn_db->timer);
+- dn_dev_sysctl_unregister(&dn_db->parms);
+- dn_dev_check_default(dev);
+- neigh_ifdown(&dn_neigh_table, dev);
+-
+- if (dn_db->parms.down)
+- dn_db->parms.down(dev);
+-
+- dev->dn_ptr = NULL;
+-
+- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
+- neigh_ifdown(&dn_neigh_table, dev);
+-
+- if (dn_db->router)
+- neigh_release(dn_db->router);
+- if (dn_db->peer)
+- neigh_release(dn_db->peer);
+-
+- kfree(dn_db);
+-}
+-
+-void dn_dev_down(struct net_device *dev)
+-{
+- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
+- struct dn_ifaddr *ifa;
+-
+- if (dn_db == NULL)
+- return;
+-
+- while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
+- dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
+- dn_dev_free_ifa(ifa);
+- }
+-
+- dn_dev_delete(dev);
+-}
+-
+-void dn_dev_init_pkt(struct sk_buff *skb)
+-{
+-}
+-
+-void dn_dev_veri_pkt(struct sk_buff *skb)
+-{
+-}
+-
+-void dn_dev_hello(struct sk_buff *skb)
+-{
+-}
+-
+-void dn_dev_devices_off(void)
+-{
+- struct net_device *dev;
+-
+- rtnl_lock();
+- for_each_netdev(&init_net, dev)
+- dn_dev_down(dev);
+- rtnl_unlock();
+-
+-}
+-
+-void dn_dev_devices_on(void)
+-{
+- struct net_device *dev;
+-
+- rtnl_lock();
+- for_each_netdev(&init_net, dev) {
+- if (dev->flags & IFF_UP)
+- dn_dev_up(dev);
+- }
+- rtnl_unlock();
+-}
+-
+-int register_dnaddr_notifier(struct notifier_block *nb)
+-{
+- return blocking_notifier_chain_register(&dnaddr_chain, nb);
+-}
+-
+-int unregister_dnaddr_notifier(struct notifier_block *nb)
+-{
+- return blocking_notifier_chain_unregister(&dnaddr_chain, nb);
+-}
+-
+-#ifdef CONFIG_PROC_FS
+-static inline int is_dn_dev(struct net_device *dev)
+-{
+- return dev->dn_ptr != NULL;
+-}
+-
+-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
+- __acquires(RCU)
+-{
+- int i;
+- struct net_device *dev;
+-
+- rcu_read_lock();
+-
+- if (*pos == 0)
+- return SEQ_START_TOKEN;
+-
+- i = 1;
+- for_each_netdev_rcu(&init_net, dev) {
+- if (!is_dn_dev(dev))
+- continue;
+-
+- if (i++ == *pos)
+- return dev;
+- }
+-
+- return NULL;
+-}
+-
+-static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+-{
+- struct net_device *dev;
+-
+- ++*pos;
+-
+- dev = v;
+- if (v == SEQ_START_TOKEN)
+- dev = net_device_entry(&init_net.dev_base_head);
+-
+- for_each_netdev_continue_rcu(&init_net, dev) {
+- if (!is_dn_dev(dev))
+- continue;
+-
+- return dev;
+- }
+-
+- return NULL;
+-}
+-
+-static void dn_dev_seq_stop(struct seq_file *seq, void *v)
+- __releases(RCU)
+-{
+- rcu_read_unlock();
+-}
+-
+-static char *dn_type2asc(char type)
+-{
+- switch (type) {
+- case DN_DEV_BCAST:
+- return "B";
+- case DN_DEV_UCAST:
+- return "U";
+- case DN_DEV_MPOINT:
+- return "M";
+- }
+-
+- return "?";
+-}
+-
+-static int dn_dev_seq_show(struct seq_file *seq, void *v)
+-{
+- if (v == SEQ_START_TOKEN)
+- seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n");
+- else {
+- struct net_device *dev = v;
+- char peer_buf[DN_ASCBUF_LEN];
+- char router_buf[DN_ASCBUF_LEN];
+- struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
+-
+- seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
+- " %04hu %03d %02x %-10s %-7s %-7s\n",
+- dev->name,
+- dn_type2asc(dn_db->parms.mode),
+- 0, 0,
+- dn_db->t3, dn_db->parms.t3,
+- mtu2blksize(dev),
+- dn_db->parms.priority,
+- dn_db->parms.state, dn_db->parms.name,
+- dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
+- dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
+- }
+- return 0;
+-}
+-
+-static const struct seq_operations dn_dev_seq_ops = {
+- .start = dn_dev_seq_start,
+- .next = dn_dev_seq_next,
+- .stop = dn_dev_seq_stop,
+- .show = dn_dev_seq_show,
+-};
+-#endif /* CONFIG_PROC_FS */
+-
+-static int addr[2];
+-module_param_array(addr, int, NULL, 0444);
+-MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
+-
+-void __init dn_dev_init(void)
+-{
+- if (addr[0] > 63 || addr[0] < 0) {
+- printk(KERN_ERR "DECnet: Area must be between 0 and 63");
+- return;
+- }
+-
+- if (addr[1] > 1023 || addr[1] < 0) {
+- printk(KERN_ERR "DECnet: Node must be between 0 and 1023");
+- return;
+- }
+-
+- decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]);
+-
+- dn_dev_devices_on();
+-
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
+- dn_nl_newaddr, NULL, 0);
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
+- dn_nl_deladdr, NULL, 0);
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
+- NULL, dn_nl_dump_ifaddr, 0);
+-
+- proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
+-
+-#ifdef CONFIG_SYSCTL
+- {
+- int i;
+- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
+- dn_dev_sysctl_register(NULL, &dn_dev_list[i]);
+- }
+-#endif /* CONFIG_SYSCTL */
+-}
+-
+-void __exit dn_dev_cleanup(void)
+-{
+-#ifdef CONFIG_SYSCTL
+- {
+- int i;
+- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
+- dn_dev_sysctl_unregister(&dn_dev_list[i]);
+- }
+-#endif /* CONFIG_SYSCTL */
+-
+- remove_proc_entry("decnet_dev", init_net.proc_net);
+-
+- dn_dev_devices_off();
+-}
+diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
+deleted file mode 100644
+index 269c029ad74fc..0000000000000
+--- a/net/decnet/dn_fib.c
++++ /dev/null
+@@ -1,798 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Routing Forwarding Information Base (Glue/Info List)
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- *
+- *
+- * Changes:
+- * Alexey Kuznetsov : SMP locking changes
+- * Steve Whitehouse : Rewrote it... Well to be more correct, I
+- * copied most of it from the ipv4 fib code.
+- * Steve Whitehouse : Updated it in style and fixed a few bugs
+- * which were fixed in the ipv4 code since
+- * this code was copied from it.
+- *
+- */
+-#include <linux/string.h>
+-#include <linux/net.h>
+-#include <linux/socket.h>
+-#include <linux/slab.h>
+-#include <linux/sockios.h>
+-#include <linux/init.h>
+-#include <linux/skbuff.h>
+-#include <linux/netlink.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/proc_fs.h>
+-#include <linux/netdevice.h>
+-#include <linux/timer.h>
+-#include <linux/spinlock.h>
+-#include <linux/atomic.h>
+-#include <linux/uaccess.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/fib_rules.h>
+-#include <net/dn.h>
+-#include <net/dn_route.h>
+-#include <net/dn_fib.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_dev.h>
+-#include <net/rtnh.h>
+-
+-#define RT_MIN_TABLE 1
+-
+-#define for_fib_info() { struct dn_fib_info *fi;\
+- for(fi = dn_fib_info_list; fi; fi = fi->fib_next)
+-#define endfor_fib_info() }
+-
+-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
+- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+-
+-#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\
+- for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
+-
+-#define endfor_nexthops(fi) }
+-
+-static DEFINE_SPINLOCK(dn_fib_multipath_lock);
+-static struct dn_fib_info *dn_fib_info_list;
+-static DEFINE_SPINLOCK(dn_fib_info_lock);
+-
+-static struct
+-{
+- int error;
+- u8 scope;
+-} dn_fib_props[RTN_MAX+1] = {
+- [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
+- [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE },
+- [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST },
+- [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+- [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+- [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+- [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE },
+- [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },
+- [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE },
+- [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE },
+- [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
+- [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
+-};
+-
+-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+-static int dn_fib_sync_up(struct net_device *dev);
+-
+-void dn_fib_free_info(struct dn_fib_info *fi)
+-{
+- if (fi->fib_dead == 0) {
+- printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n");
+- return;
+- }
+-
+- change_nexthops(fi) {
+- dev_put(nh->nh_dev);
+- nh->nh_dev = NULL;
+- } endfor_nexthops(fi);
+- kfree(fi);
+-}
+-
+-void dn_fib_release_info(struct dn_fib_info *fi)
+-{
+- spin_lock(&dn_fib_info_lock);
+- if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
+- if (fi->fib_next)
+- fi->fib_next->fib_prev = fi->fib_prev;
+- if (fi->fib_prev)
+- fi->fib_prev->fib_next = fi->fib_next;
+- if (fi == dn_fib_info_list)
+- dn_fib_info_list = fi->fib_next;
+- fi->fib_dead = 1;
+- dn_fib_info_put(fi);
+- }
+- spin_unlock(&dn_fib_info_lock);
+-}
+-
+-static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
+-{
+- const struct dn_fib_nh *onh = ofi->fib_nh;
+-
+- for_nexthops(fi) {
+- if (nh->nh_oif != onh->nh_oif ||
+- nh->nh_gw != onh->nh_gw ||
+- nh->nh_scope != onh->nh_scope ||
+- nh->nh_weight != onh->nh_weight ||
+- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+- return -1;
+- onh++;
+- } endfor_nexthops(fi);
+- return 0;
+-}
+-
+-static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi)
+-{
+- for_fib_info() {
+- if (fi->fib_nhs != nfi->fib_nhs)
+- continue;
+- if (nfi->fib_protocol == fi->fib_protocol &&
+- nfi->fib_prefsrc == fi->fib_prefsrc &&
+- nfi->fib_priority == fi->fib_priority &&
+- memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
+- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+- (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0))
+- return fi;
+- } endfor_fib_info();
+- return NULL;
+-}
+-
+-static int dn_fib_count_nhs(const struct nlattr *attr)
+-{
+- struct rtnexthop *nhp = nla_data(attr);
+- int nhs = 0, nhlen = nla_len(attr);
+-
+- while (rtnh_ok(nhp, nhlen)) {
+- nhs++;
+- nhp = rtnh_next(nhp, &nhlen);
+- }
+-
+- /* leftover implies invalid nexthop configuration, discard it */
+- return nhlen > 0 ? 0 : nhs;
+-}
+-
+-static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
+- const struct rtmsg *r)
+-{
+- struct rtnexthop *nhp = nla_data(attr);
+- int nhlen = nla_len(attr);
+-
+- change_nexthops(fi) {
+- int attrlen;
+-
+- if (!rtnh_ok(nhp, nhlen))
+- return -EINVAL;
+-
+- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
+- nh->nh_oif = nhp->rtnh_ifindex;
+- nh->nh_weight = nhp->rtnh_hops + 1;
+-
+- attrlen = rtnh_attrlen(nhp);
+- if (attrlen > 0) {
+- struct nlattr *gw_attr;
+-
+- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
+- nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
+- }
+-
+- nhp = rtnh_next(nhp, &nhlen);
+- } endfor_nexthops(fi);
+-
+- return 0;
+-}
+-
+-
+-static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh)
+-{
+- int err;
+-
+- if (nh->nh_gw) {
+- struct flowidn fld;
+- struct dn_fib_res res;
+-
+- if (nh->nh_flags&RTNH_F_ONLINK) {
+- struct net_device *dev;
+-
+- if (r->rtm_scope >= RT_SCOPE_LINK)
+- return -EINVAL;
+- if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
+- return -EINVAL;
+- if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
+- return -ENODEV;
+- if (!(dev->flags&IFF_UP))
+- return -ENETDOWN;
+- nh->nh_dev = dev;
+- dev_hold(dev);
+- nh->nh_scope = RT_SCOPE_LINK;
+- return 0;
+- }
+-
+- memset(&fld, 0, sizeof(fld));
+- fld.daddr = nh->nh_gw;
+- fld.flowidn_oif = nh->nh_oif;
+- fld.flowidn_scope = r->rtm_scope + 1;
+-
+- if (fld.flowidn_scope < RT_SCOPE_LINK)
+- fld.flowidn_scope = RT_SCOPE_LINK;
+-
+- if ((err = dn_fib_lookup(&fld, &res)) != 0)
+- return err;
+-
+- err = -EINVAL;
+- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+- goto out;
+- nh->nh_scope = res.scope;
+- nh->nh_oif = DN_FIB_RES_OIF(res);
+- nh->nh_dev = DN_FIB_RES_DEV(res);
+- if (nh->nh_dev == NULL)
+- goto out;
+- dev_hold(nh->nh_dev);
+- err = -ENETDOWN;
+- if (!(nh->nh_dev->flags & IFF_UP))
+- goto out;
+- err = 0;
+-out:
+- dn_fib_res_put(&res);
+- return err;
+- } else {
+- struct net_device *dev;
+-
+- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+- return -EINVAL;
+-
+- dev = __dev_get_by_index(&init_net, nh->nh_oif);
+- if (dev == NULL || dev->dn_ptr == NULL)
+- return -ENODEV;
+- if (!(dev->flags&IFF_UP))
+- return -ENETDOWN;
+- nh->nh_dev = dev;
+- dev_hold(nh->nh_dev);
+- nh->nh_scope = RT_SCOPE_HOST;
+- }
+-
+- return 0;
+-}
+-
+-
+-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
+- const struct nlmsghdr *nlh, int *errp)
+-{
+- int err;
+- struct dn_fib_info *fi = NULL;
+- struct dn_fib_info *ofi;
+- int nhs = 1;
+-
+- if (r->rtm_type > RTN_MAX)
+- goto err_inval;
+-
+- if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
+- goto err_inval;
+-
+- if (attrs[RTA_MULTIPATH] &&
+- (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
+- goto err_inval;
+-
+- fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
+- err = -ENOBUFS;
+- if (fi == NULL)
+- goto failure;
+-
+- fi->fib_protocol = r->rtm_protocol;
+- fi->fib_nhs = nhs;
+- fi->fib_flags = r->rtm_flags;
+-
+- if (attrs[RTA_PRIORITY])
+- fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
+-
+- if (attrs[RTA_METRICS]) {
+- struct nlattr *attr;
+- int rem;
+-
+- nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
+- int type = nla_type(attr);
+-
+- if (type) {
+- if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
+- nla_len(attr) < 4)
+- goto err_inval;
+-
+- fi->fib_metrics[type-1] = nla_get_u32(attr);
+- }
+- }
+- }
+-
+- if (attrs[RTA_PREFSRC])
+- fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
+-
+- if (attrs[RTA_MULTIPATH]) {
+- if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
+- goto failure;
+-
+- if (attrs[RTA_OIF] &&
+- fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
+- goto err_inval;
+-
+- if (attrs[RTA_GATEWAY] &&
+- fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
+- goto err_inval;
+- } else {
+- struct dn_fib_nh *nh = fi->fib_nh;
+-
+- if (attrs[RTA_OIF])
+- nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
+-
+- if (attrs[RTA_GATEWAY])
+- nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
+-
+- nh->nh_flags = r->rtm_flags;
+- nh->nh_weight = 1;
+- }
+-
+- if (r->rtm_type == RTN_NAT) {
+- if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
+- goto err_inval;
+-
+- fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
+- goto link_it;
+- }
+-
+- if (dn_fib_props[r->rtm_type].error) {
+- if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
+- goto err_inval;
+-
+- goto link_it;
+- }
+-
+- if (r->rtm_scope > RT_SCOPE_HOST)
+- goto err_inval;
+-
+- if (r->rtm_scope == RT_SCOPE_HOST) {
+- struct dn_fib_nh *nh = fi->fib_nh;
+-
+- /* Local address is added */
+- if (nhs != 1 || nh->nh_gw)
+- goto err_inval;
+- nh->nh_scope = RT_SCOPE_NOWHERE;
+- nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
+- err = -ENODEV;
+- if (nh->nh_dev == NULL)
+- goto failure;
+- } else {
+- change_nexthops(fi) {
+- if ((err = dn_fib_check_nh(r, fi, nh)) != 0)
+- goto failure;
+- } endfor_nexthops(fi)
+- }
+-
+- if (fi->fib_prefsrc) {
+- if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
+- fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
+- if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
+- goto err_inval;
+- }
+-
+-link_it:
+- if ((ofi = dn_fib_find_info(fi)) != NULL) {
+- fi->fib_dead = 1;
+- dn_fib_free_info(fi);
+- refcount_inc(&ofi->fib_treeref);
+- return ofi;
+- }
+-
+- refcount_set(&fi->fib_treeref, 1);
+- refcount_set(&fi->fib_clntref, 1);
+- spin_lock(&dn_fib_info_lock);
+- fi->fib_next = dn_fib_info_list;
+- fi->fib_prev = NULL;
+- if (dn_fib_info_list)
+- dn_fib_info_list->fib_prev = fi;
+- dn_fib_info_list = fi;
+- spin_unlock(&dn_fib_info_lock);
+- return fi;
+-
+-err_inval:
+- err = -EINVAL;
+-
+-failure:
+- *errp = err;
+- if (fi) {
+- fi->fib_dead = 1;
+- dn_fib_free_info(fi);
+- }
+-
+- return NULL;
+-}
+-
+-int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
+-{
+- int err = dn_fib_props[type].error;
+-
+- if (err == 0) {
+- if (fi->fib_flags & RTNH_F_DEAD)
+- return 1;
+-
+- res->fi = fi;
+-
+- switch (type) {
+- case RTN_NAT:
+- DN_FIB_RES_RESET(*res);
+- refcount_inc(&fi->fib_clntref);
+- return 0;
+- case RTN_UNICAST:
+- case RTN_LOCAL:
+- for_nexthops(fi) {
+- if (nh->nh_flags & RTNH_F_DEAD)
+- continue;
+- if (!fld->flowidn_oif ||
+- fld->flowidn_oif == nh->nh_oif)
+- break;
+- }
+- if (nhsel < fi->fib_nhs) {
+- res->nh_sel = nhsel;
+- refcount_inc(&fi->fib_clntref);
+- return 0;
+- }
+- endfor_nexthops(fi);
+- res->fi = NULL;
+- return 1;
+- default:
+- net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n",
+- type);
+- res->fi = NULL;
+- return -EINVAL;
+- }
+- }
+- return err;
+-}
+-
+-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
+-{
+- struct dn_fib_info *fi = res->fi;
+- int w;
+-
+- spin_lock_bh(&dn_fib_multipath_lock);
+- if (fi->fib_power <= 0) {
+- int power = 0;
+- change_nexthops(fi) {
+- if (!(nh->nh_flags&RTNH_F_DEAD)) {
+- power += nh->nh_weight;
+- nh->nh_power = nh->nh_weight;
+- }
+- } endfor_nexthops(fi);
+- fi->fib_power = power;
+- if (power < 0) {
+- spin_unlock_bh(&dn_fib_multipath_lock);
+- res->nh_sel = 0;
+- return;
+- }
+- }
+-
+- w = jiffies % fi->fib_power;
+-
+- change_nexthops(fi) {
+- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+- if ((w -= nh->nh_power) <= 0) {
+- nh->nh_power--;
+- fi->fib_power--;
+- res->nh_sel = nhsel;
+- spin_unlock_bh(&dn_fib_multipath_lock);
+- return;
+- }
+- }
+- } endfor_nexthops(fi);
+- res->nh_sel = 0;
+- spin_unlock_bh(&dn_fib_multipath_lock);
+-}
+-
+-static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
+-{
+- if (attrs[RTA_TABLE])
+- table = nla_get_u32(attrs[RTA_TABLE]);
+-
+- return table;
+-}
+-
+-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = sock_net(skb->sk);
+- struct dn_fib_table *tb;
+- struct rtmsg *r = nlmsg_data(nlh);
+- struct nlattr *attrs[RTA_MAX+1];
+- int err;
+-
+- if (!netlink_capable(skb, CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (!net_eq(net, &init_net))
+- return -EINVAL;
+-
+- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
+- rtm_dn_policy, extack);
+- if (err < 0)
+- return err;
+-
+- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
+- if (!tb)
+- return -ESRCH;
+-
+- return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
+-}
+-
+-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = sock_net(skb->sk);
+- struct dn_fib_table *tb;
+- struct rtmsg *r = nlmsg_data(nlh);
+- struct nlattr *attrs[RTA_MAX+1];
+- int err;
+-
+- if (!netlink_capable(skb, CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (!net_eq(net, &init_net))
+- return -EINVAL;
+-
+- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
+- rtm_dn_policy, extack);
+- if (err < 0)
+- return err;
+-
+- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
+- if (!tb)
+- return -ENOBUFS;
+-
+- return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
+-}
+-
+-static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
+-{
+- struct dn_fib_table *tb;
+- struct {
+- struct nlmsghdr nlh;
+- struct rtmsg rtm;
+- } req;
+- struct {
+- struct nlattr hdr;
+- __le16 dst;
+- } dst_attr = {
+- .dst = dst,
+- };
+- struct {
+- struct nlattr hdr;
+- __le16 prefsrc;
+- } prefsrc_attr = {
+- .prefsrc = ifa->ifa_local,
+- };
+- struct {
+- struct nlattr hdr;
+- u32 oif;
+- } oif_attr = {
+- .oif = ifa->ifa_dev->dev->ifindex,
+- };
+- struct nlattr *attrs[RTA_MAX+1] = {
+- [RTA_DST] = (struct nlattr *) &dst_attr,
+- [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
+- [RTA_OIF] = (struct nlattr *) &oif_attr,
+- };
+-
+- memset(&req.rtm, 0, sizeof(req.rtm));
+-
+- if (type == RTN_UNICAST)
+- tb = dn_fib_get_table(RT_MIN_TABLE, 1);
+- else
+- tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
+-
+- if (tb == NULL)
+- return;
+-
+- req.nlh.nlmsg_len = sizeof(req);
+- req.nlh.nlmsg_type = cmd;
+- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
+- req.nlh.nlmsg_pid = 0;
+- req.nlh.nlmsg_seq = 0;
+-
+- req.rtm.rtm_dst_len = dst_len;
+- req.rtm.rtm_table = tb->n;
+- req.rtm.rtm_protocol = RTPROT_KERNEL;
+- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
+- req.rtm.rtm_type = type;
+-
+- if (cmd == RTM_NEWROUTE)
+- tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
+- else
+- tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
+-}
+-
+-static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
+-{
+-
+- fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
+-
+-#if 0
+- if (!(dev->flags&IFF_UP))
+- return;
+- /* In the future, we will want to add default routes here */
+-
+-#endif
+-}
+-
+-static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
+-{
+- int found_it = 0;
+- struct net_device *dev;
+- struct dn_dev *dn_db;
+- struct dn_ifaddr *ifa2;
+-
+- ASSERT_RTNL();
+-
+- /* Scan device list */
+- rcu_read_lock();
+- for_each_netdev_rcu(&init_net, dev) {
+- dn_db = rcu_dereference(dev->dn_ptr);
+- if (dn_db == NULL)
+- continue;
+- for (ifa2 = rcu_dereference(dn_db->ifa_list);
+- ifa2 != NULL;
+- ifa2 = rcu_dereference(ifa2->ifa_next)) {
+- if (ifa2->ifa_local == ifa->ifa_local) {
+- found_it = 1;
+- break;
+- }
+- }
+- }
+- rcu_read_unlock();
+-
+- if (found_it == 0) {
+- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
+-
+- if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
+- if (dn_fib_sync_down(ifa->ifa_local, NULL, 0))
+- dn_fib_flush();
+- }
+- }
+-}
+-
+-static void dn_fib_disable_addr(struct net_device *dev, int force)
+-{
+- if (dn_fib_sync_down(0, dev, force))
+- dn_fib_flush();
+- dn_rt_cache_flush(0);
+- neigh_ifdown(&dn_neigh_table, dev);
+-}
+-
+-static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+-{
+- struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr;
+-
+- switch (event) {
+- case NETDEV_UP:
+- dn_fib_add_ifaddr(ifa);
+- dn_fib_sync_up(ifa->ifa_dev->dev);
+- dn_rt_cache_flush(-1);
+- break;
+- case NETDEV_DOWN:
+- dn_fib_del_ifaddr(ifa);
+- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+- dn_fib_disable_addr(ifa->ifa_dev->dev, 1);
+- } else {
+- dn_rt_cache_flush(-1);
+- }
+- break;
+- }
+- return NOTIFY_DONE;
+-}
+-
+-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+-{
+- int ret = 0;
+- int scope = RT_SCOPE_NOWHERE;
+-
+- if (force)
+- scope = -1;
+-
+- for_fib_info() {
+- /*
+- * This makes no sense for DECnet.... we will almost
+- * certainly have more than one local address the same
+- * over all our interfaces. It needs thinking about
+- * some more.
+- */
+- if (local && fi->fib_prefsrc == local) {
+- fi->fib_flags |= RTNH_F_DEAD;
+- ret++;
+- } else if (dev && fi->fib_nhs) {
+- int dead = 0;
+-
+- change_nexthops(fi) {
+- if (nh->nh_flags&RTNH_F_DEAD)
+- dead++;
+- else if (nh->nh_dev == dev &&
+- nh->nh_scope != scope) {
+- spin_lock_bh(&dn_fib_multipath_lock);
+- nh->nh_flags |= RTNH_F_DEAD;
+- fi->fib_power -= nh->nh_power;
+- nh->nh_power = 0;
+- spin_unlock_bh(&dn_fib_multipath_lock);
+- dead++;
+- }
+- } endfor_nexthops(fi)
+- if (dead == fi->fib_nhs) {
+- fi->fib_flags |= RTNH_F_DEAD;
+- ret++;
+- }
+- }
+- } endfor_fib_info();
+- return ret;
+-}
+-
+-
+-static int dn_fib_sync_up(struct net_device *dev)
+-{
+- int ret = 0;
+-
+- if (!(dev->flags&IFF_UP))
+- return 0;
+-
+- for_fib_info() {
+- int alive = 0;
+-
+- change_nexthops(fi) {
+- if (!(nh->nh_flags&RTNH_F_DEAD)) {
+- alive++;
+- continue;
+- }
+- if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
+- continue;
+- if (nh->nh_dev != dev || dev->dn_ptr == NULL)
+- continue;
+- alive++;
+- spin_lock_bh(&dn_fib_multipath_lock);
+- nh->nh_power = 0;
+- nh->nh_flags &= ~RTNH_F_DEAD;
+- spin_unlock_bh(&dn_fib_multipath_lock);
+- } endfor_nexthops(fi);
+-
+- if (alive > 0) {
+- fi->fib_flags &= ~RTNH_F_DEAD;
+- ret++;
+- }
+- } endfor_fib_info();
+- return ret;
+-}
+-
+-static struct notifier_block dn_fib_dnaddr_notifier = {
+- .notifier_call = dn_fib_dnaddr_event,
+-};
+-
+-void __exit dn_fib_cleanup(void)
+-{
+- dn_fib_table_cleanup();
+- dn_fib_rules_cleanup();
+-
+- unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+-}
+-
+-
+-void __init dn_fib_init(void)
+-{
+- dn_fib_table_init();
+- dn_fib_rules_init();
+-
+- register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+-
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
+- dn_fib_rtm_newroute, NULL, 0);
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
+- dn_fib_rtm_delroute, NULL, 0);
+-}
+diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
+deleted file mode 100644
+index 94b306f6d5511..0000000000000
+--- a/net/decnet/dn_neigh.c
++++ /dev/null
+@@ -1,605 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Neighbour Functions (Adjacency Database and
+- * On-Ethernet Cache)
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- *
+- *
+- * Changes:
+- * Steve Whitehouse : Fixed router listing routine
+- * Steve Whitehouse : Added error_report functions
+- * Steve Whitehouse : Added default router detection
+- * Steve Whitehouse : Hop counts in outgoing messages
+- * Steve Whitehouse : Fixed src/dst in outgoing messages so
+- * forwarding now stands a good chance of
+- * working.
+- * Steve Whitehouse : Fixed neighbour states (for now anyway).
+- * Steve Whitehouse : Made error_report functions dummies. This
+- * is not the right place to return skbs.
+- * Steve Whitehouse : Convert to seq_file
+- *
+- */
+-
+-#include <linux/net.h>
+-#include <linux/module.h>
+-#include <linux/socket.h>
+-#include <linux/if_arp.h>
+-#include <linux/slab.h>
+-#include <linux/if_ether.h>
+-#include <linux/init.h>
+-#include <linux/proc_fs.h>
+-#include <linux/string.h>
+-#include <linux/netfilter_decnet.h>
+-#include <linux/spinlock.h>
+-#include <linux/seq_file.h>
+-#include <linux/rcupdate.h>
+-#include <linux/jhash.h>
+-#include <linux/atomic.h>
+-#include <net/net_namespace.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/dn.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_route.h>
+-
+-static int dn_neigh_construct(struct neighbour *);
+-static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
+-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
+-
+-/*
+- * Operations for adding the link layer header.
+- */
+-static const struct neigh_ops dn_neigh_ops = {
+- .family = AF_DECnet,
+- .error_report = dn_neigh_error_report,
+- .output = dn_neigh_output,
+- .connected_output = dn_neigh_output,
+-};
+-
+-static u32 dn_neigh_hash(const void *pkey,
+- const struct net_device *dev,
+- __u32 *hash_rnd)
+-{
+- return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
+-}
+-
+-static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
+-{
+- return neigh_key_eq16(neigh, pkey);
+-}
+-
+-struct neigh_table dn_neigh_table = {
+- .family = PF_DECnet,
+- .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
+- .key_len = sizeof(__le16),
+- .protocol = cpu_to_be16(ETH_P_DNA_RT),
+- .hash = dn_neigh_hash,
+- .key_eq = dn_key_eq,
+- .constructor = dn_neigh_construct,
+- .id = "dn_neigh_cache",
+- .parms ={
+- .tbl = &dn_neigh_table,
+- .reachable_time = 30 * HZ,
+- .data = {
+- [NEIGH_VAR_MCAST_PROBES] = 0,
+- [NEIGH_VAR_UCAST_PROBES] = 0,
+- [NEIGH_VAR_APP_PROBES] = 0,
+- [NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
+- [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
+- [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+- [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
+- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
+- [NEIGH_VAR_PROXY_QLEN] = 0,
+- [NEIGH_VAR_ANYCAST_DELAY] = 0,
+- [NEIGH_VAR_PROXY_DELAY] = 0,
+- [NEIGH_VAR_LOCKTIME] = 1 * HZ,
+- },
+- },
+- .gc_interval = 30 * HZ,
+- .gc_thresh1 = 128,
+- .gc_thresh2 = 512,
+- .gc_thresh3 = 1024,
+-};
+-
+-static int dn_neigh_construct(struct neighbour *neigh)
+-{
+- struct net_device *dev = neigh->dev;
+- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
+- struct dn_dev *dn_db;
+- struct neigh_parms *parms;
+-
+- rcu_read_lock();
+- dn_db = rcu_dereference(dev->dn_ptr);
+- if (dn_db == NULL) {
+- rcu_read_unlock();
+- return -EINVAL;
+- }
+-
+- parms = dn_db->neigh_parms;
+- if (!parms) {
+- rcu_read_unlock();
+- return -EINVAL;
+- }
+-
+- __neigh_parms_put(neigh->parms);
+- neigh->parms = neigh_parms_clone(parms);
+- rcu_read_unlock();
+-
+- neigh->ops = &dn_neigh_ops;
+- neigh->nud_state = NUD_NOARP;
+- neigh->output = neigh->ops->connected_output;
+-
+- if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT))
+- memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+- else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK))
+- dn_dn2eth(neigh->ha, dn->addr);
+- else {
+- net_dbg_ratelimited("Trying to create neigh for hw %d\n",
+- dev->type);
+- return -EINVAL;
+- }
+-
+- /*
+- * Make an estimate of the remote block size by assuming that its
+- * two less then the device mtu, which it true for ethernet (and
+- * other things which support long format headers) since there is
+- * an extra length field (of 16 bits) which isn't part of the
+- * ethernet headers and which the DECnet specs won't admit is part
+- * of the DECnet routing headers either.
+- *
+- * If we over estimate here its no big deal, the NSP negotiations
+- * will prevent us from sending packets which are too large for the
+- * remote node to handle. In any case this figure is normally updated
+- * by a hello message in most cases.
+- */
+- dn->blksize = dev->mtu - 2;
+-
+- return 0;
+-}
+-
+-static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
+-{
+- printk(KERN_DEBUG "dn_neigh_error_report: called\n");
+- kfree_skb(skb);
+-}
+-
+-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
+-{
+- struct dst_entry *dst = skb_dst(skb);
+- struct dn_route *rt = (struct dn_route *)dst;
+- struct net_device *dev = neigh->dev;
+- char mac_addr[ETH_ALEN];
+- unsigned int seq;
+- int err;
+-
+- dn_dn2eth(mac_addr, rt->rt_local_src);
+- do {
+- seq = read_seqbegin(&neigh->ha_lock);
+- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+- neigh->ha, mac_addr, skb->len);
+- } while (read_seqretry(&neigh->ha_lock, seq));
+-
+- if (err >= 0)
+- err = dev_queue_xmit(skb);
+- else {
+- kfree_skb(skb);
+- err = -EINVAL;
+- }
+- return err;
+-}
+-
+-static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct dst_entry *dst = skb_dst(skb);
+- struct dn_route *rt = (struct dn_route *)dst;
+- struct neighbour *neigh = rt->n;
+-
+- return neigh->output(neigh, skb);
+-}
+-
+-/*
+- * For talking to broadcast devices: Ethernet & PPP
+- */
+-static int dn_long_output(struct neighbour *neigh, struct sock *sk,
+- struct sk_buff *skb)
+-{
+- struct net_device *dev = neigh->dev;
+- int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
+- unsigned char *data;
+- struct dn_long_packet *lp;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+-
+- if (skb_headroom(skb) < headroom) {
+- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+- if (skb2 == NULL) {
+- net_crit_ratelimited("dn_long_output: no memory\n");
+- kfree_skb(skb);
+- return -ENOBUFS;
+- }
+- consume_skb(skb);
+- skb = skb2;
+- net_info_ratelimited("dn_long_output: Increasing headroom\n");
+- }
+-
+- data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
+- lp = (struct dn_long_packet *)(data+3);
+-
+- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
+- *(data + 2) = 1 | DN_RT_F_PF; /* Padding */
+-
+- lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
+- lp->d_area = lp->d_subarea = 0;
+- dn_dn2eth(lp->d_id, cb->dst);
+- lp->s_area = lp->s_subarea = 0;
+- dn_dn2eth(lp->s_id, cb->src);
+- lp->nl2 = 0;
+- lp->visit_ct = cb->hops & 0x3f;
+- lp->s_class = 0;
+- lp->pt = 0;
+-
+- skb_reset_network_header(skb);
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+- &init_net, sk, skb, NULL, neigh->dev,
+- dn_neigh_output_packet);
+-}
+-
+-/*
+- * For talking to pointopoint and multidrop devices: DDCMP and X.25
+- */
+-static int dn_short_output(struct neighbour *neigh, struct sock *sk,
+- struct sk_buff *skb)
+-{
+- struct net_device *dev = neigh->dev;
+- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
+- struct dn_short_packet *sp;
+- unsigned char *data;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+-
+- if (skb_headroom(skb) < headroom) {
+- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+- if (skb2 == NULL) {
+- net_crit_ratelimited("dn_short_output: no memory\n");
+- kfree_skb(skb);
+- return -ENOBUFS;
+- }
+- consume_skb(skb);
+- skb = skb2;
+- net_info_ratelimited("dn_short_output: Increasing headroom\n");
+- }
+-
+- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
+- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
+- sp = (struct dn_short_packet *)(data+2);
+-
+- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
+- sp->dstnode = cb->dst;
+- sp->srcnode = cb->src;
+- sp->forward = cb->hops & 0x3f;
+-
+- skb_reset_network_header(skb);
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+- &init_net, sk, skb, NULL, neigh->dev,
+- dn_neigh_output_packet);
+-}
+-
+-/*
+- * For talking to DECnet phase III nodes
+- * Phase 3 output is the same as short output, execpt that
+- * it clears the area bits before transmission.
+- */
+-static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
+- struct sk_buff *skb)
+-{
+- struct net_device *dev = neigh->dev;
+- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
+- struct dn_short_packet *sp;
+- unsigned char *data;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+- if (skb_headroom(skb) < headroom) {
+- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
+- if (skb2 == NULL) {
+- net_crit_ratelimited("dn_phase3_output: no memory\n");
+- kfree_skb(skb);
+- return -ENOBUFS;
+- }
+- consume_skb(skb);
+- skb = skb2;
+- net_info_ratelimited("dn_phase3_output: Increasing headroom\n");
+- }
+-
+- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
+- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
+- sp = (struct dn_short_packet *)(data + 2);
+-
+- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
+- sp->dstnode = cb->dst & cpu_to_le16(0x03ff);
+- sp->srcnode = cb->src & cpu_to_le16(0x03ff);
+- sp->forward = cb->hops & 0x3f;
+-
+- skb_reset_network_header(skb);
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+- &init_net, sk, skb, NULL, neigh->dev,
+- dn_neigh_output_packet);
+-}
+-
+-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct dst_entry *dst = skb_dst(skb);
+- struct dn_route *rt = (struct dn_route *) dst;
+- struct neighbour *neigh = rt->n;
+- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
+- struct dn_dev *dn_db;
+- bool use_long;
+-
+- rcu_read_lock();
+- dn_db = rcu_dereference(neigh->dev->dn_ptr);
+- if (dn_db == NULL) {
+- rcu_read_unlock();
+- return -EINVAL;
+- }
+- use_long = dn_db->use_long;
+- rcu_read_unlock();
+-
+- if (dn->flags & DN_NDFLAG_P3)
+- return dn_phase3_output(neigh, sk, skb);
+- if (use_long)
+- return dn_long_output(neigh, sk, skb);
+- else
+- return dn_short_output(neigh, sk, skb);
+-}
+-
+-/*
+- * Unfortunately, the neighbour code uses the device in its hash
+- * function, so we don't get any advantage from it. This function
+- * basically does a neigh_lookup(), but without comparing the device
+- * field. This is required for the On-Ethernet cache
+- */
+-
+-/*
+- * Pointopoint link receives a hello message
+- */
+-void dn_neigh_pointopoint_hello(struct sk_buff *skb)
+-{
+- kfree_skb(skb);
+-}
+-
+-/*
+- * Ethernet router hello message received
+- */
+-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
+-
+- struct neighbour *neigh;
+- struct dn_neigh *dn;
+- struct dn_dev *dn_db;
+- __le16 src;
+-
+- src = dn_eth2dn(msg->id);
+-
+- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
+-
+- dn = container_of(neigh, struct dn_neigh, n);
+-
+- if (neigh) {
+- write_lock(&neigh->lock);
+-
+- neigh->used = jiffies;
+- dn_db = rcu_dereference(neigh->dev->dn_ptr);
+-
+- if (!(neigh->nud_state & NUD_PERMANENT)) {
+- neigh->updated = jiffies;
+-
+- if (neigh->dev->type == ARPHRD_ETHER)
+- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
+-
+- dn->blksize = le16_to_cpu(msg->blksize);
+- dn->priority = msg->priority;
+-
+- dn->flags &= ~DN_NDFLAG_P3;
+-
+- switch (msg->iinfo & DN_RT_INFO_TYPE) {
+- case DN_RT_INFO_L1RT:
+- dn->flags &=~DN_NDFLAG_R2;
+- dn->flags |= DN_NDFLAG_R1;
+- break;
+- case DN_RT_INFO_L2RT:
+- dn->flags |= DN_NDFLAG_R2;
+- }
+- }
+-
+- /* Only use routers in our area */
+- if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) {
+- if (!dn_db->router) {
+- dn_db->router = neigh_clone(neigh);
+- } else {
+- if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+- neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
+- }
+- }
+- write_unlock(&neigh->lock);
+- neigh_release(neigh);
+- }
+-
+- kfree_skb(skb);
+- return 0;
+-}
+-
+-/*
+- * Endnode hello message received
+- */
+-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
+- struct neighbour *neigh;
+- struct dn_neigh *dn;
+- __le16 src;
+-
+- src = dn_eth2dn(msg->id);
+-
+- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
+-
+- dn = container_of(neigh, struct dn_neigh, n);
+-
+- if (neigh) {
+- write_lock(&neigh->lock);
+-
+- neigh->used = jiffies;
+-
+- if (!(neigh->nud_state & NUD_PERMANENT)) {
+- neigh->updated = jiffies;
+-
+- if (neigh->dev->type == ARPHRD_ETHER)
+- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
+- dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
+- dn->blksize = le16_to_cpu(msg->blksize);
+- dn->priority = 0;
+- }
+-
+- write_unlock(&neigh->lock);
+- neigh_release(neigh);
+- }
+-
+- kfree_skb(skb);
+- return 0;
+-}
+-
+-static char *dn_find_slot(char *base, int max, int priority)
+-{
+- int i;
+- unsigned char *min = NULL;
+-
+- base += 6; /* skip first id */
+-
+- for(i = 0; i < max; i++) {
+- if (!min || (*base < *min))
+- min = base;
+- base += 7; /* find next priority */
+- }
+-
+- if (!min)
+- return NULL;
+-
+- return (*min < priority) ? (min - 6) : NULL;
+-}
+-
+-struct elist_cb_state {
+- struct net_device *dev;
+- unsigned char *ptr;
+- unsigned char *rs;
+- int t, n;
+-};
+-
+-static void neigh_elist_cb(struct neighbour *neigh, void *_info)
+-{
+- struct elist_cb_state *s = _info;
+- struct dn_neigh *dn;
+-
+- if (neigh->dev != s->dev)
+- return;
+-
+- dn = container_of(neigh, struct dn_neigh, n);
+- if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
+- return;
+-
+- if (s->t == s->n)
+- s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
+- else
+- s->t++;
+- if (s->rs == NULL)
+- return;
+-
+- dn_dn2eth(s->rs, dn->addr);
+- s->rs += 6;
+- *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0;
+- *(s->rs) |= dn->priority;
+- s->rs++;
+-}
+-
+-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
+-{
+- struct elist_cb_state state;
+-
+- state.dev = dev;
+- state.t = 0;
+- state.n = n;
+- state.ptr = ptr;
+- state.rs = ptr;
+-
+- neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state);
+-
+- return state.t;
+-}
+-
+-
+-#ifdef CONFIG_PROC_FS
+-
+-static inline void dn_neigh_format_entry(struct seq_file *seq,
+- struct neighbour *n)
+-{
+- struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
+- char buf[DN_ASCBUF_LEN];
+-
+- read_lock(&n->lock);
+- seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n",
+- dn_addr2asc(le16_to_cpu(dn->addr), buf),
+- (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
+- (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
+- (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
+- dn->n.nud_state,
+- refcount_read(&dn->n.refcnt),
+- dn->blksize,
+- (dn->n.dev) ? dn->n.dev->name : "?");
+- read_unlock(&n->lock);
+-}
+-
+-static int dn_neigh_seq_show(struct seq_file *seq, void *v)
+-{
+- if (v == SEQ_START_TOKEN) {
+- seq_puts(seq, "Addr Flags State Use Blksize Dev\n");
+- } else {
+- dn_neigh_format_entry(seq, v);
+- }
+-
+- return 0;
+-}
+-
+-static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
+-{
+- return neigh_seq_start(seq, pos, &dn_neigh_table,
+- NEIGH_SEQ_NEIGH_ONLY);
+-}
+-
+-static const struct seq_operations dn_neigh_seq_ops = {
+- .start = dn_neigh_seq_start,
+- .next = neigh_seq_next,
+- .stop = neigh_seq_stop,
+- .show = dn_neigh_seq_show,
+-};
+-#endif
+-
+-void __init dn_neigh_init(void)
+-{
+- neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
+- proc_create_net("decnet_neigh", 0444, init_net.proc_net,
+- &dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
+-}
+-
+-void __exit dn_neigh_cleanup(void)
+-{
+- remove_proc_entry("decnet_neigh", init_net.proc_net);
+- neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table);
+-}
+diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
+deleted file mode 100644
+index 7ab788f41a3fb..0000000000000
+--- a/net/decnet/dn_nsp_in.c
++++ /dev/null
+@@ -1,906 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Network Services Protocol (Input)
+- *
+- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
+- *
+- * Changes:
+- *
+- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
+- * original dn_nsp.c.
+- * Steve Whitehouse: Updated to work with my new routing architecture.
+- * Steve Whitehouse: Add changes from Eduardo Serrat's patches.
+- * Steve Whitehouse: Put all ack handling code in a common routine.
+- * Steve Whitehouse: Put other common bits into dn_nsp_rx()
+- * Steve Whitehouse: More checks on skb->len to catch bogus packets
+- * Fixed various race conditions and possible nasties.
+- * Steve Whitehouse: Now handles returned conninit frames.
+- * David S. Miller: New socket locking
+- * Steve Whitehouse: Fixed lockup when socket filtering was enabled.
+- * Paul Koning: Fix to push CC sockets into RUN when acks are
+- * received.
+- * Steve Whitehouse:
+- * Patrick Caulfield: Checking conninits for correctness & sending of error
+- * responses.
+- * Steve Whitehouse: Added backlog congestion level return codes.
+- * Patrick Caulfield:
+- * Steve Whitehouse: Added flow control support (outbound)
+- * Steve Whitehouse: Prepare for nonlinear skbs
+- */
+-
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-*******************************************************************************/
+-
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/kernel.h>
+-#include <linux/timer.h>
+-#include <linux/string.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/netdevice.h>
+-#include <linux/inet.h>
+-#include <linux/route.h>
+-#include <linux/slab.h>
+-#include <net/sock.h>
+-#include <net/tcp_states.h>
+-#include <linux/fcntl.h>
+-#include <linux/mm.h>
+-#include <linux/termios.h>
+-#include <linux/interrupt.h>
+-#include <linux/proc_fs.h>
+-#include <linux/stat.h>
+-#include <linux/init.h>
+-#include <linux/poll.h>
+-#include <linux/netfilter_decnet.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/dn.h>
+-#include <net/dn_nsp.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-
+-extern int decnet_log_martians;
+-
+-static void dn_log_martian(struct sk_buff *skb, const char *msg)
+-{
+- if (decnet_log_martians) {
+- char *devname = skb->dev ? skb->dev->name : "???";
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n",
+- msg, devname,
+- le16_to_cpu(cb->src),
+- le16_to_cpu(cb->dst),
+- le16_to_cpu(cb->src_port),
+- le16_to_cpu(cb->dst_port));
+- }
+-}
+-
+-/*
+- * For this function we've flipped the cross-subchannel bit
+- * if the message is an otherdata or linkservice message. Thus
+- * we can use it to work out what to update.
+- */
+-static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned short type = ((ack >> 12) & 0x0003);
+- int wakeup = 0;
+-
+- switch (type) {
+- case 0: /* ACK - Data */
+- if (dn_after(ack, scp->ackrcv_dat)) {
+- scp->ackrcv_dat = ack & 0x0fff;
+- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
+- &scp->data_xmit_queue,
+- ack);
+- }
+- break;
+- case 1: /* NAK - Data */
+- break;
+- case 2: /* ACK - OtherData */
+- if (dn_after(ack, scp->ackrcv_oth)) {
+- scp->ackrcv_oth = ack & 0x0fff;
+- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
+- &scp->other_xmit_queue,
+- ack);
+- }
+- break;
+- case 3: /* NAK - OtherData */
+- break;
+- }
+-
+- if (wakeup && !sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+-}
+-
+-/*
+- * This function is a universal ack processor.
+- */
+-static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
+-{
+- __le16 *ptr = (__le16 *)skb->data;
+- int len = 0;
+- unsigned short ack;
+-
+- if (skb->len < 2)
+- return len;
+-
+- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
+- skb_pull(skb, 2);
+- ptr++;
+- len += 2;
+- if ((ack & 0x4000) == 0) {
+- if (oth)
+- ack ^= 0x2000;
+- dn_ack(sk, skb, ack);
+- }
+- }
+-
+- if (skb->len < 2)
+- return len;
+-
+- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
+- skb_pull(skb, 2);
+- len += 2;
+- if ((ack & 0x4000) == 0) {
+- if (oth)
+- ack ^= 0x2000;
+- dn_ack(sk, skb, ack);
+- }
+- }
+-
+- return len;
+-}
+-
+-
+-/**
+- * dn_check_idf - Check an image data field format is correct.
+- * @pptr: Pointer to pointer to image data
+- * @len: Pointer to length of image data
+- * @max: The maximum allowed length of the data in the image data field
+- * @follow_on: Check that this many bytes exist beyond the end of the image data
+- *
+- * Returns: 0 if ok, -1 on error
+- */
+-static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on)
+-{
+- unsigned char *ptr = *pptr;
+- unsigned char flen = *ptr++;
+-
+- (*len)--;
+- if (flen > max)
+- return -1;
+- if ((flen + follow_on) > *len)
+- return -1;
+-
+- *len -= flen;
+- *pptr = ptr + flen;
+- return 0;
+-}
+-
+-/*
+- * Table of reason codes to pass back to node which sent us a badly
+- * formed message, plus text messages for the log. A zero entry in
+- * the reason field means "don't reply" otherwise a disc init is sent with
+- * the specified reason code.
+- */
+-static struct {
+- unsigned short reason;
+- const char *text;
+-} ci_err_table[] = {
+- { 0, "CI: Truncated message" },
+- { NSP_REASON_ID, "CI: Destination username error" },
+- { NSP_REASON_ID, "CI: Destination username type" },
+- { NSP_REASON_US, "CI: Source username error" },
+- { 0, "CI: Truncated at menuver" },
+- { 0, "CI: Truncated before access or user data" },
+- { NSP_REASON_IO, "CI: Access data format error" },
+- { NSP_REASON_IO, "CI: User data format error" }
+-};
+-
+-/*
+- * This function uses a slightly different lookup method
+- * to find its sockets, since it searches on object name/number
+- * rather than port numbers. Various tests are done to ensure that
+- * the incoming data is in the correct format before it is queued to
+- * a socket.
+- */
+-static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data;
+- struct sockaddr_dn dstaddr;
+- struct sockaddr_dn srcaddr;
+- unsigned char type = 0;
+- int dstlen;
+- int srclen;
+- unsigned char *ptr;
+- int len;
+- int err = 0;
+- unsigned char menuver;
+-
+- memset(&dstaddr, 0, sizeof(struct sockaddr_dn));
+- memset(&srcaddr, 0, sizeof(struct sockaddr_dn));
+-
+- /*
+- * 1. Decode & remove message header
+- */
+- cb->src_port = msg->srcaddr;
+- cb->dst_port = msg->dstaddr;
+- cb->services = msg->services;
+- cb->info = msg->info;
+- cb->segsize = le16_to_cpu(msg->segsize);
+-
+- if (!pskb_may_pull(skb, sizeof(*msg)))
+- goto err_out;
+-
+- skb_pull(skb, sizeof(*msg));
+-
+- len = skb->len;
+- ptr = skb->data;
+-
+- /*
+- * 2. Check destination end username format
+- */
+- dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type);
+- err++;
+- if (dstlen < 0)
+- goto err_out;
+-
+- err++;
+- if (type > 1)
+- goto err_out;
+-
+- len -= dstlen;
+- ptr += dstlen;
+-
+- /*
+- * 3. Check source end username format
+- */
+- srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type);
+- err++;
+- if (srclen < 0)
+- goto err_out;
+-
+- len -= srclen;
+- ptr += srclen;
+- err++;
+- if (len < 1)
+- goto err_out;
+-
+- menuver = *ptr;
+- ptr++;
+- len--;
+-
+- /*
+- * 4. Check that optional data actually exists if menuver says it does
+- */
+- err++;
+- if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1))
+- goto err_out;
+-
+- /*
+- * 5. Check optional access data format
+- */
+- err++;
+- if (menuver & DN_MENUVER_ACC) {
+- if (dn_check_idf(&ptr, &len, 39, 1))
+- goto err_out;
+- if (dn_check_idf(&ptr, &len, 39, 1))
+- goto err_out;
+- if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0))
+- goto err_out;
+- }
+-
+- /*
+- * 6. Check optional user data format
+- */
+- err++;
+- if (menuver & DN_MENUVER_USR) {
+- if (dn_check_idf(&ptr, &len, 16, 0))
+- goto err_out;
+- }
+-
+- /*
+- * 7. Look up socket based on destination end username
+- */
+- return dn_sklist_find_listener(&dstaddr);
+-err_out:
+- dn_log_martian(skb, ci_err_table[err].text);
+- *reason = ci_err_table[err].reason;
+- return NULL;
+-}
+-
+-
+-static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
+-{
+- if (sk_acceptq_is_full(sk)) {
+- kfree_skb(skb);
+- return;
+- }
+-
+- sk_acceptq_added(sk);
+- skb_queue_tail(&sk->sk_receive_queue, skb);
+- sk->sk_state_change(sk);
+-}
+-
+-static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned char *ptr;
+-
+- if (skb->len < 4)
+- goto out;
+-
+- ptr = skb->data;
+- cb->services = *ptr++;
+- cb->info = *ptr++;
+- cb->segsize = le16_to_cpu(*(__le16 *)ptr);
+-
+- if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
+- scp->persist = 0;
+- scp->addrrem = cb->src_port;
+- sk->sk_state = TCP_ESTABLISHED;
+- scp->state = DN_RUN;
+- scp->services_rem = cb->services;
+- scp->info_rem = cb->info;
+- scp->segsize_rem = cb->segsize;
+-
+- if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
+- scp->max_window = decnet_no_fc_max_cwnd;
+-
+- if (skb->len > 0) {
+- u16 dlen = *skb->data;
+- if ((dlen <= 16) && (dlen <= skb->len)) {
+- scp->conndata_in.opt_optl = cpu_to_le16(dlen);
+- skb_copy_from_linear_data_offset(skb, 1,
+- scp->conndata_in.opt_data, dlen);
+- }
+- }
+- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
+- if (!sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+- }
+-
+-out:
+- kfree_skb(skb);
+-}
+-
+-static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->state == DN_CI) {
+- scp->state = DN_CD;
+- scp->persist = 0;
+- }
+-
+- kfree_skb(skb);
+-}
+-
+-static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- unsigned short reason;
+-
+- if (skb->len < 2)
+- goto out;
+-
+- reason = le16_to_cpu(*(__le16 *)skb->data);
+- skb_pull(skb, 2);
+-
+- scp->discdata_in.opt_status = cpu_to_le16(reason);
+- scp->discdata_in.opt_optl = 0;
+- memset(scp->discdata_in.opt_data, 0, 16);
+-
+- if (skb->len > 0) {
+- u16 dlen = *skb->data;
+- if ((dlen <= 16) && (dlen <= skb->len)) {
+- scp->discdata_in.opt_optl = cpu_to_le16(dlen);
+- skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
+- }
+- }
+-
+- scp->addrrem = cb->src_port;
+- sk->sk_state = TCP_CLOSE;
+-
+- switch (scp->state) {
+- case DN_CI:
+- case DN_CD:
+- scp->state = DN_RJ;
+- sk->sk_err = ECONNREFUSED;
+- break;
+- case DN_RUN:
+- sk->sk_shutdown |= SHUTDOWN_MASK;
+- scp->state = DN_DN;
+- break;
+- case DN_DI:
+- scp->state = DN_DIC;
+- break;
+- }
+-
+- if (!sock_flag(sk, SOCK_DEAD)) {
+- if (sk->sk_socket->state != SS_UNCONNECTED)
+- sk->sk_socket->state = SS_DISCONNECTING;
+- sk->sk_state_change(sk);
+- }
+-
+- /*
+- * It appears that its possible for remote machines to send disc
+- * init messages with no port identifier if we are in the CI and
+- * possibly also the CD state. Obviously we shouldn't reply with
+- * a message if we don't know what the end point is.
+- */
+- if (scp->addrrem) {
+- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
+- }
+- scp->persist_fxn = dn_destroy_timer;
+- scp->persist = dn_nsp_persist(sk);
+-
+-out:
+- kfree_skb(skb);
+-}
+-
+-/*
+- * disc_conf messages are also called no_resources or no_link
+- * messages depending upon the "reason" field.
+- */
+-static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned short reason;
+-
+- if (skb->len != 2)
+- goto out;
+-
+- reason = le16_to_cpu(*(__le16 *)skb->data);
+-
+- sk->sk_state = TCP_CLOSE;
+-
+- switch (scp->state) {
+- case DN_CI:
+- scp->state = DN_NR;
+- break;
+- case DN_DR:
+- if (reason == NSP_REASON_DC)
+- scp->state = DN_DRC;
+- if (reason == NSP_REASON_NL)
+- scp->state = DN_CN;
+- break;
+- case DN_DI:
+- scp->state = DN_DIC;
+- break;
+- case DN_RUN:
+- sk->sk_shutdown |= SHUTDOWN_MASK;
+- fallthrough;
+- case DN_CC:
+- scp->state = DN_CN;
+- }
+-
+- if (!sock_flag(sk, SOCK_DEAD)) {
+- if (sk->sk_socket->state != SS_UNCONNECTED)
+- sk->sk_socket->state = SS_DISCONNECTING;
+- sk->sk_state_change(sk);
+- }
+-
+- scp->persist_fxn = dn_destroy_timer;
+- scp->persist = dn_nsp_persist(sk);
+-
+-out:
+- kfree_skb(skb);
+-}
+-
+-static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned short segnum;
+- unsigned char lsflags;
+- signed char fcval;
+- int wake_up = 0;
+- char *ptr = skb->data;
+- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
+-
+- if (skb->len != 4)
+- goto out;
+-
+- segnum = le16_to_cpu(*(__le16 *)ptr);
+- ptr += 2;
+- lsflags = *(unsigned char *)ptr++;
+- fcval = *ptr;
+-
+- /*
+- * Here we ignore erroneous packets which should really
+- * should cause a connection abort. It is not critical
+- * for now though.
+- */
+- if (lsflags & 0xf8)
+- goto out;
+-
+- if (seq_next(scp->numoth_rcv, segnum)) {
+- seq_add(&scp->numoth_rcv, 1);
+- switch(lsflags & 0x04) { /* FCVAL INT */
+- case 0x00: /* Normal Request */
+- switch(lsflags & 0x03) { /* FCVAL MOD */
+- case 0x00: /* Request count */
+- if (fcval < 0) {
+- unsigned char p_fcval = -fcval;
+- if ((scp->flowrem_dat > p_fcval) &&
+- (fctype == NSP_FC_SCMC)) {
+- scp->flowrem_dat -= p_fcval;
+- }
+- } else if (fcval > 0) {
+- scp->flowrem_dat += fcval;
+- wake_up = 1;
+- }
+- break;
+- case 0x01: /* Stop outgoing data */
+- scp->flowrem_sw = DN_DONTSEND;
+- break;
+- case 0x02: /* Ok to start again */
+- scp->flowrem_sw = DN_SEND;
+- dn_nsp_output(sk);
+- wake_up = 1;
+- }
+- break;
+- case 0x04: /* Interrupt Request */
+- if (fcval > 0) {
+- scp->flowrem_oth += fcval;
+- wake_up = 1;
+- }
+- break;
+- }
+- if (wake_up && !sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+- }
+-
+- dn_nsp_send_oth_ack(sk);
+-
+-out:
+- kfree_skb(skb);
+-}
+-
+-/*
+- * Copy of sock_queue_rcv_skb (from sock.h) without
+- * bh_lock_sock() (its already held when this is called) which
+- * also allows data and other data to be queued to a socket.
+- */
+-static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
+-{
+- int err;
+-
+- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+- number of warnings when compiling with -W --ANK
+- */
+- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+- (unsigned int)sk->sk_rcvbuf) {
+- err = -ENOMEM;
+- goto out;
+- }
+-
+- err = sk_filter(sk, skb);
+- if (err)
+- goto out;
+-
+- skb_set_owner_r(skb, sk);
+- skb_queue_tail(queue, skb);
+-
+- if (!sock_flag(sk, SOCK_DEAD))
+- sk->sk_data_ready(sk);
+-out:
+- return err;
+-}
+-
+-static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned short segnum;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- int queued = 0;
+-
+- if (skb->len < 2)
+- goto out;
+-
+- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
+- skb_pull(skb, 2);
+-
+- if (seq_next(scp->numoth_rcv, segnum)) {
+-
+- if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) {
+- seq_add(&scp->numoth_rcv, 1);
+- scp->other_report = 0;
+- queued = 1;
+- }
+- }
+-
+- dn_nsp_send_oth_ack(sk);
+-out:
+- if (!queued)
+- kfree_skb(skb);
+-}
+-
+-static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
+-{
+- int queued = 0;
+- unsigned short segnum;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (skb->len < 2)
+- goto out;
+-
+- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
+- skb_pull(skb, 2);
+-
+- if (seq_next(scp->numdat_rcv, segnum)) {
+- if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) {
+- seq_add(&scp->numdat_rcv, 1);
+- queued = 1;
+- }
+-
+- if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) {
+- scp->flowloc_sw = DN_DONTSEND;
+- dn_nsp_send_link(sk, DN_DONTSEND, 0);
+- }
+- }
+-
+- dn_nsp_send_data_ack(sk);
+-out:
+- if (!queued)
+- kfree_skb(skb);
+-}
+-
+-/*
+- * If one of our conninit messages is returned, this function
+- * deals with it. It puts the socket into the NO_COMMUNICATION
+- * state.
+- */
+-static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->state == DN_CI) {
+- scp->state = DN_NC;
+- sk->sk_state = TCP_CLOSE;
+- if (!sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+- }
+-
+- kfree_skb(skb);
+-}
+-
+-static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- int ret = NET_RX_DROP;
+-
+- /* Must not reply to returned packets */
+- if (cb->rt_flags & DN_RT_F_RTS)
+- goto out;
+-
+- if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) {
+- switch (cb->nsp_flags & 0x70) {
+- case 0x10:
+- case 0x60: /* (Retransmitted) Connect Init */
+- dn_nsp_return_disc(skb, NSP_DISCINIT, reason);
+- ret = NET_RX_SUCCESS;
+- break;
+- case 0x20: /* Connect Confirm */
+- dn_nsp_return_disc(skb, NSP_DISCCONF, reason);
+- ret = NET_RX_SUCCESS;
+- break;
+- }
+- }
+-
+-out:
+- kfree_skb(skb);
+- return ret;
+-}
+-
+-static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
+- struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct sock *sk = NULL;
+- unsigned char *ptr = (unsigned char *)skb->data;
+- unsigned short reason = NSP_REASON_NL;
+-
+- if (!pskb_may_pull(skb, 2))
+- goto free_out;
+-
+- skb_reset_transport_header(skb);
+- cb->nsp_flags = *ptr++;
+-
+- if (decnet_debug_level & 2)
+- printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags);
+-
+- if (cb->nsp_flags & 0x83)
+- goto free_out;
+-
+- /*
+- * Filter out conninits and useless packet types
+- */
+- if ((cb->nsp_flags & 0x0c) == 0x08) {
+- switch (cb->nsp_flags & 0x70) {
+- case 0x00: /* NOP */
+- case 0x70: /* Reserved */
+- case 0x50: /* Reserved, Phase II node init */
+- goto free_out;
+- case 0x10:
+- case 0x60:
+- if (unlikely(cb->rt_flags & DN_RT_F_RTS))
+- goto free_out;
+- sk = dn_find_listener(skb, &reason);
+- goto got_it;
+- }
+- }
+-
+- if (!pskb_may_pull(skb, 3))
+- goto free_out;
+-
+- /*
+- * Grab the destination address.
+- */
+- cb->dst_port = *(__le16 *)ptr;
+- cb->src_port = 0;
+- ptr += 2;
+-
+- /*
+- * If not a connack, grab the source address too.
+- */
+- if (pskb_may_pull(skb, 5)) {
+- cb->src_port = *(__le16 *)ptr;
+- ptr += 2;
+- skb_pull(skb, 5);
+- }
+-
+- /*
+- * Returned packets...
+- * Swap src & dst and look up in the normal way.
+- */
+- if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
+- swap(cb->dst_port, cb->src_port);
+- swap(cb->dst, cb->src);
+- }
+-
+- /*
+- * Find the socket to which this skb is destined.
+- */
+- sk = dn_find_by_skb(skb);
+-got_it:
+- if (sk != NULL) {
+- struct dn_scp *scp = DN_SK(sk);
+-
+- /* Reset backoff */
+- scp->nsp_rxtshift = 0;
+-
+- /*
+- * We linearize everything except data segments here.
+- */
+- if (cb->nsp_flags & ~0x60) {
+- if (unlikely(skb_linearize(skb)))
+- goto free_out;
+- }
+-
+- return sk_receive_skb(sk, skb, 0);
+- }
+-
+- return dn_nsp_no_socket(skb, reason);
+-
+-free_out:
+- kfree_skb(skb);
+- return NET_RX_DROP;
+-}
+-
+-int dn_nsp_rx(struct sk_buff *skb)
+-{
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_nsp_rx_packet);
+-}
+-
+-/*
+- * This is the main receive routine for sockets. It is called
+- * from the above when the socket is not busy, and also from
+- * sock_release() when there is a backlog queued up.
+- */
+-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+- if (cb->rt_flags & DN_RT_F_RTS) {
+- if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68)
+- dn_returned_conn_init(sk, skb);
+- else
+- kfree_skb(skb);
+- return NET_RX_SUCCESS;
+- }
+-
+- /*
+- * Control packet.
+- */
+- if ((cb->nsp_flags & 0x0c) == 0x08) {
+- switch (cb->nsp_flags & 0x70) {
+- case 0x10:
+- case 0x60:
+- dn_nsp_conn_init(sk, skb);
+- break;
+- case 0x20:
+- dn_nsp_conn_conf(sk, skb);
+- break;
+- case 0x30:
+- dn_nsp_disc_init(sk, skb);
+- break;
+- case 0x40:
+- dn_nsp_disc_conf(sk, skb);
+- break;
+- }
+-
+- } else if (cb->nsp_flags == 0x24) {
+- /*
+- * Special for connacks, 'cos they don't have
+- * ack data or ack otherdata info.
+- */
+- dn_nsp_conn_ack(sk, skb);
+- } else {
+- int other = 1;
+-
+- /* both data and ack frames can kick a CC socket into RUN */
+- if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) {
+- scp->state = DN_RUN;
+- sk->sk_state = TCP_ESTABLISHED;
+- sk->sk_state_change(sk);
+- }
+-
+- if ((cb->nsp_flags & 0x1c) == 0)
+- other = 0;
+- if (cb->nsp_flags == 0x04)
+- other = 0;
+-
+- /*
+- * Read out ack data here, this applies equally
+- * to data, other data, link service and both
+- * ack data and ack otherdata.
+- */
+- dn_process_ack(sk, skb, other);
+-
+- /*
+- * If we've some sort of data here then call a
+- * suitable routine for dealing with it, otherwise
+- * the packet is an ack and can be discarded.
+- */
+- if ((cb->nsp_flags & 0x0c) == 0) {
+-
+- if (scp->state != DN_RUN)
+- goto free_out;
+-
+- switch (cb->nsp_flags) {
+- case 0x10: /* LS */
+- dn_nsp_linkservice(sk, skb);
+- break;
+- case 0x30: /* OD */
+- dn_nsp_otherdata(sk, skb);
+- break;
+- default:
+- dn_nsp_data(sk, skb);
+- }
+-
+- } else { /* Ack, chuck it out here */
+-free_out:
+- kfree_skb(skb);
+- }
+- }
+-
+- return NET_RX_SUCCESS;
+-}
+diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
+deleted file mode 100644
+index eadc895831689..0000000000000
+--- a/net/decnet/dn_nsp_out.c
++++ /dev/null
+@@ -1,695 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Network Services Protocol (Output)
+- *
+- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
+- *
+- * Changes:
+- *
+- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
+- * original dn_nsp.c.
+- * Steve Whitehouse: Updated to work with my new routing architecture.
+- * Steve Whitehouse: Added changes from Eduardo Serrat's patches.
+- * Steve Whitehouse: Now conninits have the "return" bit set.
+- * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL!
+- * Moved output state machine into one function
+- * Steve Whitehouse: New output state machine
+- * Paul Koning: Connect Confirm message fix.
+- * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets.
+- * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean
+- * Steve Whitehouse: Moved dn_nsp_send() in here from route.h
+- */
+-
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-*******************************************************************************/
+-
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/kernel.h>
+-#include <linux/timer.h>
+-#include <linux/string.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/netdevice.h>
+-#include <linux/inet.h>
+-#include <linux/route.h>
+-#include <linux/slab.h>
+-#include <net/sock.h>
+-#include <linux/fcntl.h>
+-#include <linux/mm.h>
+-#include <linux/termios.h>
+-#include <linux/interrupt.h>
+-#include <linux/proc_fs.h>
+-#include <linux/stat.h>
+-#include <linux/init.h>
+-#include <linux/poll.h>
+-#include <linux/if_packet.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/dn.h>
+-#include <net/dn_nsp.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-
+-
+-static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+-
+-static void dn_nsp_send(struct sk_buff *skb)
+-{
+- struct sock *sk = skb->sk;
+- struct dn_scp *scp = DN_SK(sk);
+- struct dst_entry *dst;
+- struct flowidn fld;
+-
+- skb_reset_transport_header(skb);
+- scp->stamp = jiffies;
+-
+- dst = sk_dst_check(sk, 0);
+- if (dst) {
+-try_again:
+- skb_dst_set(skb, dst);
+- dst_output(&init_net, skb->sk, skb);
+- return;
+- }
+-
+- memset(&fld, 0, sizeof(fld));
+- fld.flowidn_oif = sk->sk_bound_dev_if;
+- fld.saddr = dn_saddr2dn(&scp->addr);
+- fld.daddr = dn_saddr2dn(&scp->peer);
+- dn_sk_ports_copy(&fld, scp);
+- fld.flowidn_proto = DNPROTO_NSP;
+- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
+- dst = sk_dst_get(sk);
+- sk->sk_route_caps = dst->dev->features;
+- goto try_again;
+- }
+-
+- sk->sk_err = EHOSTUNREACH;
+- if (!sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+-}
+-
+-
+-/*
+- * If sk == NULL, then we assume that we are supposed to be making
+- * a routing layer skb. If sk != NULL, then we are supposed to be
+- * creating an skb for the NSP layer.
+- *
+- * The eventual aim is for each socket to have a cached header size
+- * for its outgoing packets, and to set hdr from this when sk != NULL.
+- */
+-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
+-{
+- struct sk_buff *skb;
+- int hdr = 64;
+-
+- if ((skb = alloc_skb(size + hdr, pri)) == NULL)
+- return NULL;
+-
+- skb->protocol = htons(ETH_P_DNA_RT);
+- skb->pkt_type = PACKET_OUTGOING;
+-
+- if (sk)
+- skb_set_owner_w(skb, sk);
+-
+- skb_reserve(skb, hdr);
+-
+- return skb;
+-}
+-
+-/*
+- * Calculate persist timer based upon the smoothed round
+- * trip time and the variance. Backoff according to the
+- * nsp_backoff[] array.
+- */
+-unsigned long dn_nsp_persist(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
+-
+- t *= nsp_backoff[scp->nsp_rxtshift];
+-
+- if (t < HZ) t = HZ;
+- if (t > (600*HZ)) t = (600*HZ);
+-
+- if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT)
+- scp->nsp_rxtshift++;
+-
+- /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */
+-
+- return t;
+-}
+-
+-/*
+- * This is called each time we get an estimate for the rtt
+- * on the link.
+- */
+-static void dn_nsp_rtt(struct sock *sk, long rtt)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- long srtt = (long)scp->nsp_srtt;
+- long rttvar = (long)scp->nsp_rttvar;
+- long delta;
+-
+- /*
+- * If the jiffies clock flips over in the middle of timestamp
+- * gathering this value might turn out negative, so we make sure
+- * that is it always positive here.
+- */
+- if (rtt < 0)
+- rtt = -rtt;
+- /*
+- * Add new rtt to smoothed average
+- */
+- delta = ((rtt << 3) - srtt);
+- srtt += (delta >> 3);
+- if (srtt >= 1)
+- scp->nsp_srtt = (unsigned long)srtt;
+- else
+- scp->nsp_srtt = 1;
+-
+- /*
+- * Add new rtt variance to smoothed varience
+- */
+- delta >>= 1;
+- rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
+- if (rttvar >= 1)
+- scp->nsp_rttvar = (unsigned long)rttvar;
+- else
+- scp->nsp_rttvar = 1;
+-
+- /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */
+-}
+-
+-/**
+- * dn_nsp_clone_and_send - Send a data packet by cloning it
+- * @skb: The packet to clone and transmit
+- * @gfp: memory allocation flag
+- *
+- * Clone a queued data or other data packet and transmit it.
+- *
+- * Returns: The number of times the packet has been sent previously
+- */
+-static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb,
+- gfp_t gfp)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct sk_buff *skb2;
+- int ret = 0;
+-
+- if ((skb2 = skb_clone(skb, gfp)) != NULL) {
+- ret = cb->xmit_count;
+- cb->xmit_count++;
+- cb->stamp = jiffies;
+- skb2->sk = skb->sk;
+- dn_nsp_send(skb2);
+- }
+-
+- return ret;
+-}
+-
+-/**
+- * dn_nsp_output - Try and send something from socket queues
+- * @sk: The socket whose queues are to be investigated
+- *
+- * Try and send the packet on the end of the data and other data queues.
+- * Other data gets priority over data, and if we retransmit a packet we
+- * reduce the window by dividing it in two.
+- *
+- */
+-void dn_nsp_output(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff *skb;
+- unsigned int reduce_win = 0;
+-
+- /*
+- * First we check for otherdata/linkservice messages
+- */
+- if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL)
+- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
+-
+- /*
+- * If we may not send any data, we don't.
+- * If we are still trying to get some other data down the
+- * channel, we don't try and send any data.
+- */
+- if (reduce_win || (scp->flowrem_sw != DN_SEND))
+- goto recalc_window;
+-
+- if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL)
+- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
+-
+- /*
+- * If we've sent any frame more than once, we cut the
+- * send window size in half. There is always a minimum
+- * window size of one available.
+- */
+-recalc_window:
+- if (reduce_win) {
+- scp->snd_window >>= 1;
+- if (scp->snd_window < NSP_MIN_WINDOW)
+- scp->snd_window = NSP_MIN_WINDOW;
+- }
+-}
+-
+-int dn_nsp_xmit_timeout(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- dn_nsp_output(sk);
+-
+- if (!skb_queue_empty(&scp->data_xmit_queue) ||
+- !skb_queue_empty(&scp->other_xmit_queue))
+- scp->persist = dn_nsp_persist(sk);
+-
+- return 0;
+-}
+-
+-static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len)
+-{
+- unsigned char *ptr = skb_push(skb, len);
+-
+- BUG_ON(len < 5);
+-
+- *ptr++ = msgflag;
+- *((__le16 *)ptr) = scp->addrrem;
+- ptr += 2;
+- *((__le16 *)ptr) = scp->addrloc;
+- ptr += 2;
+- return (__le16 __force *)ptr;
+-}
+-
+-static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- unsigned short acknum = scp->numdat_rcv & 0x0FFF;
+- unsigned short ackcrs = scp->numoth_rcv & 0x0FFF;
+- __le16 *ptr;
+-
+- BUG_ON(hlen < 9);
+-
+- scp->ackxmt_dat = acknum;
+- scp->ackxmt_oth = ackcrs;
+- acknum |= 0x8000;
+- ackcrs |= 0x8000;
+-
+- /* If this is an "other data/ack" message, swap acknum and ackcrs */
+- if (other)
+- swap(acknum, ackcrs);
+-
+- /* Set "cross subchannel" bit in ackcrs */
+- ackcrs |= 0x2000;
+-
+- ptr = dn_mk_common_header(scp, skb, msgflag, hlen);
+-
+- *ptr++ = cpu_to_le16(acknum);
+- *ptr++ = cpu_to_le16(ackcrs);
+-
+- return ptr;
+-}
+-
+-static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth);
+-
+- if (unlikely(oth)) {
+- cb->segnum = scp->numoth;
+- seq_add(&scp->numoth, 1);
+- } else {
+- cb->segnum = scp->numdat;
+- seq_add(&scp->numdat, 1);
+- }
+- *(ptr++) = cpu_to_le16(cb->segnum);
+-
+- return ptr;
+-}
+-
+-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb,
+- gfp_t gfp, int oth)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
+-
+- cb->xmit_count = 0;
+- dn_nsp_mk_data_header(sk, skb, oth);
+-
+- /*
+- * Slow start: If we have been idle for more than
+- * one RTT, then reset window to min size.
+- */
+- if ((jiffies - scp->stamp) > t)
+- scp->snd_window = NSP_MIN_WINDOW;
+-
+- if (oth)
+- skb_queue_tail(&scp->other_xmit_queue, skb);
+- else
+- skb_queue_tail(&scp->data_xmit_queue, skb);
+-
+- if (scp->flowrem_sw != DN_SEND)
+- return;
+-
+- dn_nsp_clone_and_send(skb, gfp);
+-}
+-
+-
+-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff *skb2, *n, *ack = NULL;
+- int wakeup = 0;
+- int try_retrans = 0;
+- unsigned long reftime = cb->stamp;
+- unsigned long pkttime;
+- unsigned short xmit_count;
+- unsigned short segnum;
+-
+- skb_queue_walk_safe(q, skb2, n) {
+- struct dn_skb_cb *cb2 = DN_SKB_CB(skb2);
+-
+- if (dn_before_or_equal(cb2->segnum, acknum))
+- ack = skb2;
+-
+- /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */
+-
+- if (ack == NULL)
+- continue;
+-
+- /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */
+-
+- /* Does _last_ packet acked have xmit_count > 1 */
+- try_retrans = 0;
+- /* Remember to wake up the sending process */
+- wakeup = 1;
+- /* Keep various statistics */
+- pkttime = cb2->stamp;
+- xmit_count = cb2->xmit_count;
+- segnum = cb2->segnum;
+- /* Remove and drop ack'ed packet */
+- skb_unlink(ack, q);
+- kfree_skb(ack);
+- ack = NULL;
+-
+- /*
+- * We don't expect to see acknowledgements for packets we
+- * haven't sent yet.
+- */
+- WARN_ON(xmit_count == 0);
+-
+- /*
+- * If the packet has only been sent once, we can use it
+- * to calculate the RTT and also open the window a little
+- * further.
+- */
+- if (xmit_count == 1) {
+- if (dn_equal(segnum, acknum))
+- dn_nsp_rtt(sk, (long)(pkttime - reftime));
+-
+- if (scp->snd_window < scp->max_window)
+- scp->snd_window++;
+- }
+-
+- /*
+- * Packet has been sent more than once. If this is the last
+- * packet to be acknowledged then we want to send the next
+- * packet in the send queue again (assumes the remote host does
+- * go-back-N error control).
+- */
+- if (xmit_count > 1)
+- try_retrans = 1;
+- }
+-
+- if (try_retrans)
+- dn_nsp_output(sk);
+-
+- return wakeup;
+-}
+-
+-void dn_nsp_send_data_ack(struct sock *sk)
+-{
+- struct sk_buff *skb = NULL;
+-
+- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
+- return;
+-
+- skb_reserve(skb, 9);
+- dn_mk_ack_header(sk, skb, 0x04, 9, 0);
+- dn_nsp_send(skb);
+-}
+-
+-void dn_nsp_send_oth_ack(struct sock *sk)
+-{
+- struct sk_buff *skb = NULL;
+-
+- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
+- return;
+-
+- skb_reserve(skb, 9);
+- dn_mk_ack_header(sk, skb, 0x14, 9, 1);
+- dn_nsp_send(skb);
+-}
+-
+-
+-void dn_send_conn_ack (struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff *skb = NULL;
+- struct nsp_conn_ack_msg *msg;
+-
+- if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
+- return;
+-
+- msg = skb_put(skb, 3);
+- msg->msgflg = 0x24;
+- msg->dstaddr = scp->addrrem;
+-
+- dn_nsp_send(skb);
+-}
+-
+-static int dn_nsp_retrans_conn_conf(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->state == DN_CC)
+- dn_send_conn_conf(sk, GFP_ATOMIC);
+-
+- return 0;
+-}
+-
+-void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff *skb = NULL;
+- struct nsp_conn_init_msg *msg;
+- __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
+-
+- if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
+- return;
+-
+- msg = skb_put(skb, sizeof(*msg));
+- msg->msgflg = 0x28;
+- msg->dstaddr = scp->addrrem;
+- msg->srcaddr = scp->addrloc;
+- msg->services = scp->services_loc;
+- msg->info = scp->info_loc;
+- msg->segsize = cpu_to_le16(scp->segsize_loc);
+-
+- skb_put_u8(skb, len);
+-
+- if (len > 0)
+- skb_put_data(skb, scp->conndata_out.opt_data, len);
+-
+-
+- dn_nsp_send(skb);
+-
+- scp->persist = dn_nsp_persist(sk);
+- scp->persist_fxn = dn_nsp_retrans_conn_conf;
+-}
+-
+-
+-static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
+- unsigned short reason, gfp_t gfp,
+- struct dst_entry *dst,
+- int ddl, unsigned char *dd, __le16 rem, __le16 loc)
+-{
+- struct sk_buff *skb = NULL;
+- int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0);
+- unsigned char *msg;
+-
+- if ((dst == NULL) || (rem == 0)) {
+- net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n",
+- le16_to_cpu(rem), dst);
+- return;
+- }
+-
+- if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL)
+- return;
+-
+- msg = skb_put(skb, size);
+- *msg++ = msgflg;
+- *(__le16 *)msg = rem;
+- msg += 2;
+- *(__le16 *)msg = loc;
+- msg += 2;
+- *(__le16 *)msg = cpu_to_le16(reason);
+- msg += 2;
+- if (msgflg == NSP_DISCINIT)
+- *msg++ = ddl;
+-
+- if (ddl) {
+- memcpy(msg, dd, ddl);
+- }
+-
+- /*
+- * This doesn't go via the dn_nsp_send() function since we need
+- * to be able to send disc packets out which have no socket
+- * associations.
+- */
+- skb_dst_set(skb, dst_clone(dst));
+- dst_output(&init_net, skb->sk, skb);
+-}
+-
+-
+-void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
+- unsigned short reason, gfp_t gfp)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- int ddl = 0;
+-
+- if (msgflg == NSP_DISCINIT)
+- ddl = le16_to_cpu(scp->discdata_out.opt_optl);
+-
+- if (reason == 0)
+- reason = le16_to_cpu(scp->discdata_out.opt_status);
+-
+- dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl,
+- scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
+-}
+-
+-
+-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg,
+- unsigned short reason)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- int ddl = 0;
+- gfp_t gfp = GFP_ATOMIC;
+-
+- dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl,
+- NULL, cb->src_port, cb->dst_port);
+-}
+-
+-
+-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct sk_buff *skb;
+- unsigned char *ptr;
+- gfp_t gfp = GFP_ATOMIC;
+-
+- if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL)
+- return;
+-
+- skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
+- ptr = skb_put(skb, 2);
+- DN_SKB_CB(skb)->nsp_flags = 0x10;
+- *ptr++ = lsflags;
+- *ptr = fcval;
+-
+- dn_nsp_queue_xmit(sk, skb, gfp, 1);
+-
+- scp->persist = dn_nsp_persist(sk);
+- scp->persist_fxn = dn_nsp_xmit_timeout;
+-}
+-
+-static int dn_nsp_retrans_conninit(struct sock *sk)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+-
+- if (scp->state == DN_CI)
+- dn_nsp_send_conninit(sk, NSP_RCI);
+-
+- return 0;
+-}
+-
+-void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
+-{
+- struct dn_scp *scp = DN_SK(sk);
+- struct nsp_conn_init_msg *msg;
+- unsigned char aux;
+- unsigned char menuver;
+- struct dn_skb_cb *cb;
+- unsigned char type = 1;
+- gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC;
+- struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation);
+-
+- if (!skb)
+- return;
+-
+- cb = DN_SKB_CB(skb);
+- msg = skb_put(skb, sizeof(*msg));
+-
+- msg->msgflg = msgflg;
+- msg->dstaddr = 0x0000; /* Remote Node will assign it*/
+-
+- msg->srcaddr = scp->addrloc;
+- msg->services = scp->services_loc; /* Requested flow control */
+- msg->info = scp->info_loc; /* Version Number */
+- msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */
+-
+- if (scp->peer.sdn_objnum)
+- type = 0;
+-
+- skb_put(skb, dn_sockaddr2username(&scp->peer,
+- skb_tail_pointer(skb), type));
+- skb_put(skb, dn_sockaddr2username(&scp->addr,
+- skb_tail_pointer(skb), 2));
+-
+- menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
+- if (scp->peer.sdn_flags & SDF_PROXY)
+- menuver |= DN_MENUVER_PRX;
+- if (scp->peer.sdn_flags & SDF_UICPROXY)
+- menuver |= DN_MENUVER_UIC;
+-
+- skb_put_u8(skb, menuver); /* Menu Version */
+-
+- aux = scp->accessdata.acc_userl;
+- skb_put_u8(skb, aux);
+- if (aux > 0)
+- skb_put_data(skb, scp->accessdata.acc_user, aux);
+-
+- aux = scp->accessdata.acc_passl;
+- skb_put_u8(skb, aux);
+- if (aux > 0)
+- skb_put_data(skb, scp->accessdata.acc_pass, aux);
+-
+- aux = scp->accessdata.acc_accl;
+- skb_put_u8(skb, aux);
+- if (aux > 0)
+- skb_put_data(skb, scp->accessdata.acc_acc, aux);
+-
+- aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
+- skb_put_u8(skb, aux);
+- if (aux > 0)
+- skb_put_data(skb, scp->conndata_out.opt_data, aux);
+-
+- scp->persist = dn_nsp_persist(sk);
+- scp->persist_fxn = dn_nsp_retrans_conninit;
+-
+- cb->rt_flags = DN_RT_F_RQR;
+-
+- dn_nsp_send(skb);
+-}
+diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
+deleted file mode 100644
+index 7e85f2a1ae254..0000000000000
+--- a/net/decnet/dn_route.c
++++ /dev/null
+@@ -1,1922 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Routing Functions (Endnode and Router)
+- *
+- * Authors: Steve Whitehouse <SteveW@ACM.org>
+- * Eduardo Marcelo Serrat <emserrat@geocities.com>
+- *
+- * Changes:
+- * Steve Whitehouse : Fixes to allow "intra-ethernet" and
+- * "return-to-sender" bits on outgoing
+- * packets.
+- * Steve Whitehouse : Timeouts for cached routes.
+- * Steve Whitehouse : Use dst cache for input routes too.
+- * Steve Whitehouse : Fixed error values in dn_send_skb.
+- * Steve Whitehouse : Rework routing functions to better fit
+- * DECnet routing design
+- * Alexey Kuznetsov : New SMP locking
+- * Steve Whitehouse : More SMP locking changes & dn_cache_dump()
+- * Steve Whitehouse : Prerouting NF hook, now really is prerouting.
+- * Fixed possible skb leak in rtnetlink funcs.
+- * Steve Whitehouse : Dave Miller's dynamic hash table sizing and
+- * Alexey Kuznetsov's finer grained locking
+- * from ipv4/route.c.
+- * Steve Whitehouse : Routing is now starting to look like a
+- * sensible set of code now, mainly due to
+- * my copying the IPv4 routing code. The
+- * hooks here are modified and will continue
+- * to evolve for a while.
+- * Steve Whitehouse : Real SMP at last :-) Also new netfilter
+- * stuff. Look out raw sockets your days
+- * are numbered!
+- * Steve Whitehouse : Added return-to-sender functions. Added
+- * backlog congestion level return codes.
+- * Steve Whitehouse : Fixed bug where routes were set up with
+- * no ref count on net devices.
+- * Steve Whitehouse : RCU for the route cache
+- * Steve Whitehouse : Preparations for the flow cache
+- * Steve Whitehouse : Prepare for nonlinear skbs
+- */
+-
+-/******************************************************************************
+- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
+-
+-*******************************************************************************/
+-
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/kernel.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/netdevice.h>
+-#include <linux/inet.h>
+-#include <linux/route.h>
+-#include <linux/in_route.h>
+-#include <linux/slab.h>
+-#include <net/sock.h>
+-#include <linux/mm.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/init.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/string.h>
+-#include <linux/netfilter_decnet.h>
+-#include <linux/rcupdate.h>
+-#include <linux/times.h>
+-#include <linux/export.h>
+-#include <asm/errno.h>
+-#include <net/net_namespace.h>
+-#include <net/netlink.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/fib_rules.h>
+-#include <net/dn.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_nsp.h>
+-#include <net/dn_route.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_fib.h>
+-
+-struct dn_rt_hash_bucket {
+- struct dn_route __rcu *chain;
+- spinlock_t lock;
+-};
+-
+-extern struct neigh_table dn_neigh_table;
+-
+-
+-static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00};
+-
+-static const int dn_rt_min_delay = 2 * HZ;
+-static const int dn_rt_max_delay = 10 * HZ;
+-static const int dn_rt_mtu_expires = 10 * 60 * HZ;
+-
+-static unsigned long dn_rt_deadline;
+-
+-static int dn_dst_gc(struct dst_ops *ops);
+-static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
+-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
+-static unsigned int dn_dst_mtu(const struct dst_entry *dst);
+-static void dn_dst_destroy(struct dst_entry *);
+-static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
+-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+-static void dn_dst_link_failure(struct sk_buff *);
+-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb , u32 mtu,
+- bool confirm_neigh);
+-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb);
+-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+- struct sk_buff *skb,
+- const void *daddr);
+-static int dn_route_input(struct sk_buff *);
+-static void dn_run_flush(struct timer_list *unused);
+-
+-static struct dn_rt_hash_bucket *dn_rt_hash_table;
+-static unsigned int dn_rt_hash_mask;
+-
+-static struct timer_list dn_route_timer;
+-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
+-int decnet_dst_gc_interval = 2;
+-
+-static struct dst_ops dn_dst_ops = {
+- .family = PF_DECnet,
+- .gc_thresh = 128,
+- .gc = dn_dst_gc,
+- .check = dn_dst_check,
+- .default_advmss = dn_dst_default_advmss,
+- .mtu = dn_dst_mtu,
+- .cow_metrics = dst_cow_metrics_generic,
+- .destroy = dn_dst_destroy,
+- .ifdown = dn_dst_ifdown,
+- .negative_advice = dn_dst_negative_advice,
+- .link_failure = dn_dst_link_failure,
+- .update_pmtu = dn_dst_update_pmtu,
+- .redirect = dn_dst_redirect,
+- .neigh_lookup = dn_dst_neigh_lookup,
+-};
+-
+-static void dn_dst_destroy(struct dst_entry *dst)
+-{
+- struct dn_route *rt = (struct dn_route *) dst;
+-
+- if (rt->n)
+- neigh_release(rt->n);
+- dst_destroy_metrics_generic(dst);
+-}
+-
+-static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
+-{
+- if (how) {
+- struct dn_route *rt = (struct dn_route *) dst;
+- struct neighbour *n = rt->n;
+-
+- if (n && n->dev == dev) {
+- n->dev = dev_net(dev)->loopback_dev;
+- dev_hold(n->dev);
+- dev_put(dev);
+- }
+- }
+-}
+-
+-static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
+-{
+- __u16 tmp = (__u16 __force)(src ^ dst);
+- tmp ^= (tmp >> 3);
+- tmp ^= (tmp >> 5);
+- tmp ^= (tmp >> 10);
+- return dn_rt_hash_mask & (unsigned int)tmp;
+-}
+-
+-static void dn_dst_check_expire(struct timer_list *unused)
+-{
+- int i;
+- struct dn_route *rt;
+- struct dn_route __rcu **rtp;
+- unsigned long now = jiffies;
+- unsigned long expire = 120 * HZ;
+-
+- for (i = 0; i <= dn_rt_hash_mask; i++) {
+- rtp = &dn_rt_hash_table[i].chain;
+-
+- spin_lock(&dn_rt_hash_table[i].lock);
+- while ((rt = rcu_dereference_protected(*rtp,
+- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
+- if (atomic_read(&rt->dst.__refcnt) > 1 ||
+- (now - rt->dst.lastuse) < expire) {
+- rtp = &rt->dn_next;
+- continue;
+- }
+- *rtp = rt->dn_next;
+- rt->dn_next = NULL;
+- dst_dev_put(&rt->dst);
+- dst_release(&rt->dst);
+- }
+- spin_unlock(&dn_rt_hash_table[i].lock);
+-
+- if ((jiffies - now) > 0)
+- break;
+- }
+-
+- mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
+-}
+-
+-static int dn_dst_gc(struct dst_ops *ops)
+-{
+- struct dn_route *rt;
+- struct dn_route __rcu **rtp;
+- int i;
+- unsigned long now = jiffies;
+- unsigned long expire = 10 * HZ;
+-
+- for (i = 0; i <= dn_rt_hash_mask; i++) {
+-
+- spin_lock_bh(&dn_rt_hash_table[i].lock);
+- rtp = &dn_rt_hash_table[i].chain;
+-
+- while ((rt = rcu_dereference_protected(*rtp,
+- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
+- if (atomic_read(&rt->dst.__refcnt) > 1 ||
+- (now - rt->dst.lastuse) < expire) {
+- rtp = &rt->dn_next;
+- continue;
+- }
+- *rtp = rt->dn_next;
+- rt->dn_next = NULL;
+- dst_dev_put(&rt->dst);
+- dst_release(&rt->dst);
+- break;
+- }
+- spin_unlock_bh(&dn_rt_hash_table[i].lock);
+- }
+-
+- return 0;
+-}
+-
+-/*
+- * The decnet standards don't impose a particular minimum mtu, what they
+- * do insist on is that the routing layer accepts a datagram of at least
+- * 230 bytes long. Here we have to subtract the routing header length from
+- * 230 to get the minimum acceptable mtu. If there is no neighbour, then we
+- * assume the worst and use a long header size.
+- *
+- * We update both the mtu and the advertised mss (i.e. the segment size we
+- * advertise to the other end).
+- */
+-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu,
+- bool confirm_neigh)
+-{
+- struct dn_route *rt = (struct dn_route *) dst;
+- struct neighbour *n = rt->n;
+- u32 min_mtu = 230;
+- struct dn_dev *dn;
+-
+- dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL;
+-
+- if (dn && dn->use_long == 0)
+- min_mtu -= 6;
+- else
+- min_mtu -= 21;
+-
+- if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
+- if (!(dst_metric_locked(dst, RTAX_MTU))) {
+- dst_metric_set(dst, RTAX_MTU, mtu);
+- dst_set_expires(dst, dn_rt_mtu_expires);
+- }
+- if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
+- u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
+- u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
+- if (!existing_mss || existing_mss > mss)
+- dst_metric_set(dst, RTAX_ADVMSS, mss);
+- }
+- }
+-}
+-
+-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb)
+-{
+-}
+-
+-/*
+- * When a route has been marked obsolete. (e.g. routing cache flush)
+- */
+-static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
+-{
+- return NULL;
+-}
+-
+-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
+-{
+- dst_release(dst);
+- return NULL;
+-}
+-
+-static void dn_dst_link_failure(struct sk_buff *skb)
+-{
+-}
+-
+-static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
+-{
+- return ((fl1->daddr ^ fl2->daddr) |
+- (fl1->saddr ^ fl2->saddr) |
+- (fl1->flowidn_mark ^ fl2->flowidn_mark) |
+- (fl1->flowidn_scope ^ fl2->flowidn_scope) |
+- (fl1->flowidn_oif ^ fl2->flowidn_oif) |
+- (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
+-}
+-
+-static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp)
+-{
+- struct dn_route *rth;
+- struct dn_route __rcu **rthp;
+- unsigned long now = jiffies;
+-
+- rthp = &dn_rt_hash_table[hash].chain;
+-
+- spin_lock_bh(&dn_rt_hash_table[hash].lock);
+- while ((rth = rcu_dereference_protected(*rthp,
+- lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
+- if (compare_keys(&rth->fld, &rt->fld)) {
+- /* Put it first */
+- *rthp = rth->dn_next;
+- rcu_assign_pointer(rth->dn_next,
+- dn_rt_hash_table[hash].chain);
+- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
+-
+- dst_hold_and_use(&rth->dst, now);
+- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
+-
+- dst_release_immediate(&rt->dst);
+- *rp = rth;
+- return 0;
+- }
+- rthp = &rth->dn_next;
+- }
+-
+- rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
+- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
+-
+- dst_hold_and_use(&rt->dst, now);
+- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
+- *rp = rt;
+- return 0;
+-}
+-
+-static void dn_run_flush(struct timer_list *unused)
+-{
+- int i;
+- struct dn_route *rt, *next;
+-
+- for (i = 0; i < dn_rt_hash_mask; i++) {
+- spin_lock_bh(&dn_rt_hash_table[i].lock);
+-
+- rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL);
+- if (!rt)
+- goto nothing_to_declare;
+-
+- for (; rt; rt = next) {
+- next = rcu_dereference_raw(rt->dn_next);
+- RCU_INIT_POINTER(rt->dn_next, NULL);
+- dst_dev_put(&rt->dst);
+- dst_release(&rt->dst);
+- }
+-
+-nothing_to_declare:
+- spin_unlock_bh(&dn_rt_hash_table[i].lock);
+- }
+-}
+-
+-static DEFINE_SPINLOCK(dn_rt_flush_lock);
+-
+-void dn_rt_cache_flush(int delay)
+-{
+- unsigned long now = jiffies;
+- int user_mode = !in_interrupt();
+-
+- if (delay < 0)
+- delay = dn_rt_min_delay;
+-
+- spin_lock_bh(&dn_rt_flush_lock);
+-
+- if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
+- long tmo = (long)(dn_rt_deadline - now);
+-
+- if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
+- tmo = 0;
+-
+- if (delay > tmo)
+- delay = tmo;
+- }
+-
+- if (delay <= 0) {
+- spin_unlock_bh(&dn_rt_flush_lock);
+- dn_run_flush(NULL);
+- return;
+- }
+-
+- if (dn_rt_deadline == 0)
+- dn_rt_deadline = now + dn_rt_max_delay;
+-
+- dn_rt_flush_timer.expires = now + delay;
+- add_timer(&dn_rt_flush_timer);
+- spin_unlock_bh(&dn_rt_flush_lock);
+-}
+-
+-/**
+- * dn_return_short - Return a short packet to its sender
+- * @skb: The packet to return
+- *
+- */
+-static int dn_return_short(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb;
+- unsigned char *ptr;
+- __le16 *src;
+- __le16 *dst;
+-
+- /* Add back headers */
+- skb_push(skb, skb->data - skb_network_header(skb));
+-
+- skb = skb_unshare(skb, GFP_ATOMIC);
+- if (!skb)
+- return NET_RX_DROP;
+-
+- cb = DN_SKB_CB(skb);
+- /* Skip packet length and point to flags */
+- ptr = skb->data + 2;
+- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
+-
+- dst = (__le16 *)ptr;
+- ptr += 2;
+- src = (__le16 *)ptr;
+- ptr += 2;
+- *ptr = 0; /* Zero hop count */
+-
+- swap(*src, *dst);
+-
+- skb->pkt_type = PACKET_OUTGOING;
+- dn_rt_finish_output(skb, NULL, NULL);
+- return NET_RX_SUCCESS;
+-}
+-
+-/**
+- * dn_return_long - Return a long packet to its sender
+- * @skb: The long format packet to return
+- *
+- */
+-static int dn_return_long(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb;
+- unsigned char *ptr;
+- unsigned char *src_addr, *dst_addr;
+- unsigned char tmp[ETH_ALEN];
+-
+- /* Add back all headers */
+- skb_push(skb, skb->data - skb_network_header(skb));
+-
+- skb = skb_unshare(skb, GFP_ATOMIC);
+- if (!skb)
+- return NET_RX_DROP;
+-
+- cb = DN_SKB_CB(skb);
+- /* Ignore packet length and point to flags */
+- ptr = skb->data + 2;
+-
+- /* Skip padding */
+- if (*ptr & DN_RT_F_PF) {
+- char padlen = (*ptr & ~DN_RT_F_PF);
+- ptr += padlen;
+- }
+-
+- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
+- ptr += 2;
+- dst_addr = ptr;
+- ptr += 8;
+- src_addr = ptr;
+- ptr += 6;
+- *ptr = 0; /* Zero hop count */
+-
+- /* Swap source and destination */
+- memcpy(tmp, src_addr, ETH_ALEN);
+- memcpy(src_addr, dst_addr, ETH_ALEN);
+- memcpy(dst_addr, tmp, ETH_ALEN);
+-
+- skb->pkt_type = PACKET_OUTGOING;
+- dn_rt_finish_output(skb, dst_addr, src_addr);
+- return NET_RX_SUCCESS;
+-}
+-
+-/**
+- * dn_route_rx_packet - Try and find a route for an incoming packet
+- * @net: The applicable net namespace
+- * @sk: Socket packet transmitted on
+- * @skb: The packet to find a route for
+- *
+- * Returns: result of input function if route is found, error code otherwise
+- */
+-static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb;
+- int err;
+-
+- err = dn_route_input(skb);
+- if (err == 0)
+- return dst_input(skb);
+-
+- cb = DN_SKB_CB(skb);
+- if (decnet_debug_level & 4) {
+- char *devname = skb->dev ? skb->dev->name : "???";
+-
+- printk(KERN_DEBUG
+- "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
+- (int)cb->rt_flags, devname, skb->len,
+- le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
+- err, skb->pkt_type);
+- }
+-
+- if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
+- switch (cb->rt_flags & DN_RT_PKT_MSK) {
+- case DN_RT_PKT_SHORT:
+- return dn_return_short(skb);
+- case DN_RT_PKT_LONG:
+- return dn_return_long(skb);
+- }
+- }
+-
+- kfree_skb(skb);
+- return NET_RX_DROP;
+-}
+-
+-static int dn_route_rx_long(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- unsigned char *ptr = skb->data;
+-
+- if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
+- goto drop_it;
+-
+- skb_pull(skb, 20);
+- skb_reset_transport_header(skb);
+-
+- /* Destination info */
+- ptr += 2;
+- cb->dst = dn_eth2dn(ptr);
+- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
+- goto drop_it;
+- ptr += 6;
+-
+-
+- /* Source info */
+- ptr += 2;
+- cb->src = dn_eth2dn(ptr);
+- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
+- goto drop_it;
+- ptr += 6;
+- /* Other junk */
+- ptr++;
+- cb->hops = *ptr++; /* Visit Count */
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_route_rx_packet);
+-
+-drop_it:
+- kfree_skb(skb);
+- return NET_RX_DROP;
+-}
+-
+-
+-
+-static int dn_route_rx_short(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- unsigned char *ptr = skb->data;
+-
+- if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
+- goto drop_it;
+-
+- skb_pull(skb, 5);
+- skb_reset_transport_header(skb);
+-
+- cb->dst = *(__le16 *)ptr;
+- ptr += 2;
+- cb->src = *(__le16 *)ptr;
+- ptr += 2;
+- cb->hops = *ptr & 0x3f;
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_route_rx_packet);
+-
+-drop_it:
+- kfree_skb(skb);
+- return NET_RX_DROP;
+-}
+-
+-static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- /*
+- * I know we drop the packet here, but that's considered success in
+- * this case
+- */
+- kfree_skb(skb);
+- return NET_RX_SUCCESS;
+-}
+-
+-static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- dn_dev_hello(skb);
+- dn_neigh_pointopoint_hello(skb);
+- return NET_RX_SUCCESS;
+-}
+-
+-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+-{
+- struct dn_skb_cb *cb;
+- unsigned char flags = 0;
+- __u16 len = le16_to_cpu(*(__le16 *)skb->data);
+- struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
+- unsigned char padlen = 0;
+-
+- if (!net_eq(dev_net(dev), &init_net))
+- goto dump_it;
+-
+- if (dn == NULL)
+- goto dump_it;
+-
+- skb = skb_share_check(skb, GFP_ATOMIC);
+- if (!skb)
+- goto out;
+-
+- if (!pskb_may_pull(skb, 3))
+- goto dump_it;
+-
+- skb_pull(skb, 2);
+-
+- if (len > skb->len)
+- goto dump_it;
+-
+- skb_trim(skb, len);
+-
+- flags = *skb->data;
+-
+- cb = DN_SKB_CB(skb);
+- cb->stamp = jiffies;
+- cb->iif = dev->ifindex;
+-
+- /*
+- * If we have padding, remove it.
+- */
+- if (flags & DN_RT_F_PF) {
+- padlen = flags & ~DN_RT_F_PF;
+- if (!pskb_may_pull(skb, padlen + 1))
+- goto dump_it;
+- skb_pull(skb, padlen);
+- flags = *skb->data;
+- }
+-
+- skb_reset_network_header(skb);
+-
+- /*
+- * Weed out future version DECnet
+- */
+- if (flags & DN_RT_F_VER)
+- goto dump_it;
+-
+- cb->rt_flags = flags;
+-
+- if (decnet_debug_level & 1)
+- printk(KERN_DEBUG
+- "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
+- (int)flags, dev->name, len, skb->len,
+- padlen);
+-
+- if (flags & DN_RT_PKT_CNTL) {
+- if (unlikely(skb_linearize(skb)))
+- goto dump_it;
+-
+- switch (flags & DN_RT_CNTL_MSK) {
+- case DN_RT_PKT_INIT:
+- dn_dev_init_pkt(skb);
+- break;
+- case DN_RT_PKT_VERI:
+- dn_dev_veri_pkt(skb);
+- break;
+- }
+-
+- if (dn->parms.state != DN_DEV_S_RU)
+- goto dump_it;
+-
+- switch (flags & DN_RT_CNTL_MSK) {
+- case DN_RT_PKT_HELO:
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_route_ptp_hello);
+-
+- case DN_RT_PKT_L1RT:
+- case DN_RT_PKT_L2RT:
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_route_discard);
+- case DN_RT_PKT_ERTH:
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_neigh_router_hello);
+-
+- case DN_RT_PKT_EEDH:
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+- &init_net, NULL, skb, skb->dev, NULL,
+- dn_neigh_endnode_hello);
+- }
+- } else {
+- if (dn->parms.state != DN_DEV_S_RU)
+- goto dump_it;
+-
+- skb_pull(skb, 1); /* Pull flags */
+-
+- switch (flags & DN_RT_PKT_MSK) {
+- case DN_RT_PKT_LONG:
+- return dn_route_rx_long(skb);
+- case DN_RT_PKT_SHORT:
+- return dn_route_rx_short(skb);
+- }
+- }
+-
+-dump_it:
+- kfree_skb(skb);
+-out:
+- return NET_RX_DROP;
+-}
+-
+-static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct dst_entry *dst = skb_dst(skb);
+- struct dn_route *rt = (struct dn_route *)dst;
+- struct net_device *dev = dst->dev;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+- int err = -EINVAL;
+-
+- if (rt->n == NULL)
+- goto error;
+-
+- skb->dev = dev;
+-
+- cb->src = rt->rt_saddr;
+- cb->dst = rt->rt_daddr;
+-
+- /*
+- * Always set the Intra-Ethernet bit on all outgoing packets
+- * originated on this node. Only valid flag from upper layers
+- * is return-to-sender-requested. Set hop count to 0 too.
+- */
+- cb->rt_flags &= ~DN_RT_F_RQR;
+- cb->rt_flags |= DN_RT_F_IE;
+- cb->hops = 0;
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
+- &init_net, sk, skb, NULL, dev,
+- dn_to_neigh_output);
+-
+-error:
+- net_dbg_ratelimited("dn_output: This should not happen\n");
+-
+- kfree_skb(skb);
+-
+- return err;
+-}
+-
+-static int dn_forward(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct dst_entry *dst = skb_dst(skb);
+- struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
+- struct dn_route *rt;
+- int header_len;
+- struct net_device *dev = skb->dev;
+-
+- if (skb->pkt_type != PACKET_HOST)
+- goto drop;
+-
+- /* Ensure that we have enough space for headers */
+- rt = (struct dn_route *)skb_dst(skb);
+- header_len = dn_db->use_long ? 21 : 6;
+- if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
+- goto drop;
+-
+- /*
+- * Hop count exceeded.
+- */
+- if (++cb->hops > 30)
+- goto drop;
+-
+- skb->dev = rt->dst.dev;
+-
+- /*
+- * If packet goes out same interface it came in on, then set
+- * the Intra-Ethernet bit. This has no effect for short
+- * packets, so we don't need to test for them here.
+- */
+- cb->rt_flags &= ~DN_RT_F_IE;
+- if (rt->rt_flags & RTCF_DOREDIRECT)
+- cb->rt_flags |= DN_RT_F_IE;
+-
+- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
+- &init_net, NULL, skb, dev, skb->dev,
+- dn_to_neigh_output);
+-
+-drop:
+- kfree_skb(skb);
+- return NET_RX_DROP;
+-}
+-
+-/*
+- * Used to catch bugs. This should never normally get
+- * called.
+- */
+-static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
+- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
+-
+- kfree_skb(skb);
+-
+- return NET_RX_DROP;
+-}
+-
+-static int dn_rt_bug(struct sk_buff *skb)
+-{
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+-
+- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
+- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
+-
+- kfree_skb(skb);
+-
+- return NET_RX_DROP;
+-}
+-
+-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
+-{
+- return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
+-}
+-
+-static unsigned int dn_dst_mtu(const struct dst_entry *dst)
+-{
+- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+-
+- return mtu ? : dst->dev->mtu;
+-}
+-
+-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+- struct sk_buff *skb,
+- const void *daddr)
+-{
+- return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
+-}
+-
+-static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
+-{
+- struct dn_fib_info *fi = res->fi;
+- struct net_device *dev = rt->dst.dev;
+- unsigned int mss_metric;
+- struct neighbour *n;
+-
+- if (fi) {
+- if (DN_FIB_RES_GW(*res) &&
+- DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+- rt->rt_gateway = DN_FIB_RES_GW(*res);
+- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+- }
+- rt->rt_type = res->type;
+-
+- if (dev != NULL && rt->n == NULL) {
+- n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
+- if (IS_ERR(n))
+- return PTR_ERR(n);
+- rt->n = n;
+- }
+-
+- if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
+- dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
+- mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
+- if (mss_metric) {
+- unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
+- if (mss_metric > mss)
+- dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
+- }
+- return 0;
+-}
+-
+-static inline int dn_match_addr(__le16 addr1, __le16 addr2)
+-{
+- __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
+- int match = 16;
+- while (tmp) {
+- tmp >>= 1;
+- match--;
+- }
+- return match;
+-}
+-
+-static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
+-{
+- __le16 saddr = 0;
+- struct dn_dev *dn_db;
+- struct dn_ifaddr *ifa;
+- int best_match = 0;
+- int ret;
+-
+- rcu_read_lock();
+- dn_db = rcu_dereference(dev->dn_ptr);
+- for (ifa = rcu_dereference(dn_db->ifa_list);
+- ifa != NULL;
+- ifa = rcu_dereference(ifa->ifa_next)) {
+- if (ifa->ifa_scope > scope)
+- continue;
+- if (!daddr) {
+- saddr = ifa->ifa_local;
+- break;
+- }
+- ret = dn_match_addr(daddr, ifa->ifa_local);
+- if (ret > best_match)
+- saddr = ifa->ifa_local;
+- if (best_match == 0)
+- saddr = ifa->ifa_local;
+- }
+- rcu_read_unlock();
+-
+- return saddr;
+-}
+-
+-static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
+-{
+- return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
+-}
+-
+-static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res)
+-{
+- __le16 mask = dnet_make_mask(res->prefixlen);
+- return (daddr&~mask)|res->fi->fib_nh->nh_gw;
+-}
+-
+-static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
+-{
+- struct flowidn fld = {
+- .daddr = oldflp->daddr,
+- .saddr = oldflp->saddr,
+- .flowidn_scope = RT_SCOPE_UNIVERSE,
+- .flowidn_mark = oldflp->flowidn_mark,
+- .flowidn_iif = LOOPBACK_IFINDEX,
+- .flowidn_oif = oldflp->flowidn_oif,
+- };
+- struct dn_route *rt = NULL;
+- struct net_device *dev_out = NULL, *dev;
+- struct neighbour *neigh = NULL;
+- unsigned int hash;
+- unsigned int flags = 0;
+- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
+- int err;
+- int free_res = 0;
+- __le16 gateway = 0;
+-
+- if (decnet_debug_level & 16)
+- printk(KERN_DEBUG
+- "dn_route_output_slow: dst=%04x src=%04x mark=%d"
+- " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
+- le16_to_cpu(oldflp->saddr),
+- oldflp->flowidn_mark, LOOPBACK_IFINDEX,
+- oldflp->flowidn_oif);
+-
+- /* If we have an output interface, verify its a DECnet device */
+- if (oldflp->flowidn_oif) {
+- dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
+- err = -ENODEV;
+- if (dev_out && dev_out->dn_ptr == NULL) {
+- dev_put(dev_out);
+- dev_out = NULL;
+- }
+- if (dev_out == NULL)
+- goto out;
+- }
+-
+- /* If we have a source address, verify that its a local address */
+- if (oldflp->saddr) {
+- err = -EADDRNOTAVAIL;
+-
+- if (dev_out) {
+- if (dn_dev_islocal(dev_out, oldflp->saddr))
+- goto source_ok;
+- dev_put(dev_out);
+- goto out;
+- }
+- rcu_read_lock();
+- for_each_netdev_rcu(&init_net, dev) {
+- if (!dev->dn_ptr)
+- continue;
+- if (!dn_dev_islocal(dev, oldflp->saddr))
+- continue;
+- if ((dev->flags & IFF_LOOPBACK) &&
+- oldflp->daddr &&
+- !dn_dev_islocal(dev, oldflp->daddr))
+- continue;
+-
+- dev_out = dev;
+- break;
+- }
+- rcu_read_unlock();
+- if (dev_out == NULL)
+- goto out;
+- dev_hold(dev_out);
+-source_ok:
+- ;
+- }
+-
+- /* No destination? Assume its local */
+- if (!fld.daddr) {
+- fld.daddr = fld.saddr;
+-
+- dev_put(dev_out);
+- err = -EINVAL;
+- dev_out = init_net.loopback_dev;
+- if (!dev_out->dn_ptr)
+- goto out;
+- err = -EADDRNOTAVAIL;
+- dev_hold(dev_out);
+- if (!fld.daddr) {
+- fld.daddr =
+- fld.saddr = dnet_select_source(dev_out, 0,
+- RT_SCOPE_HOST);
+- if (!fld.daddr)
+- goto done;
+- }
+- fld.flowidn_oif = LOOPBACK_IFINDEX;
+- res.type = RTN_LOCAL;
+- goto make_route;
+- }
+-
+- if (decnet_debug_level & 16)
+- printk(KERN_DEBUG
+- "dn_route_output_slow: initial checks complete."
+- " dst=%04x src=%04x oif=%d try_hard=%d\n",
+- le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
+- fld.flowidn_oif, try_hard);
+-
+- /*
+- * N.B. If the kernel is compiled without router support then
+- * dn_fib_lookup() will evaluate to non-zero so this if () block
+- * will always be executed.
+- */
+- err = -ESRCH;
+- if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
+- struct dn_dev *dn_db;
+- if (err != -ESRCH)
+- goto out;
+- /*
+- * Here the fallback is basically the standard algorithm for
+- * routing in endnodes which is described in the DECnet routing
+- * docs
+- *
+- * If we are not trying hard, look in neighbour cache.
+- * The result is tested to ensure that if a specific output
+- * device/source address was requested, then we honour that
+- * here
+- */
+- if (!try_hard) {
+- neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
+- if (neigh) {
+- if ((oldflp->flowidn_oif &&
+- (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
+- (oldflp->saddr &&
+- (!dn_dev_islocal(neigh->dev,
+- oldflp->saddr)))) {
+- neigh_release(neigh);
+- neigh = NULL;
+- } else {
+- dev_put(dev_out);
+- if (dn_dev_islocal(neigh->dev, fld.daddr)) {
+- dev_out = init_net.loopback_dev;
+- res.type = RTN_LOCAL;
+- } else {
+- dev_out = neigh->dev;
+- }
+- dev_hold(dev_out);
+- goto select_source;
+- }
+- }
+- }
+-
+- /* Not there? Perhaps its a local address */
+- if (dev_out == NULL)
+- dev_out = dn_dev_get_default();
+- err = -ENODEV;
+- if (dev_out == NULL)
+- goto out;
+- dn_db = rcu_dereference_raw(dev_out->dn_ptr);
+- if (!dn_db)
+- goto e_inval;
+- /* Possible improvement - check all devices for local addr */
+- if (dn_dev_islocal(dev_out, fld.daddr)) {
+- dev_put(dev_out);
+- dev_out = init_net.loopback_dev;
+- dev_hold(dev_out);
+- res.type = RTN_LOCAL;
+- goto select_source;
+- }
+- /* Not local either.... try sending it to the default router */
+- neigh = neigh_clone(dn_db->router);
+- BUG_ON(neigh && neigh->dev != dev_out);
+-
+- /* Ok then, we assume its directly connected and move on */
+-select_source:
+- if (neigh)
+- gateway = ((struct dn_neigh *)neigh)->addr;
+- if (gateway == 0)
+- gateway = fld.daddr;
+- if (fld.saddr == 0) {
+- fld.saddr = dnet_select_source(dev_out, gateway,
+- res.type == RTN_LOCAL ?
+- RT_SCOPE_HOST :
+- RT_SCOPE_LINK);
+- if (fld.saddr == 0 && res.type != RTN_LOCAL)
+- goto e_addr;
+- }
+- fld.flowidn_oif = dev_out->ifindex;
+- goto make_route;
+- }
+- free_res = 1;
+-
+- if (res.type == RTN_NAT)
+- goto e_inval;
+-
+- if (res.type == RTN_LOCAL) {
+- if (!fld.saddr)
+- fld.saddr = fld.daddr;
+- dev_put(dev_out);
+- dev_out = init_net.loopback_dev;
+- dev_hold(dev_out);
+- if (!dev_out->dn_ptr)
+- goto e_inval;
+- fld.flowidn_oif = dev_out->ifindex;
+- if (res.fi)
+- dn_fib_info_put(res.fi);
+- res.fi = NULL;
+- goto make_route;
+- }
+-
+- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
+- dn_fib_select_multipath(&fld, &res);
+-
+- /*
+- * We could add some logic to deal with default routes here and
+- * get rid of some of the special casing above.
+- */
+-
+- if (!fld.saddr)
+- fld.saddr = DN_FIB_RES_PREFSRC(res);
+-
+- dev_put(dev_out);
+- dev_out = DN_FIB_RES_DEV(res);
+- dev_hold(dev_out);
+- fld.flowidn_oif = dev_out->ifindex;
+- gateway = DN_FIB_RES_GW(res);
+-
+-make_route:
+- if (dev_out->flags & IFF_LOOPBACK)
+- flags |= RTCF_LOCAL;
+-
+- rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0);
+- if (rt == NULL)
+- goto e_nobufs;
+-
+- rt->dn_next = NULL;
+- memset(&rt->fld, 0, sizeof(rt->fld));
+- rt->fld.saddr = oldflp->saddr;
+- rt->fld.daddr = oldflp->daddr;
+- rt->fld.flowidn_oif = oldflp->flowidn_oif;
+- rt->fld.flowidn_iif = 0;
+- rt->fld.flowidn_mark = oldflp->flowidn_mark;
+-
+- rt->rt_saddr = fld.saddr;
+- rt->rt_daddr = fld.daddr;
+- rt->rt_gateway = gateway ? gateway : fld.daddr;
+- rt->rt_local_src = fld.saddr;
+-
+- rt->rt_dst_map = fld.daddr;
+- rt->rt_src_map = fld.saddr;
+-
+- rt->n = neigh;
+- neigh = NULL;
+-
+- rt->dst.lastuse = jiffies;
+- rt->dst.output = dn_output;
+- rt->dst.input = dn_rt_bug;
+- rt->rt_flags = flags;
+- if (flags & RTCF_LOCAL)
+- rt->dst.input = dn_nsp_rx;
+-
+- err = dn_rt_set_next_hop(rt, &res);
+- if (err)
+- goto e_neighbour;
+-
+- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
+- /* dn_insert_route() increments dst->__refcnt */
+- dn_insert_route(rt, hash, (struct dn_route **)pprt);
+-
+-done:
+- if (neigh)
+- neigh_release(neigh);
+- if (free_res)
+- dn_fib_res_put(&res);
+- dev_put(dev_out);
+-out:
+- return err;
+-
+-e_addr:
+- err = -EADDRNOTAVAIL;
+- goto done;
+-e_inval:
+- err = -EINVAL;
+- goto done;
+-e_nobufs:
+- err = -ENOBUFS;
+- goto done;
+-e_neighbour:
+- dst_release_immediate(&rt->dst);
+- goto e_nobufs;
+-}
+-
+-
+-/*
+- * N.B. The flags may be moved into the flowi at some future stage.
+- */
+-static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
+-{
+- unsigned int hash = dn_hash(flp->saddr, flp->daddr);
+- struct dn_route *rt = NULL;
+-
+- if (!(flags & MSG_TRYHARD)) {
+- rcu_read_lock_bh();
+- for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
+- rt = rcu_dereference_bh(rt->dn_next)) {
+- if ((flp->daddr == rt->fld.daddr) &&
+- (flp->saddr == rt->fld.saddr) &&
+- (flp->flowidn_mark == rt->fld.flowidn_mark) &&
+- dn_is_output_route(rt) &&
+- (rt->fld.flowidn_oif == flp->flowidn_oif)) {
+- dst_hold_and_use(&rt->dst, jiffies);
+- rcu_read_unlock_bh();
+- *pprt = &rt->dst;
+- return 0;
+- }
+- }
+- rcu_read_unlock_bh();
+- }
+-
+- return dn_route_output_slow(pprt, flp, flags);
+-}
+-
+-static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
+-{
+- int err;
+-
+- err = __dn_route_output_key(pprt, flp, flags);
+- if (err == 0 && flp->flowidn_proto) {
+- *pprt = xfrm_lookup(&init_net, *pprt,
+- flowidn_to_flowi(flp), NULL, 0);
+- if (IS_ERR(*pprt)) {
+- err = PTR_ERR(*pprt);
+- *pprt = NULL;
+- }
+- }
+- return err;
+-}
+-
+-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
+-{
+- int err;
+-
+- err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
+- if (err == 0 && fl->flowidn_proto) {
+- *pprt = xfrm_lookup(&init_net, *pprt,
+- flowidn_to_flowi(fl), sk, 0);
+- if (IS_ERR(*pprt)) {
+- err = PTR_ERR(*pprt);
+- *pprt = NULL;
+- }
+- }
+- return err;
+-}
+-
+-static int dn_route_input_slow(struct sk_buff *skb)
+-{
+- struct dn_route *rt = NULL;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- struct net_device *in_dev = skb->dev;
+- struct net_device *out_dev = NULL;
+- struct dn_dev *dn_db;
+- struct neighbour *neigh = NULL;
+- unsigned int hash;
+- int flags = 0;
+- __le16 gateway = 0;
+- __le16 local_src = 0;
+- struct flowidn fld = {
+- .daddr = cb->dst,
+- .saddr = cb->src,
+- .flowidn_scope = RT_SCOPE_UNIVERSE,
+- .flowidn_mark = skb->mark,
+- .flowidn_iif = skb->dev->ifindex,
+- };
+- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
+- int err = -EINVAL;
+- int free_res = 0;
+-
+- dev_hold(in_dev);
+-
+- dn_db = rcu_dereference(in_dev->dn_ptr);
+- if (!dn_db)
+- goto out;
+-
+- /* Zero source addresses are not allowed */
+- if (fld.saddr == 0)
+- goto out;
+-
+- /*
+- * In this case we've just received a packet from a source
+- * outside ourselves pretending to come from us. We don't
+- * allow it any further to prevent routing loops, spoofing and
+- * other nasties. Loopback packets already have the dst attached
+- * so this only affects packets which have originated elsewhere.
+- */
+- err = -ENOTUNIQ;
+- if (dn_dev_islocal(in_dev, cb->src))
+- goto out;
+-
+- err = dn_fib_lookup(&fld, &res);
+- if (err) {
+- if (err != -ESRCH)
+- goto out;
+- /*
+- * Is the destination us ?
+- */
+- if (!dn_dev_islocal(in_dev, cb->dst))
+- goto e_inval;
+-
+- res.type = RTN_LOCAL;
+- } else {
+- __le16 src_map = fld.saddr;
+- free_res = 1;
+-
+- out_dev = DN_FIB_RES_DEV(res);
+- if (out_dev == NULL) {
+- net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n");
+- goto e_inval;
+- }
+- dev_hold(out_dev);
+-
+- if (res.r)
+- src_map = fld.saddr; /* no NAT support for now */
+-
+- gateway = DN_FIB_RES_GW(res);
+- if (res.type == RTN_NAT) {
+- fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
+- dn_fib_res_put(&res);
+- free_res = 0;
+- if (dn_fib_lookup(&fld, &res))
+- goto e_inval;
+- free_res = 1;
+- if (res.type != RTN_UNICAST)
+- goto e_inval;
+- flags |= RTCF_DNAT;
+- gateway = fld.daddr;
+- }
+- fld.saddr = src_map;
+- }
+-
+- switch (res.type) {
+- case RTN_UNICAST:
+- /*
+- * Forwarding check here, we only check for forwarding
+- * being turned off, if you want to only forward intra
+- * area, its up to you to set the routing tables up
+- * correctly.
+- */
+- if (dn_db->parms.forwarding == 0)
+- goto e_inval;
+-
+- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
+- dn_fib_select_multipath(&fld, &res);
+-
+- /*
+- * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
+- * flag as a hint to set the intra-ethernet bit when
+- * forwarding. If we've got NAT in operation, we don't do
+- * this optimisation.
+- */
+- if (out_dev == in_dev && !(flags & RTCF_NAT))
+- flags |= RTCF_DOREDIRECT;
+-
+- local_src = DN_FIB_RES_PREFSRC(res);
+- break;
+- case RTN_BLACKHOLE:
+- case RTN_UNREACHABLE:
+- break;
+- case RTN_LOCAL:
+- flags |= RTCF_LOCAL;
+- fld.saddr = cb->dst;
+- fld.daddr = cb->src;
+-
+- /* Routing tables gave us a gateway */
+- if (gateway)
+- goto make_route;
+-
+- /* Packet was intra-ethernet, so we know its on-link */
+- if (cb->rt_flags & DN_RT_F_IE) {
+- gateway = cb->src;
+- goto make_route;
+- }
+-
+- /* Use the default router if there is one */
+- neigh = neigh_clone(dn_db->router);
+- if (neigh) {
+- gateway = ((struct dn_neigh *)neigh)->addr;
+- goto make_route;
+- }
+-
+- /* Close eyes and pray */
+- gateway = cb->src;
+- goto make_route;
+- default:
+- goto e_inval;
+- }
+-
+-make_route:
+- rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0);
+- if (rt == NULL)
+- goto e_nobufs;
+-
+- rt->dn_next = NULL;
+- memset(&rt->fld, 0, sizeof(rt->fld));
+- rt->rt_saddr = fld.saddr;
+- rt->rt_daddr = fld.daddr;
+- rt->rt_gateway = fld.daddr;
+- if (gateway)
+- rt->rt_gateway = gateway;
+- rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
+-
+- rt->rt_dst_map = fld.daddr;
+- rt->rt_src_map = fld.saddr;
+-
+- rt->fld.saddr = cb->src;
+- rt->fld.daddr = cb->dst;
+- rt->fld.flowidn_oif = 0;
+- rt->fld.flowidn_iif = in_dev->ifindex;
+- rt->fld.flowidn_mark = fld.flowidn_mark;
+-
+- rt->n = neigh;
+- rt->dst.lastuse = jiffies;
+- rt->dst.output = dn_rt_bug_out;
+- switch (res.type) {
+- case RTN_UNICAST:
+- rt->dst.input = dn_forward;
+- break;
+- case RTN_LOCAL:
+- rt->dst.output = dn_output;
+- rt->dst.input = dn_nsp_rx;
+- rt->dst.dev = in_dev;
+- flags |= RTCF_LOCAL;
+- break;
+- default:
+- case RTN_UNREACHABLE:
+- case RTN_BLACKHOLE:
+- rt->dst.input = dst_discard;
+- }
+- rt->rt_flags = flags;
+-
+- err = dn_rt_set_next_hop(rt, &res);
+- if (err)
+- goto e_neighbour;
+-
+- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
+- /* dn_insert_route() increments dst->__refcnt */
+- dn_insert_route(rt, hash, &rt);
+- skb_dst_set(skb, &rt->dst);
+-
+-done:
+- if (neigh)
+- neigh_release(neigh);
+- if (free_res)
+- dn_fib_res_put(&res);
+- dev_put(in_dev);
+- dev_put(out_dev);
+-out:
+- return err;
+-
+-e_inval:
+- err = -EINVAL;
+- goto done;
+-
+-e_nobufs:
+- err = -ENOBUFS;
+- goto done;
+-
+-e_neighbour:
+- dst_release_immediate(&rt->dst);
+- goto done;
+-}
+-
+-static int dn_route_input(struct sk_buff *skb)
+-{
+- struct dn_route *rt;
+- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+- unsigned int hash = dn_hash(cb->src, cb->dst);
+-
+- if (skb_dst(skb))
+- return 0;
+-
+- rcu_read_lock();
+- for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
+- rt = rcu_dereference(rt->dn_next)) {
+- if ((rt->fld.saddr == cb->src) &&
+- (rt->fld.daddr == cb->dst) &&
+- (rt->fld.flowidn_oif == 0) &&
+- (rt->fld.flowidn_mark == skb->mark) &&
+- (rt->fld.flowidn_iif == cb->iif)) {
+- dst_hold_and_use(&rt->dst, jiffies);
+- rcu_read_unlock();
+- skb_dst_set(skb, (struct dst_entry *)rt);
+- return 0;
+- }
+- }
+- rcu_read_unlock();
+-
+- return dn_route_input_slow(skb);
+-}
+-
+-static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
+- int event, int nowait, unsigned int flags)
+-{
+- struct dn_route *rt = (struct dn_route *)skb_dst(skb);
+- struct rtmsg *r;
+- struct nlmsghdr *nlh;
+- long expires;
+-
+- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
+- if (!nlh)
+- return -EMSGSIZE;
+-
+- r = nlmsg_data(nlh);
+- r->rtm_family = AF_DECnet;
+- r->rtm_dst_len = 16;
+- r->rtm_src_len = 0;
+- r->rtm_tos = 0;
+- r->rtm_table = RT_TABLE_MAIN;
+- r->rtm_type = rt->rt_type;
+- r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
+- r->rtm_scope = RT_SCOPE_UNIVERSE;
+- r->rtm_protocol = RTPROT_UNSPEC;
+-
+- if (rt->rt_flags & RTCF_NOTIFY)
+- r->rtm_flags |= RTM_F_NOTIFY;
+-
+- if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 ||
+- nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0)
+- goto errout;
+-
+- if (rt->fld.saddr) {
+- r->rtm_src_len = 16;
+- if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0)
+- goto errout;
+- }
+- if (rt->dst.dev &&
+- nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0)
+- goto errout;
+-
+- /*
+- * Note to self - change this if input routes reverse direction when
+- * they deal only with inputs and not with replies like they do
+- * currently.
+- */
+- if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0)
+- goto errout;
+-
+- if (rt->rt_daddr != rt->rt_gateway &&
+- nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0)
+- goto errout;
+-
+- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+- goto errout;
+-
+- expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
+- rt->dst.error) < 0)
+- goto errout;
+-
+- if (dn_is_input_route(rt) &&
+- nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0)
+- goto errout;
+-
+- nlmsg_end(skb, nlh);
+- return 0;
+-
+-errout:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
+- [RTA_DST] = { .type = NLA_U16 },
+- [RTA_SRC] = { .type = NLA_U16 },
+- [RTA_IIF] = { .type = NLA_U32 },
+- [RTA_OIF] = { .type = NLA_U32 },
+- [RTA_GATEWAY] = { .type = NLA_U16 },
+- [RTA_PRIORITY] = { .type = NLA_U32 },
+- [RTA_PREFSRC] = { .type = NLA_U16 },
+- [RTA_METRICS] = { .type = NLA_NESTED },
+- [RTA_MULTIPATH] = { .type = NLA_NESTED },
+- [RTA_TABLE] = { .type = NLA_U32 },
+- [RTA_MARK] = { .type = NLA_U32 },
+-};
+-
+-/*
+- * This is called by both endnodes and routers now.
+- */
+-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = sock_net(in_skb->sk);
+- struct rtmsg *rtm = nlmsg_data(nlh);
+- struct dn_route *rt = NULL;
+- struct dn_skb_cb *cb;
+- int err;
+- struct sk_buff *skb;
+- struct flowidn fld;
+- struct nlattr *tb[RTA_MAX+1];
+-
+- if (!net_eq(net, &init_net))
+- return -EINVAL;
+-
+- err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+- rtm_dn_policy, extack);
+- if (err < 0)
+- return err;
+-
+- memset(&fld, 0, sizeof(fld));
+- fld.flowidn_proto = DNPROTO_NSP;
+-
+- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+- if (skb == NULL)
+- return -ENOBUFS;
+- skb_reset_mac_header(skb);
+- cb = DN_SKB_CB(skb);
+-
+- if (tb[RTA_SRC])
+- fld.saddr = nla_get_le16(tb[RTA_SRC]);
+-
+- if (tb[RTA_DST])
+- fld.daddr = nla_get_le16(tb[RTA_DST]);
+-
+- if (tb[RTA_IIF])
+- fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
+-
+- if (fld.flowidn_iif) {
+- struct net_device *dev;
+- dev = __dev_get_by_index(&init_net, fld.flowidn_iif);
+- if (!dev || !dev->dn_ptr) {
+- kfree_skb(skb);
+- return -ENODEV;
+- }
+- skb->protocol = htons(ETH_P_DNA_RT);
+- skb->dev = dev;
+- cb->src = fld.saddr;
+- cb->dst = fld.daddr;
+- local_bh_disable();
+- err = dn_route_input(skb);
+- local_bh_enable();
+- memset(cb, 0, sizeof(struct dn_skb_cb));
+- rt = (struct dn_route *)skb_dst(skb);
+- if (!err && -rt->dst.error)
+- err = rt->dst.error;
+- } else {
+- if (tb[RTA_OIF])
+- fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
+-
+- err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
+- }
+-
+- skb->dev = NULL;
+- if (err)
+- goto out_free;
+- skb_dst_set(skb, &rt->dst);
+- if (rtm->rtm_flags & RTM_F_NOTIFY)
+- rt->rt_flags |= RTCF_NOTIFY;
+-
+- err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
+- if (err < 0) {
+- err = -EMSGSIZE;
+- goto out_free;
+- }
+-
+- return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
+-
+-out_free:
+- kfree_skb(skb);
+- return err;
+-}
+-
+-/*
+- * For routers, this is called from dn_fib_dump, but for endnodes its
+- * called directly from the rtnetlink dispatch table.
+- */
+-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- struct net *net = sock_net(skb->sk);
+- struct dn_route *rt;
+- int h, s_h;
+- int idx, s_idx;
+- struct rtmsg *rtm;
+-
+- if (!net_eq(net, &init_net))
+- return 0;
+-
+- if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg))
+- return -EINVAL;
+-
+- rtm = nlmsg_data(cb->nlh);
+- if (!(rtm->rtm_flags & RTM_F_CLONED))
+- return 0;
+-
+- s_h = cb->args[0];
+- s_idx = idx = cb->args[1];
+- for (h = 0; h <= dn_rt_hash_mask; h++) {
+- if (h < s_h)
+- continue;
+- if (h > s_h)
+- s_idx = 0;
+- rcu_read_lock_bh();
+- for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
+- rt;
+- rt = rcu_dereference_bh(rt->dn_next), idx++) {
+- if (idx < s_idx)
+- continue;
+- skb_dst_set(skb, dst_clone(&rt->dst));
+- if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+- 1, NLM_F_MULTI) < 0) {
+- skb_dst_drop(skb);
+- rcu_read_unlock_bh();
+- goto done;
+- }
+- skb_dst_drop(skb);
+- }
+- rcu_read_unlock_bh();
+- }
+-
+-done:
+- cb->args[0] = h;
+- cb->args[1] = idx;
+- return skb->len;
+-}
+-
+-#ifdef CONFIG_PROC_FS
+-struct dn_rt_cache_iter_state {
+- int bucket;
+-};
+-
+-static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
+-{
+- struct dn_route *rt = NULL;
+- struct dn_rt_cache_iter_state *s = seq->private;
+-
+- for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
+- rcu_read_lock_bh();
+- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
+- if (rt)
+- break;
+- rcu_read_unlock_bh();
+- }
+- return rt;
+-}
+-
+-static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
+-{
+- struct dn_rt_cache_iter_state *s = seq->private;
+-
+- rt = rcu_dereference_bh(rt->dn_next);
+- while (!rt) {
+- rcu_read_unlock_bh();
+- if (--s->bucket < 0)
+- break;
+- rcu_read_lock_bh();
+- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
+- }
+- return rt;
+-}
+-
+-static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
+-{
+- struct dn_route *rt = dn_rt_cache_get_first(seq);
+-
+- if (rt) {
+- while (*pos && (rt = dn_rt_cache_get_next(seq, rt)))
+- --*pos;
+- }
+- return *pos ? NULL : rt;
+-}
+-
+-static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+-{
+- struct dn_route *rt = dn_rt_cache_get_next(seq, v);
+- ++*pos;
+- return rt;
+-}
+-
+-static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
+-{
+- if (v)
+- rcu_read_unlock_bh();
+-}
+-
+-static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
+-{
+- struct dn_route *rt = v;
+- char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
+-
+- seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
+- rt->dst.dev ? rt->dst.dev->name : "*",
+- dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
+- dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
+- atomic_read(&rt->dst.__refcnt),
+- rt->dst.__use, 0);
+- return 0;
+-}
+-
+-static const struct seq_operations dn_rt_cache_seq_ops = {
+- .start = dn_rt_cache_seq_start,
+- .next = dn_rt_cache_seq_next,
+- .stop = dn_rt_cache_seq_stop,
+- .show = dn_rt_cache_seq_show,
+-};
+-#endif /* CONFIG_PROC_FS */
+-
+-void __init dn_route_init(void)
+-{
+- int i, goal, order;
+-
+- dn_dst_ops.kmem_cachep =
+- kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+- dst_entries_init(&dn_dst_ops);
+- timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
+- dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
+- add_timer(&dn_route_timer);
+-
+- goal = totalram_pages() >> (26 - PAGE_SHIFT);
+-
+- for (order = 0; (1UL << order) < goal; order++)
+- /* NOTHING */;
+-
+- /*
+- * Only want 1024 entries max, since the table is very, very unlikely
+- * to be larger than that.
+- */
+- while (order && ((((1UL << order) * PAGE_SIZE) /
+- sizeof(struct dn_rt_hash_bucket)) >= 2048))
+- order--;
+-
+- do {
+- dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
+- sizeof(struct dn_rt_hash_bucket);
+- while (dn_rt_hash_mask & (dn_rt_hash_mask - 1))
+- dn_rt_hash_mask--;
+- dn_rt_hash_table = (struct dn_rt_hash_bucket *)
+- __get_free_pages(GFP_ATOMIC, order);
+- } while (dn_rt_hash_table == NULL && --order > 0);
+-
+- if (!dn_rt_hash_table)
+- panic("Failed to allocate DECnet route cache hash table\n");
+-
+- printk(KERN_INFO
+- "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n",
+- dn_rt_hash_mask,
+- (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
+-
+- dn_rt_hash_mask--;
+- for (i = 0; i <= dn_rt_hash_mask; i++) {
+- spin_lock_init(&dn_rt_hash_table[i].lock);
+- dn_rt_hash_table[i].chain = NULL;
+- }
+-
+- dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
+-
+- proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
+- &dn_rt_cache_seq_ops,
+- sizeof(struct dn_rt_cache_iter_state), NULL);
+-
+-#ifdef CONFIG_DECNET_ROUTER
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+- dn_cache_getroute, dn_fib_dump, 0);
+-#else
+- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
+- dn_cache_getroute, dn_cache_dump, 0);
+-#endif
+-}
+-
+-void __exit dn_route_cleanup(void)
+-{
+- del_timer(&dn_route_timer);
+- dn_run_flush(NULL);
+-
+- remove_proc_entry("decnet_cache", init_net.proc_net);
+- dst_entries_destroy(&dn_dst_ops);
+-}
+diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
+deleted file mode 100644
+index 4a4e3c17740cb..0000000000000
+--- a/net/decnet/dn_rules.c
++++ /dev/null
+@@ -1,258 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Routing Forwarding Information Base (Rules)
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c
+- *
+- *
+- * Changes:
+- * Steve Whitehouse <steve@chygwyn.com>
+- * Updated for Thomas Graf's generic rules
+- *
+- */
+-#include <linux/net.h>
+-#include <linux/init.h>
+-#include <linux/netlink.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/netdevice.h>
+-#include <linux/spinlock.h>
+-#include <linux/list.h>
+-#include <linux/rcupdate.h>
+-#include <linux/export.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/fib_rules.h>
+-#include <net/dn.h>
+-#include <net/dn_fib.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-
+-static struct fib_rules_ops *dn_fib_rules_ops;
+-
+-struct dn_fib_rule
+-{
+- struct fib_rule common;
+- unsigned char dst_len;
+- unsigned char src_len;
+- __le16 src;
+- __le16 srcmask;
+- __le16 dst;
+- __le16 dstmask;
+- __le16 srcmap;
+- u8 flags;
+-};
+-
+-
+-int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
+-{
+- struct fib_lookup_arg arg = {
+- .result = res,
+- };
+- int err;
+-
+- err = fib_rules_lookup(dn_fib_rules_ops,
+- flowidn_to_flowi(flp), 0, &arg);
+- res->r = arg.rule;
+-
+- return err;
+-}
+-
+-static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+- int flags, struct fib_lookup_arg *arg)
+-{
+- struct flowidn *fld = &flp->u.dn;
+- int err = -EAGAIN;
+- struct dn_fib_table *tbl;
+-
+- switch(rule->action) {
+- case FR_ACT_TO_TBL:
+- break;
+-
+- case FR_ACT_UNREACHABLE:
+- err = -ENETUNREACH;
+- goto errout;
+-
+- case FR_ACT_PROHIBIT:
+- err = -EACCES;
+- goto errout;
+-
+- case FR_ACT_BLACKHOLE:
+- default:
+- err = -EINVAL;
+- goto errout;
+- }
+-
+- tbl = dn_fib_get_table(rule->table, 0);
+- if (tbl == NULL)
+- goto errout;
+-
+- err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
+- if (err > 0)
+- err = -EAGAIN;
+-errout:
+- return err;
+-}
+-
+-static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
+- FRA_GENERIC_POLICY,
+-};
+-
+-static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+-{
+- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+- struct flowidn *fld = &fl->u.dn;
+- __le16 daddr = fld->daddr;
+- __le16 saddr = fld->saddr;
+-
+- if (((saddr ^ r->src) & r->srcmask) ||
+- ((daddr ^ r->dst) & r->dstmask))
+- return 0;
+-
+- return 1;
+-}
+-
+-static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+- struct fib_rule_hdr *frh,
+- struct nlattr **tb,
+- struct netlink_ext_ack *extack)
+-{
+- int err = -EINVAL;
+- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+-
+- if (frh->tos) {
+- NL_SET_ERR_MSG(extack, "Invalid tos value");
+- goto errout;
+- }
+-
+- if (rule->table == RT_TABLE_UNSPEC) {
+- if (rule->action == FR_ACT_TO_TBL) {
+- struct dn_fib_table *table;
+-
+- table = dn_fib_empty_table();
+- if (table == NULL) {
+- err = -ENOBUFS;
+- goto errout;
+- }
+-
+- rule->table = table->n;
+- }
+- }
+-
+- if (frh->src_len)
+- r->src = nla_get_le16(tb[FRA_SRC]);
+-
+- if (frh->dst_len)
+- r->dst = nla_get_le16(tb[FRA_DST]);
+-
+- r->src_len = frh->src_len;
+- r->srcmask = dnet_make_mask(r->src_len);
+- r->dst_len = frh->dst_len;
+- r->dstmask = dnet_make_mask(r->dst_len);
+- err = 0;
+-errout:
+- return err;
+-}
+-
+-static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+- struct nlattr **tb)
+-{
+- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+-
+- if (frh->src_len && (r->src_len != frh->src_len))
+- return 0;
+-
+- if (frh->dst_len && (r->dst_len != frh->dst_len))
+- return 0;
+-
+- if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
+- return 0;
+-
+- if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
+- return 0;
+-
+- return 1;
+-}
+-
+-unsigned int dnet_addr_type(__le16 addr)
+-{
+- struct flowidn fld = { .daddr = addr };
+- struct dn_fib_res res;
+- unsigned int ret = RTN_UNICAST;
+- struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
+-
+- res.r = NULL;
+-
+- if (tb) {
+- if (!tb->lookup(tb, &fld, &res)) {
+- ret = res.type;
+- dn_fib_res_put(&res);
+- }
+- }
+- return ret;
+-}
+-
+-static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+- struct fib_rule_hdr *frh)
+-{
+- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+-
+- frh->dst_len = r->dst_len;
+- frh->src_len = r->src_len;
+- frh->tos = 0;
+-
+- if ((r->dst_len &&
+- nla_put_le16(skb, FRA_DST, r->dst)) ||
+- (r->src_len &&
+- nla_put_le16(skb, FRA_SRC, r->src)))
+- goto nla_put_failure;
+- return 0;
+-
+-nla_put_failure:
+- return -ENOBUFS;
+-}
+-
+-static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
+-{
+- dn_rt_cache_flush(-1);
+-}
+-
+-static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
+- .family = AF_DECnet,
+- .rule_size = sizeof(struct dn_fib_rule),
+- .addr_size = sizeof(u16),
+- .action = dn_fib_rule_action,
+- .match = dn_fib_rule_match,
+- .configure = dn_fib_rule_configure,
+- .compare = dn_fib_rule_compare,
+- .fill = dn_fib_rule_fill,
+- .flush_cache = dn_fib_rule_flush_cache,
+- .nlgroup = RTNLGRP_DECnet_RULE,
+- .policy = dn_fib_rule_policy,
+- .owner = THIS_MODULE,
+- .fro_net = &init_net,
+-};
+-
+-void __init dn_fib_rules_init(void)
+-{
+- dn_fib_rules_ops =
+- fib_rules_register(&dn_fib_rules_ops_template, &init_net);
+- BUG_ON(IS_ERR(dn_fib_rules_ops));
+- BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff,
+- RT_TABLE_MAIN, 0));
+-}
+-
+-void __exit dn_fib_rules_cleanup(void)
+-{
+- rtnl_lock();
+- fib_rules_unregister(dn_fib_rules_ops);
+- rtnl_unlock();
+- rcu_barrier();
+-}
+diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
+deleted file mode 100644
+index 4086f9c746af4..0000000000000
+--- a/net/decnet/dn_table.c
++++ /dev/null
+@@ -1,929 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Routing Forwarding Information Base (Routing Tables)
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- * Mostly copied from the IPv4 routing code
+- *
+- *
+- * Changes:
+- *
+- */
+-#include <linux/string.h>
+-#include <linux/net.h>
+-#include <linux/socket.h>
+-#include <linux/slab.h>
+-#include <linux/sockios.h>
+-#include <linux/init.h>
+-#include <linux/skbuff.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/proc_fs.h>
+-#include <linux/netdevice.h>
+-#include <linux/timer.h>
+-#include <linux/spinlock.h>
+-#include <linux/atomic.h>
+-#include <linux/uaccess.h>
+-#include <linux/route.h> /* RTF_xxx */
+-#include <net/neighbour.h>
+-#include <net/netlink.h>
+-#include <net/tcp.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-#include <net/fib_rules.h>
+-#include <net/dn.h>
+-#include <net/dn_route.h>
+-#include <net/dn_fib.h>
+-#include <net/dn_neigh.h>
+-#include <net/dn_dev.h>
+-
+-struct dn_zone
+-{
+- struct dn_zone *dz_next;
+- struct dn_fib_node **dz_hash;
+- int dz_nent;
+- int dz_divisor;
+- u32 dz_hashmask;
+-#define DZ_HASHMASK(dz) ((dz)->dz_hashmask)
+- int dz_order;
+- __le16 dz_mask;
+-#define DZ_MASK(dz) ((dz)->dz_mask)
+-};
+-
+-struct dn_hash
+-{
+- struct dn_zone *dh_zones[17];
+- struct dn_zone *dh_zone_list;
+-};
+-
+-#define dz_key_0(key) ((key).datum = 0)
+-
+-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
+- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+-
+-#define endfor_nexthops(fi) }
+-
+-#define DN_MAX_DIVISOR 1024
+-#define DN_S_ZOMBIE 1
+-#define DN_S_ACCESSED 2
+-
+-#define DN_FIB_SCAN(f, fp) \
+-for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
+-
+-#define DN_FIB_SCAN_KEY(f, fp, key) \
+-for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
+-
+-#define RT_TABLE_MIN 1
+-#define DN_FIB_TABLE_HASHSZ 256
+-static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
+-static DEFINE_RWLOCK(dn_fib_tables_lock);
+-
+-static struct kmem_cache *dn_hash_kmem __read_mostly;
+-static int dn_fib_hash_zombies;
+-
+-static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
+-{
+- u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order);
+- h ^= (h >> 10);
+- h ^= (h >> 6);
+- h &= DZ_HASHMASK(dz);
+- return *(dn_fib_idx_t *)&h;
+-}
+-
+-static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz)
+-{
+- dn_fib_key_t k;
+- k.datum = dst & DZ_MASK(dz);
+- return k;
+-}
+-
+-static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz)
+-{
+- return &dz->dz_hash[dn_hash(key, dz).datum];
+-}
+-
+-static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz)
+-{
+- return dz->dz_hash[dn_hash(key, dz).datum];
+-}
+-
+-static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b)
+-{
+- return a.datum == b.datum;
+-}
+-
+-static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b)
+-{
+- return a.datum <= b.datum;
+-}
+-
+-static inline void dn_rebuild_zone(struct dn_zone *dz,
+- struct dn_fib_node **old_ht,
+- int old_divisor)
+-{
+- struct dn_fib_node *f, **fp, *next;
+- int i;
+-
+- for(i = 0; i < old_divisor; i++) {
+- for(f = old_ht[i]; f; f = next) {
+- next = f->fn_next;
+- for(fp = dn_chain_p(f->fn_key, dz);
+- *fp && dn_key_leq((*fp)->fn_key, f->fn_key);
+- fp = &(*fp)->fn_next)
+- /* NOTHING */;
+- f->fn_next = *fp;
+- *fp = f;
+- }
+- }
+-}
+-
+-static void dn_rehash_zone(struct dn_zone *dz)
+-{
+- struct dn_fib_node **ht, **old_ht;
+- int old_divisor, new_divisor;
+- u32 new_hashmask;
+-
+- old_divisor = dz->dz_divisor;
+-
+- switch (old_divisor) {
+- case 16:
+- new_divisor = 256;
+- new_hashmask = 0xFF;
+- break;
+- default:
+- printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
+- old_divisor);
+- fallthrough;
+- case 256:
+- new_divisor = 1024;
+- new_hashmask = 0x3FF;
+- break;
+- }
+-
+- ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
+- if (ht == NULL)
+- return;
+-
+- write_lock_bh(&dn_fib_tables_lock);
+- old_ht = dz->dz_hash;
+- dz->dz_hash = ht;
+- dz->dz_hashmask = new_hashmask;
+- dz->dz_divisor = new_divisor;
+- dn_rebuild_zone(dz, old_ht, old_divisor);
+- write_unlock_bh(&dn_fib_tables_lock);
+- kfree(old_ht);
+-}
+-
+-static void dn_free_node(struct dn_fib_node *f)
+-{
+- dn_fib_release_info(DN_FIB_INFO(f));
+- kmem_cache_free(dn_hash_kmem, f);
+-}
+-
+-
+-static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
+-{
+- int i;
+- struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
+- if (!dz)
+- return NULL;
+-
+- if (z) {
+- dz->dz_divisor = 16;
+- dz->dz_hashmask = 0x0F;
+- } else {
+- dz->dz_divisor = 1;
+- dz->dz_hashmask = 0;
+- }
+-
+- dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
+- if (!dz->dz_hash) {
+- kfree(dz);
+- return NULL;
+- }
+-
+- dz->dz_order = z;
+- dz->dz_mask = dnet_make_mask(z);
+-
+- for(i = z + 1; i <= 16; i++)
+- if (table->dh_zones[i])
+- break;
+-
+- write_lock_bh(&dn_fib_tables_lock);
+- if (i>16) {
+- dz->dz_next = table->dh_zone_list;
+- table->dh_zone_list = dz;
+- } else {
+- dz->dz_next = table->dh_zones[i]->dz_next;
+- table->dh_zones[i]->dz_next = dz;
+- }
+- table->dh_zones[z] = dz;
+- write_unlock_bh(&dn_fib_tables_lock);
+- return dz;
+-}
+-
+-
+-static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
+-{
+- struct rtnexthop *nhp;
+- int nhlen;
+-
+- if (attrs[RTA_PRIORITY] &&
+- nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
+- return 1;
+-
+- if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
+- if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
+- (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
+- return 0;
+- return 1;
+- }
+-
+- if (!attrs[RTA_MULTIPATH])
+- return 0;
+-
+- nhp = nla_data(attrs[RTA_MULTIPATH]);
+- nhlen = nla_len(attrs[RTA_MULTIPATH]);
+-
+- for_nexthops(fi) {
+- int attrlen = nhlen - sizeof(struct rtnexthop);
+- __le16 gw;
+-
+- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+- return -EINVAL;
+- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+- return 1;
+- if (attrlen) {
+- struct nlattr *gw_attr;
+-
+- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
+- gw = gw_attr ? nla_get_le16(gw_attr) : 0;
+-
+- if (gw && gw != nh->nh_gw)
+- return 1;
+- }
+- nhp = RTNH_NEXT(nhp);
+- } endfor_nexthops(fi);
+-
+- return 0;
+-}
+-
+-static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
+-{
+- size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+- + nla_total_size(4) /* RTA_TABLE */
+- + nla_total_size(2) /* RTA_DST */
+- + nla_total_size(4) /* RTA_PRIORITY */
+- + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+-
+- /* space for nested metrics */
+- payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
+-
+- if (fi->fib_nhs) {
+- /* Also handles the special case fib_nhs == 1 */
+-
+- /* each nexthop is packed in an attribute */
+- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+-
+- /* may contain a gateway attribute */
+- nhsize += nla_total_size(4);
+-
+- /* all nexthops are packed in a nested attribute */
+- payload += nla_total_size(fi->fib_nhs * nhsize);
+- }
+-
+- return payload;
+-}
+-
+-static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
+- u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+- struct dn_fib_info *fi, unsigned int flags)
+-{
+- struct rtmsg *rtm;
+- struct nlmsghdr *nlh;
+-
+- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
+- if (!nlh)
+- return -EMSGSIZE;
+-
+- rtm = nlmsg_data(nlh);
+- rtm->rtm_family = AF_DECnet;
+- rtm->rtm_dst_len = dst_len;
+- rtm->rtm_src_len = 0;
+- rtm->rtm_tos = 0;
+- rtm->rtm_table = tb_id;
+- rtm->rtm_flags = fi->fib_flags;
+- rtm->rtm_scope = scope;
+- rtm->rtm_type = type;
+- rtm->rtm_protocol = fi->fib_protocol;
+-
+- if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0)
+- goto errout;
+-
+- if (rtm->rtm_dst_len &&
+- nla_put(skb, RTA_DST, 2, dst) < 0)
+- goto errout;
+-
+- if (fi->fib_priority &&
+- nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0)
+- goto errout;
+-
+- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+- goto errout;
+-
+- if (fi->fib_nhs == 1) {
+- if (fi->fib_nh->nh_gw &&
+- nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0)
+- goto errout;
+-
+- if (fi->fib_nh->nh_oif &&
+- nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0)
+- goto errout;
+- }
+-
+- if (fi->fib_nhs > 1) {
+- struct rtnexthop *nhp;
+- struct nlattr *mp_head;
+-
+- mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+- if (!mp_head)
+- goto errout;
+-
+- for_nexthops(fi) {
+- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp))))
+- goto errout;
+-
+- nhp->rtnh_flags = nh->nh_flags & 0xFF;
+- nhp->rtnh_hops = nh->nh_weight - 1;
+- nhp->rtnh_ifindex = nh->nh_oif;
+-
+- if (nh->nh_gw &&
+- nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0)
+- goto errout;
+-
+- nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
+- } endfor_nexthops(fi);
+-
+- nla_nest_end(skb, mp_head);
+- }
+-
+- nlmsg_end(skb, nlh);
+- return 0;
+-
+-errout:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-
+-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
+- struct nlmsghdr *nlh, struct netlink_skb_parms *req)
+-{
+- struct sk_buff *skb;
+- u32 portid = req ? req->portid : 0;
+- int err = -ENOBUFS;
+-
+- skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
+- if (skb == NULL)
+- goto errout;
+-
+- err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
+- f->fn_type, f->fn_scope, &f->fn_key, z,
+- DN_FIB_INFO(f), 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+- rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+- return;
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
+-}
+-
+-static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
+- struct netlink_callback *cb,
+- struct dn_fib_table *tb,
+- struct dn_zone *dz,
+- struct dn_fib_node *f)
+-{
+- int i, s_i;
+-
+- s_i = cb->args[4];
+- for(i = 0; f; i++, f = f->fn_next) {
+- if (i < s_i)
+- continue;
+- if (f->fn_state & DN_S_ZOMBIE)
+- continue;
+- if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- RTM_NEWROUTE,
+- tb->n,
+- (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
+- f->fn_scope, &f->fn_key, dz->dz_order,
+- f->fn_info, NLM_F_MULTI) < 0) {
+- cb->args[4] = i;
+- return -1;
+- }
+- }
+- cb->args[4] = i;
+- return skb->len;
+-}
+-
+-static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
+- struct netlink_callback *cb,
+- struct dn_fib_table *tb,
+- struct dn_zone *dz)
+-{
+- int h, s_h;
+-
+- s_h = cb->args[3];
+- for(h = 0; h < dz->dz_divisor; h++) {
+- if (h < s_h)
+- continue;
+- if (h > s_h)
+- memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
+- if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
+- continue;
+- if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
+- cb->args[3] = h;
+- return -1;
+- }
+- }
+- cb->args[3] = h;
+- return skb->len;
+-}
+-
+-static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
+- struct netlink_callback *cb)
+-{
+- int m, s_m;
+- struct dn_zone *dz;
+- struct dn_hash *table = (struct dn_hash *)tb->data;
+-
+- s_m = cb->args[2];
+- read_lock(&dn_fib_tables_lock);
+- for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
+- if (m < s_m)
+- continue;
+- if (m > s_m)
+- memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+-
+- if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
+- cb->args[2] = m;
+- read_unlock(&dn_fib_tables_lock);
+- return -1;
+- }
+- }
+- read_unlock(&dn_fib_tables_lock);
+- cb->args[2] = m;
+-
+- return skb->len;
+-}
+-
+-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- struct net *net = sock_net(skb->sk);
+- unsigned int h, s_h;
+- unsigned int e = 0, s_e;
+- struct dn_fib_table *tb;
+- int dumped = 0;
+-
+- if (!net_eq(net, &init_net))
+- return 0;
+-
+- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+- ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
+- return dn_cache_dump(skb, cb);
+-
+- s_h = cb->args[0];
+- s_e = cb->args[1];
+-
+- for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
+- e = 0;
+- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) {
+- if (e < s_e)
+- goto next;
+- if (dumped)
+- memset(&cb->args[2], 0, sizeof(cb->args) -
+- 2 * sizeof(cb->args[0]));
+- if (tb->dump(tb, skb, cb) < 0)
+- goto out;
+- dumped = 1;
+-next:
+- e++;
+- }
+- }
+-out:
+- cb->args[1] = e;
+- cb->args[0] = h;
+-
+- return skb->len;
+-}
+-
+-static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
+- struct nlmsghdr *n, struct netlink_skb_parms *req)
+-{
+- struct dn_hash *table = (struct dn_hash *)tb->data;
+- struct dn_fib_node *new_f, *f, **fp, **del_fp;
+- struct dn_zone *dz;
+- struct dn_fib_info *fi;
+- int z = r->rtm_dst_len;
+- int type = r->rtm_type;
+- dn_fib_key_t key;
+- int err;
+-
+- if (z > 16)
+- return -EINVAL;
+-
+- dz = table->dh_zones[z];
+- if (!dz && !(dz = dn_new_zone(table, z)))
+- return -ENOBUFS;
+-
+- dz_key_0(key);
+- if (attrs[RTA_DST]) {
+- __le16 dst = nla_get_le16(attrs[RTA_DST]);
+- if (dst & ~DZ_MASK(dz))
+- return -EINVAL;
+- key = dz_key(dst, dz);
+- }
+-
+- if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
+- return err;
+-
+- if (dz->dz_nent > (dz->dz_divisor << 2) &&
+- dz->dz_divisor > DN_MAX_DIVISOR &&
+- (z==16 || (1<<z) > dz->dz_divisor))
+- dn_rehash_zone(dz);
+-
+- fp = dn_chain_p(key, dz);
+-
+- DN_FIB_SCAN(f, fp) {
+- if (dn_key_leq(key, f->fn_key))
+- break;
+- }
+-
+- del_fp = NULL;
+-
+- if (f && (f->fn_state & DN_S_ZOMBIE) &&
+- dn_key_eq(f->fn_key, key)) {
+- del_fp = fp;
+- fp = &f->fn_next;
+- f = *fp;
+- goto create;
+- }
+-
+- DN_FIB_SCAN_KEY(f, fp, key) {
+- if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority)
+- break;
+- }
+-
+- if (f && dn_key_eq(f->fn_key, key) &&
+- fi->fib_priority == DN_FIB_INFO(f)->fib_priority) {
+- struct dn_fib_node **ins_fp;
+-
+- err = -EEXIST;
+- if (n->nlmsg_flags & NLM_F_EXCL)
+- goto out;
+-
+- if (n->nlmsg_flags & NLM_F_REPLACE) {
+- del_fp = fp;
+- fp = &f->fn_next;
+- f = *fp;
+- goto replace;
+- }
+-
+- ins_fp = fp;
+- err = -EEXIST;
+-
+- DN_FIB_SCAN_KEY(f, fp, key) {
+- if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority)
+- break;
+- if (f->fn_type == type &&
+- f->fn_scope == r->rtm_scope &&
+- DN_FIB_INFO(f) == fi)
+- goto out;
+- }
+-
+- if (!(n->nlmsg_flags & NLM_F_APPEND)) {
+- fp = ins_fp;
+- f = *fp;
+- }
+- }
+-
+-create:
+- err = -ENOENT;
+- if (!(n->nlmsg_flags & NLM_F_CREATE))
+- goto out;
+-
+-replace:
+- err = -ENOBUFS;
+- new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL);
+- if (new_f == NULL)
+- goto out;
+-
+- new_f->fn_key = key;
+- new_f->fn_type = type;
+- new_f->fn_scope = r->rtm_scope;
+- DN_FIB_INFO(new_f) = fi;
+-
+- new_f->fn_next = f;
+- write_lock_bh(&dn_fib_tables_lock);
+- *fp = new_f;
+- write_unlock_bh(&dn_fib_tables_lock);
+- dz->dz_nent++;
+-
+- if (del_fp) {
+- f = *del_fp;
+- write_lock_bh(&dn_fib_tables_lock);
+- *del_fp = f->fn_next;
+- write_unlock_bh(&dn_fib_tables_lock);
+-
+- if (!(f->fn_state & DN_S_ZOMBIE))
+- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
+- if (f->fn_state & DN_S_ACCESSED)
+- dn_rt_cache_flush(-1);
+- dn_free_node(f);
+- dz->dz_nent--;
+- } else {
+- dn_rt_cache_flush(-1);
+- }
+-
+- dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req);
+-
+- return 0;
+-out:
+- dn_fib_release_info(fi);
+- return err;
+-}
+-
+-
+-static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
+- struct nlmsghdr *n, struct netlink_skb_parms *req)
+-{
+- struct dn_hash *table = (struct dn_hash*)tb->data;
+- struct dn_fib_node **fp, **del_fp, *f;
+- int z = r->rtm_dst_len;
+- struct dn_zone *dz;
+- dn_fib_key_t key;
+- int matched;
+-
+-
+- if (z > 16)
+- return -EINVAL;
+-
+- if ((dz = table->dh_zones[z]) == NULL)
+- return -ESRCH;
+-
+- dz_key_0(key);
+- if (attrs[RTA_DST]) {
+- __le16 dst = nla_get_le16(attrs[RTA_DST]);
+- if (dst & ~DZ_MASK(dz))
+- return -EINVAL;
+- key = dz_key(dst, dz);
+- }
+-
+- fp = dn_chain_p(key, dz);
+-
+- DN_FIB_SCAN(f, fp) {
+- if (dn_key_eq(f->fn_key, key))
+- break;
+- if (dn_key_leq(key, f->fn_key))
+- return -ESRCH;
+- }
+-
+- matched = 0;
+- del_fp = NULL;
+- DN_FIB_SCAN_KEY(f, fp, key) {
+- struct dn_fib_info *fi = DN_FIB_INFO(f);
+-
+- if (f->fn_state & DN_S_ZOMBIE)
+- return -ESRCH;
+-
+- matched++;
+-
+- if (del_fp == NULL &&
+- (!r->rtm_type || f->fn_type == r->rtm_type) &&
+- (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
+- (!r->rtm_protocol ||
+- fi->fib_protocol == r->rtm_protocol) &&
+- dn_fib_nh_match(r, n, attrs, fi) == 0)
+- del_fp = fp;
+- }
+-
+- if (del_fp) {
+- f = *del_fp;
+- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
+-
+- if (matched != 1) {
+- write_lock_bh(&dn_fib_tables_lock);
+- *del_fp = f->fn_next;
+- write_unlock_bh(&dn_fib_tables_lock);
+-
+- if (f->fn_state & DN_S_ACCESSED)
+- dn_rt_cache_flush(-1);
+- dn_free_node(f);
+- dz->dz_nent--;
+- } else {
+- f->fn_state |= DN_S_ZOMBIE;
+- if (f->fn_state & DN_S_ACCESSED) {
+- f->fn_state &= ~DN_S_ACCESSED;
+- dn_rt_cache_flush(-1);
+- }
+- if (++dn_fib_hash_zombies > 128)
+- dn_fib_flush();
+- }
+-
+- return 0;
+- }
+-
+- return -ESRCH;
+-}
+-
+-static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table)
+-{
+- int found = 0;
+- struct dn_fib_node *f;
+-
+- while((f = *fp) != NULL) {
+- struct dn_fib_info *fi = DN_FIB_INFO(f);
+-
+- if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) {
+- write_lock_bh(&dn_fib_tables_lock);
+- *fp = f->fn_next;
+- write_unlock_bh(&dn_fib_tables_lock);
+-
+- dn_free_node(f);
+- found++;
+- continue;
+- }
+- fp = &f->fn_next;
+- }
+-
+- return found;
+-}
+-
+-static int dn_fib_table_flush(struct dn_fib_table *tb)
+-{
+- struct dn_hash *table = (struct dn_hash *)tb->data;
+- struct dn_zone *dz;
+- int found = 0;
+-
+- dn_fib_hash_zombies = 0;
+- for(dz = table->dh_zone_list; dz; dz = dz->dz_next) {
+- int i;
+- int tmp = 0;
+- for(i = dz->dz_divisor-1; i >= 0; i--)
+- tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table);
+- dz->dz_nent -= tmp;
+- found += tmp;
+- }
+-
+- return found;
+-}
+-
+-static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
+-{
+- int err;
+- struct dn_zone *dz;
+- struct dn_hash *t = (struct dn_hash *)tb->data;
+-
+- read_lock(&dn_fib_tables_lock);
+- for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
+- struct dn_fib_node *f;
+- dn_fib_key_t k = dz_key(flp->daddr, dz);
+-
+- for(f = dz_chain(k, dz); f; f = f->fn_next) {
+- if (!dn_key_eq(k, f->fn_key)) {
+- if (dn_key_leq(k, f->fn_key))
+- break;
+- else
+- continue;
+- }
+-
+- f->fn_state |= DN_S_ACCESSED;
+-
+- if (f->fn_state&DN_S_ZOMBIE)
+- continue;
+-
+- if (f->fn_scope < flp->flowidn_scope)
+- continue;
+-
+- err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
+-
+- if (err == 0) {
+- res->type = f->fn_type;
+- res->scope = f->fn_scope;
+- res->prefixlen = dz->dz_order;
+- goto out;
+- }
+- if (err < 0)
+- goto out;
+- }
+- }
+- err = 1;
+-out:
+- read_unlock(&dn_fib_tables_lock);
+- return err;
+-}
+-
+-
+-struct dn_fib_table *dn_fib_get_table(u32 n, int create)
+-{
+- struct dn_fib_table *t;
+- unsigned int h;
+-
+- if (n < RT_TABLE_MIN)
+- return NULL;
+-
+- if (n > RT_TABLE_MAX)
+- return NULL;
+-
+- h = n & (DN_FIB_TABLE_HASHSZ - 1);
+- rcu_read_lock();
+- hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) {
+- if (t->n == n) {
+- rcu_read_unlock();
+- return t;
+- }
+- }
+- rcu_read_unlock();
+-
+- if (!create)
+- return NULL;
+-
+- if (in_interrupt()) {
+- net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n");
+- return NULL;
+- }
+-
+- t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash),
+- GFP_KERNEL);
+- if (t == NULL)
+- return NULL;
+-
+- t->n = n;
+- t->insert = dn_fib_table_insert;
+- t->delete = dn_fib_table_delete;
+- t->lookup = dn_fib_table_lookup;
+- t->flush = dn_fib_table_flush;
+- t->dump = dn_fib_table_dump;
+- hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
+-
+- return t;
+-}
+-
+-struct dn_fib_table *dn_fib_empty_table(void)
+-{
+- u32 id;
+-
+- for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
+- if (dn_fib_get_table(id, 0) == NULL)
+- return dn_fib_get_table(id, 1);
+- return NULL;
+-}
+-
+-void dn_fib_flush(void)
+-{
+- int flushed = 0;
+- struct dn_fib_table *tb;
+- unsigned int h;
+-
+- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist)
+- flushed += tb->flush(tb);
+- }
+-
+- if (flushed)
+- dn_rt_cache_flush(-1);
+-}
+-
+-void __init dn_fib_table_init(void)
+-{
+- dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
+- sizeof(struct dn_fib_info),
+- 0, SLAB_HWCACHE_ALIGN,
+- NULL);
+-}
+-
+-void __exit dn_fib_table_cleanup(void)
+-{
+- struct dn_fib_table *t;
+- struct hlist_node *next;
+- unsigned int h;
+-
+- write_lock(&dn_fib_tables_lock);
+- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+- hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h],
+- hlist) {
+- hlist_del(&t->hlist);
+- kfree(t);
+- }
+- }
+- write_unlock(&dn_fib_tables_lock);
+-}
+diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
+deleted file mode 100644
+index aa4155875ca84..0000000000000
+--- a/net/decnet/dn_timer.c
++++ /dev/null
+@@ -1,104 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Socket Timer Functions
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- *
+- *
+- * Changes:
+- * Steve Whitehouse : Made keepalive timer part of the same
+- * timer idea.
+- * Steve Whitehouse : Added checks for sk->sock_readers
+- * David S. Miller : New socket locking
+- * Steve Whitehouse : Timer grabs socket ref.
+- */
+-#include <linux/net.h>
+-#include <linux/socket.h>
+-#include <linux/skbuff.h>
+-#include <linux/netdevice.h>
+-#include <linux/timer.h>
+-#include <linux/spinlock.h>
+-#include <net/sock.h>
+-#include <linux/atomic.h>
+-#include <linux/jiffies.h>
+-#include <net/flow.h>
+-#include <net/dn.h>
+-
+-/*
+- * Slow timer is for everything else (n * 500mS)
+- */
+-
+-#define SLOW_INTERVAL (HZ/2)
+-
+-static void dn_slow_timer(struct timer_list *t);
+-
+-void dn_start_slow_timer(struct sock *sk)
+-{
+- timer_setup(&sk->sk_timer, dn_slow_timer, 0);
+- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
+-}
+-
+-void dn_stop_slow_timer(struct sock *sk)
+-{
+- sk_stop_timer(sk, &sk->sk_timer);
+-}
+-
+-static void dn_slow_timer(struct timer_list *t)
+-{
+- struct sock *sk = from_timer(sk, t, sk_timer);
+- struct dn_scp *scp = DN_SK(sk);
+-
+- bh_lock_sock(sk);
+-
+- if (sock_owned_by_user(sk)) {
+- sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
+- goto out;
+- }
+-
+- /*
+- * The persist timer is the standard slow timer used for retransmits
+- * in both connection establishment and disconnection as well as
+- * in the RUN state. The different states are catered for by changing
+- * the function pointer in the socket. Setting the timer to a value
+- * of zero turns it off. We allow the persist_fxn to turn the
+- * timer off in a permant way by returning non-zero, so that
+- * timer based routines may remove sockets. This is why we have a
+- * sock_hold()/sock_put() around the timer to prevent the socket
+- * going away in the middle.
+- */
+- if (scp->persist && scp->persist_fxn) {
+- if (scp->persist <= SLOW_INTERVAL) {
+- scp->persist = 0;
+-
+- if (scp->persist_fxn(sk))
+- goto out;
+- } else {
+- scp->persist -= SLOW_INTERVAL;
+- }
+- }
+-
+- /*
+- * Check for keepalive timeout. After the other timer 'cos if
+- * the previous timer caused a retransmit, we don't need to
+- * do this. scp->stamp is the last time that we sent a packet.
+- * The keepalive function sends a link service packet to the
+- * other end. If it remains unacknowledged, the standard
+- * socket timers will eventually shut the socket down. Each
+- * time we do this, scp->stamp will be updated, thus
+- * we won't try and send another until scp->keepalive has passed
+- * since the last successful transmission.
+- */
+- if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
+- if (time_after_eq(jiffies, scp->stamp + scp->keepalive))
+- scp->keepalive_fxn(sk);
+- }
+-
+- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
+-out:
+- bh_unlock_sock(sk);
+- sock_put(sk);
+-}
+diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
+deleted file mode 100644
+index 14ec4ef95fab1..0000000000000
+--- a/net/decnet/netfilter/Kconfig
++++ /dev/null
+@@ -1,17 +0,0 @@
+-# SPDX-License-Identifier: GPL-2.0-only
+-#
+-# DECnet netfilter configuration
+-#
+-
+-menu "DECnet: Netfilter Configuration"
+- depends on DECNET && NETFILTER
+- depends on NETFILTER_ADVANCED
+-
+-config DECNET_NF_GRABULATOR
+- tristate "Routing message grabulator (for userland routing daemon)"
+- help
+- Enable this module if you want to use the userland DECnet routing
+- daemon. You will also need to enable routing support for DECnet
+- unless you just want to monitor routing messages from other nodes.
+-
+-endmenu
+diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
+deleted file mode 100644
+index 429c84289d0ff..0000000000000
+--- a/net/decnet/netfilter/Makefile
++++ /dev/null
+@@ -1,6 +0,0 @@
+-# SPDX-License-Identifier: GPL-2.0-only
+-#
+-# Makefile for DECnet netfilter modules
+-#
+-
+-obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
+diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
+deleted file mode 100644
+index 26a9193df7831..0000000000000
+--- a/net/decnet/netfilter/dn_rtmsg.c
++++ /dev/null
+@@ -1,158 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet Routing Message Grabulator
+- *
+- * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/
+- *
+- * Author: Steven Whitehouse <steve@chygwyn.com>
+- */
+-#include <linux/module.h>
+-#include <linux/skbuff.h>
+-#include <linux/slab.h>
+-#include <linux/init.h>
+-#include <linux/netdevice.h>
+-#include <linux/netfilter.h>
+-#include <linux/spinlock.h>
+-#include <net/netlink.h>
+-#include <linux/netfilter_decnet.h>
+-
+-#include <net/sock.h>
+-#include <net/flow.h>
+-#include <net/dn.h>
+-#include <net/dn_route.h>
+-
+-static struct sock *dnrmg = NULL;
+-
+-
+-static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
+-{
+- struct sk_buff *skb = NULL;
+- size_t size;
+- sk_buff_data_t old_tail;
+- struct nlmsghdr *nlh;
+- unsigned char *ptr;
+- struct nf_dn_rtmsg *rtm;
+-
+- size = NLMSG_ALIGN(rt_skb->len) +
+- NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
+- skb = nlmsg_new(size, GFP_ATOMIC);
+- if (!skb) {
+- *errp = -ENOMEM;
+- return NULL;
+- }
+- old_tail = skb->tail;
+- nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
+- if (!nlh) {
+- kfree_skb(skb);
+- *errp = -ENOMEM;
+- return NULL;
+- }
+- rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
+- rtm->nfdn_ifindex = rt_skb->dev->ifindex;
+- ptr = NFDN_RTMSG(rtm);
+- skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
+- nlh->nlmsg_len = skb->tail - old_tail;
+- return skb;
+-}
+-
+-static void dnrmg_send_peer(struct sk_buff *skb)
+-{
+- struct sk_buff *skb2;
+- int status = 0;
+- int group = 0;
+- unsigned char flags = *skb->data;
+-
+- switch (flags & DN_RT_CNTL_MSK) {
+- case DN_RT_PKT_L1RT:
+- group = DNRNG_NLGRP_L1;
+- break;
+- case DN_RT_PKT_L2RT:
+- group = DNRNG_NLGRP_L2;
+- break;
+- default:
+- return;
+- }
+-
+- skb2 = dnrmg_build_message(skb, &status);
+- if (skb2 == NULL)
+- return;
+- NETLINK_CB(skb2).dst_group = group;
+- netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
+-}
+-
+-
+-static unsigned int dnrmg_hook(void *priv,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
+-{
+- dnrmg_send_peer(skb);
+- return NF_ACCEPT;
+-}
+-
+-
+-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
+-
+-static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
+-{
+- struct nlmsghdr *nlh = nlmsg_hdr(skb);
+-
+- if (skb->len < sizeof(*nlh) ||
+- nlh->nlmsg_len < sizeof(*nlh) ||
+- skb->len < nlh->nlmsg_len)
+- return;
+-
+- if (!netlink_capable(skb, CAP_NET_ADMIN))
+- RCV_SKB_FAIL(-EPERM);
+-
+- /* Eventually we might send routing messages too */
+-
+- RCV_SKB_FAIL(-EINVAL);
+-}
+-
+-static const struct nf_hook_ops dnrmg_ops = {
+- .hook = dnrmg_hook,
+- .pf = NFPROTO_DECNET,
+- .hooknum = NF_DN_ROUTE,
+- .priority = NF_DN_PRI_DNRTMSG,
+-};
+-
+-static int __init dn_rtmsg_init(void)
+-{
+- int rv = 0;
+- struct netlink_kernel_cfg cfg = {
+- .groups = DNRNG_NLGRP_MAX,
+- .input = dnrmg_receive_user_skb,
+- };
+-
+- dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
+- if (dnrmg == NULL) {
+- printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
+- return -ENOMEM;
+- }
+-
+- rv = nf_register_net_hook(&init_net, &dnrmg_ops);
+- if (rv) {
+- netlink_kernel_release(dnrmg);
+- }
+-
+- return rv;
+-}
+-
+-static void __exit dn_rtmsg_fini(void)
+-{
+- nf_unregister_net_hook(&init_net, &dnrmg_ops);
+- netlink_kernel_release(dnrmg);
+-}
+-
+-
+-MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
+-MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
+-MODULE_LICENSE("GPL");
+-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
+-
+-module_init(dn_rtmsg_init);
+-module_exit(dn_rtmsg_fini);
+diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
+deleted file mode 100644
+index 67b5ab2657b7c..0000000000000
+--- a/net/decnet/sysctl_net_decnet.c
++++ /dev/null
+@@ -1,362 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * DECnet An implementation of the DECnet protocol suite for the LINUX
+- * operating system. DECnet is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * DECnet sysctl support functions
+- *
+- * Author: Steve Whitehouse <SteveW@ACM.org>
+- *
+- *
+- * Changes:
+- * Steve Whitehouse - C99 changes and default device handling
+- * Steve Whitehouse - Memory buffer settings, like the tcp ones
+- *
+- */
+-#include <linux/mm.h>
+-#include <linux/sysctl.h>
+-#include <linux/fs.h>
+-#include <linux/netdevice.h>
+-#include <linux/string.h>
+-#include <net/neighbour.h>
+-#include <net/dst.h>
+-#include <net/flow.h>
+-
+-#include <linux/uaccess.h>
+-
+-#include <net/dn.h>
+-#include <net/dn_dev.h>
+-#include <net/dn_route.h>
+-
+-
+-int decnet_debug_level;
+-int decnet_time_wait = 30;
+-int decnet_dn_count = 1;
+-int decnet_di_count = 3;
+-int decnet_dr_count = 3;
+-int decnet_log_martians = 1;
+-int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
+-
+-/* Reasonable defaults, I hope, based on tcp's defaults */
+-long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+-int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+-int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+-
+-#ifdef CONFIG_SYSCTL
+-extern int decnet_dst_gc_interval;
+-static int min_decnet_time_wait[] = { 5 };
+-static int max_decnet_time_wait[] = { 600 };
+-static int min_state_count[] = { 1 };
+-static int max_state_count[] = { NSP_MAXRXTSHIFT };
+-static int min_decnet_dst_gc_interval[] = { 1 };
+-static int max_decnet_dst_gc_interval[] = { 60 };
+-static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW };
+-static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW };
+-static char node_name[7] = "???";
+-
+-static struct ctl_table_header *dn_table_header = NULL;
+-
+-/*
+- * ctype.h :-)
+- */
+-#define ISNUM(x) (((x) >= '0') && ((x) <= '9'))
+-#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z'))
+-#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z'))
+-#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x))
+-#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x))
+-
+-static void strip_it(char *str)
+-{
+- for(;;) {
+- switch (*str) {
+- case ' ':
+- case '\n':
+- case '\r':
+- case ':':
+- *str = 0;
+- fallthrough;
+- case 0:
+- return;
+- }
+- str++;
+- }
+-}
+-
+-/*
+- * Simple routine to parse an ascii DECnet address
+- * into a network order address.
+- */
+-static int parse_addr(__le16 *addr, char *str)
+-{
+- __u16 area, node;
+-
+- while(*str && !ISNUM(*str)) str++;
+-
+- if (*str == 0)
+- return -1;
+-
+- area = (*str++ - '0');
+- if (ISNUM(*str)) {
+- area *= 10;
+- area += (*str++ - '0');
+- }
+-
+- if (*str++ != '.')
+- return -1;
+-
+- if (!ISNUM(*str))
+- return -1;
+-
+- node = *str++ - '0';
+- if (ISNUM(*str)) {
+- node *= 10;
+- node += (*str++ - '0');
+- }
+- if (ISNUM(*str)) {
+- node *= 10;
+- node += (*str++ - '0');
+- }
+- if (ISNUM(*str)) {
+- node *= 10;
+- node += (*str++ - '0');
+- }
+-
+- if ((node > 1023) || (area > 63))
+- return -1;
+-
+- if (INVALID_END_CHAR(*str))
+- return -1;
+-
+- *addr = cpu_to_le16((area << 10) | node);
+-
+- return 0;
+-}
+-
+-static int dn_node_address_handler(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
+-{
+- char addr[DN_ASCBUF_LEN];
+- size_t len;
+- __le16 dnaddr;
+-
+- if (!*lenp || (*ppos && !write)) {
+- *lenp = 0;
+- return 0;
+- }
+-
+- if (write) {
+- len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
+- memcpy(addr, buffer, len);
+- addr[len] = 0;
+- strip_it(addr);
+-
+- if (parse_addr(&dnaddr, addr))
+- return -EINVAL;
+-
+- dn_dev_devices_off();
+-
+- decnet_address = dnaddr;
+-
+- dn_dev_devices_on();
+-
+- *ppos += len;
+-
+- return 0;
+- }
+-
+- dn_addr2asc(le16_to_cpu(decnet_address), addr);
+- len = strlen(addr);
+- addr[len++] = '\n';
+-
+- if (len > *lenp)
+- len = *lenp;
+- memcpy(buffer, addr, len);
+- *lenp = len;
+- *ppos += len;
+-
+- return 0;
+-}
+-
+-static int dn_def_dev_handler(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
+-{
+- size_t len;
+- struct net_device *dev;
+- char devname[17];
+-
+- if (!*lenp || (*ppos && !write)) {
+- *lenp = 0;
+- return 0;
+- }
+-
+- if (write) {
+- if (*lenp > 16)
+- return -E2BIG;
+-
+- memcpy(devname, buffer, *lenp);
+- devname[*lenp] = 0;
+- strip_it(devname);
+-
+- dev = dev_get_by_name(&init_net, devname);
+- if (dev == NULL)
+- return -ENODEV;
+-
+- if (dev->dn_ptr == NULL) {
+- dev_put(dev);
+- return -ENODEV;
+- }
+-
+- if (dn_dev_set_default(dev, 1)) {
+- dev_put(dev);
+- return -ENODEV;
+- }
+- *ppos += *lenp;
+-
+- return 0;
+- }
+-
+- dev = dn_dev_get_default();
+- if (dev == NULL) {
+- *lenp = 0;
+- return 0;
+- }
+-
+- strcpy(devname, dev->name);
+- dev_put(dev);
+- len = strlen(devname);
+- devname[len++] = '\n';
+-
+- if (len > *lenp) len = *lenp;
+-
+- memcpy(buffer, devname, len);
+- *lenp = len;
+- *ppos += len;
+-
+- return 0;
+-}
+-
+-static struct ctl_table dn_table[] = {
+- {
+- .procname = "node_address",
+- .maxlen = 7,
+- .mode = 0644,
+- .proc_handler = dn_node_address_handler,
+- },
+- {
+- .procname = "node_name",
+- .data = node_name,
+- .maxlen = 7,
+- .mode = 0644,
+- .proc_handler = proc_dostring,
+- },
+- {
+- .procname = "default_device",
+- .maxlen = 16,
+- .mode = 0644,
+- .proc_handler = dn_def_dev_handler,
+- },
+- {
+- .procname = "time_wait",
+- .data = &decnet_time_wait,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_decnet_time_wait,
+- .extra2 = &max_decnet_time_wait
+- },
+- {
+- .procname = "dn_count",
+- .data = &decnet_dn_count,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_state_count,
+- .extra2 = &max_state_count
+- },
+- {
+- .procname = "di_count",
+- .data = &decnet_di_count,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_state_count,
+- .extra2 = &max_state_count
+- },
+- {
+- .procname = "dr_count",
+- .data = &decnet_dr_count,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_state_count,
+- .extra2 = &max_state_count
+- },
+- {
+- .procname = "dst_gc_interval",
+- .data = &decnet_dst_gc_interval,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_decnet_dst_gc_interval,
+- .extra2 = &max_decnet_dst_gc_interval
+- },
+- {
+- .procname = "no_fc_max_cwnd",
+- .data = &decnet_no_fc_max_cwnd,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &min_decnet_no_fc_max_cwnd,
+- .extra2 = &max_decnet_no_fc_max_cwnd
+- },
+- {
+- .procname = "decnet_mem",
+- .data = &sysctl_decnet_mem,
+- .maxlen = sizeof(sysctl_decnet_mem),
+- .mode = 0644,
+- .proc_handler = proc_doulongvec_minmax
+- },
+- {
+- .procname = "decnet_rmem",
+- .data = &sysctl_decnet_rmem,
+- .maxlen = sizeof(sysctl_decnet_rmem),
+- .mode = 0644,
+- .proc_handler = proc_dointvec,
+- },
+- {
+- .procname = "decnet_wmem",
+- .data = &sysctl_decnet_wmem,
+- .maxlen = sizeof(sysctl_decnet_wmem),
+- .mode = 0644,
+- .proc_handler = proc_dointvec,
+- },
+- {
+- .procname = "debug",
+- .data = &decnet_debug_level,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec,
+- },
+- { }
+-};
+-
+-void dn_register_sysctl(void)
+-{
+- dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table);
+-}
+-
+-void dn_unregister_sysctl(void)
+-{
+- unregister_net_sysctl_table(dn_table_header);
+-}
+-
+-#else /* CONFIG_SYSCTL */
+-void dn_unregister_sysctl(void)
+-{
+-}
+-void dn_register_sysctl(void)
+-{
+-}
+-
+-#endif
+diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
+index e9911b18bdbfa..34763f575c308 100644
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -1253,9 +1253,9 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
+ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+ const char *user_protocol)
+ {
++ const struct dsa_device_ops *tag_ops = NULL;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+- const struct dsa_device_ops *tag_ops;
+ enum dsa_tag_protocol default_proto;
+
+ /* Find out which protocol the switch would prefer. */
+@@ -1278,10 +1278,17 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+ }
+
+ tag_ops = dsa_find_tagger_by_name(user_protocol);
+- } else {
+- tag_ops = dsa_tag_driver_get(default_proto);
++ if (IS_ERR(tag_ops)) {
++ dev_warn(ds->dev,
++ "Failed to find a tagging driver for protocol %s, using default\n",
++ user_protocol);
++ tag_ops = NULL;
++ }
+ }
+
++ if (!tag_ops)
++ tag_ops = dsa_tag_driver_get(default_proto);
++
+ if (IS_ERR(tag_ops)) {
+ if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
+@@ -1341,6 +1348,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
+ const char *user_protocol;
+
+ master = of_find_net_device_by_node(ethernet);
++ of_node_put(ethernet);
+ if (!master)
+ return -EPROBE_DEFER;
+
+@@ -1630,6 +1638,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
++
++ if (!ds->setup)
++ goto out;
++
+ rtnl_lock();
+
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+@@ -1664,6 +1676,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
+ unregister_netdevice_many(&unregister_list);
+
+ rtnl_unlock();
++out:
+ mutex_unlock(&dsa2_mutex);
+ }
+ EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
+diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
+index a5c9bc7b66c6e..e91265434354e 100644
+--- a/net/dsa/dsa_priv.h
++++ b/net/dsa/dsa_priv.h
+@@ -198,6 +198,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+ }
+
+ /* port.c */
++bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr);
+ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
+ const struct dsa_device_ops *tag_ops);
+ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
+diff --git a/net/dsa/master.c b/net/dsa/master.c
+index e8e19857621bd..69ec510abe83c 100644
+--- a/net/dsa/master.c
++++ b/net/dsa/master.c
+@@ -204,8 +204,7 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ * switch in the tree that is PTP capable.
+ */
+ list_for_each_entry(dp, &dst->ports, list)
+- if (dp->ds->ops->port_hwtstamp_get ||
+- dp->ds->ops->port_hwtstamp_set)
++ if (dsa_port_supports_hwtstamp(dp, ifr))
+ return -EBUSY;
+ break;
+ }
+diff --git a/net/dsa/port.c b/net/dsa/port.c
+index 616330a16d319..31e8a7a8c3e60 100644
+--- a/net/dsa/port.c
++++ b/net/dsa/port.c
+@@ -75,6 +75,22 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+ return !err;
+ }
+
++bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr)
++{
++ struct dsa_switch *ds = dp->ds;
++ int err;
++
++ if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set)
++ return false;
++
++ /* "See through" shim implementations of the "get" method.
++ * This will clobber the ifreq structure, but we will either return an
++ * error, or the master will overwrite it with proper values.
++ */
++ err = ds->ops->port_hwtstamp_get(ds, dp->index, ifr);
++ return err != -EOPNOTSUPP;
++}
++
+ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
+ {
+ struct dsa_switch *ds = dp->ds;
+@@ -111,11 +127,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
+ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+ bool do_fast_age)
+ {
++ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ err = dsa_port_set_state(dp, state, do_fast_age);
+- if (err)
+- pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
++ if (err && err != -EOPNOTSUPP) {
++ dev_err(ds->dev, "port %d failed to set STP state %u: %pe\n",
++ dp->index, state, ERR_PTR(err));
++ }
+ }
+
+ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
+@@ -1201,8 +1220,10 @@ int dsa_port_link_register_of(struct dsa_port *dp)
+ if (ds->ops->phylink_mac_link_down)
+ ds->ops->phylink_mac_link_down(ds, port,
+ MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
++ of_node_put(phy_np);
+ return dsa_port_phylink_register(dp);
+ }
++ of_node_put(phy_np);
+ return 0;
+ }
+
+diff --git a/net/dsa/slave.c b/net/dsa/slave.c
+index a2bf2d8ac65b7..11ec9e689589b 100644
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -174,7 +174,7 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
+ dev_uc_del(master, dev->dev_addr);
+
+ out:
+- ether_addr_copy(dev->dev_addr, addr->sa_data);
++ eth_hw_addr_set(dev, addr->sa_data);
+
+ return 0;
+ }
+@@ -1954,7 +1954,7 @@ int dsa_slave_create(struct dsa_port *port)
+
+ slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
+ if (!is_zero_ether_addr(port->mac))
+- ether_addr_copy(slave_dev->dev_addr, port->mac);
++ eth_hw_addr_set(slave_dev, port->mac);
+ else
+ eth_hw_addr_inherit(slave_dev, master);
+ slave_dev->priv_flags |= IFF_NO_QUEUE;
+diff --git a/net/dsa/switch.c b/net/dsa/switch.c
+index 6466d0539af9f..fb69f2f14234e 100644
+--- a/net/dsa/switch.c
++++ b/net/dsa/switch.c
+@@ -264,7 +264,7 @@ static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
+
+ err = ds->ops->port_mdb_del(ds, port, mdb);
+ if (err) {
+- refcount_inc(&a->refcount);
++ refcount_set(&a->refcount, 1);
+ return err;
+ }
+
+@@ -329,7 +329,7 @@ static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port,
+
+ err = ds->ops->port_fdb_del(ds, port, addr, vid);
+ if (err) {
+- refcount_inc(&a->refcount);
++ refcount_set(&a->refcount, 1);
+ return err;
+ }
+
+@@ -644,7 +644,7 @@ static int
+ dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_ring_role_info *info)
+ {
+- if (!ds->ops->port_mrp_add)
++ if (!ds->ops->port_mrp_add_ring_role)
+ return -EOPNOTSUPP;
+
+ if (ds->index == info->sw_index)
+@@ -658,7 +658,7 @@ static int
+ dsa_switch_mrp_del_ring_role(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_ring_role_info *info)
+ {
+- if (!ds->ops->port_mrp_del)
++ if (!ds->ops->port_mrp_del_ring_role)
+ return -EOPNOTSUPP;
+
+ if (ds->index == info->sw_index)
+diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
+index f8f7b7c34e7da..e443088ab0f65 100644
+--- a/net/dsa/tag_8021q.c
++++ b/net/dsa/tag_8021q.c
+@@ -529,6 +529,7 @@ static void dsa_tag_8021q_teardown(struct dsa_switch *ds)
+ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
+ {
+ struct dsa_8021q_context *ctx;
++ int err;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+@@ -541,7 +542,15 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
+
+ ds->tag_8021q_ctx = ctx;
+
+- return dsa_tag_8021q_setup(ds);
++ err = dsa_tag_8021q_setup(ds);
++ if (err)
++ goto err_free;
++
++ return 0;
++
++err_free:
++ kfree(ctx);
++ return err;
+ }
+ EXPORT_SYMBOL_GPL(dsa_tag_8021q_register);
+
+diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
+index 96dbb8ee2fee1..ed5f68c4f1dad 100644
+--- a/net/dsa/tag_brcm.c
++++ b/net/dsa/tag_brcm.c
+@@ -7,6 +7,7 @@
+
+ #include <linux/dsa/brcm.h>
+ #include <linux/etherdevice.h>
++#include <linux/if_vlan.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
+
+@@ -248,6 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
+ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+ {
++ int len = BRCM_LEG_TAG_LEN;
+ int source_port;
+ u8 *brcm_tag;
+
+@@ -262,12 +264,16 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
+ if (!skb->dev)
+ return NULL;
+
++ /* VLAN tag is added by BCM63xx internal switch */
++ if (netdev_uses_dsa(skb->dev))
++ len += VLAN_HLEN;
++
+ /* Remove Broadcom tag and update checksum */
+- skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
++ skb_pull_rcsum(skb, len);
+
+ dsa_default_offload_fwd_mark(skb);
+
+- dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN);
++ dsa_strip_etype_header(skb, len);
+
+ return skb;
+ }
+diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
+index f64b805303cd7..53a206d116850 100644
+--- a/net/dsa/tag_hellcreek.c
++++ b/net/dsa/tag_hellcreek.c
+@@ -21,6 +21,14 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 *tag;
+
++ /* Calculate checksums (if required) before adding the trailer tag to
++ * avoid including it in calculations. That would lead to wrong
++ * checksums after the switch strips the tag.
++ */
++ if (skb->ip_summed == CHECKSUM_PARTIAL &&
++ skb_checksum_help(skb))
++ return NULL;
++
+ /* Tag encoding */
+ tag = skb_put(skb, HELLCREEK_TAG_LEN);
+ *tag = BIT(dp->index);
+@@ -37,11 +45,12 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
+
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev) {
+- netdev_warn(dev, "Failed to get source port: %d\n", port);
++ netdev_warn_once(dev, "Failed to get source port: %d\n", port);
+ return NULL;
+ }
+
+- pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
++ if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN))
++ return NULL;
+
+ dsa_default_offload_fwd_mark(skb);
+
+diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
+index fa1d60d13ad90..6795dd0174996 100644
+--- a/net/dsa/tag_ksz.c
++++ b/net/dsa/tag_ksz.c
+@@ -22,7 +22,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
+ if (!skb->dev)
+ return NULL;
+
+- pskb_trim_rcsum(skb, skb->len - len);
++ if (pskb_trim_rcsum(skb, skb->len - len))
++ return NULL;
+
+ dsa_default_offload_fwd_mark(skb);
+
+diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
+index cb548188f8134..98d7d7120bab2 100644
+--- a/net/dsa/tag_lan9303.c
++++ b/net/dsa/tag_lan9303.c
+@@ -77,7 +77,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
+ {
+- __be16 *lan9303_tag;
+ u16 lan9303_tag1;
+ unsigned int source_port;
+
+@@ -87,14 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
+ return NULL;
+ }
+
+- lan9303_tag = dsa_etype_header_pos_rx(skb);
+-
+- if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
+- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
+- return NULL;
++ if (skb_vlan_tag_present(skb)) {
++ lan9303_tag1 = skb_vlan_tag_get(skb);
++ __vlan_hwaccel_clear_tag(skb);
++ } else {
++ skb_push_rcsum(skb, ETH_HLEN);
++ __skb_vlan_pop(skb, &lan9303_tag1);
++ skb_pull_rcsum(skb, ETH_HLEN);
+ }
+
+- lan9303_tag1 = ntohs(lan9303_tag[1]);
+ source_port = lan9303_tag1 & 0x3;
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+@@ -103,13 +103,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
+ return NULL;
+ }
+
+- /* remove the special VLAN tag between the MAC addresses
+- * and the current ethertype field.
+- */
+- skb_pull_rcsum(skb, 2 + 2);
+-
+- dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
+-
+ if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
+ dsa_default_offload_fwd_mark(skb);
+
+diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
+index 605b51ca69210..6e0518aa3a4d2 100644
+--- a/net/dsa/tag_ocelot.c
++++ b/net/dsa/tag_ocelot.c
+@@ -62,6 +62,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
+ struct dsa_port *dp;
+ u8 *extraction;
+ u16 vlan_tpid;
++ u64 rew_val;
+
+ /* Revert skb->data by the amount consumed by the DSA master,
+ * so it points to the beginning of the frame.
+@@ -91,6 +92,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
+ ocelot_xfh_get_qos_class(extraction, &qos_class);
+ ocelot_xfh_get_tag_type(extraction, &tag_type);
+ ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
++ ocelot_xfh_get_rew_val(extraction, &rew_val);
+
+ skb->dev = dsa_master_find_slave(netdev, 0, src_port);
+ if (!skb->dev)
+@@ -104,6 +106,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
+
+ dsa_default_offload_fwd_mark(skb);
+ skb->priority = qos_class;
++ OCELOT_SKB_CB(skb)->tstamp_lo = rew_val;
+
+ /* Ocelot switches copy frames unmodified to the CPU. However, it is
+ * possible for the user to request a VLAN modification through
+diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
+index 2edede9ddac93..a163f535697e1 100644
+--- a/net/dsa/tag_sja1105.c
++++ b/net/dsa/tag_sja1105.c
+@@ -100,8 +100,8 @@ static void sja1105_meta_unpack(const struct sk_buff *skb,
+ * a unified unpacking command for both device series.
+ */
+ packing(buf, &meta->tstamp, 31, 0, 4, UNPACK, 0);
+- packing(buf + 4, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0);
+- packing(buf + 5, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0);
++ packing(buf + 4, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0);
++ packing(buf + 5, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0);
+ packing(buf + 6, &meta->source_port, 7, 0, 1, UNPACK, 0);
+ packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0);
+ }
+@@ -644,7 +644,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
+ * padding and trailer we need to account for the fact that
+ * skb->data points to skb_mac_header(skb) + ETH_HLEN.
+ */
+- pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
++ if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN))
++ return NULL;
+ /* Trap-to-host frame, no timestamp trailer */
+ } else {
+ *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
+diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
+index 7e6b37a54add3..49c0a2a77f02d 100644
+--- a/net/ethtool/eeprom.c
++++ b/net/ethtool/eeprom.c
+@@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
+ if (request->page)
+ offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
+
+- if (modinfo->type == ETH_MODULE_SFF_8079 &&
++ if (modinfo->type == ETH_MODULE_SFF_8472 &&
+ request->i2c_address == 0x51)
+ offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
+
+@@ -124,7 +124,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
+ if (ret)
+ goto err_free;
+
+- ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
++ ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL);
+ if (ret < 0)
+ goto err_ops;
+
+diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
+index f2abc31528883..53e2ef6ada8f3 100644
+--- a/net/ethtool/ioctl.c
++++ b/net/ethtool/ioctl.c
+@@ -568,8 +568,8 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
+ static int ethtool_set_link_ksettings(struct net_device *dev,
+ void __user *useraddr)
+ {
++ struct ethtool_link_ksettings link_ksettings = {};
+ int err;
+- struct ethtool_link_ksettings link_ksettings;
+
+ ASSERT_RTNL();
+
+@@ -1697,7 +1697,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce coalesce;
+ int ret;
+
+- if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
++ if (!dev->ethtool_ops->set_coalesce || !dev->ethtool_ops->get_coalesce)
+ return -EOPNOTSUPP;
+
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+@@ -1988,7 +1988,8 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
+ } else {
+ /* Driver expects to be called at twice the frequency in rc */
+ int n = rc * 2, interval = HZ / n;
+- u64 count = n * id.data, i = 0;
++ u64 count = mul_u32_u32(n, id.data);
++ u64 i = 0;
+
+ do {
+ rtnl_lock();
+@@ -2073,7 +2074,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+ return n_stats;
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+- WARN_ON_ONCE(!n_stats);
++ if (WARN_ON_ONCE(!n_stats))
++ return -EOPNOTSUPP;
+
+ if (copy_from_user(&stats, useraddr, sizeof(stats)))
+ return -EFAULT;
+diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
+index f9eda596f3014..3d05b9bf34854 100644
+--- a/net/ethtool/linkmodes.c
++++ b/net/ethtool/linkmodes.c
+@@ -277,11 +277,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
+ "lanes configuration not supported by device");
+ return -EOPNOTSUPP;
+ }
+- } else if (!lsettings->autoneg) {
+- /* If autoneg is off and lanes parameter is not passed from user,
+- * set the lanes parameter to 0.
++ } else if (!lsettings->autoneg && ksettings->lanes) {
++ /* If autoneg is off and lanes parameter is not passed from user but
++ * it was defined previously then set the lanes parameter to 0.
+ */
+ ksettings->lanes = 0;
++ *mod = true;
+ }
+
+ ret = ethnl_update_bitset(ksettings->link_modes.advertising,
+diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
+index 1797a0a900195..b3729bdafb602 100644
+--- a/net/ethtool/netlink.c
++++ b/net/ethtool/netlink.c
+@@ -40,7 +40,8 @@ int ethnl_ops_begin(struct net_device *dev)
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+- if (!netif_device_present(dev)) {
++ if (!netif_device_present(dev) ||
++ dev->reg_state == NETREG_UNREGISTERING) {
+ ret = -ENODEV;
+ goto err;
+ }
+diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
+index 9009f412151e7..ee1e5806bc93a 100644
+--- a/net/ethtool/pause.c
++++ b/net/ethtool/pause.c
+@@ -56,8 +56,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base,
+
+ if (req_base->flags & ETHTOOL_FLAG_STATS)
+ n += nla_total_size(0) + /* _PAUSE_STATS */
+- nla_total_size_64bit(sizeof(u64)) *
+- (ETHTOOL_A_PAUSE_STAT_MAX - 2);
++ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT;
+ return n;
+ }
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index 26c32407f0290..7ce40b49c9560 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev)
+
+ static void __hsr_set_operstate(struct net_device *dev, int transition)
+ {
+- write_lock_bh(&dev_base_lock);
++ write_lock(&dev_base_lock);
+ if (dev->operstate != transition) {
+ dev->operstate = transition;
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+ netdev_state_change(dev);
+ } else {
+- write_unlock_bh(&dev_base_lock);
++ write_unlock(&dev_base_lock);
+ }
+ }
+
+@@ -219,7 +219,9 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+ skb->dev = master->dev;
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
++ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_forward_skb(skb, master);
++ spin_unlock_bh(&hsr->seqnr_lock);
+ } else {
+ atomic_long_inc(&dev->tx_dropped);
+ dev_kfree_skb_any(skb);
+@@ -278,7 +280,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+ __u8 type = HSR_TLV_LIFE_CHECK;
+ struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tag *hsr_stag;
+- unsigned long irqflags;
+ struct sk_buff *skb;
+
+ *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+@@ -299,7 +300,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+ set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
+
+ /* From HSRv1 on we have separate supervision sequence numbers. */
+- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
++ spin_lock_bh(&hsr->seqnr_lock);
+ if (hsr->prot_version > 0) {
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
+@@ -307,7 +308,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+ hsr_stag->sequence_nr = htons(hsr->sequence_nr);
+ hsr->sequence_nr++;
+ }
+- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
+
+ hsr_stag->HSR_TLV_type = type;
+ /* TODO: Why 12 in HSRv0? */
+@@ -318,11 +318,13 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
+ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+ ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+
+- if (skb_put_padto(skb, ETH_ZLEN))
++ if (skb_put_padto(skb, ETH_ZLEN)) {
++ spin_unlock_bh(&hsr->seqnr_lock);
+ return;
++ }
+
+ hsr_forward_skb(skb, master);
+-
++ spin_unlock_bh(&hsr->seqnr_lock);
+ return;
+ }
+
+@@ -332,7 +334,6 @@ static void send_prp_supervision_frame(struct hsr_port *master,
+ struct hsr_priv *hsr = master->hsr;
+ struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tag *hsr_stag;
+- unsigned long irqflags;
+ struct sk_buff *skb;
+
+ skb = hsr_init_skb(master);
+@@ -347,7 +348,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
+ set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
+
+ /* From HSRv1 on we have separate supervision sequence numbers. */
+- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
++ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
+ hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
+@@ -358,13 +359,12 @@ static void send_prp_supervision_frame(struct hsr_port *master,
+ ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+
+ if (skb_put_padto(skb, ETH_ZLEN)) {
+- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
++ spin_unlock_bh(&hsr->seqnr_lock);
+ return;
+ }
+
+- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
+-
+ hsr_forward_skb(skb, master);
++ spin_unlock_bh(&hsr->seqnr_lock);
+ }
+
+ /* Announce (supervision frame) timer function
+@@ -444,7 +444,7 @@ void hsr_dev_setup(struct net_device *dev)
+ dev->header_ops = &hsr_header_ops;
+ dev->netdev_ops = &hsr_device_ops;
+ SET_NETDEV_DEVTYPE(dev, &hsr_type);
+- dev->priv_flags |= IFF_NO_QUEUE;
++ dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
+
+ dev->needs_free_netdev = true;
+
+@@ -493,7 +493,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+ INIT_LIST_HEAD(&hsr->self_node_db);
+ spin_lock_init(&hsr->list_lock);
+
+- ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
++ eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
+
+ /* initialize protocol specific functions */
+ if (protocol_version == PRP_V1) {
+diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
+index ceb8afb2a62f4..12ba43023d30e 100644
+--- a/net/hsr/hsr_forward.c
++++ b/net/hsr/hsr_forward.c
+@@ -108,15 +108,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
+ {
+ if (!frame->skb_std) {
+- if (frame->skb_hsr) {
++ if (frame->skb_hsr)
+ frame->skb_std =
+ create_stripped_skb_hsr(frame->skb_hsr, frame);
+- } else {
+- /* Unexpected */
+- WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
+- __FILE__, __LINE__, port->dev->name);
++ else
++ netdev_warn_once(port->dev,
++ "Unexpected frame received in hsr_get_untagged_frame()\n");
++
++ if (!frame->skb_std)
+ return NULL;
+- }
+ }
+
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
+@@ -309,17 +309,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
+ struct hsr_node *node_src)
+ {
+ bool was_multicast_frame;
+- int res;
++ int res, recv_len;
+
+ was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
+ hsr_addr_subst_source(node_src, skb);
+ skb_pull(skb, ETH_HLEN);
++ recv_len = skb->len;
+ res = netif_rx(skb);
+ if (res == NET_RX_DROP) {
+ dev->stats.rx_dropped++;
+ } else {
+ dev->stats.rx_packets++;
+- dev->stats.rx_bytes += skb->len;
++ dev->stats.rx_bytes += recv_len;
+ if (was_multicast_frame)
+ dev->stats.multicast++;
+ }
+@@ -457,7 +458,6 @@ static void handle_std_frame(struct sk_buff *skb,
+ {
+ struct hsr_port *port = frame->port_rcv;
+ struct hsr_priv *hsr = port->hsr;
+- unsigned long irqflags;
+
+ frame->skb_hsr = NULL;
+ frame->skb_prp = NULL;
+@@ -467,10 +467,9 @@ static void handle_std_frame(struct sk_buff *skb,
+ frame->is_from_san = true;
+ } else {
+ /* Sequence nr for the master node */
+- spin_lock_irqsave(&hsr->seqnr_lock, irqflags);
++ lockdep_assert_held(&hsr->seqnr_lock);
+ frame->sequence_nr = hsr->sequence_nr;
+ hsr->sequence_nr++;
+- spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags);
+ }
+ }
+
+@@ -553,6 +552,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
+ proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
+ /* FIXME: */
+ netdev_warn_once(skb->dev, "VLAN not yet supported");
++ return -EINVAL;
+ }
+
+ frame->is_from_san = false;
+@@ -571,11 +571,13 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
+ {
+ struct hsr_frame_info frame;
+
++ rcu_read_lock();
+ if (fill_frame_info(&frame, skb, port) < 0)
+ goto out_drop;
+
+ hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
+ hsr_forward_do(&frame);
++ rcu_read_unlock();
+ /* Gets called for ingress frames as well as egress from master port.
+ * So check and increment stats for master port only here.
+ */
+@@ -590,6 +592,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
+ return;
+
+ out_drop:
++ rcu_read_unlock();
+ port->dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ }
+diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
+index e31949479305e..8c9a20e99f0f4 100644
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -159,6 +159,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
+ return NULL;
+
+ ether_addr_copy(new_node->macaddress_A, addr);
++ spin_lock_init(&new_node->seq_out_lock);
+
+ /* We are only interested in time diffs here, so use current jiffies
+ * as initialization. (0 could trigger an spurious ring error warning).
+@@ -313,6 +314,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
+ goto done;
+
+ ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
++ spin_lock_bh(&node_real->seq_out_lock);
+ for (i = 0; i < HSR_PT_PORTS; i++) {
+ if (!node_curr->time_in_stale[i] &&
+ time_after(node_curr->time_in[i], node_real->time_in[i])) {
+@@ -323,12 +325,16 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
+ if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
+ node_real->seq_out[i] = node_curr->seq_out[i];
+ }
++ spin_unlock_bh(&node_real->seq_out_lock);
+ node_real->addr_B_port = port_rcv->type;
+
+ spin_lock_bh(&hsr->list_lock);
+- list_del_rcu(&node_curr->mac_list);
++ if (!node_curr->removed) {
++ list_del_rcu(&node_curr->mac_list);
++ node_curr->removed = true;
++ kfree_rcu(node_curr, rcu_head);
++ }
+ spin_unlock_bh(&hsr->list_lock);
+- kfree_rcu(node_curr, rcu_head);
+
+ done:
+ /* PRP uses v0 header */
+@@ -379,7 +385,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
+ node_dst = find_node_by_addr_A(&port->hsr->node_db,
+ eth_hdr(skb)->h_dest);
+ if (!node_dst) {
+- if (net_ratelimit())
++ if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
+ netdev_err(skb->dev, "%s: Unknown node\n", __func__);
+ return;
+ }
+@@ -416,13 +422,17 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
+ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
+ u16 sequence_nr)
+ {
++ spin_lock_bh(&node->seq_out_lock);
+ if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
+ time_is_after_jiffies(node->time_out[port->type] +
+- msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)))
++ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) {
++ spin_unlock_bh(&node->seq_out_lock);
+ return 1;
++ }
+
+ node->time_out[port->type] = jiffies;
+ node->seq_out[port->type] = sequence_nr;
++ spin_unlock_bh(&node->seq_out_lock);
+ return 0;
+ }
+
+@@ -502,9 +512,12 @@ void hsr_prune_nodes(struct timer_list *t)
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+- list_del_rcu(&node->mac_list);
+- /* Note that we need to free this entry later: */
+- kfree_rcu(node, rcu_head);
++ if (!node->removed) {
++ list_del_rcu(&node->mac_list);
++ node->removed = true;
++ /* Note that we need to free this entry later: */
++ kfree_rcu(node, rcu_head);
++ }
+ }
+ }
+ spin_unlock_bh(&hsr->list_lock);
+diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
+index d9628e7a5f051..48990166e4c4e 100644
+--- a/net/hsr/hsr_framereg.h
++++ b/net/hsr/hsr_framereg.h
+@@ -69,6 +69,8 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup);
+
+ struct hsr_node {
+ struct list_head mac_list;
++ /* Protect R/W access to seq_out */
++ spinlock_t seq_out_lock;
+ unsigned char macaddress_A[ETH_ALEN];
+ unsigned char macaddress_B[ETH_ALEN];
+ /* Local slave through which AddrB frames are received from this node */
+@@ -80,6 +82,7 @@ struct hsr_node {
+ bool san_a;
+ bool san_b;
+ u16 seq_out[HSR_PT_PORTS];
++ bool removed;
+ struct rcu_head rcu_head;
+ };
+
+diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
+index f7e284f23b1f3..b099c31501509 100644
+--- a/net/hsr/hsr_main.c
++++ b/net/hsr/hsr_main.c
+@@ -75,7 +75,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+
+ if (port->type == HSR_PT_SLAVE_A) {
+- ether_addr_copy(master->dev->dev_addr, dev->dev_addr);
++ eth_hw_addr_set(master->dev, dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR,
+ master->dev);
+ }
+diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
+index 277124f206e06..e0b072aecf0f3 100644
+--- a/net/ieee802154/nl802154.c
++++ b/net/ieee802154/nl802154.c
+@@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+- return -1;
++ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+@@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+- return -1;
++ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+@@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+- return -1;
++ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+@@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+- return -1;
++ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
+index 7bb9ef35c5707..c33f46c9b6b34 100644
+--- a/net/ieee802154/socket.c
++++ b/net/ieee802154/socket.c
+@@ -200,8 +200,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
+ int err = 0;
+ struct net_device *dev = NULL;
+
+- if (len < sizeof(*uaddr))
+- return -EINVAL;
++ err = ieee802154_sockaddr_check_size(uaddr, len);
++ if (err < 0)
++ return err;
+
+ uaddr = (struct sockaddr_ieee802154 *)_uaddr;
+ if (uaddr->family != AF_IEEE802154)
+@@ -271,6 +272,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ err = -EMSGSIZE;
+ goto out_dev;
+ }
++ if (!size) {
++ err = 0;
++ goto out_dev;
++ }
+
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+@@ -493,11 +498,14 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
+
+ ro->bound = 0;
+
+- if (len < sizeof(*addr))
++ err = ieee802154_sockaddr_check_size(addr, len);
++ if (err < 0)
+ goto out;
+
+- if (addr->family != AF_IEEE802154)
++ if (addr->family != AF_IEEE802154) {
++ err = -EINVAL;
+ goto out;
++ }
+
+ ieee802154_addr_from_sa(&haddr, &addr->addr);
+ dev = ieee802154_get_dev(sock_net(sk), &haddr);
+@@ -564,8 +572,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
+ struct dgram_sock *ro = dgram_sk(sk);
+ int err = 0;
+
+- if (len < sizeof(*addr))
+- return -EINVAL;
++ err = ieee802154_sockaddr_check_size(addr, len);
++ if (err < 0)
++ return err;
+
+ if (addr->family != AF_IEEE802154)
+ return -EINVAL;
+@@ -604,6 +613,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ struct ieee802154_mac_cb *cb;
+ struct dgram_sock *ro = dgram_sk(sk);
+ struct ieee802154_addr dst_addr;
++ DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
+ int hlen, tlen;
+ int err;
+
+@@ -612,10 +622,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ return -EOPNOTSUPP;
+ }
+
+- if (!ro->connected && !msg->msg_name)
+- return -EDESTADDRREQ;
+- else if (ro->connected && msg->msg_name)
+- return -EISCONN;
++ if (msg->msg_name) {
++ if (ro->connected)
++ return -EISCONN;
++ if (msg->msg_namelen < IEEE802154_MIN_NAMELEN)
++ return -EINVAL;
++ err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen);
++ if (err < 0)
++ return err;
++ ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
++ } else {
++ if (!ro->connected)
++ return -EDESTADDRREQ;
++ dst_addr = ro->dst_addr;
++ }
+
+ if (!ro->bound)
+ dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
+@@ -651,16 +671,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ cb = mac_cb_init(skb);
+ cb->type = IEEE802154_FC_TYPE_DATA;
+ cb->ackreq = ro->want_ack;
+-
+- if (msg->msg_name) {
+- DECLARE_SOCKADDR(struct sockaddr_ieee802154*,
+- daddr, msg->msg_name);
+-
+- ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+- } else {
+- dst_addr = ro->dst_addr;
+- }
+-
+ cb->secen = ro->secen;
+ cb->secen_override = ro->secen_override;
+ cb->seclevel = ro->seclevel;
+diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
+index 87983e70f03f3..23b06063e1a51 100644
+--- a/net/ipv4/Kconfig
++++ b/net/ipv4/Kconfig
+@@ -403,6 +403,16 @@ config INET_IPCOMP
+
+ If unsure, say Y.
+
++config INET_TABLE_PERTURB_ORDER
++ int "INET: Source port perturbation table size (as power of 2)" if EXPERT
++ default 16
++ help
++ Source port perturbation table size (as power of 2) for
++ RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm.
++
++ The default is almost always what you want.
++ Only change this if you know what you are doing.
++
+ config INET_XFRM_TUNNEL
+ tristate
+ select INET_TUNNEL
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 1d816a5fd3eb9..e46b11507edc2 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -158,7 +158,7 @@ void inet_sock_destruct(struct sock *sk)
+
+ kfree(rcu_dereference_protected(inet->inet_opt, 1));
+ dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
+- dst_release(sk->sk_rx_dst);
++ dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
+ sk_refcnt_debug_dec(sk);
+ }
+ EXPORT_SYMBOL(inet_sock_destruct);
+@@ -220,7 +220,7 @@ int inet_listen(struct socket *sock, int backlog)
+ * because the socket was in TCP_LISTEN state previously but
+ * was shutdown() rather than close().
+ */
+- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
++ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
+ if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (tcp_fastopen & TFO_SERVER_ENABLE) &&
+ !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
+@@ -338,7 +338,7 @@ lookup_protocol:
+ inet->hdrincl = 1;
+ }
+
+- if (net->ipv4.sysctl_ip_no_pmtu_disc)
++ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -587,6 +587,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending += writebias;
++ sk->sk_wait_pending++;
+
+ /* Basic assumption: if someone sets sk->sk_err, he _must_
+ * change state of the socket from TCP_SYN_*.
+@@ -602,6 +603,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+ sk->sk_write_pending -= writebias;
++ sk->sk_wait_pending--;
+ return timeo;
+ }
+
+@@ -773,26 +775,28 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
+ DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
+
+ sin->sin_family = AF_INET;
++ lock_sock(sk);
+ if (peer) {
+ if (!inet->inet_dport ||
+ (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+- peer == 1))
++ peer == 1)) {
++ release_sock(sk);
+ return -ENOTCONN;
++ }
+ sin->sin_port = inet->inet_dport;
+ sin->sin_addr.s_addr = inet->inet_daddr;
+- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+- CGROUP_INET4_GETPEERNAME,
+- NULL);
++ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
++ CGROUP_INET4_GETPEERNAME);
+ } else {
+ __be32 addr = inet->inet_rcv_saddr;
+ if (!addr)
+ addr = inet->inet_saddr;
+ sin->sin_port = inet->inet_sport;
+ sin->sin_addr.s_addr = addr;
+- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+- CGROUP_INET4_GETSOCKNAME,
+- NULL);
++ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
++ CGROUP_INET4_GETSOCKNAME);
+ }
++ release_sock(sk);
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ return sizeof(*sin);
+ }
+@@ -888,7 +892,7 @@ int inet_shutdown(struct socket *sock, int how)
+ EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
+ fallthrough;
+ default:
+- sk->sk_shutdown |= how;
++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
+ if (sk->sk_prot->shutdown)
+ sk->sk_prot->shutdown(sk, how);
+ break;
+@@ -1249,7 +1253,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
+ if (new_saddr == old_saddr)
+ return 0;
+
+- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
+ pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
+ __func__, &old_saddr, &new_saddr);
+ }
+@@ -1304,7 +1308,7 @@ int inet_sk_rebuild_header(struct sock *sk)
+ * Other protocols have to map its equivalent state to TCP_SYN_SENT.
+ * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
+ */
+- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
+ sk->sk_state != TCP_SYN_SENT ||
+ (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
+ (err = inet_sk_reselect_saddr(sk)) != 0)
+@@ -1378,8 +1382,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+ }
+
+ ops = rcu_dereference(inet_offloads[proto]);
+- if (likely(ops && ops->callbacks.gso_segment))
++ if (likely(ops && ops->callbacks.gso_segment)) {
+ segs = ops->callbacks.gso_segment(skb, features);
++ if (!segs)
++ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
++ }
+
+ if (IS_ERR_OR_NULL(segs))
+ goto out;
+@@ -1723,24 +1730,14 @@ static const struct net_protocol igmp_protocol = {
+ };
+ #endif
+
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct net_protocol tcp_protocol = {
+- .early_demux = tcp_v4_early_demux,
+- .early_demux_handler = tcp_v4_early_demux,
++static const struct net_protocol tcp_protocol = {
+ .handler = tcp_v4_rcv,
+ .err_handler = tcp_v4_err,
+ .no_policy = 1,
+ .icmp_strict_tag_validation = 1,
+ };
+
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct net_protocol udp_protocol = {
+- .early_demux = udp_v4_early_demux,
+- .early_demux_handler = udp_v4_early_demux,
++static const struct net_protocol udp_protocol = {
+ .handler = udp_rcv,
+ .err_handler = udp_err,
+ .no_policy = 1,
+@@ -2002,6 +1999,10 @@ static int __init inet_init(void)
+
+ ip_init();
+
++ /* Initialise per-cpu ipv4 mibs */
++ if (init_ipv4_mibs())
++ panic("%s: Cannot init ipv4 mibs\n", __func__);
++
+ /* Setup TCP slab cache for open requests. */
+ tcp_init();
+
+@@ -2032,12 +2033,6 @@ static int __init inet_init(void)
+
+ if (init_inet_pernet_ops())
+ pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
+- /*
+- * Initialise per-cpu ipv4 mibs
+- */
+-
+- if (init_ipv4_mibs())
+- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
+
+ ipv4_proc_init();
+
+diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
+index 922dd73e57406..83a47998c4b18 100644
+--- a/net/ipv4/arp.c
++++ b/net/ipv4/arp.c
+@@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
+ return err;
+ }
+
+-static int arp_invalidate(struct net_device *dev, __be32 ip)
++int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
+ {
+ struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
+ int err = -ENXIO;
+ struct neigh_table *tbl = &arp_tbl;
+
+ if (neigh) {
++ if ((neigh->nud_state & NUD_VALID) && !force) {
++ neigh_release(neigh);
++ return 0;
++ }
++
+ if (neigh->nud_state & ~NUD_NOARP)
+ err = neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE|
+@@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
+ if (!dev)
+ return -EINVAL;
+ }
+- return arp_invalidate(dev, ip);
++ return arp_invalidate(dev, ip, true);
+ }
+
+ /*
+diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
+index 0dcee9df13268..d3a2dbd13ea6b 100644
+--- a/net/ipv4/bpf_tcp_ca.c
++++ b/net/ipv4/bpf_tcp_ca.c
+@@ -212,6 +212,8 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
++ case BPF_FUNC_ktime_get_coarse_ns:
++ return &bpf_ktime_get_coarse_ns_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
+index 099259fc826aa..75ac145253445 100644
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
+ struct cipso_v4_map_cache_entry *prev_entry = NULL;
+ u32 hash;
+
+- if (!cipso_v4_cache_enabled)
++ if (!READ_ONCE(cipso_v4_cache_enabled))
+ return -ENOENT;
+
+ hash = cipso_v4_map_cache_hash(key, key_len);
+@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
+ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
+ const struct netlbl_lsm_secattr *secattr)
+ {
++ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
+ int ret_val = -EPERM;
+ u32 bkt;
+ struct cipso_v4_map_cache_entry *entry = NULL;
+ struct cipso_v4_map_cache_entry *old_entry = NULL;
+ u32 cipso_ptr_len;
+
+- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
++ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
+ return 0;
+
+ cipso_ptr_len = cipso_ptr[1];
+@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
+
+ bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
+ spin_lock_bh(&cipso_v4_cache[bkt].lock);
+- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
++ if (cipso_v4_cache[bkt].size < bkt_size) {
+ list_add(&entry->list, &cipso_v4_cache[bkt].list);
+ cipso_v4_cache[bkt].size += 1;
+ } else {
+@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+ /* This will send packets using the "optimized" format when
+ * possible as specified in section 3.4.2.6 of the
+ * CIPSO draft. */
+- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
++ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
++ ret_val <= 10)
+ tag_len = 14;
+ else
+ tag_len = 4 + ret_val;
+@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
+ * all the CIPSO validations here but it doesn't
+ * really specify _exactly_ what we need to validate
+ * ... so, just make it a sysctl tunable. */
+- if (cipso_v4_rbm_strictvalid) {
++ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
+ if (cipso_v4_map_lvl_valid(doi_def,
+ tag[3]) < 0) {
+ err_offset = opt_iter + 3;
+diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
+index 4a8550c49202d..112c6e892d305 100644
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
+ }
+ inet->inet_daddr = fl4->daddr;
+ inet->inet_dport = usin->sin_port;
+- reuseport_has_conns(sk, true);
++ reuseport_has_conns_set(sk);
+ sk->sk_state = TCP_ESTABLISHED;
+ sk_set_txhash(sk);
+ inet->inet_id = prandom_u32();
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index f4468980b6757..c511751c2f41a 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -351,14 +351,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
+ {
+ struct in_ifaddr *promote = NULL;
+ struct in_ifaddr *ifa, *ifa1;
+- struct in_ifaddr *last_prim;
++ struct in_ifaddr __rcu **last_prim;
+ struct in_ifaddr *prev_prom = NULL;
+ int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
+
+ ASSERT_RTNL();
+
+ ifa1 = rtnl_dereference(*ifap);
+- last_prim = rtnl_dereference(in_dev->ifa_list);
++ last_prim = ifap;
+ if (in_dev->dead)
+ goto no_promotions;
+
+@@ -372,7 +372,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
+ while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
+ if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
+ ifa1->ifa_scope <= ifa->ifa_scope)
+- last_prim = ifa;
++ last_prim = &ifa->ifa_next;
+
+ if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
+ ifa1->ifa_mask != ifa->ifa_mask ||
+@@ -436,9 +436,9 @@ no_promotions:
+
+ rcu_assign_pointer(prev_prom->ifa_next, next_sec);
+
+- last_sec = rtnl_dereference(last_prim->ifa_next);
++ last_sec = rtnl_dereference(*last_prim);
+ rcu_assign_pointer(promote->ifa_next, last_sec);
+- rcu_assign_pointer(last_prim->ifa_next, promote);
++ rcu_assign_pointer(*last_prim, promote);
+ }
+
+ promote->ifa_flags &= ~IFA_F_SECONDARY;
+@@ -2587,7 +2587,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
+ free:
+ kfree(t);
+ out:
+- return -ENOBUFS;
++ return -ENOMEM;
+ }
+
+ static void __devinet_sysctl_unregister(struct net *net,
+@@ -2673,23 +2673,27 @@ static __net_init int devinet_init_net(struct net *net)
+ #endif
+
+ if (!net_eq(net, &init_net)) {
+- if (IS_ENABLED(CONFIG_SYSCTL) &&
+- sysctl_devconf_inherit_init_net == 3) {
++ switch (net_inherit_devconf()) {
++ case 3:
+ /* copy from the current netns */
+ memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
+ sizeof(ipv4_devconf));
+ memcpy(dflt,
+ current->nsproxy->net_ns->ipv4.devconf_dflt,
+ sizeof(ipv4_devconf_dflt));
+- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
+- sysctl_devconf_inherit_init_net != 2) {
+- /* inherit == 0 or 1: copy from init_net */
++ break;
++ case 0:
++ case 1:
++ /* copy from init_net */
+ memcpy(all, init_net.ipv4.devconf_all,
+ sizeof(ipv4_devconf));
+ memcpy(dflt, init_net.ipv4.devconf_dflt,
+ sizeof(ipv4_devconf_dflt));
++ break;
++ case 2:
++ /* use compiled values */
++ break;
+ }
+- /* else inherit == 2: use compiled values */
+ }
+
+ #ifdef CONFIG_SYSCTL
+diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+index 851f542928a33..386e9875e5b80 100644
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -455,6 +455,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
+ return err;
+ }
+
++ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
++ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
++ goto cow;
++
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
+ nfrags = 1;
+@@ -671,7 +675,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
++ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+@@ -1129,7 +1133,7 @@ static int esp_init_authenc(struct xfrm_state *x)
+ err = crypto_aead_setkey(aead, key, keylen);
+
+ free_key:
+- kfree(key);
++ kfree_sensitive(key);
+
+ error:
+ return err;
+diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
+index 8e4e9aa12130d..ab6fe94b8fd90 100644
+--- a/net/ipv4/esp4_offload.c
++++ b/net/ipv4/esp4_offload.c
+@@ -159,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ }
+
++ if (proto == IPPROTO_IPV6)
++ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
++
+ __skb_pull(skb, skb_transport_offset(skb));
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+@@ -308,6 +311,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
+ xo->seq.low += skb_shinfo(skb)->gso_segs;
+ }
+
++ if (xo->seq.low < seq)
++ xo->seq.hi++;
++
+ esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
+
+ ip_hdr(skb)->tot_len = htons(skb->len);
+@@ -331,6 +337,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
+
+ secpath_reset(skb);
+
++ if (skb_needs_linearize(skb, skb->dev->features) &&
++ __skb_linearize(skb))
++ return -ENOMEM;
+ return 0;
+ }
+
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index 9fe13e4f5d08a..c21d57f02c651 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -389,7 +389,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ dev_match = dev_match || (res.type == RTN_LOCAL &&
+ dev == net->loopback_dev);
+ if (dev_match) {
+- ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
++ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
+ return ret;
+ }
+ if (no_addr)
+@@ -401,7 +401,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ ret = 0;
+ if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
+ if (res.type == RTN_UNICAST)
+- ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
++ ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_LINK;
+ }
+ return ret;
+
+@@ -573,6 +573,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
+ cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ }
+
++ if (!cfg->fc_table)
++ cfg->fc_table = RT_TABLE_MAIN;
++
+ if (cmd == SIOCDELRT)
+ return 0;
+
+@@ -830,6 +833,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
+ return -EINVAL;
+ }
+
++ if (!cfg->fc_table)
++ cfg->fc_table = RT_TABLE_MAIN;
++
+ return 0;
+ errout:
+ return err;
+@@ -1112,9 +1118,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
+ return;
+
+ /* Add broadcast address, if it is explicitly assigned. */
+- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
++ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
++ arp_invalidate(dev, ifa->ifa_broadcast, false);
++ }
+
+ if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
+ (prefix != addr || ifa->ifa_prefixlen < 32)) {
+@@ -1128,6 +1136,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
+ if (ifa->ifa_prefixlen < 31) {
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
+ 32, prim, 0);
++ arp_invalidate(dev, prefix | ~mask, false);
+ }
+ }
+ }
+@@ -1582,7 +1591,7 @@ static int __net_init fib_net_init(struct net *net)
+ int error;
+
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+- net->ipv4.fib_num_tclassid_users = 0;
++ atomic_set(&net->ipv4.fib_num_tclassid_users, 0);
+ #endif
+ error = ip_fib_net_init(net);
+ if (error < 0)
+diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
+index e184bcb199434..78e40ea42e58d 100644
+--- a/net/ipv4/fib_lookup.h
++++ b/net/ipv4/fib_lookup.h
+@@ -16,10 +16,9 @@ struct fib_alias {
+ u8 fa_slen;
+ u32 tb_id;
+ s16 fa_default;
+- u8 offload:1,
+- trap:1,
+- offload_failed:1,
+- unused:5;
++ u8 offload;
++ u8 trap;
++ u8 offload_failed;
+ struct rcu_head rcu;
+ };
+
+diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
+index ce54a30c2ef1e..d279cb8ac1584 100644
+--- a/net/ipv4/fib_rules.c
++++ b/net/ipv4/fib_rules.c
+@@ -141,6 +141,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule,
+ }
+
+ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
++ int flags,
+ struct fib_lookup_arg *arg)
+ {
+ struct fib_result *result = (struct fib_result *) arg->result;
+@@ -263,7 +264,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+ if (tb[FRA_FLOW]) {
+ rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
+ if (rule4->tclassid)
+- net->ipv4.fib_num_tclassid_users++;
++ atomic_inc(&net->ipv4.fib_num_tclassid_users);
+ }
+ #endif
+
+@@ -295,7 +296,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
+
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ if (((struct fib4_rule *)rule)->tclassid)
+- net->ipv4.fib_num_tclassid_users--;
++ atomic_dec(&net->ipv4.fib_num_tclassid_users);
+ #endif
+ net->ipv4.fib_has_custom_rules = true;
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 3364cb9c67e01..799370bcc70c1 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -29,6 +29,8 @@
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/netlink.h>
++#include <linux/hash.h>
++#include <linux/nospec.h>
+
+ #include <net/arp.h>
+ #include <net/ip.h>
+@@ -220,7 +222,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
+ {
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ if (fib_nh->nh_tclassid)
+- net->ipv4.fib_num_tclassid_users--;
++ atomic_dec(&net->ipv4.fib_num_tclassid_users);
+ #endif
+ fib_nh_common_release(&fib_nh->nh_common);
+ }
+@@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi)
+ pr_warn("Freeing alive fib_info %p\n", fi);
+ return;
+ }
+- fib_info_cnt--;
+
+ call_rcu(&fi->rcu, free_fib_info_rcu);
+ }
+@@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi)
+ spin_lock_bh(&fib_info_lock);
+ if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
+ hlist_del(&fi->fib_hash);
++
++ /* Paired with READ_ONCE() in fib_create_info(). */
++ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
++
+ if (fi->fib_prefsrc)
+ hlist_del(&fi->fib_lhash);
+ if (fi->nh) {
+@@ -271,7 +276,8 @@ void fib_release_info(struct fib_info *fi)
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
+- fi->fib_dead = 1;
++ /* Paired with READ_ONCE() from fib_table_lookup() */
++ WRITE_ONCE(fi->fib_dead, 1);
+ fib_info_put(fi);
+ }
+ spin_unlock_bh(&fib_info_lock);
+@@ -316,11 +322,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
+
+ static inline unsigned int fib_devindex_hashfn(unsigned int val)
+ {
+- unsigned int mask = DEVINDEX_HASHSIZE - 1;
++ return hash_32(val, DEVINDEX_HASHBITS);
++}
++
++static struct hlist_head *
++fib_info_devhash_bucket(const struct net_device *dev)
++{
++ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
+
+- return (val ^
+- (val >> DEVINDEX_HASHBITS) ^
+- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
++ return &fib_info_devhash[fib_devindex_hashfn(val)];
+ }
+
+ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
+@@ -413,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
+ nfi->fib_prefsrc == fi->fib_prefsrc &&
+ nfi->fib_priority == fi->fib_priority &&
+ nfi->fib_type == fi->fib_type &&
++ nfi->fib_tb_id == fi->fib_tb_id &&
+ memcmp(nfi->fib_metrics, fi->fib_metrics,
+ sizeof(u32) * RTAX_MAX) == 0 &&
+ !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
+@@ -430,12 +441,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
+ {
+ struct hlist_head *head;
+ struct fib_nh *nh;
+- unsigned int hash;
+
+ spin_lock(&fib_info_lock);
+
+- hash = fib_devindex_hashfn(dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = fib_info_devhash_bucket(dev);
++
+ hlist_for_each_entry(nh, head, nh_hash) {
+ if (nh->fib_nh_dev == dev &&
+ nh->fib_nh_gw4 == gw &&
+@@ -517,9 +527,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
+ fri.dst_len = dst_len;
+ fri.tos = fa->fa_tos;
+ fri.type = fa->fa_type;
+- fri.offload = fa->offload;
+- fri.trap = fa->trap;
+- fri.offload_failed = fa->offload_failed;
++ fri.offload = READ_ONCE(fa->offload);
++ fri.trap = READ_ONCE(fa->trap);
++ fri.offload_failed = READ_ONCE(fa->offload_failed);
+ err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
+@@ -632,7 +642,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ nh->nh_tclassid = cfg->fc_flow;
+ if (nh->nh_tclassid)
+- net->ipv4.fib_num_tclassid_users++;
++ atomic_inc(&net->ipv4.fib_num_tclassid_users);
+ #endif
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ nh->fib_nh_weight = nh_weight;
+@@ -662,6 +672,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
+ return nhs;
+ }
+
++static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla,
++ struct netlink_ext_ack *extack)
++{
++ if (nla_len(nla) < sizeof(*gw)) {
++ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY");
++ return -EINVAL;
++ }
++
++ *gw = nla_get_in_addr(nla);
++
++ return 0;
++}
++
+ /* only called when fib_nh is integrated into fib_info */
+ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ int remaining, struct fib_config *cfg,
+@@ -704,7 +727,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ return -EINVAL;
+ }
+ if (nla) {
+- fib_cfg.fc_gw4 = nla_get_in_addr(nla);
++ ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla,
++ extack);
++ if (ret)
++ goto errout;
++
+ if (fib_cfg.fc_gw4)
+ fib_cfg.fc_gw_family = AF_INET;
+ } else if (nlav) {
+@@ -714,10 +741,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+ }
+
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+- if (nla)
++ if (nla) {
++ if (nla_len(nla) < sizeof(u32)) {
++ NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
++ return -EINVAL;
++ }
+ fib_cfg.fc_flow = nla_get_u32(nla);
++ }
+
+ fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
++ /* RTA_ENCAP_TYPE length checked in
++ * lwtunnel_valid_encap_type_attr
++ */
+ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+ if (nla)
+ fib_cfg.fc_encap_type = nla_get_u16(nla);
+@@ -854,9 +889,16 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+ return 1;
+ }
+
++ if (fi->nh) {
++ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
++ return 1;
++ return 0;
++ }
++
+ if (cfg->fc_oif || cfg->fc_gw_family) {
+- struct fib_nh *nh = fib_info_nh(fi, 0);
++ struct fib_nh *nh;
+
++ nh = fib_info_nh(fi, 0);
+ if (cfg->fc_encap) {
+ if (fib_encap_match(net, cfg->fc_encap_type,
+ cfg->fc_encap, nh, cfg, extack))
+@@ -902,6 +944,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
++ int err;
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ nlav = nla_find(attrs, attrlen, RTA_VIA);
+@@ -912,12 +955,17 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+ }
+
+ if (nla) {
++ __be32 gw;
++
++ err = fib_gw_from_attr(&gw, nla, extack);
++ if (err)
++ return err;
++
+ if (nh->fib_nh_gw_family != AF_INET ||
+- nla_get_in_addr(nla) != nh->fib_nh_gw4)
++ gw != nh->fib_nh_gw4)
+ return 1;
+ } else if (nlav) {
+ struct fib_config cfg2;
+- int err;
+
+ err = fib_gw_from_via(&cfg2, nlav, extack);
+ if (err)
+@@ -940,8 +988,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ nla = nla_find(attrs, attrlen, RTA_FLOW);
+- if (nla && nla_get_u32(nla) != nh->nh_tclassid)
+- return 1;
++ if (nla) {
++ if (nla_len(nla) < sizeof(u32)) {
++ NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
++ return -EINVAL;
++ }
++ if (nla_get_u32(nla) != nh->nh_tclassid)
++ return 1;
++ }
+ #endif
+ }
+
+@@ -968,6 +1022,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
+ if (type > RTAX_MAX)
+ return false;
+
++ type = array_index_nospec(type, RTAX_MAX + 1);
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+ bool ecn_ca = false;
+@@ -1179,7 +1234,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
+
+ nh->fib_nh_dev = in_dev->dev;
+ dev_hold(nh->fib_nh_dev);
+- nh->fib_nh_scope = RT_SCOPE_HOST;
++ nh->fib_nh_scope = RT_SCOPE_LINK;
+ if (!netif_carrier_ok(nh->fib_nh_dev))
+ nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ err = 0;
+@@ -1393,7 +1448,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
+ #endif
+
+ err = -ENOBUFS;
+- if (fib_info_cnt >= fib_info_hash_size) {
++
++ /* Paired with WRITE_ONCE() in fib_release_info() */
++ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
+ unsigned int new_size = fib_info_hash_size << 1;
+ struct hlist_head *new_info_hash;
+ struct hlist_head *new_laddrhash;
+@@ -1425,7 +1482,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
+ return ERR_PTR(err);
+ }
+
+- fib_info_cnt++;
+ fi->fib_net = net;
+ fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_scope = cfg->fc_scope;
+@@ -1543,6 +1599,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
+ link_it:
+ ofi = fib_find_info(fi);
+ if (ofi) {
++ /* fib_table_lookup() should not see @fi yet. */
+ fi->fib_dead = 1;
+ free_fib_info(fi);
+ refcount_inc(&ofi->fib_treeref);
+@@ -1552,6 +1609,7 @@ link_it:
+ refcount_set(&fi->fib_treeref, 1);
+ refcount_set(&fi->fib_clntref, 1);
+ spin_lock_bh(&fib_info_lock);
++ fib_info_cnt++;
+ hlist_add_head(&fi->fib_hash,
+ &fib_info_hash[fib_info_hashfn(fi)]);
+ if (fi->fib_prefsrc) {
+@@ -1565,12 +1623,10 @@ link_it:
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+- unsigned int hash;
+
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
+@@ -1582,6 +1638,7 @@ err_inval:
+
+ failure:
+ if (fi) {
++ /* fib_table_lookup() should not see @fi yet. */
+ fi->fib_dead = 1;
+ free_fib_info(fi);
+ }
+@@ -1780,7 +1837,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ goto nla_put_failure;
+ if (nexthop_is_blackhole(fi->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+- if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
++ if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
+ goto offload;
+ }
+
+@@ -1922,8 +1979,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
+
+ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+ {
+- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+- struct hlist_head *head = &fib_info_devhash[hash];
++ struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct fib_nh *nh;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+@@ -1942,12 +1998,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+ */
+ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
+ {
+- int ret = 0;
+- int scope = RT_SCOPE_NOWHERE;
++ struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct fib_info *prev_fi = NULL;
+- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+- struct hlist_head *head = &fib_info_devhash[hash];
++ int scope = RT_SCOPE_NOWHERE;
+ struct fib_nh *nh;
++ int ret = 0;
+
+ if (force)
+ scope = -1;
+@@ -2092,7 +2147,6 @@ out:
+ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
+ {
+ struct fib_info *prev_fi;
+- unsigned int hash;
+ struct hlist_head *head;
+ struct fib_nh *nh;
+ int ret;
+@@ -2108,8 +2162,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
+ }
+
+ prev_fi = NULL;
+- hash = fib_devindex_hashfn(dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = fib_info_devhash_bucket(dev);
+ ret = 0;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+@@ -2188,7 +2241,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
+ }
+
+ change_nexthops(fi) {
+- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
++ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
+ if (!fib_good_nh(nexthop_nh))
+ continue;
+ if (!first) {
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index 8060524f42566..22531aac0ccbf 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -497,7 +497,7 @@ static void tnode_free(struct key_vector *tn)
+ tn = container_of(head, struct tnode, rcu)->kv;
+ }
+
+- if (tnode_free_size >= sysctl_fib_sync_mem) {
++ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
+ tnode_free_size = 0;
+ synchronize_rcu();
+ }
+@@ -1037,6 +1037,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
+
+ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ {
++ u8 fib_notify_on_flag_change;
+ struct fib_alias *fa_match;
+ struct sk_buff *skb;
+ int err;
+@@ -1047,21 +1048,27 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ if (!fa_match)
+ goto out;
+
+- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
+- fa_match->offload_failed == fri->offload_failed)
++ /* These are paired with the WRITE_ONCE() happening in this function.
++ * The reason is that we are only protected by RCU at this point.
++ */
++ if (READ_ONCE(fa_match->offload) == fri->offload &&
++ READ_ONCE(fa_match->trap) == fri->trap &&
++ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
+ goto out;
+
+- fa_match->offload = fri->offload;
+- fa_match->trap = fri->trap;
++ WRITE_ONCE(fa_match->offload, fri->offload);
++ WRITE_ONCE(fa_match->trap, fri->trap);
++
++ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
+
+ /* 2 means send notifications only if offload_failed was changed. */
+- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+- fa_match->offload_failed == fri->offload_failed)
++ if (fib_notify_on_flag_change == 2 &&
++ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
+ goto out;
+
+- fa_match->offload_failed = fri->offload_failed;
++ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
+
+- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
++ if (!fib_notify_on_flag_change)
+ goto out;
+
+ skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+@@ -1368,8 +1375,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
+
+ /* The alias was already inserted, so the node must exist. */
+ l = l ? l : fib_find_node(t, &tp, key);
+- if (WARN_ON_ONCE(!l))
++ if (WARN_ON_ONCE(!l)) {
++ err = -ENOENT;
+ goto out_free_new_fa;
++ }
+
+ if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
+ new_fa) {
+@@ -1569,7 +1578,8 @@ found:
+ }
+ if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ continue;
+- if (fi->fib_dead)
++ /* Paired with WRITE_ONCE() in fib_release_info() */
++ if (READ_ONCE(fi->fib_dead))
+ continue;
+ if (fa->fa_info->fib_scope < flp->flowi4_scope)
+ continue;
+@@ -2297,9 +2307,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
+ fri.dst_len = KEYLENGTH - fa->fa_slen;
+ fri.tos = fa->fa_tos;
+ fri.type = fa->fa_type;
+- fri.offload = fa->offload;
+- fri.trap = fa->trap;
+- fri.offload_failed = fa->offload_failed;
++ fri.offload = READ_ONCE(fa->offload);
++ fri.trap = READ_ONCE(fa->trap);
++ fri.offload_failed = READ_ONCE(fa->offload_failed);
+ err = fib_dump_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index b7e277d8a84d2..7b749a98327c2 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -261,11 +261,12 @@ bool icmp_global_allow(void)
+ spin_lock(&icmp_global.lock);
+ delta = min_t(u32, now - icmp_global.stamp, HZ);
+ if (delta >= HZ / 50) {
+- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
++ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
+ if (incr)
+ WRITE_ONCE(icmp_global.stamp, now);
+ }
+- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
++ credit = min_t(u32, icmp_global.credit + incr,
++ READ_ONCE(sysctl_icmp_msgs_burst));
+ if (credit) {
+ /* We want to use a credit of one in average, but need to randomize
+ * it for security reasons.
+@@ -289,7 +290,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
+ return true;
+
+ /* Limit if icmp type is enabled in ratemask. */
+- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
++ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
+ return true;
+
+ return false;
+@@ -327,7 +328,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+
+ vif = l3mdev_master_ifindex(dst->dev);
+ peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
+- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
++ rc = inet_peer_xrlim_allow(peer,
++ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
+ if (peer)
+ inet_putpeer(peer);
+ out:
+@@ -701,7 +703,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+
+ rcu_read_lock();
+ if (rt_is_input_route(rt) &&
+- net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
++ READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
+ dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
+
+ if (dev)
+@@ -753,6 +755,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+ room = 576;
+ room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
+ room -= sizeof(struct icmphdr);
++ /* Guard against tiny mtu. We need to include at least one
++ * IP network header for this message to make any sense.
++ */
++ if (room <= (int)sizeof(struct iphdr))
++ goto ende;
+
+ icmp_param.data_len = skb_in->len - icmp_param.offset;
+ if (icmp_param.data_len > room)
+@@ -885,7 +892,7 @@ static bool icmp_unreach(struct sk_buff *skb)
+ * values please see
+ * Documentation/networking/ip-sysctl.rst
+ */
+- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
++ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
+ default:
+ net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
+ &iph->daddr);
+@@ -938,7 +945,7 @@ static bool icmp_unreach(struct sk_buff *skb)
+ * get the other vendor to fix their kit.
+ */
+
+- if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
++ if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
+ inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
+ net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
+ &ip_hdr(skb)->saddr,
+@@ -1033,7 +1040,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
+ u16 ident_len;
+ u8 status;
+
+- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
++ if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
+ return false;
+
+ /* We currently only support probing interfaces on the proxy node
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index d2e2b3d18c668..1f63dc8897a40 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
+ struct flowi4 fl4;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
+- unsigned int size = mtu;
++ unsigned int size;
+
++ size = min(mtu, IP_MAX_MTU);
+ while (1) {
+ skb = alloc_skb(size + hlen + tlen,
+ GFP_ATOMIC | __GFP_NOWARN);
+@@ -467,7 +468,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
+
+ if (pmc->multiaddr == IGMP_ALL_HOSTS)
+ return skb;
+- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++ if (ipv4_is_local_multicast(pmc->multiaddr) &&
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ return skb;
+
+ mtu = READ_ONCE(dev->mtu);
+@@ -593,7 +595,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
+ if (pmc->multiaddr == IGMP_ALL_HOSTS)
+ continue;
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+- !net->ipv4.sysctl_igmp_llm_reports)
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ continue;
+ spin_lock_bh(&pmc->lock);
+ if (pmc->sfcount[MCAST_EXCLUDE])
+@@ -736,7 +738,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
+ if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
+ return igmpv3_send_report(in_dev, pmc);
+
+- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
++ if (ipv4_is_local_multicast(group) &&
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ return 0;
+
+ if (type == IGMP_HOST_LEAVE_MESSAGE)
+@@ -825,7 +828,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+ struct net *net = dev_net(in_dev->dev);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+ return;
+- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+ igmp_ifc_start_timer(in_dev, 1);
+ }
+
+@@ -920,7 +923,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
+
+ if (group == IGMP_ALL_HOSTS)
+ return false;
+- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
++ if (ipv4_is_local_multicast(group) &&
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ return false;
+
+ rcu_read_lock();
+@@ -1006,7 +1010,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+@@ -1045,7 +1049,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+ if (im->multiaddr == IGMP_ALL_HOSTS)
+ continue;
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+- !net->ipv4.sysctl_igmp_llm_reports)
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ continue;
+ spin_lock_bh(&im->lock);
+ if (im->tm_running)
+@@ -1186,7 +1190,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+ pmc->interface = im->interface;
+ in_dev_hold(in_dev);
+ pmc->multiaddr = im->multiaddr;
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ pmc->sfmode = im->sfmode;
+ if (pmc->sfmode == MCAST_INCLUDE) {
+ struct ip_sf_list *psf;
+@@ -1237,9 +1241,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+ swap(im->tomb, pmc->tomb);
+ swap(im->sources, pmc->sources);
+ for (psf = im->sources; psf; psf = psf->sf_next)
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ } else {
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ in_dev_put(pmc->interface);
+ kfree_pmc(pmc);
+@@ -1296,7 +1302,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
+ #ifdef CONFIG_IP_MULTICAST
+ if (im->multiaddr == IGMP_ALL_HOSTS)
+ return;
+- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++ if (ipv4_is_local_multicast(im->multiaddr) &&
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ return;
+
+ reporter = im->reporter;
+@@ -1338,13 +1345,14 @@ static void igmp_group_added(struct ip_mc_list *im)
+ #ifdef CONFIG_IP_MULTICAST
+ if (im->multiaddr == IGMP_ALL_HOSTS)
+ return;
+- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
++ if (ipv4_is_local_multicast(im->multiaddr) &&
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ return;
+
+ if (in_dev->dead)
+ return;
+
+- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+ spin_lock_bh(&im->lock);
+ igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1358,7 +1366,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+ * IN() to IN(A).
+ */
+ if (im->sfmode == MCAST_EXCLUDE)
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+ igmp_ifc_event(in_dev);
+ #endif
+@@ -1642,7 +1650,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
+ if (im->multiaddr == IGMP_ALL_HOSTS)
+ continue;
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+- !net->ipv4.sysctl_igmp_llm_reports)
++ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
+ continue;
+
+ /* a failover is happening and switches
+@@ -1749,7 +1757,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1883,7 +1891,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ if (psf->sf_oldin &&
+ !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ psf->sf_next = pmc->tomb;
+ pmc->tomb = psf;
+ rv = 1;
+@@ -1947,7 +1955,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ /* filter mode change */
+ pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+@@ -2126,7 +2134,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ /* else no filters; keep old mode for reports */
+
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+@@ -2192,7 +2200,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+ count++;
+ }
+ err = -ENOBUFS;
+- if (count >= net->ipv4.sysctl_igmp_max_memberships)
++ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
+ goto done;
+ iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
+ if (!iml)
+@@ -2379,7 +2387,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
+ }
+ /* else, add a new source to the filter */
+
+- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
++ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
+ err = -ENOBUFS;
+ goto done;
+ }
+@@ -2403,9 +2411,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
+ /* decrease mem now to avoid the memleak warning */
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
+- kfree_rcu(psl, rcu);
+ }
+ rcu_assign_pointer(pmc->sflist, newpsl);
++ if (psl)
++ kfree_rcu(psl, rcu);
+ psl = newpsl;
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+@@ -2507,11 +2516,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
+ /* decrease mem now to avoid the memleak warning */
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
+- kfree_rcu(psl, rcu);
+- } else
++ } else {
+ (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
+ 0, NULL, 0);
++ }
+ rcu_assign_pointer(pmc->sflist, newpsl);
++ if (psl)
++ kfree_rcu(psl, rcu);
+ pmc->sfmode = msf->imsf_fmode;
+ err = 0;
+ done:
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index f25d02ad4a8af..c770719797e12 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
+ */
+
+ sk_for_each_bound(sk2, &tb->owners) {
+- if (sk != sk2 &&
+- (!sk->sk_bound_dev_if ||
+- !sk2->sk_bound_dev_if ||
+- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
++ int bound_dev_if2;
++
++ if (sk == sk2)
++ continue;
++ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
++ if ((!sk->sk_bound_dev_if ||
++ !bound_dev_if2 ||
++ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+@@ -259,7 +263,7 @@ next_port:
+ goto other_half_scan;
+ }
+
+- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
++ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
+ /* We still have a chance to connect to different destinations */
+ relax = true;
+ goto ports_exhausted;
+@@ -721,7 +725,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
+
+ sk_node_init(&nreq_sk->sk_node);
+ nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
+-#ifdef CONFIG_XPS
++#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
+ nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
+ #endif
+ nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
+@@ -829,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t)
+
+ icsk = inet_csk(sk_listener);
+ net = sock_net(sk_listener);
+- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
++ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
++ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
+ /* Normally all the openreqs are young and become mature
+ * (i.e. converted to established socket) for first timeout.
+ * If synack was not acknowledged for 1 second, it means
+@@ -958,6 +963,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
+ if (newsk) {
+ struct inet_connection_sock *newicsk = inet_csk(newsk);
+
++ newsk->sk_wait_pending = 0;
+ inet_sk_set_state(newsk, TCP_SYN_RECV);
+ newicsk->icsk_bind_hash = NULL;
+
+@@ -1015,7 +1021,7 @@ void inet_csk_destroy_sock(struct sock *sk)
+
+ sk_refcnt_debug_release(sk);
+
+- percpu_counter_dec(sk->sk_prot->orphan_count);
++ this_cpu_dec(*sk->sk_prot->orphan_count);
+
+ sock_put(sk);
+ }
+@@ -1035,11 +1041,25 @@ void inet_csk_prepare_forced_close(struct sock *sk)
+ }
+ EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+
++static int inet_ulp_can_listen(const struct sock *sk)
++{
++ const struct inet_connection_sock *icsk = inet_csk(sk);
++
++ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
++ return -EINVAL;
++
++ return 0;
++}
++
+ int inet_csk_listen_start(struct sock *sk, int backlog)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+- int err = -EADDRINUSE;
++ int err;
++
++ err = inet_ulp_can_listen(sk);
++ if (unlikely(err))
++ return err;
+
+ reqsk_queue_alloc(&icsk->icsk_accept_queue);
+
+@@ -1051,6 +1071,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
+ * It is OK, because this socket enters to hash table only
+ * after validation is complete.
+ */
++ err = -EADDRINUSE;
+ inet_sk_state_store(sk, TCP_LISTEN);
+ if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
+ inet->inet_sport = htons(inet->inet_num);
+@@ -1074,7 +1095,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
+
+ sock_orphan(child);
+
+- percpu_counter_inc(sk->sk_prot->orphan_count);
++ this_cpu_inc(*sk->sk_prot->orphan_count);
+
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+ BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
+diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
+index ef7897226f08e..ae70e07c52445 100644
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -261,6 +261,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+ r->idiag_state = sk->sk_state;
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
++ r->idiag_expires = 0;
+
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+@@ -314,9 +315,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+ r->idiag_retrans = icsk->icsk_probes_out;
+ r->idiag_expires =
+ jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
+- } else {
+- r->idiag_timer = 0;
+- r->idiag_expires = 0;
+ }
+
+ if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 05cd198d7a6ba..341096807100c 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
+ /* The RCU read lock provides a memory barrier
+ * guaranteeing that if fqdir->dead is false then
+ * the hash table destruction will not start until
+- * after we unlock. Paired with inet_frags_exit_net().
++ * after we unlock. Paired with fqdir_pre_exit().
+ */
+- if (!fqdir->dead) {
++ if (!READ_ONCE(fqdir->dead)) {
+ rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
+ fqdir->f->rhash_params);
+ refcount_dec(&fq->refcnt);
+@@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
+ /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
+ {
++ /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
++ long high_thresh = READ_ONCE(fqdir->high_thresh);
+ struct inet_frag_queue *fq = NULL, *prev;
+
+- if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
++ if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
+ return NULL;
+
+ rcu_read_lock();
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
+index bfb522e513461..2936676f86eb8 100644
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -410,13 +410,11 @@ begin:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_hash != hash)
+ continue;
+- if (likely(INET_MATCH(sk, net, acookie,
+- saddr, daddr, ports, dif, sdif))) {
++ if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+ goto out;
+- if (unlikely(!INET_MATCH(sk, net, acookie,
+- saddr, daddr, ports,
+- dif, sdif))) {
++ if (unlikely(!INET_MATCH(net, sk, acookie,
++ ports, dif, sdif))) {
+ sock_gen_put(sk);
+ goto begin;
+ }
+@@ -465,8 +463,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
+ if (sk2->sk_hash != hash)
+ continue;
+
+- if (likely(INET_MATCH(sk2, net, acookie,
+- saddr, daddr, ports, dif, sdif))) {
++ if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+@@ -504,7 +501,7 @@ not_unique:
+ return -EADDRNOTAVAIL;
+ }
+
+-static u32 inet_sk_port_offset(const struct sock *sk)
++static u64 inet_sk_port_offset(const struct sock *sk)
+ {
+ const struct inet_sock *inet = inet_sk(sk);
+
+@@ -532,16 +529,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
+ if (esk->sk_hash != sk->sk_hash)
+ continue;
+ if (sk->sk_family == AF_INET) {
+- if (unlikely(INET_MATCH(esk, net, acookie,
+- sk->sk_daddr,
+- sk->sk_rcv_saddr,
++ if (unlikely(INET_MATCH(net, esk, acookie,
+ ports, dif, sdif))) {
+ return true;
+ }
+ }
+ #if IS_ENABLED(CONFIG_IPV6)
+ else if (sk->sk_family == AF_INET6) {
+- if (unlikely(INET6_MATCH(esk, net,
++ if (unlikely(inet6_match(net, esk,
+ &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr,
+ ports, dif, sdif))) {
+@@ -598,7 +593,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ if (ok) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ } else {
+- percpu_counter_inc(sk->sk_prot->orphan_count);
++ this_cpu_inc(*sk->sk_prot->orphan_count);
+ inet_sk_set_state(sk, TCP_CLOSE);
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_csk_destroy_sock(sk);
+@@ -637,7 +632,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
+ int err = 0;
+
+ if (sk->sk_state != TCP_LISTEN) {
++ local_bh_disable();
+ inet_ehash_nolisten(sk, osk, NULL);
++ local_bh_enable();
+ return 0;
+ }
+ WARN_ON(!sk_unhashed(sk));
+@@ -669,45 +666,54 @@ int inet_hash(struct sock *sk)
+ {
+ int err = 0;
+
+- if (sk->sk_state != TCP_CLOSE) {
+- local_bh_disable();
++ if (sk->sk_state != TCP_CLOSE)
+ err = __inet_hash(sk, NULL);
+- local_bh_enable();
+- }
+
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(inet_hash);
+
+-void inet_unhash(struct sock *sk)
++static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
+ {
+- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+- struct inet_listen_hashbucket *ilb = NULL;
+- spinlock_t *lock;
+-
+ if (sk_unhashed(sk))
+ return;
+
+- if (sk->sk_state == TCP_LISTEN) {
+- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+- lock = &ilb->lock;
+- } else {
+- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+- }
+- spin_lock_bh(lock);
+- if (sk_unhashed(sk))
+- goto unlock;
+-
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_stop_listen_sock(sk);
+ if (ilb) {
++ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
++
+ inet_unhash2(hashinfo, sk);
+ ilb->count--;
+ }
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+-unlock:
+- spin_unlock_bh(lock);
++}
++
++void inet_unhash(struct sock *sk)
++{
++ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
++
++ if (sk_unhashed(sk))
++ return;
++
++ if (sk->sk_state == TCP_LISTEN) {
++ struct inet_listen_hashbucket *ilb;
++
++ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
++ /* Don't disable bottom halves while acquiring the lock to
++ * avoid circular locking dependency on PREEMPT_RT.
++ */
++ spin_lock(&ilb->lock);
++ __inet_unhash(sk, ilb);
++ spin_unlock(&ilb->lock);
++ } else {
++ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
++
++ spin_lock_bh(lock);
++ __inet_unhash(sk, NULL);
++ spin_unlock_bh(lock);
++ }
+ }
+ EXPORT_SYMBOL_GPL(inet_unhash);
+
+@@ -715,15 +721,17 @@ EXPORT_SYMBOL_GPL(inet_unhash);
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+- * we use 256 instead to really give more isolation and
+- * privacy, this only consumes 1 KB of kernel memory.
++ *
++ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
++ * attacks were since demonstrated, thus we use 65536 by default instead
++ * to really give more isolation and privacy, at the expense of 256kB
++ * of kernel memory.
+ */
+-#define INET_TABLE_PERTURB_SHIFT 8
+-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
++#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
++static u32 *table_perturb;
+
+ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+- struct sock *sk, u32 port_offset,
++ struct sock *sk, u64 port_offset,
+ int (*check_established)(struct inet_timewait_death_row *,
+ struct sock *, __u16, struct inet_timewait_sock **))
+ {
+@@ -739,17 +747,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+ u32 index;
+
+ if (port) {
+- head = &hinfo->bhash[inet_bhashfn(net, port,
+- hinfo->bhash_size)];
+- tb = inet_csk(sk)->icsk_bind_hash;
+- spin_lock_bh(&head->lock);
+- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+- inet_ehash_nolisten(sk, NULL, NULL);
+- spin_unlock_bh(&head->lock);
+- return 0;
+- }
+- spin_unlock(&head->lock);
+- /* No definite answer... Walk to established hash table */
++ local_bh_disable();
+ ret = check_established(death_row, sk, port, NULL);
+ local_bh_enable();
+ return ret;
+@@ -763,10 +761,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
+ if (likely(remaining > 1))
+ remaining &= ~1U;
+
+- net_get_random_once(table_perturb, sizeof(table_perturb));
+- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
++ get_random_slow_once(table_perturb,
++ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
++ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
++
++ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
++ offset %= remaining;
+
+- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
+ /* In first pass we try ports of @low parity.
+ * inet_csk_get_port() does the opposite choice.
+ */
+@@ -820,11 +821,12 @@ next_port:
+ return -EADDRNOTAVAIL;
+
+ ok:
+- /* If our first attempt found a candidate, skip next candidate
+- * in 1/16 of cases to add some noise.
++ /* Here we want to add a little bit of randomness to the next source
++ * port that will be chosen. We use a max() with a random here so that
++ * on low contention the randomness is maximal and on high contention
++ * it may be inexistent.
+ */
+- if (!i && !(prandom_u32() % 16))
+- i = 2;
++ i = max_t(int, i, (prandom_u32() & 7) * 2);
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
+
+ /* Head lock still held and bh's disabled */
+@@ -848,7 +850,7 @@ ok:
+ int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+ {
+- u32 port_offset = 0;
++ u64 port_offset = 0;
+
+ if (!inet_sk(sk)->inet_num)
+ port_offset = inet_sk_port_offset(sk);
+@@ -898,6 +900,14 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+ low_limit,
+ high_limit);
+ init_hashinfo_lhash2(h);
++
++ /* this one is used for source ports of outgoing connections */
++ table_perturb = alloc_large_system_hash("Table-perturb",
++ sizeof(*table_perturb),
++ INET_TABLE_PERTURB_SIZE,
++ 0, 0, NULL, NULL,
++ INET_TABLE_PERTURB_SIZE,
++ INET_TABLE_PERTURB_SIZE);
+ }
+
+ int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
+diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
+index da21dfce24d73..e9fed83e9b3cc 100644
+--- a/net/ipv4/inetpeer.c
++++ b/net/ipv4/inetpeer.c
+@@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
+ struct inet_peer *gc_stack[],
+ unsigned int gc_cnt)
+ {
++ int peer_threshold, peer_maxttl, peer_minttl;
+ struct inet_peer *p;
+ __u32 delta, ttl;
+ int i;
+
+- if (base->total >= inet_peer_threshold)
++ peer_threshold = READ_ONCE(inet_peer_threshold);
++ peer_maxttl = READ_ONCE(inet_peer_maxttl);
++ peer_minttl = READ_ONCE(inet_peer_minttl);
++
++ if (base->total >= peer_threshold)
+ ttl = 0; /* be aggressive */
+ else
+- ttl = inet_peer_maxttl
+- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
+- base->total / inet_peer_threshold * HZ;
++ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
++ base->total / peer_threshold * HZ;
+ for (i = 0; i < gc_cnt; i++) {
+ p = gc_stack[i];
+
+diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
+index 00ec819f949b5..29730edda220a 100644
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -151,7 +151,7 @@ int ip_forward(struct sk_buff *skb)
+ !skb_sec_path(skb))
+ ip_rt_send_redirect(skb);
+
+- if (net->ipv4.sysctl_ip_fwd_update_priority)
++ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
+ skb->priority = rt_tos2priority(iph->tos);
+
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index cfeb8890f94ee..fad803d2d711e 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
+
+ rcu_read_lock();
+
+- if (qp->q.fqdir->dead)
++ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
++ if (READ_ONCE(qp->q.fqdir->dead))
+ goto out_rcu_unlock;
+
+ spin_lock(&qp->q.lock);
+diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+index 0fe6c936dc54a..c094963a86f1e 100644
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+ {
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+-
+- if (tunnel->parms.o_flags & TUNNEL_SEQ)
+- tunnel->o_seqno++;
++ __be16 flags = tunnel->parms.o_flags;
+
+ /* Push GRE header. */
+ gre_build_header(skb, tunnel->tun_hlen,
+- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
+- htonl(tunnel->o_seqno));
++ flags, proto, tunnel->parms.o_key,
++ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+
+ ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
+ }
+@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ gre_build_header(skb, tunnel_hlen, flags, proto,
+ tunnel_id_to_key32(tun_info->key.tun_id),
+- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
++ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+
+ ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
+
+@@ -526,7 +524,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+ int tunnel_hlen;
+ int version;
+ int nhoff;
+- int thoff;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+@@ -555,15 +552,21 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+ truncate = true;
+ }
+
+- nhoff = skb_network_header(skb) - skb_mac_header(skb);
++ nhoff = skb_network_offset(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
+- thoff = skb_transport_header(skb) - skb_mac_header(skb);
+- if (skb->protocol == htons(ETH_P_IPV6) &&
+- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+- truncate = true;
++ if (skb->protocol == htons(ETH_P_IPV6)) {
++ int thoff;
++
++ if (skb_transport_header_was_set(skb))
++ thoff = skb_transport_offset(skb);
++ else
++ thoff = nhoff + sizeof(struct ipv6hdr);
++ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
++ truncate = true;
++ }
+
+ if (version == 1) {
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
+@@ -581,7 +584,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+- proto, 0, htonl(tunnel->o_seqno++));
++ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
+
+ ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
+
+@@ -604,8 +607,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+
+ key = &info->key;
+ ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
+- tunnel_id_to_key32(key->tun_id), key->tos, 0,
+- skb->mark, skb_get_hash(skb));
++ tunnel_id_to_key32(key->tun_id),
++ key->tos & ~INET_ECN_MASK, 0, skb->mark,
++ skb_get_hash(skb));
+ rt = ip_route_output_key(dev_net(dev), &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+@@ -630,21 +634,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
+ }
+
+ if (dev->header_ops) {
+- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
+-
+ if (skb_cow_head(skb, 0))
+ goto free_skb;
+
+ tnl_params = (const struct iphdr *)skb->data;
+
+- if (pull_len > skb_transport_offset(skb))
+- goto free_skb;
+-
+ /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
+ * to gre header.
+ */
+- skb_pull(skb, pull_len);
++ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ skb_reset_mac_header(skb);
++
++ if (skb->ip_summed == CHECKSUM_PARTIAL &&
++ skb_checksum_start(skb) < skb->data)
++ goto free_skb;
+ } else {
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+@@ -1495,24 +1498,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+ struct ip_tunnel_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+- if (t->erspan_ver <= 2) {
+- if (t->erspan_ver != 0 && !t->collect_md)
+- o_flags |= TUNNEL_KEY;
+-
+- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+- goto nla_put_failure;
+-
+- if (t->erspan_ver == 1) {
+- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+- goto nla_put_failure;
+- } else if (t->erspan_ver == 2) {
+- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+- goto nla_put_failure;
+- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+- goto nla_put_failure;
+- }
+- }
+-
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+@@ -1553,6 +1538,34 @@ nla_put_failure:
+ return -EMSGSIZE;
+ }
+
++static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
++{
++ struct ip_tunnel *t = netdev_priv(dev);
++
++ if (t->erspan_ver <= 2) {
++ if (t->erspan_ver != 0 && !t->collect_md)
++ t->parms.o_flags |= TUNNEL_KEY;
++
++ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
++ goto nla_put_failure;
++
++ if (t->erspan_ver == 1) {
++ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
++ goto nla_put_failure;
++ } else if (t->erspan_ver == 2) {
++ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
++ goto nla_put_failure;
++ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
++ goto nla_put_failure;
++ }
++ }
++
++ return ipgre_fill_info(skb, dev);
++
++nla_put_failure:
++ return -EMSGSIZE;
++}
++
+ static void erspan_setup(struct net_device *dev)
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+@@ -1631,7 +1644,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+ .changelink = erspan_changelink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+- .fill_info = ipgre_fill_info,
++ .fill_info = erspan_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
+ };
+
+diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
+index 3a025c0119718..5d0bc0dbdb4d9 100644
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -310,16 +310,17 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
+ ip_hdr(hint)->tos == iph->tos;
+ }
+
+-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
+-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
++int tcp_v4_early_demux(struct sk_buff *skb);
++int udp_v4_early_demux(struct sk_buff *skb);
+ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb, struct net_device *dev,
+ const struct sk_buff *hint)
+ {
+ const struct iphdr *iph = ip_hdr(skb);
+- int (*edemux)(struct sk_buff *skb);
++ int err, drop_reason;
+ struct rtable *rt;
+- int err;
++
++ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+
+ if (ip_can_use_hint(skb, iph, hint)) {
+ err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
+@@ -328,21 +329,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ goto drop_error;
+ }
+
+- if (net->ipv4.sysctl_ip_early_demux &&
++ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) &&
+ !skb->sk &&
+ !ip_is_fragment(iph)) {
+- const struct net_protocol *ipprot;
+- int protocol = iph->protocol;
+-
+- ipprot = rcu_dereference(inet_protos[protocol]);
+- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
+- udp_v4_early_demux, skb);
+- if (unlikely(err))
+- goto drop_error;
+- /* must reload iph, skb->head might have changed */
+- iph = ip_hdr(skb);
++ switch (iph->protocol) {
++ case IPPROTO_TCP:
++ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
++ tcp_v4_early_demux(skb);
++
++ /* must reload iph, skb->head might have changed */
++ iph = ip_hdr(skb);
++ }
++ break;
++ case IPPROTO_UDP:
++ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
++ err = udp_v4_early_demux(skb);
++ if (unlikely(err))
++ goto drop_error;
++
++ /* must reload iph, skb->head might have changed */
++ iph = ip_hdr(skb);
++ }
++ break;
+ }
+ }
+
+@@ -355,6 +364,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ iph->tos, dev);
+ if (unlikely(err))
+ goto drop_error;
++ } else {
++ struct in_device *in_dev = __in_dev_get_rcu(dev);
++
++ if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
++ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+ }
+
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+@@ -396,19 +410,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ * so-called "hole-196" attack) so do it for both.
+ */
+ if (in_dev &&
+- IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
++ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
++ drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
+ goto drop;
++ }
+ }
+
+ return NET_RX_SUCCESS;
+
+ drop:
+- kfree_skb(skb);
++ kfree_skb_reason(skb, drop_reason);
+ return NET_RX_DROP;
+
+ drop_error:
+- if (err == -EXDEV)
++ if (err == -EXDEV) {
++ drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+ __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
++ }
+ goto drop;
+ }
+
+@@ -436,13 +454,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
+ {
+ const struct iphdr *iph;
++ int drop_reason;
+ u32 len;
+
+ /* When the interface is in promisc. mode, drop all the crap
+ * that it receives, do not try to analyse it.
+ */
+- if (skb->pkt_type == PACKET_OTHERHOST)
++ if (skb->pkt_type == PACKET_OTHERHOST) {
++ drop_reason = SKB_DROP_REASON_OTHERHOST;
+ goto drop;
++ }
+
+ __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
+
+@@ -452,6 +473,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
+ goto out;
+ }
+
++ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto inhdr_error;
+
+@@ -488,6 +510,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
+
+ len = ntohs(iph->tot_len);
+ if (skb->len < len) {
++ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ } else if (len < (iph->ihl*4))
+@@ -516,11 +539,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
+ return skb;
+
+ csum_error:
++ drop_reason = SKB_DROP_REASON_IP_CSUM;
+ __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
+ inhdr_error:
++ if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
++ drop_reason = SKB_DROP_REASON_IP_INHDR;
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ drop:
+- kfree_skb(skb);
++ kfree_skb_reason(skb, drop_reason);
+ out:
+ return NULL;
+ }
+@@ -555,7 +581,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
+ static struct sk_buff *ip_extract_route_hint(const struct net *net,
+ struct sk_buff *skb, int rt_type)
+ {
+- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
++ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
++ IPCB(skb)->flags & IPSKB_MULTIPATH)
+ return NULL;
+
+ return skb;
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 9bca57ef8b838..a5f09d64c6ed1 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
+ iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
+ iph->saddr = saddr;
+ iph->protocol = sk->sk_protocol;
+- if (ip_dont_fragment(sk, &rt->dst)) {
++ /* Do not bother generating IPID for small packets (eg SYNACK) */
++ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
+ iph->frag_off = htons(IP_DF);
+ iph->id = 0;
+ } else {
+ iph->frag_off = 0;
+- __ip_select_ident(net, iph, 1);
++ /* TCP packets here are SYNACK with fat IPv4/TCP options.
++ * Avoid using the hashed IP ident generator.
++ */
++ if (sk->sk_protocol == IPPROTO_TCP)
++ iph->id = (__force __be16)prandom_u32();
++ else
++ __ip_select_ident(net, iph, 1);
+ }
+
+ if (opt && opt->opt.optlen) {
+@@ -207,7 +214,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
+ if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+ int res = lwtunnel_xmit(skb);
+
+- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
++ if (res != LWTUNNEL_XMIT_CONTINUE)
+ return res;
+ }
+
+@@ -826,15 +833,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ /* Everything is OK. Generate! */
+ ip_fraglist_init(skb, iph, hlen, &iter);
+
+- if (iter.frag)
+- ip_options_fragment(iter.frag);
+-
+ for (;;) {
+ /* Prepare header of the next frame,
+ * before previous one went down. */
+ if (iter.frag) {
++ bool first_frag = (iter.offset == 0);
++
+ IPCB(iter.frag)->flags = IPCB(skb)->flags;
+ ip_fraglist_prepare(skb, &iter);
++ if (first_frag && IPCB(skb)->opt.optlen) {
++ /* ipcb->opt is not populated for frags
++ * coming from __ip_make_skb(),
++ * ip_options_fragment() needs optlen
++ */
++ IPCB(iter.frag)->opt.optlen =
++ IPCB(skb)->opt.optlen;
++ ip_options_fragment(iter.frag);
++ ip_send_check(iter.iph);
++ }
+ }
+
+ skb->tstamp = tstamp;
+@@ -976,7 +992,7 @@ static int __ip_append_data(struct sock *sk,
+
+ if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+- tskey = sk->sk_tskey++;
++ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
+ hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+
+@@ -1539,9 +1555,19 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
+ cork->dst = NULL;
+ skb_dst_set(skb, &rt->dst);
+
+- if (iph->protocol == IPPROTO_ICMP)
+- icmp_out_count(net, ((struct icmphdr *)
+- skb_transport_header(skb))->type);
++ if (iph->protocol == IPPROTO_ICMP) {
++ u8 icmp_type;
++
++ /* For such sockets, transhdrlen is zero when do ip_append_data(),
++ * so icmphdr does not in skb linear region and can not get icmp_type
++ * by icmp_hdr(skb)->type.
++ */
++ if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
++ icmp_type = fl4->fl4_icmp_type;
++ else
++ icmp_type = icmp_hdr(skb)->type;
++ icmp_out_count(net, icmp_type);
++ }
+
+ ip_cork_release(cork);
+ out:
+@@ -1688,7 +1714,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+ arg->uid);
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
+- rt = ip_route_output_key(net, &fl4);
++ rt = ip_route_output_flow(net, &fl4, sk);
+ if (IS_ERR(rt))
+ return;
+
+@@ -1696,7 +1722,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+
+ sk->sk_protocol = ip_hdr(skb)->protocol;
+ sk->sk_bound_dev_if = arg->bound_dev_if;
+- sk->sk_sndbuf = sysctl_wmem_default;
++ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
+ ipc.sockc.mark = fl4.flowi4_mark;
+ err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+ len, 0, &ipc, &rt, MSG_DONTWAIT);
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index b297bb28556ec..540002c9f3b35 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
+ ipc->tos = val;
+ ipc->priority = rt_tos2priority(ipc->tos);
+ break;
+-
++ case IP_PROTOCOL:
++ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
++ return -EINVAL;
++ val = *(int *)CMSG_DATA(cmsg);
++ if (val < 1 || val > 255)
++ return -EINVAL;
++ ipc->protocol = val;
++ break;
+ default:
+ return -EINVAL;
+ }
+@@ -772,7 +779,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+- if (optlen > sysctl_optmem_max)
++ if (optlen > READ_ONCE(sysctl_optmem_max))
+ return -ENOBUFS;
+
+ gsf = memdup_sockptr(optval, optlen);
+@@ -782,7 +789,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ err = -ENOBUFS;
+ if (gsf->gf_numsrc >= 0x1ffffff ||
+- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
++ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
+ goto out_free_gsf;
+
+ err = -EINVAL;
+@@ -808,7 +815,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+
+ if (optlen < size0)
+ return -EINVAL;
+- if (optlen > sysctl_optmem_max - 4)
++ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+@@ -832,7 +839,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ err = -ENOBUFS;
+- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
++ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
+ goto out_free_gsf;
+ err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
+ &gf32->gf_group, gf32->gf_slist_flex);
+@@ -1231,7 +1238,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
+
+ if (optlen < IP_MSFILTER_SIZE(0))
+ goto e_inval;
+- if (optlen > sysctl_optmem_max) {
++ if (optlen > READ_ONCE(sysctl_optmem_max)) {
+ err = -ENOBUFS;
+ break;
+ }
+@@ -1242,7 +1249,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
+ }
+ /* numsrc >= (1G-4) overflow in 32 bits */
+ if (msf->imsf_numsrc >= 0x3ffffffcU ||
+- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
++ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
+ kfree(msf);
+ err = -ENOBUFS;
+ break;
+@@ -1597,7 +1604,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ {
+ struct net *net = sock_net(sk);
+ val = (inet->uc_ttl == -1 ?
+- net->ipv4.sysctl_ip_default_ttl :
++ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
+ inet->uc_ttl);
+ break;
+ }
+@@ -1724,6 +1731,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ case IP_MINTTL:
+ val = inet->min_ttl;
+ break;
++ case IP_PROTOCOL:
++ val = inet_sk(sk)->inet_num;
++ break;
+ default:
+ release_sock(sk);
+ return -ENOPROTOOPT;
+diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
+index fe9101d3d69e0..426dc910aaf87 100644
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -613,10 +613,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ }
+
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+- if (headroom > dev->needed_headroom)
+- dev->needed_headroom = headroom;
++ if (headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, headroom);
+
+- if (skb_cow_head(skb, dev->needed_headroom)) {
++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ ip_rt_put(rt);
+ goto tx_dropped;
+ }
+@@ -797,10 +797,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+
+ max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
+- if (max_headroom > dev->needed_headroom)
+- dev->needed_headroom = max_headroom;
++ if (max_headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, max_headroom);
+
+- if (skb_cow_head(skb, dev->needed_headroom)) {
++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
+index 6b2dc7b2b6127..d3275d1ed2601 100644
+--- a/net/ipv4/ip_tunnel_core.c
++++ b/net/ipv4/ip_tunnel_core.c
+@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
+ .un.frag.__unused = 0,
+ .un.frag.mtu = htons(mtu),
+ };
+- icmph->checksum = ip_compute_csum(icmph, len);
++ icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
+ skb_reset_transport_header(skb);
+
+ niph = skb_push(skb, sizeof(*niph));
+@@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
+ u32 mtu = dst_mtu(encap_dst) - headroom;
+
+ if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
+- (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
++ (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu))
+ return 0;
+
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
+diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
+index efe25a0172e6f..df23319adc804 100644
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+- xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET);
+ break;
+ case htons(ETH_P_IPV6):
+- xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET6);
+ break;
+ default:
+ goto tx_err;
+diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
+index 2dda856ca2602..aea29d97f8dfa 100644
+--- a/net/ipv4/ipmr.c
++++ b/net/ipv4/ipmr.c
+@@ -261,7 +261,9 @@ static int __net_init ipmr_rules_init(struct net *net)
+ return 0;
+
+ err2:
++ rtnl_lock();
+ ipmr_free_table(mrt);
++ rtnl_unlock();
+ err1:
+ fib_rules_unregister(ops);
+ return err;
+diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
+index 25ea6ac44db95..6a1427916c7dc 100644
+--- a/net/ipv4/metrics.c
++++ b/net/ipv4/metrics.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ #include <linux/netlink.h>
++#include <linux/nospec.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/types.h>
+ #include <net/ip.h>
+@@ -28,6 +29,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
+ return -EINVAL;
+ }
+
++ type = array_index_nospec(type, RTAX_MAX + 1);
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
+index c53f14b943560..71bf3aeed73c1 100644
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -1524,6 +1524,10 @@ int arpt_register_table(struct net *net,
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
++ struct arpt_entry *iter;
++
++ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
++ cleanup_entry(iter, net);
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
+ }
+diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
+index 13acb687c19ab..a748a1e754605 100644
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -1044,7 +1044,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_counters *counters;
+ struct ipt_entry *iter;
+
+- ret = 0;
+ counters = xt_counters_alloc(num_counters);
+ if (!counters) {
+ ret = -ENOMEM;
+@@ -1090,7 +1089,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
+ vfree(counters);
+- return ret;
++ return 0;
+
+ put_module:
+ module_put(t->me);
+@@ -1741,6 +1740,10 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
++ struct ipt_entry *iter;
++
++ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
++ cleanup_entry(iter, net);
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
+ }
+diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
+index 8fd1aba8af31c..34737b1d6526c 100644
+--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
++++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
+@@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
+
+ switch (ctinfo) {
+ case IP_CT_NEW:
+- ct->mark = hash;
++ WRITE_ONCE(ct->mark, hash);
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+@@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
+ #ifdef DEBUG
+ nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ #endif
+- pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
++ pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
+ if (!clusterip_responsible(cipinfo->config, hash)) {
+ pr_debug("not responsible\n");
+ return NF_DROP;
+@@ -520,8 +520,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
+ if (IS_ERR(config))
+ return PTR_ERR(config);
+ }
+- } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
++ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
++ clusterip_config_entry_put(config);
++ clusterip_config_put(config);
+ return -EINVAL;
++ }
+
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
+index 4eed5afca392e..f2edb40c0db00 100644
+--- a/net/ipv4/netfilter/nf_reject_ipv4.c
++++ b/net/ipv4/netfilter/nf_reject_ipv4.c
+@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+ niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
+- net->ipv4.sysctl_ip_default_ttl);
++ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
+ nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
+ niph->tot_len = htons(nskb->len);
+ ip_send_check(niph);
+@@ -115,7 +115,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+ niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
+- net->ipv4.sysctl_ip_default_ttl);
++ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
+
+ skb_reset_transport_header(nskb);
+ icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
+diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+index b2bae0b0e42a1..61cb2341f50fe 100644
+--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ hp->source, lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+- inet_twsk_deschedule_put(inet_twsk(sk));
++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
+index 03df986217b7b..9e6f0f1275e2c 100644
+--- a/net/ipv4/netfilter/nft_fib_ipv4.c
++++ b/net/ipv4/netfilter/nft_fib_ipv4.c
+@@ -83,6 +83,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ else
+ oif = NULL;
+
++ if (priv->flags & NFTA_FIB_F_IIF)
++ fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif);
++
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
+ nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
+index 9e8100728d464..c140a36bd1e65 100644
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -1857,7 +1857,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
+ /* __ip6_del_rt does a release, so do a hold here */
+ fib6_info_hold(f6i);
+ ipv6_stub->ip6_del_rt(net, f6i,
+- !net->ipv4.sysctl_nexthop_compat_mode);
++ !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
+ }
+ }
+
+@@ -1899,15 +1899,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
+ /* if any FIB entries reference this nexthop, any dst entries
+ * need to be regenerated
+ */
+-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
++static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
++ struct nexthop *replaced_nh)
+ {
+ struct fib6_info *f6i;
++ struct nh_group *nhg;
++ int i;
+
+ if (!list_empty(&nh->fi_list))
+ rt_cache_flush(net);
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_update_sernum(net, f6i);
++
++ /* if an IPv6 group was replaced, we have to release all old
++ * dsts to make sure all refcounts are released
++ */
++ if (!replaced_nh->is_group)
++ return;
++
++ /* new dsts must use only the new nexthop group */
++ synchronize_net();
++
++ nhg = rtnl_dereference(replaced_nh->nh_grp);
++ for (i = 0; i < nhg->num_nh; i++) {
++ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
++ struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
++
++ if (nhi->family == AF_INET6)
++ ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
++ }
+ }
+
+ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
+@@ -2247,7 +2268,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
+ err = replace_nexthop_single(net, old, new, extack);
+
+ if (!err) {
+- nh_rt_cache_flush(net, old);
++ nh_rt_cache_flush(net, old, new);
+
+ __remove_nexthop(net, new, NULL);
+ nexthop_put(new);
+@@ -2341,7 +2362,8 @@ out:
+ if (!rc) {
+ nh_base_seq_inc(net);
+ nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+- if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
++ if (replace_notify &&
++ READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
+ nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
+ }
+
+@@ -2513,7 +2535,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
+ if (!err) {
+ nh->nh_flags = fib_nh->fib_nh_flags;
+ fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
+- fib_nh->fib_nh_scope);
++ !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1);
+ } else {
+ fib_nh_release(net, fib_nh);
+ }
+@@ -2544,11 +2566,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
+ /* sets nh_dev if successful */
+ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
+ extack);
+- if (err)
++ if (err) {
++ /* IPv6 is not enabled, don't call fib6_nh_release */
++ if (err == -EAFNOSUPPORT)
++ goto out;
+ ipv6_stub->fib6_nh_release(fib6_nh);
+- else
++ } else {
+ nh->nh_flags = fib6_nh->fib_nh_flags;
+-
++ }
++out:
+ return err;
+ }
+
+@@ -3196,13 +3222,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+ &rtm_dump_nexthop_cb, &filter);
+ if (err < 0) {
+ if (likely(skb->len))
+- goto out;
+- goto out_err;
++ err = skb->len;
+ }
+
+-out:
+- err = skb->len;
+-out_err:
+ cb->seq = net->nexthop.seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ return err;
+@@ -3342,25 +3364,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
+ dd->filter.res_bucket_nh_id != nhge->nh->id)
+ continue;
+
++ dd->ctx->bucket_index = bucket_index;
+ err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+ RTM_NEWNEXTHOPBUCKET, portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+- if (err < 0) {
+- if (likely(skb->len))
+- goto out;
+- goto out_err;
+- }
++ if (err)
++ return err;
+ }
+
+ dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+- bucket_index = 0;
++ dd->ctx->bucket_index = 0;
+
+-out:
+- err = skb->len;
+-out_err:
+- dd->ctx->bucket_index = bucket_index;
+- return err;
++ return 0;
+ }
+
+ static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
+@@ -3409,13 +3425,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
+
+ if (err < 0) {
+ if (likely(skb->len))
+- goto out;
+- goto out_err;
++ err = skb->len;
+ }
+
+-out:
+- err = skb->len;
+-out_err:
+ cb->seq = net->nexthop.seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ return err;
+diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
+index 1e44a43acfe2d..c4a2565da2806 100644
+--- a/net/ipv4/ping.c
++++ b/net/ipv4/ping.c
+@@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
+ struct sock *sk = NULL;
+ struct inet_sock *isk;
+ struct hlist_nulls_node *hnode;
+- int dif = skb->dev->ifindex;
++ int dif, sdif;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
++ dif = inet_iif(skb);
++ sdif = inet_sdif(skb);
+ pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
+ (int)ident, &ip_hdr(skb)->daddr, dif);
+ #if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
++ dif = inet6_iif(skb);
++ sdif = inet6_sdif(skb);
+ pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
+ (int)ident, &ipv6_hdr(skb)->daddr, dif);
+ #endif
++ } else {
++ return NULL;
+ }
+
+ read_lock_bh(&ping_table.lock);
+@@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
+ continue;
+ }
+
+- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
++ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
++ sk->sk_bound_dev_if != sdif)
+ continue;
+
+ sock_hold(sk);
+@@ -298,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+ struct net *net = sock_net(sk);
+ if (sk->sk_family == AF_INET) {
+ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
++ u32 tb_id = RT_TABLE_LOCAL;
+ int chk_addr_ret;
+
+ if (addr_len < sizeof(*addr))
+@@ -313,8 +321,10 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ chk_addr_ret = RTN_LOCAL;
+- else
+- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
++ else {
++ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
++ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
++ }
+
+ if ((!inet_can_nonlocal_bind(net, isk) &&
+ chk_addr_ret != RTN_LOCAL) ||
+@@ -352,6 +362,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+ return -ENODEV;
+ }
+ }
++
++ if (!dev && sk->sk_bound_dev_if) {
++ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
++ if (!dev) {
++ rcu_read_unlock();
++ return -ENODEV;
++ }
++ }
+ has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
+ scoped);
+ rcu_read_unlock();
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
+index b0d3a09dc84e7..4b9280a3b6732 100644
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -53,7 +53,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
+ struct net *net = seq->private;
+ int orphans, sockets;
+
+- orphans = percpu_counter_sum_positive(&tcp_orphan_count);
++ orphans = tcp_orphan_count_sum();
+ sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
+
+ socket_seq_show(seq);
+@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
+
+ seq_printf(seq, "\nIp: %d %d",
+ IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
+- net->ipv4.sysctl_ip_default_ttl);
++ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
+
+ BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
+ snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
+diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
+index bb446e60cf580..f532589d26926 100644
+--- a/net/ipv4/raw.c
++++ b/net/ipv4/raw.c
+@@ -559,6 +559,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ }
+
+ ipcm_init_sk(&ipc, inet);
++ /* Keep backward compat */
++ if (hdrincl)
++ ipc.protocol = IPPROTO_RAW;
+
+ if (msg->msg_controllen) {
+ err = ip_cmsg_send(sk, msg, &ipc, false);
+@@ -626,7 +629,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
+ RT_SCOPE_UNIVERSE,
+- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
++ hdrincl ? ipc.protocol : sk->sk_protocol,
+ inet_sk_flowi_flags(sk) |
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ daddr, saddr, 0, 0, sk->sk_uid);
+@@ -721,6 +724,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ int ret = -EINVAL;
+ int chk_addr_ret;
+
++ lock_sock(sk);
+ if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
+ goto out;
+
+@@ -740,7 +744,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ inet->inet_saddr = 0; /* Use device */
+ sk_dst_reset(sk);
+ ret = 0;
+-out: return ret;
++out:
++ release_sock(sk);
++ return ret;
+ }
+
+ /*
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index d6899ab5fb39b..bc6240d327a8f 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -506,6 +506,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
+ }
+ EXPORT_SYMBOL(__ip_select_ident);
+
++static void ip_rt_fix_tos(struct flowi4 *fl4)
++{
++ __u8 tos = RT_FL_TOS(fl4);
++
++ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
++ fl4->flowi4_scope = tos & RTO_ONLINK ?
++ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
++}
++
+ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+ const struct sock *sk,
+ const struct iphdr *iph,
+@@ -831,6 +840,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
+ rt = (struct rtable *) dst;
+
+ __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
++ ip_rt_fix_tos(&fl4);
+ __ip_do_redirect(rt, skb, &fl4, true);
+ }
+
+@@ -1055,6 +1065,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct flowi4 fl4;
+
+ ip_rt_build_flow_key(&fl4, sk, skb);
++ ip_rt_fix_tos(&fl4);
+
+ /* Don't make lookup fail for bridged encapsulations */
+ if (skb && netif_is_any_bridge_port(skb->dev))
+@@ -1129,6 +1140,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+ goto out;
+
+ new = true;
++ } else {
++ ip_rt_fix_tos(&fl4);
+ }
+
+ __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
+@@ -1391,7 +1404,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
+ struct fib_info *fi = res->fi;
+ u32 mtu = 0;
+
+- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
++ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
+ fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
+ mtu = fi->fib_mtu;
+
+@@ -1714,6 +1727,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ unsigned int flags = RTCF_MULTICAST;
+ struct rtable *rth;
++ bool no_policy;
+ u32 itag = 0;
+ int err;
+
+@@ -1724,8 +1738,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ if (our)
+ flags |= RTCF_LOCAL;
+
++ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
++ if (no_policy)
++ IPCB(skb)->flags |= IPSKB_NOPOLICY;
++
+ rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
+- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
++ no_policy, false);
+ if (!rth)
+ return -ENOBUFS;
+
+@@ -1741,6 +1759,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ #endif
+ RT_CACHE_STAT_INC(in_slow_mc);
+
++ skb_dst_drop(skb);
+ skb_dst_set(skb, &rth->dst);
+ return 0;
+ }
+@@ -1783,7 +1802,7 @@ static int __mkroute_input(struct sk_buff *skb,
+ struct rtable *rth;
+ int err;
+ struct in_device *out_dev;
+- bool do_cache;
++ bool do_cache, no_policy;
+ u32 itag = 0;
+
+ /* get a working reference to the output device */
+@@ -1828,6 +1847,10 @@ static int __mkroute_input(struct sk_buff *skb,
+ }
+ }
+
++ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
++ if (no_policy)
++ IPCB(skb)->flags |= IPSKB_NOPOLICY;
++
+ fnhe = find_exception(nhc, daddr);
+ if (do_cache) {
+ if (fnhe)
+@@ -1840,8 +1863,7 @@ static int __mkroute_input(struct sk_buff *skb,
+ }
+ }
+
+- rth = rt_dst_alloc(out_dev->dev, 0, res->type,
+- IN_DEV_ORCONF(in_dev, NOPOLICY),
++ rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
+ IN_DEV_ORCONF(out_dev, NOXFRM));
+ if (!rth) {
+ err = -ENOBUFS;
+@@ -1907,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
+ const struct sk_buff *skb,
+ bool *p_has_inner)
+ {
+- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
++ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
+ struct flow_keys keys, hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+@@ -1936,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
+ const struct sk_buff *skb,
+ bool has_inner)
+ {
+- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
++ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
+ struct flow_keys keys, hash_keys;
+
+ /* We assume the packet carries an encapsulation, but if none was
+@@ -1996,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
+ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
+ const struct flowi4 *fl4)
+ {
+- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
++ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
+ struct flow_keys hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+@@ -2026,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ struct flow_keys hash_keys;
+ u32 mhash = 0;
+
+- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
++ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+@@ -2129,6 +2151,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
+
+ fib_select_multipath(res, h);
++ IPCB(skb)->flags |= IPSKB_MULTIPATH;
+ }
+ #endif
+
+@@ -2216,6 +2239,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ struct rtable *rth;
+ struct flowi4 fl4;
+ bool do_cache = true;
++ bool no_policy;
+
+ /* IP on this device is disabled. */
+
+@@ -2333,6 +2357,10 @@ brd_input:
+ RT_CACHE_STAT_INC(in_brd);
+
+ local_input:
++ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
++ if (no_policy)
++ IPCB(skb)->flags |= IPSKB_NOPOLICY;
++
+ do_cache &= res->fi && !itag;
+ if (do_cache) {
+ struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+@@ -2347,7 +2375,7 @@ local_input:
+
+ rth = rt_dst_alloc(ip_rt_get_dev(net, res),
+ flags | RTCF_LOCAL, res->type,
+- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
++ no_policy, false);
+ if (!rth)
+ goto e_nobufs;
+
+@@ -2609,7 +2637,6 @@ add:
+ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ const struct sk_buff *skb)
+ {
+- __u8 tos = RT_FL_TOS(fl4);
+ struct fib_result res = {
+ .type = RTN_UNSPEC,
+ .fi = NULL,
+@@ -2619,9 +2646,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ struct rtable *rth;
+
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
+- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
+- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
++ ip_rt_fix_tos(fl4);
+
+ rcu_read_lock();
+ rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
+@@ -3401,8 +3426,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ fa->fa_tos == fri.tos &&
+ fa->fa_info == res.fi &&
+ fa->fa_type == fri.type) {
+- fri.offload = fa->offload;
+- fri.trap = fa->trap;
++ fri.offload = READ_ONCE(fa->offload);
++ fri.trap = READ_ONCE(fa->trap);
+ break;
+ }
+ }
+diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
+index 33792cf55a793..3aab914eb1039 100644
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -249,12 +249,12 @@ bool cookie_timestamp_decode(const struct net *net,
+ return true;
+ }
+
+- if (!net->ipv4.sysctl_tcp_timestamps)
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
+ return false;
+
+ tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
+
+- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
++ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
+ return false;
+
+ if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
+@@ -263,7 +263,7 @@ bool cookie_timestamp_decode(const struct net *net,
+ tcp_opt->wscale_ok = 1;
+ tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
+
+- return net->ipv4.sysctl_tcp_window_scaling != 0;
++ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
+ }
+ EXPORT_SYMBOL(cookie_timestamp_decode);
+
+@@ -275,7 +275,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
+ if (!ecn_ok)
+ return false;
+
+- if (net->ipv4.sysctl_tcp_ecn)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
+ return true;
+
+ return dst_feature(dst, RTAX_FEATURE_ECN);
+@@ -283,22 +283,26 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
+ EXPORT_SYMBOL(cookie_ecn_ok);
+
+ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
++ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk,
+ struct sk_buff *skb)
+ {
+ struct tcp_request_sock *treq;
+ struct request_sock *req;
+
+-#ifdef CONFIG_MPTCP
+ if (sk_is_mptcp(sk))
+- ops = &mptcp_subflow_request_sock_ops;
+-#endif
++ req = mptcp_subflow_reqsk_alloc(ops, sk, false);
++ else
++ req = inet_reqsk_alloc(ops, sk, false);
+
+- req = inet_reqsk_alloc(ops, sk, false);
+ if (!req)
+ return NULL;
+
+ treq = tcp_rsk(req);
++
++ /* treq->af_specific might be used to perform TCP_MD5 lookup */
++ treq->af_specific = af_ops;
++
+ treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
+ #if IS_ENABLED(CONFIG_MPTCP)
+ treq->is_mptcp = sk_is_mptcp(sk);
+@@ -337,7 +341,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+ struct flowi4 fl4;
+ u32 tsoff = 0;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
++ !th->ack || th->rst)
+ goto out;
+
+ if (tcp_synq_no_recent_overflow(sk))
+@@ -366,7 +371,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+ goto out;
+
+ ret = NULL;
+- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
++ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
++ &tcp_request_sock_ipv4_ops, sk, skb);
+ if (!req)
+ goto out;
+
+diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
+index 6f1e64d492328..1f22e72074fdc 100644
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -38,6 +38,7 @@ static int ip_local_port_range_min[] = { 1, 1 };
+ static int ip_local_port_range_max[] = { 65535, 65535 };
+ static int tcp_adv_win_scale_min = -31;
+ static int tcp_adv_win_scale_max = 31;
++static int tcp_app_win_max = 31;
+ static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+ static int tcp_min_snd_mss_max = 65535;
+ static int ip_privileged_port_min;
+@@ -97,7 +98,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
+ * port limit.
+ */
+ if ((range[1] < range[0]) ||
+- (range[0] < net->ipv4.sysctl_ip_prot_sock))
++ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
+ ret = -EINVAL;
+ else
+ set_local_port_range(net, range);
+@@ -123,7 +124,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
+ .extra2 = &ip_privileged_port_max,
+ };
+
+- pports = net->ipv4.sysctl_ip_prot_sock;
++ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+@@ -135,7 +136,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
+ if (range[0] < pports)
+ ret = -EINVAL;
+ else
+- net->ipv4.sysctl_ip_prot_sock = pports;
++ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
+ }
+
+ return ret;
+@@ -363,61 +364,6 @@ bad_key:
+ return ret;
+ }
+
+-static void proc_configure_early_demux(int enabled, int protocol)
+-{
+- struct net_protocol *ipprot;
+-#if IS_ENABLED(CONFIG_IPV6)
+- struct inet6_protocol *ip6prot;
+-#endif
+-
+- rcu_read_lock();
+-
+- ipprot = rcu_dereference(inet_protos[protocol]);
+- if (ipprot)
+- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+- NULL;
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+- ip6prot = rcu_dereference(inet6_protos[protocol]);
+- if (ip6prot)
+- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+- NULL;
+-#endif
+- rcu_read_unlock();
+-}
+-
+-static int proc_tcp_early_demux(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
+-{
+- int ret = 0;
+-
+- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+-
+- if (write && !ret) {
+- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+-
+- proc_configure_early_demux(enabled, IPPROTO_TCP);
+- }
+-
+- return ret;
+-}
+-
+-static int proc_udp_early_demux(struct ctl_table *table, int write,
+- void *buffer, size_t *lenp, loff_t *ppos)
+-{
+- int ret = 0;
+-
+- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+-
+- if (write && !ret) {
+- int enabled = init_net.ipv4.sysctl_udp_early_demux;
+-
+- proc_configure_early_demux(enabled, IPPROTO_UDP);
+- }
+-
+- return ret;
+-}
+-
+ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+ int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+@@ -639,6 +585,8 @@ static struct ctl_table ipv4_net_table[] = {
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE
+ },
+ {
+ .procname = "icmp_errors_use_inbound_ifaddr",
+@@ -646,6 +594,8 @@ static struct ctl_table ipv4_net_table[] = {
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE
+ },
+ {
+ .procname = "icmp_ratelimit",
+@@ -685,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = {
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "tcp_ecn_fallback",
+@@ -692,6 +644,8 @@ static struct ctl_table ipv4_net_table[] = {
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "ip_dynaddr",
+@@ -712,14 +666,14 @@ static struct ctl_table ipv4_net_table[] = {
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+- .proc_handler = proc_udp_early_demux
++ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+- .proc_handler = proc_tcp_early_demux
++ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
+ .procname = "nexthop_compat_mode",
+@@ -1215,6 +1169,8 @@ static struct ctl_table ipv4_net_table[] = {
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = &tcp_app_win_max,
+ },
+ {
+ .procname = "tcp_adv_win_scale",
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index f5c336f8b0c8e..a91cf000bb61b 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -287,8 +287,8 @@ enum {
+ TCP_CMSG_TS = 2
+ };
+
+-struct percpu_counter tcp_orphan_count;
+-EXPORT_SYMBOL_GPL(tcp_orphan_count);
++DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
++EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
+
+ long sysctl_tcp_mem[3] __read_mostly;
+ EXPORT_SYMBOL(sysctl_tcp_mem);
+@@ -435,10 +435,11 @@ void tcp_init_sock(struct sock *sk)
+ * algorithms that we must have the following bandaid to talk
+ * efficiently to them. -DaveM
+ */
+- tp->snd_cwnd = TCP_INIT_CWND;
++ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
+
+ /* There's a bubble in the pipe until at least the first ACK. */
+ tp->app_limited = ~0U;
++ tp->rate_app_limited = 1;
+
+ /* See draft-stevens-tcpca-spec-01 for discussion of the
+ * initialization of these values.
+@@ -447,7 +448,7 @@ void tcp_init_sock(struct sock *sk)
+ tp->snd_cwnd_clamp = ~0;
+ tp->mss_cache = TCP_MSS_DEFAULT;
+
+- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
++ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
+ tcp_assign_congestion_control(sk);
+
+ tp->tsoffset = 0;
+@@ -458,8 +459,8 @@ void tcp_init_sock(struct sock *sk)
+
+ icsk->icsk_sync_mss = tcp_sync_mss;
+
+- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
++ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+
+ sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
+@@ -501,6 +502,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+ __poll_t mask;
+ struct sock *sk = sock->sk;
+ const struct tcp_sock *tp = tcp_sk(sk);
++ u8 shutdown;
+ int state;
+
+ sock_poll_wait(file, sock, wait);
+@@ -543,9 +545,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+ * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
+ * blocking on fresh not-connected or disconnected socket. --ANK
+ */
+- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
++ shutdown = READ_ONCE(sk->sk_shutdown);
++ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ mask |= EPOLLHUP;
+- if (sk->sk_shutdown & RCV_SHUTDOWN)
++ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+
+ /* Connected or passive Fast Open socket? */
+@@ -561,7 +564,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+ if (tcp_stream_is_readable(sk, target))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
++ if (!(shutdown & SEND_SHUTDOWN)) {
+ if (__sk_stream_is_writeable(sk, 1)) {
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ } else { /* send SIGIO later */
+@@ -644,7 +647,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+ }
+ EXPORT_SYMBOL(tcp_ioctl);
+
+-static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
++void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
+ {
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
+ tp->pushed_seq = tp->write_seq;
+@@ -655,7 +658,7 @@ static inline bool forced_push(const struct tcp_sock *tp)
+ return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
+ }
+
+-static void skb_entail(struct sock *sk, struct sk_buff *skb)
++void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+@@ -694,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+ int size_goal)
+ {
+ return skb->len < size_goal &&
+- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+ !tcp_rtx_queue_empty(sk) &&
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+@@ -952,7 +955,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+ */
+ void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+ {
+- if (skb && !skb->len) {
++ if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+ tcp_unlink_write_queue(skb, sk);
+ if (tcp_write_queue_empty(sk))
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+@@ -982,7 +985,7 @@ new_segment:
+ #ifdef CONFIG_TLS_DEVICE
+ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+ #endif
+- skb_entail(sk, skb);
++ tcp_skb_entail(sk, skb);
+ copy = size_goal;
+ }
+
+@@ -991,7 +994,7 @@ new_segment:
+
+ i = skb_shinfo(skb)->nr_frags;
+ can_coalesce = skb_can_coalesce(skb, i, page, offset);
+- if (!can_coalesce && i >= sysctl_max_skb_frags) {
++ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+ tcp_mark_push(tp, skb);
+ goto new_segment;
+ }
+@@ -1002,7 +1005,7 @@ new_segment:
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ } else {
+ get_page(page);
+- skb_fill_page_desc(skb, i, page, offset, copy);
++ skb_fill_page_desc_noacc(skb, i, page, offset, copy);
+ }
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+@@ -1159,7 +1162,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ struct sockaddr *uaddr = msg->msg_name;
+ int err, flags;
+
+- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
++ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
++ TFO_CLIENT_ENABLE) ||
+ (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+ uaddr->sa_family == AF_UNSPEC))
+ return -EOPNOTSUPP;
+@@ -1311,7 +1315,7 @@ new_segment:
+ process_backlog++;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+- skb_entail(sk, skb);
++ tcp_skb_entail(sk, skb);
+ copy = size_goal;
+
+ /* All packets are restored as if they have
+@@ -1343,7 +1347,7 @@ new_segment:
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+- if (i >= sysctl_max_skb_frags) {
++ if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+ tcp_mark_push(tp, skb);
+ goto new_segment;
+ }
+@@ -1663,11 +1667,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ if (!copied)
+ copied = used;
+ break;
+- } else if (used <= len) {
+- seq += used;
+- copied += used;
+- offset += used;
+ }
++ if (WARN_ON_ONCE(used > len))
++ used = len;
++ seq += used;
++ copied += used;
++ offset += used;
++
+ /* If recv_actor drops the lock (e.g. TCP splice
+ * receive) the skb pointer might be invalid when
+ * getting here: tcp_collapse might have deleted it
+@@ -1719,7 +1725,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ cap = sk->sk_rcvbuf >> 1;
+ else
+- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
++ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ val = min(val, cap);
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+
+@@ -1776,6 +1782,9 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
+ {
+ skb_frag_t *frag;
+
++ if (unlikely(offset_skb >= skb->len))
++ return NULL;
++
+ offset_skb -= skb_headlen(skb);
+ if ((int)offset_skb < 0 || skb_has_frag_list(skb))
+ return NULL;
+@@ -2687,11 +2696,37 @@ void tcp_shutdown(struct sock *sk, int how)
+ }
+ EXPORT_SYMBOL(tcp_shutdown);
+
++int tcp_orphan_count_sum(void)
++{
++ int i, total = 0;
++
++ for_each_possible_cpu(i)
++ total += per_cpu(tcp_orphan_count, i);
++
++ return max(total, 0);
++}
++
++static int tcp_orphan_cache;
++static struct timer_list tcp_orphan_timer;
++#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)
++
++static void tcp_orphan_update(struct timer_list *unused)
++{
++ WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
++ mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
++}
++
++static bool tcp_too_many_orphans(int shift)
++{
++ return READ_ONCE(tcp_orphan_cache) << shift >
++ READ_ONCE(sysctl_tcp_max_orphans);
++}
++
+ bool tcp_check_oom(struct sock *sk, int shift)
+ {
+ bool too_many_orphans, out_of_socket_memory;
+
+- too_many_orphans = tcp_too_many_orphans(sk, shift);
++ too_many_orphans = tcp_too_many_orphans(shift);
+ out_of_socket_memory = tcp_out_of_memory(sk);
+
+ if (too_many_orphans)
+@@ -2707,7 +2742,7 @@ void __tcp_close(struct sock *sk, long timeout)
+ int data_was_unread = 0;
+ int state;
+
+- sk->sk_shutdown = SHUTDOWN_MASK;
++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ tcp_set_state(sk, TCP_CLOSE);
+@@ -2800,7 +2835,7 @@ adjudge_to_death:
+ /* remove backlog if any, without releasing ownership. */
+ __release_sock(sk);
+
+- percpu_counter_inc(sk->sk_prot->orphan_count);
++ this_cpu_inc(tcp_orphan_count);
+
+ /* Have we already been destroyed by a softirq or backlog? */
+ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+@@ -2937,6 +2972,12 @@ int tcp_disconnect(struct sock *sk, int flags)
+ int old_state = sk->sk_state;
+ u32 seq;
+
++ /* Deny disconnect if other threads are blocked in sk_wait_event()
++ * or inet_wait_for_connect().
++ */
++ if (sk->sk_wait_pending)
++ return -EBUSY;
++
+ if (old_state != TCP_CLOSE)
+ tcp_set_state(sk, TCP_CLOSE);
+
+@@ -2973,7 +3014,7 @@ int tcp_disconnect(struct sock *sk, int flags)
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
+
+- sk->sk_shutdown = 0;
++ WRITE_ONCE(sk->sk_shutdown, 0);
+ sock_reset_flag(sk, SOCK_DONE);
+ tp->srtt_us = 0;
+ tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+@@ -2991,8 +3032,10 @@ int tcp_disconnect(struct sock *sk, int flags)
+ icsk->icsk_rto_min = TCP_RTO_MIN;
+ icsk->icsk_delack_max = TCP_DELACK_MAX;
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+- tp->snd_cwnd = TCP_INIT_CWND;
++ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
+ tp->snd_cwnd_cnt = 0;
++ tp->is_cwnd_limited = 0;
++ tp->max_packets_out = 0;
+ tp->window_clamp = 0;
+ tp->delivered = 0;
+ tp->delivered_ce = 0;
+@@ -3011,8 +3054,7 @@ int tcp_disconnect(struct sock *sk, int flags)
+ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
+ memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
+ __sk_dst_reset(sk);
+- dst_release(sk->sk_rx_dst);
+- sk->sk_rx_dst = NULL;
++ dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
+ tcp_saved_syn_free(tp);
+ tp->compressed_ack = 0;
+ tp->segs_in = 0;
+@@ -3033,6 +3075,7 @@ int tcp_disconnect(struct sock *sk, int flags)
+ tp->last_oow_ack_time = 0;
+ /* There's a bubble in the pipe until at least the first ACK. */
+ tp->app_limited = ~0U;
++ tp->rate_app_limited = 1;
+ tp->rack.mstamp = 0;
+ tp->rack.advanced = 0;
+ tp->rack.reo_wnd_steps = 1;
+@@ -3253,7 +3296,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- inet_csk(sk)->icsk_syn_retries = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3262,7 +3305,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
+ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+ {
+ lock_sock(sk);
+- inet_csk(sk)->icsk_user_timeout = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
+ release_sock(sk);
+ }
+ EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+@@ -3274,7 +3317,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+- tp->keepalive_time = val * HZ;
++ /* Paired with WRITE_ONCE() in keepalive_time_when() */
++ WRITE_ONCE(tp->keepalive_time, val * HZ);
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+@@ -3306,7 +3350,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3318,7 +3362,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_probes = val;
++ /* Paired with READ_ONCE() in keepalive_probes() */
++ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3503,7 +3548,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ case TCP_REPAIR_OPTIONS:
+ if (!tp->repair)
+ err = -EINVAL;
+- else if (sk->sk_state == TCP_ESTABLISHED)
++ else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent)
+ err = tcp_repair_options_est(sk, optval, optlen);
+ else
+ err = -EPERM;
+@@ -3520,19 +3565,19 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ err = -EINVAL;
+ else
+- tp->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ err = -EINVAL;
+ else
+- tp->keepalive_probes = val;
++ WRITE_ONCE(tp->keepalive_probes, val);
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ err = -EINVAL;
+ else
+- icsk->icsk_syn_retries = val;
++ WRITE_ONCE(icsk->icsk_syn_retries, val);
+ break;
+
+ case TCP_SAVE_SYN:
+@@ -3545,18 +3590,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+
+ case TCP_LINGER2:
+ if (val < 0)
+- tp->linger2 = -1;
++ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
++ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+- tp->linger2 = val * HZ;
++ WRITE_ONCE(tp->linger2, val * HZ);
+ break;
+
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+- icsk->icsk_accept_queue.rskq_defer_accept =
+- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+- TCP_RTO_MAX / HZ);
++ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
++ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ));
+ break;
+
+ case TCP_WINDOW_CLAMP:
+@@ -3580,7 +3625,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 0)
+ err = -EINVAL;
+ else
+- icsk->icsk_user_timeout = val;
++ WRITE_ONCE(icsk->icsk_user_timeout, val);
+ break;
+
+ case TCP_FASTOPEN:
+@@ -3596,7 +3641,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ case TCP_FASTOPEN_CONNECT:
+ if (val > 1 || val < 0) {
+ err = -EINVAL;
+- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
++ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
++ TFO_CLIENT_ENABLE) {
+ if (sk->sk_state == TCP_CLOSE)
+ tp->fastopen_connect = val;
+ else
+@@ -3623,7 +3669,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ err = tcp_repair_set_window(tp, optval, optlen);
+ break;
+ case TCP_NOTSENT_LOWAT:
+- tp->notsent_lowat = val;
++ WRITE_ONCE(tp->notsent_lowat, val);
+ sk->sk_write_space(sk);
+ break;
+ case TCP_INQ:
+@@ -3635,7 +3681,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ case TCP_TX_DELAY:
+ if (val)
+ tcp_enable_tx_delay();
+- tp->tcp_tx_delay = val;
++ WRITE_ONCE(tp->tcp_tx_delay, val);
+ break;
+ default:
+ err = -ENOPROTOOPT;
+@@ -3703,7 +3749,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
+ info->tcpi_max_pacing_rate = rate64;
+
+ info->tcpi_reordering = tp->reordering;
+- info->tcpi_snd_cwnd = tp->snd_cwnd;
++ info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
+
+ if (info->tcpi_state == TCP_LISTEN) {
+ /* listeners aliased fields :
+@@ -3872,7 +3918,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
+ rate64 = tcp_compute_delivery_rate(tp);
+ nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
+
+- nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
++ nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
+ nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
+ nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+
+@@ -3923,7 +3969,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+ switch (optname) {
+ case TCP_MAXSEG:
+ val = tp->mss_cache;
+- if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
++ if (tp->rx_opt.user_mss &&
++ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ val = tp->rx_opt.user_mss;
+ if (tp->repair)
+ val = tp->rx_opt.mss_clamp;
+@@ -3944,16 +3991,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+ val = keepalive_probes(tp);
+ break;
+ case TCP_SYNCNT:
+- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
++ val = READ_ONCE(icsk->icsk_syn_retries) ? :
++ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ break;
+ case TCP_LINGER2:
+- val = tp->linger2;
++ val = READ_ONCE(tp->linger2);
+ if (val >= 0)
+- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
++ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+ break;
+ case TCP_DEFER_ACCEPT:
+- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
++ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
++ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ);
+ break;
+ case TCP_WINDOW_CLAMP:
+ val = tp->window_clamp;
+@@ -4089,11 +4138,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_USER_TIMEOUT:
+- val = icsk->icsk_user_timeout;
++ val = READ_ONCE(icsk->icsk_user_timeout);
+ break;
+
+ case TCP_FASTOPEN:
+- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
++ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
+ break;
+
+ case TCP_FASTOPEN_CONNECT:
+@@ -4105,14 +4154,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_TX_DELAY:
+- val = tp->tcp_tx_delay;
++ val = READ_ONCE(tp->tcp_tx_delay);
+ break;
+
+ case TCP_TIMESTAMP:
+ val = tcp_time_stamp_raw() + tp->tsoffset;
+ break;
+ case TCP_NOTSENT_LOWAT:
+- val = tp->notsent_lowat;
++ val = READ_ONCE(tp->notsent_lowat);
+ break;
+ case TCP_INQ:
+ val = tp->recvmsg_inq;
+@@ -4296,12 +4345,16 @@ static void __tcp_alloc_md5sig_pool(void)
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
+ */
+ smp_wmb();
+- tcp_md5sig_pool_populated = true;
++ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
++ * and tcp_get_md5sig_pool().
++ */
++ WRITE_ONCE(tcp_md5sig_pool_populated, true);
+ }
+
+ bool tcp_alloc_md5sig_pool(void)
+ {
+- if (unlikely(!tcp_md5sig_pool_populated)) {
++ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
++ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
+ mutex_lock(&tcp_md5sig_mutex);
+
+ if (!tcp_md5sig_pool_populated) {
+@@ -4312,7 +4365,8 @@ bool tcp_alloc_md5sig_pool(void)
+
+ mutex_unlock(&tcp_md5sig_mutex);
+ }
+- return tcp_md5sig_pool_populated;
++ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
++ return READ_ONCE(tcp_md5sig_pool_populated);
+ }
+ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
+
+@@ -4328,7 +4382,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
+ {
+ local_bh_disable();
+
+- if (tcp_md5sig_pool_populated) {
++ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
++ if (READ_ONCE(tcp_md5sig_pool_populated)) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+@@ -4410,7 +4465,7 @@ void tcp_done(struct sock *sk)
+ if (req)
+ reqsk_fastopen_remove(sk, req, false);
+
+- sk->sk_shutdown = SHUTDOWN_MASK;
++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_state_change(sk);
+@@ -4502,7 +4557,10 @@ void __init tcp_init(void)
+ sizeof_field(struct sk_buff, cb));
+
+ percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
+- percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
++
++ timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
++ mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
++
+ inet_hashinfo_init(&tcp_hashinfo);
+ inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+ thash_entries, 21, /* one slot per 2 MB*/
+diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
+index 6274462b86b4b..c5ee83654db1c 100644
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -274,7 +274,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+- bw = (u64)tp->snd_cwnd * BW_UNIT;
++ bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+ }
+@@ -321,9 +321,9 @@ static void bbr_save_cwnd(struct sock *sk)
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
+- bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
++ bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */
+ else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
+- bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
++ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
+ }
+
+ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+@@ -480,7 +480,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
+- u32 cwnd = tp->snd_cwnd;
++ u32 cwnd = tcp_snd_cwnd(tp);
+
+ /* An ACK for P pkts should release at most 2*P packets. We do this
+ * in two steps. First, here we deduct the number of lost packets.
+@@ -518,7 +518,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+- u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
++ u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
+
+ if (!acked)
+ goto done; /* no packet fully ACKed; just apply caps */
+@@ -542,9 +542,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+ cwnd = max(cwnd, bbr_cwnd_min_target);
+
+ done:
+- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
++ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */
+ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
+- tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
+ }
+
+ /* End cycle phase if it's time and/or we hit the phase's in-flight target. */
+@@ -854,7 +854,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
+ bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
+ bbr->ack_epoch_acked + rs->acked_sacked);
+ extra_acked = bbr->ack_epoch_acked - expected_acked;
+- extra_acked = min(extra_acked, tp->snd_cwnd);
++ extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
+ if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
+ bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
+ }
+@@ -912,7 +912,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
+ return;
+
+ bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
+- tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
++ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
+ bbr_reset_mode(sk);
+ }
+
+@@ -1091,7 +1091,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
+ bbr->full_bw_cnt = 0;
+ bbr_reset_lt_bw_sampling(sk);
+- return tcp_sk(sk)->snd_cwnd;
++ return tcp_snd_cwnd(tcp_sk(sk));
+ }
+
+ /* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
+diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
+index f5f588b1f6e9d..58358bf92e1b8 100644
+--- a/net/ipv4/tcp_bic.c
++++ b/net/ipv4/tcp_bic.c
+@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ if (!acked)
+ return;
+ }
+- bictcp_update(ca, tp->snd_cwnd);
++ bictcp_update(ca, tcp_snd_cwnd(tp));
+ tcp_cong_avoid_ai(tp, ca->cnt, acked);
+ }
+
+@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
+ ca->epoch_start = 0; /* end of epoch */
+
+ /* Wmax and fast convergence */
+- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
++ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
++ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
+ / (2 * BICTCP_BETA_SCALE);
+ else
+- ca->last_max_cwnd = tp->snd_cwnd;
++ ca->last_max_cwnd = tcp_snd_cwnd(tp);
+
+- if (tp->snd_cwnd <= low_window)
+- return max(tp->snd_cwnd >> 1U, 2U);
++ if (tcp_snd_cwnd(tp) <= low_window)
++ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ else
+- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
++ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
+ }
+
+ static void bictcp_state(struct sock *sk, u8 new_state)
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 5f4d6f45d87f7..e3a9477293ce4 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -6,6 +6,7 @@
+ #include <linux/bpf.h>
+ #include <linux/init.h>
+ #include <linux/wait.h>
++#include <linux/util_macros.h>
+
+ #include <net/inet_common.h>
+ #include <net/tls.h>
+@@ -45,8 +46,11 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+ tmp->sg.end = i;
+ if (apply) {
+ apply_bytes -= size;
+- if (!apply_bytes)
++ if (!apply_bytes) {
++ if (sge->length)
++ sk_msg_iter_var_prev(i);
+ break;
++ }
+ }
+ } while (i != msg->sg.end);
+
+@@ -131,17 +135,15 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
+ return ret;
+ }
+
+-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
+- u32 bytes, int flags)
++int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
++ struct sk_msg *msg, u32 bytes, int flags)
+ {
+- bool ingress = sk_msg_to_ingress(msg);
+ struct sk_psock *psock = sk_psock_get(sk);
+ int ret;
+
+- if (unlikely(!psock)) {
+- sk_msg_free(sk, msg);
+- return 0;
+- }
++ if (unlikely(!psock))
++ return -EPIPE;
++
+ ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+ tcp_bpf_push_locked(sk, msg, bytes, flags, false);
+ sk_psock_put(sk, psock);
+@@ -166,12 +168,77 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = sk_wait_event(sk, &timeo,
+ !list_empty(&psock->ingress_msg) ||
+- !skb_queue_empty(&sk->sk_receive_queue), &wait);
++ !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+ }
+
++static int tcp_bpf_recvmsg_parser(struct sock *sk,
++ struct msghdr *msg,
++ size_t len,
++ int nonblock,
++ int flags,
++ int *addr_len)
++{
++ struct sk_psock *psock;
++ int copied;
++
++ if (unlikely(flags & MSG_ERRQUEUE))
++ return inet_recv_error(sk, msg, len, addr_len);
++
++ if (!len)
++ return 0;
++
++ psock = sk_psock_get(sk);
++ if (unlikely(!psock))
++ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
++
++ lock_sock(sk);
++msg_bytes_ready:
++ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
++ if (!copied) {
++ long timeo;
++ int data;
++
++ if (sock_flag(sk, SOCK_DONE))
++ goto out;
++
++ if (sk->sk_err) {
++ copied = sock_error(sk);
++ goto out;
++ }
++
++ if (sk->sk_shutdown & RCV_SHUTDOWN)
++ goto out;
++
++ if (sk->sk_state == TCP_CLOSE) {
++ copied = -ENOTCONN;
++ goto out;
++ }
++
++ timeo = sock_rcvtimeo(sk, nonblock);
++ if (!timeo) {
++ copied = -EAGAIN;
++ goto out;
++ }
++
++ if (signal_pending(current)) {
++ copied = sock_intr_errno(timeo);
++ goto out;
++ }
++
++ data = tcp_msg_wait_data(sk, psock, timeo);
++ if (data && !sk_psock_queue_empty(psock))
++ goto msg_bytes_ready;
++ copied = -EAGAIN;
++ }
++out:
++ release_sock(sk);
++ sk_psock_put(sk, psock);
++ return copied;
++}
++
+ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags, int *addr_len)
+ {
+@@ -181,6 +248,9 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
++ if (!len)
++ return 0;
++
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+@@ -216,10 +286,10 @@ msg_bytes_ready:
+ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
+ struct sk_msg *msg, int *copied, int flags)
+ {
+- bool cork = false, enospc = sk_msg_full(msg);
++ bool cork = false, enospc = sk_msg_full(msg), redir_ingress;
+ struct sock *sk_redir;
+- u32 tosend, delta = 0;
+- u32 eval = __SK_NONE;
++ u32 tosend, origsize, sent, delta = 0;
++ u32 eval;
+ int ret;
+
+ more_data:
+@@ -250,6 +320,7 @@ more_data:
+ tosend = msg->sg.size;
+ if (psock->apply_bytes && psock->apply_bytes < tosend)
+ tosend = psock->apply_bytes;
++ eval = __SK_NONE;
+
+ switch (psock->eval) {
+ case __SK_PASS:
+@@ -261,6 +332,7 @@ more_data:
+ sk_msg_apply_bytes(psock, tosend);
+ break;
+ case __SK_REDIRECT:
++ redir_ingress = psock->redir_ingress;
+ sk_redir = psock->sk_redir;
+ sk_msg_apply_bytes(psock, tosend);
+ if (!psock->apply_bytes) {
+@@ -276,7 +348,10 @@ more_data:
+ sk_msg_return(sk, msg, tosend);
+ release_sock(sk);
+
+- ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
++ origsize = msg->sg.size;
++ ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
++ msg, tosend, flags);
++ sent = origsize - msg->sg.size;
+
+ if (eval == __SK_REDIRECT)
+ sock_put(sk_redir);
+@@ -313,8 +388,11 @@ more_data:
+ }
+ if (msg &&
+ msg->sg.data[msg->sg.start].page_link &&
+- msg->sg.data[msg->sg.start].length)
++ msg->sg.data[msg->sg.start].length) {
++ if (eval == __SK_REDIRECT)
++ sk_mem_charge(sk, tosend - sent);
+ goto more_data;
++ }
+ }
+ return ret;
+ }
+@@ -464,6 +542,8 @@ enum {
+ enum {
+ TCP_BPF_BASE,
+ TCP_BPF_TX,
++ TCP_BPF_RX,
++ TCP_BPF_TXRX,
+ TCP_BPF_NUM_CFGS,
+ };
+
+@@ -475,7 +555,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
+ struct proto *base)
+ {
+ prot[TCP_BPF_BASE] = *base;
+- prot[TCP_BPF_BASE].unhash = sock_map_unhash;
++ prot[TCP_BPF_BASE].destroy = sock_map_destroy;
+ prot[TCP_BPF_BASE].close = sock_map_close;
+ prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
+ prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
+@@ -483,6 +563,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
+ prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
+ prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
+ prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
++
++ prot[TCP_BPF_RX] = prot[TCP_BPF_BASE];
++ prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser;
++
++ prot[TCP_BPF_TXRX] = prot[TCP_BPF_TX];
++ prot[TCP_BPF_TXRX].recvmsg = tcp_bpf_recvmsg_parser;
+ }
+
+ static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
+@@ -520,6 +606,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+ int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+ int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
+
++ if (psock->progs.stream_verdict || psock->progs.skb_verdict) {
++ config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX;
++ }
++
+ if (restore) {
+ if (inet_csk_has_ulp(sk)) {
+ /* TLS does not have an unhash proto in SW cases,
+@@ -537,9 +627,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+ return 0;
+ }
+
+- if (inet_csk_has_ulp(sk))
+- return -EINVAL;
+-
+ if (sk->sk_family == AF_INET6) {
+ if (tcp_bpf_assert_proto_ops(psock->sk_proto))
+ return -EINVAL;
+@@ -560,10 +647,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
+ */
+ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+ {
+- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+ struct proto *prot = newsk->sk_prot;
+
+- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
++ if (is_insidevar(prot, tcp_bpf_prots))
+ newsk->sk_prot = sk->sk_prot_creator;
+ }
+ #endif /* CONFIG_BPF_SYSCALL */
+diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
+index 709d238018239..112f28f936934 100644
+--- a/net/ipv4/tcp_cdg.c
++++ b/net/ipv4/tcp_cdg.c
+@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+- tp->snd_cwnd);
+- tp->snd_ssthresh = tp->snd_cwnd;
++ tcp_snd_cwnd(tp));
++ tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ return;
+ }
+ }
+@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+- tp->snd_cwnd);
+- tp->snd_ssthresh = tp->snd_cwnd;
++ tcp_snd_cwnd(tp));
++ tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ }
+ }
+ }
+@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
+ return false;
+ }
+
+- ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
++ ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
+ ca->state = CDG_BACKOFF;
+ tcp_enter_cwr(sk);
+ return true;
+@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ }
+
+ if (!tcp_is_cwnd_limited(sk)) {
+- ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
++ ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
+ return;
+ }
+
+- prior_snd_cwnd = tp->snd_cwnd;
++ prior_snd_cwnd = tcp_snd_cwnd(tp);
+ tcp_reno_cong_avoid(sk, ack, acked);
+
+- incr = tp->snd_cwnd - prior_snd_cwnd;
++ incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
+ ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
+ }
+
+@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (ca->state == CDG_BACKOFF)
+- return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
++ return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
+
+ if (ca->state == CDG_NONFULL && use_tolerance)
+- return tp->snd_cwnd;
++ return tcp_snd_cwnd(tp);
+
+- ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
++ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
+ if (use_shadow)
+- return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
+- return max(2U, tp->snd_cwnd >> 1);
++ return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
++ return max(2U, tcp_snd_cwnd(tp) >> 1);
+ }
+
+ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
+@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
+
+ ca->gradients = gradients;
+ ca->rtt_seq = tp->snd_nxt;
+- ca->shadow_wnd = tp->snd_cwnd;
++ ca->shadow_wnd = tcp_snd_cwnd(tp);
+ break;
+ case CA_EVENT_COMPLETE_CWR:
+ ca->state = CDG_UNKNOWN;
+@@ -375,12 +375,13 @@ static void tcp_cdg_init(struct sock *sk)
+ struct cdg *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
++ ca->gradients = NULL;
+ /* We silently fall back to window = 1 if allocation fails. */
+ if (window > 1)
+ ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
+ GFP_NOWAIT | __GFP_NOWARN);
+ ca->rtt_seq = tp->snd_nxt;
+- ca->shadow_wnd = tp->snd_cwnd;
++ ca->shadow_wnd = tcp_snd_cwnd(tp);
+ }
+
+ static void tcp_cdg_release(struct sock *sk)
+@@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
+ struct cdg *ca = inet_csk_ca(sk);
+
+ kfree(ca->gradients);
++ ca->gradients = NULL;
+ }
+
+ static struct tcp_congestion_ops tcp_cdg __read_mostly = {
+diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
+index db5831e6c136a..f43db30a7195d 100644
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -395,10 +395,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+ */
+ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
+ {
+- u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
++ u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
+
+- acked -= cwnd - tp->snd_cwnd;
+- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
++ acked -= cwnd - tcp_snd_cwnd(tp);
++ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
+
+ return acked;
+ }
+@@ -412,7 +412,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
+ /* If credits accumulated at a higher w, apply them gently now. */
+ if (tp->snd_cwnd_cnt >= w) {
+ tp->snd_cwnd_cnt = 0;
+- tp->snd_cwnd++;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ }
+
+ tp->snd_cwnd_cnt += acked;
+@@ -420,9 +420,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
+ u32 delta = tp->snd_cwnd_cnt / w;
+
+ tp->snd_cwnd_cnt -= delta * w;
+- tp->snd_cwnd += delta;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
+ }
+- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
+ }
+ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
+
+@@ -447,7 +447,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ return;
+ }
+ /* In dangerous area, increase slowly. */
+- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
++ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
+ }
+ EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
+
+@@ -456,7 +456,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+- return max(tp->snd_cwnd >> 1U, 2U);
++ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ }
+ EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
+
+@@ -464,7 +464,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+- return max(tp->snd_cwnd, tp->prior_cwnd);
++ return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
+ }
+ EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
+
+diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
+index 4a30deaa9a37f..af4fc067f2a19 100644
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -328,13 +328,11 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ return;
+
+ if (tcp_in_slow_start(tp)) {
+- if (hystart && after(ack, ca->end_seq))
+- bictcp_hystart_reset(sk);
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
+ }
+- bictcp_update(ca, tp->snd_cwnd, acked);
++ bictcp_update(ca, tcp_snd_cwnd(tp), acked);
+ tcp_cong_avoid_ai(tp, ca->cnt, acked);
+ }
+
+@@ -346,13 +344,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
+ ca->epoch_start = 0; /* end of epoch */
+
+ /* Wmax and fast convergence */
+- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
+- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
++ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
++ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
+ / (2 * BICTCP_BETA_SCALE);
+ else
+- ca->last_max_cwnd = tp->snd_cwnd;
++ ca->last_max_cwnd = tcp_snd_cwnd(tp);
+
+- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
++ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
+ }
+
+ static void cubictcp_state(struct sock *sk, u8 new_state)
+@@ -389,6 +387,9 @@ static void hystart_update(struct sock *sk, u32 delay)
+ struct bictcp *ca = inet_csk_ca(sk);
+ u32 threshold;
+
++ if (after(tp->snd_una, ca->end_seq))
++ bictcp_hystart_reset(sk);
++
+ if (hystart_detect & HYSTART_ACK_TRAIN) {
+ u32 now = bictcp_clock_us(sk);
+
+@@ -410,13 +411,13 @@ static void hystart_update(struct sock *sk, u32 delay)
+ ca->found = 1;
+ pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
+ now - ca->round_start, threshold,
+- ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
++ ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp));
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+- tp->snd_cwnd);
+- tp->snd_ssthresh = tp->snd_cwnd;
++ tcp_snd_cwnd(tp));
++ tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ }
+ }
+ }
+@@ -435,8 +436,8 @@ static void hystart_update(struct sock *sk, u32 delay)
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+ NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+- tp->snd_cwnd);
+- tp->snd_ssthresh = tp->snd_cwnd;
++ tcp_snd_cwnd(tp));
++ tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ }
+ }
+ }
+@@ -466,7 +467,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
+
+ /* hystart triggers when cwnd is larger than some threshold */
+ if (!ca->found && tcp_in_slow_start(tp) && hystart &&
+- tp->snd_cwnd >= hystart_low_window)
++ tcp_snd_cwnd(tp) >= hystart_low_window)
+ hystart_update(sk, delay);
+ }
+
+diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
+index 79f705450c162..43bcefbaefbb1 100644
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -104,8 +104,8 @@ static u32 dctcp_ssthresh(struct sock *sk)
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- ca->loss_cwnd = tp->snd_cwnd;
+- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
++ ca->loss_cwnd = tcp_snd_cwnd(tp);
++ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
+ }
+
+ static void dctcp_update_alpha(struct sock *sk, u32 flags)
+@@ -146,8 +146,8 @@ static void dctcp_react_to_loss(struct sock *sk)
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- ca->loss_cwnd = tp->snd_cwnd;
+- tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
++ ca->loss_cwnd = tcp_snd_cwnd(tp);
++ tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ }
+
+ static void dctcp_state(struct sock *sk, u8 new_state)
+@@ -209,8 +209,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+ static u32 dctcp_cwnd_undo(struct sock *sk)
+ {
+ const struct dctcp *ca = inet_csk_ca(sk);
++ struct tcp_sock *tp = tcp_sk(sk);
+
+- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
++ return max(tcp_snd_cwnd(tp), ca->loss_cwnd);
+ }
+
+ static struct tcp_congestion_ops dctcp __read_mostly = {
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 59412d6354a01..e9b5d6f10c56d 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -301,6 +301,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
+ static bool tcp_fastopen_queue_check(struct sock *sk)
+ {
+ struct fastopen_queue *fastopenq;
++ int max_qlen;
+
+ /* Make sure the listener has enabled fastopen, and we don't
+ * exceed the max # of pending TFO requests allowed before trying
+@@ -313,10 +314,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
+ * temporarily vs a server not supporting Fast Open at all.
+ */
+ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+- if (fastopenq->max_qlen == 0)
++ max_qlen = READ_ONCE(fastopenq->max_qlen);
++ if (max_qlen == 0)
+ return false;
+
+- if (fastopenq->qlen >= fastopenq->max_qlen) {
++ if (fastopenq->qlen >= max_qlen) {
+ struct request_sock *req1;
+ spin_lock(&fastopenq->lock);
+ req1 = fastopenq->rskq_rst_head;
+@@ -338,7 +340,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
+ const struct dst_entry *dst,
+ int flag)
+ {
+- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
++ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
+ tcp_sk(sk)->fastopen_no_cookie ||
+ (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
+ }
+@@ -353,7 +355,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+ const struct dst_entry *dst)
+ {
+ bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
++ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+ struct sock *child;
+ int ret = 0;
+@@ -495,7 +497,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
+ {
+ struct net *net = sock_net(sk);
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
+@@ -516,7 +518,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
+ */
+ bool tcp_fastopen_active_should_disable(struct sock *sk)
+ {
+- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
++ unsigned int tfo_bh_timeout =
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
+ unsigned long timeout;
+ int tfo_da_times;
+ int multiplier;
+diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
+index 349069d6cd0aa..c6de5ce79ad3c 100644
+--- a/net/ipv4/tcp_highspeed.c
++++ b/net/ipv4/tcp_highspeed.c
+@@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ * snd_cwnd <=
+ * hstcp_aimd_vals[ca->ai].cwnd
+ */
+- if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
+- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
++ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) {
++ while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd &&
+ ca->ai < HSTCP_AIMD_MAX - 1)
+ ca->ai++;
+- } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+- while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
++ } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) {
++ while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd)
+ ca->ai--;
+ }
+
+ /* Do additive increase */
+- if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
++ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
+ /* cwnd = cwnd + a(w) / cwnd */
+ tp->snd_cwnd_cnt += ca->ai + 1;
+- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+- tp->snd_cwnd_cnt -= tp->snd_cwnd;
+- tp->snd_cwnd++;
++ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
++ tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp);
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ }
+ }
+ }
+@@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
+ struct hstcp *ca = inet_csk_ca(sk);
+
+ /* Do multiplicative decrease */
+- return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
++ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
+ }
+
+ static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
+diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
+index 55adcfcf96fea..52b1f2665dfae 100644
+--- a/net/ipv4/tcp_htcp.c
++++ b/net/ipv4/tcp_htcp.c
+@@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk,
+
+ ca->packetcount += sample->pkts_acked;
+
+- if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
++ if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) &&
+ now - ca->lasttime >= ca->minRTT &&
+ ca->minRTT > 0) {
+ __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
+@@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
+ const struct htcp *ca = inet_csk_ca(sk);
+
+ htcp_param_update(sk);
+- return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
++ return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U);
+ }
+
+ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+@@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ /* In dangerous area, increase slowly.
+ * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
+ */
+- if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
+- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+- tp->snd_cwnd++;
++ if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) {
++ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp)
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ tp->snd_cwnd_cnt = 0;
+ htcp_alpha_update(ca);
+ } else
+diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
+index be39327e04e6c..abd7d91807e54 100644
+--- a/net/ipv4/tcp_hybla.c
++++ b/net/ipv4/tcp_hybla.c
+@@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk)
+ ca->rho2_7ls = 0;
+ ca->snd_cwnd_cents = 0;
+ ca->hybla_en = true;
+- tp->snd_cwnd = 2;
++ tcp_snd_cwnd_set(tp, 2);
+ tp->snd_cwnd_clamp = 65535;
+
+ /* 1st Rho measurement based on initial srtt */
+@@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk)
+
+ /* set minimum rtt as this is the 1st ever seen */
+ ca->minrtt_us = tp->srtt_us;
+- tp->snd_cwnd = ca->rho;
++ tcp_snd_cwnd_set(tp, ca->rho);
+ }
+
+ static void hybla_state(struct sock *sk, u8 ca_state)
+@@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ * as long as increment is estimated as (rho<<7)/window
+ * it already is <<7 and we can easily count its fractions.
+ */
+- increment = ca->rho2_7ls / tp->snd_cwnd;
++ increment = ca->rho2_7ls / tcp_snd_cwnd(tp);
+ if (increment < 128)
+ tp->snd_cwnd_cnt++;
+ }
+
+ odd = increment % 128;
+- tp->snd_cwnd += increment >> 7;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7));
+ ca->snd_cwnd_cents += odd;
+
+ /* check when fractions goes >=128 and increase cwnd by 1. */
+ while (ca->snd_cwnd_cents >= 128) {
+- tp->snd_cwnd++;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ ca->snd_cwnd_cents -= 128;
+ tp->snd_cwnd_cnt = 0;
+ }
+ /* check when cwnd has not been incremented for a while */
+- if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+- tp->snd_cwnd++;
++ if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ tp->snd_cwnd_cnt = 0;
+ }
+ /* clamp down slowstart cwnd to ssthresh value. */
+ if (is_slowstart)
+- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh));
+
+- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
+ }
+
+ static struct tcp_congestion_ops tcp_hybla __read_mostly = {
+diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
+index 00e54873213e8..c0c81a2c77fae 100644
+--- a/net/ipv4/tcp_illinois.c
++++ b/net/ipv4/tcp_illinois.c
+@@ -224,7 +224,7 @@ static void update_params(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
+
+- if (tp->snd_cwnd < win_thresh) {
++ if (tcp_snd_cwnd(tp) < win_thresh) {
+ ca->alpha = ALPHA_BASE;
+ ca->beta = BETA_BASE;
+ } else if (ca->cnt_rtt > 0) {
+@@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ * tp->snd_cwnd += alpha/tp->snd_cwnd
+ */
+ delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+- if (delta >= tp->snd_cwnd) {
+- tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
+- (u32)tp->snd_cwnd_clamp);
++ if (delta >= tcp_snd_cwnd(tp)) {
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
++ (u32)tp->snd_cwnd_clamp));
+ tp->snd_cwnd_cnt = 0;
+ }
+ }
+@@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
++ u32 decr;
+
+ /* Multiplicative decrease */
+- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
++ decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
++ return max(tcp_snd_cwnd(tp) - decr, 2U);
+ }
+
+ /* Extract info for Tcp socket info provided via netlink. */
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 141e85e6422b1..94633f499e148 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -287,7 +287,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
+ icsk->icsk_ack.quick = quickacks;
+ }
+
+-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
++static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+@@ -295,7 +295,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+ inet_csk_exit_pingpong_mode(sk);
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+-EXPORT_SYMBOL(tcp_enter_quickack_mode);
+
+ /* Send ACKs quickly, if "quick" count is not exhausted
+ * and the session is not interactive.
+@@ -414,7 +413,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+ per_mss = roundup_pow_of_two(per_mss) +
+ SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
++ nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
+ nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+ /* Fast Recovery (RFC 5681 3.2) :
+@@ -426,7 +425,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+
+ if (sk->sk_sndbuf < sndmem)
+ WRITE_ONCE(sk->sk_sndbuf,
+- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
++ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
+ }
+
+ /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+@@ -461,7 +460,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ struct tcp_sock *tp = tcp_sk(sk);
+ /* Optimize this! */
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
++ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
+
+ while (tp->rcv_ssthresh <= window) {
+ if (truesize <= skb->len)
+@@ -526,7 +525,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int maxwin;
+
+@@ -566,16 +565,17 @@ static void tcp_clamp_window(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
++ int rmem2;
+
+ icsk->icsk_ack.quick = 0;
++ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
++ if (sk->sk_rcvbuf < rmem2 &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+ !tcp_under_memory_pressure(sk) &&
+ sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
+ WRITE_ONCE(sk->sk_rcvbuf,
+- min(atomic_read(&sk->sk_rmem_alloc),
+- net->ipv4.sysctl_tcp_rmem[2]));
++ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
+ }
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
+@@ -716,7 +716,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+ * <prev RTT . ><current RTT .. ><next RTT .... >
+ */
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+@@ -737,7 +737,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+
+ do_div(rcvwin, tp->advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+@@ -901,12 +901,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
+ * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ * end of slow start and should slow down.
+ */
+- if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
++ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
+ else
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
+
+- rate *= max(tp->snd_cwnd, tp->packets_out);
++ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
+
+ if (likely(tp->srtt_us))
+ do_div(rate, tp->srtt_us);
+@@ -1043,7 +1043,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ tp->undo_marker ? tp->undo_retrans : 0);
+ #endif
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
+ }
+
+ /* This exciting event is worth to be remembered. 8) */
+@@ -1652,6 +1652,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+ (mss != tcp_skb_seglen(skb)))
+ goto out;
+
++ if (!tcp_skb_can_collapse(prev, skb))
++ goto out;
+ len = skb->len;
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
+@@ -2020,7 +2022,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
+ tp->reord_seen++;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
+ }
+@@ -2085,7 +2087,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
+
+ static bool tcp_is_rack(const struct sock *sk)
+ {
+- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
++ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++ TCP_RACK_LOSS_DETECTION;
+ }
+
+ /* If we detect SACK reneging, forget all SACK information
+@@ -2129,6 +2132,7 @@ void tcp_enter_loss(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
+ bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
++ u8 reordering;
+
+ tcp_timeout_mark_lost(sk);
+
+@@ -2137,22 +2141,24 @@ void tcp_enter_loss(struct sock *sk)
+ !after(tp->high_seq, tp->snd_una) ||
+ (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+ tp->prior_ssthresh = tcp_current_ssthresh(sk);
+- tp->prior_cwnd = tp->snd_cwnd;
++ tp->prior_cwnd = tcp_snd_cwnd(tp);
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ tcp_ca_event(sk, CA_EVENT_LOSS);
+ tcp_init_undo(tp);
+ }
+- tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
++ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+
+ /* Timeout in disordered state after receiving substantial DUPACKs
+ * suggests that the degree of reordering is over-estimated.
+ */
++ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
+- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
++ tp->sacked_out >= reordering)
+ tp->reordering = min_t(unsigned int, tp->reordering,
+- net->ipv4.sysctl_tcp_reordering);
++ reordering);
++
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ tp->high_seq = tp->snd_nxt;
+ tcp_ecn_queue_cwr(tp);
+@@ -2161,7 +2167,7 @@ void tcp_enter_loss(struct sock *sk)
+ * loss recovery is underway except recurring timeout(s) on
+ * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+ */
+- tp->frto = net->ipv4.sysctl_tcp_frto &&
++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+ (new_recovery || icsk->icsk_retransmits) &&
+ !inet_csk(sk)->icsk_mtup.probe_size;
+ }
+@@ -2178,7 +2184,8 @@ void tcp_enter_loss(struct sock *sk)
+ */
+ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
+ {
+- if (flag & FLAG_SACK_RENEGING) {
++ if (flag & FLAG_SACK_RENEGING &&
++ flag & FLAG_SND_UNA_ADVANCED) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
+ msecs_to_jiffies(10));
+@@ -2448,7 +2455,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
+ pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+- tp->snd_cwnd, tcp_left_out(tp),
++ tcp_snd_cwnd(tp), tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+@@ -2457,7 +2464,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
+ pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
+- tp->snd_cwnd, tcp_left_out(tp),
++ tcp_snd_cwnd(tp), tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+@@ -2482,7 +2489,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
+ if (tp->prior_ssthresh) {
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
++ tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
+
+ if (tp->prior_ssthresh > tp->snd_ssthresh) {
+ tp->snd_ssthresh = tp->prior_ssthresh;
+@@ -2499,6 +2506,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp)
+ return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
+ }
+
++static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
++ /* Hold old state until something *above* high_seq
++ * is ACKed. For Reno it is MUST to prevent false
++ * fast retransmits (RFC2582). SACK TCP is safe. */
++ if (!tcp_any_retrans_done(sk))
++ tp->retrans_stamp = 0;
++ return true;
++ }
++ return false;
++}
++
+ /* People celebrate: "We love our President!" */
+ static bool tcp_try_undo_recovery(struct sock *sk)
+ {
+@@ -2521,14 +2543,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
+ }
+- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
+- /* Hold old state until something *above* high_seq
+- * is ACKed. For Reno it is MUST to prevent false
+- * fast retransmits (RFC2582). SACK TCP is safe. */
+- if (!tcp_any_retrans_done(sk))
+- tp->retrans_stamp = 0;
++ if (tcp_is_non_sack_preventing_reopen(sk))
+ return true;
+- }
+ tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+ return false;
+@@ -2564,6 +2580,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUSRTOS);
+ inet_csk(sk)->icsk_retransmits = 0;
++ if (tcp_is_non_sack_preventing_reopen(sk))
++ return true;
+ if (frto_undo || tcp_is_sack(tp)) {
+ tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+@@ -2589,7 +2607,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
+ tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
+ tp->snd_cwnd_cnt = 0;
+- tp->prior_cwnd = tp->snd_cwnd;
++ tp->prior_cwnd = tcp_snd_cwnd(tp);
+ tp->prr_delivered = 0;
+ tp->prr_out = 0;
+ tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+@@ -2619,7 +2637,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
+ }
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
+- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
++ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
+ }
+
+ static inline void tcp_end_cwnd_reduction(struct sock *sk)
+@@ -2632,7 +2650,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
+ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
+- tp->snd_cwnd = tp->snd_ssthresh;
++ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+ tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
+@@ -2696,12 +2714,15 @@ static void tcp_mtup_probe_success(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
++ u64 val;
+
+- /* FIXME: breaks with very large cwnd */
+ tp->prior_ssthresh = tcp_current_ssthresh(sk);
+- tp->snd_cwnd = tp->snd_cwnd *
+- tcp_mss_to_mtu(sk, tp->mss_cache) /
+- icsk->icsk_mtup.probe_size;
++
++ val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
++ do_div(val, icsk->icsk_mtup.probe_size);
++ WARN_ON_ONCE((u32)val != val);
++ tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
++
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+@@ -3024,7 +3045,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ tp->snd_una == tp->mtu_probe.probe_seq_start) {
+ tcp_mtup_probe_failed(sk);
+ /* Restores the reduction we did in tcp_mtup_probe() */
+- tp->snd_cwnd++;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ tcp_simple_retransmit(sk);
+ return;
+ }
+@@ -3041,7 +3062,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+@@ -3452,7 +3473,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
+ * new SACK or ECE mark may first advance cwnd here and later reduce
+ * cwnd in tcp_fastretrans_alert() based on more states.
+ */
+- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
++ if (tcp_sk(sk)->reordering >
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return flag & FLAG_DATA_ACKED;
+@@ -3561,16 +3583,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
+ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ u32 *last_oow_ack_time)
+ {
+- if (*last_oow_ack_time) {
+- s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
++ /* Paired with the WRITE_ONCE() in this function. */
++ u32 val = READ_ONCE(*last_oow_ack_time);
+
+- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++ if (val) {
++ s32 elapsed = (s32)(tcp_jiffies32 - val);
++
++ if (0 <= elapsed &&
++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+- *last_oow_ack_time = tcp_jiffies32;
++ /* Paired with the prior READ_ONCE() and with itself,
++ * as we might be lockless.
++ */
++ WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);
+
+ return false; /* not rate-limited: go ahead, send dupack now! */
+ }
+@@ -3611,11 +3640,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+
+ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+- if (now != challenge_timestamp) {
+- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
++ if (now != READ_ONCE(challenge_timestamp)) {
++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+ u32 half = (ack_limit + 1) >> 1;
+
+- challenge_timestamp = now;
++ WRITE_ONCE(challenge_timestamp, now);
+ WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
+ }
+ count = READ_ONCE(challenge_count);
+@@ -3858,7 +3887,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ tcp_process_tlp_ack(sk, ack, flag);
+
+ if (tcp_ack_is_dubious(sk, flag)) {
+- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
++ if (!(flag & (FLAG_SND_UNA_ADVANCED |
++ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
+ num_dupack = 1;
+ /* Consider if pure acks were aggregated in tcp_add_backlog() */
+ if (!(flag & FLAG_DATA))
+@@ -4043,7 +4073,7 @@ void tcp_parse_options(const struct net *net,
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && th->syn &&
+- !estab && net->ipv4.sysctl_tcp_window_scaling) {
++ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
+ __u8 snd_wscale = *(__u8 *)ptr;
+ opt_rx->wscale_ok = 1;
+ if (snd_wscale > TCP_MAX_WSCALE) {
+@@ -4059,7 +4089,7 @@ void tcp_parse_options(const struct net *net,
+ case TCPOPT_TIMESTAMP:
+ if ((opsize == TCPOLEN_TIMESTAMP) &&
+ ((estab && opt_rx->tstamp_ok) ||
+- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
++ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
+ opt_rx->saw_tstamp = 1;
+ opt_rx->rcv_tsval = get_unaligned_be32(ptr);
+ opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
+@@ -4067,7 +4097,7 @@ void tcp_parse_options(const struct net *net,
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+- !estab && net->ipv4.sysctl_tcp_sack) {
++ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
+ opt_rx->sack_ok = TCP_SACK_SEEN;
+ tcp_sack_reset(opt_rx);
+ }
+@@ -4328,7 +4358,7 @@ void tcp_fin(struct sock *sk)
+
+ inet_csk_schedule_ack(sk);
+
+- sk->sk_shutdown |= RCV_SHUTDOWN;
++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
+ sock_set_flag(sk, SOCK_DONE);
+
+ switch (sk->sk_state) {
+@@ -4408,7 +4438,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ int mib_idx;
+
+ if (before(seq, tp->rcv_nxt))
+@@ -4455,7 +4485,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ tcp_rcv_spurious_retrans(sk, skb);
+@@ -5400,7 +5430,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
+ return false;
+
+ /* If we filled the congestion window, do not expand. */
+- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
++ if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
+ return false;
+
+ return true;
+@@ -5418,7 +5448,17 @@ static void tcp_new_space(struct sock *sk)
+ INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
+ }
+
+-static void tcp_check_space(struct sock *sk)
++/* Caller made space either from:
++ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
++ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
++ *
++ * We might be able to generate EPOLLOUT to the application if:
++ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
++ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
++ * small enough that tcp_stream_memory_free() decides it
++ * is time to generate EPOLLOUT.
++ */
++void tcp_check_space(struct sock *sk)
+ {
+ /* pairs with tcp_poll() */
+ smp_mb();
+@@ -5468,7 +5508,7 @@ send_now:
+ }
+
+ if (!tcp_is_sack(tp) ||
+- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+ goto send_now;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+@@ -5489,11 +5529,12 @@ send_now:
+ if (tp->srtt_us && tp->srtt_us < rtt)
+ rtt = tp->srtt_us;
+
+- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++ delay = min_t(unsigned long,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+ HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+
+@@ -5521,7 +5562,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 ptr = ntohs(th->urg_ptr);
+
+- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
++ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
+ ptr--;
+ ptr += ntohl(th->seq);
+
+@@ -5770,7 +5811,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
+ trace_tcp_probe(sk, skb);
+
+ tcp_mstamp_refresh(tp);
+- if (unlikely(!sk->sk_rx_dst))
++ if (unlikely(!rcu_access_pointer(sk->sk_rx_dst)))
+ inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
+ /*
+ * Header prediction.
+@@ -5957,9 +5998,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
+ * retransmission has occurred.
+ */
+ if (tp->total_retrans > 1 && tp->undo_marker)
+- tp->snd_cwnd = 1;
++ tcp_snd_cwnd_set(tp, 1);
+ else
+- tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
++ tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+
+ bpf_skops_established(sk, bpf_op, skb);
+@@ -6495,7 +6536,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
+ break;
+
+ tcp_set_state(sk, TCP_FIN_WAIT2);
+- sk->sk_shutdown |= SEND_SHUTDOWN;
++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN);
+
+ sk_dst_confirm(sk);
+
+@@ -6653,7 +6694,7 @@ static void tcp_ecn_create_request(struct request_sock *req,
+
+ ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
+ ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
+- ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
++ ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
+
+ if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
+ (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
+@@ -6719,11 +6760,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+ {
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ const char *msg = "Dropping request";
+- bool want_cookie = false;
+ struct net *net = sock_net(sk);
++ bool want_cookie = false;
++ u8 syncookies;
++
++ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+
+ #ifdef CONFIG_SYN_COOKIES
+- if (net->ipv4.sysctl_tcp_syncookies) {
++ if (syncookies) {
+ msg = "Sending cookies";
+ want_cookie = true;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+@@ -6731,8 +6775,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+ #endif
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+- if (!queue->synflood_warned &&
+- net->ipv4.sysctl_tcp_syncookies != 2 &&
++ if (!queue->synflood_warned && syncookies != 2 &&
+ xchg(&queue->synflood_warned, 1) == 0)
+ net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
+ proto, sk->sk_num, msg);
+@@ -6781,7 +6824,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+ struct tcp_sock *tp = tcp_sk(sk);
+ u16 mss;
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
+ !inet_csk_reqsk_queue_is_full(sk))
+ return 0;
+
+@@ -6815,13 +6858,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+ bool want_cookie = false;
+ struct dst_entry *dst;
+ struct flowi fl;
++ u8 syncookies;
++
++ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
+- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
++ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
+ if (!want_cookie)
+ goto drop;
+@@ -6870,10 +6915,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+ tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+
+ if (!want_cookie && !isn) {
++ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
++
+ /* Kill the following clause, if you dislike this way. */
+- if (!net->ipv4.sysctl_tcp_syncookies &&
+- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
++ if (!syncookies &&
++ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
++ (max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst)) {
+ /* Without syncookies last quarter of
+ * backlog is filled with destinations,
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 5b8ce65dfc067..b5cb674eca1c7 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
+ struct inet_hashinfo tcp_hashinfo;
+ EXPORT_SYMBOL(tcp_hashinfo);
+
++static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
++
+ static u32 tcp_v4_init_seq(const struct sk_buff *skb)
+ {
+ return secure_tcp_seq(ip_hdr(skb)->daddr,
+@@ -106,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
+
+ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
+ {
++ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
+ const struct inet_timewait_sock *tw = inet_twsk(sktw);
+ const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+ struct tcp_sock *tp = tcp_sk(sk);
+- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
+
+ if (reuse == 2) {
+ /* Still does not detect *everything* that goes through
+@@ -322,6 +324,8 @@ failure:
+ * if necessary.
+ */
+ tcp_set_state(sk, TCP_CLOSE);
++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
++ inet_reset_saddr(sk);
+ ip_rt_put(rt);
+ sk->sk_route_caps = 0;
+ inet->inet_dport = 0;
+@@ -807,13 +811,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ arg.tos = ip_hdr(skb)->tos;
+ arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
+ local_bh_disable();
+- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
++ ctl_sk = this_cpu_read(ipv4_tcp_sk);
++ sock_net_set(ctl_sk, net);
+ if (sk) {
+ ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_mark : sk->sk_mark;
+ ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_priority : sk->sk_priority;
+ transmit_time = tcp_transmit_time(sk);
++ xfrm_sk_clone_policy(ctl_sk, sk);
++ } else {
++ ctl_sk->sk_mark = 0;
++ ctl_sk->sk_priority = 0;
+ }
+ ip_send_unicast_reply(ctl_sk,
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
+@@ -821,7 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ &arg, arg.iov[0].iov_len,
+ transmit_time);
+
+- ctl_sk->sk_mark = 0;
++ xfrm_sk_free_policy(ctl_sk);
++ sock_net_set(ctl_sk, &init_net);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
+ local_bh_enable();
+@@ -905,7 +915,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ arg.tos = tos;
+ arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
+ local_bh_disable();
+- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
++ ctl_sk = this_cpu_read(ipv4_tcp_sk);
++ sock_net_set(ctl_sk, net);
+ ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_mark : sk->sk_mark;
+ ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+@@ -917,7 +928,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ &arg, arg.iov[0].iov_len,
+ transmit_time);
+
+- ctl_sk->sk_mark = 0;
++ sock_net_set(ctl_sk, &init_net);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+ local_bh_enable();
+ }
+@@ -964,7 +975,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent,
++ READ_ONCE(req->ts_recent),
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+@@ -998,7 +1009,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (skb) {
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+
+- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (inet_sk(sk)->tos & INET_ECN_MASK) :
+ inet_sk(sk)->tos;
+@@ -1584,7 +1595,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ if (!dst) {
+@@ -1698,16 +1709,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+ struct sock *rsk;
+
+ if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+- struct dst_entry *dst = sk->sk_rx_dst;
++ struct dst_entry *dst;
++
++ dst = rcu_dereference_protected(sk->sk_rx_dst,
++ lockdep_sock_is_held(sk));
+
+ sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ if (dst) {
+- if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
++ if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
+ !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
+ dst, 0)) {
++ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
+ dst_release(dst);
+- sk->sk_rx_dst = NULL;
+ }
+ }
+ tcp_rcv_established(sk, skb);
+@@ -1783,12 +1797,12 @@ int tcp_v4_early_demux(struct sk_buff *skb)
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk_fullsock(sk)) {
+- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
++ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
+
+ if (dst)
+ dst = dst_check(dst, 0);
+ if (dst &&
+- inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
++ sk->sk_rx_dst_ifindex == skb->skb_iif)
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+@@ -1797,8 +1811,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
+
+ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+ {
+- u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
+- u32 tail_gso_size, tail_gso_segs;
++ u32 limit, tail_gso_size, tail_gso_segs;
+ struct skb_shared_info *shinfo;
+ const struct tcphdr *th;
+ struct tcphdr *thtail;
+@@ -1902,11 +1915,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+ __skb_push(skb, hdrlen);
+
+ no_coalesce:
++ limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
++
+ /* Only socket owner can try to collapse/prune rx queues
+ * to reduce memory overhead, so add a little headroom here.
+ * Few sockets backlog are possibly concurrently non empty.
+ */
+- limit += 64*1024;
++ limit += 64 * 1024;
+
+ if (unlikely(sk_add_backlog(sk, skb, limit))) {
+ bh_unlock_sock(sk);
+@@ -1967,8 +1982,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
+ const struct tcphdr *th;
+ bool refcounted;
+ struct sock *sk;
++ int drop_reason;
+ int ret;
+
++ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ if (skb->pkt_type != PACKET_HOST)
+ goto discard_it;
+
+@@ -1980,8 +1997,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
+
+ th = (const struct tcphdr *)skb->data;
+
+- if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
++ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
++ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ goto bad_packet;
++ }
+ if (!pskb_may_pull(skb, th->doff * 4))
+ goto discard_it;
+
+@@ -2011,7 +2030,8 @@ process:
+ struct sock *nsk;
+
+ sk = req->rsk_listener;
+- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
++ if (unlikely(!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb) ||
++ tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
+ sk_drops_add(sk, skb);
+ reqsk_put(req);
+ goto discard_it;
+@@ -2058,6 +2078,7 @@ process:
+ }
+ goto discard_and_relse;
+ }
++ nf_reset_ct(skb);
+ if (nsk == sk) {
+ reqsk_put(req);
+ tcp_v4_restore_cb(skb);
+@@ -2082,8 +2103,10 @@ process:
+
+ nf_reset_ct(skb);
+
+- if (tcp_filter(sk, skb))
++ if (tcp_filter(sk, skb)) {
++ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ goto discard_and_relse;
++ }
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
+ tcp_v4_fill_cb(skb, iph, th);
+@@ -2120,6 +2143,7 @@ put_and_return:
+ return ret;
+
+ no_tcp_socket:
++ drop_reason = SKB_DROP_REASON_NO_SOCKET;
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard_it;
+
+@@ -2127,6 +2151,7 @@ no_tcp_socket:
+
+ if (tcp_checksum_complete(skb)) {
+ csum_error:
++ drop_reason = SKB_DROP_REASON_TCP_CSUM;
+ trace_tcp_bad_csum(skb);
+ __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
+ bad_packet:
+@@ -2137,7 +2162,7 @@ bad_packet:
+
+ discard_it:
+ /* Discard frame. */
+- kfree_skb(skb);
++ kfree_skb_reason(skb, drop_reason);
+ return 0;
+
+ discard_and_relse:
+@@ -2200,8 +2225,8 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst_hold_safe(dst)) {
+- sk->sk_rx_dst = dst;
+- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
++ rcu_assign_pointer(sk->sk_rx_dst, dst);
++ sk->sk_rx_dst_ifindex = skb->skb_iif;
+ }
+ }
+ EXPORT_SYMBOL(inet_sk_rx_dst_set);
+@@ -2653,7 +2678,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
+- tp->snd_cwnd,
++ tcp_snd_cwnd(tp),
+ state == TCP_LISTEN ?
+ fastopenq->max_qlen :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
+@@ -2736,7 +2761,7 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+ {
+ while (iter->cur_sk < iter->end_sk)
+- sock_put(iter->batch[iter->cur_sk++]);
++ sock_gen_put(iter->batch[iter->cur_sk++]);
+ }
+
+ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+@@ -2895,7 +2920,7 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ * st->bucket. See tcp_seek_last_pos().
+ */
+ st->offset++;
+- sock_put(iter->batch[iter->cur_sk++]);
++ sock_gen_put(iter->batch[iter->cur_sk++]);
+ }
+
+ if (iter->cur_sk < iter->end_sk)
+@@ -3098,41 +3123,14 @@ EXPORT_SYMBOL(tcp_prot);
+
+ static void __net_exit tcp_sk_exit(struct net *net)
+ {
+- int cpu;
+-
+ if (net->ipv4.tcp_congestion_control)
+ bpf_module_put(net->ipv4.tcp_congestion_control,
+ net->ipv4.tcp_congestion_control->owner);
+-
+- for_each_possible_cpu(cpu)
+- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
+- free_percpu(net->ipv4.tcp_sk);
+ }
+
+ static int __net_init tcp_sk_init(struct net *net)
+ {
+- int res, cpu, cnt;
+-
+- net->ipv4.tcp_sk = alloc_percpu(struct sock *);
+- if (!net->ipv4.tcp_sk)
+- return -ENOMEM;
+-
+- for_each_possible_cpu(cpu) {
+- struct sock *sk;
+-
+- res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+- IPPROTO_TCP, net);
+- if (res)
+- goto fail;
+- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+-
+- /* Please enforce IP_DF and IPID==0 for RST and
+- * ACK sent in SYN-RECV and TIME-WAIT state.
+- */
+- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+-
+- *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
+- }
++ int cnt;
+
+ net->ipv4.sysctl_tcp_ecn = 2;
+ net->ipv4.sysctl_tcp_ecn_fallback = 1;
+@@ -3216,10 +3214,6 @@ static int __net_init tcp_sk_init(struct net *net)
+ net->ipv4.tcp_congestion_control = &tcp_reno;
+
+ return 0;
+-fail:
+- tcp_sk_exit(net);
+-
+- return res;
+ }
+
+ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
+@@ -3313,6 +3307,24 @@ static void __init bpf_iter_register(void)
+
+ void __init tcp_v4_init(void)
+ {
++ int cpu, res;
++
++ for_each_possible_cpu(cpu) {
++ struct sock *sk;
++
++ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
++ IPPROTO_TCP, &init_net);
++ if (res)
++ panic("Failed to create the TCP control socket.\n");
++ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
++
++ /* Please enforce IP_DF and IPID==0 for RST and
++ * ACK sent in SYN-RECV and TIME-WAIT state.
++ */
++ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
++
++ per_cpu(ipv4_tcp_sk, cpu) = sk;
++ }
+ if (register_pernet_subsys(&tcp_sk_ops))
+ panic("Failed to create the TCP control socket.\n");
+
+diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
+index 82b36ec3f2f82..ae36780977d27 100644
+--- a/net/ipv4/tcp_lp.c
++++ b/net/ipv4/tcp_lp.c
+@@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+ lp->flag &= ~LP_WITHIN_THR;
+
+ pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
+- tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
++ tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
+ lp->sowd >> 3);
+
+ if (lp->flag & LP_WITHIN_THR)
+@@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+ /* happened within inference
+ * drop snd_cwnd into 1 */
+ if (lp->flag & LP_WITHIN_INF)
+- tp->snd_cwnd = 1U;
++ tcp_snd_cwnd_set(tp, 1U);
+
+ /* happened after inference
+ * cut snd_cwnd into half */
+ else
+- tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
++ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
+
+ /* record this drop time */
+ lp->last_drop = now;
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 0588b004ddac1..5df97aaac252e 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
+
+ struct tcp_metrics_block {
+ struct tcp_metrics_block __rcu *tcpm_next;
+- possible_net_t tcpm_net;
++ struct net *tcpm_net;
+ struct inetpeer_addr tcpm_saddr;
+ struct inetpeer_addr tcpm_daddr;
+ unsigned long tcpm_stamp;
+@@ -51,34 +51,38 @@ struct tcp_metrics_block {
+ struct rcu_head rcu_head;
+ };
+
+-static inline struct net *tm_net(struct tcp_metrics_block *tm)
++static inline struct net *tm_net(const struct tcp_metrics_block *tm)
+ {
+- return read_pnet(&tm->tcpm_net);
++ /* Paired with the WRITE_ONCE() in tcpm_new() */
++ return READ_ONCE(tm->tcpm_net);
+ }
+
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx)
+ {
+- return tm->tcpm_lock & (1 << idx);
++ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
++ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+
+-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
++static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx)
+ {
+- return tm->tcpm_vals[idx];
++ /* Paired with WRITE_ONCE() in tcp_metric_set() */
++ return READ_ONCE(tm->tcpm_vals[idx]);
+ }
+
+ static void tcp_metric_set(struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx,
+ u32 val)
+ {
+- tm->tcpm_vals[idx] = val;
++ /* Paired with READ_ONCE() in tcp_metric_get() */
++ WRITE_ONCE(tm->tcpm_vals[idx], val);
+ }
+
+ static bool addr_same(const struct inetpeer_addr *a,
+ const struct inetpeer_addr *b)
+ {
+- return inetpeer_addr_cmp(a, b) == 0;
++ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
+ }
+
+ struct tcpm_hash_bucket {
+@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
+ static unsigned int tcp_metrics_hash_log __read_mostly;
+
+ static DEFINE_SPINLOCK(tcp_metrics_lock);
++static DEFINE_SEQLOCK(fastopen_seqlock);
+
+ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst,
+@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ u32 msval;
+ u32 val;
+
+- tm->tcpm_stamp = jiffies;
++ WRITE_ONCE(tm->tcpm_stamp, jiffies);
+
+ val = 0;
+ if (dst_metric_locked(dst, RTAX_RTT))
+@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ val |= 1 << TCP_METRIC_CWND;
+ if (dst_metric_locked(dst, RTAX_REORDERING))
+ val |= 1 << TCP_METRIC_REORDERING;
+- tm->tcpm_lock = val;
++ /* Paired with READ_ONCE() in tcp_metric_locked() */
++ WRITE_ONCE(tm->tcpm_lock, val);
+
+ msval = dst_metric_raw(dst, RTAX_RTT);
+- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
++ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
+
+ msval = dst_metric_raw(dst, RTAX_RTTVAR);
+- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
+- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
+- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
+- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
++ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
++ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
++ dst_metric_raw(dst, RTAX_SSTHRESH));
++ tcp_metric_set(tm, TCP_METRIC_CWND,
++ dst_metric_raw(dst, RTAX_CWND));
++ tcp_metric_set(tm, TCP_METRIC_REORDERING,
++ dst_metric_raw(dst, RTAX_REORDERING));
+ if (fastopen_clear) {
++ write_seqlock(&fastopen_seqlock);
+ tm->tcpm_fastopen.mss = 0;
+ tm->tcpm_fastopen.syn_loss = 0;
+ tm->tcpm_fastopen.try_exp = 0;
+ tm->tcpm_fastopen.cookie.exp = false;
+ tm->tcpm_fastopen.cookie.len = 0;
++ write_sequnlock(&fastopen_seqlock);
+ }
+ }
+
+ #define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
+
+-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
++static void tcpm_check_stamp(struct tcp_metrics_block *tm,
++ const struct dst_entry *dst)
+ {
+- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
++ unsigned long limit;
++
++ if (!tm)
++ return;
++ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
++ if (unlikely(time_after(jiffies, limit)))
+ tcpm_suck_dst(tm, dst, false);
+ }
+
+@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+ oldest = deref_locked(tcp_metrics_hash[hash].chain);
+ for (tm = deref_locked(oldest->tcpm_next); tm;
+ tm = deref_locked(tm->tcpm_next)) {
+- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
++ if (time_before(READ_ONCE(tm->tcpm_stamp),
++ READ_ONCE(oldest->tcpm_stamp)))
+ oldest = tm;
+ }
+ tm = oldest;
+ } else {
+- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
++ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
+ if (!tm)
+ goto out_unlock;
+ }
+- write_pnet(&tm->tcpm_net, net);
++ /* Paired with the READ_ONCE() in tm_net() */
++ WRITE_ONCE(tm->tcpm_net, net);
++
+ tm->tcpm_saddr = *saddr;
+ tm->tcpm_daddr = *daddr;
+
+- tcpm_suck_dst(tm, dst, true);
++ tcpm_suck_dst(tm, dst, reclaim);
+
+ if (likely(!reclaim)) {
+ tm->tcpm_next = tcp_metrics_hash[hash].chain;
+@@ -329,7 +349,7 @@ void tcp_update_metrics(struct sock *sk)
+ int m;
+
+ sk_dst_confirm(sk);
+- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+ return;
+
+ rcu_read_lock();
+@@ -385,29 +405,29 @@ void tcp_update_metrics(struct sock *sk)
+
+ if (tcp_in_initial_slowstart(tp)) {
+ /* Slow start still did not finish. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+- if (val && (tp->snd_cwnd >> 1) > val)
++ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+- tp->snd_cwnd >> 1);
++ tcp_snd_cwnd(tp) >> 1);
+ }
+ if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
+ val = tcp_metric_get(tm, TCP_METRIC_CWND);
+- if (tp->snd_cwnd > val)
++ if (tcp_snd_cwnd(tp) > val)
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+- tp->snd_cwnd);
++ tcp_snd_cwnd(tp));
+ }
+ } else if (!tcp_in_slow_start(tp) &&
+ icsk->icsk_ca_state == TCP_CA_Open) {
+ /* Cong. avoidance phase, cwnd is reliable. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+- max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
++ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
+ if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
+ val = tcp_metric_get(tm, TCP_METRIC_CWND);
+- tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
++ tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
+ }
+ } else {
+ /* Else slow start did not finish, cwnd is non-sense,
+@@ -418,7 +438,7 @@ void tcp_update_metrics(struct sock *sk)
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ (val + tp->snd_ssthresh) >> 1);
+ }
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val && tp->snd_ssthresh > val)
+@@ -428,12 +448,13 @@ void tcp_update_metrics(struct sock *sk)
+ if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
+ val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
+ if (val < tp->reordering &&
+- tp->reordering != net->ipv4.sysctl_tcp_reordering)
++ tp->reordering !=
++ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ tp->reordering);
+ }
+ }
+- tm->tcpm_stamp = jiffies;
++ WRITE_ONCE(tm->tcpm_stamp, jiffies);
+ out_unlock:
+ rcu_read_unlock();
+ }
+@@ -462,7 +483,7 @@ void tcp_init_metrics(struct sock *sk)
+ if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+ tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+
+- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val) {
+ tp->snd_ssthresh = val;
+@@ -538,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
+ return ret;
+ }
+
+-static DEFINE_SEQLOCK(fastopen_seqlock);
+-
+ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+ struct tcp_fastopen_cookie *cookie)
+ {
+@@ -646,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+ }
+
+ if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+- jiffies - tm->tcpm_stamp,
++ jiffies - READ_ONCE(tm->tcpm_stamp),
+ TCP_METRICS_ATTR_PAD) < 0)
+ goto nla_put_failure;
+
+@@ -657,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+ if (!nest)
+ goto nla_put_failure;
+ for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+- u32 val = tm->tcpm_vals[i];
++ u32 val = tcp_metric_get(tm, i);
+
+ if (!val)
+ continue;
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 0a4f3f16140ad..2606a5571116a 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
+ * Oh well... nobody has a sufficient solution to this
+ * protocol bug yet.
+ */
+- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
++ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
+ kill:
+ inet_twsk_deschedule_put(tw);
+ return TCP_TW_SUCCESS;
+@@ -523,7 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newtp->max_window = newtp->snd_wnd;
+
+ if (newtp->rx_opt.tstamp_ok) {
+- newtp->rx_opt.ts_recent = req->ts_recent;
++ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
+ newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+ newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+@@ -538,7 +538,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newtp->tsoffset = treq->ts_off;
+ #ifdef CONFIG_TCP_MD5SIG
+ newtp->md5sig_info = NULL; /*XXX*/
+- if (newtp->af_specific->md5_lookup(sk, newsk))
++ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
+ newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ #endif
+ if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
+@@ -565,6 +565,9 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
+ * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
+ *
+ * We don't need to initialize tmp_opt.sack_ok as we don't use the results
++ *
++ * Note: If @fastopen is true, this can be called from process context.
++ * Otherwise, this is from BH context.
+ */
+
+ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+@@ -583,7 +586,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+
+ if (tmp_opt.saw_tstamp) {
+- tmp_opt.ts_recent = req->ts_recent;
++ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
+ if (tmp_opt.rcv_tsecr)
+ tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+ /* We do not store true stamp, but it is not required,
+@@ -717,14 +720,17 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ &tcp_rsk(req)->last_oow_ack_time))
+ req->rsk_ops->send_ack(sk, skb, req);
+ if (paws_reject)
+- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
++ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ return NULL;
+ }
+
+ /* In sequence, PAWS is OK. */
+
++ /* TODO: We probably should defer ts_recent change once
++ * we take ownership of @req.
++ */
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+- req->ts_recent = tmp_opt.rcv_tsval;
++ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
+
+ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
+ /* Truncate SYN, it is out of window starting
+@@ -736,7 +742,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ * "fourth, check the SYN bit"
+ */
+ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
+- __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
++ TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+ goto embryonic_reset;
+ }
+
+@@ -789,7 +795,7 @@ listen_overflow:
+ if (sk != req->rsk_listener)
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
+ inet_rsk(req)->acked = 1;
+ return NULL;
+ }
+diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
+index 95db7a11ba2ad..63024ec17b204 100644
+--- a/net/ipv4/tcp_nv.c
++++ b/net/ipv4/tcp_nv.c
+@@ -198,10 +198,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ }
+
+ if (ca->cwnd_growth_factor < 0) {
+- cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
++ cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
+ tcp_cong_avoid_ai(tp, cnt, acked);
+ } else {
+- cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
++ cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
+ tcp_cong_avoid_ai(tp, cnt, acked);
+ }
+ }
+@@ -210,7 +210,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+- return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
++ return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
+ }
+
+ static void tcpnv_state(struct sock *sk, u8 new_state)
+@@ -258,7 +258,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
+ return;
+
+ /* Stop cwnd growth if we were in catch up mode */
+- if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
++ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
+ ca->nv_catchup = 0;
+ ca->nv_allow_cwnd_growth = 0;
+ }
+@@ -372,7 +372,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
+ * if cwnd < max_win, grow cwnd
+ * else leave the same
+ */
+- if (tp->snd_cwnd > max_win) {
++ if (tcp_snd_cwnd(tp) > max_win) {
+ /* there is congestion, check that it is ok
+ * to make a CA decision
+ * 1. We should have at least nv_dec_eval_min_calls
+@@ -399,20 +399,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
+ ca->nv_allow_cwnd_growth = 0;
+ tp->snd_ssthresh =
+ (nv_ssthresh_factor * max_win) >> 3;
+- if (tp->snd_cwnd - max_win > 2) {
++ if (tcp_snd_cwnd(tp) - max_win > 2) {
+ /* gap > 2, we do exponential cwnd decrease */
+ int dec;
+
+- dec = max(2U, ((tp->snd_cwnd - max_win) *
++ dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
+ nv_cong_dec_mult) >> 7);
+- tp->snd_cwnd -= dec;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
+ } else if (nv_cong_dec_mult > 0) {
+- tp->snd_cwnd = max_win;
++ tcp_snd_cwnd_set(tp, max_win);
+ }
+ if (ca->cwnd_growth_factor > 0)
+ ca->cwnd_growth_factor = 0;
+ ca->nv_no_cong_cnt = 0;
+- } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
++ } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
+ /* There is no congestion, grow cwnd if allowed*/
+ if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
+ return;
+@@ -445,8 +445,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
+ * (it wasn't before, if it is now is because nv
+ * decreased it).
+ */
+- if (tp->snd_cwnd < nv_min_cwnd)
+- tp->snd_cwnd = nv_min_cwnd;
++ if (tcp_snd_cwnd(tp) < nv_min_cwnd)
++ tcp_snd_cwnd_set(tp, nv_min_cwnd);
+ }
+ }
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 6d72f3ea48c4e..d46fb6d7057bd 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
+
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+ tcp_skb_pcount(skb));
++ tcp_check_space(sk);
+ }
+
+ /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
+@@ -142,7 +143,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+- u32 cwnd = tp->snd_cwnd;
++ u32 cwnd = tcp_snd_cwnd(tp);
+
+ tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
+
+@@ -151,7 +152,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
+
+ while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
+ cwnd >>= 1;
+- tp->snd_cwnd = max(cwnd, restart_cwnd);
++ tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd));
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ tp->snd_cwnd_used = 0;
+ }
+@@ -166,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
+- /* If this is the first data packet sent in response to the
+- * previous received data,
+- * and it is a reply for ato after last received packet,
+- * increase pingpong count.
+- */
+- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+- inet_csk_inc_pingpong_cnt(sk);
+-
+ tp->lsndtime = now;
++
++ /* If it is a reply for ato after last received
++ * packet, enter pingpong mode.
++ */
++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
++ inet_csk_enter_pingpong_mode(sk);
+ }
+
+ /* Account for an ACK we sent. */
+@@ -240,8 +238,8 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+ *rcv_wscale = 0;
+ if (wscale_ok) {
+ /* Set window scaling on max possible window */
+- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+- space = max_t(u32, space, sysctl_rmem_max);
++ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
++ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
+ space = min_t(u32, space, *window_clamp);
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+ 0, TCP_MAX_WSCALE);
+@@ -323,7 +321,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
+- bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
++ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
+ tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+
+ if (!use_ecn) {
+@@ -345,7 +343,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
+
+ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+ {
+- if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+@@ -789,18 +787,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+ opts->mss = tcp_advertise_mss(sk);
+ remaining -= TCPOLEN_MSS_ALIGNED;
+
+- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ remaining -= TCPOLEN_TSTAMP_ALIGNED;
+ }
+- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
+ opts->ws = tp->rx_opt.rcv_wscale;
+ opts->options |= OPTION_WSCALE;
+ remaining -= TCPOLEN_WSCALE_ALIGNED;
+ }
+- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
++ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
+ opts->options |= OPTION_SACK_ADVERTISE;
+ if (unlikely(!(OPTION_TS & opts->options)))
+ remaining -= TCPOLEN_SACKPERM_ALIGNED;
+@@ -877,7 +875,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
+ if (likely(ireq->tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
+- opts->tsecr = req->ts_recent;
++ opts->tsecr = READ_ONCE(req->ts_recent);
+ remaining -= TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(ireq->sack_ok)) {
+@@ -1014,7 +1012,7 @@ static void tcp_tsq_write(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->lost_out > tp->retrans_out &&
+- tp->snd_cwnd > tcp_packets_in_flight(tp)) {
++ tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) {
+ tcp_mstamp_refresh(tp);
+ tcp_xmit_retransmit_queue(sk);
+ }
+@@ -1562,7 +1560,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ return -ENOMEM;
+ }
+
+- if (skb_unclone(skb, gfp))
++ if (skb_unclone_keeptruesize(skb, gfp))
+ return -ENOMEM;
+
+ /* Get a new skb... force flag on. */
+@@ -1672,7 +1670,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
+ {
+ u32 delta_truesize;
+
+- if (skb_unclone(skb, GFP_ATOMIC))
++ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
+ return -ENOMEM;
+
+ delta_truesize = __pskb_trim_head(skb, len);
+@@ -1721,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
+ mss_now -= icsk->icsk_ext_hdr_len;
+
+ /* Then reserve room for full set of TCP options and 8 bytes of data */
+- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
++ mss_now = max(mss_now,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
+ return mss_now;
+ }
+
+@@ -1764,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk)
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
+
+- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
++ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
+ icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
+ icsk->icsk_af_ops->net_header_len;
+- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
++ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
+ icsk->icsk_mtup.probe_size = 0;
+ if (icsk->icsk_mtup.enabled)
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+@@ -1865,9 +1864,9 @@ static void tcp_cwnd_application_limited(struct sock *sk)
+ /* Limited by application or receiver window. */
+ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ u32 win_used = max(tp->snd_cwnd_used, init_win);
+- if (win_used < tp->snd_cwnd) {
++ if (win_used < tcp_snd_cwnd(tp)) {
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
++ tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
+ }
+ tp->snd_cwnd_used = 0;
+ }
+@@ -1879,15 +1878,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- /* Track the maximum number of outstanding packets in each
+- * window, and remember whether we were cwnd-limited then.
++ /* Track the strongest available signal of the degree to which the cwnd
++ * is fully utilized. If cwnd-limited then remember that fact for the
++ * current window. If not cwnd-limited then track the maximum number of
++ * outstanding packets in the current window. (If cwnd-limited then we
++ * chose to not update tp->max_packets_out to avoid an extra else
++ * clause with no functional impact.)
+ */
+- if (!before(tp->snd_una, tp->max_packets_seq) ||
+- tp->packets_out > tp->max_packets_out ||
+- is_cwnd_limited) {
+- tp->max_packets_out = tp->packets_out;
+- tp->max_packets_seq = tp->snd_nxt;
++ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
++ is_cwnd_limited ||
++ (!tp->is_cwnd_limited &&
++ tp->packets_out > tp->max_packets_out)) {
+ tp->is_cwnd_limited = is_cwnd_limited;
++ tp->max_packets_out = tp->packets_out;
++ tp->cwnd_usage_seq = tp->snd_nxt;
+ }
+
+ if (tcp_is_cwnd_limited(sk)) {
+@@ -1899,7 +1903,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+ if (tp->packets_out > tp->snd_cwnd_used)
+ tp->snd_cwnd_used = tp->packets_out;
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
+ (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+ !ca_ops->cong_control)
+ tcp_cwnd_application_limited(sk);
+@@ -1987,7 +1991,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+@@ -2039,7 +2043,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+ return 1;
+
+ in_flight = tcp_packets_in_flight(tp);
+- cwnd = tp->snd_cwnd;
++ cwnd = tcp_snd_cwnd(tp);
+ if (in_flight >= cwnd)
+ return 0;
+
+@@ -2196,12 +2200,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+ in_flight = tcp_packets_in_flight(tp);
+
+ BUG_ON(tcp_skb_pcount(skb) <= 1);
+- BUG_ON(tp->snd_cwnd <= in_flight);
++ BUG_ON(tcp_snd_cwnd(tp) <= in_flight);
+
+ send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
+
+ /* From in_flight test above, we know that cwnd > in_flight. */
+- cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
++ cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache;
+
+ limit = min(send_win, cong_win);
+
+@@ -2215,7 +2219,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+
+ win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
+ if (win_divisor) {
+- u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
++ u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache);
+
+ /* If at least some fraction of a window is available,
+ * just use it.
+@@ -2279,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
+ u32 interval;
+ s32 delta;
+
+- interval = net->ipv4.sysctl_tcp_probe_interval;
++ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
+ delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
+ if (unlikely(delta >= interval * HZ)) {
+ int mss = tcp_current_mss(sk);
+@@ -2343,7 +2347,7 @@ static int tcp_mtu_probe(struct sock *sk)
+ if (likely(!icsk->icsk_mtup.enabled ||
+ icsk->icsk_mtup.probe_size ||
+ inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
+- tp->snd_cwnd < 11 ||
++ tcp_snd_cwnd(tp) < 11 ||
+ tp->rx_opt.num_sacks || tp->rx_opt.dsack))
+ return -1;
+
+@@ -2361,7 +2365,7 @@ static int tcp_mtu_probe(struct sock *sk)
+ * probing process by not resetting search range to its orignal.
+ */
+ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+- interval < net->ipv4.sysctl_tcp_probe_threshold) {
++ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
+ /* Check whether enough time has elaplased for
+ * another round of probing.
+ */
+@@ -2379,7 +2383,7 @@ static int tcp_mtu_probe(struct sock *sk)
+ return 0;
+
+ /* Do we need to wait to drain cwnd? With none in flight, don't stall */
+- if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
++ if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) {
+ if (!tcp_packets_in_flight(tp))
+ return -1;
+ else
+@@ -2451,7 +2455,7 @@ static int tcp_mtu_probe(struct sock *sk)
+ if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
+ /* Decrement cwnd here because we are sending
+ * effectively two packets. */
+- tp->snd_cwnd--;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
+ tcp_event_new_data_sent(sk, nskb);
+
+ icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
+@@ -2504,7 +2508,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
+ limit <<= factor;
+
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+@@ -2708,7 +2712,7 @@ repair:
+ else
+ tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+
+- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
++ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp));
+ if (likely(sent_pkts || is_cwnd_limited))
+ tcp_cwnd_validate(sk, is_cwnd_limited);
+
+@@ -2737,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
+ if (rcu_access_pointer(tp->fastopen_rsk))
+ return false;
+
+- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
++ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
+ /* Schedule a loss probe in 2*RTT for SACK capable connections
+ * not in loss recovery, that are either limited by cwnd or application.
+ */
+@@ -2818,7 +2822,7 @@ void tcp_send_loss_probe(struct sock *sk)
+ if (unlikely(!skb)) {
+ WARN_ONCE(tp->packets_out,
+ "invalid inflight: %u state %u cwnd %u mss %d\n",
+- tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
++ tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
+ inet_csk(sk)->icsk_pending = 0;
+ return;
+ }
+@@ -3106,7 +3110,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
+ struct sk_buff *skb = to, *tmp;
+ bool first = true;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
+ return;
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ return;
+@@ -3146,7 +3150,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int cur_mss;
+ int diff, len, err;
+-
++ int avail_wnd;
+
+ /* Inconclusive MTU probe */
+ if (icsk->icsk_mtup.probe_size)
+@@ -3168,23 +3172,31 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
+ cur_mss = tcp_current_mss(sk);
++ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
+
+ /* If receiver has shrunk his window, and skb is out of
+ * new window, do not retransmit it. The exception is the
+ * case, when window is shrunk to zero. In this case
+- * our retransmit serves as a zero window probe.
++ * our retransmit of one segment serves as a zero window probe.
+ */
+- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
+- TCP_SKB_CB(skb)->seq != tp->snd_una)
+- return -EAGAIN;
++ if (avail_wnd <= 0) {
++ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
++ return -EAGAIN;
++ avail_wnd = cur_mss;
++ }
+
+ len = cur_mss * segs;
++ if (len > avail_wnd) {
++ len = rounddown(avail_wnd, cur_mss);
++ if (!len)
++ len = avail_wnd;
++ }
+ if (skb->len > len) {
+ if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+ cur_mss, GFP_ATOMIC))
+ return -ENOMEM; /* We'll try again later. */
+ } else {
+- if (skb_unclone(skb, GFP_ATOMIC))
++ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
+ return -ENOMEM;
+
+ diff = tcp_skb_pcount(skb);
+@@ -3192,8 +3204,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
+ diff -= tcp_skb_pcount(skb);
+ if (diff)
+ tcp_adjust_pcount(sk, skb, diff);
+- if (skb->len < cur_mss)
+- tcp_retrans_try_collapse(sk, skb, cur_mss);
++ avail_wnd = min_t(int, avail_wnd, cur_mss);
++ if (skb->len < avail_wnd)
++ tcp_retrans_try_collapse(sk, skb, avail_wnd);
+ }
+
+ /* RFC3168, section 6.1.1.1. ECN fallback */
+@@ -3307,7 +3320,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
+ if (!hole)
+ tp->retransmit_skb_hint = skb;
+
+- segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
++ segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp);
+ if (segs <= 0)
+ break;
+ sacked = TCP_SKB_CB(skb)->sacked;
+@@ -3364,11 +3377,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
+ */
+ void sk_forced_mem_schedule(struct sock *sk, int size)
+ {
+- int amt;
++ int delta, amt;
+
+- if (size <= sk->sk_forward_alloc)
++ delta = size - sk->sk_forward_alloc;
++ if (delta <= 0)
+ return;
+- amt = sk_mem_pages(size);
++ amt = sk_mem_pages(delta);
+ sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+ sk_memory_allocated_add(sk, amt);
+
+@@ -3596,7 +3610,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
+ th->window = htons(min(req->rsk_rcv_wnd, 65535U));
+ tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ th->doff = (tcp_header_size >> 2);
+- __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
++ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+
+ #ifdef CONFIG_TCP_MD5SIG
+ /* Okay, we have all we need - do the md5 hash if needed */
+@@ -3647,7 +3661,7 @@ static void tcp_connect_init(struct sock *sk)
+ * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
+ */
+ tp->tcp_header_len = sizeof(struct tcphdr);
+- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
+ tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+
+ #ifdef CONFIG_TCP_MD5SIG
+@@ -3683,7 +3697,7 @@ static void tcp_connect_init(struct sock *sk)
+ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+ &tp->rcv_wnd,
+ &tp->window_clamp,
+- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
+ &rcv_wscale,
+ rcv_wnd);
+
+@@ -3734,6 +3748,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
+ */
+ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
+ {
++ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_fastopen_request *fo = tp->fastopen_req;
+ int space, err = 0;
+@@ -3748,8 +3763,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
+ * private TCP options. The cost is reduced data space in SYN :(
+ */
+ tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
++ /* Sync mss_cache after updating the mss_clamp */
++ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+
+- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
++ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
+ MAX_TCP_OPTION_SPACE;
+
+ space = min_t(size_t, space, fo->size);
+@@ -4088,7 +4105,7 @@ void tcp_send_probe0(struct sock *sk)
+
+ icsk->icsk_probes_out++;
+ if (err <= 0) {
+- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
++ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
+ icsk->icsk_backoff++;
+ timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
+ } else {
+@@ -4112,8 +4129,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
+ NULL);
+ if (!res) {
+- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
++ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
++ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ if (unlikely(tcp_passive_fastopen(sk)))
+ tcp_sk(sk)->total_retrans++;
+ trace_tcp_retransmit_synack(sk, req);
+diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
+index 0de6935659635..042e27f541162 100644
+--- a/net/ipv4/tcp_rate.c
++++ b/net/ipv4/tcp_rate.c
+@@ -73,26 +73,31 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
+ *
+ * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
+ * called multiple times. We favor the information from the most recently
+- * sent skb, i.e., the skb with the highest prior_delivered count.
++ * sent skb, i.e., the skb with the most recently sent time and the highest
++ * sequence.
+ */
+ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
+ struct rate_sample *rs)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
++ u64 tx_tstamp;
+
+ if (!scb->tx.delivered_mstamp)
+ return;
+
++ tx_tstamp = tcp_skb_timestamp_us(skb);
+ if (!rs->prior_delivered ||
+- after(scb->tx.delivered, rs->prior_delivered)) {
++ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
++ scb->end_seq, rs->last_end_seq)) {
+ rs->prior_delivered = scb->tx.delivered;
+ rs->prior_mstamp = scb->tx.delivered_mstamp;
+ rs->is_app_limited = scb->tx.is_app_limited;
+ rs->is_retrans = scb->sacked & TCPCB_RETRANS;
++ rs->last_end_seq = scb->end_seq;
+
+ /* Record send time of most recently ACKed packet: */
+- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
++ tp->first_tx_mstamp = tx_tstamp;
+ /* Find the duration of the "send phase" of this window: */
+ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
+ scb->tx.first_tx_mstamp);
+@@ -189,7 +194,7 @@ void tcp_rate_check_app_limited(struct sock *sk)
+ /* Nothing in sending host's qdisc queues or NIC tx queue. */
+ sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
+ /* We are not limited by CWND. */
+- tcp_packets_in_flight(tp) < tp->snd_cwnd &&
++ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
+ /* All lost packets have been retransmitted. */
+ tp->lost_out <= tp->retrans_out)
+ tp->app_limited =
+diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
+index fd113f6226efc..ac14216f6204f 100644
+--- a/net/ipv4/tcp_recovery.c
++++ b/net/ipv4/tcp_recovery.c
+@@ -19,7 +19,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
+ return 0;
+
+ if (tp->sacked_out >= tp->reordering &&
+- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
++ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++ TCP_RACK_NO_DUPTHRESH))
+ return 0;
+ }
+
+@@ -192,7 +193,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
++ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
++ TCP_RACK_STATIC_REO_WND) ||
+ !rs->prior_delivered)
+ return;
+
+diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
+index 5842081bc8a25..862b96248a92d 100644
+--- a/net/ipv4/tcp_scalable.c
++++ b/net/ipv4/tcp_scalable.c
+@@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ if (!acked)
+ return;
+ }
+- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
++ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
+ acked);
+ }
+
+@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+- return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
++ return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U);
+ }
+
+ static struct tcp_congestion_ops tcp_scalable __read_mostly = {
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 20cf4a98c69d8..40a354dcfec5a 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
+ */
+ static int tcp_orphan_retries(struct sock *sk, bool alive)
+ {
+- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
++ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
+
+ /* We know from an ICMP that something is wrong. */
+ if (sk->sk_err_soft && !alive)
+@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+ int mss;
+
+ /* Black hole detection */
+- if (!net->ipv4.sysctl_tcp_mtu_probing)
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
+ return;
+
+ if (!icsk->icsk_mtup.enabled) {
+@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+ } else {
+ mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
+- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
+- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
++ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
++ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
++ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+ }
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ if (icsk->icsk_retransmits)
+ __dst_negative_advice(sk);
+- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
++ retry_until = icsk->icsk_syn_retries ? :
++ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ expired = icsk->icsk_retransmits >= retry_until;
+ } else {
+- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
++ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
+ /* Black hole detection */
+ tcp_mtu_probing(icsk, sk);
+
+ __dst_negative_advice(sk);
+ }
+
+- retry_until = net->ipv4.sysctl_tcp_retries2;
++ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
+
+@@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
+ msecs_to_jiffies(icsk->icsk_user_timeout))
+ goto abort;
+
+- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
++ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
+
+@@ -406,12 +407,15 @@ abort: tcp_write_err(sk);
+ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+- int max_retries = icsk->icsk_syn_retries ? :
+- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+ struct tcp_sock *tp = tcp_sk(sk);
++ int max_retries;
+
+ req->rsk_ops->syn_ack_timeout(req);
+
++ /* add one more retry for fastopen */
++ max_retries = icsk->icsk_syn_retries ? :
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
++
+ if (req->num_timeout >= max_retries) {
+ tcp_write_err(sk);
+ return;
+@@ -433,6 +437,22 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
+ TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
+ }
+
++static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
++ const struct sk_buff *skb)
++{
++ const struct tcp_sock *tp = tcp_sk(sk);
++ const int timeout = TCP_RTO_MAX * 2;
++ u32 rcv_delta, rtx_delta;
++
++ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
++ if (rcv_delta <= timeout)
++ return false;
++
++ rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
++ (tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
++
++ return rtx_delta > timeout;
++}
+
+ /**
+ * tcp_retransmit_timer() - The TCP retransmit timeout handler
+@@ -498,7 +518,7 @@ void tcp_retransmit_timer(struct sock *sk)
+ tp->snd_una, tp->snd_nxt);
+ }
+ #endif
+- if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
++ if (tcp_rtx_probe0_timed_out(sk, skb)) {
+ tcp_write_err(sk);
+ goto out;
+ }
+@@ -574,18 +594,20 @@ out_reset_timer:
+ * linear-timeout retransmissions into a black hole
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
++ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
+ tcp_stream_is_thin(tp) &&
+ icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+ icsk->icsk_backoff = 0;
+- icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
++ icsk->icsk_rto = clamp(__tcp_set_rto(tp),
++ tcp_rto_min(sk),
++ TCP_RTO_MAX);
+ } else {
+ /* Use normal (exponential) backoff */
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ }
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
+- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
++ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
+ __sk_dst_reset(sk);
+
+ out:;
+diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
+index 7c27aa629af19..8e135af0d4f70 100644
+--- a/net/ipv4/tcp_ulp.c
++++ b/net/ipv4/tcp_ulp.c
+@@ -136,6 +136,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
+ if (icsk->icsk_ulp_ops)
+ goto out_err;
+
++ err = -ENOTCONN;
++ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
++ goto out_err;
++
+ err = ulp_ops->init(sk);
+ if (err)
+ goto out_err;
+diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
+index c8003c8aad2c0..786848ad37ea8 100644
+--- a/net/ipv4/tcp_vegas.c
++++ b/net/ipv4/tcp_vegas.c
+@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
+
+ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
+ {
+- return min(tp->snd_ssthresh, tp->snd_cwnd);
++ return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
+ }
+
+ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+@@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ * This is:
+ * (actual rate in segments) * baseRTT
+ */
+- target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
++ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
+ do_div(target_cwnd, rtt);
+
+ /* Calculate the difference between the window we had,
+ * and the window we would like to have. This quantity
+ * is the "Diff" from the Arizona Vegas papers.
+ */
+- diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
++ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
+
+ if (diff > gamma && tcp_in_slow_start(tp)) {
+ /* Going too fast. Time to slow down
+@@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ * truncation robs us of full link
+ * utilization.
+ */
+- tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
++ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
++ (u32)target_cwnd + 1));
+ tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
+
+ } else if (tcp_in_slow_start(tp)) {
+@@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ /* The old window was too fast, so
+ * we slow down.
+ */
+- tp->snd_cwnd--;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
+ tp->snd_ssthresh
+ = tcp_vegas_ssthresh(tp);
+ } else if (diff < alpha) {
+ /* We don't have enough extra packets
+ * in the network, so speed up.
+ */
+- tp->snd_cwnd++;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ } else {
+ /* Sending just as fast as we
+ * should be.
+@@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ }
+ }
+
+- if (tp->snd_cwnd < 2)
+- tp->snd_cwnd = 2;
+- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
+- tp->snd_cwnd = tp->snd_cwnd_clamp;
++ if (tcp_snd_cwnd(tp) < 2)
++ tcp_snd_cwnd_set(tp, 2);
++ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
++ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
+
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ }
+diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
+index cd50a61c9976d..366ff6f214b2e 100644
+--- a/net/ipv4/tcp_veno.c
++++ b/net/ipv4/tcp_veno.c
+@@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+
+ rtt = veno->minrtt;
+
+- target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
++ target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt;
+ target_cwnd <<= V_PARAM_SHIFT;
+ do_div(target_cwnd, rtt);
+
+- veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
++ veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd;
+
+ if (tcp_in_slow_start(tp)) {
+ /* Slow start. */
+@@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ /* In the "non-congestive state", increase cwnd
+ * every rtt.
+ */
+- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
++ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
+ } else {
+ /* In the "congestive state", increase cwnd
+ * every other rtt.
+ */
+- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
++ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ if (veno->inc &&
+- tp->snd_cwnd < tp->snd_cwnd_clamp) {
+- tp->snd_cwnd++;
++ tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
+ veno->inc = 0;
+ } else
+ veno->inc = 1;
+@@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ tp->snd_cwnd_cnt += acked;
+ }
+ done:
+- if (tp->snd_cwnd < 2)
+- tp->snd_cwnd = 2;
+- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
+- tp->snd_cwnd = tp->snd_cwnd_clamp;
++ if (tcp_snd_cwnd(tp) < 2)
++ tcp_snd_cwnd_set(tp, 2);
++ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
++ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
+ }
+ /* Wipe the slate clean for the next rtt. */
+ /* veno->cntrtt = 0; */
+@@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
+
+ if (veno->diff < beta)
+ /* in "non-congestive state", cut cwnd by 1/5 */
+- return max(tp->snd_cwnd * 4 / 5, 2U);
++ return max(tcp_snd_cwnd(tp) * 4 / 5, 2U);
+ else
+ /* in "congestive state", cut cwnd by 1/2 */
+- return max(tp->snd_cwnd >> 1U, 2U);
++ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ }
+
+ static struct tcp_congestion_ops tcp_veno __read_mostly = {
+diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
+index b2e05c4cea00f..c6e97141eef25 100644
+--- a/net/ipv4/tcp_westwood.c
++++ b/net/ipv4/tcp_westwood.c
+@@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
+
+ switch (event) {
+ case CA_EVENT_COMPLETE_CWR:
+- tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
++ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
++ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
+ break;
+ case CA_EVENT_LOSS:
+ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
+index 07c4c93b9fdb6..18b07ff5d20e6 100644
+--- a/net/ipv4/tcp_yeah.c
++++ b/net/ipv4/tcp_yeah.c
+@@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+
+ if (!yeah->doing_reno_now) {
+ /* Scalable */
+- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
++ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
+ acked);
+ } else {
+ /* Reno */
+- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
++ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
+ }
+
+ /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
+@@ -130,7 +130,7 @@ do_vegas:
+ /* Compute excess number of packets above bandwidth
+ * Avoid doing full 64 bit divide.
+ */
+- bw = tp->snd_cwnd;
++ bw = tcp_snd_cwnd(tp);
+ bw *= rtt - yeah->vegas.baseRTT;
+ do_div(bw, rtt);
+ queue = bw;
+@@ -138,20 +138,20 @@ do_vegas:
+ if (queue > TCP_YEAH_ALPHA ||
+ rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
+ if (queue > TCP_YEAH_ALPHA &&
+- tp->snd_cwnd > yeah->reno_count) {
++ tcp_snd_cwnd(tp) > yeah->reno_count) {
+ u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+- tp->snd_cwnd >> TCP_YEAH_EPSILON);
++ tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON);
+
+- tp->snd_cwnd -= reduction;
++ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction);
+
+- tp->snd_cwnd = max(tp->snd_cwnd,
+- yeah->reno_count);
++ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
++ yeah->reno_count));
+
+- tp->snd_ssthresh = tp->snd_cwnd;
++ tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ }
+
+ if (yeah->reno_count <= 2)
+- yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
++ yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U);
+ else
+ yeah->reno_count++;
+
+@@ -176,7 +176,7 @@ do_vegas:
+ */
+ yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
+ yeah->vegas.beg_snd_nxt = tp->snd_nxt;
+- yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
++ yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp);
+
+ /* Wipe the slate clean for the next RTT. */
+ yeah->vegas.cntRTT = 0;
+@@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
+ if (yeah->doing_reno_now < TCP_YEAH_RHO) {
+ reduction = yeah->lastQ;
+
+- reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
++ reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U));
+
+- reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
++ reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA);
+ } else
+- reduction = max(tp->snd_cwnd>>1, 2U);
++ reduction = max(tcp_snd_cwnd(tp)>>1, 2U);
+
+ yeah->fast_count = 0;
+ yeah->reno_count = max(yeah->reno_count>>1, 2U);
+
+- return max_t(int, tp->snd_cwnd - reduction, 2);
++ return max_t(int, tcp_snd_cwnd(tp) - reduction, 2);
+ }
+
+ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 2fffcf2b54f3f..198d8e07413d3 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -443,14 +443,24 @@ static struct sock *udp4_lib_lookup2(struct net *net,
+ score = compute_score(sk, net, saddr, sport,
+ daddr, hnum, dif, sdif);
+ if (score > badness) {
+- result = lookup_reuseport(net, sk, skb,
+- saddr, sport, daddr, hnum);
++ badness = score;
++ result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
++ if (!result) {
++ result = sk;
++ continue;
++ }
++
+ /* Fall back to scoring if group has connections */
+- if (result && !reuseport_has_conns(sk, false))
++ if (!reuseport_has_conns(sk))
+ return result;
+
+- result = result ? : sk;
+- badness = score;
++ /* Reuseport logic returned an error, keep original score. */
++ if (IS_ERR(result))
++ continue;
++
++ badness = compute_score(result, net, saddr, sport,
++ daddr, hnum, dif, sdif);
++
+ }
+ }
+ return result;
+@@ -781,6 +791,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
+ */
+ if (tunnel) {
+ /* ...not for tunnels though: we don't have a sending socket */
++ if (udp_sk(sk)->encap_err_rcv)
++ udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2);
+ goto out;
+ }
+ if (!inet->recverr) {
+@@ -917,7 +929,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+@@ -1594,7 +1606,7 @@ drop:
+ }
+ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
+
+-void udp_destruct_sock(struct sock *sk)
++void udp_destruct_common(struct sock *sk)
+ {
+ /* reclaim completely the forward allocated memory */
+ struct udp_sock *up = udp_sk(sk);
+@@ -1607,10 +1619,14 @@ void udp_destruct_sock(struct sock *sk)
+ kfree_skb(skb);
+ }
+ udp_rmem_release(sk, total, 0, true);
++}
++EXPORT_SYMBOL_GPL(udp_destruct_common);
+
++static void udp_destruct_sock(struct sock *sk)
++{
++ udp_destruct_common(sk);
+ inet_sock_destruct(sk);
+ }
+-EXPORT_SYMBOL_GPL(udp_destruct_sock);
+
+ int udp_init_sock(struct sock *sk)
+ {
+@@ -1618,7 +1634,6 @@ int udp_init_sock(struct sock *sk)
+ sk->sk_destruct = udp_destruct_sock;
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(udp_init_sock);
+
+ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
+ {
+@@ -1808,6 +1823,17 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ skb = skb_recv_udp(sk, 0, 1, &err);
+ if (!skb)
+ return err;
++
++ if (udp_lib_checksum_complete(skb)) {
++ __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
++ IS_UDPLITE(sk));
++ __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
++ IS_UDPLITE(sk));
++ atomic_inc(&sk->sk_drops);
++ kfree_skb(skb);
++ continue;
++ }
++
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+@@ -2240,7 +2266,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ struct dst_entry *old;
+
+ if (dst_hold_safe(dst)) {
+- old = xchg(&sk->sk_rx_dst, dst);
++ old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
+ dst_release(old);
+ return old != dst;
+ }
+@@ -2400,6 +2426,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ __be32 saddr, daddr;
+ struct net *net = dev_net(skb->dev);
+ bool refcounted;
++ int drop_reason;
++
++ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+
+ /*
+ * Validate the packet.
+@@ -2430,7 +2459,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ struct dst_entry *dst = skb_dst(skb);
+ int ret;
+
+- if (unlikely(sk->sk_rx_dst != dst))
++ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
+ udp_sk_rx_dst_set(sk, dst);
+
+ ret = udp_unicast_rcv_skb(sk, skb, uh);
+@@ -2455,6 +2484,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
++ drop_reason = SKB_DROP_REASON_NO_SOCKET;
+ __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+@@ -2462,10 +2492,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ * Hmm. We got an UDP packet to a port to which we
+ * don't wanna listen. Ignore it.
+ */
+- kfree_skb(skb);
++ kfree_skb_reason(skb, drop_reason);
+ return 0;
+
+ short_packet:
++ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source),
+@@ -2478,6 +2509,7 @@ csum_error:
+ * RFC1122: OK. Discards the bad packet silently (as far as
+ * the network is concerned, anyway) as per 4.1.3.4 (MUST).
+ */
++ drop_reason = SKB_DROP_REASON_UDP_CSUM;
+ net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
+@@ -2485,7 +2517,7 @@ csum_error:
+ __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ drop:
+ __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+- kfree_skb(skb);
++ kfree_skb_reason(skb, drop_reason);
+ return 0;
+ }
+
+@@ -2537,8 +2569,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
+ struct sock *sk;
+
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+- if (INET_MATCH(sk, net, acookie, rmt_addr,
+- loc_addr, ports, dif, sdif))
++ if (INET_MATCH(net, sk, acookie, ports, dif, sdif))
+ return sk;
+ /* Only check first socket in chain */
+ break;
+@@ -2589,7 +2620,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
+
+ skb->sk = sk;
+ skb->destructor = sock_efree;
+- dst = READ_ONCE(sk->sk_rx_dst);
++ dst = rcu_dereference(sk->sk_rx_dst);
+
+ if (dst)
+ dst = dst_check(dst, 0);
+@@ -3065,7 +3096,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
+ {
+ seq_setwidth(seq, 127);
+ if (v == SEQ_START_TOKEN)
+- seq_puts(seq, " sl local_address rem_address st tx_queue "
++ seq_puts(seq, " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode ref pointer drops");
+ else {
+diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
+index bbe6569c9ad34..56e1047632f6b 100644
+--- a/net/ipv4/udp_bpf.c
++++ b/net/ipv4/udp_bpf.c
+@@ -69,6 +69,9 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
++ if (!len)
++ return 0;
++
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
+index b97e3635acf50..1ff5b8e30bb92 100644
+--- a/net/ipv4/udp_tunnel_core.c
++++ b/net/ipv4/udp_tunnel_core.c
+@@ -75,6 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+
+ udp_sk(sk)->encap_type = cfg->encap_type;
+ udp_sk(sk)->encap_rcv = cfg->encap_rcv;
++ udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv;
+ udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
+ udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+ udp_sk(sk)->gro_receive = cfg->gro_receive;
+@@ -178,6 +179,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+ void udp_tunnel_sock_release(struct socket *sock)
+ {
+ rcu_assign_sk_user_data(sock->sk, NULL);
++ synchronize_rcu();
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sock_release(sock);
+ }
+diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
+index b91003538d87a..bc3a043a5d5c7 100644
+--- a/net/ipv4/udp_tunnel_nic.c
++++ b/net/ipv4/udp_tunnel_nic.c
+@@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
+ list_for_each_entry(node, &info->shared->devices, list)
+ if (node->dev == dev)
+ break;
+- if (node->dev != dev)
++ if (list_entry_is_head(node, &info->shared->devices, list))
+ return;
+
+ list_del(&node->list);
+diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
+index cd1cd68adeec8..03382c11c1623 100644
+--- a/net/ipv4/udplite.c
++++ b/net/ipv4/udplite.c
+@@ -17,6 +17,14 @@
+ struct udp_table udplite_table __read_mostly;
+ EXPORT_SYMBOL(udplite_table);
+
++/* Designate sk as UDP-Lite socket */
++static int udplite_sk_init(struct sock *sk)
++{
++ udp_init_sock(sk);
++ udp_sk(sk)->pcflag = UDPLITE_BIT;
++ return 0;
++}
++
+ static int udplite_rcv(struct sk_buff *skb)
+ {
+ return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
+@@ -53,6 +61,8 @@ struct proto udplite_prot = {
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
++ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
++ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udplite_table,
+ };
+diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
+index ad2afeef4f106..eac206a290d05 100644
+--- a/net/ipv4/xfrm4_input.c
++++ b/net/ipv4/xfrm4_input.c
+@@ -164,6 +164,7 @@ drop:
+ kfree_skb(skb);
+ return 0;
+ }
++EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
+
+ int xfrm4_rcv(struct sk_buff *skb)
+ {
+diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
+index 2fe5860c21d6e..b146ce88c5d0c 100644
+--- a/net/ipv4/xfrm4_protocol.c
++++ b/net/ipv4/xfrm4_protocol.c
+@@ -304,4 +304,3 @@ void __init xfrm4_protocol_init(void)
+ {
+ xfrm_input_register_afinfo(&xfrm4_input_afinfo);
+ }
+-EXPORT_SYMBOL(xfrm4_protocol_init);
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index c6a90b7bbb70e..6572174e2115f 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -323,9 +323,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
+ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+ unsigned long when)
+ {
+- if (!timer_pending(&idev->rs_timer))
++ if (!mod_timer(&idev->rs_timer, jiffies + when))
+ in6_dev_hold(idev);
+- mod_timer(&idev->rs_timer, jiffies + when);
+ }
+
+ static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+@@ -552,7 +551,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
+ #ifdef CONFIG_IPV6_MROUTE
+ if ((all || type == NETCONFA_MC_FORWARDING) &&
+ nla_put_s32(skb, NETCONFA_MC_FORWARDING,
+- devconf->mc_forwarding) < 0)
++ atomic_read(&devconf->mc_forwarding)) < 0)
+ goto nla_put_failure;
+ #endif
+ if ((all || type == NETCONFA_PROXY_NEIGH) &&
+@@ -798,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev)
+ {
+ struct net_device *dev;
+ struct inet6_ifaddr *ifa;
++ LIST_HEAD(tmp_addr_list);
+
+ if (!idev)
+ return;
+@@ -816,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev)
+ }
+ }
+
++ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (ifa->flags&IFA_F_TENTATIVE)
+ continue;
++ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
++ }
++ read_unlock_bh(&idev->lock);
++
++ while (!list_empty(&tmp_addr_list)) {
++ ifa = list_first_entry(&tmp_addr_list,
++ struct inet6_ifaddr, if_list_aux);
++ list_del(&ifa->if_list_aux);
+ if (idev->cnf.forwarding)
+ addrconf_join_anycast(ifa);
+ else
+ addrconf_leave_anycast(ifa);
+ }
++
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
+ dev->ifindex, &idev->cnf);
+@@ -1100,10 +1110,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
+ goto out;
+ }
+
+- if (net->ipv6.devconf_all->disable_policy ||
+- idev->cnf.disable_policy)
+- f6i->dst_nopolicy = true;
+-
+ neigh_parms_data_state_setall(idev->nd_parms);
+
+ ifa->addr = *cfg->pfx;
+@@ -1362,7 +1368,7 @@ retry:
+ * idev->desync_factor if it's larger
+ */
+ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
+- max_desync_factor = min_t(__u32,
++ max_desync_factor = min_t(long,
+ idev->cnf.max_desync_factor,
+ cnf_temp_preferred_lft - regen_advance);
+
+@@ -1837,8 +1843,8 @@ out:
+ }
+ EXPORT_SYMBOL(ipv6_dev_get_saddr);
+
+-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+- u32 banned_flags)
++static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
++ u32 banned_flags)
+ {
+ struct inet6_ifaddr *ifp;
+ int err = -EADDRNOTAVAIL;
+@@ -2559,12 +2565,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
+ ipv6_ifa_notify(0, ift);
+ }
+
+- if ((create || list_empty(&idev->tempaddr_list)) &&
+- idev->cnf.use_tempaddr > 0) {
++ /* Also create a temporary address if it's enabled but no temporary
++ * address currently exists.
++ * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
++ * as part of cleanup (ie. deleting the mngtmpaddr).
++ * We don't want that to result in creating a new temporary ip address.
++ */
++ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
++ create = true;
++
++ if (create && idev->cnf.use_tempaddr > 0) {
+ /* When a new public address is created as described
+ * in [ADDRCONF], also create a new temporary address.
+- * Also create a temporary address if it's enabled but
+- * no temporary address currently exists.
+ */
+ read_unlock_bh(&idev->lock);
+ ipv6_create_tempaddr(ifp, false);
+@@ -2587,7 +2599,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ __u32 valid_lft, u32 prefered_lft)
+ {
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
+- int create = 0;
++ int create = 0, update_lft = 0;
+
+ if (!ifp && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+@@ -2631,19 +2643,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ unsigned long now;
+ u32 stored_lft;
+
+- /* Update lifetime (RFC4862 5.5.3 e)
+- * We deviate from RFC4862 by honoring all Valid Lifetimes to
+- * improve the reaction of SLAAC to renumbering events
+- * (draft-gont-6man-slaac-renum-06, Section 4.2)
+- */
++ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock_bh(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+-
+ if (!create && stored_lft) {
++ const u32 minimum_lft = min_t(u32,
++ stored_lft, MIN_VALID_LIFETIME);
++ valid_lft = max(valid_lft, minimum_lft);
++
++ /* RFC4862 Section 5.5.3e:
++ * "Note that the preferred lifetime of the
++ * corresponding address is always reset to
++ * the Preferred Lifetime in the received
++ * Prefix Information option, regardless of
++ * whether the valid lifetime is also reset or
++ * ignored."
++ *
++ * So we should always update prefered_lft here.
++ */
++ update_lft = 1;
++ }
++
++ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+@@ -3109,14 +3134,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
+ offset = sizeof(struct in6_addr) - 4;
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
+
+- if (idev->dev->flags&IFF_POINTOPOINT) {
+- addr.s6_addr32[0] = htonl(0xfe800000);
+- scope = IFA_LINK;
+- plen = 64;
+- } else {
++ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
++ } else {
++ if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
++ return;
++
++ addr.s6_addr32[0] = htonl(0xfe800000);
++ scope = IFA_LINK;
++ plen = 64;
+ }
+
+ if (addr.s6_addr32[3]) {
+@@ -3424,6 +3452,30 @@ static void addrconf_gre_config(struct net_device *dev)
+ }
+ #endif
+
++static void addrconf_init_auto_addrs(struct net_device *dev)
++{
++ switch (dev->type) {
++#if IS_ENABLED(CONFIG_IPV6_SIT)
++ case ARPHRD_SIT:
++ addrconf_sit_config(dev);
++ break;
++#endif
++#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
++ case ARPHRD_IP6GRE:
++ case ARPHRD_IPGRE:
++ addrconf_gre_config(dev);
++ break;
++#endif
++ case ARPHRD_LOOPBACK:
++ init_loopback(dev);
++ break;
++
++ default:
++ addrconf_dev_config(dev);
++ break;
++ }
++}
++
+ static int fixup_permanent_addr(struct net *net,
+ struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+@@ -3588,26 +3640,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
+ run_pending = 1;
+ }
+
+- switch (dev->type) {
+-#if IS_ENABLED(CONFIG_IPV6_SIT)
+- case ARPHRD_SIT:
+- addrconf_sit_config(dev);
+- break;
+-#endif
+-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+- case ARPHRD_IP6GRE:
+- case ARPHRD_IPGRE:
+- addrconf_gre_config(dev);
+- break;
+-#endif
+- case ARPHRD_LOOPBACK:
+- init_loopback(dev);
+- break;
+-
+- default:
+- addrconf_dev_config(dev);
+- break;
+- }
++ addrconf_init_auto_addrs(dev);
+
+ if (!IS_ERR_OR_NULL(idev)) {
+ if (run_pending)
+@@ -3712,8 +3745,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
+ unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
+ struct net *net = dev_net(dev);
+ struct inet6_dev *idev;
+- struct inet6_ifaddr *ifa, *tmp;
++ struct inet6_ifaddr *ifa;
++ LIST_HEAD(tmp_addr_list);
+ bool keep_addr = false;
++ bool was_ready;
+ int state, i;
+
+ ASSERT_RTNL();
+@@ -3779,7 +3814,10 @@ restart:
+
+ addrconf_del_rs_timer(idev);
+
+- /* Step 2: clear flags for stateless addrconf */
++ /* Step 2: clear flags for stateless addrconf, repeated down
++ * detection
++ */
++ was_ready = idev->if_flags & IF_READY;
+ if (!unregister)
+ idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
+
+@@ -3800,16 +3838,23 @@ restart:
+ write_lock_bh(&idev->lock);
+ }
+
+- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
++ list_for_each_entry(ifa, &idev->addr_list, if_list)
++ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
++ write_unlock_bh(&idev->lock);
++
++ while (!list_empty(&tmp_addr_list)) {
+ struct fib6_info *rt = NULL;
+ bool keep;
+
++ ifa = list_first_entry(&tmp_addr_list,
++ struct inet6_ifaddr, if_list_aux);
++ list_del(&ifa->if_list_aux);
++
+ addrconf_del_dad_work(ifa);
+
+ keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
+ !addr_is_local(&ifa->addr);
+
+- write_unlock_bh(&idev->lock);
+ spin_lock_bh(&ifa->lock);
+
+ if (keep) {
+@@ -3840,20 +3885,19 @@ restart:
+ addrconf_leave_solict(ifa->idev, &ifa->addr);
+ }
+
+- write_lock_bh(&idev->lock);
+ if (!keep) {
++ write_lock_bh(&idev->lock);
+ list_del_rcu(&ifa->if_list);
++ write_unlock_bh(&idev->lock);
+ in6_ifa_put(ifa);
+ }
+ }
+
+- write_unlock_bh(&idev->lock);
+-
+ /* Step 5: Discard anycast and multicast list */
+ if (unregister) {
+ ipv6_ac_destroy_dev(idev);
+ ipv6_mc_destroy_dev(idev);
+- } else {
++ } else if (was_ready) {
+ ipv6_mc_down(idev);
+ }
+
+@@ -4181,7 +4225,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+ send_rs = send_mld &&
+ ipv6_accept_ra(ifp->idev) &&
+ ifp->idev->cnf.rtr_solicits != 0 &&
+- (dev->flags&IFF_LOOPBACK) == 0;
++ (dev->flags & IFF_LOOPBACK) == 0 &&
++ (dev->type != ARPHRD_TUNNEL);
+ read_unlock_bh(&ifp->idev->lock);
+
+ /* While dad is in progress mld report's source address is in6_addrany.
+@@ -4980,6 +5025,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ goto error;
+
++ spin_lock_bh(&ifa->lock);
+ if (!((ifa->flags&IFA_F_PERMANENT) &&
+ (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
+ preferred = ifa->prefered_lft;
+@@ -5001,6 +5047,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+ preferred = INFINITY_LIFE_TIME;
+ valid = INFINITY_LIFE_TIME;
+ }
++ spin_unlock_bh(&ifa->lock);
+
+ if (!ipv6_addr_any(&ifa->peer_addr)) {
+ if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
+@@ -5129,9 +5176,9 @@ next:
+ fillargs->event = RTM_GETMULTICAST;
+
+ /* multicast address */
+- for (ifmca = rcu_dereference(idev->mc_list);
++ for (ifmca = rtnl_dereference(idev->mc_list);
+ ifmca;
+- ifmca = rcu_dereference(ifmca->next), ip_idx++) {
++ ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
+@@ -5515,7 +5562,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+ array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
+ #endif
+ #ifdef CONFIG_IPV6_MROUTE
+- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
++ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
+ #endif
+ array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
+ array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
+@@ -6348,7 +6395,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
+
+ if (idev->cnf.addr_gen_mode != new_val) {
+ idev->cnf.addr_gen_mode = new_val;
+- addrconf_dev_config(idev->dev);
++ addrconf_init_auto_addrs(idev->dev);
+ }
+ } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
+ struct net_device *dev;
+@@ -6359,7 +6406,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
+ if (idev &&
+ idev->cnf.addr_gen_mode != new_val) {
+ idev->cnf.addr_gen_mode = new_val;
+- addrconf_dev_config(idev->dev);
++ addrconf_init_auto_addrs(idev->dev);
+ }
+ }
+ }
+@@ -7091,9 +7138,8 @@ static int __net_init addrconf_init_net(struct net *net)
+ if (!dflt)
+ goto err_alloc_dflt;
+
+- if (IS_ENABLED(CONFIG_SYSCTL) &&
+- !net_eq(net, &init_net)) {
+- switch (sysctl_devconf_inherit_init_net) {
++ if (!net_eq(net, &init_net)) {
++ switch (net_inherit_devconf()) {
+ case 1: /* copy from init_net */
+ memcpy(all, init_net.ipv6.devconf_all,
+ sizeof(ipv6_devconf));
+diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
+index 8a22486cf2702..17ac45aa7194c 100644
+--- a/net/ipv6/addrlabel.c
++++ b/net/ipv6/addrlabel.c
+@@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
+ {
+ struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
+ ifal->ifal_family = AF_INET6;
++ ifal->__ifal_reserved = 0;
+ ifal->ifal_prefixlen = prefixlen;
+ ifal->ifal_flags = 0;
+ ifal->ifal_index = ifindex;
+diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+index b5878bb8e419d..164b130203f1e 100644
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -108,6 +108,13 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+ }
+
++void inet6_sock_destruct(struct sock *sk)
++{
++ inet6_cleanup_sock(sk);
++ inet_sock_destruct(sk);
++}
++EXPORT_SYMBOL_GPL(inet6_sock_destruct);
++
+ static int inet6_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+ {
+@@ -200,7 +207,7 @@ lookup_protocol:
+ inet->hdrincl = 1;
+ }
+
+- sk->sk_destruct = inet_sock_destruct;
++ sk->sk_destruct = inet6_sock_destruct;
+ sk->sk_family = PF_INET6;
+ sk->sk_protocol = protocol;
+
+@@ -226,7 +233,7 @@ lookup_protocol:
+ RCU_INIT_POINTER(inet->mc_list, NULL);
+ inet->rcv_tos = 0;
+
+- if (net->ipv4.sysctl_ip_no_pmtu_disc)
++ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -507,6 +514,12 @@ void inet6_destroy_sock(struct sock *sk)
+ }
+ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+
++void inet6_cleanup_sock(struct sock *sk)
++{
++ inet6_destroy_sock(sk);
++}
++EXPORT_SYMBOL_GPL(inet6_cleanup_sock);
++
+ /*
+ * This does both peername and sockname.
+ */
+@@ -521,31 +534,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+ sin->sin6_family = AF_INET6;
+ sin->sin6_flowinfo = 0;
+ sin->sin6_scope_id = 0;
++ lock_sock(sk);
+ if (peer) {
+- if (!inet->inet_dport)
+- return -ENOTCONN;
+- if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+- peer == 1)
++ if (!inet->inet_dport ||
++ (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
++ peer == 1)) {
++ release_sock(sk);
+ return -ENOTCONN;
++ }
+ sin->sin6_port = inet->inet_dport;
+ sin->sin6_addr = sk->sk_v6_daddr;
+ if (np->sndflow)
+ sin->sin6_flowinfo = np->flow_label;
+- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+- CGROUP_INET6_GETPEERNAME,
+- NULL);
++ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
++ CGROUP_INET6_GETPEERNAME);
+ } else {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ sin->sin6_addr = np->saddr;
+ else
+ sin->sin6_addr = sk->sk_v6_rcv_saddr;
+ sin->sin6_port = inet->inet_sport;
+- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+- CGROUP_INET6_GETSOCKNAME,
+- NULL);
++ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
++ CGROUP_INET6_GETSOCKNAME);
+ }
+ sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
+ sk->sk_bound_dev_if);
++ release_sock(sk);
+ return sizeof(*sin);
+ }
+ EXPORT_SYMBOL(inet6_getname);
+@@ -1025,6 +1039,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
+ .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+ .fib6_nh_init = fib6_nh_init,
+ .fib6_nh_release = fib6_nh_release,
++ .fib6_nh_release_dsts = fib6_nh_release_dsts,
+ .fib6_update_sernum = fib6_update_sernum_stub,
+ .fib6_rt_update = fib6_rt_update,
+ .ip6_del_rt = ip6_del_rt,
+diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
+index 206f66310a88d..a30ff5d6808aa 100644
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_dport = inet->inet_dport;
+ fl6->fl6_sport = inet->inet_sport;
+- fl6->flowlabel = np->flow_label;
++ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
+ fl6->flowi6_uid = sk->sk_uid;
+
+ if (!fl6->flowi6_oif)
+@@ -256,7 +256,7 @@ ipv4_connected:
+ goto out;
+ }
+
+- reuseport_has_conns(sk, true);
++ reuseport_has_conns_set(sk);
+ sk->sk_state = TCP_ESTABLISHED;
+ sk_set_txhash(sk);
+ out:
+diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
+index ed2f061b87685..6219d97cac7a3 100644
+--- a/net/ipv6/esp6.c
++++ b/net/ipv6/esp6.c
+@@ -491,6 +491,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
+ return err;
+ }
+
++ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
++ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
++ goto cow;
++
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
+ nfrags = 1;
+@@ -708,7 +712,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
++ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ esp.tfclen = padto - skb->len;
+ }
+@@ -808,6 +812,11 @@ int esp6_input_done2(struct sk_buff *skb, int err)
+ struct tcphdr *th;
+
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
++ if (offset == -1) {
++ err = -EINVAL;
++ goto out;
++ }
++
+ uh = (void *)(skb->data + offset);
+ th = (void *)(skb->data + offset);
+ hdr_len += offset;
+diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
+index a349d47980776..6b30f34c79783 100644
+--- a/net/ipv6/esp6_offload.c
++++ b/net/ipv6/esp6_offload.c
+@@ -198,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
+ ipv6_skip_exthdr(skb, 0, &proto, &frag);
+ }
+
++ if (proto == IPPROTO_IPIP)
++ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
++
+ __skb_pull(skb, skb_transport_offset(skb));
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+@@ -340,6 +343,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
+ xo->seq.low += skb_shinfo(skb)->gso_segs;
+ }
+
++ if (xo->seq.low < seq)
++ xo->seq.hi++;
++
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ len = skb->len - sizeof(struct ipv6hdr);
+@@ -366,6 +372,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
+
+ secpath_reset(skb);
+
++ if (skb_needs_linearize(skb, skb->dev->features) &&
++ __skb_linearize(skb))
++ return -ENOMEM;
+ return 0;
+ }
+
+diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
+index 3a871a09f9625..d273f6fe19c20 100644
+--- a/net/ipv6/exthdrs.c
++++ b/net/ipv6/exthdrs.c
+@@ -564,24 +564,6 @@ looped_back:
+ return -1;
+ }
+
+- if (skb_cloned(skb)) {
+- if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0,
+- GFP_ATOMIC)) {
+- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_OUTDISCARDS);
+- kfree_skb(skb);
+- return -1;
+- }
+- } else {
+- err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE);
+- if (unlikely(err)) {
+- kfree_skb(skb);
+- return -1;
+- }
+- }
+-
+- hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb);
+-
+ if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri,
+ hdr->cmpre))) {
+ kfree_skb(skb);
+@@ -627,6 +609,17 @@ looped_back:
+ skb_pull(skb, ((hdr->hdrlen + 1) << 3));
+ skb_postpull_rcsum(skb, oldhdr,
+ sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3));
++ if (unlikely(!hdr->segments_left)) {
++ if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0,
++ GFP_ATOMIC)) {
++ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS);
++ kfree_skb(skb);
++ kfree(buf);
++ return -1;
++ }
++
++ oldhdr = ipv6_hdr(skb);
++ }
+ skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
+index da46c42846765..49e31e4ae7b7f 100644
+--- a/net/ipv6/exthdrs_core.c
++++ b/net/ipv6/exthdrs_core.c
+@@ -143,6 +143,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
+ optlen = 1;
+ break;
+ default:
++ if (len < 2)
++ goto bad;
+ optlen = nh[offset + 1] + 2;
+ if (optlen > len)
+ goto bad;
+diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
+index 40f3e4f9f33a2..dcedfe29d9d93 100644
+--- a/net/ipv6/fib6_rules.c
++++ b/net/ipv6/fib6_rules.c
+@@ -267,6 +267,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule,
+ }
+
+ INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
++ int flags,
+ struct fib_lookup_arg *arg)
+ {
+ struct fib6_result *res = arg->result;
+@@ -294,8 +295,7 @@ INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
+ return false;
+
+ suppress_route:
+- if (!(arg->flags & FIB_LOOKUP_NOREF))
+- ip6_rt_put(rt);
++ ip6_rt_put_flags(rt, flags);
+ return true;
+ }
+
+diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+index a7c31ab67c5d6..71a69166a6bd2 100644
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -57,6 +57,7 @@
+ #include <net/protocol.h>
+ #include <net/raw.h>
+ #include <net/rawv6.h>
++#include <net/seg6.h>
+ #include <net/transp_v6.h>
+ #include <net/ip6_route.h>
+ #include <net/addrconf.h>
+@@ -429,7 +430,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb)
+ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
+ const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+- if (rt6)
++ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
++ * and ip6_null_entry could be set to skb if no route is found.
++ */
++ if (rt6 && rt6->rt6i_idev)
+ dev = rt6->rt6i_idev->dev;
+ }
+
+@@ -820,6 +824,7 @@ out_bh_enable:
+
+ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+ {
++ struct inet6_skb_parm *opt = IP6CB(skb);
+ const struct inet6_protocol *ipprot;
+ int inner_offset;
+ __be16 frag_off;
+@@ -829,6 +834,8 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto out;
+
++ seg6_icmp_srh(skb, opt);
++
+ nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
+ if (ipv6_ext_hdr(nexthdr)) {
+ /* now skip over extension headers */
+@@ -853,7 +860,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+
+ ipprot = rcu_dereference(inet6_protos[nexthdr]);
+ if (ipprot && ipprot->err_handler)
+- ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
++ ipprot->err_handler(skb, opt, type, code, inner_offset, info);
+
+ raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
+ return;
+@@ -923,7 +930,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
+ break;
+ case ICMPV6_EXT_ECHO_REQUEST:
+ if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
+- net->ipv4.sysctl_icmp_echo_enable_probe)
++ READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
+ icmpv6_echo_reply(skb);
+ break;
+
+diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
+index a1ac0e3d8c60c..163668531a57f 100644
+--- a/net/ipv6/ila/ila_xlat.c
++++ b/net/ipv6/ila/ila_xlat.c
+@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+
+ rcu_read_lock();
+
++ ret = -ESRCH;
+ ila = ila_lookup_by_params(&xp, ilan);
+ if (ila) {
+ ret = ila_dump_info(ila,
+diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
+index 67c9114835c84..b4a5e01e12016 100644
+--- a/net/ipv6/inet6_hashtables.c
++++ b/net/ipv6/inet6_hashtables.c
+@@ -71,12 +71,12 @@ begin:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_hash != hash)
+ continue;
+- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
++ if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
+ continue;
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+ goto out;
+
+- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
++ if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) {
+ sock_gen_put(sk);
+ goto begin;
+ }
+@@ -269,7 +269,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+ if (sk2->sk_hash != hash)
+ continue;
+
+- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
++ if (likely(inet6_match(net, sk2, saddr, daddr, ports,
+ dif, sdif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+@@ -308,7 +308,7 @@ not_unique:
+ return -EADDRNOTAVAIL;
+ }
+
+-static u32 inet6_sk_port_offset(const struct sock *sk)
++static u64 inet6_sk_port_offset(const struct sock *sk)
+ {
+ const struct inet_sock *inet = inet_sk(sk);
+
+@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
+ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+ {
+- u32 port_offset = 0;
++ u64 port_offset = 0;
+
+ if (!inet_sk(sk)->inet_num)
+ port_offset = inet6_sk_port_offset(sk);
+@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
+ {
+ int err = 0;
+
+- if (sk->sk_state != TCP_CLOSE) {
+- local_bh_disable();
++ if (sk->sk_state != TCP_CLOSE)
+ err = __inet_hash(sk, NULL);
+- local_bh_enable();
+- }
+
+ return err;
+ }
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index 0371d2c141455..a506e57c4032a 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -111,7 +111,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
+ fn = rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock));
+ if (fn)
+- fn->fn_sernum = fib6_new_sernum(net);
++ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
+ }
+
+ /*
+@@ -589,12 +589,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+ spin_unlock_bh(&table->tb6_lock);
+ if (res > 0) {
+ cb->args[4] = 1;
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = READ_ONCE(w->root->fn_sernum);
+ }
+ } else {
+- if (cb->args[5] != w->root->fn_sernum) {
++ int sernum = READ_ONCE(w->root->fn_sernum);
++ if (cb->args[5] != sernum) {
+ /* Begin at the root if the tree changed */
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = sernum;
+ w->state = FWS_INIT;
+ w->node = w->root;
+ w->skip = w->count;
+@@ -1344,7 +1345,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
+ /* paired with smp_rmb() in fib6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+- fn->fn_sernum = sernum;
++ WRITE_ONCE(fn->fn_sernum, sernum);
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ }
+@@ -2173,8 +2174,8 @@ static int fib6_clean_node(struct fib6_walker *w)
+ };
+
+ if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
+- w->node->fn_sernum != c->sernum)
+- w->node->fn_sernum = c->sernum;
++ READ_ONCE(w->node->fn_sernum) != c->sernum)
++ WRITE_ONCE(w->node->fn_sernum, c->sernum);
+
+ if (!c->func) {
+ WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
+@@ -2542,7 +2543,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+- iter->sernum = iter->w.root->fn_sernum;
++ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(net, &iter->w);
+ }
+@@ -2570,8 +2571,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+
+ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+ {
+- if (iter->sernum != iter->w.root->fn_sernum) {
+- iter->sernum = iter->w.root->fn_sernum;
++ int sernum = READ_ONCE(iter->w.root->fn_sernum);
++
++ if (iter->sernum != sernum) {
++ iter->sernum = sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
+index aa673a6a7e432..ceb85c67ce395 100644
+--- a/net/ipv6/ip6_flowlabel.c
++++ b/net/ipv6/ip6_flowlabel.c
+@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+ err = -EINVAL;
+ goto done;
+ }
+- if (fl_shared_exclusive(fl) || fl->opt)
++ if (fl_shared_exclusive(fl) || fl->opt) {
++ WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
+ static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
++ }
+ return fl;
+
+ done:
+diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
+index 3ad201d372d88..0efd5b4346b09 100644
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
+ {
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ __be16 protocol;
++ __be16 flags;
+
+ if (dev->type == ARPHRD_ETHER)
+ IPCB(skb)->flags = 0;
+@@ -733,16 +734,13 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
+ else
+ fl6->daddr = tunnel->parms.raddr;
+
+- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+- return -ENOMEM;
+-
+ /* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
+
+ if (tunnel->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+- __be16 flags;
++ int tun_hlen;
+
+ tun_info = skb_tunnel_info_txcheck(skb);
+ if (IS_ERR(tun_info) ||
+@@ -755,25 +753,32 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
+ fl6->daddr = key->u.ipv6.dst;
+ fl6->flowlabel = key->label;
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
++ fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id);
+
+ dsfield = key->tos;
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+- tunnel->tun_hlen = gre_calc_hlen(flags);
++ tun_hlen = gre_calc_hlen(flags);
++
++ if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
++ return -ENOMEM;
+
+- gre_build_header(skb, tunnel->tun_hlen,
++ gre_build_header(skb, tun_hlen,
+ flags, protocol,
+ tunnel_id_to_key32(tun_info->key.tun_id),
+- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
++ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
+
+ } else {
+- if (tunnel->parms.o_flags & TUNNEL_SEQ)
+- tunnel->o_seqno++;
++ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
++ return -ENOMEM;
+
+- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
++ flags = tunnel->parms.o_flags;
++
++ gre_build_header(skb, tunnel->tun_hlen, flags,
+ protocol, tunnel->parms.o_key,
+- htonl(tunnel->o_seqno));
++ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
++ : 0);
+ }
+
+ return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
+@@ -939,7 +944,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ __be16 proto;
+ __u32 mtu;
+ int nhoff;
+- int thoff;
+
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+@@ -951,19 +955,26 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+- pskb_trim(skb, dev->mtu + dev->hard_header_len);
++ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
++ goto tx_err;
+ truncate = true;
+ }
+
+- nhoff = skb_network_header(skb) - skb_mac_header(skb);
++ nhoff = skb_network_offset(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
+- thoff = skb_transport_header(skb) - skb_mac_header(skb);
+- if (skb->protocol == htons(ETH_P_IPV6) &&
+- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+- truncate = true;
++ if (skb->protocol == htons(ETH_P_IPV6)) {
++ int thoff;
++
++ if (skb_transport_header_was_set(skb))
++ thoff = skb_transport_offset(skb);
++ else
++ thoff = nhoff + sizeof(struct ipv6hdr);
++ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
++ truncate = true;
++ }
+
+ if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
+ goto tx_err;
+@@ -990,6 +1001,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
++ fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id);
+
+ dsfield = key->tos;
+ if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+@@ -1004,12 +1016,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ ntohl(tun_id),
+ ntohl(md->u.index), truncate,
+ false);
++ proto = htons(ETH_P_ERSPAN);
+ } else if (md->version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
++ proto = htons(ETH_P_ERSPAN2);
+ } else {
+ goto tx_err;
+ }
+@@ -1032,25 +1046,26 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ break;
+ }
+
+- if (t->parms.erspan_ver == 1)
++ if (t->parms.erspan_ver == 1) {
+ erspan_build_header(skb, ntohl(t->parms.o_key),
+ t->parms.index,
+ truncate, false);
+- else if (t->parms.erspan_ver == 2)
++ proto = htons(ETH_P_ERSPAN);
++ } else if (t->parms.erspan_ver == 2) {
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
+ t->parms.dir,
+ t->parms.hwid,
+ truncate, false);
+- else
++ proto = htons(ETH_P_ERSPAN2);
++ } else {
+ goto tx_err;
++ }
+
+ fl6.daddr = t->parms.raddr;
+ }
+
+ /* Push GRE header. */
+- proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
+- : htons(ETH_P_ERSPAN2);
+- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
++ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+@@ -1098,6 +1113,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
+ fl6->flowi6_proto = IPPROTO_GRE;
++ fl6->fl6_gre_key = t->parms.o_key;
+
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+@@ -1141,14 +1157,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
+ dev->needed_headroom = dst_len;
+
+ if (set_mtu) {
+- dev->mtu = rt->dst.dev->mtu - t_hlen;
++ int mtu = rt->dst.dev->mtu - t_hlen;
++
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+- dev->mtu -= 8;
++ mtu -= 8;
+ if (dev->type == ARPHRD_ETHER)
+- dev->mtu -= ETH_HLEN;
++ mtu -= ETH_HLEN;
+
+- if (dev->mtu < IPV6_MIN_MTU)
+- dev->mtu = IPV6_MIN_MTU;
++ if (mtu < IPV6_MIN_MTU)
++ mtu = IPV6_MIN_MTU;
++ WRITE_ONCE(dev->mtu, mtu);
+ }
+ }
+ ip6_rt_put(rt);
+@@ -1544,7 +1562,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
+ static struct inet6_protocol ip6gre_protocol __read_mostly = {
+ .handler = gre_rcv,
+ .err_handler = ip6gre_err,
+- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
++ .flags = INET6_PROTO_FINAL,
+ };
+
+ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
+diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
+index 80256717868e6..32071529bfd98 100644
+--- a/net/ipv6/ip6_input.c
++++ b/net/ipv6/ip6_input.c
+@@ -45,20 +45,23 @@
+ #include <net/inet_ecn.h>
+ #include <net/dst_metadata.h>
+
+-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
+ static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+ {
+- void (*edemux)(struct sk_buff *skb);
+-
+- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
+- const struct inet6_protocol *ipprot;
+-
+- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
+- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
+- udp_v6_early_demux, skb);
++ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
++ !skb_dst(skb) && !skb->sk) {
++ switch (ipv6_hdr(skb)->nexthdr) {
++ case IPPROTO_TCP:
++ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
++ tcp_v6_early_demux(skb);
++ break;
++ case IPPROTO_UDP:
++ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
++ udp_v6_early_demux(skb);
++ break;
++ }
+ }
++
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+ }
+@@ -508,7 +511,7 @@ int ip6_mc_input(struct sk_buff *skb)
+ /*
+ * IPv6 multicast router mode is now supported ;)
+ */
+- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
++ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
+ !(ipv6_addr_type(&hdr->daddr) &
+ (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
+ likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
+diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
+index 1b9827ff8ccf4..172565d125704 100644
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ if (likely(ops && ops->callbacks.gso_segment)) {
+ skb_reset_transport_header(skb);
+ segs = ops->callbacks.gso_segment(skb, features);
++ if (!segs)
++ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
+
+ if (IS_ERR_OR_NULL(segs))
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 2f044a49afa8c..2207acd7108c1 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -112,7 +112,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
+ if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+ int res = lwtunnel_xmit(skb);
+
+- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
++ if (res != LWTUNNEL_XMIT_CONTINUE)
+ return res;
+ }
+
+@@ -174,7 +174,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
+ #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb_dst(skb)->xfrm) {
+- IPCB(skb)->flags |= IPSKB_REROUTED;
++ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(net, sk, skb);
+ }
+ #endif
+@@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb)
+ goto drop;
+
+ if (!net->ipv6.devconf_all->disable_policy &&
+- !idev->cnf.disable_policy &&
++ (!idev || !idev->cnf.disable_policy) &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ goto drop;
+@@ -527,7 +527,20 @@ int ip6_forward(struct sk_buff *skb)
+ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
+ int proxied = ip6_forward_proxy_check(skb);
+ if (proxied > 0) {
+- hdr->hop_limit--;
++ /* It's tempting to decrease the hop limit
++ * here by 1, as we do at the end of the
++ * function too.
++ *
++ * But that would be incorrect, as proxying is
++ * not forwarding. The ip6_input function
++ * will handle this packet locally, and it
++ * depends on the hop limit being unchanged.
++ *
++ * One example is the NDP hop limit, that
++ * always has to stay 255, but other would be
++ * similar checks around RA packets, where the
++ * user can even change the desired limit.
++ */
+ return ip6_input(skb);
+ } else if (proxied < 0) {
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+@@ -897,6 +910,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ if (err < 0)
+ goto fail;
+
++ /* We prevent @rt from being freed. */
++ rcu_read_lock();
++
+ for (;;) {
+ /* Prepare header of the next frame,
+ * before previous one went down. */
+@@ -920,6 +936,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ if (err == 0) {
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+ IPSTATS_MIB_FRAGOKS);
++ rcu_read_unlock();
+ return 0;
+ }
+
+@@ -927,6 +944,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+ IPSTATS_MIB_FRAGFAILS);
++ rcu_read_unlock();
+ return err;
+
+ slow_path_clean:
+@@ -1289,8 +1307,7 @@ struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
+ fl6.daddr = info->key.u.ipv6.dst;
+ fl6.saddr = info->key.u.ipv6.src;
+ prio = info->key.tos;
+- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
+- info->key.label);
++ fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
+ NULL);
+@@ -1408,8 +1425,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
+ if (np->frag_size)
+ mtu = np->frag_size;
+ }
+- if (mtu < IPV6_MIN_MTU)
+- return -EINVAL;
+ cork->base.fragsize = mtu;
+ cork->base.gso_size = ipc6->gso_size;
+ cork->base.tx_flags = 0;
+@@ -1465,14 +1480,12 @@ static int __ip6_append_data(struct sock *sk,
+
+ if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+- tskey = sk->sk_tskey++;
++ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
+ hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+
+ fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
+ (opt ? opt->opt_nflen : 0);
+- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+- sizeof(struct frag_hdr);
+
+ headersize = sizeof(struct ipv6hdr) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+@@ -1480,6 +1493,13 @@ static int __ip6_append_data(struct sock *sk,
+ sizeof(struct frag_hdr) : 0) +
+ rt->rt6i_nfheader_len;
+
++ if (mtu <= fragheaderlen ||
++ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
++ goto emsgsize;
++
++ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
++ sizeof(struct frag_hdr);
++
+ /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
+ * the first fragment
+ */
+@@ -1893,8 +1913,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
+ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ if (proto == IPPROTO_ICMPV6) {
+ struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
++ u8 icmp6_type;
+
+- ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
++ if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
++ icmp6_type = fl6->fl6_icmp_type;
++ else
++ icmp6_type = icmp6_hdr(skb)->icmp6_type;
++ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ }
+
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index 20a67efda47f5..bc5d3188454d0 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
+
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Local address not yet configured!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
++ p->name);
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
++ p->name);
+ else
+ ret = 1;
+ rcu_read_unlock();
+@@ -1237,8 +1237,8 @@ route_lookup:
+ */
+ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ + dst->header_len + t->hlen;
+- if (max_headroom > dev->needed_headroom)
+- dev->needed_headroom = max_headroom;
++ if (max_headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, max_headroom);
+
+ err = ip6_tnl_encap(skb, t, &proto, fl6);
+ if (err)
+@@ -1446,8 +1446,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
+ struct net_device *tdev = NULL;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+- unsigned int mtu;
+ int t_hlen;
++ int mtu;
+
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+@@ -1494,12 +1494,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
+ dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
+
+- dev->mtu = mtu - t_hlen;
++ mtu = mtu - t_hlen;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+- dev->mtu -= 8;
++ mtu -= 8;
+
+- if (dev->mtu < IPV6_MIN_MTU)
+- dev->mtu = IPV6_MIN_MTU;
++ if (mtu < IPV6_MIN_MTU)
++ mtu = IPV6_MIN_MTU;
++ WRITE_ONCE(dev->mtu, mtu);
+ }
+ }
+ }
+diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
+index 1d8e3ffa225d8..190aa3b19591c 100644
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -570,12 +570,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ vti6_addr_conflict(t, ipv6_hdr(skb)))
+ goto tx_err;
+
+- xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET6);
+ break;
+ case htons(ETH_P_IP):
+- xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET);
+ break;
+ default:
+ goto tx_err;
+@@ -808,6 +808,8 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
++ memset(&p1, 0, sizeof(p1));
++
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6n->fb_tnl_dev) {
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index 36ed9efb88254..ee094645c7cea 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -248,7 +248,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
+ return 0;
+
+ err2:
++ rtnl_lock();
+ ip6mr_free_table(mrt);
++ rtnl_unlock();
+ err1:
+ fib_rules_unregister(ops);
+ return err;
+@@ -737,7 +739,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev) {
+- in6_dev->cnf.mc_forwarding--;
++ atomic_dec(&in6_dev->cnf.mc_forwarding);
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+@@ -905,7 +907,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev) {
+- in6_dev->cnf.mc_forwarding++;
++ atomic_inc(&in6_dev->cnf.mc_forwarding);
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in6_dev->cnf);
+@@ -1066,7 +1068,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+ And all this only to mangle msg->im6_msgtype and
+ to set msg->im6_mbz to "mbz" :-)
+ */
+- skb_push(skb, -skb_network_offset(pkt));
++ __skb_pull(skb, skb_network_offset(pkt));
+
+ skb_push(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+@@ -1555,7 +1557,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+- net->ipv6.devconf_all->mc_forwarding++;
++ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
+ }
+ write_unlock_bh(&mrt_lock);
+
+@@ -1588,7 +1590,7 @@ int ip6mr_sk_done(struct sock *sk)
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
+- net->ipv6.devconf_all->mc_forwarding--;
++ atomic_dec(&net->ipv6.devconf_all->mc_forwarding);
+ write_unlock_bh(&mrt_lock);
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
+diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
+index e4bdb09c55867..197e12d5607f1 100644
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -208,7 +208,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+- if (optlen > sysctl_optmem_max)
++ if (optlen > READ_ONCE(sysctl_optmem_max))
+ return -ENOBUFS;
+
+ gsf = memdup_sockptr(optval, optlen);
+@@ -242,7 +242,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+
+ if (optlen < size0)
+ return -EINVAL;
+- if (optlen > sysctl_optmem_max - 4)
++ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+@@ -417,15 +417,18 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ rtnl_lock();
+ lock_sock(sk);
+
++ /* Another thread has converted the socket into IPv4 with
++ * IPV6_ADDRFORM concurrently.
++ */
++ if (unlikely(sk->sk_family != AF_INET6))
++ goto unlock;
++
+ switch (optname) {
+
+ case IPV6_ADDRFORM:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val == PF_INET) {
+- struct ipv6_txoptions *opt;
+- struct sk_buff *pktopt;
+-
+ if (sk->sk_type == SOCK_RAW)
+ break;
+
+@@ -456,7 +459,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ break;
+ }
+
+- fl6_free_socklist(sk);
+ __ipv6_sock_mc_close(sk);
+ __ipv6_sock_ac_close(sk);
+
+@@ -491,14 +493,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ sk->sk_socket->ops = &inet_dgram_ops;
+ sk->sk_family = PF_INET;
+ }
+- opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+- NULL);
+- if (opt) {
+- atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+- txopt_put(opt);
+- }
+- pktopt = xchg(&np->pktoptions, NULL);
+- kfree_skb(pktopt);
++
++ /* Disable all options not to allocate memory anymore,
++ * but there is still a race. See the lockless path
++ * in udpv6_sendmsg() and ipv6_local_rxpmtu().
++ */
++ np->rxopt.all = 0;
++
++ inet6_cleanup_sock(sk);
+
+ /*
+ * ... and add it to the refcnt debug socks count
+@@ -976,6 +978,7 @@ done:
+ break;
+ }
+
++unlock:
+ release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index bed8155508c85..87c699d57b366 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
+ newpsl->sl_addr[i] = psl->sl_addr[i];
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
+- kfree_rcu(psl, rcu);
+ }
++ rcu_assign_pointer(pmc->sflist, newpsl);
++ kfree_rcu(psl, rcu);
+ psl = newpsl;
+- rcu_assign_pointer(pmc->sflist, psl);
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+ for (i = 0; i < psl->sl_count; i++) {
+@@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+ psl->sl_count, psl->sl_addr, 0);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
+- kfree_rcu(psl, rcu);
+ } else {
+ ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+ }
+- mutex_unlock(&idev->mc_lock);
+ rcu_assign_pointer(pmc->sflist, newpsl);
++ mutex_unlock(&idev->mc_lock);
++ kfree_rcu(psl, rcu);
+ pmc->sfmode = gsf->gf_fmode;
+ err = 0;
+ done:
+@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ }
+
+ /* called with rcu_read_lock() */
+-int igmp6_event_query(struct sk_buff *skb)
++void igmp6_event_query(struct sk_buff *skb)
+ {
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+- if (!idev)
+- return -EINVAL;
+-
+- if (idev->dead) {
+- kfree_skb(skb);
+- return -ENODEV;
+- }
++ if (!idev || idev->dead)
++ goto out;
+
+ spin_lock_bh(&idev->mc_query_lock);
+ if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
+ __skb_queue_tail(&idev->mc_query_queue, skb);
+ if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ in6_dev_hold(idev);
++ skb = NULL;
+ }
+ spin_unlock_bh(&idev->mc_query_lock);
+-
+- return 0;
++out:
++ kfree_skb(skb);
+ }
+
+ static void __mld_query_work(struct sk_buff *skb)
+@@ -1526,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_query_work, 0);
+ break;
+ }
+ }
+@@ -1537,32 +1532,30 @@ static void mld_query_work(struct work_struct *work)
+ __mld_query_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ /* called with rcu_read_lock() */
+-int igmp6_event_report(struct sk_buff *skb)
++void igmp6_event_report(struct sk_buff *skb)
+ {
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+- if (!idev)
+- return -EINVAL;
+-
+- if (idev->dead) {
+- kfree_skb(skb);
+- return -ENODEV;
+- }
++ if (!idev || idev->dead)
++ goto out;
+
+ spin_lock_bh(&idev->mc_report_lock);
+ if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
+ __skb_queue_tail(&idev->mc_report_queue, skb);
+ if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ in6_dev_hold(idev);
++ skb = NULL;
+ }
+ spin_unlock_bh(&idev->mc_report_lock);
+-
+- return 0;
++out:
++ kfree_skb(skb);
+ }
+
+ static void __mld_report_work(struct sk_buff *skb)
+@@ -1632,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_report_work, 0);
+ break;
+ }
+ }
+@@ -1643,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
+ __mld_report_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+@@ -1759,7 +1753,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
+ skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
+
+- if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
++ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ /* <draft-ietf-magma-mld-source-05.txt>:
+ * use unspecified address as the source address
+ * when a valid link-local address is not available.
+diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
+index 4b098521a44cd..3ab903f7e0f8d 100644
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -196,7 +196,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+ static inline int ndisc_is_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *opt)
+ {
+- return opt->nd_opt_type == ND_OPT_RDNSS ||
++ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
++ opt->nd_opt_type == ND_OPT_RDNSS ||
+ opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
+ opt->nd_opt_type == ND_OPT_PREF64 ||
+@@ -1317,6 +1318,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
+ if (!rt && lifetime) {
+ ND_PRINTK(3, info, "RA: adding default router\n");
+
++ if (neigh)
++ neigh_release(neigh);
++
+ rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
+ skb->dev, pref, defrtr_usr_metric);
+ if (!rt) {
+diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
+index 6ab710b5a1a82..118e834e91902 100644
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
+ {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sock *sk = sk_to_full_sk(sk_partial);
++ struct net_device *dev = skb_dst(skb)->dev;
+ struct flow_keys flkeys;
+ unsigned int hh_len;
+ struct dst_entry *dst;
+ int strict = (ipv6_addr_type(&iph->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+ struct flowi6 fl6 = {
+- .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+- strict ? skb_dst(skb)->dev->ifindex : 0,
+ .flowi6_mark = skb->mark,
+ .flowi6_uid = sock_net_uid(net, sk),
+ .daddr = iph->daddr,
+@@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
+ };
+ int err;
+
++ if (sk && sk->sk_bound_dev_if)
++ fl6.flowi6_oif = sk->sk_bound_dev_if;
++ else if (strict)
++ fl6.flowi6_oif = dev->ifindex;
++ else
++ fl6.flowi6_oif = l3mdev_master_ifindex(dev);
++
+ fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
+ dst = ip6_route_output(net, sk, &fl6);
+ err = dst->error;
+diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
+index a579ea14a69b6..277a5ee887eb3 100644
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -1062,7 +1062,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_counters *counters;
+ struct ip6t_entry *iter;
+
+- ret = 0;
+ counters = xt_counters_alloc(num_counters);
+ if (!counters) {
+ ret = -ENOMEM;
+@@ -1108,7 +1107,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
+ vfree(counters);
+- return ret;
++ return 0;
+
+ put_module:
+ module_put(t->me);
+@@ -1751,6 +1750,10 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
++ struct ip6t_entry *iter;
++
++ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
++ cleanup_entry(iter, net);
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
+ }
+diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+index 6bac68fb27a39..3fe4f15e01dc8 100644
+--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
++++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+ lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+- inet_twsk_deschedule_put(inet_twsk(sk));
++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
+index 92f3235fa2874..602743f6dcee0 100644
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -37,6 +37,9 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+ if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
++ } else if ((priv->flags & NFTA_FIB_F_IIF) &&
++ (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) {
++ fl6->flowi6_oif = dev->ifindex;
+ }
+
+ if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+@@ -193,7 +196,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+ goto put_rt_err;
+
+- if (oif && oif != rt->rt6i_idev->dev)
++ if (oif && oif != rt->rt6i_idev->dev &&
++ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
+ goto put_rt_err;
+
+ nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 6ac88fe24a8e0..7fab29f3ce6e8 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -96,7 +96,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ addr_type = ipv6_addr_type(daddr);
+ if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+ (addr_type & IPV6_ADDR_MAPPED) ||
+- (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
++ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if &&
++ l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index 60f1e4f5be5aa..6ff25c3e9d5a4 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -539,6 +539,7 @@ csum_copy_err:
+ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct raw6_sock *rp)
+ {
++ struct ipv6_txoptions *opt;
+ struct sk_buff *skb;
+ int err = 0;
+ int offset;
+@@ -556,6 +557,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+
+ offset = rp->offset;
+ total_len = inet_sk(sk)->cork.base.length;
++ opt = inet6_sk(sk)->cork.opt;
++ total_len -= opt ? opt->opt_flen : 0;
++
+ if (offset >= total_len - 1) {
+ err = -EINVAL;
+ ip6_flush_pending_frames(sk);
+@@ -824,7 +828,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+
+ if (!proto)
+ proto = inet->inet_num;
+- else if (proto != inet->inet_num)
++ else if (proto != inet->inet_num &&
++ inet->inet_num != IPPROTO_RAW)
+ return -EINVAL;
+
+ if (proto > 255)
+@@ -1020,6 +1025,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
+ struct raw6_sock *rp = raw6_sk(sk);
+ int val;
+
++ if (optlen < sizeof(val))
++ return -EINVAL;
++
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+@@ -1204,8 +1212,6 @@ static void raw6_destroy(struct sock *sk)
+ lock_sock(sk);
+ ip6_flush_pending_frames(sk);
+ release_sock(sk);
+-
+- inet6_destroy_sock(sk);
+ }
+
+ static int rawv6_init_sk(struct sock *sk)
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 9b9ef09382ab9..7b26882b9e70e 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
+ if (from) {
+ fn = rcu_dereference(from->fib6_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+- fn->fn_sernum = -1;
++ WRITE_ONCE(fn->fn_sernum, -1);
+ }
+ }
+ rcu_read_unlock();
+@@ -3303,6 +3303,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
+ int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
+ int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
+ unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
++ unsigned int val;
+ int entries;
+
+ entries = dst_entries_get_fast(ops);
+@@ -3313,13 +3314,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
+ entries <= rt_max_size)
+ goto out;
+
+- net->ipv6.ip6_rt_gc_expire++;
+- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
++ fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
+ entries = dst_entries_get_slow(ops);
+ if (entries < ops->gc_thresh)
+- net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
++ atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
+ out:
+- net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
++ val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
++ atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
+ return entries > rt_max_size;
+ }
+
+@@ -3680,6 +3681,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
+ fib_nh_common_release(&fib6_nh->nh_common);
+ }
+
++void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
++{
++ int cpu;
++
++ if (!fib6_nh->rt6i_pcpu)
++ return;
++
++ for_each_possible_cpu(cpu) {
++ struct rt6_info *pcpu_rt, **ppcpu_rt;
++
++ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
++ pcpu_rt = xchg(ppcpu_rt, NULL);
++ if (pcpu_rt) {
++ dst_dev_put(&pcpu_rt->dst);
++ dst_release(&pcpu_rt->dst);
++ }
++ }
++}
++
+ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+ gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+@@ -4490,7 +4510,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
+ struct inet6_dev *idev;
+ int type;
+
+- if (netif_is_l3_master(skb->dev) &&
++ if (netif_is_l3_master(skb->dev) ||
+ dst->dev == net->loopback_dev)
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+ else
+@@ -4570,8 +4590,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
+ }
+
+ f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+- if (!IS_ERR(f6i))
++ if (!IS_ERR(f6i)) {
+ f6i->dst_nocount = true;
++
++ if (!anycast &&
++ (net->ipv6.devconf_all->disable_policy ||
++ idev->cnf.disable_policy))
++ f6i->dst_nopolicy = true;
++ }
++
+ return f6i;
+ }
+
+@@ -5205,6 +5232,19 @@ out:
+ return should_notify;
+ }
+
++static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
++ struct netlink_ext_ack *extack)
++{
++ if (nla_len(nla) < sizeof(*gw)) {
++ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
++ return -EINVAL;
++ }
++
++ *gw = nla_get_in6_addr(nla);
++
++ return 0;
++}
++
+ static int ip6_route_multipath_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+ {
+@@ -5245,10 +5285,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+- r_cfg.fc_gateway = nla_get_in6_addr(nla);
++ err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
++ extack);
++ if (err)
++ goto cleanup;
++
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
++
++ /* RTA_ENCAP_TYPE length checked in
++ * lwtunnel_valid_encap_type_attr
++ */
+ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+ if (nla)
+ r_cfg.fc_encap_type = nla_get_u16(nla);
+@@ -5415,7 +5463,13 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+- nla_memcpy(&r_cfg.fc_gateway, nla, 16);
++ err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
++ extack);
++ if (err) {
++ last_err = err;
++ goto next_rtnh;
++ }
++
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+@@ -5423,6 +5477,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
+ if (err)
+ last_err = err;
+
++next_rtnh:
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+@@ -5500,16 +5555,17 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+ &nexthop_len);
+ } else {
++ struct fib6_info *sibling, *next_sibling;
+ struct fib6_nh *nh = f6i->fib6_nh;
+
+ nexthop_len = 0;
+ if (f6i->fib6_nsiblings) {
+- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+- + NLA_ALIGN(sizeof(struct rtnexthop))
+- + nla_total_size(16) /* RTA_GATEWAY */
+- + lwtunnel_get_encap_size(nh->fib_nh_lws);
++ rt6_nh_nlmsg_size(nh, &nexthop_len);
+
+- nexthop_len *= f6i->fib6_nsiblings;
++ list_for_each_entry_safe(sibling, next_sibling,
++ &f6i->fib6_siblings, fib6_siblings) {
++ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
++ }
+ }
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+ }
+@@ -5701,7 +5757,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+ if (nexthop_is_blackhole(rt->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+
+- if (net->ipv4.sysctl_nexthop_compat_mode &&
++ if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
+ rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+ goto nla_put_failure;
+
+@@ -5720,11 +5776,11 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+ }
+
+ if (!dst) {
+- if (rt->offload)
++ if (READ_ONCE(rt->offload))
+ rtm->rtm_flags |= RTM_F_OFFLOAD;
+- if (rt->trap)
++ if (READ_ONCE(rt->trap))
+ rtm->rtm_flags |= RTM_F_TRAP;
+- if (rt->offload_failed)
++ if (READ_ONCE(rt->offload_failed))
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
+ }
+
+@@ -6182,19 +6238,20 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
+ struct sk_buff *skb;
+ int err;
+
+- if (f6i->offload == offload && f6i->trap == trap &&
+- f6i->offload_failed == offload_failed)
++ if (READ_ONCE(f6i->offload) == offload &&
++ READ_ONCE(f6i->trap) == trap &&
++ READ_ONCE(f6i->offload_failed) == offload_failed)
+ return;
+
+- f6i->offload = offload;
+- f6i->trap = trap;
++ WRITE_ONCE(f6i->offload, offload);
++ WRITE_ONCE(f6i->trap, trap);
+
+ /* 2 means send notifications only if offload_failed was changed. */
+ if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
+- f6i->offload_failed == offload_failed)
++ READ_ONCE(f6i->offload_failed) == offload_failed)
+ return;
+
+- f6i->offload_failed = offload_failed;
++ WRITE_ONCE(f6i->offload_failed, offload_failed);
+
+ if (!rcu_access_pointer(f6i->fib6_node))
+ /* The route was removed from the tree, do not send
+@@ -6480,7 +6537,7 @@ static int __net_init ip6_route_net_init(struct net *net)
+ net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+ net->ipv6.sysctl.skip_notify_on_dev_down = 0;
+
+- net->ipv6.ip6_rt_gc_expire = 30*HZ;
++ atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
+
+ ret = 0;
+ out:
+@@ -6514,10 +6571,16 @@ static void __net_exit ip6_route_net_exit(struct net *net)
+ static int __net_init ip6_route_net_init_late(struct net *net)
+ {
+ #ifdef CONFIG_PROC_FS
+- proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
+- sizeof(struct ipv6_route_iter));
+- proc_create_net_single("rt6_stats", 0444, net->proc_net,
+- rt6_stats_seq_show, NULL);
++ if (!proc_create_net("ipv6_route", 0, net->proc_net,
++ &ipv6_route_seq_ops,
++ sizeof(struct ipv6_route_iter)))
++ return -ENOMEM;
++
++ if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
++ rt6_stats_seq_show, NULL)) {
++ remove_proc_entry("ipv6_route", net->proc_net);
++ return -ENOMEM;
++ }
+ #endif
+ return 0;
+ }
+diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
+index 488aec9e1a74f..d1876f1922255 100644
+--- a/net/ipv6/rpl.c
++++ b/net/ipv6/rpl.c
+@@ -32,7 +32,8 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i)
+ size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
+ unsigned char cmpre)
+ {
+- return (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre);
++ return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) +
++ IPV6_PFXTAIL_LEN(cmpre);
+ }
+
+ void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
+diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
+index e412817fba2f3..0c7c6fc16c3c3 100644
+--- a/net/ipv6/seg6.c
++++ b/net/ipv6/seg6.c
+@@ -75,6 +75,65 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
+ return true;
+ }
+
++struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags)
++{
++ struct ipv6_sr_hdr *srh;
++ int len, srhoff = 0;
++
++ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0)
++ return NULL;
++
++ if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
++ return NULL;
++
++ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
++
++ len = (srh->hdrlen + 1) << 3;
++
++ if (!pskb_may_pull(skb, srhoff + len))
++ return NULL;
++
++ /* note that pskb_may_pull may change pointers in header;
++ * for this reason it is necessary to reload them when needed.
++ */
++ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
++
++ if (!seg6_validate_srh(srh, len, true))
++ return NULL;
++
++ return srh;
++}
++
++/* Determine if an ICMP invoking packet contains a segment routing
++ * header. If it does, extract the offset to the true destination
++ * address, which is in the first segment address.
++ */
++void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt)
++{
++ __u16 network_header = skb->network_header;
++ struct ipv6_sr_hdr *srh;
++
++ /* Update network header to point to the invoking packet
++ * inside the ICMP packet, so we can use the seg6_get_srh()
++ * helper.
++ */
++ skb_reset_network_header(skb);
++
++ srh = seg6_get_srh(skb, 0);
++ if (!srh)
++ goto out;
++
++ if (srh->type != IPV6_SRCRT_TYPE_4)
++ goto out;
++
++ opt->flags |= IP6SKB_SEG6;
++ opt->srhoff = (unsigned char *)srh - skb->data;
++
++out:
++ /* Restore the network header back to the ICMP packet */
++ skb->network_header = network_header;
++}
++
+ static struct genl_family seg6_genl_family;
+
+ static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = {
+@@ -132,6 +191,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+ goto out_unlock;
+ }
+
++ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
++ err = -EINVAL;
++ goto out_unlock;
++ }
++
+ if (hinfo) {
+ err = seg6_hmac_info_del(net, hmackeyid);
+ if (err)
+diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
+index 687d95dce0852..b7d6b64cc5320 100644
+--- a/net/ipv6/seg6_hmac.c
++++ b/net/ipv6/seg6_hmac.c
+@@ -399,7 +399,6 @@ int __init seg6_hmac_init(void)
+ {
+ return seg6_hmac_init_algo();
+ }
+-EXPORT_SYMBOL(seg6_hmac_init);
+
+ int __net_init seg6_hmac_net_init(struct net *net)
+ {
+@@ -409,7 +408,6 @@ int __net_init seg6_hmac_net_init(struct net *net)
+
+ return 0;
+ }
+-EXPORT_SYMBOL(seg6_hmac_net_init);
+
+ void seg6_hmac_exit(void)
+ {
+diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
+index 3adc5d9211ad6..e756ba705fd9b 100644
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -161,6 +161,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++
++ /* the control block has been erased, so we have to set the
++ * iif once again.
++ * We read the receiving interface index directly from the
++ * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
++ * ip_rcv_core(...)).
++ */
++ IP6CB(skb)->iif = skb->skb_iif;
+ }
+
+ hdr->nexthdr = NEXTHDR_ROUTING;
+@@ -181,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+ }
+ #endif
+
++ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++
+ skb_postpush_rcsum(skb, hdr, tot_len);
+
+ return 0;
+@@ -233,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+ }
+ #endif
+
++ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++
+ skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
+
+ return 0;
+@@ -294,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb)
+ break;
+ }
+
+- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
+
+diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
+index 2dc40b3f373ef..59454285d5c58 100644
+--- a/net/ipv6/seg6_local.c
++++ b/net/ipv6/seg6_local.c
+@@ -150,40 +150,11 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
+ return (struct seg6_local_lwt *)lwt->data;
+ }
+
+-static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags)
+-{
+- struct ipv6_sr_hdr *srh;
+- int len, srhoff = 0;
+-
+- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0)
+- return NULL;
+-
+- if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
+- return NULL;
+-
+- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+-
+- len = (srh->hdrlen + 1) << 3;
+-
+- if (!pskb_may_pull(skb, srhoff + len))
+- return NULL;
+-
+- /* note that pskb_may_pull may change pointers in header;
+- * for this reason it is necessary to reload them when needed.
+- */
+- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+-
+- if (!seg6_validate_srh(srh, len, true))
+- return NULL;
+-
+- return srh;
+-}
+-
+ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
+ {
+ struct ipv6_sr_hdr *srh;
+
+- srh = get_srh(skb, IP6_FH_F_SKIP_RH);
++ srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
+ if (!srh)
+ return NULL;
+
+@@ -200,7 +171,7 @@ static bool decap_and_validate(struct sk_buff *skb, int proto)
+ struct ipv6_sr_hdr *srh;
+ unsigned int off = 0;
+
+- srh = get_srh(skb, 0);
++ srh = seg6_get_srh(skb, 0);
+ if (srh && srh->segments_left > 0)
+ return false;
+
+@@ -853,7 +824,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+ if (err)
+ goto drop;
+
+- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ seg6_lookup_nexthop(skb, NULL, 0);
+@@ -885,7 +855,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
+ if (err)
+ goto drop;
+
+- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ seg6_lookup_nexthop(skb, NULL, 0);
+diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
+index ef0c7a7c18e23..3bc02ab9ceaca 100644
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
+ NULL;
+
+- rcu_read_lock();
+-
+ ca = min(t->prl_count, cmax);
+
+ if (!kp) {
+@@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
+ }
+ }
+
+- c = 0;
++ rcu_read_lock();
+ for_each_prl_rcu(t->prl) {
+ if (c >= cmax)
+ break;
+@@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
+ if (kprl.addr != htonl(INADDR_ANY))
+ break;
+ }
+-out:
++
+ rcu_read_unlock();
+
+ len = sizeof(*kp) * c;
+@@ -362,7 +360,7 @@ out:
+ ret = -EFAULT;
+
+ kfree(kp);
+-
++out:
+ return ret;
+ }
+
+@@ -698,7 +696,7 @@ static int ipip6_rcv(struct sk_buff *skb)
+ skb->dev = tunnel->dev;
+
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+- tunnel->dev->stats.rx_errors++;
++ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto out;
+ }
+
+@@ -718,8 +716,8 @@ static int ipip6_rcv(struct sk_buff *skb)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+- ++tunnel->dev->stats.rx_frame_errors;
+- ++tunnel->dev->stats.rx_errors;
++ DEV_STATS_INC(tunnel->dev, rx_frame_errors);
++ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto out;
+ }
+ }
+@@ -950,7 +948,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+ if (!rt) {
+ rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+ if (IS_ERR(rt)) {
+- dev->stats.tx_carrier_errors++;
++ DEV_STATS_INC(dev, tx_carrier_errors);
+ goto tx_error_icmp;
+ }
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
+@@ -958,14 +956,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+
+ if (rt->rt_type != RTN_UNICAST) {
+ ip_rt_put(rt);
+- dev->stats.tx_carrier_errors++;
++ DEV_STATS_INC(dev, tx_carrier_errors);
+ goto tx_error_icmp;
+ }
+ tdev = rt->dst.dev;
+
+ if (tdev == dev) {
+ ip_rt_put(rt);
+- dev->stats.collisions++;
++ DEV_STATS_INC(dev, collisions);
+ goto tx_error;
+ }
+
+@@ -978,7 +976,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+ mtu = dst_mtu(&rt->dst) - t_hlen;
+
+ if (mtu < IPV4_MIN_MTU) {
+- dev->stats.collisions++;
++ DEV_STATS_INC(dev, collisions);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+@@ -1017,7 +1015,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+- dev->stats.tx_dropped++;
++ DEV_STATS_INC(dev, tx_dropped);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+@@ -1047,7 +1045,7 @@ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ kfree_skb(skb);
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ return NETDEV_TX_OK;
+ }
+
+@@ -1066,7 +1064,7 @@ static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+ return NETDEV_TX_OK;
+ tx_error:
+ kfree_skb(skb);
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ return NETDEV_TX_OK;
+ }
+
+@@ -1095,7 +1093,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+ return NETDEV_TX_OK;
+
+ tx_err:
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+
+@@ -1103,12 +1101,13 @@ tx_err:
+
+ static void ipip6_tunnel_bind_dev(struct net_device *dev)
+ {
++ struct ip_tunnel *tunnel = netdev_priv(dev);
++ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ struct net_device *tdev = NULL;
+- struct ip_tunnel *tunnel;
++ int hlen = LL_MAX_HEADER;
+ const struct iphdr *iph;
+ struct flowi4 fl4;
+
+- tunnel = netdev_priv(dev);
+ iph = &tunnel->parms.iph;
+
+ if (iph->daddr) {
+@@ -1131,12 +1130,15 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
+ tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
+
+ if (tdev && !netif_is_l3_master(tdev)) {
+- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
++ int mtu;
+
+- dev->mtu = tdev->mtu - t_hlen;
+- if (dev->mtu < IPV6_MIN_MTU)
+- dev->mtu = IPV6_MIN_MTU;
++ mtu = tdev->mtu - t_hlen;
++ if (mtu < IPV6_MIN_MTU)
++ mtu = IPV6_MIN_MTU;
++ WRITE_ONCE(dev->mtu, mtu);
++ hlen = tdev->hard_header_len + tdev->needed_headroom;
+ }
++ dev->needed_headroom = t_hlen + hlen;
+ }
+
+ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+@@ -1933,7 +1935,6 @@ static int __net_init sit_init_net(struct net *net)
+ return 0;
+
+ err_reg_dev:
+- ipip6_dev_free(sitn->fb_tunnel_dev);
+ free_netdev(sitn->fb_tunnel_dev);
+ err_alloc_dev:
+ return err;
+diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
+index e8cfb9e997bf0..12ae817aaf2ec 100644
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+ __u8 rcv_wscale;
+ u32 tsoff = 0;
+
+- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
++ !th->ack || th->rst)
+ goto out;
+
+ if (tcp_synq_no_recent_overflow(sk))
+@@ -170,7 +171,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+ goto out;
+
+ ret = NULL;
+- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
++ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
++ &tcp_request_sock_ipv6_ops, sk, skb);
+ if (!req)
+ goto out;
+
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index b03dd02c9f13c..c18fdddbfa09d 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -107,9 +107,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+ if (dst && dst_hold_safe(dst)) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+- sk->sk_rx_dst = dst;
+- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+- tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
++ rcu_assign_pointer(sk->sk_rx_dst, dst);
++ sk->sk_rx_dst_ifindex = skb->skb_iif;
++ sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ }
+ }
+
+@@ -269,6 +269,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
++ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.fl6_dport = usin->sin6_port;
+@@ -339,6 +340,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+
+ late_failure:
+ tcp_set_state(sk, TCP_CLOSE);
++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
++ inet_reset_saddr(sk);
+ failure:
+ inet->inet_dport = 0;
+ sk->sk_route_caps = 0;
+@@ -542,7 +545,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (np->repflow && ireq->pktopts)
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (np->tclass & INET_ECN_MASK) :
+ np->tclass;
+@@ -1001,7 +1004,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
+ * Underlying function will use this to retrieve the network
+ * namespace
+ */
+- dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
++ if (sk && sk->sk_state != TCP_TIME_WAIT)
++ dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
++ else
++ dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
+ if (!IS_ERR(dst)) {
+ skb_dst_set(buff, dst);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
+@@ -1165,7 +1171,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent, sk->sk_bound_dev_if,
++ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
+ }
+@@ -1364,7 +1370,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ /* Clone native IPv6 options from listening socket (if any)
+@@ -1424,14 +1430,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (ireq->pktopts) {
+- newnp->pktoptions = skb_clone(ireq->pktopts,
+- sk_gfp_mask(sk, GFP_ATOMIC));
++ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+- if (newnp->pktoptions) {
++ if (newnp->pktoptions)
+ tcp_v6_restore_cb(newnp->pktoptions);
+- skb_set_owner_r(newnp->pktoptions, newsk);
+- }
+ }
+ } else {
+ if (!req_unhash && found_dup_sk) {
+@@ -1501,19 +1504,22 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+ --ANK (980728)
+ */
+ if (np->rxopt.all)
+- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
++ opt_skb = skb_clone_and_charge_r(skb, sk);
+
+ if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+- struct dst_entry *dst = sk->sk_rx_dst;
++ struct dst_entry *dst;
++
++ dst = rcu_dereference_protected(sk->sk_rx_dst,
++ lockdep_sock_is_held(sk));
+
+ sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ if (dst) {
+- if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
++ if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
+ INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
+- dst, np->rx_dst_cookie) == NULL) {
++ dst, sk->sk_rx_dst_cookie) == NULL) {
++ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
+ dst_release(dst);
+- sk->sk_rx_dst = NULL;
+ }
+ }
+
+@@ -1582,7 +1588,6 @@ ipv6_pktoptions:
+ if (np->repflow)
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
+ if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
+- skb_set_owner_r(opt_skb, sk);
+ tcp_v6_restore_cb(opt_skb);
+ opt_skb = xchg(&np->pktoptions, opt_skb);
+ } else {
+@@ -1848,7 +1853,7 @@ do_time_wait:
+ goto discard_it;
+ }
+
+-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
++void tcp_v6_early_demux(struct sk_buff *skb)
+ {
+ const struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+@@ -1875,12 +1880,12 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk_fullsock(sk)) {
+- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
++ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
+
+ if (dst)
+- dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
++ dst = dst_check(dst, sk->sk_rx_dst_cookie);
+ if (dst &&
+- inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
++ sk->sk_rx_dst_ifindex == skb->skb_iif)
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+@@ -1967,12 +1972,6 @@ static int tcp_v6_init_sock(struct sock *sk)
+ return 0;
+ }
+
+-static void tcp_v6_destroy_sock(struct sock *sk)
+-{
+- tcp_v4_destroy_sock(sk);
+- inet6_destroy_sock(sk);
+-}
+-
+ #ifdef CONFIG_PROC_FS
+ /* Proc filesystem TCPv6 sock list dumping. */
+ static void get_openreq6(struct seq_file *seq,
+@@ -2072,7 +2071,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
+- tp->snd_cwnd,
++ tcp_snd_cwnd(tp),
+ state == TCP_LISTEN ?
+ fastopenq->max_qlen :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+@@ -2165,7 +2164,7 @@ struct proto tcpv6_prot = {
+ .accept = inet_csk_accept,
+ .ioctl = tcp_ioctl,
+ .init = tcp_v6_init_sock,
+- .destroy = tcp_v6_destroy_sock,
++ .destroy = tcp_v4_destroy_sock,
+ .shutdown = tcp_shutdown,
+ .setsockopt = tcp_setsockopt,
+ .getsockopt = tcp_getsockopt,
+@@ -2203,12 +2202,7 @@ struct proto tcpv6_prot = {
+ };
+ EXPORT_SYMBOL_GPL(tcpv6_prot);
+
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct inet6_protocol tcpv6_protocol = {
+- .early_demux = tcp_v6_early_demux,
+- .early_demux_handler = tcp_v6_early_demux,
++static const struct inet6_protocol tcpv6_protocol = {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 8d785232b4796..d5d254ca2dfe6 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -40,6 +40,7 @@
+ #include <net/transp_v6.h>
+ #include <net/ip6_route.h>
+ #include <net/raw.h>
++#include <net/seg6.h>
+ #include <net/tcp_states.h>
+ #include <net/ip6_checksum.h>
+ #include <net/ip6_tunnel.h>
+@@ -54,6 +55,19 @@
+ #include <trace/events/skb.h>
+ #include "udp_impl.h"
+
++static void udpv6_destruct_sock(struct sock *sk)
++{
++ udp_destruct_common(sk);
++ inet6_sock_destruct(sk);
++}
++
++int udpv6_init_sock(struct sock *sk)
++{
++ skb_queue_head_init(&udp_sk(sk)->reader_queue);
++ sk->sk_destruct = udpv6_destruct_sock;
++ return 0;
++}
++
+ static u32 udp6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+@@ -74,7 +88,7 @@ static u32 udp6_ehashfn(const struct net *net,
+ fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+- udp_ipv6_hash_secret + net_hash_mix(net));
++ udp6_ehash_secret + net_hash_mix(net));
+ }
+
+ int udp_v6_get_port(struct sock *sk, unsigned short snum)
+@@ -176,14 +190,23 @@ static struct sock *udp6_lib_lookup2(struct net *net,
+ score = compute_score(sk, net, saddr, sport,
+ daddr, hnum, dif, sdif);
+ if (score > badness) {
+- result = lookup_reuseport(net, sk, skb,
+- saddr, sport, daddr, hnum);
++ badness = score;
++ result = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
++ if (!result) {
++ result = sk;
++ continue;
++ }
++
+ /* Fall back to scoring if group has connections */
+- if (result && !reuseport_has_conns(sk, false))
++ if (!reuseport_has_conns(sk))
+ return result;
+
+- result = result ? : sk;
+- badness = score;
++ /* Reuseport logic returned an error, keep original score. */
++ if (IS_ERR(result))
++ continue;
++
++ badness = compute_score(sk, net, saddr, sport,
++ daddr, hnum, dif, sdif);
+ }
+ }
+ return result;
+@@ -561,7 +584,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ struct ipv6_pinfo *np;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct in6_addr *saddr = &hdr->saddr;
+- const struct in6_addr *daddr = &hdr->daddr;
++ const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr;
+ struct udphdr *uh = (struct udphdr *)(skb->data+offset);
+ bool tunnel = false;
+ struct sock *sk;
+@@ -613,8 +636,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ }
+
+ /* Tunnels don't have an application socket: don't pass errors back */
+- if (tunnel)
++ if (tunnel) {
++ if (udp_sk(sk)->encap_err_rcv)
++ udp_sk(sk)->encap_err_rcv(sk, skb, offset);
+ goto out;
++ }
+
+ if (!np->recverr) {
+ if (!harderr || sk->sk_state != TCP_ESTABLISHED)
+@@ -884,7 +910,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ if (udp_sk_rx_dst_set(sk, dst)) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+- inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
++ sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ }
+ }
+
+@@ -910,6 +936,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
+ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ int proto)
+ {
++ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ const struct in6_addr *saddr, *daddr;
+ struct net *net = dev_net(skb->dev);
+ struct udphdr *uh;
+@@ -956,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ struct dst_entry *dst = skb_dst(skb);
+ int ret;
+
+- if (unlikely(sk->sk_rx_dst != dst))
++ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
+ udp6_sk_rx_dst_set(sk, dst);
+
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+@@ -986,6 +1013,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ return udp6_unicast_rcv_skb(sk, skb, uh);
+ }
+
++ reason = SKB_DROP_REASON_NO_SOCKET;
++
+ if (!uh->check)
+ goto report_csum_error;
+
+@@ -998,10 +1027,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
+ __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+- kfree_skb(skb);
++ kfree_skb_reason(skb, reason);
+ return 0;
+
+ short_packet:
++ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
++ reason = SKB_DROP_REASON_PKT_TOO_SMALL;
+ net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
+ saddr, ntohs(uh->source),
+@@ -1012,10 +1043,12 @@ short_packet:
+ report_csum_error:
+ udp6_csum_zero_error(skb);
+ csum_error:
++ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
++ reason = SKB_DROP_REASON_UDP_CSUM;
+ __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ discard:
+ __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+- kfree_skb(skb);
++ kfree_skb_reason(skb, reason);
+ return 0;
+ }
+
+@@ -1034,7 +1067,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
+
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (sk->sk_state == TCP_ESTABLISHED &&
+- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
++ inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
+ return sk;
+ /* Only check first socket in chain */
+ break;
+@@ -1042,7 +1075,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
+ return NULL;
+ }
+
+-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
++void udp_v6_early_demux(struct sk_buff *skb)
+ {
+ struct net *net = dev_net(skb->dev);
+ const struct udphdr *uh;
+@@ -1070,10 +1103,10 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+
+ skb->sk = sk;
+ skb->destructor = sock_efree;
+- dst = READ_ONCE(sk->sk_rx_dst);
++ dst = rcu_dereference(sk->sk_rx_dst);
+
+ if (dst)
+- dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
++ dst = dst_check(dst, sk->sk_rx_dst_cookie);
+ if (dst) {
+ /* set noref for now.
+ * any place which wants to hold dst has to call
+@@ -1204,7 +1237,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+@@ -1355,9 +1388,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ msg->msg_name = &sin;
+ msg->msg_namelen = sizeof(sin);
+ do_udp_sendmsg:
+- if (__ipv6_only_sock(sk))
+- return -ENETUNREACH;
+- return udp_sendmsg(sk, msg, len);
++ err = __ipv6_only_sock(sk) ?
++ -ENETUNREACH : udp_sendmsg(sk, msg, len);
++ msg->msg_name = sin6;
++ msg->msg_namelen = addr_len;
++ return err;
+ }
+ }
+
+@@ -1435,7 +1470,6 @@ do_udp_sendmsg:
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
+- fl6.flowi6_mark = ipc6.sockc.mark;
+ fl6.flowi6_uid = sk->sk_uid;
+
+ if (msg->msg_controllen) {
+@@ -1471,6 +1505,7 @@ do_udp_sendmsg:
+ ipc6.opt = opt;
+
+ fl6.flowi6_proto = sk->sk_protocol;
++ fl6.flowi6_mark = ipc6.sockc.mark;
+ fl6.daddr = *daddr;
+ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
+ fl6.saddr = np->saddr;
+@@ -1630,8 +1665,6 @@ void udpv6_destroy_sock(struct sock *sk)
+ udp_encap_disable();
+ }
+ }
+-
+- inet6_destroy_sock(sk);
+ }
+
+ /*
+@@ -1655,12 +1688,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ return ipv6_getsockopt(sk, level, optname, optval, optlen);
+ }
+
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct inet6_protocol udpv6_protocol = {
+- .early_demux = udp_v6_early_demux,
+- .early_demux_handler = udp_v6_early_demux,
++static const struct inet6_protocol udpv6_protocol = {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+@@ -1720,7 +1748,7 @@ struct proto udpv6_prot = {
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+- .init = udp_init_sock,
++ .init = udpv6_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
+index b2fcc46c1630e..e497768194414 100644
+--- a/net/ipv6/udp_impl.h
++++ b/net/ipv6/udp_impl.h
+@@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
+ int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+ __be32, struct udp_table *);
+
++int udpv6_init_sock(struct sock *sk);
+ int udp_v6_get_port(struct sock *sk, unsigned short snum);
+ void udp_v6_rehash(struct sock *sk);
+
+diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
+index fbb700d3f437e..26199f743791c 100644
+--- a/net/ipv6/udplite.c
++++ b/net/ipv6/udplite.c
+@@ -12,6 +12,13 @@
+ #include <linux/proc_fs.h>
+ #include "udp_impl.h"
+
++static int udplitev6_sk_init(struct sock *sk)
++{
++ udpv6_init_sock(sk);
++ udp_sk(sk)->pcflag = UDPLITE_BIT;
++ return 0;
++}
++
+ static int udplitev6_rcv(struct sk_buff *skb)
+ {
+ return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
+@@ -38,7 +45,7 @@ struct proto udplitev6_prot = {
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+- .init = udplite_sk_init,
++ .init = udplitev6_sk_init,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+@@ -50,6 +57,8 @@ struct proto udplitev6_prot = {
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
++ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
++ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udplite_table,
+ };
+diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
+index 04cbeefd89828..4907ab241d6be 100644
+--- a/net/ipv6/xfrm6_input.c
++++ b/net/ipv6/xfrm6_input.c
+@@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+ __be32 *udpdata32;
+ __u16 encap_type = up->encap_type;
+
++ if (skb->protocol == htons(ETH_P_IP))
++ return xfrm4_udp_encap_rcv(sk, skb);
++
+ /* if this is not encapsulated socket, then just return now */
+ if (!encap_type)
+ return 1;
+diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
+index d0d280077721b..ad07904642cad 100644
+--- a/net/ipv6/xfrm6_output.c
++++ b/net/ipv6/xfrm6_output.c
+@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
+ return xfrm_output(sk, skb);
+ }
+
++static int xfrm6_noneed_fragment(struct sk_buff *skb)
++{
++ struct frag_hdr *fh;
++ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
++
++ if (prevhdr != NEXTHDR_FRAGMENT)
++ return 0;
++ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
++ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
++ return 1;
++ return 0;
++}
++
+ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+ struct dst_entry *dst = skb_dst(skb);
+@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ xfrm6_local_rxpmtu(skb, mtu);
+ kfree_skb(skb);
+ return -EMSGSIZE;
++ } else if (toobig && xfrm6_noneed_fragment(skb)) {
++ skb->ignore_df = 1;
++ goto skip_frag;
+ } else if (!skb->ignore_df && toobig && skb->sk) {
+ xfrm_local_error(skb, mtu);
+ kfree_skb(skb);
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index af7a4b8b1e9c4..247296e3294bd 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -289,9 +289,13 @@ int __init xfrm6_init(void)
+ if (ret)
+ goto out_state;
+
+- register_pernet_subsys(&xfrm6_net_ops);
++ ret = register_pernet_subsys(&xfrm6_net_ops);
++ if (ret)
++ goto out_protocol;
+ out:
+ return ret;
++out_protocol:
++ xfrm6_protocol_fini();
+ out_state:
+ xfrm6_state_fini();
+ out_policy:
+diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
+index f3343a8541a57..8efc369934fc7 100644
+--- a/net/iucv/iucv.c
++++ b/net/iucv/iucv.c
+@@ -83,7 +83,7 @@ struct iucv_irq_data {
+ u16 ippathid;
+ u8 ipflags1;
+ u8 iptype;
+- u32 res2[8];
++ u32 res2[9];
+ };
+
+ struct iucv_irq_list {
+diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
+index 11a715d76a4f1..0d1ab4149553c 100644
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -161,7 +161,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm)
+ /* Buffer limit is okay now, add to ready list */
+ list_add_tail(&kcm->wait_rx_list,
+ &kcm->mux->kcm_rx_waiters);
+- kcm->rx_wait = true;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_wait, true);
+ }
+
+ static void kcm_rfree(struct sk_buff *skb)
+@@ -177,7 +178,7 @@ static void kcm_rfree(struct sk_buff *skb)
+ /* For reading rx_wait and rx_psock without holding lock */
+ smp_mb__after_atomic();
+
+- if (!kcm->rx_wait && !kcm->rx_psock &&
++ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
+ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
+ spin_lock_bh(&mux->rx_lock);
+ kcm_rcv_ready(kcm);
+@@ -220,7 +221,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
+ struct sk_buff *skb;
+ struct kcm_sock *kcm;
+
+- while ((skb = __skb_dequeue(head))) {
++ while ((skb = skb_dequeue(head))) {
+ /* Reset destructor to avoid calling kcm_rcv_ready */
+ skb->destructor = sock_rfree;
+ skb_orphan(skb);
+@@ -236,7 +237,8 @@ try_again:
+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+ /* Should mean socket buffer full */
+ list_del(&kcm->wait_rx_list);
+- kcm->rx_wait = false;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_wait, false);
+
+ /* Commit rx_wait to read in kcm_free */
+ smp_wmb();
+@@ -279,10 +281,12 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
+ kcm = list_first_entry(&mux->kcm_rx_waiters,
+ struct kcm_sock, wait_rx_list);
+ list_del(&kcm->wait_rx_list);
+- kcm->rx_wait = false;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_wait, false);
+
+ psock->rx_kcm = kcm;
+- kcm->rx_psock = psock;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_psock, psock);
+
+ spin_unlock_bh(&mux->rx_lock);
+
+@@ -309,7 +313,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
+ spin_lock_bh(&mux->rx_lock);
+
+ psock->rx_kcm = NULL;
+- kcm->rx_psock = NULL;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_psock, NULL);
+
+ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
+ * kcm_rfree
+@@ -833,7 +838,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
+ }
+
+ get_page(page);
+- skb_fill_page_desc(skb, i, page, offset, size);
++ skb_fill_page_desc_noacc(skb, i, page, offset, size);
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+
+ coalesced:
+@@ -1059,15 +1064,18 @@ partial_message:
+ out_error:
+ kcm_push(kcm);
+
+- if (copied && sock->type == SOCK_SEQPACKET) {
++ if (sock->type == SOCK_SEQPACKET) {
+ /* Wrote some bytes before encountering an
+ * error, return partial success.
+ */
+- goto partial_message;
+- }
+-
+- if (head != kcm->seq_skb)
++ if (copied)
++ goto partial_message;
++ if (head != kcm->seq_skb)
++ kfree_skb(head);
++ } else {
+ kfree_skb(head);
++ kcm->seq_skb = NULL;
++ }
+
+ err = sk_stream_error(sk, msg->msg_flags, err);
+
+@@ -1079,53 +1087,18 @@ out_error:
+ return err;
+ }
+
+-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
+- long timeo, int *err)
+-{
+- struct sk_buff *skb;
+-
+- while (!(skb = skb_peek(&sk->sk_receive_queue))) {
+- if (sk->sk_err) {
+- *err = sock_error(sk);
+- return NULL;
+- }
+-
+- if (sock_flag(sk, SOCK_DONE))
+- return NULL;
+-
+- if ((flags & MSG_DONTWAIT) || !timeo) {
+- *err = -EAGAIN;
+- return NULL;
+- }
+-
+- sk_wait_data(sk, &timeo, NULL);
+-
+- /* Handle signals */
+- if (signal_pending(current)) {
+- *err = sock_intr_errno(timeo);
+- return NULL;
+- }
+- }
+-
+- return skb;
+-}
+-
+ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+ {
++ int noblock = flags & MSG_DONTWAIT;
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ int err = 0;
+- long timeo;
+ struct strp_msg *stm;
+ int copied = 0;
+ struct sk_buff *skb;
+
+- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+-
+- lock_sock(sk);
+-
+- skb = kcm_wait_data(sk, flags, timeo, &err);
++ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
+
+@@ -1156,14 +1129,11 @@ msg_finished:
+ /* Finished with message */
+ msg->msg_flags |= MSG_EOR;
+ KCM_STATS_INCR(kcm->stats.rx_msgs);
+- skb_unlink(skb, &sk->sk_receive_queue);
+- kfree_skb(skb);
+ }
+ }
+
+ out:
+- release_sock(sk);
+-
++ skb_free_datagram(sk, skb);
+ return copied ? : err;
+ }
+
+@@ -1171,9 +1141,9 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+ {
++ int noblock = flags & MSG_DONTWAIT;
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+- long timeo;
+ struct strp_msg *stm;
+ int err = 0;
+ ssize_t copied;
+@@ -1181,11 +1151,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
+
+ /* Only support splice for SOCKSEQPACKET */
+
+- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+-
+- lock_sock(sk);
+-
+- skb = kcm_wait_data(sk, flags, timeo, &err);
++ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto err_out;
+
+@@ -1213,13 +1179,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
+ * finish reading the message.
+ */
+
+- release_sock(sk);
+-
++ skb_free_datagram(sk, skb);
+ return copied;
+
+ err_out:
+- release_sock(sk);
+-
++ skb_free_datagram(sk, skb);
+ return err;
+ }
+
+@@ -1239,7 +1203,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm)
+ if (!kcm->rx_psock) {
+ if (kcm->rx_wait) {
+ list_del(&kcm->wait_rx_list);
+- kcm->rx_wait = false;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_wait, false);
+ }
+
+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
+@@ -1411,12 +1376,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
+ psock->sk = csk;
+ psock->bpf_prog = prog;
+
+- err = strp_init(&psock->strp, csk, &cb);
+- if (err) {
+- kmem_cache_free(kcm_psockp, psock);
+- goto out;
+- }
+-
+ write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is already by KCM or someone else.
+@@ -1424,13 +1383,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+- strp_stop(&psock->strp);
+- strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ err = -EALREADY;
+ goto out;
+ }
+
++ err = strp_init(&psock->strp, csk, &cb);
++ if (err) {
++ write_unlock_bh(&csk->sk_callback_lock);
++ kmem_cache_free(kcm_psockp, psock);
++ goto out;
++ }
++
+ psock->save_data_ready = csk->sk_data_ready;
+ psock->save_write_space = csk->sk_write_space;
+ psock->save_state_change = csk->sk_state_change;
+@@ -1793,7 +1757,8 @@ static void kcm_done(struct kcm_sock *kcm)
+
+ if (kcm->rx_wait) {
+ list_del(&kcm->wait_rx_list);
+- kcm->rx_wait = false;
++ /* paired with lockless reads in kcm_rfree() */
++ WRITE_ONCE(kcm->rx_wait, false);
+ }
+ /* Move any pending receive messages to other kcm sockets */
+ requeue_rx_msgs(mux, &sk->sk_receive_queue);
+@@ -1838,10 +1803,10 @@ static int kcm_release(struct socket *sock)
+ kcm = kcm_sk(sk);
+ mux = kcm->mux;
+
++ lock_sock(sk);
+ sock_orphan(sk);
+ kfree_skb(kcm->seq_skb);
+
+- lock_sock(sk);
+ /* Purge queue under lock to avoid race condition with tx_work trying
+ * to act when queue is nonempty. If tx_work runs after this point
+ * it will just return.
+@@ -2020,6 +1985,8 @@ static __net_exit void kcm_exit_net(struct net *net)
+ * that all multiplexors and psocks have been destroyed.
+ */
+ WARN_ON(!list_empty(&knet->mux_list));
++
++ mutex_destroy(&knet->mutex);
+ }
+
+ static struct pernet_operations kcm_net_ops = {
+diff --git a/net/key/af_key.c b/net/key/af_key.c
+index de24a7d474dfd..258fa046f440d 100644
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
+ pfk->registered |= (1<<hdr->sadb_msg_satype);
+ }
+
++ mutex_lock(&pfkey_mutex);
+ xfrm_probe_algs();
+
+- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
++ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
++ mutex_unlock(&pfkey_mutex);
++
+ if (!supp_skb) {
+ if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
+ pfk->registered &= ~(1<<hdr->sadb_msg_satype);
+@@ -1845,9 +1848,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
+ if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
+ struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+
+- if ((xfilter->sadb_x_filter_splen >=
++ if ((xfilter->sadb_x_filter_splen >
+ (sizeof(xfrm_address_t) << 3)) ||
+- (xfilter->sadb_x_filter_dplen >=
++ (xfilter->sadb_x_filter_dplen >
+ (sizeof(xfrm_address_t) << 3))) {
+ mutex_unlock(&pfk->dump_lock);
+ return -EINVAL;
+@@ -1937,7 +1940,8 @@ static u32 gen_reqid(struct net *net)
+ }
+
+ static int
+-parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
++parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol,
++ struct sadb_x_ipsecrequest *rq)
+ {
+ struct net *net = xp_net(xp);
+ struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
+@@ -1955,9 +1959,12 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
+ if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
+ return -EINVAL;
+ t->mode = mode;
+- if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
++ if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) {
++ if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) &&
++ pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND)
++ return -EINVAL;
+ t->optional = 1;
+- else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
++ } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
+ t->reqid = rq->sadb_x_ipsecrequest_reqid;
+ if (t->reqid > IPSEC_MANUAL_REQID_MAX)
+ t->reqid = 0;
+@@ -1999,7 +2006,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
+ rq->sadb_x_ipsecrequest_len < sizeof(*rq))
+ return -EINVAL;
+
+- if ((err = parse_ipsecrequest(xp, rq)) < 0)
++ if ((err = parse_ipsecrequest(xp, pol, rq)) < 0)
+ return err;
+ len -= rq->sadb_x_ipsecrequest_len;
+ rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len);
+@@ -2623,7 +2630,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
+ }
+
+ return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
+- kma ? &k : NULL, net, NULL);
++ kma ? &k : NULL, net, NULL, 0);
+
+ out:
+ return err;
+@@ -2826,6 +2833,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
+ void *ext_hdrs[SADB_EXT_MAX];
+ int err;
+
++ /* Non-zero return value of pfkey_broadcast() does not always signal
++ * an error and even on an actual error we may still want to process
++ * the message so rather ignore the return value.
++ */
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+
+@@ -2934,9 +2945,10 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
+ return sz + sizeof(struct sadb_prop);
+ }
+
+-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
++static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+ {
+ struct sadb_prop *p;
++ int sz = 0;
+ int i;
+
+ p = skb_put(skb, sizeof(struct sadb_prop));
+@@ -2964,13 +2976,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+ c->sadb_comb_soft_addtime = 20*60*60;
+ c->sadb_comb_hard_usetime = 8*60*60;
+ c->sadb_comb_soft_usetime = 7*60*60;
++ sz += sizeof(*c);
+ }
+ }
++
++ return sz + sizeof(*p);
+ }
+
+-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
++static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+ {
+ struct sadb_prop *p;
++ int sz = 0;
+ int i, k;
+
+ p = skb_put(skb, sizeof(struct sadb_prop));
+@@ -3012,8 +3028,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+ c->sadb_comb_soft_addtime = 20*60*60;
+ c->sadb_comb_hard_usetime = 8*60*60;
+ c->sadb_comb_soft_usetime = 7*60*60;
++ sz += sizeof(*c);
+ }
+ }
++
++ return sz + sizeof(*p);
+ }
+
+ static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
+@@ -3143,6 +3162,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
+ struct sadb_x_sec_ctx *sec_ctx;
+ struct xfrm_sec_ctx *xfrm_ctx;
+ int ctx_size = 0;
++ int alg_size = 0;
+
+ sockaddr_size = pfkey_sockaddr_size(x->props.family);
+ if (!sockaddr_size)
+@@ -3154,16 +3174,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
+ sizeof(struct sadb_x_policy);
+
+ if (x->id.proto == IPPROTO_AH)
+- size += count_ah_combs(t);
++ alg_size = count_ah_combs(t);
+ else if (x->id.proto == IPPROTO_ESP)
+- size += count_esp_combs(t);
++ alg_size = count_esp_combs(t);
+
+ if ((xfrm_ctx = x->security)) {
+ ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+ size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
+ }
+
+- skb = alloc_skb(size + 16, GFP_ATOMIC);
++ skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+@@ -3217,10 +3237,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
+ pol->sadb_x_policy_priority = xp->priority;
+
+ /* Set sadb_comb's. */
++ alg_size = 0;
+ if (x->id.proto == IPPROTO_AH)
+- dump_ah_combs(skb, t);
++ alg_size = dump_ah_combs(skb, t);
+ else if (x->id.proto == IPPROTO_ESP)
+- dump_esp_combs(skb, t);
++ alg_size = dump_esp_combs(skb, t);
++
++ hdr->sadb_msg_len += alg_size / 8;
+
+ /* security context */
+ if (xfrm_ctx) {
+@@ -3375,7 +3398,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
+ hdr->sadb_msg_len = size / sizeof(uint64_t);
+ hdr->sadb_msg_errno = 0;
+ hdr->sadb_msg_reserved = 0;
+- hdr->sadb_msg_seq = x->km.seq = get_acqseq();
++ hdr->sadb_msg_seq = x->km.seq;
+ hdr->sadb_msg_pid = 0;
+
+ /* SA */
+diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
+index 93271a2632b8e..a2b13e213e06f 100644
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
+ /* per-net private data for this module */
+ static unsigned int l2tp_net_id;
+ struct l2tp_net {
+- struct list_head l2tp_tunnel_list;
+- /* Lock for write access to l2tp_tunnel_list */
+- spinlock_t l2tp_tunnel_list_lock;
++ /* Lock for write access to l2tp_tunnel_idr */
++ spinlock_t l2tp_tunnel_idr_lock;
++ struct idr l2tp_tunnel_idr;
+ struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
+ /* Lock for write access to l2tp_session_hlist */
+ spinlock_t l2tp_session_hlist_lock;
+@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
+ struct l2tp_tunnel *tunnel;
+
+ rcu_read_lock_bh();
+- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+- if (tunnel->tunnel_id == tunnel_id &&
+- refcount_inc_not_zero(&tunnel->ref_count)) {
+- rcu_read_unlock_bh();
+-
+- return tunnel;
+- }
++ tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
++ if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
++ rcu_read_unlock_bh();
++ return tunnel;
+ }
+ rcu_read_unlock_bh();
+
+@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+
+ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
+ {
+- const struct l2tp_net *pn = l2tp_pernet(net);
++ struct l2tp_net *pn = l2tp_pernet(net);
++ unsigned long tunnel_id, tmp;
+ struct l2tp_tunnel *tunnel;
+ int count = 0;
+
+ rcu_read_lock_bh();
+- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+- if (++count > nth &&
++ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
++ if (tunnel && ++count > nth &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
+@@ -1043,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
+ nf_reset_ct(skb);
+
+- bh_lock_sock(sk);
++ bh_lock_sock_nested(sk);
+ if (sock_owned_by_user(sk)) {
+ kfree_skb(skb);
+ ret = NET_XMIT_DROP;
+@@ -1150,8 +1148,10 @@ static void l2tp_tunnel_destruct(struct sock *sk)
+ }
+
+ /* Remove hooks into tunnel socket */
++ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_destruct = tunnel->old_sk_destruct;
+ sk->sk_user_data = NULL;
++ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Call the original destructor */
+ if (sk->sk_destruct)
+@@ -1227,6 +1227,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
+ l2tp_tunnel_delete(tunnel);
+ }
+
++static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
++{
++ struct l2tp_net *pn = l2tp_pernet(net);
++
++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
++ idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
++}
++
+ /* Workqueue tunnel deletion function */
+ static void l2tp_tunnel_del_work(struct work_struct *work)
+ {
+@@ -1234,7 +1243,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
+ del_work);
+ struct sock *sk = tunnel->sock;
+ struct socket *sock = sk->sk_socket;
+- struct l2tp_net *pn;
+
+ l2tp_tunnel_closeall(tunnel);
+
+@@ -1248,12 +1256,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
+ }
+ }
+
+- /* Remove the tunnel struct from the tunnel list */
+- pn = l2tp_pernet(tunnel->l2tp_net);
+- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+- list_del_rcu(&tunnel->list);
+- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+-
++ l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
+ /* drop initial ref */
+ l2tp_tunnel_dec_refcount(tunnel);
+
+@@ -1384,8 +1387,6 @@ out:
+ return err;
+ }
+
+-static struct lock_class_key l2tp_socket_class;
+-
+ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
+ struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+ {
+@@ -1455,12 +1456,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
+ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ struct l2tp_tunnel_cfg *cfg)
+ {
+- struct l2tp_tunnel *tunnel_walk;
+- struct l2tp_net *pn;
++ struct l2tp_net *pn = l2tp_pernet(net);
++ u32 tunnel_id = tunnel->tunnel_id;
+ struct socket *sock;
+ struct sock *sk;
+ int ret;
+
++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
++ ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
++ GFP_ATOMIC);
++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
++ if (ret)
++ return ret == -ENOSPC ? -EEXIST : ret;
++
+ if (tunnel->fd < 0) {
+ ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
+ tunnel->peer_tunnel_id, cfg,
+@@ -1471,30 +1479,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ sock = sockfd_lookup(tunnel->fd, &ret);
+ if (!sock)
+ goto err;
+-
+- ret = l2tp_validate_socket(sock->sk, net, tunnel->encap);
+- if (ret < 0)
+- goto err_sock;
+ }
+
+- tunnel->l2tp_net = net;
+- pn = l2tp_pernet(net);
+-
+ sk = sock->sk;
+- sock_hold(sk);
+- tunnel->sock = sk;
+-
+- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+- list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
+- if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
+- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+- sock_put(sk);
+- ret = -EEXIST;
+- goto err_sock;
+- }
+- }
+- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
++ lock_sock(sk);
++ write_lock_bh(&sk->sk_callback_lock);
++ ret = l2tp_validate_socket(sk, net, tunnel->encap);
++ if (ret < 0)
++ goto err_inval_sock;
++ rcu_assign_sk_user_data(sk, tunnel);
++ write_unlock_bh(&sk->sk_callback_lock);
+
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ struct udp_tunnel_sock_cfg udp_cfg = {
+@@ -1505,15 +1499,20 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ };
+
+ setup_udp_tunnel_sock(net, sock, &udp_cfg);
+- } else {
+- sk->sk_user_data = tunnel;
+ }
+
+ tunnel->old_sk_destruct = sk->sk_destruct;
+ sk->sk_destruct = &l2tp_tunnel_destruct;
+- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
+- "l2tp_sock");
+ sk->sk_allocation = GFP_ATOMIC;
++ release_sock(sk);
++
++ sock_hold(sk);
++ tunnel->sock = sk;
++ tunnel->l2tp_net = net;
++
++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
++ idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+
+ trace_register_tunnel(tunnel);
+
+@@ -1522,12 +1521,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+
+ return 0;
+
+-err_sock:
++err_inval_sock:
++ write_unlock_bh(&sk->sk_callback_lock);
++ release_sock(sk);
++
+ if (tunnel->fd < 0)
+ sock_release(sock);
+ else
+ sockfd_put(sock);
+ err:
++ l2tp_tunnel_remove(net, tunnel);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
+@@ -1641,8 +1644,8 @@ static __net_init int l2tp_init_net(struct net *net)
+ struct l2tp_net *pn = net_generic(net, l2tp_net_id);
+ int hash;
+
+- INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
+- spin_lock_init(&pn->l2tp_tunnel_list_lock);
++ idr_init(&pn->l2tp_tunnel_idr);
++ spin_lock_init(&pn->l2tp_tunnel_idr_lock);
+
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+ INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
+@@ -1656,11 +1659,13 @@ static __net_exit void l2tp_exit_net(struct net *net)
+ {
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_tunnel *tunnel = NULL;
++ unsigned long tunnel_id, tmp;
+ int hash;
+
+ rcu_read_lock_bh();
+- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+- l2tp_tunnel_delete(tunnel);
++ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
++ if (tunnel)
++ l2tp_tunnel_delete(tunnel);
+ }
+ rcu_read_unlock_bh();
+
+@@ -1670,6 +1675,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
+
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+ WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
++ idr_destroy(&pn->l2tp_tunnel_idr);
+ }
+
+ static struct pernet_operations l2tp_net_ops = {
+diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
+index 96f975777438f..382124d6f7647 100644
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -255,8 +255,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk)
+
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
+-
+- inet6_destroy_sock(sk);
+ }
+
+ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+@@ -502,14 +500,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ struct ipcm6_cookie ipc6;
+ int addr_len = msg->msg_namelen;
+ int transhdrlen = 4; /* zero session-id */
+- int ulen = len + transhdrlen;
++ int ulen;
+ int err;
+
+ /* Rough check on arithmetic overflow,
+ * better check is made in ip6_append_data().
+ */
+- if (len > INT_MAX)
++ if (len > INT_MAX - transhdrlen)
+ return -EMSGSIZE;
++ ulen = len + transhdrlen;
+
+ /* Mirror BSD error message compatibility */
+ if (msg->msg_flags & MSG_OOB)
+diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
+index bf35710127dd0..9cef8e080f644 100644
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -651,54 +651,22 @@ static int pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel)
+ return mtu - PPPOL2TP_HEADER_OVERHEAD;
+ }
+
+-/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
+- */
+-static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+- int sockaddr_len, int flags)
++static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
++ const struct l2tp_connect_info *info,
++ bool *new_tunnel)
+ {
+- struct sock *sk = sock->sk;
+- struct pppox_sock *po = pppox_sk(sk);
+- struct l2tp_session *session = NULL;
+- struct l2tp_connect_info info;
+ struct l2tp_tunnel *tunnel;
+- struct pppol2tp_session *ps;
+- struct l2tp_session_cfg cfg = { 0, };
+- bool drop_refcnt = false;
+- bool drop_tunnel = false;
+- bool new_session = false;
+- bool new_tunnel = false;
+ int error;
+
+- error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
+- if (error < 0)
+- return error;
++ *new_tunnel = false;
+
+- lock_sock(sk);
+-
+- /* Check for already bound sockets */
+- error = -EBUSY;
+- if (sk->sk_state & PPPOX_CONNECTED)
+- goto end;
+-
+- /* We don't supporting rebinding anyway */
+- error = -EALREADY;
+- if (sk->sk_user_data)
+- goto end; /* socket is already attached */
+-
+- /* Don't bind if tunnel_id is 0 */
+- error = -EINVAL;
+- if (!info.tunnel_id)
+- goto end;
+-
+- tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id);
+- if (tunnel)
+- drop_tunnel = true;
++ tunnel = l2tp_tunnel_get(net, info->tunnel_id);
+
+ /* Special case: create tunnel context if session_id and
+ * peer_session_id is 0. Otherwise look up tunnel using supplied
+ * tunnel id.
+ */
+- if (!info.session_id && !info.peer_session_id) {
++ if (!info->session_id && !info->peer_session_id) {
+ if (!tunnel) {
+ struct l2tp_tunnel_cfg tcfg = {
+ .encap = L2TP_ENCAPTYPE_UDP,
+@@ -707,40 +675,82 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+ /* Prevent l2tp_tunnel_register() from trying to set up
+ * a kernel socket.
+ */
+- if (info.fd < 0) {
+- error = -EBADF;
+- goto end;
+- }
++ if (info->fd < 0)
++ return ERR_PTR(-EBADF);
+
+- error = l2tp_tunnel_create(info.fd,
+- info.version,
+- info.tunnel_id,
+- info.peer_tunnel_id, &tcfg,
++ error = l2tp_tunnel_create(info->fd,
++ info->version,
++ info->tunnel_id,
++ info->peer_tunnel_id, &tcfg,
+ &tunnel);
+ if (error < 0)
+- goto end;
++ return ERR_PTR(error);
+
+ l2tp_tunnel_inc_refcount(tunnel);
+- error = l2tp_tunnel_register(tunnel, sock_net(sk),
+- &tcfg);
++ error = l2tp_tunnel_register(tunnel, net, &tcfg);
+ if (error < 0) {
+ kfree(tunnel);
+- goto end;
++ return ERR_PTR(error);
+ }
+- drop_tunnel = true;
+- new_tunnel = true;
++
++ *new_tunnel = true;
+ }
+ } else {
+ /* Error if we can't find the tunnel */
+- error = -ENOENT;
+ if (!tunnel)
+- goto end;
++ return ERR_PTR(-ENOENT);
+
+ /* Error if socket is not prepped */
+- if (!tunnel->sock)
+- goto end;
++ if (!tunnel->sock) {
++ l2tp_tunnel_dec_refcount(tunnel);
++ return ERR_PTR(-ENOENT);
++ }
+ }
+
++ return tunnel;
++}
++
++/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
++ */
++static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
++ int sockaddr_len, int flags)
++{
++ struct sock *sk = sock->sk;
++ struct pppox_sock *po = pppox_sk(sk);
++ struct l2tp_session *session = NULL;
++ struct l2tp_connect_info info;
++ struct l2tp_tunnel *tunnel;
++ struct pppol2tp_session *ps;
++ struct l2tp_session_cfg cfg = { 0, };
++ bool drop_refcnt = false;
++ bool new_session = false;
++ bool new_tunnel = false;
++ int error;
++
++ error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
++ if (error < 0)
++ return error;
++
++ /* Don't bind if tunnel_id is 0 */
++ if (!info.tunnel_id)
++ return -EINVAL;
++
++ tunnel = pppol2tp_tunnel_get(sock_net(sk), &info, &new_tunnel);
++ if (IS_ERR(tunnel))
++ return PTR_ERR(tunnel);
++
++ lock_sock(sk);
++
++ /* Check for already bound sockets */
++ error = -EBUSY;
++ if (sk->sk_state & PPPOX_CONNECTED)
++ goto end;
++
++ /* We don't supporting rebinding anyway */
++ error = -EALREADY;
++ if (sk->sk_user_data)
++ goto end; /* socket is already attached */
++
+ if (tunnel->peer_tunnel_id == 0)
+ tunnel->peer_tunnel_id = info.peer_tunnel_id;
+
+@@ -841,8 +851,7 @@ end:
+ }
+ if (drop_refcnt)
+ l2tp_session_dec_refcount(session);
+- if (drop_tunnel)
+- l2tp_tunnel_dec_refcount(tunnel);
++ l2tp_tunnel_dec_refcount(tunnel);
+ release_sock(sk);
+
+ return error;
+diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
+index 17927966abb33..8b14a24f10404 100644
+--- a/net/l3mdev/l3mdev.c
++++ b/net/l3mdev/l3mdev.c
+@@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ while (dev && !netif_is_l3_master(dev))
+- dev = netdev_master_upper_dev_get(dev);
++ dev = netdev_master_upper_dev_get_rcu(dev);
+
+ return dev ? dev->ifindex : 0;
+ }
+diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
+index 3086f4a6ae683..8b9a10d10036f 100644
+--- a/net/llc/af_llc.c
++++ b/net/llc/af_llc.c
+@@ -275,6 +275,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
+ {
+ struct sock *sk = sock->sk;
+ struct llc_sock *llc = llc_sk(sk);
++ struct net_device *dev = NULL;
+ struct llc_sap *sap;
+ int rc = -EINVAL;
+
+@@ -286,14 +287,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
+ goto out;
+ rc = -ENODEV;
+ if (sk->sk_bound_dev_if) {
+- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+- if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
+- dev_put(llc->dev);
+- llc->dev = NULL;
++ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
++ if (dev && addr->sllc_arphrd != dev->type) {
++ dev_put(dev);
++ dev = NULL;
+ }
+ } else
+- llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+- if (!llc->dev)
++ dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
++ if (!dev)
+ goto out;
+ rc = -EUSERS;
+ llc->laddr.lsap = llc_ui_autoport();
+@@ -303,6 +304,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
+ sap = llc_sap_open(llc->laddr.lsap, NULL);
+ if (!sap)
+ goto out;
++
++ /* Note: We do not expect errors from this point. */
++ llc->dev = dev;
++ dev = NULL;
++
+ memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
+ memcpy(&llc->addr, addr, sizeof(llc->addr));
+ /* assign new connection to its SAP */
+@@ -310,6 +316,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
+ sock_reset_flag(sk, SOCK_ZAPPED);
+ rc = 0;
+ out:
++ dev_put(dev);
+ return rc;
+ }
+
+@@ -332,6 +339,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
+ struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
+ struct sock *sk = sock->sk;
+ struct llc_sock *llc = llc_sk(sk);
++ struct net_device *dev = NULL;
+ struct llc_sap *sap;
+ int rc = -EINVAL;
+
+@@ -347,25 +355,27 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
+ rc = -ENODEV;
+ rcu_read_lock();
+ if (sk->sk_bound_dev_if) {
+- llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
+- if (llc->dev) {
++ dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
++ if (dev) {
+ if (is_zero_ether_addr(addr->sllc_mac))
+- memcpy(addr->sllc_mac, llc->dev->dev_addr,
++ memcpy(addr->sllc_mac, dev->dev_addr,
+ IFHWADDRLEN);
+- if (addr->sllc_arphrd != llc->dev->type ||
++ if (addr->sllc_arphrd != dev->type ||
+ !ether_addr_equal(addr->sllc_mac,
+- llc->dev->dev_addr)) {
++ dev->dev_addr)) {
+ rc = -EINVAL;
+- llc->dev = NULL;
++ dev = NULL;
+ }
+ }
+- } else
+- llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
++ } else {
++ dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
+ addr->sllc_mac);
+- dev_hold(llc->dev);
++ }
++ dev_hold(dev);
+ rcu_read_unlock();
+- if (!llc->dev)
++ if (!dev)
+ goto out;
++
+ if (!addr->sllc_sap) {
+ rc = -EUSERS;
+ addr->sllc_sap = llc_ui_autoport();
+@@ -397,6 +407,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
+ goto out_put;
+ }
+ }
++
++ /* Note: We do not expect errors from this point. */
++ llc->dev = dev;
++ dev = NULL;
++
+ llc->laddr.lsap = addr->sllc_sap;
+ memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
+ memcpy(&llc->addr, addr, sizeof(llc->addr));
+@@ -407,6 +422,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
+ out_put:
+ llc_sap_put(sap);
+ out:
++ dev_put(dev);
+ release_sock(sk);
+ return rc;
+ }
+@@ -565,7 +581,8 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (1) {
+- if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
++ if (sk_wait_event(sk, &timeout,
++ READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait))
+ break;
+ rc = -ERESTARTSYS;
+ if (signal_pending(current))
+@@ -585,7 +602,8 @@ static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (1) {
+- if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
++ if (sk_wait_event(sk, &timeout,
++ READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait))
+ break;
+ if (signal_pending(current) || !timeout)
+ break;
+@@ -604,7 +622,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
+ while (1) {
+ rc = 0;
+ if (sk_wait_event(sk, &timeout,
+- (sk->sk_shutdown & RCV_SHUTDOWN) ||
++ (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) ||
+ (!llc_data_accept_state(llc->state) &&
+ !llc->remote_busy_flag &&
+ !llc->p_flag), &wait))
+diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
+index c309b72a58779..7cac441862e21 100644
+--- a/net/llc/llc_input.c
++++ b/net/llc/llc_input.c
+@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
+ void (*sta_handler)(struct sk_buff *skb);
+ void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
+
+- if (!net_eq(dev_net(dev), &init_net))
+- goto drop;
+-
+ /*
+ * When the interface is in promisc. mode, drop all the crap that it
+ * receives, do not try to analyse it.
+diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
+index cce28e3b22323..0d2bab9d351c6 100644
+--- a/net/mac80211/agg-rx.c
++++ b/net/mac80211/agg-rx.c
+@@ -9,7 +9,7 @@
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2007-2010, Intel Corporation
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
+- * Copyright (C) 2018-2020 Intel Corporation
++ * Copyright (C) 2018-2021 Intel Corporation
+ */
+
+ /**
+@@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+- he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type);
++ he_cap = ieee80211_get_he_iftype_cap(sband,
++ ieee80211_vif_type_p2p(&sdata->vif));
+ if (!he_cap)
+ return;
+
+@@ -309,7 +310,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+ }
+
+ if (sta->sta.he_cap.has_he)
+- max_buf_size = IEEE80211_MAX_AMPDU_BUF;
++ max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE;
+ else
+ max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+
+@@ -477,7 +478,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
+ size_t len)
+ {
+ u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
+- struct ieee802_11_elems elems = { };
++ struct ieee802_11_elems *elems = NULL;
+ u8 dialog_token;
+ int ies_len;
+
+@@ -495,16 +496,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
+ ies_len = len - offsetof(struct ieee80211_mgmt,
+ u.action.u.addba_req.variable);
+ if (ies_len) {
+- ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+- ies_len, true, &elems, mgmt->bssid, NULL);
+- if (elems.parse_error)
+- return;
++ elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
++ ies_len, true, mgmt->bssid, NULL);
++ if (!elems || elems->parse_error)
++ goto free;
+ }
+
+ __ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
+ start_seq_num, ba_policy, tid,
+ buf_size, true, false,
+- elems.addba_ext_ie);
++ elems ? elems->addba_ext_ie : NULL);
++free:
++ kfree(elems);
+ }
+
+ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
+diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
+index 430a585875388..a4d3fa14f76b7 100644
+--- a/net/mac80211/agg-tx.c
++++ b/net/mac80211/agg-tx.c
+@@ -9,7 +9,7 @@
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2007-2010, Intel Corporation
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
+- * Copyright (C) 2018 - 2020 Intel Corporation
++ * Copyright (C) 2018 - 2022 Intel Corporation
+ */
+
+ #include <linux/ieee80211.h>
+@@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
+ mgmt->u.action.u.addba_req.start_seq_num =
+ cpu_to_le16(start_seq_num << 4);
+
+- ieee80211_tx_skb(sdata, skb);
++ ieee80211_tx_skb_tid(sdata, skb, tid);
+ }
+
+ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
+@@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi;
+
++ lockdep_assert_held(&sta->ampdu_mlme.mtx);
++
+ if (!txq)
+ return;
+
+@@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid)
+ ieee80211_assign_tid_tx(sta, tid, NULL);
+
+ ieee80211_agg_splice_finish(sta->sdata, tid);
+- ieee80211_agg_start_txq(sta, tid, false);
+
+ kfree_rcu(tid_tx, rcu_head);
+ }
+@@ -480,8 +481,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
+
+ /* send AddBA request */
+ ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
+- tid_tx->dialog_token,
+- sta->tid_seq[tid] >> 4,
++ tid_tx->dialog_token, tid_tx->ssn,
+ buf_size, tid_tx->timeout);
+
+ WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
+@@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
+ {
+ struct tid_ampdu_tx *tid_tx;
+ struct ieee80211_local *local = sta->local;
+- struct ieee80211_sub_if_data *sdata = sta->sdata;
++ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_ampdu_params params = {
+ .sta = &sta->sta,
+ .action = IEEE80211_AMPDU_TX_START,
+@@ -521,8 +521,10 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
+ */
+ synchronize_net();
+
++ sdata = sta->sdata;
+ params.ssn = sta->tid_seq[tid] >> 4;
+ ret = drv_ampdu_action(local, sdata, &params);
++ tid_tx->ssn = params.ssn;
+ if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) {
+ return;
+ } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+@@ -533,6 +535,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
+ */
+ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
+ } else if (ret) {
++ if (!sdata)
++ return;
++
+ ht_dbg(sdata,
+ "BA request denied - HW unavailable for %pM tid %d\n",
+ sta->sta.addr, tid);
+@@ -625,6 +630,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
+ return -EINVAL;
+ }
+
++ if (test_sta_flag(sta, WLAN_STA_MFP) &&
++ !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
++ ht_dbg(sdata,
++ "MFP STA not authorized - deny BA session request %pM tid %d\n",
++ sta->sta.addr, tid);
++ return -EINVAL;
++ }
++
+ /*
+ * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a
+ * member of an IBSS, and has no other existing Block Ack agreement
+@@ -889,6 +902,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+ {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ bool send_delba = false;
++ bool start_txq = false;
+
+ ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n",
+ sta->sta.addr, tid);
+@@ -906,10 +920,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+ send_delba = true;
+
+ ieee80211_remove_tid_tx(sta, tid);
++ start_txq = true;
+
+ unlock_sta:
+ spin_unlock_bh(&sta->lock);
+
++ if (start_txq)
++ ieee80211_agg_start_txq(sta, tid, false);
++
+ if (send_delba)
+ ieee80211_send_delba(sdata, sta->sta.addr, tid,
+ WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
+diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
+index 26d2f8ba70297..758ef63669e7b 100644
+--- a/net/mac80211/airtime.c
++++ b/net/mac80211/airtime.c
+@@ -457,6 +457,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
+ (status->encoding == RX_ENC_HE && streams > 8)))
+ return 0;
+
++ if (idx >= MCS_GROUP_RATES)
++ return 0;
++
+ duration = airtime_mcs_groups[group].duration[idx];
+ duration <<= airtime_mcs_groups[group].shift;
+ *overhead = 36 + (streams << 2);
+diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
+index d69b31c20fe28..4fa216a108ae8 100644
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -80,7 +80,8 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
+ }
+
+ /* also validate MU-MIMO change */
+- monitor_sdata = rtnl_dereference(local->monitor_sdata);
++ monitor_sdata = wiphy_dereference(local->hw.wiphy,
++ local->monitor_sdata);
+
+ if (!monitor_sdata &&
+ (params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
+@@ -810,7 +811,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
+
+ mutex_lock(&local->mtx);
+ if (local->use_chanctx) {
+- sdata = rtnl_dereference(local->monitor_sdata);
++ sdata = wiphy_dereference(local->hw.wiphy,
++ local->monitor_sdata);
+ if (sdata) {
+ ieee80211_vif_release_channel(sdata);
+ ret = ieee80211_vif_use_channel(sdata, chandef,
+@@ -1224,7 +1226,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
+ return 0;
+
+ error:
++ mutex_lock(&local->mtx);
+ ieee80211_vif_release_channel(sdata);
++ mutex_unlock(&local->mtx);
++
+ return err;
+ }
+
+@@ -2105,14 +2110,12 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
+ const struct mesh_setup *setup)
+ {
+ u8 *new_ie;
+- const u8 *old_ie;
+ struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
+ struct ieee80211_sub_if_data, u.mesh);
+ int i;
+
+ /* allocate information elements */
+ new_ie = NULL;
+- old_ie = ifmsh->ie;
+
+ if (setup->ie_len) {
+ new_ie = kmemdup(setup->ie, setup->ie_len,
+@@ -2122,7 +2125,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
+ }
+ ifmsh->ie_len = setup->ie_len;
+ ifmsh->ie = new_ie;
+- kfree(old_ie);
+
+ /* now copy the rest of the setup parameters */
+ ifmsh->mesh_id_len = setup->mesh_id_len;
+@@ -2669,7 +2671,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
+ sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+- sdata = rtnl_dereference(local->monitor_sdata);
++ sdata = wiphy_dereference(local->hw.wiphy,
++ local->monitor_sdata);
+ if (!sdata)
+ return -EOPNOTSUPP;
+ }
+@@ -2729,7 +2732,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
+ mutex_unlock(&local->iflist_mtx);
+
+ if (has_monitor) {
+- sdata = rtnl_dereference(local->monitor_sdata);
++ sdata = wiphy_dereference(local->hw.wiphy,
++ local->monitor_sdata);
+ if (sdata) {
+ sdata->user_power_level = local->user_power_level;
+ if (txp_type != sdata->vif.bss_conf.txpower_type)
+@@ -3380,9 +3384,6 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ case NL80211_IFTYPE_MESH_POINT: {
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+- if (params->chandef.width != sdata->vif.bss_conf.chandef.width)
+- return -EINVAL;
+-
+ /* changes into another band are not supported */
+ if (sdata->vif.bss_conf.chandef.chan->band !=
+ params->chandef.chan->band)
+diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
+index 76fc36a68750e..f32d8d07d6a30 100644
+--- a/net/mac80211/chan.c
++++ b/net/mac80211/chan.c
+@@ -563,7 +563,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
+ ctx->conf.rx_chains_dynamic = 1;
+ ctx->mode = mode;
+ ctx->conf.radar_enabled = false;
+- ieee80211_recalc_chanctx_min_def(local, ctx);
++ _ieee80211_recalc_chanctx_min_def(local, ctx);
+
+ return ctx;
+ }
+@@ -1746,12 +1746,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
+
+ if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
+ if (old_ctx)
+- err = ieee80211_vif_use_reserved_reassign(sdata);
+- else
+- err = ieee80211_vif_use_reserved_assign(sdata);
++ return ieee80211_vif_use_reserved_reassign(sdata);
+
+- if (err)
+- return err;
++ return ieee80211_vif_use_reserved_assign(sdata);
+ }
+
+ /*
+diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
+index 48322e45e7ddb..120bd9cdf7dfa 100644
+--- a/net/mac80211/driver-ops.c
++++ b/net/mac80211/driver-ops.c
+@@ -331,6 +331,9 @@ int drv_ampdu_action(struct ieee80211_local *local,
+
+ might_sleep();
+
++ if (!sdata)
++ return -EIO;
++
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
+index cd3731cbf6c68..c336267f4599c 100644
+--- a/net/mac80211/driver-ops.h
++++ b/net/mac80211/driver-ops.h
+@@ -1219,8 +1219,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
+ {
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
+
+- if (local->in_reconfig)
++ /* In reconfig don't transmit now, but mark for waking later */
++ if (local->in_reconfig) {
++ set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags);
+ return;
++ }
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
+index 5d6ca4c3e6981..48e0260f3424f 100644
+--- a/net/mac80211/ibss.c
++++ b/net/mac80211/ibss.c
+@@ -9,7 +9,7 @@
+ * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
+- * Copyright(c) 2018-2020 Intel Corporation
++ * Copyright(c) 2018-2021 Intel Corporation
+ */
+
+ #include <linux/delay.h>
+@@ -534,6 +534,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
+
+ sdata_assert_lock(sdata);
+
++ /* When not connected/joined, sending CSA doesn't make sense. */
++ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
++ return -ENOLINK;
++
+ /* update cfg80211 bss information with the new channel */
+ if (!is_zero_ether_addr(ifibss->bssid)) {
+ cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
+@@ -1589,7 +1593,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_rx_status *rx_status)
+ {
+ size_t baselen;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+
+ BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) !=
+ offsetof(typeof(mgmt->u.beacon), variable));
+@@ -1602,10 +1606,14 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
+ if (baselen > len)
+ return;
+
+- ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
+- false, &elems, mgmt->bssid, NULL);
++ elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
++ len - baselen, false,
++ mgmt->bssid, NULL);
+
+- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
++ if (elems) {
++ ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems);
++ kfree(elems);
++ }
+ }
+
+ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+@@ -1614,7 +1622,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_rx_status *rx_status;
+ struct ieee80211_mgmt *mgmt;
+ u16 fc;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ int ies_len;
+
+ rx_status = IEEE80211_SKB_RXCB(skb);
+@@ -1651,15 +1659,16 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ if (ies_len < 0)
+ break;
+
+- ieee802_11_parse_elems(
++ elems = ieee802_11_parse_elems(
+ mgmt->u.action.u.chan_switch.variable,
+- ies_len, true, &elems, mgmt->bssid, NULL);
+-
+- if (elems.parse_error)
+- break;
+-
+- ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len,
+- rx_status, &elems);
++ ies_len, true, mgmt->bssid, NULL);
++
++ if (elems && !elems->parse_error)
++ ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt,
++ skb->len,
++ rx_status,
++ elems);
++ kfree(elems);
+ break;
+ }
+ }
+diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
+index 159af6c3ffb05..21549a440b38c 100644
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -376,7 +376,7 @@ struct ieee80211_mgd_auth_data {
+
+ u8 key[WLAN_KEY_LEN_WEP104];
+ u8 key_len, key_idx;
+- bool done;
++ bool done, waiting;
+ bool peer_confirmed;
+ bool timeout_started;
+
+@@ -631,10 +631,9 @@ struct ieee80211_if_ocb {
+ */
+ struct ieee802_11_elems;
+ struct ieee80211_mesh_sync_ops {
+- void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata,
+- u16 stype,
+- struct ieee80211_mgmt *mgmt,
+- struct ieee802_11_elems *elems,
++ void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype,
++ struct ieee80211_mgmt *mgmt, unsigned int len,
++ const struct ieee80211_meshconf_ie *mesh_cfg,
+ struct ieee80211_rx_status *rx_status);
+
+ /* should be called with beacon_data under RCU read lock */
+@@ -648,6 +647,26 @@ struct mesh_csa_settings {
+ struct cfg80211_csa_settings settings;
+ };
+
++/**
++ * struct mesh_table
++ *
++ * @known_gates: list of known mesh gates and their mpaths by the station. The
++ * gate's mpath may or may not be resolved and active.
++ * @gates_lock: protects updates to known_gates
++ * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
++ * @walk_head: linked list containing all mesh_path objects
++ * @walk_lock: lock protecting walk_head
++ * @entries: number of entries in the table
++ */
++struct mesh_table {
++ struct hlist_head known_gates;
++ spinlock_t gates_lock;
++ struct rhashtable rhead;
++ struct hlist_head walk_head;
++ spinlock_t walk_lock;
++ atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
++};
++
+ struct ieee80211_if_mesh {
+ struct timer_list housekeeping_timer;
+ struct timer_list mesh_path_timer;
+@@ -722,8 +741,8 @@ struct ieee80211_if_mesh {
+ /* offset from skb->data while building IE */
+ int meshconf_offset;
+
+- struct mesh_table *mesh_paths;
+- struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */
++ struct mesh_table mesh_paths;
++ struct mesh_table mpp_paths; /* Store paths for MPP&MAP */
+ int mesh_paths_generation;
+ int mpp_paths_generation;
+ };
+@@ -1109,6 +1128,9 @@ struct tpt_led_trigger {
+ * a scan complete for an aborted scan.
+ * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
+ * cancelled.
++ * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR
++ * and could send a probe request after receiving a beacon.
++ * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
+ */
+ enum {
+ SCAN_SW_SCANNING,
+@@ -1117,6 +1139,8 @@ enum {
+ SCAN_COMPLETED,
+ SCAN_ABORTED,
+ SCAN_HW_CANCELLED,
++ SCAN_BEACON_WAIT,
++ SCAN_BEACON_DONE,
+ };
+
+ /**
+@@ -1508,6 +1532,7 @@ struct ieee80211_csa_ie {
+ struct ieee802_11_elems {
+ const u8 *ie_start;
+ size_t total_len;
++ u32 crc;
+
+ /* pointers to IEs */
+ const struct ieee80211_tdls_lnkie *lnk_id;
+@@ -1517,7 +1542,6 @@ struct ieee802_11_elems {
+ const u8 *supp_rates;
+ const u8 *ds_params;
+ const struct ieee80211_tim_ie *tim;
+- const u8 *challenge;
+ const u8 *rsn;
+ const u8 *rsnx;
+ const u8 *erp_info;
+@@ -1571,7 +1595,6 @@ struct ieee802_11_elems {
+ u8 ssid_len;
+ u8 supp_rates_len;
+ u8 tim_len;
+- u8 challenge_len;
+ u8 rsn_len;
+ u8 rsnx_len;
+ u8 ext_supp_rates_len;
+@@ -1590,6 +1613,14 @@ struct ieee802_11_elems {
+
+ /* whether a parse error occurred while retrieving these elements */
+ bool parse_error;
++
++ /*
++ * scratch buffer that can be used for various element parsing related
++ * tasks, e.g., element de-fragmentation etc.
++ */
++ size_t scratch_len;
++ u8 *scratch_pos;
++ u8 scratch[];
+ };
+
+ static inline struct ieee80211_local *hw_to_local(
+@@ -2194,18 +2225,18 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
+ ieee80211_tx_skb_tid(sdata, skb, 7);
+ }
+
+-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+- struct ieee802_11_elems *elems,
+- u64 filter, u32 crc, u8 *transmitter_bssid,
+- u8 *bss_bssid);
+-static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
+- bool action,
+- struct ieee802_11_elems *elems,
+- u8 *transmitter_bssid,
+- u8 *bss_bssid)
++struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
++ bool action,
++ u64 filter, u32 crc,
++ const u8 *transmitter_bssid,
++ const u8 *bss_bssid);
++static inline struct ieee802_11_elems *
++ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
++ const u8 *transmitter_bssid,
++ const u8 *bss_bssid)
+ {
+- ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0,
+- transmitter_bssid, bss_bssid);
++ return ieee802_11_parse_elems_crc(start, len, action, 0, 0,
++ transmitter_bssid, bss_bssid);
+ }
+
+
+@@ -2359,7 +2390,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
+ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
+ const struct cfg80211_chan_def *chandef);
+ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
+-u8 *ieee80211_ie_build_he_cap(u8 *pos,
++u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos,
+ const struct ieee80211_sta_he_cap *he_cap,
+ u8 *end);
+ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index 62c95597704b4..041859b5b71d0 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -588,7 +588,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
+ */
+ if (local->suspended) {
+ WARN_ON(local->wowlan);
+- WARN_ON(rtnl_dereference(local->monitor_sdata));
++ WARN_ON(rcu_access_pointer(local->monitor_sdata));
+ return;
+ }
+
+@@ -932,6 +932,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
+ return 0;
+
+ ASSERT_RTNL();
++ lockdep_assert_wiphy(local->hw.wiphy);
+
+ if (local->monitor_sdata)
+ return 0;
+@@ -999,6 +1000,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
+ return;
+
+ ASSERT_RTNL();
++ lockdep_assert_wiphy(local->hw.wiphy);
+
+ mutex_lock(&local->iflist_mtx);
+
+diff --git a/net/mac80211/led.h b/net/mac80211/led.h
+index fb3aaa3c56069..b71a1428d883c 100644
+--- a/net/mac80211/led.h
++++ b/net/mac80211/led.h
+@@ -72,19 +72,19 @@ static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
+ #endif
+
+ static inline void
+-ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
++ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes)
+ {
+ #ifdef CONFIG_MAC80211_LEDS
+- if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
++ if (atomic_read(&local->tpt_led_active))
+ local->tpt_led_trigger->tx_bytes += bytes;
+ #endif
+ }
+
+ static inline void
+-ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
++ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes)
+ {
+ #ifdef CONFIG_MAC80211_LEDS
+- if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
++ if (atomic_read(&local->tpt_led_active))
+ local->tpt_led_trigger->rx_bytes += bytes;
+ #endif
+ }
+diff --git a/net/mac80211/main.c b/net/mac80211/main.c
+index 45fb517591ee9..9617ff8e27147 100644
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -1131,17 +1131,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
+ local->scan_ies_len +=
+ 2 + sizeof(struct ieee80211_vht_cap);
+
+- /* HE cap element is variable in size - set len to allow max size */
+ /*
+- * TODO: 1 is added at the end of the calculation to accommodate for
+- * the temporary placing of the HE capabilities IE under EXT.
+- * Remove it once it is placed in the final place.
+- */
+- if (supp_he)
++ * HE cap element is variable in size - set len to allow max size */
++ if (supp_he) {
+ local->scan_ies_len +=
+- 2 + sizeof(struct ieee80211_he_cap_elem) +
++ 3 + sizeof(struct ieee80211_he_cap_elem) +
+ sizeof(struct ieee80211_he_mcs_nss_supp) +
+- IEEE80211_HE_PPE_THRES_MAX_LEN + 1;
++ IEEE80211_HE_PPE_THRES_MAX_LEN;
++ }
+
+ if (!local->ops->hw_scan) {
+ /* For hw_scan, driver needs to set these up. */
+@@ -1360,8 +1357,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
+ ieee80211_led_exit(local);
+ destroy_workqueue(local->workqueue);
+ fail_workqueue:
+- if (local->wiphy_ciphers_allocated)
++ if (local->wiphy_ciphers_allocated) {
+ kfree(local->hw.wiphy->cipher_suites);
++ local->wiphy_ciphers_allocated = false;
++ }
+ kfree(local->int_scan_req);
+ return result;
+ }
+@@ -1429,8 +1428,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
+ mutex_destroy(&local->iflist_mtx);
+ mutex_destroy(&local->mtx);
+
+- if (local->wiphy_ciphers_allocated)
++ if (local->wiphy_ciphers_allocated) {
+ kfree(local->hw.wiphy->cipher_suites);
++ local->wiphy_ciphers_allocated = false;
++ }
+
+ idr_for_each(&local->ack_status_frames,
+ ieee80211_free_ack_frame, NULL);
+diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
+index 5dcfd53a4ab6c..6847fdf934392 100644
+--- a/net/mac80211/mesh.c
++++ b/net/mac80211/mesh.c
+@@ -580,7 +580,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
+ return -ENOMEM;
+
+ pos = skb_put(skb, ie_len);
+- ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len);
++ ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len);
+
+ return 0;
+ }
+@@ -1247,7 +1247,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *presp;
+ struct beacon_data *bcn;
+ struct ieee80211_mgmt *hdr;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ size_t baselen;
+ u8 *pos;
+
+@@ -1256,22 +1256,24 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
+ if (baselen > len)
+ return;
+
+- ieee802_11_parse_elems(pos, len - baselen, false, &elems, mgmt->bssid,
+- NULL);
+-
+- if (!elems.mesh_id)
++ elems = ieee802_11_parse_elems(pos, len - baselen, false, mgmt->bssid,
++ NULL);
++ if (!elems)
+ return;
+
++ if (!elems->mesh_id)
++ goto free;
++
+ /* 802.11-2012 10.1.4.3.2 */
+ if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
+ !is_broadcast_ether_addr(mgmt->da)) ||
+- elems.ssid_len != 0)
+- return;
++ elems->ssid_len != 0)
++ goto free;
+
+- if (elems.mesh_id_len != 0 &&
+- (elems.mesh_id_len != ifmsh->mesh_id_len ||
+- memcmp(elems.mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len)))
+- return;
++ if (elems->mesh_id_len != 0 &&
++ (elems->mesh_id_len != ifmsh->mesh_id_len ||
++ memcmp(elems->mesh_id, ifmsh->mesh_id, ifmsh->mesh_id_len)))
++ goto free;
+
+ rcu_read_lock();
+ bcn = rcu_dereference(ifmsh->beacon);
+@@ -1295,6 +1297,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
+ ieee80211_tx_skb(sdata, presp);
+ out:
+ rcu_read_unlock();
++free:
++ kfree(elems);
+ }
+
+ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+@@ -1305,7 +1309,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+ {
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ struct ieee80211_channel *channel;
+ size_t baselen;
+ int freq;
+@@ -1320,42 +1324,47 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+ if (baselen > len)
+ return;
+
+- ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
+- false, &elems, mgmt->bssid, NULL);
++ elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
++ len - baselen,
++ false, mgmt->bssid, NULL);
++ if (!elems)
++ return;
+
+ /* ignore non-mesh or secure / unsecure mismatch */
+- if ((!elems.mesh_id || !elems.mesh_config) ||
+- (elems.rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
+- (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
+- return;
++ if ((!elems->mesh_id || !elems->mesh_config) ||
++ (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
++ (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
++ goto free;
+
+- if (elems.ds_params)
+- freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
++ if (elems->ds_params)
++ freq = ieee80211_channel_to_frequency(elems->ds_params[0], band);
+ else
+ freq = rx_status->freq;
+
+ channel = ieee80211_get_channel(local->hw.wiphy, freq);
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+- return;
++ goto free;
+
+- if (mesh_matches_local(sdata, &elems)) {
++ if (mesh_matches_local(sdata, elems)) {
+ mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n",
+ sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal);
+ if (!sdata->u.mesh.user_mpm ||
+ sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
+ sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
+- mesh_neighbour_update(sdata, mgmt->sa, &elems,
++ mesh_neighbour_update(sdata, mgmt->sa, elems,
+ rx_status);
+
+ if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
+ !sdata->vif.csa_active)
+- ieee80211_mesh_process_chnswitch(sdata, &elems, true);
++ ieee80211_mesh_process_chnswitch(sdata, elems, true);
+ }
+
+ if (ifmsh->sync_ops)
+- ifmsh->sync_ops->rx_bcn_presp(sdata,
+- stype, mgmt, &elems, rx_status);
++ ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len,
++ elems->mesh_config, rx_status);
++free:
++ kfree(elems);
+ }
+
+ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
+@@ -1447,7 +1456,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+ {
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ u16 pre_value;
+ bool fwd_csa = true;
+ size_t baselen;
+@@ -1460,33 +1469,37 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
+ pos = mgmt->u.action.u.chan_switch.variable;
+ baselen = offsetof(struct ieee80211_mgmt,
+ u.action.u.chan_switch.variable);
+- ieee802_11_parse_elems(pos, len - baselen, true, &elems,
+- mgmt->bssid, NULL);
+-
+- if (!mesh_matches_local(sdata, &elems))
++ elems = ieee802_11_parse_elems(pos, len - baselen, true,
++ mgmt->bssid, NULL);
++ if (!elems)
+ return;
+
+- ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
++ if (!mesh_matches_local(sdata, elems))
++ goto free;
++
++ ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl;
+ if (!--ifmsh->chsw_ttl)
+ fwd_csa = false;
+
+- pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value);
++ pre_value = le16_to_cpu(elems->mesh_chansw_params_ie->mesh_pre_value);
+ if (ifmsh->pre_value >= pre_value)
+- return;
++ goto free;
+
+ ifmsh->pre_value = pre_value;
+
+ if (!sdata->vif.csa_active &&
+- !ieee80211_mesh_process_chnswitch(sdata, &elems, false)) {
++ !ieee80211_mesh_process_chnswitch(sdata, elems, false)) {
+ mcsa_dbg(sdata, "Failed to process CSA action frame");
+- return;
++ goto free;
+ }
+
+ /* forward or re-broadcast the CSA frame */
+ if (fwd_csa) {
+- if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0)
++ if (mesh_fwd_csa_frame(sdata, mgmt, len, elems) < 0)
+ mcsa_dbg(sdata, "Failed to forward the CSA frame");
+ }
++free:
++ kfree(elems);
+ }
+
+ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
+diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
+index 77080b4f87b8a..b2b717a78114f 100644
+--- a/net/mac80211/mesh.h
++++ b/net/mac80211/mesh.h
+@@ -127,26 +127,6 @@ struct mesh_path {
+ u32 path_change_count;
+ };
+
+-/**
+- * struct mesh_table
+- *
+- * @known_gates: list of known mesh gates and their mpaths by the station. The
+- * gate's mpath may or may not be resolved and active.
+- * @gates_lock: protects updates to known_gates
+- * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+- * @walk_head: linked list containing all mesh_path objects
+- * @walk_lock: lock protecting walk_head
+- * @entries: number of entries in the table
+- */
+-struct mesh_table {
+- struct hlist_head known_gates;
+- spinlock_t gates_lock;
+- struct rhashtable rhead;
+- struct hlist_head walk_head;
+- spinlock_t walk_lock;
+- atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
+-};
+-
+ /* Recent multicast cache */
+ /* RMC_BUCKETS must be a power of 2, maximum 256 */
+ #define RMC_BUCKETS 256
+@@ -308,7 +288,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
+ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
+ void mesh_path_flush_pending(struct mesh_path *mpath);
+ void mesh_path_tx_pending(struct mesh_path *mpath);
+-int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
++void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
+ void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata);
+ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr);
+ void mesh_path_timer(struct timer_list *t);
+diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
+index a05b615deb517..44a6fdb6efbd4 100644
+--- a/net/mac80211/mesh_hwmp.c
++++ b/net/mac80211/mesh_hwmp.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+ * Copyright (c) 2008, 2009 open80211s Ltd.
+- * Copyright (C) 2019 Intel Corporation
++ * Copyright (C) 2019, 2021 Intel Corporation
+ * Author: Luis Carlos Cobo <luisca@cozybit.com>
+ */
+
+@@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
+ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len)
+ {
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ size_t baselen;
+ u32 path_metric;
+ struct sta_info *sta;
+@@ -926,37 +926,41 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
+ rcu_read_unlock();
+
+ baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
+- ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
+- len - baselen, false, &elems, mgmt->bssid, NULL);
++ elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
++ len - baselen, false, mgmt->bssid, NULL);
++ if (!elems)
++ return;
+
+- if (elems.preq) {
+- if (elems.preq_len != 37)
++ if (elems->preq) {
++ if (elems->preq_len != 37)
+ /* Right now we support just 1 destination and no AE */
+- return;
+- path_metric = hwmp_route_info_get(sdata, mgmt, elems.preq,
++ goto free;
++ path_metric = hwmp_route_info_get(sdata, mgmt, elems->preq,
+ MPATH_PREQ);
+ if (path_metric)
+- hwmp_preq_frame_process(sdata, mgmt, elems.preq,
++ hwmp_preq_frame_process(sdata, mgmt, elems->preq,
+ path_metric);
+ }
+- if (elems.prep) {
+- if (elems.prep_len != 31)
++ if (elems->prep) {
++ if (elems->prep_len != 31)
+ /* Right now we support no AE */
+- return;
+- path_metric = hwmp_route_info_get(sdata, mgmt, elems.prep,
++ goto free;
++ path_metric = hwmp_route_info_get(sdata, mgmt, elems->prep,
+ MPATH_PREP);
+ if (path_metric)
+- hwmp_prep_frame_process(sdata, mgmt, elems.prep,
++ hwmp_prep_frame_process(sdata, mgmt, elems->prep,
+ path_metric);
+ }
+- if (elems.perr) {
+- if (elems.perr_len != 15)
++ if (elems->perr) {
++ if (elems->perr_len != 15)
+ /* Right now we support only one destination per PERR */
+- return;
+- hwmp_perr_frame_process(sdata, mgmt, elems.perr);
++ goto free;
++ hwmp_perr_frame_process(sdata, mgmt, elems->perr);
+ }
+- if (elems.rann)
+- hwmp_rann_frame_process(sdata, mgmt, elems.rann);
++ if (elems->rann)
++ hwmp_rann_frame_process(sdata, mgmt, elems->rann);
++free:
++ kfree(elems);
+ }
+
+ /**
+diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
+index 7cab1cf09bf1a..69d5e1ec6edef 100644
+--- a/net/mac80211/mesh_pathtbl.c
++++ b/net/mac80211/mesh_pathtbl.c
+@@ -47,32 +47,24 @@ static void mesh_path_rht_free(void *ptr, void *tblptr)
+ mesh_path_free_rcu(tbl, mpath);
+ }
+
+-static struct mesh_table *mesh_table_alloc(void)
++static void mesh_table_init(struct mesh_table *tbl)
+ {
+- struct mesh_table *newtbl;
++ INIT_HLIST_HEAD(&tbl->known_gates);
++ INIT_HLIST_HEAD(&tbl->walk_head);
++ atomic_set(&tbl->entries, 0);
++ spin_lock_init(&tbl->gates_lock);
++ spin_lock_init(&tbl->walk_lock);
+
+- newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC);
+- if (!newtbl)
+- return NULL;
+-
+- INIT_HLIST_HEAD(&newtbl->known_gates);
+- INIT_HLIST_HEAD(&newtbl->walk_head);
+- atomic_set(&newtbl->entries, 0);
+- spin_lock_init(&newtbl->gates_lock);
+- spin_lock_init(&newtbl->walk_lock);
+- if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+- kfree(newtbl);
+- return NULL;
+- }
+-
+- return newtbl;
++ /* rhashtable_init() may fail only in case of wrong
++ * mesh_rht_params
++ */
++ WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params));
+ }
+
+ static void mesh_table_free(struct mesh_table *tbl)
+ {
+ rhashtable_free_and_destroy(&tbl->rhead,
+ mesh_path_rht_free, tbl);
+- kfree(tbl);
+ }
+
+ /**
+@@ -238,13 +230,13 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst,
+ struct mesh_path *
+ mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
+ {
+- return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata);
++ return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata);
+ }
+
+ struct mesh_path *
+ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
+ {
+- return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata);
++ return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata);
+ }
+
+ static struct mesh_path *
+@@ -281,7 +273,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
+ struct mesh_path *
+ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
+ {
+- return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx);
++ return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx);
+ }
+
+ /**
+@@ -296,7 +288,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
+ struct mesh_path *
+ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
+ {
+- return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx);
++ return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx);
+ }
+
+ /**
+@@ -309,7 +301,7 @@ int mesh_path_add_gate(struct mesh_path *mpath)
+ int err;
+
+ rcu_read_lock();
+- tbl = mpath->sdata->u.mesh.mesh_paths;
++ tbl = &mpath->sdata->u.mesh.mesh_paths;
+
+ spin_lock_bh(&mpath->state_lock);
+ if (mpath->is_gate) {
+@@ -418,7 +410,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
+ if (!new_mpath)
+ return ERR_PTR(-ENOMEM);
+
+- tbl = sdata->u.mesh.mesh_paths;
++ tbl = &sdata->u.mesh.mesh_paths;
+ spin_lock_bh(&tbl->walk_lock);
+ mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead,
+ &new_mpath->rhash,
+@@ -460,7 +452,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
+ return -ENOMEM;
+
+ memcpy(new_mpath->mpp, mpp, ETH_ALEN);
+- tbl = sdata->u.mesh.mpp_paths;
++ tbl = &sdata->u.mesh.mpp_paths;
+
+ spin_lock_bh(&tbl->walk_lock);
+ ret = rhashtable_lookup_insert_fast(&tbl->rhead,
+@@ -489,7 +481,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
+ void mesh_plink_broken(struct sta_info *sta)
+ {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+- struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
++ struct mesh_table *tbl = &sdata->u.mesh.mesh_paths;
+ static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ struct mesh_path *mpath;
+
+@@ -548,7 +540,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
+ void mesh_path_flush_by_nexthop(struct sta_info *sta)
+ {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+- struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
++ struct mesh_table *tbl = &sdata->u.mesh.mesh_paths;
+ struct mesh_path *mpath;
+ struct hlist_node *n;
+
+@@ -563,7 +555,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
+ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
+ const u8 *proxy)
+ {
+- struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
++ struct mesh_table *tbl = &sdata->u.mesh.mpp_paths;
+ struct mesh_path *mpath;
+ struct hlist_node *n;
+
+@@ -597,8 +589,8 @@ static void table_flush_by_iface(struct mesh_table *tbl)
+ */
+ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
+ {
+- table_flush_by_iface(sdata->u.mesh.mesh_paths);
+- table_flush_by_iface(sdata->u.mesh.mpp_paths);
++ table_flush_by_iface(&sdata->u.mesh.mesh_paths);
++ table_flush_by_iface(&sdata->u.mesh.mpp_paths);
+ }
+
+ /**
+@@ -644,7 +636,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
+ /* flush relevant mpp entries first */
+ mpp_flush_by_proxy(sdata, addr);
+
+- err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr);
++ err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr);
+ sdata->u.mesh.mesh_paths_generation++;
+ return err;
+ }
+@@ -682,7 +674,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
+ struct mesh_path *gate;
+ bool copy = false;
+
+- tbl = sdata->u.mesh.mesh_paths;
++ tbl = &sdata->u.mesh.mesh_paths;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) {
+@@ -718,7 +710,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
+ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+ {
+- kfree_skb(skb);
++ ieee80211_free_txskb(&sdata->local->hw, skb);
+ sdata->u.mesh.mshstats.dropped_frames_no_route++;
+ }
+
+@@ -762,29 +754,10 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
+ mesh_path_tx_pending(mpath);
+ }
+
+-int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata)
++void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata)
+ {
+- struct mesh_table *tbl_path, *tbl_mpp;
+- int ret;
+-
+- tbl_path = mesh_table_alloc();
+- if (!tbl_path)
+- return -ENOMEM;
+-
+- tbl_mpp = mesh_table_alloc();
+- if (!tbl_mpp) {
+- ret = -ENOMEM;
+- goto free_path;
+- }
+-
+- sdata->u.mesh.mesh_paths = tbl_path;
+- sdata->u.mesh.mpp_paths = tbl_mpp;
+-
+- return 0;
+-
+-free_path:
+- mesh_table_free(tbl_path);
+- return ret;
++ mesh_table_init(&sdata->u.mesh.mesh_paths);
++ mesh_table_init(&sdata->u.mesh.mpp_paths);
+ }
+
+ static
+@@ -806,12 +779,12 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
+
+ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
+ {
+- mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths);
+- mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths);
++ mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths);
++ mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths);
+ }
+
+ void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata)
+ {
+- mesh_table_free(sdata->u.mesh.mesh_paths);
+- mesh_table_free(sdata->u.mesh.mpp_paths);
++ mesh_table_free(&sdata->u.mesh.mesh_paths);
++ mesh_table_free(&sdata->u.mesh.mpp_paths);
+ }
+diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
+index a6915847d78ae..a829470dd59ed 100644
+--- a/net/mac80211/mesh_plink.c
++++ b/net/mac80211/mesh_plink.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+ * Copyright (c) 2008, 2009 open80211s Ltd.
+- * Copyright (C) 2019 Intel Corporation
++ * Copyright (C) 2019, 2021 Intel Corporation
+ * Author: Luis Carlos Cobo <luisca@cozybit.com>
+ */
+ #include <linux/gfp.h>
+@@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee80211_rx_status *rx_status)
+ {
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ size_t baselen;
+ u8 *baseaddr;
+
+@@ -1228,7 +1228,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
+ if (baselen > len)
+ return;
+ }
+- ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems,
+- mgmt->bssid, NULL);
+- mesh_process_plink_frame(sdata, mgmt, &elems, rx_status);
++ elems = ieee802_11_parse_elems(baseaddr, len - baselen, true,
++ mgmt->bssid, NULL);
++ mesh_process_plink_frame(sdata, mgmt, elems, rx_status);
++ kfree(elems);
+ }
+diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
+index fde93de2b80ac..9e342cc2504c0 100644
+--- a/net/mac80211/mesh_sync.c
++++ b/net/mac80211/mesh_sync.c
+@@ -3,6 +3,7 @@
+ * Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com>
+ * Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
+ * Copyright 2011-2012, cozybit Inc.
++ * Copyright (C) 2021 Intel Corporation
+ */
+
+ #include "ieee80211_i.h"
+@@ -35,12 +36,12 @@ struct sync_method {
+ /**
+ * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT
+ *
+- * @ie: information elements of a management frame from the mesh peer
++ * @cfg: mesh config element from the mesh peer (or %NULL)
+ */
+-static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie)
++static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg)
+ {
+- return (ie->mesh_config->meshconf_cap &
+- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0;
++ return cfg &&
++ (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING);
+ }
+
+ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata)
+@@ -76,11 +77,11 @@ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata)
+ }
+ }
+
+-static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+- u16 stype,
+- struct ieee80211_mgmt *mgmt,
+- struct ieee802_11_elems *elems,
+- struct ieee80211_rx_status *rx_status)
++static void
++mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype,
++ struct ieee80211_mgmt *mgmt, unsigned int len,
++ const struct ieee80211_meshconf_ie *mesh_cfg,
++ struct ieee80211_rx_status *rx_status)
+ {
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct ieee80211_local *local = sdata->local;
+@@ -101,10 +102,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+ */
+ if (ieee80211_have_rx_timestamp(rx_status))
+ t_r = ieee80211_calculate_rx_timestamp(local, rx_status,
+- 24 + 12 +
+- elems->total_len +
+- FCS_LEN,
+- 24);
++ len + FCS_LEN, 24);
+ else
+ t_r = drv_get_tsf(local, sdata);
+
+@@ -119,7 +117,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
+ * dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors
+ */
+
+- if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) {
++ if (mesh_peer_tbtt_adjusting(mesh_cfg)) {
+ msync_dbg(sdata, "STA %pM : is adjusting TBTT\n",
+ sta->sta.addr);
+ goto no_sync;
+diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
+index c0ea3b1aa9e1c..cc6d38a2e6d5a 100644
+--- a/net/mac80211/mlme.c
++++ b/net/mac80211/mlme.c
+@@ -37,6 +37,7 @@
+ #define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2)
+ #define IEEE80211_AUTH_MAX_TRIES 3
+ #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
++#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2)
+ #define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
+ #define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
+ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
+@@ -629,7 +630,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb,
+ struct ieee80211_supported_band *sband)
+ {
+- u8 *pos;
++ u8 *pos, *pre_he_pos;
+ const struct ieee80211_sta_he_cap *he_cap = NULL;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ u8 he_cap_size;
+@@ -646,25 +647,26 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
+
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+- if (!he_cap || !reg_cap)
++ if (!he_cap || !chanctx_conf || !reg_cap)
+ return;
+
+- /*
+- * TODO: the 1 added is because this temporarily is under the EXTENSION
+- * IE. Get rid of it when it moves.
+- */
++ /* get a max size estimate */
+ he_cap_size =
+ 2 + 1 + sizeof(he_cap->he_cap_elem) +
+ ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
+ ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+ he_cap->he_cap_elem.phy_cap_info);
+ pos = skb_put(skb, he_cap_size);
+- ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
++ pre_he_pos = pos;
++ pos = ieee80211_ie_build_he_cap(sdata->u.mgd.flags,
++ pos, he_cap, pos + he_cap_size);
++ /* trim excess if any */
++ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos));
+
+ ieee80211_ie_build_he_6ghz_cap(sdata, skb);
+ }
+
+-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
++static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+ {
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+@@ -684,6 +686,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_prep_tx_info info = {};
++ int ret;
+
+ /* we know it's writable, cast away the const */
+ if (assoc_data->ie_len)
+@@ -697,7 +700,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+- return;
++ return -EINVAL;
+ }
+ chan = chanctx_conf->def.chan;
+ rcu_read_unlock();
+@@ -748,7 +751,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+ (iftd ? iftd->vendor_elems.len : 0),
+ GFP_KERNEL);
+ if (!skb)
+- return;
++ return -ENOMEM;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+@@ -1029,15 +1032,22 @@ skip_rates:
+ skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
+ }
+
+- if (assoc_data->fils_kek_len &&
+- fils_encrypt_assoc_req(skb, assoc_data) < 0) {
+- dev_kfree_skb(skb);
+- return;
++ if (assoc_data->fils_kek_len) {
++ ret = fils_encrypt_assoc_req(skb, assoc_data);
++ if (ret < 0) {
++ dev_kfree_skb(skb);
++ return ret;
++ }
+ }
+
+ pos = skb_tail_pointer(skb);
+ kfree(ifmgd->assoc_req_ies);
+ ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
++ if (!ifmgd->assoc_req_ies) {
++ dev_kfree_skb(skb);
++ return -ENOMEM;
++ }
++
+ ifmgd->assoc_req_ies_len = pos - ie_start;
+
+ drv_mgd_prepare_tx(local, sdata, &info);
+@@ -1047,6 +1057,8 @@ skip_rates:
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
+ IEEE80211_TX_INTFL_MLME_CONN_TX;
+ ieee80211_tx_skb(sdata, skb);
++
++ return 0;
+ }
+
+ void ieee80211_send_pspoll(struct ieee80211_local *local,
+@@ -2446,11 +2458,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
+ u16 tx_time)
+ {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+- u16 tid = ieee80211_get_tid(hdr);
+- int ac = ieee80211_ac_from_tid(tid);
+- struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
++ u16 tid;
++ int ac;
++ struct ieee80211_sta_tx_tspec *tx_tspec;
+ unsigned long now = jiffies;
+
++ if (!ieee80211_is_data_qos(hdr->frame_control))
++ return;
++
++ tid = ieee80211_get_tid(hdr);
++ ac = ieee80211_ac_from_tid(tid);
++ tx_tspec = &ifmgd->tx_tspec[ac];
++
+ if (likely(!tx_tspec->admitted_time))
+ return;
+
+@@ -2870,17 +2889,17 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
+ {
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data;
++ const struct element *challenge;
+ u8 *pos;
+- struct ieee802_11_elems elems;
+ u32 tx_flags = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
+
+ pos = mgmt->u.auth.variable;
+- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
+- mgmt->bssid, auth_data->bss->bssid);
+- if (!elems.challenge)
++ challenge = cfg80211_find_elem(WLAN_EID_CHALLENGE, pos,
++ len - (pos - (u8 *)mgmt));
++ if (!challenge)
+ return;
+ auth_data->expected_transaction = 4;
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
+@@ -2888,7 +2907,8 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
+ tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
+ IEEE80211_TX_INTFL_MLME_CONN_TX;
+ ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
+- elems.challenge - 2, elems.challenge_len + 2,
++ (void *)challenge,
++ challenge->datalen + sizeof(*challenge),
+ auth_data->bss->bssid, auth_data->bss->bssid,
+ auth_data->key, auth_data->key_len,
+ auth_data->key_idx, tx_flags);
+@@ -2977,8 +2997,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
+ (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED ||
+ (auth_transaction == 1 &&
+ (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
+- status_code == WLAN_STATUS_SAE_PK))))
++ status_code == WLAN_STATUS_SAE_PK)))) {
++ /* waiting for userspace now */
++ ifmgd->auth_data->waiting = true;
++ ifmgd->auth_data->timeout =
++ jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
++ ifmgd->auth_data->timeout_started = true;
++ run_again(sdata, ifmgd->auth_data->timeout);
+ goto notify_driver;
++ }
+
+ sdata_info(sdata, "%pM denied authentication (status %d)\n",
+ mgmt->sa, status_code);
+@@ -3290,8 +3317,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ aid = 0; /* TODO */
+ }
+ capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
+- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, elems,
+- mgmt->bssid, assoc_data->bss->bssid);
++ elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
++ mgmt->bssid, assoc_data->bss->bssid);
++
++ if (!elems)
++ return false;
+
+ if (elems->aid_resp)
+ aid = le16_to_cpu(elems->aid_resp->aid);
+@@ -3313,7 +3343,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+
+ if (!is_s1g && !elems->supp_rates) {
+ sdata_info(sdata, "no SuppRates element in AssocResp\n");
+- return false;
++ ret = false;
++ goto out;
+ }
+
+ sdata->vif.bss_conf.aid = aid;
+@@ -3335,7 +3366,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ (!elems->vht_cap_elem || !elems->vht_operation)))) {
+ const struct cfg80211_bss_ies *ies;
+- struct ieee802_11_elems bss_elems;
++ struct ieee802_11_elems *bss_elems;
+
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+@@ -3343,16 +3374,22 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ bss_ies = kmemdup(ies, sizeof(*ies) + ies->len,
+ GFP_ATOMIC);
+ rcu_read_unlock();
+- if (!bss_ies)
+- return false;
++ if (!bss_ies) {
++ ret = false;
++ goto out;
++ }
++
++ bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
++ false, mgmt->bssid,
++ assoc_data->bss->bssid);
++ if (!bss_elems) {
++ ret = false;
++ goto out;
++ }
+
+- ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
+- false, &bss_elems,
+- mgmt->bssid,
+- assoc_data->bss->bssid);
+ if (assoc_data->wmm &&
+- !elems->wmm_param && bss_elems.wmm_param) {
+- elems->wmm_param = bss_elems.wmm_param;
++ !elems->wmm_param && bss_elems->wmm_param) {
++ elems->wmm_param = bss_elems->wmm_param;
+ sdata_info(sdata,
+ "AP bug: WMM param missing from AssocResp\n");
+ }
+@@ -3361,30 +3398,32 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ * Also check if we requested HT/VHT, otherwise the AP doesn't
+ * have to include the IEs in the (re)association response.
+ */
+- if (!elems->ht_cap_elem && bss_elems.ht_cap_elem &&
++ if (!elems->ht_cap_elem && bss_elems->ht_cap_elem &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+- elems->ht_cap_elem = bss_elems.ht_cap_elem;
++ elems->ht_cap_elem = bss_elems->ht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: HT capability missing from AssocResp\n");
+ }
+- if (!elems->ht_operation && bss_elems.ht_operation &&
++ if (!elems->ht_operation && bss_elems->ht_operation &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+- elems->ht_operation = bss_elems.ht_operation;
++ elems->ht_operation = bss_elems->ht_operation;
+ sdata_info(sdata,
+ "AP bug: HT operation missing from AssocResp\n");
+ }
+- if (!elems->vht_cap_elem && bss_elems.vht_cap_elem &&
++ if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+- elems->vht_cap_elem = bss_elems.vht_cap_elem;
++ elems->vht_cap_elem = bss_elems->vht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: VHT capa missing from AssocResp\n");
+ }
+- if (!elems->vht_operation && bss_elems.vht_operation &&
++ if (!elems->vht_operation && bss_elems->vht_operation &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+- elems->vht_operation = bss_elems.vht_operation;
++ elems->vht_operation = bss_elems->vht_operation;
+ sdata_info(sdata,
+ "AP bug: VHT operation missing from AssocResp\n");
+ }
++
++ kfree(bss_elems);
+ }
+
+ /*
+@@ -3515,6 +3554,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ cbss->transmitted_bss->bssid);
+ bss_conf->bssid_indicator = cbss->max_bssid_indicator;
+ bss_conf->bssid_index = cbss->bssid_index;
++ } else {
++ bss_conf->nontransmitted = false;
++ memset(bss_conf->transmitter_bssid, 0,
++ sizeof(bss_conf->transmitter_bssid));
++ bss_conf->bssid_indicator = 0;
++ bss_conf->bssid_index = 0;
+ }
+
+ /*
+@@ -3629,6 +3674,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+
+ ret = true;
+ out:
++ kfree(elems);
+ kfree(bss_ies);
+ return ret;
+ }
+@@ -3640,7 +3686,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ u16 capab_info, status_code, aid;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ int ac, uapsd_queues = -1;
+ u8 *pos;
+ bool reassoc;
+@@ -3697,14 +3743,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
+ return;
+
+- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
+- mgmt->bssid, assoc_data->bss->bssid);
++ elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
++ mgmt->bssid, assoc_data->bss->bssid);
++ if (!elems)
++ goto notify_driver;
+
+ if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
+- elems.timeout_int &&
+- elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
++ elems->timeout_int &&
++ elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
+ u32 tu, ms;
+- tu = le32_to_cpu(elems.timeout_int->value);
++ tu = le32_to_cpu(elems->timeout_int->value);
+ ms = tu * 1024 / 1000;
+ sdata_info(sdata,
+ "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n",
+@@ -3724,7 +3772,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ event.u.mlme.reason = status_code;
+ drv_event_callback(sdata->local, sdata, &event);
+ } else {
+- if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, &elems)) {
++ if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, elems)) {
+ /* oops -- internal error -- send timeout for now */
+ ieee80211_destroy_assoc_data(sdata, false, false);
+ cfg80211_assoc_timeout(sdata->dev, cbss);
+@@ -3754,6 +3802,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+ ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
+ notify_driver:
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
++ kfree(elems);
+ }
+
+ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
+@@ -3958,7 +4007,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_mgmt *mgmt = (void *) hdr;
+ size_t baselen;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_channel *chan;
+@@ -4004,15 +4053,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+
+ if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
+ ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) {
+- ieee802_11_parse_elems(variable,
+- len - baselen, false, &elems,
+- bssid,
+- ifmgd->assoc_data->bss->bssid);
++ elems = ieee802_11_parse_elems(variable, len - baselen, false,
++ bssid,
++ ifmgd->assoc_data->bss->bssid);
++ if (!elems)
++ return;
+
+ ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
+
+- if (elems.dtim_period)
+- ifmgd->dtim_period = elems.dtim_period;
++ if (elems->dtim_period)
++ ifmgd->dtim_period = elems->dtim_period;
+ ifmgd->have_beacon = true;
+ ifmgd->assoc_data->need_beacon = false;
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+@@ -4020,17 +4070,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ le64_to_cpu(mgmt->u.beacon.timestamp);
+ sdata->vif.bss_conf.sync_device_ts =
+ rx_status->device_timestamp;
+- sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
++ sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
+ }
+
+- if (elems.mbssid_config_ie)
++ if (elems->mbssid_config_ie)
+ bss_conf->profile_periodicity =
+- elems.mbssid_config_ie->profile_periodicity;
++ elems->mbssid_config_ie->profile_periodicity;
+ else
+ bss_conf->profile_periodicity = 0;
+
+- if (elems.ext_capab_len >= 11 &&
+- (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
++ if (elems->ext_capab_len >= 11 &&
++ (elems->ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+ bss_conf->ema_ap = true;
+ else
+ bss_conf->ema_ap = false;
+@@ -4039,6 +4089,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ ifmgd->assoc_data->timeout = jiffies;
+ ifmgd->assoc_data->timeout_started = true;
+ run_again(sdata, ifmgd->assoc_data->timeout);
++ kfree(elems);
+ return;
+ }
+
+@@ -4070,13 +4121,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ */
+ if (!ieee80211_is_s1g_beacon(hdr->frame_control))
+ ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
+- ncrc = ieee802_11_parse_elems_crc(variable,
+- len - baselen, false, &elems,
+- care_about_ies, ncrc,
+- mgmt->bssid, bssid);
++ elems = ieee802_11_parse_elems_crc(variable, len - baselen,
++ false, care_about_ies, ncrc,
++ mgmt->bssid, bssid);
++ if (!elems)
++ return;
++ ncrc = elems->crc;
+
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
+- ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) {
++ ieee80211_check_tim(elems->tim, elems->tim_len, bss_conf->aid)) {
+ if (local->hw.conf.dynamic_ps_timeout > 0) {
+ if (local->hw.conf.flags & IEEE80211_CONF_PS) {
+ local->hw.conf.flags &= ~IEEE80211_CONF_PS;
+@@ -4146,12 +4199,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ le64_to_cpu(mgmt->u.beacon.timestamp);
+ sdata->vif.bss_conf.sync_device_ts =
+ rx_status->device_timestamp;
+- sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
++ sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
+ }
+
+ if ((ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) ||
+ ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+- return;
++ goto free;
+ ifmgd->beacon_crc = ncrc;
+ ifmgd->beacon_crc_valid = true;
+
+@@ -4159,12 +4212,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+
+ ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
+ rx_status->device_timestamp,
+- &elems, true);
++ elems, true);
+
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
+- ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
+- elems.wmm_param_len,
+- elems.mu_edca_param_set))
++ ieee80211_sta_wmm_params(local, sdata, elems->wmm_param,
++ elems->wmm_param_len,
++ elems->mu_edca_param_set))
+ changed |= BSS_CHANGED_QOS;
+
+ /*
+@@ -4173,7 +4226,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ */
+ if (!ifmgd->have_beacon) {
+ /* a few bogus AP send dtim_period = 0 or no TIM IE */
+- bss_conf->dtim_period = elems.dtim_period ?: 1;
++ bss_conf->dtim_period = elems->dtim_period ?: 1;
+
+ changed |= BSS_CHANGED_BEACON_INFO;
+ ifmgd->have_beacon = true;
+@@ -4185,9 +4238,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ ieee80211_recalc_ps_vif(sdata);
+ }
+
+- if (elems.erp_info) {
++ if (elems->erp_info) {
+ erp_valid = true;
+- erp_value = elems.erp_info[0];
++ erp_value = elems->erp_info[0];
+ } else {
+ erp_valid = false;
+ }
+@@ -4200,12 +4253,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get(sdata, bssid);
+
+- changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
++ changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
+
+- if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem,
+- elems.vht_cap_elem, elems.ht_operation,
+- elems.vht_operation, elems.he_operation,
+- elems.s1g_oper, bssid, &changed)) {
++ if (ieee80211_config_bw(sdata, sta, elems->ht_cap_elem,
++ elems->vht_cap_elem, elems->ht_operation,
++ elems->vht_operation, elems->he_operation,
++ elems->s1g_oper, bssid, &changed)) {
+ mutex_unlock(&local->sta_mtx);
+ sdata_info(sdata,
+ "failed to follow AP %pM bandwidth change, disconnect\n",
+@@ -4217,21 +4270,23 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+ sizeof(deauth_buf), true,
+ WLAN_REASON_DEAUTH_LEAVING,
+ false);
+- return;
++ goto free;
+ }
+
+- if (sta && elems.opmode_notif)
+- ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif,
++ if (sta && elems->opmode_notif)
++ ieee80211_vht_handle_opmode(sdata, sta, *elems->opmode_notif,
+ rx_status->band);
+ mutex_unlock(&local->sta_mtx);
+
+ changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
+- elems.country_elem,
+- elems.country_elem_len,
+- elems.pwr_constr_elem,
+- elems.cisco_dtpc_elem);
++ elems->country_elem,
++ elems->country_elem_len,
++ elems->pwr_constr_elem,
++ elems->cisco_dtpc_elem);
+
+ ieee80211_bss_info_change_notify(sdata, changed);
++free:
++ kfree(elems);
+ }
+
+ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
+@@ -4260,7 +4315,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_rx_status *rx_status;
+ struct ieee80211_mgmt *mgmt;
+ u16 fc;
+- struct ieee802_11_elems elems;
+ int ies_len;
+
+ rx_status = (struct ieee80211_rx_status *) skb->cb;
+@@ -4292,6 +4346,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ break;
+ case IEEE80211_STYPE_ACTION:
+ if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
++ struct ieee802_11_elems *elems;
++
+ ies_len = skb->len -
+ offsetof(struct ieee80211_mgmt,
+ u.action.u.chan_switch.variable);
+@@ -4300,18 +4356,19 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ break;
+
+ /* CSA IE cannot be overridden, no need for BSSID */
+- ieee802_11_parse_elems(
+- mgmt->u.action.u.chan_switch.variable,
+- ies_len, true, &elems, mgmt->bssid, NULL);
+-
+- if (elems.parse_error)
+- break;
+-
+- ieee80211_sta_process_chanswitch(sdata,
+- rx_status->mactime,
+- rx_status->device_timestamp,
+- &elems, false);
++ elems = ieee802_11_parse_elems(
++ mgmt->u.action.u.chan_switch.variable,
++ ies_len, true, mgmt->bssid, NULL);
++
++ if (elems && !elems->parse_error)
++ ieee80211_sta_process_chanswitch(sdata,
++ rx_status->mactime,
++ rx_status->device_timestamp,
++ elems, false);
++ kfree(elems);
+ } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
++ struct ieee802_11_elems *elems;
++
+ ies_len = skb->len -
+ offsetof(struct ieee80211_mgmt,
+ u.action.u.ext_chan_switch.variable);
+@@ -4323,21 +4380,22 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+ * extended CSA IE can't be overridden, no need for
+ * BSSID
+ */
+- ieee802_11_parse_elems(
+- mgmt->u.action.u.ext_chan_switch.variable,
+- ies_len, true, &elems, mgmt->bssid, NULL);
+-
+- if (elems.parse_error)
+- break;
+-
+- /* for the handling code pretend this was also an IE */
+- elems.ext_chansw_ie =
+- &mgmt->u.action.u.ext_chan_switch.data;
++ elems = ieee802_11_parse_elems(
++ mgmt->u.action.u.ext_chan_switch.variable,
++ ies_len, true, mgmt->bssid, NULL);
++
++ if (elems && !elems->parse_error) {
++ /* for the handling code pretend it was an IE */
++ elems->ext_chansw_ie =
++ &mgmt->u.action.u.ext_chan_switch.data;
++
++ ieee80211_sta_process_chanswitch(sdata,
++ rx_status->mactime,
++ rx_status->device_timestamp,
++ elems, false);
++ }
+
+- ieee80211_sta_process_chanswitch(sdata,
+- rx_status->mactime,
+- rx_status->device_timestamp,
+- &elems, false);
++ kfree(elems);
+ }
+ break;
+ }
+@@ -4444,6 +4502,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
+ {
+ struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
+ struct ieee80211_local *local = sdata->local;
++ int ret;
+
+ sdata_assert_lock(sdata);
+
+@@ -4464,7 +4523,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
+ sdata_info(sdata, "associate with %pM (try %d/%d)\n",
+ assoc_data->bss->bssid, assoc_data->tries,
+ IEEE80211_ASSOC_MAX_TRIES);
+- ieee80211_send_assoc(sdata);
++ ret = ieee80211_send_assoc(sdata);
++ if (ret)
++ return ret;
+
+ if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
+ assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
+@@ -4537,10 +4598,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
+
+ if (ifmgd->auth_data && ifmgd->auth_data->timeout_started &&
+ time_after(jiffies, ifmgd->auth_data->timeout)) {
+- if (ifmgd->auth_data->done) {
++ if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) {
+ /*
+- * ok ... we waited for assoc but userspace didn't,
+- * so let's just kill the auth data
++ * ok ... we waited for assoc or continuation but
++ * userspace didn't do it, so kill the auth data
+ */
+ ieee80211_destroy_auth_data(sdata, false);
+ } else if (ieee80211_auth(sdata)) {
+@@ -5209,7 +5270,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
+ */
+ if (new_sta) {
+ u32 rates = 0, basic_rates = 0;
+- bool have_higher_than_11mbit;
++ bool have_higher_than_11mbit = false;
+ int min_rate = INT_MAX, min_rate_index = -1;
+ const struct cfg80211_bss_ies *ies;
+ int shift = ieee80211_vif_get_shift(&sdata->vif);
+diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
+index 72b44d4c42d0e..90238170dec35 100644
+--- a/net/mac80211/rc80211_minstrel_ht.c
++++ b/net/mac80211/rc80211_minstrel_ht.c
+@@ -364,6 +364,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+
+ group = MINSTREL_CCK_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
++ if (!(mi->supported[group] & BIT(idx)))
++ continue;
++
+ if (rate->idx != mp->cck_rates[idx])
+ continue;
+
+diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
+index c4071b015c188..175ead6b19cb4 100644
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -364,7 +364,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
+ * the compiler to think we have walked past the end of the
+ * struct member.
+ */
+- pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
++ pos = (void *)&rthdr->it_optional[it_present + 1 - rthdr->it_optional];
+
+ /* the order of the following fields is important */
+
+@@ -1400,8 +1400,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
+ goto dont_reorder;
+
+ /* not part of a BA session */
+- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
+- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
++ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
+ goto dont_reorder;
+
+ /* new, potentially un-ordered, ampdu frame - process it */
+@@ -1952,7 +1951,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
+ int keyid = rx->sta->ptk_idx;
+ sta_ptk = rcu_dereference(rx->sta->ptk[keyid]);
+
+- if (ieee80211_has_protected(fc)) {
++ if (ieee80211_has_protected(fc) &&
++ !(status->flag & RX_FLAG_IV_STRIPPED)) {
+ cs = rx->sta->cipher_scheme;
+ keyid = ieee80211_get_keyid(rx->skb, cs);
+
+@@ -1982,10 +1982,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
+
+ if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS ||
+ mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
+- NUM_DEFAULT_BEACON_KEYS) {
+- cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+- skb->data,
+- skb->len);
++ NUM_DEFAULT_BEACON_KEYS) {
++ if (rx->sdata->dev)
++ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
++ skb->data,
++ skb->len);
+ return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+ }
+
+@@ -2133,7 +2134,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
+ /* either the frame has been decrypted or will be dropped */
+ status->flag |= RX_FLAG_DECRYPTED;
+
+- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE))
++ if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
++ rx->sdata->dev))
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ skb->data, skb->len);
+
+@@ -2601,7 +2603,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+- * be the PAE group address.
++ * be the PAE group address, unless the hardware allowed them
++ * through in 802.3 offloaded mode.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+@@ -2916,13 +2919,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
+ ether_addr_equal(sdata->vif.addr, hdr->addr3))
+ return RX_CONTINUE;
+
+- ac = ieee80211_select_queue_80211(sdata, skb, hdr);
++ ac = ieee802_1d_to_ac[skb->priority];
+ q = sdata->vif.hw_queue[ac];
+ if (ieee80211_queue_stopped(&local->hw, q)) {
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
+ return RX_DROP_MONITOR;
+ }
+- skb_set_queue_mapping(skb, q);
++ skb_set_queue_mapping(skb, ac);
+
+ if (!--mesh_hdr->ttl) {
+ if (!is_multicast_ether_addr(hdr->addr1))
+@@ -4517,12 +4520,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
+
+ /* deliver to local stack */
+ skb->protocol = eth_type_trans(skb, fast_rx->dev);
+- memset(skb->cb, 0, sizeof(skb->cb));
+- if (rx->list)
+- list_add_tail(&skb->list, rx->list);
+- else
+- netif_receive_skb(skb);
+-
++ ieee80211_deliver_skb_to_local_stack(skb, rx);
+ }
+
+ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
+@@ -4873,6 +4871,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ struct ieee80211_rate *rate = NULL;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
++ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+
+ WARN_ON_ONCE(softirq_count() == 0);
+
+@@ -4931,7 +4930,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ goto drop;
+ break;
+ case RX_ENC_VHT:
+- if (WARN_ONCE(status->rate_idx > 9 ||
++ if (WARN_ONCE(status->rate_idx > 11 ||
+ !status->nss ||
+ status->nss > 8,
+ "Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n",
+@@ -4969,9 +4968,9 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ if (!(status->flag & RX_FLAG_8023))
+ skb = ieee80211_rx_monitor(local, skb, rate);
+ if (skb) {
+- ieee80211_tpt_led_trig_rx(local,
+- ((struct ieee80211_hdr *)skb->data)->frame_control,
+- skb->len);
++ if ((status->flag & RX_FLAG_8023) ||
++ ieee80211_is_data_present(hdr->frame_control))
++ ieee80211_tpt_led_trig_rx(local, skb->len);
+
+ if (status->flag & RX_FLAG_8023)
+ __ieee80211_rx_handle_8023(hw, pubsta, skb, list);
+diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
+index 7e35ab5b61664..10b34bc4b67d4 100644
+--- a/net/mac80211/s1g.c
++++ b/net/mac80211/s1g.c
+@@ -104,12 +104,17 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
+
+ /* broadcast TWT not supported yet */
+ if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) {
+- le16p_replace_bits(&twt_agrt->req_type,
+- TWT_SETUP_CMD_REJECT,
+- IEEE80211_TWT_REQTYPE_SETUP_CMD);
++ twt_agrt->req_type &=
++ ~cpu_to_le16(IEEE80211_TWT_REQTYPE_SETUP_CMD);
++ twt_agrt->req_type |=
++ le16_encode_bits(TWT_SETUP_CMD_REJECT,
++ IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ goto out;
+ }
+
++ /* TWT Information not supported yet */
++ twt->control |= IEEE80211_TWT_CONTROL_RX_DISABLED;
++
+ drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt);
+ out:
+ ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt);
+diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
+index 6b50cb5e0e3cc..e692a2487eb5d 100644
+--- a/net/mac80211/scan.c
++++ b/net/mac80211/scan.c
+@@ -9,7 +9,7 @@
+ * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2013-2015 Intel Mobile Communications GmbH
+ * Copyright 2016-2017 Intel Deutschland GmbH
+- * Copyright (C) 2018-2020 Intel Corporation
++ * Copyright (C) 2018-2021 Intel Corporation
+ */
+
+ #include <linux/if_arp.h>
+@@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
+ };
+ bool signal_valid;
+ struct ieee80211_sub_if_data *scan_sdata;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ size_t baselen;
+ u8 *elements;
+
+@@ -209,8 +209,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
+ if (baselen > len)
+ return NULL;
+
+- ieee802_11_parse_elems(elements, len - baselen, false, &elems,
+- mgmt->bssid, cbss->bssid);
++ elems = ieee802_11_parse_elems(elements, len - baselen, false,
++ mgmt->bssid, cbss->bssid);
++ if (!elems)
++ return NULL;
+
+ /* In case the signal is invalid update the status */
+ signal_valid = channel == cbss->channel;
+@@ -218,15 +220,17 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
+ rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
+
+ bss = (void *)cbss->priv;
+- ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon);
++ ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon);
+
+ list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) {
+ non_tx_bss = (void *)non_tx_cbss->priv;
+
+- ieee80211_update_bss_from_elems(local, non_tx_bss, &elems,
++ ieee80211_update_bss_from_elems(local, non_tx_bss, elems,
+ rx_status, beacon);
+ }
+
++ kfree(elems);
++
+ return bss;
+ }
+
+@@ -277,6 +281,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
+ if (likely(!sdata1 && !sdata2))
+ return;
+
++ if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
++ /*
++ * we were passive scanning because of radar/no-IR, but
++ * the beacon/proberesp rx gives us an opportunity to upgrade
++ * to active scan
++ */
++ set_bit(SCAN_BEACON_DONE, &local->scanning);
++ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
++ }
++
+ if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+ struct cfg80211_scan_request *scan_req;
+ struct cfg80211_sched_scan_request *sched_scan_req;
+@@ -451,16 +465,19 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
+ scan_req = rcu_dereference_protected(local->scan_req,
+ lockdep_is_held(&local->mtx));
+
+- if (scan_req != local->int_scan_req) {
+- local->scan_info.aborted = aborted;
+- cfg80211_scan_done(scan_req, &local->scan_info);
+- }
+ RCU_INIT_POINTER(local->scan_req, NULL);
+ RCU_INIT_POINTER(local->scan_sdata, NULL);
+
+ local->scanning = 0;
+ local->scan_chandef.chan = NULL;
+
++ synchronize_rcu();
++
++ if (scan_req != local->int_scan_req) {
++ local->scan_info.aborted = aborted;
++ cfg80211_scan_done(scan_req, &local->scan_info);
++ }
++
+ /* Set power back to normal operating levels. */
+ ieee80211_hw_config(local, 0);
+
+@@ -783,6 +800,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
+ IEEE80211_CHAN_RADAR)) ||
+ !req->n_ssids) {
+ next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
++ if (req->n_ssids)
++ set_bit(SCAN_BEACON_WAIT, &local->scanning);
+ } else {
+ ieee80211_scan_state_send_probe(local, &next_delay);
+ next_delay = IEEE80211_CHANNEL_TIME;
+@@ -994,6 +1013,8 @@ set_channel:
+ !scan_req->n_ssids) {
+ *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ local->next_scan_state = SCAN_DECISION;
++ if (scan_req->n_ssids)
++ set_bit(SCAN_BEACON_WAIT, &local->scanning);
+ return;
+ }
+
+@@ -1086,6 +1107,8 @@ void ieee80211_scan_work(struct work_struct *work)
+ goto out;
+ }
+
++ clear_bit(SCAN_BEACON_WAIT, &local->scanning);
++
+ /*
+ * as long as no delay is required advance immediately
+ * without scheduling a new work
+@@ -1096,6 +1119,10 @@ void ieee80211_scan_work(struct work_struct *work)
+ goto out_complete;
+ }
+
++ if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) &&
++ local->next_scan_state == SCAN_DECISION)
++ local->next_scan_state = SCAN_SEND_PROBE;
++
+ switch (local->next_scan_state) {
+ case SCAN_DECISION:
+ /* if no more bands/channels left, complete scan */
+diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
+index 2b5acb37587f7..e10bcfa20526d 100644
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -641,13 +641,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
+ /* check if STA exists already */
+ if (sta_info_get_bss(sdata, sta->sta.addr)) {
+ err = -EEXIST;
+- goto out_err;
++ goto out_cleanup;
+ }
+
+ sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
+ if (!sinfo) {
+ err = -ENOMEM;
+- goto out_err;
++ goto out_cleanup;
+ }
+
+ local->num_sta++;
+@@ -703,8 +703,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
+ out_drop_sta:
+ local->num_sta--;
+ synchronize_net();
++ out_cleanup:
+ cleanup_single_sta(sta);
+- out_err:
+ mutex_unlock(&local->sta_mtx);
+ kfree(sinfo);
+ rcu_read_lock();
+@@ -1036,7 +1036,8 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
+ list_del_rcu(&sta->list);
+ sta->removed = true;
+
+- drv_sta_pre_rcu_remove(local, sta->sdata, sta);
++ if (sta->uploaded)
++ drv_sta_pre_rcu_remove(local, sta->sdata, sta);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ rcu_access_pointer(sdata->u.vlan.sta) == sta)
+@@ -2190,7 +2191,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
+
+ static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
+ {
+- u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
++ u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+
+ if (rate == STA_STATS_RATE_INVALID)
+ return -EINVAL;
+@@ -2206,9 +2207,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats,
+ u64 value;
+
+ do {
+- start = u64_stats_fetch_begin(&rxstats->syncp);
++ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
+ value = rxstats->msdu[tid];
+- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
++ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
+
+ return value;
+ }
+@@ -2272,9 +2273,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats)
+ u64 value;
+
+ do {
+- start = u64_stats_fetch_begin(&rxstats->syncp);
++ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
+ value = rxstats->bytes;
+- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
++ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
+
+ return value;
+ }
+diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
+index ba27967820084..e7443fc4669c8 100644
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -199,6 +199,7 @@ struct tid_ampdu_tx {
+ u8 stop_initiator;
+ bool tx_stop;
+ u16 buf_size;
++ u16 ssn;
+
+ u16 failed_bar_ssn;
+ bool bar_pending;
+diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
+index 45e532ad1215b..137be9ec94af1 100644
+--- a/net/mac80211/tdls.c
++++ b/net/mac80211/tdls.c
+@@ -6,7 +6,7 @@
+ * Copyright 2014, Intel Corporation
+ * Copyright 2014 Intel Mobile Communications GmbH
+ * Copyright 2015 - 2016 Intel Deutschland GmbH
+- * Copyright (C) 2019 Intel Corporation
++ * Copyright (C) 2019, 2021 Intel Corporation
+ */
+
+ #include <linux/ieee80211.h>
+@@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+ {
+ struct ieee80211_local *local = sdata->local;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems = NULL;
+ struct sta_info *sta;
+ struct ieee80211_tdls_data *tf = (void *)skb->data;
+ bool local_initiator;
+@@ -1718,16 +1718,20 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
+ goto call_drv;
+ }
+
+- ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
+- skb->len - baselen, false, &elems,
+- NULL, NULL);
+- if (elems.parse_error) {
++ elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
++ skb->len - baselen, false, NULL, NULL);
++ if (!elems) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (elems->parse_error) {
+ tdls_dbg(sdata, "Invalid IEs in TDLS channel switch resp\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+- if (!elems.ch_sw_timing || !elems.lnk_id) {
++ if (!elems->ch_sw_timing || !elems->lnk_id) {
+ tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n");
+ ret = -EINVAL;
+ goto out;
+@@ -1735,15 +1739,15 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
+
+ /* validate the initiator is set correctly */
+ local_initiator =
+- !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
++ !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
+ if (local_initiator == sta->sta.tdls_initiator) {
+ tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+- params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time);
+- params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout);
++ params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time);
++ params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout);
+
+ params.tmpl_skb =
+ ieee80211_tdls_ch_sw_resp_tmpl_get(sta, &params.ch_sw_tm_ie);
+@@ -1763,6 +1767,7 @@ call_drv:
+ out:
+ mutex_unlock(&local->sta_mtx);
+ dev_kfree_skb_any(params.tmpl_skb);
++ kfree(elems);
+ return ret;
+ }
+
+@@ -1771,7 +1776,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+ {
+ struct ieee80211_local *local = sdata->local;
+- struct ieee802_11_elems elems;
++ struct ieee802_11_elems *elems;
+ struct cfg80211_chan_def chandef;
+ struct ieee80211_channel *chan;
+ enum nl80211_channel_type chan_type;
+@@ -1831,22 +1836,27 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+ return -EINVAL;
+ }
+
+- ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
+- skb->len - baselen, false, &elems, NULL, NULL);
+- if (elems.parse_error) {
++ elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
++ skb->len - baselen, false, NULL, NULL);
++ if (!elems)
++ return -ENOMEM;
++
++ if (elems->parse_error) {
+ tdls_dbg(sdata, "Invalid IEs in TDLS channel switch req\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto free;
+ }
+
+- if (!elems.ch_sw_timing || !elems.lnk_id) {
++ if (!elems->ch_sw_timing || !elems->lnk_id) {
+ tdls_dbg(sdata, "TDLS channel switch req - missing IEs\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto free;
+ }
+
+- if (!elems.sec_chan_offs) {
++ if (!elems->sec_chan_offs) {
+ chan_type = NL80211_CHAN_HT20;
+ } else {
+- switch (elems.sec_chan_offs->sec_chan_offs) {
++ switch (elems->sec_chan_offs->sec_chan_offs) {
+ case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+ chan_type = NL80211_CHAN_HT40PLUS;
+ break;
+@@ -1865,7 +1875,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+ if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef,
+ sdata->wdev.iftype)) {
+ tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto free;
+ }
+
+ mutex_lock(&local->sta_mtx);
+@@ -1881,7 +1892,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+
+ /* validate the initiator is set correctly */
+ local_initiator =
+- !memcmp(elems.lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
++ !memcmp(elems->lnk_id->init_sta, sdata->vif.addr, ETH_ALEN);
+ if (local_initiator == sta->sta.tdls_initiator) {
+ tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n");
+ ret = -EINVAL;
+@@ -1889,16 +1900,16 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+ }
+
+ /* peer should have known better */
+- if (!sta->sta.ht_cap.ht_supported && elems.sec_chan_offs &&
+- elems.sec_chan_offs->sec_chan_offs) {
++ if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs &&
++ elems->sec_chan_offs->sec_chan_offs) {
+ tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
+ ret = -ENOTSUPP;
+ goto out;
+ }
+
+ params.chandef = &chandef;
+- params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time);
+- params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout);
++ params.switch_time = le16_to_cpu(elems->ch_sw_timing->switch_time);
++ params.switch_timeout = le16_to_cpu(elems->ch_sw_timing->switch_timeout);
+
+ params.tmpl_skb =
+ ieee80211_tdls_ch_sw_resp_tmpl_get(sta,
+@@ -1917,6 +1928,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
+ out:
+ mutex_unlock(&local->sta_mtx);
+ dev_kfree_skb_any(params.tmpl_skb);
++free:
++ kfree(elems);
+ return ret;
+ }
+
+diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
+index 9e8381bef7ed8..8a9b62f6e9236 100644
+--- a/net/mac80211/trace.h
++++ b/net/mac80211/trace.h
+@@ -67,7 +67,7 @@
+ __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \
+ __entry->min_chan_width = (c)->width; \
+ __entry->min_center_freq1 = (c)->center_freq1; \
+- __entry->freq1_offset = (c)->freq1_offset; \
++ __entry->min_freq1_offset = (c)->freq1_offset; \
+ __entry->min_center_freq2 = (c)->center_freq2;
+ #define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
+ #define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 8921088a5df65..8f8dc2625d535 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1720,21 +1720,19 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
+ * Returns false if the frame couldn't be transmitted but was queued instead.
+ */
+ static bool __ieee80211_tx(struct ieee80211_local *local,
+- struct sk_buff_head *skbs, int led_len,
+- struct sta_info *sta, bool txpending)
++ struct sk_buff_head *skbs, struct sta_info *sta,
++ bool txpending)
+ {
+ struct ieee80211_tx_info *info;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_vif *vif;
+ struct sk_buff *skb;
+ bool result;
+- __le16 fc;
+
+ if (WARN_ON(skb_queue_empty(skbs)))
+ return true;
+
+ skb = skb_peek(skbs);
+- fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
+ info = IEEE80211_SKB_CB(skb);
+ sdata = vif_to_sdata(info->control.vif);
+ if (sta && !sta->uploaded)
+@@ -1768,8 +1766,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
+
+ result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
+
+- ieee80211_tpt_led_trig_tx(local, fc, led_len);
+-
+ WARN_ON_ONCE(!skb_queue_empty(skbs));
+
+ return result;
+@@ -1825,15 +1821,15 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ ieee80211_tx_result res = TX_CONTINUE;
+
++ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
++ CALL_TXH(ieee80211_tx_h_rate_ctrl);
++
+ if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
+ __skb_queue_tail(&tx->skbs, tx->skb);
+ tx->skb = NULL;
+ goto txh_done;
+ }
+
+- if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
+- CALL_TXH(ieee80211_tx_h_rate_ctrl);
+-
+ CALL_TXH(ieee80211_tx_h_michael_mic_add);
+ CALL_TXH(ieee80211_tx_h_sequence);
+ CALL_TXH(ieee80211_tx_h_fragment);
+@@ -1919,7 +1915,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
+ ieee80211_tx_result res_prepare;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ bool result = true;
+- int led_len;
+
+ if (unlikely(skb->len < 10)) {
+ dev_kfree_skb(skb);
+@@ -1927,7 +1922,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
+ }
+
+ /* initialises tx */
+- led_len = skb->len;
+ res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
+
+ if (unlikely(res_prepare == TX_DROP)) {
+@@ -1950,8 +1944,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
+ return true;
+
+ if (!invoke_tx_handlers_late(&tx))
+- result = __ieee80211_tx(local, &tx.skbs, led_len,
+- tx.sta, txpending);
++ result = __ieee80211_tx(local, &tx.skbs, tx.sta, txpending);
+
+ return result;
+ }
+@@ -4174,6 +4167,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+ struct sk_buff *next;
++ int len = skb->len;
+
+ if (unlikely(skb->len < ETH_HLEN)) {
+ kfree_skb(skb);
+@@ -4220,10 +4214,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
+ }
+ } else {
+ /* we cannot process non-linear frames on this path */
+- if (skb_linearize(skb)) {
+- kfree_skb(skb);
+- goto out;
+- }
++ if (skb_linearize(skb))
++ goto out_free;
+
+ /* the frame could be fragmented, software-encrypted, and other
+ * things so we cannot really handle checksum offload with it -
+@@ -4257,7 +4249,10 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
+ goto out;
+ out_free:
+ kfree_skb(skb);
++ len = 0;
+ out:
++ if (len)
++ ieee80211_tpt_led_trig_tx(local, len);
+ rcu_read_unlock();
+ }
+
+@@ -4395,8 +4390,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
+ }
+
+ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
+- struct sk_buff *skb, int led_len,
+- struct sta_info *sta,
++ struct sk_buff *skb, struct sta_info *sta,
+ bool txpending)
+ {
+ struct ieee80211_local *local = sdata->local;
+@@ -4409,6 +4403,8 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
+ if (sta)
+ sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift);
+
++ ieee80211_tpt_led_trig_tx(local, skb->len);
++
+ if (ieee80211_queue_skb(local, sdata, sta, skb))
+ return true;
+
+@@ -4497,7 +4493,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
+ if (key)
+ info->control.hw_key = &key->conf;
+
+- ieee80211_tx_8023(sdata, skb, skb->len, sta, false);
++ ieee80211_tx_8023(sdata, skb, sta, false);
+
+ return;
+
+@@ -4636,7 +4632,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
+ if (IS_ERR(sta) || (sta && !sta->uploaded))
+ sta = NULL;
+
+- result = ieee80211_tx_8023(sdata, skb, skb->len, sta, true);
++ result = ieee80211_tx_8023(sdata, skb, sta, true);
+ } else {
+ struct sk_buff_head skbs;
+
+@@ -4646,7 +4642,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
+ hdr = (struct ieee80211_hdr *)skb->data;
+ sta = sta_info_get(sdata, hdr->addr1);
+
+- result = __ieee80211_tx(local, &skbs, skb->len, sta, true);
++ result = __ieee80211_tx(local, &skbs, sta, true);
+ }
+
+ return result;
+@@ -5723,6 +5719,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
++ if (local->hw.queues < IEEE80211_NUM_ACS)
++ goto start_xmit;
++
+ /* update QoS header to prioritize control port frames if possible,
+ * priorization also happens for control port frames send over
+ * AF_PACKET
+@@ -5738,6 +5737,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+
+ rcu_read_unlock();
+
++start_xmit:
+ /* mutex lock is only needed for incrementing the cookie counter */
+ mutex_lock(&local->mtx);
+
+diff --git a/net/mac80211/util.c b/net/mac80211/util.c
+index 49cb96d251695..354badd32793a 100644
+--- a/net/mac80211/util.c
++++ b/net/mac80211/util.c
+@@ -796,7 +796,7 @@ static void __iterate_interfaces(struct ieee80211_local *local,
+
+ sdata = rcu_dereference_check(local->monitor_sdata,
+ lockdep_is_held(&local->iflist_mtx) ||
+- lockdep_rtnl_is_held());
++ lockdep_is_held(&local->hw.wiphy->mtx));
+ if (sdata &&
+ (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
+ sdata->flags & IEEE80211_SDATA_IN_DRIVER))
+@@ -943,7 +943,12 @@ static void ieee80211_parse_extension_element(u32 *crc,
+ struct ieee802_11_elems *elems)
+ {
+ const void *data = elem->data + 1;
+- u8 len = elem->datalen - 1;
++ u8 len;
++
++ if (!elem->datalen)
++ return;
++
++ len = elem->datalen - 1;
+
+ switch (elem->data[0]) {
+ case WLAN_EID_EXT_HE_MU_EDCA:
+@@ -1112,10 +1117,6 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+ } else
+ elem_parse_failed = true;
+ break;
+- case WLAN_EID_CHALLENGE:
+- elems->challenge = pos;
+- elems->challenge_len = elen;
+- break;
+ case WLAN_EID_VENDOR_SPECIFIC:
+ if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
+ pos[2] == 0xf2) {
+@@ -1395,8 +1396,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+
+ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+ struct ieee802_11_elems *elems,
+- u8 *transmitter_bssid,
+- u8 *bss_bssid,
++ const u8 *transmitter_bssid,
++ const u8 *bss_bssid,
+ u8 *nontransmitted_profile)
+ {
+ const struct element *elem, *sub;
+@@ -1409,6 +1410,8 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+ for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
+ if (elem->datalen < 2)
+ continue;
++ if (elem->data[0] < 1 || elem->data[0] > 8)
++ continue;
+
+ for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+ u8 new_bssid[ETH_ALEN];
+@@ -1461,31 +1464,36 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
+ return found ? profile_len : 0;
+ }
+
+-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+- struct ieee802_11_elems *elems,
+- u64 filter, u32 crc, u8 *transmitter_bssid,
+- u8 *bss_bssid)
++struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
++ bool action, u64 filter,
++ u32 crc,
++ const u8 *transmitter_bssid,
++ const u8 *bss_bssid)
+ {
++ struct ieee802_11_elems *elems;
+ const struct element *non_inherit = NULL;
+ u8 *nontransmitted_profile;
+ int nontransmitted_profile_len = 0;
+
+- memset(elems, 0, sizeof(*elems));
++ elems = kzalloc(sizeof(*elems) + len, GFP_ATOMIC);
++ if (!elems)
++ return NULL;
+ elems->ie_start = start;
+ elems->total_len = len;
+
+- nontransmitted_profile = kmalloc(len, GFP_ATOMIC);
+- if (nontransmitted_profile) {
+- nontransmitted_profile_len =
+- ieee802_11_find_bssid_profile(start, len, elems,
+- transmitter_bssid,
+- bss_bssid,
+- nontransmitted_profile);
+- non_inherit =
+- cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+- nontransmitted_profile,
+- nontransmitted_profile_len);
+- }
++ elems->scratch_len = len;
++ elems->scratch_pos = elems->scratch;
++
++ nontransmitted_profile = elems->scratch_pos;
++ nontransmitted_profile_len =
++ ieee802_11_find_bssid_profile(start, len, elems,
++ transmitter_bssid,
++ bss_bssid,
++ nontransmitted_profile);
++ non_inherit =
++ cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
++ nontransmitted_profile,
++ nontransmitted_profile_len);
+
+ crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter,
+ crc, non_inherit);
+@@ -1514,9 +1522,9 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+ offsetofend(struct ieee80211_bssid_index, dtim_count))
+ elems->dtim_count = elems->bssid_index->dtim_count;
+
+- kfree(nontransmitted_profile);
++ elems->crc = crc;
+
+- return crc;
++ return elems;
+ }
+
+ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
+@@ -1954,7 +1962,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
+ if (he_cap &&
+ cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ IEEE80211_CHAN_NO_HE)) {
+- pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
++ pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end);
+ if (!pos)
+ goto out_err;
+ }
+@@ -2379,7 +2387,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
+ IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
+
+ /* add interfaces */
+- sdata = rtnl_dereference(local->monitor_sdata);
++ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
+ if (sdata) {
+ /* in HW restart it exists already */
+ WARN_ON(local->resuming);
+@@ -2424,7 +2432,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
+ WARN_ON(drv_add_chanctx(local, ctx));
+ mutex_unlock(&local->chanctx_mtx);
+
+- sdata = rtnl_dereference(local->monitor_sdata);
++ sdata = wiphy_dereference(local->hw.wiphy,
++ local->monitor_sdata);
+ if (sdata && ieee80211_sdata_running(sdata))
+ ieee80211_assign_chanctx(local, sdata);
+ }
+@@ -2897,10 +2906,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+ he_cap->he_cap_elem.phy_cap_info);
+ }
+
+-u8 *ieee80211_ie_build_he_cap(u8 *pos,
++u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos,
+ const struct ieee80211_sta_he_cap *he_cap,
+ u8 *end)
+ {
++ struct ieee80211_he_cap_elem elem;
+ u8 n;
+ u8 ie_len;
+ u8 *orig_pos = pos;
+@@ -2913,7 +2923,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
+ if (!he_cap)
+ return orig_pos;
+
+- n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
++ /* modify on stack first to calculate 'n' and 'ie_len' correctly */
++ elem = he_cap->he_cap_elem;
++
++ if (disable_flags & IEEE80211_STA_DISABLE_40MHZ)
++ elem.phy_cap_info[0] &=
++ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
++ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);
++
++ if (disable_flags & IEEE80211_STA_DISABLE_160MHZ)
++ elem.phy_cap_info[0] &=
++ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
++
++ if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ)
++ elem.phy_cap_info[0] &=
++ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
++
++ n = ieee80211_he_mcs_nss_size(&elem);
+ ie_len = 2 + 1 +
+ sizeof(he_cap->he_cap_elem) + n +
+ ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+@@ -2927,8 +2953,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
+ *pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+
+ /* Fixed data */
+- memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
+- pos += sizeof(he_cap->he_cap_elem);
++ memcpy(pos, &elem, sizeof(elem));
++ pos += sizeof(elem);
+
+ memcpy(pos, &he_cap->he_mcs_nss_supp, n);
+ pos += n;
+diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
+index 9ea6004abe1be..93691301577ba 100644
+--- a/net/mac80211/wme.c
++++ b/net/mac80211/wme.c
+@@ -143,20 +143,21 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata,
+ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, struct sk_buff *skb)
+ {
+- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
++ const struct ethhdr *eth = (void *)skb->data;
+ struct mac80211_qos_map *qos_map;
+ bool qos;
+
+ /* all mesh/ocb stations are required to support WME */
+- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+- sdata->vif.type == NL80211_IFTYPE_OCB)
++ if ((sdata->vif.type == NL80211_IFTYPE_MESH_POINT &&
++ !is_multicast_ether_addr(eth->h_dest)) ||
++ (sdata->vif.type == NL80211_IFTYPE_OCB && sta))
+ qos = true;
+ else if (sta)
+ qos = sta->sta.wme;
+ else
+ qos = false;
+
+- if (!qos || (info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) {
++ if (!qos) {
+ skb->priority = 0; /* required for correct WPA/11i MIC */
+ return IEEE80211_AC_BE;
+ }
+diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
+index 323d3d2d986f8..3e510664fc891 100644
+--- a/net/mac802154/iface.c
++++ b/net/mac802154/iface.c
+@@ -661,6 +661,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
+ sdata->dev = ndev;
+ sdata->wpan_dev.wpan_phy = local->hw.phy;
+ sdata->local = local;
++ INIT_LIST_HEAD(&sdata->wpan_dev.list);
+
+ /* setup type-dependent data */
+ ret = ieee802154_setup_sdata(sdata, type);
+diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
+index b8ce84618a55b..726b47a4611b5 100644
+--- a/net/mac802154/rx.c
++++ b/net/mac802154/rx.c
+@@ -44,7 +44,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
+
+ switch (mac_cb(skb)->dest.mode) {
+ case IEEE802154_ADDR_NONE:
+- if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
++ if (hdr->source.mode != IEEE802154_ADDR_NONE)
+ /* FIXME: check if we are PAN coordinator */
+ skb->pkt_type = PACKET_OTHERHOST;
+ else
+@@ -132,7 +132,7 @@ static int
+ ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr)
+ {
+ int hlen;
+- struct ieee802154_mac_cb *cb = mac_cb_init(skb);
++ struct ieee802154_mac_cb *cb = mac_cb(skb);
+
+ skb_reset_mac_header(skb);
+
+@@ -294,8 +294,9 @@ void
+ ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
+ {
+ struct ieee802154_local *local = hw_to_local(hw);
++ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+
+- mac_cb(skb)->lqi = lqi;
++ cb->lqi = lqi;
+ skb->pkt_type = IEEE802154_RX_MSG;
+ skb_queue_tail(&local->skb_queue, skb);
+ tasklet_schedule(&local->tasklet);
+diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
+index a9526ac29dffe..77137a8627d06 100644
+--- a/net/mctp/af_mctp.c
++++ b/net/mctp/af_mctp.c
+@@ -30,6 +30,12 @@ static int mctp_release(struct socket *sock)
+ return 0;
+ }
+
++/* Generic sockaddr checks, padding checks only so far */
++static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp *addr)
++{
++ return !addr->__smctp_pad0 && !addr->__smctp_pad1;
++}
++
+ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+ {
+ struct sock *sk = sock->sk;
+@@ -49,6 +55,9 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+ /* it's a valid sockaddr for MCTP, cast and do protocol checks */
+ smctp = (struct sockaddr_mctp *)addr;
+
++ if (!mctp_sockaddr_is_ok(smctp))
++ return -EINVAL;
++
+ lock_sock(sk);
+
+ /* TODO: allow rebind */
+@@ -83,6 +92,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ return -EINVAL;
+ if (addr->smctp_family != AF_MCTP)
+ return -EINVAL;
++ if (!mctp_sockaddr_is_ok(addr))
++ return -EINVAL;
+ if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ return -EINVAL;
+
+@@ -172,11 +183,13 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+
+ addr = msg->msg_name;
+ addr->smctp_family = AF_MCTP;
++ addr->__smctp_pad0 = 0;
+ addr->smctp_network = cb->net;
+ addr->smctp_addr.s_addr = hdr->src;
+ addr->smctp_type = type;
+ addr->smctp_tag = hdr->flags_seq_tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
++ addr->__smctp_pad1 = 0;
+ msg->msg_namelen = sizeof(*addr);
+ }
+
+@@ -275,11 +288,17 @@ static void mctp_sk_unhash(struct sock *sk)
+
+ kfree_rcu(key, rcu);
+ }
++ sock_set_flag(sk, SOCK_DEAD);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ synchronize_rcu();
+ }
+
++static void mctp_sk_destruct(struct sock *sk)
++{
++ skb_queue_purge(&sk->sk_receive_queue);
++}
++
+ static struct proto mctp_proto = {
+ .name = "MCTP",
+ .owner = THIS_MODULE,
+@@ -316,6 +335,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
++ sk->sk_destruct = mctp_sk_destruct;
+
+ rc = 0;
+ if (sk->sk_prot->init)
+@@ -362,12 +382,14 @@ static __init int mctp_init(void)
+
+ rc = mctp_neigh_init();
+ if (rc)
+- goto err_unreg_proto;
++ goto err_unreg_routes;
+
+ mctp_device_init();
+
+ return 0;
+
++err_unreg_routes:
++ mctp_routes_exit();
+ err_unreg_proto:
+ proto_unregister(&mctp_proto);
+ err_unreg_sock:
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index 5ca186d53cb0f..89e67399249b4 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -135,6 +135,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
++ if (sock_flag(&msk->sk, SOCK_DEAD)) {
++ rc = -EINVAL;
++ goto out_unlock;
++ }
++
+ hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+ if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+ key->tag)) {
+@@ -148,6 +153,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+ hlist_add_head(&key->sklist, &msk->keys);
+ }
+
++out_unlock:
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+@@ -396,7 +402,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+
+ rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ daddr, skb->dev->dev_addr, skb->len);
+- if (rc) {
++ if (rc < 0) {
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+@@ -760,7 +766,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ }
+
+ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+- unsigned int daddr_extent)
++ unsigned int daddr_extent, unsigned char type)
+ {
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+@@ -777,7 +783,8 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev &&
+- rt->min == daddr_start && rt->max == daddr_end) {
++ rt->min == daddr_start && rt->max == daddr_end &&
++ rt->type == type) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+@@ -795,7 +802,7 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+
+ int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+ {
+- return mctp_route_remove(mdev, addr, 0);
++ return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
+ }
+
+ /* removes all entries for a given device */
+@@ -975,7 +982,7 @@ static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ if (rtm->rtm_type != RTN_UNICAST)
+ return -EINVAL;
+
+- rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
++ rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
+ return rc;
+ }
+
+@@ -1108,7 +1115,7 @@ int __init mctp_routes_init(void)
+ return register_pernet_subsys(&mctp_net_ops);
+ }
+
+-void __exit mctp_routes_exit(void)
++void mctp_routes_exit(void)
+ {
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
+index ffeb2df8be7ae..e69bed96811b5 100644
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev,
+
+ p = per_cpu_ptr(mdev->stats, i);
+ do {
+- start = u64_stats_fetch_begin(&p->syncp);
++ start = u64_stats_fetch_begin_irq(&p->syncp);
+ local = p->stats;
+- } while (u64_stats_fetch_retry(&p->syncp, start));
++ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+ stats->rx_packets += local.rx_packets;
+ stats->rx_bytes += local.rx_bytes;
+@@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
+ free:
+ kfree(table);
+ out:
++ mdev->sysctl = NULL;
+ return -ENOBUFS;
+ }
+
+@@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
+ struct net *net = dev_net(dev);
+ struct ctl_table *table;
+
++ if (!mdev->sysctl)
++ return;
++
+ table = mdev->sysctl->ctl_table_arg;
+ unregister_net_sysctl_table(mdev->sysctl);
+ kfree(table);
+@@ -1491,22 +1495,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head)
+ kfree(mdev);
+ }
+
+-static void mpls_ifdown(struct net_device *dev, int event)
++static int mpls_ifdown(struct net_device *dev, int event)
+ {
+ struct mpls_route __rcu **platform_label;
+ struct net *net = dev_net(dev);
+- u8 alive, deleted;
+ unsigned index;
+
+ platform_label = rtnl_dereference(net->mpls.platform_label);
+ for (index = 0; index < net->mpls.platform_labels; index++) {
+ struct mpls_route *rt = rtnl_dereference(platform_label[index]);
++ bool nh_del = false;
++ u8 alive = 0;
+
+ if (!rt)
+ continue;
+
+- alive = 0;
+- deleted = 0;
++ if (event == NETDEV_UNREGISTER) {
++ u8 deleted = 0;
++
++ for_nexthops(rt) {
++ struct net_device *nh_dev =
++ rtnl_dereference(nh->nh_dev);
++
++ if (!nh_dev || nh_dev == dev)
++ deleted++;
++ if (nh_dev == dev)
++ nh_del = true;
++ } endfor_nexthops(rt);
++
++ /* if there are no more nexthops, delete the route */
++ if (deleted == rt->rt_nhn) {
++ mpls_route_update(net, index, NULL, NULL);
++ continue;
++ }
++
++ if (nh_del) {
++ size_t size = sizeof(*rt) + rt->rt_nhn *
++ rt->rt_nh_size;
++ struct mpls_route *orig = rt;
++
++ rt = kmalloc(size, GFP_KERNEL);
++ if (!rt)
++ return -ENOMEM;
++ memcpy(rt, orig, size);
++ }
++ }
++
+ change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
+
+@@ -1530,16 +1564,15 @@ static void mpls_ifdown(struct net_device *dev, int event)
+ next:
+ if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
+ alive++;
+- if (!rtnl_dereference(nh->nh_dev))
+- deleted++;
+ } endfor_nexthops(rt);
+
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+- /* if there are no more nexthops, delete the route */
+- if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+- mpls_route_update(net, index, NULL, NULL);
++ if (nh_del)
++ mpls_route_update(net, index, rt, NULL);
+ }
++
++ return 0;
+ }
+
+ static void mpls_ifup(struct net_device *dev, unsigned int flags)
+@@ -1597,8 +1630,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
+ return NOTIFY_OK;
+
+ switch (event) {
++ int err;
++
+ case NETDEV_DOWN:
+- mpls_ifdown(dev, event);
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
+ break;
+ case NETDEV_UP:
+ flags = dev_get_flags(dev);
+@@ -1609,13 +1646,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
+ break;
+ case NETDEV_CHANGE:
+ flags = dev_get_flags(dev);
+- if (flags & (IFF_RUNNING | IFF_LOWER_UP))
++ if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
+ mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+- else
+- mpls_ifdown(dev, event);
++ } else {
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
++ }
+ break;
+ case NETDEV_UNREGISTER:
+- mpls_ifdown(dev, event);
++ err = mpls_ifdown(dev, event);
++ if (err)
++ return notifier_from_errno(err);
+ mdev = mpls_dev_get(dev);
+ if (mdev) {
+ mpls_dev_sysctl_unregister(dev, mdev);
+@@ -1626,8 +1668,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
+ case NETDEV_CHANGENAME:
+ mdev = mpls_dev_get(dev);
+ if (mdev) {
+- int err;
+-
+ mpls_dev_sysctl_unregister(dev, mdev);
+ err = mpls_dev_sysctl_register(dev, mdev);
+ if (err)
+diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
+index b21ff9be04c61..8d1c67b935911 100644
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -35,12 +35,14 @@ static const struct snmp_mib mptcp_snmp_list[] = {
+ SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
+ SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
+ SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
++ SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
+ SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
+ SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
+ SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
+ SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
+ SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
+ SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
++ SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
+ SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
+ SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
+ SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
+index ecd3d8b117e0b..2966fcb6548ba 100644
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -28,12 +28,14 @@ enum linux_mptcp_mib_field {
+ MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
+ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
++ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
+ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
+ MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
+ MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
+ MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
+ MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
+ MPTCP_MIB_RMADDR, /* Received RM_ADDR */
++ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
+ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
+ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
+ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
+diff --git a/net/mptcp/options.c b/net/mptcp/options.c
+index f0f22eb4fd5f7..aa4b0cf7c6380 100644
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
+ ptr += 2;
+ }
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
++ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
+ ptr += 2;
+ }
+@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
+
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
+- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
++ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
+ ptr += 2;
+ }
+
+@@ -422,28 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
+ return false;
+ }
+
+-/* MP_JOIN client subflow must wait for 4th ack before sending any data:
+- * TCP can't schedule delack timer before the subflow is fully established.
+- * MPTCP uses the delack timer to do 3rd ack retransmissions
+- */
+-static void schedule_3rdack_retransmission(struct sock *sk)
+-{
+- struct inet_connection_sock *icsk = inet_csk(sk);
+- struct tcp_sock *tp = tcp_sk(sk);
+- unsigned long timeout;
+-
+- /* reschedule with a timeout above RTT, as we must look only for drop */
+- if (tp->srtt_us)
+- timeout = tp->srtt_us << 1;
+- else
+- timeout = TCP_TIMEOUT_INIT;
+-
+- WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
+- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+- icsk->icsk_ack.timeout = timeout;
+- sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
+-}
+-
+ static void clear_3rdack_retransmission(struct sock *sk)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+@@ -526,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+ *size = TCPOLEN_MPTCP_MPJ_ACK;
+ pr_debug("subflow=%p", subflow);
+
+- schedule_3rdack_retransmission(sk);
++ /* we can use the full delegate action helper only from BH context
++ * If we are in process context - sk is flushing the backlog at
++ * socket lock release time - just set the appropriate flag, will
++ * be handled by the release callback
++ */
++ if (sock_owned_by_user(sk))
++ set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
++ else
++ mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
+ return true;
+ }
+ return false;
+@@ -837,10 +823,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ ret = true;
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
++ unsigned int mp_fail_size;
++
+ ret = true;
+- if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+- *size += opt_size;
+- remaining -= opt_size;
++ if (mptcp_established_options_mp_fail(sk, &mp_fail_size,
++ remaining - opt_size, opts)) {
++ *size += opt_size + mp_fail_size;
++ remaining -= opt_size - mp_fail_size;
+ return true;
+ }
+ }
+@@ -1019,11 +1008,9 @@ static void ack_update_msk(struct mptcp_sock *msk,
+ old_snd_una = msk->snd_una;
+ new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
+
+- /* ACK for data not even sent yet and even above recovery bound? Ignore.*/
+- if (unlikely(after64(new_snd_una, snd_nxt))) {
+- if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
+- new_snd_una = old_snd_una;
+- }
++ /* ACK for data not even sent yet? Ignore.*/
++ if (unlikely(after64(new_snd_una, snd_nxt)))
++ new_snd_una = old_snd_una;
+
+ new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
+
+@@ -1169,9 +1156,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
+ */
+ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+ if (mp_opt.data_fin && mp_opt.data_len == 1 &&
+- mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
+- schedule_work(&msk->work))
+- sock_hold(subflow->conn);
++ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
++ mptcp_schedule_work((struct sock *)msk);
+
+ return true;
+ }
+@@ -1227,7 +1213,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
+ WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ }
+
+-static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
++__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+ {
+ struct csum_pseudo_header header;
+ __wsum csum;
+@@ -1242,14 +1228,24 @@ static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum1
+ header.data_len = htons(data_len);
+ header.csum = 0;
+
+- csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
+- return (__force u16)csum_fold(csum);
++ csum = csum_partial(&header, sizeof(header), sum);
++ return csum_fold(csum);
+ }
+
+-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
++static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
+ {
+ return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+- mpext->csum);
++ ~csum_unfold(mpext->csum));
++}
++
++static void put_len_csum(u16 len, __sum16 csum, void *data)
++{
++ __sum16 *sumptr = data + 2;
++ __be16 *ptr = data;
++
++ put_unaligned_be16(len, ptr);
++
++ put_unaligned(csum, sumptr);
+ }
+
+ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+@@ -1328,12 +1324,14 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ put_unaligned_be32(mpext->subflow_seq, ptr);
+ ptr += 1;
+ if (opts->csum_reqd) {
+- put_unaligned_be32(mpext->data_len << 16 |
+- mptcp_make_csum(mpext), ptr);
++ put_len_csum(mpext->data_len,
++ mptcp_make_csum(mpext),
++ ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
++ ptr += 1;
+ }
+ } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+@@ -1376,11 +1374,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ goto mp_capable_done;
+
+ if (opts->csum_reqd) {
+- put_unaligned_be32(opts->data_len << 16 |
+- __mptcp_make_csum(opts->data_seq,
+- opts->subflow_seq,
+- opts->data_len,
+- opts->csum), ptr);
++ put_len_csum(opts->data_len,
++ __mptcp_make_csum(opts->data_seq,
++ opts->subflow_seq,
++ opts->data_len,
++ ~csum_unfold(opts->csum)),
++ ptr);
+ } else {
+ put_unaligned_be32(opts->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
+index 6ab386ff32944..d9790d6fbce9c 100644
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -194,6 +194,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+ mptcp_pm_add_addr_send_ack(msk);
+ } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
+ pm->remote = *addr;
++ } else {
++ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
+ }
+
+ spin_unlock_bh(&pm->lock);
+@@ -234,8 +236,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+ mptcp_event_addr_removed(msk, rm_list->ids[i]);
+
+ spin_lock_bh(&pm->lock);
+- mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
+- pm->rm_list_rx = *rm_list;
++ if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
++ pm->rm_list_rx = *rm_list;
++ else
++ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
+ spin_unlock_bh(&pm->lock);
+ }
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 050eea231528b..935f351751740 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -459,6 +459,18 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
+ return i;
+ }
+
++static struct mptcp_pm_addr_entry *
++__lookup_addr(struct pm_nl_pernet *pernet, struct mptcp_addr_info *info)
++{
++ struct mptcp_pm_addr_entry *entry;
++
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if (addresses_equal(&entry->addr, info, true))
++ return entry;
++ }
++ return NULL;
++}
++
+ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ {
+ struct sock *sk = (struct sock *)msk;
+@@ -594,6 +606,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
+ unsigned int add_addr_accept_max;
+ struct mptcp_addr_info remote;
+ unsigned int subflows_max;
++ bool reset_port = false;
+ int i, nr;
+
+ add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
+@@ -603,15 +616,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
+ msk->pm.add_addr_accepted, add_addr_accept_max,
+ msk->pm.remote.family);
+
+- if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
++ remote = msk->pm.remote;
++ if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
+ goto add_addr_echo;
+
++ /* pick id 0 port, if none is provided the remote address */
++ if (!remote.port) {
++ reset_port = true;
++ remote.port = sk->sk_dport;
++ }
++
+ /* connect to the specified remote address, using whatever
+ * local address the routing configuration will pick.
+ */
+- remote = msk->pm.remote;
+- if (!remote.port)
+- remote.port = sk->sk_dport;
+ nr = fill_local_addresses_vec(msk, addrs);
+
+ msk->pm.add_addr_accepted++;
+@@ -624,8 +641,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
+ __mptcp_subflow_connect(sk, &addrs[i], &remote);
+ spin_lock_bh(&msk->pm.lock);
+
++ /* be sure to echo exactly the received address */
++ if (reset_port)
++ remote.port = 0;
++
+ add_addr_echo:
+- mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
++ mptcp_pm_announce_addr(msk, &remote, true);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+
+@@ -671,6 +692,8 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ if (!addresses_equal(&local, addr, addr->port))
+ continue;
+
++ if (subflow->backup != bkup)
++ msk->last_snd = NULL;
+ subflow->backup = bkup;
+ subflow->send_mp_prio = 1;
+ subflow->request_bkup = bkup;
+@@ -700,6 +723,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+
+ msk_owned_by_me(msk);
+
++ if (sk->sk_state == TCP_LISTEN)
++ return;
++
+ if (!rm_list->nr)
+ return;
+
+@@ -707,6 +733,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ return;
+
+ for (i = 0; i < rm_list->nr; i++) {
++ bool removed = false;
++
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+@@ -726,15 +754,19 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ mptcp_close_ssk(sk, ssk, subflow);
+ spin_lock_bh(&msk->pm.lock);
+
+- if (rm_type == MPTCP_MIB_RMADDR) {
+- msk->pm.add_addr_accepted--;
+- WRITE_ONCE(msk->pm.accept_addr, true);
+- } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+- msk->pm.local_addr_used--;
+- }
++ removed = true;
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
++ if (!removed)
++ continue;
++
++ if (rm_type == MPTCP_MIB_RMADDR) {
++ msk->pm.add_addr_accepted--;
++ WRITE_ONCE(msk->pm.accept_addr, true);
++ } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
++ msk->pm.local_addr_used--;
++ }
+ }
+ }
+
+@@ -854,12 +886,17 @@ out:
+ return ret;
+ }
+
++static struct lock_class_key mptcp_slock_keys[2];
++static struct lock_class_key mptcp_keys[2];
++
+ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
+ struct mptcp_pm_addr_entry *entry)
+ {
++ bool is_ipv6 = sk->sk_family == AF_INET6;
++ int addrlen = sizeof(struct sockaddr_in);
+ struct sockaddr_storage addr;
+- struct mptcp_sock *msk;
+ struct socket *ssock;
++ struct sock *newsk;
+ int backlog = 1024;
+ int err;
+
+@@ -868,21 +905,38 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
+ if (err)
+ return err;
+
+- msk = mptcp_sk(entry->lsk->sk);
+- if (!msk) {
++ newsk = entry->lsk->sk;
++ if (!newsk) {
+ err = -EINVAL;
+ goto out;
+ }
+
+- ssock = __mptcp_nmpc_socket(msk);
++ /* The subflow socket lock is acquired in a nested to the msk one
++ * in several places, even by the TCP stack, and this msk is a kernel
++ * socket: lockdep complains. Instead of propagating the _nested
++ * modifiers in several places, re-init the lock class for the msk
++ * socket to an mptcp specific one.
++ */
++ sock_lock_init_class_and_name(newsk,
++ is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
++ &mptcp_slock_keys[is_ipv6],
++ is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
++ &mptcp_keys[is_ipv6]);
++
++ lock_sock(newsk);
++ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
++ release_sock(newsk);
+ if (!ssock) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
+- err = kernel_bind(ssock, (struct sockaddr *)&addr,
+- sizeof(struct sockaddr_in));
++#if IS_ENABLED(CONFIG_MPTCP_IPV6)
++ if (entry->addr.family == AF_INET6)
++ addrlen = sizeof(struct sockaddr_in6);
++#endif
++ err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
+ if (err) {
+ pr_warn("kernel_bind error, err=%d", err);
+ goto out;
+@@ -1716,17 +1770,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
+
+- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if (addresses_equal(&entry->addr, &addr.addr, true)) {
+- mptcp_nl_addr_backup(net, &entry->addr, bkup);
+-
+- if (bkup)
+- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+- else
+- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+- }
++ spin_lock_bh(&pernet->lock);
++ entry = __lookup_addr(pernet, &addr.addr);
++ if (!entry) {
++ spin_unlock_bh(&pernet->lock);
++ return -EINVAL;
+ }
+
++ if (bkup)
++ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
++ else
++ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
++ addr = *entry;
++ spin_unlock_bh(&pernet->lock);
++
++ mptcp_nl_addr_backup(net, &addr.addr, bkup);
+ return 0;
+ }
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d073b21113828..82b1583f709d3 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -51,7 +51,7 @@ enum {
+ static struct percpu_counter mptcp_sockets_allocated;
+
+ static void __mptcp_destroy_sock(struct sock *sk);
+-static void __mptcp_check_send_data_fin(struct sock *sk);
++static void mptcp_check_send_data_fin(struct sock *sk);
+
+ DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+ static struct net_device mptcp_napi_dev;
+@@ -355,8 +355,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
+ {
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+- return !__mptcp_check_fallback(msk) &&
+- ((1 << sk->sk_state) &
++ return ((1 << sk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
+ msk->write_seq == READ_ONCE(msk->snd_una);
+ }
+@@ -406,9 +405,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
+ static void mptcp_set_datafin_timeout(const struct sock *sk)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
++ u32 retransmits;
+
+- mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
+- TCP_RTO_MIN << icsk->icsk_retransmits);
++ retransmits = min_t(u32, icsk->icsk_retransmits,
++ ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
++
++ mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits;
+ }
+
+ static void __mptcp_set_timeout(struct sock *sk, long tout)
+@@ -506,9 +508,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
+ u64 rcv_data_fin_seq;
+ bool ret = false;
+
+- if (__mptcp_check_fallback(msk))
+- return ret;
+-
+ /* Need to ack a DATA_FIN received from a peer while this side
+ * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
+ * msk->rcv_data_fin was set when parsing the incoming options
+@@ -546,7 +545,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
+ }
+
+ ret = true;
+- mptcp_send_ack(msk);
++ if (!__mptcp_check_fallback(msk))
++ mptcp_send_ack(msk);
+ mptcp_close_wake_up(sk);
+ }
+ return ret;
+@@ -1221,6 +1221,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
+ if (likely(__mptcp_add_ext(skb, gfp))) {
+ skb_reserve(skb, MAX_TCP_HEADER);
+ skb->reserved_tailroom = skb->end - skb->tail;
++ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
+ return skb;
+ }
+ __kfree_skb(skb);
+@@ -1230,31 +1231,24 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
+ return NULL;
+ }
+
+-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
++static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+ {
+ struct sk_buff *skb;
+
+- if (ssk->sk_tx_skb_cache) {
+- skb = ssk->sk_tx_skb_cache;
+- if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
+- !__mptcp_add_ext(skb, gfp)))
+- return false;
+- return true;
+- }
+-
+ skb = __mptcp_do_alloc_tx_skb(sk, gfp);
+ if (!skb)
+- return false;
++ return NULL;
+
+ if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
+- ssk->sk_tx_skb_cache = skb;
+- return true;
++ tcp_skb_entail(ssk, skb);
++ return skb;
+ }
++ tcp_skb_tsorted_anchor_cleanup(skb);
+ kfree_skb(skb);
+- return false;
++ return NULL;
+ }
+
+-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
++static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+ {
+ gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
+
+@@ -1284,23 +1278,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
+ struct mptcp_sendmsg_info *info)
+ {
+ u64 data_seq = dfrag->data_seq + info->sent;
++ int offset = dfrag->offset + info->sent;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ bool zero_window_probe = false;
+ struct mptcp_ext *mpext = NULL;
+- struct sk_buff *skb, *tail;
+- bool must_collapse = false;
+- int size_bias = 0;
+- int avail_size;
+- size_t ret = 0;
++ bool can_coalesce = false;
++ bool reuse_skb = true;
++ struct sk_buff *skb;
++ size_t copy;
++ int i;
+
+ pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
+ msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
+
++ if (WARN_ON_ONCE(info->sent > info->limit ||
++ info->limit > dfrag->data_len))
++ return 0;
++
+ /* compute send limit */
+ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
+- avail_size = info->size_goal;
++ copy = info->size_goal;
++
+ skb = tcp_write_queue_tail(ssk);
+- if (skb) {
++ if (skb && copy > skb->len) {
+ /* Limit the write to the size available in the
+ * current skb, if any, so that we create at most a new skb.
+ * Explicitly tells TCP internals to avoid collapsing on later
+@@ -1313,62 +1313,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
+ goto alloc_skb;
+ }
+
+- must_collapse = (info->size_goal > skb->len) &&
+- (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
+- if (must_collapse) {
+- size_bias = skb->len;
+- avail_size = info->size_goal - skb->len;
++ i = skb_shinfo(skb)->nr_frags;
++ can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
++ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
++ tcp_mark_push(tcp_sk(ssk), skb);
++ goto alloc_skb;
+ }
+- }
+
++ copy -= skb->len;
++ } else {
+ alloc_skb:
+- if (!must_collapse &&
+- !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
+- return 0;
++ skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
++ if (!skb)
++ return -ENOMEM;
++
++ i = skb_shinfo(skb)->nr_frags;
++ reuse_skb = false;
++ mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
++ }
+
+ /* Zero window and all data acked? Probe. */
+- avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
+- if (avail_size == 0) {
++ copy = mptcp_check_allowed_size(msk, data_seq, copy);
++ if (copy == 0) {
+ u64 snd_una = READ_ONCE(msk->snd_una);
+
+- if (skb || snd_una != msk->snd_nxt)
++ if (snd_una != msk->snd_nxt) {
++ tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+ return 0;
++ }
++
+ zero_window_probe = true;
+ data_seq = snd_una - 1;
+- avail_size = 1;
+- }
++ copy = 1;
+
+- if (WARN_ON_ONCE(info->sent > info->limit ||
+- info->limit > dfrag->data_len))
+- return 0;
++ /* all mptcp-level data is acked, no skbs should be present into the
++ * ssk write queue
++ */
++ WARN_ON_ONCE(reuse_skb);
++ }
+
+- ret = info->limit - info->sent;
+- tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
+- dfrag->page, dfrag->offset + info->sent, &ret);
+- if (!tail) {
+- tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
++ copy = min_t(size_t, copy, info->limit - info->sent);
++ if (!sk_wmem_schedule(ssk, copy)) {
++ tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+ return -ENOMEM;
+ }
+
+- /* if the tail skb is still the cached one, collapsing really happened.
+- */
+- if (skb == tail) {
+- TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
+- mpext->data_len += ret;
++ if (can_coalesce) {
++ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
++ } else {
++ get_page(dfrag->page);
++ skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
++ }
++
++ skb->len += copy;
++ skb->data_len += copy;
++ skb->truesize += copy;
++ sk_wmem_queued_add(ssk, copy);
++ sk_mem_charge(ssk, copy);
++ skb->ip_summed = CHECKSUM_PARTIAL;
++ WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
++ TCP_SKB_CB(skb)->end_seq += copy;
++ tcp_skb_pcount_set(skb, 0);
++
++ /* on skb reuse we just need to update the DSS len */
++ if (reuse_skb) {
++ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
++ mpext->data_len += copy;
+ WARN_ON_ONCE(zero_window_probe);
+ goto out;
+ }
+
+- mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
+- if (WARN_ON_ONCE(!mpext)) {
+- /* should never reach here, stream corrupted */
+- return -EINVAL;
+- }
+-
+ memset(mpext, 0, sizeof(*mpext));
+ mpext->data_seq = data_seq;
+ mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
+- mpext->data_len = ret;
++ mpext->data_len = copy;
+ mpext->use_map = 1;
+ mpext->dsn64 = 1;
+
+@@ -1377,18 +1395,18 @@ alloc_skb:
+ mpext->dsn64);
+
+ if (zero_window_probe) {
+- mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
++ mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+ mpext->frozen = 1;
+ if (READ_ONCE(msk->csum_enabled))
+- mptcp_update_data_checksum(tail, ret);
++ mptcp_update_data_checksum(skb, copy);
+ tcp_push_pending_frames(ssk);
+ return 0;
+ }
+ out:
+ if (READ_ONCE(msk->csum_enabled))
+- mptcp_update_data_checksum(tail, ret);
+- mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+- return ret;
++ mptcp_update_data_checksum(skb, copy);
++ mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
++ return copy;
+ }
+
+ #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \
+@@ -1505,6 +1523,32 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
+ release_sock(ssk);
+ }
+
++static void mptcp_update_post_push(struct mptcp_sock *msk,
++ struct mptcp_data_frag *dfrag,
++ u32 sent)
++{
++ u64 snd_nxt_new = dfrag->data_seq;
++
++ dfrag->already_sent += sent;
++
++ msk->snd_burst -= sent;
++ msk->tx_pending_data -= sent;
++
++ snd_nxt_new += dfrag->already_sent;
++
++ /* snd_nxt_new can be smaller than snd_nxt in case mptcp
++ * is recovering after a failover. In that event, this re-sends
++ * old segments.
++ *
++ * Thus compute snd_nxt_new candidate based on
++ * the dfrag->data_seq that was sent and the data
++ * that has been handed to the subflow for transmission
++ * and skip update in case it was old dfrag.
++ */
++ if (likely(after64(snd_nxt_new, msk->snd_nxt)))
++ msk->snd_nxt = snd_nxt_new;
++}
++
+ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+ {
+ struct sock *prev_ssk = NULL, *ssk = NULL;
+@@ -1523,7 +1567,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+ int ret = 0;
+
+ prev_ssk = ssk;
+- mptcp_flush_join_list(msk);
++ __mptcp_flush_join_list(msk);
+ ssk = mptcp_subflow_get_send(msk);
+
+ /* First check. If the ssk has changed since
+@@ -1548,12 +1592,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+ }
+
+ info.sent += ret;
+- dfrag->already_sent += ret;
+- msk->snd_nxt += ret;
+- msk->snd_burst -= ret;
+- msk->tx_pending_data -= ret;
+ copied += ret;
+ len -= ret;
++
++ mptcp_update_post_push(msk, dfrag, ret);
+ }
+ WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ }
+@@ -1567,7 +1609,7 @@ out:
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+ if (copied)
+- __mptcp_check_send_data_fin(sk);
++ mptcp_check_send_data_fin(sk);
+ }
+
+ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+@@ -1597,7 +1639,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+ if (!xmit_ssk)
+ goto out;
+ if (xmit_ssk != ssk) {
+- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
++ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
++ MPTCP_DELEGATE_SEND);
+ goto out;
+ }
+
+@@ -1606,13 +1649,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+ goto out;
+
+ info.sent += ret;
+- dfrag->already_sent += ret;
+- msk->snd_nxt += ret;
+- msk->snd_burst -= ret;
+- msk->tx_pending_data -= ret;
+ copied += ret;
+ len -= ret;
+ first = false;
++
++ mptcp_update_post_push(msk, dfrag, ret);
+ }
+ WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ }
+@@ -1855,7 +1896,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+ goto new_measure;
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+@@ -1873,7 +1914,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+@@ -2183,15 +2224,12 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
+ return false;
+ }
+
+- /* will accept ack for reijected data before re-sending them */
+- if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
+- msk->recovery_snd_nxt = msk->snd_nxt;
++ msk->recovery_snd_nxt = msk->snd_nxt;
+ msk->recovery = true;
+ mptcp_data_unlock(sk);
+
+ msk->first_pending = rtx_head;
+ msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
+- msk->snd_nxt = rtx_head->data_seq;
+ msk->snd_burst = 0;
+
+ /* be sure to clear the "sent status" on all re-injected fragments */
+@@ -2407,10 +2445,9 @@ static void mptcp_worker(struct work_struct *work)
+
+ lock_sock(sk);
+ state = sk->sk_state;
+- if (unlikely(state == TCP_CLOSE))
++ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ goto unlock;
+
+- mptcp_check_data_fin_ack(sk);
+ mptcp_flush_join_list(msk);
+
+ mptcp_check_fastclose(msk);
+@@ -2421,7 +2458,8 @@ static void mptcp_worker(struct work_struct *work)
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+
+- __mptcp_check_send_data_fin(sk);
++ mptcp_check_send_data_fin(sk);
++ mptcp_check_data_fin_ack(sk);
+ mptcp_check_data_fin(sk);
+
+ /* There is no point in keeping around an orphaned sk timedout or
+@@ -2509,8 +2547,8 @@ static int mptcp_init_sock(struct sock *sk)
+ icsk->icsk_ca_ops = NULL;
+
+ sk_sockets_allocated_inc(sk);
+- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+
+ return 0;
+ }
+@@ -2550,6 +2588,12 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
+ pr_debug("Fallback");
+ ssk->sk_shutdown |= how;
+ tcp_shutdown(ssk, how);
++
++ /* simulate the data_fin ack reception to let the state
++ * machine move forward
++ */
++ WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
++ mptcp_schedule_work(sk);
+ } else {
+ pr_debug("Sending DATA_FIN on subflow %p", ssk);
+ tcp_send_ack(ssk);
+@@ -2589,7 +2633,7 @@ static int mptcp_close_state(struct sock *sk)
+ return next & TCP_ACTION_FIN;
+ }
+
+-static void __mptcp_check_send_data_fin(struct sock *sk)
++static void mptcp_check_send_data_fin(struct sock *sk)
+ {
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+@@ -2607,18 +2651,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
+
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+
+- /* fallback socket will not get data_fin/ack, can move to the next
+- * state now
+- */
+- if (__mptcp_check_fallback(msk)) {
+- if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
+- inet_sk_state_store(sk, TCP_CLOSE);
+- mptcp_close_wake_up(sk);
+- } else if (sk->sk_state == TCP_FIN_WAIT1) {
+- inet_sk_state_store(sk, TCP_FIN_WAIT2);
+- }
+- }
+-
+ mptcp_flush_join_list(msk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+@@ -2639,7 +2671,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
+ WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
+ WRITE_ONCE(msk->snd_data_fin_enable, 1);
+
+- __mptcp_check_send_data_fin(sk);
++ mptcp_check_send_data_fin(sk);
+ }
+
+ static void __mptcp_destroy_sock(struct sock *sk)
+@@ -2685,6 +2717,7 @@ static void mptcp_close(struct sock *sk, long timeout)
+ {
+ struct mptcp_subflow_context *subflow;
+ bool do_cancel_work = false;
++ int subflows_alive = 0;
+
+ lock_sock(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+@@ -2706,11 +2739,19 @@ cleanup:
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast_nested(ssk);
+
++ subflows_alive += ssk->sk_state != TCP_CLOSE;
++
+ sock_orphan(ssk);
+ unlock_sock_fast(ssk, slow);
+ }
+ sock_orphan(sk);
+
++ /* all the subflows are closed, only timeout can change the msk
++ * state, let's not keep resources busy for no reasons
++ */
++ if (subflows_alive == 0)
++ inet_sk_state_store(sk, TCP_CLOSE);
++
+ sock_hold(sk);
+ pr_debug("msk=%p state=%d", sk, sk->sk_state);
+ if (sk->sk_state == TCP_CLOSE) {
+@@ -2757,6 +2798,12 @@ static int mptcp_disconnect(struct sock *sk, int flags)
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
++ /* Deny disconnect if other threads are blocked in sk_wait_event()
++ * or inet_wait_for_connect().
++ */
++ if (sk->sk_wait_pending)
++ return -EBUSY;
++
+ mptcp_do_flush_join_list(msk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+@@ -2795,6 +2842,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
+ inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
+ #endif
+
++ nsk->sk_wait_pending = 0;
+ __mptcp_init_sock(nsk);
+
+ msk = mptcp_sk(nsk);
+@@ -2881,7 +2929,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+- return newsk;
++ goto out;
+ }
+
+ /* acquire the 2nd reference for the owning socket */
+@@ -2893,6 +2941,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+ }
+
++out:
++ newsk->sk_kern_sock = kern;
+ return newsk;
+ }
+
+@@ -2940,7 +2990,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
+ if (xmit_ssk == ssk)
+ __mptcp_subflow_push_pending(sk, ssk);
+ else if (xmit_ssk)
+- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
++ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
+ } else {
+ set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ }
+@@ -2994,18 +3044,50 @@ static void mptcp_release_cb(struct sock *sk)
+ __mptcp_update_rmem(sk);
+ }
+
++/* MP_JOIN client subflow must wait for 4th ack before sending any data:
++ * TCP can't schedule delack timer before the subflow is fully established.
++ * MPTCP uses the delack timer to do 3rd ack retransmissions
++ */
++static void schedule_3rdack_retransmission(struct sock *ssk)
++{
++ struct inet_connection_sock *icsk = inet_csk(ssk);
++ struct tcp_sock *tp = tcp_sk(ssk);
++ unsigned long timeout;
++
++ if (mptcp_subflow_ctx(ssk)->fully_established)
++ return;
++
++ /* reschedule with a timeout above RTT, as we must look only for drop */
++ if (tp->srtt_us)
++ timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
++ else
++ timeout = TCP_TIMEOUT_INIT;
++ timeout += jiffies;
++
++ WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
++ icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
++ icsk->icsk_ack.timeout = timeout;
++ sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
++}
++
+ void mptcp_subflow_process_delegated(struct sock *ssk)
+ {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+
+- mptcp_data_lock(sk);
+- if (!sock_owned_by_user(sk))
+- __mptcp_subflow_push_pending(sk, ssk);
+- else
+- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+- mptcp_data_unlock(sk);
+- mptcp_subflow_delegated_done(subflow);
++ if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
++ mptcp_data_lock(sk);
++ if (!sock_owned_by_user(sk))
++ __mptcp_subflow_push_pending(sk, ssk);
++ else
++ set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
++ mptcp_data_unlock(sk);
++ mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
++ }
++ if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
++ schedule_3rdack_retransmission(ssk);
++ mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
++ }
+ }
+
+ static int mptcp_hash(struct sock *sk)
+@@ -3537,12 +3619,6 @@ static const struct proto_ops mptcp_v6_stream_ops = {
+
+ static struct proto mptcp_v6_prot;
+
+-static void mptcp_v6_destroy(struct sock *sk)
+-{
+- mptcp_destroy(sk);
+- inet6_destroy_sock(sk);
+-}
+-
+ static struct inet_protosw mptcp_v6_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+@@ -3558,7 +3634,6 @@ int __init mptcp_proto_v6_init(void)
+ mptcp_v6_prot = mptcp_prot;
+ strcpy(mptcp_v6_prot.name, "MPTCPv6");
+ mptcp_v6_prot.slab = NULL;
+- mptcp_v6_prot.destroy = mptcp_v6_destroy;
+ mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+
+ err = proto_register(&mptcp_v6_prot, 1);
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index dc984676c5eb1..e193b710b471a 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -401,6 +401,7 @@ struct mptcp_delegated_action {
+ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+
+ #define MPTCP_DELEGATE_SEND 0
++#define MPTCP_DELEGATE_ACK 1
+
+ /* MPTCP subflow context */
+ struct mptcp_subflow_context {
+@@ -435,7 +436,8 @@ struct mptcp_subflow_context {
+ rx_eof : 1,
+ can_ack : 1, /* only after processing the remote a key */
+ disposable : 1, /* ctx can be free at ulp release time */
+- stale : 1; /* unable to snd/rcv data, do not use for xmit */
++ stale : 1, /* unable to snd/rcv data, do not use for xmit */
++ valid_csum_seen : 1; /* at least one csum validated */
+ enum mptcp_data_avail data_avail;
+ u32 remote_nonce;
+ u64 thmac;
+@@ -506,23 +508,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
+
+ void mptcp_subflow_process_delegated(struct sock *ssk);
+
+-static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
++static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
+ {
+ struct mptcp_delegated_action *delegated;
+ bool schedule;
+
++ /* the caller held the subflow bh socket lock */
++ lockdep_assert_in_softirq();
++
+ /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
+ * ensures the below list check sees list updates done prior to status
+ * bit changes
+ */
+- if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
++ if (!test_and_set_bit(action, &subflow->delegated_status)) {
+ /* still on delegated list from previous scheduling */
+ if (!list_empty(&subflow->delegated_node))
+ return;
+
+- /* the caller held the subflow bh socket lock */
+- lockdep_assert_in_softirq();
+-
+ delegated = this_cpu_ptr(&mptcp_delegated_actions);
+ schedule = list_empty(&delegated->head);
+ list_add_tail(&subflow->delegated_node, &delegated->head);
+@@ -547,16 +549,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
+
+ static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
+ {
+- return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
++ return !!READ_ONCE(subflow->delegated_status);
+ }
+
+-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
++static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
+ {
+ /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
+ * touching the status bit
+ */
+ smp_wmb();
+- clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
++ clear_bit(action, &subflow->delegated_status);
+ }
+
+ int mptcp_is_enabled(const struct net *net);
+@@ -717,6 +719,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
+ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
+
+ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
++__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+
+ void __init mptcp_pm_init(void);
+ void mptcp_pm_data_init(struct mptcp_sock *msk);
+diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
+index 8c03afac5ca03..4bb305342fcc7 100644
+--- a/net/mptcp/sockopt.c
++++ b/net/mptcp/sockopt.c
+@@ -523,7 +523,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
+ case TCP_NODELAY:
+ case TCP_THIN_LINEAR_TIMEOUTS:
+ case TCP_CONGESTION:
+- case TCP_ULP:
+ case TCP_CORK:
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 6172f380dfb76..666f6720db765 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -45,7 +45,6 @@ static void subflow_req_destructor(struct request_sock *req)
+ sock_put((struct sock *)subflow_req->msk);
+
+ mptcp_token_destroy_request(req);
+- tcp_request_sock_ops.destructor(req);
+ }
+
+ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
+@@ -359,12 +358,10 @@ void mptcp_subflow_reset(struct sock *ssk)
+ /* must hold: tcp_done() could drop last reference on parent */
+ sock_hold(sk);
+
+- tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ tcp_done(ssk);
+- if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+- schedule_work(&mptcp_sk(sk)->work))
+- return; /* worker will put sk for us */
++ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
++ mptcp_schedule_work(sk);
+
+ sock_put(sk);
+ }
+@@ -483,9 +480,8 @@ do_reset:
+ mptcp_subflow_reset(sk);
+ }
+
+-struct request_sock_ops mptcp_subflow_request_sock_ops;
+-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
+-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
++static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init;
++static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
+
+ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+ {
+@@ -497,7 +493,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ goto drop;
+
+- return tcp_conn_request(&mptcp_subflow_request_sock_ops,
++ return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops,
+ &subflow_request_sock_ipv4_ops,
+ sk, skb);
+ drop:
+@@ -505,11 +501,18 @@ drop:
+ return 0;
+ }
+
++static void subflow_v4_req_destructor(struct request_sock *req)
++{
++ subflow_req_destructor(req);
++ tcp_request_sock_ops.destructor(req);
++}
++
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
+-static struct inet_connection_sock_af_ops subflow_v6_specific;
+-static struct inet_connection_sock_af_ops subflow_v6m_specific;
+-static struct proto tcpv6_prot_override;
++static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init;
++static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
++static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
++static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
++static struct proto tcpv6_prot_override __ro_after_init;
+
+ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+ {
+@@ -528,15 +531,36 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+ return 0;
+ }
+
+- return tcp_conn_request(&mptcp_subflow_request_sock_ops,
++ return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops,
+ &subflow_request_sock_ipv6_ops, sk, skb);
+
+ drop:
+ tcp_listendrop(sk);
+ return 0; /* don't send reset */
+ }
++
++static void subflow_v6_req_destructor(struct request_sock *req)
++{
++ subflow_req_destructor(req);
++ tcp6_request_sock_ops.destructor(req);
++}
++#endif
++
++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
++ struct sock *sk_listener,
++ bool attach_listener)
++{
++ if (ops->family == AF_INET)
++ ops = &mptcp_subflow_v4_request_sock_ops;
++#if IS_ENABLED(CONFIG_MPTCP_IPV6)
++ else if (ops->family == AF_INET6)
++ ops = &mptcp_subflow_v6_request_sock_ops;
+ #endif
+
++ return inet_reqsk_alloc(ops, sk_listener, attach_listener);
++}
++EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc);
++
+ /* validate hmac received in third ACK */
+ static bool subflow_hmac_valid(const struct request_sock *req,
+ const struct mptcp_options_received *mp_opt)
+@@ -790,8 +814,8 @@ dispose_child:
+ return child;
+ }
+
+-static struct inet_connection_sock_af_ops subflow_specific;
+-static struct proto tcp_prot_override;
++static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
++static struct proto tcp_prot_override __ro_after_init;
+
+ enum mapping_status {
+ MAPPING_OK,
+@@ -845,9 +869,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
+ bool csum_reqd)
+ {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+- struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+- __wsum csum;
++ __sum16 csum;
+ int len;
+
+ if (!csum_reqd)
+@@ -908,19 +931,20 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+- header.data_seq = cpu_to_be64(subflow->map_seq);
+- header.subflow_seq = htonl(subflow->map_subflow_seq);
+- header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+- header.csum = 0;
+-
+- csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+- if (unlikely(csum_fold(csum))) {
++ csum = __mptcp_make_csum(subflow->map_seq,
++ subflow->map_subflow_seq,
++ subflow->map_data_len + subflow->map_data_fin,
++ subflow->map_data_csum);
++ if (unlikely(csum)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+- subflow->send_mp_fail = 1;
+- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
++ if (subflow->mp_join || subflow->valid_csum_seen) {
++ subflow->send_mp_fail = 1;
++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
++ }
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
++ subflow->valid_csum_seen = 1;
+ return MAPPING_OK;
+ }
+
+@@ -985,8 +1009,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
+ skb_ext_del(skb, SKB_EXT_MPTCP);
+ return MAPPING_OK;
+ } else {
+- if (updated && schedule_work(&msk->work))
+- sock_hold((struct sock *)msk);
++ if (updated)
++ mptcp_schedule_work((struct sock *)msk);
+
+ return MAPPING_DATA_FIN;
+ }
+@@ -1089,17 +1113,24 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
+ /* sched mptcp worker to remove the subflow if no more data is pending */
+ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+ {
+- struct sock *sk = (struct sock *)msk;
+-
+ if (likely(ssk->sk_state != TCP_CLOSE))
+ return;
+
+ if (skb_queue_empty(&ssk->sk_receive_queue) &&
+- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+- sock_hold(sk);
+- if (!schedule_work(&msk->work))
+- sock_put(sk);
+- }
++ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
++ mptcp_schedule_work((struct sock *)msk);
++}
++
++static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
++{
++ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
++
++ if (subflow->mp_join)
++ return false;
++ else if (READ_ONCE(msk->csum_enabled))
++ return !subflow->valid_csum_seen;
++ else
++ return !subflow->fully_established;
+ }
+
+ static bool subflow_check_data_avail(struct sock *ssk)
+@@ -1179,7 +1210,7 @@ fallback:
+ return true;
+ }
+
+- if (subflow->mp_join || subflow->fully_established) {
++ if (!subflow_can_fallback(subflow)) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+@@ -1246,6 +1277,7 @@ void __mptcp_error_report(struct sock *sk)
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err = sock_error(ssk);
++ int ssk_state;
+
+ if (!err)
+ continue;
+@@ -1256,7 +1288,14 @@ void __mptcp_error_report(struct sock *sk)
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+ continue;
+
+- inet_sk_state_store(sk, inet_sk_state_load(ssk));
++ /* We need to propagate only transition to CLOSE state.
++ * Orphaned socket will see such state change via
++ * subflow_sched_work_if_closed() and that path will properly
++ * destroy the msk as needed.
++ */
++ ssk_state = inet_sk_state_load(ssk);
++ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
++ inet_sk_state_store(sk, ssk_state);
+ sk->sk_err = -err;
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+@@ -1270,6 +1309,13 @@ static void subflow_error_report(struct sock *ssk)
+ {
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
++ /* bail early if this is a no-op, so that we avoid introducing a
++ * problematic lockdep dependency between TCP accept queue lock
++ * and msk socket spinlock
++ */
++ if (!sk->sk_socket)
++ return;
++
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+@@ -1315,7 +1361,7 @@ static void subflow_write_space(struct sock *ssk)
+ mptcp_write_space(sk);
+ }
+
+-static struct inet_connection_sock_af_ops *
++static const struct inet_connection_sock_af_ops *
+ subflow_default_af_ops(struct sock *sk)
+ {
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+@@ -1330,7 +1376,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
+ {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+- struct inet_connection_sock_af_ops *target;
++ const struct inet_connection_sock_af_ops *target;
+
+ target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
+
+@@ -1524,7 +1570,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
+ if (err)
+ return err;
+
+- lock_sock(sf->sk);
++ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
+
+ /* the newly created socket has to be in the same cgroup as its parent */
+ mptcp_attach_cgroup(sk, sf->sk);
+@@ -1607,14 +1653,16 @@ static void subflow_state_change(struct sock *sk)
+ {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct sock *parent = subflow->conn;
++ struct mptcp_sock *msk;
+
+ __subflow_state_change(sk);
+
++ msk = mptcp_sk(parent);
+ if (subflow_simultaneous_connect(sk)) {
+ mptcp_propagate_sndbuf(parent, sk);
+ mptcp_do_fallback(sk);
+- mptcp_rcv_space_init(mptcp_sk(parent), sk);
+- pr_fallback(mptcp_sk(parent));
++ mptcp_rcv_space_init(msk, sk);
++ pr_fallback(msk);
+ subflow->conn_finished = 1;
+ mptcp_set_connected(parent);
+ }
+@@ -1630,11 +1678,12 @@ static void subflow_state_change(struct sock *sk)
+
+ subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+
+- if (__mptcp_check_fallback(mptcp_sk(parent)) &&
+- !subflow->rx_eof && subflow_is_done(sk)) {
+- subflow->rx_eof = 1;
+- mptcp_subflow_eof(parent);
+- }
++ /* when the fallback subflow closes the rx side, trigger a 'dummy'
++ * ingress data fin, so that the msk state will follow along
++ */
++ if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
++ mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
++ mptcp_schedule_work(parent);
+ }
+
+ static int subflow_ulp_init(struct sock *sk)
+@@ -1770,7 +1819,6 @@ static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
+ static int subflow_ops_init(struct request_sock_ops *subflow_ops)
+ {
+ subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
+- subflow_ops->slab_name = "request_sock_subflow";
+
+ subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
+ subflow_ops->obj_size, 0,
+@@ -1780,16 +1828,17 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops)
+ if (!subflow_ops->slab)
+ return -ENOMEM;
+
+- subflow_ops->destructor = subflow_req_destructor;
+-
+ return 0;
+ }
+
+ void __init mptcp_subflow_init(void)
+ {
+- mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
+- if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
+- panic("MPTCP: failed to init subflow request sock ops\n");
++ mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops;
++ mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4";
++ mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor;
++
++ if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0)
++ panic("MPTCP: failed to init subflow v4 request sock ops\n");
+
+ subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
+ subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
+@@ -1803,6 +1852,20 @@ void __init mptcp_subflow_init(void)
+ tcp_prot_override.release_cb = tcp_release_cb_override;
+
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
++ /* In struct mptcp_subflow_request_sock, we assume the TCP request sock
++ * structures for v4 and v6 have the same size. It should not changed in
++ * the future but better to make sure to be warned if it is no longer
++ * the case.
++ */
++ BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock));
++
++ mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops;
++ mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6";
++ mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor;
++
++ if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0)
++ panic("MPTCP: failed to init subflow v6 request sock ops\n");
++
+ subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
+ subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
+
+diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
+index b635c194f0a85..62fb1031763d1 100644
+--- a/net/ncsi/ncsi-aen.c
++++ b/net/ncsi/ncsi-aen.c
+@@ -165,6 +165,7 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
++ nc->modes[NCSI_MODE_TX_ENABLE].enable = 0;
+
+ return ncsi_process_next_channel(ndp);
+ }
+diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
+index ba9ae482141b0..dda8b76b77988 100644
+--- a/net/ncsi/ncsi-cmd.c
++++ b/net/ncsi/ncsi-cmd.c
+@@ -18,6 +18,8 @@
+ #include "internal.h"
+ #include "ncsi-pkt.h"
+
++static const int padding_bytes = 26;
++
+ u32 ncsi_calculate_checksum(unsigned char *data, int len)
+ {
+ u32 checksum = 0;
+@@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
+ {
+ struct ncsi_cmd_oem_pkt *cmd;
+ unsigned int len;
++ int payload;
++ /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2
++ * requires payload to be padded with 0 to
++ * 32-bit boundary before the checksum field.
++ * Ensure the padding bytes are accounted for in
++ * skb allocation
++ */
+
++ payload = ALIGN(nca->payload, 4);
+ len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
+- if (nca->payload < 26)
+- len += 26;
+- else
+- len += nca->payload;
++ len += max(payload, padding_bytes);
+
+ cmd = skb_put_zero(skb, len);
+ memcpy(&cmd->mfr_id, nca->data, nca->payload);
+@@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
+ struct net_device *dev = nd->dev;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
++ int payload;
+ int len = hlen + tlen;
+ struct sk_buff *skb;
+ struct ncsi_request *nr;
+@@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
+ return NULL;
+
+ /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
++ * Payload needs padding so that the checksum field following payload is
++ * aligned to 32-bit boundary.
+ * The packet needs padding if its payload is less than 26 bytes to
+ * meet 64 bytes minimal ethernet frame length.
+ */
+ len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
+- if (nca->payload < 26)
+- len += 26;
+- else
+- len += nca->payload;
++ payload = ALIGN(nca->payload, 4);
++ len += max(payload, padding_bytes);
+
+ /* Allocate skb */
+ skb = alloc_skb(len, GFP_ATOMIC);
+diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
+index bb5f1650f11cb..c189b4c8a1823 100644
+--- a/net/ncsi/ncsi-netlink.c
++++ b/net/ncsi/ncsi-netlink.c
+@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb,
+ pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
+ if (!pnest)
+ return -ENOMEM;
+- nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
++ rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
++ if (rc) {
++ nla_nest_cancel(skb, pnest);
++ return rc;
++ }
+ if ((0x1 << np->id) == ndp->package_whitelist)
+ nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+ cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
+index 6447a09932f55..069c2659074bc 100644
+--- a/net/ncsi/ncsi-rsp.c
++++ b/net/ncsi/ncsi-rsp.c
+@@ -611,14 +611,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
+ return 0;
+ }
+
+-/* Response handler for Mellanox command Get Mac Address */
+-static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
++/* Response handler for Get Mac Address command */
++static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
+ {
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+- const struct net_device_ops *ops = ndev->netdev_ops;
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct sockaddr saddr;
++ u32 mac_addr_off = 0;
+ int ret = 0;
+
+ /* Get the response header */
+@@ -626,11 +626,25 @@ static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
+
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+- memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN);
++ if (mfr_id == NCSI_OEM_MFR_BCM_ID)
++ mac_addr_off = BCM_MAC_ADDR_OFFSET;
++ else if (mfr_id == NCSI_OEM_MFR_MLX_ID)
++ mac_addr_off = MLX_MAC_ADDR_OFFSET;
++ else if (mfr_id == NCSI_OEM_MFR_INTEL_ID)
++ mac_addr_off = INTEL_MAC_ADDR_OFFSET;
++
++ memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
++ if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID)
++ eth_addr_inc((u8 *)saddr.sa_data);
++ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
++ return -ENXIO;
++
+ /* Set the flag for GMA command which should only be called once */
+ ndp->gma_flag = 1;
+
+- ret = ops->ndo_set_mac_address(ndev, &saddr);
++ rtnl_lock();
++ ret = dev_set_mac_address(ndev, &saddr, NULL);
++ rtnl_unlock();
+ if (ret < 0)
+ netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
+
+@@ -649,41 +663,10 @@ static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr)
+
+ if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA &&
+ mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM)
+- return ncsi_rsp_handler_oem_mlx_gma(nr);
++ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_MLX_ID);
+ return 0;
+ }
+
+-/* Response handler for Broadcom command Get Mac Address */
+-static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
+-{
+- struct ncsi_dev_priv *ndp = nr->ndp;
+- struct net_device *ndev = ndp->ndev.dev;
+- const struct net_device_ops *ops = ndev->netdev_ops;
+- struct ncsi_rsp_oem_pkt *rsp;
+- struct sockaddr saddr;
+- int ret = 0;
+-
+- /* Get the response header */
+- rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+-
+- saddr.sa_family = ndev->type;
+- ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+- memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
+- /* Increase mac address by 1 for BMC's address */
+- eth_addr_inc((u8 *)saddr.sa_data);
+- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+- return -ENXIO;
+-
+- /* Set the flag for GMA command which should only be called once */
+- ndp->gma_flag = 1;
+-
+- ret = ops->ndo_set_mac_address(ndev, &saddr);
+- if (ret < 0)
+- netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
+-
+- return ret;
+-}
+-
+ /* Response handler for Broadcom card */
+ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
+ {
+@@ -695,42 +678,10 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
+ bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data);
+
+ if (bcm->type == NCSI_OEM_BCM_CMD_GMA)
+- return ncsi_rsp_handler_oem_bcm_gma(nr);
++ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_BCM_ID);
+ return 0;
+ }
+
+-/* Response handler for Intel command Get Mac Address */
+-static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr)
+-{
+- struct ncsi_dev_priv *ndp = nr->ndp;
+- struct net_device *ndev = ndp->ndev.dev;
+- const struct net_device_ops *ops = ndev->netdev_ops;
+- struct ncsi_rsp_oem_pkt *rsp;
+- struct sockaddr saddr;
+- int ret = 0;
+-
+- /* Get the response header */
+- rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+-
+- saddr.sa_family = ndev->type;
+- ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+- memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN);
+- /* Increase mac address by 1 for BMC's address */
+- eth_addr_inc((u8 *)saddr.sa_data);
+- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+- return -ENXIO;
+-
+- /* Set the flag for GMA command which should only be called once */
+- ndp->gma_flag = 1;
+-
+- ret = ops->ndo_set_mac_address(ndev, &saddr);
+- if (ret < 0)
+- netdev_warn(ndev,
+- "NCSI: 'Writing mac address to device failed\n");
+-
+- return ret;
+-}
+-
+ /* Response handler for Intel card */
+ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
+ {
+@@ -742,7 +693,7 @@ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
+ intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data);
+
+ if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA)
+- return ncsi_rsp_handler_oem_intel_gma(nr);
++ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_INTEL_ID);
+
+ return 0;
+ }
+diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
+index 92a747896f808..4f645d51c2573 100644
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -133,7 +133,6 @@ config NF_CONNTRACK_ZONES
+
+ config NF_CONNTRACK_PROCFS
+ bool "Supply CT list in procfs (OBSOLETE)"
+- default y
+ depends on PROC_FS
+ help
+ This option enables for the list of known conntrack entries
+diff --git a/net/netfilter/core.c b/net/netfilter/core.c
+index 63d032191e626..ffa84cafb746b 100644
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -300,12 +300,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv6 + hooknum;
+-#if IS_ENABLED(CONFIG_DECNET)
+- case NFPROTO_DECNET:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
+- return NULL;
+- return net->nf.hooks_decnet + hooknum;
+-#endif
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+@@ -406,14 +400,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+- if (!IS_ERR(new_hooks))
++ if (!IS_ERR(new_hooks)) {
++ hooks_validate(new_hooks);
+ rcu_assign_pointer(*pp, new_hooks);
++ }
+
+ mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+- hooks_validate(new_hooks);
+ #ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_ingress_hook(reg, pf))
+ net_inc_ingress_queue();
+@@ -591,7 +586,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+ case NF_ACCEPT:
+ break;
+ case NF_DROP:
+- kfree_skb(skb);
++ kfree_skb_reason(skb,
++ SKB_DROP_REASON_NETFILTER_DROP);
+ ret = NF_DROP_GETERR(verdict);
+ if (ret == 0)
+ ret = -EPERM;
+@@ -673,9 +669,11 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+- BUG_ON(ct_hook == NULL);
+- ct_hook->destroy(nfct);
++ if (ct_hook)
++ ct_hook->destroy(nfct);
+ rcu_read_unlock();
++
++ WARN_ON(!ct_hook);
+ }
+ EXPORT_SYMBOL(nf_conntrack_destroy);
+
+@@ -721,10 +719,6 @@ static int __net_init netfilter_net_init(struct net *net)
+ #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
+ #endif
+-#if IS_ENABLED(CONFIG_DECNET)
+- __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
+-#endif
+-
+ #ifdef CONFIG_PROC_FS
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+ net->proc_net);
+diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
+index a8ce04a4bb72a..e4fa00abde6a2 100644
+--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
++++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
+@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ return -IPSET_ERR_BITMAP_RANGE;
+
+ pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
+- hosts = 2 << (32 - netmask - 1);
+- elements = 2 << (netmask - mask_bits - 1);
++ hosts = 2U << (32 - netmask - 1);
++ elements = 2UL << (netmask - mask_bits - 1);
+ }
+ if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+ return -IPSET_ERR_BITMAP_RANGE_SIZE;
+diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
+index 16ae92054baa8..c911fc09f363c 100644
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -1694,13 +1694,22 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
+
+ do {
++ if (retried) {
++ __ip_set_get(set);
++ nfnl_unlock(NFNL_SUBSYS_IPSET);
++ cond_resched();
++ nfnl_lock(NFNL_SUBSYS_IPSET);
++ __ip_set_put(set);
++ }
++
+ ip_set_lock(set);
+ ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
+ ip_set_unlock(set);
+ retried = true;
+- } while (ret == -EAGAIN &&
+- set->variant->resize &&
+- (ret = set->variant->resize(set, retried)) == 0);
++ } while (ret == -ERANGE ||
++ (ret == -EAGAIN &&
++ set->variant->resize &&
++ (ret = set->variant->resize(set, retried)) == 0));
+
+ if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+ return 0;
+diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
+index 6e391308431da..7499192af5866 100644
+--- a/net/netfilter/ipset/ip_set_hash_gen.h
++++ b/net/netfilter/ipset/ip_set_hash_gen.h
+@@ -42,31 +42,8 @@
+ #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE)
+ /* Max muber of elements in the array block when tuned */
+ #define AHASH_MAX_TUNED 64
+-
+ #define AHASH_MAX(h) ((h)->bucketsize)
+
+-/* Max number of elements can be tuned */
+-#ifdef IP_SET_HASH_WITH_MULTI
+-static u8
+-tune_bucketsize(u8 curr, u32 multi)
+-{
+- u32 n;
+-
+- if (multi < curr)
+- return curr;
+-
+- n = curr + AHASH_INIT_SIZE;
+- /* Currently, at listing one hash bucket must fit into a message.
+- * Therefore we have a hard limit here.
+- */
+- return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
+-}
+-#define TUNE_BUCKETSIZE(h, multi) \
+- ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi))
+-#else
+-#define TUNE_BUCKETSIZE(h, multi)
+-#endif
+-
+ /* A hash bucket */
+ struct hbucket {
+ struct rcu_head rcu; /* for call_rcu */
+@@ -936,7 +913,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ goto set_full;
+ /* Create a new slot */
+ if (n->pos >= n->size) {
+- TUNE_BUCKETSIZE(h, multi);
++#ifdef IP_SET_HASH_WITH_MULTI
++ if (h->bucketsize >= AHASH_MAX_TUNED)
++ goto set_full;
++ else if (h->bucketsize <= multi)
++ h->bucketsize += AHASH_INIT_SIZE;
++#endif
+ if (n->size >= AHASH_MAX(h)) {
+ /* Trigger rehashing */
+ mtype_data_next(&h->next, d);
+diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
+index dd30c03d5a23f..24adcdd7a0b16 100644
+--- a/net/netfilter/ipset/ip_set_hash_ip.c
++++ b/net/netfilter/ipset/ip_set_hash_ip.c
+@@ -98,11 +98,11 @@ static int
+ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_ip4 *h = set->data;
++ struct hash_ip4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip4_elem e = { 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip = 0, ip_to = 0, hosts;
++ u32 ip = 0, ip_to = 0, hosts, i = 0;
+ int ret = 0;
+
+ if (tb[IPSET_ATTR_LINENO])
+@@ -147,22 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+
+ hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+
+- /* 64bit division is not allowed on 32bit */
+- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+- return -ERANGE;
+-
+- if (retried) {
++ if (retried)
+ ip = ntohl(h->next.ip);
++ for (; ip <= ip_to; i++) {
+ e.ip = htonl(ip);
+- }
+- for (; ip <= ip_to;) {
++ if (i > IPSET_MAX_RANGE) {
++ hash_ip4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+
+ ip += hosts;
+- e.ip = htonl(ip);
+- if (e.ip == 0)
++ if (ip == 0)
+ return 0;
+
+ ret = 0;
+diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
+index 153de3457423e..a22ec1a6f6ec8 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
+@@ -97,11 +97,11 @@ static int
+ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_ipmark4 *h = set->data;
++ struct hash_ipmark4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipmark4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip, ip_to = 0;
++ u32 ip, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+ ip_set_mask_from_to(ip, ip_to, cidr);
+ }
+
+- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+-
+ if (retried)
+ ip = ntohl(h->next.ip);
+- for (; ip <= ip_to; ip++) {
++ for (; ip <= ip_to; ip++, i++) {
+ e.ip = htonl(ip);
++ if (i > IPSET_MAX_RANGE) {
++ hash_ipmark4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
+index 7303138e46be1..10481760a9b25 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipport.c
++++ b/net/netfilter/ipset/ip_set_hash_ipport.c
+@@ -105,11 +105,11 @@ static int
+ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_ipport4 *h = set->data;
++ struct hash_ipport4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipport4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip, ip_to = 0, p = 0, port, port_to;
++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+ bool with_ports = false;
+ int ret;
+
+@@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ swap(port, port_to);
+ }
+
+- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+-
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; ip++) {
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
+- for (; p <= port_to; p++) {
++ for (; p <= port_to; p++, i++) {
+ e.ip = htonl(ip);
+ e.port = htons(p);
++ if (i > IPSET_MAX_RANGE) {
++ hash_ipport4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
+index 334fb1ad0e86c..39a01934b1536 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
+@@ -108,11 +108,11 @@ static int
+ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_ipportip4 *h = set->data;
++ struct hash_ipportip4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportip4_elem e = { .ip = 0 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip, ip_to = 0, p = 0, port, port_to;
++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+ bool with_ports = false;
+ int ret;
+
+@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+ swap(port, port_to);
+ }
+
+- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+-
+ if (retried)
+ ip = ntohl(h->next.ip);
+ for (; ip <= ip_to; ip++) {
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
+- for (; p <= port_to; p++) {
++ for (; p <= port_to; p++, i++) {
+ e.ip = htonl(ip);
+ e.port = htons(p);
++ if (i > IPSET_MAX_RANGE) {
++ hash_ipportip4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+index 7df94f437f600..5c6de605a9fb7 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+@@ -160,12 +160,12 @@ static int
+ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_ipportnet4 *h = set->data;
++ struct hash_ipportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+- u32 ip2_from = 0, ip2_to = 0, ip2;
++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ swap(port, port_to);
+ }
+
+- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+-
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ for (; p <= port_to; p++) {
+ e.port = htons(p);
+ do {
++ i++;
+ e.ip2 = htonl(ip2);
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
+ e.cidr = cidr - 1;
++ if (i > IPSET_MAX_RANGE) {
++ hash_ipportnet4_data_next(&h->next,
++ &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
+index 1422739d9aa25..ce0a9ce5a91f1 100644
+--- a/net/netfilter/ipset/ip_set_hash_net.c
++++ b/net/netfilter/ipset/ip_set_hash_net.c
+@@ -136,11 +136,11 @@ static int
+ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_net4 *h = set->data;
++ struct hash_net4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_net4_elem e = { .cidr = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip = 0, ip_to = 0, ipn, n = 0;
++ u32 ip = 0, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+ if (ip + UINT_MAX == ip_to)
+ return -IPSET_ERR_HASH_RANGE;
+ }
+- ipn = ip;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+- n++;
+- } while (ipn++ < ip_to);
+-
+- if (n > IPSET_MAX_RANGE)
+- return -ERANGE;
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ do {
++ i++;
+ e.ip = htonl(ip);
++ if (i > IPSET_MAX_RANGE) {
++ hash_net4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
+index 9810f5bf63f5e..0310732862362 100644
+--- a/net/netfilter/ipset/ip_set_hash_netiface.c
++++ b/net/netfilter/ipset/ip_set_hash_netiface.c
+@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 ip = 0, ip_to = 0, ipn, n = 0;
++ u32 ip = 0, ip_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr);
+ }
+- ipn = ip;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+- n++;
+- } while (ipn++ < ip_to);
+-
+- if (n > IPSET_MAX_RANGE)
+- return -ERANGE;
+
+ if (retried)
+ ip = ntohl(h->next.ip);
+ do {
++ i++;
+ e.ip = htonl(ip);
++ if (i > IPSET_MAX_RANGE) {
++ hash_netiface4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
+index 3d09eefe998a7..c07b70bf32db4 100644
+--- a/net/netfilter/ipset/ip_set_hash_netnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netnet.c
+@@ -163,13 +163,12 @@ static int
+ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_netnet4 *h = set->data;
++ struct hash_netnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0;
+- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+- u64 n = 0, m = 0;
++ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
+ int ret;
+
+ if (tb[IPSET_ATTR_LINENO])
+@@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ } else {
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
+- ipn = ip;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+- n++;
+- } while (ipn++ < ip_to);
+- ipn = ip2_from;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+- m++;
+- } while (ipn++ < ip2_to);
+-
+- if (n*m > IPSET_MAX_RANGE)
+- return -ERANGE;
+
+ if (retried) {
+ ip = ntohl(h->next.ip[0]);
+@@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ e.ip[0] = htonl(ip);
+ ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ do {
++ i++;
+ e.ip[1] = htonl(ip2);
++ if (i > IPSET_MAX_RANGE) {
++ hash_netnet4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
+index 09cf72eb37f8d..d1a0628df4ef3 100644
+--- a/net/netfilter/ipset/ip_set_hash_netport.c
++++ b/net/netfilter/ipset/ip_set_hash_netport.c
+@@ -154,12 +154,11 @@ static int
+ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_netport4 *h = set->data;
++ struct hash_netport4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+- u64 n = 0;
++ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
+ bool with_ports = false;
+ u8 cidr;
+ int ret;
+@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ } else {
+ ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+ }
+- ipn = ip;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+- n++;
+- } while (ipn++ < ip_to);
+-
+- if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+
+ if (retried) {
+ ip = ntohl(h->next.ip);
+@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+ e.ip = htonl(ip);
+ ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
+ e.cidr = cidr - 1;
+- for (; p <= port_to; p++) {
++ for (; p <= port_to; p++, i++) {
+ e.port = htons(p);
++ if (i > IPSET_MAX_RANGE) {
++ hash_netport4_data_next(&h->next, &e);
++ return -ERANGE;
++ }
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
+index 19bcdb3141f6e..bf4f91b78e1dc 100644
+--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
+@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
+ #define IP_SET_HASH_WITH_PROTO
+ #define IP_SET_HASH_WITH_NETS
+ #define IPSET_NET_COUNT 2
++#define IP_SET_HASH_WITH_NET0
+
+ /* IPv4 variant */
+
+@@ -173,17 +174,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+ }
+
++static u32
++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
++{
++ if (from == 0 && to == UINT_MAX) {
++ *cidr = 0;
++ return to;
++ }
++ return ip_set_range_to_cidr(from, to, cidr);
++}
++
+ static int
+ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+- const struct hash_netportnet4 *h = set->data;
++ struct hash_netportnet4 *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+- u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+- u64 n = 0, m = 0;
++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+ bool with_ports = false;
+ int ret;
+
+@@ -285,19 +295,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ } else {
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
+- ipn = ip;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+- n++;
+- } while (ipn++ < ip_to);
+- ipn = ip2_from;
+- do {
+- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+- m++;
+- } while (ipn++ < ip2_to);
+-
+- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+- return -ERANGE;
+
+ if (retried) {
+ ip = ntohl(h->next.ip[0]);
+@@ -310,13 +307,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+
+ do {
+ e.ip[0] = htonl(ip);
+- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
++ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ for (; p <= port_to; p++) {
+ e.port = htons(p);
+ do {
++ i++;
+ e.ip[1] = htonl(ip2);
+- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
+- &e.cidr[1]);
++ if (i > IPSET_MAX_RANGE) {
++ hash_netportnet4_data_next(&h->next,
++ &e);
++ return -ERANGE;
++ }
++ ip2 = hash_netportnet4_range_to_cidr(ip2,
++ ip2_to, &e.cidr[1]);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
+index 271da8447b293..2a3017b9c001b 100644
+--- a/net/netfilter/ipvs/Kconfig
++++ b/net/netfilter/ipvs/Kconfig
+@@ -44,7 +44,8 @@ config IP_VS_DEBUG
+
+ config IP_VS_TAB_BITS
+ int "IPVS connection table size (the Nth power of 2)"
+- range 8 20
++ range 8 20 if !64BIT
++ range 8 27 if 64BIT
+ default 12
+ help
+ The IPVS connection hash table uses the chaining scheme to handle
+@@ -54,24 +55,24 @@ config IP_VS_TAB_BITS
+
+ Note the table size must be power of 2. The table size will be the
+ value of 2 to the your input number power. The number to choose is
+- from 8 to 20, the default number is 12, which means the table size
+- is 4096. Don't input the number too small, otherwise you will lose
+- performance on it. You can adapt the table size yourself, according
+- to your virtual server application. It is good to set the table size
+- not far less than the number of connections per second multiplying
+- average lasting time of connection in the table. For example, your
+- virtual server gets 200 connections per second, the connection lasts
+- for 200 seconds in average in the connection table, the table size
+- should be not far less than 200x200, it is good to set the table
+- size 32768 (2**15).
++ from 8 to 27 for 64BIT(20 otherwise), the default number is 12,
++ which means the table size is 4096. Don't input the number too
++ small, otherwise you will lose performance on it. You can adapt the
++ table size yourself, according to your virtual server application.
++ It is good to set the table size not far less than the number of
++ connections per second multiplying average lasting time of
++ connection in the table. For example, your virtual server gets 200
++ connections per second, the connection lasts for 200 seconds in
++ average in the connection table, the table size should be not far
++ less than 200x200, it is good to set the table size 32768 (2**15).
+
+ Another note that each connection occupies 128 bytes effectively and
+ each hash entry uses 8 bytes, so you can estimate how much memory is
+ needed for your box.
+
+ You can overwrite this number setting conn_tab_bits module parameter
+- or by appending ip_vs.conn_tab_bits=? to the kernel command line
+- if IP VS was compiled built-in.
++ or by appending ip_vs.conn_tab_bits=? to the kernel command line if
++ IP VS was compiled built-in.
+
+ comment "IPVS transport protocol load balancing support"
+
+diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
+index f9b16f2b22191..fdacbc3c15bef 100644
+--- a/net/netfilter/ipvs/ip_vs_app.c
++++ b/net/netfilter/ipvs/ip_vs_app.c
+@@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = {
+ int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
+ {
+ INIT_LIST_HEAD(&ipvs->app_list);
+- proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
+- sizeof(struct seq_net_private));
++#ifdef CONFIG_PROC_FS
++ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
++ &ip_vs_app_seq_ops,
++ sizeof(struct seq_net_private)))
++ return -ENOMEM;
++#endif
+ return 0;
+ }
+
+ void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
+ {
+ unregister_ip_vs_app(ipvs, NULL /* all */);
++#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
++#endif
+ }
+diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
+index 2c467c422dc63..3252d67b6df0a 100644
+--- a/net/netfilter/ipvs/ip_vs_conn.c
++++ b/net/netfilter/ipvs/ip_vs_conn.c
+@@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
+ * The drop rate array needs tuning for real environments.
+ * Called from timer bh only => no locking
+ */
+- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+- static char todrop_counter[9] = {0};
++ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
++ static signed char todrop_counter[9] = {0};
+ int i;
+
+ /* if the conn entry hasn't lasted for 60 seconds, don't drop it.
+@@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
+ {
+ atomic_set(&ipvs->conn_count, 0);
+
+- proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+- &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
+- proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+- &ip_vs_conn_sync_seq_ops,
+- sizeof(struct ip_vs_iter_state));
++#ifdef CONFIG_PROC_FS
++ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
++ &ip_vs_conn_seq_ops,
++ sizeof(struct ip_vs_iter_state)))
++ goto err_conn;
++
++ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
++ &ip_vs_conn_sync_seq_ops,
++ sizeof(struct ip_vs_iter_state)))
++ goto err_conn_sync;
++#endif
++
+ return 0;
++
++#ifdef CONFIG_PROC_FS
++err_conn_sync:
++ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
++err_conn:
++ return -ENOMEM;
++#endif
+ }
+
+ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
+ {
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(ipvs);
++#ifdef CONFIG_PROC_FS
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
++#endif
+ }
+
+ int __init ip_vs_conn_init(void)
+@@ -1468,8 +1484,8 @@ int __init ip_vs_conn_init(void)
+ int idx;
+
+ /* Compute size and mask */
+- if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+- pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
++ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 27) {
++ pr_info("conn_tab_bits not in [8, 27]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
+ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
+@@ -1495,7 +1511,7 @@ int __init ip_vs_conn_init(void)
+ pr_info("Connection hash table configured "
+ "(size=%d, memory=%ldKbytes)\n",
+ ip_vs_conn_tab_size,
+- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
++ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
+ IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
+ sizeof(struct ip_vs_conn));
+
+diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
+index 128690c512dff..393058a43aa73 100644
+--- a/net/netfilter/ipvs/ip_vs_core.c
++++ b/net/netfilter/ipvs/ip_vs_core.c
+@@ -1964,7 +1964,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
+ struct ip_vs_proto_data *pd;
+ struct ip_vs_conn *cp;
+ int ret, pkts;
+- int conn_reuse_mode;
+ struct sock *sk;
+
+ /* Already marked as IPVS request or reply? */
+@@ -2041,15 +2040,16 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, af, skb, &iph);
+
+- conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+- if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
++ if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
++ int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ bool old_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
+- } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
++ } else if (conn_reuse_mode &&
++ is_new_conn_expected(cp, conn_reuse_mode)) {
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
+index 29ec3ef63edc7..d0b64c36471d5 100644
+--- a/net/netfilter/ipvs/ip_vs_ctl.c
++++ b/net/netfilter/ipvs/ip_vs_ctl.c
+@@ -1802,6 +1802,7 @@ static int
+ proc_do_sync_threshold(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+ {
++ struct netns_ipvs *ipvs = table->extra2;
+ int *valp = table->data;
+ int val[2];
+ int rc;
+@@ -1811,6 +1812,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
+ .mode = table->mode,
+ };
+
++ mutex_lock(&ipvs->sync_mutex);
+ memcpy(val, valp, sizeof(val));
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write) {
+@@ -1820,6 +1822,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
+ else
+ memcpy(valp, val, sizeof(val));
+ }
++ mutex_unlock(&ipvs->sync_mutex);
+ return rc;
+ }
+
+@@ -4077,6 +4080,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
+ ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+ ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
++ tbl[idx].extra2 = ipvs;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
+ tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
+index 9d43277b8b4fe..0d89e68dc9d18 100644
+--- a/net/netfilter/ipvs/ip_vs_sync.c
++++ b/net/netfilter/ipvs/ip_vs_sync.c
+@@ -603,7 +603,7 @@ static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ struct ip_vs_sync_conn_options *opt =
+ (struct ip_vs_sync_conn_options *)&s[1];
+- memcpy(opt, &cp->in_seq, sizeof(*opt));
++ memcpy(opt, &cp->sync_conn_opt, sizeof(*opt));
+ }
+
+ m->nr_conns++;
+@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
+ lock_sock(sk);
+ if (mode) {
+ val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+- sysctl_wmem_max);
++ READ_ONCE(sysctl_wmem_max));
+ sk->sk_sndbuf = val * 2;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ } else {
+ val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+- sysctl_rmem_max);
++ READ_ONCE(sysctl_rmem_max));
+ sk->sk_rcvbuf = val * 2;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index d2e5a8f644b80..cd2130e98836b 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -1225,6 +1225,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ skb->transport_header = skb->network_header;
+
+ skb_set_inner_ipproto(skb, next_protocol);
++ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+@@ -1373,6 +1374,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ skb->transport_header = skb->network_header;
+
+ skb_set_inner_ipproto(skb, next_protocol);
++ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 770a63103c7a4..10622760f894a 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -66,6 +66,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+ struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 next_bucket;
++ u32 avg_timeout;
++ u32 count;
++ u32 start_time;
+ bool exiting;
+ bool early_drop;
+ };
+@@ -77,11 +80,24 @@ static __read_mostly bool nf_conntrack_locks_all;
+ /* serialize hash resizes and nf_ct_iterate_cleanup */
+ static DEFINE_MUTEX(nf_conntrack_mutex);
+
+-#define GC_SCAN_INTERVAL (120u * HZ)
++#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
++#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
++
++/* clamp timeouts to this value (TCP unacked) */
++#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
++
++/* Initial bias pretending we have 100 entries at the upper bound so we don't
++ * wakeup often just because we have three entries with a 1s timeout while still
++ * allowing non-idle machines to wakeup more often when needed.
++ */
++#define GC_SCAN_INITIAL_COUNT 100
++#define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX
++
+ #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
++#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
+
+-#define MIN_CHAINLEN 8u
+-#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
++#define MIN_CHAINLEN 50u
++#define MAX_CHAINLEN (80u - MIN_CHAINLEN)
+
+ static struct conntrack_gc_work conntrack_gc_work;
+
+@@ -558,7 +574,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+
+ #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
+
+-/* Released via destroy_conntrack() */
++/* Released via nf_ct_destroy() */
+ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ gfp_t flags)
+@@ -585,7 +601,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ tmpl->status = IPS_TEMPLATE;
+ write_pnet(&tmpl->ct_net, net);
+ nf_ct_zone_add(tmpl, zone);
+- atomic_set(&tmpl->ct_general.use, 0);
++ refcount_set(&tmpl->ct_general.use, 1);
+
+ return tmpl;
+ }
+@@ -612,13 +628,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
+ #endif
+ }
+
+-static void
+-destroy_conntrack(struct nf_conntrack *nfct)
++void nf_ct_destroy(struct nf_conntrack *nfct)
+ {
+ struct nf_conn *ct = (struct nf_conn *)nfct;
+
+- pr_debug("destroy_conntrack(%p)\n", ct);
+- WARN_ON(atomic_read(&nfct->use) != 0);
++ pr_debug("%s(%p)\n", __func__, ct);
++ WARN_ON(refcount_read(&nfct->use) != 0);
+
+ if (unlikely(nf_ct_is_template(ct))) {
+ nf_ct_tmpl_free(ct);
+@@ -643,9 +658,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
+ if (ct->master)
+ nf_ct_put(ct->master);
+
+- pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
++ pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
+ nf_conntrack_free(ct);
+ }
++EXPORT_SYMBOL(nf_ct_destroy);
+
+ static void nf_ct_delete_from_lists(struct nf_conn *ct)
+ {
+@@ -684,7 +700,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+- s32 timeout = ct->timeout - nfct_time_stamp;
++ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+
+ tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+@@ -742,7 +758,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+ /* caller must hold rcu readlock and none of the nf_conntrack_locks */
+ static void nf_ct_gc_expired(struct nf_conn *ct)
+ {
+- if (!atomic_inc_not_zero(&ct->ct_general.use))
++ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return;
+
+ if (nf_ct_should_gc(ct))
+@@ -810,7 +826,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
+ * in, try to obtain a reference and re-check tuple
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+- if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
++ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
+ if (likely(nf_ct_key_equal(h, tuple, zone, net)))
+ goto found;
+
+@@ -907,7 +923,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
+
+ smp_wmb();
+ /* The caller holds a reference to this object */
+- atomic_set(&ct->ct_general.use, 2);
++ refcount_set(&ct->ct_general.use, 2);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ NF_CT_STAT_INC(net, insert);
+@@ -958,7 +974,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
+ {
+ struct nf_conn_tstamp *tstamp;
+
+- atomic_inc(&ct->ct_general.use);
++ refcount_inc(&ct->ct_general.use);
+ ct->status |= IPS_CONFIRMED;
+
+ /* set conntrack timestamp, if enabled. */
+@@ -989,7 +1005,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
+
+ nf_ct_acct_merge(ct, ctinfo, loser_ct);
+ nf_ct_add_to_dying_list(loser_ct);
+- nf_conntrack_put(&loser_ct->ct_general);
++ nf_ct_put(loser_ct);
+ nf_ct_set(skb, ct, ctinfo);
+
+ NF_CT_STAT_INC(net, clash_resolve);
+@@ -1036,7 +1052,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+ }
+
+ /* We want the clashing entry to go away real soon: 1 second timeout. */
+- loser_ct->timeout = nfct_time_stamp + HZ;
++ WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
+
+ /* IPS_NAT_CLASH removes the entry automatically on the first
+ * reply. Also prevents UDP tracker from moving the entry to
+@@ -1351,7 +1367,7 @@ static unsigned int early_drop_list(struct net *net,
+ nf_ct_is_dying(tmp))
+ continue;
+
+- if (!atomic_inc_not_zero(&tmp->ct_general.use))
++ if (!refcount_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* kill only if still in same netns -- might have moved due to
+@@ -1420,16 +1436,31 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
+
+ static void gc_worker(struct work_struct *work)
+ {
+- unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+- unsigned long next_run = GC_SCAN_INTERVAL;
++ u32 end_time, start_time = nfct_time_stamp;
+ struct conntrack_gc_work *gc_work;
++ unsigned int expired_count = 0;
++ unsigned long next_run;
++ s32 delta_time;
++ long count;
++
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ i = gc_work->next_bucket;
+ if (gc_work->early_drop)
+ nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+
++ if (i == 0) {
++ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
++ gc_work->count = GC_SCAN_INITIAL_COUNT;
++ gc_work->start_time = start_time;
++ }
++
++ next_run = gc_work->avg_timeout;
++ count = gc_work->count;
++
++ end_time = start_time + GC_SCAN_MAX_DURATION;
++
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+@@ -1447,6 +1478,7 @@ static void gc_worker(struct work_struct *work)
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ struct nf_conntrack_net *cnet;
+ struct net *net;
++ long expires;
+
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+@@ -1455,11 +1487,30 @@ static void gc_worker(struct work_struct *work)
+ continue;
+ }
+
++ if (expired_count > GC_SCAN_EXPIRED_MAX) {
++ rcu_read_unlock();
++
++ gc_work->next_bucket = i;
++ gc_work->avg_timeout = next_run;
++ gc_work->count = count;
++
++ delta_time = nfct_time_stamp - gc_work->start_time;
++
++ /* re-sched immediately if total cycle time is exceeded */
++ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
++ goto early_exit;
++ }
++
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
++ expired_count++;
+ continue;
+ }
+
++ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
++ expires = (expires - (long)next_run) / ++count;
++ next_run += expires;
++
+ if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+ continue;
+
+@@ -1469,7 +1520,7 @@ static void gc_worker(struct work_struct *work)
+ continue;
+
+ /* need to take reference to avoid possible races */
+- if (!atomic_inc_not_zero(&tmp->ct_general.use))
++ if (!refcount_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ if (gc_worker_skip_ct(tmp)) {
+@@ -1477,8 +1528,10 @@ static void gc_worker(struct work_struct *work)
+ continue;
+ }
+
+- if (gc_worker_can_early_drop(tmp))
++ if (gc_worker_can_early_drop(tmp)) {
+ nf_ct_kill(tmp);
++ expired_count++;
++ }
+
+ nf_ct_put(tmp);
+ }
+@@ -1491,33 +1544,39 @@ static void gc_worker(struct work_struct *work)
+ cond_resched();
+ i++;
+
+- if (time_after(jiffies, end_time) && i < hashsz) {
++ delta_time = nfct_time_stamp - end_time;
++ if (delta_time > 0 && i < hashsz) {
++ gc_work->avg_timeout = next_run;
++ gc_work->count = count;
+ gc_work->next_bucket = i;
+ next_run = 0;
+- break;
++ goto early_exit;
+ }
+ } while (i < hashsz);
+
++ gc_work->next_bucket = 0;
++
++ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
++
++ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
++ if (next_run > (unsigned long)delta_time)
++ next_run -= delta_time;
++ else
++ next_run = 1;
++
++early_exit:
+ if (gc_work->exiting)
+ return;
+
+- /*
+- * Eviction will normally happen from the packet path, and not
+- * from this gc worker.
+- *
+- * This worker is only here to reap expired entries when system went
+- * idle after a busy period.
+- */
+- if (next_run) {
++ if (next_run)
+ gc_work->early_drop = false;
+- gc_work->next_bucket = 0;
+- }
++
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
+ }
+
+ static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+ {
+- INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
++ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+ }
+
+@@ -1560,7 +1619,7 @@ __nf_conntrack_alloc(struct net *net,
+ /* save hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
+ ct->status = 0;
+- ct->timeout = 0;
++ WRITE_ONCE(ct->timeout, 0);
+ write_pnet(&ct->ct_net, net);
+ memset(&ct->__nfct_init_offset, 0,
+ offsetof(struct nf_conn, proto) -
+@@ -1571,7 +1630,7 @@ __nf_conntrack_alloc(struct net *net,
+ /* Because we use RCU lookups, we set ct_general.use to zero before
+ * this is inserted in any list.
+ */
+- atomic_set(&ct->ct_general.use, 0);
++ refcount_set(&ct->ct_general.use, 0);
+ return ct;
+ out:
+ atomic_dec(&cnet->count);
+@@ -1596,7 +1655,7 @@ void nf_conntrack_free(struct nf_conn *ct)
+ /* A freed object has refcnt == 0, that's
+ * the golden rule for SLAB_TYPESAFE_BY_RCU
+ */
+- WARN_ON(atomic_read(&ct->ct_general.use) != 0);
++ WARN_ON(refcount_read(&ct->ct_general.use) != 0);
+
+ nf_ct_ext_destroy(ct);
+ kmem_cache_free(nf_conntrack_cachep, ct);
+@@ -1676,7 +1735,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- ct->mark = exp->master->mark;
++ ct->mark = READ_ONCE(exp->master->mark);
+ #endif
+ #ifdef CONFIG_NF_CONNTRACK_SECMARK
+ ct->secmark = exp->master->secmark;
+@@ -1688,8 +1747,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
+ if (!exp)
+ __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
+
+- /* Now it is inserted into the unconfirmed list, bump refcount */
+- nf_conntrack_get(&ct->ct_general);
++ /* Now it is inserted into the unconfirmed list, set refcount to 1. */
++ refcount_set(&ct->ct_general.use, 1);
+ nf_ct_add_to_unconfirmed_list(ct);
+
+ local_bh_enable();
+@@ -1920,17 +1979,19 @@ repeat:
+ /* Invalid: inverse of the return code tells
+ * the netfilter core what to do */
+ pr_debug("nf_conntrack_in: Can't track with proto module\n");
+- nf_conntrack_put(&ct->ct_general);
++ nf_ct_put(ct);
+ skb->_nfct = 0;
+- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+- if (ret == -NF_DROP)
+- NF_CT_STAT_INC_ATOMIC(state->net, drop);
+ /* Special case: TCP tracker reports an attempt to reopen a
+ * closed/aborted connection. We have to go back and create a
+ * fresh conntrack.
+ */
+ if (ret == -NF_REPEAT)
+ goto repeat;
++
++ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
++ if (ret == -NF_DROP)
++ NF_CT_STAT_INC_ATOMIC(state->net, drop);
++
+ ret = -ret;
+ goto out;
+ }
+@@ -2163,6 +2224,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
+ return 0;
+
+ helper = rcu_dereference(help->helper);
++ if (!helper)
++ return 0;
++
+ if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
+ return 0;
+
+@@ -2299,7 +2363,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
+
+ return NULL;
+ found:
+- atomic_inc(&ct->ct_general.use);
++ refcount_inc(&ct->ct_general.use);
+ spin_unlock(lockp);
+ local_bh_enable();
+ return ct;
+@@ -2772,7 +2836,7 @@ err_cachep:
+
+ static struct nf_ct_hook nf_conntrack_hook = {
+ .update = nf_conntrack_update,
+- .destroy = destroy_conntrack,
++ .destroy = nf_ct_destroy,
+ .get_tuple_skb = nf_conntrack_get_tuple_skb,
+ };
+
+diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
+index f562eeef42349..6d056ebba57c6 100644
+--- a/net/netfilter/nf_conntrack_expect.c
++++ b/net/netfilter/nf_conntrack_expect.c
+@@ -203,12 +203,12 @@ nf_ct_find_expectation(struct net *net,
+ * about to invoke ->destroy(), or nf_ct_delete() via timeout
+ * or early_drop().
+ *
+- * The atomic_inc_not_zero() check tells: If that fails, we
++ * The refcount_inc_not_zero() check tells: If that fails, we
+ * know that the ct is being destroyed. If it succeeds, we
+ * can be sure the ct cannot disappear underneath.
+ */
+ if (unlikely(nf_ct_is_dying(exp->master) ||
+- !atomic_inc_not_zero(&exp->master->ct_general.use)))
++ !refcount_inc_not_zero(&exp->master->ct_general.use)))
+ return NULL;
+
+ if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
+index ae4488a13c70c..41c9708b50575 100644
+--- a/net/netfilter/nf_conntrack_helper.c
++++ b/net/netfilter/nf_conntrack_helper.c
+@@ -405,6 +405,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
+ BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
+ BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+
++ if (!nf_ct_helper_hash)
++ return -ENOENT;
++
+ if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+
+@@ -556,6 +559,12 @@ static const struct nf_ct_ext_type helper_extend = {
+ .id = NF_CT_EXT_HELPER,
+ };
+
++void nf_ct_set_auto_assign_helper_warned(struct net *net)
++{
++ nf_ct_pernet(net)->auto_assign_helper_warned = true;
++}
++EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned);
++
+ void nf_conntrack_helper_pernet_init(struct net *net)
+ {
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+@@ -589,4 +598,5 @@ void nf_conntrack_helper_fini(void)
+ {
+ nf_ct_extend_unregister(&helper_extend);
+ kvfree(nf_ct_helper_hash);
++ nf_ct_helper_hash = NULL;
+ }
+diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
+index 08ee4e760a3d2..159e1e4441a43 100644
+--- a/net/netfilter/nf_conntrack_irc.c
++++ b/net/netfilter/nf_conntrack_irc.c
+@@ -151,15 +151,37 @@ static int help(struct sk_buff *skb, unsigned int protoff,
+ data = ib_ptr;
+ data_limit = ib_ptr + skb->len - dataoff;
+
+- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
+- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
+- while (data < data_limit - (19 + MINMATCHLEN)) {
+- if (memcmp(data, "\1DCC ", 5)) {
++ /* Skip any whitespace */
++ while (data < data_limit - 10) {
++ if (*data == ' ' || *data == '\r' || *data == '\n')
++ data++;
++ else
++ break;
++ }
++
++ /* strlen("PRIVMSG x ")=10 */
++ if (data < data_limit - 10) {
++ if (strncasecmp("PRIVMSG ", data, 8))
++ goto out;
++ data += 8;
++ }
++
++ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26
++ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26
++ */
++ while (data < data_limit - (21 + MINMATCHLEN)) {
++ /* Find first " :", the start of message */
++ if (memcmp(data, " :", 2)) {
+ data++;
+ continue;
+ }
++ data += 2;
++
++ /* then check that place only for the DCC command */
++ if (memcmp(data, "\1DCC ", 5))
++ goto out;
+ data += 5;
+- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
++ /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */
+
+ iph = ip_hdr(skb);
+ pr_debug("DCC found in master %pI4:%u %pI4:%u\n",
+@@ -175,7 +197,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
+ pr_debug("DCC %s detected\n", dccprotos[i]);
+
+ /* we have at least
+- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
++ * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid
+ * data left (== 14/13 bytes) */
+ if (parse_dcc(data, data_limit, &dcc_ip,
+ &dcc_port, &addr_beg_p, &addr_end_p)) {
+@@ -188,8 +210,9 @@ static int help(struct sk_buff *skb, unsigned int protoff,
+
+ /* dcc_ip can be the internal OR external (NAT'ed) IP */
+ tuple = &ct->tuplehash[dir].tuple;
+- if (tuple->src.u3.ip != dcc_ip &&
+- tuple->dst.u3.ip != dcc_ip) {
++ if ((tuple->src.u3.ip != dcc_ip &&
++ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) ||
++ dcc_port == 0) {
+ net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
+ &tuple->src.u3.ip,
+ &dcc_ip, dcc_port);
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index f1e5443fe7c74..c427f7625a3b5 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -322,9 +322,15 @@ nla_put_failure:
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
++static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct,
++ bool dump)
+ {
+- if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark)))
++ u32 mark = READ_ONCE(ct->mark);
++
++ if (!mark && !dump)
++ return 0;
++
++ if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
+ goto nla_put_failure;
+ return 0;
+
+@@ -332,7 +338,7 @@ nla_put_failure:
+ return -1;
+ }
+ #else
+-#define ctnetlink_dump_mark(a, b) (0)
++#define ctnetlink_dump_mark(a, b, c) (0)
+ #endif
+
+ #ifdef CONFIG_NF_CONNTRACK_SECMARK
+@@ -508,7 +514,7 @@ nla_put_failure:
+
+ static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
+ {
+- if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use))))
++ if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use))))
+ goto nla_put_failure;
+ return 0;
+
+@@ -537,7 +543,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+ {
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+- ctnetlink_dump_mark(skb, ct) < 0 ||
++ ctnetlink_dump_mark(skb, ct, true) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+@@ -820,8 +826,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- if ((events & (1 << IPCT_MARK) || ct->mark)
+- && ctnetlink_dump_mark(skb, ct) < 0)
++ if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
+ goto nla_put_failure;
+ #endif
+ nlmsg_end(skb, nlh);
+@@ -1011,11 +1016,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
+ CTA_TUPLE_REPLY,
+ filter->family,
+ &filter->zone,
+- filter->orig_flags);
+- if (err < 0) {
+- err = -EINVAL;
++ filter->reply_flags);
++ if (err < 0)
+ goto err_filter;
+- }
+ }
+
+ return filter;
+@@ -1150,7 +1153,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- if ((ct->mark & filter->mark.mask) != filter->mark.val)
++ if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val)
+ goto ignore_entry;
+ #endif
+ status = (u32)READ_ONCE(ct->status);
+@@ -1202,7 +1205,7 @@ restart:
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+- atomic_inc_not_zero(&ct->ct_general.use))
++ refcount_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+@@ -1543,9 +1546,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
+
+ static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+ {
+- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+- return 0;
+-
+ return ctnetlink_filter_match(ct, data);
+ }
+
+@@ -1609,11 +1609,6 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+- if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+- nf_ct_put(ct);
+- return -EBUSY;
+- }
+-
+ if (cda[CTA_ID]) {
+ __be32 id = nla_get_be32(cda[CTA_ID]);
+
+@@ -1750,7 +1745,7 @@ restart:
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying ? true : false, 0);
+ if (res < 0) {
+- if (!atomic_inc_not_zero(&ct->ct_general.use))
++ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ continue;
+ cb->args[0] = cpu;
+ cb->args[1] = (unsigned long)ct;
+@@ -2000,7 +1995,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
+
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+- ct->timeout = nfct_time_stamp + (u32)timeout;
++ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
+@@ -2018,9 +2013,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct,
+ mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+ mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+- newmark = (ct->mark & mask) ^ mark;
+- if (newmark != ct->mark)
+- ct->mark = newmark;
++ newmark = (READ_ONCE(ct->mark) & mask) ^ mark;
++ if (newmark != READ_ONCE(ct->mark))
++ WRITE_ONCE(ct->mark, newmark);
+ }
+ #endif
+
+@@ -2312,7 +2307,8 @@ ctnetlink_create_conntrack(struct net *net,
+ if (helper->from_nlattr)
+ helper->from_nlattr(helpinfo, ct);
+
+- /* not in hash table yet so not strictly necessary */
++ /* disable helper auto-assignment for this entry */
++ ct->status |= IPS_HELPER;
+ RCU_INIT_POINTER(help->helper, helper);
+ }
+ } else {
+@@ -2392,12 +2388,15 @@ ctnetlink_create_conntrack(struct net *net,
+
+ err = nf_conntrack_hash_check_insert(ct);
+ if (err < 0)
+- goto err2;
++ goto err3;
+
+ rcu_read_unlock();
+
+ return ct;
+
++err3:
++ if (ct->master)
++ nf_ct_put(ct->master);
+ err2:
+ rcu_read_unlock();
+ err1:
+@@ -2752,7 +2751,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
+ goto nla_put_failure;
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
++ if (ctnetlink_dump_mark(skb, ct, true) < 0)
+ goto nla_put_failure;
+ #endif
+ if (ctnetlink_dump_labels(skb, ct) < 0)
+@@ -2993,7 +2992,9 @@ nla_put_failure:
+ return -1;
+ }
+
++#if IS_ENABLED(CONFIG_NF_NAT)
+ static const union nf_inet_addr any_addr;
++#endif
+
+ static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+ {
+@@ -3472,10 +3473,12 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
+ return 0;
+ }
+
++#if IS_ENABLED(CONFIG_NF_NAT)
+ static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
+ [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED },
+ };
++#endif
+
+ static int
+ ctnetlink_parse_expect_nat(const struct nlattr *attr,
+diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
+index c1557d47ccd1e..d4fd626d2b8c3 100644
+--- a/net/netfilter/nf_conntrack_proto_dccp.c
++++ b/net/netfilter/nf_conntrack_proto_dccp.c
+@@ -432,9 +432,19 @@ static bool dccp_error(const struct dccp_hdr *dh,
+ struct sk_buff *skb, unsigned int dataoff,
+ const struct nf_hook_state *state)
+ {
++ static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST |
++ 1 << DCCP_PKT_RESPONSE |
++ 1 << DCCP_PKT_CLOSEREQ |
++ 1 << DCCP_PKT_CLOSE |
++ 1 << DCCP_PKT_RESET |
++ 1 << DCCP_PKT_SYNC |
++ 1 << DCCP_PKT_SYNCACK;
+ unsigned int dccp_len = skb->len - dataoff;
+ unsigned int cscov;
+ const char *msg;
++ u8 type;
++
++ BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG);
+
+ if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+ dh->dccph_doff * 4 > dccp_len) {
+@@ -459,34 +469,70 @@ static bool dccp_error(const struct dccp_hdr *dh,
+ goto out_invalid;
+ }
+
+- if (dh->dccph_type >= DCCP_PKT_INVALID) {
++ type = dh->dccph_type;
++ if (type >= DCCP_PKT_INVALID) {
+ msg = "nf_ct_dccp: reserved packet type ";
+ goto out_invalid;
+ }
++
++ if (test_bit(type, &require_seq48) && !dh->dccph_x) {
++ msg = "nf_ct_dccp: type lacks 48bit sequence numbers";
++ goto out_invalid;
++ }
++
+ return false;
+ out_invalid:
+ nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
+ return true;
+ }
+
++struct nf_conntrack_dccp_buf {
++ struct dccp_hdr dh; /* generic header part */
++ struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */
++ union { /* depends on header type */
++ struct dccp_hdr_ack_bits ack;
++ struct dccp_hdr_request req;
++ struct dccp_hdr_response response;
++ struct dccp_hdr_reset rst;
++ } u;
++};
++
++static struct dccp_hdr *
++dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh,
++ struct nf_conntrack_dccp_buf *buf)
++{
++ unsigned int hdrlen = __dccp_hdr_len(dh);
++
++ if (hdrlen > sizeof(*buf))
++ return NULL;
++
++ return skb_header_pointer(skb, offset, hdrlen, buf);
++}
++
+ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+ {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+- struct dccp_hdr _dh, *dh;
++ struct nf_conntrack_dccp_buf _dh;
+ u_int8_t type, old_state, new_state;
+ enum ct_dccp_roles role;
+ unsigned int *timeouts;
++ struct dccp_hdr *dh;
+
+- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
++ dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
+ if (!dh)
+ return NF_DROP;
+
+ if (dccp_error(dh, skb, dataoff, state))
+ return -NF_ACCEPT;
+
++ /* pull again, including possible 48 bit sequences and subtype header */
++ dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
++ if (!dh)
++ return NF_DROP;
++
+ type = dh->dccph_type;
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
+ return -NF_ACCEPT;
+diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
+index 61e3b05cf02c3..1020d67600a95 100644
+--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
++++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
+@@ -129,6 +129,56 @@ static void icmpv6_error_log(const struct sk_buff *skb,
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
+ }
+
++static noinline_for_stack int
++nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb,
++ unsigned int dataoff,
++ const struct nf_hook_state *state)
++{
++ u8 hl = ipv6_hdr(skb)->hop_limit;
++ union nf_inet_addr outer_daddr;
++ union {
++ struct nd_opt_hdr nd_opt;
++ struct rd_msg rd_msg;
++ } tmp;
++ const struct nd_opt_hdr *nd_opt;
++ const struct rd_msg *rd_msg;
++
++ rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg);
++ if (!rd_msg) {
++ icmpv6_error_log(skb, state, "short redirect");
++ return -NF_ACCEPT;
++ }
++
++ if (rd_msg->icmph.icmp6_code != 0)
++ return NF_ACCEPT;
++
++ if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
++ icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect");
++ return -NF_ACCEPT;
++ }
++
++ dataoff += sizeof(*rd_msg);
++
++ /* warning: rd_msg no longer usable after this call */
++ nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt);
++ if (!nd_opt || nd_opt->nd_opt_len == 0) {
++ icmpv6_error_log(skb, state, "redirect without options");
++ return -NF_ACCEPT;
++ }
++
++ /* We could call ndisc_parse_options(), but it would need
++ * skb_linearize() and a bit more work.
++ */
++ if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR)
++ return NF_ACCEPT;
++
++ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
++ sizeof(outer_daddr.ip6));
++ dataoff += 8;
++ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
++ IPPROTO_ICMPV6, &outer_daddr);
++}
++
+ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+@@ -159,6 +209,9 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+ return NF_ACCEPT;
+ }
+
++ if (icmp6h->icmp6_type == NDISC_REDIRECT)
++ return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state);
++
+ /* is not error message ? */
+ if (icmp6h->icmp6_type >= 128)
+ return NF_ACCEPT;
+diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
+index 2394238d01c91..7247af51bdfc4 100644
+--- a/net/netfilter/nf_conntrack_proto_sctp.c
++++ b/net/netfilter/nf_conntrack_proto_sctp.c
+@@ -27,22 +27,16 @@
+ #include <net/netfilter/nf_conntrack_ecache.h>
+ #include <net/netfilter/nf_conntrack_timeout.h>
+
+-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+- closely. They're more complex. --RR
+-
+- And so for me for SCTP :D -Kiran */
+-
+ static const char *const sctp_conntrack_names[] = {
+- "NONE",
+- "CLOSED",
+- "COOKIE_WAIT",
+- "COOKIE_ECHOED",
+- "ESTABLISHED",
+- "SHUTDOWN_SENT",
+- "SHUTDOWN_RECD",
+- "SHUTDOWN_ACK_SENT",
+- "HEARTBEAT_SENT",
+- "HEARTBEAT_ACKED",
++ [SCTP_CONNTRACK_NONE] = "NONE",
++ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
++ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
++ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
++ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
++ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
++ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
++ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
++ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
+ };
+
+ #define SECS * HZ
+@@ -54,12 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
+ [SCTP_CONNTRACK_CLOSED] = 10 SECS,
+ [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
+- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+- [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
+- [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
++ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
++ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
++ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
+- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
+ };
+
+ #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
+@@ -73,7 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
+ #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
+ #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+ #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
+-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
+ #define sIV SCTP_CONNTRACK_MAX
+
+ /*
+@@ -96,9 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
+ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
+ the SHUTDOWN chunk. Connection is closed.
+ HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
+-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
+- that of the HEARTBEAT chunk. Secondary connection is
+- established.
+ */
+
+ /* TODO
+@@ -115,33 +104,33 @@ cookie echoed to closed.
+ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
+ {
+ /* ORIGINAL */
+-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
+-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
+-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
+-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
+-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
+-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
+-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
+-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
+-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
++/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
++/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
++/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
++/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
++/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
++/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
++/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
++/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
++/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
++/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
++/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
++/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+ },
+ {
+ /* REPLY */
+-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
+-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
+-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
+-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
+-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
+-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
+-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
+-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
+-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
+-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
++/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
++/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
++/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
++/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
++/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
++/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
++/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
++/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
++/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
++/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
++/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
++/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
+ }
+ };
+
+@@ -412,22 +401,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
+ for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
+ /* Special cases of Verification tag check (Sec 8.5.1) */
+ if (sch->type == SCTP_CID_INIT) {
+- /* Sec 8.5.1 (A) */
++ /* (A) vtag MUST be zero */
+ if (sh->vtag != 0)
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_ABORT) {
+- /* Sec 8.5.1 (B) */
+- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+- sh->vtag != ct->proto.sctp.vtag[!dir])
++ /* (B) vtag MUST match own vtag if T flag is unset OR
++ * MUST match peer's vtag if T flag is set
++ */
++ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
++ sh->vtag != ct->proto.sctp.vtag[dir]) ||
++ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
++ sh->vtag != ct->proto.sctp.vtag[!dir]))
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
+- /* Sec 8.5.1 (C) */
+- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+- sh->vtag != ct->proto.sctp.vtag[!dir] &&
+- sch->flags & SCTP_CHUNK_FLAG_T)
++ /* (C) vtag MUST match own vtag if T flag is unset OR
++ * MUST match peer's vtag if T flag is set
++ */
++ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
++ sh->vtag != ct->proto.sctp.vtag[dir]) ||
++ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
++ sh->vtag != ct->proto.sctp.vtag[!dir]))
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
+- /* Sec 8.5.1 (D) */
++ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
+ if (sh->vtag != ct->proto.sctp.vtag[dir])
+ goto out_unlock;
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+@@ -489,11 +485,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
+ pr_debug("Setting vtag %x for dir %d\n",
+ ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
++
++ /* don't renew timeout on init retransmit so
++ * port reuse by client or NAT middlebox cannot
++ * keep entry alive indefinitely (incl. nat info).
++ */
++ if (new_state == SCTP_CONNTRACK_CLOSED &&
++ old_state == SCTP_CONNTRACK_CLOSED &&
++ nf_ct_is_confirmed(ct))
++ ignore = true;
+ }
+
+ ct->proto.sctp.state = new_state;
+- if (old_state != new_state)
++ if (old_state != new_state) {
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
++ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
++ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
++ nf_conntrack_event_cache(IPCT_ASSURED, ct);
++ }
+ }
+ spin_unlock_bh(&ct->lock);
+
+@@ -507,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
+
+- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
+- dir == IP_CT_DIR_REPLY &&
+- new_state == SCTP_CONNTRACK_ESTABLISHED) {
+- pr_debug("Setting assured bit\n");
+- set_bit(IPS_ASSURED_BIT, &ct->status);
+- nf_conntrack_event_cache(IPCT_ASSURED, ct);
+- }
+-
+ return NF_ACCEPT;
+
+ out_unlock:
+diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
+index af5115e127cfd..1ecfdc4f23be8 100644
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -341,8 +341,8 @@ static void tcp_options(const struct sk_buff *skb,
+ if (!ptr)
+ return;
+
+- state->td_scale =
+- state->flags = 0;
++ state->td_scale = 0;
++ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+
+ while (length > 0) {
+ int opcode=*ptr++;
+@@ -671,6 +671,37 @@ static bool tcp_in_window(struct nf_conn *ct,
+ tn->tcp_be_liberal)
+ res = true;
+ if (!res) {
++ bool seq_ok = before(seq, sender->td_maxend + 1);
++
++ if (!seq_ok) {
++ u32 overshot = end - sender->td_maxend + 1;
++ bool ack_ok;
++
++ ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
++
++ if (in_recv_win &&
++ ack_ok &&
++ overshot <= receiver->td_maxwin &&
++ before(sack, receiver->td_end + 1)) {
++ /* Work around TCPs that send more bytes than allowed by
++ * the receive window.
++ *
++ * If the (marked as invalid) packet is allowed to pass by
++ * the ruleset and the peer acks this data, then its possible
++ * all future packets will trigger 'ACK is over upper bound' check.
++ *
++ * Thus if only the sequence check fails then do update td_end so
++ * possible ACK for this data can update internal state.
++ */
++ sender->td_end = end;
++ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
++
++ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
++ "%u bytes more than expected", overshot);
++ return res;
++ }
++ }
++
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
+ "%s",
+ before(seq, sender->td_maxend + 1) ?
+@@ -839,6 +870,16 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
+ return false;
+ }
+
++static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
++{
++ state->td_end = 0;
++ state->td_maxend = 0;
++ state->td_maxwin = 0;
++ state->td_maxack = 0;
++ state->td_scale = 0;
++ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
++}
++
+ /* Returns verdict for packet, or -1 for invalid. */
+ int nf_conntrack_tcp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+@@ -945,8 +986,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
+ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+ ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
+ ct->proto.tcp.last_flags;
+- memset(&ct->proto.tcp.seen[dir], 0,
+- sizeof(struct ip_ct_tcp_state));
++ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
+ break;
+ }
+ ct->proto.tcp.last_index = index;
+diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
+index f8e3c0d2602f6..3b516cffc779b 100644
+--- a/net/netfilter/nf_conntrack_proto_udp.c
++++ b/net/netfilter/nf_conntrack_proto_udp.c
+@@ -104,10 +104,13 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
+ */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ unsigned long extra = timeouts[UDP_CT_UNREPLIED];
++ bool stream = false;
+
+ /* Still active after two seconds? Extend timeout. */
+- if (time_after(jiffies, ct->proto.udp.stream_ts))
++ if (time_after(jiffies, ct->proto.udp.stream_ts)) {
+ extra = timeouts[UDP_CT_REPLIED];
++ stream = true;
++ }
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra);
+
+@@ -116,7 +119,7 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
+ return NF_ACCEPT;
+
+ /* Also, more likely to be important, and not a probe */
+- if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
++ if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ } else {
+ nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
+diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
+index b83dc9bf0a5dd..751df19fe0f8a 100644
+--- a/net/netfilter/nf_conntrack_sip.c
++++ b/net/netfilter/nf_conntrack_sip.c
+@@ -477,7 +477,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
+ return ret;
+ if (ret == 0)
+ break;
+- dataoff += *matchoff;
++ dataoff = *matchoff;
+ }
+ *in_header = 0;
+ }
+@@ -489,7 +489,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
+ break;
+ if (ret == 0)
+ return ret;
+- dataoff += *matchoff;
++ dataoff = *matchoff;
+ }
+
+ if (in_header)
+@@ -611,7 +611,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+ start += strlen(name);
+ *val = simple_strtoul(start, &end, 0);
+ if (start == end)
+- return 0;
++ return -1;
+ if (matchoff && matchlen) {
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
+index 80f675d884b26..7515705583bcf 100644
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
+ int ret = 0;
+
+ WARN_ON(!ct);
+- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
++ if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
+ return 0;
+
+ if (nf_ct_should_gc(ct)) {
+@@ -363,14 +363,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
+ goto release;
+
+ #if defined(CONFIG_NF_CONNTRACK_MARK)
+- seq_printf(s, "mark=%u ", ct->mark);
++ seq_printf(s, "mark=%u ", READ_ONCE(ct->mark));
+ #endif
+
+ ct_show_secctx(s, ct);
+ ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
+ ct_show_delta_time(s, ct);
+
+- seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
++ seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
+
+ if (seq_has_overflowed(s))
+ goto release;
+@@ -599,7 +599,6 @@ enum nf_ct_sysctl_index {
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
+- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
+ #endif
+ #ifdef CONFIG_NF_CT_PROTO_DCCP
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
+@@ -823,7 +822,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
+ .procname = "nf_flowtable_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+@@ -892,12 +891,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
+- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_jiffies,
+- },
+ #endif
+ #ifdef CONFIG_NF_CT_PROTO_DCCP
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
+@@ -1041,7 +1034,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+ XASSIGN(SHUTDOWN_RECD, sn);
+ XASSIGN(SHUTDOWN_ACK_SENT, sn);
+ XASSIGN(HEARTBEAT_SENT, sn);
+- XASSIGN(HEARTBEAT_ACKED, sn);
+ #undef XASSIGN
+ #endif
+ }
+@@ -1237,11 +1229,12 @@ static int __init nf_conntrack_standalone_init(void)
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+ #endif
+
++ nf_conntrack_init_end();
++
+ ret = register_pernet_subsys(&nf_conntrack_net_ops);
+ if (ret < 0)
+ goto out_pernet;
+
+- nf_conntrack_init_end();
+ return 0;
+
+ out_pernet:
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index 87a7388b6c894..4f61eb1282834 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+- !atomic_inc_not_zero(&ct->ct_general.use)))
++ !refcount_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+@@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
+
+ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+- tcp->state = TCP_CONNTRACK_ESTABLISHED;
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
+ }
+
+-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
++static void flow_offload_fixup_ct(struct nf_conn *ct)
+ {
+ struct net *net = nf_ct_net(ct);
+ int l4num = nf_ct_protonum(ct);
+@@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++ flow_offload_fixup_tcp(&ct->proto.tcp);
++
++ timeout = tn->timeouts[ct->proto.tcp.state];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+@@ -201,20 +202,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+ if (timeout < 0)
+ timeout = 0;
+
+- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+- ct->timeout = nfct_time_stamp + timeout;
+-}
+-
+-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+-{
+- if (nf_ct_protonum(ct) == IPPROTO_TCP)
+- flow_offload_fixup_tcp(&ct->proto.tcp);
+-}
+-
+-static void flow_offload_fixup_ct(struct nf_conn *ct)
+-{
+- flow_offload_fixup_ct_state(ct);
+- flow_offload_fixup_ct_timeout(ct);
++ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
++ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+
+ static void flow_offload_route_release(struct flow_offload *flow)
+@@ -329,8 +318,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+- if (READ_ONCE(flow->timeout) != timeout)
++ if (timeout - READ_ONCE(flow->timeout) > HZ)
+ WRITE_ONCE(flow->timeout, timeout);
++ else
++ return;
+
+ if (likely(!nf_flowtable_hw_offload(flow_table)))
+ return;
+@@ -353,22 +344,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ nf_flow_offload_rhash_params);
+-
+- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+-
+- if (nf_flow_has_expired(flow))
+- flow_offload_fixup_ct(flow->ct);
+- else
+- flow_offload_fixup_ct_timeout(flow->ct);
+-
+ flow_offload_free(flow);
+ }
+
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
++ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+-
+- flow_offload_fixup_ct_state(flow->ct);
++ flow_offload_fixup_ct(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
+@@ -399,7 +382,8 @@ EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+ static int
+ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+- void (*iter)(struct flow_offload *flow, void *data),
++ void (*iter)(struct nf_flowtable *flowtable,
++ struct flow_offload *flow, void *data),
+ void *data)
+ {
+ struct flow_offload_tuple_rhash *tuplehash;
+@@ -423,7 +407,7 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+- iter(flow, data);
++ iter(flow_table, flow, data);
+ }
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+@@ -431,34 +415,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ return err;
+ }
+
+-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
+-{
+- struct dst_entry *dst;
+-
+- if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+- tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+- dst = tuple->dst_cache;
+- if (!dst_check(dst, tuple->dst_cookie))
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
+-{
+- return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
+- flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
+-}
+-
+-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
++static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
++ struct flow_offload *flow, void *data)
+ {
+- struct nf_flowtable *flow_table = data;
+-
+ if (nf_flow_has_expired(flow) ||
+- nf_ct_is_dying(flow->ct) ||
+- nf_flow_has_stale_dst(flow))
+- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++ nf_ct_is_dying(flow->ct))
++ flow_offload_teardown(flow);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
+@@ -474,12 +436,17 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
+ }
+ }
+
++void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
++{
++ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
++}
++
+ static void nf_flow_offload_work_gc(struct work_struct *work)
+ {
+ struct nf_flowtable *flow_table;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
++ nf_flow_table_gc_run(flow_table);
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+ }
+
+@@ -595,7 +562,8 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_init);
+
+-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
++static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
++ struct flow_offload *flow, void *data)
+ {
+ struct net_device *dev = data;
+
+@@ -636,12 +604,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ mutex_unlock(&flowtable_lock);
+
+ cancel_delayed_work_sync(&flow_table->gc_work);
+- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
+ nf_flow_table_offload_flush(flow_table);
+- if (nf_flowtable_hw_offload(flow_table))
+- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
+- flow_table);
++ /* ... no more pending work after this stage ... */
++ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
++ nf_flow_table_gc_run(flow_table);
++ nf_flow_table_offload_flush_cleanup(flow_table);
+ rhashtable_destroy(&flow_table->rhashtable);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
+diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
+index bc4126d8ef65f..280fdd32965f6 100644
+--- a/net/netfilter/nf_flow_table_inet.c
++++ b/net/netfilter/nf_flow_table_inet.c
+@@ -6,12 +6,29 @@
+ #include <linux/rhashtable.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_tables.h>
++#include <linux/if_vlan.h>
+
+ static unsigned int
+ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
++ struct vlan_ethhdr *veth;
++ __be16 proto;
++
+ switch (skb->protocol) {
++ case htons(ETH_P_8021Q):
++ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++ proto = veth->h_vlan_encapsulated_proto;
++ break;
++ case htons(ETH_P_PPP_SES):
++ proto = nf_flow_pppoe_proto(skb);
++ break;
++ default:
++ proto = skb->protocol;
++ break;
++ }
++
++ switch (proto) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index 889cf88d3dba6..28026467b54cd 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -8,8 +8,6 @@
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_ether.h>
+-#include <linux/if_pppox.h>
+-#include <linux/ppp_defs.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+@@ -229,6 +227,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+ return true;
+ }
+
++static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
++{
++ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
++ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
++ return true;
++
++ return dst_check(tuple->dst_cache, tuple->dst_cookie);
++}
++
+ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct dst_entry *dst)
+@@ -239,22 +246,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ return NF_STOLEN;
+ }
+
+-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+-{
+- __be16 proto;
+-
+- proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+- sizeof(struct pppoe_hdr)));
+- switch (proto) {
+- case htons(PPP_IP):
+- return htons(ETH_P_IP);
+- case htons(PPP_IPV6):
+- return htons(ETH_P_IPV6);
+- }
+-
+- return 0;
+-}
+-
+ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+ u32 *offset)
+ {
+@@ -364,6 +355,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+ return NF_ACCEPT;
+
++ if (!nf_flow_dst_check(&tuplehash->tuple)) {
++ flow_offload_teardown(flow);
++ return NF_ACCEPT;
++ }
++
+ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+@@ -600,6 +596,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+ return NF_ACCEPT;
+
++ if (!nf_flow_dst_check(&tuplehash->tuple)) {
++ flow_offload_teardown(flow);
++ return NF_ACCEPT;
++ }
++
+ if (skb_try_make_writable(skb, thoff + hdrsize))
+ return NF_DROP;
+
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index d6bf1b2cd541b..336f282a221fd 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
+ sizeof(struct in6_addr)))
+- memset(&key->enc_ipv6.src, 0xff,
++ memset(&mask->enc_ipv6.src, 0xff,
+ sizeof(struct in6_addr));
+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
+ sizeof(struct in6_addr)))
+- memset(&key->enc_ipv6.dst, 0xff,
++ memset(&mask->enc_ipv6.dst, 0xff,
+ sizeof(struct in6_addr));
+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+@@ -372,12 +372,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+ const __be32 *addr, const __be32 *mask)
+ {
+ struct flow_action_entry *entry;
+- int i, j;
++ int i;
+
+- for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
++ for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
+ entry = flow_action_entry_next(flow_rule);
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+- offset + i, &addr[j], mask);
++ offset + i * sizeof(u32), &addr[i], mask);
+ }
+ }
+
+@@ -1050,6 +1050,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ flow_offload_queue_work(offload);
+ }
+
++void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
++{
++ if (nf_flowtable_hw_offload(flowtable)) {
++ flush_workqueue(nf_flow_offload_del_wq);
++ nf_flow_table_gc_run(flowtable);
++ }
++}
++
+ void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
+ {
+ if (nf_flowtable_hw_offload(flowtable)) {
+@@ -1066,6 +1074,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+ struct flow_block_cb *block_cb, *next;
+ int err = 0;
+
++ down_write(&flowtable->flow_block_lock);
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
+@@ -1080,6 +1089,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
++ up_write(&flowtable->flow_block_lock);
+
+ return err;
+ }
+@@ -1136,7 +1146,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
+
+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
+ extack);
++ down_write(&flowtable->flow_block_lock);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
++ up_write(&flowtable->flow_block_lock);
+ if (err < 0)
+ return err;
+
+diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
+index 13234641cdb34..7000e069bc076 100644
+--- a/net/netfilter/nf_log_syslog.c
++++ b/net/netfilter/nf_log_syslog.c
+@@ -61,7 +61,7 @@ dump_arp_packet(struct nf_log_buf *m,
+ unsigned int logflags;
+ struct arphdr _arph;
+
+- ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
++ ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph);
+ if (!ah) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+@@ -90,7 +90,7 @@ dump_arp_packet(struct nf_log_buf *m,
+ ah->ar_pln != sizeof(__be32))
+ return;
+
+- ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
++ ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp);
+ if (!ap) {
+ nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
+ skb->len - sizeof(_arph));
+@@ -144,7 +144,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+- dump_arp_packet(m, loginfo, skb, 0);
++ dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
+
+ nf_log_buf_close(m);
+ }
+@@ -829,7 +829,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
+ if (in)
+ dump_ipv4_mac_header(m, loginfo, skb);
+
+- dump_ipv4_packet(net, m, loginfo, skb, 0);
++ dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb));
+
+ nf_log_buf_close(m);
+ }
+diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
+index 6d12afabfe8a3..63d1516816b1f 100644
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void)
+ }
+ EXPORT_SYMBOL(nf_unregister_queue_handler);
+
++static void nf_queue_sock_put(struct sock *sk)
++{
++#ifdef CONFIG_INET
++ sock_gen_put(sk);
++#else
++ sock_put(sk);
++#endif
++}
++
+ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+ {
+ struct nf_hook_state *state = &entry->state;
+@@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+ dev_put(state->in);
+ dev_put(state->out);
+ if (state->sk)
+- sock_put(state->sk);
++ nf_queue_sock_put(state->sk);
+
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_put(entry->physin);
+@@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
+ }
+
+ /* Bump dev refs so they don't vanish while packet is out */
+-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
++bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+ {
+ struct nf_hook_state *state = &entry->state;
+
++ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
++ return false;
++
+ dev_hold(state->in);
+ dev_hold(state->out);
+- if (state->sk)
+- sock_hold(state->sk);
+
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_hold(entry->physin);
+ dev_hold(entry->physout);
+ #endif
++ return true;
+ }
+ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
+
+@@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+ break;
+ }
+
++ if (skb_sk_is_prefetched(skb)) {
++ struct sock *sk = skb->sk;
++
++ if (!sk_is_refcounted(sk)) {
++ if (!refcount_inc_not_zero(&sk->sk_refcnt))
++ return -ENOTCONN;
++
++ /* drop refcount on skb_orphan */
++ skb->destructor = sock_edemux;
++ }
++ }
++
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+@@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+
+ __nf_queue_entry_init_physdevs(entry);
+
+- nf_queue_entry_get_refs(entry);
++ if (!nf_queue_entry_get_refs(entry)) {
++ kfree(entry);
++ return -ENOTCONN;
++ }
+
+ switch (entry->state.pf) {
+ case AF_INET:
+diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
+index 3d6d49420db8b..049a88f038011 100644
+--- a/net/netfilter/nf_synproxy_core.c
++++ b/net/netfilter/nf_synproxy_core.c
+@@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net)
+ goto err2;
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+- nf_conntrack_get(&ct->ct_general);
+ snet->tmpl = ct;
+
+ snet->stats = alloc_percpu(struct synproxy_stats);
+@@ -428,7 +427,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
++ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index c0851fec11d46..d84da11aaee5c 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
+ static LIST_HEAD(nf_tables_destroy_list);
+ static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+-static u64 table_handle;
+
+ enum {
+ NFT_VALIDATE_SKIP = 0,
+@@ -153,6 +152,8 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
+ if (trans == NULL)
+ return NULL;
+
++ INIT_LIST_HEAD(&trans->list);
++ INIT_LIST_HEAD(&trans->binding_list);
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+@@ -165,13 +166,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+ return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
+ }
+
+-static void nft_trans_destroy(struct nft_trans *trans)
++static void nft_trans_list_del(struct nft_trans *trans)
+ {
+ list_del(&trans->list);
++ list_del(&trans->binding_list);
++}
++
++static void nft_trans_destroy(struct nft_trans *trans)
++{
++ nft_trans_list_del(trans);
+ kfree(trans);
+ }
+
+-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
++static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
++ bool bind)
+ {
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+@@ -185,16 +193,82 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set(trans) == set)
+- nft_trans_set_bound(trans) = true;
++ nft_trans_set_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans) == set)
+- nft_trans_elem_set_bound(trans) = true;
++ nft_trans_elem_set_bound(trans) = bind;
++ break;
++ }
++ }
++}
++
++static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ return __nft_set_trans_bind(ctx, set, true);
++}
++
++static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ return __nft_set_trans_bind(ctx, set, false);
++}
++
++static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
++ struct nft_chain *chain, bool bind)
++{
++ struct nftables_pernet *nft_net;
++ struct net *net = ctx->net;
++ struct nft_trans *trans;
++
++ if (!nft_chain_binding(chain))
++ return;
++
++ nft_net = nft_pernet(net);
++ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
++ switch (trans->msg_type) {
++ case NFT_MSG_NEWCHAIN:
++ if (nft_trans_chain(trans) == chain)
++ nft_trans_chain_bound(trans) = bind;
++ break;
++ case NFT_MSG_NEWRULE:
++ if (trans->ctx.chain == chain)
++ nft_trans_rule_bound(trans) = bind;
+ break;
+ }
+ }
+ }
+
++static void nft_chain_trans_bind(const struct nft_ctx *ctx,
++ struct nft_chain *chain)
++{
++ __nft_chain_trans_bind(ctx, chain, true);
++}
++
++int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
++{
++ if (!nft_chain_binding(chain))
++ return 0;
++
++ if (nft_chain_binding(ctx->chain))
++ return -EOPNOTSUPP;
++
++ if (chain->bound)
++ return -EBUSY;
++
++ if (!nft_use_inc(&chain->use))
++ return -EMFILE;
++
++ chain->bound = true;
++ nft_chain_trans_bind(ctx, chain);
++
++ return 0;
++}
++
++void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
++{
++ __nft_chain_trans_bind(ctx, chain, false);
++}
++
+ static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+ {
+@@ -222,12 +296,18 @@ err_register:
+ }
+
+ static void nft_netdev_unregister_hooks(struct net *net,
+- struct list_head *hook_list)
++ struct list_head *hook_list,
++ bool release_netdev)
+ {
+- struct nft_hook *hook;
++ struct nft_hook *hook, *next;
+
+- list_for_each_entry(hook, hook_list, list)
++ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
++ if (release_netdev) {
++ list_del(&hook->list);
++ kfree_rcu(hook, rcu);
++ }
++ }
+ }
+
+ static int nf_tables_register_hook(struct net *net,
+@@ -253,9 +333,10 @@ static int nf_tables_register_hook(struct net *net,
+ return nf_register_net_hook(net, &basechain->ops);
+ }
+
+-static void nf_tables_unregister_hook(struct net *net,
+- const struct nft_table *table,
+- struct nft_chain *chain)
++static void __nf_tables_unregister_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain,
++ bool release_netdev)
+ {
+ struct nft_base_chain *basechain;
+ const struct nf_hook_ops *ops;
+@@ -270,15 +351,36 @@ static void nf_tables_unregister_hook(struct net *net,
+ return basechain->type->ops_unregister(net, ops);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+- nft_netdev_unregister_hooks(net, &basechain->hook_list);
++ nft_netdev_unregister_hooks(net, &basechain->hook_list,
++ release_netdev);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+ }
+
++static void nf_tables_unregister_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain)
++{
++ return __nf_tables_unregister_hook(net, table, chain, false);
++}
++
+ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
++ switch (trans->msg_type) {
++ case NFT_MSG_NEWSET:
++ if (!nft_trans_set_update(trans) &&
++ nft_set_is_anonymous(nft_trans_set(trans)))
++ list_add_tail(&trans->binding_list, &nft_net->binding_list);
++ break;
++ case NFT_MSG_NEWCHAIN:
++ if (!nft_trans_chain_update(trans) &&
++ nft_chain_binding(nft_trans_chain(trans)))
++ list_add_tail(&trans->binding_list, &nft_net->binding_list);
++ break;
++ }
++
+ list_add_tail(&trans->list, &nft_net->commit_list);
+ }
+
+@@ -325,8 +427,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+ ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
+ }
+ }
+-
++ nft_trans_chain(trans) = ctx->chain;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
++
+ return trans;
+ }
+
+@@ -338,14 +441,13 @@ static int nft_delchain(struct nft_ctx *ctx)
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+ nft_deactivate_next(ctx->net, ctx->chain);
+
+ return 0;
+ }
+
+-static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+- struct nft_rule *rule)
++void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
+ {
+ struct nft_expr *expr;
+
+@@ -358,9 +460,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+ }
+ }
+
+-static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
+- struct nft_rule *rule,
+- enum nft_trans_phase phase)
++void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
++ enum nft_trans_phase phase)
+ {
+ struct nft_expr *expr;
+
+@@ -379,7 +480,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
+ /* You cannot delete the same rule twice */
+ if (nft_is_active_next(ctx->net, rule)) {
+ nft_deactivate_next(ctx->net, rule);
+- ctx->chain->use--;
++ nft_use_dec(&ctx->chain->use);
+ return 0;
+ }
+ return -ENOENT;
+@@ -450,8 +551,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
+ return 0;
+ }
+
+-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+- struct nft_set *set)
++static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
++ struct nft_set *set,
++ const struct nft_set_desc *desc)
+ {
+ struct nft_trans *trans;
+
+@@ -459,17 +561,80 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ if (trans == NULL)
+ return -ENOMEM;
+
+- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
++ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ nft_activate_next(ctx->net, set);
+ }
+ nft_trans_set(trans) = set;
++ if (desc) {
++ nft_trans_set_update(trans) = true;
++ nft_trans_set_gc_int(trans) = desc->gc_int;
++ nft_trans_set_timeout(trans) = desc->timeout;
++ }
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+ return 0;
+ }
+
++static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
++ struct nft_set *set)
++{
++ return __nft_trans_set_add(ctx, msg_type, set, NULL);
++}
++
++static void nft_setelem_data_deactivate(const struct net *net,
++ const struct nft_set *set,
++ struct nft_set_elem *elem);
++
++static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
++ struct nft_set *set,
++ const struct nft_set_iter *iter,
++ struct nft_set_elem *elem)
++{
++ nft_setelem_data_deactivate(ctx->net, set, elem);
++
++ return 0;
++}
++
++struct nft_set_elem_catchall {
++ struct list_head list;
++ struct rcu_head rcu;
++ void *elem;
++};
++
++static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
++ struct nft_set *set)
++{
++ u8 genmask = nft_genmask_next(ctx->net);
++ struct nft_set_elem_catchall *catchall;
++ struct nft_set_elem elem;
++ struct nft_set_ext *ext;
++
++ list_for_each_entry(catchall, &set->catchall_list, list) {
++ ext = nft_set_elem_ext(set, catchall->elem);
++ if (!nft_set_elem_active(ext, genmask))
++ continue;
++
++ elem.priv = catchall->elem;
++ nft_setelem_data_deactivate(ctx->net, set, &elem);
++ break;
++ }
++}
++
++static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ struct nft_set_iter iter = {
++ .genmask = nft_genmask_next(ctx->net),
++ .fn = nft_mapelem_deactivate,
++ };
++
++ set->ops->walk(ctx, set, &iter);
++ WARN_ON_ONCE(iter.err);
++
++ nft_map_catchall_deactivate(ctx, set);
++}
++
+ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+ {
+ int err;
+@@ -478,8 +643,11 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
+ if (err < 0)
+ return err;
+
++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_deactivate(ctx, set);
++
+ nft_deactivate_next(ctx->net, set);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -511,7 +679,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
+ return err;
+
+ nft_deactivate_next(ctx->net, obj);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -529,6 +697,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ if (msg_type == NFT_MSG_NEWFLOWTABLE)
+ nft_activate_next(ctx->net, flowtable);
+
++ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ nft_trans_flowtable(trans) = flowtable;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+
+@@ -545,7 +714,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
+ return err;
+
+ nft_deactivate_next(ctx->net, flowtable);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+
+ return err;
+ }
+@@ -820,7 +989,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -1139,7 +1308,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
+ INIT_LIST_HEAD(&table->flowtables);
+ table->family = family;
+ table->flags = flags;
+- table->handle = ++table_handle;
++ table->handle = ++nft_net->table_handle;
+ if (table->flags & NFT_TABLE_F_OWNER)
+ table->nlpid = NETLINK_CB(skb).portid;
+
+@@ -1609,7 +1778,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -1820,7 +1989,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+- hook->inactive = false;
+
+ return hook;
+
+@@ -2057,15 +2225,17 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ chain->flags |= NFT_CHAIN_BASE | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+- nft_chain_offload_priority(basechain) < 0)
++ !nft_chain_offload_support(basechain)) {
++ list_splice_init(&basechain->hook_list, &hook->list);
+ return -EOPNOTSUPP;
++ }
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+ }
+
+-static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
++int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+ {
+ int err;
+
+@@ -2088,7 +2258,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_base_chain *basechain;
+- struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
+ char name[NFT_NAME_MAXLEN];
+ struct nft_trans *trans;
+@@ -2096,10 +2265,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ struct nft_rule **rules;
+ int err;
+
+- if (table->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (nla[NFTA_CHAIN_HOOK]) {
++ struct nft_stats __percpu *stats = NULL;
+ struct nft_chain_hook hook;
+
+ if (flags & NFT_CHAIN_BINDING)
+@@ -2125,15 +2292,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ return PTR_ERR(stats);
+ }
+ rcu_assign_pointer(basechain->stats, stats);
+- static_branch_inc(&nft_counters_enabled);
+ }
+
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
++ free_percpu(stats);
+ return err;
+ }
++ if (stats)
++ static_branch_inc(&nft_counters_enabled);
+ } else {
+ if (flags & NFT_CHAIN_BASE)
+ return -EINVAL;
+@@ -2192,6 +2361,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ if (err < 0)
+ goto err_destroy_chain;
+
++ if (!nft_use_inc(&table->use)) {
++ err = -EMFILE;
++ goto err_use;
++ }
++
+ trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+@@ -2208,10 +2382,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ goto err_unregister_hook;
+ }
+
+- table->use++;
+-
+ return 0;
++
+ err_unregister_hook:
++ nft_use_dec_restore(&table->use);
++err_use:
+ nf_tables_unregister_hook(net, table, chain);
+ err_destroy_chain:
+ nf_tables_chain_destroy(ctx);
+@@ -2362,7 +2537,8 @@ err:
+ }
+
+ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+- const struct nlattr *nla)
++ const struct nft_table *table,
++ const struct nlattr *nla, u8 genmask)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ u32 id = ntohl(nla_get_be32(nla));
+@@ -2372,7 +2548,9 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+ struct nft_chain *chain = trans->ctx.chain;
+
+ if (trans->msg_type == NFT_MSG_NEWCHAIN &&
+- id == nft_trans_chain_id(trans))
++ chain->table == table &&
++ id == nft_trans_chain_id(trans) &&
++ nft_active_genmask(chain, genmask))
+ return chain;
+ }
+ return ERR_PTR(-ENOENT);
+@@ -2461,6 +2639,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ if (chain != NULL) {
++ if (chain->flags & NFT_CHAIN_BINDING)
++ return -EINVAL;
++
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, attr);
+ return -EEXIST;
+@@ -2778,27 +2959,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
+
+ err = nf_tables_expr_parse(ctx, nla, &expr_info);
+ if (err < 0)
+- goto err1;
++ goto err_expr_parse;
++
++ err = -EOPNOTSUPP;
++ if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL))
++ goto err_expr_stateful;
+
+ err = -ENOMEM;
+ expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
+ if (expr == NULL)
+- goto err2;
++ goto err_expr_stateful;
+
+ err = nf_tables_newexpr(ctx, &expr_info, expr);
+ if (err < 0)
+- goto err3;
++ goto err_expr_new;
+
+ return expr;
+-err3:
++err_expr_new:
+ kfree(expr);
+-err2:
++err_expr_stateful:
+ owner = expr_info.ops->type->owner;
+ if (expr_info.ops->type->release_ops)
+ expr_info.ops->type->release_ops(expr_info.ops);
+
+ module_put(owner);
+-err1:
++err_expr_parse:
+ return ERR_PTR(err);
+ }
+
+@@ -3032,7 +3217,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -3180,8 +3365,7 @@ err_fill_rule_info:
+ return err;
+ }
+
+-static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
+- struct nft_rule *rule)
++void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
+ {
+ struct nft_expr *expr, *next;
+
+@@ -3198,7 +3382,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
+ kfree(rule);
+ }
+
+-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
++static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+ {
+ nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
+ nf_tables_rule_destroy(ctx, rule);
+@@ -3254,7 +3438,60 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
+ return 0;
+ }
+
++int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
++ const struct nft_set_iter *iter,
++ struct nft_set_elem *elem)
++{
++ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
++ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
++ const struct nft_data *data;
++ int err;
++
++ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
++ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
++ return 0;
++
++ data = nft_set_ext_data(ext);
++ switch (data->verdict.code) {
++ case NFT_JUMP:
++ case NFT_GOTO:
++ pctx->level++;
++ err = nft_chain_validate(ctx, data->verdict.chain);
++ if (err < 0)
++ return err;
++ pctx->level--;
++ break;
++ default:
++ break;
++ }
++
++ return 0;
++}
++
++int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ u8 genmask = nft_genmask_next(ctx->net);
++ struct nft_set_elem_catchall *catchall;
++ struct nft_set_elem elem;
++ struct nft_set_ext *ext;
++ int ret = 0;
++
++ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
++ ext = nft_set_elem_ext(set, catchall->elem);
++ if (!nft_set_elem_active(ext, genmask))
++ continue;
++
++ elem.priv = catchall->elem;
++ ret = nft_setelem_validate(ctx, set, NULL, &elem);
++ if (ret < 0)
++ return ret;
++ }
++
++ return ret;
++}
++
+ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
++ const struct nft_chain *chain,
+ const struct nlattr *nla);
+
+ #define NFT_RULE_MAXEXPRS 128
+@@ -3297,11 +3534,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+ return PTR_ERR(chain);
+ }
+- if (nft_chain_is_bound(chain))
+- return -EOPNOTSUPP;
+
+ } else if (nla[NFTA_RULE_CHAIN_ID]) {
+- chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]);
++ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
++ genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
+ return PTR_ERR(chain);
+@@ -3310,6 +3546,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return -EINVAL;
+ }
+
++ if (nft_chain_is_bound(chain))
++ return -EOPNOTSUPP;
++
+ if (nla[NFTA_RULE_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+ rule = __nft_rule_lookup(chain, handle);
+@@ -3332,9 +3571,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return -EINVAL;
+ handle = nf_tables_alloc_handle(table);
+
+- if (chain->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+@@ -3343,7 +3579,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return PTR_ERR(old_rule);
+ }
+ } else if (nla[NFTA_RULE_POSITION_ID]) {
+- old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
++ old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
+ return PTR_ERR(old_rule);
+@@ -3428,6 +3664,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ }
+ }
+
++ if (!nft_use_inc(&chain->use)) {
++ err = -EMFILE;
++ goto err_release_rule;
++ }
++
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+@@ -3459,7 +3700,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ }
+ }
+ kvfree(expr_info);
+- chain->use++;
+
+ if (flow)
+ nft_trans_flow_rule(trans) = flow;
+@@ -3470,10 +3710,12 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ return 0;
+
+ err_destroy_flow_rule:
++ nft_use_dec_restore(&chain->use);
+ if (flow)
+ nft_flow_rule_destroy(flow);
+ err_release_rule:
+- nf_tables_rule_release(&ctx, rule);
++ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
++ nf_tables_rule_destroy(&ctx, rule);
+ err_release_expr:
+ for (i = 0; i < n; i++) {
+ if (expr_info[i].ops) {
+@@ -3488,6 +3730,7 @@ err_release_expr:
+ }
+
+ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
++ const struct nft_chain *chain,
+ const struct nlattr *nla)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -3495,11 +3738,10 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+- struct nft_rule *rule = nft_trans_rule(trans);
+-
+ if (trans->msg_type == NFT_MSG_NEWRULE &&
++ trans->ctx.chain == chain &&
+ id == nft_trans_rule_id(trans))
+- return rule;
++ return nft_trans_rule(trans);
+ }
+ return ERR_PTR(-ENOENT);
+ }
+@@ -3547,7 +3789,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+
+ err = nft_delrule(&ctx, rule);
+ } else if (nla[NFTA_RULE_ID]) {
+- rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
++ rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]);
+ if (IS_ERR(rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
+ return PTR_ERR(rule);
+@@ -3561,6 +3803,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
++ if (nft_chain_is_bound(chain))
++ continue;
+
+ ctx.chain = chain;
+ err = nft_delrule_by_chain(&ctx);
+@@ -3604,8 +3848,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
+ static const struct nft_set_ops *
+ nft_select_set_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const nla[],
+- const struct nft_set_desc *desc,
+- enum nft_set_policies policy)
++ const struct nft_set_desc *desc)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_set_ops *ops, *bops;
+@@ -3634,7 +3877,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
+ if (!ops->estimate(desc, flags, &est))
+ continue;
+
+- switch (policy) {
++ switch (desc->policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.lookup < best.lookup)
+ break;
+@@ -3726,6 +3969,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
+ }
+
+ static struct nft_set *nft_set_lookup_byid(const struct net *net,
++ const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -3737,6 +3981,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ struct nft_set *set = nft_trans_set(trans);
+
+ if (id == nft_trans_set_id(trans) &&
++ set->table == table &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+@@ -3757,7 +4002,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
+ if (!nla_set_id)
+ return set;
+
+- set = nft_set_lookup_byid(net, nla_set_id, genmask);
++ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
+ }
+ return set;
+ }
+@@ -3783,7 +4028,7 @@ cont:
+ list_for_each_entry(i, &ctx->table->sets, list) {
+ int tmp;
+
+- if (!nft_is_active_next(ctx->net, set))
++ if (!nft_is_active_next(ctx->net, i))
+ continue;
+ if (!sscanf(i->name, name, &tmp))
+ continue;
+@@ -3867,8 +4112,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
+ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+ const struct nft_set *set, u16 event, u16 flags)
+ {
+- struct nlmsghdr *nlh;
++ u64 timeout = READ_ONCE(set->timeout);
++ u32 gc_int = READ_ONCE(set->gc_int);
+ u32 portid = ctx->portid;
++ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ u32 seq = ctx->seq;
+ int i;
+@@ -3904,13 +4151,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+ nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
+ goto nla_put_failure;
+
+- if (set->timeout &&
++ if (timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT,
+- nf_jiffies64_to_msecs(set->timeout),
++ nf_jiffies64_to_msecs(timeout),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
+- if (set->gc_int &&
+- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
++ if (gc_int &&
++ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
+ goto nla_put_failure;
+
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+@@ -4009,7 +4256,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (ctx->family != NFPROTO_UNSPEC &&
+@@ -4147,6 +4394,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
+ u32 len;
+ int err;
+
++ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
++ return -E2BIG;
++
+ err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
+ nft_concat_policy, NULL);
+ if (err < 0)
+@@ -4156,9 +4406,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
+ return -EINVAL;
+
+ len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
+-
+- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT)
+- return -E2BIG;
++ if (!len || len > U8_MAX)
++ return -EINVAL;
+
+ desc->field_len[desc->field_count++] = len;
+
+@@ -4169,7 +4418,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
+ const struct nlattr *nla)
+ {
+ struct nlattr *attr;
+- int rem, err;
++ u32 num_regs = 0;
++ int rem, err, i;
+
+ nla_for_each_nested(attr, nla, rem) {
+ if (nla_type(attr) != NFTA_LIST_ELEM)
+@@ -4180,6 +4430,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
+ return err;
+ }
+
++ for (i = 0; i < desc->field_count; i++)
++ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
++
++ if (num_regs > NFT_REG32_COUNT)
++ return -E2BIG;
++
+ return 0;
+ }
+
+@@ -4202,15 +4458,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
+ return err;
+ }
+
++static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
++ const struct nlattr * const *nla,
++ struct nft_expr **exprs, int *num_exprs,
++ u32 flags)
++{
++ struct nft_expr *expr;
++ int err, i;
++
++ if (nla[NFTA_SET_EXPR]) {
++ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
++ if (IS_ERR(expr)) {
++ err = PTR_ERR(expr);
++ goto err_set_expr_alloc;
++ }
++ exprs[0] = expr;
++ (*num_exprs)++;
++ } else if (nla[NFTA_SET_EXPRESSIONS]) {
++ struct nlattr *tmp;
++ int left;
++
++ if (!(flags & NFT_SET_EXPR)) {
++ err = -EINVAL;
++ goto err_set_expr_alloc;
++ }
++ i = 0;
++ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
++ if (i == NFT_SET_EXPR_MAX) {
++ err = -E2BIG;
++ goto err_set_expr_alloc;
++ }
++ if (nla_type(tmp) != NFTA_LIST_ELEM) {
++ err = -EINVAL;
++ goto err_set_expr_alloc;
++ }
++ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
++ if (IS_ERR(expr)) {
++ err = PTR_ERR(expr);
++ goto err_set_expr_alloc;
++ }
++ exprs[i++] = expr;
++ (*num_exprs)++;
++ }
++ }
++
++ return 0;
++
++err_set_expr_alloc:
++ for (i = 0; i < *num_exprs; i++)
++ nft_expr_destroy(ctx, exprs[i]);
++
++ return err;
++}
++
++static bool nft_set_is_same(const struct nft_set *set,
++ const struct nft_set_desc *desc,
++ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
++{
++ int i;
++
++ if (set->ktype != desc->ktype ||
++ set->dtype != desc->dtype ||
++ set->flags != flags ||
++ set->klen != desc->klen ||
++ set->dlen != desc->dlen ||
++ set->field_count != desc->field_count ||
++ set->num_exprs != num_exprs)
++ return false;
++
++ for (i = 0; i < desc->field_count; i++) {
++ if (set->field_len[i] != desc->field_len[i])
++ return false;
++ }
++
++ for (i = 0; i < num_exprs; i++) {
++ if (set->exprs[i]->ops != exprs[i]->ops)
++ return false;
++ }
++
++ return true;
++}
++
+ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+ {
+- u32 ktype, dtype, flags, policy, gc_int, objtype;
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ const struct nft_set_ops *ops;
+- struct nft_expr *expr = NULL;
+ struct net *net = info->net;
+ struct nft_set_desc desc;
+ struct nft_table *table;
+@@ -4218,10 +4553,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ struct nft_set *set;
+ struct nft_ctx ctx;
+ size_t alloc_size;
+- u64 timeout;
++ int num_exprs = 0;
+ char *name;
+ int err, i;
+ u16 udlen;
++ u32 flags;
+ u64 size;
+
+ if (nla[NFTA_SET_TABLE] == NULL ||
+@@ -4232,10 +4568,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+
+ memset(&desc, 0, sizeof(desc));
+
+- ktype = NFT_DATA_VALUE;
++ desc.ktype = NFT_DATA_VALUE;
+ if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
++ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
++ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ return -EINVAL;
+ }
+
+@@ -4260,17 +4596,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ return -EOPNOTSUPP;
+ }
+
+- dtype = 0;
++ desc.dtype = 0;
+ if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+ if (!(flags & NFT_SET_MAP))
+ return -EINVAL;
+
+- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+- dtype != NFT_DATA_VERDICT)
++ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
++ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
++ desc.dtype != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+- if (dtype != NFT_DATA_VERDICT) {
++ if (desc.dtype != NFT_DATA_VERDICT) {
+ if (nla[NFTA_SET_DATA_LEN] == NULL)
+ return -EINVAL;
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+@@ -4285,39 +4621,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ if (!(flags & NFT_SET_OBJECT))
+ return -EINVAL;
+
+- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+- if (objtype == NFT_OBJECT_UNSPEC ||
+- objtype > NFT_OBJECT_MAX)
++ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
++ if (desc.objtype == NFT_OBJECT_UNSPEC ||
++ desc.objtype > NFT_OBJECT_MAX)
+ return -EOPNOTSUPP;
+ } else if (flags & NFT_SET_OBJECT)
+ return -EINVAL;
+ else
+- objtype = NFT_OBJECT_UNSPEC;
++ desc.objtype = NFT_OBJECT_UNSPEC;
+
+- timeout = 0;
++ desc.timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+
+- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
++ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
+ if (err)
+ return err;
+ }
+- gc_int = 0;
++ desc.gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
++ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
+- policy = NFT_SET_POL_PERFORMANCE;
++ desc.policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL)
+- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
++ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
++
++ if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
++ return -EINVAL;
++ } else if (flags & NFT_SET_CONCAT) {
++ return -EINVAL;
+ }
+
+ if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
+@@ -4339,6 +4680,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ return PTR_ERR(set);
+ }
+ } else {
++ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
++
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ return -EEXIST;
+@@ -4346,13 +4689,32 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+- return 0;
++ if (nft_set_is_anonymous(set))
++ return -EOPNOTSUPP;
++
++ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
++ if (err < 0)
++ return err;
++
++ err = 0;
++ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
++ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
++ err = -EEXIST;
++ }
++
++ for (i = 0; i < num_exprs; i++)
++ nft_expr_destroy(&ctx, exprs[i]);
++
++ if (err < 0)
++ return err;
++
++ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
+ }
+
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ return -ENOENT;
+
+- ops = nft_select_set_ops(&ctx, nla, &desc, policy);
++ ops = nft_select_set_ops(&ctx, nla, &desc);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+@@ -4366,9 +4728,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ alloc_size = sizeof(*set) + size + udlen;
+ if (alloc_size < size || alloc_size > INT_MAX)
+ return -ENOMEM;
++
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ set = kvzalloc(alloc_size, GFP_KERNEL);
+- if (!set)
+- return -ENOMEM;
++ if (!set) {
++ err = -ENOMEM;
++ goto err_alloc;
++ }
+
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+@@ -4392,18 +4760,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ set->table = table;
+ write_pnet(&set->net, net);
+ set->ops = ops;
+- set->ktype = ktype;
++ set->ktype = desc.ktype;
+ set->klen = desc.klen;
+- set->dtype = dtype;
+- set->objtype = objtype;
++ set->dtype = desc.dtype;
++ set->objtype = desc.objtype;
+ set->dlen = desc.dlen;
+ set->flags = flags;
+ set->size = desc.size;
+- set->policy = policy;
++ set->policy = desc.policy;
+ set->udlen = udlen;
+ set->udata = udata;
+- set->timeout = timeout;
+- set->gc_int = gc_int;
++ set->timeout = desc.timeout;
++ set->gc_int = desc.gc_int;
+
+ set->field_count = desc.field_count;
+ for (i = 0; i < desc.field_count; i++)
+@@ -4413,80 +4781,46 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ if (err < 0)
+ goto err_set_init;
+
+- if (nla[NFTA_SET_EXPR]) {
+- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
+- if (IS_ERR(expr)) {
+- err = PTR_ERR(expr);
+- goto err_set_expr_alloc;
+- }
+- set->exprs[0] = expr;
+- set->num_exprs++;
+- } else if (nla[NFTA_SET_EXPRESSIONS]) {
+- struct nft_expr *expr;
+- struct nlattr *tmp;
+- int left;
+-
+- if (!(flags & NFT_SET_EXPR)) {
+- err = -EINVAL;
+- goto err_set_expr_alloc;
+- }
+- i = 0;
+- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+- if (i == NFT_SET_EXPR_MAX) {
+- err = -E2BIG;
+- goto err_set_expr_alloc;
+- }
+- if (nla_type(tmp) != NFTA_LIST_ELEM) {
+- err = -EINVAL;
+- goto err_set_expr_alloc;
+- }
+- expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
+- if (IS_ERR(expr)) {
+- err = PTR_ERR(expr);
+- goto err_set_expr_alloc;
+- }
+- set->exprs[i++] = expr;
+- set->num_exprs++;
+- }
+- }
++ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
++ if (err < 0)
++ goto err_set_destroy;
+
++ set->num_exprs = num_exprs;
+ set->handle = nf_tables_alloc_handle(table);
++ INIT_LIST_HEAD(&set->pending_update);
+
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+ if (err < 0)
+ goto err_set_expr_alloc;
+
+ list_add_tail_rcu(&set->list, &table->sets);
+- table->use++;
++
+ return 0;
+
+ err_set_expr_alloc:
+ for (i = 0; i < set->num_exprs; i++)
+ nft_expr_destroy(&ctx, set->exprs[i]);
+-
+- ops->destroy(set);
++err_set_destroy:
++ ops->destroy(&ctx, set);
+ err_set_init:
+ kfree(set->name);
+ err_set_name:
+ kvfree(set);
++err_alloc:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+-struct nft_set_elem_catchall {
+- struct list_head list;
+- struct rcu_head rcu;
+- void *elem;
+-};
+-
+ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
+ struct nft_set *set)
+ {
+- struct nft_set_elem_catchall *catchall;
++ struct nft_set_elem_catchall *next, *catchall;
+
+- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
++ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ list_del_rcu(&catchall->list);
+- nft_set_elem_destroy(set, catchall->elem, true);
+- kfree_rcu(catchall);
++ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
++ kfree_rcu(catchall, rcu);
+ }
+ }
+
+@@ -4500,7 +4834,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+ for (i = 0; i < set->num_exprs; i++)
+ nft_expr_destroy(ctx, set->exprs[i]);
+
+- set->ops->destroy(set);
++ set->ops->destroy(ctx, set);
+ nft_set_catchall_destroy(ctx, set);
+ kfree(set->name);
+ kvfree(set);
+@@ -4609,9 +4943,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *i;
+ struct nft_set_iter iter;
+
+- if (set->use == UINT_MAX)
+- return -EOVERFLOW;
+-
+ if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
+ return -EBUSY;
+
+@@ -4639,10 +4970,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ return iter.err;
+ }
+ bind:
++ if (!nft_use_inc(&set->use))
++ return -EMFILE;
++
+ binding->chain = ctx->chain;
+ list_add_tail_rcu(&binding->list, &set->bindings);
+ nft_set_trans_bind(ctx, set);
+- set->use++;
+
+ return 0;
+ }
+@@ -4661,17 +4994,95 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ }
+ }
+
++static void nft_setelem_data_activate(const struct net *net,
++ const struct nft_set *set,
++ struct nft_set_elem *elem);
++
++static int nft_mapelem_activate(const struct nft_ctx *ctx,
++ struct nft_set *set,
++ const struct nft_set_iter *iter,
++ struct nft_set_elem *elem)
++{
++ nft_setelem_data_activate(ctx->net, set, elem);
++
++ return 0;
++}
++
++static void nft_map_catchall_activate(const struct nft_ctx *ctx,
++ struct nft_set *set)
++{
++ u8 genmask = nft_genmask_next(ctx->net);
++ struct nft_set_elem_catchall *catchall;
++ struct nft_set_elem elem;
++ struct nft_set_ext *ext;
++
++ list_for_each_entry(catchall, &set->catchall_list, list) {
++ ext = nft_set_elem_ext(set, catchall->elem);
++ if (!nft_set_elem_active(ext, genmask))
++ continue;
++
++ elem.priv = catchall->elem;
++ nft_setelem_data_activate(ctx->net, set, &elem);
++ break;
++ }
++}
++
++static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ struct nft_set_iter iter = {
++ .genmask = nft_genmask_next(ctx->net),
++ .fn = nft_mapelem_activate,
++ };
++
++ set->ops->walk(ctx, set, &iter);
++ WARN_ON_ONCE(iter.err);
++
++ nft_map_catchall_activate(ctx, set);
++}
++
++void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
++{
++ if (nft_set_is_anonymous(set)) {
++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_activate(ctx, set);
++
++ nft_clear(ctx->net, set);
++ }
++
++ nft_use_inc_restore(&set->use);
++}
++EXPORT_SYMBOL_GPL(nf_tables_activate_set);
++
+ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+ {
+ switch (phase) {
++ case NFT_TRANS_PREPARE_ERROR:
++ nft_set_trans_unbind(ctx, set);
++ if (nft_set_is_anonymous(set))
++ nft_deactivate_next(ctx->net, set);
++ else
++ list_del_rcu(&binding->list);
++
++ nft_use_dec(&set->use);
++ break;
+ case NFT_TRANS_PREPARE:
+- set->use--;
++ if (nft_set_is_anonymous(set)) {
++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_deactivate(ctx, set);
++
++ nft_deactivate_next(ctx->net, set);
++ }
++ nft_use_dec(&set->use);
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+- set->use--;
++ if (nft_set_is_anonymous(set) &&
++ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_deactivate(ctx, set);
++
++ nft_use_dec(&set->use);
+ fallthrough;
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+@@ -4928,6 +5339,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
++ cb->seq = READ_ONCE(nft_net->base_seq);
++
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+ dump_ctx->ctx.family != table->family)
+@@ -5063,6 +5476,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
++ if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) ==
++ (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
++ return -EINVAL;
+
+ return 0;
+ }
+@@ -5070,19 +5486,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
+ static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data *key, struct nlattr *attr)
+ {
+- struct nft_data_desc desc;
+- int err;
+-
+- err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr);
+- if (err < 0)
+- return err;
+-
+- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
+- nft_data_release(key, desc.type);
+- return -EINVAL;
+- }
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = NFT_DATA_VALUE_MAXLEN,
++ .len = set->klen,
++ };
+
+- return 0;
++ return nft_data_init(ctx, key, &desc, attr);
+ }
+
+ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+@@ -5090,18 +5500,19 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data *data,
+ struct nlattr *attr)
+ {
+- int err;
++ u32 dtype;
+
+- err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
+- if (err < 0)
+- return err;
++ if (set->dtype == NFT_DATA_VERDICT)
++ dtype = NFT_DATA_VERDICT;
++ else
++ dtype = NFT_DATA_VALUE;
+
+- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+- nft_data_release(data, desc->type);
+- return -EINVAL;
+- }
++ desc->type = dtype;
++ desc->size = NFT_DATA_VALUE_MAXLEN;
++ desc->len = set->dlen;
++ desc->flags = NFT_DATA_DESC_SETELEM;
+
+- return 0;
++ return nft_data_init(ctx, data, desc, attr);
+ }
+
+ static void *nft_setelem_catchall_get(const struct net *net,
+@@ -5216,7 +5627,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
+ int rem, err = 0;
+
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+- genmask, NETLINK_CB(skb).portid);
++ genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+@@ -5318,9 +5729,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
+ return expr;
+
+ err = -EOPNOTSUPP;
+- if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
+- goto err_set_elem_expr;
+-
+ if (expr->ops->type->flags & NFT_EXPR_GC) {
+ if (set->flags & NFT_SET_TIMEOUT)
+ goto err_set_elem_expr;
+@@ -5389,6 +5797,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
+ __nft_set_elem_expr_destroy(ctx, expr);
+ }
+
++/* Drop references and destroy. Called from gc, dynset and abort path. */
+ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ bool destroy_expr)
+ {
+@@ -5405,16 +5814,16 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+ nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use--;
++ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ kfree(elem);
+ }
+ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
+-/* Only called from commit path, nft_setelem_data_deactivate() already deals
+- * with the refcounting from the preparation phase.
++/* Destroy element. References have been already dropped in the preparation
++ * path via nft_setelem_data_deactivate().
+ */
+-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+- const struct nft_set *set, void *elem)
++void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set, void *elem)
+ {
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+@@ -5437,7 +5846,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
+
+ err = nft_expr_clone(expr, set->exprs[i]);
+ if (err < 0) {
+- nft_expr_destroy(ctx, expr);
++ kfree(expr);
+ goto err_expr;
+ }
+ expr_array[i] = expr;
+@@ -5653,7 +6062,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ if (catchall->elem == elem->priv) {
+ list_del_rcu(&catchall->list);
+- kfree_rcu(catchall);
++ kfree_rcu(catchall, rcu);
+ break;
+ }
+ }
+@@ -5669,6 +6078,25 @@ static void nft_setelem_remove(const struct net *net,
+ set->ops->remove(net, set, elem);
+ }
+
++static bool nft_setelem_valid_key_end(const struct nft_set *set,
++ struct nlattr **nla, u32 flags)
++{
++ if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) ==
++ (NFT_SET_CONCAT | NFT_SET_INTERVAL)) {
++ if (flags & NFT_SET_ELEM_INTERVAL_END)
++ return false;
++
++ if (nla[NFTA_SET_ELEM_KEY_END] &&
++ flags & NFT_SET_ELEM_CATCHALL)
++ return false;
++ } else {
++ if (nla[NFTA_SET_ELEM_KEY_END])
++ return false;
++ }
++
++ return true;
++}
++
+ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr, u32 nlmsg_flags)
+ {
+@@ -5701,11 +6129,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ if (err < 0)
+ return err;
+
+- if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
++ if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) ||
++ (!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY]))
+ return -EINVAL;
+
+- if (flags != 0)
+- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
++ if (flags != 0) {
++ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
++ if (err < 0)
++ return err;
++ }
+
+ if (set->flags & NFT_SET_MAP) {
+ if (nla[NFTA_SET_ELEM_DATA] == NULL &&
+@@ -5716,6 +6148,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ return -EINVAL;
+ }
+
++ if (set->flags & NFT_SET_OBJECT) {
++ if (!nla[NFTA_SET_ELEM_OBJREF] &&
++ !(flags & NFT_SET_ELEM_INTERVAL_END))
++ return -EINVAL;
++ } else {
++ if (nla[NFTA_SET_ELEM_OBJREF])
++ return -EINVAL;
++ }
++
++ if (!nft_setelem_valid_key_end(set, nla, flags))
++ return -EINVAL;
++
+ if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
+ (nla[NFTA_SET_ELEM_DATA] ||
+ nla[NFTA_SET_ELEM_OBJREF] ||
+@@ -5723,6 +6167,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ nla[NFTA_SET_ELEM_EXPIRATION] ||
+ nla[NFTA_SET_ELEM_USERDATA] ||
+ nla[NFTA_SET_ELEM_EXPR] ||
++ nla[NFTA_SET_ELEM_KEY_END] ||
+ nla[NFTA_SET_ELEM_EXPRESSIONS]))
+ return -EINVAL;
+
+@@ -5734,8 +6179,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ &timeout);
+ if (err)
+ return err;
+- } else if (set->flags & NFT_SET_TIMEOUT) {
+- timeout = set->timeout;
++ } else if (set->flags & NFT_SET_TIMEOUT &&
++ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
++ timeout = READ_ONCE(set->timeout);
+ }
+
+ expiration = 0;
+@@ -5800,7 +6246,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ err = -EOPNOTSUPP;
+ goto err_set_elem_expr;
+ }
+- } else if (set->num_exprs > 0) {
++ } else if (set->num_exprs > 0 &&
++ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
+ err = nft_set_elem_expr_clone(ctx, set, expr_array);
+ if (err < 0)
+ goto err_set_elem_expr_clone;
+@@ -5814,7 +6261,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ if (err < 0)
+ goto err_set_elem_expr;
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
++ if (err < 0)
++ goto err_parse_key;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY_END]) {
+@@ -5823,37 +6272,52 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ if (err < 0)
+ goto err_parse_key;
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
++ if (err < 0)
++ goto err_parse_key_end;
+ }
+
+ if (timeout > 0) {
+- nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+- if (timeout != set->timeout)
+- nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
++ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
++ if (err < 0)
++ goto err_parse_key_end;
++
++ if (timeout != READ_ONCE(set->timeout)) {
++ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
++ if (err < 0)
++ goto err_parse_key_end;
++ }
+ }
+
+ if (num_exprs) {
+ for (i = 0; i < num_exprs; i++)
+ size += expr_array[i]->ops->size;
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
+- sizeof(struct nft_set_elem_expr) +
+- size);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
++ sizeof(struct nft_set_elem_expr) + size);
++ if (err < 0)
++ goto err_parse_key_end;
+ }
+
+ if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
+- if (!(set->flags & NFT_SET_OBJECT)) {
+- err = -EINVAL;
+- goto err_parse_key_end;
+- }
+ obj = nft_obj_lookup(ctx->net, ctx->table,
+ nla[NFTA_SET_ELEM_OBJREF],
+ set->objtype, genmask);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
++ obj = NULL;
++ goto err_parse_key_end;
++ }
++
++ if (!nft_use_inc(&obj->use)) {
++ err = -EMFILE;
++ obj = NULL;
+ goto err_parse_key_end;
+ }
+- nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
++
++ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
++ if (err < 0)
++ goto err_parse_key_end;
+ }
+
+ if (nla[NFTA_SET_ELEM_DATA] != NULL) {
+@@ -5887,7 +6351,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ NFT_VALIDATE_NEED);
+ }
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
++ if (err < 0)
++ goto err_parse_data;
+ }
+
+ /* The full maximum length of userdata can exceed the maximum
+@@ -5897,9 +6363,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+- if (ulen > 0)
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+- ulen);
++ if (ulen > 0) {
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
++ ulen);
++ if (err < 0)
++ goto err_parse_data;
++ }
+ }
+
+ err = -ENOMEM;
+@@ -5917,10 +6386,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+ }
+- if (obj) {
++ if (obj)
+ *nft_set_ext_obj(ext) = obj;
+- obj->use++;
+- }
++
+ err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs);
+ if (err < 0)
+ goto err_elem_expr;
+@@ -5975,14 +6443,14 @@ err_set_full:
+ err_element_clash:
+ kfree(trans);
+ err_elem_expr:
+- if (obj)
+- obj->use--;
+-
+ nf_tables_set_elem_destroy(ctx, set, elem.priv);
+ err_parse_data:
+ if (nla[NFTA_SET_ELEM_DATA] != NULL)
+ nft_data_release(&elem.data.val, desc.type);
+ err_parse_key_end:
++ if (obj)
++ nft_use_dec_restore(&obj->use);
++
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
+ err_parse_key:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
+@@ -6023,7 +6491,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
++ if (!list_empty(&set->bindings) &&
++ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+@@ -6054,23 +6523,13 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
+ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+ {
+ struct nft_chain *chain;
+- struct nft_rule *rule;
+
+ if (type == NFT_DATA_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+- chain->use++;
+-
+- if (!nft_chain_is_bound(chain))
+- break;
+-
+- chain->table->use++;
+- list_for_each_entry(rule, &chain->rules, list)
+- chain->use++;
+-
+- nft_chain_add(chain->table, chain);
++ nft_use_inc_restore(&chain->use);
+ break;
+ }
+ }
+@@ -6085,7 +6544,7 @@ static void nft_setelem_data_activate(const struct net *net,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_hold(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use++;
++ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
+ }
+
+ static void nft_setelem_data_deactivate(const struct net *net,
+@@ -6097,7 +6556,7 @@ static void nft_setelem_data_deactivate(const struct net *net,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_release(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+- (*nft_set_ext_obj(ext))->use--;
++ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
+ }
+
+ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+@@ -6123,10 +6582,16 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+ if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+ return -EINVAL;
+
++ if (!nft_setelem_valid_key_end(set, nla, flags))
++ return -EINVAL;
++
+ nft_set_ext_prepare(&tmpl);
+
+- if (flags != 0)
+- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
++ if (flags != 0) {
++ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
++ if (err < 0)
++ return err;
++ }
+
+ if (nla[NFTA_SET_ELEM_KEY]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+@@ -6134,16 +6599,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+ if (err < 0)
+ return err;
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
++ if (err < 0)
++ goto fail_elem;
+ }
+
+ if (nla[NFTA_SET_ELEM_KEY_END]) {
+ err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
+ nla[NFTA_SET_ELEM_KEY_END]);
+ if (err < 0)
+- return err;
++ goto fail_elem;
+
+- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
++ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
++ if (err < 0)
++ goto fail_elem_key_end;
+ }
+
+ err = -ENOMEM;
+@@ -6151,7 +6620,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
+ elem.key_end.val.data, NULL, 0, 0,
+ GFP_KERNEL);
+ if (elem.priv == NULL)
+- goto fail_elem;
++ goto fail_elem_key_end;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+@@ -6175,6 +6644,8 @@ fail_ops:
+ kfree(trans);
+ fail_trans:
+ kfree(elem.priv);
++fail_elem_key_end:
++ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
+ fail_elem:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
+ return err;
+@@ -6248,6 +6719,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
+ ret = __nft_set_catchall_flush(ctx, set, &elem);
+ if (ret < 0)
+ break;
++ nft_set_elem_change_active(ctx->net, set, ext);
+ }
+
+ return ret;
+@@ -6291,7 +6763,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
+ set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
++
++ if (!list_empty(&set->bindings) &&
++ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+@@ -6535,12 +7009,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
+ {
+ struct nft_object *newobj;
+ struct nft_trans *trans;
+- int err;
++ int err = -ENOMEM;
++
++ if (!try_module_get(type->owner))
++ return -ENOENT;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
+ sizeof(struct nft_trans_obj));
+ if (!trans)
+- return -ENOMEM;
++ goto err_trans;
+
+ newobj = nft_obj_init(ctx, type, attr);
+ if (IS_ERR(newobj)) {
+@@ -6557,6 +7034,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
+
+ err_free_trans:
+ kfree(trans);
++err_trans:
++ module_put(type->owner);
+ return err;
+ }
+
+@@ -6603,6 +7082,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ return -EOPNOTSUPP;
+
+ type = __nft_obj_type_get(objtype);
++ if (WARN_ON_ONCE(!type))
++ return -ENOENT;
++
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+ return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
+@@ -6610,9 +7092,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ type = nft_obj_type_get(net, objtype);
+- if (IS_ERR(type))
+- return PTR_ERR(type);
++ if (IS_ERR(type)) {
++ err = PTR_ERR(type);
++ goto err_type;
++ }
+
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
+ if (IS_ERR(obj)) {
+@@ -6646,7 +7133,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ goto err_obj_ht;
+
+ list_add_tail_rcu(&obj->list, &table->objects);
+- table->use++;
++
+ return 0;
+ err_obj_ht:
+ /* queued in transaction log */
+@@ -6662,6 +7149,9 @@ err_strdup:
+ kfree(obj);
+ err_init:
+ module_put(type->owner);
++err_type:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+@@ -6721,7 +7211,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -7048,10 +7538,11 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ enum nft_trans_phase phase)
+ {
+ switch (phase) {
++ case NFT_TRANS_PREPARE_ERROR:
+ case NFT_TRANS_PREPARE:
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+- flowtable->use--;
++ nft_use_dec(&flowtable->use);
+ fallthrough;
+ default:
+ return;
+@@ -7191,13 +7682,25 @@ static void nft_unregister_flowtable_hook(struct net *net,
+ FLOW_BLOCK_UNBIND);
+ }
+
+-static void nft_unregister_flowtable_net_hooks(struct net *net,
+- struct list_head *hook_list)
++static void __nft_unregister_flowtable_net_hooks(struct net *net,
++ struct list_head *hook_list,
++ bool release_netdev)
+ {
+- struct nft_hook *hook;
++ struct nft_hook *hook, *next;
+
+- list_for_each_entry(hook, hook_list, list)
++ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
++ if (release_netdev) {
++ list_del(&hook->list);
++ kfree_rcu(hook, rcu);
++ }
++ }
++}
++
++static void nft_unregister_flowtable_net_hooks(struct net *net,
++ struct list_head *hook_list)
++{
++ __nft_unregister_flowtable_net_hooks(net, hook_list, false);
+ }
+
+ static int nft_register_flowtable_net_hooks(struct net *net,
+@@ -7290,11 +7793,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
+
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+- if (flags & ~NFT_FLOWTABLE_MASK)
+- return -EOPNOTSUPP;
++ if (flags & ~NFT_FLOWTABLE_MASK) {
++ err = -EOPNOTSUPP;
++ goto err_flowtable_update_hook;
++ }
+ if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
+- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
+- return -EOPNOTSUPP;
++ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
++ err = -EOPNOTSUPP;
++ goto err_flowtable_update_hook;
++ }
+ } else {
+ flags = flowtable->data.flags;
+ }
+@@ -7383,9 +7890,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
++ if (!nft_use_inc(&table->use))
++ return -EMFILE;
++
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+- if (!flowtable)
+- return -ENOMEM;
++ if (!flowtable) {
++ err = -ENOMEM;
++ goto flowtable_alloc;
++ }
+
+ flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+@@ -7440,7 +7952,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
+ goto err5;
+
+ list_add_tail_rcu(&flowtable->list, &table->flowtables);
+- table->use++;
+
+ return 0;
+ err5:
+@@ -7457,6 +7968,9 @@ err2:
+ kfree(flowtable->name);
+ err1:
+ kfree(flowtable);
++flowtable_alloc:
++ nft_use_dec_restore(&table->use);
++
+ return err;
+ }
+
+@@ -7475,6 +7989,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
+ {
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_flowtable_hook flowtable_hook;
++ LIST_HEAD(flowtable_del_list);
+ struct nft_hook *this, *hook;
+ struct nft_trans *trans;
+ int err;
+@@ -7490,7 +8005,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
+ err = -ENOENT;
+ goto err_flowtable_del_hook;
+ }
+- hook->inactive = true;
++ list_move(&hook->list, &flowtable_del_list);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
+@@ -7503,6 +8018,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
+ nft_trans_flowtable(trans) = flowtable;
+ nft_trans_flowtable_update(trans) = true;
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
++ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
+ nft_flowtable_hook_release(&flowtable_hook);
+
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+@@ -7510,13 +8026,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
+ return 0;
+
+ err_flowtable_del_hook:
+- list_for_each_entry(this, &flowtable_hook.list, list) {
+- hook = nft_hook_list_find(&flowtable->hook_list, this);
+- if (!hook)
+- break;
+-
+- hook->inactive = false;
+- }
++ list_splice(&flowtable_del_list, &flowtable->hook_list);
+ nft_flowtable_hook_release(&flowtable_hook);
+
+ return err;
+@@ -7641,7 +8151,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
+
+ rcu_read_lock();
+ nft_net = nft_pernet(net);
+- cb->seq = nft_net->base_seq;
++ cb->seq = READ_ONCE(nft_net->base_seq);
+
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
+@@ -8101,6 +8611,8 @@ static int nf_tables_validate(struct net *net)
+ if (nft_table_validate(net, table) < 0)
+ return -EAGAIN;
+ }
++
++ nft_validate_state_update(net, NFT_VALIDATE_SKIP);
+ break;
+ }
+
+@@ -8169,7 +8681,7 @@ static void nft_obj_commit_update(struct nft_trans *trans)
+ if (obj->ops->update)
+ obj->ops->update(obj, newobj);
+
+- kfree(newobj);
++ nft_obj_destroy(&trans->ctx, newobj);
+ }
+
+ static void nft_commit_release(struct nft_trans *trans)
+@@ -8228,7 +8740,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &head, list) {
+- list_del(&trans->list);
++ nft_trans_list_del(trans);
+ nft_commit_release(trans);
+ }
+ }
+@@ -8371,17 +8883,6 @@ void nft_chain_del(struct nft_chain *chain)
+ list_del_rcu(&chain->list);
+ }
+
+-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
+- struct list_head *hook_list)
+-{
+- struct nft_hook *hook, *next;
+-
+- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+- if (hook->inactive)
+- list_move(&hook->list, hook_list);
+- }
+-}
+-
+ static void nf_tables_module_autoload_cleanup(struct net *net)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -8526,13 +9027,29 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
+ }
+ }
+
++static void nft_set_commit_update(struct list_head *set_update_list)
++{
++ struct nft_set *set, *next;
++
++ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
++ list_del_init(&set->pending_update);
++
++ if (!set->ops->commit)
++ continue;
++
++ set->ops->commit(set);
++ }
++}
++
+ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
++ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
+ struct nft_chain *chain;
+ struct nft_table *table;
++ unsigned int base_seq;
+ LIST_HEAD(adl);
+ int err;
+
+@@ -8541,6 +9058,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ return 0;
+ }
+
++ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
++ switch (trans->msg_type) {
++ case NFT_MSG_NEWSET:
++ if (!nft_trans_set_update(trans) &&
++ nft_set_is_anonymous(nft_trans_set(trans)) &&
++ !nft_trans_set_bound(trans)) {
++ pr_warn_once("nftables ruleset with unbound set\n");
++ return -EINVAL;
++ }
++ break;
++ case NFT_MSG_NEWCHAIN:
++ if (!nft_trans_chain_update(trans) &&
++ nft_chain_binding(nft_trans_chain(trans)) &&
++ !nft_trans_chain_bound(trans)) {
++ pr_warn_once("nftables ruleset with unbound chain\n");
++ return -EINVAL;
++ }
++ break;
++ }
++ }
++
+ /* 0. Validate ruleset, otherwise roll back for error reporting. */
+ if (nf_tables_validate(net) < 0)
+ return -EAGAIN;
+@@ -8582,9 +9120,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ * Bump generation counter, invalidate any dump in progress.
+ * Cannot fail after this point.
+ */
+- while (++nft_net->base_seq == 0)
++ base_seq = READ_ONCE(nft_net->base_seq);
++ while (++base_seq == 0)
+ ;
+
++ WRITE_ONCE(nft_net->base_seq, base_seq);
++
+ /* step 3. Start new generation, rules_gen_X now in use. */
+ net->nft.gencursor = nft_gencursor_next(net);
+
+@@ -8636,6 +9177,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
++ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
++ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
++
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+@@ -8646,16 +9190,25 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_TRANS_COMMIT);
++
++ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
++ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+- nft_clear(net, nft_trans_set(trans));
+- /* This avoids hitting -EBUSY when deleting the table
+- * from the transaction.
+- */
+- if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+- !list_empty(&nft_trans_set(trans)->bindings))
+- trans->ctx.table->use--;
++ if (nft_trans_set_update(trans)) {
++ struct nft_set *set = nft_trans_set(trans);
+
++ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
++ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
++ } else {
++ nft_clear(net, nft_trans_set(trans));
++ /* This avoids hitting -EBUSY when deleting the table
++ * from the transaction.
++ */
++ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
++ !list_empty(&nft_trans_set(trans)->bindings))
++ nft_use_dec(&trans->ctx.table->use);
++ }
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+ nft_trans_destroy(trans);
+@@ -8672,6 +9225,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
+ NFT_MSG_NEWSETELEM);
++ if (te->set->ops->commit &&
++ list_empty(&te->set->pending_update)) {
++ list_add_tail(&te->set->pending_update,
++ &set_update_list);
++ }
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+@@ -8685,6 +9243,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
++ if (te->set->ops->commit &&
++ list_empty(&te->set->pending_update)) {
++ list_add_tail(&te->set->pending_update,
++ &set_update_list);
++ }
+ break;
+ case NFT_MSG_NEWOBJ:
+ if (nft_trans_obj_update(trans)) {
+@@ -8726,8 +9289,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
+- &nft_trans_flowtable_hooks(trans));
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ &nft_trans_flowtable_hooks(trans),
+@@ -8747,6 +9308,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ }
+ }
+
++ nft_set_commit_update(&set_update_list);
++
+ nft_commit_notify(net, NETLINK_CB(skb).portid);
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+ nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+@@ -8803,12 +9366,26 @@ static void nf_tables_abort_release(struct nft_trans *trans)
+ kfree(trans);
+ }
+
++static void nft_set_abort_update(struct list_head *set_update_list)
++{
++ struct nft_set *set, *next;
++
++ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
++ list_del_init(&set->pending_update);
++
++ if (!set->ops->abort)
++ continue;
++
++ set->ops->abort(set);
++ }
++}
++
+ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans *trans, *next;
++ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
+- struct nft_hook *hook;
+
+ if (action == NFNL_ABORT_VALIDATE &&
+ nf_tables_validate(net) < 0)
+@@ -8845,11 +9422,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ kfree(nft_trans_chain_name(trans));
+ nft_trans_destroy(trans);
+ } else {
+- if (nft_chain_is_bound(trans->ctx.chain)) {
++ if (nft_trans_chain_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_chain_del(trans->ctx.chain);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+@@ -8857,12 +9434,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, trans->ctx.chain);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+- trans->ctx.chain->use--;
++ if (nft_trans_rule_bound(trans)) {
++ nft_trans_destroy(trans);
++ break;
++ }
++ nft_use_dec_restore(&trans->ctx.chain->use);
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_expr_deactivate(&trans->ctx,
+ nft_trans_rule(trans),
+@@ -8871,7 +9452,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ case NFT_MSG_DELRULE:
+- trans->ctx.chain->use++;
++ nft_use_inc_restore(&trans->ctx.chain->use);
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+@@ -8880,7 +9461,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+- trans->ctx.table->use--;
++ if (nft_trans_set_update(trans)) {
++ nft_trans_destroy(trans);
++ break;
++ }
++ nft_use_dec_restore(&trans->ctx.table->use);
+ if (nft_trans_set_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+@@ -8888,8 +9473,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ list_del_rcu(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_set(trans));
++ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_activate(&trans->ctx, nft_trans_set(trans));
++
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+@@ -8901,6 +9489,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_setelem_remove(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
++
++ if (te->set->ops->abort &&
++ list_empty(&te->set->pending_update)) {
++ list_add_tail(&te->set->pending_update,
++ &set_update_list);
++ }
+ break;
+ case NFT_MSG_DELSETELEM:
+ te = (struct nft_trans_elem *)trans->data;
+@@ -8910,19 +9504,24 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ te->set->ndeact--;
+
++ if (te->set->ops->abort &&
++ list_empty(&te->set->pending_update)) {
++ list_add_tail(&te->set->pending_update,
++ &set_update_list);
++ }
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWOBJ:
+ if (nft_trans_obj_update(trans)) {
+- kfree(nft_trans_obj_newobj(trans));
++ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
+ nft_trans_destroy(trans);
+ } else {
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ nft_obj_del(nft_trans_obj(trans));
+ }
+ break;
+ case NFT_MSG_DELOBJ:
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_trans_destroy(trans);
+ break;
+@@ -8931,7 +9530,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable_hooks(trans));
+ } else {
+- trans->ctx.table->use--;
++ nft_use_dec_restore(&trans->ctx.table->use);
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nft_unregister_flowtable_net_hooks(net,
+ &nft_trans_flowtable(trans)->hook_list);
+@@ -8939,10 +9538,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ if (nft_trans_flowtable_update(trans)) {
+- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
+- hook->inactive = false;
++ list_splice(&nft_trans_flowtable_hooks(trans),
++ &nft_trans_flowtable(trans)->hook_list);
+ } else {
+- trans->ctx.table->use++;
++ nft_use_inc_restore(&trans->ctx.table->use);
+ nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ }
+ nft_trans_destroy(trans);
+@@ -8950,11 +9549,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ }
+ }
+
++ nft_set_abort_update(&set_update_list);
++
+ synchronize_rcu();
+
+ list_for_each_entry_safe_reverse(trans, next,
+ &nft_net->commit_list, list) {
+- list_del(&trans->list);
++ nft_trans_list_del(trans);
+ nf_tables_abort_release(trans);
+ }
+
+@@ -8966,11 +9567,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ return 0;
+ }
+
+-static void nf_tables_cleanup(struct net *net)
+-{
+- nft_validate_state_update(net, NFT_VALIDATE_SKIP);
+-}
+-
+ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+ enum nfnl_abort_action action)
+ {
+@@ -9004,7 +9600,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
+ .cb = nf_tables_cb,
+ .commit = nf_tables_commit,
+ .abort = nf_tables_abort,
+- .cleanup = nf_tables_cleanup,
+ .valid_genid = nf_tables_valid_genid,
+ .owner = THIS_MODULE,
+ };
+@@ -9203,17 +9798,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+ }
+ EXPORT_SYMBOL_GPL(nft_parse_u32_check);
+
+-static unsigned int nft_parse_register(const struct nlattr *attr)
++static int nft_parse_register(const struct nlattr *attr, u32 *preg)
+ {
+ unsigned int reg;
+
+ reg = ntohl(nla_get_be32(attr));
+ switch (reg) {
+ case NFT_REG_VERDICT...NFT_REG_4:
+- return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
++ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
++ break;
++ case NFT_REG32_00...NFT_REG32_15:
++ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
++ break;
+ default:
+- return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
++ return -ERANGE;
+ }
++
++ return 0;
+ }
+
+ /**
+@@ -9255,7 +9856,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+ u32 reg;
+ int err;
+
+- reg = nft_parse_register(attr);
++ err = nft_parse_register(attr, &reg);
++ if (err < 0)
++ return err;
++
+ err = nft_validate_register_load(reg, len);
+ if (err < 0)
+ return err;
+@@ -9310,7 +9914,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
+ int err;
+ u32 reg;
+
+- reg = nft_parse_register(attr);
++ err = nft_parse_register(attr, &reg);
++ if (err < 0)
++ return err;
++
+ err = nft_validate_register_store(ctx, reg, data, type, len);
+ if (err < 0)
+ return err;
+@@ -9342,6 +9949,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+
+ if (!tb[NFTA_VERDICT_CODE])
+ return -EINVAL;
++
++ /* zero padding hole for memcmp */
++ memset(data, 0, sizeof(*data));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict.code) {
+@@ -9366,8 +9976,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+ tb[NFTA_VERDICT_CHAIN],
+ genmask);
+ } else if (tb[NFTA_VERDICT_CHAIN_ID]) {
+- chain = nft_chain_lookup_byid(ctx->net,
+- tb[NFTA_VERDICT_CHAIN_ID]);
++ chain = nft_chain_lookup_byid(ctx->net, ctx->table,
++ tb[NFTA_VERDICT_CHAIN_ID],
++ genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+@@ -9378,36 +9989,32 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+ return PTR_ERR(chain);
+ if (nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
++ if (nft_chain_is_bound(chain))
++ return -EINVAL;
++ if (desc->flags & NFT_DATA_DESC_SETELEM &&
++ chain->flags & NFT_CHAIN_BINDING)
++ return -EINVAL;
++ if (!nft_use_inc(&chain->use))
++ return -EMFILE;
+
+- chain->use++;
+ data->verdict.chain = chain;
+ break;
+ }
+
+ desc->len = sizeof(data->verdict);
+- desc->type = NFT_DATA_VERDICT;
++
+ return 0;
+ }
+
+ static void nft_verdict_uninit(const struct nft_data *data)
+ {
+ struct nft_chain *chain;
+- struct nft_rule *rule;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+- chain->use--;
+-
+- if (!nft_chain_is_bound(chain))
+- break;
+-
+- chain->table->use--;
+- list_for_each_entry(rule, &chain->rules, list)
+- chain->use--;
+-
+- nft_chain_del(chain);
++ nft_use_dec(&chain->use);
+ break;
+ }
+ }
+@@ -9438,20 +10045,25 @@ nla_put_failure:
+ }
+
+ static int nft_value_init(const struct nft_ctx *ctx,
+- struct nft_data *data, unsigned int size,
+- struct nft_data_desc *desc, const struct nlattr *nla)
++ struct nft_data *data, struct nft_data_desc *desc,
++ const struct nlattr *nla)
+ {
+ unsigned int len;
+
+ len = nla_len(nla);
+ if (len == 0)
+ return -EINVAL;
+- if (len > size)
++ if (len > desc->size)
+ return -EOVERFLOW;
++ if (desc->len) {
++ if (len != desc->len)
++ return -EINVAL;
++ } else {
++ desc->len = len;
++ }
+
+ nla_memcpy(data->data, nla, len);
+- desc->type = NFT_DATA_VALUE;
+- desc->len = len;
++
+ return 0;
+ }
+
+@@ -9471,7 +10083,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+ *
+ * @ctx: context of the expression using the data
+ * @data: destination struct nft_data
+- * @size: maximum data length
+ * @desc: data description
+ * @nla: netlink attribute containing data
+ *
+@@ -9481,24 +10092,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+ * The caller can indicate that it only wants to accept data of type
+ * NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+-int nft_data_init(const struct nft_ctx *ctx,
+- struct nft_data *data, unsigned int size,
++int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+ struct nft_data_desc *desc, const struct nlattr *nla)
+ {
+ struct nlattr *tb[NFTA_DATA_MAX + 1];
+ int err;
+
++ if (WARN_ON_ONCE(!desc->size))
++ return -EINVAL;
++
+ err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
+ nft_data_policy, NULL);
+ if (err < 0)
+ return err;
+
+- if (tb[NFTA_DATA_VALUE])
+- return nft_value_init(ctx, data, size, desc,
+- tb[NFTA_DATA_VALUE]);
+- if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+- return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+- return -EINVAL;
++ if (tb[NFTA_DATA_VALUE]) {
++ if (desc->type != NFT_DATA_VALUE)
++ return -EINVAL;
++
++ err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
++ } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) {
++ if (desc->type != NFT_DATA_VERDICT)
++ return -EINVAL;
++
++ err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
++ } else {
++ err = -EINVAL;
++ }
++
++ return err;
+ }
+ EXPORT_SYMBOL_GPL(nft_data_init);
+
+@@ -9561,11 +10183,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+- ctx->chain->use--;
++ nft_use_dec(&ctx->chain->use);
+ nf_tables_rule_release(ctx, rule);
+ }
+ nft_chain_del(ctx->chain);
+- ctx->table->use--;
++ nft_use_dec(&ctx->table->use);
+ nf_tables_chain_destroy(ctx);
+
+ return 0;
+@@ -9574,10 +10196,14 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
+
+ static void __nft_release_hook(struct net *net, struct nft_table *table)
+ {
++ struct nft_flowtable *flowtable;
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list)
+- nf_tables_unregister_hook(net, table, chain);
++ __nf_tables_unregister_hook(net, table, chain, true);
++ list_for_each_entry(flowtable, &table->flowtables, list)
++ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
++ true);
+ }
+
+ static void __nft_release_hooks(struct net *net)
+@@ -9608,32 +10234,38 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
++ if (nft_chain_is_bound(chain))
++ continue;
++
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+- chain->use--;
++ nft_use_dec(&chain->use);
+ nf_tables_rule_release(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+- table->use--;
++ nft_use_dec(&table->use);
+ nf_tables_flowtable_destroy(flowtable);
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+- table->use--;
++ nft_use_dec(&table->use);
++ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
++ nft_map_deactivate(&ctx, set);
++
+ nft_set_destroy(&ctx, set);
+ }
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ nft_obj_del(obj);
+- table->use--;
++ nft_use_dec(&table->use);
+ nft_obj_destroy(&ctx, obj);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
+ nft_chain_del(chain);
+- table->use--;
++ nft_use_dec(&table->use);
+ nf_tables_chain_destroy(&ctx);
+ }
+ nf_tables_table_destroy(&ctx);
+@@ -9670,6 +10302,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ nft_net = nft_pernet(net);
+ deleted = 0;
+ mutex_lock(&nft_net->commit_mutex);
++ if (!list_empty(&nf_tables_destroy_list))
++ nf_tables_trans_destroy_flush_work();
+ again:
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (nft_table_has_owner(table) &&
+@@ -9705,6 +10339,7 @@ static int __net_init nf_tables_init_net(struct net *net)
+
+ INIT_LIST_HEAD(&nft_net->tables);
+ INIT_LIST_HEAD(&nft_net->commit_list);
++ INIT_LIST_HEAD(&nft_net->binding_list);
+ INIT_LIST_HEAD(&nft_net->module_list);
+ INIT_LIST_HEAD(&nft_net->notify_list);
+ mutex_init(&nft_net->commit_mutex);
+@@ -9716,7 +10351,11 @@ static int __net_init nf_tables_init_net(struct net *net)
+
+ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ {
++ struct nftables_pernet *nft_net = nft_pernet(net);
++
++ mutex_lock(&nft_net->commit_mutex);
+ __nft_release_hooks(net);
++ mutex_unlock(&nft_net->commit_mutex);
+ }
+
+ static void __net_exit nf_tables_exit_net(struct net *net)
+@@ -9724,7 +10363,8 @@ static void __net_exit nf_tables_exit_net(struct net *net)
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ mutex_lock(&nft_net->commit_mutex);
+- if (!list_empty(&nft_net->commit_list))
++ if (!list_empty(&nft_net->commit_list) ||
++ !list_empty(&nft_net->module_list))
+ __nf_tables_abort(net, NFNL_ABORT_NONE);
+ __nft_release_tables(net);
+ mutex_unlock(&nft_net->commit_mutex);
+diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
+index 866cfba04d6c0..2ab4216d2a903 100644
+--- a/net/netfilter/nf_tables_core.c
++++ b/net/netfilter/nf_tables_core.c
+@@ -67,6 +67,50 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
+ regs->verdict.code = NFT_BREAK;
+ }
+
++static void nft_cmp16_fast_eval(const struct nft_expr *expr,
++ struct nft_regs *regs)
++{
++ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++ const u64 *reg_data = (const u64 *)&regs->data[priv->sreg];
++ const u64 *mask = (const u64 *)&priv->mask;
++ const u64 *data = (const u64 *)&priv->data;
++
++ if (((reg_data[0] & mask[0]) == data[0] &&
++ ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
++ return;
++ regs->verdict.code = NFT_BREAK;
++}
++
++static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
++ const struct nft_chain *chain,
++ const struct nft_regs *regs)
++{
++ enum nft_trace_types type;
++
++ switch (regs->verdict.code) {
++ case NFT_CONTINUE:
++ case NFT_RETURN:
++ type = NFT_TRACETYPE_RETURN;
++ break;
++ default:
++ type = NFT_TRACETYPE_RULE;
++ break;
++ }
++
++ __nft_trace_packet(info, chain, type);
++}
++
++static inline void nft_trace_verdict(struct nft_traceinfo *info,
++ const struct nft_chain *chain,
++ const struct nft_rule *rule,
++ const struct nft_regs *regs)
++{
++ if (static_branch_unlikely(&nft_trace_enabled)) {
++ info->rule = rule;
++ __nft_trace_verdict(info, chain, regs);
++ }
++}
++
+ static bool nft_payload_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -79,7 +123,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
+ if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
+ ptr = skb_network_header(skb);
+ else {
+- if (!pkt->tprot_set)
++ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ return false;
+ ptr = skb_network_header(skb) + nft_thoff(pkt);
+ }
+@@ -162,7 +206,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+ struct nft_rule *const *rules;
+ const struct nft_rule *rule;
+ const struct nft_expr *expr, *last;
+- struct nft_regs regs;
++ struct nft_regs regs = {};
+ unsigned int stackptr = 0;
+ struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+ bool genbit = READ_ONCE(net->nft.gencursor);
+@@ -185,6 +229,8 @@ next_rule:
+ nft_rule_for_each_expr(expr, last, rule) {
+ if (expr->ops == &nft_cmp_fast_ops)
+ nft_cmp_fast_eval(expr, &regs);
++ else if (expr->ops == &nft_cmp16_fast_ops)
++ nft_cmp16_fast_eval(expr, &regs);
+ else if (expr->ops == &nft_bitwise_fast_ops)
+ nft_bitwise_fast_eval(expr, &regs);
+ else if (expr->ops != &nft_payload_fast_ops ||
+@@ -207,13 +253,13 @@ next_rule:
+ break;
+ }
+
++ nft_trace_verdict(&info, chain, rule, &regs);
++
+ switch (regs.verdict.code & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ case NF_STOLEN:
+- nft_trace_packet(&info, chain, rule,
+- NFT_TRACETYPE_RULE);
+ return regs.verdict.code;
+ }
+
+@@ -226,15 +272,10 @@ next_rule:
+ stackptr++;
+ fallthrough;
+ case NFT_GOTO:
+- nft_trace_packet(&info, chain, rule,
+- NFT_TRACETYPE_RULE);
+-
+ chain = regs.verdict.chain;
+ goto do_chain;
+ case NFT_CONTINUE:
+ case NFT_RETURN:
+- nft_trace_packet(&info, chain, rule,
+- NFT_TRACETYPE_RETURN);
+ break;
+ default:
+ WARN_ON(1);
+diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
+index 9656c16462222..910ef881c3b85 100644
+--- a/net/netfilter/nf_tables_offload.c
++++ b/net/netfilter/nf_tables_offload.c
+@@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
+
+ expr = nft_expr_first(rule);
+ while (nft_expr_more(rule, expr)) {
+- if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
++ if (expr->ops->offload_action &&
++ expr->ops->offload_action(expr))
+ num_actions++;
+
+ expr = nft_expr_next(expr);
+@@ -207,7 +208,7 @@ static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+ return 0;
+ }
+
+-int nft_chain_offload_priority(struct nft_base_chain *basechain)
++static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
+ {
+ if (basechain->ops.priority <= 0 ||
+ basechain->ops.priority > USHRT_MAX)
+@@ -216,6 +217,27 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
+ return 0;
+ }
+
++bool nft_chain_offload_support(const struct nft_base_chain *basechain)
++{
++ struct net_device *dev;
++ struct nft_hook *hook;
++
++ if (nft_chain_offload_priority(basechain) < 0)
++ return false;
++
++ list_for_each_entry(hook, &basechain->hook_list, list) {
++ if (hook->ops.pf != NFPROTO_NETDEV ||
++ hook->ops.hooknum != NF_NETDEV_INGRESS)
++ return false;
++
++ dev = hook->ops.dev;
++ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
++ return false;
++ }
++
++ return true;
++}
++
+ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+ const struct nft_base_chain *basechain,
+ const struct nft_rule *rule,
+diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
+index e4fe2f0780eb6..84a7dea46efae 100644
+--- a/net/netfilter/nf_tables_trace.c
++++ b/net/netfilter/nf_tables_trace.c
+@@ -113,13 +113,13 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
+ int off = skb_network_offset(skb);
+ unsigned int len, nh_end;
+
+- nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len;
++ nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
+ len = min_t(unsigned int, nh_end - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
+ if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
+ return -1;
+
+- if (pkt->tprot_set) {
++ if (pkt->flags & NFT_PKTINFO_L4PROTO) {
+ len = min_t(unsigned int, skb->len - nft_thoff(pkt),
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
+index 7e2c8dd01408f..85c74d46cd23a 100644
+--- a/net/netfilter/nfnetlink.c
++++ b/net/netfilter/nfnetlink.c
+@@ -290,6 +290,7 @@ replay:
+ nfnl_lock(subsys_id);
+ if (nfnl_dereference_protected(subsys_id) != ss ||
+ nfnetlink_find_client(type, ss) != nc) {
++ nfnl_unlock(subsys_id);
+ err = -EAGAIN;
+ break;
+ }
+@@ -527,7 +528,8 @@ ack:
+ * processed, this avoids that the same error is
+ * reported several times when replaying the batch.
+ */
+- if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
++ if (err == -ENOMEM ||
++ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ /* We failed to enqueue an error, reset the
+ * list of errors and send OOM to userspace
+ * pointing to the batch header.
+@@ -584,8 +586,6 @@ done:
+ goto replay_abort;
+ }
+ }
+- if (ss->cleanup)
+- ss->cleanup(net);
+
+ nfnl_err_deliver(&err_list, oskb);
+ kfree_skb(skb);
+diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
+index f554e2ea32eed..4692f8597060c 100644
+--- a/net/netfilter/nfnetlink_hook.c
++++ b/net/netfilter/nfnetlink_hook.c
+@@ -214,13 +214,6 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+ #endif
+ break;
+-#if IS_ENABLED(CONFIG_DECNET)
+- case NFPROTO_DECNET:
+- if (hook >= ARRAY_SIZE(net->nf.hooks_decnet))
+- return ERR_PTR(-EINVAL);
+- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
+- break;
+-#endif
+ #ifdef CONFIG_NETFILTER_INGRESS
+ case NFPROTO_NETDEV:
+ if (hook != NF_NETDEV_INGRESS)
+diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
+index 691ef4cffdd90..7f83f9697fc14 100644
+--- a/net/netfilter/nfnetlink_log.c
++++ b/net/netfilter/nfnetlink_log.c
+@@ -556,7 +556,8 @@ __build_packet_message(struct nfnl_log_net *log,
+ goto nla_put_failure;
+
+ if (indev && skb->dev &&
+- skb->mac_header != skb->network_header) {
++ skb_mac_header_was_set(skb) &&
++ skb_mac_header_len(skb) != 0) {
+ struct nfulnl_msg_packet_hw phw;
+ int len;
+
+diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
+index 0fa2e20304272..50723ba082890 100644
+--- a/net/netfilter/nfnetlink_osf.c
++++ b/net/netfilter/nfnetlink_osf.c
+@@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb,
+ struct nf_osf_hdr_ctx ctx;
+ const struct tcphdr *tcp;
+ struct tcphdr _tcph;
++ bool found = false;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+@@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb,
+
+ data->genre = f->genre;
+ data->version = f->version;
++ found = true;
+ break;
+ }
+
+- return true;
++ return found;
+ }
+ EXPORT_SYMBOL_GPL(nf_osf_find);
+
+@@ -313,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
+
+ f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
++ if (f->opt_num > ARRAY_SIZE(f->opt))
++ return -EINVAL;
++
++ if (!memchr(f->genre, 0, MAXGENRELEN) ||
++ !memchr(f->subtype, 0, MAXGENRELEN) ||
++ !memchr(f->version, 0, MAXGENRELEN))
++ return -EINVAL;
++
+ kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
+ if (!kf)
+ return -ENOMEM;
+@@ -437,3 +447,4 @@ module_init(nfnl_osf_init);
+ module_exit(nfnl_osf_fini);
+
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 4c3fbaaeb1030..5329ebf19a18b 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -560,7 +560,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ goto nla_put_failure;
+
+ if (indev && entskb->dev &&
+- entskb->mac_header != entskb->network_header) {
++ skb_mac_header_was_set(entskb) &&
++ skb_mac_header_len(entskb) != 0) {
+ struct nfqnl_msg_packet_hw phw;
+ int len;
+
+@@ -709,9 +710,15 @@ static struct nf_queue_entry *
+ nf_queue_entry_dup(struct nf_queue_entry *e)
+ {
+ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+- if (entry)
+- nf_queue_entry_get_refs(entry);
+- return entry;
++
++ if (!entry)
++ return NULL;
++
++ if (nf_queue_entry_get_refs(entry))
++ return entry;
++
++ kfree(entry);
++ return NULL;
+ }
+
+ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+@@ -829,11 +836,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+ struct sk_buff *nskb;
+
+ if (diff < 0) {
++ unsigned int min_len = skb_transport_offset(e->skb);
++
++ if (data_len < min_len)
++ return -EINVAL;
++
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
+index 47b0dba95054f..d6ab7aa14adc2 100644
+--- a/net/netfilter/nft_bitwise.c
++++ b/net/netfilter/nft_bitwise.c
+@@ -93,7 +93,16 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+ {
+- struct nft_data_desc mask, xor;
++ struct nft_data_desc mask = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->mask),
++ .len = priv->len,
++ };
++ struct nft_data_desc xor = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->xor),
++ .len = priv->len,
++ };
+ int err;
+
+ if (tb[NFTA_BITWISE_DATA])
+@@ -103,36 +112,30 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+ !tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+- err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask,
+- tb[NFTA_BITWISE_MASK]);
++ err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]);
+ if (err < 0)
+ return err;
+- if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
+- err = -EINVAL;
+- goto err1;
+- }
+
+- err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
+- tb[NFTA_BITWISE_XOR]);
++ err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]);
+ if (err < 0)
+- goto err1;
+- if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
+- err = -EINVAL;
+- goto err2;
+- }
++ goto err_xor_err;
+
+ return 0;
+-err2:
+- nft_data_release(&priv->xor, xor.type);
+-err1:
++
++err_xor_err:
+ nft_data_release(&priv->mask, mask.type);
++
+ return err;
+ }
+
+ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+ {
+- struct nft_data_desc d;
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->data),
++ .len = sizeof(u32),
++ };
+ int err;
+
+ if (tb[NFTA_BITWISE_MASK] ||
+@@ -142,13 +145,12 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
+ if (!tb[NFTA_BITWISE_DATA])
+ return -EINVAL;
+
+- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d,
+- tb[NFTA_BITWISE_DATA]);
++ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]);
+ if (err < 0)
+ return err;
+- if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) ||
+- priv->data.data[0] >= BITS_PER_TYPE(u32)) {
+- nft_data_release(&priv->data, d.type);
++
++ if (priv->data.data[0] >= BITS_PER_TYPE(u32)) {
++ nft_data_release(&priv->data, desc.type);
+ return -EINVAL;
+ }
+
+@@ -290,22 +292,21 @@ static const struct nft_expr_ops nft_bitwise_ops = {
+ static int
+ nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out)
+ {
+- struct nft_data_desc desc;
+ struct nft_data data;
+- int err = 0;
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(data),
++ .len = sizeof(u32),
++ };
++ int err;
+
+- err = nft_data_init(NULL, &data, sizeof(data), &desc, tb);
++ err = nft_data_init(NULL, &data, &desc, tb);
+ if (err < 0)
+ return err;
+
+- if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) {
+- err = -EINVAL;
+- goto err;
+- }
+ *out = data.data[0];
+-err:
+- nft_data_release(&data, desc.type);
+- return err;
++
++ return 0;
+ }
+
+ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
+index 9d5947ab8d4ef..7b0b8fecb2205 100644
+--- a/net/netfilter/nft_byteorder.c
++++ b/net/netfilter/nft_byteorder.c
+@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
+- union { u32 u32; u16 u16; } *s, *d;
++ u16 *s16, *d16;
+ unsigned int i;
+
+- s = (void *)src;
+- d = (void *)dst;
++ s16 = (void *)src;
++ d16 = (void *)dst;
+
+ switch (priv->size) {
+ case 8: {
+@@ -61,11 +61,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 4; i++)
+- d[i].u32 = ntohl((__force __be32)s[i].u32);
++ dst[i] = ntohl((__force __be32)src[i]);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 4; i++)
+- d[i].u32 = (__force __u32)htonl(s[i].u32);
++ dst[i] = (__force __u32)htonl(src[i]);
+ break;
+ }
+ break;
+@@ -73,11 +73,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 2; i++)
+- d[i].u16 = ntohs((__force __be16)s[i].u16);
++ d16[i] = ntohs((__force __be16)s16[i]);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 2; i++)
+- d[i].u16 = (__force __u16)htons(s[i].u16);
++ d16[i] = (__force __u16)htons(s16[i]);
+ break;
+ }
+ break;
+diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
+index 3ced0eb6b7c3b..5b02408a920bf 100644
+--- a/net/netfilter/nft_chain_filter.c
++++ b/net/netfilter/nft_chain_filter.c
+@@ -342,6 +342,12 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
+ return;
+ }
+
++ /* UNREGISTER events are also happening on netns exit.
++ *
++ * Although nf_tables core releases all tables/chains, only this event
++ * handler provides guarantee that hook->ops.dev is still accessible,
++ * so we cannot skip exiting net namespaces.
++ */
+ __nft_release_basechain(ctx);
+ }
+
+@@ -360,9 +366,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
+
+- if (!check_net(ctx.net))
+- return NOTIFY_DONE;
+-
+ nft_net = nft_pernet(ctx.net);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
+diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
+index 47b6d05f1ae69..461763a571f20 100644
+--- a/net/netfilter/nft_cmp.c
++++ b/net/netfilter/nft_cmp.c
+@@ -73,20 +73,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+ {
+ struct nft_cmp_expr *priv = nft_expr_priv(expr);
+- struct nft_data_desc desc;
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->data),
++ };
+ int err;
+
+- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+- tb[NFTA_CMP_DATA]);
++ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+- if (desc.type != NFT_DATA_VALUE) {
+- err = -EINVAL;
+- nft_data_release(&priv->data, desc.type);
+- return err;
+- }
+-
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+@@ -201,12 +197,14 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+ {
+ struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+- struct nft_data_desc desc;
+ struct nft_data data;
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(data),
++ };
+ int err;
+
+- err = nft_data_init(NULL, &data, sizeof(data), &desc,
+- tb[NFTA_CMP_DATA]);
++ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+@@ -272,12 +270,108 @@ const struct nft_expr_ops nft_cmp_fast_ops = {
+ .offload = nft_cmp_fast_offload,
+ };
+
++static u32 nft_cmp_mask(u32 bitlen)
++{
++ return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
++}
++
++static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
++{
++ int len = bitlen / BITS_PER_BYTE;
++ int i, words = len / sizeof(u32);
++
++ for (i = 0; i < words; i++) {
++ data->data[i] = 0xffffffff;
++ bitlen -= sizeof(u32) * BITS_PER_BYTE;
++ }
++
++ if (len % sizeof(u32))
++ data->data[i++] = nft_cmp_mask(bitlen);
++
++ for (; i < 4; i++)
++ data->data[i] = 0;
++}
++
++static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
++ const struct nft_expr *expr,
++ const struct nlattr * const tb[])
++{
++ struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->data),
++ };
++ int err;
++
++ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
++ if (err < 0)
++ return err;
++
++ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
++ if (err < 0)
++ return err;
++
++ nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
++ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
++ priv->len = desc.len;
++
++ return 0;
++}
++
++static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
++ struct nft_flow_rule *flow,
++ const struct nft_expr *expr)
++{
++ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++ struct nft_cmp_expr cmp = {
++ .data = priv->data,
++ .sreg = priv->sreg,
++ .len = priv->len,
++ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
++ };
++
++ return __nft_cmp_offload(ctx, flow, &cmp);
++}
++
++static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
++{
++ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
++
++ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
++ goto nla_put_failure;
++ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
++ goto nla_put_failure;
++
++ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
++ NFT_DATA_VALUE, priv->len) < 0)
++ goto nla_put_failure;
++ return 0;
++
++nla_put_failure:
++ return -1;
++}
++
++
++const struct nft_expr_ops nft_cmp16_fast_ops = {
++ .type = &nft_cmp_type,
++ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
++ .eval = NULL, /* inlined */
++ .init = nft_cmp16_fast_init,
++ .dump = nft_cmp16_fast_dump,
++ .offload = nft_cmp16_fast_offload,
++};
++
+ static const struct nft_expr_ops *
+ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+ {
+- struct nft_data_desc desc;
+ struct nft_data data;
++ struct nft_data_desc desc = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(data),
++ };
+ enum nft_cmp_ops op;
++ u8 sreg;
+ int err;
+
+ if (tb[NFTA_CMP_SREG] == NULL ||
+@@ -298,21 +392,21 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+ return ERR_PTR(-EINVAL);
+ }
+
+- err = nft_data_init(NULL, &data, sizeof(data), &desc,
+- tb[NFTA_CMP_DATA]);
++ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return ERR_PTR(err);
+
+- if (desc.type != NFT_DATA_VALUE)
+- goto err1;
+-
+- if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
+- return &nft_cmp_fast_ops;
++ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
++ if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
++ if (desc.len <= sizeof(u32))
++ return &nft_cmp_fast_ops;
++ else if (desc.len <= sizeof(data) &&
++ ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
++ (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
++ return &nft_cmp16_fast_ops;
++ }
+ return &nft_cmp_ops;
+-err1:
+- nft_data_release(&data, desc.type);
+- return ERR_PTR(-EINVAL);
+ }
+
+ struct nft_expr_type nft_cmp_type __read_mostly = {
+diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
+index 99b1de14ff7ee..bd468e955a212 100644
+--- a/net/netfilter/nft_ct.c
++++ b/net/netfilter/nft_ct.c
+@@ -97,7 +97,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
+ return;
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+- *dest = ct->mark;
++ *dest = READ_ONCE(ct->mark);
+ return;
+ #endif
+ #ifdef CONFIG_NF_CONNTRACK_SECMARK
+@@ -259,10 +259,13 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+- if (likely(atomic_read(&ct->ct_general.use) == 1)) {
++ if (likely(refcount_read(&ct->ct_general.use) == 1)) {
++ refcount_inc(&ct->ct_general.use);
+ nf_ct_zone_add(ct, &zone);
+ } else {
+- /* previous skb got queued to userspace */
++ /* previous skb got queued to userspace, allocate temporary
++ * one until percpu template can be reused.
++ */
+ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
+ if (!ct) {
+ regs->verdict.code = NF_DROP;
+@@ -270,7 +273,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ }
+ }
+
+- atomic_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+ }
+ #endif
+@@ -294,8 +296,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
+ switch (priv->key) {
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+- if (ct->mark != value) {
+- ct->mark = value;
++ if (READ_ONCE(ct->mark) != value) {
++ WRITE_ONCE(ct->mark, value);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+@@ -375,7 +377,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
+ return false;
+ }
+
+- atomic_set(&tmp->ct_general.use, 1);
+ per_cpu(nft_ct_pcpu_template, cpu) = tmp;
+ }
+
+@@ -1040,6 +1041,9 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ if (err < 0)
+ goto err_put_helper;
+
++ /* Avoid the bogus warning, helper will be assigned after CT init */
++ nf_ct_set_auto_assign_helper_warned(ctx->net);
++
+ return 0;
+
+ err_put_helper:
+diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
+index bbf3fcba3df40..5b5c607fbf83f 100644
+--- a/net/netfilter/nft_dup_netdev.c
++++ b/net/netfilter/nft_dup_netdev.c
+@@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
+ }
+
++static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
++{
++ return true;
++}
++
+ static struct nft_expr_type nft_dup_netdev_type;
+ static const struct nft_expr_ops nft_dup_netdev_ops = {
+ .type = &nft_dup_netdev_type,
+@@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
+ .init = nft_dup_netdev_init,
+ .dump = nft_dup_netdev_dump,
+ .offload = nft_dup_netdev_offload,
++ .offload_action = nft_dup_netdev_offload_action,
+ };
+
+ static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
+diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
+index 6ba3256fa8449..73e606372b05d 100644
+--- a/net/netfilter/nft_dynset.c
++++ b/net/netfilter/nft_dynset.c
+@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
++ if (set->flags & NFT_SET_OBJECT)
++ return -EOPNOTSUPP;
++
+ if (set->ops->update == NULL)
+ return -EOPNOTSUPP;
+
+@@ -198,17 +201,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
+ return -EBUSY;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+- switch (priv->op) {
+- case NFT_DYNSET_OP_ADD:
+- case NFT_DYNSET_OP_DELETE:
+- break;
+- case NFT_DYNSET_OP_UPDATE:
+- if (!(set->flags & NFT_SET_TIMEOUT))
+- return -EOPNOTSUPP;
+- break;
+- default:
++ if (priv->op > NFT_DYNSET_OP_DELETE)
+ return -EOPNOTSUPP;
+- }
+
+ timeout = 0;
+ if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+@@ -351,7 +345,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+- priv->set->use++;
++ nf_tables_activate_set(ctx, priv->set);
+ }
+
+ static void nft_dynset_destroy(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
+index af4ee874a067c..3609680831a14 100644
+--- a/net/netfilter/nft_exthdr.c
++++ b/net/netfilter/nft_exthdr.c
+@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
+ return opt[offset + 1];
+ }
+
++static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
++{
++ if (len % NFT_REG32_SIZE)
++ dest[len / NFT_REG32_SIZE] = 0;
++
++ return skb_copy_bits(skb, offset, dest, len);
++}
++
+ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+ }
+ offset += priv->offset;
+
+- dest[priv->len / NFT_REG32_SIZE] = 0;
+- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
++ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+ goto err;
+ return;
+ err:
+@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
+ }
+ offset += priv->offset;
+
+- dest[priv->len / NFT_REG32_SIZE] = 0;
+- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
++ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+ goto err;
+ return;
+ err:
+@@ -167,7 +173,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ {
+ struct tcphdr *tcph;
+
+- if (pkt->tprot != IPPROTO_TCP)
++ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
+@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = 1;
+ } else {
+- dest[priv->len / NFT_REG32_SIZE] = 0;
++ if (priv->len % NFT_REG32_SIZE)
++ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
+
+@@ -236,7 +243,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+- return;
++ goto err;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+@@ -251,16 +258,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+- return;
++ goto err;
+
+ if (skb_ensure_writable(pkt->skb,
+ nft_thoff(pkt) + i + priv->len))
+- return;
++ goto err;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ &tcphdr_len);
+ if (!tcph)
+- return;
++ goto err;
+
+ offset = i + priv->offset;
+
+@@ -303,6 +310,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+
+ return;
+ }
++ return;
++err:
++ regs->verdict.code = NFT_BREAK;
+ }
+
+ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+@@ -332,9 +342,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+ offset + ntohs(sch->length) > pkt->skb->len)
+ break;
+
+- dest[priv->len / NFT_REG32_SIZE] = 0;
+- if (skb_copy_bits(pkt->skb, offset + priv->offset,
+- dest, priv->len) < 0)
++ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
++ dest, priv->len) < 0)
+ break;
+ return;
+ }
+diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
+index 0af34ad414796..a5fc7213be3ed 100644
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
+ route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
+ }
+
++static bool nft_is_valid_ether_device(const struct net_device *dev)
++{
++ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
++ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
++ return false;
++
++ return true;
++}
++
+ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+ const struct dst_entry *dst_cache,
+ const struct nf_conn *ct,
+@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+ struct neighbour *n;
+ u8 nud_state;
+
++ if (!nft_is_valid_ether_device(dev))
++ goto out;
++
+ n = dst_neigh_lookup(dst_cache, daddr);
+ if (!n)
+ return -1;
+@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+ if (!(nud_state & NUD_VALID))
+ return -1;
+
++out:
+ return dev_fill_forward_path(dev, ha, stack);
+ }
+
+@@ -78,15 +91,6 @@ struct nft_forward_info {
+ enum flow_offload_xmit_type xmit_type;
+ };
+
+-static bool nft_is_valid_ether_device(const struct net_device *dev)
+-{
+- if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+- dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+- return false;
+-
+- return true;
+-}
+-
+ static void nft_dev_path_info(const struct net_device_path_stack *stack,
+ struct nft_forward_info *info,
+ unsigned char *ha, struct nf_flowtable *flowtable)
+@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
+ info->indev = NULL;
+ break;
+ }
+- info->outdev = path->dev;
++ if (!info->outdev)
++ info->outdev = path->dev;
+ info->encap[info->num_encaps].id = path->encap.id;
+ info->encap[info->num_encaps].proto = path->encap.proto;
+ info->num_encaps++;
+@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
+ case IPPROTO_TCP:
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
+ sizeof(_tcph), &_tcph);
+- if (unlikely(!tcph || tcph->fin || tcph->rst))
++ if (unlikely(!tcph || tcph->fin || tcph->rst ||
++ !nf_conntrack_tcp_established(ct)))
+ goto out;
+ break;
+ case IPPROTO_UDP:
+@@ -375,8 +381,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
++ if (!nft_use_inc(&flowtable->use))
++ return -EMFILE;
++
+ priv->flowtable = flowtable;
+- flowtable->use++;
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+ }
+@@ -395,7 +403,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+- priv->flowtable->use++;
++ nft_use_inc_restore(&priv->flowtable->use);
+ }
+
+ static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
+index cd59afde5b2f8..7730409f6f091 100644
+--- a/net/netfilter/nft_fwd_netdev.c
++++ b/net/netfilter/nft_fwd_netdev.c
+@@ -77,6 +77,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
+ }
+
++static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
++{
++ return true;
++}
++
+ struct nft_fwd_neigh {
+ u8 sreg_dev;
+ u8 sreg_addr;
+@@ -219,6 +224,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
+ .dump = nft_fwd_netdev_dump,
+ .validate = nft_fwd_validate,
+ .offload = nft_fwd_netdev_offload,
++ .offload_action = nft_fwd_netdev_offload_action,
+ };
+
+ static const struct nft_expr_ops *
+diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
+index 90c64d27ae532..7d5b63c5a30af 100644
+--- a/net/netfilter/nft_immediate.c
++++ b/net/netfilter/nft_immediate.c
+@@ -29,20 +29,36 @@ static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+ [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
+ };
+
++static enum nft_data_types nft_reg_to_type(const struct nlattr *nla)
++{
++ enum nft_data_types type;
++ u8 reg;
++
++ reg = ntohl(nla_get_be32(nla));
++ if (reg == NFT_REG_VERDICT)
++ type = NFT_DATA_VERDICT;
++ else
++ type = NFT_DATA_VALUE;
++
++ return type;
++}
++
+ static int nft_immediate_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+ {
+ struct nft_immediate_expr *priv = nft_expr_priv(expr);
+- struct nft_data_desc desc;
++ struct nft_data_desc desc = {
++ .size = sizeof(priv->data),
++ };
+ int err;
+
+ if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+ tb[NFTA_IMMEDIATE_DATA] == NULL)
+ return -EINVAL;
+
+- err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
+- tb[NFTA_IMMEDIATE_DATA]);
++ desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]);
++ err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ if (err < 0)
+ return err;
+
+@@ -60,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
+ switch (priv->data.verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+- if (nft_chain_is_bound(chain)) {
+- err = -EBUSY;
+- goto err1;
+- }
+- chain->bound = true;
++ err = nf_tables_bind_chain(ctx, chain);
++ if (err < 0)
++ return err;
+ break;
+ default:
+ break;
+@@ -82,15 +96,86 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+ {
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
++ const struct nft_data *data = &priv->data;
++ struct nft_ctx chain_ctx;
++ struct nft_chain *chain;
++ struct nft_rule *rule;
++
++ if (priv->dreg == NFT_REG_VERDICT) {
++ switch (data->verdict.code) {
++ case NFT_JUMP:
++ case NFT_GOTO:
++ chain = data->verdict.chain;
++ if (!nft_chain_binding(chain))
++ break;
++
++ chain_ctx = *ctx;
++ chain_ctx.chain = chain;
++
++ list_for_each_entry(rule, &chain->rules, list)
++ nft_rule_expr_activate(&chain_ctx, rule);
++
++ nft_clear(ctx->net, chain);
++ break;
++ default:
++ break;
++ }
++ }
+
+ return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+ }
+
++static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
++ struct nft_chain *chain,
++ enum nft_trans_phase phase)
++{
++ struct nft_ctx chain_ctx;
++ struct nft_rule *rule;
++
++ chain_ctx = *ctx;
++ chain_ctx.chain = chain;
++
++ list_for_each_entry(rule, &chain->rules, list)
++ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
++}
++
+ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
+ {
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
++ const struct nft_data *data = &priv->data;
++ struct nft_chain *chain;
++
++ if (priv->dreg == NFT_REG_VERDICT) {
++ switch (data->verdict.code) {
++ case NFT_JUMP:
++ case NFT_GOTO:
++ chain = data->verdict.chain;
++ if (!nft_chain_binding(chain))
++ break;
++
++ switch (phase) {
++ case NFT_TRANS_PREPARE_ERROR:
++ nf_tables_unbind_chain(ctx, chain);
++ nft_deactivate_next(ctx->net, chain);
++ break;
++ case NFT_TRANS_PREPARE:
++ nft_immediate_chain_deactivate(ctx, chain, phase);
++ nft_deactivate_next(ctx->net, chain);
++ break;
++ default:
++ nft_immediate_chain_deactivate(ctx, chain, phase);
++ nft_chain_del(chain);
++ chain->bound = false;
++ nft_use_dec(&chain->table->use);
++ break;
++ }
++ break;
++ default:
++ break;
++ }
++ }
+
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+@@ -115,15 +200,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+
+- if (!nft_chain_is_bound(chain))
++ if (!nft_chain_binding(chain))
+ break;
+
++ /* Rule construction failed, but chain is already bound:
++ * let the transaction records release this chain and its rules.
++ */
++ if (chain->bound) {
++ nft_use_dec(&chain->use);
++ break;
++ }
++
++ /* Rule has been deleted, release chain and its rules. */
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+- list_for_each_entry_safe(rule, n, &chain->rules, list)
+- nf_tables_rule_release(&chain_ctx, rule);
+-
++ nft_use_dec(&chain->use);
++ list_for_each_entry_safe(rule, n, &chain->rules, list) {
++ nft_use_dec(&chain->use);
++ list_del(&rule->list);
++ nf_tables_rule_destroy(&chain_ctx, rule);
++ }
+ nf_tables_chain_destroy(&chain_ctx);
+ break;
+ default:
+@@ -213,6 +310,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+ return 0;
+ }
+
++static bool nft_immediate_offload_action(const struct nft_expr *expr)
++{
++ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
++
++ if (priv->dreg == NFT_REG_VERDICT)
++ return true;
++
++ return false;
++}
++
+ static const struct nft_expr_ops nft_imm_ops = {
+ .type = &nft_imm_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+@@ -224,7 +331,7 @@ static const struct nft_expr_ops nft_imm_ops = {
+ .dump = nft_immediate_dump,
+ .validate = nft_immediate_validate,
+ .offload = nft_immediate_offload,
+- .offload_flags = NFT_OFFLOAD_F_ACTION,
++ .offload_action = nft_immediate_offload_action,
+ };
+
+ struct nft_expr_type nft_imm_type __read_mostly = {
+diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
+index 90becbf5bff3d..9d18c5428d53c 100644
+--- a/net/netfilter/nft_lookup.c
++++ b/net/netfilter/nft_lookup.c
+@@ -167,7 +167,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_lookup *priv = nft_expr_priv(expr);
+
+- priv->set->use++;
++ nf_tables_activate_set(ctx, priv->set);
+ }
+
+ static void nft_lookup_destroy(const struct nft_ctx *ctx,
+@@ -198,37 +198,6 @@ nla_put_failure:
+ return -1;
+ }
+
+-static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
+- struct nft_set *set,
+- const struct nft_set_iter *iter,
+- struct nft_set_elem *elem)
+-{
+- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+- struct nft_ctx *pctx = (struct nft_ctx *)ctx;
+- const struct nft_data *data;
+- int err;
+-
+- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+- return 0;
+-
+- data = nft_set_ext_data(ext);
+- switch (data->verdict.code) {
+- case NFT_JUMP:
+- case NFT_GOTO:
+- pctx->level++;
+- err = nft_chain_validate(ctx, data->verdict.chain);
+- if (err < 0)
+- return err;
+- pctx->level--;
+- break;
+- default:
+- break;
+- }
+-
+- return 0;
+-}
+-
+ static int nft_lookup_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **d)
+@@ -244,9 +213,12 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+- iter.fn = nft_lookup_validate_setelem;
++ iter.fn = nft_setelem_validate;
+
+ priv->set->ops->walk(ctx, priv->set, &iter);
++ if (!iter.err)
++ iter.err = nft_set_catchall_validate(ctx, priv->set);
++
+ if (iter.err < 0)
+ return iter.err;
+
+diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
+index 9953e80537536..1818dbf089cad 100644
+--- a/net/netfilter/nft_masq.c
++++ b/net/netfilter/nft_masq.c
+@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+ {
+- u32 plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ u32 plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ struct nft_masq *priv = nft_expr_priv(expr);
+ int err;
+
+diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
+index a7e01e9952f17..14412f69a34e8 100644
+--- a/net/netfilter/nft_meta.c
++++ b/net/netfilter/nft_meta.c
+@@ -14,6 +14,7 @@
+ #include <linux/in.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
++#include <linux/random.h>
+ #include <linux/smp.h>
+ #include <linux/static_key.h>
+ #include <net/dst.h>
+@@ -32,8 +33,6 @@
+ #define NFT_META_SECS_PER_DAY 86400
+ #define NFT_META_DAYS_PER_WEEK 7
+
+-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
+-
+ static u8 nft_meta_weekday(void)
+ {
+ time64_t secs = ktime_get_real_seconds();
+@@ -267,13 +266,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
+ return true;
+ }
+
+-static noinline u32 nft_prandom_u32(void)
+-{
+- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
+-
+- return prandom_u32_state(state);
+-}
+-
+ #ifdef CONFIG_IP_ROUTE_CLASSID
+ static noinline bool
+ nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
+@@ -329,7 +321,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
+ nft_reg_store8(dest, nft_pf(pkt));
+ break;
+ case NFT_META_L4PROTO:
+- if (!pkt->tprot_set)
++ if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ goto err;
+ nft_reg_store8(dest, pkt->tprot);
+ break;
+@@ -385,7 +377,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
+ break;
+ #endif
+ case NFT_META_PRANDOM:
+- *dest = nft_prandom_u32();
++ *dest = get_random_u32();
+ break;
+ #ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+@@ -514,7 +506,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
+ len = IFNAMSIZ;
+ break;
+ case NFT_META_PRANDOM:
+- prandom_init_once(&nft_prandom_state);
+ len = sizeof(u32);
+ break;
+ #ifdef CONFIG_XFRM
+diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
+index be1595d6979d8..cd4eb4996aff3 100644
+--- a/net/netfilter/nft_nat.c
++++ b/net/netfilter/nft_nat.c
+@@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ priv->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+- plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+@@ -334,7 +334,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr,
+ {
+ const struct nft_nat *priv = nft_expr_priv(expr);
+
+- if (priv->family == nft_pf(pkt))
++ if (priv->family == nft_pf(pkt) ||
++ priv->family == NFPROTO_INET)
+ nft_nat_eval(expr, regs, pkt);
+ }
+
+diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
+index 722cac1e90e0e..4e43214e88def 100644
+--- a/net/netfilter/nft_numgen.c
++++ b/net/netfilter/nft_numgen.c
+@@ -9,12 +9,11 @@
+ #include <linux/netlink.h>
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nf_tables.h>
++#include <linux/random.h>
+ #include <linux/static_key.h>
+ #include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_tables_core.h>
+
+-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+-
+ struct nft_ng_inc {
+ u8 dreg;
+ u32 modulus;
+@@ -104,12 +103,9 @@ struct nft_ng_random {
+ u32 offset;
+ };
+
+-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
++static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
+ {
+- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+-
+- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
+- priv->offset;
++ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
+ }
+
+ static void nft_ng_random_eval(const struct nft_expr *expr,
+@@ -137,8 +133,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+- prandom_init_once(&nft_numgen_prandom_state);
+-
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+ }
+diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
+index 94b2327e71dc4..156787b766676 100644
+--- a/net/netfilter/nft_objref.c
++++ b/net/netfilter/nft_objref.c
+@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
+ if (IS_ERR(obj))
+ return -ENOENT;
+
++ if (!nft_use_inc(&obj->use))
++ return -EMFILE;
++
+ nft_objref_priv(expr) = obj;
+- obj->use++;
+
+ return 0;
+ }
+@@ -71,7 +73,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
+- obj->use--;
++ nft_use_dec(&obj->use);
+ }
+
+ static void nft_objref_activate(const struct nft_ctx *ctx,
+@@ -79,7 +81,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_object *obj = nft_objref_priv(expr);
+
+- obj->use++;
++ nft_use_inc_restore(&obj->use);
+ }
+
+ static struct nft_expr_type nft_objref_type;
+@@ -183,7 +185,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx,
+ {
+ struct nft_objref_map *priv = nft_expr_priv(expr);
+
+- priv->set->use++;
++ nf_tables_activate_set(ctx, priv->set);
+ }
+
+ static void nft_objref_map_destroy(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
+index d82677e83400b..720dc9fba6d4f 100644
+--- a/net/netfilter/nft_osf.c
++++ b/net/netfilter/nft_osf.c
+@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+ {
+- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
+- (1 << NF_INET_PRE_ROUTING) |
+- (1 << NF_INET_FORWARD));
++ unsigned int hooks;
++
++ switch (ctx->family) {
++ case NFPROTO_IPV4:
++ case NFPROTO_IPV6:
++ case NFPROTO_INET:
++ hooks = (1 << NF_INET_LOCAL_IN) |
++ (1 << NF_INET_PRE_ROUTING) |
++ (1 << NF_INET_FORWARD);
++ break;
++ default:
++ return -EOPNOTSUPP;
++ }
++
++ return nft_chain_validate_hooks(ctx->chain, hooks);
+ }
+
+ static struct nft_expr_type nft_osf_type;
+diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
+index a44b14f6c0dc0..208a6f59281db 100644
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -22,6 +22,7 @@
+ #include <linux/icmpv6.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
++#include <linux/ip.h>
+ #include <net/sctp/checksum.h>
+
+ static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
+@@ -62,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+ return false;
+
+ if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
+- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
++ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+
+ memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+
+@@ -79,6 +80,45 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+ return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
+ }
+
++static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
++{
++ unsigned int thoff = nft_thoff(pkt);
++
++ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
++ return -1;
++
++ switch (pkt->tprot) {
++ case IPPROTO_UDP:
++ pkt->inneroff = thoff + sizeof(struct udphdr);
++ break;
++ case IPPROTO_TCP: {
++ struct tcphdr *th, _tcph;
++
++ th = skb_header_pointer(pkt->skb, thoff, sizeof(_tcph), &_tcph);
++ if (!th)
++ return -1;
++
++ pkt->inneroff = thoff + __tcp_hdrlen(th);
++ }
++ break;
++ default:
++ return -1;
++ }
++
++ pkt->flags |= NFT_PKTINFO_INNER;
++
++ return 0;
++}
++
++static int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
++{
++ if (!(pkt->flags & NFT_PKTINFO_INNER) &&
++ __nft_payload_inner_offset((struct nft_pktinfo *)pkt) < 0)
++ return -1;
++
++ return pkt->inneroff;
++}
++
+ void nft_payload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -108,10 +148,15 @@ void nft_payload_eval(const struct nft_expr *expr,
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+- if (!pkt->tprot_set)
++ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
+ goto err;
+ offset = nft_thoff(pkt);
+ break;
++ case NFT_PAYLOAD_INNER_HEADER:
++ offset = nft_payload_inner_offset(pkt);
++ if (offset < 0)
++ goto err;
++ break;
+ default:
+ BUG();
+ }
+@@ -502,6 +547,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ unsigned int *l4csum_offset)
+ {
++ if (pkt->fragoff)
++ return -1;
++
+ switch (pkt->tprot) {
+ case IPPROTO_TCP:
+ *l4csum_offset = offsetof(struct tcphdr, check);
+@@ -610,10 +658,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+- if (!pkt->tprot_set)
++ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
+ goto err;
+ offset = nft_thoff(pkt);
+ break;
++ case NFT_PAYLOAD_INNER_HEADER:
++ offset = nft_payload_inner_offset(pkt);
++ if (offset < 0)
++ goto err;
++ break;
+ default:
+ BUG();
+ }
+@@ -622,7 +675,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
+ offset += priv->offset;
+
+ if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) &&
+- (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
++ ((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER &&
++ priv->base != NFT_PAYLOAD_INNER_HEADER) ||
+ skb->ip_summed != CHECKSUM_PARTIAL)) {
+ fsum = skb_checksum(skb, offset, priv->len, 0);
+ tsum = csum_partial(src, priv->len, 0);
+@@ -643,7 +697,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
+ pkt->tprot == IPPROTO_SCTP &&
+ skb->ip_summed != CHECKSUM_PARTIAL) {
+- if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
++ if (pkt->fragoff == 0 &&
++ nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+ goto err;
+ }
+
+@@ -657,17 +712,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+ {
+ struct nft_payload_set *priv = nft_expr_priv(expr);
++ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
++ int err;
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+ if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+- priv->csum_type =
+- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
+- priv->csum_offset =
+- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
++ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
++ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
++ &csum_offset);
++ if (err < 0)
++ return err;
++
++ priv->csum_offset = csum_offset;
++ }
+ if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
+ u32 flags;
+
+@@ -678,7 +739,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+ priv->csum_flags = flags;
+ }
+
+- switch (priv->csum_type) {
++ switch (csum_type) {
+ case NFT_PAYLOAD_CSUM_NONE:
+ case NFT_PAYLOAD_CSUM_INET:
+ break;
+@@ -692,6 +753,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+ default:
+ return -EOPNOTSUPP;
+ }
++ priv->csum_type = csum_type;
+
+ return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ priv->len);
+@@ -730,6 +792,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
+ {
+ enum nft_payload_bases base;
+ unsigned int offset, len;
++ int err;
+
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
+ tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+@@ -741,6 +804,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
+ case NFT_PAYLOAD_LL_HEADER:
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
++ case NFT_PAYLOAD_INNER_HEADER:
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+@@ -755,11 +819,16 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
+ if (tb[NFTA_PAYLOAD_DREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
+- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
++ if (err < 0)
++ return ERR_PTR(err);
++
++ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
++ if (err < 0)
++ return ERR_PTR(err);
+
+ if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
+- base != NFT_PAYLOAD_LL_HEADER)
++ base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
+ return &nft_payload_fast_ops;
+ else
+ return &nft_payload_ops;
+diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
+index e4a1c44d7f513..e6bbe32c323df 100644
+--- a/net/netfilter/nft_range.c
++++ b/net/netfilter/nft_range.c
+@@ -51,7 +51,14 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
+ const struct nlattr * const tb[])
+ {
+ struct nft_range_expr *priv = nft_expr_priv(expr);
+- struct nft_data_desc desc_from, desc_to;
++ struct nft_data_desc desc_from = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->data_from),
++ };
++ struct nft_data_desc desc_to = {
++ .type = NFT_DATA_VALUE,
++ .size = sizeof(priv->data_to),
++ };
+ int err;
+ u32 op;
+
+@@ -61,26 +68,16 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
+ !tb[NFTA_RANGE_TO_DATA])
+ return -EINVAL;
+
+- err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+- &desc_from, tb[NFTA_RANGE_FROM_DATA]);
++ err = nft_data_init(NULL, &priv->data_from, &desc_from,
++ tb[NFTA_RANGE_FROM_DATA]);
+ if (err < 0)
+ return err;
+
+- if (desc_from.type != NFT_DATA_VALUE) {
+- err = -EINVAL;
+- goto err1;
+- }
+-
+- err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+- &desc_to, tb[NFTA_RANGE_TO_DATA]);
++ err = nft_data_init(NULL, &priv->data_to, &desc_to,
++ tb[NFTA_RANGE_TO_DATA]);
+ if (err < 0)
+ goto err1;
+
+- if (desc_to.type != NFT_DATA_VALUE) {
+- err = -EINVAL;
+- goto err2;
+- }
+-
+ if (desc_from.len != desc_to.len) {
+ err = -EINVAL;
+ goto err2;
+diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
+index ba09890dddb50..e64f531d66cfc 100644
+--- a/net/netfilter/nft_redir.c
++++ b/net/netfilter/nft_redir.c
+@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx,
+ unsigned int plen;
+ int err;
+
+- plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+@@ -232,7 +232,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = {
+ .name = "redir",
+ .ops = &nft_redir_inet_ops,
+ .policy = nft_redir_policy,
+- .maxattr = NFTA_MASQ_MAX,
++ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+ };
+
+diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
+index e7ae5914971e7..60122539fee67 100644
+--- a/net/netfilter/nft_set_bitmap.c
++++ b/net/netfilter/nft_set_bitmap.c
+@@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set,
+ return 0;
+ }
+
+-static void nft_bitmap_destroy(const struct nft_set *set)
++static void nft_bitmap_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set)
+ {
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+- nft_set_elem_destroy(set, be, true);
++ nf_tables_set_elem_destroy(ctx, set, be);
+ }
+
+ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
+index df40314de21f5..0b73cb0e752f7 100644
+--- a/net/netfilter/nft_set_hash.c
++++ b/net/netfilter/nft_set_hash.c
+@@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
+ /* Another cpu may race to insert the element with the same key */
+ if (prev) {
+ nft_set_elem_destroy(set, he, true);
++ atomic_dec(&set->nelems);
+ he = prev;
+ }
+
+@@ -152,6 +153,7 @@ out:
+
+ err2:
+ nft_set_elem_destroy(set, he, true);
++ atomic_dec(&set->nelems);
+ err1:
+ return false;
+ }
+@@ -398,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set,
+ return 0;
+ }
+
++struct nft_rhash_ctx {
++ const struct nft_ctx ctx;
++ const struct nft_set *set;
++};
++
+ static void nft_rhash_elem_destroy(void *ptr, void *arg)
+ {
+- nft_set_elem_destroy(arg, ptr, true);
++ struct nft_rhash_ctx *rhash_ctx = arg;
++
++ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
+ }
+
+-static void nft_rhash_destroy(const struct nft_set *set)
++static void nft_rhash_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set)
+ {
+ struct nft_rhash *priv = nft_set_priv(set);
++ struct nft_rhash_ctx rhash_ctx = {
++ .ctx = *ctx,
++ .set = set,
++ };
+
+ cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
+ rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
+- (void *)set);
++ (void *)&rhash_ctx);
+ }
+
+ /* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+@@ -641,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set,
+ return 0;
+ }
+
+-static void nft_hash_destroy(const struct nft_set *set)
++static void nft_hash_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set)
+ {
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+@@ -651,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set)
+ for (i = 0; i < priv->buckets; i++) {
+ hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
+ hlist_del_rcu(&he->node);
+- nft_set_elem_destroy(set, he, true);
++ nf_tables_set_elem_destroy(ctx, set, he);
+ }
+ }
+ }
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index dce866d93feed..8c16681884b7e 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -901,12 +901,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
+ static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
+ int mask_bits)
+ {
+- int rule = f->rules++, group, ret, bit_offset = 0;
++ int rule = f->rules, group, ret, bit_offset = 0;
+
+- ret = pipapo_resize(f, f->rules - 1, f->rules);
++ ret = pipapo_resize(f, f->rules, f->rules + 1);
+ if (ret)
+ return ret;
+
++ f->rules++;
++
+ for (group = 0; group < f->groups; group++) {
+ int i, v;
+ u8 mask;
+@@ -1051,7 +1053,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
+ step++;
+ if (step >= len) {
+ if (!masks) {
+- pipapo_insert(f, base, 0);
++ err = pipapo_insert(f, base, 0);
++ if (err < 0)
++ return err;
+ masks = 1;
+ }
+ goto out;
+@@ -1162,6 +1166,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
+ struct nft_pipapo_match *m = priv->clone;
+ u8 genmask = nft_genmask_next(net);
+ struct nft_pipapo_field *f;
++ const u8 *start_p, *end_p;
+ int i, bsize_max, err = 0;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+@@ -1202,9 +1207,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
+ }
+
+ /* Validate */
++ start_p = start;
++ end_p = end;
+ nft_pipapo_for_each_field(f, i, m) {
+- const u8 *start_p = start, *end_p = end;
+-
+ if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
+ return -ENOSPC;
+
+@@ -1233,6 +1238,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
+ else
+ ret = pipapo_expand(f, start, end, f->groups * f->bb);
+
++ if (ret < 0)
++ return ret;
++
+ if (f->bsize > bsize_max)
+ bsize_max = f->bsize;
+
+@@ -1290,6 +1298,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
+ if (!new->scratch_aligned)
+ goto out_scratch;
+ #endif
++ for_each_possible_cpu(i)
++ *per_cpu_ptr(new->scratch, i) = NULL;
++
++ if (pipapo_realloc_scratch(new, old->bsize_max))
++ goto out_scratch_realloc;
+
+ rcu_head_init(&new->rcu);
+
+@@ -1334,6 +1347,9 @@ out_lt:
+ kvfree(dst->lt);
+ dst--;
+ }
++out_scratch_realloc:
++ for_each_possible_cpu(i)
++ kfree(*per_cpu_ptr(new->scratch, i));
+ #ifdef NFT_PIPAPO_ALIGN
+ free_percpu(new->scratch_aligned);
+ #endif
+@@ -1591,17 +1607,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m)
+ }
+ }
+
+-/**
+- * pipapo_reclaim_match - RCU callback to free fields from old matching data
+- * @rcu: RCU head
+- */
+-static void pipapo_reclaim_match(struct rcu_head *rcu)
++static void pipapo_free_match(struct nft_pipapo_match *m)
+ {
+- struct nft_pipapo_match *m;
+ int i;
+
+- m = container_of(rcu, struct nft_pipapo_match, rcu);
+-
+ for_each_possible_cpu(i)
+ kfree(*per_cpu_ptr(m->scratch, i));
+
+@@ -1616,7 +1625,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
+ }
+
+ /**
+- * pipapo_commit() - Replace lookup data with current working copy
++ * pipapo_reclaim_match - RCU callback to free fields from old matching data
++ * @rcu: RCU head
++ */
++static void pipapo_reclaim_match(struct rcu_head *rcu)
++{
++ struct nft_pipapo_match *m;
++
++ m = container_of(rcu, struct nft_pipapo_match, rcu);
++ pipapo_free_match(m);
++}
++
++/**
++ * nft_pipapo_commit() - Replace lookup data with current working copy
+ * @set: nftables API set representation
+ *
+ * While at it, check if we should perform garbage collection on the working
+@@ -1626,7 +1647,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
+ * We also need to create a new working copy for subsequent insertions and
+ * deletions.
+ */
+-static void pipapo_commit(const struct nft_set *set)
++static void nft_pipapo_commit(const struct nft_set *set)
+ {
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *old;
+@@ -1651,6 +1672,37 @@ static void pipapo_commit(const struct nft_set *set)
+ priv->clone = new_clone;
+ }
+
++static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
++{
++#ifdef CONFIG_PROVE_LOCKING
++ const struct net *net = read_pnet(&set->net);
++
++ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
++#else
++ return true;
++#endif
++}
++
++static void nft_pipapo_abort(const struct nft_set *set)
++{
++ struct nft_pipapo *priv = nft_set_priv(set);
++ struct nft_pipapo_match *new_clone, *m;
++
++ if (!priv->dirty)
++ return;
++
++ m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
++
++ new_clone = pipapo_clone(m);
++ if (IS_ERR(new_clone))
++ return;
++
++ priv->dirty = false;
++
++ pipapo_free_match(priv->clone);
++ priv->clone = new_clone;
++}
++
+ /**
+ * nft_pipapo_activate() - Mark element reference as active given key, commit
+ * @net: Network namespace
+@@ -1658,8 +1710,7 @@ static void pipapo_commit(const struct nft_set *set)
+ * @elem: nftables API element representation containing key data
+ *
+ * On insertion, elements are added to a copy of the matching data currently
+- * in use for lookups, and not directly inserted into current lookup data, so
+- * we'll take care of that by calling pipapo_commit() here. Both
++ * in use for lookups, and not directly inserted into current lookup data. Both
+ * nft_pipapo_insert() and nft_pipapo_activate() are called once for each
+ * element, hence we can't purpose either one as a real commit operation.
+ */
+@@ -1675,8 +1726,6 @@ static void nft_pipapo_activate(const struct net *net,
+
+ nft_set_elem_change_active(net, set, &e->ext);
+ nft_set_elem_clear_busy(&e->ext);
+-
+- pipapo_commit(set);
+ }
+
+ /**
+@@ -1899,7 +1948,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+ int i, start, rules_fx;
+
+ match_start = data;
+- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++
++ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
++ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++ else
++ match_end = data;
+
+ start = first_rule;
+ rules_fx = rules_f0;
+@@ -1922,7 +1975,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+ if (i == m->field_count) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+- pipapo_commit(set);
+ return;
+ }
+
+@@ -1944,12 +1996,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+ {
+ struct nft_pipapo *priv = nft_set_priv(set);
++ struct net *net = read_pnet(&set->net);
+ struct nft_pipapo_match *m;
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ rcu_read_lock();
+- m = rcu_dereference(priv->match);
++ if (iter->genmask == nft_genmask_cur(net))
++ m = rcu_dereference(priv->match);
++ else
++ m = priv->clone;
+
+ if (unlikely(!m))
+ goto out;
+@@ -2116,34 +2172,51 @@ out_scratch:
+ return err;
+ }
+
++/**
++ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
++ * @ctx: context
++ * @set: nftables API set representation
++ * @m: matching data pointing to key mapping array
++ */
++static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set,
++ struct nft_pipapo_match *m)
++{
++ struct nft_pipapo_field *f;
++ int i, r;
++
++ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
++ ;
++
++ for (r = 0; r < f->rules; r++) {
++ struct nft_pipapo_elem *e;
++
++ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
++ continue;
++
++ e = f->mt[r].e;
++
++ nf_tables_set_elem_destroy(ctx, set, e);
++ }
++}
++
+ /**
+ * nft_pipapo_destroy() - Free private data for set and all committed elements
++ * @ctx: context
+ * @set: nftables API set representation
+ */
+-static void nft_pipapo_destroy(const struct nft_set *set)
++static void nft_pipapo_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set)
+ {
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+- struct nft_pipapo_field *f;
+- int i, r, cpu;
++ int cpu;
+
+ m = rcu_dereference_protected(priv->match, true);
+ if (m) {
+ rcu_barrier();
+
+- for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+- ;
+-
+- for (r = 0; r < f->rules; r++) {
+- struct nft_pipapo_elem *e;
+-
+- if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+- continue;
+-
+- e = f->mt[r].e;
+-
+- nft_set_elem_destroy(set, e, true);
+- }
++ nft_set_pipapo_match_destroy(ctx, set, m);
+
+ #ifdef NFT_PIPAPO_ALIGN
+ free_percpu(m->scratch_aligned);
+@@ -2157,6 +2230,11 @@ static void nft_pipapo_destroy(const struct nft_set *set)
+ }
+
+ if (priv->clone) {
++ m = priv->clone;
++
++ if (priv->dirty)
++ nft_set_pipapo_match_destroy(ctx, set, m);
++
+ #ifdef NFT_PIPAPO_ALIGN
+ free_percpu(priv->clone->scratch_aligned);
+ #endif
+@@ -2203,6 +2281,8 @@ const struct nft_set_type nft_set_pipapo_type = {
+ .init = nft_pipapo_init,
+ .destroy = nft_pipapo_destroy,
+ .gc_init = nft_pipapo_gc_init,
++ .commit = nft_pipapo_commit,
++ .abort = nft_pipapo_abort,
+ .elemsize = offsetof(struct nft_pipapo_elem, ext),
+ },
+ };
+@@ -2225,6 +2305,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
+ .init = nft_pipapo_init,
+ .destroy = nft_pipapo_destroy,
+ .gc_init = nft_pipapo_gc_init,
++ .commit = nft_pipapo_commit,
++ .abort = nft_pipapo_abort,
+ .elemsize = offsetof(struct nft_pipapo_elem, ext),
+ },
+ };
+diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
+index e517663e0cd17..6f4116e729581 100644
+--- a/net/netfilter/nft_set_pipapo_avx2.c
++++ b/net/netfilter/nft_set_pipapo_avx2.c
+@@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
+ NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
+
+ NFT_PIPAPO_AVX2_AND(5, 0, 1);
+- NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize);
++ NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize);
+ NFT_PIPAPO_AVX2_AND(7, 2, 3);
+
+ /* Stall */
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index d600a566da324..8d73fffd2d09d 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+ return !nft_rbtree_interval_end(rbe);
+ }
+
+-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+- const struct nft_rbtree_elem *interval)
++static int nft_rbtree_cmp(const struct nft_set *set,
++ const struct nft_rbtree_elem *e1,
++ const struct nft_rbtree_elem *e2)
+ {
+- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
++ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
++ set->klen);
+ }
+
+ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
+ const struct nft_rbtree_elem *rbe, *interval = NULL;
+ u8 genmask = nft_genmask_cur(net);
+ const struct rb_node *parent;
+- const void *this;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+- this = nft_set_ext_key(&rbe->ext);
+- d = memcmp(this, key, set->klen);
++ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ if (interval &&
+- nft_rbtree_equal(set, this, interval) &&
++ !nft_rbtree_cmp(set, rbe, interval) &&
+ nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_start(interval))
+ continue;
+@@ -215,150 +215,230 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ return rbe;
+ }
+
++static int nft_rbtree_gc_elem(const struct nft_set *__set,
++ struct nft_rbtree *priv,
++ struct nft_rbtree_elem *rbe,
++ u8 genmask)
++{
++ struct nft_set *set = (struct nft_set *)__set;
++ struct rb_node *prev = rb_prev(&rbe->node);
++ struct nft_rbtree_elem *rbe_prev;
++ struct nft_set_gc_batch *gcb;
++
++ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
++ if (!gcb)
++ return -ENOMEM;
++
++ /* search for end interval coming before this element.
++ * end intervals don't carry a timeout extension, they
++ * are coupled with the interval start element.
++ */
++ while (prev) {
++ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++ if (nft_rbtree_interval_end(rbe_prev) &&
++ nft_set_elem_active(&rbe_prev->ext, genmask))
++ break;
++
++ prev = rb_prev(prev);
++ }
++
++ if (prev) {
++ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
++
++ rb_erase(&rbe_prev->node, &priv->root);
++ atomic_dec(&set->nelems);
++ nft_set_gc_batch_add(gcb, rbe_prev);
++ }
++
++ rb_erase(&rbe->node, &priv->root);
++ atomic_dec(&set->nelems);
++
++ nft_set_gc_batch_add(gcb, rbe);
++ nft_set_gc_batch_complete(gcb);
++
++ return 0;
++}
++
++static bool nft_rbtree_update_first(const struct nft_set *set,
++ struct nft_rbtree_elem *rbe,
++ struct rb_node *first)
++{
++ struct nft_rbtree_elem *first_elem;
++
++ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
++ /* this element is closest to where the new element is to be inserted:
++ * update the first element for the node list path.
++ */
++ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
++ return true;
++
++ return false;
++}
++
+ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+ struct nft_rbtree_elem *new,
+ struct nft_set_ext **ext)
+ {
+- bool overlap = false, dup_end_left = false, dup_end_right = false;
++ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
++ struct rb_node *node, *next, *parent, **p, *first = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+- struct nft_rbtree_elem *rbe;
+- struct rb_node *parent, **p;
+- int d;
++ int d, err;
+
+- /* Detect overlaps as we descend the tree. Set the flag in these cases:
+- *
+- * a1. _ _ __>| ?_ _ __| (insert end before existing end)
+- * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
+- * a3. _ _ ___? >|_ _ __| (insert start before existing end)
+- *
+- * and clear it later on, as we eventually reach the points indicated by
+- * '?' above, in the cases described below. We'll always meet these
+- * later, locally, due to tree ordering, and overlaps for the intervals
+- * that are the closest together are always evaluated last.
+- *
+- * b1. _ _ __>| !_ _ __| (insert end before existing start)
+- * b2. _ _ ___| !_ _ _>| (insert end after existing start)
+- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
+- * '--' no nodes falling in this range
+- * b4. >|_ _ ! (insert start before existing start)
+- *
+- * Case a3. resolves to b3.:
+- * - if the inserted start element is the leftmost, because the '0'
+- * element in the tree serves as end element
+- * - otherwise, if an existing end is found immediately to the left. If
+- * there are existing nodes in between, we need to further descend the
+- * tree before we can conclude the new start isn't causing an overlap
+- *
+- * or to b4., which, preceded by a3., means we already traversed one or
+- * more existing intervals entirely, from the right.
+- *
+- * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
+- * in that order.
+- *
+- * The flag is also cleared in two special cases:
+- *
+- * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
+- * b6. |__ _ >|!__ _ _ (insert end right after existing start)
+- *
+- * which always happen as last step and imply that no further
+- * overlapping is possible.
+- *
+- * Another special case comes from the fact that start elements matching
+- * an already existing start element are allowed: insertion is not
+- * performed but we return -EEXIST in that case, and the error will be
+- * cleared by the caller if NLM_F_EXCL is not present in the request.
+- * This way, request for insertion of an exact overlap isn't reported as
+- * error to userspace if not desired.
+- *
+- * However, if the existing start matches a pre-existing start, but the
+- * end element doesn't match the corresponding pre-existing end element,
+- * we need to report a partial overlap. This is a local condition that
+- * can be noticed without need for a tracking flag, by checking for a
+- * local duplicated end for a corresponding start, from left and right,
+- * separately.
++ /* Descend the tree to search for an existing element greater than the
++ * key value to insert that is greater than the new element. This is the
++ * first element to walk the ordered elements to find possible overlap.
+ */
+-
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+- d = memcmp(nft_set_ext_key(&rbe->ext),
+- nft_set_ext_key(&new->ext),
+- set->klen);
++ d = nft_rbtree_cmp(set, rbe, new);
++
+ if (d < 0) {
+ p = &parent->rb_left;
+-
+- if (nft_rbtree_interval_start(new)) {
+- if (nft_rbtree_interval_end(rbe) &&
+- nft_set_elem_active(&rbe->ext, genmask) &&
+- !nft_set_elem_expired(&rbe->ext) && !*p)
+- overlap = false;
+- } else {
+- if (dup_end_left && !*p)
+- return -ENOTEMPTY;
+-
+- overlap = nft_rbtree_interval_end(rbe) &&
+- nft_set_elem_active(&rbe->ext,
+- genmask) &&
+- !nft_set_elem_expired(&rbe->ext);
+-
+- if (overlap) {
+- dup_end_right = true;
+- continue;
+- }
+- }
+ } else if (d > 0) {
+- p = &parent->rb_right;
++ if (!first ||
++ nft_rbtree_update_first(set, rbe, first))
++ first = &rbe->node;
+
+- if (nft_rbtree_interval_end(new)) {
+- if (dup_end_right && !*p)
+- return -ENOTEMPTY;
+-
+- overlap = nft_rbtree_interval_end(rbe) &&
+- nft_set_elem_active(&rbe->ext,
+- genmask) &&
+- !nft_set_elem_expired(&rbe->ext);
+-
+- if (overlap) {
+- dup_end_left = true;
+- continue;
+- }
+- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+- !nft_set_elem_expired(&rbe->ext)) {
+- overlap = nft_rbtree_interval_end(rbe);
+- }
++ p = &parent->rb_right;
+ } else {
+- if (nft_rbtree_interval_end(rbe) &&
+- nft_rbtree_interval_start(new)) {
++ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+-
+- if (nft_set_elem_active(&rbe->ext, genmask) &&
+- !nft_set_elem_expired(&rbe->ext))
+- overlap = false;
+- } else if (nft_rbtree_interval_start(rbe) &&
+- nft_rbtree_interval_end(new)) {
++ else
+ p = &parent->rb_right;
++ }
++ }
+
+- if (nft_set_elem_active(&rbe->ext, genmask) &&
+- !nft_set_elem_expired(&rbe->ext))
+- overlap = false;
+- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+- !nft_set_elem_expired(&rbe->ext)) {
+- *ext = &rbe->ext;
+- return -EEXIST;
+- } else {
+- p = &parent->rb_left;
++ if (!first)
++ first = rb_first(&priv->root);
++
++ /* Detect overlap by going through the list of valid tree nodes.
++ * Values stored in the tree are in reversed order, starting from
++ * highest to lowest value.
++ */
++ for (node = first; node != NULL; node = next) {
++ next = rb_next(node);
++
++ rbe = rb_entry(node, struct nft_rbtree_elem, node);
++
++ if (!nft_set_elem_active(&rbe->ext, genmask))
++ continue;
++
++ /* perform garbage collection to avoid bogus overlap reports. */
++ if (nft_set_elem_expired(&rbe->ext)) {
++ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
++ if (err < 0)
++ return err;
++
++ continue;
++ }
++
++ d = nft_rbtree_cmp(set, rbe, new);
++ if (d == 0) {
++ /* Matching end element: no need to look for an
++ * overlapping greater or equal element.
++ */
++ if (nft_rbtree_interval_end(rbe)) {
++ rbe_le = rbe;
++ break;
++ }
++
++ /* first element that is greater or equal to key value. */
++ if (!rbe_ge) {
++ rbe_ge = rbe;
++ continue;
++ }
++
++ /* this is a closer more or equal element, update it. */
++ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
++ rbe_ge = rbe;
++ continue;
+ }
++
++ /* element is equal to key value, make sure flags are
++ * the same, an existing more or equal start element
++ * must not be replaced by more or equal end element.
++ */
++ if ((nft_rbtree_interval_start(new) &&
++ nft_rbtree_interval_start(rbe_ge)) ||
++ (nft_rbtree_interval_end(new) &&
++ nft_rbtree_interval_end(rbe_ge))) {
++ rbe_ge = rbe;
++ continue;
++ }
++ } else if (d > 0) {
++ /* annotate element greater than the new element. */
++ rbe_ge = rbe;
++ continue;
++ } else if (d < 0) {
++ /* annotate element less than the new element. */
++ rbe_le = rbe;
++ break;
+ }
++ }
++
++ /* - new start element matching existing start element: full overlap
++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
++ */
++ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
++ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
++ *ext = &rbe_ge->ext;
++ return -EEXIST;
++ }
+
+- dup_end_left = dup_end_right = false;
++ /* - new end element matching existing end element: full overlap
++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
++ */
++ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
++ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
++ *ext = &rbe_le->ext;
++ return -EEXIST;
+ }
+
+- if (overlap)
++ /* - new start element with existing closest, less or equal key value
++ * being a start element: partial overlap, reported as -ENOTEMPTY.
++	 * Anonymous sets allow for two consecutive start elements since they
++ * are constant, skip them to avoid bogus overlap reports.
++ */
++ if (!nft_set_is_anonymous(set) && rbe_le &&
++ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
++ return -ENOTEMPTY;
++
++ /* - new end element with existing closest, less or equal key value
++	 * being an end element: partial overlap, reported as -ENOTEMPTY.
++ */
++ if (rbe_le &&
++ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
++ /* - new end element with existing closest, greater or equal key value
++ * being an end element: partial overlap, reported as -ENOTEMPTY
++ */
++ if (rbe_ge &&
++ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
++ return -ENOTEMPTY;
++
++ /* Accepted element: pick insertion point depending on key value */
++ parent = NULL;
++ p = &priv->root.rb_node;
++ while (*p != NULL) {
++ parent = *p;
++ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
++ d = nft_rbtree_cmp(set, rbe, new);
++
++ if (d < 0)
++ p = &parent->rb_left;
++ else if (d > 0)
++ p = &parent->rb_right;
++ else if (nft_rbtree_interval_end(rbe))
++ p = &parent->rb_left;
++ else
++ p = &parent->rb_right;
++ }
++
+ rb_link_node_rcu(&new->node, parent, p);
+ rb_insert_color(&new->node, &priv->root);
+ return 0;
+@@ -497,23 +577,37 @@ static void nft_rbtree_gc(struct work_struct *work)
+ struct nft_rbtree *priv;
+ struct rb_node *node;
+ struct nft_set *set;
++ struct net *net;
++ u8 genmask;
+
+ priv = container_of(work, struct nft_rbtree, gc_work.work);
+ set = nft_set_container_of(priv);
++ net = read_pnet(&set->net);
++ genmask = nft_genmask_cur(net);
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
++ if (!nft_set_elem_active(&rbe->ext, genmask))
++ continue;
++
++ /* elements are reversed in the rbtree for historical reasons,
++ * from highest to lowest value, that is why end element is
++ * always visited before the start element.
++ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_end = rbe;
+ continue;
+ }
+ if (!nft_set_elem_expired(&rbe->ext))
+ continue;
+- if (nft_set_elem_mark_busy(&rbe->ext))
++
++ if (nft_set_elem_mark_busy(&rbe->ext)) {
++ rbe_end = NULL;
+ continue;
++ }
+
+ if (rbe_prev) {
+ rb_erase(&rbe_prev->node, &priv->root);
+@@ -578,7 +672,8 @@ static int nft_rbtree_init(const struct nft_set *set,
+ return 0;
+ }
+
+-static void nft_rbtree_destroy(const struct nft_set *set)
++static void nft_rbtree_destroy(const struct nft_ctx *ctx,
++ const struct nft_set *set)
+ {
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+@@ -589,7 +684,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
+ while ((node = priv->root.rb_node) != NULL) {
+ rb_erase(node, &priv->root);
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+- nft_set_elem_destroy(set, rbe, true);
++ nf_tables_set_elem_destroy(ctx, set, rbe);
+ }
+ }
+
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index d601974c9d2e0..9ad9cc0d1d27c 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -36,12 +36,11 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
+
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+ static noinline bool
+-nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
++nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
+ {
+- struct sock *sk = skb_to_full_sk(pkt->skb);
+ struct cgroup *cgrp;
+
+- if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
++ if (!sk_fullsock(sk))
+ return false;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+@@ -54,6 +53,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
+ }
+ #endif
+
++static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
++{
++ const struct net_device *indev = nft_in(pkt);
++ const struct sk_buff *skb = pkt->skb;
++ struct sock *sk = NULL;
++
++ if (!indev)
++ return NULL;
++
++ switch (nft_pf(pkt)) {
++ case NFPROTO_IPV4:
++ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
++ break;
++#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
++ case NFPROTO_IPV6:
++ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
++ break;
++#endif
++ default:
++ WARN_ON_ONCE(1);
++ break;
++ }
++
++ return sk;
++}
++
+ static void nft_socket_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -67,20 +92,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+ sk = NULL;
+
+ if (!sk)
+- switch(nft_pf(pkt)) {
+- case NFPROTO_IPV4:
+- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
+- break;
+-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+- case NFPROTO_IPV6:
+- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
+- break;
+-#endif
+- default:
+- WARN_ON_ONCE(1);
+- regs->verdict.code = NFT_BREAK;
+- return;
+- }
++ sk = nft_socket_do_lookup(pkt);
+
+ if (!sk) {
+ regs->verdict.code = NFT_BREAK;
+@@ -108,7 +120,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+ break;
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+ case NFT_SOCKET_CGROUPV2:
+- if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
++ if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+@@ -198,6 +210,16 @@ static int nft_socket_dump(struct sk_buff *skb,
+ return 0;
+ }
+
++static int nft_socket_validate(const struct nft_ctx *ctx,
++ const struct nft_expr *expr,
++ const struct nft_data **data)
++{
++ return nft_chain_validate_hooks(ctx->chain,
++ (1 << NF_INET_PRE_ROUTING) |
++ (1 << NF_INET_LOCAL_IN) |
++ (1 << NF_INET_LOCAL_OUT));
++}
++
+ static struct nft_expr_type nft_socket_type;
+ static const struct nft_expr_ops nft_socket_ops = {
+ .type = &nft_socket_type,
+@@ -205,6 +227,7 @@ static const struct nft_expr_ops nft_socket_ops = {
+ .eval = nft_socket_eval,
+ .init = nft_socket_init,
+ .dump = nft_socket_dump,
++ .validate = nft_socket_validate,
+ };
+
+ static struct nft_expr_type nft_socket_type __read_mostly = {
+diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
+index a0109fa1e92d0..1133e06f3c40e 100644
+--- a/net/netfilter/nft_synproxy.c
++++ b/net/netfilter/nft_synproxy.c
+@@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
+ if (err)
+ goto nf_ct_failure;
+ err = nf_synproxy_ipv6_init(snet, ctx->net);
+- if (err)
++ if (err) {
++ nf_synproxy_ipv4_fini(snet, ctx->net);
+ goto nf_ct_failure;
++ }
+ break;
+ }
+
+diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
+index b5b09a902c7ac..9fea90ed79d44 100644
+--- a/net/netfilter/nft_tproxy.c
++++ b/net/netfilter/nft_tproxy.c
+@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
+ return 0;
+ }
+
++static int nft_tproxy_validate(const struct nft_ctx *ctx,
++ const struct nft_expr *expr,
++ const struct nft_data **data)
++{
++ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
++}
++
+ static struct nft_expr_type nft_tproxy_type;
+ static const struct nft_expr_ops nft_tproxy_ops = {
+ .type = &nft_tproxy_type,
+@@ -320,6 +327,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
+ .init = nft_tproxy_init,
+ .destroy = nft_tproxy_destroy,
+ .dump = nft_tproxy_dump,
++ .validate = nft_tproxy_validate,
+ };
+
+ static struct nft_expr_type nft_tproxy_type __read_mostly = {
+diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
+index 3b27926d5382c..2ee50996da8cc 100644
+--- a/net/netfilter/nft_tunnel.c
++++ b/net/netfilter/nft_tunnel.c
+@@ -133,6 +133,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
+
+ static struct nft_expr_type nft_tunnel_type __read_mostly = {
+ .name = "tunnel",
++ .family = NFPROTO_NETDEV,
+ .ops = &nft_tunnel_get_ops,
+ .policy = nft_tunnel_policy,
+ .maxattr = NFTA_TUNNEL_MAX,
+diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
+index 0a913ce07425a..267757b0392a6 100644
+--- a/net/netfilter/xt_CT.c
++++ b/net/netfilter/xt_CT.c
+@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
+ return XT_CONTINUE;
+
+ if (ct) {
+- atomic_inc(&ct->ct_general.use);
++ refcount_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+ } else {
+ nf_ct_set(skb, ct, IP_CT_UNTRACKED);
+@@ -201,7 +201,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
+ goto err4;
+ }
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+- nf_conntrack_get(&ct->ct_general);
+ out:
+ info->ct = ct;
+ return 0;
+diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
+index e5ebc0810675a..ad3c033db64e7 100644
+--- a/net/netfilter/xt_connmark.c
++++ b/net/netfilter/xt_connmark.c
+@@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
+ u_int32_t new_targetmark;
+ struct nf_conn *ct;
+ u_int32_t newmark;
++ u_int32_t oldmark;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+@@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
+
+ switch (info->mode) {
+ case XT_CONNMARK_SET:
+- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
++ oldmark = READ_ONCE(ct->mark);
++ newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ newmark >>= info->shift_bits;
+ else
+ newmark <<= info->shift_bits;
+
+- if (ct->mark != newmark) {
+- ct->mark = newmark;
++ if (READ_ONCE(ct->mark) != newmark) {
++ WRITE_ONCE(ct->mark, newmark);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+@@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
+ else
+ new_targetmark <<= info->shift_bits;
+
+- newmark = (ct->mark & ~info->ctmask) ^
++ newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^
+ new_targetmark;
+- if (ct->mark != newmark) {
+- ct->mark = newmark;
++ if (READ_ONCE(ct->mark) != newmark) {
++ WRITE_ONCE(ct->mark, newmark);
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+ break;
+ case XT_CONNMARK_RESTORE:
+- new_targetmark = (ct->mark & info->ctmask);
++ new_targetmark = (READ_ONCE(ct->mark) & info->ctmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
+ else
+@@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
+ if (ct == NULL)
+ return false;
+
+- return ((ct->mark & info->mask) == info->mark) ^ info->invert;
++ return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert;
+ }
+
+ static int connmark_mt_check(const struct xt_mtchk_param *par)
+diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
+index e1990baf3a3b7..dc9485854002a 100644
+--- a/net/netfilter/xt_osf.c
++++ b/net/netfilter/xt_osf.c
+@@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+ MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+ MODULE_ALIAS("ipt_osf");
+ MODULE_ALIAS("ip6t_osf");
+-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
+diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
+index 680015ba7cb6e..d4bf089c9e3f9 100644
+--- a/net/netfilter/xt_sctp.c
++++ b/net/netfilter/xt_sctp.c
+@@ -150,6 +150,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
+ {
+ const struct xt_sctp_info *info = par->matchinfo;
+
++ if (info->flag_count > ARRAY_SIZE(info->flag_info))
++ return -EINVAL;
+ if (info->flags & ~XT_SCTP_VALID_FLAGS)
+ return -EINVAL;
+ if (info->invflags & ~XT_SCTP_VALID_FLAGS)
+diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
+index 5e6459e116055..7013f55f05d1e 100644
+--- a/net/netfilter/xt_socket.c
++++ b/net/netfilter/xt_socket.c
+@@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par)
+ {
+ if (par->family == NFPROTO_IPV4)
+ nf_defrag_ipv4_disable(par->net);
++#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ else if (par->family == NFPROTO_IPV6)
+- nf_defrag_ipv4_disable(par->net);
++ nf_defrag_ipv6_disable(par->net);
++#endif
+ }
+
+ static struct xt_match socket_mt_reg[] __read_mostly = {
+diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
+index 177b40d08098b..117d4615d6684 100644
+--- a/net/netfilter/xt_u32.c
++++ b/net/netfilter/xt_u32.c
+@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
+ return ret ^ data->invert;
+ }
+
++static int u32_mt_checkentry(const struct xt_mtchk_param *par)
++{
++ const struct xt_u32 *data = par->matchinfo;
++ const struct xt_u32_test *ct;
++ unsigned int i;
++
++ if (data->ntests > ARRAY_SIZE(data->tests))
++ return -EINVAL;
++
++ for (i = 0; i < data->ntests; ++i) {
++ ct = &data->tests[i];
++
++ if (ct->nnums > ARRAY_SIZE(ct->location) ||
++ ct->nvalues > ARRAY_SIZE(ct->value))
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ static struct xt_match xt_u32_mt_reg __read_mostly = {
+ .name = "u32",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = u32_mt,
++ .checkentry = u32_mt_checkentry,
+ .matchsize = sizeof(struct xt_u32),
+ .me = THIS_MODULE,
+ };
+diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
+index beb0e573266d0..27511c90a26f4 100644
+--- a/net/netlabel/netlabel_kapi.c
++++ b/net/netlabel/netlabel_kapi.c
+@@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
+
+ offset -= iter->startbit;
+ idx = offset / NETLBL_CATMAP_MAPSIZE;
+- iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE);
++ iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
++ << (offset % NETLBL_CATMAP_MAPSIZE);
+
+ return 0;
+ }
+@@ -885,6 +886,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
+ unsigned char bitmask;
+ unsigned char byte;
+
++ if (offset >= bitmap_len)
++ return -1;
+ byte_offset = offset / 8;
+ byte = bitmap[byte_offset];
+ bit_spot = offset;
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index ada47e59647a0..f41e130a812f0 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -157,6 +157,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack);
+
+ static inline u32 netlink_group_mask(u32 group)
+ {
++ if (group > 32)
++ return 0;
+ return group ? 1 << (group - 1) : 0;
+ }
+
+@@ -576,7 +578,9 @@ static int netlink_insert(struct sock *sk, u32 portid)
+ if (nlk_sk(sk)->bound)
+ goto err;
+
+- nlk_sk(sk)->portid = portid;
++ /* portid can be read locklessly from netlink_getname(). */
++ WRITE_ONCE(nlk_sk(sk)->portid, portid);
++
+ sock_hold(sk);
+
+ err = __netlink_insert(table, sk);
+@@ -1085,9 +1089,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
+ return -EINVAL;
+
+ if (addr->sa_family == AF_UNSPEC) {
+- sk->sk_state = NETLINK_UNCONNECTED;
+- nlk->dst_portid = 0;
+- nlk->dst_group = 0;
++ /* paired with READ_ONCE() in netlink_getsockbyportid() */
++ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
++ /* dst_portid and dst_group can be read locklessly */
++ WRITE_ONCE(nlk->dst_portid, 0);
++ WRITE_ONCE(nlk->dst_group, 0);
+ return 0;
+ }
+ if (addr->sa_family != AF_NETLINK)
+@@ -1108,9 +1114,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
+ err = netlink_autobind(sock);
+
+ if (err == 0) {
+- sk->sk_state = NETLINK_CONNECTED;
+- nlk->dst_portid = nladdr->nl_pid;
+- nlk->dst_group = ffs(nladdr->nl_groups);
++ /* paired with READ_ONCE() in netlink_getsockbyportid() */
++ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
++ /* dst_portid and dst_group can be read locklessly */
++ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
++ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
+ }
+
+ return err;
+@@ -1127,10 +1135,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
+ nladdr->nl_pad = 0;
+
+ if (peer) {
+- nladdr->nl_pid = nlk->dst_portid;
+- nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
++ /* Paired with WRITE_ONCE() in netlink_connect() */
++ nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
++ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
+ } else {
+- nladdr->nl_pid = nlk->portid;
++ /* Paired with WRITE_ONCE() in netlink_insert() */
++ nladdr->nl_pid = READ_ONCE(nlk->portid);
+ netlink_lock_table();
+ nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
+ netlink_unlock_table();
+@@ -1157,8 +1167,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
+
+ /* Don't bother queuing skb if kernel socket has no input function */
+ nlk = nlk_sk(sock);
+- if (sock->sk_state == NETLINK_CONNECTED &&
+- nlk->dst_portid != nlk_sk(ssk)->portid) {
++ /* dst_portid and sk_state can be changed in netlink_connect() */
++ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
++ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
+ sock_put(sock);
+ return ERR_PTR(-ECONNREFUSED);
+ }
+@@ -1599,6 +1610,7 @@ out:
+ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
+ {
+ struct netlink_set_err_data info;
++ unsigned long flags;
+ struct sock *sk;
+ int ret = 0;
+
+@@ -1608,12 +1620,12 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
+ /* sk->sk_err wants a positive error value */
+ info.code = -code;
+
+- read_lock(&nl_table_lock);
++ read_lock_irqsave(&nl_table_lock, flags);
+
+ sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
+ ret += do_one_set_err(sk, &info);
+
+- read_unlock(&nl_table_lock);
++ read_unlock_irqrestore(&nl_table_lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(netlink_set_err);
+@@ -1741,7 +1753,8 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
+ {
+ struct sock *sk = sock->sk;
+ struct netlink_sock *nlk = nlk_sk(sk);
+- int len, val, err;
++ unsigned int flag;
++ int len, val;
+
+ if (level != SOL_NETLINK)
+ return -ENOPROTOOPT;
+@@ -1753,39 +1766,17 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
+
+ switch (optname) {
+ case NETLINK_PKTINFO:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
+- if (put_user(len, optlen) ||
+- put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_RECV_PKTINFO;
+ break;
+ case NETLINK_BROADCAST_ERROR:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
+- if (put_user(len, optlen) ||
+- put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_BROADCAST_SEND_ERROR;
+ break;
+ case NETLINK_NO_ENOBUFS:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
+- if (put_user(len, optlen) ||
+- put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_RECV_NO_ENOBUFS;
+ break;
+ case NETLINK_LIST_MEMBERSHIPS: {
+- int pos, idx, shift;
++ int pos, idx, shift, err = 0;
+
+- err = 0;
+ netlink_lock_table();
+ for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
+ if (len - pos < sizeof(u32))
+@@ -1799,43 +1790,35 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+ }
+- if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
++ if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen))
+ err = -EFAULT;
+ netlink_unlock_table();
+- break;
++ return err;
+ }
+ case NETLINK_CAP_ACK:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
+- if (put_user(len, optlen) ||
+- put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_CAP_ACK;
+ break;
+ case NETLINK_EXT_ACK:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
+- if (put_user(len, optlen) || put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_EXT_ACK;
+ break;
+ case NETLINK_GET_STRICT_CHK:
+- if (len < sizeof(int))
+- return -EINVAL;
+- len = sizeof(int);
+- val = nlk->flags & NETLINK_F_STRICT_CHK ? 1 : 0;
+- if (put_user(len, optlen) || put_user(val, optval))
+- return -EFAULT;
+- err = 0;
++ flag = NETLINK_F_STRICT_CHK;
+ break;
+ default:
+- err = -ENOPROTOOPT;
++ return -ENOPROTOOPT;
+ }
+- return err;
++
++ if (len < sizeof(int))
++ return -EINVAL;
++
++ len = sizeof(int);
++ val = nlk->flags & flag ? 1 : 0;
++
++ if (put_user(len, optlen) ||
++ copy_to_user(optval, &val, len))
++ return -EFAULT;
++
++ return 0;
+ }
+
+ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+@@ -1871,6 +1854,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
++ if (len == 0) {
++ pr_warn_once("Zero length message leads to an empty skb\n");
++ return -ENODATA;
++ }
++
+ err = scm_send(sock, msg, &scm, true);
+ if (err < 0)
+ return err;
+@@ -1889,8 +1877,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ goto out;
+ netlink_skb_flags |= NETLINK_SKB_DST;
+ } else {
+- dst_portid = nlk->dst_portid;
+- dst_group = nlk->dst_group;
++ /* Paired with WRITE_ONCE() in netlink_connect() */
++ dst_portid = READ_ONCE(nlk->dst_portid);
++ dst_group = READ_ONCE(nlk->dst_group);
+ }
+
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+@@ -1989,7 +1978,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ copied = len;
+ }
+
+- skb_reset_transport_header(data_skb);
+ err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
+
+ if (msg->msg_name) {
+@@ -2013,7 +2001,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+
+ skb_free_datagram(sk, skb);
+
+- if (nlk->cb_running &&
++ if (READ_ONCE(nlk->cb_running) &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
+ ret = netlink_dump(sk);
+ if (ret) {
+@@ -2277,6 +2265,13 @@ static int netlink_dump(struct sock *sk)
+ * single netdev. The outcome is MSG_TRUNC error.
+ */
+ skb_reserve(skb, skb_tailroom(skb) - alloc_size);
++
++	/* Make sure malicious BPF programs can not read uninitialized memory
++ * from skb->head -> skb->data
++ */
++ skb_reset_network_header(skb);
++ skb_reset_mac_header(skb);
++
+ netlink_skb_set_owner_r(skb, sk);
+
+ if (nlk->dump_done_errno > 0) {
+@@ -2318,7 +2313,7 @@ static int netlink_dump(struct sock *sk)
+ if (cb->done)
+ cb->done(cb);
+
+- nlk->cb_running = false;
++ WRITE_ONCE(nlk->cb_running, false);
+ module = cb->module;
+ skb = cb->skb;
+ mutex_unlock(nlk->cb_mutex);
+@@ -2381,7 +2376,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ goto error_put;
+ }
+
+- nlk->cb_running = true;
++ WRITE_ONCE(nlk->cb_running, true);
+ nlk->dump_done_errno = INT_MAX;
+
+ mutex_unlock(nlk->cb_mutex);
+@@ -2667,7 +2662,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v)
+ nlk->groups ? (u32)nlk->groups[0] : 0,
+ sk_rmem_alloc_get(s),
+ sk_wmem_alloc_get(s),
+- nlk->cb_running,
++ READ_ONCE(nlk->cb_running),
+ refcount_read(&s->sk_refcnt),
+ atomic_read(&s->sk_drops),
+ sock_i_ino(s)
+diff --git a/net/netlink/diag.c b/net/netlink/diag.c
+index c6255eac305c7..e4f21b1067bcc 100644
+--- a/net/netlink/diag.c
++++ b/net/netlink/diag.c
+@@ -94,6 +94,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net *net = sock_net(skb->sk);
+ struct netlink_diag_req *req;
+ struct netlink_sock *nlsk;
++ unsigned long flags;
+ struct sock *sk;
+ int num = 2;
+ int ret = 0;
+@@ -152,7 +153,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ num++;
+
+ mc_list:
+- read_lock(&nl_table_lock);
++ read_lock_irqsave(&nl_table_lock, flags);
+ sk_for_each_bound(sk, &tbl->mc_list) {
+ if (sk_hashed(sk))
+ continue;
+@@ -167,13 +168,13 @@ mc_list:
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+- sock_i_ino(sk)) < 0) {
++ __sock_i_ino(sk)) < 0) {
+ ret = 1;
+ break;
+ }
+ num++;
+ }
+- read_unlock(&nl_table_lock);
++ read_unlock_irqrestore(&nl_table_lock, flags);
+
+ done:
+ cb->args[0] = num;
+diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
+index 1afca2a6c2ac1..57010927e20a8 100644
+--- a/net/netlink/genetlink.c
++++ b/net/netlink/genetlink.c
+@@ -1174,13 +1174,17 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
+ op.policy,
+ op.maxattr);
+ if (err)
+- return err;
++ goto err_free_state;
+ }
+ }
+
+ if (!ctx->state)
+ return -ENODATA;
+ return 0;
++
++err_free_state:
++ netlink_policy_dump_free(ctx->state);
++ return err;
+ }
+
+ static void *ctrl_dumppolicy_prep(struct sk_buff *skb,
+diff --git a/net/netlink/policy.c b/net/netlink/policy.c
+index 8d7c900e27f4c..87e3de0fde896 100644
+--- a/net/netlink/policy.c
++++ b/net/netlink/policy.c
+@@ -144,7 +144,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
+
+ err = add_policy(&state, policy, maxtype);
+ if (err)
+- return err;
++ goto err_try_undo;
+
+ for (policy_idx = 0;
+ policy_idx < state->n_alloc && state->policies[policy_idx].policy;
+@@ -164,7 +164,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
+ policy[type].nested_policy,
+ policy[type].len);
+ if (err)
+- return err;
++ goto err_try_undo;
+ break;
+ default:
+ break;
+@@ -174,6 +174,16 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
+
+ *pstate = state;
+ return 0;
++
++err_try_undo:
++ /* Try to preserve reasonable unwind semantics - if we're starting from
++ * scratch clean up fully, otherwise record what we got and caller will.
++ */
++ if (!*pstate)
++ netlink_policy_dump_free(state);
++ else
++ *pstate = state;
++ return err;
+ }
+
+ static bool
+diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
+index 6d16e1ab1a8ab..24747163122bb 100644
+--- a/net/netrom/af_netrom.c
++++ b/net/netrom/af_netrom.c
+@@ -298,7 +298,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
+ {
+ struct sock *sk = sock->sk;
+ struct nr_sock *nr = nr_sk(sk);
+- unsigned long opt;
++ unsigned int opt;
+
+ if (level != SOL_NETROM)
+ return -ENOPROTOOPT;
+@@ -306,18 +306,18 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
+ if (optlen < sizeof(unsigned int))
+ return -EINVAL;
+
+- if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
++ if (copy_from_sockptr(&opt, optval, sizeof(opt)))
+ return -EFAULT;
+
+ switch (optname) {
+ case NETROM_T1:
+- if (opt < 1 || opt > ULONG_MAX / HZ)
++ if (opt < 1 || opt > UINT_MAX / HZ)
+ return -EINVAL;
+ nr->t1 = opt * HZ;
+ return 0;
+
+ case NETROM_T2:
+- if (opt < 1 || opt > ULONG_MAX / HZ)
++ if (opt < 1 || opt > UINT_MAX / HZ)
+ return -EINVAL;
+ nr->t2 = opt * HZ;
+ return 0;
+@@ -329,13 +329,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
+ return 0;
+
+ case NETROM_T4:
+- if (opt < 1 || opt > ULONG_MAX / HZ)
++ if (opt < 1 || opt > UINT_MAX / HZ)
+ return -EINVAL;
+ nr->t4 = opt * HZ;
+ return 0;
+
+ case NETROM_IDLE:
+- if (opt > ULONG_MAX / (60 * HZ))
++ if (opt > UINT_MAX / (60 * HZ))
+ return -EINVAL;
+ nr->idle = opt * 60 * HZ;
+ return 0;
+@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
++ if (sock->state != SS_UNCONNECTED) {
++ release_sock(sk);
++ return -EINVAL;
++ }
++
+ if (sk->sk_state != TCP_LISTEN) {
+ memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
+ sk->sk_max_ack_backlog = backlog;
+@@ -655,6 +660,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
+ goto out_release;
+ }
+
++ if (sock->state == SS_CONNECTING) {
++ err = -EALREADY;
++ goto out_release;
++ }
++
+ sk->sk_state = TCP_CLOSE;
+ sock->state = SS_UNCONNECTED;
+
+diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
+index 3f99b432ea707..e2d2af924cff4 100644
+--- a/net/netrom/nr_subr.c
++++ b/net/netrom/nr_subr.c
+@@ -123,7 +123,7 @@ void nr_write_internal(struct sock *sk, int frametype)
+ unsigned char *dptr;
+ int len, timeout;
+
+- len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
++ len = NR_TRANSPORT_LEN;
+
+ switch (frametype & 0x0F) {
+ case NR_CONNREQ:
+@@ -141,7 +141,8 @@ void nr_write_internal(struct sock *sk, int frametype)
+ return;
+ }
+
+- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
++ skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC);
++ if (!skb)
+ return;
+
+ /*
+@@ -149,7 +150,7 @@ void nr_write_internal(struct sock *sk, int frametype)
+ */
+ skb_reserve(skb, NR_NETWORK_LEN);
+
+- dptr = skb_put(skb, skb_tailroom(skb));
++ dptr = skb_put(skb, len);
+
+ switch (frametype & 0x0F) {
+ case NR_CONNREQ:
+diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
+index a8da88db7893f..4e7c968cde2dc 100644
+--- a/net/netrom/nr_timer.c
++++ b/net/netrom/nr_timer.c
+@@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t)
+ is accepted() it isn't 'dead' so doesn't get removed. */
+ if (sock_flag(sk, SOCK_DESTROY) ||
+ (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
++ sock_hold(sk);
+ bh_unlock_sock(sk);
+ nr_destroy_socket(sk);
+ goto out;
+diff --git a/net/nfc/core.c b/net/nfc/core.c
+index 3c645c1d99c9b..6ff3e10ff8e35 100644
+--- a/net/nfc/core.c
++++ b/net/nfc/core.c
+@@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -94,13 +94,13 @@ int nfc_dev_up(struct nfc_dev *dev)
+
+ device_lock(&dev->dev);
+
+- if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+- rc = -ERFKILL;
++ if (dev->shutting_down) {
++ rc = -ENODEV;
+ goto error;
+ }
+
+- if (!device_is_registered(&dev->dev)) {
+- rc = -ENODEV;
++ if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
++ rc = -ERFKILL;
+ goto error;
+ }
+
+@@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ kfree_skb(skb);
+ goto error;
+@@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
+
+ device_lock(&dev->dev);
+
+- if (!device_is_registered(&dev->dev)) {
++ if (dev->shutting_down) {
+ rc = -ENODEV;
+ goto error;
+ }
+@@ -1125,11 +1125,7 @@ int nfc_register_device(struct nfc_dev *dev)
+ if (rc)
+ pr_err("Could not register llcp device\n");
+
+- rc = nfc_genl_device_added(dev);
+- if (rc)
+- pr_debug("The userspace won't be notified that the device %s was added\n",
+- dev_name(&dev->dev));
+-
++ device_lock(&dev->dev);
+ dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
+ RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
+ if (dev->rfkill) {
+@@ -1138,6 +1134,13 @@ int nfc_register_device(struct nfc_dev *dev)
+ dev->rfkill = NULL;
+ }
+ }
++ dev->shutting_down = false;
++ device_unlock(&dev->dev);
++
++ rc = nfc_genl_device_added(dev);
++ if (rc)
++ pr_debug("The userspace won't be notified that the device %s was added\n",
++ dev_name(&dev->dev));
+
+ return 0;
+ }
+@@ -1154,24 +1157,25 @@ void nfc_unregister_device(struct nfc_dev *dev)
+
+ pr_debug("dev_name=%s\n", dev_name(&dev->dev));
+
++ rc = nfc_genl_device_removed(dev);
++ if (rc)
++ pr_debug("The userspace won't be notified that the device %s "
++ "was removed\n", dev_name(&dev->dev));
++
++ device_lock(&dev->dev);
+ if (dev->rfkill) {
+ rfkill_unregister(dev->rfkill);
+ rfkill_destroy(dev->rfkill);
++ dev->rfkill = NULL;
+ }
++ dev->shutting_down = true;
++ device_unlock(&dev->dev);
+
+ if (dev->ops->check_presence) {
+- device_lock(&dev->dev);
+- dev->shutting_down = true;
+- device_unlock(&dev->dev);
+ del_timer_sync(&dev->check_pres_timer);
+ cancel_work_sync(&dev->check_pres_work);
+ }
+
+- rc = nfc_genl_device_removed(dev);
+- if (rc)
+- pr_debug("The userspace won't be notified that the device %s "
+- "was removed\n", dev_name(&dev->dev));
+-
+ nfc_llcp_unregister_device(dev);
+
+ mutex_lock(&nfc_devlist_mutex);
+diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
+index d49d4bf2e37c8..a81893bc06ce8 100644
+--- a/net/nfc/llcp.h
++++ b/net/nfc/llcp.h
+@@ -202,7 +202,6 @@ void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s);
+ void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s);
+ void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock);
+ struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
+-struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local);
+ int nfc_llcp_local_put(struct nfc_llcp_local *local);
+ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
+ struct nfc_llcp_sock *sock);
+diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
+index 3c4172a5aeb5e..5b8754ae7d3af 100644
+--- a/net/nfc/llcp_commands.c
++++ b/net/nfc/llcp_commands.c
+@@ -361,6 +361,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
+ struct sk_buff *skb;
+ struct nfc_llcp_local *local;
+ u16 size = 0;
++ int err;
+
+ pr_debug("Sending SYMM\n");
+
+@@ -372,8 +373,10 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
+ size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+ skb = alloc_skb(size, GFP_KERNEL);
+- if (skb == NULL)
+- return -ENOMEM;
++ if (skb == NULL) {
++ err = -ENOMEM;
++ goto out;
++ }
+
+ skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+@@ -383,8 +386,11 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
+
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX);
+
+- return nfc_data_exchange(dev, local->target_idx, skb,
++ err = nfc_data_exchange(dev, local->target_idx, skb,
+ nfc_llcp_recv, local);
++out:
++ nfc_llcp_local_put(local);
++ return err;
+ }
+
+ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
+@@ -394,7 +400,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
+ const u8 *service_name_tlv = NULL;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+- u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw;
++ u8 service_name_tlv_length = 0;
++ u8 miux_tlv_length, rw_tlv_length, rw;
+ int err;
+ u16 size = 0;
+ __be16 miux;
+diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
+index eaeb2b1cfa6ac..ddfd159f64e13 100644
+--- a/net/nfc/llcp_core.c
++++ b/net/nfc/llcp_core.c
+@@ -17,6 +17,8 @@
+ static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
+
+ static LIST_HEAD(llcp_devices);
++/* Protects llcp_devices list */
++static DEFINE_SPINLOCK(llcp_devices_lock);
+
+ static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
+
+@@ -143,7 +145,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
+ write_unlock(&local->raw_sockets.lock);
+ }
+
+-struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
++static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
+ {
+ kref_get(&local->ref);
+
+@@ -159,6 +161,7 @@ static void local_cleanup(struct nfc_llcp_local *local)
+ cancel_work_sync(&local->rx_work);
+ cancel_work_sync(&local->timeout_work);
+ kfree_skb(local->rx_pending);
++ local->rx_pending = NULL;
+ del_timer_sync(&local->sdreq_timer);
+ cancel_work_sync(&local->sdreq_timeout_work);
+ nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
+@@ -170,7 +173,6 @@ static void local_release(struct kref *ref)
+
+ local = container_of(ref, struct nfc_llcp_local, ref);
+
+- list_del(&local->list);
+ local_cleanup(local);
+ kfree(local);
+ }
+@@ -283,12 +285,33 @@ static void nfc_llcp_sdreq_timer(struct timer_list *t)
+ struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
+ {
+ struct nfc_llcp_local *local;
++ struct nfc_llcp_local *res = NULL;
+
++ spin_lock(&llcp_devices_lock);
+ list_for_each_entry(local, &llcp_devices, list)
+- if (local->dev == dev)
++ if (local->dev == dev) {
++ res = nfc_llcp_local_get(local);
++ break;
++ }
++ spin_unlock(&llcp_devices_lock);
++
++ return res;
++}
++
++static struct nfc_llcp_local *nfc_llcp_remove_local(struct nfc_dev *dev)
++{
++ struct nfc_llcp_local *local, *tmp;
++
++ spin_lock(&llcp_devices_lock);
++ list_for_each_entry_safe(local, tmp, &llcp_devices, list)
++ if (local->dev == dev) {
++ list_del(&local->list);
++ spin_unlock(&llcp_devices_lock);
+ return local;
++ }
++ spin_unlock(&llcp_devices_lock);
+
+- pr_debug("No device found\n");
++ pr_warn("Shutting down device not found\n");
+
+ return NULL;
+ }
+@@ -609,12 +632,15 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
+
+ *general_bytes_len = local->gb_len;
+
++ nfc_llcp_local_put(local);
++
+ return local->gb;
+ }
+
+ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
+ {
+ struct nfc_llcp_local *local;
++ int err;
+
+ if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN)
+ return -EINVAL;
+@@ -631,12 +657,16 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
+
+ if (memcmp(local->remote_gb, llcp_magic, 3)) {
+ pr_err("MAC does not support LLCP\n");
+- return -EINVAL;
++ err = -EINVAL;
++ goto out;
+ }
+
+- return nfc_llcp_parse_gb_tlv(local,
++ err = nfc_llcp_parse_gb_tlv(local,
+ &local->remote_gb[3],
+ local->remote_gb_len - 3);
++out:
++ nfc_llcp_local_put(local);
++ return err;
+ }
+
+ static u8 nfc_llcp_dsap(const struct sk_buff *pdu)
+@@ -1526,6 +1556,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb)
+
+ __nfc_llcp_recv(local, skb);
+
++ nfc_llcp_local_put(local);
++
+ return 0;
+ }
+
+@@ -1542,6 +1574,8 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev)
+
+ /* Close and purge all existing sockets */
+ nfc_llcp_socket_release(local, true, 0);
++
++ nfc_llcp_local_put(local);
+ }
+
+ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+@@ -1567,6 +1601,8 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+ mod_timer(&local->link_timer,
+ jiffies + msecs_to_jiffies(local->remote_lto));
+ }
++
++ nfc_llcp_local_put(local);
+ }
+
+ int nfc_llcp_register_device(struct nfc_dev *ndev)
+@@ -1617,7 +1653,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
+
+ void nfc_llcp_unregister_device(struct nfc_dev *dev)
+ {
+- struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
++ struct nfc_llcp_local *local = nfc_llcp_remove_local(dev);
+
+ if (local == NULL) {
+ pr_debug("No such device\n");
+diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
+index 6cfd30fc07985..6e1fba2084930 100644
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -99,7 +99,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
+ }
+
+ llcp_sock->dev = dev;
+- llcp_sock->local = nfc_llcp_local_get(local);
++ llcp_sock->local = local;
+ llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+ llcp_sock->service_name_len = min_t(unsigned int,
+ llcp_addr.service_name_len,
+@@ -181,7 +181,7 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
+ }
+
+ llcp_sock->dev = dev;
+- llcp_sock->local = nfc_llcp_local_get(local);
++ llcp_sock->local = local;
+ llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+
+ nfc_llcp_sock_link(&local->raw_sockets, sk);
+@@ -698,24 +698,22 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
+ if (dev->dep_link_up == false) {
+ ret = -ENOLINK;
+ device_unlock(&dev->dev);
+- goto put_dev;
++ goto sock_llcp_put_local;
+ }
+ device_unlock(&dev->dev);
+
+ if (local->rf_mode == NFC_RF_INITIATOR &&
+ addr->target_idx != local->target_idx) {
+ ret = -ENOLINK;
+- goto put_dev;
++ goto sock_llcp_put_local;
+ }
+
+ llcp_sock->dev = dev;
+- llcp_sock->local = nfc_llcp_local_get(local);
++ llcp_sock->local = local;
+ llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
+ if (llcp_sock->ssap == LLCP_SAP_MAX) {
+- nfc_llcp_local_put(llcp_sock->local);
+- llcp_sock->local = NULL;
+ ret = -ENOMEM;
+- goto put_dev;
++ goto sock_llcp_nullify;
+ }
+
+ llcp_sock->reserved_ssap = llcp_sock->ssap;
+@@ -760,8 +758,13 @@ sock_unlink:
+
+ sock_llcp_release:
+ nfc_llcp_put_ssap(local, llcp_sock->ssap);
+- nfc_llcp_local_put(llcp_sock->local);
++
++sock_llcp_nullify:
+ llcp_sock->local = NULL;
++ llcp_sock->dev = NULL;
++
++sock_llcp_put_local:
++ nfc_llcp_local_put(local);
+
+ put_dev:
+ nfc_put_device(dev);
+@@ -789,6 +792,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+
+ lock_sock(sk);
+
++ if (!llcp_sock->local) {
++ release_sock(sk);
++ return -ENODEV;
++ }
++
+ if (sk->sk_type == SOCK_DGRAM) {
+ DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
+ msg->msg_name);
+diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
+index 82ab39d80726e..7b6cf9a44aea7 100644
+--- a/net/nfc/nci/core.c
++++ b/net/nfc/nci/core.c
+@@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev,
+ {
+ int rc;
+
+- if (!test_bit(NCI_UP, &ndev->flags))
+- return -ENETDOWN;
+-
+ /* Serialize all requests */
+ mutex_lock(&ndev->req_lock);
+- rc = __nci_request(ndev, req, opt, timeout);
++ /* check the state after obtaing the lock against any races
++ * from nci_close_device when the device gets removed.
++ */
++ if (test_bit(NCI_UP, &ndev->flags))
++ rc = __nci_request(ndev, req, opt, timeout);
++ else
++ rc = -ENETDOWN;
+ mutex_unlock(&ndev->req_lock);
+
+ return rc;
+@@ -473,6 +476,11 @@ static int nci_open_device(struct nci_dev *ndev)
+
+ mutex_lock(&ndev->req_lock);
+
++ if (test_bit(NCI_UNREG, &ndev->flags)) {
++ rc = -ENODEV;
++ goto done;
++ }
++
+ if (test_bit(NCI_UP, &ndev->flags)) {
+ rc = -EALREADY;
+ goto done;
+@@ -534,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev)
+ skb_queue_purge(&ndev->tx_q);
+
+ ndev->ops->close(ndev);
+- ndev->flags = 0;
++ ndev->flags &= BIT(NCI_UNREG);
+ }
+
+ done:
+@@ -545,9 +553,17 @@ done:
+ static int nci_close_device(struct nci_dev *ndev)
+ {
+ nci_req_cancel(ndev, ENODEV);
++
++ /* This mutex needs to be held as a barrier for
++ * caller nci_unregister_device
++ */
+ mutex_lock(&ndev->req_lock);
+
+ if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
++ /* Need to flush the cmd wq in case
++ * there is a queued/running cmd_work
++ */
++ flush_workqueue(ndev->cmd_wq);
+ del_timer_sync(&ndev->cmd_timer);
+ del_timer_sync(&ndev->data_timer);
+ mutex_unlock(&ndev->req_lock);
+@@ -582,8 +598,8 @@ static int nci_close_device(struct nci_dev *ndev)
+
+ del_timer_sync(&ndev->cmd_timer);
+
+- /* Clear flags */
+- ndev->flags = 0;
++ /* Clear flags except NCI_UNREG */
++ ndev->flags &= BIT(NCI_UNREG);
+
+ mutex_unlock(&ndev->req_lock);
+
+@@ -1270,6 +1286,12 @@ void nci_unregister_device(struct nci_dev *ndev)
+ {
+ struct nci_conn_info *conn_info, *n;
+
++ /* This set_bit is not protected with specialized barrier,
++ * However, it is fine because the mutex_lock(&ndev->req_lock);
++ * in nci_close_device() will help to emit one.
++ */
++ set_bit(NCI_UNREG, &ndev->flags);
++
+ nci_close_device(ndev);
+
+ destroy_workqueue(ndev->cmd_wq);
+diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
+index 6055dc9a82aa0..3d36ea5701f02 100644
+--- a/net/nfc/nci/data.c
++++ b/net/nfc/nci/data.c
+@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
+
+ skb_frag = nci_skb_alloc(ndev,
+ (NCI_DATA_HDR_SIZE + frag_len),
+- GFP_KERNEL);
++ GFP_ATOMIC);
+ if (skb_frag == NULL) {
+ rc = -ENOMEM;
+ goto free_exit;
+@@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
+ nci_plen(skb->data));
+
+ conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data));
+- if (!conn_info)
++ if (!conn_info) {
++ kfree_skb(skb);
+ return;
++ }
+
+ /* strip the nci data header */
+ skb_pull(skb, NCI_DATA_HDR_SIZE);
+diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
+index e199912ee1e59..85b808fdcbc3a 100644
+--- a/net/nfc/nci/hci.c
++++ b/net/nfc/nci/hci.c
+@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
+
+ i = 0;
+ skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
+- NCI_DATA_HDR_SIZE, GFP_KERNEL);
++ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
+ if (i < data_len) {
+ skb = nci_skb_alloc(ndev,
+ conn_info->max_pkt_payload_len +
+- NCI_DATA_HDR_SIZE, GFP_KERNEL);
++ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
+index c5eacaac41aea..8f48b10619448 100644
+--- a/net/nfc/nci/ntf.c
++++ b/net/nfc/nci/ntf.c
+@@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
+ target->sens_res = nfca_poll->sens_res;
+ target->sel_res = nfca_poll->sel_res;
+ target->nfcid1_len = nfca_poll->nfcid1_len;
++ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1))
++ return -EPROTO;
+ if (target->nfcid1_len > 0) {
+ memcpy(target->nfcid1, nfca_poll->nfcid1,
+ target->nfcid1_len);
+@@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
+ nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params;
+
+ target->sensb_res_len = nfcb_poll->sensb_res_len;
++ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res))
++ return -EPROTO;
+ if (target->sensb_res_len > 0) {
+ memcpy(target->sensb_res, nfcb_poll->sensb_res,
+ target->sensb_res_len);
+@@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
+ nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params;
+
+ target->sensf_res_len = nfcf_poll->sensf_res_len;
++ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res))
++ return -EPROTO;
+ if (target->sensf_res_len > 0) {
+ memcpy(target->sensf_res, nfcf_poll->sensf_res,
+ target->sensf_res_len);
+diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
+index 49089c50872e6..eb4f70a827e4d 100644
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -636,8 +636,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
+ {
+ struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
+
+- nfc_device_iter_exit(iter);
+- kfree(iter);
++ if (iter) {
++ nfc_device_iter_exit(iter);
++ kfree(iter);
++ }
+
+ return 0;
+ }
+@@ -1037,11 +1039,14 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info)
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ rc = -ENOMEM;
+- goto exit;
++ goto put_local;
+ }
+
+ rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq);
+
++put_local:
++ nfc_llcp_local_put(local);
++
+ exit:
+ device_unlock(&dev->dev);
+
+@@ -1103,7 +1108,7 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info)
+ if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) {
+ if (dev->dep_link_up) {
+ rc = -EINPROGRESS;
+- goto exit;
++ goto put_local;
+ }
+
+ local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]);
+@@ -1115,6 +1120,9 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info)
+ if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX])
+ local->miux = cpu_to_be16(miux);
+
++put_local:
++ nfc_llcp_local_put(local);
++
+ exit:
+ device_unlock(&dev->dev);
+
+@@ -1170,7 +1178,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+
+ if (rc != 0) {
+ rc = -EINVAL;
+- goto exit;
++ goto put_local;
+ }
+
+ if (!sdp_attrs[NFC_SDP_ATTR_URI])
+@@ -1189,7 +1197,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+ sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len);
+ if (sdreq == NULL) {
+ rc = -ENOMEM;
+- goto exit;
++ goto put_local;
+ }
+
+ tlvs_len += sdreq->tlv_len;
+@@ -1199,10 +1207,14 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+
+ if (hlist_empty(&sdreq_list)) {
+ rc = -EINVAL;
+- goto exit;
++ goto put_local;
+ }
+
+ rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len);
++
++put_local:
++ nfc_llcp_local_put(local);
++
+ exit:
+ device_unlock(&dev->dev);
+
+@@ -1242,7 +1254,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+ struct sk_buff *msg;
+ void *hdr;
+
+- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+
+@@ -1258,7 +1270,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+
+ genlmsg_end(msg, hdr);
+
+- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
++ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
+
+ return 0;
+
+@@ -1392,8 +1404,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
+ {
+ struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
+
+- nfc_device_iter_exit(iter);
+- kfree(iter);
++ if (iter) {
++ nfc_device_iter_exit(iter);
++ kfree(iter);
++ }
+
+ return 0;
+ }
+@@ -1438,8 +1452,12 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx,
+ rc = dev->ops->se_io(dev, se_idx, apdu,
+ apdu_length, cb, cb_context);
+
++ device_unlock(&dev->dev);
++ return rc;
++
+ error:
+ device_unlock(&dev->dev);
++ kfree(cb_context);
+ return rc;
+ }
+
+@@ -1493,6 +1511,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+ u32 dev_idx, se_idx;
+ u8 *apdu;
+ size_t apdu_len;
++ int rc;
+
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_SE_INDEX] ||
+@@ -1506,25 +1525,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+ if (!dev)
+ return -ENODEV;
+
+- if (!dev->ops || !dev->ops->se_io)
+- return -ENOTSUPP;
++ if (!dev->ops || !dev->ops->se_io) {
++ rc = -EOPNOTSUPP;
++ goto put_dev;
++ }
+
+ apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
+- if (apdu_len == 0)
+- return -EINVAL;
++ if (apdu_len == 0) {
++ rc = -EINVAL;
++ goto put_dev;
++ }
+
+ apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
+- if (!apdu)
+- return -EINVAL;
++ if (!apdu) {
++ rc = -EINVAL;
++ goto put_dev;
++ }
+
+ ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
+- if (!ctx)
+- return -ENOMEM;
++ if (!ctx) {
++ rc = -ENOMEM;
++ goto put_dev;
++ }
+
+ ctx->dev_idx = dev_idx;
+ ctx->se_idx = se_idx;
+
+- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++
++put_dev:
++ nfc_put_device(dev);
++ return rc;
+ }
+
+ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+@@ -1547,14 +1578,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+ subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+
+ dev = nfc_get_device(dev_idx);
+- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
++ if (!dev)
+ return -ENODEV;
+
++ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
++ err = -ENODEV;
++ goto put_dev;
++ }
++
+ if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
+ data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+ data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+- if (data_len == 0)
+- return -EINVAL;
++ if (data_len == 0) {
++ err = -EINVAL;
++ goto put_dev;
++ }
+ } else {
+ data = NULL;
+ data_len = 0;
+@@ -1569,10 +1607,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+ dev->cur_cmd_info = info;
+ err = cmd->doit(dev, data, data_len);
+ dev->cur_cmd_info = NULL;
+- return err;
++ goto put_dev;
+ }
+
+- return -EOPNOTSUPP;
++ err = -EOPNOTSUPP;
++
++put_dev:
++ nfc_put_device(dev);
++ return err;
+ }
+
+ /* message building helper */
+diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
+index de2ec66d7e83a..0b1e6466f4fbf 100644
+--- a/net/nfc/nfc.h
++++ b/net/nfc/nfc.h
+@@ -52,6 +52,7 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len);
+ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len);
+ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
+ struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
++int nfc_llcp_local_put(struct nfc_llcp_local *local);
+ int __init nfc_llcp_init(void);
+ void nfc_llcp_exit(void);
+ void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
+index e9ca007718b7e..0f23e5e8e03eb 100644
+--- a/net/nsh/nsh.c
++++ b/net/nsh/nsh.c
+@@ -77,13 +77,12 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+ {
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ u16 mac_offset = skb->mac_header;
+ unsigned int nsh_len, mac_len;
+ __be16 proto;
+- int nhoff;
+
+ skb_reset_network_header(skb);
+
+- nhoff = skb->network_header - skb->mac_header;
+ mac_len = skb->mac_len;
+
+ if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
+@@ -108,15 +107,14 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
+ segs = skb_mac_gso_segment(skb, features);
+ if (IS_ERR_OR_NULL(segs)) {
+ skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
+- skb->network_header - nhoff,
+- mac_len);
++ mac_offset, mac_len);
+ goto out;
+ }
+
+ for (skb = segs; skb; skb = skb->next) {
+ skb->protocol = htons(ETH_P_NSH);
+ __skb_push(skb, nsh_len);
+- skb_set_mac_header(skb, -nhoff);
++ skb->mac_header = mac_offset;
+ skb->network_header = skb->mac_header + mac_len;
+ skb->mac_len = mac_len;
+ }
+diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
+index 076774034bb96..aca6e2b599c86 100644
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -373,6 +373,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+ update_ip_l4_checksum(skb, nh, *addr, new_addr);
+ csum_replace4(&nh->check, *addr, new_addr);
+ skb_clear_hash(skb);
++ ovs_ct_clear(skb, NULL);
+ *addr = new_addr;
+ }
+
+@@ -420,15 +421,47 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
+ update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+
+ skb_clear_hash(skb);
++ ovs_ct_clear(skb, NULL);
+ memcpy(addr, new_addr, sizeof(__be32[4]));
+ }
+
+-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
++static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
+ {
++ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
++
++ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
++
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
++ (__force __wsum)(ipv6_tclass << 12));
++
++ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
++}
++
++static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
++{
++ u32 ofl;
++
++ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
++ fl = OVS_MASKED(ofl, fl, mask);
++
+ /* Bits 21-24 are always unmasked, so this retains their values. */
+- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
++ nh->flow_lbl[0] = (u8)(fl >> 16);
++ nh->flow_lbl[1] = (u8)(fl >> 8);
++ nh->flow_lbl[2] = (u8)fl;
++
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
++}
++
++static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
++{
++ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
++
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
++ (__force __wsum)(new_ttl << 8));
++ nh->hop_limit = new_ttl;
+ }
+
+ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+@@ -546,18 +579,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ }
+ }
+ if (mask->ipv6_tclass) {
+- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
++ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
+ flow_key->ip.tos = ipv6_get_dsfield(nh);
+ }
+ if (mask->ipv6_label) {
+- set_ipv6_fl(nh, ntohl(key->ipv6_label),
++ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
+ ntohl(mask->ipv6_label));
+ flow_key->ipv6.label =
+ *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+ }
+ if (mask->ipv6_hlimit) {
+- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
+- mask->ipv6_hlimit);
++ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
+ flow_key->ip.ttl = nh->hop_limit;
+ }
+ return 0;
+@@ -630,6 +662,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ static void set_tp_port(struct sk_buff *skb, __be16 *port,
+ __be16 new_port, __sum16 *check)
+ {
++ ovs_ct_clear(skb, NULL);
+ inet_proto_csum_replace2(check, skb, *port, new_port, false);
+ *port = new_port;
+ }
+@@ -669,6 +702,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ uh->dest = dst;
+ flow_key->tp.src = src;
+ flow_key->tp.dst = dst;
++ ovs_ct_clear(skb, NULL);
+ }
+
+ skb_clear_hash(skb);
+@@ -731,6 +765,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+ skb_clear_hash(skb);
++ ovs_ct_clear(skb, NULL);
++
+ flow_key->tp.src = sh->source;
+ flow_key->tp.dst = sh->dest;
+
+@@ -1021,7 +1057,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
+ int rem = nla_len(attr);
+ bool dont_clone_flow_key;
+
+- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
++ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
+ clone_arg = nla_data(attr);
+ dont_clone_flow_key = nla_get_u32(clone_arg);
+ actions = nla_next(clone_arg, &rem);
+diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
+index 1b5eae57bc900..7106ce231a2dd 100644
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -150,7 +150,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
+ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
+ {
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+- return ct ? ct->mark : 0;
++ return ct ? READ_ONCE(ct->mark) : 0;
+ #else
+ return 0;
+ #endif
+@@ -338,9 +338,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+ u32 new_mark;
+
+- new_mark = ct_mark | (ct->mark & ~(mask));
+- if (ct->mark != new_mark) {
+- ct->mark = new_mark;
++ new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
++ if (READ_ONCE(ct->mark) != new_mark) {
++ WRITE_ONCE(ct->mark, new_mark);
+ if (nf_ct_is_confirmed(ct))
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ key->ct.mark = new_mark;
+@@ -574,7 +574,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ nf_ct_delete(ct, 0, 0);
+- nf_conntrack_put(&ct->ct_general);
++ nf_ct_put(ct);
+ }
+ }
+
+@@ -723,7 +723,7 @@ static bool skb_nfct_cached(struct net *net,
+ if (nf_ct_is_confirmed(ct))
+ nf_ct_delete(ct, 0, 0);
+
+- nf_conntrack_put(&ct->ct_general);
++ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, 0);
+ return false;
+ }
+@@ -732,6 +732,57 @@ static bool skb_nfct_cached(struct net *net,
+ }
+
+ #if IS_ENABLED(CONFIG_NF_NAT)
++static void ovs_nat_update_key(struct sw_flow_key *key,
++ const struct sk_buff *skb,
++ enum nf_nat_manip_type maniptype)
++{
++ if (maniptype == NF_NAT_MANIP_SRC) {
++ __be16 src;
++
++ key->ct_state |= OVS_CS_F_SRC_NAT;
++ if (key->eth.type == htons(ETH_P_IP))
++ key->ipv4.addr.src = ip_hdr(skb)->saddr;
++ else if (key->eth.type == htons(ETH_P_IPV6))
++ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
++ sizeof(key->ipv6.addr.src));
++ else
++ return;
++
++ if (key->ip.proto == IPPROTO_UDP)
++ src = udp_hdr(skb)->source;
++ else if (key->ip.proto == IPPROTO_TCP)
++ src = tcp_hdr(skb)->source;
++ else if (key->ip.proto == IPPROTO_SCTP)
++ src = sctp_hdr(skb)->source;
++ else
++ return;
++
++ key->tp.src = src;
++ } else {
++ __be16 dst;
++
++ key->ct_state |= OVS_CS_F_DST_NAT;
++ if (key->eth.type == htons(ETH_P_IP))
++ key->ipv4.addr.dst = ip_hdr(skb)->daddr;
++ else if (key->eth.type == htons(ETH_P_IPV6))
++ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
++ sizeof(key->ipv6.addr.dst));
++ else
++ return;
++
++ if (key->ip.proto == IPPROTO_UDP)
++ dst = udp_hdr(skb)->dest;
++ else if (key->ip.proto == IPPROTO_TCP)
++ dst = tcp_hdr(skb)->dest;
++ else if (key->ip.proto == IPPROTO_SCTP)
++ dst = sctp_hdr(skb)->dest;
++ else
++ return;
++
++ key->tp.dst = dst;
++ }
++}
++
+ /* Modelled after nf_nat_ipv[46]_fn().
+ * range is only used for new, uninitialized NAT state.
+ * Returns either NF_ACCEPT or NF_DROP.
+@@ -739,7 +790,7 @@ static bool skb_nfct_cached(struct net *net,
+ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_nat_range2 *range,
+- enum nf_nat_manip_type maniptype)
++ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
+ {
+ int hooknum, nh_off, err = NF_ACCEPT;
+
+@@ -811,58 +862,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+ push:
+ skb_push_rcsum(skb, nh_off);
+
+- return err;
+-}
+-
+-static void ovs_nat_update_key(struct sw_flow_key *key,
+- const struct sk_buff *skb,
+- enum nf_nat_manip_type maniptype)
+-{
+- if (maniptype == NF_NAT_MANIP_SRC) {
+- __be16 src;
+-
+- key->ct_state |= OVS_CS_F_SRC_NAT;
+- if (key->eth.type == htons(ETH_P_IP))
+- key->ipv4.addr.src = ip_hdr(skb)->saddr;
+- else if (key->eth.type == htons(ETH_P_IPV6))
+- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+- sizeof(key->ipv6.addr.src));
+- else
+- return;
+-
+- if (key->ip.proto == IPPROTO_UDP)
+- src = udp_hdr(skb)->source;
+- else if (key->ip.proto == IPPROTO_TCP)
+- src = tcp_hdr(skb)->source;
+- else if (key->ip.proto == IPPROTO_SCTP)
+- src = sctp_hdr(skb)->source;
+- else
+- return;
+-
+- key->tp.src = src;
+- } else {
+- __be16 dst;
+-
+- key->ct_state |= OVS_CS_F_DST_NAT;
+- if (key->eth.type == htons(ETH_P_IP))
+- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+- else if (key->eth.type == htons(ETH_P_IPV6))
+- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+- sizeof(key->ipv6.addr.dst));
+- else
+- return;
+-
+- if (key->ip.proto == IPPROTO_UDP)
+- dst = udp_hdr(skb)->dest;
+- else if (key->ip.proto == IPPROTO_TCP)
+- dst = tcp_hdr(skb)->dest;
+- else if (key->ip.proto == IPPROTO_SCTP)
+- dst = sctp_hdr(skb)->dest;
+- else
+- return;
++ /* Update the flow key if NAT successful. */
++ if (err == NF_ACCEPT)
++ ovs_nat_update_key(key, skb, maniptype);
+
+- key->tp.dst = dst;
+- }
++ return err;
+ }
+
+ /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
+@@ -904,7 +908,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
+ } else {
+ return NF_ACCEPT; /* Connection is not NATed. */
+ }
+- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
++ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
+
+ if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+ if (ct->status & IPS_SRC_NAT) {
+@@ -914,17 +918,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
+- maniptype);
++ maniptype, key);
+ } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
+- NF_NAT_MANIP_SRC);
++ NF_NAT_MANIP_SRC, key);
+ }
+ }
+
+- /* Mark NAT done if successful and update the flow key. */
+- if (err == NF_ACCEPT)
+- ovs_nat_update_key(key, skb, maniptype);
+-
+ return err;
+ }
+ #else /* !CONFIG_NF_NAT */
+@@ -967,7 +967,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
+
+ /* Associate skb with specified zone. */
+ if (tmpl) {
+- nf_conntrack_put(skb_nfct(skb));
++ ct = nf_ct_get(skb, &ctinfo);
++ nf_ct_put(ct);
+ nf_conntrack_get(&tmpl->ct_general);
+ nf_ct_set(skb, tmpl, IP_CT_NEW);
+ }
+@@ -1328,9 +1329,16 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+
+ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+ {
+- nf_conntrack_put(skb_nfct(skb));
++ enum ip_conntrack_info ctinfo;
++ struct nf_conn *ct;
++
++ ct = nf_ct_get(skb, &ctinfo);
++
++ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+- ovs_ct_fill_key(skb, key, false);
++
++ if (key)
++ ovs_ct_fill_key(skb, key, false);
+
+ return 0;
+ }
+@@ -1716,7 +1724,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+ goto err_free_ct;
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+- nf_conntrack_get(&ct_info.ct->ct_general);
+ return 0;
+ err_free_ct:
+ __ovs_ct_free_action(&ct_info);
+diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
+index 67ad08320886b..0fc98e89a1149 100644
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -251,10 +251,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
+
+ upcall.mru = OVS_CB(skb)->mru;
+ error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
+- if (unlikely(error))
+- kfree_skb(skb);
+- else
++ switch (error) {
++ case 0:
++ case -EAGAIN:
++ case -ERESTARTSYS:
++ case -EINTR:
+ consume_skb(skb);
++ break;
++ default:
++ kfree_skb(skb);
++ break;
++ }
+ stats_counter = &stats->n_missed;
+ goto out;
+ }
+@@ -550,8 +557,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+ out:
+ if (err)
+ skb_tx_error(skb);
+- kfree_skb(user_skb);
+- kfree_skb(nskb);
++ consume_skb(user_skb);
++ consume_skb(nskb);
++
+ return err;
+ }
+
+@@ -938,6 +946,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+ struct sw_flow_mask mask;
+ struct sk_buff *reply;
+ struct datapath *dp;
++ struct sw_flow_key *key;
+ struct sw_flow_actions *acts;
+ struct sw_flow_match match;
+ u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+@@ -965,30 +974,32 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ /* Extract key. */
+- ovs_match_init(&match, &new_flow->key, false, &mask);
++ key = kzalloc(sizeof(*key), GFP_KERNEL);
++ if (!key) {
++ error = -ENOMEM;
++ goto err_kfree_flow;
++ }
++
++ ovs_match_init(&match, key, false, &mask);
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
+ if (error)
+- goto err_kfree_flow;
++ goto err_kfree_key;
++
++ ovs_flow_mask_key(&new_flow->key, key, true, &mask);
+
+ /* Extract flow identifier. */
+ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+- &new_flow->key, log);
++ key, log);
+ if (error)
+- goto err_kfree_flow;
+-
+- /* unmasked key is needed to match when ufid is not used. */
+- if (ovs_identifier_is_key(&new_flow->id))
+- match.key = new_flow->id.unmasked_key;
+-
+- ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
++ goto err_kfree_key;
+
+ /* Validate actions. */
+ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
+ &new_flow->key, &acts, log);
+ if (error) {
+ OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
+- goto err_kfree_flow;
++ goto err_kfree_key;
+ }
+
+ reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
+@@ -1009,7 +1020,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+ if (ovs_identifier_is_ufid(&new_flow->id))
+ flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
+ if (!flow)
+- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
++ flow = ovs_flow_tbl_lookup(&dp->table, key);
+ if (likely(!flow)) {
+ rcu_assign_pointer(new_flow->sf_acts, acts);
+
+@@ -1079,6 +1090,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+
+ if (reply)
+ ovs_notify(&dp_flow_genl_family, reply, info);
++
++ kfree(key);
+ return 0;
+
+ err_unlock_ovs:
+@@ -1086,6 +1099,8 @@ err_unlock_ovs:
+ kfree_skb(reply);
+ err_kfree_acts:
+ ovs_nla_free_flow_actions(acts);
++err_kfree_key:
++ kfree(key);
+ err_kfree_flow:
+ ovs_flow_free(new_flow, false);
+ error:
+@@ -1597,7 +1612,8 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
+ if (IS_ERR(dp))
+ return;
+
+- WARN(dp->user_features, "Dropping previously announced user features\n");
++ pr_warn("%s: Dropping previously announced user features\n",
++ ovs_dp_name(dp));
+ dp->user_features = 0;
+ }
+
+@@ -1801,7 +1817,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+ ovs_dp_reset_user_features(skb, info);
+ }
+
+- goto err_unlock_and_destroy_meters;
++ goto err_destroy_portids;
+ }
+
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+@@ -1816,6 +1832,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+ ovs_notify(&dp_datapath_genl_family, reply, info);
+ return 0;
+
++err_destroy_portids:
++ kfree(rcu_dereference_raw(dp->upcall_portids));
+ err_unlock_and_destroy_meters:
+ ovs_unlock();
+ ovs_meters_exit(dp);
+diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
+index 9713035b89e3a..1b81d71bac3cf 100644
+--- a/net/openvswitch/flow.c
++++ b/net/openvswitch/flow.c
+@@ -34,6 +34,7 @@
+ #include <net/mpls.h>
+ #include <net/ndisc.h>
+ #include <net/nsh.h>
++#include <net/netfilter/nf_conntrack_zones.h>
+
+ #include "conntrack.h"
+ #include "datapath.h"
+@@ -265,7 +266,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+ if (flags & IP6_FH_F_FRAG) {
+ if (frag_off) {
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
+- key->ip.proto = nexthdr;
++ key->ip.proto = NEXTHDR_FRAGMENT;
+ return 0;
+ }
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+@@ -858,8 +859,9 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+ #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *tc_ext;
+ #endif
+- bool post_ct = false;
++ bool post_ct = false, post_ct_snat = false, post_ct_dnat = false;
+ int res, err;
++ u16 zone = 0;
+
+ /* Extract metadata from packet. */
+ if (tun_info) {
+@@ -898,6 +900,9 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+ key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
+ post_ct = tc_ext ? tc_ext->post_ct : false;
++ post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
++ post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false;
++ zone = post_ct ? tc_ext->zone : 0;
+ } else {
+ key->recirc_id = 0;
+ }
+@@ -906,8 +911,19 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
+ #endif
+
+ err = key_extract(skb, key);
+- if (!err)
++ if (!err) {
+ ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */
++ if (post_ct) {
++ if (!skb_get_nfct(skb)) {
++ key->ct_zone = zone;
++ } else {
++ if (!post_ct_dnat)
++ key->ct_state &= ~OVS_CS_F_DST_NAT;
++ if (!post_ct_snat)
++ key->ct_state &= ~OVS_CS_F_SRC_NAT;
++ }
++ }
++ }
+ return err;
+ }
+
+diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
+index fd1f809e9bc1b..d77c21ff066c9 100644
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -2201,8 +2201,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
+ icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
+
+- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
++ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
++ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ struct ovs_key_nd *nd_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+@@ -2288,6 +2288,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
+ return sfa;
+ }
+
++static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
++
++static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
++{
++ const struct nlattr *a;
++ int rem;
++
++ nla_for_each_nested(a, action, rem) {
++ switch (nla_type(a)) {
++ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
++ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
++ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
++ break;
++ }
++ }
++}
++
++static void ovs_nla_free_clone_action(const struct nlattr *action)
++{
++ const struct nlattr *a = nla_data(action);
++ int rem = nla_len(action);
++
++ switch (nla_type(a)) {
++ case OVS_CLONE_ATTR_EXEC:
++ /* The real list of actions follows this attribute. */
++ a = nla_next(a, &rem);
++ ovs_nla_free_nested_actions(a, rem);
++ break;
++ }
++}
++
++static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
++{
++ const struct nlattr *a = nla_data(action);
++
++ switch (nla_type(a)) {
++ case OVS_DEC_TTL_ATTR_ACTION:
++ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
++ break;
++ }
++}
++
++static void ovs_nla_free_sample_action(const struct nlattr *action)
++{
++ const struct nlattr *a = nla_data(action);
++ int rem = nla_len(action);
++
++ switch (nla_type(a)) {
++ case OVS_SAMPLE_ATTR_ARG:
++ /* The real list of actions follows this attribute. */
++ a = nla_next(a, &rem);
++ ovs_nla_free_nested_actions(a, rem);
++ break;
++ }
++}
++
+ static void ovs_nla_free_set_action(const struct nlattr *a)
+ {
+ const struct nlattr *ovs_key = nla_data(a);
+@@ -2301,25 +2357,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
+ }
+ }
+
+-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
++static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
+ {
+ const struct nlattr *a;
+ int rem;
+
+- if (!sf_acts)
++ /* Whenever new actions are added, the need to update this
++ * function should be considered.
++ */
++ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
++
++ if (!actions)
+ return;
+
+- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
++ nla_for_each_attr(a, actions, len, rem) {
+ switch (nla_type(a)) {
+- case OVS_ACTION_ATTR_SET:
+- ovs_nla_free_set_action(a);
++ case OVS_ACTION_ATTR_CHECK_PKT_LEN:
++ ovs_nla_free_check_pkt_len_action(a);
++ break;
++
++ case OVS_ACTION_ATTR_CLONE:
++ ovs_nla_free_clone_action(a);
+ break;
++
+ case OVS_ACTION_ATTR_CT:
+ ovs_ct_free_action(a);
+ break;
++
++ case OVS_ACTION_ATTR_DEC_TTL:
++ ovs_nla_free_dec_ttl_action(a);
++ break;
++
++ case OVS_ACTION_ATTR_SAMPLE:
++ ovs_nla_free_sample_action(a);
++ break;
++
++ case OVS_ACTION_ATTR_SET:
++ ovs_nla_free_set_action(a);
++ break;
+ }
+ }
++}
++
++void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
++{
++ if (!sf_acts)
++ return;
+
++ ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
+ kfree(sf_acts);
+ }
+
+@@ -2351,7 +2436,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+ new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
+
+ if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
++ if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
+ OVS_NLERR(log, "Flow action size exceeds max %u",
+ MAX_ACTIONS_BUFSIZE);
+ return ERR_PTR(-EMSGSIZE);
+@@ -3429,7 +3514,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
+ if (!start)
+ return -EMSGSIZE;
+
+- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
++ /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
++ attr = nla_next(nla_data(attr), &rem);
++ err = ovs_nla_put_actions(attr, rem, skb);
+
+ if (err)
+ nla_nest_cancel(skb, start);
+diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
+index 896b8f5bc8853..67b471c666c7e 100644
+--- a/net/openvswitch/meter.c
++++ b/net/openvswitch/meter.c
+@@ -450,7 +450,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+
+ err = attach_meter(meter_tbl, meter);
+ if (err)
+- goto exit_unlock;
++ goto exit_free_old_meter;
+
+ ovs_unlock();
+
+@@ -473,6 +473,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
++exit_free_old_meter:
++ ovs_meter_free(old_meter);
+ exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 2a2bc64f75cfd..62c0fbb9de812 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -271,7 +271,8 @@ static void packet_cached_dev_reset(struct packet_sock *po)
+
+ static bool packet_use_direct_xmit(const struct packet_sock *po)
+ {
+- return po->xmit == packet_direct_xmit;
++ /* Paired with WRITE_ONCE() in packet_setsockopt() */
++ return READ_ONCE(po->xmit) == packet_direct_xmit;
+ }
+
+ static u16 packet_pick_tx_queue(struct sk_buff *skb)
+@@ -367,18 +368,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
+ {
+ union tpacket_uhdr h;
+
++ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
++
+ h.raw = frame;
+ switch (po->tp_version) {
+ case TPACKET_V1:
+- h.h1->tp_status = status;
++ WRITE_ONCE(h.h1->tp_status, status);
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
+ break;
+ case TPACKET_V2:
+- h.h2->tp_status = status;
++ WRITE_ONCE(h.h2->tp_status, status);
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
+ break;
+ case TPACKET_V3:
+- h.h3->tp_status = status;
++ WRITE_ONCE(h.h3->tp_status, status);
+ flush_dcache_page(pgv_to_page(&h.h3->tp_status));
+ break;
+ default:
+@@ -395,17 +398,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
+
+ smp_rmb();
+
++ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
++
+ h.raw = frame;
+ switch (po->tp_version) {
+ case TPACKET_V1:
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
+- return h.h1->tp_status;
++ return READ_ONCE(h.h1->tp_status);
+ case TPACKET_V2:
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
+- return h.h2->tp_status;
++ return READ_ONCE(h.h2->tp_status);
+ case TPACKET_V3:
+ flush_dcache_page(pgv_to_page(&h.h3->tp_status));
+- return h.h3->tp_status;
++ return READ_ONCE(h.h3->tp_status);
+ default:
+ WARN(1, "TPACKET version not supported.\n");
+ BUG();
+@@ -1738,6 +1743,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
+ match->prot_hook.dev = po->prot_hook.dev;
+ match->prot_hook.func = packet_rcv_fanout;
+ match->prot_hook.af_packet_priv = match;
++ match->prot_hook.af_packet_net = read_pnet(&match->net);
+ match->prot_hook.id_match = match_fanout_group;
+ match->max_num_members = args->max_num_members;
+ list_add(&match->list, &fanout_list);
+@@ -1752,7 +1758,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
+ err = -ENOSPC;
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
+ __dev_remove_pack(&po->prot_hook);
+- po->fanout = match;
++
++ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
++ WRITE_ONCE(po->fanout, match);
++
+ po->rollover = rollover;
+ rollover = NULL;
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+@@ -1884,12 +1893,20 @@ oom:
+
+ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
+ {
++ int depth;
++
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
+ sock->type == SOCK_RAW) {
+ skb_reset_mac_header(skb);
+ skb->protocol = dev_parse_header_protocol(skb);
+ }
+
++ /* Move network header to the right position for VLAN tagged packets */
++ if (likely(skb->dev->type == ARPHRD_ETHER) &&
++ eth_type_vlan(skb->protocol) &&
++ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
++ skb_set_network_header(skb, depth);
++
+ skb_probe_transport_header(skb);
+ }
+
+@@ -1984,7 +2001,7 @@ retry:
+ goto retry;
+ }
+
+- if (!dev_validate_header(dev, skb->data, len)) {
++ if (!dev_validate_header(dev, skb->data, len) || !skb->len) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+@@ -2134,7 +2151,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
+ sll = &PACKET_SKB_CB(skb)->sa.ll;
+ sll->sll_hatype = dev->type;
+ sll->sll_pkttype = skb->pkt_type;
+- if (unlikely(po->origdev))
++ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
+@@ -2242,8 +2259,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ status |= TP_STATUS_CSUMNOTREADY;
+ else if (skb->pkt_type != PACKET_OUTGOING &&
+- (skb->ip_summed == CHECKSUM_COMPLETE ||
+- skb_csum_unnecessary(skb)))
++ skb_csum_unnecessary(skb))
+ status |= TP_STATUS_CSUM_VALID;
+
+ if (snaplen > res)
+@@ -2277,8 +2293,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
+ copy_skb = skb_get(skb);
+ skb_head = skb->data;
+ }
+- if (copy_skb)
++ if (copy_skb) {
++ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
++ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
+ skb_set_owner_r(copy_skb, sk);
++ }
+ }
+ snaplen = po->rx_ring.frame_size - macoff;
+ if ((int)snaplen < 0) {
+@@ -2405,7 +2424,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
+ sll->sll_hatype = dev->type;
+ sll->sll_protocol = skb->protocol;
+ sll->sll_pkttype = skb->pkt_type;
+- if (unlikely(po->origdev))
++ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
+@@ -2812,9 +2831,11 @@ tpacket_error:
+ packet_inc_pending(&po->tx_ring);
+
+ status = TP_STATUS_SEND_REQUEST;
+- err = po->xmit(skb);
+- if (unlikely(err > 0)) {
+- err = net_xmit_errno(err);
++ /* Paired with WRITE_ONCE() in packet_setsockopt() */
++ err = READ_ONCE(po->xmit)(skb);
++ if (unlikely(err != 0)) {
++ if (err > 0)
++ err = net_xmit_errno(err);
+ if (err && __packet_get_status(po, ph) ==
+ TP_STATUS_AVAILABLE) {
+ /* skb was destructed already */
+@@ -2981,8 +3002,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
+ if (err)
+ goto out_free;
+
+- if (sock->type == SOCK_RAW &&
+- !dev_validate_header(dev, skb->data, len)) {
++ if ((sock->type == SOCK_RAW &&
++ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
+ err = -EINVAL;
+ goto out_free;
+ }
+@@ -3001,6 +3022,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
+ skb->mark = sockc.mark;
+ skb->tstamp = sockc.transmit_time;
+
++ if (unlikely(extra_len == 4))
++ skb->no_fcs = 1;
++
++ packet_parse_headers(skb, sock);
++
+ if (has_vnet_hdr) {
+ err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
+ if (err)
+@@ -3009,14 +3035,14 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
+ virtio_net_hdr_set_proto(skb, &vnet_hdr);
+ }
+
+- packet_parse_headers(skb, sock);
+-
+- if (unlikely(extra_len == 4))
+- skb->no_fcs = 1;
+-
+- err = po->xmit(skb);
+- if (err > 0 && (err = net_xmit_errno(err)) != 0)
+- goto out_unlock;
++ /* Paired with WRITE_ONCE() in packet_setsockopt() */
++ err = READ_ONCE(po->xmit)(skb);
++ if (unlikely(err != 0)) {
++ if (err > 0)
++ err = net_xmit_errno(err);
++ if (err)
++ goto out_unlock;
++ }
+
+ dev_put(dev);
+
+@@ -3137,6 +3163,9 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+
+ lock_sock(sk);
+ spin_lock(&po->bind_lock);
++ if (!proto)
++ proto = po->num;
++
+ rcu_read_lock();
+
+ if (po->fanout) {
+@@ -3237,7 +3266,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
+ name[sizeof(uaddr->sa_data)] = 0;
+
+- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
++ return packet_do_bind(sk, name, 0, 0);
+ }
+
+ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+@@ -3254,8 +3283,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
+ if (sll->sll_family != AF_PACKET)
+ return -EINVAL;
+
+- return packet_do_bind(sk, NULL, sll->sll_ifindex,
+- sll->sll_protocol ? : pkt_sk(sk)->num);
++ return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
+ }
+
+ static struct proto packet_proto = {
+@@ -3323,6 +3351,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
+ po->prot_hook.func = packet_rcv_spkt;
+
+ po->prot_hook.af_packet_priv = sk;
++ po->prot_hook.af_packet_net = sock_net(sk);
+
+ if (proto) {
+ po->prot_hook.type = proto;
+@@ -3429,6 +3458,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (msg->msg_name) {
++ const size_t max_len = min(sizeof(skb->cb),
++ sizeof(struct sockaddr_storage));
+ int copy_len;
+
+ /* If the address length field is there to be filled
+@@ -3451,18 +3482,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ msg->msg_namelen = sizeof(struct sockaddr_ll);
+ }
+ }
++ if (WARN_ON_ONCE(copy_len > max_len)) {
++ copy_len = max_len;
++ msg->msg_namelen = copy_len;
++ }
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
+ }
+
+- if (pkt_sk(sk)->auxdata) {
++ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
+ struct tpacket_auxdata aux;
+
+ aux.tp_status = TP_STATUS_USER;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+ else if (skb->pkt_type != PACKET_OUTGOING &&
+- (skb->ip_summed == CHECKSUM_COMPLETE ||
+- skb_csum_unnecessary(skb)))
++ skb_csum_unnecessary(skb))
+ aux.tp_status |= TP_STATUS_CSUM_VALID;
+
+ aux.tp_len = origlen;
+@@ -3839,9 +3873,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+- lock_sock(sk);
+- po->auxdata = !!val;
+- release_sock(sk);
++ packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
+ return 0;
+ }
+ case PACKET_ORIGDEV:
+@@ -3853,9 +3885,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+- lock_sock(sk);
+- po->origdev = !!val;
+- release_sock(sk);
++ packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
+ return 0;
+ }
+ case PACKET_VNET_HDR:
+@@ -3904,7 +3934,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ }
+ case PACKET_FANOUT_DATA:
+ {
+- if (!po->fanout)
++ /* Paired with the WRITE_ONCE() in fanout_add() */
++ if (!READ_ONCE(po->fanout))
+ return -EINVAL;
+
+ return fanout_set_data(po, optval, optlen);
+@@ -3948,7 +3979,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+- po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
++ /* Paired with all lockless reads of po->xmit */
++ WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit);
+ return 0;
+ }
+ default:
+@@ -3999,10 +4031,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
+
+ break;
+ case PACKET_AUXDATA:
+- val = po->auxdata;
++ val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
+ break;
+ case PACKET_ORIGDEV:
+- val = po->origdev;
++ val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
+ break;
+ case PACKET_VNET_HDR:
+ val = po->has_vnet_hdr;
+@@ -4457,9 +4489,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
+ }
+
+ out_free_pg_vec:
+- bitmap_free(rx_owner_map);
+- if (pg_vec)
++ if (pg_vec) {
++ bitmap_free(rx_owner_map);
+ free_pg_vec(pg_vec, order, req->tp_block_nr);
++ }
+ out:
+ return err;
+ }
+diff --git a/net/packet/diag.c b/net/packet/diag.c
+index 07812ae5ca073..a68a84574c739 100644
+--- a/net/packet/diag.c
++++ b/net/packet/diag.c
+@@ -23,9 +23,9 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
+ pinfo.pdi_flags = 0;
+ if (po->running)
+ pinfo.pdi_flags |= PDI_RUNNING;
+- if (po->auxdata)
++ if (packet_sock_flag(po, PACKET_SOCK_AUXDATA))
+ pinfo.pdi_flags |= PDI_AUXDATA;
+- if (po->origdev)
++ if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
+ pinfo.pdi_flags |= PDI_ORIGDEV;
+ if (po->has_vnet_hdr)
+ pinfo.pdi_flags |= PDI_VNETHDR;
+@@ -143,7 +143,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ rp = nlmsg_data(nlh);
+ rp->pdiag_family = AF_PACKET;
+ rp->pdiag_type = sk->sk_type;
+- rp->pdiag_num = ntohs(po->num);
++ rp->pdiag_num = ntohs(READ_ONCE(po->num));
+ rp->pdiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, rp->pdiag_cookie);
+
+diff --git a/net/packet/internal.h b/net/packet/internal.h
+index 48af35b1aed25..3bae8ea7a36f5 100644
+--- a/net/packet/internal.h
++++ b/net/packet/internal.h
+@@ -116,10 +116,9 @@ struct packet_sock {
+ int copy_thresh;
+ spinlock_t bind_lock;
+ struct mutex pg_vec_lock;
++ unsigned long flags;
+ unsigned int running; /* bind_lock must be held */
+- unsigned int auxdata:1, /* writer must hold sock lock */
+- origdev:1,
+- has_vnet_hdr:1,
++ unsigned int has_vnet_hdr:1, /* writer must hold sock lock */
+ tp_loss:1,
+ tp_tx_has_off:1;
+ int pressure;
+@@ -144,4 +143,25 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
+ return (struct packet_sock *)sk;
+ }
+
++enum packet_sock_flags {
++ PACKET_SOCK_ORIGDEV,
++ PACKET_SOCK_AUXDATA,
++};
++
++static inline void packet_sock_flag_set(struct packet_sock *po,
++ enum packet_sock_flags flag,
++ bool val)
++{
++ if (val)
++ set_bit(flag, &po->flags);
++ else
++ clear_bit(flag, &po->flags);
++}
++
++static inline bool packet_sock_flag(const struct packet_sock *po,
++ enum packet_sock_flags flag)
++{
++ return test_bit(flag, &po->flags);
++}
++
+ #endif
+diff --git a/net/phonet/pep.c b/net/phonet/pep.c
+index a1525916885ae..65d463ad87707 100644
+--- a/net/phonet/pep.c
++++ b/net/phonet/pep.c
+@@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+
+ err = pep_accept_conn(newsk, skb);
+ if (err) {
++ __sock_put(sk);
+ sock_put(newsk);
+ newsk = NULL;
+ goto drop;
+@@ -946,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
+ ret = -EBUSY;
+ else if (sk->sk_state == TCP_ESTABLISHED)
+ ret = -EISCONN;
++ else if (!pn->pn_sk.sobject)
++ ret = -EADDRNOTAVAIL;
+ else
+ ret = pep_sock_enable(sk, NULL, 0);
+ release_sock(sk);
+diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
+index 1b1411d158a73..8e0605f88a73d 100644
+--- a/net/qrtr/Makefile
++++ b/net/qrtr/Makefile
+@@ -1,5 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+-obj-$(CONFIG_QRTR) := qrtr.o ns.o
++obj-$(CONFIG_QRTR) += qrtr.o
++qrtr-y := af_qrtr.o ns.o
+
+ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
+ qrtr-smd-y := smd.o
+diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
+new file mode 100644
+index 0000000000000..e0a27a404404f
+--- /dev/null
++++ b/net/qrtr/af_qrtr.c
+@@ -0,0 +1,1325 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (c) 2015, Sony Mobile Communications Inc.
++ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
++ */
++#include <linux/module.h>
++#include <linux/netlink.h>
++#include <linux/qrtr.h>
++#include <linux/termios.h> /* For TIOCINQ/OUTQ */
++#include <linux/spinlock.h>
++#include <linux/wait.h>
++
++#include <net/sock.h>
++
++#include "qrtr.h"
++
++#define QRTR_PROTO_VER_1 1
++#define QRTR_PROTO_VER_2 3
++
++/* auto-bind range */
++#define QRTR_MIN_EPH_SOCKET 0x4000
++#define QRTR_MAX_EPH_SOCKET 0x7fff
++#define QRTR_EPH_PORT_RANGE \
++ XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
++
++/**
++ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
++ * @version: protocol version
++ * @type: packet type; one of QRTR_TYPE_*
++ * @src_node_id: source node
++ * @src_port_id: source port
++ * @confirm_rx: boolean; whether a resume-tx packet should be sent in reply
++ * @size: length of packet, excluding this header
++ * @dst_node_id: destination node
++ * @dst_port_id: destination port
++ */
++struct qrtr_hdr_v1 {
++ __le32 version;
++ __le32 type;
++ __le32 src_node_id;
++ __le32 src_port_id;
++ __le32 confirm_rx;
++ __le32 size;
++ __le32 dst_node_id;
++ __le32 dst_port_id;
++} __packed;
++
++/**
++ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
++ * @version: protocol version
++ * @type: packet type; one of QRTR_TYPE_*
++ * @flags: bitmask of QRTR_FLAGS_*
++ * @optlen: length of optional header data
++ * @size: length of packet, excluding this header and optlen
++ * @src_node_id: source node
++ * @src_port_id: source port
++ * @dst_node_id: destination node
++ * @dst_port_id: destination port
++ */
++struct qrtr_hdr_v2 {
++ u8 version;
++ u8 type;
++ u8 flags;
++ u8 optlen;
++ __le32 size;
++ __le16 src_node_id;
++ __le16 src_port_id;
++ __le16 dst_node_id;
++ __le16 dst_port_id;
++};
++
++#define QRTR_FLAGS_CONFIRM_RX BIT(0)
++
++struct qrtr_cb {
++ u32 src_node;
++ u32 src_port;
++ u32 dst_node;
++ u32 dst_port;
++
++ u8 type;
++ u8 confirm_rx;
++};
++
++#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
++ sizeof(struct qrtr_hdr_v2))
++
++struct qrtr_sock {
++ /* WARNING: sk must be the first member */
++ struct sock sk;
++ struct sockaddr_qrtr us;
++ struct sockaddr_qrtr peer;
++};
++
++static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
++{
++ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0);
++ return container_of(sk, struct qrtr_sock, sk);
++}
++
++static unsigned int qrtr_local_nid = 1;
++
++/* for node ids */
++static RADIX_TREE(qrtr_nodes, GFP_ATOMIC);
++static DEFINE_SPINLOCK(qrtr_nodes_lock);
++/* broadcast list */
++static LIST_HEAD(qrtr_all_nodes);
++/* lock for qrtr_all_nodes and node reference */
++static DEFINE_MUTEX(qrtr_node_lock);
++
++/* local port allocation management */
++static DEFINE_XARRAY_ALLOC(qrtr_ports);
++
++/**
++ * struct qrtr_node - endpoint node
++ * @ep_lock: lock for endpoint management and callbacks
++ * @ep: endpoint
++ * @ref: reference count for node
++ * @nid: node id
++ * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port
++ * @qrtr_tx_lock: lock for qrtr_tx_flow inserts
++ * @rx_queue: receive queue
++ * @item: list item for broadcast list
++ */
++struct qrtr_node {
++ struct mutex ep_lock;
++ struct qrtr_endpoint *ep;
++ struct kref ref;
++ unsigned int nid;
++
++ struct radix_tree_root qrtr_tx_flow;
++ struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */
++
++ struct sk_buff_head rx_queue;
++ struct list_head item;
++};
++
++/**
++ * struct qrtr_tx_flow - tx flow control
++ * @resume_tx: waiters for a resume tx from the remote
++ * @pending: number of waiting senders
++ * @tx_failed: indicates that a message with confirm_rx flag was lost
++ */
++struct qrtr_tx_flow {
++ struct wait_queue_head resume_tx;
++ int pending;
++ int tx_failed;
++};
++
++#define QRTR_TX_FLOW_HIGH 10
++#define QRTR_TX_FLOW_LOW 5
++
++static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
++ int type, struct sockaddr_qrtr *from,
++ struct sockaddr_qrtr *to);
++static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
++ int type, struct sockaddr_qrtr *from,
++ struct sockaddr_qrtr *to);
++static struct qrtr_sock *qrtr_port_lookup(int port);
++static void qrtr_port_put(struct qrtr_sock *ipc);
++
++/* Release node resources and free the node.
++ *
++ * Do not call directly, use qrtr_node_release. To be used with
++ * kref_put_mutex. As such, the node mutex is expected to be locked on call.
++ */
++static void __qrtr_node_release(struct kref *kref)
++{
++ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
++ struct radix_tree_iter iter;
++ struct qrtr_tx_flow *flow;
++ unsigned long flags;
++ void __rcu **slot;
++
++ spin_lock_irqsave(&qrtr_nodes_lock, flags);
++ /* If the node is a bridge for other nodes, there are possibly
++ * multiple entries pointing to our released node, delete them all.
++ */
++ radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
++ if (*slot == node)
++ radix_tree_iter_delete(&qrtr_nodes, &iter, slot);
++ }
++ spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
++
++ list_del(&node->item);
++ mutex_unlock(&qrtr_node_lock);
++
++ skb_queue_purge(&node->rx_queue);
++
++ /* Free tx flow counters */
++ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
++ flow = *slot;
++ radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
++ kfree(flow);
++ }
++ kfree(node);
++}
++
++/* Increment reference to node. */
++static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node)
++{
++ if (node)
++ kref_get(&node->ref);
++ return node;
++}
++
++/* Decrement reference to node and release as necessary. */
++static void qrtr_node_release(struct qrtr_node *node)
++{
++ if (!node)
++ return;
++ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
++}
++
++/**
++ * qrtr_tx_resume() - reset flow control counter
++ * @node: qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on
++ * @skb: resume_tx packet
++ */
++static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb)
++{
++ struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data;
++ u64 remote_node = le32_to_cpu(pkt->client.node);
++ u32 remote_port = le32_to_cpu(pkt->client.port);
++ struct qrtr_tx_flow *flow;
++ unsigned long key;
++
++ key = remote_node << 32 | remote_port;
++
++ rcu_read_lock();
++ flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
++ rcu_read_unlock();
++ if (flow) {
++ spin_lock(&flow->resume_tx.lock);
++ flow->pending = 0;
++ spin_unlock(&flow->resume_tx.lock);
++ wake_up_interruptible_all(&flow->resume_tx);
++ }
++
++ consume_skb(skb);
++}
++
++/**
++ * qrtr_tx_wait() - flow control for outgoing packets
++ * @node: qrtr_node that the packet is to be sent to
++ * @dest_node: node id of the destination
++ * @dest_port: port number of the destination
++ * @type: type of message
++ *
++ * The flow control scheme is based around the low and high "watermarks". When
++ * the low watermark is passed the confirm_rx flag is set on the outgoing
++ * message, which will trigger the remote to send a control message of the type
++ * QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit
++ * further transmision should be paused.
++ *
++ * Return: 1 if confirm_rx should be set, 0 otherwise or errno failure
++ */
++static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port,
++ int type)
++{
++ unsigned long key = (u64)dest_node << 32 | dest_port;
++ struct qrtr_tx_flow *flow;
++ int confirm_rx = 0;
++ int ret;
++
++ /* Never set confirm_rx on non-data packets */
++ if (type != QRTR_TYPE_DATA)
++ return 0;
++
++ mutex_lock(&node->qrtr_tx_lock);
++ flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
++ if (!flow) {
++ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
++ if (flow) {
++ init_waitqueue_head(&flow->resume_tx);
++ if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) {
++ kfree(flow);
++ flow = NULL;
++ }
++ }
++ }
++ mutex_unlock(&node->qrtr_tx_lock);
++
++ /* Set confirm_rx if we were unable to find and allocate a flow */
++ if (!flow)
++ return 1;
++
++ spin_lock_irq(&flow->resume_tx.lock);
++ ret = wait_event_interruptible_locked_irq(flow->resume_tx,
++ flow->pending < QRTR_TX_FLOW_HIGH ||
++ flow->tx_failed ||
++ !node->ep);
++ if (ret < 0) {
++ confirm_rx = ret;
++ } else if (!node->ep) {
++ confirm_rx = -EPIPE;
++ } else if (flow->tx_failed) {
++ flow->tx_failed = 0;
++ confirm_rx = 1;
++ } else {
++ flow->pending++;
++ confirm_rx = flow->pending == QRTR_TX_FLOW_LOW;
++ }
++ spin_unlock_irq(&flow->resume_tx.lock);
++
++ return confirm_rx;
++}
++
++/**
++ * qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed
++ * @node: qrtr_node that the packet is to be sent to
++ * @dest_node: node id of the destination
++ * @dest_port: port number of the destination
++ *
++ * Signal that the transmission of a message with confirm_rx flag failed. The
++ * flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH,
++ * at which point transmission would stall forever waiting for the resume TX
++ * message associated with the dropped confirm_rx message.
++ * Work around this by marking the flow as having a failed transmission and
++ * cause the next transmission attempt to be sent with the confirm_rx.
++ */
++static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node,
++ int dest_port)
++{
++ unsigned long key = (u64)dest_node << 32 | dest_port;
++ struct qrtr_tx_flow *flow;
++
++ rcu_read_lock();
++ flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
++ rcu_read_unlock();
++ if (flow) {
++ spin_lock_irq(&flow->resume_tx.lock);
++ flow->tx_failed = 1;
++ spin_unlock_irq(&flow->resume_tx.lock);
++ }
++}
++
++/* Pass an outgoing packet socket buffer to the endpoint driver. */
++static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
++ int type, struct sockaddr_qrtr *from,
++ struct sockaddr_qrtr *to)
++{
++ struct qrtr_hdr_v1 *hdr;
++ size_t len = skb->len;
++ int rc, confirm_rx;
++
++ confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
++ if (confirm_rx < 0) {
++ kfree_skb(skb);
++ return confirm_rx;
++ }
++
++ hdr = skb_push(skb, sizeof(*hdr));
++ hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
++ hdr->type = cpu_to_le32(type);
++ hdr->src_node_id = cpu_to_le32(from->sq_node);
++ hdr->src_port_id = cpu_to_le32(from->sq_port);
++ if (to->sq_port == QRTR_PORT_CTRL) {
++ hdr->dst_node_id = cpu_to_le32(node->nid);
++ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
++ } else {
++ hdr->dst_node_id = cpu_to_le32(to->sq_node);
++ hdr->dst_port_id = cpu_to_le32(to->sq_port);
++ }
++
++ hdr->size = cpu_to_le32(len);
++ hdr->confirm_rx = !!confirm_rx;
++
++ rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
++
++ if (!rc) {
++ mutex_lock(&node->ep_lock);
++ rc = -ENODEV;
++ if (node->ep)
++ rc = node->ep->xmit(node->ep, skb);
++ else
++ kfree_skb(skb);
++ mutex_unlock(&node->ep_lock);
++ }
++ /* Need to ensure that a subsequent message carries the otherwise lost
++ * confirm_rx flag if we dropped this one */
++ if (rc && confirm_rx)
++ qrtr_tx_flow_failed(node, to->sq_node, to->sq_port);
++
++ return rc;
++}
++
++/* Lookup node by id.
++ *
++ * callers must release with qrtr_node_release()
++ */
++static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
++{
++ struct qrtr_node *node;
++ unsigned long flags;
++
++ mutex_lock(&qrtr_node_lock);
++ spin_lock_irqsave(&qrtr_nodes_lock, flags);
++ node = radix_tree_lookup(&qrtr_nodes, nid);
++ node = qrtr_node_acquire(node);
++ spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
++ mutex_unlock(&qrtr_node_lock);
++
++ return node;
++}
++
++/* Assign node id to node.
++ *
++ * This is mostly useful for automatic node id assignment, based on
++ * the source id in the incoming packet.
++ */
++static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
++{
++ unsigned long flags;
++
++ if (nid == QRTR_EP_NID_AUTO)
++ return;
++
++ spin_lock_irqsave(&qrtr_nodes_lock, flags);
++ radix_tree_insert(&qrtr_nodes, nid, node);
++ if (node->nid == QRTR_EP_NID_AUTO)
++ node->nid = nid;
++ spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
++}
++
++/**
++ * qrtr_endpoint_post() - post incoming data
++ * @ep: endpoint handle
++ * @data: data pointer
++ * @len: size of data in bytes
++ *
++ * Return: 0 on success; negative error code on failure
++ */
++int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
++{
++ struct qrtr_node *node = ep->node;
++ const struct qrtr_hdr_v1 *v1;
++ const struct qrtr_hdr_v2 *v2;
++ struct qrtr_sock *ipc;
++ struct sk_buff *skb;
++ struct qrtr_cb *cb;
++ size_t size;
++ unsigned int ver;
++ size_t hdrlen;
++
++ if (len == 0 || len & 3)
++ return -EINVAL;
++
++ skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
++ if (!skb)
++ return -ENOMEM;
++
++ cb = (struct qrtr_cb *)skb->cb;
++
++ /* Version field in v1 is little endian, so this works for both cases */
++ ver = *(u8*)data;
++
++ switch (ver) {
++ case QRTR_PROTO_VER_1:
++ if (len < sizeof(*v1))
++ goto err;
++ v1 = data;
++ hdrlen = sizeof(*v1);
++
++ cb->type = le32_to_cpu(v1->type);
++ cb->src_node = le32_to_cpu(v1->src_node_id);
++ cb->src_port = le32_to_cpu(v1->src_port_id);
++ cb->confirm_rx = !!v1->confirm_rx;
++ cb->dst_node = le32_to_cpu(v1->dst_node_id);
++ cb->dst_port = le32_to_cpu(v1->dst_port_id);
++
++ size = le32_to_cpu(v1->size);
++ break;
++ case QRTR_PROTO_VER_2:
++ if (len < sizeof(*v2))
++ goto err;
++ v2 = data;
++ hdrlen = sizeof(*v2) + v2->optlen;
++
++ cb->type = v2->type;
++ cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
++ cb->src_node = le16_to_cpu(v2->src_node_id);
++ cb->src_port = le16_to_cpu(v2->src_port_id);
++ cb->dst_node = le16_to_cpu(v2->dst_node_id);
++ cb->dst_port = le16_to_cpu(v2->dst_port_id);
++
++ if (cb->src_port == (u16)QRTR_PORT_CTRL)
++ cb->src_port = QRTR_PORT_CTRL;
++ if (cb->dst_port == (u16)QRTR_PORT_CTRL)
++ cb->dst_port = QRTR_PORT_CTRL;
++
++ size = le32_to_cpu(v2->size);
++ break;
++ default:
++ pr_err("qrtr: Invalid version %d\n", ver);
++ goto err;
++ }
++
++ if (!size || len != ALIGN(size, 4) + hdrlen)
++ goto err;
++
++ if ((cb->type == QRTR_TYPE_NEW_SERVER ||
++ cb->type == QRTR_TYPE_RESUME_TX) &&
++ size < sizeof(struct qrtr_ctrl_pkt))
++ goto err;
++
++ if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
++ cb->type != QRTR_TYPE_RESUME_TX)
++ goto err;
++
++ skb_put_data(skb, data + hdrlen, size);
++
++ qrtr_node_assign(node, cb->src_node);
++
++ if (cb->type == QRTR_TYPE_NEW_SERVER) {
++ /* Remote node endpoint can bridge other distant nodes */
++ const struct qrtr_ctrl_pkt *pkt;
++
++ pkt = data + hdrlen;
++ qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
++ }
++
++ if (cb->type == QRTR_TYPE_RESUME_TX) {
++ qrtr_tx_resume(node, skb);
++ } else {
++ ipc = qrtr_port_lookup(cb->dst_port);
++ if (!ipc)
++ goto err;
++
++ if (sock_queue_rcv_skb(&ipc->sk, skb)) {
++ qrtr_port_put(ipc);
++ goto err;
++ }
++
++ qrtr_port_put(ipc);
++ }
++
++ return 0;
++
++err:
++ kfree_skb(skb);
++ return -EINVAL;
++
++}
++EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
++
++/**
++ * qrtr_alloc_ctrl_packet() - allocate control packet skb
++ * @pkt: reference to qrtr_ctrl_pkt pointer
++ * @flags: the type of memory to allocate
++ *
++ * Returns newly allocated sk_buff, or NULL on failure
++ *
++ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
++ * on success returns a reference to the control packet in @pkt.
++ */
++static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt,
++ gfp_t flags)
++{
++ const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
++ struct sk_buff *skb;
++
++ skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags);
++ if (!skb)
++ return NULL;
++
++ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
++ *pkt = skb_put_zero(skb, pkt_len);
++
++ return skb;
++}
++
++/**
++ * qrtr_endpoint_register() - register a new endpoint
++ * @ep: endpoint to register
++ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment
++ * Return: 0 on success; negative error code on failure
++ *
++ * The specified endpoint must have the xmit function pointer set on call.
++ */
++int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
++{
++ struct qrtr_node *node;
++
++ if (!ep || !ep->xmit)
++ return -EINVAL;
++
++ node = kzalloc(sizeof(*node), GFP_KERNEL);
++ if (!node)
++ return -ENOMEM;
++
++ kref_init(&node->ref);
++ mutex_init(&node->ep_lock);
++ skb_queue_head_init(&node->rx_queue);
++ node->nid = QRTR_EP_NID_AUTO;
++ node->ep = ep;
++
++ INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL);
++ mutex_init(&node->qrtr_tx_lock);
++
++ qrtr_node_assign(node, nid);
++
++ mutex_lock(&qrtr_node_lock);
++ list_add(&node->item, &qrtr_all_nodes);
++ mutex_unlock(&qrtr_node_lock);
++ ep->node = node;
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
++
++/**
++ * qrtr_endpoint_unregister - unregister endpoint
++ * @ep: endpoint to unregister
++ */
++void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
++{
++ struct qrtr_node *node = ep->node;
++ struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
++ struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
++ struct radix_tree_iter iter;
++ struct qrtr_ctrl_pkt *pkt;
++ struct qrtr_tx_flow *flow;
++ struct sk_buff *skb;
++ unsigned long flags;
++ void __rcu **slot;
++
++ mutex_lock(&node->ep_lock);
++ node->ep = NULL;
++ mutex_unlock(&node->ep_lock);
++
++ /* Notify the local controller about the event */
++ spin_lock_irqsave(&qrtr_nodes_lock, flags);
++ radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
++ if (*slot != node)
++ continue;
++ src.sq_node = iter.index;
++ skb = qrtr_alloc_ctrl_packet(&pkt, GFP_ATOMIC);
++ if (skb) {
++ pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
++ qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
++ }
++ }
++ spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
++
++ /* Wake up any transmitters waiting for resume-tx from the node */
++ mutex_lock(&node->qrtr_tx_lock);
++ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
++ flow = *slot;
++ wake_up_interruptible_all(&flow->resume_tx);
++ }
++ mutex_unlock(&node->qrtr_tx_lock);
++
++ qrtr_node_release(node);
++ ep->node = NULL;
++}
++EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister);
++
++/* Lookup socket by port.
++ *
++ * Callers must release with qrtr_port_put()
++ */
++static struct qrtr_sock *qrtr_port_lookup(int port)
++{
++ struct qrtr_sock *ipc;
++
++ if (port == QRTR_PORT_CTRL)
++ port = 0;
++
++ rcu_read_lock();
++ ipc = xa_load(&qrtr_ports, port);
++ if (ipc)
++ sock_hold(&ipc->sk);
++ rcu_read_unlock();
++
++ return ipc;
++}
++
++/* Release acquired socket. */
++static void qrtr_port_put(struct qrtr_sock *ipc)
++{
++ sock_put(&ipc->sk);
++}
++
++/* Remove port assignment. */
++static void qrtr_port_remove(struct qrtr_sock *ipc)
++{
++ struct qrtr_ctrl_pkt *pkt;
++ struct sk_buff *skb;
++ int port = ipc->us.sq_port;
++ struct sockaddr_qrtr to;
++
++ to.sq_family = AF_QIPCRTR;
++ to.sq_node = QRTR_NODE_BCAST;
++ to.sq_port = QRTR_PORT_CTRL;
++
++ skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
++ if (skb) {
++ pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
++ pkt->client.node = cpu_to_le32(ipc->us.sq_node);
++ pkt->client.port = cpu_to_le32(ipc->us.sq_port);
++
++ skb_set_owner_w(skb, &ipc->sk);
++ qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
++ &to);
++ }
++
++ if (port == QRTR_PORT_CTRL)
++ port = 0;
++
++ __sock_put(&ipc->sk);
++
++ xa_erase(&qrtr_ports, port);
++
++ /* Ensure that if qrtr_port_lookup() did enter the RCU read section we
++ * wait for it to finish incrementing the refcount */
++ synchronize_rcu();
++}
++
++/* Assign port number to socket.
++ *
++ * Specify port in the integer pointed to by port, and it will be adjusted
++ * on return as necessary.
++ *
++ * Port may be:
++ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET]
++ * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN
++ * >QRTR_MIN_EPH_SOCKET: Specified; available to all
++ */
++static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
++{
++ int rc;
++
++ if (!*port) {
++ rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
++ GFP_KERNEL);
++ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
++ rc = -EACCES;
++ } else if (*port == QRTR_PORT_CTRL) {
++ rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
++ } else {
++ rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
++ }
++
++ if (rc == -EBUSY)
++ return -EADDRINUSE;
++ else if (rc < 0)
++ return rc;
++
++ sock_hold(&ipc->sk);
++
++ return 0;
++}
++
++/* Reset all non-control ports */
++static void qrtr_reset_ports(void)
++{
++ struct qrtr_sock *ipc;
++ unsigned long index;
++
++ rcu_read_lock();
++ xa_for_each_start(&qrtr_ports, index, ipc, 1) {
++ sock_hold(&ipc->sk);
++ ipc->sk.sk_err = ENETRESET;
++ sk_error_report(&ipc->sk);
++ sock_put(&ipc->sk);
++ }
++ rcu_read_unlock();
++}
++
++/* Bind socket to address.
++ *
++ * Socket should be locked upon call.
++ */
++static int __qrtr_bind(struct socket *sock,
++ const struct sockaddr_qrtr *addr, int zapped)
++{
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sock *sk = sock->sk;
++ int port;
++ int rc;
++
++ /* rebinding ok */
++ if (!zapped && addr->sq_port == ipc->us.sq_port)
++ return 0;
++
++ port = addr->sq_port;
++ rc = qrtr_port_assign(ipc, &port);
++ if (rc)
++ return rc;
++
++ /* unbind previous, if any */
++ if (!zapped)
++ qrtr_port_remove(ipc);
++ ipc->us.sq_port = port;
++
++ sock_reset_flag(sk, SOCK_ZAPPED);
++
++ /* Notify all open ports about the new controller */
++ if (port == QRTR_PORT_CTRL)
++ qrtr_reset_ports();
++
++ return 0;
++}
++
++/* Auto bind to an ephemeral port. */
++static int qrtr_autobind(struct socket *sock)
++{
++ struct sock *sk = sock->sk;
++ struct sockaddr_qrtr addr;
++
++ if (!sock_flag(sk, SOCK_ZAPPED))
++ return 0;
++
++ addr.sq_family = AF_QIPCRTR;
++ addr.sq_node = qrtr_local_nid;
++ addr.sq_port = 0;
++
++ return __qrtr_bind(sock, &addr, 1);
++}
++
++/* Bind socket to specified sockaddr. */
++static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
++{
++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sock *sk = sock->sk;
++ int rc;
++
++ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
++ return -EINVAL;
++
++ if (addr->sq_node != ipc->us.sq_node)
++ return -EINVAL;
++
++ lock_sock(sk);
++ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED));
++ release_sock(sk);
++
++ return rc;
++}
++
++/* Queue packet to local peer socket. */
++static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
++ int type, struct sockaddr_qrtr *from,
++ struct sockaddr_qrtr *to)
++{
++ struct qrtr_sock *ipc;
++ struct qrtr_cb *cb;
++
++ ipc = qrtr_port_lookup(to->sq_port);
++ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
++ if (ipc)
++ qrtr_port_put(ipc);
++ kfree_skb(skb);
++ return -ENODEV;
++ }
++
++ cb = (struct qrtr_cb *)skb->cb;
++ cb->src_node = from->sq_node;
++ cb->src_port = from->sq_port;
++
++ if (sock_queue_rcv_skb(&ipc->sk, skb)) {
++ qrtr_port_put(ipc);
++ kfree_skb(skb);
++ return -ENOSPC;
++ }
++
++ qrtr_port_put(ipc);
++
++ return 0;
++}
++
++/* Queue packet for broadcast. */
++static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
++ int type, struct sockaddr_qrtr *from,
++ struct sockaddr_qrtr *to)
++{
++ struct sk_buff *skbn;
++
++ mutex_lock(&qrtr_node_lock);
++ list_for_each_entry(node, &qrtr_all_nodes, item) {
++ skbn = skb_clone(skb, GFP_KERNEL);
++ if (!skbn)
++ break;
++ skb_set_owner_w(skbn, skb->sk);
++ qrtr_node_enqueue(node, skbn, type, from, to);
++ }
++ mutex_unlock(&qrtr_node_lock);
++
++ qrtr_local_enqueue(NULL, skb, type, from, to);
++
++ return 0;
++}
++
++static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
++{
++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
++ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
++ struct sockaddr_qrtr *, struct sockaddr_qrtr *);
++ __le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA);
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sock *sk = sock->sk;
++ struct qrtr_node *node;
++ struct sk_buff *skb;
++ size_t plen;
++ u32 type;
++ int rc;
++
++ if (msg->msg_flags & ~(MSG_DONTWAIT))
++ return -EINVAL;
++
++ if (len > 65535)
++ return -EMSGSIZE;
++
++ lock_sock(sk);
++
++ if (addr) {
++ if (msg->msg_namelen < sizeof(*addr)) {
++ release_sock(sk);
++ return -EINVAL;
++ }
++
++ if (addr->sq_family != AF_QIPCRTR) {
++ release_sock(sk);
++ return -EINVAL;
++ }
++
++ rc = qrtr_autobind(sock);
++ if (rc) {
++ release_sock(sk);
++ return rc;
++ }
++ } else if (sk->sk_state == TCP_ESTABLISHED) {
++ addr = &ipc->peer;
++ } else {
++ release_sock(sk);
++ return -ENOTCONN;
++ }
++
++ node = NULL;
++ if (addr->sq_node == QRTR_NODE_BCAST) {
++ if (addr->sq_port != QRTR_PORT_CTRL &&
++ qrtr_local_nid != QRTR_NODE_BCAST) {
++ release_sock(sk);
++ return -ENOTCONN;
++ }
++ enqueue_fn = qrtr_bcast_enqueue;
++ } else if (addr->sq_node == ipc->us.sq_node) {
++ enqueue_fn = qrtr_local_enqueue;
++ } else {
++ node = qrtr_node_lookup(addr->sq_node);
++ if (!node) {
++ release_sock(sk);
++ return -ECONNRESET;
++ }
++ enqueue_fn = qrtr_node_enqueue;
++ }
++
++ plen = (len + 3) & ~3;
++ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
++ msg->msg_flags & MSG_DONTWAIT, &rc);
++ if (!skb) {
++ rc = -ENOMEM;
++ goto out_node;
++ }
++
++ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
++
++ rc = memcpy_from_msg(skb_put(skb, len), msg, len);
++ if (rc) {
++ kfree_skb(skb);
++ goto out_node;
++ }
++
++ if (ipc->us.sq_port == QRTR_PORT_CTRL) {
++ if (len < 4) {
++ rc = -EINVAL;
++ kfree_skb(skb);
++ goto out_node;
++ }
++
++ /* control messages already require the type as 'command' */
++ skb_copy_bits(skb, 0, &qrtr_type, 4);
++ }
++
++ type = le32_to_cpu(qrtr_type);
++ rc = enqueue_fn(node, skb, type, &ipc->us, addr);
++ if (rc >= 0)
++ rc = len;
++
++out_node:
++ qrtr_node_release(node);
++ release_sock(sk);
++
++ return rc;
++}
++
++static int qrtr_send_resume_tx(struct qrtr_cb *cb)
++{
++ struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port };
++ struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port };
++ struct qrtr_ctrl_pkt *pkt;
++ struct qrtr_node *node;
++ struct sk_buff *skb;
++ int ret;
++
++ node = qrtr_node_lookup(remote.sq_node);
++ if (!node)
++ return -EINVAL;
++
++ skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
++ if (!skb)
++ return -ENOMEM;
++
++ pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
++ pkt->client.node = cpu_to_le32(cb->dst_node);
++ pkt->client.port = cpu_to_le32(cb->dst_port);
++
++ ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote);
++
++ qrtr_node_release(node);
++
++ return ret;
++}
++
++static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
++ size_t size, int flags)
++{
++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
++ struct sock *sk = sock->sk;
++ struct sk_buff *skb;
++ struct qrtr_cb *cb;
++ int copied, rc;
++
++ lock_sock(sk);
++
++ if (sock_flag(sk, SOCK_ZAPPED)) {
++ release_sock(sk);
++ return -EADDRNOTAVAIL;
++ }
++
++ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
++ flags & MSG_DONTWAIT, &rc);
++ if (!skb) {
++ release_sock(sk);
++ return rc;
++ }
++ cb = (struct qrtr_cb *)skb->cb;
++
++ copied = skb->len;
++ if (copied > size) {
++ copied = size;
++ msg->msg_flags |= MSG_TRUNC;
++ }
++
++ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
++ if (rc < 0)
++ goto out;
++ rc = copied;
++
++ if (addr) {
++ /* There is an anonymous 2-byte hole after sq_family,
++ * make sure to clear it.
++ */
++ memset(addr, 0, sizeof(*addr));
++
++ addr->sq_family = AF_QIPCRTR;
++ addr->sq_node = cb->src_node;
++ addr->sq_port = cb->src_port;
++ msg->msg_namelen = sizeof(*addr);
++ }
++
++out:
++ if (cb->confirm_rx)
++ qrtr_send_resume_tx(cb);
++
++ skb_free_datagram(sk, skb);
++ release_sock(sk);
++
++ return rc;
++}
++
++static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
++ int len, int flags)
++{
++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sock *sk = sock->sk;
++ int rc;
++
++ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
++ return -EINVAL;
++
++ lock_sock(sk);
++
++ sk->sk_state = TCP_CLOSE;
++ sock->state = SS_UNCONNECTED;
++
++ rc = qrtr_autobind(sock);
++ if (rc) {
++ release_sock(sk);
++ return rc;
++ }
++
++ ipc->peer = *addr;
++ sock->state = SS_CONNECTED;
++ sk->sk_state = TCP_ESTABLISHED;
++
++ release_sock(sk);
++
++ return 0;
++}
++
++static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
++ int peer)
++{
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sockaddr_qrtr qaddr;
++ struct sock *sk = sock->sk;
++
++ lock_sock(sk);
++ if (peer) {
++ if (sk->sk_state != TCP_ESTABLISHED) {
++ release_sock(sk);
++ return -ENOTCONN;
++ }
++
++ qaddr = ipc->peer;
++ } else {
++ qaddr = ipc->us;
++ }
++ release_sock(sk);
++
++ qaddr.sq_family = AF_QIPCRTR;
++
++ memcpy(saddr, &qaddr, sizeof(qaddr));
++
++ return sizeof(qaddr);
++}
++
++static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
++{
++ void __user *argp = (void __user *)arg;
++ struct qrtr_sock *ipc = qrtr_sk(sock->sk);
++ struct sock *sk = sock->sk;
++ struct sockaddr_qrtr *sq;
++ struct sk_buff *skb;
++ struct ifreq ifr;
++ long len = 0;
++ int rc = 0;
++
++ lock_sock(sk);
++
++ switch (cmd) {
++ case TIOCOUTQ:
++ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
++ if (len < 0)
++ len = 0;
++ rc = put_user(len, (int __user *)argp);
++ break;
++ case TIOCINQ:
++ skb = skb_peek(&sk->sk_receive_queue);
++ if (skb)
++ len = skb->len;
++ rc = put_user(len, (int __user *)argp);
++ break;
++ case SIOCGIFADDR:
++ if (get_user_ifreq(&ifr, NULL, argp)) {
++ rc = -EFAULT;
++ break;
++ }
++
++ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
++ *sq = ipc->us;
++ if (put_user_ifreq(&ifr, argp)) {
++ rc = -EFAULT;
++ break;
++ }
++ break;
++ case SIOCADDRT:
++ case SIOCDELRT:
++ case SIOCSIFADDR:
++ case SIOCGIFDSTADDR:
++ case SIOCSIFDSTADDR:
++ case SIOCGIFBRDADDR:
++ case SIOCSIFBRDADDR:
++ case SIOCGIFNETMASK:
++ case SIOCSIFNETMASK:
++ rc = -EINVAL;
++ break;
++ default:
++ rc = -ENOIOCTLCMD;
++ break;
++ }
++
++ release_sock(sk);
++
++ return rc;
++}
++
++static int qrtr_release(struct socket *sock)
++{
++ struct sock *sk = sock->sk;
++ struct qrtr_sock *ipc;
++
++ if (!sk)
++ return 0;
++
++ lock_sock(sk);
++
++ ipc = qrtr_sk(sk);
++ sk->sk_shutdown = SHUTDOWN_MASK;
++ if (!sock_flag(sk, SOCK_DEAD))
++ sk->sk_state_change(sk);
++
++ sock_set_flag(sk, SOCK_DEAD);
++ sock_orphan(sk);
++ sock->sk = NULL;
++
++ if (!sock_flag(sk, SOCK_ZAPPED))
++ qrtr_port_remove(ipc);
++
++ skb_queue_purge(&sk->sk_receive_queue);
++
++ release_sock(sk);
++ sock_put(sk);
++
++ return 0;
++}
++
++static const struct proto_ops qrtr_proto_ops = {
++ .owner = THIS_MODULE,
++ .family = AF_QIPCRTR,
++ .bind = qrtr_bind,
++ .connect = qrtr_connect,
++ .socketpair = sock_no_socketpair,
++ .accept = sock_no_accept,
++ .listen = sock_no_listen,
++ .sendmsg = qrtr_sendmsg,
++ .recvmsg = qrtr_recvmsg,
++ .getname = qrtr_getname,
++ .ioctl = qrtr_ioctl,
++ .gettstamp = sock_gettstamp,
++ .poll = datagram_poll,
++ .shutdown = sock_no_shutdown,
++ .release = qrtr_release,
++ .mmap = sock_no_mmap,
++ .sendpage = sock_no_sendpage,
++};
++
++static struct proto qrtr_proto = {
++ .name = "QIPCRTR",
++ .owner = THIS_MODULE,
++ .obj_size = sizeof(struct qrtr_sock),
++};
++
++static int qrtr_create(struct net *net, struct socket *sock,
++ int protocol, int kern)
++{
++ struct qrtr_sock *ipc;
++ struct sock *sk;
++
++ if (sock->type != SOCK_DGRAM)
++ return -EPROTOTYPE;
++
++ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern);
++ if (!sk)
++ return -ENOMEM;
++
++ sock_set_flag(sk, SOCK_ZAPPED);
++
++ sock_init_data(sock, sk);
++ sock->ops = &qrtr_proto_ops;
++
++ ipc = qrtr_sk(sk);
++ ipc->us.sq_family = AF_QIPCRTR;
++ ipc->us.sq_node = qrtr_local_nid;
++ ipc->us.sq_port = 0;
++
++ return 0;
++}
++
++static const struct net_proto_family qrtr_family = {
++ .owner = THIS_MODULE,
++ .family = AF_QIPCRTR,
++ .create = qrtr_create,
++};
++
++static int __init qrtr_proto_init(void)
++{
++ int rc;
++
++ rc = proto_register(&qrtr_proto, 1);
++ if (rc)
++ return rc;
++
++ rc = sock_register(&qrtr_family);
++ if (rc)
++ goto err_proto;
++
++ rc = qrtr_ns_init();
++ if (rc)
++ goto err_sock;
++
++ return 0;
++
++err_sock:
++ sock_unregister(qrtr_family.family);
++err_proto:
++ proto_unregister(&qrtr_proto);
++ return rc;
++}
++postcore_initcall(qrtr_proto_init);
++
++static void __exit qrtr_proto_fini(void)
++{
++ qrtr_ns_remove();
++ sock_unregister(qrtr_family.family);
++ proto_unregister(&qrtr_proto);
++}
++module_exit(qrtr_proto_fini);
++
++MODULE_DESCRIPTION("Qualcomm IPC-router driver");
++MODULE_LICENSE("GPL v2");
++MODULE_ALIAS_NETPROTO(PF_QIPCRTR);
+diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
+index fa611678af052..49e7cab43d24c 100644
+--- a/net/qrtr/mhi.c
++++ b/net/qrtr/mhi.c
+@@ -78,11 +78,6 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
+ struct qrtr_mhi_dev *qdev;
+ int rc;
+
+- /* start channels */
+- rc = mhi_prepare_for_transfer(mhi_dev);
+- if (rc)
+- return rc;
+-
+ qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
+ if (!qdev)
+ return -ENOMEM;
+@@ -96,6 +91,13 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
+ if (rc)
+ return rc;
+
++ /* start channels */
++ rc = mhi_prepare_for_transfer(mhi_dev);
++ if (rc) {
++ qrtr_endpoint_unregister(&qdev->ep);
++ return rc;
++ }
++
+ dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
+
+ return 0;
+diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
+index 1990d496fcfc0..3e40a1ba48f79 100644
+--- a/net/qrtr/ns.c
++++ b/net/qrtr/ns.c
+@@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
+
+ node->id = node_id;
+
+- radix_tree_insert(&nodes, node_id, node);
++ if (radix_tree_insert(&nodes, node_id, node)) {
++ kfree(node);
++ return NULL;
++ }
+
+ return node;
+ }
+@@ -270,7 +273,7 @@ err:
+ return NULL;
+ }
+
+-static int server_del(struct qrtr_node *node, unsigned int port)
++static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
+ {
+ struct qrtr_lookup *lookup;
+ struct qrtr_server *srv;
+@@ -283,7 +286,7 @@ static int server_del(struct qrtr_node *node, unsigned int port)
+ radix_tree_delete(&node->servers, port);
+
+ /* Broadcast the removal of local servers */
+- if (srv->node == qrtr_ns.local_node)
++ if (srv->node == qrtr_ns.local_node && bcast)
+ service_announce_del(&qrtr_ns.bcast_sq, srv);
+
+ /* Announce the service's disappearance to observers */
+@@ -369,7 +372,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
+ }
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+- server_del(node, srv->port);
++ server_del(node, srv->port, true);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+@@ -455,10 +458,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
+ kfree(lookup);
+ }
+
+- /* Remove the server belonging to this port */
++ /* Remove the server belonging to this port but don't broadcast
++ * DEL_SERVER. Neighbours would've already removed the server belonging
++ * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove().
++ */
+ node = node_get(node_id);
+ if (node)
+- server_del(node, port);
++ server_del(node, port, false);
+
+ /* Advertise the removal of this client to all local servers */
+ local_node = node_get(qrtr_ns.local_node);
+@@ -563,7 +569,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
+ if (!node)
+ return -ENOENT;
+
+- return server_del(node, port);
++ return server_del(node, port, true);
+ }
+
+ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
+diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
+deleted file mode 100644
+index ec23225297278..0000000000000
+--- a/net/qrtr/qrtr.c
++++ /dev/null
+@@ -1,1321 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Copyright (c) 2015, Sony Mobile Communications Inc.
+- * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+- */
+-#include <linux/module.h>
+-#include <linux/netlink.h>
+-#include <linux/qrtr.h>
+-#include <linux/termios.h> /* For TIOCINQ/OUTQ */
+-#include <linux/spinlock.h>
+-#include <linux/wait.h>
+-
+-#include <net/sock.h>
+-
+-#include "qrtr.h"
+-
+-#define QRTR_PROTO_VER_1 1
+-#define QRTR_PROTO_VER_2 3
+-
+-/* auto-bind range */
+-#define QRTR_MIN_EPH_SOCKET 0x4000
+-#define QRTR_MAX_EPH_SOCKET 0x7fff
+-#define QRTR_EPH_PORT_RANGE \
+- XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
+-
+-/**
+- * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
+- * @version: protocol version
+- * @type: packet type; one of QRTR_TYPE_*
+- * @src_node_id: source node
+- * @src_port_id: source port
+- * @confirm_rx: boolean; whether a resume-tx packet should be send in reply
+- * @size: length of packet, excluding this header
+- * @dst_node_id: destination node
+- * @dst_port_id: destination port
+- */
+-struct qrtr_hdr_v1 {
+- __le32 version;
+- __le32 type;
+- __le32 src_node_id;
+- __le32 src_port_id;
+- __le32 confirm_rx;
+- __le32 size;
+- __le32 dst_node_id;
+- __le32 dst_port_id;
+-} __packed;
+-
+-/**
+- * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+- * @version: protocol version
+- * @type: packet type; one of QRTR_TYPE_*
+- * @flags: bitmask of QRTR_FLAGS_*
+- * @optlen: length of optional header data
+- * @size: length of packet, excluding this header and optlen
+- * @src_node_id: source node
+- * @src_port_id: source port
+- * @dst_node_id: destination node
+- * @dst_port_id: destination port
+- */
+-struct qrtr_hdr_v2 {
+- u8 version;
+- u8 type;
+- u8 flags;
+- u8 optlen;
+- __le32 size;
+- __le16 src_node_id;
+- __le16 src_port_id;
+- __le16 dst_node_id;
+- __le16 dst_port_id;
+-};
+-
+-#define QRTR_FLAGS_CONFIRM_RX BIT(0)
+-
+-struct qrtr_cb {
+- u32 src_node;
+- u32 src_port;
+- u32 dst_node;
+- u32 dst_port;
+-
+- u8 type;
+- u8 confirm_rx;
+-};
+-
+-#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+- sizeof(struct qrtr_hdr_v2))
+-
+-struct qrtr_sock {
+- /* WARNING: sk must be the first member */
+- struct sock sk;
+- struct sockaddr_qrtr us;
+- struct sockaddr_qrtr peer;
+-};
+-
+-static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
+-{
+- BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0);
+- return container_of(sk, struct qrtr_sock, sk);
+-}
+-
+-static unsigned int qrtr_local_nid = 1;
+-
+-/* for node ids */
+-static RADIX_TREE(qrtr_nodes, GFP_ATOMIC);
+-static DEFINE_SPINLOCK(qrtr_nodes_lock);
+-/* broadcast list */
+-static LIST_HEAD(qrtr_all_nodes);
+-/* lock for qrtr_all_nodes and node reference */
+-static DEFINE_MUTEX(qrtr_node_lock);
+-
+-/* local port allocation management */
+-static DEFINE_XARRAY_ALLOC(qrtr_ports);
+-
+-/**
+- * struct qrtr_node - endpoint node
+- * @ep_lock: lock for endpoint management and callbacks
+- * @ep: endpoint
+- * @ref: reference count for node
+- * @nid: node id
+- * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port
+- * @qrtr_tx_lock: lock for qrtr_tx_flow inserts
+- * @rx_queue: receive queue
+- * @item: list item for broadcast list
+- */
+-struct qrtr_node {
+- struct mutex ep_lock;
+- struct qrtr_endpoint *ep;
+- struct kref ref;
+- unsigned int nid;
+-
+- struct radix_tree_root qrtr_tx_flow;
+- struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */
+-
+- struct sk_buff_head rx_queue;
+- struct list_head item;
+-};
+-
+-/**
+- * struct qrtr_tx_flow - tx flow control
+- * @resume_tx: waiters for a resume tx from the remote
+- * @pending: number of waiting senders
+- * @tx_failed: indicates that a message with confirm_rx flag was lost
+- */
+-struct qrtr_tx_flow {
+- struct wait_queue_head resume_tx;
+- int pending;
+- int tx_failed;
+-};
+-
+-#define QRTR_TX_FLOW_HIGH 10
+-#define QRTR_TX_FLOW_LOW 5
+-
+-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+- int type, struct sockaddr_qrtr *from,
+- struct sockaddr_qrtr *to);
+-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+- int type, struct sockaddr_qrtr *from,
+- struct sockaddr_qrtr *to);
+-static struct qrtr_sock *qrtr_port_lookup(int port);
+-static void qrtr_port_put(struct qrtr_sock *ipc);
+-
+-/* Release node resources and free the node.
+- *
+- * Do not call directly, use qrtr_node_release. To be used with
+- * kref_put_mutex. As such, the node mutex is expected to be locked on call.
+- */
+-static void __qrtr_node_release(struct kref *kref)
+-{
+- struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
+- struct radix_tree_iter iter;
+- struct qrtr_tx_flow *flow;
+- unsigned long flags;
+- void __rcu **slot;
+-
+- spin_lock_irqsave(&qrtr_nodes_lock, flags);
+- /* If the node is a bridge for other nodes, there are possibly
+- * multiple entries pointing to our released node, delete them all.
+- */
+- radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
+- if (*slot == node)
+- radix_tree_iter_delete(&qrtr_nodes, &iter, slot);
+- }
+- spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+-
+- list_del(&node->item);
+- mutex_unlock(&qrtr_node_lock);
+-
+- skb_queue_purge(&node->rx_queue);
+-
+- /* Free tx flow counters */
+- radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
+- flow = *slot;
+- radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot);
+- kfree(flow);
+- }
+- kfree(node);
+-}
+-
+-/* Increment reference to node. */
+-static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node)
+-{
+- if (node)
+- kref_get(&node->ref);
+- return node;
+-}
+-
+-/* Decrement reference to node and release as necessary. */
+-static void qrtr_node_release(struct qrtr_node *node)
+-{
+- if (!node)
+- return;
+- kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
+-}
+-
+-/**
+- * qrtr_tx_resume() - reset flow control counter
+- * @node: qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on
+- * @skb: resume_tx packet
+- */
+-static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb)
+-{
+- struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data;
+- u64 remote_node = le32_to_cpu(pkt->client.node);
+- u32 remote_port = le32_to_cpu(pkt->client.port);
+- struct qrtr_tx_flow *flow;
+- unsigned long key;
+-
+- key = remote_node << 32 | remote_port;
+-
+- rcu_read_lock();
+- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
+- rcu_read_unlock();
+- if (flow) {
+- spin_lock(&flow->resume_tx.lock);
+- flow->pending = 0;
+- spin_unlock(&flow->resume_tx.lock);
+- wake_up_interruptible_all(&flow->resume_tx);
+- }
+-
+- consume_skb(skb);
+-}
+-
+-/**
+- * qrtr_tx_wait() - flow control for outgoing packets
+- * @node: qrtr_node that the packet is to be send to
+- * @dest_node: node id of the destination
+- * @dest_port: port number of the destination
+- * @type: type of message
+- *
+- * The flow control scheme is based around the low and high "watermarks". When
+- * the low watermark is passed the confirm_rx flag is set on the outgoing
+- * message, which will trigger the remote to send a control message of the type
+- * QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit
+- * further transmision should be paused.
+- *
+- * Return: 1 if confirm_rx should be set, 0 otherwise or errno failure
+- */
+-static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port,
+- int type)
+-{
+- unsigned long key = (u64)dest_node << 32 | dest_port;
+- struct qrtr_tx_flow *flow;
+- int confirm_rx = 0;
+- int ret;
+-
+- /* Never set confirm_rx on non-data packets */
+- if (type != QRTR_TYPE_DATA)
+- return 0;
+-
+- mutex_lock(&node->qrtr_tx_lock);
+- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
+- if (!flow) {
+- flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+- if (flow) {
+- init_waitqueue_head(&flow->resume_tx);
+- if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) {
+- kfree(flow);
+- flow = NULL;
+- }
+- }
+- }
+- mutex_unlock(&node->qrtr_tx_lock);
+-
+- /* Set confirm_rx if we where unable to find and allocate a flow */
+- if (!flow)
+- return 1;
+-
+- spin_lock_irq(&flow->resume_tx.lock);
+- ret = wait_event_interruptible_locked_irq(flow->resume_tx,
+- flow->pending < QRTR_TX_FLOW_HIGH ||
+- flow->tx_failed ||
+- !node->ep);
+- if (ret < 0) {
+- confirm_rx = ret;
+- } else if (!node->ep) {
+- confirm_rx = -EPIPE;
+- } else if (flow->tx_failed) {
+- flow->tx_failed = 0;
+- confirm_rx = 1;
+- } else {
+- flow->pending++;
+- confirm_rx = flow->pending == QRTR_TX_FLOW_LOW;
+- }
+- spin_unlock_irq(&flow->resume_tx.lock);
+-
+- return confirm_rx;
+-}
+-
+-/**
+- * qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed
+- * @node: qrtr_node that the packet is to be send to
+- * @dest_node: node id of the destination
+- * @dest_port: port number of the destination
+- *
+- * Signal that the transmission of a message with confirm_rx flag failed. The
+- * flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH,
+- * at which point transmission would stall forever waiting for the resume TX
+- * message associated with the dropped confirm_rx message.
+- * Work around this by marking the flow as having a failed transmission and
+- * cause the next transmission attempt to be sent with the confirm_rx.
+- */
+-static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node,
+- int dest_port)
+-{
+- unsigned long key = (u64)dest_node << 32 | dest_port;
+- struct qrtr_tx_flow *flow;
+-
+- rcu_read_lock();
+- flow = radix_tree_lookup(&node->qrtr_tx_flow, key);
+- rcu_read_unlock();
+- if (flow) {
+- spin_lock_irq(&flow->resume_tx.lock);
+- flow->tx_failed = 1;
+- spin_unlock_irq(&flow->resume_tx.lock);
+- }
+-}
+-
+-/* Pass an outgoing packet socket buffer to the endpoint driver. */
+-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+- int type, struct sockaddr_qrtr *from,
+- struct sockaddr_qrtr *to)
+-{
+- struct qrtr_hdr_v1 *hdr;
+- size_t len = skb->len;
+- int rc, confirm_rx;
+-
+- confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type);
+- if (confirm_rx < 0) {
+- kfree_skb(skb);
+- return confirm_rx;
+- }
+-
+- hdr = skb_push(skb, sizeof(*hdr));
+- hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
+- hdr->type = cpu_to_le32(type);
+- hdr->src_node_id = cpu_to_le32(from->sq_node);
+- hdr->src_port_id = cpu_to_le32(from->sq_port);
+- if (to->sq_port == QRTR_PORT_CTRL) {
+- hdr->dst_node_id = cpu_to_le32(node->nid);
+- hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+- } else {
+- hdr->dst_node_id = cpu_to_le32(to->sq_node);
+- hdr->dst_port_id = cpu_to_le32(to->sq_port);
+- }
+-
+- hdr->size = cpu_to_le32(len);
+- hdr->confirm_rx = !!confirm_rx;
+-
+- rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
+-
+- if (!rc) {
+- mutex_lock(&node->ep_lock);
+- rc = -ENODEV;
+- if (node->ep)
+- rc = node->ep->xmit(node->ep, skb);
+- else
+- kfree_skb(skb);
+- mutex_unlock(&node->ep_lock);
+- }
+- /* Need to ensure that a subsequent message carries the otherwise lost
+- * confirm_rx flag if we dropped this one */
+- if (rc && confirm_rx)
+- qrtr_tx_flow_failed(node, to->sq_node, to->sq_port);
+-
+- return rc;
+-}
+-
+-/* Lookup node by id.
+- *
+- * callers must release with qrtr_node_release()
+- */
+-static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
+-{
+- struct qrtr_node *node;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&qrtr_nodes_lock, flags);
+- node = radix_tree_lookup(&qrtr_nodes, nid);
+- node = qrtr_node_acquire(node);
+- spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+-
+- return node;
+-}
+-
+-/* Assign node id to node.
+- *
+- * This is mostly useful for automatic node id assignment, based on
+- * the source id in the incoming packet.
+- */
+-static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
+-{
+- unsigned long flags;
+-
+- if (nid == QRTR_EP_NID_AUTO)
+- return;
+-
+- spin_lock_irqsave(&qrtr_nodes_lock, flags);
+- radix_tree_insert(&qrtr_nodes, nid, node);
+- if (node->nid == QRTR_EP_NID_AUTO)
+- node->nid = nid;
+- spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+-}
+-
+-/**
+- * qrtr_endpoint_post() - post incoming data
+- * @ep: endpoint handle
+- * @data: data pointer
+- * @len: size of data in bytes
+- *
+- * Return: 0 on success; negative error code on failure
+- */
+-int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
+-{
+- struct qrtr_node *node = ep->node;
+- const struct qrtr_hdr_v1 *v1;
+- const struct qrtr_hdr_v2 *v2;
+- struct qrtr_sock *ipc;
+- struct sk_buff *skb;
+- struct qrtr_cb *cb;
+- size_t size;
+- unsigned int ver;
+- size_t hdrlen;
+-
+- if (len == 0 || len & 3)
+- return -EINVAL;
+-
+- skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
+- if (!skb)
+- return -ENOMEM;
+-
+- cb = (struct qrtr_cb *)skb->cb;
+-
+- /* Version field in v1 is little endian, so this works for both cases */
+- ver = *(u8*)data;
+-
+- switch (ver) {
+- case QRTR_PROTO_VER_1:
+- if (len < sizeof(*v1))
+- goto err;
+- v1 = data;
+- hdrlen = sizeof(*v1);
+-
+- cb->type = le32_to_cpu(v1->type);
+- cb->src_node = le32_to_cpu(v1->src_node_id);
+- cb->src_port = le32_to_cpu(v1->src_port_id);
+- cb->confirm_rx = !!v1->confirm_rx;
+- cb->dst_node = le32_to_cpu(v1->dst_node_id);
+- cb->dst_port = le32_to_cpu(v1->dst_port_id);
+-
+- size = le32_to_cpu(v1->size);
+- break;
+- case QRTR_PROTO_VER_2:
+- if (len < sizeof(*v2))
+- goto err;
+- v2 = data;
+- hdrlen = sizeof(*v2) + v2->optlen;
+-
+- cb->type = v2->type;
+- cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+- cb->src_node = le16_to_cpu(v2->src_node_id);
+- cb->src_port = le16_to_cpu(v2->src_port_id);
+- cb->dst_node = le16_to_cpu(v2->dst_node_id);
+- cb->dst_port = le16_to_cpu(v2->dst_port_id);
+-
+- if (cb->src_port == (u16)QRTR_PORT_CTRL)
+- cb->src_port = QRTR_PORT_CTRL;
+- if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+- cb->dst_port = QRTR_PORT_CTRL;
+-
+- size = le32_to_cpu(v2->size);
+- break;
+- default:
+- pr_err("qrtr: Invalid version %d\n", ver);
+- goto err;
+- }
+-
+- if (!size || len != ALIGN(size, 4) + hdrlen)
+- goto err;
+-
+- if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
+- cb->type != QRTR_TYPE_RESUME_TX)
+- goto err;
+-
+- skb_put_data(skb, data + hdrlen, size);
+-
+- qrtr_node_assign(node, cb->src_node);
+-
+- if (cb->type == QRTR_TYPE_NEW_SERVER) {
+- /* Remote node endpoint can bridge other distant nodes */
+- const struct qrtr_ctrl_pkt *pkt;
+-
+- if (size < sizeof(*pkt))
+- goto err;
+-
+- pkt = data + hdrlen;
+- qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
+- }
+-
+- if (cb->type == QRTR_TYPE_RESUME_TX) {
+- qrtr_tx_resume(node, skb);
+- } else {
+- ipc = qrtr_port_lookup(cb->dst_port);
+- if (!ipc)
+- goto err;
+-
+- if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+- qrtr_port_put(ipc);
+- goto err;
+- }
+-
+- qrtr_port_put(ipc);
+- }
+-
+- return 0;
+-
+-err:
+- kfree_skb(skb);
+- return -EINVAL;
+-
+-}
+-EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+-
+-/**
+- * qrtr_alloc_ctrl_packet() - allocate control packet skb
+- * @pkt: reference to qrtr_ctrl_pkt pointer
+- * @flags: the type of memory to allocate
+- *
+- * Returns newly allocated sk_buff, or NULL on failure
+- *
+- * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+- * on success returns a reference to the control packet in @pkt.
+- */
+-static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt,
+- gfp_t flags)
+-{
+- const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
+- struct sk_buff *skb;
+-
+- skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags);
+- if (!skb)
+- return NULL;
+-
+- skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+- *pkt = skb_put_zero(skb, pkt_len);
+-
+- return skb;
+-}
+-
+-/**
+- * qrtr_endpoint_register() - register a new endpoint
+- * @ep: endpoint to register
+- * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment
+- * Return: 0 on success; negative error code on failure
+- *
+- * The specified endpoint must have the xmit function pointer set on call.
+- */
+-int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
+-{
+- struct qrtr_node *node;
+-
+- if (!ep || !ep->xmit)
+- return -EINVAL;
+-
+- node = kzalloc(sizeof(*node), GFP_KERNEL);
+- if (!node)
+- return -ENOMEM;
+-
+- kref_init(&node->ref);
+- mutex_init(&node->ep_lock);
+- skb_queue_head_init(&node->rx_queue);
+- node->nid = QRTR_EP_NID_AUTO;
+- node->ep = ep;
+-
+- INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL);
+- mutex_init(&node->qrtr_tx_lock);
+-
+- qrtr_node_assign(node, nid);
+-
+- mutex_lock(&qrtr_node_lock);
+- list_add(&node->item, &qrtr_all_nodes);
+- mutex_unlock(&qrtr_node_lock);
+- ep->node = node;
+-
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
+-
+-/**
+- * qrtr_endpoint_unregister - unregister endpoint
+- * @ep: endpoint to unregister
+- */
+-void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
+-{
+- struct qrtr_node *node = ep->node;
+- struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+- struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+- struct radix_tree_iter iter;
+- struct qrtr_ctrl_pkt *pkt;
+- struct qrtr_tx_flow *flow;
+- struct sk_buff *skb;
+- unsigned long flags;
+- void __rcu **slot;
+-
+- mutex_lock(&node->ep_lock);
+- node->ep = NULL;
+- mutex_unlock(&node->ep_lock);
+-
+- /* Notify the local controller about the event */
+- spin_lock_irqsave(&qrtr_nodes_lock, flags);
+- radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) {
+- if (*slot != node)
+- continue;
+- src.sq_node = iter.index;
+- skb = qrtr_alloc_ctrl_packet(&pkt, GFP_ATOMIC);
+- if (skb) {
+- pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
+- qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+- }
+- }
+- spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+-
+- /* Wake up any transmitters waiting for resume-tx from the node */
+- mutex_lock(&node->qrtr_tx_lock);
+- radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) {
+- flow = *slot;
+- wake_up_interruptible_all(&flow->resume_tx);
+- }
+- mutex_unlock(&node->qrtr_tx_lock);
+-
+- qrtr_node_release(node);
+- ep->node = NULL;
+-}
+-EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister);
+-
+-/* Lookup socket by port.
+- *
+- * Callers must release with qrtr_port_put()
+- */
+-static struct qrtr_sock *qrtr_port_lookup(int port)
+-{
+- struct qrtr_sock *ipc;
+-
+- if (port == QRTR_PORT_CTRL)
+- port = 0;
+-
+- rcu_read_lock();
+- ipc = xa_load(&qrtr_ports, port);
+- if (ipc)
+- sock_hold(&ipc->sk);
+- rcu_read_unlock();
+-
+- return ipc;
+-}
+-
+-/* Release acquired socket. */
+-static void qrtr_port_put(struct qrtr_sock *ipc)
+-{
+- sock_put(&ipc->sk);
+-}
+-
+-/* Remove port assignment. */
+-static void qrtr_port_remove(struct qrtr_sock *ipc)
+-{
+- struct qrtr_ctrl_pkt *pkt;
+- struct sk_buff *skb;
+- int port = ipc->us.sq_port;
+- struct sockaddr_qrtr to;
+-
+- to.sq_family = AF_QIPCRTR;
+- to.sq_node = QRTR_NODE_BCAST;
+- to.sq_port = QRTR_PORT_CTRL;
+-
+- skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
+- if (skb) {
+- pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+- pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+- pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+-
+- skb_set_owner_w(skb, &ipc->sk);
+- qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+- &to);
+- }
+-
+- if (port == QRTR_PORT_CTRL)
+- port = 0;
+-
+- __sock_put(&ipc->sk);
+-
+- xa_erase(&qrtr_ports, port);
+-
+- /* Ensure that if qrtr_port_lookup() did enter the RCU read section we
+- * wait for it to up increment the refcount */
+- synchronize_rcu();
+-}
+-
+-/* Assign port number to socket.
+- *
+- * Specify port in the integer pointed to by port, and it will be adjusted
+- * on return as necesssary.
+- *
+- * Port may be:
+- * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET]
+- * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN
+- * >QRTR_MIN_EPH_SOCKET: Specified; available to all
+- */
+-static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
+-{
+- int rc;
+-
+- if (!*port) {
+- rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
+- GFP_KERNEL);
+- } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
+- rc = -EACCES;
+- } else if (*port == QRTR_PORT_CTRL) {
+- rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
+- } else {
+- rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
+- }
+-
+- if (rc == -EBUSY)
+- return -EADDRINUSE;
+- else if (rc < 0)
+- return rc;
+-
+- sock_hold(&ipc->sk);
+-
+- return 0;
+-}
+-
+-/* Reset all non-control ports */
+-static void qrtr_reset_ports(void)
+-{
+- struct qrtr_sock *ipc;
+- unsigned long index;
+-
+- rcu_read_lock();
+- xa_for_each_start(&qrtr_ports, index, ipc, 1) {
+- sock_hold(&ipc->sk);
+- ipc->sk.sk_err = ENETRESET;
+- sk_error_report(&ipc->sk);
+- sock_put(&ipc->sk);
+- }
+- rcu_read_unlock();
+-}
+-
+-/* Bind socket to address.
+- *
+- * Socket should be locked upon call.
+- */
+-static int __qrtr_bind(struct socket *sock,
+- const struct sockaddr_qrtr *addr, int zapped)
+-{
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sock *sk = sock->sk;
+- int port;
+- int rc;
+-
+- /* rebinding ok */
+- if (!zapped && addr->sq_port == ipc->us.sq_port)
+- return 0;
+-
+- port = addr->sq_port;
+- rc = qrtr_port_assign(ipc, &port);
+- if (rc)
+- return rc;
+-
+- /* unbind previous, if any */
+- if (!zapped)
+- qrtr_port_remove(ipc);
+- ipc->us.sq_port = port;
+-
+- sock_reset_flag(sk, SOCK_ZAPPED);
+-
+- /* Notify all open ports about the new controller */
+- if (port == QRTR_PORT_CTRL)
+- qrtr_reset_ports();
+-
+- return 0;
+-}
+-
+-/* Auto bind to an ephemeral port. */
+-static int qrtr_autobind(struct socket *sock)
+-{
+- struct sock *sk = sock->sk;
+- struct sockaddr_qrtr addr;
+-
+- if (!sock_flag(sk, SOCK_ZAPPED))
+- return 0;
+-
+- addr.sq_family = AF_QIPCRTR;
+- addr.sq_node = qrtr_local_nid;
+- addr.sq_port = 0;
+-
+- return __qrtr_bind(sock, &addr, 1);
+-}
+-
+-/* Bind socket to specified sockaddr. */
+-static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
+-{
+- DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sock *sk = sock->sk;
+- int rc;
+-
+- if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
+- return -EINVAL;
+-
+- if (addr->sq_node != ipc->us.sq_node)
+- return -EINVAL;
+-
+- lock_sock(sk);
+- rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED));
+- release_sock(sk);
+-
+- return rc;
+-}
+-
+-/* Queue packet to local peer socket. */
+-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+- int type, struct sockaddr_qrtr *from,
+- struct sockaddr_qrtr *to)
+-{
+- struct qrtr_sock *ipc;
+- struct qrtr_cb *cb;
+-
+- ipc = qrtr_port_lookup(to->sq_port);
+- if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+- if (ipc)
+- qrtr_port_put(ipc);
+- kfree_skb(skb);
+- return -ENODEV;
+- }
+-
+- cb = (struct qrtr_cb *)skb->cb;
+- cb->src_node = from->sq_node;
+- cb->src_port = from->sq_port;
+-
+- if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+- qrtr_port_put(ipc);
+- kfree_skb(skb);
+- return -ENOSPC;
+- }
+-
+- qrtr_port_put(ipc);
+-
+- return 0;
+-}
+-
+-/* Queue packet for broadcast. */
+-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+- int type, struct sockaddr_qrtr *from,
+- struct sockaddr_qrtr *to)
+-{
+- struct sk_buff *skbn;
+-
+- mutex_lock(&qrtr_node_lock);
+- list_for_each_entry(node, &qrtr_all_nodes, item) {
+- skbn = skb_clone(skb, GFP_KERNEL);
+- if (!skbn)
+- break;
+- skb_set_owner_w(skbn, skb->sk);
+- qrtr_node_enqueue(node, skbn, type, from, to);
+- }
+- mutex_unlock(&qrtr_node_lock);
+-
+- qrtr_local_enqueue(NULL, skb, type, from, to);
+-
+- return 0;
+-}
+-
+-static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+-{
+- DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
+- int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+- struct sockaddr_qrtr *, struct sockaddr_qrtr *);
+- __le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA);
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sock *sk = sock->sk;
+- struct qrtr_node *node;
+- struct sk_buff *skb;
+- size_t plen;
+- u32 type;
+- int rc;
+-
+- if (msg->msg_flags & ~(MSG_DONTWAIT))
+- return -EINVAL;
+-
+- if (len > 65535)
+- return -EMSGSIZE;
+-
+- lock_sock(sk);
+-
+- if (addr) {
+- if (msg->msg_namelen < sizeof(*addr)) {
+- release_sock(sk);
+- return -EINVAL;
+- }
+-
+- if (addr->sq_family != AF_QIPCRTR) {
+- release_sock(sk);
+- return -EINVAL;
+- }
+-
+- rc = qrtr_autobind(sock);
+- if (rc) {
+- release_sock(sk);
+- return rc;
+- }
+- } else if (sk->sk_state == TCP_ESTABLISHED) {
+- addr = &ipc->peer;
+- } else {
+- release_sock(sk);
+- return -ENOTCONN;
+- }
+-
+- node = NULL;
+- if (addr->sq_node == QRTR_NODE_BCAST) {
+- if (addr->sq_port != QRTR_PORT_CTRL &&
+- qrtr_local_nid != QRTR_NODE_BCAST) {
+- release_sock(sk);
+- return -ENOTCONN;
+- }
+- enqueue_fn = qrtr_bcast_enqueue;
+- } else if (addr->sq_node == ipc->us.sq_node) {
+- enqueue_fn = qrtr_local_enqueue;
+- } else {
+- node = qrtr_node_lookup(addr->sq_node);
+- if (!node) {
+- release_sock(sk);
+- return -ECONNRESET;
+- }
+- enqueue_fn = qrtr_node_enqueue;
+- }
+-
+- plen = (len + 3) & ~3;
+- skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
+- msg->msg_flags & MSG_DONTWAIT, &rc);
+- if (!skb) {
+- rc = -ENOMEM;
+- goto out_node;
+- }
+-
+- skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+-
+- rc = memcpy_from_msg(skb_put(skb, len), msg, len);
+- if (rc) {
+- kfree_skb(skb);
+- goto out_node;
+- }
+-
+- if (ipc->us.sq_port == QRTR_PORT_CTRL) {
+- if (len < 4) {
+- rc = -EINVAL;
+- kfree_skb(skb);
+- goto out_node;
+- }
+-
+- /* control messages already require the type as 'command' */
+- skb_copy_bits(skb, 0, &qrtr_type, 4);
+- }
+-
+- type = le32_to_cpu(qrtr_type);
+- rc = enqueue_fn(node, skb, type, &ipc->us, addr);
+- if (rc >= 0)
+- rc = len;
+-
+-out_node:
+- qrtr_node_release(node);
+- release_sock(sk);
+-
+- return rc;
+-}
+-
+-static int qrtr_send_resume_tx(struct qrtr_cb *cb)
+-{
+- struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port };
+- struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port };
+- struct qrtr_ctrl_pkt *pkt;
+- struct qrtr_node *node;
+- struct sk_buff *skb;
+- int ret;
+-
+- node = qrtr_node_lookup(remote.sq_node);
+- if (!node)
+- return -EINVAL;
+-
+- skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL);
+- if (!skb)
+- return -ENOMEM;
+-
+- pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+- pkt->client.node = cpu_to_le32(cb->dst_node);
+- pkt->client.port = cpu_to_le32(cb->dst_port);
+-
+- ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote);
+-
+- qrtr_node_release(node);
+-
+- return ret;
+-}
+-
+-static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
+- size_t size, int flags)
+-{
+- DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
+- struct sock *sk = sock->sk;
+- struct sk_buff *skb;
+- struct qrtr_cb *cb;
+- int copied, rc;
+-
+- lock_sock(sk);
+-
+- if (sock_flag(sk, SOCK_ZAPPED)) {
+- release_sock(sk);
+- return -EADDRNOTAVAIL;
+- }
+-
+- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+- flags & MSG_DONTWAIT, &rc);
+- if (!skb) {
+- release_sock(sk);
+- return rc;
+- }
+- cb = (struct qrtr_cb *)skb->cb;
+-
+- copied = skb->len;
+- if (copied > size) {
+- copied = size;
+- msg->msg_flags |= MSG_TRUNC;
+- }
+-
+- rc = skb_copy_datagram_msg(skb, 0, msg, copied);
+- if (rc < 0)
+- goto out;
+- rc = copied;
+-
+- if (addr) {
+- /* There is an anonymous 2-byte hole after sq_family,
+- * make sure to clear it.
+- */
+- memset(addr, 0, sizeof(*addr));
+-
+- addr->sq_family = AF_QIPCRTR;
+- addr->sq_node = cb->src_node;
+- addr->sq_port = cb->src_port;
+- msg->msg_namelen = sizeof(*addr);
+- }
+-
+-out:
+- if (cb->confirm_rx)
+- qrtr_send_resume_tx(cb);
+-
+- skb_free_datagram(sk, skb);
+- release_sock(sk);
+-
+- return rc;
+-}
+-
+-static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
+- int len, int flags)
+-{
+- DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sock *sk = sock->sk;
+- int rc;
+-
+- if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
+- return -EINVAL;
+-
+- lock_sock(sk);
+-
+- sk->sk_state = TCP_CLOSE;
+- sock->state = SS_UNCONNECTED;
+-
+- rc = qrtr_autobind(sock);
+- if (rc) {
+- release_sock(sk);
+- return rc;
+- }
+-
+- ipc->peer = *addr;
+- sock->state = SS_CONNECTED;
+- sk->sk_state = TCP_ESTABLISHED;
+-
+- release_sock(sk);
+-
+- return 0;
+-}
+-
+-static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
+- int peer)
+-{
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sockaddr_qrtr qaddr;
+- struct sock *sk = sock->sk;
+-
+- lock_sock(sk);
+- if (peer) {
+- if (sk->sk_state != TCP_ESTABLISHED) {
+- release_sock(sk);
+- return -ENOTCONN;
+- }
+-
+- qaddr = ipc->peer;
+- } else {
+- qaddr = ipc->us;
+- }
+- release_sock(sk);
+-
+- qaddr.sq_family = AF_QIPCRTR;
+-
+- memcpy(saddr, &qaddr, sizeof(qaddr));
+-
+- return sizeof(qaddr);
+-}
+-
+-static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+-{
+- void __user *argp = (void __user *)arg;
+- struct qrtr_sock *ipc = qrtr_sk(sock->sk);
+- struct sock *sk = sock->sk;
+- struct sockaddr_qrtr *sq;
+- struct sk_buff *skb;
+- struct ifreq ifr;
+- long len = 0;
+- int rc = 0;
+-
+- lock_sock(sk);
+-
+- switch (cmd) {
+- case TIOCOUTQ:
+- len = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+- if (len < 0)
+- len = 0;
+- rc = put_user(len, (int __user *)argp);
+- break;
+- case TIOCINQ:
+- skb = skb_peek(&sk->sk_receive_queue);
+- if (skb)
+- len = skb->len;
+- rc = put_user(len, (int __user *)argp);
+- break;
+- case SIOCGIFADDR:
+- if (get_user_ifreq(&ifr, NULL, argp)) {
+- rc = -EFAULT;
+- break;
+- }
+-
+- sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
+- *sq = ipc->us;
+- if (put_user_ifreq(&ifr, argp)) {
+- rc = -EFAULT;
+- break;
+- }
+- break;
+- case SIOCADDRT:
+- case SIOCDELRT:
+- case SIOCSIFADDR:
+- case SIOCGIFDSTADDR:
+- case SIOCSIFDSTADDR:
+- case SIOCGIFBRDADDR:
+- case SIOCSIFBRDADDR:
+- case SIOCGIFNETMASK:
+- case SIOCSIFNETMASK:
+- rc = -EINVAL;
+- break;
+- default:
+- rc = -ENOIOCTLCMD;
+- break;
+- }
+-
+- release_sock(sk);
+-
+- return rc;
+-}
+-
+-static int qrtr_release(struct socket *sock)
+-{
+- struct sock *sk = sock->sk;
+- struct qrtr_sock *ipc;
+-
+- if (!sk)
+- return 0;
+-
+- lock_sock(sk);
+-
+- ipc = qrtr_sk(sk);
+- sk->sk_shutdown = SHUTDOWN_MASK;
+- if (!sock_flag(sk, SOCK_DEAD))
+- sk->sk_state_change(sk);
+-
+- sock_set_flag(sk, SOCK_DEAD);
+- sock_orphan(sk);
+- sock->sk = NULL;
+-
+- if (!sock_flag(sk, SOCK_ZAPPED))
+- qrtr_port_remove(ipc);
+-
+- skb_queue_purge(&sk->sk_receive_queue);
+-
+- release_sock(sk);
+- sock_put(sk);
+-
+- return 0;
+-}
+-
+-static const struct proto_ops qrtr_proto_ops = {
+- .owner = THIS_MODULE,
+- .family = AF_QIPCRTR,
+- .bind = qrtr_bind,
+- .connect = qrtr_connect,
+- .socketpair = sock_no_socketpair,
+- .accept = sock_no_accept,
+- .listen = sock_no_listen,
+- .sendmsg = qrtr_sendmsg,
+- .recvmsg = qrtr_recvmsg,
+- .getname = qrtr_getname,
+- .ioctl = qrtr_ioctl,
+- .gettstamp = sock_gettstamp,
+- .poll = datagram_poll,
+- .shutdown = sock_no_shutdown,
+- .release = qrtr_release,
+- .mmap = sock_no_mmap,
+- .sendpage = sock_no_sendpage,
+-};
+-
+-static struct proto qrtr_proto = {
+- .name = "QIPCRTR",
+- .owner = THIS_MODULE,
+- .obj_size = sizeof(struct qrtr_sock),
+-};
+-
+-static int qrtr_create(struct net *net, struct socket *sock,
+- int protocol, int kern)
+-{
+- struct qrtr_sock *ipc;
+- struct sock *sk;
+-
+- if (sock->type != SOCK_DGRAM)
+- return -EPROTOTYPE;
+-
+- sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern);
+- if (!sk)
+- return -ENOMEM;
+-
+- sock_set_flag(sk, SOCK_ZAPPED);
+-
+- sock_init_data(sock, sk);
+- sock->ops = &qrtr_proto_ops;
+-
+- ipc = qrtr_sk(sk);
+- ipc->us.sq_family = AF_QIPCRTR;
+- ipc->us.sq_node = qrtr_local_nid;
+- ipc->us.sq_port = 0;
+-
+- return 0;
+-}
+-
+-static const struct net_proto_family qrtr_family = {
+- .owner = THIS_MODULE,
+- .family = AF_QIPCRTR,
+- .create = qrtr_create,
+-};
+-
+-static int __init qrtr_proto_init(void)
+-{
+- int rc;
+-
+- rc = proto_register(&qrtr_proto, 1);
+- if (rc)
+- return rc;
+-
+- rc = sock_register(&qrtr_family);
+- if (rc)
+- goto err_proto;
+-
+- rc = qrtr_ns_init();
+- if (rc)
+- goto err_sock;
+-
+- return 0;
+-
+-err_sock:
+- sock_unregister(qrtr_family.family);
+-err_proto:
+- proto_unregister(&qrtr_proto);
+- return rc;
+-}
+-postcore_initcall(qrtr_proto_init);
+-
+-static void __exit qrtr_proto_fini(void)
+-{
+- qrtr_ns_remove();
+- sock_unregister(qrtr_family.family);
+- proto_unregister(&qrtr_proto);
+-}
+-module_exit(qrtr_proto_fini);
+-
+-MODULE_DESCRIPTION("Qualcomm IPC-router driver");
+-MODULE_LICENSE("GPL v2");
+-MODULE_ALIAS_NETPROTO(PF_QIPCRTR);
+diff --git a/net/rds/connection.c b/net/rds/connection.c
+index a3bc4b54d4910..b4cc699c5fad3 100644
+--- a/net/rds/connection.c
++++ b/net/rds/connection.c
+@@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
+ * should end up here, but if it
+ * does, reset/destroy the connection.
+ */
++ kfree(conn->c_path);
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
+index 6fdedd9dbbc28..cfbf0e129cba5 100644
+--- a/net/rds/ib_recv.c
++++ b/net/rds/ib_recv.c
+@@ -363,6 +363,7 @@ static int acquire_refill(struct rds_connection *conn)
+ static void release_refill(struct rds_connection *conn)
+ {
+ clear_bit(RDS_RECV_REFILL, &conn->c_flags);
++ smp_mb__after_atomic();
+
+ /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
+ * hot path and finding waiters is very rare. We don't want to walk
+diff --git a/net/rds/message.c b/net/rds/message.c
+index 799034e0f513d..8fa3d19c2e667 100644
+--- a/net/rds/message.c
++++ b/net/rds/message.c
+@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ spin_lock_irqsave(&q->lock, flags);
+ head = &q->zcookie_head;
+ if (!list_empty(head)) {
+- info = list_entry(head, struct rds_msg_zcopy_info,
+- rs_zcookie_next);
+- if (info && rds_zcookie_add(info, cookie)) {
++ info = list_first_entry(head, struct rds_msg_zcopy_info,
++ rs_zcookie_next);
++ if (rds_zcookie_add(info, cookie)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ kfree(rds_info_from_znotifier(znotif));
+ /* caller invokes rds_wake_sk_sleep() */
+@@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+ ck = &info->zcookies;
+ memset(ck, 0, sizeof(*ck));
+ WARN_ON(!rds_zcookie_add(info, cookie));
+- list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
++ list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
+
+ spin_unlock_irqrestore(&q->lock, flags);
+ /* caller invokes rds_wake_sk_sleep() */
+diff --git a/net/rds/tcp.c b/net/rds/tcp.c
+index abf19c0e3ba0b..b560d06e6d96d 100644
+--- a/net/rds/tcp.c
++++ b/net/rds/tcp.c
+@@ -166,10 +166,10 @@ void rds_tcp_reset_callbacks(struct socket *sock,
+ */
+ atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
+ wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
+- lock_sock(osock->sk);
+ /* reset receive side state for rds_tcp_data_recv() for osock */
+ cancel_delayed_work_sync(&cp->cp_send_w);
+ cancel_delayed_work_sync(&cp->cp_recv_w);
++ lock_sock(osock->sk);
+ if (tc->t_tinc) {
+ rds_inc_put(&tc->t_tinc->ti_inc);
+ tc->t_tinc = NULL;
+@@ -500,7 +500,7 @@ void rds_tcp_tune(struct socket *sock)
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ }
+ if (rtn->rcvbuf_size > 0) {
+- sk->sk_sndbuf = rtn->rcvbuf_size;
++ sk->sk_rcvbuf = rtn->rcvbuf_size;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+ release_sock(sk);
+diff --git a/net/rfkill/core.c b/net/rfkill/core.c
+index ac15a944573f7..068c7bcd30c94 100644
+--- a/net/rfkill/core.c
++++ b/net/rfkill/core.c
+@@ -78,6 +78,7 @@ struct rfkill_data {
+ struct mutex mtx;
+ wait_queue_head_t read_wait;
+ bool input_handler;
++ u8 max_size;
+ };
+
+
+@@ -1141,6 +1142,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
+ if (!data)
+ return -ENOMEM;
+
++ data->max_size = RFKILL_EVENT_SIZE_V1;
++
+ INIT_LIST_HEAD(&data->events);
+ mutex_init(&data->mtx);
+ init_waitqueue_head(&data->read_wait);
+@@ -1223,6 +1226,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
+ list);
+
+ sz = min_t(unsigned long, sizeof(ev->ev), count);
++ sz = min_t(unsigned long, sz, data->max_size);
+ ret = sz;
+ if (copy_to_user(buf, &ev->ev, sz))
+ ret = -EFAULT;
+@@ -1237,6 +1241,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
+ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+ {
++ struct rfkill_data *data = file->private_data;
+ struct rfkill *rfkill;
+ struct rfkill_event_ext ev;
+ int ret;
+@@ -1251,6 +1256,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
+ * our API version even in a write() call, if it cares.
+ */
+ count = min(count, sizeof(ev));
++ count = min_t(size_t, count, data->max_size);
+ if (copy_from_user(&ev, buf, count))
+ return -EFAULT;
+
+@@ -1310,31 +1316,47 @@ static int rfkill_fop_release(struct inode *inode, struct file *file)
+ return 0;
+ }
+
+-#ifdef CONFIG_RFKILL_INPUT
+ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+ {
+ struct rfkill_data *data = file->private_data;
++ int ret = -ENOSYS;
++ u32 size;
+
+ if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
+ return -ENOSYS;
+
+- if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT)
+- return -ENOSYS;
+-
+ mutex_lock(&data->mtx);
+-
+- if (!data->input_handler) {
+- if (atomic_inc_return(&rfkill_input_disabled) == 1)
+- printk(KERN_DEBUG "rfkill: input handler disabled\n");
+- data->input_handler = true;
++ switch (_IOC_NR(cmd)) {
++#ifdef CONFIG_RFKILL_INPUT
++ case RFKILL_IOC_NOINPUT:
++ if (!data->input_handler) {
++ if (atomic_inc_return(&rfkill_input_disabled) == 1)
++ printk(KERN_DEBUG "rfkill: input handler disabled\n");
++ data->input_handler = true;
++ }
++ ret = 0;
++ break;
++#endif
++ case RFKILL_IOC_MAX_SIZE:
++ if (get_user(size, (__u32 __user *)arg)) {
++ ret = -EFAULT;
++ break;
++ }
++ if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) {
++ ret = -EINVAL;
++ break;
++ }
++ data->max_size = size;
++ ret = 0;
++ break;
++ default:
++ break;
+ }
+-
+ mutex_unlock(&data->mtx);
+
+- return 0;
++ return ret;
+ }
+-#endif
+
+ static const struct file_operations rfkill_fops = {
+ .owner = THIS_MODULE,
+@@ -1343,10 +1365,8 @@ static const struct file_operations rfkill_fops = {
+ .write = rfkill_fop_write,
+ .poll = rfkill_fop_poll,
+ .release = rfkill_fop_release,
+-#ifdef CONFIG_RFKILL_INPUT
+ .unlocked_ioctl = rfkill_fop_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+-#endif
+ .llseek = no_llseek,
+ };
+
+diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
+index cf7d974e0f619..86c93cf1744b0 100644
+--- a/net/rose/af_rose.c
++++ b/net/rose/af_rose.c
+@@ -191,6 +191,7 @@ static void rose_kill_by_device(struct net_device *dev)
+ rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
+ if (rose->neighbour)
+ rose->neighbour->use--;
++ dev_put(rose->device);
+ rose->device = NULL;
+ }
+ }
+@@ -486,6 +487,12 @@ static int rose_listen(struct socket *sock, int backlog)
+ {
+ struct sock *sk = sock->sk;
+
++ lock_sock(sk);
++ if (sock->state != SS_UNCONNECTED) {
++ release_sock(sk);
++ return -EINVAL;
++ }
++
+ if (sk->sk_state != TCP_LISTEN) {
+ struct rose_sock *rose = rose_sk(sk);
+
+@@ -495,8 +502,10 @@ static int rose_listen(struct socket *sock, int backlog)
+ memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_state = TCP_LISTEN;
++ release_sock(sk);
+ return 0;
+ }
++ release_sock(sk);
+
+ return -EOPNOTSUPP;
+ }
+@@ -591,6 +600,8 @@ static struct sock *rose_make_new(struct sock *osk)
+ rose->idle = orose->idle;
+ rose->defer = orose->defer;
+ rose->device = orose->device;
++ if (rose->device)
++ dev_hold(rose->device);
+ rose->qbitincl = orose->qbitincl;
+
+ return sk;
+@@ -644,6 +655,7 @@ static int rose_release(struct socket *sock)
+ break;
+ }
+
++ dev_put(rose->device);
+ sock->sk = NULL;
+ release_sock(sk);
+ sock_put(sk);
+@@ -720,7 +732,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
+ struct rose_sock *rose = rose_sk(sk);
+ struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
+ unsigned char cause, diagnostic;
+- struct net_device *dev;
+ ax25_uid_assoc *user;
+ int n, err = 0;
+
+@@ -777,9 +788,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
+ }
+
+ if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
++ struct net_device *dev;
++
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+- if ((dev = rose_dev_first()) == NULL) {
++ dev = rose_dev_first();
++ if (!dev) {
+ err = -ENETUNREACH;
+ goto out_release;
+ }
+@@ -787,6 +801,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
+ user = ax25_findbyuid(current_euid());
+ if (!user) {
+ err = -EINVAL;
++ dev_put(dev);
+ goto out_release;
+ }
+
+diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
+index f6102e6f51617..730d2205f1976 100644
+--- a/net/rose/rose_link.c
++++ b/net/rose/rose_link.c
+@@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns
+ unsigned char *dptr;
+ int len;
+
++ if (!neigh->dev)
++ return;
++
+ len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
+
+ if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
+index 11c45c8c6c164..036d92c0ad794 100644
+--- a/net/rose/rose_loopback.c
++++ b/net/rose/rose_loopback.c
+@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused)
+ }
+
+ if (frametype == ROSE_CALL_REQUEST) {
+- if (!rose_loopback_neigh->dev) {
++ if (!rose_loopback_neigh->dev &&
++ !rose_loopback_neigh->loopback) {
+ kfree_skb(skb);
+ continue;
+ }
+diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
+index c0e04c261a156..66aa05db5390f 100644
+--- a/net/rose/rose_route.c
++++ b/net/rose/rose_route.c
+@@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
+ {
+ struct rose_neigh *s;
+
+- rose_stop_ftimer(rose_neigh);
+- rose_stop_t0timer(rose_neigh);
++ del_timer_sync(&rose_neigh->ftimer);
++ del_timer_sync(&rose_neigh->t0timer);
+
+ skb_queue_purge(&rose_neigh->queue);
+
+@@ -615,6 +615,8 @@ struct net_device *rose_dev_first(void)
+ if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+ first = dev;
+ }
++ if (first)
++ dev_hold(first);
+ rcu_read_unlock();
+
+ return first;
+diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
+index b3138fc2e552e..f06ddbed3fed6 100644
+--- a/net/rose/rose_timer.c
++++ b/net/rose/rose_timer.c
+@@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *);
+
+ void rose_start_heartbeat(struct sock *sk)
+ {
+- del_timer(&sk->sk_timer);
++ sk_stop_timer(sk, &sk->sk_timer);
+
+ sk->sk_timer.function = rose_heartbeat_expiry;
+ sk->sk_timer.expires = jiffies + 5 * HZ;
+
+- add_timer(&sk->sk_timer);
++ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
+ }
+
+ void rose_start_t1timer(struct sock *sk)
+ {
+ struct rose_sock *rose = rose_sk(sk);
+
+- del_timer(&rose->timer);
++ sk_stop_timer(sk, &rose->timer);
+
+ rose->timer.function = rose_timer_expiry;
+ rose->timer.expires = jiffies + rose->t1;
+
+- add_timer(&rose->timer);
++ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
+ }
+
+ void rose_start_t2timer(struct sock *sk)
+ {
+ struct rose_sock *rose = rose_sk(sk);
+
+- del_timer(&rose->timer);
++ sk_stop_timer(sk, &rose->timer);
+
+ rose->timer.function = rose_timer_expiry;
+ rose->timer.expires = jiffies + rose->t2;
+
+- add_timer(&rose->timer);
++ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
+ }
+
+ void rose_start_t3timer(struct sock *sk)
+ {
+ struct rose_sock *rose = rose_sk(sk);
+
+- del_timer(&rose->timer);
++ sk_stop_timer(sk, &rose->timer);
+
+ rose->timer.function = rose_timer_expiry;
+ rose->timer.expires = jiffies + rose->t3;
+
+- add_timer(&rose->timer);
++ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
+ }
+
+ void rose_start_hbtimer(struct sock *sk)
+ {
+ struct rose_sock *rose = rose_sk(sk);
+
+- del_timer(&rose->timer);
++ sk_stop_timer(sk, &rose->timer);
+
+ rose->timer.function = rose_timer_expiry;
+ rose->timer.expires = jiffies + rose->hb;
+
+- add_timer(&rose->timer);
++ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
+ }
+
+ void rose_start_idletimer(struct sock *sk)
+ {
+ struct rose_sock *rose = rose_sk(sk);
+
+- del_timer(&rose->idletimer);
++ sk_stop_timer(sk, &rose->idletimer);
+
+ if (rose->idle > 0) {
+ rose->idletimer.function = rose_idletimer_expiry;
+ rose->idletimer.expires = jiffies + rose->idle;
+
+- add_timer(&rose->idletimer);
++ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
+ }
+ }
+
+ void rose_stop_heartbeat(struct sock *sk)
+ {
+- del_timer(&sk->sk_timer);
++ sk_stop_timer(sk, &sk->sk_timer);
+ }
+
+ void rose_stop_timer(struct sock *sk)
+ {
+- del_timer(&rose_sk(sk)->timer);
++ sk_stop_timer(sk, &rose_sk(sk)->timer);
+ }
+
+ void rose_stop_idletimer(struct sock *sk)
+ {
+- del_timer(&rose_sk(sk)->idletimer);
++ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
+ }
+
+ static void rose_heartbeat_expiry(struct timer_list *t)
+@@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
+ (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+ bh_unlock_sock(sk);
+ rose_destroy_socket(sk);
++ sock_put(sk);
+ return;
+ }
+ break;
+@@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
+
+ rose_start_heartbeat(sk);
+ bh_unlock_sock(sk);
++ sock_put(sk);
+ }
+
+ static void rose_timer_expiry(struct timer_list *t)
+@@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t)
+ break;
+ }
+ bh_unlock_sock(sk);
++ sock_put(sk);
+ }
+
+ static void rose_idletimer_expiry(struct timer_list *t)
+@@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t)
+ sock_set_flag(sk, SOCK_DEAD);
+ }
+ bh_unlock_sock(sk);
++ sock_put(sk);
+ }
+diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
+index 2b5f89713e365..ceba28e9dce62 100644
+--- a/net/rxrpc/af_rxrpc.c
++++ b/net/rxrpc/af_rxrpc.c
+@@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ */
+ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
+ {
+- _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
++ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
+
+ mutex_lock(&call->user_mutex);
+ rxrpc_release_call(rxrpc_sk(sock->sk), call);
+diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
+index 7bd6f8a66a3ef..e0123efa2a623 100644
+--- a/net/rxrpc/ar-internal.h
++++ b/net/rxrpc/ar-internal.h
+@@ -15,14 +15,6 @@
+ #include <keys/rxrpc-type.h>
+ #include "protocol.h"
+
+-#if 0
+-#define CHECK_SLAB_OKAY(X) \
+- BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
+- (POISON_FREE << 8 | POISON_FREE))
+-#else
+-#define CHECK_SLAB_OKAY(X) do {} while (0)
+-#endif
+-
+ #define FCRYPT_BSIZE 8
+ struct rxrpc_crypt {
+ union {
+@@ -68,7 +60,7 @@ struct rxrpc_net {
+ struct proc_dir_entry *proc_net; /* Subdir in /proc/net */
+ u32 epoch; /* Local epoch for detecting local-end reset */
+ struct list_head calls; /* List of calls active in this namespace */
+- rwlock_t call_lock; /* Lock for ->calls */
++ spinlock_t call_lock; /* Lock for ->calls */
+ atomic_t nr_calls; /* Count of allocated calls */
+
+ atomic_t nr_conns;
+@@ -88,7 +80,7 @@ struct rxrpc_net {
+ struct work_struct client_conn_reaper;
+ struct timer_list client_conn_reap_timer;
+
+- struct list_head local_endpoints;
++ struct hlist_head local_endpoints;
+ struct mutex local_mutex; /* Lock for ->local_endpoints */
+
+ DECLARE_HASHTABLE (peer_hash, 10);
+@@ -279,9 +271,9 @@ struct rxrpc_security {
+ struct rxrpc_local {
+ struct rcu_head rcu;
+ atomic_t active_users; /* Number of users of the local endpoint */
+- atomic_t usage; /* Number of references to the structure */
++ refcount_t ref; /* Number of references to the structure */
+ struct rxrpc_net *rxnet; /* The network ns in which this resides */
+- struct list_head link;
++ struct hlist_node link;
+ struct socket *socket; /* my UDP socket */
+ struct work_struct processor;
+ struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
+@@ -304,7 +296,7 @@ struct rxrpc_local {
+ */
+ struct rxrpc_peer {
+ struct rcu_head rcu; /* This must be first */
+- atomic_t usage;
++ refcount_t ref;
+ unsigned long hash_key;
+ struct hlist_node hash_link;
+ struct rxrpc_local *local;
+@@ -406,7 +398,8 @@ enum rxrpc_conn_proto_state {
+ */
+ struct rxrpc_bundle {
+ struct rxrpc_conn_parameters params;
+- atomic_t usage;
++ refcount_t ref;
++ atomic_t active; /* Number of active users */
+ unsigned int debug_id;
+ bool try_upgrade; /* True if the bundle is attempting upgrade */
+ bool alloc_conn; /* True if someone's getting a conn */
+@@ -427,7 +420,7 @@ struct rxrpc_connection {
+ struct rxrpc_conn_proto proto;
+ struct rxrpc_conn_parameters params;
+
+- atomic_t usage;
++ refcount_t ref;
+ struct rcu_head rcu;
+ struct list_head cache_link;
+
+@@ -609,7 +602,7 @@ struct rxrpc_call {
+ int error; /* Local error incurred */
+ enum rxrpc_call_state state; /* current state of call */
+ enum rxrpc_call_completion completion; /* Call completion condition */
+- atomic_t usage;
++ refcount_t ref;
+ u16 service_id; /* service ID */
+ u8 security_ix; /* Security type */
+ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
+@@ -676,13 +669,12 @@ struct rxrpc_call {
+
+ spinlock_t input_lock; /* Lock for packet input to this call */
+
+- /* receive-phase ACK management */
++ /* Receive-phase ACK management (ACKs we send). */
+ u8 ackr_reason; /* reason to ACK */
+ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
+- rxrpc_serial_t ackr_first_seq; /* first sequence number received */
+- rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
+- rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
+- rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
++ rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */
++ atomic_t ackr_nr_unacked; /* Number of unacked packets */
++ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
+
+ /* RTT management */
+ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
+@@ -692,8 +684,10 @@ struct rxrpc_call {
+ #define RXRPC_CALL_RTT_AVAIL_MASK 0xf
+ #define RXRPC_CALL_RTT_PEND_SHIFT 8
+
+- /* transmission-phase ACK management */
++ /* Transmission-phase ACK management (ACKs we've received). */
+ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
++ rxrpc_seq_t acks_first_seq; /* first sequence number received */
++ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
+ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
+ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
+ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
+@@ -777,14 +771,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
+ enum rxrpc_propose_ack_trace);
+ void rxrpc_process_call(struct work_struct *);
+
+-static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+- unsigned long expire_at,
+- unsigned long now,
+- enum rxrpc_timer_trace why)
+-{
+- trace_rxrpc_timer(call, why, now);
+- timer_reduce(&call->timer, expire_at);
+-}
++void rxrpc_reduce_call_timer(struct rxrpc_call *call,
++ unsigned long expire_at,
++ unsigned long now,
++ enum rxrpc_timer_trace why);
++
++void rxrpc_delete_call_timer(struct rxrpc_call *call);
+
+ /*
+ * call_object.c
+@@ -808,6 +800,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+ bool __rxrpc_queue_call(struct rxrpc_call *);
+ bool rxrpc_queue_call(struct rxrpc_call *);
+ void rxrpc_see_call(struct rxrpc_call *);
++bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
+ void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+ void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
+ void rxrpc_cleanup_call(struct rxrpc_call *);
+@@ -990,6 +983,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
+ /*
+ * peer_event.c
+ */
++void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
+ void rxrpc_error_report(struct sock *);
+ void rxrpc_peer_keepalive_worker(struct work_struct *);
+
+@@ -1015,6 +1009,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *);
+ extern const struct seq_operations rxrpc_call_seq_ops;
+ extern const struct seq_operations rxrpc_connection_seq_ops;
+ extern const struct seq_operations rxrpc_peer_seq_ops;
++extern const struct seq_operations rxrpc_local_seq_ops;
+
+ /*
+ * recvmsg.c
+diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
+index 1ae90fb979362..99e10eea37321 100644
+--- a/net/rxrpc/call_accept.c
++++ b/net/rxrpc/call_accept.c
+@@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+ (head + 1) & (size - 1));
+
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
+- atomic_read(&conn->usage), here);
++ refcount_read(&conn->ref), here);
+ }
+
+ /* Now it gets complicated, because calls get registered with the
+@@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+ call->state = RXRPC_CALL_SERVER_PREALLOC;
+
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
+- atomic_read(&call->usage),
++ refcount_read(&call->ref),
+ here, (const void *)user_call_ID);
+
+ write_lock(&rx->call_lock);
+@@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+ write_unlock(&rx->call_lock);
+
+ rxnet = call->rxnet;
+- write_lock(&rxnet->call_lock);
+- list_add_tail(&call->link, &rxnet->calls);
+- write_unlock(&rxnet->call_lock);
++ spin_lock_bh(&rxnet->call_lock);
++ list_add_tail_rcu(&call->link, &rxnet->calls);
++ spin_unlock_bh(&rxnet->call_lock);
+
+ b->call_backlog[call_head] = call;
+ smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
+diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
+index 6be2672a65eab..2a93e7b5fbd05 100644
+--- a/net/rxrpc/call_event.c
++++ b/net/rxrpc/call_event.c
+@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ {
+ struct sk_buff *skb;
+- unsigned long resend_at, rto_j;
++ unsigned long resend_at;
+ rxrpc_seq_t cursor, seq, top;
+ ktime_t now, max_age, oldest, ack_ts;
+ int ix;
+@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+
+ _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
+
+- rto_j = call->peer->rto_j;
+-
+ now = ktime_get_real();
+- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
++ max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
+
+ spin_lock_bh(&call->lock);
+
+@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ }
+
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
+- resend_at += jiffies + rto_j;
++ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
+ WRITE_ONCE(call->resend_at, resend_at);
+
+ if (unacked)
+@@ -312,7 +310,7 @@ recheck_state:
+ }
+
+ if (call->state == RXRPC_CALL_COMPLETE) {
+- del_timer_sync(&call->timer);
++ rxrpc_delete_call_timer(call);
+ goto out_put;
+ }
+
+@@ -379,9 +377,9 @@ recheck_state:
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET);
++ rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ } else {
+- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
++ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ }
+ set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ goto recheck_state;
+@@ -408,7 +406,8 @@ recheck_state:
+ goto recheck_state;
+ }
+
+- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
++ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
++ call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
+ rxrpc_resend(call, now);
+ goto recheck_state;
+ }
+diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
+index 4eb91d958a48d..6401cdf7a6246 100644
+--- a/net/rxrpc/call_object.c
++++ b/net/rxrpc/call_object.c
+@@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
+- rxrpc_queue_call(call);
++ __rxrpc_queue_call(call);
++ } else {
++ rxrpc_put_call(call, rxrpc_call_put);
+ }
+ }
+
++void rxrpc_reduce_call_timer(struct rxrpc_call *call,
++ unsigned long expire_at,
++ unsigned long now,
++ enum rxrpc_timer_trace why)
++{
++ if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
++ trace_rxrpc_timer(call, why, now);
++ if (timer_reduce(&call->timer, expire_at))
++ rxrpc_put_call(call, rxrpc_call_put_notimer);
++ }
++}
++
++void rxrpc_delete_call_timer(struct rxrpc_call *call)
++{
++ if (del_timer_sync(&call->timer))
++ rxrpc_put_call(call, rxrpc_call_put_timer);
++}
++
+ static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+
+ /*
+@@ -92,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
+ found_extant_call:
+ rxrpc_get_call(call, rxrpc_call_got);
+ read_unlock(&rx->call_lock);
+- _leave(" = %p [%d]", call, atomic_read(&call->usage));
++ _leave(" = %p [%d]", call, refcount_read(&call->ref));
+ return call;
+ }
+
+@@ -140,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
+ spin_lock_init(&call->notify_lock);
+ spin_lock_init(&call->input_lock);
+ rwlock_init(&call->state_lock);
+- atomic_set(&call->usage, 1);
++ refcount_set(&call->ref, 1);
+ call->debug_id = debug_id;
+ call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+@@ -265,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ _enter("%p,%lx", rx, p->user_call_ID);
+
+ limiter = rxrpc_get_call_slot(p, gfp);
+- if (!limiter)
++ if (!limiter) {
++ release_sock(&rx->sk);
+ return ERR_PTR(-ERESTARTSYS);
++ }
+
+ call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
+ if (IS_ERR(call)) {
+@@ -279,7 +301,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ call->interruptibility = p->interruptibility;
+ call->tx_total_len = p->tx_total_len;
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
+- atomic_read(&call->usage),
++ refcount_read(&call->ref),
+ here, (const void *)p->user_call_ID);
+ if (p->kernel)
+ __set_bit(RXRPC_CALL_KERNEL, &call->flags);
+@@ -317,9 +339,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ write_unlock(&rx->call_lock);
+
+ rxnet = call->rxnet;
+- write_lock(&rxnet->call_lock);
+- list_add_tail(&call->link, &rxnet->calls);
+- write_unlock(&rxnet->call_lock);
++ spin_lock_bh(&rxnet->call_lock);
++ list_add_tail_rcu(&call->link, &rxnet->calls);
++ spin_unlock_bh(&rxnet->call_lock);
+
+ /* From this point on, the call is protected by its own lock. */
+ release_sock(&rx->sk);
+@@ -332,7 +354,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ goto error_attached_to_socket;
+
+ trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+- atomic_read(&call->usage), here, NULL);
++ refcount_read(&call->ref), here, NULL);
+
+ rxrpc_start_call_timer(call);
+
+@@ -352,7 +374,7 @@ error_dup_user_ID:
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, -EEXIST);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+- atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
++ refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
+ rxrpc_release_call(rx, call);
+ mutex_unlock(&call->user_mutex);
+ rxrpc_put_call(call, rxrpc_call_put);
+@@ -366,7 +388,7 @@ error_dup_user_ID:
+ */
+ error_attached_to_socket:
+ trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+- atomic_read(&call->usage), here, ERR_PTR(ret));
++ refcount_read(&call->ref), here, ERR_PTR(ret));
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, ret);
+@@ -422,8 +444,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
+ bool rxrpc_queue_call(struct rxrpc_call *call)
+ {
+ const void *here = __builtin_return_address(0);
+- int n = atomic_fetch_add_unless(&call->usage, 1, 0);
+- if (n == 0)
++ int n;
++
++ if (!__refcount_inc_not_zero(&call->ref, &n))
+ return false;
+ if (rxrpc_queue_work(&call->processor))
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
+@@ -439,7 +462,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call)
+ bool __rxrpc_queue_call(struct rxrpc_call *call)
+ {
+ const void *here = __builtin_return_address(0);
+- int n = atomic_read(&call->usage);
++ int n = refcount_read(&call->ref);
+ ASSERTCMP(n, >=, 1);
+ if (rxrpc_queue_work(&call->processor))
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
+@@ -456,22 +479,34 @@ void rxrpc_see_call(struct rxrpc_call *call)
+ {
+ const void *here = __builtin_return_address(0);
+ if (call) {
+- int n = atomic_read(&call->usage);
++ int n = refcount_read(&call->ref);
+
+ trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
+ here, NULL);
+ }
+ }
+
++bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
++{
++ const void *here = __builtin_return_address(0);
++ int n;
++
++ if (!__refcount_inc_not_zero(&call->ref, &n))
++ return false;
++ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
++ return true;
++}
++
+ /*
+ * Note the addition of a ref on a call.
+ */
+ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+ {
+ const void *here = __builtin_return_address(0);
+- int n = atomic_inc_return(&call->usage);
++ int n;
+
+- trace_rxrpc_call(call->debug_id, op, n, here, NULL);
++ __refcount_inc(&call->ref, &n);
++ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
+ }
+
+ /*
+@@ -496,10 +531,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
+ struct rxrpc_connection *conn = call->conn;
+ bool put = false;
+
+- _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
++ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
+
+ trace_rxrpc_call(call->debug_id, rxrpc_call_release,
+- atomic_read(&call->usage),
++ refcount_read(&call->ref),
+ here, (const void *)call->flags);
+
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+@@ -510,8 +545,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
+ spin_unlock_bh(&call->lock);
+
+ rxrpc_put_call_slot(call);
+-
+- del_timer_sync(&call->timer);
++ rxrpc_delete_call_timer(call);
+
+ /* Make sure we don't get any more notifications */
+ write_lock_bh(&rx->recvmsg_lock);
+@@ -589,21 +623,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+ struct rxrpc_net *rxnet = call->rxnet;
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id = call->debug_id;
++ bool dead;
+ int n;
+
+ ASSERT(call != NULL);
+
+- n = atomic_dec_return(&call->usage);
++ dead = __refcount_dec_and_test(&call->ref, &n);
+ trace_rxrpc_call(debug_id, op, n, here, NULL);
+- ASSERTCMP(n, >=, 0);
+- if (n == 0) {
++ if (dead) {
+ _debug("call %d dead", call->debug_id);
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+
+ if (!list_empty(&call->link)) {
+- write_lock(&rxnet->call_lock);
++ spin_lock_bh(&rxnet->call_lock);
+ list_del_init(&call->link);
+- write_unlock(&rxnet->call_lock);
++ spin_unlock_bh(&rxnet->call_lock);
+ }
+
+ rxrpc_cleanup_call(call);
+@@ -618,6 +652,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
+ struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
+ struct rxrpc_net *rxnet = call->rxnet;
+
++ rxrpc_delete_call_timer(call);
++
+ rxrpc_put_connection(call->conn);
+ rxrpc_put_peer(call->peer);
+ kfree(call->rxtx_buffer);
+@@ -652,8 +688,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
+
+ memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
+
+- del_timer_sync(&call->timer);
+-
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+
+@@ -675,7 +709,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
+ _enter("");
+
+ if (!list_empty(&rxnet->calls)) {
+- write_lock(&rxnet->call_lock);
++ spin_lock_bh(&rxnet->call_lock);
+
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next,
+@@ -686,16 +720,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
+ list_del_init(&call->link);
+
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+- call, atomic_read(&call->usage),
++ call, refcount_read(&call->ref),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
+
+- write_unlock(&rxnet->call_lock);
++ spin_unlock_bh(&rxnet->call_lock);
+ cond_resched();
+- write_lock(&rxnet->call_lock);
++ spin_lock_bh(&rxnet->call_lock);
+ }
+
+- write_unlock(&rxnet->call_lock);
++ spin_unlock_bh(&rxnet->call_lock);
+ }
+
+ atomic_dec(&rxnet->nr_calls);
+diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
+index dbea0bfee48e9..bdb335cb2d057 100644
+--- a/net/rxrpc/conn_client.c
++++ b/net/rxrpc/conn_client.c
+@@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+ DEFINE_IDR(rxrpc_client_conn_ids);
+ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+
++static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
++
+ /*
+ * Get a connection ID and epoch for a client connection from the global pool.
+ * The connection struct pointer is then recorded in the idr radix tree. The
+@@ -102,7 +104,7 @@ void rxrpc_destroy_client_conn_ids(void)
+ if (!idr_is_empty(&rxrpc_client_conn_ids)) {
+ idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+- conn, atomic_read(&conn->usage));
++ conn, refcount_read(&conn->ref));
+ }
+ BUG();
+ }
+@@ -122,7 +124,8 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+ if (bundle) {
+ bundle->params = *cp;
+ rxrpc_get_peer(bundle->params.peer);
+- atomic_set(&bundle->usage, 1);
++ refcount_set(&bundle->ref, 1);
++ atomic_set(&bundle->active, 1);
+ spin_lock_init(&bundle->channel_lock);
+ INIT_LIST_HEAD(&bundle->waiting_calls);
+ }
+@@ -131,20 +134,27 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+
+ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
+ {
+- atomic_inc(&bundle->usage);
++ refcount_inc(&bundle->ref);
+ return bundle;
+ }
+
++static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
++{
++ rxrpc_put_peer(bundle->params.peer);
++ kfree(bundle);
++}
++
+ void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
+ {
+ unsigned int d = bundle->debug_id;
+- unsigned int u = atomic_dec_return(&bundle->usage);
++ bool dead;
++ int r;
+
+- _debug("PUT B=%x %u", d, u);
+- if (u == 0) {
+- rxrpc_put_peer(bundle->params.peer);
+- kfree(bundle);
+- }
++ dead = __refcount_dec_and_test(&bundle->ref, &r);
++
++ _debug("PUT B=%x %d", d, r - 1);
++ if (dead)
++ rxrpc_free_bundle(bundle);
+ }
+
+ /*
+@@ -165,7 +175,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+ return ERR_PTR(-ENOMEM);
+ }
+
+- atomic_set(&conn->usage, 1);
++ refcount_set(&conn->ref, 1);
+ conn->bundle = bundle;
+ conn->params = bundle->params;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+@@ -191,7 +201,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+ key_get(conn->params.key);
+
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
+- atomic_read(&conn->usage),
++ refcount_read(&conn->ref),
+ __builtin_return_address(0));
+
+ atomic_inc(&rxnet->nr_client_conns);
+@@ -328,9 +338,10 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
+ return candidate;
+
+ found_bundle_free:
+- kfree(candidate);
++ rxrpc_free_bundle(candidate);
+ found_bundle:
+ rxrpc_get_bundle(bundle);
++ atomic_inc(&bundle->active);
+ spin_unlock(&local->client_bundles_lock);
+ _leave(" = %u [found]", bundle->debug_id);
+ return bundle;
+@@ -428,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
+ if (old)
+ trace_rxrpc_client(old, -1, rxrpc_client_replace);
+ candidate->bundle_shift = shift;
++ atomic_inc(&bundle->active);
+ bundle->conns[i] = candidate;
+ for (j = 0; j < RXRPC_MAXCALLS; j++)
+ set_bit(shift + j, &bundle->avail_chans);
+@@ -718,6 +730,7 @@ granted_channel:
+ smp_rmb();
+
+ out_put_bundle:
++ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_bundle(bundle);
+ out:
+ _leave(" = %d", ret);
+@@ -893,9 +906,8 @@ out:
+ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
+ {
+ struct rxrpc_bundle *bundle = conn->bundle;
+- struct rxrpc_local *local = bundle->params.local;
+ unsigned int bindex;
+- bool need_drop = false, need_put = false;
++ bool need_drop = false;
+ int i;
+
+ _enter("C=%x", conn->debug_id);
+@@ -914,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
+ }
+ spin_unlock(&bundle->channel_lock);
+
+- /* If there are no more connections, remove the bundle */
+- if (!bundle->avail_chans) {
+- _debug("maybe unbundle");
+- spin_lock(&local->client_bundles_lock);
++ if (need_drop) {
++ rxrpc_deactivate_bundle(bundle);
++ rxrpc_put_connection(conn);
++ }
++}
+
+- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
+- if (bundle->conns[i])
+- break;
+- if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) {
++/*
++ * Drop the active count on a bundle.
++ */
++static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
++{
++ struct rxrpc_local *local = bundle->params.local;
++ bool need_put = false;
++
++ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
++ if (!bundle->params.exclusive) {
+ _debug("erase bundle");
+ rb_erase(&bundle->local_node, &local->client_bundles);
+ need_put = true;
+@@ -932,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
+ if (need_put)
+ rxrpc_put_bundle(bundle);
+ }
+-
+- if (need_drop)
+- rxrpc_put_connection(conn);
+- _leave("");
+ }
+
+ /*
+@@ -962,14 +977,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
+- int n;
++ bool dead;
++ int r;
+
+- n = atomic_dec_return(&conn->usage);
+- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
+- if (n <= 0) {
+- ASSERTCMP(n, >=, 0);
++ dead = __refcount_dec_and_test(&conn->ref, &r);
++ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
++ if (dead)
+ rxrpc_kill_client_conn(conn);
+- }
+ }
+
+ /*
+diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
+index b2159dbf5412c..22089e37e97f0 100644
+--- a/net/rxrpc/conn_object.c
++++ b/net/rxrpc/conn_object.c
+@@ -104,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+ goto not_found;
+ *_peer = peer;
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+- if (!conn || atomic_read(&conn->usage) == 0)
++ if (!conn || refcount_read(&conn->ref) == 0)
+ goto not_found;
+ _leave(" = %p", conn);
+ return conn;
+@@ -114,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
+ */
+ conn = idr_find(&rxrpc_client_conn_ids,
+ sp->hdr.cid >> RXRPC_CIDSHIFT);
+- if (!conn || atomic_read(&conn->usage) == 0) {
++ if (!conn || refcount_read(&conn->ref) == 0) {
+ _debug("no conn");
+ goto not_found;
+ }
+@@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
+ default:
+- chan->last_abort = RX_USER_ABORT;
++ chan->last_abort = RX_CALL_DEAD;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
+ }
+@@ -263,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
+ bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
+- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
+- if (n == 0)
++ int r;
++
++ if (!__refcount_inc_not_zero(&conn->ref, &r))
+ return false;
+ if (rxrpc_queue_work(&conn->processor))
+- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
+ else
+ rxrpc_put_connection(conn);
+ return true;
+@@ -280,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
+ if (conn) {
+- int n = atomic_read(&conn->usage);
++ int n = refcount_read(&conn->ref);
+
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
+ }
+@@ -292,9 +293,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
+ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
+- int n = atomic_inc_return(&conn->usage);
++ int r;
+
+- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
++ __refcount_inc(&conn->ref, &r);
++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
+ return conn;
+ }
+
+@@ -305,11 +307,11 @@ struct rxrpc_connection *
+ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
++ int r;
+
+ if (conn) {
+- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
+- if (n > 0)
+- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
++ if (__refcount_inc_not_zero(&conn->ref, &r))
++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
+ else
+ conn = NULL;
+ }
+@@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
+- int n;
++ int r;
+
+- n = atomic_dec_return(&conn->usage);
+- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
+- ASSERTCMP(n, >=, 0);
+- if (n == 1)
++ __refcount_dec(&conn->ref, &r);
++ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
++ if (r - 1 == 1)
+ rxrpc_set_service_reap_timer(conn->params.local->rxnet,
+ jiffies + rxrpc_connection_expiry);
+ }
+@@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+
+- _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
++ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+
+- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
++ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
+
+ _net("DESTROY CONN %d", conn->debug_id);
+
+@@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
+
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
+- ASSERTCMP(atomic_read(&conn->usage), >, 0);
+- if (likely(atomic_read(&conn->usage) > 1))
++ ASSERTCMP(refcount_read(&conn->ref), >, 0);
++ if (likely(refcount_read(&conn->ref) > 1))
+ continue;
+ if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
+ continue;
+@@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
+ expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
+
+ _debug("reap CONN %d { u=%d,t=%ld }",
+- conn->debug_id, atomic_read(&conn->usage),
++ conn->debug_id, refcount_read(&conn->ref),
+ (long)expire_at - (long)now);
+
+ if (time_before(now, expire_at)) {
+@@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
+ /* The usage count sits at 1 whilst the object is unused on the
+ * list; we reduce that to 0 to make the object unavailable.
+ */
+- if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
++ if (!refcount_dec_if_one(&conn->ref))
+ continue;
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
+
+@@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
+ link);
+ list_del_init(&conn->link);
+
+- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
++ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
+ rxrpc_kill_connection(conn);
+ }
+
+@@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
+ pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+- conn, atomic_read(&conn->usage));
++ conn, refcount_read(&conn->ref));
+ leak = true;
+ }
+ write_unlock(&rxnet->conn_lock);
+diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
+index e1966dfc91527..6e6aa02c6f9e8 100644
+--- a/net/rxrpc/conn_service.c
++++ b/net/rxrpc/conn_service.c
+@@ -9,7 +9,7 @@
+ #include "ar-internal.h"
+
+ static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
+- .usage = ATOMIC_INIT(1),
++ .ref = REFCOUNT_INIT(1),
+ .debug_id = UINT_MAX,
+ .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
+ };
+@@ -99,7 +99,7 @@ conn_published:
+ return;
+
+ found_extant_conn:
+- if (atomic_read(&cursor->usage) == 0)
++ if (refcount_read(&cursor->ref) == 0)
+ goto replace_old_connection;
+ write_sequnlock_bh(&peer->service_conn_lock);
+ /* We should not be able to get here. rxrpc_incoming_connection() is
+@@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
+ * the rxrpc_connections list.
+ */
+ conn->state = RXRPC_CONN_SERVICE_PREALLOC;
+- atomic_set(&conn->usage, 2);
++ refcount_set(&conn->ref, 2);
+ conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
+
+ atomic_inc(&rxnet->nr_conns);
+@@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
+ write_unlock(&rxnet->conn_lock);
+
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
+- atomic_read(&conn->usage),
++ refcount_read(&conn->ref),
+ __builtin_return_address(0));
+ }
+
+diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
+index dc201363f2c48..721d847ba92bb 100644
+--- a/net/rxrpc/input.c
++++ b/net/rxrpc/input.c
+@@ -412,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+ {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ enum rxrpc_call_state state;
+- unsigned int j, nr_subpackets;
+- rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
++ unsigned int j, nr_subpackets, nr_unacked = 0;
++ rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial;
+ rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
+ bool immediate_ack = false, jumbo_bad = false;
+ u8 ack = 0;
+@@ -453,7 +453,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+ !rxrpc_receiving_reply(call))
+ goto unlock;
+
+- call->ackr_prev_seq = seq0;
+ hard_ack = READ_ONCE(call->rx_hard_ack);
+
+ nr_subpackets = sp->nr_subpackets;
+@@ -534,6 +533,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+ ack_serial = serial;
+ }
+
++ if (after(seq0, call->ackr_highest_seq))
++ call->ackr_highest_seq = seq0;
++
+ /* Queue the packet. We use a couple of memory barriers here as need
+ * to make sure that rx_top is perceived to be set after the buffer
+ * pointer and that the buffer pointer is set after the annotation and
+@@ -567,6 +569,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+ sp = NULL;
+ }
+
++ nr_unacked++;
++
+ if (last) {
+ set_bit(RXRPC_CALL_RX_LAST, &call->flags);
+ if (!ack) {
+@@ -586,9 +590,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+ }
+ call->rx_expect_next = seq + 1;
+ }
++ if (!ack)
++ ack_serial = serial;
+ }
+
+ ack:
++ if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack)
++ ack = RXRPC_ACK_IDLE;
++
+ if (ack)
+ rxrpc_propose_ACK(call, ack, ack_serial,
+ immediate_ack, true,
+@@ -812,7 +821,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
+ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
+ rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
+ {
+- rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
++ rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
+
+ if (after(first_pkt, base))
+ return true; /* The window advanced */
+@@ -820,7 +829,7 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
+ if (before(first_pkt, base))
+ return false; /* firstPacket regressed */
+
+- if (after_eq(prev_pkt, call->ackr_prev_seq))
++ if (after_eq(prev_pkt, call->acks_prev_seq))
+ return true; /* previousPacket hasn't regressed. */
+
+ /* Some rx implementations put a serial number in previousPacket. */
+@@ -903,11 +912,38 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
+ rxrpc_propose_ack_respond_to_ack);
+ }
+
++ /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
++ * indicates that the client address changed due to NAT. The server
++ * lost the call because it switched to a different peer.
++ */
++ if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
++ first_soft_ack == 1 &&
++ prev_pkt == 0 &&
++ rxrpc_is_client_call(call)) {
++ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
++ 0, -ENETRESET);
++ return;
++ }
++
++ /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
++ * indicate a change of address. However, we can retransmit the call
++ * if we still have it buffered to the beginning.
++ */
++ if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
++ first_soft_ack == 1 &&
++ prev_pkt == 0 &&
++ call->tx_hard_ack == 0 &&
++ rxrpc_is_client_call(call)) {
++ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
++ 0, -ENETRESET);
++ return;
++ }
++
+ /* Discard any out-of-order or duplicate ACKs (outside lock). */
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
+- first_soft_ack, call->ackr_first_seq,
+- prev_pkt, call->ackr_prev_seq);
++ first_soft_ack, call->acks_first_seq,
++ prev_pkt, call->acks_prev_seq);
+ return;
+ }
+
+@@ -922,14 +958,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
+ /* Discard any out-of-order or duplicate ACKs (inside lock). */
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
+- first_soft_ack, call->ackr_first_seq,
+- prev_pkt, call->ackr_prev_seq);
++ first_soft_ack, call->acks_first_seq,
++ prev_pkt, call->acks_prev_seq);
+ goto out;
+ }
+ call->acks_latest_ts = skb->tstamp;
+
+- call->ackr_first_seq = first_soft_ack;
+- call->ackr_prev_seq = prev_pkt;
++ call->acks_first_seq = first_soft_ack;
++ call->acks_prev_seq = prev_pkt;
+
+ /* Parse rwind and mtu sizes if provided. */
+ if (buf.info.rxMTU)
+@@ -1154,8 +1190,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
+ */
+ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+ {
+- CHECK_SLAB_OKAY(&local->usage);
+-
+ if (rxrpc_get_local_maybe(local)) {
+ skb_queue_tail(&local->reject_queue, skb);
+ rxrpc_queue_local(local);
+@@ -1413,7 +1447,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
+ }
+ }
+
+- if (!call || atomic_read(&call->usage) == 0) {
++ if (!call || refcount_read(&call->ref) == 0) {
+ if (rxrpc_to_client(sp) ||
+ sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ goto bad_message;
+diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
+index a4111408ffd0c..38ea98ff426bd 100644
+--- a/net/rxrpc/local_object.c
++++ b/net/rxrpc/local_object.c
+@@ -79,10 +79,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+
+ local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
+ if (local) {
+- atomic_set(&local->usage, 1);
++ refcount_set(&local->ref, 1);
+ atomic_set(&local->active_users, 1);
+ local->rxnet = rxnet;
+- INIT_LIST_HEAD(&local->link);
++ INIT_HLIST_NODE(&local->link);
+ INIT_WORK(&local->processor, rxrpc_local_processor);
+ init_rwsem(&local->defrag_sem);
+ skb_queue_head_init(&local->reject_queue);
+@@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
+ local, srx->transport_type, srx->transport.family);
+
+ udp_conf.family = srx->transport.family;
++ udp_conf.use_udp_checksums = true;
+ if (udp_conf.family == AF_INET) {
+ udp_conf.local_ip = srx->transport.sin.sin_addr;
+ udp_conf.local_udp_port = srx->transport.sin.sin_port;
+@@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
+ } else {
+ udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
+ udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
++ udp_conf.use_udp6_tx_checksums = true;
++ udp_conf.use_udp6_rx_checksums = true;
+ #endif
+ }
+ ret = udp_sock_create(net, &udp_conf, &local->socket);
+@@ -134,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
+
+ tuncfg.encap_type = UDP_ENCAP_RXRPC;
+ tuncfg.encap_rcv = rxrpc_input_packet;
++ tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
+ tuncfg.sk_user_data = local;
+ setup_udp_tunnel_sock(net, local->socket, &tuncfg);
+
+@@ -177,7 +181,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+ {
+ struct rxrpc_local *local;
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+- struct list_head *cursor;
++ struct hlist_node *cursor;
+ const char *age;
+ long diff;
+ int ret;
+@@ -187,16 +191,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+
+ mutex_lock(&rxnet->local_mutex);
+
+- for (cursor = rxnet->local_endpoints.next;
+- cursor != &rxnet->local_endpoints;
+- cursor = cursor->next) {
+- local = list_entry(cursor, struct rxrpc_local, link);
++ hlist_for_each(cursor, &rxnet->local_endpoints) {
++ local = hlist_entry(cursor, struct rxrpc_local, link);
+
+ diff = rxrpc_local_cmp_key(local, srx);
+- if (diff < 0)
++ if (diff != 0)
+ continue;
+- if (diff > 0)
+- break;
+
+ /* Services aren't allowed to share transport sockets, so
+ * reject that here. It is possible that the object is dying -
+@@ -208,9 +208,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+ goto addr_in_use;
+ }
+
+- /* Found a match. We replace a dying object. Attempting to
+- * bind the transport socket may still fail if we're attempting
+- * to use a local address that the dying object is still using.
++ /* Found a match. We want to replace a dying object.
++ * Attempting to bind the transport socket may still fail if
++ * we're attempting to use a local address that the dying
++ * object is still using.
+ */
+ if (!rxrpc_use_local(local))
+ break;
+@@ -227,10 +228,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+ if (ret < 0)
+ goto sock_error;
+
+- if (cursor != &rxnet->local_endpoints)
+- list_replace_init(cursor, &local->link);
+- else
+- list_add_tail(&local->link, cursor);
++ if (cursor) {
++ hlist_replace_rcu(cursor, &local->link);
++ cursor->pprev = NULL;
++ } else {
++ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
++ }
+ age = "new";
+
+ found:
+@@ -263,10 +266,10 @@ addr_in_use:
+ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+ {
+ const void *here = __builtin_return_address(0);
+- int n;
++ int r;
+
+- n = atomic_inc_return(&local->usage);
+- trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
++ __refcount_inc(&local->ref, &r);
++ trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
+ return local;
+ }
+
+@@ -276,12 +279,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+ {
+ const void *here = __builtin_return_address(0);
++ int r;
+
+ if (local) {
+- int n = atomic_fetch_add_unless(&local->usage, 1, 0);
+- if (n > 0)
++ if (__refcount_inc_not_zero(&local->ref, &r))
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got,
+- n + 1, here);
++ r + 1, here);
+ else
+ local = NULL;
+ }
+@@ -295,10 +298,10 @@ void rxrpc_queue_local(struct rxrpc_local *local)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id = local->debug_id;
+- int n = atomic_read(&local->usage);
++ int r = refcount_read(&local->ref);
+
+ if (rxrpc_queue_work(&local->processor))
+- trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
++ trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
+ else
+ rxrpc_put_local(local);
+ }
+@@ -310,15 +313,16 @@ void rxrpc_put_local(struct rxrpc_local *local)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
+- int n;
++ bool dead;
++ int r;
+
+ if (local) {
+ debug_id = local->debug_id;
+
+- n = atomic_dec_return(&local->usage);
+- trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);
++ dead = __refcount_dec_and_test(&local->ref, &r);
++ trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
+
+- if (n == 0)
++ if (dead)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
+ }
+ }
+@@ -371,7 +375,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
+ local->dead = true;
+
+ mutex_lock(&rxnet->local_mutex);
+- list_del_init(&local->link);
++ hlist_del_init_rcu(&local->link);
+ mutex_unlock(&rxnet->local_mutex);
+
+ rxrpc_clean_up_local_conns(local);
+@@ -402,8 +406,11 @@ static void rxrpc_local_processor(struct work_struct *work)
+ container_of(work, struct rxrpc_local, processor);
+ bool again;
+
++ if (local->dead)
++ return;
++
+ trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
+- atomic_read(&local->usage), NULL);
++ refcount_read(&local->ref), NULL);
+
+ do {
+ again = false;
+@@ -455,11 +462,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
+
+ flush_workqueue(rxrpc_workqueue);
+
+- if (!list_empty(&rxnet->local_endpoints)) {
++ if (!hlist_empty(&rxnet->local_endpoints)) {
+ mutex_lock(&rxnet->local_mutex);
+- list_for_each_entry(local, &rxnet->local_endpoints, link) {
++ hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+- local, atomic_read(&local->usage));
++ local, refcount_read(&local->ref));
+ }
+ mutex_unlock(&rxnet->local_mutex);
+ BUG();
+diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
+index 25bbc4cc8b135..bb4c25d6df64c 100644
+--- a/net/rxrpc/net_ns.c
++++ b/net/rxrpc/net_ns.c
+@@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net)
+ rxnet->epoch |= RXRPC_RANDOM_EPOCH;
+
+ INIT_LIST_HEAD(&rxnet->calls);
+- rwlock_init(&rxnet->call_lock);
++ spin_lock_init(&rxnet->call_lock);
+ atomic_set(&rxnet->nr_calls, 1);
+
+ atomic_set(&rxnet->nr_conns, 1);
+@@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net)
+ timer_setup(&rxnet->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
+
+- INIT_LIST_HEAD(&rxnet->local_endpoints);
++ INIT_HLIST_HEAD(&rxnet->local_endpoints);
+ mutex_init(&rxnet->local_mutex);
+
+ hash_init(rxnet->peer_hash);
+@@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net)
+ proc_create_net("peers", 0444, rxnet->proc_net,
+ &rxrpc_peer_seq_ops,
+ sizeof(struct seq_net_private));
++ proc_create_net("locals", 0444, rxnet->proc_net,
++ &rxrpc_local_seq_ops,
++ sizeof(struct seq_net_private));
+ return 0;
+
+ err_proc:
+@@ -115,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
+ rxnet->live = false;
+ del_timer_sync(&rxnet->peer_keepalive_timer);
+ cancel_work_sync(&rxnet->peer_keepalive_work);
++ /* Remove the timer again as the worker may have restarted it. */
++ del_timer_sync(&rxnet->peer_keepalive_timer);
+ rxrpc_destroy_all_calls(rxnet);
+ rxrpc_destroy_all_connections(rxnet);
+ rxrpc_destroy_all_peers(rxnet);
+diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
+index 10f2bf2e9068a..08c117bc083ec 100644
+--- a/net/rxrpc/output.c
++++ b/net/rxrpc/output.c
+@@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+ u8 reason)
+ {
+ rxrpc_serial_t serial;
++ unsigned int tmp;
+ rxrpc_seq_t hard_ack, top, seq;
+ int ix;
+ u32 mtu, jmax;
+ u8 *ackp = pkt->acks;
+
++ tmp = atomic_xchg(&call->ackr_nr_unacked, 0);
++ tmp |= atomic_xchg(&call->ackr_nr_consumed, 0);
++ if (!tmp && (reason == RXRPC_ACK_DELAY ||
++ reason == RXRPC_ACK_IDLE))
++ return 0;
++
+ /* Barrier against rxrpc_input_data(). */
+ serial = call->ackr_serial;
+ hard_ack = READ_ONCE(call->rx_hard_ack);
+@@ -86,10 +93,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+ *_hard_ack = hard_ack;
+ *_top = top;
+
+- pkt->ack.bufferSpace = htons(8);
++ pkt->ack.bufferSpace = htons(0);
+ pkt->ack.maxSkew = htons(0);
+ pkt->ack.firstPacket = htonl(hard_ack + 1);
+- pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
++ pkt->ack.previousPacket = htonl(call->ackr_highest_seq);
+ pkt->ack.serial = htonl(serial);
+ pkt->ack.reason = reason;
+ pkt->ack.nAcks = top - hard_ack;
+@@ -223,6 +230,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
+ n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
+
+ spin_unlock_bh(&call->lock);
++ if (n == 0) {
++ kfree(pkt);
++ return 0;
++ }
+
+ iov[0].iov_base = pkt;
+ iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
+@@ -259,13 +270,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
+ ntohl(pkt->ack.serial),
+ false, true,
+ rxrpc_propose_ack_retry_tx);
+- } else {
+- spin_lock_bh(&call->lock);
+- if (after(hard_ack, call->ackr_consumed))
+- call->ackr_consumed = hard_ack;
+- if (after(top, call->ackr_seen))
+- call->ackr_seen = top;
+- spin_unlock_bh(&call->lock);
+ }
+
+ rxrpc_set_keepalive(call);
+@@ -468,7 +472,7 @@ done:
+ if (call->peer->rtt_count > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
++ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
+index be032850ae8ca..32561e9567fe3 100644
+--- a/net/rxrpc/peer_event.c
++++ b/net/rxrpc/peer_event.c
+@@ -16,22 +16,105 @@
+ #include <net/sock.h>
+ #include <net/af_rxrpc.h>
+ #include <net/ip.h>
++#include <net/icmp.h>
+ #include "ar-internal.h"
+
++static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
+ static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
+ static void rxrpc_distribute_error(struct rxrpc_peer *, int,
+ enum rxrpc_call_completion);
+
+ /*
+- * Find the peer associated with an ICMP packet.
++ * Find the peer associated with an ICMPv4 packet.
+ */
+ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
+- const struct sk_buff *skb,
++ struct sk_buff *skb,
++ unsigned int udp_offset,
++ unsigned int *info,
+ struct sockaddr_rxrpc *srx)
+ {
+- struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
++ struct iphdr *ip, *ip0 = ip_hdr(skb);
++ struct icmphdr *icmp = icmp_hdr(skb);
++ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
+
+- _enter("");
++ _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);
++
++ switch (icmp->type) {
++ case ICMP_DEST_UNREACH:
++ *info = ntohs(icmp->un.frag.mtu);
++ fallthrough;
++ case ICMP_TIME_EXCEEDED:
++ case ICMP_PARAMETERPROB:
++ ip = (struct iphdr *)((void *)icmp + 8);
++ break;
++ default:
++ return NULL;
++ }
++
++ memset(srx, 0, sizeof(*srx));
++ srx->transport_type = local->srx.transport_type;
++ srx->transport_len = local->srx.transport_len;
++ srx->transport.family = local->srx.transport.family;
++
++ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
++ * versa?
++ */
++ switch (srx->transport.family) {
++ case AF_INET:
++ srx->transport_len = sizeof(srx->transport.sin);
++ srx->transport.family = AF_INET;
++ srx->transport.sin.sin_port = udp->dest;
++ memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
++ sizeof(struct in_addr));
++ break;
++
++#ifdef CONFIG_AF_RXRPC_IPV6
++ case AF_INET6:
++ srx->transport_len = sizeof(srx->transport.sin);
++ srx->transport.family = AF_INET;
++ srx->transport.sin.sin_port = udp->dest;
++ memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
++ sizeof(struct in_addr));
++ break;
++#endif
++
++ default:
++ WARN_ON_ONCE(1);
++ return NULL;
++ }
++
++ _net("ICMP {%pISp}", &srx->transport);
++ return rxrpc_lookup_peer_rcu(local, srx);
++}
++
++#ifdef CONFIG_AF_RXRPC_IPV6
++/*
++ * Find the peer associated with an ICMPv6 packet.
++ */
++static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
++ struct sk_buff *skb,
++ unsigned int udp_offset,
++ unsigned int *info,
++ struct sockaddr_rxrpc *srx)
++{
++ struct icmp6hdr *icmp = icmp6_hdr(skb);
++ struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
++ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
++
++ _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);
++
++ switch (icmp->icmp6_type) {
++ case ICMPV6_DEST_UNREACH:
++ *info = ntohl(icmp->icmp6_mtu);
++ fallthrough;
++ case ICMPV6_PKT_TOOBIG:
++ case ICMPV6_TIME_EXCEED:
++ case ICMPV6_PARAMPROB:
++ ip = (struct ipv6hdr *)((void *)icmp + 8);
++ break;
++ default:
++ return NULL;
++ }
+
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+@@ -41,6 +124,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
+ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
+ * versa?
+ */
++ switch (srx->transport.family) {
++ case AF_INET:
++ _net("Rx ICMP6 on v4 sock");
++ srx->transport_len = sizeof(srx->transport.sin);
++ srx->transport.family = AF_INET;
++ srx->transport.sin.sin_port = udp->dest;
++ memcpy(&srx->transport.sin.sin_addr,
++ &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
++ break;
++ case AF_INET6:
++ _net("Rx ICMP6");
++ srx->transport.sin.sin_port = udp->dest;
++ memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
++ sizeof(struct in6_addr));
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ return NULL;
++ }
++
++ _net("ICMP {%pISp}", &srx->transport);
++ return rxrpc_lookup_peer_rcu(local, srx);
++}
++#endif /* CONFIG_AF_RXRPC_IPV6 */
++
++/*
++ * Handle an error received on the local endpoint as a tunnel.
++ */
++void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
++ unsigned int udp_offset)
++{
++ struct sock_extended_err ee;
++ struct sockaddr_rxrpc srx;
++ struct rxrpc_local *local;
++ struct rxrpc_peer *peer;
++ unsigned int info = 0;
++ int err;
++ u8 version = ip_hdr(skb)->version;
++ u8 type = icmp_hdr(skb)->type;
++ u8 code = icmp_hdr(skb)->code;
++
++ rcu_read_lock();
++ local = rcu_dereference_sk_user_data(sk);
++ if (unlikely(!local)) {
++ rcu_read_unlock();
++ return;
++ }
++
++ rxrpc_new_skb(skb, rxrpc_skb_received);
++
++ switch (ip_hdr(skb)->version) {
++ case IPVERSION:
++ peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
++ &info, &srx);
++ break;
++#ifdef CONFIG_AF_RXRPC_IPV6
++ case 6:
++ peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
++ &info, &srx);
++ break;
++#endif
++ default:
++ rcu_read_unlock();
++ return;
++ }
++
++ if (peer && !rxrpc_get_peer_maybe(peer))
++ peer = NULL;
++ if (!peer) {
++ rcu_read_unlock();
++ return;
++ }
++
++ memset(&ee, 0, sizeof(ee));
++
++ switch (version) {
++ case IPVERSION:
++ switch (type) {
++ case ICMP_DEST_UNREACH:
++ switch (code) {
++ case ICMP_FRAG_NEEDED:
++ rxrpc_adjust_mtu(peer, info);
++ rcu_read_unlock();
++ rxrpc_put_peer(peer);
++ return;
++ default:
++ break;
++ }
++
++ err = EHOSTUNREACH;
++ if (code <= NR_ICMP_UNREACH) {
++ /* Might want to do something different with
++ * non-fatal errors
++ */
++ //harderr = icmp_err_convert[code].fatal;
++ err = icmp_err_convert[code].errno;
++ }
++ break;
++
++ case ICMP_TIME_EXCEEDED:
++ err = EHOSTUNREACH;
++ break;
++ default:
++ err = EPROTO;
++ break;
++ }
++
++ ee.ee_origin = SO_EE_ORIGIN_ICMP;
++ ee.ee_type = type;
++ ee.ee_code = code;
++ ee.ee_errno = err;
++ break;
++
++#ifdef CONFIG_AF_RXRPC_IPV6
++ case 6:
++ switch (type) {
++ case ICMPV6_PKT_TOOBIG:
++ rxrpc_adjust_mtu(peer, info);
++ rcu_read_unlock();
++ rxrpc_put_peer(peer);
++ return;
++ }
++
++ icmpv6_err_convert(type, code, &err);
++
++ if (err == EACCES)
++ err = EHOSTUNREACH;
++
++ ee.ee_origin = SO_EE_ORIGIN_ICMP6;
++ ee.ee_type = type;
++ ee.ee_code = code;
++ ee.ee_errno = err;
++ break;
++#endif
++ }
++
++ trace_rxrpc_rx_icmp(peer, &ee, &srx);
++
++ rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
++ rcu_read_unlock();
++ rxrpc_put_peer(peer);
++}
++
++/*
++ * Find the peer associated with a local error.
++ */
++static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
++ const struct sk_buff *skb,
++ struct sockaddr_rxrpc *srx)
++{
++ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
++
++ _enter("");
++
++ memset(srx, 0, sizeof(*srx));
++ srx->transport_type = local->srx.transport_type;
++ srx->transport_len = local->srx.transport_len;
++ srx->transport.family = local->srx.transport.family;
++
+ switch (srx->transport.family) {
+ case AF_INET:
+ srx->transport_len = sizeof(srx->transport.sin);
+@@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
+ /*
+ * Handle an MTU/fragmentation problem.
+ */
+-static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
++static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
+ {
+- u32 mtu = serr->ee.ee_info;
+-
+ _net("Rx ICMP Fragmentation Needed (%d)", mtu);
+
+ /* wind down the local interface MTU */
+@@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk)
+ struct sock_exterr_skb *serr;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_local *local;
+- struct rxrpc_peer *peer;
++ struct rxrpc_peer *peer = NULL;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+@@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk)
+ }
+ rxrpc_new_skb(skb, rxrpc_skb_received);
+ serr = SKB_EXT_ERR(skb);
+- if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
+- _leave("UDP empty message");
+- rcu_read_unlock();
+- rxrpc_free_skb(skb, rxrpc_skb_freed);
+- return;
+- }
+
+- peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
+- if (peer && !rxrpc_get_peer_maybe(peer))
+- peer = NULL;
+- if (!peer) {
+- rcu_read_unlock();
+- rxrpc_free_skb(skb, rxrpc_skb_freed);
+- _leave(" [no peer]");
+- return;
+- }
+-
+- trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+-
+- if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+- serr->ee.ee_type == ICMP_DEST_UNREACH &&
+- serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
+- rxrpc_adjust_mtu(peer, serr);
+- rcu_read_unlock();
+- rxrpc_free_skb(skb, rxrpc_skb_freed);
+- rxrpc_put_peer(peer);
+- _leave(" [MTU update]");
+- return;
++ if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
++ peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
++ if (peer && !rxrpc_get_peer_maybe(peer))
++ peer = NULL;
++ if (peer) {
++ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
++ rxrpc_store_error(peer, serr);
++ }
+ }
+
+- rxrpc_store_error(peer, serr);
+ rcu_read_unlock();
+ rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_put_peer(peer);
+-
+ _leave("");
+ }
+
+diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
+index 68396d0520525..26d2ae9baaf2c 100644
+--- a/net/rxrpc/peer_object.c
++++ b/net/rxrpc/peer_object.c
+@@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
+
+ hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
+ if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 &&
+- atomic_read(&peer->usage) > 0)
++ refcount_read(&peer->ref) > 0)
+ return peer;
+ }
+
+@@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer) {
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
+- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
++ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
+ }
+ return peer;
+ }
+@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
+
+ peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+ if (peer) {
+- atomic_set(&peer->usage, 1);
++ refcount_set(&peer->ref, 1);
+ peer->local = rxrpc_get_local(local);
+ INIT_HLIST_HEAD(&peer->error_targets);
+ peer->service_conns = RB_ROOT;
+@@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
+ return peer;
+ }
+
++static void rxrpc_free_peer(struct rxrpc_peer *peer)
++{
++ rxrpc_put_local(peer->local);
++ kfree_rcu(peer, rcu);
++}
++
+ /*
+ * Set up a new incoming peer. There shouldn't be any other matching peers
+ * since we've already done a search in the list from the non-reentrant context
+@@ -365,14 +371,14 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+
+ if (peer)
+- kfree(candidate);
++ rxrpc_free_peer(candidate);
+ else
+ peer = candidate;
+ }
+
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
+
+- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
++ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
+ return peer;
+ }
+
+@@ -382,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
+ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+ {
+ const void *here = __builtin_return_address(0);
+- int n;
++ int r;
+
+- n = atomic_inc_return(&peer->usage);
+- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
++ __refcount_inc(&peer->ref, &r);
++ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ return peer;
+ }
+
+@@ -395,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+ {
+ const void *here = __builtin_return_address(0);
++ int r;
+
+ if (peer) {
+- int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
+- if (n > 0)
+- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
++ if (__refcount_inc_not_zero(&peer->ref, &r))
++ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ else
+ peer = NULL;
+ }
+@@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
+ list_del_init(&peer->keepalive_link);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
+
+- rxrpc_put_local(peer->local);
+- kfree_rcu(peer, rcu);
++ rxrpc_free_peer(peer);
+ }
+
+ /*
+@@ -431,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
+- int n;
++ bool dead;
++ int r;
+
+ if (peer) {
+ debug_id = peer->debug_id;
+- n = atomic_dec_return(&peer->usage);
+- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
+- if (n == 0)
++ dead = __refcount_dec_and_test(&peer->ref, &r);
++ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
++ if (dead)
+ __rxrpc_put_peer(peer);
+ }
+ }
+@@ -450,15 +456,15 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+ {
+ const void *here = __builtin_return_address(0);
+ unsigned int debug_id = peer->debug_id;
+- int n;
++ bool dead;
++ int r;
+
+- n = atomic_dec_return(&peer->usage);
+- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
+- if (n == 0) {
++ dead = __refcount_dec_and_test(&peer->ref, &r);
++ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
++ if (dead) {
+ hash_del_rcu(&peer->hash_link);
+ list_del_init(&peer->keepalive_link);
+- rxrpc_put_local(peer->local);
+- kfree_rcu(peer, rcu);
++ rxrpc_free_peer(peer);
+ }
+ }
+
+@@ -477,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
+ hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
+ pr_err("Leaked peer %u {%u} %pISp\n",
+ peer->debug_id,
+- atomic_read(&peer->usage),
++ refcount_read(&peer->ref),
+ &peer->srx.transport);
+ }
+ }
+diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
+index e2f990754f882..245418943e01c 100644
+--- a/net/rxrpc/proc.c
++++ b/net/rxrpc/proc.c
+@@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+ */
+ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+- __acquires(rxnet->call_lock)
+ {
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ rcu_read_lock();
+- read_lock(&rxnet->call_lock);
+- return seq_list_start_head(&rxnet->calls, *_pos);
++ return seq_list_start_head_rcu(&rxnet->calls, *_pos);
+ }
+
+ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+- return seq_list_next(v, &rxnet->calls, pos);
++ return seq_list_next_rcu(v, &rxnet->calls, pos);
+ }
+
+ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+- __releases(rxnet->call_lock)
+ __releases(rcu)
+ {
+- struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+-
+- read_unlock(&rxnet->call_lock);
+ rcu_read_unlock();
+ }
+
+@@ -107,7 +101,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+ call->cid,
+ call->call_id,
+ rxrpc_is_service_call(call) ? "Svc" : "Clt",
+- atomic_read(&call->usage),
++ refcount_read(&call->ref),
+ rxrpc_call_states[call->state],
+ call->abort_code,
+ call->debug_id,
+@@ -189,7 +183,7 @@ print:
+ conn->service_id,
+ conn->proto.cid,
+ rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
+- atomic_read(&conn->usage),
++ refcount_read(&conn->ref),
+ rxrpc_conn_states[conn->state],
+ key_serial(conn->params.key),
+ atomic_read(&conn->serial),
+@@ -239,7 +233,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
+ " %3u %5u %6llus %8u %8u\n",
+ lbuff,
+ rbuff,
+- atomic_read(&peer->usage),
++ refcount_read(&peer->ref),
+ peer->cong_cwnd,
+ peer->mtu,
+ now - peer->last_tx_at,
+@@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = {
+ .stop = rxrpc_peer_seq_stop,
+ .show = rxrpc_peer_seq_show,
+ };
++
++/*
++ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
++ */
++static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
++{
++ struct rxrpc_local *local;
++ char lbuff[50];
++
++ if (v == SEQ_START_TOKEN) {
++ seq_puts(seq,
++ "Proto Local "
++ " Use Act\n");
++ return 0;
++ }
++
++ local = hlist_entry(v, struct rxrpc_local, link);
++
++ sprintf(lbuff, "%pISpc", &local->srx.transport);
++
++ seq_printf(seq,
++ "UDP %-47.47s %3u %3u\n",
++ lbuff,
++ refcount_read(&local->ref),
++ atomic_read(&local->active_users));
++
++ return 0;
++}
++
++static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
++ __acquires(rcu)
++{
++ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
++ unsigned int n;
++
++ rcu_read_lock();
++
++ if (*_pos >= UINT_MAX)
++ return NULL;
++
++ n = *_pos;
++ if (n == 0)
++ return SEQ_START_TOKEN;
++
++ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
++}
++
++static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
++{
++ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
++
++ if (*_pos >= UINT_MAX)
++ return NULL;
++
++ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
++}
++
++static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
++ __releases(rcu)
++{
++ rcu_read_unlock();
++}
++
++const struct seq_operations rxrpc_local_seq_ops = {
++ .start = rxrpc_local_seq_start,
++ .next = rxrpc_local_seq_next,
++ .stop = rxrpc_local_seq_stop,
++ .show = rxrpc_local_seq_show,
++};
+diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
+index eca6dda26c77e..250f23bc1c076 100644
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -260,11 +260,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
+ rxrpc_end_rx_phase(call, serial);
+ } else {
+ /* Check to see if there's an ACK that needs sending. */
+- if (after_eq(hard_ack, call->ackr_consumed + 2) ||
+- after_eq(top, call->ackr_seen + 2) ||
+- (hard_ack == top && after(hard_ack, call->ackr_consumed)))
+- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
+- true, true,
++ if (atomic_inc_return(&call->ackr_nr_consumed) > 2)
++ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial,
++ true, false,
+ rxrpc_propose_ack_rotate_rx);
+ if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
+ rxrpc_send_ack_packet(call, false, NULL);
+diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
+index 4e565eeab4260..be61d6f5be8d1 100644
+--- a/net/rxrpc/rtt.c
++++ b/net/rxrpc/rtt.c
+@@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
+
+ static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
+ {
+- return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
++ return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+ }
+
+ static u32 rxrpc_bound_rto(u32 rto)
+diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
+index 08aab5c01437d..db47844f4ac99 100644
+--- a/net/rxrpc/rxkad.c
++++ b/net/rxrpc/rxkad.c
+@@ -540,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
+ * directly into the target buffer.
+ */
+ sg = _sg;
+- nsg = skb_shinfo(skb)->nr_frags;
++ nsg = skb_shinfo(skb)->nr_frags + 1;
+ if (nsg <= 4) {
+ nsg = 4;
+ } else {
+diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
+index af8ad6c30b9fb..71e40f91dd398 100644
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+- mutex_unlock(&call->user_mutex);
+ *timeo = schedule_timeout(*timeo);
+- if (mutex_lock_interruptible(&call->user_mutex) < 0)
+- return sock_intr_errno(*timeo);
+ }
+ }
+
+@@ -290,37 +287,48 @@ out:
+ static int rxrpc_send_data(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len,
+- rxrpc_notify_end_tx_t notify_end_tx)
++ rxrpc_notify_end_tx_t notify_end_tx,
++ bool *_dropped_lock)
+ {
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ struct sock *sk = &rx->sk;
++ enum rxrpc_call_state state;
+ long timeo;
+- bool more;
+- int ret, copied;
++ bool more = msg->msg_flags & MSG_MORE;
++ int ret, copied = 0;
+
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ /* this should be in poll */
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
++reload:
++ ret = -EPIPE;
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+- return -EPIPE;
+-
+- more = msg->msg_flags & MSG_MORE;
+-
++ goto maybe_error;
++ state = READ_ONCE(call->state);
++ ret = -ESHUTDOWN;
++ if (state >= RXRPC_CALL_COMPLETE)
++ goto maybe_error;
++ ret = -EPROTO;
++ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
++ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
++ state != RXRPC_CALL_SERVER_SEND_REPLY)
++ goto maybe_error;
++
++ ret = -EMSGSIZE;
+ if (call->tx_total_len != -1) {
+- if (len > call->tx_total_len)
+- return -EMSGSIZE;
+- if (!more && len != call->tx_total_len)
+- return -EMSGSIZE;
++ if (len - copied > call->tx_total_len)
++ goto maybe_error;
++ if (!more && len - copied != call->tx_total_len)
++ goto maybe_error;
+ }
+
+ skb = call->tx_pending;
+ call->tx_pending = NULL;
+ rxrpc_see_skb(skb, rxrpc_skb_seen);
+
+- copied = 0;
+ do {
+ /* Check to see if there's a ping ACK to reply to. */
+ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
+@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
+
+ _debug("alloc");
+
+- if (!rxrpc_check_tx_space(call, NULL)) {
+- ret = -EAGAIN;
+- if (msg->msg_flags & MSG_DONTWAIT)
+- goto maybe_error;
+- ret = rxrpc_wait_for_tx_window(rx, call,
+- &timeo,
+- msg->msg_flags & MSG_WAITALL);
+- if (ret < 0)
+- goto maybe_error;
+- }
++ if (!rxrpc_check_tx_space(call, NULL))
++ goto wait_for_space;
+
+ /* Work out the maximum size of a packet. Assume that
+ * the security header is going to be in the padded
+@@ -444,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
+
+ success:
+ ret = copied;
++ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
++ read_lock_bh(&call->state_lock);
++ if (call->error < 0)
++ ret = call->error;
++ read_unlock_bh(&call->state_lock);
++ }
+ out:
+ call->tx_pending = skb;
+ _leave(" = %d", ret);
+@@ -462,6 +468,27 @@ maybe_error:
+ efault:
+ ret = -EFAULT;
+ goto out;
++
++wait_for_space:
++ ret = -EAGAIN;
++ if (msg->msg_flags & MSG_DONTWAIT)
++ goto maybe_error;
++ mutex_unlock(&call->user_mutex);
++ *_dropped_lock = true;
++ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
++ msg->msg_flags & MSG_WAITALL);
++ if (ret < 0)
++ goto maybe_error;
++ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
++ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
++ ret = sock_intr_errno(timeo);
++ goto maybe_error;
++ }
++ } else {
++ mutex_lock(&call->user_mutex);
++ }
++ *_dropped_lock = false;
++ goto reload;
+ }
+
+ /*
+@@ -623,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ enum rxrpc_call_state state;
+ struct rxrpc_call *call;
+ unsigned long now, j;
++ bool dropped_lock = false;
+ int ret;
+
+ struct rxrpc_send_params p = {
+@@ -688,7 +716,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ if (call->tx_total_len != -1 ||
+ call->tx_pending ||
+ call->tx_top != 0)
+- goto error_put;
++ goto out_put_unlock;
+ call->tx_total_len = p.call.tx_total_len;
+ }
+ }
+@@ -708,7 +736,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ fallthrough;
+ case 1:
+ if (p.call.timeouts.hard > 0) {
+- j = msecs_to_jiffies(p.call.timeouts.hard);
++ j = p.call.timeouts.hard * HZ;
+ now = jiffies;
+ j += now;
+ WRITE_ONCE(call->expect_term_by, j);
+@@ -731,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ ret = rxrpc_send_abort_packet(call);
+ } else if (p.command != RXRPC_CMD_SEND_DATA) {
+ ret = -EINVAL;
+- } else if (rxrpc_is_client_call(call) &&
+- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+- /* request phase complete for this client call */
+- ret = -EPROTO;
+- } else if (rxrpc_is_service_call(call) &&
+- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+- state != RXRPC_CALL_SERVER_SEND_REPLY) {
+- /* Reply phase not begun or not complete for service call. */
+- ret = -EPROTO;
+ } else {
+- ret = rxrpc_send_data(rx, call, msg, len, NULL);
++ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
+ }
+
+ out_put_unlock:
+- mutex_unlock(&call->user_mutex);
++ if (!dropped_lock)
++ mutex_unlock(&call->user_mutex);
+ error_put:
+ rxrpc_put_call(call, rxrpc_call_put);
+ _leave(" = %d", ret);
+@@ -773,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
+ {
++ bool dropped_lock = false;
+ int ret;
+
+ _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+@@ -790,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+- notify_end_tx);
++ notify_end_tx, &dropped_lock);
+ break;
+ case RXRPC_CALL_COMPLETE:
+ read_lock_bh(&call->state_lock);
+@@ -804,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+ break;
+ }
+
+- mutex_unlock(&call->user_mutex);
++ if (!dropped_lock)
++ mutex_unlock(&call->user_mutex);
+ _leave(" = %d", ret);
+ return ret;
+ }
+diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
+index ead3471307ee5..ee269e0e6ee87 100644
+--- a/net/rxrpc/server_key.c
++++ b/net/rxrpc/server_key.c
+@@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
+
+ prep->payload.data[1] = (struct rxrpc_security *)sec;
+
++ if (!sec->preparse_server_key)
++ return -EINVAL;
++
+ return sec->preparse_server_key(prep);
+ }
+
+@@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
+ {
+ const struct rxrpc_security *sec = prep->payload.data[1];
+
+- if (sec)
++ if (sec && sec->free_preparse_server_key)
+ sec->free_preparse_server_key(prep);
+ }
+
+@@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key)
+ {
+ const struct rxrpc_security *sec = key->payload.data[1];
+
+- if (sec)
++ if (sec && sec->destroy_server_key)
+ sec->destroy_server_key(key);
+ }
+
+diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
+index 0348d2bf6f7d8..580a5acffee71 100644
+--- a/net/rxrpc/skbuff.c
++++ b/net/rxrpc/skbuff.c
+@@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n;
+- CHECK_SLAB_OKAY(&skb->users);
+ n = atomic_dec_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+ rxrpc_skb(skb)->rx_flags, here);
+diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
+index 540351d6a5f47..555e0910786bc 100644
+--- a/net/rxrpc/sysctl.c
++++ b/net/rxrpc/sysctl.c
+@@ -12,7 +12,7 @@
+
+ static struct ctl_table_header *rxrpc_sysctl_reg_table;
+ static const unsigned int four = 4;
+-static const unsigned int thirtytwo = 32;
++static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
+ static const unsigned int n_65535 = 65535;
+ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
+ static const unsigned long one_jiffy = 1;
+@@ -89,7 +89,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&four,
+- .extra2 = (void *)&thirtytwo,
++ .extra2 = (void *)&max_backlog,
+ },
+ {
+ .procname = "rx_window_size",
+diff --git a/net/sched/Kconfig b/net/sched/Kconfig
+index 1e8ab4749c6c3..bcdd6e925343f 100644
+--- a/net/sched/Kconfig
++++ b/net/sched/Kconfig
+@@ -503,17 +503,6 @@ config NET_CLS_BASIC
+ To compile this code as a module, choose M here: the
+ module will be called cls_basic.
+
+-config NET_CLS_TCINDEX
+- tristate "Traffic-Control Index (TCINDEX)"
+- select NET_CLS
+- help
+- Say Y here if you want to be able to classify packets based on
+- traffic control indices. You will want this feature if you want
+- to implement Differentiated Services together with DSMARK.
+-
+- To compile this code as a module, choose M here: the
+- module will be called cls_tcindex.
+-
+ config NET_CLS_ROUTE4
+ tristate "Routing decision (ROUTE)"
+ depends on INET
+@@ -976,7 +965,7 @@ config NET_ACT_TUNNEL_KEY
+
+ config NET_ACT_CT
+ tristate "connection tracking tc action"
+- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
++ depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
+ help
+ Say Y here to allow sending the packets to conntrack module.
+
+diff --git a/net/sched/Makefile b/net/sched/Makefile
+index dd14ef413fdad..b7dbac5c519f6 100644
+--- a/net/sched/Makefile
++++ b/net/sched/Makefile
+@@ -70,7 +70,6 @@ obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
+ obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
+ obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
+ obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
+-obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
+ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
+ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
+ obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
+diff --git a/net/sched/act_api.c b/net/sched/act_api.c
+index 7dd3a2dc5fa40..d775676956bf9 100644
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -350,7 +350,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p)
+ }
+
+ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
+- const struct tc_action_ops *ops)
++ const struct tc_action_ops *ops,
++ struct netlink_ext_ack *extack)
+ {
+ struct nlattr *nest;
+ int n_i = 0;
+@@ -366,20 +367,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
+ if (nla_put_string(skb, TCA_KIND, ops->kind))
+ goto nla_put_failure;
+
++ ret = 0;
+ mutex_lock(&idrinfo->lock);
+ idr_for_each_entry_ul(idr, p, tmp, id) {
+ if (IS_ERR(p))
+ continue;
+ ret = tcf_idr_release_unsafe(p);
+- if (ret == ACT_P_DELETED) {
++ if (ret == ACT_P_DELETED)
+ module_put(ops->owner);
+- n_i++;
+- } else if (ret < 0) {
+- mutex_unlock(&idrinfo->lock);
+- goto nla_put_failure;
+- }
++ else if (ret < 0)
++ break;
++ n_i++;
+ }
+ mutex_unlock(&idrinfo->lock);
++ if (ret < 0) {
++ if (n_i)
++ NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
++ else
++ goto nla_put_failure;
++ }
+
+ ret = nla_put_u32(skb, TCA_FCNT, n_i);
+ if (ret)
+@@ -400,7 +406,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+ if (type == RTM_DELACTION) {
+- return tcf_del_walker(idrinfo, skb, ops);
++ return tcf_del_walker(idrinfo, skb, ops, extack);
+ } else if (type == RTM_GETACTION) {
+ return tcf_dump_walker(idrinfo, skb, cb);
+ } else {
+@@ -728,15 +734,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+ restart_act_graph:
+ for (i = 0; i < nr_actions; i++) {
+ const struct tc_action *a = actions[i];
++ int repeat_ttl;
+
+ if (jmp_prgcnt > 0) {
+ jmp_prgcnt -= 1;
+ continue;
+ }
++
++ repeat_ttl = 32;
+ repeat:
+ ret = a->ops->act(skb, a, res);
+- if (ret == TC_ACT_REPEAT)
+- goto repeat; /* we need a ttl - JHS */
++
++ if (unlikely(ret == TC_ACT_REPEAT)) {
++ if (--repeat_ttl != 0)
++ goto repeat;
++ /* suspicious opcode, stop pipeline */
++ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
++ return TC_ACT_OK;
++ }
+
+ if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
+ jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
+diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
+index 5c36013339e11..2a05bad56ef3e 100644
+--- a/net/sched/act_bpf.c
++++ b/net/sched/act_bpf.c
+@@ -305,7 +305,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+ ret = tcf_idr_check_alloc(tn, &index, act, bind);
+ if (!ret) {
+ ret = tcf_idr_create(tn, index, est, act,
+- &act_bpf_ops, bind, true, 0);
++ &act_bpf_ops, bind, true, flags);
+ if (ret < 0) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
+index 94e78ac7a7487..0deb4e96a6c2e 100644
+--- a/net/sched/act_connmark.c
++++ b/net/sched/act_connmark.c
+@@ -62,7 +62,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
+
+ c = nf_ct_get(skb, &ctinfo);
+ if (c) {
+- skb->mark = c->mark;
++ skb->mark = READ_ONCE(c->mark);
+ /* using overlimits stats to count how many packets marked */
+ ca->tcf_qstats.overlimits++;
+ goto out;
+@@ -82,7 +82,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
+ c = nf_ct_tuplehash_to_ctrack(thash);
+ /* using overlimits stats to count how many packets marked */
+ ca->tcf_qstats.overlimits++;
+- skb->mark = c->mark;
++ skb->mark = READ_ONCE(c->mark);
+ nf_ct_put(c);
+
+ out:
+@@ -124,7 +124,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
+ ret = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (!ret) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_connmark_ops, bind, false, 0);
++ &act_connmark_ops, bind, false, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 90866ae45573a..81a2d6cbfb441 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -177,7 +177,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
+ entry = tcf_ct_flow_table_flow_action_get_next(action);
+ entry->id = FLOW_ACTION_CT_METADATA;
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+- entry->ct_metadata.mark = ct->mark;
++ entry->ct_metadata.mark = READ_ONCE(ct->mark);
+ #endif
+ ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
+ IP_CT_ESTABLISHED_REPLY;
+@@ -516,11 +516,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+ struct nf_conn *ct;
+ u8 dir;
+
+- /* Previously seen or loopback */
+- ct = nf_ct_get(skb, &ctinfo);
+- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
+- return false;
+-
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
+@@ -588,22 +583,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
+ if (!ct)
+ return false;
+ if (!net_eq(net, read_pnet(&ct->ct_net)))
+- return false;
++ goto drop_ct;
+ if (nf_ct_zone(ct)->id != zone_id)
+- return false;
++ goto drop_ct;
+
+ /* Force conntrack entry direction. */
+ if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
+ if (nf_ct_is_confirmed(ct))
+ nf_ct_kill(ct);
+
+- nf_conntrack_put(&ct->ct_general);
+- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+-
+- return false;
++ goto drop_ct;
+ }
+
+ return true;
++
++drop_ct:
++ nf_ct_put(ct);
++ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
++
++ return false;
+ }
+
+ /* Trim the skb to the length specified by the IP/IPv6 header,
+@@ -690,10 +688,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+ u8 family, u16 zone, bool *defrag)
+ {
+ enum ip_conntrack_info ctinfo;
+- struct qdisc_skb_cb cb;
+ struct nf_conn *ct;
+ int err = 0;
+ bool frag;
++ u16 mru;
+
+ /* Previously seen (loopback)? Ignore. */
+ ct = nf_ct_get(skb, &ctinfo);
+@@ -708,7 +706,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+ return err;
+
+ skb_get(skb);
+- cb = *qdisc_skb_cb(skb);
++ mru = tc_skb_cb(skb)->mru;
+
+ if (family == NFPROTO_IPV4) {
+ enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+@@ -722,7 +720,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+
+ if (!err) {
+ *defrag = true;
+- cb.mru = IPCB(skb)->frag_max_size;
++ mru = IPCB(skb)->frag_max_size;
+ }
+ } else { /* NFPROTO_IPV6 */
+ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+@@ -735,7 +733,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+
+ if (!err) {
+ *defrag = true;
+- cb.mru = IP6CB(skb)->frag_max_size;
++ mru = IP6CB(skb)->frag_max_size;
+ }
+ #else
+ err = -EOPNOTSUPP;
+@@ -744,7 +742,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+ }
+
+ if (err != -EINPROGRESS)
+- *qdisc_skb_cb(skb) = cb;
++ tc_skb_cb(skb)->mru = mru;
+ skb_clear_hash(skb);
+ skb->ignore_df = 1;
+ return err;
+@@ -762,7 +760,7 @@ static void tcf_ct_params_free(struct rcu_head *head)
+ tcf_ct_flow_table_put(params);
+
+ if (params->tmpl)
+- nf_conntrack_put(&params->tmpl->ct_general);
++ nf_ct_put(params->tmpl);
+ kfree(params);
+ }
+
+@@ -839,6 +837,12 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+ }
+
+ err = nf_nat_packet(ct, ctinfo, hooknum, skb);
++ if (err == NF_ACCEPT) {
++ if (maniptype == NF_NAT_MANIP_SRC)
++ tc_skb_cb(skb)->post_ct_snat = 1;
++ if (maniptype == NF_NAT_MANIP_DST)
++ tc_skb_cb(skb)->post_ct_dnat = 1;
++ }
+ out:
+ return err;
+ }
+@@ -852,9 +856,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
+ if (!mask)
+ return;
+
+- new_mark = mark | (ct->mark & ~(mask));
+- if (ct->mark != new_mark) {
+- ct->mark = new_mark;
++ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
++ if (READ_ONCE(ct->mark) != new_mark) {
++ WRITE_ONCE(ct->mark, new_mark);
+ if (nf_ct_is_confirmed(ct))
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ }
+@@ -963,10 +967,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+ tcf_action_update_bstats(&c->common, skb);
+
+ if (clear) {
+- qdisc_skb_cb(skb)->post_ct = false;
++ tc_skb_cb(skb)->post_ct = false;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+- nf_conntrack_put(&ct->ct_general);
++ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ }
+
+@@ -1048,7 +1052,8 @@ do_nat:
+ out_push:
+ skb_push_rcsum(skb, nh_ofs);
+
+- qdisc_skb_cb(skb)->post_ct = true;
++ tc_skb_cb(skb)->post_ct = true;
++ tc_skb_cb(skb)->zone = p->zone;
+ out_clear:
+ if (defrag)
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+@@ -1227,7 +1232,6 @@ static int tcf_ct_fill_params(struct net *net,
+ return -ENOMEM;
+ }
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+- nf_conntrack_get(&tmpl->ct_general);
+ p->tmpl = tmpl;
+
+ return 0;
+@@ -1302,7 +1306,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
+
+ err = tcf_ct_flow_table_get(params);
+ if (err)
+- goto cleanup;
++ goto cleanup_params;
+
+ spin_lock_bh(&c->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+@@ -1317,6 +1321,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
+
+ return res;
+
++cleanup_params:
++ if (params->tmpl)
++ nf_ct_put(params->tmpl);
+ cleanup:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
+index 549374a2d0086..56e0a5eb64942 100644
+--- a/net/sched/act_ctinfo.c
++++ b/net/sched/act_ctinfo.c
+@@ -33,7 +33,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+ {
+ u8 dscp, newdscp;
+
+- newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
++ newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+ ~INET_ECN_MASK;
+
+ switch (proto) {
+@@ -73,7 +73,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+ struct sk_buff *skb)
+ {
+ ca->stats_cpmark_set++;
+- skb->mark = ct->mark & cp->cpmarkmask;
++ skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
+ }
+
+ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+@@ -92,7 +92,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+ cp = rcu_dereference_bh(ca->params);
+
+ tcf_lastuse_update(&ca->tcf_tm);
+- bstats_update(&ca->tcf_bstats, skb);
++ tcf_action_update_bstats(&ca->common, skb);
+ action = READ_ONCE(ca->tcf_action);
+
+ wlen = skb_network_offset(skb);
+@@ -131,7 +131,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+ }
+
+ if (cp->mode & CTINFO_MODE_DSCP)
+- if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
++ if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask))
+ tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
+
+ if (cp->mode & CTINFO_MODE_CPMARK)
+@@ -211,8 +211,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
+ index = actparm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (!err) {
+- ret = tcf_idr_create(tn, index, est, a,
+- &act_ctinfo_ops, bind, false, 0);
++ ret = tcf_idr_create_from_flags(tn, index, est, a,
++ &act_ctinfo_ops, bind, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
+index 7df72a4197a3f..ac985c53ebafe 100644
+--- a/net/sched/act_gate.c
++++ b/net/sched/act_gate.c
+@@ -357,7 +357,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
+
+ if (!err) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_gate_ops, bind, false, 0);
++ &act_gate_ops, bind, false, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
+index 7064a365a1a98..ec987ec758070 100644
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -553,7 +553,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
+- bind, true, 0);
++ bind, true, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ kfree(p);
+diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
+index 265b1443e252f..ebd403f571ea5 100644
+--- a/net/sched/act_ipt.c
++++ b/net/sched/act_ipt.c
+@@ -50,7 +50,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
+ par.entryinfo = &e;
+ par.target = target;
+ par.targinfo = t->data;
+- par.hook_mask = hook;
++ par.hook_mask = 1 << hook;
+ par.family = NFPROTO_IPV4;
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+@@ -87,7 +87,8 @@ static void tcf_ipt_release(struct tc_action *a)
+
+ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
+ [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
+- [TCA_IPT_HOOK] = { .type = NLA_U32 },
++ [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING,
++ NF_INET_NUMHOOKS),
+ [TCA_IPT_INDEX] = { .type = NLA_U32 },
+ [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
+ };
+@@ -145,7 +146,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a, ops, bind,
+- false, 0);
++ false, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+@@ -160,15 +161,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
+ return -EEXIST;
+ }
+ }
++
++ err = -EINVAL;
+ hook = nla_get_u32(tb[TCA_IPT_HOOK]);
++ switch (hook) {
++ case NF_INET_PRE_ROUTING:
++ break;
++ case NF_INET_POST_ROUTING:
++ break;
++ default:
++ goto err1;
++ }
++
++ if (tb[TCA_IPT_TABLE]) {
++ /* mangle only for now */
++ if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle"))
++ goto err1;
++ }
+
+- err = -ENOMEM;
+- tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
++ tname = kstrdup("mangle", GFP_KERNEL);
+ if (unlikely(!tname))
+ goto err1;
+- if (tb[TCA_IPT_TABLE] == NULL ||
+- nla_strscpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
+- strcpy(tname, "mangle");
+
+ t = kmemdup(td, td->u.target_size, GFP_KERNEL);
+ if (unlikely(!t))
+diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
+index d64b0eeccbe4d..97cd4b2377d69 100644
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -19,6 +19,7 @@
+ #include <linux/if_arp.h>
+ #include <net/net_namespace.h>
+ #include <net/netlink.h>
++#include <net/dst.h>
+ #include <net/pkt_sched.h>
+ #include <net/pkt_cls.h>
+ #include <linux/tc_act/tc_mirred.h>
+@@ -27,8 +28,8 @@
+ static LIST_HEAD(mirred_list);
+ static DEFINE_SPINLOCK(mirred_list_lock);
+
+-#define MIRRED_RECURSION_LIMIT 4
+-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
++#define MIRRED_NEST_LIMIT 4
++static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
+
+ static bool tcf_mirred_is_act_redirect(int action)
+ {
+@@ -203,12 +204,19 @@ release_idr:
+ return err;
+ }
+
++static bool is_mirred_nested(void)
++{
++ return unlikely(__this_cpu_read(mirred_nest_level) > 1);
++}
++
+ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+ {
+ int err;
+
+ if (!want_ingress)
+ err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
++ else if (is_mirred_nested())
++ err = netif_rx(skb);
+ else
+ err = netif_receive_skb(skb);
+
+@@ -222,21 +230,22 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+ struct sk_buff *skb2 = skb;
+ bool m_mac_header_xmit;
+ struct net_device *dev;
+- unsigned int rec_level;
++ unsigned int nest_level;
+ int retval, err = 0;
+ bool use_reinsert;
+ bool want_ingress;
+ bool is_redirect;
+ bool expects_nh;
++ bool at_ingress;
+ int m_eaction;
+ int mac_len;
+ bool at_nh;
+
+- rec_level = __this_cpu_inc_return(mirred_rec_level);
+- if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
++ nest_level = __this_cpu_inc_return(mirred_nest_level);
++ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+- __this_cpu_dec(mirred_rec_level);
++ __this_cpu_dec(mirred_nest_level);
+ return TC_ACT_SHOT;
+ }
+
+@@ -252,7 +261,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+ goto out;
+ }
+
+- if (unlikely(!(dev->flags & IFF_UP))) {
++ if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
+ net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
+ dev->name);
+ goto out;
+@@ -263,7 +272,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+ * ingress - that covers the TC S/W datapath.
+ */
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+- use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
++ at_ingress = skb_at_tc_ingress(skb);
++ use_reinsert = at_ingress && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!use_reinsert) {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+@@ -271,10 +281,12 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+ goto out;
+ }
+
++ want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
++
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+-
+- want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
++ if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
++ skb_dst_drop(skb2);
+
+ expects_nh = want_ingress || !m_mac_header_xmit;
+ at_nh = skb->data == skb_network_header(skb);
+@@ -303,7 +315,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+ err = tcf_mirred_forward(res->ingress, skb);
+ if (err)
+ tcf_action_inc_overlimit_qstats(&m->common);
+- __this_cpu_dec(mirred_rec_level);
++ __this_cpu_dec(mirred_nest_level);
+ return TC_ACT_CONSUMED;
+ }
+ }
+@@ -315,7 +327,7 @@ out:
+ if (tcf_mirred_is_act_redirect(m_eaction))
+ retval = TC_ACT_SHOT;
+ }
+- __this_cpu_dec(mirred_rec_level);
++ __this_cpu_dec(mirred_nest_level);
+
+ return retval;
+ }
+diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
+index e4529b428cf44..d010c5b8e83b1 100644
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -133,6 +133,11 @@ static int valid_label(const struct nlattr *attr,
+ {
+ const u32 *label = nla_data(attr);
+
++ if (nla_len(attr) != sizeof(*label)) {
++ NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length");
++ return -EINVAL;
++ }
++
+ if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
+ NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
+ return -EINVAL;
+@@ -144,7 +149,8 @@ static int valid_label(const struct nlattr *attr,
+ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
+ [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
+ [TCA_MPLS_PROTO] = { .type = NLA_U16 },
+- [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
++ [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
++ valid_label),
+ [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7),
+ [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
+ [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
+@@ -183,40 +189,67 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
+ parm = nla_data(tb[TCA_MPLS_PARMS]);
+ index = parm->index;
+
++ err = tcf_idr_check_alloc(tn, &index, a, bind);
++ if (err < 0)
++ return err;
++ exists = err;
++ if (exists && bind)
++ return 0;
++
++ if (!exists) {
++ ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind,
++ true, flags);
++ if (ret) {
++ tcf_idr_cleanup(tn, index);
++ return ret;
++ }
++
++ ret = ACT_P_CREATED;
++ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
++ tcf_idr_release(*a, bind);
++ return -EEXIST;
++ }
++
+ /* Verify parameters against action type. */
+ switch (parm->m_action) {
+ case TCA_MPLS_ACT_POP:
+ if (!tb[TCA_MPLS_PROTO]) {
+ NL_SET_ERR_MSG_MOD(extack, "Protocol must be set for MPLS pop");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ if (!eth_proto_is_802_3(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid protocol type for MPLS pop");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ if (tb[TCA_MPLS_LABEL] || tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] ||
+ tb[TCA_MPLS_BOS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC or BOS cannot be used with MPLS pop");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ break;
+ case TCA_MPLS_ACT_DEC_TTL:
+ if (tb[TCA_MPLS_PROTO] || tb[TCA_MPLS_LABEL] ||
+ tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] || tb[TCA_MPLS_BOS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC, BOS or protocol cannot be used with MPLS dec_ttl");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ break;
+ case TCA_MPLS_ACT_PUSH:
+ case TCA_MPLS_ACT_MAC_PUSH:
+ if (!tb[TCA_MPLS_LABEL]) {
+ NL_SET_ERR_MSG_MOD(extack, "Label is required for MPLS push");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ if (tb[TCA_MPLS_PROTO] &&
+ !eth_p_mpls(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
+ NL_SET_ERR_MSG_MOD(extack, "Protocol must be an MPLS type for MPLS push");
+- return -EPROTONOSUPPORT;
++ err = -EPROTONOSUPPORT;
++ goto release_idr;
+ }
+ /* Push needs a TTL - if not specified, set a default value. */
+ if (!tb[TCA_MPLS_TTL]) {
+@@ -231,33 +264,14 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
+ case TCA_MPLS_ACT_MODIFY:
+ if (tb[TCA_MPLS_PROTO]) {
+ NL_SET_ERR_MSG_MOD(extack, "Protocol cannot be used with MPLS modify");
+- return -EINVAL;
++ err = -EINVAL;
++ goto release_idr;
+ }
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unknown MPLS action");
+- return -EINVAL;
+- }
+-
+- err = tcf_idr_check_alloc(tn, &index, a, bind);
+- if (err < 0)
+- return err;
+- exists = err;
+- if (exists && bind)
+- return 0;
+-
+- if (!exists) {
+- ret = tcf_idr_create(tn, index, est, a,
+- &act_mpls_ops, bind, true, 0);
+- if (ret) {
+- tcf_idr_cleanup(tn, index);
+- return ret;
+- }
+-
+- ret = ACT_P_CREATED;
+- } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+- tcf_idr_release(*a, bind);
+- return -EEXIST;
++ err = -EINVAL;
++ goto release_idr;
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
+index 7dd6b586ba7f6..2a39b3729e844 100644
+--- a/net/sched/act_nat.c
++++ b/net/sched/act_nat.c
+@@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (!err) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_nat_ops, bind, false, 0);
++ &act_nat_ops, bind, false, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
+index c6c862c459cc3..df9ff123a7eec 100644
+--- a/net/sched/act_pedit.c
++++ b/net/sched/act_pedit.c
+@@ -13,7 +13,10 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
+ #include <linux/slab.h>
++#include <net/ipv6.h>
+ #include <net/netlink.h>
+ #include <net/pkt_sched.h>
+ #include <linux/tc_act/tc_pedit.h>
+@@ -26,6 +29,7 @@ static struct tc_action_ops act_pedit_ops;
+
+ static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
+ [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
++ [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) },
+ [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED },
+ };
+
+@@ -134,6 +138,17 @@ nla_failure:
+ return -EINVAL;
+ }
+
++static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
++{
++ struct tcf_pedit_parms *parms =
++ container_of(head, struct tcf_pedit_parms, rcu);
++
++ kfree(parms->tcfp_keys_ex);
++ kfree(parms->tcfp_keys);
++
++ kfree(parms);
++}
++
+ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp, u32 flags,
+@@ -141,15 +156,14 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
+ {
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
+- struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
+- struct tc_pedit_key *keys = NULL;
+- struct tcf_pedit_key_ex *keys_ex;
++ struct tcf_pedit_parms *oparms, *nparms;
++ struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tc_pedit *parm;
+ struct nlattr *pattr;
+ struct tcf_pedit *p;
+ int ret = 0, err;
+- int ksize;
++ int i, ksize;
+ u32 index;
+
+ if (!nla) {
+@@ -171,93 +185,125 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
+ }
+
+ parm = nla_data(pattr);
+- if (!parm->nkeys) {
+- NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+- return -EINVAL;
+- }
+- ksize = parm->nkeys * sizeof(struct tc_pedit_key);
+- if (nla_len(pattr) < sizeof(*parm) + ksize) {
+- NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
+- return -EINVAL;
+- }
+-
+- keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+- if (IS_ERR(keys_ex))
+- return PTR_ERR(keys_ex);
+
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (!err) {
+- ret = tcf_idr_create(tn, index, est, a,
+- &act_pedit_ops, bind, false, 0);
++ ret = tcf_idr_create_from_flags(tn, index, est, a,
++ &act_pedit_ops, bind, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+- goto out_free;
++ return ret;
+ }
+ ret = ACT_P_CREATED;
+ } else if (err > 0) {
+ if (bind)
+- goto out_free;
++ return 0;
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ ret = -EEXIST;
+ goto out_release;
+ }
+ } else {
+- ret = err;
++ return err;
++ }
++
++ if (!parm->nkeys) {
++ NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
++ ret = -EINVAL;
++ goto out_release;
++ }
++ ksize = parm->nkeys * sizeof(struct tc_pedit_key);
++ if (nla_len(pattr) < sizeof(*parm) + ksize) {
++ NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
++ ret = -EINVAL;
++ goto out_release;
++ }
++
++ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
++ if (!nparms) {
++ ret = -ENOMEM;
++ goto out_release;
++ }
++
++ nparms->tcfp_keys_ex =
++ tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
++ if (IS_ERR(nparms->tcfp_keys_ex)) {
++ ret = PTR_ERR(nparms->tcfp_keys_ex);
+ goto out_free;
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+- goto out_release;
++ goto out_free_ex;
+ }
+- p = to_pedit(*a);
+- spin_lock_bh(&p->tcf_lock);
+
+- if (ret == ACT_P_CREATED ||
+- (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
+- keys = kmalloc(ksize, GFP_ATOMIC);
+- if (!keys) {
+- spin_unlock_bh(&p->tcf_lock);
+- ret = -ENOMEM;
+- goto put_chain;
+- }
+- kfree(p->tcfp_keys);
+- p->tcfp_keys = keys;
+- p->tcfp_nkeys = parm->nkeys;
++ nparms->tcfp_off_max_hint = 0;
++ nparms->tcfp_flags = parm->flags;
++ nparms->tcfp_nkeys = parm->nkeys;
++
++ nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
++ if (!nparms->tcfp_keys) {
++ ret = -ENOMEM;
++ goto put_chain;
+ }
+- memcpy(p->tcfp_keys, parm->keys, ksize);
+
+- p->tcfp_flags = parm->flags;
+- goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
++ memcpy(nparms->tcfp_keys, parm->keys, ksize);
++
++ for (i = 0; i < nparms->tcfp_nkeys; ++i) {
++ u32 cur = nparms->tcfp_keys[i].off;
+
+- kfree(p->tcfp_keys_ex);
+- p->tcfp_keys_ex = keys_ex;
++ /* sanitize the shift value for any later use */
++ nparms->tcfp_keys[i].shift = min_t(size_t,
++ BITS_PER_TYPE(int) - 1,
++ nparms->tcfp_keys[i].shift);
++
++ /* The AT option can read a single byte, we can bound the actual
++ * value with uchar max.
++ */
++ cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
++
++ /* Each key touches 4 bytes starting from the computed offset */
++ nparms->tcfp_off_max_hint =
++ max(nparms->tcfp_off_max_hint, cur + 4);
++ }
+
++ p = to_pedit(*a);
++
++ spin_lock_bh(&p->tcf_lock);
++ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
++ oparms = rcu_replace_pointer(p->parms, nparms, 1);
+ spin_unlock_bh(&p->tcf_lock);
++
++ if (oparms)
++ call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
++
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
++
+ return ret;
+
+ put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
++out_free_ex:
++ kfree(nparms->tcfp_keys_ex);
++out_free:
++ kfree(nparms);
+ out_release:
+ tcf_idr_release(*a, bind);
+-out_free:
+- kfree(keys_ex);
+ return ret;
+-
+ }
+
+ static void tcf_pedit_cleanup(struct tc_action *a)
+ {
+ struct tcf_pedit *p = to_pedit(a);
+- struct tc_pedit_key *keys = p->tcfp_keys;
++ struct tcf_pedit_parms *parms;
++
++ parms = rcu_dereference_protected(p->parms, 1);
+
+- kfree(keys);
+- kfree(p->tcfp_keys_ex);
++ if (parms)
++ call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
+ }
+
+ static bool offset_valid(struct sk_buff *skb, int offset)
+@@ -271,11 +317,35 @@ static bool offset_valid(struct sk_buff *skb, int offset)
+ return true;
+ }
+
+-static int pedit_skb_hdr_offset(struct sk_buff *skb,
+- enum pedit_header_type htype, int *hoffset)
++static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
+ {
++ const int noff = skb_network_offset(skb);
+ int ret = -EINVAL;
++ struct iphdr _iph;
+
++ switch (skb->protocol) {
++ case htons(ETH_P_IP): {
++ const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph);
++
++ if (!iph)
++ goto out;
++ *hoffset = noff + iph->ihl * 4;
++ ret = 0;
++ break;
++ }
++ case htons(ETH_P_IPV6):
++ ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL;
++ break;
++ }
++out:
++ return ret;
++}
++
++static int pedit_skb_hdr_offset(struct sk_buff *skb,
++ enum pedit_header_type htype, int *hoffset)
++{
++ int ret = -EINVAL;
++ /* 'htype' is validated in the netlink parsing */
+ switch (htype) {
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+ if (skb_mac_header_was_set(skb)) {
+@@ -290,120 +360,120 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb,
+ ret = 0;
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
++ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP);
++ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+- if (skb_transport_header_was_set(skb)) {
+- *hoffset = skb_transport_offset(skb);
+- ret = 0;
+- }
++ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP);
+ break;
+ default:
+- ret = -EINVAL;
+ break;
+ }
+-
+ return ret;
+ }
+
+ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+ {
++ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
++ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ struct tcf_pedit *p = to_pedit(a);
++ struct tcf_pedit_key_ex *tkey_ex;
++ struct tcf_pedit_parms *parms;
++ struct tc_pedit_key *tkey;
++ u32 max_offset;
+ int i;
+
+- if (skb_unclone(skb, GFP_ATOMIC))
+- return p->tcf_action;
++ parms = rcu_dereference_bh(p->parms);
+
+- spin_lock(&p->tcf_lock);
++ max_offset = (skb_transport_header_was_set(skb) ?
++ skb_transport_offset(skb) :
++ skb_network_offset(skb)) +
++ parms->tcfp_off_max_hint;
++ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
++ goto done;
+
+ tcf_lastuse_update(&p->tcf_tm);
++ tcf_action_update_bstats(&p->common, skb);
+
+- if (p->tcfp_nkeys > 0) {
+- struct tc_pedit_key *tkey = p->tcfp_keys;
+- struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
+- enum pedit_header_type htype =
+- TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+- enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+-
+- for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
+- u32 *ptr, hdata;
+- int offset = tkey->off;
+- int hoffset;
+- u32 val;
+- int rc;
+-
+- if (tkey_ex) {
+- htype = tkey_ex->htype;
+- cmd = tkey_ex->cmd;
+-
+- tkey_ex++;
+- }
++ tkey = parms->tcfp_keys;
++ tkey_ex = parms->tcfp_keys_ex;
+
+- rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+- if (rc) {
+- pr_info("tc action pedit bad header type specified (0x%x)\n",
+- htype);
+- goto bad;
+- }
++ for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
++ int offset = tkey->off;
++ int hoffset = 0;
++ u32 *ptr, hdata;
++ u32 val;
++ int rc;
+
+- if (tkey->offmask) {
+- u8 *d, _d;
+-
+- if (!offset_valid(skb, hoffset + tkey->at)) {
+- pr_info("tc action pedit 'at' offset %d out of bounds\n",
+- hoffset + tkey->at);
+- goto bad;
+- }
+- d = skb_header_pointer(skb, hoffset + tkey->at,
+- sizeof(_d), &_d);
+- if (!d)
+- goto bad;
+- offset += (*d & tkey->offmask) >> tkey->shift;
+- }
++ if (tkey_ex) {
++ htype = tkey_ex->htype;
++ cmd = tkey_ex->cmd;
+
+- if (offset % 4) {
+- pr_info("tc action pedit offset must be on 32 bit boundaries\n");
+- goto bad;
+- }
++ tkey_ex++;
++ }
+
+- if (!offset_valid(skb, hoffset + offset)) {
+- pr_info("tc action pedit offset %d out of bounds\n",
+- hoffset + offset);
+- goto bad;
+- }
++ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
++ if (rc) {
++ pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype);
++ goto bad;
++ }
+
+- ptr = skb_header_pointer(skb, hoffset + offset,
+- sizeof(hdata), &hdata);
+- if (!ptr)
+- goto bad;
+- /* just do it, baby */
+- switch (cmd) {
+- case TCA_PEDIT_KEY_EX_CMD_SET:
+- val = tkey->val;
+- break;
+- case TCA_PEDIT_KEY_EX_CMD_ADD:
+- val = (*ptr + tkey->val) & ~tkey->mask;
+- break;
+- default:
+- pr_info("tc action pedit bad command (%d)\n",
+- cmd);
++ if (tkey->offmask) {
++ u8 *d, _d;
++
++ if (!offset_valid(skb, hoffset + tkey->at)) {
++ pr_info("tc action pedit 'at' offset %d out of bounds\n",
++ hoffset + tkey->at);
+ goto bad;
+ }
++ d = skb_header_pointer(skb, hoffset + tkey->at,
++ sizeof(_d), &_d);
++ if (!d)
++ goto bad;
++ offset += (*d & tkey->offmask) >> tkey->shift;
++ }
+
+- *ptr = ((*ptr & tkey->mask) ^ val);
+- if (ptr == &hdata)
+- skb_store_bits(skb, hoffset + offset, ptr, 4);
++ if (offset % 4) {
++ pr_info("tc action pedit offset must be on 32 bit boundaries\n");
++ goto bad;
+ }
+
+- goto done;
+- } else {
+- WARN(1, "pedit BUG: index %d\n", p->tcf_index);
++ if (!offset_valid(skb, hoffset + offset)) {
++ pr_info("tc action pedit offset %d out of bounds\n",
++ hoffset + offset);
++ goto bad;
++ }
++
++ ptr = skb_header_pointer(skb, hoffset + offset,
++ sizeof(hdata), &hdata);
++ if (!ptr)
++ goto bad;
++ /* just do it, baby */
++ switch (cmd) {
++ case TCA_PEDIT_KEY_EX_CMD_SET:
++ val = tkey->val;
++ break;
++ case TCA_PEDIT_KEY_EX_CMD_ADD:
++ val = (*ptr + tkey->val) & ~tkey->mask;
++ break;
++ default:
++ pr_info("tc action pedit bad command (%d)\n",
++ cmd);
++ goto bad;
++ }
++
++ *ptr = ((*ptr & tkey->mask) ^ val);
++ if (ptr == &hdata)
++ skb_store_bits(skb, hoffset + offset, ptr, 4);
+ }
+
++ goto done;
++
+ bad:
++ spin_lock(&p->tcf_lock);
+ p->tcf_qstats.overlimits++;
+-done:
+- bstats_update(&p->tcf_bstats, skb);
+ spin_unlock(&p->tcf_lock);
++done:
+ return p->tcf_action;
+ }
+
+@@ -422,30 +492,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
+ {
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_pedit *p = to_pedit(a);
++ struct tcf_pedit_parms *parms;
+ struct tc_pedit *opt;
+ struct tcf_t t;
+ int s;
+
+- s = struct_size(opt, keys, p->tcfp_nkeys);
++ spin_lock_bh(&p->tcf_lock);
++ parms = rcu_dereference_protected(p->parms, 1);
++ s = struct_size(opt, keys, parms->tcfp_nkeys);
+
+- /* netlink spinlocks held above us - must use ATOMIC */
+ opt = kzalloc(s, GFP_ATOMIC);
+- if (unlikely(!opt))
++ if (unlikely(!opt)) {
++ spin_unlock_bh(&p->tcf_lock);
+ return -ENOBUFS;
++ }
+
+- spin_lock_bh(&p->tcf_lock);
+- memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
++ memcpy(opt->keys, parms->tcfp_keys,
++ flex_array_size(opt, keys, parms->tcfp_nkeys));
+ opt->index = p->tcf_index;
+- opt->nkeys = p->tcfp_nkeys;
+- opt->flags = p->tcfp_flags;
++ opt->nkeys = parms->tcfp_nkeys;
++ opt->flags = parms->tcfp_flags;
+ opt->action = p->tcf_action;
+ opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+ opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
+
+- if (p->tcfp_keys_ex) {
+- if (tcf_pedit_key_ex_dump(skb,
+- p->tcfp_keys_ex,
+- p->tcfp_nkeys))
++ if (parms->tcfp_keys_ex) {
++ if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
++ parms->tcfp_nkeys))
+ goto nla_put_failure;
+
+ if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
+diff --git a/net/sched/act_police.c b/net/sched/act_police.c
+index 832157a840fc3..db1d021c16be8 100644
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -90,7 +90,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, NULL, a,
+- &act_police_ops, bind, true, 0);
++ &act_police_ops, bind, true, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+@@ -239,6 +239,20 @@ release_idr:
+ return err;
+ }
+
++static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
++{
++ u32 len;
++
++ if (skb_is_gso(skb))
++ return skb_gso_validate_mac_len(skb, limit);
++
++ len = qdisc_pkt_len(skb);
++ if (skb_at_tc_ingress(skb))
++ len += skb->mac_len;
++
++ return len <= limit;
++}
++
+ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+ {
+@@ -261,7 +275,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
+ goto inc_overlimits;
+ }
+
+- if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
++ if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
+ if (!p->rate_present && !p->pps_present) {
+ ret = p->tcfp_result;
+ goto end;
+@@ -352,23 +366,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
+ opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
+ if (p->rate_present) {
+ psched_ratecfg_getrate(&opt.rate, &p->rate);
+- if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
++ if ((p->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
+- police->params->rate.rate_bytes_ps,
++ p->rate.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
+ if (p->peak_present) {
+ psched_ratecfg_getrate(&opt.peakrate, &p->peak);
+- if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
++ if ((p->peak.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
+- police->params->peak.rate_bytes_ps,
++ p->peak.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
+ if (p->pps_present) {
+ if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+- police->params->ppsrate.rate_pkts_ps,
++ p->ppsrate.rate_pkts_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
+index 230501eb9e069..ca67d96449176 100644
+--- a/net/sched/act_sample.c
++++ b/net/sched/act_sample.c
+@@ -55,8 +55,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
+ sample_policy, NULL);
+ if (ret < 0)
+ return ret;
+- if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
+- !tb[TCA_SAMPLE_PSAMPLE_GROUP])
++
++ if (!tb[TCA_SAMPLE_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_SAMPLE_PARMS]);
+@@ -70,7 +70,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_sample_ops, bind, true, 0);
++ &act_sample_ops, bind, true, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+@@ -80,6 +80,13 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
++
++ if (!tb[TCA_SAMPLE_RATE] || !tb[TCA_SAMPLE_PSAMPLE_GROUP]) {
++ NL_SET_ERR_MSG(extack, "sample rate and group are required");
++ err = -EINVAL;
++ goto release_idr;
++ }
++
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
+index cbbe1861d3a20..7885271540259 100644
+--- a/net/sched/act_simple.c
++++ b/net/sched/act_simple.c
+@@ -128,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_simp_ops, bind, false, 0);
++ &act_simp_ops, bind, false, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
+index 6054185383474..6088ceaf582e8 100644
+--- a/net/sched/act_skbedit.c
++++ b/net/sched/act_skbedit.c
+@@ -176,7 +176,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_skbedit_ops, bind, true, 0);
++ &act_skbedit_ops, bind, true, act_flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
+index ecb9ee6660954..ee9cc0abf9e10 100644
+--- a/net/sched/act_skbmod.c
++++ b/net/sched/act_skbmod.c
+@@ -168,7 +168,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a,
+- &act_skbmod_ops, bind, true, 0);
++ &act_skbmod_ops, bind, true, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
+index 2ef8f5a6205a9..a5864ddfb8902 100644
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -41,8 +41,6 @@
+ #include <net/tc_act/tc_gate.h>
+ #include <net/flow_offload.h>
+
+-extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+-
+ /* The list of all installed classifier types */
+ static LIST_HEAD(tcf_proto_base);
+
+@@ -535,8 +533,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+ {
+ struct tcf_block *block = chain->block;
+ const struct tcf_proto_ops *tmplt_ops;
++ unsigned int refcnt, non_act_refcnt;
+ bool free_block = false;
+- unsigned int refcnt;
+ void *tmplt_priv;
+
+ mutex_lock(&block->lock);
+@@ -556,13 +554,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+ * save these to temporary variables.
+ */
+ refcnt = --chain->refcnt;
++ non_act_refcnt = refcnt - chain->action_refcnt;
+ tmplt_ops = chain->tmplt_ops;
+ tmplt_priv = chain->tmplt_priv;
+
+- /* The last dropped non-action reference will trigger notification. */
+- if (refcnt - chain->action_refcnt == 0 && !by_act) {
+- tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
+- block, NULL, 0, 0, false);
++ if (non_act_refcnt == chain->explicitly_created && !by_act) {
++ if (non_act_refcnt == 0)
++ tc_chain_notify_delete(tmplt_ops, tmplt_priv,
++ chain->index, block, NULL, 0, 0,
++ false);
+ /* Last reference to chain, no need to lock. */
+ chain->flushing = false;
+ }
+@@ -1044,7 +1044,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+
+ /* Find qdisc */
+ if (!*parent) {
+- *q = dev->qdisc;
++ *q = rcu_dereference(dev->qdisc);
+ *parent = (*q)->handle;
+ } else {
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
+@@ -1465,6 +1465,7 @@ static int tcf_block_bind(struct tcf_block *block,
+
+ err_unroll:
+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
++ list_del(&block_cb->driver_list);
+ if (i-- > 0) {
+ list_del(&block_cb->list);
+ tcf_block_playback_offloads(block, block_cb->cb,
+@@ -1617,12 +1618,17 @@ int tcf_classify(struct sk_buff *skb,
+
+ /* If we missed on some chain */
+ if (ret == TC_ACT_UNSPEC && last_executed_chain) {
++ struct tc_skb_cb *cb = tc_skb_cb(skb);
++
+ ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+ ext->chain = last_executed_chain;
+- ext->mru = qdisc_skb_cb(skb)->mru;
+- ext->post_ct = qdisc_skb_cb(skb)->post_ct;
++ ext->mru = cb->mru;
++ ext->post_ct = cb->post_ct;
++ ext->post_ct_snat = cb->post_ct_snat;
++ ext->post_ct_dnat = cb->post_ct_dnat;
++ ext->zone = cb->zone;
+ }
+
+ return ret;
+@@ -1648,10 +1654,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain,
+ if (chain->flushing)
+ return -EAGAIN;
+
++ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
+ if (*chain_info->pprev == chain->filter_chain)
+ tcf_chain0_head_change(chain, tp);
+ tcf_proto_get(tp);
+- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
+ rcu_assign_pointer(*chain_info->pprev, tp);
+
+ return 0;
+@@ -1940,9 +1946,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+ bool prio_allocate;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
++ struct Qdisc *q;
+ struct tcf_chain_info chain_info;
+- struct tcf_chain *chain = NULL;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ struct tcf_proto *tp;
+ unsigned long cl;
+@@ -1971,6 +1977,8 @@ replay:
+ tp = NULL;
+ cl = 0;
+ block = NULL;
++ q = NULL;
++ chain = NULL;
+ flags = 0;
+
+ if (prio == 0) {
+@@ -2110,6 +2118,7 @@ replay:
+ }
+
+ if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
++ tfilter_put(tp, fh);
+ NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
+ err = -EINVAL;
+ goto errout;
+@@ -2580,7 +2589,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+
+ parent = tcm->tcm_parent;
+ if (!parent)
+- q = dev->qdisc;
++ q = rtnl_dereference(dev->qdisc);
+ else
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ if (!q)
+@@ -2759,6 +2768,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
+ return PTR_ERR(ops);
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
++ module_put(ops->owner);
+ return -EOPNOTSUPP;
+ }
+
+@@ -2793,8 +2803,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+ struct tcmsg *t;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
+- struct tcf_chain *chain = NULL;
++ struct Qdisc *q;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ unsigned long cl;
+ int err;
+@@ -2804,6 +2814,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+ return -EPERM;
+
+ replay:
++ q = NULL;
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+@@ -2954,7 +2965,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
+ return skb->len;
+
+ if (!tcm->tcm_parent)
+- q = dev->qdisc;
++ q = rtnl_dereference(dev->qdisc);
+ else
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+
+@@ -3687,6 +3698,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
+ entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+ break;
+ default:
++ err = -EOPNOTSUPP;
+ goto err_out_locked;
+ }
+ } else if (is_tcf_skbedit_ptype(act)) {
+diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
+index df19a847829e8..b7c46a93a4121 100644
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -402,56 +402,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+ return 0;
+ }
+
+-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+- struct cls_bpf_prog *prog, unsigned long base,
+- struct nlattr **tb, struct nlattr *est, u32 flags,
+- struct netlink_ext_ack *extack)
+-{
+- bool is_bpf, is_ebpf, have_exts = false;
+- u32 gen_flags = 0;
+- int ret;
+-
+- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+- is_ebpf = tb[TCA_BPF_FD];
+- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
+- return -EINVAL;
+-
+- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
+- extack);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_FLAGS]) {
+- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+-
+- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+- return -EINVAL;
+-
+- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+- }
+- if (tb[TCA_BPF_FLAGS_GEN]) {
+- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+- !tc_flags_valid(gen_flags))
+- return -EINVAL;
+- }
+-
+- prog->exts_integrated = have_exts;
+- prog->gen_flags = gen_flags;
+-
+- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_CLASSID]) {
+- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+- tcf_bind_filter(tp, &prog->res, base);
+- }
+-
+- return 0;
+-}
+-
+ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+@@ -459,9 +409,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct netlink_ext_ack *extack)
+ {
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
++ bool is_bpf, is_ebpf, have_exts = false;
+ struct cls_bpf_prog *oldprog = *arg;
+ struct nlattr *tb[TCA_BPF_MAX + 1];
++ bool bound_to_filter = false;
+ struct cls_bpf_prog *prog;
++ u32 gen_flags = 0;
+ int ret;
+
+ if (tca[TCA_OPTIONS] == NULL)
+@@ -500,11 +453,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ goto errout;
+ prog->handle = handle;
+
+- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
+- extack);
++ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
++ is_ebpf = tb[TCA_BPF_FD];
++ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
++ flags, extack);
++ if (ret < 0)
++ goto errout_idr;
++
++ if (tb[TCA_BPF_FLAGS]) {
++ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
++
++ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
++ }
++ if (tb[TCA_BPF_FLAGS_GEN]) {
++ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
++ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
++ !tc_flags_valid(gen_flags)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++ }
++
++ prog->exts_integrated = have_exts;
++ prog->gen_flags = gen_flags;
++
++ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
++ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+ if (ret < 0)
+ goto errout_idr;
+
++ if (tb[TCA_BPF_CLASSID]) {
++ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
++ tcf_bind_filter(tp, &prog->res, base);
++ bound_to_filter = true;
++ }
++
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
+ if (ret)
+ goto errout_parms;
+@@ -526,6 +519,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+
+ errout_parms:
++ if (bound_to_filter)
++ tcf_unbind_filter(tp, &prog->res);
+ cls_bpf_free_parms(prog);
+ errout_idr:
+ if (!oldprog)
+diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
+index eb6345a027e13..bff0a5f24aca8 100644
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -19,6 +19,7 @@
+
+ #include <net/sch_generic.h>
+ #include <net/pkt_cls.h>
++#include <net/pkt_sched.h>
+ #include <net/ip.h>
+ #include <net/flow_dissector.h>
+ #include <net/geneve.h>
+@@ -309,7 +310,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+ {
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+- bool post_ct = qdisc_skb_cb(skb)->post_ct;
++ bool post_ct = tc_skb_cb(skb)->post_ct;
++ u16 zone = tc_skb_cb(skb)->zone;
+ struct fl_flow_key skb_key;
+ struct fl_flow_mask *mask;
+ struct cls_fl_filter *f;
+@@ -327,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
+ fl_ct_info_to_flower_map,
+ ARRAY_SIZE(fl_ct_info_to_flower_map),
+- post_ct);
++ post_ct, zone);
+ skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
+ skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
+
+@@ -782,6 +784,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
+ TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
+
++ if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) {
++ NL_SET_ERR_MSG(extack,
++ "Both min and max destination ports must be specified");
++ return -EINVAL;
++ }
++ if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) {
++ NL_SET_ERR_MSG(extack,
++ "Both min and max source ports must be specified");
++ return -EINVAL;
++ }
+ if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
+ ntohs(key->tp_range.tp_max.dst) <=
+ ntohs(key->tp_range.tp_min.dst)) {
+@@ -1002,6 +1014,7 @@ static int fl_set_key_mpls(struct nlattr **tb,
+ static void fl_set_key_vlan(struct nlattr **tb,
+ __be16 ethertype,
+ int vlan_id_key, int vlan_prio_key,
++ int vlan_next_eth_type_key,
+ struct flow_dissector_key_vlan *key_val,
+ struct flow_dissector_key_vlan *key_mask)
+ {
+@@ -1020,6 +1033,11 @@ static void fl_set_key_vlan(struct nlattr **tb,
+ }
+ key_val->vlan_tpid = ethertype;
+ key_mask->vlan_tpid = cpu_to_be16(~0);
++ if (tb[vlan_next_eth_type_key]) {
++ key_val->vlan_eth_type =
++ nla_get_be16(tb[vlan_next_eth_type_key]);
++ key_mask->vlan_eth_type = cpu_to_be16(~0);
++ }
+ }
+
+ static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
+@@ -1084,6 +1102,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ if (option_len > sizeof(struct geneve_opt))
+ data_len = option_len - sizeof(struct geneve_opt);
+
++ if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4)
++ return -ERANGE;
++
+ opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
+ memset(opt, 0xff, option_len);
+ opt->length = data_len / 4;
+@@ -1516,8 +1537,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
+
+ if (eth_type_vlan(ethertype)) {
+ fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+- TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
+- &mask->vlan);
++ TCA_FLOWER_KEY_VLAN_PRIO,
++ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
++ &key->vlan, &mask->vlan);
+
+ if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
+ ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+@@ -1525,6 +1547,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
+ fl_set_key_vlan(tb, ethertype,
+ TCA_FLOWER_KEY_CVLAN_ID,
+ TCA_FLOWER_KEY_CVLAN_PRIO,
++ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ &key->cvlan, &mask->cvlan);
+ fl_set_key_val(tb, &key->basic.n_proto,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+@@ -2880,13 +2903,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
+ goto nla_put_failure;
+
+ if (mask->basic.n_proto) {
+- if (mask->cvlan.vlan_tpid) {
++ if (mask->cvlan.vlan_eth_type) {
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ key->basic.n_proto))
+ goto nla_put_failure;
+- } else if (mask->vlan.vlan_tpid) {
++ } else if (mask->vlan.vlan_eth_type) {
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+- key->basic.n_proto))
++ key->vlan.vlan_eth_type))
+ goto nla_put_failure;
+ }
+ }
+diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
+index 8654b0ce997c1..a2f53aee39097 100644
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -210,11 +210,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ if (err < 0)
+ return err;
+
+- if (tb[TCA_FW_CLASSID]) {
+- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+- tcf_bind_filter(tp, &f->res, base);
+- }
+-
+ if (tb[TCA_FW_INDEV]) {
+ int ret;
+ ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
+@@ -231,6 +226,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ } else if (head->mask != 0xFFFFFFFF)
+ return err;
+
++ if (tb[TCA_FW_CLASSID]) {
++ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
++ tcf_bind_filter(tp, &f->res, base);
++ }
++
+ return 0;
+ }
+
+@@ -265,7 +265,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
+ return -ENOBUFS;
+
+ fnew->id = f->id;
+- fnew->res = f->res;
+ fnew->ifindex = f->ifindex;
+ fnew->tp = f->tp;
+
+diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
+index a35ab8c27866e..194468d0355a1 100644
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -424,6 +424,11 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
+ return -EINVAL;
+ }
+
++ if (!nhandle) {
++ NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
++ return -EINVAL;
++ }
++
+ h1 = to_hash(nhandle);
+ b = rtnl_dereference(head->table[h1]);
+ if (!b) {
+@@ -477,6 +482,11 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+ int err;
+ bool new = true;
+
++ if (!handle) {
++ NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
++ return -EINVAL;
++ }
++
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
+@@ -501,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+ if (fold) {
+ f->id = fold->id;
+ f->iif = fold->iif;
+- f->res = fold->res;
+ f->handle = fold->handle;
+
+ f->tp = fold->tp;
+@@ -526,7 +535,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+ rcu_assign_pointer(f->next, f1);
+ rcu_assign_pointer(*fp, f);
+
+- if (fold && fold->handle && f->handle != fold->handle) {
++ if (fold) {
+ th = to_hash(fold->handle);
+ h = from_hash(fold->handle >> 16);
+ b = rtnl_dereference(head->table[th]);
+diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
+deleted file mode 100644
+index 742c7d49a9581..0000000000000
+--- a/net/sched/cls_tcindex.c
++++ /dev/null
+@@ -1,738 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
+- *
+- * Written 1998,1999 by Werner Almesberger, EPFL ICA
+- */
+-
+-#include <linux/module.h>
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/skbuff.h>
+-#include <linux/errno.h>
+-#include <linux/slab.h>
+-#include <linux/refcount.h>
+-#include <net/act_api.h>
+-#include <net/netlink.h>
+-#include <net/pkt_cls.h>
+-#include <net/sch_generic.h>
+-
+-/*
+- * Passing parameters to the root seems to be done more awkwardly than really
+- * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
+- * verified. FIXME.
+- */
+-
+-#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
+-#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
+-
+-
+-struct tcindex_data;
+-
+-struct tcindex_filter_result {
+- struct tcf_exts exts;
+- struct tcf_result res;
+- struct tcindex_data *p;
+- struct rcu_work rwork;
+-};
+-
+-struct tcindex_filter {
+- u16 key;
+- struct tcindex_filter_result result;
+- struct tcindex_filter __rcu *next;
+- struct rcu_work rwork;
+-};
+-
+-
+-struct tcindex_data {
+- struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
+- struct tcindex_filter __rcu **h; /* imperfect hash; */
+- struct tcf_proto *tp;
+- u16 mask; /* AND key with mask */
+- u32 shift; /* shift ANDed key to the right */
+- u32 hash; /* hash table size; 0 if undefined */
+- u32 alloc_hash; /* allocated size */
+- u32 fall_through; /* 0: only classify if explicit match */
+- refcount_t refcnt; /* a temporary refcnt for perfect hash */
+- struct rcu_work rwork;
+-};
+-
+-static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
+-{
+- return tcf_exts_has_actions(&r->exts) || r->res.classid;
+-}
+-
+-static void tcindex_data_get(struct tcindex_data *p)
+-{
+- refcount_inc(&p->refcnt);
+-}
+-
+-static void tcindex_data_put(struct tcindex_data *p)
+-{
+- if (refcount_dec_and_test(&p->refcnt)) {
+- kfree(p->perfect);
+- kfree(p->h);
+- kfree(p);
+- }
+-}
+-
+-static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
+- u16 key)
+-{
+- if (p->perfect) {
+- struct tcindex_filter_result *f = p->perfect + key;
+-
+- return tcindex_filter_is_set(f) ? f : NULL;
+- } else if (p->h) {
+- struct tcindex_filter __rcu **fp;
+- struct tcindex_filter *f;
+-
+- fp = &p->h[key % p->hash];
+- for (f = rcu_dereference_bh_rtnl(*fp);
+- f;
+- fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
+- if (f->key == key)
+- return &f->result;
+- }
+-
+- return NULL;
+-}
+-
+-
+-static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+- struct tcf_result *res)
+-{
+- struct tcindex_data *p = rcu_dereference_bh(tp->root);
+- struct tcindex_filter_result *f;
+- int key = (skb->tc_index & p->mask) >> p->shift;
+-
+- pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
+- skb, tp, res, p);
+-
+- f = tcindex_lookup(p, key);
+- if (!f) {
+- struct Qdisc *q = tcf_block_q(tp->chain->block);
+-
+- if (!p->fall_through)
+- return -1;
+- res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
+- res->class = 0;
+- pr_debug("alg 0x%x\n", res->classid);
+- return 0;
+- }
+- *res = f->res;
+- pr_debug("map 0x%x\n", res->classid);
+-
+- return tcf_exts_exec(skb, &f->exts, res);
+-}
+-
+-
+-static void *tcindex_get(struct tcf_proto *tp, u32 handle)
+-{
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- struct tcindex_filter_result *r;
+-
+- pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
+- if (p->perfect && handle >= p->alloc_hash)
+- return NULL;
+- r = tcindex_lookup(p, handle);
+- return r && tcindex_filter_is_set(r) ? r : NULL;
+-}
+-
+-static int tcindex_init(struct tcf_proto *tp)
+-{
+- struct tcindex_data *p;
+-
+- pr_debug("tcindex_init(tp %p)\n", tp);
+- p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
+- if (!p)
+- return -ENOMEM;
+-
+- p->mask = 0xffff;
+- p->hash = DEFAULT_HASH_SIZE;
+- p->fall_through = 1;
+- refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
+-
+- rcu_assign_pointer(tp->root, p);
+- return 0;
+-}
+-
+-static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+-{
+- tcf_exts_destroy(&r->exts);
+- tcf_exts_put_net(&r->exts);
+- tcindex_data_put(r->p);
+-}
+-
+-static void tcindex_destroy_rexts_work(struct work_struct *work)
+-{
+- struct tcindex_filter_result *r;
+-
+- r = container_of(to_rcu_work(work),
+- struct tcindex_filter_result,
+- rwork);
+- rtnl_lock();
+- __tcindex_destroy_rexts(r);
+- rtnl_unlock();
+-}
+-
+-static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+-{
+- tcf_exts_destroy(&f->result.exts);
+- tcf_exts_put_net(&f->result.exts);
+- kfree(f);
+-}
+-
+-static void tcindex_destroy_fexts_work(struct work_struct *work)
+-{
+- struct tcindex_filter *f = container_of(to_rcu_work(work),
+- struct tcindex_filter,
+- rwork);
+-
+- rtnl_lock();
+- __tcindex_destroy_fexts(f);
+- rtnl_unlock();
+-}
+-
+-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
+- bool rtnl_held, struct netlink_ext_ack *extack)
+-{
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- struct tcindex_filter_result *r = arg;
+- struct tcindex_filter __rcu **walk;
+- struct tcindex_filter *f = NULL;
+-
+- pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
+- if (p->perfect) {
+- if (!r->res.class)
+- return -ENOENT;
+- } else {
+- int i;
+-
+- for (i = 0; i < p->hash; i++) {
+- walk = p->h + i;
+- for (f = rtnl_dereference(*walk); f;
+- walk = &f->next, f = rtnl_dereference(*walk)) {
+- if (&f->result == r)
+- goto found;
+- }
+- }
+- return -ENOENT;
+-
+-found:
+- rcu_assign_pointer(*walk, rtnl_dereference(f->next));
+- }
+- tcf_unbind_filter(tp, &r->res);
+- /* all classifiers are required to call tcf_exts_destroy() after rcu
+- * grace period, since converted-to-rcu actions are relying on that
+- * in cleanup() callback
+- */
+- if (f) {
+- if (tcf_exts_get_net(&f->result.exts))
+- tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
+- else
+- __tcindex_destroy_fexts(f);
+- } else {
+- tcindex_data_get(p);
+-
+- if (tcf_exts_get_net(&r->exts))
+- tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
+- else
+- __tcindex_destroy_rexts(r);
+- }
+-
+- *last = false;
+- return 0;
+-}
+-
+-static void tcindex_destroy_work(struct work_struct *work)
+-{
+- struct tcindex_data *p = container_of(to_rcu_work(work),
+- struct tcindex_data,
+- rwork);
+-
+- tcindex_data_put(p);
+-}
+-
+-static inline int
+-valid_perfect_hash(struct tcindex_data *p)
+-{
+- return p->hash > (p->mask >> p->shift);
+-}
+-
+-static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
+- [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
+- [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
+- [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
+- [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
+- [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
+-};
+-
+-static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+- struct tcindex_data *p,
+- struct net *net)
+-{
+- memset(r, 0, sizeof(*r));
+- r->p = p;
+- return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
+- TCA_TCINDEX_POLICE);
+-}
+-
+-static void tcindex_free_perfect_hash(struct tcindex_data *cp);
+-
+-static void tcindex_partial_destroy_work(struct work_struct *work)
+-{
+- struct tcindex_data *p = container_of(to_rcu_work(work),
+- struct tcindex_data,
+- rwork);
+-
+- rtnl_lock();
+- if (p->perfect)
+- tcindex_free_perfect_hash(p);
+- kfree(p);
+- rtnl_unlock();
+-}
+-
+-static void tcindex_free_perfect_hash(struct tcindex_data *cp)
+-{
+- int i;
+-
+- for (i = 0; i < cp->hash; i++)
+- tcf_exts_destroy(&cp->perfect[i].exts);
+- kfree(cp->perfect);
+-}
+-
+-static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
+-{
+- int i, err = 0;
+-
+- cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
+- GFP_KERNEL | __GFP_NOWARN);
+- if (!cp->perfect)
+- return -ENOMEM;
+-
+- for (i = 0; i < cp->hash; i++) {
+- err = tcf_exts_init(&cp->perfect[i].exts, net,
+- TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+- if (err < 0)
+- goto errout;
+- cp->perfect[i].p = cp;
+- }
+-
+- return 0;
+-
+-errout:
+- tcindex_free_perfect_hash(cp);
+- return err;
+-}
+-
+-static int
+-tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+- u32 handle, struct tcindex_data *p,
+- struct tcindex_filter_result *r, struct nlattr **tb,
+- struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
+-{
+- struct tcindex_filter_result new_filter_result, *old_r = r;
+- struct tcindex_data *cp = NULL, *oldp;
+- struct tcindex_filter *f = NULL; /* make gcc behave */
+- struct tcf_result cr = {};
+- int err, balloc = 0;
+- struct tcf_exts e;
+-
+- err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+- if (err < 0)
+- return err;
+- err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
+- if (err < 0)
+- goto errout;
+-
+- err = -ENOMEM;
+- /* tcindex_data attributes must look atomic to classifier/lookup so
+- * allocate new tcindex data and RCU assign it onto root. Keeping
+- * perfect hash and hash pointers from old data.
+- */
+- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+- if (!cp)
+- goto errout;
+-
+- cp->mask = p->mask;
+- cp->shift = p->shift;
+- cp->hash = p->hash;
+- cp->alloc_hash = p->alloc_hash;
+- cp->fall_through = p->fall_through;
+- cp->tp = tp;
+- refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
+-
+- if (tb[TCA_TCINDEX_HASH])
+- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+-
+- if (tb[TCA_TCINDEX_MASK])
+- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+-
+- if (tb[TCA_TCINDEX_SHIFT]) {
+- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+- if (cp->shift > 16) {
+- err = -EINVAL;
+- goto errout;
+- }
+- }
+- if (!cp->hash) {
+- /* Hash not specified, use perfect hash if the upper limit
+- * of the hashing index is below the threshold.
+- */
+- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+- cp->hash = (cp->mask >> cp->shift) + 1;
+- else
+- cp->hash = DEFAULT_HASH_SIZE;
+- }
+-
+- if (p->perfect) {
+- int i;
+-
+- if (tcindex_alloc_perfect_hash(net, cp) < 0)
+- goto errout;
+- cp->alloc_hash = cp->hash;
+- for (i = 0; i < min(cp->hash, p->hash); i++)
+- cp->perfect[i].res = p->perfect[i].res;
+- balloc = 1;
+- }
+- cp->h = p->h;
+-
+- err = tcindex_filter_result_init(&new_filter_result, cp, net);
+- if (err < 0)
+- goto errout_alloc;
+- if (old_r)
+- cr = r->res;
+-
+- err = -EBUSY;
+-
+- /* Hash already allocated, make sure that we still meet the
+- * requirements for the allocated hash.
+- */
+- if (cp->perfect) {
+- if (!valid_perfect_hash(cp) ||
+- cp->hash > cp->alloc_hash)
+- goto errout_alloc;
+- } else if (cp->h && cp->hash != cp->alloc_hash) {
+- goto errout_alloc;
+- }
+-
+- err = -EINVAL;
+- if (tb[TCA_TCINDEX_FALL_THROUGH])
+- cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+-
+- if (!cp->perfect && !cp->h)
+- cp->alloc_hash = cp->hash;
+-
+- /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
+- * but then, we'd fail handles that may become valid after some future
+- * mask change. While this is extremely unlikely to ever matter,
+- * the check below is safer (and also more backwards-compatible).
+- */
+- if (cp->perfect || valid_perfect_hash(cp))
+- if (handle >= cp->alloc_hash)
+- goto errout_alloc;
+-
+-
+- err = -ENOMEM;
+- if (!cp->perfect && !cp->h) {
+- if (valid_perfect_hash(cp)) {
+- if (tcindex_alloc_perfect_hash(net, cp) < 0)
+- goto errout_alloc;
+- balloc = 1;
+- } else {
+- struct tcindex_filter __rcu **hash;
+-
+- hash = kcalloc(cp->hash,
+- sizeof(struct tcindex_filter *),
+- GFP_KERNEL);
+-
+- if (!hash)
+- goto errout_alloc;
+-
+- cp->h = hash;
+- balloc = 2;
+- }
+- }
+-
+- if (cp->perfect)
+- r = cp->perfect + handle;
+- else
+- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
+-
+- if (r == &new_filter_result) {
+- f = kzalloc(sizeof(*f), GFP_KERNEL);
+- if (!f)
+- goto errout_alloc;
+- f->key = handle;
+- f->next = NULL;
+- err = tcindex_filter_result_init(&f->result, cp, net);
+- if (err < 0) {
+- kfree(f);
+- goto errout_alloc;
+- }
+- }
+-
+- if (tb[TCA_TCINDEX_CLASSID]) {
+- cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+- tcf_bind_filter(tp, &cr, base);
+- }
+-
+- if (old_r && old_r != r) {
+- err = tcindex_filter_result_init(old_r, cp, net);
+- if (err < 0) {
+- kfree(f);
+- goto errout_alloc;
+- }
+- }
+-
+- oldp = p;
+- r->res = cr;
+- tcf_exts_change(&r->exts, &e);
+-
+- rcu_assign_pointer(tp->root, cp);
+-
+- if (r == &new_filter_result) {
+- struct tcindex_filter *nfp;
+- struct tcindex_filter __rcu **fp;
+-
+- f->result.res = r->res;
+- tcf_exts_change(&f->result.exts, &r->exts);
+-
+- fp = cp->h + (handle % cp->hash);
+- for (nfp = rtnl_dereference(*fp);
+- nfp;
+- fp = &nfp->next, nfp = rtnl_dereference(*fp))
+- ; /* nothing */
+-
+- rcu_assign_pointer(*fp, f);
+- } else {
+- tcf_exts_destroy(&new_filter_result.exts);
+- }
+-
+- if (oldp)
+- tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
+- return 0;
+-
+-errout_alloc:
+- if (balloc == 1)
+- tcindex_free_perfect_hash(cp);
+- else if (balloc == 2)
+- kfree(cp->h);
+- tcf_exts_destroy(&new_filter_result.exts);
+-errout:
+- kfree(cp);
+- tcf_exts_destroy(&e);
+- return err;
+-}
+-
+-static int
+-tcindex_change(struct net *net, struct sk_buff *in_skb,
+- struct tcf_proto *tp, unsigned long base, u32 handle,
+- struct nlattr **tca, void **arg, u32 flags,
+- struct netlink_ext_ack *extack)
+-{
+- struct nlattr *opt = tca[TCA_OPTIONS];
+- struct nlattr *tb[TCA_TCINDEX_MAX + 1];
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- struct tcindex_filter_result *r = *arg;
+- int err;
+-
+- pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
+- "p %p,r %p,*arg %p\n",
+- tp, handle, tca, arg, opt, p, r, *arg);
+-
+- if (!opt)
+- return 0;
+-
+- err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
+- tcindex_policy, NULL);
+- if (err < 0)
+- return err;
+-
+- return tcindex_set_parms(net, tp, base, handle, p, r, tb,
+- tca[TCA_RATE], flags, extack);
+-}
+-
+-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+- bool rtnl_held)
+-{
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- struct tcindex_filter *f, *next;
+- int i;
+-
+- pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
+- if (p->perfect) {
+- for (i = 0; i < p->hash; i++) {
+- if (!p->perfect[i].res.class)
+- continue;
+- if (walker->count >= walker->skip) {
+- if (walker->fn(tp, p->perfect + i, walker) < 0) {
+- walker->stop = 1;
+- return;
+- }
+- }
+- walker->count++;
+- }
+- }
+- if (!p->h)
+- return;
+- for (i = 0; i < p->hash; i++) {
+- for (f = rtnl_dereference(p->h[i]); f; f = next) {
+- next = rtnl_dereference(f->next);
+- if (walker->count >= walker->skip) {
+- if (walker->fn(tp, &f->result, walker) < 0) {
+- walker->stop = 1;
+- return;
+- }
+- }
+- walker->count++;
+- }
+- }
+-}
+-
+-static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
+- struct netlink_ext_ack *extack)
+-{
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- int i;
+-
+- pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
+-
+- if (p->perfect) {
+- for (i = 0; i < p->hash; i++) {
+- struct tcindex_filter_result *r = p->perfect + i;
+-
+- /* tcf_queue_work() does not guarantee the ordering we
+- * want, so we have to take this refcnt temporarily to
+- * ensure 'p' is freed after all tcindex_filter_result
+- * here. Imperfect hash does not need this, because it
+- * uses linked lists rather than an array.
+- */
+- tcindex_data_get(p);
+-
+- tcf_unbind_filter(tp, &r->res);
+- if (tcf_exts_get_net(&r->exts))
+- tcf_queue_work(&r->rwork,
+- tcindex_destroy_rexts_work);
+- else
+- __tcindex_destroy_rexts(r);
+- }
+- }
+-
+- for (i = 0; p->h && i < p->hash; i++) {
+- struct tcindex_filter *f, *next;
+- bool last;
+-
+- for (f = rtnl_dereference(p->h[i]); f; f = next) {
+- next = rtnl_dereference(f->next);
+- tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
+- }
+- }
+-
+- tcf_queue_work(&p->rwork, tcindex_destroy_work);
+-}
+-
+-
+-static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
+- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+-{
+- struct tcindex_data *p = rtnl_dereference(tp->root);
+- struct tcindex_filter_result *r = fh;
+- struct nlattr *nest;
+-
+- pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
+- tp, fh, skb, t, p, r);
+- pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
+-
+- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+- if (nest == NULL)
+- goto nla_put_failure;
+-
+- if (!fh) {
+- t->tcm_handle = ~0; /* whatever ... */
+- if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
+- nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
+- nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
+- nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
+- goto nla_put_failure;
+- nla_nest_end(skb, nest);
+- } else {
+- if (p->perfect) {
+- t->tcm_handle = r - p->perfect;
+- } else {
+- struct tcindex_filter *f;
+- struct tcindex_filter __rcu **fp;
+- int i;
+-
+- t->tcm_handle = 0;
+- for (i = 0; !t->tcm_handle && i < p->hash; i++) {
+- fp = &p->h[i];
+- for (f = rtnl_dereference(*fp);
+- !t->tcm_handle && f;
+- fp = &f->next, f = rtnl_dereference(*fp)) {
+- if (&f->result == r)
+- t->tcm_handle = f->key;
+- }
+- }
+- }
+- pr_debug("handle = %d\n", t->tcm_handle);
+- if (r->res.class &&
+- nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
+- goto nla_put_failure;
+-
+- if (tcf_exts_dump(skb, &r->exts) < 0)
+- goto nla_put_failure;
+- nla_nest_end(skb, nest);
+-
+- if (tcf_exts_dump_stats(skb, &r->exts) < 0)
+- goto nla_put_failure;
+- }
+-
+- return skb->len;
+-
+-nla_put_failure:
+- nla_nest_cancel(skb, nest);
+- return -1;
+-}
+-
+-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
+- void *q, unsigned long base)
+-{
+- struct tcindex_filter_result *r = fh;
+-
+- if (r && r->res.classid == classid) {
+- if (cl)
+- __tcf_bind_filter(q, &r->res, base);
+- else
+- __tcf_unbind_filter(q, &r->res);
+- }
+-}
+-
+-static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
+- .kind = "tcindex",
+- .classify = tcindex_classify,
+- .init = tcindex_init,
+- .destroy = tcindex_destroy,
+- .get = tcindex_get,
+- .change = tcindex_change,
+- .delete = tcindex_delete,
+- .walk = tcindex_walk,
+- .dump = tcindex_dump,
+- .bind_class = tcindex_bind_class,
+- .owner = THIS_MODULE,
+-};
+-
+-static int __init init_tcindex(void)
+-{
+- return register_tcf_proto_ops(&cls_tcindex_ops);
+-}
+-
+-static void __exit exit_tcindex(void)
+-{
+- unregister_tcf_proto_ops(&cls_tcindex_ops);
+-}
+-
+-module_init(init_tcindex)
+-module_exit(exit_tcindex)
+-MODULE_LICENSE("GPL");
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 4272814487f09..17edcf1d1c3b6 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp)
+ return 0;
+ }
+
+-static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
++static void __u32_destroy_key(struct tc_u_knode *n)
+ {
+ struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+
+ tcf_exts_destroy(&n->exts);
+- tcf_exts_put_net(&n->exts);
+ if (ht && --ht->refcnt == 0)
+ kfree(ht);
++ kfree(n);
++}
++
++static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
++{
++ tcf_exts_put_net(&n->exts);
+ #ifdef CONFIG_CLS_U32_PERF
+ if (free_pf)
+ free_percpu(n->pf);
+@@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+ if (free_pf)
+ free_percpu(n->pcpu_success);
+ #endif
+- kfree(n);
+- return 0;
++ __u32_destroy_key(n);
+ }
+
+ /* u32_delete_key_rcu should be called when free'ing a copied
+@@ -712,12 +716,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+ struct nlattr *est, u32 flags,
+ struct netlink_ext_ack *extack)
+ {
+- int err;
++ int err, ifindex = -1;
+
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
+ if (err < 0)
+ return err;
+
++ if (tb[TCA_U32_INDEV]) {
++ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
++ if (ifindex < 0)
++ return -EINVAL;
++ }
++
+ if (tb[TCA_U32_LINK]) {
+ u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
+ struct tc_u_hnode *ht_down = NULL, *ht_old;
+@@ -752,13 +762,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+ tcf_bind_filter(tp, &n->res, base);
+ }
+
+- if (tb[TCA_U32_INDEV]) {
+- int ret;
+- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+- if (ret < 0)
+- return -EINVAL;
+- n->ifindex = ret;
+- }
++ if (ifindex >= 0)
++ n->ifindex = ifindex;
++
+ return 0;
+ }
+
+@@ -806,14 +812,9 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+
+ new->ifindex = n->ifindex;
+ new->fshift = n->fshift;
+- new->res = n->res;
+ new->flags = n->flags;
+ RCU_INIT_POINTER(new->ht_down, ht);
+
+- /* bump reference count as long as we hold pointer to structure */
+- if (ht)
+- ht->refcnt++;
+-
+ #ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+@@ -835,6 +836,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+ return NULL;
+ }
+
++ /* bump reference count as long as we hold pointer to structure */
++ if (ht)
++ ht->refcnt++;
++
+ return new;
+ }
+
+@@ -898,13 +903,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ tca[TCA_RATE], flags, extack);
+
+ if (err) {
+- u32_destroy_key(new, false);
++ __u32_destroy_key(new);
+ return err;
+ }
+
+ err = u32_replace_hw_knode(tp, new, flags, extack);
+ if (err) {
+- u32_destroy_key(new, false);
++ __u32_destroy_key(new);
+ return err;
+ }
+
+@@ -993,18 +998,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ return -EINVAL;
+ }
+
++ /* At this point, we need to derive the new handle that will be used to
++ * uniquely map the identity of this table match entry. The
++ * identity of the entry that we need to construct is 32 bits made of:
++ * htid(12b):bucketid(8b):node/entryid(12b)
++ *
++ * At this point _we have the table(ht)_ in which we will insert this
++ * entry. We carry the table's id in variable "htid".
++ * Note that earlier code picked the ht selection either by a) the user
++ * providing the htid specified via TCA_U32_HASH attribute or b) when
++ * no such attribute is passed then the root ht, is default to at ID
++ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
++ * If OTOH the user passed us the htid, they may also pass a bucketid of
++ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
++ * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
++ * passed via the htid, so even if it was non-zero it will be ignored.
++ *
++ * We may also have a handle, if the user passed one. The handle also
++ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
++ * Rule: the bucketid on the handle is ignored even if one was passed;
++ * rather the value on "htid" is always assumed to be the bucketid.
++ */
+ if (handle) {
++ /* Rule: The htid from handle and tableid from htid must match */
+ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
+ return -EINVAL;
+ }
+- handle = htid | TC_U32_NODE(handle);
+- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
+- GFP_KERNEL);
+- if (err)
+- return err;
+- } else
++ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
++ * need to finalize the table entry identification with the last
++ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
++ * entries. Rule: nodeid of 0 is reserved only for tables(see
++ * earlier code which processes TC_U32_DIVISOR attribute).
++ * Rule: The nodeid can only be derived from the handle (and not
++ * htid).
++ * Rule: if the handle specified zero for the node id example
++ * 0x60000000, then pick a new nodeid from the pool of IDs
++ * this hash table has been allocating from.
++ * If OTOH it is specified (i.e for example the user passed a
++ * handle such as 0x60000123), then we use it generate our final
++ * handle which is used to uniquely identify the match entry.
++ */
++ if (!TC_U32_NODE(handle)) {
++ handle = gen_new_kid(ht, htid);
++ } else {
++ handle = htid | TC_U32_NODE(handle);
++ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
++ handle, GFP_KERNEL);
++ if (err)
++ return err;
++ }
++ } else {
++ /* The user did not give us a handle; lets just generate one
++ * from the table's pool of nodeids.
++ */
+ handle = gen_new_kid(ht, htid);
++ }
+
+ if (tb[TCA_U32_SEL] == NULL) {
+ NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
+diff --git a/net/sched/ematch.c b/net/sched/ematch.c
+index 4ce6813618515..5c1235e6076ae 100644
+--- a/net/sched/ematch.c
++++ b/net/sched/ematch.c
+@@ -255,6 +255,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
+ * the value carried.
+ */
+ if (em_hdr->flags & TCF_EM_SIMPLE) {
++ if (em->ops->datalen > 0)
++ goto errout;
+ if (data_len < sizeof(u32))
+ goto errout;
+ em->data = *(u32 *) data;
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index 12f39a2dffd47..fa79dbd3601fa 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+
+ if (!handle)
+ return NULL;
+- q = qdisc_match_from_root(dev->qdisc, handle);
++ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
+ if (q)
+ goto out;
+
+@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
+
+ if (!handle)
+ return NULL;
+- q = qdisc_match_from_root(dev->qdisc, handle);
++ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
+ if (q)
+ goto out;
+
+@@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+
+ qdisc_offload_graft_root(dev, new, old, extack);
+
+- if (new && new->ops->attach)
++ if (new && new->ops->attach && !ingress)
+ goto skip;
+
+ for (i = 0; i < num_q; i++) {
+@@ -1081,11 +1081,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+
+ skip:
+ if (!ingress) {
+- notify_and_destroy(net, skb, n, classid,
+- dev->qdisc, new);
++ old = rtnl_dereference(dev->qdisc);
+ if (new && !new->ops->attach)
+ qdisc_refcount_inc(new);
+- dev->qdisc = new ? : &noop_qdisc;
++ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
++
++ notify_and_destroy(net, skb, n, classid, old, new);
+
+ if (new && new->ops->attach)
+ new->ops->attach(new);
+@@ -1113,6 +1114,11 @@ skip:
+ return -ENOENT;
+ }
+
++ if (new && new->ops == &noqueue_qdisc_ops) {
++ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
++ return -EINVAL;
++ }
++
+ err = cops->graft(parent, cl, new, &old, extack);
+ if (err)
+ return err;
+@@ -1204,7 +1210,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
+
+ err = -ENOENT;
+ if (!ops) {
+- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
++ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
+ goto err_out;
+ }
+
+@@ -1217,7 +1223,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
+ sch->parent = parent;
+
+ if (handle == TC_H_INGRESS) {
+- sch->flags |= TCQ_F_INGRESS;
++ if (!(sch->flags & TCQ_F_INGRESS)) {
++ NL_SET_ERR_MSG(extack,
++ "Specified parent ID is reserved for ingress and clsact Qdiscs");
++ err = -EINVAL;
++ goto err_out3;
++ }
+ handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ } else {
+ if (handle == 0) {
+@@ -1460,7 +1471,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ q = dev_ingress_queue(dev)->qdisc_sleeping;
+ }
+ } else {
+- q = dev->qdisc;
++ q = rtnl_dereference(dev->qdisc);
+ }
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
+@@ -1502,10 +1513,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ return 0;
+ }
+
++static bool req_create_or_replace(struct nlmsghdr *n)
++{
++ return (n->nlmsg_flags & NLM_F_CREATE &&
++ n->nlmsg_flags & NLM_F_REPLACE);
++}
++
++static bool req_create_exclusive(struct nlmsghdr *n)
++{
++ return (n->nlmsg_flags & NLM_F_CREATE &&
++ n->nlmsg_flags & NLM_F_EXCL);
++}
++
++static bool req_change(struct nlmsghdr *n)
++{
++ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
++ !(n->nlmsg_flags & NLM_F_REPLACE) &&
++ !(n->nlmsg_flags & NLM_F_EXCL));
++}
++
+ /*
+ * Create/change qdisc.
+ */
+-
+ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+ {
+@@ -1549,7 +1578,7 @@ replay:
+ q = dev_ingress_queue(dev)->qdisc_sleeping;
+ }
+ } else {
+- q = dev->qdisc;
++ q = rtnl_dereference(dev->qdisc);
+ }
+
+ /* It may be default qdisc, ignore it */
+@@ -1578,11 +1607,20 @@ replay:
+ NL_SET_ERR_MSG(extack, "Invalid qdisc name");
+ return -EINVAL;
+ }
++ if (q->flags & TCQ_F_INGRESS) {
++ NL_SET_ERR_MSG(extack,
++ "Cannot regraft ingress or clsact Qdiscs");
++ return -EINVAL;
++ }
+ if (q == p ||
+ (p && check_loop(q, p, 0))) {
+ NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
+ return -ELOOP;
+ }
++ if (clid == TC_H_INGRESS) {
++ NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
++ return -EINVAL;
++ }
+ qdisc_refcount_inc(q);
+ goto graft;
+ } else {
+@@ -1593,27 +1631,35 @@ replay:
+ *
+ * We know, that some child q is already
+ * attached to this parent and have choice:
+- * either to change it or to create/graft new one.
++ * 1) change it or 2) create/graft new one.
++ * If the requested qdisc kind is different
++ * than the existing one, then we choose graft.
++ * If they are the same then this is "change"
++ * operation - just let it fallthrough..
+ *
+ * 1. We are allowed to create/graft only
+- * if CREATE and REPLACE flags are set.
++ * if the request is explicitly stating
++ * "please create if it doesn't exist".
+ *
+- * 2. If EXCL is set, requestor wanted to say,
+- * that qdisc tcm_handle is not expected
++ * 2. If the request is to exclusive create
++ * then the qdisc tcm_handle is not expected
+ * to exist, so that we choose create/graft too.
+ *
+ * 3. The last case is when no flags are set.
++ * This will happen when for example tc
++ * utility issues a "change" command.
+ * Alas, it is sort of hole in API, we
+ * cannot decide what to do unambiguously.
+- * For now we select create/graft, if
+- * user gave KIND, which does not match existing.
++ * For now we select create/graft.
+ */
+- if ((n->nlmsg_flags & NLM_F_CREATE) &&
+- (n->nlmsg_flags & NLM_F_REPLACE) &&
+- ((n->nlmsg_flags & NLM_F_EXCL) ||
+- (tca[TCA_KIND] &&
+- nla_strcmp(tca[TCA_KIND], q->ops->id))))
+- goto create_n_graft;
++ if (tca[TCA_KIND] &&
++ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
++ if (req_create_or_replace(n) ||
++ req_create_exclusive(n))
++ goto create_n_graft;
++ else if (req_change(n))
++ goto create_n_graft2;
++ }
+ }
+ }
+ } else {
+@@ -1647,6 +1693,7 @@ create_n_graft:
+ NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
+ return -ENOENT;
+ }
++create_n_graft2:
+ if (clid == TC_H_INGRESS) {
+ if (dev_ingress_queue(dev)) {
+ q = qdisc_create(dev, dev_ingress_queue(dev), p,
+@@ -1771,7 +1818,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+ s_q_idx = 0;
+ q_idx = 0;
+
+- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
++ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
++ skb, cb, &q_idx, s_q_idx,
+ true, tca[TCA_DUMP_INVISIBLE]) < 0)
+ goto done;
+
+@@ -2042,7 +2090,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ } else if (qid1) {
+ qid = qid1;
+ } else if (qid == 0)
+- qid = dev->qdisc->handle;
++ qid = rtnl_dereference(dev->qdisc)->handle;
+
+ /* Now qid is genuine qdisc handle consistent
+ * both with parent and child.
+@@ -2053,7 +2101,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ portid = TC_H_MAKE(qid, portid);
+ } else {
+ if (qid == 0)
+- qid = dev->qdisc->handle;
++ qid = rtnl_dereference(dev->qdisc)->handle;
+ }
+
+ /* OK. Locate qdisc */
+@@ -2214,7 +2262,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+ s_t = cb->args[0];
+ t = 0;
+
+- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
++ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
++ skb, tcm, cb, &t, s_t, true) < 0)
+ goto done;
+
+ dev_queue = dev_ingress_queue(dev);
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 7d8518176b45a..33737169cc2d3 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -397,10 +397,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ result = tcf_classify(skb, NULL, fl, &res, true);
+ if (result < 0)
+ continue;
++ if (result == TC_ACT_SHOT)
++ goto done;
++
+ flow = (struct atm_flow_data *)res.class;
+ if (!flow)
+ flow = lookup_flow(sch, res.classid);
+- goto done;
++ goto drop;
+ }
+ }
+ flow = NULL;
+@@ -576,7 +579,6 @@ static void atm_tc_reset(struct Qdisc *sch)
+ pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
+ list_for_each_entry(flow, &p->flows, list)
+ qdisc_reset(flow->q);
+- sch->q.qlen = 0;
+ }
+
+ static void atm_tc_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
+index 3c2300d144681..6f6e74ce927f4 100644
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2224,8 +2224,12 @@ retry:
+
+ static void cake_reset(struct Qdisc *sch)
+ {
++ struct cake_sched_data *q = qdisc_priv(sch);
+ u32 c;
+
++ if (!q->tins)
++ return;
++
+ for (c = 0; c < CAKE_MAX_TINS; c++)
+ cake_clear_tin(sch, c);
+ }
+@@ -2736,7 +2740,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+ q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
+ GFP_KERNEL);
+ if (!q->tins)
+- goto nomem;
++ return -ENOMEM;
+
+ for (i = 0; i < CAKE_MAX_TINS; i++) {
+ struct cake_tin_data *b = q->tins + i;
+@@ -2766,10 +2770,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+ q->min_netlen = ~0;
+ q->min_adjlen = ~0;
+ return 0;
+-
+-nomem:
+- cake_destroy(sch);
+- return -ENOMEM;
+ }
+
+ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index e0da15530f0e9..46b3dd71777d1 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+ result = tcf_classify(skb, NULL, fl, &res, true);
+ if (!fl || result < 0)
+ goto fallback;
++ if (result == TC_ACT_SHOT)
++ return NULL;
+
+ cl = (void *)res.class;
+ if (!cl) {
+@@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+ case TC_ACT_TRAP:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ fallthrough;
+- case TC_ACT_SHOT:
+- return NULL;
+ case TC_ACT_RECLASSIFY:
+ return cbq_reclassify(skb, cl);
+ }
+@@ -1053,7 +1053,6 @@ cbq_reset(struct Qdisc *sch)
+ cl->cpriority = cl->priority;
+ }
+ }
+- sch->q.qlen = 0;
+ }
+
+
+diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
+index 2adbd945bf15a..25d2daaa81227 100644
+--- a/net/sched/sch_choke.c
++++ b/net/sched/sch_choke.c
+@@ -315,8 +315,6 @@ static void choke_reset(struct Qdisc *sch)
+ rtnl_qdisc_drop(skb, sch);
+ }
+
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ if (q->tab)
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ q->head = q->tail = 0;
+diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
+index 642cd179b7a75..80a88e208d2bc 100644
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -444,8 +444,6 @@ static void drr_reset_qdisc(struct Qdisc *sch)
+ qdisc_reset(cl->qdisc);
+ }
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void drr_destroy_qdisc(struct Qdisc *sch)
+diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
+index 4c100d1052699..7da6dc38a3828 100644
+--- a/net/sched/sch_dsmark.c
++++ b/net/sched/sch_dsmark.c
+@@ -409,8 +409,6 @@ static void dsmark_reset(struct Qdisc *sch)
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
+ if (p->q)
+ qdisc_reset(p->q);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void dsmark_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
+index c48f91075b5c6..d96103b0e2bf5 100644
+--- a/net/sched/sch_etf.c
++++ b/net/sched/sch_etf.c
+@@ -445,9 +445,6 @@ static void etf_reset(struct Qdisc *sch)
+ timesortedlist_clear(sch);
+ __qdisc_reset_queue(&sch->q);
+
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+-
+ q->last = 0;
+ }
+
+diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
+index 1f857ffd1ac23..175e07b3d25c3 100644
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -667,12 +667,14 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
+ q->classes[i].deficit = quanta[i];
+ }
+ }
++ for (i = q->nbands; i < oldbands; i++) {
++ if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
++ list_del(&q->classes[i].alist);
++ qdisc_tree_flush_backlog(q->classes[i].qdisc);
++ }
+ q->nstrict = nstrict;
+ memcpy(q->prio2band, priomap, sizeof(priomap));
+
+- for (i = q->nbands; i < oldbands; i++)
+- qdisc_tree_flush_backlog(q->classes[i].qdisc);
+-
+ for (i = 0; i < q->nbands; i++)
+ q->classes[i].quantum = quanta[i];
+
+@@ -720,8 +722,6 @@ static void ets_qdisc_reset(struct Qdisc *sch)
+ }
+ for (band = 0; band < q->nbands; band++)
+ qdisc_reset(q->classes[band].qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void ets_qdisc_destroy(struct Qdisc *sch)
+diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
+index 2fb76fc0cc31b..5a1274199fe33 100644
+--- a/net/sched/sch_fq.c
++++ b/net/sched/sch_fq.c
+@@ -779,13 +779,17 @@ static int fq_resize(struct Qdisc *sch, u32 log)
+ return 0;
+ }
+
++static struct netlink_range_validation iq_range = {
++ .max = INT_MAX,
++};
++
+ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+ [TCA_FQ_UNSPEC] = { .strict_start_type = TCA_FQ_TIMER_SLACK },
+
+ [TCA_FQ_PLIMIT] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
+ [TCA_FQ_QUANTUM] = { .type = NLA_U32 },
+- [TCA_FQ_INITIAL_QUANTUM] = { .type = NLA_U32 },
++ [TCA_FQ_INITIAL_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &iq_range),
+ [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
+diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
+index bb0cd6d3d2c27..efda894bbb78b 100644
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch)
+ codel_vars_init(&flow->cvars);
+ }
+ memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ q->memory_usage = 0;
+ }
+
+diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
+index 830f3559f727a..1fb68c973f451 100644
+--- a/net/sched/sch_fq_pie.c
++++ b/net/sched/sch_fq_pie.c
+@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
+ struct pie_params p_params;
+ u32 ecn_prob;
+ u32 flows_cnt;
++ u32 flows_cursor;
+ u32 quantum;
+ u32 memory_limit;
+ u32 new_flow_count;
+@@ -201,6 +202,11 @@ out:
+ return NET_XMIT_CN;
+ }
+
++static struct netlink_range_validation fq_pie_q_range = {
++ .min = 1,
++ .max = 1 << 20,
++};
++
+ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
+ [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32},
+ [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32},
+@@ -208,7 +214,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
+ [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32},
+ [TCA_FQ_PIE_BETA] = {.type = NLA_U32},
+- [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32},
++ [TCA_FQ_PIE_QUANTUM] =
++ NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range),
+ [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ECN] = {.type = NLA_U32},
+@@ -372,21 +379,31 @@ flow_error:
+ static void fq_pie_timer(struct timer_list *t)
+ {
+ struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
++ unsigned long next, tupdate;
+ struct Qdisc *sch = q->sch;
+ spinlock_t *root_lock; /* to lock qdisc for probability calculations */
+- u32 idx;
++ int max_cnt, i;
+
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
+
+- for (idx = 0; idx < q->flows_cnt; idx++)
+- pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
+- q->flows[idx].backlog);
+-
+- /* reset the timer to fire after 'tupdate' jiffies. */
+- if (q->p_params.tupdate)
+- mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
++ /* Limit this expensive loop to 2048 flows per round. */
++ max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
++ for (i = 0; i < max_cnt; i++) {
++ pie_calculate_probability(&q->p_params,
++ &q->flows[q->flows_cursor].vars,
++ q->flows[q->flows_cursor].backlog);
++ q->flows_cursor++;
++ }
+
++ tupdate = q->p_params.tupdate;
++ next = 0;
++ if (q->flows_cursor >= q->flows_cnt) {
++ q->flows_cursor = 0;
++ next = tupdate;
++ }
++ if (tupdate)
++ mod_timer(&q->adapt_timer, jiffies + next);
+ spin_unlock(root_lock);
+ }
+
+@@ -521,9 +538,6 @@ static void fq_pie_reset(struct Qdisc *sch)
+ INIT_LIST_HEAD(&flow->flowchain);
+ pie_vars_init(&flow->vars);
+ }
+-
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ }
+
+ static void fq_pie_destroy(struct Qdisc *sch)
+@@ -531,6 +545,7 @@ static void fq_pie_destroy(struct Qdisc *sch)
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+
+ tcf_block_put(q->block);
++ q->p_params.tupdate = 0;
+ del_timer_sync(&q->adapt_timer);
+ kvfree(q->flows);
+ }
+diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
+index 8c06381391d6f..5ded4c8672a64 100644
+--- a/net/sched/sch_frag.c
++++ b/net/sched/sch_frag.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ #include <net/netlink.h>
+ #include <net/sch_generic.h>
++#include <net/pkt_sched.h>
+ #include <net/dst.h>
+ #include <net/ip.h>
+ #include <net/ip6_fib.h>
+@@ -137,7 +138,7 @@ err:
+
+ int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
+ {
+- u16 mru = qdisc_skb_cb(skb)->mru;
++ u16 mru = tc_skb_cb(skb)->mru;
+ int err;
+
+ if (mru && skb->len > mru + skb->dev->hard_header_len)
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index a8dd06c74e318..02299785209c1 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
+
+ void __qdisc_run(struct Qdisc *q)
+ {
+- int quota = dev_tx_weight;
++ int quota = READ_ONCE(dev_tx_weight);
+ int packets;
+
+ while (qdisc_restart(q, &packets)) {
+@@ -1083,6 +1083,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+ }
+ EXPORT_SYMBOL(dev_graft_qdisc);
+
++static void shutdown_scheduler_queue(struct net_device *dev,
++ struct netdev_queue *dev_queue,
++ void *_qdisc_default)
++{
++ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
++ struct Qdisc *qdisc_default = _qdisc_default;
++
++ if (qdisc) {
++ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
++ dev_queue->qdisc_sleeping = qdisc_default;
++
++ qdisc_put(qdisc);
++ }
++}
++
+ static void attach_one_default_qdisc(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+@@ -1114,30 +1129,34 @@ static void attach_default_qdiscs(struct net_device *dev)
+ if (!netif_is_multiqueue(dev) ||
+ dev->priv_flags & IFF_NO_QUEUE) {
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+- dev->qdisc = txq->qdisc_sleeping;
+- qdisc_refcount_inc(dev->qdisc);
++ qdisc = txq->qdisc_sleeping;
++ rcu_assign_pointer(dev->qdisc, qdisc);
++ qdisc_refcount_inc(qdisc);
+ } else {
+ qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
+ if (qdisc) {
+- dev->qdisc = qdisc;
++ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc->ops->attach(qdisc);
+ }
+ }
++ qdisc = rtnl_dereference(dev->qdisc);
+
+ /* Detect default qdisc setup/init failed and fallback to "noqueue" */
+- if (dev->qdisc == &noop_qdisc) {
++ if (qdisc == &noop_qdisc) {
+ netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
+ default_qdisc_ops->id, noqueue_qdisc_ops.id);
++ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+ dev->priv_flags |= IFF_NO_QUEUE;
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+- dev->qdisc = txq->qdisc_sleeping;
+- qdisc_refcount_inc(dev->qdisc);
++ qdisc = txq->qdisc_sleeping;
++ rcu_assign_pointer(dev->qdisc, qdisc);
++ qdisc_refcount_inc(qdisc);
+ dev->priv_flags ^= IFF_NO_QUEUE;
+ }
+
+ #ifdef CONFIG_NET_SCHED
+- if (dev->qdisc != &noop_qdisc)
+- qdisc_hash_add(dev->qdisc, false);
++ if (qdisc != &noop_qdisc)
++ qdisc_hash_add(qdisc, false);
+ #endif
+ }
+
+@@ -1167,7 +1186,7 @@ void dev_activate(struct net_device *dev)
+ * and noqueue_qdisc for virtual interfaces
+ */
+
+- if (dev->qdisc == &noop_qdisc)
++ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
+ attach_default_qdiscs(dev);
+
+ if (!netif_carrier_ok(dev))
+@@ -1330,6 +1349,15 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
+ return 0;
+ }
+
++void dev_qdisc_change_real_num_tx(struct net_device *dev,
++ unsigned int new_real_tx)
++{
++ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
++
++ if (qdisc->ops->change_real_num_tx)
++ qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
++}
++
+ int dev_qdisc_change_tx_queue_len(struct net_device *dev)
+ {
+ bool up = dev->flags & IFF_UP;
+@@ -1364,7 +1392,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
+
+ void dev_init_scheduler(struct net_device *dev)
+ {
+- dev->qdisc = &noop_qdisc;
++ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
+ netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
+ if (dev_ingress_queue(dev))
+ dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
+@@ -1372,28 +1400,13 @@ void dev_init_scheduler(struct net_device *dev)
+ timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
+ }
+
+-static void shutdown_scheduler_queue(struct net_device *dev,
+- struct netdev_queue *dev_queue,
+- void *_qdisc_default)
+-{
+- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+- struct Qdisc *qdisc_default = _qdisc_default;
+-
+- if (qdisc) {
+- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+- dev_queue->qdisc_sleeping = qdisc_default;
+-
+- qdisc_put(qdisc);
+- }
+-}
+-
+ void dev_shutdown(struct net_device *dev)
+ {
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+ if (dev_ingress_queue(dev))
+ shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
+- qdisc_put(dev->qdisc);
+- dev->qdisc = &noop_qdisc;
++ qdisc_put(rtnl_dereference(dev->qdisc));
++ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
+
+ WARN_ON(timer_pending(&dev->watchdog_timer));
+ }
+@@ -1446,6 +1459,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
+ {
+ memset(r, 0, sizeof(*r));
+ r->overhead = conf->overhead;
++ r->mpu = conf->mpu;
+ r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
+ r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
+ psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index b7ac30cca035d..01126e285f94e 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1012,6 +1012,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ if (parent == NULL)
+ return -ENOENT;
+ }
++ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
++ NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
++ return -EINVAL;
++ }
+
+ if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
+ return -EINVAL;
+@@ -1485,8 +1489,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
+ }
+ q->eligible = RB_ROOT;
+ qdisc_watchdog_cancel(&q->watchdog);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void
+diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
+index 5067a6e5d4fde..8ce999e4ca323 100644
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -427,7 +427,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
+ while (cl->cmode == HTB_MAY_BORROW && p && mask) {
+ m = mask;
+ while (m) {
+- int prio = ffz(~m);
++ unsigned int prio = ffz(~m);
++
++ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
++ break;
+ m &= ~(1 << prio);
+
+ if (p->inner.clprio[prio].feed.rb_node)
+@@ -1008,8 +1011,6 @@ static void htb_reset(struct Qdisc *sch)
+ }
+ qdisc_watchdog_cancel(&q->watchdog);
+ __qdisc_reset_queue(&q->direct_queue);
+- sch->q.qlen = 0;
+- sch->qstats.backlog = 0;
+ memset(q->hlevel, 0, sizeof(q->hlevel));
+ memset(q->row_mask, 0, sizeof(q->row_mask));
+ }
+@@ -1560,7 +1561,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+ struct tc_htb_qopt_offload offload_opt;
+ struct netdev_queue *dev_queue;
+ struct Qdisc *q = cl->leaf.q;
+- struct Qdisc *old = NULL;
++ struct Qdisc *old;
+ int err;
+
+ if (cl->level)
+@@ -1568,14 +1569,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+
+ WARN_ON(!q);
+ dev_queue = htb_offload_get_queue(cl);
+- old = htb_graft_helper(dev_queue, NULL);
+- if (destroying)
+- /* Before HTB is destroyed, the kernel grafts noop_qdisc to
+- * all queues.
++ /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
++ * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
++ * does not need to graft or qdisc_put the qdisc being destroyed.
++ */
++ if (!destroying) {
++ old = htb_graft_helper(dev_queue, NULL);
++ /* Last qdisc grafted should be the same as cl->leaf.q when
++ * calling htb_delete.
+ */
+- WARN_ON(!(old->flags & TCQ_F_BUILTIN));
+- else
+ WARN_ON(old != q);
++ }
+
+ if (cl->parent) {
+ cl->parent->bstats_bias.bytes += q->bstats.bytes;
+@@ -1591,10 +1595,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+ };
+ err = htb_offload(qdisc_dev(sch), &offload_opt);
+
+- if (!err || destroying)
+- qdisc_put(old);
+- else
+- htb_graft_helper(dev_queue, old);
++ if (!destroying) {
++ if (!err)
++ qdisc_put(old);
++ else
++ htb_graft_helper(dev_queue, old);
++ }
+
+ if (last_child)
+ return err;
+@@ -1803,6 +1809,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
+ if (!hopt->rate.rate || !hopt->ceil.rate)
+ goto failure;
+
++ if (q->offload) {
++ /* Options not supported by the offload. */
++ if (hopt->rate.overhead || hopt->ceil.overhead) {
++ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
++ goto failure;
++ }
++ if (hopt->rate.mpu || hopt->ceil.mpu) {
++ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
++ goto failure;
++ }
++ if (hopt->quantum) {
++ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
++ goto failure;
++ }
++ if (hopt->prio) {
++ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
++ goto failure;
++ }
++ }
++
+ /* Keeping backward compatible with rate_table based iproute2 tc */
+ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
+diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
+index 84838128b9c5b..e43a454993723 100644
+--- a/net/sched/sch_ingress.c
++++ b/net/sched/sch_ingress.c
+@@ -80,6 +80,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
++ if (sch->parent != TC_H_INGRESS)
++ return -EOPNOTSUPP;
++
+ net_inc_ingress_queue();
+
+ mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+@@ -101,6 +104,9 @@ static void ingress_destroy(struct Qdisc *sch)
+ {
+ struct ingress_sched_data *q = qdisc_priv(sch);
+
++ if (sch->parent != TC_H_INGRESS)
++ return;
++
+ tcf_block_put_ext(q->block, sch, &q->block_info);
+ net_dec_ingress_queue();
+ }
+@@ -134,7 +140,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
+ .cl_ops = &ingress_class_ops,
+ .id = "ingress",
+ .priv_size = sizeof(struct ingress_sched_data),
+- .static_flags = TCQ_F_CPUSTATS,
++ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
+ .init = ingress_init,
+ .destroy = ingress_destroy,
+ .dump = ingress_dump,
+@@ -219,6 +225,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
++ if (sch->parent != TC_H_CLSACT)
++ return -EOPNOTSUPP;
++
+ net_inc_ingress_queue();
+ net_inc_egress_queue();
+
+@@ -248,6 +257,9 @@ static void clsact_destroy(struct Qdisc *sch)
+ {
+ struct clsact_sched_data *q = qdisc_priv(sch);
+
++ if (sch->parent != TC_H_CLSACT)
++ return;
++
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+
+@@ -269,7 +281,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
+ .cl_ops = &clsact_class_ops,
+ .id = "clsact",
+ .priv_size = sizeof(struct clsact_sched_data),
+- .static_flags = TCQ_F_CPUSTATS,
++ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
+ .init = clsact_init,
+ .destroy = clsact_destroy,
+ .dump = ingress_dump,
+diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
+index e79f1afe0cfd6..db18d8a860f9c 100644
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch)
+ priv->qdiscs = NULL;
+ }
+
++static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
++{
++#ifdef CONFIG_NET_SCHED
++ struct net_device *dev = qdisc_dev(sch);
++ struct Qdisc *qdisc;
++ unsigned int i;
++
++ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
++ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
++ /* Only update the default qdiscs we created,
++ * qdiscs with handles are always hashed.
++ */
++ if (qdisc != &noop_qdisc && !qdisc->handle)
++ qdisc_hash_del(qdisc);
++ }
++ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
++ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
++ if (qdisc != &noop_qdisc && !qdisc->handle)
++ qdisc_hash_add(qdisc, false);
++ }
++#endif
++}
++
+ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+ {
+ struct net_device *dev = qdisc_dev(sch);
+@@ -288,6 +311,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = {
+ .init = mq_init,
+ .destroy = mq_destroy,
+ .attach = mq_attach,
++ .change_real_num_tx = mq_change_real_num_tx,
+ .dump = mq_dump,
+ .owner = THIS_MODULE,
+ };
+diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
+index 5eb3b1b7ae5e7..56d3dc5e95c7c 100644
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -130,6 +130,97 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ return 0;
+ }
+
++static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
++ struct nlattr *opt,
++ struct netlink_ext_ack *extack)
++{
++ struct mqprio_sched *priv = qdisc_priv(sch);
++ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
++ struct nlattr *attr;
++ int i, rem, err;
++
++ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
++ sizeof(*qopt));
++ if (err < 0)
++ return err;
++
++ if (!qopt->hw) {
++ NL_SET_ERR_MSG(extack,
++ "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode");
++ return -EINVAL;
++ }
++
++ if (tb[TCA_MQPRIO_MODE]) {
++ priv->flags |= TC_MQPRIO_F_MODE;
++ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
++ }
++
++ if (tb[TCA_MQPRIO_SHAPER]) {
++ priv->flags |= TC_MQPRIO_F_SHAPER;
++ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
++ }
++
++ if (tb[TCA_MQPRIO_MIN_RATE64]) {
++ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64],
++ "min_rate accepted only when shaper is in bw_rlimit mode");
++ return -EINVAL;
++ }
++ i = 0;
++ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
++ rem) {
++ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) {
++ NL_SET_ERR_MSG_ATTR(extack, attr,
++ "Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
++ return -EINVAL;
++ }
++
++ if (nla_len(attr) != sizeof(u64)) {
++ NL_SET_ERR_MSG_ATTR(extack, attr,
++ "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
++ return -EINVAL;
++ }
++
++ if (i >= qopt->num_tc)
++ break;
++ priv->min_rate[i] = *(u64 *)nla_data(attr);
++ i++;
++ }
++ priv->flags |= TC_MQPRIO_F_MIN_RATE;
++ }
++
++ if (tb[TCA_MQPRIO_MAX_RATE64]) {
++ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
++ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64],
++ "max_rate accepted only when shaper is in bw_rlimit mode");
++ return -EINVAL;
++ }
++ i = 0;
++ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
++ rem) {
++ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) {
++ NL_SET_ERR_MSG_ATTR(extack, attr,
++ "Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
++ return -EINVAL;
++ }
++
++ if (nla_len(attr) != sizeof(u64)) {
++ NL_SET_ERR_MSG_ATTR(extack, attr,
++ "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
++ return -EINVAL;
++ }
++
++ if (i >= qopt->num_tc)
++ break;
++ priv->max_rate[i] = *(u64 *)nla_data(attr);
++ i++;
++ }
++ priv->flags |= TC_MQPRIO_F_MAX_RATE;
++ }
++
++ return 0;
++}
++
+ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+ {
+@@ -139,9 +230,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct Qdisc *qdisc;
+ int i, err = -EOPNOTSUPP;
+ struct tc_mqprio_qopt *qopt = NULL;
+- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+- struct nlattr *attr;
+- int rem;
+ int len;
+
+ BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+@@ -166,55 +254,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
+
+ len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
+ if (len > 0) {
+- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+- sizeof(*qopt));
+- if (err < 0)
++ err = mqprio_parse_nlattr(sch, qopt, opt, extack);
++ if (err)
+ return err;
+-
+- if (!qopt->hw)
+- return -EINVAL;
+-
+- if (tb[TCA_MQPRIO_MODE]) {
+- priv->flags |= TC_MQPRIO_F_MODE;
+- priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+- }
+-
+- if (tb[TCA_MQPRIO_SHAPER]) {
+- priv->flags |= TC_MQPRIO_F_SHAPER;
+- priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+- }
+-
+- if (tb[TCA_MQPRIO_MIN_RATE64]) {
+- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+- return -EINVAL;
+- i = 0;
+- nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+- rem) {
+- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+- return -EINVAL;
+- if (i >= qopt->num_tc)
+- break;
+- priv->min_rate[i] = *(u64 *)nla_data(attr);
+- i++;
+- }
+- priv->flags |= TC_MQPRIO_F_MIN_RATE;
+- }
+-
+- if (tb[TCA_MQPRIO_MAX_RATE64]) {
+- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+- return -EINVAL;
+- i = 0;
+- nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+- rem) {
+- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+- return -EINVAL;
+- if (i >= qopt->num_tc)
+- break;
+- priv->max_rate[i] = *(u64 *)nla_data(attr);
+- i++;
+- }
+- priv->flags |= TC_MQPRIO_F_MAX_RATE;
+- }
+ }
+
+ /* pre-allocate qdisc, attachment can't fail */
+@@ -306,6 +348,28 @@ static void mqprio_attach(struct Qdisc *sch)
+ priv->qdiscs = NULL;
+ }
+
++static void mqprio_change_real_num_tx(struct Qdisc *sch,
++ unsigned int new_real_tx)
++{
++ struct net_device *dev = qdisc_dev(sch);
++ struct Qdisc *qdisc;
++ unsigned int i;
++
++ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
++ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
++ /* Only update the default qdiscs we created,
++ * qdiscs with handles are always hashed.
++ */
++ if (qdisc != &noop_qdisc && !qdisc->handle)
++ qdisc_hash_del(qdisc);
++ }
++ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
++ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
++ if (qdisc != &noop_qdisc && !qdisc->handle)
++ qdisc_hash_add(qdisc, false);
++ }
++}
++
+ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+ unsigned long cl)
+ {
+@@ -629,6 +693,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+ .init = mqprio_init,
+ .destroy = mqprio_destroy,
+ .attach = mqprio_attach,
++ .change_real_num_tx = mqprio_change_real_num_tx,
+ .dump = mqprio_dump,
+ .owner = THIS_MODULE,
+ };
+diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
+index e282e7382117a..8b99f07aa3a76 100644
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch)
+
+ for (band = 0; band < q->bands; band++)
+ qdisc_reset(q->queues[band]);
+- sch->q.qlen = 0;
+ q->curband = 0;
+ }
+
+diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
+index 0c345e43a09a3..08aaa6efc62c8 100644
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -773,12 +773,10 @@ static void dist_free(struct disttable *d)
+ * signed 16 bit values.
+ */
+
+-static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
+- const struct nlattr *attr)
++static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
+ {
+ size_t n = nla_len(attr)/sizeof(__s16);
+ const __s16 *data = nla_data(attr);
+- spinlock_t *root_lock;
+ struct disttable *d;
+ int i;
+
+@@ -793,13 +791,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
+ for (i = 0; i < n; i++)
+ d->table[i] = data[i];
+
+- root_lock = qdisc_root_sleeping_lock(sch);
+-
+- spin_lock_bh(root_lock);
+- swap(*tbl, d);
+- spin_unlock_bh(root_lock);
+-
+- dist_free(d);
++ *tbl = d;
+ return 0;
+ }
+
+@@ -956,6 +948,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
+ {
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_NETEM_MAX + 1];
++ struct disttable *delay_dist = NULL;
++ struct disttable *slot_dist = NULL;
+ struct tc_netem_qopt *qopt;
+ struct clgstate old_clg;
+ int old_loss_model = CLG_RANDOM;
+@@ -969,6 +963,19 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
+ if (ret < 0)
+ return ret;
+
++ if (tb[TCA_NETEM_DELAY_DIST]) {
++ ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
++ if (ret)
++ goto table_free;
++ }
++
++ if (tb[TCA_NETEM_SLOT_DIST]) {
++ ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
++ if (ret)
++ goto table_free;
++ }
++
++ sch_tree_lock(sch);
+ /* backup q->clg and q->loss_model */
+ old_clg = q->clg;
+ old_loss_model = q->loss_model;
+@@ -977,26 +984,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
+ ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
+ if (ret) {
+ q->loss_model = old_loss_model;
+- return ret;
++ q->clg = old_clg;
++ goto unlock;
+ }
+ } else {
+ q->loss_model = CLG_RANDOM;
+ }
+
+- if (tb[TCA_NETEM_DELAY_DIST]) {
+- ret = get_dist_table(sch, &q->delay_dist,
+- tb[TCA_NETEM_DELAY_DIST]);
+- if (ret)
+- goto get_table_failure;
+- }
+-
+- if (tb[TCA_NETEM_SLOT_DIST]) {
+- ret = get_dist_table(sch, &q->slot_dist,
+- tb[TCA_NETEM_SLOT_DIST]);
+- if (ret)
+- goto get_table_failure;
+- }
+-
++ if (delay_dist)
++ swap(q->delay_dist, delay_dist);
++ if (slot_dist)
++ swap(q->slot_dist, slot_dist);
+ sch->limit = qopt->limit;
+
+ q->latency = PSCHED_TICKS2NS(qopt->latency);
+@@ -1044,15 +1042,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
+ /* capping jitter to the range acceptable by tabledist() */
+ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+
+- return ret;
++unlock:
++ sch_tree_unlock(sch);
+
+-get_table_failure:
+- /* recover clg and loss_model, in case of
+- * q->clg and q->loss_model were modified
+- * in get_loss_clg()
+- */
+- q->clg = old_clg;
+- q->loss_model = old_loss_model;
++table_free:
++ dist_free(delay_dist);
++ dist_free(slot_dist);
+ return ret;
+ }
+
+@@ -1146,9 +1141,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
+ struct tc_netem_rate rate;
+ struct tc_netem_slot slot;
+
+- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
++ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
+ UINT_MAX);
+- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
++ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
+ UINT_MAX);
+ qopt.limit = q->limit;
+ qopt.loss = q->loss;
+diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
+index cbc2ebca4548c..339990bb59817 100644
+--- a/net/sched/sch_plug.c
++++ b/net/sched/sch_plug.c
+@@ -210,7 +210,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
+ .priv_size = sizeof(struct plug_sched_data),
+ .enqueue = plug_enqueue,
+ .dequeue = plug_dequeue,
+- .peek = qdisc_peek_head,
++ .peek = qdisc_peek_dequeued,
+ .init = plug_init,
+ .change = plug_change,
+ .reset = qdisc_reset_queue,
+diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
+index 03fdf31ccb6af..2e0b1e7f54668 100644
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch)
+
+ for (prio = 0; prio < q->bands; prio++)
+ qdisc_reset(q->queues[prio]);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index 58a9d42b52b8f..b1dbe03dde1b5 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -113,6 +113,7 @@
+
+ #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
+ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */
++#define QFQ_MAX_LMAX (1UL << QFQ_MTU_SHIFT)
+
+ #define QFQ_MAX_AGG_CLASSES 8 /* max num classes per aggregate allowed */
+
+@@ -214,9 +215,14 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
+ return container_of(clc, struct qfq_class, common);
+ }
+
++static struct netlink_range_validation lmax_range = {
++ .min = QFQ_MIN_LMAX,
++ .max = QFQ_MAX_LMAX,
++};
++
+ static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
+- [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
+- [TCA_QFQ_LMAX] = { .type = NLA_U32 },
++ [TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT),
++ [TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range),
+ };
+
+ /*
+@@ -375,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
+ u32 lmax)
+ {
+ struct qfq_sched *q = qdisc_priv(sch);
+- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
++ struct qfq_aggregate *new_agg;
++
++ /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */
++ if (lmax > QFQ_MAX_LMAX)
++ return -EINVAL;
+
++ new_agg = qfq_find_agg(q, lmax, weight);
+ if (new_agg == NULL) { /* create new aggregate */
+ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
+ if (new_agg == NULL)
+@@ -408,27 +419,26 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ }
+
+ err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
+- qfq_policy, NULL);
++ qfq_policy, extack);
+ if (err < 0)
+ return err;
+
+- if (tb[TCA_QFQ_WEIGHT]) {
++ if (tb[TCA_QFQ_WEIGHT])
+ weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
+- if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
+- pr_notice("qfq: invalid weight %u\n", weight);
+- return -EINVAL;
+- }
+- } else
++ else
+ weight = 1;
+
+ if (tb[TCA_QFQ_LMAX]) {
+ lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
+- if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
+- pr_notice("qfq: invalid max length %u\n", lmax);
++ } else {
++ /* MTU size is user controlled */
++ lmax = psched_mtu(qdisc_dev(sch));
++ if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
++ NL_SET_ERR_MSG_MOD(extack,
++ "MTU size out of bounds for qfq");
+ return -EINVAL;
+ }
+- } else
+- lmax = psched_mtu(qdisc_dev(sch));
++ }
+
+ inv_w = ONE_FP / weight;
+ weight = ONE_FP / inv_w;
+@@ -970,10 +980,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
+ }
+
+ /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
+-static void agg_dequeue(struct qfq_aggregate *agg,
+- struct qfq_class *cl, unsigned int len)
++static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
++ struct qfq_class *cl, unsigned int len)
+ {
+- qdisc_dequeue_peeked(cl->qdisc);
++ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
++
++ if (!skb)
++ return NULL;
+
+ cl->deficit -= (int) len;
+
+@@ -983,6 +996,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
+ cl->deficit += agg->lmax;
+ list_move_tail(&cl->alist, &agg->active);
+ }
++
++ return skb;
+ }
+
+ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
+@@ -1128,11 +1143,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
+ if (!skb)
+ return NULL;
+
+- qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
++
++ skb = agg_dequeue(in_serv_agg, cl, len);
++
++ if (!skb) {
++ sch->q.qlen++;
++ return NULL;
++ }
++
++ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+
+- agg_dequeue(in_serv_agg, cl, len);
+ /* If lmax is lowered, through qfq_change_class, for a class
+ * owning pending packets with larger size than the new value
+ * of lmax, then the following condition may hold.
+@@ -1422,10 +1444,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ if (err < 0)
+ return err;
+
+- if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
+- max_classes = QFQ_MAX_AGG_CLASSES;
+- else
+- max_classes = qdisc_dev(sch)->tx_queue_len + 1;
++ max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1,
++ QFQ_MAX_AGG_CLASSES);
+ /* max_cl_shift = floor(log_2(max_classes)) */
+ max_cl_shift = __fls(max_classes);
+ q->max_agg_classes = 1<<max_cl_shift;
+@@ -1461,8 +1481,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
+ qdisc_reset(cl->qdisc);
+ }
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void qfq_destroy_qdisc(struct Qdisc *sch)
+diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
+index 40adf1f07a82d..935d90874b1b7 100644
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ {
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
++ unsigned int len;
+ int ret;
+
+ q->vars.qavg = red_calc_qavg(&q->parms,
+@@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ break;
+ }
+
++ len = qdisc_pkt_len(skb);
+ ret = qdisc_enqueue(skb, child, to_free);
+ if (likely(ret == NET_XMIT_SUCCESS)) {
+- qdisc_qstats_backlog_inc(sch, skb);
++ sch->qstats.backlog += len;
+ sch->q.qlen++;
+ } else if (net_xmit_drop_count(ret)) {
+ q->stats.pdrop++;
+@@ -176,8 +178,6 @@ static void red_reset(struct Qdisc *sch)
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ red_restart(&q->vars);
+ }
+
+diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
+index 3d061a13d7ed2..0490eb5b98dee 100644
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
+ }
+ }
+
+-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
++static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q)
+ {
+ u32 sfbhash;
+
+- sfbhash = sfb_hash(skb, 0);
++ sfbhash = cb->hashes[0];
+ if (sfbhash)
+ increment_one_qlen(sfbhash, 0, q);
+
+- sfbhash = sfb_hash(skb, 1);
++ sfbhash = cb->hashes[1];
+ if (sfbhash)
+ increment_one_qlen(sfbhash, 1, q);
+ }
+@@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ {
+
+ struct sfb_sched_data *q = qdisc_priv(sch);
++ unsigned int len = qdisc_pkt_len(skb);
+ struct Qdisc *child = q->qdisc;
+ struct tcf_proto *fl;
++ struct sfb_skb_cb cb;
+ int i;
+ u32 p_min = ~0;
+ u32 minqlen = ~0;
+@@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ }
+
+ enqueue:
++ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));
+ ret = qdisc_enqueue(skb, child, to_free);
+ if (likely(ret == NET_XMIT_SUCCESS)) {
+- qdisc_qstats_backlog_inc(sch, skb);
++ sch->qstats.backlog += len;
+ sch->q.qlen++;
+- increment_qlen(skb, q);
++ increment_qlen(&cb, q);
+ } else if (net_xmit_drop_count(ret)) {
+ q->stats.childdrop++;
+ qdisc_qstats_drop(sch);
+@@ -452,9 +455,8 @@ static void sfb_reset(struct Qdisc *sch)
+ {
+ struct sfb_sched_data *q = qdisc_priv(sch);
+
+- qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
++ if (likely(q->qdisc))
++ qdisc_reset(q->qdisc);
+ q->slot = 0;
+ q->double_buffering = false;
+ sfb_zero_all_buckets(q);
+diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
+index 7a5e4c4547156..df72fb83d9c7d 100644
+--- a/net/sched/sch_skbprio.c
++++ b/net/sched/sch_skbprio.c
+@@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch)
+ struct skbprio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+-
+ for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+ __skb_queue_purge(&q->qdiscs[prio]);
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index b9fd18d986464..e40b4425eb6b5 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -66,6 +66,7 @@ struct taprio_sched {
+ u32 flags;
+ enum tk_offsets tk_offset;
+ int clockid;
++ bool offloaded;
+ atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
+@@ -95,18 +96,22 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched)
+ return ns_to_ktime(sched->base_time);
+ }
+
+-static ktime_t taprio_get_time(struct taprio_sched *q)
++static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
+ {
+- ktime_t mono = ktime_get();
++ /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
++ enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
+
+- switch (q->tk_offset) {
++ switch (tk_offset) {
+ case TK_OFFS_MAX:
+ return mono;
+ default:
+- return ktime_mono_to_any(mono, q->tk_offset);
++ return ktime_mono_to_any(mono, tk_offset);
+ }
++}
+
+- return KTIME_MAX;
++static ktime_t taprio_get_time(const struct taprio_sched *q)
++{
++ return taprio_mono_to_any(q, ktime_get());
+ }
+
+ static void taprio_free_sched_cb(struct rcu_head *head)
+@@ -319,7 +324,7 @@ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
+ return 0;
+ }
+
+- return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
++ return taprio_mono_to_any(q, skb->skb_mstamp_ns);
+ }
+
+ /* There are a few scenarios where we will have to modify the txtime from
+@@ -413,7 +418,8 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+ {
+ struct taprio_sched *q = qdisc_priv(sch);
+
+- if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
++ /* sk_flags are only safe to use on full sockets. */
++ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
+ if (!is_valid_interval(skb, sch))
+ return qdisc_drop(skb, sch, to_free);
+ } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+@@ -774,6 +780,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
+ [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
+ };
+
++static struct netlink_range_validation_signed taprio_cycle_time_range = {
++ .min = 0,
++ .max = INT_MAX,
++};
++
+ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+ [TCA_TAPRIO_ATTR_PRIOMAP] = {
+ .len = sizeof(struct tc_mqprio_qopt)
+@@ -782,7 +793,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+ [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
+- [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
++ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
++ NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
+@@ -917,6 +929,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
+ return -EINVAL;
+ }
+
++ if (cycle < 0 || cycle > INT_MAX) {
++ NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
++ return -EINVAL;
++ }
++
+ new->cycle_time = cycle;
+ }
+
+@@ -1121,7 +1138,7 @@ static void setup_txtime(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
+ {
+ struct sched_entry *entry;
+- u32 interval = 0;
++ u64 interval = 0;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ entry->next_txtime = ktime_add_ns(base, interval);
+@@ -1273,6 +1290,8 @@ static int taprio_enable_offload(struct net_device *dev,
+ goto done;
+ }
+
++ q->offloaded = true;
++
+ done:
+ taprio_offload_free(offload);
+
+@@ -1287,12 +1306,9 @@ static int taprio_disable_offload(struct net_device *dev,
+ struct tc_taprio_qopt_offload *offload;
+ int err;
+
+- if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
++ if (!q->offloaded)
+ return 0;
+
+- if (!ops->ndo_setup_tc)
+- return -EOPNOTSUPP;
+-
+ offload = taprio_offload_alloc(0);
+ if (!offload) {
+ NL_SET_ERR_MSG(extack,
+@@ -1308,6 +1324,8 @@ static int taprio_disable_offload(struct net_device *dev,
+ goto out;
+ }
+
++ q->offloaded = false;
++
+ out:
+ taprio_offload_free(offload);
+
+@@ -1352,6 +1370,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+ }
+ } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
++ enum tk_offsets tk_offset;
+
+ /* We only support static clockids and we don't allow
+ * for it to be modified after the first init.
+@@ -1366,22 +1385,24 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+
+ switch (clockid) {
+ case CLOCK_REALTIME:
+- q->tk_offset = TK_OFFS_REAL;
++ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+- q->tk_offset = TK_OFFS_MAX;
++ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+- q->tk_offset = TK_OFFS_BOOT;
++ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+- q->tk_offset = TK_OFFS_TAI;
++ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ err = -EINVAL;
+ goto out;
+ }
++ /* This pairs with READ_ONCE() in taprio_mono_to_any */
++ WRITE_ONCE(q->tk_offset, tk_offset);
+
+ q->clockid = clockid;
+ } else {
+@@ -1622,13 +1643,12 @@ static void taprio_reset(struct Qdisc *sch)
+ int i;
+
+ hrtimer_cancel(&q->advance_timer);
++
+ if (q->qdiscs) {
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (q->qdiscs[i])
+ qdisc_reset(q->qdiscs[i]);
+ }
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ }
+
+ static void taprio_destroy(struct Qdisc *sch)
+@@ -1645,6 +1665,7 @@ static void taprio_destroy(struct Qdisc *sch)
+ * happens in qdisc_create(), after taprio_init() has been called.
+ */
+ hrtimer_cancel(&q->advance_timer);
++ qdisc_synchronize(sch);
+
+ taprio_disable_offload(dev, q, NULL);
+
+diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
+index 78e79029dc631..7461e5c67d50a 100644
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -316,8 +316,6 @@ static void tbf_reset(struct Qdisc *sch)
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+- sch->qstats.backlog = 0;
+- sch->q.qlen = 0;
+ q->t_c = ktime_get_ns();
+ q->tokens = q->buffer;
+ q->ptokens = q->mtu;
+@@ -342,6 +340,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
+ struct nlattr *tb[TCA_TBF_MAX + 1];
+ struct tc_tbf_qopt *qopt;
+ struct Qdisc *child = NULL;
++ struct Qdisc *old = NULL;
+ struct psched_ratecfg rate;
+ struct psched_ratecfg peak;
+ u64 max_size;
+@@ -433,7 +432,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
+ sch_tree_lock(sch);
+ if (child) {
+ qdisc_tree_flush_backlog(q->qdisc);
+- qdisc_put(q->qdisc);
++ old = q->qdisc;
+ q->qdisc = child;
+ }
+ q->limit = qopt->limit;
+@@ -453,6 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
+
+ sch_tree_unlock(sch);
++ qdisc_put(old);
+ err = 0;
+
+ tbf_offload_change(sch);
+diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
+index 6af6b95bdb672..79aaab51cbf5c 100644
+--- a/net/sched/sch_teql.c
++++ b/net/sched/sch_teql.c
+@@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch)
+ struct teql_sched_data *dat = qdisc_priv(sch);
+
+ skb_queue_purge(&dat->q);
+- sch->q.qlen = 0;
+ }
+
+ static void
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index be29da09cc7ab..3460abceba443 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
+ if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+ goto fail_init;
+
+- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+- 0, gfp))
+- goto fail_init;
++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++ goto stream_free;
+
+ /* Initialize default path MTU. */
+ asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/auth.c b/net/sctp/auth.c
+index db6b7373d16c3..34964145514e6 100644
+--- a/net/sctp/auth.c
++++ b/net/sctp/auth.c
+@@ -863,12 +863,17 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
+ }
+
+ list_del_init(&shkey->key_list);
+- sctp_auth_shkey_release(shkey);
+ list_add(&cur_key->key_list, sh_keys);
+
+- if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
++ if (asoc && asoc->active_key_id == auth_key->sca_keynumber &&
++ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
++ list_del_init(&cur_key->key_list);
++ sctp_auth_shkey_release(cur_key);
++ list_add(&shkey->key_list, sh_keys);
++ return -ENOMEM;
++ }
+
++ sctp_auth_shkey_release(shkey);
+ return 0;
+ }
+
+@@ -902,8 +907,13 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
+ return -EINVAL;
+
+ if (asoc) {
++ __u16 active_key_id = asoc->active_key_id;
++
+ asoc->active_key_id = key_id;
+- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
++ if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
++ asoc->active_key_id = active_key_id;
++ return -ENOMEM;
++ }
+ } else
+ ep->active_key_id = key_id;
+
+diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
+index 59e653b528b1f..6b95d3ba8fe1c 100644
+--- a/net/sctp/bind_addr.c
++++ b/net/sctp/bind_addr.c
+@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
+ }
+ }
+
++ /* If somehow no addresses were found that can be used with this
++ * scope, it's an error.
++ */
++ if (list_empty(&dest->address_list))
++ error = -ENETUNREACH;
++
+ out:
+ if (error)
+ sctp_bind_addr_clean(dest);
+diff --git a/net/sctp/diag.c b/net/sctp/diag.c
+index 760b367644c12..b0ce1080842d4 100644
+--- a/net/sctp/diag.c
++++ b/net/sctp/diag.c
+@@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
+ r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+ r->idiag_retrans = asoc->rtx_data_chunks;
+ r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+- } else {
+- r->idiag_timer = 0;
+- r->idiag_retrans = 0;
+- r->idiag_expires = 0;
+ }
+ }
+
+@@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
+ r = nlmsg_data(nlh);
+ BUG_ON(!sk_fullsock(sk));
+
++ r->idiag_timer = 0;
++ r->idiag_retrans = 0;
++ r->idiag_expires = 0;
+ if (asoc) {
+ inet_diag_msg_sctpasoc_fill(r, sk, asoc);
+ } else {
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_state = sk->sk_state;
+- r->idiag_timer = 0;
+- r->idiag_retrans = 0;
+ }
+
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
+@@ -245,54 +242,49 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
+ + 64;
+ }
+
+-static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
++static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
+ {
+ struct sctp_association *assoc = tsp->asoc;
+- struct sock *sk = tsp->asoc->base.sk;
+ struct sctp_comm_param *commp = p;
+- struct sk_buff *in_skb = commp->skb;
++ struct sock *sk = ep->base.sk;
+ const struct inet_diag_req_v2 *req = commp->r;
+- const struct nlmsghdr *nlh = commp->nlh;
+- struct net *net = sock_net(in_skb->sk);
++ struct sk_buff *skb = commp->skb;
+ struct sk_buff *rep;
+ int err;
+
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
+- goto out;
++ return err;
+
+- err = -ENOMEM;
+ rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL);
+ if (!rep)
+- goto out;
++ return -ENOMEM;
+
+ lock_sock(sk);
+- if (sk != assoc->base.sk) {
+- release_sock(sk);
+- sk = assoc->base.sk;
+- lock_sock(sk);
++ if (ep != assoc->ep) {
++ err = -EAGAIN;
++ goto out;
+ }
+- err = inet_sctp_diag_fill(sk, assoc, rep, req,
+- sk_user_ns(NETLINK_CB(in_skb).sk),
+- NETLINK_CB(in_skb).portid,
+- nlh->nlmsg_seq, 0, nlh,
+- commp->net_admin);
+- release_sock(sk);
++
++ err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(skb).sk),
++ NETLINK_CB(skb).portid, commp->nlh->nlmsg_seq, 0,
++ commp->nlh, commp->net_admin);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+- kfree_skb(rep);
+ goto out;
+ }
++ release_sock(sk);
+
+- err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
++ return nlmsg_unicast(sock_net(skb->sk)->diag_nlsk, rep, NETLINK_CB(skb).portid);
+
+ out:
++ release_sock(sk);
++ kfree_skb(rep);
+ return err;
+ }
+
+-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
++static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
+ {
+- struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct sk_buff *skb = commp->skb;
+@@ -302,6 +294,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+ int err = 0;
+
+ lock_sock(sk);
++ if (ep != tsp->asoc->ep)
++ goto release;
+ list_for_each_entry(assoc, &ep->asocs, asocs) {
+ if (cb->args[4] < cb->args[1])
+ goto next;
+@@ -344,17 +338,14 @@ release:
+ return err;
+ }
+
+-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
++static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
+ {
+- struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ const struct inet_diag_req_v2 *r = commp->r;
+- struct sctp_association *assoc =
+- list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+ /* find the ep only once through the transports by this condition */
+- if (tsp->asoc != assoc)
++ if (!list_is_first(&tsp->asoc->asocs, &ep->asocs))
+ return 0;
+
+ if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+@@ -429,15 +420,15 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ static int sctp_diag_dump_one(struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *req)
+ {
+- struct sk_buff *in_skb = cb->skb;
+- struct net *net = sock_net(in_skb->sk);
++ struct sk_buff *skb = cb->skb;
++ struct net *net = sock_net(skb->sk);
+ const struct nlmsghdr *nlh = cb->nlh;
+ union sctp_addr laddr, paddr;
+ struct sctp_comm_param commp = {
+- .skb = in_skb,
++ .skb = skb,
+ .r = req,
+ .nlh = nlh,
+- .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
++ .net_admin = netlink_net_capable(skb, CAP_NET_ADMIN),
+ };
+
+ if (req->sdiag_family == AF_INET) {
+@@ -460,7 +451,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb,
+ paddr.v6.sin6_family = AF_INET6;
+ }
+
+- return sctp_transport_lookup_process(sctp_tsp_dump_one,
++ return sctp_transport_lookup_process(sctp_sock_dump_one,
+ net, &laddr, &paddr, &commp);
+ }
+
+@@ -505,8 +496,8 @@ skip:
+ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
+ goto done;
+
+- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+- net, &pos, &commp);
++ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
++ net, &pos, &commp);
+ cb->args[2] = pos;
+
+ done:
+diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
+index 48c9c2c7602f7..efffde7f2328e 100644
+--- a/net/sctp/endpointola.c
++++ b/net/sctp/endpointola.c
+@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
+ }
+
+ /* Final destructor for endpoint. */
++static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
++{
++ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
++ struct sock *sk = ep->base.sk;
++
++ sctp_sk(sk)->ep = NULL;
++ sock_put(sk);
++
++ kfree(ep);
++ SCTP_DBG_OBJCNT_DEC(ep);
++}
++
+ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
+ {
+ struct sock *sk;
+@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
+ if (sctp_sk(sk)->bind_hash)
+ sctp_put_port(sk);
+
+- sctp_sk(sk)->ep = NULL;
+- /* Give up our hold on the sock */
+- sock_put(sk);
+-
+- kfree(ep);
+- SCTP_DBG_OBJCNT_DEC(ep);
++ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
+ }
+
+ /* Hold a reference to an endpoint. */
+-void sctp_endpoint_hold(struct sctp_endpoint *ep)
++int sctp_endpoint_hold(struct sctp_endpoint *ep)
+ {
+- refcount_inc(&ep->base.refcnt);
++ return refcount_inc_not_zero(&ep->base.refcnt);
+ }
+
+ /* Release a reference to an endpoint and clean up if there are
+diff --git a/net/sctp/input.c b/net/sctp/input.c
+index 1f1786021d9c8..d16b3885dcccb 100644
+--- a/net/sctp/input.c
++++ b/net/sctp/input.c
+@@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb)
+ struct sctp_chunk *chunk;
+ union sctp_addr src;
+ union sctp_addr dest;
++ int bound_dev_if;
+ int family;
+ struct sctp_af *af;
+ struct net *net = dev_net(skb->dev);
+@@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb)
+ * If a frame arrives on an interface and the receiving socket is
+ * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
+ */
+- if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
++ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
++ if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) {
+ if (transport) {
+ sctp_transport_put(transport);
+ asoc = NULL;
+diff --git a/net/sctp/output.c b/net/sctp/output.c
+index 4dfb5ea82b05b..cdfdbd353c678 100644
+--- a/net/sctp/output.c
++++ b/net/sctp/output.c
+@@ -581,13 +581,16 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
+ chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+ sk = chunk->skb->sk;
+
+- /* check gso */
+ if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
+- if (!sk_can_gso(sk)) {
+- pr_err_once("Trying to GSO but underlying device doesn't support it.");
+- goto out;
++ if (tp->pl.state == SCTP_PL_ERROR) { /* do IP fragmentation if in Error state */
++ packet->ipfragok = 1;
++ } else {
++ if (!sk_can_gso(sk)) { /* check gso */
++ pr_err_once("Trying to GSO but underlying device doesn't support it.");
++ goto out;
++ }
++ gso = 1;
+ }
+- gso = 1;
+ }
+
+ /* alloc head skb */
+diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
+index ff47091c385e7..dc29ac0f8d3f8 100644
+--- a/net/sctp/outqueue.c
++++ b/net/sctp/outqueue.c
+@@ -384,6 +384,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+ {
+ struct sctp_outq *q = &asoc->outqueue;
+ struct sctp_chunk *chk, *temp;
++ struct sctp_stream_out *sout;
+
+ q->sched->unsched_all(&asoc->stream);
+
+@@ -398,12 +399,14 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
+ sctp_sched_dequeue_common(q, chk);
+ asoc->sent_cnt_removable--;
+ asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+- if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
+- struct sctp_stream_out *streamout =
+- SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
+
+- streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+- }
++ sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
++ sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
++
++ /* clear out_curr if all frag chunks are pruned */
++ if (asoc->stream.out_curr == sout &&
++ list_is_last(&chk->frag_list, &chk->msg->chunks))
++ asoc->stream.out_curr = NULL;
+
+ msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
+ sctp_chunk_free(chk);
+@@ -911,6 +914,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
+ ctx->asoc->base.sk->sk_err = -error;
+ return;
+ }
++ ctx->asoc->stats.octrlchunks++;
+ break;
+
+ case SCTP_CID_ABORT:
+@@ -935,7 +939,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
+
+ case SCTP_CID_HEARTBEAT:
+ if (chunk->pmtu_probe) {
+- sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
++ error = sctp_packet_singleton(ctx->transport,
++ chunk, ctx->gfp);
++ if (!error)
++ ctx->asoc->stats.octrlchunks++;
+ break;
+ }
+ fallthrough;
+diff --git a/net/sctp/proc.c b/net/sctp/proc.c
+index 982a87b3e11f8..963b94517ec20 100644
+--- a/net/sctp/proc.c
++++ b/net/sctp/proc.c
+@@ -284,7 +284,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
+ assoc->init_retries, assoc->shutdown_retries,
+ assoc->rtx_data_chunks,
+ refcount_read(&sk->sk_wmem_alloc),
+- sk->sk_wmem_queued,
++ READ_ONCE(sk->sk_wmem_queued),
+ sk->sk_sndbuf,
+ sk->sk_rcvbuf);
+ seq_printf(seq, "\n");
+diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
+index ec0f52567c16f..9987decdead21 100644
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -359,7 +359,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
+ if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
+ ret != RTN_LOCAL &&
+ !sp->inet.freebind &&
+- !net->ipv4.sysctl_ip_nonlocal_bind)
++ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
+ return 0;
+
+ if (ipv6_only_sock(sctp_opt2sk(sp)))
+diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
+index b3815b568e8e5..970c6a486a9b0 100644
+--- a/net/sctp/sm_sideeffect.c
++++ b/net/sctp/sm_sideeffect.c
+@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
+ goto out_unlock;
+ }
+
++ /* This happens when the response arrives after the timer is triggered. */
++ if (!asoc->strreset_chunk)
++ goto out_unlock;
++
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
+ SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
+ asoc->state, asoc->ep, asoc,
+@@ -1247,7 +1251,10 @@ static int sctp_side_effects(enum sctp_event_type event_type,
+ default:
+ pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
+ status, state, event_type, subtype.chunk);
+- BUG();
++ error = status;
++ if (error >= 0)
++ error = -EINVAL;
++ WARN_ON_ONCE(1);
+ break;
+ }
+
+diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
+index fb3da4d8f4a34..13acb84b00c2b 100644
+--- a/net/sctp/sm_statefuns.c
++++ b/net/sctp/sm_statefuns.c
+@@ -4478,7 +4478,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
+ SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
+
+ if (!ev)
+- return -ENOMEM;
++ return SCTP_DISPOSITION_NOMEM;
+
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index 6b937bfd47515..36a37fef27719 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -68,7 +68,7 @@
+ #include <net/sctp/stream_sched.h>
+
+ /* Forward declarations for internal helper functions. */
+-static bool sctp_writeable(struct sock *sk);
++static bool sctp_writeable(const struct sock *sk);
+ static void sctp_wfree(struct sk_buff *skb);
+ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len);
+@@ -97,7 +97,7 @@ struct percpu_counter sctp_sockets_allocated;
+
+ static void sctp_enter_memory_pressure(struct sock *sk)
+ {
+- sctp_memory_pressure = 1;
++ WRITE_ONCE(sctp_memory_pressure, 1);
+ }
+
+
+@@ -138,7 +138,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
+
+ refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+ asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
+- sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
++ sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
+ sk_mem_charge(sk, chunk->skb->truesize);
+ }
+
+@@ -362,9 +362,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp)
+ struct net *net = sock_net(&sp->inet.sk);
+
+ if (net->sctp.default_auto_asconf) {
+- spin_lock(&net->sctp.addr_wq_lock);
++ spin_lock_bh(&net->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
+- spin_unlock(&net->sctp.addr_wq_lock);
++ spin_unlock_bh(&net->sctp.addr_wq_lock);
+ sp->do_auto_asconf = 1;
+ }
+ }
+@@ -1831,6 +1831,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ if (err)
+ goto err;
++ if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
++ err = -EINVAL;
++ goto err;
++ }
+ }
+
+ if (sctp_state(asoc, CLOSED)) {
+@@ -5106,13 +5110,17 @@ static void sctp_destroy_sock(struct sock *sk)
+ }
+
+ /* Triggered when there are no references on the socket anymore */
+-static void sctp_destruct_sock(struct sock *sk)
++static void sctp_destruct_common(struct sock *sk)
+ {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ /* Free up the HMAC transform. */
+ crypto_free_shash(sp->hmac);
++}
+
++static void sctp_destruct_sock(struct sock *sk)
++{
++ sctp_destruct_common(sk);
+ inet_sock_destruct(sk);
+ }
+
+@@ -5317,32 +5325,41 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
+ }
+ EXPORT_SYMBOL_GPL(sctp_for_each_endpoint);
+
+-int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
+- struct net *net,
++int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr, void *p)
+ {
+ struct sctp_transport *transport;
+- int err;
++ struct sctp_endpoint *ep;
++ int err = -ENOENT;
+
+ rcu_read_lock();
+ transport = sctp_addrs_lookup_transport(net, laddr, paddr);
++ if (!transport) {
++ rcu_read_unlock();
++ return err;
++ }
++ ep = transport->asoc->ep;
++ if (!sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
++ sctp_transport_put(transport);
++ rcu_read_unlock();
++ return err;
++ }
+ rcu_read_unlock();
+- if (!transport)
+- return -ENOENT;
+
+- err = cb(transport, p);
++ err = cb(ep, transport, p);
++ sctp_endpoint_put(ep);
+ sctp_transport_put(transport);
+-
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
+
+-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
+- int (*cb_done)(struct sctp_transport *, void *),
+- struct net *net, int *pos, void *p) {
++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
++ struct net *net, int *pos, void *p)
++{
+ struct rhashtable_iter hti;
+ struct sctp_transport *tsp;
++ struct sctp_endpoint *ep;
+ int ret;
+
+ again:
+@@ -5351,26 +5368,32 @@ again:
+
+ tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
+ for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
+- ret = cb(tsp, p);
+- if (ret)
+- break;
++ ep = tsp->asoc->ep;
++ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
++ ret = cb(ep, tsp, p);
++ if (ret)
++ break;
++ sctp_endpoint_put(ep);
++ }
+ (*pos)++;
+ sctp_transport_put(tsp);
+ }
+ sctp_transport_walk_stop(&hti);
+
+ if (ret) {
+- if (cb_done && !cb_done(tsp, p)) {
++ if (cb_done && !cb_done(ep, tsp, p)) {
+ (*pos)++;
++ sctp_endpoint_put(ep);
+ sctp_transport_put(tsp);
+ goto again;
+ }
++ sctp_endpoint_put(ep);
+ sctp_transport_put(tsp);
+ }
+
+ return ret;
+ }
+-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
++EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
+
+ /* 7.2.1 Association Status (SCTP_STATUS)
+
+@@ -5626,7 +5649,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
+ * Set the daddr and initialize id to something more random and also
+ * copy over any ip options.
+ */
+- sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
++ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
+ sp->pf->copy_ip_options(sk, sock->sk);
+
+ /* Populate the fields of the newsk from the oldsk and migrate the
+@@ -8264,6 +8287,22 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
+ return retval;
+ }
+
++static bool sctp_bpf_bypass_getsockopt(int level, int optname)
++{
++ if (level == SOL_SCTP) {
++ switch (optname) {
++ case SCTP_SOCKOPT_PEELOFF:
++ case SCTP_SOCKOPT_PEELOFF_FLAGS:
++ case SCTP_SOCKOPT_CONNECTX3:
++ return true;
++ default:
++ return false;
++ }
++ }
++
++ return false;
++}
++
+ static int sctp_hash(struct sock *sk)
+ {
+ /* STUB */
+@@ -9109,7 +9148,7 @@ static void sctp_wfree(struct sk_buff *skb)
+ struct sock *sk = asoc->base.sk;
+
+ sk_mem_uncharge(sk, skb->truesize);
+- sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
++ sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
+ asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
+ WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
+ &sk->sk_wmem_alloc));
+@@ -9264,9 +9303,9 @@ void sctp_write_space(struct sock *sk)
+ * UDP-style sockets or TCP-style sockets, this code should work.
+ * - Daisy
+ */
+-static bool sctp_writeable(struct sock *sk)
++static bool sctp_writeable(const struct sock *sk)
+ {
+- return sk->sk_sndbuf > sk->sk_wmem_queued;
++ return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
+ }
+
+ /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
+@@ -9424,7 +9463,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
+ sctp_sk(newsk)->reuse = sp->reuse;
+
+ newsk->sk_shutdown = sk->sk_shutdown;
+- newsk->sk_destruct = sctp_destruct_sock;
++ newsk->sk_destruct = sk->sk_destruct;
+ newsk->sk_family = sk->sk_family;
+ newsk->sk_protocol = IPPROTO_SCTP;
+ newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+@@ -9632,6 +9671,7 @@ struct proto sctp_prot = {
+ .shutdown = sctp_shutdown,
+ .setsockopt = sctp_setsockopt,
+ .getsockopt = sctp_getsockopt,
++ .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt,
+ .sendmsg = sctp_sendmsg,
+ .recvmsg = sctp_recvmsg,
+ .bind = sctp_bind,
+@@ -9656,11 +9696,20 @@ struct proto sctp_prot = {
+
+ #if IS_ENABLED(CONFIG_IPV6)
+
+-#include <net/transp_v6.h>
+-static void sctp_v6_destroy_sock(struct sock *sk)
++static void sctp_v6_destruct_sock(struct sock *sk)
++{
++ sctp_destruct_common(sk);
++ inet6_sock_destruct(sk);
++}
++
++static int sctp_v6_init_sock(struct sock *sk)
+ {
+- sctp_destroy_sock(sk);
+- inet6_destroy_sock(sk);
++ int ret = sctp_init_sock(sk);
++
++ if (!ret)
++ sk->sk_destruct = sctp_v6_destruct_sock;
++
++ return ret;
+ }
+
+ struct proto sctpv6_prot = {
+@@ -9670,11 +9719,12 @@ struct proto sctpv6_prot = {
+ .disconnect = sctp_disconnect,
+ .accept = sctp_accept,
+ .ioctl = sctp_ioctl,
+- .init = sctp_init_sock,
+- .destroy = sctp_v6_destroy_sock,
++ .init = sctp_v6_init_sock,
++ .destroy = sctp_destroy_sock,
+ .shutdown = sctp_shutdown,
+ .setsockopt = sctp_setsockopt,
+ .getsockopt = sctp_getsockopt,
++ .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt,
+ .sendmsg = sctp_sendmsg,
+ .recvmsg = sctp_recvmsg,
+ .bind = sctp_bind,
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4f..ee6514af830f7 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt)
+ }
+ }
+
++static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid)
++{
++ struct sctp_sched_ops *sched;
++
++ if (!SCTP_SO(stream, sid)->ext)
++ return;
++
++ sched = sctp_sched_ops_from_stream(stream);
++ sched->free_sid(stream, sid);
++ kfree(SCTP_SO(stream, sid)->ext);
++ SCTP_SO(stream, sid)->ext = NULL;
++}
++
+ /* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also, removes those chunks to streams
+ * higher than the new max.
+@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+ * sctp_stream_update will swap ->out pointers.
+ */
+ for (i = 0; i < outcnt; i++) {
+- kfree(SCTP_SO(new, i)->ext);
++ sctp_stream_free_ext(new, i);
+ SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext;
+ SCTP_SO(stream, i)->ext = NULL;
+ }
+ }
+
+- for (i = outcnt; i < stream->outcnt; i++) {
+- kfree(SCTP_SO(stream, i)->ext);
+- SCTP_SO(stream, i)->ext = NULL;
+- }
++ for (i = outcnt; i < stream->outcnt; i++)
++ sctp_stream_free_ext(stream, i);
+ }
+
+ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
+@@ -137,7 +148,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+ sctp_stream_interleave_init(stream);
+ if (!incnt)
+- goto out;
++ return 0;
+
+- ret = sctp_stream_alloc_in(stream, incnt, gfp);
+- if (ret)
+- goto in_err;
+-
+- goto out;
+-
+-in_err:
+- sched->free(stream);
+- genradix_free(&stream->in);
+-out_err:
+- genradix_free(&stream->out);
+- stream->outcnt = 0;
+-out:
+- return ret;
++ return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+@@ -187,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream)
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i;
+
+- sched->free(stream);
++ sched->unsched_all(stream);
+ for (i = 0; i < stream->outcnt; i++)
+- kfree(SCTP_SO(stream, i)->ext);
++ sctp_stream_free_ext(stream, i);
+ genradix_free(&stream->out);
+ genradix_free(&stream->in);
+ }
+diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
+index 6b13f737ebf2e..e3aad75cb11d9 100644
+--- a/net/sctp/stream_interleave.c
++++ b/net/sctp/stream_interleave.c
+@@ -1162,7 +1162,8 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
+
+ #define _sctp_walk_ifwdtsn(pos, chunk, end) \
+ for (pos = chunk->subh.ifwdtsn_hdr->skip; \
+- (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
++ (void *)pos <= (void *)chunk->subh.ifwdtsn_hdr->skip + (end) - \
++ sizeof(struct sctp_ifwdtsn_skip); pos++)
+
+ #define sctp_walk_ifwdtsn(pos, ch) \
+ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb742..33c2630c2496b 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
+ return 0;
+ }
+
++static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid)
++{
++}
++
+ static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+ {
+ }
+@@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
+ .get = sctp_sched_fcfs_get,
+ .init = sctp_sched_fcfs_init,
+ .init_sid = sctp_sched_fcfs_init_sid,
++ .free_sid = sctp_sched_fcfs_free_sid,
+ .free = sctp_sched_fcfs_free,
+ .enqueue = sctp_sched_fcfs_enqueue,
+ .dequeue = sctp_sched_fcfs_dequeue,
+@@ -163,7 +168,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+ if (!SCTP_SO(&asoc->stream, i)->ext)
+ continue;
+
+- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+ if (ret)
+ goto err;
+ }
+diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
+index 80b5a2c4cbc7b..7dd9f8b387cca 100644
+--- a/net/sctp/stream_sched_prio.c
++++ b/net/sctp/stream_sched_prio.c
+@@ -25,6 +25,18 @@
+
+ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+
++static struct sctp_stream_priorities *sctp_sched_prio_head_get(struct sctp_stream_priorities *p)
++{
++ p->users++;
++ return p;
++}
++
++static void sctp_sched_prio_head_put(struct sctp_stream_priorities *p)
++{
++ if (p && --p->users == 0)
++ kfree(p);
++}
++
+ static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+ {
+@@ -38,6 +50,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+ INIT_LIST_HEAD(&p->active);
+ p->next = NULL;
+ p->prio = prio;
++ p->users = 1;
+
+ return p;
+ }
+@@ -53,7 +66,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+ */
+ list_for_each_entry(p, &stream->prio_list, prio_sched) {
+ if (p->prio == prio)
+- return p;
++ return sctp_sched_prio_head_get(p);
+ if (p->prio > prio)
+ break;
+ }
+@@ -70,7 +83,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+ */
+ break;
+ if (p->prio == prio)
+- return p;
++ return sctp_sched_prio_head_get(p);
+ }
+
+ /* If not even there, allocate a new one. */
+@@ -154,32 +167,21 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
+ struct sctp_stream_out_ext *soute = sout->ext;
+ struct sctp_stream_priorities *prio_head, *old;
+ bool reschedule = false;
+- int i;
++
++ old = soute->prio_head;
++ if (old && old->prio == prio)
++ return 0;
+
+ prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
+ if (!prio_head)
+ return -ENOMEM;
+
+ reschedule = sctp_sched_prio_unsched(soute);
+- old = soute->prio_head;
+ soute->prio_head = prio_head;
+ if (reschedule)
+ sctp_sched_prio_sched(stream, soute);
+
+- if (!old)
+- /* Happens when we set the priority for the first time */
+- return 0;
+-
+- for (i = 0; i < stream->outcnt; i++) {
+- soute = SCTP_SO(stream, i)->ext;
+- if (soute && soute->prio_head == old)
+- /* It's still in use, nothing else to do here. */
+- return 0;
+- }
+-
+- /* No hits, we are good to free it. */
+- kfree(old);
+-
++ sctp_sched_prio_head_put(old);
+ return 0;
+ }
+
+@@ -204,6 +206,12 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
+ return sctp_sched_prio_set(stream, sid, 0, gfp);
+ }
+
++static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid)
++{
++ sctp_sched_prio_head_put(SCTP_SO(stream, sid)->ext->prio_head);
++ SCTP_SO(stream, sid)->ext->prio_head = NULL;
++}
++
+ static void sctp_sched_prio_free(struct sctp_stream *stream)
+ {
+ struct sctp_stream_priorities *prio, *n;
+@@ -323,6 +331,7 @@ static struct sctp_sched_ops sctp_sched_prio = {
+ .get = sctp_sched_prio_get,
+ .init = sctp_sched_prio_init,
+ .init_sid = sctp_sched_prio_init_sid,
++ .free_sid = sctp_sched_prio_free_sid,
+ .free = sctp_sched_prio_free,
+ .enqueue = sctp_sched_prio_enqueue,
+ .dequeue = sctp_sched_prio_dequeue,
+diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
+index ff425aed62c7f..cc444fe0d67c2 100644
+--- a/net/sctp/stream_sched_rr.c
++++ b/net/sctp/stream_sched_rr.c
+@@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
+ return 0;
+ }
+
++static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid)
++{
++}
++
+ static void sctp_sched_rr_free(struct sctp_stream *stream)
+ {
+ sctp_sched_rr_unsched_all(stream);
+@@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = {
+ .get = sctp_sched_rr_get,
+ .init = sctp_sched_rr_init,
+ .init_sid = sctp_sched_rr_init_sid,
++ .free_sid = sctp_sched_rr_free_sid,
+ .free = sctp_sched_rr_free,
+ .enqueue = sctp_sched_rr_enqueue,
+ .dequeue = sctp_sched_rr_dequeue,
+diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
+index b46a416787ec3..43ebf090029d7 100644
+--- a/net/sctp/sysctl.c
++++ b/net/sctp/sysctl.c
+@@ -84,17 +84,18 @@ static struct ctl_table sctp_table[] = {
+ { /* sentinel */ }
+ };
+
++/* The following index defines are used in sctp_sysctl_net_register().
++ * If you add new items to the sctp_net_table, please ensure that
++ * the index values of these defines hold the same meaning indicated by
++ * their macro names when they appear in sctp_net_table.
++ */
++#define SCTP_RTO_MIN_IDX 0
++#define SCTP_RTO_MAX_IDX 1
++#define SCTP_PF_RETRANS_IDX 2
++#define SCTP_PS_RETRANS_IDX 3
++
+ static struct ctl_table sctp_net_table[] = {
+- {
+- .procname = "rto_initial",
+- .data = &init_net.sctp.rto_initial,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = SYSCTL_ONE,
+- .extra2 = &timer_max
+- },
+- {
++ [SCTP_RTO_MIN_IDX] = {
+ .procname = "rto_min",
+ .data = &init_net.sctp.rto_min,
+ .maxlen = sizeof(unsigned int),
+@@ -103,7 +104,7 @@ static struct ctl_table sctp_net_table[] = {
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &init_net.sctp.rto_max
+ },
+- {
++ [SCTP_RTO_MAX_IDX] = {
+ .procname = "rto_max",
+ .data = &init_net.sctp.rto_max,
+ .maxlen = sizeof(unsigned int),
+@@ -112,6 +113,33 @@ static struct ctl_table sctp_net_table[] = {
+ .extra1 = &init_net.sctp.rto_min,
+ .extra2 = &timer_max
+ },
++ [SCTP_PF_RETRANS_IDX] = {
++ .procname = "pf_retrans",
++ .data = &init_net.sctp.pf_retrans,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = SYSCTL_ZERO,
++ .extra2 = &init_net.sctp.ps_retrans,
++ },
++ [SCTP_PS_RETRANS_IDX] = {
++ .procname = "ps_retrans",
++ .data = &init_net.sctp.ps_retrans,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &init_net.sctp.pf_retrans,
++ .extra2 = &ps_retrans_max,
++ },
++ {
++ .procname = "rto_initial",
++ .data = &init_net.sctp.rto_initial,
++ .maxlen = sizeof(unsigned int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = SYSCTL_ONE,
++ .extra2 = &timer_max
++ },
+ {
+ .procname = "rto_alpha_exp_divisor",
+ .data = &init_net.sctp.rto_alpha,
+@@ -207,24 +235,6 @@ static struct ctl_table sctp_net_table[] = {
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
+ },
+- {
+- .procname = "pf_retrans",
+- .data = &init_net.sctp.pf_retrans,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = SYSCTL_ZERO,
+- .extra2 = &init_net.sctp.ps_retrans,
+- },
+- {
+- .procname = "ps_retrans",
+- .data = &init_net.sctp.ps_retrans,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &init_net.sctp.pf_retrans,
+- .extra2 = &ps_retrans_max,
+- },
+ {
+ .procname = "sndbuf_policy",
+ .data = &init_net.sctp.sndbuf_policy,
+@@ -586,6 +596,11 @@ int sctp_sysctl_net_register(struct net *net)
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+
++ table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max;
++ table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min;
++ table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans;
++ table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
++
+ net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+ if (net->sctp.sysctl_header == NULL) {
+ kfree(table);
+diff --git a/net/sctp/transport.c b/net/sctp/transport.c
+index a3d3ca6dd63dd..d1add537beaa2 100644
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -269,7 +269,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t)
+ if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+- t->pl.pmtu = SCTP_MIN_PLPMTU;
++ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+@@ -331,9 +331,12 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (!t->pl.probe_high) {
+- t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+- SCTP_MAX_PLPMTU);
+- return false;
++ if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
++ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
++ SCTP_MAX_PLPMTU);
++ return false;
++ }
++ t->pl.probe_high = SCTP_MAX_PLPMTU;
+ }
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ if (t->pl.probe_size >= t->pl.probe_high) {
+@@ -348,7 +351,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
+ } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+- t->pl.probe_size += SCTP_PL_MIN_STEP;
++ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
+ }
+
+ return t->pl.state == SCTP_PL_COMPLETE;
+@@ -366,8 +369,9 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+ if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+- t->pl.pmtu = SCTP_MIN_PLPMTU;
++ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
++ return true;
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+@@ -378,11 +382,10 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
++ return true;
+ } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
+ t->pl.probe_size = pmtu;
+ t->pl.probe_count = 0;
+-
+- return false;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+@@ -393,10 +396,11 @@ static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
++ return true;
+ }
+ }
+
+- return true;
++ return false;
+ }
+
+ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 78b663dbfa1f9..9cdb7df0801f3 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -148,14 +148,18 @@ static int __smc_release(struct smc_sock *smc)
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ } else {
+- if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+- sock_put(sk); /* passive closing */
+- if (sk->sk_state == SMC_LISTEN) {
+- /* wake up clcsock accept */
+- rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
++ if (sk->sk_state != SMC_CLOSED) {
++ if (sk->sk_state != SMC_LISTEN &&
++ sk->sk_state != SMC_INIT)
++ sock_put(sk); /* passive closing */
++ if (sk->sk_state == SMC_LISTEN) {
++ /* wake up clcsock accept */
++ rc = kernel_sock_shutdown(smc->clcsock,
++ SHUT_RDWR);
++ }
++ sk->sk_state = SMC_CLOSED;
++ sk->sk_state_change(sk);
+ }
+- sk->sk_state = SMC_CLOSED;
+- sk->sk_state_change(sk);
+ smc_restore_fallback_changes(smc);
+ }
+
+@@ -178,7 +182,7 @@ static int smc_release(struct socket *sock)
+ {
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+- int rc = 0;
++ int old_state, rc = 0;
+
+ if (!sk)
+ goto out;
+@@ -186,10 +190,14 @@ static int smc_release(struct socket *sock)
+ sock_hold(sk); /* sock_put below */
+ smc = smc_sk(sk);
+
++ old_state = sk->sk_state;
++
+ /* cleanup for a dangling non-blocking connect */
+- if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
++ if (smc->connect_nonblock && old_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
+- flush_work(&smc->connect_work);
++
++ if (cancel_work_sync(&smc->connect_work))
++ sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
+
+ if (sk->sk_state == SMC_LISTEN)
+ /* smc_close_non_accepted() is called and acquires
+@@ -199,6 +207,10 @@ static int smc_release(struct socket *sock)
+ else
+ lock_sock(sk);
+
++ if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
++ !smc->use_fallback)
++ smc_close_active_abort(smc);
++
+ rc = __smc_release(smc);
+
+ /* detach socket */
+@@ -542,8 +554,118 @@ static void smc_stat_fallback(struct smc_sock *smc)
+ mutex_unlock(&net->smc.mutex_fback_rsn);
+ }
+
+-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
++/* must be called under rcu read lock */
++static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
++{
++ struct socket_wq *wq;
++ __poll_t flags;
++
++ wq = rcu_dereference(smc->sk.sk_wq);
++ if (!skwq_has_sleeper(wq))
++ return;
++
++ /* wake up smc sk->sk_wq */
++ if (!key) {
++ /* sk_state_change */
++ wake_up_interruptible_all(&wq->wait);
++ } else {
++ flags = key_to_poll(key);
++ if (flags & (EPOLLIN | EPOLLOUT))
++ /* sk_data_ready or sk_write_space */
++ wake_up_interruptible_sync_poll(&wq->wait, flags);
++ else if (flags & EPOLLERR)
++ /* sk_error_report */
++ wake_up_interruptible_poll(&wq->wait, flags);
++ }
++}
++
++static int smc_fback_mark_woken(wait_queue_entry_t *wait,
++ unsigned int mode, int sync, void *key)
++{
++ struct smc_mark_woken *mark =
++ container_of(wait, struct smc_mark_woken, wait_entry);
++
++ mark->woken = true;
++ mark->key = key;
++ return 0;
++}
++
++static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
++ void (*clcsock_callback)(struct sock *sk))
++{
++ struct smc_mark_woken mark = { .woken = false };
++ struct socket_wq *wq;
++
++ init_waitqueue_func_entry(&mark.wait_entry,
++ smc_fback_mark_woken);
++ rcu_read_lock();
++ wq = rcu_dereference(clcsk->sk_wq);
++ if (!wq)
++ goto out;
++ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
++ clcsock_callback(clcsk);
++ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
++
++ if (mark.woken)
++ smc_fback_wakeup_waitqueue(smc, mark.key);
++out:
++ rcu_read_unlock();
++}
++
++static void smc_fback_state_change(struct sock *clcsk)
++{
++ struct smc_sock *smc =
++ smc_clcsock_user_data(clcsk);
++
++ if (!smc)
++ return;
++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
++}
++
++static void smc_fback_data_ready(struct sock *clcsk)
++{
++ struct smc_sock *smc =
++ smc_clcsock_user_data(clcsk);
++
++ if (!smc)
++ return;
++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
++}
++
++static void smc_fback_write_space(struct sock *clcsk)
++{
++ struct smc_sock *smc =
++ smc_clcsock_user_data(clcsk);
++
++ if (!smc)
++ return;
++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
++}
++
++static void smc_fback_error_report(struct sock *clcsk)
+ {
++ struct smc_sock *smc =
++ smc_clcsock_user_data(clcsk);
++
++ if (!smc)
++ return;
++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
++}
++
++static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
++{
++ struct sock *clcsk;
++ int rc = 0;
++
++ mutex_lock(&smc->clcsock_release_lock);
++ if (!smc->clcsock) {
++ rc = -EBADF;
++ goto out;
++ }
++ clcsk = smc->clcsock->sk;
++
++ if (smc->use_fallback)
++ goto out;
+ smc->use_fallback = true;
+ smc->fallback_rsn = reason_code;
+ smc_stat_fallback(smc);
+@@ -552,13 +674,42 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+ smc->clcsock->file->private_data = smc->clcsock;
+ smc->clcsock->wq.fasync_list =
+ smc->sk.sk_socket->wq.fasync_list;
++
++ /* There might be some wait entries remaining
++ * in smc sk->sk_wq and they should be woken up
++ * as clcsock's wait queue is woken up.
++ */
++ smc->clcsk_state_change = clcsk->sk_state_change;
++ smc->clcsk_data_ready = clcsk->sk_data_ready;
++ smc->clcsk_write_space = clcsk->sk_write_space;
++ smc->clcsk_error_report = clcsk->sk_error_report;
++
++ clcsk->sk_state_change = smc_fback_state_change;
++ clcsk->sk_data_ready = smc_fback_data_ready;
++ clcsk->sk_write_space = smc_fback_write_space;
++ clcsk->sk_error_report = smc_fback_error_report;
++
++ smc->clcsock->sk->sk_user_data =
++ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ }
++out:
++ mutex_unlock(&smc->clcsock_release_lock);
++ return rc;
+ }
+
+ /* fall back during connect */
+ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
+ {
+- smc_switch_to_fallback(smc, reason_code);
++ struct net *net = sock_net(&smc->sk);
++ int rc = 0;
++
++ rc = smc_switch_to_fallback(smc, reason_code);
++ if (rc) { /* fallback fails */
++ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
++ if (smc->sk.sk_state == SMC_INIT)
++ sock_put(&smc->sk); /* passive closing */
++ return rc;
++ }
+ smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
+ if (smc->sk.sk_state == SMC_INIT)
+@@ -593,10 +744,12 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
+
+ static void smc_conn_abort(struct smc_sock *smc, int local_first)
+ {
++ struct smc_connection *conn = &smc->conn;
++ struct smc_link_group *lgr = conn->lgr;
++
++ smc_conn_free(conn);
+ if (local_first)
+- smc_lgr_cleanup_early(&smc->conn);
+- else
+- smc_conn_free(&smc->conn);
++ smc_lgr_cleanup_early(lgr);
+ }
+
+ /* check if there is a rdma device available for this connection. */
+@@ -1070,6 +1223,8 @@ static void smc_connect_work(struct work_struct *work)
+ smc->sk.sk_state = SMC_CLOSED;
+ if (rc == -EPIPE || rc == -EAGAIN)
+ smc->sk.sk_err = EPIPE;
++ else if (rc == -ECONNREFUSED)
++ smc->sk.sk_err = ECONNREFUSED;
+ else if (signal_pending(current))
+ smc->sk.sk_err = -sock_intr_errno(timeo);
+ sock_put(&smc->sk); /* passive closing */
+@@ -1128,9 +1283,9 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
+ if (rc && rc != -EINPROGRESS)
+ goto out;
+
+- sock_hold(&smc->sk); /* sock put in passive closing */
+ if (smc->use_fallback)
+ goto out;
++ sock_hold(&smc->sk); /* sock put in passive closing */
+ if (flags & O_NONBLOCK) {
+ if (queue_work(smc_hs_wq, &smc->connect_work))
+ smc->connect_nonblock = 1;
+@@ -1335,7 +1490,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc)
+ {
+ struct sock *newsmcsk = &new_smc->sk;
+
+- sk_refcnt_debug_inc(newsmcsk);
+ if (newsmcsk->sk_state == SMC_INIT)
+ newsmcsk->sk_state = SMC_ACTIVE;
+
+@@ -1362,11 +1516,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
+ {
+ /* RDMA setup failed, switch back to TCP */
+ smc_conn_abort(new_smc, local_first);
+- if (reason_code < 0) { /* error, no fallback possible */
++ if (reason_code < 0 ||
++ smc_switch_to_fallback(new_smc, reason_code)) {
++ /* error, no fallback possible */
+ smc_listen_out_err(new_smc);
+ return;
+ }
+- smc_switch_to_fallback(new_smc, reason_code);
+ if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
+ if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
+ smc_listen_out_err(new_smc);
+@@ -1739,8 +1894,11 @@ static void smc_listen_work(struct work_struct *work)
+
+ /* check if peer is smc capable */
+ if (!tcp_sk(newclcsock->sk)->syn_smc) {
+- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+- smc_listen_out_connected(new_smc);
++ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
++ if (rc)
++ smc_listen_out_err(new_smc);
++ else
++ smc_listen_out_connected(new_smc);
+ return;
+ }
+
+@@ -1866,10 +2024,9 @@ out:
+
+ static void smc_clcsock_data_ready(struct sock *listen_clcsock)
+ {
+- struct smc_sock *lsmc;
++ struct smc_sock *lsmc =
++ smc_clcsock_user_data(listen_clcsock);
+
+- lsmc = (struct smc_sock *)
+- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
+ if (!lsmc)
+ return;
+ lsmc->clcsk_data_ready(listen_clcsock);
+@@ -1914,8 +2071,10 @@ static int smc_listen(struct socket *sock, int backlog)
+ smc->clcsock->sk->sk_user_data =
+ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ rc = kernel_listen(smc->clcsock, backlog);
+- if (rc)
++ if (rc) {
++ smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ goto out;
++ }
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_ack_backlog = 0;
+ sk->sk_state = SMC_LISTEN;
+@@ -2013,22 +2172,27 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ {
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+- int rc = -EPIPE;
++ int rc;
+
+ smc = smc_sk(sk);
+ lock_sock(sk);
+- if ((sk->sk_state != SMC_ACTIVE) &&
+- (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+- (sk->sk_state != SMC_INIT))
+- goto out;
+
++ /* SMC does not support connect with fastopen */
+ if (msg->msg_flags & MSG_FASTOPEN) {
++ /* not connected yet, fallback */
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
++ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
++ if (rc)
++ goto out;
+ } else {
+ rc = -EINVAL;
+ goto out;
+ }
++ } else if ((sk->sk_state != SMC_ACTIVE) &&
++ (sk->sk_state != SMC_APPCLOSEWAIT1) &&
++ (sk->sk_state != SMC_INIT)) {
++ rc = -EPIPE;
++ goto out;
+ }
+
+ if (smc->use_fallback) {
+@@ -2148,8 +2312,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
+ static int smc_shutdown(struct socket *sock, int how)
+ {
+ struct sock *sk = sock->sk;
++ bool do_shutdown = true;
+ struct smc_sock *smc;
+ int rc = -EINVAL;
++ int old_state;
+ int rc1 = 0;
+
+ smc = smc_sk(sk);
+@@ -2170,13 +2336,19 @@ static int smc_shutdown(struct socket *sock, int how)
+ if (smc->use_fallback) {
+ rc = kernel_sock_shutdown(smc->clcsock, how);
+ sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (sk->sk_shutdown == SHUTDOWN_MASK) {
+ sk->sk_state = SMC_CLOSED;
++ sock_put(sk);
++ }
+ goto out;
+ }
+ switch (how) {
+ case SHUT_RDWR: /* shutdown in both directions */
++ old_state = sk->sk_state;
+ rc = smc_close_active(smc);
++ if (old_state == SMC_ACTIVE &&
++ sk->sk_state == SMC_PEERCLOSEWAIT1)
++ do_shutdown = false;
+ break;
+ case SHUT_WR:
+ rc = smc_close_shutdown_write(smc);
+@@ -2186,7 +2358,7 @@ static int smc_shutdown(struct socket *sock, int how)
+ /* nothing more to do because peer is not involved */
+ break;
+ }
+- if (smc->clcsock)
++ if (do_shutdown && smc->clcsock)
+ rc1 = kernel_sock_shutdown(smc->clcsock, how);
+ /* map sock_shutdown_cmd constants to sk_shutdown value range */
+ sk->sk_shutdown |= how + 1;
+@@ -2211,6 +2383,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
+ /* generic setsockopts reaching us here always apply to the
+ * CLC socket
+ */
++ mutex_lock(&smc->clcsock_release_lock);
++ if (!smc->clcsock) {
++ mutex_unlock(&smc->clcsock_release_lock);
++ return -EBADF;
++ }
+ if (unlikely(!smc->clcsock->ops->setsockopt))
+ rc = -EOPNOTSUPP;
+ else
+@@ -2220,6 +2397,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
+ sk->sk_err = smc->clcsock->sk->sk_err;
+ sk_error_report(sk);
+ }
++ mutex_unlock(&smc->clcsock_release_lock);
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+@@ -2236,7 +2414,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
+ case TCP_FASTOPEN_NO_COOKIE:
+ /* option not supported by SMC */
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
++ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ } else {
+ rc = -EINVAL;
+ }
+@@ -2247,8 +2425,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
+ sk->sk_state != SMC_CLOSED) {
+ if (val) {
+ SMC_STAT_INC(smc, ndly_cnt);
+- mod_delayed_work(smc->conn.lgr->tx_wq,
+- &smc->conn.tx_work, 0);
++ smc_tx_pending(&smc->conn);
++ cancel_delayed_work(&smc->conn.tx_work);
+ }
+ }
+ break;
+@@ -2258,8 +2436,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
+ sk->sk_state != SMC_CLOSED) {
+ if (!val) {
+ SMC_STAT_INC(smc, cork_cnt);
+- mod_delayed_work(smc->conn.lgr->tx_wq,
+- &smc->conn.tx_work, 0);
++ smc_tx_pending(&smc->conn);
++ cancel_delayed_work(&smc->conn.tx_work);
+ }
+ }
+ break;
+@@ -2279,13 +2457,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+ {
+ struct smc_sock *smc;
++ int rc;
+
+ smc = smc_sk(sock->sk);
++ mutex_lock(&smc->clcsock_release_lock);
++ if (!smc->clcsock) {
++ mutex_unlock(&smc->clcsock_release_lock);
++ return -EBADF;
++ }
+ /* socket options apply to the CLC socket */
+- if (unlikely(!smc->clcsock->ops->getsockopt))
++ if (unlikely(!smc->clcsock->ops->getsockopt)) {
++ mutex_unlock(&smc->clcsock_release_lock);
+ return -EOPNOTSUPP;
+- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+- optval, optlen);
++ }
++ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
++ optval, optlen);
++ mutex_unlock(&smc->clcsock_release_lock);
++ return rc;
+ }
+
+ static int smc_ioctl(struct socket *sock, unsigned int cmd,
+@@ -2559,14 +2747,14 @@ static int __init smc_init(void)
+
+ rc = register_pernet_subsys(&smc_net_stat_ops);
+ if (rc)
+- return rc;
++ goto out_pernet_subsys;
+
+ smc_ism_init();
+ smc_clc_init();
+
+ rc = smc_nl_init();
+ if (rc)
+- goto out_pernet_subsys;
++ goto out_pernet_subsys_stat;
+
+ rc = smc_pnet_init();
+ if (rc)
+@@ -2644,6 +2832,8 @@ out_pnet:
+ smc_pnet_exit();
+ out_nl:
+ smc_nl_exit();
++out_pernet_subsys_stat:
++ unregister_pernet_subsys(&smc_net_stat_ops);
+ out_pernet_subsys:
+ unregister_pernet_subsys(&smc_net_ops);
+
+diff --git a/net/smc/smc.h b/net/smc/smc.h
+index d65e15f0c944c..930544f7b2e2c 100644
+--- a/net/smc/smc.h
++++ b/net/smc/smc.h
+@@ -129,6 +129,12 @@ enum smc_urg_state {
+ SMC_URG_READ = 3, /* data was already read */
+ };
+
++struct smc_mark_woken {
++ bool woken;
++ void *key;
++ wait_queue_entry_t wait_entry;
++};
++
+ struct smc_connection {
+ struct rb_node alert_node;
+ struct smc_link_group *lgr; /* link group of connection */
+@@ -170,6 +176,11 @@ struct smc_connection {
+ u16 tx_cdc_seq; /* sequence # for CDC send */
+ u16 tx_cdc_seq_fin; /* sequence # - tx completed */
+ spinlock_t send_lock; /* protect wr_sends */
++ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe
++ * - inc when post wqe,
++ * - dec on polled tx cqe
++ */
++ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
+ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
+ u32 tx_off; /* base offset in peer rmb */
+
+@@ -212,8 +223,14 @@ struct smc_connection {
+ struct smc_sock { /* smc sock container */
+ struct sock sk;
+ struct socket *clcsock; /* internal tcp socket */
++ void (*clcsk_state_change)(struct sock *sk);
++ /* original stat_change fct. */
+ void (*clcsk_data_ready)(struct sock *sk);
+- /* original data_ready fct. **/
++ /* original data_ready fct. */
++ void (*clcsk_write_space)(struct sock *sk);
++ /* original write_space fct. */
++ void (*clcsk_error_report)(struct sock *sk);
++ /* original error_report fct. */
+ struct smc_connection conn; /* smc connection */
+ struct smc_sock *listen_smc; /* listen parent */
+ struct work_struct connect_work; /* handle non-blocking connect*/
+@@ -248,6 +265,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
+ return (struct smc_sock *)sk;
+ }
+
++static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk)
++{
++ return (struct smc_sock *)
++ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
++}
++
+ extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
+ extern struct workqueue_struct *smc_close_wq; /* wq for close work */
+
+diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
+index 99acd337ba90d..41b23f71c29a2 100644
+--- a/net/smc/smc_cdc.c
++++ b/net/smc/smc_cdc.c
+@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
+ struct smc_sock *smc;
+ int diff;
+
+- if (!conn)
+- /* already dismissed */
+- return;
+-
+ smc = container_of(conn, struct smc_sock, conn);
+ bh_lock_sock(&smc->sk);
+ if (!wc_status) {
+@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
+ conn);
+ conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
+ }
++
++ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
++ unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
++ wake_up(&conn->cdc_pend_tx_wq);
++ WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
++
+ smc_tx_sndbuf_nonfull(smc);
+ bh_unlock_sock(&smc->sk);
+ }
+@@ -70,7 +72,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
+ /* abnormal termination */
+ if (!rc)
+ smc_wr_tx_put_slot(link,
+- (struct smc_wr_tx_pend_priv *)pend);
++ (struct smc_wr_tx_pend_priv *)(*pend));
+ rc = -EPIPE;
+ }
+ return rc;
+@@ -102,11 +104,18 @@ int smc_cdc_msg_send(struct smc_connection *conn,
+ union smc_host_cursor cfed;
+ int rc;
+
++ if (unlikely(!READ_ONCE(conn->sndbuf_desc)))
++ return -ENOBUFS;
++
+ smc_cdc_add_pending_send(conn, pend);
+
+ conn->tx_cdc_seq++;
+ conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
+ smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
++
++ atomic_inc(&conn->cdc_pend_tx_wr);
++ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
++
+ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ if (!rc) {
+ smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
+@@ -114,6 +123,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
+ } else {
+ conn->tx_cdc_seq--;
+ conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
++ atomic_dec(&conn->cdc_pend_tx_wr);
+ }
+
+ return rc;
+@@ -136,7 +146,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+ peer->token = htonl(local->token);
+ peer->prod_flags.failover_validation = 1;
+
++ /* We need to set pend->conn here to make sure smc_cdc_tx_handler()
++ * can handle properly
++ */
++ smc_cdc_add_pending_send(conn, pend);
++
++ atomic_inc(&conn->cdc_pend_tx_wr);
++ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
++
+ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
++ if (unlikely(rc))
++ atomic_dec(&conn->cdc_pend_tx_wr);
++
+ return rc;
+ }
+
+@@ -193,31 +214,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+ return rc;
+ }
+
+-static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
+- unsigned long data)
++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn)
+ {
+- struct smc_connection *conn = (struct smc_connection *)data;
+- struct smc_cdc_tx_pend *cdc_pend =
+- (struct smc_cdc_tx_pend *)tx_pend;
+-
+- return cdc_pend->conn == conn;
+-}
+-
+-static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
+-{
+- struct smc_cdc_tx_pend *cdc_pend =
+- (struct smc_cdc_tx_pend *)tx_pend;
+-
+- cdc_pend->conn = NULL;
+-}
+-
+-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
+-{
+- struct smc_link *link = conn->lnk;
+-
+- smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
+- smc_cdc_tx_filter, smc_cdc_tx_dismisser,
+- (unsigned long)conn);
++ wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr));
+ }
+
+ /* Send a SMC-D CDC header.
+diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
+index 0a0a89abd38b2..696cc11f2303b 100644
+--- a/net/smc/smc_cdc.h
++++ b/net/smc/smc_cdc.h
+@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_wr_buf **wr_buf,
+ struct smc_rdma_wr **wr_rdma_buf,
+ struct smc_cdc_tx_pend **pend);
+-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn);
+ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
+ struct smc_cdc_tx_pend *pend);
+ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
+index 0f9ffba07d268..149a59ecd299f 100644
+--- a/net/smc/smc_close.c
++++ b/net/smc/smc_close.c
+@@ -64,8 +64,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
+
+ rc = sk_wait_event(sk, &timeout,
+ !smc_tx_prepared_sends(&smc->conn) ||
+- sk->sk_err == ECONNABORTED ||
+- sk->sk_err == ECONNRESET ||
++ READ_ONCE(sk->sk_err) == ECONNABORTED ||
++ READ_ONCE(sk->sk_err) == ECONNRESET ||
+ smc->conn.killed,
+ &wait);
+ if (rc)
+@@ -195,6 +195,7 @@ int smc_close_active(struct smc_sock *smc)
+ int old_state;
+ long timeout;
+ int rc = 0;
++ int rc1 = 0;
+
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+@@ -228,6 +229,15 @@ again:
+ /* send close request */
+ rc = smc_close_final(conn);
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
++
++ /* actively shutdown clcsock before peer close it,
++ * prevent peer from entering TIME_WAIT state.
++ */
++ if (smc->clcsock && smc->clcsock->sk) {
++ rc1 = kernel_sock_shutdown(smc->clcsock,
++ SHUT_RDWR);
++ rc = rc ? rc : rc1;
++ }
+ } else {
+ /* peer event has changed the state */
+ goto again;
+diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
+index d2206743dc714..b84896acd4732 100644
+--- a/net/smc/smc_core.c
++++ b/net/smc/smc_core.c
+@@ -170,8 +170,10 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
+
+ if (!conn->lgr->is_smcd) {
+ rc = smcr_lgr_conn_assign_link(conn, first);
+- if (rc)
++ if (rc) {
++ conn->lgr = NULL;
+ return rc;
++ }
+ }
+ /* find a new alert_token_local value not yet used by some connection
+ * in this link group
+@@ -579,21 +581,18 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
+ return skb->len;
+ }
+
+-void smc_lgr_cleanup_early(struct smc_connection *conn)
++void smc_lgr_cleanup_early(struct smc_link_group *lgr)
+ {
+- struct smc_link_group *lgr = conn->lgr;
+- struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
+
+ if (!lgr)
+ return;
+
+- smc_conn_free(conn);
+- lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
++ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ /* do not use this link group for new connections */
+- if (!list_empty(lgr_list))
+- list_del_init(lgr_list);
++ if (!list_empty(&lgr->list))
++ list_del_init(&lgr->list);
+ spin_unlock_bh(lgr_lock);
+ __smc_lgr_terminate(lgr, true);
+ }
+@@ -605,7 +604,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+- if (smc_link_usable(lnk))
++ if (smc_link_sendable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
+ }
+ wake_up_all(&lgr->llc_msg_waiter);
+@@ -665,13 +664,14 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr)
+ int i;
+
+ while (1) {
++again:
+ link_id = ++lgr->next_link_id;
+ if (!link_id) /* skip zero as link_id */
+ link_id = ++lgr->next_link_id;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (smc_link_usable(&lgr->lnk[i]) &&
+ lgr->lnk[i].link_id == link_id)
+- continue;
++ goto again;
+ }
+ break;
+ }
+@@ -1056,13 +1056,13 @@ void smc_conn_free(struct smc_connection *conn)
+ smc_ism_unset_conn(conn);
+ tasklet_kill(&conn->rx_tsklet);
+ } else {
+- smc_cdc_tx_dismiss_slots(conn);
++ smc_cdc_wait_pend_tx_wr(conn);
+ if (current_work() != &conn->abort_work)
+ cancel_work_sync(&conn->abort_work);
+ }
+ if (!list_empty(&lgr->list)) {
+- smc_lgr_unregister_conn(conn);
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
++ smc_lgr_unregister_conn(conn);
+ }
+
+ if (!lgr->conns_num)
+@@ -1133,7 +1133,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
+ smc_llc_link_clear(lnk, log);
+ smcr_buf_unmap_lgr(lnk);
+ smcr_rtoken_clear_link(lnk);
+- smc_ib_modify_qp_reset(lnk);
++ smc_ib_modify_qp_error(lnk);
+ smc_wr_free_link(lnk);
+ smc_ib_destroy_queue_pair(lnk);
+ smc_ib_dealloc_protection_domain(lnk);
+@@ -1264,7 +1264,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
+ else
+ tasklet_unlock_wait(&conn->rx_tsklet);
+ } else {
+- smc_cdc_tx_dismiss_slots(conn);
++ smc_cdc_wait_pend_tx_wr(conn);
+ }
+ smc_lgr_unregister_conn(conn);
+ smc_close_active_abort(smc);
+@@ -1297,7 +1297,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
+ if (lgr->terminating)
+ return; /* lgr already terminating */
+ /* cancel free_work sync, will terminate when lgr->freeing is set */
+- cancel_delayed_work_sync(&lgr->free_work);
++ cancel_delayed_work(&lgr->free_work);
+ lgr->terminating = 1;
+
+ /* kill remaining link group connections */
+@@ -1486,6 +1486,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+ {
+ struct smc_link_group *lgr, *n;
+
++ spin_lock_bh(&smc_lgr_list.lock);
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ struct smc_link *link;
+
+@@ -1500,6 +1501,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+ if (link)
+ smc_llc_add_link_local(link);
+ }
++ spin_unlock_bh(&smc_lgr_list.lock);
+ }
+
+ /* link is down - switch connections to alternate link,
+@@ -1514,7 +1516,6 @@ static void smcr_link_down(struct smc_link *lnk)
+ if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+ return;
+
+- smc_ib_modify_qp_reset(lnk);
+ to_lnk = smc_switch_conns(lgr, lnk, true);
+ if (!to_lnk) { /* no backup link available */
+ smcr_link_clear(lnk, true);
+@@ -1595,14 +1596,26 @@ static void smc_link_down_work(struct work_struct *work)
+ mutex_unlock(&lgr->llc_conf_mutex);
+ }
+
+-/* Determine vlan of internal TCP socket.
+- * @vlan_id: address to store the determined vlan id into
+- */
++static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
++ struct netdev_nested_priv *priv)
++{
++ unsigned short *vlan_id = (unsigned short *)priv->data;
++
++ if (is_vlan_dev(lower_dev)) {
++ *vlan_id = vlan_dev_vlan_id(lower_dev);
++ return 1;
++ }
++
++ return 0;
++}
++
++/* Determine vlan of internal TCP socket. */
+ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
+ {
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
++ struct netdev_nested_priv priv;
+ struct net_device *ndev;
+- int i, nest_lvl, rc = 0;
++ int rc = 0;
+
+ ini->vlan_id = 0;
+ if (!dst) {
+@@ -1620,20 +1633,9 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
+ goto out_rel;
+ }
+
++ priv.data = (void *)&ini->vlan_id;
+ rtnl_lock();
+- nest_lvl = ndev->lower_level;
+- for (i = 0; i < nest_lvl; i++) {
+- struct list_head *lower = &ndev->adj_list.lower;
+-
+- if (list_empty(lower))
+- break;
+- lower = lower->next;
+- ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
+- if (is_vlan_dev(ndev)) {
+- ini->vlan_id = vlan_dev_vlan_id(ndev);
+- break;
+- }
+- }
++ netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
+ rtnl_unlock();
+
+ out_rel:
+@@ -1701,7 +1703,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
+ (ini->smcd_version == SMC_V2 ||
+ lgr->vlan_id == ini->vlan_id) &&
+ (role == SMC_CLNT || ini->is_smcd ||
+- lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
++ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
++ !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
+ /* link group found */
+ ini->first_contact_local = 0;
+ conn->lgr = lgr;
+@@ -1735,12 +1738,15 @@ create:
+ write_lock_bh(&lgr->conns_lock);
+ rc = smc_lgr_register_conn(conn, true);
+ write_unlock_bh(&lgr->conns_lock);
+- if (rc)
++ if (rc) {
++ smc_lgr_cleanup_early(lgr);
+ goto out;
++ }
+ }
+ conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
+ conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
+ conn->urg_state = SMC_URG_READ;
++ init_waitqueue_head(&conn->cdc_pend_tx_wq);
+ INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
+ if (ini->is_smcd) {
+ conn->rx_off = sizeof(struct smcd_cdc_msg);
+@@ -1818,7 +1824,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
+ */
+ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
+ {
+- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
++ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ }
+
+ /* map an rmb buf to a link */
+@@ -1976,7 +1982,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
+ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+ struct smc_buf_desc *buf_desc, bool is_rmb)
+ {
+- int i, rc = 0;
++ int i, rc = 0, cnt = 0;
+
+ /* protect against parallel link reconfiguration */
+ mutex_lock(&lgr->llc_conf_mutex);
+@@ -1989,9 +1995,12 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+ rc = -ENOMEM;
+ goto out;
+ }
++ cnt++;
+ }
+ out:
+ mutex_unlock(&lgr->llc_conf_mutex);
++ if (!rc && !cnt)
++ rc = -EINVAL;
+ return rc;
+ }
+
+diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
+index c043ecdca5c44..9a0523f4c7ba6 100644
+--- a/net/smc/smc_core.h
++++ b/net/smc/smc_core.h
+@@ -366,6 +366,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
+ return true;
+ }
+
++static inline bool smc_link_sendable(struct smc_link *lnk)
++{
++ return smc_link_usable(lnk) &&
++ lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
++}
++
+ static inline bool smc_link_active(struct smc_link *lnk)
+ {
+ return lnk->state == SMC_LNK_ACTIVE;
+@@ -413,7 +419,7 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev,
+ struct smc_sock;
+ struct smc_clc_msg_accept_confirm;
+
+-void smc_lgr_cleanup_early(struct smc_connection *conn);
++void smc_lgr_cleanup_early(struct smc_link_group *lgr);
+ void smc_lgr_terminate_sched(struct smc_link_group *lgr);
+ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
+ void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
+diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
+index a8845343d183e..f0ec1f1d50fac 100644
+--- a/net/smc/smc_ib.c
++++ b/net/smc/smc_ib.c
+@@ -101,12 +101,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk)
+ IB_QP_MAX_QP_RD_ATOMIC);
+ }
+
+-int smc_ib_modify_qp_reset(struct smc_link *lnk)
++int smc_ib_modify_qp_error(struct smc_link *lnk)
+ {
+ struct ib_qp_attr qp_attr;
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+- qp_attr.qp_state = IB_QPS_RESET;
++ qp_attr.qp_state = IB_QPS_ERR;
+ return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
+ }
+
+diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
+index 3085f5180da79..6967c3d52b03e 100644
+--- a/net/smc/smc_ib.h
++++ b/net/smc/smc_ib.h
+@@ -79,6 +79,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk);
+ int smc_ib_ready_link(struct smc_link *lnk);
+ int smc_ib_modify_qp_rts(struct smc_link *lnk);
+ int smc_ib_modify_qp_reset(struct smc_link *lnk);
++int smc_ib_modify_qp_error(struct smc_link *lnk);
+ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
+ int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+ struct smc_buf_desc *buf_slot, u8 link_idx);
+diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
+index f1d323439a2af..d5ee961ca72d5 100644
+--- a/net/smc/smc_llc.c
++++ b/net/smc/smc_llc.c
+@@ -716,6 +716,8 @@ static int smc_llc_add_link_cont(struct smc_link *link,
+ addc_llc->num_rkeys = *num_rkeys_todo;
+ n = *num_rkeys_todo;
+ for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
++ while (*buf_pos && !(*buf_pos)->used)
++ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+ if (!*buf_pos) {
+ addc_llc->num_rkeys = addc_llc->num_rkeys -
+ *num_rkeys_todo;
+@@ -731,8 +733,6 @@ static int smc_llc_add_link_cont(struct smc_link *link,
+
+ (*num_rkeys_todo)--;
+ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+- while (*buf_pos && !(*buf_pos)->used)
+- *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+ }
+ addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
+ addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
+@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
+ delllc.reason = htonl(rsn);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+- if (!smc_link_usable(&lgr->lnk[i]))
++ if (!smc_link_sendable(&lgr->lnk[i]))
+ continue;
+ if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
+ break;
+@@ -1787,7 +1787,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
+ init_waitqueue_head(&lgr->llc_flow_waiter);
+ init_waitqueue_head(&lgr->llc_msg_waiter);
+ mutex_init(&lgr->llc_conf_mutex);
+- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
++ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ }
+
+ /* called after lgr was removed from lgr_list */
+diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
+index 4a964e9190b02..79ee0618d919b 100644
+--- a/net/smc/smc_pnet.c
++++ b/net/smc/smc_pnet.c
+@@ -112,7 +112,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
+ pnettable = &sn->pnettable;
+
+ /* remove table entry */
+- write_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
+ list) {
+ if (!pnet_name ||
+@@ -130,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
+ rc = 0;
+ }
+ }
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+
+ /* if this is not the initial namespace, stop here */
+ if (net != &init_net)
+@@ -191,7 +191,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+- write_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+ if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
+ !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
+@@ -205,7 +205,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
+ break;
+ }
+ }
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ return rc;
+ }
+
+@@ -223,7 +223,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+- write_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
+ dev_put(pnetelem->ndev);
+@@ -236,7 +236,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
+ break;
+ }
+ }
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ return rc;
+ }
+
+@@ -310,8 +310,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+ if (!strncmp(ibdev->ibdev->name, ib_name,
+ sizeof(ibdev->ibdev->name)) ||
+- !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+- IB_DEVICE_NAME_MAX - 1)) {
++ (ibdev->ibdev->dev.parent &&
++ !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
++ IB_DEVICE_NAME_MAX - 1))) {
+ goto out;
+ }
+ }
+@@ -371,7 +372,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
+
+ rc = -EEXIST;
+ new_netdev = true;
+- write_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_ETH &&
+ !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
+@@ -381,9 +382,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
+ }
+ if (new_netdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ } else {
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ kfree(new_pe);
+ goto out_put;
+ }
+@@ -444,7 +445,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
+ new_pe->ib_port = ib_port;
+
+ new_ibdev = true;
+- write_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+@@ -454,9 +455,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
+ }
+ if (new_ibdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ } else {
+- write_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ kfree(new_pe);
+ }
+ return (new_ibdev) ? 0 : -EEXIST;
+@@ -601,7 +602,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
+ pnettable = &sn->pnettable;
+
+ /* dump pnettable entries */
+- read_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+ if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
+ continue;
+@@ -616,7 +617,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
+ break;
+ }
+ }
+- read_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ return idx;
+ }
+
+@@ -860,7 +861,7 @@ int smc_pnet_net_init(struct net *net)
+ struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
+
+ INIT_LIST_HEAD(&pnettable->pnetlist);
+- rwlock_init(&pnettable->lock);
++ mutex_init(&pnettable->lock);
+ INIT_LIST_HEAD(&pnetids_ndev->list);
+ rwlock_init(&pnetids_ndev->lock);
+
+@@ -940,7 +941,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+- read_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
+ if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
+ /* get pnetid of netdev device */
+@@ -949,7 +950,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
+ break;
+ }
+ }
+- read_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+ return rc;
+ }
+
+@@ -1130,7 +1131,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+- read_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
+@@ -1140,7 +1141,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
+ break;
+ }
+ }
+- read_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+
+ return rc;
+ }
+@@ -1159,7 +1160,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+- read_lock(&pnettable->lock);
++ mutex_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+@@ -1168,7 +1169,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
+ break;
+ }
+ }
+- read_unlock(&pnettable->lock);
++ mutex_unlock(&pnettable->lock);
+
+ return rc;
+ }
+diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
+index 14039272f7e42..80a88eea49491 100644
+--- a/net/smc/smc_pnet.h
++++ b/net/smc/smc_pnet.h
+@@ -29,7 +29,7 @@ struct smc_link_group;
+ * @pnetlist: List of PNETIDs
+ */
+ struct smc_pnettable {
+- rwlock_t lock;
++ struct mutex lock;
+ struct list_head pnetlist;
+ };
+
+diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
+index 170b733bc7367..5b63c250ba604 100644
+--- a/net/smc/smc_rx.c
++++ b/net/smc/smc_rx.c
+@@ -204,9 +204,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ add_wait_queue(sk_sleep(sk), &wait);
+ rc = sk_wait_event(sk, timeo,
+- sk->sk_err ||
++ READ_ONCE(sk->sk_err) ||
+ cflags->peer_conn_abort ||
+- sk->sk_shutdown & RCV_SHUTDOWN ||
++ READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
+ conn->killed ||
+ fcrit(conn),
+ &wait);
+@@ -354,12 +354,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
+ }
+ break;
+ }
++ if (!timeo)
++ return -EAGAIN;
+ if (signal_pending(current)) {
+ read_done = sock_intr_errno(timeo);
+ break;
+ }
+- if (!timeo)
+- return -EAGAIN;
+ }
+
+ if (!smc_rx_data_available(conn)) {
+diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
+index 738a4a99c8279..a878ea084dbd6 100644
+--- a/net/smc/smc_tx.c
++++ b/net/smc/smc_tx.c
+@@ -113,8 +113,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
+ break; /* at least 1 byte of free & no urgent data */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk_wait_event(sk, &timeo,
+- sk->sk_err ||
+- (sk->sk_shutdown & SEND_SHUTDOWN) ||
++ READ_ONCE(sk->sk_err) ||
++ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
+ smc_cdc_rxed_any_close(conn) ||
+ (atomic_read(&conn->sndbuf_space) &&
+ !conn->urg_tx_pend),
+@@ -594,27 +594,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+ return rc;
+ }
+
+-/* Wakeup sndbuf consumers from process context
+- * since there is more data to transmit
+- */
+-void smc_tx_work(struct work_struct *work)
++void smc_tx_pending(struct smc_connection *conn)
+ {
+- struct smc_connection *conn = container_of(to_delayed_work(work),
+- struct smc_connection,
+- tx_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ int rc;
+
+- lock_sock(&smc->sk);
+ if (smc->sk.sk_err)
+- goto out;
++ return;
+
+ rc = smc_tx_sndbuf_nonempty(conn);
+ if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
+ !atomic_read(&conn->bytes_to_rcv))
+ conn->local_rx_ctrl.prod_flags.write_blocked = 0;
++}
++
++/* Wakeup sndbuf consumers from process context
++ * since there is more data to transmit
++ */
++void smc_tx_work(struct work_struct *work)
++{
++ struct smc_connection *conn = container_of(to_delayed_work(work),
++ struct smc_connection,
++ tx_work);
++ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+-out:
++ lock_sock(&smc->sk);
++ smc_tx_pending(conn);
+ release_sock(&smc->sk);
+ }
+
+diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
+index 07e6ad76224a0..a59f370b8b432 100644
+--- a/net/smc/smc_tx.h
++++ b/net/smc/smc_tx.h
+@@ -27,6 +27,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
+ return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
+ }
+
++void smc_tx_pending(struct smc_connection *conn);
+ void smc_tx_work(struct work_struct *work);
+ void smc_tx_init(struct smc_sock *smc);
+ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
+diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
+index a419e9af36b98..59ca1a2d5c650 100644
+--- a/net/smc/smc_wr.c
++++ b/net/smc/smc_wr.c
+@@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
+ }
+
+ /* wait till all pending tx work requests on the given link are completed */
+-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+ {
+- if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
+- SMC_WR_TX_WAIT_PENDING_TIME))
+- return 0;
+- else /* timeout */
+- return -EPIPE;
++ wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
+ }
+
+ static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
+@@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
+ struct smc_wr_tx_pend pnd_snd;
+ struct smc_link *link;
+ u32 pnd_snd_idx;
+- int i;
+
+ link = wc->qp->qp_context;
+
+@@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
+ if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
+ return;
+ if (wc->status) {
+- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+- /* clear full struct smc_wr_tx_pend including .priv */
+- memset(&link->wr_tx_pends[i], 0,
+- sizeof(link->wr_tx_pends[i]));
+- memset(&link->wr_tx_bufs[i], 0,
+- sizeof(link->wr_tx_bufs[i]));
+- clear_bit(i, link->wr_tx_mask);
+- }
+ /* terminate link */
+ smcr_link_down_cond_sched(link);
+ }
+@@ -169,7 +156,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
+ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
+ {
+ *idx = link->wr_tx_cnt;
+- if (!smc_link_usable(link))
++ if (!smc_link_sendable(link))
+ return -ENOLINK;
+ for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
+ if (!test_and_set_bit(*idx, link->wr_tx_mask))
+@@ -212,7 +199,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
+ } else {
+ rc = wait_event_interruptible_timeout(
+ link->wr_tx_wait,
+- !smc_link_usable(link) ||
++ !smc_link_sendable(link) ||
+ lgr->terminating ||
+ (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
+ SMC_WR_TX_WAIT_FREE_SLOT_TIME);
+@@ -288,18 +275,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ unsigned long timeout)
+ {
+ struct smc_wr_tx_pend *pend;
++ u32 pnd_idx;
+ int rc;
+
+ pend = container_of(priv, struct smc_wr_tx_pend, priv);
+ pend->compl_requested = 1;
+- init_completion(&link->wr_tx_compl[pend->idx]);
++ pnd_idx = pend->idx;
++ init_completion(&link->wr_tx_compl[pnd_idx]);
+
+ rc = smc_wr_tx_send(link, priv);
+ if (rc)
+ return rc;
+ /* wait for completion by smc_wr_tx_process_cqe() */
+ rc = wait_for_completion_interruptible_timeout(
+- &link->wr_tx_compl[pend->idx], timeout);
++ &link->wr_tx_compl[pnd_idx], timeout);
+ if (rc <= 0)
+ rc = -ENODATA;
+ if (rc > 0)
+@@ -349,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+ return rc;
+ }
+
+-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
+- smc_wr_tx_filter filter,
+- smc_wr_tx_dismisser dismisser,
+- unsigned long data)
+-{
+- struct smc_wr_tx_pend_priv *tx_pend;
+- struct smc_wr_rx_hdr *wr_tx;
+- int i;
+-
+- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+- wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+- if (wr_tx->type != wr_tx_hdr_type)
+- continue;
+- tx_pend = &link->wr_tx_pends[i].priv;
+- if (filter(tx_pend, data))
+- dismisser(tx_pend);
+- }
+-}
+-
+ /****************************** receive queue ********************************/
+
+ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
+@@ -572,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk)
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
+- if (smc_wr_tx_wait_no_pending_sends(lnk))
+- memset(lnk->wr_tx_mask, 0,
+- BITS_TO_LONGS(SMC_WR_BUF_CNT) *
+- sizeof(*lnk->wr_tx_mask));
++ smc_wr_tx_wait_no_pending_sends(lnk);
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
+
+diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
+index 2bc626f230a56..cb58e60078f57 100644
+--- a/net/smc/smc_wr.h
++++ b/net/smc/smc_wr.h
+@@ -22,7 +22,6 @@
+ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
+
+ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
+-#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ)
+
+ #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */
+
+@@ -62,7 +61,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
+
+ static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+ {
+- if (!smc_link_usable(link))
++ if (!smc_link_sendable(link))
+ return false;
+ atomic_inc(&link->wr_tx_refcnt);
+ return true;
+@@ -122,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
+ smc_wr_tx_filter filter,
+ smc_wr_tx_dismisser dismisser,
+ unsigned long data);
+-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
+
+ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
+ int smc_wr_rx_post_init(struct smc_link *link);
+diff --git a/net/socket.c b/net/socket.c
+index 7f64a6eccf63f..8b8182cdb4bf2 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -829,6 +829,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+ int empty = 1, false_tstamp = 0;
+ struct skb_shared_hwtstamps *shhwtstamps =
+ skb_hwtstamps(skb);
++ ktime_t hwtstamp;
+
+ /* Race occurred between timestamp enabling and packet
+ receiving. Fill in the current time for now. */
+@@ -877,10 +878,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+ (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ !skb_is_swtx_tstamp(skb, false_tstamp)) {
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+- ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
++ hwtstamp = ptp_convert_timestamp(shhwtstamps,
++ sk->sk_bind_phc);
++ else
++ hwtstamp = shhwtstamps->hwtstamp;
+
+- if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
+- tss.ts + 2)) {
++ if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
+ empty = 0;
+
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+@@ -1718,7 +1721,7 @@ int __sys_listen(int fd, int backlog)
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock) {
+- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
++ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
+ if ((unsigned int)backlog > somaxconn)
+ backlog = somaxconn;
+
+@@ -2817,7 +2820,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ * error to return on the next call or if the
+ * app asks about it using getsockopt(SO_ERROR).
+ */
+- sock->sk->sk_err = -err;
++ WRITE_ONCE(sock->sk->sk_err, -err);
+ }
+ out_put:
+ fput_light(sock->file, fput_needed);
+@@ -3450,7 +3453,11 @@ EXPORT_SYMBOL(kernel_accept);
+ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+ int flags)
+ {
+- return sock->ops->connect(sock, addr, addrlen, flags);
++ struct sockaddr_storage address;
++
++ memcpy(&address, addr, addrlen);
++
++ return sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, flags);
+ }
+ EXPORT_SYMBOL(kernel_connect);
+
+diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
+index 9c0343568d2a0..1a72c67afed5e 100644
+--- a/net/strparser/strparser.c
++++ b/net/strparser/strparser.c
+@@ -27,18 +27,10 @@
+
+ static struct workqueue_struct *strp_wq;
+
+-struct _strp_msg {
+- /* Internal cb structure. struct strp_msg must be first for passing
+- * to upper layer.
+- */
+- struct strp_msg strp;
+- int accum_len;
+-};
+-
+ static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
+ {
+ return (struct _strp_msg *)((void *)skb->cb +
+- offsetof(struct qdisc_skb_cb, data));
++ offsetof(struct sk_skb_cb, strp));
+ }
+
+ /* Lower lock held */
+diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
+index 6e4dbd577a39f..d435bffc61999 100644
+--- a/net/sunrpc/addr.c
++++ b/net/sunrpc/addr.c
+@@ -162,8 +162,10 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
+ const size_t buflen, const char *delim,
+ struct sockaddr_in6 *sin6)
+ {
+- char *p;
++ char p[IPV6_SCOPE_ID_LEN + 1];
+ size_t len;
++ u32 scope_id = 0;
++ struct net_device *dev;
+
+ if ((buf + buflen) == delim)
+ return 1;
+@@ -175,29 +177,23 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
+ return 0;
+
+ len = (buf + buflen) - delim - 1;
+- p = kmemdup_nul(delim + 1, len, GFP_KERNEL);
+- if (p) {
+- u32 scope_id = 0;
+- struct net_device *dev;
+-
+- dev = dev_get_by_name(net, p);
+- if (dev != NULL) {
+- scope_id = dev->ifindex;
+- dev_put(dev);
+- } else {
+- if (kstrtou32(p, 10, &scope_id) != 0) {
+- kfree(p);
+- return 0;
+- }
+- }
+-
+- kfree(p);
+-
+- sin6->sin6_scope_id = scope_id;
+- return 1;
++ if (len > IPV6_SCOPE_ID_LEN)
++ return 0;
++
++ memcpy(p, delim + 1, len);
++ p[len] = 0;
++
++ dev = dev_get_by_name(net, p);
++ if (dev != NULL) {
++ scope_id = dev->ifindex;
++ dev_put(dev);
++ } else {
++ if (kstrtou32(p, 10, &scope_id) != 0)
++ return 0;
+ }
+
+- return 0;
++ sin6->sin6_scope_id = scope_id;
++ return 1;
+ }
+
+ static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
+diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
+index a9f0d17fdb0d6..1bae32c482846 100644
+--- a/net/sunrpc/auth.c
++++ b/net/sunrpc/auth.c
+@@ -445,7 +445,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
+ * Enforce a 60 second garbage collection moratorium
+ * Note that the cred_unused list must be time-ordered.
+ */
+- if (!time_in_range(cred->cr_expire, expired, jiffies))
++ if (time_in_range(cred->cr_expire, expired, jiffies))
+ continue;
+ if (!rpcauth_unhash_cred(cred))
+ continue;
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 5f42aa5fc6128..2ff66a6a7e54c 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -301,7 +301,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+- if (auth && pos->auth->service != auth->service)
++ if (pos->auth->service != auth->service)
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+@@ -685,6 +685,21 @@ out:
+ return err;
+ }
+
++static struct gss_upcall_msg *
++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
++{
++ struct gss_upcall_msg *pos;
++ list_for_each_entry(pos, &pipe->in_downcall, list) {
++ if (!uid_eq(pos->uid, uid))
++ continue;
++ if (!rpc_msg_is_inflight(&pos->msg))
++ continue;
++ refcount_inc(&pos->count);
++ return pos;
++ }
++ return NULL;
++}
++
+ #define MSG_BUF_MAXSIZE 1024
+
+ static ssize_t
+@@ -731,7 +746,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ err = -ENOENT;
+ /* Find a matching upcall */
+ spin_lock(&pipe->lock);
+- gss_msg = __gss_find_upcall(pipe, uid, NULL);
++ gss_msg = gss_find_downcall(pipe, uid);
+ if (gss_msg == NULL) {
+ spin_unlock(&pipe->lock);
+ goto err_put_ctx;
+diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
+index 61c276bddaf25..f549e4c05defc 100644
+--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
++++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
+@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
+ * done without the correct namespace:
+ */
+ .flags = RPC_CLNT_CREATE_NOPING |
++ RPC_CLNT_CREATE_CONNECTED |
+ RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
+ };
+ struct rpc_clnt *clnt;
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index 1f2817195549b..48b608cb5f5ec 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -1162,18 +1162,23 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
+ return res;
+
+ inlen = svc_getnl(argv);
+- if (inlen > (argv->iov_len + rqstp->rq_arg.page_len))
++ if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
++ kfree(in_handle->data);
+ return SVC_DENIED;
++ }
+
+ pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
+ in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
+- if (!in_token->pages)
++ if (!in_token->pages) {
++ kfree(in_handle->data);
+ return SVC_DENIED;
++ }
+ in_token->page_base = 0;
+ in_token->page_len = inlen;
+ for (i = 0; i < pages; i++) {
+ in_token->pages[i] = alloc_page(GFP_KERNEL);
+ if (!in_token->pages[i]) {
++ kfree(in_handle->data);
+ gss_free_in_token_pages(in_token);
+ return SVC_DENIED;
+ }
+diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
+index 22a2c235abf1b..77e347a45344c 100644
+--- a/net/sunrpc/backchannel_rqst.c
++++ b/net/sunrpc/backchannel_rqst.c
+@@ -64,6 +64,17 @@ static void xprt_free_allocation(struct rpc_rqst *req)
+ kfree(req);
+ }
+
++static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf)
++{
++ buf->head[0].iov_len = PAGE_SIZE;
++ buf->tail[0].iov_len = 0;
++ buf->pages = NULL;
++ buf->page_len = 0;
++ buf->flags = 0;
++ buf->len = 0;
++ buf->buflen = PAGE_SIZE;
++}
++
+ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
+ {
+ struct page *page;
+@@ -292,6 +303,9 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
+ */
+ spin_lock_bh(&xprt->bc_pa_lock);
+ if (xprt_need_to_requeue(xprt)) {
++ xprt_bc_reinit_xdr_buf(&req->rq_snd_buf);
++ xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf);
++ req->rq_rcv_buf.len = PAGE_SIZE;
+ list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ xprt->bc_alloc_count++;
+ atomic_inc(&xprt->bc_slot_count);
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index f056ff9314442..b9c54c03c30a6 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task,
+ static int rpc_decode_header(struct rpc_task *task,
+ struct xdr_stream *xdr);
+ static int rpc_ping(struct rpc_clnt *clnt);
++static int rpc_ping_noreply(struct rpc_clnt *clnt);
+ static void rpc_check_timeout(struct rpc_task *task);
+
+ static void rpc_register_client(struct rpc_clnt *clnt)
+@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
++ } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
++ int err = rpc_ping_noreply(clnt);
++ if (err != 0) {
++ rpc_shutdown_client(clnt);
++ return ERR_PTR(err);
++ }
+ }
+
+ clnt->cl_softrtry = 1;
+@@ -644,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
+ new->cl_discrtry = clnt->cl_discrtry;
+ new->cl_chatty = clnt->cl_chatty;
+ new->cl_principal = clnt->cl_principal;
++ new->cl_max_connect = clnt->cl_max_connect;
+ return new;
+
+ out_err:
+@@ -1065,8 +1073,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+ static
+ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
+ {
+- if (task->tk_xprt)
+- return;
++ if (task->tk_xprt) {
++ if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
++ (task->tk_flags & RPC_TASK_MOVEABLE)))
++ return;
++ xprt_release(task);
++ xprt_put(task->tk_xprt);
++ }
+ if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
+ task->tk_xprt = rpc_task_get_first_xprt(clnt);
+ else
+@@ -1362,7 +1375,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
+ break;
+ default:
+ err = -EAFNOSUPPORT;
+- goto out;
++ goto out_release;
+ }
+ if (err < 0) {
+ dprintk("RPC: can't bind UDP socket (%d)\n", err);
+@@ -1868,7 +1881,7 @@ call_encode(struct rpc_task *task)
+ break;
+ case -EKEYEXPIRED:
+ if (!task->tk_cred_retry) {
+- rpc_exit(task, task->tk_status);
++ rpc_call_rpcerror(task, task->tk_status);
+ } else {
+ task->tk_action = call_refresh;
+ task->tk_cred_retry--;
+@@ -1968,9 +1981,6 @@ call_bind_status(struct rpc_task *task)
+ status = -EOPNOTSUPP;
+ break;
+ }
+- if (task->tk_rebind_retry == 0)
+- break;
+- task->tk_rebind_retry--;
+ rpc_delay(task, 3*HZ);
+ goto retry_timeout;
+ case -ENOBUFS:
+@@ -2200,6 +2210,7 @@ call_transmit_status(struct rpc_task *task)
+ * socket just returned a connection error,
+ * then hold onto the transport lock.
+ */
++ case -ENOMEM:
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
+ fallthrough;
+@@ -2283,6 +2294,7 @@ call_bc_transmit_status(struct rpc_task *task)
+ case -ENOTCONN:
+ case -EPIPE:
+ break;
++ case -ENOMEM:
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
+ fallthrough;
+@@ -2365,6 +2377,11 @@ call_status(struct rpc_task *task)
+ case -EPIPE:
+ case -EAGAIN:
+ break;
++ case -ENFILE:
++ case -ENOBUFS:
++ case -ENOMEM:
++ rpc_delay(task, HZ>>2);
++ break;
+ case -EIO:
+ /* shutdown or soft timeout */
+ goto out_exit;
+@@ -2692,6 +2709,10 @@ static const struct rpc_procinfo rpcproc_null = {
+ .p_decode = rpcproc_decode_null,
+ };
+
++static const struct rpc_procinfo rpcproc_null_noreply = {
++ .p_encode = rpcproc_encode_null,
++};
++
+ static void
+ rpc_null_call_prepare(struct rpc_task *task, void *data)
+ {
+@@ -2745,6 +2766,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
+ return status;
+ }
+
++static int rpc_ping_noreply(struct rpc_clnt *clnt)
++{
++ struct rpc_message msg = {
++ .rpc_proc = &rpcproc_null_noreply,
++ };
++ struct rpc_task_setup task_setup_data = {
++ .rpc_client = clnt,
++ .rpc_message = &msg,
++ .callback_ops = &rpc_null_ops,
++ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
++ };
++ struct rpc_task *task;
++ int status;
++
++ task = rpc_run_task(&task_setup_data);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
++ status = task->tk_status;
++ rpc_put_task(task);
++ return status;
++}
++
+ struct rpc_cb_add_xprt_calldata {
+ struct rpc_xprt_switch *xps;
+ struct rpc_xprt *xprt;
+@@ -2903,7 +2946,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
+ unsigned char resvport, reuseport;
+- int ret = 0;
++ int ret = 0, ident;
+
+ rcu_read_lock();
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+@@ -2917,8 +2960,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
+ reuseport = xprt->reuseport;
+ connect_timeout = xprt->connect_timeout;
+ reconnect_timeout = xprt->max_reconnect_timeout;
++ ident = xprt->xprt_class->ident;
+ rcu_read_unlock();
+
++ if (!xprtargs->ident)
++ xprtargs->ident = ident;
+ xprt = xprt_create_transport(xprtargs);
+ if (IS_ERR(xprt)) {
+ ret = PTR_ERR(xprt);
+@@ -3068,6 +3114,8 @@ rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
+ int
+ rpc_clnt_swap_activate(struct rpc_clnt *clnt)
+ {
++ while (clnt != clnt->cl_parent)
++ clnt = clnt->cl_parent;
+ if (atomic_inc_return(&clnt->cl_swapper) == 1)
+ return rpc_clnt_iterate_for_each_xprt(clnt,
+ rpc_clnt_swap_activate_callback, NULL);
+@@ -3087,6 +3135,8 @@ rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
+ void
+ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
+ {
++ while (clnt != clnt->cl_parent)
++ clnt = clnt->cl_parent;
+ if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
+ rpc_clnt_iterate_for_each_xprt(clnt,
+ rpc_clnt_swap_deactivate_callback, NULL);
+diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
+index ee5336d73fddc..35588f0afa864 100644
+--- a/net/sunrpc/rpc_pipe.c
++++ b/net/sunrpc/rpc_pipe.c
+@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
+
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
++ d_drop(dentry);
+ if (!ret)
+ fsnotify_rmdir(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ return ret;
+ }
+@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
+
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
++ d_drop(dentry);
+ if (!ret)
+ fsnotify_unlink(dir, dentry);
+- d_delete(dentry);
+ dput(dentry);
+ return ret;
+ }
+diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
+index c045f63d11fa6..a00890962e115 100644
+--- a/net/sunrpc/sched.c
++++ b/net/sunrpc/sched.c
+@@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+
+ /*
+ * Add new request to wait queue.
+- *
+- * Swapper tasks always get inserted at the head of the queue.
+- * This should avoid many nasty memory deadlocks and hopefully
+- * improve overall performance.
+- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
+ */
+ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+@@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+ INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
+ if (RPC_IS_PRIORITY(queue))
+ __rpc_add_wait_queue_priority(queue, task, queue_priority);
+- else if (RPC_IS_SWAPPER(task))
+- list_add(&task->u.tk_wait.list, &queue->tasks[0]);
+ else
+ list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
+ task->tk_waitqueue = queue;
+@@ -803,7 +796,6 @@ rpc_init_task_statistics(struct rpc_task *task)
+ /* Initialize retry counters */
+ task->tk_garb_retry = 2;
+ task->tk_cred_retry = 2;
+- task->tk_rebind_retry = 2;
+
+ /* starting timestamp */
+ task->tk_start = ktime_get();
+@@ -1012,8 +1004,10 @@ int rpc_malloc(struct rpc_task *task)
+ struct rpc_buffer *buf;
+ gfp_t gfp = GFP_NOFS;
+
++ if (RPC_IS_ASYNC(task))
++ gfp = GFP_NOWAIT | __GFP_NOWARN;
+ if (RPC_IS_SWAPPER(task))
+- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
++ gfp |= __GFP_MEMALLOC;
+
+ size += sizeof(struct rpc_buffer);
+ if (size <= RPC_BUFFER_MAXSIZE)
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index a3bbe5ce4570f..36a3ad9336d6f 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -806,6 +806,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
+ static int
+ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
++ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
+@@ -814,7 +815,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
+- kthread_stop(task);
++ rqstp = kthread_data(task);
++ /* Did we lose a race to svo_function threadfn? */
++ if (kthread_stop(task) == -EINTR)
++ svc_exit_thread(rqstp);
+ nrservs++;
+ } while (nrservs < 0);
+ return 0;
+@@ -1020,7 +1024,7 @@ static int __svc_register(struct net *net, const char *progname,
+ #endif
+ }
+
+- trace_svc_register(progname, version, protocol, port, family, error);
++ trace_svc_register(progname, version, family, protocol, port, error);
+ return error;
+ }
+
+@@ -1676,16 +1680,17 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload);
+ /**
+ * svc_fill_write_vector - Construct data argument for VFS write call
+ * @rqstp: svc_rqst to operate on
+- * @pages: list of pages containing data payload
+- * @first: buffer containing first section of write payload
+- * @total: total number of bytes of write payload
++ * @payload: xdr_buf containing only the write data payload
+ *
+ * Fills in rqstp::rq_vec, and returns the number of elements.
+ */
+-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
+- struct kvec *first, size_t total)
++unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
++ struct xdr_buf *payload)
+ {
++ struct page **pages = payload->pages;
++ struct kvec *first = payload->head;
+ struct kvec *vec = rqstp->rq_vec;
++ size_t total = payload->len;
+ unsigned int i;
+
+ /* Some types of transport can present the write payload
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 6316bd2b8f37b..5ff8f902f14d2 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -243,7 +243,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+ xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ if (IS_ERR(xprt))
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+- xcl->xcl_name, sap, xprt);
++ xcl->xcl_name, sap, len, xprt);
+ return xprt;
+ }
+
+@@ -530,13 +530,23 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
+ }
+ EXPORT_SYMBOL_GPL(svc_reserve);
+
++static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
++{
++ if (!dr)
++ return;
++
++ xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
++ kfree(dr);
++}
++
+ static void svc_xprt_release(struct svc_rqst *rqstp)
+ {
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
+- xprt->xpt_ops->xpo_release_rqst(rqstp);
++ xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
++ rqstp->rq_xprt_ctxt = NULL;
+
+- kfree(rqstp->rq_deferred);
++ free_deferred(xprt, rqstp->rq_deferred);
+ rqstp->rq_deferred = NULL;
+
+ pagevec_release(&rqstp->rq_pvec);
+@@ -1054,7 +1064,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
+ spin_unlock_bh(&serv->sv_lock);
+
+ while ((dr = svc_deferred_dequeue(xprt)) != NULL)
+- kfree(dr);
++ free_deferred(xprt, dr);
+
+ call_xpt_users(xprt);
+ svc_xprt_put(xprt);
+@@ -1166,8 +1176,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
+ if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
+ spin_unlock(&xprt->xpt_lock);
+ trace_svc_defer_drop(dr);
++ free_deferred(xprt, dr);
+ svc_xprt_put(xprt);
+- kfree(dr);
+ return;
+ }
+ dr->xprt = NULL;
+@@ -1212,13 +1222,14 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
+ dr->addrlen = rqstp->rq_addrlen;
+ dr->daddr = rqstp->rq_daddr;
+ dr->argslen = rqstp->rq_arg.len >> 2;
+- dr->xprt_hlen = rqstp->rq_xprt_hlen;
+
+ /* back up head to the start of the buffer and copy */
+ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
+ memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
+ dr->argslen << 2);
+ }
++ dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
++ rqstp->rq_xprt_ctxt = NULL;
+ trace_svc_defer(rqstp);
+ svc_xprt_get(rqstp->rq_xprt);
+ dr->xprt = rqstp->rq_xprt;
+@@ -1238,21 +1249,23 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
+ trace_svc_defer_recv(dr);
+
+ /* setup iov_base past transport header */
+- rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
++ rqstp->rq_arg.head[0].iov_base = dr->args;
+ /* The iov_len does not include the transport header bytes */
+- rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
++ rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
+ rqstp->rq_arg.page_len = 0;
+ /* The rq_arg.len includes the transport header bytes */
+- rqstp->rq_arg.len = dr->argslen<<2;
++ rqstp->rq_arg.len = dr->argslen << 2;
+ rqstp->rq_prot = dr->prot;
+ memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
+ rqstp->rq_addrlen = dr->addrlen;
+ /* Save off transport header len in case we get deferred again */
+- rqstp->rq_xprt_hlen = dr->xprt_hlen;
+ rqstp->rq_daddr = dr->daddr;
+ rqstp->rq_respages = rqstp->rq_pages;
++ rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
++
++ dr->xprt_ctxt = NULL;
+ svc_xprt_received(rqstp->rq_xprt);
+- return (dr->argslen<<2) - dr->xprt_hlen;
++ return dr->argslen << 2;
+ }
+
+
+diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
+index d7ed7d49115ac..a7d107167c05c 100644
+--- a/net/sunrpc/svcauth_unix.c
++++ b/net/sunrpc/svcauth_unix.c
+@@ -415,14 +415,23 @@ static int unix_gid_hash(kuid_t uid)
+ return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
+ }
+
+-static void unix_gid_put(struct kref *kref)
++static void unix_gid_free(struct rcu_head *rcu)
+ {
+- struct cache_head *item = container_of(kref, struct cache_head, ref);
+- struct unix_gid *ug = container_of(item, struct unix_gid, h);
++ struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu);
++ struct cache_head *item = &ug->h;
++
+ if (test_bit(CACHE_VALID, &item->flags) &&
+ !test_bit(CACHE_NEGATIVE, &item->flags))
+ put_group_info(ug->gi);
+- kfree_rcu(ug, rcu);
++ kfree(ug);
++}
++
++static void unix_gid_put(struct kref *kref)
++{
++ struct cache_head *item = container_of(kref, struct cache_head, ref);
++ struct unix_gid *ug = container_of(item, struct unix_gid, h);
++
++ call_rcu(&ug->rcu, unix_gid_free);
+ }
+
+ static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index 478f857cdaed4..be7081284a098 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -111,36 +111,27 @@ static void svc_reclassify_socket(struct socket *sock)
+ #endif
+
+ /**
+- * svc_tcp_release_rqst - Release transport-related resources
+- * @rqstp: request structure with resources to be released
++ * svc_tcp_release_ctxt - Release transport-related resources
++ * @xprt: the transport which owned the context
++ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
+ *
+ */
+-static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
++static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
+ {
+- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+-
+- if (skb) {
+- struct svc_sock *svsk =
+- container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+-
+- rqstp->rq_xprt_ctxt = NULL;
+- skb_free_datagram_locked(svsk->sk_sk, skb);
+- }
+ }
+
+ /**
+- * svc_udp_release_rqst - Release transport-related resources
+- * @rqstp: request structure with resources to be released
++ * svc_udp_release_ctxt - Release transport-related resources
++ * @xprt: the transport which owned the context
++ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
+ *
+ */
+-static void svc_udp_release_rqst(struct svc_rqst *rqstp)
++static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
+ {
+- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
++ struct sk_buff *skb = ctxt;
+
+- if (skb) {
+- rqstp->rq_xprt_ctxt = NULL;
++ if (skb)
+ consume_skb(skb);
+- }
+ }
+
+ union svc_pktinfo_u {
+@@ -259,8 +250,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
+ ssize_t len;
+ size_t t;
+
+- rqstp->rq_xprt_hlen = 0;
+-
+ clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+
+ for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
+@@ -570,7 +559,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
+ unsigned int sent;
+ int err;
+
+- svc_udp_release_rqst(rqstp);
++ svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
++ rqstp->rq_xprt_ctxt = NULL;
+
+ svc_set_cmsg_data(rqstp, cmh);
+
+@@ -639,7 +629,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
+ .xpo_recvfrom = svc_udp_recvfrom,
+ .xpo_sendto = svc_udp_sendto,
+ .xpo_result_payload = svc_sock_result_payload,
+- .xpo_release_rqst = svc_udp_release_rqst,
++ .xpo_release_ctxt = svc_udp_release_ctxt,
+ .xpo_detach = svc_sock_detach,
+ .xpo_free = svc_sock_free,
+ .xpo_has_wspace = svc_udp_has_wspace,
+@@ -695,12 +685,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
+ {
+ struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+
+- if (svsk) {
+- /* Refer to svc_setup_socket() for details. */
+- rmb();
+- svsk->sk_odata(sk);
+- }
+-
+ /*
+ * This callback may called twice when a new connection
+ * is established as a child socket inherits everything
+@@ -709,13 +693,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
+ * when one of child sockets become ESTABLISHED.
+ * 2) data_ready method of the child socket may be called
+ * when it receives data before the socket is accepted.
+- * In case of 2, we should ignore it silently.
++ * In case of 2, we should ignore it silently and DO NOT
++ * dereference svsk.
+ */
+- if (sk->sk_state == TCP_LISTEN) {
+- if (svsk) {
+- set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+- svc_xprt_enqueue(&svsk->sk_xprt);
+- }
++ if (sk->sk_state != TCP_LISTEN)
++ return;
++
++ if (svsk) {
++ /* Refer to svc_setup_socket() for details. */
++ rmb();
++ svsk->sk_odata(sk);
++ set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
++ svc_xprt_enqueue(&svsk->sk_xprt);
+ }
+ }
+
+@@ -1096,7 +1085,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+ int ret;
+
+ *sentp = 0;
+- xdr_alloc_bvec(xdr, GFP_KERNEL);
++ ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
++ if (ret < 0)
++ return ret;
+
+ ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
+ if (ret < 0)
+@@ -1165,7 +1156,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
+ unsigned int sent;
+ int err;
+
+- svc_tcp_release_rqst(rqstp);
++ svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
++ rqstp->rq_xprt_ctxt = NULL;
+
+ atomic_inc(&svsk->sk_sendqlen);
+ mutex_lock(&xprt->xpt_mutex);
+@@ -1210,7 +1202,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
+ .xpo_recvfrom = svc_tcp_recvfrom,
+ .xpo_sendto = svc_tcp_sendto,
+ .xpo_result_payload = svc_sock_result_payload,
+- .xpo_release_rqst = svc_tcp_release_rqst,
++ .xpo_release_ctxt = svc_tcp_release_ctxt,
+ .xpo_detach = svc_tcp_sock_detach,
+ .xpo_free = svc_sock_free,
+ .xpo_has_wspace = svc_tcp_has_wspace,
+diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
+index 9a6f17e18f73b..55da1b627a7db 100644
+--- a/net/sunrpc/sysfs.c
++++ b/net/sunrpc/sysfs.c
+@@ -282,8 +282,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+ int offline = 0, online = 0, remove = 0;
+ struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
+
+- if (!xprt)
+- return 0;
++ if (!xprt || !xps) {
++ count = 0;
++ goto out_put;
++ }
+
+ if (!strncmp(buf, "offline", 7))
+ offline = 1;
+@@ -291,8 +293,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+ online = 1;
+ else if (!strncmp(buf, "remove", 6))
+ remove = 1;
+- else
+- return -EINVAL;
++ else {
++ count = -EINVAL;
++ goto out_put;
++ }
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ count = -EINTR;
+@@ -303,25 +307,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+ goto release_tasks;
+ }
+ if (offline) {
+- set_bit(XPRT_OFFLINE, &xprt->state);
+- spin_lock(&xps->xps_lock);
+- xps->xps_nactive--;
+- spin_unlock(&xps->xps_lock);
++ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
++ spin_lock(&xps->xps_lock);
++ xps->xps_nactive--;
++ spin_unlock(&xps->xps_lock);
++ }
+ } else if (online) {
+- clear_bit(XPRT_OFFLINE, &xprt->state);
+- spin_lock(&xps->xps_lock);
+- xps->xps_nactive++;
+- spin_unlock(&xps->xps_lock);
++ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
++ spin_lock(&xps->xps_lock);
++ xps->xps_nactive++;
++ spin_unlock(&xps->xps_lock);
++ }
+ } else if (remove) {
+ if (test_bit(XPRT_OFFLINE, &xprt->state)) {
+- set_bit(XPRT_REMOVE, &xprt->state);
+- xprt_force_disconnect(xprt);
+- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+- if (!xprt->sending.qlen &&
+- !xprt->pending.qlen &&
+- !xprt->backlog.qlen &&
+- !atomic_long_read(&xprt->queuelen))
+- rpc_xprt_switch_remove_xprt(xps, xprt);
++ if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) {
++ xprt_force_disconnect(xprt);
++ if (test_bit(XPRT_CONNECTED, &xprt->state)) {
++ if (!xprt->sending.qlen &&
++ !xprt->pending.qlen &&
++ !xprt->backlog.qlen &&
++ !atomic_long_read(&xprt->queuelen))
++ rpc_xprt_switch_remove_xprt(xps, xprt);
++ }
+ }
+ } else {
+ count = -EINVAL;
+@@ -518,13 +525,16 @@ void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
+ struct net *net)
+ {
+ struct rpc_sysfs_client *rpc_client;
++ struct rpc_sysfs_xprt_switch *xswitch =
++ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
++
++ if (!xswitch)
++ return;
+
+ rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj,
+ net, clnt->cl_clid);
+ if (rpc_client) {
+ char name[] = "switch";
+- struct rpc_sysfs_xprt_switch *xswitch =
+- (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+ int ret;
+
+ clnt->cl_sysfs = rpc_client;
+@@ -558,6 +568,8 @@ void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
+ rpc_xprt_switch->xprt_switch = xprt_switch;
+ rpc_xprt_switch->xprt = xprt;
+ kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD);
++ } else {
++ xprt_switch->xps_sysfs = NULL;
+ }
+ }
+
+@@ -569,6 +581,9 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
+ struct rpc_sysfs_xprt_switch *switch_obj =
+ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
++ if (!switch_obj)
++ return;
++
+ rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags);
+ if (rpc_xprt) {
+ xprt->xprt_sysfs = rpc_xprt;
+diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
+index ca10ba2626f27..f0a0a4ad6d525 100644
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -979,7 +979,11 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
++ if (space_left - frag1bytes >= PAGE_SIZE)
++ xdr->end = (void *)p + PAGE_SIZE;
++ else
++ xdr->end = (void *)p + space_left - frag1bytes;
++
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
+index cfd681700d1a1..2db834318d141 100644
+--- a/net/sunrpc/xprt.c
++++ b/net/sunrpc/xprt.c
+@@ -767,7 +767,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
+ */
+ static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
+ {
+- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
++ if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state))
++ return;
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+@@ -928,12 +929,7 @@ void xprt_connect(struct rpc_task *task)
+ if (!xprt_lock_write(xprt, task))
+ return;
+
+- if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+- trace_xprt_disconnect_cleanup(xprt);
+- xprt->ops->close(xprt);
+- }
+-
+- if (!xprt_connected(xprt)) {
++ if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+ task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
+ rpc_sleep_on_timeout(&xprt->pending, task, NULL,
+ xprt_request_timeout(task->tk_rqstp));
+@@ -1353,17 +1349,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
+ INIT_LIST_HEAD(&req->rq_xmit2);
+ goto out;
+ }
+- } else if (RPC_IS_SWAPPER(task)) {
+- list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+- if (pos->rq_cong || pos->rq_bytes_sent)
+- continue;
+- if (RPC_IS_SWAPPER(pos->rq_task))
+- continue;
+- /* Note: req is added _before_ pos */
+- list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+- INIT_LIST_HEAD(&req->rq_xmit2);
+- goto out;
+- }
+ } else if (!req->rq_seqno) {
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_task->tk_owner != task->tk_owner)
+@@ -1603,15 +1588,14 @@ xprt_transmit(struct rpc_task *task)
+ {
+ struct rpc_rqst *next, *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+- int counter, status;
++ int status;
+
+ spin_lock(&xprt->queue_lock);
+- counter = 0;
+- while (!list_empty(&xprt->xmit_queue)) {
+- if (++counter == 20)
++ for (;;) {
++ next = list_first_entry_or_null(&xprt->xmit_queue,
++ struct rpc_rqst, rq_xmit);
++ if (!next)
+ break;
+- next = list_first_entry(&xprt->xmit_queue,
+- struct rpc_rqst, rq_xmit);
+ xprt_pin_rqst(next);
+ spin_unlock(&xprt->queue_lock);
+ status = xprt_request_transmit(next, task);
+@@ -1619,13 +1603,16 @@ xprt_transmit(struct rpc_task *task)
+ status = 0;
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(next);
+- if (status == 0) {
+- if (!xprt_request_data_received(task) ||
+- test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+- continue;
+- } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+- task->tk_status = status;
+- break;
++ if (status < 0) {
++ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
++ task->tk_status = status;
++ break;
++ }
++ /* Was @task transmitted, and has it received a reply? */
++ if (xprt_request_data_received(task) &&
++ !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
++ break;
++ cond_resched_lock(&xprt->queue_lock);
+ }
+ spin_unlock(&xprt->queue_lock);
+ }
+@@ -1684,12 +1671,15 @@ out:
+ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
+ {
+ struct rpc_rqst *req = ERR_PTR(-EAGAIN);
++ gfp_t gfp_mask = GFP_KERNEL;
+
+ if (xprt->num_reqs >= xprt->max_reqs)
+ goto out;
+ ++xprt->num_reqs;
+ spin_unlock(&xprt->reserve_lock);
+- req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
++ if (current->flags & PF_WQ_WORKER)
++ gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
++ req = kzalloc(sizeof(*req), gfp_mask);
+ spin_lock(&xprt->reserve_lock);
+ if (req != NULL)
+ goto out;
+@@ -2109,7 +2099,14 @@ static void xprt_destroy(struct rpc_xprt *xprt)
+ */
+ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+
++ /*
++ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
++ * is cleared. We use ->transport_lock to ensure the mod_timer()
++ * can only run *before* del_time_sync(), never after.
++ */
++ spin_lock(&xprt->transport_lock);
+ del_timer_sync(&xprt->timer);
++ spin_unlock(&xprt->transport_lock);
+
+ /*
+ * Destroy sockets etc from the system workqueue so they can
+diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
+index c335c13615645..e9c69e9f42991 100644
+--- a/net/sunrpc/xprtrdma/rpc_rdma.c
++++ b/net/sunrpc/xprtrdma/rpc_rdma.c
+@@ -1120,6 +1120,7 @@ static bool
+ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+ #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ {
++ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct xdr_stream *xdr = &rep->rr_stream;
+ __be32 *p;
+
+@@ -1143,6 +1144,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+ if (*p != cpu_to_be32(RPC_CALL))
+ return false;
+
++ /* No bc service. */
++ if (xprt->bc_serv == NULL)
++ return false;
++
+ /* Now that we are sure this is a backchannel call,
+ * advance to the RPC header.
+ */
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+index 6be23ce7a93d2..0377679678f93 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+@@ -239,21 +239,20 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
+ }
+
+ /**
+- * svc_rdma_release_rqst - Release transport-specific per-rqst resources
+- * @rqstp: svc_rqst being released
++ * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
++ * @xprt: the transport which owned the context
++ * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
+ *
+ * Ensure that the recv_ctxt is released whether or not a Reply
+ * was sent. For example, the client could close the connection,
+ * or svc_process could drop an RPC, before the Reply is sent.
+ */
+-void svc_rdma_release_rqst(struct svc_rqst *rqstp)
++void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
+ {
+- struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
+- struct svc_xprt *xprt = rqstp->rq_xprt;
++ struct svc_rdma_recv_ctxt *ctxt = vctxt;
+ struct svcxprt_rdma *rdma =
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+- rqstp->rq_xprt_ctxt = NULL;
+ if (ctxt)
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
+@@ -792,6 +791,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
+ struct svc_rdma_recv_ctxt *ctxt;
+ int ret;
+
++ /* Prevent svc_xprt_release() from releasing pages in rq_pages
++ * when returning 0 or an error.
++ */
++ rqstp->rq_respages = rqstp->rq_pages;
++ rqstp->rq_next_page = rqstp->rq_respages;
++
+ rqstp->rq_xprt_ctxt = NULL;
+
+ ctxt = NULL;
+@@ -815,18 +820,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
+ DMA_FROM_DEVICE);
+ svc_rdma_build_arg_xdr(rqstp, ctxt);
+
+- /* Prevent svc_xprt_release from releasing pages in rq_pages
+- * if we return 0 or an error.
+- */
+- rqstp->rq_respages = rqstp->rq_pages;
+- rqstp->rq_next_page = rqstp->rq_respages;
+-
+ ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
+ if (ret < 0)
+ goto out_err;
+ if (ret == 0)
+ goto out_drop;
+- rqstp->rq_xprt_hlen = ret;
+
+ if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
+ goto out_backchannel;
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
+index e27433f08ca7f..50bf62f851668 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
+@@ -456,10 +456,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
+ unsigned int write_len;
+ u64 offset;
+
+- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
+- if (!seg)
++ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
+ goto out_overflow;
+
++ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
+ write_len = min(remaining, seg->rs_length - info->wi_seg_off);
+ if (!write_len)
+ goto out_overflow;
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+index 94b20fb471356..f776f0cb471f0 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -81,7 +81,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
+ .xpo_recvfrom = svc_rdma_recvfrom,
+ .xpo_sendto = svc_rdma_sendto,
+ .xpo_result_payload = svc_rdma_result_payload,
+- .xpo_release_rqst = svc_rdma_release_rqst,
++ .xpo_release_ctxt = svc_rdma_release_ctxt,
+ .xpo_detach = svc_rdma_detach,
+ .xpo_free = svc_rdma_free,
+ .xpo_has_wspace = svc_rdma_has_wspace,
+diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
+index 16e5696314a4f..32df237967472 100644
+--- a/net/sunrpc/xprtrdma/transport.c
++++ b/net/sunrpc/xprtrdma/transport.c
+@@ -521,7 +521,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+ return;
+
+ out_sleep:
+- task->tk_status = -EAGAIN;
++ task->tk_status = -ENOMEM;
+ xprt_add_backlog(xprt, task);
+ }
+
+@@ -574,8 +574,10 @@ xprt_rdma_allocate(struct rpc_task *task)
+ gfp_t flags;
+
+ flags = RPCRDMA_DEF_GFP;
++ if (RPC_IS_ASYNC(task))
++ flags = GFP_NOWAIT | __GFP_NOWARN;
+ if (RPC_IS_SWAPPER(task))
+- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
++ flags |= __GFP_MEMALLOC;
+
+ if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
+ flags))
+diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
+index aaec3c9be8db6..41095a278f798 100644
+--- a/net/sunrpc/xprtrdma/verbs.c
++++ b/net/sunrpc/xprtrdma/verbs.c
+@@ -438,6 +438,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
+ IB_POLL_WORKQUEUE);
+ if (IS_ERR(ep->re_attr.send_cq)) {
+ rc = PTR_ERR(ep->re_attr.send_cq);
++ ep->re_attr.send_cq = NULL;
+ goto out_destroy;
+ }
+
+@@ -446,6 +447,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
+ IB_POLL_WORKQUEUE);
+ if (IS_ERR(ep->re_attr.recv_cq)) {
+ rc = PTR_ERR(ep->re_attr.recv_cq);
++ ep->re_attr.recv_cq = NULL;
+ goto out_destroy;
+ }
+ ep->re_receive_count = 0;
+@@ -484,6 +486,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
+ ep->re_pd = ib_alloc_pd(device, 0);
+ if (IS_ERR(ep->re_pd)) {
+ rc = PTR_ERR(ep->re_pd);
++ ep->re_pd = NULL;
+ goto out_destroy;
+ }
+
+@@ -855,7 +858,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+ return req;
+
+ out3:
+- kfree(req->rl_sendbuf);
++ rpcrdma_regbuf_free(req->rl_sendbuf);
+ out2:
+ kfree(req);
+ out1:
+@@ -959,9 +962,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+ if (!rep->rr_rdmabuf)
+ goto out_free;
+
+- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
+- goto out_free_regbuf;
+-
+ rep->rr_cid.ci_completion_id =
+ atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
+
+@@ -980,8 +980,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+ spin_unlock(&buf->rb_lock);
+ return rep;
+
+-out_free_regbuf:
+- rpcrdma_regbuf_free(rep->rr_rdmabuf);
+ out_free:
+ kfree(rep);
+ out:
+@@ -1388,6 +1386,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
+ rep = rpcrdma_rep_create(r_xprt, temp);
+ if (!rep)
+ break;
++ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
++ rpcrdma_rep_put(buf, rep);
++ break;
++ }
+
+ rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
+ trace_xprtrdma_post_recv(rep);
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index 04f1b78bcbca3..bf801adff63db 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
+ /**
+ * xs_nospace - handle transmit was incomplete
+ * @req: pointer to RPC request
++ * @transport: pointer to struct sock_xprt
+ *
+ */
+-static int xs_nospace(struct rpc_rqst *req)
++static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
+ {
+- struct rpc_xprt *xprt = req->rq_xprt;
+- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
++ struct rpc_xprt *xprt = &transport->xprt;
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
+
+@@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req)
+
+ /* Don't race with disconnect */
+ if (xprt_connected(xprt)) {
++ struct socket_wq *wq;
++
++ rcu_read_lock();
++ wq = rcu_dereference(sk->sk_wq);
++ set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
++ rcu_read_unlock();
++
+ /* wait for more buffer space */
++ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+ xprt_wait_for_buffer_space(xprt);
+ } else
+ ret = -ENOTCONN;
+
+ spin_unlock(&xprt->transport_lock);
++ return ret;
++}
+
+- /* Race breaker in case memory is freed before above code is called */
+- if (ret == -EAGAIN) {
+- struct socket_wq *wq;
++static int xs_sock_nospace(struct rpc_rqst *req)
++{
++ struct sock_xprt *transport =
++ container_of(req->rq_xprt, struct sock_xprt, xprt);
++ struct sock *sk = transport->inet;
++ int ret = -EAGAIN;
+
+- rcu_read_lock();
+- wq = rcu_dereference(sk->sk_wq);
+- set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+- rcu_read_unlock();
++ lock_sock(sk);
++ if (!sock_writeable(sk))
++ ret = xs_nospace(req, transport);
++ release_sock(sk);
++ return ret;
++}
+
+- sk->sk_write_space(sk);
+- }
++static int xs_stream_nospace(struct rpc_rqst *req)
++{
++ struct sock_xprt *transport =
++ container_of(req->rq_xprt, struct sock_xprt, xprt);
++ struct sock *sk = transport->inet;
++ int ret = -EAGAIN;
++
++ lock_sock(sk);
++ if (!sk_stream_memory_free(sk))
++ ret = xs_nospace(req, transport);
++ release_sock(sk);
+ return ret;
+ }
+
+@@ -856,7 +880,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
+
+ /* Close the stream if the previous transmission was incomplete */
+ if (xs_send_request_was_aborted(transport, req)) {
+- xs_close(xprt);
++ xprt_force_disconnect(xprt);
+ return -ENOTCONN;
+ }
+
+@@ -887,14 +911,14 @@ static int xs_local_send_request(struct rpc_rqst *req)
+ case -ENOBUFS:
+ break;
+ case -EAGAIN:
+- status = xs_nospace(req);
++ status = xs_stream_nospace(req);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
+ fallthrough;
+ case -EPIPE:
+- xs_close(xprt);
++ xprt_force_disconnect(xprt);
+ status = -ENOTCONN;
+ }
+
+@@ -963,7 +987,7 @@ process_status:
+ /* Should we call xs_close() here? */
+ break;
+ case -EAGAIN:
+- status = xs_nospace(req);
++ status = xs_sock_nospace(req);
+ break;
+ case -ENETUNREACH:
+ case -ENOBUFS:
+@@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
+ /* Should we call xs_close() here? */
+ break;
+ case -EAGAIN:
+- status = xs_nospace(req);
++ status = xs_stream_nospace(req);
+ break;
+ case -ECONNRESET:
+ case -ECONNREFUSED:
+@@ -1181,6 +1205,16 @@ static void xs_reset_transport(struct sock_xprt *transport)
+
+ if (sk == NULL)
+ return;
++ /*
++ * Make sure we're calling this in a context from which it is safe
++ * to call __fput_sync(). In practice that means rpciod and the
++ * system workqueue.
++ */
++ if (!(current->flags & PF_WQ_WORKER)) {
++ WARN_ON_ONCE(1);
++ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
++ return;
++ }
+
+ if (atomic_read(&transport->xprt.swapper))
+ sk_clear_memalloc(sk);
+@@ -1204,7 +1238,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
+ mutex_unlock(&transport->recv_mutex);
+
+ trace_rpc_socket_close(xprt, sock);
+- fput(filp);
++ __fput_sync(filp);
+
+ xprt_disconnect_done(xprt);
+ }
+@@ -2106,6 +2140,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
+ switch (skst) {
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
++ case TCP_LAST_ACK:
+ break;
+ case TCP_ESTABLISHED:
+ case TCP_CLOSE_WAIT:
+@@ -2233,6 +2268,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+ fallthrough;
+ case -EINPROGRESS:
+ /* SYN_SENT! */
++ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ break;
+@@ -2258,10 +2294,14 @@ static void xs_tcp_setup_socket(struct work_struct *work)
+ struct rpc_xprt *xprt = &transport->xprt;
+ int status = -EIO;
+
+- if (!sock) {
+- sock = xs_create_sock(xprt, transport,
+- xs_addr(xprt)->sa_family, SOCK_STREAM,
+- IPPROTO_TCP, true);
++ if (xprt_connected(xprt))
++ goto out;
++ if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
++ &transport->sock_state) ||
++ !sock) {
++ xs_reset_transport(transport);
++ sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
++ SOCK_STREAM, IPPROTO_TCP, true);
+ if (IS_ERR(sock)) {
+ status = PTR_ERR(sock);
+ goto out;
+@@ -2343,11 +2383,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
+
+ if (transport->sock != NULL) {
+ dprintk("RPC: xs_connect delayed xprt %p for %lu "
+- "seconds\n",
+- xprt, xprt->reestablish_timeout / HZ);
+-
+- /* Start by resetting any existing state */
+- xs_reset_transport(transport);
++ "seconds\n", xprt, xprt->reestablish_timeout / HZ);
+
+ delay = xprt_reconnect_delay(xprt);
+ xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
+@@ -2823,9 +2859,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
+ }
+ xprt_set_bound(xprt);
+ xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+- ret = ERR_PTR(xs_local_setup_socket(transport));
+- if (ret)
+- goto out_err;
+ break;
+ default:
+ ret = ERR_PTR(-EAFNOSUPPORT);
+diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
+index 443f8e5b94777..dcbae29aa7e0a 100644
+--- a/net/tipc/bearer.c
++++ b/net/tipc/bearer.c
+@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,
+ u32 i;
+
+ if (!bearer_name_validate(name, &b_names)) {
+- errstr = "illegal name";
+ NL_SET_ERR_MSG(extack, "Illegal name");
+- goto rejected;
++ return res;
+ }
+
+ if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+@@ -352,16 +351,18 @@ static int tipc_enable_bearer(struct net *net, const char *name,
+ goto rejected;
+ }
+
+- test_and_set_bit_lock(0, &b->up);
+- rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+- if (skb)
+- tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+-
++ /* Create monitoring data before accepting activate messages */
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
++ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
++ test_and_set_bit_lock(0, &b->up);
++ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
++ if (skb)
++ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
++
+ pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
+ return res;
+@@ -540,6 +541,19 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id)
+ return mtu;
+ }
+
++int tipc_bearer_min_mtu(struct net *net, u32 bearer_id)
++{
++ int mtu = TIPC_MIN_BEARER_MTU;
++ struct tipc_bearer *b;
++
++ rcu_read_lock();
++ b = bearer_get(net, bearer_id);
++ if (b)
++ mtu += b->encap_hlen;
++ rcu_read_unlock();
++ return mtu;
++}
++
+ /* tipc_bearer_xmit_skb - sends buffer to destination over bearer
+ */
+ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+@@ -1137,8 +1151,8 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+ return -EINVAL;
+ }
+ #ifdef CONFIG_TIPC_MEDIA_UDP
+- if (tipc_udp_mtu_bad(nla_get_u32
+- (props[TIPC_NLA_PROP_MTU]))) {
++ if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) <
++ b->encap_hlen + TIPC_MIN_BEARER_MTU) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU value is out-of-range");
+ return -EINVAL;
+@@ -1244,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
+ struct tipc_nl_msg msg;
+ struct tipc_media *media;
+ struct sk_buff *rep;
+- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
++ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
+
+ if (!info->attrs[TIPC_NLA_MEDIA])
+ return -EINVAL;
+@@ -1293,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+ int err;
+ char *name;
+ struct tipc_media *m;
+- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
++ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
+
+ if (!info->attrs[TIPC_NLA_MEDIA])
+ return -EINVAL;
+diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
+index 57c6a1a719e24..483f90958857e 100644
+--- a/net/tipc/bearer.h
++++ b/net/tipc/bearer.h
+@@ -146,6 +146,7 @@ struct tipc_media {
+ * @identity: array index of this bearer within TIPC bearer array
+ * @disc: ptr to link setup request
+ * @net_plane: network plane ('A' through 'H') currently associated with bearer
++ * @encap_hlen: encap headers length
+ * @up: bearer up flag (bit 0)
+ * @refcnt: tipc_bearer reference counter
+ *
+@@ -170,6 +171,7 @@ struct tipc_bearer {
+ u32 identity;
+ struct tipc_discoverer *disc;
+ char net_plane;
++ u16 encap_hlen;
+ unsigned long up;
+ refcount_t refcnt;
+ };
+@@ -232,6 +234,7 @@ int tipc_bearer_setup(void);
+ void tipc_bearer_cleanup(void);
+ void tipc_bearer_stop(struct net *net);
+ int tipc_bearer_mtu(struct net *net, u32 bearer_id);
++int tipc_bearer_min_mtu(struct net *net, u32 bearer_id);
+ bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id);
+ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+ struct sk_buff *skb,
+diff --git a/net/tipc/core.c b/net/tipc/core.c
+index 3f4542e0f0650..434e70eabe081 100644
+--- a/net/tipc/core.c
++++ b/net/tipc/core.c
+@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
+ struct tipc_net *tn = tipc_net(net);
+
+ tipc_detach_loopback(net);
++ tipc_net_stop(net);
+ /* Make sure the tipc_net_finalize_work() finished */
+ cancel_work_sync(&tn->work);
+- tipc_net_stop(net);
+-
+ tipc_bcast_stop(net);
+ tipc_nametbl_stop(net);
+ tipc_sk_rht_destroy(net);
+diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
+index dc60c32bb70df..32447e8d94ac9 100644
+--- a/net/tipc/crypto.c
++++ b/net/tipc/crypto.c
+@@ -597,6 +597,10 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ tmp->cloned = NULL;
+ tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+ tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
++ if (!tmp->key) {
++ tipc_aead_free(&tmp->rcu);
++ return -ENOMEM;
++ }
+ memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+ atomic_set(&tmp->users, 0);
+ atomic64_set(&tmp->seqno, 0);
+@@ -1967,7 +1971,8 @@ rcv:
+
+ skb_reset_network_header(*skb);
+ skb_pull(*skb, tipc_ehdr_size(ehdr));
+- pskb_trim(*skb, (*skb)->len - aead->authsize);
++ if (pskb_trim(*skb, (*skb)->len - aead->authsize))
++ goto free_skb;
+
+ /* Validate TIPCv2 message */
+ if (unlikely(!tipc_msg_validate(skb))) {
+@@ -1978,6 +1983,9 @@ rcv:
+ /* Ok, everything's fine, try to synch own keys according to peers' */
+ tipc_crypto_key_synch(rx, *skb);
+
++ /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */
++ skb_cb = TIPC_SKB_CB(*skb);
++
+ /* Mark skb decrypted */
+ skb_cb->decrypted = 1;
+
+@@ -2283,7 +2291,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_aead_key *skey = NULL;
+ u16 key_gen = msg_key_gen(hdr);
+- u16 size = msg_data_sz(hdr);
++ u32 size = msg_data_sz(hdr);
+ u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+diff --git a/net/tipc/discover.c b/net/tipc/discover.c
+index da69e1abf68ff..e8dcdf267c0c3 100644
+--- a/net/tipc/discover.c
++++ b/net/tipc/discover.c
+@@ -148,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+ {
+ struct net *net = d->net;
+ struct tipc_net *tn = tipc_net(net);
+- bool trial = time_before(jiffies, tn->addr_trial_end);
+ u32 self = tipc_own_addr(net);
++ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
+
+ if (mtyp == DSC_TRIAL_FAIL_MSG) {
+ if (!trial)
+@@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
+ u32 self;
+ int err;
+
+- skb_linearize(skb);
++ if (skb_linearize(skb)) {
++ kfree_skb(skb);
++ return;
++ }
+ hdr = buf_msg(skb);
+
+ if (caps & TIPC_NODE_ID128)
+diff --git a/net/tipc/link.c b/net/tipc/link.c
+index 1b7a487c88419..655a2e1b6dfe4 100644
+--- a/net/tipc/link.c
++++ b/net/tipc/link.c
+@@ -1298,8 +1298,11 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
+ return false;
+ #ifdef CONFIG_TIPC_CRYPTO
+ case MSG_CRYPTO:
+- tipc_crypto_msg_rcv(l->net, skb);
+- return true;
++ if (TIPC_SKB_CB(skb)->decrypted) {
++ tipc_crypto_msg_rcv(l->net, skb);
++ return true;
++ }
++ fallthrough;
+ #endif
+ default:
+ pr_warn("Dropping received illegal msg type\n");
+@@ -2196,7 +2199,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_gap_ack_blks *ga = NULL;
+ bool reply = msg_probe(hdr), retransmitted = false;
+- u16 dlen = msg_data_sz(hdr), glen = 0;
++ u32 dlen = msg_data_sz(hdr), glen = 0, msg_max;
+ u16 peers_snd_nxt = msg_next_sent(hdr);
+ u16 peers_tol = msg_link_tolerance(hdr);
+ u16 peers_prio = msg_linkprio(hdr);
+@@ -2210,13 +2213,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ void *data;
+
+ trace_tipc_proto_rcv(skb, false, l->name);
++
++ if (dlen > U16_MAX)
++ goto exit;
++
+ if (tipc_link_is_blocked(l) || !xmitq)
+ goto exit;
+
+ if (tipc_own_addr(l->net) > msg_prevnode(hdr))
+ l->net_plane = msg_net_plane(hdr);
+
+- skb_linearize(skb);
++ if (skb_linearize(skb))
++ goto exit;
++
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
+
+@@ -2229,6 +2238,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ switch (mtyp) {
+ case RESET_MSG:
+ case ACTIVATE_MSG:
++ msg_max = msg_max_pkt(hdr);
++ if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id))
++ break;
+ /* Complete own link name with peer's interface name */
+ if_name = strrchr(l->name, ':') + 1;
+ if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
+@@ -2273,11 +2285,16 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ l->peer_session = msg_session(hdr);
+ l->in_session = true;
+ l->peer_bearer_id = msg_bearer_id(hdr);
+- if (l->mtu > msg_max_pkt(hdr))
+- l->mtu = msg_max_pkt(hdr);
++ if (l->mtu > msg_max)
++ l->mtu = msg_max;
+ break;
+
+ case STATE_MSG:
++ /* Validate Gap ACK blocks, drop if invalid */
++ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
++ if (glen > dlen)
++ break;
++
+ l->rcv_nxt_state = msg_seqno(hdr) + 1;
+
+ /* Update own tolerance if peer indicates a non-zero value */
+@@ -2303,9 +2320,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ break;
+ }
+
+- /* Receive Gap ACK blocks from peer if any */
+- glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+-
+ tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
+ &l->mon_state, l->bearer_id);
+
+diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
+index 407619697292f..9618e4429f0fe 100644
+--- a/net/tipc/monitor.c
++++ b/net/tipc/monitor.c
+@@ -160,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v)
+
+ static int map_get(u64 up_map, int i)
+ {
+- return (up_map & (1 << i)) >> i;
++ return (up_map & (1ULL << i)) >> i;
+ }
+
+ static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+@@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ state->probing = false;
+
+ /* Sanity check received domain record */
++ if (new_member_cnt > MAX_MON_DOMAIN)
++ return;
+ if (dlen < dom_rec_len(arrv_dom, 0))
+ return;
+ if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
+diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
+index bda902caa8147..8267b751a526a 100644
+--- a/net/tipc/name_distr.c
++++ b/net/tipc/name_distr.c
+@@ -313,7 +313,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
+ pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
+ ua.sr.type, ua.sr.lower, node);
+ } else {
+- pr_warn("Unrecognized name table message received\n");
++ pr_warn_ratelimited("Unknown name table message received\n");
+ }
+ return false;
+ }
+diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
+index 01396dd1c899b..1d8ba233d0474 100644
+--- a/net/tipc/name_table.c
++++ b/net/tipc/name_table.c
+@@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
+ list_for_each_entry(p, &sr->all_publ, all_publ)
+ if (p->key == *last_key)
+ break;
+- if (p->key != *last_key)
++ if (list_entry_is_head(p, &sr->all_publ, all_publ))
+ return -EPIPE;
+ } else {
+ p = list_first_entry(&sr->all_publ,
+diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
+index 0749df80454d4..ce00f271ca6b2 100644
+--- a/net/tipc/netlink_compat.c
++++ b/net/tipc/netlink_compat.c
+@@ -880,7 +880,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
+ };
+
+ ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
+- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
++ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
+ return -EINVAL;
+
+ depth = ntohl(ntq->depth);
+diff --git a/net/tipc/node.c b/net/tipc/node.c
+index 9947b7dfe1d2d..a9c5b6594889b 100644
+--- a/net/tipc/node.c
++++ b/net/tipc/node.c
+@@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
+ u32 flags = n->action_flags;
+ struct list_head *publ_list;
+ struct tipc_uaddr ua;
+- u32 bearer_id;
++ u32 bearer_id, node;
+
+ if (likely(!flags)) {
+ write_unlock_bh(&n->lock);
+@@ -413,7 +413,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ TIPC_LINK_STATE, n->addr, n->addr);
+ sk.ref = n->link_id;
+- sk.node = n->addr;
++ sk.node = tipc_own_addr(net);
++ node = n->addr;
+ bearer_id = n->link_id & 0xffff;
+ publ_list = &n->publ_list;
+
+@@ -423,17 +424,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
+ write_unlock_bh(&n->lock);
+
+ if (flags & TIPC_NOTIFY_NODE_DOWN)
+- tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
++ tipc_publ_notify(net, publ_list, node, n->capabilities);
+
+ if (flags & TIPC_NOTIFY_NODE_UP)
+- tipc_named_node_up(net, sk.node, n->capabilities);
++ tipc_named_node_up(net, node, n->capabilities);
+
+ if (flags & TIPC_NOTIFY_LINK_UP) {
+- tipc_mon_peer_up(net, sk.node, bearer_id);
++ tipc_mon_peer_up(net, node, bearer_id);
+ tipc_nametbl_publish(net, &ua, &sk, sk.ref);
+ }
+ if (flags & TIPC_NOTIFY_LINK_DOWN) {
+- tipc_mon_peer_down(net, sk.node, bearer_id);
++ tipc_mon_peer_down(net, node, bearer_id);
+ tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
+ }
+ }
+@@ -471,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ bool preliminary)
+ {
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
++ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
+ struct tipc_node *n, *temp_node;
+- struct tipc_link *l;
+ unsigned long intv;
+ int bearer_id;
+ int i;
+@@ -487,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ goto exit;
+ /* A preliminary node becomes "real" now, refresh its data */
+ tipc_node_write_lock(n);
++ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
++ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
++ n->capabilities, &n->bc_entry.inputq1,
++ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
++ pr_warn("Broadcast rcv link refresh failed, no memory\n");
++ tipc_node_write_unlock_fast(n);
++ tipc_node_put(n);
++ n = NULL;
++ goto exit;
++ }
+ n->preliminary = false;
+ n->addr = addr;
+ hlist_del_rcu(&n->hash);
+@@ -566,7 +577,16 @@ update:
+ n->signature = INVALID_NODE_SIG;
+ n->active_links[0] = INVALID_BEARER_ID;
+ n->active_links[1] = INVALID_BEARER_ID;
+- n->bc_entry.link = NULL;
++ if (!preliminary &&
++ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
++ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
++ n->capabilities, &n->bc_entry.inputq1,
++ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
++ pr_warn("Broadcast rcv link creation failed, no memory\n");
++ tipc_node_put(n);
++ n = NULL;
++ goto exit;
++ }
+ tipc_node_get(n);
+ timer_setup(&n->timer, tipc_node_timeout, 0);
+ /* Start a slow timer anyway, crypto needs it */
+@@ -1154,13 +1174,14 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ bool *respond, bool *dupl_addr)
+ {
+ struct tipc_node *n;
+- struct tipc_link *l, *snd_l;
++ struct tipc_link *l;
+ struct tipc_link_entry *le;
+ bool addr_match = false;
+ bool sign_match = false;
+ bool link_up = false;
++ bool link_is_reset = false;
+ bool accept_addr = false;
+- bool reset = true;
++ bool reset = false;
+ char *if_name;
+ unsigned long intv;
+ u16 session;
+@@ -1174,36 +1195,20 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ return;
+
+ tipc_node_write_lock(n);
+- if (unlikely(!n->bc_entry.link)) {
+- snd_l = tipc_bc_sndlink(net);
+- if (!tipc_link_bc_create(net, tipc_own_addr(net),
+- addr, peer_id, U16_MAX,
+- tipc_link_min_win(snd_l),
+- tipc_link_max_win(snd_l),
+- n->capabilities,
+- &n->bc_entry.inputq1,
+- &n->bc_entry.namedq, snd_l,
+- &n->bc_entry.link)) {
+- pr_warn("Broadcast rcv link creation failed, no mem\n");
+- tipc_node_write_unlock_fast(n);
+- tipc_node_put(n);
+- return;
+- }
+- }
+
+ le = &n->links[b->identity];
+
+ /* Prepare to validate requesting node's signature and media address */
+ l = le->link;
+ link_up = l && tipc_link_is_up(l);
++ link_is_reset = l && tipc_link_is_reset(l);
+ addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
+ sign_match = (signature == n->signature);
+
+ /* These three flags give us eight permutations: */
+
+ if (sign_match && addr_match && link_up) {
+- /* All is fine. Do nothing. */
+- reset = false;
++ /* All is fine. Ignore requests. */
+ /* Peer node is not a container/local namespace */
+ if (!n->peer_hash_mix)
+ n->peer_hash_mix = hash_mixes;
+@@ -1228,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ */
+ accept_addr = true;
+ *respond = true;
++ reset = true;
+ } else if (!sign_match && addr_match && link_up) {
+ /* Peer node rebooted. Two possibilities:
+ * - Delayed re-discovery; this link endpoint has already
+@@ -1259,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ n->signature = signature;
+ accept_addr = true;
+ *respond = true;
++ reset = true;
+ }
+
+ if (!accept_addr)
+@@ -1287,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
+ if (n->state == NODE_FAILINGOVER)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
++ link_is_reset = tipc_link_is_reset(l);
+ le->link = l;
+ n->link_cnt++;
+ tipc_node_calculate_timer(n, l);
+@@ -1299,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
+ memcpy(&le->maddr, maddr, sizeof(*maddr));
+ exit:
+ tipc_node_write_unlock(n);
+- if (reset && l && !tipc_link_is_reset(l))
++ if (reset && !link_is_reset)
+ tipc_node_link_down(n, b->identity, false);
+ tipc_node_put(n);
+ }
+@@ -1685,6 +1693,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+ struct tipc_node *n;
+ struct sk_buff_head xmitq;
+ bool node_up = false;
++ struct net *peer_net;
+ int bearer_id;
+ int rc;
+
+@@ -1701,18 +1710,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+ return -EHOSTUNREACH;
+ }
+
++ rcu_read_lock();
+ tipc_node_read_lock(n);
+ node_up = node_is_up(n);
+- if (node_up && n->peer_net && check_net(n->peer_net)) {
++ peer_net = n->peer_net;
++ tipc_node_read_unlock(n);
++ if (node_up && peer_net && check_net(peer_net)) {
+ /* xmit inner linux container */
+- tipc_lxc_xmit(n->peer_net, list);
++ tipc_lxc_xmit(peer_net, list);
+ if (likely(skb_queue_empty(list))) {
+- tipc_node_read_unlock(n);
++ rcu_read_unlock();
+ tipc_node_put(n);
+ return 0;
+ }
+ }
++ rcu_read_unlock();
+
++ tipc_node_read_lock(n);
+ bearer_id = n->active_links[selector & 1];
+ if (unlikely(bearer_id == INVALID_BEARER_ID)) {
+ tipc_node_read_unlock(n);
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index ad570c2450be8..b34857217fde4 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -313,9 +313,9 @@ static void tsk_rej_rx_queue(struct sock *sk, int error)
+ tipc_sk_respond(sk, skb, error);
+ }
+
+-static bool tipc_sk_connected(struct sock *sk)
++static bool tipc_sk_connected(const struct sock *sk)
+ {
+- return sk->sk_state == TIPC_ESTABLISHED;
++ return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED;
+ }
+
+ /* tipc_sk_type_connectionless - check if the socket is datagram socket
+@@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+ sock_init_data(sock, sk);
+ tipc_set_sk_state(sk, TIPC_OPEN);
+ if (tipc_sk_insert(tsk)) {
++ sk_free(sk);
+ pr_warn("Socket create failed; port number exhausted\n");
+ return -EINVAL;
+ }
+@@ -516,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+ sk->sk_shutdown = 0;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
+ sk->sk_data_ready = tipc_data_ready;
+ sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
+@@ -1461,6 +1462,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
+ msg_set_syn(hdr, 1);
+ }
+
++ memset(&skaddr, 0, sizeof(skaddr));
++
+ /* Determine destination */
+ if (atype == TIPC_SERVICE_RANGE) {
+ return tipc_sendmcast(sock, ua, m, dlen, timeout);
+@@ -2850,7 +2853,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
+
+ /* Try again later if dest link is congested */
+ if (tsk->cong_link_cnt) {
+- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
++ sk_reset_timer(sk, &sk->sk_timer,
++ jiffies + msecs_to_jiffies(100));
+ return;
+ }
+ /* Prepare SYN for retransmit */
+@@ -3747,7 +3751,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
+ if (p->key == *last_publ)
+ break;
+ }
+- if (p->key != *last_publ) {
++ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
+ /* We never set seq or call nl_dump_check_consistent()
+ * this means that setting prev_seq here will cause the
+ * consistence check to fail in the netlink callback
+diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
+index 5522865deae95..e3b427a703980 100644
+--- a/net/tipc/topsrv.c
++++ b/net/tipc/topsrv.c
+@@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con)
+ conn_put(con);
+ }
+
+-static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
++static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock)
+ {
+ struct tipc_conn *con;
+ int ret;
+@@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+- spin_unlock_bh(&s->idr_lock);
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
++ con->sock = sock;
++ conn_get(con);
++ spin_unlock_bh(&s->idr_lock);
+
+ return con;
+ }
+@@ -450,17 +452,24 @@ static void tipc_conn_data_ready(struct sock *sk)
+ static void tipc_topsrv_accept(struct work_struct *work)
+ {
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+- struct socket *lsock = srv->listener;
+- struct socket *newsock;
++ struct socket *newsock, *lsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
++ spin_lock_bh(&srv->idr_lock);
++ if (!srv->listener) {
++ spin_unlock_bh(&srv->idr_lock);
++ return;
++ }
++ lsock = srv->listener;
++ spin_unlock_bh(&srv->idr_lock);
++
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+- con = tipc_conn_alloc(srv);
++ con = tipc_conn_alloc(srv, newsock);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+@@ -472,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work)
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+- con->sock = newsock;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
++ conn_put(con);
+ }
+ }
+
+@@ -489,7 +498,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+- if (srv->listener)
++ if (srv)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+@@ -568,19 +577,19 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+- *(u32 *)&sub.usr_handle = port;
++ *(u64 *)&sub.usr_handle = (u64)port;
+
+- con = tipc_conn_alloc(tipc_topsrv(net));
++ con = tipc_conn_alloc(tipc_topsrv(net), NULL);
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+- con->sock = NULL;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+- if (rc >= 0)
+- return true;
++ if (rc)
++ conn_put(con);
++
+ conn_put(con);
+- return false;
++ return !rc;
+ }
+
+ void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+@@ -699,8 +708,9 @@ static void tipc_topsrv_stop(struct net *net)
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+- sock_release(lsock);
++
+ tipc_topsrv_work_stop(srv);
++ sock_release(lsock);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+ }
+diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
+index c2bb818704c8f..0a85244fd6188 100644
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -738,8 +738,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+ udp_conf.local_ip.s_addr = local.ipv4.s_addr;
+ udp_conf.use_udp_checksums = false;
+ ub->ifindex = dev->ifindex;
+- if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
+- sizeof(struct udphdr))) {
++ b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr);
++ if (tipc_mtu_bad(dev, b->encap_hlen)) {
+ err = -EINVAL;
+ goto err;
+ }
+@@ -760,6 +760,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+ else
+ udp_conf.local_ip6 = local.ipv6;
+ ub->ifindex = dev->ifindex;
++ b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr);
+ b->mtu = 1280;
+ #endif
+ } else {
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index b932469ee69cc..88785196a8966 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -45,14 +45,14 @@
+ */
+ static DECLARE_RWSEM(device_offload_lock);
+
+-static void tls_device_gc_task(struct work_struct *work);
++static struct workqueue_struct *destruct_wq __read_mostly;
+
+-static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
+-static LIST_HEAD(tls_device_gc_list);
+ static LIST_HEAD(tls_device_list);
+ static LIST_HEAD(tls_device_down_list);
+ static DEFINE_SPINLOCK(tls_device_lock);
+
++static struct page *dummy_page;
++
+ static void tls_device_free_ctx(struct tls_context *ctx)
+ {
+ if (ctx->tx_conf == TLS_HW) {
+@@ -67,44 +67,44 @@ static void tls_device_free_ctx(struct tls_context *ctx)
+ tls_ctx_free(NULL, ctx);
+ }
+
+-static void tls_device_gc_task(struct work_struct *work)
++static void tls_device_tx_del_task(struct work_struct *work)
+ {
+- struct tls_context *ctx, *tmp;
+- unsigned long flags;
+- LIST_HEAD(gc_list);
+-
+- spin_lock_irqsave(&tls_device_lock, flags);
+- list_splice_init(&tls_device_gc_list, &gc_list);
+- spin_unlock_irqrestore(&tls_device_lock, flags);
+-
+- list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
+- struct net_device *netdev = ctx->netdev;
+-
+- if (netdev && ctx->tx_conf == TLS_HW) {
+- netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+- TLS_OFFLOAD_CTX_DIR_TX);
+- dev_put(netdev);
+- ctx->netdev = NULL;
+- }
++ struct tls_offload_context_tx *offload_ctx =
++ container_of(work, struct tls_offload_context_tx, destruct_work);
++ struct tls_context *ctx = offload_ctx->ctx;
++ struct net_device *netdev = ctx->netdev;
+
+- list_del(&ctx->list);
+- tls_device_free_ctx(ctx);
+- }
++ netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX);
++ dev_put(netdev);
++ ctx->netdev = NULL;
++ tls_device_free_ctx(ctx);
+ }
+
+ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
+ {
+ unsigned long flags;
++ bool async_cleanup;
+
+ spin_lock_irqsave(&tls_device_lock, flags);
+- list_move_tail(&ctx->list, &tls_device_gc_list);
++ if (unlikely(!refcount_dec_and_test(&ctx->refcount))) {
++ spin_unlock_irqrestore(&tls_device_lock, flags);
++ return;
++ }
+
+- /* schedule_work inside the spinlock
+- * to make sure tls_device_down waits for that work.
+- */
+- schedule_work(&tls_device_gc_work);
++ list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */
++ async_cleanup = ctx->netdev && ctx->tx_conf == TLS_HW;
++ if (async_cleanup) {
++ struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx);
+
++ /* queue_work inside the spinlock
++ * to make sure tls_device_down waits for that work.
++ */
++ queue_work(destruct_wq, &offload_ctx->destruct_work);
++ }
+ spin_unlock_irqrestore(&tls_device_lock, flags);
++
++ if (!async_cleanup)
++ tls_device_free_ctx(ctx);
+ }
+
+ /* We assume that the socket is already connected */
+@@ -194,8 +194,7 @@ void tls_device_sk_destruct(struct sock *sk)
+ clean_acked_data_disable(inet_csk(sk));
+ }
+
+- if (refcount_dec_and_test(&tls_ctx->refcount))
+- tls_device_queue_ctx_destruction(tls_ctx);
++ tls_device_queue_ctx_destruction(tls_ctx);
+ }
+ EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
+
+@@ -300,36 +299,33 @@ static int tls_push_record(struct sock *sk,
+ return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
+ }
+
+-static int tls_device_record_close(struct sock *sk,
+- struct tls_context *ctx,
+- struct tls_record_info *record,
+- struct page_frag *pfrag,
+- unsigned char record_type)
++static void tls_device_record_close(struct sock *sk,
++ struct tls_context *ctx,
++ struct tls_record_info *record,
++ struct page_frag *pfrag,
++ unsigned char record_type)
+ {
+ struct tls_prot_info *prot = &ctx->prot_info;
+- int ret;
++ struct page_frag dummy_tag_frag;
+
+ /* append tag
+ * device will fill in the tag, we just need to append a placeholder
+ * use socket memory to improve coalescing (re-using a single buffer
+ * increases frag count)
+- * if we can't allocate memory now, steal some back from data
++ * if we can't allocate memory now use the dummy page
+ */
+- if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+- sk->sk_allocation))) {
+- ret = 0;
+- tls_append_frag(record, pfrag, prot->tag_size);
+- } else {
+- ret = prot->tag_size;
+- if (record->len <= prot->overhead_size)
+- return -ENOMEM;
++ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
++ !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
++ dummy_tag_frag.page = dummy_page;
++ dummy_tag_frag.offset = 0;
++ pfrag = &dummy_tag_frag;
+ }
++ tls_append_frag(record, pfrag, prot->tag_size);
+
+ /* fill prepend */
+ tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+ record->len - prot->overhead_size,
+ record_type);
+- return ret;
+ }
+
+ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
+@@ -483,11 +479,13 @@ handle_error:
+ copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
+ copy = min_t(size_t, copy, (max_open_record_len - record->len));
+
+- rc = tls_device_copy_data(page_address(pfrag->page) +
+- pfrag->offset, copy, msg_iter);
+- if (rc)
+- goto handle_error;
+- tls_append_frag(record, pfrag, copy);
++ if (copy) {
++ rc = tls_device_copy_data(page_address(pfrag->page) +
++ pfrag->offset, copy, msg_iter);
++ if (rc)
++ goto handle_error;
++ tls_append_frag(record, pfrag, copy);
++ }
+
+ size -= copy;
+ if (!size) {
+@@ -503,18 +501,8 @@ last_record:
+
+ if (done || record->len >= max_open_record_len ||
+ (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+- rc = tls_device_record_close(sk, tls_ctx, record,
+- pfrag, record_type);
+- if (rc) {
+- if (rc > 0) {
+- size += rc;
+- } else {
+- size = orig_size;
+- destroy_record(record);
+- ctx->open_record = NULL;
+- break;
+- }
+- }
++ tls_device_record_close(sk, tls_ctx, record,
++ pfrag, record_type);
+
+ rc = tls_push_record(sk,
+ tls_ctx,
+@@ -1101,6 +1089,9 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+ start_marker_record->len = 0;
+ start_marker_record->num_frags = 0;
+
++ INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
++ offload_ctx->ctx = ctx;
++
+ INIT_LIST_HEAD(&offload_ctx->records_list);
+ list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
+ spin_lock_init(&offload_ctx->lock);
+@@ -1345,12 +1336,20 @@ static int tls_device_down(struct net_device *netdev)
+
+ /* Device contexts for RX and TX will be freed in on sk_destruct
+ * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
++ * Now release the ref taken above.
+ */
++ if (refcount_dec_and_test(&ctx->refcount)) {
++ /* sk_destruct ran after tls_device_down took a ref, and
++ * it returned early. Complete the destruction here.
++ */
++ list_del(&ctx->list);
++ tls_device_free_ctx(ctx);
++ }
+ }
+
+ up_write(&device_offload_lock);
+
+- flush_work(&tls_device_gc_work);
++ flush_workqueue(destruct_wq);
+
+ return NOTIFY_DONE;
+ }
+@@ -1389,14 +1388,38 @@ static struct notifier_block tls_dev_notifier = {
+ .notifier_call = tls_dev_event,
+ };
+
+-void __init tls_device_init(void)
++int __init tls_device_init(void)
+ {
+- register_netdevice_notifier(&tls_dev_notifier);
++ int err;
++
++ dummy_page = alloc_page(GFP_KERNEL);
++ if (!dummy_page)
++ return -ENOMEM;
++
++ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
++ if (!destruct_wq) {
++ err = -ENOMEM;
++ goto err_free_dummy;
++ }
++
++ err = register_netdevice_notifier(&tls_dev_notifier);
++ if (err)
++ goto err_destroy_wq;
++
++ return 0;
++
++err_destroy_wq:
++ destroy_workqueue(destruct_wq);
++err_free_dummy:
++ put_page(dummy_page);
++ return err;
+ }
+
+ void __exit tls_device_cleanup(void)
+ {
+ unregister_netdevice_notifier(&tls_dev_notifier);
+- flush_work(&tls_device_gc_work);
++ flush_workqueue(destruct_wq);
++ destroy_workqueue(destruct_wq);
+ clean_acked_data_flush();
++ put_page(dummy_page);
+ }
+diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
+index 9ab81db8a6545..20b8ba4d1dfc4 100644
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -61,7 +61,7 @@ static DEFINE_MUTEX(tcpv6_prot_mutex);
+ static const struct proto *saved_tcpv4_prot;
+ static DEFINE_MUTEX(tcpv4_prot_mutex);
+ static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+-static struct proto_ops tls_sw_proto_ops;
++static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
+ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+ const struct proto *base);
+
+@@ -71,6 +71,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+
+ WRITE_ONCE(sk->sk_prot,
+ &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
++ WRITE_ONCE(sk->sk_socket->ops,
++ &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+ }
+
+ int wait_on_pending_writer(struct sock *sk, long *timeo)
+@@ -90,7 +92,8 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
+ break;
+ }
+
+- if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait))
++ if (sk_wait_event(sk, timeo,
++ !READ_ONCE(sk->sk_write_pending), &wait))
+ break;
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+@@ -384,13 +387,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
+ rc = -EINVAL;
+ goto out;
+ }
+- lock_sock(sk);
+ memcpy(crypto_info_aes_gcm_128->iv,
+ cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+ TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+- release_sock(sk);
+ if (copy_to_user(optval,
+ crypto_info_aes_gcm_128,
+ sizeof(*crypto_info_aes_gcm_128)))
+@@ -408,13 +409,11 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
+ rc = -EINVAL;
+ goto out;
+ }
+- lock_sock(sk);
+ memcpy(crypto_info_aes_gcm_256->iv,
+ cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
+ TLS_CIPHER_AES_GCM_256_IV_SIZE);
+ memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
+- release_sock(sk);
+ if (copy_to_user(optval,
+ crypto_info_aes_gcm_256,
+ sizeof(*crypto_info_aes_gcm_256)))
+@@ -434,6 +433,8 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
+ {
+ int rc = 0;
+
++ lock_sock(sk);
++
+ switch (optname) {
+ case TLS_TX:
+ case TLS_RX:
+@@ -444,6 +445,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
+ rc = -ENOPROTOOPT;
+ break;
+ }
++
++ release_sock(sk);
++
+ return rc;
+ }
+
+@@ -581,8 +585,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
+ if (tx) {
+ ctx->sk_write_space = sk->sk_write_space;
+ sk->sk_write_space = tls_write_space;
+- } else {
+- sk->sk_socket->ops = &tls_sw_proto_ops;
+ }
+ goto out;
+
+@@ -640,6 +642,39 @@ struct tls_context *tls_ctx_create(struct sock *sk)
+ return ctx;
+ }
+
++static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
++ const struct proto_ops *base)
++{
++ ops[TLS_BASE][TLS_BASE] = *base;
++
++ ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
++ ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked;
++
++ ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
++ ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
++
++ ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
++ ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
++
++#ifdef CONFIG_TLS_DEVICE
++ ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
++ ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL;
++
++ ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ];
++ ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL;
++
++ ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ];
++
++ ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ];
++
++ ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ];
++ ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL;
++#endif
++#ifdef CONFIG_TLS_TOE
++ ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
++#endif
++}
++
+ static void tls_build_proto(struct sock *sk)
+ {
+ int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+@@ -651,6 +686,8 @@ static void tls_build_proto(struct sock *sk)
+ mutex_lock(&tcpv6_prot_mutex);
+ if (likely(prot != saved_tcpv6_prot)) {
+ build_protos(tls_prots[TLSV6], prot);
++ build_proto_ops(tls_proto_ops[TLSV6],
++ sk->sk_socket->ops);
+ smp_store_release(&saved_tcpv6_prot, prot);
+ }
+ mutex_unlock(&tcpv6_prot_mutex);
+@@ -661,6 +698,8 @@ static void tls_build_proto(struct sock *sk)
+ mutex_lock(&tcpv4_prot_mutex);
+ if (likely(prot != saved_tcpv4_prot)) {
+ build_protos(tls_prots[TLSV4], prot);
++ build_proto_ops(tls_proto_ops[TLSV4],
++ sk->sk_socket->ops);
+ smp_store_release(&saved_tcpv4_prot, prot);
+ }
+ mutex_unlock(&tcpv4_prot_mutex);
+@@ -753,6 +792,8 @@ static void tls_update(struct sock *sk, struct proto *p,
+ {
+ struct tls_context *ctx;
+
++ WARN_ON_ONCE(sk->sk_prot == p);
++
+ ctx = tls_get_ctx(sk);
+ if (likely(ctx)) {
+ ctx->sk_write_space = write_space;
+@@ -871,11 +912,12 @@ static int __init tls_register(void)
+ if (err)
+ return err;
+
+- tls_sw_proto_ops = inet_stream_ops;
+- tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+- tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked;
++ err = tls_device_init();
++ if (err) {
++ unregister_pernet_subsys(&tls_proc_ops);
++ return err;
++ }
+
+- tls_device_init();
+ tcp_register_ulp(&tcp_tls_ulp_ops);
+
+ return 0;
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 1b08b877a8900..101d231c1b610 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -515,7 +515,7 @@ static int tls_do_encryption(struct sock *sk,
+ memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv,
+ prot->iv_size + prot->salt_size);
+
+- xor_iv_with_seq(prot, rec->iv_data, tls_ctx->tx.rec_seq);
++ xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq);
+
+ sge->offset += prot->prepend_size;
+ sge->length -= prot->prepend_size;
+@@ -801,7 +801,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
+ struct sk_psock *psock;
+ struct sock *sk_redir;
+ struct tls_rec *rec;
+- bool enospc, policy;
++ bool enospc, policy, redir_ingress;
+ int err = 0, send;
+ u32 delta = 0;
+
+@@ -809,7 +809,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
+ psock = sk_psock_get(sk);
+ if (!psock || !policy) {
+ err = tls_push_record(sk, flags, record_type);
+- if (err && sk->sk_err == EBADMSG) {
++ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
+ *copied -= sk_msg_free(sk, msg);
+ tls_free_open_rec(sk);
+ err = -sk->sk_err;
+@@ -838,7 +838,7 @@ more_data:
+ switch (psock->eval) {
+ case __SK_PASS:
+ err = tls_push_record(sk, flags, record_type);
+- if (err && sk->sk_err == EBADMSG) {
++ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
+ *copied -= sk_msg_free(sk, msg);
+ tls_free_open_rec(sk);
+ err = -sk->sk_err;
+@@ -846,6 +846,7 @@ more_data:
+ }
+ break;
+ case __SK_REDIRECT:
++ redir_ingress = psock->redir_ingress;
+ sk_redir = psock->sk_redir;
+ memcpy(&msg_redir, msg, sizeof(*msg));
+ if (msg->apply_bytes < send)
+@@ -855,7 +856,8 @@ more_data:
+ sk_msg_return_zero(sk, msg, send);
+ msg->sg.size -= send;
+ release_sock(sk);
+- err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
++ err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
++ &msg_redir, send, flags);
+ lock_sock(sk);
+ if (err < 0) {
+ *copied -= sk_msg_free_nocharge(sk, &msg_redir);
+@@ -948,7 +950,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+ MSG_CMSG_COMPAT))
+ return -EOPNOTSUPP;
+
+- mutex_lock(&tls_ctx->tx_lock);
++ ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++ if (ret)
++ return ret;
+ lock_sock(sk);
+
+ if (unlikely(msg->msg_controllen)) {
+@@ -1282,7 +1286,9 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ return -EOPNOTSUPP;
+
+- mutex_lock(&tls_ctx->tx_lock);
++ ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++ if (ret)
++ return ret;
+ lock_sock(sk);
+ ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+ release_sock(sk);
+@@ -1483,11 +1489,11 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+ if (prot->version == TLS_1_3_VERSION ||
+ prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
+ memcpy(iv + iv_offset, tls_ctx->rx.iv,
+- crypto_aead_ivsize(ctx->aead_recv));
++ prot->iv_size + prot->salt_size);
+ else
+ memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
+
+- xor_iv_with_seq(prot, iv, tls_ctx->rx.rec_seq);
++ xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);
+
+ /* Prepare AAD */
+ tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+@@ -1993,6 +1999,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ ssize_t copied = 0;
++ bool from_queue;
+ int err = 0;
+ long timeo;
+ int chunk;
+@@ -2002,25 +2009,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+
+ timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+
+- skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
+- if (!skb)
+- goto splice_read_end;
+-
+- if (!ctx->decrypted) {
+- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+-
+- /* splice does not support reading control messages */
+- if (ctx->control != TLS_RECORD_TYPE_DATA) {
+- err = -EINVAL;
++ from_queue = !skb_queue_empty(&ctx->rx_list);
++ if (from_queue) {
++ skb = __skb_dequeue(&ctx->rx_list);
++ } else {
++ skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
++ &err);
++ if (!skb)
+ goto splice_read_end;
+- }
+
++ err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+ if (err < 0) {
+ tls_err_abort(sk, -EBADMSG);
+ goto splice_read_end;
+ }
+- ctx->decrypted = 1;
+ }
++
++ /* splice does not support reading control messages */
++ if (ctx->control != TLS_RECORD_TYPE_DATA) {
++ err = -EINVAL;
++ goto splice_read_end;
++ }
++
+ rxm = strp_msg(skb);
+
+ chunk = min_t(unsigned int, rxm->full_len, len);
+@@ -2028,7 +2038,17 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ if (copied < 0)
+ goto splice_read_end;
+
+- tls_sw_advance_skb(sk, skb, copied);
++ if (!from_queue) {
++ ctx->recv_pkt = NULL;
++ __strp_unpause(&ctx->strp);
++ }
++ if (chunk < rxm->full_len) {
++ __skb_queue_head(&ctx->rx_list, skb);
++ rxm->offset += len;
++ rxm->full_len -= len;
++ } else {
++ consume_skb(skb);
++ }
+
+ splice_read_end:
+ release_sock(sk);
+@@ -2268,11 +2288,19 @@ static void tx_work_handler(struct work_struct *work)
+
+ if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ return;
+- mutex_lock(&tls_ctx->tx_lock);
+- lock_sock(sk);
+- tls_tx_records(sk, -1);
+- release_sock(sk);
+- mutex_unlock(&tls_ctx->tx_lock);
++
++ if (mutex_trylock(&tls_ctx->tx_lock)) {
++ lock_sock(sk);
++ tls_tx_records(sk, -1);
++ release_sock(sk);
++ mutex_unlock(&tls_ctx->tx_lock);
++ } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
++ /* Someone is holding the tx_lock, they will likely run Tx
++ * and cancel the work on their way out of the lock section.
++ * Schedule a long delay just in case.
++ */
++ schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
++ }
+ }
+
+ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 78e08e82c08c4..748769f4ba058 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -446,7 +446,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+ * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
+ * to other and its full, we will hang waiting for POLLOUT.
+ */
+- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
++ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
+ return 1;
+
+ if (connected)
+@@ -504,12 +504,6 @@ static void unix_sock_destructor(struct sock *sk)
+
+ skb_queue_purge(&sk->sk_receive_queue);
+
+-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+- if (u->oob_skb) {
+- kfree_skb(u->oob_skb);
+- u->oob_skb = NULL;
+- }
+-#endif
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
+ WARN_ON(!sk_unhashed(sk));
+ WARN_ON(sk->sk_socket);
+@@ -544,7 +538,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
+ /* Clear state */
+ unix_state_lock(sk);
+ sock_orphan(sk);
+- sk->sk_shutdown = SHUTDOWN_MASK;
++ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
+ path = u->path;
+ u->path.dentry = NULL;
+ u->path.mnt = NULL;
+@@ -556,13 +550,20 @@ static void unix_release_sock(struct sock *sk, int embrion)
+
+ unix_state_unlock(sk);
+
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
++ if (u->oob_skb) {
++ kfree_skb(u->oob_skb);
++ u->oob_skb = NULL;
++ }
++#endif
++
+ wake_up_interruptible_all(&u->peer_wait);
+
+ if (skpair != NULL) {
+ if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+ unix_state_lock(skpair);
+ /* No more writes */
+- skpair->sk_shutdown = SHUTDOWN_MASK;
++ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
+ if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+ skpair->sk_err = ECONNRESET;
+ unix_state_unlock(skpair);
+@@ -602,7 +603,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
+ * What the above comment does talk about? --ANK(980817)
+ */
+
+- if (unix_tot_inflight)
++ if (READ_ONCE(unix_tot_inflight))
+ unix_gc(); /* Garbage collect fds */
+ }
+
+@@ -716,7 +717,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
+ if (mutex_lock_interruptible(&u->iolock))
+ return -EINTR;
+
+- sk->sk_peek_off = val;
++ WRITE_ONCE(sk->sk_peek_off, val);
+ mutex_unlock(&u->iolock);
+
+ return 0;
+@@ -1306,7 +1307,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
+
+ sched = !sock_flag(other, SOCK_DEAD) &&
+ !(other->sk_shutdown & RCV_SHUTDOWN) &&
+- unix_recvq_full(other);
++ unix_recvq_full_lockless(other);
+
+ unix_state_unlock(other);
+
+@@ -1864,13 +1865,20 @@ restart_locked:
+ unix_state_lock(sk);
+
+ err = 0;
+- if (unix_peer(sk) == other) {
++ if (sk->sk_type == SOCK_SEQPACKET) {
++ /* We are here only when racing with unix_release_sock()
++ * is clearing @other. Never change state to TCP_CLOSE
++ * unlike SOCK_DGRAM wants.
++ */
++ unix_state_unlock(sk);
++ err = -EPIPE;
++ } else if (unix_peer(sk) == other) {
+ unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
++ sk->sk_state = TCP_CLOSE;
+ unix_state_unlock(sk);
+
+- sk->sk_state = TCP_CLOSE;
+ unix_dgram_disconnected(sk, other);
+ sock_put(other);
+ err = -ECONNREFUSED;
+@@ -1961,8 +1969,9 @@ out:
+ */
+ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
+
+-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+-static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
++static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
++ struct scm_cookie *scm, bool fds_sent)
+ {
+ struct unix_sock *ousk = unix_sk(other);
+ struct sk_buff *skb;
+@@ -1973,6 +1982,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
+ if (!skb)
+ return err;
+
++ err = unix_scm_to_skb(scm, skb, !fds_sent);
++ if (err < 0) {
++ kfree_skb(skb);
++ return err;
++ }
+ skb_put(skb, 1);
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+@@ -1996,7 +2010,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
+ if (ousk->oob_skb)
+ consume_skb(ousk->oob_skb);
+
+- ousk->oob_skb = skb;
++ WRITE_ONCE(ousk->oob_skb, skb);
+
+ scm_stat_add(other, skb);
+ skb_queue_tail(&other->sk_receive_queue, skb);
+@@ -2027,7 +2041,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+
+ err = -EOPNOTSUPP;
+ if (msg->msg_flags & MSG_OOB) {
+-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (len)
+ len--;
+ else
+@@ -2098,9 +2112,9 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ sent += size;
+ }
+
+-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (msg->msg_flags & MSG_OOB) {
+- err = queue_oob(sock, msg, other);
++ err = queue_oob(sock, msg, other, &scm, fds_sent);
+ if (err)
+ goto out_err;
+ sent++;
+@@ -2142,6 +2156,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
+
+ if (false) {
+ alloc_skb:
++ spin_unlock(&other->sk_receive_queue.lock);
+ unix_state_unlock(other);
+ mutex_unlock(&unix_sk(other)->iolock);
+ newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+@@ -2181,6 +2196,7 @@ alloc_skb:
+ init_scm = false;
+ }
+
++ spin_lock(&other->sk_receive_queue.lock);
+ skb = skb_peek_tail(&other->sk_receive_queue);
+ if (tail && tail == skb) {
+ skb = newskb;
+@@ -2211,14 +2227,11 @@ alloc_skb:
+ refcount_add(size, &sk->sk_wmem_alloc);
+
+ if (newskb) {
+- err = unix_scm_to_skb(&scm, skb, false);
+- if (err)
+- goto err_state_unlock;
+- spin_lock(&other->sk_receive_queue.lock);
++ unix_scm_to_skb(&scm, skb, false);
+ __skb_queue_tail(&other->sk_receive_queue, newskb);
+- spin_unlock(&other->sk_receive_queue.lock);
+ }
+
++ spin_unlock(&other->sk_receive_queue.lock);
+ unix_state_unlock(other);
+ mutex_unlock(&unix_sk(other)->iolock);
+
+@@ -2514,9 +2527,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+
+ oob_skb = u->oob_skb;
+
+- if (!(state->flags & MSG_PEEK)) {
+- u->oob_skb = NULL;
+- }
++ if (!(state->flags & MSG_PEEK))
++ WRITE_ONCE(u->oob_skb, NULL);
+
+ unix_state_unlock(sk);
+
+@@ -2551,7 +2563,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+ skb = NULL;
+ } else if (sock_flag(sk, SOCK_URGINLINE)) {
+ if (!(flags & MSG_PEEK)) {
+- u->oob_skb = NULL;
++ WRITE_ONCE(u->oob_skb, NULL);
+ consume_skb(skb);
+ }
+ } else if (!(flags & MSG_PEEK)) {
+@@ -2881,10 +2893,7 @@ static int unix_shutdown(struct socket *sock, int mode)
+ ++mode;
+
+ unix_state_lock(sk);
+- sk->sk_shutdown |= mode;
+- if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+- mode == SHUTDOWN_MASK)
+- sk->sk_state = TCP_CLOSE;
++ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
+ other = unix_peer(sk);
+ if (other)
+ sock_hold(other);
+@@ -2904,7 +2913,7 @@ static int unix_shutdown(struct socket *sock, int mode)
+ if (mode&SEND_SHUTDOWN)
+ peer_mode |= RCV_SHUTDOWN;
+ unix_state_lock(other);
+- other->sk_shutdown |= peer_mode;
++ WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
+ unix_state_unlock(other);
+ other->sk_state_change(other);
+ if (peer_mode == SHUTDOWN_MASK)
+@@ -3009,11 +3018,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ case SIOCATMARK:
+ {
+ struct sk_buff *skb;
+- struct unix_sock *u = unix_sk(sk);
+ int answ = 0;
+
+ skb = skb_peek(&sk->sk_receive_queue);
+- if (skb && skb == u->oob_skb)
++ if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
+ answ = 1;
+ err = put_user(answ, (int __user *)arg);
+ }
+@@ -3037,16 +3045,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
+ {
+ struct sock *sk = sock->sk;
+ __poll_t mask;
++ u8 shutdown;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
++ shutdown = READ_ONCE(sk->sk_shutdown);
+
+ /* exceptional events? */
+ if (sk->sk_err)
+ mask |= EPOLLERR;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (shutdown == SHUTDOWN_MASK)
+ mask |= EPOLLHUP;
+- if (sk->sk_shutdown & RCV_SHUTDOWN)
++ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+
+ /* readable? */
+@@ -3054,6 +3064,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
+ mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
++#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
++ if (READ_ONCE(unix_sk(sk)->oob_skb))
++ mask |= EPOLLPRI;
++#endif
+
+ /* Connection-based need to check for termination and startup */
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+@@ -3076,18 +3090,20 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
+ struct sock *sk = sock->sk, *other;
+ unsigned int writable;
+ __poll_t mask;
++ u8 shutdown;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
++ shutdown = READ_ONCE(sk->sk_shutdown);
+
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+- if (sk->sk_shutdown & RCV_SHUTDOWN)
++ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+- if (sk->sk_shutdown == SHUTDOWN_MASK)
++ if (shutdown == SHUTDOWN_MASK)
+ mask |= EPOLLHUP;
+
+ /* readable? */
+@@ -3401,6 +3417,7 @@ static int __init af_unix_init(void)
+ rc = proto_register(&unix_stream_proto, 1);
+ if (rc != 0) {
+ pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
++ proto_unregister(&unix_dgram_proto);
+ goto out;
+ }
+
+diff --git a/net/unix/diag.c b/net/unix/diag.c
+index 7e7d7f45685af..e534e327a6a5a 100644
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+ return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
+ }
+
+-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
++static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
++ struct user_namespace *user_ns)
+ {
+- uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
++ uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+ return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
+ }
+
+ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
+- u32 portid, u32 seq, u32 flags, int sk_ino)
++ struct user_namespace *user_ns,
++ u32 portid, u32 seq, u32 flags, int sk_ino)
+ {
+ struct nlmsghdr *nlh;
+ struct unix_diag_msg *rep;
+@@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
+ goto out_nlmsg_trim;
+
+ if ((req->udiag_show & UDIAG_SHOW_UID) &&
+- sk_diag_dump_uid(sk, skb))
++ sk_diag_dump_uid(sk, skb, user_ns))
+ goto out_nlmsg_trim;
+
+ nlmsg_end(skb, nlh);
+@@ -178,7 +180,8 @@ out_nlmsg_trim:
+ }
+
+ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
+- u32 portid, u32 seq, u32 flags)
++ struct user_namespace *user_ns,
++ u32 portid, u32 seq, u32 flags)
+ {
+ int sk_ino;
+
+@@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
+ if (!sk_ino)
+ return 0;
+
+- return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
++ return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
+ }
+
+ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+@@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ goto next;
+ if (!(req->udiag_states & (1 << sk->sk_state)))
+ goto next;
+- if (sk_diag_dump(sk, skb, req,
++ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+@@ -285,7 +288,8 @@ again:
+ if (!rep)
+ goto out;
+
+- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
++ err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
++ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0, req->udiag_ino);
+ if (err < 0) {
+ nlmsg_free(rep);
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 12e2ddaf887f2..dc27635403932 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
+ {
+ /* If number of inflight sockets is insane,
+ * force a garbage collect right now.
++ * Paired with the WRITE_ONCE() in unix_inflight(),
++ * unix_notinflight() and gc_in_progress().
+ */
+- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
++ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
++ !READ_ONCE(gc_in_progress))
+ unix_gc();
+ wait_event(unix_gc_wait, gc_in_progress == false);
+ }
+@@ -201,6 +204,7 @@ void wait_for_unix_gc(void)
+ /* The external entry point: unix_gc() */
+ void unix_gc(void)
+ {
++ struct sk_buff *next_skb, *skb;
+ struct unix_sock *u;
+ struct unix_sock *next;
+ struct sk_buff_head hitlist;
+@@ -213,7 +217,9 @@ void unix_gc(void)
+ if (gc_in_progress)
+ goto out;
+
+- gc_in_progress = true;
++ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
++ WRITE_ONCE(gc_in_progress, true);
++
+ /* First, select candidates for garbage collection. Only
+ * in-flight sockets are considered, and from those only ones
+ * which don't have any external reference.
+@@ -292,14 +298,36 @@ void unix_gc(void)
+
+ spin_unlock(&unix_gc_lock);
+
++ /* We need io_uring to clean its registered files, ignore all io_uring
++ * originated skbs. It's fine as io_uring doesn't keep references to
++ * other io_uring instances and so killing all other files in the cycle
++ * will put all io_uring references forcing it to go through normal
++ * release.path eventually putting registered files.
++ */
++ skb_queue_walk_safe(&hitlist, skb, next_skb) {
++ if (skb->scm_io_uring) {
++ __skb_unlink(skb, &hitlist);
++ skb_queue_tail(&skb->sk->sk_receive_queue, skb);
++ }
++ }
++
+ /* Here we are. Hitlist is filled. Die. */
+ __skb_queue_purge(&hitlist);
+
+ spin_lock(&unix_gc_lock);
+
++ /* There could be io_uring registered files, just push them back to
++ * the inflight list
++ */
++ list_for_each_entry_safe(u, next, &gc_candidates, link)
++ list_move_tail(&u->link, &gc_inflight_list);
++
+ /* All candidates should have been detached by now. */
+ BUG_ON(!list_empty(&gc_candidates));
+- gc_in_progress = false;
++
++ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
++ WRITE_ONCE(gc_in_progress, false);
++
+ wake_up(&unix_gc_wait);
+
+ out:
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index 052ae709ce289..e8e2a00bb0f58 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -60,9 +60,10 @@ void unix_inflight(struct user_struct *user, struct file *fp)
+ } else {
+ BUG_ON(list_empty(&u->link));
+ }
+- unix_tot_inflight++;
++ /* Paired with READ_ONCE() in wait_for_unix_gc() */
++ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+- user->unix_inflight++;
++ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+ spin_unlock(&unix_gc_lock);
+ }
+
+@@ -80,9 +81,10 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
+
+ if (atomic_long_dec_and_test(&u->inflight))
+ list_del_init(&u->link);
+- unix_tot_inflight--;
++ /* Paired with READ_ONCE() in wait_for_unix_gc() */
++ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+- user->unix_inflight--;
++ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+ spin_unlock(&unix_gc_lock);
+ }
+
+@@ -96,7 +98,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
+ {
+ struct user_struct *user = current_user();
+
+- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
++ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+ }
+diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
+index 452376c6f4194..5919d61d9874a 100644
+--- a/net/unix/unix_bpf.c
++++ b/net/unix/unix_bpf.c
+@@ -55,6 +55,9 @@ static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ struct sk_psock *psock;
+ int copied;
+
++ if (!len)
++ return 0;
++
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return __unix_recvmsg(sk, msg, len, flags);
+diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
+index e2c0cfb334d20..9a65a2f195853 100644
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -333,7 +333,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
+ }
+ EXPORT_SYMBOL_GPL(vsock_remove_sock);
+
+-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
++void vsock_for_each_connected_socket(struct vsock_transport *transport,
++ void (*fn)(struct sock *sk))
+ {
+ int i;
+
+@@ -342,8 +343,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+ for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
+ struct vsock_sock *vsk;
+ list_for_each_entry(vsk, &vsock_connected_table[i],
+- connected_table)
++ connected_table) {
++ if (vsk->transport != transport)
++ continue;
++
+ fn(sk_vsock(vsk));
++ }
+ }
+
+ spin_unlock_bh(&vsock_table_lock);
+@@ -1280,6 +1285,7 @@ static void vsock_connect_timeout(struct work_struct *work)
+ if (sk->sk_state == TCP_SYN_SENT &&
+ (sk->sk_shutdown != SHUTDOWN_MASK)) {
+ sk->sk_state = TCP_CLOSE;
++ sk->sk_socket->state = SS_UNCONNECTED;
+ sk->sk_err = ETIMEDOUT;
+ sk_error_report(sk);
+ vsock_transport_cancel_pkt(vsk);
+@@ -1322,6 +1328,8 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
+ * non-blocking call.
+ */
+ err = -EALREADY;
++ if (flags & O_NONBLOCK)
++ goto out;
+ break;
+ default:
+ if ((sk->sk_state == TCP_LISTEN) ||
+@@ -1383,7 +1391,14 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
+ * timeout fires.
+ */
+ sock_hold(sk);
+- schedule_delayed_work(&vsk->connect_work, timeout);
++
++ /* If the timeout function is already scheduled,
++ * reschedule it, then ungrab the socket refcount to
++ * keep it balanced.
++ */
++ if (mod_delayed_work(system_wq, &vsk->connect_work,
++ timeout))
++ sock_put(sk);
+
+ /* Skip ahead to preserve error code set above. */
+ goto out_wait;
+@@ -1398,8 +1413,9 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
+ sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
+ sock->state = SS_UNCONNECTED;
+ vsock_transport_cancel_pkt(vsk);
++ vsock_remove_connected(vsk);
+ goto out_wait;
+- } else if (timeout == 0) {
++ } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
+ err = -ETIMEDOUT;
+ sk->sk_state = TCP_CLOSE;
+ sock->state = SS_UNCONNECTED;
+@@ -1881,8 +1897,11 @@ static int vsock_connectible_wait_data(struct sock *sk,
+ err = 0;
+ transport = vsk->transport;
+
+- while ((data = vsock_connectible_has_data(vsk)) == 0) {
++ while (1) {
+ prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
++ data = vsock_connectible_has_data(vsk);
++ if (data != 0)
++ break;
+
+ if (sk->sk_err != 0 ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
+index 4f7c99dfd16cf..c5f936fbf876d 100644
+--- a/net/vmw_vsock/virtio_transport.c
++++ b/net/vmw_vsock/virtio_transport.c
+@@ -24,6 +24,7 @@
+ static struct workqueue_struct *virtio_vsock_workqueue;
+ static struct virtio_vsock __rcu *the_virtio_vsock;
+ static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
++static struct virtio_transport virtio_transport; /* forward declaration */
+
+ struct virtio_vsock {
+ struct virtio_device *vdev;
+@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
+ switch (le32_to_cpu(event->id)) {
+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
+ virtio_vsock_update_guest_cid(vsock);
+- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
++ vsock_for_each_connected_socket(&virtio_transport.transport,
++ virtio_vsock_reset_sock);
+ break;
+ }
+ }
+@@ -620,6 +622,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
+ INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+
++ if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
++ vsock->seqpacket_allow = true;
++
++ vdev->priv = vsock;
++
++ virtio_device_ready(vdev);
++
+ mutex_lock(&vsock->tx_lock);
+ vsock->tx_run = true;
+ mutex_unlock(&vsock->tx_lock);
+@@ -634,10 +643,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
+ vsock->event_run = true;
+ mutex_unlock(&vsock->event_lock);
+
+- if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+- vsock->seqpacket_allow = true;
+-
+- vdev->priv = vsock;
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+@@ -662,7 +667,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
+ synchronize_rcu();
+
+ /* Reset all connected sockets when the device disappear */
+- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
++ vsock_for_each_connected_socket(&virtio_transport.transport,
++ virtio_vsock_reset_sock);
+
+ /* Stop all work handlers to make sure no one is accessing the device,
+ * so we can safely call vdev->config->reset().
+diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
+index 59ee1be5a6dd3..3a12aee33e92f 100644
+--- a/net/vmw_vsock/virtio_transport_common.c
++++ b/net/vmw_vsock/virtio_transport_common.c
+@@ -1299,7 +1299,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
+ space_available = virtio_transport_space_update(sk, pkt);
+
+ /* Update CID in case it has changed after a transport reset event */
+- vsk->local_addr.svm_cid = dst.svm_cid;
++ if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
++ vsk->local_addr.svm_cid = dst.svm_cid;
+
+ if (space_available)
+ sk->sk_write_space(sk);
+@@ -1341,7 +1342,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
+
+ void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
+ {
+- kfree(pkt->buf);
++ kvfree(pkt->buf);
+ kfree(pkt);
+ }
+ EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
+diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
+index 7aef34e32bdf8..94c1112f1c8c3 100644
+--- a/net/vmw_vsock/vmci_transport.c
++++ b/net/vmw_vsock/vmci_transport.c
+@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+
+ static int PROTOCOL_OVERRIDE = -1;
+
++static struct vsock_transport vmci_transport; /* forward declaration */
++
+ /* Helper function to convert from a VMCI error code to a VSock error code. */
+
+ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
+@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
+ const struct vmci_event_data *e_data,
+ void *client_data)
+ {
+- vsock_for_each_connected_socket(vmci_transport_handle_detach);
++ vsock_for_each_connected_socket(&vmci_transport,
++ vmci_transport_handle_detach);
+ }
+
+ static void vmci_transport_recv_pkt_work(struct work_struct *work)
+@@ -1708,7 +1711,11 @@ static int vmci_transport_dgram_enqueue(
+ if (!dg)
+ return -ENOMEM;
+
+- memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
++ err = memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
++ if (err) {
++ kfree(dg);
++ return err;
++ }
+
+ dg->dst = vmci_make_handle(remote_addr->svm_cid,
+ remote_addr->svm_port);
+diff --git a/net/wireless/core.c b/net/wireless/core.c
+index aaba847d79eb2..d10686f4bf153 100644
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -5,7 +5,7 @@
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
+- * Copyright (C) 2018-2021 Intel Corporation
++ * Copyright (C) 2018-2022 Intel Corporation
+ */
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+@@ -332,29 +332,20 @@ static void cfg80211_event_work(struct work_struct *work)
+ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
+ {
+ struct wireless_dev *wdev, *tmp;
+- bool found = false;
+
+ ASSERT_RTNL();
+
+- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
++ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
+ if (wdev->nl_owner_dead) {
+ if (wdev->netdev)
+ dev_close(wdev->netdev);
+- found = true;
+- }
+- }
+-
+- if (!found)
+- return;
+
+- wiphy_lock(&rdev->wiphy);
+- list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
+- if (wdev->nl_owner_dead) {
++ wiphy_lock(&rdev->wiphy);
+ cfg80211_leave(rdev, wdev);
+ rdev_del_virtual_intf(rdev, wdev);
++ wiphy_unlock(&rdev->wiphy);
+ }
+ }
+- wiphy_unlock(&rdev->wiphy);
+ }
+
+ static void cfg80211_destroy_iface_wk(struct work_struct *work)
+@@ -377,12 +368,12 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
+ rdev = container_of(work, struct cfg80211_registered_device,
+ sched_scan_stop_wk);
+
+- rtnl_lock();
++ wiphy_lock(&rdev->wiphy);
+ list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
+ if (req->nl_owner_dead)
+ cfg80211_stop_sched_scan_req(rdev, req, false);
+ }
+- rtnl_unlock();
++ wiphy_unlock(&rdev->wiphy);
+ }
+
+ static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
+@@ -1081,6 +1072,16 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
+ list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list)
+ cfg80211_put_bss(&rdev->wiphy, &scan->pub);
+ mutex_destroy(&rdev->wiphy.mtx);
++
++ /*
++ * The 'regd' can only be non-NULL if we never finished
++ * initializing the wiphy and thus never went through the
++ * unregister path - e.g. in failure scenarios. Thus, it
++ * cannot have been visible to anyone if non-NULL, so we
++ * can just free it here.
++ */
++ kfree(rcu_dereference_raw(rdev->wiphy.regd));
++
+ kfree(rdev);
+ }
+
+diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
+index aab43469a2f04..0878b162890af 100644
+--- a/net/wireless/debugfs.c
++++ b/net/wireless/debugfs.c
+@@ -65,9 +65,10 @@ static ssize_t ht40allow_map_read(struct file *file,
+ {
+ struct wiphy *wiphy = file->private_data;
+ char *buf;
+- unsigned int offset = 0, buf_size = PAGE_SIZE, i, r;
++ unsigned int offset = 0, buf_size = PAGE_SIZE, i;
+ enum nl80211_band band;
+ struct ieee80211_supported_band *sband;
++ ssize_t r;
+
+ buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index bf7cd47525472..ed3ec7e320ced 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -314,6 +314,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
+ [NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG },
+ [NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG },
+ [NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG },
++ [NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR] = { .type = NLA_U8 },
+ };
+
+ static const struct nla_policy
+@@ -509,7 +510,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+ .len = IEEE80211_MAX_MESH_ID_LEN },
+ [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
+
+- [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
++ /* allow 3 for NUL-termination, we used to declare this NLA_STRING */
++ [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3),
+ [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
+
+ [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
+@@ -922,33 +924,37 @@ nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+ [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+ };
+
+-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+- struct cfg80211_registered_device **rdev,
+- struct wireless_dev **wdev)
++static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
++ struct cfg80211_registered_device **rdev,
++ struct wireless_dev **wdev,
++ struct nlattr **attrbuf)
+ {
+ int err;
+
+ if (!cb->args[0]) {
+- struct nlattr **attrbuf;
++ struct nlattr **attrbuf_free = NULL;
+
+- attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+- GFP_KERNEL);
+- if (!attrbuf)
+- return -ENOMEM;
++ if (!attrbuf) {
++ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
++ GFP_KERNEL);
++ if (!attrbuf)
++ return -ENOMEM;
++ attrbuf_free = attrbuf;
++ }
+
+ err = nlmsg_parse_deprecated(cb->nlh,
+ GENL_HDRLEN + nl80211_fam.hdrsize,
+ attrbuf, nl80211_fam.maxattr,
+ nl80211_policy, NULL);
+ if (err) {
+- kfree(attrbuf);
++ kfree(attrbuf_free);
+ return err;
+ }
+
+ rtnl_lock();
+ *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk),
+ attrbuf);
+- kfree(attrbuf);
++ kfree(attrbuf_free);
+ if (IS_ERR(*wdev)) {
+ rtnl_unlock();
+ return PTR_ERR(*wdev);
+@@ -3075,6 +3081,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
+ chandef->width =
+ nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
++ if (chandef->chan->band == NL80211_BAND_S1GHZ) {
++ /* User input error for channel width doesn't match channel */
++ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
++ NL_SET_ERR_MSG_ATTR(extack,
++ attrs[NL80211_ATTR_CHANNEL_WIDTH],
++ "bad channel width");
++ return -EINVAL;
++ }
++ }
+ if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
+ chandef->center_freq1 =
+ nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
+@@ -3612,6 +3627,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
+ wdev_lock(wdev);
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
++ case NL80211_IFTYPE_P2P_GO:
+ if (wdev->ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
+ goto nla_put_failure_locked;
+@@ -6001,7 +6017,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
+ int sta_idx = cb->args[2];
+ int err;
+
+- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
++ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
+ if (err)
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+@@ -6896,7 +6912,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
+ int path_idx = cb->args[2];
+ int err;
+
+- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
++ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
+ if (err)
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+@@ -7096,7 +7112,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
+ int path_idx = cb->args[2];
+ int err;
+
+- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
++ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
+ if (err)
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+@@ -9518,7 +9534,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
+ int start = cb->args[2], idx = 0;
+ int err;
+
+- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
++ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
+ if (err)
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+@@ -9651,7 +9667,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
+ if (!attrbuf)
+ return -ENOMEM;
+
+- res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
++ res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
+ if (res) {
+ kfree(attrbuf);
+ return res;
+@@ -11327,18 +11343,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
+ struct cfg80211_bitrate_mask mask;
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
++ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int err;
+
+ if (!rdev->ops->set_bitrate_mask)
+ return -EOPNOTSUPP;
+
++ wdev_lock(wdev);
+ err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+ NL80211_ATTR_TX_RATES, &mask,
+ dev, true);
+ if (err)
+- return err;
++ goto out;
+
+- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
++ err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
++out:
++ wdev_unlock(wdev);
++ return err;
+ }
+
+ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
+@@ -12902,7 +12923,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
+ return -ERANGE;
+ if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
+- nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KCK_EXT_LEN))
++ nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN))
+ return -ERANGE;
+
+ rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
+@@ -13173,6 +13194,9 @@ static int handle_nan_filter(struct nlattr *attr_filter,
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ filter[i].filter = nla_memdup(attr, GFP_KERNEL);
++ if (!filter[i].filter)
++ goto err;
++
+ filter[i].len = nla_len(attr);
+ i++;
+ }
+@@ -13185,6 +13209,15 @@ static int handle_nan_filter(struct nlattr *attr_filter,
+ }
+
+ return 0;
++
++err:
++ i = 0;
++ nla_for_each_nested(attr, attr_filter, rem) {
++ kfree(filter[i].filter);
++ i++;
++ }
++ kfree(filter);
++ return -ENOMEM;
+ }
+
+ static int nl80211_nan_add_func(struct sk_buff *skb,
+@@ -15904,8 +15937,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = nl80211_color_change,
+ .flags = GENL_UNS_ADMIN_PERM,
+- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+- NL80211_FLAG_NEED_RTNL,
++ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ },
+ };
+
+@@ -17509,7 +17541,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
+ wdev->chandef = *chandef;
+ wdev->preset_chandef = *chandef;
+
+- if (wdev->iftype == NL80211_IFTYPE_STATION &&
++ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
++ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
+ !WARN_ON(!wdev->current_bss))
+ cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
+
+diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
+index a3f387770f1bf..d642e3be4ee78 100644
+--- a/net/wireless/nl80211.h
++++ b/net/wireless/nl80211.h
+@@ -1,7 +1,7 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+ * Portions of this file
+- * Copyright (C) 2018, 2020 Intel Corporation
++ * Copyright (C) 2018, 2020-2021 Intel Corporation
+ */
+ #ifndef __NET_WIRELESS_NL80211_H
+ #define __NET_WIRELESS_NL80211_H
+@@ -22,10 +22,6 @@ static inline u64 wdev_id(struct wireless_dev *wdev)
+ ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
+ }
+
+-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+- struct cfg80211_registered_device **rdev,
+- struct wireless_dev **wdev);
+-
+ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
+ struct genl_info *info,
+ struct cfg80211_chan_def *chandef);
+diff --git a/net/wireless/reg.c b/net/wireless/reg.c
+index df87c7f3a0492..9944abe710b38 100644
+--- a/net/wireless/reg.c
++++ b/net/wireless/reg.c
+@@ -806,6 +806,8 @@ static int __init load_builtin_regdb_keys(void)
+ return 0;
+ }
+
++MODULE_FIRMWARE("regulatory.db.p7s");
++
+ static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+ {
+ const struct firmware *sig;
+@@ -1077,8 +1079,12 @@ static void regdb_fw_cb(const struct firmware *fw, void *context)
+ release_firmware(fw);
+ }
+
++MODULE_FIRMWARE("regulatory.db");
++
+ static int query_regdb_file(const char *alpha2)
+ {
++ int err;
++
+ ASSERT_RTNL();
+
+ if (regdb)
+@@ -1088,9 +1094,13 @@ static int query_regdb_file(const char *alpha2)
+ if (!alpha2)
+ return -ENOMEM;
+
+- return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+- &reg_pdev->dev, GFP_KERNEL,
+- (void *)alpha2, regdb_fw_cb);
++ err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
++ &reg_pdev->dev, GFP_KERNEL,
++ (void *)alpha2, regdb_fw_cb);
++ if (err)
++ kfree(alpha2);
++
++ return err;
+ }
+
+ int reg_reload_regdb(void)
+@@ -2338,6 +2348,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
+ struct cfg80211_chan_def chandef = {};
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ enum nl80211_iftype iftype;
++ bool ret;
+
+ wdev_lock(wdev);
+ iftype = wdev->iftype;
+@@ -2387,7 +2398,9 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_ADHOC:
+- return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype);
++ ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype);
++
++ return ret;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ return cfg80211_chandef_usable(wiphy, &chandef,
+@@ -2408,11 +2421,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy)
+ struct wireless_dev *wdev;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+- ASSERT_RTNL();
+-
++ wiphy_lock(wiphy);
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
+ if (!reg_wdev_chan_valid(wiphy, wdev))
+ cfg80211_leave(rdev, wdev);
++ wiphy_unlock(wiphy);
+ }
+
+ static void reg_check_chans_work(struct work_struct *work)
+@@ -4232,8 +4245,10 @@ static int __init regulatory_init_db(void)
+ return -EINVAL;
+
+ err = load_builtin_regdb_keys();
+- if (err)
++ if (err) {
++ platform_device_unregister(reg_pdev);
+ return err;
++ }
+
+ /* We always try to get an update for the static regdomain */
+ err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
+diff --git a/net/wireless/scan.c b/net/wireless/scan.c
+index adc0d14cfd860..c7192d7bcbd76 100644
+--- a/net/wireless/scan.c
++++ b/net/wireless/scan.c
+@@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
+ lockdep_assert_held(&rdev->bss_lock);
+
+ bss->refcount++;
+- if (bss->pub.hidden_beacon_bss) {
+- bss = container_of(bss->pub.hidden_beacon_bss,
+- struct cfg80211_internal_bss,
+- pub);
+- bss->refcount++;
+- }
+- if (bss->pub.transmitted_bss) {
+- bss = container_of(bss->pub.transmitted_bss,
+- struct cfg80211_internal_bss,
+- pub);
+- bss->refcount++;
+- }
++
++ if (bss->pub.hidden_beacon_bss)
++ bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++;
++
++ if (bss->pub.transmitted_bss)
++ bss_from_pub(bss->pub.transmitted_bss)->refcount++;
+ }
+
+ static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
+@@ -268,114 +262,152 @@ bool cfg80211_is_element_inherited(const struct element *elem,
+ }
+ EXPORT_SYMBOL(cfg80211_is_element_inherited);
+
+-static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+- const u8 *subelement, size_t subie_len,
+- u8 *new_ie, gfp_t gfp)
++static size_t cfg80211_copy_elem_with_frags(const struct element *elem,
++ const u8 *ie, size_t ie_len,
++ u8 **pos, u8 *buf, size_t buf_len)
+ {
+- u8 *pos, *tmp;
+- const u8 *tmp_old, *tmp_new;
+- const struct element *non_inherit_elem;
+- u8 *sub_copy;
++ if (WARN_ON((u8 *)elem < ie || elem->data > ie + ie_len ||
++ elem->data + elem->datalen > ie + ie_len))
++ return 0;
+
+- /* copy subelement as we need to change its content to
+- * mark an ie after it is processed.
+- */
+- sub_copy = kmemdup(subelement, subie_len, gfp);
+- if (!sub_copy)
++ if (elem->datalen + 2 > buf + buf_len - *pos)
+ return 0;
+
+- pos = &new_ie[0];
++ memcpy(*pos, elem, elem->datalen + 2);
++ *pos += elem->datalen + 2;
++
++ /* Finish if it is not fragmented */
++ if (elem->datalen != 255)
++ return *pos - buf;
++
++ ie_len = ie + ie_len - elem->data - elem->datalen;
++ ie = (const u8 *)elem->data + elem->datalen;
+
+- /* set new ssid */
+- tmp_new = cfg80211_find_ie(WLAN_EID_SSID, sub_copy, subie_len);
+- if (tmp_new) {
+- memcpy(pos, tmp_new, tmp_new[1] + 2);
+- pos += (tmp_new[1] + 2);
++ for_each_element(elem, ie, ie_len) {
++ if (elem->id != WLAN_EID_FRAGMENT)
++ break;
++
++ if (elem->datalen + 2 > buf + buf_len - *pos)
++ return 0;
++
++ memcpy(*pos, elem, elem->datalen + 2);
++ *pos += elem->datalen + 2;
++
++ if (elem->datalen != 255)
++ break;
+ }
+
+- /* get non inheritance list if exists */
+- non_inherit_elem =
+- cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+- sub_copy, subie_len);
++ return *pos - buf;
++}
+
+- /* go through IEs in ie (skip SSID) and subelement,
+- * merge them into new_ie
++static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
++ const u8 *subie, size_t subie_len,
++ u8 *new_ie, size_t new_ie_len)
++{
++ const struct element *non_inherit_elem, *parent, *sub;
++ u8 *pos = new_ie;
++ u8 id, ext_id;
++ unsigned int match_len;
++
++ non_inherit_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
++ subie, subie_len);
++
++ /* We copy the elements one by one from the parent to the generated
++ * elements.
++ * If they are not inherited (included in subie or in the non
++ * inheritance element), then we copy all occurrences the first time
++ * we see this element type.
+ */
+- tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen);
+- tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie;
+-
+- while (tmp_old + tmp_old[1] + 2 - ie <= ielen) {
+- if (tmp_old[0] == 0) {
+- tmp_old++;
++ for_each_element(parent, ie, ielen) {
++ if (parent->id == WLAN_EID_FRAGMENT)
+ continue;
++
++ if (parent->id == WLAN_EID_EXTENSION) {
++ if (parent->datalen < 1)
++ continue;
++
++ id = WLAN_EID_EXTENSION;
++ ext_id = parent->data[0];
++ match_len = 1;
++ } else {
++ id = parent->id;
++ match_len = 0;
+ }
+
+- if (tmp_old[0] == WLAN_EID_EXTENSION)
+- tmp = (u8 *)cfg80211_find_ext_ie(tmp_old[2], sub_copy,
+- subie_len);
+- else
+- tmp = (u8 *)cfg80211_find_ie(tmp_old[0], sub_copy,
+- subie_len);
++ /* Find first occurrence in subie */
++ sub = cfg80211_find_elem_match(id, subie, subie_len,
++ &ext_id, match_len, 0);
+
+- if (!tmp) {
+- const struct element *old_elem = (void *)tmp_old;
++ /* Copy from parent if not in subie and inherited */
++ if (!sub &&
++ cfg80211_is_element_inherited(parent, non_inherit_elem)) {
++ if (!cfg80211_copy_elem_with_frags(parent,
++ ie, ielen,
++ &pos, new_ie,
++ new_ie_len))
++ return 0;
+
+- /* ie in old ie but not in subelement */
+- if (cfg80211_is_element_inherited(old_elem,
+- non_inherit_elem)) {
+- memcpy(pos, tmp_old, tmp_old[1] + 2);
+- pos += tmp_old[1] + 2;
+- }
+- } else {
+- /* ie in transmitting ie also in subelement,
+- * copy from subelement and flag the ie in subelement
+- * as copied (by setting eid field to WLAN_EID_SSID,
+- * which is skipped anyway).
+- * For vendor ie, compare OUI + type + subType to
+- * determine if they are the same ie.
+- */
+- if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) {
+- if (!memcmp(tmp_old + 2, tmp + 2, 5)) {
+- /* same vendor ie, copy from
+- * subelement
+- */
+- memcpy(pos, tmp, tmp[1] + 2);
+- pos += tmp[1] + 2;
+- tmp[0] = WLAN_EID_SSID;
+- } else {
+- memcpy(pos, tmp_old, tmp_old[1] + 2);
+- pos += tmp_old[1] + 2;
+- }
+- } else {
+- /* copy ie from subelement into new ie */
+- memcpy(pos, tmp, tmp[1] + 2);
+- pos += tmp[1] + 2;
+- tmp[0] = WLAN_EID_SSID;
+- }
++ continue;
+ }
+
+- if (tmp_old + tmp_old[1] + 2 - ie == ielen)
+- break;
++ /* Already copied if an earlier element had the same type */
++ if (cfg80211_find_elem_match(id, ie, (u8 *)parent - ie,
++ &ext_id, match_len, 0))
++ continue;
+
+- tmp_old += tmp_old[1] + 2;
++ /* Not inheriting, copy all similar elements from subie */
++ while (sub) {
++ if (!cfg80211_copy_elem_with_frags(sub,
++ subie, subie_len,
++ &pos, new_ie,
++ new_ie_len))
++ return 0;
++
++ sub = cfg80211_find_elem_match(id,
++ sub->data + sub->datalen,
++ subie_len + subie -
++ (sub->data +
++ sub->datalen),
++ &ext_id, match_len, 0);
++ }
+ }
+
+- /* go through subelement again to check if there is any ie not
+- * copied to new ie, skip ssid, capability, bssid-index ie
++ /* The above misses elements that are included in subie but not in the
++ * parent, so do a pass over subie and append those.
++ * Skip the non-tx BSSID caps and non-inheritance element.
+ */
+- tmp_new = sub_copy;
+- while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
+- if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP ||
+- tmp_new[0] == WLAN_EID_SSID)) {
+- memcpy(pos, tmp_new, tmp_new[1] + 2);
+- pos += tmp_new[1] + 2;
++ for_each_element(sub, subie, subie_len) {
++ if (sub->id == WLAN_EID_NON_TX_BSSID_CAP)
++ continue;
++
++ if (sub->id == WLAN_EID_FRAGMENT)
++ continue;
++
++ if (sub->id == WLAN_EID_EXTENSION) {
++ if (sub->datalen < 1)
++ continue;
++
++ id = WLAN_EID_EXTENSION;
++ ext_id = sub->data[0];
++ match_len = 1;
++
++ if (ext_id == WLAN_EID_EXT_NON_INHERITANCE)
++ continue;
++ } else {
++ id = sub->id;
++ match_len = 0;
+ }
+- if (tmp_new + tmp_new[1] + 2 - sub_copy == subie_len)
+- break;
+- tmp_new += tmp_new[1] + 2;
++
++ /* Processed if one was included in the parent */
++ if (cfg80211_find_elem_match(id, ie, ielen,
++ &ext_id, match_len, 0))
++ continue;
++
++ if (!cfg80211_copy_elem_with_frags(sub, subie, subie_len,
++ &pos, new_ie, new_ie_len))
++ return 0;
+ }
+
+- kfree(sub_copy);
+ return pos - new_ie;
+ }
+
+@@ -429,6 +461,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
+
+ rcu_read_unlock();
+
++ /*
++ * This is a bit weird - it's not on the list, but already on another
++ * one! The only way that could happen is if there's some BSSID/SSID
++ * shared by multiple APs in their multi-BSSID profiles, potentially
++ * with hidden SSID mixed in ... ignore it.
++ */
++ if (!list_empty(&nontrans_bss->nontrans_list))
++ return -EINVAL;
++
+ /* add to the list */
+ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
+ return 0;
+@@ -600,7 +641,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+
+ ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
+ if (ret)
+- return ret;
++ return 0;
+
+ /* RNR IE may contain more than one NEIGHBOR_AP_INFO */
+ while (pos + sizeof(*ap_info) <= end) {
+@@ -702,8 +743,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
+
+ for (i = 0; i < request->n_ssids; i++) {
+ /* wildcard ssid in the scan request */
+- if (!request->ssids[i].ssid_len)
++ if (!request->ssids[i].ssid_len) {
++ if (ap->multi_bss && !ap->transmitted_bssid)
++ continue;
++
+ return true;
++ }
+
+ if (ap->ssid_len &&
+ ap->ssid_len == request->ssids[i].ssid_len) {
+@@ -829,6 +874,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
+ !cfg80211_find_ssid_match(ap, request))
+ continue;
+
++ if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
++ continue;
++
+ cfg80211_scan_req_add_chan(request, chan, true);
+ memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
+ scan_6ghz_params->short_ssid = ap->short_ssid;
+@@ -1597,6 +1645,23 @@ struct cfg80211_non_tx_bss {
+ u8 bssid_index;
+ };
+
++static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
++ const struct cfg80211_bss_ies *new_ies,
++ const struct cfg80211_bss_ies *old_ies)
++{
++ struct cfg80211_internal_bss *bss;
++
++ /* Assign beacon IEs to all sub entries */
++ list_for_each_entry(bss, &known->hidden_list, hidden_list) {
++ const struct cfg80211_bss_ies *ies;
++
++ ies = rcu_access_pointer(bss->pub.beacon_ies);
++ WARN_ON(ies != old_ies);
++
++ rcu_assign_pointer(bss->pub.beacon_ies, new_ies);
++ }
++}
++
+ static bool
+ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+@@ -1620,7 +1685,6 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
+ kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ } else if (rcu_access_pointer(new->pub.beacon_ies)) {
+ const struct cfg80211_bss_ies *old;
+- struct cfg80211_internal_bss *bss;
+
+ if (known->pub.hidden_beacon_bss &&
+ !list_empty(&known->hidden_list)) {
+@@ -1648,16 +1712,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
+ if (old == rcu_access_pointer(known->pub.ies))
+ rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies);
+
+- /* Assign beacon IEs to all sub entries */
+- list_for_each_entry(bss, &known->hidden_list, hidden_list) {
+- const struct cfg80211_bss_ies *ies;
+-
+- ies = rcu_access_pointer(bss->pub.beacon_ies);
+- WARN_ON(ies != old);
+-
+- rcu_assign_pointer(bss->pub.beacon_ies,
+- new->pub.beacon_ies);
+- }
++ cfg80211_update_hidden_bsses(known,
++ rcu_access_pointer(new->pub.beacon_ies),
++ old);
+
+ if (old)
+ kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+@@ -1734,6 +1791,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
+ new->refcount = 1;
+ INIT_LIST_HEAD(&new->hidden_list);
+ INIT_LIST_HEAD(&new->pub.nontrans_list);
++ /* we'll set this later if it was non-NULL */
++ new->pub.transmitted_bss = NULL;
+
+ if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
+ hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
+@@ -1971,11 +2030,18 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
+ /* this is a nontransmitting bss, we need to add it to
+ * transmitting bss' list if it is not there
+ */
++ spin_lock_bh(&rdev->bss_lock);
+ if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
+ &res->pub)) {
+- if (__cfg80211_unlink_bss(rdev, res))
++ if (__cfg80211_unlink_bss(rdev, res)) {
+ rdev->bss_generation++;
++ res = NULL;
++ }
+ }
++ spin_unlock_bh(&rdev->bss_lock);
++
++ if (!res)
++ return NULL;
+ }
+
+ trace_cfg80211_return_bss(&res->pub);
+@@ -2094,6 +2160,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
+ for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) {
+ if (elem->datalen < 4)
+ continue;
++ if (elem->data[0] < 1 || (int)elem->data[0] > 8)
++ continue;
+ for_each_element(sub, elem->data + 1, elem->datalen - 1) {
+ u8 profile_len;
+
+@@ -2147,7 +2215,7 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
+ new_ie_len = cfg80211_gen_new_ie(ie, ielen,
+ profile,
+ profile_len, new_ie,
+- gfp);
++ IEEE80211_MAX_DATA_LEN);
+ if (!new_ie_len)
+ continue;
+
+@@ -2229,7 +2297,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
+ size_t new_ie_len;
+ struct cfg80211_bss_ies *new_ies;
+ const struct cfg80211_bss_ies *old;
+- u8 cpy_len;
++ size_t cpy_len;
+
+ lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
+
+@@ -2296,6 +2364,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
+ } else {
+ old = rcu_access_pointer(nontrans_bss->beacon_ies);
+ rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies);
++ cfg80211_update_hidden_bsses(bss_from_pub(nontrans_bss),
++ new_ies, old);
+ rcu_assign_pointer(nontrans_bss->ies, new_ies);
+ if (old)
+ kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+@@ -2442,10 +2512,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
+ const struct cfg80211_bss_ies *ies1, *ies2;
+ size_t ielen = len - offsetof(struct ieee80211_mgmt,
+ u.probe_resp.variable);
+- struct cfg80211_non_tx_bss non_tx_data;
++ struct cfg80211_non_tx_bss non_tx_data = {};
+
+ res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
+ len, gfp);
++
++ /* don't do any further MBSSID handling for S1G */
++ if (ieee80211_is_s1g_beacon(mgmt->frame_control))
++ return res;
++
+ if (!res || !wiphy->support_mbssid ||
+ !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+ return res;
+diff --git a/net/wireless/sme.c b/net/wireless/sme.c
+index 08a70b4f090cc..1591cd68fc583 100644
+--- a/net/wireless/sme.c
++++ b/net/wireless/sme.c
+@@ -268,6 +268,15 @@ void cfg80211_conn_work(struct work_struct *work)
+ wiphy_unlock(&rdev->wiphy);
+ }
+
++static void cfg80211_step_auth_next(struct cfg80211_conn *conn,
++ struct cfg80211_bss *bss)
++{
++ memcpy(conn->bssid, bss->bssid, ETH_ALEN);
++ conn->params.bssid = conn->bssid;
++ conn->params.channel = bss->channel;
++ conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
++}
++
+ /* Returned bss is reference counted and must be cleaned up appropriately. */
+ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
+ {
+@@ -285,10 +294,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
+ if (!bss)
+ return NULL;
+
+- memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN);
+- wdev->conn->params.bssid = wdev->conn->bssid;
+- wdev->conn->params.channel = bss->channel;
+- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
++ cfg80211_step_auth_next(wdev->conn, bss);
+ schedule_work(&rdev->conn_work);
+
+ return bss;
+@@ -567,7 +573,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
+ wdev->conn->params.ssid_len = wdev->ssid_len;
+
+ /* see if we have the bss already */
+- bss = cfg80211_get_conn_bss(wdev);
++ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
++ wdev->conn->params.bssid,
++ wdev->conn->params.ssid,
++ wdev->conn->params.ssid_len,
++ wdev->conn_bss_type,
++ IEEE80211_PRIVACY(wdev->conn->params.privacy));
+
+ if (prev_bssid) {
+ memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
+@@ -578,6 +589,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
+ if (bss) {
+ enum nl80211_timeout_reason treason;
+
++ cfg80211_step_auth_next(wdev->conn, bss);
+ err = cfg80211_conn_do_work(wdev, &treason);
+ cfg80211_put_bss(wdev->wiphy, bss);
+ } else {
+@@ -1244,6 +1256,13 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
+ } else {
+ if (WARN_ON(connkeys))
+ return -EINVAL;
++
++ /* connect can point to wdev->wext.connect which
++ * can hold key data from a previous connection
++ */
++ connect->key = NULL;
++ connect->key_len = 0;
++ connect->key_idx = 0;
+ }
+
+ wdev->connect_keys = connkeys;
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index a1a99a5749844..cb15d7f4eb05a 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -1044,6 +1044,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
+
+ switch (otype) {
+ case NL80211_IFTYPE_AP:
++ case NL80211_IFTYPE_P2P_GO:
+ cfg80211_stop_ap(rdev, dev, true);
+ break;
+ case NL80211_IFTYPE_ADHOC:
+@@ -1352,7 +1353,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
+ 25599, /* 4.166666... */
+ 17067, /* 2.777777... */
+ 12801, /* 2.083333... */
+- 11769, /* 1.851851... */
++ 11377, /* 1.851725... */
+ 10239, /* 1.666666... */
+ 8532, /* 1.388888... */
+ 7680, /* 1.250000... */
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index 76a80a41615be..fe8765c4075d3 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -468,6 +468,7 @@ void wireless_send_event(struct net_device * dev,
+ struct __compat_iw_event *compat_event;
+ struct compat_iw_point compat_wrqu;
+ struct sk_buff *compskb;
++ int ptr_len;
+ #endif
+
+ /*
+@@ -582,6 +583,9 @@ void wireless_send_event(struct net_device * dev,
+ nlmsg_end(skb, nlh);
+ #ifdef CONFIG_COMPAT
+ hdr_len = compat_event_type_size[descr->header_type];
++
++ /* ptr_len is remaining size in event header apart from LCP */
++ ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN;
+ event_len = hdr_len + extra_len;
+
+ compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+@@ -612,16 +616,15 @@ void wireless_send_event(struct net_device * dev,
+ if (descr->header_type == IW_HEADER_TYPE_POINT) {
+ compat_wrqu.length = wrqu->data.length;
+ compat_wrqu.flags = wrqu->data.flags;
+- memcpy(&compat_event->pointer,
+- ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF,
+- hdr_len - IW_EV_COMPAT_LCP_LEN);
++ memcpy(compat_event->ptr_bytes,
++ ((char *)&compat_wrqu) + IW_EV_COMPAT_POINT_OFF,
++ ptr_len);
+ if (extra_len)
+- memcpy(((char *) compat_event) + hdr_len,
+- extra, extra_len);
++ memcpy(&compat_event->ptr_bytes[ptr_len],
++ extra, extra_len);
+ } else {
+ /* extra_len must be zero, so no if (extra) needed */
+- memcpy(&compat_event->pointer, wrqu,
+- hdr_len - IW_EV_COMPAT_LCP_LEN);
++ memcpy(compat_event->ptr_bytes, wrqu, ptr_len);
+ }
+
+ nlmsg_end(compskb, nlh);
+diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
+index 3583354a7d7fe..07f6206e7cb47 100644
+--- a/net/x25/af_x25.c
++++ b/net/x25/af_x25.c
+@@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog)
+ int rc = -EOPNOTSUPP;
+
+ lock_sock(sk);
++ if (sock->state != SS_UNCONNECTED) {
++ rc = -EINVAL;
++ release_sock(sk);
++ return rc;
++ }
++
+ if (sk->sk_state != TCP_LISTEN) {
+ memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
+ sk->sk_max_ack_backlog = backlog;
+@@ -1765,10 +1771,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
+
+ write_lock_bh(&x25_list_lock);
+
+- sk_for_each(s, &x25_list)
+- if (x25_sk(s)->neighbour == nb)
++ sk_for_each(s, &x25_list) {
++ if (x25_sk(s)->neighbour == nb) {
++ write_unlock_bh(&x25_list_lock);
++ lock_sock(s);
+ x25_disconnect(s, ENETUNREACH, 0, 0);
+-
++ release_sock(s);
++ write_lock_bh(&x25_list_lock);
++ }
++ }
+ write_unlock_bh(&x25_list_lock);
+
+ /* Remove any related forwards */
+diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
+index 5259ef8f5242f..748d8630ab58b 100644
+--- a/net/x25/x25_dev.c
++++ b/net/x25/x25_dev.c
+@@ -117,7 +117,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
+
+ if (!pskb_may_pull(skb, 1)) {
+ x25_neigh_put(nb);
+- return 0;
++ goto drop;
+ }
+
+ switch (skb->data[0]) {
+diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
+index f01ef6bda3909..65f918d29531d 100644
+--- a/net/xdp/xdp_umem.c
++++ b/net/xdp/xdp_umem.c
+@@ -152,10 +152,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
+
+ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+ {
+- u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
+ bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+- u64 npgs, addr = mr->addr, size = mr->len;
+- unsigned int chunks, chunks_rem;
++ u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
++ u64 addr = mr->addr, size = mr->len;
++ u32 chunks_rem, npgs_rem;
++ u64 chunks, npgs;
+ int err;
+
+ if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
+@@ -190,8 +191,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+ if (npgs > U32_MAX)
+ return -EINVAL;
+
+- chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
+- if (chunks == 0)
++ chunks = div_u64_rem(size, chunk_size, &chunks_rem);
++ if (!chunks || chunks > U32_MAX)
+ return -EINVAL;
+
+ if (!unaligned_chunks && chunks_rem)
+@@ -204,7 +205,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+ umem->headroom = headroom;
+ umem->chunk_size = chunk_size;
+ umem->chunks = chunks;
+- umem->npgs = (u32)npgs;
++ umem->npgs = npgs;
+ umem->pgs = NULL;
+ umem->user = NULL;
+ umem->flags = mr->flags;
+diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
+index d6b500dc42084..e5eb5616be0ca 100644
+--- a/net/xdp/xsk.c
++++ b/net/xdp/xsk.c
+@@ -358,9 +358,9 @@ out:
+ }
+ EXPORT_SYMBOL(xsk_tx_peek_desc);
+
+-static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+- u32 max_entries)
++static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
+ {
++ struct xdp_desc *descs = pool->tx_descs;
+ u32 nb_pkts = 0;
+
+ while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
+@@ -370,17 +370,15 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d
+ return nb_pkts;
+ }
+
+-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+- u32 max_entries)
++u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
+ {
+ struct xdp_sock *xs;
+- u32 nb_pkts;
+
+ rcu_read_lock();
+ if (!list_is_singular(&pool->xsk_tx_list)) {
+ /* Fallback to the non-batched version */
+ rcu_read_unlock();
+- return xsk_tx_peek_release_fallback(pool, descs, max_entries);
++ return xsk_tx_peek_release_fallback(pool, nb_pkts);
+ }
+
+ xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
+@@ -389,11 +387,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
+ goto out;
+ }
+
+- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
+- if (!nb_pkts) {
+- xs->tx->queue_empty_descs++;
+- goto out;
+- }
++ nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);
+
+ /* This is the backpressure mechanism for the Tx path. Try to
+ * reserve space in the completion queue for all packets, but
+@@ -401,12 +395,18 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
+ * packets. This avoids having to implement any buffering in
+ * the Tx path.
+ */
+- nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
++ nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
+ if (!nb_pkts)
+ goto out;
+
+- xskq_cons_release_n(xs->tx, nb_pkts);
++ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
++ if (!nb_pkts) {
++ xs->tx->queue_empty_descs++;
++ goto out;
++ }
++
+ __xskq_cons_release(xs->tx);
++ xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
+ xs->sk.sk_write_space(&xs->sk);
+
+ out:
+@@ -418,18 +418,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
+ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
+ {
+ struct net_device *dev = xs->dev;
+- int err;
+
+- rcu_read_lock();
+- err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
+- rcu_read_unlock();
+-
+- return err;
+-}
+-
+-static int xsk_zc_xmit(struct xdp_sock *xs)
+-{
+- return xsk_wakeup(xs, XDP_WAKEUP_TX);
++ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
+ }
+
+ static void xsk_destruct_skb(struct sk_buff *skb)
+@@ -548,6 +538,12 @@ static int xsk_generic_xmit(struct sock *sk)
+
+ mutex_lock(&xs->mutex);
+
++ /* Since we dropped the RCU read lock, the socket state might have changed. */
++ if (unlikely(!xsk_is_bound(xs))) {
++ err = -ENXIO;
++ goto out;
++ }
++
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
+ goto out;
+
+@@ -557,12 +553,6 @@ static int xsk_generic_xmit(struct sock *sk)
+ goto out;
+ }
+
+- skb = xsk_build_skb(xs, &desc);
+- if (IS_ERR(skb)) {
+- err = PTR_ERR(skb);
+- goto out;
+- }
+-
+ /* This is the backpressure mechanism for the Tx path.
+ * Reserve space in the completion queue and only proceed
+ * if there is space in it. This avoids having to implement
+@@ -571,11 +561,19 @@ static int xsk_generic_xmit(struct sock *sk)
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ if (xskq_prod_reserve(xs->pool->cq)) {
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+- kfree_skb(skb);
+ goto out;
+ }
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
++ skb = xsk_build_skb(xs, &desc);
++ if (IS_ERR(skb)) {
++ err = PTR_ERR(skb);
++ spin_lock_irqsave(&xs->pool->cq_lock, flags);
++ xskq_prod_cancel(xs->pool->cq);
++ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
++ goto out;
++ }
++
+ err = __dev_direct_xmit(skb, xs->queue_id);
+ if (err == NETDEV_TX_BUSY) {
+ /* Tell user-space to retry the send */
+@@ -611,16 +609,26 @@ out:
+ return err;
+ }
+
+-static int __xsk_sendmsg(struct sock *sk)
++static int xsk_xmit(struct sock *sk)
+ {
+ struct xdp_sock *xs = xdp_sk(sk);
++ int ret;
+
+ if (unlikely(!(xs->dev->flags & IFF_UP)))
+ return -ENETDOWN;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
+
+- return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
++ if (xs->zc)
++ return xsk_wakeup(xs, XDP_WAKEUP_TX);
++
++ /* Drop the RCU lock since the SKB path might sleep. */
++ rcu_read_unlock();
++ ret = xsk_generic_xmit(sk);
++ /* Reaquire RCU lock before going into common code. */
++ rcu_read_lock();
++
++ return ret;
+ }
+
+ static bool xsk_no_wakeup(struct sock *sk)
+@@ -634,7 +642,7 @@ static bool xsk_no_wakeup(struct sock *sk)
+ #endif
+ }
+
+-static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
++static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+ {
+ bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
+ struct sock *sk = sock->sk;
+@@ -649,16 +657,27 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+ if (sk_can_busy_loop(sk))
+ sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+
+- if (xsk_no_wakeup(sk))
++ if (xs->zc && xsk_no_wakeup(sk))
+ return 0;
+
+ pool = xs->pool;
+ if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
+- return __xsk_sendmsg(sk);
++ return xsk_xmit(sk);
+ return 0;
+ }
+
+-static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
++static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
++{
++ int ret;
++
++ rcu_read_lock();
++ ret = __xsk_sendmsg(sock, m, total_len);
++ rcu_read_unlock();
++
++ return ret;
++}
++
++static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+ {
+ bool need_wait = !(flags & MSG_DONTWAIT);
+ struct sock *sk = sock->sk;
+@@ -684,6 +703,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl
+ return 0;
+ }
+
++static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
++{
++ int ret;
++
++ rcu_read_lock();
++ ret = __xsk_recvmsg(sock, m, len, flags);
++ rcu_read_unlock();
++
++ return ret;
++}
++
+ static __poll_t xsk_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+ {
+@@ -694,8 +724,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
+
+ sock_poll_wait(file, sock, wait);
+
+- if (unlikely(!xsk_is_bound(xs)))
++ rcu_read_lock();
++ if (unlikely(!xsk_is_bound(xs))) {
++ rcu_read_unlock();
+ return mask;
++ }
+
+ pool = xs->pool;
+
+@@ -704,7 +737,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
+ xsk_wakeup(xs, pool->cached_need_wakeup);
+ else
+ /* Poll needs to drive Tx also in copy mode */
+- __xsk_sendmsg(sk);
++ xsk_xmit(sk);
+ }
+
+ if (xs->rx && !xskq_prod_is_empty(xs->rx))
+@@ -712,6 +745,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
+ if (xs->tx && xsk_tx_writeable(xs))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+
++ rcu_read_unlock();
+ return mask;
+ }
+
+@@ -743,7 +777,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
+
+ /* Wait for driver to stop using the xdp socket. */
+ xp_del_xsk(xs->pool, xs);
+- xs->dev = NULL;
+ synchronize_net();
+ dev_put(dev);
+ }
+@@ -860,6 +893,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct net_device *dev;
++ int bound_dev_if;
+ u32 flags, qid;
+ int err = 0;
+
+@@ -873,6 +907,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+ XDP_USE_NEED_WAKEUP))
+ return -EINVAL;
+
++ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
++ if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex)
++ return -EINVAL;
++
+ rtnl_lock();
+ mutex_lock(&xs->mutex);
+ if (xs->state != XSK_READY) {
+@@ -935,8 +973,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+ goto out_unlock;
+ }
+
+- err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
+- dev, qid);
++ err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
++ qid);
+ if (err) {
+ xp_destroy(xs->pool);
+ xs->pool = NULL;
+@@ -954,6 +992,20 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+
+ xp_get_pool(umem_xs->pool);
+ xs->pool = umem_xs->pool;
++
++ /* If underlying shared umem was created without Tx
++ * ring, allocate Tx descs array that Tx batching API
++ * utilizes
++ */
++ if (xs->tx && !xs->pool->tx_descs) {
++ err = xp_alloc_tx_descs(xs->pool, xs);
++ if (err) {
++ xp_put_pool(xs->pool);
++ xs->pool = NULL;
++ sockfd_put(sock);
++ goto out_unlock;
++ }
++ }
+ }
+
+ xdp_get_umem(umem_xs->umem);
+diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
+index 8de01aaac4a08..2aa559f1c1856 100644
+--- a/net/xdp/xsk_buff_pool.c
++++ b/net/xdp/xsk_buff_pool.c
+@@ -37,10 +37,21 @@ void xp_destroy(struct xsk_buff_pool *pool)
+ if (!pool)
+ return;
+
++ kvfree(pool->tx_descs);
+ kvfree(pool->heads);
+ kvfree(pool);
+ }
+
++int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
++{
++ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
++ GFP_KERNEL);
++ if (!pool->tx_descs)
++ return -ENOMEM;
++
++ return 0;
++}
++
+ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ struct xdp_umem *umem)
+ {
+@@ -57,6 +68,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
+ if (!pool->heads)
+ goto out;
+
++ if (xs->tx)
++ if (xp_alloc_tx_descs(pool, xs))
++ goto out;
++
+ pool->chunk_mask = ~((u64)umem->chunk_size - 1);
+ pool->addrs_cnt = umem->size;
+ pool->heads_cnt = umem->chunks;
+@@ -191,17 +206,18 @@ err_unreg_pool:
+ return err;
+ }
+
+-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
++int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
+ struct net_device *dev, u16 queue_id)
+ {
+ u16 flags;
++ struct xdp_umem *umem = umem_xs->umem;
+
+ /* One fill and completion ring required for each queue id. */
+ if (!pool->fq || !pool->cq)
+ return -EINVAL;
+
+ flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
+- if (pool->uses_need_wakeup)
++ if (umem_xs->pool->uses_need_wakeup)
+ flags |= XDP_USE_NEED_WAKEUP;
+
+ return xp_assign_dev(pool, dev, queue_id, flags);
+@@ -311,6 +327,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
+ for (i = 0; i < dma_map->dma_pages_cnt; i++) {
+ dma = &dma_map->dma_pages[i];
+ if (*dma) {
++ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
+ dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, attrs);
+ *dma = 0;
+diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
+index c014217f5fa7d..22b36c8143cfd 100644
+--- a/net/xdp/xsk_diag.c
++++ b/net/xdp/xsk_diag.c
+@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+ sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+ mutex_lock(&xs->mutex);
++ if (READ_ONCE(xs->state) == XSK_UNBOUND)
++ goto out_nlmsg_trim;
++
+ if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+ goto out_nlmsg_trim;
+
+diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
+index 9ae13cccfb28d..6b4df83aa28f6 100644
+--- a/net/xdp/xsk_queue.h
++++ b/net/xdp/xsk_queue.h
+@@ -157,6 +157,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
+ return false;
+
+ if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
++ addr + desc->len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ return false;
+
+@@ -201,11 +202,16 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
+ return false;
+ }
+
+-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
+- struct xdp_desc *descs,
+- struct xsk_buff_pool *pool, u32 max)
++static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
++{
++ q->cached_cons += cnt;
++}
++
++static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
++ u32 max)
+ {
+ u32 cached_cons = q->cached_cons, nb_entries = 0;
++ struct xdp_desc *descs = pool->tx_descs;
+
+ while (cached_cons != q->cached_prod && nb_entries < max) {
+ struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+@@ -222,6 +228,8 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
+ cached_cons++;
+ }
+
++ /* Release valid plus any invalid entries */
++ xskq_cons_release_n(q, cached_cons - q->cached_cons);
+ return nb_entries;
+ }
+
+@@ -278,14 +286,6 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
+ return xskq_cons_read_desc(q, desc, pool);
+ }
+
+-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs,
+- struct xsk_buff_pool *pool, u32 max)
+-{
+- u32 entries = xskq_cons_nb_entries(q, max);
+-
+- return xskq_cons_read_desc_batch(q, descs, pool, entries);
+-}
+-
+ /* To improve performance in the xskq_cons_release functions, only update local state here.
+ * Reflect this to global state when we get new entries from the ring in
+ * xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.
+@@ -295,11 +295,6 @@ static inline void xskq_cons_release(struct xsk_queue *q)
+ q->cached_cons++;
+ }
+
+-static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
+-{
+- q->cached_cons += cnt;
+-}
+-
+ static inline bool xskq_cons_is_full(struct xsk_queue *q)
+ {
+ /* No barriers needed since data is not accessed */
+@@ -361,21 +356,17 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
+ return 0;
+ }
+
+-static inline u32 xskq_prod_reserve_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
+- u32 max)
++static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
++ u32 nb_entries)
+ {
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+- u32 nb_entries, i, cached_prod;
+-
+- nb_entries = xskq_prod_nb_free(q, max);
++ u32 i, cached_prod;
+
+ /* A, matches D */
+ cached_prod = q->cached_prod;
+ for (i = 0; i < nb_entries; i++)
+ ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
+ q->cached_prod = cached_prod;
+-
+- return nb_entries;
+ }
+
+ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
+diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
+index 494aa744bfb9a..08a2870fdd36f 100644
+--- a/net/xfrm/Makefile
++++ b/net/xfrm/Makefile
+@@ -3,6 +3,8 @@
+ # Makefile for the XFRM subsystem.
+ #
+
++xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
++
+ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
+ xfrm_input.o xfrm_output.o \
+ xfrm_sysctl.o xfrm_replay.o xfrm_device.o
+diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
+index 1f08ebf7d80c5..24ca49ecebea3 100644
+--- a/net/xfrm/espintcp.c
++++ b/net/xfrm/espintcp.c
+@@ -170,7 +170,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
+ {
+ struct espintcp_ctx *ctx = espintcp_getctx(sk);
+
+- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
++ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+ return -ENOBUFS;
+
+ __skb_queue_tail(&ctx->out_queue, skb);
+diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
+index 2bf2693901631..655fe4ff86212 100644
+--- a/net/xfrm/xfrm_compat.c
++++ b/net/xfrm/xfrm_compat.c
+@@ -5,6 +5,7 @@
+ * Based on code and translator idea by: Florian Westphal <fw@strlen.de>
+ */
+ #include <linux/compat.h>
++#include <linux/nospec.h>
+ #include <linux/xfrm.h>
+ #include <net/xfrm.h>
+
+@@ -107,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
+ [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
+ [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
+- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
++ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
+ [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
+ [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
+ [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
+@@ -127,6 +128,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_SET_MARK] = { .type = NLA_U32 },
+ [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
+ [XFRMA_IF_ID] = { .type = NLA_U32 },
++ [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ };
+
+ static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
+@@ -274,9 +276,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
+ case XFRMA_SET_MARK:
+ case XFRMA_SET_MARK_MASK:
+ case XFRMA_IF_ID:
++ case XFRMA_MTIMER_THRESH:
+ return xfrm_nla_cpy(dst, src, nla_len(src));
+ default:
+- BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID);
++ BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ pr_warn_once("unsupported nla_type %d\n", src->nla_type);
+ return -EOPNOTSUPP;
+ }
+@@ -300,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
+ nla_for_each_attr(nla, attrs, len, remaining) {
+ int err;
+
+- switch (type) {
++ switch (nlh_src->nlmsg_type) {
+ case XFRM_MSG_NEWSPDINFO:
+ err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+ break;
+@@ -431,10 +434,11 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
+ int err;
+
+ if (type > XFRMA_MAX) {
+- BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID);
++ BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ NL_SET_ERR_MSG(extack, "Bad attribute");
+ return -EOPNOTSUPP;
+ }
++ type = array_index_nospec(type, XFRMA_MAX + 1);
+ if (nla_len(nla) < compat_policy[type].len) {
+ NL_SET_ERR_MSG(extack, "Attribute bad length");
+ return -EOPNOTSUPP;
+diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
+index e843b0d9e2a61..8b8e957a69c36 100644
+--- a/net/xfrm/xfrm_device.c
++++ b/net/xfrm/xfrm_device.c
+@@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
+ }
+ }
+
++static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb)
++{
++ struct xfrm_offload *xo = xfrm_offload(skb);
++ __u32 seq = xo->seq.low;
++
++ seq += skb_shinfo(skb)->gso_segs;
++ if (unlikely(seq < xo->seq.low))
++ return true;
++
++ return false;
++}
++
+ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
+ {
+ int err;
+@@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
+ return skb;
+ }
+
+- if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
++ if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) ||
++ unlikely(xmit_xfrm_check_overflow(skb)))) {
+ struct sk_buff *segs;
+
+ /* Packet got rerouted, fixup features and segment it. */
+@@ -223,6 +236,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+ if (x->encap || x->tfcpad)
+ return -EINVAL;
+
++ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND))
++ return -EINVAL;
++
+ dev = dev_get_by_index(net, xuo->ifindex);
+ if (!dev) {
+ if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
+@@ -261,7 +277,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+ xso->dev = dev;
+ xso->real_dev = dev;
+ xso->num_exthdrs = 1;
+- xso->flags = xuo->flags;
++ /* Don't forward bit that is not implemented */
++ xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
+
+ err = dev->xfrmdev_ops->xdo_dev_state_add(x);
+ if (err) {
+diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+index 3df0861d4390f..a6861832710d9 100644
+--- a/net/xfrm/xfrm_input.c
++++ b/net/xfrm/xfrm_input.c
+@@ -24,7 +24,8 @@
+ #include "xfrm_inout.h"
+
+ struct xfrm_trans_tasklet {
+- struct tasklet_struct tasklet;
++ struct work_struct work;
++ spinlock_t queue_lock;
+ struct sk_buff_head queue;
+ };
+
+@@ -129,6 +130,7 @@ struct sec_path *secpath_set(struct sk_buff *skb)
+ memset(sp->ovec, 0, sizeof(sp->ovec));
+ sp->olen = 0;
+ sp->len = 0;
++ sp->verified_cnt = 0;
+
+ return sp;
+ }
+@@ -277,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
+ goto out;
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+- ipipv6_hdr(skb));
++ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb));
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ ipip6_ecn_decapsulate(skb);
+
+@@ -760,18 +761,22 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
+ }
+ EXPORT_SYMBOL(xfrm_input_resume);
+
+-static void xfrm_trans_reinject(struct tasklet_struct *t)
++static void xfrm_trans_reinject(struct work_struct *work)
+ {
+- struct xfrm_trans_tasklet *trans = from_tasklet(trans, t, tasklet);
++ struct xfrm_trans_tasklet *trans = container_of(work, struct xfrm_trans_tasklet, work);
+ struct sk_buff_head queue;
+ struct sk_buff *skb;
+
+ __skb_queue_head_init(&queue);
++ spin_lock_bh(&trans->queue_lock);
+ skb_queue_splice_init(&trans->queue, &queue);
++ spin_unlock_bh(&trans->queue_lock);
+
++ local_bh_disable();
+ while ((skb = __skb_dequeue(&queue)))
+ XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
+ NULL, skb);
++ local_bh_enable();
+ }
+
+ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
+@@ -782,15 +787,17 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
+
+ trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
++ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+ return -ENOBUFS;
+
+ BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
+
+ XFRM_TRANS_SKB_CB(skb)->finish = finish;
+ XFRM_TRANS_SKB_CB(skb)->net = net;
++ spin_lock_bh(&trans->queue_lock);
+ __skb_queue_tail(&trans->queue, skb);
+- tasklet_schedule(&trans->tasklet);
++ spin_unlock_bh(&trans->queue_lock);
++ schedule_work(&trans->work);
+ return 0;
+ }
+ EXPORT_SYMBOL(xfrm_trans_queue_net);
+@@ -817,7 +824,8 @@ void __init xfrm_input_init(void)
+ struct xfrm_trans_tasklet *trans;
+
+ trans = &per_cpu(xfrm_trans_tasklet, i);
++ spin_lock_init(&trans->queue_lock);
+ __skb_queue_head_init(&trans->queue);
+- tasklet_setup(&trans->tasklet, xfrm_trans_reinject);
++ INIT_WORK(&trans->work, xfrm_trans_reinject);
+ }
+ }
+diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
+deleted file mode 100644
+index 41de46b5ffa94..0000000000000
+--- a/net/xfrm/xfrm_interface.c
++++ /dev/null
+@@ -1,1019 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * XFRM virtual interface
+- *
+- * Copyright (C) 2018 secunet Security Networks AG
+- *
+- * Author:
+- * Steffen Klassert <steffen.klassert@secunet.com>
+- */
+-
+-#include <linux/module.h>
+-#include <linux/capability.h>
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/sockios.h>
+-#include <linux/icmp.h>
+-#include <linux/if.h>
+-#include <linux/in.h>
+-#include <linux/ip.h>
+-#include <linux/net.h>
+-#include <linux/in6.h>
+-#include <linux/netdevice.h>
+-#include <linux/if_link.h>
+-#include <linux/if_arp.h>
+-#include <linux/icmpv6.h>
+-#include <linux/init.h>
+-#include <linux/route.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/netfilter_ipv6.h>
+-#include <linux/slab.h>
+-#include <linux/hash.h>
+-
+-#include <linux/uaccess.h>
+-#include <linux/atomic.h>
+-
+-#include <net/icmp.h>
+-#include <net/ip.h>
+-#include <net/ipv6.h>
+-#include <net/ip6_route.h>
+-#include <net/ip_tunnels.h>
+-#include <net/addrconf.h>
+-#include <net/xfrm.h>
+-#include <net/net_namespace.h>
+-#include <net/netns/generic.h>
+-#include <linux/etherdevice.h>
+-
+-static int xfrmi_dev_init(struct net_device *dev);
+-static void xfrmi_dev_setup(struct net_device *dev);
+-static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
+-static unsigned int xfrmi_net_id __read_mostly;
+-static const struct net_device_ops xfrmi_netdev_ops;
+-
+-#define XFRMI_HASH_BITS 8
+-#define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS)
+-
+-struct xfrmi_net {
+- /* lists for storing interfaces in use */
+- struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
+-};
+-
+-#define for_each_xfrmi_rcu(start, xi) \
+- for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
+-
+-static u32 xfrmi_hash(u32 if_id)
+-{
+- return hash_32(if_id, XFRMI_HASH_BITS);
+-}
+-
+-static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
+-{
+- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+- struct xfrm_if *xi;
+-
+- for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
+- if (x->if_id == xi->p.if_id &&
+- (xi->dev->flags & IFF_UP))
+- return xi;
+- }
+-
+- return NULL;
+-}
+-
+-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+- unsigned short family)
+-{
+- struct net_device *dev;
+- int ifindex = 0;
+-
+- if (!secpath_exists(skb) || !skb->dev)
+- return NULL;
+-
+- switch (family) {
+- case AF_INET6:
+- ifindex = inet6_sdif(skb);
+- break;
+- case AF_INET:
+- ifindex = inet_sdif(skb);
+- break;
+- }
+-
+- if (ifindex) {
+- struct net *net = xs_net(xfrm_input_state(skb));
+-
+- dev = dev_get_by_index_rcu(net, ifindex);
+- } else {
+- dev = skb->dev;
+- }
+-
+- if (!dev || !(dev->flags & IFF_UP))
+- return NULL;
+- if (dev->netdev_ops != &xfrmi_netdev_ops)
+- return NULL;
+-
+- return netdev_priv(dev);
+-}
+-
+-static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+-{
+- struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
+-
+- rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
+- rcu_assign_pointer(*xip, xi);
+-}
+-
+-static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+-{
+- struct xfrm_if __rcu **xip;
+- struct xfrm_if *iter;
+-
+- for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
+- (iter = rtnl_dereference(*xip)) != NULL;
+- xip = &iter->next) {
+- if (xi == iter) {
+- rcu_assign_pointer(*xip, xi->next);
+- break;
+- }
+- }
+-}
+-
+-static void xfrmi_dev_free(struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+-
+- gro_cells_destroy(&xi->gro_cells);
+- free_percpu(dev->tstats);
+-}
+-
+-static int xfrmi_create(struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct net *net = dev_net(dev);
+- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+- int err;
+-
+- dev->rtnl_link_ops = &xfrmi_link_ops;
+- err = register_netdevice(dev);
+- if (err < 0)
+- goto out;
+-
+- xfrmi_link(xfrmn, xi);
+-
+- return 0;
+-
+-out:
+- return err;
+-}
+-
+-static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
+-{
+- struct xfrm_if __rcu **xip;
+- struct xfrm_if *xi;
+- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+-
+- for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
+- (xi = rtnl_dereference(*xip)) != NULL;
+- xip = &xi->next)
+- if (xi->p.if_id == p->if_id)
+- return xi;
+-
+- return NULL;
+-}
+-
+-static void xfrmi_dev_uninit(struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
+-
+- xfrmi_unlink(xfrmn, xi);
+-}
+-
+-static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
+-{
+- skb->tstamp = 0;
+- skb->pkt_type = PACKET_HOST;
+- skb->skb_iif = 0;
+- skb->ignore_df = 0;
+- skb_dst_drop(skb);
+- nf_reset_ct(skb);
+- nf_reset_trace(skb);
+-
+- if (!xnet)
+- return;
+-
+- ipvs_reset(skb);
+- secpath_reset(skb);
+- skb_orphan(skb);
+- skb->mark = 0;
+-}
+-
+-static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
+-{
+- const struct xfrm_mode *inner_mode;
+- struct net_device *dev;
+- struct xfrm_state *x;
+- struct xfrm_if *xi;
+- bool xnet;
+-
+- if (err && !secpath_exists(skb))
+- return 0;
+-
+- x = xfrm_input_state(skb);
+-
+- xi = xfrmi_lookup(xs_net(x), x);
+- if (!xi)
+- return 1;
+-
+- dev = xi->dev;
+- skb->dev = dev;
+-
+- if (err) {
+- dev->stats.rx_errors++;
+- dev->stats.rx_dropped++;
+-
+- return 0;
+- }
+-
+- xnet = !net_eq(xi->net, dev_net(skb->dev));
+-
+- if (xnet) {
+- inner_mode = &x->inner_mode;
+-
+- if (x->sel.family == AF_UNSPEC) {
+- inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+- if (inner_mode == NULL) {
+- XFRM_INC_STATS(dev_net(skb->dev),
+- LINUX_MIB_XFRMINSTATEMODEERROR);
+- return -EINVAL;
+- }
+- }
+-
+- if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
+- inner_mode->family))
+- return -EPERM;
+- }
+-
+- xfrmi_scrub_packet(skb, xnet);
+- dev_sw_netstats_rx_add(dev, skb->len);
+-
+- return 0;
+-}
+-
+-static int
+-xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct net_device_stats *stats = &xi->dev->stats;
+- struct dst_entry *dst = skb_dst(skb);
+- unsigned int length = skb->len;
+- struct net_device *tdev;
+- struct xfrm_state *x;
+- int err = -1;
+- int mtu;
+-
+- dst_hold(dst);
+- dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
+- if (IS_ERR(dst)) {
+- err = PTR_ERR(dst);
+- dst = NULL;
+- goto tx_err_link_failure;
+- }
+-
+- x = dst->xfrm;
+- if (!x)
+- goto tx_err_link_failure;
+-
+- if (x->if_id != xi->p.if_id)
+- goto tx_err_link_failure;
+-
+- tdev = dst->dev;
+-
+- if (tdev == dev) {
+- stats->collisions++;
+- net_warn_ratelimited("%s: Local routing loop detected!\n",
+- dev->name);
+- goto tx_err_dst_release;
+- }
+-
+- mtu = dst_mtu(dst);
+- if ((!skb_is_gso(skb) && skb->len > mtu) ||
+- (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
+- skb_dst_update_pmtu_no_confirm(skb, mtu);
+-
+- if (skb->protocol == htons(ETH_P_IPV6)) {
+- if (mtu < IPV6_MIN_MTU)
+- mtu = IPV6_MIN_MTU;
+-
+- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+- } else {
+- if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
+- goto xmit;
+- icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+- htonl(mtu));
+- }
+-
+- dst_release(dst);
+- return -EMSGSIZE;
+- }
+-
+-xmit:
+- xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
+- skb_dst_set(skb, dst);
+- skb->dev = tdev;
+-
+- err = dst_output(xi->net, skb->sk, skb);
+- if (net_xmit_eval(err) == 0) {
+- dev_sw_netstats_tx_add(dev, 1, length);
+- } else {
+- stats->tx_errors++;
+- stats->tx_aborted_errors++;
+- }
+-
+- return 0;
+-tx_err_link_failure:
+- stats->tx_carrier_errors++;
+- dst_link_failure(skb);
+-tx_err_dst_release:
+- dst_release(dst);
+- return err;
+-}
+-
+-static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct net_device_stats *stats = &xi->dev->stats;
+- struct dst_entry *dst = skb_dst(skb);
+- struct flowi fl;
+- int ret;
+-
+- memset(&fl, 0, sizeof(fl));
+-
+- switch (skb->protocol) {
+- case htons(ETH_P_IPV6):
+- xfrm_decode_session(skb, &fl, AF_INET6);
+- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+- if (!dst) {
+- fl.u.ip6.flowi6_oif = dev->ifindex;
+- fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+- dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
+- if (dst->error) {
+- dst_release(dst);
+- stats->tx_carrier_errors++;
+- goto tx_err;
+- }
+- skb_dst_set(skb, dst);
+- }
+- break;
+- case htons(ETH_P_IP):
+- xfrm_decode_session(skb, &fl, AF_INET);
+- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+- if (!dst) {
+- struct rtable *rt;
+-
+- fl.u.ip4.flowi4_oif = dev->ifindex;
+- fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+- rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
+- if (IS_ERR(rt)) {
+- stats->tx_carrier_errors++;
+- goto tx_err;
+- }
+- skb_dst_set(skb, &rt->dst);
+- }
+- break;
+- default:
+- goto tx_err;
+- }
+-
+- fl.flowi_oif = xi->p.link;
+-
+- ret = xfrmi_xmit2(skb, dev, &fl);
+- if (ret < 0)
+- goto tx_err;
+-
+- return NETDEV_TX_OK;
+-
+-tx_err:
+- stats->tx_errors++;
+- stats->tx_dropped++;
+- kfree_skb(skb);
+- return NETDEV_TX_OK;
+-}
+-
+-static int xfrmi4_err(struct sk_buff *skb, u32 info)
+-{
+- const struct iphdr *iph = (const struct iphdr *)skb->data;
+- struct net *net = dev_net(skb->dev);
+- int protocol = iph->protocol;
+- struct ip_comp_hdr *ipch;
+- struct ip_esp_hdr *esph;
+- struct ip_auth_hdr *ah ;
+- struct xfrm_state *x;
+- struct xfrm_if *xi;
+- __be32 spi;
+-
+- switch (protocol) {
+- case IPPROTO_ESP:
+- esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+- spi = esph->spi;
+- break;
+- case IPPROTO_AH:
+- ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+- spi = ah->spi;
+- break;
+- case IPPROTO_COMP:
+- ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+- spi = htonl(ntohs(ipch->cpi));
+- break;
+- default:
+- return 0;
+- }
+-
+- switch (icmp_hdr(skb)->type) {
+- case ICMP_DEST_UNREACH:
+- if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+- return 0;
+- break;
+- case ICMP_REDIRECT:
+- break;
+- default:
+- return 0;
+- }
+-
+- x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+- spi, protocol, AF_INET);
+- if (!x)
+- return 0;
+-
+- xi = xfrmi_lookup(net, x);
+- if (!xi) {
+- xfrm_state_put(x);
+- return -1;
+- }
+-
+- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+- ipv4_update_pmtu(skb, net, info, 0, protocol);
+- else
+- ipv4_redirect(skb, net, 0, protocol);
+- xfrm_state_put(x);
+-
+- return 0;
+-}
+-
+-static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+- u8 type, u8 code, int offset, __be32 info)
+-{
+- const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+- struct net *net = dev_net(skb->dev);
+- int protocol = iph->nexthdr;
+- struct ip_comp_hdr *ipch;
+- struct ip_esp_hdr *esph;
+- struct ip_auth_hdr *ah;
+- struct xfrm_state *x;
+- struct xfrm_if *xi;
+- __be32 spi;
+-
+- switch (protocol) {
+- case IPPROTO_ESP:
+- esph = (struct ip_esp_hdr *)(skb->data + offset);
+- spi = esph->spi;
+- break;
+- case IPPROTO_AH:
+- ah = (struct ip_auth_hdr *)(skb->data + offset);
+- spi = ah->spi;
+- break;
+- case IPPROTO_COMP:
+- ipch = (struct ip_comp_hdr *)(skb->data + offset);
+- spi = htonl(ntohs(ipch->cpi));
+- break;
+- default:
+- return 0;
+- }
+-
+- if (type != ICMPV6_PKT_TOOBIG &&
+- type != NDISC_REDIRECT)
+- return 0;
+-
+- x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+- spi, protocol, AF_INET6);
+- if (!x)
+- return 0;
+-
+- xi = xfrmi_lookup(net, x);
+- if (!xi) {
+- xfrm_state_put(x);
+- return -1;
+- }
+-
+- if (type == NDISC_REDIRECT)
+- ip6_redirect(skb, net, skb->dev->ifindex, 0,
+- sock_net_uid(net, NULL));
+- else
+- ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+- xfrm_state_put(x);
+-
+- return 0;
+-}
+-
+-static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
+-{
+- if (xi->p.link != p->link)
+- return -EINVAL;
+-
+- xi->p.if_id = p->if_id;
+-
+- return 0;
+-}
+-
+-static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+-{
+- struct net *net = xi->net;
+- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+- int err;
+-
+- xfrmi_unlink(xfrmn, xi);
+- synchronize_net();
+- err = xfrmi_change(xi, p);
+- xfrmi_link(xfrmn, xi);
+- netdev_state_change(xi->dev);
+- return err;
+-}
+-
+-static int xfrmi_get_iflink(const struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+-
+- return xi->p.link;
+-}
+-
+-static const struct net_device_ops xfrmi_netdev_ops = {
+- .ndo_init = xfrmi_dev_init,
+- .ndo_uninit = xfrmi_dev_uninit,
+- .ndo_start_xmit = xfrmi_xmit,
+- .ndo_get_stats64 = dev_get_tstats64,
+- .ndo_get_iflink = xfrmi_get_iflink,
+-};
+-
+-static void xfrmi_dev_setup(struct net_device *dev)
+-{
+- dev->netdev_ops = &xfrmi_netdev_ops;
+- dev->header_ops = &ip_tunnel_header_ops;
+- dev->type = ARPHRD_NONE;
+- dev->mtu = ETH_DATA_LEN;
+- dev->min_mtu = ETH_MIN_MTU;
+- dev->max_mtu = IP_MAX_MTU;
+- dev->flags = IFF_NOARP;
+- dev->needs_free_netdev = true;
+- dev->priv_destructor = xfrmi_dev_free;
+- netif_keep_dst(dev);
+-
+- eth_broadcast_addr(dev->broadcast);
+-}
+-
+-#define XFRMI_FEATURES (NETIF_F_SG | \
+- NETIF_F_FRAGLIST | \
+- NETIF_F_GSO_SOFTWARE | \
+- NETIF_F_HW_CSUM)
+-
+-static int xfrmi_dev_init(struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
+- int err;
+-
+- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+- if (!dev->tstats)
+- return -ENOMEM;
+-
+- err = gro_cells_init(&xi->gro_cells, dev);
+- if (err) {
+- free_percpu(dev->tstats);
+- return err;
+- }
+-
+- dev->features |= NETIF_F_LLTX;
+- dev->features |= XFRMI_FEATURES;
+- dev->hw_features |= XFRMI_FEATURES;
+-
+- if (phydev) {
+- dev->needed_headroom = phydev->needed_headroom;
+- dev->needed_tailroom = phydev->needed_tailroom;
+-
+- if (is_zero_ether_addr(dev->dev_addr))
+- eth_hw_addr_inherit(dev, phydev);
+- if (is_zero_ether_addr(dev->broadcast))
+- memcpy(dev->broadcast, phydev->broadcast,
+- dev->addr_len);
+- } else {
+- eth_hw_addr_random(dev);
+- eth_broadcast_addr(dev->broadcast);
+- }
+-
+- return 0;
+-}
+-
+-static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- return 0;
+-}
+-
+-static void xfrmi_netlink_parms(struct nlattr *data[],
+- struct xfrm_if_parms *parms)
+-{
+- memset(parms, 0, sizeof(*parms));
+-
+- if (!data)
+- return;
+-
+- if (data[IFLA_XFRM_LINK])
+- parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
+-
+- if (data[IFLA_XFRM_IF_ID])
+- parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
+-}
+-
+-static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
+- struct nlattr *tb[], struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- struct net *net = dev_net(dev);
+- struct xfrm_if_parms p;
+- struct xfrm_if *xi;
+- int err;
+-
+- xfrmi_netlink_parms(data, &p);
+- xi = xfrmi_locate(net, &p);
+- if (xi)
+- return -EEXIST;
+-
+- xi = netdev_priv(dev);
+- xi->p = p;
+- xi->net = net;
+- xi->dev = dev;
+-
+- err = xfrmi_create(dev);
+- return err;
+-}
+-
+-static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+-{
+- unregister_netdevice_queue(dev, head);
+-}
+-
+-static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
+- struct nlattr *data[],
+- struct netlink_ext_ack *extack)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct net *net = xi->net;
+- struct xfrm_if_parms p;
+-
+- xfrmi_netlink_parms(data, &p);
+- xi = xfrmi_locate(net, &p);
+- if (!xi) {
+- xi = netdev_priv(dev);
+- } else {
+- if (xi->dev != dev)
+- return -EEXIST;
+- }
+-
+- return xfrmi_update(xi, &p);
+-}
+-
+-static size_t xfrmi_get_size(const struct net_device *dev)
+-{
+- return
+- /* IFLA_XFRM_LINK */
+- nla_total_size(4) +
+- /* IFLA_XFRM_IF_ID */
+- nla_total_size(4) +
+- 0;
+-}
+-
+-static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+- struct xfrm_if_parms *parm = &xi->p;
+-
+- if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
+- nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
+- goto nla_put_failure;
+- return 0;
+-
+-nla_put_failure:
+- return -EMSGSIZE;
+-}
+-
+-static struct net *xfrmi_get_link_net(const struct net_device *dev)
+-{
+- struct xfrm_if *xi = netdev_priv(dev);
+-
+- return xi->net;
+-}
+-
+-static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
+- [IFLA_XFRM_LINK] = { .type = NLA_U32 },
+- [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
+-};
+-
+-static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
+- .kind = "xfrm",
+- .maxtype = IFLA_XFRM_MAX,
+- .policy = xfrmi_policy,
+- .priv_size = sizeof(struct xfrm_if),
+- .setup = xfrmi_dev_setup,
+- .validate = xfrmi_validate,
+- .newlink = xfrmi_newlink,
+- .dellink = xfrmi_dellink,
+- .changelink = xfrmi_changelink,
+- .get_size = xfrmi_get_size,
+- .fill_info = xfrmi_fill_info,
+- .get_link_net = xfrmi_get_link_net,
+-};
+-
+-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+-{
+- struct net *net;
+- LIST_HEAD(list);
+-
+- rtnl_lock();
+- list_for_each_entry(net, net_exit_list, exit_list) {
+- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+- struct xfrm_if __rcu **xip;
+- struct xfrm_if *xi;
+- int i;
+-
+- for (i = 0; i < XFRMI_HASH_SIZE; i++) {
+- for (xip = &xfrmn->xfrmi[i];
+- (xi = rtnl_dereference(*xip)) != NULL;
+- xip = &xi->next)
+- unregister_netdevice_queue(xi->dev, &list);
+- }
+- }
+- unregister_netdevice_many(&list);
+- rtnl_unlock();
+-}
+-
+-static struct pernet_operations xfrmi_net_ops = {
+- .exit_batch = xfrmi_exit_batch_net,
+- .id = &xfrmi_net_id,
+- .size = sizeof(struct xfrmi_net),
+-};
+-
+-static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
+- .handler = xfrm6_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi6_err,
+- .priority = 10,
+-};
+-
+-static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
+- .handler = xfrm6_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi6_err,
+- .priority = 10,
+-};
+-
+-static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
+- .handler = xfrm6_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi6_err,
+- .priority = 10,
+-};
+-
+-#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+-static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
+-{
+- const xfrm_address_t *saddr;
+- __be32 spi;
+-
+- saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
+- spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
+-
+- return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
+-}
+-
+-static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
+- .handler = xfrmi6_rcv_tunnel,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi6_err,
+- .priority = 2,
+-};
+-
+-static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
+- .handler = xfrmi6_rcv_tunnel,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi6_err,
+- .priority = 2,
+-};
+-#endif
+-
+-static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
+- .handler = xfrm4_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi4_err,
+- .priority = 10,
+-};
+-
+-static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
+- .handler = xfrm4_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi4_err,
+- .priority = 10,
+-};
+-
+-static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
+- .handler = xfrm4_rcv,
+- .input_handler = xfrm_input,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi4_err,
+- .priority = 10,
+-};
+-
+-#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+-static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
+-{
+- return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
+-}
+-
+-static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
+- .handler = xfrmi4_rcv_tunnel,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi4_err,
+- .priority = 3,
+-};
+-
+-static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
+- .handler = xfrmi4_rcv_tunnel,
+- .cb_handler = xfrmi_rcv_cb,
+- .err_handler = xfrmi4_err,
+- .priority = 2,
+-};
+-#endif
+-
+-static int __init xfrmi4_init(void)
+-{
+- int err;
+-
+- err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
+- if (err < 0)
+- goto xfrm_proto_esp_failed;
+- err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
+- if (err < 0)
+- goto xfrm_proto_ah_failed;
+- err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+- if (err < 0)
+- goto xfrm_proto_comp_failed;
+-#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+- err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
+- if (err < 0)
+- goto xfrm_tunnel_ipip_failed;
+- err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
+- if (err < 0)
+- goto xfrm_tunnel_ipip6_failed;
+-#endif
+-
+- return 0;
+-
+-#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+-xfrm_tunnel_ipip6_failed:
+- xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
+-xfrm_tunnel_ipip_failed:
+- xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+-#endif
+-xfrm_proto_comp_failed:
+- xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+-xfrm_proto_ah_failed:
+- xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+-xfrm_proto_esp_failed:
+- return err;
+-}
+-
+-static void xfrmi4_fini(void)
+-{
+-#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+- xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
+- xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
+-#endif
+- xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+- xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+- xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+-}
+-
+-static int __init xfrmi6_init(void)
+-{
+- int err;
+-
+- err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
+- if (err < 0)
+- goto xfrm_proto_esp_failed;
+- err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
+- if (err < 0)
+- goto xfrm_proto_ah_failed;
+- err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+- if (err < 0)
+- goto xfrm_proto_comp_failed;
+-#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+- err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
+- if (err < 0)
+- goto xfrm_tunnel_ipv6_failed;
+- err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
+- if (err < 0)
+- goto xfrm_tunnel_ip6ip_failed;
+-#endif
+-
+- return 0;
+-
+-#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+-xfrm_tunnel_ip6ip_failed:
+- xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
+-xfrm_tunnel_ipv6_failed:
+- xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+-#endif
+-xfrm_proto_comp_failed:
+- xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+-xfrm_proto_ah_failed:
+- xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+-xfrm_proto_esp_failed:
+- return err;
+-}
+-
+-static void xfrmi6_fini(void)
+-{
+-#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+- xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
+- xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
+-#endif
+- xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+- xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+- xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+-}
+-
+-static const struct xfrm_if_cb xfrm_if_cb = {
+- .decode_session = xfrmi_decode_session,
+-};
+-
+-static int __init xfrmi_init(void)
+-{
+- const char *msg;
+- int err;
+-
+- pr_info("IPsec XFRM device driver\n");
+-
+- msg = "tunnel device";
+- err = register_pernet_device(&xfrmi_net_ops);
+- if (err < 0)
+- goto pernet_dev_failed;
+-
+- msg = "xfrm4 protocols";
+- err = xfrmi4_init();
+- if (err < 0)
+- goto xfrmi4_failed;
+-
+- msg = "xfrm6 protocols";
+- err = xfrmi6_init();
+- if (err < 0)
+- goto xfrmi6_failed;
+-
+-
+- msg = "netlink interface";
+- err = rtnl_link_register(&xfrmi_link_ops);
+- if (err < 0)
+- goto rtnl_link_failed;
+-
+- xfrm_if_register_cb(&xfrm_if_cb);
+-
+- return err;
+-
+-rtnl_link_failed:
+- xfrmi6_fini();
+-xfrmi6_failed:
+- xfrmi4_fini();
+-xfrmi4_failed:
+- unregister_pernet_device(&xfrmi_net_ops);
+-pernet_dev_failed:
+- pr_err("xfrmi init: failed to register %s\n", msg);
+- return err;
+-}
+-
+-static void __exit xfrmi_fini(void)
+-{
+- xfrm_if_unregister_cb();
+- rtnl_link_unregister(&xfrmi_link_ops);
+- xfrmi4_fini();
+- xfrmi6_fini();
+- unregister_pernet_device(&xfrmi_net_ops);
+-}
+-
+-module_init(xfrmi_init);
+-module_exit(xfrmi_fini);
+-MODULE_LICENSE("GPL");
+-MODULE_ALIAS_RTNL_LINK("xfrm");
+-MODULE_ALIAS_NETDEV("xfrm0");
+-MODULE_AUTHOR("Steffen Klassert");
+-MODULE_DESCRIPTION("XFRM virtual interface");
+diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
+new file mode 100644
+index 0000000000000..ded752e33dacd
+--- /dev/null
++++ b/net/xfrm/xfrm_interface_core.c
+@@ -0,0 +1,1078 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * XFRM virtual interface
++ *
++ * Copyright (C) 2018 secunet Security Networks AG
++ *
++ * Author:
++ * Steffen Klassert <steffen.klassert@secunet.com>
++ */
++
++#include <linux/module.h>
++#include <linux/capability.h>
++#include <linux/errno.h>
++#include <linux/types.h>
++#include <linux/sockios.h>
++#include <linux/icmp.h>
++#include <linux/if.h>
++#include <linux/in.h>
++#include <linux/ip.h>
++#include <linux/net.h>
++#include <linux/in6.h>
++#include <linux/netdevice.h>
++#include <linux/if_link.h>
++#include <linux/if_arp.h>
++#include <linux/icmpv6.h>
++#include <linux/init.h>
++#include <linux/route.h>
++#include <linux/rtnetlink.h>
++#include <linux/netfilter_ipv6.h>
++#include <linux/slab.h>
++#include <linux/hash.h>
++
++#include <linux/uaccess.h>
++#include <linux/atomic.h>
++
++#include <net/icmp.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/ip6_route.h>
++#include <net/ip_tunnels.h>
++#include <net/addrconf.h>
++#include <net/xfrm.h>
++#include <net/net_namespace.h>
++#include <net/netns/generic.h>
++#include <linux/etherdevice.h>
++
++static int xfrmi_dev_init(struct net_device *dev);
++static void xfrmi_dev_setup(struct net_device *dev);
++static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
++static unsigned int xfrmi_net_id __read_mostly;
++static const struct net_device_ops xfrmi_netdev_ops;
++
++#define XFRMI_HASH_BITS 8
++#define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS)
++
++struct xfrmi_net {
++ /* lists for storing interfaces in use */
++ struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
++};
++
++#define for_each_xfrmi_rcu(start, xi) \
++ for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
++
++static u32 xfrmi_hash(u32 if_id)
++{
++ return hash_32(if_id, XFRMI_HASH_BITS);
++}
++
++static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
++{
++ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
++ struct xfrm_if *xi;
++
++ for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
++ if (x->if_id == xi->p.if_id &&
++ (xi->dev->flags & IFF_UP))
++ return xi;
++ }
++
++ return NULL;
++}
++
++static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
++ unsigned short family)
++{
++ struct net_device *dev;
++ int ifindex = 0;
++
++ if (!secpath_exists(skb) || !skb->dev)
++ return NULL;
++
++ switch (family) {
++ case AF_INET6:
++ ifindex = inet6_sdif(skb);
++ break;
++ case AF_INET:
++ ifindex = inet_sdif(skb);
++ break;
++ }
++
++ if (ifindex) {
++ struct net *net = xs_net(xfrm_input_state(skb));
++
++ dev = dev_get_by_index_rcu(net, ifindex);
++ } else {
++ dev = skb->dev;
++ }
++
++ if (!dev || !(dev->flags & IFF_UP))
++ return NULL;
++ if (dev->netdev_ops != &xfrmi_netdev_ops)
++ return NULL;
++
++ return netdev_priv(dev);
++}
++
++static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
++{
++ struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
++
++ rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
++ rcu_assign_pointer(*xip, xi);
++}
++
++static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
++{
++ struct xfrm_if __rcu **xip;
++ struct xfrm_if *iter;
++
++ for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
++ (iter = rtnl_dereference(*xip)) != NULL;
++ xip = &iter->next) {
++ if (xi == iter) {
++ rcu_assign_pointer(*xip, xi->next);
++ break;
++ }
++ }
++}
++
++static void xfrmi_dev_free(struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++
++ gro_cells_destroy(&xi->gro_cells);
++ free_percpu(dev->tstats);
++}
++
++static int xfrmi_create(struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct net *net = dev_net(dev);
++ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
++ int err;
++
++ dev->rtnl_link_ops = &xfrmi_link_ops;
++ err = register_netdevice(dev);
++ if (err < 0)
++ goto out;
++
++ xfrmi_link(xfrmn, xi);
++
++ return 0;
++
++out:
++ return err;
++}
++
++static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
++{
++ struct xfrm_if __rcu **xip;
++ struct xfrm_if *xi;
++ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
++
++ for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
++ (xi = rtnl_dereference(*xip)) != NULL;
++ xip = &xi->next)
++ if (xi->p.if_id == p->if_id)
++ return xi;
++
++ return NULL;
++}
++
++static void xfrmi_dev_uninit(struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
++
++ xfrmi_unlink(xfrmn, xi);
++}
++
++static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
++{
++ skb->tstamp = 0;
++ skb->pkt_type = PACKET_HOST;
++ skb->skb_iif = 0;
++ skb->ignore_df = 0;
++ skb_dst_drop(skb);
++ nf_reset_ct(skb);
++ nf_reset_trace(skb);
++
++ if (!xnet)
++ return;
++
++ ipvs_reset(skb);
++ secpath_reset(skb);
++ skb_orphan(skb);
++ skb->mark = 0;
++}
++
++static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
++ int encap_type, unsigned short family)
++{
++ struct sec_path *sp;
++
++ sp = skb_sec_path(skb);
++ if (sp && (sp->len || sp->olen) &&
++ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
++ goto discard;
++
++ XFRM_SPI_SKB_CB(skb)->family = family;
++ if (family == AF_INET) {
++ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
++ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
++ } else {
++ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
++ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
++ }
++
++ return xfrm_input(skb, nexthdr, spi, encap_type);
++discard:
++ kfree_skb(skb);
++ return 0;
++}
++
++static int xfrmi4_rcv(struct sk_buff *skb)
++{
++ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
++}
++
++static int xfrmi6_rcv(struct sk_buff *skb)
++{
++ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
++ 0, 0, AF_INET6);
++}
++
++static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
++{
++ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
++}
++
++static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
++{
++ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
++}
++
++static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
++{
++ const struct xfrm_mode *inner_mode;
++ struct net_device *dev;
++ struct xfrm_state *x;
++ struct xfrm_if *xi;
++ bool xnet;
++
++ if (err && !secpath_exists(skb))
++ return 0;
++
++ x = xfrm_input_state(skb);
++
++ xi = xfrmi_lookup(xs_net(x), x);
++ if (!xi)
++ return 1;
++
++ dev = xi->dev;
++ skb->dev = dev;
++
++ if (err) {
++ dev->stats.rx_errors++;
++ dev->stats.rx_dropped++;
++
++ return 0;
++ }
++
++ xnet = !net_eq(xi->net, dev_net(skb->dev));
++
++ if (xnet) {
++ inner_mode = &x->inner_mode;
++
++ if (x->sel.family == AF_UNSPEC) {
++ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
++ if (inner_mode == NULL) {
++ XFRM_INC_STATS(dev_net(skb->dev),
++ LINUX_MIB_XFRMINSTATEMODEERROR);
++ return -EINVAL;
++ }
++ }
++
++ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
++ inner_mode->family))
++ return -EPERM;
++ }
++
++ xfrmi_scrub_packet(skb, xnet);
++ dev_sw_netstats_rx_add(dev, skb->len);
++
++ return 0;
++}
++
++static int
++xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct net_device_stats *stats = &xi->dev->stats;
++ struct dst_entry *dst = skb_dst(skb);
++ unsigned int length = skb->len;
++ struct net_device *tdev;
++ struct xfrm_state *x;
++ int err = -1;
++ int mtu;
++
++ dst_hold(dst);
++ dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
++ if (IS_ERR(dst)) {
++ err = PTR_ERR(dst);
++ dst = NULL;
++ goto tx_err_link_failure;
++ }
++
++ x = dst->xfrm;
++ if (!x)
++ goto tx_err_link_failure;
++
++ if (x->if_id != xi->p.if_id)
++ goto tx_err_link_failure;
++
++ tdev = dst->dev;
++
++ if (tdev == dev) {
++ stats->collisions++;
++ net_warn_ratelimited("%s: Local routing loop detected!\n",
++ dev->name);
++ goto tx_err_dst_release;
++ }
++
++ mtu = dst_mtu(dst);
++ if ((!skb_is_gso(skb) && skb->len > mtu) ||
++ (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
++
++ if (skb->protocol == htons(ETH_P_IPV6)) {
++ if (mtu < IPV6_MIN_MTU)
++ mtu = IPV6_MIN_MTU;
++
++ if (skb->len > 1280)
++ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
++ else
++ goto xmit;
++ } else {
++ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
++ goto xmit;
++ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
++ htonl(mtu));
++ }
++
++ dst_release(dst);
++ return -EMSGSIZE;
++ }
++
++xmit:
++ xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
++ skb_dst_set(skb, dst);
++ skb->dev = tdev;
++
++ err = dst_output(xi->net, skb->sk, skb);
++ if (net_xmit_eval(err) == 0) {
++ dev_sw_netstats_tx_add(dev, 1, length);
++ } else {
++ stats->tx_errors++;
++ stats->tx_aborted_errors++;
++ }
++
++ return 0;
++tx_err_link_failure:
++ stats->tx_carrier_errors++;
++ dst_link_failure(skb);
++tx_err_dst_release:
++ dst_release(dst);
++ return err;
++}
++
++static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct net_device_stats *stats = &xi->dev->stats;
++ struct dst_entry *dst = skb_dst(skb);
++ struct flowi fl;
++ int ret;
++
++ memset(&fl, 0, sizeof(fl));
++
++ switch (skb->protocol) {
++ case htons(ETH_P_IPV6):
++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET6);
++ if (!dst) {
++ fl.u.ip6.flowi6_oif = dev->ifindex;
++ fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
++ dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
++ if (dst->error) {
++ dst_release(dst);
++ stats->tx_carrier_errors++;
++ goto tx_err;
++ }
++ skb_dst_set(skb, dst);
++ }
++ break;
++ case htons(ETH_P_IP):
++ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
++ xfrm_decode_session(skb, &fl, AF_INET);
++ if (!dst) {
++ struct rtable *rt;
++
++ fl.u.ip4.flowi4_oif = dev->ifindex;
++ fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
++ rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
++ if (IS_ERR(rt)) {
++ stats->tx_carrier_errors++;
++ goto tx_err;
++ }
++ skb_dst_set(skb, &rt->dst);
++ }
++ break;
++ default:
++ goto tx_err;
++ }
++
++ fl.flowi_oif = xi->p.link;
++
++ ret = xfrmi_xmit2(skb, dev, &fl);
++ if (ret < 0)
++ goto tx_err;
++
++ return NETDEV_TX_OK;
++
++tx_err:
++ stats->tx_errors++;
++ stats->tx_dropped++;
++ kfree_skb(skb);
++ return NETDEV_TX_OK;
++}
++
++static int xfrmi4_err(struct sk_buff *skb, u32 info)
++{
++ const struct iphdr *iph = (const struct iphdr *)skb->data;
++ struct net *net = dev_net(skb->dev);
++ int protocol = iph->protocol;
++ struct ip_comp_hdr *ipch;
++ struct ip_esp_hdr *esph;
++ struct ip_auth_hdr *ah ;
++ struct xfrm_state *x;
++ struct xfrm_if *xi;
++ __be32 spi;
++
++ switch (protocol) {
++ case IPPROTO_ESP:
++ esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
++ spi = esph->spi;
++ break;
++ case IPPROTO_AH:
++ ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
++ spi = ah->spi;
++ break;
++ case IPPROTO_COMP:
++ ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
++ spi = htonl(ntohs(ipch->cpi));
++ break;
++ default:
++ return 0;
++ }
++
++ switch (icmp_hdr(skb)->type) {
++ case ICMP_DEST_UNREACH:
++ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
++ return 0;
++ break;
++ case ICMP_REDIRECT:
++ break;
++ default:
++ return 0;
++ }
++
++ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
++ spi, protocol, AF_INET);
++ if (!x)
++ return 0;
++
++ xi = xfrmi_lookup(net, x);
++ if (!xi) {
++ xfrm_state_put(x);
++ return -1;
++ }
++
++ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
++ ipv4_update_pmtu(skb, net, info, 0, protocol);
++ else
++ ipv4_redirect(skb, net, 0, protocol);
++ xfrm_state_put(x);
++
++ return 0;
++}
++
++static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
++ u8 type, u8 code, int offset, __be32 info)
++{
++ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
++ struct net *net = dev_net(skb->dev);
++ int protocol = iph->nexthdr;
++ struct ip_comp_hdr *ipch;
++ struct ip_esp_hdr *esph;
++ struct ip_auth_hdr *ah;
++ struct xfrm_state *x;
++ struct xfrm_if *xi;
++ __be32 spi;
++
++ switch (protocol) {
++ case IPPROTO_ESP:
++ esph = (struct ip_esp_hdr *)(skb->data + offset);
++ spi = esph->spi;
++ break;
++ case IPPROTO_AH:
++ ah = (struct ip_auth_hdr *)(skb->data + offset);
++ spi = ah->spi;
++ break;
++ case IPPROTO_COMP:
++ ipch = (struct ip_comp_hdr *)(skb->data + offset);
++ spi = htonl(ntohs(ipch->cpi));
++ break;
++ default:
++ return 0;
++ }
++
++ if (type != ICMPV6_PKT_TOOBIG &&
++ type != NDISC_REDIRECT)
++ return 0;
++
++ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
++ spi, protocol, AF_INET6);
++ if (!x)
++ return 0;
++
++ xi = xfrmi_lookup(net, x);
++ if (!xi) {
++ xfrm_state_put(x);
++ return -1;
++ }
++
++ if (type == NDISC_REDIRECT)
++ ip6_redirect(skb, net, skb->dev->ifindex, 0,
++ sock_net_uid(net, NULL));
++ else
++ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
++ xfrm_state_put(x);
++
++ return 0;
++}
++
++static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
++{
++ if (xi->p.link != p->link)
++ return -EINVAL;
++
++ xi->p.if_id = p->if_id;
++
++ return 0;
++}
++
++static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
++{
++ struct net *net = xi->net;
++ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
++ int err;
++
++ xfrmi_unlink(xfrmn, xi);
++ synchronize_net();
++ err = xfrmi_change(xi, p);
++ xfrmi_link(xfrmn, xi);
++ netdev_state_change(xi->dev);
++ return err;
++}
++
++static int xfrmi_get_iflink(const struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++
++ return xi->p.link;
++}
++
++static const struct net_device_ops xfrmi_netdev_ops = {
++ .ndo_init = xfrmi_dev_init,
++ .ndo_uninit = xfrmi_dev_uninit,
++ .ndo_start_xmit = xfrmi_xmit,
++ .ndo_get_stats64 = dev_get_tstats64,
++ .ndo_get_iflink = xfrmi_get_iflink,
++};
++
++static void xfrmi_dev_setup(struct net_device *dev)
++{
++ dev->netdev_ops = &xfrmi_netdev_ops;
++ dev->header_ops = &ip_tunnel_header_ops;
++ dev->type = ARPHRD_NONE;
++ dev->mtu = ETH_DATA_LEN;
++ dev->min_mtu = ETH_MIN_MTU;
++ dev->max_mtu = IP_MAX_MTU;
++ dev->flags = IFF_NOARP;
++ dev->needs_free_netdev = true;
++ dev->priv_destructor = xfrmi_dev_free;
++ netif_keep_dst(dev);
++
++ eth_broadcast_addr(dev->broadcast);
++}
++
++#define XFRMI_FEATURES (NETIF_F_SG | \
++ NETIF_F_FRAGLIST | \
++ NETIF_F_GSO_SOFTWARE | \
++ NETIF_F_HW_CSUM)
++
++static int xfrmi_dev_init(struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
++ int err;
++
++ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
++ if (!dev->tstats)
++ return -ENOMEM;
++
++ err = gro_cells_init(&xi->gro_cells, dev);
++ if (err) {
++ free_percpu(dev->tstats);
++ return err;
++ }
++
++ dev->features |= NETIF_F_LLTX;
++ dev->features |= XFRMI_FEATURES;
++ dev->hw_features |= XFRMI_FEATURES;
++
++ if (phydev) {
++ dev->needed_headroom = phydev->needed_headroom;
++ dev->needed_tailroom = phydev->needed_tailroom;
++
++ if (is_zero_ether_addr(dev->dev_addr))
++ eth_hw_addr_inherit(dev, phydev);
++ if (is_zero_ether_addr(dev->broadcast))
++ memcpy(dev->broadcast, phydev->broadcast,
++ dev->addr_len);
++ } else {
++ eth_hw_addr_random(dev);
++ eth_broadcast_addr(dev->broadcast);
++ }
++
++ return 0;
++}
++
++static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ return 0;
++}
++
++static void xfrmi_netlink_parms(struct nlattr *data[],
++ struct xfrm_if_parms *parms)
++{
++ memset(parms, 0, sizeof(*parms));
++
++ if (!data)
++ return;
++
++ if (data[IFLA_XFRM_LINK])
++ parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
++
++ if (data[IFLA_XFRM_IF_ID])
++ parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
++}
++
++static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ struct net *net = dev_net(dev);
++ struct xfrm_if_parms p = {};
++ struct xfrm_if *xi;
++ int err;
++
++ xfrmi_netlink_parms(data, &p);
++ if (!p.if_id) {
++ NL_SET_ERR_MSG(extack, "if_id must be non zero");
++ return -EINVAL;
++ }
++
++ xi = xfrmi_locate(net, &p);
++ if (xi)
++ return -EEXIST;
++
++ xi = netdev_priv(dev);
++ xi->p = p;
++ xi->net = net;
++ xi->dev = dev;
++
++ err = xfrmi_create(dev);
++ return err;
++}
++
++static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
++{
++ unregister_netdevice_queue(dev, head);
++}
++
++static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
++ struct nlattr *data[],
++ struct netlink_ext_ack *extack)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct net *net = xi->net;
++ struct xfrm_if_parms p = {};
++
++ xfrmi_netlink_parms(data, &p);
++ if (!p.if_id) {
++ NL_SET_ERR_MSG(extack, "if_id must be non zero");
++ return -EINVAL;
++ }
++
++ xi = xfrmi_locate(net, &p);
++ if (!xi) {
++ xi = netdev_priv(dev);
++ } else {
++ if (xi->dev != dev)
++ return -EEXIST;
++ }
++
++ return xfrmi_update(xi, &p);
++}
++
++static size_t xfrmi_get_size(const struct net_device *dev)
++{
++ return
++ /* IFLA_XFRM_LINK */
++ nla_total_size(4) +
++ /* IFLA_XFRM_IF_ID */
++ nla_total_size(4) +
++ 0;
++}
++
++static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++ struct xfrm_if_parms *parm = &xi->p;
++
++ if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
++ nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
++ goto nla_put_failure;
++ return 0;
++
++nla_put_failure:
++ return -EMSGSIZE;
++}
++
++static struct net *xfrmi_get_link_net(const struct net_device *dev)
++{
++ struct xfrm_if *xi = netdev_priv(dev);
++
++ return xi->net;
++}
++
++static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
++ [IFLA_XFRM_LINK] = { .type = NLA_U32 },
++ [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
++};
++
++static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
++ .kind = "xfrm",
++ .maxtype = IFLA_XFRM_MAX,
++ .policy = xfrmi_policy,
++ .priv_size = sizeof(struct xfrm_if),
++ .setup = xfrmi_dev_setup,
++ .validate = xfrmi_validate,
++ .newlink = xfrmi_newlink,
++ .dellink = xfrmi_dellink,
++ .changelink = xfrmi_changelink,
++ .get_size = xfrmi_get_size,
++ .fill_info = xfrmi_fill_info,
++ .get_link_net = xfrmi_get_link_net,
++};
++
++static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
++{
++ struct net *net;
++ LIST_HEAD(list);
++
++ rtnl_lock();
++ list_for_each_entry(net, net_exit_list, exit_list) {
++ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
++ struct xfrm_if __rcu **xip;
++ struct xfrm_if *xi;
++ int i;
++
++ for (i = 0; i < XFRMI_HASH_SIZE; i++) {
++ for (xip = &xfrmn->xfrmi[i];
++ (xi = rtnl_dereference(*xip)) != NULL;
++ xip = &xi->next)
++ unregister_netdevice_queue(xi->dev, &list);
++ }
++ }
++ unregister_netdevice_many(&list);
++ rtnl_unlock();
++}
++
++static struct pernet_operations xfrmi_net_ops = {
++ .exit_batch = xfrmi_exit_batch_net,
++ .id = &xfrmi_net_id,
++ .size = sizeof(struct xfrmi_net),
++};
++
++static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
++ .handler = xfrmi6_rcv,
++ .input_handler = xfrmi6_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi6_err,
++ .priority = 10,
++};
++
++static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
++ .handler = xfrm6_rcv,
++ .input_handler = xfrm_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi6_err,
++ .priority = 10,
++};
++
++static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
++ .handler = xfrm6_rcv,
++ .input_handler = xfrm_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi6_err,
++ .priority = 10,
++};
++
++#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
++static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
++{
++ const xfrm_address_t *saddr;
++ __be32 spi;
++
++ saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
++ spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
++
++ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
++}
++
++static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
++ .handler = xfrmi6_rcv_tunnel,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi6_err,
++ .priority = 2,
++};
++
++static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
++ .handler = xfrmi6_rcv_tunnel,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi6_err,
++ .priority = 2,
++};
++#endif
++
++static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
++ .handler = xfrmi4_rcv,
++ .input_handler = xfrmi4_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi4_err,
++ .priority = 10,
++};
++
++static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
++ .handler = xfrm4_rcv,
++ .input_handler = xfrm_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi4_err,
++ .priority = 10,
++};
++
++static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
++ .handler = xfrm4_rcv,
++ .input_handler = xfrm_input,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi4_err,
++ .priority = 10,
++};
++
++#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
++static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
++{
++ return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
++}
++
++static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
++ .handler = xfrmi4_rcv_tunnel,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi4_err,
++ .priority = 3,
++};
++
++static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
++ .handler = xfrmi4_rcv_tunnel,
++ .cb_handler = xfrmi_rcv_cb,
++ .err_handler = xfrmi4_err,
++ .priority = 2,
++};
++#endif
++
++static int __init xfrmi4_init(void)
++{
++ int err;
++
++ err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
++ if (err < 0)
++ goto xfrm_proto_esp_failed;
++ err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
++ if (err < 0)
++ goto xfrm_proto_ah_failed;
++ err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
++ if (err < 0)
++ goto xfrm_proto_comp_failed;
++#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
++ err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
++ if (err < 0)
++ goto xfrm_tunnel_ipip_failed;
++ err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
++ if (err < 0)
++ goto xfrm_tunnel_ipip6_failed;
++#endif
++
++ return 0;
++
++#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
++xfrm_tunnel_ipip6_failed:
++ xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
++xfrm_tunnel_ipip_failed:
++ xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
++#endif
++xfrm_proto_comp_failed:
++ xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
++xfrm_proto_ah_failed:
++ xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
++xfrm_proto_esp_failed:
++ return err;
++}
++
++static void xfrmi4_fini(void)
++{
++#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
++ xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
++ xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
++#endif
++ xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
++ xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
++ xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
++}
++
++static int __init xfrmi6_init(void)
++{
++ int err;
++
++ err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
++ if (err < 0)
++ goto xfrm_proto_esp_failed;
++ err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
++ if (err < 0)
++ goto xfrm_proto_ah_failed;
++ err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
++ if (err < 0)
++ goto xfrm_proto_comp_failed;
++#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
++ err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
++ if (err < 0)
++ goto xfrm_tunnel_ipv6_failed;
++ err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
++ if (err < 0)
++ goto xfrm_tunnel_ip6ip_failed;
++#endif
++
++ return 0;
++
++#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
++xfrm_tunnel_ip6ip_failed:
++ xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
++xfrm_tunnel_ipv6_failed:
++ xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
++#endif
++xfrm_proto_comp_failed:
++ xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
++xfrm_proto_ah_failed:
++ xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
++xfrm_proto_esp_failed:
++ return err;
++}
++
++static void xfrmi6_fini(void)
++{
++#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
++ xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
++ xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
++#endif
++ xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
++ xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
++ xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
++}
++
++static const struct xfrm_if_cb xfrm_if_cb = {
++ .decode_session = xfrmi_decode_session,
++};
++
++static int __init xfrmi_init(void)
++{
++ const char *msg;
++ int err;
++
++ pr_info("IPsec XFRM device driver\n");
++
++ msg = "tunnel device";
++ err = register_pernet_device(&xfrmi_net_ops);
++ if (err < 0)
++ goto pernet_dev_failed;
++
++ msg = "xfrm4 protocols";
++ err = xfrmi4_init();
++ if (err < 0)
++ goto xfrmi4_failed;
++
++ msg = "xfrm6 protocols";
++ err = xfrmi6_init();
++ if (err < 0)
++ goto xfrmi6_failed;
++
++
++ msg = "netlink interface";
++ err = rtnl_link_register(&xfrmi_link_ops);
++ if (err < 0)
++ goto rtnl_link_failed;
++
++ xfrm_if_register_cb(&xfrm_if_cb);
++
++ return err;
++
++rtnl_link_failed:
++ xfrmi6_fini();
++xfrmi6_failed:
++ xfrmi4_fini();
++xfrmi4_failed:
++ unregister_pernet_device(&xfrmi_net_ops);
++pernet_dev_failed:
++ pr_err("xfrmi init: failed to register %s\n", msg);
++ return err;
++}
++
++static void __exit xfrmi_fini(void)
++{
++ xfrm_if_unregister_cb();
++ rtnl_link_unregister(&xfrmi_link_ops);
++ xfrmi4_fini();
++ xfrmi6_fini();
++ unregister_pernet_device(&xfrmi_net_ops);
++}
++
++module_init(xfrmi_init);
++module_exit(xfrmi_fini);
++MODULE_LICENSE("GPL");
++MODULE_ALIAS_RTNL_LINK("xfrm");
++MODULE_ALIAS_NETDEV("xfrm0");
++MODULE_AUTHOR("Steffen Klassert");
++MODULE_DESCRIPTION("XFRM virtual interface");
+diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
+index cb40ff0ff28da..92ad336a83ab5 100644
+--- a/net/xfrm/xfrm_ipcomp.c
++++ b/net/xfrm/xfrm_ipcomp.c
+@@ -203,6 +203,7 @@ static void ipcomp_free_scratches(void)
+ vfree(*per_cpu_ptr(scratches, i));
+
+ free_percpu(scratches);
++ ipcomp_scratches = NULL;
+ }
+
+ static void * __percpu *ipcomp_alloc_scratches(void)
+diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
+index 229544bc70c21..4dc4a7bbe51cf 100644
+--- a/net/xfrm/xfrm_output.c
++++ b/net/xfrm/xfrm_output.c
+@@ -647,10 +647,12 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
+ * This requires hardware to know the inner packet type to calculate
+ * the inner header checksum. Save inner ip protocol here to avoid
+ * traversing the packet in the vendor's xmit code.
+- * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise,
+- * get the ip protocol from the IP header.
++ * For IPsec tunnel mode save the ip protocol from the IP header of the
++ * plain text packet. Otherwise If the encap type is IPIP, just save
++ * skb->inner_ipproto in any other case get the ip protocol from the IP
++ * header.
+ */
+-static void xfrm_get_inner_ipproto(struct sk_buff *skb)
++static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
+ {
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct ethhdr *eth;
+@@ -658,6 +660,25 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb)
+ if (!xo)
+ return;
+
++ if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
++ switch (x->outer_mode.family) {
++ case AF_INET:
++ xo->inner_ipproto = ip_hdr(skb)->protocol;
++ break;
++ case AF_INET6:
++ xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
++ break;
++ default:
++ break;
++ }
++
++ return;
++ }
++
++ /* non-Tunnel Mode */
++ if (!skb->encapsulation)
++ return;
++
+ if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
+ xo->inner_ipproto = skb->inner_ipproto;
+ return;
+@@ -712,8 +733,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
+ sp->xvec[sp->len++] = x;
+ xfrm_state_hold(x);
+
+- if (skb->encapsulation)
+- xfrm_get_inner_ipproto(skb);
++ xfrm_get_inner_ipproto(skb, x);
+ skb->encapsulation = 1;
+
+ if (skb_is_gso(skb)) {
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 37d17a79617c9..b0a19cc928799 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -31,8 +31,10 @@
+ #include <linux/if_tunnel.h>
+ #include <net/dst.h>
+ #include <net/flow.h>
++#include <net/inet_ecn.h>
+ #include <net/xfrm.h>
+ #include <net/ip.h>
++#include <net/gre.h>
+ #if IS_ENABLED(CONFIG_IPV6_MIP6)
+ #include <net/mip6.h>
+ #endif
+@@ -2676,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
+ *num_xfrms = 0;
+ return 0;
+ }
+- if (IS_ERR(pols[0]))
++ if (IS_ERR(pols[0])) {
++ *num_pols = 0;
+ return PTR_ERR(pols[0]);
++ }
+
+ *num_xfrms = pols[0]->xfrm_nr;
+
+@@ -2692,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
+ if (pols[1]) {
+ if (IS_ERR(pols[1])) {
+ xfrm_pols_put(pols, *num_pols);
++ *num_pols = 0;
+ return PTR_ERR(pols[1]);
+ }
+ (*num_pols)++;
+@@ -3157,8 +3162,8 @@ ok:
+ return dst;
+
+ nopol:
+- if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+- !xfrm_default_allow(net, dir)) {
++ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
++ net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ err = -EPERM;
+ goto error;
+ }
+@@ -3233,7 +3238,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
+
+ static inline int
+ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
+- unsigned short family)
++ unsigned short family, u32 if_id)
+ {
+ if (xfrm_state_kern(x))
+ return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
+@@ -3244,7 +3249,8 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
+ (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
+ !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
+ !(x->props.mode != XFRM_MODE_TRANSPORT &&
+- xfrm_state_addr_cmp(tmpl, x, family));
++ xfrm_state_addr_cmp(tmpl, x, family)) &&
++ (if_id == 0 || if_id == x->if_id);
+ }
+
+ /*
+@@ -3256,7 +3262,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
+ */
+ static inline int
+ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
+- unsigned short family)
++ unsigned short family, u32 if_id)
+ {
+ int idx = start;
+
+@@ -3266,9 +3272,16 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
+ } else
+ start = -1;
+ for (; idx < sp->len; idx++) {
+- if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
++ if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
+ return ++idx;
+ if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
++ if (idx < sp->verified_cnt) {
++ /* Secpath entry previously verified, consider optional and
++ * continue searching
++ */
++ continue;
++ }
++
+ if (start == -1)
+ start = -2-idx;
+ break;
+@@ -3296,7 +3309,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+ fl4->flowi4_proto = iph->protocol;
+ fl4->daddr = reverse ? iph->saddr : iph->daddr;
+ fl4->saddr = reverse ? iph->daddr : iph->saddr;
+- fl4->flowi4_tos = iph->tos;
++ fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
+
+ if (!ip_is_fragment(iph)) {
+ switch (iph->protocol) {
+@@ -3424,6 +3437,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
++ case IPPROTO_GRE:
++ if (!onlyproto &&
++ (nh + offset + 12 < skb->data ||
++ pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
++ struct gre_base_hdr *gre_hdr;
++ __be32 *gre_key;
++
++ nh = skb_network_header(skb);
++ gre_hdr = (struct gre_base_hdr *)(nh + offset);
++ gre_key = (__be32 *)(gre_hdr + 1);
++
++ if (gre_hdr->flags & GRE_KEY) {
++ if (gre_hdr->flags & GRE_CSUM)
++ gre_key++;
++ fl6->fl6_gre_key = *gre_key;
++ }
++ }
++ fl6->flowi6_proto = nexthdr;
++ return;
++
+ #if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPPROTO_MH:
+ offset += ipv6_optlen(exthdr);
+@@ -3550,7 +3583,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+
+ if (!pol) {
+- if (!xfrm_default_allow(net, dir)) {
++ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+ return 0;
+ }
+@@ -3575,6 +3608,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ if (pols[1]) {
+ if (IS_ERR(pols[1])) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
++ xfrm_pol_put(pols[0]);
+ return 0;
+ }
+ pols[1]->curlft.use_time = ktime_get_real_seconds();
+@@ -3610,11 +3644,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+ xfrm_nr = ti;
+
+- if (!xfrm_default_allow(net, dir) && !xfrm_nr) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+- goto reject;
+- }
+-
+ if (npols > 1) {
+ xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
+ tpp = stp;
+@@ -3625,9 +3654,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ * Order is _important_. Later we will implement
+ * some barriers, but at the moment barriers
+ * are implied between each two transformations.
++ * Upon success, marks secpath entries as having been
++ * verified to allow them to be skipped in future policy
++ * checks (e.g. nested tunnels).
+ */
+ for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
+- k = xfrm_policy_ok(tpp[i], sp, k, family);
++ k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
+ if (k < 0) {
+ if (k < -1)
+ /* "-2 - errored_index" returned */
+@@ -3643,6 +3675,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+
+ xfrm_pols_put(pols, npols);
++ sp->verified_cnt = k;
++
+ return 1;
+ }
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
+@@ -4099,6 +4133,9 @@ static int __net_init xfrm_net_init(struct net *net)
+ spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
+ mutex_init(&net->xfrm.xfrm_cfg_mutex);
++ net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
++ net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
++ net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;
+
+ rv = xfrm_statistics_init(net);
+ if (rv < 0)
+@@ -4237,7 +4274,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
+ }
+
+ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
+- u8 dir, u8 type, struct net *net)
++ u8 dir, u8 type, struct net *net, u32 if_id)
+ {
+ struct xfrm_policy *pol, *ret = NULL;
+ struct hlist_head *chain;
+@@ -4246,7 +4283,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
+ hlist_for_each_entry(pol, chain, bydst) {
+- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
++ if ((if_id == 0 || pol->if_id == if_id) &&
++ xfrm_migrate_selector_match(sel, &pol->selector) &&
+ pol->type == type) {
+ ret = pol;
+ priority = ret->priority;
+@@ -4258,7 +4296,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
+ if ((pol->priority >= priority) && ret)
+ break;
+
+- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
++ if ((if_id == 0 || pol->if_id == if_id) &&
++ xfrm_migrate_selector_match(sel, &pol->selector) &&
+ pol->type == type) {
+ ret = pol;
+ break;
+@@ -4374,7 +4413,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
+ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+ struct xfrm_migrate *m, int num_migrate,
+ struct xfrm_kmaddress *k, struct net *net,
+- struct xfrm_encap_tmpl *encap)
++ struct xfrm_encap_tmpl *encap, u32 if_id)
+ {
+ int i, err, nx_cur = 0, nx_new = 0;
+ struct xfrm_policy *pol = NULL;
+@@ -4393,14 +4432,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+ }
+
+ /* Stage 1 - find policy */
+- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
++ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Stage 2 - find and update state(s) */
+ for (i = 0, mp = m; i < num_migrate; i++, mp++) {
+- if ((x = xfrm_migrate_state_find(mp, net))) {
++ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
+ x_cur[nx_cur] = x;
+ nx_cur++;
+ xc = xfrm_state_migrate(x, mp, encap);
+diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
+index 9277d81b344cb..49dd788859d8b 100644
+--- a/net/xfrm/xfrm_replay.c
++++ b/net/xfrm/xfrm_replay.c
+@@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
+ oseq += skb_shinfo(skb)->gso_segs;
+ }
+
+- if (unlikely(oseq < replay_esn->oseq)) {
++ if (unlikely(xo->seq.low < replay_esn->oseq)) {
+ XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
+ xo->seq.hi = oseq_hi;
+ replay_esn->oseq_hi = oseq_hi;
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index a2f4001221d16..60f3ea5561ddf 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -1578,9 +1578,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
+ memcpy(&x->mark, &orig->mark, sizeof(x->mark));
+ memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
+
+- if (xfrm_init_state(x) < 0)
+- goto error;
+-
+ x->props.flags = orig->props.flags;
+ x->props.extra_flags = orig->props.extra_flags;
+
+@@ -1593,6 +1590,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
+ x->km.seq = orig->km.seq;
+ x->replay = orig->replay;
+ x->preplay = orig->preplay;
++ x->mapping_maxage = orig->mapping_maxage;
++ x->lastused = orig->lastused;
++ x->new_mapping = 0;
++ x->new_mapping_sport = 0;
+
+ return x;
+
+@@ -1602,7 +1603,8 @@ out:
+ return NULL;
+ }
+
+-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
++struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
++ u32 if_id)
+ {
+ unsigned int h;
+ struct xfrm_state *x = NULL;
+@@ -1618,6 +1620,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
+ continue;
+ if (m->reqid && x->props.reqid != m->reqid)
+ continue;
++ if (if_id != 0 && x->if_id != if_id)
++ continue;
+ if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
+ m->old_family) ||
+ !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
+@@ -1633,6 +1637,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
+ if (x->props.mode != m->mode ||
+ x->id.proto != m->proto)
+ continue;
++ if (if_id != 0 && x->if_id != if_id)
++ continue;
+ if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
+ m->old_family) ||
+ !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
+@@ -1659,6 +1665,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
+ if (!xc)
+ return NULL;
+
++ xc->props.family = m->new_family;
++
++ if (xfrm_init_state(xc) < 0)
++ goto error;
++
+ memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
+ memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
+
+@@ -2242,7 +2253,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
+ }
+ EXPORT_SYMBOL(km_query);
+
+-int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
++static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
+ {
+ int err = -EINVAL;
+ struct xfrm_mgr *km;
+@@ -2257,6 +2268,24 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
+ rcu_read_unlock();
+ return err;
+ }
++
++int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
++{
++ int ret = 0;
++
++ if (x->mapping_maxage) {
++ if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage ||
++ x->new_mapping_sport != sport) {
++ x->new_mapping_sport = sport;
++ x->new_mapping = jiffies / HZ;
++ ret = __km_new_mapping(x, ipaddr, sport);
++ }
++ } else {
++ ret = __km_new_mapping(x, ipaddr, sport);
++ }
++
++ return ret;
++}
+ EXPORT_SYMBOL(km_new_mapping);
+
+ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
+@@ -2550,7 +2579,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
+ }
+ EXPORT_SYMBOL(xfrm_state_delete_tunnel);
+
+-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
++u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+ {
+ const struct xfrm_type *type = READ_ONCE(x->type);
+ struct crypto_aead *aead;
+@@ -2581,17 +2610,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
+ }
+-EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
+-
+-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+-{
+- mtu = __xfrm_state_mtu(x, mtu);
+-
+- if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
+- return IPV6_MIN_MTU;
+-
+- return mtu;
+-}
++EXPORT_SYMBOL_GPL(xfrm_state_mtu);
+
+ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+ {
+@@ -2601,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+ int err;
+
+ if (family == AF_INET &&
+- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
++ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
+ x->props.flags |= XFRM_STATE_NOPMTUDISC;
+
+ err = -EPROTONOSUPPORT;
+@@ -2624,9 +2643,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+ if (inner_mode == NULL)
+ goto error;
+
+- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
+- goto error;
+-
+ x->inner_mode = *inner_mode;
+
+ if (x->props.family == AF_INET)
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+index 3a3cb09eec122..ff56b6a0162ea 100644
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -282,6 +282,10 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
+
+ err = 0;
+
++ if (attrs[XFRMA_MTIMER_THRESH])
++ if (!attrs[XFRMA_ENCAP])
++ err = -EINVAL;
++
+ out:
+ return err;
+ }
+@@ -521,8 +525,9 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
+ struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
+ struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
+ struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
++ struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH];
+
+- if (re) {
++ if (re && x->replay_esn && x->preplay_esn) {
+ struct xfrm_replay_state_esn *replay_esn;
+ replay_esn = nla_data(re);
+ memcpy(x->replay_esn, replay_esn,
+@@ -552,6 +557,9 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
+
+ if (rt)
+ x->replay_maxdiff = nla_get_u32(rt);
++
++ if (mt)
++ x->mapping_maxage = nla_get_u32(mt);
+ }
+
+ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
+@@ -898,7 +906,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb)
+ return -EMSGSIZE;
+
+ ap = nla_data(nla);
+- memcpy(ap, aead, sizeof(*aead));
++ strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name));
++ ap->alg_key_len = aead->alg_key_len;
++ ap->alg_icv_len = aead->alg_icv_len;
+
+ if (redact_secret && aead->alg_key_len)
+ memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8);
+@@ -918,7 +928,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
+ return -EMSGSIZE;
+
+ ap = nla_data(nla);
+- memcpy(ap, ealg, sizeof(*ealg));
++ strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name));
++ ap->alg_key_len = ealg->alg_key_len;
+
+ if (redact_secret && ealg->alg_key_len)
+ memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8);
+@@ -929,6 +940,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
+ return 0;
+ }
+
++static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb)
++{
++ struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg));
++ struct xfrm_algo *ap;
++
++ if (!nla)
++ return -EMSGSIZE;
++
++ ap = nla_data(nla);
++ strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name));
++ ap->alg_key_len = 0;
++
++ return 0;
++}
++
++static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb)
++{
++ struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep));
++ struct xfrm_encap_tmpl *uep;
++
++ if (!nla)
++ return -EMSGSIZE;
++
++ uep = nla_data(nla);
++ memset(uep, 0, sizeof(*uep));
++
++ uep->encap_type = ep->encap_type;
++ uep->encap_sport = ep->encap_sport;
++ uep->encap_dport = ep->encap_dport;
++ uep->encap_oa = ep->encap_oa;
++
++ return 0;
++}
++
+ static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
+ {
+ int ret = 0;
+@@ -984,12 +1029,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
+ goto out;
+ }
+ if (x->calg) {
+- ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
++ ret = copy_to_user_calg(x->calg, skb);
+ if (ret)
+ goto out;
+ }
+ if (x->encap) {
+- ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
++ ret = copy_to_user_encap(x->encap, skb);
+ if (ret)
+ goto out;
+ }
+@@ -1024,8 +1069,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
+ if (ret)
+ goto out;
+ }
+- if (x->security)
++ if (x->security) {
+ ret = copy_sec_ctx(x->security, skb);
++ if (ret)
++ goto out;
++ }
++ if (x->mapping_maxage)
++ ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
+ out:
+ return ret;
+ }
+@@ -1109,6 +1159,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
+ sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return -ENOMEM;
++
++ /* see addr_match(), (prefix length >> 5) << 2
++ * will be used to compare xfrm_address_t
++ */
++ if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
++ filter->dplen > (sizeof(xfrm_address_t) << 3)) {
++ kfree(filter);
++ return -EINVAL;
++ }
+ }
+
+ if (attrs[XFRMA_PROTO])
+@@ -1980,12 +2039,9 @@ static int xfrm_notify_userpolicy(struct net *net)
+ }
+
+ up = nlmsg_data(nlh);
+- up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+- up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+- up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
++ up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
++ up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
++ up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
+
+ nlmsg_end(skb, nlh);
+
+@@ -1996,26 +2052,26 @@ static int xfrm_notify_userpolicy(struct net *net)
+ return err;
+ }
+
++static bool xfrm_userpolicy_is_valid(__u8 policy)
++{
++ return policy == XFRM_USERPOLICY_BLOCK ||
++ policy == XFRM_USERPOLICY_ACCEPT;
++}
++
+ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+ {
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
+
+- if (up->in == XFRM_USERPOLICY_BLOCK)
+- net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN;
+- else if (up->in == XFRM_USERPOLICY_ACCEPT)
+- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN;
++ if (xfrm_userpolicy_is_valid(up->in))
++ net->xfrm.policy_default[XFRM_POLICY_IN] = up->in;
+
+- if (up->fwd == XFRM_USERPOLICY_BLOCK)
+- net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD;
+- else if (up->fwd == XFRM_USERPOLICY_ACCEPT)
+- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD;
++ if (xfrm_userpolicy_is_valid(up->fwd))
++ net->xfrm.policy_default[XFRM_POLICY_FWD] = up->fwd;
+
+- if (up->out == XFRM_USERPOLICY_BLOCK)
+- net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT;
+- else if (up->out == XFRM_USERPOLICY_ACCEPT)
+- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT;
++ if (xfrm_userpolicy_is_valid(up->out))
++ net->xfrm.policy_default[XFRM_POLICY_OUT] = up->out;
+
+ rt_genid_bump_all(net);
+
+@@ -2045,13 +2101,9 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ }
+
+ r_up = nlmsg_data(r_nlh);
+-
+- r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+- r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+- r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
++ r_up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
++ r_up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
++ r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
+ nlmsg_end(r_skb, r_nlh);
+
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+@@ -2579,6 +2631,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
+ int n = 0;
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_encap_tmpl *encap = NULL;
++ u32 if_id = 0;
+
+ if (attrs[XFRMA_MIGRATE] == NULL)
+ return -EINVAL;
+@@ -2603,7 +2656,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
+ return -ENOMEM;
+ }
+
+- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
++ if (attrs[XFRMA_IF_ID])
++ if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
++
++ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
+
+ kfree(encap);
+
+@@ -2778,7 +2834,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+ [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
+ [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
+ [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
+- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
++ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
+ [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
+ [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
+ [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
+@@ -2798,6 +2854,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+ [XFRMA_SET_MARK] = { .type = NLA_U32 },
+ [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
+ [XFRMA_IF_ID] = { .type = NLA_U32 },
++ [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ };
+ EXPORT_SYMBOL_GPL(xfrma_policy);
+
+@@ -3058,7 +3115,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
+ if (x->props.extra_flags)
+ l += nla_total_size(sizeof(x->props.extra_flags));
+ if (x->xso.dev)
+- l += nla_total_size(sizeof(x->xso));
++ l += nla_total_size(sizeof(struct xfrm_user_offload));
+ if (x->props.smark.v | x->props.smark.m) {
+ l += nla_total_size(sizeof(x->props.smark.v));
+ l += nla_total_size(sizeof(x->props.smark.m));
+@@ -3069,6 +3126,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
+ /* Must count x->lastused as it may become non-zero behind our back. */
+ l += nla_total_size_64bit(sizeof(u64));
+
++ if (x->mapping_maxage)
++ l += nla_total_size(sizeof(x->mapping_maxage));
++
+ return l;
+ }
+
+diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
+index 5fd48a8d4f10a..e2c9ea65df9fb 100644
+--- a/samples/bpf/Makefile
++++ b/samples/bpf/Makefile
+@@ -59,7 +59,11 @@ tprogs-y += xdp_redirect
+ tprogs-y += xdp_monitor
+
+ # Libbpf dependencies
+-LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
++LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf
++LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf
++LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
++LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
++LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+
+ CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
+ TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+@@ -198,7 +202,7 @@ TPROGS_CFLAGS += -Wstrict-prototypes
+
+ TPROGS_CFLAGS += -I$(objtree)/usr/include
+ TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+-TPROGS_CFLAGS += -I$(srctree)/tools/lib/
++TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
+ TPROGS_CFLAGS += -I$(srctree)/tools/include
+ TPROGS_CFLAGS += -I$(srctree)/tools/perf
+ TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
+@@ -209,6 +213,11 @@ TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
+ endif
+
+ TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
++TPROGLDLIBS_xdp_monitor += -lm
++TPROGLDLIBS_xdp_redirect += -lm
++TPROGLDLIBS_xdp_redirect_cpu += -lm
++TPROGLDLIBS_xdp_redirect_map += -lm
++TPROGLDLIBS_xdp_redirect_map_multi += -lm
+ TPROGLDLIBS_tracex4 += -lrt
+ TPROGLDLIBS_trace_output += -lrt
+ TPROGLDLIBS_map_perf_test += -lrt
+@@ -268,16 +277,27 @@ all:
+ clean:
+ $(MAKE) -C ../../ M=$(CURDIR) clean
+ @find $(CURDIR) -type f -name '*~' -delete
++ @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool
+
+-$(LIBBPF): FORCE
++$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
+ # Fix up variables inherited from Kbuild that tools/ build system won't like
+- $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+- LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
++ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
++ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \
++ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
++ $@ install_headers
+
+ BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+-BPFTOOL := $(BPFTOOLDIR)/bpftool
+-$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)
+- $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../
++BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool
++BPFTOOL := $(BPFTOOL_OUTPUT)/bpftool
++$(BPFTOOL): $(LIBBPF) $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT)
++ $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \
++ OUTPUT=$(BPFTOOL_OUTPUT)/ \
++ LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \
++ LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/
++
++$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT):
++ $(call msg,MKDIR,$@)
++ $(Q)mkdir -p $@
+
+ $(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
+ $(call filechk,offsets,__SYSCALL_NRS_H__)
+@@ -309,6 +329,11 @@ verify_target_bpf: verify_cmds
+ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
+ $(src)/*.c: verify_target_bpf $(LIBBPF)
+
++libbpf_hdrs: $(LIBBPF)
++$(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_hdrs
++
++.PHONY: libbpf_hdrs
++
+ $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
+ $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
+ $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
+@@ -320,6 +345,17 @@ $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+ $(obj)/hbm.o: $(src)/hbm.h
+ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
+
++# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
++# TPROGS_CFLAGS causes conflicts
++XDP_SAMPLE_CFLAGS += -Wall -O2 \
++ -I$(src)/../../tools/include \
++ -I$(src)/../../tools/include/uapi \
++ -I$(LIBBPF_INCLUDE) \
++ -I$(src)/../../tools/testing/selftests/bpf
++
++$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
++$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
++
+ -include $(BPF_SAMPLES_PATH)/Makefile.target
+
+ VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
+@@ -329,16 +365,15 @@ VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ ifeq ($(VMLINUX_H),)
++ifeq ($(VMLINUX_BTF),)
++ $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
++ build the kernel or set VMLINUX_BTF or VMLINUX_H variable)
++endif
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ else
+ $(Q)cp "$(VMLINUX_H)" $@
+ endif
+
+-ifeq ($(VMLINUX_BTF),)
+- $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\
+- build the kernel or set VMLINUX_BTF variable)
+-endif
+-
+ clean-files += vmlinux.h
+
+ # Get Clang's default includes on this system, as opposed to those seen by
+@@ -366,7 +401,7 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x
+ $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+- -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \
++ -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+ LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
+@@ -403,7 +438,7 @@ $(obj)/%.o: $(src)/%.c
+ @echo " CLANG-bpf " $@
+ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \
+ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \
+- -I$(srctree)/tools/lib/ \
++ -I$(LIBBPF_INCLUDE) \
+ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \
+ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \
+ -Wno-gnu-variable-sized-type-not-at-end \
+diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
+index 5a368affa0386..7621f55e2947d 100644
+--- a/samples/bpf/Makefile.target
++++ b/samples/bpf/Makefile.target
+@@ -73,14 +73,3 @@ quiet_cmd_tprog-cobjs = CC $@
+ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
+ $(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
+ $(call if_changed_dep,tprog-cobjs)
+-
+-# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+-# TPROGS_CFLAGS causes conflicts
+-XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \
+- -I./tools/include \
+- -I./tools/include/uapi \
+- -I./tools/lib \
+- -I./tools/testing/selftests/bpf
+-$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \
+- $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+- $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $<
+diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
+index b0c18efe7928e..a271099603feb 100644
+--- a/samples/bpf/hbm.c
++++ b/samples/bpf/hbm.c
+@@ -308,6 +308,7 @@ static int run_bpf_prog(char *prog, int cg_id)
+ fout = fopen(fname, "w");
+ fprintf(fout, "id:%d\n", cg_id);
+ fprintf(fout, "ERROR: Could not lookup queue_stats\n");
++ fclose(fout);
+ } else if (stats_flag && qstats.lastPacketTime >
+ qstats.firstPacketTime) {
+ long long delta_us = (qstats.lastPacketTime -
+diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
+index 722b3fadb4675..1752a46a2b056 100644
+--- a/samples/bpf/hbm_kern.h
++++ b/samples/bpf/hbm_kern.h
+@@ -9,8 +9,6 @@
+ * Include file for sample Host Bandwidth Manager (HBM) BPF programs
+ */
+ #define KBUILD_MODNAME "foo"
+-#include <stddef.h>
+-#include <stdbool.h>
+ #include <uapi/linux/bpf.h>
+ #include <uapi/linux/if_ether.h>
+ #include <uapi/linux/if_packet.h>
+diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
+index 9ed63e10e1709..1fa14c54963a1 100644
+--- a/samples/bpf/lwt_len_hist_kern.c
++++ b/samples/bpf/lwt_len_hist_kern.c
+@@ -16,13 +16,6 @@
+ #include <uapi/linux/in.h>
+ #include <bpf/bpf_helpers.h>
+
+-# define printk(fmt, ...) \
+- ({ \
+- char ____fmt[] = fmt; \
+- bpf_trace_printk(____fmt, sizeof(____fmt), \
+- ##__VA_ARGS__); \
+- })
+-
+ struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
+index 8dfe09a92feca..822b0742b8154 100644
+--- a/samples/bpf/tcp_basertt_kern.c
++++ b/samples/bpf/tcp_basertt_kern.c
+@@ -47,7 +47,7 @@ int bpf_basertt(struct bpf_sock_ops *skops)
+ case BPF_SOCK_OPS_BASE_RTT:
+ n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+- if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) {
++ if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) {
+ /* Set base_rtt to 80us */
+ rv = 80;
+ } else if (n) {
+diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
+index acad5712d8b4f..fd602c2774b8b 100644
+--- a/samples/bpf/tracex6_kern.c
++++ b/samples/bpf/tracex6_kern.c
+@@ -2,6 +2,8 @@
+ #include <linux/version.h>
+ #include <uapi/linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
++#include <bpf/bpf_tracing.h>
++#include <bpf/bpf_core_read.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+@@ -45,13 +47,24 @@ int bpf_prog1(struct pt_regs *ctx)
+ return 0;
+ }
+
+-SEC("kprobe/htab_map_lookup_elem")
+-int bpf_prog2(struct pt_regs *ctx)
++/*
++ * Since *_map_lookup_elem can't be expected to trigger bpf programs
++ * due to potential deadlocks (bpf_disable_instrumentation), this bpf
++ * program will be attached to bpf_map_copy_value (which is called
++ * from map_lookup_elem) and will only filter the hashtable type.
++ */
++SEC("kprobe/bpf_map_copy_value")
++int BPF_KPROBE(bpf_prog2, struct bpf_map *map)
+ {
+ u32 key = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value *val, buf;
++ enum bpf_map_type type;
+ int error;
+
++ type = BPF_CORE_READ(map, map_type);
++ if (type != BPF_MAP_TYPE_HASH)
++ return 0;
++
+ error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf));
+ if (error)
+ return 0;
+diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
+index 6e25fba64c72b..a81704d3317ba 100644
+--- a/samples/bpf/xdp_redirect_cpu_user.c
++++ b/samples/bpf/xdp_redirect_cpu_user.c
+@@ -309,7 +309,6 @@ int main(int argc, char **argv)
+ const char *mprog_filename = NULL, *mprog_name = NULL;
+ struct xdp_redirect_cpu *skel;
+ struct bpf_map_info info = {};
+- char ifname_buf[IF_NAMESIZE];
+ struct bpf_cpumap_val value;
+ __u32 infosz = sizeof(info);
+ int ret = EXIT_FAIL_OPTION;
+@@ -325,7 +324,6 @@ int main(int argc, char **argv)
+ int add_cpu = -1;
+ int ifindex = -1;
+ int *cpu, i, opt;
+- char *ifname;
+ __u32 qsize;
+ int n_cpus;
+
+@@ -391,11 +389,10 @@ int main(int argc, char **argv)
+ case 'd':
+ if (strlen(optarg) >= IF_NAMESIZE) {
+ fprintf(stderr, "-d/--dev name too long\n");
++ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
+ }
+- ifname = (char *)&ifname_buf;
+- safe_strncpy(ifname, optarg, sizeof(ifname));
+- ifindex = if_nametoindex(ifname);
++ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = strtoul(optarg, NULL, 0);
+ if (!ifindex) {
+diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
+index b32d821781990..8740838e77679 100644
+--- a/samples/bpf/xdp_sample_user.c
++++ b/samples/bpf/xdp_sample_user.c
+@@ -120,7 +120,10 @@ struct sample_output {
+ __u64 xmit;
+ } totals;
+ struct {
+- __u64 pps;
++ union {
++ __u64 pps;
++ __u64 num;
++ };
+ __u64 drop;
+ __u64 err;
+ } rx_cnt;
+@@ -1322,7 +1325,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+
+ static void sample_summary_print(void)
+ {
+- double period = sample_out.rx_cnt.pps;
++ double num = sample_out.rx_cnt.num;
+
+ if (sample_out.totals.rx) {
+ double pkts = sample_out.totals.rx;
+@@ -1330,7 +1333,7 @@ static void sample_summary_print(void)
+ print_always(" Packets received : %'-10llu\n",
+ sample_out.totals.rx);
+ print_always(" Average packets/s : %'-10.0f\n",
+- sample_round(pkts / period));
++ sample_round(pkts / num));
+ }
+ if (sample_out.totals.redir) {
+ double pkts = sample_out.totals.redir;
+@@ -1338,7 +1341,7 @@ static void sample_summary_print(void)
+ print_always(" Packets redirected : %'-10llu\n",
+ sample_out.totals.redir);
+ print_always(" Average redir/s : %'-10.0f\n",
+- sample_round(pkts / period));
++ sample_round(pkts / num));
+ }
+ if (sample_out.totals.drop)
+ print_always(" Rx dropped : %'-10llu\n",
+@@ -1355,7 +1358,7 @@ static void sample_summary_print(void)
+ print_always(" Packets transmitted : %'-10llu\n",
+ sample_out.totals.xmit);
+ print_always(" Average transmit/s : %'-10.0f\n",
+- sample_round(pkts / period));
++ sample_round(pkts / num));
+ }
+ }
+
+@@ -1422,7 +1425,7 @@ static int sample_stats_collect(struct stats_record *rec)
+ return 0;
+ }
+
+-static void sample_summary_update(struct sample_output *out, int interval)
++static void sample_summary_update(struct sample_output *out)
+ {
+ sample_out.totals.rx += out->totals.rx;
+ sample_out.totals.redir += out->totals.redir;
+@@ -1430,12 +1433,11 @@ static void sample_summary_update(struct sample_output *out, int interval)
+ sample_out.totals.drop_xmit += out->totals.drop_xmit;
+ sample_out.totals.err += out->totals.err;
+ sample_out.totals.xmit += out->totals.xmit;
+- sample_out.rx_cnt.pps += interval;
++ sample_out.rx_cnt.num++;
+ }
+
+ static void sample_stats_print(int mask, struct stats_record *cur,
+- struct stats_record *prev, char *prog_name,
+- int interval)
++ struct stats_record *prev, char *prog_name)
+ {
+ struct sample_output out = {};
+
+@@ -1452,7 +1454,7 @@ static void sample_stats_print(int mask, struct stats_record *cur,
+ else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+- sample_summary_update(&out, interval);
++ sample_summary_update(&out);
+
+ stats_print(prog_name, mask, cur, prev, &out);
+ }
+@@ -1495,7 +1497,7 @@ static void swap(struct stats_record **a, struct stats_record **b)
+ }
+
+ static int sample_timer_cb(int timerfd, struct stats_record **rec,
+- struct stats_record **prev, int interval)
++ struct stats_record **prev)
+ {
+ char line[64] = "Summary";
+ int ret;
+@@ -1524,7 +1526,7 @@ static int sample_timer_cb(int timerfd, struct stats_record **rec,
+ snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
+ }
+
+- sample_stats_print(sample_mask, *rec, *prev, line, interval);
++ sample_stats_print(sample_mask, *rec, *prev, line);
+ return 0;
+ }
+
+@@ -1579,7 +1581,7 @@ int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+ if (pfd[0].revents & POLLIN)
+ ret = sample_signal_cb();
+ else if (pfd[1].revents & POLLIN)
+- ret = sample_timer_cb(timerfd, &rec, &prev, interval);
++ ret = sample_timer_cb(timerfd, &rec, &prev);
+
+ if (ret)
+ break;
+diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
+index d97465ff8c62c..5f44b877ecf5f 100644
+--- a/samples/bpf/xdp_sample_user.h
++++ b/samples/bpf/xdp_sample_user.h
+@@ -45,7 +45,9 @@ const char *get_driver_name(int ifindex);
+ int get_mac_addr(int ifindex, void *mac_addr);
+
+ #pragma GCC diagnostic push
++#ifndef __clang__
+ #pragma GCC diagnostic ignored "-Wstringop-truncation"
++#endif
+ __attribute__((unused))
+ static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+ {
+diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
+index 49d7a6ad7e397..1fb79b3ecdd51 100644
+--- a/samples/bpf/xdpsock_user.c
++++ b/samples/bpf/xdpsock_user.c
+@@ -1673,14 +1673,15 @@ int main(int argc, char **argv)
+
+ setlocale(LC_ALL, "");
+
++ prev_time = get_nsecs();
++ start_time = prev_time;
++
+ if (!opt_quiet) {
+ ret = pthread_create(&pt, NULL, poller, NULL);
+ if (ret)
+ exit_with_error(ret);
+ }
+
+- prev_time = get_nsecs();
+- start_time = prev_time;
+
+ if (opt_bench == BENCH_RXDROP)
+ rx_drop_all();
+diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
+index 5b9a09957c6e0..d620f3da086f7 100644
+--- a/samples/ftrace/ftrace-direct-modify.c
++++ b/samples/ftrace/ftrace-direct-modify.c
+@@ -3,6 +3,9 @@
+ #include <linux/kthread.h>
+ #include <linux/ftrace.h>
+
++extern void my_direct_func1(void);
++extern void my_direct_func2(void);
++
+ void my_direct_func1(void)
+ {
+ trace_printk("my direct func1\n");
+@@ -28,7 +31,7 @@ asm (
+ " call my_direct_func1\n"
+ " leave\n"
+ " .size my_tramp1, .-my_tramp1\n"
+-" ret\n"
++ ASM_RET
+ " .type my_tramp2, @function\n"
+ " .globl my_tramp2\n"
+ " my_tramp2:"
+@@ -36,7 +39,7 @@ asm (
+ " movq %rsp, %rbp\n"
+ " call my_direct_func2\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp2, .-my_tramp2\n"
+ " .popsection\n"
+ );
+diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
+index 3f0079c9bd6fa..4bdd67916ce47 100644
+--- a/samples/ftrace/ftrace-direct-too.c
++++ b/samples/ftrace/ftrace-direct-too.c
+@@ -4,11 +4,14 @@
+ #include <linux/mm.h> /* for handle_mm_fault() */
+ #include <linux/ftrace.h>
+
+-void my_direct_func(struct vm_area_struct *vma,
+- unsigned long address, unsigned int flags)
++extern void my_direct_func(struct vm_area_struct *vma, unsigned long address,
++ unsigned int flags, struct pt_regs *regs);
++
++void my_direct_func(struct vm_area_struct *vma, unsigned long address,
++ unsigned int flags, struct pt_regs *regs)
+ {
+- trace_printk("handle mm fault vma=%p address=%lx flags=%x\n",
+- vma, address, flags);
++ trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n",
++ vma, address, flags, regs);
+ }
+
+ extern void my_tramp(void *);
+@@ -23,12 +26,14 @@ asm (
+ " pushq %rdi\n"
+ " pushq %rsi\n"
+ " pushq %rdx\n"
++" pushq %rcx\n"
+ " call my_direct_func\n"
++" popq %rcx\n"
+ " popq %rdx\n"
+ " popq %rsi\n"
+ " popq %rdi\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp, .-my_tramp\n"
+ " .popsection\n"
+ );
+diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
+index a2729d1ef17f5..1e901bb8d7293 100644
+--- a/samples/ftrace/ftrace-direct.c
++++ b/samples/ftrace/ftrace-direct.c
+@@ -4,6 +4,8 @@
+ #include <linux/sched.h> /* for wake_up_process() */
+ #include <linux/ftrace.h>
+
++extern void my_direct_func(struct task_struct *p);
++
+ void my_direct_func(struct task_struct *p)
+ {
+ trace_printk("waking up %s-%d\n", p->comm, p->pid);
+@@ -22,7 +24,7 @@ asm (
+ " call my_direct_func\n"
+ " popq %rdi\n"
+ " leave\n"
+-" ret\n"
++ ASM_RET
+ " .size my_tramp, .-my_tramp\n"
+ " .popsection\n"
+ );
+diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
+index 5dc1bf3baa98b..228321ecb1616 100644
+--- a/samples/kprobes/kretprobe_example.c
++++ b/samples/kprobes/kretprobe_example.c
+@@ -86,7 +86,7 @@ static int __init kretprobe_init(void)
+ ret = register_kretprobe(&my_kretprobe);
+ if (ret < 0) {
+ pr_err("register_kretprobe failed, returned %d\n", ret);
+- return -1;
++ return ret;
+ }
+ pr_info("Planted return probe at %s: %p\n",
+ my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
+diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
+index 7a15910d21718..c089e9cdaf328 100644
+--- a/samples/landlock/sandboxer.c
++++ b/samples/landlock/sandboxer.c
+@@ -22,9 +22,9 @@
+ #include <unistd.h>
+
+ #ifndef landlock_create_ruleset
+-static inline int landlock_create_ruleset(
+- const struct landlock_ruleset_attr *const attr,
+- const size_t size, const __u32 flags)
++static inline int
++landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
++ const size_t size, const __u32 flags)
+ {
+ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+ }
+@@ -32,17 +32,18 @@ static inline int landlock_create_ruleset(
+
+ #ifndef landlock_add_rule
+ static inline int landlock_add_rule(const int ruleset_fd,
+- const enum landlock_rule_type rule_type,
+- const void *const rule_attr, const __u32 flags)
++ const enum landlock_rule_type rule_type,
++ const void *const rule_attr,
++ const __u32 flags)
+ {
+- return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
+- rule_attr, flags);
++ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
++ flags);
+ }
+ #endif
+
+ #ifndef landlock_restrict_self
+ static inline int landlock_restrict_self(const int ruleset_fd,
+- const __u32 flags)
++ const __u32 flags)
+ {
+ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+ }
+@@ -70,14 +71,17 @@ static int parse_path(char *env_path, const char ***const path_list)
+ return num_paths;
+ }
+
++/* clang-format off */
++
+ #define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE)
+
+-static int populate_ruleset(
+- const char *const env_var, const int ruleset_fd,
+- const __u64 allowed_access)
++/* clang-format on */
++
++static int populate_ruleset(const char *const env_var, const int ruleset_fd,
++ const __u64 allowed_access)
+ {
+ int num_paths, i, ret = 1;
+ char *env_path_name;
+@@ -107,12 +111,10 @@ static int populate_ruleset(
+ for (i = 0; i < num_paths; i++) {
+ struct stat statbuf;
+
+- path_beneath.parent_fd = open(path_list[i], O_PATH |
+- O_CLOEXEC);
++ path_beneath.parent_fd = open(path_list[i], O_PATH | O_CLOEXEC);
+ if (path_beneath.parent_fd < 0) {
+ fprintf(stderr, "Failed to open \"%s\": %s\n",
+- path_list[i],
+- strerror(errno));
++ path_list[i], strerror(errno));
+ goto out_free_name;
+ }
+ if (fstat(path_beneath.parent_fd, &statbuf)) {
+@@ -123,9 +125,10 @@ static int populate_ruleset(
+ if (!S_ISDIR(statbuf.st_mode))
+ path_beneath.allowed_access &= ACCESS_FILE;
+ if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0)) {
+- fprintf(stderr, "Failed to update the ruleset with \"%s\": %s\n",
+- path_list[i], strerror(errno));
++ &path_beneath, 0)) {
++ fprintf(stderr,
++ "Failed to update the ruleset with \"%s\": %s\n",
++ path_list[i], strerror(errno));
+ close(path_beneath.parent_fd);
+ goto out_free_name;
+ }
+@@ -134,10 +137,13 @@ static int populate_ruleset(
+ ret = 0;
+
+ out_free_name:
++ free(path_list);
+ free(env_path_name);
+ return ret;
+ }
+
++/* clang-format off */
++
+ #define ACCESS_FS_ROUGHLY_READ ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+@@ -155,6 +161,8 @@ out_free_name:
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_SYM)
+
++/* clang-format on */
++
+ int main(const int argc, char *const argv[], char *const *const envp)
+ {
+ const char *cmd_path;
+@@ -162,55 +170,64 @@ int main(const int argc, char *const argv[], char *const *const envp)
+ int ruleset_fd;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_FS_ROUGHLY_READ |
+- ACCESS_FS_ROUGHLY_WRITE,
++ ACCESS_FS_ROUGHLY_WRITE,
+ };
+
+ if (argc < 2) {
+- fprintf(stderr, "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
+- ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+- fprintf(stderr, "Launch a command in a restricted environment.\n\n");
++ fprintf(stderr,
++ "usage: %s=\"...\" %s=\"...\" %s <cmd> [args]...\n\n",
++ ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
++ fprintf(stderr,
++ "Launch a command in a restricted environment.\n\n");
+ fprintf(stderr, "Environment variables containing paths, "
+ "each separated by a colon:\n");
+- fprintf(stderr, "* %s: list of paths allowed to be used in a read-only way.\n",
+- ENV_FS_RO_NAME);
+- fprintf(stderr, "* %s: list of paths allowed to be used in a read-write way.\n",
+- ENV_FS_RW_NAME);
+- fprintf(stderr, "\nexample:\n"
+- "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
+- "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
+- "%s bash -i\n",
+- ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
++ fprintf(stderr,
++ "* %s: list of paths allowed to be used in a read-only way.\n",
++ ENV_FS_RO_NAME);
++ fprintf(stderr,
++ "* %s: list of paths allowed to be used in a read-write way.\n",
++ ENV_FS_RW_NAME);
++ fprintf(stderr,
++ "\nexample:\n"
++ "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" "
++ "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" "
++ "%s bash -i\n",
++ ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]);
+ return 1;
+ }
+
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ if (ruleset_fd < 0) {
+ const int err = errno;
+
+ perror("Failed to create a ruleset");
+ switch (err) {
+ case ENOSYS:
+- fprintf(stderr, "Hint: Landlock is not supported by the current kernel. "
+- "To support it, build the kernel with "
+- "CONFIG_SECURITY_LANDLOCK=y and prepend "
+- "\"landlock,\" to the content of CONFIG_LSM.\n");
++ fprintf(stderr,
++ "Hint: Landlock is not supported by the current kernel. "
++ "To support it, build the kernel with "
++ "CONFIG_SECURITY_LANDLOCK=y and prepend "
++ "\"landlock,\" to the content of CONFIG_LSM.\n");
+ break;
+ case EOPNOTSUPP:
+- fprintf(stderr, "Hint: Landlock is currently disabled. "
+- "It can be enabled in the kernel configuration by "
+- "prepending \"landlock,\" to the content of CONFIG_LSM, "
+- "or at boot time by setting the same content to the "
+- "\"lsm\" kernel parameter.\n");
++ fprintf(stderr,
++ "Hint: Landlock is currently disabled. "
++ "It can be enabled in the kernel configuration by "
++ "prepending \"landlock,\" to the content of CONFIG_LSM, "
++ "or at boot time by setting the same content to the "
++ "\"lsm\" kernel parameter.\n");
+ break;
+ }
+ return 1;
+ }
+ if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd,
+- ACCESS_FS_ROUGHLY_READ)) {
++ ACCESS_FS_ROUGHLY_READ)) {
+ goto err_close_ruleset;
+ }
+ if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd,
+- ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE)) {
++ ACCESS_FS_ROUGHLY_READ |
++ ACCESS_FS_ROUGHLY_WRITE)) {
+ goto err_close_ruleset;
+ }
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+@@ -227,7 +244,7 @@ int main(const int argc, char *const argv[], char *const *const envp)
+ cmd_argv = argv + 1;
+ execvpe(cmd_path, cmd_argv, envp);
+ fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
+- strerror(errno));
++ strerror(errno));
+ fprintf(stderr, "Hint: access to the binary, the interpreter or "
+ "shared libraries may be denied.\n");
+ return 1;
+diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
+index 9ec93d90e8a5a..4eb7aa11cfbb2 100644
+--- a/samples/vfio-mdev/mdpy-fb.c
++++ b/samples/vfio-mdev/mdpy-fb.c
+@@ -109,7 +109,7 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
+
+ ret = pci_request_regions(pdev, "mdpy-fb");
+ if (ret < 0)
+- return ret;
++ goto err_disable_dev;
+
+ pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
+ pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width);
+@@ -191,6 +191,9 @@ err_release_fb:
+ err_release_regions:
+ pci_release_regions(pdev);
+
++err_disable_dev:
++ pci_disable_device(pdev);
++
+ return ret;
+ }
+
+@@ -199,7 +202,10 @@ static void mdpy_fb_remove(struct pci_dev *pdev)
+ struct fb_info *info = pci_get_drvdata(pdev);
+
+ unregister_framebuffer(info);
++ iounmap(info->screen_base);
+ framebuffer_release(info);
++ pci_release_regions(pdev);
++ pci_disable_device(pdev);
+ }
+
+ static struct pci_device_id mdpy_fb_pci_table[] = {
+diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
+index cdec22088423e..914ea5eb16a8c 100644
+--- a/scripts/Kbuild.include
++++ b/scripts/Kbuild.include
+@@ -96,8 +96,29 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\
+ quiet_redirect :=
+ silent_redirect := exec >/dev/null;
+
++# Delete the target on interruption
++#
++# GNU Make automatically deletes the target if it has already been changed by
++# the interrupted recipe. So, you can safely stop the build by Ctrl-C (Make
++# will delete incomplete targets), and resume it later.
++#
++# However, this does not work when the stderr is piped to another program, like
++# $ make >&2 | tee log
++# Make dies with SIGPIPE before cleaning the targets.
++#
++# To address it, we clean the target in signal traps.
++#
++# Make deletes the target when it catches SIGHUP, SIGINT, SIGQUIT, SIGTERM.
++# So, we cover them, and also SIGPIPE just in case.
++#
++# Of course, this is unneeded for phony targets.
++delete-on-interrupt = \
++ $(if $(filter-out $(PHONY), $@), \
++ $(foreach sig, HUP INT QUIT TERM PIPE, \
++ trap 'rm -f $@; trap - $(sig); kill -s $(sig) $$$$' $(sig);))
++
+ # printing commands
+-cmd = @set -e; $(echo-cmd) $($(quiet)redirect) $(cmd_$(1))
++cmd = @set -e; $(echo-cmd) $($(quiet)redirect) $(delete-on-interrupt) $(cmd_$(1))
+
+ ###
+ # if_changed - execute command if any prerequisite is newer than
+diff --git a/scripts/Makefile b/scripts/Makefile
+index 9adb6d247818f..e2a239829556c 100644
+--- a/scripts/Makefile
++++ b/scripts/Makefile
+@@ -3,8 +3,8 @@
+ # scripts contains sources for various helper programs used throughout
+ # the kernel for the build process.
+
+-CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto)
+-CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null)
++CRYPTO_LIBS = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
++CRYPTO_CFLAGS = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null)
+
+ hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c
+ hostprogs-always-$(CONFIG_KALLSYMS) += kallsyms
+diff --git a/scripts/Makefile.build b/scripts/Makefile.build
+index 3efc984d4c690..17aa8ef2d52a7 100644
+--- a/scripts/Makefile.build
++++ b/scripts/Makefile.build
+@@ -224,6 +224,18 @@ cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),
+ endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
+
+ ifdef CONFIG_STACK_VALIDATION
++
++objtool_args = \
++ $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \
++ $(if $(part-of-module), --module) \
++ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \
++ $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\
++ $(if $(CONFIG_RETPOLINE), --retpoline) \
++ $(if $(CONFIG_RETHUNK), --rethunk) \
++ $(if $(CONFIG_X86_SMAP), --uaccess) \
++ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
++ $(if $(CONFIG_SLS), --sls)
++
+ ifndef CONFIG_LTO_CLANG
+
+ __objtool_obj := $(objtree)/tools/objtool/objtool
+diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
+index d538255038747..f182700e0ac19 100644
+--- a/scripts/Makefile.extrawarn
++++ b/scripts/Makefile.extrawarn
+@@ -51,6 +51,8 @@ KBUILD_CFLAGS += -Wno-sign-compare
+ KBUILD_CFLAGS += -Wno-format-zero-length
+ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
+ KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
++KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
++KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict)
+ endif
+
+ endif
+diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
+index 4aad284800355..36814be80264a 100644
+--- a/scripts/Makefile.gcc-plugins
++++ b/scripts/Makefile.gcc-plugins
+@@ -6,7 +6,7 @@ gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += latent_entropy_plugin.so
+ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) \
+ += -DLATENT_ENTROPY_PLUGIN
+ ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+- DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable
++ DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable -ULATENT_ENTROPY_PLUGIN
+ endif
+ export DISABLE_LATENT_ENTROPY_PLUGIN
+
+diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
+index 54582673fc1a0..0a8a4689c3ebb 100644
+--- a/scripts/Makefile.lib
++++ b/scripts/Makefile.lib
+@@ -232,17 +232,6 @@ ifeq ($(CONFIG_LTO_CLANG),y)
+ mod-prelink-ext := .lto
+ endif
+
+-# Objtool arguments are also needed for modfinal with LTO, so we define
+-# then here to avoid duplication.
+-objtool_args = \
+- $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \
+- $(if $(part-of-module), --module) \
+- $(if $(CONFIG_FRAME_POINTER),, --no-fp) \
+- $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\
+- $(if $(CONFIG_RETPOLINE), --retpoline) \
+- $(if $(CONFIG_X86_SMAP), --uaccess) \
+- $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)
+-
+ # Useful for describing the dependency of composite objects
+ # Usage:
+ # $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add)
+diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
+index ff805777431ce..47f047458264f 100644
+--- a/scripts/Makefile.modfinal
++++ b/scripts/Makefile.modfinal
+@@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=))
+ part-of-module = y
+
+ quiet_cmd_cc_o_c = CC [M] $@
+- cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI), $(c_flags)) -c -o $@ $<
++ cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $<
+
+ %.mod.o: %.mod.c FORCE
+ $(call if_changed_dep,cc_o_c)
+@@ -40,7 +40,7 @@ quiet_cmd_ld_ko_o = LD [M] $@
+ quiet_cmd_btf_ko = BTF [M] $@
+ cmd_btf_ko = \
+ if [ -f vmlinux ]; then \
+- LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \
++ LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \
+ else \
+ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
+ fi;
+diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
+index 48585c4d04ade..0273bf7375e26 100644
+--- a/scripts/Makefile.modpost
++++ b/scripts/Makefile.modpost
+@@ -87,8 +87,7 @@ obj := $(KBUILD_EXTMOD)
+ src := $(obj)
+
+ # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS
+-include $(if $(wildcard $(KBUILD_EXTMOD)/Kbuild), \
+- $(KBUILD_EXTMOD)/Kbuild, $(KBUILD_EXTMOD)/Makefile)
++include $(if $(wildcard $(src)/Kbuild), $(src)/Kbuild, $(src)/Makefile)
+
+ # modpost option for external modules
+ MODPOST += -e
+diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
+index 9e2092fd5206c..7099c603ff0ad 100644
+--- a/scripts/Makefile.ubsan
++++ b/scripts/Makefile.ubsan
+@@ -8,7 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS) += -fsanitize=local-bounds
+ ubsan-cflags-$(CONFIG_UBSAN_SHIFT) += -fsanitize=shift
+ ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero
+ ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable
+-ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE) += -fsanitize=object-size
+ ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool
+ ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum
+ ubsan-cflags-$(CONFIG_UBSAN_TRAP) += -fsanitize-undefined-trap-on-error
+diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c
+index adabd41452640..985fb81cae79b 100644
+--- a/scripts/asn1_compiler.c
++++ b/scripts/asn1_compiler.c
+@@ -625,7 +625,7 @@ int main(int argc, char **argv)
+ p = strrchr(argv[1], '/');
+ p = p ? p + 1 : argv[1];
+ grammar_name = strdup(p);
+- if (!p) {
++ if (!grammar_name) {
+ perror(NULL);
+ exit(1);
+ }
+diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire
+index 803ba75610766..a0ea1d26e6b2e 100755
+--- a/scripts/atomic/fallbacks/read_acquire
++++ b/scripts/atomic/fallbacks/read_acquire
+@@ -2,6 +2,15 @@ cat <<EOF
+ static __always_inline ${ret}
+ arch_${atomic}_read_acquire(const ${atomic}_t *v)
+ {
+- return smp_load_acquire(&(v)->counter);
++ ${int} ret;
++
++ if (__native_word(${atomic}_t)) {
++ ret = smp_load_acquire(&(v)->counter);
++ } else {
++ ret = arch_${atomic}_read(v);
++ __atomic_acquire_fence();
++ }
++
++ return ret;
+ }
+ EOF
+diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release
+index 86ede759f24ea..05cdb7f42477a 100755
+--- a/scripts/atomic/fallbacks/set_release
++++ b/scripts/atomic/fallbacks/set_release
+@@ -2,6 +2,11 @@ cat <<EOF
+ static __always_inline void
+ arch_${atomic}_set_release(${atomic}_t *v, ${int} i)
+ {
+- smp_store_release(&(v)->counter, i);
++ if (__native_word(${atomic}_t)) {
++ smp_store_release(&(v)->counter, i);
++ } else {
++ __atomic_release_fence();
++ arch_${atomic}_set(v, i);
++ }
+ }
+ EOF
+diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py
+index 217d21abc86e8..36c920e713137 100755
+--- a/scripts/checkkconfigsymbols.py
++++ b/scripts/checkkconfigsymbols.py
+@@ -115,7 +115,7 @@ def parse_options():
+ return args
+
+
+-def main():
++def print_undefined_symbols():
+ """Main function of this module."""
+ args = parse_options()
+
+@@ -467,5 +467,16 @@ def parse_kconfig_file(kfile):
+ return defined, references
+
+
++def main():
++ try:
++ print_undefined_symbols()
++ except BrokenPipeError:
++ # Python flushes standard streams on exit; redirect remaining output
++ # to devnull to avoid another BrokenPipeError at shutdown
++ devnull = os.open(os.devnull, os.O_WRONLY)
++ os.dup2(devnull, sys.stdout.fileno())
++ sys.exit(1) # Python exits with error code 1 on EPIPE
++
++
+ if __name__ == "__main__":
+ main()
+diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
+index c27d2312cfc30..88cb294dc4472 100755
+--- a/scripts/checkpatch.pl
++++ b/scripts/checkpatch.pl
+@@ -489,7 +489,8 @@ our $Attribute = qr{
+ ____cacheline_aligned|
+ ____cacheline_aligned_in_smp|
+ ____cacheline_internodealigned_in_smp|
+- __weak
++ __weak|
++ __alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\)
+ }x;
+ our $Modifier;
+ our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__};
+diff --git a/scripts/clang-tools/run-clang-tools.py b/scripts/clang-tools/run-clang-tools.py
+index f754415af398b..f42699134f1c0 100755
+--- a/scripts/clang-tools/run-clang-tools.py
++++ b/scripts/clang-tools/run-clang-tools.py
+@@ -60,14 +60,21 @@ def run_analysis(entry):
+
+
+ def main():
+- args = parse_arguments()
++ try:
++ args = parse_arguments()
+
+- lock = multiprocessing.Lock()
+- pool = multiprocessing.Pool(initializer=init, initargs=(lock, args))
+- # Read JSON data into the datastore variable
+- with open(args.path, "r") as f:
+- datastore = json.load(f)
+- pool.map(run_analysis, datastore)
++ lock = multiprocessing.Lock()
++ pool = multiprocessing.Pool(initializer=init, initargs=(lock, args))
++ # Read JSON data into the datastore variable
++ with open(args.path, "r") as f:
++ datastore = json.load(f)
++ pool.map(run_analysis, datastore)
++ except BrokenPipeError:
++ # Python flushes standard streams on exit; redirect remaining output
++ # to devnull to avoid another BrokenPipeError at shutdown
++ devnull = os.open(os.devnull, os.O_WRONLY)
++ os.dup2(devnull, sys.stdout.fileno())
++ sys.exit(1) # Python exits with error code 1 on EPIPE
+
+
+ if __name__ == "__main__":
+diff --git a/scripts/diffconfig b/scripts/diffconfig
+index d5da5fa05d1d3..43f0f3d273ae7 100755
+--- a/scripts/diffconfig
++++ b/scripts/diffconfig
+@@ -65,7 +65,7 @@ def print_config(op, config, value, new_value):
+ else:
+ print(" %s %s -> %s" % (config, value, new_value))
+
+-def main():
++def show_diff():
+ global merge_style
+
+ # parse command line args
+@@ -129,4 +129,16 @@ def main():
+ for config in new:
+ print_config("+", config, None, b[config])
+
+-main()
++def main():
++ try:
++ show_diff()
++ except BrokenPipeError:
++ # Python flushes standard streams on exit; redirect remaining output
++ # to devnull to avoid another BrokenPipeError at shutdown
++ devnull = os.open(os.devnull, os.O_WRONLY)
++ os.dup2(devnull, sys.stdout.fileno())
++ sys.exit(1) # Python exits with error code 1 on EPIPE
++
++
++if __name__ == '__main__':
++ main()
+diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile
+index 95aaf7431bffa..2d5f274d6efde 100644
+--- a/scripts/dtc/Makefile
++++ b/scripts/dtc/Makefile
+@@ -18,7 +18,7 @@ fdtoverlay-objs := $(libfdt) fdtoverlay.o util.o
+ # Source files need to get at the userspace version of libfdt_env.h to compile
+ HOST_EXTRACFLAGS += -I $(srctree)/$(src)/libfdt
+
+-ifeq ($(shell pkg-config --exists yaml-0.1 2>/dev/null && echo yes),)
++ifeq ($(shell $(HOSTPKG_CONFIG) --exists yaml-0.1 2>/dev/null && echo yes),)
+ ifneq ($(CHECK_DT_BINDING)$(CHECK_DTBS),)
+ $(error dtc needs libyaml for DT schema validation support. \
+ Install the necessary libyaml development package.)
+@@ -27,9 +27,9 @@ HOST_EXTRACFLAGS += -DNO_YAML
+ else
+ dtc-objs += yamltree.o
+ # To include <yaml.h> installed in a non-default path
+-HOSTCFLAGS_yamltree.o := $(shell pkg-config --cflags yaml-0.1)
++HOSTCFLAGS_yamltree.o := $(shell $(HOSTPKG_CONFIG) --cflags yaml-0.1)
+ # To link libyaml installed in a non-default path
+-HOSTLDLIBS_dtc := $(shell pkg-config yaml-0.1 --libs)
++HOSTLDLIBS_dtc := $(shell $(HOSTPKG_CONFIG) --libs yaml-0.1)
+ endif
+
+ # Generated files need one more search path to include headers in source tree
+diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff
+index d3422ee15e300..f2bbde4bba86b 100755
+--- a/scripts/dtc/dtx_diff
++++ b/scripts/dtc/dtx_diff
+@@ -59,12 +59,8 @@ Otherwise DTx is treated as a dts source file (aka .dts).
+ or '/include/' to be processed.
+
+ If DTx_1 and DTx_2 are in different architectures, then this script
+- may not work since \${ARCH} is part of the include path. Two possible
+- workarounds:
+-
+- `basename $0` \\
+- <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\
+- <(ARCH=arch_of_dtx_2 `basename $0` DTx_2)
++ may not work since \${ARCH} is part of the include path. The following
++ workaround can be used:
+
+ `basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts
+ `basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts
+diff --git a/scripts/dummy-tools/dummy-plugin-dir/include/plugin-version.h b/scripts/dummy-tools/dummy-plugin-dir/include/plugin-version.h
+new file mode 100644
+index 0000000000000..e69de29bb2d1d
+diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc
+index b2483149bbe55..7db8258434355 100755
+--- a/scripts/dummy-tools/gcc
++++ b/scripts/dummy-tools/gcc
+@@ -96,12 +96,8 @@ fi
+
+ # To set GCC_PLUGINS
+ if arg_contain -print-file-name=plugin "$@"; then
+- plugin_dir=$(mktemp -d)
+-
+- mkdir -p $plugin_dir/include
+- touch $plugin_dir/include/plugin-version.h
+-
+- echo $plugin_dir
++ # Use $0 to find the in-tree dummy directory
++ echo "$(dirname "$(readlink -f "$0")")/dummy-plugin-dir"
+ exit 0
+ fi
+
+diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c
+index 3bc48c726c41c..79ecbbfe37cd7 100644
+--- a/scripts/extract-cert.c
++++ b/scripts/extract-cert.c
+@@ -23,6 +23,13 @@
+ #include <openssl/err.h>
+ #include <openssl/engine.h>
+
++/*
++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
++ *
++ * Remove this if/when that API is no longer used
++ */
++#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
++
+ #define PKEY_ID_PKCS7 2
+
+ static __attribute__((noreturn))
+diff --git a/scripts/faddr2line b/scripts/faddr2line
+index 6c6439f69a725..9e730b805e87c 100755
+--- a/scripts/faddr2line
++++ b/scripts/faddr2line
+@@ -44,17 +44,6 @@
+ set -o errexit
+ set -o nounset
+
+-READELF="${CROSS_COMPILE:-}readelf"
+-ADDR2LINE="${CROSS_COMPILE:-}addr2line"
+-SIZE="${CROSS_COMPILE:-}size"
+-NM="${CROSS_COMPILE:-}nm"
+-
+-command -v awk >/dev/null 2>&1 || die "awk isn't installed"
+-command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
+-command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed"
+-command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed"
+-command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
+-
+ usage() {
+ echo "usage: faddr2line [--list] <object file> <func+offset> <func+offset>..." >&2
+ exit 1
+@@ -69,6 +58,14 @@ die() {
+ exit 1
+ }
+
++READELF="${CROSS_COMPILE:-}readelf"
++ADDR2LINE="${CROSS_COMPILE:-}addr2line"
++AWK="awk"
++
++command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed"
++command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed"
++command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed"
++
+ # Try to figure out the source directory prefix so we can remove it from the
+ # addr2line output. HACK ALERT: This assumes that start_kernel() is in
+ # init/main.c! This only works for vmlinux. Otherwise it falls back to
+@@ -76,7 +73,8 @@ die() {
+ find_dir_prefix() {
+ local objfile=$1
+
+- local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
++ local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' |
++ ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}')
+ [[ -z $start_kernel_addr ]] && return
+
+ local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
+@@ -97,86 +95,158 @@ __faddr2line() {
+ local dir_prefix=$3
+ local print_warnings=$4
+
+- local func=${func_addr%+*}
+- local offset=${func_addr#*+}
+- offset=${offset%/*}
+- local size=
+- [[ $func_addr =~ "/" ]] && size=${func_addr#*/}
++ local sym_name=${func_addr%+*}
++ local func_offset=${func_addr#*+}
++ func_offset=${func_offset%/*}
++ local user_size=
++ local file_type
++ local is_vmlinux=0
++ [[ $func_addr =~ "/" ]] && user_size=${func_addr#*/}
+
+- if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then
++ if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then
+ warn "bad func+offset $func_addr"
+ DONE=1
+ return
+ fi
+
++ # vmlinux uses absolute addresses in the section table rather than
++ # section offsets.
++ local file_type=$(${READELF} --file-header $objfile |
++ ${AWK} '$1 == "Type:" { print $2; exit }')
++ if [[ $file_type = "EXEC" ]] || [[ $file_type == "DYN" ]]; then
++ is_vmlinux=1
++ fi
++
+ # Go through each of the object's symbols which match the func name.
+- # In rare cases there might be duplicates.
+- file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}')
+- while read symbol; do
+- local fields=($symbol)
+- local sym_base=0x${fields[0]}
+- local sym_type=${fields[1]}
+- local sym_end=${fields[3]}
+-
+- # calculate the size
+- local sym_size=$(($sym_end - $sym_base))
++ # In rare cases there might be duplicates, in which case we print all
++ # matches.
++ while read line; do
++ local fields=($line)
++ local sym_addr=0x${fields[1]}
++ local sym_elf_size=${fields[2]}
++ local sym_sec=${fields[6]}
++ local sec_size
++ local sec_name
++
++ # Get the section size:
++ sec_size=$(${READELF} --section-headers --wide $objfile |
++ sed 's/\[ /\[/' |
++ ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }')
++
++ if [[ -z $sec_size ]]; then
++ warn "bad section size: section: $sym_sec"
++ DONE=1
++ return
++ fi
++
++ # Get the section name:
++ sec_name=$(${READELF} --section-headers --wide $objfile |
++ sed 's/\[ /\[/' |
++ ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }')
++
++ if [[ -z $sec_name ]]; then
++ warn "bad section name: section: $sym_sec"
++ DONE=1
++ return
++ fi
++
++ # Calculate the symbol size.
++ #
++ # Unfortunately we can't use the ELF size, because kallsyms
++ # also includes the padding bytes in its size calculation. For
++ # kallsyms, the size calculation is the distance between the
++ # symbol and the next symbol in a sorted list.
++ local sym_size
++ local cur_sym_addr
++ local found=0
++ while read line; do
++ local fields=($line)
++ cur_sym_addr=0x${fields[1]}
++ local cur_sym_elf_size=${fields[2]}
++ local cur_sym_name=${fields[7]:-}
++
++ if [[ $cur_sym_addr = $sym_addr ]] &&
++ [[ $cur_sym_elf_size = $sym_elf_size ]] &&
++ [[ $cur_sym_name = $sym_name ]]; then
++ found=1
++ continue
++ fi
++
++ if [[ $found = 1 ]]; then
++ sym_size=$(($cur_sym_addr - $sym_addr))
++ [[ $sym_size -lt $sym_elf_size ]] && continue;
++ found=2
++ break
++ fi
++ done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2)
++
++ if [[ $found = 0 ]]; then
++ warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size"
++ DONE=1
++ return
++ fi
++
++ # If nothing was found after the symbol, assume it's the last
++ # symbol in the section.
++ [[ $found = 1 ]] && sym_size=$(($sec_size - $sym_addr))
++
+ if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
+- warn "bad symbol size: base: $sym_base end: $sym_end"
++ warn "bad symbol size: sym_addr: $sym_addr cur_sym_addr: $cur_sym_addr"
+ DONE=1
+ return
+ fi
++
+ sym_size=0x$(printf %x $sym_size)
+
+- # calculate the address
+- local addr=$(($sym_base + $offset))
++ # Calculate the address from user-supplied offset:
++ local addr=$(($sym_addr + $func_offset))
+ if [[ -z $addr ]] || [[ $addr = 0 ]]; then
+- warn "bad address: $sym_base + $offset"
++ warn "bad address: $sym_addr + $func_offset"
+ DONE=1
+ return
+ fi
+ addr=0x$(printf %x $addr)
+
+- # weed out non-function symbols
+- if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
+- [[ $print_warnings = 1 ]] &&
+- echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
+- continue
+- fi
+-
+- # if the user provided a size, make sure it matches the symbol's size
+- if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
++ # If the user provided a size, make sure it matches the symbol's size:
++ if [[ -n $user_size ]] && [[ $user_size -ne $sym_size ]]; then
+ [[ $print_warnings = 1 ]] &&
+- echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
++ echo "skipping $sym_name address at $addr due to size mismatch ($user_size != $sym_size)"
+ continue;
+ fi
+
+- # make sure the provided offset is within the symbol's range
+- if [[ $offset -gt $sym_size ]]; then
++ # Make sure the provided offset is within the symbol's range:
++ if [[ $func_offset -gt $sym_size ]]; then
+ [[ $print_warnings = 1 ]] &&
+- echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
++ echo "skipping $sym_name address at $addr due to size mismatch ($func_offset > $sym_size)"
+ continue
+ fi
+
+- # separate multiple entries with a blank line
++ # In case of duplicates or multiple addresses specified on the
++ # cmdline, separate multiple entries with a blank line:
+ [[ $FIRST = 0 ]] && echo
+ FIRST=0
+
+- # pass real address to addr2line
+- echo "$func+$offset/$sym_size:"
+- local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
+- [[ -z $file_lines ]] && return
++ echo "$sym_name+$func_offset/$sym_size:"
++
++ # Pass section address to addr2line and strip absolute paths
++ # from the output:
++ local args="--functions --pretty-print --inlines --exe=$objfile"
++ [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name"
++ local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
++ [[ -z $output ]] && continue
+
++ # Default output (non --list):
+ if [[ $LIST = 0 ]]; then
+- echo "$file_lines" | while read -r line
++ echo "$output" | while read -r line
+ do
+ echo $line
+ done
+ DONE=1;
+- return
++ continue
+ fi
+
+- # show each line with context
+- echo "$file_lines" | while read -r line
++ # For --list, show each line with its corresponding source code:
++ echo "$output" | while read -r line
+ do
+ echo
+ echo $line
+@@ -184,12 +254,12 @@ __faddr2line() {
+ n1=$[$n-5]
+ n2=$[$n+5]
+ f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
+- awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f
++ ${AWK} 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f
+ done
+
+ DONE=1
+
+- done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
++ done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn')
+ }
+
+ [[ $# -lt 2 ]] && usage
+diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
+index 0c087614fc3e4..3f3c37bc14e80 100644
+--- a/scripts/gcc-plugins/gcc-common.h
++++ b/scripts/gcc-plugins/gcc-common.h
+@@ -77,7 +77,9 @@
+ #include "varasm.h"
+ #include "stor-layout.h"
+ #include "internal-fn.h"
++#include "gimple.h"
+ #include "gimple-expr.h"
++#include "gimple-iterator.h"
+ #include "gimple-fold.h"
+ #include "context.h"
+ #include "tree-ssa-alias.h"
+@@ -91,11 +93,9 @@
+ #include "tree-eh.h"
+ #include "stmt.h"
+ #include "gimplify.h"
+-#include "gimple.h"
+ #include "tree-ssa-operands.h"
+ #include "tree-phinodes.h"
+ #include "tree-cfg.h"
+-#include "gimple-iterator.h"
+ #include "gimple-ssa.h"
+ #include "ssa-iterators.h"
+
+diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c
+index 589454bce9301..8425da41de0da 100644
+--- a/scripts/gcc-plugins/latent_entropy_plugin.c
++++ b/scripts/gcc-plugins/latent_entropy_plugin.c
+@@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = {
+ .help = "disable\tturn off latent entropy instrumentation\n",
+ };
+
+-static unsigned HOST_WIDE_INT seed;
+-/*
+- * get_random_seed() (this is a GCC function) generates the seed.
+- * This is a simple random generator without any cryptographic security because
+- * the entropy doesn't come from here.
+- */
++static unsigned HOST_WIDE_INT deterministic_seed;
++static unsigned HOST_WIDE_INT rnd_buf[32];
++static size_t rnd_idx = ARRAY_SIZE(rnd_buf);
++static int urandom_fd = -1;
++
+ static unsigned HOST_WIDE_INT get_random_const(void)
+ {
+- unsigned int i;
+- unsigned HOST_WIDE_INT ret = 0;
+-
+- for (i = 0; i < 8 * sizeof(ret); i++) {
+- ret = (ret << 1) | (seed & 1);
+- seed >>= 1;
+- if (ret & 1)
+- seed ^= 0xD800000000000000ULL;
++ if (deterministic_seed) {
++ unsigned HOST_WIDE_INT w = deterministic_seed;
++ w ^= w << 13;
++ w ^= w >> 7;
++ w ^= w << 17;
++ deterministic_seed = w;
++ return deterministic_seed;
+ }
+
+- return ret;
++ if (urandom_fd < 0) {
++ urandom_fd = open("/dev/urandom", O_RDONLY);
++ gcc_assert(urandom_fd >= 0);
++ }
++ if (rnd_idx >= ARRAY_SIZE(rnd_buf)) {
++ gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf));
++ rnd_idx = 0;
++ }
++ return rnd_buf[rnd_idx++];
+ }
+
+ static tree tree_get_random_const(tree type)
+@@ -537,8 +543,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused,
+ tree type, id;
+ int quals;
+
+- seed = get_random_seed(false);
+-
+ if (in_lto_p)
+ return;
+
+@@ -573,6 +577,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info,
+ const struct plugin_argument * const argv = plugin_info->argv;
+ int i;
+
++ /*
++ * Call get_random_seed() with noinit=true, so that this returns
++ * 0 in the case where no seed has been passed via -frandom-seed.
++ */
++ deterministic_seed = get_random_seed(true);
++
+ static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = {
+ {
+ .base = &latent_entropy_decl,
+diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
+index e9db7dcb3e5f4..b04aa8e91a41f 100644
+--- a/scripts/gcc-plugins/stackleak_plugin.c
++++ b/scripts/gcc-plugins/stackleak_plugin.c
+@@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void)
+ return 0;
+ }
+
++/*
++ * STRING_CST may or may not be NUL terminated:
++ * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html
++ */
++static inline bool string_equal(tree node, const char *string, int length)
++{
++ if (TREE_STRING_LENGTH(node) < length)
++ return false;
++ if (TREE_STRING_LENGTH(node) > length + 1)
++ return false;
++ if (TREE_STRING_LENGTH(node) == length + 1 &&
++ TREE_STRING_POINTER(node)[length] != '\0')
++ return false;
++ return !memcmp(TREE_STRING_POINTER(node), string, length);
++}
++#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str))
++
+ static bool stackleak_gate(void)
+ {
+ tree section;
+@@ -438,13 +455,13 @@ static bool stackleak_gate(void)
+ if (section && TREE_VALUE(section)) {
+ section = TREE_VALUE(TREE_VALUE(section));
+
+- if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
++ if (STRING_EQUAL(section, ".init.text"))
+ return false;
+- if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
++ if (STRING_EQUAL(section, ".devinit.text"))
+ return false;
+- if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
++ if (STRING_EQUAL(section, ".cpuinit.text"))
+ return false;
+- if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
++ if (STRING_EQUAL(section, ".meminit.text"))
+ return false;
+ }
+
+diff --git a/scripts/gdb/linux/clk.py b/scripts/gdb/linux/clk.py
+index 061aecfa294e6..7a01fdc3e8446 100644
+--- a/scripts/gdb/linux/clk.py
++++ b/scripts/gdb/linux/clk.py
+@@ -41,6 +41,8 @@ are cached and potentially out of date"""
+ self.show_subtree(child, level + 1)
+
+ def invoke(self, arg, from_tty):
++ if utils.gdb_eval_or_none("clk_root_list") is None:
++ raise gdb.GdbError("No clocks registered")
+ gdb.write(" enable prepare protect \n")
+ gdb.write(" clock count count count rate \n")
+ gdb.write("------------------------------------------------------------------------\n")
+diff --git a/scripts/gdb/linux/config.py b/scripts/gdb/linux/config.py
+index 90e1565b19671..8843ab3cbaddc 100644
+--- a/scripts/gdb/linux/config.py
++++ b/scripts/gdb/linux/config.py
+@@ -24,9 +24,9 @@ class LxConfigDump(gdb.Command):
+ filename = arg
+
+ try:
+- py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8")
+- py_config_size = gdb.parse_and_eval(
+- "sizeof(kernel_config_data) - 1 - 8 * 2")
++ py_config_ptr = gdb.parse_and_eval("&kernel_config_data")
++ py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end")
++ py_config_size = py_config_ptr_end - py_config_ptr
+ except gdb.error as e:
+ raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?")
+
+diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
+index 2efbec6b6b8db..08f0587d15ea1 100644
+--- a/scripts/gdb/linux/constants.py.in
++++ b/scripts/gdb/linux/constants.py.in
+@@ -39,6 +39,8 @@
+
+ import gdb
+
++LX_CONFIG(CONFIG_DEBUG_INFO_REDUCED)
++
+ /* linux/clk-provider.h */
+ if IS_BUILTIN(CONFIG_COMMON_CLK):
+ LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
+diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
+index a92c55bd8de54..c771831eb077d 100644
+--- a/scripts/gdb/linux/dmesg.py
++++ b/scripts/gdb/linux/dmesg.py
+@@ -22,7 +22,6 @@ prb_desc_type = utils.CachedType("struct prb_desc")
+ prb_desc_ring_type = utils.CachedType("struct prb_desc_ring")
+ prb_data_ring_type = utils.CachedType("struct prb_data_ring")
+ printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer")
+-atomic_long_type = utils.CachedType("atomic_long_t")
+
+ class LxDmesg(gdb.Command):
+ """Print Linux kernel log buffer."""
+@@ -44,19 +43,17 @@ class LxDmesg(gdb.Command):
+ sz = prb_desc_ring_type.get_type().sizeof
+ desc_ring = utils.read_memoryview(inf, addr, sz).tobytes()
+
+- # read in descriptor array
++ # read in descriptor count, size, and address
+ off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8
+ desc_ring_count = 1 << utils.read_u32(desc_ring, off)
+ desc_sz = prb_desc_type.get_type().sizeof
+ off = prb_desc_ring_type.get_type()['descs'].bitpos // 8
+- addr = utils.read_ulong(desc_ring, off)
+- descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes()
++ desc_addr = utils.read_ulong(desc_ring, off)
+
+- # read in info array
++ # read in info size and address
+ info_sz = printk_info_type.get_type().sizeof
+ off = prb_desc_ring_type.get_type()['infos'].bitpos // 8
+- addr = utils.read_ulong(desc_ring, off)
+- infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes()
++ info_addr = utils.read_ulong(desc_ring, off)
+
+ # read in text data ring structure
+ off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8
+@@ -64,14 +61,11 @@ class LxDmesg(gdb.Command):
+ sz = prb_data_ring_type.get_type().sizeof
+ text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes()
+
+- # read in text data
++ # read in text data size and address
+ off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8
+ text_data_sz = 1 << utils.read_u32(text_data_ring, off)
+ off = prb_data_ring_type.get_type()['data'].bitpos // 8
+- addr = utils.read_ulong(text_data_ring, off)
+- text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes()
+-
+- counter_off = atomic_long_type.get_type()['counter'].bitpos // 8
++ text_data_addr = utils.read_ulong(text_data_ring, off)
+
+ sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8
+
+@@ -92,9 +86,9 @@ class LxDmesg(gdb.Command):
+
+ # read in tail and head descriptor ids
+ off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8
+- tail_id = utils.read_u64(desc_ring, off + counter_off)
++ tail_id = utils.read_atomic_long(desc_ring, off)
+ off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8
+- head_id = utils.read_u64(desc_ring, off + counter_off)
++ head_id = utils.read_atomic_long(desc_ring, off)
+
+ did = tail_id
+ while True:
+@@ -102,17 +96,20 @@ class LxDmesg(gdb.Command):
+ desc_off = desc_sz * ind
+ info_off = info_sz * ind
+
++ desc = utils.read_memoryview(inf, desc_addr + desc_off, desc_sz).tobytes()
++
+ # skip non-committed record
+- state = 3 & (utils.read_u64(descs, desc_off + sv_off +
+- counter_off) >> desc_flags_shift)
++ state = 3 & (utils.read_atomic_long(desc, sv_off) >> desc_flags_shift)
+ if state != desc_committed and state != desc_finalized:
+ if did == head_id:
+ break
+ did = (did + 1) & desc_id_mask
+ continue
+
+- begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz
+- end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz
++ begin = utils.read_ulong(desc, begin_off) % text_data_sz
++ end = utils.read_ulong(desc, next_off) % text_data_sz
++
++ info = utils.read_memoryview(inf, info_addr + info_off, info_sz).tobytes()
+
+ # handle data-less record
+ if begin & 1 == 1:
+@@ -125,16 +122,17 @@ class LxDmesg(gdb.Command):
+ # skip over descriptor id
+ text_start = begin + utils.get_long_type().sizeof
+
+- text_len = utils.read_u16(infos, info_off + len_off)
++ text_len = utils.read_u16(info, len_off)
+
+ # handle truncated message
+ if end - text_start < text_len:
+ text_len = end - text_start
+
+- text = text_data[text_start:text_start + text_len].decode(
+- encoding='utf8', errors='replace')
++ text_data = utils.read_memoryview(inf, text_data_addr + text_start,
++ text_len).tobytes()
++ text = text_data[0:text_len].decode(encoding='utf8', errors='replace')
+
+- time_stamp = utils.read_u64(infos, info_off + ts_off)
++ time_stamp = utils.read_u64(info, ts_off)
+
+ for line in text.splitlines():
+ msg = u"[{time:12.6f}] {line}\n".format(
+diff --git a/scripts/gdb/linux/genpd.py b/scripts/gdb/linux/genpd.py
+index 39cd1abd85590..b53649c0a77a6 100644
+--- a/scripts/gdb/linux/genpd.py
++++ b/scripts/gdb/linux/genpd.py
+@@ -5,7 +5,7 @@
+ import gdb
+ import sys
+
+-from linux.utils import CachedType
++from linux.utils import CachedType, gdb_eval_or_none
+ from linux.lists import list_for_each_entry
+
+ generic_pm_domain_type = CachedType('struct generic_pm_domain')
+@@ -70,6 +70,8 @@ Output is similar to /sys/kernel/debug/pm_genpd/pm_genpd_summary'''
+ gdb.write(' %-50s %s\n' % (kobj_path, rtpm_status_str(dev)))
+
+ def invoke(self, arg, from_tty):
++ if gdb_eval_or_none("&gpd_list") is None:
++ raise gdb.GdbError("No power domain(s) registered")
+ gdb.write('domain status children\n');
+ gdb.write(' /device runtime status\n');
+ gdb.write('----------------------------------------------------------------------\n');
+diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py
+index 071d0dd5a6349..51def847f1ef9 100644
+--- a/scripts/gdb/linux/timerlist.py
++++ b/scripts/gdb/linux/timerlist.py
+@@ -73,7 +73,7 @@ def print_cpu(hrtimer_bases, cpu, max_clock_bases):
+ ts = cpus.per_cpu(tick_sched_ptr, cpu)
+
+ text = "cpu: {}\n".format(cpu)
+- for i in xrange(max_clock_bases):
++ for i in range(max_clock_bases):
+ text += " clock {}:\n".format(i)
+ text += print_base(cpu_base['clock_base'][i])
+
+@@ -158,6 +158,8 @@ def pr_cpumask(mask):
+ num_bytes = (nr_cpu_ids + 7) / 8
+ buf = utils.read_memoryview(inf, bits, num_bytes).tobytes()
+ buf = binascii.b2a_hex(buf)
++ if type(buf) is not str:
++ buf=buf.decode()
+
+ chunks = []
+ i = num_bytes
+diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py
+index ff7c1799d588f..7f36aee32ac66 100644
+--- a/scripts/gdb/linux/utils.py
++++ b/scripts/gdb/linux/utils.py
+@@ -35,13 +35,12 @@ class CachedType:
+
+
+ long_type = CachedType("long")
+-
++atomic_long_type = CachedType("atomic_long_t")
+
+ def get_long_type():
+ global long_type
+ return long_type.get_type()
+
+-
+ def offset_of(typeobj, field):
+ element = gdb.Value(0).cast(typeobj)
+ return int(str(element[field].address).split()[0], 16)
+@@ -89,7 +88,10 @@ def get_target_endianness():
+
+
+ def read_memoryview(inf, start, length):
+- return memoryview(inf.read_memory(start, length))
++ m = inf.read_memory(start, length)
++ if type(m) is memoryview:
++ return m
++ return memoryview(m)
+
+
+ def read_u16(buffer, offset):
+@@ -129,6 +131,17 @@ def read_ulong(buffer, offset):
+ else:
+ return read_u32(buffer, offset)
+
++atomic_long_counter_offset = atomic_long_type.get_type()['counter'].bitpos
++atomic_long_counter_sizeof = atomic_long_type.get_type()['counter'].type.sizeof
++
++def read_atomic_long(buffer, offset):
++ global atomic_long_counter_offset
++ global atomic_long_counter_sizeof
++
++ if atomic_long_counter_sizeof == 8:
++ return read_u64(buffer, offset + atomic_long_counter_offset)
++ else:
++ return read_u32(buffer, offset + atomic_long_counter_offset)
+
+ target_arch = None
+
+diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py
+index 4136dc2c59df2..cd03de50c3bec 100644
+--- a/scripts/gdb/vmlinux-gdb.py
++++ b/scripts/gdb/vmlinux-gdb.py
+@@ -22,6 +22,10 @@ except:
+ gdb.write("NOTE: gdb 7.2 or later required for Linux helper scripts to "
+ "work.\n")
+ else:
++ import linux.constants
++ if linux.constants.LX_CONFIG_DEBUG_INFO_REDUCED:
++ raise gdb.GdbError("Reduced debug information will prevent GDB "
++ "from having complete types.\n")
+ import linux.utils
+ import linux.symbols
+ import linux.modules
+@@ -32,7 +36,6 @@ else:
+ import linux.lists
+ import linux.rbtree
+ import linux.proc
+- import linux.constants
+ import linux.timerlist
+ import linux.clk
+ import linux.genpd
+diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
+index cf72680cd7692..797c8bad3837a 100644
+--- a/scripts/kconfig/confdata.c
++++ b/scripts/kconfig/confdata.c
+@@ -983,14 +983,19 @@ static int conf_write_dep(const char *name)
+
+ static int conf_touch_deps(void)
+ {
+- const char *name;
++ const char *name, *tmp;
+ struct symbol *sym;
+ int res, i;
+
+- strcpy(depfile_path, "include/config/");
+- depfile_prefix_len = strlen(depfile_path);
+-
+ name = conf_get_autoconfig_name();
++ tmp = strrchr(name, '/');
++ depfile_prefix_len = tmp ? tmp - name + 1 : 0;
++ if (depfile_prefix_len + 1 > sizeof(depfile_path))
++ return -1;
++
++ strncpy(depfile_path, name, depfile_prefix_len);
++ depfile_path[depfile_prefix_len] = 0;
++
+ conf_read_simple(name, S_DEF_AUTO);
+ sym_calc_value(modules_sym);
+
+@@ -1119,10 +1124,12 @@ static void (*conf_changed_callback)(void);
+
+ void conf_set_changed(bool val)
+ {
+- if (conf_changed_callback && conf_changed != val)
+- conf_changed_callback();
++ bool changed = conf_changed != val;
+
+ conf_changed = val;
++
++ if (conf_changed_callback && changed)
++ conf_changed_callback();
+ }
+
+ bool conf_get_changed(void)
+diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh
+index 480ecd8b9f415..cbd90c28c05f2 100755
+--- a/scripts/kconfig/gconf-cfg.sh
++++ b/scripts/kconfig/gconf-cfg.sh
+@@ -3,14 +3,14 @@
+
+ PKG="gtk+-2.0 gmodule-2.0 libglade-2.0"
+
+-if [ -z "$(command -v pkg-config)" ]; then
++if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
+ echo >&2 "*"
+- echo >&2 "* 'make gconfig' requires 'pkg-config'. Please install it."
++ echo >&2 "* 'make gconfig' requires '${HOSTPKG_CONFIG}'. Please install it."
+ echo >&2 "*"
+ exit 1
+ fi
+
+-if ! pkg-config --exists $PKG; then
++if ! ${HOSTPKG_CONFIG} --exists $PKG; then
+ echo >&2 "*"
+ echo >&2 "* Unable to find the GTK+ installation. Please make sure that"
+ echo >&2 "* the GTK+ 2.0 development package is correctly installed."
+@@ -19,12 +19,12 @@ if ! pkg-config --exists $PKG; then
+ exit 1
+ fi
+
+-if ! pkg-config --atleast-version=2.0.0 gtk+-2.0; then
++if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then
+ echo >&2 "*"
+ echo >&2 "* GTK+ is present but version >= 2.0.0 is required."
+ echo >&2 "*"
+ exit 1
+ fi
+
+-echo cflags=\"$(pkg-config --cflags $PKG)\"
+-echo libs=\"$(pkg-config --libs $PKG)\"
++echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
++echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh
+index b520e407a8ebb..025b565e0b7cd 100755
+--- a/scripts/kconfig/mconf-cfg.sh
++++ b/scripts/kconfig/mconf-cfg.sh
+@@ -4,16 +4,16 @@
+ PKG="ncursesw"
+ PKG2="ncurses"
+
+-if [ -n "$(command -v pkg-config)" ]; then
+- if pkg-config --exists $PKG; then
+- echo cflags=\"$(pkg-config --cflags $PKG)\"
+- echo libs=\"$(pkg-config --libs $PKG)\"
++if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then
++ if ${HOSTPKG_CONFIG} --exists $PKG; then
++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+ exit 0
+ fi
+
+- if pkg-config --exists $PKG2; then
+- echo cflags=\"$(pkg-config --cflags $PKG2)\"
+- echo libs=\"$(pkg-config --libs $PKG2)\"
++ if ${HOSTPKG_CONFIG} --exists $PKG2; then
++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\"
++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\"
+ exit 0
+ fi
+ fi
+@@ -46,7 +46,7 @@ echo >&2 "* Unable to find the ncurses package."
+ echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev"
+ echo >&2 "* depending on your distribution)."
+ echo >&2 "*"
+-echo >&2 "* You may also need to install pkg-config to find the"
++echo >&2 "* You may also need to install ${HOSTPKG_CONFIG} to find the"
+ echo >&2 "* ncurses installed in a non-default location."
+ echo >&2 "*"
+ exit 1
+diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh
+index c212255070c0c..3a10bac2adb3a 100755
+--- a/scripts/kconfig/nconf-cfg.sh
++++ b/scripts/kconfig/nconf-cfg.sh
+@@ -4,16 +4,16 @@
+ PKG="ncursesw menuw panelw"
+ PKG2="ncurses menu panel"
+
+-if [ -n "$(command -v pkg-config)" ]; then
+- if pkg-config --exists $PKG; then
+- echo cflags=\"$(pkg-config --cflags $PKG)\"
+- echo libs=\"$(pkg-config --libs $PKG)\"
++if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then
++ if ${HOSTPKG_CONFIG} --exists $PKG; then
++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+ exit 0
+ fi
+
+- if pkg-config --exists $PKG2; then
+- echo cflags=\"$(pkg-config --cflags $PKG2)\"
+- echo libs=\"$(pkg-config --libs $PKG2)\"
++ if ${HOSTPKG_CONFIG} --exists $PKG2; then
++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\"
++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\"
+ exit 0
+ fi
+ fi
+@@ -44,7 +44,7 @@ echo >&2 "* Unable to find the ncurses package."
+ echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev"
+ echo >&2 "* depending on your distribution)."
+ echo >&2 "*"
+-echo >&2 "* You may also need to install pkg-config to find the"
++echo >&2 "* You may also need to install ${HOSTPKG_CONFIG} to find the"
+ echo >&2 "* ncurses installed in a non-default location."
+ echo >&2 "*"
+ exit 1
+diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c
+index 0590f86df6e40..d1f5bcff4b62d 100644
+--- a/scripts/kconfig/preprocess.c
++++ b/scripts/kconfig/preprocess.c
+@@ -141,7 +141,7 @@ static char *do_lineno(int argc, char *argv[])
+ static char *do_shell(int argc, char *argv[])
+ {
+ FILE *p;
+- char buf[256];
++ char buf[4096];
+ char *cmd;
+ size_t nread;
+ int i;
+@@ -396,6 +396,9 @@ static char *eval_clause(const char *str, size_t len, int argc, char *argv[])
+
+ p++;
+ }
++
++ if (new_argc >= FUNCTION_MAX_ARGS)
++ pperror("too many function arguments");
+ new_argv[new_argc++] = prev;
+
+ /*
+diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh
+index fa564cd795b7c..9b695e5cd9b37 100755
+--- a/scripts/kconfig/qconf-cfg.sh
++++ b/scripts/kconfig/qconf-cfg.sh
+@@ -3,22 +3,22 @@
+
+ PKG="Qt5Core Qt5Gui Qt5Widgets"
+
+-if [ -z "$(command -v pkg-config)" ]; then
++if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
+ echo >&2 "*"
+- echo >&2 "* 'make xconfig' requires 'pkg-config'. Please install it."
++ echo >&2 "* 'make xconfig' requires '${HOSTPKG_CONFIG}'. Please install it."
+ echo >&2 "*"
+ exit 1
+ fi
+
+-if pkg-config --exists $PKG; then
+- echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags $PKG)\"
+- echo libs=\"$(pkg-config --libs $PKG)\"
+- echo moc=\"$(pkg-config --variable=host_bins Qt5Core)/moc\"
++if ${HOSTPKG_CONFIG} --exists $PKG; then
++ echo cflags=\"-std=c++11 -fPIC $(${HOSTPKG_CONFIG} --cflags $PKG)\"
++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
++ echo moc=\"$(${HOSTPKG_CONFIG} --variable=host_bins Qt5Core)/moc\"
+ exit 0
+ fi
+
+ echo >&2 "*"
+-echo >&2 "* Could not find Qt5 via pkg-config."
++echo >&2 "* Could not find Qt5 via ${HOSTPKG_CONFIG}."
+ echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH"
+ echo >&2 "*"
+ exit 1
+diff --git a/scripts/kernel-doc b/scripts/kernel-doc
+index cfcb607379577..5d54b57ff90cc 100755
+--- a/scripts/kernel-doc
++++ b/scripts/kernel-doc
+@@ -1245,6 +1245,13 @@ sub dump_struct($$) {
+ $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
+ $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
+ $members =~ s/\s*____cacheline_aligned/ /gos;
++ # unwrap struct_group():
++ # - first eat non-declaration parameters and rewrite for final match
++ # - then remove macro, outer parens, and trailing semicolon
++ $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
++ $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
++ $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
++ $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
+
+ my $args = qr{([^,)]+)};
+ # replace DECLARE_BITMAP
+@@ -1256,6 +1263,8 @@ sub dump_struct($$) {
+ $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
+ # replace DECLARE_KFIFO_PTR
+ $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
++ # replace DECLARE_FLEX_ARRAY
++ $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
+ my $declaration = $members;
+
+ # Split nested struct/union elements as newer ones
+diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl
+index b2d8b8aa2d99e..8f636a23bc3f2 100755
+--- a/scripts/leaking_addresses.pl
++++ b/scripts/leaking_addresses.pl
+@@ -455,8 +455,9 @@ sub parse_file
+
+ open my $fh, "<", $file or return;
+ while ( <$fh> ) {
++ chomp;
+ if (may_leak_address($_)) {
+- print $file . ': ' . $_;
++ printf("$file: $_\n");
+ }
+ }
+ close $fh;
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index d74cee5c4326a..57ef6accbb40f 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -120,6 +120,9 @@ objtool_link()
+
+ if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
+ objtoolopt="${objtoolopt} --noinstr"
++ if is_enabled CONFIG_CPU_UNRET_ENTRY; then
++ objtoolopt="${objtoolopt} --unret"
++ fi
+ fi
+
+ if [ -n "${objtoolopt}" ]; then
+@@ -139,6 +142,9 @@ objtool_link()
+ if [ -n "${CONFIG_X86_SMAP}" ]; then
+ objtoolopt="${objtoolopt} --uaccess"
+ fi
++ if [ -n "${CONFIG_SLS}" ]; then
++ objtoolopt="${objtoolopt} --sls"
++ fi
+ info OBJTOOL ${1}
+ tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1}
+ fi
+@@ -205,7 +211,6 @@ vmlinux_link()
+ gen_btf()
+ {
+ local pahole_ver
+- local extra_paholeopt=
+
+ if ! [ -x "$(command -v ${PAHOLE})" ]; then
+ echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
+@@ -220,16 +225,8 @@ gen_btf()
+
+ vmlinux_link ${1}
+
+- if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then
+- # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
+- extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars"
+- fi
+- if [ "${pahole_ver}" -ge "121" ]; then
+- extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
+- fi
+-
+ info "BTF" ${2}
+- LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1}
++ LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1}
+
+ # Create ${2} which contains just .BTF section but no symbols. Add
+ # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
+diff --git a/scripts/mksysmap b/scripts/mksysmap
+index 9aa23d15862a0..ad8bbc52267d0 100755
+--- a/scripts/mksysmap
++++ b/scripts/mksysmap
+@@ -41,4 +41,4 @@
+ # so we just ignore them to let readprofile continue to work.
+ # (At least sparc64 has __crc_ in the middle).
+
+-$NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)\|\( \.L\)' > $2
++$NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)\|\( \.L\)\|\( L0\)' > $2
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index cb8ab7d91d307..c6e655e0ed988 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -669,7 +669,7 @@ static void handle_modversion(const struct module *mod,
+ unsigned int crc;
+
+ if (sym->st_shndx == SHN_UNDEF) {
+- warn("EXPORT symbol \"%s\" [%s%s] version ...\n"
++ warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n"
+ "Is \"%s\" prototyped in <asm/asm-prototypes.h>?\n",
+ symname, mod->name, mod->is_vmlinux ? "" : ".ko",
+ symname);
+@@ -1108,7 +1108,7 @@ static const struct sectioncheck sectioncheck[] = {
+ },
+ /* Do not export init/exit functions or data */
+ {
+- .fromsec = { "__ksymtab*", NULL },
++ .fromsec = { "___ksymtab*", NULL },
+ .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
+ .mismatch = EXPORT_TO_INIT_EXIT,
+ .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
+@@ -1260,7 +1260,8 @@ static int secref_whitelist(const struct sectioncheck *mismatch,
+
+ static inline int is_arm_mapping_symbol(const char *str)
+ {
+- return str[0] == '$' && strchr("axtd", str[1])
++ return str[0] == '$' &&
++ (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x')
+ && (str[2] == '\0' || str[2] == '.');
+ }
+
+@@ -1301,6 +1302,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr,
+ if (relsym->st_name != 0)
+ return relsym;
+
++ /*
++ * Strive to find a better symbol name, but the resulting name may not
++ * match the symbol referenced in the original code.
++ */
+ relsym_secindex = get_secindex(elf, relsym);
+ for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+ if (get_secindex(elf, sym) != relsym_secindex)
+@@ -1605,49 +1610,12 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf,
+
+ static int is_executable_section(struct elf_info* elf, unsigned int section_index)
+ {
+- if (section_index > elf->num_sections)
++ if (section_index >= elf->num_sections)
+ fatal("section_index is outside elf->num_sections!\n");
+
+ return ((elf->sechdrs[section_index].sh_flags & SHF_EXECINSTR) == SHF_EXECINSTR);
+ }
+
+-/*
+- * We rely on a gross hack in section_rel[a]() calling find_extable_entry_size()
+- * to know the sizeof(struct exception_table_entry) for the target architecture.
+- */
+-static unsigned int extable_entry_size = 0;
+-static void find_extable_entry_size(const char* const sec, const Elf_Rela* r)
+-{
+- /*
+- * If we're currently checking the second relocation within __ex_table,
+- * that relocation offset tells us the offsetof(struct
+- * exception_table_entry, fixup) which is equal to sizeof(struct
+- * exception_table_entry) divided by two. We use that to our advantage
+- * since there's no portable way to get that size as every architecture
+- * seems to go with different sized types. Not pretty but better than
+- * hard-coding the size for every architecture..
+- */
+- if (!extable_entry_size)
+- extable_entry_size = r->r_offset * 2;
+-}
+-
+-static inline bool is_extable_fault_address(Elf_Rela *r)
+-{
+- /*
+- * extable_entry_size is only discovered after we've handled the
+- * _second_ relocation in __ex_table, so only abort when we're not
+- * handling the first reloc and extable_entry_size is zero.
+- */
+- if (r->r_offset && extable_entry_size == 0)
+- fatal("extable_entry size hasn't been discovered!\n");
+-
+- return ((r->r_offset == 0) ||
+- (r->r_offset % extable_entry_size == 0));
+-}
+-
+-#define is_second_extable_reloc(Start, Cur, Sec) \
+- (((Cur) == (Start) + 1) && (strcmp("__ex_table", (Sec)) == 0))
+-
+ static void report_extable_warnings(const char* modname, struct elf_info* elf,
+ const struct sectioncheck* const mismatch,
+ Elf_Rela* r, Elf_Sym* sym,
+@@ -1704,22 +1672,9 @@ static void extable_mismatch_handler(const char* modname, struct elf_info *elf,
+ "You might get more information about where this is\n"
+ "coming from by using scripts/check_extable.sh %s\n",
+ fromsec, (long)r->r_offset, tosec, modname);
+- else if (!is_executable_section(elf, get_secindex(elf, sym))) {
+- if (is_extable_fault_address(r))
+- fatal("The relocation at %s+0x%lx references\n"
+- "section \"%s\" which is not executable, IOW\n"
+- "it is not possible for the kernel to fault\n"
+- "at that address. Something is seriously wrong\n"
+- "and should be fixed.\n",
+- fromsec, (long)r->r_offset, tosec);
+- else
+- fatal("The relocation at %s+0x%lx references\n"
+- "section \"%s\" which is not executable, IOW\n"
+- "the kernel will fault if it ever tries to\n"
+- "jump to it. Something is seriously wrong\n"
+- "and should be fixed.\n",
+- fromsec, (long)r->r_offset, tosec);
+- }
++ else if (!is_executable_section(elf, get_secindex(elf, sym)))
++ error("%s+0x%lx references non-executable section '%s'\n",
++ fromsec, (long)r->r_offset, tosec);
+ }
+
+ static void check_section_mismatch(const char *modname, struct elf_info *elf,
+@@ -1780,19 +1735,33 @@ static int addend_386_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
+ #define R_ARM_THM_JUMP19 51
+ #endif
+
++static int32_t sign_extend32(int32_t value, int index)
++{
++ uint8_t shift = 31 - index;
++
++ return (int32_t)(value << shift) >> shift;
++}
++
+ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
+ {
+ unsigned int r_typ = ELF_R_TYPE(r->r_info);
++ Elf_Sym *sym = elf->symtab_start + ELF_R_SYM(r->r_info);
++ void *loc = reloc_location(elf, sechdr, r);
++ uint32_t inst;
++ int32_t offset;
+
+ switch (r_typ) {
+ case R_ARM_ABS32:
+- /* From ARM ABI: (S + A) | T */
+- r->r_addend = (int)(long)
+- (elf->symtab_start + ELF_R_SYM(r->r_info));
++ inst = TO_NATIVE(*(uint32_t *)loc);
++ r->r_addend = inst + sym->st_value;
+ break;
+ case R_ARM_PC24:
+ case R_ARM_CALL:
+ case R_ARM_JUMP24:
++ inst = TO_NATIVE(*(uint32_t *)loc);
++ offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
++ r->r_addend = offset + sym->st_value + 8;
++ break;
+ case R_ARM_THM_CALL:
+ case R_ARM_THM_JUMP24:
+ case R_ARM_THM_JUMP19:
+@@ -1870,8 +1839,6 @@ static void section_rela(const char *modname, struct elf_info *elf,
+ /* Skip special sections */
+ if (is_shndx_special(sym->st_shndx))
+ continue;
+- if (is_second_extable_reloc(start, rela, fromsec))
+- find_extable_entry_size(fromsec, &r);
+ check_section_mismatch(modname, elf, &r, sym, fromsec);
+ }
+ }
+@@ -1930,8 +1897,6 @@ static void section_rel(const char *modname, struct elf_info *elf,
+ /* Skip special sections */
+ if (is_shndx_special(sym->st_shndx))
+ continue;
+- if (is_second_extable_reloc(start, rel, fromsec))
+- find_extable_entry_size(fromsec, &r);
+ check_section_mismatch(modname, elf, &r, sym, fromsec);
+ }
+ }
+@@ -1971,7 +1936,7 @@ static char *remove_dot(char *s)
+
+ if (n && s[n]) {
+ size_t m = strspn(s + n + 1, "0123456789");
+- if (m && (s[n + m] == '.' || s[n + m] == 0))
++ if (m && (s[n + m + 1] == '.' || s[n + m + 1] == 0))
+ s[n] = 0;
+
+ /* strip trailing .lto */
+diff --git a/scripts/module.lds.S b/scripts/module.lds.S
+index 1d0e1e4dc3d2a..3a3aa2354ed86 100644
+--- a/scripts/module.lds.S
++++ b/scripts/module.lds.S
+@@ -27,6 +27,8 @@ SECTIONS {
+ .ctors 0 : ALIGN(8) { *(SORT(.ctors.*)) *(.ctors) }
+ .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) }
+
++ .altinstructions 0 : ALIGN(8) { KEEP(*(.altinstructions)) }
++ __bug_table 0 : ALIGN(8) { KEEP(*(__bug_table)) }
+ __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) }
+
+ __patchable_function_entries : { *(__patchable_function_entries) }
+diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
+index 60a2a63a5e900..32d528a367868 100755
+--- a/scripts/package/mkdebian
++++ b/scripts/package/mkdebian
+@@ -236,7 +236,7 @@ binary-arch: build-arch
+ KBUILD_BUILD_VERSION=${revision} -f \$(srctree)/Makefile intdeb-pkg
+
+ clean:
+- rm -rf debian/*tmp debian/files
++ rm -rf debian/files debian/linux-*
+ \$(MAKE) clean
+
+ binary: binary-arch
+diff --git a/scripts/package/mkspec b/scripts/package/mkspec
+index 7c477ca7dc982..951cc60e5a903 100755
+--- a/scripts/package/mkspec
++++ b/scripts/package/mkspec
+@@ -85,10 +85,10 @@ $S
+ mkdir -p %{buildroot}/boot
+ %ifarch ia64
+ mkdir -p %{buildroot}/boot/efi
+- cp \$($MAKE image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
++ cp \$($MAKE -s image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
+ ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/
+ %else
+- cp \$($MAKE image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
++ cp \$($MAKE -s image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
+ %endif
+ $M $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+ $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
+diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh
+new file mode 100755
+index 0000000000000..d38fa6d84d62a
+--- /dev/null
++++ b/scripts/pahole-flags.sh
+@@ -0,0 +1,24 @@
++#!/bin/sh
++# SPDX-License-Identifier: GPL-2.0
++
++extra_paholeopt=
++
++if ! [ -x "$(command -v ${PAHOLE})" ]; then
++ exit 0
++fi
++
++pahole_ver=$($(dirname $0)/pahole-version.sh ${PAHOLE})
++
++if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then
++ # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
++ extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars"
++fi
++if [ "${pahole_ver}" -ge "121" ]; then
++ extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
++fi
++
++if [ "${pahole_ver}" -ge "124" ]; then
++ extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_enum64"
++fi
++
++echo ${extra_paholeopt}
+diff --git a/scripts/pahole-version.sh b/scripts/pahole-version.sh
+new file mode 100755
+index 0000000000000..f8a32ab93ad12
+--- /dev/null
++++ b/scripts/pahole-version.sh
+@@ -0,0 +1,13 @@
++#!/bin/sh
++# SPDX-License-Identifier: GPL-2.0
++#
++# Usage: $ ./pahole-version.sh pahole
++#
++# Prints pahole's version in a 3-digit form, such as 119 for v1.19.
++
++if [ ! -x "$(command -v "$@")" ]; then
++ echo 0
++ exit 1
++fi
++
++"$@" --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'
+diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
+index cce12e1971d85..ec692af8ce9eb 100644
+--- a/scripts/recordmcount.c
++++ b/scripts/recordmcount.c
+@@ -102,6 +102,7 @@ static ssize_t uwrite(void const *const buf, size_t const count)
+ {
+ size_t cnt = count;
+ off_t idx = 0;
++ void *p = NULL;
+
+ file_updated = 1;
+
+@@ -109,7 +110,10 @@ static ssize_t uwrite(void const *const buf, size_t const count)
+ off_t aoffset = (file_ptr + count) - file_end;
+
+ if (aoffset > file_append_size) {
+- file_append = realloc(file_append, aoffset);
++ p = realloc(file_append, aoffset);
++ if (!p)
++ free(file_append);
++ file_append = p;
+ file_append_size = aoffset;
+ }
+ if (!file_append) {
+diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
+index 7d631aaa0ae11..3ccb2c70add4d 100755
+--- a/scripts/recordmcount.pl
++++ b/scripts/recordmcount.pl
+@@ -219,7 +219,7 @@ if ($arch eq "x86_64") {
+
+ } elsif ($arch eq "s390" && $bits == 64) {
+ if ($cc =~ /-DCC_USING_HOTPATCH/) {
+- $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$";
++ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
+ $mcount_adjust = 0;
+ }
+ $alignment = 8;
+diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh
+index 2dccf141241d7..20af56ce245c5 100755
+--- a/scripts/selinux/install_policy.sh
++++ b/scripts/selinux/install_policy.sh
+@@ -78,7 +78,7 @@ cd /etc/selinux/dummy/contexts/files
+ $SF -F file_contexts /
+
+ mounts=`cat /proc/$$/mounts | \
+- egrep "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \
++ grep -E "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \
+ awk '{ print $2 '}`
+ $SF -F file_contexts $mounts
+
+diff --git a/scripts/sign-file.c b/scripts/sign-file.c
+index fbd34b8e8f578..7434e9ea926e2 100644
+--- a/scripts/sign-file.c
++++ b/scripts/sign-file.c
+@@ -29,6 +29,13 @@
+ #include <openssl/err.h>
+ #include <openssl/engine.h>
+
++/*
++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
++ *
++ * Remove this if/when that API is no longer used
++ */
++#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
++
+ /*
+ * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to
+ * assume that it's not available and its header file is missing and that we
+diff --git a/scripts/sorttable.c b/scripts/sorttable.c
+index 6ee4fa882919c..278bb53b325c1 100644
+--- a/scripts/sorttable.c
++++ b/scripts/sorttable.c
+@@ -240,7 +240,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
+
+ w(r(loc) + i, loc);
+ w(r(loc + 1) + i + 4, loc + 1);
+- w(r(loc + 2) + i + 8, loc + 2);
++ /* Don't touch the fixup type */
+
+ i += sizeof(uint32_t) * 3;
+ }
+@@ -253,7 +253,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
+
+ w(r(loc) - i, loc);
+ w(r(loc + 1) - (i + 4), loc + 1);
+- w(r(loc + 2) - (i + 8), loc + 2);
++ /* Don't touch the fixup type */
+
+ i += sizeof(uint32_t) * 3;
+ }
+diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
+index 288e86a9d1e58..f126ecbb0494d 100755
+--- a/scripts/sphinx-pre-install
++++ b/scripts/sphinx-pre-install
+@@ -78,6 +78,7 @@ my %texlive = (
+ 'ucs.sty' => 'texlive-ucs',
+ 'upquote.sty' => 'texlive-upquote',
+ 'wrapfig.sty' => 'texlive-wrapfig',
++ 'ctexhook.sty' => 'texlive-ctex',
+ );
+
+ #
+@@ -369,6 +370,9 @@ sub give_debian_hints()
+ );
+
+ if ($pdf) {
++ check_missing_file(["/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty"],
++ "texlive-lang-chinese", 2);
++
+ check_missing_file(["/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"],
+ "fonts-dejavu", 2);
+
+diff --git a/scripts/tags.sh b/scripts/tags.sh
+index db8ba411860a5..d40cae4e7ea28 100755
+--- a/scripts/tags.sh
++++ b/scripts/tags.sh
+@@ -32,6 +32,13 @@ else
+ ignore="$ignore ( -path ${tree}tools ) -prune -o"
+ fi
+
++# gtags(1) refuses to index any file outside of its current working dir.
++# If gtags indexing is requested and the build output directory is not
++# the kernel source tree, index all files in absolute-path form.
++if [[ "$1" == "gtags" && -n "${tree}" ]]; then
++ tree=$(realpath "$tree")/
++fi
++
+ # Detect if ALLSOURCE_ARCHS is set. If not, we assume SRCARCH
+ if [ "${ALLSOURCE_ARCHS}" = "" ]; then
+ ALLSOURCE_ARCHS=${SRCARCH}
+@@ -95,10 +102,13 @@ all_sources()
+
+ all_compiled_sources()
+ {
+- realpath -es $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) \
+- include/generated/autoconf.h $(find $ignore -name "*.cmd" -exec \
+- grep -Poh '(?(?=^source_.* \K).*|(?=^ \K\S).*(?= \\))' {} \+ |
+- awk '!a[$0]++') | sort -u
++ {
++ echo include/generated/autoconf.h
++ find $ignore -name "*.cmd" -exec \
++ sed -n -E 's/^source_.* (.*)/\1/p; s/^ (\S.*) \\/\1/p' {} \+ |
++ awk '!a[$0]++'
++ } | xargs realpath -es $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) |
++ sort -u
+ }
+
+ all_target_sources()
+@@ -128,7 +138,7 @@ docscope()
+
+ dogtags()
+ {
+- all_target_sources | gtags -i -f -
++ all_target_sources | gtags -i -C "${tree:-.}" -f - "$PWD"
+ }
+
+ # Basic regular expressions with an optional /kind-spec/ for ctags and
+diff --git a/scripts/tracing/ftrace-bisect.sh b/scripts/tracing/ftrace-bisect.sh
+index 926701162bc83..bb4f59262bbe9 100755
+--- a/scripts/tracing/ftrace-bisect.sh
++++ b/scripts/tracing/ftrace-bisect.sh
+@@ -12,7 +12,7 @@
+ # (note, if this is a problem with function_graph tracing, then simply
+ # replace "function" with "function_graph" in the following steps).
+ #
+-# # cd /sys/kernel/debug/tracing
++# # cd /sys/kernel/tracing
+ # # echo schedule > set_ftrace_filter
+ # # echo function > current_tracer
+ #
+@@ -20,22 +20,40 @@
+ #
+ # # echo nop > current_tracer
+ #
+-# # cat available_filter_functions > ~/full-file
++# Starting with v5.1 this can be done with numbers, making it much faster:
++#
++# The old (slow) way, for kernels before v5.1.
++#
++# [old-way] # cat available_filter_functions > ~/full-file
++#
++# [old-way] *** Note *** this process will take several minutes to update the
++# [old-way] filters. Setting multiple functions is an O(n^2) operation, and we
++# [old-way] are dealing with thousands of functions. So go have coffee, talk
++# [old-way] with your coworkers, read facebook. And eventually, this operation
++# [old-way] will end.
++#
++# The new way (using numbers) is an O(n) operation, and usually takes less than a second.
++#
++# seq `wc -l available_filter_functions | cut -d' ' -f1` > ~/full-file
++#
++# This will create a sequence of numbers that match the functions in
++# available_filter_functions, and when echoing in a number into the
++# set_ftrace_filter file, it will enable the corresponding function in
++# O(1) time. Making enabling all functions O(n) where n is the number of
++# functions to enable.
++#
++# For either the new or old way, the rest of the operations remain the same.
++#
+ # # ftrace-bisect ~/full-file ~/test-file ~/non-test-file
+ # # cat ~/test-file > set_ftrace_filter
+ #
+-# *** Note *** this will take several minutes. Setting multiple functions is
+-# an O(n^2) operation, and we are dealing with thousands of functions. So go
+-# have coffee, talk with your coworkers, read facebook. And eventually, this
+-# operation will end.
+-#
+ # # echo function > current_tracer
+ #
+ # If it crashes, we know that ~/test-file has a bad function.
+ #
+ # Reboot back to test kernel.
+ #
+-# # cd /sys/kernel/debug/tracing
++# # cd /sys/kernel/tracing
+ # # mv ~/test-file ~/full-file
+ #
+ # If it didn't crash.
+diff --git a/security/Kconfig b/security/Kconfig
+index 0ced7fd33e4d0..5d412b3ddc496 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -54,17 +54,6 @@ config SECURITY_NETWORK
+ implement socket and networking access controls.
+ If you are unsure how to answer this question, answer N.
+
+-config PAGE_TABLE_ISOLATION
+- bool "Remove the kernel mapping in user mode"
+- default y
+- depends on (X86_64 || X86_PAE) && !UML
+- help
+- This feature reduces the number of hardware side channels by
+- ensuring that the majority of kernel addresses are not mapped
+- into userspace.
+-
+- See Documentation/x86/pti.rst for more details.
+-
+ config SECURITY_INFINIBAND
+ bool "Infiniband Security Hooks"
+ depends on SECURITY && INFINIBAND
+@@ -191,6 +180,9 @@ config HARDENED_USERCOPY_PAGESPAN
+ config FORTIFY_SOURCE
+ bool "Harden common str/mem functions against buffer overflows"
+ depends on ARCH_HAS_FORTIFY_SOURCE
++ # https://bugs.llvm.org/show_bug.cgi?id=50322
++ # https://bugs.llvm.org/show_bug.cgi?id=41459
++ depends on !CC_IS_CLANG
+ help
+ Detect overflows of buffers in common string and memory functions
+ where the compiler can determine and validate the buffer sizes.
+diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
+index 90cbaff86e13a..2e509e32cf75a 100644
+--- a/security/Kconfig.hardening
++++ b/security/Kconfig.hardening
+@@ -22,14 +22,23 @@ menu "Memory initialization"
+ config CC_HAS_AUTO_VAR_INIT_PATTERN
+ def_bool $(cc-option,-ftrivial-auto-var-init=pattern)
+
+-config CC_HAS_AUTO_VAR_INIT_ZERO
++config CC_HAS_AUTO_VAR_INIT_ZERO_BARE
++ def_bool $(cc-option,-ftrivial-auto-var-init=zero)
++
++config CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER
++ # Clang 16 and later warn about using the -enable flag, but it
++ # is required before then.
+ def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang)
++ depends on !CC_HAS_AUTO_VAR_INIT_ZERO_BARE
++
++config CC_HAS_AUTO_VAR_INIT_ZERO
++ def_bool CC_HAS_AUTO_VAR_INIT_ZERO_BARE || CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER
+
+ choice
+ prompt "Initialize kernel stack variables at function entry"
+ default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS
+ default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN
+- default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN
++ default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_ZERO
+ default INIT_STACK_NONE
+ help
+ This option enables initialization of stack variables at
+@@ -231,6 +240,9 @@ config INIT_ON_FREE_DEFAULT_ON
+
+ config CC_HAS_ZERO_CALL_USED_REGS
+ def_bool $(cc-option,-fzero-call-used-regs=used-gpr)
++ # https://github.com/ClangBuiltLinux/linux/issues/1766
++ # https://github.com/llvm/llvm-project/issues/59242
++ depends on !CC_IS_CLANG || CLANG_VERSION > 150006
+
+ config ZERO_CALL_USED_REGS
+ bool "Enable register zeroing on function exit"
+diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
+index 2ee3b3d29f10b..8c7719108d7f7 100644
+--- a/security/apparmor/apparmorfs.c
++++ b/security/apparmor/apparmorfs.c
+@@ -401,7 +401,7 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf,
+
+ data->size = copy_size;
+ if (copy_from_user(data->data, userbuf, copy_size)) {
+- kvfree(data);
++ aa_put_loaddata(data);
+ return ERR_PTR(-EFAULT);
+ }
+
+@@ -867,8 +867,10 @@ static struct multi_transaction *multi_transaction_new(struct file *file,
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&t->count);
+- if (copy_from_user(t->data, buf, size))
++ if (copy_from_user(t->data, buf, size)) {
++ put_multi_transaction(t);
+ return ERR_PTR(-EFAULT);
++ }
+
+ return t;
+ }
+diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
+index f7e97c7e80f3d..704b0c895605a 100644
+--- a/security/apparmor/audit.c
++++ b/security/apparmor/audit.c
+@@ -137,7 +137,7 @@ int aa_audit(int type, struct aa_profile *profile, struct common_audit_data *sa,
+ }
+ if (AUDIT_MODE(profile) == AUDIT_QUIET ||
+ (type == AUDIT_APPARMOR_DENIED &&
+- AUDIT_MODE(profile) == AUDIT_QUIET))
++ AUDIT_MODE(profile) == AUDIT_QUIET_DENIED))
+ return aad(sa)->error;
+
+ if (KILL_MODE(profile) && type == AUDIT_APPARMOR_DENIED)
+diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
+index 583680f6cd811..a7b3d8e58ed83 100644
+--- a/security/apparmor/domain.c
++++ b/security/apparmor/domain.c
+@@ -467,7 +467,7 @@ restart:
+ * xattrs, or a longer match
+ */
+ candidate = profile;
+- candidate_len = profile->xmatch_len;
++ candidate_len = max(count, profile->xmatch_len);
+ candidate_xattrs = ret;
+ conflict = false;
+ }
+diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
+index 7d27db740bc2f..ac5054899f6f4 100644
+--- a/security/apparmor/include/lib.h
++++ b/security/apparmor/include/lib.h
+@@ -22,6 +22,11 @@
+ */
+
+ #define DEBUG_ON (aa_g_debug)
++/*
++ * split individual debug cases out in preparation for finer grained
++ * debug controls in the future.
++ */
++#define AA_DEBUG_LABEL DEBUG_ON
+ #define dbg_printk(__fmt, __args...) pr_debug(__fmt, ##__args)
+ #define AA_DEBUG(fmt, args...) \
+ do { \
+diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
+index b5b4b8190e654..b5aa4231af682 100644
+--- a/security/apparmor/include/policy.h
++++ b/security/apparmor/include/policy.h
+@@ -135,7 +135,7 @@ struct aa_profile {
+
+ const char *attach;
+ struct aa_dfa *xmatch;
+- int xmatch_len;
++ unsigned int xmatch_len;
+ enum audit_mode audit;
+ long mode;
+ u32 path_flags;
+diff --git a/security/apparmor/label.c b/security/apparmor/label.c
+index e68bcedca976b..66bc4704f8044 100644
+--- a/security/apparmor/label.c
++++ b/security/apparmor/label.c
+@@ -1454,7 +1454,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp)
+ if (label->hname || labels_ns(label) != ns)
+ return res;
+
+- if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) == -1)
++ if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) < 0)
+ return res;
+
+ ls = labels_set(label);
+@@ -1632,9 +1632,9 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns,
+ AA_BUG(!str && size != 0);
+ AA_BUG(!label);
+
+- if (flags & FLAG_ABS_ROOT) {
++ if (AA_DEBUG_LABEL && (flags & FLAG_ABS_ROOT)) {
+ ns = root_ns;
+- len = snprintf(str, size, "=");
++ len = snprintf(str, size, "_");
+ update_for_len(total, len, size, str);
+ } else if (!ns) {
+ ns = labels_ns(label);
+@@ -1704,7 +1704,7 @@ int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label,
+
+ /**
+ * aa_label_acntsxprint - allocate a __counted string buffer and print label
+- * @strp: buffer to write to. (MAY BE NULL if @size == 0)
++ * @strp: buffer to write to.
+ * @ns: namespace profile is being viewed from
+ * @label: label to view (NOT NULL)
+ * @flags: flags controlling what label info is printed
+@@ -1745,7 +1745,7 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns,
+ if (!use_label_hname(ns, label, flags) ||
+ display_mode(ns, label, flags)) {
+ len = aa_label_asxprint(&name, ns, label, flags, gfp);
+- if (len == -1) {
++ if (len < 0) {
+ AA_DEBUG("label print error");
+ return;
+ }
+@@ -1773,7 +1773,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns,
+ int len;
+
+ len = aa_label_asxprint(&str, ns, label, flags, gfp);
+- if (len == -1) {
++ if (len < 0) {
+ AA_DEBUG("label print error");
+ return;
+ }
+@@ -1796,7 +1796,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags,
+ int len;
+
+ len = aa_label_asxprint(&str, ns, label, flags, gfp);
+- if (len == -1) {
++ if (len < 0) {
+ AA_DEBUG("label print error");
+ return;
+ }
+@@ -1896,7 +1896,8 @@ struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str,
+ AA_BUG(!str);
+
+ str = skipn_spaces(str, n);
+- if (str == NULL || (*str == '=' && base != &root_ns->unconfined->label))
++ if (str == NULL || (AA_DEBUG_LABEL && *str == '_' &&
++ base != &root_ns->unconfined->label))
+ return ERR_PTR(-EINVAL);
+
+ len = label_count_strn_entries(str, end - str);
+diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
+index f72406fe1bf27..10274eb90fa37 100644
+--- a/security/apparmor/lsm.c
++++ b/security/apparmor/lsm.c
+@@ -1170,10 +1170,10 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb
+ #endif
+
+ /*
+- * The cred blob is a pointer to, not an instance of, an aa_task_ctx.
++ * The cred blob is a pointer to, not an instance of, an aa_label.
+ */
+ struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
+- .lbs_cred = sizeof(struct aa_task_ctx *),
++ .lbs_cred = sizeof(struct aa_label *),
+ .lbs_file = sizeof(struct aa_file_ctx),
+ .lbs_task = sizeof(struct aa_task_ctx),
+ };
+diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
+index aa6fcfde30514..f7bb47daf2ad6 100644
+--- a/security/apparmor/mount.c
++++ b/security/apparmor/mount.c
+@@ -229,7 +229,8 @@ static const char * const mnt_info_table[] = {
+ "failed srcname match",
+ "failed type match",
+ "failed flags match",
+- "failed data match"
++ "failed data match",
++ "failed perms check"
+ };
+
+ /*
+@@ -284,8 +285,8 @@ static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
+ return 0;
+ }
+
+- /* failed at end of flags match */
+- return 4;
++ /* failed at perms check, don't confuse with flags match */
++ return 6;
+ }
+
+
+@@ -718,6 +719,7 @@ int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+ aa_put_label(target);
+ goto out;
+ }
++ aa_put_label(target);
+ } else
+ /* already audited error */
+ error = PTR_ERR(target);
+diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
+index 4c010c9a6af1d..fcf22577f606c 100644
+--- a/security/apparmor/policy.c
++++ b/security/apparmor/policy.c
+@@ -1125,7 +1125,7 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
+
+ if (!name) {
+ /* remove namespace - can only happen if fqname[0] == ':' */
+- mutex_lock_nested(&ns->parent->lock, ns->level);
++ mutex_lock_nested(&ns->parent->lock, ns->parent->level);
+ __aa_bump_ns_revision(ns);
+ __aa_remove_ns(ns);
+ mutex_unlock(&ns->parent->lock);
+diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
+index 70921d95fb406..53d24cf638936 100644
+--- a/security/apparmor/policy_ns.c
++++ b/security/apparmor/policy_ns.c
+@@ -121,7 +121,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name)
+ return ns;
+
+ fail_unconfined:
+- kfree_sensitive(ns->base.hname);
++ aa_policy_destroy(&ns->base);
+ fail_ns:
+ kfree_sensitive(ns);
+ return NULL;
+diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
+index 4e1f96b216a8b..5f758b289ace3 100644
+--- a/security/apparmor/policy_unpack.c
++++ b/security/apparmor/policy_unpack.c
+@@ -746,16 +746,18 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
+ profile->label.flags |= FLAG_HAT;
+ if (!unpack_u32(e, &tmp, NULL))
+ goto fail;
+- if (tmp == PACKED_MODE_COMPLAIN || (e->version & FORCE_COMPLAIN_FLAG))
++ if (tmp == PACKED_MODE_COMPLAIN || (e->version & FORCE_COMPLAIN_FLAG)) {
+ profile->mode = APPARMOR_COMPLAIN;
+- else if (tmp == PACKED_MODE_ENFORCE)
++ } else if (tmp == PACKED_MODE_ENFORCE) {
+ profile->mode = APPARMOR_ENFORCE;
+- else if (tmp == PACKED_MODE_KILL)
++ } else if (tmp == PACKED_MODE_KILL) {
+ profile->mode = APPARMOR_KILL;
+- else if (tmp == PACKED_MODE_UNCONFINED)
++ } else if (tmp == PACKED_MODE_UNCONFINED) {
+ profile->mode = APPARMOR_UNCONFINED;
+- else
++ profile->label.flags |= FLAG_UNCONFINED;
++ } else {
+ goto fail;
++ }
+ if (!unpack_u32(e, &tmp, NULL))
+ goto fail;
+ if (tmp)
+@@ -907,8 +909,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
+ goto fail;
+ }
+
+- rhashtable_insert_fast(profile->data, &data->head,
+- profile->data->p);
++ if (rhashtable_insert_fast(profile->data, &data->head,
++ profile->data->p)) {
++ kfree_sensitive(data->key);
++ kfree_sensitive(data);
++ info = "failed to insert data to table";
++ goto fail;
++ }
+ }
+
+ if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+@@ -962,7 +969,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
+ * if not specified use previous version
+ * Mask off everything that is not kernel abi version
+ */
+- if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) {
++ if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) {
+ audit_iface(NULL, NULL, NULL, "unsupported interface version",
+ e, error);
+ return error;
+diff --git a/security/commoncap.c b/security/commoncap.c
+index 3f810d37b71bd..bc751fa5adad7 100644
+--- a/security/commoncap.c
++++ b/security/commoncap.c
+@@ -24,6 +24,7 @@
+ #include <linux/user_namespace.h>
+ #include <linux/binfmts.h>
+ #include <linux/personality.h>
++#include <linux/mnt_idmapping.h>
+
+ /*
+ * If a non-root user executes a setuid-root binary in
+@@ -400,8 +401,10 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+ &tmpbuf, size, GFP_NOFS);
+ dput(dentry);
+
+- if (ret < 0 || !tmpbuf)
+- return ret;
++ if (ret < 0 || !tmpbuf) {
++ size = ret;
++ goto out_free;
++ }
+
+ fs_ns = inode->i_sb->s_user_ns;
+ cap = (struct vfs_cap_data *) tmpbuf;
+@@ -418,7 +421,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+ kroot = make_kuid(fs_ns, root);
+
+ /* If this is an idmapped mount shift the kuid. */
+- kroot = kuid_into_mnt(mnt_userns, kroot);
++ kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
+
+ /* If the root kuid maps to a valid uid in current ns, then return
+ * this as a nscap. */
+@@ -488,6 +491,7 @@ out_free:
+ * @size: size of @ivalue
+ * @task_ns: user namespace of the caller
+ * @mnt_userns: user namespace of the mount the inode was found from
++ * @fs_userns: user namespace of the filesystem
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+@@ -497,7 +501,8 @@ out_free:
+ */
+ static kuid_t rootid_from_xattr(const void *value, size_t size,
+ struct user_namespace *task_ns,
+- struct user_namespace *mnt_userns)
++ struct user_namespace *mnt_userns,
++ struct user_namespace *fs_userns)
+ {
+ const struct vfs_ns_cap_data *nscap = value;
+ kuid_t rootkid;
+@@ -507,7 +512,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
+ rootid = le32_to_cpu(nscap->rootid);
+
+ rootkid = make_kuid(task_ns, rootid);
+- return kuid_from_mnt(mnt_userns, rootkid);
++ return mapped_kuid_user(mnt_userns, fs_userns, rootkid);
+ }
+
+ static bool validheader(size_t size, const struct vfs_cap_data *cap)
+@@ -553,12 +558,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
+ return -EINVAL;
+ if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
+ return -EPERM;
+- if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
++ if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
+ if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
+ /* user is privileged, just write the v2 */
+ return size;
+
+- rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns);
++ rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
+ if (!uid_valid(rootid))
+ return -EINVAL;
+
+@@ -699,7 +704,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
+ /* Limit the caps to the mounter of the filesystem
+ * or the more limited uid specified in the xattr.
+ */
+- rootkuid = kuid_into_mnt(mnt_userns, rootkuid);
++ rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
+ if (!rootid_owns_currentns(rootkuid))
+ return -ENODATA;
+
+diff --git a/security/device_cgroup.c b/security/device_cgroup.c
+index 04375df52fc9a..fe5cb7696993d 100644
+--- a/security/device_cgroup.c
++++ b/security/device_cgroup.c
+@@ -81,6 +81,17 @@ free_and_exit:
+ return -ENOMEM;
+ }
+
++static void dev_exceptions_move(struct list_head *dest, struct list_head *orig)
++{
++ struct dev_exception_item *ex, *tmp;
++
++ lockdep_assert_held(&devcgroup_mutex);
++
++ list_for_each_entry_safe(ex, tmp, orig, list) {
++ list_move_tail(&ex->list, dest);
++ }
++}
++
+ /*
+ * called under devcgroup_mutex
+ */
+@@ -603,11 +614,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+ int count, rc = 0;
+ struct dev_exception_item ex;
+ struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
++ struct dev_cgroup tmp_devcgrp;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ memset(&ex, 0, sizeof(ex));
++ memset(&tmp_devcgrp, 0, sizeof(tmp_devcgrp));
+ b = buffer;
+
+ switch (*b) {
+@@ -619,15 +632,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+
+ if (!may_allow_all(parent))
+ return -EPERM;
+- dev_exception_clean(devcgroup);
+- devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
+- if (!parent)
++ if (!parent) {
++ devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
++ dev_exception_clean(devcgroup);
+ break;
++ }
+
++ INIT_LIST_HEAD(&tmp_devcgrp.exceptions);
++ rc = dev_exceptions_copy(&tmp_devcgrp.exceptions,
++ &devcgroup->exceptions);
++ if (rc)
++ return rc;
++ dev_exception_clean(devcgroup);
+ rc = dev_exceptions_copy(&devcgroup->exceptions,
+ &parent->exceptions);
+- if (rc)
++ if (rc) {
++ dev_exceptions_move(&devcgroup->exceptions,
++ &tmp_devcgrp.exceptions);
+ return rc;
++ }
++ devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
++ dev_exception_clean(&tmp_devcgrp);
+ break;
+ case DEVCG_DENY:
+ if (css_has_online_children(&devcgroup->css))
+diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
+index 3b06a01bd0fdd..aa93b750a9f32 100644
+--- a/security/integrity/digsig.c
++++ b/security/integrity/digsig.c
+@@ -122,6 +122,7 @@ int __init integrity_init_keyring(const unsigned int id)
+ {
+ struct key_restriction *restriction;
+ key_perm_t perm;
++ int ret;
+
+ perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
+ | KEY_USR_READ | KEY_USR_SEARCH;
+@@ -142,7 +143,10 @@ int __init integrity_init_keyring(const unsigned int id)
+ perm |= KEY_USR_WRITE;
+
+ out:
+- return __integrity_init_keyring(id, perm, restriction);
++ ret = __integrity_init_keyring(id, perm, restriction);
++ if (ret)
++ kfree(restriction);
++ return ret;
+ }
+
+ static int __init integrity_add_key(const unsigned int id, const void *data,
+diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
+index 23240d793b074..895f4b9ce8c6b 100644
+--- a/security/integrity/digsig_asymmetric.c
++++ b/security/integrity/digsig_asymmetric.c
+@@ -109,22 +109,25 @@ int asymmetric_verify(struct key *keyring, const char *sig,
+
+ pk = asymmetric_key_public_key(key);
+ pks.pkey_algo = pk->pkey_algo;
+- if (!strcmp(pk->pkey_algo, "rsa"))
++ if (!strcmp(pk->pkey_algo, "rsa")) {
+ pks.encoding = "pkcs1";
+- else if (!strncmp(pk->pkey_algo, "ecdsa-", 6))
++ } else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) {
+ /* edcsa-nist-p192 etc. */
+ pks.encoding = "x962";
+- else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
+- !strcmp(pk->pkey_algo, "sm2"))
++ } else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
++ !strcmp(pk->pkey_algo, "sm2")) {
+ pks.encoding = "raw";
+- else
+- return -ENOPKG;
++ } else {
++ ret = -ENOPKG;
++ goto out;
++ }
+
+ pks.digest = (u8 *)data;
+ pks.digest_size = datalen;
+ pks.s = hdr->sig;
+ pks.s_size = siglen;
+ ret = verify_signature(key, &pks);
++out:
+ key_put(key);
+ pr_debug("%s() = %d\n", __func__, ret);
+ return ret;
+diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
+index 0450d79afdc8f..99c7452555c3c 100644
+--- a/security/integrity/evm/evm_crypto.c
++++ b/security/integrity/evm/evm_crypto.c
+@@ -40,7 +40,7 @@ static const char evm_hmac[] = "hmac(sha1)";
+ /**
+ * evm_set_key() - set EVM HMAC key from the kernel
+ * @key: pointer to a buffer with the key data
+- * @size: length of the key data
++ * @keylen: length of the key data
+ *
+ * This function allows setting the EVM HMAC key from the kernel
+ * without using the "encrypted" key subsystem keys. It can be used
+@@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo)
+ {
+ long rc;
+ const char *algo;
+- struct crypto_shash **tfm, *tmp_tfm = NULL;
++ struct crypto_shash **tfm, *tmp_tfm;
+ struct shash_desc *desc;
+
+ if (type == EVM_XATTR_HMAC) {
+@@ -120,16 +120,13 @@ unlock:
+ alloc:
+ desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm),
+ GFP_KERNEL);
+- if (!desc) {
+- crypto_free_shash(tmp_tfm);
++ if (!desc)
+ return ERR_PTR(-ENOMEM);
+- }
+
+ desc->tfm = *tfm;
+
+ rc = crypto_shash_init(desc);
+ if (rc) {
+- crypto_free_shash(tmp_tfm);
+ kfree(desc);
+ return ERR_PTR(rc);
+ }
+diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
+index 1c8435dfabeea..b0e791e184329 100644
+--- a/security/integrity/evm/evm_main.c
++++ b/security/integrity/evm/evm_main.c
+@@ -78,7 +78,7 @@ static struct xattr_list evm_config_default_xattrnames[] = {
+
+ LIST_HEAD(evm_config_xattrnames);
+
+-static int evm_fixmode;
++static int evm_fixmode __ro_after_init;
+ static int __init evm_set_fixmode(char *str)
+ {
+ if (strncmp(str, "fix", 3) == 0)
+@@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str)
+ else
+ pr_err("invalid \"%s\" mode", str);
+
+- return 0;
++ return 1;
+ }
+ __setup("evm=", evm_set_fixmode);
+
+@@ -324,7 +324,6 @@ int evm_protected_xattr_if_enabled(const char *req_xattr_name)
+ /**
+ * evm_read_protected_xattrs - read EVM protected xattr names, lengths, values
+ * @dentry: dentry of the read xattrs
+- * @inode: inode of the read xattrs
+ * @buffer: buffer xattr names, lengths or values are copied to
+ * @buffer_size: size of buffer
+ * @type: n: names, l: lengths, v: values
+@@ -396,6 +395,7 @@ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
+ * @xattr_name: requested xattr
+ * @xattr_value: requested xattr value
+ * @xattr_value_len: requested xattr value length
++ * @iint: inode integrity metadata
+ *
+ * Calculate the HMAC for the given dentry and verify it against the stored
+ * security.evm xattr. For performance, use the xattr value and length
+@@ -770,7 +770,9 @@ static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
+
+ /**
+ * evm_inode_setattr - prevent updating an invalid EVM extended attribute
++ * @idmap: idmap of the mount
+ * @dentry: pointer to the affected dentry
++ * @attr: iattr structure containing the new file attributes
+ *
+ * Permit update of file attributes when files have a valid EVM signature,
+ * except in the case of them having an immutable portable signature.
+diff --git a/security/integrity/iint.c b/security/integrity/iint.c
+index 8638976f7990b..65418e0906c13 100644
+--- a/security/integrity/iint.c
++++ b/security/integrity/iint.c
+@@ -43,12 +43,10 @@ static struct integrity_iint_cache *__integrity_iint_find(struct inode *inode)
+ else if (inode > iint->inode)
+ n = n->rb_right;
+ else
+- break;
++ return iint;
+ }
+- if (!n)
+- return NULL;
+
+- return iint;
++ return NULL;
+ }
+
+ /*
+@@ -121,10 +119,15 @@ struct integrity_iint_cache *integrity_inode_get(struct inode *inode)
+ parent = *p;
+ test_iint = rb_entry(parent, struct integrity_iint_cache,
+ rb_node);
+- if (inode < test_iint->inode)
++ if (inode < test_iint->inode) {
+ p = &(*p)->rb_left;
+- else
++ } else if (inode > test_iint->inode) {
+ p = &(*p)->rb_right;
++ } else {
++ write_unlock(&integrity_iint_lock);
++ kmem_cache_free(iint_cache, iint);
++ return test_iint;
++ }
+ }
+
+ iint->inode = inode;
+diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
+index f3a9cc201c8c2..7bc416c172119 100644
+--- a/security/integrity/ima/Kconfig
++++ b/security/integrity/ima/Kconfig
+@@ -8,7 +8,7 @@ config IMA
+ select CRYPTO_HMAC
+ select CRYPTO_SHA1
+ select CRYPTO_HASH_INFO
+- select TCG_TPM if HAS_IOMEM && !UML
++ select TCG_TPM if HAS_IOMEM
+ select TCG_TIS if TCG_TPM && X86
+ select TCG_CRB if TCG_TPM && ACPI
+ select TCG_IBMVTPM if TCG_TPM && PPC_PSERIES
+@@ -69,10 +69,9 @@ choice
+ hash, defined as 20 bytes, and a null terminated pathname,
+ limited to 255 characters. The 'ima-ng' measurement list
+ template permits both larger hash digests and longer
+- pathnames.
++ pathnames. The configured default template can be replaced
++ by specifying "ima_template=" on the boot command line.
+
+- config IMA_TEMPLATE
+- bool "ima"
+ config IMA_NG_TEMPLATE
+ bool "ima-ng (default)"
+ config IMA_SIG_TEMPLATE
+@@ -82,7 +81,6 @@ endchoice
+ config IMA_DEFAULT_TEMPLATE
+ string
+ depends on IMA
+- default "ima" if IMA_TEMPLATE
+ default "ima-ng" if IMA_NG_TEMPLATE
+ default "ima-sig" if IMA_SIG_TEMPLATE
+
+@@ -102,19 +100,19 @@ choice
+
+ config IMA_DEFAULT_HASH_SHA256
+ bool "SHA256"
+- depends on CRYPTO_SHA256=y && !IMA_TEMPLATE
++ depends on CRYPTO_SHA256=y
+
+ config IMA_DEFAULT_HASH_SHA512
+ bool "SHA512"
+- depends on CRYPTO_SHA512=y && !IMA_TEMPLATE
++ depends on CRYPTO_SHA512=y
+
+ config IMA_DEFAULT_HASH_WP512
+ bool "WP512"
+- depends on CRYPTO_WP512=y && !IMA_TEMPLATE
++ depends on CRYPTO_WP512=y
+
+ config IMA_DEFAULT_HASH_SM3
+ bool "SM3"
+- depends on CRYPTO_SM3=y && !IMA_TEMPLATE
++ depends on CRYPTO_SM3=y
+ endchoice
+
+ config IMA_DEFAULT_HASH
+@@ -250,18 +248,6 @@ config IMA_APPRAISE_MODSIG
+ The modsig keyword can be used in the IMA policy to allow a hook
+ to accept such signatures.
+
+-config IMA_TRUSTED_KEYRING
+- bool "Require all keys on the .ima keyring be signed (deprecated)"
+- depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
+- depends on INTEGRITY_ASYMMETRIC_KEYS
+- select INTEGRITY_TRUSTED_KEYRING
+- default y
+- help
+- This option requires that all keys added to the .ima
+- keyring be signed by a key on the system trusted keyring.
+-
+- This option is deprecated in favor of INTEGRITY_TRUSTED_KEYRING
+-
+ config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+ bool "Permit keys validly signed by a built-in or secondary CA cert (EXPERIMENTAL)"
+ depends on SYSTEM_TRUSTED_KEYRING
+diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
+index be965a8715e4e..0afe413dda683 100644
+--- a/security/integrity/ima/ima.h
++++ b/security/integrity/ima/ima.h
+@@ -122,7 +122,7 @@ struct ima_kexec_hdr {
+ extern const int read_idmap[];
+
+ #ifdef CONFIG_HAVE_IMA_KEXEC
+-void ima_load_kexec_buffer(void);
++void __init ima_load_kexec_buffer(void);
+ #else
+ static inline void ima_load_kexec_buffer(void) {}
+ #endif /* CONFIG_HAVE_IMA_KEXEC */
+diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
+index dbba51583e7c1..08b49bd1e8caf 100644
+--- a/security/integrity/ima/ima_appraise.c
++++ b/security/integrity/ima/ima_appraise.c
+@@ -408,7 +408,8 @@ int ima_appraise_measurement(enum ima_hooks func,
+ goto out;
+ }
+
+- status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
++ status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value,
++ rc < 0 ? 0 : rc, iint);
+ switch (status) {
+ case INTEGRITY_PASS:
+ case INTEGRITY_PASS_IMMUTABLE:
+@@ -643,22 +644,26 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
+ const struct evm_ima_xattr_data *xvalue = xattr_value;
+ int digsig = 0;
+ int result;
++ int err;
+
+ result = ima_protect_xattr(dentry, xattr_name, xattr_value,
+ xattr_value_len);
+ if (result == 1) {
+ if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
+ return -EINVAL;
++
++ err = validate_hash_algo(dentry, xvalue, xattr_value_len);
++ if (err)
++ return err;
++
+ digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG);
+ } else if (!strcmp(xattr_name, XATTR_NAME_EVM) && xattr_value_len > 0) {
+ digsig = (xvalue->type == EVM_XATTR_PORTABLE_DIGSIG);
+ }
+ if (result == 1 || evm_revalidate_status(xattr_name)) {
+- result = validate_hash_algo(dentry, xvalue, xattr_value_len);
+- if (result)
+- return result;
+-
+ ima_reset_appraise_flags(d_backing_inode(dentry), digsig);
++ if (result == 1)
++ result = 0;
+ }
+ return result;
+ }
+diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
+index a7206cc1d7d19..64499056648ad 100644
+--- a/security/integrity/ima/ima_crypto.c
++++ b/security/integrity/ima/ima_crypto.c
+@@ -205,6 +205,7 @@ out_array:
+
+ crypto_free_shash(ima_algo_array[i].tfm);
+ }
++ kfree(ima_algo_array);
+ out:
+ crypto_free_shash(ima_shash_tfm);
+ return rc;
+diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c
+index 71786d01946f4..9db66fe310d42 100644
+--- a/security/integrity/ima/ima_efi.c
++++ b/security/integrity/ima/ima_efi.c
+@@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void)
+ if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
++ if (IS_ENABLED(CONFIG_KEXEC_SIG))
++ set_kexec_sig_enforced();
+ return sb_arch_rules;
+ }
+ return NULL;
+diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
+index 3d8e9d5db5aa5..3ad8f7734208b 100644
+--- a/security/integrity/ima/ima_fs.c
++++ b/security/integrity/ima/ima_fs.c
+@@ -496,12 +496,12 @@ int __init ima_fs_init(void)
+
+ return 0;
+ out:
++ securityfs_remove(ima_policy);
+ securityfs_remove(violations);
+ securityfs_remove(runtime_measurements_count);
+ securityfs_remove(ascii_runtime_measurements);
+ securityfs_remove(binary_runtime_measurements);
+ securityfs_remove(ima_symlink);
+ securityfs_remove(ima_dir);
+- securityfs_remove(ima_policy);
+ return -1;
+ }
+diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
+index f799cc278a9a8..f3b10851bbbfd 100644
+--- a/security/integrity/ima/ima_kexec.c
++++ b/security/integrity/ima/ima_kexec.c
+@@ -137,7 +137,7 @@ void ima_add_kexec_buffer(struct kimage *image)
+ /*
+ * Restore the measurement list from the previous kernel.
+ */
+-void ima_load_kexec_buffer(void)
++void __init ima_load_kexec_buffer(void)
+ {
+ void *kexec_buffer = NULL;
+ size_t kexec_buffer_size = 0;
+diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
+index 465865412100b..e9a361109dd21 100644
+--- a/security/integrity/ima/ima_main.c
++++ b/security/integrity/ima/ima_main.c
+@@ -395,7 +395,9 @@ out:
+ /**
+ * ima_file_mmap - based on policy, collect/store measurement.
+ * @file: pointer to the file to be measured (May be NULL)
+- * @prot: contains the protection that will be applied by the kernel.
++ * @reqprot: protection requested by the application
++ * @prot: protection that will be applied by the kernel
++ * @flags: operational flags
+ *
+ * Measure files being mmapped executable based on the ima_must_measure()
+ * policy decision.
+@@ -403,7 +405,8 @@ out:
+ * On success return 0. On integrity appraisal error, assuming the file
+ * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
+ */
+-int ima_file_mmap(struct file *file, unsigned long prot)
++int ima_file_mmap(struct file *file, unsigned long reqprot,
++ unsigned long prot, unsigned long flags)
+ {
+ u32 secid;
+
+diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c
+index fb25723c65bc4..3e7bee30080f2 100644
+--- a/security/integrity/ima/ima_modsig.c
++++ b/security/integrity/ima/ima_modsig.c
+@@ -89,6 +89,9 @@ int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len,
+
+ /**
+ * ima_collect_modsig - Calculate the file hash without the appended signature.
++ * @modsig: parsed module signature
++ * @buf: data to verify the signature on
++ * @size: data size
+ *
+ * Since the modsig is part of the file contents, the hash used in its signature
+ * isn't the same one ordinarily calculated by IMA. Therefore PKCS7 code
+diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
+index 87b9b71cb8201..7e41917e1f767 100644
+--- a/security/integrity/ima/ima_policy.c
++++ b/security/integrity/ima/ima_policy.c
+@@ -228,7 +228,7 @@ static struct ima_rule_entry *arch_policy_entry __ro_after_init;
+ static LIST_HEAD(ima_default_rules);
+ static LIST_HEAD(ima_policy_rules);
+ static LIST_HEAD(ima_temp_rules);
+-static struct list_head *ima_rules = &ima_default_rules;
++static struct list_head __rcu *ima_rules = (struct list_head __rcu *)(&ima_default_rules);
+
+ static int ima_policy __initdata;
+
+@@ -391,12 +391,6 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
+
+ nentry->lsm[i].type = entry->lsm[i].type;
+ nentry->lsm[i].args_p = entry->lsm[i].args_p;
+- /*
+- * Remove the reference from entry so that the associated
+- * memory will not be freed during a later call to
+- * ima_lsm_free_rule(entry).
+- */
+- entry->lsm[i].args_p = NULL;
+
+ ima_filter_rule_init(nentry->lsm[i].type, Audit_equal,
+ nentry->lsm[i].args_p,
+@@ -410,6 +404,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
+
+ static int ima_lsm_update_rule(struct ima_rule_entry *entry)
+ {
++ int i;
+ struct ima_rule_entry *nentry;
+
+ nentry = ima_lsm_copy_rule(entry);
+@@ -424,7 +419,8 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry)
+ * references and the entry itself. All other memory refrences will now
+ * be owned by nentry.
+ */
+- ima_lsm_free_rule(entry);
++ for (i = 0; i < MAX_LSM_RULES; i++)
++ ima_filter_rule_free(entry->lsm[i].rule);
+ kfree(entry);
+
+ return 0;
+@@ -542,6 +538,9 @@ static bool ima_match_rules(struct ima_rule_entry *rule,
+ const char *func_data)
+ {
+ int i;
++ bool result = false;
++ struct ima_rule_entry *lsm_rule = rule;
++ bool rule_reinitialized = false;
+
+ if ((rule->flags & IMA_FUNC) &&
+ (rule->func != func && func != POST_SETATTR))
+@@ -590,35 +589,55 @@ static bool ima_match_rules(struct ima_rule_entry *rule,
+ int rc = 0;
+ u32 osid;
+
+- if (!rule->lsm[i].rule) {
+- if (!rule->lsm[i].args_p)
++ if (!lsm_rule->lsm[i].rule) {
++ if (!lsm_rule->lsm[i].args_p)
+ continue;
+ else
+ return false;
+ }
++
++retry:
+ switch (i) {
+ case LSM_OBJ_USER:
+ case LSM_OBJ_ROLE:
+ case LSM_OBJ_TYPE:
+ security_inode_getsecid(inode, &osid);
+- rc = ima_filter_rule_match(osid, rule->lsm[i].type,
++ rc = ima_filter_rule_match(osid, lsm_rule->lsm[i].type,
+ Audit_equal,
+- rule->lsm[i].rule);
++ lsm_rule->lsm[i].rule);
+ break;
+ case LSM_SUBJ_USER:
+ case LSM_SUBJ_ROLE:
+ case LSM_SUBJ_TYPE:
+- rc = ima_filter_rule_match(secid, rule->lsm[i].type,
++ rc = ima_filter_rule_match(secid, lsm_rule->lsm[i].type,
+ Audit_equal,
+- rule->lsm[i].rule);
++ lsm_rule->lsm[i].rule);
+ break;
+ default:
+ break;
+ }
+- if (!rc)
+- return false;
++
++ if (rc == -ESTALE && !rule_reinitialized) {
++ lsm_rule = ima_lsm_copy_rule(rule);
++ if (lsm_rule) {
++ rule_reinitialized = true;
++ goto retry;
++ }
++ }
++ if (!rc) {
++ result = false;
++ goto out;
++ }
+ }
+- return true;
++ result = true;
++
++out:
++ if (rule_reinitialized) {
++ for (i = 0; i < MAX_LSM_RULES; i++)
++ ima_filter_rule_free(lsm_rule->lsm[i].rule);
++ kfree(lsm_rule);
++ }
++ return result;
+ }
+
+ /*
+@@ -655,6 +674,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
+ * @secid: LSM secid of the task to be validated
+ * @func: IMA hook identifier
+ * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
++ * @flags: IMA actions to consider (e.g. IMA_MEASURE | IMA_APPRAISE)
+ * @pcr: set the pcr to extend
+ * @template_desc: the template that should be used for this rule
+ * @func_data: func specific data, may be NULL
+@@ -675,12 +695,14 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
+ {
+ struct ima_rule_entry *entry;
+ int action = 0, actmask = flags | (flags << 1);
++ struct list_head *ima_rules_tmp;
+
+ if (template_desc && !*template_desc)
+ *template_desc = ima_template_desc_current();
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(entry, ima_rules, list) {
++ ima_rules_tmp = rcu_dereference(ima_rules);
++ list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
+
+ if (!(entry->action & actmask))
+ continue;
+@@ -741,9 +763,11 @@ void ima_update_policy_flags(void)
+ {
+ struct ima_rule_entry *entry;
+ int new_policy_flag = 0;
++ struct list_head *ima_rules_tmp;
+
+ rcu_read_lock();
+- list_for_each_entry(entry, ima_rules, list) {
++ ima_rules_tmp = rcu_dereference(ima_rules);
++ list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
+ /*
+ * SETXATTR_CHECK rules do not implement a full policy check
+ * because rule checking would probably have an important
+@@ -968,10 +992,10 @@ void ima_update_policy(void)
+
+ list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu);
+
+- if (ima_rules != policy) {
++ if (ima_rules != (struct list_head __rcu *)policy) {
+ ima_policy_flag = 0;
+- ima_rules = policy;
+
++ rcu_assign_pointer(ima_rules, policy);
+ /*
+ * IMA architecture specific policy rules are specified
+ * as strings and converted to an array of ima_entry_rules
+@@ -1061,7 +1085,7 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry,
+ pr_warn("rule for LSM \'%s\' is undefined\n",
+ entry->lsm[lsm_rule].args_p);
+
+- if (ima_rules == &ima_default_rules) {
++ if (ima_rules == (struct list_head __rcu *)(&ima_default_rules)) {
+ kfree(entry->lsm[lsm_rule].args_p);
+ entry->lsm[lsm_rule].args_p = NULL;
+ result = -EINVAL;
+@@ -1686,7 +1710,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
+
+ /**
+ * ima_parse_add_rule - add a rule to ima_policy_rules
+- * @rule - ima measurement policy rule
++ * @rule: ima measurement policy rule
+ *
+ * Avoid locking by allowing just one writer at a time in ima_write_policy()
+ * Returns the length of the rule parsed, an error code on failure
+@@ -1768,9 +1792,11 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos)
+ {
+ loff_t l = *pos;
+ struct ima_rule_entry *entry;
++ struct list_head *ima_rules_tmp;
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(entry, ima_rules, list) {
++ ima_rules_tmp = rcu_dereference(ima_rules);
++ list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
+ if (!l--) {
+ rcu_read_unlock();
+ return entry;
+@@ -1789,7 +1815,8 @@ void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
+ rcu_read_unlock();
+ (*pos)++;
+
+- return (&entry->list == ima_rules) ? NULL : entry;
++ return (&entry->list == &ima_default_rules ||
++ &entry->list == &ima_policy_rules) ? NULL : entry;
+ }
+
+ void ima_policy_stop(struct seq_file *m, void *v)
+@@ -1845,6 +1872,14 @@ int ima_policy_show(struct seq_file *m, void *v)
+
+ rcu_read_lock();
+
++ /* Do not print rules with inactive LSM labels */
++ for (i = 0; i < MAX_LSM_RULES; i++) {
++ if (entry->lsm[i].args_p && !entry->lsm[i].rule) {
++ rcu_read_unlock();
++ return 0;
++ }
++ }
++
+ if (entry->action & MEASURE)
+ seq_puts(m, pt(Opt_measure));
+ if (entry->action & DONT_MEASURE)
+@@ -2014,14 +2049,20 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
+ struct ima_rule_entry *entry;
+ bool found = false;
+ enum ima_hooks func;
++ struct list_head *ima_rules_tmp;
+
+ if (id >= READING_MAX_ID)
+ return false;
+
++ if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
++ && security_locked_down(LOCKDOWN_KEXEC))
++ return false;
++
+ func = read_idmap[id] ?: FILE_CHECK;
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(entry, ima_rules, list) {
++ ima_rules_tmp = rcu_dereference(ima_rules);
++ list_for_each_entry_rcu(entry, ima_rules_tmp, list) {
+ if (entry->action != APPRAISE)
+ continue;
+
+diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
+index 694560396be05..31a8388e3dfae 100644
+--- a/security/integrity/ima/ima_template.c
++++ b/security/integrity/ima/ima_template.c
+@@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = {
+
+ static LIST_HEAD(defined_templates);
+ static DEFINE_SPINLOCK(template_list);
++static int template_setup_done;
+
+ static const struct ima_template_field supported_fields[] = {
+ {.field_id = "d", .field_init = ima_eventdigest_init,
+@@ -101,10 +102,11 @@ static int __init ima_template_setup(char *str)
+ struct ima_template_desc *template_desc;
+ int template_len = strlen(str);
+
+- if (ima_template)
++ if (template_setup_done)
+ return 1;
+
+- ima_init_template_list();
++ if (!ima_template)
++ ima_init_template_list();
+
+ /*
+ * Verify that a template with the supplied name exists.
+@@ -128,6 +130,7 @@ static int __init ima_template_setup(char *str)
+ }
+
+ ima_template = template_desc;
++ template_setup_done = 1;
+ return 1;
+ }
+ __setup("ima_template=", ima_template_setup);
+@@ -136,7 +139,7 @@ static int __init ima_template_fmt_setup(char *str)
+ {
+ int num_templates = ARRAY_SIZE(builtin_templates);
+
+- if (ima_template)
++ if (template_setup_done)
+ return 1;
+
+ if (template_desc_init_fields(str, NULL, NULL) < 0) {
+@@ -147,6 +150,7 @@ static int __init ima_template_fmt_setup(char *str)
+
+ builtin_templates[num_templates - 1].fmt = str;
+ ima_template = builtin_templates + num_templates - 1;
++ template_setup_done = 1;
+
+ return 1;
+ }
+@@ -237,11 +241,11 @@ int template_desc_init_fields(const char *template_fmt,
+ }
+
+ if (fields && num_fields) {
+- *fields = kmalloc_array(i, sizeof(*fields), GFP_KERNEL);
++ *fields = kmalloc_array(i, sizeof(**fields), GFP_KERNEL);
+ if (*fields == NULL)
+ return -ENOMEM;
+
+- memcpy(*fields, found_fields, i * sizeof(*fields));
++ memcpy(*fields, found_fields, i * sizeof(**fields));
+ *num_fields = i;
+ }
+
+@@ -332,8 +336,11 @@ static struct ima_template_desc *restore_template_fmt(char *template_name)
+
+ template_desc->name = "";
+ template_desc->fmt = kstrdup(template_name, GFP_KERNEL);
+- if (!template_desc->fmt)
++ if (!template_desc->fmt) {
++ kfree(template_desc);
++ template_desc = NULL;
+ goto out;
++ }
+
+ spin_lock(&template_list);
+ list_add_tail_rcu(&template_desc->list, &defined_templates);
+diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c
+index 29220056207f4..0ec5e4c22cb2a 100644
+--- a/security/integrity/integrity_audit.c
++++ b/security/integrity/integrity_audit.c
+@@ -45,6 +45,8 @@ void integrity_audit_message(int audit_msgno, struct inode *inode,
+ return;
+
+ ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno);
++ if (!ab)
++ return;
+ audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
+ task_pid_nr(current),
+ from_kuid(&init_user_ns, current_uid()),
+diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
+index 2462bfa08fe34..cd06bd6072be2 100644
+--- a/security/integrity/platform_certs/keyring_handler.h
++++ b/security/integrity/platform_certs/keyring_handler.h
+@@ -30,3 +30,11 @@ efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
+ efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
+
+ #endif
++
++#ifndef UEFI_QUIRK_SKIP_CERT
++#define UEFI_QUIRK_SKIP_CERT(vendor, product) \
++ .matches = { \
++ DMI_MATCH(DMI_BOARD_VENDOR, vendor), \
++ DMI_MATCH(DMI_PRODUCT_NAME, product), \
++ },
++#endif
+diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
+index f290f78c3f301..d2f2c3936277a 100644
+--- a/security/integrity/platform_certs/load_uefi.c
++++ b/security/integrity/platform_certs/load_uefi.c
+@@ -3,6 +3,7 @@
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
+ #include <linux/cred.h>
++#include <linux/dmi.h>
+ #include <linux/err.h>
+ #include <linux/efi.h>
+ #include <linux/slab.h>
+@@ -11,6 +12,32 @@
+ #include "../integrity.h"
+ #include "keyring_handler.h"
+
++/*
++ * On T2 Macs reading the db and dbx efi variables to load UEFI Secure Boot
++ * certificates causes occurrence of a page fault in Apple's firmware and
++ * a crash disabling EFI runtime services. The following quirk skips reading
++ * these variables.
++ */
++static const struct dmi_system_id uefi_skip_cert[] = {
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,2") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,3") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,4") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,2") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,3") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,4") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "Macmini8,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") },
++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMacPro1,1") },
++ { }
++};
++
+ /*
+ * Look to see if a UEFI variable called MokIgnoreDB exists and return true if
+ * it does.
+@@ -137,6 +164,13 @@ static int __init load_uefi_certs(void)
+ unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0;
+ efi_status_t status;
+ int rc = 0;
++ const struct dmi_system_id *dmi_id;
++
++ dmi_id = dmi_first_match(uefi_skip_cert);
++ if (dmi_id) {
++ pr_err("Reading UEFI Secure Boot Certs is not supported on T2 Macs.\n");
++ return false;
++ }
+
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
+ return false;
+diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
+index 96a92a645216d..cfb5000876922 100644
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -980,14 +980,19 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
+ ret = -EACCES;
+ down_write(&key->sem);
+
+- if (!capable(CAP_SYS_ADMIN)) {
++ {
++ bool is_privileged_op = false;
++
+ /* only the sysadmin can chown a key to some other UID */
+ if (user != (uid_t) -1 && !uid_eq(key->uid, uid))
+- goto error_put;
++ is_privileged_op = true;
+
+ /* only the sysadmin can set the key's GID to a group other
+ * than one of those that the current process subscribes to */
+ if (group != (gid_t) -1 && !gid_eq(gid, key->gid) && !in_group_p(gid))
++ is_privileged_op = true;
++
++ if (is_privileged_op && !capable(CAP_SYS_ADMIN))
+ goto error_put;
+ }
+
+@@ -1088,7 +1093,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
+ down_write(&key->sem);
+
+ /* if we're not the sysadmin, we can only change a key that we own */
+- if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
++ if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) {
+ key->perm = perm;
+ notify_key(key, NOTIFY_KEY_SETATTR, 0);
+ ret = 0;
+diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
+index 5de0d599a2748..97bc27bbf0797 100644
+--- a/security/keys/keyctl_pkey.c
++++ b/security/keys/keyctl_pkey.c
+@@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
+
+ switch (op) {
+ case KEYCTL_PKEY_ENCRYPT:
++ if (uparams.in_len > info.max_dec_size ||
++ uparams.out_len > info.max_enc_size)
++ return -EINVAL;
++ break;
+ case KEYCTL_PKEY_DECRYPT:
+ if (uparams.in_len > info.max_enc_size ||
+ uparams.out_len > info.max_dec_size)
+ return -EINVAL;
+ break;
+ case KEYCTL_PKEY_SIGN:
++ if (uparams.in_len > info.max_data_size ||
++ uparams.out_len > info.max_sig_size)
++ return -EINVAL;
++ break;
+ case KEYCTL_PKEY_VERIFY:
+- if (uparams.in_len > info.max_sig_size ||
+- uparams.out_len > info.max_data_size)
++ if (uparams.in_len > info.max_data_size ||
++ uparams.in2_len > info.max_sig_size)
+ return -EINVAL;
+ break;
+ default:
+@@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
+ }
+
+ params->in_len = uparams.in_len;
+- params->out_len = uparams.out_len;
++ params->out_len = uparams.out_len; /* Note: same as in2_len */
+ return 0;
+ }
+
+diff --git a/security/keys/request_key.c b/security/keys/request_key.c
+index 2da4404276f0f..a7673ad86d18d 100644
+--- a/security/keys/request_key.c
++++ b/security/keys/request_key.c
+@@ -38,9 +38,12 @@ static void cache_requested_key(struct key *key)
+ #ifdef CONFIG_KEYS_REQUEST_CACHE
+ struct task_struct *t = current;
+
+- key_put(t->cached_requested_key);
+- t->cached_requested_key = key_get(key);
+- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
++ /* Do not cache key if it is a kernel thread */
++ if (!(t->flags & PF_KTHREAD)) {
++ key_put(t->cached_requested_key);
++ t->cached_requested_key = key_get(key);
++ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
++ }
+ #endif
+ }
+
+@@ -398,17 +401,21 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
+
+ if (dest_keyring) {
+- ret = __key_link_lock(dest_keyring, &ctx->index_key);
++ ret = __key_link_lock(dest_keyring, &key->index_key);
+ if (ret < 0)
+ goto link_lock_failed;
+- ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
+- if (ret < 0)
+- goto link_prealloc_failed;
+ }
+
+- /* attach the key to the destination keyring under lock, but we do need
++ /*
++ * Attach the key to the destination keyring under lock, but we do need
+ * to do another check just in case someone beat us to it whilst we
+- * waited for locks */
++ * waited for locks.
++ *
++ * The caller might specify a comparison function which looks for keys
++ * that do not exactly match but are still equivalent from the caller's
++ * perspective. The __key_link_begin() operation must be done only after
++ * an actual key is determined.
++ */
+ mutex_lock(&key_construction_mutex);
+
+ rcu_read_lock();
+@@ -417,12 +424,16 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
+ if (!IS_ERR(key_ref))
+ goto key_already_present;
+
+- if (dest_keyring)
++ if (dest_keyring) {
++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
++ if (ret < 0)
++ goto link_alloc_failed;
+ __key_link(dest_keyring, key, &edit);
++ }
+
+ mutex_unlock(&key_construction_mutex);
+ if (dest_keyring)
+- __key_link_end(dest_keyring, &ctx->index_key, edit);
++ __key_link_end(dest_keyring, &key->index_key, edit);
+ mutex_unlock(&user->cons_lock);
+ *_key = key;
+ kleave(" = 0 [%d]", key_serial(key));
+@@ -435,10 +446,13 @@ key_already_present:
+ mutex_unlock(&key_construction_mutex);
+ key = key_ref_to_ptr(key_ref);
+ if (dest_keyring) {
++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
++ if (ret < 0)
++ goto link_alloc_failed_unlocked;
+ ret = __key_link_check_live_key(dest_keyring, key);
+ if (ret == 0)
+ __key_link(dest_keyring, key, &edit);
+- __key_link_end(dest_keyring, &ctx->index_key, edit);
++ __key_link_end(dest_keyring, &key->index_key, edit);
+ if (ret < 0)
+ goto link_check_failed;
+ }
+@@ -453,8 +467,10 @@ link_check_failed:
+ kleave(" = %d [linkcheck]", ret);
+ return ret;
+
+-link_prealloc_failed:
+- __key_link_end(dest_keyring, &ctx->index_key, edit);
++link_alloc_failed:
++ mutex_unlock(&key_construction_mutex);
++link_alloc_failed_unlocked:
++ __key_link_end(dest_keyring, &key->index_key, edit);
+ link_lock_failed:
+ mutex_unlock(&user->cons_lock);
+ key_put(key);
+diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
+index d5c891d8d3534..9b9d3ef79cbe3 100644
+--- a/security/keys/trusted-keys/trusted_core.c
++++ b/security/keys/trusted-keys/trusted_core.c
+@@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0);
+ MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)");
+
+ static const struct trusted_key_source trusted_key_sources[] = {
+-#if defined(CONFIG_TCG_TPM)
++#if IS_REACHABLE(CONFIG_TCG_TPM)
+ { "tpm", &trusted_key_tpm_ops },
+ #endif
+-#if defined(CONFIG_TEE)
++#if IS_REACHABLE(CONFIG_TEE)
+ { "tee", &trusted_key_tee_ops },
+ #endif
+ };
+@@ -351,7 +351,7 @@ static int __init init_trusted(void)
+
+ static void __exit cleanup_trusted(void)
+ {
+- static_call(trusted_key_exit)();
++ static_call_cond(trusted_key_exit)();
+ }
+
+ late_initcall(init_trusted);
+diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
+index 0165da386289c..bc700f85f80be 100644
+--- a/security/keys/trusted-keys/trusted_tpm2.c
++++ b/security/keys/trusted-keys/trusted_tpm2.c
+@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen,
+ }
+
+ /**
+- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
++ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ *
+ * @buf: an allocated tpm_buf instance
+ * @session_handle: session handle
+@@ -283,8 +283,8 @@ int tpm2_seal_trusted(struct tpm_chip *chip,
+ /* key properties */
+ flags = 0;
+ flags |= options->policydigest_len ? 0 : TPM2_OA_USER_WITH_AUTH;
+- flags |= payload->migratable ? (TPM2_OA_FIXED_TPM |
+- TPM2_OA_FIXED_PARENT) : 0;
++ flags |= payload->migratable ? 0 : (TPM2_OA_FIXED_TPM |
++ TPM2_OA_FIXED_PARENT);
+ tpm_buf_append_u32(&buf, flags);
+
+ /* policy */
+diff --git a/security/landlock/cred.c b/security/landlock/cred.c
+index 6725af24c6841..ec6c37f04a191 100644
+--- a/security/landlock/cred.c
++++ b/security/landlock/cred.c
+@@ -15,7 +15,7 @@
+ #include "setup.h"
+
+ static int hook_cred_prepare(struct cred *const new,
+- const struct cred *const old, const gfp_t gfp)
++ const struct cred *const old, const gfp_t gfp)
+ {
+ struct landlock_ruleset *const old_dom = landlock_cred(old)->domain;
+
+@@ -42,5 +42,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ __init void landlock_add_cred_hooks(void)
+ {
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+- LANDLOCK_NAME);
++ LANDLOCK_NAME);
+ }
+diff --git a/security/landlock/cred.h b/security/landlock/cred.h
+index 5f99d3decade6..af89ab00e6d10 100644
+--- a/security/landlock/cred.h
++++ b/security/landlock/cred.h
+@@ -20,8 +20,8 @@ struct landlock_cred_security {
+ struct landlock_ruleset *domain;
+ };
+
+-static inline struct landlock_cred_security *landlock_cred(
+- const struct cred *cred)
++static inline struct landlock_cred_security *
++landlock_cred(const struct cred *cred)
+ {
+ return cred->security + landlock_blob_sizes.lbs_cred;
+ }
+@@ -34,8 +34,8 @@ static inline const struct landlock_ruleset *landlock_get_current_domain(void)
+ /*
+ * The call needs to come from an RCU read-side critical section.
+ */
+-static inline const struct landlock_ruleset *landlock_get_task_domain(
+- const struct task_struct *const task)
++static inline const struct landlock_ruleset *
++landlock_get_task_domain(const struct task_struct *const task)
+ {
+ return landlock_cred(__task_cred(task))->domain;
+ }
+diff --git a/security/landlock/fs.c b/security/landlock/fs.c
+index 97b8e421f6171..c5749301b37d6 100644
+--- a/security/landlock/fs.c
++++ b/security/landlock/fs.c
+@@ -141,23 +141,26 @@ retry:
+ }
+
+ /* All access rights that can be tied to files. */
++/* clang-format off */
+ #define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE)
++/* clang-format on */
+
+ /*
+ * @path: Should have been checked by get_path_from_fd().
+ */
+ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+- const struct path *const path, u32 access_rights)
++ const struct path *const path,
++ access_mask_t access_rights)
+ {
+ int err;
+ struct landlock_object *object;
+
+ /* Files only get access rights that make sense. */
+- if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) !=
+- ACCESS_FILE)
++ if (!d_is_dir(path->dentry) &&
++ (access_rights | ACCESS_FILE) != ACCESS_FILE)
+ return -EINVAL;
+ if (WARN_ON_ONCE(ruleset->num_layers != 1))
+ return -EINVAL;
+@@ -180,59 +183,93 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+
+ /* Access-control management */
+
+-static inline u64 unmask_layers(
+- const struct landlock_ruleset *const domain,
+- const struct path *const path, const u32 access_request,
+- u64 layer_mask)
++/*
++ * The lifetime of the returned rule is tied to @domain.
++ *
++ * Returns NULL if no rule is found or if @dentry is negative.
++ */
++static inline const struct landlock_rule *
++find_rule(const struct landlock_ruleset *const domain,
++ const struct dentry *const dentry)
+ {
+ const struct landlock_rule *rule;
+ const struct inode *inode;
+- size_t i;
+
+- if (d_is_negative(path->dentry))
+- /* Ignore nonexistent leafs. */
+- return layer_mask;
+- inode = d_backing_inode(path->dentry);
++ /* Ignores nonexistent leafs. */
++ if (d_is_negative(dentry))
++ return NULL;
++
++ inode = d_backing_inode(dentry);
+ rcu_read_lock();
+- rule = landlock_find_rule(domain,
+- rcu_dereference(landlock_inode(inode)->object));
++ rule = landlock_find_rule(
++ domain, rcu_dereference(landlock_inode(inode)->object));
+ rcu_read_unlock();
++ return rule;
++}
++
++/*
++ * @layer_masks is read and may be updated according to the access request and
++ * the matching rule.
++ *
++ * Returns true if the request is allowed (i.e. relevant layer masks for the
++ * request are empty).
++ */
++static inline bool
++unmask_layers(const struct landlock_rule *const rule,
++ const access_mask_t access_request,
++ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
++{
++ size_t layer_level;
++
++ if (!access_request || !layer_masks)
++ return true;
+ if (!rule)
+- return layer_mask;
++ return false;
+
+ /*
+ * An access is granted if, for each policy layer, at least one rule
+- * encountered on the pathwalk grants the requested accesses,
+- * regardless of their position in the layer stack. We must then check
++ * encountered on the pathwalk grants the requested access,
++ * regardless of its position in the layer stack. We must then check
+ * the remaining layers for each inode, from the first added layer to
+- * the last one.
++ * the last one. When there is multiple requested accesses, for each
++ * policy layer, the full set of requested accesses may not be granted
++ * by only one rule, but by the union (binary OR) of multiple rules.
++ * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
+ */
+- for (i = 0; i < rule->num_layers; i++) {
+- const struct landlock_layer *const layer = &rule->layers[i];
+- const u64 layer_level = BIT_ULL(layer->level - 1);
+-
+- /* Checks that the layer grants access to the full request. */
+- if ((layer->access & access_request) == access_request) {
+- layer_mask &= ~layer_level;
++ for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
++ const struct landlock_layer *const layer =
++ &rule->layers[layer_level];
++ const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
++ const unsigned long access_req = access_request;
++ unsigned long access_bit;
++ bool is_empty;
+
+- if (layer_mask == 0)
+- return layer_mask;
++ /*
++ * Records in @layer_masks which layer grants access to each
++ * requested access.
++ */
++ is_empty = true;
++ for_each_set_bit(access_bit, &access_req,
++ ARRAY_SIZE(*layer_masks)) {
++ if (layer->access & BIT_ULL(access_bit))
++ (*layer_masks)[access_bit] &= ~layer_bit;
++ is_empty = is_empty && !(*layer_masks)[access_bit];
+ }
++ if (is_empty)
++ return true;
+ }
+- return layer_mask;
++ return false;
+ }
+
+ static int check_access_path(const struct landlock_ruleset *const domain,
+- const struct path *const path, u32 access_request)
++ const struct path *const path,
++ const access_mask_t access_request)
+ {
+- bool allowed = false;
++ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
++ bool allowed = false, has_access = false;
+ struct path walker_path;
+- u64 layer_mask;
+ size_t i;
+
+- /* Make sure all layers can be checked. */
+- BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS);
+-
+ if (!access_request)
+ return 0;
+ if (WARN_ON_ONCE(!domain || !path))
+@@ -243,20 +280,27 @@ static int check_access_path(const struct landlock_ruleset *const domain,
+ * /proc/<pid>/fd/<file-descriptor> .
+ */
+ if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
+- (d_is_positive(path->dentry) &&
+- unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
++ (d_is_positive(path->dentry) &&
++ unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
+ return 0;
+ if (WARN_ON_ONCE(domain->num_layers < 1))
+ return -EACCES;
+
+ /* Saves all layers handling a subset of requested accesses. */
+- layer_mask = 0;
+ for (i = 0; i < domain->num_layers; i++) {
+- if (domain->fs_access_masks[i] & access_request)
+- layer_mask |= BIT_ULL(i);
++ const unsigned long access_req = access_request;
++ unsigned long access_bit;
++
++ for_each_set_bit(access_bit, &access_req,
++ ARRAY_SIZE(layer_masks)) {
++ if (domain->fs_access_masks[i] & BIT_ULL(access_bit)) {
++ layer_masks[access_bit] |= BIT_ULL(i);
++ has_access = true;
++ }
++ }
+ }
+ /* An access request not handled by the domain is allowed. */
+- if (layer_mask == 0)
++ if (!has_access)
+ return 0;
+
+ walker_path = *path;
+@@ -268,13 +312,11 @@ static int check_access_path(const struct landlock_ruleset *const domain,
+ while (true) {
+ struct dentry *parent_dentry;
+
+- layer_mask = unmask_layers(domain, &walker_path,
+- access_request, layer_mask);
+- if (layer_mask == 0) {
++ allowed = unmask_layers(find_rule(domain, walker_path.dentry),
++ access_request, &layer_masks);
++ if (allowed)
+ /* Stops when a rule from each layer grants access. */
+- allowed = true;
+ break;
+- }
+
+ jump_up:
+ if (walker_path.dentry == walker_path.mnt->mnt_root) {
+@@ -308,7 +350,7 @@ jump_up:
+ }
+
+ static inline int current_check_access_path(const struct path *const path,
+- const u32 access_request)
++ const access_mask_t access_request)
+ {
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+@@ -436,8 +478,8 @@ static void hook_sb_delete(struct super_block *const sb)
+ if (prev_inode)
+ iput(prev_inode);
+ /* Waits for pending iput() in release_inode(). */
+- wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(
+- &landlock_superblock(sb)->inode_refs));
++ wait_var_event(&landlock_superblock(sb)->inode_refs,
++ !atomic_long_read(&landlock_superblock(sb)->inode_refs));
+ }
+
+ /*
+@@ -459,8 +501,8 @@ static void hook_sb_delete(struct super_block *const sb)
+ * a dedicated user space option would be required (e.g. as a ruleset flag).
+ */
+ static int hook_sb_mount(const char *const dev_name,
+- const struct path *const path, const char *const type,
+- const unsigned long flags, void *const data)
++ const struct path *const path, const char *const type,
++ const unsigned long flags, void *const data)
+ {
+ if (!landlock_get_current_domain())
+ return 0;
+@@ -468,7 +510,7 @@ static int hook_sb_mount(const char *const dev_name,
+ }
+
+ static int hook_move_mount(const struct path *const from_path,
+- const struct path *const to_path)
++ const struct path *const to_path)
+ {
+ if (!landlock_get_current_domain())
+ return 0;
+@@ -502,7 +544,7 @@ static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
+ * view of the filesystem.
+ */
+ static int hook_sb_pivotroot(const struct path *const old_path,
+- const struct path *const new_path)
++ const struct path *const new_path)
+ {
+ if (!landlock_get_current_domain())
+ return 0;
+@@ -511,7 +553,7 @@ static int hook_sb_pivotroot(const struct path *const old_path,
+
+ /* Path hooks */
+
+-static inline u32 get_mode_access(const umode_t mode)
++static inline access_mask_t get_mode_access(const umode_t mode)
+ {
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+@@ -545,8 +587,8 @@ static inline u32 get_mode_access(const umode_t mode)
+ * deal with that.
+ */
+ static int hook_path_link(struct dentry *const old_dentry,
+- const struct path *const new_dir,
+- struct dentry *const new_dentry)
++ const struct path *const new_dir,
++ struct dentry *const new_dentry)
+ {
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+@@ -559,22 +601,23 @@ static int hook_path_link(struct dentry *const old_dentry,
+ return -EXDEV;
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+- return check_access_path(dom, new_dir,
+- get_mode_access(d_backing_inode(old_dentry)->i_mode));
++ return check_access_path(
++ dom, new_dir,
++ get_mode_access(d_backing_inode(old_dentry)->i_mode));
+ }
+
+-static inline u32 maybe_remove(const struct dentry *const dentry)
++static inline access_mask_t maybe_remove(const struct dentry *const dentry)
+ {
+ if (d_is_negative(dentry))
+ return 0;
+ return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+- LANDLOCK_ACCESS_FS_REMOVE_FILE;
++ LANDLOCK_ACCESS_FS_REMOVE_FILE;
+ }
+
+ static int hook_path_rename(const struct path *const old_dir,
+- struct dentry *const old_dentry,
+- const struct path *const new_dir,
+- struct dentry *const new_dentry)
++ struct dentry *const old_dentry,
++ const struct path *const new_dir,
++ struct dentry *const new_dentry)
+ {
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+@@ -588,20 +631,21 @@ static int hook_path_rename(const struct path *const old_dir,
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+ /* RENAME_EXCHANGE is handled because directories are the same. */
+- return check_access_path(dom, old_dir, maybe_remove(old_dentry) |
+- maybe_remove(new_dentry) |
++ return check_access_path(
++ dom, old_dir,
++ maybe_remove(old_dentry) | maybe_remove(new_dentry) |
+ get_mode_access(d_backing_inode(old_dentry)->i_mode));
+ }
+
+ static int hook_path_mkdir(const struct path *const dir,
+- struct dentry *const dentry, const umode_t mode)
++ struct dentry *const dentry, const umode_t mode)
+ {
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
+ }
+
+ static int hook_path_mknod(const struct path *const dir,
+- struct dentry *const dentry, const umode_t mode,
+- const unsigned int dev)
++ struct dentry *const dentry, const umode_t mode,
++ const unsigned int dev)
+ {
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+@@ -612,28 +656,29 @@ static int hook_path_mknod(const struct path *const dir,
+ }
+
+ static int hook_path_symlink(const struct path *const dir,
+- struct dentry *const dentry, const char *const old_name)
++ struct dentry *const dentry,
++ const char *const old_name)
+ {
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
+ }
+
+ static int hook_path_unlink(const struct path *const dir,
+- struct dentry *const dentry)
++ struct dentry *const dentry)
+ {
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
+ }
+
+ static int hook_path_rmdir(const struct path *const dir,
+- struct dentry *const dentry)
++ struct dentry *const dentry)
+ {
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
+ }
+
+ /* File hooks */
+
+-static inline u32 get_file_access(const struct file *const file)
++static inline access_mask_t get_file_access(const struct file *const file)
+ {
+- u32 access = 0;
++ access_mask_t access = 0;
+
+ if (file->f_mode & FMODE_READ) {
+ /* A directory can only be opened in read mode. */
+@@ -688,5 +733,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ __init void landlock_add_fs_hooks(void)
+ {
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+- LANDLOCK_NAME);
++ LANDLOCK_NAME);
+ }
+diff --git a/security/landlock/fs.h b/security/landlock/fs.h
+index 187284b421c9d..8db7acf9109b6 100644
+--- a/security/landlock/fs.h
++++ b/security/landlock/fs.h
+@@ -50,14 +50,14 @@ struct landlock_superblock_security {
+ atomic_long_t inode_refs;
+ };
+
+-static inline struct landlock_inode_security *landlock_inode(
+- const struct inode *const inode)
++static inline struct landlock_inode_security *
++landlock_inode(const struct inode *const inode)
+ {
+ return inode->i_security + landlock_blob_sizes.lbs_inode;
+ }
+
+-static inline struct landlock_superblock_security *landlock_superblock(
+- const struct super_block *const superblock)
++static inline struct landlock_superblock_security *
++landlock_superblock(const struct super_block *const superblock)
+ {
+ return superblock->s_security + landlock_blob_sizes.lbs_superblock;
+ }
+@@ -65,6 +65,7 @@ static inline struct landlock_superblock_security *landlock_superblock(
+ __init void landlock_add_fs_hooks(void);
+
+ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+- const struct path *const path, u32 access_hierarchy);
++ const struct path *const path,
++ access_mask_t access_hierarchy);
+
+ #endif /* _SECURITY_LANDLOCK_FS_H */
+diff --git a/security/landlock/limits.h b/security/landlock/limits.h
+index 2a0a1095ee27e..17c2a2e7fe1ef 100644
+--- a/security/landlock/limits.h
++++ b/security/landlock/limits.h
+@@ -9,13 +9,19 @@
+ #ifndef _SECURITY_LANDLOCK_LIMITS_H
+ #define _SECURITY_LANDLOCK_LIMITS_H
+
++#include <linux/bitops.h>
+ #include <linux/limits.h>
+ #include <uapi/linux/landlock.h>
+
+-#define LANDLOCK_MAX_NUM_LAYERS 64
++/* clang-format off */
++
++#define LANDLOCK_MAX_NUM_LAYERS 16
+ #define LANDLOCK_MAX_NUM_RULES U32_MAX
+
+ #define LANDLOCK_LAST_ACCESS_FS LANDLOCK_ACCESS_FS_MAKE_SYM
+ #define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
++#define LANDLOCK_NUM_ACCESS_FS __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
++
++/* clang-format on */
+
+ #endif /* _SECURITY_LANDLOCK_LIMITS_H */
+diff --git a/security/landlock/object.c b/security/landlock/object.c
+index d674fdf9ff04f..1f50612f01850 100644
+--- a/security/landlock/object.c
++++ b/security/landlock/object.c
+@@ -17,9 +17,9 @@
+
+ #include "object.h"
+
+-struct landlock_object *landlock_create_object(
+- const struct landlock_object_underops *const underops,
+- void *const underobj)
++struct landlock_object *
++landlock_create_object(const struct landlock_object_underops *const underops,
++ void *const underobj)
+ {
+ struct landlock_object *new_object;
+
+diff --git a/security/landlock/object.h b/security/landlock/object.h
+index 3f80674c6c8d3..5f28c35e8aa8c 100644
+--- a/security/landlock/object.h
++++ b/security/landlock/object.h
+@@ -76,9 +76,9 @@ struct landlock_object {
+ };
+ };
+
+-struct landlock_object *landlock_create_object(
+- const struct landlock_object_underops *const underops,
+- void *const underobj);
++struct landlock_object *
++landlock_create_object(const struct landlock_object_underops *const underops,
++ void *const underobj);
+
+ void landlock_put_object(struct landlock_object *const object);
+
+diff --git a/security/landlock/ptrace.c b/security/landlock/ptrace.c
+index f55b82446de21..4c5b9cd712861 100644
+--- a/security/landlock/ptrace.c
++++ b/security/landlock/ptrace.c
+@@ -30,7 +30,7 @@
+ * means a subset of) the @child domain.
+ */
+ static bool domain_scope_le(const struct landlock_ruleset *const parent,
+- const struct landlock_ruleset *const child)
++ const struct landlock_ruleset *const child)
+ {
+ const struct landlock_hierarchy *walker;
+
+@@ -48,7 +48,7 @@ static bool domain_scope_le(const struct landlock_ruleset *const parent,
+ }
+
+ static bool task_is_scoped(const struct task_struct *const parent,
+- const struct task_struct *const child)
++ const struct task_struct *const child)
+ {
+ bool is_scoped;
+ const struct landlock_ruleset *dom_parent, *dom_child;
+@@ -62,7 +62,7 @@ static bool task_is_scoped(const struct task_struct *const parent,
+ }
+
+ static int task_ptrace(const struct task_struct *const parent,
+- const struct task_struct *const child)
++ const struct task_struct *const child)
+ {
+ /* Quick return for non-landlocked tasks. */
+ if (!landlocked(parent))
+@@ -86,7 +86,7 @@ static int task_ptrace(const struct task_struct *const parent,
+ * granted, -errno if denied.
+ */
+ static int hook_ptrace_access_check(struct task_struct *const child,
+- const unsigned int mode)
++ const unsigned int mode)
+ {
+ return task_ptrace(current, child);
+ }
+@@ -116,5 +116,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ __init void landlock_add_ptrace_hooks(void)
+ {
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+- LANDLOCK_NAME);
++ LANDLOCK_NAME);
+ }
+diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
+index ec72b9262bf38..996484f98bfde 100644
+--- a/security/landlock/ruleset.c
++++ b/security/landlock/ruleset.c
+@@ -28,8 +28,9 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+ {
+ struct landlock_ruleset *new_ruleset;
+
+- new_ruleset = kzalloc(struct_size(new_ruleset, fs_access_masks,
+- num_layers), GFP_KERNEL_ACCOUNT);
++ new_ruleset =
++ kzalloc(struct_size(new_ruleset, fs_access_masks, num_layers),
++ GFP_KERNEL_ACCOUNT);
+ if (!new_ruleset)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&new_ruleset->usage, 1);
+@@ -44,7 +45,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+ return new_ruleset;
+ }
+
+-struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask)
++struct landlock_ruleset *
++landlock_create_ruleset(const access_mask_t fs_access_mask)
+ {
+ struct landlock_ruleset *new_ruleset;
+
+@@ -66,11 +68,10 @@ static void build_check_rule(void)
+ BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+ }
+
+-static struct landlock_rule *create_rule(
+- struct landlock_object *const object,
+- const struct landlock_layer (*const layers)[],
+- const u32 num_layers,
+- const struct landlock_layer *const new_layer)
++static struct landlock_rule *
++create_rule(struct landlock_object *const object,
++ const struct landlock_layer (*const layers)[], const u32 num_layers,
++ const struct landlock_layer *const new_layer)
+ {
+ struct landlock_rule *new_rule;
+ u32 new_num_layers;
+@@ -85,7 +86,7 @@ static struct landlock_rule *create_rule(
+ new_num_layers = num_layers;
+ }
+ new_rule = kzalloc(struct_size(new_rule, layers, new_num_layers),
+- GFP_KERNEL_ACCOUNT);
++ GFP_KERNEL_ACCOUNT);
+ if (!new_rule)
+ return ERR_PTR(-ENOMEM);
+ RB_CLEAR_NODE(&new_rule->node);
+@@ -94,7 +95,7 @@ static struct landlock_rule *create_rule(
+ new_rule->num_layers = new_num_layers;
+ /* Copies the original layer stack. */
+ memcpy(new_rule->layers, layers,
+- flex_array_size(new_rule, layers, num_layers));
++ flex_array_size(new_rule, layers, num_layers));
+ if (new_layer)
+ /* Adds a copy of @new_layer on the layer stack. */
+ new_rule->layers[new_rule->num_layers - 1] = *new_layer;
+@@ -142,9 +143,9 @@ static void build_check_ruleset(void)
+ * access rights.
+ */
+ static int insert_rule(struct landlock_ruleset *const ruleset,
+- struct landlock_object *const object,
+- const struct landlock_layer (*const layers)[],
+- size_t num_layers)
++ struct landlock_object *const object,
++ const struct landlock_layer (*const layers)[],
++ size_t num_layers)
+ {
+ struct rb_node **walker_node;
+ struct rb_node *parent_node = NULL;
+@@ -156,8 +157,8 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
+ return -ENOENT;
+ walker_node = &(ruleset->root.rb_node);
+ while (*walker_node) {
+- struct landlock_rule *const this = rb_entry(*walker_node,
+- struct landlock_rule, node);
++ struct landlock_rule *const this =
++ rb_entry(*walker_node, struct landlock_rule, node);
+
+ if (this->object != object) {
+ parent_node = *walker_node;
+@@ -194,7 +195,7 @@ static int insert_rule(struct landlock_ruleset *const ruleset,
+ * ruleset and a domain.
+ */
+ new_rule = create_rule(object, &this->layers, this->num_layers,
+- &(*layers)[0]);
++ &(*layers)[0]);
+ if (IS_ERR(new_rule))
+ return PTR_ERR(new_rule);
+ rb_replace_node(&this->node, &new_rule->node, &ruleset->root);
+@@ -228,13 +229,14 @@ static void build_check_layer(void)
+
+ /* @ruleset must be locked by the caller. */
+ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+- struct landlock_object *const object, const u32 access)
++ struct landlock_object *const object,
++ const access_mask_t access)
+ {
+- struct landlock_layer layers[] = {{
++ struct landlock_layer layers[] = { {
+ .access = access,
+ /* When @level is zero, insert_rule() extends @ruleset. */
+ .level = 0,
+- }};
++ } };
+
+ build_check_layer();
+ return insert_rule(ruleset, object, &layers, ARRAY_SIZE(layers));
+@@ -257,7 +259,7 @@ static void put_hierarchy(struct landlock_hierarchy *hierarchy)
+ }
+
+ static int merge_ruleset(struct landlock_ruleset *const dst,
+- struct landlock_ruleset *const src)
++ struct landlock_ruleset *const src)
+ {
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+@@ -282,11 +284,11 @@ static int merge_ruleset(struct landlock_ruleset *const dst,
+ dst->fs_access_masks[dst->num_layers - 1] = src->fs_access_masks[0];
+
+ /* Merges the @src tree. */
+- rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+- &src->root, node) {
+- struct landlock_layer layers[] = {{
++ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, &src->root,
++ node) {
++ struct landlock_layer layers[] = { {
+ .level = dst->num_layers,
+- }};
++ } };
+
+ if (WARN_ON_ONCE(walker_rule->num_layers != 1)) {
+ err = -EINVAL;
+@@ -298,7 +300,7 @@ static int merge_ruleset(struct landlock_ruleset *const dst,
+ }
+ layers[0].access = walker_rule->layers[0].access;
+ err = insert_rule(dst, walker_rule->object, &layers,
+- ARRAY_SIZE(layers));
++ ARRAY_SIZE(layers));
+ if (err)
+ goto out_unlock;
+ }
+@@ -310,7 +312,7 @@ out_unlock:
+ }
+
+ static int inherit_ruleset(struct landlock_ruleset *const parent,
+- struct landlock_ruleset *const child)
++ struct landlock_ruleset *const child)
+ {
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+@@ -325,9 +327,10 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
+
+ /* Copies the @parent tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+- &parent->root, node) {
++ &parent->root, node) {
+ err = insert_rule(child, walker_rule->object,
+- &walker_rule->layers, walker_rule->num_layers);
++ &walker_rule->layers,
++ walker_rule->num_layers);
+ if (err)
+ goto out_unlock;
+ }
+@@ -338,7 +341,7 @@ static int inherit_ruleset(struct landlock_ruleset *const parent,
+ }
+ /* Copies the parent layer stack and leaves a space for the new layer. */
+ memcpy(child->fs_access_masks, parent->fs_access_masks,
+- flex_array_size(parent, fs_access_masks, parent->num_layers));
++ flex_array_size(parent, fs_access_masks, parent->num_layers));
+
+ if (WARN_ON_ONCE(!parent->hierarchy)) {
+ err = -EINVAL;
+@@ -358,8 +361,7 @@ static void free_ruleset(struct landlock_ruleset *const ruleset)
+ struct landlock_rule *freeme, *next;
+
+ might_sleep();
+- rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root,
+- node)
++ rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root, node)
+ free_rule(freeme);
+ put_hierarchy(ruleset->hierarchy);
+ kfree(ruleset);
+@@ -397,9 +399,9 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
+ * Returns the intersection of @parent and @ruleset, or returns @parent if
+ * @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
+ */
+-struct landlock_ruleset *landlock_merge_ruleset(
+- struct landlock_ruleset *const parent,
+- struct landlock_ruleset *const ruleset)
++struct landlock_ruleset *
++landlock_merge_ruleset(struct landlock_ruleset *const parent,
++ struct landlock_ruleset *const ruleset)
+ {
+ struct landlock_ruleset *new_dom;
+ u32 num_layers;
+@@ -421,8 +423,8 @@ struct landlock_ruleset *landlock_merge_ruleset(
+ new_dom = create_ruleset(num_layers);
+ if (IS_ERR(new_dom))
+ return new_dom;
+- new_dom->hierarchy = kzalloc(sizeof(*new_dom->hierarchy),
+- GFP_KERNEL_ACCOUNT);
++ new_dom->hierarchy =
++ kzalloc(sizeof(*new_dom->hierarchy), GFP_KERNEL_ACCOUNT);
+ if (!new_dom->hierarchy) {
+ err = -ENOMEM;
+ goto out_put_dom;
+@@ -449,9 +451,9 @@ out_put_dom:
+ /*
+ * The returned access has the same lifetime as @ruleset.
+ */
+-const struct landlock_rule *landlock_find_rule(
+- const struct landlock_ruleset *const ruleset,
+- const struct landlock_object *const object)
++const struct landlock_rule *
++landlock_find_rule(const struct landlock_ruleset *const ruleset,
++ const struct landlock_object *const object)
+ {
+ const struct rb_node *node;
+
+@@ -459,8 +461,8 @@ const struct landlock_rule *landlock_find_rule(
+ return NULL;
+ node = ruleset->root.rb_node;
+ while (node) {
+- struct landlock_rule *this = rb_entry(node,
+- struct landlock_rule, node);
++ struct landlock_rule *this =
++ rb_entry(node, struct landlock_rule, node);
+
+ if (this->object == object)
+ return this;
+diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
+index 2d3ed7ec5a0ab..d43231b783e4f 100644
+--- a/security/landlock/ruleset.h
++++ b/security/landlock/ruleset.h
+@@ -9,13 +9,26 @@
+ #ifndef _SECURITY_LANDLOCK_RULESET_H
+ #define _SECURITY_LANDLOCK_RULESET_H
+
++#include <linux/bitops.h>
++#include <linux/build_bug.h>
+ #include <linux/mutex.h>
+ #include <linux/rbtree.h>
+ #include <linux/refcount.h>
+ #include <linux/workqueue.h>
+
++#include "limits.h"
+ #include "object.h"
+
++typedef u16 access_mask_t;
++/* Makes sure all filesystem access rights can be stored. */
++static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
++/* Makes sure for_each_set_bit() and for_each_clear_bit() calls are OK. */
++static_assert(sizeof(unsigned long) >= sizeof(access_mask_t));
++
++typedef u16 layer_mask_t;
++/* Makes sure all layers can be checked. */
++static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS);
++
+ /**
+ * struct landlock_layer - Access rights for a given layer
+ */
+@@ -28,7 +41,7 @@ struct landlock_layer {
+ * @access: Bitfield of allowed actions on the kernel object. They are
+ * relative to the object type (e.g. %LANDLOCK_ACTION_FS_READ).
+ */
+- u16 access;
++ access_mask_t access;
+ };
+
+ /**
+@@ -135,26 +148,28 @@ struct landlock_ruleset {
+ * layers are set once and never changed for the
+ * lifetime of the ruleset.
+ */
+- u16 fs_access_masks[];
++ access_mask_t fs_access_masks[];
+ };
+ };
+ };
+
+-struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask);
++struct landlock_ruleset *
++landlock_create_ruleset(const access_mask_t fs_access_mask);
+
+ void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
+ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
+
+ int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+- struct landlock_object *const object, const u32 access);
++ struct landlock_object *const object,
++ const access_mask_t access);
+
+-struct landlock_ruleset *landlock_merge_ruleset(
+- struct landlock_ruleset *const parent,
+- struct landlock_ruleset *const ruleset);
++struct landlock_ruleset *
++landlock_merge_ruleset(struct landlock_ruleset *const parent,
++ struct landlock_ruleset *const ruleset);
+
+-const struct landlock_rule *landlock_find_rule(
+- const struct landlock_ruleset *const ruleset,
+- const struct landlock_object *const object);
++const struct landlock_rule *
++landlock_find_rule(const struct landlock_ruleset *const ruleset,
++ const struct landlock_object *const object);
+
+ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
+ {
+diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
+index 32396962f04d6..507d43827afed 100644
+--- a/security/landlock/syscalls.c
++++ b/security/landlock/syscalls.c
+@@ -43,9 +43,10 @@
+ * @src: User space pointer or NULL.
+ * @usize: (Alleged) size of the data pointed to by @src.
+ */
+-static __always_inline int copy_min_struct_from_user(void *const dst,
+- const size_t ksize, const size_t ksize_min,
+- const void __user *const src, const size_t usize)
++static __always_inline int
++copy_min_struct_from_user(void *const dst, const size_t ksize,
++ const size_t ksize_min, const void __user *const src,
++ const size_t usize)
+ {
+ /* Checks buffer inconsistencies. */
+ BUILD_BUG_ON(!dst);
+@@ -93,7 +94,7 @@ static void build_check_abi(void)
+ /* Ruleset handling */
+
+ static int fop_ruleset_release(struct inode *const inode,
+- struct file *const filp)
++ struct file *const filp)
+ {
+ struct landlock_ruleset *ruleset = filp->private_data;
+
+@@ -102,15 +103,15 @@ static int fop_ruleset_release(struct inode *const inode,
+ }
+
+ static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
+- const size_t size, loff_t *const ppos)
++ const size_t size, loff_t *const ppos)
+ {
+ /* Dummy handler to enable FMODE_CAN_READ. */
+ return -EINVAL;
+ }
+
+ static ssize_t fop_dummy_write(struct file *const filp,
+- const char __user *const buf, const size_t size,
+- loff_t *const ppos)
++ const char __user *const buf, const size_t size,
++ loff_t *const ppos)
+ {
+ /* Dummy handler to enable FMODE_CAN_WRITE. */
+ return -EINVAL;
+@@ -128,7 +129,7 @@ static const struct file_operations ruleset_fops = {
+ .write = fop_dummy_write,
+ };
+
+-#define LANDLOCK_ABI_VERSION 1
++#define LANDLOCK_ABI_VERSION 1
+
+ /**
+ * sys_landlock_create_ruleset - Create a new ruleset
+@@ -168,22 +169,23 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
+ return -EOPNOTSUPP;
+
+ if (flags) {
+- if ((flags == LANDLOCK_CREATE_RULESET_VERSION)
+- && !attr && !size)
++ if ((flags == LANDLOCK_CREATE_RULESET_VERSION) && !attr &&
++ !size)
+ return LANDLOCK_ABI_VERSION;
+ return -EINVAL;
+ }
+
+ /* Copies raw user space buffer. */
+ err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
+- offsetofend(typeof(ruleset_attr), handled_access_fs),
+- attr, size);
++ offsetofend(typeof(ruleset_attr),
++ handled_access_fs),
++ attr, size);
+ if (err)
+ return err;
+
+ /* Checks content (and 32-bits cast). */
+ if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
+- LANDLOCK_MASK_ACCESS_FS)
++ LANDLOCK_MASK_ACCESS_FS)
+ return -EINVAL;
+
+ /* Checks arguments and transforms to kernel struct. */
+@@ -192,8 +194,8 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
+ return PTR_ERR(ruleset);
+
+ /* Creates anonymous FD referring to the ruleset. */
+- ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops,
+- ruleset, O_RDWR | O_CLOEXEC);
++ ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
++ ruleset, O_RDWR | O_CLOEXEC);
+ if (ruleset_fd < 0)
+ landlock_put_ruleset(ruleset);
+ return ruleset_fd;
+@@ -204,7 +206,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset,
+ * landlock_put_ruleset() on the return value.
+ */
+ static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
+- const fmode_t mode)
++ const fmode_t mode)
+ {
+ struct fd ruleset_f;
+ struct landlock_ruleset *ruleset;
+@@ -244,8 +246,8 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
+ struct fd f;
+ int err = 0;
+
+- BUILD_BUG_ON(!__same_type(fd,
+- ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
++ BUILD_BUG_ON(!__same_type(
++ fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
+
+ /* Handles O_PATH. */
+ f = fdget_raw(fd);
+@@ -257,10 +259,10 @@ static int get_path_from_fd(const s32 fd, struct path *const path)
+ * pipefs).
+ */
+ if ((f.file->f_op == &ruleset_fops) ||
+- (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
+- (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
+- d_is_negative(f.file->f_path.dentry) ||
+- IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
++ (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
++ (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
++ d_is_negative(f.file->f_path.dentry) ||
++ IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
+ err = -EBADFD;
+ goto out_fdput;
+ }
+@@ -290,19 +292,18 @@ out_fdput:
+ *
+ * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - EINVAL: @flags is not 0, or inconsistent access in the rule (i.e.
+- * &landlock_path_beneath_attr.allowed_access is not a subset of the rule's
+- * accesses);
++ * &landlock_path_beneath_attr.allowed_access is not a subset of the
++ * ruleset handled accesses);
+ * - ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access);
+ * - EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
+ * member of @rule_attr is not a file descriptor as expected;
+ * - EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
+- * @rule_attr is not the expected file descriptor type (e.g. file open
+- * without O_PATH);
++ * @rule_attr is not the expected file descriptor type;
+ * - EPERM: @ruleset_fd has no write access to the underlying ruleset;
+ * - EFAULT: @rule_attr inconsistency.
+ */
+-SYSCALL_DEFINE4(landlock_add_rule,
+- const int, ruleset_fd, const enum landlock_rule_type, rule_type,
++SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
++ const enum landlock_rule_type, rule_type,
+ const void __user *const, rule_attr, const __u32, flags)
+ {
+ struct landlock_path_beneath_attr path_beneath_attr;
+@@ -317,20 +318,24 @@ SYSCALL_DEFINE4(landlock_add_rule,
+ if (flags)
+ return -EINVAL;
+
+- if (rule_type != LANDLOCK_RULE_PATH_BENEATH)
+- return -EINVAL;
+-
+- /* Copies raw user space buffer, only one type for now. */
+- res = copy_from_user(&path_beneath_attr, rule_attr,
+- sizeof(path_beneath_attr));
+- if (res)
+- return -EFAULT;
+-
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
++ if (rule_type != LANDLOCK_RULE_PATH_BENEATH) {
++ err = -EINVAL;
++ goto out_put_ruleset;
++ }
++
++ /* Copies raw user space buffer, only one type for now. */
++ res = copy_from_user(&path_beneath_attr, rule_attr,
++ sizeof(path_beneath_attr));
++ if (res) {
++ err = -EFAULT;
++ goto out_put_ruleset;
++ }
++
+ /*
+ * Informs about useless rule: empty allowed_access (i.e. deny rules)
+ * are ignored in path walks.
+@@ -344,7 +349,7 @@ SYSCALL_DEFINE4(landlock_add_rule,
+ * (ruleset->fs_access_masks[0] is automatically upgraded to 64-bits).
+ */
+ if ((path_beneath_attr.allowed_access | ruleset->fs_access_masks[0]) !=
+- ruleset->fs_access_masks[0]) {
++ ruleset->fs_access_masks[0]) {
+ err = -EINVAL;
+ goto out_put_ruleset;
+ }
+@@ -356,7 +361,7 @@ SYSCALL_DEFINE4(landlock_add_rule,
+
+ /* Imports the new rule. */
+ err = landlock_append_fs_rule(ruleset, &path,
+- path_beneath_attr.allowed_access);
++ path_beneath_attr.allowed_access);
+ path_put(&path);
+
+ out_put_ruleset:
+@@ -389,8 +394,8 @@ out_put_ruleset:
+ * - E2BIG: The maximum number of stacked rulesets is reached for the current
+ * thread.
+ */
+-SYSCALL_DEFINE2(landlock_restrict_self,
+- const int, ruleset_fd, const __u32, flags)
++SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
++ flags)
+ {
+ struct landlock_ruleset *new_dom, *ruleset;
+ struct cred *new_cred;
+@@ -400,18 +405,18 @@ SYSCALL_DEFINE2(landlock_restrict_self,
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+- /* No flag for now. */
+- if (flags)
+- return -EINVAL;
+-
+ /*
+ * Similar checks as for seccomp(2), except that an -EPERM may be
+ * returned.
+ */
+ if (!task_no_new_privs(current) &&
+- !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
++ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+ return -EPERM;
+
++ /* No flag for now. */
++ if (flags)
++ return -EINVAL;
++
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
+ if (IS_ERR(ruleset))
+diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
+index b12f7d986b1e3..5fce105a372d3 100644
+--- a/security/loadpin/loadpin.c
++++ b/security/loadpin/loadpin.c
+@@ -118,21 +118,11 @@ static void loadpin_sb_free_security(struct super_block *mnt_sb)
+ }
+ }
+
+-static int loadpin_read_file(struct file *file, enum kernel_read_file_id id,
+- bool contents)
++static int loadpin_check(struct file *file, enum kernel_read_file_id id)
+ {
+ struct super_block *load_root;
+ const char *origin = kernel_read_file_id_str(id);
+
+- /*
+- * If we will not know that we'll be seeing the full contents
+- * then we cannot trust a load will be complete and unchanged
+- * off disk. Treat all contents=false hooks as if there were
+- * no associated file struct.
+- */
+- if (!contents)
+- file = NULL;
+-
+ /* If the file id is excluded, ignore the pinning. */
+ if ((unsigned int)id < ARRAY_SIZE(ignore_read_file_id) &&
+ ignore_read_file_id[id]) {
+@@ -187,9 +177,25 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id,
+ return 0;
+ }
+
++static int loadpin_read_file(struct file *file, enum kernel_read_file_id id,
++ bool contents)
++{
++ /*
++ * LoadPin only cares about the _origin_ of a file, not its
++ * contents, so we can ignore the "are full contents available"
++ * argument here.
++ */
++ return loadpin_check(file, id);
++}
++
+ static int loadpin_load_data(enum kernel_load_data_id id, bool contents)
+ {
+- return loadpin_read_file(NULL, (enum kernel_read_file_id) id, contents);
++ /*
++ * LoadPin only cares about the _origin_ of a file, not its
++ * contents, so a NULL file is passed, and we can ignore the
++ * state of "contents".
++ */
++ return loadpin_check(NULL, (enum kernel_read_file_id) id);
+ }
+
+ static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = {
+diff --git a/security/security.c b/security/security.c
+index 9ffa9e9c5c554..a97079e12c674 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
+ [LOCKDOWN_DEBUGFS] = "debugfs access",
+ [LOCKDOWN_XMON_WR] = "xmon write access",
+ [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
++ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
+ [LOCKDOWN_INTEGRITY_MAX] = "integrity",
+ [LOCKDOWN_KCORE] = "/proc/kcore access",
+ [LOCKDOWN_KPROBES] = "use of kprobes",
+ [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
++ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
+ [LOCKDOWN_PERF] = "unsafe use of perf",
+ [LOCKDOWN_TRACEFS] = "use of tracefs",
+ [LOCKDOWN_XMON_RW] = "xmon read and write access",
+@@ -747,25 +749,25 @@ static int lsm_superblock_alloc(struct super_block *sb)
+
+ /* Security operations */
+
+-int security_binder_set_context_mgr(struct task_struct *mgr)
++int security_binder_set_context_mgr(const struct cred *mgr)
+ {
+ return call_int_hook(binder_set_context_mgr, 0, mgr);
+ }
+
+-int security_binder_transaction(struct task_struct *from,
+- struct task_struct *to)
++int security_binder_transaction(const struct cred *from,
++ const struct cred *to)
+ {
+ return call_int_hook(binder_transaction, 0, from, to);
+ }
+
+-int security_binder_transfer_binder(struct task_struct *from,
+- struct task_struct *to)
++int security_binder_transfer_binder(const struct cred *from,
++ const struct cred *to)
+ {
+ return call_int_hook(binder_transfer_binder, 0, from, to);
+ }
+
+-int security_binder_transfer_file(struct task_struct *from,
+- struct task_struct *to, struct file *file)
++int security_binder_transfer_file(const struct cred *from,
++ const struct cred *to, struct file *file)
+ {
+ return call_int_hook(binder_transfer_file, 0, from, to, file);
+ }
+@@ -884,9 +886,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+ return call_int_hook(fs_context_dup, 0, fc, src_fc);
+ }
+
+-int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
++int security_fs_context_parse_param(struct fs_context *fc,
++ struct fs_parameter *param)
+ {
+- return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
++ struct security_hook_list *hp;
++ int trc;
++ int rc = -ENOPARAM;
++
++ hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
++ list) {
++ trc = hp->hook.fs_context_parse_param(fc, param);
++ if (trc == 0)
++ rc = 0;
++ else if (trc != -ENOPARAM)
++ return trc;
++ }
++ return rc;
+ }
+
+ int security_sb_alloc(struct super_block *sb)
+@@ -1577,12 +1592,13 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
+ int security_mmap_file(struct file *file, unsigned long prot,
+ unsigned long flags)
+ {
++ unsigned long prot_adj = mmap_prot(file, prot);
+ int ret;
+- ret = call_int_hook(mmap_file, 0, file, prot,
+- mmap_prot(file, prot), flags);
++
++ ret = call_int_hook(mmap_file, 0, file, prot, prot_adj, flags);
+ if (ret)
+ return ret;
+- return ima_file_mmap(file, prot);
++ return ima_file_mmap(file, prot, prot_adj, flags);
+ }
+
+ int security_mmap_addr(unsigned long addr)
+diff --git a/security/selinux/Makefile b/security/selinux/Makefile
+index 7761624448826..8b21520bd4b9f 100644
+--- a/security/selinux/Makefile
++++ b/security/selinux/Makefile
+@@ -23,8 +23,12 @@ ccflags-y := -I$(srctree)/security/selinux -I$(srctree)/security/selinux/include
+ $(addprefix $(obj)/,$(selinux-y)): $(obj)/flask.h
+
+ quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h
+- cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h
++ cmd_flask = $< $(obj)/flask.h $(obj)/av_permissions.h
+
+ targets += flask.h av_permissions.h
+-$(obj)/flask.h: $(src)/include/classmap.h FORCE
++# once make >= 4.3 is required, we can use grouped targets in the rule below,
++# which basically involves adding both headers and a '&' before the colon, see
++# the example below:
++# $(obj)/flask.h $(obj)/av_permissions.h &: scripts/selinux/...
++$(obj)/flask.h: scripts/selinux/genheaders/genheaders FORCE
+ $(call if_changed,flask)
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index e7ebd45ca3457..9ce029b2f2267 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -255,29 +255,6 @@ static inline u32 task_sid_obj(const struct task_struct *task)
+ return sid;
+ }
+
+-/*
+- * get the security ID of a task for use with binder
+- */
+-static inline u32 task_sid_binder(const struct task_struct *task)
+-{
+- /*
+- * In many case where this function is used we should be using the
+- * task's subjective SID, but we can't reliably access the subjective
+- * creds of a task other than our own so we must use the objective
+- * creds/SID, which are safe to access. The downside is that if a task
+- * is temporarily overriding it's creds it will not be reflected here;
+- * however, it isn't clear that binder would handle that case well
+- * anyway.
+- *
+- * If this ever changes and we can safely reference the subjective
+- * creds/SID of another task, this function will make it easier to
+- * identify the various places where we make use of the task SIDs in
+- * the binder code. It is also likely that we will need to adjust
+- * the main drivers/android binder code as well.
+- */
+- return task_sid_obj(task);
+-}
+-
+ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry);
+
+ /*
+@@ -378,6 +355,10 @@ static void inode_free_security(struct inode *inode)
+
+ struct selinux_mnt_opts {
+ const char *fscontext, *context, *rootcontext, *defcontext;
++ u32 fscontext_sid;
++ u32 context_sid;
++ u32 rootcontext_sid;
++ u32 defcontext_sid;
+ };
+
+ static void selinux_free_mnt_opts(void *mnt_opts)
+@@ -515,7 +496,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb)
+
+ static int sb_check_xattr_support(struct super_block *sb)
+ {
+- struct superblock_security_struct *sbsec = sb->s_security;
++ struct superblock_security_struct *sbsec = selinux_superblock(sb);
+ struct dentry *root = sb->s_root;
+ struct inode *root_inode = d_backing_inode(root);
+ u32 sid;
+@@ -641,7 +622,7 @@ static int parse_sid(struct super_block *sb, const char *s, u32 *sid)
+ if (rc)
+ pr_warn("SELinux: security_context_str_to_sid"
+ "(%s) failed for (dev %s, type %s) errno=%d\n",
+- s, sb->s_id, sb->s_type->name, rc);
++ s, sb ? sb->s_id : "?", sb ? sb->s_type->name : "?", rc);
+ return rc;
+ }
+
+@@ -1005,42 +986,58 @@ out:
+ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
+ {
+ struct selinux_mnt_opts *opts = *mnt_opts;
++ bool is_alloc_opts = false;
+
+ if (token == Opt_seclabel) /* eaten and completely ignored */
+ return 0;
+
++ if (!s)
++ return -ENOMEM;
++
+ if (!opts) {
+ opts = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+ *mnt_opts = opts;
++ is_alloc_opts = true;
+ }
+- if (!s)
+- return -ENOMEM;
++
+ switch (token) {
+ case Opt_context:
+ if (opts->context || opts->defcontext)
+ goto Einval;
+ opts->context = s;
++ if (selinux_initialized(&selinux_state))
++ parse_sid(NULL, s, &opts->context_sid);
+ break;
+ case Opt_fscontext:
+ if (opts->fscontext)
+ goto Einval;
+ opts->fscontext = s;
++ if (selinux_initialized(&selinux_state))
++ parse_sid(NULL, s, &opts->fscontext_sid);
+ break;
+ case Opt_rootcontext:
+ if (opts->rootcontext)
+ goto Einval;
+ opts->rootcontext = s;
++ if (selinux_initialized(&selinux_state))
++ parse_sid(NULL, s, &opts->rootcontext_sid);
+ break;
+ case Opt_defcontext:
+ if (opts->context || opts->defcontext)
+ goto Einval;
+ opts->defcontext = s;
++ if (selinux_initialized(&selinux_state))
++ parse_sid(NULL, s, &opts->defcontext_sid);
+ break;
+ }
+ return 0;
+ Einval:
++ if (is_alloc_opts) {
++ kfree(opts);
++ *mnt_opts = NULL;
++ }
+ pr_warn(SEL_MOUNT_FAIL_MSG);
+ return -EINVAL;
+ }
+@@ -2066,18 +2063,19 @@ static inline u32 open_file_to_av(struct file *file)
+
+ /* Hook functions begin here. */
+
+-static int selinux_binder_set_context_mgr(struct task_struct *mgr)
++static int selinux_binder_set_context_mgr(const struct cred *mgr)
+ {
+ return avc_has_perm(&selinux_state,
+- current_sid(), task_sid_binder(mgr), SECCLASS_BINDER,
++ current_sid(), cred_sid(mgr), SECCLASS_BINDER,
+ BINDER__SET_CONTEXT_MGR, NULL);
+ }
+
+-static int selinux_binder_transaction(struct task_struct *from,
+- struct task_struct *to)
++static int selinux_binder_transaction(const struct cred *from,
++ const struct cred *to)
+ {
+ u32 mysid = current_sid();
+- u32 fromsid = task_sid_binder(from);
++ u32 fromsid = cred_sid(from);
++ u32 tosid = cred_sid(to);
+ int rc;
+
+ if (mysid != fromsid) {
+@@ -2088,24 +2086,24 @@ static int selinux_binder_transaction(struct task_struct *from,
+ return rc;
+ }
+
+- return avc_has_perm(&selinux_state, fromsid, task_sid_binder(to),
++ return avc_has_perm(&selinux_state, fromsid, tosid,
+ SECCLASS_BINDER, BINDER__CALL, NULL);
+ }
+
+-static int selinux_binder_transfer_binder(struct task_struct *from,
+- struct task_struct *to)
++static int selinux_binder_transfer_binder(const struct cred *from,
++ const struct cred *to)
+ {
+ return avc_has_perm(&selinux_state,
+- task_sid_binder(from), task_sid_binder(to),
++ cred_sid(from), cred_sid(to),
+ SECCLASS_BINDER, BINDER__TRANSFER,
+ NULL);
+ }
+
+-static int selinux_binder_transfer_file(struct task_struct *from,
+- struct task_struct *to,
++static int selinux_binder_transfer_file(const struct cred *from,
++ const struct cred *to,
+ struct file *file)
+ {
+- u32 sid = task_sid_binder(to);
++ u32 sid = cred_sid(to);
+ struct file_security_struct *fsec = selinux_file(file);
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode_security_struct *isec;
+@@ -2704,9 +2702,7 @@ free_opt:
+ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
+ {
+ struct selinux_mnt_opts *opts = mnt_opts;
+- struct superblock_security_struct *sbsec = sb->s_security;
+- u32 sid;
+- int rc;
++ struct superblock_security_struct *sbsec = selinux_superblock(sb);
+
+ /*
+ * Superblock not initialized (i.e. no options) - reject if any
+@@ -2723,34 +2719,36 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
+ return (sbsec->flags & SE_MNTMASK) ? 1 : 0;
+
+ if (opts->fscontext) {
+- rc = parse_sid(sb, opts->fscontext, &sid);
+- if (rc)
++ if (opts->fscontext_sid == SECSID_NULL)
+ return 1;
+- if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
++ else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
++ opts->fscontext_sid))
+ return 1;
+ }
+ if (opts->context) {
+- rc = parse_sid(sb, opts->context, &sid);
+- if (rc)
++ if (opts->context_sid == SECSID_NULL)
+ return 1;
+- if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
++ else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
++ opts->context_sid))
+ return 1;
+ }
+ if (opts->rootcontext) {
+- struct inode_security_struct *root_isec;
+-
+- root_isec = backing_inode_security(sb->s_root);
+- rc = parse_sid(sb, opts->rootcontext, &sid);
+- if (rc)
+- return 1;
+- if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
++ if (opts->rootcontext_sid == SECSID_NULL)
+ return 1;
++ else {
++ struct inode_security_struct *root_isec;
++
++ root_isec = backing_inode_security(sb->s_root);
++ if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
++ opts->rootcontext_sid))
++ return 1;
++ }
+ }
+ if (opts->defcontext) {
+- rc = parse_sid(sb, opts->defcontext, &sid);
+- if (rc)
++ if (opts->defcontext_sid == SECSID_NULL)
+ return 1;
+- if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
++ else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
++ opts->defcontext_sid))
+ return 1;
+ }
+ return 0;
+@@ -2917,10 +2915,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc,
+ return opt;
+
+ rc = selinux_add_opt(opt, param->string, &fc->security);
+- if (!rc) {
++ if (!rc)
+ param->string = NULL;
+- rc = 1;
+- }
++
+ return rc;
+ }
+
+@@ -3798,6 +3795,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
+ CAP_OPT_NONE, true);
+ break;
+
++ case FIOCLEX:
++ case FIONCLEX:
++ if (!selinux_policycap_ioctl_skip_cloexec())
++ error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd);
++ break;
++
+ /* default case assumes that the command will go
+ * to the file's ioctl() function.
+ */
+@@ -5829,7 +5832,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
+ struct common_audit_data ad;
+ struct lsm_network_audit net = {0,};
+ char *addrp;
+- u8 proto;
++ u8 proto = 0;
+
+ if (sk == NULL)
+ return NF_ACCEPT;
+diff --git a/security/selinux/ima.c b/security/selinux/ima.c
+index 727c4e43219d7..ff7aea6b3774a 100644
+--- a/security/selinux/ima.c
++++ b/security/selinux/ima.c
+@@ -77,7 +77,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
+ size_t policy_len;
+ int rc = 0;
+
+- WARN_ON(!mutex_is_locked(&state->policy_mutex));
++ lockdep_assert_held(&state->policy_mutex);
+
+ state_str = selinux_ima_collect_state(state);
+ if (!state_str) {
+@@ -117,7 +117,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
+ */
+ void selinux_ima_measure_state(struct selinux_state *state)
+ {
+- WARN_ON(mutex_is_locked(&state->policy_mutex));
++ lockdep_assert_not_held(&state->policy_mutex);
+
+ mutex_lock(&state->policy_mutex);
+ selinux_ima_measure_state_locked(state);
+diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h
+index 2ec038efbb03c..a9e572ca4fd96 100644
+--- a/security/selinux/include/policycap.h
++++ b/security/selinux/include/policycap.h
+@@ -11,6 +11,7 @@ enum {
+ POLICYDB_CAPABILITY_CGROUPSECLABEL,
+ POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION,
+ POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS,
++ POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC,
+ __POLICYDB_CAPABILITY_MAX
+ };
+ #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
+diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h
+index b89289f092c93..ebd64afe1defd 100644
+--- a/security/selinux/include/policycap_names.h
++++ b/security/selinux/include/policycap_names.h
+@@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = {
+ "always_check_network",
+ "cgroup_seclabel",
+ "nnp_nosuid_transition",
+- "genfs_seclabel_symlinks"
++ "genfs_seclabel_symlinks",
++ "ioctl_skip_cloexec"
+ };
+
+ #endif /* _SELINUX_POLICYCAP_NAMES_H_ */
+diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
+index ac0ece01305a6..c0d966020ebdd 100644
+--- a/security/selinux/include/security.h
++++ b/security/selinux/include/security.h
+@@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
+ return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
+ }
+
++static inline bool selinux_policycap_ioctl_skip_cloexec(void)
++{
++ struct selinux_state *state = &selinux_state;
++
++ return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]);
++}
++
+ struct selinux_policy_convert_data;
+
+ struct selinux_load_state {
+diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
+index e4cd7cb856f37..f2f6203e0fff5 100644
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc)
+ }
+
+ ret = sel_make_avc_files(dentry);
++ if (ret)
++ goto err;
+
+ dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino);
+ if (IS_ERR(dentry)) {
+diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
+index 2ec6e5cd25d9b..feb206f3acb4a 100644
+--- a/security/selinux/ss/conditional.c
++++ b/security/selinux/ss/conditional.c
+@@ -152,6 +152,8 @@ static void cond_list_destroy(struct policydb *p)
+ for (i = 0; i < p->cond_list_len; i++)
+ cond_node_destroy(&p->cond_list[i]);
+ kfree(p->cond_list);
++ p->cond_list = NULL;
++ p->cond_list_len = 0;
+ }
+
+ void cond_policydb_destroy(struct policydb *p)
+@@ -441,7 +443,6 @@ int cond_read_list(struct policydb *p, void *fp)
+ return 0;
+ err:
+ cond_list_destroy(p);
+- p->cond_list = NULL;
+ return rc;
+ }
+
+diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
+index b8f6b3e0a9219..298098bb9c06d 100644
+--- a/security/selinux/ss/hashtab.c
++++ b/security/selinux/ss/hashtab.c
+@@ -30,13 +30,20 @@ static u32 hashtab_compute_size(u32 nel)
+
+ int hashtab_init(struct hashtab *h, u32 nel_hint)
+ {
+- h->size = hashtab_compute_size(nel_hint);
++ u32 size = hashtab_compute_size(nel_hint);
++
++ /* should already be zeroed, but better be safe */
+ h->nel = 0;
+- if (!h->size)
+- return 0;
++ h->size = 0;
++ h->htable = NULL;
+
+- h->htable = kcalloc(h->size, sizeof(*h->htable), GFP_KERNEL);
+- return h->htable ? 0 : -ENOMEM;
++ if (size) {
++ h->htable = kcalloc(size, sizeof(*h->htable), GFP_KERNEL);
++ if (!h->htable)
++ return -ENOMEM;
++ h->size = size;
++ }
++ return 0;
+ }
+
+ int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
+@@ -171,7 +178,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
+ kmem_cache_free(hashtab_node_cachep, cur);
+ }
+ }
+- kmem_cache_free(hashtab_node_cachep, new);
++ kfree(new->htable);
++ memset(new, 0, sizeof(*new));
+ return -ENOMEM;
+ }
+
+diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
+index 0ae1b718194a3..7f7858593bdb4 100644
+--- a/security/selinux/ss/policydb.c
++++ b/security/selinux/ss/policydb.c
+@@ -2011,6 +2011,7 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
+ if (!datum)
+ goto out;
+
++ datum->next = NULL;
+ *dst = datum;
+
+ /* ebitmap_read() will at least init the bitmap */
+@@ -2023,7 +2024,6 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
+ goto out;
+
+ datum->otype = le32_to_cpu(buf[0]);
+- datum->next = NULL;
+
+ dst = &datum->next;
+ }
+diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
+index c24d4e1063ea0..ffc4e7bad2054 100644
+--- a/security/selinux/ss/policydb.h
++++ b/security/selinux/ss/policydb.h
+@@ -370,6 +370,8 @@ static inline int put_entry(const void *buf, size_t bytes, int num, struct polic
+ {
+ size_t len = bytes * num;
+
++ if (len > fp->len)
++ return -EINVAL;
+ memcpy(fp->data, buf, len);
+ fp->data += len;
+ fp->len -= len;
+diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
+index e5f1b2757a83a..01716ed76592c 100644
+--- a/security/selinux/ss/services.c
++++ b/security/selinux/ss/services.c
+@@ -2021,7 +2021,8 @@ static inline int convert_context_handle_invalid_context(
+ * in `newc'. Verify that the context is valid
+ * under the new policy.
+ */
+-static int convert_context(struct context *oldc, struct context *newc, void *p)
++static int convert_context(struct context *oldc, struct context *newc, void *p,
++ gfp_t gfp_flags)
+ {
+ struct convert_context_args *args;
+ struct ocontext *oc;
+@@ -2035,7 +2036,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
+ args = p;
+
+ if (oldc->str) {
+- s = kstrdup(oldc->str, GFP_KERNEL);
++ s = kstrdup(oldc->str, gfp_flags);
+ if (!s)
+ return -ENOMEM;
+
+@@ -2376,6 +2377,43 @@ err_policy:
+ return rc;
+ }
+
++/**
++ * ocontext_to_sid - Helper to safely get sid for an ocontext
++ * @sidtab: SID table
++ * @c: ocontext structure
++ * @index: index of the context entry (0 or 1)
++ * @out_sid: pointer to the resulting SID value
++ *
++ * For all ocontexts except OCON_ISID the SID fields are populated
++ * on-demand when needed. Since updating the SID value is an SMP-sensitive
++ * operation, this helper must be used to do that safely.
++ *
++ * WARNING: This function may return -ESTALE, indicating that the caller
++ * must retry the operation after re-acquiring the policy pointer!
++ */
++static int ocontext_to_sid(struct sidtab *sidtab, struct ocontext *c,
++ size_t index, u32 *out_sid)
++{
++ int rc;
++ u32 sid;
++
++ /* Ensure the associated sidtab entry is visible to this thread. */
++ sid = smp_load_acquire(&c->sid[index]);
++ if (!sid) {
++ rc = sidtab_context_to_sid(sidtab, &c->context[index], &sid);
++ if (rc)
++ return rc;
++
++ /*
++ * Ensure the new sidtab entry is visible to other threads
++ * when they see the SID.
++ */
++ smp_store_release(&c->sid[index], sid);
++ }
++ *out_sid = sid;
++ return 0;
++}
++
+ /**
+ * security_port_sid - Obtain the SID for a port.
+ * @state: SELinux state
+@@ -2414,17 +2452,13 @@ retry:
+ }
+
+ if (c) {
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab, &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, out_sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- *out_sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else {
+ *out_sid = SECINITSID_PORT;
+ }
+@@ -2473,18 +2507,13 @@ retry:
+ }
+
+ if (c) {
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab,
+- &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, out_sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- *out_sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else
+ *out_sid = SECINITSID_UNLABELED;
+
+@@ -2533,17 +2562,13 @@ retry:
+ }
+
+ if (c) {
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab, &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, out_sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- *out_sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else
+ *out_sid = SECINITSID_UNLABELED;
+
+@@ -2587,25 +2612,13 @@ retry:
+ }
+
+ if (c) {
+- if (!c->sid[0] || !c->sid[1]) {
+- rc = sidtab_context_to_sid(sidtab, &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
+- rc = sidtab_context_to_sid(sidtab, &c->context[1],
+- &c->sid[1]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, if_sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- *if_sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else
+ *if_sid = SECINITSID_NETIF;
+
+@@ -2697,18 +2710,13 @@ retry:
+ }
+
+ if (c) {
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab,
+- &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, out_sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- *out_sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else {
+ *out_sid = SECINITSID_NODE;
+ }
+@@ -2873,7 +2881,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy,
+ u16 sclass;
+ struct genfs *genfs;
+ struct ocontext *c;
+- int rc, cmp = 0;
++ int cmp = 0;
+
+ while (path[0] == '/' && path[1] == '/')
+ path++;
+@@ -2887,9 +2895,8 @@ static inline int __security_genfs_sid(struct selinux_policy *policy,
+ break;
+ }
+
+- rc = -ENOENT;
+ if (!genfs || cmp)
+- goto out;
++ return -ENOENT;
+
+ for (c = genfs->head; c; c = c->next) {
+ len = strlen(c->u.name);
+@@ -2898,20 +2905,10 @@ static inline int __security_genfs_sid(struct selinux_policy *policy,
+ break;
+ }
+
+- rc = -ENOENT;
+ if (!c)
+- goto out;
+-
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]);
+- if (rc)
+- goto out;
+- }
++ return -ENOENT;
+
+- *sid = c->sid[0];
+- rc = 0;
+-out:
+- return rc;
++ return ocontext_to_sid(sidtab, c, 0, sid);
+ }
+
+ /**
+@@ -2996,17 +2993,13 @@ retry:
+
+ if (c) {
+ sbsec->behavior = c->v.behavior;
+- if (!c->sid[0]) {
+- rc = sidtab_context_to_sid(sidtab, &c->context[0],
+- &c->sid[0]);
+- if (rc == -ESTALE) {
+- rcu_read_unlock();
+- goto retry;
+- }
+- if (rc)
+- goto out;
++ rc = ocontext_to_sid(sidtab, c, 0, &sbsec->sid);
++ if (rc == -ESTALE) {
++ rcu_read_unlock();
++ goto retry;
+ }
+- sbsec->sid = c->sid[0];
++ if (rc)
++ goto out;
+ } else {
+ rc = __security_genfs_sid(policy, fstype, "/",
+ SECCLASS_DIR, &sbsec->sid);
+@@ -4053,6 +4046,7 @@ int security_read_policy(struct selinux_state *state,
+ int security_read_state_kernel(struct selinux_state *state,
+ void **data, size_t *len)
+ {
++ int err;
+ struct selinux_policy *policy;
+
+ policy = rcu_dereference_protected(
+@@ -4065,5 +4059,11 @@ int security_read_state_kernel(struct selinux_state *state,
+ if (!*data)
+ return -ENOMEM;
+
+- return __security_read_policy(policy, *data, len);
++ err = __security_read_policy(policy, *data, len);
++ if (err) {
++ vfree(*data);
++ *data = NULL;
++ *len = 0;
++ }
++ return err;
+ }
+diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
+index 656d50b09f762..1981c5af13e0a 100644
+--- a/security/selinux/ss/sidtab.c
++++ b/security/selinux/ss/sidtab.c
+@@ -325,7 +325,7 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
+ }
+
+ rc = convert->func(context, &dst_convert->context,
+- convert->args);
++ convert->args, GFP_ATOMIC);
+ if (rc) {
+ context_destroy(&dst->context);
+ goto out_unlock;
+@@ -404,7 +404,7 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst,
+ while (i < SIDTAB_LEAF_ENTRIES && *pos < count) {
+ rc = convert->func(&esrc->ptr_leaf->entries[i].context,
+ &edst->ptr_leaf->entries[i].context,
+- convert->args);
++ convert->args, GFP_KERNEL);
+ if (rc)
+ return rc;
+ (*pos)++;
+diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h
+index 4eff0e49dcb22..9fce0d553fe2c 100644
+--- a/security/selinux/ss/sidtab.h
++++ b/security/selinux/ss/sidtab.h
+@@ -65,7 +65,7 @@ struct sidtab_isid_entry {
+ };
+
+ struct sidtab_convert_params {
+- int (*func)(struct context *oldc, struct context *newc, void *args);
++ int (*func)(struct context *oldc, struct context *newc, void *args, gfp_t gfp_flags);
+ void *args;
+ struct sidtab *target;
+ };
+diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
+index be83e5ce4469c..debe15207d2bf 100644
+--- a/security/selinux/xfrm.c
++++ b/security/selinux/xfrm.c
+@@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+ int rc;
+ struct xfrm_sec_ctx *ctx;
+ char *ctx_str = NULL;
+- int str_len;
++ u32 str_len;
+
+ if (!polsec)
+ return 0;
+diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
+index 21a0e7c3b8dee..3f3f56f6be4da 100644
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -2505,7 +2505,7 @@ static int smk_ipv6_check(struct smack_known *subject,
+ #ifdef CONFIG_AUDIT
+ smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
+ ad.a.u.net->family = PF_INET6;
+- ad.a.u.net->dport = ntohs(address->sin6_port);
++ ad.a.u.net->dport = address->sin6_port;
+ if (act == SMK_RECEIVING)
+ ad.a.u.net->v6info.saddr = address->sin6_addr;
+ else
+diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
+index 3a75d2a8f5178..27fd7744e0fc0 100644
+--- a/security/smack/smackfs.c
++++ b/security/smack/smackfs.c
+@@ -693,9 +693,7 @@ static void smk_cipso_doi(void)
+ printk(KERN_WARNING "%s:%d remove rc = %d\n",
+ __func__, __LINE__, rc);
+
+- doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL);
+- if (doip == NULL)
+- panic("smack: Failed to initialize cipso DOI.\n");
++ doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL | __GFP_NOFAIL);
+ doip->map.std = NULL;
+ doip->doi = smk_cipso_doi_value;
+ doip->type = CIPSO_V4_MAP_PASS;
+@@ -714,7 +712,7 @@ static void smk_cipso_doi(void)
+ if (rc != 0) {
+ printk(KERN_WARNING "%s:%d map add rc = %d\n",
+ __func__, __LINE__, rc);
+- kfree(doip);
++ netlbl_cfg_cipsov4_del(doip->doi, &nai);
+ return;
+ }
+ }
+@@ -831,6 +829,7 @@ static int smk_open_cipso(struct inode *inode, struct file *file)
+ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int format)
+ {
++ struct netlbl_lsm_catmap *old_cat;
+ struct smack_known *skp;
+ struct netlbl_lsm_secattr ncats;
+ char mapcatset[SMK_CIPSOLEN];
+@@ -896,7 +895,7 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
+ }
+
+ ret = sscanf(rule, "%d", &catlen);
+- if (ret != 1 || catlen > SMACK_CIPSO_MAXCATNUM)
++ if (ret != 1 || catlen < 0 || catlen > SMACK_CIPSO_MAXCATNUM)
+ goto out;
+
+ if (format == SMK_FIXED24_FMT &&
+@@ -920,9 +919,11 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
+
+ rc = smk_netlbl_mls(maplevel, mapcatset, &ncats, SMK_CIPSOLEN);
+ if (rc >= 0) {
+- netlbl_catmap_free(skp->smk_netlabel.attr.mls.cat);
++ old_cat = skp->smk_netlabel.attr.mls.cat;
+ skp->smk_netlabel.attr.mls.cat = ncats.attr.mls.cat;
+ skp->smk_netlabel.attr.mls.lvl = ncats.attr.mls.lvl;
++ synchronize_rcu();
++ netlbl_catmap_free(old_cat);
+ rc = count;
+ /*
+ * This mapping may have been cached, so clear the cache.
+diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile
+index cca5a3012fee2..221eaadffb09c 100644
+--- a/security/tomoyo/Makefile
++++ b/security/tomoyo/Makefile
+@@ -10,7 +10,7 @@ endef
+ quiet_cmd_policy = POLICY $@
+ cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@
+
+-$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE
++$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE
+ $(call if_changed,policy)
+
+ $(obj)/common.o: $(obj)/builtin-policy.h
+diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c
+index 3445ae6fd4794..363b65be87ab7 100644
+--- a/security/tomoyo/load_policy.c
++++ b/security/tomoyo/load_policy.c
+@@ -24,7 +24,7 @@ static const char *tomoyo_loader;
+ static int __init tomoyo_loader_setup(char *str)
+ {
+ tomoyo_loader = str;
+- return 0;
++ return 1;
+ }
+
+ __setup("TOMOYO_loader=", tomoyo_loader_setup);
+@@ -64,7 +64,7 @@ static const char *tomoyo_trigger;
+ static int __init tomoyo_trigger_setup(char *str)
+ {
+ tomoyo_trigger = str;
+- return 0;
++ return 1;
+ }
+
+ __setup("TOMOYO_trigger=", tomoyo_trigger_setup);
+diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
+index 1da2e3722b126..6799b1122c9d8 100644
+--- a/security/tomoyo/util.c
++++ b/security/tomoyo/util.c
+@@ -1051,10 +1051,11 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
+ return false;
+ if (!domain)
+ return true;
++ if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED]))
++ return false;
+ list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
+ u16 perm;
+- u8 i;
+
+ if (ptr->is_deleted)
+ continue;
+@@ -1065,23 +1066,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
+ */
+ switch (ptr->type) {
+ case TOMOYO_TYPE_PATH_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm);
++ perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm);
+ break;
+ case TOMOYO_TYPE_PATH2_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm);
++ perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm);
+ break;
+ case TOMOYO_TYPE_PATH_NUMBER_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head)
++ perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head)
+ ->perm);
+ break;
+ case TOMOYO_TYPE_MKDEV_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
++ perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
+ break;
+ case TOMOYO_TYPE_INET_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm);
++ perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm);
+ break;
+ case TOMOYO_TYPE_UNIX_ACL:
+- data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm);
++ perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm);
+ break;
+ case TOMOYO_TYPE_MANUAL_TASK_ACL:
+ perm = 0;
+@@ -1089,21 +1090,17 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
+ default:
+ perm = 1;
+ }
+- for (i = 0; i < 16; i++)
+- if (perm & (1 << i))
+- count++;
++ count += hweight16(perm);
+ }
+ if (count < tomoyo_profile(domain->ns, domain->profile)->
+ pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
+ return true;
+- if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
+- domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
+- /* r->granted = false; */
+- tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
++ WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true);
++ /* r->granted = false; */
++ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+ #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING
+- pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
+- domain->domainname->name);
++ pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
++ domain->domainname->name);
+ #endif
+- }
+ return false;
+ }
+diff --git a/sound/Kconfig b/sound/Kconfig
+index e56d96d2b11ca..1903c35d799e1 100644
+--- a/sound/Kconfig
++++ b/sound/Kconfig
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ menuconfig SOUND
+ tristate "Sound card support"
+- depends on HAS_IOMEM
++ depends on HAS_IOMEM || UML
+ help
+ If you have a sound card in your computer, i.e. if it can say more
+ than an occasional beep, say Y.
+diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c
+index faf6b03131ee4..51ed2f34b276d 100644
+--- a/sound/aoa/soundbus/i2sbus/core.c
++++ b/sound/aoa/soundbus/i2sbus/core.c
+@@ -147,6 +147,7 @@ static int i2sbus_get_and_fixup_rsrc(struct device_node *np, int index,
+ return rc;
+ }
+
++/* Returns 1 if added, 0 for otherwise; don't return a negative value! */
+ /* FIXME: look at device node refcounting */
+ static int i2sbus_add_dev(struct macio_dev *macio,
+ struct i2sbus_control *control,
+@@ -213,7 +214,7 @@ static int i2sbus_add_dev(struct macio_dev *macio,
+ * either as the second one in that case is just a modem. */
+ if (!ok) {
+ kfree(dev);
+- return -ENODEV;
++ return 0;
+ }
+
+ mutex_init(&dev->lock);
+@@ -302,6 +303,10 @@ static int i2sbus_add_dev(struct macio_dev *macio,
+
+ if (soundbus_add_one(&dev->sound)) {
+ printk(KERN_DEBUG "i2sbus: device registration error!\n");
++ if (dev->sound.ofdev.dev.kobj.state_initialized) {
++ soundbus_dev_put(&dev->sound);
++ return 0;
++ }
+ goto err;
+ }
+
+diff --git a/sound/core/Makefile b/sound/core/Makefile
+index d774792850f31..79e1407cd0de7 100644
+--- a/sound/core/Makefile
++++ b/sound/core/Makefile
+@@ -9,7 +9,9 @@ ifneq ($(CONFIG_SND_PROC_FS),)
+ snd-y += info.o
+ snd-$(CONFIG_SND_OSSEMUL) += info_oss.o
+ endif
++ifneq ($(CONFIG_M68K),y)
+ snd-$(CONFIG_ISA_DMA_API) += isadma.o
++endif
+ snd-$(CONFIG_SND_OSSEMUL) += sound_oss.o
+ snd-$(CONFIG_SND_VMASTER) += vmaster.o
+ snd-$(CONFIG_SND_JACK) += ctljack.o jack.o
+diff --git a/sound/core/control.c b/sound/core/control.c
+index a25c0d64d104f..b83ec284d6114 100644
+--- a/sound/core/control.c
++++ b/sound/core/control.c
+@@ -127,6 +127,7 @@ static int snd_ctl_release(struct inode *inode, struct file *file)
+ if (control->vd[idx].owner == ctl)
+ control->vd[idx].owner = NULL;
+ up_write(&card->controls_rwsem);
++ snd_fasync_free(ctl->fasync);
+ snd_ctl_empty_read_queue(ctl);
+ put_pid(ctl->pid);
+ kfree(ctl);
+@@ -181,7 +182,7 @@ void snd_ctl_notify(struct snd_card *card, unsigned int mask,
+ _found:
+ wake_up(&ctl->change_sleep);
+ spin_unlock(&ctl->read_lock);
+- kill_fasync(&ctl->fasync, SIGIO, POLL_IN);
++ snd_kill_fasync(ctl->fasync, SIGIO, POLL_IN);
+ }
+ read_unlock_irqrestore(&card->ctl_files_rwlock, flags);
+ }
+@@ -1066,14 +1067,19 @@ static int snd_ctl_elem_read(struct snd_card *card,
+ const u32 pattern = 0xdeadbeef;
+ int ret;
+
++ down_read(&card->controls_rwsem);
+ kctl = snd_ctl_find_id(card, &control->id);
+- if (kctl == NULL)
+- return -ENOENT;
++ if (kctl == NULL) {
++ ret = -ENOENT;
++ goto unlock;
++ }
+
+ index_offset = snd_ctl_get_ioff(kctl, &control->id);
+ vd = &kctl->vd[index_offset];
+- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
+- return -EPERM;
++ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) {
++ ret = -EPERM;
++ goto unlock;
++ }
+
+ snd_ctl_build_ioff(&control->id, kctl, index_offset);
+
+@@ -1083,7 +1089,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
+ info.id = control->id;
+ ret = __snd_ctl_elem_info(card, kctl, &info, NULL);
+ if (ret < 0)
+- return ret;
++ goto unlock;
+ #endif
+
+ if (!snd_ctl_skip_validation(&info))
+@@ -1093,7 +1099,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
+ ret = kctl->get(kctl, control);
+ snd_power_unref(card);
+ if (ret < 0)
+- return ret;
++ goto unlock;
+ if (!snd_ctl_skip_validation(&info) &&
+ sanity_check_elem_value(card, control, &info, pattern) < 0) {
+ dev_err(card->dev,
+@@ -1101,8 +1107,11 @@ static int snd_ctl_elem_read(struct snd_card *card,
+ control->id.iface, control->id.device,
+ control->id.subdevice, control->id.name,
+ control->id.index);
+- return -EINVAL;
++ ret = -EINVAL;
++ goto unlock;
+ }
++unlock:
++ up_read(&card->controls_rwsem);
+ return ret;
+ }
+
+@@ -1116,9 +1125,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card,
+ if (IS_ERR(control))
+ return PTR_ERR(control);
+
+- down_read(&card->controls_rwsem);
+ result = snd_ctl_elem_read(card, control);
+- up_read(&card->controls_rwsem);
+ if (result < 0)
+ goto error;
+
+@@ -2002,7 +2009,7 @@ static int snd_ctl_fasync(int fd, struct file * file, int on)
+ struct snd_ctl_file *ctl;
+
+ ctl = file->private_data;
+- return fasync_helper(fd, file, on, &ctl->fasync);
++ return snd_fasync_helper(fd, file, on, &ctl->fasync);
+ }
+
+ /* return the preferred subdevice number if already assigned;
+@@ -2170,7 +2177,7 @@ static int snd_ctl_dev_disconnect(struct snd_device *device)
+ read_lock_irqsave(&card->ctl_files_rwlock, flags);
+ list_for_each_entry(ctl, &card->ctl_files, list) {
+ wake_up(&ctl->change_sleep);
+- kill_fasync(&ctl->fasync, SIGIO, POLL_ERR);
++ snd_kill_fasync(ctl->fasync, SIGIO, POLL_ERR);
+ }
+ read_unlock_irqrestore(&card->ctl_files_rwlock, flags);
+
+diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
+index 470dabc60aa0e..edff063e088d2 100644
+--- a/sound/core/control_compat.c
++++ b/sound/core/control_compat.c
+@@ -264,6 +264,7 @@ static int copy_ctl_value_to_user(void __user *userdata,
+ struct snd_ctl_elem_value *data,
+ int type, int count)
+ {
++ struct snd_ctl_elem_value32 __user *data32 = userdata;
+ int i, size;
+
+ if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
+@@ -280,6 +281,8 @@ static int copy_ctl_value_to_user(void __user *userdata,
+ if (copy_to_user(valuep, data->value.bytes.data, size))
+ return -EFAULT;
+ }
++ if (copy_to_user(&data32->id, &data->id, sizeof(data32->id)))
++ return -EFAULT;
+ return 0;
+ }
+
+diff --git a/sound/core/control_led.c b/sound/core/control_led.c
+index a95332b2b90b0..3eb1c5af82ad1 100644
+--- a/sound/core/control_led.c
++++ b/sound/core/control_led.c
+@@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si
+ bool attach)
+ {
+ char buf2[256], *s, *os;
+- size_t len = max(sizeof(s) - 1, count);
+ struct snd_ctl_elem_id id;
+ int err;
+
+- strncpy(buf2, buf, len);
+- buf2[len] = '\0';
++ if (strscpy(buf2, buf, sizeof(buf2)) < 0)
++ return -E2BIG;
+ memset(&id, 0, sizeof(id));
+ id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+ s = buf2;
+diff --git a/sound/core/info.c b/sound/core/info.c
+index a451b24199c3e..9f6714e29bbc3 100644
+--- a/sound/core/info.c
++++ b/sound/core/info.c
+@@ -111,9 +111,9 @@ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig)
+ entry = data->entry;
+ mutex_lock(&entry->access);
+ if (entry->c.ops->llseek) {
+- offset = entry->c.ops->llseek(entry,
+- data->file_private_data,
+- file, offset, orig);
++ ret = entry->c.ops->llseek(entry,
++ data->file_private_data,
++ file, offset, orig);
+ goto out;
+ }
+
+diff --git a/sound/core/init.c b/sound/core/init.c
+index ac335f5906c6b..7b3618997d347 100644
+--- a/sound/core/init.c
++++ b/sound/core/init.c
+@@ -178,10 +178,8 @@ int snd_card_new(struct device *parent, int idx, const char *xid,
+ return -ENOMEM;
+
+ err = snd_card_init(card, parent, idx, xid, module, extra_size);
+- if (err < 0) {
+- kfree(card);
+- return err;
+- }
++ if (err < 0)
++ return err; /* card is freed by error handler */
+
+ *card_ret = card;
+ return 0;
+@@ -209,6 +207,12 @@ static void __snd_card_release(struct device *dev, void *data)
+ * snd_card_register(), the very first devres action to call snd_card_free()
+ * is added automatically. In that way, the resource disconnection is assured
+ * at first, then released in the expected order.
++ *
++ * If an error happens at the probe before snd_card_register() is called and
++ * there have been other devres resources, you'd need to free the card manually
++ * via snd_card_free() call in the error; otherwise it may lead to UAF due to
++ * devres call orders. You can use snd_card_free_on_error() helper for
++ * handling it more easily.
+ */
+ int snd_devm_card_new(struct device *parent, int idx, const char *xid,
+ struct module *module, size_t extra_size,
+@@ -225,7 +229,7 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid,
+ card->managed = true;
+ err = snd_card_init(card, parent, idx, xid, module, extra_size);
+ if (err < 0) {
+- devres_free(card);
++ devres_free(card); /* in managed mode, we need to free manually */
+ return err;
+ }
+
+@@ -235,6 +239,28 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid,
+ }
+ EXPORT_SYMBOL_GPL(snd_devm_card_new);
+
++/**
++ * snd_card_free_on_error - a small helper for handling devm probe errors
++ * @dev: the managed device object
++ * @ret: the return code from the probe callback
++ *
++ * This function handles the explicit snd_card_free() call at the error from
++ * the probe callback. It's just a small helper for simplifying the error
++ * handling with the managed devices.
++ */
++int snd_card_free_on_error(struct device *dev, int ret)
++{
++ struct snd_card *card;
++
++ if (!ret)
++ return 0;
++ card = devres_find(dev, __snd_card_release, NULL, NULL);
++ if (card)
++ snd_card_free(card);
++ return ret;
++}
++EXPORT_SYMBOL_GPL(snd_card_free_on_error);
++
+ static int snd_card_init(struct snd_card *card, struct device *parent,
+ int idx, const char *xid, struct module *module,
+ size_t extra_size)
+@@ -265,6 +291,8 @@ static int snd_card_init(struct snd_card *card, struct device *parent,
+ mutex_unlock(&snd_card_mutex);
+ dev_err(parent, "cannot find the slot for index %d (range 0-%i), error: %d\n",
+ idx, snd_ecards_limit - 1, err);
++ if (!card->managed)
++ kfree(card); /* manually free here, as no destructor called */
+ return err;
+ }
+ set_bit(idx, snd_cards_lock); /* lock it */
+diff --git a/sound/core/jack.c b/sound/core/jack.c
+index 32350c6aba849..03d155ed362b4 100644
+--- a/sound/core/jack.c
++++ b/sound/core/jack.c
+@@ -42,8 +42,11 @@ static int snd_jack_dev_disconnect(struct snd_device *device)
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
+ struct snd_jack *jack = device->device_data;
+
+- if (!jack->input_dev)
++ mutex_lock(&jack->input_dev_lock);
++ if (!jack->input_dev) {
++ mutex_unlock(&jack->input_dev_lock);
+ return 0;
++ }
+
+ /* If the input device is registered with the input subsystem
+ * then we need to use a different deallocator. */
+@@ -52,6 +55,7 @@ static int snd_jack_dev_disconnect(struct snd_device *device)
+ else
+ input_free_device(jack->input_dev);
+ jack->input_dev = NULL;
++ mutex_unlock(&jack->input_dev_lock);
+ #endif /* CONFIG_SND_JACK_INPUT_DEV */
+ return 0;
+ }
+@@ -62,10 +66,13 @@ static int snd_jack_dev_free(struct snd_device *device)
+ struct snd_card *card = device->card;
+ struct snd_jack_kctl *jack_kctl, *tmp_jack_kctl;
+
++ down_write(&card->controls_rwsem);
+ list_for_each_entry_safe(jack_kctl, tmp_jack_kctl, &jack->kctl_list, list) {
+ list_del_init(&jack_kctl->list);
+ snd_ctl_remove(card, jack_kctl->kctl);
+ }
++ up_write(&card->controls_rwsem);
++
+ if (jack->private_free)
+ jack->private_free(jack);
+
+@@ -87,8 +94,11 @@ static int snd_jack_dev_register(struct snd_device *device)
+ snprintf(jack->name, sizeof(jack->name), "%s %s",
+ card->shortname, jack->id);
+
+- if (!jack->input_dev)
++ mutex_lock(&jack->input_dev_lock);
++ if (!jack->input_dev) {
++ mutex_unlock(&jack->input_dev_lock);
+ return 0;
++ }
+
+ jack->input_dev->name = jack->name;
+
+@@ -113,6 +123,7 @@ static int snd_jack_dev_register(struct snd_device *device)
+ if (err == 0)
+ jack->registered = 1;
+
++ mutex_unlock(&jack->input_dev_lock);
+ return err;
+ }
+ #endif /* CONFIG_SND_JACK_INPUT_DEV */
+@@ -509,10 +520,16 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
+ return -ENOMEM;
+
+ jack->id = kstrdup(id, GFP_KERNEL);
++ if (jack->id == NULL) {
++ kfree(jack);
++ return -ENOMEM;
++ }
+
+- /* don't creat input device for phantom jack */
+- if (!phantom_jack) {
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
++ mutex_init(&jack->input_dev_lock);
++
++ /* don't create input device for phantom jack */
++ if (!phantom_jack) {
+ int i;
+
+ jack->input_dev = input_allocate_device();
+@@ -530,8 +547,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
+ input_set_capability(jack->input_dev, EV_SW,
+ jack_switch_types[i]);
+
+-#endif /* CONFIG_SND_JACK_INPUT_DEV */
+ }
++#endif /* CONFIG_SND_JACK_INPUT_DEV */
+
+ err = snd_device_new(card, SNDRV_DEV_JACK, jack, &ops);
+ if (err < 0)
+@@ -571,10 +588,14 @@ EXPORT_SYMBOL(snd_jack_new);
+ void snd_jack_set_parent(struct snd_jack *jack, struct device *parent)
+ {
+ WARN_ON(jack->registered);
+- if (!jack->input_dev)
++ mutex_lock(&jack->input_dev_lock);
++ if (!jack->input_dev) {
++ mutex_unlock(&jack->input_dev_lock);
+ return;
++ }
+
+ jack->input_dev->dev.parent = parent;
++ mutex_unlock(&jack->input_dev_lock);
+ }
+ EXPORT_SYMBOL(snd_jack_set_parent);
+
+@@ -622,6 +643,8 @@ EXPORT_SYMBOL(snd_jack_set_key);
+
+ /**
+ * snd_jack_report - Report the current status of a jack
++ * Note: This function uses mutexes and should be called from a
++ * context which can sleep (such as a workqueue).
+ *
+ * @jack: The jack to report status for
+ * @status: The current status of the jack
+@@ -631,6 +654,7 @@ void snd_jack_report(struct snd_jack *jack, int status)
+ struct snd_jack_kctl *jack_kctl;
+ unsigned int mask_bits = 0;
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
++ struct input_dev *idev;
+ int i;
+ #endif
+
+@@ -647,14 +671,15 @@ void snd_jack_report(struct snd_jack *jack, int status)
+ status & jack_kctl->mask_bits);
+
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
+- if (!jack->input_dev)
++ idev = input_get_device(jack->input_dev);
++ if (!idev)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(jack->key); i++) {
+ int testbit = ((SND_JACK_BTN_0 >> i) & ~mask_bits);
+
+ if (jack->type & testbit)
+- input_report_key(jack->input_dev, jack->key[i],
++ input_report_key(idev, jack->key[i],
+ status & testbit);
+ }
+
+@@ -662,12 +687,13 @@ void snd_jack_report(struct snd_jack *jack, int status)
+ int testbit = ((1 << i) & ~mask_bits);
+
+ if (jack->type & testbit)
+- input_report_switch(jack->input_dev,
++ input_report_switch(idev,
+ jack_switch_types[i],
+ status & testbit);
+ }
+
+- input_sync(jack->input_dev);
++ input_sync(idev);
++ input_put_device(idev);
+ #endif /* CONFIG_SND_JACK_INPUT_DEV */
+ }
+ EXPORT_SYMBOL(snd_jack_report);
+diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
+index c7c943c661e63..2d842982576bb 100644
+--- a/sound/core/memalloc.c
++++ b/sound/core/memalloc.c
+@@ -176,8 +176,11 @@ EXPORT_SYMBOL_GPL(snd_devm_alloc_pages);
+ int snd_dma_buffer_mmap(struct snd_dma_buffer *dmab,
+ struct vm_area_struct *area)
+ {
+- const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab);
++ const struct snd_malloc_ops *ops;
+
++ if (!dmab)
++ return -ENOENT;
++ ops = snd_dma_get_ops(dmab);
+ if (ops && ops->mmap)
+ return ops->mmap(dmab, area);
+ else
+@@ -491,6 +494,8 @@ static const struct snd_malloc_ops *dma_ops[] = {
+
+ static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab)
+ {
++ if (WARN_ON_ONCE(!dmab))
++ return NULL;
+ if (WARN_ON_ONCE(dmab->dev.type <= SNDRV_DMA_TYPE_UNKNOWN ||
+ dmab->dev.type >= ARRAY_SIZE(dma_ops)))
+ return NULL;
+diff --git a/sound/core/misc.c b/sound/core/misc.c
+index 3579dd7a161f7..d32a19976a2b9 100644
+--- a/sound/core/misc.c
++++ b/sound/core/misc.c
+@@ -10,6 +10,7 @@
+ #include <linux/time.h>
+ #include <linux/slab.h>
+ #include <linux/ioport.h>
++#include <linux/fs.h>
+ #include <sound/core.h>
+
+ #ifdef CONFIG_SND_DEBUG
+@@ -112,7 +113,7 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device,
+ {
+ const struct snd_pci_quirk *q;
+
+- for (q = list; q->subvendor; q++) {
++ for (q = list; q->subvendor || q->subdevice; q++) {
+ if (q->subvendor != vendor)
+ continue;
+ if (!q->subdevice ||
+@@ -145,3 +146,96 @@ snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list)
+ }
+ EXPORT_SYMBOL(snd_pci_quirk_lookup);
+ #endif
++
++/*
++ * Deferred async signal helpers
++ *
++ * Below are a few helper functions to wrap the async signal handling
++ * in the deferred work. The main purpose is to avoid the messy deadlock
++ * around tasklist_lock and co at the kill_fasync() invocation.
++ * fasync_helper() and kill_fasync() are replaced with snd_fasync_helper()
++ * and snd_kill_fasync(), respectively. In addition, snd_fasync_free() has
++ * to be called at releasing the relevant file object.
++ */
++struct snd_fasync {
++ struct fasync_struct *fasync;
++ int signal;
++ int poll;
++ int on;
++ struct list_head list;
++};
++
++static DEFINE_SPINLOCK(snd_fasync_lock);
++static LIST_HEAD(snd_fasync_list);
++
++static void snd_fasync_work_fn(struct work_struct *work)
++{
++ struct snd_fasync *fasync;
++
++ spin_lock_irq(&snd_fasync_lock);
++ while (!list_empty(&snd_fasync_list)) {
++ fasync = list_first_entry(&snd_fasync_list, struct snd_fasync, list);
++ list_del_init(&fasync->list);
++ spin_unlock_irq(&snd_fasync_lock);
++ if (fasync->on)
++ kill_fasync(&fasync->fasync, fasync->signal, fasync->poll);
++ spin_lock_irq(&snd_fasync_lock);
++ }
++ spin_unlock_irq(&snd_fasync_lock);
++}
++
++static DECLARE_WORK(snd_fasync_work, snd_fasync_work_fn);
++
++int snd_fasync_helper(int fd, struct file *file, int on,
++ struct snd_fasync **fasyncp)
++{
++ struct snd_fasync *fasync = NULL;
++
++ if (on) {
++ fasync = kzalloc(sizeof(*fasync), GFP_KERNEL);
++ if (!fasync)
++ return -ENOMEM;
++ INIT_LIST_HEAD(&fasync->list);
++ }
++
++ spin_lock_irq(&snd_fasync_lock);
++ if (*fasyncp) {
++ kfree(fasync);
++ fasync = *fasyncp;
++ } else {
++ if (!fasync) {
++ spin_unlock_irq(&snd_fasync_lock);
++ return 0;
++ }
++ *fasyncp = fasync;
++ }
++ fasync->on = on;
++ spin_unlock_irq(&snd_fasync_lock);
++ return fasync_helper(fd, file, on, &fasync->fasync);
++}
++EXPORT_SYMBOL_GPL(snd_fasync_helper);
++
++void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll)
++{
++ unsigned long flags;
++
++ if (!fasync || !fasync->on)
++ return;
++ spin_lock_irqsave(&snd_fasync_lock, flags);
++ fasync->signal = signal;
++ fasync->poll = poll;
++ list_move(&fasync->list, &snd_fasync_list);
++ schedule_work(&snd_fasync_work);
++ spin_unlock_irqrestore(&snd_fasync_lock, flags);
++}
++EXPORT_SYMBOL_GPL(snd_kill_fasync);
++
++void snd_fasync_free(struct snd_fasync *fasync)
++{
++ if (!fasync)
++ return;
++ fasync->on = 0;
++ flush_work(&snd_fasync_work);
++ kfree(fasync);
++}
++EXPORT_SYMBOL_GPL(snd_fasync_free);
+diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
+index 6a5abdd4271ba..9620115cfdc09 100644
+--- a/sound/core/oss/mixer_oss.c
++++ b/sound/core/oss/mixer_oss.c
+@@ -130,11 +130,13 @@ static int snd_mixer_oss_devmask(struct snd_mixer_oss_file *fmixer)
+
+ if (mixer == NULL)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ for (chn = 0; chn < 31; chn++) {
+ pslot = &mixer->slots[chn];
+ if (pslot->put_volume || pslot->put_recsrc)
+ result |= 1 << chn;
+ }
++ mutex_unlock(&mixer->reg_mutex);
+ return result;
+ }
+
+@@ -146,11 +148,13 @@ static int snd_mixer_oss_stereodevs(struct snd_mixer_oss_file *fmixer)
+
+ if (mixer == NULL)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ for (chn = 0; chn < 31; chn++) {
+ pslot = &mixer->slots[chn];
+ if (pslot->put_volume && pslot->stereo)
+ result |= 1 << chn;
+ }
++ mutex_unlock(&mixer->reg_mutex);
+ return result;
+ }
+
+@@ -161,6 +165,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer)
+
+ if (mixer == NULL)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */
+ result = mixer->mask_recsrc;
+ } else {
+@@ -172,6 +177,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer)
+ result |= 1 << chn;
+ }
+ }
++ mutex_unlock(&mixer->reg_mutex);
+ return result;
+ }
+
+@@ -182,12 +188,12 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer)
+
+ if (mixer == NULL)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */
+- int err;
+ unsigned int index;
+- err = mixer->get_recsrc(fmixer, &index);
+- if (err < 0)
+- return err;
++ result = mixer->get_recsrc(fmixer, &index);
++ if (result < 0)
++ goto unlock;
+ result = 1 << index;
+ } else {
+ struct snd_mixer_oss_slot *pslot;
+@@ -202,7 +208,10 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer)
+ }
+ }
+ }
+- return mixer->oss_recsrc = result;
++ mixer->oss_recsrc = result;
++ unlock:
++ mutex_unlock(&mixer->reg_mutex);
++ return result;
+ }
+
+ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsrc)
+@@ -215,6 +224,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr
+
+ if (mixer == NULL)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ if (mixer->get_recsrc && mixer->put_recsrc) { /* exclusive input */
+ if (recsrc & ~mixer->oss_recsrc)
+ recsrc &= ~mixer->oss_recsrc;
+@@ -240,6 +250,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr
+ }
+ }
+ }
++ mutex_unlock(&mixer->reg_mutex);
+ return result;
+ }
+
+@@ -251,6 +262,7 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot)
+
+ if (mixer == NULL || slot > 30)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ pslot = &mixer->slots[slot];
+ left = pslot->volume[0];
+ right = pslot->volume[1];
+@@ -258,15 +270,21 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot)
+ result = pslot->get_volume(fmixer, pslot, &left, &right);
+ if (!pslot->stereo)
+ right = left;
+- if (snd_BUG_ON(left < 0 || left > 100))
+- return -EIO;
+- if (snd_BUG_ON(right < 0 || right > 100))
+- return -EIO;
++ if (snd_BUG_ON(left < 0 || left > 100)) {
++ result = -EIO;
++ goto unlock;
++ }
++ if (snd_BUG_ON(right < 0 || right > 100)) {
++ result = -EIO;
++ goto unlock;
++ }
+ if (result >= 0) {
+ pslot->volume[0] = left;
+ pslot->volume[1] = right;
+ result = (left & 0xff) | ((right & 0xff) << 8);
+ }
++ unlock:
++ mutex_unlock(&mixer->reg_mutex);
+ return result;
+ }
+
+@@ -279,6 +297,7 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer,
+
+ if (mixer == NULL || slot > 30)
+ return -EIO;
++ mutex_lock(&mixer->reg_mutex);
+ pslot = &mixer->slots[slot];
+ if (left > 100)
+ left = 100;
+@@ -289,10 +308,13 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer,
+ if (pslot->put_volume)
+ result = pslot->put_volume(fmixer, pslot, left, right);
+ if (result < 0)
+- return result;
++ goto unlock;
+ pslot->volume[0] = left;
+ pslot->volume[1] = right;
+- return (left & 0xff) | ((right & 0xff) << 8);
++ result = (left & 0xff) | ((right & 0xff) << 8);
++ unlock:
++ mutex_unlock(&mixer->reg_mutex);
++ return result;
+ }
+
+ static int snd_mixer_oss_ioctl1(struct snd_mixer_oss_file *fmixer, unsigned int cmd, unsigned long arg)
+diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
+index 82a818734a5f7..ca4a692fe1c36 100644
+--- a/sound/core/oss/pcm_oss.c
++++ b/sound/core/oss/pcm_oss.c
+@@ -147,7 +147,7 @@ snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params,
+ *
+ * Return the maximum value for field PAR.
+ */
+-static unsigned int
++static int
+ snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params,
+ snd_pcm_hw_param_t var, int *dir)
+ {
+@@ -682,18 +682,24 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *oss_params,
+ struct snd_pcm_hw_params *slave_params)
+ {
+- size_t s;
+- size_t oss_buffer_size, oss_period_size, oss_periods;
+- size_t min_period_size, max_period_size;
++ ssize_t s;
++ ssize_t oss_buffer_size;
++ ssize_t oss_period_size, oss_periods;
++ ssize_t min_period_size, max_period_size;
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ size_t oss_frame_size;
+
+ oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) *
+ params_channels(oss_params) / 8;
+
++ oss_buffer_size = snd_pcm_hw_param_value_max(slave_params,
++ SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
++ NULL);
++ if (oss_buffer_size <= 0)
++ return -EINVAL;
+ oss_buffer_size = snd_pcm_plug_client_size(substream,
+- snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size;
+- if (!oss_buffer_size)
++ oss_buffer_size * oss_frame_size);
++ if (oss_buffer_size <= 0)
+ return -EINVAL;
+ oss_buffer_size = rounddown_pow_of_two(oss_buffer_size);
+ if (atomic_read(&substream->mmap_count)) {
+@@ -730,7 +736,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
+
+ min_period_size = snd_pcm_plug_client_size(substream,
+ snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL));
+- if (min_period_size) {
++ if (min_period_size > 0) {
+ min_period_size *= oss_frame_size;
+ min_period_size = roundup_pow_of_two(min_period_size);
+ if (oss_period_size < min_period_size)
+@@ -739,7 +745,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
+
+ max_period_size = snd_pcm_plug_client_size(substream,
+ snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL));
+- if (max_period_size) {
++ if (max_period_size > 0) {
+ max_period_size *= oss_frame_size;
+ max_period_size = rounddown_pow_of_two(max_period_size);
+ if (oss_period_size > max_period_size)
+@@ -752,7 +758,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
+ oss_periods = substream->oss.setup.periods;
+
+ s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL);
+- if (runtime->oss.maxfrags && s > runtime->oss.maxfrags)
++ if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags)
+ s = runtime->oss.maxfrags;
+ if (oss_periods > s)
+ oss_periods = s;
+@@ -768,6 +774,11 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
+
+ if (oss_period_size < 16)
+ return -EINVAL;
++
++ /* don't allocate too large period; 1MB period must be enough */
++ if (oss_period_size > 1024 * 1024)
++ return -ENOMEM;
++
+ runtime->oss.period_bytes = oss_period_size;
+ runtime->oss.period_frames = 1;
+ runtime->oss.periods = oss_periods;
+@@ -878,8 +889,15 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
+ err = -EINVAL;
+ goto failure;
+ }
+- choose_rate(substream, sparams, runtime->oss.rate);
+- snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL);
++
++ err = choose_rate(substream, sparams, runtime->oss.rate);
++ if (err < 0)
++ goto failure;
++ err = snd_pcm_hw_param_near(substream, sparams,
++ SNDRV_PCM_HW_PARAM_CHANNELS,
++ runtime->oss.channels, NULL);
++ if (err < 0)
++ goto failure;
+
+ format = snd_pcm_oss_format_from(runtime->oss.format);
+
+@@ -1030,10 +1048,9 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
+ goto failure;
+ }
+ #endif
+- oss_period_size *= oss_frame_size;
+-
+- oss_buffer_size = oss_period_size * runtime->oss.periods;
+- if (oss_buffer_size < 0) {
++ oss_period_size = array_size(oss_period_size, oss_frame_size);
++ oss_buffer_size = array_size(oss_period_size, runtime->oss.periods);
++ if (oss_buffer_size <= 0) {
+ err = -EINVAL;
+ goto failure;
+ }
+@@ -1647,14 +1664,14 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file)
+ runtime = substream->runtime;
+ if (atomic_read(&substream->mmap_count))
+ goto __direct;
+- err = snd_pcm_oss_make_ready(substream);
+- if (err < 0)
+- return err;
+ atomic_inc(&runtime->oss.rw_ref);
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ atomic_dec(&runtime->oss.rw_ref);
+ return -ERESTARTSYS;
+ }
++ err = snd_pcm_oss_make_ready_locked(substream);
++ if (err < 0)
++ goto unlock;
+ format = snd_pcm_oss_format_from(runtime->oss.format);
+ width = snd_pcm_format_physical_width(format);
+ if (runtime->oss.buffer_used > 0) {
+@@ -1956,7 +1973,7 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign
+ if (runtime->oss.subdivision || runtime->oss.fragshift)
+ return -EINVAL;
+ fragshift = val & 0xffff;
+- if (fragshift >= 31)
++ if (fragshift >= 25) /* should be large enough */
+ return -EINVAL;
+ runtime->oss.fragshift = fragshift;
+ runtime->oss.maxfrags = (val >> 16) & 0xffff;
+@@ -2052,7 +2069,7 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr
+ int err, cmd;
+
+ #ifdef OSS_DEBUG
+- pcm_dbg(substream->pcm, "pcm_oss: trigger = 0x%x\n", trigger);
++ pr_debug("pcm_oss: trigger = 0x%x\n", trigger);
+ #endif
+
+ psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK];
+diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
+index 061ba06bc9262..82e180c776ae1 100644
+--- a/sound/core/oss/pcm_plugin.c
++++ b/sound/core/oss/pcm_plugin.c
+@@ -62,7 +62,10 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t
+ width = snd_pcm_format_physical_width(format->format);
+ if (width < 0)
+ return width;
+- size = frames * format->channels * width;
++ size = array3_size(frames, format->channels, width);
++ /* check for too large period size once again */
++ if (size > 1024 * 1024)
++ return -ENOMEM;
+ if (snd_BUG_ON(size % 8))
+ return -ENXIO;
+ size /= 8;
+diff --git a/sound/core/oss/pcm_plugin.h b/sound/core/oss/pcm_plugin.h
+index 46e273bd4a786..50a6b50f5db4c 100644
+--- a/sound/core/oss/pcm_plugin.h
++++ b/sound/core/oss/pcm_plugin.h
+@@ -141,6 +141,14 @@ int snd_pcm_area_copy(const struct snd_pcm_channel_area *src_channel,
+
+ void *snd_pcm_plug_buf_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t size);
+ void snd_pcm_plug_buf_unlock(struct snd_pcm_substream *plug, void *ptr);
++#else
++
++static inline snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t drv_size) { return drv_size; }
++static inline snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t clt_size) { return clt_size; }
++static inline int snd_pcm_plug_slave_format(int format, const struct snd_mask *format_mask) { return format; }
++
++#endif
++
+ snd_pcm_sframes_t snd_pcm_oss_write3(struct snd_pcm_substream *substream,
+ const char *ptr, snd_pcm_uframes_t size,
+ int in_kernel);
+@@ -151,14 +159,6 @@ snd_pcm_sframes_t snd_pcm_oss_writev3(struct snd_pcm_substream *substream,
+ snd_pcm_sframes_t snd_pcm_oss_readv3(struct snd_pcm_substream *substream,
+ void **bufs, snd_pcm_uframes_t frames);
+
+-#else
+-
+-static inline snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t drv_size) { return drv_size; }
+-static inline snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t clt_size) { return clt_size; }
+-static inline int snd_pcm_plug_slave_format(int format, const struct snd_mask *format_mask) { return format; }
+-
+-#endif
+-
+ #ifdef PLUGIN_DEBUG
+ #define pdprintf(fmt, args...) printk(KERN_DEBUG "plugin: " fmt, ##args)
+ #else
+diff --git a/sound/core/pcm.c b/sound/core/pcm.c
+index 6fd3677685d70..977d54320a5ca 100644
+--- a/sound/core/pcm.c
++++ b/sound/core/pcm.c
+@@ -810,7 +810,11 @@ EXPORT_SYMBOL(snd_pcm_new_internal);
+ static void free_chmap(struct snd_pcm_str *pstr)
+ {
+ if (pstr->chmap_kctl) {
+- snd_ctl_remove(pstr->pcm->card, pstr->chmap_kctl);
++ struct snd_card *card = pstr->pcm->card;
++
++ down_write(&card->controls_rwsem);
++ snd_ctl_remove(card, pstr->chmap_kctl);
++ up_write(&card->controls_rwsem);
+ pstr->chmap_kctl = NULL;
+ }
+ }
+@@ -965,6 +969,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream,
+ init_waitqueue_head(&runtime->tsleep);
+
+ runtime->status->state = SNDRV_PCM_STATE_OPEN;
++ mutex_init(&runtime->buffer_mutex);
++ atomic_set(&runtime->buffer_accessing, 0);
+
+ substream->runtime = runtime;
+ substream->private_data = pcm->private_data;
+@@ -998,6 +1004,7 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream)
+ } else {
+ substream->runtime = NULL;
+ }
++ mutex_destroy(&runtime->buffer_mutex);
+ kfree(runtime);
+ put_pid(substream->pid);
+ substream->pid = NULL;
+diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
+index dfe5a64e19d2e..7af9fa1331895 100644
+--- a/sound/core/pcm_compat.c
++++ b/sound/core/pcm_compat.c
+@@ -253,10 +253,14 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
+ goto error;
+ }
+
+- if (refine)
++ if (refine) {
+ err = snd_pcm_hw_refine(substream, data);
+- else
++ if (err < 0)
++ goto error;
++ err = fixup_unreferenced_params(substream, data);
++ } else {
+ err = snd_pcm_hw_params(substream, data);
++ }
+ if (err < 0)
+ goto error;
+ if (copy_to_user(data32, data, sizeof(*data32)) ||
+diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c
+index 1fc2fa0775744..0fe93b423c4ed 100644
+--- a/sound/core/pcm_dmaengine.c
++++ b/sound/core/pcm_dmaengine.c
+@@ -132,12 +132,14 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_set_config_from_dai_data);
+
+ static void dmaengine_pcm_dma_complete(void *arg)
+ {
++ unsigned int new_pos;
+ struct snd_pcm_substream *substream = arg;
+ struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream);
+
+- prtd->pos += snd_pcm_lib_period_bytes(substream);
+- if (prtd->pos >= snd_pcm_lib_buffer_bytes(substream))
+- prtd->pos = 0;
++ new_pos = prtd->pos + snd_pcm_lib_period_bytes(substream);
++ if (new_pos >= snd_pcm_lib_buffer_bytes(substream))
++ new_pos = 0;
++ prtd->pos = new_pos;
+
+ snd_pcm_period_elapsed(substream);
+ }
+diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
+index a144a3f68e9eb..8947c988b6d34 100644
+--- a/sound/core/pcm_lib.c
++++ b/sound/core/pcm_lib.c
+@@ -2137,6 +2137,8 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream,
+ ret = substream->ops->ack(substream);
+ if (ret < 0) {
+ runtime->control->appl_ptr = old_appl_ptr;
++ if (ret == -EPIPE)
++ __snd_pcm_xrun(substream);
+ return ret;
+ }
+ }
+@@ -2255,10 +2257,15 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream,
+ err = -EINVAL;
+ goto _end_unlock;
+ }
++ if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) {
++ err = -EBUSY;
++ goto _end_unlock;
++ }
+ snd_pcm_stream_unlock_irq(substream);
+ err = writer(substream, appl_ofs, data, offset, frames,
+ transfer);
+ snd_pcm_stream_lock_irq(substream);
++ atomic_dec(&runtime->buffer_accessing);
+ if (err < 0)
+ goto _end_unlock;
+ err = pcm_accessible_state(runtime);
+diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
+index 7fbd1ccbb5b01..711e71016a7c3 100644
+--- a/sound/core/pcm_memory.c
++++ b/sound/core/pcm_memory.c
+@@ -31,20 +31,51 @@ static unsigned long max_alloc_per_card = 32UL * 1024UL * 1024UL;
+ module_param(max_alloc_per_card, ulong, 0644);
+ MODULE_PARM_DESC(max_alloc_per_card, "Max total allocation bytes per card.");
+
++static void __update_allocated_size(struct snd_card *card, ssize_t bytes)
++{
++ card->total_pcm_alloc_bytes += bytes;
++}
++
++static void update_allocated_size(struct snd_card *card, ssize_t bytes)
++{
++ mutex_lock(&card->memory_mutex);
++ __update_allocated_size(card, bytes);
++ mutex_unlock(&card->memory_mutex);
++}
++
++static void decrease_allocated_size(struct snd_card *card, size_t bytes)
++{
++ mutex_lock(&card->memory_mutex);
++ WARN_ON(card->total_pcm_alloc_bytes < bytes);
++ __update_allocated_size(card, -(ssize_t)bytes);
++ mutex_unlock(&card->memory_mutex);
++}
++
+ static int do_alloc_pages(struct snd_card *card, int type, struct device *dev,
+ size_t size, struct snd_dma_buffer *dmab)
+ {
+ int err;
+
++ /* check and reserve the requested size */
++ mutex_lock(&card->memory_mutex);
+ if (max_alloc_per_card &&
+- card->total_pcm_alloc_bytes + size > max_alloc_per_card)
++ card->total_pcm_alloc_bytes + size > max_alloc_per_card) {
++ mutex_unlock(&card->memory_mutex);
+ return -ENOMEM;
++ }
++ __update_allocated_size(card, size);
++ mutex_unlock(&card->memory_mutex);
+
+ err = snd_dma_alloc_pages(type, dev, size, dmab);
+ if (!err) {
+- mutex_lock(&card->memory_mutex);
+- card->total_pcm_alloc_bytes += dmab->bytes;
+- mutex_unlock(&card->memory_mutex);
++ /* the actual allocation size might be bigger than requested,
++ * and we need to correct the account
++ */
++ if (dmab->bytes != size)
++ update_allocated_size(card, dmab->bytes - size);
++ } else {
++ /* take back on allocation failure */
++ decrease_allocated_size(card, size);
+ }
+ return err;
+ }
+@@ -53,10 +84,7 @@ static void do_free_pages(struct snd_card *card, struct snd_dma_buffer *dmab)
+ {
+ if (!dmab->area)
+ return;
+- mutex_lock(&card->memory_mutex);
+- WARN_ON(card->total_pcm_alloc_bytes < dmab->bytes);
+- card->total_pcm_alloc_bytes -= dmab->bytes;
+- mutex_unlock(&card->memory_mutex);
++ decrease_allocated_size(card, dmab->bytes);
+ snd_dma_free_pages(dmab);
+ dmab->area = NULL;
+ }
+@@ -158,19 +186,20 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry,
+ size_t size;
+ struct snd_dma_buffer new_dmab;
+
++ mutex_lock(&substream->pcm->open_mutex);
+ if (substream->runtime) {
+ buffer->error = -EBUSY;
+- return;
++ goto unlock;
+ }
+ if (!snd_info_get_line(buffer, line, sizeof(line))) {
+ snd_info_get_str(str, line, sizeof(str));
+ size = simple_strtoul(str, NULL, 10) * 1024;
+ if ((size != 0 && size < 8192) || size > substream->dma_max) {
+ buffer->error = -EINVAL;
+- return;
++ goto unlock;
+ }
+ if (substream->dma_buffer.bytes == size)
+- return;
++ goto unlock;
+ memset(&new_dmab, 0, sizeof(new_dmab));
+ new_dmab.dev = substream->dma_buffer.dev;
+ if (size > 0) {
+@@ -183,7 +212,7 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry,
+ substream->pcm->card->number, substream->pcm->device,
+ substream->stream ? 'c' : 'p', substream->number,
+ substream->pcm->name, size);
+- return;
++ goto unlock;
+ }
+ substream->buffer_bytes_max = size;
+ } else {
+@@ -195,6 +224,8 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry,
+ } else {
+ buffer->error = -EINVAL;
+ }
++ unlock:
++ mutex_unlock(&substream->pcm->open_mutex);
+ }
+
+ static inline void preallocate_info_init(struct snd_pcm_substream *substream)
+@@ -443,7 +474,6 @@ EXPORT_SYMBOL(snd_pcm_lib_malloc_pages);
+ */
+ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream)
+ {
+- struct snd_card *card = substream->pcm->card;
+ struct snd_pcm_runtime *runtime;
+
+ if (PCM_RUNTIME_CHECK(substream))
+@@ -452,6 +482,8 @@ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream)
+ if (runtime->dma_area == NULL)
+ return 0;
+ if (runtime->dma_buffer_p != &substream->dma_buffer) {
++ struct snd_card *card = substream->pcm->card;
++
+ /* it's a newly allocated buffer. release it now. */
+ do_free_pages(card, runtime->dma_buffer_p);
+ kfree(runtime->dma_buffer_p);
+diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
+index 4866aed97aacc..5588b6a1ee8bd 100644
+--- a/sound/core/pcm_misc.c
++++ b/sound/core/pcm_misc.c
+@@ -433,7 +433,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int
+ return 0;
+ width = pcm_formats[(INT)format].phys; /* physical width */
+ pat = pcm_formats[(INT)format].silence;
+- if (! width)
++ if (!width || !pat)
+ return -EINVAL;
+ /* signed or 1 byte data */
+ if (pcm_formats[(INT)format].signd == 1 || width <= 8) {
+diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
+index d233cb3b41d8b..3b87f6cb5bb04 100644
+--- a/sound/core/pcm_native.c
++++ b/sound/core/pcm_native.c
+@@ -172,6 +172,19 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
+ }
+ EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave);
+
++unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream)
++{
++ unsigned long flags = 0;
++ if (substream->pcm->nonatomic)
++ mutex_lock_nested(&substream->self_group.mutex,
++ SINGLE_DEPTH_NESTING);
++ else
++ spin_lock_irqsave_nested(&substream->self_group.lock, flags,
++ SINGLE_DEPTH_NESTING);
++ return flags;
++}
++EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested);
++
+ /**
+ * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream
+ * @substream: PCM substream
+@@ -672,6 +685,30 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm,
+ return 0;
+ }
+
++/* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise
++ * block the further r/w operations
++ */
++static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime)
++{
++ if (!atomic_dec_unless_positive(&runtime->buffer_accessing))
++ return -EBUSY;
++ mutex_lock(&runtime->buffer_mutex);
++ return 0; /* keep buffer_mutex, unlocked by below */
++}
++
++/* release buffer_mutex and clear r/w access flag */
++static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime)
++{
++ mutex_unlock(&runtime->buffer_mutex);
++ atomic_inc(&runtime->buffer_accessing);
++}
++
++#if IS_ENABLED(CONFIG_SND_PCM_OSS)
++#define is_oss_stream(substream) ((substream)->oss.oss)
++#else
++#define is_oss_stream(substream) false
++#endif
++
+ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+ {
+@@ -683,22 +720,25 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
+ if (PCM_RUNTIME_CHECK(substream))
+ return -ENXIO;
+ runtime = substream->runtime;
++ err = snd_pcm_buffer_access_lock(runtime);
++ if (err < 0)
++ return err;
+ snd_pcm_stream_lock_irq(substream);
+ switch (runtime->status->state) {
+ case SNDRV_PCM_STATE_OPEN:
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_PREPARED:
++ if (!is_oss_stream(substream) &&
++ atomic_read(&substream->mmap_count))
++ err = -EBADFD;
+ break;
+ default:
+- snd_pcm_stream_unlock_irq(substream);
+- return -EBADFD;
++ err = -EBADFD;
++ break;
+ }
+ snd_pcm_stream_unlock_irq(substream);
+-#if IS_ENABLED(CONFIG_SND_PCM_OSS)
+- if (!substream->oss.oss)
+-#endif
+- if (atomic_read(&substream->mmap_count))
+- return -EBADFD;
++ if (err)
++ goto unlock;
+
+ snd_pcm_sync_stop(substream, true);
+
+@@ -786,16 +826,21 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
+ if (usecs >= 0)
+ cpu_latency_qos_add_request(&substream->latency_pm_qos_req,
+ usecs);
+- return 0;
++ err = 0;
+ _error:
+- /* hardware might be unusable from this time,
+- so we force application to retry to set
+- the correct hardware parameter settings */
+- snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN);
+- if (substream->ops->hw_free != NULL)
+- substream->ops->hw_free(substream);
+- if (substream->managed_buffer_alloc)
+- snd_pcm_lib_free_pages(substream);
++ if (err) {
++ /* hardware might be unusable from this time,
++ * so we force application to retry to set
++ * the correct hardware parameter settings
++ */
++ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN);
++ if (substream->ops->hw_free != NULL)
++ substream->ops->hw_free(substream);
++ if (substream->managed_buffer_alloc)
++ snd_pcm_lib_free_pages(substream);
++ }
++ unlock:
++ snd_pcm_buffer_access_unlock(runtime);
+ return err;
+ }
+
+@@ -835,26 +880,33 @@ static int do_hw_free(struct snd_pcm_substream *substream)
+ static int snd_pcm_hw_free(struct snd_pcm_substream *substream)
+ {
+ struct snd_pcm_runtime *runtime;
+- int result;
++ int result = 0;
+
+ if (PCM_RUNTIME_CHECK(substream))
+ return -ENXIO;
+ runtime = substream->runtime;
++ result = snd_pcm_buffer_access_lock(runtime);
++ if (result < 0)
++ return result;
+ snd_pcm_stream_lock_irq(substream);
+ switch (runtime->status->state) {
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_PREPARED:
++ if (atomic_read(&substream->mmap_count))
++ result = -EBADFD;
+ break;
+ default:
+- snd_pcm_stream_unlock_irq(substream);
+- return -EBADFD;
++ result = -EBADFD;
++ break;
+ }
+ snd_pcm_stream_unlock_irq(substream);
+- if (atomic_read(&substream->mmap_count))
+- return -EBADFD;
++ if (result)
++ goto unlock;
+ result = do_hw_free(substream);
+ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN);
+ cpu_latency_qos_remove_request(&substream->latency_pm_qos_req);
++ unlock:
++ snd_pcm_buffer_access_unlock(runtime);
+ return result;
+ }
+
+@@ -1160,15 +1212,17 @@ struct action_ops {
+ static int snd_pcm_action_group(const struct action_ops *ops,
+ struct snd_pcm_substream *substream,
+ snd_pcm_state_t state,
+- bool do_lock)
++ bool stream_lock)
+ {
+ struct snd_pcm_substream *s = NULL;
+ struct snd_pcm_substream *s1;
+ int res = 0, depth = 1;
+
+ snd_pcm_group_for_each_entry(s, substream) {
+- if (do_lock && s != substream) {
+- if (s->pcm->nonatomic)
++ if (s != substream) {
++ if (!stream_lock)
++ mutex_lock_nested(&s->runtime->buffer_mutex, depth);
++ else if (s->pcm->nonatomic)
+ mutex_lock_nested(&s->self_group.mutex, depth);
+ else
+ spin_lock_nested(&s->self_group.lock, depth);
+@@ -1196,18 +1250,18 @@ static int snd_pcm_action_group(const struct action_ops *ops,
+ ops->post_action(s, state);
+ }
+ _unlock:
+- if (do_lock) {
+- /* unlock streams */
+- snd_pcm_group_for_each_entry(s1, substream) {
+- if (s1 != substream) {
+- if (s1->pcm->nonatomic)
+- mutex_unlock(&s1->self_group.mutex);
+- else
+- spin_unlock(&s1->self_group.lock);
+- }
+- if (s1 == s) /* end */
+- break;
++ /* unlock streams */
++ snd_pcm_group_for_each_entry(s1, substream) {
++ if (s1 != substream) {
++ if (!stream_lock)
++ mutex_unlock(&s1->runtime->buffer_mutex);
++ else if (s1->pcm->nonatomic)
++ mutex_unlock(&s1->self_group.mutex);
++ else
++ spin_unlock(&s1->self_group.lock);
+ }
++ if (s1 == s) /* end */
++ break;
+ }
+ return res;
+ }
+@@ -1337,10 +1391,15 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops,
+
+ /* Guarantee the group members won't change during non-atomic action */
+ down_read(&snd_pcm_link_rwsem);
++ res = snd_pcm_buffer_access_lock(substream->runtime);
++ if (res < 0)
++ goto unlock;
+ if (snd_pcm_stream_linked(substream))
+ res = snd_pcm_action_group(ops, substream, state, false);
+ else
+ res = snd_pcm_action_single(ops, substream, state);
++ snd_pcm_buffer_access_unlock(substream->runtime);
++ unlock:
+ up_read(&snd_pcm_link_rwsem);
+ return res;
+ }
+@@ -1373,8 +1432,10 @@ static int snd_pcm_do_start(struct snd_pcm_substream *substream,
+ static void snd_pcm_undo_start(struct snd_pcm_substream *substream,
+ snd_pcm_state_t state)
+ {
+- if (substream->runtime->trigger_master == substream)
++ if (substream->runtime->trigger_master == substream) {
+ substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP);
++ substream->runtime->stop_operating = true;
++ }
+ }
+
+ static void snd_pcm_post_start(struct snd_pcm_substream *substream,
+@@ -1830,11 +1891,13 @@ static int snd_pcm_do_reset(struct snd_pcm_substream *substream,
+ int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL);
+ if (err < 0)
+ return err;
++ snd_pcm_stream_lock_irq(substream);
+ runtime->hw_ptr_base = 0;
+ runtime->hw_ptr_interrupt = runtime->status->hw_ptr -
+ runtime->status->hw_ptr % runtime->period_size;
+ runtime->silence_start = runtime->status->hw_ptr;
+ runtime->silence_filled = 0;
++ snd_pcm_stream_unlock_irq(substream);
+ return 0;
+ }
+
+@@ -1842,10 +1905,12 @@ static void snd_pcm_post_reset(struct snd_pcm_substream *substream,
+ snd_pcm_state_t state)
+ {
+ struct snd_pcm_runtime *runtime = substream->runtime;
++ snd_pcm_stream_lock_irq(substream);
+ runtime->control->appl_ptr = runtime->status->hw_ptr;
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+ runtime->silence_size > 0)
+ snd_pcm_playback_silence(substream, ULONG_MAX);
++ snd_pcm_stream_unlock_irq(substream);
+ }
+
+ static const struct action_ops snd_pcm_action_reset = {
+diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
+index 6f30231bdb884..b1632ab432cf7 100644
+--- a/sound/core/rawmidi.c
++++ b/sound/core/rawmidi.c
+@@ -447,6 +447,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file)
+ err = -ENOMEM;
+ goto __error;
+ }
++ rawmidi_file->user_pversion = 0;
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(&rmidi->open_wait, &wait);
+ while (1) {
+@@ -1834,10 +1835,8 @@ static int snd_rawmidi_free(struct snd_rawmidi *rmidi)
+
+ snd_info_free_entry(rmidi->proc_entry);
+ rmidi->proc_entry = NULL;
+- mutex_lock(&register_mutex);
+ if (rmidi->ops && rmidi->ops->dev_unregister)
+ rmidi->ops->dev_unregister(rmidi);
+- mutex_unlock(&register_mutex);
+
+ snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]);
+ snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]);
+diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c
+index 1e3bf086f8671..f2940b29595f0 100644
+--- a/sound/core/seq/oss/seq_oss_midi.c
++++ b/sound/core/seq/oss/seq_oss_midi.c
+@@ -37,6 +37,7 @@ struct seq_oss_midi {
+ struct snd_midi_event *coder; /* MIDI event coder */
+ struct seq_oss_devinfo *devinfo; /* assigned OSSseq device */
+ snd_use_lock_t use_lock;
++ struct mutex open_mutex;
+ };
+
+
+@@ -172,6 +173,7 @@ snd_seq_oss_midi_check_new_port(struct snd_seq_port_info *pinfo)
+ mdev->flags = pinfo->capability;
+ mdev->opened = 0;
+ snd_use_lock_init(&mdev->use_lock);
++ mutex_init(&mdev->open_mutex);
+
+ /* copy and truncate the name of synth device */
+ strscpy(mdev->name, pinfo->name, sizeof(mdev->name));
+@@ -270,7 +272,9 @@ snd_seq_oss_midi_clear_all(void)
+ void
+ snd_seq_oss_midi_setup(struct seq_oss_devinfo *dp)
+ {
++ spin_lock_irq(&register_lock);
+ dp->max_mididev = max_midi_devs;
++ spin_unlock_irq(&register_lock);
+ }
+
+ /*
+@@ -320,15 +324,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
+ int perm;
+ struct seq_oss_midi *mdev;
+ struct snd_seq_port_subscribe subs;
++ int err;
+
+ mdev = get_mididev(dp, dev);
+ if (!mdev)
+ return -ENODEV;
+
++ mutex_lock(&mdev->open_mutex);
+ /* already used? */
+ if (mdev->opened && mdev->devinfo != dp) {
+- snd_use_lock_free(&mdev->use_lock);
+- return -EBUSY;
++ err = -EBUSY;
++ goto unlock;
+ }
+
+ perm = 0;
+@@ -338,14 +344,14 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
+ perm |= PERM_READ;
+ perm &= mdev->flags;
+ if (perm == 0) {
+- snd_use_lock_free(&mdev->use_lock);
+- return -ENXIO;
++ err = -ENXIO;
++ goto unlock;
+ }
+
+ /* already opened? */
+ if ((mdev->opened & perm) == perm) {
+- snd_use_lock_free(&mdev->use_lock);
+- return 0;
++ err = 0;
++ goto unlock;
+ }
+
+ perm &= ~mdev->opened;
+@@ -370,13 +376,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
+ }
+
+ if (! mdev->opened) {
+- snd_use_lock_free(&mdev->use_lock);
+- return -ENXIO;
++ err = -ENXIO;
++ goto unlock;
+ }
+
+ mdev->devinfo = dp;
++ err = 0;
++
++ unlock:
++ mutex_unlock(&mdev->open_mutex);
+ snd_use_lock_free(&mdev->use_lock);
+- return 0;
++ return err;
+ }
+
+ /*
+@@ -391,10 +401,9 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev)
+ mdev = get_mididev(dp, dev);
+ if (!mdev)
+ return -ENODEV;
+- if (! mdev->opened || mdev->devinfo != dp) {
+- snd_use_lock_free(&mdev->use_lock);
+- return 0;
+- }
++ mutex_lock(&mdev->open_mutex);
++ if (!mdev->opened || mdev->devinfo != dp)
++ goto unlock;
+
+ memset(&subs, 0, sizeof(subs));
+ if (mdev->opened & PERM_WRITE) {
+@@ -413,6 +422,8 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev)
+ mdev->opened = 0;
+ mdev->devinfo = NULL;
+
++ unlock:
++ mutex_unlock(&mdev->open_mutex);
+ snd_use_lock_free(&mdev->use_lock);
+ return 0;
+ }
+diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
+index 2e9d695d336c9..2d707afa1ef1c 100644
+--- a/sound/core/seq/seq_clientmgr.c
++++ b/sound/core/seq/seq_clientmgr.c
+@@ -121,13 +121,13 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid)
+ spin_unlock_irqrestore(&clients_lock, flags);
+ #ifdef CONFIG_MODULES
+ if (!in_interrupt()) {
+- static char client_requested[SNDRV_SEQ_GLOBAL_CLIENTS];
+- static char card_requested[SNDRV_CARDS];
++ static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS);
++ static DECLARE_BITMAP(card_requested, SNDRV_CARDS);
++
+ if (clientid < SNDRV_SEQ_GLOBAL_CLIENTS) {
+ int idx;
+
+- if (!client_requested[clientid]) {
+- client_requested[clientid] = 1;
++ if (!test_and_set_bit(clientid, client_requested)) {
+ for (idx = 0; idx < 15; idx++) {
+ if (seq_client_load[idx] < 0)
+ break;
+@@ -142,10 +142,8 @@ struct snd_seq_client *snd_seq_client_use_ptr(int clientid)
+ int card = (clientid - SNDRV_SEQ_GLOBAL_CLIENTS) /
+ SNDRV_SEQ_CLIENTS_PER_CARD;
+ if (card < snd_ecards_limit) {
+- if (! card_requested[card]) {
+- card_requested[card] = 1;
++ if (!test_and_set_bit(card, card_requested))
+ snd_request_card(card);
+- }
+ snd_seq_device_load_drivers();
+ }
+ }
+diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
+index b7aee23fc3876..47ef6bc30c0ee 100644
+--- a/sound/core/seq/seq_memory.c
++++ b/sound/core/seq/seq_memory.c
+@@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event);
+ * expand the variable length event to linear buffer space.
+ */
+
+-static int seq_copy_in_kernel(char **bufptr, const void *src, int size)
++static int seq_copy_in_kernel(void *ptr, void *src, int size)
+ {
++ char **bufptr = ptr;
++
+ memcpy(*bufptr, src, size);
+ *bufptr += size;
+ return 0;
+ }
+
+-static int seq_copy_in_user(char __user **bufptr, const void *src, int size)
++static int seq_copy_in_user(void *ptr, void *src, int size)
+ {
++ char __user **bufptr = ptr;
++
+ if (copy_to_user(*bufptr, src, size))
+ return -EFAULT;
+ *bufptr += size;
+@@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
+ return newlen;
+ }
+ err = snd_seq_dump_var_event(event,
+- in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel :
+- (snd_seq_dump_func_t)seq_copy_in_user,
++ in_kernel ? seq_copy_in_kernel : seq_copy_in_user,
+ &buf);
+ return err < 0 ? err : newlen;
+ }
+diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
+index d6c02dea976c8..bc933104c3eea 100644
+--- a/sound/core/seq/seq_queue.c
++++ b/sound/core/seq/seq_queue.c
+@@ -235,12 +235,15 @@ struct snd_seq_queue *snd_seq_queue_find_name(char *name)
+
+ /* -------------------------------------------------------- */
+
++#define MAX_CELL_PROCESSES_IN_QUEUE 1000
++
+ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
+ {
+ unsigned long flags;
+ struct snd_seq_event_cell *cell;
+ snd_seq_tick_time_t cur_tick;
+ snd_seq_real_time_t cur_time;
++ int processed = 0;
+
+ if (q == NULL)
+ return;
+@@ -263,6 +266,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
+ if (!cell)
+ break;
+ snd_seq_dispatch_event(cell, atomic, hop);
++ if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE)
++ goto out; /* the rest processed at the next batch */
+ }
+
+ /* Process time queue... */
+@@ -272,14 +277,19 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
+ if (!cell)
+ break;
+ snd_seq_dispatch_event(cell, atomic, hop);
++ if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE)
++ goto out; /* the rest processed at the next batch */
+ }
+
++ out:
+ /* free lock */
+ spin_lock_irqsave(&q->check_lock, flags);
+ if (q->check_again) {
+ q->check_again = 0;
+- spin_unlock_irqrestore(&q->check_lock, flags);
+- goto __again;
++ if (processed < MAX_CELL_PROCESSES_IN_QUEUE) {
++ spin_unlock_irqrestore(&q->check_lock, flags);
++ goto __again;
++ }
+ }
+ q->check_blocked = 0;
+ spin_unlock_irqrestore(&q->check_lock, flags);
+diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c
+index 7ed0a2a910352..2751bf2ff61bc 100644
+--- a/sound/core/sound_oss.c
++++ b/sound/core/sound_oss.c
+@@ -162,7 +162,6 @@ int snd_unregister_oss_device(int type, struct snd_card *card, int dev)
+ mutex_unlock(&sound_oss_mutex);
+ return -ENOENT;
+ }
+- unregister_sound_special(minor);
+ switch (SNDRV_MINOR_OSS_DEVICE(minor)) {
+ case SNDRV_MINOR_OSS_PCM:
+ track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_AUDIO);
+@@ -174,12 +173,18 @@ int snd_unregister_oss_device(int type, struct snd_card *card, int dev)
+ track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_DMMIDI1);
+ break;
+ }
+- if (track2 >= 0) {
+- unregister_sound_special(track2);
++ if (track2 >= 0)
+ snd_oss_minors[track2] = NULL;
+- }
+ snd_oss_minors[minor] = NULL;
+ mutex_unlock(&sound_oss_mutex);
++
++ /* call unregister_sound_special() outside sound_oss_mutex;
++ * otherwise it may deadlock, as it can trigger the release of a card
++ */
++ unregister_sound_special(minor);
++ if (track2 >= 0)
++ unregister_sound_special(track2);
++
+ kfree(mptr);
+ return 0;
+ }
+diff --git a/sound/core/timer.c b/sound/core/timer.c
+index 92b7008fcdb86..e08a37c23add8 100644
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -83,7 +83,7 @@ struct snd_timer_user {
+ unsigned int filter;
+ struct timespec64 tstamp; /* trigger tstamp */
+ wait_queue_head_t qchange_sleep;
+- struct fasync_struct *fasync;
++ struct snd_fasync *fasync;
+ struct mutex ioctl_lock;
+ };
+
+@@ -624,13 +624,13 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
+ if (!timer)
+ return -EINVAL;
+ spin_lock_irqsave(&timer->lock, flags);
++ list_del_init(&timeri->ack_list);
++ list_del_init(&timeri->active_list);
+ if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
+ SNDRV_TIMER_IFLG_START))) {
+ result = -EBUSY;
+ goto unlock;
+ }
+- list_del_init(&timeri->ack_list);
+- list_del_init(&timeri->active_list);
+ if (timer->card && timer->card->shutdown)
+ goto unlock;
+ if (stop) {
+@@ -665,23 +665,22 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
+ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop)
+ {
+ unsigned long flags;
++ bool running;
+
+ spin_lock_irqsave(&slave_active_lock, flags);
+- if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
+- spin_unlock_irqrestore(&slave_active_lock, flags);
+- return -EBUSY;
+- }
++ running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING;
+ timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
+ if (timeri->timer) {
+ spin_lock(&timeri->timer->lock);
+ list_del_init(&timeri->ack_list);
+ list_del_init(&timeri->active_list);
+- snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
+- SNDRV_TIMER_EVENT_PAUSE);
++ if (running)
++ snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
++ SNDRV_TIMER_EVENT_PAUSE);
+ spin_unlock(&timeri->timer->lock);
+ }
+ spin_unlock_irqrestore(&slave_active_lock, flags);
+- return 0;
++ return running ? 0 : -EBUSY;
+ }
+
+ /*
+@@ -1346,7 +1345,7 @@ static void snd_timer_user_interrupt(struct snd_timer_instance *timeri,
+ }
+ __wake:
+ spin_unlock(&tu->qlock);
+- kill_fasync(&tu->fasync, SIGIO, POLL_IN);
++ snd_kill_fasync(tu->fasync, SIGIO, POLL_IN);
+ wake_up(&tu->qchange_sleep);
+ }
+
+@@ -1384,7 +1383,7 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri,
+ spin_lock_irqsave(&tu->qlock, flags);
+ snd_timer_user_append_to_tqueue(tu, &r1);
+ spin_unlock_irqrestore(&tu->qlock, flags);
+- kill_fasync(&tu->fasync, SIGIO, POLL_IN);
++ snd_kill_fasync(tu->fasync, SIGIO, POLL_IN);
+ wake_up(&tu->qchange_sleep);
+ }
+
+@@ -1454,7 +1453,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
+ spin_unlock(&tu->qlock);
+ if (append == 0)
+ return;
+- kill_fasync(&tu->fasync, SIGIO, POLL_IN);
++ snd_kill_fasync(tu->fasync, SIGIO, POLL_IN);
+ wake_up(&tu->qchange_sleep);
+ }
+
+@@ -1522,6 +1521,7 @@ static int snd_timer_user_release(struct inode *inode, struct file *file)
+ snd_timer_instance_free(tu->timeri);
+ }
+ mutex_unlock(&tu->ioctl_lock);
++ snd_fasync_free(tu->fasync);
+ kfree(tu->queue);
+ kfree(tu->tqueue);
+ kfree(tu);
+@@ -2136,7 +2136,7 @@ static int snd_timer_user_fasync(int fd, struct file * file, int on)
+ struct snd_timer_user *tu;
+
+ tu = file->private_data;
+- return fasync_helper(fd, file, on, &tu->fasync);
++ return snd_fasync_helper(fd, file, on, &tu->fasync);
+ }
+
+ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
+diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
+index 9b4a7cdb103ad..12f12a294df5a 100644
+--- a/sound/drivers/aloop.c
++++ b/sound/drivers/aloop.c
+@@ -605,17 +605,18 @@ static unsigned int loopback_jiffies_timer_pos_update
+ cable->streams[SNDRV_PCM_STREAM_PLAYBACK];
+ struct loopback_pcm *dpcm_capt =
+ cable->streams[SNDRV_PCM_STREAM_CAPTURE];
+- unsigned long delta_play = 0, delta_capt = 0;
++ unsigned long delta_play = 0, delta_capt = 0, cur_jiffies;
+ unsigned int running, count1, count2;
+
++ cur_jiffies = jiffies;
+ running = cable->running ^ cable->pause;
+ if (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) {
+- delta_play = jiffies - dpcm_play->last_jiffies;
++ delta_play = cur_jiffies - dpcm_play->last_jiffies;
+ dpcm_play->last_jiffies += delta_play;
+ }
+
+ if (running & (1 << SNDRV_PCM_STREAM_CAPTURE)) {
+- delta_capt = jiffies - dpcm_capt->last_jiffies;
++ delta_capt = cur_jiffies - dpcm_capt->last_jiffies;
+ dpcm_capt->last_jiffies += delta_capt;
+ }
+
+diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c
+index 11235baaf6fa5..f212f233ea618 100644
+--- a/sound/drivers/mtpav.c
++++ b/sound/drivers/mtpav.c
+@@ -693,8 +693,6 @@ static int snd_mtpav_probe(struct platform_device *dev)
+ mtp_card->outmidihwport = 0xffffffff;
+ timer_setup(&mtp_card->timer, snd_mtpav_output_timer, 0);
+
+- card->private_free = snd_mtpav_free;
+-
+ err = snd_mtpav_get_RAWMIDI(mtp_card);
+ if (err < 0)
+ return err;
+@@ -716,6 +714,8 @@ static int snd_mtpav_probe(struct platform_device *dev)
+ if (err < 0)
+ return err;
+
++ card->private_free = snd_mtpav_free;
++
+ platform_set_drvdata(dev, card);
+ printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port);
+ return 0;
+diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c
+index d3bc9e8c407dc..f0d34cf70c3e0 100644
+--- a/sound/drivers/mts64.c
++++ b/sound/drivers/mts64.c
+@@ -815,6 +815,9 @@ static void snd_mts64_interrupt(void *private)
+ u8 status, data;
+ struct snd_rawmidi_substream *substream;
+
++ if (!mts)
++ return;
++
+ spin_lock(&mts->lock);
+ ret = mts64_read(mts->pardev->port);
+ data = ret & 0x00ff;
+diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c
+index e1b69c65c3c88..e2b7be67f0e30 100644
+--- a/sound/drivers/opl3/opl3_midi.c
++++ b/sound/drivers/opl3/opl3_midi.c
+@@ -397,7 +397,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan)
+ }
+ if (instr_4op) {
+ vp2 = &opl3->voices[voice + 3];
+- if (vp->state > 0) {
++ if (vp2->state > 0) {
+ opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK +
+ voice_offset + 3);
+ reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT;
+diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
+index f99e00083141e..4c677c8546c71 100644
+--- a/sound/firewire/dice/dice-stream.c
++++ b/sound/firewire/dice/dice-stream.c
+@@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate,
+
+ static int select_clock(struct snd_dice *dice, unsigned int rate)
+ {
+- __be32 reg;
++ __be32 reg, new;
+ u32 data;
+ int i;
+ int err;
+@@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate)
+ if (completion_done(&dice->clock_accepted))
+ reinit_completion(&dice->clock_accepted);
+
+- reg = cpu_to_be32(data);
++ new = cpu_to_be32(data);
+ err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT,
+- &reg, sizeof(reg));
++ &new, sizeof(new));
+ if (err < 0)
+ return err;
+
+ if (wait_for_completion_timeout(&dice->clock_accepted,
+- msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0)
+- return -ETIMEDOUT;
++ msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) {
++ if (reg != new)
++ return -ETIMEDOUT;
++ }
+
+ return 0;
+ }
+diff --git a/sound/firewire/digi00x/digi00x-stream.c b/sound/firewire/digi00x/digi00x-stream.c
+index a15f55b0dce37..295163bb8abb6 100644
+--- a/sound/firewire/digi00x/digi00x-stream.c
++++ b/sound/firewire/digi00x/digi00x-stream.c
+@@ -259,8 +259,10 @@ int snd_dg00x_stream_init_duplex(struct snd_dg00x *dg00x)
+ return err;
+
+ err = init_stream(dg00x, &dg00x->tx_stream);
+- if (err < 0)
++ if (err < 0) {
+ destroy_stream(dg00x, &dg00x->rx_stream);
++ return err;
++ }
+
+ err = amdtp_domain_init(&dg00x->domain);
+ if (err < 0) {
+diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c
+index bbfbebf4affbc..df44dd5dc4b22 100644
+--- a/sound/firewire/fcp.c
++++ b/sound/firewire/fcp.c
+@@ -240,9 +240,7 @@ int fcp_avc_transaction(struct fw_unit *unit,
+ t.response_match_bytes = response_match_bytes;
+ t.state = STATE_PENDING;
+ init_waitqueue_head(&t.wait);
+-
+- if (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03)
+- t.deferrable = true;
++ t.deferrable = (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03);
+
+ spin_lock_irq(&transactions_lock);
+ list_add_tail(&t.list, &transactions);
+diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
+index 626c0c34b0b66..3a53914277d35 100644
+--- a/sound/firewire/fireworks/fireworks_hwdep.c
++++ b/sound/firewire/fireworks/fireworks_hwdep.c
+@@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
+ type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE;
+ if (copy_to_user(buf, &type, sizeof(type)))
+ return -EFAULT;
++ count += sizeof(type);
+ remained -= sizeof(type);
+ buf += sizeof(type);
+
+diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c
+index fff18b5d4e052..f4a702def3979 100644
+--- a/sound/firewire/oxfw/oxfw-stream.c
++++ b/sound/firewire/oxfw/oxfw-stream.c
+@@ -9,7 +9,7 @@
+ #include <linux/delay.h>
+
+ #define AVC_GENERIC_FRAME_MAXIMUM_BYTES 512
+-#define READY_TIMEOUT_MS 200
++#define READY_TIMEOUT_MS 600
+
+ /*
+ * According to datasheet of Oxford Semiconductor:
+@@ -367,6 +367,11 @@ int snd_oxfw_stream_start_duplex(struct snd_oxfw *oxfw)
+ // Just after changing sampling transfer frequency, many cycles are
+ // skipped for packet transmission.
+ tx_init_skip_cycles = 400;
++ } else if (oxfw->quirks & SND_OXFW_QUIRK_VOLUNTARY_RECOVERY) {
++ // It takes a bit of time for the target device to adjust its event
++ // frequency according to the nominal event frequency in isochronous
++ // packets from the ALSA oxfw driver.
++ tx_init_skip_cycles = 4000;
+ } else {
+ replay_seq = true;
+ }
+diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
+index daf731364695b..b496f87841aec 100644
+--- a/sound/firewire/oxfw/oxfw.c
++++ b/sound/firewire/oxfw/oxfw.c
+@@ -25,6 +25,7 @@
+ #define MODEL_SATELLITE 0x00200f
+ #define MODEL_SCS1M 0x001000
+ #define MODEL_DUET_FW 0x01dddd
++#define MODEL_ONYX_1640I 0x001640
+
+ #define SPECIFIER_1394TA 0x00a02d
+ #define VERSION_AVC 0x010001
+@@ -192,6 +193,13 @@ static int detect_quirks(struct snd_oxfw *oxfw, const struct ieee1394_device_id
+ // OXFW971-based models may transfer events by blocking method.
+ if (!(oxfw->quirks & SND_OXFW_QUIRK_JUMBO_PAYLOAD))
+ oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION;
++
++ if (model == MODEL_ONYX_1640I) {
++ // Unless it receives packets other than NO_INFO packets, the device
++ // transfers roughly half as many events per packet as expected.
++ oxfw->quirks |= SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET |
++ SND_OXFW_QUIRK_VOLUNTARY_RECOVERY;
++ }
+ }
+
+ return 0;
+diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h
+index c13034f6c2ca5..d728e451a25c6 100644
+--- a/sound/firewire/oxfw/oxfw.h
++++ b/sound/firewire/oxfw/oxfw.h
+@@ -47,6 +47,11 @@ enum snd_oxfw_quirk {
+ // the device to process audio data even if the value is invalid in a point of
+ // IEC 61883-1/6.
+ SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET = 0x10,
++ // Loud Technologies Mackie Onyx 1640i seems to configure the OXFW971 ASIC so that it decides
++ // its event frequency according to events in received isochronous packets. The device appears
++ // to perform media clock recovery voluntarily. During the recovery, packets with NO_INFO
++ // are ignored, thus the driver should transfer packets with timestamps.
++ SND_OXFW_QUIRK_VOLUNTARY_RECOVERY = 0x20,
+ };
+
+ /* This is an arbitrary number for convinience. */
+diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c
+index 53e094cc411f8..dfe783d01d7d2 100644
+--- a/sound/firewire/tascam/tascam-stream.c
++++ b/sound/firewire/tascam/tascam-stream.c
+@@ -490,7 +490,7 @@ int snd_tscm_stream_start_duplex(struct snd_tscm *tscm, unsigned int rate)
+ // packet is important for media clock recovery.
+ err = amdtp_domain_start(&tscm->domain, tx_init_skip_cycles, true, true);
+ if (err < 0)
+- return err;
++ goto error;
+
+ if (!amdtp_domain_wait_ready(&tscm->domain, READY_TIMEOUT_MS)) {
+ err = -ETIMEDOUT;
+diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c
+index 0c005d67fa891..c09652da43ffd 100644
+--- a/sound/hda/ext/hdac_ext_stream.c
++++ b/sound/hda/ext/hdac_ext_stream.c
+@@ -106,20 +106,14 @@ void snd_hdac_stream_free_all(struct hdac_bus *bus)
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_stream_free_all);
+
+-/**
+- * snd_hdac_ext_stream_decouple - decouple the hdac stream
+- * @bus: HD-audio core bus
+- * @stream: HD-audio ext core stream object to initialize
+- * @decouple: flag to decouple
+- */
+-void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
+- struct hdac_ext_stream *stream, bool decouple)
++void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus,
++ struct hdac_ext_stream *stream,
++ bool decouple)
+ {
+ struct hdac_stream *hstream = &stream->hstream;
+ u32 val;
+ int mask = AZX_PPCTL_PROCEN(hstream->index);
+
+- spin_lock_irq(&bus->reg_lock);
+ val = readw(bus->ppcap + AZX_REG_PP_PPCTL) & mask;
+
+ if (decouple && !val)
+@@ -128,6 +122,20 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
+ snd_hdac_updatel(bus->ppcap, AZX_REG_PP_PPCTL, mask, 0);
+
+ stream->decoupled = decouple;
++}
++EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple_locked);
++
++/**
++ * snd_hdac_ext_stream_decouple - decouple the hdac stream
++ * @bus: HD-audio core bus
++ * @stream: HD-audio ext core stream object to decouple
++ * @decouple: flag to decouple
++ */
++void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
++ struct hdac_ext_stream *stream, bool decouple)
++{
++ spin_lock_irq(&bus->reg_lock);
++ snd_hdac_ext_stream_decouple_locked(bus, stream, decouple);
+ spin_unlock_irq(&bus->reg_lock);
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple);
+@@ -252,6 +260,7 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus,
+ return NULL;
+ }
+
++ spin_lock_irq(&bus->reg_lock);
+ list_for_each_entry(stream, &bus->stream_list, list) {
+ struct hdac_ext_stream *hstream = container_of(stream,
+ struct hdac_ext_stream,
+@@ -266,17 +275,16 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus,
+ }
+
+ if (!hstream->link_locked) {
+- snd_hdac_ext_stream_decouple(bus, hstream, true);
++ snd_hdac_ext_stream_decouple_locked(bus, hstream, true);
+ res = hstream;
+ break;
+ }
+ }
+ if (res) {
+- spin_lock_irq(&bus->reg_lock);
+ res->link_locked = 1;
+ res->link_substream = substream;
+- spin_unlock_irq(&bus->reg_lock);
+ }
++ spin_unlock_irq(&bus->reg_lock);
+ return res;
+ }
+
+@@ -292,6 +300,7 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus,
+ return NULL;
+ }
+
++ spin_lock_irq(&bus->reg_lock);
+ list_for_each_entry(stream, &bus->stream_list, list) {
+ struct hdac_ext_stream *hstream = container_of(stream,
+ struct hdac_ext_stream,
+@@ -301,18 +310,17 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus,
+
+ if (!stream->opened) {
+ if (!hstream->decoupled)
+- snd_hdac_ext_stream_decouple(bus, hstream, true);
++ snd_hdac_ext_stream_decouple_locked(bus, hstream, true);
+ res = hstream;
+ break;
+ }
+ }
+ if (res) {
+- spin_lock_irq(&bus->reg_lock);
+ res->hstream.opened = 1;
+ res->hstream.running = 0;
+ res->hstream.substream = substream;
+- spin_unlock_irq(&bus->reg_lock);
+ }
++ spin_unlock_irq(&bus->reg_lock);
+
+ return res;
+ }
+@@ -378,15 +386,17 @@ void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type)
+ break;
+
+ case HDAC_EXT_STREAM_TYPE_HOST:
++ spin_lock_irq(&bus->reg_lock);
+ if (stream->decoupled && !stream->link_locked)
+- snd_hdac_ext_stream_decouple(bus, stream, false);
++ snd_hdac_ext_stream_decouple_locked(bus, stream, false);
++ spin_unlock_irq(&bus->reg_lock);
+ snd_hdac_stream_release(&stream->hstream);
+ break;
+
+ case HDAC_EXT_STREAM_TYPE_LINK:
+- if (stream->decoupled && !stream->hstream.opened)
+- snd_hdac_ext_stream_decouple(bus, stream, false);
+ spin_lock_irq(&bus->reg_lock);
++ if (stream->decoupled && !stream->hstream.opened)
++ snd_hdac_ext_stream_decouple_locked(bus, stream, false);
+ stream->link_locked = 0;
+ stream->link_substream = NULL;
+ spin_unlock_irq(&bus->reg_lock);
+@@ -465,23 +475,6 @@ int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus,
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_get_spbmaxfifo);
+
+-
+-/**
+- * snd_hdac_ext_stop_streams - stop all stream if running
+- * @bus: HD-audio core bus
+- */
+-void snd_hdac_ext_stop_streams(struct hdac_bus *bus)
+-{
+- struct hdac_stream *stream;
+-
+- if (bus->chip_init) {
+- list_for_each_entry(stream, &bus->stream_list, list)
+- snd_hdac_stream_stop(stream);
+- snd_hdac_bus_stop_chip(bus);
+- }
+-}
+-EXPORT_SYMBOL_GPL(snd_hdac_ext_stop_streams);
+-
+ /**
+ * snd_hdac_ext_stream_drsm_enable - enable DMA resume for a stream
+ * @bus: HD-audio core bus
+diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
+index 3e9e9ac804f62..bfd8585776767 100644
+--- a/sound/hda/hdac_device.c
++++ b/sound/hda/hdac_device.c
+@@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_power_up_pm);
+ int snd_hdac_keep_power_up(struct hdac_device *codec)
+ {
+ if (!atomic_inc_not_zero(&codec->in_pm)) {
+- int ret = pm_runtime_get_if_in_use(&codec->dev);
++ int ret = pm_runtime_get_if_active(&codec->dev, true);
+ if (!ret)
+ return -1;
+ if (ret < 0)
+@@ -660,6 +660,7 @@ static const struct hda_vendor_id hda_vendor_ids[] = {
+ { 0x14f1, "Conexant" },
+ { 0x17e8, "Chrontel" },
+ { 0x1854, "LG" },
++ { 0x19e5, "Huawei" },
+ { 0x1aec, "Wolfson Microelectronics" },
+ { 0x1af4, "QEMU" },
+ { 0x434d, "C-Media" },
+diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
+index fe3587547cfec..39610a15bcc98 100644
+--- a/sound/hda/hdac_regmap.c
++++ b/sound/hda/hdac_regmap.c
+@@ -597,10 +597,9 @@ EXPORT_SYMBOL_GPL(snd_hdac_regmap_update_raw_once);
+ */
+ void snd_hdac_regmap_sync(struct hdac_device *codec)
+ {
+- if (codec->regmap) {
+- mutex_lock(&codec->regmap_lock);
++ mutex_lock(&codec->regmap_lock);
++ if (codec->regmap)
+ regcache_sync(codec->regmap);
+- mutex_unlock(&codec->regmap_lock);
+- }
++ mutex_unlock(&codec->regmap_lock);
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_regmap_sync);
+diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
+index 1eb8563db2dff..eea22cf72aefd 100644
+--- a/sound/hda/hdac_stream.c
++++ b/sound/hda/hdac_stream.c
+@@ -142,6 +142,33 @@ void snd_hdac_stream_stop(struct hdac_stream *azx_dev)
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_stream_stop);
+
++/**
++ * snd_hdac_stop_streams - stop all streams
++ * @bus: HD-audio core bus
++ */
++void snd_hdac_stop_streams(struct hdac_bus *bus)
++{
++ struct hdac_stream *stream;
++
++ list_for_each_entry(stream, &bus->stream_list, list)
++ snd_hdac_stream_stop(stream);
++}
++EXPORT_SYMBOL_GPL(snd_hdac_stop_streams);
++
++/**
++ * snd_hdac_stop_streams_and_chip - stop all streams and chip if running
++ * @bus: HD-audio core bus
++ */
++void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus)
++{
++
++ if (bus->chip_init) {
++ snd_hdac_stop_streams(bus);
++ snd_hdac_bus_stop_chip(bus);
++ }
++}
++EXPORT_SYMBOL_GPL(snd_hdac_stop_streams_and_chip);
++
+ /**
+ * snd_hdac_stream_reset - reset a stream
+ * @azx_dev: HD-audio core stream to reset
+@@ -296,6 +323,7 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus,
+ int key = (substream->pcm->device << 16) | (substream->number << 2) |
+ (substream->stream + 1);
+
++ spin_lock_irq(&bus->reg_lock);
+ list_for_each_entry(azx_dev, &bus->stream_list, list) {
+ if (azx_dev->direction != substream->stream)
+ continue;
+@@ -309,13 +337,12 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus,
+ res = azx_dev;
+ }
+ if (res) {
+- spin_lock_irq(&bus->reg_lock);
+ res->opened = 1;
+ res->running = 0;
+ res->assigned_key = key;
+ res->substream = substream;
+- spin_unlock_irq(&bus->reg_lock);
+ }
++ spin_unlock_irq(&bus->reg_lock);
+ return res;
+ }
+ EXPORT_SYMBOL_GPL(snd_hdac_stream_assign);
+@@ -534,17 +561,11 @@ static void azx_timecounter_init(struct hdac_stream *azx_dev,
+ cc->mask = CLOCKSOURCE_MASK(32);
+
+ /*
+- * Converting from 24 MHz to ns means applying a 125/3 factor.
+- * To avoid any saturation issues in intermediate operations,
+- * the 125 factor is applied first. The division is applied
+- * last after reading the timecounter value.
+- * Applying the 1/3 factor as part of the multiplication
+- * requires at least 20 bits for a decent precision, however
+- * overflows occur after about 4 hours or less, not a option.
++ * Calculate the optimal mult/shift values. The counter wraps
++ * around after ~178.9 seconds.
+ */
+-
+- cc->mult = 125; /* saturation after 195 years */
+- cc->shift = 0;
++ clocks_calc_mult_shift(&cc->mult, &cc->shift, 24000000,
++ NSEC_PER_SEC, 178);
+
+ nsec = 0; /* audio time is elapsed time since trigger */
+ timecounter_init(tc, cc, nsec);
+diff --git a/sound/hda/hdac_sysfs.c b/sound/hda/hdac_sysfs.c
+index 0d7771fca9f06..6b8d156537490 100644
+--- a/sound/hda/hdac_sysfs.c
++++ b/sound/hda/hdac_sysfs.c
+@@ -346,8 +346,10 @@ static int add_widget_node(struct kobject *parent, hda_nid_t nid,
+ return -ENOMEM;
+ kobject_init(kobj, &widget_ktype);
+ err = kobject_add(kobj, parent, "%02x", nid);
+- if (err < 0)
++ if (err < 0) {
++ kobject_put(kobj);
+ return err;
++ }
+ err = sysfs_create_group(kobj, group);
+ if (err < 0) {
+ kobject_put(kobj);
+diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
+index c9d0ba353463b..513eadcc38d90 100644
+--- a/sound/hda/intel-dsp-config.c
++++ b/sound/hda/intel-dsp-config.c
+@@ -31,6 +31,7 @@ struct config_entry {
+ u16 device;
+ u8 acpi_hid[ACPI_ID_LEN];
+ const struct dmi_system_id *dmi_table;
++ u8 codec_hid[ACPI_ID_LEN];
+ };
+
+ /*
+@@ -56,7 +57,7 @@ static const struct config_entry config_table[] = {
+ /*
+ * Apollolake (Broxton-P)
+ * the legacy HDAudio driver is used except on Up Squared (SOF) and
+- * Chromebooks (SST)
++ * Chromebooks (SST), as well as devices based on the ES8336 codec
+ */
+ #if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
+ {
+@@ -73,6 +74,11 @@ static const struct config_entry config_table[] = {
+ {}
+ }
+ },
++ {
++ .flags = FLAG_SOF,
++ .device = 0x5a98,
++ .codec_hid = "ESSX8336",
++ },
+ #endif
+ #if IS_ENABLED(CONFIG_SND_SOC_INTEL_APL)
+ {
+@@ -137,7 +143,7 @@ static const struct config_entry config_table[] = {
+
+ /*
+ * Geminilake uses legacy HDAudio driver except for Google
+- * Chromebooks
++ * Chromebooks and devices based on the ES8336 codec
+ */
+ /* Geminilake */
+ #if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
+@@ -154,6 +160,11 @@ static const struct config_entry config_table[] = {
+ {}
+ }
+ },
++ {
++ .flags = FLAG_SOF,
++ .device = 0x3198,
++ .codec_hid = "ESSX8336",
++ },
+ #endif
+
+ /*
+@@ -241,6 +252,11 @@ static const struct config_entry config_table[] = {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = 0x02c8,
+ },
++ {
++ .flags = FLAG_SOF,
++ .device = 0x02c8,
++ .codec_hid = "ESSX8336",
++ },
+ /* Cometlake-H */
+ {
+ .flags = FLAG_SOF,
+@@ -265,6 +281,11 @@ static const struct config_entry config_table[] = {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = 0x06c8,
+ },
++ {
++ .flags = FLAG_SOF,
++ .device = 0x06c8,
++ .codec_hid = "ESSX8336",
++ },
+ #endif
+
+ /* Icelake */
+@@ -288,6 +309,15 @@ static const struct config_entry config_table[] = {
+ },
+ #endif
+
++/* JasperLake */
++#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
++ {
++ .flags = FLAG_SOF,
++ .device = 0x4dc8,
++ .codec_hid = "ESSX8336",
++ },
++#endif
++
+ /* Tigerlake */
+ #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
+ {
+@@ -311,6 +341,11 @@ static const struct config_entry config_table[] = {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = 0x43c8,
+ },
++ {
++ .flags = FLAG_SOF,
++ .device = 0xa0c8,
++ .codec_hid = "ESSX8336",
++ },
+ #endif
+
+ /* Elkhart Lake */
+@@ -341,6 +376,15 @@ static const struct config_entry config_table[] = {
+ },
+ #endif
+
++/* Meteor Lake */
++#if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE)
++ /* Meteorlake-P */
++ {
++ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
++ .device = 0x7e28,
++ },
++#endif
++
+ };
+
+ static const struct config_entry *snd_intel_dsp_find_config
+@@ -354,6 +398,8 @@ static const struct config_entry *snd_intel_dsp_find_config
+ continue;
+ if (table->dmi_table && !dmi_check_system(table->dmi_table))
+ continue;
++ if (table->codec_hid[0] && !acpi_dev_present(table->codec_hid, NULL, -1))
++ continue;
+ return table;
+ }
+ return NULL;
+diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
+index e2237239d922a..8714891f50b0a 100644
+--- a/sound/hda/intel-nhlt.c
++++ b/sound/hda/intel-nhlt.c
+@@ -55,20 +55,26 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
+
+ /* find max number of channels based on format_configuration */
+ if (fmt_configs->fmt_count) {
+- dev_dbg(dev, "%s: found %d format definitions\n",
+- __func__, fmt_configs->fmt_count);
++ struct nhlt_fmt_cfg *fmt_cfg = fmt_configs->fmt_config;
++
++ dev_dbg(dev, "found %d format definitions\n",
++ fmt_configs->fmt_count);
+
+ for (i = 0; i < fmt_configs->fmt_count; i++) {
+ struct wav_fmt_ext *fmt_ext;
+
+- fmt_ext = &fmt_configs->fmt_config[i].fmt_ext;
++ fmt_ext = &fmt_cfg->fmt_ext;
+
+ if (fmt_ext->fmt.channels > max_ch)
+ max_ch = fmt_ext->fmt.channels;
++
++ /* Move to the next nhlt_fmt_cfg */
++ fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps +
++ fmt_cfg->config.size);
+ }
+- dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
++ dev_dbg(dev, "max channels found %d\n", max_ch);
+ } else {
+- dev_dbg(dev, "%s: No format information found\n", __func__);
++ dev_dbg(dev, "No format information found\n");
+ }
+
+ if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
+@@ -95,17 +101,16 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
+ }
+
+ if (dmic_geo > 0) {
+- dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
++ dev_dbg(dev, "Array with %d dmics\n", dmic_geo);
+ }
+ if (max_ch > dmic_geo) {
+- dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
+- __func__, max_ch, dmic_geo);
++ dev_dbg(dev, "max channels %d exceed dmic number %d\n",
++ max_ch, dmic_geo);
+ }
+ }
+ }
+
+- dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
+- __func__, dmic_geo, max_ch);
++ dev_dbg(dev, "dmic number %d max_ch %d\n", dmic_geo, max_ch);
+
+ return dmic_geo;
+ }
+diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
+index c0123bc31c0dd..b7758dbe23714 100644
+--- a/sound/hda/intel-sdw-acpi.c
++++ b/sound/hda/intel-sdw-acpi.c
+@@ -132,8 +132,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
+ return AE_NOT_FOUND;
+ }
+
+- info->handle = handle;
+-
+ /*
+ * On some Intel platforms, multiple children of the HDAS
+ * device can be found, but only one of them is the SoundWire
+@@ -144,6 +142,9 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
+ if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
+ return AE_OK; /* keep going */
+
++ /* found the correct SoundWire controller */
++ info->handle = handle;
++
+ /* device found, stop namespace walk */
+ return AE_CTRL_TERMINATE;
+ }
+@@ -164,8 +165,14 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+ acpi_status status;
+
+ info->handle = NULL;
++ /*
++ * In the HDAS ACPI scope, 'SNDW' may be either the child of
++ * 'HDAS' or the grandchild of 'HDAS'. So let's walk the ACPI
++ * namespace from 'HDAS' at a max depth of 2 to find the 'SNDW'
++ * device.
++ */
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
+- parent_handle, 1,
++ parent_handle, 2,
+ sdw_intel_acpi_cb,
+ NULL, info, NULL);
+ if (ACPI_FAILURE(status) || info->handle == NULL)
+diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c
+index 65012af6a36e4..f58b14b490455 100644
+--- a/sound/i2c/cs8427.c
++++ b/sound/i2c/cs8427.c
+@@ -561,10 +561,13 @@ int snd_cs8427_iec958_active(struct snd_i2c_device *cs8427, int active)
+ if (snd_BUG_ON(!cs8427))
+ return -ENXIO;
+ chip = cs8427->private_data;
+- if (active)
++ if (active) {
+ memcpy(chip->playback.pcm_status,
+ chip->playback.def_status, 24);
+- chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE;
++ chip->playback.pcm_ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE;
++ } else {
++ chip->playback.pcm_ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE;
++ }
+ snd_ctl_notify(cs8427->bus->card,
+ SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO,
+ &chip->playback.pcm_ctl->id);
+diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
+index 6ffa48dd59830..570b88e0b2018 100644
+--- a/sound/isa/Kconfig
++++ b/sound/isa/Kconfig
+@@ -22,7 +22,7 @@ config SND_SB16_DSP
+ menuconfig SND_ISA
+ bool "ISA sound devices"
+ depends on ISA || COMPILE_TEST
+- depends on ISA_DMA_API
++ depends on ISA_DMA_API && !M68K
+ default y
+ help
+ Support for sound devices connected via the ISA bus.
+diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c
+index b6bdebd9ef275..10112e1bb25dc 100644
+--- a/sound/isa/cs423x/cs4236.c
++++ b/sound/isa/cs423x/cs4236.c
+@@ -494,7 +494,7 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev,
+ static int dev;
+ int err;
+ struct snd_card *card;
+- struct pnp_dev *cdev;
++ struct pnp_dev *cdev, *iter;
+ char cid[PNP_ID_LEN];
+
+ if (pnp_device_is_isapnp(pdev))
+@@ -510,9 +510,11 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev,
+ strcpy(cid, pdev->id[0].id);
+ cid[5] = '1';
+ cdev = NULL;
+- list_for_each_entry(cdev, &(pdev->protocol->devices), protocol_list) {
+- if (!strcmp(cdev->id[0].id, cid))
++ list_for_each_entry(iter, &(pdev->protocol->devices), protocol_list) {
++ if (!strcmp(iter->id[0].id, cid)) {
++ cdev = iter;
+ break;
++ }
+ }
+ err = snd_cs423x_card_new(&pdev->dev, dev, &card);
+ if (err < 0)
+diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c
+index ea001c80149dd..3164eb8510fa4 100644
+--- a/sound/isa/galaxy/galaxy.c
++++ b/sound/isa/galaxy/galaxy.c
+@@ -478,7 +478,7 @@ static void snd_galaxy_free(struct snd_card *card)
+ galaxy_set_config(galaxy, galaxy->config);
+ }
+
+-static int snd_galaxy_probe(struct device *dev, unsigned int n)
++static int __snd_galaxy_probe(struct device *dev, unsigned int n)
+ {
+ struct snd_galaxy *galaxy;
+ struct snd_wss *chip;
+@@ -598,6 +598,11 @@ static int snd_galaxy_probe(struct device *dev, unsigned int n)
+ return 0;
+ }
+
++static int snd_galaxy_probe(struct device *dev, unsigned int n)
++{
++ return snd_card_free_on_error(dev, __snd_galaxy_probe(dev, n));
++}
++
+ static struct isa_driver snd_galaxy_driver = {
+ .match = snd_galaxy_match,
+ .probe = snd_galaxy_probe,
+diff --git a/sound/isa/gus/gus_dma.c b/sound/isa/gus/gus_dma.c
+index a1c770d826dda..6d664dd8dde0b 100644
+--- a/sound/isa/gus/gus_dma.c
++++ b/sound/isa/gus/gus_dma.c
+@@ -126,6 +126,8 @@ static void snd_gf1_dma_interrupt(struct snd_gus_card * gus)
+ }
+ block = snd_gf1_dma_next_block(gus);
+ spin_unlock(&gus->dma_lock);
++ if (!block)
++ return;
+ snd_gf1_dma_program(gus, block->addr, block->buf_addr, block->count, (unsigned short) block->cmd);
+ kfree(block);
+ #if 0
+diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
+index 26ab7ff807684..60398fced046b 100644
+--- a/sound/isa/sc6000.c
++++ b/sound/isa/sc6000.c
+@@ -537,7 +537,7 @@ static void snd_sc6000_free(struct snd_card *card)
+ sc6000_setup_board(vport, 0);
+ }
+
+-static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
++static int __snd_sc6000_probe(struct device *devptr, unsigned int dev)
+ {
+ static const int possible_irqs[] = { 5, 7, 9, 10, 11, -1 };
+ static const int possible_dmas[] = { 1, 3, 0, -1 };
+@@ -662,6 +662,11 @@ static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
+ return 0;
+ }
+
++static int snd_sc6000_probe(struct device *devptr, unsigned int dev)
++{
++ return snd_card_free_on_error(devptr, __snd_sc6000_probe(devptr, dev));
++}
++
+ static struct isa_driver snd_sc6000_driver = {
+ .match = snd_sc6000_match,
+ .probe = snd_sc6000_probe,
+diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
+index 69cbc79fbb716..2aaaa68071744 100644
+--- a/sound/isa/wavefront/wavefront_synth.c
++++ b/sound/isa/wavefront/wavefront_synth.c
+@@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev,
+
+ if (dataptr < data_end) {
+
+- __get_user (sample_short, dataptr);
++ if (get_user(sample_short, dataptr))
++ return -EFAULT;
+ dataptr += skip;
+
+ if (data_is_unsigned) { /* GUS ? */
+diff --git a/sound/oss/dmasound/dmasound.h b/sound/oss/dmasound/dmasound.h
+index c1c52b479da26..ad8ce6a1c25c7 100644
+--- a/sound/oss/dmasound/dmasound.h
++++ b/sound/oss/dmasound/dmasound.h
+@@ -88,11 +88,7 @@ static inline int ioctl_return(int __user *addr, int value)
+ */
+
+ extern int dmasound_init(void);
+-#ifdef MODULE
+ extern void dmasound_deinit(void);
+-#else
+-#define dmasound_deinit() do { } while (0)
+-#endif
+
+ /* description of the set-up applies to either hard or soft settings */
+
+@@ -114,9 +110,7 @@ typedef struct {
+ void *(*dma_alloc)(unsigned int, gfp_t);
+ void (*dma_free)(void *, unsigned int);
+ int (*irqinit)(void);
+-#ifdef MODULE
+ void (*irqcleanup)(void);
+-#endif
+ void (*init)(void);
+ void (*silence)(void);
+ int (*setFormat)(int);
+diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
+index 0c95828ac0b18..164335d3c2009 100644
+--- a/sound/oss/dmasound/dmasound_core.c
++++ b/sound/oss/dmasound/dmasound_core.c
+@@ -206,12 +206,10 @@ module_param(writeBufSize, int, 0);
+
+ MODULE_LICENSE("GPL");
+
+-#ifdef MODULE
+ static int sq_unit = -1;
+ static int mixer_unit = -1;
+ static int state_unit = -1;
+ static int irq_installed;
+-#endif /* MODULE */
+
+ /* control over who can modify resources shared between play/record */
+ static fmode_t shared_resource_owner;
+@@ -391,9 +389,6 @@ static const struct file_operations mixer_fops =
+
+ static void mixer_init(void)
+ {
+-#ifndef MODULE
+- int mixer_unit;
+-#endif
+ mixer_unit = register_sound_mixer(&mixer_fops, -1);
+ if (mixer_unit < 0)
+ return;
+@@ -1171,9 +1166,6 @@ static const struct file_operations sq_fops =
+ static int sq_init(void)
+ {
+ const struct file_operations *fops = &sq_fops;
+-#ifndef MODULE
+- int sq_unit;
+-#endif
+
+ sq_unit = register_sound_dsp(fops, -1);
+ if (sq_unit < 0) {
+@@ -1366,9 +1358,6 @@ static const struct file_operations state_fops = {
+
+ static int state_init(void)
+ {
+-#ifndef MODULE
+- int state_unit;
+-#endif
+ state_unit = register_sound_special(&state_fops, SND_DEV_STATUS);
+ if (state_unit < 0)
+ return state_unit ;
+@@ -1386,10 +1375,9 @@ static int state_init(void)
+ int dmasound_init(void)
+ {
+ int res ;
+-#ifdef MODULE
++
+ if (irq_installed)
+ return -EBUSY;
+-#endif
+
+ /* Set up sound queue, /dev/audio and /dev/dsp. */
+
+@@ -1408,9 +1396,7 @@ int dmasound_init(void)
+ printk(KERN_ERR "DMA sound driver: Interrupt initialization failed\n");
+ return -ENODEV;
+ }
+-#ifdef MODULE
+ irq_installed = 1;
+-#endif
+
+ printk(KERN_INFO "%s DMA sound driver rev %03d installed\n",
+ dmasound.mach.name, (DMASOUND_CORE_REVISION<<4) +
+@@ -1424,8 +1410,6 @@ int dmasound_init(void)
+ return 0;
+ }
+
+-#ifdef MODULE
+-
+ void dmasound_deinit(void)
+ {
+ if (irq_installed) {
+@@ -1444,9 +1428,7 @@ void dmasound_deinit(void)
+ unregister_sound_dsp(sq_unit);
+ }
+
+-#else /* !MODULE */
+-
+-static int dmasound_setup(char *str)
++static int __maybe_unused dmasound_setup(char *str)
+ {
+ int ints[6], size;
+
+@@ -1489,8 +1471,6 @@ static int dmasound_setup(char *str)
+
+ __setup("dmasound=", dmasound_setup);
+
+-#endif /* !MODULE */
+-
+ /*
+ * Conversion tables
+ */
+@@ -1577,9 +1557,7 @@ char dmasound_alaw2dma8[] = {
+
+ EXPORT_SYMBOL(dmasound);
+ EXPORT_SYMBOL(dmasound_init);
+-#ifdef MODULE
+ EXPORT_SYMBOL(dmasound_deinit);
+-#endif
+ EXPORT_SYMBOL(dmasound_write_sq);
+ EXPORT_SYMBOL(dmasound_catchRadius);
+ #ifdef HAS_8BIT_TABLES
+diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
+index 93bc9bef7641f..41ce125971777 100644
+--- a/sound/pci/Kconfig
++++ b/sound/pci/Kconfig
+@@ -279,6 +279,7 @@ config SND_CS46XX_NEW_DSP
+ config SND_CS5530
+ tristate "CS5530 Audio"
+ depends on ISA_DMA_API && (X86_32 || COMPILE_TEST)
++ depends on !M68K
+ select SND_SB16_DSP
+ help
+ Say Y here to include support for audio on Cyrix/NatSemi CS5530 chips.
+diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
+index 01f296d524ce6..b81b3c1f76499 100644
+--- a/sound/pci/ac97/ac97_codec.c
++++ b/sound/pci/ac97/ac97_codec.c
+@@ -938,8 +938,8 @@ static int snd_ac97_ad18xx_pcm_get_volume(struct snd_kcontrol *kcontrol, struct
+ int codec = kcontrol->private_value & 3;
+
+ mutex_lock(&ac97->page_mutex);
+- ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31);
+- ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31);
++ ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31);
++ ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31);
+ mutex_unlock(&ac97->page_mutex);
+ return 0;
+ }
+@@ -2009,6 +2009,7 @@ static int snd_ac97_dev_register(struct snd_device *device)
+ err = device_register(&ac97->dev);
+ if (err < 0) {
+ ac97_err(ac97, "Can't register ac97 bus\n");
++ put_device(&ac97->dev);
+ ac97->dev.bus = NULL;
+ return err;
+ }
+@@ -2069,10 +2070,9 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template,
+ .dev_disconnect = snd_ac97_dev_disconnect,
+ };
+
+- if (rac97)
+- *rac97 = NULL;
+- if (snd_BUG_ON(!bus || !template))
++ if (snd_BUG_ON(!bus || !template || !rac97))
+ return -EINVAL;
++ *rac97 = NULL;
+ if (snd_BUG_ON(template->num >= 4))
+ return -EINVAL;
+ if (bus->codec[template->num])
+diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c
+index bba4dae8dcc70..50e30704bf6f9 100644
+--- a/sound/pci/ad1889.c
++++ b/sound/pci/ad1889.c
+@@ -844,8 +844,8 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci)
+ }
+
+ static int
+-snd_ad1889_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++__snd_ad1889_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ int err;
+ static int devno;
+@@ -904,6 +904,12 @@ snd_ad1889_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_ad1889_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_ad1889_probe(pci, pci_id));
++}
++
+ static const struct pci_device_id snd_ad1889_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_ANALOG_DEVICES, PCI_DEVICE_ID_AD1889JS) },
+ { 0, },
+diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c
+index 92eb59db106de..2378a39abaebe 100644
+--- a/sound/pci/ali5451/ali5451.c
++++ b/sound/pci/ali5451/ali5451.c
+@@ -2124,8 +2124,8 @@ static int snd_ali_create(struct snd_card *card,
+ return 0;
+ }
+
+-static int snd_ali_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_ali_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct snd_ali *codec;
+@@ -2170,6 +2170,12 @@ static int snd_ali_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_ali_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_ali_probe(pci, pci_id));
++}
++
+ static struct pci_driver ali5451_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_ali_ids,
+diff --git a/sound/pci/als300.c b/sound/pci/als300.c
+index b86565dcdbe41..c70aff0601205 100644
+--- a/sound/pci/als300.c
++++ b/sound/pci/als300.c
+@@ -708,7 +708,7 @@ static int snd_als300_probe(struct pci_dev *pci,
+
+ err = snd_als300_create(card, pci, chip_type);
+ if (err < 0)
+- return err;
++ goto error;
+
+ strcpy(card->driver, "ALS300");
+ if (chip->chip_type == DEVICE_ALS300_PLUS)
+@@ -723,11 +723,15 @@ static int snd_als300_probe(struct pci_dev *pci,
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver als300_driver = {
+diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c
+index 535eccd124bee..f33aeb692a112 100644
+--- a/sound/pci/als4000.c
++++ b/sound/pci/als4000.c
+@@ -806,8 +806,8 @@ static void snd_card_als4000_free( struct snd_card *card )
+ snd_als4000_free_gameport(acard);
+ }
+
+-static int snd_card_als4000_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_card_als4000_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -930,6 +930,12 @@ static int snd_card_als4000_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_card_als4000_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_card_als4000_probe(pci, pci_id));
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static int snd_als4000_suspend(struct device *dev)
+ {
+diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c
+index 3d6914c64c4a8..4cdaeefeb6885 100644
+--- a/sound/pci/asihpi/hpi6205.c
++++ b/sound/pci/asihpi/hpi6205.c
+@@ -430,7 +430,7 @@ void HPI_6205(struct hpi_message *phm, struct hpi_response *phr)
+ pao = hpi_find_adapter(phm->adapter_index);
+ } else {
+ /* subsys messages don't address an adapter */
+- _HPI_6205(NULL, phm, phr);
++ phr->error = HPI_ERROR_INVALID_OBJ_INDEX;
+ return;
+ }
+
+diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
+index bb31b7fe867d6..477a5b4b50bcb 100644
+--- a/sound/pci/asihpi/hpioctl.c
++++ b/sound/pci/asihpi/hpioctl.c
+@@ -361,7 +361,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev,
+ pci_dev->device, pci_dev->subsystem_vendor,
+ pci_dev->subsystem_device, pci_dev->devfn);
+
+- if (pci_enable_device(pci_dev) < 0) {
++ if (pcim_enable_device(pci_dev) < 0) {
+ dev_err(&pci_dev->dev,
+ "pci_enable_device failed, disabling device\n");
+ return -EIO;
+diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c
+index b8e035d5930d2..43d01f1847ed7 100644
+--- a/sound/pci/atiixp.c
++++ b/sound/pci/atiixp.c
+@@ -1572,8 +1572,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
+ }
+
+
+-static int snd_atiixp_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_atiixp_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct atiixp *chip;
+@@ -1623,6 +1623,12 @@ static int snd_atiixp_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_atiixp_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id));
++}
++
+ static struct pci_driver atiixp_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_atiixp_ids,
+diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c
+index 178dce8ef1e99..8864c4c3c7e13 100644
+--- a/sound/pci/atiixp_modem.c
++++ b/sound/pci/atiixp_modem.c
+@@ -1201,8 +1201,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci)
+ }
+
+
+-static int snd_atiixp_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_atiixp_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct atiixp_modem *chip;
+@@ -1247,6 +1247,12 @@ static int snd_atiixp_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_atiixp_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id));
++}
++
+ static struct pci_driver atiixp_modem_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_atiixp_ids,
+diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c
+index 342ef2a6655e3..eb234153691bc 100644
+--- a/sound/pci/au88x0/au88x0.c
++++ b/sound/pci/au88x0/au88x0.c
+@@ -193,7 +193,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci)
+
+ // constructor -- see "Constructor" sub-section
+ static int
+-snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++__snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -310,6 +310,12 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ return 0;
+ }
+
++static int
++snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_vortex_probe(pci, pci_id));
++}
++
+ // pci_driver definition
+ static struct pci_driver vortex_driver = {
+ .name = KBUILD_MODNAME,
+diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h
+index 0aa7af049b1b9..6cbb2bc4a0483 100644
+--- a/sound/pci/au88x0/au88x0.h
++++ b/sound/pci/au88x0/au88x0.h
+@@ -141,7 +141,7 @@ struct snd_vortex {
+ #ifndef CHIP_AU8810
+ stream_t dma_wt[NR_WT];
+ wt_voice_t wt_voice[NR_WT]; /* WT register cache. */
+- char mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */
++ s8 mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */
+ #endif
+
+ /* Global resources */
+@@ -235,8 +235,8 @@ static int vortex_alsafmt_aspfmt(snd_pcm_format_t alsafmt, vortex_t *v);
+ static void vortex_connect_default(vortex_t * vortex, int en);
+ static int vortex_adb_allocroute(vortex_t * vortex, int dma, int nr_ch,
+ int dir, int type, int subdev);
+-static char vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out,
+- int restype);
++static int vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out,
++ int restype);
+ #ifndef CHIP_AU8810
+ static int vortex_wt_allocroute(vortex_t * vortex, int dma, int nr_ch);
+ static void vortex_wt_connect(vortex_t * vortex, int en);
+diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
+index 2ed5100b8caea..f217c02dfdfa4 100644
+--- a/sound/pci/au88x0/au88x0_core.c
++++ b/sound/pci/au88x0/au88x0_core.c
+@@ -1998,7 +1998,7 @@ static const int resnum[VORTEX_RESOURCE_LAST] =
+ out: Mean checkout if != 0. Else mean Checkin resource.
+ restype: Indicates type of resource to be checked in or out.
+ */
+-static char
++static int
+ vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, int restype)
+ {
+ int i, qty = resnum[restype], resinuse = 0;
+diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
+index d56f126d6fdd9..29a4bcdec237a 100644
+--- a/sound/pci/aw2/aw2-alsa.c
++++ b/sound/pci/aw2/aw2-alsa.c
+@@ -275,7 +275,7 @@ static int snd_aw2_probe(struct pci_dev *pci,
+ /* (3) Create main component */
+ err = snd_aw2_create(card, pci);
+ if (err < 0)
+- return err;
++ goto error;
+
+ /* initialize mutex */
+ mutex_init(&chip->mtx);
+@@ -294,13 +294,17 @@ static int snd_aw2_probe(struct pci_dev *pci,
+ /* (6) Register card instance */
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ /* (7) Set PCI driver data */
+ pci_set_drvdata(pci, card);
+
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ /* open callback */
+diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c
+index 089050470ff27..7f329dfc5404a 100644
+--- a/sound/pci/azt3328.c
++++ b/sound/pci/azt3328.c
+@@ -2427,7 +2427,7 @@ snd_azf3328_create(struct snd_card *card,
+ }
+
+ static int
+-snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++__snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2520,6 +2520,12 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ return 0;
+ }
+
++static int
++snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_azf3328_probe(pci, pci_id));
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static inline void
+ snd_azf3328_suspend_regs(const struct snd_azf3328 *chip,
+diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
+index d23f931638410..621985bfee5d7 100644
+--- a/sound/pci/bt87x.c
++++ b/sound/pci/bt87x.c
+@@ -805,8 +805,8 @@ static int snd_bt87x_detect_card(struct pci_dev *pci)
+ return SND_BT87X_BOARD_UNKNOWN;
+ }
+
+-static int snd_bt87x_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_bt87x_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -889,6 +889,12 @@ static int snd_bt87x_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_bt87x_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_bt87x_probe(pci, pci_id));
++}
++
+ /* default entries for all Bt87x cards - it's not exported */
+ /* driver_data is set to 0 to call detection */
+ static const struct pci_device_id snd_bt87x_default_ids[] = {
+diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
+index 36fb150b72fb5..f4cc112bddf3e 100644
+--- a/sound/pci/ca0106/ca0106_main.c
++++ b/sound/pci/ca0106/ca0106_main.c
+@@ -1725,8 +1725,8 @@ static int snd_ca0106_midi(struct snd_ca0106 *chip, unsigned int channel)
+ }
+
+
+-static int snd_ca0106_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_ca0106_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1786,6 +1786,12 @@ static int snd_ca0106_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_ca0106_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_ca0106_probe(pci, pci_id));
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static int snd_ca0106_suspend(struct device *dev)
+ {
+diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
+index ea20236f35dbc..42fcbed9220c1 100644
+--- a/sound/pci/cmipci.c
++++ b/sound/pci/cmipci.c
+@@ -298,7 +298,6 @@ MODULE_PARM_DESC(joystick_port, "Joystick port address.");
+ #define CM_MICGAINZ 0x01 /* mic boost */
+ #define CM_MICGAINZ_SHIFT 0
+
+-#define CM_REG_MIXER3 0x24
+ #define CM_REG_AUX_VOL 0x26
+ #define CM_VAUXL_MASK 0xf0
+ #define CM_VAUXR_MASK 0x0f
+@@ -3250,15 +3249,19 @@ static int snd_cmipci_probe(struct pci_dev *pci,
+
+ err = snd_cmipci_create(card, pci, dev);
+ if (err < 0)
+- return err;
++ goto error;
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ #ifdef CONFIG_PM_SLEEP
+@@ -3267,7 +3270,7 @@ static int snd_cmipci_probe(struct pci_dev *pci,
+ */
+ static const unsigned char saved_regs[] = {
+ CM_REG_FUNCTRL1, CM_REG_CHFORMAT, CM_REG_LEGACY_CTRL, CM_REG_MISC_CTRL,
+- CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_MIXER3, CM_REG_PLL,
++ CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_AUX_VOL, CM_REG_PLL,
+ CM_REG_CH0_FRAME1, CM_REG_CH0_FRAME2,
+ CM_REG_CH1_FRAME1, CM_REG_CH1_FRAME2, CM_REG_EXT_MISC,
+ CM_REG_INT_STATUS, CM_REG_INT_HLDCLR, CM_REG_FUNCTRL0,
+diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c
+index e7367402b84a3..0c9cadf7b3b80 100644
+--- a/sound/pci/cs4281.c
++++ b/sound/pci/cs4281.c
+@@ -1827,8 +1827,8 @@ static void snd_cs4281_opl3_command(struct snd_opl3 *opl3, unsigned short cmd,
+ spin_unlock_irqrestore(&opl3->reg_lock, flags);
+ }
+
+-static int snd_cs4281_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_cs4281_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1888,6 +1888,12 @@ static int snd_cs4281_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_cs4281_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_cs4281_probe(pci, pci_id));
++}
++
+ /*
+ * Power Management
+ */
+diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c
+index bd60308769ff7..8634004a606b6 100644
+--- a/sound/pci/cs46xx/cs46xx.c
++++ b/sound/pci/cs46xx/cs46xx.c
+@@ -74,36 +74,36 @@ static int snd_card_cs46xx_probe(struct pci_dev *pci,
+ err = snd_cs46xx_create(card, pci,
+ external_amp[dev], thinkpad[dev]);
+ if (err < 0)
+- return err;
++ goto error;
+ card->private_data = chip;
+ chip->accept_valid = mmap_valid[dev];
+ err = snd_cs46xx_pcm(chip, 0);
+ if (err < 0)
+- return err;
++ goto error;
+ #ifdef CONFIG_SND_CS46XX_NEW_DSP
+ err = snd_cs46xx_pcm_rear(chip, 1);
+ if (err < 0)
+- return err;
++ goto error;
+ err = snd_cs46xx_pcm_iec958(chip, 2);
+ if (err < 0)
+- return err;
++ goto error;
+ #endif
+ err = snd_cs46xx_mixer(chip, 2);
+ if (err < 0)
+- return err;
++ goto error;
+ #ifdef CONFIG_SND_CS46XX_NEW_DSP
+ if (chip->nr_ac97_codecs ==2) {
+ err = snd_cs46xx_pcm_center_lfe(chip, 3);
+ if (err < 0)
+- return err;
++ goto error;
+ }
+ #endif
+ err = snd_cs46xx_midi(chip, 0);
+ if (err < 0)
+- return err;
++ goto error;
+ err = snd_cs46xx_start_dsp(chip);
+ if (err < 0)
+- return err;
++ goto error;
+
+ snd_cs46xx_gameport(chip);
+
+@@ -117,11 +117,15 @@ static int snd_card_cs46xx_probe(struct pci_dev *pci,
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver cs46xx_driver = {
+diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c
+index 499fa0148f9a4..440b8f9b40c96 100644
+--- a/sound/pci/cs5535audio/cs5535audio.c
++++ b/sound/pci/cs5535audio/cs5535audio.c
+@@ -281,8 +281,8 @@ static int snd_cs5535audio_create(struct snd_card *card,
+ return 0;
+ }
+
+-static int snd_cs5535audio_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_cs5535audio_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -331,6 +331,12 @@ static int snd_cs5535audio_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_cs5535audio_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_cs5535audio_probe(pci, pci_id));
++}
++
+ static struct pci_driver cs5535audio_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_cs5535audio_ids,
+diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
+index da6e6350ceafa..d074727c3e21d 100644
+--- a/sound/pci/ctxfi/ctamixer.c
++++ b/sound/pci/ctxfi/ctamixer.c
+@@ -23,16 +23,15 @@
+
+ #define BLANK_SLOT 4094
+
+-static int amixer_master(struct rsc *rsc)
++static void amixer_master(struct rsc *rsc)
+ {
+ rsc->conj = 0;
+- return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
++ rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
+ }
+
+-static int amixer_next_conj(struct rsc *rsc)
++static void amixer_next_conj(struct rsc *rsc)
+ {
+ rsc->conj++;
+- return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
+ }
+
+ static int amixer_index(const struct rsc *rsc)
+@@ -331,16 +330,15 @@ int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr)
+
+ /* SUM resource management */
+
+-static int sum_master(struct rsc *rsc)
++static void sum_master(struct rsc *rsc)
+ {
+ rsc->conj = 0;
+- return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
++ rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
+ }
+
+-static int sum_next_conj(struct rsc *rsc)
++static void sum_next_conj(struct rsc *rsc)
+ {
+ rsc->conj++;
+- return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
+ }
+
+ static int sum_index(const struct rsc *rsc)
+diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
+index 78f35e88aed6b..fbdb8a3d5b8e5 100644
+--- a/sound/pci/ctxfi/ctatc.c
++++ b/sound/pci/ctxfi/ctatc.c
+@@ -36,6 +36,7 @@
+ | ((IEC958_AES3_CON_FS_48000) << 24))
+
+ static const struct snd_pci_quirk subsys_20k1_list[] = {
++ SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0021, "SB046x", CTSB046X),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0022, "SB055x", CTSB055X),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X),
+@@ -64,6 +65,7 @@ static const struct snd_pci_quirk subsys_20k2_list[] = {
+
+ static const char *ct_subsys_name[NUM_CTCARDS] = {
+ /* 20k1 models */
++ [CTSB046X] = "SB046x",
+ [CTSB055X] = "SB055x",
+ [CTSB073X] = "SB073x",
+ [CTUAA] = "UAA",
+diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
+index f589da0453424..7fc720046ce29 100644
+--- a/sound/pci/ctxfi/ctdaio.c
++++ b/sound/pci/ctxfi/ctdaio.c
+@@ -51,12 +51,12 @@ static const struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = {
+ [SPDIFIO] = {.left = 0x05, .right = 0x85},
+ };
+
+-static int daio_master(struct rsc *rsc)
++static void daio_master(struct rsc *rsc)
+ {
+ /* Actually, this is not the resource index of DAIO.
+ * For DAO, it is the input mapper index. And, for DAI,
+ * it is the output time-slot index. */
+- return rsc->conj = rsc->idx;
++ rsc->conj = rsc->idx;
+ }
+
+ static int daio_index(const struct rsc *rsc)
+@@ -64,19 +64,19 @@ static int daio_index(const struct rsc *rsc)
+ return rsc->conj;
+ }
+
+-static int daio_out_next_conj(struct rsc *rsc)
++static void daio_out_next_conj(struct rsc *rsc)
+ {
+- return rsc->conj += 2;
++ rsc->conj += 2;
+ }
+
+-static int daio_in_next_conj_20k1(struct rsc *rsc)
++static void daio_in_next_conj_20k1(struct rsc *rsc)
+ {
+- return rsc->conj += 0x200;
++ rsc->conj += 0x200;
+ }
+
+-static int daio_in_next_conj_20k2(struct rsc *rsc)
++static void daio_in_next_conj_20k2(struct rsc *rsc)
+ {
+- return rsc->conj += 0x100;
++ rsc->conj += 0x100;
+ }
+
+ static const struct rsc_ops daio_out_rsc_ops = {
+diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
+index f406b626a28c4..2875cec83b8f2 100644
+--- a/sound/pci/ctxfi/cthardware.h
++++ b/sound/pci/ctxfi/cthardware.h
+@@ -26,8 +26,9 @@ enum CHIPTYP {
+
+ enum CTCARDS {
+ /* 20k1 models */
++ CTSB046X,
++ CT20K1_MODEL_FIRST = CTSB046X,
+ CTSB055X,
+- CT20K1_MODEL_FIRST = CTSB055X,
+ CTSB073X,
+ CTUAA,
+ CT20K1_UNKNOWN,
+diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
+index 81ad269345182..be1d3e61309ce 100644
+--- a/sound/pci/ctxfi/ctresource.c
++++ b/sound/pci/ctxfi/ctresource.c
+@@ -109,18 +109,17 @@ static int audio_ring_slot(const struct rsc *rsc)
+ return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type];
+ }
+
+-static int rsc_next_conj(struct rsc *rsc)
++static void rsc_next_conj(struct rsc *rsc)
+ {
+ unsigned int i;
+ for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); )
+ i++;
+ rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i);
+- return rsc->conj;
+ }
+
+-static int rsc_master(struct rsc *rsc)
++static void rsc_master(struct rsc *rsc)
+ {
+- return rsc->conj = rsc->idx;
++ rsc->conj = rsc->idx;
+ }
+
+ static const struct rsc_ops rsc_generic_ops = {
+diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h
+index fdbfd808816d3..58553bda44f43 100644
+--- a/sound/pci/ctxfi/ctresource.h
++++ b/sound/pci/ctxfi/ctresource.h
+@@ -39,8 +39,8 @@ struct rsc {
+ };
+
+ struct rsc_ops {
+- int (*master)(struct rsc *rsc); /* Move to master resource */
+- int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
++ void (*master)(struct rsc *rsc); /* Move to master resource */
++ void (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
+ int (*index)(const struct rsc *rsc); /* Return the index of resource */
+ /* Return the output slot number */
+ int (*output_slot)(const struct rsc *rsc);
+diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
+index bd4697b442334..4a94b4708a77e 100644
+--- a/sound/pci/ctxfi/ctsrc.c
++++ b/sound/pci/ctxfi/ctsrc.c
+@@ -590,16 +590,15 @@ int src_mgr_destroy(struct src_mgr *src_mgr)
+
+ /* SRCIMP resource manager operations */
+
+-static int srcimp_master(struct rsc *rsc)
++static void srcimp_master(struct rsc *rsc)
+ {
+ rsc->conj = 0;
+- return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
++ rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
+ }
+
+-static int srcimp_next_conj(struct rsc *rsc)
++static void srcimp_next_conj(struct rsc *rsc)
+ {
+ rsc->conj++;
+- return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
+ }
+
+ static int srcimp_index(const struct rsc *rsc)
+diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c
+index 25b012ef5c3e6..c70c3ac4e99a5 100644
+--- a/sound/pci/echoaudio/echoaudio.c
++++ b/sound/pci/echoaudio/echoaudio.c
+@@ -1970,8 +1970,8 @@ static int snd_echo_create(struct snd_card *card,
+ }
+
+ /* constructor */
+-static int snd_echo_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_echo_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2139,6 +2139,11 @@ static int snd_echo_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_echo_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_echo_probe(pci, pci_id));
++}
+
+
+ #if defined(CONFIG_PM_SLEEP)
+diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
+index c49c44dc10820..89043392f3ec7 100644
+--- a/sound/pci/emu10k1/emu10k1x.c
++++ b/sound/pci/emu10k1/emu10k1x.c
+@@ -1491,8 +1491,8 @@ static int snd_emu10k1x_midi(struct emu10k1x *emu)
+ return 0;
+ }
+
+-static int snd_emu10k1x_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_emu10k1x_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1554,6 +1554,12 @@ static int snd_emu10k1x_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_emu10k1x_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_emu10k1x_probe(pci, pci_id));
++}
++
+ // PCI IDs
+ static const struct pci_device_id snd_emu10k1x_ids[] = {
+ { PCI_VDEVICE(CREATIVE, 0x0006), 0 }, /* Dell OEM version (EMU10K1) */
+diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
+index 6cf7c8b1de47d..4ca65f425f39c 100644
+--- a/sound/pci/emu10k1/emufx.c
++++ b/sound/pci/emu10k1/emufx.c
+@@ -1563,14 +1563,8 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ gpr += 2;
+
+ /* Master volume (will be renamed later) */
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS));
++ for (z = 0; z < 8; z++)
++ A_OP(icode, &ptr, iMAC0, A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS));
+ snd_emu10k1_init_mono_control(&controls[nctl++], "Wave Master Playback Volume", gpr, 0);
+ gpr += 2;
+
+@@ -1654,102 +1648,14 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ dev_dbg(emu->card->dev, "emufx.c: gpr=0x%x, tmp=0x%x\n",
+ gpr, tmp);
+ */
+- /* For the EMU1010: How to get 32bit values from the DSP. High 16bits into L, low 16bits into R. */
+- /* A_P16VIN(0) is delayed by one sample,
+- * so all other A_P16VIN channels will need to also be delayed
+- */
+- /* Left ADC in. 1 of 2 */
+ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_P16VIN(0x0), A_FXBUS2(0) );
+- /* Right ADC in 1 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- /* Delaying by one sample: instead of copying the input
+- * value A_P16VIN to output A_FXBUS2 as in the first channel,
+- * we use an auxiliary register, delaying the value by one
+- * sample
+- */
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(2) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x1), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(4) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x2), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(6) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x3), A_C_00000000, A_C_00000000);
+- /* For 96kHz mode */
+- /* Left ADC in. 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0x8) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x4), A_C_00000000, A_C_00000000);
+- /* Right ADC in 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xa) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x5), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xc) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x6), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xe) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x7), A_C_00000000, A_C_00000000);
+- /* Pavel Hofman - we still have voices, A_FXBUS2s, and
+- * A_P16VINs available -
+- * let's add 8 more capture channels - total of 16
+- */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x10));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x8),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x12));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x9),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x14));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xa),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x16));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xb),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x18));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xc),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1a));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xd),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1c));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xe),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1e));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xf),
+- A_C_00000000, A_C_00000000);
++ /* A_P16VIN(0) is delayed by one sample, so all other A_P16VIN channels
++ * will need to also be delayed; we use an auxiliary register for that. */
++ for (z = 1; z < 0x10; z++) {
++ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr), A_FXBUS2(z * 2) );
++ A_OP(icode, &ptr, iACC3, A_GPR(gpr), A_P16VIN(z), A_C_00000000, A_C_00000000);
++ gpr_map[gpr++] = 0x00000000;
++ }
+ }
+
+ #if 0
+diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
+index b2701a4452d86..6ec394fb18468 100644
+--- a/sound/pci/emu10k1/emupcm.c
++++ b/sound/pci/emu10k1/emupcm.c
+@@ -124,7 +124,7 @@ static int snd_emu10k1_pcm_channel_alloc(struct snd_emu10k1_pcm * epcm, int voic
+ epcm->voices[0]->epcm = epcm;
+ if (voices > 1) {
+ for (i = 1; i < voices; i++) {
+- epcm->voices[i] = &epcm->emu->voices[epcm->voices[0]->number + i];
++ epcm->voices[i] = &epcm->emu->voices[(epcm->voices[0]->number + i) % NUM_G];
+ epcm->voices[i]->epcm = epcm;
+ }
+ }
+@@ -1236,7 +1236,7 @@ static int snd_emu10k1_capture_mic_close(struct snd_pcm_substream *substream)
+ {
+ struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream);
+
+- emu->capture_interrupt = NULL;
++ emu->capture_mic_interrupt = NULL;
+ emu->pcm_capture_mic_substream = NULL;
+ return 0;
+ }
+@@ -1344,7 +1344,7 @@ static int snd_emu10k1_capture_efx_close(struct snd_pcm_substream *substream)
+ {
+ struct snd_emu10k1 *emu = snd_pcm_substream_chip(substream);
+
+- emu->capture_interrupt = NULL;
++ emu->capture_efx_interrupt = NULL;
+ emu->pcm_capture_efx_substream = NULL;
+ return 0;
+ }
+@@ -1781,17 +1781,21 @@ int snd_emu10k1_pcm_efx(struct snd_emu10k1 *emu, int device)
+ struct snd_kcontrol *kctl;
+ int err;
+
+- err = snd_pcm_new(emu->card, "emu10k1 efx", device, 8, 1, &pcm);
++ err = snd_pcm_new(emu->card, "emu10k1 efx", device, emu->audigy ? 0 : 8, 1, &pcm);
+ if (err < 0)
+ return err;
+
+ pcm->private_data = emu;
+
+- snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops);
++ if (!emu->audigy)
++ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_emu10k1_fx8010_playback_ops);
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_emu10k1_capture_efx_ops);
+
+ pcm->info_flags = 0;
+- strcpy(pcm->name, "Multichannel Capture/PT Playback");
++ if (emu->audigy)
++ strcpy(pcm->name, "Multichannel Capture");
++ else
++ strcpy(pcm->name, "Multichannel Capture/PT Playback");
+ emu->pcm_efx = pcm;
+
+ /* EFX capture - record the "FXBUS2" channels, by default we connect the EXTINs
+diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c
+index 2651f0c64c062..94efe347a97a9 100644
+--- a/sound/pci/ens1370.c
++++ b/sound/pci/ens1370.c
+@@ -2304,8 +2304,8 @@ static irqreturn_t snd_audiopci_interrupt(int irq, void *dev_id)
+ return IRQ_HANDLED;
+ }
+
+-static int snd_audiopci_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_audiopci_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2369,6 +2369,12 @@ static int snd_audiopci_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_audiopci_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_audiopci_probe(pci, pci_id));
++}
++
+ static struct pci_driver ens137x_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_audiopci_ids,
+diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
+index 00b976f42a3db..e34ec6f89e7e0 100644
+--- a/sound/pci/es1938.c
++++ b/sound/pci/es1938.c
+@@ -1716,8 +1716,8 @@ static int snd_es1938_mixer(struct es1938 *chip)
+ }
+
+
+-static int snd_es1938_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_es1938_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1796,6 +1796,12 @@ static int snd_es1938_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_es1938_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_es1938_probe(pci, pci_id));
++}
++
+ static struct pci_driver es1938_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_es1938_ids,
+diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
+index 6a8a02a9ecf41..4a7e20bb11bca 100644
+--- a/sound/pci/es1968.c
++++ b/sound/pci/es1968.c
+@@ -2741,8 +2741,8 @@ static int snd_es1968_create(struct snd_card *card,
+
+ /*
+ */
+-static int snd_es1968_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_es1968_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2848,6 +2848,12 @@ static int snd_es1968_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_es1968_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_es1968_probe(pci, pci_id));
++}
++
+ static struct pci_driver es1968_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_es1968_ids,
+diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
+index 9c22ff19e56d2..62b3cb126c6d0 100644
+--- a/sound/pci/fm801.c
++++ b/sound/pci/fm801.c
+@@ -1268,8 +1268,8 @@ static int snd_fm801_create(struct snd_card *card,
+ return 0;
+ }
+
+-static int snd_card_fm801_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_card_fm801_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1333,6 +1333,12 @@ static int snd_card_fm801_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_card_fm801_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_card_fm801_probe(pci, pci_id));
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static const unsigned char saved_regs[] = {
+ FM801_PCM_VOL, FM801_I2S_VOL, FM801_FM_VOL, FM801_REC_SRC,
+diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
+index 4a854475a0e60..9cd0d61ab26d5 100644
+--- a/sound/pci/hda/hda_auto_parser.c
++++ b/sound/pci/hda/hda_auto_parser.c
+@@ -823,7 +823,7 @@ static void set_pin_targets(struct hda_codec *codec,
+ snd_hda_set_pin_ctl_cache(codec, cfg->nid, cfg->val);
+ }
+
+-static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
++void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+ {
+ const char *modelname = codec->fixup_name;
+
+@@ -833,7 +833,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+ if (++depth > 10)
+ break;
+ if (fix->chained_before)
+- apply_fixup(codec, fix->chain_id, action, depth + 1);
++ __snd_hda_apply_fixup(codec, fix->chain_id, action, depth + 1);
+
+ switch (fix->type) {
+ case HDA_FIXUP_PINS:
+@@ -874,6 +874,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+ id = fix->chain_id;
+ }
+ }
++EXPORT_SYMBOL_GPL(__snd_hda_apply_fixup);
+
+ /**
+ * snd_hda_apply_fixup - Apply the fixup chain with the given action
+@@ -883,7 +884,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+ void snd_hda_apply_fixup(struct hda_codec *codec, int action)
+ {
+ if (codec->fixup_list)
+- apply_fixup(codec, codec->fixup_id, action, 0);
++ __snd_hda_apply_fixup(codec, codec->fixup_id, action, 0);
+ }
+ EXPORT_SYMBOL_GPL(snd_hda_apply_fixup);
+
+@@ -985,7 +986,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec,
+ int id = HDA_FIXUP_ID_NOT_SET;
+ const char *name = NULL;
+ const char *type = NULL;
+- int vendor, device;
++ unsigned int vendor, device;
+
+ if (codec->fixup_id != HDA_FIXUP_ID_NOT_SET)
+ return;
+diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c
+index 53a2b89f8983c..e63621bcb2142 100644
+--- a/sound/pci/hda/hda_beep.c
++++ b/sound/pci/hda/hda_beep.c
+@@ -118,6 +118,12 @@ static int snd_hda_beep_event(struct input_dev *dev, unsigned int type,
+ return 0;
+ }
+
++static void turn_on_beep(struct hda_beep *beep)
++{
++ if (beep->keep_power_at_enable)
++ snd_hda_power_up_pm(beep->codec);
++}
++
+ static void turn_off_beep(struct hda_beep *beep)
+ {
+ cancel_work_sync(&beep->beep_work);
+@@ -125,6 +131,8 @@ static void turn_off_beep(struct hda_beep *beep)
+ /* turn off beep */
+ generate_tone(beep, 0);
+ }
++ if (beep->keep_power_at_enable)
++ snd_hda_power_down_pm(beep->codec);
+ }
+
+ /**
+@@ -140,7 +148,9 @@ int snd_hda_enable_beep_device(struct hda_codec *codec, int enable)
+ enable = !!enable;
+ if (beep->enabled != enable) {
+ beep->enabled = enable;
+- if (!enable)
++ if (enable)
++ turn_on_beep(beep);
++ else
+ turn_off_beep(beep);
+ return 1;
+ }
+@@ -167,7 +177,8 @@ static int beep_dev_disconnect(struct snd_device *device)
+ input_unregister_device(beep->dev);
+ else
+ input_free_device(beep->dev);
+- turn_off_beep(beep);
++ if (beep->enabled)
++ turn_off_beep(beep);
+ return 0;
+ }
+
+diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h
+index a25358a4807ab..db76e3ddba654 100644
+--- a/sound/pci/hda/hda_beep.h
++++ b/sound/pci/hda/hda_beep.h
+@@ -25,6 +25,7 @@ struct hda_beep {
+ unsigned int enabled:1;
+ unsigned int linear_tone:1; /* linear tone for IDT/STAC codec */
+ unsigned int playing:1;
++ unsigned int keep_power_at_enable:1; /* set by driver */
+ struct work_struct beep_work; /* scheduled task for beep event */
+ struct mutex mutex;
+ void (*power_hook)(struct hda_beep *beep, bool on);
+diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c
+index 1c8bffc3eec6e..8e35009ec25cb 100644
+--- a/sound/pci/hda/hda_bind.c
++++ b/sound/pci/hda/hda_bind.c
+@@ -14,6 +14,7 @@
+ #include <sound/core.h>
+ #include <sound/hda_codec.h>
+ #include "hda_local.h"
++#include "hda_jack.h"
+
+ /*
+ * find a matching codec id
+@@ -143,6 +144,7 @@ static int hda_codec_driver_probe(struct device *dev)
+
+ error:
+ snd_hda_codec_cleanup_for_unbind(codec);
++ codec->preset = NULL;
+ return err;
+ }
+
+@@ -156,9 +158,16 @@ static int hda_codec_driver_remove(struct device *dev)
+ return codec->bus->core.ext_ops->hdev_detach(&codec->core);
+ }
+
++ snd_hda_codec_disconnect_pcms(codec);
++ snd_hda_jack_tbl_disconnect(codec);
++ if (!refcount_dec_and_test(&codec->pcm_ref))
++ wait_event(codec->remove_sleep, !refcount_read(&codec->pcm_ref));
++ snd_power_sync_ref(codec->bus->card);
++
+ if (codec->patch_ops.free)
+ codec->patch_ops.free(codec);
+ snd_hda_codec_cleanup_for_unbind(codec);
++ codec->preset = NULL;
+ module_put(dev->driver->owner);
+ return 0;
+ }
+diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
+index 0c4a337c9fc0d..19be60bb57810 100644
+--- a/sound/pci/hda/hda_codec.c
++++ b/sound/pci/hda/hda_codec.c
+@@ -703,20 +703,10 @@ get_hda_cvt_setup(struct hda_codec *codec, hda_nid_t nid)
+ /*
+ * PCM device
+ */
+-static void release_pcm(struct kref *kref)
+-{
+- struct hda_pcm *pcm = container_of(kref, struct hda_pcm, kref);
+-
+- if (pcm->pcm)
+- snd_device_free(pcm->codec->card, pcm->pcm);
+- clear_bit(pcm->device, pcm->codec->bus->pcm_dev_bits);
+- kfree(pcm->name);
+- kfree(pcm);
+-}
+-
+ void snd_hda_codec_pcm_put(struct hda_pcm *pcm)
+ {
+- kref_put(&pcm->kref, release_pcm);
++ if (refcount_dec_and_test(&pcm->codec->pcm_ref))
++ wake_up(&pcm->codec->remove_sleep);
+ }
+ EXPORT_SYMBOL_GPL(snd_hda_codec_pcm_put);
+
+@@ -731,7 +721,6 @@ struct hda_pcm *snd_hda_codec_pcm_new(struct hda_codec *codec,
+ return NULL;
+
+ pcm->codec = codec;
+- kref_init(&pcm->kref);
+ va_start(args, fmt);
+ pcm->name = kvasprintf(GFP_KERNEL, fmt, args);
+ va_end(args);
+@@ -741,6 +730,7 @@ struct hda_pcm *snd_hda_codec_pcm_new(struct hda_codec *codec,
+ }
+
+ list_add_tail(&pcm->list, &codec->pcm_list_head);
++ refcount_inc(&codec->pcm_ref);
+ return pcm;
+ }
+ EXPORT_SYMBOL_GPL(snd_hda_codec_pcm_new);
+@@ -748,15 +738,31 @@ EXPORT_SYMBOL_GPL(snd_hda_codec_pcm_new);
+ /*
+ * codec destructor
+ */
++void snd_hda_codec_disconnect_pcms(struct hda_codec *codec)
++{
++ struct hda_pcm *pcm;
++
++ list_for_each_entry(pcm, &codec->pcm_list_head, list) {
++ if (pcm->disconnected)
++ continue;
++ if (pcm->pcm)
++ snd_device_disconnect(codec->card, pcm->pcm);
++ snd_hda_codec_pcm_put(pcm);
++ pcm->disconnected = 1;
++ }
++}
++
+ static void codec_release_pcms(struct hda_codec *codec)
+ {
+ struct hda_pcm *pcm, *n;
+
+ list_for_each_entry_safe(pcm, n, &codec->pcm_list_head, list) {
+- list_del_init(&pcm->list);
++ list_del(&pcm->list);
+ if (pcm->pcm)
+- snd_device_disconnect(codec->card, pcm->pcm);
+- snd_hda_codec_pcm_put(pcm);
++ snd_device_free(pcm->codec->card, pcm->pcm);
++ clear_bit(pcm->device, pcm->codec->bus->pcm_dev_bits);
++ kfree(pcm->name);
++ kfree(pcm);
+ }
+ }
+
+@@ -769,6 +775,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec)
+ codec->registered = 0;
+ }
+
++ snd_hda_codec_disconnect_pcms(codec);
+ cancel_delayed_work_sync(&codec->jackpoll_work);
+ if (!codec->in_freeing)
+ snd_hda_ctls_clear(codec);
+@@ -784,7 +791,6 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec)
+ snd_array_free(&codec->cvt_setups);
+ snd_array_free(&codec->spdif_out);
+ snd_array_free(&codec->verbs);
+- codec->preset = NULL;
+ codec->follower_dig_outs = NULL;
+ codec->spdif_status_reset = 0;
+ snd_array_free(&codec->mixers);
+@@ -792,6 +798,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec)
+ remove_conn_list(codec);
+ snd_hdac_regmap_exit(&codec->core);
+ codec->configured = 0;
++ refcount_set(&codec->pcm_ref, 1); /* reset refcount */
+ }
+ EXPORT_SYMBOL_GPL(snd_hda_codec_cleanup_for_unbind);
+
+@@ -958,6 +965,8 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card,
+ snd_array_init(&codec->verbs, sizeof(struct hda_verb *), 8);
+ INIT_LIST_HEAD(&codec->conn_list);
+ INIT_LIST_HEAD(&codec->pcm_list_head);
++ refcount_set(&codec->pcm_ref, 1);
++ init_waitqueue_head(&codec->remove_sleep);
+
+ INIT_DELAYED_WORK(&codec->jackpoll_work, hda_jackpoll_work);
+ codec->depop_delay = -1;
+@@ -1727,8 +1736,11 @@ void snd_hda_ctls_clear(struct hda_codec *codec)
+ {
+ int i;
+ struct hda_nid_item *items = codec->mixers.list;
++
++ down_write(&codec->card->controls_rwsem);
+ for (i = 0; i < codec->mixers.used; i++)
+ snd_ctl_remove(codec->card, items[i].kctl);
++ up_write(&codec->card->controls_rwsem);
+ snd_array_free(&codec->mixers);
+ snd_array_free(&codec->nids);
+ }
+@@ -2987,6 +2999,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec)
+ {
+ struct hda_pcm *cpcm;
+
++ /* Skip the shutdown if codec is not registered */
++ if (!codec->registered)
++ return;
++
+ list_for_each_entry(cpcm, &codec->pcm_list_head, list)
+ snd_pcm_suspend_all(cpcm->pcm);
+
+diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
+index 930ae4002a818..0ff286b7b66be 100644
+--- a/sound/pci/hda/hda_controller.c
++++ b/sound/pci/hda/hda_controller.c
+@@ -504,7 +504,6 @@ static int azx_get_time_info(struct snd_pcm_substream *substream,
+ snd_pcm_gettime(substream->runtime, system_ts);
+
+ nsec = timecounter_read(&azx_dev->core.tc);
+- nsec = div_u64(nsec, 3); /* can be optimized */
+ if (audio_tstamp_config->report_delay)
+ nsec = azx_adjust_codec_delay(substream, nsec);
+
+@@ -1034,10 +1033,8 @@ EXPORT_SYMBOL_GPL(azx_init_chip);
+ void azx_stop_all_streams(struct azx *chip)
+ {
+ struct hdac_bus *bus = azx_bus(chip);
+- struct hdac_stream *s;
+
+- list_for_each_entry(s, &bus->stream_list, list)
+- snd_hdac_stream_stop(s);
++ snd_hdac_stop_streams(bus);
+ }
+ EXPORT_SYMBOL_GPL(azx_stop_all_streams);
+
+diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
+index 3bf5e34107038..dbf7aa88e0e31 100644
+--- a/sound/pci/hda/hda_generic.c
++++ b/sound/pci/hda/hda_generic.c
+@@ -91,6 +91,12 @@ static void snd_hda_gen_spec_free(struct hda_gen_spec *spec)
+ free_kctls(spec);
+ snd_array_free(&spec->paths);
+ snd_array_free(&spec->loopback_list);
++#ifdef CONFIG_SND_HDA_GENERIC_LEDS
++ if (spec->led_cdevs[LED_AUDIO_MUTE])
++ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]);
++ if (spec->led_cdevs[LED_AUDIO_MICMUTE])
++ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]);
++#endif
+ }
+
+ /*
+@@ -1149,8 +1155,8 @@ static bool path_has_mixer(struct hda_codec *codec, int path_idx, int ctl_type)
+ return path && path->ctls[ctl_type];
+ }
+
+-static const char * const channel_name[4] = {
+- "Front", "Surround", "CLFE", "Side"
++static const char * const channel_name[] = {
++ "Front", "Surround", "CLFE", "Side", "Back",
+ };
+
+ /* give some appropriate ctl name prefix for the given line out channel */
+@@ -1176,7 +1182,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch,
+
+ /* multi-io channels */
+ if (ch >= cfg->line_outs)
+- return channel_name[ch];
++ goto fixed_name;
+
+ switch (cfg->line_out_type) {
+ case AUTO_PIN_SPEAKER_OUT:
+@@ -1228,6 +1234,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch,
+ if (cfg->line_outs == 1 && !spec->multi_ios)
+ return "Line Out";
+
++ fixed_name:
+ if (ch >= ARRAY_SIZE(channel_name)) {
+ snd_BUG();
+ return "PCM";
+@@ -3922,7 +3929,10 @@ static int create_mute_led_cdev(struct hda_codec *codec,
+ enum led_brightness),
+ bool micmute)
+ {
++ struct hda_gen_spec *spec = codec->spec;
+ struct led_classdev *cdev;
++ int idx = micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE;
++ int err;
+
+ cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL);
+ if (!cdev)
+@@ -3932,10 +3942,14 @@ static int create_mute_led_cdev(struct hda_codec *codec,
+ cdev->max_brightness = 1;
+ cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute";
+ cdev->brightness_set_blocking = callback;
+- cdev->brightness = ledtrig_audio_get(micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE);
++ cdev->brightness = ledtrig_audio_get(idx);
+ cdev->flags = LED_CORE_SUSPENDRESUME;
+
+- return devm_led_classdev_register(&codec->core.dev, cdev);
++ err = led_classdev_register(&codec->core.dev, cdev);
++ if (err < 0)
++ return err;
++ spec->led_cdevs[idx] = cdev;
++ return 0;
+ }
+
+ /**
+diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
+index c43bd0f0338ea..362ddcaea15b3 100644
+--- a/sound/pci/hda/hda_generic.h
++++ b/sound/pci/hda/hda_generic.h
+@@ -294,6 +294,9 @@ struct hda_gen_spec {
+ struct hda_jack_callback *cb);
+ void (*mic_autoswitch_hook)(struct hda_codec *codec,
+ struct hda_jack_callback *cb);
++
++ /* leds */
++ struct led_classdev *led_cdevs[NUM_AUDIO_LEDS];
+ };
+
+ /* values for add_stereo_mix_input flag */
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 4d22e7adeee8e..1379ac07df350 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -230,6 +230,7 @@ enum {
+ AZX_DRIVER_ATI,
+ AZX_DRIVER_ATIHDMI,
+ AZX_DRIVER_ATIHDMI_NS,
++ AZX_DRIVER_GFHDMI,
+ AZX_DRIVER_VIA,
+ AZX_DRIVER_SIS,
+ AZX_DRIVER_ULI,
+@@ -331,11 +332,15 @@ enum {
+ #define needs_eld_notify_link(chip) false
+ #endif
+
+-#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
++#define CONTROLLER_IN_GPU(pci) (((pci)->vendor == 0x8086) && \
++ (((pci)->device == 0x0a0c) || \
+ ((pci)->device == 0x0c0c) || \
+ ((pci)->device == 0x0d0c) || \
+ ((pci)->device == 0x160c) || \
+- ((pci)->device == 0x490d))
++ ((pci)->device == 0x490d) || \
++ ((pci)->device == 0x4f90) || \
++ ((pci)->device == 0x4f91) || \
++ ((pci)->device == 0x4f92)))
+
+ #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+
+@@ -348,6 +353,7 @@ static const char * const driver_short_names[] = {
+ [AZX_DRIVER_ATI] = "HDA ATI SB",
+ [AZX_DRIVER_ATIHDMI] = "HDA ATI HDMI",
+ [AZX_DRIVER_ATIHDMI_NS] = "HDA ATI HDMI",
++ [AZX_DRIVER_GFHDMI] = "HDA GF HDMI",
+ [AZX_DRIVER_VIA] = "HDA VIA VT82xx",
+ [AZX_DRIVER_SIS] = "HDA SIS966",
+ [AZX_DRIVER_ULI] = "HDA ULI M5461",
+@@ -638,13 +644,17 @@ static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev)
+ * the update-IRQ timing. The IRQ is issued before actually the
+ * data is processed. So, we need to process it afterwords in a
+ * workqueue.
++ *
++ * Returns 1 if OK to proceed, 0 for delay handling, -1 for skipping update
+ */
+ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev)
+ {
+ struct snd_pcm_substream *substream = azx_dev->core.substream;
++ struct snd_pcm_runtime *runtime = substream->runtime;
+ int stream = substream->stream;
+ u32 wallclk;
+ unsigned int pos;
++ snd_pcm_uframes_t hwptr, target;
+
+ wallclk = azx_readl(chip, WALLCLK) - azx_dev->core.start_wallclk;
+ if (wallclk < (azx_dev->core.period_wallclk * 2) / 3)
+@@ -681,6 +691,24 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev)
+ /* NG - it's below the first next period boundary */
+ return chip->bdl_pos_adj ? 0 : -1;
+ azx_dev->core.start_wallclk += wallclk;
++
++ if (azx_dev->core.no_period_wakeup)
++ return 1; /* OK, no need to check period boundary */
++
++ if (runtime->hw_ptr_base != runtime->hw_ptr_interrupt)
++ return 1; /* OK, already in hwptr updating process */
++
++ /* check whether the period gets really elapsed */
++ pos = bytes_to_frames(runtime, pos);
++ hwptr = runtime->hw_ptr_base + pos;
++ if (hwptr < runtime->status->hw_ptr)
++ hwptr += runtime->buffer_size;
++ target = runtime->hw_ptr_interrupt + runtime->period_size;
++ if (hwptr < target) {
++ /* too early wakeup, process it later */
++ return chip->bdl_pos_adj ? 0 : -1;
++ }
++
+ return 1; /* OK, it's fine */
+ }
+
+@@ -859,31 +887,6 @@ static int azx_get_delay_from_fifo(struct azx *chip, struct azx_dev *azx_dev,
+ return substream->runtime->delay;
+ }
+
+-static unsigned int azx_skl_get_dpib_pos(struct azx *chip,
+- struct azx_dev *azx_dev)
+-{
+- return _snd_hdac_chip_readl(azx_bus(chip),
+- AZX_REG_VS_SDXDPIB_XBASE +
+- (AZX_REG_VS_SDXDPIB_XINTERVAL *
+- azx_dev->core.index));
+-}
+-
+-/* get the current DMA position with correction on SKL+ chips */
+-static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev)
+-{
+- /* DPIB register gives a more accurate position for playback */
+- if (azx_dev->core.substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- return azx_skl_get_dpib_pos(chip, azx_dev);
+-
+- /* For capture, we need to read posbuf, but it requires a delay
+- * for the possible boundary overlap; the read of DPIB fetches the
+- * actual posbuf
+- */
+- udelay(20);
+- azx_skl_get_dpib_pos(chip, azx_dev);
+- return azx_get_pos_posbuf(chip, azx_dev);
+-}
+-
+ static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset)
+ {
+ azx_stop_chip(chip);
+@@ -1573,7 +1576,7 @@ static void assign_position_fix(struct azx *chip, int fix)
+ [POS_FIX_POSBUF] = azx_get_pos_posbuf,
+ [POS_FIX_VIACOMBO] = azx_via_get_position,
+ [POS_FIX_COMBO] = azx_get_pos_lpib,
+- [POS_FIX_SKL] = azx_get_pos_skl,
++ [POS_FIX_SKL] = azx_get_pos_posbuf,
+ [POS_FIX_FIFO] = azx_get_pos_fifo,
+ };
+
+@@ -1611,6 +1614,7 @@ static const struct snd_pci_quirk probe_mask_list[] = {
+ /* forced codec slots */
+ SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103),
+ SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103),
++ SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105),
+ /* WinFast VP200 H (Teradici) user reported broken communication */
+ SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101),
+ {}
+@@ -1740,6 +1744,12 @@ static int default_bdl_pos_adj(struct azx *chip)
+ }
+
+ switch (chip->driver_type) {
++ /*
++ * increase the bdl size for Glenfly Gpus for hardware
++ * limitation on hdac interrupt interval
++ */
++ case AZX_DRIVER_GFHDMI:
++ return 128;
+ case AZX_DRIVER_ICH:
+ case AZX_DRIVER_PCH:
+ return 1;
+@@ -1794,8 +1804,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
+
+ assign_position_fix(chip, check_position_fix(chip, position_fix[dev]));
+
+- check_probe_mask(chip, dev);
+-
+ if (single_cmd < 0) /* allow fallback to single_cmd at errors */
+ chip->fallback_to_single_cmd = 1;
+ else /* explicitly set to single_cmd or not */
+@@ -1821,6 +1829,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
+ chip->bus.core.needs_damn_long_delay = 1;
+ }
+
++ check_probe_mask(chip, dev);
++
+ err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+ if (err < 0) {
+ dev_err(card->dev, "Error creating device [card]!\n");
+@@ -1855,6 +1865,12 @@ static int azx_first_init(struct azx *chip)
+ pci_write_config_dword(pci, PCI_BASE_ADDRESS_1, 0);
+ }
+ #endif
++ /*
++ * Fix response write request not synced to memory when handle
++ * hdac interrupt on Glenfly Gpus
++ */
++ if (chip->driver_type == AZX_DRIVER_GFHDMI)
++ bus->polling_mode = 1;
+
+ err = pcim_iomap_regions(pci, 1 << 0, "ICH HD audio");
+ if (err < 0)
+@@ -1955,6 +1971,7 @@ static int azx_first_init(struct azx *chip)
+ chip->playback_streams = ATIHDMI_NUM_PLAYBACK;
+ chip->capture_streams = ATIHDMI_NUM_CAPTURE;
+ break;
++ case AZX_DRIVER_GFHDMI:
+ case AZX_DRIVER_GENERIC:
+ default:
+ chip->playback_streams = ICH6_NUM_PLAYBACK;
+@@ -2330,7 +2347,8 @@ static int azx_probe_continue(struct azx *chip)
+
+ out_free:
+ if (err < 0) {
+- azx_free(chip);
++ pci_set_drvdata(pci, NULL);
++ snd_card_free(chip->card);
+ return err;
+ }
+
+@@ -2474,6 +2492,13 @@ static const struct pci_device_id azx_ids[] = {
+ /* DG1 */
+ { PCI_DEVICE(0x8086, 0x490d),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
++ /* DG2 */
++ { PCI_DEVICE(0x8086, 0x4f90),
++ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
++ { PCI_DEVICE(0x8086, 0x4f91),
++ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
++ { PCI_DEVICE(0x8086, 0x4f92),
++ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* Alderlake-S */
+ { PCI_DEVICE(0x8086, 0x7ad0),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+@@ -2510,9 +2535,12 @@ static const struct pci_device_id azx_ids[] = {
+ /* 5 Series/3400 */
+ { PCI_DEVICE(0x8086, 0x3b56),
+ .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },
++ { PCI_DEVICE(0x8086, 0x3b57),
++ .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },
+ /* Poulsbo */
+ { PCI_DEVICE(0x8086, 0x811b),
+- .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_BASE },
++ .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_BASE |
++ AZX_DCAPS_POSFIX_LPIB },
+ /* Oaktrail */
+ { PCI_DEVICE(0x8086, 0x080a),
+ .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_BASE },
+@@ -2675,9 +2703,18 @@ static const struct pci_device_id azx_ids[] = {
+ { PCI_DEVICE(0x1002, 0xab28),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS |
+ AZX_DCAPS_PM_RUNTIME },
++ { PCI_DEVICE(0x1002, 0xab30),
++ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS |
++ AZX_DCAPS_PM_RUNTIME },
+ { PCI_DEVICE(0x1002, 0xab38),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS |
+ AZX_DCAPS_PM_RUNTIME },
++ /* GLENFLY */
++ { PCI_DEVICE(0x6766, PCI_ANY_ID),
++ .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
++ .class_mask = 0xffffff,
++ .driver_data = AZX_DRIVER_GFHDMI | AZX_DCAPS_POSFIX_LPIB |
++ AZX_DCAPS_NO_MSI | AZX_DCAPS_NO_64BIT },
+ /* VIA VT8251/VT8237A */
+ { PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA },
+ /* VIA GFX VT7122/VX900 */
+diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c
+index f29975e3e98df..7d7786df60ea7 100644
+--- a/sound/pci/hda/hda_jack.c
++++ b/sound/pci/hda/hda_jack.c
+@@ -158,6 +158,17 @@ snd_hda_jack_tbl_new(struct hda_codec *codec, hda_nid_t nid, int dev_id)
+ return jack;
+ }
+
++void snd_hda_jack_tbl_disconnect(struct hda_codec *codec)
++{
++ struct hda_jack_tbl *jack = codec->jacktbl.list;
++ int i;
++
++ for (i = 0; i < codec->jacktbl.used; i++, jack++) {
++ if (!codec->bus->shutdown && jack->jack)
++ snd_device_disconnect(codec->card, jack->jack);
++ }
++}
++
+ void snd_hda_jack_tbl_clear(struct hda_codec *codec)
+ {
+ struct hda_jack_tbl *jack = codec->jacktbl.list;
+diff --git a/sound/pci/hda/hda_jack.h b/sound/pci/hda/hda_jack.h
+index 2abf7aac243a2..ff7d289c034bf 100644
+--- a/sound/pci/hda/hda_jack.h
++++ b/sound/pci/hda/hda_jack.h
+@@ -69,6 +69,7 @@ struct hda_jack_tbl *
+ snd_hda_jack_tbl_get_from_tag(struct hda_codec *codec,
+ unsigned char tag, int dev_id);
+
++void snd_hda_jack_tbl_disconnect(struct hda_codec *codec);
+ void snd_hda_jack_tbl_clear(struct hda_codec *codec);
+
+ void snd_hda_jack_set_dirty_all(struct hda_codec *codec);
+diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
+index ea8ab8b433378..63c00363acad9 100644
+--- a/sound/pci/hda/hda_local.h
++++ b/sound/pci/hda/hda_local.h
+@@ -137,6 +137,7 @@ int __snd_hda_add_vmaster(struct hda_codec *codec, char *name,
+ int snd_hda_codec_reset(struct hda_codec *codec);
+ void snd_hda_codec_register(struct hda_codec *codec);
+ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec);
++void snd_hda_codec_disconnect_pcms(struct hda_codec *codec);
+
+ #define snd_hda_regmap_sync(codec) snd_hdac_regmap_sync(&(codec)->core)
+
+@@ -349,6 +350,7 @@ void snd_hda_apply_verbs(struct hda_codec *codec);
+ void snd_hda_apply_pincfgs(struct hda_codec *codec,
+ const struct hda_pintbl *cfg);
+ void snd_hda_apply_fixup(struct hda_codec *codec, int action);
++void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth);
+ void snd_hda_pick_fixup(struct hda_codec *codec,
+ const struct hda_model_fixup *models,
+ const struct snd_pci_quirk *quirk,
+@@ -438,6 +440,15 @@ int snd_hda_codec_set_pin_target(struct hda_codec *codec, hda_nid_t nid,
+ #define for_each_hda_codec_node(nid, codec) \
+ for ((nid) = (codec)->core.start_nid; (nid) < (codec)->core.end_nid; (nid)++)
+
++/* Set the codec power_state flag to indicate to allow unsol event handling;
++ * see hda_codec_unsol_event() in hda_bind.c. Calling this might confuse the
++ * state tracking, so use with care.
++ */
++static inline void snd_hda_codec_allow_unsol_events(struct hda_codec *codec)
++{
++ codec->core.dev.power.power_state = PMSG_ON;
++}
++
+ /*
+ * get widget capabilities
+ */
+diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
+index ea700395bef40..f0e556f2ccf69 100644
+--- a/sound/pci/hda/hda_tegra.c
++++ b/sound/pci/hda/hda_tegra.c
+@@ -68,14 +68,20 @@
+ */
+ #define TEGRA194_NUM_SDO_LINES 4
+
++struct hda_tegra_soc {
++ bool has_hda2codec_2x_reset;
++};
++
+ struct hda_tegra {
+ struct azx chip;
+ struct device *dev;
+- struct reset_control *reset;
++ struct reset_control_bulk_data resets[3];
+ struct clk_bulk_data clocks[3];
++ unsigned int nresets;
+ unsigned int nclocks;
+ void __iomem *regs;
+ struct work_struct probe_work;
++ const struct hda_tegra_soc *soc;
+ };
+
+ #ifdef CONFIG_PM
+@@ -170,7 +176,7 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
+ int rc;
+
+ if (!chip->running) {
+- rc = reset_control_assert(hda->reset);
++ rc = reset_control_bulk_assert(hda->nresets, hda->resets);
+ if (rc)
+ return rc;
+ }
+@@ -187,7 +193,7 @@ static int __maybe_unused hda_tegra_runtime_resume(struct device *dev)
+ } else {
+ usleep_range(10, 100);
+
+- rc = reset_control_deassert(hda->reset);
++ rc = reset_control_bulk_deassert(hda->nresets, hda->resets);
+ if (rc)
+ return rc;
+ }
+@@ -427,9 +433,17 @@ static int hda_tegra_create(struct snd_card *card,
+ return 0;
+ }
+
++static const struct hda_tegra_soc tegra30_data = {
++ .has_hda2codec_2x_reset = true,
++};
++
++static const struct hda_tegra_soc tegra194_data = {
++ .has_hda2codec_2x_reset = false,
++};
++
+ static const struct of_device_id hda_tegra_match[] = {
+- { .compatible = "nvidia,tegra30-hda" },
+- { .compatible = "nvidia,tegra194-hda" },
++ { .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
++ { .compatible = "nvidia,tegra194-hda", .data = &tegra194_data },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, hda_tegra_match);
+@@ -437,7 +451,8 @@ MODULE_DEVICE_TABLE(of, hda_tegra_match);
+ static int hda_tegra_probe(struct platform_device *pdev)
+ {
+ const unsigned int driver_flags = AZX_DCAPS_CORBRP_SELF_CLEAR |
+- AZX_DCAPS_PM_RUNTIME;
++ AZX_DCAPS_PM_RUNTIME |
++ AZX_DCAPS_4K_BDLE_BOUNDARY;
+ struct snd_card *card;
+ struct azx *chip;
+ struct hda_tegra *hda;
+@@ -449,6 +464,8 @@ static int hda_tegra_probe(struct platform_device *pdev)
+ hda->dev = &pdev->dev;
+ chip = &hda->chip;
+
++ hda->soc = of_device_get_match_data(&pdev->dev);
++
+ err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ THIS_MODULE, 0, &card);
+ if (err < 0) {
+@@ -456,11 +473,20 @@ static int hda_tegra_probe(struct platform_device *pdev)
+ return err;
+ }
+
+- hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev);
+- if (IS_ERR(hda->reset)) {
+- err = PTR_ERR(hda->reset);
++ hda->resets[hda->nresets++].id = "hda";
++ hda->resets[hda->nresets++].id = "hda2hdmi";
++ /*
++ * "hda2codec_2x" reset is not present on Tegra194. Though DT would
++ * be updated to reflect this, but to have backward compatibility
++ * below is necessary.
++ */
++ if (hda->soc->has_hda2codec_2x_reset)
++ hda->resets[hda->nresets++].id = "hda2codec_2x";
++
++ err = devm_reset_control_bulk_get_exclusive(&pdev->dev, hda->nresets,
++ hda->resets);
++ if (err)
+ goto out_free;
+- }
+
+ hda->clocks[hda->nclocks++].id = "hda";
+ hda->clocks[hda->nclocks++].id = "hda2hdmi";
+diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
+index 208933792787d..fab7c329acbe3 100644
+--- a/sound/pci/hda/patch_ca0132.c
++++ b/sound/pci/hda/patch_ca0132.c
+@@ -1306,6 +1306,8 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
+ SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI),
+ SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI),
+ SND_PCI_QUIRK(0x3842, 0x1038, "EVGA X99 Classified", QUIRK_R3DI),
++ SND_PCI_QUIRK(0x3842, 0x104b, "EVGA X299 Dark", QUIRK_R3DI),
++ SND_PCI_QUIRK(0x3842, 0x1055, "EVGA Z390 DARK", QUIRK_R3DI),
+ SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D),
+ SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D),
+ SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5),
+@@ -2454,7 +2456,7 @@ static int dspio_set_uint_param(struct hda_codec *codec, int mod_id,
+ static int dspio_alloc_dma_chan(struct hda_codec *codec, unsigned int *dma_chan)
+ {
+ int status = 0;
+- unsigned int size = sizeof(dma_chan);
++ unsigned int size = sizeof(*dma_chan);
+
+ codec_dbg(codec, " dspio_alloc_dma_chan() -- begin\n");
+ status = dspio_scp(codec, MASTERCONTROL, 0x20,
+@@ -4230,8 +4232,10 @@ static int tuning_ctl_set(struct hda_codec *codec, hda_nid_t nid,
+
+ for (i = 0; i < TUNING_CTLS_COUNT; i++)
+ if (nid == ca0132_tuning_ctls[i].nid)
+- break;
++ goto found;
+
++ return -EINVAL;
++found:
+ snd_hda_power_up(codec);
+ dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20,
+ ca0132_tuning_ctls[i].req,
+diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
+index 678fbcaf2a3bc..6807b4708a176 100644
+--- a/sound/pci/hda/patch_cirrus.c
++++ b/sound/pci/hda/patch_cirrus.c
+@@ -395,6 +395,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = {
+
+ /* codec SSID */
+ SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122),
++ SND_PCI_QUIRK(0x106b, 0x0900, "iMac 12,1", CS420X_IMAC27_122),
+ SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81),
+ SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122),
+ SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101),
+diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
+index 0515137a75b0f..deff4a3d7a78c 100644
+--- a/sound/pci/hda/patch_conexant.c
++++ b/sound/pci/hda/patch_conexant.c
+@@ -222,6 +222,7 @@ enum {
+ CXT_PINCFG_LEMOTE_A1205,
+ CXT_PINCFG_COMPAQ_CQ60,
+ CXT_FIXUP_STEREO_DMIC,
++ CXT_PINCFG_LENOVO_NOTEBOOK,
+ CXT_FIXUP_INC_MIC_BOOST,
+ CXT_FIXUP_HEADPHONE_MIC_PIN,
+ CXT_FIXUP_HEADPHONE_MIC,
+@@ -772,6 +773,14 @@ static const struct hda_fixup cxt_fixups[] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cxt_fixup_stereo_dmic,
+ },
++ [CXT_PINCFG_LENOVO_NOTEBOOK] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x1a, 0x05d71030 },
++ { }
++ },
++ .chain_id = CXT_FIXUP_STEREO_DMIC,
++ },
+ [CXT_FIXUP_INC_MIC_BOOST] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cxt5066_increase_mic_boost,
+@@ -944,6 +953,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
+ SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
+ SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO),
+ SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO),
+ SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK),
+@@ -970,6 +980,9 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
+ SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC),
++ /* NOTE: we'd need to extend the quirk for 17aa:3977 as the same
++ * PCI SSID is used on multiple Lenovo models
++ */
+ SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC),
+@@ -993,6 +1006,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
+ { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" },
+ { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
+ { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
++ { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" },
+ {}
+ };
+
+@@ -1052,6 +1066,13 @@ static int patch_conexant_auto(struct hda_codec *codec)
+ snd_hda_pick_fixup(codec, cxt5051_fixup_models,
+ cxt5051_fixups, cxt_fixups);
+ break;
++ case 0x14f15098:
++ codec->pin_amp_workaround = 1;
++ spec->gen.mixer_nid = 0x22;
++ spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
++ snd_hda_pick_fixup(codec, cxt5066_fixup_models,
++ cxt5066_fixups, cxt_fixups);
++ break;
+ case 0x14f150f2:
+ codec->power_save_node = 1;
+ fallthrough;
+@@ -1072,11 +1093,11 @@ static int patch_conexant_auto(struct hda_codec *codec)
+ if (err < 0)
+ goto error;
+
+- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
++ err = cx_auto_parse_beep(codec);
+ if (err < 0)
+ goto error;
+
+- err = cx_auto_parse_beep(codec);
++ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ if (err < 0)
+ goto error;
+
+@@ -1107,6 +1128,7 @@ static const struct hda_device_id snd_hda_id_conexant[] = {
+ HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto),
++ HDA_CODEC_ENTRY(0x14f120d1, "SN6180", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),
+diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c
+index 0fb0a428428b4..df0b4522babf7 100644
+--- a/sound/pci/hda/patch_cs8409-tables.c
++++ b/sound/pci/hda/patch_cs8409-tables.c
+@@ -252,6 +252,7 @@ struct sub_codec cs8409_cs42l42_codec = {
+ .init_seq_num = ARRAY_SIZE(cs42l42_init_reg_seq),
+ .hp_jack_in = 0,
+ .mic_jack_in = 0,
++ .force_status_change = 1,
+ .paged = 1,
+ .suspended = 1,
+ .no_type_dect = 0,
+@@ -443,6 +444,7 @@ struct sub_codec dolphin_cs42l42_0 = {
+ .init_seq_num = ARRAY_SIZE(dolphin_c0_init_reg_seq),
+ .hp_jack_in = 0,
+ .mic_jack_in = 0,
++ .force_status_change = 1,
+ .paged = 1,
+ .suspended = 1,
+ .no_type_dect = 0,
+@@ -456,6 +458,7 @@ struct sub_codec dolphin_cs42l42_1 = {
+ .init_seq_num = ARRAY_SIZE(dolphin_c1_init_reg_seq),
+ .hp_jack_in = 0,
+ .mic_jack_in = 0,
++ .force_status_change = 1,
+ .paged = 1,
+ .suspended = 1,
+ .no_type_dect = 1,
+diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c
+index 31ff11ab868e1..aff2b5abb81ea 100644
+--- a/sound/pci/hda/patch_cs8409.c
++++ b/sound/pci/hda/patch_cs8409.c
+@@ -628,15 +628,17 @@ static void cs42l42_run_jack_detect(struct sub_codec *cs42l42)
+ cs8409_i2c_write(cs42l42, 0x1b74, 0x07);
+ cs8409_i2c_write(cs42l42, 0x131b, 0xFD);
+ cs8409_i2c_write(cs42l42, 0x1120, 0x80);
+- /* Wait ~100us*/
+- usleep_range(100, 200);
++ /* Wait ~20ms*/
++ usleep_range(20000, 25000);
+ cs8409_i2c_write(cs42l42, 0x111f, 0x77);
+ cs8409_i2c_write(cs42l42, 0x1120, 0xc0);
+ }
+
+ static int cs42l42_handle_tip_sense(struct sub_codec *cs42l42, unsigned int reg_ts_status)
+ {
+- int status_changed = 0;
++ int status_changed = cs42l42->force_status_change;
++
++ cs42l42->force_status_change = 0;
+
+ /* TIP_SENSE INSERT/REMOVE */
+ switch (reg_ts_status) {
+@@ -750,6 +752,11 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
+ if (cs42l42->full_scale_vol)
+ cs8409_i2c_write(cs42l42, 0x2001, 0x01);
+
++ /* we have to explicitly allow unsol event handling even during the
++ * resume phase so that the jack event is processed properly
++ */
++ snd_hda_codec_allow_unsol_events(cs42l42->codec);
++
+ cs42l42_enable_jack_detect(cs42l42);
+ }
+
+@@ -786,6 +793,7 @@ static void cs42l42_suspend(struct sub_codec *cs42l42)
+ cs42l42->last_page = 0;
+ cs42l42->hp_jack_in = 0;
+ cs42l42->mic_jack_in = 0;
++ cs42l42->force_status_change = 1;
+
+ /* Put CS42L42 into Reset */
+ gpio_data = snd_hda_codec_read(codec, CS8409_PIN_AFG, 0, AC_VERB_GET_GPIO_DATA, 0);
+diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h
+index ade2b838590cf..d0b725c7285b6 100644
+--- a/sound/pci/hda/patch_cs8409.h
++++ b/sound/pci/hda/patch_cs8409.h
+@@ -305,6 +305,7 @@ struct sub_codec {
+
+ unsigned int hp_jack_in:1;
+ unsigned int mic_jack_in:1;
++ unsigned int force_status_change:1;
+ unsigned int suspended:1;
+ unsigned int paged:1;
+ unsigned int last_page;
+diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
+index 65d2c55399195..3cd3b5c49e45e 100644
+--- a/sound/pci/hda/patch_hdmi.c
++++ b/sound/pci/hda/patch_hdmi.c
+@@ -162,6 +162,8 @@ struct hdmi_spec {
+ bool dyn_pin_out;
+ bool dyn_pcm_assign;
+ bool dyn_pcm_no_legacy;
++ bool nv_dp_workaround; /* workaround DP audio infoframe for Nvidia */
++
+ bool intel_hsw_fixup; /* apply Intel platform-specific fixups */
+ /*
+ * Non-generic VIA/NVIDIA specific
+@@ -671,15 +673,24 @@ static void hdmi_pin_setup_infoframe(struct hda_codec *codec,
+ int ca, int active_channels,
+ int conn_type)
+ {
++ struct hdmi_spec *spec = codec->spec;
+ union audio_infoframe ai;
+
+ memset(&ai, 0, sizeof(ai));
+- if (conn_type == 0) { /* HDMI */
++ if ((conn_type == 0) || /* HDMI */
++ /* Nvidia DisplayPort: Nvidia HW expects same layout as HDMI */
++ (conn_type == 1 && spec->nv_dp_workaround)) {
+ struct hdmi_audio_infoframe *hdmi_ai = &ai.hdmi;
+
+- hdmi_ai->type = 0x84;
+- hdmi_ai->ver = 0x01;
+- hdmi_ai->len = 0x0a;
++ if (conn_type == 0) { /* HDMI */
++ hdmi_ai->type = 0x84;
++ hdmi_ai->ver = 0x01;
++ hdmi_ai->len = 0x0a;
++ } else {/* Nvidia DP */
++ hdmi_ai->type = 0x84;
++ hdmi_ai->ver = 0x1b;
++ hdmi_ai->len = 0x11 << 2;
++ }
+ hdmi_ai->CC02_CT47 = active_channels - 1;
+ hdmi_ai->CA = ca;
+ hdmi_checksum_audio_infoframe(hdmi_ai);
+@@ -1259,6 +1270,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo,
+ set_bit(pcm_idx, &spec->pcm_in_use);
+ per_pin = get_pin(spec, pin_idx);
+ per_pin->cvt_nid = per_cvt->cvt_nid;
++ per_pin->silent_stream = false;
+ hinfo->nid = per_cvt->cvt_nid;
+
+ /* flip stripe flag for the assigned stream if supported */
+@@ -1387,7 +1399,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
+
+ last_try:
+ /* the last try; check the empty slots in pins */
+- for (i = 0; i < spec->num_nids; i++) {
++ for (i = 0; i < spec->pcm_used; i++) {
+ if (!test_bit(i, &spec->pcm_bitmap))
+ return i;
+ }
+@@ -1617,6 +1629,7 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin,
+ struct hda_codec *codec = per_pin->codec;
+ struct hdmi_spec *spec = codec->spec;
+ struct hdmi_eld *eld = &spec->temp_eld;
++ struct device *dev = hda_codec_dev(codec);
+ hda_nid_t pin_nid = per_pin->pin_nid;
+ int dev_id = per_pin->dev_id;
+ /*
+@@ -1630,8 +1643,13 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin,
+ int present;
+ int ret;
+
++#ifdef CONFIG_PM
++ if (dev->power.runtime_status == RPM_SUSPENDING)
++ return;
++#endif
++
+ ret = snd_hda_power_up_pm(codec);
+- if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec)))
++ if (ret < 0 && pm_runtime_suspended(dev))
+ goto out;
+
+ present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id);
+@@ -1944,6 +1962,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid)
+ static const struct snd_pci_quirk force_connect_list[] = {
+ SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
+ SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
++ SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1),
++ SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1),
+ SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1),
+ SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1),
+ {}
+@@ -2257,7 +2277,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec)
+ * dev_num is the device entry number in a pin
+ */
+
+- if (codec->mst_no_extra_pcms)
++ if (spec->dyn_pcm_no_legacy && codec->mst_no_extra_pcms)
++ pcm_num = spec->num_cvts;
++ else if (codec->mst_no_extra_pcms)
+ pcm_num = spec->num_nids;
+ else
+ pcm_num = spec->num_nids + spec->dev_num - 1;
+@@ -2665,9 +2687,6 @@ static void generic_acomp_pin_eld_notify(void *audio_ptr, int port, int dev_id)
+ */
+ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND)
+ return;
+- /* ditto during suspend/resume process itself */
+- if (snd_hdac_is_in_pm(&codec->core))
+- return;
+
+ check_presence_and_report(codec, pin_nid, dev_id);
+ }
+@@ -2851,9 +2870,6 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe)
+ */
+ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND)
+ return;
+- /* ditto during suspend/resume process itself */
+- if (snd_hdac_is_in_pm(&codec->core))
+- return;
+
+ snd_hdac_i915_set_bclk(&codec->bus->core);
+ check_presence_and_report(codec, pin_nid, dev_id);
+@@ -2947,7 +2963,8 @@ static int parse_intel_hdmi(struct hda_codec *codec)
+
+ /* Intel Haswell and onwards; audio component with eld notifier */
+ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid,
+- const int *port_map, int port_num, int dev_num)
++ const int *port_map, int port_num, int dev_num,
++ bool send_silent_stream)
+ {
+ struct hdmi_spec *spec;
+ int err;
+@@ -2980,7 +2997,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid,
+ * Enable silent stream feature, if it is enabled via
+ * module param or Kconfig option
+ */
+- if (enable_silent_stream)
++ if (send_silent_stream)
+ spec->send_silent_stream = true;
+
+ return parse_intel_hdmi(codec);
+@@ -2988,12 +3005,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid,
+
+ static int patch_i915_hsw_hdmi(struct hda_codec *codec)
+ {
+- return intel_hsw_common_init(codec, 0x08, NULL, 0, 3);
++ return intel_hsw_common_init(codec, 0x08, NULL, 0, 3,
++ enable_silent_stream);
+ }
+
+ static int patch_i915_glk_hdmi(struct hda_codec *codec)
+ {
+- return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3);
++ /*
++ * Silent stream calls audio component .get_power() from
++ * .pin_eld_notify(). On GLK this will deadlock in i915 due
++ * to the audio vs. CDCLK workaround.
++ */
++ return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3, false);
+ }
+
+ static int patch_i915_icl_hdmi(struct hda_codec *codec)
+@@ -3004,7 +3027,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec)
+ */
+ static const int map[] = {0x0, 0x4, 0x6, 0x8, 0xa, 0xb};
+
+- return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3);
++ return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3,
++ enable_silent_stream);
+ }
+
+ static int patch_i915_tgl_hdmi(struct hda_codec *codec)
+@@ -3016,7 +3040,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec)
+ static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
+ int ret;
+
+- ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4);
++ ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4,
++ enable_silent_stream);
+ if (!ret) {
+ struct hdmi_spec *spec = codec->spec;
+
+@@ -3522,6 +3547,7 @@ static int patch_nvhdmi_2ch(struct hda_codec *codec)
+ spec->pcm_playback.rates = SUPPORTED_RATES;
+ spec->pcm_playback.maxbps = SUPPORTED_MAXBPS;
+ spec->pcm_playback.formats = SUPPORTED_FORMATS;
++ spec->nv_dp_workaround = true;
+ return 0;
+ }
+
+@@ -3661,6 +3687,7 @@ static int patch_nvhdmi(struct hda_codec *codec)
+ spec->chmap.ops.chmap_cea_alloc_validate_get_type =
+ nvhdmi_chmap_cea_alloc_validate_get_type;
+ spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
++ spec->nv_dp_workaround = true;
+
+ codec->link_down_at_suspend = 1;
+
+@@ -3684,6 +3711,7 @@ static int patch_nvhdmi_legacy(struct hda_codec *codec)
+ spec->chmap.ops.chmap_cea_alloc_validate_get_type =
+ nvhdmi_chmap_cea_alloc_validate_get_type;
+ spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
++ spec->nv_dp_workaround = true;
+
+ codec->link_down_at_suspend = 1;
+
+@@ -3851,11 +3879,13 @@ static int patch_tegra_hdmi(struct hda_codec *codec)
+ if (err)
+ return err;
+
++ codec->depop_delay = 10;
+ codec->patch_ops.build_pcms = tegra_hdmi_build_pcms;
+ spec = codec->spec;
+ spec->chmap.ops.chmap_cea_alloc_validate_get_type =
+ nvhdmi_chmap_cea_alloc_validate_get_type;
+ spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
++ spec->nv_dp_workaround = true;
+
+ return 0;
+ }
+@@ -4268,6 +4298,22 @@ static int patch_via_hdmi(struct hda_codec *codec)
+ return patch_simple_hdmi(codec, VIAHDMI_CVT_NID, VIAHDMI_PIN_NID);
+ }
+
++static int patch_gf_hdmi(struct hda_codec *codec)
++{
++ int err;
++
++ err = patch_generic_hdmi(codec);
++ if (err)
++ return err;
++
++ /*
++ * Glenfly GPUs have two codecs, stream switches from one codec to
++ * another, need to do actual clean-ups in codec_cleanup_stream
++ */
++ codec->no_sticky_stream = 1;
++ return 0;
++}
++
+ /*
+ * patch entries
+ */
+@@ -4355,8 +4401,19 @@ HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi),
+ HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi),
+ HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi),
+ HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi),
++HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi),
++HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi),
++HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi),
++HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi),
++HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi),
+ HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch),
+ HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch),
++HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi),
++HDA_CODEC_ENTRY(0x67663d83, "Arise 83 HDMI/DP", patch_gf_hdmi),
++HDA_CODEC_ENTRY(0x67663d84, "Arise 84 HDMI/DP", patch_gf_hdmi),
++HDA_CODEC_ENTRY(0x67663d85, "Arise 85 HDMI/DP", patch_gf_hdmi),
++HDA_CODEC_ENTRY(0x67663d86, "Arise 86 HDMI/DP", patch_gf_hdmi),
++HDA_CODEC_ENTRY(0x67663d87, "Arise 87 HDMI/DP", patch_gf_hdmi),
+ HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi),
+ HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi),
+ HDA_CODEC_ENTRY(0x11069f84, "VX11 HDMI/DP", patch_generic_hdmi),
+@@ -4380,10 +4437,11 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi),
+ HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi),
+ HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi),
+ HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi),
+-HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
+ HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi),
++HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_tgl_hdmi),
+ HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
+ HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi),
++HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
+ HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
+ HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
+ HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 965b096f416f6..59e11a070c202 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -97,6 +97,7 @@ struct alc_spec {
+ unsigned int gpio_mic_led_mask;
+ struct alc_coef_led mute_led_coef;
+ struct alc_coef_led mic_led_coef;
++ struct mutex coef_mutex;
+
+ hda_nid_t headset_mic_pin;
+ hda_nid_t headphone_mic_pin;
+@@ -119,6 +120,7 @@ struct alc_spec {
+ unsigned int ultra_low_power:1;
+ unsigned int has_hs_key:1;
+ unsigned int no_internal_mic_pin:1;
++ unsigned int en_3kpull_low:1;
+
+ /* for PLL fix */
+ hda_nid_t pll_nid;
+@@ -132,8 +134,24 @@ struct alc_spec {
+ * COEF access helper functions
+ */
+
+-static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+- unsigned int coef_idx)
++static void coef_mutex_lock(struct hda_codec *codec)
++{
++ struct alc_spec *spec = codec->spec;
++
++ snd_hda_power_up_pm(codec);
++ mutex_lock(&spec->coef_mutex);
++}
++
++static void coef_mutex_unlock(struct hda_codec *codec)
++{
++ struct alc_spec *spec = codec->spec;
++
++ mutex_unlock(&spec->coef_mutex);
++ snd_hda_power_down_pm(codec);
++}
++
++static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
++ unsigned int coef_idx)
+ {
+ unsigned int val;
+
+@@ -142,28 +160,56 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ return val;
+ }
+
++static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
++ unsigned int coef_idx)
++{
++ unsigned int val;
++
++ coef_mutex_lock(codec);
++ val = __alc_read_coefex_idx(codec, nid, coef_idx);
++ coef_mutex_unlock(codec);
++ return val;
++}
++
+ #define alc_read_coef_idx(codec, coef_idx) \
+ alc_read_coefex_idx(codec, 0x20, coef_idx)
+
+-static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+- unsigned int coef_idx, unsigned int coef_val)
++static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
++ unsigned int coef_idx, unsigned int coef_val)
+ {
+ snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx);
+ snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val);
+ }
+
++static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
++ unsigned int coef_idx, unsigned int coef_val)
++{
++ coef_mutex_lock(codec);
++ __alc_write_coefex_idx(codec, nid, coef_idx, coef_val);
++ coef_mutex_unlock(codec);
++}
++
+ #define alc_write_coef_idx(codec, coef_idx, coef_val) \
+ alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val)
+
++static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
++ unsigned int coef_idx, unsigned int mask,
++ unsigned int bits_set)
++{
++ unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx);
++
++ if (val != -1)
++ __alc_write_coefex_idx(codec, nid, coef_idx,
++ (val & ~mask) | bits_set);
++}
++
+ static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int mask,
+ unsigned int bits_set)
+ {
+- unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx);
+-
+- if (val != -1)
+- alc_write_coefex_idx(codec, nid, coef_idx,
+- (val & ~mask) | bits_set);
++ coef_mutex_lock(codec);
++ __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set);
++ coef_mutex_unlock(codec);
+ }
+
+ #define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \
+@@ -196,13 +242,15 @@ struct coef_fw {
+ static void alc_process_coef_fw(struct hda_codec *codec,
+ const struct coef_fw *fw)
+ {
++ coef_mutex_lock(codec);
+ for (; fw->nid; fw++) {
+ if (fw->mask == (unsigned short)-1)
+- alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
++ __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
+ else
+- alc_update_coefex_idx(codec, fw->nid, fw->idx,
+- fw->mask, fw->val);
++ __alc_update_coefex_idx(codec, fw->nid, fw->idx,
++ fw->mask, fw->val);
+ }
++ coef_mutex_unlock(codec);
+ }
+
+ /*
+@@ -391,6 +439,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0245:
+ case 0x10ec0255:
+ case 0x10ec0256:
++ case 0x19e58326:
+ case 0x10ec0257:
+ case 0x10ec0282:
+ case 0x10ec0283:
+@@ -528,6 +577,7 @@ static void alc_shutup_pins(struct hda_codec *codec)
+ switch (codec->core.vendor_id) {
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ case 0x10ec0283:
+ case 0x10ec0286:
+ case 0x10ec0288:
+@@ -777,7 +827,7 @@ do_sku:
+ alc_setup_gpio(codec, 0x02);
+ break;
+ case 7:
+- alc_setup_gpio(codec, 0x03);
++ alc_setup_gpio(codec, 0x04);
+ break;
+ case 5:
+ default:
+@@ -885,6 +935,9 @@ static int alc_init(struct hda_codec *codec)
+ return 0;
+ }
+
++#define alc_free snd_hda_gen_free
++
++#ifdef CONFIG_PM
+ static inline void alc_shutup(struct hda_codec *codec)
+ {
+ struct alc_spec *spec = codec->spec;
+@@ -898,9 +951,6 @@ static inline void alc_shutup(struct hda_codec *codec)
+ alc_shutup_pins(codec);
+ }
+
+-#define alc_free snd_hda_gen_free
+-
+-#ifdef CONFIG_PM
+ static void alc_power_eapd(struct hda_codec *codec)
+ {
+ alc_auto_setup_eapd(codec, false);
+@@ -914,9 +964,7 @@ static int alc_suspend(struct hda_codec *codec)
+ spec->power_hook(codec);
+ return 0;
+ }
+-#endif
+
+-#ifdef CONFIG_PM
+ static int alc_resume(struct hda_codec *codec)
+ {
+ struct alc_spec *spec = codec->spec;
+@@ -1148,6 +1196,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid)
+ codec->spdif_status_reset = 1;
+ codec->forced_resume = 1;
+ codec->patch_ops = alc_patch_ops;
++ mutex_init(&spec->coef_mutex);
+
+ err = alc_codec_rename_from_preset(codec);
+ if (err < 0) {
+@@ -1924,11 +1973,13 @@ enum {
+ ALC887_FIXUP_ASUS_BASS,
+ ALC887_FIXUP_BASS_CHMAP,
+ ALC1220_FIXUP_GB_DUAL_CODECS,
++ ALC1220_FIXUP_GB_X570,
+ ALC1220_FIXUP_CLEVO_P950,
+ ALC1220_FIXUP_CLEVO_PB51ED,
+ ALC1220_FIXUP_CLEVO_PB51ED_PINS,
+ ALC887_FIXUP_ASUS_AUDIO,
+ ALC887_FIXUP_ASUS_HMIC,
++ ALCS1200A_FIXUP_MIC_VREF,
+ };
+
+ static void alc889_fixup_coef(struct hda_codec *codec,
+@@ -2113,6 +2164,30 @@ static void alc1220_fixup_gb_dual_codecs(struct hda_codec *codec,
+ }
+ }
+
++static void alc1220_fixup_gb_x570(struct hda_codec *codec,
++ const struct hda_fixup *fix,
++ int action)
++{
++ static const hda_nid_t conn1[] = { 0x0c };
++ static const struct coef_fw gb_x570_coefs[] = {
++ WRITE_COEF(0x07, 0x03c0),
++ WRITE_COEF(0x1a, 0x01c1),
++ WRITE_COEF(0x1b, 0x0202),
++ WRITE_COEF(0x43, 0x3005),
++ {}
++ };
++
++ switch (action) {
++ case HDA_FIXUP_ACT_PRE_PROBE:
++ snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn1), conn1);
++ snd_hda_override_conn_list(codec, 0x1b, ARRAY_SIZE(conn1), conn1);
++ break;
++ case HDA_FIXUP_ACT_INIT:
++ alc_process_coef_fw(codec, gb_x570_coefs);
++ break;
++ }
++}
++
+ static void alc1220_fixup_clevo_p950(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+@@ -2415,6 +2490,10 @@ static const struct hda_fixup alc882_fixups[] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc1220_fixup_gb_dual_codecs,
+ },
++ [ALC1220_FIXUP_GB_X570] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc1220_fixup_gb_x570,
++ },
+ [ALC1220_FIXUP_CLEVO_P950] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc1220_fixup_clevo_p950,
+@@ -2446,6 +2525,14 @@ static const struct hda_fixup alc882_fixups[] = {
+ .chained = true,
+ .chain_id = ALC887_FIXUP_ASUS_AUDIO,
+ },
++ [ALCS1200A_FIXUP_MIC_VREF] = {
++ .type = HDA_FIXUP_PINCTLS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x18, PIN_VREF50 }, /* rear mic */
++ { 0x19, PIN_VREF50 }, /* front mic */
++ {}
++ }
++ },
+ };
+
+ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+@@ -2483,6 +2570,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601),
+ SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS),
+ SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3),
++ SND_PCI_QUIRK(0x1043, 0x8797, "ASUS TUF B550M-PLUS", ALCS1200A_FIXUP_MIC_VREF),
+ SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP),
+ SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP),
+ SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT),
+@@ -2517,8 +2605,9 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD),
+ SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+- SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950),
+- SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950),
++ SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570),
++ SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570),
++ SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570),
+ SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950),
+@@ -2530,15 +2619,20 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+ SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
+ SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
++ SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++ SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++ SND_PCI_QUIRK(0x1558, 0x66a2, "Clevo PE60RNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++ SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++ SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED),
+@@ -2554,6 +2648,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950),
++ SND_PCI_QUIRK(0x1558, 0xd502, "Clevo PD50SNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
+ SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
+ SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
+@@ -2592,6 +2687,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
+ {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
+ {.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"},
+ {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
++ {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"},
+ {.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"},
+ {}
+ };
+@@ -3047,6 +3143,7 @@ enum {
+ ALC269_TYPE_ALC257,
+ ALC269_TYPE_ALC215,
+ ALC269_TYPE_ALC225,
++ ALC269_TYPE_ALC245,
+ ALC269_TYPE_ALC287,
+ ALC269_TYPE_ALC294,
+ ALC269_TYPE_ALC300,
+@@ -3084,6 +3181,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
+ case ALC269_TYPE_ALC257:
+ case ALC269_TYPE_ALC215:
+ case ALC269_TYPE_ALC225:
++ case ALC269_TYPE_ALC245:
+ case ALC269_TYPE_ALC287:
+ case ALC269_TYPE_ALC294:
+ case ALC269_TYPE_ALC300:
+@@ -3151,6 +3249,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_write_coef_idx(codec, 0x48, 0x0);
+ alc_update_coef_idx(codec, 0x49, 0x0045, 0x0);
+ break;
+@@ -3179,6 +3278,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_write_coef_idx(codec, 0x48, 0xd011);
+ alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045);
+ break;
+@@ -3462,6 +3562,15 @@ static void alc256_init(struct hda_codec *codec)
+ hda_nid_t hp_pin = alc_get_hp_pin(spec);
+ bool hp_pin_sense;
+
++ if (spec->ultra_low_power) {
++ alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
++ alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
++ alc_update_coef_idx(codec, 0x08, 7<<4, 0);
++ alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++ msleep(30);
++ }
++
+ if (!hp_pin)
+ hp_pin = 0x21;
+
+@@ -3473,14 +3582,6 @@ static void alc256_init(struct hda_codec *codec)
+ msleep(2);
+
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+- if (spec->ultra_low_power) {
+- alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
+- alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
+- alc_update_coef_idx(codec, 0x08, 7<<4, 0);
+- alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
+- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+- msleep(30);
+- }
+
+ snd_hda_codec_write(codec, hp_pin, 0,
+ AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+@@ -3516,6 +3617,7 @@ static void alc256_shutup(struct hda_codec *codec)
+ if (!hp_pin)
+ hp_pin = 0x21;
+
++ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+ hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
+
+ if (hp_pin_sense)
+@@ -3532,8 +3634,7 @@ static void alc256_shutup(struct hda_codec *codec)
+ /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly
+ * when booting with headset plugged. So skip setting it for the codec alc257
+ */
+- if (spec->codec_variant != ALC269_TYPE_ALC257 &&
+- spec->codec_variant != ALC269_TYPE_ALC256)
++ if (spec->en_3kpull_low)
+ alc_update_coef_idx(codec, 0x46, 0, 3 << 12);
+
+ if (!spec->no_shutup_pins)
+@@ -3611,7 +3712,15 @@ static void alc225_init(struct hda_codec *codec)
+ hda_nid_t hp_pin = alc_get_hp_pin(spec);
+ bool hp1_pin_sense, hp2_pin_sense;
+
+- if (spec->codec_variant != ALC269_TYPE_ALC287)
++ if (spec->ultra_low_power) {
++ alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++ alc_update_coef_idx(codec, 0x33, 1<<11, 0);
++ msleep(30);
++ }
++
++ if (spec->codec_variant != ALC269_TYPE_ALC287 &&
++ spec->codec_variant != ALC269_TYPE_ALC245)
+ /* required only at boot or S3 and S4 resume time */
+ if (!spec->done_hp_init ||
+ is_s3_resume(codec) ||
+@@ -3631,12 +3740,6 @@ static void alc225_init(struct hda_codec *codec)
+ msleep(2);
+
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+- if (spec->ultra_low_power) {
+- alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
+- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+- alc_update_coef_idx(codec, 0x33, 1<<11, 0);
+- msleep(30);
+- }
+
+ if (hp1_pin_sense || spec->ultra_low_power)
+ snd_hda_codec_write(codec, hp_pin, 0,
+@@ -4355,6 +4458,16 @@ static void alc287_fixup_hp_gpio_led(struct hda_codec *codec,
+ alc_fixup_hp_gpio_led(codec, action, 0x10, 0);
+ }
+
++static void alc245_fixup_hp_gpio_led(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++
++ if (action == HDA_FIXUP_ACT_PRE_PROBE)
++ spec->micmute_led_polarity = 1;
++ alc_fixup_hp_gpio_led(codec, action, 0, 0x04);
++}
++
+ /* turn on/off mic-mute LED per capture hook via VREF change */
+ static int vref_micmute_led_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+@@ -4504,6 +4617,21 @@ static void alc236_fixup_hp_mute_led_coefbit(struct hda_codec *codec,
+ }
+ }
+
++static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++
++ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
++ spec->mute_led_polarity = 0;
++ spec->mute_led_coef.idx = 0x07;
++ spec->mute_led_coef.mask = 1;
++ spec->mute_led_coef.on = 1;
++ spec->mute_led_coef.off = 0;
++ snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
++ }
++}
++
+ /* turn on/off mic-mute LED per capture hook by coef bit */
+ static int coef_micmute_led_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+@@ -4530,6 +4658,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec,
+ }
+ }
+
++static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++
++ if (action == HDA_FIXUP_ACT_PRE_PROBE)
++ spec->micmute_led_polarity = 1;
++ alc_fixup_hp_gpio_led(codec, action, 0, 0x04);
++}
++
+ static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+ {
+@@ -4551,6 +4689,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec,
+ alc285_fixup_hp_coef_micmute_led(codec, fix, action);
+ }
+
++static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ alc285_fixup_hp_mute_led_coefbit(codec, fix, action);
++ alc285_fixup_hp_gpio_micmute_led(codec, fix, action);
++}
++
+ static void alc236_fixup_hp_mute_led(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+ {
+@@ -4577,6 +4722,48 @@ static void alc236_fixup_hp_mute_led_micmute_vref(struct hda_codec *codec,
+ alc236_fixup_hp_micmute_led_vref(codec, fix, action);
+ }
+
++static inline void alc298_samsung_write_coef_pack(struct hda_codec *codec,
++ const unsigned short coefs[2])
++{
++ alc_write_coef_idx(codec, 0x23, coefs[0]);
++ alc_write_coef_idx(codec, 0x25, coefs[1]);
++ alc_write_coef_idx(codec, 0x26, 0xb011);
++}
++
++struct alc298_samsung_amp_desc {
++ unsigned char nid;
++ unsigned short init_seq[2][2];
++};
++
++static void alc298_fixup_samsung_amp(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ int i, j;
++ static const unsigned short init_seq[][2] = {
++ { 0x19, 0x00 }, { 0x20, 0xc0 }, { 0x22, 0x44 }, { 0x23, 0x08 },
++ { 0x24, 0x85 }, { 0x25, 0x41 }, { 0x35, 0x40 }, { 0x36, 0x01 },
++ { 0x38, 0x81 }, { 0x3a, 0x03 }, { 0x3b, 0x81 }, { 0x40, 0x3e },
++ { 0x41, 0x07 }, { 0x400, 0x1 }
++ };
++ static const struct alc298_samsung_amp_desc amps[] = {
++ { 0x3a, { { 0x18, 0x1 }, { 0x26, 0x0 } } },
++ { 0x39, { { 0x18, 0x2 }, { 0x26, 0x1 } } }
++ };
++
++ if (action != HDA_FIXUP_ACT_INIT)
++ return;
++
++ for (i = 0; i < ARRAY_SIZE(amps); i++) {
++ alc_write_coef_idx(codec, 0x22, amps[i].nid);
++
++ for (j = 0; j < ARRAY_SIZE(amps[i].init_seq); j++)
++ alc298_samsung_write_coef_pack(codec, amps[i].init_seq[j]);
++
++ for (j = 0; j < ARRAY_SIZE(init_seq); j++)
++ alc298_samsung_write_coef_pack(codec, init_seq[j]);
++ }
++}
++
+ #if IS_REACHABLE(CONFIG_INPUT)
+ static void gpio2_mic_hotkey_event(struct hda_codec *codec,
+ struct hda_jack_callback *event)
+@@ -4803,6 +4990,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_process_coef_fw(codec, coef0256);
+ break;
+ case 0x10ec0234:
+@@ -4918,6 +5106,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_write_coef_idx(codec, 0x45, 0xc489);
+ snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+ alc_process_coef_fw(codec, coef0256);
+@@ -5068,6 +5257,7 @@ static void alc_headset_mode_default(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_write_coef_idx(codec, 0x1b, 0x0e4b);
+ alc_write_coef_idx(codec, 0x45, 0xc089);
+ msleep(50);
+@@ -5167,6 +5357,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_process_coef_fw(codec, coef0256);
+ break;
+ case 0x10ec0234:
+@@ -5281,6 +5472,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_process_coef_fw(codec, coef0256);
+ break;
+ case 0x10ec0234:
+@@ -5382,6 +5574,7 @@ static void alc_determine_headset_type(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_write_coef_idx(codec, 0x1b, 0x0e4b);
+ alc_write_coef_idx(codec, 0x06, 0x6104);
+ alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3);
+@@ -5676,6 +5869,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_process_coef_fw(codec, alc256fw);
+ break;
+ }
+@@ -6278,6 +6472,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec)
+ case 0x10ec0236:
+ case 0x10ec0255:
+ case 0x10ec0256:
++ case 0x19e58326:
+ alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */
+ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
+ break;
+@@ -6492,22 +6687,92 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec,
+ /* for alc285_fixup_ideapad_s740_coef() */
+ #include "ideapad_s740_helper.c"
+
+-static void alc256_fixup_tongfang_reset_persistent_settings(struct hda_codec *codec,
+- const struct hda_fixup *fix,
+- int action)
++static const struct coef_fw alc256_fixup_set_coef_defaults_coefs[] = {
++ WRITE_COEF(0x10, 0x0020), WRITE_COEF(0x24, 0x0000),
++ WRITE_COEF(0x26, 0x0000), WRITE_COEF(0x29, 0x3000),
++ WRITE_COEF(0x37, 0xfe05), WRITE_COEF(0x45, 0x5089),
++ {}
++};
++
++static void alc256_fixup_set_coef_defaults(struct hda_codec *codec,
++ const struct hda_fixup *fix,
++ int action)
+ {
+ /*
+- * A certain other OS sets these coeffs to different values. On at least one TongFang
+- * barebone these settings might survive even a cold reboot. So to restore a clean slate the
+- * values are explicitly reset to default here. Without this, the external microphone is
+- * always in a plugged-in state, while the internal microphone is always in an unplugged
+- * state, breaking the ability to use the internal microphone.
+- */
+- alc_write_coef_idx(codec, 0x24, 0x0000);
+- alc_write_coef_idx(codec, 0x26, 0x0000);
+- alc_write_coef_idx(codec, 0x29, 0x3000);
+- alc_write_coef_idx(codec, 0x37, 0xfe05);
+- alc_write_coef_idx(codec, 0x45, 0x5089);
++ * A certain other OS sets these coeffs to different values. On at least
++ * one TongFang barebone these settings might survive even a cold
++ * reboot. So to restore a clean slate the values are explicitly reset
++ * to default here. Without this, the external microphone is always in a
++ * plugged-in state, while the internal microphone is always in an
++ * unplugged state, breaking the ability to use the internal microphone.
++ */
++ alc_process_coef_fw(codec, alc256_fixup_set_coef_defaults_coefs);
++}
++
++static const struct coef_fw alc233_fixup_no_audio_jack_coefs[] = {
++ WRITE_COEF(0x1a, 0x9003), WRITE_COEF(0x1b, 0x0e2b), WRITE_COEF(0x37, 0xfe06),
++ WRITE_COEF(0x38, 0x4981), WRITE_COEF(0x45, 0xd489), WRITE_COEF(0x46, 0x0074),
++ WRITE_COEF(0x49, 0x0149),
++ {}
++};
++
++static void alc233_fixup_no_audio_jack(struct hda_codec *codec,
++ const struct hda_fixup *fix,
++ int action)
++{
++ /*
++ * The audio jack input and output is not detected on the ASRock NUC Box
++ * 1100 series when cold booting without this fix. Warm rebooting from a
++ * certain other OS makes the audio functional, as COEF settings are
++ * preserved in this case. This fix sets these altered COEF values as
++ * the default.
++ */
++ alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs);
++}
++
++static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec,
++ const struct hda_fixup *fix,
++ int action)
++{
++ /*
++ * The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec,
++ * but uses the 0x8686 subproduct id in both cases. The ALC256 codec
++ * needs an additional quirk for sound working after suspend and resume.
++ */
++ if (codec->core.vendor_id == 0x10ec0256) {
++ alc_update_coef_idx(codec, 0x10, 1<<9, 0);
++ snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120);
++ } else {
++ snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c);
++ }
++}
++
++static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ static const struct hda_pintbl pincfgs[] = {
++ { 0x14, 0x90170151 },
++ { 0x17, 0x90170150 },
++ { }
++ };
++ static const hda_nid_t conn[] = { 0x02, 0x03 };
++ static const hda_nid_t preferred_pairs[] = {
++ 0x14, 0x02,
++ 0x17, 0x03,
++ 0x21, 0x02,
++ 0
++ };
++ struct alc_spec *spec = codec->spec;
++
++ alc_fixup_no_shutup(codec, fix, action);
++
++ switch (action) {
++ case HDA_FIXUP_ACT_PRE_PROBE:
++ snd_hda_apply_pincfgs(codec, pincfgs);
++ snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
++ spec->gen.preferred_dacs = preferred_pairs;
++ break;
++ }
+ }
+
+ enum {
+@@ -6564,6 +6829,7 @@ enum {
+ ALC269_FIXUP_LIMIT_INT_MIC_BOOST,
+ ALC269VB_FIXUP_ASUS_ZENBOOK,
+ ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A,
++ ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE,
+ ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED,
+ ALC269VB_FIXUP_ORDISSIMO_EVE2,
+ ALC283_FIXUP_CHROME_BOOK,
+@@ -6623,6 +6889,7 @@ enum {
+ ALC298_FIXUP_LENOVO_SPK_VOLUME,
+ ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
+ ALC269_FIXUP_ATIV_BOOK_8,
++ ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE,
+ ALC221_FIXUP_HP_MIC_NO_PRESENCE,
+ ALC256_FIXUP_ASUS_HEADSET_MODE,
+ ALC256_FIXUP_ASUS_MIC,
+@@ -6678,12 +6945,18 @@ enum {
+ ALC294_FIXUP_ASUS_GU502_HP,
+ ALC294_FIXUP_ASUS_GU502_PINS,
+ ALC294_FIXUP_ASUS_GU502_VERBS,
++ ALC294_FIXUP_ASUS_G513_PINS,
++ ALC285_FIXUP_ASUS_G533Z_PINS,
+ ALC285_FIXUP_HP_GPIO_LED,
+ ALC285_FIXUP_HP_MUTE_LED,
++ ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED,
++ ALC236_FIXUP_HP_MUTE_LED_COEFBIT2,
+ ALC236_FIXUP_HP_GPIO_LED,
+ ALC236_FIXUP_HP_MUTE_LED,
+ ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
++ ALC298_FIXUP_SAMSUNG_AMP,
+ ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET,
++ ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET,
+ ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
+ ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS,
+ ALC269VC_FIXUP_ACER_HEADSET_MIC,
+@@ -6709,6 +6982,7 @@ enum {
+ ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
+ ALC287_FIXUP_HP_GPIO_LED,
+ ALC256_FIXUP_HP_HEADSET_MIC,
++ ALC245_FIXUP_HP_GPIO_LED,
+ ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
+ ALC282_FIXUP_ACER_DISABLE_LINEOUT,
+ ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST,
+@@ -6725,10 +6999,36 @@ enum {
+ ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS,
+ ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
+ ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
++ ALC298_FIXUP_LENOVO_C940_DUET7,
+ ALC287_FIXUP_13S_GEN2_SPEAKERS,
+- ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS,
++ ALC256_FIXUP_SET_COEF_DEFAULTS,
++ ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
++ ALC233_FIXUP_NO_AUDIO_JACK,
++ ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME,
++ ALC285_FIXUP_LEGION_Y9000X_SPEAKERS,
++ ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
++ ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
++ ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
++ ALC236_FIXUP_DELL_DUAL_CODECS,
+ };
+
++/* A special fixup for Lenovo C940 and Yoga Duet 7;
++ * both have the very same PCI SSID, and we need to apply different fixups
++ * depending on the codec ID
++ */
++static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec,
++ const struct hda_fixup *fix,
++ int action)
++{
++ int id;
++
++ if (codec->core.vendor_id == 0x10ec0298)
++ id = ALC298_FIXUP_LENOVO_SPK_VOLUME; /* C940 */
++ else
++ id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Duet 7 */
++ __snd_hda_apply_fixup(codec, id, action, 0);
++}
++
+ static const struct hda_fixup alc269_fixups[] = {
+ [ALC269_FIXUP_GPIO2] = {
+ .type = HDA_FIXUP_FUNC,
+@@ -7114,6 +7414,15 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269VB_FIXUP_ASUS_ZENBOOK,
+ },
++ [ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x18, 0x01a110f0 }, /* use as headset mic */
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC269_FIXUP_HEADSET_MIC
++ },
+ [ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc269_fixup_limit_int_mic_boost,
+@@ -7333,6 +7642,8 @@ static const struct hda_fixup alc269_fixups[] = {
+ [ALC245_FIXUP_HP_X360_AMP] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc245_fixup_hp_x360_amp,
++ .chained = true,
++ .chain_id = ALC245_FIXUP_HP_GPIO_LED
+ },
+ [ALC288_FIXUP_DELL_HEADSET_MODE] = {
+ .type = HDA_FIXUP_FUNC,
+@@ -7523,6 +7834,16 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_NO_SHUTUP
+ },
++ [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
++ { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC269_FIXUP_HEADSET_MODE
++ },
+ [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+@@ -7957,6 +8278,26 @@ static const struct hda_fixup alc269_fixups[] = {
+ [ALC294_FIXUP_ASUS_GU502_HP] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc294_fixup_gu502_hp,
++ },
++ [ALC294_FIXUP_ASUS_G513_PINS] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x19, 0x03a11050 }, /* front HP mic */
++ { 0x1a, 0x03a11c30 }, /* rear external mic */
++ { 0x21, 0x03211420 }, /* front HP out */
++ { }
++ },
++ },
++ [ALC285_FIXUP_ASUS_G533Z_PINS] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x14, 0x90170152 }, /* Speaker Surround Playback Switch */
++ { 0x19, 0x03a19020 }, /* Mic Boost Volume */
++ { 0x1a, 0x03a11c30 }, /* Mic Boost Volume */
++ { 0x1e, 0x90170151 }, /* Rear jack, IN OUT EAPD Detect */
++ { 0x21, 0x03211420 },
++ { }
++ },
+ },
+ [ALC294_FIXUP_ASUS_COEF_1B] = {
+ .type = HDA_FIXUP_VERBS,
+@@ -7979,6 +8320,14 @@ static const struct hda_fixup alc269_fixups[] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_hp_mute_led,
+ },
++ [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc285_fixup_hp_spectre_x360_mute_led,
++ },
++ [ALC236_FIXUP_HP_MUTE_LED_COEFBIT2] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc236_fixup_hp_mute_led_coefbit2,
++ },
+ [ALC236_FIXUP_HP_GPIO_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc236_fixup_hp_gpio_led,
+@@ -7991,6 +8340,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc236_fixup_hp_mute_led_micmute_vref,
+ },
++ [ALC298_FIXUP_SAMSUNG_AMP] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc298_fixup_samsung_amp,
++ .chained = true,
++ .chain_id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET
++ },
+ [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+@@ -7998,6 +8353,14 @@ static const struct hda_fixup alc269_fixups[] = {
+ { }
+ },
+ },
++ [ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = {
++ .type = HDA_FIXUP_VERBS,
++ .v.verbs = (const struct hda_verb[]) {
++ { 0x20, AC_VERB_SET_COEF_INDEX, 0x08},
++ { 0x20, AC_VERB_SET_PROC_COEF, 0x2fcf},
++ { }
++ },
++ },
+ [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+@@ -8321,6 +8684,18 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
+ },
++ [ALC285_FIXUP_LEGION_Y9000X_SPEAKERS] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc285_fixup_ideapad_s740_coef,
++ .chained = true,
++ .chain_id = ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
++ },
++ [ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc287_fixup_legion_15imhg05_speakers,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
++ },
+ [ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS] = {
+ .type = HDA_FIXUP_VERBS,
+ //.v.verbs = legion_15imhg05_coefs,
+@@ -8406,6 +8781,10 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE,
+ },
++ [ALC298_FIXUP_LENOVO_C940_DUET7] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc298_fixup_lenovo_c940_duet7,
++ },
+ [ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+@@ -8428,9 +8807,54 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE,
+ },
+- [ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS] = {
++ [ALC256_FIXUP_SET_COEF_DEFAULTS] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc256_fixup_set_coef_defaults,
++ },
++ [ALC245_FIXUP_HP_GPIO_LED] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc245_fixup_hp_gpio_led,
++ },
++ [ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x19, 0x03a11120 }, /* use as headset mic, without its own jack detect */
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
++ },
++ [ALC233_FIXUP_NO_AUDIO_JACK] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc233_fixup_no_audio_jack,
++ },
++ [ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME] = {
+ .type = HDA_FIXUP_FUNC,
+- .v.func = alc256_fixup_tongfang_reset_persistent_settings,
++ .v.func = alc256_fixup_mic_no_presence_and_resume,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
++ },
++ [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = {
++ .type = HDA_FIXUP_VERBS,
++ .v.verbs = (const struct hda_verb[]) {
++ { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 },
++ { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 },
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC285_FIXUP_HP_MUTE_LED,
++ },
++ [ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc295_fixup_dell_inspiron_top_speakers,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
++ },
++ [ALC236_FIXUP_DELL_DUAL_CODECS] = {
++ .type = HDA_FIXUP_PINS,
++ .v.func = alc1220_fixup_gb_dual_codecs,
++ .chained = true,
++ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ },
+ };
+
+@@ -8464,14 +8888,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1025, 0x141f, "Acer Spin SP513-54N", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
++ SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
++ SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
+ SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
+ SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
+ SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X),
+@@ -8526,6 +8954,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK),
++ SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK),
++ SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
++ SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
++ SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
++ SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
++ SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
++ SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
++ SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
++ SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
+ SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
+@@ -8585,11 +9024,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
++ SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x103c, 0x8158, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
+- SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
++ SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC295_FIXUP_HP_X360),
+ SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
+ SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
+ SND_PCI_QUIRK(0x103c, 0x827f, "HP x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+@@ -8601,14 +9041,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
+ SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+ SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360),
++ SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
++ SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
++ SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
++ SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8728, "HP EliteBook 840 G7", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++ SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
+@@ -8617,6 +9063,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation",
+ ALC285_FIXUP_HP_GPIO_AMP_INIT),
++ SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
++ SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
++ SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
+@@ -8630,6 +9079,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
+ SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
++ SND_PCI_QUIRK(0x103c, 0x881d, "HP 250 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+ SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+@@ -8639,10 +9089,24 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
++ SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+ SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x89c3, "HP", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++ SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++ SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
++ SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++ SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++ SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
+ SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+@@ -8657,17 +9121,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC),
++ SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
+ SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
++ SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
++ SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS),
+ SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
+- SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
+ SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
+ SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE),
++ SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE),
+ SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE),
+ SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
+@@ -8677,12 +9144,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
++ SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
++ SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
++ SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
+ SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
+ SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
++ SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
++ SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
++ SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
+ SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
+@@ -8704,17 +9177,23 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
+ SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
+ SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
++ SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
+- SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+- SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+- SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+- SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
++ SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
+- SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+- SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
++ SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
++ SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+@@ -8728,14 +9207,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x4041, "Clevo NV4[15]PZ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x5015, "Clevo NH5[58]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x5017, "Clevo NH7[79]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x50e1, "Clevo NH5[58]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x50e2, "Clevo NH7[79]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -8744,12 +9228,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x7717, "Clevo NS70PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x7724, "Clevo L140AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -8761,8 +9252,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1558, 0x8561, "Clevo NH[57][0-9][ER][ACDH]Q", ALC269_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
+ SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x867c, "Clevo NP7[01]PNP", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+- SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME),
+ SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -8772,6 +9266,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -8828,13 +9324,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
+- SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
++ SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
++ SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
++ SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
+- SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
++ SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+- SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
+@@ -8854,21 +9356,36 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
++ SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
++ SND_PCI_QUIRK(0x17aa, 0x9e56, "Lenovo ZhaoYang CF4620Z", ALC286_FIXUP_SONY_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK),
++ SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
++ SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20),
+ SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI),
+ SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101),
+ SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
+ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
+ SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
+- SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS),
++ SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE),
++ SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
++ SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+ SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
+ SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
+@@ -9045,7 +9562,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
+ {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"},
+ {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"},
+- {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"},
++ {.id = ALC298_FIXUP_SAMSUNG_AMP, .name = "alc298-samsung-amp"},
++ {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"},
+ {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"},
+ {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"},
+ {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
+@@ -9055,6 +9573,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"},
+ {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"},
+ {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"},
++ {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"},
+ {}
+ };
+ #define ALC225_STANDARD_PINS \
+@@ -9569,6 +10088,7 @@ static int patch_alc269(struct hda_codec *codec)
+ spec = codec->spec;
+ spec->gen.shared_mic_vref_pin = 0x18;
+ codec->power_save_node = 0;
++ spec->en_3kpull_low = true;
+
+ #ifdef CONFIG_PM
+ codec->patch_ops.suspend = alc269_suspend;
+@@ -9646,22 +10166,30 @@ static int patch_alc269(struct hda_codec *codec)
+ case 0x10ec0230:
+ case 0x10ec0236:
+ case 0x10ec0256:
++ case 0x19e58326:
+ spec->codec_variant = ALC269_TYPE_ALC256;
+ spec->shutup = alc256_shutup;
+ spec->init_hook = alc256_init;
+ spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
++ if (codec->core.vendor_id == 0x10ec0236 &&
++ codec->bus->pci->vendor != PCI_VENDOR_ID_AMD)
++ spec->en_3kpull_low = false;
+ break;
+ case 0x10ec0257:
+ spec->codec_variant = ALC269_TYPE_ALC257;
+ spec->shutup = alc256_shutup;
+ spec->init_hook = alc256_init;
+ spec->gen.mixer_nid = 0;
++ spec->en_3kpull_low = false;
+ break;
+ case 0x10ec0215:
+ case 0x10ec0245:
+ case 0x10ec0285:
+ case 0x10ec0289:
+- spec->codec_variant = ALC269_TYPE_ALC215;
++ if (alc_get_coef0(codec) & 0x0010)
++ spec->codec_variant = ALC269_TYPE_ALC245;
++ else
++ spec->codec_variant = ALC269_TYPE_ALC215;
+ spec->shutup = alc225_shutup;
+ spec->init_hook = alc225_init;
+ spec->gen.mixer_nid = 0;
+@@ -10167,6 +10695,38 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec,
+ }
+ }
+
++static void alc897_hp_automute_hook(struct hda_codec *codec,
++ struct hda_jack_callback *jack)
++{
++ struct alc_spec *spec = codec->spec;
++ int vref;
++
++ snd_hda_gen_hp_automute(codec, jack);
++ vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP;
++ snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
++ vref);
++}
++
++static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
++ spec->gen.hp_automute_hook = alc897_hp_automute_hook;
++ }
++}
++
++static void alc897_fixup_lenovo_headset_mode(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ struct alc_spec *spec = codec->spec;
++
++ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
++ spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
++ spec->gen.hp_automute_hook = alc897_hp_automute_hook;
++ }
++}
++
+ static const struct coef_fw alc668_coefs[] = {
+ WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0),
+ WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80),
+@@ -10247,6 +10807,12 @@ enum {
+ ALC668_FIXUP_ASUS_NO_HEADSET_MIC,
+ ALC668_FIXUP_HEADSET_MIC,
+ ALC668_FIXUP_MIC_DET_COEF,
++ ALC897_FIXUP_LENOVO_HEADSET_MIC,
++ ALC897_FIXUP_HEADSET_MIC_PIN,
++ ALC897_FIXUP_HP_HSMIC_VERB,
++ ALC897_FIXUP_LENOVO_HEADSET_MODE,
++ ALC897_FIXUP_HEADSET_MIC_PIN2,
++ ALC897_FIXUP_UNIS_H3C_X500S,
+ };
+
+ static const struct hda_fixup alc662_fixups[] = {
+@@ -10653,6 +11219,46 @@ static const struct hda_fixup alc662_fixups[] = {
+ {}
+ },
+ },
++ [ALC897_FIXUP_LENOVO_HEADSET_MIC] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc897_fixup_lenovo_headset_mic,
++ },
++ [ALC897_FIXUP_HEADSET_MIC_PIN] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x1a, 0x03a11050 },
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC
++ },
++ [ALC897_FIXUP_HP_HSMIC_VERB] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
++ { }
++ },
++ },
++ [ALC897_FIXUP_LENOVO_HEADSET_MODE] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc897_fixup_lenovo_headset_mode,
++ },
++ [ALC897_FIXUP_HEADSET_MIC_PIN2] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MODE
++ },
++ [ALC897_FIXUP_UNIS_H3C_X500S] = {
++ .type = HDA_FIXUP_VERBS,
++ .v.verbs = (const struct hda_verb[]) {
++ { 0x14, AC_VERB_SET_EAPD_BTLENABLE, 0 },
++ {}
++ },
++ },
+ };
+
+ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
+@@ -10678,7 +11284,13 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
++ SND_PCI_QUIRK(0x103c, 0x870c, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
++ SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
++ SND_PCI_QUIRK(0x103c, 0x872b, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
+ SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
++ SND_PCI_QUIRK(0x103c, 0x8768, "HP Slim Desktop S01", ALC671_FIXUP_HP_HEADSET_MIC2),
++ SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
++ SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
+ SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
+ SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
+@@ -10697,6 +11309,15 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
+ SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE),
+ SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS),
++ SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x1064, "Lenovo P3 Tower", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x3321, "Lenovo ThinkCentre M70 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x331b, "Lenovo ThinkCentre M90 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
++ SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2),
+ SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
+ SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
+ SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO),
+@@ -10704,6 +11325,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON),
+ SND_PCI_QUIRK(0x1b35, 0x1234, "CZC ET26", ALC662_FIXUP_CZC_ET26),
+ SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T),
++ SND_PCI_QUIRK(0x1c6c, 0x1239, "Compaq N14JP6-V2", ALC897_FIXUP_HP_HSMIC_VERB),
+
+ #if 0
+ /* Below is a quirk table taken from the old code.
+@@ -10798,6 +11420,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = {
+ {.id = ALC662_FIXUP_USI_HEADSET_MODE, .name = "usi-headset"},
+ {.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
+ {.id = ALC669_FIXUP_ACER_ASPIRE_ETHOS, .name = "aspire-ethos"},
++ {.id = ALC897_FIXUP_UNIS_H3C_X500S, .name = "unis-h3c-x500s"},
+ {}
+ };
+
+@@ -11051,6 +11674,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882),
+ HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882),
+ HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882),
++ HDA_CODEC_ENTRY(0x19e58326, "HW8326", patch_alc269),
+ {} /* terminator */
+ };
+ MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_realtek);
+diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
+index 61df4d33c48ff..61258b0aac8d6 100644
+--- a/sound/pci/hda/patch_sigmatel.c
++++ b/sound/pci/hda/patch_sigmatel.c
+@@ -209,6 +209,7 @@ struct sigmatel_spec {
+
+ /* beep widgets */
+ hda_nid_t anabeep_nid;
++ bool beep_power_on;
+
+ /* SPDIF-out mux */
+ const char * const *spdif_labels;
+@@ -1706,6 +1707,7 @@ static const struct snd_pci_quirk stac925x_fixup_tbl[] = {
+ };
+
+ static const struct hda_pintbl ref92hd73xx_pin_configs[] = {
++ // Port A-H
+ { 0x0a, 0x02214030 },
+ { 0x0b, 0x02a19040 },
+ { 0x0c, 0x01a19020 },
+@@ -1714,9 +1716,12 @@ static const struct hda_pintbl ref92hd73xx_pin_configs[] = {
+ { 0x0f, 0x01014010 },
+ { 0x10, 0x01014020 },
+ { 0x11, 0x01014030 },
++ // CD in
+ { 0x12, 0x02319040 },
++	// Digital Mic ins
+ { 0x13, 0x90a000f0 },
+ { 0x14, 0x90a000f0 },
++ // Digital outs
+ { 0x22, 0x01452050 },
+ { 0x23, 0x01452050 },
+ {}
+@@ -1757,6 +1762,7 @@ static const struct hda_pintbl alienware_m17x_pin_configs[] = {
+ };
+
+ static const struct hda_pintbl intel_dg45id_pin_configs[] = {
++ // Analog outputs
+ { 0x0a, 0x02214230 },
+ { 0x0b, 0x02A19240 },
+ { 0x0c, 0x01013214 },
+@@ -1764,6 +1770,9 @@ static const struct hda_pintbl intel_dg45id_pin_configs[] = {
+ { 0x0e, 0x01A19250 },
+ { 0x0f, 0x01011212 },
+ { 0x10, 0x01016211 },
++ // Digital output
++ { 0x22, 0x01451380 },
++ { 0x23, 0x40f000f0 },
+ {}
+ };
+
+@@ -1954,6 +1963,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = {
+ "DFI LanParty", STAC_92HD73XX_REF),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
+ "DFI LanParty", STAC_92HD73XX_REF),
++ SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5001,
++ "Intel DP45SG", STAC_92HD73XX_INTEL),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5002,
+ "Intel DG45ID", STAC_92HD73XX_INTEL),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x5003,
+@@ -4310,6 +4321,8 @@ static int stac_parse_auto_config(struct hda_codec *codec)
+ if (codec->beep) {
+ /* IDT/STAC codecs have linear beep tone parameter */
+ codec->beep->linear_tone = spec->linear_tone_beep;
++ /* keep power up while beep is enabled */
++ codec->beep->keep_power_at_enable = 1;
+ /* if no beep switch is available, make its own one */
+ caps = query_amp_caps(codec, nid, HDA_OUTPUT);
+ if (!(caps & AC_AMPCAP_MUTE)) {
+diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
+index 773a136161f11..29abc96dc146c 100644
+--- a/sound/pci/hda/patch_via.c
++++ b/sound/pci/hda/patch_via.c
+@@ -520,11 +520,11 @@ static int via_parse_auto_config(struct hda_codec *codec)
+ if (err < 0)
+ return err;
+
+- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
++ err = auto_parse_beep(codec);
+ if (err < 0)
+ return err;
+
+- err = auto_parse_beep(codec);
++ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ if (err < 0)
+ return err;
+
+@@ -821,6 +821,9 @@ static int add_secret_dac_path(struct hda_codec *codec)
+ return 0;
+ nums = snd_hda_get_connections(codec, spec->gen.mixer_nid, conn,
+ ARRAY_SIZE(conn) - 1);
++ if (nums < 0)
++ return nums;
++
+ for (i = 0; i < nums; i++) {
+ if (get_wcaps_type(get_wcaps(codec, conn[i])) == AC_WID_AUD_OUT)
+ return 0;
+diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c
+index 9a30f6d35d135..40a0e00950301 100644
+--- a/sound/pci/ice1712/aureon.c
++++ b/sound/pci/ice1712/aureon.c
+@@ -1892,6 +1892,7 @@ static int aureon_add_controls(struct snd_ice1712 *ice)
+ unsigned char id;
+ snd_ice1712_save_gpio_status(ice);
+ id = aureon_cs8415_get(ice, CS8415_ID);
++ snd_ice1712_restore_gpio_status(ice);
+ if (id != 0x41)
+ dev_info(ice->card->dev,
+ "No CS8415 chip. Skipping CS8415 controls.\n");
+@@ -1909,7 +1910,6 @@ static int aureon_add_controls(struct snd_ice1712 *ice)
+ kctl->id.device = ice->pcm->device;
+ }
+ }
+- snd_ice1712_restore_gpio_status(ice);
+ }
+
+ return 0;
+diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
+index f6275868877a7..6fab2ad85bbec 100644
+--- a/sound/pci/ice1712/ice1724.c
++++ b/sound/pci/ice1712/ice1724.c
+@@ -2519,8 +2519,8 @@ static int snd_vt1724_create(struct snd_card *card,
+ *
+ */
+
+-static int snd_vt1724_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_vt1724_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2662,6 +2662,12 @@ static int snd_vt1724_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_vt1724_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_vt1724_probe(pci, pci_id));
++}
++
+ #ifdef CONFIG_PM_SLEEP
+ static int snd_vt1724_suspend(struct device *dev)
+ {
+diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
+index a51032b3ac4d8..ae285c0a629c8 100644
+--- a/sound/pci/intel8x0.c
++++ b/sound/pci/intel8x0.c
+@@ -3109,8 +3109,8 @@ static int check_default_spdif_aclink(struct pci_dev *pci)
+ return 0;
+ }
+
+-static int snd_intel8x0_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_intel8x0_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct intel8x0 *chip;
+@@ -3189,6 +3189,12 @@ static int snd_intel8x0_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_intel8x0_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_intel8x0_probe(pci, pci_id));
++}
++
+ static struct pci_driver intel8x0_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_intel8x0_ids,
+diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c
+index 7de3cb2f17b52..2845cc006d0cf 100644
+--- a/sound/pci/intel8x0m.c
++++ b/sound/pci/intel8x0m.c
+@@ -1178,8 +1178,8 @@ static struct shortname_table {
+ { 0 },
+ };
+
+-static int snd_intel8x0m_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_intel8x0m_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct intel8x0m *chip;
+@@ -1225,6 +1225,12 @@ static int snd_intel8x0m_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_intel8x0m_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_intel8x0m_probe(pci, pci_id));
++}
++
+ static struct pci_driver intel8x0m_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_intel8x0m_ids,
+diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c
+index 5c9e240ff6a9c..33b4f95d65b3f 100644
+--- a/sound/pci/korg1212/korg1212.c
++++ b/sound/pci/korg1212/korg1212.c
+@@ -2355,7 +2355,7 @@ snd_korg1212_probe(struct pci_dev *pci,
+
+ err = snd_korg1212_create(card, pci);
+ if (err < 0)
+- return err;
++ goto error;
+
+ strcpy(card->driver, "korg1212");
+ strcpy(card->shortname, "korg1212");
+@@ -2366,10 +2366,14 @@ snd_korg1212_probe(struct pci_dev *pci,
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver korg1212_driver = {
+diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c
+index 5269a1d396a5b..1aa30e90b86a7 100644
+--- a/sound/pci/lola/lola.c
++++ b/sound/pci/lola/lola.c
+@@ -637,8 +637,8 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev)
+ return 0;
+ }
+
+-static int lola_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __lola_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -687,6 +687,12 @@ static int lola_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int lola_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __lola_probe(pci, pci_id));
++}
++
+ /* PCI IDs */
+ static const struct pci_device_id lola_ids[] = {
+ { PCI_VDEVICE(DIGIGRAM, 0x0001) },
+diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
+index 168a1084f7303..bd9b6148dd6fb 100644
+--- a/sound/pci/lx6464es/lx6464es.c
++++ b/sound/pci/lx6464es/lx6464es.c
+@@ -1019,7 +1019,7 @@ static int snd_lx6464es_probe(struct pci_dev *pci,
+ err = snd_lx6464es_create(card, pci);
+ if (err < 0) {
+ dev_err(card->dev, "error during snd_lx6464es_create\n");
+- return err;
++ goto error;
+ }
+
+ strcpy(card->driver, "LX6464ES");
+@@ -1036,12 +1036,16 @@ static int snd_lx6464es_probe(struct pci_dev *pci,
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ dev_dbg(chip->card->dev, "initialization successful\n");
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver lx6464es_driver = {
+diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
+index d3f58a3d17fbc..b5b0d43bb8dcd 100644
+--- a/sound/pci/lx6464es/lx_core.c
++++ b/sound/pci/lx6464es/lx_core.c
+@@ -493,12 +493,11 @@ int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+ dev_dbg(chip->card->dev,
+ "CMD_08_ASK_BUFFERS: needed %d, freed %d\n",
+ *r_needed, *r_freed);
+- for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+- for (i = 0; i != chip->rmh.stat_len; ++i)
+- dev_dbg(chip->card->dev,
+- " stat[%d]: %x, %x\n", i,
+- chip->rmh.stat[i],
+- chip->rmh.stat[i] & MASK_DATA_SIZE);
++ for (i = 0; i < MAX_STREAM_BUFFER && i < chip->rmh.stat_len;
++ ++i) {
++ dev_dbg(chip->card->dev, " stat[%d]: %x, %x\n", i,
++ chip->rmh.stat[i],
++ chip->rmh.stat[i] & MASK_DATA_SIZE);
+ }
+ }
+
+diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c
+index 056838ead21d6..261850775c807 100644
+--- a/sound/pci/maestro3.c
++++ b/sound/pci/maestro3.c
+@@ -2637,7 +2637,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci,
+ /*
+ */
+ static int
+-snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++__snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2702,6 +2702,12 @@ snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ return 0;
+ }
+
++static int
++snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_m3_probe(pci, pci_id));
++}
++
+ static struct pci_driver m3_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_m3_ids,
+diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c
+index c9c178504959e..f99a1e96e9231 100644
+--- a/sound/pci/nm256/nm256.c
++++ b/sound/pci/nm256/nm256.c
+@@ -1573,7 +1573,6 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci)
+ chip->coeffs_current = 0;
+
+ snd_nm256_init_chip(chip);
+- card->private_free = snd_nm256_free;
+
+ // pci_set_master(pci); /* needed? */
+ return 0;
+@@ -1680,6 +1679,7 @@ static int snd_nm256_probe(struct pci_dev *pci,
+ err = snd_card_register(card);
+ if (err < 0)
+ return err;
++ card->private_free = snd_nm256_free;
+
+ pci_set_drvdata(pci, card);
+ return 0;
+diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c
+index 4fb3f2484fdba..92ffe9dc20c55 100644
+--- a/sound/pci/oxygen/oxygen_lib.c
++++ b/sound/pci/oxygen/oxygen_lib.c
+@@ -576,7 +576,7 @@ static void oxygen_card_free(struct snd_card *card)
+ mutex_destroy(&chip->mutex);
+ }
+
+-int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
++static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
+ struct module *owner,
+ const struct pci_device_id *ids,
+ int (*get_model)(struct oxygen *chip,
+@@ -701,6 +701,16 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
+ pci_set_drvdata(pci, card);
+ return 0;
+ }
++
++int oxygen_pci_probe(struct pci_dev *pci, int index, char *id,
++ struct module *owner,
++ const struct pci_device_id *ids,
++ int (*get_model)(struct oxygen *chip,
++ const struct pci_device_id *id))
++{
++ return snd_card_free_on_error(&pci->dev,
++ __oxygen_pci_probe(pci, index, id, owner, ids, get_model));
++}
+ EXPORT_SYMBOL(oxygen_pci_probe);
+
+ #ifdef CONFIG_PM_SLEEP
+diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
+index 5a987c683c41c..b37c877c2c160 100644
+--- a/sound/pci/riptide/riptide.c
++++ b/sound/pci/riptide/riptide.c
+@@ -2023,7 +2023,7 @@ static void snd_riptide_joystick_remove(struct pci_dev *pci)
+ #endif
+
+ static int
+-snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++__snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -2124,6 +2124,12 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ return 0;
+ }
+
++static int
++snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_card_riptide_probe(pci, pci_id));
++}
++
+ static struct pci_driver driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_riptide_ids,
+diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c
+index 5b6bd9f0b2f77..9c0ac025e1432 100644
+--- a/sound/pci/rme32.c
++++ b/sound/pci/rme32.c
+@@ -1875,7 +1875,7 @@ static void snd_rme32_card_free(struct snd_card *card)
+ }
+
+ static int
+-snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++__snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct rme32 *rme32;
+@@ -1927,6 +1927,12 @@ snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+ return 0;
+ }
+
++static int
++snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_rme32_probe(pci, pci_id));
++}
++
+ static struct pci_driver rme32_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_rme32_ids,
+diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c
+index 8fc8115049203..bccb7e0d3d116 100644
+--- a/sound/pci/rme96.c
++++ b/sound/pci/rme96.c
+@@ -2430,8 +2430,8 @@ static void snd_rme96_card_free(struct snd_card *card)
+ }
+
+ static int
+-snd_rme96_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++__snd_rme96_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct rme96 *rme96;
+@@ -2498,6 +2498,12 @@ snd_rme96_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_rme96_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_rme96_probe(pci, pci_id));
++}
++
+ static struct pci_driver rme96_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_rme96_ids,
+diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
+index 75aa2ea733a59..82c72e6c13754 100644
+--- a/sound/pci/rme9652/hdsp.c
++++ b/sound/pci/rme9652/hdsp.c
+@@ -433,7 +433,7 @@ struct hdsp_midi {
+ struct snd_rawmidi *rmidi;
+ struct snd_rawmidi_substream *input;
+ struct snd_rawmidi_substream *output;
+- char istimer; /* timer in use */
++ signed char istimer; /* timer in use */
+ struct timer_list timer;
+ spinlock_t lock;
+ int pending;
+@@ -468,8 +468,11 @@ struct hdsp {
+ unsigned char ss_out_channels;
+ u32 io_loopback; /* output loopback channel states*/
+
+- struct snd_dma_buffer *capture_dma_buf;
+- struct snd_dma_buffer *playback_dma_buf;
++ /* DMA buffers; those are copied instances from the original snd_dma_buf
++ * objects (which are managed via devres) for the address alignments
++ */
++ struct snd_dma_buffer capture_dma_buf;
++ struct snd_dma_buffer playback_dma_buf;
+ unsigned char *capture_buffer; /* suitably aligned address */
+ unsigned char *playback_buffer; /* suitably aligned address */
+
+@@ -477,7 +480,7 @@ struct hdsp {
+ pid_t playback_pid;
+ int running;
+ int system_sample_rate;
+- const char *channel_map;
++ const signed char *channel_map;
+ int dev;
+ int irq;
+ unsigned long port;
+@@ -499,7 +502,7 @@ struct hdsp {
+ where the data for that channel can be read/written from/to.
+ */
+
+-static const char channel_map_df_ss[HDSP_MAX_CHANNELS] = {
++static const signed char channel_map_df_ss[HDSP_MAX_CHANNELS] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25
+ };
+@@ -514,7 +517,7 @@ static const char channel_map_mf_ss[HDSP_MAX_CHANNELS] = { /* Multiface */
+ -1, -1, -1, -1, -1, -1, -1, -1
+ };
+
+-static const char channel_map_ds[HDSP_MAX_CHANNELS] = {
++static const signed char channel_map_ds[HDSP_MAX_CHANNELS] = {
+ /* ADAT channels are remapped */
+ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23,
+ /* channels 12 and 13 are S/PDIF */
+@@ -523,7 +526,7 @@ static const char channel_map_ds[HDSP_MAX_CHANNELS] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+ };
+
+-static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = {
++static const signed char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = {
+ /* ADAT channels */
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ /* SPDIF */
+@@ -537,7 +540,7 @@ static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = {
+ -1, -1
+ };
+
+-static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = {
++static const signed char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = {
+ /* ADAT */
+ 1, 3, 5, 7,
+ /* SPDIF */
+@@ -551,7 +554,7 @@ static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = {
+ -1, -1, -1, -1, -1, -1
+ };
+
+-static const char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = {
++static const signed char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = {
+ /* ADAT is disabled in this mode */
+ /* SPDIF */
+ 8, 9,
+@@ -3764,30 +3767,32 @@ static void snd_hdsp_proc_init(struct hdsp *hdsp)
+
+ static int snd_hdsp_initialize_memory(struct hdsp *hdsp)
+ {
+- unsigned long pb_bus, cb_bus;
++ struct snd_dma_buffer *capture_dma, *playback_dma;
+
+- hdsp->capture_dma_buf =
+- snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
+- hdsp->playback_dma_buf =
+- snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
+- if (!hdsp->capture_dma_buf || !hdsp->playback_dma_buf) {
++ capture_dma = snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
++ playback_dma = snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
++ if (!capture_dma || !playback_dma) {
+ dev_err(hdsp->card->dev,
+ "%s: no buffers available\n", hdsp->card_name);
+ return -ENOMEM;
+ }
+
+- /* Align to bus-space 64K boundary */
++	/* copy to our own data for alignment */
++ hdsp->capture_dma_buf = *capture_dma;
++ hdsp->playback_dma_buf = *playback_dma;
+
+- cb_bus = ALIGN(hdsp->capture_dma_buf->addr, 0x10000ul);
+- pb_bus = ALIGN(hdsp->playback_dma_buf->addr, 0x10000ul);
++ /* Align to bus-space 64K boundary */
++ hdsp->capture_dma_buf.addr = ALIGN(capture_dma->addr, 0x10000ul);
++ hdsp->playback_dma_buf.addr = ALIGN(playback_dma->addr, 0x10000ul);
+
+ /* Tell the card where it is */
++ hdsp_write(hdsp, HDSP_inputBufferAddress, hdsp->capture_dma_buf.addr);
++ hdsp_write(hdsp, HDSP_outputBufferAddress, hdsp->playback_dma_buf.addr);
+
+- hdsp_write(hdsp, HDSP_inputBufferAddress, cb_bus);
+- hdsp_write(hdsp, HDSP_outputBufferAddress, pb_bus);
+-
+- hdsp->capture_buffer = hdsp->capture_dma_buf->area + (cb_bus - hdsp->capture_dma_buf->addr);
+- hdsp->playback_buffer = hdsp->playback_dma_buf->area + (pb_bus - hdsp->playback_dma_buf->addr);
++ hdsp->capture_dma_buf.area += hdsp->capture_dma_buf.addr - capture_dma->addr;
++ hdsp->playback_dma_buf.area += hdsp->playback_dma_buf.addr - playback_dma->addr;
++ hdsp->capture_buffer = hdsp->capture_dma_buf.area;
++ hdsp->playback_buffer = hdsp->playback_dma_buf.area;
+
+ return 0;
+ }
+@@ -3934,7 +3939,7 @@ static snd_pcm_uframes_t snd_hdsp_hw_pointer(struct snd_pcm_substream *substream
+ return hdsp_hw_pointer(hdsp);
+ }
+
+-static char *hdsp_channel_buffer_location(struct hdsp *hdsp,
++static signed char *hdsp_channel_buffer_location(struct hdsp *hdsp,
+ int stream,
+ int channel)
+
+@@ -3959,7 +3964,7 @@ static int snd_hdsp_playback_copy(struct snd_pcm_substream *substream,
+ void __user *src, unsigned long count)
+ {
+ struct hdsp *hdsp = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ if (snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES))
+ return -EINVAL;
+@@ -3977,7 +3982,7 @@ static int snd_hdsp_playback_copy_kernel(struct snd_pcm_substream *substream,
+ void *src, unsigned long count)
+ {
+ struct hdsp *hdsp = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel);
+ if (snd_BUG_ON(!channel_buf))
+@@ -3991,7 +3996,7 @@ static int snd_hdsp_capture_copy(struct snd_pcm_substream *substream,
+ void __user *dst, unsigned long count)
+ {
+ struct hdsp *hdsp = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ if (snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES))
+ return -EINVAL;
+@@ -4009,7 +4014,7 @@ static int snd_hdsp_capture_copy_kernel(struct snd_pcm_substream *substream,
+ void *dst, unsigned long count)
+ {
+ struct hdsp *hdsp = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel);
+ if (snd_BUG_ON(!channel_buf))
+@@ -4023,7 +4028,7 @@ static int snd_hdsp_hw_silence(struct snd_pcm_substream *substream,
+ unsigned long count)
+ {
+ struct hdsp *hdsp = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = hdsp_channel_buffer_location (hdsp, substream->pstr->stream, channel);
+ if (snd_BUG_ON(!channel_buf))
+@@ -4507,7 +4512,7 @@ static int snd_hdsp_playback_open(struct snd_pcm_substream *substream)
+ snd_pcm_set_sync(substream);
+
+ runtime->hw = snd_hdsp_playback_subinfo;
+- snd_pcm_set_runtime_buffer(substream, hdsp->playback_dma_buf);
++ snd_pcm_set_runtime_buffer(substream, &hdsp->playback_dma_buf);
+
+ hdsp->playback_pid = current->pid;
+ hdsp->playback_substream = substream;
+@@ -4583,7 +4588,7 @@ static int snd_hdsp_capture_open(struct snd_pcm_substream *substream)
+ snd_pcm_set_sync(substream);
+
+ runtime->hw = snd_hdsp_capture_subinfo;
+- snd_pcm_set_runtime_buffer(substream, hdsp->capture_dma_buf);
++ snd_pcm_set_runtime_buffer(substream, &hdsp->capture_dma_buf);
+
+ hdsp->capture_pid = current->pid;
+ hdsp->capture_substream = substream;
+@@ -5439,17 +5444,21 @@ static int snd_hdsp_probe(struct pci_dev *pci,
+ hdsp->pci = pci;
+ err = snd_hdsp_create(card, hdsp);
+ if (err)
+- return err;
++ goto error;
+
+ strcpy(card->shortname, "Hammerfall DSP");
+ sprintf(card->longname, "%s at 0x%lx, irq %d", hdsp->card_name,
+ hdsp->port, hdsp->irq);
+ err = snd_card_register(card);
+ if (err)
+- return err;
++ goto error;
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver hdsp_driver = {
+diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
+index ff06ee82607cf..fa1812e7a49dc 100644
+--- a/sound/pci/rme9652/hdspm.c
++++ b/sound/pci/rme9652/hdspm.c
+@@ -6895,7 +6895,7 @@ static int snd_hdspm_probe(struct pci_dev *pci,
+
+ err = snd_hdspm_create(card, hdspm);
+ if (err < 0)
+- return err;
++ goto error;
+
+ if (hdspm->io_type != MADIface) {
+ snprintf(card->shortname, sizeof(card->shortname), "%s_%x",
+@@ -6914,12 +6914,16 @@ static int snd_hdspm_probe(struct pci_dev *pci,
+
+ err = snd_card_register(card);
+ if (err < 0)
+- return err;
++ goto error;
+
+ pci_set_drvdata(pci, card);
+
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver hdspm_driver = {
+diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c
+index e76f737ac9e8e..e7c320afefe86 100644
+--- a/sound/pci/rme9652/rme9652.c
++++ b/sound/pci/rme9652/rme9652.c
+@@ -208,8 +208,11 @@ struct snd_rme9652 {
+ unsigned char ds_channels;
+ unsigned char ss_channels; /* different for hammerfall/hammerfall-light */
+
+- struct snd_dma_buffer *playback_dma_buf;
+- struct snd_dma_buffer *capture_dma_buf;
++ /* DMA buffers; those are copied instances from the original snd_dma_buf
++ * objects (which are managed via devres) for the address alignments
++ */
++ struct snd_dma_buffer playback_dma_buf;
++ struct snd_dma_buffer capture_dma_buf;
+
+ unsigned char *capture_buffer; /* suitably aligned address */
+ unsigned char *playback_buffer; /* suitably aligned address */
+@@ -227,7 +230,7 @@ struct snd_rme9652 {
+ int last_spdif_sample_rate; /* so that we can catch externally ... */
+ int last_adat_sample_rate; /* ... induced rate changes */
+
+- const char *channel_map;
++ const signed char *channel_map;
+
+ struct snd_card *card;
+ struct snd_pcm *pcm;
+@@ -244,12 +247,12 @@ struct snd_rme9652 {
+ where the data for that channel can be read/written from/to.
+ */
+
+-static const char channel_map_9652_ss[26] = {
++static const signed char channel_map_9652_ss[26] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25
+ };
+
+-static const char channel_map_9636_ss[26] = {
++static const signed char channel_map_9636_ss[26] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ /* channels 16 and 17 are S/PDIF */
+ 24, 25,
+@@ -257,7 +260,7 @@ static const char channel_map_9636_ss[26] = {
+ -1, -1, -1, -1, -1, -1, -1, -1
+ };
+
+-static const char channel_map_9652_ds[26] = {
++static const signed char channel_map_9652_ds[26] = {
+ /* ADAT channels are remapped */
+ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23,
+ /* channels 12 and 13 are S/PDIF */
+@@ -266,7 +269,7 @@ static const char channel_map_9652_ds[26] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+ };
+
+-static const char channel_map_9636_ds[26] = {
++static const signed char channel_map_9636_ds[26] = {
+ /* ADAT channels are remapped */
+ 1, 3, 5, 7, 9, 11, 13, 15,
+ /* channels 8 and 9 are S/PDIF */
+@@ -1719,30 +1722,32 @@ static void snd_rme9652_card_free(struct snd_card *card)
+
+ static int snd_rme9652_initialize_memory(struct snd_rme9652 *rme9652)
+ {
+- unsigned long pb_bus, cb_bus;
++ struct snd_dma_buffer *capture_dma, *playback_dma;
+
+- rme9652->capture_dma_buf =
+- snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
+- rme9652->playback_dma_buf =
+- snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
+- if (!rme9652->capture_dma_buf || !rme9652->playback_dma_buf) {
++ capture_dma = snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
++ playback_dma = snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
++ if (!capture_dma || !playback_dma) {
+ dev_err(rme9652->card->dev,
+ "%s: no buffers available\n", rme9652->card_name);
+ return -ENOMEM;
+ }
+
+- /* Align to bus-space 64K boundary */
++	/* copy to our own data for alignment */
++ rme9652->capture_dma_buf = *capture_dma;
++ rme9652->playback_dma_buf = *playback_dma;
+
+- cb_bus = ALIGN(rme9652->capture_dma_buf->addr, 0x10000ul);
+- pb_bus = ALIGN(rme9652->playback_dma_buf->addr, 0x10000ul);
++ /* Align to bus-space 64K boundary */
++ rme9652->capture_dma_buf.addr = ALIGN(capture_dma->addr, 0x10000ul);
++ rme9652->playback_dma_buf.addr = ALIGN(playback_dma->addr, 0x10000ul);
+
+ /* Tell the card where it is */
++ rme9652_write(rme9652, RME9652_rec_buffer, rme9652->capture_dma_buf.addr);
++ rme9652_write(rme9652, RME9652_play_buffer, rme9652->playback_dma_buf.addr);
+
+- rme9652_write(rme9652, RME9652_rec_buffer, cb_bus);
+- rme9652_write(rme9652, RME9652_play_buffer, pb_bus);
+-
+- rme9652->capture_buffer = rme9652->capture_dma_buf->area + (cb_bus - rme9652->capture_dma_buf->addr);
+- rme9652->playback_buffer = rme9652->playback_dma_buf->area + (pb_bus - rme9652->playback_dma_buf->addr);
++ rme9652->capture_dma_buf.area += rme9652->capture_dma_buf.addr - capture_dma->addr;
++ rme9652->playback_dma_buf.area += rme9652->playback_dma_buf.addr - playback_dma->addr;
++ rme9652->capture_buffer = rme9652->capture_dma_buf.area;
++ rme9652->playback_buffer = rme9652->playback_dma_buf.area;
+
+ return 0;
+ }
+@@ -1814,7 +1819,7 @@ static snd_pcm_uframes_t snd_rme9652_hw_pointer(struct snd_pcm_substream *substr
+ return rme9652_hw_pointer(rme9652);
+ }
+
+-static char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652,
++static signed char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652,
+ int stream,
+ int channel)
+
+@@ -1842,7 +1847,7 @@ static int snd_rme9652_playback_copy(struct snd_pcm_substream *substream,
+ void __user *src, unsigned long count)
+ {
+ struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES))
+ return -EINVAL;
+@@ -1862,7 +1867,7 @@ static int snd_rme9652_playback_copy_kernel(struct snd_pcm_substream *substream,
+ void *src, unsigned long count)
+ {
+ struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = rme9652_channel_buffer_location(rme9652,
+ substream->pstr->stream,
+@@ -1878,7 +1883,7 @@ static int snd_rme9652_capture_copy(struct snd_pcm_substream *substream,
+ void __user *dst, unsigned long count)
+ {
+ struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES))
+ return -EINVAL;
+@@ -1898,7 +1903,7 @@ static int snd_rme9652_capture_copy_kernel(struct snd_pcm_substream *substream,
+ void *dst, unsigned long count)
+ {
+ struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = rme9652_channel_buffer_location(rme9652,
+ substream->pstr->stream,
+@@ -1914,7 +1919,7 @@ static int snd_rme9652_hw_silence(struct snd_pcm_substream *substream,
+ unsigned long count)
+ {
+ struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream);
+- char *channel_buf;
++ signed char *channel_buf;
+
+ channel_buf = rme9652_channel_buffer_location (rme9652,
+ substream->pstr->stream,
+@@ -2259,7 +2264,7 @@ static int snd_rme9652_playback_open(struct snd_pcm_substream *substream)
+ snd_pcm_set_sync(substream);
+
+ runtime->hw = snd_rme9652_playback_subinfo;
+- snd_pcm_set_runtime_buffer(substream, rme9652->playback_dma_buf);
++ snd_pcm_set_runtime_buffer(substream, &rme9652->playback_dma_buf);
+
+ if (rme9652->capture_substream == NULL) {
+ rme9652_stop(rme9652);
+@@ -2318,7 +2323,7 @@ static int snd_rme9652_capture_open(struct snd_pcm_substream *substream)
+ snd_pcm_set_sync(substream);
+
+ runtime->hw = snd_rme9652_capture_subinfo;
+- snd_pcm_set_runtime_buffer(substream, rme9652->capture_dma_buf);
++ snd_pcm_set_runtime_buffer(substream, &rme9652->capture_dma_buf);
+
+ if (rme9652->playback_substream == NULL) {
+ rme9652_stop(rme9652);
+@@ -2567,7 +2572,7 @@ static int snd_rme9652_probe(struct pci_dev *pci,
+ rme9652->pci = pci;
+ err = snd_rme9652_create(card, rme9652, precise_ptr[dev]);
+ if (err)
+- return err;
++ goto error;
+
+ strcpy(card->shortname, rme9652->card_name);
+
+@@ -2575,10 +2580,14 @@ static int snd_rme9652_probe(struct pci_dev *pci,
+ card->shortname, rme9652->port, rme9652->irq);
+ err = snd_card_register(card);
+ if (err)
+- return err;
++ goto error;
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
++
++ error:
++ snd_card_free(card);
++ return err;
+ }
+
+ static struct pci_driver rme9652_driver = {
+diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c
+index 0b722b0e0604b..fabe393607f8f 100644
+--- a/sound/pci/sis7019.c
++++ b/sound/pci/sis7019.c
+@@ -1331,8 +1331,8 @@ static int sis_chip_create(struct snd_card *card,
+ return 0;
+ }
+
+-static int snd_sis7019_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_sis7019_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct sis7019 *sis;
+@@ -1352,8 +1352,8 @@ static int snd_sis7019_probe(struct pci_dev *pci,
+ if (!codecs)
+ codecs = SIS_PRIMARY_CODEC_PRESENT;
+
+- rc = snd_card_new(&pci->dev, index, id, THIS_MODULE,
+- sizeof(*sis), &card);
++ rc = snd_devm_card_new(&pci->dev, index, id, THIS_MODULE,
++ sizeof(*sis), &card);
+ if (rc < 0)
+ return rc;
+
+@@ -1386,6 +1386,12 @@ static int snd_sis7019_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_sis7019_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_sis7019_probe(pci, pci_id));
++}
++
+ static struct pci_driver sis7019_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_sis7019_ids,
+diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c
+index c8c49881008fd..f91cbf6eeca0f 100644
+--- a/sound/pci/sonicvibes.c
++++ b/sound/pci/sonicvibes.c
+@@ -1387,8 +1387,8 @@ static int snd_sonicvibes_midi(struct sonicvibes *sonic,
+ return 0;
+ }
+
+-static int snd_sonic_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_sonic_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -1459,6 +1459,12 @@ static int snd_sonic_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_sonic_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_sonic_probe(pci, pci_id));
++}
++
+ static struct pci_driver sonicvibes_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_sonic_ids,
+diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
+index 65514f7e42d7d..361b83fd721e6 100644
+--- a/sound/pci/via82xx.c
++++ b/sound/pci/via82xx.c
+@@ -2458,8 +2458,8 @@ static int check_dxs_list(struct pci_dev *pci, int revision)
+ return VIA_DXS_48K;
+ };
+
+-static int snd_via82xx_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_via82xx_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct via82xx *chip;
+@@ -2569,6 +2569,12 @@ static int snd_via82xx_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_via82xx_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id));
++}
++
+ static struct pci_driver via82xx_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_via82xx_ids,
+diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c
+index 234f7fbed2364..ca7f024bf8ec6 100644
+--- a/sound/pci/via82xx_modem.c
++++ b/sound/pci/via82xx_modem.c
+@@ -1103,8 +1103,8 @@ static int snd_via82xx_create(struct snd_card *card,
+ }
+
+
+-static int snd_via82xx_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_via82xx_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ struct snd_card *card;
+ struct via82xx_modem *chip;
+@@ -1157,6 +1157,12 @@ static int snd_via82xx_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_via82xx_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id));
++}
++
+ static struct pci_driver via82xx_modem_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_via82xx_modem_ids,
+diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
+index 1e198e4d57b8d..d62a0e2ddf609 100644
+--- a/sound/pci/ymfpci/ymfpci.c
++++ b/sound/pci/ymfpci/ymfpci.c
+@@ -150,8 +150,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i
+ void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { }
+ #endif /* SUPPORT_JOYSTICK */
+
+-static int snd_card_ymfpci_probe(struct pci_dev *pci,
+- const struct pci_device_id *pci_id)
++static int __snd_card_ymfpci_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+@@ -170,7 +170,7 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
+ return -ENOENT;
+ }
+
+- err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
++ err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ sizeof(*chip), &card);
+ if (err < 0)
+ return err;
+@@ -333,6 +333,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
+ return 0;
+ }
+
++static int snd_card_ymfpci_probe(struct pci_dev *pci,
++ const struct pci_device_id *pci_id)
++{
++ return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id));
++}
++
+ static struct pci_driver ymfpci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_ymfpci_ids,
+diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
+index c80114c0ad7bf..b492c32ce0704 100644
+--- a/sound/pci/ymfpci/ymfpci_main.c
++++ b/sound/pci/ymfpci/ymfpci_main.c
+@@ -2165,7 +2165,7 @@ static int snd_ymfpci_memalloc(struct snd_ymfpci *chip)
+ chip->work_base = ptr;
+ chip->work_base_addr = ptr_addr;
+
+- snd_BUG_ON(ptr + chip->work_size !=
++ snd_BUG_ON(ptr + PAGE_ALIGN(chip->work_size) !=
+ chip->work_ptr->area + chip->work_ptr->bytes);
+
+ snd_ymfpci_writel(chip, YDSXGR_PLAYCTRLBASE, chip->bank_base_playback_addr);
+diff --git a/sound/soc/amd/vangogh/acp5x-pcm-dma.c b/sound/soc/amd/vangogh/acp5x-pcm-dma.c
+index f10de38976cb5..bfca4cf423cf1 100644
+--- a/sound/soc/amd/vangogh/acp5x-pcm-dma.c
++++ b/sound/soc/amd/vangogh/acp5x-pcm-dma.c
+@@ -281,7 +281,7 @@ static int acp5x_dma_hw_params(struct snd_soc_component *component,
+ return -EINVAL;
+ }
+ size = params_buffer_bytes(params);
+- rtd->dma_addr = substream->dma_buffer.addr;
++ rtd->dma_addr = substream->runtime->dma_addr;
+ rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT);
+ config_acp5x_dma(rtd, substream->stream);
+ return 0;
+@@ -426,51 +426,51 @@ static int acp5x_audio_remove(struct platform_device *pdev)
+ static int __maybe_unused acp5x_pcm_resume(struct device *dev)
+ {
+ struct i2s_dev_data *adata;
+- u32 val, reg_val, frmt_val;
++ struct i2s_stream_instance *rtd;
++ u32 val;
+
+- reg_val = 0;
+- frmt_val = 0;
+ adata = dev_get_drvdata(dev);
+
+ if (adata->play_stream && adata->play_stream->runtime) {
+- struct i2s_stream_instance *rtd =
+- adata->play_stream->runtime->private_data;
++ rtd = adata->play_stream->runtime->private_data;
+ config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK);
+- switch (rtd->i2s_instance) {
+- case I2S_HS_INSTANCE:
+- reg_val = ACP_HSTDM_ITER;
+- frmt_val = ACP_HSTDM_TXFRMT;
+- break;
+- case I2S_SP_INSTANCE:
+- default:
+- reg_val = ACP_I2STDM_ITER;
+- frmt_val = ACP_I2STDM_TXFRMT;
++ acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_ITER);
++ if (adata->tdm_mode == TDM_ENABLE) {
++ acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_TXFRMT);
++ val = acp_readl(adata->acp5x_base + ACP_HSTDM_ITER);
++ acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_ITER);
++ }
++ }
++ if (adata->i2ssp_play_stream && adata->i2ssp_play_stream->runtime) {
++ rtd = adata->i2ssp_play_stream->runtime->private_data;
++ config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK);
++ acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_ITER);
++ if (adata->tdm_mode == TDM_ENABLE) {
++ acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_TXFRMT);
++ val = acp_readl(adata->acp5x_base + ACP_I2STDM_ITER);
++ acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_ITER);
+ }
+- acp_writel((rtd->xfer_resolution << 3),
+- rtd->acp5x_base + reg_val);
+ }
+
+ if (adata->capture_stream && adata->capture_stream->runtime) {
+- struct i2s_stream_instance *rtd =
+- adata->capture_stream->runtime->private_data;
++ rtd = adata->capture_stream->runtime->private_data;
+ config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE);
+- switch (rtd->i2s_instance) {
+- case I2S_HS_INSTANCE:
+- reg_val = ACP_HSTDM_IRER;
+- frmt_val = ACP_HSTDM_RXFRMT;
+- break;
+- case I2S_SP_INSTANCE:
+- default:
+- reg_val = ACP_I2STDM_IRER;
+- frmt_val = ACP_I2STDM_RXFRMT;
++ acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_IRER);
++ if (adata->tdm_mode == TDM_ENABLE) {
++ acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_RXFRMT);
++ val = acp_readl(adata->acp5x_base + ACP_HSTDM_IRER);
++ acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_IRER);
+ }
+- acp_writel((rtd->xfer_resolution << 3),
+- rtd->acp5x_base + reg_val);
+ }
+- if (adata->tdm_mode == TDM_ENABLE) {
+- acp_writel(adata->tdm_fmt, adata->acp5x_base + frmt_val);
+- val = acp_readl(adata->acp5x_base + reg_val);
+- acp_writel(val | 0x2, adata->acp5x_base + reg_val);
++ if (adata->i2ssp_capture_stream && adata->i2ssp_capture_stream->runtime) {
++ rtd = adata->i2ssp_capture_stream->runtime->private_data;
++ config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE);
++ acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_IRER);
++ if (adata->tdm_mode == TDM_ENABLE) {
++ acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_RXFRMT);
++ val = acp_readl(adata->acp5x_base + ACP_I2STDM_IRER);
++ acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_IRER);
++ }
+ }
+ acp_writel(1, adata->acp5x_base + ACP_EXTERNAL_INTR_ENB);
+ return 0;
+diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
+index a9f9f449c48c2..74b7b2611aa70 100644
+--- a/sound/soc/atmel/atmel-classd.c
++++ b/sound/soc/atmel/atmel-classd.c
+@@ -458,7 +458,6 @@ static const struct snd_soc_component_driver atmel_classd_cpu_dai_component = {
+ .num_controls = ARRAY_SIZE(atmel_classd_snd_controls),
+ .idle_bias_on = 1,
+ .use_pmdown_time = 1,
+- .endianness = 1,
+ };
+
+ /* ASoC sound card */
+diff --git a/sound/soc/atmel/atmel-i2s.c b/sound/soc/atmel/atmel-i2s.c
+index 6b3d9c05eaf27..4cb0605f6daa2 100644
+--- a/sound/soc/atmel/atmel-i2s.c
++++ b/sound/soc/atmel/atmel-i2s.c
+@@ -163,11 +163,14 @@ struct atmel_i2s_gck_param {
+
+ #define I2S_MCK_12M288 12288000UL
+ #define I2S_MCK_11M2896 11289600UL
++#define I2S_MCK_6M144 6144000UL
+
+ /* mck = (32 * (imckfs+1) / (imckdiv+1)) * fs */
+ static const struct atmel_i2s_gck_param gck_params[] = {
++ /* mck = 6.144Mhz */
++ { 8000, I2S_MCK_6M144, 1, 47}, /* mck = 768 fs */
++
+ /* mck = 12.288MHz */
+- { 8000, I2S_MCK_12M288, 0, 47}, /* mck = 1536 fs */
+ { 16000, I2S_MCK_12M288, 1, 47}, /* mck = 768 fs */
+ { 24000, I2S_MCK_12M288, 3, 63}, /* mck = 512 fs */
+ { 32000, I2S_MCK_12M288, 3, 47}, /* mck = 384 fs */
+diff --git a/sound/soc/atmel/atmel-pdmic.c b/sound/soc/atmel/atmel-pdmic.c
+index 42117de299e74..ea34efac2fff5 100644
+--- a/sound/soc/atmel/atmel-pdmic.c
++++ b/sound/soc/atmel/atmel-pdmic.c
+@@ -481,7 +481,6 @@ static const struct snd_soc_component_driver atmel_pdmic_cpu_dai_component = {
+ .num_controls = ARRAY_SIZE(atmel_pdmic_snd_controls),
+ .idle_bias_on = 1,
+ .use_pmdown_time = 1,
+- .endianness = 1,
+ };
+
+ /* ASoC sound card */
+diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
+index 6a63e8797a0b6..97533412ce11e 100644
+--- a/sound/soc/atmel/atmel_ssc_dai.c
++++ b/sound/soc/atmel/atmel_ssc_dai.c
+@@ -280,7 +280,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
+
+ /* Enable PMC peripheral clock for this SSC */
+ pr_debug("atmel_ssc_dai: Starting clock\n");
+- clk_enable(ssc_p->ssc->clk);
++ ret = clk_enable(ssc_p->ssc->clk);
++ if (ret)
++ return ret;
++
+ ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk);
+
+ /* Reset the SSC unless initialized to keep it in a clean state */
+diff --git a/sound/soc/atmel/mchp-spdifrx.c b/sound/soc/atmel/mchp-spdifrx.c
+index bcd4f3e4fb0fb..39a3c2a33bdbb 100644
+--- a/sound/soc/atmel/mchp-spdifrx.c
++++ b/sound/soc/atmel/mchp-spdifrx.c
+@@ -217,7 +217,6 @@ struct mchp_spdifrx_ch_stat {
+ struct mchp_spdifrx_user_data {
+ unsigned char data[SPDIFRX_UD_BITS / 8];
+ struct completion done;
+- spinlock_t lock; /* protect access to user data */
+ };
+
+ struct mchp_spdifrx_mixer_control {
+@@ -231,13 +230,13 @@ struct mchp_spdifrx_mixer_control {
+ struct mchp_spdifrx_dev {
+ struct snd_dmaengine_dai_dma_data capture;
+ struct mchp_spdifrx_mixer_control control;
+- spinlock_t blockend_lock; /* protect access to blockend_refcount */
+- int blockend_refcount;
++ struct mutex mlock;
+ struct device *dev;
+ struct regmap *regmap;
+ struct clk *pclk;
+ struct clk *gclk;
+ unsigned int fmt;
++ unsigned int trigger_enabled;
+ unsigned int gclk_enabled:1;
+ };
+
+@@ -275,35 +274,11 @@ static void mchp_spdifrx_channel_user_data_read(struct mchp_spdifrx_dev *dev,
+ }
+ }
+
+-/* called from non-atomic context only */
+-static void mchp_spdifrx_isr_blockend_en(struct mchp_spdifrx_dev *dev)
+-{
+- unsigned long flags;
+-
+- spin_lock_irqsave(&dev->blockend_lock, flags);
+- dev->blockend_refcount++;
+- /* don't enable BLOCKEND interrupt if it's already enabled */
+- if (dev->blockend_refcount == 1)
+- regmap_write(dev->regmap, SPDIFRX_IER, SPDIFRX_IR_BLOCKEND);
+- spin_unlock_irqrestore(&dev->blockend_lock, flags);
+-}
+-
+-/* called from atomic context only */
+-static void mchp_spdifrx_isr_blockend_dis(struct mchp_spdifrx_dev *dev)
+-{
+- spin_lock(&dev->blockend_lock);
+- dev->blockend_refcount--;
+- /* don't enable BLOCKEND interrupt if it's already enabled */
+- if (dev->blockend_refcount == 0)
+- regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_BLOCKEND);
+- spin_unlock(&dev->blockend_lock);
+-}
+-
+ static irqreturn_t mchp_spdif_interrupt(int irq, void *dev_id)
+ {
+ struct mchp_spdifrx_dev *dev = dev_id;
+ struct mchp_spdifrx_mixer_control *ctrl = &dev->control;
+- u32 sr, imr, pending, idr = 0;
++ u32 sr, imr, pending;
+ irqreturn_t ret = IRQ_NONE;
+ int ch;
+
+@@ -318,13 +293,10 @@ static irqreturn_t mchp_spdif_interrupt(int irq, void *dev_id)
+
+ if (pending & SPDIFRX_IR_BLOCKEND) {
+ for (ch = 0; ch < SPDIFRX_CHANNELS; ch++) {
+- spin_lock(&ctrl->user_data[ch].lock);
+ mchp_spdifrx_channel_user_data_read(dev, ch);
+- spin_unlock(&ctrl->user_data[ch].lock);
+-
+ complete(&ctrl->user_data[ch].done);
+ }
+- mchp_spdifrx_isr_blockend_dis(dev);
++ regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_BLOCKEND);
+ ret = IRQ_HANDLED;
+ }
+
+@@ -332,7 +304,7 @@ static irqreturn_t mchp_spdif_interrupt(int irq, void *dev_id)
+ if (pending & SPDIFRX_IR_CSC(ch)) {
+ mchp_spdifrx_channel_status_read(dev, ch);
+ complete(&ctrl->ch_stat[ch].done);
+- idr |= SPDIFRX_IR_CSC(ch);
++ regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_CSC(ch));
+ ret = IRQ_HANDLED;
+ }
+ }
+@@ -342,8 +314,6 @@ static irqreturn_t mchp_spdif_interrupt(int irq, void *dev_id)
+ ret = IRQ_HANDLED;
+ }
+
+- regmap_write(dev->regmap, SPDIFRX_IDR, idr);
+-
+ return ret;
+ }
+
+@@ -351,47 +321,40 @@ static int mchp_spdifrx_trigger(struct snd_pcm_substream *substream, int cmd,
+ struct snd_soc_dai *dai)
+ {
+ struct mchp_spdifrx_dev *dev = snd_soc_dai_get_drvdata(dai);
+- u32 mr;
+- int running;
+- int ret;
+-
+- regmap_read(dev->regmap, SPDIFRX_MR, &mr);
+- running = !!(mr & SPDIFRX_MR_RXEN_ENABLE);
++ int ret = 0;
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_RESUME:
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+- if (!running) {
+- mr &= ~SPDIFRX_MR_RXEN_MASK;
+- mr |= SPDIFRX_MR_RXEN_ENABLE;
+- /* enable overrun interrupts */
+- regmap_write(dev->regmap, SPDIFRX_IER,
+- SPDIFRX_IR_OVERRUN);
+- }
++ mutex_lock(&dev->mlock);
++ /* Enable overrun interrupts */
++ regmap_write(dev->regmap, SPDIFRX_IER, SPDIFRX_IR_OVERRUN);
++
++ /* Enable receiver. */
++ regmap_update_bits(dev->regmap, SPDIFRX_MR, SPDIFRX_MR_RXEN_MASK,
++ SPDIFRX_MR_RXEN_ENABLE);
++ dev->trigger_enabled = true;
++ mutex_unlock(&dev->mlock);
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+- if (running) {
+- mr &= ~SPDIFRX_MR_RXEN_MASK;
+- mr |= SPDIFRX_MR_RXEN_DISABLE;
+- /* disable overrun interrupts */
+- regmap_write(dev->regmap, SPDIFRX_IDR,
+- SPDIFRX_IR_OVERRUN);
+- }
++ mutex_lock(&dev->mlock);
++ /* Disable overrun interrupts */
++ regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_OVERRUN);
++
++ /* Disable receiver. */
++ regmap_update_bits(dev->regmap, SPDIFRX_MR, SPDIFRX_MR_RXEN_MASK,
++ SPDIFRX_MR_RXEN_DISABLE);
++ dev->trigger_enabled = false;
++ mutex_unlock(&dev->mlock);
+ break;
+ default:
+- return -EINVAL;
+- }
+-
+- ret = regmap_write(dev->regmap, SPDIFRX_MR, mr);
+- if (ret) {
+- dev_err(dev->dev, "unable to enable/disable RX: %d\n", ret);
+- return ret;
++ ret = -EINVAL;
+ }
+
+- return 0;
++ return ret;
+ }
+
+ static int mchp_spdifrx_hw_params(struct snd_pcm_substream *substream,
+@@ -399,7 +362,7 @@ static int mchp_spdifrx_hw_params(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+ struct mchp_spdifrx_dev *dev = snd_soc_dai_get_drvdata(dai);
+- u32 mr;
++ u32 mr = 0;
+ int ret;
+
+ dev_dbg(dev->dev, "%s() rate=%u format=%#x width=%u channels=%u\n",
+@@ -411,13 +374,6 @@ static int mchp_spdifrx_hw_params(struct snd_pcm_substream *substream,
+ return -EINVAL;
+ }
+
+- regmap_read(dev->regmap, SPDIFRX_MR, &mr);
+-
+- if (mr & SPDIFRX_MR_RXEN_ENABLE) {
+- dev_err(dev->dev, "PCM already running\n");
+- return -EBUSY;
+- }
+-
+ if (params_channels(params) != SPDIFRX_CHANNELS) {
+ dev_err(dev->dev, "unsupported number of channels: %d\n",
+ params_channels(params));
+@@ -443,6 +399,13 @@ static int mchp_spdifrx_hw_params(struct snd_pcm_substream *substream,
+ return -EINVAL;
+ }
+
++ mutex_lock(&dev->mlock);
++ if (dev->trigger_enabled) {
++ dev_err(dev->dev, "PCM already running\n");
++ ret = -EBUSY;
++ goto unlock;
++ }
++
+ if (dev->gclk_enabled) {
+ clk_disable_unprepare(dev->gclk);
+ dev->gclk_enabled = 0;
+@@ -453,19 +416,24 @@ static int mchp_spdifrx_hw_params(struct snd_pcm_substream *substream,
+ dev_err(dev->dev,
+ "unable to set gclk min rate: rate %u * ratio %u + 1\n",
+ params_rate(params), SPDIFRX_GCLK_RATIO_MIN);
+- return ret;
++ goto unlock;
+ }
+ ret = clk_prepare_enable(dev->gclk);
+ if (ret) {
+ dev_err(dev->dev, "unable to enable gclk: %d\n", ret);
+- return ret;
++ goto unlock;
+ }
+ dev->gclk_enabled = 1;
+
+ dev_dbg(dev->dev, "GCLK range min set to %d\n",
+ params_rate(params) * SPDIFRX_GCLK_RATIO_MIN + 1);
+
+- return regmap_write(dev->regmap, SPDIFRX_MR, mr);
++ ret = regmap_write(dev->regmap, SPDIFRX_MR, mr);
++
++unlock:
++ mutex_unlock(&dev->mlock);
++
++ return ret;
+ }
+
+ static int mchp_spdifrx_hw_free(struct snd_pcm_substream *substream,
+@@ -473,10 +441,12 @@ static int mchp_spdifrx_hw_free(struct snd_pcm_substream *substream,
+ {
+ struct mchp_spdifrx_dev *dev = snd_soc_dai_get_drvdata(dai);
+
++ mutex_lock(&dev->mlock);
+ if (dev->gclk_enabled) {
+ clk_disable_unprepare(dev->gclk);
+ dev->gclk_enabled = 0;
+ }
++ mutex_unlock(&dev->mlock);
+ return 0;
+ }
+
+@@ -513,22 +483,51 @@ static int mchp_spdifrx_cs_get(struct mchp_spdifrx_dev *dev,
+ {
+ struct mchp_spdifrx_mixer_control *ctrl = &dev->control;
+ struct mchp_spdifrx_ch_stat *ch_stat = &ctrl->ch_stat[channel];
+- int ret;
+-
+- regmap_write(dev->regmap, SPDIFRX_IER, SPDIFRX_IR_CSC(channel));
+- /* check for new data available */
+- ret = wait_for_completion_interruptible_timeout(&ch_stat->done,
+- msecs_to_jiffies(100));
+- /* IP might not be started or valid stream might not be present */
+- if (ret < 0) {
+- dev_dbg(dev->dev, "channel status for channel %d timeout\n",
+- channel);
++ int ret = 0;
++
++ mutex_lock(&dev->mlock);
++
++ /*
++ * We may reach this point with both clocks enabled but the receiver
++	 * still disabled. To avoid waiting for completion and returning with a
++	 * timeout, check dev->trigger_enabled.
++ *
++ * To retrieve data:
++ * - if the receiver is enabled CSC IRQ will update the data in software
++ * caches (ch_stat->data)
++	 * - otherwise we just update the software caches here with the latest
++ * available information and return it; in this case we don't need
++ * spin locking as the IRQ is disabled and will not be raised from
++ * anywhere else.
++ */
++
++ if (dev->trigger_enabled) {
++ reinit_completion(&ch_stat->done);
++ regmap_write(dev->regmap, SPDIFRX_IER, SPDIFRX_IR_CSC(channel));
++ /* Check for new data available */
++ ret = wait_for_completion_interruptible_timeout(&ch_stat->done,
++ msecs_to_jiffies(100));
++ /* Valid stream might not be present */
++ if (ret <= 0) {
++ dev_dbg(dev->dev, "channel status for channel %d timeout\n",
++ channel);
++ regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_CSC(channel));
++ ret = ret ? : -ETIMEDOUT;
++ goto unlock;
++ } else {
++ ret = 0;
++ }
++ } else {
++ /* Update software cache with latest channel status. */
++ mchp_spdifrx_channel_status_read(dev, channel);
+ }
+
+ memcpy(uvalue->value.iec958.status, ch_stat->data,
+ sizeof(ch_stat->data));
+
+- return 0;
++unlock:
++ mutex_unlock(&dev->mlock);
++ return ret;
+ }
+
+ static int mchp_spdifrx_cs1_get(struct snd_kcontrol *kcontrol,
+@@ -562,28 +561,49 @@ static int mchp_spdifrx_subcode_ch_get(struct mchp_spdifrx_dev *dev,
+ int channel,
+ struct snd_ctl_elem_value *uvalue)
+ {
+- unsigned long flags;
+ struct mchp_spdifrx_mixer_control *ctrl = &dev->control;
+ struct mchp_spdifrx_user_data *user_data = &ctrl->user_data[channel];
+- int ret;
+-
+- reinit_completion(&user_data->done);
+- mchp_spdifrx_isr_blockend_en(dev);
+- ret = wait_for_completion_interruptible_timeout(&user_data->done,
+- msecs_to_jiffies(100));
+- /* IP might not be started or valid stream might not be present */
+- if (ret <= 0) {
+- dev_dbg(dev->dev, "user data for channel %d timeout\n",
+- channel);
+- return ret;
++ int ret = 0;
++
++ mutex_lock(&dev->mlock);
++
++ /*
++ * We may reach this point with both clocks enabled but the receiver
++	 * still disabled. To avoid waiting for completion only to time out, we
++	 * check the dev->trigger_enabled flag here.
++ *
++ * To retrieve data:
++	 * - if the receiver is enabled we need to wait for the blockend IRQ to
++	 *   read the data and update the software caches for us
++ * - otherwise reading the SPDIFRX_CHUD() registers is enough.
++ */
++
++ if (dev->trigger_enabled) {
++ reinit_completion(&user_data->done);
++ regmap_write(dev->regmap, SPDIFRX_IER, SPDIFRX_IR_BLOCKEND);
++ ret = wait_for_completion_interruptible_timeout(&user_data->done,
++ msecs_to_jiffies(100));
++ /* Valid stream might not be present. */
++ if (ret <= 0) {
++ dev_dbg(dev->dev, "user data for channel %d timeout\n",
++ channel);
++ regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_BLOCKEND);
++ ret = ret ? : -ETIMEDOUT;
++ goto unlock;
++ } else {
++ ret = 0;
++ }
++ } else {
++ /* Update software cache with last available data. */
++ mchp_spdifrx_channel_user_data_read(dev, channel);
+ }
+
+- spin_lock_irqsave(&user_data->lock, flags);
+ memcpy(uvalue->value.iec958.subcode, user_data->data,
+ sizeof(user_data->data));
+- spin_unlock_irqrestore(&user_data->lock, flags);
+
+- return 0;
++unlock:
++ mutex_unlock(&dev->mlock);
++ return ret;
+ }
+
+ static int mchp_spdifrx_subcode_ch1_get(struct snd_kcontrol *kcontrol,
+@@ -624,10 +644,24 @@ static int mchp_spdifrx_ulock_get(struct snd_kcontrol *kcontrol,
+ u32 val;
+ bool ulock_old = ctrl->ulock;
+
+- regmap_read(dev->regmap, SPDIFRX_RSR, &val);
+- ctrl->ulock = !(val & SPDIFRX_RSR_ULOCK);
++ mutex_lock(&dev->mlock);
++
++ /*
++	 * The RSR.ULOCK has the wrong value if both pclk and gclk are enabled
++ * and the receiver is disabled. Thus we take into account the
++ * dev->trigger_enabled here to return a real status.
++ */
++ if (dev->trigger_enabled) {
++ regmap_read(dev->regmap, SPDIFRX_RSR, &val);
++ ctrl->ulock = !(val & SPDIFRX_RSR_ULOCK);
++ } else {
++ ctrl->ulock = 0;
++ }
++
+ uvalue->value.integer.value[0] = ctrl->ulock;
+
++ mutex_unlock(&dev->mlock);
++
+ return ulock_old != ctrl->ulock;
+ }
+
+@@ -640,8 +674,22 @@ static int mchp_spdifrx_badf_get(struct snd_kcontrol *kcontrol,
+ u32 val;
+ bool badf_old = ctrl->badf;
+
+- regmap_read(dev->regmap, SPDIFRX_RSR, &val);
+- ctrl->badf = !!(val & SPDIFRX_RSR_BADF);
++ mutex_lock(&dev->mlock);
++
++ /*
++	 * The RSR.ULOCK has the wrong value if both pclk and gclk are enabled
++ * and the receiver is disabled. Thus we take into account the
++ * dev->trigger_enabled here to return a real status.
++ */
++ if (dev->trigger_enabled) {
++ regmap_read(dev->regmap, SPDIFRX_RSR, &val);
++ ctrl->badf = !!(val & SPDIFRX_RSR_BADF);
++ } else {
++ ctrl->badf = 0;
++ }
++
++ mutex_unlock(&dev->mlock);
++
+ uvalue->value.integer.value[0] = ctrl->badf;
+
+ return badf_old != ctrl->badf;
+@@ -653,11 +701,48 @@ static int mchp_spdifrx_signal_get(struct snd_kcontrol *kcontrol,
+ struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol);
+ struct mchp_spdifrx_dev *dev = snd_soc_dai_get_drvdata(dai);
+ struct mchp_spdifrx_mixer_control *ctrl = &dev->control;
+- u32 val;
++ u32 val = ~0U, loops = 10;
++ int ret;
+ bool signal_old = ctrl->signal;
+
+- regmap_read(dev->regmap, SPDIFRX_RSR, &val);
+- ctrl->signal = !(val & SPDIFRX_RSR_NOSIGNAL);
++ mutex_lock(&dev->mlock);
++
++ /*
++	 * To get the signal we need to have the receiver enabled. It may
++	 * also be enabled from the trigger() function, so we need to take
++	 * care not to disable the receiver while it is running.
++ */
++ if (!dev->trigger_enabled) {
++ ret = clk_prepare_enable(dev->gclk);
++ if (ret)
++ goto unlock;
++
++ regmap_update_bits(dev->regmap, SPDIFRX_MR, SPDIFRX_MR_RXEN_MASK,
++ SPDIFRX_MR_RXEN_ENABLE);
++
++ /* Wait for RSR.ULOCK bit. */
++ while (--loops) {
++ regmap_read(dev->regmap, SPDIFRX_RSR, &val);
++ if (!(val & SPDIFRX_RSR_ULOCK))
++ break;
++ usleep_range(100, 150);
++ }
++
++ regmap_update_bits(dev->regmap, SPDIFRX_MR, SPDIFRX_MR_RXEN_MASK,
++ SPDIFRX_MR_RXEN_DISABLE);
++
++ clk_disable_unprepare(dev->gclk);
++ } else {
++ regmap_read(dev->regmap, SPDIFRX_RSR, &val);
++ }
++
++unlock:
++ mutex_unlock(&dev->mlock);
++
++ if (!(val & SPDIFRX_RSR_ULOCK))
++ ctrl->signal = !(val & SPDIFRX_RSR_NOSIGNAL);
++ else
++ ctrl->signal = 0;
+ uvalue->value.integer.value[0] = ctrl->signal;
+
+ return signal_old != ctrl->signal;
+@@ -682,18 +767,32 @@ static int mchp_spdifrx_rate_get(struct snd_kcontrol *kcontrol,
+ u32 val;
+ int rate;
+
+- regmap_read(dev->regmap, SPDIFRX_RSR, &val);
+-
+- /* if the receiver is not locked, ISF data is invalid */
+- if (val & SPDIFRX_RSR_ULOCK || !(val & SPDIFRX_RSR_IFS_MASK)) {
++ mutex_lock(&dev->mlock);
++
++ /*
++	 * The RSR.ULOCK has the wrong value if both pclk and gclk are enabled
++ * and the receiver is disabled. Thus we take into account the
++ * dev->trigger_enabled here to return a real status.
++ */
++ if (dev->trigger_enabled) {
++ regmap_read(dev->regmap, SPDIFRX_RSR, &val);
++		/* If the receiver is not locked, IFS data is invalid. */
++ if (val & SPDIFRX_RSR_ULOCK || !(val & SPDIFRX_RSR_IFS_MASK)) {
++ ucontrol->value.integer.value[0] = 0;
++ goto unlock;
++ }
++ } else {
++		/* Receiver is not locked, IFS data is invalid. */
+ ucontrol->value.integer.value[0] = 0;
+- return 0;
++ goto unlock;
+ }
+
+ rate = clk_get_rate(dev->gclk);
+
+ ucontrol->value.integer.value[0] = rate / (32 * SPDIFRX_RSR_IFS(val));
+
++unlock:
++ mutex_unlock(&dev->mlock);
+ return 0;
+ }
+
+@@ -805,11 +904,9 @@ static int mchp_spdifrx_dai_probe(struct snd_soc_dai *dai)
+ SPDIFRX_MR_AUTORST_NOACTION |
+ SPDIFRX_MR_PACK_DISABLED);
+
+- dev->blockend_refcount = 0;
+ for (ch = 0; ch < SPDIFRX_CHANNELS; ch++) {
+ init_completion(&ctrl->ch_stat[ch].done);
+ init_completion(&ctrl->user_data[ch].done);
+- spin_lock_init(&ctrl->user_data[ch].lock);
+ }
+
+ /* Add controls */
+@@ -824,7 +921,7 @@ static int mchp_spdifrx_dai_remove(struct snd_soc_dai *dai)
+ struct mchp_spdifrx_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+ /* Disable interrupts */
+- regmap_write(dev->regmap, SPDIFRX_IDR, 0xFF);
++ regmap_write(dev->regmap, SPDIFRX_IDR, GENMASK(14, 0));
+
+ clk_disable_unprepare(dev->pclk);
+
+@@ -909,7 +1006,17 @@ static int mchp_spdifrx_probe(struct platform_device *pdev)
+ "failed to get the PMC generated clock: %d\n", err);
+ return err;
+ }
+- spin_lock_init(&dev->blockend_lock);
++
++ /*
++	 * Signal control needs a valid rate on gclk. hw_params() configures
++	 * it properly, but requesting the signal before any hw_params() has been
++	 * called leads to an invalid value being returned. Thus, configure
++	 * gclk at a valid rate here, at initialization, to simplify the
++ * control path.
++ */
++ clk_set_min_rate(dev->gclk, 48000 * SPDIFRX_GCLK_RATIO_MIN + 1);
++
++ mutex_init(&dev->mlock);
+
+ dev->dev = &pdev->dev;
+ dev->regmap = regmap;
+diff --git a/sound/soc/atmel/mchp-spdiftx.c b/sound/soc/atmel/mchp-spdiftx.c
+index d243800464352..bcca1cf3cd7b6 100644
+--- a/sound/soc/atmel/mchp-spdiftx.c
++++ b/sound/soc/atmel/mchp-spdiftx.c
+@@ -196,8 +196,7 @@ struct mchp_spdiftx_dev {
+ struct clk *pclk;
+ struct clk *gclk;
+ unsigned int fmt;
+- const struct mchp_i2s_caps *caps;
+- int gclk_enabled:1;
++ unsigned int gclk_enabled:1;
+ };
+
+ static inline int mchp_spdiftx_is_running(struct mchp_spdiftx_dev *dev)
+@@ -766,8 +765,6 @@ static const struct of_device_id mchp_spdiftx_dt_ids[] = {
+ MODULE_DEVICE_TABLE(of, mchp_spdiftx_dt_ids);
+ static int mchp_spdiftx_probe(struct platform_device *pdev)
+ {
+- struct device_node *np = pdev->dev.of_node;
+- const struct of_device_id *match;
+ struct mchp_spdiftx_dev *dev;
+ struct resource *mem;
+ struct regmap *regmap;
+@@ -781,11 +778,6 @@ static int mchp_spdiftx_probe(struct platform_device *pdev)
+ if (!dev)
+ return -ENOMEM;
+
+- /* Get hardware capabilities. */
+- match = of_match_node(mchp_spdiftx_dt_ids, np);
+- if (match)
+- dev->caps = match->data;
+-
+ /* Map I/O registers. */
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+ if (IS_ERR(base))
+diff --git a/sound/soc/atmel/mikroe-proto.c b/sound/soc/atmel/mikroe-proto.c
+index 0be7b4221c146..93d114f5b9e6a 100644
+--- a/sound/soc/atmel/mikroe-proto.c
++++ b/sound/soc/atmel/mikroe-proto.c
+@@ -115,7 +115,8 @@ static int snd_proto_probe(struct platform_device *pdev)
+ cpu_np = of_parse_phandle(np, "i2s-controller", 0);
+ if (!cpu_np) {
+ dev_err(&pdev->dev, "i2s-controller missing\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_codec_node;
+ }
+ dai->cpus->of_node = cpu_np;
+ dai->platforms->of_node = cpu_np;
+@@ -125,7 +126,8 @@ static int snd_proto_probe(struct platform_device *pdev)
+ &bitclkmaster, &framemaster);
+ if (bitclkmaster != framemaster) {
+ dev_err(&pdev->dev, "Must be the same bitclock and frame master\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_cpu_node;
+ }
+ if (bitclkmaster) {
+ if (codec_np == bitclkmaster)
+@@ -136,18 +138,20 @@ static int snd_proto_probe(struct platform_device *pdev)
+ dai_fmt |= snd_soc_daifmt_parse_clock_provider_as_flag(np, NULL);
+ }
+
+- of_node_put(bitclkmaster);
+- of_node_put(framemaster);
+- dai->dai_fmt = dai_fmt;
+-
+- of_node_put(codec_np);
+- of_node_put(cpu_np);
+
++ dai->dai_fmt = dai_fmt;
+ ret = snd_soc_register_card(&snd_proto);
+ if (ret && ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "snd_soc_register_card() failed: %d\n", ret);
+
++
++put_cpu_node:
++ of_node_put(bitclkmaster);
++ of_node_put(framemaster);
++ of_node_put(cpu_np);
++put_codec_node:
++ of_node_put(codec_np);
+ return ret;
+ }
+
+diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
+index ed1f69b570244..d243de5f23dc1 100644
+--- a/sound/soc/atmel/sam9g20_wm8731.c
++++ b/sound/soc/atmel/sam9g20_wm8731.c
+@@ -46,35 +46,6 @@
+ */
+ #undef ENABLE_MIC_INPUT
+
+-static struct clk *mclk;
+-
+-static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card,
+- struct snd_soc_dapm_context *dapm,
+- enum snd_soc_bias_level level)
+-{
+- static int mclk_on;
+- int ret = 0;
+-
+- switch (level) {
+- case SND_SOC_BIAS_ON:
+- case SND_SOC_BIAS_PREPARE:
+- if (!mclk_on)
+- ret = clk_enable(mclk);
+- if (ret == 0)
+- mclk_on = 1;
+- break;
+-
+- case SND_SOC_BIAS_OFF:
+- case SND_SOC_BIAS_STANDBY:
+- if (mclk_on)
+- clk_disable(mclk);
+- mclk_on = 0;
+- break;
+- }
+-
+- return ret;
+-}
+-
+ static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = {
+ SND_SOC_DAPM_MIC("Int Mic", NULL),
+ SND_SOC_DAPM_SPK("Ext Spk", NULL),
+@@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = {
+ .owner = THIS_MODULE,
+ .dai_link = &at91sam9g20ek_dai,
+ .num_links = 1,
+- .set_bias_level = at91sam9g20ek_set_bias_level,
+
+ .dapm_widgets = at91sam9g20ek_dapm_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets),
+@@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+ struct device_node *codec_np, *cpu_np;
+- struct clk *pllb;
+ struct snd_soc_card *card = &snd_soc_at91sam9g20ek;
+ int ret;
+
+@@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+
+- /*
+- * Codec MCLK is supplied by PCK0 - set it up.
+- */
+- mclk = clk_get(NULL, "pck0");
+- if (IS_ERR(mclk)) {
+- dev_err(&pdev->dev, "Failed to get MCLK\n");
+- ret = PTR_ERR(mclk);
+- goto err;
+- }
+-
+- pllb = clk_get(NULL, "pllb");
+- if (IS_ERR(pllb)) {
+- dev_err(&pdev->dev, "Failed to get PLLB\n");
+- ret = PTR_ERR(pllb);
+- goto err_mclk;
+- }
+- ret = clk_set_parent(mclk, pllb);
+- clk_put(pllb);
+- if (ret != 0) {
+- dev_err(&pdev->dev, "Failed to set MCLK parent\n");
+- goto err_mclk;
+- }
+-
+- clk_set_rate(mclk, MCLK_RATE);
+-
+ card->dev = &pdev->dev;
+
+ /* Parse device node info */
+@@ -214,6 +158,7 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
+ cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0);
+ if (!cpu_np) {
+ dev_err(&pdev->dev, "dai and pcm info missing\n");
++ of_node_put(codec_np);
+ return -EINVAL;
+ }
+ at91sam9g20ek_dai.cpus->of_node = cpu_np;
+@@ -229,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
+
+ return ret;
+
+-err_mclk:
+- clk_put(mclk);
+- mclk = NULL;
+ err:
+ atmel_ssc_put_audio(0);
+ return ret;
+@@ -241,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev)
+ {
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+
+- clk_disable(mclk);
+- mclk = NULL;
+ snd_soc_unregister_card(card);
+ atmel_ssc_put_audio(0);
+
+diff --git a/sound/soc/atmel/sam9x5_wm8731.c b/sound/soc/atmel/sam9x5_wm8731.c
+index 7745250fd7438..529604a06c532 100644
+--- a/sound/soc/atmel/sam9x5_wm8731.c
++++ b/sound/soc/atmel/sam9x5_wm8731.c
+@@ -142,7 +142,7 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev)
+ if (!cpu_np) {
+ dev_err(&pdev->dev, "atmel,ssc-controller node missing\n");
+ ret = -EINVAL;
+- goto out;
++ goto out_put_codec_np;
+ }
+ dai->cpus->of_node = cpu_np;
+ dai->platforms->of_node = cpu_np;
+@@ -153,12 +153,9 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev)
+ if (ret != 0) {
+ dev_err(&pdev->dev, "Failed to set SSC %d for audio: %d\n",
+ ret, priv->ssc_id);
+- goto out;
++ goto out_put_cpu_np;
+ }
+
+- of_node_put(codec_np);
+- of_node_put(cpu_np);
+-
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+ if (ret) {
+ dev_err(&pdev->dev, "Platform device allocation failed\n");
+@@ -167,10 +164,14 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev)
+
+ dev_dbg(&pdev->dev, "%s ok\n", __func__);
+
+- return ret;
++ goto out_put_cpu_np;
+
+ out_put_audio:
+ atmel_ssc_put_audio(priv->ssc_id);
++out_put_cpu_np:
++ of_node_put(cpu_np);
++out_put_codec_np:
++ of_node_put(codec_np);
+ out:
+ return ret;
+ }
+diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
+index 216cea04ad704..1750cc888bbe8 100644
+--- a/sound/soc/codecs/Kconfig
++++ b/sound/soc/codecs/Kconfig
+@@ -235,8 +235,7 @@ config SND_SOC_ALL_CODECS
+ imply SND_SOC_UDA1380
+ imply SND_SOC_WCD9335
+ imply SND_SOC_WCD934X
+- imply SND_SOC_WCD937X
+- imply SND_SOC_WCD938X
++ imply SND_SOC_WCD938X_SDW
+ imply SND_SOC_LPASS_RX_MACRO
+ imply SND_SOC_LPASS_TX_MACRO
+ imply SND_SOC_WL1273
+@@ -691,6 +690,7 @@ config SND_SOC_CS4349
+
+ config SND_SOC_CS47L15
+ tristate
++ depends on MFD_CS47L15
+
+ config SND_SOC_CS47L24
+ tristate
+@@ -698,15 +698,19 @@ config SND_SOC_CS47L24
+
+ config SND_SOC_CS47L35
+ tristate
++ depends on MFD_CS47L35
+
+ config SND_SOC_CS47L85
+ tristate
++ depends on MFD_CS47L85
+
+ config SND_SOC_CS47L90
+ tristate
++ depends on MFD_CS47L90
+
+ config SND_SOC_CS47L92
+ tristate
++ depends on MFD_CS47L92
+
+ # Cirrus Logic Quad-Channel ADC
+ config SND_SOC_CS53L30
+@@ -896,7 +900,6 @@ config SND_SOC_MAX98095
+
+ config SND_SOC_MAX98357A
+ tristate "Maxim MAX98357A CODEC"
+- depends on GPIOLIB
+
+ config SND_SOC_MAX98371
+ tristate
+@@ -1145,7 +1148,6 @@ config SND_SOC_RT1015
+
+ config SND_SOC_RT1015P
+ tristate
+- depends on GPIOLIB
+
+ config SND_SOC_RT1019
+ tristate
+@@ -1381,6 +1383,7 @@ config SND_SOC_STA529
+ config SND_SOC_STAC9766
+ tristate
+ depends on SND_SOC_AC97_BUS
++ select REGMAP_AC97
+
+ config SND_SOC_STI_SAS
+ tristate "codec Audio support for STI SAS codec"
+@@ -1828,7 +1831,7 @@ config SND_SOC_WSA881X
+ config SND_SOC_ZL38060
+ tristate "Microsemi ZL38060 Connected Home Audio Processor"
+ depends on SPI_MASTER
+- select GPIOLIB
++ depends on GPIOLIB
+ select REGMAP
+ help
+ Support for ZL38060 Connected Home Audio Processor from Microsemi,
+diff --git a/sound/soc/codecs/adau7118.c b/sound/soc/codecs/adau7118.c
+index 841229dcbca10..305f294b7710e 100644
+--- a/sound/soc/codecs/adau7118.c
++++ b/sound/soc/codecs/adau7118.c
+@@ -445,22 +445,6 @@ static const struct snd_soc_component_driver adau7118_component_driver = {
+ .non_legacy_dai_naming = 1,
+ };
+
+-static void adau7118_regulator_disable(void *data)
+-{
+- struct adau7118_data *st = data;
+- int ret;
+- /*
+- * If we fail to disable DVDD, don't bother in trying IOVDD. We
+- * actually don't want to be left in the situation where DVDD
+- * is enabled and IOVDD is disabled.
+- */
+- ret = regulator_disable(st->dvdd);
+- if (ret)
+- return;
+-
+- regulator_disable(st->iovdd);
+-}
+-
+ static int adau7118_regulator_setup(struct adau7118_data *st)
+ {
+ st->iovdd = devm_regulator_get(st->dev, "iovdd");
+@@ -482,8 +466,7 @@ static int adau7118_regulator_setup(struct adau7118_data *st)
+ regcache_cache_only(st->map, true);
+ }
+
+- return devm_add_action_or_reset(st->dev, adau7118_regulator_disable,
+- st);
++ return 0;
+ }
+
+ static int adau7118_parset_dt(const struct adau7118_data *st)
+diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c
+index 05bbacd0d174d..f1c13f42e1c14 100644
+--- a/sound/soc/codecs/cpcap.c
++++ b/sound/soc/codecs/cpcap.c
+@@ -1667,6 +1667,8 @@ static int cpcap_codec_probe(struct platform_device *pdev)
+ {
+ struct device_node *codec_node =
+ of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec");
++ if (!codec_node)
++ return -ENODEV;
+
+ pdev->dev.of_node = codec_node;
+
+diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c
+index a201d652aca2b..8823edc351130 100644
+--- a/sound/soc/codecs/cros_ec_codec.c
++++ b/sound/soc/codecs/cros_ec_codec.c
+@@ -994,6 +994,7 @@ static int cros_ec_codec_platform_probe(struct platform_device *pdev)
+ dev_dbg(dev, "ap_shm_phys_addr=%#llx len=%#x\n",
+ priv->ap_shm_phys_addr, priv->ap_shm_len);
+ }
++ of_node_put(node);
+ }
+ #endif
+
+diff --git a/sound/soc/codecs/cs35l36.c b/sound/soc/codecs/cs35l36.c
+index d83c1b318c1c4..0accdb45ed727 100644
+--- a/sound/soc/codecs/cs35l36.c
++++ b/sound/soc/codecs/cs35l36.c
+@@ -444,7 +444,8 @@ static bool cs35l36_volatile_reg(struct device *dev, unsigned int reg)
+ }
+ }
+
+-static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 25, 0);
++static const DECLARE_TLV_DB_RANGE(dig_vol_tlv, 0, 912,
++ TLV_DB_MINMAX_ITEM(-10200, 1200));
+ static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+
+ static const char * const cs35l36_pcm_sftramp_text[] = {
+diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c
+index cffd6111afaca..b49cb92d7b9e8 100644
+--- a/sound/soc/codecs/cs4265.c
++++ b/sound/soc/codecs/cs4265.c
+@@ -150,7 +150,6 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = {
+ SOC_SINGLE("E to F Buffer Disable Switch", CS4265_SPDIF_CTL1,
+ 6, 1, 0),
+ SOC_ENUM("C Data Access", cam_mode_enum),
+- SOC_SINGLE("SPDIF Switch", CS4265_SPDIF_CTL2, 5, 1, 1),
+ SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2,
+ 3, 1, 0),
+ SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum),
+@@ -186,7 +185,7 @@ static const struct snd_soc_dapm_widget cs4265_dapm_widgets[] = {
+
+ SND_SOC_DAPM_SWITCH("Loopback", SND_SOC_NOPM, 0, 0,
+ &loopback_ctl),
+- SND_SOC_DAPM_SWITCH("SPDIF", SND_SOC_NOPM, 0, 0,
++ SND_SOC_DAPM_SWITCH("SPDIF", CS4265_SPDIF_CTL2, 5, 1,
+ &spdif_switch),
+ SND_SOC_DAPM_SWITCH("DAC", CS4265_PWRCTL, 1, 1,
+ &dac_switch),
+diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c
+index 9a463ab54bddc..762d9de73dbc2 100644
+--- a/sound/soc/codecs/cs42l42.c
++++ b/sound/soc/codecs/cs42l42.c
+@@ -93,7 +93,7 @@ static const struct reg_default cs42l42_reg_defaults[] = {
+ { CS42L42_ASP_RX_INT_MASK, 0x1F },
+ { CS42L42_ASP_TX_INT_MASK, 0x0F },
+ { CS42L42_CODEC_INT_MASK, 0x03 },
+- { CS42L42_SRCPL_INT_MASK, 0xFF },
++ { CS42L42_SRCPL_INT_MASK, 0x7F },
+ { CS42L42_VPMON_INT_MASK, 0x01 },
+ { CS42L42_PLL_LOCK_INT_MASK, 0x01 },
+ { CS42L42_TSRS_PLUG_INT_MASK, 0x0F },
+@@ -130,7 +130,7 @@ static const struct reg_default cs42l42_reg_defaults[] = {
+ { CS42L42_MIXER_CHA_VOL, 0x3F },
+ { CS42L42_MIXER_ADC_VOL, 0x3F },
+ { CS42L42_MIXER_CHB_VOL, 0x3F },
+- { CS42L42_EQ_COEF_IN0, 0x22 },
++ { CS42L42_EQ_COEF_IN0, 0x00 },
+ { CS42L42_EQ_COEF_IN1, 0x00 },
+ { CS42L42_EQ_COEF_IN2, 0x00 },
+ { CS42L42_EQ_COEF_IN3, 0x00 },
+@@ -853,11 +853,10 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream,
+
+ switch(substream->stream) {
+ case SNDRV_PCM_STREAM_CAPTURE:
+- if (channels == 2) {
+- val |= CS42L42_ASP_TX_CH2_AP_MASK;
+- val |= width << CS42L42_ASP_TX_CH2_RES_SHIFT;
+- }
+- val |= width << CS42L42_ASP_TX_CH1_RES_SHIFT;
++ /* channel 2 on high LRCLK */
++ val = CS42L42_ASP_TX_CH2_AP_MASK |
++ (width << CS42L42_ASP_TX_CH2_RES_SHIFT) |
++ (width << CS42L42_ASP_TX_CH1_RES_SHIFT);
+
+ snd_soc_component_update_bits(component, CS42L42_ASP_TX_CH_AP_RES,
+ CS42L42_ASP_TX_CH1_AP_MASK | CS42L42_ASP_TX_CH2_AP_MASK |
+@@ -1685,12 +1684,15 @@ static void cs42l42_setup_hs_type_detect(struct cs42l42_private *cs42l42)
+ (1 << CS42L42_HS_CLAMP_DISABLE_SHIFT));
+
+ /* Enable the tip sense circuit */
++ regmap_update_bits(cs42l42->regmap, CS42L42_TSENSE_CTL,
++ CS42L42_TS_INV_MASK, CS42L42_TS_INV_MASK);
++
+ regmap_update_bits(cs42l42->regmap, CS42L42_TIPSENSE_CTL,
+ CS42L42_TIP_SENSE_CTRL_MASK |
+ CS42L42_TIP_SENSE_INV_MASK |
+ CS42L42_TIP_SENSE_DEBOUNCE_MASK,
+ (3 << CS42L42_TIP_SENSE_CTRL_SHIFT) |
+- (0 << CS42L42_TIP_SENSE_INV_SHIFT) |
++ (!cs42l42->ts_inv << CS42L42_TIP_SENSE_INV_SHIFT) |
+ (2 << CS42L42_TIP_SENSE_DEBOUNCE_SHIFT));
+
+ /* Save the initial status of the tip sense */
+@@ -1734,10 +1736,6 @@ static int cs42l42_handle_device_data(struct device *dev,
+ cs42l42->ts_inv = CS42L42_TS_INV_DIS;
+ }
+
+- regmap_update_bits(cs42l42->regmap, CS42L42_TSENSE_CTL,
+- CS42L42_TS_INV_MASK,
+- (cs42l42->ts_inv << CS42L42_TS_INV_SHIFT));
+-
+ ret = device_property_read_u32(dev, "cirrus,ts-dbnc-rise", &val);
+ if (!ret) {
+ switch (val) {
+@@ -1948,8 +1946,9 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client,
+ NULL, cs42l42_irq_thread,
+ IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+ "cs42l42", cs42l42);
+-
+- if (ret != 0)
++ if (ret == -EPROBE_DEFER)
++ goto err_disable;
++ else if (ret != 0)
+ dev_err(&i2c_client->dev,
+ "Failed to request IRQ: %d\n", ret);
+
+diff --git a/sound/soc/codecs/cs42l51-i2c.c b/sound/soc/codecs/cs42l51-i2c.c
+index 70260e0a8f095..3ff73367897d8 100644
+--- a/sound/soc/codecs/cs42l51-i2c.c
++++ b/sound/soc/codecs/cs42l51-i2c.c
+@@ -19,6 +19,12 @@ static struct i2c_device_id cs42l51_i2c_id[] = {
+ };
+ MODULE_DEVICE_TABLE(i2c, cs42l51_i2c_id);
+
++const struct of_device_id cs42l51_of_match[] = {
++ { .compatible = "cirrus,cs42l51", },
++ { }
++};
++MODULE_DEVICE_TABLE(of, cs42l51_of_match);
++
+ static int cs42l51_i2c_probe(struct i2c_client *i2c,
+ const struct i2c_device_id *id)
+ {
+diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
+index c61b17dc2af87..4b026e1c3fe3e 100644
+--- a/sound/soc/codecs/cs42l51.c
++++ b/sound/soc/codecs/cs42l51.c
+@@ -825,13 +825,6 @@ int __maybe_unused cs42l51_resume(struct device *dev)
+ }
+ EXPORT_SYMBOL_GPL(cs42l51_resume);
+
+-const struct of_device_id cs42l51_of_match[] = {
+- { .compatible = "cirrus,cs42l51", },
+- { }
+-};
+-MODULE_DEVICE_TABLE(of, cs42l51_of_match);
+-EXPORT_SYMBOL_GPL(cs42l51_of_match);
+-
+ MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
+ MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver");
+ MODULE_LICENSE("GPL");
+diff --git a/sound/soc/codecs/cs42l51.h b/sound/soc/codecs/cs42l51.h
+index 9d06cf7f88768..4f13c38484b7f 100644
+--- a/sound/soc/codecs/cs42l51.h
++++ b/sound/soc/codecs/cs42l51.h
+@@ -16,7 +16,6 @@ int cs42l51_probe(struct device *dev, struct regmap *regmap);
+ int cs42l51_remove(struct device *dev);
+ int __maybe_unused cs42l51_suspend(struct device *dev);
+ int __maybe_unused cs42l51_resume(struct device *dev);
+-extern const struct of_device_id cs42l51_of_match[];
+
+ #define CS42L51_CHIP_ID 0x1B
+ #define CS42L51_CHIP_REV_A 0x00
+diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c
+index 80161151b3f2c..c19ad3c247026 100644
+--- a/sound/soc/codecs/cs42l52.c
++++ b/sound/soc/codecs/cs42l52.c
+@@ -137,7 +137,9 @@ static DECLARE_TLV_DB_SCALE(mic_tlv, 1600, 100, 0);
+
+ static DECLARE_TLV_DB_SCALE(pga_tlv, -600, 50, 0);
+
+-static DECLARE_TLV_DB_SCALE(mix_tlv, -50, 50, 0);
++static DECLARE_TLV_DB_SCALE(pass_tlv, -6000, 50, 0);
++
++static DECLARE_TLV_DB_SCALE(mix_tlv, -5150, 50, 0);
+
+ static DECLARE_TLV_DB_SCALE(beep_tlv, -56, 200, 0);
+
+@@ -351,7 +353,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = {
+ CS42L52_SPKB_VOL, 0, 0x40, 0xC0, hl_tlv),
+
+ SOC_DOUBLE_R_SX_TLV("Bypass Volume", CS42L52_PASSTHRUA_VOL,
+- CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pga_tlv),
++ CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pass_tlv),
+
+ SOC_DOUBLE("Bypass Mute", CS42L52_MISC_CTL, 4, 5, 1, 0),
+
+@@ -364,7 +366,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = {
+ CS42L52_ADCB_VOL, 0, 0xA0, 0x78, ipd_tlv),
+ SOC_DOUBLE_R_SX_TLV("ADC Mixer Volume",
+ CS42L52_ADCA_MIXER_VOL, CS42L52_ADCB_MIXER_VOL,
+- 0, 0x19, 0x7F, ipd_tlv),
++ 0, 0x19, 0x7F, mix_tlv),
+
+ SOC_DOUBLE("ADC Switch", CS42L52_ADC_MISC_CTL, 0, 1, 1, 0),
+
+diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c
+index 3cf8a0b4478cd..f0af8c18e5efa 100644
+--- a/sound/soc/codecs/cs42l56.c
++++ b/sound/soc/codecs/cs42l56.c
+@@ -391,9 +391,9 @@ static const struct snd_kcontrol_new cs42l56_snd_controls[] = {
+ SOC_DOUBLE("ADC Boost Switch", CS42L56_GAIN_BIAS_CTL, 3, 2, 1, 1),
+
+ SOC_DOUBLE_R_SX_TLV("Headphone Volume", CS42L56_HPA_VOLUME,
+- CS42L56_HPB_VOLUME, 0, 0x84, 0x48, hl_tlv),
++ CS42L56_HPB_VOLUME, 0, 0x44, 0x48, hl_tlv),
+ SOC_DOUBLE_R_SX_TLV("LineOut Volume", CS42L56_LOA_VOLUME,
+- CS42L56_LOB_VOLUME, 0, 0x84, 0x48, hl_tlv),
++ CS42L56_LOB_VOLUME, 0, 0x44, 0x48, hl_tlv),
+
+ SOC_SINGLE_TLV("Bass Shelving Volume", CS42L56_TONE_CTL,
+ 0, 0x00, 1, tone_tlv),
+@@ -1193,18 +1193,12 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client,
+ if (pdata) {
+ cs42l56->pdata = *pdata;
+ } else {
+- pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata),
+- GFP_KERNEL);
+- if (!pdata)
+- return -ENOMEM;
+-
+ if (i2c_client->dev.of_node) {
+ ret = cs42l56_handle_of_data(i2c_client,
+ &cs42l56->pdata);
+ if (ret != 0)
+ return ret;
+ }
+- cs42l56->pdata = *pdata;
+ }
+
+ if (cs42l56->pdata.gpio_nreset) {
+diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c
+index 1ee83160b83fb..ac9ccdea15b58 100644
+--- a/sound/soc/codecs/cs47l15.c
++++ b/sound/soc/codecs/cs47l15.c
+@@ -122,6 +122,9 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol,
+ snd_soc_kcontrol_component(kcontrol);
+ struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component);
+
++ if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode)
++ return 0;
++
+ switch (ucontrol->value.integer.value[0]) {
+ case 0:
+ /* Set IN1 to normal mode */
+@@ -150,7 +153,7 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol,
+ break;
+ }
+
+- return 0;
++ return 1;
+ }
+
+ static const struct snd_kcontrol_new cs47l15_snd_controls[] = {
+diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c
+index f2087bd38dbc8..c2912ad3851b7 100644
+--- a/sound/soc/codecs/cs53l30.c
++++ b/sound/soc/codecs/cs53l30.c
+@@ -348,22 +348,22 @@ static const struct snd_kcontrol_new cs53l30_snd_controls[] = {
+ SOC_ENUM("ADC2 NG Delay", adc2_ng_delay_enum),
+
+ SOC_SINGLE_SX_TLV("ADC1A PGA Volume",
+- CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
++ CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
+ SOC_SINGLE_SX_TLV("ADC1B PGA Volume",
+- CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
++ CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
+ SOC_SINGLE_SX_TLV("ADC2A PGA Volume",
+- CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
++ CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
+ SOC_SINGLE_SX_TLV("ADC2B PGA Volume",
+- CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
++ CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
+
+ SOC_SINGLE_SX_TLV("ADC1A Digital Volume",
+- CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
++ CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
+ SOC_SINGLE_SX_TLV("ADC1B Digital Volume",
+- CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
++ CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
+ SOC_SINGLE_SX_TLV("ADC2A Digital Volume",
+- CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
++ CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
+ SOC_SINGLE_SX_TLV("ADC2B Digital Volume",
+- CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
++ CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
+ };
+
+ static const struct snd_soc_dapm_widget cs53l30_dapm_widgets[] = {
+diff --git a/sound/soc/codecs/da7210.c b/sound/soc/codecs/da7210.c
+index 8af344b2fdbf6..d75d15006f64e 100644
+--- a/sound/soc/codecs/da7210.c
++++ b/sound/soc/codecs/da7210.c
+@@ -1336,6 +1336,8 @@ static int __init da7210_modinit(void)
+ int ret = 0;
+ #if IS_ENABLED(CONFIG_I2C)
+ ret = i2c_add_driver(&da7210_i2c_driver);
++ if (ret)
++ return ret;
+ #endif
+ #if defined(CONFIG_SPI_MASTER)
+ ret = spi_register_driver(&da7210_spi_driver);
+diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
+index 7998fdd3b378e..7d18de959439f 100644
+--- a/sound/soc/codecs/da7219-aad.c
++++ b/sound/soc/codecs/da7219-aad.c
+@@ -347,11 +347,15 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
+ struct da7219_priv *da7219 = snd_soc_component_get_drvdata(component);
+ u8 events[DA7219_AAD_IRQ_REG_MAX];
+ u8 statusa;
+- int i, report = 0, mask = 0;
++ int i, ret, report = 0, mask = 0;
+
+ /* Read current IRQ events */
+- regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
+- events, DA7219_AAD_IRQ_REG_MAX);
++ ret = regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
++ events, DA7219_AAD_IRQ_REG_MAX);
++ if (ret) {
++ dev_warn_ratelimited(component->dev, "Failed to read IRQ events: %d\n", ret);
++ return IRQ_NONE;
++ }
+
+ if (!events[DA7219_AAD_IRQ_REG_A] && !events[DA7219_AAD_IRQ_REG_B])
+ return IRQ_NONE;
+@@ -854,6 +858,8 @@ void da7219_aad_suspend(struct snd_soc_component *component)
+ }
+ }
+ }
++
++ synchronize_irq(da7219_aad->irq);
+ }
+
+ void da7219_aad_resume(struct snd_soc_component *component)
+diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c
+index 13009d08b09ac..da4c24b8dae58 100644
+--- a/sound/soc/codecs/da7219.c
++++ b/sound/soc/codecs/da7219.c
+@@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
+ struct soc_mixer_control *mixer_ctrl =
+ (struct soc_mixer_control *) kcontrol->private_value;
+ unsigned int reg = mixer_ctrl->reg;
+- __le16 val;
++ __le16 val_new, val_old;
+ int ret;
+
+ /*
+@@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
+ * Therefore we need to convert to little endian here to align with
+ * HW registers.
+ */
+- val = cpu_to_le16(ucontrol->value.integer.value[0]);
++ val_new = cpu_to_le16(ucontrol->value.integer.value[0]);
+
+ mutex_lock(&da7219->ctrl_lock);
+- ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val));
++ ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old));
++ if (ret == 0 && (val_old != val_new))
++ ret = regmap_raw_write(da7219->regmap, reg,
++ &val_new, sizeof(val_new));
+ mutex_unlock(&da7219->ctrl_lock);
+
+- return ret;
++ if (ret < 0)
++ return ret;
++
++ return val_old != val_new;
+ }
+
+
+@@ -2190,6 +2196,7 @@ static int da7219_register_dai_clks(struct snd_soc_component *component)
+ dai_clk_lookup = clkdev_hw_create(dai_clk_hw, init.name,
+ "%s", dev_name(dev));
+ if (!dai_clk_lookup) {
++ clk_hw_unregister(dai_clk_hw);
+ ret = -ENOMEM;
+ goto err;
+ } else {
+@@ -2211,12 +2218,12 @@ static int da7219_register_dai_clks(struct snd_soc_component *component)
+ return 0;
+
+ err:
+- do {
++ while (--i >= 0) {
+ if (da7219->dai_clks_lookup[i])
+ clkdev_drop(da7219->dai_clks_lookup[i]);
+
+ clk_hw_unregister(&da7219->dai_clks_hw[i]);
+- } while (i-- > 0);
++ }
+
+ if (np)
+ kfree(da7219->clk_hw_data);
+diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c
+index 067757d1d70a3..93549f8ee130c 100644
+--- a/sound/soc/codecs/es8316.c
++++ b/sound/soc/codecs/es8316.c
+@@ -52,7 +52,12 @@ static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9600, 50, 1);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9600, 50, 1);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_max_gain_tlv, -650, 150, 0);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_min_gain_tlv, -1200, 150, 0);
+-static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_target_tlv, -1650, 150, 0);
++
++static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(alc_target_tlv,
++ 0, 10, TLV_DB_SCALE_ITEM(-1650, 150, 0),
++ 11, 11, TLV_DB_SCALE_ITEM(-150, 0, 0),
++);
++
+ static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(hpmixer_gain_tlv,
+ 0, 4, TLV_DB_SCALE_ITEM(-1200, 150, 0),
+ 8, 11, TLV_DB_SCALE_ITEM(-450, 150, 0),
+@@ -115,7 +120,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = {
+ alc_max_gain_tlv),
+ SOC_SINGLE_TLV("ALC Capture Min Volume", ES8316_ADC_ALC2, 0, 28, 0,
+ alc_min_gain_tlv),
+- SOC_SINGLE_TLV("ALC Capture Target Volume", ES8316_ADC_ALC3, 4, 10, 0,
++ SOC_SINGLE_TLV("ALC Capture Target Volume", ES8316_ADC_ALC3, 4, 11, 0,
+ alc_target_tlv),
+ SOC_SINGLE("ALC Capture Hold Time", ES8316_ADC_ALC3, 0, 10, 0),
+ SOC_SINGLE("ALC Capture Decay Time", ES8316_ADC_ALC4, 4, 10, 0),
+@@ -148,7 +153,7 @@ static const char * const es8316_dmic_txt[] = {
+ "dmic data at high level",
+ "dmic data at low level",
+ };
+-static const unsigned int es8316_dmic_values[] = { 0, 1, 2 };
++static const unsigned int es8316_dmic_values[] = { 0, 2, 3 };
+ static const struct soc_enum es8316_dmic_src_enum =
+ SOC_VALUE_ENUM_SINGLE(ES8316_ADC_DMIC, 0, 3,
+ ARRAY_SIZE(es8316_dmic_txt),
+@@ -364,13 +369,11 @@ static int es8316_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+ int count = 0;
+
+ es8316->sysclk = freq;
++ es8316->sysclk_constraints.list = NULL;
++ es8316->sysclk_constraints.count = 0;
+
+- if (freq == 0) {
+- es8316->sysclk_constraints.list = NULL;
+- es8316->sysclk_constraints.count = 0;
+-
++ if (freq == 0)
+ return 0;
+- }
+
+ ret = clk_set_rate(es8316->mclk, freq);
+ if (ret)
+@@ -386,8 +389,10 @@ static int es8316_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+ es8316->allowed_rates[count++] = freq / ratio;
+ }
+
+- es8316->sysclk_constraints.list = es8316->allowed_rates;
+- es8316->sysclk_constraints.count = count;
++ if (count) {
++ es8316->sysclk_constraints.list = es8316->allowed_rates;
++ es8316->sysclk_constraints.count = count;
++ }
+
+ return 0;
+ }
+@@ -810,15 +815,14 @@ static int es8316_i2c_probe(struct i2c_client *i2c_client,
+ es8316->irq = i2c_client->irq;
+ mutex_init(&es8316->lock);
+
+- ret = devm_request_threaded_irq(dev, es8316->irq, NULL, es8316_irq,
+- IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+- "es8316", es8316);
+- if (ret == 0) {
+- /* Gets re-enabled by es8316_set_jack() */
+- disable_irq(es8316->irq);
+- } else {
+- dev_warn(dev, "Failed to get IRQ %d: %d\n", es8316->irq, ret);
+- es8316->irq = -ENXIO;
++ if (es8316->irq > 0) {
++ ret = devm_request_threaded_irq(dev, es8316->irq, NULL, es8316_irq,
++ IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_AUTOEN,
++ "es8316", es8316);
++ if (ret) {
++ dev_warn(dev, "Failed to get IRQ %d: %d\n", es8316->irq, ret);
++ es8316->irq = -ENXIO;
++ }
+ }
+
+ return devm_snd_soc_register_component(&i2c_client->dev,
+diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
+index 9632afc2d4d64..ca3b1c00fa787 100644
+--- a/sound/soc/codecs/es8328.c
++++ b/sound/soc/codecs/es8328.c
+@@ -161,13 +161,16 @@ static int es8328_put_deemph(struct snd_kcontrol *kcontrol,
+ if (deemph > 1)
+ return -EINVAL;
+
++ if (es8328->deemph == deemph)
++ return 0;
++
+ ret = es8328_set_deemph(component);
+ if (ret < 0)
+ return ret;
+
+ es8328->deemph = deemph;
+
+- return 0;
++ return 1;
+ }
+
+
+diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c
+index 390dd6c7f6a50..de5955db0a5f0 100644
+--- a/sound/soc/codecs/hdac_hda.c
++++ b/sound/soc/codecs/hdac_hda.c
+@@ -46,9 +46,8 @@ static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai);
+ static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai);
+-static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai,
+- unsigned int tx_mask, unsigned int rx_mask,
+- int slots, int slot_width);
++static int hdac_hda_dai_set_stream(struct snd_soc_dai *dai, void *stream,
++ int direction);
+ static struct hda_pcm *snd_soc_find_pcm_from_dai(struct hdac_hda_priv *hda_pvt,
+ struct snd_soc_dai *dai);
+
+@@ -58,7 +57,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = {
+ .prepare = hdac_hda_dai_prepare,
+ .hw_params = hdac_hda_dai_hw_params,
+ .hw_free = hdac_hda_dai_hw_free,
+- .set_tdm_slot = hdac_hda_dai_set_tdm_slot,
++ .set_stream = hdac_hda_dai_set_stream,
+ };
+
+ static struct snd_soc_dai_driver hdac_hda_dais[] = {
+@@ -180,21 +179,22 @@ static struct snd_soc_dai_driver hdac_hda_dais[] = {
+
+ };
+
+-static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai,
+- unsigned int tx_mask, unsigned int rx_mask,
+- int slots, int slot_width)
++static int hdac_hda_dai_set_stream(struct snd_soc_dai *dai,
++ void *stream, int direction)
+ {
+ struct snd_soc_component *component = dai->component;
+ struct hdac_hda_priv *hda_pvt;
+ struct hdac_hda_pcm *pcm;
++ struct hdac_stream *hstream;
++
++ if (!stream)
++ return -EINVAL;
+
+ hda_pvt = snd_soc_component_get_drvdata(component);
+ pcm = &hda_pvt->pcm[dai->id];
++ hstream = (struct hdac_stream *)stream;
+
+- if (tx_mask)
+- pcm->stream_tag[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask;
+- else
+- pcm->stream_tag[SNDRV_PCM_STREAM_CAPTURE] = rx_mask;
++ pcm->stream_tag[direction] = hstream->stream_tag;
+
+ return 0;
+ }
+diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h
+index d0efc5e254ae9..da0ed74758b05 100644
+--- a/sound/soc/codecs/hdac_hda.h
++++ b/sound/soc/codecs/hdac_hda.h
+@@ -14,7 +14,7 @@ enum {
+ HDAC_HDMI_1_DAI_ID,
+ HDAC_HDMI_2_DAI_ID,
+ HDAC_HDMI_3_DAI_ID,
+- HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID,
++ HDAC_DAI_ID_NUM
+ };
+
+ struct hdac_hda_pcm {
+@@ -24,7 +24,7 @@ struct hdac_hda_pcm {
+
+ struct hdac_hda_priv {
+ struct hda_codec codec;
+- struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID];
++ struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM];
+ bool need_display_power;
+ };
+
+diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
+index 66408a98298be..1acd82f81ba0e 100644
+--- a/sound/soc/codecs/hdac_hdmi.c
++++ b/sound/soc/codecs/hdac_hdmi.c
+@@ -436,23 +436,28 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev,
+ return 0;
+ }
+
+-static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai,
+- unsigned int tx_mask, unsigned int rx_mask,
+- int slots, int slot_width)
++static int hdac_hdmi_set_stream(struct snd_soc_dai *dai,
++ void *stream, int direction)
+ {
+ struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
+ struct hdac_device *hdev = hdmi->hdev;
+ struct hdac_hdmi_dai_port_map *dai_map;
+ struct hdac_hdmi_pcm *pcm;
++ struct hdac_stream *hstream;
+
+- dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask);
++ if (!stream)
++ return -EINVAL;
++
++ hstream = (struct hdac_stream *)stream;
++
++ dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, hstream->stream_tag);
+
+ dai_map = &hdmi->dai_map[dai->id];
+
+ pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt);
+
+ if (pcm)
+- pcm->stream_tag = (tx_mask << 4);
++ pcm->stream_tag = (hstream->stream_tag << 4);
+
+ return 0;
+ }
+@@ -1544,7 +1549,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = {
+ .startup = hdac_hdmi_pcm_open,
+ .shutdown = hdac_hdmi_pcm_close,
+ .hw_params = hdac_hdmi_set_hw_params,
+- .set_tdm_slot = hdac_hdmi_set_tdm_slot,
++ .set_stream = hdac_hdmi_set_stream,
+ };
+
+ /*
+diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
+index b61f980cabdc0..b07607a9ecea4 100644
+--- a/sound/soc/codecs/hdmi-codec.c
++++ b/sound/soc/codecs/hdmi-codec.c
+@@ -277,7 +277,7 @@ struct hdmi_codec_priv {
+ bool busy;
+ struct snd_soc_jack *jack;
+ unsigned int jack_status;
+- u8 iec_status[5];
++ u8 iec_status[AES_IEC958_STATUS_SIZE];
+ };
+
+ static const struct snd_soc_dapm_widget hdmi_widgets[] = {
+diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c
+index 5201a8f6d7b63..71ea576f7e67a 100644
+--- a/sound/soc/codecs/jz4725b.c
++++ b/sound/soc/codecs/jz4725b.c
+@@ -136,14 +136,17 @@ enum {
+ #define REG_CGR3_GO1L_OFFSET 0
+ #define REG_CGR3_GO1L_MASK (0x1f << REG_CGR3_GO1L_OFFSET)
+
++#define REG_CGR10_GIL_OFFSET 0
++#define REG_CGR10_GIR_OFFSET 4
++
+ struct jz_icdc {
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *clk;
+ };
+
+-static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_dac_tlv, -2250, 0);
+-static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_line_tlv, -1500, 600);
++static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_adc_tlv, 0, 150, 0);
++static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_dac_tlv, -2250, 150, 0);
+
+ static const struct snd_kcontrol_new jz4725b_codec_controls[] = {
+ SOC_DOUBLE_TLV("Master Playback Volume",
+@@ -151,11 +154,11 @@ static const struct snd_kcontrol_new jz4725b_codec_controls[] = {
+ REG_CGR1_GODL_OFFSET,
+ REG_CGR1_GODR_OFFSET,
+ 0xf, 1, jz4725b_dac_tlv),
+- SOC_DOUBLE_R_TLV("Master Capture Volume",
+- JZ4725B_CODEC_REG_CGR3,
+- JZ4725B_CODEC_REG_CGR2,
+- REG_CGR2_GO1R_OFFSET,
+- 0x1f, 1, jz4725b_line_tlv),
++ SOC_DOUBLE_TLV("Master Capture Volume",
++ JZ4725B_CODEC_REG_CGR10,
++ REG_CGR10_GIL_OFFSET,
++ REG_CGR10_GIR_OFFSET,
++ 0xf, 0, jz4725b_adc_tlv),
+
+ SOC_SINGLE("Master Playback Switch", JZ4725B_CODEC_REG_CR1,
+ REG_CR1_DAC_MUTE_OFFSET, 1, 1),
+@@ -180,7 +183,7 @@ static SOC_VALUE_ENUM_SINGLE_DECL(jz4725b_codec_adc_src_enum,
+ jz4725b_codec_adc_src_texts,
+ jz4725b_codec_adc_src_values);
+ static const struct snd_kcontrol_new jz4725b_codec_adc_src_ctrl =
+- SOC_DAPM_ENUM("Route", jz4725b_codec_adc_src_enum);
++ SOC_DAPM_ENUM("ADC Source Capture Route", jz4725b_codec_adc_src_enum);
+
+ static const struct snd_kcontrol_new jz4725b_codec_mixer_controls[] = {
+ SOC_DAPM_SINGLE("Line In Bypass", JZ4725B_CODEC_REG_CR1,
+@@ -225,7 +228,7 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = {
+ SND_SOC_DAPM_ADC("ADC", "Capture",
+ JZ4725B_CODEC_REG_PMR1, REG_PMR1_SB_ADC_OFFSET, 1),
+
+- SND_SOC_DAPM_MUX("ADC Source", SND_SOC_NOPM, 0, 0,
++ SND_SOC_DAPM_MUX("ADC Source Capture Route", SND_SOC_NOPM, 0, 0,
+ &jz4725b_codec_adc_src_ctrl),
+
+ /* Mixer */
+@@ -236,7 +239,8 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = {
+ SND_SOC_DAPM_MIXER("DAC to Mixer", JZ4725B_CODEC_REG_CR1,
+ REG_CR1_DACSEL_OFFSET, 0, NULL, 0),
+
+- SND_SOC_DAPM_MIXER("Line In", SND_SOC_NOPM, 0, 0, NULL, 0),
++ SND_SOC_DAPM_MIXER("Line In", JZ4725B_CODEC_REG_PMR1,
++ REG_PMR1_SB_LIN_OFFSET, 1, NULL, 0),
+ SND_SOC_DAPM_MIXER("HP Out", JZ4725B_CODEC_REG_CR1,
+ REG_CR1_HP_DIS_OFFSET, 1, NULL, 0),
+
+@@ -283,11 +287,11 @@ static const struct snd_soc_dapm_route jz4725b_codec_dapm_routes[] = {
+ {"Mixer", NULL, "DAC to Mixer"},
+
+ {"Mixer to ADC", NULL, "Mixer"},
+- {"ADC Source", "Mixer", "Mixer to ADC"},
+- {"ADC Source", "Line In", "Line In"},
+- {"ADC Source", "Mic 1", "Mic 1"},
+- {"ADC Source", "Mic 2", "Mic 2"},
+- {"ADC", NULL, "ADC Source"},
++ {"ADC Source Capture Route", "Mixer", "Mixer to ADC"},
++ {"ADC Source Capture Route", "Line In", "Line In"},
++ {"ADC Source Capture Route", "Mic 1", "Mic 1"},
++ {"ADC Source Capture Route", "Mic 2", "Mic 2"},
++ {"ADC", NULL, "ADC Source Capture Route"},
+
+ {"Out Stage", NULL, "Mixer"},
+ {"HP Out", NULL, "Out Stage"},
+diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
+index 196b06898eeb2..72a0db09c7131 100644
+--- a/sound/soc/codecs/lpass-rx-macro.c
++++ b/sound/soc/codecs/lpass-rx-macro.c
+@@ -363,7 +363,7 @@
+ #define CDC_RX_DSD1_CFG2 (0x0F8C)
+ #define RX_MAX_OFFSET (0x0F8C)
+
+-#define MCLK_FREQ 9600000
++#define MCLK_FREQ 19200000
+
+ #define RX_MACRO_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
+ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\
+@@ -608,7 +608,11 @@ struct rx_macro {
+ int softclip_clk_users;
+
+ struct regmap *regmap;
+- struct clk_bulk_data clks[RX_NUM_CLKS_MAX];
++ struct clk *mclk;
++ struct clk *npl;
++ struct clk *macro;
++ struct clk *dcodec;
++ struct clk *fsgen;
+ struct clk_hw hw;
+ };
+ #define to_rx_macro(_hw) container_of(_hw, struct rx_macro, hw)
+@@ -2039,6 +2043,10 @@ static int rx_macro_load_compander_coeff(struct snd_soc_component *component,
+ int i;
+ int hph_pwr_mode;
+
++ /* AUX does not have compander */
++ if (comp == INTERP_AUX)
++ return 0;
++
+ if (!rx->comp_enabled[comp])
+ return 0;
+
+@@ -2188,7 +2196,7 @@ static int rx_macro_config_classh(struct snd_soc_component *component,
+ snd_soc_component_update_bits(component,
+ CDC_RX_CLSH_DECAY_CTRL,
+ CDC_RX_CLSH_DECAY_RATE_MASK, 0x0);
+- snd_soc_component_update_bits(component,
++ snd_soc_component_write_field(component,
+ CDC_RX_RX1_RX_PATH_CFG0,
+ CDC_RX_RXn_CLSH_EN_MASK, 0x1);
+ break;
+@@ -2268,7 +2276,7 @@ static int rx_macro_mux_get(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm);
+ struct rx_macro *rx = snd_soc_component_get_drvdata(component);
+
+- ucontrol->value.integer.value[0] =
++ ucontrol->value.enumerated.item[0] =
+ rx->rx_port_value[widget->shift];
+ return 0;
+ }
+@@ -2280,7 +2288,7 @@ static int rx_macro_mux_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ struct snd_soc_dapm_update *update = NULL;
+- u32 rx_port_value = ucontrol->value.integer.value[0];
++ u32 rx_port_value = ucontrol->value.enumerated.item[0];
+ u32 aif_rst;
+ struct rx_macro *rx = snd_soc_component_get_drvdata(component);
+
+@@ -2392,7 +2400,7 @@ static int rx_macro_get_hph_pwr_mode(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rx_macro *rx = snd_soc_component_get_drvdata(component);
+
+- ucontrol->value.integer.value[0] = rx->hph_pwr_mode;
++ ucontrol->value.enumerated.item[0] = rx->hph_pwr_mode;
+ return 0;
+ }
+
+@@ -2402,7 +2410,7 @@ static int rx_macro_put_hph_pwr_mode(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct rx_macro *rx = snd_soc_component_get_drvdata(component);
+
+- rx->hph_pwr_mode = ucontrol->value.integer.value[0];
++ rx->hph_pwr_mode = ucontrol->value.enumerated.item[0];
+ return 0;
+ }
+
+@@ -2688,8 +2696,8 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
+ int reg, b2_reg;
+
+ /* Address does not automatically update if reading */
+- reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
+- b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
++ reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
++ b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
+
+ snd_soc_component_write(component, reg,
+ ((band_idx * BAND_MAX + coeff_idx) *
+@@ -2718,7 +2726,7 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
+ static void set_iir_band_coeff(struct snd_soc_component *component,
+ int iir_idx, int band_idx, uint32_t value)
+ {
+- int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
++ int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
+
+ snd_soc_component_write(component, reg, (value & 0xFF));
+ snd_soc_component_write(component, reg, (value >> 8) & 0xFF);
+@@ -2739,7 +2747,7 @@ static int rx_macro_put_iir_band_audio_mixer(
+ int iir_idx = ctl->iir_idx;
+ int band_idx = ctl->band_idx;
+ u32 coeff[BAND_MAX];
+- int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
++ int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
+
+ memcpy(&coeff[0], ucontrol->value.bytes.data, params->max);
+
+@@ -3475,17 +3483,16 @@ static const struct clk_ops swclk_gate_ops = {
+
+ };
+
+-static struct clk *rx_macro_register_mclk_output(struct rx_macro *rx)
++static int rx_macro_register_mclk_output(struct rx_macro *rx)
+ {
+ struct device *dev = rx->dev;
+- struct device_node *np = dev->of_node;
+ const char *parent_clk_name = NULL;
+ const char *clk_name = "lpass-rx-mclk";
+ struct clk_hw *hw;
+ struct clk_init_data init;
+ int ret;
+
+- parent_clk_name = __clk_get_name(rx->clks[2].clk);
++ parent_clk_name = __clk_get_name(rx->mclk);
+
+ init.name = clk_name;
+ init.ops = &swclk_gate_ops;
+@@ -3494,13 +3501,11 @@ static struct clk *rx_macro_register_mclk_output(struct rx_macro *rx)
+ init.num_parents = 1;
+ rx->hw.init = &init;
+ hw = &rx->hw;
+- ret = clk_hw_register(rx->dev, hw);
++ ret = devm_clk_hw_register(rx->dev, hw);
+ if (ret)
+- return ERR_PTR(ret);
+-
+- of_clk_add_provider(np, of_clk_src_simple_get, hw->clk);
++ return ret;
+
+- return NULL;
++ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+ }
+
+ static const struct snd_soc_component_driver rx_macro_component_drv = {
+@@ -3525,23 +3530,33 @@ static int rx_macro_probe(struct platform_device *pdev)
+ if (!rx)
+ return -ENOMEM;
+
+- rx->clks[0].id = "macro";
+- rx->clks[1].id = "dcodec";
+- rx->clks[2].id = "mclk";
+- rx->clks[3].id = "npl";
+- rx->clks[4].id = "fsgen";
++ rx->macro = devm_clk_get_optional(dev, "macro");
++ if (IS_ERR(rx->macro))
++ return PTR_ERR(rx->macro);
+
+- ret = devm_clk_bulk_get(dev, RX_NUM_CLKS_MAX, rx->clks);
+- if (ret) {
+- dev_err(dev, "Error getting RX Clocks (%d)\n", ret);
+- return ret;
+- }
++ rx->dcodec = devm_clk_get_optional(dev, "dcodec");
++ if (IS_ERR(rx->dcodec))
++ return PTR_ERR(rx->dcodec);
++
++ rx->mclk = devm_clk_get(dev, "mclk");
++ if (IS_ERR(rx->mclk))
++ return PTR_ERR(rx->mclk);
++
++ rx->npl = devm_clk_get(dev, "npl");
++ if (IS_ERR(rx->npl))
++ return PTR_ERR(rx->npl);
++
++ rx->fsgen = devm_clk_get(dev, "fsgen");
++ if (IS_ERR(rx->fsgen))
++ return PTR_ERR(rx->fsgen);
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ rx->regmap = devm_regmap_init_mmio(dev, base, &rx_regmap_config);
++ if (IS_ERR(rx->regmap))
++ return PTR_ERR(rx->regmap);
+
+ dev_set_drvdata(dev, rx);
+
+@@ -3549,21 +3564,52 @@ static int rx_macro_probe(struct platform_device *pdev)
+ rx->dev = dev;
+
+ /* set MCLK and NPL rates */
+- clk_set_rate(rx->clks[2].clk, MCLK_FREQ);
+- clk_set_rate(rx->clks[3].clk, 2 * MCLK_FREQ);
++ clk_set_rate(rx->mclk, MCLK_FREQ);
++ clk_set_rate(rx->npl, MCLK_FREQ);
+
+- ret = clk_bulk_prepare_enable(RX_NUM_CLKS_MAX, rx->clks);
++ ret = clk_prepare_enable(rx->macro);
+ if (ret)
+- return ret;
++ goto err;
++
++ ret = clk_prepare_enable(rx->dcodec);
++ if (ret)
++ goto err_dcodec;
++
++ ret = clk_prepare_enable(rx->mclk);
++ if (ret)
++ goto err_mclk;
++
++ ret = clk_prepare_enable(rx->npl);
++ if (ret)
++ goto err_npl;
+
+- rx_macro_register_mclk_output(rx);
++ ret = clk_prepare_enable(rx->fsgen);
++ if (ret)
++ goto err_fsgen;
++
++ ret = rx_macro_register_mclk_output(rx);
++ if (ret)
++ goto err_clkout;
+
+ ret = devm_snd_soc_register_component(dev, &rx_macro_component_drv,
+ rx_macro_dai,
+ ARRAY_SIZE(rx_macro_dai));
+ if (ret)
+- clk_bulk_disable_unprepare(RX_NUM_CLKS_MAX, rx->clks);
++ goto err_clkout;
+
++ return 0;
++
++err_clkout:
++ clk_disable_unprepare(rx->fsgen);
++err_fsgen:
++ clk_disable_unprepare(rx->npl);
++err_npl:
++ clk_disable_unprepare(rx->mclk);
++err_mclk:
++ clk_disable_unprepare(rx->dcodec);
++err_dcodec:
++ clk_disable_unprepare(rx->macro);
++err:
+ return ret;
+ }
+
+@@ -3571,8 +3617,12 @@ static int rx_macro_remove(struct platform_device *pdev)
+ {
+ struct rx_macro *rx = dev_get_drvdata(&pdev->dev);
+
+- of_clk_del_provider(pdev->dev.of_node);
+- clk_bulk_disable_unprepare(RX_NUM_CLKS_MAX, rx->clks);
++ clk_disable_unprepare(rx->mclk);
++ clk_disable_unprepare(rx->npl);
++ clk_disable_unprepare(rx->fsgen);
++ clk_disable_unprepare(rx->macro);
++ clk_disable_unprepare(rx->dcodec);
++
+ return 0;
+ }
+
+diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
+index 27a0d5defd273..4377e91733b87 100644
+--- a/sound/soc/codecs/lpass-tx-macro.c
++++ b/sound/soc/codecs/lpass-tx-macro.c
+@@ -6,6 +6,7 @@
+ #include <linux/clk.h>
+ #include <linux/io.h>
+ #include <linux/platform_device.h>
++#include <linux/pm_runtime.h>
+ #include <linux/regmap.h>
+ #include <sound/soc.h>
+ #include <sound/soc-dapm.h>
+@@ -199,7 +200,7 @@
+ #define TX_MACRO_AMIC_UNMUTE_DELAY_MS 100
+ #define TX_MACRO_DMIC_HPF_DELAY_MS 300
+ #define TX_MACRO_AMIC_HPF_DELAY_MS 300
+-#define MCLK_FREQ 9600000
++#define MCLK_FREQ 19200000
+
+ enum {
+ TX_MACRO_AIF_INVALID = 0,
+@@ -238,7 +239,7 @@ enum {
+
+ struct tx_mute_work {
+ struct tx_macro *tx;
+- u32 decimator;
++ u8 decimator;
+ struct delayed_work dwork;
+ };
+
+@@ -258,7 +259,11 @@ struct tx_macro {
+ unsigned long active_ch_cnt[TX_MACRO_MAX_DAIS];
+ unsigned long active_decimator[TX_MACRO_MAX_DAIS];
+ struct regmap *regmap;
+- struct clk_bulk_data clks[TX_NUM_CLKS_MAX];
++ struct clk *mclk;
++ struct clk *npl;
++ struct clk *macro;
++ struct clk *dcodec;
++ struct clk *fsgen;
+ struct clk_hw hw;
+ bool dec_active[NUM_DECIMATORS];
+ bool reset_swr;
+@@ -627,7 +632,7 @@ exit:
+ return 0;
+ }
+
+-static bool is_amic_enabled(struct snd_soc_component *component, int decimator)
++static bool is_amic_enabled(struct snd_soc_component *component, u8 decimator)
+ {
+ u16 adc_mux_reg, adc_reg, adc_n;
+
+@@ -738,6 +743,8 @@ static int tx_macro_put_dec_enum(struct snd_kcontrol *kcontrol,
+ struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+
+ val = ucontrol->value.enumerated.item[0];
++ if (val >= e->items)
++ return -EINVAL;
+
+ switch (e->reg) {
+ case CDC_TX_INP_MUX_ADC_MUX0_CFG0:
+@@ -764,6 +771,9 @@ static int tx_macro_put_dec_enum(struct snd_kcontrol *kcontrol,
+ case CDC_TX_INP_MUX_ADC_MUX7_CFG0:
+ mic_sel_reg = CDC_TX7_TX_PATH_CFG0;
+ break;
++ default:
++ dev_err(component->dev, "Error in configuration!!\n");
++ return -EINVAL;
+ }
+
+ if (val != 0) {
+@@ -815,24 +825,30 @@ static int tx_macro_tx_mixer_put(struct snd_kcontrol *kcontrol,
+ struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+
+ if (enable) {
++ if (tx->active_decimator[dai_id] == dec_id)
++ return 0;
++
+ set_bit(dec_id, &tx->active_ch_mask[dai_id]);
+ tx->active_ch_cnt[dai_id]++;
+ tx->active_decimator[dai_id] = dec_id;
+ } else {
++ if (tx->active_decimator[dai_id] == -1)
++ return 0;
++
+ tx->active_ch_cnt[dai_id]--;
+ clear_bit(dec_id, &tx->active_ch_mask[dai_id]);
+ tx->active_decimator[dai_id] = -1;
+ }
+ snd_soc_dapm_mixer_update_power(widget->dapm, kcontrol, enable, update);
+
+- return 0;
++ return 1;
+ }
+
+ static int tx_macro_enable_dec(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+ {
+ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+- unsigned int decimator;
++ u8 decimator;
+ u16 tx_vol_ctl_reg, dec_cfg_reg, hpf_gate_reg, tx_gain_ctl_reg;
+ u8 hpf_cut_off_freq;
+ int hpf_delay = TX_MACRO_DMIC_HPF_DELAY_MS;
+@@ -1011,9 +1027,12 @@ static int tx_macro_dec_mode_put(struct snd_kcontrol *kcontrol,
+ int path = e->shift_l;
+ struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+
++ if (tx->dec_mode[path] == value)
++ return 0;
++
+ tx->dec_mode[path] = value;
+
+- return 0;
++ return 1;
+ }
+
+ static int tx_macro_get_bcs(struct snd_kcontrol *kcontrol,
+@@ -1044,7 +1063,8 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+ struct snd_soc_component *component = dai->component;
+- u32 decimator, sample_rate;
++ u32 sample_rate;
++ u8 decimator;
+ int tx_fs_rate;
+ struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+
+@@ -1108,7 +1128,7 @@ static int tx_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream)
+ {
+ struct snd_soc_component *component = dai->component;
+ struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+- u16 decimator;
++ u8 decimator;
+
+ decimator = tx->active_decimator[dai->id];
+
+@@ -1736,17 +1756,16 @@ static const struct clk_ops swclk_gate_ops = {
+
+ };
+
+-static struct clk *tx_macro_register_mclk_output(struct tx_macro *tx)
++static int tx_macro_register_mclk_output(struct tx_macro *tx)
+ {
+ struct device *dev = tx->dev;
+- struct device_node *np = dev->of_node;
+ const char *parent_clk_name = NULL;
+ const char *clk_name = "lpass-tx-mclk";
+ struct clk_hw *hw;
+ struct clk_init_data init;
+ int ret;
+
+- parent_clk_name = __clk_get_name(tx->clks[2].clk);
++ parent_clk_name = __clk_get_name(tx->mclk);
+
+ init.name = clk_name;
+ init.ops = &swclk_gate_ops;
+@@ -1755,13 +1774,11 @@ static struct clk *tx_macro_register_mclk_output(struct tx_macro *tx)
+ init.num_parents = 1;
+ tx->hw.init = &init;
+ hw = &tx->hw;
+- ret = clk_hw_register(tx->dev, hw);
++ ret = devm_clk_hw_register(dev, hw);
+ if (ret)
+- return ERR_PTR(ret);
+-
+- of_clk_add_provider(np, of_clk_src_simple_get, hw->clk);
++ return ret;
+
+- return NULL;
++ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+ }
+
+ static const struct snd_soc_component_driver tx_macro_component_drv = {
+@@ -1786,23 +1803,33 @@ static int tx_macro_probe(struct platform_device *pdev)
+ if (!tx)
+ return -ENOMEM;
+
+- tx->clks[0].id = "macro";
+- tx->clks[1].id = "dcodec";
+- tx->clks[2].id = "mclk";
+- tx->clks[3].id = "npl";
+- tx->clks[4].id = "fsgen";
++ tx->macro = devm_clk_get_optional(dev, "macro");
++ if (IS_ERR(tx->macro))
++ return PTR_ERR(tx->macro);
+
+- ret = devm_clk_bulk_get(dev, TX_NUM_CLKS_MAX, tx->clks);
+- if (ret) {
+- dev_err(dev, "Error getting RX Clocks (%d)\n", ret);
+- return ret;
+- }
++ tx->dcodec = devm_clk_get_optional(dev, "dcodec");
++ if (IS_ERR(tx->dcodec))
++ return PTR_ERR(tx->dcodec);
++
++ tx->mclk = devm_clk_get(dev, "mclk");
++ if (IS_ERR(tx->mclk))
++ return PTR_ERR(tx->mclk);
++
++ tx->npl = devm_clk_get(dev, "npl");
++ if (IS_ERR(tx->npl))
++ return PTR_ERR(tx->npl);
++
++ tx->fsgen = devm_clk_get(dev, "fsgen");
++ if (IS_ERR(tx->fsgen))
++ return PTR_ERR(tx->fsgen);
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config);
++ if (IS_ERR(tx->regmap))
++ return PTR_ERR(tx->regmap);
+
+ dev_set_drvdata(dev, tx);
+
+@@ -1810,24 +1837,52 @@ static int tx_macro_probe(struct platform_device *pdev)
+ tx->dev = dev;
+
+ /* set MCLK and NPL rates */
+- clk_set_rate(tx->clks[2].clk, MCLK_FREQ);
+- clk_set_rate(tx->clks[3].clk, 2 * MCLK_FREQ);
++ clk_set_rate(tx->mclk, MCLK_FREQ);
++ clk_set_rate(tx->npl, MCLK_FREQ);
+
+- ret = clk_bulk_prepare_enable(TX_NUM_CLKS_MAX, tx->clks);
++ ret = clk_prepare_enable(tx->macro);
+ if (ret)
+- return ret;
++ goto err;
++
++ ret = clk_prepare_enable(tx->dcodec);
++ if (ret)
++ goto err_dcodec;
++
++ ret = clk_prepare_enable(tx->mclk);
++ if (ret)
++ goto err_mclk;
++
++ ret = clk_prepare_enable(tx->npl);
++ if (ret)
++ goto err_npl;
++
++ ret = clk_prepare_enable(tx->fsgen);
++ if (ret)
++ goto err_fsgen;
+
+- tx_macro_register_mclk_output(tx);
++ ret = tx_macro_register_mclk_output(tx);
++ if (ret)
++ goto err_clkout;
+
+ ret = devm_snd_soc_register_component(dev, &tx_macro_component_drv,
+ tx_macro_dai,
+ ARRAY_SIZE(tx_macro_dai));
+ if (ret)
+- goto err;
+- return ret;
+-err:
+- clk_bulk_disable_unprepare(TX_NUM_CLKS_MAX, tx->clks);
++ goto err_clkout;
+
++ return 0;
++
++err_clkout:
++ clk_disable_unprepare(tx->fsgen);
++err_fsgen:
++ clk_disable_unprepare(tx->npl);
++err_npl:
++ clk_disable_unprepare(tx->mclk);
++err_mclk:
++ clk_disable_unprepare(tx->dcodec);
++err_dcodec:
++ clk_disable_unprepare(tx->macro);
++err:
+ return ret;
+ }
+
+@@ -1835,9 +1890,11 @@ static int tx_macro_remove(struct platform_device *pdev)
+ {
+ struct tx_macro *tx = dev_get_drvdata(&pdev->dev);
+
+- of_clk_del_provider(pdev->dev.of_node);
+-
+- clk_bulk_disable_unprepare(TX_NUM_CLKS_MAX, tx->clks);
++ clk_disable_unprepare(tx->macro);
++ clk_disable_unprepare(tx->dcodec);
++ clk_disable_unprepare(tx->mclk);
++ clk_disable_unprepare(tx->npl);
++ clk_disable_unprepare(tx->fsgen);
+
+ return 0;
+ }
+diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c
+index 56c93f4465c9c..9b9bae9b92be1 100644
+--- a/sound/soc/codecs/lpass-va-macro.c
++++ b/sound/soc/codecs/lpass-va-macro.c
+@@ -780,7 +780,7 @@ static int va_macro_dec_mode_get(struct snd_kcontrol *kcontrol,
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int path = e->shift_l;
+
+- ucontrol->value.integer.value[0] = va->dec_mode[path];
++ ucontrol->value.enumerated.item[0] = va->dec_mode[path];
+
+ return 0;
+ }
+@@ -789,7 +789,7 @@ static int va_macro_dec_mode_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+- int value = ucontrol->value.integer.value[0];
++ int value = ucontrol->value.enumerated.item[0];
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int path = e->shift_l;
+ struct va_macro *va = snd_soc_component_get_drvdata(comp);
+@@ -1408,7 +1408,7 @@ static int va_macro_probe(struct platform_device *pdev)
+ va->clks[1].id = "dcodec";
+ va->clks[2].id = "mclk";
+
+- ret = devm_clk_bulk_get(dev, VA_NUM_CLKS_MAX, va->clks);
++ ret = devm_clk_bulk_get_optional(dev, VA_NUM_CLKS_MAX, va->clks);
+ if (ret) {
+ dev_err(dev, "Error getting VA Clocks (%d)\n", ret);
+ return ret;
+diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
+index d3ac318fd6b6a..dd1a8b7bc794c 100644
+--- a/sound/soc/codecs/lpass-wsa-macro.c
++++ b/sound/soc/codecs/lpass-wsa-macro.c
+@@ -2405,6 +2405,8 @@ static int wsa_macro_probe(struct platform_device *pdev)
+ return PTR_ERR(base);
+
+ wsa->regmap = devm_regmap_init_mmio(dev, base, &wsa_regmap_config);
++ if (IS_ERR(wsa->regmap))
++ return PTR_ERR(wsa->regmap);
+
+ dev_set_drvdata(dev, wsa);
+
+diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c
+index f4ed7e04673fd..fd4fa1d5d2d12 100644
+--- a/sound/soc/codecs/madera.c
++++ b/sound/soc/codecs/madera.c
+@@ -618,7 +618,13 @@ int madera_out1_demux_put(struct snd_kcontrol *kcontrol,
+ end:
+ snd_soc_dapm_mutex_unlock(dapm);
+
+- return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
++ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
++ if (ret < 0) {
++ dev_err(madera->dev, "Failed to update demux power state: %d\n", ret);
++ return ret;
++ }
++
++ return change;
+ }
+ EXPORT_SYMBOL_GPL(madera_out1_demux_put);
+
+@@ -893,7 +899,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol,
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ const int adsp_num = e->shift_l;
+ const unsigned int item = ucontrol->value.enumerated.item[0];
+- int ret;
++ int ret = 0;
+
+ if (item >= e->items)
+ return -EINVAL;
+@@ -910,10 +916,10 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol,
+ "Cannot change '%s' while in use by active audio paths\n",
+ kcontrol->id.name);
+ ret = -EBUSY;
+- } else {
++ } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) {
+ /* Volatile register so defer until the codec is powered up */
+ priv->adsp_rate_cache[adsp_num] = e->values[item];
+- ret = 0;
++ ret = 1;
+ }
+
+ mutex_unlock(&priv->rate_lock);
+diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c
+index 00e9d4fd1651f..0c261335c8a16 100644
+--- a/sound/soc/codecs/max9759.c
++++ b/sound/soc/codecs/max9759.c
+@@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol);
+ struct max9759 *priv = snd_soc_component_get_drvdata(c);
+
+- if (ucontrol->value.integer.value[0] > 3)
++ if (ucontrol->value.integer.value[0] < 0 ||
++ ucontrol->value.integer.value[0] > 3)
+ return -EINVAL;
+
+ priv->gain = ucontrol->value.integer.value[0];
+diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
+index b45ec35cd63c3..5513acd360b8f 100644
+--- a/sound/soc/codecs/max98090.c
++++ b/sound/soc/codecs/max98090.c
+@@ -393,7 +393,8 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
+ struct soc_mixer_control *mc =
+ (struct soc_mixer_control *)kcontrol->private_value;
+ unsigned int mask = (1 << fls(mc->max)) - 1;
+- unsigned int sel = ucontrol->value.integer.value[0];
++ int sel_unchecked = ucontrol->value.integer.value[0];
++ unsigned int sel;
+ unsigned int val = snd_soc_component_read(component, mc->reg);
+ unsigned int *select;
+
+@@ -413,6 +414,10 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
+
+ val = (val >> mc->shift) & mask;
+
++ if (sel_unchecked < 0 || sel_unchecked > mc->max)
++ return -EINVAL;
++ sel = sel_unchecked;
++
+ *select = sel;
+
+ /* Setting a volume is only valid if it is already On */
+@@ -427,7 +432,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
+ mask << mc->shift,
+ sel << mc->shift);
+
+- return 0;
++ return *select != val;
+ }
+
+ static const char *max98090_perf_pwr_text[] =
+diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c
+index ddb6436835d73..68497a4521dd2 100644
+--- a/sound/soc/codecs/max98373-i2c.c
++++ b/sound/soc/codecs/max98373-i2c.c
+@@ -551,6 +551,10 @@ static int max98373_i2c_probe(struct i2c_client *i2c,
+ max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num,
+ sizeof(*max98373->cache),
+ GFP_KERNEL);
++ if (!max98373->cache) {
++ ret = -ENOMEM;
++ return ret;
++ }
+
+ for (i = 0; i < max98373->cache_num; i++)
+ max98373->cache[i].reg = max98373_i2c_cache_reg[i];
+diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c
+index dc520effc61cb..97b64477dde67 100644
+--- a/sound/soc/codecs/max98373-sdw.c
++++ b/sound/soc/codecs/max98373-sdw.c
+@@ -741,7 +741,7 @@ static int max98373_sdw_set_tdm_slot(struct snd_soc_dai *dai,
+ static const struct snd_soc_dai_ops max98373_dai_sdw_ops = {
+ .hw_params = max98373_sdw_dai_hw_params,
+ .hw_free = max98373_pcm_hw_free,
+- .set_sdw_stream = max98373_set_sdw_stream,
++ .set_stream = max98373_set_sdw_stream,
+ .shutdown = max98373_shutdown,
+ .set_tdm_slot = max98373_sdw_set_tdm_slot,
+ };
+@@ -862,6 +862,16 @@ static int max98373_sdw_probe(struct sdw_slave *slave,
+ return max98373_init(slave, regmap);
+ }
+
++static int max98373_sdw_remove(struct sdw_slave *slave)
++{
++ struct max98373_priv *max98373 = dev_get_drvdata(&slave->dev);
++
++ if (max98373->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ return 0;
++}
++
+ #if defined(CONFIG_OF)
+ static const struct of_device_id max98373_of_match[] = {
+ { .compatible = "maxim,max98373", },
+@@ -893,7 +903,7 @@ static struct sdw_driver max98373_sdw_driver = {
+ .pm = &max98373_pm,
+ },
+ .probe = max98373_sdw_probe,
+- .remove = NULL,
++ .remove = max98373_sdw_remove,
+ .ops = &max98373_slave_ops,
+ .id_table = max98373_id,
+ };
+diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
+index 3ddd822240e3a..971b8360b5b1b 100644
+--- a/sound/soc/codecs/msm8916-wcd-analog.c
++++ b/sound/soc/codecs/msm8916-wcd-analog.c
+@@ -1221,8 +1221,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
+ }
+
+ irq = platform_get_irq_byname(pdev, "mbhc_switch_int");
+- if (irq < 0)
+- return irq;
++ if (irq < 0) {
++ ret = irq;
++ goto err_disable_clk;
++ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ pm8916_mbhc_switch_irq_handler,
+@@ -1234,8 +1236,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
+
+ if (priv->mbhc_btn_enabled) {
+ irq = platform_get_irq_byname(pdev, "mbhc_but_press_det");
+- if (irq < 0)
+- return irq;
++ if (irq < 0) {
++ ret = irq;
++ goto err_disable_clk;
++ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ mbhc_btn_press_irq_handler,
+@@ -1246,8 +1250,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
+ dev_err(dev, "cannot request mbhc button press irq\n");
+
+ irq = platform_get_irq_byname(pdev, "mbhc_but_rel_det");
+- if (irq < 0)
+- return irq;
++ if (irq < 0) {
++ ret = irq;
++ goto err_disable_clk;
++ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL,
+ mbhc_btn_release_irq_handler,
+@@ -1264,6 +1270,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev)
+ return devm_snd_soc_register_component(dev, &pm8916_wcd_analog,
+ pm8916_wcd_analog_dai,
+ ARRAY_SIZE(pm8916_wcd_analog_dai));
++
++err_disable_clk:
++ clk_disable_unprepare(priv->mclk);
++ return ret;
+ }
+
+ static int pm8916_wcd_analog_spmi_remove(struct platform_device *pdev)
+diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
+index fcc10c8bc6259..098a58990f07d 100644
+--- a/sound/soc/codecs/msm8916-wcd-digital.c
++++ b/sound/soc/codecs/msm8916-wcd-digital.c
+@@ -328,8 +328,8 @@ static const struct snd_kcontrol_new rx1_mix2_inp1_mux = SOC_DAPM_ENUM(
+ static const struct snd_kcontrol_new rx2_mix2_inp1_mux = SOC_DAPM_ENUM(
+ "RX2 MIX2 INP1 Mux", rx2_mix2_inp1_chain_enum);
+
+-/* Digital Gain control -38.4 dB to +38.4 dB in 0.3 dB steps */
+-static const DECLARE_TLV_DB_SCALE(digital_gain, -3840, 30, 0);
++/* Digital Gain control -84 dB to +40 dB in 1 dB steps */
++static const DECLARE_TLV_DB_SCALE(digital_gain, -8400, 100, -8400);
+
+ /* Cutoff Freq for High Pass Filter at -3dB */
+ static const char * const hpf_cutoff_text[] = {
+@@ -510,15 +510,15 @@ static int wcd_iir_filter_info(struct snd_kcontrol *kcontrol,
+
+ static const struct snd_kcontrol_new msm8916_wcd_digital_snd_controls[] = {
+ SOC_SINGLE_S8_TLV("RX1 Digital Volume", LPASS_CDC_RX1_VOL_CTL_B2_CTL,
+- -128, 127, digital_gain),
++ -84, 40, digital_gain),
+ SOC_SINGLE_S8_TLV("RX2 Digital Volume", LPASS_CDC_RX2_VOL_CTL_B2_CTL,
+- -128, 127, digital_gain),
++ -84, 40, digital_gain),
+ SOC_SINGLE_S8_TLV("RX3 Digital Volume", LPASS_CDC_RX3_VOL_CTL_B2_CTL,
+- -128, 127, digital_gain),
++ -84, 40, digital_gain),
+ SOC_SINGLE_S8_TLV("TX1 Digital Volume", LPASS_CDC_TX1_VOL_CTL_GAIN,
+- -128, 127, digital_gain),
++ -84, 40, digital_gain),
+ SOC_SINGLE_S8_TLV("TX2 Digital Volume", LPASS_CDC_TX2_VOL_CTL_GAIN,
+- -128, 127, digital_gain),
++ -84, 40, digital_gain),
+ SOC_ENUM("TX1 HPF Cutoff", tx1_hpf_cutoff_enum),
+ SOC_ENUM("TX2 HPF Cutoff", tx2_hpf_cutoff_enum),
+ SOC_SINGLE("TX1 HPF Switch", LPASS_CDC_TX1_MUX_CTL, 3, 1, 0),
+@@ -553,22 +553,22 @@ static const struct snd_kcontrol_new msm8916_wcd_digital_snd_controls[] = {
+ WCD_IIR_FILTER_CTL("IIR2 Band3", IIR2, BAND3),
+ WCD_IIR_FILTER_CTL("IIR2 Band4", IIR2, BAND4),
+ WCD_IIR_FILTER_CTL("IIR2 Band5", IIR2, BAND5),
+- SOC_SINGLE_SX_TLV("IIR1 INP1 Volume", LPASS_CDC_IIR1_GAIN_B1_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR1 INP2 Volume", LPASS_CDC_IIR1_GAIN_B2_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR1 INP3 Volume", LPASS_CDC_IIR1_GAIN_B3_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR1 INP4 Volume", LPASS_CDC_IIR1_GAIN_B4_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR2 INP1 Volume", LPASS_CDC_IIR2_GAIN_B1_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR2 INP2 Volume", LPASS_CDC_IIR2_GAIN_B2_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR2 INP3 Volume", LPASS_CDC_IIR2_GAIN_B3_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("IIR2 INP4 Volume", LPASS_CDC_IIR2_GAIN_B4_CTL,
+- 0, -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR1 INP1 Volume", LPASS_CDC_IIR1_GAIN_B1_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR1 INP2 Volume", LPASS_CDC_IIR1_GAIN_B2_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR1 INP3 Volume", LPASS_CDC_IIR1_GAIN_B3_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR1 INP4 Volume", LPASS_CDC_IIR1_GAIN_B4_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR2 INP1 Volume", LPASS_CDC_IIR2_GAIN_B1_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR2 INP2 Volume", LPASS_CDC_IIR2_GAIN_B2_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR2 INP3 Volume", LPASS_CDC_IIR2_GAIN_B3_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("IIR2 INP4 Volume", LPASS_CDC_IIR2_GAIN_B4_CTL,
++ -84, 40, digital_gain),
+
+ };
+
+@@ -1201,14 +1201,24 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(priv->mclk);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable mclk %d\n", ret);
+- return ret;
++ goto err_clk;
+ }
+
+ dev_set_drvdata(dev, priv);
+
+- return devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
++ ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
+ msm8916_wcd_digital_dai,
+ ARRAY_SIZE(msm8916_wcd_digital_dai));
++ if (ret)
++ goto err_mclk;
++
++ return 0;
++
++err_mclk:
++ clk_disable_unprepare(priv->mclk);
++err_clk:
++ clk_disable_unprepare(priv->ahbclk);
++ return ret;
+ }
+
+ static int msm8916_wcd_digital_remove(struct platform_device *pdev)
+diff --git a/sound/soc/codecs/mt6358.c b/sound/soc/codecs/mt6358.c
+index 9b263a9a669dc..4c7b5d940799b 100644
+--- a/sound/soc/codecs/mt6358.c
++++ b/sound/soc/codecs/mt6358.c
+@@ -107,6 +107,7 @@ int mt6358_set_mtkaif_protocol(struct snd_soc_component *cmpnt,
+ priv->mtkaif_protocol = mtkaif_protocol;
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_protocol);
+
+ static void playback_gpio_set(struct mt6358_priv *priv)
+ {
+@@ -273,6 +274,7 @@ int mt6358_mtkaif_calibration_enable(struct snd_soc_component *cmpnt)
+ 1 << RG_AUD_PAD_TOP_DAT_MISO_LOOPBACK_SFT);
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_enable);
+
+ int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt)
+ {
+@@ -296,6 +298,7 @@ int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt)
+ capture_gpio_reset(priv);
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_disable);
+
+ int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt,
+ int phase_1, int phase_2)
+@@ -310,6 +313,7 @@ int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt,
+ phase_2 << RG_AUD_PAD_TOP_PHASE_MODE2_SFT);
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_calibration_phase);
+
+ /* dl pga gain */
+ enum {
+diff --git a/sound/soc/codecs/mt6359-accdet.c b/sound/soc/codecs/mt6359-accdet.c
+index 6d3d170144a0a..7f624854948c7 100644
+--- a/sound/soc/codecs/mt6359-accdet.c
++++ b/sound/soc/codecs/mt6359-accdet.c
+@@ -675,6 +675,7 @@ static int mt6359_accdet_parse_dt(struct mt6359_accdet *priv)
+ sizeof(struct three_key_threshold));
+ }
+
++ of_node_put(node);
+ dev_warn(priv->dev, "accdet caps=%x\n", priv->caps);
+
+ return 0;
+@@ -964,7 +965,7 @@ static int mt6359_accdet_probe(struct platform_device *pdev)
+ mutex_init(&priv->res_lock);
+
+ priv->accdet_irq = platform_get_irq(pdev, 0);
+- if (priv->accdet_irq) {
++ if (priv->accdet_irq >= 0) {
+ ret = devm_request_threaded_irq(&pdev->dev, priv->accdet_irq,
+ NULL, mt6359_accdet_irq,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+@@ -978,7 +979,7 @@ static int mt6359_accdet_probe(struct platform_device *pdev)
+
+ if (priv->caps & ACCDET_PMIC_EINT0) {
+ priv->accdet_eint0 = platform_get_irq(pdev, 1);
+- if (priv->accdet_eint0) {
++ if (priv->accdet_eint0 >= 0) {
+ ret = devm_request_threaded_irq(&pdev->dev,
+ priv->accdet_eint0,
+ NULL, mt6359_accdet_irq,
+@@ -993,7 +994,7 @@ static int mt6359_accdet_probe(struct platform_device *pdev)
+ }
+ } else if (priv->caps & ACCDET_PMIC_EINT1) {
+ priv->accdet_eint1 = platform_get_irq(pdev, 2);
+- if (priv->accdet_eint1) {
++ if (priv->accdet_eint1 >= 0) {
+ ret = devm_request_threaded_irq(&pdev->dev,
+ priv->accdet_eint1,
+ NULL, mt6359_accdet_irq,
+diff --git a/sound/soc/codecs/mt6359.c b/sound/soc/codecs/mt6359.c
+index 2d6a4a29b8507..cf1febe54bcd9 100644
+--- a/sound/soc/codecs/mt6359.c
++++ b/sound/soc/codecs/mt6359.c
+@@ -2780,6 +2780,7 @@ static int mt6359_parse_dt(struct mt6359_priv *priv)
+
+ ret = of_property_read_u32(np, "mediatek,mic-type-2",
+ &priv->mux_select[MUX_MIC_TYPE_2]);
++ of_node_put(np);
+ if (ret) {
+ dev_info(priv->dev,
+ "%s() failed to read mic-type-2, use default (%d)\n",
+diff --git a/sound/soc/codecs/mt6660.c b/sound/soc/codecs/mt6660.c
+index 358c500377dff..7d7f97b8c7c4f 100644
+--- a/sound/soc/codecs/mt6660.c
++++ b/sound/soc/codecs/mt6660.c
+@@ -510,7 +510,11 @@ static int mt6660_i2c_probe(struct i2c_client *client,
+ ret = devm_snd_soc_register_component(chip->dev,
+ &mt6660_component_driver,
+ &mt6660_codec_dai, 1);
++ if (ret)
++ pm_runtime_disable(chip->dev);
++
+ return ret;
++
+ probe_fail:
+ _mt6660_chip_power_on(chip, 0);
+ mutex_destroy(&chip->io_lock);
+diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c
+index 58123390c7a31..b436e532993d1 100644
+--- a/sound/soc/codecs/nau8822.c
++++ b/sound/soc/codecs/nau8822.c
+@@ -740,6 +740,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source,
+ pll_param->pll_int, pll_param->pll_frac,
+ pll_param->mclk_scaler, pll_param->pre_factor);
+
++ snd_soc_component_update_bits(component,
++ NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_OFF);
+ snd_soc_component_update_bits(component,
+ NAU8822_REG_PLL_N, NAU8822_PLLMCLK_DIV2 | NAU8822_PLLN_MASK,
+ (pll_param->pre_factor ? NAU8822_PLLMCLK_DIV2 : 0) |
+@@ -757,6 +759,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source,
+ pll_param->mclk_scaler << NAU8822_MCLKSEL_SFT);
+ snd_soc_component_update_bits(component,
+ NAU8822_REG_CLOCKING, NAU8822_CLKM_MASK, NAU8822_CLKM_PLL);
++ snd_soc_component_update_bits(component,
++ NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_ON);
+
+ return 0;
+ }
+diff --git a/sound/soc/codecs/nau8822.h b/sound/soc/codecs/nau8822.h
+index 489191ff187ec..b45d42c15de6b 100644
+--- a/sound/soc/codecs/nau8822.h
++++ b/sound/soc/codecs/nau8822.h
+@@ -90,6 +90,9 @@
+ #define NAU8822_REFIMP_3K 0x3
+ #define NAU8822_IOBUF_EN (0x1 << 2)
+ #define NAU8822_ABIAS_EN (0x1 << 3)
++#define NAU8822_PLL_EN_MASK (0x1 << 5)
++#define NAU8822_PLL_ON (0x1 << 5)
++#define NAU8822_PLL_OFF (0x0 << 5)
+
+ /* NAU8822_REG_AUDIO_INTERFACE (0x4) */
+ #define NAU8822_AIFMT_MASK (0x3 << 3)
+diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c
+index f946ef65a4c19..f7ff130a9485d 100644
+--- a/sound/soc/codecs/nau8824.c
++++ b/sound/soc/codecs/nau8824.c
+@@ -8,6 +8,7 @@
+
+ #include <linux/module.h>
+ #include <linux/delay.h>
++#include <linux/dmi.h>
+ #include <linux/init.h>
+ #include <linux/i2c.h>
+ #include <linux/regmap.h>
+@@ -27,6 +28,12 @@
+
+ #include "nau8824.h"
+
++#define NAU8824_JD_ACTIVE_HIGH BIT(0)
++
++static int nau8824_quirk;
++static int quirk_override = -1;
++module_param_named(quirk, quirk_override, uint, 0444);
++MODULE_PARM_DESC(quirk, "Board-specific quirk override");
+
+ static int nau8824_config_sysclk(struct nau8824 *nau8824,
+ int clk_id, unsigned int freq);
+@@ -1035,6 +1042,7 @@ static int nau8824_hw_params(struct snd_pcm_substream *substream,
+ struct snd_soc_component *component = dai->component;
+ struct nau8824 *nau8824 = snd_soc_component_get_drvdata(component);
+ unsigned int val_len = 0, osr, ctrl_val, bclk_fs, bclk_div;
++ int err = -EINVAL;
+
+ nau8824_sema_acquire(nau8824, HZ);
+
+@@ -1051,7 +1059,7 @@ static int nau8824_hw_params(struct snd_pcm_substream *substream,
+ osr &= NAU8824_DAC_OVERSAMPLE_MASK;
+ if (nau8824_clock_check(nau8824, substream->stream,
+ nau8824->fs, osr))
+- return -EINVAL;
++ goto error;
+ regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER,
+ NAU8824_CLK_DAC_SRC_MASK,
+ osr_dac_sel[osr].clk_src << NAU8824_CLK_DAC_SRC_SFT);
+@@ -1061,7 +1069,7 @@ static int nau8824_hw_params(struct snd_pcm_substream *substream,
+ osr &= NAU8824_ADC_SYNC_DOWN_MASK;
+ if (nau8824_clock_check(nau8824, substream->stream,
+ nau8824->fs, osr))
+- return -EINVAL;
++ goto error;
+ regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER,
+ NAU8824_CLK_ADC_SRC_MASK,
+ osr_adc_sel[osr].clk_src << NAU8824_CLK_ADC_SRC_SFT);
+@@ -1082,7 +1090,7 @@ static int nau8824_hw_params(struct snd_pcm_substream *substream,
+ else if (bclk_fs <= 256)
+ bclk_div = 0;
+ else
+- return -EINVAL;
++ goto error;
+ regmap_update_bits(nau8824->regmap,
+ NAU8824_REG_PORT0_I2S_PCM_CTRL_2,
+ NAU8824_I2S_LRC_DIV_MASK | NAU8824_I2S_BLK_DIV_MASK,
+@@ -1103,15 +1111,17 @@ static int nau8824_hw_params(struct snd_pcm_substream *substream,
+ val_len |= NAU8824_I2S_DL_32;
+ break;
+ default:
+- return -EINVAL;
++ goto error;
+ }
+
+ regmap_update_bits(nau8824->regmap, NAU8824_REG_PORT0_I2S_PCM_CTRL_1,
+ NAU8824_I2S_DL_MASK, val_len);
++ err = 0;
+
++ error:
+ nau8824_sema_release(nau8824);
+
+- return 0;
++ return err;
+ }
+
+ static int nau8824_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+@@ -1120,8 +1130,6 @@ static int nau8824_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ struct nau8824 *nau8824 = snd_soc_component_get_drvdata(component);
+ unsigned int ctrl1_val = 0, ctrl2_val = 0;
+
+- nau8824_sema_acquire(nau8824, HZ);
+-
+ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+ case SND_SOC_DAIFMT_CBM_CFM:
+ ctrl2_val |= NAU8824_I2S_MS_MASTER;
+@@ -1163,6 +1171,8 @@ static int nau8824_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ return -EINVAL;
+ }
+
++ nau8824_sema_acquire(nau8824, HZ);
++
+ regmap_update_bits(nau8824->regmap, NAU8824_REG_PORT0_I2S_PCM_CTRL_1,
+ NAU8824_I2S_DF_MASK | NAU8824_I2S_BP_MASK |
+ NAU8824_I2S_PCMB_EN, ctrl1_val);
+@@ -1845,6 +1855,58 @@ static int nau8824_read_device_properties(struct device *dev,
+ return 0;
+ }
+
++/* Please keep this list alphabetically sorted */
++static const struct dmi_system_id nau8824_quirk_table[] = {
++ {
++ /* Cyberbook T116 rugged tablet */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"),
++ DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"),
++ },
++ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
++ },
++ {
++ /* Positivo CW14Q01P */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
++ DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P"),
++ },
++ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
++ },
++ {
++ /* Positivo K1424G */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
++ DMI_MATCH(DMI_BOARD_NAME, "K1424G"),
++ },
++ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
++ },
++ {
++ /* Positivo N14ZP74G */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
++ DMI_MATCH(DMI_BOARD_NAME, "N14ZP74G"),
++ },
++ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
++ },
++ {}
++};
++
++static void nau8824_check_quirks(void)
++{
++ const struct dmi_system_id *dmi_id;
++
++ if (quirk_override != -1) {
++ nau8824_quirk = quirk_override;
++ return;
++ }
++
++ dmi_id = dmi_first_match(nau8824_quirk_table);
++ if (dmi_id)
++ nau8824_quirk = (unsigned long)dmi_id->driver_data;
++}
++
+ static int nau8824_i2c_probe(struct i2c_client *i2c,
+ const struct i2c_device_id *id)
+ {
+@@ -1869,6 +1931,11 @@ static int nau8824_i2c_probe(struct i2c_client *i2c,
+ nau8824->irq = i2c->irq;
+ sema_init(&nau8824->jd_sem, 1);
+
++ nau8824_check_quirks();
++
++ if (nau8824_quirk & NAU8824_JD_ACTIVE_HIGH)
++ nau8824->jkdet_polarity = 0;
++
+ nau8824_print_device_properties(nau8824);
+
+ ret = regmap_read(nau8824->regmap, NAU8824_REG_I2C_DEVICE_ID, &value);
+diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c
+index 60dee41816dc2..1c26577f08ee0 100644
+--- a/sound/soc/codecs/pcm512x.c
++++ b/sound/soc/codecs/pcm512x.c
+@@ -1635,7 +1635,7 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap)
+ if (val > 6) {
+ dev_err(dev, "Invalid pll-in\n");
+ ret = -EINVAL;
+- goto err_clk;
++ goto err_pm;
+ }
+ pcm512x->pll_in = val;
+ }
+@@ -1644,7 +1644,7 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap)
+ if (val > 6) {
+ dev_err(dev, "Invalid pll-out\n");
+ ret = -EINVAL;
+- goto err_clk;
++ goto err_pm;
+ }
+ pcm512x->pll_out = val;
+ }
+@@ -1653,12 +1653,12 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap)
+ dev_err(dev,
+ "Error: both pll-in and pll-out, or none\n");
+ ret = -EINVAL;
+- goto err_clk;
++ goto err_pm;
+ }
+ if (pcm512x->pll_in && pcm512x->pll_in == pcm512x->pll_out) {
+ dev_err(dev, "Error: pll-in == pll-out\n");
+ ret = -EINVAL;
+- goto err_clk;
++ goto err_pm;
+ }
+ }
+ #endif
+diff --git a/sound/soc/codecs/rk3328_codec.c b/sound/soc/codecs/rk3328_codec.c
+index 758d439e8c7a5..86b679cf7aef9 100644
+--- a/sound/soc/codecs/rk3328_codec.c
++++ b/sound/soc/codecs/rk3328_codec.c
+@@ -481,7 +481,7 @@ static int rk3328_platform_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(rk3328->pclk);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to enable acodec pclk\n");
+- return ret;
++ goto err_unprepare_mclk;
+ }
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c
+index 943d7d933e81b..cce6f4e7992f5 100644
+--- a/sound/soc/codecs/rk817_codec.c
++++ b/sound/soc/codecs/rk817_codec.c
+@@ -489,7 +489,7 @@ static int rk817_platform_probe(struct platform_device *pdev)
+
+ rk817_codec_parse_dt_property(&pdev->dev, rk817_codec_data);
+
+- rk817_codec_data->mclk = clk_get(pdev->dev.parent, "mclk");
++ rk817_codec_data->mclk = devm_clk_get(pdev->dev.parent, "mclk");
+ if (IS_ERR(rk817_codec_data->mclk)) {
+ dev_dbg(&pdev->dev, "Unable to get mclk\n");
+ ret = -ENXIO;
+@@ -508,12 +508,14 @@ static int rk817_platform_probe(struct platform_device *pdev)
+ if (ret < 0) {
+ dev_err(&pdev->dev, "%s() register codec error %d\n",
+ __func__, ret);
+- goto err_;
++ goto err_clk;
+ }
+
+ return 0;
+-err_:
+
++err_clk:
++ clk_disable_unprepare(rk817_codec_data->mclk);
++err_:
+ return ret;
+ }
+
+@@ -539,3 +541,4 @@ module_platform_driver(rk817_codec_driver);
+ MODULE_DESCRIPTION("ASoC RK817 codec driver");
+ MODULE_AUTHOR("binyuan <kevan.lan@rock-chips.com>");
+ MODULE_LICENSE("GPL v2");
++MODULE_ALIAS("platform:rk817-codec");
+diff --git a/sound/soc/codecs/rt1019.c b/sound/soc/codecs/rt1019.c
+index 8c0b00242bb87..56e7c7ee98fd0 100644
+--- a/sound/soc/codecs/rt1019.c
++++ b/sound/soc/codecs/rt1019.c
+@@ -391,18 +391,18 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+ unsigned int rx_mask, int slots, int slot_width)
+ {
+ struct snd_soc_component *component = dai->component;
+- unsigned int val = 0, rx_slotnum;
++ unsigned int cn = 0, cl = 0, rx_slotnum;
+ int ret = 0, first_bit;
+
+ switch (slots) {
+ case 4:
+- val |= RT1019_I2S_TX_4CH;
++ cn = RT1019_I2S_TX_4CH;
+ break;
+ case 6:
+- val |= RT1019_I2S_TX_6CH;
++ cn = RT1019_I2S_TX_6CH;
+ break;
+ case 8:
+- val |= RT1019_I2S_TX_8CH;
++ cn = RT1019_I2S_TX_8CH;
+ break;
+ case 2:
+ break;
+@@ -412,16 +412,16 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+
+ switch (slot_width) {
+ case 20:
+- val |= RT1019_I2S_DL_20;
++ cl = RT1019_TDM_CL_20;
+ break;
+ case 24:
+- val |= RT1019_I2S_DL_24;
++ cl = RT1019_TDM_CL_24;
+ break;
+ case 32:
+- val |= RT1019_I2S_DL_32;
++ cl = RT1019_TDM_CL_32;
+ break;
+ case 8:
+- val |= RT1019_I2S_DL_8;
++ cl = RT1019_TDM_CL_8;
+ break;
+ case 16:
+ break;
+@@ -470,8 +470,10 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+ goto _set_tdm_err_;
+ }
+
++ snd_soc_component_update_bits(component, RT1019_TDM_1,
++ RT1019_TDM_CL_MASK, cl);
+ snd_soc_component_update_bits(component, RT1019_TDM_2,
+- RT1019_I2S_CH_TX_MASK | RT1019_I2S_DF_MASK, val);
++ RT1019_I2S_CH_TX_MASK, cn);
+
+ _set_tdm_err_:
+ return ret;
+diff --git a/sound/soc/codecs/rt1019.h b/sound/soc/codecs/rt1019.h
+index 64df831eeb720..48ba15efb48dd 100644
+--- a/sound/soc/codecs/rt1019.h
++++ b/sound/soc/codecs/rt1019.h
+@@ -95,6 +95,12 @@
+ #define RT1019_TDM_BCLK_MASK (0x1 << 6)
+ #define RT1019_TDM_BCLK_NORM (0x0 << 6)
+ #define RT1019_TDM_BCLK_INV (0x1 << 6)
++#define RT1019_TDM_CL_MASK (0x7)
++#define RT1019_TDM_CL_8 (0x4)
++#define RT1019_TDM_CL_32 (0x3)
++#define RT1019_TDM_CL_24 (0x2)
++#define RT1019_TDM_CL_20 (0x1)
++#define RT1019_TDM_CL_16 (0x0)
+
+ /* 0x0401 TDM Control-2 */
+ #define RT1019_I2S_CH_TX_MASK (0x3 << 6)
+diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c
+index f716668de6400..03adf3324b81d 100644
+--- a/sound/soc/codecs/rt1308-sdw.c
++++ b/sound/soc/codecs/rt1308-sdw.c
+@@ -613,7 +613,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1308 = {
+ static const struct snd_soc_dai_ops rt1308_aif_dai_ops = {
+ .hw_params = rt1308_sdw_hw_params,
+ .hw_free = rt1308_sdw_pcm_hw_free,
+- .set_sdw_stream = rt1308_set_sdw_stream,
++ .set_stream = rt1308_set_sdw_stream,
+ .shutdown = rt1308_sdw_shutdown,
+ .set_tdm_slot = rt1308_sdw_set_tdm_slot,
+ };
+@@ -683,6 +683,16 @@ static int rt1308_sdw_probe(struct sdw_slave *slave,
+ return 0;
+ }
+
++static int rt1308_sdw_remove(struct sdw_slave *slave)
++{
++ struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(&slave->dev);
++
++ if (rt1308->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ return 0;
++}
++
+ static const struct sdw_device_id rt1308_id[] = {
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x1308, 0x2, 0, 0),
+ {},
+@@ -742,6 +752,7 @@ static struct sdw_driver rt1308_sdw_driver = {
+ .pm = &rt1308_pm,
+ },
+ .probe = rt1308_sdw_probe,
++ .remove = rt1308_sdw_remove,
+ .ops = &rt1308_slave_ops,
+ .id_table = rt1308_id,
+ };
+diff --git a/sound/soc/codecs/rt1308-sdw.h b/sound/soc/codecs/rt1308-sdw.h
+index c5ce75666dcc8..98293d73ebabc 100644
+--- a/sound/soc/codecs/rt1308-sdw.h
++++ b/sound/soc/codecs/rt1308-sdw.h
+@@ -139,9 +139,11 @@ static const struct reg_default rt1308_reg_defaults[] = {
+ { 0x3005, 0x23 },
+ { 0x3008, 0x02 },
+ { 0x300a, 0x00 },
++ { 0xc000 | (RT1308_DATA_PATH << 4), 0x00 },
+ { 0xc003 | (RT1308_DAC_SET << 4), 0x00 },
+ { 0xc001 | (RT1308_POWER << 4), 0x00 },
+ { 0xc002 | (RT1308_POWER << 4), 0x00 },
++ { 0xc000 | (RT1308_POWER_STATUS << 4), 0x00 },
+ };
+
+ #define RT1308_SDW_OFFSET 0xc000
+diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c
+index 09b4914bba1bf..1e04aa8ab1666 100644
+--- a/sound/soc/codecs/rt1316-sdw.c
++++ b/sound/soc/codecs/rt1316-sdw.c
+@@ -602,7 +602,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1316 = {
+ static const struct snd_soc_dai_ops rt1316_aif_dai_ops = {
+ .hw_params = rt1316_sdw_hw_params,
+ .hw_free = rt1316_sdw_pcm_hw_free,
+- .set_sdw_stream = rt1316_set_sdw_stream,
++ .set_stream = rt1316_set_sdw_stream,
+ .shutdown = rt1316_sdw_shutdown,
+ };
+
+@@ -675,6 +675,16 @@ static int rt1316_sdw_probe(struct sdw_slave *slave,
+ return rt1316_sdw_init(&slave->dev, regmap, slave);
+ }
+
++static int rt1316_sdw_remove(struct sdw_slave *slave)
++{
++ struct rt1316_sdw_priv *rt1316 = dev_get_drvdata(&slave->dev);
++
++ if (rt1316->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ return 0;
++}
++
+ static const struct sdw_device_id rt1316_id[] = {
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x1316, 0x3, 0x1, 0),
+ {},
+@@ -734,6 +744,7 @@ static struct sdw_driver rt1316_sdw_driver = {
+ .pm = &rt1316_pm,
+ },
+ .probe = rt1316_sdw_probe,
++ .remove = rt1316_sdw_remove,
+ .ops = &rt1316_slave_ops,
+ .id_table = rt1316_id,
+ };
+diff --git a/sound/soc/codecs/rt298.c b/sound/soc/codecs/rt298.c
+index c592c40a7ab35..604754e4b29ff 100644
+--- a/sound/soc/codecs/rt298.c
++++ b/sound/soc/codecs/rt298.c
+@@ -1173,6 +1173,13 @@ static const struct dmi_system_id force_combo_jack_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Geminilake")
+ }
+ },
++ {
++ .ident = "Intel Kabylake R RVP",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Kabylake Client platform")
++ }
++ },
+ { }
+ };
+
+diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
+index 4b1ad5054e8d1..c011bd04153ae 100644
+--- a/sound/soc/codecs/rt5514.c
++++ b/sound/soc/codecs/rt5514.c
+@@ -419,7 +419,7 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol,
+ }
+ }
+
+- return 0;
++ return 1;
+ }
+
+ static const struct snd_kcontrol_new rt5514_snd_controls[] = {
+diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
+index 9408ee63cb268..8ea6d43335626 100644
+--- a/sound/soc/codecs/rt5645.c
++++ b/sound/soc/codecs/rt5645.c
+@@ -4154,9 +4154,14 @@ static int rt5645_i2c_remove(struct i2c_client *i2c)
+ if (i2c->irq)
+ free_irq(i2c->irq, rt5645);
+
++ /*
++ * Since the rt5645_btn_check_callback() can queue jack_detect_work,
++	 * the timer needs to be deleted first
++ */
++ del_timer_sync(&rt5645->btn_check_timer);
++
+ cancel_delayed_work_sync(&rt5645->jack_detect_work);
+ cancel_delayed_work_sync(&rt5645->rcclock_work);
+- del_timer_sync(&rt5645->btn_check_timer);
+
+ regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies);
+
+diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c
+index fc0c83b73f099..93820561b9f5d 100644
+--- a/sound/soc/codecs/rt5651.c
++++ b/sound/soc/codecs/rt5651.c
+@@ -2261,11 +2261,8 @@ static int rt5651_i2c_probe(struct i2c_client *i2c,
+
+ ret = devm_request_irq(&i2c->dev, rt5651->irq, rt5651_irq,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+- | IRQF_ONESHOT, "rt5651", rt5651);
+- if (ret == 0) {
+- /* Gets re-enabled by rt5651_set_jack() */
+- disable_irq(rt5651->irq);
+- } else {
++ | IRQF_ONESHOT | IRQF_NO_AUTOEN, "rt5651", rt5651);
++ if (ret) {
+ dev_warn(&i2c->dev, "Failed to reguest IRQ %d: %d\n",
+ rt5651->irq, ret);
+ rt5651->irq = -ENXIO;
+diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
+index be9fc58ff6812..4aba6e106ee46 100644
+--- a/sound/soc/codecs/rt5663.c
++++ b/sound/soc/codecs/rt5663.c
+@@ -3461,6 +3461,7 @@ static void rt5663_calibrate(struct rt5663_priv *rt5663)
+ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev)
+ {
+ int table_size;
++ int ret;
+
+ device_property_read_u32(dev, "realtek,dc_offset_l_manual",
+ &rt5663->pdata.dc_offset_l_manual);
+@@ -3477,9 +3478,13 @@ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev)
+ table_size = sizeof(struct impedance_mapping_table) *
+ rt5663->pdata.impedance_sensing_num;
+ rt5663->imp_table = devm_kzalloc(dev, table_size, GFP_KERNEL);
+- device_property_read_u32_array(dev,
++ if (!rt5663->imp_table)
++ return -ENOMEM;
++ ret = device_property_read_u32_array(dev,
+ "realtek,impedance_sensing_table",
+ (u32 *)rt5663->imp_table, table_size);
++ if (ret)
++ return ret;
+ }
+
+ return 0;
+@@ -3504,8 +3509,11 @@ static int rt5663_i2c_probe(struct i2c_client *i2c,
+
+ if (pdata)
+ rt5663->pdata = *pdata;
+- else
+- rt5663_parse_dp(rt5663, &i2c->dev);
++ else {
++ ret = rt5663_parse_dp(rt5663, &i2c->dev);
++ if (ret)
++ return ret;
++ }
+
+ for (i = 0; i < ARRAY_SIZE(rt5663->supplies); i++)
+ rt5663->supplies[i].supply = rt5663_supply_names[i];
+diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
+index e59323fd5bf24..5e00aca0c418a 100644
+--- a/sound/soc/codecs/rt5665.c
++++ b/sound/soc/codecs/rt5665.c
+@@ -4472,6 +4472,8 @@ static void rt5665_remove(struct snd_soc_component *component)
+ struct rt5665_priv *rt5665 = snd_soc_component_get_drvdata(component);
+
+ regmap_write(rt5665->regmap, RT5665_RESET, 0);
++
++ regulator_bulk_disable(ARRAY_SIZE(rt5665->supplies), rt5665->supplies);
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c
+index 6ab1a8bc37352..1186ceb5a978e 100644
+--- a/sound/soc/codecs/rt5668.c
++++ b/sound/soc/codecs/rt5668.c
+@@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work)
+ container_of(work, struct rt5668_priv, jack_detect_work.work);
+ int val, btn_type;
+
+- while (!rt5668->component)
+- usleep_range(10000, 15000);
+-
+- while (!rt5668->component->card->instantiated)
+- usleep_range(10000, 15000);
++ if (!rt5668->component || !rt5668->component->card ||
++ !rt5668->component->card->instantiated) {
++ /* card not yet ready, try later */
++ mod_delayed_work(system_power_efficient_wq,
++ &rt5668->jack_detect_work, msecs_to_jiffies(15));
++ return;
++ }
+
+ mutex_lock(&rt5668->calibrate_mutex);
+
+diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c
+index ecbaf129a6e3e..51b385575a5cc 100644
+--- a/sound/soc/codecs/rt5670.c
++++ b/sound/soc/codecs/rt5670.c
+@@ -3313,8 +3313,6 @@ static int rt5670_i2c_probe(struct i2c_client *i2c,
+ if (ret < 0)
+ goto err;
+
+- pm_runtime_put(&i2c->dev);
+-
+ return 0;
+ err:
+ pm_runtime_disable(&i2c->dev);
+diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c
+index b9d5d7a0975b3..efb0140512452 100644
+--- a/sound/soc/codecs/rt5682-i2c.c
++++ b/sound/soc/codecs/rt5682-i2c.c
+@@ -59,18 +59,12 @@ static void rt5682_jd_check_handler(struct work_struct *work)
+ struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv,
+ jd_check_work.work);
+
+- if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
+- & RT5682_JDH_RS_MASK) {
++ if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK)
+ /* jack out */
+- rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0);
+-
+- snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
+- SND_JACK_HEADSET |
+- SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+- SND_JACK_BTN_2 | SND_JACK_BTN_3);
+- } else {
++ mod_delayed_work(system_power_efficient_wq,
++ &rt5682->jack_detect_work, 0);
++ else
+ schedule_delayed_work(&rt5682->jd_check_work, 500);
+- }
+ }
+
+ static irqreturn_t rt5682_irq(int irq, void *data)
+@@ -139,6 +133,8 @@ static int rt5682_i2c_probe(struct i2c_client *i2c,
+
+ i2c_set_clientdata(i2c, rt5682);
+
++ rt5682->i2c_dev = &i2c->dev;
++
+ rt5682->pdata = i2s_default_platform_data;
+
+ if (pdata)
+@@ -272,10 +268,32 @@ static int rt5682_i2c_probe(struct i2c_client *i2c,
+ ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+ rt5682_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+ | IRQF_ONESHOT, "rt5682", rt5682);
+- if (ret)
++ if (!ret)
++ rt5682->irq = i2c->irq;
++ else
+ dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret);
+ }
+
++#ifdef CONFIG_COMMON_CLK
++ /* Check if MCLK provided */
++ rt5682->mclk = devm_clk_get(&i2c->dev, "mclk");
++ if (IS_ERR(rt5682->mclk)) {
++ if (PTR_ERR(rt5682->mclk) != -ENOENT) {
++ ret = PTR_ERR(rt5682->mclk);
++ return ret;
++ }
++ rt5682->mclk = NULL;
++ }
++
++ /* Register CCF DAI clock control */
++ ret = rt5682_register_dai_clks(rt5682);
++ if (ret)
++ return ret;
++
++ /* Initial setup for CCF */
++ rt5682->lrck[RT5682_AIF1] = 48000;
++#endif
++
+ return devm_snd_soc_register_component(&i2c->dev,
+ &rt5682_soc_component_dev,
+ rt5682_dai, ARRAY_SIZE(rt5682_dai));
+diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
+index 31a4f286043e4..9fdd9afe00da4 100644
+--- a/sound/soc/codecs/rt5682-sdw.c
++++ b/sound/soc/codecs/rt5682-sdw.c
+@@ -272,7 +272,7 @@ static int rt5682_sdw_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt5682_sdw_ops = {
+ .hw_params = rt5682_sdw_hw_params,
+ .hw_free = rt5682_sdw_hw_free,
+- .set_sdw_stream = rt5682_set_sdw_stream,
++ .set_stream = rt5682_set_sdw_stream,
+ .shutdown = rt5682_sdw_shutdown,
+ };
+
+@@ -719,9 +719,12 @@ static int rt5682_sdw_remove(struct sdw_slave *slave)
+ {
+ struct rt5682_priv *rt5682 = dev_get_drvdata(&slave->dev);
+
+- if (rt5682 && rt5682->hw_init)
++ if (rt5682->hw_init)
+ cancel_delayed_work_sync(&rt5682->jack_detect_work);
+
++ if (rt5682->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
+ return 0;
+ }
+
+@@ -783,8 +786,15 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
+ if (!rt5682->first_hw_init)
+ return 0;
+
+- if (!slave->unattach_request)
++ if (!slave->unattach_request) {
++ if (rt5682->disable_irq == true) {
++ mutex_lock(&rt5682->disable_irq_lock);
++ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
++ rt5682->disable_irq = false;
++ mutex_unlock(&rt5682->disable_irq_lock);
++ }
+ goto regmap_sync;
++ }
+
+ time = wait_for_completion_timeout(&slave->initialization_complete,
+ msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
+diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
+index 4a64cab99c55b..468ce5b22156d 100644
+--- a/sound/soc/codecs/rt5682.c
++++ b/sound/soc/codecs/rt5682.c
+@@ -46,6 +46,10 @@ static const struct reg_sequence patch_list[] = {
+ {RT5682_SAR_IL_CMD_1, 0x22b7},
+ {RT5682_SAR_IL_CMD_3, 0x0365},
+ {RT5682_SAR_IL_CMD_6, 0x0110},
++ {RT5682_CHARGE_PUMP_1, 0x0210},
++ {RT5682_HP_LOGIC_CTRL_2, 0x0007},
++ {RT5682_SAR_IL_CMD_2, 0xac00},
++ {RT5682_CBJ_CTRL_7, 0x0104},
+ };
+
+ void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev)
+@@ -918,7 +922,7 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component,
+ *
+ * Returns detect status.
+ */
+-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
++static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
+ {
+ struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+ struct snd_soc_dapm_context *dapm = &component->dapm;
+@@ -938,6 +942,10 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
+ snd_soc_component_update_bits(component,
+ RT5682_HP_CHARGE_PUMP_1,
+ RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
++ rt5682_enable_push_button_irq(component, false);
++ snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
++ RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
++ usleep_range(55000, 60000);
+ snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+ RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH);
+
+@@ -999,7 +1007,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
+ dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type);
+ return rt5682->jack_type;
+ }
+-EXPORT_SYMBOL_GPL(rt5682_headset_detect);
+
+ static int rt5682_set_jack_detect(struct snd_soc_component *component,
+ struct snd_soc_jack *hs_jack, void *data)
+@@ -1082,14 +1089,20 @@ void rt5682_jack_detect_handler(struct work_struct *work)
+ {
+ struct rt5682_priv *rt5682 =
+ container_of(work, struct rt5682_priv, jack_detect_work.work);
++ struct snd_soc_dapm_context *dapm;
+ int val, btn_type;
+
+- while (!rt5682->component)
+- usleep_range(10000, 15000);
++ if (!rt5682->component || !rt5682->component->card ||
++ !rt5682->component->card->instantiated) {
++ /* card not yet ready, try later */
++ mod_delayed_work(system_power_efficient_wq,
++ &rt5682->jack_detect_work, msecs_to_jiffies(15));
++ return;
++ }
+
+- while (!rt5682->component->card->instantiated)
+- usleep_range(10000, 15000);
++ dapm = snd_soc_component_get_dapm(rt5682->component);
+
++ snd_soc_dapm_mutex_lock(dapm);
+ mutex_lock(&rt5682->calibrate_mutex);
+
+ val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
+@@ -1149,6 +1162,9 @@ void rt5682_jack_detect_handler(struct work_struct *work)
+ rt5682->irq_work_delay_time = 50;
+ }
+
++ mutex_unlock(&rt5682->calibrate_mutex);
++ snd_soc_dapm_mutex_unlock(dapm);
++
+ snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
+ SND_JACK_HEADSET |
+ SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+@@ -1161,8 +1177,6 @@ void rt5682_jack_detect_handler(struct work_struct *work)
+ else
+ cancel_delayed_work_sync(&rt5682->jd_check_work);
+ }
+-
+- mutex_unlock(&rt5682->calibrate_mutex);
+ }
+ EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler);
+
+@@ -1515,21 +1529,29 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+- snd_soc_component_write(component,
+- RT5682_HP_LOGIC_CTRL_2, 0x0012);
+- snd_soc_component_write(component,
+- RT5682_HP_CTRL_2, 0x6000);
++ snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
++ RT5682_HP_C2_DAC_AMP_MUTE, 0);
++ snd_soc_component_update_bits(component, RT5682_HP_LOGIC_CTRL_2,
++ RT5682_HP_LC2_SIG_SOUR2_MASK, RT5682_HP_LC2_SIG_SOUR2_REG);
+ snd_soc_component_update_bits(component,
+ RT5682_DEPOP_1, 0x60, 0x60);
+ snd_soc_component_update_bits(component,
+ RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080);
++ snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
++ RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN,
++ RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN);
++ usleep_range(5000, 10000);
++ snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
++ RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L);
+ break;
+
+ case SND_SOC_DAPM_POST_PMD:
++ snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
++ RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN, 0);
++ snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
++ RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_M);
+ snd_soc_component_update_bits(component,
+ RT5682_DEPOP_1, 0x60, 0x0);
+- snd_soc_component_write(component,
+- RT5682_HP_CTRL_2, 0x0000);
+ snd_soc_component_update_bits(component,
+ RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0000);
+ break;
+@@ -1637,6 +1659,23 @@ static SOC_VALUE_ENUM_SINGLE_DECL(rt5682_adcdat_pin_enum,
+ static const struct snd_kcontrol_new rt5682_adcdat_pin_ctrl =
+ SOC_DAPM_ENUM("ADCDAT", rt5682_adcdat_pin_enum);
+
++static const unsigned int rt5682_hpo_sig_out_values[] = {
++ 2,
++ 7,
++};
++
++static const char * const rt5682_hpo_sig_out_mode[] = {
++ "Legacy",
++ "OneBit",
++};
++
++static SOC_VALUE_ENUM_SINGLE_DECL(rt5682_hpo_sig_out_enum,
++ RT5682_HP_LOGIC_CTRL_2, 0, RT5682_HP_LC2_SIG_SOUR1_MASK,
++ rt5682_hpo_sig_out_mode, rt5682_hpo_sig_out_values);
++
++static const struct snd_kcontrol_new rt5682_hpo_sig_demux =
++ SOC_DAPM_ENUM("HPO Signal Demux", rt5682_hpo_sig_out_enum);
++
+ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = {
+ SND_SOC_DAPM_SUPPLY("LDO2", RT5682_PWR_ANLG_3, RT5682_PWR_LDO2_BIT,
+ 0, NULL, 0),
+@@ -1820,6 +1859,10 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = {
+ SND_SOC_DAPM_SWITCH("HPOR Playback", SND_SOC_NOPM, 0, 0,
+ &hpor_switch),
+
++ SND_SOC_DAPM_OUT_DRV("HPO Legacy", SND_SOC_NOPM, 0, 0, NULL, 0),
++ SND_SOC_DAPM_OUT_DRV("HPO OneBit", SND_SOC_NOPM, 0, 0, NULL, 0),
++ SND_SOC_DAPM_DEMUX("HPO Signal Demux", SND_SOC_NOPM, 0, 0, &rt5682_hpo_sig_demux),
++
+ /* CLK DET */
+ SND_SOC_DAPM_SUPPLY("CLKDET SYS", RT5682_CLK_DET,
+ RT5682_SYS_CLK_DET_SFT, 0, NULL, 0),
+@@ -1987,10 +2030,19 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = {
+ {"HP Amp", NULL, "Charge Pump"},
+ {"HP Amp", NULL, "CLKDET SYS"},
+ {"HP Amp", NULL, "Vref1"},
+- {"HPOL Playback", "Switch", "HP Amp"},
+- {"HPOR Playback", "Switch", "HP Amp"},
++
++ {"HPO Signal Demux", NULL, "HP Amp"},
++
++ {"HPO Legacy", "Legacy", "HPO Signal Demux"},
++ {"HPO OneBit", "OneBit", "HPO Signal Demux"},
++
++ {"HPOL Playback", "Switch", "HPO Legacy"},
++ {"HPOR Playback", "Switch", "HPO Legacy"},
++
+ {"HPOL", NULL, "HPOL Playback"},
+ {"HPOR", NULL, "HPOR Playback"},
++ {"HPOL", NULL, "HPO OneBit"},
++ {"HPOR", NULL, "HPO OneBit"},
+ };
+
+ static int rt5682_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+@@ -2510,7 +2562,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component,
+ static bool rt5682_clk_check(struct rt5682_priv *rt5682)
+ {
+ if (!rt5682->master[RT5682_AIF1]) {
+- dev_dbg(rt5682->component->dev, "sysclk/dai not set correctly\n");
++ dev_dbg(rt5682->i2c_dev, "sysclk/dai not set correctly\n");
+ return false;
+ }
+ return true;
+@@ -2521,13 +2573,15 @@ static int rt5682_wclk_prepare(struct clk_hw *hw)
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_WCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
+- struct snd_soc_dapm_context *dapm =
+- snd_soc_component_get_dapm(component);
++ struct snd_soc_component *component;
++ struct snd_soc_dapm_context *dapm;
+
+ if (!rt5682_clk_check(rt5682))
+ return -EINVAL;
+
++ component = rt5682->component;
++ dapm = snd_soc_component_get_dapm(component);
++
+ snd_soc_dapm_mutex_lock(dapm);
+
+ snd_soc_dapm_force_enable_pin_unlocked(dapm, "MICBIAS");
+@@ -2557,13 +2611,15 @@ static void rt5682_wclk_unprepare(struct clk_hw *hw)
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_WCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
+- struct snd_soc_dapm_context *dapm =
+- snd_soc_component_get_dapm(component);
++ struct snd_soc_component *component;
++ struct snd_soc_dapm_context *dapm;
+
+ if (!rt5682_clk_check(rt5682))
+ return;
+
++ component = rt5682->component;
++ dapm = snd_soc_component_get_dapm(component);
++
+ snd_soc_dapm_mutex_lock(dapm);
+
+ snd_soc_dapm_disable_pin_unlocked(dapm, "MICBIAS");
+@@ -2587,7 +2643,6 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw,
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_WCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
+ const char * const clk_name = clk_hw_get_name(hw);
+
+ if (!rt5682_clk_check(rt5682))
+@@ -2597,7 +2652,7 @@ static unsigned long rt5682_wclk_recalc_rate(struct clk_hw *hw,
+ */
+ if (rt5682->lrck[RT5682_AIF1] != CLK_48 &&
+ rt5682->lrck[RT5682_AIF1] != CLK_44) {
+- dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n",
++ dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n",
+ __func__, clk_name, CLK_44, CLK_48);
+ return 0;
+ }
+@@ -2611,7 +2666,6 @@ static long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate,
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_WCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
+ const char * const clk_name = clk_hw_get_name(hw);
+
+ if (!rt5682_clk_check(rt5682))
+@@ -2621,7 +2675,7 @@ static long rt5682_wclk_round_rate(struct clk_hw *hw, unsigned long rate,
+ * It will force to 48kHz if not both.
+ */
+ if (rate != CLK_48 && rate != CLK_44) {
+- dev_warn(component->dev, "%s: clk %s only support %d or %d Hz output\n",
++ dev_warn(rt5682->i2c_dev, "%s: clk %s only support %d or %d Hz output\n",
+ __func__, clk_name, CLK_44, CLK_48);
+ rate = CLK_48;
+ }
+@@ -2635,7 +2689,7 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate,
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_WCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
++ struct snd_soc_component *component;
+ struct clk_hw *parent_hw;
+ const char * const clk_name = clk_hw_get_name(hw);
+ int pre_div;
+@@ -2644,6 +2698,8 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate,
+ if (!rt5682_clk_check(rt5682))
+ return -EINVAL;
+
++ component = rt5682->component;
++
+ /*
+ * Whether the wclk's parent clk (mclk) exists or not, please ensure
+ * it is fixed or set to 48MHz before setting wclk rate. It's a
+@@ -2653,12 +2709,12 @@ static int rt5682_wclk_set_rate(struct clk_hw *hw, unsigned long rate,
+ */
+ parent_hw = clk_hw_get_parent(hw);
+ if (!parent_hw)
+- dev_warn(component->dev,
++ dev_warn(rt5682->i2c_dev,
+ "Parent mclk of wclk not acquired in driver. Please ensure mclk was provided as %d Hz.\n",
+ CLK_PLL2_FIN);
+
+ if (parent_rate != CLK_PLL2_FIN)
+- dev_warn(component->dev, "clk %s only support %d Hz input\n",
++ dev_warn(rt5682->i2c_dev, "clk %s only support %d Hz input\n",
+ clk_name, CLK_PLL2_FIN);
+
+ /*
+@@ -2690,10 +2746,9 @@ static unsigned long rt5682_bclk_recalc_rate(struct clk_hw *hw,
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_BCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
+ unsigned int bclks_per_wclk;
+
+- bclks_per_wclk = snd_soc_component_read(component, RT5682_TDM_TCON_CTRL);
++ regmap_read(rt5682->regmap, RT5682_TDM_TCON_CTRL, &bclks_per_wclk);
+
+ switch (bclks_per_wclk & RT5682_TDM_BCLK_MS1_MASK) {
+ case RT5682_TDM_BCLK_MS1_256:
+@@ -2754,25 +2809,24 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate,
+ struct rt5682_priv *rt5682 =
+ container_of(hw, struct rt5682_priv,
+ dai_clks_hw[RT5682_DAI_BCLK_IDX]);
+- struct snd_soc_component *component = rt5682->component;
++ struct snd_soc_component *component;
+ struct snd_soc_dai *dai;
+ unsigned long factor;
+
+ if (!rt5682_clk_check(rt5682))
+ return -EINVAL;
+
++ component = rt5682->component;
++
+ factor = rt5682_bclk_get_factor(rate, parent_rate);
+
+ for_each_component_dais(component, dai)
+ if (dai->id == RT5682_AIF1)
+- break;
+- if (!dai) {
+- dev_err(component->dev, "dai %d not found in component\n",
+- RT5682_AIF1);
+- return -ENODEV;
+- }
++ return rt5682_set_bclk1_ratio(dai, factor);
+
+- return rt5682_set_bclk1_ratio(dai, factor);
++ dev_err(rt5682->i2c_dev, "dai %d not found in component\n",
++ RT5682_AIF1);
++ return -ENODEV;
+ }
+
+ static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = {
+@@ -2790,16 +2844,17 @@ static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = {
+ },
+ };
+
+-static int rt5682_register_dai_clks(struct snd_soc_component *component)
++int rt5682_register_dai_clks(struct rt5682_priv *rt5682)
+ {
+- struct device *dev = component->dev;
+- struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
++ struct device *dev = rt5682->i2c_dev;
+ struct rt5682_platform_data *pdata = &rt5682->pdata;
+ struct clk_hw *dai_clk_hw;
+ int i, ret;
+
+ for (i = 0; i < RT5682_DAI_NUM_CLKS; ++i) {
+ struct clk_init_data init = { };
++ struct clk_parent_data parent_data;
++ const struct clk_hw *parent;
+
+ dai_clk_hw = &rt5682->dai_clks_hw[i];
+
+@@ -2807,17 +2862,17 @@ static int rt5682_register_dai_clks(struct snd_soc_component *component)
+ case RT5682_DAI_WCLK_IDX:
+ /* Make MCLK the parent of WCLK */
+ if (rt5682->mclk) {
+- init.parent_data = &(struct clk_parent_data){
++ parent_data = (struct clk_parent_data){
+ .fw_name = "mclk",
+ };
++ init.parent_data = &parent_data;
+ init.num_parents = 1;
+ }
+ break;
+ case RT5682_DAI_BCLK_IDX:
+ /* Make WCLK the parent of BCLK */
+- init.parent_hws = &(const struct clk_hw *){
+- &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX]
+- };
++ parent = &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX];
++ init.parent_hws = &parent;
+ init.num_parents = 1;
+ break;
+ default:
+@@ -2851,6 +2906,7 @@ static int rt5682_register_dai_clks(struct snd_soc_component *component)
+
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(rt5682_register_dai_clks);
+ #endif /* CONFIG_COMMON_CLK */
+
+ static int rt5682_probe(struct snd_soc_component *component)
+@@ -2860,9 +2916,6 @@ static int rt5682_probe(struct snd_soc_component *component)
+ unsigned long time;
+ struct snd_soc_dapm_context *dapm = &component->dapm;
+
+-#ifdef CONFIG_COMMON_CLK
+- int ret;
+-#endif
+ rt5682->component = component;
+
+ if (rt5682->is_sdw) {
+@@ -2874,26 +2927,6 @@ static int rt5682_probe(struct snd_soc_component *component)
+ dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ return -ETIMEDOUT;
+ }
+- } else {
+-#ifdef CONFIG_COMMON_CLK
+- /* Check if MCLK provided */
+- rt5682->mclk = devm_clk_get(component->dev, "mclk");
+- if (IS_ERR(rt5682->mclk)) {
+- if (PTR_ERR(rt5682->mclk) != -ENOENT) {
+- ret = PTR_ERR(rt5682->mclk);
+- return ret;
+- }
+- rt5682->mclk = NULL;
+- }
+-
+- /* Register CCF DAI clock control */
+- ret = rt5682_register_dai_clks(component);
+- if (ret)
+- return ret;
+-
+- /* Initial setup for CCF */
+- rt5682->lrck[RT5682_AIF1] = CLK_48;
+-#endif
+ }
+
+ snd_soc_dapm_disable_pin(dapm, "MICBIAS");
+@@ -2918,12 +2951,12 @@ static int rt5682_suspend(struct snd_soc_component *component)
+ if (rt5682->is_sdw)
+ return 0;
+
++ if (rt5682->irq)
++ disable_irq(rt5682->irq);
++
+ cancel_delayed_work_sync(&rt5682->jack_detect_work);
+ cancel_delayed_work_sync(&rt5682->jd_check_work);
+- if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
+- snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+- RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
+- RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
++ if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
+ val = snd_soc_component_read(component,
+ RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK;
+
+@@ -2945,10 +2978,17 @@ static int rt5682_suspend(struct snd_soc_component *component)
+ /* enter SAR ADC power saving mode */
+ snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+ RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK |
+- RT5682_SAR_BUTDET_RST_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, 0);
++ RT5682_SAR_SEL_MB1_MB2_MASK, 0);
++ usleep_range(5000, 6000);
++ snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
++ RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
++ RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
++ usleep_range(10000, 12000);
+ snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+- RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_BUTDET_RST_MASK,
+- RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV | RT5682_SAR_BUTDET_RST_NORMAL);
++ RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK,
++ RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV);
++ snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1,
++ RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
+ }
+
+ regcache_cache_only(rt5682->regmap, true);
+@@ -2966,10 +3006,11 @@ static int rt5682_resume(struct snd_soc_component *component)
+ regcache_cache_only(rt5682->regmap, false);
+ regcache_sync(rt5682->regmap);
+
+- if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
++ if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
+ snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+ RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_SEL_MB1_MB2_MASK,
+ RT5682_SAR_BUTDET_POW_NORM | RT5682_SAR_SEL_MB1_MB2_AUTO);
++ usleep_range(5000, 6000);
+ snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+ RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
+ RT5682_CTRL_MB1_FSM | RT5682_CTRL_MB2_FSM);
+@@ -2977,8 +3018,12 @@ static int rt5682_resume(struct snd_soc_component *component)
+ RT5682_PWR_CBJ, RT5682_PWR_CBJ);
+ }
+
++ rt5682->jack_type = 0;
+ mod_delayed_work(system_power_efficient_wq,
+- &rt5682->jack_detect_work, msecs_to_jiffies(250));
++ &rt5682->jack_detect_work, msecs_to_jiffies(0));
++
++ if (rt5682->irq)
++ enable_irq(rt5682->irq);
+
+ return 0;
+ }
+diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h
+index b59221048ebf9..5fd798eac40df 100644
+--- a/sound/soc/codecs/rt5682.h
++++ b/sound/soc/codecs/rt5682.h
+@@ -375,6 +375,14 @@
+ #define RT5682_R_VOL_MASK (0x3f)
+ #define RT5682_R_VOL_SFT 0
+
++/* Headphone Amp Control 2 (0x0003) */
++#define RT5682_HP_C2_DAC_AMP_MUTE_SFT 15
++#define RT5682_HP_C2_DAC_AMP_MUTE (0x1 << 15)
++#define RT5682_HP_C2_DAC_L_EN_SFT 14
++#define RT5682_HP_C2_DAC_L_EN (0x1 << 14)
++#define RT5682_HP_C2_DAC_R_EN_SFT 13
++#define RT5682_HP_C2_DAC_R_EN (0x1 << 13)
++
+ /*Headphone Amp L/R Analog Gain and Digital NG2 Gain Control (0x0005 0x0006)*/
+ #define RT5682_G_HP (0xf << 8)
+ #define RT5682_G_HP_SFT 8
+@@ -1265,6 +1273,10 @@
+ #define RT5682_HPA_CP_BIAS_6UA (0x3 << 2)
+
+ /* Charge Pump Internal Register1 (0x0125) */
++#define RT5682_CP_SW_SIZE_MASK (0x7 << 8)
++#define RT5682_CP_SW_SIZE_L (0x4 << 8)
++#define RT5682_CP_SW_SIZE_M (0x2 << 8)
++#define RT5682_CP_SW_SIZE_S (0x1 << 8)
+ #define RT5682_CP_CLK_HP_MASK (0x3 << 4)
+ #define RT5682_CP_CLK_HP_100KHZ (0x0 << 4)
+ #define RT5682_CP_CLK_HP_200KHZ (0x1 << 4)
+@@ -1315,6 +1327,14 @@
+ #define RT5682_DEB_STO_DAC_MASK (0x7 << 4)
+ #define RT5682_DEB_80_MS (0x0 << 4)
+
++/* HP Behavior Logic Control 2 (0x01db) */
++#define RT5682_HP_LC2_SIG_SOUR2_MASK (0x1 << 4)
++#define RT5682_HP_LC2_SIG_SOUR2_REG (0x1 << 4)
++#define RT5682_HP_LC2_SIG_SOUR2_DC_CAL (0x0 << 4)
++#define RT5682_HP_LC2_SIG_SOUR1_MASK (0x7)
++#define RT5682_HP_LC2_SIG_SOUR1_1BIT (0x7)
++#define RT5682_HP_LC2_SIG_SOUR1_LEGA (0x2)
++
+ /* SAR ADC Inline Command Control 1 (0x0210) */
+ #define RT5682_SAR_BUTT_DET_MASK (0x1 << 15)
+ #define RT5682_SAR_BUTT_DET_EN (0x1 << 15)
+@@ -1408,6 +1428,7 @@ enum {
+
+ struct rt5682_priv {
+ struct snd_soc_component *component;
++ struct device *i2c_dev;
+ struct rt5682_platform_data pdata;
+ struct regmap *regmap;
+ struct regmap *sdw_regmap;
+@@ -1441,6 +1462,7 @@ struct rt5682_priv {
+ int pll_out[RT5682_PLLS];
+
+ int jack_type;
++ int irq;
+ int irq_work_delay_time;
+ };
+
+@@ -1451,7 +1473,6 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component,
+
+ void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev);
+
+-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert);
+ void rt5682_jack_detect_handler(struct work_struct *work);
+
+ bool rt5682_volatile_register(struct device *dev, unsigned int reg);
+@@ -1462,6 +1483,8 @@ void rt5682_calibrate(struct rt5682_priv *rt5682);
+ void rt5682_reset(struct rt5682_priv *rt5682);
+ int rt5682_parse_dt(struct rt5682_priv *rt5682, struct device *dev);
+
++int rt5682_register_dai_clks(struct rt5682_priv *rt5682);
++
+ #define RT5682_REG_NUM 318
+ extern const struct reg_default rt5682_reg[RT5682_REG_NUM];
+
+diff --git a/sound/soc/codecs/rt700-sdw.c b/sound/soc/codecs/rt700-sdw.c
+index bda5948996642..f7439e40ca8b5 100644
+--- a/sound/soc/codecs/rt700-sdw.c
++++ b/sound/soc/codecs/rt700-sdw.c
+@@ -13,6 +13,7 @@
+ #include <linux/soundwire/sdw_type.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/module.h>
++#include <linux/pm_runtime.h>
+ #include <linux/regmap.h>
+ #include <sound/soc.h>
+ #include "rt700.h"
+@@ -463,11 +464,14 @@ static int rt700_sdw_remove(struct sdw_slave *slave)
+ {
+ struct rt700_priv *rt700 = dev_get_drvdata(&slave->dev);
+
+- if (rt700 && rt700->hw_init) {
++ if (rt700->hw_init) {
+ cancel_delayed_work_sync(&rt700->jack_detect_work);
+ cancel_delayed_work_sync(&rt700->jack_btn_check_work);
+ }
+
++ if (rt700->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
+ return 0;
+ }
+
+diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c
+index 921382724f9cd..3de3406d653e4 100644
+--- a/sound/soc/codecs/rt700.c
++++ b/sound/soc/codecs/rt700.c
+@@ -162,7 +162,7 @@ static void rt700_jack_detect_handler(struct work_struct *work)
+ if (!rt700->hs_jack)
+ return;
+
+- if (!rt700->component->card->instantiated)
++ if (!rt700->component->card || !rt700->component->card->instantiated)
+ return;
+
+ reg = RT700_VERB_GET_PIN_SENSE | RT700_HP_OUT;
+@@ -315,17 +315,27 @@ static int rt700_set_jack_detect(struct snd_soc_component *component,
+ struct snd_soc_jack *hs_jack, void *data)
+ {
+ struct rt700_priv *rt700 = snd_soc_component_get_drvdata(component);
++ int ret;
+
+ rt700->hs_jack = hs_jack;
+
+- if (!rt700->hw_init) {
+- dev_dbg(&rt700->slave->dev,
+- "%s hw_init not ready yet\n", __func__);
++ ret = pm_runtime_resume_and_get(component->dev);
++ if (ret < 0) {
++ if (ret != -EACCES) {
++ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
++ return ret;
++ }
++
++ /* pm_runtime not enabled yet */
++ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
+ return 0;
+ }
+
+ rt700_jack_init(rt700);
+
++ pm_runtime_mark_last_busy(component->dev);
++ pm_runtime_put_autosuspend(component->dev);
++
+ return 0;
+ }
+
+@@ -1005,7 +1015,7 @@ static int rt700_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt700_ops = {
+ .hw_params = rt700_pcm_hw_params,
+ .hw_free = rt700_pcm_hw_free,
+- .set_sdw_stream = rt700_set_sdw_stream,
++ .set_stream = rt700_set_sdw_stream,
+ .shutdown = rt700_shutdown,
+ };
+
+@@ -1114,6 +1124,11 @@ int rt700_init(struct device *dev, struct regmap *sdw_regmap,
+
+ mutex_init(&rt700->disable_irq_lock);
+
++ INIT_DELAYED_WORK(&rt700->jack_detect_work,
++ rt700_jack_detect_handler);
++ INIT_DELAYED_WORK(&rt700->jack_btn_check_work,
++ rt700_btn_check_handler);
++
+ /*
+ * Mark hw_init to false
+ * HW init will be performed when device reports present
+@@ -1208,13 +1223,6 @@ int rt700_io_init(struct device *dev, struct sdw_slave *slave)
+ /* Finish Initial Settings, set power to D3 */
+ regmap_write(rt700->regmap, RT700_SET_AUDIO_POWER_STATE, AC_PWRST_D3);
+
+- if (!rt700->first_hw_init) {
+- INIT_DELAYED_WORK(&rt700->jack_detect_work,
+- rt700_jack_detect_handler);
+- INIT_DELAYED_WORK(&rt700->jack_btn_check_work,
+- rt700_btn_check_handler);
+- }
+-
+ /*
+ * if set_jack callback occurred early than io_init,
+ * we set up the jack detection function now
+diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
+index aaf5af153d3fe..4faf6b8544ddd 100644
+--- a/sound/soc/codecs/rt711-sdca-sdw.c
++++ b/sound/soc/codecs/rt711-sdca-sdw.c
+@@ -11,6 +11,7 @@
+ #include <linux/mod_devicetable.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/module.h>
++#include <linux/pm_runtime.h>
+
+ #include "rt711-sdca.h"
+ #include "rt711-sdca-sdw.h"
+@@ -229,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave)
+ }
+
+ /* set the timeout values */
+- prop->clk_stop_timeout = 20;
++ prop->clk_stop_timeout = 700;
+
+ /* wake-up event */
+ prop->wake_capable = 1;
+@@ -364,11 +365,17 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave)
+ {
+ struct rt711_sdca_priv *rt711 = dev_get_drvdata(&slave->dev);
+
+- if (rt711 && rt711->hw_init) {
++ if (rt711->hw_init) {
+ cancel_delayed_work_sync(&rt711->jack_detect_work);
+ cancel_delayed_work_sync(&rt711->jack_btn_check_work);
+ }
+
++ if (rt711->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ mutex_destroy(&rt711->calibrate_mutex);
++ mutex_destroy(&rt711->disable_irq_lock);
++
+ return 0;
+ }
+
+@@ -435,8 +442,16 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
+ if (!rt711->first_hw_init)
+ return 0;
+
+- if (!slave->unattach_request)
++ if (!slave->unattach_request) {
++ if (rt711->disable_irq == true) {
++ mutex_lock(&rt711->disable_irq_lock);
++ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
++ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
++ rt711->disable_irq = false;
++ mutex_unlock(&rt711->disable_irq_lock);
++ }
+ goto regmap_sync;
++ }
+
+ time = wait_for_completion_timeout(&slave->initialization_complete,
+ msecs_to_jiffies(RT711_PROBE_TIMEOUT));
+diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c
+index 2e992589f1e42..5ad53bbc85284 100644
+--- a/sound/soc/codecs/rt711-sdca.c
++++ b/sound/soc/codecs/rt711-sdca.c
+@@ -34,7 +34,7 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711,
+
+ ret = regmap_write(regmap, addr, value);
+ if (ret < 0)
+- dev_err(rt711->component->dev,
++ dev_err(&rt711->slave->dev,
+ "Failed to set private value: %06x <= %04x ret=%d\n",
+ addr, value, ret);
+
+@@ -50,7 +50,7 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711,
+
+ ret = regmap_read(regmap, addr, value);
+ if (ret < 0)
+- dev_err(rt711->component->dev,
++ dev_err(&rt711->slave->dev,
+ "Failed to get private value: %06x => %04x ret=%d\n",
+ addr, *value, ret);
+
+@@ -294,7 +294,7 @@ static void rt711_sdca_jack_detect_handler(struct work_struct *work)
+ if (!rt711->hs_jack)
+ return;
+
+- if (!rt711->component->card->instantiated)
++ if (!rt711->component->card || !rt711->component->card->instantiated)
+ return;
+
+ /* SDW_SCP_SDCA_INT_SDCA_0 is used for jack detection */
+@@ -487,16 +487,27 @@ static int rt711_sdca_set_jack_detect(struct snd_soc_component *component,
+ struct snd_soc_jack *hs_jack, void *data)
+ {
+ struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component);
++ int ret;
+
+ rt711->hs_jack = hs_jack;
+
+- if (!rt711->hw_init) {
+- dev_dbg(&rt711->slave->dev,
+- "%s hw_init not ready yet\n", __func__);
++ ret = pm_runtime_resume_and_get(component->dev);
++ if (ret < 0) {
++ if (ret != -EACCES) {
++ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
++ return ret;
++ }
++
++ /* pm_runtime not enabled yet */
++ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
+ return 0;
+ }
+
+ rt711_sdca_jack_init(rt711);
++
++ pm_runtime_mark_last_busy(component->dev);
++ pm_runtime_put_autosuspend(component->dev);
++
+ return 0;
+ }
+
+@@ -1190,14 +1201,6 @@ static int rt711_sdca_probe(struct snd_soc_component *component)
+ return 0;
+ }
+
+-static void rt711_sdca_remove(struct snd_soc_component *component)
+-{
+- struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component);
+-
+- regcache_cache_only(rt711->regmap, true);
+- regcache_cache_only(rt711->mbq_regmap, true);
+-}
+-
+ static const struct snd_soc_component_driver soc_sdca_dev_rt711 = {
+ .probe = rt711_sdca_probe,
+ .controls = rt711_sdca_snd_controls,
+@@ -1207,7 +1210,7 @@ static const struct snd_soc_component_driver soc_sdca_dev_rt711 = {
+ .dapm_routes = rt711_sdca_audio_map,
+ .num_dapm_routes = ARRAY_SIZE(rt711_sdca_audio_map),
+ .set_jack = rt711_sdca_set_jack_detect,
+- .remove = rt711_sdca_remove,
++ .endianness = 1,
+ };
+
+ static int rt711_sdca_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream,
+@@ -1358,7 +1361,7 @@ static int rt711_sdca_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt711_sdca_ops = {
+ .hw_params = rt711_sdca_pcm_hw_params,
+ .hw_free = rt711_sdca_pcm_hw_free,
+- .set_sdw_stream = rt711_sdca_set_sdw_stream,
++ .set_stream = rt711_sdca_set_sdw_stream,
+ .shutdown = rt711_sdca_shutdown,
+ };
+
+@@ -1411,8 +1414,12 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap,
+ rt711->regmap = regmap;
+ rt711->mbq_regmap = mbq_regmap;
+
++ mutex_init(&rt711->calibrate_mutex);
+ mutex_init(&rt711->disable_irq_lock);
+
++ INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_sdca_jack_detect_handler);
++ INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler);
++
+ /*
+ * Mark hw_init to false
+ * HW init will be performed when device reports present
+@@ -1544,14 +1551,6 @@ int rt711_sdca_io_init(struct device *dev, struct sdw_slave *slave)
+ rt711_sdca_index_update_bits(rt711, RT711_VENDOR_HDA_CTL,
+ RT711_PUSH_BTN_INT_CTL0, 0x20, 0x00);
+
+- if (!rt711->first_hw_init) {
+- INIT_DELAYED_WORK(&rt711->jack_detect_work,
+- rt711_sdca_jack_detect_handler);
+- INIT_DELAYED_WORK(&rt711->jack_btn_check_work,
+- rt711_sdca_btn_check_handler);
+- mutex_init(&rt711->calibrate_mutex);
+- }
+-
+ /* calibration */
+ ret = rt711_sdca_calibration(rt711);
+ if (ret < 0)
+diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
+index bda2cc9439c98..9545b8a7eb192 100644
+--- a/sound/soc/codecs/rt711-sdw.c
++++ b/sound/soc/codecs/rt711-sdw.c
+@@ -13,6 +13,7 @@
+ #include <linux/soundwire/sdw_type.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/module.h>
++#include <linux/pm_runtime.h>
+ #include <linux/regmap.h>
+ #include <sound/soc.h>
+ #include "rt711.h"
+@@ -464,12 +465,18 @@ static int rt711_sdw_remove(struct sdw_slave *slave)
+ {
+ struct rt711_priv *rt711 = dev_get_drvdata(&slave->dev);
+
+- if (rt711 && rt711->hw_init) {
++ if (rt711->hw_init) {
+ cancel_delayed_work_sync(&rt711->jack_detect_work);
+ cancel_delayed_work_sync(&rt711->jack_btn_check_work);
+ cancel_work_sync(&rt711->calibration_work);
+ }
+
++ if (rt711->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ mutex_destroy(&rt711->calibrate_mutex);
++ mutex_destroy(&rt711->disable_irq_lock);
++
+ return 0;
+ }
+
+@@ -534,8 +541,15 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
+ if (!rt711->first_hw_init)
+ return 0;
+
+- if (!slave->unattach_request)
++ if (!slave->unattach_request) {
++ if (rt711->disable_irq == true) {
++ mutex_lock(&rt711->disable_irq_lock);
++ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
++ rt711->disable_irq = false;
++ mutex_unlock(&rt711->disable_irq_lock);
++ }
+ goto regmap_sync;
++ }
+
+ time = wait_for_completion_timeout(&slave->initialization_complete,
+ msecs_to_jiffies(RT711_PROBE_TIMEOUT));
+diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c
+index a7c5608a0ef87..286d882636e00 100644
+--- a/sound/soc/codecs/rt711.c
++++ b/sound/soc/codecs/rt711.c
+@@ -242,7 +242,7 @@ static void rt711_jack_detect_handler(struct work_struct *work)
+ if (!rt711->hs_jack)
+ return;
+
+- if (!rt711->component->card->instantiated)
++ if (!rt711->component->card || !rt711->component->card->instantiated)
+ return;
+
+ reg = RT711_VERB_GET_PIN_SENSE | RT711_HP_OUT;
+@@ -450,17 +450,27 @@ static int rt711_set_jack_detect(struct snd_soc_component *component,
+ struct snd_soc_jack *hs_jack, void *data)
+ {
+ struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component);
++ int ret;
+
+ rt711->hs_jack = hs_jack;
+
+- if (!rt711->hw_init) {
+- dev_dbg(&rt711->slave->dev,
+- "%s hw_init not ready yet\n", __func__);
++ ret = pm_runtime_resume_and_get(component->dev);
++ if (ret < 0) {
++ if (ret != -EACCES) {
++ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
++ return ret;
++ }
++
++ /* pm_runtime not enabled yet */
++ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
+ return 0;
+ }
+
+ rt711_jack_init(rt711);
+
++ pm_runtime_mark_last_busy(component->dev);
++ pm_runtime_put_autosuspend(component->dev);
++
+ return 0;
+ }
+
+@@ -925,13 +935,6 @@ static int rt711_probe(struct snd_soc_component *component)
+ return 0;
+ }
+
+-static void rt711_remove(struct snd_soc_component *component)
+-{
+- struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component);
+-
+- regcache_cache_only(rt711->regmap, true);
+-}
+-
+ static const struct snd_soc_component_driver soc_codec_dev_rt711 = {
+ .probe = rt711_probe,
+ .set_bias_level = rt711_set_bias_level,
+@@ -942,7 +945,7 @@ static const struct snd_soc_component_driver soc_codec_dev_rt711 = {
+ .dapm_routes = rt711_audio_map,
+ .num_dapm_routes = ARRAY_SIZE(rt711_audio_map),
+ .set_jack = rt711_set_jack_detect,
+- .remove = rt711_remove,
++ .endianness = 1,
+ };
+
+ static int rt711_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream,
+@@ -1089,7 +1092,7 @@ static int rt711_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt711_ops = {
+ .hw_params = rt711_pcm_hw_params,
+ .hw_free = rt711_pcm_hw_free,
+- .set_sdw_stream = rt711_set_sdw_stream,
++ .set_stream = rt711_set_sdw_stream,
+ .shutdown = rt711_shutdown,
+ };
+
+@@ -1196,8 +1199,13 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap,
+ rt711->sdw_regmap = sdw_regmap;
+ rt711->regmap = regmap;
+
++ mutex_init(&rt711->calibrate_mutex);
+ mutex_init(&rt711->disable_irq_lock);
+
++ INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_jack_detect_handler);
++ INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler);
++ INIT_WORK(&rt711->calibration_work, rt711_calibration_work);
++
+ /*
+ * Mark hw_init to false
+ * HW init will be performed when device reports present
+@@ -1305,15 +1313,8 @@ int rt711_io_init(struct device *dev, struct sdw_slave *slave)
+
+ if (rt711->first_hw_init)
+ rt711_calibration(rt711);
+- else {
+- INIT_DELAYED_WORK(&rt711->jack_detect_work,
+- rt711_jack_detect_handler);
+- INIT_DELAYED_WORK(&rt711->jack_btn_check_work,
+- rt711_btn_check_handler);
+- mutex_init(&rt711->calibrate_mutex);
+- INIT_WORK(&rt711->calibration_work, rt711_calibration_work);
++ else
+ schedule_work(&rt711->calibration_work);
+- }
+
+ /*
+ * if set_jack callback occurred early than io_init,
+diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c
+index a5c673f43d824..85abf8073c278 100644
+--- a/sound/soc/codecs/rt715-sdca-sdw.c
++++ b/sound/soc/codecs/rt715-sdca-sdw.c
+@@ -13,6 +13,7 @@
+ #include <linux/soundwire/sdw_type.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/module.h>
++#include <linux/pm_runtime.h>
+ #include <linux/regmap.h>
+ #include <sound/soc.h>
+ #include "rt715-sdca.h"
+@@ -166,7 +167,7 @@ static int rt715_sdca_read_prop(struct sdw_slave *slave)
+ }
+
+ /* set the timeout values */
+- prop->clk_stop_timeout = 20;
++ prop->clk_stop_timeout = 200;
+
+ return 0;
+ }
+@@ -195,6 +196,16 @@ static int rt715_sdca_sdw_probe(struct sdw_slave *slave,
+ return rt715_sdca_init(&slave->dev, mbq_regmap, regmap, slave);
+ }
+
++static int rt715_sdca_sdw_remove(struct sdw_slave *slave)
++{
++ struct rt715_sdca_priv *rt715 = dev_get_drvdata(&slave->dev);
++
++ if (rt715->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ return 0;
++}
++
+ static const struct sdw_device_id rt715_sdca_id[] = {
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x3, 0x1, 0),
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x3, 0x1, 0),
+@@ -269,6 +280,7 @@ static struct sdw_driver rt715_sdw_driver = {
+ .pm = &rt715_pm,
+ },
+ .probe = rt715_sdca_sdw_probe,
++ .remove = rt715_sdca_sdw_remove,
+ .ops = &rt715_sdca_slave_ops,
+ .id_table = rt715_sdca_id,
+ };
+diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c
+index 66e166568c508..bfa536bd71960 100644
+--- a/sound/soc/codecs/rt715-sdca.c
++++ b/sound/soc/codecs/rt715-sdca.c
+@@ -938,7 +938,7 @@ static int rt715_sdca_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt715_sdca_ops = {
+ .hw_params = rt715_sdca_pcm_hw_params,
+ .hw_free = rt715_sdca_pcm_hw_free,
+- .set_sdw_stream = rt715_sdca_set_sdw_stream,
++ .set_stream = rt715_sdca_set_sdw_stream,
+ .shutdown = rt715_sdca_shutdown,
+ };
+
+diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c
+index a7b21b03c08bb..b047bf87a100c 100644
+--- a/sound/soc/codecs/rt715-sdw.c
++++ b/sound/soc/codecs/rt715-sdw.c
+@@ -14,6 +14,7 @@
+ #include <linux/soundwire/sdw_type.h>
+ #include <linux/soundwire/sdw_registers.h>
+ #include <linux/module.h>
++#include <linux/pm_runtime.h>
+ #include <linux/of.h>
+ #include <linux/regmap.h>
+ #include <sound/soc.h>
+@@ -514,6 +515,16 @@ static int rt715_sdw_probe(struct sdw_slave *slave,
+ return 0;
+ }
+
++static int rt715_sdw_remove(struct sdw_slave *slave)
++{
++ struct rt715_priv *rt715 = dev_get_drvdata(&slave->dev);
++
++ if (rt715->first_hw_init)
++ pm_runtime_disable(&slave->dev);
++
++ return 0;
++}
++
+ static const struct sdw_device_id rt715_id[] = {
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x2, 0, 0),
+ SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x2, 0, 0),
+@@ -575,6 +586,7 @@ static struct sdw_driver rt715_sdw_driver = {
+ .pm = &rt715_pm,
+ },
+ .probe = rt715_sdw_probe,
++ .remove = rt715_sdw_remove,
+ .ops = &rt715_slave_ops,
+ .id_table = rt715_id,
+ };
+diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c
+index 1352869cc0867..a64d11a747513 100644
+--- a/sound/soc/codecs/rt715.c
++++ b/sound/soc/codecs/rt715.c
+@@ -909,7 +909,7 @@ static int rt715_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops rt715_ops = {
+ .hw_params = rt715_pcm_hw_params,
+ .hw_free = rt715_pcm_hw_free,
+- .set_sdw_stream = rt715_set_sdw_stream,
++ .set_stream = rt715_set_sdw_stream,
+ .shutdown = rt715_shutdown,
+ };
+
+diff --git a/sound/soc/codecs/sdw-mockup.c b/sound/soc/codecs/sdw-mockup.c
+index 8ea13cfa9f8ed..7c612aaf31c75 100644
+--- a/sound/soc/codecs/sdw-mockup.c
++++ b/sound/soc/codecs/sdw-mockup.c
+@@ -138,7 +138,7 @@ static int sdw_mockup_pcm_hw_free(struct snd_pcm_substream *substream,
+ static const struct snd_soc_dai_ops sdw_mockup_ops = {
+ .hw_params = sdw_mockup_pcm_hw_params,
+ .hw_free = sdw_mockup_pcm_hw_free,
+- .set_sdw_stream = sdw_mockup_set_sdw_stream,
++ .set_stream = sdw_mockup_set_sdw_stream,
+ .shutdown = sdw_mockup_shutdown,
+ };
+
+diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
+index 97bf1f222805e..3c5a4fe2fad63 100644
+--- a/sound/soc/codecs/sgtl5000.c
++++ b/sound/soc/codecs/sgtl5000.c
+@@ -1797,6 +1797,10 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
+ {
+ struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client);
+
++ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT);
++ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT);
++ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT);
++
+ clk_disable_unprepare(sgtl5000->mclk);
+ regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies);
+ regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies);
+@@ -1804,6 +1808,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
+ return 0;
+ }
+
++static void sgtl5000_i2c_shutdown(struct i2c_client *client)
++{
++ sgtl5000_i2c_remove(client);
++}
++
+ static const struct i2c_device_id sgtl5000_id[] = {
+ {"sgtl5000", 0},
+ {},
+@@ -1824,6 +1833,7 @@ static struct i2c_driver sgtl5000_i2c_driver = {
+ },
+ .probe = sgtl5000_i2c_probe,
+ .remove = sgtl5000_i2c_remove,
++ .shutdown = sgtl5000_i2c_shutdown,
+ .id_table = sgtl5000_id,
+ };
+
+diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h
+index 56ec5863f2507..3a808c762299e 100644
+--- a/sound/soc/codecs/sgtl5000.h
++++ b/sound/soc/codecs/sgtl5000.h
+@@ -80,6 +80,7 @@
+ /*
+ * SGTL5000_CHIP_DIG_POWER
+ */
++#define SGTL5000_DIG_POWER_DEFAULT 0x0000
+ #define SGTL5000_ADC_EN 0x0040
+ #define SGTL5000_DAC_EN 0x0020
+ #define SGTL5000_DAP_POWERUP 0x0010
+diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
+index 7964e922b07f6..0a3540c5cdb5c 100644
+--- a/sound/soc/codecs/ssm2602.c
++++ b/sound/soc/codecs/ssm2602.c
+@@ -53,6 +53,18 @@ static const struct reg_default ssm2602_reg[SSM2602_CACHEREGNUM] = {
+ { .reg = 0x09, .def = 0x0000 }
+ };
+
++/*
++ * ssm2602 register patch
++ * Workaround for playback distortions after power up: activates digital
++ * core, and then powers on output, DAC, and whole chip at the same time
++ */
++
++static const struct reg_sequence ssm2602_patch[] = {
++ { SSM2602_ACTIVE, 0x01 },
++ { SSM2602_PWR, 0x07 },
++ { SSM2602_RESET, 0x00 },
++};
++
+
+ /*Appending several "None"s just for OSS mixer use*/
+ static const char *ssm2602_input_select[] = {
+@@ -589,6 +601,9 @@ static int ssm260x_component_probe(struct snd_soc_component *component)
+ return ret;
+ }
+
++ regmap_register_patch(ssm2602->regmap, ssm2602_patch,
++ ARRAY_SIZE(ssm2602_patch));
++
+ /* set the update bits */
+ regmap_update_bits(ssm2602->regmap, SSM2602_LINVOL,
+ LINVOL_LRIN_BOTH, LINVOL_LRIN_BOTH);
+diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c
+index 9265af41c235d..1951bae95b315 100644
+--- a/sound/soc/codecs/tas2764.c
++++ b/sound/soc/codecs/tas2764.c
+@@ -34,6 +34,9 @@ struct tas2764_priv {
+
+ int v_sense_slot;
+ int i_sense_slot;
++
++ bool dac_powered;
++ bool unmuted;
+ };
+
+ static void tas2764_reset(struct tas2764_priv *tas2764)
+@@ -42,40 +45,30 @@ static void tas2764_reset(struct tas2764_priv *tas2764)
+ gpiod_set_value_cansleep(tas2764->reset_gpio, 0);
+ msleep(20);
+ gpiod_set_value_cansleep(tas2764->reset_gpio, 1);
++ usleep_range(1000, 2000);
+ }
+
+ snd_soc_component_write(tas2764->component, TAS2764_SW_RST,
+ TAS2764_RST);
++ usleep_range(1000, 2000);
+ }
+
+-static int tas2764_set_bias_level(struct snd_soc_component *component,
+- enum snd_soc_bias_level level)
++static int tas2764_update_pwr_ctrl(struct tas2764_priv *tas2764)
+ {
+- struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
++ struct snd_soc_component *component = tas2764->component;
++ unsigned int val;
++ int ret;
+
+- switch (level) {
+- case SND_SOC_BIAS_ON:
+- snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_ACTIVE);
+- break;
+- case SND_SOC_BIAS_STANDBY:
+- case SND_SOC_BIAS_PREPARE:
+- snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_MUTE);
+- break;
+- case SND_SOC_BIAS_OFF:
+- snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_SHUTDOWN);
+- break;
++ if (tas2764->dac_powered)
++ val = tas2764->unmuted ?
++ TAS2764_PWR_CTRL_ACTIVE : TAS2764_PWR_CTRL_MUTE;
++ else
++ val = TAS2764_PWR_CTRL_SHUTDOWN;
+
+- default:
+- dev_err(tas2764->dev,
+- "wrong power level setting %d\n", level);
+- return -EINVAL;
+- }
++ ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
++ TAS2764_PWR_CTRL_MASK, val);
++ if (ret < 0)
++ return ret;
+
+ return 0;
+ }
+@@ -107,12 +100,12 @@ static int tas2764_codec_resume(struct snd_soc_component *component)
+ struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
+ int ret;
+
+- if (tas2764->sdz_gpio)
++ if (tas2764->sdz_gpio) {
+ gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
++ usleep_range(1000, 2000);
++ }
+
+- ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_ACTIVE);
++ ret = tas2764_update_pwr_ctrl(tas2764);
+
+ if (ret < 0)
+ return ret;
+@@ -131,7 +124,8 @@ static const char * const tas2764_ASI1_src[] = {
+ };
+
+ static SOC_ENUM_SINGLE_DECL(
+- tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src);
++ tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT,
++ tas2764_ASI1_src);
+
+ static const struct snd_kcontrol_new tas2764_asi1_mux =
+ SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum);
+@@ -145,14 +139,12 @@ static int tas2764_dac_event(struct snd_soc_dapm_widget *w,
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+- ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_MUTE);
++ tas2764->dac_powered = true;
++ ret = tas2764_update_pwr_ctrl(tas2764);
+ break;
+ case SND_SOC_DAPM_PRE_PMD:
+- ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_SHUTDOWN);
++ tas2764->dac_powered = false;
++ ret = tas2764_update_pwr_ctrl(tas2764);
+ break;
+ default:
+ dev_err(tas2764->dev, "Unsupported event\n");
+@@ -197,17 +189,11 @@ static const struct snd_soc_dapm_route tas2764_audio_map[] = {
+
+ static int tas2764_mute(struct snd_soc_dai *dai, int mute, int direction)
+ {
+- struct snd_soc_component *component = dai->component;
+- int ret;
+-
+- ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- mute ? TAS2764_PWR_CTRL_MUTE : 0);
+-
+- if (ret < 0)
+- return ret;
++ struct tas2764_priv *tas2764 =
++ snd_soc_component_get_drvdata(dai->component);
+
+- return 0;
++ tas2764->unmuted = !mute;
++ return tas2764_update_pwr_ctrl(tas2764);
+ }
+
+ static int tas2764_set_bitwidth(struct tas2764_priv *tas2764, int bitwidth)
+@@ -329,20 +315,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ {
+ struct snd_soc_component *component = dai->component;
+ struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
+- u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0;
+- int iface;
++ u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0;
+ int ret;
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
++ case SND_SOC_DAIFMT_NB_IF:
++ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
++ fallthrough;
+ case SND_SOC_DAIFMT_NB_NF:
+ asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING;
+ break;
++ case SND_SOC_DAIFMT_IB_IF:
++ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
++ fallthrough;
+ case SND_SOC_DAIFMT_IB_NF:
+ asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING;
+ break;
+- default:
+- dev_err(tas2764->dev, "ASI format Inverse is not found\n");
+- return -EINVAL;
+ }
+
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
+@@ -353,13 +341,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
++ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
++ fallthrough;
+ case SND_SOC_DAIFMT_DSP_A:
+- iface = TAS2764_TDM_CFG2_SCFG_I2S;
+ tdm_rx_start_slot = 1;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ case SND_SOC_DAIFMT_LEFT_J:
+- iface = TAS2764_TDM_CFG2_SCFG_LEFT_J;
+ tdm_rx_start_slot = 0;
+ break;
+ default:
+@@ -368,14 +356,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ return -EINVAL;
+ }
+
+- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
+- TAS2764_TDM_CFG1_MASK,
+- (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
++ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0,
++ TAS2764_TDM_CFG0_FRAME_START,
++ asi_cfg_0);
+ if (ret < 0)
+ return ret;
+
+- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2,
+- TAS2764_TDM_CFG2_SCFG_MASK, iface);
++ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
++ TAS2764_TDM_CFG1_MASK,
++ (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
+ if (ret < 0)
+ return ret;
+
+@@ -397,20 +386,13 @@ static int tas2764_set_dai_tdm_slot(struct snd_soc_dai *dai,
+ if (tx_mask == 0 || rx_mask != 0)
+ return -EINVAL;
+
+- if (slots == 1) {
+- if (tx_mask != 1)
+- return -EINVAL;
+- left_slot = 0;
+- right_slot = 0;
++ left_slot = __ffs(tx_mask);
++ tx_mask &= ~(1 << left_slot);
++ if (tx_mask == 0) {
++ right_slot = left_slot;
+ } else {
+- left_slot = __ffs(tx_mask);
+- tx_mask &= ~(1 << left_slot);
+- if (tx_mask == 0) {
+- right_slot = left_slot;
+- } else {
+- right_slot = __ffs(tx_mask);
+- tx_mask &= ~(1 << right_slot);
+- }
++ right_slot = __ffs(tx_mask);
++ tx_mask &= ~(1 << right_slot);
+ }
+
+ if (tx_mask != 0 || left_slot >= slots || right_slot >= slots)
+@@ -477,7 +459,7 @@ static struct snd_soc_dai_driver tas2764_dai_driver[] = {
+ .id = 0,
+ .playback = {
+ .stream_name = "ASI1 Playback",
+- .channels_min = 2,
++ .channels_min = 1,
+ .channels_max = 2,
+ .rates = TAS2764_RATES,
+ .formats = TAS2764_FORMATS,
+@@ -501,8 +483,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
+
+ tas2764->component = component;
+
+- if (tas2764->sdz_gpio)
++ if (tas2764->sdz_gpio) {
+ gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
++ usleep_range(1000, 2000);
++ }
+
+ tas2764_reset(tas2764);
+
+@@ -516,22 +500,16 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
+ if (ret < 0)
+ return ret;
+
+- ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
+- TAS2764_PWR_CTRL_MASK,
+- TAS2764_PWR_CTRL_MUTE);
+- if (ret < 0)
+- return ret;
+-
+ return 0;
+ }
+
+ static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0);
+-static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0);
++static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1);
+
+ static const struct snd_kcontrol_new tas2764_snd_controls[] = {
+ SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0,
+ TAS2764_DVC_MAX, 1, tas2764_playback_volume),
+- SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0,
++ SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0,
+ tas2764_digital_tlv),
+ };
+
+@@ -539,7 +517,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2764 = {
+ .probe = tas2764_codec_probe,
+ .suspend = tas2764_codec_suspend,
+ .resume = tas2764_codec_resume,
+- .set_bias_level = tas2764_set_bias_level,
+ .controls = tas2764_snd_controls,
+ .num_controls = ARRAY_SIZE(tas2764_snd_controls),
+ .dapm_widgets = tas2764_dapm_widgets,
+@@ -556,7 +533,7 @@ static const struct reg_default tas2764_reg_defaults[] = {
+ { TAS2764_SW_RST, 0x00 },
+ { TAS2764_PWR_CTRL, 0x1a },
+ { TAS2764_DVC, 0x00 },
+- { TAS2764_CHNL_0, 0x00 },
++ { TAS2764_CHNL_0, 0x28 },
+ { TAS2764_TDM_CFG0, 0x09 },
+ { TAS2764_TDM_CFG1, 0x02 },
+ { TAS2764_TDM_CFG2, 0x0a },
+diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h
+index 67d6fd903c42c..f015f22a083b5 100644
+--- a/sound/soc/codecs/tas2764.h
++++ b/sound/soc/codecs/tas2764.h
+@@ -47,6 +47,7 @@
+ #define TAS2764_TDM_CFG0_MASK GENMASK(3, 1)
+ #define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3)
+ #define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1))
++#define TAS2764_TDM_CFG0_FRAME_START BIT(0)
+
+ /* TDM Configuration Reg1 */
+ #define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09)
+@@ -66,10 +67,7 @@
+ #define TAS2764_TDM_CFG2_RXS_16BITS 0x0
+ #define TAS2764_TDM_CFG2_RXS_24BITS BIT(0)
+ #define TAS2764_TDM_CFG2_RXS_32BITS BIT(1)
+-#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4)
+-#define TAS2764_TDM_CFG2_SCFG_I2S 0x0
+-#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4)
+-#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5)
++#define TAS2764_TDM_CFG2_SCFG_SHIFT 4
+
+ /* TDM Configuration Reg3 */
+ #define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c)
+diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c
+index 172e79cbe0daf..ec0df3b1ef615 100644
+--- a/sound/soc/codecs/tas2770.c
++++ b/sound/soc/codecs/tas2770.c
+@@ -38,40 +38,30 @@ static void tas2770_reset(struct tas2770_priv *tas2770)
+ gpiod_set_value_cansleep(tas2770->reset_gpio, 0);
+ msleep(20);
+ gpiod_set_value_cansleep(tas2770->reset_gpio, 1);
++ usleep_range(1000, 2000);
+ }
+
+ snd_soc_component_write(tas2770->component, TAS2770_SW_RST,
+ TAS2770_RST);
++ usleep_range(1000, 2000);
+ }
+
+-static int tas2770_set_bias_level(struct snd_soc_component *component,
+- enum snd_soc_bias_level level)
++static int tas2770_update_pwr_ctrl(struct tas2770_priv *tas2770)
+ {
+- struct tas2770_priv *tas2770 =
+- snd_soc_component_get_drvdata(component);
++ struct snd_soc_component *component = tas2770->component;
++ unsigned int val;
++ int ret;
+
+- switch (level) {
+- case SND_SOC_BIAS_ON:
+- snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_ACTIVE);
+- break;
+- case SND_SOC_BIAS_STANDBY:
+- case SND_SOC_BIAS_PREPARE:
+- snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_MUTE);
+- break;
+- case SND_SOC_BIAS_OFF:
+- snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_SHUTDOWN);
+- break;
++ if (tas2770->dac_powered)
++ val = tas2770->unmuted ?
++ TAS2770_PWR_CTRL_ACTIVE : TAS2770_PWR_CTRL_MUTE;
++ else
++ val = TAS2770_PWR_CTRL_SHUTDOWN;
+
+- default:
+- dev_err(tas2770->dev, "wrong power level setting %d\n", level);
+- return -EINVAL;
+- }
++ ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
++ TAS2770_PWR_CTRL_MASK, val);
++ if (ret < 0)
++ return ret;
+
+ return 0;
+ }
+@@ -110,10 +100,9 @@ static int tas2770_codec_resume(struct snd_soc_component *component)
+
+ if (tas2770->sdz_gpio) {
+ gpiod_set_value_cansleep(tas2770->sdz_gpio, 1);
++ usleep_range(1000, 2000);
+ } else {
+- ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_ACTIVE);
++ ret = tas2770_update_pwr_ctrl(tas2770);
+ if (ret < 0)
+ return ret;
+ }
+@@ -149,24 +138,19 @@ static int tas2770_dac_event(struct snd_soc_dapm_widget *w,
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+- ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_MUTE);
++ tas2770->dac_powered = 1;
++ ret = tas2770_update_pwr_ctrl(tas2770);
+ break;
+ case SND_SOC_DAPM_PRE_PMD:
+- ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_SHUTDOWN);
++ tas2770->dac_powered = 0;
++ ret = tas2770_update_pwr_ctrl(tas2770);
+ break;
+ default:
+ dev_err(tas2770->dev, "Not supported evevt\n");
+ return -EINVAL;
+ }
+
+- if (ret < 0)
+- return ret;
+-
+- return 0;
++ return ret;
+ }
+
+ static const struct snd_kcontrol_new isense_switch =
+@@ -200,21 +184,11 @@ static const struct snd_soc_dapm_route tas2770_audio_map[] = {
+ static int tas2770_mute(struct snd_soc_dai *dai, int mute, int direction)
+ {
+ struct snd_soc_component *component = dai->component;
+- int ret;
+-
+- if (mute)
+- ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_MUTE);
+- else
+- ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL,
+- TAS2770_PWR_CTRL_MASK,
+- TAS2770_PWR_CTRL_ACTIVE);
+-
+- if (ret < 0)
+- return ret;
++ struct tas2770_priv *tas2770 =
++ snd_soc_component_get_drvdata(component);
+
+- return 0;
++ tas2770->unmuted = !mute;
++ return tas2770_update_pwr_ctrl(tas2770);
+ }
+
+ static int tas2770_set_bitwidth(struct tas2770_priv *tas2770, int bitwidth)
+@@ -291,11 +265,11 @@ static int tas2770_set_samplerate(struct tas2770_priv *tas2770, int samplerate)
+ ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ |
+ TAS2770_TDM_CFG_REG0_31_88_2_96KHZ;
+ break;
+- case 19200:
++ case 192000:
+ ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_48KHZ |
+ TAS2770_TDM_CFG_REG0_31_176_4_192KHZ;
+ break;
+- case 17640:
++ case 176400:
+ ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ |
+ TAS2770_TDM_CFG_REG0_31_176_4_192KHZ;
+ break;
+@@ -334,7 +308,7 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ struct snd_soc_component *component = dai->component;
+ struct tas2770_priv *tas2770 =
+ snd_soc_component_get_drvdata(component);
+- u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0;
++ u8 tdm_rx_start_slot = 0, invert_fpol = 0, fpol_preinv = 0, asi_cfg_1 = 0;
+ int ret;
+
+ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+@@ -346,9 +320,15 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ }
+
+ switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
++ case SND_SOC_DAIFMT_NB_IF:
++ invert_fpol = 1;
++ fallthrough;
+ case SND_SOC_DAIFMT_NB_NF:
+ asi_cfg_1 |= TAS2770_TDM_CFG_REG1_RX_RSING;
+ break;
++ case SND_SOC_DAIFMT_IB_IF:
++ invert_fpol = 1;
++ fallthrough;
+ case SND_SOC_DAIFMT_IB_NF:
+ asi_cfg_1 |= TAS2770_TDM_CFG_REG1_RX_FALING;
+ break;
+@@ -366,15 +346,19 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+ tdm_rx_start_slot = 1;
++ fpol_preinv = 0;
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ tdm_rx_start_slot = 0;
++ fpol_preinv = 1;
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ tdm_rx_start_slot = 1;
++ fpol_preinv = 1;
+ break;
+ case SND_SOC_DAIFMT_LEFT_J:
+ tdm_rx_start_slot = 0;
++ fpol_preinv = 1;
+ break;
+ default:
+ dev_err(tas2770->dev,
+@@ -388,6 +372,14 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ if (ret < 0)
+ return ret;
+
++ ret = snd_soc_component_update_bits(component, TAS2770_TDM_CFG_REG0,
++ TAS2770_TDM_CFG_REG0_FPOL_MASK,
++ (fpol_preinv ^ invert_fpol)
++ ? TAS2770_TDM_CFG_REG0_FPOL_RSING
++ : TAS2770_TDM_CFG_REG0_FPOL_FALING);
++ if (ret < 0)
++ return ret;
++
+ return 0;
+ }
+
+@@ -403,21 +395,13 @@ static int tas2770_set_dai_tdm_slot(struct snd_soc_dai *dai,
+ if (tx_mask == 0 || rx_mask != 0)
+ return -EINVAL;
+
+- if (slots == 1) {
+- if (tx_mask != 1)
+- return -EINVAL;
+-
+- left_slot = 0;
+- right_slot = 0;
++ left_slot = __ffs(tx_mask);
++ tx_mask &= ~(1 << left_slot);
++ if (tx_mask == 0) {
++ right_slot = left_slot;
+ } else {
+- left_slot = __ffs(tx_mask);
+- tx_mask &= ~(1 << left_slot);
+- if (tx_mask == 0) {
+- right_slot = left_slot;
+- } else {
+- right_slot = __ffs(tx_mask);
+- tx_mask &= ~(1 << right_slot);
+- }
++ right_slot = __ffs(tx_mask);
++ tx_mask &= ~(1 << right_slot);
+ }
+
+ if (tx_mask != 0 || left_slot >= slots || right_slot >= slots)
+@@ -486,7 +470,7 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = {
+ .id = 0,
+ .playback = {
+ .stream_name = "ASI1 Playback",
+- .channels_min = 2,
++ .channels_min = 1,
+ .channels_max = 2,
+ .rates = TAS2770_RATES,
+ .formats = TAS2770_FORMATS,
+@@ -503,6 +487,8 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = {
+ },
+ };
+
++static const struct regmap_config tas2770_i2c_regmap;
++
+ static int tas2770_codec_probe(struct snd_soc_component *component)
+ {
+ struct tas2770_priv *tas2770 =
+@@ -510,10 +496,13 @@ static int tas2770_codec_probe(struct snd_soc_component *component)
+
+ tas2770->component = component;
+
+- if (tas2770->sdz_gpio)
++ if (tas2770->sdz_gpio) {
+ gpiod_set_value_cansleep(tas2770->sdz_gpio, 1);
++ usleep_range(1000, 2000);
++ }
+
+ tas2770_reset(tas2770);
++ regmap_reinit_cache(tas2770->regmap, &tas2770_i2c_regmap);
+
+ return 0;
+ }
+@@ -532,7 +521,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2770 = {
+ .probe = tas2770_codec_probe,
+ .suspend = tas2770_codec_suspend,
+ .resume = tas2770_codec_resume,
+- .set_bias_level = tas2770_set_bias_level,
+ .controls = tas2770_snd_controls,
+ .num_controls = ARRAY_SIZE(tas2770_snd_controls),
+ .dapm_widgets = tas2770_dapm_widgets,
+diff --git a/sound/soc/codecs/tas2770.h b/sound/soc/codecs/tas2770.h
+index d156666bcc552..f75f40781ab13 100644
+--- a/sound/soc/codecs/tas2770.h
++++ b/sound/soc/codecs/tas2770.h
+@@ -41,6 +41,9 @@
+ #define TAS2770_TDM_CFG_REG0_31_44_1_48KHZ 0x6
+ #define TAS2770_TDM_CFG_REG0_31_88_2_96KHZ 0x8
+ #define TAS2770_TDM_CFG_REG0_31_176_4_192KHZ 0xa
++#define TAS2770_TDM_CFG_REG0_FPOL_MASK BIT(0)
++#define TAS2770_TDM_CFG_REG0_FPOL_RSING 0
++#define TAS2770_TDM_CFG_REG0_FPOL_FALING 1
+ /* TDM Configuration Reg1 */
+ #define TAS2770_TDM_CFG_REG1 TAS2770_REG(0X0, 0x0B)
+ #define TAS2770_TDM_CFG_REG1_MASK GENMASK(5, 1)
+@@ -135,6 +138,8 @@ struct tas2770_priv {
+ struct device *dev;
+ int v_sense_slot;
+ int i_sense_slot;
++ bool dac_powered;
++ bool unmuted;
+ };
+
+ #endif /* __TAS2770__ */
+diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c
+index 32b120d624b25..06d2502b13478 100644
+--- a/sound/soc/codecs/tlv320adcx140.c
++++ b/sound/soc/codecs/tlv320adcx140.c
+@@ -870,7 +870,7 @@ static int adcx140_configure_gpio(struct adcx140_priv *adcx140)
+
+ gpio_count = device_property_count_u32(adcx140->dev,
+ "ti,gpio-config");
+- if (gpio_count == 0)
++ if (gpio_count <= 0)
+ return 0;
+
+ if (gpio_count != ADCX140_NUM_GPIO_CFGS)
+diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c
+index d39c7d52ecfd4..9f4a629b032b8 100644
+--- a/sound/soc/codecs/tlv320aic32x4.c
++++ b/sound/soc/codecs/tlv320aic32x4.c
+@@ -49,6 +49,8 @@ struct aic32x4_priv {
+ struct aic32x4_setup_data *setup;
+ struct device *dev;
+ enum aic32x4_type type;
++
++ unsigned int fmt;
+ };
+
+ static int aic32x4_reset_adc(struct snd_soc_dapm_widget *w,
+@@ -611,6 +613,7 @@ static int aic32x4_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
+ {
+ struct snd_soc_component *component = codec_dai->component;
++ struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component);
+ u8 iface_reg_1 = 0;
+ u8 iface_reg_2 = 0;
+ u8 iface_reg_3 = 0;
+@@ -654,6 +657,8 @@ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
+ return -EINVAL;
+ }
+
++ aic32x4->fmt = fmt;
++
+ snd_soc_component_update_bits(component, AIC32X4_IFACE1,
+ AIC32X4_IFACE1_DATATYPE_MASK |
+ AIC32X4_IFACE1_MASTER_MASK, iface_reg_1);
+@@ -758,6 +763,10 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component,
+ return -EINVAL;
+ }
+
++ /* PCM over I2S is always 2-channel */
++ if ((aic32x4->fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_I2S)
++ channels = 2;
++
+ madc = DIV_ROUND_UP((32 * adc_resource_class), aosr);
+ max_dosr = (AIC32X4_MAX_DOSR_FREQ / sample_rate / dosr_increment) *
+ dosr_increment;
+diff --git a/sound/soc/codecs/tscs454.c b/sound/soc/codecs/tscs454.c
+index 43220bb36701a..c27ca9a273e14 100644
+--- a/sound/soc/codecs/tscs454.c
++++ b/sound/soc/codecs/tscs454.c
+@@ -3120,18 +3120,17 @@ static int set_aif_sample_format(struct snd_soc_component *component,
+ unsigned int width;
+ int ret;
+
+- switch (format) {
+- case SNDRV_PCM_FORMAT_S16_LE:
++ switch (snd_pcm_format_width(format)) {
++ case 16:
+ width = FV_WL_16;
+ break;
+- case SNDRV_PCM_FORMAT_S20_3LE:
++ case 20:
+ width = FV_WL_20;
+ break;
+- case SNDRV_PCM_FORMAT_S24_3LE:
++ case 24:
+ width = FV_WL_24;
+ break;
+- case SNDRV_PCM_FORMAT_S24_LE:
+- case SNDRV_PCM_FORMAT_S32_LE:
++ case 32:
+ width = FV_WL_32;
+ break;
+ default:
+@@ -3326,6 +3325,7 @@ static const struct snd_soc_component_driver soc_component_dev_tscs454 = {
+ .num_dapm_routes = ARRAY_SIZE(tscs454_intercon),
+ .controls = tscs454_snd_controls,
+ .num_controls = ARRAY_SIZE(tscs454_snd_controls),
++ .endianness = 1,
+ };
+
+ #define TSCS454_RATES SNDRV_PCM_RATE_8000_96000
+diff --git a/sound/soc/codecs/wcd-mbhc-v2.c b/sound/soc/codecs/wcd-mbhc-v2.c
+index 405128ccb4b0e..26f26f442d7cd 100644
+--- a/sound/soc/codecs/wcd-mbhc-v2.c
++++ b/sound/soc/codecs/wcd-mbhc-v2.c
+@@ -1370,7 +1370,7 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
+ return ERR_PTR(-EINVAL);
+ }
+
+- mbhc = devm_kzalloc(dev, sizeof(*mbhc), GFP_KERNEL);
++ mbhc = kzalloc(sizeof(*mbhc), GFP_KERNEL);
+ if (!mbhc)
+ return ERR_PTR(-ENOMEM);
+
+@@ -1390,61 +1390,76 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
+
+ INIT_WORK(&mbhc->correct_plug_swch, wcd_correct_swch_plug);
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_sw_intr, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->mbhc_sw_intr, NULL,
+ wcd_mbhc_mech_plug_detect_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "mbhc sw intr", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_mbhc;
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_press_intr, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_press_intr, NULL,
+ wcd_mbhc_btn_press_handler,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "Button Press detect", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_sw_intr;
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_release_intr, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_release_intr, NULL,
+ wcd_mbhc_btn_release_handler,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "Button Release detect", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_btn_press_intr;
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
+ wcd_mbhc_adc_hs_ins_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "Elect Insert", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_btn_release_intr;
+
+ disable_irq_nosync(mbhc->intr_ids->mbhc_hs_ins_intr);
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
+ wcd_mbhc_adc_hs_rem_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "Elect Remove", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_hs_ins_intr;
+
+ disable_irq_nosync(mbhc->intr_ids->mbhc_hs_rem_intr);
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_left_ocp, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->hph_left_ocp, NULL,
+ wcd_mbhc_hphl_ocp_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "HPH_L OCP detect", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_hs_rem_intr;
+
+- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_right_ocp, NULL,
++ ret = request_threaded_irq(mbhc->intr_ids->hph_right_ocp, NULL,
+ wcd_mbhc_hphr_ocp_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "HPH_R OCP detect", mbhc);
+ if (ret)
+- goto err;
++ goto err_free_hph_left_ocp;
+
+ return mbhc;
+-err:
++
++err_free_hph_left_ocp:
++ free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
++err_free_hs_rem_intr:
++ free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
++err_free_hs_ins_intr:
++ free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
++err_free_btn_release_intr:
++ free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
++err_free_btn_press_intr:
++ free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
++err_free_sw_intr:
++ free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
++err_free_mbhc:
++ kfree(mbhc);
++
+ dev_err(dev, "Failed to request mbhc interrupts %d\n", ret);
+
+ return ERR_PTR(ret);
+@@ -1453,9 +1468,19 @@ EXPORT_SYMBOL(wcd_mbhc_init);
+
+ void wcd_mbhc_deinit(struct wcd_mbhc *mbhc)
+ {
++ free_irq(mbhc->intr_ids->hph_right_ocp, mbhc);
++ free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
++ free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
++ free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
++ free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
++ free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
++ free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
++
+ mutex_lock(&mbhc->lock);
+ wcd_cancel_hs_detect_plug(mbhc, &mbhc->correct_plug_swch);
+ mutex_unlock(&mbhc->lock);
++
++ kfree(mbhc);
+ }
+ EXPORT_SYMBOL(wcd_mbhc_deinit);
+
+diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c
+index d885ced34f606..075ed20e9fad8 100644
+--- a/sound/soc/codecs/wcd9335.c
++++ b/sound/soc/codecs/wcd9335.c
+@@ -1971,8 +1971,8 @@ static int wcd9335_trigger(struct snd_pcm_substream *substream, int cmd,
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+- slim_stream_unprepare(dai_data->sruntime);
+ slim_stream_disable(dai_data->sruntime);
++ slim_stream_unprepare(dai_data->sruntime);
+ break;
+ default:
+ break;
+@@ -2252,51 +2252,42 @@ static int wcd9335_rx_hph_mode_put(struct snd_kcontrol *kc,
+
+ static const struct snd_kcontrol_new wcd9335_snd_controls[] = {
+ /* -84dB min - 40dB max */
+- SOC_SINGLE_SX_TLV("RX0 Digital Volume", WCD9335_CDC_RX0_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX1 Digital Volume", WCD9335_CDC_RX1_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX2 Digital Volume", WCD9335_CDC_RX2_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX3 Digital Volume", WCD9335_CDC_RX3_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX4 Digital Volume", WCD9335_CDC_RX4_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX5 Digital Volume", WCD9335_CDC_RX5_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX6 Digital Volume", WCD9335_CDC_RX6_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX7 Digital Volume", WCD9335_CDC_RX7_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX8 Digital Volume", WCD9335_CDC_RX8_RX_VOL_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX0 Mix Digital Volume",
+- WCD9335_CDC_RX0_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX1 Mix Digital Volume",
+- WCD9335_CDC_RX1_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX2 Mix Digital Volume",
+- WCD9335_CDC_RX2_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX3 Mix Digital Volume",
+- WCD9335_CDC_RX3_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX4 Mix Digital Volume",
+- WCD9335_CDC_RX4_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX5 Mix Digital Volume",
+- WCD9335_CDC_RX5_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX6 Mix Digital Volume",
+- WCD9335_CDC_RX6_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX7 Mix Digital Volume",
+- WCD9335_CDC_RX7_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
+- SOC_SINGLE_SX_TLV("RX8 Mix Digital Volume",
+- WCD9335_CDC_RX8_RX_VOL_MIX_CTL,
+- 0, -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX0 Digital Volume", WCD9335_CDC_RX0_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX1 Digital Volume", WCD9335_CDC_RX1_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX2 Digital Volume", WCD9335_CDC_RX2_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX3 Digital Volume", WCD9335_CDC_RX3_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX4 Digital Volume", WCD9335_CDC_RX4_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX5 Digital Volume", WCD9335_CDC_RX5_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX6 Digital Volume", WCD9335_CDC_RX6_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX7 Digital Volume", WCD9335_CDC_RX7_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX8 Digital Volume", WCD9335_CDC_RX8_RX_VOL_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX0 Mix Digital Volume", WCD9335_CDC_RX0_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX1 Mix Digital Volume", WCD9335_CDC_RX1_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX2 Mix Digital Volume", WCD9335_CDC_RX2_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX3 Mix Digital Volume", WCD9335_CDC_RX3_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX4 Mix Digital Volume", WCD9335_CDC_RX4_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX5 Mix Digital Volume", WCD9335_CDC_RX5_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX6 Mix Digital Volume", WCD9335_CDC_RX6_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX7 Mix Digital Volume", WCD9335_CDC_RX7_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
++ SOC_SINGLE_S8_TLV("RX8 Mix Digital Volume", WCD9335_CDC_RX8_RX_VOL_MIX_CTL,
++ -84, 40, digital_gain),
+ SOC_ENUM("RX INT0_1 HPF cut off", cf_int0_1_enum),
+ SOC_ENUM("RX INT0_2 HPF cut off", cf_int0_2_enum),
+ SOC_ENUM("RX INT1_1 HPF cut off", cf_int1_1_enum),
+@@ -4859,7 +4850,7 @@ static int wcd9335_codec_probe(struct snd_soc_component *component)
+
+ snd_soc_component_init_regmap(component, wcd->regmap);
+ /* Class-H Init*/
+- wcd->clsh_ctrl = wcd_clsh_ctrl_alloc(component, wcd->version);
++ wcd->clsh_ctrl = wcd_clsh_ctrl_alloc(component, WCD9335);
+ if (IS_ERR(wcd->clsh_ctrl))
+ return PTR_ERR(wcd->clsh_ctrl);
+
+diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
+index c496b359f2f40..94ffd2ba29aef 100644
+--- a/sound/soc/codecs/wcd934x.c
++++ b/sound/soc/codecs/wcd934x.c
+@@ -1274,29 +1274,7 @@ static int wcd934x_set_sido_input_src(struct wcd934x_codec *wcd, int sido_src)
+ if (sido_src == wcd->sido_input_src)
+ return 0;
+
+- if (sido_src == SIDO_SOURCE_INTERNAL) {
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
+- WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, 0);
+- usleep_range(100, 110);
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
+- WCD934X_ANA_BUCK_HI_ACCU_PRE_ENX_MASK, 0x0);
+- usleep_range(100, 110);
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
+- WCD934X_ANA_RCO_BG_EN_MASK, 0);
+- usleep_range(100, 110);
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
+- WCD934X_ANA_BUCK_PRE_EN1_MASK,
+- WCD934X_ANA_BUCK_PRE_EN1_ENABLE);
+- usleep_range(100, 110);
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
+- WCD934X_ANA_BUCK_PRE_EN2_MASK,
+- WCD934X_ANA_BUCK_PRE_EN2_ENABLE);
+- usleep_range(100, 110);
+- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
+- WCD934X_ANA_BUCK_HI_ACCU_EN_MASK,
+- WCD934X_ANA_BUCK_HI_ACCU_ENABLE);
+- usleep_range(100, 110);
+- } else if (sido_src == SIDO_SOURCE_RCO_BG) {
++ if (sido_src == SIDO_SOURCE_RCO_BG) {
+ regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
+ WCD934X_ANA_RCO_BG_EN_MASK,
+ WCD934X_ANA_RCO_BG_ENABLE);
+@@ -1382,8 +1360,6 @@ static int wcd934x_disable_ana_bias_and_syclk(struct wcd934x_codec *wcd)
+ regmap_update_bits(wcd->regmap, WCD934X_CLK_SYS_MCLK_PRG,
+ WCD934X_EXT_CLK_BUF_EN_MASK |
+ WCD934X_MCLK_EN_MASK, 0x0);
+- wcd934x_set_sido_input_src(wcd, SIDO_SOURCE_INTERNAL);
+-
+ regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
+ WCD934X_ANA_BIAS_EN_MASK, 0);
+ regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
+@@ -1896,9 +1872,8 @@ static int wcd934x_hw_params(struct snd_pcm_substream *substream,
+ }
+
+ wcd->dai[dai->id].sconfig.rate = params_rate(params);
+- wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
+
+- return 0;
++ return wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
+ }
+
+ static int wcd934x_hw_free(struct snd_pcm_substream *substream,
+@@ -1938,8 +1913,8 @@ static int wcd934x_trigger(struct snd_pcm_substream *substream, int cmd,
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+- slim_stream_unprepare(dai_data->sruntime);
+ slim_stream_disable(dai_data->sruntime);
++ slim_stream_unprepare(dai_data->sruntime);
+ break;
+ default:
+ break;
+@@ -3024,14 +2999,14 @@ static int wcd934x_hph_impedance_get(struct snd_kcontrol *kcontrol,
+ return 0;
+ }
+ static const struct snd_kcontrol_new hph_type_detect_controls[] = {
+- SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0,
+ wcd934x_get_hph_type, NULL),
+ };
+
+ static const struct snd_kcontrol_new impedance_detect_controls[] = {
+- SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0,
+ wcd934x_hph_impedance_get, NULL),
+- SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0,
+ wcd934x_hph_impedance_get, NULL),
+ };
+
+@@ -3069,6 +3044,17 @@ static int wcd934x_mbhc_init(struct snd_soc_component *component)
+
+ return 0;
+ }
++
++static void wcd934x_mbhc_deinit(struct snd_soc_component *component)
++{
++ struct wcd934x_codec *wcd = snd_soc_component_get_drvdata(component);
++
++ if (!wcd->mbhc)
++ return;
++
++ wcd_mbhc_deinit(wcd->mbhc);
++}
++
+ static int wcd934x_comp_probe(struct snd_soc_component *component)
+ {
+ struct wcd934x_codec *wcd = dev_get_drvdata(component->dev);
+@@ -3102,6 +3088,7 @@ static void wcd934x_comp_remove(struct snd_soc_component *comp)
+ {
+ struct wcd934x_codec *wcd = dev_get_drvdata(comp->dev);
+
++ wcd934x_mbhc_deinit(comp);
+ wcd_clsh_ctrl_free(wcd->clsh_ctrl);
+ }
+
+@@ -3257,6 +3244,9 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc,
+ int value = ucontrol->value.integer.value[0];
+ int sel;
+
++ if (wcd->comp_enabled[comp] == value)
++ return 0;
++
+ wcd->comp_enabled[comp] = value;
+ sel = value ? WCD934X_HPH_GAIN_SRC_SEL_COMPANDER :
+ WCD934X_HPH_GAIN_SRC_SEL_REGISTER;
+@@ -3280,10 +3270,10 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc,
+ case COMPANDER_8:
+ break;
+ default:
+- break;
++ return 0;
+ }
+
+- return 0;
++ return 1;
+ }
+
+ static int wcd934x_rx_hph_mode_get(struct snd_kcontrol *kc,
+@@ -3306,13 +3296,16 @@ static int wcd934x_rx_hph_mode_put(struct snd_kcontrol *kc,
+
+ mode_val = ucontrol->value.enumerated.item[0];
+
++ if (mode_val == wcd->hph_mode)
++ return 0;
++
+ if (mode_val == 0) {
+ dev_err(wcd->dev, "Invalid HPH Mode, default to ClSH HiFi\n");
+ mode_val = CLS_H_LOHIFI;
+ }
+ wcd->hph_mode = mode_val;
+
+- return 0;
++ return 1;
+ }
+
+ static int slim_rx_mux_get(struct snd_kcontrol *kc,
+@@ -3327,6 +3320,31 @@ static int slim_rx_mux_get(struct snd_kcontrol *kc,
+ return 0;
+ }
+
++static int slim_rx_mux_to_dai_id(int mux)
++{
++ int aif_id;
++
++ switch (mux) {
++ case 1:
++ aif_id = AIF1_PB;
++ break;
++ case 2:
++ aif_id = AIF2_PB;
++ break;
++ case 3:
++ aif_id = AIF3_PB;
++ break;
++ case 4:
++ aif_id = AIF4_PB;
++ break;
++ default:
++ aif_id = -1;
++ break;
++ }
++
++ return aif_id;
++}
++
+ static int slim_rx_mux_put(struct snd_kcontrol *kc,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+@@ -3334,43 +3352,59 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc,
+ struct wcd934x_codec *wcd = dev_get_drvdata(w->dapm->dev);
+ struct soc_enum *e = (struct soc_enum *)kc->private_value;
+ struct snd_soc_dapm_update *update = NULL;
++ struct wcd934x_slim_ch *ch, *c;
+ u32 port_id = w->shift;
++ bool found = false;
++ int mux_idx;
++ int prev_mux_idx = wcd->rx_port_value[port_id];
++ int aif_id;
+
+- if (wcd->rx_port_value[port_id] == ucontrol->value.enumerated.item[0])
+- return 0;
++ mux_idx = ucontrol->value.enumerated.item[0];
+
+- wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0];
++ if (mux_idx == prev_mux_idx)
++ return 0;
+
+- switch (wcd->rx_port_value[port_id]) {
++ switch(mux_idx) {
+ case 0:
+- list_del_init(&wcd->rx_chs[port_id].list);
+- break;
+- case 1:
+- list_add_tail(&wcd->rx_chs[port_id].list,
+- &wcd->dai[AIF1_PB].slim_ch_list);
+- break;
+- case 2:
+- list_add_tail(&wcd->rx_chs[port_id].list,
+- &wcd->dai[AIF2_PB].slim_ch_list);
+- break;
+- case 3:
+- list_add_tail(&wcd->rx_chs[port_id].list,
+- &wcd->dai[AIF3_PB].slim_ch_list);
++ aif_id = slim_rx_mux_to_dai_id(prev_mux_idx);
++ if (aif_id < 0)
++ return 0;
++
++ list_for_each_entry_safe(ch, c, &wcd->dai[aif_id].slim_ch_list, list) {
++ if (ch->port == port_id + WCD934X_RX_START) {
++ found = true;
++ list_del_init(&ch->list);
++ break;
++ }
++ }
++ if (!found)
++ return 0;
++
+ break;
+- case 4:
+- list_add_tail(&wcd->rx_chs[port_id].list,
+- &wcd->dai[AIF4_PB].slim_ch_list);
++ case 1 ... 4:
++ aif_id = slim_rx_mux_to_dai_id(mux_idx);
++ if (aif_id < 0)
++ return 0;
++
++ if (list_empty(&wcd->rx_chs[port_id].list)) {
++ list_add_tail(&wcd->rx_chs[port_id].list,
++ &wcd->dai[aif_id].slim_ch_list);
++ } else {
++ dev_err(wcd->dev ,"SLIM_RX%d PORT is busy\n", port_id);
++ return 0;
++ }
+ break;
++
+ default:
+- dev_err(wcd->dev, "Unknown AIF %d\n",
+- wcd->rx_port_value[port_id]);
++ dev_err(wcd->dev, "Unknown AIF %d\n", mux_idx);
+ goto err;
+ }
+
++ wcd->rx_port_value[port_id] = mux_idx;
+ snd_soc_dapm_mux_update_power(w->dapm, kc, wcd->rx_port_value[port_id],
+ e, update);
+
+- return 0;
++ return 1;
+ err:
+ return -EINVAL;
+ }
+@@ -3816,6 +3850,7 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc,
+ struct soc_mixer_control *mixer =
+ (struct soc_mixer_control *)kc->private_value;
+ int enable = ucontrol->value.integer.value[0];
++ struct wcd934x_slim_ch *ch, *c;
+ int dai_id = widget->shift;
+ int port_id = mixer->shift;
+
+@@ -3823,17 +3858,32 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc,
+ if (enable == wcd->tx_port_value[port_id])
+ return 0;
+
+- wcd->tx_port_value[port_id] = enable;
+-
+- if (enable)
+- list_add_tail(&wcd->tx_chs[port_id].list,
+- &wcd->dai[dai_id].slim_ch_list);
+- else
+- list_del_init(&wcd->tx_chs[port_id].list);
++ if (enable) {
++ if (list_empty(&wcd->tx_chs[port_id].list)) {
++ list_add_tail(&wcd->tx_chs[port_id].list,
++ &wcd->dai[dai_id].slim_ch_list);
++ } else {
++ dev_err(wcd->dev ,"SLIM_TX%d PORT is busy\n", port_id);
++ return 0;
++ }
++ } else {
++ bool found = false;
++
++ list_for_each_entry_safe(ch, c, &wcd->dai[dai_id].slim_ch_list, list) {
++ if (ch->port == port_id) {
++ found = true;
++ list_del_init(&wcd->tx_chs[port_id].list);
++ break;
++ }
++ }
++ if (!found)
++ return 0;
++ }
+
++ wcd->tx_port_value[port_id] = enable;
+ snd_soc_dapm_mixer_update_power(widget->dapm, kc, enable, update);
+
+- return 0;
++ return 1;
+ }
+
+ static const struct snd_kcontrol_new aif1_slim_cap_mixer[] = {
+@@ -5826,6 +5876,7 @@ static int wcd934x_codec_parse_data(struct wcd934x_codec *wcd)
+ }
+
+ wcd->sidev = of_slim_get_device(wcd->sdev->ctrl, ifc_dev_np);
++ of_node_put(ifc_dev_np);
+ if (!wcd->sidev) {
+ dev_err(dev, "Unable to get SLIM Interface device\n");
+ return -EINVAL;
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index 52de7d14b1398..8a7c8e81cba58 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -1174,6 +1174,9 @@ static bool wcd938x_readonly_register(struct device *dev, unsigned int reg)
+ case WCD938X_DIGITAL_INTR_STATUS_0:
+ case WCD938X_DIGITAL_INTR_STATUS_1:
+ case WCD938X_DIGITAL_INTR_STATUS_2:
++ case WCD938X_DIGITAL_INTR_CLEAR_0:
++ case WCD938X_DIGITAL_INTR_CLEAR_1:
++ case WCD938X_DIGITAL_INTR_CLEAR_2:
+ case WCD938X_DIGITAL_SWR_HM_TEST_0:
+ case WCD938X_DIGITAL_SWR_HM_TEST_1:
+ case WCD938X_DIGITAL_EFUSE_T_DATA_0:
+@@ -1429,14 +1432,10 @@ static int wcd938x_sdw_connect_port(struct wcd938x_sdw_ch_info *ch_info,
+ return 0;
+ }
+
+-static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 ch_id, u8 enable)
++static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 port_num, u8 ch_id, u8 enable)
+ {
+- u8 port_num;
+-
+- port_num = wcd->ch_info[ch_id].port_num;
+-
+ return wcd938x_sdw_connect_port(&wcd->ch_info[ch_id],
+- &wcd->port_config[port_num],
++ &wcd->port_config[port_num - 1],
+ enable);
+ }
+
+@@ -2505,7 +2504,7 @@ static int wcd938x_tx_mode_get(struct snd_kcontrol *kcontrol,
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int path = e->shift_l;
+
+- ucontrol->value.integer.value[0] = wcd938x->tx_mode[path];
++ ucontrol->value.enumerated.item[0] = wcd938x->tx_mode[path];
+
+ return 0;
+ }
+@@ -2518,6 +2517,9 @@ static int wcd938x_tx_mode_put(struct snd_kcontrol *kcontrol,
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ int path = e->shift_l;
+
++ if (wcd938x->tx_mode[path] == ucontrol->value.enumerated.item[0])
++ return 0;
++
+ wcd938x->tx_mode[path] = ucontrol->value.enumerated.item[0];
+
+ return 1;
+@@ -2529,7 +2531,7 @@ static int wcd938x_rx_hph_mode_get(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
+- ucontrol->value.integer.value[0] = wcd938x->hph_mode;
++ ucontrol->value.enumerated.item[0] = wcd938x->hph_mode;
+
+ return 0;
+ }
+@@ -2540,6 +2542,9 @@ static int wcd938x_rx_hph_mode_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
++ if (wcd938x->hph_mode == ucontrol->value.enumerated.item[0])
++ return 0;
++
+ wcd938x->hph_mode = ucontrol->value.enumerated.item[0];
+
+ return 1;
+@@ -2560,7 +2565,7 @@ static int wcd938x_ear_pa_put_gain(struct snd_kcontrol *kcontrol,
+ WCD938X_EAR_GAIN_MASK,
+ ucontrol->value.integer.value[0]);
+
+- return 0;
++ return 1;
+ }
+
+ static int wcd938x_get_compander(struct snd_kcontrol *kcontrol,
+@@ -2590,6 +2595,7 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ struct wcd938x_sdw_priv *wcd;
+ int value = ucontrol->value.integer.value[0];
++ int portidx;
+ struct soc_mixer_control *mc;
+ bool hphr;
+
+@@ -2603,12 +2609,14 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
+ else
+ wcd938x->comp1_enable = value;
+
++ portidx = wcd->ch_info[mc->reg].port_num;
++
+ if (value)
+- wcd938x_connect_port(wcd, mc->reg, true);
++ wcd938x_connect_port(wcd, portidx, mc->reg, true);
+ else
+- wcd938x_connect_port(wcd, mc->reg, false);
++ wcd938x_connect_port(wcd, portidx, mc->reg, false);
+
+- return 0;
++ return 1;
+ }
+
+ static int wcd938x_ldoh_get(struct snd_kcontrol *kcontrol,
+@@ -2628,6 +2636,9 @@ static int wcd938x_ldoh_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
++ if (wcd938x->ldoh == ucontrol->value.integer.value[0])
++ return 0;
++
+ wcd938x->ldoh = ucontrol->value.integer.value[0];
+
+ return 1;
+@@ -2650,6 +2661,9 @@ static int wcd938x_bcs_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
++ if (wcd938x->bcs_dis == ucontrol->value.integer.value[0])
++ return 0;
++
+ wcd938x->bcs_dis = ucontrol->value.integer.value[0];
+
+ return 1;
+@@ -2879,9 +2893,11 @@ static int wcd938x_get_swr_port(struct snd_kcontrol *kcontrol,
+ struct wcd938x_sdw_priv *wcd;
+ struct soc_mixer_control *mixer = (struct soc_mixer_control *)kcontrol->private_value;
+ int dai_id = mixer->shift;
+- int portidx = mixer->reg;
++ int portidx, ch_idx = mixer->reg;
++
+
+ wcd = wcd938x->sdw_priv[dai_id];
++ portidx = wcd->ch_info[ch_idx].port_num;
+
+ ucontrol->value.integer.value[0] = wcd->port_enable[portidx];
+
+@@ -2896,12 +2912,14 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
+ struct wcd938x_sdw_priv *wcd;
+ struct soc_mixer_control *mixer =
+ (struct soc_mixer_control *)kcontrol->private_value;
+- int portidx = mixer->reg;
++ int ch_idx = mixer->reg;
++ int portidx;
+ int dai_id = mixer->shift;
+ bool enable;
+
+ wcd = wcd938x->sdw_priv[dai_id];
+
++ portidx = wcd->ch_info[ch_idx].port_num;
+ if (ucontrol->value.integer.value[0])
+ enable = true;
+ else
+@@ -2909,9 +2927,9 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
+
+ wcd->port_enable[portidx] = enable;
+
+- wcd938x_connect_port(wcd, portidx, enable);
++ wcd938x_connect_port(wcd, portidx, ch_idx, enable);
+
+- return 0;
++ return 1;
+
+ }
+
+@@ -3571,14 +3589,14 @@ static int wcd938x_hph_impedance_get(struct snd_kcontrol *kcontrol,
+ }
+
+ static const struct snd_kcontrol_new hph_type_detect_controls[] = {
+- SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0,
+ wcd938x_get_hph_type, NULL),
+ };
+
+ static const struct snd_kcontrol_new impedance_detect_controls[] = {
+- SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0,
+ wcd938x_hph_impedance_get, NULL),
+- SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0,
++ SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0,
+ wcd938x_hph_impedance_get, NULL),
+ };
+
+@@ -3603,6 +3621,8 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
+ WCD938X_IRQ_HPHR_OCP_INT);
+
+ wcd938x->wcd_mbhc = wcd_mbhc_init(component, &mbhc_cb, intr_ids, wcd_mbhc_fields, true);
++ if (IS_ERR(wcd938x->wcd_mbhc))
++ return PTR_ERR(wcd938x->wcd_mbhc);
+
+ snd_soc_add_component_controls(component, impedance_detect_controls,
+ ARRAY_SIZE(impedance_detect_controls));
+@@ -3611,6 +3631,14 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
+
+ return 0;
+ }
++
++static void wcd938x_mbhc_deinit(struct snd_soc_component *component)
++{
++ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
++
++ wcd_mbhc_deinit(wcd938x->wcd_mbhc);
++}
++
+ /* END MBHC */
+
+ static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
+@@ -4058,16 +4086,33 @@ static int wcd938x_irq_init(struct wcd938x_priv *wcd, struct device *dev)
+ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ {
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
++ struct sdw_slave *tx_sdw_dev = wcd938x->tx_sdw_dev;
+ struct device *dev = component->dev;
++ unsigned long time_left;
+ int ret, i;
+
++ time_left = wait_for_completion_timeout(&tx_sdw_dev->initialization_complete,
++ msecs_to_jiffies(2000));
++ if (!time_left) {
++ dev_err(dev, "soundwire device init timeout\n");
++ return -ETIMEDOUT;
++ }
++
+ snd_soc_component_init_regmap(component, wcd938x->regmap);
+
++ ret = pm_runtime_resume_and_get(dev);
++ if (ret < 0)
++ return ret;
++
+ wcd938x->variant = snd_soc_component_read_field(component,
+ WCD938X_DIGITAL_EFUSE_REG_0,
+ WCD938X_ID_MASK);
+
+ wcd938x->clsh_info = wcd_clsh_ctrl_alloc(component, WCD938X);
++ if (IS_ERR(wcd938x->clsh_info)) {
++ pm_runtime_put(dev);
++ return PTR_ERR(wcd938x->clsh_info);
++ }
+
+ wcd938x_io_init(wcd938x);
+ /* Set all interrupts as edge triggered */
+@@ -4076,6 +4121,8 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
+ }
+
++ pm_runtime_put(dev);
++
+ wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
+ WCD938X_IRQ_HPHR_PDM_WD_INT);
+ wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
+@@ -4087,20 +4134,26 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ ret = request_threaded_irq(wcd938x->hphr_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "HPHR PDM WD INT", wcd938x);
+- if (ret)
++ if (ret) {
+ dev_err(dev, "Failed to request HPHR WD interrupt (%d)\n", ret);
++ goto err_free_clsh_ctrl;
++ }
+
+ ret = request_threaded_irq(wcd938x->hphl_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "HPHL PDM WD INT", wcd938x);
+- if (ret)
++ if (ret) {
+ dev_err(dev, "Failed to request HPHL WD interrupt (%d)\n", ret);
++ goto err_free_hphr_pdm_wd_int;
++ }
+
+ ret = request_threaded_irq(wcd938x->aux_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
+ IRQF_ONESHOT | IRQF_TRIGGER_RISING,
+ "AUX PDM WD INT", wcd938x);
+- if (ret)
++ if (ret) {
+ dev_err(dev, "Failed to request Aux WD interrupt (%d)\n", ret);
++ goto err_free_hphl_pdm_wd_int;
++ }
+
+ /* Disable watchdog interrupt for HPH and AUX */
+ disable_irq_nosync(wcd938x->hphr_pdm_wd_int);
+@@ -4115,7 +4168,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ dev_err(component->dev,
+ "%s: Failed to add snd ctrls for variant: %d\n",
+ __func__, wcd938x->variant);
+- goto err;
++ goto err_free_aux_pdm_wd_int;
+ }
+ break;
+ case WCD9385:
+@@ -4125,7 +4178,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ dev_err(component->dev,
+ "%s: Failed to add snd ctrls for variant: %d\n",
+ __func__, wcd938x->variant);
+- goto err;
++ goto err_free_aux_pdm_wd_int;
+ }
+ break;
+ default:
+@@ -4133,12 +4186,38 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
+ }
+
+ ret = wcd938x_mbhc_init(component);
+- if (ret)
++ if (ret) {
+ dev_err(component->dev, "mbhc initialization failed\n");
+-err:
++ goto err_free_aux_pdm_wd_int;
++ }
++
++ return 0;
++
++err_free_aux_pdm_wd_int:
++ free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
++err_free_hphl_pdm_wd_int:
++ free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
++err_free_hphr_pdm_wd_int:
++ free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
++err_free_clsh_ctrl:
++ wcd_clsh_ctrl_free(wcd938x->clsh_info);
++
+ return ret;
+ }
+
++static void wcd938x_soc_codec_remove(struct snd_soc_component *component)
++{
++ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
++
++ wcd938x_mbhc_deinit(component);
++
++ free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
++ free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
++ free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
++
++ wcd_clsh_ctrl_free(wcd938x->clsh_info);
++}
++
+ static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
+ struct snd_soc_jack *jack, void *data)
+ {
+@@ -4155,6 +4234,7 @@ static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
+ static const struct snd_soc_component_driver soc_codec_dev_wcd938x = {
+ .name = "wcd938x_codec",
+ .probe = wcd938x_soc_codec_probe,
++ .remove = wcd938x_soc_codec_remove,
+ .controls = wcd938x_snd_controls,
+ .num_controls = ARRAY_SIZE(wcd938x_snd_controls),
+ .dapm_widgets = wcd938x_dapm_widgets,
+@@ -4284,7 +4364,7 @@ static int wcd938x_codec_set_sdw_stream(struct snd_soc_dai *dai,
+ static const struct snd_soc_dai_ops wcd938x_sdw_dai_ops = {
+ .hw_params = wcd938x_codec_hw_params,
+ .hw_free = wcd938x_codec_free,
+- .set_sdw_stream = wcd938x_codec_set_sdw_stream,
++ .set_stream = wcd938x_codec_set_sdw_stream,
+ };
+
+ static struct snd_soc_dai_driver wcd938x_dais[] = {
+diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c
+index 72e165cc64439..97ece3114b3dc 100644
+--- a/sound/soc/codecs/wm2000.c
++++ b/sound/soc/codecs/wm2000.c
+@@ -536,7 +536,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000,
+ {
+ struct i2c_client *i2c = wm2000->i2c;
+ int i, j;
+- int ret;
++ int ret = 0;
+
+ if (wm2000->anc_mode == mode)
+ return 0;
+@@ -566,13 +566,13 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000,
+ ret = anc_transitions[i].step[j](i2c,
+ anc_transitions[i].analogue);
+ if (ret != 0)
+- return ret;
++ break;
+ }
+
+ if (anc_transitions[i].dest == ANC_OFF)
+ clk_disable_unprepare(wm2000->mclk);
+
+- return 0;
++ return ret;
+ }
+
+ static int wm2000_anc_set_mode(struct wm2000_priv *wm2000)
+diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c
+index 5c2d45d05c975..7c6e01720d651 100644
+--- a/sound/soc/codecs/wm5110.c
++++ b/sound/soc/codecs/wm5110.c
+@@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol,
+ unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift;
+ unsigned int lold, rold;
+ unsigned int lena, rena;
++ bool change = false;
+ int ret;
+
+ snd_soc_dapm_mutex_lock(dapm);
+@@ -440,8 +441,8 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol,
+ goto err;
+ }
+
+- ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE,
+- mask, lnew | rnew);
++ ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE,
++ mask, lnew | rnew, &change);
+ if (ret) {
+ dev_err(arizona->dev, "Failed to set DRE: %d\n", ret);
+ goto err;
+@@ -454,6 +455,9 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol,
+ if (!rnew && rold)
+ wm5110_clear_pga_volume(arizona, mc->rshift);
+
++ if (change)
++ ret = 1;
++
+ err:
+ snd_soc_dapm_mutex_unlock(dapm);
+
+diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
+index 15d42ce3b21d6..41504ce2a682f 100644
+--- a/sound/soc/codecs/wm8350.c
++++ b/sound/soc/codecs/wm8350.c
+@@ -1537,18 +1537,38 @@ static int wm8350_component_probe(struct snd_soc_component *component)
+ wm8350_clear_bits(wm8350, WM8350_JACK_DETECT,
+ WM8350_JDL_ENA | WM8350_JDR_ENA);
+
+- wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L,
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L,
+ wm8350_hpl_jack_handler, 0, "Left jack detect",
+ priv);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R,
++ if (ret != 0)
++ goto err;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R,
+ wm8350_hpr_jack_handler, 0, "Right jack detect",
+ priv);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD,
++ if (ret != 0)
++ goto free_jck_det_l;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD,
+ wm8350_mic_handler, 0, "Microphone short", priv);
+- wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD,
++ if (ret != 0)
++ goto free_jck_det_r;
++
++ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD,
+ wm8350_mic_handler, 0, "Microphone detect", priv);
++ if (ret != 0)
++ goto free_micscd;
+
+ return 0;
++
++free_micscd:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, priv);
++free_jck_det_r:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv);
++free_jck_det_l:
++ wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv);
++err:
++ return ret;
+ }
+
+ static void wm8350_component_remove(struct snd_soc_component *component)
+diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
+index dcee7b2bd3d79..859ebcec83838 100644
+--- a/sound/soc/codecs/wm8731.c
++++ b/sound/soc/codecs/wm8731.c
+@@ -602,7 +602,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
+ ret = wm8731_reset(wm8731->regmap);
+ if (ret < 0) {
+ dev_err(dev, "Failed to issue reset: %d\n", ret);
+- goto err_regulator_enable;
++ goto err;
+ }
+
+ /* Clear POWEROFF, keep everything else disabled */
+@@ -619,10 +619,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
+
+ regcache_mark_dirty(wm8731->regmap);
+
+-err_regulator_enable:
+- /* Regulators will be enabled by bias management */
+- regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
+-
++err:
+ return ret;
+ }
+
+@@ -766,21 +763,27 @@ static int wm8731_i2c_probe(struct i2c_client *i2c,
+ ret = PTR_ERR(wm8731->regmap);
+ dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+ ret);
+- return ret;
++ goto err_regulator_enable;
+ }
+
+ ret = wm8731_hw_init(&i2c->dev, wm8731);
+ if (ret != 0)
+- return ret;
++ goto err_regulator_enable;
+
+ ret = devm_snd_soc_register_component(&i2c->dev,
+ &soc_component_dev_wm8731, &wm8731_dai, 1);
+ if (ret != 0) {
+ dev_err(&i2c->dev, "Failed to register CODEC: %d\n", ret);
+- return ret;
++ goto err_regulator_enable;
+ }
+
+ return 0;
++
++err_regulator_enable:
++ /* Regulators will be enabled by bias management */
++ regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
++
++ return ret;
+ }
+
+ static int wm8731_i2c_remove(struct i2c_client *client)
+diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
+index a02a77fef360b..7aed412b19c7d 100644
+--- a/sound/soc/codecs/wm8904.c
++++ b/sound/soc/codecs/wm8904.c
+@@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+ int dcs_mask;
+ int dcs_l, dcs_r;
+ int dcs_l_reg, dcs_r_reg;
++ int an_out_reg;
+ int timeout;
+ int pwr_reg;
+
+@@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+ dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1;
+ dcs_r_reg = WM8904_DC_SERVO_8;
+ dcs_l_reg = WM8904_DC_SERVO_9;
++ an_out_reg = WM8904_ANALOGUE_OUT1_LEFT;
+ dcs_l = 0;
+ dcs_r = 1;
+ break;
+@@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+ dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3;
+ dcs_r_reg = WM8904_DC_SERVO_6;
+ dcs_l_reg = WM8904_DC_SERVO_7;
++ an_out_reg = WM8904_ANALOGUE_OUT2_LEFT;
+ dcs_l = 2;
+ dcs_r = 3;
+ break;
+@@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
+ snd_soc_component_update_bits(component, reg,
+ WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP,
+ WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP);
++
++ /* Update volume, requires PGA to be powered */
++ val = snd_soc_component_read(component, an_out_reg);
++ snd_soc_component_write(component, an_out_reg, val);
+ break;
+
+ case SND_SOC_DAPM_POST_PMU:
+@@ -2299,6 +2306,9 @@ static int wm8904_i2c_probe(struct i2c_client *i2c,
+ regmap_update_bits(wm8904->regmap, WM8904_BIAS_CONTROL_0,
+ WM8904_POBCTRL, 0);
+
++ /* Fill the cache for the ADC test register */
++ regmap_read(wm8904->regmap, WM8904_ADC_TEST_0, &val);
++
+ /* Can leave the device powered off until we need it */
+ regcache_cache_only(wm8904->regmap, true);
+ regulator_bulk_disable(ARRAY_SIZE(wm8904->supplies), wm8904->supplies);
+diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c
+index e4018ba3b19a2..7878c7a58ff10 100644
+--- a/sound/soc/codecs/wm8958-dsp2.c
++++ b/sound/soc/codecs/wm8958-dsp2.c
+@@ -530,7 +530,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol,
+
+ wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]);
+
+- return 0;
++ return 1;
+ }
+
+ #define WM8958_MBC_SWITCH(xname, xval) {\
+@@ -656,7 +656,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol,
+
+ wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]);
+
+- return 0;
++ return 1;
+ }
+
+
+@@ -730,7 +730,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol,
+
+ wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]);
+
+- return 0;
++ return 1;
+ }
+
+ #define WM8958_HPF_SWITCH(xname, xval) {\
+@@ -824,7 +824,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol,
+
+ wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]);
+
+- return 0;
++ return 1;
+ }
+
+ #define WM8958_ENH_EQ_SWITCH(xname, xval) {\
+diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
+index ba16bdf9e478c..779f7097d336c 100644
+--- a/sound/soc/codecs/wm8962.c
++++ b/sound/soc/codecs/wm8962.c
+@@ -1840,6 +1840,49 @@ SOC_SINGLE_TLV("SPKOUTR Mixer DACR Volume", WM8962_SPEAKER_MIXER_5,
+ 4, 1, 0, inmix_tlv),
+ };
+
++static int tp_event(struct snd_soc_dapm_widget *w,
++ struct snd_kcontrol *kcontrol, int event)
++{
++ int ret, reg, val, mask;
++ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
++
++ ret = pm_runtime_resume_and_get(component->dev);
++ if (ret < 0) {
++ dev_err(component->dev, "Failed to resume device: %d\n", ret);
++ return ret;
++ }
++
++ reg = WM8962_ADDITIONAL_CONTROL_4;
++
++ if (!strcmp(w->name, "TEMP_HP")) {
++ mask = WM8962_TEMP_ENA_HP_MASK;
++ val = WM8962_TEMP_ENA_HP;
++ } else if (!strcmp(w->name, "TEMP_SPK")) {
++ mask = WM8962_TEMP_ENA_SPK_MASK;
++ val = WM8962_TEMP_ENA_SPK;
++ } else {
++ pm_runtime_put(component->dev);
++ return -EINVAL;
++ }
++
++ switch (event) {
++ case SND_SOC_DAPM_POST_PMD:
++ val = 0;
++ fallthrough;
++ case SND_SOC_DAPM_POST_PMU:
++ ret = snd_soc_component_update_bits(component, reg, mask, val);
++ break;
++ default:
++ WARN(1, "Invalid event %d\n", event);
++ pm_runtime_put(component->dev);
++ return -EINVAL;
++ }
++
++ pm_runtime_put(component->dev);
++
++ return 0;
++}
++
+ static int cp_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+ {
+@@ -2133,8 +2176,10 @@ SND_SOC_DAPM_SUPPLY("TOCLK", WM8962_ADDITIONAL_CONTROL_1, 0, 0, NULL, 0),
+ SND_SOC_DAPM_SUPPLY_S("DSP2", 1, WM8962_DSP2_POWER_MANAGEMENT,
+ WM8962_DSP2_ENA_SHIFT, 0, dsp2_event,
+ SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+-SND_SOC_DAPM_SUPPLY("TEMP_HP", WM8962_ADDITIONAL_CONTROL_4, 2, 0, NULL, 0),
+-SND_SOC_DAPM_SUPPLY("TEMP_SPK", WM8962_ADDITIONAL_CONTROL_4, 1, 0, NULL, 0),
++SND_SOC_DAPM_SUPPLY("TEMP_HP", SND_SOC_NOPM, 0, 0, tp_event,
++ SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
++SND_SOC_DAPM_SUPPLY("TEMP_SPK", SND_SOC_NOPM, 0, 0, tp_event,
++ SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+
+ SND_SOC_DAPM_MIXER("INPGAL", WM8962_LEFT_INPUT_PGA_CONTROL, 4, 0,
+ inpgal, ARRAY_SIZE(inpgal)),
+@@ -2445,6 +2490,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component)
+ snd_soc_component_update_bits(component, WM8962_CLOCKING2,
+ WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA);
+
++ /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate
++ * correct frequency of LRCLK and BCLK. Sometimes the read-only value
++ * can't be updated timely after enabling SYSCLK. This results in wrong
++ * calculation values. Delay is introduced here to wait for newest
++ * value from register. The time of the delay should be at least
++ * 500~1000us according to test.
++ */
++ usleep_range(500, 1000);
+ dspclk = snd_soc_component_read(component, WM8962_CLOCKING1);
+
+ if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON)
+@@ -3763,6 +3816,11 @@ static int wm8962_i2c_probe(struct i2c_client *i2c,
+ if (ret < 0)
+ goto err_pm_runtime;
+
++ regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4,
++ WM8962_TEMP_ENA_HP_MASK, 0);
++ regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4,
++ WM8962_TEMP_ENA_SPK_MASK, 0);
++
+ regcache_cache_only(wm8962->regmap, true);
+
+ /* The drivers should power up as needed */
+@@ -3867,6 +3925,7 @@ static int wm8962_runtime_suspend(struct device *dev)
+ #endif
+
+ static const struct dev_pm_ops wm8962_pm = {
++ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(wm8962_runtime_suspend, wm8962_runtime_resume, NULL)
+ };
+
+diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
+index f117ec0c489f0..6759db92f6c46 100644
+--- a/sound/soc/codecs/wm8994.c
++++ b/sound/soc/codecs/wm8994.c
+@@ -3853,7 +3853,12 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
+ } else {
+ dev_dbg(component->dev, "Jack not detected\n");
+
++ /* Release wm8994->accdet_lock to avoid deadlock:
++ * cancel_delayed_work_sync() takes wm8994->mic_work internal
++ * lock and wm1811_mic_work takes wm8994->accdet_lock */
++ mutex_unlock(&wm8994->accdet_lock);
+ cancel_delayed_work_sync(&wm8994->mic_work);
++ mutex_lock(&wm8994->accdet_lock);
+
+ snd_soc_component_update_bits(component, WM8958_MICBIAS2,
+ WM8958_MICB2_DISCH, WM8958_MICB2_DISCH);
+diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
+index f7c800927cb2f..08fc1a025b1a9 100644
+--- a/sound/soc/codecs/wm_adsp.c
++++ b/sound/soc/codecs/wm_adsp.c
+@@ -794,7 +794,7 @@ int wm_adsp_fw_put(struct snd_kcontrol *kcontrol,
+ struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+ struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+ struct wm_adsp *dsp = snd_soc_component_get_drvdata(component);
+- int ret = 0;
++ int ret = 1;
+
+ if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw)
+ return 0;
+diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c
+index 2da4a5fa7a18d..8559047694873 100644
+--- a/sound/soc/codecs/wsa881x.c
++++ b/sound/soc/codecs/wsa881x.c
+@@ -646,7 +646,6 @@ static struct regmap_config wsa881x_regmap_config = {
+ .readable_reg = wsa881x_readable_register,
+ .reg_format_endian = REGMAP_ENDIAN_NATIVE,
+ .val_format_endian = REGMAP_ENDIAN_NATIVE,
+- .can_multi_write = true,
+ };
+
+ enum {
+@@ -772,7 +771,8 @@ static int wsa881x_put_pa_gain(struct snd_kcontrol *kc,
+
+ usleep_range(1000, 1010);
+ }
+- return 0;
++
++ return 1;
+ }
+
+ static int wsa881x_get_port(struct snd_kcontrol *kcontrol,
+@@ -816,15 +816,22 @@ static int wsa881x_set_port(struct snd_kcontrol *kcontrol,
+ (struct soc_mixer_control *)kcontrol->private_value;
+ int portidx = mixer->reg;
+
+- if (ucontrol->value.integer.value[0])
++ if (ucontrol->value.integer.value[0]) {
++ if (data->port_enable[portidx])
++ return 0;
++
+ data->port_enable[portidx] = true;
+- else
++ } else {
++ if (!data->port_enable[portidx])
++ return 0;
++
+ data->port_enable[portidx] = false;
++ }
+
+ if (portidx == WSA881X_PORT_BOOST) /* Boost Switch */
+ wsa881x_boost_ctrl(comp, data->port_enable[portidx]);
+
+- return 0;
++ return 1;
+ }
+
+ static const char * const smart_boost_lvl_text[] = {
+@@ -1018,7 +1025,7 @@ static const struct snd_soc_dai_ops wsa881x_dai_ops = {
+ .hw_params = wsa881x_hw_params,
+ .hw_free = wsa881x_hw_free,
+ .mute_stream = wsa881x_digital_mute,
+- .set_sdw_stream = wsa881x_set_sdw_stream,
++ .set_stream = wsa881x_set_sdw_stream,
+ };
+
+ static struct snd_soc_dai_driver wsa881x_dais[] = {
+diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c
+index 33ce257ae1986..feed7281ba215 100644
+--- a/sound/soc/dwc/dwc-i2s.c
++++ b/sound/soc/dwc/dwc-i2s.c
+@@ -132,13 +132,13 @@ static irqreturn_t i2s_irq_handler(int irq, void *dev_id)
+
+ /* Error Handling: TX */
+ if (isr[i] & ISR_TXFO) {
+- dev_err(dev->dev, "TX overrun (ch_id=%d)\n", i);
++ dev_err_ratelimited(dev->dev, "TX overrun (ch_id=%d)\n", i);
+ irq_valid = true;
+ }
+
+ /* Error Handling: TX */
+ if (isr[i] & ISR_RXFO) {
+- dev_err(dev->dev, "RX overrun (ch_id=%d)\n", i);
++ dev_err_ratelimited(dev->dev, "RX overrun (ch_id=%d)\n", i);
+ irq_valid = true;
+ }
+ }
+@@ -183,30 +183,6 @@ static void i2s_stop(struct dw_i2s_dev *dev,
+ }
+ }
+
+-static int dw_i2s_startup(struct snd_pcm_substream *substream,
+- struct snd_soc_dai *cpu_dai)
+-{
+- struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(cpu_dai);
+- union dw_i2s_snd_dma_data *dma_data = NULL;
+-
+- if (!(dev->capability & DWC_I2S_RECORD) &&
+- (substream->stream == SNDRV_PCM_STREAM_CAPTURE))
+- return -EINVAL;
+-
+- if (!(dev->capability & DWC_I2S_PLAY) &&
+- (substream->stream == SNDRV_PCM_STREAM_PLAYBACK))
+- return -EINVAL;
+-
+- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- dma_data = &dev->play_dma_data;
+- else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+- dma_data = &dev->capture_dma_data;
+-
+- snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)dma_data);
+-
+- return 0;
+-}
+-
+ static void dw_i2s_config(struct dw_i2s_dev *dev, int stream)
+ {
+ u32 ch_reg;
+@@ -305,12 +281,6 @@ static int dw_i2s_hw_params(struct snd_pcm_substream *substream,
+ return 0;
+ }
+
+-static void dw_i2s_shutdown(struct snd_pcm_substream *substream,
+- struct snd_soc_dai *dai)
+-{
+- snd_soc_dai_set_dma_data(dai, substream, NULL);
+-}
+-
+ static int dw_i2s_prepare(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+@@ -382,8 +352,6 @@ static int dw_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt)
+ }
+
+ static const struct snd_soc_dai_ops dw_i2s_dai_ops = {
+- .startup = dw_i2s_startup,
+- .shutdown = dw_i2s_shutdown,
+ .hw_params = dw_i2s_hw_params,
+ .prepare = dw_i2s_prepare,
+ .trigger = dw_i2s_trigger,
+@@ -403,9 +371,13 @@ static int dw_i2s_runtime_suspend(struct device *dev)
+ static int dw_i2s_runtime_resume(struct device *dev)
+ {
+ struct dw_i2s_dev *dw_dev = dev_get_drvdata(dev);
++ int ret;
+
+- if (dw_dev->capability & DW_I2S_MASTER)
+- clk_enable(dw_dev->clk);
++ if (dw_dev->capability & DW_I2S_MASTER) {
++ ret = clk_enable(dw_dev->clk);
++ if (ret)
++ return ret;
++ }
+ return 0;
+ }
+
+@@ -422,10 +394,13 @@ static int dw_i2s_resume(struct snd_soc_component *component)
+ {
+ struct dw_i2s_dev *dev = snd_soc_component_get_drvdata(component);
+ struct snd_soc_dai *dai;
+- int stream;
++ int stream, ret;
+
+- if (dev->capability & DW_I2S_MASTER)
+- clk_enable(dev->clk);
++ if (dev->capability & DW_I2S_MASTER) {
++ ret = clk_enable(dev->clk);
++ if (ret)
++ return ret;
++ }
+
+ for_each_component_dais(component, dai) {
+ for_each_pcm_streams(stream)
+@@ -617,6 +592,14 @@ static int dw_configure_dai_by_dt(struct dw_i2s_dev *dev,
+
+ }
+
++static int dw_i2s_dai_probe(struct snd_soc_dai *dai)
++{
++ struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
++
++ snd_soc_dai_init_dma_data(dai, &dev->play_dma_data, &dev->capture_dma_data);
++ return 0;
++}
++
+ static int dw_i2s_probe(struct platform_device *pdev)
+ {
+ const struct i2s_platform_data *pdata = pdev->dev.platform_data;
+@@ -635,6 +618,7 @@ static int dw_i2s_probe(struct platform_device *pdev)
+ return -ENOMEM;
+
+ dw_i2s_dai->ops = &dw_i2s_dai_ops;
++ dw_i2s_dai->probe = dw_i2s_dai_probe;
+
+ dev->i2s_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(dev->i2s_base))
+diff --git a/sound/soc/fsl/eukrea-tlv320.c b/sound/soc/fsl/eukrea-tlv320.c
+index e13271ea84ded..29cf9234984d9 100644
+--- a/sound/soc/fsl/eukrea-tlv320.c
++++ b/sound/soc/fsl/eukrea-tlv320.c
+@@ -86,7 +86,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev)
+ int ret;
+ int int_port = 0, ext_port;
+ struct device_node *np = pdev->dev.of_node;
+- struct device_node *ssi_np = NULL, *codec_np = NULL;
++ struct device_node *ssi_np = NULL, *codec_np = NULL, *tmp_np = NULL;
+
+ eukrea_tlv320.dev = &pdev->dev;
+ if (np) {
+@@ -143,7 +143,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev)
+ }
+
+ if (machine_is_eukrea_cpuimx27() ||
+- of_find_compatible_node(NULL, NULL, "fsl,imx21-audmux")) {
++ (tmp_np = of_find_compatible_node(NULL, NULL, "fsl,imx21-audmux"))) {
+ imx_audmux_v1_configure_port(MX27_AUDMUX_HPCR1_SSI0,
+ IMX_AUDMUX_V1_PCR_SYN |
+ IMX_AUDMUX_V1_PCR_TFSDIR |
+@@ -158,10 +158,11 @@ static int eukrea_tlv320_probe(struct platform_device *pdev)
+ IMX_AUDMUX_V1_PCR_SYN |
+ IMX_AUDMUX_V1_PCR_RXDSEL(MX27_AUDMUX_HPCR1_SSI0)
+ );
++ of_node_put(tmp_np);
+ } else if (machine_is_eukrea_cpuimx25sd() ||
+ machine_is_eukrea_cpuimx35sd() ||
+ machine_is_eukrea_cpuimx51sd() ||
+- of_find_compatible_node(NULL, NULL, "fsl,imx31-audmux")) {
++ (tmp_np = of_find_compatible_node(NULL, NULL, "fsl,imx31-audmux"))) {
+ if (!np)
+ ext_port = machine_is_eukrea_cpuimx25sd() ?
+ 4 : 3;
+@@ -178,6 +179,7 @@ static int eukrea_tlv320_probe(struct platform_device *pdev)
+ IMX_AUDMUX_V2_PTCR_SYN,
+ IMX_AUDMUX_V2_PDCR_RXDSEL(int_port)
+ );
++ of_node_put(tmp_np);
+ } else {
+ if (np) {
+ /* The eukrea,asoc-tlv320 driver was explicitly
+diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
+index 06107ae46e20b..5000d779aade2 100644
+--- a/sound/soc/fsl/fsl-asoc-card.c
++++ b/sound/soc/fsl/fsl-asoc-card.c
+@@ -120,11 +120,11 @@ static const struct snd_soc_dapm_route audio_map[] = {
+
+ static const struct snd_soc_dapm_route audio_map_ac97[] = {
+ /* 1st half -- Normal DAPM routes */
+- {"Playback", NULL, "AC97 Playback"},
+- {"AC97 Capture", NULL, "Capture"},
++ {"AC97 Playback", NULL, "CPU AC97 Playback"},
++ {"CPU AC97 Capture", NULL, "AC97 Capture"},
+ /* 2nd half -- ASRC DAPM routes */
+- {"AC97 Playback", NULL, "ASRC-Playback"},
+- {"ASRC-Capture", NULL, "AC97 Capture"},
++ {"CPU AC97 Playback", NULL, "ASRC-Playback"},
++ {"ASRC-Capture", NULL, "CPU AC97 Capture"},
+ };
+
+ static const struct snd_soc_dapm_route audio_map_tx[] = {
+@@ -540,6 +540,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
+ struct device *codec_dev = NULL;
+ const char *codec_dai_name;
+ const char *codec_dev_name;
++ u32 asrc_fmt = 0;
+ u32 width;
+ int ret;
+
+@@ -817,8 +818,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
+ goto asrc_fail;
+ }
+
+- ret = of_property_read_u32(asrc_np, "fsl,asrc-format",
+- &priv->asrc_format);
++ ret = of_property_read_u32(asrc_np, "fsl,asrc-format", &asrc_fmt);
++ priv->asrc_format = (__force snd_pcm_format_t)asrc_fmt;
+ if (ret) {
+ /* Fallback to old binding; translate to asrc_format */
+ ret = of_property_read_u32(asrc_np, "fsl,asrc-width",
+@@ -842,8 +843,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(&pdev->dev, &priv->card);
+ if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n");
+ goto asrc_fail;
+ }
+
+diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
+index 24b41881a68f8..08ca410ef551b 100644
+--- a/sound/soc/fsl/fsl_asrc.c
++++ b/sound/soc/fsl/fsl_asrc.c
+@@ -19,6 +19,7 @@
+ #include "fsl_asrc.h"
+
+ #define IDEAL_RATIO_DECIMAL_DEPTH 26
++#define DIVIDER_NUM 64
+
+ #define pair_err(fmt, ...) \
+ dev_err(&asrc->pdev->dev, "Pair %c: " fmt, 'A' + index, ##__VA_ARGS__)
+@@ -101,6 +102,55 @@ static unsigned char clk_map_imx8qxp[2][ASRC_CLK_MAP_LEN] = {
+ },
+ };
+
++/*
++ * According to RM, the divider range is 1 ~ 8,
++ * prescaler is power of 2 from 1 ~ 128.
++ */
++static int asrc_clk_divider[DIVIDER_NUM] = {
++ 1, 2, 4, 8, 16, 32, 64, 128, /* divider = 1 */
++ 2, 4, 8, 16, 32, 64, 128, 256, /* divider = 2 */
++ 3, 6, 12, 24, 48, 96, 192, 384, /* divider = 3 */
++ 4, 8, 16, 32, 64, 128, 256, 512, /* divider = 4 */
++ 5, 10, 20, 40, 80, 160, 320, 640, /* divider = 5 */
++ 6, 12, 24, 48, 96, 192, 384, 768, /* divider = 6 */
++ 7, 14, 28, 56, 112, 224, 448, 896, /* divider = 7 */
++ 8, 16, 32, 64, 128, 256, 512, 1024, /* divider = 8 */
++};
++
++/*
++ * Check if the divider is available for internal ratio mode
++ */
++static bool fsl_asrc_divider_avail(int clk_rate, int rate, int *div)
++{
++ u32 rem, i;
++ u64 n;
++
++ if (div)
++ *div = 0;
++
++ if (clk_rate == 0 || rate == 0)
++ return false;
++
++ n = clk_rate;
++ rem = do_div(n, rate);
++
++ if (div)
++ *div = n;
++
++ if (rem != 0)
++ return false;
++
++ for (i = 0; i < DIVIDER_NUM; i++) {
++ if (n == asrc_clk_divider[i])
++ break;
++ }
++
++ if (i == DIVIDER_NUM)
++ return false;
++
++ return true;
++}
++
+ /**
+ * fsl_asrc_sel_proc - Select the pre-processing and post-processing options
+ * @inrate: input sample rate
+@@ -330,12 +380,12 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
+ enum asrc_word_width input_word_width;
+ enum asrc_word_width output_word_width;
+ u32 inrate, outrate, indiv, outdiv;
+- u32 clk_index[2], div[2], rem[2];
++ u32 clk_index[2], div[2];
+ u64 clk_rate;
+ int in, out, channels;
+ int pre_proc, post_proc;
+ struct clk *clk;
+- bool ideal;
++ bool ideal, div_avail;
+
+ if (!config) {
+ pair_err("invalid pair config\n");
+@@ -415,8 +465,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
+ clk = asrc_priv->asrck_clk[clk_index[ideal ? OUT : IN]];
+
+ clk_rate = clk_get_rate(clk);
+- rem[IN] = do_div(clk_rate, inrate);
+- div[IN] = (u32)clk_rate;
++ div_avail = fsl_asrc_divider_avail(clk_rate, inrate, &div[IN]);
+
+ /*
+ * The divider range is [1, 1024], defined by the hardware. For non-
+@@ -425,7 +474,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
+ * only result in different converting speeds. So remainder does not
+ * matter, as long as we keep the divider within its valid range.
+ */
+- if (div[IN] == 0 || (!ideal && (div[IN] > 1024 || rem[IN] != 0))) {
++ if (div[IN] == 0 || (!ideal && !div_avail)) {
+ pair_err("failed to support input sample rate %dHz by asrck_%x\n",
+ inrate, clk_index[ideal ? OUT : IN]);
+ return -EINVAL;
+@@ -436,13 +485,12 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
+ clk = asrc_priv->asrck_clk[clk_index[OUT]];
+ clk_rate = clk_get_rate(clk);
+ if (ideal && use_ideal_rate)
+- rem[OUT] = do_div(clk_rate, IDEAL_RATIO_RATE);
++ div_avail = fsl_asrc_divider_avail(clk_rate, IDEAL_RATIO_RATE, &div[OUT]);
+ else
+- rem[OUT] = do_div(clk_rate, outrate);
+- div[OUT] = clk_rate;
++ div_avail = fsl_asrc_divider_avail(clk_rate, outrate, &div[OUT]);
+
+ /* Output divider has the same limitation as the input one */
+- if (div[OUT] == 0 || (!ideal && (div[OUT] > 1024 || rem[OUT] != 0))) {
++ if (div[OUT] == 0 || (!ideal && !div_avail)) {
+ pair_err("failed to support output sample rate %dHz by asrck_%x\n",
+ outrate, clk_index[OUT]);
+ return -EINVAL;
+@@ -621,8 +669,7 @@ static void fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv,
+ clk_index = asrc_priv->clk_map[j][i];
+ clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]);
+ /* Only match a perfect clock source with no remainder */
+- if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 &&
+- (clk_rate % rate[j]) == 0)
++ if (fsl_asrc_divider_avail(clk_rate, rate[j], NULL))
+ break;
+ }
+
+@@ -1019,6 +1066,7 @@ static int fsl_asrc_probe(struct platform_device *pdev)
+ struct resource *res;
+ void __iomem *regs;
+ int irq, ret, i;
++ u32 asrc_fmt = 0;
+ u32 map_idx;
+ char tmp[16];
+ u32 width;
+@@ -1127,7 +1175,8 @@ static int fsl_asrc_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- ret = of_property_read_u32(np, "fsl,asrc-format", &asrc->asrc_format);
++ ret = of_property_read_u32(np, "fsl,asrc-format", &asrc_fmt);
++ asrc->asrc_format = (__force snd_pcm_format_t)asrc_fmt;
+ if (ret) {
+ ret = of_property_read_u32(np, "fsl,asrc-width", &width);
+ if (ret) {
+@@ -1150,7 +1199,7 @@ static int fsl_asrc_probe(struct platform_device *pdev)
+ }
+ }
+
+- if (!(FSL_ASRC_FORMATS & (1ULL << asrc->asrc_format))) {
++ if (!(FSL_ASRC_FORMATS & pcm_format_to_bits(asrc->asrc_format))) {
+ dev_warn(&pdev->dev, "unsupported width, use default S24_LE\n");
+ asrc->asrc_format = SNDRV_PCM_FORMAT_S24_LE;
+ }
+@@ -1177,7 +1226,7 @@ static int fsl_asrc_probe(struct platform_device *pdev)
+ }
+
+ ret = pm_runtime_put_sync(&pdev->dev);
+- if (ret < 0)
++ if (ret < 0 && ret != -ENOSYS)
+ goto err_pm_get_sync;
+
+ ret = devm_snd_soc_register_component(&pdev->dev, &fsl_asrc_component,
+diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
+index cd9b36ec0ecb9..79dd5a9b8f481 100644
+--- a/sound/soc/fsl/fsl_asrc_dma.c
++++ b/sound/soc/fsl/fsl_asrc_dma.c
+@@ -208,14 +208,19 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
+ be_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
+ tmp_chan = be_chan;
+ }
+- if (!tmp_chan)
+- tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
++ if (!tmp_chan) {
++ tmp_chan = dma_request_chan(dev_be, tx ? "tx" : "rx");
++ if (IS_ERR(tmp_chan)) {
++ dev_err(dev, "failed to request DMA channel for Back-End\n");
++ return -EINVAL;
++ }
++ }
+
+ /*
+ * An EDMA DEV_TO_DEV channel is fixed and bound with DMA event of each
+ * peripheral, unlike SDMA channel that is allocated dynamically. So no
+ * need to configure dma_request and dma_request2, but get dma_chan of
+- * Back-End device directly via dma_request_slave_channel.
++ * Back-End device directly via dma_request_chan.
+ */
+ if (!asrc->use_edma) {
+ /* Get DMA request of Back-End */
+diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c
+index be14f84796cb4..cf0e10d17dbe3 100644
+--- a/sound/soc/fsl/fsl_easrc.c
++++ b/sound/soc/fsl/fsl_easrc.c
+@@ -476,7 +476,8 @@ static int fsl_easrc_prefilter_config(struct fsl_asrc *easrc,
+ struct fsl_asrc_pair *ctx;
+ struct device *dev;
+ u32 inrate, outrate, offset = 0;
+- u32 in_s_rate, out_s_rate, in_s_fmt, out_s_fmt;
++ u32 in_s_rate, out_s_rate;
++ snd_pcm_format_t in_s_fmt, out_s_fmt;
+ int ret, i;
+
+ if (!easrc)
+@@ -1873,6 +1874,7 @@ static int fsl_easrc_probe(struct platform_device *pdev)
+ struct resource *res;
+ struct device_node *np;
+ void __iomem *regs;
++ u32 asrc_fmt = 0;
+ int ret, irq;
+
+ easrc = devm_kzalloc(dev, sizeof(*easrc), GFP_KERNEL);
+@@ -1933,13 +1935,14 @@ static int fsl_easrc_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- ret = of_property_read_u32(np, "fsl,asrc-format", &easrc->asrc_format);
++ ret = of_property_read_u32(np, "fsl,asrc-format", &asrc_fmt);
++ easrc->asrc_format = (__force snd_pcm_format_t)asrc_fmt;
+ if (ret) {
+ dev_err(dev, "failed to asrc format\n");
+ return ret;
+ }
+
+- if (!(FSL_EASRC_FORMATS & (1ULL << easrc->asrc_format))) {
++ if (!(FSL_EASRC_FORMATS & (pcm_format_to_bits(easrc->asrc_format)))) {
+ dev_warn(dev, "unsupported format, switching to S24_LE\n");
+ easrc->asrc_format = SNDRV_PCM_FORMAT_S24_LE;
+ }
+diff --git a/sound/soc/fsl/fsl_easrc.h b/sound/soc/fsl/fsl_easrc.h
+index 30620d56252cc..5b8469757c122 100644
+--- a/sound/soc/fsl/fsl_easrc.h
++++ b/sound/soc/fsl/fsl_easrc.h
+@@ -569,7 +569,7 @@ struct fsl_easrc_io_params {
+ unsigned int access_len;
+ unsigned int fifo_wtmk;
+ unsigned int sample_rate;
+- unsigned int sample_format;
++ snd_pcm_format_t sample_format;
+ unsigned int norm_rate;
+ };
+
+diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
+index bda66b30e063c..763f5f0592af1 100644
+--- a/sound/soc/fsl/fsl_esai.c
++++ b/sound/soc/fsl/fsl_esai.c
+@@ -1070,7 +1070,7 @@ static int fsl_esai_probe(struct platform_device *pdev)
+ regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0);
+
+ ret = pm_runtime_put_sync(&pdev->dev);
+- if (ret < 0)
++ if (ret < 0 && ret != -ENOSYS)
+ goto err_pm_get_sync;
+
+ /*
+diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
+index 9f90989ac59a6..acc820da46ebf 100644
+--- a/sound/soc/fsl/fsl_micfil.c
++++ b/sound/soc/fsl/fsl_micfil.c
+@@ -88,21 +88,21 @@ static DECLARE_TLV_DB_SCALE(gain_tlv, 0, 100, 0);
+
+ static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = {
+ SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(1), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(2), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(3), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(4), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(5), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv),
+ SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL,
+- MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0x7, gain_tlv),
++ MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv),
+ SOC_ENUM_EXT("MICFIL Quality Select",
+ fsl_micfil_quality_enum,
+ snd_soc_get_enum_double, snd_soc_put_enum_double),
+@@ -191,6 +191,25 @@ static int fsl_micfil_reset(struct device *dev)
+ return ret;
+ }
+
++ /*
++ * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined
++ * as non-volatile register, so SRES still remain in regmap
++ * cache after set, that every update of REG_MICFIL_CTRL1,
++ * software reset happens. so clear it explicitly.
++ */
++ ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1,
++ MICFIL_CTRL1_SRES);
++ if (ret)
++ return ret;
++
++ /*
++ * Set SRES should clear CHnF flags, But even add delay here
++ * the CHnF may not be cleared sometimes, so clear CHnF explicitly.
++ */
++ ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF);
++ if (ret)
++ return ret;
++
+ return 0;
+ }
+
+@@ -744,7 +763,7 @@ static int fsl_micfil_probe(struct platform_device *pdev)
+ ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to pcm register\n");
+- return ret;
++ goto err_pm_disable;
+ }
+
+ ret = devm_snd_soc_register_component(&pdev->dev, &fsl_micfil_component,
+@@ -752,9 +771,20 @@ static int fsl_micfil_probe(struct platform_device *pdev)
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register component %s\n",
+ fsl_micfil_component.name);
++ goto err_pm_disable;
+ }
+
+ return ret;
++
++err_pm_disable:
++ pm_runtime_disable(&pdev->dev);
++
++ return ret;
++}
++
++static void fsl_micfil_remove(struct platform_device *pdev)
++{
++ pm_runtime_disable(&pdev->dev);
+ }
+
+ static int __maybe_unused fsl_micfil_runtime_suspend(struct device *dev)
+@@ -815,6 +845,7 @@ static const struct dev_pm_ops fsl_micfil_pm_ops = {
+
+ static struct platform_driver fsl_micfil_driver = {
+ .probe = fsl_micfil_probe,
++ .remove_new = fsl_micfil_remove,
+ .driver = {
+ .name = "fsl-micfil-dai",
+ .pm = &fsl_micfil_pm_ops,
+diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c
+index 69aeb0e71844d..c33439650823b 100644
+--- a/sound/soc/fsl/fsl_mqs.c
++++ b/sound/soc/fsl/fsl_mqs.c
+@@ -204,10 +204,10 @@ static int fsl_mqs_probe(struct platform_device *pdev)
+ }
+
+ mqs_priv->regmap = syscon_node_to_regmap(gpr_np);
++ of_node_put(gpr_np);
+ if (IS_ERR(mqs_priv->regmap)) {
+ dev_err(&pdev->dev, "failed to get gpr regmap\n");
+- ret = PTR_ERR(mqs_priv->regmap);
+- goto err_free_gpr_np;
++ return PTR_ERR(mqs_priv->regmap);
+ }
+ } else {
+ regs = devm_platform_ioremap_resource(pdev, 0);
+@@ -236,8 +236,7 @@ static int fsl_mqs_probe(struct platform_device *pdev)
+ if (IS_ERR(mqs_priv->mclk)) {
+ dev_err(&pdev->dev, "failed to get the clock: %ld\n",
+ PTR_ERR(mqs_priv->mclk));
+- ret = PTR_ERR(mqs_priv->mclk);
+- goto err_free_gpr_np;
++ return PTR_ERR(mqs_priv->mclk);
+ }
+
+ dev_set_drvdata(&pdev->dev, mqs_priv);
+@@ -246,13 +245,9 @@ static int fsl_mqs_probe(struct platform_device *pdev)
+ ret = devm_snd_soc_register_component(&pdev->dev, &soc_codec_fsl_mqs,
+ &fsl_mqs_dai, 1);
+ if (ret)
+- goto err_free_gpr_np;
+- return 0;
+-
+-err_free_gpr_np:
+- of_node_put(gpr_np);
++ return ret;
+
+- return ret;
++ return 0;
+ }
+
+ static int fsl_mqs_remove(struct platform_device *pdev)
+@@ -337,4 +332,4 @@ module_platform_driver(fsl_mqs_driver);
+ MODULE_AUTHOR("Shengjiu Wang <Shengjiu.Wang@nxp.com>");
+ MODULE_DESCRIPTION("MQS codec driver");
+ MODULE_LICENSE("GPL v2");
+-MODULE_ALIAS("platform: fsl-mqs");
++MODULE_ALIAS("platform:fsl-mqs");
+diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
+index 38f6362099d58..59dffa5ff34f4 100644
+--- a/sound/soc/fsl/fsl_sai.c
++++ b/sound/soc/fsl/fsl_sai.c
+@@ -231,6 +231,7 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai,
+ if (!sai->is_lsb_first)
+ val_cr4 |= FSL_SAI_CR4_MF;
+
++ sai->is_dsp_mode = false;
+ /* DAI mode */
+ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+ case SND_SOC_DAIFMT_I2S:
+@@ -297,23 +298,23 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai,
+ return -EINVAL;
+ }
+
+- /* DAI clock master masks */
+- switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+- case SND_SOC_DAIFMT_CBS_CFS:
++ /* DAI clock provider masks */
++ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
++ case SND_SOC_DAIFMT_CBC_CFC:
+ val_cr2 |= FSL_SAI_CR2_BCD_MSTR;
+ val_cr4 |= FSL_SAI_CR4_FSD_MSTR;
+- sai->is_slave_mode = false;
++ sai->is_consumer_mode = false;
+ break;
+- case SND_SOC_DAIFMT_CBM_CFM:
+- sai->is_slave_mode = true;
++ case SND_SOC_DAIFMT_CBP_CFP:
++ sai->is_consumer_mode = true;
+ break;
+- case SND_SOC_DAIFMT_CBS_CFM:
++ case SND_SOC_DAIFMT_CBC_CFP:
+ val_cr2 |= FSL_SAI_CR2_BCD_MSTR;
+- sai->is_slave_mode = false;
++ sai->is_consumer_mode = false;
+ break;
+- case SND_SOC_DAIFMT_CBM_CFS:
++ case SND_SOC_DAIFMT_CBP_CFC:
+ val_cr4 |= FSL_SAI_CR4_FSD_MSTR;
+- sai->is_slave_mode = true;
++ sai->is_consumer_mode = true;
+ break;
+ default:
+ return -EINVAL;
+@@ -356,8 +357,8 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
+ u32 id;
+ int ret = 0;
+
+- /* Don't apply to slave mode */
+- if (sai->is_slave_mode)
++ /* Don't apply to consumer mode */
++ if (sai->is_consumer_mode)
+ return 0;
+
+ /*
+@@ -462,7 +463,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
+
+ pins = DIV_ROUND_UP(channels, slots);
+
+- if (!sai->is_slave_mode) {
++ if (!sai->is_consumer_mode) {
+ if (sai->bclk_ratio)
+ ret = fsl_sai_set_bclk(cpu_dai, tx,
+ sai->bclk_ratio *
+@@ -502,12 +503,12 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
+ val_cr4 |= FSL_SAI_CR4_CHMOD;
+
+ /*
+- * For SAI master mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will
++ * For SAI provider mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will
+ * generate bclk and frame clock for Tx(Rx), we should set RCR4(TCR4),
+ * RCR5(TCR5) for playback(capture), or there will be sync error.
+ */
+
+- if (!sai->is_slave_mode && fsl_sai_dir_is_synced(sai, adir)) {
++ if (!sai->is_consumer_mode && fsl_sai_dir_is_synced(sai, adir)) {
+ regmap_update_bits(sai->regmap, FSL_SAI_xCR4(!tx, ofs),
+ FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK |
+ FSL_SAI_CR4_CHMOD_MASK,
+@@ -543,7 +544,7 @@ static int fsl_sai_hw_free(struct snd_pcm_substream *substream,
+ regmap_update_bits(sai->regmap, FSL_SAI_xCR3(tx, ofs),
+ FSL_SAI_CR3_TRCE_MASK, 0);
+
+- if (!sai->is_slave_mode &&
++ if (!sai->is_consumer_mode &&
+ sai->mclk_streams & BIT(substream->stream)) {
+ clk_disable_unprepare(sai->mclk_clk[sai->mclk_id[tx]]);
+ sai->mclk_streams &= ~BIT(substream->stream);
+@@ -559,7 +560,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir)
+ u32 xcsr, count = 100;
+
+ regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs),
+- FSL_SAI_CSR_TERE, 0);
++ FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0);
+
+ /* TERE will remain set till the end of current frame */
+ do {
+@@ -577,7 +578,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir)
+ * This is a hardware bug, and will be fix in the
+ * next sai version.
+ */
+- if (!sai->is_slave_mode) {
++ if (!sai->is_consumer_mode) {
+ /* Software Reset */
+ regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR);
+ /* Clear SR bit to finish the reset */
+@@ -1000,6 +1001,7 @@ static int fsl_sai_runtime_resume(struct device *dev);
+ static int fsl_sai_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
++ struct device *dev = &pdev->dev;
+ struct fsl_sai *sai;
+ struct regmap *gpr;
+ struct resource *res;
+@@ -1008,12 +1010,12 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ int irq, ret, i;
+ int index;
+
+- sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL);
++ sai = devm_kzalloc(dev, sizeof(*sai), GFP_KERNEL);
+ if (!sai)
+ return -ENOMEM;
+
+ sai->pdev = pdev;
+- sai->soc_data = of_device_get_match_data(&pdev->dev);
++ sai->soc_data = of_device_get_match_data(dev);
+
+ sai->is_lsb_first = of_property_read_bool(np, "lsb-first");
+
+@@ -1028,18 +1030,18 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ ARRAY_SIZE(fsl_sai_reg_defaults_ofs8);
+ }
+
+- sai->regmap = devm_regmap_init_mmio(&pdev->dev, base, &fsl_sai_regmap_config);
++ sai->regmap = devm_regmap_init_mmio(dev, base, &fsl_sai_regmap_config);
+ if (IS_ERR(sai->regmap)) {
+- dev_err(&pdev->dev, "regmap init failed\n");
++ dev_err(dev, "regmap init failed\n");
+ return PTR_ERR(sai->regmap);
+ }
+
+- sai->bus_clk = devm_clk_get(&pdev->dev, "bus");
++ sai->bus_clk = devm_clk_get(dev, "bus");
+ /* Compatible with old DTB cases */
+ if (IS_ERR(sai->bus_clk) && PTR_ERR(sai->bus_clk) != -EPROBE_DEFER)
+- sai->bus_clk = devm_clk_get(&pdev->dev, "sai");
++ sai->bus_clk = devm_clk_get(dev, "sai");
+ if (IS_ERR(sai->bus_clk)) {
+- dev_err(&pdev->dev, "failed to get bus clock: %ld\n",
++ dev_err(dev, "failed to get bus clock: %ld\n",
+ PTR_ERR(sai->bus_clk));
+ /* -EPROBE_DEFER */
+ return PTR_ERR(sai->bus_clk);
+@@ -1047,9 +1049,9 @@ static int fsl_sai_probe(struct platform_device *pdev)
+
+ for (i = 1; i < FSL_SAI_MCLK_MAX; i++) {
+ sprintf(tmp, "mclk%d", i);
+- sai->mclk_clk[i] = devm_clk_get(&pdev->dev, tmp);
++ sai->mclk_clk[i] = devm_clk_get(dev, tmp);
+ if (IS_ERR(sai->mclk_clk[i])) {
+- dev_err(&pdev->dev, "failed to get mclk%d clock: %ld\n",
++ dev_err(dev, "failed to get mclk%d clock: %ld\n",
+ i + 1, PTR_ERR(sai->mclk_clk[i]));
+ sai->mclk_clk[i] = NULL;
+ }
+@@ -1064,10 +1066,10 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ if (irq < 0)
+ return irq;
+
+- ret = devm_request_irq(&pdev->dev, irq, fsl_sai_isr, IRQF_SHARED,
++ ret = devm_request_irq(dev, irq, fsl_sai_isr, IRQF_SHARED,
+ np->name, sai);
+ if (ret) {
+- dev_err(&pdev->dev, "failed to claim irq %u\n", irq);
++ dev_err(dev, "failed to claim irq %u\n", irq);
+ return ret;
+ }
+
+@@ -1084,7 +1086,7 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ if (of_find_property(np, "fsl,sai-synchronous-rx", NULL) &&
+ of_find_property(np, "fsl,sai-asynchronous", NULL)) {
+ /* error out if both synchronous and asynchronous are present */
+- dev_err(&pdev->dev, "invalid binding for synchronous mode\n");
++ dev_err(dev, "invalid binding for synchronous mode\n");
+ return -EINVAL;
+ }
+
+@@ -1105,7 +1107,7 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ of_device_is_compatible(np, "fsl,imx6ul-sai")) {
+ gpr = syscon_regmap_lookup_by_compatible("fsl,imx6ul-iomuxc-gpr");
+ if (IS_ERR(gpr)) {
+- dev_err(&pdev->dev, "cannot find iomuxc registers\n");
++ dev_err(dev, "cannot find iomuxc registers\n");
+ return PTR_ERR(gpr);
+ }
+
+@@ -1123,23 +1125,23 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ sai->dma_params_tx.maxburst = FSL_SAI_MAXBURST_TX;
+
+ platform_set_drvdata(pdev, sai);
+- pm_runtime_enable(&pdev->dev);
+- if (!pm_runtime_enabled(&pdev->dev)) {
+- ret = fsl_sai_runtime_resume(&pdev->dev);
++ pm_runtime_enable(dev);
++ if (!pm_runtime_enabled(dev)) {
++ ret = fsl_sai_runtime_resume(dev);
+ if (ret)
+ goto err_pm_disable;
+ }
+
+- ret = pm_runtime_get_sync(&pdev->dev);
++ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+- pm_runtime_put_noidle(&pdev->dev);
++ pm_runtime_put_noidle(dev);
+ goto err_pm_get_sync;
+ }
+
+ /* Get sai version */
+- ret = fsl_sai_check_version(&pdev->dev);
++ ret = fsl_sai_check_version(dev);
+ if (ret < 0)
+- dev_warn(&pdev->dev, "Error reading SAI version: %d\n", ret);
++ dev_warn(dev, "Error reading SAI version: %d\n", ret);
+
+ /* Select MCLK direction */
+ if (of_find_property(np, "fsl,sai-mclk-direction-output", NULL) &&
+@@ -1148,8 +1150,8 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ FSL_SAI_MCTL_MCLK_EN, FSL_SAI_MCTL_MCLK_EN);
+ }
+
+- ret = pm_runtime_put_sync(&pdev->dev);
+- if (ret < 0)
++ ret = pm_runtime_put_sync(dev);
++ if (ret < 0 && ret != -ENOSYS)
+ goto err_pm_get_sync;
+
+ /*
+@@ -1161,12 +1163,12 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ if (ret)
+ goto err_pm_get_sync;
+ } else {
+- ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
++ ret = devm_snd_dmaengine_pcm_register(dev, NULL, 0);
+ if (ret)
+ goto err_pm_get_sync;
+ }
+
+- ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
++ ret = devm_snd_soc_register_component(dev, &fsl_component,
+ &sai->cpu_dai_drv, 1);
+ if (ret)
+ goto err_pm_get_sync;
+@@ -1174,10 +1176,10 @@ static int fsl_sai_probe(struct platform_device *pdev)
+ return ret;
+
+ err_pm_get_sync:
+- if (!pm_runtime_status_suspended(&pdev->dev))
+- fsl_sai_runtime_suspend(&pdev->dev);
++ if (!pm_runtime_status_suspended(dev))
++ fsl_sai_runtime_suspend(dev);
+ err_pm_disable:
+- pm_runtime_disable(&pdev->dev);
++ pm_runtime_disable(dev);
+
+ return ret;
+ }
+diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
+index bc60030967dd8..f8c9a8fb78984 100644
+--- a/sound/soc/fsl/fsl_sai.h
++++ b/sound/soc/fsl/fsl_sai.h
+@@ -80,13 +80,14 @@
+ #define FSL_SAI_xCR3(tx, ofs) (tx ? FSL_SAI_TCR3(ofs) : FSL_SAI_RCR3(ofs))
+ #define FSL_SAI_xCR4(tx, ofs) (tx ? FSL_SAI_TCR4(ofs) : FSL_SAI_RCR4(ofs))
+ #define FSL_SAI_xCR5(tx, ofs) (tx ? FSL_SAI_TCR5(ofs) : FSL_SAI_RCR5(ofs))
+-#define FSL_SAI_xDR(tx, ofs) (tx ? FSL_SAI_TDR(ofs) : FSL_SAI_RDR(ofs))
+-#define FSL_SAI_xFR(tx, ofs) (tx ? FSL_SAI_TFR(ofs) : FSL_SAI_RFR(ofs))
++#define FSL_SAI_xDR0(tx) (tx ? FSL_SAI_TDR0 : FSL_SAI_RDR0)
++#define FSL_SAI_xFR0(tx) (tx ? FSL_SAI_TFR0 : FSL_SAI_RFR0)
+ #define FSL_SAI_xMR(tx) (tx ? FSL_SAI_TMR : FSL_SAI_RMR)
+
+ /* SAI Transmit/Receive Control Register */
+ #define FSL_SAI_CSR_TERE BIT(31)
+ #define FSL_SAI_CSR_SE BIT(30)
++#define FSL_SAI_CSR_BCE BIT(28)
+ #define FSL_SAI_CSR_FR BIT(25)
+ #define FSL_SAI_CSR_SR BIT(24)
+ #define FSL_SAI_CSR_xF_SHIFT 16
+@@ -259,7 +260,7 @@ struct fsl_sai {
+ struct clk *bus_clk;
+ struct clk *mclk_clk[FSL_SAI_MCLK_MAX];
+
+- bool is_slave_mode;
++ bool is_consumer_mode;
+ bool is_lsb_first;
+ bool is_dsp_mode;
+ bool synchronous[2];
+diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
+index 1c53719bb61e2..5b107f2555ddb 100644
+--- a/sound/soc/fsl/fsl_spdif.c
++++ b/sound/soc/fsl/fsl_spdif.c
+@@ -598,6 +598,8 @@ static void fsl_spdif_shutdown(struct snd_pcm_substream *substream,
+ mask = SCR_TXFIFO_AUTOSYNC_MASK | SCR_TXFIFO_CTRL_MASK |
+ SCR_TXSEL_MASK | SCR_USRC_SEL_MASK |
+ SCR_TXFIFO_FSEL_MASK;
++ /* Disable TX clock */
++ regmap_update_bits(regmap, REG_SPDIF_STC, STC_TXCLK_ALL_EN_MASK, 0);
+ } else {
+ scr = SCR_RXFIFO_OFF | SCR_RXFIFO_CTL_ZERO;
+ mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK|
+@@ -664,6 +666,8 @@ static int fsl_spdif_trigger(struct snd_pcm_substream *substream,
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ regmap_update_bits(regmap, REG_SPDIF_SCR, dmaen, 0);
+ regmap_update_bits(regmap, REG_SPDIF_SIE, intr, 0);
++ regmap_write(regmap, REG_SPDIF_STL, 0x0);
++ regmap_write(regmap, REG_SPDIF_STR, 0x0);
+ break;
+ default:
+ return -EINVAL;
+diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
+index ecbc1c365d5b1..0c73c2e9dce0c 100644
+--- a/sound/soc/fsl/fsl_ssi.c
++++ b/sound/soc/fsl/fsl_ssi.c
+@@ -1160,14 +1160,14 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = {
+ .symmetric_channels = 1,
+ .probe = fsl_ssi_dai_probe,
+ .playback = {
+- .stream_name = "AC97 Playback",
++ .stream_name = "CPU AC97 Playback",
+ .channels_min = 2,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20,
+ },
+ .capture = {
+- .stream_name = "AC97 Capture",
++ .stream_name = "CPU AC97 Capture",
+ .channels_min = 2,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_48000,
+diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c
+index a364e2415de02..d991e457060c7 100644
+--- a/sound/soc/fsl/imx-audmix.c
++++ b/sound/soc/fsl/imx-audmix.c
+@@ -228,6 +228,8 @@ static int imx_audmix_probe(struct platform_device *pdev)
+
+ dai_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s%s",
+ fe_name_pref, args.np->full_name + 1);
++ if (!dai_name)
++ return -ENOMEM;
+
+ dev_info(pdev->dev.parent, "DAI FE name:%s\n", dai_name);
+
+@@ -236,6 +238,8 @@ static int imx_audmix_probe(struct platform_device *pdev)
+ capture_dai_name =
+ devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s %s",
+ dai_name, "CPU-Capture");
++ if (!capture_dai_name)
++ return -ENOMEM;
+ }
+
+ priv->dai[i].cpus = &dlc[0];
+@@ -266,6 +270,8 @@ static int imx_audmix_probe(struct platform_device *pdev)
+ "AUDMIX-Playback-%d", i);
+ be_cp = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "AUDMIX-Capture-%d", i);
++ if (!be_name || !be_pb || !be_cp)
++ return -ENOMEM;
+
+ priv->dai[num_dai + i].cpus = &dlc[3];
+ priv->dai[num_dai + i].codecs = &dlc[4];
+@@ -293,6 +299,9 @@ static int imx_audmix_probe(struct platform_device *pdev)
+ priv->dapm_routes[i].source =
+ devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s %s",
+ dai_name, "CPU-Playback");
++ if (!priv->dapm_routes[i].source)
++ return -ENOMEM;
++
+ priv->dapm_routes[i].sink = be_pb;
+ priv->dapm_routes[num_dai + i].source = be_pb;
+ priv->dapm_routes[num_dai + i].sink = be_cp;
+diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c
+index dfa05d40b2764..a8e5e0f57faf9 100644
+--- a/sound/soc/fsl/imx-audmux.c
++++ b/sound/soc/fsl/imx-audmux.c
+@@ -298,7 +298,7 @@ static int imx_audmux_probe(struct platform_device *pdev)
+ audmux_clk = NULL;
+ }
+
+- audmux_type = (enum imx_audmux_type)of_device_get_match_data(&pdev->dev);
++ audmux_type = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+ switch (audmux_type) {
+ case IMX31_AUDMUX:
+diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c
+index 58fd0639a0698..d59f5efbf7ed5 100644
+--- a/sound/soc/fsl/imx-card.c
++++ b/sound/soc/fsl/imx-card.c
+@@ -17,6 +17,9 @@
+
+ #include "fsl_sai.h"
+
++#define IMX_CARD_MCLK_22P5792MHZ 22579200
++#define IMX_CARD_MCLK_24P576MHZ 24576000
++
+ enum codec_type {
+ CODEC_DUMMY = 0,
+ CODEC_AK5558 = 1,
+@@ -115,12 +118,12 @@ struct imx_card_data {
+ struct snd_soc_card card;
+ int num_dapm_routes;
+ u32 asrc_rate;
+- u32 asrc_format;
++ snd_pcm_format_t asrc_format;
+ };
+
+ static struct imx_akcodec_fs_mul ak4458_fs_mul[] = {
+ /* Normal, < 32kHz */
+- { .rmin = 8000, .rmax = 24000, .wmin = 1024, .wmax = 1024, },
++ { .rmin = 8000, .rmax = 24000, .wmin = 256, .wmax = 1024, },
+ /* Normal, 32kHz */
+ { .rmin = 32000, .rmax = 32000, .wmin = 256, .wmax = 1024, },
+ /* Normal */
+@@ -151,8 +154,8 @@ static struct imx_akcodec_fs_mul ak4497_fs_mul[] = {
+ * Table 7 - mapping multiplier and speed mode
+ * Tables 8 & 9 - mapping speed mode and LRCK fs
+ */
+- { .rmin = 8000, .rmax = 32000, .wmin = 1024, .wmax = 1024, }, /* Normal, <= 32kHz */
+- { .rmin = 44100, .rmax = 48000, .wmin = 512, .wmax = 512, }, /* Normal */
++ { .rmin = 8000, .rmax = 32000, .wmin = 256, .wmax = 1024, }, /* Normal, <= 32kHz */
++ { .rmin = 44100, .rmax = 48000, .wmin = 256, .wmax = 512, }, /* Normal */
+ { .rmin = 88200, .rmax = 96000, .wmin = 256, .wmax = 256, }, /* Double */
+ { .rmin = 176400, .rmax = 192000, .wmin = 128, .wmax = 128, }, /* Quad */
+ { .rmin = 352800, .rmax = 384000, .wmin = 128, .wmax = 128, }, /* Oct */
+@@ -164,7 +167,7 @@ static struct imx_akcodec_fs_mul ak4497_fs_mul[] = {
+ * (Table 4 from datasheet)
+ */
+ static struct imx_akcodec_fs_mul ak5558_fs_mul[] = {
+- { .rmin = 8000, .rmax = 32000, .wmin = 1024, .wmax = 1024, },
++ { .rmin = 8000, .rmax = 32000, .wmin = 512, .wmax = 1024, },
+ { .rmin = 44100, .rmax = 48000, .wmin = 512, .wmax = 512, },
+ { .rmin = 88200, .rmax = 96000, .wmin = 256, .wmax = 256, },
+ { .rmin = 176400, .rmax = 192000, .wmin = 128, .wmax = 128, },
+@@ -247,13 +250,14 @@ static bool codec_is_akcodec(unsigned int type)
+ }
+
+ static unsigned long akcodec_get_mclk_rate(struct snd_pcm_substream *substream,
+- struct snd_pcm_hw_params *params)
++ struct snd_pcm_hw_params *params,
++ int slots, int slot_width)
+ {
+ struct snd_soc_pcm_runtime *rtd = substream->private_data;
+ struct imx_card_data *data = snd_soc_card_get_drvdata(rtd->card);
+ const struct imx_card_plat_data *plat_data = data->plat_data;
+ struct dai_link_data *link_data = &data->link_data[rtd->num];
+- unsigned int width = link_data->slots * link_data->slot_width;
++ unsigned int width = slots * slot_width;
+ unsigned int rate = params_rate(params);
+ int i;
+
+@@ -349,12 +353,17 @@ static int imx_aif_hw_params(struct snd_pcm_substream *substream,
+
+ /* Set MCLK freq */
+ if (codec_is_akcodec(plat_data->type))
+- mclk_freq = akcodec_get_mclk_rate(substream, params);
++ mclk_freq = akcodec_get_mclk_rate(substream, params, slots, slot_width);
+ else
+ mclk_freq = params_rate(params) * slots * slot_width;
+- /* Use the maximum freq from DSD512 (512*44100 = 22579200) */
+- if (format_is_dsd(params))
+- mclk_freq = 22579200;
++
++ if (format_is_dsd(params)) {
++ /* Use the maximum freq from DSD512 (512*44100 = 22579200) */
++ if (!(params_rate(params) % 11025))
++ mclk_freq = IMX_CARD_MCLK_22P5792MHZ;
++ else
++ mclk_freq = IMX_CARD_MCLK_24P576MHZ;
++ }
+
+ ret = snd_soc_dai_set_sysclk(cpu_dai, link_data->cpu_sysclk_id, mclk_freq,
+ SND_SOC_CLOCK_OUT);
+@@ -465,7 +474,7 @@ static int be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd,
+
+ mask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+ snd_mask_none(mask);
+- snd_mask_set(mask, data->asrc_format);
++ snd_mask_set(mask, (__force unsigned int)data->asrc_format);
+
+ return 0;
+ }
+@@ -484,6 +493,7 @@ static int imx_card_parse_of(struct imx_card_data *data)
+ struct dai_link_data *link_data;
+ struct of_phandle_args args;
+ int ret, num_links;
++ u32 asrc_fmt = 0;
+ u32 width;
+
+ ret = snd_soc_of_parse_card_name(card, "model");
+@@ -553,8 +563,23 @@ static int imx_card_parse_of(struct imx_card_data *data)
+ link_data->cpu_sysclk_id = FSL_SAI_CLK_MAST1;
+
+ /* sai may support mclk/bclk = 1 */
+- if (of_find_property(np, "fsl,mclk-equal-bclk", NULL))
++ if (of_find_property(np, "fsl,mclk-equal-bclk", NULL)) {
+ link_data->one2one_ratio = true;
++ } else {
++ int i;
++
++ /*
++ * i.MX8MQ don't support one2one ratio, then
++ * with ak4497 only 16bit case is supported.
++ */
++ for (i = 0; i < ARRAY_SIZE(ak4497_fs_mul); i++) {
++ if (ak4497_fs_mul[i].rmin == 705600 &&
++ ak4497_fs_mul[i].rmax == 768000) {
++ ak4497_fs_mul[i].wmin = 32;
++ ak4497_fs_mul[i].wmax = 32;
++ }
++ }
++ }
+ }
+
+ link->cpus->of_node = args.np;
+@@ -563,9 +588,8 @@ static int imx_card_parse_of(struct imx_card_data *data)
+
+ ret = snd_soc_of_get_dai_name(cpu, &link->cpus->dai_name);
+ if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(card->dev, "%s: error getting cpu dai name: %d\n",
+- link->name, ret);
++ dev_err_probe(card->dev, ret,
++ "%s: error getting cpu dai name\n", link->name);
+ goto err;
+ }
+
+@@ -573,9 +597,8 @@ static int imx_card_parse_of(struct imx_card_data *data)
+ if (codec) {
+ ret = snd_soc_of_get_dai_link_codecs(dev, codec, link);
+ if (ret < 0) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "%s: codec dai not found: %d\n",
+- link->name, ret);
++ dev_err_probe(dev, ret, "%s: codec dai not found\n",
++ link->name);
+ goto err;
+ }
+
+@@ -617,7 +640,8 @@ static int imx_card_parse_of(struct imx_card_data *data)
+ goto err;
+ }
+
+- ret = of_property_read_u32(args.np, "fsl,asrc-format", &data->asrc_format);
++ ret = of_property_read_u32(args.np, "fsl,asrc-format", &asrc_fmt);
++ data->asrc_format = (__force snd_pcm_format_t)asrc_fmt;
+ if (ret) {
+ /* Fallback to old binding; translate to asrc_format */
+ ret = of_property_read_u32(args.np, "fsl,asrc-width", &width);
+@@ -674,6 +698,10 @@ static int imx_card_parse_of(struct imx_card_data *data)
+ of_node_put(cpu);
+ of_node_put(codec);
+ of_node_put(platform);
++
++ cpu = NULL;
++ codec = NULL;
++ platform = NULL;
+ }
+
+ return 0;
+@@ -814,11 +842,8 @@ static int imx_card_probe(struct platform_device *pdev)
+ }
+
+ ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
+- if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
+- return ret;
+- }
++ if (ret)
++ return dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n");
+
+ return 0;
+ }
+diff --git a/sound/soc/fsl/imx-es8328.c b/sound/soc/fsl/imx-es8328.c
+index 1981dcd7e9305..a7fb53e0f96f8 100644
+--- a/sound/soc/fsl/imx-es8328.c
++++ b/sound/soc/fsl/imx-es8328.c
+@@ -87,6 +87,7 @@ static int imx_es8328_probe(struct platform_device *pdev)
+ if (int_port > MUX_PORT_MAX || int_port == 0) {
+ dev_err(dev, "mux-int-port: hardware only has %d mux ports\n",
+ MUX_PORT_MAX);
++ ret = -EINVAL;
+ goto fail;
+ }
+
+diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c
+index 34a0dceae6216..d61e3c841e267 100644
+--- a/sound/soc/fsl/imx-hdmi.c
++++ b/sound/soc/fsl/imx-hdmi.c
+@@ -126,6 +126,7 @@ static int imx_hdmi_probe(struct platform_device *pdev)
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
++ put_device(&cpu_pdev->dev);
+ goto fail;
+ }
+
+@@ -145,6 +146,8 @@ static int imx_hdmi_probe(struct platform_device *pdev)
+ data->dai.capture_only = false;
+ data->dai.init = imx_hdmi_init;
+
++ put_device(&cpu_pdev->dev);
++
+ if (of_node_name_eq(cpu_np, "sai")) {
+ data->cpu_priv.sysclk_id[1] = FSL_SAI_CLK_MAST1;
+ data->cpu_priv.sysclk_id[0] = FSL_SAI_CLK_MAST1;
+diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c
+index f45cb4bbb6c4d..c4e3699ad3054 100644
+--- a/sound/soc/fsl/imx-sgtl5000.c
++++ b/sound/soc/fsl/imx-sgtl5000.c
+@@ -120,19 +120,19 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+- goto fail;
++ goto put_device;
+ }
+
+ comp = devm_kzalloc(&pdev->dev, 3 * sizeof(*comp), GFP_KERNEL);
+ if (!comp) {
+ ret = -ENOMEM;
+- goto fail;
++ goto put_device;
+ }
+
+ data->codec_clk = clk_get(&codec_dev->dev, NULL);
+ if (IS_ERR(data->codec_clk)) {
+ ret = PTR_ERR(data->codec_clk);
+- goto fail;
++ goto put_device;
+ }
+
+ data->clk_frequency = clk_get_rate(data->codec_clk);
+@@ -158,10 +158,10 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
+ data->card.dev = &pdev->dev;
+ ret = snd_soc_of_parse_card_name(&data->card, "model");
+ if (ret)
+- goto fail;
++ goto put_device;
+ ret = snd_soc_of_parse_audio_routing(&data->card, "audio-routing");
+ if (ret)
+- goto fail;
++ goto put_device;
+ data->card.num_links = 1;
+ data->card.owner = THIS_MODULE;
+ data->card.dai_link = &data->dai;
+@@ -173,10 +173,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
+ if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n",
+- ret);
+- goto fail;
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n");
++ goto put_device;
+ }
+
+ of_node_put(ssi_np);
+@@ -184,6 +182,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
+
+ return 0;
+
++put_device:
++ put_device(&codec_dev->dev);
+ fail:
+ if (data && !IS_ERR(data->codec_clk))
+ clk_put(data->codec_clk);
+diff --git a/sound/soc/fsl/imx-spdif.c b/sound/soc/fsl/imx-spdif.c
+index 6c4dadf603551..4446fba755b9a 100644
+--- a/sound/soc/fsl/imx-spdif.c
++++ b/sound/soc/fsl/imx-spdif.c
+@@ -70,8 +70,8 @@ static int imx_spdif_audio_probe(struct platform_device *pdev)
+ goto end;
+
+ ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card failed: %d\n", ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n");
+
+ end:
+ of_node_put(spdif_np);
+diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
+index af3c3b90c0aca..83b4a22bf15ac 100644
+--- a/sound/soc/fsl/pcm030-audio-fabric.c
++++ b/sound/soc/fsl/pcm030-audio-fabric.c
+@@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op)
+ dev_err(&op->dev, "platform_device_alloc() failed\n");
+
+ ret = platform_device_add(pdata->codec_device);
+- if (ret)
++ if (ret) {
+ dev_err(&op->dev, "platform_device_add() failed: %d\n", ret);
++ platform_device_put(pdata->codec_device);
++ }
+
+ ret = snd_soc_register_card(card);
+- if (ret)
++ if (ret) {
+ dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret);
++ platform_device_del(pdata->codec_device);
++ platform_device_put(pdata->codec_device);
++ }
+
+ platform_set_drvdata(op, pdata);
+-
+ return ret;
++
+ }
+
+ static int pcm030_fabric_remove(struct platform_device *op)
+diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
+index 546f6fd0609e1..89814f68ff563 100644
+--- a/sound/soc/generic/audio-graph-card.c
++++ b/sound/soc/generic/audio-graph-card.c
+@@ -158,8 +158,10 @@ static int asoc_simple_parse_dai(struct device_node *ep,
+ * if he unbinded CPU or Codec.
+ */
+ ret = snd_soc_get_dai_name(&args, &dlc->dai_name);
+- if (ret < 0)
++ if (ret < 0) {
++ of_node_put(node);
+ return ret;
++ }
+
+ dlc->of_node = node;
+
+@@ -481,8 +483,10 @@ static int __graph_for_each_link(struct asoc_simple_priv *priv,
+ of_node_put(codec_ep);
+ of_node_put(codec_port);
+
+- if (ret < 0)
++ if (ret < 0) {
++ of_node_put(cpu_ep);
+ return ret;
++ }
+
+ codec_port_old = codec_port;
+ }
+diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
+index 10c63b73900c6..ffda8a38de3ed 100644
+--- a/sound/soc/generic/simple-card-utils.c
++++ b/sound/soc/generic/simple-card-utils.c
+@@ -275,6 +275,7 @@ int asoc_simple_hw_params(struct snd_pcm_substream *substream,
+ mclk_fs = props->mclk_fs;
+
+ if (mclk_fs) {
++ struct snd_soc_component *component;
+ mclk = params_rate(params) * mclk_fs;
+
+ for_each_prop_dai_codec(props, i, pdai) {
+@@ -282,16 +283,30 @@ int asoc_simple_hw_params(struct snd_pcm_substream *substream,
+ if (ret < 0)
+ return ret;
+ }
++
+ for_each_prop_dai_cpu(props, i, pdai) {
+ ret = asoc_simple_set_clk_rate(pdai, mclk);
+ if (ret < 0)
+ return ret;
+ }
++
++ /* Ensure sysclk is set on all components in case any
++ * (such as platform components) are missed by calls to
++ * snd_soc_dai_set_sysclk.
++ */
++ for_each_rtd_components(rtd, i, component) {
++ ret = snd_soc_component_set_sysclk(component, 0, 0,
++ mclk, SND_SOC_CLOCK_IN);
++ if (ret && ret != -ENOTSUPP)
++ return ret;
++ }
++
+ for_each_rtd_codec_dais(rtd, i, sdai) {
+ ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_IN);
+ if (ret && ret != -ENOTSUPP)
+ return ret;
+ }
++
+ for_each_rtd_cpu_dais(rtd, i, sdai) {
+ ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_OUT);
+ if (ret && ret != -ENOTSUPP)
+diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
+index a3a7990b5cb66..283aa21879aa5 100644
+--- a/sound/soc/generic/simple-card.c
++++ b/sound/soc/generic/simple-card.c
+@@ -28,6 +28,30 @@ static const struct snd_soc_ops simple_ops = {
+ .hw_params = asoc_simple_hw_params,
+ };
+
++static int asoc_simple_parse_platform(struct device_node *node,
++ struct snd_soc_dai_link_component *dlc)
++{
++ struct of_phandle_args args;
++ int ret;
++
++ if (!node)
++ return 0;
++
++ /*
++ * Get node via "sound-dai = <&phandle port>"
++ * it will be used as xxx_of_node on soc_bind_dai_link()
++ */
++ ret = of_parse_phandle_with_args(node, DAI, CELL, 0, &args);
++ if (ret)
++ return ret;
++
++ /* dai_name is not required and may not exist for plat component */
++
++ dlc->of_node = args.np;
++
++ return 0;
++}
++
+ static int asoc_simple_parse_dai(struct device_node *node,
+ struct snd_soc_dai_link_component *dlc,
+ int *is_single_link)
+@@ -289,7 +313,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
+ if (ret < 0)
+ goto dai_link_of_err;
+
+- ret = asoc_simple_parse_dai(plat, platforms, NULL);
++ ret = asoc_simple_parse_platform(plat, platforms);
+ if (ret < 0)
+ goto dai_link_of_err;
+
+@@ -393,6 +417,7 @@ static int __simple_for_each_link(struct asoc_simple_priv *priv,
+
+ if (ret < 0) {
+ of_node_put(codec);
++ of_node_put(plat);
+ of_node_put(np);
+ goto error;
+ }
+diff --git a/sound/soc/intel/boards/bdw-rt5650.c b/sound/soc/intel/boards/bdw-rt5650.c
+index c5122d3b0e6c4..7c8c2557d6850 100644
+--- a/sound/soc/intel/boards/bdw-rt5650.c
++++ b/sound/soc/intel/boards/bdw-rt5650.c
+@@ -299,7 +299,7 @@ static int bdw_rt5650_probe(struct platform_device *pdev)
+ if (!bdw_rt5650)
+ return -ENOMEM;
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ ret = snd_soc_fixup_dai_links_platform_name(&bdw_rt5650_card,
+ mach->mach_params.platform);
+diff --git a/sound/soc/intel/boards/bdw-rt5677.c b/sound/soc/intel/boards/bdw-rt5677.c
+index e01b7a90ca6c7..e990940179095 100644
+--- a/sound/soc/intel/boards/bdw-rt5677.c
++++ b/sound/soc/intel/boards/bdw-rt5677.c
+@@ -426,7 +426,7 @@ static int bdw_rt5677_probe(struct platform_device *pdev)
+ if (!bdw_rt5677)
+ return -ENOMEM;
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ ret = snd_soc_fixup_dai_links_platform_name(&bdw_rt5677_card,
+ mach->mach_params.platform);
+diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c
+index 3c3aff9c61cc6..f18dcda23e74b 100644
+--- a/sound/soc/intel/boards/broadwell.c
++++ b/sound/soc/intel/boards/broadwell.c
+@@ -292,7 +292,7 @@ static int broadwell_audio_probe(struct platform_device *pdev)
+
+ broadwell_rt286.dev = &pdev->dev;
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ ret = snd_soc_fixup_dai_links_platform_name(&broadwell_rt286,
+ mach->mach_params.platform);
+diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c
+index e67ddfb8e4690..e49c64f54a12c 100644
+--- a/sound/soc/intel/boards/bxt_da7219_max98357a.c
++++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c
+@@ -825,7 +825,7 @@ static int broxton_audio_probe(struct platform_device *pdev)
+ }
+ }
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ platform_name = mach->mach_params.platform;
+
+diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c
+index 47f6b1523ae6b..0d1df37ecea0b 100644
+--- a/sound/soc/intel/boards/bxt_rt298.c
++++ b/sound/soc/intel/boards/bxt_rt298.c
+@@ -628,7 +628,7 @@ static int broxton_audio_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+ snd_soc_card_set_drvdata(card, ctx);
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ platform_name = mach->mach_params.platform;
+
+diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c
+index a9e51bbf018c3..0fc57db6e92cb 100644
+--- a/sound/soc/intel/boards/bytcht_cx2072x.c
++++ b/sound/soc/intel/boards/bytcht_cx2072x.c
+@@ -257,7 +257,7 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev)
+ byt_cht_cx2072x_dais[dai_index].codecs->name = codec_name;
+ }
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_cx2072x_card,
+ mach->mach_params.platform);
+ if (ret)
+diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c
+index a28773fb7892b..21b6bebc9a26a 100644
+--- a/sound/soc/intel/boards/bytcht_da7213.c
++++ b/sound/soc/intel/boards/bytcht_da7213.c
+@@ -260,7 +260,7 @@ static int bytcht_da7213_probe(struct platform_device *pdev)
+ dailink[dai_index].codecs->name = codec_name;
+ }
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ platform_name = mach->mach_params.platform;
+
+ ret_val = snd_soc_fixup_dai_links_platform_name(card, platform_name);
+diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c
+index 4d313d0d0f23e..b5c97d35864a6 100644
+--- a/sound/soc/intel/boards/bytcht_es8316.c
++++ b/sound/soc/intel/boards/bytcht_es8316.c
+@@ -443,6 +443,13 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = {
+ | BYT_CHT_ES8316_INTMIC_IN2_MAP
+ | BYT_CHT_ES8316_JD_INVERTED),
+ },
++ { /* Nanote UMPC-01 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"),
++ },
++ .driver_data = (void *)BYT_CHT_ES8316_INTMIC_IN1_MAP,
++ },
+ { /* Teclast X98 Plus II */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"),
+@@ -490,21 +497,28 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
+ if (adev) {
+ snprintf(codec_name, sizeof(codec_name),
+ "i2c-%s", acpi_dev_name(adev));
+- put_device(&adev->dev);
+ byt_cht_es8316_dais[dai_index].codecs->name = codec_name;
+ } else {
+ dev_err(dev, "Error cannot find '%s' dev\n", mach->id);
+ return -ENXIO;
+ }
+
+- /* override plaform name, if required */
++ codec_dev = acpi_get_first_physical_node(adev);
++ acpi_dev_put(adev);
++ if (!codec_dev)
++ return -EPROBE_DEFER;
++ priv->codec_dev = get_device(codec_dev);
++
++ /* override platform name, if required */
+ byt_cht_es8316_card.dev = dev;
+ platform_name = mach->mach_params.platform;
+
+ ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_es8316_card,
+ platform_name);
+- if (ret)
++ if (ret) {
++ put_device(codec_dev);
+ return ret;
++ }
+
+ /* Check for BYTCR or other platform and setup quirks */
+ dmi_id = dmi_first_match(byt_cht_es8316_quirk_table);
+@@ -532,14 +546,10 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
+
+ /* get the clock */
+ priv->mclk = devm_clk_get(dev, "pmc_plt_clk_3");
+- if (IS_ERR(priv->mclk))
++ if (IS_ERR(priv->mclk)) {
++ put_device(codec_dev);
+ return dev_err_probe(dev, PTR_ERR(priv->mclk), "clk_get pmc_plt_clk_3 failed\n");
+-
+- /* get speaker enable GPIO */
+- codec_dev = acpi_get_first_physical_node(adev);
+- if (!codec_dev)
+- return -EPROBE_DEFER;
+- priv->codec_dev = get_device(codec_dev);
++ }
+
+ if (quirk & BYT_CHT_ES8316_JD_INVERTED)
+ props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted");
+@@ -561,6 +571,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
+ }
+ }
+
++ /* get speaker enable GPIO */
+ devm_acpi_dev_add_driver_gpios(codec_dev, byt_cht_es8316_gpios);
+ priv->speaker_en_gpio =
+ gpiod_get_optional(codec_dev, "speaker-enable",
+diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
+index a6e837290c7dc..4df1be8170bb6 100644
+--- a/sound/soc/intel/boards/bytcr_rt5640.c
++++ b/sound/soc/intel/boards/bytcr_rt5640.c
+@@ -533,6 +533,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream,
+
+ /* Please keep this list alphabetically sorted */
+ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
++ { /* Acer Iconia One 7 B1-750 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
++ },
++ .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
++ BYT_RT5640_JD_SRC_JD1_IN4P |
++ BYT_RT5640_OVCD_TH_1500UA |
++ BYT_RT5640_OVCD_SF_0P75 |
++ BYT_RT5640_SSP0_AIF1 |
++ BYT_RT5640_MCLK_EN),
++ },
+ { /* Acer Iconia Tab 8 W1-810 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
+@@ -570,6 +582,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
+ },
++ {
++ /* Advantech MICA-071 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"),
++ },
++ /* OVCD Th = 1500uA to reliable detect head-phones vs -set */
++ .driver_data = (void *)(BYT_RT5640_IN3_MAP |
++ BYT_RT5640_JD_SRC_JD2_IN4N |
++ BYT_RT5640_OVCD_TH_1500UA |
++ BYT_RT5640_OVCD_SF_0P75 |
++ BYT_RT5640_MONO_SPEAKER |
++ BYT_RT5640_DIFF_MIC |
++ BYT_RT5640_MCLK_EN),
++ },
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+@@ -759,6 +786,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+ BYT_RT5640_OVCD_SF_0P75 |
+ BYT_RT5640_MCLK_EN),
+ },
++ { /* HP Pro Tablet 408 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Pro Tablet 408"),
++ },
++ .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
++ BYT_RT5640_JD_SRC_JD2_IN4N |
++ BYT_RT5640_OVCD_TH_1500UA |
++ BYT_RT5640_OVCD_SF_0P75 |
++ BYT_RT5640_SSP0_AIF1 |
++ BYT_RT5640_MCLK_EN),
++ },
+ { /* HP Stream 7 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+@@ -1534,13 +1573,18 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
+ if (adev) {
+ snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name),
+ "i2c-%s", acpi_dev_name(adev));
+- put_device(&adev->dev);
+ byt_rt5640_dais[dai_index].codecs->name = byt_rt5640_codec_name;
+ } else {
+ dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id);
+ return -ENXIO;
+ }
+
++ codec_dev = acpi_get_first_physical_node(adev);
++ acpi_dev_put(adev);
++ if (!codec_dev)
++ return -EPROBE_DEFER;
++ priv->codec_dev = get_device(codec_dev);
++
+ /*
+ * swap SSP0 if bytcr is detected
+ * (will be overridden if DMI quirk is detected)
+@@ -1615,11 +1659,6 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
+ byt_rt5640_quirk = quirk_override;
+ }
+
+- codec_dev = acpi_get_first_physical_node(adev);
+- if (!codec_dev)
+- return -EPROBE_DEFER;
+- priv->codec_dev = get_device(codec_dev);
+-
+ if (byt_rt5640_quirk & BYT_RT5640_JD_HP_ELITEP_1000G2) {
+ acpi_dev_add_driver_gpios(ACPI_COMPANION(priv->codec_dev),
+ byt_rt5640_hp_elitepad_1000g2_gpios);
+@@ -1706,7 +1745,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
+ byt_rt5640_card.long_name = byt_rt5640_long_name;
+ #endif
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ platform_name = mach->mach_params.platform;
+
+ ret_val = snd_soc_fixup_dai_links_platform_name(&byt_rt5640_card,
+diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
+index e94c9124d4f41..93cec4d916273 100644
+--- a/sound/soc/intel/boards/bytcr_rt5651.c
++++ b/sound/soc/intel/boards/bytcr_rt5651.c
+@@ -930,7 +930,6 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
+ if (adev) {
+ snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name),
+ "i2c-%s", acpi_dev_name(adev));
+- put_device(&adev->dev);
+ byt_rt5651_dais[dai_index].codecs->name = byt_rt5651_codec_name;
+ } else {
+ dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id);
+@@ -938,6 +937,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
+ }
+
+ codec_dev = acpi_get_first_physical_node(adev);
++ acpi_dev_put(adev);
+ if (!codec_dev)
+ return -EPROBE_DEFER;
+ priv->codec_dev = get_device(codec_dev);
+@@ -1104,7 +1104,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
+ byt_rt5651_card.long_name = byt_rt5651_long_name;
+ #endif
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ platform_name = mach->mach_params.platform;
+
+ ret_val = snd_soc_fixup_dai_links_platform_name(&byt_rt5651_card,
+diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c
+index 580d5fddae5ad..9a4126f19d5f7 100644
+--- a/sound/soc/intel/boards/bytcr_wm5102.c
++++ b/sound/soc/intel/boards/bytcr_wm5102.c
+@@ -411,9 +411,9 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev)
+ return -ENOENT;
+ }
+ snprintf(codec_name, sizeof(codec_name), "spi-%s", acpi_dev_name(adev));
+- put_device(&adev->dev);
+
+ codec_dev = bus_find_device_by_name(&spi_bus_type, NULL, codec_name);
++ acpi_dev_put(adev);
+ if (!codec_dev)
+ return -EPROBE_DEFER;
+
+@@ -421,8 +421,17 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev)
+ priv->spkvdd_en_gpio = gpiod_get(codec_dev, "wlf,spkvdd-ena", GPIOD_OUT_LOW);
+ put_device(codec_dev);
+
+- if (IS_ERR(priv->spkvdd_en_gpio))
+- return dev_err_probe(dev, PTR_ERR(priv->spkvdd_en_gpio), "getting spkvdd-GPIO\n");
++ if (IS_ERR(priv->spkvdd_en_gpio)) {
++ ret = PTR_ERR(priv->spkvdd_en_gpio);
++ /*
++ * The spkvdd gpio-lookup is registered by: drivers/mfd/arizona-spi.c,
++ * so -ENOENT means that arizona-spi hasn't probed yet.
++ */
++ if (ret == -ENOENT)
++ ret = -EPROBE_DEFER;
++
++ return dev_err_probe(dev, ret, "getting spkvdd-GPIO\n");
++ }
+
+ /* override platform name, if required */
+ byt_wm5102_card.dev = dev;
+diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+index 131882378a594..ba6de1e389cd8 100644
+--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
++++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+@@ -296,7 +296,7 @@ static int cht_max98090_headset_init(struct snd_soc_component *component)
+ int ret;
+
+ /*
+- * TI supports 4 butons headset detection
++ * TI supports 4 buttons headset detection
+ * KEY_MEDIA
+ * KEY_VOICECOMMAND
+ * KEY_VOLUMEUP
+@@ -558,7 +558,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
+ dev_dbg(dev, "Unable to add GPIO mapping table\n");
+ }
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ snd_soc_card_cht.dev = &pdev->dev;
+ mach = pdev->dev.platform_data;
+ platform_name = mach->mach_params.platform;
+diff --git a/sound/soc/intel/boards/cht_bsw_nau8824.c b/sound/soc/intel/boards/cht_bsw_nau8824.c
+index da5a5cbc87590..779b388db85d3 100644
+--- a/sound/soc/intel/boards/cht_bsw_nau8824.c
++++ b/sound/soc/intel/boards/cht_bsw_nau8824.c
+@@ -100,7 +100,7 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime)
+ struct snd_soc_component *component = codec_dai->component;
+ int ret, jack_type;
+
+- /* NAU88L24 supports 4 butons headset detection
++ /* NAU88L24 supports 4 buttons headset detection
+ * KEY_PLAYPAUSE
+ * KEY_VOICECOMMAND
+ * KEY_VOLUMEUP
+@@ -257,7 +257,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
+ return -ENOMEM;
+ snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ snd_soc_card_cht.dev = &pdev->dev;
+ mach = pdev->dev.platform_data;
+ platform_name = mach->mach_params.platform;
+diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
+index 804dbc7911d50..381bf6054047f 100644
+--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
++++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
+@@ -653,7 +653,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
+ (cht_rt5645_quirk & CHT_RT5645_SSP0_AIF2))
+ cht_dailink[dai_index].cpus->dai_name = "ssp0-port";
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ platform_name = mach->mach_params.platform;
+
+ ret_val = snd_soc_fixup_dai_links_platform_name(card,
+diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
+index 9509b6e161b89..ba96741c7771b 100644
+--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
++++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
+@@ -483,7 +483,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
+ drv->use_ssp0 = true;
+ }
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ snd_soc_card_cht.dev = &pdev->dev;
+ platform_name = mach->mach_params.platform;
+
+diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c
+index 71fe26a1b7011..99b3d7642cb77 100644
+--- a/sound/soc/intel/boards/glk_rt5682_max98357a.c
++++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c
+@@ -604,7 +604,7 @@ static int geminilake_audio_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+ snd_soc_card_set_drvdata(card, ctx);
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ platform_name = mach->mach_params.platform;
+
+diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c
+index c763bfeb1f38f..b5ca3177be6a3 100644
+--- a/sound/soc/intel/boards/haswell.c
++++ b/sound/soc/intel/boards/haswell.c
+@@ -175,7 +175,7 @@ static int haswell_audio_probe(struct platform_device *pdev)
+
+ haswell_rt5640.dev = &pdev->dev;
+
+- /* override plaform name, if required */
++ /* override platform name, if required */
+ mach = pdev->dev.platform_data;
+ ret = snd_soc_fixup_dai_links_platform_name(&haswell_rt5640,
+ mach->mach_params.platform);
+diff --git a/sound/soc/intel/boards/sof_cs42l42.c b/sound/soc/intel/boards/sof_cs42l42.c
+index ce78c18798876..8061082d9fbf3 100644
+--- a/sound/soc/intel/boards/sof_cs42l42.c
++++ b/sound/soc/intel/boards/sof_cs42l42.c
+@@ -311,6 +311,9 @@ static int create_spk_amp_dai_links(struct device *dev,
+ links[*id].platforms = platform_component;
+ links[*id].num_platforms = ARRAY_SIZE(platform_component);
+ links[*id].dpcm_playback = 1;
++ /* firmware-generated echo reference */
++ links[*id].dpcm_capture = 1;
++
+ links[*id].no_pcm = 1;
+ links[*id].cpus = &cpus[*id];
+ links[*id].num_cpus = 1;
+diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
+index f096bd6d69be7..d0ce2f06b30c6 100644
+--- a/sound/soc/intel/boards/sof_rt5682.c
++++ b/sound/soc/intel/boards/sof_rt5682.c
+@@ -737,8 +737,6 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
+ links[id].num_codecs = ARRAY_SIZE(max_98373_components);
+ links[id].init = max_98373_spk_codec_init;
+ links[id].ops = &max_98373_ops;
+- /* feedback stream */
+- links[id].dpcm_capture = 1;
+ } else if (sof_rt5682_quirk &
+ SOF_MAX98360A_SPEAKER_AMP_PRESENT) {
+ max_98360a_dai_link(&links[id]);
+@@ -751,6 +749,9 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
+ links[id].platforms = platform_component;
+ links[id].num_platforms = ARRAY_SIZE(platform_component);
+ links[id].dpcm_playback = 1;
++ /* feedback stream or firmware-generated echo reference */
++ links[id].dpcm_capture = 1;
++
+ links[id].no_pcm = 1;
+ links[id].cpus = &cpus[id];
+ links[id].num_cpus = 1;
+diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
+index 6b06248a9327a..089b6c7994f9a 100644
+--- a/sound/soc/intel/boards/sof_sdw.c
++++ b/sound/soc/intel/boards/sof_sdw.c
+@@ -184,11 +184,11 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
+ .callback = sof_sdw_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+- DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"),
+ },
+ .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+ SOF_SDW_PCH_DMIC |
+- RT711_JD2),
++ RT711_JD1),
+ },
+ {
+ /* NUC15 'Bishop County' LAPBC510 and LAPBC710 skews */
+@@ -201,6 +201,28 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
+ SOF_SDW_PCH_DMIC |
+ RT711_JD1),
+ },
++ {
++ /* NUC15 LAPBC710 skews */
++ .callback = sof_sdw_quirk_cb,
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"),
++ },
++ .driver_data = (void *)(SOF_SDW_TGL_HDMI |
++ SOF_SDW_PCH_DMIC |
++ RT711_JD1),
++ },
++ {
++ /* NUC15 'Rooks County' LAPRC510 and LAPRC710 skews */
++ .callback = sof_sdw_quirk_cb,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "LAPRC"),
++ },
++ .driver_data = (void *)(SOF_SDW_TGL_HDMI |
++ SOF_SDW_PCH_DMIC |
++ RT711_JD2_100K),
++ },
+ /* TigerLake-SDCA devices */
+ {
+ .callback = sof_sdw_quirk_cb,
+@@ -213,6 +235,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
+ SOF_RT715_DAI_ID_FIX |
+ SOF_SDW_FOUR_SPK),
+ },
++ {
++ .callback = sof_sdw_quirk_cb,
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A45")
++ },
++ .driver_data = (void *)(SOF_SDW_TGL_HDMI |
++ RT711_JD2 |
++ SOF_RT715_DAI_ID_FIX),
++ },
+ /* AlderLake devices */
+ {
+ .callback = sof_sdw_quirk_cb,
+@@ -270,7 +302,7 @@ int sdw_prepare(struct snd_pcm_substream *substream)
+ /* Find stream from first CPU DAI */
+ dai = asoc_rtd_to_cpu(rtd, 0);
+
+- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream);
++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream);
+
+ if (IS_ERR(sdw_stream)) {
+ dev_err(rtd->dev, "no stream found for DAI %s", dai->name);
+@@ -290,7 +322,7 @@ int sdw_trigger(struct snd_pcm_substream *substream, int cmd)
+ /* Find stream from first CPU DAI */
+ dai = asoc_rtd_to_cpu(rtd, 0);
+
+- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream);
++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream);
+
+ if (IS_ERR(sdw_stream)) {
+ dev_err(rtd->dev, "no stream found for DAI %s", dai->name);
+@@ -329,7 +361,7 @@ int sdw_hw_free(struct snd_pcm_substream *substream)
+ /* Find stream from first CPU DAI */
+ dai = asoc_rtd_to_cpu(rtd, 0);
+
+- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream);
++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream);
+
+ if (IS_ERR(sdw_stream)) {
+ dev_err(rtd->dev, "no stream found for DAI %s", dai->name);
+@@ -1313,6 +1345,33 @@ static struct snd_soc_card card_sof_sdw = {
+ .late_probe = sof_sdw_card_late_probe,
+ };
+
++static void mc_dailink_exit_loop(struct snd_soc_card *card)
++{
++ struct snd_soc_dai_link *link;
++ int ret;
++ int i, j;
++
++ for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) {
++ if (!codec_info_list[i].exit)
++ continue;
++ /*
++ * We don't need to call .exit function if there is no matched
++ * dai link found.
++ */
++ for_each_card_prelinks(card, j, link) {
++ if (!strcmp(link->codecs[0].dai_name,
++ codec_info_list[i].dai_name)) {
++ ret = codec_info_list[i].exit(card, link);
++ if (ret)
++ dev_warn(card->dev,
++ "codec exit failed %d\n",
++ ret);
++ break;
++ }
++ }
++ }
++}
++
+ static int mc_probe(struct platform_device *pdev)
+ {
+ struct snd_soc_card *card = &card_sof_sdw;
+@@ -1377,6 +1436,7 @@ static int mc_probe(struct platform_device *pdev)
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+ if (ret) {
+ dev_err(card->dev, "snd_soc_register_card failed %d\n", ret);
++ mc_dailink_exit_loop(card);
+ return ret;
+ }
+
+@@ -1388,29 +1448,8 @@ static int mc_probe(struct platform_device *pdev)
+ static int mc_remove(struct platform_device *pdev)
+ {
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+- struct snd_soc_dai_link *link;
+- int ret;
+- int i, j;
+
+- for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) {
+- if (!codec_info_list[i].exit)
+- continue;
+- /*
+- * We don't need to call .exit function if there is no matched
+- * dai link found.
+- */
+- for_each_card_prelinks(card, j, link) {
+- if (!strcmp(link->codecs[0].dai_name,
+- codec_info_list[i].dai_name)) {
+- ret = codec_info_list[i].exit(card, link);
+- if (ret)
+- dev_warn(&pdev->dev,
+- "codec exit failed %d\n",
+- ret);
+- break;
+- }
+- }
+- }
++ mc_dailink_exit_loop(card);
+
+ return 0;
+ }
+diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c
+index 9c5fd18f2600f..346bec0003066 100644
+--- a/sound/soc/intel/catpt/dsp.c
++++ b/sound/soc/intel/catpt/dsp.c
+@@ -65,6 +65,7 @@ static int catpt_dma_memcpy(struct catpt_dev *cdev, struct dma_chan *chan,
+ {
+ struct dma_async_tx_descriptor *desc;
+ enum dma_status status;
++ int ret;
+
+ desc = dmaengine_prep_dma_memcpy(chan, dst_addr, src_addr, size,
+ DMA_CTRL_ACK);
+@@ -77,13 +78,22 @@ static int catpt_dma_memcpy(struct catpt_dev *cdev, struct dma_chan *chan,
+ catpt_updatel_shim(cdev, HMDC,
+ CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id),
+ CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id));
+- dmaengine_submit(desc);
++
++ ret = dma_submit_error(dmaengine_submit(desc));
++ if (ret) {
++ dev_err(cdev->dev, "submit tx failed: %d\n", ret);
++ goto clear_hdda;
++ }
++
+ status = dma_wait_for_async_tx(desc);
++ ret = (status == DMA_COMPLETE) ? 0 : -EPROTO;
++
++clear_hdda:
+ /* regardless of status, disable access to HOST memory in demand mode */
+ catpt_updatel_shim(cdev, HMDC,
+ CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0);
+
+- return (status == DMA_COMPLETE) ? 0 : -EPROTO;
++ return ret;
+ }
+
+ int catpt_dma_memcpy_todsp(struct catpt_dev *cdev, struct dma_chan *chan,
+diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
+index 785d5f5f8a9c9..c93d8019b0e55 100644
+--- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
++++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
+@@ -127,13 +127,13 @@ static const struct snd_soc_acpi_adr_device mx8373_1_adr[] = {
+ {
+ .adr = 0x000123019F837300ull,
+ .num_endpoints = 1,
+- .endpoints = &spk_l_endpoint,
++ .endpoints = &spk_r_endpoint,
+ .name_prefix = "Right"
+ },
+ {
+ .adr = 0x000127019F837300ull,
+ .num_endpoints = 1,
+- .endpoints = &spk_r_endpoint,
++ .endpoints = &spk_l_endpoint,
+ .name_prefix = "Left"
+ }
+ };
+@@ -156,6 +156,15 @@ static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = {
+ }
+ };
+
++static const struct snd_soc_acpi_adr_device rt1316_1_single_adr[] = {
++ {
++ .adr = 0x000131025D131601ull,
++ .num_endpoints = 1,
++ .endpoints = &single_endpoint,
++ .name_prefix = "rt1316-1"
++ }
++};
++
+ static const struct snd_soc_acpi_adr_device rt1316_1_group1_adr[] = {
+ {
+ .adr = 0x000131025D131601ull, /* unique ID is set for some reason */
+@@ -320,6 +329,25 @@ static const struct snd_soc_acpi_link_adr tgl_3_in_1_sdca[] = {
+ {}
+ };
+
++static const struct snd_soc_acpi_link_adr tgl_3_in_1_sdca_mono[] = {
++ {
++ .mask = BIT(0),
++ .num_adr = ARRAY_SIZE(rt711_sdca_0_adr),
++ .adr_d = rt711_sdca_0_adr,
++ },
++ {
++ .mask = BIT(1),
++ .num_adr = ARRAY_SIZE(rt1316_1_single_adr),
++ .adr_d = rt1316_1_single_adr,
++ },
++ {
++ .mask = BIT(3),
++ .num_adr = ARRAY_SIZE(rt714_3_adr),
++ .adr_d = rt714_3_adr,
++ },
++ {}
++};
++
+ static const struct snd_soc_acpi_codecs tgl_max98373_amp = {
+ .num_codecs = 1,
+ .codecs = {"MX98373"}
+@@ -412,6 +440,19 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_sdw_machines[] = {
+ .drv_name = "sof_sdw",
+ .sof_tplg_filename = "sof-tgl-rt711-rt1316-rt714.tplg",
+ },
++ {
++ /*
++ * link_mask should be 0xB, but all links are enabled by BIOS.
++ * This entry will be selected if there is no rt1316 amplifier exposed
++ * on link2 since it will fail to match the above entry.
++ */
++
++ .link_mask = 0xF, /* 4 active links required */
++ .links = tgl_3_in_1_sdca_mono,
++ .drv_name = "sof_sdw",
++ .sof_tplg_filename = "sof-tgl-rt711-l0-rt1316-l1-mono-rt714-l3.tplg",
++ },
++
+ {
+ .link_mask = 0x3, /* rt711 on link 0 and 1 rt1308 on link 1 */
+ .links = tgl_hp,
+diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
+index 64226072f0ee2..74f60f5dfaefd 100644
+--- a/sound/soc/intel/skylake/skl-nhlt.c
++++ b/sound/soc/intel/skylake/skl-nhlt.c
+@@ -201,7 +201,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+ struct nhlt_fmt_cfg *fmt_cfg;
+ struct wav_fmt_ext *wav_fmt;
+ unsigned long rate;
+- bool present = false;
+ int rate_index = 0;
+ u16 channels, bps;
+ u8 clk_src;
+@@ -214,9 +213,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+ if (fmt->fmt_count == 0)
+ return;
+
++ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
+ for (i = 0; i < fmt->fmt_count; i++) {
+- fmt_cfg = &fmt->fmt_config[i];
+- wav_fmt = &fmt_cfg->fmt_ext;
++ struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg;
++ bool present = false;
++
++ wav_fmt = &saved_fmt_cfg->fmt_ext;
+
+ channels = wav_fmt->fmt.channels;
+ bps = wav_fmt->fmt.bits_per_sample;
+@@ -234,12 +236,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+ * derive the rate.
+ */
+ for (j = i; j < fmt->fmt_count; j++) {
+- fmt_cfg = &fmt->fmt_config[j];
+- wav_fmt = &fmt_cfg->fmt_ext;
++ struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg;
++
++ wav_fmt = &tmp_fmt_cfg->fmt_ext;
+ if ((fs == wav_fmt->fmt.samples_per_sec) &&
+- (bps == wav_fmt->fmt.bits_per_sample))
++ (bps == wav_fmt->fmt.bits_per_sample)) {
+ channels = max_t(u16, channels,
+ wav_fmt->fmt.channels);
++ saved_fmt_cfg = tmp_fmt_cfg;
++ }
++ /* Move to the next nhlt_fmt_cfg */
++ tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps +
++ tmp_fmt_cfg->config.size);
+ }
+
+ rate = channels * bps * fs;
+@@ -255,8 +263,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+
+ /* Fill rate and parent for sclk/sclkfs */
+ if (!present) {
++ struct nhlt_fmt_cfg *first_fmt_cfg;
++
++ first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
+ i2s_config_ext = (struct skl_i2s_config_blob_ext *)
+- fmt->fmt_config[0].config.caps;
++ first_fmt_cfg->config.caps;
+
+ /* MCLK Divider Source Select */
+ if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
+@@ -270,6 +281,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+
+ parent = skl_get_parent_clk(clk_src);
+
++ /* Move to the next nhlt_fmt_cfg */
++ fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps +
++ fmt_cfg->config.size);
+ /*
+ * Do not copy the config data if there is no parent
+ * clock available for this clock source select
+@@ -278,9 +292,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
+ continue;
+
+ sclk[id].rate_cfg[rate_index].rate = rate;
+- sclk[id].rate_cfg[rate_index].config = fmt_cfg;
++ sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg;
+ sclkfs[id].rate_cfg[rate_index].rate = rate;
+- sclkfs[id].rate_cfg[rate_index].config = fmt_cfg;
++ sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg;
+ sclk[id].parent_name = parent->name;
+ sclkfs[id].parent_name = parent->name;
+
+@@ -294,13 +308,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
+ {
+ struct skl_i2s_config_blob_ext *i2s_config_ext;
+ struct skl_i2s_config_blob_legacy *i2s_config;
+- struct nhlt_specific_cfg *fmt_cfg;
++ struct nhlt_fmt_cfg *fmt_cfg;
+ struct skl_clk_parent_src *parent;
+ u32 clkdiv, div_ratio;
+ u8 clk_src;
+
+- fmt_cfg = &fmt->fmt_config[0].config;
+- i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps;
++ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
++ i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps;
+
+ /* MCLK Divider Source Select and divider */
+ if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
+@@ -329,7 +343,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
+ return;
+
+ mclk[id].rate_cfg[0].rate = parent->rate/div_ratio;
+- mclk[id].rate_cfg[0].config = &fmt->fmt_config[0];
++ mclk[id].rate_cfg[0].config = fmt_cfg;
+ mclk[id].parent_name = parent->name;
+ }
+
+diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
+index 9ecaf6a1e8475..db41bd7170650 100644
+--- a/sound/soc/intel/skylake/skl-pcm.c
++++ b/sound/soc/intel/skylake/skl-pcm.c
+@@ -562,11 +562,8 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream,
+
+ stream_tag = hdac_stream(link_dev)->stream_tag;
+
+- /* set the stream tag in the codec dai dma params */
+- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0);
+- else
+- snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0);
++ /* set the hdac_stream in the codec dai */
++ snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream);
+
+ p_params.s_fmt = snd_pcm_format_width(params_format(params));
+ p_params.ch = params_channels(params);
+@@ -1251,7 +1248,6 @@ static int skl_platform_soc_get_time_info(
+ snd_pcm_gettime(substream->runtime, system_ts);
+
+ nsec = timecounter_read(&hstr->tc);
+- nsec = div_u64(nsec, 3); /* can be optimized */
+ if (audio_tstamp_config->report_delay)
+ nsec = skl_adjust_codec_delay(substream, nsec);
+
+diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
+index 5b1a15e399123..46bb3b8bd5afe 100644
+--- a/sound/soc/intel/skylake/skl.c
++++ b/sound/soc/intel/skylake/skl.c
+@@ -439,7 +439,7 @@ static int skl_free(struct hdac_bus *bus)
+
+ skl->init_done = 0; /* to be sure */
+
+- snd_hdac_ext_stop_streams(bus);
++ snd_hdac_stop_streams_and_chip(bus);
+
+ if (bus->irq >= 0)
+ free_irq(bus->irq, (void *)bus);
+@@ -1096,7 +1096,10 @@ static void skl_shutdown(struct pci_dev *pci)
+ if (!skl->init_done)
+ return;
+
+- snd_hdac_ext_stop_streams(bus);
++ snd_hdac_stop_streams(bus);
++ snd_hdac_ext_bus_link_power_down_all(bus);
++ skl_dsp_sleep(skl->dsp);
++
+ list_for_each_entry(s, &bus->stream_list, list) {
+ stream = stream_to_hdac_ext_stream(s);
+ snd_hdac_ext_stream_decouple(bus, stream, false);
+diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c
+index 7ad5d9a924d80..4e1fc4ba5150a 100644
+--- a/sound/soc/jz4740/jz4740-i2s.c
++++ b/sound/soc/jz4740/jz4740-i2s.c
+@@ -56,7 +56,8 @@
+ #define JZ_AIC_CTRL_MONO_TO_STEREO BIT(11)
+ #define JZ_AIC_CTRL_SWITCH_ENDIANNESS BIT(10)
+ #define JZ_AIC_CTRL_SIGNED_TO_UNSIGNED BIT(9)
+-#define JZ_AIC_CTRL_FLUSH BIT(8)
++#define JZ_AIC_CTRL_TFLUSH BIT(8)
++#define JZ_AIC_CTRL_RFLUSH BIT(7)
+ #define JZ_AIC_CTRL_ENABLE_ROR_INT BIT(6)
+ #define JZ_AIC_CTRL_ENABLE_TUR_INT BIT(5)
+ #define JZ_AIC_CTRL_ENABLE_RFS_INT BIT(4)
+@@ -91,6 +92,8 @@ enum jz47xx_i2s_version {
+ struct i2s_soc_info {
+ enum jz47xx_i2s_version version;
+ struct snd_soc_dai_driver *dai;
++
++ bool shared_fifo_flush;
+ };
+
+ struct jz4740_i2s {
+@@ -119,19 +122,44 @@ static inline void jz4740_i2s_write(const struct jz4740_i2s *i2s,
+ writel(value, i2s->base + reg);
+ }
+
++static inline void jz4740_i2s_set_bits(const struct jz4740_i2s *i2s,
++ unsigned int reg, uint32_t bits)
++{
++ uint32_t value = jz4740_i2s_read(i2s, reg);
++ value |= bits;
++ jz4740_i2s_write(i2s, reg, value);
++}
++
+ static int jz4740_i2s_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+ struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai);
+- uint32_t conf, ctrl;
++ uint32_t conf;
+ int ret;
+
++ /*
++ * When we can flush FIFOs independently, only flush the FIFO
++ * that is starting up. We can do this when the DAI is active
++ * because it does not disturb other active substreams.
++ */
++ if (!i2s->soc_info->shared_fifo_flush) {
++ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH);
++ else
++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_RFLUSH);
++ }
++
+ if (snd_soc_dai_active(dai))
+ return 0;
+
+- ctrl = jz4740_i2s_read(i2s, JZ_REG_AIC_CTRL);
+- ctrl |= JZ_AIC_CTRL_FLUSH;
+- jz4740_i2s_write(i2s, JZ_REG_AIC_CTRL, ctrl);
++ /*
++ * When there is a shared flush bit for both FIFOs, the TFLUSH
++ * bit flushes both FIFOs. Flushing while the DAI is active would
++ * cause FIFO underruns in other active substreams so we have to
++ * guard this behind the snd_soc_dai_active() check.
++ */
++ if (i2s->soc_info->shared_fifo_flush)
++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH);
+
+ ret = clk_prepare_enable(i2s->clk_i2s);
+ if (ret)
+@@ -462,6 +490,7 @@ static struct snd_soc_dai_driver jz4740_i2s_dai = {
+ static const struct i2s_soc_info jz4740_i2s_soc_info = {
+ .version = JZ_I2S_JZ4740,
+ .dai = &jz4740_i2s_dai,
++ .shared_fifo_flush = true,
+ };
+
+ static const struct i2s_soc_info jz4760_i2s_soc_info = {
+diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c
+index 700a18561a940..640cebd2983e2 100644
+--- a/sound/soc/kirkwood/kirkwood-dma.c
++++ b/sound/soc/kirkwood/kirkwood-dma.c
+@@ -86,7 +86,7 @@ kirkwood_dma_conf_mbus_windows(void __iomem *base, int win,
+
+ /* try to find matching cs for current dma address */
+ for (i = 0; i < dram->num_cs; i++) {
+- const struct mbus_dram_window *cs = dram->cs + i;
++ const struct mbus_dram_window *cs = &dram->cs[i];
+ if ((cs->base & 0xffff0000) < (dma & 0xffff0000)) {
+ writel(cs->base & 0xffff0000,
+ base + KIRKWOOD_AUDIO_WIN_BASE_REG(win));
+diff --git a/sound/soc/mediatek/common/mtk-btcvsd.c b/sound/soc/mediatek/common/mtk-btcvsd.c
+index d884bb7c0fc74..1c28b41e43112 100644
+--- a/sound/soc/mediatek/common/mtk-btcvsd.c
++++ b/sound/soc/mediatek/common/mtk-btcvsd.c
+@@ -1038,11 +1038,9 @@ static int mtk_pcm_btcvsd_copy(struct snd_soc_component *component,
+ struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(component);
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- mtk_btcvsd_snd_write(bt, buf, count);
++ return mtk_btcvsd_snd_write(bt, buf, count);
+ else
+- mtk_btcvsd_snd_read(bt, buf, count);
+-
+- return 0;
++ return mtk_btcvsd_snd_read(bt, buf, count);
+ }
+
+ /* kcontrol */
+diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c
+index 414e422c0eba0..70e494fb3da87 100644
+--- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c
++++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c
+@@ -129,7 +129,8 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev)
+ if (!codec_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_platform_node;
+ }
+ for_each_card_prelinks(card, i, dai_link) {
+ if (dai_link->codecs->name)
+@@ -140,7 +141,7 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev)
+ ret = snd_soc_of_parse_audio_routing(card, "audio-routing");
+ if (ret) {
+ dev_err(&pdev->dev, "failed to parse audio-routing: %d\n", ret);
+- return ret;
++ goto put_codec_node;
+ }
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+@@ -148,6 +149,10 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
+
++put_codec_node:
++ of_node_put(codec_node);
++put_platform_node:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt6797/mt6797-mt6351.c b/sound/soc/mediatek/mt6797/mt6797-mt6351.c
+index 496f32bcfb5e3..d2f6213a6bfcc 100644
+--- a/sound/soc/mediatek/mt6797/mt6797-mt6351.c
++++ b/sound/soc/mediatek/mt6797/mt6797-mt6351.c
+@@ -217,7 +217,8 @@ static int mt6797_mt6351_dev_probe(struct platform_device *pdev)
+ if (!codec_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_platform_node;
+ }
+ for_each_card_prelinks(card, i, dai_link) {
+ if (dai_link->codecs->name)
+@@ -230,6 +231,9 @@ static int mt6797_mt6351_dev_probe(struct platform_device *pdev)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
+
++ of_node_put(codec_node);
++put_platform_node:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
+index 6350390414d4a..b8b5038d1be1f 100644
+--- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
++++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
+@@ -1054,6 +1054,7 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev)
+ int irq_id;
+ struct mtk_base_afe *afe;
+ struct mt8173_afe_private *afe_priv;
++ struct snd_soc_component *comp_pcm, *comp_hdmi;
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(33));
+ if (ret)
+@@ -1074,12 +1075,6 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev)
+ irq_id = platform_get_irq(pdev, 0);
+ if (irq_id <= 0)
+ return irq_id < 0 ? irq_id : -ENXIO;
+- ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler,
+- 0, "Afe_ISR_Handle", (void *)afe);
+- if (ret) {
+- dev_err(afe->dev, "could not request_irq\n");
+- return ret;
+- }
+
+ afe->base_addr = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(afe->base_addr))
+@@ -1142,23 +1137,62 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev)
+ if (ret)
+ goto err_pm_disable;
+
+- ret = devm_snd_soc_register_component(&pdev->dev,
+- &mt8173_afe_pcm_dai_component,
+- mt8173_afe_pcm_dais,
+- ARRAY_SIZE(mt8173_afe_pcm_dais));
++ comp_pcm = devm_kzalloc(&pdev->dev, sizeof(*comp_pcm), GFP_KERNEL);
++ if (!comp_pcm) {
++ ret = -ENOMEM;
++ goto err_pm_disable;
++ }
++
++ ret = snd_soc_component_initialize(comp_pcm,
++ &mt8173_afe_pcm_dai_component,
++ &pdev->dev);
+ if (ret)
+ goto err_pm_disable;
+
+- ret = devm_snd_soc_register_component(&pdev->dev,
+- &mt8173_afe_hdmi_dai_component,
+- mt8173_afe_hdmi_dais,
+- ARRAY_SIZE(mt8173_afe_hdmi_dais));
++#ifdef CONFIG_DEBUG_FS
++ comp_pcm->debugfs_prefix = "pcm";
++#endif
++
++ ret = snd_soc_add_component(comp_pcm,
++ mt8173_afe_pcm_dais,
++ ARRAY_SIZE(mt8173_afe_pcm_dais));
+ if (ret)
+ goto err_pm_disable;
+
++ comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL);
++ if (!comp_hdmi) {
++ ret = -ENOMEM;
++ goto err_cleanup_components;
++ }
++
++ ret = snd_soc_component_initialize(comp_hdmi,
++ &mt8173_afe_hdmi_dai_component,
++ &pdev->dev);
++ if (ret)
++ goto err_cleanup_components;
++
++#ifdef CONFIG_DEBUG_FS
++ comp_hdmi->debugfs_prefix = "hdmi";
++#endif
++
++ ret = snd_soc_add_component(comp_hdmi,
++ mt8173_afe_hdmi_dais,
++ ARRAY_SIZE(mt8173_afe_hdmi_dais));
++ if (ret)
++ goto err_cleanup_components;
++
++ ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler,
++ 0, "Afe_ISR_Handle", (void *)afe);
++ if (ret) {
++ dev_err(afe->dev, "could not request_irq\n");
++ goto err_cleanup_components;
++ }
++
+ dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n");
+ return 0;
+
++err_cleanup_components:
++ snd_soc_unregister_component(&pdev->dev);
+ err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+ return ret;
+@@ -1166,6 +1200,8 @@ err_pm_disable:
+
+ static int mt8173_afe_pcm_dev_remove(struct platform_device *pdev)
+ {
++ snd_soc_unregister_component(&pdev->dev);
++
+ pm_runtime_disable(&pdev->dev);
+ if (!pm_runtime_status_suspended(&pdev->dev))
+ mt8173_afe_runtime_suspend(&pdev->dev);
+diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c
+index fc94314bfc02f..5f39e810e27ae 100644
+--- a/sound/soc/mediatek/mt8173/mt8173-max98090.c
++++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c
+@@ -167,7 +167,8 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev)
+ if (!codec_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_platform_node;
+ }
+ for_each_card_prelinks(card, i, dai_link) {
+ if (dai_link->codecs->name)
+@@ -180,6 +181,11 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev)
+ if (ret)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
++
++ of_node_put(codec_node);
++
++put_platform_node:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
+index 0f28dc2217c09..9421b919d4627 100644
+--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
++++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
+@@ -200,14 +200,16 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev)
+ if (!mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto out;
+ }
+ mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node =
+ of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 1);
+ if (!mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto out;
+ }
+ mt8173_rt5650_rt5514_codec_conf[0].dlc.of_node =
+ mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node;
+@@ -218,6 +220,9 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev)
+ if (ret)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
++
++out:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
+index 077c6ee067806..94a9bbf144d15 100644
+--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
++++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
+@@ -256,14 +256,16 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev)
+ if (!mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_node;
+ }
+ mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node =
+ of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 1);
+ if (!mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_node;
+ }
+ mt8173_rt5650_rt5676_codec_conf[0].dlc.of_node =
+ mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node;
+@@ -276,7 +278,8 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev)
+ if (!mt8173_rt5650_rt5676_dais[DAI_LINK_HDMI_I2S].codecs->of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_node;
+ }
+
+ card->dev = &pdev->dev;
+@@ -285,6 +288,9 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev)
+ if (ret)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
++
++put_node:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
+index c28ebf891cb05..1de9dab218c64 100644
+--- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c
++++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
+@@ -280,7 +280,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev)
+ if (!mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_platform_node;
+ }
+ mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node =
+ mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node;
+@@ -293,7 +294,7 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev)
+ dev_err(&pdev->dev,
+ "%s codec_capture_dai name fail %d\n",
+ __func__, ret);
+- return ret;
++ goto put_platform_node;
+ }
+ mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[1].dai_name =
+ codec_capture_dai;
+@@ -315,7 +316,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev)
+ if (!mt8173_rt5650_dais[DAI_LINK_HDMI_I2S].codecs->of_node) {
+ dev_err(&pdev->dev,
+ "Property 'audio-codec' missing or invalid\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_platform_node;
+ }
+ card->dev = &pdev->dev;
+
+@@ -323,6 +325,9 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev)
+ if (ret)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
++
++put_platform_node:
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
+index a4d26a6fc8492..0ab8b050b305f 100644
+--- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
++++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
+@@ -685,7 +685,6 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev)
+ struct snd_soc_dai_link *dai_link;
+ struct mt8183_da7219_max98357_priv *priv;
+ struct pinctrl *pinctrl;
+- const struct of_device_id *match;
+ int ret, i;
+
+ platform_node = of_parse_phandle(pdev->dev.of_node,
+@@ -695,11 +694,9 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+
+- match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+- if (!match || !match->data)
++ card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev);
++ if (!card)
+ return -EINVAL;
+-
+- card = (struct snd_soc_card *)match->data;
+ card->dev = &pdev->dev;
+
+ hdmi_codec = of_parse_phandle(pdev->dev.of_node,
+@@ -781,7 +778,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev)
+ return ret;
+ }
+
+- return devm_snd_soc_register_card(&pdev->dev, card);
++ ret = devm_snd_soc_register_card(&pdev->dev, card);
++
++ of_node_put(platform_node);
++ of_node_put(hdmi_codec);
++ return ret;
+ }
+
+ #ifdef CONFIG_OF
+diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
+index 94dcbd36c8697..4dab1ee69ec07 100644
+--- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
++++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
+@@ -637,7 +637,6 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
+ struct device_node *platform_node, *ec_codec, *hdmi_codec;
+ struct snd_soc_dai_link *dai_link;
+ struct mt8183_mt6358_ts3a227_max98357_priv *priv;
+- const struct of_device_id *match;
+ int ret, i;
+
+ platform_node = of_parse_phandle(pdev->dev.of_node,
+@@ -647,11 +646,11 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+
+- match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+- if (!match || !match->data)
++ card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev);
++ if (!card) {
++ of_node_put(platform_node);
+ return -EINVAL;
+-
+- card = (struct snd_soc_card *)match->data;
++ }
+ card->dev = &pdev->dev;
+
+ ec_codec = of_parse_phandle(pdev->dev.of_node, "mediatek,ec-codec", 0);
+@@ -740,8 +739,10 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
+ }
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+- if (!priv)
+- return -ENOMEM;
++ if (!priv) {
++ ret = -ENOMEM;
++ goto out;
++ }
+
+ snd_soc_card_set_drvdata(card, priv);
+
+@@ -749,7 +750,8 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->pinctrl)) {
+ dev_err(&pdev->dev, "%s devm_pinctrl_get failed\n",
+ __func__);
+- return PTR_ERR(priv->pinctrl);
++ ret = PTR_ERR(priv->pinctrl);
++ goto out;
+ }
+
+ for (i = 0; i < PIN_STATE_MAX; i++) {
+@@ -780,7 +782,13 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
+ __func__, ret);
+ }
+
+- return devm_snd_soc_register_card(&pdev->dev, card);
++ ret = devm_snd_soc_register_card(&pdev->dev, card);
++
++out:
++ of_node_put(platform_node);
++ of_node_put(ec_codec);
++ of_node_put(hdmi_codec);
++ return ret;
+ }
+
+ #ifdef CONFIG_OF
+diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
+index a606133951b70..c1d225b498513 100644
+--- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
++++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
+@@ -1106,7 +1106,6 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev)
+ struct device_node *platform_node, *hdmi_codec;
+ int ret, i;
+ struct snd_soc_dai_link *dai_link;
+- const struct of_device_id *match;
+ struct mt8192_mt6359_priv *priv;
+
+ platform_node = of_parse_phandle(pdev->dev.of_node,
+@@ -1116,11 +1115,11 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev)
+ return -EINVAL;
+ }
+
+- match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+- if (!match || !match->data)
+- return -EINVAL;
+-
+- card = (struct snd_soc_card *)match->data;
++ card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev);
++ if (!card) {
++ ret = -EINVAL;
++ goto put_platform_node;
++ }
+ card->dev = &pdev->dev;
+
+ hdmi_codec = of_parse_phandle(pdev->dev.of_node,
+@@ -1162,17 +1161,25 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev)
+ }
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+- if (!priv)
+- return -ENOMEM;
++ if (!priv) {
++ ret = -ENOMEM;
++ goto put_hdmi_codec;
++ }
+ snd_soc_card_set_drvdata(card, priv);
+
+ ret = mt8192_afe_gpio_init(&pdev->dev);
+ if (ret) {
+ dev_err(&pdev->dev, "init gpio error %d\n", ret);
+- return ret;
++ goto put_hdmi_codec;
+ }
+
+- return devm_snd_soc_register_card(&pdev->dev, card);
++ ret = devm_snd_soc_register_card(&pdev->dev, card);
++
++put_hdmi_codec:
++ of_node_put(hdmi_codec);
++put_platform_node:
++ of_node_put(platform_node);
++ return ret;
+ }
+
+ #ifdef CONFIG_OF
+diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
+index 8420b2c71332a..d1939e08d333c 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
++++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
+@@ -136,11 +136,6 @@ int mt8195_afe_init_clock(struct mtk_base_afe *afe)
+ return 0;
+ }
+
+-void mt8195_afe_deinit_clock(struct mtk_base_afe *afe)
+-{
+- mt8195_audsys_clk_unregister(afe);
+-}
+-
+ int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk)
+ {
+ int ret;
+diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
+index f8e6eeb29a895..24eb2f06682f2 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
++++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
+@@ -90,7 +90,6 @@ int mt8195_afe_get_mclk_source_clk_id(int sel);
+ int mt8195_afe_get_mclk_source_rate(struct mtk_base_afe *afe, int apll);
+ int mt8195_afe_get_default_mclk_source_by_rate(int rate);
+ int mt8195_afe_init_clock(struct mtk_base_afe *afe);
+-void mt8195_afe_deinit_clock(struct mtk_base_afe *afe);
+ int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk);
+ void mt8195_afe_disable_clk(struct mtk_base_afe *afe, struct clk *clk);
+ int mt8195_afe_prepare_clk(struct mtk_base_afe *afe, struct clk *clk);
+diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
+index 6635c3f72eccc..4e817542dd745 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
++++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
+@@ -3028,7 +3028,7 @@ static const struct reg_sequence mt8195_afe_reg_defaults[] = {
+
+ static const struct reg_sequence mt8195_cg_patch[] = {
+ { AUDIO_TOP_CON0, 0xfffffffb },
+- { AUDIO_TOP_CON1, 0xfffffffa },
++ { AUDIO_TOP_CON1, 0xfffffff8 },
+ };
+
+ static int mt8195_afe_init_registers(struct mtk_base_afe *afe)
+@@ -3237,18 +3237,13 @@ err_pm_put:
+ return ret;
+ }
+
+-static int mt8195_afe_pcm_dev_remove(struct platform_device *pdev)
++static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev)
+ {
+- struct mtk_base_afe *afe = platform_get_drvdata(pdev);
+-
+ snd_soc_unregister_component(&pdev->dev);
+
+ pm_runtime_disable(&pdev->dev);
+ if (!pm_runtime_status_suspended(&pdev->dev))
+ mt8195_afe_runtime_suspend(&pdev->dev);
+-
+- mt8195_afe_deinit_clock(afe);
+- return 0;
+ }
+
+ static const struct of_device_id mt8195_afe_pcm_dt_match[] = {
+@@ -3271,7 +3266,7 @@ static struct platform_driver mt8195_afe_pcm_driver = {
+ #endif
+ },
+ .probe = mt8195_afe_pcm_dev_probe,
+- .remove = mt8195_afe_pcm_dev_remove,
++ .remove_new = mt8195_afe_pcm_dev_remove,
+ };
+
+ module_platform_driver(mt8195_afe_pcm_driver);
+diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
+index 740aa6ddda0ec..353aa17323648 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
++++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
+@@ -148,6 +148,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = {
+ GATE_AUD6(CLK_AUD_GASRC19, "aud_gasrc19", "asm_h_sel", 19),
+ };
+
++static void mt8195_audsys_clk_unregister(void *data)
++{
++ struct mtk_base_afe *afe = data;
++ struct mt8195_afe_private *afe_priv = afe->platform_priv;
++ struct clk *clk;
++ struct clk_lookup *cl;
++ int i;
++
++ if (!afe_priv)
++ return;
++
++ for (i = 0; i < CLK_AUD_NR_CLK; i++) {
++ cl = afe_priv->lookup[i];
++ if (!cl)
++ continue;
++
++ clk = cl->clk;
++ clk_unregister_gate(clk);
++
++ clkdev_drop(cl);
++ }
++}
++
+ int mt8195_audsys_clk_register(struct mtk_base_afe *afe)
+ {
+ struct mt8195_afe_private *afe_priv = afe->platform_priv;
+@@ -188,27 +211,5 @@ int mt8195_audsys_clk_register(struct mtk_base_afe *afe)
+ afe_priv->lookup[i] = cl;
+ }
+
+- return 0;
+-}
+-
+-void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe)
+-{
+- struct mt8195_afe_private *afe_priv = afe->platform_priv;
+- struct clk *clk;
+- struct clk_lookup *cl;
+- int i;
+-
+- if (!afe_priv)
+- return;
+-
+- for (i = 0; i < CLK_AUD_NR_CLK; i++) {
+- cl = afe_priv->lookup[i];
+- if (!cl)
+- continue;
+-
+- clk = cl->clk;
+- clk_unregister_gate(clk);
+-
+- clkdev_drop(cl);
+- }
++ return devm_add_action_or_reset(afe->dev, mt8195_audsys_clk_unregister, afe);
+ }
+diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
+index 239d31016ba76..69db2dd1c9e02 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
++++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
+@@ -10,6 +10,5 @@
+ #define _MT8195_AUDSYS_CLK_H_
+
+ int mt8195_audsys_clk_register(struct mtk_base_afe *afe);
+-void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe);
+
+ #endif
+diff --git a/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c b/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c
+index 7378e42f27669..9031d410bbd0a 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c
++++ b/sound/soc/mediatek/mt8195/mt8195-dai-etdm.c
+@@ -2567,6 +2567,9 @@ static void mt8195_dai_etdm_parse_of(struct mtk_base_afe *afe)
+
+ /* etdm in only */
+ for (i = 0; i < 2; i++) {
++ dai_id = ETDM_TO_DAI_ID(i);
++ etdm_data = afe_priv->dai_priv[dai_id];
++
+ ret = snprintf(prop, sizeof(prop),
+ "mediatek,%s-chn-disabled",
+ of_afe_etdms[i].name);
+diff --git a/sound/soc/mediatek/mt8195/mt8195-dai-pcm.c b/sound/soc/mediatek/mt8195/mt8195-dai-pcm.c
+index 5d10d2c4c991c..151914c873acd 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-dai-pcm.c
++++ b/sound/soc/mediatek/mt8195/mt8195-dai-pcm.c
+@@ -80,8 +80,15 @@ static const struct snd_soc_dapm_widget mtk_dai_pcm_widgets[] = {
+ mtk_dai_pcm_o001_mix,
+ ARRAY_SIZE(mtk_dai_pcm_o001_mix)),
+
++ SND_SOC_DAPM_SUPPLY("PCM_EN", PCM_INTF_CON1,
++ PCM_INTF_CON1_PCM_EN_SHIFT, 0, NULL, 0),
++
+ SND_SOC_DAPM_INPUT("PCM1_INPUT"),
+ SND_SOC_DAPM_OUTPUT("PCM1_OUTPUT"),
++
++ SND_SOC_DAPM_CLOCK_SUPPLY("aud_asrc11"),
++ SND_SOC_DAPM_CLOCK_SUPPLY("aud_asrc12"),
++ SND_SOC_DAPM_CLOCK_SUPPLY("aud_pcmif"),
+ };
+
+ static const struct snd_soc_dapm_route mtk_dai_pcm_routes[] = {
+@@ -97,22 +104,18 @@ static const struct snd_soc_dapm_route mtk_dai_pcm_routes[] = {
+ {"PCM1 Playback", NULL, "O000"},
+ {"PCM1 Playback", NULL, "O001"},
+
++ {"PCM1 Playback", NULL, "PCM_EN"},
++ {"PCM1 Playback", NULL, "aud_asrc12"},
++ {"PCM1 Playback", NULL, "aud_pcmif"},
++
++ {"PCM1 Capture", NULL, "PCM_EN"},
++ {"PCM1 Capture", NULL, "aud_asrc11"},
++ {"PCM1 Capture", NULL, "aud_pcmif"},
++
+ {"PCM1_OUTPUT", NULL, "PCM1 Playback"},
+ {"PCM1 Capture", NULL, "PCM1_INPUT"},
+ };
+
+-static void mtk_dai_pcm_enable(struct mtk_base_afe *afe)
+-{
+- regmap_update_bits(afe->regmap, PCM_INTF_CON1,
+- PCM_INTF_CON1_PCM_EN, PCM_INTF_CON1_PCM_EN);
+-}
+-
+-static void mtk_dai_pcm_disable(struct mtk_base_afe *afe)
+-{
+- regmap_update_bits(afe->regmap, PCM_INTF_CON1,
+- PCM_INTF_CON1_PCM_EN, 0x0);
+-}
+-
+ static int mtk_dai_pcm_configure(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+@@ -207,54 +210,22 @@ static int mtk_dai_pcm_configure(struct snd_pcm_substream *substream,
+ }
+
+ /* dai ops */
+-static int mtk_dai_pcm_startup(struct snd_pcm_substream *substream,
+- struct snd_soc_dai *dai)
+-{
+- struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+- struct mt8195_afe_private *afe_priv = afe->platform_priv;
+-
+- if (dai->component->active)
+- return 0;
+-
+- mt8195_afe_enable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_ASRC11]);
+- mt8195_afe_enable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_ASRC12]);
+- mt8195_afe_enable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_PCMIF]);
+-
+- return 0;
+-}
+-
+-static void mtk_dai_pcm_shutdown(struct snd_pcm_substream *substream,
+- struct snd_soc_dai *dai)
+-{
+- struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+- struct mt8195_afe_private *afe_priv = afe->platform_priv;
+-
+- if (dai->component->active)
+- return;
+-
+- mtk_dai_pcm_disable(afe);
+-
+- mt8195_afe_disable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_PCMIF]);
+- mt8195_afe_disable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_ASRC12]);
+- mt8195_afe_disable_clk(afe, afe_priv->clk[MT8195_CLK_AUD_ASRC11]);
+-}
+-
+ static int mtk_dai_pcm_prepare(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+ {
+- struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+- int ret = 0;
++ int ret;
+
+- if (snd_soc_dai_stream_active(dai, SNDRV_PCM_STREAM_PLAYBACK) &&
+- snd_soc_dai_stream_active(dai, SNDRV_PCM_STREAM_CAPTURE))
++ dev_dbg(dai->dev, "%s(), id %d, stream %d, widget active p %d, c %d\n",
++ __func__, dai->id, substream->stream,
++ dai->playback_widget->active, dai->capture_widget->active);
++
++ if (dai->playback_widget->active || dai->capture_widget->active)
+ return 0;
+
+ ret = mtk_dai_pcm_configure(substream, dai);
+ if (ret)
+ return ret;
+
+- mtk_dai_pcm_enable(afe);
+-
+ return 0;
+ }
+
+@@ -316,8 +287,6 @@ static int mtk_dai_pcm_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+ }
+
+ static const struct snd_soc_dai_ops mtk_dai_pcm_ops = {
+- .startup = mtk_dai_pcm_startup,
+- .shutdown = mtk_dai_pcm_shutdown,
+ .prepare = mtk_dai_pcm_prepare,
+ .set_fmt = mtk_dai_pcm_set_fmt,
+ };
+diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
+index de09f67c04502..a3fa8efc8f81c 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
++++ b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
+@@ -1040,8 +1040,10 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
+ }
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+- if (!priv)
++ if (!priv) {
++ of_node_put(platform_node);
+ return -ENOMEM;
++ }
+
+ snd_soc_card_set_drvdata(card, priv);
+
+@@ -1049,6 +1051,8 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
+ if (ret)
+ dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+ __func__, ret);
++
++ of_node_put(platform_node);
+ return ret;
+ }
+
+diff --git a/sound/soc/mediatek/mt8195/mt8195-reg.h b/sound/soc/mediatek/mt8195/mt8195-reg.h
+index d06f9cf85a4ec..d3871353db415 100644
+--- a/sound/soc/mediatek/mt8195/mt8195-reg.h
++++ b/sound/soc/mediatek/mt8195/mt8195-reg.h
+@@ -2550,6 +2550,7 @@
+ #define PCM_INTF_CON1_PCM_FMT(x) (((x) & 0x3) << 1)
+ #define PCM_INTF_CON1_PCM_FMT_MASK (0x3 << 1)
+ #define PCM_INTF_CON1_PCM_EN BIT(0)
++#define PCM_INTF_CON1_PCM_EN_SHIFT 0
+
+ /* PCM_INTF_CON2 */
+ #define PCM_INTF_CON2_CLK_DOMAIN_SEL(x) (((x) & 0x3) << 23)
+diff --git a/sound/soc/meson/aiu-acodec-ctrl.c b/sound/soc/meson/aiu-acodec-ctrl.c
+index 27a6d3259c50a..442c215936d97 100644
+--- a/sound/soc/meson/aiu-acodec-ctrl.c
++++ b/sound/soc/meson/aiu-acodec-ctrl.c
+@@ -58,7 +58,7 @@ static int aiu_acodec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
+
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+
+- return 0;
++ return 1;
+ }
+
+ static SOC_ENUM_SINGLE_DECL(aiu_acodec_ctrl_mux_enum, AIU_ACODEC_CTRL,
+diff --git a/sound/soc/meson/aiu-codec-ctrl.c b/sound/soc/meson/aiu-codec-ctrl.c
+index c3ea733fce91f..c966fc60dc733 100644
+--- a/sound/soc/meson/aiu-codec-ctrl.c
++++ b/sound/soc/meson/aiu-codec-ctrl.c
+@@ -57,7 +57,7 @@ static int aiu_codec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
+
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+
+- return 0;
++ return 1;
+ }
+
+ static SOC_ENUM_SINGLE_DECL(aiu_hdmi_ctrl_mux_enum, AIU_HDMI_CLK_DATA_CTRL,
+diff --git a/sound/soc/meson/aiu-encoder-i2s.c b/sound/soc/meson/aiu-encoder-i2s.c
+index 9322245521463..67729de41a73e 100644
+--- a/sound/soc/meson/aiu-encoder-i2s.c
++++ b/sound/soc/meson/aiu-encoder-i2s.c
+@@ -18,7 +18,6 @@
+ #define AIU_RST_SOFT_I2S_FAST BIT(0)
+
+ #define AIU_I2S_DAC_CFG_MSB_FIRST BIT(2)
+-#define AIU_I2S_MISC_HOLD_EN BIT(2)
+ #define AIU_CLK_CTRL_I2S_DIV_EN BIT(0)
+ #define AIU_CLK_CTRL_I2S_DIV GENMASK(3, 2)
+ #define AIU_CLK_CTRL_AOCLK_INVERT BIT(6)
+@@ -36,37 +35,6 @@ static void aiu_encoder_i2s_divider_enable(struct snd_soc_component *component,
+ enable ? AIU_CLK_CTRL_I2S_DIV_EN : 0);
+ }
+
+-static void aiu_encoder_i2s_hold(struct snd_soc_component *component,
+- bool enable)
+-{
+- snd_soc_component_update_bits(component, AIU_I2S_MISC,
+- AIU_I2S_MISC_HOLD_EN,
+- enable ? AIU_I2S_MISC_HOLD_EN : 0);
+-}
+-
+-static int aiu_encoder_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+- struct snd_soc_dai *dai)
+-{
+- struct snd_soc_component *component = dai->component;
+-
+- switch (cmd) {
+- case SNDRV_PCM_TRIGGER_START:
+- case SNDRV_PCM_TRIGGER_RESUME:
+- case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+- aiu_encoder_i2s_hold(component, false);
+- return 0;
+-
+- case SNDRV_PCM_TRIGGER_STOP:
+- case SNDRV_PCM_TRIGGER_SUSPEND:
+- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+- aiu_encoder_i2s_hold(component, true);
+- return 0;
+-
+- default:
+- return -EINVAL;
+- }
+-}
+-
+ static int aiu_encoder_i2s_setup_desc(struct snd_soc_component *component,
+ struct snd_pcm_hw_params *params)
+ {
+@@ -353,7 +321,6 @@ static void aiu_encoder_i2s_shutdown(struct snd_pcm_substream *substream,
+ }
+
+ const struct snd_soc_dai_ops aiu_encoder_i2s_dai_ops = {
+- .trigger = aiu_encoder_i2s_trigger,
+ .hw_params = aiu_encoder_i2s_hw_params,
+ .hw_free = aiu_encoder_i2s_hw_free,
+ .set_fmt = aiu_encoder_i2s_set_fmt,
+diff --git a/sound/soc/meson/aiu-fifo-i2s.c b/sound/soc/meson/aiu-fifo-i2s.c
+index 2388a2d0b3a6c..57e6e7160d2f2 100644
+--- a/sound/soc/meson/aiu-fifo-i2s.c
++++ b/sound/soc/meson/aiu-fifo-i2s.c
+@@ -20,6 +20,8 @@
+ #define AIU_MEM_I2S_CONTROL_MODE_16BIT BIT(6)
+ #define AIU_MEM_I2S_BUF_CNTL_INIT BIT(0)
+ #define AIU_RST_SOFT_I2S_FAST BIT(0)
++#define AIU_I2S_MISC_HOLD_EN BIT(2)
++#define AIU_I2S_MISC_FORCE_LEFT_RIGHT BIT(4)
+
+ #define AIU_FIFO_I2S_BLOCK 256
+
+@@ -90,6 +92,10 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream,
+ unsigned int val;
+ int ret;
+
++ snd_soc_component_update_bits(component, AIU_I2S_MISC,
++ AIU_I2S_MISC_HOLD_EN,
++ AIU_I2S_MISC_HOLD_EN);
++
+ ret = aiu_fifo_hw_params(substream, params, dai);
+ if (ret)
+ return ret;
+@@ -117,6 +123,19 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream,
+ snd_soc_component_update_bits(component, AIU_MEM_I2S_MASKS,
+ AIU_MEM_I2S_MASKS_IRQ_BLOCK, val);
+
++ /*
++ * Most (all?) supported SoCs have this bit set by default. The vendor
++ * driver however sets it manually (depending on the version either
++ * while un-setting AIU_I2S_MISC_HOLD_EN or right before that). Follow
++ * the same approach for consistency with the vendor driver.
++ */
++ snd_soc_component_update_bits(component, AIU_I2S_MISC,
++ AIU_I2S_MISC_FORCE_LEFT_RIGHT,
++ AIU_I2S_MISC_FORCE_LEFT_RIGHT);
++
++ snd_soc_component_update_bits(component, AIU_I2S_MISC,
++ AIU_I2S_MISC_HOLD_EN, 0);
++
+ return 0;
+ }
+
+diff --git a/sound/soc/meson/aiu-fifo.c b/sound/soc/meson/aiu-fifo.c
+index 4ad23267cace5..d67ff4cdabd5a 100644
+--- a/sound/soc/meson/aiu-fifo.c
++++ b/sound/soc/meson/aiu-fifo.c
+@@ -5,6 +5,7 @@
+
+ #include <linux/bitfield.h>
+ #include <linux/clk.h>
++#include <linux/dma-mapping.h>
+ #include <sound/pcm_params.h>
+ #include <sound/soc.h>
+ #include <sound/soc-dai.h>
+@@ -179,6 +180,11 @@ int aiu_fifo_pcm_new(struct snd_soc_pcm_runtime *rtd,
+ struct snd_card *card = rtd->card->snd_card;
+ struct aiu_fifo *fifo = dai->playback_dma_data;
+ size_t size = fifo->pcm->buffer_bytes_max;
++ int ret;
++
++ ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
++ if (ret)
++ return ret;
+
+ snd_pcm_set_managed_buffer_all(rtd->pcm, SNDRV_DMA_TYPE_DEV,
+ card->dev, size, size);
+diff --git a/sound/soc/meson/axg-tdm-formatter.c b/sound/soc/meson/axg-tdm-formatter.c
+index cab7fa2851aa8..4834cfd163c03 100644
+--- a/sound/soc/meson/axg-tdm-formatter.c
++++ b/sound/soc/meson/axg-tdm-formatter.c
+@@ -30,27 +30,32 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
+ struct axg_tdm_stream *ts,
+ unsigned int offset)
+ {
+- unsigned int val, ch = ts->channels;
+- unsigned long mask;
+- int i, j;
++ unsigned int ch = ts->channels;
++ u32 val[AXG_TDM_NUM_LANES];
++ int i, j, k;
++
++ /*
++ * We need to mimick the slot distribution used by the HW to keep the
++ * channel placement consistent regardless of the number of channel
++ * in the stream. This is why the odd algorithm below is used.
++ */
++ memset(val, 0, sizeof(*val) * AXG_TDM_NUM_LANES);
+
+ /*
+ * Distribute the channels of the stream over the available slots
+- * of each TDM lane
++ * of each TDM lane. We need to go over the 32 slots ...
+ */
+- for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
+- val = 0;
+- mask = ts->mask[i];
+-
+- for (j = find_first_bit(&mask, 32);
+- (j < 32) && ch;
+- j = find_next_bit(&mask, 32, j + 1)) {
+- val |= 1 << j;
+- ch -= 1;
++ for (i = 0; (i < 32) && ch; i += 2) {
++ /* ... of all the lanes ... */
++ for (j = 0; j < AXG_TDM_NUM_LANES; j++) {
++ /* ... then distribute the channels in pairs */
++ for (k = 0; k < 2; k++) {
++ if ((BIT(i + k) & ts->mask[j]) && ch) {
++ val[j] |= BIT(i + k);
++ ch -= 1;
++ }
++ }
+ }
+-
+- regmap_write(map, offset, val);
+- offset += regmap_get_reg_stride(map);
+ }
+
+ /*
+@@ -63,6 +68,11 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
+ return -EINVAL;
+ }
+
++ for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
++ regmap_write(map, offset, val[i]);
++ offset += regmap_get_reg_stride(map);
++ }
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(axg_tdm_formatter_set_channel_masks);
+diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c
+index 9b2b59536ced0..6c99052feafd8 100644
+--- a/sound/soc/meson/g12a-tohdmitx.c
++++ b/sound/soc/meson/g12a-tohdmitx.c
+@@ -67,7 +67,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol,
+
+ snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+
+- return 0;
++ return 1;
+ }
+
+ static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_i2s_mux_enum, TOHDMITX_CTRL0,
+diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c
+index 6a2d24d489647..7afe1a1acc568 100644
+--- a/sound/soc/mxs/mxs-saif.c
++++ b/sound/soc/mxs/mxs-saif.c
+@@ -455,7 +455,10 @@ static int mxs_saif_hw_params(struct snd_pcm_substream *substream,
+ * basic clock which should be fast enough for the internal
+ * logic.
+ */
+- clk_enable(saif->clk);
++ ret = clk_enable(saif->clk);
++ if (ret)
++ return ret;
++
+ ret = clk_set_rate(saif->clk, 24000000);
+ clk_disable(saif->clk);
+ if (ret)
+@@ -751,6 +754,7 @@ static int mxs_saif_probe(struct platform_device *pdev)
+ saif->master_id = saif->id;
+ } else {
+ ret = of_alias_get_id(master, "saif");
++ of_node_put(master);
+ if (ret < 0)
+ return ret;
+ else
+diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c
+index a6407f4388de7..fb721bc499496 100644
+--- a/sound/soc/mxs/mxs-sgtl5000.c
++++ b/sound/soc/mxs/mxs-sgtl5000.c
+@@ -118,6 +118,9 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev)
+ codec_np = of_parse_phandle(np, "audio-codec", 0);
+ if (!saif_np[0] || !saif_np[1] || !codec_np) {
+ dev_err(&pdev->dev, "phandle missing or invalid\n");
++ of_node_put(codec_np);
++ of_node_put(saif_np[0]);
++ of_node_put(saif_np[1]);
+ return -EINVAL;
+ }
+
+diff --git a/sound/soc/pxa/mmp-pcm.c b/sound/soc/pxa/mmp-pcm.c
+index 5d520e18e512f..99b245e3079a2 100644
+--- a/sound/soc/pxa/mmp-pcm.c
++++ b/sound/soc/pxa/mmp-pcm.c
+@@ -98,7 +98,7 @@ static bool filter(struct dma_chan *chan, void *param)
+
+ devname = kasprintf(GFP_KERNEL, "%s.%d", dma_data->dma_res->name,
+ dma_data->ssp_id);
+- if ((strcmp(dev_name(chan->device->dev), devname) == 0) &&
++ if (devname && (strcmp(dev_name(chan->device->dev), devname) == 0) &&
+ (chan->chan_id == dma_data->dma_res->start)) {
+ found = true;
+ }
+diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
+index 3bd9eb3cc688b..9f5e3e1dfd947 100644
+--- a/sound/soc/qcom/lpass-cpu.c
++++ b/sound/soc/qcom/lpass-cpu.c
+@@ -779,10 +779,20 @@ static bool lpass_hdmi_regmap_volatile(struct device *dev, unsigned int reg)
+ return true;
+ if (reg == LPASS_HDMI_TX_LEGACY_ADDR(v))
+ return true;
++ if (reg == LPASS_HDMI_TX_VBIT_CTL_ADDR(v))
++ return true;
++ if (reg == LPASS_HDMI_TX_PARITY_ADDR(v))
++ return true;
+
+ for (i = 0; i < v->hdmi_rdma_channels; ++i) {
+ if (reg == LPAIF_HDMI_RDMACURR_REG(v, i))
+ return true;
++ if (reg == LPASS_HDMI_TX_DMA_ADDR(v, i))
++ return true;
++ if (reg == LPASS_HDMI_TX_CH_LSB_ADDR(v, i))
++ return true;
++ if (reg == LPASS_HDMI_TX_CH_MSB_ADDR(v, i))
++ return true;
+ }
+ return false;
+ }
+@@ -841,10 +851,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev,
+ struct lpass_data *data)
+ {
+ struct device_node *node;
+- int ret, id;
++ int ret, i, id;
+
+ /* Allow all channels by default for backwards compatibility */
+- for (id = 0; id < data->variant->num_dai; id++) {
++ for (i = 0; i < data->variant->num_dai; i++) {
++ id = data->variant->dai_driver[i].id;
+ data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH;
+ data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH;
+ }
+@@ -880,6 +891,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)
+ dsp_of_node = of_parse_phandle(pdev->dev.of_node, "qcom,adsp", 0);
+ if (dsp_of_node) {
+ dev_err(dev, "DSP exists and holds audio resources\n");
++ of_node_put(dsp_of_node);
+ return -EBUSY;
+ }
+
+diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c
+index a59e9d20cb46b..4b1773c1fb95f 100644
+--- a/sound/soc/qcom/lpass-platform.c
++++ b/sound/soc/qcom/lpass-platform.c
+@@ -524,7 +524,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component,
+ return -EINVAL;
+ }
+
+- ret = regmap_update_bits(map, reg_irqclr, val_irqclr, val_irqclr);
++ ret = regmap_write_bits(map, reg_irqclr, val_irqclr, val_irqclr);
+ if (ret) {
+ dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", ret);
+ return ret;
+@@ -665,7 +665,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
+ return -EINVAL;
+ }
+ if (interrupts & LPAIF_IRQ_PER(chan)) {
+- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val));
++ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val));
+ if (rv) {
+ dev_err(soc_runtime->dev,
+ "error writing to irqclear reg: %d\n", rv);
+@@ -676,7 +676,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
+ }
+
+ if (interrupts & LPAIF_IRQ_XRUN(chan)) {
+- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val));
++ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val));
+ if (rv) {
+ dev_err(soc_runtime->dev,
+ "error writing to irqclear reg: %d\n", rv);
+@@ -688,7 +688,7 @@ static irqreturn_t lpass_dma_interrupt_handler(
+ }
+
+ if (interrupts & LPAIF_IRQ_ERR(chan)) {
+- rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val));
++ rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val));
+ if (rv) {
+ dev_err(soc_runtime->dev,
+ "error writing to irqclear reg: %d\n", rv);
+diff --git a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c
+index 77a556b27cf09..24a1c121cb2e9 100644
+--- a/sound/soc/qcom/lpass-sc7180.c
++++ b/sound/soc/qcom/lpass-sc7180.c
+@@ -131,6 +131,9 @@ static int sc7180_lpass_init(struct platform_device *pdev)
+
+ drvdata->clks = devm_kcalloc(dev, variant->num_clks,
+ sizeof(*drvdata->clks), GFP_KERNEL);
++ if (!drvdata->clks)
++ return -ENOMEM;
++
+ drvdata->num_clks = variant->num_clks;
+
+ for (i = 0; i < drvdata->num_clks; i++)
+diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c
+index 3d831b635524f..4ae97afe90624 100644
+--- a/sound/soc/qcom/qdsp6/q6adm.c
++++ b/sound/soc/qcom/qdsp6/q6adm.c
+@@ -217,7 +217,7 @@ static struct q6copp *q6adm_alloc_copp(struct q6adm *adm, int port_idx)
+ idx = find_first_zero_bit(&adm->copp_bitmap[port_idx],
+ MAX_COPPS_PER_PORT);
+
+- if (idx > MAX_COPPS_PER_PORT)
++ if (idx >= MAX_COPPS_PER_PORT)
+ return ERR_PTR(-EBUSY);
+
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
+diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
+index 46f365528d501..b74b67720ef43 100644
+--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
++++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
+@@ -269,9 +269,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
+
+ if (ret < 0) {
+ dev_err(dev, "%s: q6asm_open_write failed\n", __func__);
+- q6asm_audio_client_free(prtd->audio_client);
+- prtd->audio_client = NULL;
+- return -ENOMEM;
++ goto open_err;
+ }
+
+ prtd->session_id = q6asm_get_session_id(prtd->audio_client);
+@@ -279,7 +277,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
+ prtd->session_id, substream->stream);
+ if (ret) {
+ dev_err(dev, "%s: stream reg failed ret:%d\n", __func__, ret);
+- return ret;
++ goto routing_err;
+ }
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+@@ -301,10 +299,19 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
+ }
+ if (ret < 0)
+ dev_info(dev, "%s: CMD Format block failed\n", __func__);
++ else
++ prtd->state = Q6ASM_STREAM_RUNNING;
+
+- prtd->state = Q6ASM_STREAM_RUNNING;
++ return ret;
+
+- return 0;
++routing_err:
++ q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE);
++open_err:
++ q6asm_unmap_memory_regions(substream->stream, prtd->audio_client);
++ q6asm_audio_client_free(prtd->audio_client);
++ prtd->audio_client = NULL;
++
++ return ret;
+ }
+
+ static int q6asm_dai_trigger(struct snd_soc_component *component,
+diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c
+index 3390ebef9549d..18c90bb4922be 100644
+--- a/sound/soc/qcom/qdsp6/q6routing.c
++++ b/sound/soc/qcom/qdsp6/q6routing.c
+@@ -492,9 +492,15 @@ static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol,
+ struct session_data *session = &data->sessions[session_id];
+
+ if (ucontrol->value.integer.value[0]) {
++ if (session->port_id == be_id)
++ return 0;
++
+ session->port_id = be_id;
+ snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update);
+ } else {
++ if (session->port_id == -1 || session->port_id != be_id)
++ return 0;
++
+ session->port_id = -1;
+ snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update);
+ }
+diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c
+index 0adfc57089492..4da5ad609fcea 100644
+--- a/sound/soc/qcom/sdm845.c
++++ b/sound/soc/qcom/sdm845.c
+@@ -56,8 +56,8 @@ static int sdm845_slim_snd_hw_params(struct snd_pcm_substream *substream,
+ int ret = 0, i;
+
+ for_each_rtd_codec_dais(rtd, i, codec_dai) {
+- sruntime = snd_soc_dai_get_sdw_stream(codec_dai,
+- substream->stream);
++ sruntime = snd_soc_dai_get_stream(codec_dai,
++ substream->stream);
+ if (sruntime != ERR_PTR(-ENOTSUPP))
+ pdata->sruntime[cpu_dai->id] = sruntime;
+
+diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c
+index fe8fd7367e21b..feb6589171ca7 100644
+--- a/sound/soc/qcom/sm8250.c
++++ b/sound/soc/qcom/sm8250.c
+@@ -70,8 +70,8 @@ static int sm8250_snd_hw_params(struct snd_pcm_substream *substream,
+ switch (cpu_dai->id) {
+ case WSA_CODEC_DMA_RX_0:
+ for_each_rtd_codec_dais(rtd, i, codec_dai) {
+- sruntime = snd_soc_dai_get_sdw_stream(codec_dai,
+- substream->stream);
++ sruntime = snd_soc_dai_get_stream(codec_dai,
++ substream->stream);
+ if (sruntime != ERR_PTR(-ENOTSUPP))
+ pdata->sruntime[cpu_dai->id] = sruntime;
+ }
+@@ -191,6 +191,7 @@ static int sm8250_platform_probe(struct platform_device *pdev)
+ if (!card)
+ return -ENOMEM;
+
++ card->owner = THIS_MODULE;
+ /* Allocate the private data */
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
+index 7e89f5b0c237f..2880a05376469 100644
+--- a/sound/soc/rockchip/rockchip_i2s.c
++++ b/sound/soc/rockchip/rockchip_i2s.c
+@@ -717,19 +717,23 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
+ i2s->mclk = devm_clk_get(&pdev->dev, "i2s_clk");
+ if (IS_ERR(i2s->mclk)) {
+ dev_err(&pdev->dev, "Can't retrieve i2s master clock\n");
+- return PTR_ERR(i2s->mclk);
++ ret = PTR_ERR(i2s->mclk);
++ goto err_clk;
+ }
+
+ regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+- if (IS_ERR(regs))
+- return PTR_ERR(regs);
++ if (IS_ERR(regs)) {
++ ret = PTR_ERR(regs);
++ goto err_clk;
++ }
+
+ i2s->regmap = devm_regmap_init_mmio(&pdev->dev, regs,
+ &rockchip_i2s_regmap_config);
+ if (IS_ERR(i2s->regmap)) {
+ dev_err(&pdev->dev,
+ "Failed to initialise managed register map\n");
+- return PTR_ERR(i2s->regmap);
++ ret = PTR_ERR(i2s->regmap);
++ goto err_clk;
+ }
+
+ i2s->bclk_ratio = 64;
+@@ -769,7 +773,8 @@ err_suspend:
+ i2s_runtime_suspend(&pdev->dev);
+ err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+-
++err_clk:
++ clk_disable_unprepare(i2s->hclk);
+ return ret;
+ }
+
+diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c
+index 38bd603eeb454..7c0b0fe326c22 100644
+--- a/sound/soc/rockchip/rockchip_pdm.c
++++ b/sound/soc/rockchip/rockchip_pdm.c
+@@ -368,6 +368,7 @@ static int rockchip_pdm_runtime_resume(struct device *dev)
+
+ ret = clk_prepare_enable(pdm->hclk);
+ if (ret) {
++ clk_disable_unprepare(pdm->clk);
+ dev_err(pdm->dev, "hclock enable failed %d\n", ret);
+ return ret;
+ }
+diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
+index d027ca4b17964..09a25d84fee6f 100644
+--- a/sound/soc/rockchip/rockchip_spdif.c
++++ b/sound/soc/rockchip/rockchip_spdif.c
+@@ -88,6 +88,7 @@ static int __maybe_unused rk_spdif_runtime_resume(struct device *dev)
+
+ ret = clk_prepare_enable(spdif->hclk);
+ if (ret) {
++ clk_disable_unprepare(spdif->mclk);
+ dev_err(spdif->dev, "hclk clock enable failed %d\n", ret);
+ return ret;
+ }
+diff --git a/sound/soc/samsung/aries_wm8994.c b/sound/soc/samsung/aries_wm8994.c
+index 313ab650f8d9f..a0825da9fff97 100644
+--- a/sound/soc/samsung/aries_wm8994.c
++++ b/sound/soc/samsung/aries_wm8994.c
+@@ -585,19 +585,16 @@ static int aries_audio_probe(struct platform_device *pdev)
+
+ extcon_np = of_parse_phandle(np, "extcon", 0);
+ priv->usb_extcon = extcon_find_edev_by_node(extcon_np);
+- if (IS_ERR(priv->usb_extcon)) {
+- if (PTR_ERR(priv->usb_extcon) != -EPROBE_DEFER)
+- dev_err(dev, "Failed to get extcon device");
+- return PTR_ERR(priv->usb_extcon);
+- }
+ of_node_put(extcon_np);
++ if (IS_ERR(priv->usb_extcon))
++ return dev_err_probe(dev, PTR_ERR(priv->usb_extcon),
++ "Failed to get extcon device");
+
+ priv->adc = devm_iio_channel_get(dev, "headset-detect");
+- if (IS_ERR(priv->adc)) {
+- if (PTR_ERR(priv->adc) != -EPROBE_DEFER)
+- dev_err(dev, "Failed to get ADC channel");
+- return PTR_ERR(priv->adc);
+- }
++ if (IS_ERR(priv->adc))
++ return dev_err_probe(dev, PTR_ERR(priv->adc),
++ "Failed to get ADC channel");
++
+ if (priv->adc->channel->type != IIO_VOLTAGE)
+ return -EINVAL;
+
+@@ -631,8 +628,10 @@ static int aries_audio_probe(struct platform_device *pdev)
+ return -EINVAL;
+
+ codec = of_get_child_by_name(dev->of_node, "codec");
+- if (!codec)
+- return -EINVAL;
++ if (!codec) {
++ ret = -EINVAL;
++ goto out;
++ }
+
+ for_each_card_prelinks(card, i, dai_link) {
+ dai_link->codecs->of_node = of_parse_phandle(codec,
+diff --git a/sound/soc/samsung/arndale.c b/sound/soc/samsung/arndale.c
+index 606ac5e33a8e9..a5dc640d0d768 100644
+--- a/sound/soc/samsung/arndale.c
++++ b/sound/soc/samsung/arndale.c
+@@ -174,9 +174,8 @@ static int arndale_audio_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(card->dev, card);
+ if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev,
+- "snd_soc_register_card() failed: %d\n", ret);
++ dev_err_probe(&pdev->dev, ret,
++ "snd_soc_register_card() failed\n");
+ goto err_put_of_nodes;
+ }
+ return 0;
+diff --git a/sound/soc/samsung/h1940_uda1380.c b/sound/soc/samsung/h1940_uda1380.c
+index c994e67d1eaf0..ca086243fcfd6 100644
+--- a/sound/soc/samsung/h1940_uda1380.c
++++ b/sound/soc/samsung/h1940_uda1380.c
+@@ -8,7 +8,7 @@
+ // Based on version from Arnaud Patard <arnaud.patard@rtp-net.org>
+
+ #include <linux/types.h>
+-#include <linux/gpio.h>
++#include <linux/gpio/consumer.h>
+ #include <linux/module.h>
+
+ #include <sound/soc.h>
+diff --git a/sound/soc/samsung/idma.c b/sound/soc/samsung/idma.c
+index 66bcc2f97544b..c3f1b054e2389 100644
+--- a/sound/soc/samsung/idma.c
++++ b/sound/soc/samsung/idma.c
+@@ -360,6 +360,8 @@ static int preallocate_idma_buffer(struct snd_pcm *pcm, int stream)
+ buf->addr = idma.lp_tx_addr;
+ buf->bytes = idma_hardware.buffer_bytes_max;
+ buf->area = (unsigned char * __force)ioremap(buf->addr, buf->bytes);
++ if (!buf->area)
++ return -ENOMEM;
+
+ return 0;
+ }
+diff --git a/sound/soc/samsung/littlemill.c b/sound/soc/samsung/littlemill.c
+index 390f2dd735ad2..34067cc314ff0 100644
+--- a/sound/soc/samsung/littlemill.c
++++ b/sound/soc/samsung/littlemill.c
+@@ -325,9 +325,8 @@ static int littlemill_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
+- ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/lowland.c b/sound/soc/samsung/lowland.c
+index 998d10cf8c947..7b12ccd2a9b22 100644
+--- a/sound/soc/samsung/lowland.c
++++ b/sound/soc/samsung/lowland.c
+@@ -183,9 +183,8 @@ static int lowland_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
+- ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c
+index ca643a488c3ca..4ff12e2e704fe 100644
+--- a/sound/soc/samsung/odroid.c
++++ b/sound/soc/samsung/odroid.c
+@@ -311,9 +311,7 @@ static int odroid_audio_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(dev, card);
+ if (ret < 0) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "snd_soc_register_card() failed: %d\n",
+- ret);
++ dev_err_probe(dev, ret, "snd_soc_register_card() failed\n");
+ goto err_put_clk_i2s;
+ }
+
+diff --git a/sound/soc/samsung/rx1950_uda1380.c b/sound/soc/samsung/rx1950_uda1380.c
+index 6ea1c8cc91675..2820097b00b93 100644
+--- a/sound/soc/samsung/rx1950_uda1380.c
++++ b/sound/soc/samsung/rx1950_uda1380.c
+@@ -128,7 +128,7 @@ static int rx1950_startup(struct snd_pcm_substream *substream)
+ &hw_rates);
+ }
+
+-struct gpio_desc *gpiod_speaker_power;
++static struct gpio_desc *gpiod_speaker_power;
+
+ static int rx1950_spk_power(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+@@ -227,7 +227,7 @@ static int rx1950_probe(struct platform_device *pdev)
+ return devm_snd_soc_register_card(dev, &rx1950_asoc);
+ }
+
+-struct platform_driver rx1950_audio = {
++static struct platform_driver rx1950_audio = {
+ .driver = {
+ .name = "rx1950-audio",
+ .pm = &snd_soc_pm_ops,
+diff --git a/sound/soc/samsung/smdk_wm8994.c b/sound/soc/samsung/smdk_wm8994.c
+index 7661b637946d1..821ad1eb1b79f 100644
+--- a/sound/soc/samsung/smdk_wm8994.c
++++ b/sound/soc/samsung/smdk_wm8994.c
+@@ -179,8 +179,8 @@ static int smdk_audio_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/smdk_wm8994pcm.c b/sound/soc/samsung/smdk_wm8994pcm.c
+index 029448f5bedbe..d77dc54cae9c8 100644
+--- a/sound/soc/samsung/smdk_wm8994pcm.c
++++ b/sound/soc/samsung/smdk_wm8994pcm.c
+@@ -118,8 +118,8 @@ static int snd_smdk_probe(struct platform_device *pdev)
+
+ smdk_pcm.dev = &pdev->dev;
+ ret = devm_snd_soc_register_card(&pdev->dev, &smdk_pcm);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card failed %d\n", ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/snow.c b/sound/soc/samsung/snow.c
+index 6da674e901caf..02372109c251e 100644
+--- a/sound/soc/samsung/snow.c
++++ b/sound/soc/samsung/snow.c
+@@ -212,12 +212,9 @@ static int snow_probe(struct platform_device *pdev)
+ snd_soc_card_set_drvdata(card, priv);
+
+ ret = devm_snd_soc_register_card(dev, card);
+- if (ret) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev,
+- "snd_soc_register_card failed (%d)\n", ret);
+- return ret;
+- }
++ if (ret)
++ return dev_err_probe(&pdev->dev, ret,
++ "snd_soc_register_card failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/speyside.c b/sound/soc/samsung/speyside.c
+index f5f6ba00d0731..37b1f4f60b210 100644
+--- a/sound/soc/samsung/speyside.c
++++ b/sound/soc/samsung/speyside.c
+@@ -330,9 +330,8 @@ static int speyside_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
+- ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/samsung/tm2_wm5110.c b/sound/soc/samsung/tm2_wm5110.c
+index a2c77e6defec5..d611ec9e5325e 100644
+--- a/sound/soc/samsung/tm2_wm5110.c
++++ b/sound/soc/samsung/tm2_wm5110.c
+@@ -612,8 +612,7 @@ static int tm2_probe(struct platform_device *pdev)
+
+ ret = devm_snd_soc_register_card(dev, card);
+ if (ret < 0) {
+- if (ret != -EPROBE_DEFER)
+- dev_err(dev, "Failed to register card: %d\n", ret);
++ dev_err_probe(dev, ret, "Failed to register card\n");
+ goto dai_node_put;
+ }
+
+diff --git a/sound/soc/samsung/tobermory.c b/sound/soc/samsung/tobermory.c
+index 15223d860cb72..8d3149a47a4c8 100644
+--- a/sound/soc/samsung/tobermory.c
++++ b/sound/soc/samsung/tobermory.c
+@@ -229,9 +229,8 @@ static int tobermory_probe(struct platform_device *pdev)
+ card->dev = &pdev->dev;
+
+ ret = devm_snd_soc_register_card(&pdev->dev, card);
+- if (ret && ret != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
+- ret);
++ if (ret)
++ dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n");
+
+ return ret;
+ }
+diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
+index cdf3b7f69ba70..e9a1eb6bdf66a 100644
+--- a/sound/soc/sh/fsi.c
++++ b/sound/soc/sh/fsi.c
+@@ -816,14 +816,27 @@ static int fsi_clk_enable(struct device *dev,
+ return ret;
+ }
+
+- clk_enable(clock->xck);
+- clk_enable(clock->ick);
+- clk_enable(clock->div);
++ ret = clk_enable(clock->xck);
++ if (ret)
++ goto err;
++ ret = clk_enable(clock->ick);
++ if (ret)
++ goto disable_xck;
++ ret = clk_enable(clock->div);
++ if (ret)
++ goto disable_ick;
+
+ clock->count++;
+ }
+
+ return ret;
++
++disable_ick:
++ clk_disable(clock->ick);
++disable_xck:
++ clk_disable(clock->xck);
++err:
++ return ret;
+ }
+
+ static int fsi_clk_disable(struct device *dev,
+diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
+index 978bd0406729a..af8ef2a27d341 100644
+--- a/sound/soc/sh/rcar/core.c
++++ b/sound/soc/sh/rcar/core.c
+@@ -1159,6 +1159,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name,
+ struct device_node *capture)
+ {
+ struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai);
++ struct device *dev = rsnd_priv_to_dev(priv);
+ struct device_node *np;
+ int i;
+
+@@ -1169,7 +1170,11 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name,
+ for_each_child_of_node(node, np) {
+ struct rsnd_mod *mod;
+
+- i = rsnd_node_fixed_index(np, name, i);
++ i = rsnd_node_fixed_index(dev, np, name, i);
++ if (i < 0) {
++ of_node_put(np);
++ break;
++ }
+
+ mod = mod_get(priv, i);
+
+@@ -1183,7 +1188,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name,
+ of_node_put(node);
+ }
+
+-int rsnd_node_fixed_index(struct device_node *node, char *name, int idx)
++int rsnd_node_fixed_index(struct device *dev, struct device_node *node, char *name, int idx)
+ {
+ char node_name[16];
+
+@@ -1210,6 +1215,8 @@ int rsnd_node_fixed_index(struct device_node *node, char *name, int idx)
+ return idx;
+ }
+
++ dev_err(dev, "strange node numbering (%s)",
++ of_node_full_name(node));
+ return -EINVAL;
+ }
+
+@@ -1221,10 +1228,9 @@ int rsnd_node_count(struct rsnd_priv *priv, struct device_node *node, char *name
+
+ i = 0;
+ for_each_child_of_node(node, np) {
+- i = rsnd_node_fixed_index(np, name, i);
++ i = rsnd_node_fixed_index(dev, np, name, i);
+ if (i < 0) {
+- dev_err(dev, "strange node numbering (%s)",
+- of_node_full_name(node));
++ of_node_put(np);
+ return 0;
+ }
+ i++;
+diff --git a/sound/soc/sh/rcar/ctu.c b/sound/soc/sh/rcar/ctu.c
+index 6156445bcb69a..e39eb2ac7e955 100644
+--- a/sound/soc/sh/rcar/ctu.c
++++ b/sound/soc/sh/rcar/ctu.c
+@@ -171,7 +171,11 @@ static int rsnd_ctu_init(struct rsnd_mod *mod,
+ struct rsnd_dai_stream *io,
+ struct rsnd_priv *priv)
+ {
+- rsnd_mod_power_on(mod);
++ int ret;
++
++ ret = rsnd_mod_power_on(mod);
++ if (ret < 0)
++ return ret;
+
+ rsnd_ctu_activation(mod);
+
+diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
+index 16c6e0265749b..463ab237d7bd4 100644
+--- a/sound/soc/sh/rcar/dma.c
++++ b/sound/soc/sh/rcar/dma.c
+@@ -102,7 +102,7 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
+ struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
+
+ if (dmaen->chan)
+- dmaengine_terminate_sync(dmaen->chan);
++ dmaengine_terminate_async(dmaen->chan);
+
+ return 0;
+ }
+@@ -240,12 +240,19 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
+ struct dma_chan *rsnd_dma_request_channel(struct device_node *of_node, char *name,
+ struct rsnd_mod *mod, char *x)
+ {
++ struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
++ struct device *dev = rsnd_priv_to_dev(priv);
+ struct dma_chan *chan = NULL;
+ struct device_node *np;
+ int i = 0;
+
+ for_each_child_of_node(of_node, np) {
+- i = rsnd_node_fixed_index(np, name, i);
++ i = rsnd_node_fixed_index(dev, np, name, i);
++ if (i < 0) {
++ chan = NULL;
++ of_node_put(np);
++ break;
++ }
+
+ if (i == rsnd_mod_id_raw(mod) && (!chan))
+ chan = of_dma_request_slave_channel(np, x);
+diff --git a/sound/soc/sh/rcar/dvc.c b/sound/soc/sh/rcar/dvc.c
+index 5137e03a9d7c7..16befcbc312cb 100644
+--- a/sound/soc/sh/rcar/dvc.c
++++ b/sound/soc/sh/rcar/dvc.c
+@@ -186,7 +186,11 @@ static int rsnd_dvc_init(struct rsnd_mod *mod,
+ struct rsnd_dai_stream *io,
+ struct rsnd_priv *priv)
+ {
+- rsnd_mod_power_on(mod);
++ int ret;
++
++ ret = rsnd_mod_power_on(mod);
++ if (ret < 0)
++ return ret;
+
+ rsnd_dvc_activation(mod);
+
+diff --git a/sound/soc/sh/rcar/mix.c b/sound/soc/sh/rcar/mix.c
+index 3572c2c5686c7..1de0e085804cc 100644
+--- a/sound/soc/sh/rcar/mix.c
++++ b/sound/soc/sh/rcar/mix.c
+@@ -146,7 +146,11 @@ static int rsnd_mix_init(struct rsnd_mod *mod,
+ struct rsnd_dai_stream *io,
+ struct rsnd_priv *priv)
+ {
+- rsnd_mod_power_on(mod);
++ int ret;
++
++ ret = rsnd_mod_power_on(mod);
++ if (ret < 0)
++ return ret;
+
+ rsnd_mix_activation(mod);
+
+diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h
+index 6580bab0e229b..f8ef6836ef84e 100644
+--- a/sound/soc/sh/rcar/rsnd.h
++++ b/sound/soc/sh/rcar/rsnd.h
+@@ -460,7 +460,7 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, char *name,
+ struct device_node *playback,
+ struct device_node *capture);
+ int rsnd_node_count(struct rsnd_priv *priv, struct device_node *node, char *name);
+-int rsnd_node_fixed_index(struct device_node *node, char *name, int idx);
++int rsnd_node_fixed_index(struct device *dev, struct device_node *node, char *name, int idx);
+
+ int rsnd_channel_normalization(int chan);
+ #define rsnd_runtime_channel_original(io) \
+@@ -901,8 +901,6 @@ void rsnd_mod_make_sure(struct rsnd_mod *mod, enum rsnd_mod_type type);
+ if (!IS_BUILTIN(RSND_DEBUG_NO_DAI_CALL)) \
+ dev_dbg(dev, param)
+
+-#endif
+-
+ #ifdef CONFIG_DEBUG_FS
+ int rsnd_debugfs_probe(struct snd_soc_component *component);
+ void rsnd_debugfs_reg_show(struct seq_file *m, phys_addr_t _addr,
+@@ -913,3 +911,5 @@ void rsnd_debugfs_mod_reg_show(struct seq_file *m, struct rsnd_mod *mod,
+ #else
+ #define rsnd_debugfs_probe NULL
+ #endif
++
++#endif /* RSND_H */
+diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
+index 42a100c6303d4..f832165e46bc0 100644
+--- a/sound/soc/sh/rcar/src.c
++++ b/sound/soc/sh/rcar/src.c
+@@ -463,11 +463,14 @@ static int rsnd_src_init(struct rsnd_mod *mod,
+ struct rsnd_priv *priv)
+ {
+ struct rsnd_src *src = rsnd_mod_to_src(mod);
++ int ret;
+
+ /* reset sync convert_rate */
+ src->sync.val = 0;
+
+- rsnd_mod_power_on(mod);
++ ret = rsnd_mod_power_on(mod);
++ if (ret < 0)
++ return ret;
+
+ rsnd_src_activation(mod);
+
+@@ -676,7 +679,12 @@ int rsnd_src_probe(struct rsnd_priv *priv)
+ if (!of_device_is_available(np))
+ goto skip;
+
+- i = rsnd_node_fixed_index(np, SRC_NAME, i);
++ i = rsnd_node_fixed_index(dev, np, SRC_NAME, i);
++ if (i < 0) {
++ ret = -EINVAL;
++ of_node_put(np);
++ goto rsnd_src_probe_done;
++ }
+
+ src = rsnd_src_get(priv, i);
+
+diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
+index 87e606f688d3f..7ade6c5ed96ff 100644
+--- a/sound/soc/sh/rcar/ssi.c
++++ b/sound/soc/sh/rcar/ssi.c
+@@ -480,7 +480,9 @@ static int rsnd_ssi_init(struct rsnd_mod *mod,
+
+ ssi->usrcnt++;
+
+- rsnd_mod_power_on(mod);
++ ret = rsnd_mod_power_on(mod);
++ if (ret < 0)
++ return ret;
+
+ rsnd_ssi_config_init(mod, io);
+
+@@ -1105,6 +1107,7 @@ void rsnd_parse_connect_ssi(struct rsnd_dai *rdai,
+ struct device_node *capture)
+ {
+ struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai);
++ struct device *dev = rsnd_priv_to_dev(priv);
+ struct device_node *node;
+ struct device_node *np;
+ int i;
+@@ -1117,7 +1120,11 @@ void rsnd_parse_connect_ssi(struct rsnd_dai *rdai,
+ for_each_child_of_node(node, np) {
+ struct rsnd_mod *mod;
+
+- i = rsnd_node_fixed_index(np, SSI_NAME, i);
++ i = rsnd_node_fixed_index(dev, np, SSI_NAME, i);
++ if (i < 0) {
++ of_node_put(np);
++ break;
++ }
+
+ mod = rsnd_ssi_mod_get(priv, i);
+
+@@ -1182,7 +1189,12 @@ int rsnd_ssi_probe(struct rsnd_priv *priv)
+ if (!of_device_is_available(np))
+ goto skip;
+
+- i = rsnd_node_fixed_index(np, SSI_NAME, i);
++ i = rsnd_node_fixed_index(dev, np, SSI_NAME, i);
++ if (i < 0) {
++ ret = -EINVAL;
++ of_node_put(np);
++ goto rsnd_ssi_probe_done;
++ }
+
+ ssi = rsnd_ssi_get(priv, i);
+
+diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
+index 0d8f97633dd26..d7f4646ee029c 100644
+--- a/sound/soc/sh/rcar/ssiu.c
++++ b/sound/soc/sh/rcar/ssiu.c
+@@ -67,6 +67,8 @@ static void rsnd_ssiu_busif_err_irq_ctrl(struct rsnd_mod *mod, int enable)
+ shift = 1;
+ offset = 1;
+ break;
++ default:
++ return;
+ }
+
+ for (i = 0; i < 4; i++) {
+@@ -102,6 +104,8 @@ bool rsnd_ssiu_busif_err_status_clear(struct rsnd_mod *mod)
+ shift = 1;
+ offset = 1;
+ break;
++ default:
++ goto out;
+ }
+
+ for (i = 0; i < 4; i++) {
+@@ -120,7 +124,7 @@ bool rsnd_ssiu_busif_err_status_clear(struct rsnd_mod *mod)
+ }
+ rsnd_mod_write(mod, reg, val);
+ }
+-
++out:
+ return error;
+ }
+
+@@ -460,6 +464,7 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai,
+ struct device_node *capture)
+ {
+ struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai);
++ struct device *dev = rsnd_priv_to_dev(priv);
+ struct device_node *node = rsnd_ssiu_of_node(priv);
+ struct rsnd_dai_stream *io_p = &rdai->playback;
+ struct rsnd_dai_stream *io_c = &rdai->capture;
+@@ -472,7 +477,11 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai,
+ for_each_child_of_node(node, np) {
+ struct rsnd_mod *mod;
+
+- i = rsnd_node_fixed_index(np, SSIU_NAME, i);
++ i = rsnd_node_fixed_index(dev, np, SSIU_NAME, i);
++ if (i < 0) {
++ of_node_put(np);
++ break;
++ }
+
+ mod = rsnd_ssiu_mod_get(priv, i);
+
+diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c
+index fa0cc08f70ec4..2e33a1fa0a6f4 100644
+--- a/sound/soc/sh/rz-ssi.c
++++ b/sound/soc/sh/rz-ssi.c
+@@ -411,54 +411,56 @@ static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm)
+ {
+ struct snd_pcm_substream *substream = strm->substream;
+ struct snd_pcm_runtime *runtime;
++ bool done = false;
+ u16 *buf;
+ int fifo_samples;
+ int frames_left;
+- int samples = 0;
++ int samples;
+ int i;
+
+ if (!rz_ssi_stream_is_valid(ssi, strm))
+ return -EINVAL;
+
+ runtime = substream->runtime;
+- /* frames left in this period */
+- frames_left = runtime->period_size - (strm->buffer_pos %
+- runtime->period_size);
+- if (frames_left == 0)
+- frames_left = runtime->period_size;
+
+- /* Samples in RX FIFO */
+- fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >>
+- SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK;
+-
+- /* Only read full frames at a time */
+- while (frames_left && (fifo_samples >= runtime->channels)) {
+- samples += runtime->channels;
+- fifo_samples -= runtime->channels;
+- frames_left--;
+- }
++ while (!done) {
++ /* frames left in this period */
++ frames_left = runtime->period_size -
++ (strm->buffer_pos % runtime->period_size);
++ if (!frames_left)
++ frames_left = runtime->period_size;
++
++ /* Samples in RX FIFO */
++ fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >>
++ SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK;
++
++ /* Only read full frames at a time */
++ samples = 0;
++ while (frames_left && (fifo_samples >= runtime->channels)) {
++ samples += runtime->channels;
++ fifo_samples -= runtime->channels;
++ frames_left--;
++ }
+
+- /* not enough samples yet */
+- if (samples == 0)
+- return 0;
++ /* not enough samples yet */
++ if (!samples)
++ break;
+
+- /* calculate new buffer index */
+- buf = (u16 *)(runtime->dma_area);
+- buf += strm->buffer_pos * runtime->channels;
++ /* calculate new buffer index */
++ buf = (u16 *)(runtime->dma_area);
++ buf += strm->buffer_pos * runtime->channels;
+
+- /* Note, only supports 16-bit samples */
+- for (i = 0; i < samples; i++)
+- *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16);
++ /* Note, only supports 16-bit samples */
++ for (i = 0; i < samples; i++)
++ *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16);
+
+- rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0);
+- rz_ssi_pointer_update(strm, samples / runtime->channels);
++ rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0);
++ rz_ssi_pointer_update(strm, samples / runtime->channels);
+
+- /*
+- * If we finished this period, but there are more samples in
+- * the RX FIFO, call this function again
+- */
+- if (frames_left == 0 && fifo_samples >= runtime->channels)
+- rz_ssi_pio_recv(ssi, strm);
++ /* check if there are no more samples in the RX FIFO */
++ if (!(!frames_left && fifo_samples >= runtime->channels))
++ done = true;
++ }
+
+ return 0;
+ }
+@@ -975,24 +977,29 @@ static int rz_ssi_probe(struct platform_device *pdev)
+ ssi->playback.priv = ssi;
+ ssi->capture.priv = ssi;
+
++ spin_lock_init(&ssi->lock);
++ dev_set_drvdata(&pdev->dev, ssi);
++
+ /* Error Interrupt */
+ ssi->irq_int = platform_get_irq_byname(pdev, "int_req");
+- if (ssi->irq_int < 0)
+- return dev_err_probe(&pdev->dev, -ENODEV,
+- "Unable to get SSI int_req IRQ\n");
++ if (ssi->irq_int < 0) {
++ rz_ssi_release_dma_channels(ssi);
++ return ssi->irq_int;
++ }
+
+ ret = devm_request_irq(&pdev->dev, ssi->irq_int, &rz_ssi_interrupt,
+ 0, dev_name(&pdev->dev), ssi);
+- if (ret < 0)
++ if (ret < 0) {
++ rz_ssi_release_dma_channels(ssi);
+ return dev_err_probe(&pdev->dev, ret,
+ "irq request error (int_req)\n");
++ }
+
+ if (!rz_ssi_is_dma_enabled(ssi)) {
+ /* Tx and Rx interrupts (pio only) */
+ ssi->irq_tx = platform_get_irq_byname(pdev, "dma_tx");
+ if (ssi->irq_tx < 0)
+- return dev_err_probe(&pdev->dev, -ENODEV,
+- "Unable to get SSI dma_tx IRQ\n");
++ return ssi->irq_tx;
+
+ ret = devm_request_irq(&pdev->dev, ssi->irq_tx,
+ &rz_ssi_interrupt, 0,
+@@ -1003,8 +1010,7 @@ static int rz_ssi_probe(struct platform_device *pdev)
+
+ ssi->irq_rx = platform_get_irq_byname(pdev, "dma_rx");
+ if (ssi->irq_rx < 0)
+- return dev_err_probe(&pdev->dev, -ENODEV,
+- "Unable to get SSI dma_rx IRQ\n");
++ return ssi->irq_rx;
+
+ ret = devm_request_irq(&pdev->dev, ssi->irq_rx,
+ &rz_ssi_interrupt, 0,
+@@ -1015,27 +1021,37 @@ static int rz_ssi_probe(struct platform_device *pdev)
+ }
+
+ ssi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+- if (IS_ERR(ssi->rstc))
+- return PTR_ERR(ssi->rstc);
++ if (IS_ERR(ssi->rstc)) {
++ ret = PTR_ERR(ssi->rstc);
++ goto err_reset;
++ }
+
+ reset_control_deassert(ssi->rstc);
+ pm_runtime_enable(&pdev->dev);
+- pm_runtime_resume_and_get(&pdev->dev);
++ ret = pm_runtime_resume_and_get(&pdev->dev);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n");
++ goto err_pm;
++ }
+
+- spin_lock_init(&ssi->lock);
+- dev_set_drvdata(&pdev->dev, ssi);
+ ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component,
+ rz_ssi_soc_dai,
+ ARRAY_SIZE(rz_ssi_soc_dai));
+ if (ret < 0) {
+- rz_ssi_release_dma_channels(ssi);
+-
+- pm_runtime_put(ssi->dev);
+- pm_runtime_disable(ssi->dev);
+- reset_control_assert(ssi->rstc);
+ dev_err(&pdev->dev, "failed to register snd component\n");
++ goto err_snd_soc;
+ }
+
++ return 0;
++
++err_snd_soc:
++ pm_runtime_put(ssi->dev);
++err_pm:
++ pm_runtime_disable(ssi->dev);
++ reset_control_assert(ssi->rstc);
++err_reset:
++ rz_ssi_release_dma_channels(ssi);
++
+ return ret;
+ }
+
+diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
+index 36060800e9bd9..256e45001f851 100644
+--- a/sound/soc/soc-compress.c
++++ b/sound/soc/soc-compress.c
+@@ -116,6 +116,8 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
+ if (ret < 0)
+ goto be_err;
+
++ mutex_lock_nested(&fe->card->pcm_mutex, fe->card->pcm_subclass);
++
+ /* calculate valid and active FE <-> BE dpcms */
+ dpcm_process_paths(fe, stream, &list, 1);
+ fe->dpcm[stream].runtime = fe_substream->runtime;
+@@ -151,7 +153,6 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN;
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO;
+
+- mutex_lock_nested(&fe->card->pcm_mutex, fe->card->pcm_subclass);
+ snd_soc_runtime_activate(fe, stream);
+ mutex_unlock(&fe->card->pcm_mutex);
+
+@@ -182,7 +183,6 @@ static int soc_compr_free_fe(struct snd_compr_stream *cstream)
+
+ mutex_lock_nested(&fe->card->pcm_mutex, fe->card->pcm_subclass);
+ snd_soc_runtime_deactivate(fe, stream);
+- mutex_unlock(&fe->card->pcm_mutex);
+
+ fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE;
+
+@@ -201,6 +201,8 @@ static int soc_compr_free_fe(struct snd_compr_stream *cstream)
+
+ dpcm_be_disconnect(fe, stream);
+
++ mutex_unlock(&fe->card->pcm_mutex);
++
+ fe->dpcm[stream].runtime = NULL;
+
+ snd_soc_link_compr_shutdown(cstream, 0);
+@@ -376,8 +378,9 @@ static int soc_compr_set_params_fe(struct snd_compr_stream *cstream,
+ ret = snd_soc_link_compr_set_params(cstream);
+ if (ret < 0)
+ goto out;
+-
++ mutex_lock_nested(&fe->card->pcm_mutex, fe->card->pcm_subclass);
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_START);
++ mutex_unlock(&fe->card->pcm_mutex);
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE;
+
+ out:
+@@ -534,6 +537,11 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
+ return -EINVAL;
+ }
+
++ if (!codec_dai) {
++ dev_err(rtd->card->dev, "Missing codec\n");
++ return -EINVAL;
++ }
++
+ /* check client and interface hw capabilities */
+ if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) &&
+ snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK))
+@@ -581,11 +589,14 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
+ return ret;
+ }
+
++ /* inherit atomicity from DAI link */
++ be_pcm->nonatomic = rtd->dai_link->nonatomic;
++
+ rtd->pcm = be_pcm;
+ rtd->fe_compr = 1;
+ if (rtd->dai_link->dpcm_playback)
+ be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd;
+- else if (rtd->dai_link->dpcm_capture)
++ if (rtd->dai_link->dpcm_capture)
+ be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd;
+ memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops));
+ } else {
+diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
+index 80ca260595fda..a5b3ee69fb886 100644
+--- a/sound/soc/soc-core.c
++++ b/sound/soc/soc-core.c
+@@ -2339,7 +2339,6 @@ int snd_soc_register_card(struct snd_soc_card *card)
+ mutex_init(&card->mutex);
+ mutex_init(&card->dapm_mutex);
+ mutex_init(&card->pcm_mutex);
+- spin_lock_init(&card->dpcm_lock);
+
+ return snd_soc_bind_card(card);
+ }
+@@ -3208,7 +3207,7 @@ int snd_soc_get_dai_name(const struct of_phandle_args *args,
+ for_each_component(pos) {
+ struct device_node *component_of_node = soc_component_to_node(pos);
+
+- if (component_of_node != args->np)
++ if (component_of_node != args->np || !pos->num_dai)
+ continue;
+
+ ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name);
+@@ -3366,10 +3365,23 @@ EXPORT_SYMBOL_GPL(snd_soc_of_get_dai_link_codecs);
+
+ static int __init snd_soc_init(void)
+ {
++ int ret;
++
+ snd_soc_debugfs_init();
+- snd_soc_util_init();
++ ret = snd_soc_util_init();
++ if (ret)
++ goto err_util_init;
+
+- return platform_driver_register(&soc_driver);
++ ret = platform_driver_register(&soc_driver);
++ if (ret)
++ goto err_register;
++ return 0;
++
++err_register:
++ snd_soc_util_exit();
++err_util_init:
++ snd_soc_debugfs_exit();
++ return ret;
+ }
+ module_init(snd_soc_init);
+
+diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
+index 59d07648a7e7f..b957049bae337 100644
+--- a/sound/soc/soc-dapm.c
++++ b/sound/soc/soc-dapm.c
+@@ -62,6 +62,8 @@ struct snd_soc_dapm_widget *
+ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
+ const struct snd_soc_dapm_widget *widget);
+
++static unsigned int soc_dapm_read(struct snd_soc_dapm_context *dapm, int reg);
++
+ /* dapm power sequences - make this per codec in the future */
+ static int dapm_up_seq[] = {
+ [snd_soc_dapm_pre] = 1,
+@@ -442,6 +444,9 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget,
+
+ snd_soc_dapm_add_path(widget->dapm, data->widget,
+ widget, NULL, NULL);
++ } else if (e->reg != SND_SOC_NOPM) {
++ data->value = soc_dapm_read(widget->dapm, e->reg) &
++ (e->mask << e->shift_l);
+ }
+ break;
+ default:
+@@ -1685,8 +1690,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
+ switch (w->id) {
+ case snd_soc_dapm_pre:
+ if (!w->event)
+- list_for_each_entry_safe_continue(w, n, list,
+- power_list);
++ continue;
+
+ if (event == SND_SOC_DAPM_STREAM_START)
+ ret = w->event(w,
+@@ -1698,8 +1702,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
+
+ case snd_soc_dapm_post:
+ if (!w->event)
+- list_for_each_entry_safe_continue(w, n, list,
+- power_list);
++ continue;
+
+ if (event == SND_SOC_DAPM_STREAM_START)
+ ret = w->event(w,
+@@ -2557,8 +2560,13 @@ static struct snd_soc_dapm_widget *dapm_find_widget(
+ return NULL;
+ }
+
+-static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+- const char *pin, int status)
++/*
++ * set the DAPM pin status:
++ * returns 1 when the value has been updated, 0 when unchanged, or a negative
++ * error code; called from kcontrol put callback
++ */
++static int __snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
++ const char *pin, int status)
+ {
+ struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true);
+ int ret = 0;
+@@ -2584,6 +2592,18 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+ return ret;
+ }
+
++/*
++ * similar as __snd_soc_dapm_set_pin(), but returns 0 when successful;
++ * called from several API functions below
++ */
++static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
++ const char *pin, int status)
++{
++ int ret = __snd_soc_dapm_set_pin(dapm, pin, status);
++
++ return ret < 0 ? ret : 0;
++}
++
+ /**
+ * snd_soc_dapm_sync_unlocked - scan and power dapm paths
+ * @dapm: DAPM context
+@@ -3413,7 +3433,6 @@ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol,
+ update.val = val;
+ card->update = &update;
+ }
+- change |= reg_change;
+
+ ret = soc_dapm_mixer_update_power(card, kcontrol, connect,
+ rconnect);
+@@ -3515,7 +3534,6 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol,
+ update.val = val;
+ card->update = &update;
+ }
+- change |= reg_change;
+
+ ret = soc_dapm_mux_update_power(card, kcontrol, item[0], e);
+
+@@ -3587,10 +3605,10 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol,
+ const char *pin = (const char *)kcontrol->private_value;
+ int ret;
+
+- if (ucontrol->value.integer.value[0])
+- ret = snd_soc_dapm_enable_pin(&card->dapm, pin);
+- else
+- ret = snd_soc_dapm_disable_pin(&card->dapm, pin);
++ mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
++ ret = __snd_soc_dapm_set_pin(&card->dapm, pin,
++ !!ucontrol->value.integer.value[0]);
++ mutex_unlock(&card->dapm_mutex);
+
+ snd_soc_dapm_sync(&card->dapm);
+ return ret;
+diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
+index 08eaa9ddf191e..12effaa59fdb7 100644
+--- a/sound/soc/soc-ops.c
++++ b/sound/soc/soc-ops.c
+@@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
+ unsigned int sign_bit = mc->sign_bit;
+ unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int invert = mc->invert;
+- int err;
++ int err, ret;
+ bool type_2r = false;
+ unsigned int val2 = 0;
+ unsigned int val, val_mask;
+@@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
+ if (sign_bit)
+ mask = BIT(sign_bit + 1) - 1;
+
+- val = ((ucontrol->value.integer.value[0] + min) & mask);
++ val = ucontrol->value.integer.value[0];
++ if (mc->platform_max && ((int)val + min) > mc->platform_max)
++ return -EINVAL;
++ if (val > max - min)
++ return -EINVAL;
++ if (val < 0)
++ return -EINVAL;
++ val = (val + min) & mask;
+ if (invert)
+ val = max - val;
+ val_mask = mask << shift;
+ val = val << shift;
+ if (snd_soc_volsw_is_stereo(mc)) {
+- val2 = ((ucontrol->value.integer.value[1] + min) & mask);
++ val2 = ucontrol->value.integer.value[1];
++ if (mc->platform_max && ((int)val2 + min) > mc->platform_max)
++ return -EINVAL;
++ if (val2 > max - min)
++ return -EINVAL;
++ if (val2 < 0)
++ return -EINVAL;
++ val2 = (val2 + min) & mask;
+ if (invert)
+ val2 = max - val2;
+ if (reg == reg2) {
+@@ -336,12 +350,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
+ err = snd_soc_component_update_bits(component, reg, val_mask, val);
+ if (err < 0)
+ return err;
++ ret = err;
+
+- if (type_2r)
++ if (type_2r) {
+ err = snd_soc_component_update_bits(component, reg2, val_mask,
+- val2);
++ val2);
++ /* Don't discard any error code or drop change flag */
++ if (ret == 0 || err < 0) {
++ ret = err;
++ }
++ }
+
+- return err;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_put_volsw);
+
+@@ -407,27 +427,46 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
+ int min = mc->min;
+ unsigned int mask = (1U << (fls(min + max) - 1)) - 1;
+ int err = 0;
++ int ret;
+ unsigned int val, val_mask;
+
++ val = ucontrol->value.integer.value[0];
++ if (mc->platform_max && val > mc->platform_max)
++ return -EINVAL;
++ if (val > max)
++ return -EINVAL;
++ if (val < 0)
++ return -EINVAL;
+ val_mask = mask << shift;
+- val = (ucontrol->value.integer.value[0] + min) & mask;
++ val = (val + min) & mask;
+ val = val << shift;
+
+ err = snd_soc_component_update_bits(component, reg, val_mask, val);
+ if (err < 0)
+ return err;
++ ret = err;
+
+ if (snd_soc_volsw_is_stereo(mc)) {
+- unsigned int val2;
++ unsigned int val2 = ucontrol->value.integer.value[1];
++
++ if (mc->platform_max && val2 > mc->platform_max)
++ return -EINVAL;
++ if (val2 > max)
++ return -EINVAL;
+
+ val_mask = mask << rshift;
+- val2 = (ucontrol->value.integer.value[1] + min) & mask;
++ val2 = (val2 + min) & mask;
+ val2 = val2 << rshift;
+
+ err = snd_soc_component_update_bits(component, reg2, val_mask,
+ val2);
++
++ /* Don't discard any error code or drop change flag */
++ if (ret == 0 || err < 0) {
++ ret = err;
++ }
+ }
+- return err;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx);
+
+@@ -485,7 +524,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
+ unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int invert = mc->invert;
+ unsigned int val, val_mask;
+- int ret;
++ int err, ret, tmp;
++
++ tmp = ucontrol->value.integer.value[0];
++ if (tmp < 0)
++ return -EINVAL;
++ if (mc->platform_max && tmp > mc->platform_max)
++ return -EINVAL;
++ if (tmp > mc->max - mc->min)
++ return -EINVAL;
+
+ if (invert)
+ val = (max - ucontrol->value.integer.value[0]) & mask;
+@@ -494,11 +541,20 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
+ val_mask = mask << shift;
+ val = val << shift;
+
+- ret = snd_soc_component_update_bits(component, reg, val_mask, val);
+- if (ret < 0)
+- return ret;
++ err = snd_soc_component_update_bits(component, reg, val_mask, val);
++ if (err < 0)
++ return err;
++ ret = err;
+
+ if (snd_soc_volsw_is_stereo(mc)) {
++ tmp = ucontrol->value.integer.value[1];
++ if (tmp < 0)
++ return -EINVAL;
++ if (mc->platform_max && tmp > mc->platform_max)
++ return -EINVAL;
++ if (tmp > mc->max - mc->min)
++ return -EINVAL;
++
+ if (invert)
+ val = (max - ucontrol->value.integer.value[1]) & mask;
+ else
+@@ -506,8 +562,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
+ val_mask = mask << shift;
+ val = val << shift;
+
+- ret = snd_soc_component_update_bits(component, rreg, val_mask,
++ err = snd_soc_component_update_bits(component, rreg, val_mask,
+ val);
++ /* Don't discard any error code or drop change flag */
++ if (ret == 0 || err < 0) {
++ ret = err;
++ }
+ }
+
+ return ret;
+@@ -856,8 +916,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
+ unsigned long mask = (1UL<<mc->nbits)-1;
+ long max = mc->max;
+ long val = ucontrol->value.integer.value[0];
++ int ret = 0;
+ unsigned int i;
+
++ if (val < mc->min || val > mc->max)
++ return -EINVAL;
+ if (invert)
+ val = max - val;
+ val &= mask;
+@@ -868,9 +931,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
+ regmask, regval);
+ if (err < 0)
+ return err;
++ if (err > 0)
++ ret = err;
+ }
+
+- return 0;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_put_xr_sx);
+
+diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
+index 48f71bb81a2f2..6ec248778e2fc 100644
+--- a/sound/soc/soc-pcm.c
++++ b/sound/soc/soc-pcm.c
+@@ -27,6 +27,37 @@
+ #include <sound/soc-link.h>
+ #include <sound/initval.h>
+
++static inline void snd_soc_dpcm_mutex_lock(struct snd_soc_pcm_runtime *rtd)
++{
++ mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++}
++
++static inline void snd_soc_dpcm_mutex_unlock(struct snd_soc_pcm_runtime *rtd)
++{
++ mutex_unlock(&rtd->card->pcm_mutex);
++}
++
++#define snd_soc_dpcm_mutex_assert_held(rtd) \
++ lockdep_assert_held(&(rtd)->card->pcm_mutex)
++
++static inline void snd_soc_dpcm_stream_lock_irq(struct snd_soc_pcm_runtime *rtd,
++ int stream)
++{
++ snd_pcm_stream_lock_irq(snd_soc_dpcm_get_substream(rtd, stream));
++}
++
++#define snd_soc_dpcm_stream_lock_irqsave_nested(rtd, stream, flags) \
++ snd_pcm_stream_lock_irqsave_nested(snd_soc_dpcm_get_substream(rtd, stream), flags)
++
++static inline void snd_soc_dpcm_stream_unlock_irq(struct snd_soc_pcm_runtime *rtd,
++ int stream)
++{
++ snd_pcm_stream_unlock_irq(snd_soc_dpcm_get_substream(rtd, stream));
++}
++
++#define snd_soc_dpcm_stream_unlock_irqrestore(rtd, stream, flags) \
++ snd_pcm_stream_unlock_irqrestore(snd_soc_dpcm_get_substream(rtd, stream), flags)
++
+ #define DPCM_MAX_BE_USERS 8
+
+ static inline const char *soc_cpu_dai_name(struct snd_soc_pcm_runtime *rtd)
+@@ -73,7 +104,6 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
+ struct snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params;
+ struct snd_soc_dpcm *dpcm;
+ ssize_t offset = 0;
+- unsigned long flags;
+
+ /* FE state */
+ offset += scnprintf(buf + offset, size - offset,
+@@ -101,7 +131,6 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
+ goto out;
+ }
+
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+ for_each_dpcm_be(fe, stream, dpcm) {
+ struct snd_soc_pcm_runtime *be = dpcm->be;
+ params = &dpcm->hw_params;
+@@ -122,7 +151,6 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe,
+ params_channels(params),
+ params_rate(params));
+ }
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+ out:
+ return offset;
+ }
+@@ -145,11 +173,13 @@ static ssize_t dpcm_state_read_file(struct file *file, char __user *user_buf,
+ if (!buf)
+ return -ENOMEM;
+
++ snd_soc_dpcm_mutex_lock(fe);
+ for_each_pcm_streams(stream)
+ if (snd_soc_dai_stream_valid(asoc_rtd_to_cpu(fe, 0), stream))
+ offset += dpcm_show_state(fe, stream,
+ buf + offset,
+ out_count - offset);
++ snd_soc_dpcm_mutex_unlock(fe);
+
+ ret = simple_read_from_buffer(user_buf, count, ppos, buf, offset);
+
+@@ -221,14 +251,14 @@ static void dpcm_set_fe_update_state(struct snd_soc_pcm_runtime *fe,
+ struct snd_pcm_substream *substream =
+ snd_soc_dpcm_get_substream(fe, stream);
+
+- snd_pcm_stream_lock_irq(substream);
++ snd_soc_dpcm_stream_lock_irq(fe, stream);
+ if (state == SND_SOC_DPCM_UPDATE_NO && fe->dpcm[stream].trigger_pending) {
+ dpcm_fe_dai_do_trigger(substream,
+ fe->dpcm[stream].trigger_pending - 1);
+ fe->dpcm[stream].trigger_pending = 0;
+ }
+ fe->dpcm[stream].runtime_update = state;
+- snd_pcm_stream_unlock_irq(substream);
++ snd_soc_dpcm_stream_unlock_irq(fe, stream);
+ }
+
+ static void dpcm_set_be_update_state(struct snd_soc_pcm_runtime *be,
+@@ -256,7 +286,7 @@ void snd_soc_runtime_action(struct snd_soc_pcm_runtime *rtd,
+ struct snd_soc_dai *dai;
+ int i;
+
+- lockdep_assert_held(&rtd->card->pcm_mutex);
++ snd_soc_dpcm_mutex_assert_held(rtd);
+
+ for_each_rtd_dais(rtd, i, dai)
+ snd_soc_dai_action(dai, stream, action);
+@@ -309,6 +339,8 @@ int dpcm_dapm_stream_event(struct snd_soc_pcm_runtime *fe, int dir,
+ {
+ struct snd_soc_dpcm *dpcm;
+
++ snd_soc_dpcm_mutex_assert_held(fe);
++
+ for_each_dpcm_be(fe, dir, dpcm) {
+
+ struct snd_soc_pcm_runtime *be = dpcm->be;
+@@ -646,14 +678,14 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream,
+ return ret;
+ }
+
+-static int soc_pcm_clean(struct snd_pcm_substream *substream, int rollback)
++static int soc_pcm_clean(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream, int rollback)
+ {
+- struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_component *component;
+ struct snd_soc_dai *dai;
+ int i;
+
+- mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++ snd_soc_dpcm_mutex_assert_held(rtd);
+
+ if (!rollback)
+ snd_soc_runtime_deactivate(rtd, substream->stream);
+@@ -665,9 +697,6 @@ static int soc_pcm_clean(struct snd_pcm_substream *substream, int rollback)
+
+ soc_pcm_components_close(substream, rollback);
+
+-
+- mutex_unlock(&rtd->card->pcm_mutex);
+-
+ snd_soc_pcm_component_pm_runtime_put(rtd, substream, rollback);
+
+ for_each_rtd_components(rtd, i, component)
+@@ -682,9 +711,21 @@ static int soc_pcm_clean(struct snd_pcm_substream *substream, int rollback)
+ * freed here. The cpu DAI, codec DAI, machine and components are also
+ * shutdown.
+ */
++static int __soc_pcm_close(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream)
++{
++ return soc_pcm_clean(rtd, substream, 0);
++}
++
++/* PCM close ops for non-DPCM streams */
+ static int soc_pcm_close(struct snd_pcm_substream *substream)
+ {
+- return soc_pcm_clean(substream, 0);
++ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++
++ snd_soc_dpcm_mutex_lock(rtd);
++ __soc_pcm_close(rtd, substream);
++ snd_soc_dpcm_mutex_unlock(rtd);
++ return 0;
+ }
+
+ static int soc_hw_sanity_check(struct snd_pcm_substream *substream)
+@@ -730,21 +771,21 @@ config_err:
+ * then initialized and any private data can be allocated. This also calls
+ * startup for the cpu DAI, component, machine and codec DAI.
+ */
+-static int soc_pcm_open(struct snd_pcm_substream *substream)
++static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream)
+ {
+- struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_component *component;
+ struct snd_soc_dai *dai;
+ int i, ret = 0;
+
++ snd_soc_dpcm_mutex_assert_held(rtd);
++
+ for_each_rtd_components(rtd, i, component)
+ pinctrl_pm_select_default_state(component->dev);
+
+ ret = snd_soc_pcm_component_pm_runtime_get(rtd, substream);
+ if (ret < 0)
+- goto pm_err;
+-
+- mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++ goto err;
+
+ ret = soc_pcm_components_open(substream);
+ if (ret < 0)
+@@ -759,11 +800,6 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
+ ret = snd_soc_dai_startup(dai, substream);
+ if (ret < 0)
+ goto err;
+-
+- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- dai->tx_mask = 0;
+- else
+- dai->rx_mask = 0;
+ }
+
+ /* Dynamic PCM DAI links compat checks use dynamic capabilities */
+@@ -791,16 +827,26 @@ dynamic:
+ snd_soc_runtime_activate(rtd, substream->stream);
+ ret = 0;
+ err:
+- mutex_unlock(&rtd->card->pcm_mutex);
+-pm_err:
+ if (ret < 0) {
+- soc_pcm_clean(substream, 1);
++ soc_pcm_clean(rtd, substream, 1);
+ dev_err(rtd->dev, "%s() failed (%d)", __func__, ret);
+ }
+
+ return ret;
+ }
+
++/* PCM open ops for non-DPCM streams */
++static int soc_pcm_open(struct snd_pcm_substream *substream)
++{
++ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++ int ret;
++
++ snd_soc_dpcm_mutex_lock(rtd);
++ ret = __soc_pcm_open(rtd, substream);
++ snd_soc_dpcm_mutex_unlock(rtd);
++ return ret;
++}
++
+ static void codec2codec_close_delayed_work(struct snd_soc_pcm_runtime *rtd)
+ {
+ /*
+@@ -816,13 +862,13 @@ static void codec2codec_close_delayed_work(struct snd_soc_pcm_runtime *rtd)
+ * rate, etc. This function is non atomic and can be called multiple times,
+ * it can refer to the runtime info.
+ */
+-static int soc_pcm_prepare(struct snd_pcm_substream *substream)
++static int __soc_pcm_prepare(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream)
+ {
+- struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_dai *dai;
+ int i, ret = 0;
+
+- mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++ snd_soc_dpcm_mutex_assert_held(rtd);
+
+ ret = snd_soc_link_prepare(substream);
+ if (ret < 0)
+@@ -850,14 +896,24 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
+ snd_soc_dai_digital_mute(dai, 0, substream->stream);
+
+ out:
+- mutex_unlock(&rtd->card->pcm_mutex);
+-
+ if (ret < 0)
+ dev_err(rtd->dev, "ASoC: %s() failed (%d)\n", __func__, ret);
+
+ return ret;
+ }
+
++/* PCM prepare ops for non-DPCM streams */
++static int soc_pcm_prepare(struct snd_pcm_substream *substream)
++{
++ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++ int ret;
++
++ snd_soc_dpcm_mutex_lock(rtd);
++ ret = __soc_pcm_prepare(rtd, substream);
++ snd_soc_dpcm_mutex_unlock(rtd);
++ return ret;
++}
++
+ static void soc_pcm_codec_params_fixup(struct snd_pcm_hw_params *params,
+ unsigned int mask)
+ {
+@@ -869,13 +925,13 @@ static void soc_pcm_codec_params_fixup(struct snd_pcm_hw_params *params,
+ interval->max = channels;
+ }
+
+-static int soc_pcm_hw_clean(struct snd_pcm_substream *substream, int rollback)
++static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream, int rollback)
+ {
+- struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_dai *dai;
+ int i;
+
+- mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++ snd_soc_dpcm_mutex_assert_held(rtd);
+
+ /* clear the corresponding DAIs parameters when going to be inactive */
+ for_each_rtd_dais(rtd, i, dai) {
+@@ -905,16 +961,28 @@ static int soc_pcm_hw_clean(struct snd_pcm_substream *substream, int rollback)
+ snd_soc_dai_hw_free(dai, substream, rollback);
+ }
+
+- mutex_unlock(&rtd->card->pcm_mutex);
+ return 0;
+ }
+
+ /*
+ * Frees resources allocated by hw_params, can be called multiple times
+ */
++static int __soc_pcm_hw_free(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream)
++{
++ return soc_pcm_hw_clean(rtd, substream, 0);
++}
++
++/* hw_free PCM ops for non-DPCM streams */
+ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
+ {
+- return soc_pcm_hw_clean(substream, 0);
++ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++ int ret;
++
++ snd_soc_dpcm_mutex_lock(rtd);
++ ret = __soc_pcm_hw_free(rtd, substream);
++ snd_soc_dpcm_mutex_unlock(rtd);
++ return ret;
+ }
+
+ /*
+@@ -922,15 +990,15 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
+ * function can also be called multiple times and can allocate buffers
+ * (using snd_pcm_lib_* ). It's non-atomic.
+ */
+-static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
+- struct snd_pcm_hw_params *params)
++static int __soc_pcm_hw_params(struct snd_soc_pcm_runtime *rtd,
++ struct snd_pcm_substream *substream,
++ struct snd_pcm_hw_params *params)
+ {
+- struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_dai *cpu_dai;
+ struct snd_soc_dai *codec_dai;
+ int i, ret = 0;
+
+- mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
++ snd_soc_dpcm_mutex_assert_held(rtd);
+
+ ret = soc_pcm_params_symmetry(substream, params);
+ if (ret)
+@@ -1002,16 +1070,27 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
+
+ ret = snd_soc_pcm_component_hw_params(substream, params);
+ out:
+- mutex_unlock(&rtd->card->pcm_mutex);
+-
+ if (ret < 0) {
+- soc_pcm_hw_clean(substream, 1);
++ soc_pcm_hw_clean(rtd, substream, 1);
+ dev_err(rtd->dev, "ASoC: %s() failed (%d)\n", __func__, ret);
+ }
+
+ return ret;
+ }
+
++/* hw_params PCM ops for non-DPCM streams */
++static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
++ struct snd_pcm_hw_params *params)
++{
++ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
++ int ret;
++
++ snd_soc_dpcm_mutex_lock(rtd);
++ ret = __soc_pcm_hw_params(rtd, substream, params);
++ snd_soc_dpcm_mutex_unlock(rtd);
++ return ret;
++}
++
+ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+ {
+ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+@@ -1128,8 +1207,11 @@ static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream)
+ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
+ struct snd_soc_pcm_runtime *be, int stream)
+ {
++ struct snd_pcm_substream *fe_substream;
++ struct snd_pcm_substream *be_substream;
+ struct snd_soc_dpcm *dpcm;
+- unsigned long flags;
++
++ snd_soc_dpcm_mutex_assert_held(fe);
+
+ /* only add new dpcms */
+ for_each_dpcm_be(fe, stream, dpcm) {
+@@ -1137,7 +1219,21 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
+ return 0;
+ }
+
+- dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_KERNEL);
++ fe_substream = snd_soc_dpcm_get_substream(fe, stream);
++ be_substream = snd_soc_dpcm_get_substream(be, stream);
++
++ if (!fe_substream->pcm->nonatomic && be_substream->pcm->nonatomic) {
++ dev_err(be->dev, "%s: FE is atomic but BE is nonatomic, invalid configuration\n",
++ __func__);
++ return -EINVAL;
++ }
++ if (fe_substream->pcm->nonatomic && !be_substream->pcm->nonatomic) {
++ dev_warn(be->dev, "%s: FE is nonatomic but BE is not, forcing BE as nonatomic\n",
++ __func__);
++ be_substream->pcm->nonatomic = 1;
++ }
++
++ dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_ATOMIC);
+ if (!dpcm)
+ return -ENOMEM;
+
+@@ -1145,10 +1241,10 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
+ dpcm->fe = fe;
+ be->dpcm[stream].runtime = fe->dpcm[stream].runtime;
+ dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW;
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
++ snd_soc_dpcm_stream_lock_irq(fe, stream);
+ list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients);
+ list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients);
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
++ snd_soc_dpcm_stream_unlock_irq(fe, stream);
+
+ dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n",
+ stream ? "capture" : "playback", fe->dai_link->name,
+@@ -1171,6 +1267,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
+ return;
+
+ be_substream = snd_soc_dpcm_get_substream(be, stream);
++ if (!be_substream)
++ return;
+
+ for_each_dpcm_fe(be, stream, dpcm) {
+ if (dpcm->fe == fe)
+@@ -1191,8 +1289,11 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
+ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
+ {
+ struct snd_soc_dpcm *dpcm, *d;
+- unsigned long flags;
++ LIST_HEAD(deleted_dpcms);
++
++ snd_soc_dpcm_mutex_assert_held(fe);
+
++ snd_soc_dpcm_stream_lock_irq(fe, stream);
+ for_each_dpcm_be_safe(fe, stream, dpcm, d) {
+ dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n",
+ stream ? "capture" : "playback",
+@@ -1208,12 +1309,16 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
+ /* BEs still alive need new FE */
+ dpcm_be_reparent(fe, dpcm->be, stream);
+
+- dpcm_remove_debugfs_state(dpcm);
+-
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+ list_del(&dpcm->list_be);
++ list_move(&dpcm->list_fe, &deleted_dpcms);
++ }
++ snd_soc_dpcm_stream_unlock_irq(fe, stream);
++
++ while (!list_empty(&deleted_dpcms)) {
++ dpcm = list_first_entry(&deleted_dpcms, struct snd_soc_dpcm,
++ list_fe);
+ list_del(&dpcm->list_fe);
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
++ dpcm_remove_debugfs_state(dpcm);
+ kfree(dpcm);
+ }
+ }
+@@ -1234,6 +1339,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card,
+ if (!be->dai_link->no_pcm)
+ continue;
+
++ if (!snd_soc_dpcm_get_substream(be, stream))
++ continue;
++
+ for_each_rtd_dais(be, i, dai) {
+ w = snd_soc_dai_get_widget(dai, stream);
+
+@@ -1429,12 +1537,9 @@ int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
+ void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream)
+ {
+ struct snd_soc_dpcm *dpcm;
+- unsigned long flags;
+
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+ for_each_dpcm_be(fe, stream, dpcm)
+ dpcm_set_be_update_state(dpcm->be, stream, SND_SOC_DPCM_UPDATE_NO);
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+ }
+
+ void dpcm_be_dai_stop(struct snd_soc_pcm_runtime *fe, int stream,
+@@ -1470,12 +1575,12 @@ void dpcm_be_dai_stop(struct snd_soc_pcm_runtime *fe, int stream,
+ continue;
+
+ if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) {
+- soc_pcm_hw_free(be_substream);
++ __soc_pcm_hw_free(be, be_substream);
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+ }
+ }
+
+- soc_pcm_close(be_substream);
++ __soc_pcm_close(be, be_substream);
+ be_substream->runtime = NULL;
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
+ }
+@@ -1523,7 +1628,7 @@ int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
+ stream ? "capture" : "playback", be->dai_link->name);
+
+ be_substream->runtime = be->dpcm[stream].runtime;
+- err = soc_pcm_open(be_substream);
++ err = __soc_pcm_open(be, be_substream);
+ if (err < 0) {
+ be->dpcm[stream].users--;
+ if (be->dpcm[stream].users < 0)
+@@ -1534,7 +1639,7 @@ int dpcm_be_dai_startup(struct snd_soc_pcm_runtime *fe, int stream)
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE;
+ goto unwind;
+ }
+-
++ be->dpcm[stream].be_start = 0;
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_OPEN;
+ count++;
+ }
+@@ -1557,10 +1662,14 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream)
+ struct snd_pcm_hardware *hw = &runtime->hw;
+ struct snd_soc_dai *dai;
+ int stream = substream->stream;
++ u64 formats = hw->formats;
+ int i;
+
+ soc_pcm_hw_init(hw);
+
++ if (formats)
++ hw->formats &= formats;
++
+ for_each_rtd_cpu_dais(fe, i, dai) {
+ struct snd_soc_pcm_stream *cpu_stream;
+
+@@ -1767,7 +1876,7 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream)
+ dev_dbg(fe->dev, "ASoC: open FE %s\n", fe->dai_link->name);
+
+ /* start the DAI frontend */
+- ret = soc_pcm_open(fe_substream);
++ ret = __soc_pcm_open(fe, fe_substream);
+ if (ret < 0)
+ goto unwind;
+
+@@ -1798,6 +1907,8 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream)
+ struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream);
+ int stream = substream->stream;
+
++ snd_soc_dpcm_mutex_assert_held(fe);
++
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE);
+
+ /* shutdown the BEs */
+@@ -1806,7 +1917,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream)
+ dev_dbg(fe->dev, "ASoC: close FE %s\n", fe->dai_link->name);
+
+ /* now shutdown the frontend */
+- soc_pcm_close(substream);
++ __soc_pcm_close(fe, substream);
+
+ /* run the stream stop event */
+ dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP);
+@@ -1851,7 +1962,7 @@ void dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream)
+ dev_dbg(be->dev, "ASoC: hw_free BE %s\n",
+ be->dai_link->name);
+
+- soc_pcm_hw_free(be_substream);
++ __soc_pcm_hw_free(be, be_substream);
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+ }
+@@ -1862,13 +1973,13 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream)
+ struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream);
+ int stream = substream->stream;
+
+- mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ snd_soc_dpcm_mutex_lock(fe);
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE);
+
+ dev_dbg(fe->dev, "ASoC: hw_free FE %s\n", fe->dai_link->name);
+
+ /* call hw_free on the frontend */
+- soc_pcm_hw_free(substream);
++ soc_pcm_hw_clean(fe, substream, 0);
+
+ /* only hw_params backends that are either sinks or sources
+ * to this frontend DAI */
+@@ -1877,7 +1988,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream)
+ fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO);
+
+- mutex_unlock(&fe->card->mutex);
++ snd_soc_dpcm_mutex_unlock(fe);
+ return 0;
+ }
+
+@@ -1921,7 +2032,7 @@ int dpcm_be_dai_hw_params(struct snd_soc_pcm_runtime *fe, int stream)
+ dev_dbg(be->dev, "ASoC: hw_params BE %s\n",
+ be->dai_link->name);
+
+- ret = soc_pcm_hw_params(be_substream, &dpcm->hw_params);
++ ret = __soc_pcm_hw_params(be, be_substream, &dpcm->hw_params);
+ if (ret < 0)
+ goto unwind;
+
+@@ -1951,7 +2062,7 @@ unwind:
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP))
+ continue;
+
+- soc_pcm_hw_free(be_substream);
++ __soc_pcm_hw_free(be, be_substream);
+ }
+
+ return ret;
+@@ -1963,7 +2074,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
+ struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream);
+ int ret, stream = substream->stream;
+
+- mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ snd_soc_dpcm_mutex_lock(fe);
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE);
+
+ memcpy(&fe->dpcm[stream].hw_params, params,
+@@ -1977,7 +2088,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
+ params_channels(params), params_format(params));
+
+ /* call hw_params on the frontend */
+- ret = soc_pcm_hw_params(substream, params);
++ ret = __soc_pcm_hw_params(fe, substream, params);
+ if (ret < 0)
+ dpcm_be_dai_hw_free(fe, stream);
+ else
+@@ -1985,7 +2096,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream,
+
+ out:
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO);
+- mutex_unlock(&fe->card->mutex);
++ snd_soc_dpcm_mutex_unlock(fe);
+
+ if (ret < 0)
+ dev_err(fe->dev, "ASoC: %s failed (%d)\n", __func__, ret);
+@@ -1998,6 +2109,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
+ {
+ struct snd_soc_pcm_runtime *be;
+ struct snd_soc_dpcm *dpcm;
++ unsigned long flags;
+ int ret = 0;
+
+ for_each_dpcm_be(fe, stream, dpcm) {
+@@ -2006,89 +2118,128 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
+ be = dpcm->be;
+ be_substream = snd_soc_dpcm_get_substream(be, stream);
+
++ snd_soc_dpcm_stream_lock_irqsave_nested(be, stream, flags);
++
+ /* is this op for this BE ? */
+ if (!snd_soc_dpcm_be_can_update(fe, be, stream))
+- continue;
++ goto next;
+
+ dev_dbg(be->dev, "ASoC: trigger BE %s cmd %d\n",
+ be->dai_link->name, cmd);
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+- if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
++ if (!be->dpcm[stream].be_start &&
++ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) &&
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
+- continue;
++ goto next;
++
++ be->dpcm[stream].be_start++;
++ if (be->dpcm[stream].be_start != 1)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ be->dpcm[stream].be_start--;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_START;
+ break;
+ case SNDRV_PCM_TRIGGER_RESUME:
+ if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND))
+- continue;
++ goto next;
++
++ be->dpcm[stream].be_start++;
++ if (be->dpcm[stream].be_start != 1)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ be->dpcm[stream].be_start--;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_START;
+ break;
+ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+- if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
+- continue;
++ if (!be->dpcm[stream].be_start &&
++ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) &&
++ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
++ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
++ goto next;
++
++ be->dpcm[stream].be_start++;
++ if (be->dpcm[stream].be_start != 1)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ be->dpcm[stream].be_start--;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_START;
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) &&
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
+- continue;
++ goto next;
+
+- if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream))
+- continue;
++ if (be->dpcm[stream].state == SND_SOC_DPCM_STATE_START)
++ be->dpcm[stream].be_start--;
++
++ if (be->dpcm[stream].be_start != 0)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ if (be->dpcm[stream].state == SND_SOC_DPCM_STATE_START)
++ be->dpcm[stream].be_start++;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_STOP;
+ break;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START)
+- continue;
++ goto next;
+
+- if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream))
+- continue;
++ be->dpcm[stream].be_start--;
++ if (be->dpcm[stream].be_start != 0)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ be->dpcm[stream].be_start++;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_SUSPEND;
+ break;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START)
+- continue;
++ goto next;
+
+- if (!snd_soc_dpcm_can_be_free_stop(fe, be, stream))
+- continue;
++ be->dpcm[stream].be_start--;
++ if (be->dpcm[stream].be_start != 0)
++ goto next;
+
+ ret = soc_pcm_trigger(be_substream, cmd);
+- if (ret)
+- goto end;
++ if (ret) {
++ be->dpcm[stream].be_start++;
++ goto next;
++ }
+
+ be->dpcm[stream].state = SND_SOC_DPCM_STATE_PAUSED;
+ break;
+ }
++next:
++ snd_soc_dpcm_stream_unlock_irqrestore(be, stream, flags);
++ if (ret)
++ break;
+ }
+-end:
+ if (ret < 0)
+ dev_err(fe->dev, "ASoC: %s() failed at %s (%d)\n",
+ __func__, be->dai_link->name, ret);
+@@ -2247,6 +2398,9 @@ int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
+ if (!snd_soc_dpcm_be_can_update(fe, be, stream))
+ continue;
+
++ if (!snd_soc_dpcm_can_be_prepared(fe, be, stream))
++ continue;
++
+ if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) &&
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
+ (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND) &&
+@@ -2256,7 +2410,7 @@ int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
+ dev_dbg(be->dev, "ASoC: prepare BE %s\n",
+ be->dai_link->name);
+
+- ret = soc_pcm_prepare(be_substream);
++ ret = __soc_pcm_prepare(be, be_substream);
+ if (ret < 0)
+ break;
+
+@@ -2274,7 +2428,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
+ struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(substream);
+ int stream = substream->stream, ret = 0;
+
+- mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ snd_soc_dpcm_mutex_lock(fe);
+
+ dev_dbg(fe->dev, "ASoC: prepare FE %s\n", fe->dai_link->name);
+
+@@ -2293,7 +2447,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
+ goto out;
+
+ /* call prepare on the frontend */
+- ret = soc_pcm_prepare(substream);
++ ret = __soc_pcm_prepare(fe, substream);
+ if (ret < 0)
+ goto out;
+
+@@ -2301,7 +2455,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
+
+ out:
+ dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO);
+- mutex_unlock(&fe->card->mutex);
++ snd_soc_dpcm_mutex_unlock(fe);
+
+ if (ret < 0)
+ dev_err(fe->dev, "ASoC: %s() failed (%d)\n", __func__, ret);
+@@ -2352,7 +2506,6 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream)
+ struct snd_soc_dpcm *dpcm;
+ enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream];
+ int ret = 0;
+- unsigned long flags;
+
+ dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n",
+ stream ? "capture" : "playback", fe->dai_link->name);
+@@ -2421,7 +2574,6 @@ close:
+ dpcm_be_dai_shutdown(fe, stream);
+ disconnect:
+ /* disconnect any pending BEs */
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+ for_each_dpcm_be(fe, stream, dpcm) {
+ struct snd_soc_pcm_runtime *be = dpcm->be;
+
+@@ -2433,7 +2585,6 @@ disconnect:
+ be->dpcm[stream].state == SND_SOC_DPCM_STATE_NEW)
+ dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
+ }
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+ if (ret < 0)
+ dev_err(fe->dev, "ASoC: %s() failed (%d)\n", __func__, ret);
+@@ -2508,7 +2659,7 @@ int snd_soc_dpcm_runtime_update(struct snd_soc_card *card)
+ struct snd_soc_pcm_runtime *fe;
+ int ret = 0;
+
+- mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ mutex_lock_nested(&card->pcm_mutex, card->pcm_subclass);
+ /* shutdown all old paths first */
+ for_each_card_rtds(card, fe) {
+ ret = soc_dpcm_fe_runtime_update(fe, 0);
+@@ -2524,7 +2675,7 @@ int snd_soc_dpcm_runtime_update(struct snd_soc_card *card)
+ }
+
+ out:
+- mutex_unlock(&card->mutex);
++ mutex_unlock(&card->pcm_mutex);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_dpcm_runtime_update);
+@@ -2535,6 +2686,8 @@ static void dpcm_fe_dai_cleanup(struct snd_pcm_substream *fe_substream)
+ struct snd_soc_dpcm *dpcm;
+ int stream = fe_substream->stream;
+
++ snd_soc_dpcm_mutex_assert_held(fe);
++
+ /* mark FE's links ready to prune */
+ for_each_dpcm_be(fe, stream, dpcm)
+ dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE;
+@@ -2549,12 +2702,12 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream)
+ struct snd_soc_pcm_runtime *fe = asoc_substream_to_rtd(fe_substream);
+ int ret;
+
+- mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ snd_soc_dpcm_mutex_lock(fe);
+ ret = dpcm_fe_dai_shutdown(fe_substream);
+
+ dpcm_fe_dai_cleanup(fe_substream);
+
+- mutex_unlock(&fe->card->mutex);
++ snd_soc_dpcm_mutex_unlock(fe);
+ return ret;
+ }
+
+@@ -2565,7 +2718,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream)
+ int ret;
+ int stream = fe_substream->stream;
+
+- mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
++ snd_soc_dpcm_mutex_lock(fe);
+ fe->dpcm[stream].runtime = fe_substream->runtime;
+
+ ret = dpcm_path_get(fe, stream, &list);
+@@ -2582,7 +2735,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream)
+ dpcm_clear_pending_state(fe, stream);
+ dpcm_path_put(&list);
+ open_end:
+- mutex_unlock(&fe->card->mutex);
++ snd_soc_dpcm_mutex_unlock(fe);
+ return ret;
+ }
+
+@@ -2843,10 +2996,8 @@ static int snd_soc_dpcm_check_state(struct snd_soc_pcm_runtime *fe,
+ struct snd_soc_dpcm *dpcm;
+ int state;
+ int ret = 1;
+- unsigned long flags;
+ int i;
+
+- spin_lock_irqsave(&fe->card->dpcm_lock, flags);
+ for_each_dpcm_fe(be, stream, dpcm) {
+
+ if (dpcm->fe == fe)
+@@ -2860,7 +3011,6 @@ static int snd_soc_dpcm_check_state(struct snd_soc_pcm_runtime *fe,
+ }
+ }
+ }
+- spin_unlock_irqrestore(&fe->card->dpcm_lock, flags);
+
+ /* it's safe to do this BE DAI */
+ return ret;
+@@ -2900,3 +3050,20 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe,
+ return snd_soc_dpcm_check_state(fe, be, stream, state, ARRAY_SIZE(state));
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params);
++
++/*
++ * We can only prepare a BE DAI if any of it's FE are not prepared,
++ * running or paused for the specified stream direction.
++ */
++int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe,
++ struct snd_soc_pcm_runtime *be, int stream)
++{
++ const enum snd_soc_dpcm_state state[] = {
++ SND_SOC_DPCM_STATE_START,
++ SND_SOC_DPCM_STATE_PAUSED,
++ SND_SOC_DPCM_STATE_PREPARE,
++ };
++
++ return snd_soc_dpcm_check_state(fe, be, stream, state, ARRAY_SIZE(state));
++}
++EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_prepared);
+diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
+index f6e5ac3e03140..55b69e3c67186 100644
+--- a/sound/soc/soc-topology.c
++++ b/sound/soc/soc-topology.c
+@@ -510,7 +510,8 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr,
+
+ if (le32_to_cpu(hdr->ops.info) == SND_SOC_TPLG_CTL_BYTES
+ && k->iface & SNDRV_CTL_ELEM_IFACE_MIXER
+- && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE
++ && (k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ
++ || k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
+ && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) {
+ struct soc_bytes_ext *sbe;
+ struct snd_soc_tplg_bytes_control *be;
+@@ -1479,13 +1480,17 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
+
+ template.num_kcontrols = le32_to_cpu(w->num_kcontrols);
+ kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL);
+- if (!kc)
+- goto err;
++ if (!kc) {
++ ret = -ENOMEM;
++ goto hdr_err;
++ }
+
+ kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int),
+ GFP_KERNEL);
+- if (!kcontrol_type)
+- goto err;
++ if (!kcontrol_type) {
++ ret = -ENOMEM;
++ goto hdr_err;
++ }
+
+ for (i = 0; i < w->num_kcontrols; i++) {
+ control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos;
+@@ -2674,6 +2679,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load);
+ /* remove dynamic controls from the component driver */
+ int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
+ {
++ struct snd_card *card = comp->card->snd_card;
+ struct snd_soc_dobj *dobj, *next_dobj;
+ int pass = SOC_TPLG_PASS_END;
+
+@@ -2681,6 +2687,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
+ while (pass >= SOC_TPLG_PASS_START) {
+
+ /* remove mixer controls */
++ down_write(&card->controls_rwsem);
+ list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list,
+ list) {
+
+@@ -2719,6 +2726,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
+ break;
+ }
+ }
++ up_write(&card->controls_rwsem);
+ pass--;
+ }
+
+diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
+index 299b5d6ebfd13..f2c9d97c19c74 100644
+--- a/sound/soc/soc-utils.c
++++ b/sound/soc/soc-utils.c
+@@ -206,7 +206,7 @@ int __init snd_soc_util_init(void)
+ return ret;
+ }
+
+-void __exit snd_soc_util_exit(void)
++void snd_soc_util_exit(void)
+ {
+ platform_driver_unregister(&soc_dummy_driver);
+ platform_device_unregister(soc_dummy_dev);
+diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
+index a51a928ea40a7..5f780ef9581a9 100644
+--- a/sound/soc/sof/debug.c
++++ b/sound/soc/sof/debug.c
+@@ -668,9 +668,9 @@ static int memory_info_update(struct snd_sof_dev *sdev, char *buf, size_t buff_s
+ }
+
+ for (i = 0, len = 0; i < reply->num_elems; i++) {
+- ret = snprintf(buf + len, buff_size - len, "zone %d.%d used %#8x free %#8x\n",
+- reply->elems[i].zone, reply->elems[i].id,
+- reply->elems[i].used, reply->elems[i].free);
++ ret = scnprintf(buf + len, buff_size - len, "zone %d.%d used %#8x free %#8x\n",
++ reply->elems[i].zone, reply->elems[i].id,
++ reply->elems[i].used, reply->elems[i].free);
+ if (ret < 0)
+ goto error;
+ len += ret;
+diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c
+index 892e1482f97fa..b3d3edc36bb00 100644
+--- a/sound/soc/sof/imx/imx8m.c
++++ b/sound/soc/sof/imx/imx8m.c
+@@ -191,6 +191,7 @@ static int imx8m_probe(struct snd_sof_dev *sdev)
+ }
+
+ ret = of_address_to_resource(res_node, 0, &res);
++ of_node_put(res_node);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get reserved region address\n");
+ goto exit_pdev_unregister;
+diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig
+index 88b6176af021c..d83e1a36707af 100644
+--- a/sound/soc/sof/intel/Kconfig
++++ b/sound/soc/sof/intel/Kconfig
+@@ -84,6 +84,7 @@ if SND_SOC_SOF_PCI
+ config SND_SOC_SOF_MERRIFIELD
+ tristate "SOF support for Tangier/Merrifield"
+ default SND_SOC_SOF_PCI
++ select SND_SOC_SOF_PCI_DEV
+ select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
+ help
+ This adds support for Sound Open Firmware for Intel(R) platforms
+diff --git a/sound/soc/sof/intel/apl.c b/sound/soc/sof/intel/apl.c
+index c7ed2b3d6abca..0a42034c4655e 100644
+--- a/sound/soc/sof/intel/apl.c
++++ b/sound/soc/sof/intel/apl.c
+@@ -139,6 +139,7 @@ const struct sof_intel_dsp_desc apl_chip_info = {
+ .ipc_ack = HDA_DSP_REG_HIPCIE,
+ .ipc_ack_mask = HDA_DSP_REG_HIPCIE_DONE,
+ .ipc_ctl = HDA_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 150,
+ .ssp_count = APL_SSP_COUNT,
+ .ssp_base_offset = APL_SSP_BASE_OFFSET,
+diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c
+index e115e12a856fd..a63b235763ede 100644
+--- a/sound/soc/sof/intel/cnl.c
++++ b/sound/soc/sof/intel/cnl.c
+@@ -344,6 +344,7 @@ const struct sof_intel_dsp_desc cnl_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = CNL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+@@ -363,6 +364,7 @@ const struct sof_intel_dsp_desc jsl_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c
+index 30025d3c16b6e..0862ff8b66273 100644
+--- a/sound/soc/sof/intel/hda-bus.c
++++ b/sound/soc/sof/intel/hda-bus.c
+@@ -10,6 +10,8 @@
+ #include <linux/io.h>
+ #include <sound/hdaudio.h>
+ #include <sound/hda_i915.h>
++#include <sound/hda_codec.h>
++#include <sound/hda_register.h>
+ #include "../sof-priv.h"
+ #include "hda.h"
+
+@@ -21,6 +23,18 @@
+ #endif
+
+ #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
++static void update_codec_wake_enable(struct hdac_bus *bus, unsigned int addr, bool link_power)
++{
++ unsigned int mask = snd_hdac_chip_readw(bus, WAKEEN);
++
++ if (link_power)
++ mask &= ~BIT(addr);
++ else
++ mask |= BIT(addr);
++
++ snd_hdac_chip_updatew(bus, WAKEEN, STATESTS_INT_MASK, mask);
++}
++
+ static void sof_hda_bus_link_power(struct hdac_device *codec, bool enable)
+ {
+ struct hdac_bus *bus = codec->bus;
+@@ -41,6 +55,9 @@ static void sof_hda_bus_link_power(struct hdac_device *codec, bool enable)
+ */
+ if (codec->addr == HDA_IDISP_ADDR && !enable)
+ snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false);
++
++ /* WAKEEN needs to be set for disabled links */
++ update_codec_wake_enable(bus, codec->addr, enable);
+ }
+
+ static const struct hdac_bus_ops bus_core_ops = {
+diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
+index c1f9f0f584647..56653d78d2200 100644
+--- a/sound/soc/sof/intel/hda-dai.c
++++ b/sound/soc/sof/intel/hda-dai.c
+@@ -68,6 +68,7 @@ static struct hdac_ext_stream *
+ return NULL;
+ }
+
++ spin_lock_irq(&bus->reg_lock);
+ list_for_each_entry(stream, &bus->stream_list, list) {
+ struct hdac_ext_stream *hstream =
+ stream_to_hdac_ext_stream(stream);
+@@ -107,12 +108,12 @@ static struct hdac_ext_stream *
+ * is updated in snd_hdac_ext_stream_decouple().
+ */
+ if (!res->decoupled)
+- snd_hdac_ext_stream_decouple(bus, res, true);
+- spin_lock_irq(&bus->reg_lock);
++ snd_hdac_ext_stream_decouple_locked(bus, res, true);
++
+ res->link_locked = 1;
+ res->link_substream = substream;
+- spin_unlock_irq(&bus->reg_lock);
+ }
++ spin_unlock_irq(&bus->reg_lock);
+
+ return res;
+ }
+@@ -211,6 +212,10 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream,
+ int stream_tag;
+ int ret;
+
++ link = snd_hdac_ext_bus_get_link(bus, codec_dai->component->name);
++ if (!link)
++ return -EINVAL;
++
+ /* get stored dma data if resuming from system suspend */
+ link_dev = snd_soc_dai_get_dma_data(dai, substream);
+ if (!link_dev) {
+@@ -231,15 +236,8 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream,
+ if (ret < 0)
+ return ret;
+
+- link = snd_hdac_ext_bus_get_link(bus, codec_dai->component->name);
+- if (!link)
+- return -EINVAL;
+-
+- /* set the stream tag in the codec dai dma params */
+- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+- snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0);
+- else
+- snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0);
++ /* set the hdac_stream in the codec dai */
++ snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream);
+
+ p_params.s_fmt = snd_pcm_format_width(params_format(params));
+ p_params.ch = params_channels(params);
+diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
+index 623cf291e2074..262a70791a8f8 100644
+--- a/sound/soc/sof/intel/hda-dsp.c
++++ b/sound/soc/sof/intel/hda-dsp.c
+@@ -623,8 +623,7 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
+ hda_dsp_ipc_int_disable(sdev);
+
+ #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+- if (runtime_suspend)
+- hda_codec_jack_wake_enable(sdev, true);
++ hda_codec_jack_wake_enable(sdev, runtime_suspend);
+
+ /* power down all hda link */
+ snd_hdac_ext_bus_link_power_down_all(bus);
+diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c
+index 6f4771bf9de34..439cb33d2a710 100644
+--- a/sound/soc/sof/intel/hda-loader.c
++++ b/sound/soc/sof/intel/hda-loader.c
+@@ -48,7 +48,7 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig
+ ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab);
+ if (ret < 0) {
+ dev_err(sdev->dev, "error: memory alloc failed: %d\n", ret);
+- goto error;
++ goto out_put;
+ }
+
+ hstream->period_bytes = 0;/* initialize period_bytes */
+@@ -59,29 +59,30 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig
+ ret = hda_dsp_iccmax_stream_hw_params(sdev, dsp_stream, dmab, NULL);
+ if (ret < 0) {
+ dev_err(sdev->dev, "error: iccmax stream prepare failed: %d\n", ret);
+- goto error;
++ goto out_free;
+ }
+ } else {
+ ret = hda_dsp_stream_hw_params(sdev, dsp_stream, dmab, NULL);
+ if (ret < 0) {
+ dev_err(sdev->dev, "error: hdac prepare failed: %d\n", ret);
+- goto error;
++ goto out_free;
+ }
+ hda_dsp_stream_spib_config(sdev, dsp_stream, HDA_DSP_SPIB_ENABLE, size);
+ }
+
+ return dsp_stream;
+
+-error:
+- hda_dsp_stream_put(sdev, direction, hstream->stream_tag);
++out_free:
+ snd_dma_free_pages(dmab);
++out_put:
++ hda_dsp_stream_put(sdev, direction, hstream->stream_tag);
+ return ERR_PTR(ret);
+ }
+
+ /*
+- * first boot sequence has some extra steps. core 0 waits for power
+- * status on core 1, so power up core 1 also momentarily, keep it in
+- * reset/stall and then turn it off
++ * first boot sequence has some extra steps.
++ * power on all host managed cores and only unstall/run the boot core to boot the
++ * DSP then turn off all non boot cores (if any) is powered on.
+ */
+ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag)
+ {
+@@ -116,7 +117,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag)
+ ((stream_tag - 1) << 9)));
+
+ /* step 3: unset core 0 reset state & unstall/run core 0 */
+- ret = hda_dsp_core_run(sdev, BIT(0));
++ ret = hda_dsp_core_run(sdev, chip->init_core_mask);
+ if (ret < 0) {
+ if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS)
+ dev_err(sdev->dev,
+@@ -162,7 +163,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag)
+
+ /* step 7: wait for ROM init */
+ ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR,
+- HDA_DSP_SRAM_REG_ROM_STATUS, status,
++ chip->rom_status_reg, status,
+ ((status & HDA_DSP_ROM_STS_MASK)
+ == HDA_DSP_ROM_INIT),
+ HDA_DSP_REG_POLL_INTERVAL_US,
+@@ -173,8 +174,8 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag)
+
+ if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS)
+ dev_err(sdev->dev,
+- "error: %s: timeout HDA_DSP_SRAM_REG_ROM_STATUS read\n",
+- __func__);
++ "%s: timeout with rom_status_reg (%#x) read\n",
++ __func__, chip->rom_status_reg);
+
+ err:
+ flags = SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_PCI | SOF_DBG_DUMP_MBOX;
+@@ -250,6 +251,8 @@ static int cl_cleanup(struct snd_sof_dev *sdev, struct snd_dma_buffer *dmab,
+
+ static int cl_copy_fw(struct snd_sof_dev *sdev, struct hdac_ext_stream *stream)
+ {
++ struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
++ const struct sof_intel_dsp_desc *chip = hda->desc;
+ unsigned int reg;
+ int ret, status;
+
+@@ -260,7 +263,7 @@ static int cl_copy_fw(struct snd_sof_dev *sdev, struct hdac_ext_stream *stream)
+ }
+
+ status = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR,
+- HDA_DSP_SRAM_REG_ROM_STATUS, reg,
++ chip->rom_status_reg, reg,
+ ((reg & HDA_DSP_ROM_STS_MASK)
+ == HDA_DSP_ROM_FW_ENTERED),
+ HDA_DSP_REG_POLL_INTERVAL_US,
+@@ -273,8 +276,8 @@ static int cl_copy_fw(struct snd_sof_dev *sdev, struct hdac_ext_stream *stream)
+
+ if (status < 0) {
+ dev_err(sdev->dev,
+- "error: %s: timeout HDA_DSP_SRAM_REG_ROM_STATUS read\n",
+- __func__);
++ "%s: timeout with rom_status_reg (%#x) read\n",
++ __func__, chip->rom_status_reg);
+ }
+
+ ret = cl_trigger(sdev, stream, SNDRV_PCM_TRIGGER_STOP);
+diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c
+index cc8ddef37f37b..d81e87e304e42 100644
+--- a/sound/soc/sof/intel/hda-pcm.c
++++ b/sound/soc/sof/intel/hda-pcm.c
+@@ -242,6 +242,7 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev,
+ runtime->hw.info &= ~SNDRV_PCM_INFO_PAUSE;
+
+ if (hda_always_enable_dmi_l1 ||
++ direction == SNDRV_PCM_STREAM_PLAYBACK ||
+ spcm->stream[substream->stream].d0i3_compatible)
+ flags |= SOF_HDA_STREAM_DMI_L1_COMPATIBLE;
+
+diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
+index f60e2c57d3d0c..038d09f6203aa 100644
+--- a/sound/soc/sof/intel/hda.c
++++ b/sound/soc/sof/intel/hda.c
+@@ -353,11 +353,13 @@ static const struct hda_dsp_msg_code hda_dsp_rom_msg[] = {
+
+ static void hda_dsp_get_status(struct snd_sof_dev *sdev)
+ {
++ const struct sof_intel_dsp_desc *chip;
+ u32 status;
+ int i;
+
++ chip = get_chip_info(sdev->pdata);
+ status = snd_sof_dsp_read(sdev, HDA_DSP_BAR,
+- HDA_DSP_SRAM_REG_ROM_STATUS);
++ chip->rom_status_reg);
+
+ for (i = 0; i < ARRAY_SIZE(hda_dsp_rom_msg); i++) {
+ if (status == hda_dsp_rom_msg[i].code) {
+@@ -402,14 +404,16 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev,
+ /* dump the first 8 dwords representing the extended ROM status */
+ static void hda_dsp_dump_ext_rom_status(struct snd_sof_dev *sdev, u32 flags)
+ {
++ const struct sof_intel_dsp_desc *chip;
+ char msg[128];
+ int len = 0;
+ u32 value;
+ int i;
+
++ chip = get_chip_info(sdev->pdata);
+ for (i = 0; i < HDA_EXT_ROM_STATUS_SIZE; i++) {
+- value = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_ROM_STATUS + i * 0x4);
+- len += snprintf(msg + len, sizeof(msg) - len, " 0x%x", value);
++ value = snd_sof_dsp_read(sdev, HDA_DSP_BAR, chip->rom_status_reg + i * 0x4);
++ len += scnprintf(msg + len, sizeof(msg) - len, " 0x%x", value);
+ }
+
+ sof_dev_dbg_or_err(sdev->dev, flags & SOF_DBG_DUMP_FORCE_ERR_LEVEL,
+@@ -696,6 +700,20 @@ skip_soundwire:
+ return 0;
+ }
+
++static void hda_check_for_state_change(struct snd_sof_dev *sdev)
++{
++#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
++ struct hdac_bus *bus = sof_to_bus(sdev);
++ unsigned int codec_mask;
++
++ codec_mask = snd_hdac_chip_readw(bus, STATESTS);
++ if (codec_mask) {
++ hda_codec_jack_check(sdev);
++ snd_hdac_chip_writew(bus, STATESTS, codec_mask);
++ }
++#endif
++}
++
+ static irqreturn_t hda_dsp_interrupt_handler(int irq, void *context)
+ {
+ struct snd_sof_dev *sdev = context;
+@@ -737,6 +755,8 @@ static irqreturn_t hda_dsp_interrupt_thread(int irq, void *context)
+ if (hda_sdw_check_wakeen_irq(sdev))
+ hda_sdw_process_wakeen(sdev);
+
++ hda_check_for_state_change(sdev);
++
+ /* enable GIE interrupt */
+ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+ SOF_HDA_INTCTL,
+@@ -1020,12 +1040,22 @@ static int hda_generic_machine_select(struct snd_sof_dev *sdev)
+ pdata->machine = hda_mach;
+ pdata->tplg_filename = tplg_filename;
+
+- if (codec_num == 2) {
++ if (codec_num == 2 ||
++ (codec_num == 1 && !HDA_IDISP_CODEC(bus->codec_mask))) {
+ /*
+ * Prevent SoundWire links from starting when an external
+ * HDaudio codec is used
+ */
+ hda_mach->mach_params.link_mask = 0;
++ } else {
++ /*
++ * Allow SoundWire links to start when no external HDaudio codec
++ * was detected. This will not create a SoundWire card but
++ * will help detect if any SoundWire codec reports as ATTACHED.
++ */
++ struct sof_intel_hda_dev *hdev = sdev->pdata->hw_pdata;
++
++ hda_mach->mach_params.link_mask = hdev->info.link_mask;
+ }
+ }
+ }
+@@ -1056,7 +1086,7 @@ static bool link_slaves_found(struct snd_sof_dev *sdev,
+ struct hdac_bus *bus = sof_to_bus(sdev);
+ struct sdw_intel_slave_id *ids = sdw->ids;
+ int num_slaves = sdw->num_slaves;
+- unsigned int part_id, link_id, unique_id, mfg_id;
++ unsigned int part_id, link_id, unique_id, mfg_id, version;
+ int i, j, k;
+
+ for (i = 0; i < link->num_adr; i++) {
+@@ -1066,12 +1096,14 @@ static bool link_slaves_found(struct snd_sof_dev *sdev,
+ mfg_id = SDW_MFG_ID(adr);
+ part_id = SDW_PART_ID(adr);
+ link_id = SDW_DISCO_LINK_ID(adr);
++ version = SDW_VERSION(adr);
+
+ for (j = 0; j < num_slaves; j++) {
+ /* find out how many identical parts were reported on that link */
+ if (ids[j].link_id == link_id &&
+ ids[j].id.part_id == part_id &&
+- ids[j].id.mfg_id == mfg_id)
++ ids[j].id.mfg_id == mfg_id &&
++ ids[j].id.sdw_version == version)
+ reported_part_count++;
+ }
+
+@@ -1080,21 +1112,24 @@ static bool link_slaves_found(struct snd_sof_dev *sdev,
+
+ if (ids[j].link_id != link_id ||
+ ids[j].id.part_id != part_id ||
+- ids[j].id.mfg_id != mfg_id)
++ ids[j].id.mfg_id != mfg_id ||
++ ids[j].id.sdw_version != version)
+ continue;
+
+ /* find out how many identical parts are expected */
+ for (k = 0; k < link->num_adr; k++) {
+ u64 adr2 = link->adr_d[k].adr;
+- unsigned int part_id2, link_id2, mfg_id2;
++ unsigned int part_id2, link_id2, mfg_id2, version2;
+
+ mfg_id2 = SDW_MFG_ID(adr2);
+ part_id2 = SDW_PART_ID(adr2);
+ link_id2 = SDW_DISCO_LINK_ID(adr2);
++ version2 = SDW_VERSION(adr2);
+
+ if (link_id2 == link_id &&
+ part_id2 == part_id &&
+- mfg_id2 == mfg_id)
++ mfg_id2 == mfg_id &&
++ version2 == version)
+ expected_part_count++;
+ }
+
+diff --git a/sound/soc/sof/intel/icl.c b/sound/soc/sof/intel/icl.c
+index ee095b8f2d01c..4065c4d3912a5 100644
+--- a/sound/soc/sof/intel/icl.c
++++ b/sound/soc/sof/intel/icl.c
+@@ -139,6 +139,7 @@ const struct sof_intel_dsp_desc icl_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
+index d04ce84fe7cc2..beb2fb3cd0141 100644
+--- a/sound/soc/sof/intel/pci-tgl.c
++++ b/sound/soc/sof/intel/pci-tgl.c
+@@ -117,8 +117,12 @@ static const struct pci_device_id sof_pci_ids[] = {
+ .driver_data = (unsigned long)&adls_desc},
+ { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */
+ .driver_data = (unsigned long)&adl_desc},
++ { PCI_DEVICE(0x8086, 0x51cd), /* ADL-P */
++ .driver_data = (unsigned long)&adl_desc},
+ { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */
+ .driver_data = (unsigned long)&adl_desc},
++ { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */
++ .driver_data = (unsigned long)&adl_desc},
+ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+diff --git a/sound/soc/sof/intel/shim.h b/sound/soc/sof/intel/shim.h
+index e9f7d4d7fcce8..96707758ebc51 100644
+--- a/sound/soc/sof/intel/shim.h
++++ b/sound/soc/sof/intel/shim.h
+@@ -161,6 +161,7 @@ struct sof_intel_dsp_desc {
+ int ipc_ack;
+ int ipc_ack_mask;
+ int ipc_ctl;
++ int rom_status_reg;
+ int rom_init_timeout;
+ int ssp_count; /* ssp count of the platform */
+ int ssp_base_offset; /* base address of the SSPs */
+diff --git a/sound/soc/sof/intel/tgl.c b/sound/soc/sof/intel/tgl.c
+index 199d41a7dc9bf..aba52d8628aa4 100644
+--- a/sound/soc/sof/intel/tgl.c
++++ b/sound/soc/sof/intel/tgl.c
+@@ -134,6 +134,7 @@ const struct sof_intel_dsp_desc tgl_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+@@ -153,6 +154,7 @@ const struct sof_intel_dsp_desc tglh_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+@@ -172,6 +174,7 @@ const struct sof_intel_dsp_desc ehl_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+@@ -191,6 +194,7 @@ const struct sof_intel_dsp_desc adls_chip_info = {
+ .ipc_ack = CNL_DSP_REG_HIPCIDA,
+ .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+ .ipc_ctl = CNL_DSP_REG_HIPCCTL,
++ .rom_status_reg = HDA_DSP_SRAM_REG_ROM_STATUS,
+ .rom_init_timeout = 300,
+ .ssp_count = ICL_SSP_COUNT,
+ .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
+index bc9e707656789..3b4c011e02834 100644
+--- a/sound/soc/sof/sof-pci-dev.c
++++ b/sound/soc/sof/sof-pci-dev.c
+@@ -80,7 +80,7 @@ static const struct dmi_system_id community_key_platforms[] = {
+ {
+ .ident = "Google Chromebooks",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Google"),
++ DMI_MATCH(DMI_PRODUCT_FAMILY, "Google"),
+ }
+ },
+ {},
+@@ -129,6 +129,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+
+ dev_dbg(&pci->dev, "PCI DSP detected");
+
++ if (!desc) {
++ dev_err(dev, "error: no matching PCI descriptor\n");
++ return -ENODEV;
++ }
++
+ if (!desc->ops) {
+ dev_err(dev, "error: no matching PCI descriptor ops\n");
+ return -ENODEV;
+diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
+index cc9585bfa4e9f..1bb2dcf37ffe9 100644
+--- a/sound/soc/sof/topology.c
++++ b/sound/soc/sof/topology.c
+@@ -2598,6 +2598,15 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
+
+ /* power down the pipeline schedule core */
+ pipeline = swidget->private;
++
++ /*
++ * Runtime PM should still function normally if topology loading fails and
++ * it's components are unloaded. Do not power down the primary core so that the
++ * CTX_SAVE IPC can succeed during runtime suspend.
++ */
++ if (pipeline->core == SOF_DSP_PRIMARY_CORE)
++ break;
++
+ ret = snd_sof_dsp_core_power_down(sdev, 1 << pipeline->core);
+ if (ret < 0)
+ dev_err(scomp->dev, "error: powering down pipeline schedule core %d\n",
+diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c
+index 2ed92c990b97c..dd9013c476649 100644
+--- a/sound/soc/sti/uniperif_player.c
++++ b/sound/soc/sti/uniperif_player.c
+@@ -91,7 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
+ SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player);
+
+ /* Stop the player */
+- snd_pcm_stop_xrun(player->substream);
++ snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
+ }
+
+ ret = IRQ_HANDLED;
+@@ -105,7 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
+ SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player);
+
+ /* Stop the player */
+- snd_pcm_stop_xrun(player->substream);
++ snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
+
+ ret = IRQ_HANDLED;
+ }
+@@ -138,7 +138,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id)
+ dev_err(player->dev, "Underflow recovery failed\n");
+
+ /* Stop the player */
+- snd_pcm_stop_xrun(player->substream);
++ snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN);
+
+ ret = IRQ_HANDLED;
+ }
+diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c
+index 136059331211d..065c5f0d1f5f0 100644
+--- a/sound/soc/sti/uniperif_reader.c
++++ b/sound/soc/sti/uniperif_reader.c
+@@ -65,7 +65,7 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id)
+ if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) {
+ dev_err(reader->dev, "FIFO error detected\n");
+
+- snd_pcm_stop_xrun(reader->substream);
++ snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN);
+
+ ret = IRQ_HANDLED;
+ }
+diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c
+index e6078f50e508e..1e9b4b1df69e3 100644
+--- a/sound/soc/stm/stm32_adfsdm.c
++++ b/sound/soc/stm/stm32_adfsdm.c
+@@ -303,6 +303,11 @@ static int stm32_adfsdm_dummy_cb(const void *data, void *private)
+ return 0;
+ }
+
++static void stm32_adfsdm_cleanup(void *data)
++{
++ iio_channel_release_all_cb(data);
++}
++
+ static struct snd_soc_component_driver stm32_adfsdm_soc_platform = {
+ .open = stm32_adfsdm_pcm_open,
+ .close = stm32_adfsdm_pcm_close,
+@@ -349,6 +354,12 @@ static int stm32_adfsdm_probe(struct platform_device *pdev)
+ if (IS_ERR(priv->iio_cb))
+ return PTR_ERR(priv->iio_cb);
+
++ ret = devm_add_action_or_reset(&pdev->dev, stm32_adfsdm_cleanup, priv->iio_cb);
++ if (ret < 0) {
++ dev_err(&pdev->dev, "Unable to add action\n");
++ return ret;
++ }
++
+ component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL);
+ if (!component)
+ return -ENOMEM;
+diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c
+index 6254bacad6eb7..717f45a83445c 100644
+--- a/sound/soc/stm/stm32_i2s.c
++++ b/sound/soc/stm/stm32_i2s.c
+@@ -700,7 +700,7 @@ static int stm32_i2s_configure_clock(struct snd_soc_dai *cpu_dai,
+ if (ret < 0)
+ return ret;
+
+- nb_bits = frame_len * ((cgfr & I2S_CGFR_CHLEN) + 1);
++ nb_bits = frame_len * (FIELD_GET(I2S_CGFR_CHLEN, cgfr) + 1);
+ ret = stm32_i2s_calc_clk_div(i2s, i2s_clock_rate,
+ (nb_bits * rate));
+ if (ret)
+diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c
+index 8ee9a77bd83d3..a74c980ee7753 100644
+--- a/sound/soc/tegra/tegra186_dspk.c
++++ b/sound/soc/tegra/tegra186_dspk.c
+@@ -26,51 +26,162 @@ static const struct reg_default tegra186_dspk_reg_defaults[] = {
+ { TEGRA186_DSPK_CODEC_CTRL, 0x03000000 },
+ };
+
+-static int tegra186_dspk_get_control(struct snd_kcontrol *kcontrol,
++static int tegra186_dspk_get_fifo_th(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+- if (strstr(kcontrol->id.name, "FIFO Threshold"))
+- ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
+- else if (strstr(kcontrol->id.name, "OSR Value"))
+- ucontrol->value.integer.value[0] = dspk->osr_val;
+- else if (strstr(kcontrol->id.name, "LR Polarity Select"))
+- ucontrol->value.integer.value[0] = dspk->lrsel;
+- else if (strstr(kcontrol->id.name, "Channel Select"))
+- ucontrol->value.integer.value[0] = dspk->ch_sel;
+- else if (strstr(kcontrol->id.name, "Mono To Stereo"))
+- ucontrol->value.integer.value[0] = dspk->mono_to_stereo;
+- else if (strstr(kcontrol->id.name, "Stereo To Mono"))
+- ucontrol->value.integer.value[0] = dspk->stereo_to_mono;
++ ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
+
+ return 0;
+ }
+
+-static int tegra186_dspk_put_control(struct snd_kcontrol *kcontrol,
++static int tegra186_dspk_put_fifo_th(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+- int val = ucontrol->value.integer.value[0];
+-
+- if (strstr(kcontrol->id.name, "FIFO Threshold"))
+- dspk->rx_fifo_th = val;
+- else if (strstr(kcontrol->id.name, "OSR Value"))
+- dspk->osr_val = val;
+- else if (strstr(kcontrol->id.name, "LR Polarity Select"))
+- dspk->lrsel = val;
+- else if (strstr(kcontrol->id.name, "Channel Select"))
+- dspk->ch_sel = val;
+- else if (strstr(kcontrol->id.name, "Mono To Stereo"))
+- dspk->mono_to_stereo = val;
+- else if (strstr(kcontrol->id.name, "Stereo To Mono"))
+- dspk->stereo_to_mono = val;
++ int value = ucontrol->value.integer.value[0];
++
++ if (value == dspk->rx_fifo_th)
++ return 0;
++
++ dspk->rx_fifo_th = value;
++
++ return 1;
++}
++
++static int tegra186_dspk_get_osr_val(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++
++ ucontrol->value.enumerated.item[0] = dspk->osr_val;
+
+ return 0;
+ }
+
++static int tegra186_dspk_put_osr_val(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dspk->osr_val)
++ return 0;
++
++ dspk->osr_val = value;
++
++ return 1;
++}
++
++static int tegra186_dspk_get_pol_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++
++ ucontrol->value.enumerated.item[0] = dspk->lrsel;
++
++ return 0;
++}
++
++static int tegra186_dspk_put_pol_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dspk->lrsel)
++ return 0;
++
++ dspk->lrsel = value;
++
++ return 1;
++}
++
++static int tegra186_dspk_get_ch_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++
++ ucontrol->value.enumerated.item[0] = dspk->ch_sel;
++
++ return 0;
++}
++
++static int tegra186_dspk_put_ch_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dspk->ch_sel)
++ return 0;
++
++ dspk->ch_sel = value;
++
++ return 1;
++}
++
++static int tegra186_dspk_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++
++ ucontrol->value.enumerated.item[0] = dspk->mono_to_stereo;
++
++ return 0;
++}
++
++static int tegra186_dspk_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dspk->mono_to_stereo)
++ return 0;
++
++ dspk->mono_to_stereo = value;
++
++ return 1;
++}
++
++static int tegra186_dspk_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++
++ ucontrol->value.enumerated.item[0] = dspk->stereo_to_mono;
++
++ return 0;
++}
++
++static int tegra186_dspk_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
++ struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dspk->stereo_to_mono)
++ return 0;
++
++ dspk->stereo_to_mono = value;
++
++ return 1;
++}
++
+ static int __maybe_unused tegra186_dspk_runtime_suspend(struct device *dev)
+ {
+ struct tegra186_dspk *dspk = dev_get_drvdata(dev);
+@@ -279,17 +390,19 @@ static const struct soc_enum tegra186_dspk_lrsel_enum =
+ static const struct snd_kcontrol_new tegrat186_dspk_controls[] = {
+ SOC_SINGLE_EXT("FIFO Threshold", SND_SOC_NOPM, 0,
+ TEGRA186_DSPK_RX_FIFO_DEPTH - 1, 0,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_fifo_th, tegra186_dspk_put_fifo_th),
+ SOC_ENUM_EXT("OSR Value", tegra186_dspk_osr_enum,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_osr_val, tegra186_dspk_put_osr_val),
+ SOC_ENUM_EXT("LR Polarity Select", tegra186_dspk_lrsel_enum,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_pol_sel, tegra186_dspk_put_pol_sel),
+ SOC_ENUM_EXT("Channel Select", tegra186_dspk_ch_sel_enum,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_ch_sel, tegra186_dspk_put_ch_sel),
+ SOC_ENUM_EXT("Mono To Stereo", tegra186_dspk_mono_conv_enum,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_mono_to_stereo,
++ tegra186_dspk_put_mono_to_stereo),
+ SOC_ENUM_EXT("Stereo To Mono", tegra186_dspk_stereo_conv_enum,
+- tegra186_dspk_get_control, tegra186_dspk_put_control),
++ tegra186_dspk_get_stereo_to_mono,
++ tegra186_dspk_put_stereo_to_mono),
+ };
+
+ static const struct snd_soc_component_driver tegra186_dspk_cmpnt = {
+diff --git a/sound/soc/tegra/tegra210_admaif.c b/sound/soc/tegra/tegra210_admaif.c
+index bcccdf3ddc528..1a2e868a62209 100644
+--- a/sound/soc/tegra/tegra210_admaif.c
++++ b/sound/soc/tegra/tegra210_admaif.c
+@@ -424,46 +424,122 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = {
+ .trigger = tegra_admaif_trigger,
+ };
+
+-static int tegra_admaif_get_control(struct snd_kcontrol *kcontrol,
+- struct snd_ctl_elem_value *ucontrol)
++static int tegra210_admaif_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++
++ ucontrol->value.enumerated.item[0] =
++ admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
++
++ return 0;
++}
++
++static int tegra210_admaif_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg])
++ return 0;
++
++ admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
++
++ return 1;
++}
++
++static int tegra210_admaif_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++
++ ucontrol->value.enumerated.item[0] =
++ admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
++
++ return 0;
++}
++
++static int tegra210_admaif_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg])
++ return 0;
++
++ admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
++
++ return 1;
++}
++
++static int tegra210_admaif_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+- long *uctl_val = &ucontrol->value.integer.value[0];
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+- if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
+- *uctl_val = admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
+- else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
+- *uctl_val = admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
+- else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
+- *uctl_val = admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
+- else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
+- *uctl_val = admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
++ ucontrol->value.enumerated.item[0] =
++ admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
+
+ return 0;
+ }
+
+-static int tegra_admaif_put_control(struct snd_kcontrol *kcontrol,
+- struct snd_ctl_elem_value *ucontrol)
++static int tegra210_admaif_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg])
++ return 0;
++
++ admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
++
++ return 1;
++}
++
++static int tegra210_admaif_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+- int value = ucontrol->value.integer.value[0];
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+- if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
+- admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
+- else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
+- admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
+- else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
+- admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
+- else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
+- admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
++ ucontrol->value.enumerated.item[0] =
++ admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
+
+ return 0;
+ }
+
++static int tegra210_admaif_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
++ struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg])
++ return 0;
++
++ admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
++
++ return 1;
++}
++
+ static int tegra_admaif_dai_probe(struct snd_soc_dai *dai)
+ {
+ struct tegra_admaif *admaif = snd_soc_dai_get_drvdata(dai);
+@@ -559,17 +635,21 @@ static const char * const tegra_admaif_mono_conv_text[] = {
+ }
+
+ #define TEGRA_ADMAIF_CIF_CTRL(reg) \
+- NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,\
+- tegra_admaif_get_control, tegra_admaif_put_control, \
++ NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1, \
++ tegra210_admaif_pget_mono_to_stereo, \
++ tegra210_admaif_pput_mono_to_stereo, \
+ tegra_admaif_mono_conv_text), \
+- NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,\
+- tegra_admaif_get_control, tegra_admaif_put_control, \
++ NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1, \
++ tegra210_admaif_pget_stereo_to_mono, \
++ tegra210_admaif_pput_stereo_to_mono, \
+ tegra_admaif_stereo_conv_text), \
+- NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \
+- tegra_admaif_get_control, tegra_admaif_put_control, \
++ NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \
++ tegra210_admaif_cget_mono_to_stereo, \
++ tegra210_admaif_cput_mono_to_stereo, \
+ tegra_admaif_mono_conv_text), \
+- NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \
+- tegra_admaif_get_control, tegra_admaif_put_control, \
++ NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \
++ tegra210_admaif_cget_stereo_to_mono, \
++ tegra210_admaif_cput_stereo_to_mono, \
+ tegra_admaif_stereo_conv_text)
+
+ static struct snd_kcontrol_new tegra210_admaif_controls[] = {
+diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c
+index 66287a7c9865d..1b2f7cb8c6adc 100644
+--- a/sound/soc/tegra/tegra210_ahub.c
++++ b/sound/soc/tegra/tegra210_ahub.c
+@@ -62,6 +62,7 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl,
+ unsigned int *item = uctl->value.enumerated.item;
+ unsigned int value = e->values[item[0]];
+ unsigned int i, bit_pos, reg_idx = 0, reg_val = 0;
++ int change = 0;
+
+ if (item[0] >= e->items)
+ return -EINVAL;
+@@ -86,12 +87,14 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl,
+
+ /* Update widget power if state has changed */
+ if (snd_soc_component_test_bits(cmpnt, update[i].reg,
+- update[i].mask, update[i].val))
+- snd_soc_dapm_mux_update_power(dapm, kctl, item[0], e,
+- &update[i]);
++ update[i].mask,
++ update[i].val))
++ change |= snd_soc_dapm_mux_update_power(dapm, kctl,
++ item[0], e,
++ &update[i]);
+ }
+
+- return 0;
++ return change;
+ }
+
+ static struct snd_soc_dai_driver tegra210_ahub_dais[] = {
+diff --git a/sound/soc/tegra/tegra210_dmic.c b/sound/soc/tegra/tegra210_dmic.c
+index b096478cd2ef0..db95794530f46 100644
+--- a/sound/soc/tegra/tegra210_dmic.c
++++ b/sound/soc/tegra/tegra210_dmic.c
+@@ -156,51 +156,162 @@ static int tegra210_dmic_hw_params(struct snd_pcm_substream *substream,
+ return 0;
+ }
+
+-static int tegra210_dmic_get_control(struct snd_kcontrol *kcontrol,
++static int tegra210_dmic_get_boost_gain(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++
++ ucontrol->value.integer.value[0] = dmic->boost_gain;
++
++ return 0;
++}
++
++static int tegra210_dmic_put_boost_gain(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++ int value = ucontrol->value.integer.value[0];
++
++ if (value == dmic->boost_gain)
++ return 0;
++
++ dmic->boost_gain = value;
++
++ return 1;
++}
++
++static int tegra210_dmic_get_ch_select(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++
++ ucontrol->value.enumerated.item[0] = dmic->ch_select;
++
++ return 0;
++}
++
++static int tegra210_dmic_put_ch_select(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dmic->ch_select)
++ return 0;
++
++ dmic->ch_select = value;
++
++ return 1;
++}
++
++static int tegra210_dmic_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++
++ ucontrol->value.enumerated.item[0] = dmic->mono_to_stereo;
++
++ return 0;
++}
++
++static int tegra210_dmic_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dmic->mono_to_stereo)
++ return 0;
++
++ dmic->mono_to_stereo = value;
++
++ return 1;
++}
++
++static int tegra210_dmic_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++
++ ucontrol->value.enumerated.item[0] = dmic->stereo_to_mono;
++
++ return 0;
++}
++
++static int tegra210_dmic_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dmic->stereo_to_mono)
++ return 0;
++
++ dmic->stereo_to_mono = value;
++
++ return 1;
++}
++
++static int tegra210_dmic_get_osr_val(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+- if (strstr(kcontrol->id.name, "Boost Gain Volume"))
+- ucontrol->value.integer.value[0] = dmic->boost_gain;
+- else if (strstr(kcontrol->id.name, "Channel Select"))
+- ucontrol->value.integer.value[0] = dmic->ch_select;
+- else if (strstr(kcontrol->id.name, "Mono To Stereo"))
+- ucontrol->value.integer.value[0] = dmic->mono_to_stereo;
+- else if (strstr(kcontrol->id.name, "Stereo To Mono"))
+- ucontrol->value.integer.value[0] = dmic->stereo_to_mono;
+- else if (strstr(kcontrol->id.name, "OSR Value"))
+- ucontrol->value.integer.value[0] = dmic->osr_val;
+- else if (strstr(kcontrol->id.name, "LR Polarity Select"))
+- ucontrol->value.integer.value[0] = dmic->lrsel;
++ ucontrol->value.enumerated.item[0] = dmic->osr_val;
+
+ return 0;
+ }
+
+-static int tegra210_dmic_put_control(struct snd_kcontrol *kcontrol,
++static int tegra210_dmic_put_osr_val(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+- int value = ucontrol->value.integer.value[0];
++ unsigned int value = ucontrol->value.enumerated.item[0];
+
+- if (strstr(kcontrol->id.name, "Boost Gain Volume"))
+- dmic->boost_gain = value;
+- else if (strstr(kcontrol->id.name, "Channel Select"))
+- dmic->ch_select = ucontrol->value.integer.value[0];
+- else if (strstr(kcontrol->id.name, "Mono To Stereo"))
+- dmic->mono_to_stereo = value;
+- else if (strstr(kcontrol->id.name, "Stereo To Mono"))
+- dmic->stereo_to_mono = value;
+- else if (strstr(kcontrol->id.name, "OSR Value"))
+- dmic->osr_val = value;
+- else if (strstr(kcontrol->id.name, "LR Polarity Select"))
+- dmic->lrsel = value;
++ if (value == dmic->osr_val)
++ return 0;
++
++ dmic->osr_val = value;
++
++ return 1;
++}
++
++static int tegra210_dmic_get_pol_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++
++ ucontrol->value.enumerated.item[0] = dmic->lrsel;
+
+ return 0;
+ }
+
++static int tegra210_dmic_put_pol_sel(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == dmic->lrsel)
++ return 0;
++
++ dmic->lrsel = value;
++
++ return 1;
++}
++
+ static const struct snd_soc_dai_ops tegra210_dmic_dai_ops = {
+ .hw_params = tegra210_dmic_hw_params,
+ };
+@@ -287,19 +398,22 @@ static const struct soc_enum tegra210_dmic_lrsel_enum =
+
+ static const struct snd_kcontrol_new tegra210_dmic_controls[] = {
+ SOC_SINGLE_EXT("Boost Gain Volume", 0, 0, MAX_BOOST_GAIN, 0,
+- tegra210_dmic_get_control, tegra210_dmic_put_control),
++ tegra210_dmic_get_boost_gain,
++ tegra210_dmic_put_boost_gain),
+ SOC_ENUM_EXT("Channel Select", tegra210_dmic_ch_enum,
+- tegra210_dmic_get_control, tegra210_dmic_put_control),
++ tegra210_dmic_get_ch_select, tegra210_dmic_put_ch_select),
+ SOC_ENUM_EXT("Mono To Stereo",
+- tegra210_dmic_mono_conv_enum, tegra210_dmic_get_control,
+- tegra210_dmic_put_control),
++ tegra210_dmic_mono_conv_enum,
++ tegra210_dmic_get_mono_to_stereo,
++ tegra210_dmic_put_mono_to_stereo),
+ SOC_ENUM_EXT("Stereo To Mono",
+- tegra210_dmic_stereo_conv_enum, tegra210_dmic_get_control,
+- tegra210_dmic_put_control),
++ tegra210_dmic_stereo_conv_enum,
++ tegra210_dmic_get_stereo_to_mono,
++ tegra210_dmic_put_stereo_to_mono),
+ SOC_ENUM_EXT("OSR Value", tegra210_dmic_osr_enum,
+- tegra210_dmic_get_control, tegra210_dmic_put_control),
++ tegra210_dmic_get_osr_val, tegra210_dmic_put_osr_val),
+ SOC_ENUM_EXT("LR Polarity Select", tegra210_dmic_lrsel_enum,
+- tegra210_dmic_get_control, tegra210_dmic_put_control),
++ tegra210_dmic_get_pol_sel, tegra210_dmic_put_pol_sel),
+ };
+
+ static const struct snd_soc_component_driver tegra210_dmic_compnt = {
+diff --git a/sound/soc/tegra/tegra210_i2s.c b/sound/soc/tegra/tegra210_i2s.c
+index 45f31ccb49d89..9552bbb939dd1 100644
+--- a/sound/soc/tegra/tegra210_i2s.c
++++ b/sound/soc/tegra/tegra210_i2s.c
+@@ -302,85 +302,235 @@ static int tegra210_i2s_set_tdm_slot(struct snd_soc_dai *dai,
+ return 0;
+ }
+
+-static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai,
+- unsigned int ratio)
++static int tegra210_i2s_get_loopback(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
+ {
+- struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+- i2s->bclk_ratio = ratio;
++ ucontrol->value.integer.value[0] = i2s->loopback;
+
+ return 0;
+ }
+
+-static int tegra210_i2s_get_control(struct snd_kcontrol *kcontrol,
+- struct snd_ctl_elem_value *ucontrol)
++static int tegra210_i2s_put_loopback(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ int value = ucontrol->value.integer.value[0];
++
++ if (value == i2s->loopback)
++ return 0;
++
++ i2s->loopback = value;
++
++ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, I2S_CTRL_LPBK_MASK,
++ i2s->loopback << I2S_CTRL_LPBK_SHIFT);
++
++ return 1;
++}
++
++static int tegra210_i2s_get_fsync_width(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+- long *uctl_val = &ucontrol->value.integer.value[0];
+-
+- if (strstr(kcontrol->id.name, "Loopback"))
+- *uctl_val = i2s->loopback;
+- else if (strstr(kcontrol->id.name, "FSYNC Width"))
+- *uctl_val = i2s->fsync_width;
+- else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
+- *uctl_val = i2s->stereo_to_mono[I2S_TX_PATH];
+- else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
+- *uctl_val = i2s->mono_to_stereo[I2S_TX_PATH];
+- else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
+- *uctl_val = i2s->stereo_to_mono[I2S_RX_PATH];
+- else if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
+- *uctl_val = i2s->mono_to_stereo[I2S_RX_PATH];
+- else if (strstr(kcontrol->id.name, "Playback FIFO Threshold"))
+- *uctl_val = i2s->rx_fifo_th;
+- else if (strstr(kcontrol->id.name, "BCLK Ratio"))
+- *uctl_val = i2s->bclk_ratio;
++
++ ucontrol->value.integer.value[0] = i2s->fsync_width;
+
+ return 0;
+ }
+
+-static int tegra210_i2s_put_control(struct snd_kcontrol *kcontrol,
+- struct snd_ctl_elem_value *ucontrol)
++static int tegra210_i2s_put_fsync_width(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+ int value = ucontrol->value.integer.value[0];
+
+- if (strstr(kcontrol->id.name, "Loopback")) {
+- i2s->loopback = value;
++ if (value == i2s->fsync_width)
++ return 0;
+
+- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
+- I2S_CTRL_LPBK_MASK,
+- i2s->loopback << I2S_CTRL_LPBK_SHIFT);
++ i2s->fsync_width = value;
+
+- } else if (strstr(kcontrol->id.name, "FSYNC Width")) {
+- /*
+- * Frame sync width is used only for FSYNC modes and not
+- * applicable for LRCK modes. Reset value for this field is "0",
+- * which means the width is one bit clock wide.
+- * The width requirement may depend on the codec and in such
+- * cases mixer control is used to update custom values. A value
+- * of "N" here means, width is "N + 1" bit clock wide.
+- */
+- i2s->fsync_width = value;
+-
+- regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
+- I2S_CTRL_FSYNC_WIDTH_MASK,
+- i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
+-
+- } else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) {
+- i2s->stereo_to_mono[I2S_TX_PATH] = value;
+- } else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) {
+- i2s->mono_to_stereo[I2S_TX_PATH] = value;
+- } else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) {
+- i2s->stereo_to_mono[I2S_RX_PATH] = value;
+- } else if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) {
+- i2s->mono_to_stereo[I2S_RX_PATH] = value;
+- } else if (strstr(kcontrol->id.name, "Playback FIFO Threshold")) {
+- i2s->rx_fifo_th = value;
+- } else if (strstr(kcontrol->id.name, "BCLK Ratio")) {
+- i2s->bclk_ratio = value;
+- }
++ /*
++ * Frame sync width is used only for FSYNC modes and not
++ * applicable for LRCK modes. Reset value for this field is "0",
++ * which means the width is one bit clock wide.
++ * The width requirement may depend on the codec and in such
++ * cases mixer control is used to update custom values. A value
++ * of "N" here means, width is "N + 1" bit clock wide.
++ */
++ regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
++ I2S_CTRL_FSYNC_WIDTH_MASK,
++ i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
++
++ return 1;
++}
++
++static int tegra210_i2s_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_TX_PATH];
++
++ return 0;
++}
++
++static int tegra210_i2s_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == i2s->stereo_to_mono[I2S_TX_PATH])
++ return 0;
++
++ i2s->stereo_to_mono[I2S_TX_PATH] = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_TX_PATH];
++
++ return 0;
++}
++
++static int tegra210_i2s_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == i2s->mono_to_stereo[I2S_TX_PATH])
++ return 0;
++
++ i2s->mono_to_stereo[I2S_TX_PATH] = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_RX_PATH];
++
++ return 0;
++}
++
++static int tegra210_i2s_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == i2s->stereo_to_mono[I2S_RX_PATH])
++ return 0;
++
++ i2s->stereo_to_mono[I2S_RX_PATH] = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_RX_PATH];
++
++ return 0;
++}
++
++static int tegra210_i2s_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ unsigned int value = ucontrol->value.enumerated.item[0];
++
++ if (value == i2s->mono_to_stereo[I2S_RX_PATH])
++ return 0;
++
++ i2s->mono_to_stereo[I2S_RX_PATH] = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_pget_fifo_th(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.integer.value[0] = i2s->rx_fifo_th;
++
++ return 0;
++}
++
++static int tegra210_i2s_pput_fifo_th(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ int value = ucontrol->value.integer.value[0];
++
++ if (value == i2s->rx_fifo_th)
++ return 0;
++
++ i2s->rx_fifo_th = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_get_bclk_ratio(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++
++ ucontrol->value.integer.value[0] = i2s->bclk_ratio;
++
++ return 0;
++}
++
++static int tegra210_i2s_put_bclk_ratio(struct snd_kcontrol *kcontrol,
++ struct snd_ctl_elem_value *ucontrol)
++{
++ struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
++ struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
++ int value = ucontrol->value.integer.value[0];
++
++ if (value == i2s->bclk_ratio)
++ return 0;
++
++ i2s->bclk_ratio = value;
++
++ return 1;
++}
++
++static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai,
++ unsigned int ratio)
++{
++ struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
++
++ i2s->bclk_ratio = ratio;
+
+ return 0;
+ }
+@@ -598,22 +748,28 @@ static const struct soc_enum tegra210_i2s_stereo_conv_enum =
+ tegra210_i2s_stereo_conv_text);
+
+ static const struct snd_kcontrol_new tegra210_i2s_controls[] = {
+- SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_control,
+- tegra210_i2s_put_control),
+- SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0, tegra210_i2s_get_control,
+- tegra210_i2s_put_control),
++ SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_loopback,
++ tegra210_i2s_put_loopback),
++ SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0,
++ tegra210_i2s_get_fsync_width,
++ tegra210_i2s_put_fsync_width),
+ SOC_ENUM_EXT("Capture Stereo To Mono", tegra210_i2s_stereo_conv_enum,
+- tegra210_i2s_get_control, tegra210_i2s_put_control),
++ tegra210_i2s_cget_stereo_to_mono,
++ tegra210_i2s_cput_stereo_to_mono),
+ SOC_ENUM_EXT("Capture Mono To Stereo", tegra210_i2s_mono_conv_enum,
+- tegra210_i2s_get_control, tegra210_i2s_put_control),
++ tegra210_i2s_cget_mono_to_stereo,
++ tegra210_i2s_cput_mono_to_stereo),
+ SOC_ENUM_EXT("Playback Stereo To Mono", tegra210_i2s_stereo_conv_enum,
+- tegra210_i2s_get_control, tegra210_i2s_put_control),
++ tegra210_i2s_pget_mono_to_stereo,
++ tegra210_i2s_pput_mono_to_stereo),
+ SOC_ENUM_EXT("Playback Mono To Stereo", tegra210_i2s_mono_conv_enum,
+- tegra210_i2s_get_control, tegra210_i2s_put_control),
++ tegra210_i2s_pget_stereo_to_mono,
++ tegra210_i2s_pput_stereo_to_mono),
+ SOC_SINGLE_EXT("Playback FIFO Threshold", 0, 0, I2S_RX_FIFO_DEPTH - 1,
+- 0, tegra210_i2s_get_control, tegra210_i2s_put_control),
+- SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0, tegra210_i2s_get_control,
+- tegra210_i2s_put_control),
++ 0, tegra210_i2s_pget_fifo_th, tegra210_i2s_pput_fifo_th),
++ SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0,
++ tegra210_i2s_get_bclk_ratio,
++ tegra210_i2s_put_bclk_ratio),
+ };
+
+ static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = {
+diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c
+index 735909310a262..2e549b69061ca 100644
+--- a/sound/soc/tegra/tegra_asoc_machine.c
++++ b/sound/soc/tegra/tegra_asoc_machine.c
+@@ -116,16 +116,24 @@ static const struct snd_kcontrol_new tegra_machine_controls[] = {
+ SOC_DAPM_PIN_SWITCH("Headset Mic"),
+ SOC_DAPM_PIN_SWITCH("Internal Mic 1"),
+ SOC_DAPM_PIN_SWITCH("Internal Mic 2"),
++ SOC_DAPM_PIN_SWITCH("Headphones"),
++ SOC_DAPM_PIN_SWITCH("Mic Jack"),
+ };
+
+ int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd)
+ {
+ struct snd_soc_card *card = rtd->card;
+ struct tegra_machine *machine = snd_soc_card_get_drvdata(card);
++ const char *jack_name;
+ int err;
+
+ if (machine->gpiod_hp_det && machine->asoc->add_hp_jack) {
+- err = snd_soc_card_jack_new(card, "Headphones Jack",
++ if (machine->asoc->hp_jack_name)
++ jack_name = machine->asoc->hp_jack_name;
++ else
++ jack_name = "Headphones Jack";
++
++ err = snd_soc_card_jack_new(card, jack_name,
+ SND_JACK_HEADPHONE,
+ &tegra_machine_hp_jack,
+ tegra_machine_hp_jack_pins,
+@@ -341,9 +349,34 @@ tegra_machine_parse_phandle(struct device *dev, const char *name)
+ return np;
+ }
+
++static void tegra_machine_unregister_codec(void *pdev)
++{
++ platform_device_unregister(pdev);
++}
++
++static int tegra_machine_register_codec(struct device *dev, const char *name)
++{
++ struct platform_device *pdev;
++ int err;
++
++ if (!name)
++ return 0;
++
++ pdev = platform_device_register_simple(name, -1, NULL, 0);
++ if (IS_ERR(pdev))
++ return PTR_ERR(pdev);
++
++ err = devm_add_action_or_reset(dev, tegra_machine_unregister_codec,
++ pdev);
++ if (err)
++ return err;
++
++ return 0;
++}
++
+ int tegra_asoc_machine_probe(struct platform_device *pdev)
+ {
+- struct device_node *np_codec, *np_i2s;
++ struct device_node *np_codec, *np_i2s, *np_ac97;
+ const struct tegra_asoc_data *asoc;
+ struct device *dev = &pdev->dev;
+ struct tegra_machine *machine;
+@@ -404,17 +437,30 @@ int tegra_asoc_machine_probe(struct platform_device *pdev)
+ return err;
+ }
+
+- np_codec = tegra_machine_parse_phandle(dev, "nvidia,audio-codec");
+- if (IS_ERR(np_codec))
+- return PTR_ERR(np_codec);
++ if (asoc->set_ac97) {
++ err = tegra_machine_register_codec(dev, asoc->codec_dev_name);
++ if (err)
++ return err;
++
++ np_ac97 = tegra_machine_parse_phandle(dev, "nvidia,ac97-controller");
++ if (IS_ERR(np_ac97))
++ return PTR_ERR(np_ac97);
+
+- np_i2s = tegra_machine_parse_phandle(dev, "nvidia,i2s-controller");
+- if (IS_ERR(np_i2s))
+- return PTR_ERR(np_i2s);
++ card->dai_link->cpus->of_node = np_ac97;
++ card->dai_link->platforms->of_node = np_ac97;
++ } else {
++ np_codec = tegra_machine_parse_phandle(dev, "nvidia,audio-codec");
++ if (IS_ERR(np_codec))
++ return PTR_ERR(np_codec);
+
+- card->dai_link->cpus->of_node = np_i2s;
+- card->dai_link->codecs->of_node = np_codec;
+- card->dai_link->platforms->of_node = np_i2s;
++ np_i2s = tegra_machine_parse_phandle(dev, "nvidia,i2s-controller");
++ if (IS_ERR(np_i2s))
++ return PTR_ERR(np_i2s);
++
++ card->dai_link->cpus->of_node = np_i2s;
++ card->dai_link->codecs->of_node = np_codec;
++ card->dai_link->platforms->of_node = np_i2s;
++ }
+
+ if (asoc->add_common_controls) {
+ card->controls = tegra_machine_controls;
+@@ -589,6 +635,7 @@ static struct snd_soc_card snd_soc_tegra_wm9712 = {
+ static const struct tegra_asoc_data tegra_wm9712_data = {
+ .card = &snd_soc_tegra_wm9712,
+ .add_common_dapm_widgets = true,
++ .codec_dev_name = "wm9712-codec",
+ .set_ac97 = true,
+ };
+
+@@ -619,6 +666,7 @@ static struct snd_soc_card snd_soc_tegra_max98090 = {
+ static const struct tegra_asoc_data tegra_max98090_data = {
+ .mclk_rate = tegra_machine_mclk_rate_12mhz,
+ .card = &snd_soc_tegra_max98090,
++ .hp_jack_name = "Headphones",
+ .add_common_dapm_widgets = true,
+ .add_common_controls = true,
+ .add_common_snd_ops = true,
+@@ -686,6 +734,7 @@ static struct snd_soc_dai_link tegra_tlv320aic23_dai = {
+ };
+
+ static struct snd_soc_card snd_soc_tegra_trimslice = {
++ .name = "tegra-trimslice",
+ .components = "codec:tlv320aic23",
+ .dai_link = &tegra_tlv320aic23_dai,
+ .num_links = 1,
+diff --git a/sound/soc/tegra/tegra_asoc_machine.h b/sound/soc/tegra/tegra_asoc_machine.h
+index 8ee0ec814f67c..6f795d7dff7c1 100644
+--- a/sound/soc/tegra/tegra_asoc_machine.h
++++ b/sound/soc/tegra/tegra_asoc_machine.h
+@@ -13,6 +13,8 @@ struct snd_soc_pcm_runtime;
+
+ struct tegra_asoc_data {
+ unsigned int (*mclk_rate)(unsigned int srate);
++ const char *codec_dev_name;
++ const char *hp_jack_name;
+ struct snd_soc_card *card;
+ unsigned int mclk_id;
+ bool hp_jack_gpio_active_low;
+diff --git a/sound/soc/ti/davinci-i2s.c b/sound/soc/ti/davinci-i2s.c
+index 6dca51862dd76..0363a088d2e00 100644
+--- a/sound/soc/ti/davinci-i2s.c
++++ b/sound/soc/ti/davinci-i2s.c
+@@ -708,7 +708,9 @@ static int davinci_i2s_probe(struct platform_device *pdev)
+ dev->clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(dev->clk))
+ return -ENODEV;
+- clk_enable(dev->clk);
++ ret = clk_enable(dev->clk);
++ if (ret)
++ goto err_put_clk;
+
+ dev->dev = &pdev->dev;
+ dev_set_drvdata(&pdev->dev, dev);
+@@ -730,6 +732,7 @@ err_unregister_component:
+ snd_soc_unregister_component(&pdev->dev);
+ err_release_clk:
+ clk_disable(dev->clk);
++err_put_clk:
+ clk_put(dev->clk);
+ return ret;
+ }
+diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c
+index 9347f982c3e10..149f4e2ce9998 100644
+--- a/sound/soc/ti/j721e-evm.c
++++ b/sound/soc/ti/j721e-evm.c
+@@ -634,17 +634,18 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx,
+ codec_node = of_parse_phandle(node, "ti,cpb-codec", 0);
+ if (!codec_node) {
+ dev_err(priv->dev, "CPB codec node is not provided\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_dai_node;
+ }
+
+ domain = &priv->audio_domains[J721E_AUDIO_DOMAIN_CPB];
+ ret = j721e_get_clocks(priv->dev, &domain->codec, "cpb-codec-scki");
+ if (ret)
+- return ret;
++ goto put_codec_node;
+
+ ret = j721e_get_clocks(priv->dev, &domain->mcasp, "cpb-mcasp-auxclk");
+ if (ret)
+- return ret;
++ goto put_codec_node;
+
+ /*
+ * Common Processor Board, two links
+@@ -654,8 +655,10 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx,
+ comp_count = 6;
+ compnent = devm_kzalloc(priv->dev, comp_count * sizeof(*compnent),
+ GFP_KERNEL);
+- if (!compnent)
+- return -ENOMEM;
++ if (!compnent) {
++ ret = -ENOMEM;
++ goto put_codec_node;
++ }
+
+ comp_idx = 0;
+ priv->dai_links[*link_idx].cpus = &compnent[comp_idx++];
+@@ -706,6 +709,12 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx,
+ (*conf_idx)++;
+
+ return 0;
++
++put_codec_node:
++ of_node_put(codec_node);
++put_dai_node:
++ of_node_put(dai_node);
++ return ret;
+ }
+
+ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx,
+@@ -730,23 +739,25 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx,
+ codeca_node = of_parse_phandle(node, "ti,ivi-codec-a", 0);
+ if (!codeca_node) {
+ dev_err(priv->dev, "IVI codec-a node is not provided\n");
+- return -EINVAL;
++ ret = -EINVAL;
++ goto put_dai_node;
+ }
+
+ codecb_node = of_parse_phandle(node, "ti,ivi-codec-b", 0);
+ if (!codecb_node) {
+ dev_warn(priv->dev, "IVI codec-b node is not provided\n");
+- return 0;
++ ret = 0;
++ goto put_codeca_node;
+ }
+
+ domain = &priv->audio_domains[J721E_AUDIO_DOMAIN_IVI];
+ ret = j721e_get_clocks(priv->dev, &domain->codec, "ivi-codec-scki");
+ if (ret)
+- return ret;
++ goto put_codecb_node;
+
+ ret = j721e_get_clocks(priv->dev, &domain->mcasp, "ivi-mcasp-auxclk");
+ if (ret)
+- return ret;
++ goto put_codecb_node;
+
+ /*
+ * IVI extension, two links
+@@ -758,8 +769,10 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx,
+ comp_count = 8;
+ compnent = devm_kzalloc(priv->dev, comp_count * sizeof(*compnent),
+ GFP_KERNEL);
+- if (!compnent)
+- return -ENOMEM;
++ if (!compnent) {
++ ret = -ENOMEM;
++ goto put_codecb_node;
++ }
+
+ comp_idx = 0;
+ priv->dai_links[*link_idx].cpus = &compnent[comp_idx++];
+@@ -820,6 +833,15 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx,
+ (*conf_idx)++;
+
+ return 0;
++
++
++put_codecb_node:
++ of_node_put(codecb_node);
++put_codeca_node:
++ of_node_put(codeca_node);
++put_dai_node:
++ of_node_put(dai_node);
++ return ret;
+ }
+
+ static int j721e_soc_probe(struct platform_device *pdev)
+diff --git a/sound/soc/uniphier/Kconfig b/sound/soc/uniphier/Kconfig
+index aa3592ee1358b..ddfa6424c656b 100644
+--- a/sound/soc/uniphier/Kconfig
++++ b/sound/soc/uniphier/Kconfig
+@@ -23,7 +23,6 @@ config SND_SOC_UNIPHIER_LD11
+ tristate "UniPhier LD11/LD20 Device Driver"
+ depends on SND_SOC_UNIPHIER
+ select SND_SOC_UNIPHIER_AIO
+- select SND_SOC_UNIPHIER_AIO_DMA
+ help
+ This adds ASoC driver for Socionext UniPhier LD11/LD20
+ input and output that can be used with other codecs.
+@@ -34,7 +33,6 @@ config SND_SOC_UNIPHIER_PXS2
+ tristate "UniPhier PXs2 Device Driver"
+ depends on SND_SOC_UNIPHIER
+ select SND_SOC_UNIPHIER_AIO
+- select SND_SOC_UNIPHIER_AIO_DMA
+ help
+ This adds ASoC driver for Socionext UniPhier PXs2
+ input and output that can be used with other codecs.
+diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c
+index 91afea9d5de67..5c4158069a5a8 100644
+--- a/sound/soc/xilinx/xlnx_formatter_pcm.c
++++ b/sound/soc/xilinx/xlnx_formatter_pcm.c
+@@ -37,6 +37,7 @@
+ #define XLNX_AUD_XFER_COUNT 0x28
+ #define XLNX_AUD_CH_STS_START 0x2C
+ #define XLNX_BYTES_PER_CH 0x44
++#define XLNX_AUD_ALIGN_BYTES 64
+
+ #define AUD_STS_IOC_IRQ_MASK BIT(31)
+ #define AUD_STS_CH_STS_MASK BIT(29)
+@@ -83,6 +84,7 @@ struct xlnx_pcm_drv_data {
+ struct snd_pcm_substream *play_stream;
+ struct snd_pcm_substream *capture_stream;
+ struct clk *axi_clk;
++ unsigned int sysclk;
+ };
+
+ /*
+@@ -313,6 +315,15 @@ static irqreturn_t xlnx_s2mm_irq_handler(int irq, void *arg)
+ return IRQ_NONE;
+ }
+
++static int xlnx_formatter_set_sysclk(struct snd_soc_component *component,
++ int clk_id, int source, unsigned int freq, int dir)
++{
++ struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev);
++
++ adata->sysclk = freq;
++ return 0;
++}
++
+ static int xlnx_formatter_pcm_open(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+ {
+@@ -368,12 +379,32 @@ static int xlnx_formatter_pcm_open(struct snd_soc_component *component,
+ snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware);
+ runtime->private_data = stream_data;
+
+- /* Resize the period size divisible by 64 */
++ /* Resize the period bytes as divisible by 64 */
++ err = snd_pcm_hw_constraint_step(runtime, 0,
++ SNDRV_PCM_HW_PARAM_PERIOD_BYTES,
++ XLNX_AUD_ALIGN_BYTES);
++ if (err) {
++ dev_err(component->dev,
++ "Unable to set constraint on period bytes\n");
++ return err;
++ }
++
++ /* Resize the buffer bytes as divisible by 64 */
+ err = snd_pcm_hw_constraint_step(runtime, 0,
+- SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64);
++ SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
++ XLNX_AUD_ALIGN_BYTES);
+ if (err) {
+ dev_err(component->dev,
+- "unable to set constraint on period bytes\n");
++ "Unable to set constraint on buffer bytes\n");
++ return err;
++ }
++
++ /* Set periods as integer multiple */
++ err = snd_pcm_hw_constraint_integer(runtime,
++ SNDRV_PCM_HW_PARAM_PERIODS);
++ if (err < 0) {
++ dev_err(component->dev,
++ "Unable to set constraint on periods to be integer\n");
+ return err;
+ }
+
+@@ -429,11 +460,25 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component,
+ u64 size;
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct xlnx_pcm_stream_param *stream_data = runtime->private_data;
++ struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev);
+
+ active_ch = params_channels(params);
+ if (active_ch > stream_data->ch_limit)
+ return -EINVAL;
+
++ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
++ adata->sysclk) {
++ unsigned int mclk_fs = adata->sysclk / params_rate(params);
++
++ if (adata->sysclk % params_rate(params) != 0) {
++ dev_warn(component->dev, "sysclk %u not divisible by rate %u\n",
++ adata->sysclk, params_rate(params));
++ return -EINVAL;
++ }
++
++ writel(mclk_fs, stream_data->mmio + XLNX_AUD_FS_MULTIPLIER);
++ }
++
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE &&
+ stream_data->xfer_mode == AES_TO_PCM) {
+ val = readl(stream_data->mmio + XLNX_AUD_STS);
+@@ -531,6 +576,7 @@ static int xlnx_formatter_pcm_new(struct snd_soc_component *component,
+
+ static const struct snd_soc_component_driver xlnx_asoc_component = {
+ .name = DRV_NAME,
++ .set_sysclk = xlnx_formatter_set_sysclk,
+ .open = xlnx_formatter_pcm_open,
+ .close = xlnx_formatter_pcm_close,
+ .hw_params = xlnx_formatter_pcm_hw_params,
+diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c
+index 76c0e37a838cf..8a2da6b1012eb 100644
+--- a/sound/spi/at73c213.c
++++ b/sound/spi/at73c213.c
+@@ -218,7 +218,9 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream)
+ runtime->hw = snd_at73c213_playback_hw;
+ chip->substream = substream;
+
+- clk_enable(chip->ssc->clk);
++ err = clk_enable(chip->ssc->clk);
++ if (err)
++ return err;
+
+ return 0;
+ }
+@@ -776,7 +778,9 @@ static int snd_at73c213_chip_init(struct snd_at73c213 *chip)
+ goto out;
+
+ /* Enable DAC master clock. */
+- clk_enable(chip->board->dac_clk);
++ retval = clk_enable(chip->board->dac_clk);
++ if (retval)
++ goto out;
+
+ /* Initialize at73c213 on SPI bus. */
+ retval = snd_at73c213_write_reg(chip, DAC_RST, 0x04);
+@@ -889,7 +893,9 @@ static int snd_at73c213_dev_init(struct snd_card *card,
+ chip->card = card;
+ chip->irq = -1;
+
+- clk_enable(chip->ssc->clk);
++ retval = clk_enable(chip->ssc->clk);
++ if (retval)
++ return retval;
+
+ retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip);
+ if (retval) {
+@@ -1008,7 +1014,9 @@ static int snd_at73c213_remove(struct spi_device *spi)
+ int retval;
+
+ /* Stop playback. */
+- clk_enable(chip->ssc->clk);
++ retval = clk_enable(chip->ssc->clk);
++ if (retval)
++ goto out;
+ ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+ clk_disable(chip->ssc->clk);
+
+@@ -1088,9 +1096,16 @@ static int snd_at73c213_resume(struct device *dev)
+ {
+ struct snd_card *card = dev_get_drvdata(dev);
+ struct snd_at73c213 *chip = card->private_data;
++ int retval;
+
+- clk_enable(chip->board->dac_clk);
+- clk_enable(chip->ssc->clk);
++ retval = clk_enable(chip->board->dac_clk);
++ if (retval)
++ return retval;
++ retval = clk_enable(chip->ssc->clk);
++ if (retval) {
++ clk_disable(chip->board->dac_clk);
++ return retval;
++ }
+ ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN));
+
+ return 0;
+diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c
+index 49d1976a132c0..a870759d179ed 100644
+--- a/sound/synth/emux/emux.c
++++ b/sound/synth/emux/emux.c
+@@ -88,7 +88,7 @@ int snd_emux_register(struct snd_emux *emu, struct snd_card *card, int index, ch
+ emu->name = kstrdup(name, GFP_KERNEL);
+ emu->voices = kcalloc(emu->max_voices, sizeof(struct snd_emux_voice),
+ GFP_KERNEL);
+- if (emu->voices == NULL)
++ if (emu->name == NULL || emu->voices == NULL)
+ return -ENOMEM;
+
+ /* create soundfont list */
+@@ -126,15 +126,10 @@ EXPORT_SYMBOL(snd_emux_register);
+ */
+ int snd_emux_free(struct snd_emux *emu)
+ {
+- unsigned long flags;
+-
+ if (! emu)
+ return -EINVAL;
+
+- spin_lock_irqsave(&emu->voice_lock, flags);
+- if (emu->timer_active)
+- del_timer(&emu->tlist);
+- spin_unlock_irqrestore(&emu->voice_lock, flags);
++ del_timer_sync(&emu->tlist);
+
+ snd_emux_proc_free(emu);
+ snd_emux_delete_virmidi(emu);
+diff --git a/sound/synth/emux/emux_nrpn.c b/sound/synth/emux/emux_nrpn.c
+index 8056422ed7c51..0d6b82ae29558 100644
+--- a/sound/synth/emux/emux_nrpn.c
++++ b/sound/synth/emux/emux_nrpn.c
+@@ -349,6 +349,9 @@ int
+ snd_emux_xg_control(struct snd_emux_port *port, struct snd_midi_channel *chan,
+ int param)
+ {
++ if (param >= ARRAY_SIZE(chan->control))
++ return -EINVAL;
++
+ return send_converted_effect(xg_effects, ARRAY_SIZE(xg_effects),
+ port, chan, param,
+ chan->control[param],
+diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c
+index 43a2a62d66f7e..49629d4bb327a 100644
+--- a/sound/usb/6fire/comm.c
++++ b/sound/usb/6fire/comm.c
+@@ -95,7 +95,7 @@ static int usb6fire_comm_send_buffer(u8 *buffer, struct usb_device *dev)
+ int actual_len;
+
+ ret = usb_interrupt_msg(dev, usb_sndintpipe(dev, COMM_EP),
+- buffer, buffer[1] + 2, &actual_len, HZ);
++ buffer, buffer[1] + 2, &actual_len, 1000);
+ if (ret < 0)
+ return ret;
+ else if (actual_len != buffer[1] + 2)
+diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c
+index 8981e61f2da4a..c51abc54d2f84 100644
+--- a/sound/usb/6fire/firmware.c
++++ b/sound/usb/6fire/firmware.c
+@@ -160,7 +160,7 @@ static int usb6fire_fw_ezusb_write(struct usb_device *device,
+ {
+ return usb_control_msg_send(device, 0, type,
+ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- value, 0, data, len, HZ, GFP_KERNEL);
++ value, 0, data, len, 1000, GFP_KERNEL);
+ }
+
+ static int usb6fire_fw_ezusb_read(struct usb_device *device,
+@@ -168,7 +168,7 @@ static int usb6fire_fw_ezusb_read(struct usb_device *device,
+ {
+ return usb_control_msg_recv(device, 0, type,
+ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+- value, 0, data, len, HZ, GFP_KERNEL);
++ value, 0, data, len, 1000, GFP_KERNEL);
+ }
+
+ static int usb6fire_fw_fpga_write(struct usb_device *device,
+@@ -178,7 +178,7 @@ static int usb6fire_fw_fpga_write(struct usb_device *device,
+ int ret;
+
+ ret = usb_bulk_msg(device, usb_sndbulkpipe(device, FPGA_EP), data, len,
+- &actual_len, HZ);
++ &actual_len, 1000);
+ if (ret < 0)
+ return ret;
+ else if (actual_len != len)
+diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c
+index cd4a0bc6d278f..7aec0a95c609a 100644
+--- a/sound/usb/bcd2000/bcd2000.c
++++ b/sound/usb/bcd2000/bcd2000.c
+@@ -348,7 +348,8 @@ static int bcd2000_init_midi(struct bcd2000 *bcd2k)
+ static void bcd2000_free_usb_related_resources(struct bcd2000 *bcd2k,
+ struct usb_interface *interface)
+ {
+- /* usb_kill_urb not necessary, urb is aborted automatically */
++ usb_kill_urb(bcd2k->midi_out_urb);
++ usb_kill_urb(bcd2k->midi_in_urb);
+
+ usb_free_urb(bcd2k->midi_out_urb);
+ usb_free_urb(bcd2k->midi_in_urb);
+diff --git a/sound/usb/caiaq/input.c b/sound/usb/caiaq/input.c
+index 1e2cf2f08eecd..84f26dce7f5d0 100644
+--- a/sound/usb/caiaq/input.c
++++ b/sound/usb/caiaq/input.c
+@@ -804,6 +804,7 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *cdev)
+
+ default:
+ /* no input methods supported on this device */
++ ret = -EINVAL;
+ goto exit_free_idev;
+ }
+
+diff --git a/sound/usb/card.c b/sound/usb/card.c
+index 1764b9302d467..550c6a72fb5bc 100644
+--- a/sound/usb/card.c
++++ b/sound/usb/card.c
+@@ -387,6 +387,14 @@ static const struct usb_audio_device_name usb_audio_names[] = {
+ DEVICE_NAME(0x05e1, 0x0408, "Syntek", "STK1160"),
+ DEVICE_NAME(0x05e1, 0x0480, "Hauppauge", "Woodbury"),
+
++ /* ASUS ROG Zenith II: this machine has also two devices, one for
++ * the front headphone and another for the rest
++ */
++ PROFILE_NAME(0x0b05, 0x1915, "ASUS", "Zenith II Front Headphone",
++ "Zenith-II-Front-Headphone"),
++ PROFILE_NAME(0x0b05, 0x1916, "ASUS", "Zenith II Main Audio",
++ "Zenith-II-Main-Audio"),
++
+ /* ASUS ROG Strix */
+ PROFILE_NAME(0x0b05, 0x1917,
+ "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"),
+@@ -681,7 +689,7 @@ static bool get_alias_id(struct usb_device *dev, unsigned int *id)
+ return false;
+ }
+
+-static bool check_delayed_register_option(struct snd_usb_audio *chip, int iface)
++static int check_delayed_register_option(struct snd_usb_audio *chip)
+ {
+ int i;
+ unsigned int id, inum;
+@@ -690,14 +698,31 @@ static bool check_delayed_register_option(struct snd_usb_audio *chip, int iface)
+ if (delayed_register[i] &&
+ sscanf(delayed_register[i], "%x:%x", &id, &inum) == 2 &&
+ id == chip->usb_id)
+- return inum != iface;
++ return inum;
+ }
+
+- return false;
++ return -1;
+ }
+
+ static const struct usb_device_id usb_audio_ids[]; /* defined below */
+
++/* look for the last interface that matches with our ids and remember it */
++static void find_last_interface(struct snd_usb_audio *chip)
++{
++ struct usb_host_config *config = chip->dev->actconfig;
++ struct usb_interface *intf;
++ int i;
++
++ if (!config)
++ return;
++ for (i = 0; i < config->desc.bNumInterfaces; i++) {
++ intf = config->interface[i];
++ if (usb_match_id(intf, usb_audio_ids))
++ chip->last_iface = intf->altsetting[0].desc.bInterfaceNumber;
++ }
++ usb_audio_dbg(chip, "Found last interface = %d\n", chip->last_iface);
++}
++
+ /* look for the corresponding quirk */
+ static const struct snd_usb_audio_quirk *
+ get_alias_quirk(struct usb_device *dev, unsigned int id)
+@@ -716,6 +741,18 @@ get_alias_quirk(struct usb_device *dev, unsigned int id)
+ return NULL;
+ }
+
++/* register card if we reach to the last interface or to the specified
++ * one given via option
++ */
++static int try_to_register_card(struct snd_usb_audio *chip, int ifnum)
++{
++ if (check_delayed_register_option(chip) == ifnum ||
++ chip->last_iface == ifnum ||
++ usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface)))
++ return snd_card_register(chip->card);
++ return 0;
++}
++
+ /*
+ * probe the active usb device
+ *
+@@ -804,6 +841,7 @@ static int usb_audio_probe(struct usb_interface *intf,
+ err = -ENODEV;
+ goto __error;
+ }
++ find_last_interface(chip);
+ }
+
+ if (chip->num_interfaces >= MAX_CARD_INTERFACES) {
+@@ -853,15 +891,9 @@ static int usb_audio_probe(struct usb_interface *intf,
+ chip->need_delayed_register = false; /* clear again */
+ }
+
+- /* we are allowed to call snd_card_register() many times, but first
+- * check to see if a device needs to skip it or do anything special
+- */
+- if (!snd_usb_registration_quirk(chip, ifnum) &&
+- !check_delayed_register_option(chip, ifnum)) {
+- err = snd_card_register(chip->card);
+- if (err < 0)
+- goto __error;
+- }
++ err = try_to_register_card(chip, ifnum);
++ if (err < 0)
++ goto __error_no_register;
+
+ if (chip->quirk_flags & QUIRK_FLAG_SHARE_MEDIA_DEVICE) {
+ /* don't want to fail when snd_media_device_create() fails */
+@@ -880,6 +912,11 @@ static int usb_audio_probe(struct usb_interface *intf,
+ return 0;
+
+ __error:
++ /* in the case of error in secondary interface, still try to register */
++ if (chip)
++ try_to_register_card(chip, ifnum);
++
++ __error_no_register:
+ if (chip) {
+ /* chip->active is inside the chip->card object,
+ * decrement before memory is possibly returned.
+diff --git a/sound/usb/card.h b/sound/usb/card.h
+index 5b19901f305a3..87f042d06ce08 100644
+--- a/sound/usb/card.h
++++ b/sound/usb/card.h
+@@ -74,8 +74,9 @@ struct snd_usb_endpoint {
+
+ atomic_t state; /* running state */
+
+- void (*prepare_data_urb) (struct snd_usb_substream *subs,
+- struct urb *urb);
++ int (*prepare_data_urb) (struct snd_usb_substream *subs,
++ struct urb *urb,
++ bool in_stream_lock);
+ void (*retire_data_urb) (struct snd_usb_substream *subs,
+ struct urb *urb);
+
+@@ -94,9 +95,9 @@ struct snd_usb_endpoint {
+ struct list_head ready_playback_urbs; /* playback URB FIFO for implicit fb */
+
+ unsigned int nurbs; /* # urbs */
+- unsigned int nominal_queue_size; /* total buffer sizes in URBs */
+ unsigned long active_mask; /* bitmask of active urbs */
+ unsigned long unlink_mask; /* bitmask of unlinked urbs */
++ atomic_t submitted_urbs; /* currently submitted urbs */
+ char *syncbuf; /* sync buffer for all sync URBs */
+ dma_addr_t sync_dma; /* DMA address of syncbuf */
+
+@@ -125,6 +126,7 @@ struct snd_usb_endpoint {
+ int skip_packets; /* quirks for devices to ignore the first n packets
+ in a stream */
+ bool implicit_fb_sync; /* syncs with implicit feedback */
++ bool lowlatency_playback; /* low-latency playback mode */
+ bool need_setup; /* (re-)need for configure? */
+
+ /* for hw constraints */
+@@ -136,6 +138,7 @@ struct snd_usb_endpoint {
+ unsigned int cur_period_frames;
+ unsigned int cur_period_bytes;
+ unsigned int cur_buffer_periods;
++ unsigned char cur_clock;
+
+ spinlock_t lock;
+ struct list_head list;
+@@ -188,7 +191,7 @@ struct snd_usb_substream {
+ } dsd_dop;
+
+ bool trigger_tstamp_pending_update; /* trigger timestamp being updated from initial estimate */
+- bool early_playback_start; /* early start needed for playback? */
++ bool lowlatency_playback; /* low-latency playback mode */
+ struct media_ctl *media_ctl;
+ };
+
+diff --git a/sound/usb/clock.c b/sound/usb/clock.c
+index 81d5ce07d548b..ccca9efa7d33f 100644
+--- a/sound/usb/clock.c
++++ b/sound/usb/clock.c
+@@ -496,6 +496,10 @@ int snd_usb_set_sample_rate_v2v3(struct snd_usb_audio *chip,
+ union uac23_clock_source_desc *cs_desc;
+
+ cs_desc = snd_usb_find_clock_source(chip, clock, fmt->protocol);
++
++ if (!cs_desc)
++ return 0;
++
+ if (fmt->protocol == UAC_VERSION_3)
+ bmControls = le32_to_cpu(cs_desc->v3.bmControls);
+ else
+@@ -568,6 +572,17 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip,
+ /* continue processing */
+ }
+
++ /* FIXME - TEAC devices require the immediate interface setup */
++ if (USB_ID_VENDOR(chip->usb_id) == 0x0644) {
++ bool cur_base_48k = (rate % 48000 == 0);
++ bool prev_base_48k = (prev_rate % 48000 == 0);
++ if (cur_base_48k != prev_base_48k) {
++ usb_set_interface(chip->dev, fmt->iface, fmt->altsetting);
++ if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY)
++ msleep(50);
++ }
++ }
++
+ validation:
+ /* validate clock after rate change */
+ if (!uac_clock_source_is_valid(chip, fmt, clock))
+diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
+index 533919a28856f..6c7d842d04965 100644
+--- a/sound/usb/endpoint.c
++++ b/sound/usb/endpoint.c
+@@ -85,12 +85,13 @@ static inline unsigned get_usb_high_speed_rate(unsigned int rate)
+ */
+ static void release_urb_ctx(struct snd_urb_ctx *u)
+ {
+- if (u->buffer_size)
++ if (u->urb && u->buffer_size)
+ usb_free_coherent(u->ep->chip->dev, u->buffer_size,
+ u->urb->transfer_buffer,
+ u->urb->transfer_dma);
+ usb_free_urb(u->urb);
+ u->urb = NULL;
++ u->buffer_size = 0;
+ }
+
+ static const char *usb_error_string(int err)
+@@ -148,18 +149,23 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep)
+ * This won't be used for implicit feedback which takes the packet size
+ * returned from the sync source
+ */
+-static int slave_next_packet_size(struct snd_usb_endpoint *ep)
++static int slave_next_packet_size(struct snd_usb_endpoint *ep,
++ unsigned int avail)
+ {
+ unsigned long flags;
++ unsigned int phase;
+ int ret;
+
+ if (ep->fill_max)
+ return ep->maxframesize;
+
+ spin_lock_irqsave(&ep->lock, flags);
+- ep->phase = (ep->phase & 0xffff)
+- + (ep->freqm << ep->datainterval);
+- ret = min(ep->phase >> 16, ep->maxframesize);
++ phase = (ep->phase & 0xffff) + (ep->freqm << ep->datainterval);
++ ret = min(phase >> 16, ep->maxframesize);
++ if (avail && ret >= avail)
++ ret = -EAGAIN;
++ else
++ ep->phase = phase;
+ spin_unlock_irqrestore(&ep->lock, flags);
+
+ return ret;
+@@ -169,20 +175,25 @@ static int slave_next_packet_size(struct snd_usb_endpoint *ep)
+ * Return the number of samples to be sent in the next packet
+ * for adaptive and synchronous endpoints
+ */
+-static int next_packet_size(struct snd_usb_endpoint *ep)
++static int next_packet_size(struct snd_usb_endpoint *ep, unsigned int avail)
+ {
++ unsigned int sample_accum;
+ int ret;
+
+ if (ep->fill_max)
+ return ep->maxframesize;
+
+- ep->sample_accum += ep->sample_rem;
+- if (ep->sample_accum >= ep->pps) {
+- ep->sample_accum -= ep->pps;
++ sample_accum = ep->sample_accum + ep->sample_rem;
++ if (sample_accum >= ep->pps) {
++ sample_accum -= ep->pps;
+ ret = ep->packsize[1];
+ } else {
+ ret = ep->packsize[0];
+ }
++ if (avail && ret >= avail)
++ ret = -EAGAIN;
++ else
++ ep->sample_accum = sample_accum;
+
+ return ret;
+ }
+@@ -190,16 +201,27 @@ static int next_packet_size(struct snd_usb_endpoint *ep)
+ /*
+ * snd_usb_endpoint_next_packet_size: Return the number of samples to be sent
+ * in the next packet
++ *
++ * If the size is equal or exceeds @avail, don't proceed but return -EAGAIN
++ * Exception: @avail = 0 for skipping the check.
+ */
+ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
+- struct snd_urb_ctx *ctx, int idx)
++ struct snd_urb_ctx *ctx, int idx,
++ unsigned int avail)
+ {
+- if (ctx->packet_size[idx])
+- return ctx->packet_size[idx];
+- else if (ep->sync_source)
+- return slave_next_packet_size(ep);
++ unsigned int packet;
++
++ packet = ctx->packet_size[idx];
++ if (packet) {
++ if (avail && packet >= avail)
++ return -EAGAIN;
++ return packet;
++ }
++
++ if (ep->sync_source)
++ return slave_next_packet_size(ep, avail);
+ else
+- return next_packet_size(ep);
++ return next_packet_size(ep, avail);
+ }
+
+ static void call_retire_callback(struct snd_usb_endpoint *ep,
+@@ -263,7 +285,7 @@ static void prepare_silent_urb(struct snd_usb_endpoint *ep,
+ unsigned int length;
+ int counts;
+
+- counts = snd_usb_endpoint_next_packet_size(ep, ctx, i);
++ counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, 0);
+ length = counts * ep->stride; /* number of silent bytes */
+ offset = offs * ep->stride + extra * i;
+ urb->iso_frame_desc[i].offset = offset;
+@@ -286,8 +308,9 @@ static void prepare_silent_urb(struct snd_usb_endpoint *ep,
+ /*
+ * Prepare a PLAYBACK urb for submission to the bus.
+ */
+-static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
+- struct snd_urb_ctx *ctx)
++static int prepare_outbound_urb(struct snd_usb_endpoint *ep,
++ struct snd_urb_ctx *ctx,
++ bool in_stream_lock)
+ {
+ struct urb *urb = ctx->urb;
+ unsigned char *cp = urb->transfer_buffer;
+@@ -299,9 +322,9 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
+ case SND_USB_ENDPOINT_TYPE_DATA:
+ data_subs = READ_ONCE(ep->data_subs);
+ if (data_subs && ep->prepare_data_urb)
+- ep->prepare_data_urb(data_subs, urb);
+- else /* no data provider, so send silence */
+- prepare_silent_urb(ep, ctx);
++ return ep->prepare_data_urb(data_subs, urb, in_stream_lock);
++ /* no data provider, so send silence */
++ prepare_silent_urb(ep, ctx);
+ break;
+
+ case SND_USB_ENDPOINT_TYPE_SYNC:
+@@ -330,13 +353,14 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
+
+ break;
+ }
++ return 0;
+ }
+
+ /*
+ * Prepare a CAPTURE or SYNC urb for submission to the bus.
+ */
+-static inline void prepare_inbound_urb(struct snd_usb_endpoint *ep,
+- struct snd_urb_ctx *urb_ctx)
++static int prepare_inbound_urb(struct snd_usb_endpoint *ep,
++ struct snd_urb_ctx *urb_ctx)
+ {
+ int i, offs;
+ struct urb *urb = urb_ctx->urb;
+@@ -361,6 +385,7 @@ static inline void prepare_inbound_urb(struct snd_usb_endpoint *ep,
+ urb->iso_frame_desc[0].offset = 0;
+ break;
+ }
++ return 0;
+ }
+
+ /* notify an error as XRUN to the assigned PCM data substream */
+@@ -396,6 +421,16 @@ next_packet_fifo_dequeue(struct snd_usb_endpoint *ep)
+ return p;
+ }
+
++static void push_back_to_ready_list(struct snd_usb_endpoint *ep,
++ struct snd_urb_ctx *ctx)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&ep->lock, flags);
++ list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
++ spin_unlock_irqrestore(&ep->lock, flags);
++}
++
+ /*
+ * Send output urbs that have been prepared previously. URBs are dequeued
+ * from ep->ready_playback_urbs and in case there aren't any available
+@@ -406,12 +441,14 @@ next_packet_fifo_dequeue(struct snd_usb_endpoint *ep)
+ * is that host controllers don't guarantee the order in which they return
+ * inbound and outbound packets to their submitters.
+ *
+- * This function is only used for implicit feedback endpoints. For endpoints
+- * driven by dedicated sync endpoints, URBs are immediately re-submitted
+- * from their completion handler.
++ * This function is used both for implicit feedback endpoints and in low-
++ * latency playback mode.
+ */
+-static void queue_pending_output_urbs(struct snd_usb_endpoint *ep)
++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
++ bool in_stream_lock)
+ {
++ bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep);
++
+ while (ep_state_running(ep)) {
+
+ unsigned long flags;
+@@ -420,38 +457,58 @@ static void queue_pending_output_urbs(struct snd_usb_endpoint *ep)
+ int err, i;
+
+ spin_lock_irqsave(&ep->lock, flags);
+- if (ep->next_packet_queued > 0 &&
++ if ((!implicit_fb || ep->next_packet_queued > 0) &&
+ !list_empty(&ep->ready_playback_urbs)) {
+ /* take URB out of FIFO */
+ ctx = list_first_entry(&ep->ready_playback_urbs,
+ struct snd_urb_ctx, ready_list);
+ list_del_init(&ctx->ready_list);
+-
+- packet = next_packet_fifo_dequeue(ep);
++ if (implicit_fb)
++ packet = next_packet_fifo_dequeue(ep);
+ }
+ spin_unlock_irqrestore(&ep->lock, flags);
+
+ if (ctx == NULL)
+- return;
++ break;
+
+ /* copy over the length information */
+- for (i = 0; i < packet->packets; i++)
+- ctx->packet_size[i] = packet->packet_size[i];
++ if (implicit_fb) {
++ for (i = 0; i < packet->packets; i++)
++ ctx->packet_size[i] = packet->packet_size[i];
++ }
+
+ /* call the data handler to fill in playback data */
+- prepare_outbound_urb(ep, ctx);
++ err = prepare_outbound_urb(ep, ctx, in_stream_lock);
++ /* can be stopped during prepare callback */
++ if (unlikely(!ep_state_running(ep)))
++ break;
++ if (err < 0) {
++ /* push back to ready list again for -EAGAIN */
++ if (err == -EAGAIN) {
++ push_back_to_ready_list(ep, ctx);
++ break;
++ }
++
++ if (!in_stream_lock)
++ notify_xrun(ep);
++ return -EPIPE;
++ }
+
+ err = usb_submit_urb(ctx->urb, GFP_ATOMIC);
+ if (err < 0) {
+ usb_audio_err(ep->chip,
+ "Unable to submit urb #%d: %d at %s\n",
+ ctx->index, err, __func__);
+- notify_xrun(ep);
+- return;
++ if (!in_stream_lock)
++ notify_xrun(ep);
++ return -EPIPE;
+ }
+
+ set_bit(ctx->index, &ep->active_mask);
++ atomic_inc(&ep->submitted_urbs);
+ }
++
++ return 0;
+ }
+
+ /*
+@@ -461,7 +518,6 @@ static void snd_complete_urb(struct urb *urb)
+ {
+ struct snd_urb_ctx *ctx = urb->context;
+ struct snd_usb_endpoint *ep = ctx->ep;
+- unsigned long flags;
+ int err;
+
+ if (unlikely(urb->status == -ENOENT || /* unlinked */
+@@ -482,16 +538,20 @@ static void snd_complete_urb(struct urb *urb)
+ if (unlikely(!ep_state_running(ep)))
+ goto exit_clear;
+
+- if (snd_usb_endpoint_implicit_feedback_sink(ep)) {
+- spin_lock_irqsave(&ep->lock, flags);
+- list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
++ /* in low-latency and implicit-feedback modes, push back the
++ * URB to ready list at first, then process as much as possible
++ */
++ if (ep->lowlatency_playback ||
++ snd_usb_endpoint_implicit_feedback_sink(ep)) {
++ push_back_to_ready_list(ep, ctx);
+ clear_bit(ctx->index, &ep->active_mask);
+- spin_unlock_irqrestore(&ep->lock, flags);
+- queue_pending_output_urbs(ep);
++ snd_usb_queue_pending_output_urbs(ep, false);
++ atomic_dec(&ep->submitted_urbs); /* decrement at last */
+ return;
+ }
+
+- prepare_outbound_urb(ep, ctx);
++ /* in non-lowlatency mode, no error handling for prepare */
++ prepare_outbound_urb(ep, ctx, false);
+ /* can be stopped during prepare callback */
+ if (unlikely(!ep_state_running(ep)))
+ goto exit_clear;
+@@ -513,6 +573,7 @@ static void snd_complete_urb(struct urb *urb)
+
+ exit_clear:
+ clear_bit(ctx->index, &ep->active_mask);
++ atomic_dec(&ep->submitted_urbs);
+ }
+
+ /*
+@@ -596,6 +657,7 @@ int snd_usb_add_endpoint(struct snd_usb_audio *chip, int ep_num, int type)
+ ep->type = type;
+ ep->ep_num = ep_num;
+ INIT_LIST_HEAD(&ep->ready_playback_urbs);
++ atomic_set(&ep->submitted_urbs, 0);
+
+ is_playback = ((ep_num & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT);
+ ep_num &= USB_ENDPOINT_NUMBER_MASK;
+@@ -722,6 +784,7 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip,
+ ep->cur_period_frames = params_period_size(params);
+ ep->cur_period_bytes = ep->cur_period_frames * ep->cur_frame_bytes;
+ ep->cur_buffer_periods = params_periods(params);
++ ep->cur_clock = fp->clock;
+
+ if (ep->type == SND_USB_ENDPOINT_TYPE_SYNC)
+ endpoint_set_syncinterval(chip, ep);
+@@ -781,14 +844,19 @@ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip,
+ * Pass NULL to deactivate each callback.
+ */
+ void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep,
+- void (*prepare)(struct snd_usb_substream *subs,
+- struct urb *urb),
++ int (*prepare)(struct snd_usb_substream *subs,
++ struct urb *urb,
++ bool in_stream_lock),
+ void (*retire)(struct snd_usb_substream *subs,
+ struct urb *urb),
+ struct snd_usb_substream *data_subs)
+ {
+ ep->prepare_data_urb = prepare;
+ ep->retire_data_urb = retire;
++ if (data_subs)
++ ep->lowlatency_playback = data_subs->lowlatency_playback;
++ else
++ ep->lowlatency_playback = false;
+ WRITE_ONCE(ep->data_subs, data_subs);
+ }
+
+@@ -825,7 +893,8 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip,
+ usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n",
+ ep->ep_num, ep->opened);
+
+- if (!--ep->iface_ref->opened)
++ if (!--ep->iface_ref->opened &&
++ !(chip->quirk_flags & QUIRK_FLAG_IFACE_SKIP_CLOSE))
+ endpoint_set_interface(chip, ep, false);
+
+ if (!--ep->opened) {
+@@ -833,6 +902,7 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip,
+ ep->altsetting = 0;
+ ep->cur_audiofmt = NULL;
+ ep->cur_rate = 0;
++ ep->cur_clock = 0;
+ ep->iface_ref = NULL;
+ usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num);
+ }
+@@ -859,7 +929,7 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep)
+ return 0;
+
+ do {
+- alive = bitmap_weight(&ep->active_mask, ep->nurbs);
++ alive = atomic_read(&ep->submitted_urbs);
+ if (!alive)
+ break;
+
+@@ -893,9 +963,10 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep)
+ *
+ * This function moves the EP to STOPPING state if it's being RUNNING.
+ */
+-static int stop_urbs(struct snd_usb_endpoint *ep, bool force)
++static int stop_urbs(struct snd_usb_endpoint *ep, bool force, bool keep_pending)
+ {
+ unsigned int i;
++ unsigned long flags;
+
+ if (!force && atomic_read(&ep->running))
+ return -EBUSY;
+@@ -903,9 +974,14 @@ static int stop_urbs(struct snd_usb_endpoint *ep, bool force)
+ if (!ep_state_update(ep, EP_STATE_RUNNING, EP_STATE_STOPPING))
+ return 0;
+
++ spin_lock_irqsave(&ep->lock, flags);
+ INIT_LIST_HEAD(&ep->ready_playback_urbs);
+ ep->next_packet_head = 0;
+ ep->next_packet_queued = 0;
++ spin_unlock_irqrestore(&ep->lock, flags);
++
++ if (keep_pending)
++ return 0;
+
+ for (i = 0; i < ep->nurbs; i++) {
+ if (test_bit(i, &ep->active_mask)) {
+@@ -930,7 +1006,7 @@ static int release_urbs(struct snd_usb_endpoint *ep, bool force)
+ snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL);
+
+ /* stop and unlink urbs */
+- err = stop_urbs(ep, force);
++ err = stop_urbs(ep, force, false);
+ if (err)
+ return err;
+
+@@ -1132,10 +1208,6 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep)
+ INIT_LIST_HEAD(&u->ready_list);
+ }
+
+- /* total buffer bytes of all URBs plus the next queue;
+- * referred in pcm.c
+- */
+- ep->nominal_queue_size = maxsize * urb_packs * (ep->nurbs + 1);
+ return 0;
+
+ out_of_memory:
+@@ -1159,6 +1231,7 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep)
+ if (!ep->syncbuf)
+ return -ENOMEM;
+
++ ep->nurbs = SYNC_URBS;
+ for (i = 0; i < SYNC_URBS; i++) {
+ struct snd_urb_ctx *u = &ep->urb[i];
+ u->index = i;
+@@ -1178,8 +1251,6 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep)
+ u->urb->complete = snd_complete_urb;
+ }
+
+- ep->nurbs = SYNC_URBS;
+-
+ return 0;
+
+ out_of_memory:
+@@ -1340,6 +1411,25 @@ unlock:
+ return err;
+ }
+
++/* get the current rate set to the given clock by any endpoint */
++int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock)
++{
++ struct snd_usb_endpoint *ep;
++ int rate = 0;
++
++ if (!clock)
++ return 0;
++ mutex_lock(&chip->mutex);
++ list_for_each_entry(ep, &chip->ep_list, list) {
++ if (ep->cur_clock == clock && ep->cur_rate) {
++ rate = ep->cur_rate;
++ break;
++ }
++ }
++ mutex_unlock(&chip->mutex);
++ return rate;
++}
++
+ /**
+ * snd_usb_endpoint_start: start an snd_usb_endpoint
+ *
+@@ -1355,6 +1445,7 @@ unlock:
+ */
+ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+ {
++ bool is_playback = usb_pipeout(ep->pipe);
+ int err;
+ unsigned int i;
+
+@@ -1391,13 +1482,9 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+
+ if (snd_usb_endpoint_implicit_feedback_sink(ep) &&
+ !(ep->chip->quirk_flags & QUIRK_FLAG_PLAYBACK_FIRST)) {
+- for (i = 0; i < ep->nurbs; i++) {
+- struct snd_urb_ctx *ctx = ep->urb + i;
+- list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
+- }
+-
+ usb_audio_dbg(ep->chip, "No URB submission due to implicit fb sync\n");
+- return 0;
++ i = 0;
++ goto fill_rest;
+ }
+
+ for (i = 0; i < ep->nurbs; i++) {
+@@ -1406,10 +1493,18 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+ if (snd_BUG_ON(!urb))
+ goto __error;
+
+- if (usb_pipeout(ep->pipe)) {
+- prepare_outbound_urb(ep, urb->context);
+- } else {
+- prepare_inbound_urb(ep, urb->context);
++ if (is_playback)
++ err = prepare_outbound_urb(ep, urb->context, true);
++ else
++ err = prepare_inbound_urb(ep, urb->context);
++ if (err < 0) {
++ /* stop filling at applptr */
++ if (err == -EAGAIN)
++ break;
++ usb_audio_dbg(ep->chip,
++ "EP 0x%x: failed to prepare urb: %d\n",
++ ep->ep_num, err);
++ goto __error;
+ }
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+@@ -1420,14 +1515,29 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+ goto __error;
+ }
+ set_bit(i, &ep->active_mask);
++ atomic_inc(&ep->submitted_urbs);
++ }
++
++ if (!i) {
++ usb_audio_dbg(ep->chip, "XRUN at starting EP 0x%x\n",
++ ep->ep_num);
++ goto __error;
+ }
+
+ usb_audio_dbg(ep->chip, "%d URBs submitted for EP 0x%x\n",
+- ep->nurbs, ep->ep_num);
++ i, ep->ep_num);
++
++ fill_rest:
++ /* put the remaining URBs to ready list */
++ if (is_playback) {
++ for (; i < ep->nurbs; i++)
++ push_back_to_ready_list(ep, ep->urb + i);
++ }
++
+ return 0;
+
+ __error:
+- snd_usb_endpoint_stop(ep);
++ snd_usb_endpoint_stop(ep, false);
+ return -EPIPE;
+ }
+
+@@ -1435,6 +1545,7 @@ __error:
+ * snd_usb_endpoint_stop: stop an snd_usb_endpoint
+ *
+ * @ep: the endpoint to stop (may be NULL)
++ * @keep_pending: keep in-flight URBs
+ *
+ * A call to this function will decrement the running count of the endpoint.
+ * In case the last user has requested the endpoint stop, the URBs will
+@@ -1445,7 +1556,7 @@ __error:
+ * The caller needs to synchronize the pending stop operation via
+ * snd_usb_endpoint_sync_pending_stop().
+ */
+-void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep)
++void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep, bool keep_pending)
+ {
+ if (!ep)
+ return;
+@@ -1460,7 +1571,7 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep)
+ if (!atomic_dec_return(&ep->running)) {
+ if (ep->sync_source)
+ WRITE_ONCE(ep->sync_source->sync_sink, NULL);
+- stop_urbs(ep, false);
++ stop_urbs(ep, false, keep_pending);
+ }
+ }
+
+@@ -1575,7 +1686,7 @@ static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep,
+ }
+
+ spin_unlock_irqrestore(&ep->lock, flags);
+- queue_pending_output_urbs(ep);
++ snd_usb_queue_pending_output_urbs(ep, false);
+
+ return;
+ }
+diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h
+index a668f675b52b0..daa7ba063d858 100644
+--- a/sound/usb/endpoint.h
++++ b/sound/usb/endpoint.h
+@@ -19,6 +19,7 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip,
+ struct snd_usb_endpoint *ep);
+ int snd_usb_endpoint_configure(struct snd_usb_audio *chip,
+ struct snd_usb_endpoint *ep);
++int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock);
+
+ bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip,
+ struct snd_usb_endpoint *ep,
+@@ -29,14 +30,15 @@ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip,
+ struct snd_usb_endpoint *data_ep,
+ struct snd_usb_endpoint *sync_ep);
+ void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep,
+- void (*prepare)(struct snd_usb_substream *subs,
+- struct urb *urb),
++ int (*prepare)(struct snd_usb_substream *subs,
++ struct urb *urb,
++ bool in_stream_lock),
+ void (*retire)(struct snd_usb_substream *subs,
+ struct urb *urb),
+ struct snd_usb_substream *data_subs);
+
+ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep);
+-void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep);
++void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep, bool keep_pending);
+ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep);
+ void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep);
+ int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep);
+@@ -45,6 +47,9 @@ void snd_usb_endpoint_free_all(struct snd_usb_audio *chip);
+
+ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep);
+ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
+- struct snd_urb_ctx *ctx, int idx);
++ struct snd_urb_ctx *ctx, int idx,
++ unsigned int avail);
++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
++ bool in_stream_lock);
+
+ #endif /* __USBAUDIO_ENDPOINT_H */
+diff --git a/sound/usb/format.c b/sound/usb/format.c
+index 50efccbffb8a7..ab5fed9f55b60 100644
+--- a/sound/usb/format.c
++++ b/sound/usb/format.c
+@@ -39,8 +39,12 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip,
+ case UAC_VERSION_1:
+ default: {
+ struct uac_format_type_i_discrete_descriptor *fmt = _fmt;
+- if (format >= 64)
+- return 0; /* invalid format */
++ if (format >= 64) {
++ usb_audio_info(chip,
++ "%u:%d: invalid format type 0x%llx is detected, processed as PCM\n",
++ fp->iface, fp->altsetting, format);
++ format = UAC_FORMAT_TYPE_I_PCM;
++ }
+ sample_width = fmt->bBitResolution;
+ sample_bytes = fmt->bSubframeSize;
+ format = 1ULL << format;
+@@ -375,7 +379,7 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip,
+ for (rate = min; rate <= max; rate += res) {
+
+ /* Filter out invalid rates on Presonus Studio 1810c */
+- if (chip->usb_id == USB_ID(0x0194f, 0x010c) &&
++ if (chip->usb_id == USB_ID(0x194f, 0x010c) &&
+ !s1810c_valid_sample_rate(fp, rate))
+ goto skip_rate;
+
+@@ -414,10 +418,12 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip,
+ case USB_ID(0x0e41, 0x4242): /* Line6 Helix Rack */
+ case USB_ID(0x0e41, 0x4244): /* Line6 Helix LT */
+ case USB_ID(0x0e41, 0x4246): /* Line6 HX-Stomp */
++ case USB_ID(0x0e41, 0x4253): /* Line6 HX-Stomp XL */
+ case USB_ID(0x0e41, 0x4247): /* Line6 Pod Go */
+ case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */
+ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */
+ case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */
++ case USB_ID(0x0e41, 0x424b): /* Line6 Pod Go */
+ case USB_ID(0x19f7, 0x0011): /* Rode Rodecaster Pro */
+ return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000);
+ }
+diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c
+index 23767a14d1266..f3e8484b3d9cb 100644
+--- a/sound/usb/implicit.c
++++ b/sound/usb/implicit.c
+@@ -45,17 +45,12 @@ struct snd_usb_implicit_fb_match {
+
+ /* Implicit feedback quirk table for playback */
+ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
+- /* Generic matching */
+- IMPLICIT_FB_GENERIC_DEV(0x0499, 0x1509), /* Steinberg UR22 */
+- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2080), /* M-Audio FastTrack Ultra */
+- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2081), /* M-Audio FastTrack Ultra */
+- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2030), /* M-Audio Fast Track C400 */
+- IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2031), /* M-Audio Fast Track C600 */
+-
+ /* Fixed EP */
+ /* FIXME: check the availability of generic matching */
+- IMPLICIT_FB_FIXED_DEV(0x1397, 0x0001, 0x81, 1), /* Behringer UFX1604 */
+- IMPLICIT_FB_FIXED_DEV(0x1397, 0x0002, 0x81, 1), /* Behringer UFX1204 */
++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2030, 0x81, 3), /* M-Audio Fast Track C400 */
++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2031, 0x81, 3), /* M-Audio Fast Track C600 */
++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */
++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2081, 0x81, 2), /* M-Audio FastTrack Ultra */
+ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8010, 0x81, 2), /* Fractal Audio Axe-Fx III */
+ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0001, 0x81, 2), /* Solid State Logic SSL2 */
+ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */
+@@ -352,7 +347,8 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip,
+ }
+
+ /* Try the generic implicit fb if available */
+- if (chip->generic_implicit_fb)
++ if (chip->generic_implicit_fb ||
++ (chip->quirk_flags & QUIRK_FLAG_GENERIC_IMPLICIT_FB))
+ return add_generic_implicit_fb(chip, fmt, alts);
+
+ /* No quirk */
+@@ -389,6 +385,8 @@ int snd_usb_parse_implicit_fb_quirk(struct snd_usb_audio *chip,
+ struct audioformat *fmt,
+ struct usb_host_interface *alts)
+ {
++ if (chip->quirk_flags & QUIRK_FLAG_SKIP_IMPLICIT_FB)
++ return 0;
+ if (fmt->endpoint & USB_DIR_IN)
+ return audioformat_capture_quirk(chip, fmt, alts);
+ else
+diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
+index 9602929b7de90..b67617b68e509 100644
+--- a/sound/usb/line6/driver.c
++++ b/sound/usb/line6/driver.c
+@@ -113,12 +113,12 @@ int line6_send_raw_message(struct usb_line6 *line6, const char *buffer,
+ retval = usb_interrupt_msg(line6->usbdev,
+ usb_sndintpipe(line6->usbdev, properties->ep_ctrl_w),
+ (char *)frag_buf, frag_size,
+- &partial, LINE6_TIMEOUT * HZ);
++ &partial, LINE6_TIMEOUT);
+ } else {
+ retval = usb_bulk_msg(line6->usbdev,
+ usb_sndbulkpipe(line6->usbdev, properties->ep_ctrl_w),
+ (char *)frag_buf, frag_size,
+- &partial, LINE6_TIMEOUT * HZ);
++ &partial, LINE6_TIMEOUT);
+ }
+
+ if (retval) {
+@@ -304,7 +304,8 @@ static void line6_data_received(struct urb *urb)
+ for (;;) {
+ done =
+ line6_midibuf_read(mb, line6->buffer_message,
+- LINE6_MIDI_MESSAGE_MAXLEN);
++ LINE6_MIDI_MESSAGE_MAXLEN,
++ LINE6_MIDIBUF_READ_RX);
+
+ if (done <= 0)
+ break;
+@@ -347,7 +348,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
+ ret = usb_control_msg_send(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+ (datalen << 8) | 0x21, address, NULL, 0,
+- LINE6_TIMEOUT * HZ, GFP_KERNEL);
++ LINE6_TIMEOUT, GFP_KERNEL);
+ if (ret) {
+ dev_err(line6->ifcdev, "read request failed (error %d)\n", ret);
+ goto exit;
+@@ -360,7 +361,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
+ ret = usb_control_msg_recv(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+ 0x0012, 0x0000, &len, 1,
+- LINE6_TIMEOUT * HZ, GFP_KERNEL);
++ LINE6_TIMEOUT, GFP_KERNEL);
+ if (ret) {
+ dev_err(line6->ifcdev,
+ "receive length failed (error %d)\n", ret);
+@@ -387,7 +388,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
+ /* receive the result: */
+ ret = usb_control_msg_recv(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT * HZ,
++ 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT,
+ GFP_KERNEL);
+ if (ret)
+ dev_err(line6->ifcdev, "read failed (error %d)\n", ret);
+@@ -417,7 +418,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data,
+
+ ret = usb_control_msg_send(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- 0x0022, address, data, datalen, LINE6_TIMEOUT * HZ,
++ 0x0022, address, data, datalen, LINE6_TIMEOUT,
+ GFP_KERNEL);
+ if (ret) {
+ dev_err(line6->ifcdev,
+@@ -430,7 +431,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data,
+
+ ret = usb_control_msg_recv(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+- 0x0012, 0x0000, status, 1, LINE6_TIMEOUT * HZ,
++ 0x0012, 0x0000, status, 1, LINE6_TIMEOUT,
+ GFP_KERNEL);
+ if (ret) {
+ dev_err(line6->ifcdev,
+diff --git a/sound/usb/line6/driver.h b/sound/usb/line6/driver.h
+index 71d3da1db8c81..ecf3a2b39c7eb 100644
+--- a/sound/usb/line6/driver.h
++++ b/sound/usb/line6/driver.h
+@@ -27,7 +27,7 @@
+ #define LINE6_FALLBACK_INTERVAL 10
+ #define LINE6_FALLBACK_MAXPACKETSIZE 16
+
+-#define LINE6_TIMEOUT 1
++#define LINE6_TIMEOUT 1000
+ #define LINE6_BUFSIZE_LISTEN 64
+ #define LINE6_MIDI_MESSAGE_MAXLEN 256
+
+diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c
+index ba0e2b7e8fe19..0838632c788e4 100644
+--- a/sound/usb/line6/midi.c
++++ b/sound/usb/line6/midi.c
+@@ -44,7 +44,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream)
+ int req, done;
+
+ for (;;) {
+- req = min(line6_midibuf_bytes_free(mb), line6->max_packet_size);
++ req = min3(line6_midibuf_bytes_free(mb), line6->max_packet_size,
++ LINE6_FALLBACK_MAXPACKETSIZE);
+ done = snd_rawmidi_transmit_peek(substream, chunk, req);
+
+ if (done == 0)
+@@ -56,7 +57,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream)
+
+ for (;;) {
+ done = line6_midibuf_read(mb, chunk,
+- LINE6_FALLBACK_MAXPACKETSIZE);
++ LINE6_FALLBACK_MAXPACKETSIZE,
++ LINE6_MIDIBUF_READ_TX);
+
+ if (done == 0)
+ break;
+diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c
+index 6a70463f82c4e..e7f830f7526c9 100644
+--- a/sound/usb/line6/midibuf.c
++++ b/sound/usb/line6/midibuf.c
+@@ -9,6 +9,7 @@
+
+ #include "midibuf.h"
+
++
+ static int midibuf_message_length(unsigned char code)
+ {
+ int message_length;
+@@ -20,12 +21,7 @@ static int midibuf_message_length(unsigned char code)
+
+ message_length = length[(code >> 4) - 8];
+ } else {
+- /*
+- Note that according to the MIDI specification 0xf2 is
+- the "Song Position Pointer", but this is used by Line 6
+- to send sysex messages to the host.
+- */
+- static const int length[] = { -1, 2, -1, 2, -1, -1, 1, 1, 1, 1,
++ static const int length[] = { -1, 2, 2, 2, -1, -1, 1, 1, 1, -1,
+ 1, 1, 1, -1, 1, 1
+ };
+ message_length = length[code & 0x0f];
+@@ -125,7 +121,7 @@ int line6_midibuf_write(struct midi_buffer *this, unsigned char *data,
+ }
+
+ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data,
+- int length)
++ int length, int read_type)
+ {
+ int bytes_used;
+ int length1, length2;
+@@ -148,9 +144,22 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data,
+
+ length1 = this->size - this->pos_read;
+
+- /* check MIDI command length */
+ command = this->buf[this->pos_read];
++ /*
++ PODxt always has status byte lower nibble set to 0010,
++ when it means to send 0000, so we correct if here so
++ that control/program changes come on channel 1 and
++ sysex message status byte is correct
++ */
++ if (read_type == LINE6_MIDIBUF_READ_RX) {
++ if (command == 0xb2 || command == 0xc2 || command == 0xf2) {
++ unsigned char fixed = command & 0xf0;
++ this->buf[this->pos_read] = fixed;
++ command = fixed;
++ }
++ }
+
++ /* check MIDI command length */
+ if (command & 0x80) {
+ midi_length = midibuf_message_length(command);
+ this->command_prev = command;
+diff --git a/sound/usb/line6/midibuf.h b/sound/usb/line6/midibuf.h
+index 124a8f9f7e96c..542e8d836f87d 100644
+--- a/sound/usb/line6/midibuf.h
++++ b/sound/usb/line6/midibuf.h
+@@ -8,6 +8,9 @@
+ #ifndef MIDIBUF_H
+ #define MIDIBUF_H
+
++#define LINE6_MIDIBUF_READ_TX 0
++#define LINE6_MIDIBUF_READ_RX 1
++
+ struct midi_buffer {
+ unsigned char *buf;
+ int size;
+@@ -23,7 +26,7 @@ extern void line6_midibuf_destroy(struct midi_buffer *mb);
+ extern int line6_midibuf_ignore(struct midi_buffer *mb, int length);
+ extern int line6_midibuf_init(struct midi_buffer *mb, int size, int split);
+ extern int line6_midibuf_read(struct midi_buffer *mb, unsigned char *data,
+- int length);
++ int length, int read_type);
+ extern void line6_midibuf_reset(struct midi_buffer *mb);
+ extern int line6_midibuf_write(struct midi_buffer *mb, unsigned char *data,
+ int length);
+diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c
+index 16e644330c4d6..54be5ac919bfb 100644
+--- a/sound/usb/line6/pod.c
++++ b/sound/usb/line6/pod.c
+@@ -159,8 +159,9 @@ static struct line6_pcm_properties pod_pcm_properties = {
+ .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */
+ };
+
++
+ static const char pod_version_header[] = {
+- 0xf2, 0x7e, 0x7f, 0x06, 0x02
++ 0xf0, 0x7e, 0x7f, 0x06, 0x02
+ };
+
+ static char *pod_alloc_sysex_buffer(struct usb_line6_pod *pod, int code,
+diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c
+index 28794a35949d4..b24bc82f89e37 100644
+--- a/sound/usb/line6/podhd.c
++++ b/sound/usb/line6/podhd.c
+@@ -190,7 +190,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod)
+ ret = usb_control_msg_send(usbdev, 0,
+ 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+ 0x11, 0,
+- NULL, 0, LINE6_TIMEOUT * HZ, GFP_KERNEL);
++ NULL, 0, LINE6_TIMEOUT, GFP_KERNEL);
+ if (ret) {
+ dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret);
+ goto exit;
+@@ -200,7 +200,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod)
+ ret = usb_control_msg_recv(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
+ 0x11, 0x0,
+- init_bytes, 3, LINE6_TIMEOUT * HZ, GFP_KERNEL);
++ init_bytes, 3, LINE6_TIMEOUT, GFP_KERNEL);
+ if (ret) {
+ dev_err(pod->line6.ifcdev,
+ "receive length failed (error %d)\n", ret);
+@@ -220,7 +220,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod)
+ USB_REQ_SET_FEATURE,
+ USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT,
+ 1, 0,
+- NULL, 0, LINE6_TIMEOUT * HZ, GFP_KERNEL);
++ NULL, 0, LINE6_TIMEOUT, GFP_KERNEL);
+ exit:
+ return ret;
+ }
+diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c
+index 4e5693c97aa42..e33df58740a91 100644
+--- a/sound/usb/line6/toneport.c
++++ b/sound/usb/line6/toneport.c
+@@ -128,7 +128,7 @@ static int toneport_send_cmd(struct usb_device *usbdev, int cmd1, int cmd2)
+
+ ret = usb_control_msg_send(usbdev, 0, 0x67,
+ USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
+- cmd1, cmd2, NULL, 0, LINE6_TIMEOUT * HZ,
++ cmd1, cmd2, NULL, 0, LINE6_TIMEOUT,
+ GFP_KERNEL);
+
+ if (ret) {
+diff --git a/sound/usb/midi.c b/sound/usb/midi.c
+index 2c01649c70f61..9a361b202a09d 100644
+--- a/sound/usb/midi.c
++++ b/sound/usb/midi.c
+@@ -1133,10 +1133,8 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream)
+ port = &umidi->endpoints[i].out->ports[j];
+ break;
+ }
+- if (!port) {
+- snd_BUG();
++ if (!port)
+ return -ENXIO;
+- }
+
+ substream->runtime->private_data = port;
+ port->state = STATE_UNKNOWN;
+@@ -1145,6 +1143,9 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream)
+
+ static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream)
+ {
++ struct usbmidi_out_port *port = substream->runtime->private_data;
++
++ cancel_work_sync(&port->ep->work);
+ return substream_open(substream, 0, 0);
+ }
+
+@@ -1194,6 +1195,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream)
+ } while (drain_urbs && timeout);
+ finish_wait(&ep->drain_wait, &wait);
+ }
++ port->active = 0;
+ spin_unlock_irq(&ep->buffer_lock);
+ }
+
+diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c
+index 5834d1dc317ef..4f6b20ed29dd7 100644
+--- a/sound/usb/misc/ua101.c
++++ b/sound/usb/misc/ua101.c
+@@ -1000,7 +1000,7 @@ static int detect_usb_format(struct ua101 *ua)
+ fmt_playback->bSubframeSize * ua->playback.channels;
+
+ epd = &ua->intf[INTF_CAPTURE]->altsetting[1].endpoint[0].desc;
+- if (!usb_endpoint_is_isoc_in(epd)) {
++ if (!usb_endpoint_is_isoc_in(epd) || usb_endpoint_maxp(epd) == 0) {
+ dev_err(&ua->dev->dev, "invalid capture endpoint\n");
+ return -ENXIO;
+ }
+@@ -1008,7 +1008,7 @@ static int detect_usb_format(struct ua101 *ua)
+ ua->capture.max_packet_bytes = usb_endpoint_maxp(epd);
+
+ epd = &ua->intf[INTF_PLAYBACK]->altsetting[1].endpoint[0].desc;
+- if (!usb_endpoint_is_isoc_out(epd)) {
++ if (!usb_endpoint_is_isoc_out(epd) || usb_endpoint_maxp(epd) == 0) {
+ dev_err(&ua->dev->dev, "invalid playback endpoint\n");
+ return -ENXIO;
+ }
+diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
+index 8e030b1c061ab..567514832b0df 100644
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -1496,6 +1496,10 @@ error:
+ usb_audio_err(chip,
+ "cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
+ UAC_GET_CUR, validx, idx, cval->val_type);
++
++ if (val)
++ *val = 0;
++
+ return filter_error(cval, ret);
+ }
+
+diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
+index 55eea90ee993f..3f8f6056ff6a5 100644
+--- a/sound/usb/mixer_maps.c
++++ b/sound/usb/mixer_maps.c
+@@ -366,13 +366,28 @@ static const struct usbmix_name_map corsair_virtuoso_map[] = {
+ { 0 }
+ };
+
+-/* Some mobos shipped with a dummy HD-audio show the invalid GET_MIN/GET_MAX
+- * response for Input Gain Pad (id=19, control=12) and the connector status
+- * for SPDIF terminal (id=18). Skip them.
+- */
+-static const struct usbmix_name_map asus_rog_map[] = {
+- { 18, NULL }, /* OT, connector control */
+- { 19, NULL, 12 }, /* FU, Input Gain Pad */
++/* ASUS ROG Zenith II with Realtek ALC1220-VB */
++static const struct usbmix_name_map asus_zenith_ii_map[] = {
++ { 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */
++ { 16, "Speaker" }, /* OT */
++ { 22, "Speaker Playback" }, /* FU */
++ { 7, "Line" }, /* IT */
++ { 19, "Line Capture" }, /* FU */
++ { 8, "Mic" }, /* IT */
++ { 20, "Mic Capture" }, /* FU */
++ { 9, "Front Mic" }, /* IT */
++ { 21, "Front Mic Capture" }, /* FU */
++ { 17, "IEC958" }, /* OT */
++ { 23, "IEC958 Playback" }, /* FU */
++ {}
++};
++
++static const struct usbmix_connector_map asus_zenith_ii_connector_map[] = {
++ { 10, 16 }, /* (Back) Speaker */
++ { 11, 17 }, /* SPDIF */
++ { 13, 7 }, /* Line */
++ { 14, 8 }, /* Mic */
++ { 15, 9 }, /* Front Mic */
+ {}
+ };
+
+@@ -423,6 +438,14 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = {
+ {}
+ };
+
++/* MSI MPG X570S Carbon Max Wifi with ALC4080 */
++static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = {
++ { 29, "Speaker Playback" },
++ { 30, "Front Headphone Playback" },
++ { 32, "IEC958 Playback" },
++ {}
++};
++
+ /*
+ * Control map entries
+ */
+@@ -536,6 +559,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
+ .id = USB_ID(0x1b1c, 0x0a41),
+ .map = corsair_virtuoso_map,
+ },
++ {
++ /* Corsair Virtuoso SE Latest (wired mode) */
++ .id = USB_ID(0x1b1c, 0x0a3f),
++ .map = corsair_virtuoso_map,
++ },
++ {
++ /* Corsair Virtuoso SE Latest (wireless mode) */
++ .id = USB_ID(0x1b1c, 0x0a40),
++ .map = corsair_virtuoso_map,
++ },
+ {
+ /* Corsair Virtuoso (wireless mode) */
+ .id = USB_ID(0x1b1c, 0x0a42),
+@@ -550,9 +583,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
+ .map = trx40_mobo_map,
+ .connector_map = trx40_mobo_connector_map,
+ },
+- { /* ASUS ROG Zenith II */
++ { /* ASUS ROG Zenith II (main audio) */
+ .id = USB_ID(0x0b05, 0x1916),
+- .map = asus_rog_map,
++ .map = asus_zenith_ii_map,
++ .connector_map = asus_zenith_ii_connector_map,
+ },
+ { /* ASUS ROG Strix */
+ .id = USB_ID(0x0b05, 0x1917),
+@@ -564,6 +598,14 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
+ .map = trx40_mobo_map,
+ .connector_map = trx40_mobo_connector_map,
+ },
++ { /* MSI MPG X570S Carbon Max Wifi */
++ .id = USB_ID(0x0db0, 0x419c),
++ .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
++ },
++ { /* MSI MAG X570S Torpedo Max */
++ .id = USB_ID(0x0db0, 0xa073),
++ .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
++ },
+ { /* MSI TRX40 */
+ .id = USB_ID(0x0db0, 0x543d),
+ .map = trx40_mobo_map,
+diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
+index 46082dc57be09..d12b87e52d22a 100644
+--- a/sound/usb/mixer_quirks.c
++++ b/sound/usb/mixer_quirks.c
+@@ -2795,6 +2795,7 @@ static int snd_bbfpro_controls_create(struct usb_mixer_interface *mixer)
+ #define SND_DJM_750_IDX 0x1
+ #define SND_DJM_850_IDX 0x2
+ #define SND_DJM_900NXS2_IDX 0x3
++#define SND_DJM_750MK2_IDX 0x4
+
+
+ #define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \
+@@ -2984,12 +2985,42 @@ static const struct snd_djm_ctl snd_djm_ctls_900nxs2[] = {
+ SND_DJM_CTL("Ch5 Input", 900nxs2_cap5, 3, SND_DJM_WINDEX_CAP)
+ };
+
++// DJM-750MK2
++static const u16 snd_djm_opts_750mk2_cap1[] = {
++ 0x0100, 0x0102, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a };
++static const u16 snd_djm_opts_750mk2_cap2[] = {
++ 0x0200, 0x0202, 0x0203, 0x0206, 0x0207, 0x0208, 0x0209, 0x020a };
++static const u16 snd_djm_opts_750mk2_cap3[] = {
++ 0x0300, 0x0302, 0x0303, 0x0306, 0x0307, 0x0308, 0x0309, 0x030a };
++static const u16 snd_djm_opts_750mk2_cap4[] = {
++ 0x0400, 0x0402, 0x0403, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a };
++static const u16 snd_djm_opts_750mk2_cap5[] = {
++ 0x0507, 0x0508, 0x0509, 0x050a, 0x0511, 0x0512, 0x0513, 0x0514 };
++
++static const u16 snd_djm_opts_750mk2_pb1[] = { 0x0100, 0x0101, 0x0104 };
++static const u16 snd_djm_opts_750mk2_pb2[] = { 0x0200, 0x0201, 0x0204 };
++static const u16 snd_djm_opts_750mk2_pb3[] = { 0x0300, 0x0301, 0x0304 };
++
++
++static const struct snd_djm_ctl snd_djm_ctls_750mk2[] = {
++ SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL),
++ SND_DJM_CTL("Ch1 Input", 750mk2_cap1, 2, SND_DJM_WINDEX_CAP),
++ SND_DJM_CTL("Ch2 Input", 750mk2_cap2, 2, SND_DJM_WINDEX_CAP),
++ SND_DJM_CTL("Ch3 Input", 750mk2_cap3, 2, SND_DJM_WINDEX_CAP),
++ SND_DJM_CTL("Ch4 Input", 750mk2_cap4, 2, SND_DJM_WINDEX_CAP),
++ SND_DJM_CTL("Ch5 Input", 750mk2_cap5, 3, SND_DJM_WINDEX_CAP),
++ SND_DJM_CTL("Ch1 Output", 750mk2_pb1, 0, SND_DJM_WINDEX_PB),
++ SND_DJM_CTL("Ch2 Output", 750mk2_pb2, 1, SND_DJM_WINDEX_PB),
++ SND_DJM_CTL("Ch3 Output", 750mk2_pb3, 2, SND_DJM_WINDEX_PB)
++};
++
+
+ static const struct snd_djm_device snd_djm_devices[] = {
+- SND_DJM_DEVICE(250mk2),
+- SND_DJM_DEVICE(750),
+- SND_DJM_DEVICE(850),
+- SND_DJM_DEVICE(900nxs2)
++ [SND_DJM_250MK2_IDX] = SND_DJM_DEVICE(250mk2),
++ [SND_DJM_750_IDX] = SND_DJM_DEVICE(750),
++ [SND_DJM_850_IDX] = SND_DJM_DEVICE(850),
++ [SND_DJM_900NXS2_IDX] = SND_DJM_DEVICE(900nxs2),
++ [SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2),
+ };
+
+
+@@ -3223,7 +3254,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
+ err = snd_rme_controls_create(mixer);
+ break;
+
+- case USB_ID(0x0194f, 0x010c): /* Presonus Studio 1810c */
++ case USB_ID(0x194f, 0x010c): /* Presonus Studio 1810c */
+ err = snd_sc1810_init_mixer(mixer);
+ break;
+ case USB_ID(0x2a39, 0x3fb0): /* RME Babyface Pro FS */
+@@ -3235,6 +3266,9 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
+ case USB_ID(0x08e4, 0x017f): /* Pioneer DJ DJM-750 */
+ err = snd_djm_controls_create(mixer, SND_DJM_750_IDX);
+ break;
++ case USB_ID(0x2b73, 0x001b): /* Pioneer DJ DJM-750MK2 */
++ err = snd_djm_controls_create(mixer, SND_DJM_750MK2_IDX);
++ break;
+ case USB_ID(0x08e4, 0x0163): /* Pioneer DJ DJM-850 */
+ err = snd_djm_controls_create(mixer, SND_DJM_850_IDX);
+ break;
+@@ -3328,9 +3362,10 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer,
+ if (unitid == 7 && cval->control == UAC_FU_VOLUME)
+ snd_dragonfly_quirk_db_scale(mixer, cval, kctl);
+ break;
+- /* lowest playback value is muted on C-Media devices */
+- case USB_ID(0x0d8c, 0x000c):
+- case USB_ID(0x0d8c, 0x0014):
++ /* lowest playback value is muted on some devices */
++ case USB_ID(0x0d8c, 0x000c): /* C-Media */
++ case USB_ID(0x0d8c, 0x0014): /* C-Media */
++ case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */
+ if (strstr(kctl->id.name, "Playback"))
+ cval->min_mute = 1;
+ break;
+diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
+index 5dc9266180e37..de0964dbf7a91 100644
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -219,16 +219,16 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip,
+ return 0;
+ }
+
+-static bool stop_endpoints(struct snd_usb_substream *subs)
++static bool stop_endpoints(struct snd_usb_substream *subs, bool keep_pending)
+ {
+ bool stopped = 0;
+
+ if (test_and_clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags)) {
+- snd_usb_endpoint_stop(subs->sync_endpoint);
++ snd_usb_endpoint_stop(subs->sync_endpoint, keep_pending);
+ stopped = true;
+ }
+ if (test_and_clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags)) {
+- snd_usb_endpoint_stop(subs->data_endpoint);
++ snd_usb_endpoint_stop(subs->data_endpoint, keep_pending);
+ stopped = true;
+ }
+ return stopped;
+@@ -261,7 +261,7 @@ static int start_endpoints(struct snd_usb_substream *subs)
+ return 0;
+
+ error:
+- stop_endpoints(subs);
++ stop_endpoints(subs, false);
+ return err;
+ }
+
+@@ -291,6 +291,9 @@ int snd_usb_audioformat_set_sync_ep(struct snd_usb_audio *chip,
+ bool is_playback;
+ int err;
+
++ if (fmt->sync_ep)
++ return 0; /* already set up */
++
+ alts = snd_usb_get_host_interface(chip, fmt->iface, fmt->altsetting);
+ if (!alts)
+ return 0;
+@@ -304,7 +307,7 @@ int snd_usb_audioformat_set_sync_ep(struct snd_usb_audio *chip,
+ * Generic sync EP handling
+ */
+
+- if (altsd->bNumEndpoints < 2)
++ if (fmt->ep_idx > 0 || altsd->bNumEndpoints < 2)
+ return 0;
+
+ is_playback = !(get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN);
+@@ -437,18 +440,23 @@ static int configure_endpoints(struct snd_usb_audio *chip,
+
+ if (subs->data_endpoint->need_setup) {
+ /* stop any running stream beforehand */
+- if (stop_endpoints(subs))
++ if (stop_endpoints(subs, false))
+ sync_pending_stops(subs);
++ if (subs->sync_endpoint) {
++ err = snd_usb_endpoint_configure(chip, subs->sync_endpoint);
++ if (err < 0)
++ return err;
++ }
+ err = snd_usb_endpoint_configure(chip, subs->data_endpoint);
+ if (err < 0)
+ return err;
+ snd_usb_set_format_quirk(subs, subs->cur_audiofmt);
+- }
+-
+- if (subs->sync_endpoint) {
+- err = snd_usb_endpoint_configure(chip, subs->sync_endpoint);
+- if (err < 0)
+- return err;
++ } else {
++ if (subs->sync_endpoint) {
++ err = snd_usb_endpoint_configure(chip, subs->sync_endpoint);
++ if (err < 0)
++ return err;
++ }
+ }
+
+ return 0;
+@@ -517,6 +525,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
+ if (snd_usb_endpoint_compatible(chip, subs->data_endpoint,
+ fmt, hw_params))
+ goto unlock;
++ if (stop_endpoints(subs, false))
++ sync_pending_stops(subs);
+ close_endpoints(chip, subs);
+ }
+
+@@ -572,7 +582,7 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
+ subs->cur_audiofmt = NULL;
+ mutex_unlock(&chip->mutex);
+ if (!snd_usb_lock_shutdown(chip)) {
+- if (stop_endpoints(subs))
++ if (stop_endpoints(subs, false))
+ sync_pending_stops(subs);
+ close_endpoints(chip, subs);
+ snd_usb_unlock_shutdown(chip);
+@@ -581,6 +591,31 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
+ return 0;
+ }
+
++/* free-wheeling mode? (e.g. dmix) */
++static int in_free_wheeling_mode(struct snd_pcm_runtime *runtime)
++{
++ return runtime->stop_threshold > runtime->buffer_size;
++}
++
++/* check whether early start is needed for playback stream */
++static int lowlatency_playback_available(struct snd_pcm_runtime *runtime,
++ struct snd_usb_substream *subs)
++{
++ struct snd_usb_audio *chip = subs->stream->chip;
++
++ if (subs->direction == SNDRV_PCM_STREAM_CAPTURE)
++ return false;
++ /* disabled via module option? */
++ if (!chip->lowlatency)
++ return false;
++ if (in_free_wheeling_mode(runtime))
++ return false;
++ /* implicit feedback mode has own operation mode */
++ if (snd_usb_endpoint_implicit_feedback_sink(subs->data_endpoint))
++ return false;
++ return true;
++}
++
+ /*
+ * prepare callback
+ *
+@@ -614,13 +649,9 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
+ subs->period_elapsed_pending = 0;
+ runtime->delay = 0;
+
+- /* check whether early start is needed for playback stream */
+- subs->early_playback_start =
+- subs->direction == SNDRV_PCM_STREAM_PLAYBACK &&
+- (!chip->lowlatency ||
+- (subs->data_endpoint->nominal_queue_size >= subs->buffer_bytes));
+-
+- if (subs->early_playback_start)
++ subs->lowlatency_playback = lowlatency_playback_available(runtime, subs);
++ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
++ !subs->lowlatency_playback)
+ ret = start_endpoints(subs);
+
+ unlock:
+@@ -648,9 +679,9 @@ static const struct snd_pcm_hardware snd_usb_hardware =
+ SNDRV_PCM_INFO_PAUSE,
+ .channels_min = 1,
+ .channels_max = 256,
+- .buffer_bytes_max = 1024 * 1024,
++ .buffer_bytes_max = INT_MAX, /* limited by BUFFER_TIME later */
+ .period_bytes_min = 64,
+- .period_bytes_max = 512 * 1024,
++ .period_bytes_max = INT_MAX, /* limited by PERIOD_TIME later */
+ .periods_min = 2,
+ .periods_max = 1024,
+ };
+@@ -734,6 +765,7 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params,
+ struct snd_pcm_hw_rule *rule)
+ {
+ struct snd_usb_substream *subs = rule->private;
++ struct snd_usb_audio *chip = subs->stream->chip;
+ const struct audioformat *fp;
+ struct snd_interval *it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+ unsigned int rmin, rmax, r;
+@@ -745,6 +777,14 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params,
+ list_for_each_entry(fp, &subs->fmt_list, list) {
+ if (!hw_check_valid_format(subs, params, fp))
+ continue;
++ r = snd_usb_endpoint_get_clock_rate(chip, fp->clock);
++ if (r > 0) {
++ if (!snd_interval_test(it, r))
++ continue;
++ rmin = min(rmin, r);
++ rmax = max(rmax, r);
++ continue;
++ }
+ if (fp->rate_table && fp->nr_rates) {
+ for (i = 0; i < fp->nr_rates; i++) {
+ r = fp->rate_table[i];
+@@ -869,8 +909,13 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs)
+ continue;
+ /* for the implicit fb, check the sync ep as well */
+ ep = snd_usb_get_endpoint(chip, fp->sync_ep);
+- if (ep && ep->cur_audiofmt)
+- return ep;
++ if (ep && ep->cur_audiofmt) {
++ /* ditto, if the sync (data) ep is used by others,
++ * this stream is restricted by the sync ep
++ */
++ if (ep != subs->sync_endpoint || ep->opened > 1)
++ return ep;
++ }
+ }
+ return NULL;
+ }
+@@ -1034,6 +1079,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre
+ return err;
+ }
+
++ /* set max period and buffer sizes for 1 and 2 seconds, respectively */
++ err = snd_pcm_hw_constraint_minmax(runtime,
++ SNDRV_PCM_HW_PARAM_PERIOD_TIME,
++ 0, 1000000);
++ if (err < 0)
++ return err;
++ err = snd_pcm_hw_constraint_minmax(runtime,
++ SNDRV_PCM_HW_PARAM_BUFFER_TIME,
++ 0, 2000000);
++ if (err < 0)
++ return err;
++
+ /* additional hw constraints for implicit fb */
+ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+ hw_rule_format_implicit_fb, subs,
+@@ -1068,6 +1125,10 @@ static int snd_usb_pcm_open(struct snd_pcm_substream *substream)
+ int ret;
+
+ runtime->hw = snd_usb_hardware;
++ /* need an explicit sync to catch applptr update in low-latency mode */
++ if (direction == SNDRV_PCM_STREAM_PLAYBACK &&
++ as->chip->lowlatency)
++ runtime->hw.info |= SNDRV_PCM_INFO_SYNC_APPLPTR;
+ runtime->private_data = subs;
+ subs->pcm_substream = substream;
+ /* runtime PM is also done there */
+@@ -1320,44 +1381,66 @@ static unsigned int copy_to_urb_quirk(struct snd_usb_substream *subs,
+ return bytes;
+ }
+
+-static void prepare_playback_urb(struct snd_usb_substream *subs,
+- struct urb *urb)
++static int prepare_playback_urb(struct snd_usb_substream *subs,
++ struct urb *urb,
++ bool in_stream_lock)
+ {
+ struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
+ struct snd_usb_endpoint *ep = subs->data_endpoint;
+ struct snd_urb_ctx *ctx = urb->context;
+- unsigned int counts, frames, bytes;
++ unsigned int frames, bytes;
++ int counts;
++ unsigned int transfer_done, frame_limit, avail = 0;
+ int i, stride, period_elapsed = 0;
+ unsigned long flags;
++ int err = 0;
+
+ stride = ep->stride;
+
+ frames = 0;
+ ctx->queued = 0;
+ urb->number_of_packets = 0;
++
+ spin_lock_irqsave(&subs->lock, flags);
+- subs->frame_limit += ep->max_urb_frames;
++ frame_limit = subs->frame_limit + ep->max_urb_frames;
++ transfer_done = subs->transfer_done;
++
++ if (subs->lowlatency_playback &&
++ runtime->status->state != SNDRV_PCM_STATE_DRAINING) {
++ unsigned int hwptr = subs->hwptr_done / stride;
++
++ /* calculate the byte offset-in-buffer of the appl_ptr */
++ avail = (runtime->control->appl_ptr - runtime->hw_ptr_base)
++ % runtime->buffer_size;
++ if (avail <= hwptr)
++ avail += runtime->buffer_size;
++ avail -= hwptr;
++ }
++
+ for (i = 0; i < ctx->packets; i++) {
+- counts = snd_usb_endpoint_next_packet_size(ep, ctx, i);
++ counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, avail);
++ if (counts < 0)
++ break;
+ /* set up descriptor */
+ urb->iso_frame_desc[i].offset = frames * stride;
+ urb->iso_frame_desc[i].length = counts * stride;
+ frames += counts;
++ avail -= counts;
+ urb->number_of_packets++;
+- subs->transfer_done += counts;
+- if (subs->transfer_done >= runtime->period_size) {
+- subs->transfer_done -= runtime->period_size;
+- subs->frame_limit = 0;
++ transfer_done += counts;
++ if (transfer_done >= runtime->period_size) {
++ transfer_done -= runtime->period_size;
++ frame_limit = 0;
+ period_elapsed = 1;
+ if (subs->fmt_type == UAC_FORMAT_TYPE_II) {
+- if (subs->transfer_done > 0) {
++ if (transfer_done > 0) {
+ /* FIXME: fill-max mode is not
+ * supported yet */
+- frames -= subs->transfer_done;
+- counts -= subs->transfer_done;
++ frames -= transfer_done;
++ counts -= transfer_done;
+ urb->iso_frame_desc[i].length =
+ counts * stride;
+- subs->transfer_done = 0;
++ transfer_done = 0;
+ }
+ i++;
+ if (i < ctx->packets) {
+@@ -1371,13 +1454,19 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
+ }
+ }
+ /* finish at the period boundary or after enough frames */
+- if ((period_elapsed ||
+- subs->transfer_done >= subs->frame_limit) &&
++ if ((period_elapsed || transfer_done >= frame_limit) &&
+ !snd_usb_endpoint_implicit_feedback_sink(ep))
+ break;
+ }
+- bytes = frames * stride;
+
++ if (!frames) {
++ err = -EAGAIN;
++ goto unlock;
++ }
++
++ bytes = frames * stride;
++ subs->transfer_done = transfer_done;
++ subs->frame_limit = frame_limit;
+ if (unlikely(ep->cur_format == SNDRV_PCM_FORMAT_DSD_U16_LE &&
+ subs->cur_audiofmt->dsd_dop)) {
+ fill_playback_urb_dsd_dop(subs, urb, bytes);
+@@ -1403,14 +1492,23 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
+ subs->trigger_tstamp_pending_update = false;
+ }
+
+- if (period_elapsed && !subs->running && !subs->early_playback_start) {
++ if (period_elapsed && !subs->running && subs->lowlatency_playback) {
+ subs->period_elapsed_pending = 1;
+ period_elapsed = 0;
+ }
++
++ unlock:
+ spin_unlock_irqrestore(&subs->lock, flags);
++ if (err < 0)
++ return err;
+ urb->transfer_buffer_length = bytes;
+- if (period_elapsed)
+- snd_pcm_period_elapsed(subs->pcm_substream);
++ if (period_elapsed) {
++ if (in_stream_lock)
++ snd_pcm_period_elapsed_under_stream_lock(subs->pcm_substream);
++ else
++ snd_pcm_period_elapsed(subs->pcm_substream);
++ }
++ return 0;
+ }
+
+ /*
+@@ -1442,6 +1540,27 @@ static void retire_playback_urb(struct snd_usb_substream *subs,
+ snd_pcm_period_elapsed(subs->pcm_substream);
+ }
+
++/* PCM ack callback for the playback stream;
++ * this plays a role only when the stream is running in low-latency mode.
++ */
++static int snd_usb_pcm_playback_ack(struct snd_pcm_substream *substream)
++{
++ struct snd_usb_substream *subs = substream->runtime->private_data;
++ struct snd_usb_endpoint *ep;
++
++ if (!subs->lowlatency_playback || !subs->running)
++ return 0;
++ ep = subs->data_endpoint;
++ if (!ep)
++ return 0;
++ /* When no more in-flight URBs available, try to process the pending
++ * outputs here
++ */
++ if (!ep->active_mask)
++ return snd_usb_queue_pending_output_urbs(ep, true);
++ return 0;
++}
++
+ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substream,
+ int cmd)
+ {
+@@ -1457,8 +1576,10 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea
+ prepare_playback_urb,
+ retire_playback_urb,
+ subs);
+- if (!subs->early_playback_start &&
++ if (subs->lowlatency_playback &&
+ cmd == SNDRV_PCM_TRIGGER_START) {
++ if (in_free_wheeling_mode(substream->runtime))
++ subs->lowlatency_playback = false;
+ err = start_endpoints(subs);
+ if (err < 0) {
+ snd_usb_endpoint_set_callback(subs->data_endpoint,
+@@ -1473,7 +1594,7 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea
+ return 0;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_STOP:
+- stop_endpoints(subs);
++ stop_endpoints(subs, substream->runtime->status->state == SNDRV_PCM_STATE_DRAINING);
+ snd_usb_endpoint_set_callback(subs->data_endpoint,
+ NULL, NULL, NULL);
+ subs->running = 0;
+@@ -1521,7 +1642,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream
+ return 0;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ case SNDRV_PCM_TRIGGER_STOP:
+- stop_endpoints(subs);
++ stop_endpoints(subs, false);
+ fallthrough;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+ snd_usb_endpoint_set_callback(subs->data_endpoint,
+@@ -1545,6 +1666,7 @@ static const struct snd_pcm_ops snd_usb_playback_ops = {
+ .trigger = snd_usb_substream_playback_trigger,
+ .sync_stop = snd_usb_pcm_sync_stop,
+ .pointer = snd_usb_pcm_pointer,
++ .ack = snd_usb_pcm_playback_ack,
+ };
+
+ static const struct snd_pcm_ops snd_usb_capture_ops = {
+diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
+index 2af8c68fac275..6d332c9eb4445 100644
+--- a/sound/usb/quirks-table.h
++++ b/sound/usb/quirks-table.h
+@@ -76,6 +76,8 @@
+ { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f0a) },
+ /* E-Mu 0204 USB */
+ { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f19) },
++/* Ktmicro Usb_audio device */
++{ USB_DEVICE_VENDOR_SPEC(0x31b2, 0x0011) },
+
+ /*
+ * Creative Technology, Ltd Live! Cam Sync HD [VF0770]
+@@ -84,7 +86,7 @@
+ * combination.
+ */
+ {
+- USB_DEVICE(0x041e, 0x4095),
++ USB_AUDIO_DEVICE(0x041e, 0x4095),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+@@ -2049,6 +2051,10 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ }
+ }
+ },
++{
++ /* M-Audio Micro */
++ USB_DEVICE_VENDOR_SPEC(0x0763, 0x201a),
++},
+ {
+ USB_DEVICE_VENDOR_SPEC(0x0763, 0x2030),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+@@ -2658,7 +2664,12 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ .nr_rates = 2,
+ .rate_table = (unsigned int[]) {
+ 44100, 48000
+- }
++ },
++ .sync_ep = 0x82,
++ .sync_iface = 0,
++ .sync_altsetting = 1,
++ .sync_ep_idx = 1,
++ .implicit_fb = 1,
+ }
+ },
+ {
+@@ -2672,6 +2683,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ .altset_idx = 1,
+ .attributes = 0,
+ .endpoint = 0x82,
++ .ep_idx = 1,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC,
+ .datainterval = 1,
+ .maxpacksize = 0x0126,
+@@ -2875,6 +2887,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ .altset_idx = 1,
+ .attributes = 0x4,
+ .endpoint = 0x81,
++ .ep_idx = 1,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+ .maxpacksize = 0x130,
+@@ -3235,6 +3248,15 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ }
+ },
+
++/* Rane SL-1 */
++{
++ USB_DEVICE(0x13e5, 0x0001),
++ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ }
++},
++
+ /* disabled due to regression for other devices;
+ * see https://bugzilla.kernel.org/show_bug.cgi?id=199905
+ */
+@@ -3382,6 +3404,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ .altset_idx = 1,
+ .attributes = 0,
+ .endpoint = 0x03,
++ .ep_idx = 1,
+ .rates = SNDRV_PCM_RATE_96000,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+@@ -3785,6 +3808,112 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ }
+ },
+
++{
++ /*
++ * PIONEER DJ DDJ-800
++ * PCM is 6 channels out, 6 channels in @ 44.1 fixed
++ * The Feedback for the output is the input
++ */
++ USB_DEVICE_VENDOR_SPEC(0x2b73, 0x0029),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = (const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
++ .channels = 6,
++ .iface = 0,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x01,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC|
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_44100,
++ .rate_min = 44100,
++ .rate_max = 44100,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 44100 }
++ }
++ },
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
++ .channels = 6,
++ .iface = 0,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x82,
++ .ep_idx = 1,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC|
++ USB_ENDPOINT_SYNC_ASYNC|
++ USB_ENDPOINT_USAGE_IMPLICIT_FB,
++ .rates = SNDRV_PCM_RATE_44100,
++ .rate_min = 44100,
++ .rate_max = 44100,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 44100 }
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
++
++/*
++ * MacroSilicon MS2100/MS2106 based AV capture cards
++ *
++ * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch.
++ * They also need QUIRK_FLAG_ALIGN_TRANSFER, which makes one wonder if
++ * they pretend to be 96kHz mono as a workaround for stereo being broken
++ * by that...
++ *
++ * They also have an issue with initial stream alignment that causes the
++ * channels to be swapped and out of phase, which is dealt with in quirks.c.
++ */
++{
++ USB_AUDIO_DEVICE(0x534d, 0x0021),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .vendor_name = "MacroSilicon",
++ .product_name = "MS210x",
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = &(const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 2,
++ .type = QUIRK_AUDIO_STANDARD_MIXER,
++ },
++ {
++ .ifnum = 3,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S16_LE,
++ .channels = 2,
++ .iface = 3,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .attributes = 0,
++ .endpoint = 0x82,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_CONTINUOUS,
++ .rate_min = 48000,
++ .rate_max = 48000,
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
++
+ /*
+ * MacroSilicon MS2109 based HDMI capture cards
+ *
+@@ -3892,6 +4021,64 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ }
+ }
+ },
++{
++ /*
++ * Pioneer DJ DJM-750MK2
++ * 10 channels playback & 12 channels capture @ 48kHz S24LE
++ */
++ USB_DEVICE_VENDOR_SPEC(0x2b73, 0x001b),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = (const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
++ .channels = 10,
++ .iface = 0,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x01,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC|
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_48000,
++ .rate_min = 48000,
++ .rate_max = 48000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) {
++ 48000
++ }
++ }
++ },
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S24_3LE,
++ .channels = 12,
++ .iface = 0,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x82,
++ .ep_idx = 1,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC|
++ USB_ENDPOINT_SYNC_ASYNC|
++ USB_ENDPOINT_USAGE_IMPLICIT_FB,
++ .rates = SNDRV_PCM_RATE_48000,
++ .rate_min = 48000,
++ .rate_max = 48000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 48000 }
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
+ {
+ /*
+ * Pioneer DJ DJM-850
+@@ -4044,6 +4231,235 @@ YAMAHA_DEVICE(0x7010, "UB99"),
+ }
+ }
+ },
++{
++ /*
++ * Fiero SC-01 (firmware v1.0.0 @ 48 kHz)
++ */
++ USB_DEVICE(0x2b53, 0x0023),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .vendor_name = "Fiero",
++ .product_name = "SC-01",
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = &(const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ /* Playback */
++ {
++ .ifnum = 1,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 1,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x01,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_48000,
++ .rate_min = 48000,
++ .rate_max = 48000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 48000 },
++ .clock = 0x29
++ }
++ },
++ /* Capture */
++ {
++ .ifnum = 2,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 2,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x82,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC |
++ USB_ENDPOINT_USAGE_IMPLICIT_FB,
++ .rates = SNDRV_PCM_RATE_48000,
++ .rate_min = 48000,
++ .rate_max = 48000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 48000 },
++ .clock = 0x29
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
++{
++ /*
++ * Fiero SC-01 (firmware v1.0.0 @ 96 kHz)
++ */
++ USB_DEVICE(0x2b53, 0x0024),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .vendor_name = "Fiero",
++ .product_name = "SC-01",
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = &(const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ /* Playback */
++ {
++ .ifnum = 1,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 1,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x01,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_96000,
++ .rate_min = 96000,
++ .rate_max = 96000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 96000 },
++ .clock = 0x29
++ }
++ },
++ /* Capture */
++ {
++ .ifnum = 2,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 2,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x82,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC |
++ USB_ENDPOINT_USAGE_IMPLICIT_FB,
++ .rates = SNDRV_PCM_RATE_96000,
++ .rate_min = 96000,
++ .rate_max = 96000,
++ .nr_rates = 1,
++ .rate_table = (unsigned int[]) { 96000 },
++ .clock = 0x29
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
++{
++ /*
++ * Fiero SC-01 (firmware v1.1.0)
++ */
++ USB_DEVICE(0x2b53, 0x0031),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .vendor_name = "Fiero",
++ .product_name = "SC-01",
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = &(const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ /* Playback */
++ {
++ .ifnum = 1,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 1,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x01,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC,
++ .rates = SNDRV_PCM_RATE_48000 |
++ SNDRV_PCM_RATE_96000,
++ .rate_min = 48000,
++ .rate_max = 96000,
++ .nr_rates = 2,
++ .rate_table = (unsigned int[]) { 48000, 96000 },
++ .clock = 0x29
++ }
++ },
++ /* Capture */
++ {
++ .ifnum = 2,
++ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
++ .data = &(const struct audioformat) {
++ .formats = SNDRV_PCM_FMTBIT_S32_LE,
++ .channels = 2,
++ .fmt_bits = 24,
++ .iface = 2,
++ .altsetting = 1,
++ .altset_idx = 1,
++ .endpoint = 0x82,
++ .ep_attr = USB_ENDPOINT_XFER_ISOC |
++ USB_ENDPOINT_SYNC_ASYNC |
++ USB_ENDPOINT_USAGE_IMPLICIT_FB,
++ .rates = SNDRV_PCM_RATE_48000 |
++ SNDRV_PCM_RATE_96000,
++ .rate_min = 48000,
++ .rate_max = 96000,
++ .nr_rates = 2,
++ .rate_table = (unsigned int[]) { 48000, 96000 },
++ .clock = 0x29
++ }
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
++{
++ /* Advanced modes of the Mythware XA001AU.
++ * For the standard mode, Mythware XA001AU has ID ffad:a001
++ */
++ USB_DEVICE_VENDOR_SPEC(0xffad, 0xa001),
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .vendor_name = "Mythware",
++ .product_name = "XA001AU",
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = (const struct snd_usb_audio_quirk[]) {
++ {
++ .ifnum = 0,
++ .type = QUIRK_IGNORE_INTERFACE,
++ },
++ {
++ .ifnum = 1,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE,
++ },
++ {
++ .ifnum = 2,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE,
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
+
+ #undef USB_DEVICE_VENDOR_SPEC
+ #undef USB_AUDIO_DEVICE
+diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
+index 8929d9abe8aa8..8de572e774ddc 100644
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1290,7 +1290,7 @@ int snd_usb_apply_interface_quirk(struct snd_usb_audio *chip,
+ if (chip->usb_id == USB_ID(0x0763, 0x2012))
+ return fasttrackpro_skip_setting_quirk(chip, iface, altno);
+ /* presonus studio 1810c: skip altsets incompatible with device_setup */
+- if (chip->usb_id == USB_ID(0x0194f, 0x010c))
++ if (chip->usb_id == USB_ID(0x194f, 0x010c))
+ return s1810c_skip_setting_quirk(chip, iface, altno);
+
+
+@@ -1478,6 +1478,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs,
+ case USB_ID(0x041e, 0x3f19): /* E-Mu 0204 USB */
+ set_format_emu_quirk(subs, fmt);
+ break;
++ case USB_ID(0x534d, 0x0021): /* MacroSilicon MS2100/MS2106 */
+ case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */
+ subs->stream_offset_adj = 2;
+ break;
+@@ -1610,6 +1611,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
+ /* XMOS based USB DACs */
+ switch (chip->usb_id) {
+ case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */
++ case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */
+ case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */
+ case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */
+ if (fp->altsetting == 2)
+@@ -1727,47 +1729,6 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip,
+ }
+ }
+
+-/*
+- * registration quirk:
+- * the registration is skipped if a device matches with the given ID,
+- * unless the interface reaches to the defined one. This is for delaying
+- * the registration until the last known interface, so that the card and
+- * devices appear at the same time.
+- */
+-
+-struct registration_quirk {
+- unsigned int usb_id; /* composed via USB_ID() */
+- unsigned int interface; /* the interface to trigger register */
+-};
+-
+-#define REG_QUIRK_ENTRY(vendor, product, iface) \
+- { .usb_id = USB_ID(vendor, product), .interface = (iface) }
+-
+-static const struct registration_quirk registration_quirks[] = {
+- REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */
+- REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */
+- REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */
+- REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */
+- REG_QUIRK_ENTRY(0x0ecb, 0x1f47, 2), /* JBL Quantum 800 */
+- REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */
+- REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */
+- REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */
+- { 0 } /* terminator */
+-};
+-
+-/* return true if skipping registration */
+-bool snd_usb_registration_quirk(struct snd_usb_audio *chip, int iface)
+-{
+- const struct registration_quirk *q;
+-
+- for (q = registration_quirks; q->usb_id; q++)
+- if (chip->usb_id == q->usb_id)
+- return iface != q->interface;
+-
+- /* Register as normal */
+- return false;
+-}
+-
+ /*
+ * driver behavior quirk flags
+ */
+@@ -1792,6 +1753,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */
+ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR),
++ DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */
+@@ -1821,8 +1784,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
++ DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */
++ QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
++ DEVICE_FLG(0x0763, 0x2030, /* M-Audio Fast Track C400 */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */
+@@ -1833,6 +1802,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
++ DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */
++ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x1397, 0x0508, /* Behringer UMC204HD */
++ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x1397, 0x0509, /* Behringer UMC404HD */
++ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */
+@@ -1887,16 +1862,30 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
++ DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */
++ QUIRK_FLAG_SET_IFACE_FIRST),
++ DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */
++ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
++ DEVICE_FLG(0x534d, 0x0021, /* MacroSilicon MS2100/MS2106 */
++ QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */
+ QUIRK_FLAG_ALIGN_TRANSFER),
+ DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
++ DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++ DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
++ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
++	DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camera */
++ QUIRK_FLAG_IFACE_SKIP_CLOSE),
+
+ /* Vendor matches */
+ VENDOR_FLG(0x045e, /* MS Lifecam */
+@@ -1932,6 +1921,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+ QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x2ab6, /* T+A devices */
+ QUIRK_FLAG_DSD_RAW),
++ VENDOR_FLG(0x3336, /* HEM devices */
++ QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x3353, /* Khadas devices */
+ QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x3842, /* EVGA */
+diff --git a/sound/usb/quirks.h b/sound/usb/quirks.h
+index 31abb7cb01a52..f9bfd5ac7bab0 100644
+--- a/sound/usb/quirks.h
++++ b/sound/usb/quirks.h
+@@ -48,8 +48,6 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip,
+ struct audioformat *fp,
+ int stream);
+
+-bool snd_usb_registration_quirk(struct snd_usb_audio *chip, int iface);
+-
+ void snd_usb_init_quirk_flags(struct snd_usb_audio *chip);
+
+ #endif /* __USBAUDIO_QUIRKS_H */
+diff --git a/sound/usb/stream.c b/sound/usb/stream.c
+index ceb93d798182c..3d4add94e367d 100644
+--- a/sound/usb/stream.c
++++ b/sound/usb/stream.c
+@@ -495,6 +495,10 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip,
+ return 0;
+ }
+ }
++
++ if (chip->card->registered)
++ chip->need_delayed_register = true;
++
+ /* look for an empty stream */
+ list_for_each_entry(as, &chip->pcm_list, list) {
+ if (as->fmt_type != fp->fmt_type)
+@@ -502,9 +506,6 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip,
+ subs = &as->substream[stream];
+ if (subs->ep_num)
+ continue;
+- if (snd_device_get_state(chip->card, as->pcm) !=
+- SNDRV_DEV_BUILD)
+- chip->need_delayed_register = true;
+ err = snd_pcm_new_stream(as->pcm, stream, 1);
+ if (err < 0)
+ return err;
+@@ -1092,6 +1093,7 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip,
+ int i, altno, err, stream;
+ struct audioformat *fp = NULL;
+ struct snd_usb_power_domain *pd = NULL;
++ bool set_iface_first;
+ int num, protocol;
+
+ dev = chip->dev;
+@@ -1105,7 +1107,7 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip,
+ * Dallas DS4201 workaround: It presents 5 altsettings, but the last
+ * one misses syncpipe, and does not produce any sound.
+ */
+- if (chip->usb_id == USB_ID(0x04fa, 0x4201))
++ if (chip->usb_id == USB_ID(0x04fa, 0x4201) && num >= 4)
+ num = 4;
+
+ for (i = 0; i < num; i++) {
+@@ -1222,11 +1224,19 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip,
+ return err;
+ }
+
++ set_iface_first = false;
++ if (protocol == UAC_VERSION_1 ||
++ (chip->quirk_flags & QUIRK_FLAG_SET_IFACE_FIRST))
++ set_iface_first = true;
++
+ /* try to set the interface... */
+ usb_set_interface(chip->dev, iface_no, 0);
++ if (set_iface_first)
++ usb_set_interface(chip->dev, iface_no, altno);
+ snd_usb_init_pitch(chip, fp);
+ snd_usb_init_sample_rate(chip, fp, fp->rate_max);
+- usb_set_interface(chip->dev, iface_no, altno);
++ if (!set_iface_first)
++ usb_set_interface(chip->dev, iface_no, altno);
+ }
+ return 0;
+ }
+diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
+index 167834133b9bc..ec06f441e890f 100644
+--- a/sound/usb/usbaudio.h
++++ b/sound/usb/usbaudio.h
+@@ -8,7 +8,7 @@
+ */
+
+ /* handling of USB vendor/product ID pairs as 32-bit numbers */
+-#define USB_ID(vendor, product) (((vendor) << 16) | (product))
++#define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product))
+ #define USB_ID_VENDOR(id) ((id) >> 16)
+ #define USB_ID_PRODUCT(id) ((u16)(id))
+
+@@ -37,6 +37,7 @@ struct snd_usb_audio {
+ unsigned int quirk_flags;
+ unsigned int need_delayed_register:1; /* warn for delayed registration */
+ int num_interfaces;
++ int last_iface;
+ int num_suspended_intf;
+ int sample_rate_read_error;
+
+@@ -164,6 +165,12 @@ extern bool snd_usb_skip_validation;
+ * Support generic DSD raw U32_BE format
+ * QUIRK_FLAG_SET_IFACE_FIRST:
+ * Set up the interface at first like UAC1
++ * QUIRK_FLAG_GENERIC_IMPLICIT_FB:
++ *  Apply the generic implicit feedback sync mode (same as implicit_fb=1 option)
++ * QUIRK_FLAG_SKIP_IMPLICIT_FB:
++ *  Don't apply implicit feedback sync mode
++ * QUIRK_FLAG_IFACE_SKIP_CLOSE:
++ *  Don't close the interface while setting the sample rate
+ */
+
+ #define QUIRK_FLAG_GET_SAMPLE_RATE (1U << 0)
+@@ -183,5 +190,8 @@ extern bool snd_usb_skip_validation;
+ #define QUIRK_FLAG_IGNORE_CTL_ERROR (1U << 14)
+ #define QUIRK_FLAG_DSD_RAW (1U << 15)
+ #define QUIRK_FLAG_SET_IFACE_FIRST (1U << 16)
++#define QUIRK_FLAG_GENERIC_IMPLICIT_FB (1U << 17)
++#define QUIRK_FLAG_SKIP_IMPLICIT_FB (1U << 18)
++#define QUIRK_FLAG_IFACE_SKIP_CLOSE (1U << 19)
+
+ #endif /* __USBAUDIO_H */
+diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
+index 378826312abe6..42add5df37fda 100644
+--- a/sound/x86/intel_hdmi_audio.c
++++ b/sound/x86/intel_hdmi_audio.c
+@@ -1261,7 +1261,7 @@ static int had_pcm_mmap(struct snd_pcm_substream *substream,
+ {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return remap_pfn_range(vma, vma->vm_start,
+- substream->dma_buffer.addr >> PAGE_SHIFT,
++ substream->runtime->dma_addr >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+ }
+
+@@ -1665,7 +1665,7 @@ static void hdmi_lpe_audio_free(struct snd_card *card)
+ * This function is called when the i915 driver creates the
+ * hdmi-lpe-audio platform device.
+ */
+-static int hdmi_lpe_audio_probe(struct platform_device *pdev)
++static int __hdmi_lpe_audio_probe(struct platform_device *pdev)
+ {
+ struct snd_card *card;
+ struct snd_intelhad_card *card_ctx;
+@@ -1826,6 +1826,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
+ return 0;
+ }
+
++static int hdmi_lpe_audio_probe(struct platform_device *pdev)
++{
++ return snd_card_free_on_error(&pdev->dev, __hdmi_lpe_audio_probe(pdev));
++}
++
+ static const struct dev_pm_ops hdmi_lpe_audio_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume)
+ };
+diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
+index 506c06a6536fb..4cc88a642e106 100644
+--- a/tools/arch/parisc/include/uapi/asm/mman.h
++++ b/tools/arch/parisc/include/uapi/asm/mman.h
+@@ -1,20 +1,20 @@
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+ #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+ #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+-#define MADV_DODUMP 70
++#define MADV_DODUMP 17
+ #define MADV_DOFORK 11
+-#define MADV_DONTDUMP 69
++#define MADV_DONTDUMP 16
+ #define MADV_DONTFORK 10
+ #define MADV_DONTNEED 4
+ #define MADV_FREE 8
+-#define MADV_HUGEPAGE 67
+-#define MADV_MERGEABLE 65
+-#define MADV_NOHUGEPAGE 68
++#define MADV_HUGEPAGE 14
++#define MADV_MERGEABLE 12
++#define MADV_NOHUGEPAGE 15
+ #define MADV_NORMAL 0
+ #define MADV_RANDOM 1
+ #define MADV_REMOVE 9
+ #define MADV_SEQUENTIAL 2
+-#define MADV_UNMERGEABLE 66
++#define MADV_UNMERGEABLE 13
+ #define MADV_WILLNEED 3
+ #define MAP_ANONYMOUS 0x10
+ #define MAP_DENYWRITE 0x0800
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index d0ce5cfd3ac14..861451839cf2f 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -14,7 +14,7 @@
+ * Defines x86 CPU feature bits
+ */
+ #define NCAPINTS 20 /* N 32-bit words worth of info */
+-#define NBUGINTS 1 /* N 32-bit bug flags */
++#define NBUGINTS 2 /* N 32-bit bug flags */
+
+ /*
+ * Note: If the comment begins with a quoted string, that string is used
+@@ -203,8 +203,8 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+ /* FREE! ( 7*32+10) */
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+@@ -294,6 +294,13 @@
+ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
+ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+@@ -313,6 +320,7 @@
+ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
+ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+@@ -436,5 +444,7 @@
+ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
++#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
+index 8f28fafa98b32..834a3b6d81e12 100644
+--- a/tools/arch/x86/include/asm/disabled-features.h
++++ b/tools/arch/x86/include/asm/disabled-features.h
+@@ -56,6 +56,25 @@
+ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+ #endif
+
++#ifdef CONFIG_RETPOLINE
++# define DISABLE_RETPOLINE 0
++#else
++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
++#endif
++
++#ifdef CONFIG_RETHUNK
++# define DISABLE_RETHUNK 0
++#else
++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
++#endif
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
++# define DISABLE_UNRET 0
++#else
++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
++#endif
++
+ /* Force disable because it's broken beyond repair */
+ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+
+@@ -79,7 +98,7 @@
+ #define DISABLED_MASK8 0
+ #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+ #define DISABLED_MASK10 0
+-#define DISABLED_MASK11 0
++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+ #define DISABLED_MASK12 0
+ #define DISABLED_MASK13 0
+ #define DISABLED_MASK14 0
+diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
+index a7c413432b33d..2c0838ee3eaca 100644
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -51,6 +51,8 @@
+ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+@@ -91,6 +93,7 @@
+ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+ #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
+ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
+ #define ARCH_CAP_SSB_NO BIT(4) /*
+ * Not susceptible to Speculative Store Bypass
+@@ -114,6 +117,41 @@
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
++#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
++ * Not susceptible to SBDR and SSDP
++ * variants of Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_FBSDP_NO BIT(14) /*
++ * Not susceptible to FBSDP variant of
++ * Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_PSDP_NO BIT(15) /*
++ * Not susceptible to PSDP variant of
++ * Processor MMIO stale data
++ * vulnerabilities.
++ */
++#define ARCH_CAP_FB_CLEAR BIT(17) /*
++ * VERW clears CPU fill buffer
++ * even on MDS_NO CPUs.
++ */
++#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
++ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
++ * bit available to control VERW
++ * behavior.
++ */
++#define ARCH_CAP_RRSBA BIT(19) /*
++ * Indicates RET may use predictors
++ * other than the RSB. With eIBRS
++ * enabled predictions in kernel mode
++ * are restricted to targets in
++ * kernel.
++ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+@@ -128,9 +166,10 @@
+ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
+-/* SRBDS support */
+ #define MSR_IA32_MCU_OPT_CTRL 0x00000123
+-#define RNGDS_MITG_DIS BIT(0)
++#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
++#define RTM_ALLOW BIT(1) /* TSX development mode */
++#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
+
+ #define MSR_IA32_SYSENTER_CS 0x00000174
+ #define MSR_IA32_SYSENTER_ESP 0x00000175
+@@ -456,6 +495,11 @@
+ #define MSR_AMD64_CPUID_FN_1 0xc0011004
+ #define MSR_AMD64_LS_CFG 0xc0011020
+ #define MSR_AMD64_DC_CFG 0xc0011022
++
++#define MSR_AMD64_DE_CFG 0xc0011029
++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
++
+ #define MSR_AMD64_BU_CFG2 0xc001102a
+ #define MSR_AMD64_IBSFETCHCTL 0xc0011030
+ #define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+@@ -489,6 +533,9 @@
+ /* Fam 17h MSRs */
+ #define MSR_F17H_IRPERF 0xc00000e9
+
++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+@@ -530,9 +577,6 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT 20
+ #define MSR_FAM10H_NODE_ID 0xc001100c
+-#define MSR_F10H_DECFG 0xc0011029
+-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
+index 4f1c4b0c29e98..9914bdf4fc9ec 100644
+--- a/tools/arch/x86/kcpuid/cpuid.csv
++++ b/tools/arch/x86/kcpuid/cpuid.csv
+@@ -184,8 +184,8 @@
+ 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr
+ 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr
+ 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr
+- 7, 0, EBX, 26, avx512bw, AVX512 Byte & Word instr
+- 7, 0, EBX, 28, avx512vl, AVX512 Vector Length Extentions (VL)
++ 7, 0, EBX, 30, avx512bw, AVX512 Byte & Word instr
++ 7, 0, EBX, 31, avx512vl, AVX512 Vector Length Extentions (VL)
+ 7, 0, ECX, 0, prefetchwt1, X
+ 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions
+ 7, 0, ECX, 2, umip, User-mode Instruction Prevention
+diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
+index 797699462cd8e..8fd63a067308a 100644
+--- a/tools/arch/x86/lib/insn.c
++++ b/tools/arch/x86/lib/insn.c
+@@ -13,6 +13,7 @@
+ #endif
+ #include "../include/asm/inat.h" /* __ignore_sync_check__ */
+ #include "../include/asm/insn.h" /* __ignore_sync_check__ */
++#include "../include/asm-generic/unaligned.h" /* __ignore_sync_check__ */
+
+ #include <linux/errno.h>
+ #include <linux/kconfig.h>
+@@ -37,10 +38,10 @@
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+ #define __get_next(t, insn) \
+- ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
++ ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); })
+
+ #define __peek_nbyte_next(t, insn, n) \
+- ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); })
++ ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); })
+
+ #define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
+index 1cc9da6e29c79..59cf2343f3d90 100644
+--- a/tools/arch/x86/lib/memcpy_64.S
++++ b/tools/arch/x86/lib/memcpy_64.S
+@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
+ rep movsq
+ movl %edx, %ecx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy)
+ SYM_FUNC_END_ALIAS(__memcpy)
+ EXPORT_SYMBOL(memcpy)
+@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+- ret
++ RET
+ SYM_FUNC_END(memcpy_erms)
+
+ SYM_FUNC_START_LOCAL(memcpy_orig)
+@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq %r9, 1*8(%rdi)
+ movq %r10, -2*8(%rdi, %rdx)
+ movq %r11, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_16bytes:
+ cmpl $8, %edx
+@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movq -1*8(%rsi, %rdx), %r9
+ movq %r8, 0*8(%rdi)
+ movq %r9, -1*8(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_8bytes:
+ cmpl $4, %edx
+@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movl -4(%rsi, %rdx), %r8d
+ movl %ecx, (%rdi)
+ movl %r8d, -4(%rdi, %rdx)
+- retq
++ RET
+ .p2align 4
+ .Lless_3bytes:
+ subl $1, %edx
+@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ movb %cl, (%rdi)
+
+ .Lend:
+- retq
++ RET
+ SYM_FUNC_END(memcpy_orig)
+
+ .popsection
+diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
+index 9827ae267f96e..d624f2bc42f16 100644
+--- a/tools/arch/x86/lib/memset_64.S
++++ b/tools/arch/x86/lib/memset_64.S
+@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
+ movl %edx,%ecx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(__memset)
+ SYM_FUNC_END_ALIAS(memset)
+ EXPORT_SYMBOL(memset)
+@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
+ movq %rdx,%rcx
+ rep stosb
+ movq %r9,%rax
+- ret
++ RET
+ SYM_FUNC_END(memset_erms)
+
+ SYM_FUNC_START_LOCAL(memset_orig)
+@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
+
+ .Lende:
+ movq %r10,%rax
+- ret
++ RET
+
+ .Lbad_alignment:
+ cmpq $7,%rdx
+diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
+index 6183b36c68466..1603801cf1264 100755
+--- a/tools/bootconfig/scripts/ftrace2bconf.sh
++++ b/tools/bootconfig/scripts/ftrace2bconf.sh
+@@ -93,7 +93,7 @@ referred_vars() {
+ }
+
+ event_is_enabled() { # enable-file
+- test -f $1 & grep -q "1" $1
++ test -f $1 && grep -q "1" $1
+ }
+
+ per_event_options() { # event-dir
+diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
+index f68e2e9eef8b2..a2c484c243f5d 100755
+--- a/tools/bootconfig/test-bootconfig.sh
++++ b/tools/bootconfig/test-bootconfig.sh
+@@ -87,10 +87,14 @@ xfail grep -i "error" $OUTFILE
+
+ echo "Max node number check"
+
+-echo -n > $TEMPCONF
+-for i in `seq 1 1024` ; do
+- echo "node$i" >> $TEMPCONF
+-done
++awk '
++BEGIN {
++ for (i = 0; i < 26; i += 1)
++ printf("%c\n", 65 + i % 26)
++ for (i = 26; i < 8192; i += 1)
++ printf("%c%c%c\n", 65 + i % 26, 65 + (i / 26) % 26, 65 + (i / 26 / 26))
++}
++' > $TEMPCONF
+ xpass $BOOTCONF -a $TEMPCONF $INITRD
+
+ echo "badnode" >> $TEMPCONF
+diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
+index b11cfc86a3d02..664601ab1705a 100644
+--- a/tools/bpf/Makefile
++++ b/tools/bpf/Makefile
+@@ -34,7 +34,7 @@ else
+ endif
+
+ FEATURE_USER = .bpf
+-FEATURE_TESTS = libbfd disassembler-four-args
++FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
+ FEATURE_DISPLAY = libbfd disassembler-four-args
+
+ check_feat := 1
+@@ -56,6 +56,9 @@ endif
+ ifeq ($(feature-disassembler-four-args), 1)
+ CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+ endif
++ifeq ($(feature-disassembler-init-styled), 1)
++CFLAGS += -DDISASM_INIT_STYLED
++endif
+
+ $(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+ $(QUIET_BISON)$(YACC) -o $@ -d $<
+diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
+index c8ae958047281..a90a5d110f925 100644
+--- a/tools/bpf/bpf_jit_disasm.c
++++ b/tools/bpf/bpf_jit_disasm.c
+@@ -28,6 +28,7 @@
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <limits.h>
++#include <tools/dis-asm-compat.h>
+
+ #define CMD_ACTION_SIZE_BUFFER 10
+ #define CMD_ACTION_READ_ALL 3
+@@ -64,7 +65,9 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+- init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
++ init_disassemble_info_compat(&info, stdout,
++ (fprintf_ftype) fprintf,
++ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+ info.buffer = image;
+diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
+index c49487905cebe..f89929c7038d5 100644
+--- a/tools/bpf/bpftool/Documentation/Makefile
++++ b/tools/bpf/bpftool/Documentation/Makefile
+@@ -1,6 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ include ../../../scripts/Makefile.include
+-include ../../../scripts/utilities.mak
+
+ INSTALL ?= install
+ RM ?= rm -f
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+index 88b28aa7431f6..4425d942dd39a 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+@@ -13,7 +13,7 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **btf** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } |
+- { **-B** | **--base-btf** } }
++ { **-B** | **--base-btf** } }
+
+ *COMMANDS* := { **dump** | **help** }
+
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+index 3e4395eede4f7..13a217a2503d8 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+@@ -13,7 +13,7 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+- { **-f** | **--bpffs** } }
++ { **-f** | **--bpffs** } }
+
+ *COMMANDS* :=
+ { **show** | **list** | **tree** | **attach** | **detach** | **help** }
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+index 2ef2f2df02799..2a137f8a4cea0 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+@@ -13,7 +13,7 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **gen** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+- { **-L** | **--use-loader** } }
++ { **-L** | **--use-loader** } }
+
+ *COMMAND* := { **object** | **skeleton** | **help** }
+
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
+index 0de90f086238c..9434349636a5e 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
+@@ -13,7 +13,7 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **link** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+- { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
++ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
+
+ *COMMANDS* := { **show** | **list** | **pin** | **help** }
+
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
+index d0c4abe08abab..1445cadc15d4c 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
+@@ -13,11 +13,11 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **map** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+- { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
++ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
+
+ *COMMANDS* :=
+- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+- | **delete** | **pin** | **help** }
++ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
++ **delete** | **pin** | **help** }
+
+ MAP COMMANDS
+ =============
+diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+index 91608cb7e44a0..f27265bd589b4 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+@@ -13,12 +13,12 @@ SYNOPSIS
+ **bpftool** [*OPTIONS*] **prog** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+- { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+- { **-L** | **--use-loader** } }
++ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
++ { **-L** | **--use-loader** } }
+
+ *COMMANDS* :=
+- { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
+- | **loadall** | **help** }
++ { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
++ **loadall** | **help** }
+
+ PROG COMMANDS
+ =============
+diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
+index bb23f55bb05ad..8ac86565c501e 100644
+--- a/tools/bpf/bpftool/Documentation/bpftool.rst
++++ b/tools/bpf/bpftool/Documentation/bpftool.rst
+@@ -19,14 +19,14 @@ SYNOPSIS
+ *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
+
+ *OPTIONS* := { { **-V** | **--version** } |
+- { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
++ { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
+ *MAP-COMMANDS* :=
+ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+- **delete** | **pin** | **event_pipe** | **help** }
++ **delete** | **pin** | **event_pipe** | **help** }
+
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+- **load** | **attach** | **detach** | **help** }
++ **load** | **attach** | **detach** | **help** }
+
+ *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
+
+diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
+index d73232be1e991..11266c78557d7 100644
+--- a/tools/bpf/bpftool/Makefile
++++ b/tools/bpf/bpftool/Makefile
+@@ -1,6 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ include ../../scripts/Makefile.include
+-include ../../scripts/utilities.mak
+
+ ifeq ($(srctree),)
+ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+@@ -77,7 +76,7 @@ INSTALL ?= install
+ RM ?= rm -f
+
+ FEATURE_USER = .bpftool
+-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
++FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled reallocarray zlib libcap \
+ clang-bpf-co-re
+ FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
+ clang-bpf-co-re
+@@ -112,6 +111,9 @@ ifeq ($(feature-libcap), 1)
+ CFLAGS += -DUSE_LIBCAP
+ LIBS += -lcap
+ endif
++ifeq ($(feature-disassembler-init-styled), 1)
++ CFLAGS += -DDISASM_INIT_STYLED
++endif
+
+ include $(wildcard $(OUTPUT)*.d)
+
+diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
+index 9c25286a5c737..70fb26a3dfa8d 100644
+--- a/tools/bpf/bpftool/btf_dumper.c
++++ b/tools/bpf/bpftool/btf_dumper.c
+@@ -418,7 +418,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
+ *(char *)data);
+ break;
+ case BTF_INT_BOOL:
+- jsonw_bool(jw, *(int *)data);
++ jsonw_bool(jw, *(bool *)data);
+ break;
+ default:
+ /* shouldn't happen */
+diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
+index d42d930a3ec4d..e4c65d34fe74f 100644
+--- a/tools/bpf/bpftool/common.c
++++ b/tools/bpf/bpftool/common.c
+@@ -278,6 +278,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
+ int err;
+ int fd;
+
++ if (!REQ_ARGS(3))
++ return -EINVAL;
++
+ fd = get_fd(&argc, &argv);
+ if (fd < 0)
+ return fd;
+diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
+index 7f36385aa9e2e..0c9544c6d3020 100644
+--- a/tools/bpf/bpftool/feature.c
++++ b/tools/bpf/bpftool/feature.c
+@@ -135,12 +135,12 @@ static void print_end_section(void)
+
+ /* Probing functions */
+
+-static int read_procfs(const char *path)
++static long read_procfs(const char *path)
+ {
+ char *endptr, *line = NULL;
+ size_t len = 0;
+ FILE *fd;
+- int res;
++ long res;
+
+ fd = fopen(path, "r");
+ if (!fd)
+@@ -162,7 +162,7 @@ static int read_procfs(const char *path)
+
+ static void probe_unprivileged_disabled(void)
+ {
+- int res;
++ long res;
+
+ /* No support for C-style ouptut */
+
+@@ -181,14 +181,14 @@ static void probe_unprivileged_disabled(void)
+ printf("Unable to retrieve required privileges for bpf() syscall\n");
+ break;
+ default:
+- printf("bpf() syscall restriction has unknown value %d\n", res);
++ printf("bpf() syscall restriction has unknown value %ld\n", res);
+ }
+ }
+ }
+
+ static void probe_jit_enable(void)
+ {
+- int res;
++ long res;
+
+ /* No support for C-style ouptut */
+
+@@ -210,7 +210,7 @@ static void probe_jit_enable(void)
+ printf("Unable to retrieve JIT-compiler status\n");
+ break;
+ default:
+- printf("JIT-compiler status has unknown value %d\n",
++ printf("JIT-compiler status has unknown value %ld\n",
+ res);
+ }
+ }
+@@ -218,7 +218,7 @@ static void probe_jit_enable(void)
+
+ static void probe_jit_harden(void)
+ {
+- int res;
++ long res;
+
+ /* No support for C-style ouptut */
+
+@@ -240,7 +240,7 @@ static void probe_jit_harden(void)
+ printf("Unable to retrieve JIT hardening status\n");
+ break;
+ default:
+- printf("JIT hardening status has unknown value %d\n",
++ printf("JIT hardening status has unknown value %ld\n",
+ res);
+ }
+ }
+@@ -248,7 +248,7 @@ static void probe_jit_harden(void)
+
+ static void probe_jit_kallsyms(void)
+ {
+- int res;
++ long res;
+
+ /* No support for C-style ouptut */
+
+@@ -267,14 +267,14 @@ static void probe_jit_kallsyms(void)
+ printf("Unable to retrieve JIT kallsyms export status\n");
+ break;
+ default:
+- printf("JIT kallsyms exports status has unknown value %d\n", res);
++ printf("JIT kallsyms exports status has unknown value %ld\n", res);
+ }
+ }
+ }
+
+ static void probe_jit_limit(void)
+ {
+- int res;
++ long res;
+
+ /* No support for C-style ouptut */
+
+@@ -287,7 +287,7 @@ static void probe_jit_limit(void)
+ printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n");
+ break;
+ default:
+- printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res);
++ printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res);
+ }
+ }
+ }
+diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
+index d40d92bbf0e48..07fa502a4ac15 100644
+--- a/tools/bpf/bpftool/gen.c
++++ b/tools/bpf/bpftool/gen.c
+@@ -870,7 +870,6 @@ static int do_skeleton(int argc, char **argv)
+ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
+ if (!s) \n\
+ goto err; \n\
+- obj->skeleton = s; \n\
+ \n\
+ s->sz = sizeof(*s); \n\
+ s->name = \"%1$s\"; \n\
+@@ -955,6 +954,7 @@ static int do_skeleton(int argc, char **argv)
+ \n\
+ \"; \n\
+ \n\
++ obj->skeleton = s; \n\
+ return 0; \n\
+ err: \n\
+ bpf_object__destroy_skeleton(s); \n\
+diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
+index 24734f2249d6e..aaf99a0168c90 100644
+--- a/tools/bpf/bpftool/jit_disasm.c
++++ b/tools/bpf/bpftool/jit_disasm.c
+@@ -24,6 +24,7 @@
+ #include <sys/stat.h>
+ #include <limits.h>
+ #include <bpf/libbpf.h>
++#include <tools/dis-asm-compat.h>
+
+ #include "json_writer.h"
+ #include "main.h"
+@@ -39,15 +40,12 @@ static void get_exec_path(char *tpath, size_t size)
+ }
+
+ static int oper_count;
+-static int fprintf_json(void *out, const char *fmt, ...)
++static int printf_json(void *out, const char *fmt, va_list ap)
+ {
+- va_list ap;
+ char *s;
+ int err;
+
+- va_start(ap, fmt);
+ err = vasprintf(&s, fmt, ap);
+- va_end(ap);
+ if (err < 0)
+ return -1;
+
+@@ -73,6 +71,32 @@ static int fprintf_json(void *out, const char *fmt, ...)
+ return 0;
+ }
+
++static int fprintf_json(void *out, const char *fmt, ...)
++{
++ va_list ap;
++ int r;
++
++ va_start(ap, fmt);
++ r = printf_json(out, fmt, ap);
++ va_end(ap);
++
++ return r;
++}
++
++static int fprintf_json_styled(void *out,
++ enum disassembler_style style __maybe_unused,
++ const char *fmt, ...)
++{
++ va_list ap;
++ int r;
++
++ va_start(ap, fmt);
++ r = printf_json(out, fmt, ap);
++ va_end(ap);
++
++ return r;
++}
++
+ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ const char *arch, const char *disassembler_options,
+ const struct btf *btf,
+@@ -99,11 +123,13 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ if (json_output)
+- init_disassemble_info(&info, stdout,
+- (fprintf_ftype) fprintf_json);
++ init_disassemble_info_compat(&info, stdout,
++ (fprintf_ftype) fprintf_json,
++ fprintf_json_styled);
+ else
+- init_disassemble_info(&info, stdout,
+- (fprintf_ftype) fprintf);
++ init_disassemble_info_compat(&info, stdout,
++ (fprintf_ftype) fprintf,
++ fprintf_styled);
+
+ /* Update architecture info for offload. */
+ if (arch) {
+diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
+index 7fea83bedf488..bca5dd0a59e34 100644
+--- a/tools/bpf/bpftool/json_writer.c
++++ b/tools/bpf/bpftool/json_writer.c
+@@ -80,9 +80,6 @@ static void jsonw_puts(json_writer_t *self, const char *str)
+ case '"':
+ fputs("\\\"", self->out);
+ break;
+- case '\'':
+- fputs("\\\'", self->out);
+- break;
+ default:
+ putc(*str, self->out);
+ }
+diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
+index 02eaaf065f651..b70c023f3a571 100644
+--- a/tools/bpf/bpftool/main.c
++++ b/tools/bpf/bpftool/main.c
+@@ -402,6 +402,18 @@ int main(int argc, char **argv)
+ };
+ int opt, ret;
+
++ setlinebuf(stdout);
++
++#ifdef USE_LIBCAP
++ /* Libcap < 2.63 hooks before main() to compute the number of
++	 * capabilities of the running kernel, and in doing so it calls prctl(),
++	 * which may fail and set errno to non-zero.
++ * Let's reset errno to make sure this does not interfere with the
++ * batch mode.
++ */
++ errno = 0;
++#endif
++
+ last_do_help = do_help;
+ pretty_output = false;
+ json_output = false;
+diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
+index 407071d54ab1c..72ef9ddae2609 100644
+--- a/tools/bpf/bpftool/map.c
++++ b/tools/bpf/bpftool/map.c
+@@ -1042,11 +1042,9 @@ static void print_key_value(struct bpf_map_info *info, void *key,
+ json_writer_t *btf_wtr;
+ struct btf *btf;
+
+- btf = btf__load_from_kernel_by_id(info->btf_id);
+- if (libbpf_get_error(btf)) {
+- p_err("failed to get btf");
++ btf = get_map_kv_btf(info);
++ if (libbpf_get_error(btf))
+ return;
+- }
+
+ if (json_output) {
+ print_entry_json(info, key, value, btf);
+diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
+index 9c3e343b7d872..bdd4d3b12f6c0 100644
+--- a/tools/bpf/bpftool/prog.c
++++ b/tools/bpf/bpftool/prog.c
+@@ -308,18 +308,12 @@ static void show_prog_metadata(int fd, __u32 num_maps)
+ if (printed_header)
+ jsonw_end_object(json_wtr);
+ } else {
+- json_writer_t *btf_wtr = jsonw_new(stdout);
++ json_writer_t *btf_wtr;
+ struct btf_dumper d = {
+ .btf = btf,
+- .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+- if (!btf_wtr) {
+- p_err("jsonw alloc failed");
+- goto out_free;
+- }
+-
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
+@@ -329,6 +323,14 @@ static void show_prog_metadata(int fd, __u32 num_maps)
+
+ if (!printed_header) {
+ printf("\tmetadata:");
++
++ btf_wtr = jsonw_new(stdout);
++ if (!btf_wtr) {
++ p_err("jsonw alloc failed");
++ goto out_free;
++ }
++ d.jw = btf_wtr,
++
+ printed_header = true;
+ }
+
+@@ -627,8 +629,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ char func_sig[1024];
+ unsigned char *buf;
+ __u32 member_len;
++ int fd, err = -1;
+ ssize_t n;
+- int fd;
+
+ if (mode == DUMP_JITED) {
+ if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
+@@ -667,7 +669,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ if (fd < 0) {
+ p_err("can't open file %s: %s", filepath,
+ strerror(errno));
+- return -1;
++ goto exit_free;
+ }
+
+ n = write(fd, buf, member_len);
+@@ -675,7 +677,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ if (n != (ssize_t)member_len) {
+ p_err("error writing output file: %s",
+ n < 0 ? strerror(errno) : "short write");
+- return -1;
++ goto exit_free;
+ }
+
+ if (json_output)
+@@ -689,7 +691,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ info->netns_ino,
+ &disasm_opt);
+ if (!name)
+- return -1;
++ goto exit_free;
+ }
+
+ if (info->nr_jited_func_lens && info->jited_func_lens) {
+@@ -784,9 +786,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ kernel_syms_destroy(&dd);
+ }
+
+- btf__free(btf);
++ err = 0;
+
+- return 0;
++exit_free:
++ btf__free(btf);
++ bpf_prog_linfo__free(prog_linfo);
++ return err;
+ }
+
+ static int do_dump(int argc, char **argv)
+@@ -2059,10 +2064,38 @@ static void profile_close_perf_events(struct profiler_bpf *obj)
+ profile_perf_event_cnt = 0;
+ }
+
++static int profile_open_perf_event(int mid, int cpu, int map_fd)
++{
++ int pmu_fd;
++
++ pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr,
++ -1 /*pid*/, cpu, -1 /*group_fd*/, 0);
++ if (pmu_fd < 0) {
++ if (errno == ENODEV) {
++ p_info("cpu %d may be offline, skip %s profiling.",
++ cpu, metrics[mid].name);
++ profile_perf_event_cnt++;
++ return 0;
++ }
++ return -1;
++ }
++
++ if (bpf_map_update_elem(map_fd,
++ &profile_perf_event_cnt,
++ &pmu_fd, BPF_ANY) ||
++ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
++ close(pmu_fd);
++ return -1;
++ }
++
++ profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
++ return 0;
++}
++
+ static int profile_open_perf_events(struct profiler_bpf *obj)
+ {
+ unsigned int cpu, m;
+- int map_fd, pmu_fd;
++ int map_fd;
+
+ profile_perf_events = calloc(
+ sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+@@ -2081,17 +2114,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
+ if (!metrics[m].selected)
+ continue;
+ for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
+- pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
+- -1/*pid*/, cpu, -1/*group_fd*/, 0);
+- if (pmu_fd < 0 ||
+- bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
+- &pmu_fd, BPF_ANY) ||
+- ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
++ if (profile_open_perf_event(m, cpu, map_fd)) {
+ p_err("failed to create event %s on cpu %d",
+ metrics[m].name, cpu);
+ return -1;
+ }
+- profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+ }
+ }
+ return 0;
+diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
+index ce5b65e07ab10..2f80edc682f11 100644
+--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
++++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
+@@ -4,6 +4,12 @@
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+
++struct bpf_perf_event_value___local {
++ __u64 counter;
++ __u64 enabled;
++ __u64 running;
++} __attribute__((preserve_access_index));
++
+ /* map of perf event fds, num_cpu * num_metric entries */
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+@@ -15,14 +21,14 @@ struct {
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+- __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
+ } fentry_readings SEC(".maps");
+
+ /* accumulated readings */
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+- __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(value_size, sizeof(struct bpf_perf_event_value___local));
+ } accum_readings SEC(".maps");
+
+ /* sample counts, one per cpu */
+@@ -39,7 +45,7 @@ const volatile __u32 num_metric = 1;
+ SEC("fentry/XXX")
+ int BPF_PROG(fentry_XXX)
+ {
+- struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
++ struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS];
+ u32 key = bpf_get_smp_processor_id();
+ u32 i;
+
+@@ -53,10 +59,10 @@ int BPF_PROG(fentry_XXX)
+ }
+
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+- struct bpf_perf_event_value reading;
++ struct bpf_perf_event_value___local reading;
+ int err;
+
+- err = bpf_perf_event_read_value(&events, key, &reading,
++ err = bpf_perf_event_read_value(&events, key, (void *)&reading,
+ sizeof(reading));
+ if (err)
+ return 0;
+@@ -68,14 +74,14 @@ int BPF_PROG(fentry_XXX)
+ }
+
+ static inline void
+-fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
++fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after)
+ {
+- struct bpf_perf_event_value *before, diff;
++ struct bpf_perf_event_value___local *before, diff;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &id);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+- struct bpf_perf_event_value *accum;
++ struct bpf_perf_event_value___local *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+@@ -93,7 +99,7 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+ SEC("fexit/XXX")
+ int BPF_PROG(fexit_XXX)
+ {
+- struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
++ struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS];
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 i, zero = 0;
+ int err;
+@@ -102,7 +108,8 @@ int BPF_PROG(fexit_XXX)
+ /* read all events before updating the maps, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
+- readings + i, sizeof(*readings));
++ (void *)(readings + i),
++ sizeof(*readings));
+ if (err)
+ return 0;
+ }
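As an aside (illustrative, not part of the patch): the ___local suffix used throughout this profiler is libbpf's type-flavor convention. Everything after the triple underscore is ignored when CO-RE matches the definition against kernel BTF, so field accesses through the locally declared struct are relocated to the kernel's real struct bpf_perf_event_value at load time. A minimal sketch of the same pattern, with a hypothetical struct, field, and attach point chosen purely for illustration:

	// SPDX-License-Identifier: GPL-2.0
	/* Illustrative sketch of the ___local flavor pattern; not from the patch. */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>
	#include <bpf/bpf_core_read.h>

	/* Flavor of the kernel's struct task_struct with only the field we read. */
	struct task_struct___local {
		int pid;
	} __attribute__((preserve_access_index));

	SEC("fentry/do_nanosleep")
	int BPF_PROG(show_current_pid)
	{
		struct task_struct___local *t = (void *)bpf_get_current_task();

		/* The offset of ->pid is relocated against kernel BTF at load time. */
		bpf_printk("current pid=%d", BPF_CORE_READ(t, pid));
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";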
+diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
+index f1f32e21d5cd0..b91c62d0a7d62 100644
+--- a/tools/bpf/bpftool/xlated_dumper.c
++++ b/tools/bpf/bpftool/xlated_dumper.c
+@@ -369,8 +369,15 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+ struct bpf_insn *insn_start = buf_start;
+ struct bpf_insn *insn_end = buf_end;
+ struct bpf_insn *cur = insn_start;
++ bool double_insn = false;
+
+ for (; cur <= insn_end; cur++) {
++ if (double_insn) {
++ double_insn = false;
++ continue;
++ }
++ double_insn = cur->code == (BPF_LD | BPF_IMM | BPF_DW);
++
+ printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ print_bpf_insn(&cbs, cur, true);
+ if (cur != insn_end)
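For context, BPF_LD | BPF_IMM | BPF_DW (ld_imm64) is the only eBPF instruction encoded in two consecutive 8-byte slots, which is why the graph dumper above has to skip the pseudo-instruction that follows it. A small stand-alone sketch of the same check (the helper name is made up for illustration):

	#include <stdbool.h>
	#include <linux/bpf.h>

	/* True when insn is the first half of a 16-byte ld_imm64 instruction. */
	static bool insn_is_double(const struct bpf_insn *insn)
	{
		return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
	}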
+diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
+index bb9fa8de7e625..af9f9d3534c96 100644
+--- a/tools/bpf/resolve_btfids/Makefile
++++ b/tools/bpf/resolve_btfids/Makefile
+@@ -9,7 +9,11 @@ ifeq ($(V),1)
+ msg =
+ else
+ Q = @
+- msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
++ ifeq ($(silent),1)
++ msg =
++ else
++ msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
++ endif
+ MAKEFLAGS=--no-print-directory
+ endif
+
+diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
+index de6365b53c9ca..45e0d640618ac 100644
+--- a/tools/bpf/resolve_btfids/main.c
++++ b/tools/bpf/resolve_btfids/main.c
+@@ -166,7 +166,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
+ return NULL;
+ }
+
+-static struct btf_id*
++static struct btf_id *
+ btf_id__add(struct rb_root *root, char *name, bool unique)
+ {
+ struct rb_node **p = &root->rb_node;
+@@ -720,7 +720,8 @@ int main(int argc, const char **argv)
+ if (no_fail)
+ return 0;
+ pr_err("FAILED to find needed sections\n");
+- return -1;
++ err = 0;
++ goto out;
+ }
+
+ if (symbols_collect(&obj))
+diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
+index 3dd2f68366f95..f027281f0a7e2 100644
+--- a/tools/build/Makefile.feature
++++ b/tools/build/Makefile.feature
+@@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \
+ numa_num_possible_cpus \
+ libperl \
+ libpython \
+- libpython-version \
+ libslang \
+ libslang-include-subdir \
+ libtraceevent \
+@@ -70,6 +69,7 @@ FEATURE_TESTS_BASIC := \
+ libaio \
+ libzstd \
+ disassembler-four-args \
++ disassembler-init-styled \
+ file-handle
+
+ # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+@@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA := \
+ llvm-version \
+ clang \
+ libbpf \
++ libbpf-btf__load_from_kernel_by_id \
+ libpfm4 \
+ libdebuginfod \
+ clang-bpf-co-re
+diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
+index eff55d287db1f..aa3b0d75e44b7 100644
+--- a/tools/build/feature/Makefile
++++ b/tools/build/feature/Makefile
+@@ -18,6 +18,7 @@ FILES= \
+ test-libbfd.bin \
+ test-libbfd-buildid.bin \
+ test-disassembler-four-args.bin \
++ test-disassembler-init-styled.bin \
+ test-reallocarray.bin \
+ test-libbfd-liberty.bin \
+ test-libbfd-liberty-z.bin \
+@@ -32,7 +33,6 @@ FILES= \
+ test-numa_num_possible_cpus.bin \
+ test-libperl.bin \
+ test-libpython.bin \
+- test-libpython-version.bin \
+ test-libslang.bin \
+ test-libslang-include-subdir.bin \
+ test-libtraceevent.bin \
+@@ -57,6 +57,7 @@ FILES= \
+ test-lzma.bin \
+ test-bpf.bin \
+ test-libbpf.bin \
++ test-libbpf-btf__load_from_kernel_by_id.bin \
+ test-get_cpuid.bin \
+ test-sdt.bin \
+ test-cxx.bin \
+@@ -214,18 +215,22 @@ strip-libs = $(filter-out -l%,$(1))
+ PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+ PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+-PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
++PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+ FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
++ifeq ($(CC_NO_CLANG), 0)
++ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
++ PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
++ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
++ FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
++endif
++
+ $(OUTPUT)test-libperl.bin:
+ $(BUILD) $(FLAGS_PERL_EMBED)
+
+ $(OUTPUT)test-libpython.bin:
+ $(BUILD) $(FLAGS_PYTHON_EMBED)
+
+-$(OUTPUT)test-libpython-version.bin:
+- $(BUILD)
+-
+ $(OUTPUT)test-libbfd.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+
+@@ -235,6 +240,9 @@ $(OUTPUT)test-libbfd-buildid.bin:
+ $(OUTPUT)test-disassembler-four-args.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
+
++$(OUTPUT)test-disassembler-init-styled.bin:
++ $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
++
+ $(OUTPUT)test-reallocarray.bin:
+ $(BUILD)
+
+@@ -280,6 +288,9 @@ $(OUTPUT)test-bpf.bin:
+ $(OUTPUT)test-libbpf.bin:
+ $(BUILD) -lbpf
+
++$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
++ $(BUILD) -lbpf
++
+ $(OUTPUT)test-sdt.bin:
+ $(BUILD)
+
+diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
+index 9204395272912..957c02c7b163b 100644
+--- a/tools/build/feature/test-all.c
++++ b/tools/build/feature/test-all.c
+@@ -14,10 +14,6 @@
+ # include "test-libpython.c"
+ #undef main
+
+-#define main main_test_libpython_version
+-# include "test-libpython-version.c"
+-#undef main
+-
+ #define main main_test_libperl
+ # include "test-libperl.c"
+ #undef main
+@@ -170,6 +166,10 @@
+ # include "test-disassembler-four-args.c"
+ #undef main
+
++#define main main_test_disassembler_init_styled
++# include "test-disassembler-init-styled.c"
++#undef main
++
+ #define main main_test_libzstd
+ # include "test-libzstd.c"
+ #undef main
+@@ -177,7 +177,6 @@
+ int main(int argc, char *argv[])
+ {
+ main_test_libpython();
+- main_test_libpython_version();
+ main_test_libperl();
+ main_test_hello();
+ main_test_libelf();
+@@ -200,7 +199,6 @@ int main(int argc, char *argv[])
+ main_test_timerfd();
+ main_test_stackprotector_all();
+ main_test_libdw_dwarf_unwind();
+- main_test_sync_compare_and_swap(argc, argv);
+ main_test_zlib();
+ main_test_pthread_attr_setaffinity_np();
+ main_test_pthread_barrier();
+diff --git a/tools/build/feature/test-disassembler-init-styled.c b/tools/build/feature/test-disassembler-init-styled.c
+new file mode 100644
+index 0000000000000..f1ce0ec3bee9d
+--- /dev/null
++++ b/tools/build/feature/test-disassembler-init-styled.c
+@@ -0,0 +1,13 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <stdio.h>
++#include <dis-asm.h>
++
++int main(void)
++{
++ struct disassemble_info info;
++
++ init_disassemble_info(&info, stdout,
++ NULL, NULL);
++
++ return 0;
++}
+diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
+new file mode 100644
+index 0000000000000..a17647f7d5a43
+--- /dev/null
++++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
+@@ -0,0 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <bpf/btf.h>
++
++int main(void)
++{
++ btf__load_from_kernel_by_id(20151128);
++ return 0;
++}
+diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
+index a98174e0569c8..bc34a5bbb5049 100644
+--- a/tools/build/feature/test-libcrypto.c
++++ b/tools/build/feature/test-libcrypto.c
+@@ -1,16 +1,23 @@
+ // SPDX-License-Identifier: GPL-2.0
++#include <openssl/evp.h>
+ #include <openssl/sha.h>
+ #include <openssl/md5.h>
+
+ int main(void)
+ {
+- MD5_CTX context;
++ EVP_MD_CTX *mdctx;
+ unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+ unsigned char dat[] = "12345";
++ unsigned int digest_len;
+
+- MD5_Init(&context);
+- MD5_Update(&context, &dat[0], sizeof(dat));
+- MD5_Final(&md[0], &context);
++ mdctx = EVP_MD_CTX_new();
++ if (!mdctx)
++ return 0;
++
++ EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
++ EVP_DigestUpdate(mdctx, &dat[0], sizeof(dat));
++ EVP_DigestFinal_ex(mdctx, &md[0], &digest_len);
++ EVP_MD_CTX_free(mdctx);
+
+ SHA1(&dat[0], sizeof(dat), &md[0]);
+
+diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c
+deleted file mode 100644
+index 47714b942d4d3..0000000000000
+--- a/tools/build/feature/test-libpython-version.c
++++ /dev/null
+@@ -1,11 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-#include <Python.h>
+-
+-#if PY_VERSION_HEX >= 0x03000000
+- #error
+-#endif
+-
+-int main(void)
+-{
+- return 0;
+-}
+diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
+index a2b233fdb572e..6670199909822 100644
+--- a/tools/gpio/gpio-event-mon.c
++++ b/tools/gpio/gpio-event-mon.c
+@@ -86,6 +86,7 @@ int monitor_device(const char *device_name,
+ gpiotools_test_bit(values.bits, i));
+ }
+
++ i = 0;
+ while (1) {
+ struct gpio_v2_line_event event;
+
+diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
+index c61d061247e17..52a0be45410c9 100644
+--- a/tools/gpio/lsgpio.c
++++ b/tools/gpio/lsgpio.c
+@@ -94,7 +94,7 @@ static void print_attributes(struct gpio_v2_line_info *info)
+ for (i = 0; i < info->num_attrs; i++) {
+ if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+ fprintf(stdout, ", debounce_period=%dusec",
+- info->attrs[0].debounce_period_us);
++ info->attrs[i].debounce_period_us);
+ }
+ }
+
+diff --git a/tools/hv/vmbus_testing b/tools/hv/vmbus_testing
+index e7212903dd1d9..4467979d8f699 100755
+--- a/tools/hv/vmbus_testing
++++ b/tools/hv/vmbus_testing
+@@ -164,7 +164,7 @@ def recursive_file_lookup(path, file_map):
+ def get_all_devices_test_status(file_map):
+
+ for device in file_map:
+- if (get_test_state(locate_state(device, file_map)) is 1):
++ if (get_test_state(locate_state(device, file_map)) == 1):
+ print("Testing = ON for: {}"
+ .format(device.split("/")[5]))
+ else:
+@@ -203,7 +203,7 @@ def write_test_files(path, value):
+ def set_test_state(state_path, state_value, quiet):
+
+ write_test_files(state_path, state_value)
+- if (get_test_state(state_path) is 1):
++ if (get_test_state(state_path) == 1):
+ if (not quiet):
+ print("Testing = ON for device: {}"
+ .format(state_path.split("/")[5]))
+diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
+index 2491c54a5e4fb..f8deae4e26a15 100644
+--- a/tools/iio/iio_generic_buffer.c
++++ b/tools/iio/iio_generic_buffer.c
+@@ -715,12 +715,12 @@ int main(int argc, char **argv)
+ continue;
+ }
+
+- toread = buf_len;
+ } else {
+ usleep(timedelay);
+- toread = 64;
+ }
+
++ toread = buf_len;
++
+ read_size = read(buf_fd, data, toread * scan_size);
+ if (read_size < 0) {
+ if (errno == EAGAIN) {
+diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c
+index aadee6d34c74c..6a00a6eecaef0 100644
+--- a/tools/iio/iio_utils.c
++++ b/tools/iio/iio_utils.c
+@@ -264,6 +264,7 @@ int iioutils_get_param_float(float *output, const char *param_name,
+ if (fscanf(sysfsfp, "%f", output) != 1)
+ ret = errno ? -errno : -ENODATA;
+
++ fclose(sysfsfp);
+ break;
+ }
+ error_free_filename:
+@@ -345,9 +346,9 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ }
+
+ sysfsfp = fopen(filename, "r");
++ free(filename);
+ if (!sysfsfp) {
+ ret = -errno;
+- free(filename);
+ goto error_close_dir;
+ }
+
+@@ -357,7 +358,6 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+- free(filename);
+ goto error_close_dir;
+ }
+ if (ret == 1)
+@@ -365,11 +365,9 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+- free(filename);
+ goto error_close_dir;
+ }
+
+- free(filename);
+ }
+
+ *ci_array = malloc(sizeof(**ci_array) * (*counter));
+@@ -395,9 +393,9 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ }
+
+ sysfsfp = fopen(filename, "r");
++ free(filename);
+ if (!sysfsfp) {
+ ret = -errno;
+- free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+@@ -405,20 +403,17 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ errno = 0;
+ if (fscanf(sysfsfp, "%i", &current_enabled) != 1) {
+ ret = errno ? -errno : -ENODATA;
+- free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+- free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ if (!current_enabled) {
+- free(filename);
+ count--;
+ continue;
+ }
+@@ -429,7 +424,6 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ strlen(ent->d_name) -
+ strlen("_en"));
+ if (!current->name) {
+- free(filename);
+ ret = -ENOMEM;
+ count--;
+ goto error_cleanup_array;
+@@ -439,7 +433,6 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ ret = iioutils_break_up_name(current->name,
+ &current->generic_name);
+ if (ret) {
+- free(filename);
+ free(current->name);
+ count--;
+ goto error_cleanup_array;
+@@ -450,17 +443,16 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ scan_el_dir,
+ current->name);
+ if (ret < 0) {
+- free(filename);
+ ret = -ENOMEM;
+ goto error_cleanup_array;
+ }
+
+ sysfsfp = fopen(filename, "r");
++ free(filename);
+ if (!sysfsfp) {
+ ret = -errno;
+- fprintf(stderr, "failed to open %s\n",
+- filename);
+- free(filename);
++ fprintf(stderr, "failed to open %s/%s_index\n",
++ scan_el_dir, current->name);
+ goto error_cleanup_array;
+ }
+
+@@ -470,17 +462,14 @@ int build_channel_array(const char *device_dir, int buffer_idx,
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+- free(filename);
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+- free(filename);
+ goto error_cleanup_array;
+ }
+
+- free(filename);
+ /* Find the scale */
+ ret = iioutils_get_param_float(&current->scale,
+ "scale",
+@@ -547,6 +536,10 @@ static int calc_digits(int num)
+ {
+ int count = 0;
+
++ /* It takes a digit to represent zero */
++ if (!num)
++ return 1;
++
+ while (num != 0) {
+ num /= 10;
+ count++;
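A quick worked example of the calc_digits() change above, with the helper copied as patched: before the early return, calc_digits(0) evaluated to 0, so a path built for index 0 could be sized one character short; with the fix, zero correctly counts as one digit.

	#include <stdio.h>

	static int calc_digits(int num)
	{
		int count = 0;

		/* It takes a digit to represent zero */
		if (!num)
			return 1;

		while (num != 0) {
			num /= 10;
			count++;
		}

		return count;
	}

	int main(void)
	{
		/* Prints "1 1 4": zero now counts as one digit. */
		printf("%d %d %d\n", calc_digits(0), calc_digits(7), calc_digits(1234));
		return 0;
	}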
+diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h
+new file mode 100644
+index 0000000000000..47387c607035e
+--- /dev/null
++++ b/tools/include/asm-generic/unaligned.h
+@@ -0,0 +1,23 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++/*
++ * Copied from the kernel sources to tools/perf/:
++ */
++
++#ifndef __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H
++#define __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H
++
++#define __get_unaligned_t(type, ptr) ({ \
++ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
++ __pptr->x; \
++})
++
++#define __put_unaligned_t(type, val, ptr) do { \
++ struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
++ __pptr->x = (val); \
++} while (0)
++
++#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
++#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
++
++#endif /* __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H */
++
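To see what these helpers buy over a plain pointer dereference on a misaligned address, here is a small user-space sketch; the macro stand-ins are copied from the header above so the example builds on its own with GCC or clang:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Local stand-ins so the example builds outside the tools tree. */
	#define __packed __attribute__((packed))
	#define __get_unaligned_t(type, ptr) ({					\
		const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);	\
		__pptr->x;							\
	})
	#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))

	int main(void)
	{
		unsigned char buf[8] = { 0 };
		uint32_t val = 0x12345678;

		memcpy(buf + 1, &val, sizeof(val));	/* store at a misaligned offset */
		printf("0x%08x\n", get_unaligned((uint32_t *)(buf + 1)));
		return 0;
	}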
+diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
+index a7e54a08fb54c..5a79572f8b2d7 100644
+--- a/tools/include/linux/kernel.h
++++ b/tools/include/linux/kernel.h
+@@ -14,6 +14,8 @@
+ #define UINT_MAX (~0U)
+ #endif
+
++#define _RET_IP_ ((unsigned long)__builtin_return_address(0))
++
+ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+ #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
+@@ -52,6 +54,10 @@
+ _min1 < _min2 ? _min1 : _min2; })
+ #endif
+
++#define max_t(type, x, y) max((type)x, (type)y)
++#define min_t(type, x, y) min((type)x, (type)y)
++#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
++
+ #ifndef roundup
+ #define roundup(x, y) ( \
+ { \
+@@ -102,7 +108,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+ int scnprintf(char * buf, size_t size, const char * fmt, ...);
+ int scnprintf_pad(char * buf, size_t size, const char * fmt, ...);
+
++#ifndef ARRAY_SIZE
+ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
++#endif
+
+ /*
+ * This looks more complex than it should be. But we need to
+diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
+index 7e72d975cb761..51f5b24af8342 100644
+--- a/tools/include/linux/objtool.h
++++ b/tools/include/linux/objtool.h
+@@ -32,11 +32,16 @@ struct unwind_hint {
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
++ *
++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
+ */
+ #define UNWIND_HINT_TYPE_CALL 0
+ #define UNWIND_HINT_TYPE_REGS 1
+ #define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+ #define UNWIND_HINT_TYPE_FUNC 3
++#define UNWIND_HINT_TYPE_ENTRY 4
++#define UNWIND_HINT_TYPE_SAVE 5
++#define UNWIND_HINT_TYPE_RESTORE 6
+
+ #ifdef CONFIG_STACK_VALIDATION
+
+@@ -66,6 +71,23 @@ struct unwind_hint {
+ static void __used __section(".discard.func_stack_frame_non_standard") \
+ *__func_stack_frame_non_standard_##func = func
+
++/*
++ * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore
++ * for the case where a function is intentionally missing frame pointer setup,
++ * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
++ */
++#ifdef CONFIG_FRAME_POINTER
++#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func)
++#else
++#define STACK_FRAME_NON_STANDARD_FP(func)
++#endif
++
++#define ANNOTATE_NOENDBR \
++ "986: \n\t" \
++ ".pushsection .discard.noendbr\n\t" \
++ _ASM_PTR " 986b\n\t" \
++ ".popsection\n\t"
++
+ #else /* __ASSEMBLY__ */
+
+ /*
+@@ -99,7 +121,7 @@ struct unwind_hint {
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+@@ -118,6 +140,13 @@ struct unwind_hint {
+ .popsection
+ .endm
+
++.macro ANNOTATE_NOENDBR
++.Lhere_\@:
++ .pushsection .discard.noendbr
++ .quad .Lhere_\@
++ .popsection
++.endm
++
+ #endif /* __ASSEMBLY__ */
+
+ #else /* !CONFIG_STACK_VALIDATION */
+@@ -127,12 +156,16 @@ struct unwind_hint {
+ #define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "\n\t"
+ #define STACK_FRAME_NON_STANDARD(func)
++#define STACK_FRAME_NON_STANDARD_FP(func)
++#define ANNOTATE_NOENDBR
+ #else
+ #define ANNOTATE_INTRA_FUNCTION_CALL
+-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+ .endm
+ .macro STACK_FRAME_NON_STANDARD func:req
+ .endm
++.macro ANNOTATE_NOENDBR
++.endm
+ #endif
+
+ #endif /* CONFIG_STACK_VALIDATION */
+diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
+new file mode 100644
+index 0000000000000..2dbd80d633cbb
+--- /dev/null
++++ b/tools/include/nolibc/arch-aarch64.h
+@@ -0,0 +1,199 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * AARCH64 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_AARCH64_H
++#define _NOLIBC_ARCH_AARCH64_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_CREAT 0x40
++#define O_EXCL 0x80
++#define O_NOCTTY 0x100
++#define O_TRUNC 0x200
++#define O_APPEND 0x400
++#define O_NONBLOCK 0x800
++#define O_DIRECTORY 0x4000
++
++/* The struct returned by the newfstatat() syscall. Differs slightly from the
++ * x86_64's stat one by field ordering, so be careful.
++ */
++struct sys_stat_struct {
++ unsigned long st_dev;
++ unsigned long st_ino;
++ unsigned int st_mode;
++ unsigned int st_nlink;
++ unsigned int st_uid;
++ unsigned int st_gid;
++
++ unsigned long st_rdev;
++ unsigned long __pad1;
++ long st_size;
++ int st_blksize;
++ int __pad2;
++
++ long st_blocks;
++ long st_atime;
++ unsigned long st_atime_nsec;
++ long st_mtime;
++
++ unsigned long st_mtime_nsec;
++ long st_ctime;
++ unsigned long st_ctime_nsec;
++ unsigned int __unused[2];
++};
++
++/* Syscalls for AARCH64 :
++ * - registers are 64-bit
++ * - stack is 16-byte aligned
++ * - syscall number is passed in x8
++ * - arguments are in x0, x1, x2, x3, x4, x5
++ * - the system call is performed by calling svc 0
++ * - syscall return comes in x0.
++ * - the arguments are cast to long and assigned into the target registers
++ * which are then simply passed as registers to the asm code, so that we
++ * don't have to experience issues with register constraints.
++ *
++ * On aarch64, select() is not implemented so we have to use pselect6().
++ */
++#define __ARCH_WANT_SYS_PSELECT6
++
++#define my_syscall0(num) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0"); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ register long _arg2 asm("x1") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ register long _arg2 asm("x1") = (long)(arg2); \
++ register long _arg3 asm("x2") = (long)(arg3); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ register long _arg2 asm("x1") = (long)(arg2); \
++ register long _arg3 asm("x2") = (long)(arg3); \
++ register long _arg4 asm("x3") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ register long _arg2 asm("x1") = (long)(arg2); \
++ register long _arg3 asm("x2") = (long)(arg3); \
++ register long _arg4 asm("x3") = (long)(arg4); \
++ register long _arg5 asm("x4") = (long)(arg5); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r" (_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
++({ \
++ register long _num asm("x8") = (num); \
++ register long _arg1 asm("x0") = (long)(arg1); \
++ register long _arg2 asm("x1") = (long)(arg2); \
++ register long _arg3 asm("x2") = (long)(arg3); \
++ register long _arg4 asm("x3") = (long)(arg4); \
++ register long _arg5 asm("x4") = (long)(arg5); \
++ register long _arg6 asm("x5") = (long)(arg6); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r" (_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "r"(_arg6), "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++/* startup code */
++asm(".section .text\n"
++ ".weak _start\n"
++ "_start:\n"
++ "ldr x0, [sp]\n" // argc (x0) was in the stack
++ "add x1, sp, 8\n" // argv (x1) = sp
++ "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
++ "add x2, x2, 8\n" // + 8 (skip null)
++ "add x2, x2, x1\n" // + argv
++ "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
++ "bl main\n" // main() returns the status code, we'll exit with it.
++ "mov x8, 93\n" // NR_exit == 93
++ "svc #0\n"
++ "");
++
++#endif // _NOLIBC_ARCH_AARCH64_H
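To make the calling convention above concrete, here is a hedged sketch (not part of the patch; nolibc.h provides the real wrappers) of turning my_syscall3() from this header into a write()-style helper. 64 is __NR_write in the generic syscall table used by arm64:

	/* Assumes arch-aarch64.h has been included so my_syscall3() is visible. */
	static long sys_write(int fd, const void *buf, unsigned long count)
	{
		return my_syscall3(64 /* __NR_write */, fd, buf, count);
	}

	int main(void)
	{
		sys_write(1, "hello\n", 6);	/* fd 1 is stdout */
		return 0;
	}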
+diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
+new file mode 100644
+index 0000000000000..1191395b5acd9
+--- /dev/null
++++ b/tools/include/nolibc/arch-arm.h
+@@ -0,0 +1,204 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * ARM specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_ARM_H
++#define _NOLIBC_ARCH_ARM_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_CREAT 0x40
++#define O_EXCL 0x80
++#define O_NOCTTY 0x100
++#define O_TRUNC 0x200
++#define O_APPEND 0x400
++#define O_NONBLOCK 0x800
++#define O_DIRECTORY 0x4000
++
++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
++ * exactly 56 bytes (stops before the unused array). In big endian, the format
++ * differs as devices are returned as short only.
++ */
++struct sys_stat_struct {
++#if defined(__ARMEB__)
++ unsigned short st_dev;
++ unsigned short __pad1;
++#else
++ unsigned long st_dev;
++#endif
++ unsigned long st_ino;
++ unsigned short st_mode;
++ unsigned short st_nlink;
++ unsigned short st_uid;
++ unsigned short st_gid;
++
++#if defined(__ARMEB__)
++ unsigned short st_rdev;
++ unsigned short __pad2;
++#else
++ unsigned long st_rdev;
++#endif
++ unsigned long st_size;
++ unsigned long st_blksize;
++ unsigned long st_blocks;
++
++ unsigned long st_atime;
++ unsigned long st_atime_nsec;
++ unsigned long st_mtime;
++ unsigned long st_mtime_nsec;
++
++ unsigned long st_ctime;
++ unsigned long st_ctime_nsec;
++ unsigned long __unused[2];
++};
++
++/* Syscalls for ARM in ARM or Thumb modes :
++ * - registers are 32-bit
++ * - stack is 8-byte aligned
++ * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
++ * - syscall number is passed in r7
++ * - arguments are in r0, r1, r2, r3, r4, r5
++ * - the system call is performed by calling svc #0
++ * - syscall return comes in r0.
++ * - only lr is clobbered.
++ * - the arguments are cast to long and assigned into the target registers
++ * which are then simply passed as registers to the asm code, so that we
++ * don't have to experience issues with register constraints.
++ * - the syscall number is always specified last in order to allow to force
++ * some registers before (gcc refuses a %-register at the last position).
++ *
++ * Also, ARM supports the old_select syscall if newselect is not available
++ */
++#define __ARCH_WANT_SYS_OLD_SELECT
++
++#define my_syscall0(num) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0"); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0") = (long)(arg1); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), \
++ "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0") = (long)(arg1); \
++ register long _arg2 asm("r1") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0") = (long)(arg1); \
++ register long _arg2 asm("r1") = (long)(arg2); \
++ register long _arg3 asm("r2") = (long)(arg3); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
++ "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0") = (long)(arg1); \
++ register long _arg2 asm("r1") = (long)(arg2); \
++ register long _arg3 asm("r2") = (long)(arg3); \
++ register long _arg4 asm("r3") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
++ "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ register long _num asm("r7") = (num); \
++ register long _arg1 asm("r0") = (long)(arg1); \
++ register long _arg2 asm("r1") = (long)(arg2); \
++ register long _arg3 asm("r2") = (long)(arg3); \
++ register long _arg4 asm("r3") = (long)(arg4); \
++ register long _arg5 asm("r4") = (long)(arg5); \
++ \
++ asm volatile ( \
++ "svc #0\n" \
++ : "=r" (_arg1) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "r"(_num) \
++ : "memory", "cc", "lr" \
++ ); \
++ _arg1; \
++})
++
++/* startup code */
++asm(".section .text\n"
++ ".weak _start\n"
++ "_start:\n"
++#if defined(__THUMBEB__) || defined(__THUMBEL__)
++ /* We enter here in 32-bit mode but if some previous functions were in
++ * 16-bit mode, the assembler cannot know, so we need to tell it we're in
++ * 32-bit now, then switch to 16-bit (is there a better way to do it than
++ * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
++ * it generates correct instructions. Note that we do not support thumb1.
++ */
++ ".code 32\n"
++ "add r0, pc, #1\n"
++ "bx r0\n"
++ ".code 16\n"
++#endif
++ "pop {%r0}\n" // argc was in the stack
++ "mov %r1, %sp\n" // argv = sp
++ "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
++ "add %r2, %r2, $4\n" // ... + 4
++ "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
++ "mov %sp, %r3\n" // callee, and bl doesn't push (lr=pc)
++ "bl main\n" // main() returns the status code, we'll exit with it.
++ "movs r7, $1\n" // NR_exit == 1
++ "svc $0x00\n"
++ "");
++
++#endif // _NOLIBC_ARCH_ARM_H
+diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
+new file mode 100644
+index 0000000000000..125a691fc631e
+--- /dev/null
++++ b/tools/include/nolibc/arch-i386.h
+@@ -0,0 +1,196 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * i386 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_I386_H
++#define _NOLIBC_ARCH_I386_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_CREAT 0x40
++#define O_EXCL 0x80
++#define O_NOCTTY 0x100
++#define O_TRUNC 0x200
++#define O_APPEND 0x400
++#define O_NONBLOCK 0x800
++#define O_DIRECTORY 0x10000
++
++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
++ * exactly 56 bytes (stops before the unused array).
++ */
++struct sys_stat_struct {
++ unsigned long st_dev;
++ unsigned long st_ino;
++ unsigned short st_mode;
++ unsigned short st_nlink;
++ unsigned short st_uid;
++ unsigned short st_gid;
++
++ unsigned long st_rdev;
++ unsigned long st_size;
++ unsigned long st_blksize;
++ unsigned long st_blocks;
++
++ unsigned long st_atime;
++ unsigned long st_atime_nsec;
++ unsigned long st_mtime;
++ unsigned long st_mtime_nsec;
++
++ unsigned long st_ctime;
++ unsigned long st_ctime_nsec;
++ unsigned long __unused[2];
++};
++
++/* Syscalls for i386 :
++ * - mostly similar to x86_64
++ * - registers are 32-bit
++ * - syscall number is passed in eax
++ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
++ * - all registers are preserved (except eax of course)
++ * - the system call is performed by calling int $0x80
++ * - syscall return comes in eax
++ * - the arguments are cast to long and assigned into the target registers
++ * which are then simply passed as registers to the asm code, so that we
++ * don't have to experience issues with register constraints.
++ * - the syscall number is always specified last in order to allow to force
++ * some registers before (gcc refuses a %-register at the last position).
++ *
++ * Also, i386 supports the old_select syscall if newselect is not available
++ */
++#define __ARCH_WANT_SYS_OLD_SELECT
++
++#define my_syscall0(num) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ register long _arg1 asm("ebx") = (long)(arg1); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "r"(_arg1), \
++ "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ register long _arg1 asm("ebx") = (long)(arg1); \
++ register long _arg2 asm("ecx") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "r"(_arg1), "r"(_arg2), \
++ "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ register long _arg1 asm("ebx") = (long)(arg1); \
++ register long _arg2 asm("ecx") = (long)(arg2); \
++ register long _arg3 asm("edx") = (long)(arg3); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
++ "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ register long _arg1 asm("ebx") = (long)(arg1); \
++ register long _arg2 asm("ecx") = (long)(arg2); \
++ register long _arg3 asm("edx") = (long)(arg3); \
++ register long _arg4 asm("esi") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
++ "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = (num); \
++ register long _arg1 asm("ebx") = (long)(arg1); \
++ register long _arg2 asm("ecx") = (long)(arg2); \
++ register long _arg3 asm("edx") = (long)(arg3); \
++ register long _arg4 asm("esi") = (long)(arg4); \
++ register long _arg5 asm("edi") = (long)(arg5); \
++ \
++ asm volatile ( \
++ "int $0x80\n" \
++ : "=a" (_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "0"(_num) \
++ : "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++/* startup code */
++/*
++ * i386 System V ABI mandates:
++ * 1) last pushed argument must be 16-byte aligned.
++ * 2) The deepest stack frame should be set to zero
++ *
++ */
++asm(".section .text\n"
++ ".weak _start\n"
++ "_start:\n"
++ "pop %eax\n" // argc (first arg, %eax)
++ "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
++ "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
++ "xor %ebp, %ebp\n" // zero the stack frame
++ "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
++ "sub $4, %esp\n" // the call instruction (args are aligned)
++ "push %ecx\n" // push all registers on the stack so that we
++ "push %ebx\n" // support both regparm and plain stack modes
++ "push %eax\n"
++ "call main\n" // main() returns the status code in %eax
++ "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
++ "movl $1, %eax\n" // NR_exit == 1
++ "int $0x80\n" // exit now
++ "hlt\n" // ensure it does not
++ "");
++
++#endif // _NOLIBC_ARCH_I386_H
+diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
+new file mode 100644
+index 0000000000000..5d647afa42e68
+--- /dev/null
++++ b/tools/include/nolibc/arch-mips.h
+@@ -0,0 +1,217 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * MIPS specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_MIPS_H
++#define _NOLIBC_ARCH_MIPS_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_APPEND 0x0008
++#define O_NONBLOCK 0x0080
++#define O_CREAT 0x0100
++#define O_TRUNC 0x0200
++#define O_EXCL 0x0400
++#define O_NOCTTY 0x0800
++#define O_DIRECTORY 0x10000
++
++/* The struct returned by the stat() syscall. 88 bytes are returned by the
++ * syscall.
++ */
++struct sys_stat_struct {
++ unsigned int st_dev;
++ long st_pad1[3];
++ unsigned long st_ino;
++ unsigned int st_mode;
++ unsigned int st_nlink;
++ unsigned int st_uid;
++ unsigned int st_gid;
++ unsigned int st_rdev;
++ long st_pad2[2];
++ long st_size;
++ long st_pad3;
++
++ long st_atime;
++ long st_atime_nsec;
++ long st_mtime;
++ long st_mtime_nsec;
++
++ long st_ctime;
++ long st_ctime_nsec;
++ long st_blksize;
++ long st_blocks;
++ long st_pad4[14];
++};
++
++/* Syscalls for MIPS ABI O32 :
++ * - WARNING! there's always a delayed slot!
++ * - WARNING again, the syntax is different, registers take a '$' and numbers
++ * do not.
++ * - registers are 32-bit
++ * - stack is 8-byte aligned
++ * - syscall number is passed in v0 (starts at 0xfa0).
++ * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
++ * leave some room in the stack for the callee to save a0..a3 if needed.
++ * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
++ * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
++ * scall32-o32.S in the kernel sources.
++ * - the system call is performed by calling "syscall"
++ * - syscall return comes in v0, and register a3 needs to be checked to know
++ * if an error occurred, in which case errno is in v0.
++ * - the arguments are cast to long and assigned into the target registers
++ * which are then simply passed as registers to the asm code, so that we
++ * don't have to experience issues with register constraints.
++ */
++
++#define my_syscall0(num) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg4 asm("a3"); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "syscall\n" \
++ "addiu $sp, $sp, 32\n" \
++ : "=r"(_num), "=r"(_arg4) \
++ : "r"(_num) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg4 asm("a3"); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "syscall\n" \
++ "addiu $sp, $sp, 32\n" \
++ : "=r"(_num), "=r"(_arg4) \
++ : "0"(_num), \
++ "r"(_arg1) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg4 asm("a3"); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "syscall\n" \
++ "addiu $sp, $sp, 32\n" \
++ : "=r"(_num), "=r"(_arg4) \
++ : "0"(_num), \
++ "r"(_arg1), "r"(_arg2) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3"); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "syscall\n" \
++ "addiu $sp, $sp, 32\n" \
++ : "=r"(_num), "=r"(_arg4) \
++ : "0"(_num), \
++ "r"(_arg1), "r"(_arg2), "r"(_arg3) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "syscall\n" \
++ "addiu $sp, $sp, 32\n" \
++ : "=r" (_num), "=r"(_arg4) \
++ : "0"(_num), \
++ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ register long _num asm("v0") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3") = (long)(arg4); \
++ register long _arg5 = (long)(arg5); \
++ \
++ asm volatile ( \
++ "addiu $sp, $sp, -32\n" \
++ "sw %7, 16($sp)\n" \
++ "syscall\n " \
++ "addiu $sp, $sp, 32\n" \
++ : "=r" (_num), "=r"(_arg4) \
++ : "0"(_num), \
++ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
++ : "memory", "cc", "at", "v1", "hi", "lo", \
++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
++ ); \
++ _arg4 ? -_num : _num; \
++})
++
++/* startup code, note that it's called __start on MIPS */
++asm(".section .text\n"
++ ".weak __start\n"
++ ".set nomips16\n"
++ ".set push\n"
++ ".set noreorder\n"
++ ".option pic0\n"
++ ".ent __start\n"
++ "__start:\n"
++ "lw $a0,($sp)\n" // argc was in the stack
++ "addiu $a1, $sp, 4\n" // argv = sp + 4
++ "sll $a2, $a0, 2\n" // a2 = argc * 4
++ "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
++ "addiu $a2, $a2, 4\n" // ... + 4
++ "li $t0, -8\n"
++ "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
++ "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
++ "jal main\n" // main() returns the status code, we'll exit with it.
++ "nop\n" // delayed slot
++ "move $a0, $v0\n" // retrieve 32-bit exit code from v0
++ "li $v0, 4001\n" // NR_exit == 4001
++ "syscall\n"
++ ".end __start\n"
++ ".set pop\n"
++ "");
++
++#endif // _NOLIBC_ARCH_MIPS_H
+diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
+new file mode 100644
+index 0000000000000..8c0cb1abb29f7
+--- /dev/null
++++ b/tools/include/nolibc/arch-riscv.h
+@@ -0,0 +1,204 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * RISCV (32 and 64) specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_RISCV_H
++#define _NOLIBC_ARCH_RISCV_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_CREAT 0x40
++#define O_EXCL 0x80
++#define O_NOCTTY 0x100
++#define O_TRUNC 0x200
++#define O_APPEND 0x400
++#define O_NONBLOCK 0x800
++#define O_DIRECTORY 0x10000
++
++struct sys_stat_struct {
++ unsigned long st_dev; /* Device. */
++ unsigned long st_ino; /* File serial number. */
++ unsigned int st_mode; /* File mode. */
++ unsigned int st_nlink; /* Link count. */
++ unsigned int st_uid; /* User ID of the file's owner. */
++ unsigned int st_gid; /* Group ID of the file's group. */
++ unsigned long st_rdev; /* Device number, if device. */
++ unsigned long __pad1;
++ long st_size; /* Size of file, in bytes. */
++ int st_blksize; /* Optimal block size for I/O. */
++ int __pad2;
++ long st_blocks; /* Number 512-byte blocks allocated. */
++ long st_atime; /* Time of last access. */
++ unsigned long st_atime_nsec;
++ long st_mtime; /* Time of last modification. */
++ unsigned long st_mtime_nsec;
++ long st_ctime; /* Time of last status change. */
++ unsigned long st_ctime_nsec;
++ unsigned int __unused4;
++ unsigned int __unused5;
++};
++
++#if __riscv_xlen == 64
++#define PTRLOG "3"
++#define SZREG "8"
++#elif __riscv_xlen == 32
++#define PTRLOG "2"
++#define SZREG "4"
++#endif
++
++/* Syscalls for RISCV :
++ * - stack is 16-byte aligned
++ * - syscall number is passed in a7
++ * - arguments are in a0, a1, a2, a3, a4, a5
++ * - the system call is performed by calling ecall
++ * - syscall return comes in a0
++ * - the arguments are cast to long and assigned into the target
++ * registers which are then simply passed as registers to the asm code,
++ * so that we don't have to experience issues with register constraints.
++ *
++ * On riscv, select() is not implemented so we have to use pselect6().
++ */
++#define __ARCH_WANT_SYS_PSELECT6
++
++#define my_syscall0(num) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0"); \
++ \
++ asm volatile ( \
++ "ecall\n\t" \
++ : "=r"(_arg1) \
++ : "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ \
++ asm volatile ( \
++ "ecall\n" \
++ : "+r"(_arg1) \
++ : "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "ecall\n" \
++ : "+r"(_arg1) \
++ : "r"(_arg2), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ \
++ asm volatile ( \
++ "ecall\n\t" \
++ : "+r"(_arg1) \
++ : "r"(_arg2), "r"(_arg3), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "ecall\n" \
++ : "+r"(_arg1) \
++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3") = (long)(arg4); \
++ register long _arg5 asm("a4") = (long)(arg5); \
++ \
++ asm volatile ( \
++ "ecall\n" \
++ : "+r"(_arg1) \
++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
++({ \
++ register long _num asm("a7") = (num); \
++ register long _arg1 asm("a0") = (long)(arg1); \
++ register long _arg2 asm("a1") = (long)(arg2); \
++ register long _arg3 asm("a2") = (long)(arg3); \
++ register long _arg4 asm("a3") = (long)(arg4); \
++ register long _arg5 asm("a4") = (long)(arg5); \
++ register long _arg6 asm("a5") = (long)(arg6); \
++ \
++ asm volatile ( \
++ "ecall\n" \
++ : "+r"(_arg1) \
++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
++ "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++/* startup code */
++asm(".section .text\n"
++ ".weak _start\n"
++ "_start:\n"
++ ".option push\n"
++ ".option norelax\n"
++ "lla gp, __global_pointer$\n"
++ ".option pop\n"
++ "ld a0, 0(sp)\n" // argc (a0) was in the stack
++ "add a1, sp, "SZREG"\n" // argv (a1) = sp
++ "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
++ "add a2, a2, "SZREG"\n" // + SZREG (skip null)
++ "add a2,a2,a1\n" // + argv
++ "andi sp,a1,-16\n" // sp must be 16-byte aligned
++ "call main\n" // main() returns the status code, we'll exit with it.
++ "li a7, 93\n" // NR_exit == 93
++ "ecall\n"
++ "");
++
++#endif // _NOLIBC_ARCH_RISCV_H
+diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
+new file mode 100644
+index 0000000000000..b1af63ce1cb0b
+--- /dev/null
++++ b/tools/include/nolibc/arch-x86_64.h
+@@ -0,0 +1,215 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * x86_64 specific definitions for NOLIBC
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_ARCH_X86_64_H
++#define _NOLIBC_ARCH_X86_64_H
++
++/* O_* macros for fcntl/open are architecture-specific */
++#define O_RDONLY 0
++#define O_WRONLY 1
++#define O_RDWR 2
++#define O_CREAT 0x40
++#define O_EXCL 0x80
++#define O_NOCTTY 0x100
++#define O_TRUNC 0x200
++#define O_APPEND 0x400
++#define O_NONBLOCK 0x800
++#define O_DIRECTORY 0x10000
++
++/* The struct returned by the stat() syscall, equivalent to stat64(). The
++ * syscall returns 116 bytes and stops in the middle of __unused.
++ */
++struct sys_stat_struct {
++ unsigned long st_dev;
++ unsigned long st_ino;
++ unsigned long st_nlink;
++ unsigned int st_mode;
++ unsigned int st_uid;
++
++ unsigned int st_gid;
++ unsigned int __pad0;
++ unsigned long st_rdev;
++ long st_size;
++ long st_blksize;
++
++ long st_blocks;
++ unsigned long st_atime;
++ unsigned long st_atime_nsec;
++ unsigned long st_mtime;
++
++ unsigned long st_mtime_nsec;
++ unsigned long st_ctime;
++ unsigned long st_ctime_nsec;
++ long __unused[3];
++};
++
++/* Syscalls for x86_64 :
++ * - registers are 64-bit
++ * - syscall number is passed in rax
++ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
++ * - the system call is performed by calling the syscall instruction
++ * - syscall return comes in rax
++ * - rcx and r11 are clobbered, others are preserved.
++ * - the arguments are cast to long and assigned into the target registers
++ * which are then simply passed as registers to the asm code, so that we
++ * don't have to experience issues with register constraints.
++ * - the syscall number is always specified last in order to allow to force
++ * some registers before (gcc refuses a %-register at the last position).
++ * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
++ * Calling Conventions.
++ *
++ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
++ *
++ */
++
++#define my_syscall0(num) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall1(num, arg1) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall3(num, arg1, arg2, arg3) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ register long _arg3 asm("rdx") = (long)(arg3); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall4(num, arg1, arg2, arg3, arg4) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ register long _arg3 asm("rdx") = (long)(arg3); \
++ register long _arg4 asm("r10") = (long)(arg4); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ register long _arg3 asm("rdx") = (long)(arg3); \
++ register long _arg4 asm("r10") = (long)(arg4); \
++ register long _arg5 asm("r8") = (long)(arg5); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
++({ \
++ long _ret; \
++ register long _num asm("rax") = (num); \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ register long _arg3 asm("rdx") = (long)(arg3); \
++ register long _arg4 asm("r10") = (long)(arg4); \
++ register long _arg5 asm("r8") = (long)(arg5); \
++ register long _arg6 asm("r9") = (long)(arg6); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
++ "r"(_arg6), "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++/* startup code */
++/*
++ * x86-64 System V ABI mandates:
++ * 1) %rsp must be 16-byte aligned right before the function call.
++ * 2) The deepest stack frame should be zero (the %rbp).
++ *
++ */
++asm(".section .text\n"
++ ".weak _start\n"
++ "_start:\n"
++ "pop %rdi\n" // argc (first arg, %rdi)
++ "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
++ "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
++ "xor %ebp, %ebp\n" // zero the stack frame
++ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
++ "call main\n" // main() returns the status code, we'll exit with it.
++ "mov %eax, %edi\n" // retrieve exit code (32 bit)
++ "mov $60, %eax\n" // NR_exit == 60
++ "syscall\n" // really exit
++ "hlt\n" // ensure it does not return
++ "");
++
++#endif // _NOLIBC_ARCH_X86_64_H
+diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
+new file mode 100644
+index 0000000000000..4c6992321b0d6
+--- /dev/null
++++ b/tools/include/nolibc/arch.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
++ */
++
++/* Below comes the architecture-specific code. For each architecture, we have
++ * the syscall declarations and the _start code definition. This is the only
++ * global part. On all architectures the kernel puts everything in the stack
++ * before jumping to _start just above us, without any return address (_start
++ * is not a function but an entry pint). So at the stack pointer we find argc.
++ * Then argv[] begins, and ends at the first NULL. Then we have envp which
++ * starts and ends with a NULL as well. So envp=argv+argc+1.
++ */
++
++#ifndef _NOLIBC_ARCH_H
++#define _NOLIBC_ARCH_H
++
++#if defined(__x86_64__)
++#include "arch-x86_64.h"
++#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
++#include "arch-i386.h"
++#elif defined(__ARM_EABI__)
++#include "arch-arm.h"
++#elif defined(__aarch64__)
++#include "arch-aarch64.h"
++#elif defined(__mips__) && defined(_ABIO32)
++#include "arch-mips.h"
++#elif defined(__riscv)
++#include "arch-riscv.h"
++#endif
++
++#endif /* _NOLIBC_ARCH_H */
+diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
+index 3430667b0d241..d272b721dc519 100644
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -81,13 +81,21 @@
+ *
+ */
+
++/* standard type definitions */
++#include "std.h"
++
++/* system includes */
+ #include <asm/unistd.h>
++#include <asm/signal.h> // for SIGCHLD
+ #include <asm/ioctls.h>
+ #include <asm/errno.h>
+ #include <linux/fs.h>
+ #include <linux/loop.h>
+ #include <linux/time.h>
++#include "arch.h"
++#include "types.h"
+
++/* Used by programs to avoid std includes */
+ #define NOLIBC
+
+ /* this way it will be removed if unused */
+@@ -104,1318 +112,6 @@ static int errno;
+ */
+ #define MAX_ERRNO 4095
+
+-/* Declare a few quite common macros and types that usually are in stdlib.h,
+- * stdint.h, ctype.h, unistd.h and a few other common locations.
+- */
+-
+-#define NULL ((void *)0)
+-
+-/* stdint types */
+-typedef unsigned char uint8_t;
+-typedef signed char int8_t;
+-typedef unsigned short uint16_t;
+-typedef signed short int16_t;
+-typedef unsigned int uint32_t;
+-typedef signed int int32_t;
+-typedef unsigned long long uint64_t;
+-typedef signed long long int64_t;
+-typedef unsigned long size_t;
+-typedef signed long ssize_t;
+-typedef unsigned long uintptr_t;
+-typedef signed long intptr_t;
+-typedef signed long ptrdiff_t;
+-
+-/* for stat() */
+-typedef unsigned int dev_t;
+-typedef unsigned long ino_t;
+-typedef unsigned int mode_t;
+-typedef signed int pid_t;
+-typedef unsigned int uid_t;
+-typedef unsigned int gid_t;
+-typedef unsigned long nlink_t;
+-typedef signed long off_t;
+-typedef signed long blksize_t;
+-typedef signed long blkcnt_t;
+-typedef signed long time_t;
+-
+-/* for poll() */
+-struct pollfd {
+- int fd;
+- short int events;
+- short int revents;
+-};
+-
+-/* for getdents64() */
+-struct linux_dirent64 {
+- uint64_t d_ino;
+- int64_t d_off;
+- unsigned short d_reclen;
+- unsigned char d_type;
+- char d_name[];
+-};
+-
+-/* commonly an fd_set represents 256 FDs */
+-#define FD_SETSIZE 256
+-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
+-
+-/* needed by wait4() */
+-struct rusage {
+- struct timeval ru_utime;
+- struct timeval ru_stime;
+- long ru_maxrss;
+- long ru_ixrss;
+- long ru_idrss;
+- long ru_isrss;
+- long ru_minflt;
+- long ru_majflt;
+- long ru_nswap;
+- long ru_inblock;
+- long ru_oublock;
+- long ru_msgsnd;
+- long ru_msgrcv;
+- long ru_nsignals;
+- long ru_nvcsw;
+- long ru_nivcsw;
+-};
+-
+-/* stat flags (WARNING, octal here) */
+-#define S_IFDIR 0040000
+-#define S_IFCHR 0020000
+-#define S_IFBLK 0060000
+-#define S_IFREG 0100000
+-#define S_IFIFO 0010000
+-#define S_IFLNK 0120000
+-#define S_IFSOCK 0140000
+-#define S_IFMT 0170000
+-
+-#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
+-#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
+-#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
+-#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
+-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
+-#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
+-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+-
+-#define DT_UNKNOWN 0
+-#define DT_FIFO 1
+-#define DT_CHR 2
+-#define DT_DIR 4
+-#define DT_BLK 6
+-#define DT_REG 8
+-#define DT_LNK 10
+-#define DT_SOCK 12
+-
+-/* all the *at functions */
+-#ifndef AT_FDCWD
+-#define AT_FDCWD -100
+-#endif
+-
+-/* lseek */
+-#define SEEK_SET 0
+-#define SEEK_CUR 1
+-#define SEEK_END 2
+-
+-/* reboot */
+-#define LINUX_REBOOT_MAGIC1 0xfee1dead
+-#define LINUX_REBOOT_MAGIC2 0x28121969
+-#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+-#define LINUX_REBOOT_CMD_RESTART 0x01234567
+-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+-
+-
+-/* The format of the struct as returned by the libc to the application, which
+- * significantly differs from the format returned by the stat() syscall flavours.
+- */
+-struct stat {
+- dev_t st_dev; /* ID of device containing file */
+- ino_t st_ino; /* inode number */
+- mode_t st_mode; /* protection */
+- nlink_t st_nlink; /* number of hard links */
+- uid_t st_uid; /* user ID of owner */
+- gid_t st_gid; /* group ID of owner */
+- dev_t st_rdev; /* device ID (if special file) */
+- off_t st_size; /* total size, in bytes */
+- blksize_t st_blksize; /* blocksize for file system I/O */
+- blkcnt_t st_blocks; /* number of 512B blocks allocated */
+- time_t st_atime; /* time of last access */
+- time_t st_mtime; /* time of last modification */
+- time_t st_ctime; /* time of last status change */
+-};
+-
+-#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+-#define WIFEXITED(status) (((status) & 0x7f) == 0)
+-
+-/* for SIGCHLD */
+-#include <asm/signal.h>
+-
+-/* Below comes the architecture-specific code. For each architecture, we have
+- * the syscall declarations and the _start code definition. This is the only
+- * global part. On all architectures the kernel puts everything in the stack
+- * before jumping to _start just above us, without any return address (_start
+- * is not a function but an entry pint). So at the stack pointer we find argc.
+- * Then argv[] begins, and ends at the first NULL. Then we have envp which
+- * starts and ends with a NULL as well. So envp=argv+argc+1.
+- */
+-
+-#if defined(__x86_64__)
+-/* Syscalls for x86_64 :
+- * - registers are 64-bit
+- * - syscall number is passed in rax
+- * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+- * - the system call is performed by calling the syscall instruction
+- * - syscall return comes in rax
+- * - rcx and r8..r11 may be clobbered, others are preserved.
+- * - the arguments are cast to long and assigned into the target registers
+- * which are then simply passed as registers to the asm code, so that we
+- * don't have to experience issues with register constraints.
+- * - the syscall number is always specified last in order to allow to force
+- * some registers before (gcc refuses a %-register at the last position).
+- */
+-
+-#define my_syscall0(num) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret) \
+- : "0"(_num) \
+- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), \
+- "0"(_num) \
+- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- register long _arg2 asm("rsi") = (long)(arg2); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), \
+- "0"(_num) \
+- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- register long _arg2 asm("rsi") = (long)(arg2); \
+- register long _arg3 asm("rdx") = (long)(arg3); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+- "0"(_num) \
+- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- register long _arg2 asm("rsi") = (long)(arg2); \
+- register long _arg3 asm("rdx") = (long)(arg3); \
+- register long _arg4 asm("r10") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret), "=r"(_arg4) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+- "0"(_num) \
+- : "rcx", "r8", "r9", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- register long _arg2 asm("rsi") = (long)(arg2); \
+- register long _arg3 asm("rdx") = (long)(arg3); \
+- register long _arg4 asm("r10") = (long)(arg4); \
+- register long _arg5 asm("r8") = (long)(arg5); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "0"(_num) \
+- : "rcx", "r9", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+-({ \
+- long _ret; \
+- register long _num asm("rax") = (num); \
+- register long _arg1 asm("rdi") = (long)(arg1); \
+- register long _arg2 asm("rsi") = (long)(arg2); \
+- register long _arg3 asm("rdx") = (long)(arg3); \
+- register long _arg4 asm("r10") = (long)(arg4); \
+- register long _arg5 asm("r8") = (long)(arg5); \
+- register long _arg6 asm("r9") = (long)(arg6); \
+- \
+- asm volatile ( \
+- "syscall\n" \
+- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "r"(_arg6), "0"(_num) \
+- : "rcx", "r11", "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+- ".global _start\n"
+- "_start:\n"
+- "pop %rdi\n" // argc (first arg, %rdi)
+- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
+- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when
+- "sub $8, %rsp\n" // entering the callee
+- "call main\n" // main() returns the status code, we'll exit with it.
+- "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits
+- "mov $60, %rax\n" // NR_exit == 60
+- "syscall\n" // really exit
+- "hlt\n" // ensure it does not return
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_CREAT 0x40
+-#define O_EXCL 0x80
+-#define O_NOCTTY 0x100
+-#define O_TRUNC 0x200
+-#define O_APPEND 0x400
+-#define O_NONBLOCK 0x800
+-#define O_DIRECTORY 0x10000
+-
+-/* The struct returned by the stat() syscall, equivalent to stat64(). The
+- * syscall returns 116 bytes and stops in the middle of __unused.
+- */
+-struct sys_stat_struct {
+- unsigned long st_dev;
+- unsigned long st_ino;
+- unsigned long st_nlink;
+- unsigned int st_mode;
+- unsigned int st_uid;
+-
+- unsigned int st_gid;
+- unsigned int __pad0;
+- unsigned long st_rdev;
+- long st_size;
+- long st_blksize;
+-
+- long st_blocks;
+- unsigned long st_atime;
+- unsigned long st_atime_nsec;
+- unsigned long st_mtime;
+-
+- unsigned long st_mtime_nsec;
+- unsigned long st_ctime;
+- unsigned long st_ctime_nsec;
+- long __unused[3];
+-};
+-
+-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+-/* Syscalls for i386 :
+- * - mostly similar to x86_64
+- * - registers are 32-bit
+- * - syscall number is passed in eax
+- * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+- * - all registers are preserved (except eax of course)
+- * - the system call is performed by calling int $0x80
+- * - syscall return comes in eax
+- * - the arguments are cast to long and assigned into the target registers
+- * which are then simply passed as registers to the asm code, so that we
+- * don't have to experience issues with register constraints.
+- * - the syscall number is always specified last in order to allow to force
+- * some registers before (gcc refuses a %-register at the last position).
+- *
+- * Also, i386 supports the old_select syscall if newselect is not available
+- */
+-#define __ARCH_WANT_SYS_OLD_SELECT
+-
+-#define my_syscall0(num) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- register long _arg1 asm("ebx") = (long)(arg1); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), \
+- "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- register long _arg1 asm("ebx") = (long)(arg1); \
+- register long _arg2 asm("ecx") = (long)(arg2); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), \
+- "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- register long _arg1 asm("ebx") = (long)(arg1); \
+- register long _arg2 asm("ecx") = (long)(arg2); \
+- register long _arg3 asm("edx") = (long)(arg3); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+- "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- register long _arg1 asm("ebx") = (long)(arg1); \
+- register long _arg2 asm("ecx") = (long)(arg2); \
+- register long _arg3 asm("edx") = (long)(arg3); \
+- register long _arg4 asm("esi") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+- "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- long _ret; \
+- register long _num asm("eax") = (num); \
+- register long _arg1 asm("ebx") = (long)(arg1); \
+- register long _arg2 asm("ecx") = (long)(arg2); \
+- register long _arg3 asm("edx") = (long)(arg3); \
+- register long _arg4 asm("esi") = (long)(arg4); \
+- register long _arg5 asm("edi") = (long)(arg5); \
+- \
+- asm volatile ( \
+- "int $0x80\n" \
+- : "=a" (_ret) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "0"(_num) \
+- : "memory", "cc" \
+- ); \
+- _ret; \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+- ".global _start\n"
+- "_start:\n"
+- "pop %eax\n" // argc (first arg, %eax)
+- "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
+- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when
+- "push %ecx\n" // push all registers on the stack so that we
+- "push %ebx\n" // support both regparm and plain stack modes
+- "push %eax\n"
+- "call main\n" // main() returns the status code in %eax
+- "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits
+- "movl $1, %eax\n" // NR_exit == 1
+- "int $0x80\n" // exit now
+- "hlt\n" // ensure it does not
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_CREAT 0x40
+-#define O_EXCL 0x80
+-#define O_NOCTTY 0x100
+-#define O_TRUNC 0x200
+-#define O_APPEND 0x400
+-#define O_NONBLOCK 0x800
+-#define O_DIRECTORY 0x10000
+-
+-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+- * exactly 56 bytes (stops before the unused array).
+- */
+-struct sys_stat_struct {
+- unsigned long st_dev;
+- unsigned long st_ino;
+- unsigned short st_mode;
+- unsigned short st_nlink;
+- unsigned short st_uid;
+- unsigned short st_gid;
+-
+- unsigned long st_rdev;
+- unsigned long st_size;
+- unsigned long st_blksize;
+- unsigned long st_blocks;
+-
+- unsigned long st_atime;
+- unsigned long st_atime_nsec;
+- unsigned long st_mtime;
+- unsigned long st_mtime_nsec;
+-
+- unsigned long st_ctime;
+- unsigned long st_ctime_nsec;
+- unsigned long __unused[2];
+-};
+-
+-#elif defined(__ARM_EABI__)
+-/* Syscalls for ARM in ARM or Thumb modes :
+- * - registers are 32-bit
+- * - stack is 8-byte aligned
+- * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+- * - syscall number is passed in r7
+- * - arguments are in r0, r1, r2, r3, r4, r5
+- * - the system call is performed by calling svc #0
+- * - syscall return comes in r0.
+- * - only lr is clobbered.
+- * - the arguments are cast to long and assigned into the target registers
+- * which are then simply passed as registers to the asm code, so that we
+- * don't have to experience issues with register constraints.
+- * - the syscall number is always specified last in order to allow to force
+- * some registers before (gcc refuses a %-register at the last position).
+- *
+- * Also, ARM supports the old_select syscall if newselect is not available
+- */
+-#define __ARCH_WANT_SYS_OLD_SELECT
+-
+-#define my_syscall0(num) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0"); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0") = (long)(arg1); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), \
+- "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0") = (long)(arg1); \
+- register long _arg2 asm("r1") = (long)(arg2); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), \
+- "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0") = (long)(arg1); \
+- register long _arg2 asm("r1") = (long)(arg2); \
+- register long _arg3 asm("r2") = (long)(arg3); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+- "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0") = (long)(arg1); \
+- register long _arg2 asm("r1") = (long)(arg2); \
+- register long _arg3 asm("r2") = (long)(arg3); \
+- register long _arg4 asm("r3") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+- "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- register long _num asm("r7") = (num); \
+- register long _arg1 asm("r0") = (long)(arg1); \
+- register long _arg2 asm("r1") = (long)(arg2); \
+- register long _arg3 asm("r2") = (long)(arg3); \
+- register long _arg4 asm("r3") = (long)(arg4); \
+- register long _arg5 asm("r4") = (long)(arg5); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r" (_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "r"(_num) \
+- : "memory", "cc", "lr" \
+- ); \
+- _arg1; \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+- ".global _start\n"
+- "_start:\n"
+-#if defined(__THUMBEB__) || defined(__THUMBEL__)
+- /* We enter here in 32-bit mode but if some previous functions were in
+- * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+- * 32-bit now, then switch to 16-bit (is there a better way to do it than
+- * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
+- * it generates correct instructions. Note that we do not support thumb1.
+- */
+- ".code 32\n"
+- "add r0, pc, #1\n"
+- "bx r0\n"
+- ".code 16\n"
+-#endif
+- "pop {%r0}\n" // argc was in the stack
+- "mov %r1, %sp\n" // argv = sp
+- "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+- "add %r2, %r2, $4\n" // ... + 4
+- "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
+- "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
+- "bl main\n" // main() returns the status code, we'll exit with it.
+- "and %r0, %r0, $0xff\n" // limit exit code to 8 bits
+- "movs r7, $1\n" // NR_exit == 1
+- "svc $0x00\n"
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_CREAT 0x40
+-#define O_EXCL 0x80
+-#define O_NOCTTY 0x100
+-#define O_TRUNC 0x200
+-#define O_APPEND 0x400
+-#define O_NONBLOCK 0x800
+-#define O_DIRECTORY 0x4000
+-
+-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+- * exactly 56 bytes (stops before the unused array). In big endian, the format
+- * differs as devices are returned as short only.
+- */
+-struct sys_stat_struct {
+-#if defined(__ARMEB__)
+- unsigned short st_dev;
+- unsigned short __pad1;
+-#else
+- unsigned long st_dev;
+-#endif
+- unsigned long st_ino;
+- unsigned short st_mode;
+- unsigned short st_nlink;
+- unsigned short st_uid;
+- unsigned short st_gid;
+-#if defined(__ARMEB__)
+- unsigned short st_rdev;
+- unsigned short __pad2;
+-#else
+- unsigned long st_rdev;
+-#endif
+- unsigned long st_size;
+- unsigned long st_blksize;
+- unsigned long st_blocks;
+- unsigned long st_atime;
+- unsigned long st_atime_nsec;
+- unsigned long st_mtime;
+- unsigned long st_mtime_nsec;
+- unsigned long st_ctime;
+- unsigned long st_ctime_nsec;
+- unsigned long __unused[2];
+-};
+-
+-#elif defined(__aarch64__)
+-/* Syscalls for AARCH64 :
+- * - registers are 64-bit
+- * - stack is 16-byte aligned
+- * - syscall number is passed in x8
+- * - arguments are in x0, x1, x2, x3, x4, x5
+- * - the system call is performed by calling svc 0
+- * - syscall return comes in x0.
+- * - the arguments are cast to long and assigned into the target registers
+- * which are then simply passed as registers to the asm code, so that we
+- * don't have to experience issues with register constraints.
+- *
+- * On aarch64, select() is not implemented so we have to use pselect6().
+- */
+-#define __ARCH_WANT_SYS_PSELECT6
+-
+-#define my_syscall0(num) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0"); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- register long _arg2 asm("x1") = (long)(arg2); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- register long _arg2 asm("x1") = (long)(arg2); \
+- register long _arg3 asm("x2") = (long)(arg3); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- register long _arg2 asm("x1") = (long)(arg2); \
+- register long _arg3 asm("x2") = (long)(arg3); \
+- register long _arg4 asm("x3") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r"(_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- register long _arg2 asm("x1") = (long)(arg2); \
+- register long _arg3 asm("x2") = (long)(arg3); \
+- register long _arg4 asm("x3") = (long)(arg4); \
+- register long _arg5 asm("x4") = (long)(arg5); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r" (_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+-({ \
+- register long _num asm("x8") = (num); \
+- register long _arg1 asm("x0") = (long)(arg1); \
+- register long _arg2 asm("x1") = (long)(arg2); \
+- register long _arg3 asm("x2") = (long)(arg3); \
+- register long _arg4 asm("x3") = (long)(arg4); \
+- register long _arg5 asm("x4") = (long)(arg5); \
+- register long _arg6 asm("x5") = (long)(arg6); \
+- \
+- asm volatile ( \
+- "svc #0\n" \
+- : "=r" (_arg1) \
+- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "r"(_arg6), "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+- ".global _start\n"
+- "_start:\n"
+- "ldr x0, [sp]\n" // argc (x0) was in the stack
+- "add x1, sp, 8\n" // argv (x1) = sp
+- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
+- "add x2, x2, 8\n" // + 8 (skip null)
+- "add x2, x2, x1\n" // + argv
+- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
+- "bl main\n" // main() returns the status code, we'll exit with it.
+- "and x0, x0, 0xff\n" // limit exit code to 8 bits
+- "mov x8, 93\n" // NR_exit == 93
+- "svc #0\n"
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_CREAT 0x40
+-#define O_EXCL 0x80
+-#define O_NOCTTY 0x100
+-#define O_TRUNC 0x200
+-#define O_APPEND 0x400
+-#define O_NONBLOCK 0x800
+-#define O_DIRECTORY 0x4000
+-
+-/* The struct returned by the newfstatat() syscall. Differs slightly from the
+- * x86_64's stat one by field ordering, so be careful.
+- */
+-struct sys_stat_struct {
+- unsigned long st_dev;
+- unsigned long st_ino;
+- unsigned int st_mode;
+- unsigned int st_nlink;
+- unsigned int st_uid;
+- unsigned int st_gid;
+-
+- unsigned long st_rdev;
+- unsigned long __pad1;
+- long st_size;
+- int st_blksize;
+- int __pad2;
+-
+- long st_blocks;
+- long st_atime;
+- unsigned long st_atime_nsec;
+- long st_mtime;
+-
+- unsigned long st_mtime_nsec;
+- long st_ctime;
+- unsigned long st_ctime_nsec;
+- unsigned int __unused[2];
+-};
+-
+-#elif defined(__mips__) && defined(_ABIO32)
+-/* Syscalls for MIPS ABI O32 :
+- * - WARNING! there's always a delayed slot!
+- * - WARNING again, the syntax is different, registers take a '$' and numbers
+- * do not.
+- * - registers are 32-bit
+- * - stack is 8-byte aligned
+- * - syscall number is passed in v0 (starts at 0xfa0).
+- * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+- * leave some room in the stack for the callee to save a0..a3 if needed.
+- * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+- * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+- * scall32-o32.S in the kernel sources.
+- * - the system call is performed by calling "syscall"
+- * - syscall return comes in v0, and register a3 needs to be checked to know
+- * if an error occurred, in which case errno is in v0.
+- * - the arguments are cast to long and assigned into the target registers
+- * which are then simply passed as registers to the asm code, so that we
+- * don't have to experience issues with register constraints.
+- */
+-
+-#define my_syscall0(num) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg4 asm("a3"); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "syscall\n" \
+- "addiu $sp, $sp, 32\n" \
+- : "=r"(_num), "=r"(_arg4) \
+- : "r"(_num) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg4 asm("a3"); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "syscall\n" \
+- "addiu $sp, $sp, 32\n" \
+- : "=r"(_num), "=r"(_arg4) \
+- : "0"(_num), \
+- "r"(_arg1) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg4 asm("a3"); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "syscall\n" \
+- "addiu $sp, $sp, 32\n" \
+- : "=r"(_num), "=r"(_arg4) \
+- : "0"(_num), \
+- "r"(_arg1), "r"(_arg2) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3"); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "syscall\n" \
+- "addiu $sp, $sp, 32\n" \
+- : "=r"(_num), "=r"(_arg4) \
+- : "0"(_num), \
+- "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "syscall\n" \
+- "addiu $sp, $sp, 32\n" \
+- : "=r" (_num), "=r"(_arg4) \
+- : "0"(_num), \
+- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- register long _num asm("v0") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3") = (long)(arg4); \
+- register long _arg5 = (long)(arg5); \
+- \
+- asm volatile ( \
+- "addiu $sp, $sp, -32\n" \
+- "sw %7, 16($sp)\n" \
+- "syscall\n " \
+- "addiu $sp, $sp, 32\n" \
+- : "=r" (_num), "=r"(_arg4) \
+- : "0"(_num), \
+- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+- : "memory", "cc", "at", "v1", "hi", "lo", \
+- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+- ); \
+- _arg4 ? -_num : _num; \
+-})
+-
+-/* startup code, note that it's called __start on MIPS */
+-asm(".section .text\n"
+- ".set nomips16\n"
+- ".global __start\n"
+- ".set noreorder\n"
+- ".option pic0\n"
+- ".ent __start\n"
+- "__start:\n"
+- "lw $a0,($sp)\n" // argc was in the stack
+- "addiu $a1, $sp, 4\n" // argv = sp + 4
+- "sll $a2, $a0, 2\n" // a2 = argc * 4
+- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
+- "addiu $a2, $a2, 4\n" // ... + 4
+- "li $t0, -8\n"
+- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
+- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
+- "jal main\n" // main() returns the status code, we'll exit with it.
+- "nop\n" // delayed slot
+- "and $a0, $v0, 0xff\n" // limit exit code to 8 bits
+- "li $v0, 4001\n" // NR_exit == 4001
+- "syscall\n"
+- ".end __start\n"
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_APPEND 0x0008
+-#define O_NONBLOCK 0x0080
+-#define O_CREAT 0x0100
+-#define O_TRUNC 0x0200
+-#define O_EXCL 0x0400
+-#define O_NOCTTY 0x0800
+-#define O_DIRECTORY 0x10000
+-
+-/* The struct returned by the stat() syscall. 88 bytes are returned by the
+- * syscall.
+- */
+-struct sys_stat_struct {
+- unsigned int st_dev;
+- long st_pad1[3];
+- unsigned long st_ino;
+- unsigned int st_mode;
+- unsigned int st_nlink;
+- unsigned int st_uid;
+- unsigned int st_gid;
+- unsigned int st_rdev;
+- long st_pad2[2];
+- long st_size;
+- long st_pad3;
+- long st_atime;
+- long st_atime_nsec;
+- long st_mtime;
+- long st_mtime_nsec;
+- long st_ctime;
+- long st_ctime_nsec;
+- long st_blksize;
+- long st_blocks;
+- long st_pad4[14];
+-};
+-
+-#elif defined(__riscv)
+-
+-#if __riscv_xlen == 64
+-#define PTRLOG "3"
+-#define SZREG "8"
+-#elif __riscv_xlen == 32
+-#define PTRLOG "2"
+-#define SZREG "4"
+-#endif
+-
+-/* Syscalls for RISCV :
+- * - stack is 16-byte aligned
+- * - syscall number is passed in a7
+- * - arguments are in a0, a1, a2, a3, a4, a5
+- * - the system call is performed by calling ecall
+- * - syscall return comes in a0
+- * - the arguments are cast to long and assigned into the target
+- * registers which are then simply passed as registers to the asm code,
+- * so that we don't have to experience issues with register constraints.
+- */
+-
+-#define my_syscall0(num) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0"); \
+- \
+- asm volatile ( \
+- "ecall\n\t" \
+- : "=r"(_arg1) \
+- : "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall1(num, arg1) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- \
+- asm volatile ( \
+- "ecall\n" \
+- : "+r"(_arg1) \
+- : "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall2(num, arg1, arg2) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- \
+- asm volatile ( \
+- "ecall\n" \
+- : "+r"(_arg1) \
+- : "r"(_arg2), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall3(num, arg1, arg2, arg3) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- \
+- asm volatile ( \
+- "ecall\n\t" \
+- : "+r"(_arg1) \
+- : "r"(_arg2), "r"(_arg3), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3") = (long)(arg4); \
+- \
+- asm volatile ( \
+- "ecall\n" \
+- : "+r"(_arg1) \
+- : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3") = (long)(arg4); \
+- register long _arg5 asm("a4") = (long)(arg5); \
+- \
+- asm volatile ( \
+- "ecall\n" \
+- : "+r"(_arg1) \
+- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+-({ \
+- register long _num asm("a7") = (num); \
+- register long _arg1 asm("a0") = (long)(arg1); \
+- register long _arg2 asm("a1") = (long)(arg2); \
+- register long _arg3 asm("a2") = (long)(arg3); \
+- register long _arg4 asm("a3") = (long)(arg4); \
+- register long _arg5 asm("a4") = (long)(arg5); \
+- register long _arg6 asm("a5") = (long)(arg6); \
+- \
+- asm volatile ( \
+- "ecall\n" \
+- : "+r"(_arg1) \
+- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+- "r"(_num) \
+- : "memory", "cc" \
+- ); \
+- _arg1; \
+-})
+-
+-/* startup code */
+-asm(".section .text\n"
+- ".global _start\n"
+- "_start:\n"
+- ".option push\n"
+- ".option norelax\n"
+- "lla gp, __global_pointer$\n"
+- ".option pop\n"
+- "ld a0, 0(sp)\n" // argc (a0) was in the stack
+- "add a1, sp, "SZREG"\n" // argv (a1) = sp
+- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
+- "add a2, a2, "SZREG"\n" // + SZREG (skip null)
+- "add a2,a2,a1\n" // + argv
+- "andi sp,a1,-16\n" // sp must be 16-byte aligned
+- "call main\n" // main() returns the status code, we'll exit with it.
+- "andi a0, a0, 0xff\n" // limit exit code to 8 bits
+- "li a7, 93\n" // NR_exit == 93
+- "ecall\n"
+- "");
+-
+-/* fcntl / open */
+-#define O_RDONLY 0
+-#define O_WRONLY 1
+-#define O_RDWR 2
+-#define O_CREAT 0x100
+-#define O_EXCL 0x200
+-#define O_NOCTTY 0x400
+-#define O_TRUNC 0x1000
+-#define O_APPEND 0x2000
+-#define O_NONBLOCK 0x4000
+-#define O_DIRECTORY 0x200000
+-
+-struct sys_stat_struct {
+- unsigned long st_dev; /* Device. */
+- unsigned long st_ino; /* File serial number. */
+- unsigned int st_mode; /* File mode. */
+- unsigned int st_nlink; /* Link count. */
+- unsigned int st_uid; /* User ID of the file's owner. */
+- unsigned int st_gid; /* Group ID of the file's group. */
+- unsigned long st_rdev; /* Device number, if device. */
+- unsigned long __pad1;
+- long st_size; /* Size of file, in bytes. */
+- int st_blksize; /* Optimal block size for I/O. */
+- int __pad2;
+- long st_blocks; /* Number 512-byte blocks allocated. */
+- long st_atime; /* Time of last access. */
+- unsigned long st_atime_nsec;
+- long st_mtime; /* Time of last modification. */
+- unsigned long st_mtime_nsec;
+- long st_ctime; /* Time of last status change. */
+- unsigned long st_ctime_nsec;
+- unsigned int __unused4;
+- unsigned int __unused5;
+-};
+-
+-#endif
+-
+
+ /* Below are the C functions used to declare the raw syscalls. They try to be
+ * architecture-agnostic, and return either a success or -errno. Declaring them
+@@ -2397,9 +1093,9 @@ static __attribute__((unused))
+ int memcmp(const void *s1, const void *s2, size_t n)
+ {
+ size_t ofs = 0;
+- char c1 = 0;
++ int c1 = 0;
+
+- while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
++ while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
+ ofs++;
+ }
+ return c1;
+diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
+new file mode 100644
+index 0000000000000..1747ae1253920
+--- /dev/null
++++ b/tools/include/nolibc/std.h
+@@ -0,0 +1,49 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Standard definitions and types for NOLIBC
++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_STD_H
++#define _NOLIBC_STD_H
++
++/* Declare a few quite common macros and types that usually are in stdlib.h,
++ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
++ * integer type definitions and generic macros here, but avoid OS-specific and
++ * syscall-specific stuff, as this file is expected to be included very early.
++ */
++
++/* note: may already be defined */
++#ifndef NULL
++#define NULL ((void *)0)
++#endif
++
++/* stdint types */
++typedef unsigned char uint8_t;
++typedef signed char int8_t;
++typedef unsigned short uint16_t;
++typedef signed short int16_t;
++typedef unsigned int uint32_t;
++typedef signed int int32_t;
++typedef unsigned long long uint64_t;
++typedef signed long long int64_t;
++typedef unsigned long size_t;
++typedef signed long ssize_t;
++typedef unsigned long uintptr_t;
++typedef signed long intptr_t;
++typedef signed long ptrdiff_t;
++
++/* those are commonly provided by sys/types.h */
++typedef unsigned int dev_t;
++typedef unsigned long ino_t;
++typedef unsigned int mode_t;
++typedef signed int pid_t;
++typedef unsigned int uid_t;
++typedef unsigned int gid_t;
++typedef unsigned long nlink_t;
++typedef signed long off_t;
++typedef signed long blksize_t;
++typedef signed long blkcnt_t;
++typedef signed long time_t;
++
++#endif /* _NOLIBC_STD_H */
+diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
+new file mode 100644
+index 0000000000000..2f09abaf95f19
+--- /dev/null
++++ b/tools/include/nolibc/types.h
+@@ -0,0 +1,133 @@
++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
++/*
++ * Special types used by various syscalls for NOLIBC
++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
++ */
++
++#ifndef _NOLIBC_TYPES_H
++#define _NOLIBC_TYPES_H
++
++#include "std.h"
++#include <linux/time.h>
++
++
++/* Only the generic macros and types may be defined here. The arch-specific
++ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
++ * the layout of sys_stat_struct must not be defined here.
++ */
++
++/* stat flags (WARNING, octal here) */
++#define S_IFDIR 0040000
++#define S_IFCHR 0020000
++#define S_IFBLK 0060000
++#define S_IFREG 0100000
++#define S_IFIFO 0010000
++#define S_IFLNK 0120000
++#define S_IFSOCK 0140000
++#define S_IFMT 0170000
++
++#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
++#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
++#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
++#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
++#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
++#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
++#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
++
++/* dirent types */
++#define DT_UNKNOWN 0x0
++#define DT_FIFO 0x1
++#define DT_CHR 0x2
++#define DT_DIR 0x4
++#define DT_BLK 0x6
++#define DT_REG 0x8
++#define DT_LNK 0xa
++#define DT_SOCK 0xc
++
++/* commonly an fd_set represents 256 FDs */
++#define FD_SETSIZE 256
++
++/* Special FD used by all the *at functions */
++#ifndef AT_FDCWD
++#define AT_FDCWD (-100)
++#endif
++
++/* whence values for lseek() */
++#define SEEK_SET 0
++#define SEEK_CUR 1
++#define SEEK_END 2
++
++/* cmd for reboot() */
++#define LINUX_REBOOT_MAGIC1 0xfee1dead
++#define LINUX_REBOOT_MAGIC2 0x28121969
++#define LINUX_REBOOT_CMD_HALT 0xcdef0123
++#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
++#define LINUX_REBOOT_CMD_RESTART 0x01234567
++#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
++
++/* Macros used on waitpid()'s return status */
++#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
++#define WIFEXITED(status) (((status) & 0x7f) == 0)
++
++
++/* for select() */
++typedef struct {
++ uint32_t fd32[FD_SETSIZE / 32];
++} fd_set;
++
++/* for poll() */
++struct pollfd {
++ int fd;
++ short int events;
++ short int revents;
++};
++
++/* for getdents64() */
++struct linux_dirent64 {
++ uint64_t d_ino;
++ int64_t d_off;
++ unsigned short d_reclen;
++ unsigned char d_type;
++ char d_name[];
++};
++
++/* needed by wait4() */
++struct rusage {
++ struct timeval ru_utime;
++ struct timeval ru_stime;
++ long ru_maxrss;
++ long ru_ixrss;
++ long ru_idrss;
++ long ru_isrss;
++ long ru_minflt;
++ long ru_majflt;
++ long ru_nswap;
++ long ru_inblock;
++ long ru_oublock;
++ long ru_msgsnd;
++ long ru_msgrcv;
++ long ru_nsignals;
++ long ru_nvcsw;
++ long ru_nivcsw;
++};
++
++/* The format of the struct as returned by the libc to the application, which
++ * significantly differs from the format returned by the stat() syscall flavours.
++ */
++struct stat {
++ dev_t st_dev; /* ID of device containing file */
++ ino_t st_ino; /* inode number */
++ mode_t st_mode; /* protection */
++ nlink_t st_nlink; /* number of hard links */
++ uid_t st_uid; /* user ID of owner */
++ gid_t st_gid; /* group ID of owner */
++ dev_t st_rdev; /* device ID (if special file) */
++ off_t st_size; /* total size, in bytes */
++ blksize_t st_blksize; /* blocksize for file system I/O */
++ blkcnt_t st_blocks; /* number of 512B blocks allocated */
++ time_t st_atime; /* time of last access */
++ time_t st_mtime; /* time of last modification */
++ time_t st_ctime; /* time of last status change */
++};
++
++#endif /* _NOLIBC_TYPES_H */
+diff --git a/tools/include/tools/dis-asm-compat.h b/tools/include/tools/dis-asm-compat.h
+new file mode 100644
+index 0000000000000..70f331e23ed3d
+--- /dev/null
++++ b/tools/include/tools/dis-asm-compat.h
+@@ -0,0 +1,55 @@
++/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
++#ifndef _TOOLS_DIS_ASM_COMPAT_H
++#define _TOOLS_DIS_ASM_COMPAT_H
++
++#include <stdio.h>
++#include <dis-asm.h>
++
++/* define types for older binutils version, to centralize ifdef'ery a bit */
++#ifndef DISASM_INIT_STYLED
++enum disassembler_style {DISASSEMBLER_STYLE_NOT_EMPTY};
++typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...);
++#endif
++
++/*
++ * Trivial fprintf wrapper to be used as the fprintf_styled_func argument to
++ * init_disassemble_info_compat() when normal fprintf suffices.
++ */
++static inline int fprintf_styled(void *out,
++ enum disassembler_style style,
++ const char *fmt, ...)
++{
++ va_list args;
++ int r;
++
++ (void)style;
++
++ va_start(args, fmt);
++ r = vfprintf(out, fmt, args);
++ va_end(args);
++
++ return r;
++}
++
++/*
++ * Wrapper for init_disassemble_info() that hides version
++ * differences. Depending on binutils version and architecture either
++ * fprintf_func or fprintf_styled_func will be called.
++ */
++static inline void init_disassemble_info_compat(struct disassemble_info *info,
++ void *stream,
++ fprintf_ftype unstyled_func,
++ fprintf_styled_ftype styled_func)
++{
++#ifdef DISASM_INIT_STYLED
++ init_disassemble_info(info, stream,
++ unstyled_func,
++ styled_func);
++#else
++ (void)styled_func;
++ init_disassemble_info(info, stream,
++ unstyled_func);
++#endif
++}
++
++#endif /* _TOOLS_DIS_ASM_COMPAT_H */
+diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
+index d30439b4b8ab4..869379f91fe48 100644
+--- a/tools/include/uapi/asm/errno.h
++++ b/tools/include/uapi/asm/errno.h
+@@ -9,8 +9,8 @@
+ #include "../../../arch/alpha/include/uapi/asm/errno.h"
+ #elif defined(__mips__)
+ #include "../../../arch/mips/include/uapi/asm/errno.h"
+-#elif defined(__xtensa__)
+-#include "../../../arch/xtensa/include/uapi/asm/errno.h"
++#elif defined(__hppa__)
++#include "../../../arch/parisc/include/uapi/asm/errno.h"
+ #else
+ #include <asm-generic/errno.h>
+ #endif
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index 791f31dd0abee..8330e3ca8fbfb 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -2276,8 +2276,8 @@ union bpf_attr {
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+- * * 0, if current task belongs to the cgroup2.
+- * * 1, if current task does not belong to the cgroup2.
++ * * 1, if current task belongs to the cgroup2.
++ * * 0, if current task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+@@ -5347,7 +5347,8 @@ struct bpf_sock {
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+- __u32 dst_port; /* network byte order */
++ __be16 dst_port; /* network byte order */
++ __u16 :16; /* zero padding */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
+index b3610fdd1feec..4772a115231ae 100644
+--- a/tools/include/uapi/linux/if_link.h
++++ b/tools/include/uapi/linux/if_link.h
+@@ -655,6 +655,7 @@ enum {
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
++ IFLA_BOND_MISSED_MAX,
+ __IFLA_BOND_MAX,
+ };
+
+diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
+index 5859ca0a1439b..93e40f91bd49a 100644
+--- a/tools/include/uapi/sound/asound.h
++++ b/tools/include/uapi/sound/asound.h
+@@ -56,8 +56,10 @@
+ * *
+ ****************************************************************************/
+
++#define AES_IEC958_STATUS_SIZE 24
++
+ struct snd_aes_iec958 {
+- unsigned char status[24]; /* AES/IEC958 channel status bits */
++ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
+ unsigned char subcode[147]; /* AES/IEC958 subcode bits */
+ unsigned char pad; /* nothing */
+ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
+diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
+index 5a5bd74f55bd5..9c366b3a676db 100755
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
+ .format(values))
+ if len(pids) > 1:
+ sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+- ' to specify the desired pid'.format(" ".join(pids)))
++ ' to specify the desired pid'
++ .format(" ".join(map(str, pids))))
+ namespace.pid = pids[0]
+
+ argparser = argparse.ArgumentParser(description=description_text,
+diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
+index 74c3b73a5fbe8..089b73b3cb379 100644
+--- a/tools/lib/bpf/Makefile
++++ b/tools/lib/bpf/Makefile
+@@ -126,7 +126,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
+ sort -u | wc -l)
+ VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ sed 's/\[.*\]//' | \
+- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
++ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
+ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
+
+ CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
+@@ -195,7 +195,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
+ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
+ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ sed 's/\[.*\]//' | \
+- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
++ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \
+ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
+ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
+ diff -u $(OUTPUT)libbpf_global_syms.tmp \
+diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
+index 2401fad090c52..bfd1ce9fe2110 100644
+--- a/tools/lib/bpf/bpf.c
++++ b/tools/lib/bpf/bpf.c
+@@ -480,6 +480,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+ int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+ {
+ union bpf_attr attr;
++ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+@@ -487,7 +488,8 @@ int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, _
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+- return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
++ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
++ return libbpf_err_errno(ret);
+ }
+
+ int bpf_map_delete_elem(int fd, const void *key)
+diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
+index 6fffb3cdf39b9..49bd43b998c8a 100644
+--- a/tools/lib/bpf/bpf.h
++++ b/tools/lib/bpf/bpf.h
+@@ -249,8 +249,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+ __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr);
+
++#ifdef __cplusplus
++/* forward-declaring enums in C++ isn't compatible with pure C enums, so
++ * instead define bpf_enable_stats() as accepting int as an input
++ */
++LIBBPF_API int bpf_enable_stats(int type);
++#else
+ enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
+ LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
++#endif
+
+ struct bpf_prog_bind_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
+index 09ebe3db5f2f8..e4aa9996a5501 100644
+--- a/tools/lib/bpf/bpf_core_read.h
++++ b/tools/lib/bpf/bpf_core_read.h
+@@ -40,7 +40,7 @@ enum bpf_enum_value_kind {
+ #define __CORE_RELO(src, field, info) \
+ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
+
+-#if __BYTE_ORDER == __LITTLE_ENDIAN
++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
+ bpf_probe_read_kernel( \
+ (void *)dst, \
+diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
+index b9987c3efa3c4..956b57d02eb9a 100644
+--- a/tools/lib/bpf/bpf_helpers.h
++++ b/tools/lib/bpf/bpf_helpers.h
+@@ -72,16 +72,21 @@
+ /*
+ * Helper macros to manipulate data structures
+ */
+-#ifndef offsetof
+-#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
+-#endif
+-#ifndef container_of
++
++/* offsetof() definition that uses __builtin_offset() might not preserve field
++ * offset CO-RE relocation properly, so force-redefine offsetof() using
++ * old-school approach which works with CO-RE correctly
++ */
++#undef offsetof
++#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
++
++/* redefined container_of() to ensure we use the above offsetof() macro */
++#undef container_of
+ #define container_of(ptr, type, member) \
+ ({ \
+ void *__mptr = (void *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+-#endif
+
+ /*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
+index 77dc24d58302d..fd23095129782 100644
+--- a/tools/lib/bpf/btf.c
++++ b/tools/lib/bpf/btf.c
+@@ -231,17 +231,23 @@ static int btf_parse_hdr(struct btf *btf)
+ }
+ btf_bswap_hdr(hdr);
+ } else if (hdr->magic != BTF_MAGIC) {
+- pr_debug("Invalid BTF magic:%x\n", hdr->magic);
++ pr_debug("Invalid BTF magic: %x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+- meta_left = btf->raw_size - sizeof(*hdr);
+- if (meta_left < hdr->str_off + hdr->str_len) {
+- pr_debug("Invalid BTF total size:%u\n", btf->raw_size);
++ if (btf->raw_size < hdr->hdr_len) {
++ pr_debug("BTF header len %u larger than data size %u\n",
++ hdr->hdr_len, btf->raw_size);
+ return -EINVAL;
+ }
+
+- if (hdr->type_off + hdr->type_len > hdr->str_off) {
++ meta_left = btf->raw_size - hdr->hdr_len;
++ if (meta_left < (long long)hdr->str_off + hdr->str_len) {
++ pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
++ return -EINVAL;
++ }
++
++ if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
+ pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
+ hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
+ return -EINVAL;
+@@ -641,8 +647,21 @@ int btf__align_of(const struct btf *btf, __u32 id)
+ if (align <= 0)
+ return libbpf_err(align);
+ max_align = max(max_align, align);
++
++ /* if field offset isn't aligned according to field
++ * type's alignment, then struct must be packed
++ */
++ if (btf_member_bitfield_size(t, i) == 0 &&
++ (m->offset % (8 * align)) != 0)
++ return 1;
+ }
+
++ /* if struct/union size isn't a multiple of its alignment,
++ * then struct must be packed
++ */
++ if ((t->size % max_align) != 0)
++ return 1;
++
+ return max_align;
+ }
+ default:
+@@ -2620,15 +2639,11 @@ void btf_ext__free(struct btf_ext *btf_ext)
+ free(btf_ext);
+ }
+
+-struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
++struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
+ {
+ struct btf_ext *btf_ext;
+ int err;
+
+- err = btf_ext_parse_hdr(data, size);
+- if (err)
+- return libbpf_err_ptr(err);
+-
+ btf_ext = calloc(1, sizeof(struct btf_ext));
+ if (!btf_ext)
+ return libbpf_err_ptr(-ENOMEM);
+@@ -2641,6 +2656,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+ }
+ memcpy(btf_ext->data, data, size);
+
++ err = btf_ext_parse_hdr(btf_ext->data, size);
++ if (err)
++ goto done;
++
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+ err = -EINVAL;
+ goto done;
+@@ -2914,8 +2933,10 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+ return libbpf_err(-EINVAL);
+ }
+
+- if (btf_ensure_modifiable(btf))
+- return libbpf_err(-ENOMEM);
++ if (btf_ensure_modifiable(btf)) {
++ err = -ENOMEM;
++ goto done;
++ }
+
+ err = btf_dedup_prep(d);
+ if (err) {
+@@ -3350,8 +3371,8 @@ static long btf_hash_struct(struct btf_type *t)
+ }
+
+ /*
+- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+- * IDs. This check is performed during type graph equivalence check and
++ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
++ * type IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
+@@ -3710,18 +3731,45 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
+ }
+
+ /* Check if given two types are identical ARRAY definitions */
+-static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
++static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+ {
+ struct btf_type *t1, *t2;
+
+ t1 = btf_type_by_id(d->btf, id1);
+ t2 = btf_type_by_id(d->btf, id2);
+ if (!btf_is_array(t1) || !btf_is_array(t2))
+- return 0;
++ return false;
+
+ return btf_equal_array(t1, t2);
+ }
+
++/* Check if given two types are identical STRUCT/UNION definitions */
++static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
++{
++ const struct btf_member *m1, *m2;
++ struct btf_type *t1, *t2;
++ int n, i;
++
++ t1 = btf_type_by_id(d->btf, id1);
++ t2 = btf_type_by_id(d->btf, id2);
++
++ if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
++ return false;
++
++ if (!btf_shallow_equal_struct(t1, t2))
++ return false;
++
++ m1 = btf_members(t1);
++ m2 = btf_members(t2);
++ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
++ if (m1->type != m2->type &&
++ !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
++ !btf_dedup_identical_structs(d, m1->type, m2->type))
++ return false;
++ }
++ return true;
++}
++
+ /*
+ * Check equivalence of BTF type graph formed by candidate struct/union (we'll
+ * call it "candidate graph" in this description for brevity) to a type graph
+@@ -3833,6 +3881,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+
+ hypot_type_id = d->hypot_map[canon_id];
+ if (hypot_type_id <= BTF_MAX_NR_TYPES) {
++ if (hypot_type_id == cand_id)
++ return 1;
+ /* In some cases compiler will generate different DWARF types
+ * for *identical* array type definitions and use them for
+ * different fields within the *same* struct. This breaks type
+@@ -3841,8 +3891,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+ * types within a single CU. So work around that by explicitly
+ * allowing identical array types here.
+ */
+- return hypot_type_id == cand_id ||
+- btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
++ if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
++ return 1;
++ /* It turns out that similar situation can happen with
++ * struct/union sometimes, sigh... Handle the case where
++ * structs/unions are exactly the same, down to the referenced
++ * type IDs. Anything more complicated (e.g., if referenced
++ * types are different, but equivalent) is *way more*
++ * complicated and requires a many-to-many equivalence mapping.
++ */
++ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
++ return 1;
++ return 0;
+ }
+
+ if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
+diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
+index 4a711f990904b..b0ee338a0cc87 100644
+--- a/tools/lib/bpf/btf.h
++++ b/tools/lib/bpf/btf.h
+@@ -80,7 +80,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+ __u32 expected_value_size,
+ __u32 *key_type_id, __u32 *value_type_id);
+
+-LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
++LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
+ LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+ LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
+ __u32 *size);
+diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
+index e4b483f15fb99..b91dd7cd4ffb0 100644
+--- a/tools/lib/bpf/btf_dump.c
++++ b/tools/lib/bpf/btf_dump.c
+@@ -215,6 +215,17 @@ static int btf_dump_resize(struct btf_dump *d)
+ return 0;
+ }
+
++static void btf_dump_free_names(struct hashmap *map)
++{
++ size_t bkt;
++ struct hashmap_entry *cur;
++
++ hashmap__for_each_entry(map, cur, bkt)
++ free((void *)cur->key);
++
++ hashmap__free(map);
++}
++
+ void btf_dump__free(struct btf_dump *d)
+ {
+ int i;
+@@ -233,8 +244,8 @@ void btf_dump__free(struct btf_dump *d)
+ free(d->cached_names);
+ free(d->emit_queue);
+ free(d->decl_stack);
+- hashmap__free(d->type_names);
+- hashmap__free(d->ident_names);
++ btf_dump_free_names(d->type_names);
++ btf_dump_free_names(d->ident_names);
+
+ free(d);
+ }
+@@ -810,14 +821,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ const struct btf_type *t)
+ {
+ const struct btf_member *m;
+- int align, i, bit_sz;
++ int max_align = 1, align, i, bit_sz;
+ __u16 vlen;
+
+- align = btf__align_of(btf, id);
+- /* size of a non-packed struct has to be a multiple of its alignment*/
+- if (align && t->size % align)
+- return true;
+-
+ m = btf_members(t);
+ vlen = btf_vlen(t);
+ /* all non-bitfield fields have to be naturally aligned */
+@@ -826,8 +832,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ bit_sz = btf_member_bitfield_size(t, i);
+ if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
+ return true;
++ max_align = max(align, max_align);
+ }
+-
++ /* size of a non-packed struct has to be a multiple of its alignment */
++ if (t->size % max_align != 0)
++ return true;
+ /*
+ * if original struct was marked as packed, but its layout is
+ * naturally aligned, we'll detect that it's not packed
+@@ -835,44 +844,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ return false;
+ }
+
+-static int chip_away_bits(int total, int at_most)
+-{
+- return total % at_most ? : at_most;
+-}
+-
+ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+- int cur_off, int m_off, int m_bit_sz,
+- int align, int lvl)
++ int cur_off, int next_off, int next_align,
++ bool in_bitfield, int lvl)
+ {
+- int off_diff = m_off - cur_off;
+- int ptr_bits = d->ptr_sz * 8;
++ const struct {
++ const char *name;
++ int bits;
++ } pads[] = {
++ {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
++ };
++ int new_off, pad_bits, bits, i;
++ const char *pad_type;
++
++ if (cur_off >= next_off)
++ return; /* no gap */
++
++ /* For filling out padding we want to take advantage of
++ * natural alignment rules to minimize unnecessary explicit
++ * padding. First, we find the largest type (among long, int,
++ * short, or char) that can be used to force naturally aligned
++ * boundary. Once determined, we'll use such type to fill in
++ * the remaining padding gap. In some cases we can rely on
++ * compiler filling some gaps, but sometimes we need to force
++ * alignment to close natural alignment with markers like
++ * `long: 0` (this is always the case for bitfields). Note
++ * that even if struct itself has, let's say 4-byte alignment
++ * (i.e., it only uses up to int-aligned types), using `long:
++ * X;` explicit padding doesn't actually change struct's
++ * overall alignment requirements, but compiler does take into
++ * account that type's (long, in this example) natural
++ * alignment requirements when adding implicit padding. We use
++ * this fact heavily and don't worry about ruining correct
++ * struct alignment requirement.
++ */
++ for (i = 0; i < ARRAY_SIZE(pads); i++) {
++ pad_bits = pads[i].bits;
++ pad_type = pads[i].name;
+
+- if (off_diff <= 0)
+- /* no gap */
+- return;
+- if (m_bit_sz == 0 && off_diff < align * 8)
+- /* natural padding will take care of a gap */
+- return;
++ new_off = roundup(cur_off, pad_bits);
++ if (new_off <= next_off)
++ break;
++ }
+
+- while (off_diff > 0) {
+- const char *pad_type;
+- int pad_bits;
+-
+- if (ptr_bits > 32 && off_diff > 32) {
+- pad_type = "long";
+- pad_bits = chip_away_bits(off_diff, ptr_bits);
+- } else if (off_diff > 16) {
+- pad_type = "int";
+- pad_bits = chip_away_bits(off_diff, 32);
+- } else if (off_diff > 8) {
+- pad_type = "short";
+- pad_bits = chip_away_bits(off_diff, 16);
+- } else {
+- pad_type = "char";
+- pad_bits = chip_away_bits(off_diff, 8);
++ if (new_off > cur_off && new_off <= next_off) {
++ /* We need explicit `<type>: 0` aligning mark if next
++ * field is right on alignment offset and its
++ * alignment requirement is less strict than <type>'s
++ * alignment (so compiler won't naturally align to the
++ * offset we expect), or if subsequent `<type>: X`,
++ * will actually completely fit in the remaining hole,
++ * making compiler basically ignore `<type>: X`
++ * completely.
++ */
++ if (in_bitfield ||
++ (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
++ (new_off != next_off && next_off - new_off <= new_off - cur_off))
++ /* but for bitfields we'll emit explicit bit count */
++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
++ in_bitfield ? new_off - cur_off : 0);
++ cur_off = new_off;
++ }
++
++ /* Now we know we start at naturally aligned offset for a chosen
++ * padding type (long, int, short, or char), and so the rest is just
++ * a straightforward filling of remaining padding gap with full
++ * `<type>: sizeof(<type>);` markers, except for the last one, which
++ * might need smaller than sizeof(<type>) padding.
++ */
++ while (cur_off != next_off) {
++ bits = min(next_off - cur_off, pad_bits);
++ if (bits == pad_bits) {
++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
++ cur_off += bits;
++ continue;
++ }
++ /* For the remainder padding that doesn't cover entire
++ * pad_type bit length, we pick the smallest necessary type.
++ * This is pure aesthetics, we could have just used `long`,
++ * but having smallest necessary one communicates better the
++ * scale of the padding gap.
++ */
++ for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
++ pad_type = pads[i].name;
++ pad_bits = pads[i].bits;
++ if (pad_bits < bits)
++ continue;
++
++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
++ cur_off += bits;
++ break;
+ }
+- btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+- off_diff -= pad_bits;
+ }
+ }
+
+@@ -892,9 +954,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
+ {
+ const struct btf_member *m = btf_members(t);
+ bool is_struct = btf_is_struct(t);
+- int align, i, packed, off = 0;
++ bool packed, prev_bitfield = false;
++ int align, i, off = 0;
+ __u16 vlen = btf_vlen(t);
+
++ align = btf__align_of(d->btf, id);
+ packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+
+ btf_dump_printf(d, "%s%s%s {",
+@@ -904,37 +968,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
+
+ for (i = 0; i < vlen; i++, m++) {
+ const char *fname;
+- int m_off, m_sz;
++ int m_off, m_sz, m_align;
++ bool in_bitfield;
+
+ fname = btf_name_of(d, m->name_off);
+ m_sz = btf_member_bitfield_size(t, i);
+ m_off = btf_member_bit_offset(t, i);
+- align = packed ? 1 : btf__align_of(d->btf, m->type);
++ m_align = packed ? 1 : btf__align_of(d->btf, m->type);
++
++ in_bitfield = prev_bitfield && m_sz != 0;
+
+- btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
++ btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
+ btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+ btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+ if (m_sz) {
+ btf_dump_printf(d, ": %d", m_sz);
+ off = m_off + m_sz;
++ prev_bitfield = true;
+ } else {
+ m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
+ off = m_off + m_sz * 8;
++ prev_bitfield = false;
+ }
++
+ btf_dump_printf(d, ";");
+ }
+
+ /* pad at the end, if necessary */
+- if (is_struct) {
+- align = packed ? 1 : btf__align_of(d->btf, id);
+- btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
+- lvl + 1);
+- }
++ if (is_struct)
++ btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
+
+- if (vlen)
++ /*
++ * Keep `struct empty {}` on a single line,
++ * only print newline when there are regular or padding fields.
++ */
++ if (vlen || t->size) {
+ btf_dump_printf(d, "\n");
+- btf_dump_printf(d, "%s}", pfx(lvl));
++ btf_dump_printf(d, "%s}", pfx(lvl));
++ } else {
++ btf_dump_printf(d, "}");
++ }
+ if (packed)
+ btf_dump_printf(d, " __attribute__((packed))");
+ }
+@@ -1457,11 +1531,23 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+ const char *orig_name)
+ {
++ char *old_name, *new_name;
+ size_t dup_cnt = 0;
++ int err;
++
++ new_name = strdup(orig_name);
++ if (!new_name)
++ return 1;
+
+ hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+ dup_cnt++;
+- hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
++
++ err = hashmap__set(name_map, new_name, (void *)dup_cnt,
++ (const void **)&old_name, NULL);
++ if (err)
++ free(new_name);
++
++ free(old_name);
+
+ return dup_cnt;
+ }
+@@ -1481,6 +1567,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+ if (s->name_resolved)
+ return *cached_name ? *cached_name : orig_name;
+
++ if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
++ s->name_resolved = 1;
++ return orig_name;
++ }
++
+ dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
+ if (dup_cnt > 1) {
+ const size_t max_len = 256;
+@@ -1829,14 +1920,16 @@ static int btf_dump_array_data(struct btf_dump *d,
+ {
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+- __u32 i, elem_size = 0, elem_type_id;
++ __u32 i, elem_type_id;
++ __s64 elem_size;
+ bool is_array_member;
+
+ elem_type_id = array->type;
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ if (elem_size <= 0) {
+- pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
++ pr_warn("unexpected elem size %zd for array type [%u]\n",
++ (ssize_t)elem_size, id);
+ return -EINVAL;
+ }
+
+@@ -1885,7 +1978,7 @@ static int btf_dump_struct_data(struct btf_dump *d,
+ {
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+- int i, err;
++ int i, err = 0;
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+@@ -2033,9 +2126,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+- __u8 bits_offset)
++ __u8 bits_offset,
++ __u8 bit_sz)
+ {
+- __s64 size = btf__resolve_size(d->btf, id);
++ __s64 size;
++
++ if (bit_sz) {
++ /* bits_offset is at most 7. bit_sz is at most 128. */
++ __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8;
++
++ /* When bit_sz is non zero, it is called from
++ * btf_dump_struct_data() where it only cares about
++ * negative error value.
++ * Return nr_bytes in success case to make it
++ * consistent as the regular integer case below.
++ */
++ return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes;
++ }
++
++ size = btf__resolve_size(d->btf, id);
+
+ if (size < 0 || size >= INT_MAX) {
+ pr_warn("unexpected size [%zu] for id [%u]\n",
+@@ -2186,9 +2295,9 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
+ __u8 bits_offset,
+ __u8 bit_sz)
+ {
+- int size, err;
++ int size, err = 0;
+
+- size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
++ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);
+ if (size < 0)
+ return size;
+ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
+index 8df718a6b142d..4435c09fe132f 100644
+--- a/tools/lib/bpf/gen_loader.c
++++ b/tools/lib/bpf/gen_loader.c
+@@ -480,7 +480,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+ gen->attach_kind = kind;
+ ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+ prefix, attach_name);
+- if (ret == sizeof(gen->attach_target))
++ if (ret >= sizeof(gen->attach_target))
+ gen->error = -ENOSPC;
+ }
+
+@@ -663,9 +663,11 @@ void bpf_gen__prog_load(struct bpf_gen *gen,
+ debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
+ /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
+ cleanup_relos(gen, insns);
+- if (gen->attach_kind)
++ if (gen->attach_kind) {
+ emit_sys_close_blob(gen,
+ attr_field(prog_load_attr, attach_btf_obj_fd));
++ gen->attach_kind = 0;
++ }
+ emit_check_err(gen);
+ /* remember prog_fd in the stack, if successful */
+ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
+index e4f83c304ec92..f87a15bbf53b3 100644
+--- a/tools/lib/bpf/libbpf.c
++++ b/tools/lib/bpf/libbpf.c
+@@ -2993,6 +2993,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
+ }
+ }
+
++ if (!obj->efile.symbols) {
++ pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
++ obj->path);
++ return -ENOENT;
++ }
++
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ idx++;
+@@ -3757,6 +3763,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+ int l = 0, r = obj->nr_programs - 1, m;
+ struct bpf_program *prog;
+
++ if (!obj->nr_programs)
++ return NULL;
++
+ while (l < r) {
+ m = l + (r - l + 1) / 2;
+ prog = &obj->programs[m];
+@@ -3936,7 +3945,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+ {
+ struct bpf_map_info info = {};
+- __u32 len = sizeof(info);
++ __u32 len = sizeof(info), name_len;
+ int new_fd, err;
+ char *new_name;
+
+@@ -3946,7 +3955,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+ if (err)
+ return libbpf_err(err);
+
+- new_name = strdup(info.name);
++ name_len = strlen(info.name);
++ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
++ new_name = strdup(map->name);
++ else
++ new_name = strdup(info.name);
++
+ if (!new_name)
+ return libbpf_err(-errno);
+
+@@ -5132,7 +5146,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
+- if (insn_idx > prog->insns_cnt)
++ if (insn_idx >= prog->insns_cnt)
+ return -EINVAL;
+ insn = &prog->insns[insn_idx];
+
+@@ -5215,9 +5229,10 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+ */
+ prog = NULL;
+ for (i = 0; i < obj->nr_programs; i++) {
+- prog = &obj->programs[i];
+- if (strcmp(prog->sec_name, sec_name) == 0)
++ if (strcmp(obj->programs[i].sec_name, sec_name) == 0) {
++ prog = &obj->programs[i];
+ break;
++ }
+ }
+ if (!prog) {
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+@@ -5232,10 +5247,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+- pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
+- sec_name, insn_idx, i);
+- err = -EINVAL;
+- goto out;
++ /* When __weak subprog is "overridden" by another instance
++ * of the subprog from a different object file, linker still
++ * appends all the .BTF.ext info that used to belong to that
++ * eliminated subprogram.
++ * This is similar to what x86-64 linker does for relocations.
++ * So just ignore such relocations just like we ignore
++ * subprog instructions when discovering subprograms.
++ */
++ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
++ sec_name, i, insn_idx);
++ continue;
+ }
+ /* no need to apply CO-RE relocation if the program is
+ * not going to be loaded
+@@ -8670,7 +8692,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
+ pr_warn("error: inner_map_fd already specified\n");
+ return libbpf_err(-EINVAL);
+ }
+- zfree(&map->inner_map);
++ if (map->inner_map) {
++ bpf_map__destroy(map->inner_map);
++ zfree(&map->inner_map);
++ }
+ map->inner_map_fd = fd;
+ return 0;
+ }
+@@ -10800,6 +10825,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
+
+ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
+ {
++ if (!s)
++ return;
++
+ if (s->progs)
+ bpf_object__detach_skeleton(s);
+ if (s->obj)
+diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
+index cd8c703dde718..8f425473ccaa8 100644
+--- a/tools/lib/bpf/libbpf_probes.c
++++ b/tools/lib/bpf/libbpf_probes.c
+@@ -245,7 +245,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+ case BPF_MAP_TYPE_RINGBUF:
+ key_size = 0;
+ value_size = 0;
+- max_entries = 4096;
++ max_entries = sysconf(_SC_PAGE_SIZE);
+ break;
+ case BPF_MAP_TYPE_UNSPEC:
+ case BPF_MAP_TYPE_HASH:
+diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
+index 2df880cefdaee..6b2f59ddb6918 100644
+--- a/tools/lib/bpf/linker.c
++++ b/tools/lib/bpf/linker.c
+@@ -211,6 +211,7 @@ void bpf_linker__free(struct bpf_linker *linker)
+ }
+ free(linker->secs);
+
++ free(linker->glob_syms);
+ free(linker);
+ }
+
+@@ -2000,7 +2001,7 @@ add_sym:
+ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
+ {
+ struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
+- struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
++ struct dst_sec *dst_symtab;
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+@@ -2033,6 +2034,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
+ return -1;
+ }
+
++ /* add_dst_sec() above could have invalidated linker->secs */
++ dst_symtab = &linker->secs[linker->symtab_sec_idx];
++
+ /* shdr->sh_link points to SYMTAB */
+ dst_sec->shdr->sh_link = linker->symtab_sec_idx;
+
+diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
+index 39f25e09b51e2..fadde7d80a51c 100644
+--- a/tools/lib/bpf/netlink.c
++++ b/tools/lib/bpf/netlink.c
+@@ -87,29 +87,75 @@ enum {
+ NL_DONE,
+ };
+
++static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
++{
++ int len;
++
++ do {
++ len = recvmsg(sock, mhdr, flags);
++ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
++
++ if (len < 0)
++ return -errno;
++ return len;
++}
++
++static int alloc_iov(struct iovec *iov, int len)
++{
++ void *nbuf;
++
++ nbuf = realloc(iov->iov_base, len);
++ if (!nbuf)
++ return -ENOMEM;
++
++ iov->iov_base = nbuf;
++ iov->iov_len = len;
++ return 0;
++}
++
+ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+ {
++ struct iovec iov = {};
++ struct msghdr mhdr = {
++ .msg_iov = &iov,
++ .msg_iovlen = 1,
++ };
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+- char buf[4096];
+ int len, ret;
+
++ ret = alloc_iov(&iov, 4096);
++ if (ret)
++ goto done;
++
+ while (multipart) {
+ start:
+ multipart = false;
+- len = recv(sock, buf, sizeof(buf), 0);
++ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
++ if (len < 0) {
++ ret = len;
++ goto done;
++ }
++
++ if (len > iov.iov_len) {
++ ret = alloc_iov(&iov, len);
++ if (ret)
++ goto done;
++ }
++
++ len = netlink_recvmsg(sock, &mhdr, 0);
+ if (len < 0) {
+- ret = -errno;
++ ret = len;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
++ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+@@ -130,7 +176,8 @@ start:
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+- return 0;
++ ret = 0;
++ goto done;
+ default:
+ break;
+ }
+@@ -142,15 +189,17 @@ start:
+ case NL_NEXT:
+ goto start;
+ case NL_DONE:
+- return 0;
++ ret = 0;
++ goto done;
+ default:
+- return ret;
++ goto done;
+ }
+ }
+ }
+ }
+ ret = 0;
+ done:
++ free(iov.iov_base);
+ return ret;
+ }
+
+diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
+index f57e77a6e40fd..2dbe7b99f28f1 100644
+--- a/tools/lib/bpf/nlattr.c
++++ b/tools/lib/bpf/nlattr.c
+@@ -178,7 +178,7 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
+ hlen += nlmsg_len(&err->msg);
+
+ attr = (struct nlattr *) ((void *) err + hlen);
+- alen = nlh->nlmsg_len - hlen;
++ alen = (void *)nlh + nlh->nlmsg_len - (void *)attr;
+
+ if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+ extack_policy) != 0) {
+diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
+index 8bc117bcc7bcd..c42ba9358d8ce 100644
+--- a/tools/lib/bpf/ringbuf.c
++++ b/tools/lib/bpf/ringbuf.c
+@@ -59,6 +59,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ __u32 len = sizeof(info);
+ struct epoll_event *e;
+ struct ring *r;
++ __u64 mmap_sz;
+ void *tmp;
+ int err;
+
+@@ -97,8 +98,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ r->mask = info.max_entries - 1;
+
+ /* Map writable consumer page */
+- tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+- map_fd, 0);
++ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+@@ -111,8 +111,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ * data size to allow simple reading of samples that wrap around the
+ * end of a ring buffer. See kernel implementation for details.
+ * */
+- tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
+- MAP_SHARED, map_fd, rb->page_size);
++ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
++ if (mmap_sz != (__u64)(size_t)mmap_sz) {
++ pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
++ return libbpf_err(-E2BIG);
++ }
++ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
+index b22b50c1b173e..9cf66702fa8dd 100644
+--- a/tools/lib/bpf/skel_internal.h
++++ b/tools/lib/bpf/skel_internal.h
+@@ -105,10 +105,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
+ err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+ if (err < 0 || (int)attr.test.retval < 0) {
+ opts->errstr = "failed to execute loader prog";
+- if (err < 0)
++ if (err < 0) {
+ err = -errno;
+- else
++ } else {
+ err = (int)attr.test.retval;
++ errno = -err;
++ }
+ goto out;
+ }
+ err = 0;
+diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
+index e9b619aa0cdf3..2be3197914e45 100644
+--- a/tools/lib/bpf/xsk.c
++++ b/tools/lib/bpf/xsk.c
+@@ -1164,8 +1164,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ goto out_mmap_tx;
+ }
+
+- ctx->prog_fd = -1;
+-
+ if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+ err = __xsk_setup_xdp_prog(xsk, NULL);
+ if (err)
+@@ -1210,12 +1208,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+
+ int xsk_umem__delete(struct xsk_umem *umem)
+ {
++ struct xdp_mmap_offsets off;
++ int err;
++
+ if (!umem)
+ return 0;
+
+ if (umem->refcount)
+ return -EBUSY;
+
++ err = xsk_get_mmap_offsets(umem->fd, &off);
++ if (!err && umem->fill_save && umem->comp_save) {
++ munmap(umem->fill_save->ring - off.fr.desc,
++ off.fr.desc + umem->config.fill_size * sizeof(__u64));
++ munmap(umem->comp_save->ring - off.cr.desc,
++ off.cr.desc + umem->config.comp_size * sizeof(__u64));
++ }
++
+ close(umem->fd);
+ free(umem);
+
+@@ -1235,13 +1244,16 @@ void xsk_socket__delete(struct xsk_socket *xsk)
+
+ ctx = xsk->ctx;
+ umem = ctx->umem;
+- if (ctx->prog_fd != -1) {
++
++ if (ctx->refcount == 1) {
+ xsk_delete_bpf_maps(xsk);
+ close(ctx->prog_fd);
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ }
+
++ xsk_put_ctx(ctx, true);
++
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (!err) {
+ if (xsk->rx) {
+@@ -1254,8 +1266,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
+ }
+ }
+
+- xsk_put_ctx(ctx, true);
+-
+ umem->refcount--;
+ /* Do not close an fd that also has an associated umem connected
+ * to it.
+diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
+index e37dfad31383f..5146ff0fa078c 100644
+--- a/tools/lib/perf/evlist.c
++++ b/tools/lib/perf/evlist.c
+@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+ {
+ struct perf_evsel *evsel;
+ const struct perf_cpu_map *cpus = evlist->cpus;
+- const struct perf_thread_map *threads = evlist->threads;
+
+ if (!ops || !ops->get || !ops->mmap)
+ return -EINVAL;
+@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+ perf_evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->sample_id == NULL &&
+- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
++ perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
+ return -ENOMEM;
+ }
+
+diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
+index 794a375dad360..b2aec04fce8f6 100644
+--- a/tools/lib/subcmd/subcmd-util.h
++++ b/tools/lib/subcmd/subcmd-util.h
+@@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...)
+ static inline void *xrealloc(void *ptr, size_t size)
+ {
+ void *ret = realloc(ptr, size);
+- if (!ret && !size)
+- ret = realloc(ptr, 1);
+- if (!ret) {
+- ret = realloc(ptr, size);
+- if (!ret && !size)
+- ret = realloc(ptr, 1);
+- if (!ret)
+- die("Out of memory, realloc failed");
+- }
++ if (!ret)
++ die("Out of memory, realloc failed");
+ return ret;
+ }
+
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 92ce4fce7bc73..549acc5859e9e 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -19,8 +19,8 @@ LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a
+ OBJTOOL := $(OUTPUT)objtool
+ OBJTOOL_IN := $(OBJTOOL)-in.o
+
+-LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null)
+-LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
++LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null)
++LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+
+ all: $(OBJTOOL)
+
+diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
+index 0893436cc09f8..76acd39478eea 100644
+--- a/tools/objtool/arch/x86/decode.c
++++ b/tools/objtool/arch/x86/decode.c
+@@ -529,6 +529,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
+ }
+ break;
+
++ case 0xcc:
++ /* int3 */
++ *type = INSN_TRAP;
++ break;
++
+ case 0xe3:
+ /* jecxz/jrcxz */
+ *type = INSN_JUMP_CONDITIONAL;
+@@ -659,154 +664,52 @@ const char *arch_nop_insn(int len)
+ return nops[len-1];
+ }
+
+-/* asm/alternative.h ? */
+-
+-#define ALTINSTR_FLAG_INV (1 << 15)
+-#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+-
+-struct alt_instr {
+- s32 instr_offset; /* original instruction */
+- s32 repl_offset; /* offset to replacement instruction */
+- u16 cpuid; /* cpuid bit set for replacement */
+- u8 instrlen; /* length of original instruction */
+- u8 replacementlen; /* length of new instruction */
+-} __packed;
++#define BYTE_RET 0xC3
+
+-static int elf_add_alternative(struct elf *elf,
+- struct instruction *orig, struct symbol *sym,
+- int cpuid, u8 orig_len, u8 repl_len)
++const char *arch_ret_insn(int len)
+ {
+- const int size = sizeof(struct alt_instr);
+- struct alt_instr *alt;
+- struct section *sec;
+- Elf_Scn *s;
+-
+- sec = find_section_by_name(elf, ".altinstructions");
+- if (!sec) {
+- sec = elf_create_section(elf, ".altinstructions",
+- SHF_ALLOC, 0, 0);
+-
+- if (!sec) {
+- WARN_ELF("elf_create_section");
+- return -1;
+- }
+- }
+-
+- s = elf_getscn(elf->elf, sec->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return -1;
+- }
+-
+- sec->data = elf_newdata(s);
+- if (!sec->data) {
+- WARN_ELF("elf_newdata");
+- return -1;
+- }
+-
+- sec->data->d_size = size;
+- sec->data->d_align = 1;
+-
+- alt = sec->data->d_buf = malloc(size);
+- if (!sec->data->d_buf) {
+- perror("malloc");
+- return -1;
+- }
+- memset(sec->data->d_buf, 0, size);
+-
+- if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+- R_X86_64_PC32, orig->sec, orig->offset)) {
+- WARN("elf_create_reloc: alt_instr::instr_offset");
+- return -1;
+- }
+-
+- if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+- R_X86_64_PC32, sym, 0)) {
+- WARN("elf_create_reloc: alt_instr::repl_offset");
+- return -1;
+- }
+-
+- alt->cpuid = bswap_if_needed(cpuid);
+- alt->instrlen = orig_len;
+- alt->replacementlen = repl_len;
+-
+- sec->sh.sh_size += size;
+- sec->changed = true;
+-
+- return 0;
+-}
+-
+-#define X86_FEATURE_RETPOLINE ( 7*32+12)
+-
+-int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+- struct instruction *insn;
+- struct reloc *reloc;
+- struct symbol *sym;
+- char name[32] = "";
+-
+- list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+-
+- if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC)
+- continue;
+-
+- if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+- continue;
+-
+- reloc = insn->reloc;
+-
+- sprintf(name, "__x86_indirect_alt_%s_%s",
+- insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+- reloc->sym->name + 21);
+-
+- sym = find_symbol_by_name(file->elf, name);
+- if (!sym) {
+- sym = elf_create_undef_symbol(file->elf, name);
+- if (!sym) {
+- WARN("elf_create_undef_symbol");
+- return -1;
+- }
+- }
++ static const char ret[5][5] = {
++ { BYTE_RET },
++ { BYTE_RET, 0xcc },
++ { BYTE_RET, 0xcc, BYTES_NOP1 },
++ { BYTE_RET, 0xcc, BYTES_NOP2 },
++ { BYTE_RET, 0xcc, BYTES_NOP3 },
++ };
+
+- if (elf_add_alternative(file->elf, insn, sym,
+- ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+- WARN("elf_add_alternative");
+- return -1;
+- }
++ if (len < 1 || len > 5) {
++ WARN("invalid RET size: %d\n", len);
++ return NULL;
+ }
+
+- return 0;
++ return ret[len-1];
+ }
+
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
++int arch_decode_hint_reg(u8 sp_reg, int *base)
+ {
+- struct cfi_reg *cfa = &insn->cfi.cfa;
+-
+ switch (sp_reg) {
+ case ORC_REG_UNDEFINED:
+- cfa->base = CFI_UNDEFINED;
++ *base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+- cfa->base = CFI_SP;
++ *base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+- cfa->base = CFI_BP;
++ *base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+- cfa->base = CFI_SP_INDIRECT;
++ *base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+- cfa->base = CFI_R10;
++ *base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+- cfa->base = CFI_R13;
++ *base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+- cfa->base = CFI_DI;
++ *base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+- cfa->base = CFI_DX;
++ *base = CFI_DX;
+ break;
+ default:
+ return -1;
+@@ -819,3 +722,14 @@ bool arch_is_retpoline(struct symbol *sym)
+ {
+ return !strncmp(sym->name, "__x86_indirect_", 15);
+ }
++
++bool arch_is_rethunk(struct symbol *sym)
++{
++ return !strcmp(sym->name, "__x86_return_thunk");
++}
++
++bool arch_is_embedded_insn(struct symbol *sym)
++{
++ return !strcmp(sym->name, "retbleed_return_thunk") ||
++ !strcmp(sym->name, "srso_safe_ret");
++}
+diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
+index 8b38b5d6fec7b..35081fe373203 100644
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -20,7 +20,7 @@
+ #include <objtool/objtool.h>
+
+ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, mcount, noinstr, backup;
++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk;
+
+ static const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+@@ -36,6 +36,8 @@ const struct option check_options[] = {
+ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
++ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"),
++ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"),
+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+ OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+ OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+@@ -45,6 +47,7 @@ const struct option check_options[] = {
+ OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
+ OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
+ OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
++ OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
+ OPT_END(),
+ };
+
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index 06b5c164ae931..36ad0b6b94a91 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -5,6 +5,8 @@
+
+ #include <string.h>
+ #include <stdlib.h>
++#include <inttypes.h>
++#include <sys/mman.h>
+
+ #include <arch/elf.h>
+ #include <objtool/builtin.h>
+@@ -26,7 +28,11 @@ struct alternative {
+ bool skip_orig;
+ };
+
+-struct cfi_init_state initial_func_cfi;
++static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
++
++static struct cfi_init_state initial_func_cfi;
++static struct cfi_state init_cfi;
++static struct cfi_state func_cfi;
+
+ struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset)
+@@ -163,6 +169,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ "panic",
+ "do_exit",
+ "do_task_dead",
++ "make_task_dead",
+ "__module_put_and_exit",
+ "complete_and_exit",
+ "__reiserfs_panic",
+@@ -170,9 +177,11 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ "fortify_panic",
+ "usercopy_abort",
+ "machine_real_restart",
+- "rewind_stack_do_exit",
++ "rewind_stack_and_make_dead",
+ "kunit_try_catch_throw",
+ "xen_start_kernel",
++ "cpu_bringup_and_idle",
++ "stop_this_cpu",
+ };
+
+ if (!func)
+@@ -190,7 +199,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ return false;
+
+ insn = find_insn(file, func->sec, func->offset);
+- if (!insn->func)
++ if (!insn || !insn->func)
+ return false;
+
+ func_for_each_insn(file, func, insn) {
+@@ -265,6 +274,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
+ state->noinstr = sec->noinstr;
+ }
+
++static struct cfi_state *cfi_alloc(void)
++{
++ struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1);
++ if (!cfi) {
++ WARN("calloc failed");
++ exit(1);
++ }
++ nr_cfi++;
++ return cfi;
++}
++
++static int cfi_bits;
++static struct hlist_head *cfi_hash;
++
++static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2)
++{
++ return memcmp((void *)cfi1 + sizeof(cfi1->hash),
++ (void *)cfi2 + sizeof(cfi2->hash),
++ sizeof(struct cfi_state) - sizeof(struct hlist_node));
++}
++
++static inline u32 cfi_key(struct cfi_state *cfi)
++{
++ return jhash((void *)cfi + sizeof(cfi->hash),
++ sizeof(*cfi) - sizeof(cfi->hash), 0);
++}
++
++static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi)
++{
++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++ struct cfi_state *obj;
++
++ hlist_for_each_entry(obj, head, hash) {
++ if (!cficmp(cfi, obj)) {
++ nr_cfi_cache++;
++ return obj;
++ }
++ }
++
++ obj = cfi_alloc();
++ *obj = *cfi;
++ hlist_add_head(&obj->hash, head);
++
++ return obj;
++}
++
++static void cfi_hash_add(struct cfi_state *cfi)
++{
++ struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)];
++
++ hlist_add_head(&cfi->hash, head);
++}
++
++static void *cfi_hash_alloc(unsigned long size)
++{
++ cfi_bits = max(10, ilog2(size));
++ cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits,
++ PROT_READ|PROT_WRITE,
++ MAP_PRIVATE|MAP_ANON, -1, 0);
++ if (cfi_hash == (void *)-1L) {
++ WARN("mmap fail cfi_hash");
++ cfi_hash = NULL;
++ } else if (stats) {
++ printf("cfi_bits: %d\n", cfi_bits);
++ }
++
++ return cfi_hash;
++}
++
++static unsigned long nr_insns;
++static unsigned long nr_insns_visited;
++
+ /*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+@@ -275,7 +356,6 @@ static int decode_instructions(struct objtool_file *file)
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+- unsigned long nr_insns = 0;
+ int ret;
+
+ for_each_sec(file, sec) {
+@@ -289,7 +369,8 @@ static int decode_instructions(struct objtool_file *file)
+ sec->text = true;
+
+ if (!strcmp(sec->name, ".noinstr.text") ||
+- !strcmp(sec->name, ".entry.text"))
++ !strcmp(sec->name, ".entry.text") ||
++ !strncmp(sec->name, ".text..__x86.", 13))
+ sec->noinstr = true;
+
+ for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
+@@ -301,7 +382,6 @@ static int decode_instructions(struct objtool_file *file)
+ memset(insn, 0, sizeof(*insn));
+ INIT_LIST_HEAD(&insn->alts);
+ INIT_LIST_HEAD(&insn->stack_ops);
+- init_cfi_state(&insn->cfi);
+
+ insn->sec = sec;
+ insn->offset = offset;
+@@ -392,12 +472,12 @@ static int add_dead_ends(struct objtool_file *file)
+ else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find unreachable insn at %s+0x%x",
++ WARN("can't find unreachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find unreachable insn at %s+0x%x",
++ WARN("can't find unreachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+@@ -427,12 +507,12 @@ reachable:
+ else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
+- WARN("can't find reachable insn at %s+0x%x",
++ WARN("can't find reachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+ } else {
+- WARN("can't find reachable insn at %s+0x%x",
++ WARN("can't find reachable insn at %s+0x%" PRIx64,
+ reloc->sym->sec->name, reloc->addend);
+ return -1;
+ }
+@@ -493,6 +573,7 @@ static int create_static_call_sections(struct objtool_file *file)
+ if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+ STATIC_CALL_TRAMP_PREFIX_LEN)) {
+ WARN("static_call: trampoline name malformed: %s", key_name);
++ free(key_name);
+ return -1;
+ }
+ tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+@@ -502,6 +583,7 @@ static int create_static_call_sections(struct objtool_file *file)
+ if (!key_sym) {
+ if (!module) {
+ WARN("static_call: can't find static_call_key symbol: %s", tmp);
++ free(key_name);
+ return -1;
+ }
+
+@@ -531,6 +613,98 @@ static int create_static_call_sections(struct objtool_file *file)
+ return 0;
+ }
+
++static int create_retpoline_sites_sections(struct objtool_file *file)
++{
++ struct instruction *insn;
++ struct section *sec;
++ int idx;
++
++ sec = find_section_by_name(file->elf, ".retpoline_sites");
++ if (sec) {
++ WARN("file already has .retpoline_sites, skipping");
++ return 0;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->retpoline_call_list, call_node)
++ idx++;
++
++ if (!idx)
++ return 0;
++
++ sec = elf_create_section(file->elf, ".retpoline_sites", 0,
++ sizeof(int), idx);
++ if (!sec) {
++ WARN("elf_create_section: .retpoline_sites");
++ return -1;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
++
++ int *site = (int *)sec->data->d_buf + idx;
++ *site = 0;
++
++ if (elf_add_reloc_to_insn(file->elf, sec,
++ idx * sizeof(int),
++ R_X86_64_PC32,
++ insn->sec, insn->offset)) {
++ WARN("elf_add_reloc_to_insn: .retpoline_sites");
++ return -1;
++ }
++
++ idx++;
++ }
++
++ return 0;
++}
++
++static int create_return_sites_sections(struct objtool_file *file)
++{
++ struct instruction *insn;
++ struct section *sec;
++ int idx;
++
++ sec = find_section_by_name(file->elf, ".return_sites");
++ if (sec) {
++ WARN("file already has .return_sites, skipping");
++ return 0;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->return_thunk_list, call_node)
++ idx++;
++
++ if (!idx)
++ return 0;
++
++ sec = elf_create_section(file->elf, ".return_sites", 0,
++ sizeof(int), idx);
++ if (!sec) {
++ WARN("elf_create_section: .return_sites");
++ return -1;
++ }
++
++ idx = 0;
++ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
++
++ int *site = (int *)sec->data->d_buf + idx;
++ *site = 0;
++
++ if (elf_add_reloc_to_insn(file->elf, sec,
++ idx * sizeof(int),
++ R_X86_64_PC32,
++ insn->sec, insn->offset)) {
++ WARN("elf_add_reloc_to_insn: .return_sites");
++ return -1;
++ }
++
++ idx++;
++ }
++
++ return 0;
++}
++
+ static int create_mcount_loc_sections(struct objtool_file *file)
+ {
+ struct section *sec;
+@@ -549,7 +723,7 @@ static int create_mcount_loc_sections(struct objtool_file *file)
+ return 0;
+
+ idx = 0;
+- list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node)
++ list_for_each_entry(insn, &file->mcount_loc_list, call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+@@ -557,7 +731,7 @@ static int create_mcount_loc_sections(struct objtool_file *file)
+ return -1;
+
+ idx = 0;
+- list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
++ list_for_each_entry(insn, &file->mcount_loc_list, call_node) {
+
+ loc = (unsigned long *)sec->data->d_buf + idx;
+ memset(loc, 0, sizeof(unsigned long));
+@@ -676,6 +850,16 @@ static const char *uaccess_safe_builtin[] = {
+ "__tsan_read_write4",
+ "__tsan_read_write8",
+ "__tsan_read_write16",
++ "__tsan_volatile_read1",
++ "__tsan_volatile_read2",
++ "__tsan_volatile_read4",
++ "__tsan_volatile_read8",
++ "__tsan_volatile_read16",
++ "__tsan_volatile_write1",
++ "__tsan_volatile_write2",
++ "__tsan_volatile_write4",
++ "__tsan_volatile_write8",
++ "__tsan_volatile_write16",
+ "__tsan_atomic8_load",
+ "__tsan_atomic16_load",
+ "__tsan_atomic32_load",
+@@ -726,6 +910,8 @@ static const char *uaccess_safe_builtin[] = {
+ "__tsan_atomic64_compare_exchange_val",
+ "__tsan_atomic_thread_fence",
+ "__tsan_atomic_signal_fence",
++ "__tsan_unaligned_read16",
++ "__tsan_unaligned_write16",
+ /* KCOV */
+ "write_comp_data",
+ "check_kcov_mode",
+@@ -804,11 +990,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
+ return 0;
+ }
+
++/*
++ * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
++ * will be added to the .retpoline_sites section.
++ */
+ __weak bool arch_is_retpoline(struct symbol *sym)
+ {
+ return false;
+ }
+
++/*
++ * Symbols that replace INSN_RETURN, every (tail) call to such a symbol
++ * will be added to the .return_sites section.
++ */
++__weak bool arch_is_rethunk(struct symbol *sym)
++{
++ return false;
++}
++
++/*
++ * Symbols that are embedded inside other instructions, because sometimes crazy
++ * code exists. These are mostly ignored for validation purposes.
++ */
++__weak bool arch_is_embedded_insn(struct symbol *sym)
++{
++ return false;
++}
++
+ #define NEGATIVE_RELOC ((void *)-1L)
+
+ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+@@ -828,6 +1036,162 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i
+ return insn->reloc;
+ }
+
++static void remove_insn_ops(struct instruction *insn)
++{
++ struct stack_op *op, *tmp;
++
++ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
++ list_del(&op->list);
++ free(op);
++ }
++}
++
++static void annotate_call_site(struct objtool_file *file,
++ struct instruction *insn, bool sibling)
++{
++ struct reloc *reloc = insn_reloc(file, insn);
++ struct symbol *sym = insn->call_dest;
++
++ if (!sym)
++ sym = reloc->sym;
++
++ /*
++ * Alternative replacement code is just template code which is
++ * sometimes copied to the original instruction. For now, don't
++ * annotate it. (In the future we might consider annotating the
++ * original instruction if/when it ever makes sense to do so.)
++ */
++ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
++ return;
++
++ if (sym->static_call_tramp) {
++ list_add_tail(&insn->call_node, &file->static_call_list);
++ return;
++ }
++
++ if (sym->retpoline_thunk) {
++ list_add_tail(&insn->call_node, &file->retpoline_call_list);
++ return;
++ }
++
++ /*
++ * Many compilers cannot disable KCOV with a function attribute
++ * so they need a little help, NOP out any KCOV calls from noinstr
++ * text.
++ */
++ if (insn->sec->noinstr && sym->kcov) {
++ if (reloc) {
++ reloc->type = R_NONE;
++ elf_write_reloc(file->elf, reloc);
++ }
++
++ elf_write_insn(file->elf, insn->sec,
++ insn->offset, insn->len,
++ sibling ? arch_ret_insn(insn->len)
++ : arch_nop_insn(insn->len));
++
++ insn->type = sibling ? INSN_RETURN : INSN_NOP;
++
++ if (sibling) {
++ /*
++ * We've replaced the tail-call JMP insn by two new
++ * insn: RET; INT3, except we only have a single struct
++ * insn here. Mark it retpoline_safe to avoid the SLS
++ * warning, instead of adding another insn.
++ */
++ insn->retpoline_safe = true;
++ }
++
++ return;
++ }
++
++ if (mcount && sym->fentry) {
++ if (sibling)
++ WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset);
++
++ if (reloc) {
++ reloc->type = R_NONE;
++ elf_write_reloc(file->elf, reloc);
++ }
++
++ elf_write_insn(file->elf, insn->sec,
++ insn->offset, insn->len,
++ arch_nop_insn(insn->len));
++
++ insn->type = INSN_NOP;
++
++ list_add_tail(&insn->call_node, &file->mcount_loc_list);
++ return;
++ }
++}
++
++static void add_call_dest(struct objtool_file *file, struct instruction *insn,
++ struct symbol *dest, bool sibling)
++{
++ insn->call_dest = dest;
++ if (!dest)
++ return;
++
++ /*
++ * Whatever stack impact regular CALLs have, should be undone
++ * by the RETURN of the called function.
++ *
++ * Annotated intra-function calls retain the stack_ops but
++ * are converted to JUMP, see read_intra_function_calls().
++ */
++ remove_insn_ops(insn);
++
++ annotate_call_site(file, insn, sibling);
++}
++
++static void add_retpoline_call(struct objtool_file *file, struct instruction *insn)
++{
++ /*
++ * Retpoline calls/jumps are really dynamic calls/jumps in disguise,
++ * so convert them accordingly.
++ */
++ switch (insn->type) {
++ case INSN_CALL:
++ insn->type = INSN_CALL_DYNAMIC;
++ break;
++ case INSN_JUMP_UNCONDITIONAL:
++ insn->type = INSN_JUMP_DYNAMIC;
++ break;
++ case INSN_JUMP_CONDITIONAL:
++ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
++ break;
++ default:
++ return;
++ }
++
++ insn->retpoline_safe = true;
++
++ /*
++ * Whatever stack impact regular CALLs have, should be undone
++ * by the RETURN of the called function.
++ *
++ * Annotated intra-function calls retain the stack_ops but
++ * are converted to JUMP, see read_intra_function_calls().
++ */
++ remove_insn_ops(insn);
++
++ annotate_call_site(file, insn, false);
++}
++
++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
++{
++ /*
++ * Return thunk tail calls are really just returns in disguise,
++ * so convert them accordingly.
++ */
++ insn->type = INSN_RETURN;
++ insn->retpoline_safe = true;
++
++ /* Skip the non-text sections, specially .discard ones */
++ if (add && insn->sec->text)
++ list_add_tail(&insn->call_node, &file->return_thunk_list);
++}
++
+ /*
+ * Find the destination instructions for all jumps.
+ */
+@@ -849,28 +1213,15 @@ static int add_jump_destinations(struct objtool_file *file)
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- } else if (arch_is_retpoline(reloc->sym)) {
+- /*
+- * Retpoline jumps are really dynamic jumps in
+- * disguise, so convert them accordingly.
+- */
+- if (insn->type == INSN_JUMP_UNCONDITIONAL)
+- insn->type = INSN_JUMP_DYNAMIC;
+- else
+- insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+-
+- list_add_tail(&insn->call_node,
+- &file->retpoline_call_list);
+-
+- insn->retpoline_safe = true;
++ } else if (reloc->sym->retpoline_thunk) {
++ add_retpoline_call(file, insn);
++ continue;
++ } else if (reloc->sym->return_thunk) {
++ add_return_call(file, insn, true);
+ continue;
+ } else if (insn->func) {
+ /* internal or external sibling call (with reloc) */
+- insn->call_dest = reloc->sym;
+- if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
++ add_call_dest(file, insn, reloc->sym, true);
+ continue;
+ } else if (reloc->sym->sec->idx) {
+ dest_sec = reloc->sym->sec;
+@@ -883,6 +1234,7 @@ static int add_jump_destinations(struct objtool_file *file)
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!insn->jump_dest) {
++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case where an alt instruction
+@@ -892,6 +1244,19 @@ static int add_jump_destinations(struct objtool_file *file)
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
++ /*
++ * This is a special case for retbleed_untrain_ret().
++ * It jumps to __x86_return_thunk(), but objtool
++ * can't find the thunk's starting RET
++ * instruction, because the RET is also in the
++ * middle of another instruction. Objtool only
++ * knows about the outer instruction.
++ */
++ if (sym && sym->embedded_insn) {
++ add_return_call(file, insn, false);
++ continue;
++ }
++
+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+ insn->sec, insn->offset, dest_sec->name,
+ dest_off);
+@@ -926,13 +1291,8 @@ static int add_jump_destinations(struct objtool_file *file)
+
+ } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+ insn->jump_dest->offset == insn->jump_dest->func->offset) {
+-
+ /* internal sibling call (without reloc) */
+- insn->call_dest = insn->jump_dest->func;
+- if (insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
++ add_call_dest(file, insn, insn->jump_dest->func, true);
+ }
+ }
+ }
+@@ -940,16 +1300,6 @@ static int add_jump_destinations(struct objtool_file *file)
+ return 0;
+ }
+
+-static void remove_insn_ops(struct instruction *insn)
+-{
+- struct stack_op *op, *tmp;
+-
+- list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+- list_del(&op->list);
+- free(op);
+- }
+-}
+-
+ static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+ {
+ struct symbol *call_dest;
+@@ -968,6 +1318,7 @@ static int add_call_destinations(struct objtool_file *file)
+ {
+ struct instruction *insn;
+ unsigned long dest_off;
++ struct symbol *dest;
+ struct reloc *reloc;
+
+ for_each_insn(file, insn) {
+@@ -977,7 +1328,9 @@ static int add_call_destinations(struct objtool_file *file)
+ reloc = insn_reloc(file, insn);
+ if (!reloc) {
+ dest_off = arch_jump_destination(insn);
+- insn->call_dest = find_call_destination(insn->sec, dest_off);
++ dest = find_call_destination(insn->sec, dest_off);
++
++ add_call_dest(file, insn, dest, false);
+
+ if (insn->ignore)
+ continue;
+@@ -995,9 +1348,8 @@ static int add_call_destinations(struct objtool_file *file)
+
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+- insn->call_dest = find_call_destination(reloc->sym->sec,
+- dest_off);
+- if (!insn->call_dest) {
++ dest = find_call_destination(reloc->sym->sec, dest_off);
++ if (!dest) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%lx",
+ insn->sec, insn->offset,
+ reloc->sym->sec->name,
+@@ -1005,70 +1357,13 @@ static int add_call_destinations(struct objtool_file *file)
+ return -1;
+ }
+
+- } else if (arch_is_retpoline(reloc->sym)) {
+- /*
+- * Retpoline calls are really dynamic calls in
+- * disguise, so convert them accordingly.
+- */
+- insn->type = INSN_CALL_DYNAMIC;
+- insn->retpoline_safe = true;
+-
+- list_add_tail(&insn->call_node,
+- &file->retpoline_call_list);
++ add_call_dest(file, insn, dest, false);
+
+- remove_insn_ops(insn);
+- continue;
++ } else if (reloc->sym->retpoline_thunk) {
++ add_retpoline_call(file, insn);
+
+ } else
+- insn->call_dest = reloc->sym;
+-
+- if (insn->call_dest && insn->call_dest->static_call_tramp) {
+- list_add_tail(&insn->call_node,
+- &file->static_call_list);
+- }
+-
+- /*
+- * Many compilers cannot disable KCOV with a function attribute
+- * so they need a little help, NOP out any KCOV calls from noinstr
+- * text.
+- */
+- if (insn->sec->noinstr &&
+- !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
+- if (reloc) {
+- reloc->type = R_NONE;
+- elf_write_reloc(file->elf, reloc);
+- }
+-
+- elf_write_insn(file->elf, insn->sec,
+- insn->offset, insn->len,
+- arch_nop_insn(insn->len));
+- insn->type = INSN_NOP;
+- }
+-
+- if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
+- if (reloc) {
+- reloc->type = R_NONE;
+- elf_write_reloc(file->elf, reloc);
+- }
+-
+- elf_write_insn(file->elf, insn->sec,
+- insn->offset, insn->len,
+- arch_nop_insn(insn->len));
+-
+- insn->type = INSN_NOP;
+-
+- list_add_tail(&insn->mcount_loc_node,
+- &file->mcount_loc_list);
+- }
+-
+- /*
+- * Whatever stack impact regular CALLs have, should be undone
+- * by the RETURN of the called function.
+- *
+- * Annotated intra-function calls retain the stack_ops but
+- * are converted to JUMP, see read_intra_function_calls().
+- */
+- remove_insn_ops(insn);
++ add_call_dest(file, insn, reloc->sym, false);
+ }
+
+ return 0;
+@@ -1136,7 +1431,6 @@ static int handle_group_alt(struct objtool_file *file,
+ memset(nop, 0, sizeof(*nop));
+ INIT_LIST_HEAD(&nop->alts);
+ INIT_LIST_HEAD(&nop->stack_ops);
+- init_cfi_state(&nop->cfi);
+
+ nop->sec = special_alt->new_sec;
+ nop->offset = special_alt->new_off + special_alt->new_len;
+@@ -1545,10 +1839,11 @@ static void set_func_state(struct cfi_state *state)
+
+ static int read_unwind_hints(struct objtool_file *file)
+ {
++ struct cfi_state cfi = init_cfi;
+ struct section *sec, *relocsec;
+- struct reloc *reloc;
+ struct unwind_hint *hint;
+ struct instruction *insn;
++ struct reloc *reloc;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+@@ -1585,20 +1880,49 @@ static int read_unwind_hints(struct objtool_file *file)
+
+ insn->hint = true;
+
++ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
++ insn->hint = false;
++ insn->save = true;
++ continue;
++ }
++
++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
++ insn->restore = true;
++ continue;
++ }
++
++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
++ struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
++
++ if (sym && sym->bind == STB_GLOBAL) {
++ insn->entry = 1;
++ }
++ }
++
++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
++ hint->type = UNWIND_HINT_TYPE_CALL;
++ insn->entry = 1;
++ }
++
+ if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+- set_func_state(&insn->cfi);
++ insn->cfi = &func_cfi;
+ continue;
+ }
+
+- if (arch_decode_hint_reg(insn, hint->sp_reg)) {
++ if (insn->cfi)
++ cfi = *(insn->cfi);
++
++ if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) {
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+- insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
+- insn->cfi.type = hint->type;
+- insn->cfi.end = hint->end;
++ cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
++ cfi.type = hint->type;
++ cfi.end = hint->end;
++
++ insn->cfi = cfi_hash_find_or_add(&cfi);
+ }
+
+ return 0;
+@@ -1627,8 +1951,10 @@ static int read_retpoline_hints(struct objtool_file *file)
+ }
+
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC) {
+- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
++ insn->type != INSN_CALL_DYNAMIC &&
++ insn->type != INSN_RETURN &&
++ insn->type != INSN_NOP) {
++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
+ insn->sec, insn->offset);
+ return -1;
+ }
+@@ -1737,17 +2063,34 @@ static int read_intra_function_calls(struct objtool_file *file)
+ return 0;
+ }
+
+-static int read_static_call_tramps(struct objtool_file *file)
++static int classify_symbols(struct objtool_file *file)
+ {
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+- if (func->bind == STB_GLOBAL &&
+- !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
++ if (func->bind != STB_GLOBAL)
++ continue;
++
++ if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
++
++ if (arch_is_retpoline(func))
++ func->retpoline_thunk = true;
++
++ if (arch_is_rethunk(func))
++ func->return_thunk = true;
++
++ if (arch_is_embedded_insn(func))
++ func->embedded_insn = true;
++
++ if (!strcmp(func->name, "__fentry__"))
++ func->fentry = true;
++
++ if (!strncmp(func->name, "__sanitizer_cov_", 16))
++ func->kcov = true;
+ }
+ }
+
+@@ -1780,11 +2123,6 @@ static void mark_rodata(struct objtool_file *file)
+ file->rodata = found;
+ }
+
+-__weak int arch_rewrite_retpolines(struct objtool_file *file)
+-{
+- return 0;
+-}
+-
+ static int decode_sections(struct objtool_file *file)
+ {
+ int ret;
+@@ -1809,7 +2147,7 @@ static int decode_sections(struct objtool_file *file)
+ /*
+ * Must be before add_{jump_call}_destination.
+ */
+- ret = read_static_call_tramps(file);
++ ret = classify_symbols(file);
+ if (ret)
+ return ret;
+
+@@ -1853,24 +2191,20 @@ static int decode_sections(struct objtool_file *file)
+ if (ret)
+ return ret;
+
+- /*
+- * Must be after add_special_section_alts(), since this will emit
+- * alternatives. Must be after add_{jump,call}_destination(), since
+- * those create the call insn lists.
+- */
+- ret = arch_rewrite_retpolines(file);
+- if (ret)
+- return ret;
+-
+ return 0;
+ }
+
+-static bool is_fentry_call(struct instruction *insn)
++static bool is_special_call(struct instruction *insn)
+ {
+- if (insn->type == INSN_CALL && insn->call_dest &&
+- insn->call_dest->type == STT_NOTYPE &&
+- !strcmp(insn->call_dest->name, "__fentry__"))
+- return true;
++ if (insn->type == INSN_CALL) {
++ struct symbol *dest = insn->call_dest;
++
++ if (!dest)
++ return false;
++
++ if (dest->fentry)
++ return true;
++ }
+
+ return false;
+ }
+@@ -2172,17 +2506,6 @@ static int update_cfi_state(struct instruction *insn,
+ break;
+ }
+
+- if (!cfi->drap && op->src.reg == CFI_SP &&
+- op->dest.reg == CFI_BP && cfa->base == CFI_SP &&
+- check_reg_frame_pos(&regs[CFI_BP], -cfa->offset + op->src.offset)) {
+-
+- /* lea disp(%rsp), %rbp */
+- cfa->base = CFI_BP;
+- cfa->offset -= op->src.offset;
+- cfi->bp_scratch = false;
+- break;
+- }
+-
+ if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+ /* drap: lea disp(%rsp), %drap */
+@@ -2452,13 +2775,18 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
+ if (!insn->alt_group)
+ return 0;
+
++ if (!insn->cfi) {
++ WARN("CFI missing");
++ return -1;
++ }
++
+ alt_cfi = insn->alt_group->cfi;
+ group_off = insn->offset - insn->alt_group->first_insn->offset;
+
+ if (!alt_cfi[group_off]) {
+- alt_cfi[group_off] = &insn->cfi;
++ alt_cfi[group_off] = insn->cfi;
+ } else {
+- if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
++ if (cficmp(alt_cfi[group_off], insn->cfi)) {
+ WARN_FUNC("stack layout conflict in alternatives",
+ insn->sec, insn->offset);
+ return -1;
+@@ -2509,9 +2837,14 @@ static int handle_insn_ops(struct instruction *insn,
+
+ static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
+ {
+- struct cfi_state *cfi1 = &insn->cfi;
++ struct cfi_state *cfi1 = insn->cfi;
+ int i;
+
++ if (!cfi1) {
++ WARN("CFI missing");
++ return false;
++ }
++
+ if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+
+ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+@@ -2696,7 +3029,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state)
+ {
+ struct alternative *alt;
+- struct instruction *next_insn;
++ struct instruction *next_insn, *prev_insn = NULL;
+ struct section *sec;
+ u8 visited;
+ int ret;
+@@ -2718,22 +3051,61 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ return 1;
+ }
+
+- visited = 1 << state.uaccess;
+- if (insn->visited) {
++ visited = VISITED_BRANCH << state.uaccess;
++ if (insn->visited & VISITED_BRANCH_MASK) {
+ if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
+ return 1;
+
+ if (insn->visited & visited)
+ return 0;
++ } else {
++ nr_insns_visited++;
+ }
+
+ if (state.noinstr)
+ state.instr += insn->instr;
+
+- if (insn->hint)
+- state.cfi = insn->cfi;
+- else
+- insn->cfi = state.cfi;
++ if (insn->hint) {
++ if (insn->restore) {
++ struct instruction *save_insn, *i;
++
++ i = insn;
++ save_insn = NULL;
++
++ sym_for_each_insn_continue_reverse(file, func, i) {
++ if (i->save) {
++ save_insn = i;
++ break;
++ }
++ }
++
++ if (!save_insn) {
++ WARN_FUNC("no corresponding CFI save for CFI restore",
++ sec, insn->offset);
++ return 1;
++ }
++
++ if (!save_insn->visited) {
++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
++ sec, insn->offset);
++ return 1;
++ }
++
++ insn->cfi = save_insn->cfi;
++ nr_cfi_reused++;
++ }
++
++ state.cfi = *insn->cfi;
++ } else {
++ /* XXX track if we actually changed state.cfi */
++
++ if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
++ insn->cfi = prev_insn->cfi;
++ nr_cfi_reused++;
++ } else {
++ insn->cfi = cfi_hash_find_or_add(&state.cfi);
++ }
++ }
+
+ insn->visited |= visited;
+
+@@ -2765,6 +3137,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ switch (insn->type) {
+
+ case INSN_RETURN:
++ if (sls && !insn->retpoline_safe &&
++ next_insn && next_insn->type != INSN_TRAP) {
++ WARN_FUNC("missing int3 after ret",
++ insn->sec, insn->offset);
++ }
+ return validate_return(func, insn, &state);
+
+ case INSN_CALL:
+@@ -2773,7 +3150,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ if (ret)
+ return ret;
+
+- if (!no_fp && func && !is_fentry_call(insn) &&
++ if (!no_fp && func && !is_special_call(insn) &&
+ !has_valid_stack_frame(&state)) {
+ WARN_FUNC("call without frame pointer save/setup",
+ sec, insn->offset);
+@@ -2808,6 +3185,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ break;
+
+ case INSN_JUMP_DYNAMIC:
++ if (sls && !insn->retpoline_safe &&
++ next_insn && next_insn->type != INSN_TRAP) {
++ WARN_FUNC("missing int3 after indirect jump",
++ insn->sec, insn->offset);
++ }
++
++ /* fallthrough */
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(insn, &state);
+@@ -2883,6 +3267,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
+ return 1;
+ }
+
++ prev_insn = insn;
+ insn = next_insn;
+ }
+
+@@ -2922,6 +3307,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
+ return warnings;
+ }
+
++/*
++ * Validate rethunk entry constraint: must untrain RET before the first RET.
++ *
++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
++ * before an actual RET instruction.
++ */
++static int validate_entry(struct objtool_file *file, struct instruction *insn)
++{
++ struct instruction *next, *dest;
++ int ret, warnings = 0;
++
++ for (;;) {
++ next = next_insn_to_validate(file, insn);
++
++ if (insn->visited & VISITED_ENTRY)
++ return 0;
++
++ insn->visited |= VISITED_ENTRY;
++
++ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
++ struct alternative *alt;
++ bool skip_orig = false;
++
++ list_for_each_entry(alt, &insn->alts, list) {
++ if (alt->skip_orig)
++ skip_orig = true;
++
++ ret = validate_entry(file, alt->insn);
++ if (ret) {
++ if (backtrace)
++ BT_FUNC("(alt)", insn);
++ return ret;
++ }
++ }
++
++ if (skip_orig)
++ return 0;
++ }
++
++ switch (insn->type) {
++
++ case INSN_CALL_DYNAMIC:
++ case INSN_JUMP_DYNAMIC:
++ case INSN_JUMP_DYNAMIC_CONDITIONAL:
++ WARN_FUNC("early indirect call", insn->sec, insn->offset);
++ return 1;
++
++ case INSN_JUMP_UNCONDITIONAL:
++ case INSN_JUMP_CONDITIONAL:
++ if (!is_sibling_call(insn)) {
++ if (!insn->jump_dest) {
++ WARN_FUNC("unresolved jump target after linking?!?",
++ insn->sec, insn->offset);
++ return -1;
++ }
++ ret = validate_entry(file, insn->jump_dest);
++ if (ret) {
++ if (backtrace) {
++ BT_FUNC("(branch%s)", insn,
++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
++ }
++ return ret;
++ }
++
++ if (insn->type == INSN_JUMP_UNCONDITIONAL)
++ return 0;
++
++ break;
++ }
++
++ /* fallthrough */
++ case INSN_CALL:
++ dest = find_insn(file, insn->call_dest->sec,
++ insn->call_dest->offset);
++ if (!dest) {
++ WARN("Unresolved function after linking!?: %s",
++ insn->call_dest->name);
++ return -1;
++ }
++
++ ret = validate_entry(file, dest);
++ if (ret) {
++ if (backtrace)
++ BT_FUNC("(call)", insn);
++ return ret;
++ }
++ /*
++ * If a call returns without error, it must have seen UNTRAIN_RET.
++ * Therefore any non-error return is a success.
++ */
++ return 0;
++
++ case INSN_RETURN:
++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
++ return 1;
++
++ case INSN_NOP:
++ if (insn->retpoline_safe)
++ return 0;
++ break;
++
++ default:
++ break;
++ }
++
++ if (!next) {
++ WARN_FUNC("teh end!", insn->sec, insn->offset);
++ return -1;
++ }
++ insn = next;
++ }
++
++ return warnings;
++}
++
++/*
++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
++ * before RET.
++ */
++static int validate_unret(struct objtool_file *file)
++{
++ struct instruction *insn;
++ int ret, warnings = 0;
++
++ for_each_insn(file, insn) {
++ if (!insn->entry)
++ continue;
++
++ ret = validate_entry(file, insn);
++ if (ret < 0) {
++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
++ return ret;
++ }
++ warnings += ret;
++ }
++
++ return warnings;
++}
++
+ static int validate_retpoline(struct objtool_file *file)
+ {
+ struct instruction *insn;
+@@ -2929,7 +3453,8 @@ static int validate_retpoline(struct objtool_file *file)
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+- insn->type != INSN_CALL_DYNAMIC)
++ insn->type != INSN_CALL_DYNAMIC &&
++ insn->type != INSN_RETURN)
+ continue;
+
+ if (insn->retpoline_safe)
+@@ -2944,9 +3469,17 @@ static int validate_retpoline(struct objtool_file *file)
+ if (!strcmp(insn->sec->name, ".init.text") && !module)
+ continue;
+
+- WARN_FUNC("indirect %s found in RETPOLINE build",
+- insn->sec, insn->offset,
+- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++ if (insn->type == INSN_RETURN) {
++ if (rethunk) {
++ WARN_FUNC("'naked' return found in RETHUNK build",
++ insn->sec, insn->offset);
++ } else
++ continue;
++ } else {
++ WARN_FUNC("indirect %s found in RETPOLINE build",
++ insn->sec, insn->offset,
++ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
++ }
+
+ warnings++;
+ }
+@@ -2972,7 +3505,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
+ int i;
+ struct instruction *prev_insn;
+
+- if (insn->ignore || insn->type == INSN_NOP)
++ if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP)
+ return true;
+
+ /*
+@@ -3138,10 +3671,20 @@ int check(struct objtool_file *file)
+ int ret, warnings = 0;
+
+ arch_initial_func_cfi_state(&initial_func_cfi);
++ init_cfi_state(&init_cfi);
++ init_cfi_state(&func_cfi);
++ set_func_state(&func_cfi);
++
++ if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3)))
++ goto out;
++
++ cfi_hash_add(&init_cfi);
++ cfi_hash_add(&func_cfi);
+
+ ret = decode_sections(file);
+ if (ret < 0)
+ goto out;
++
+ warnings += ret;
+
+ if (list_empty(&file->insn_list))
+@@ -3173,6 +3716,17 @@ int check(struct objtool_file *file)
+ goto out;
+ warnings += ret;
+
++ if (unret) {
++ /*
++ * Must be after validate_branch() and friends, it plays
++ * further games with insn->visited.
++ */
++ ret = validate_unret(file);
++ if (ret < 0)
++ return ret;
++ warnings += ret;
++ }
++
+ if (!warnings) {
+ ret = validate_reachable_instructions(file);
+ if (ret < 0)
+@@ -3185,6 +3739,20 @@ int check(struct objtool_file *file)
+ goto out;
+ warnings += ret;
+
++ if (retpoline) {
++ ret = create_retpoline_sites_sections(file);
++ if (ret < 0)
++ goto out;
++ warnings += ret;
++ }
++
++ if (rethunk) {
++ ret = create_return_sites_sections(file);
++ if (ret < 0)
++ goto out;
++ warnings += ret;
++ }
++
+ if (mcount) {
+ ret = create_mcount_loc_sections(file);
+ if (ret < 0)
+@@ -3192,6 +3760,13 @@ int check(struct objtool_file *file)
+ warnings += ret;
+ }
+
++ if (stats) {
++ printf("nr_insns_visited: %ld\n", nr_insns_visited);
++ printf("nr_cfi: %ld\n", nr_cfi);
++ printf("nr_cfi_reused: %ld\n", nr_cfi_reused);
++ printf("nr_cfi_cache: %ld\n", nr_cfi_cache);
++ }
++
+ out:
+ /*
+ * For now, don't fail the kernel build on fatal warnings. These
+diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
+index fee03b744a6ea..4b78df22d42e1 100644
+--- a/tools/objtool/elf.c
++++ b/tools/objtool/elf.c
+@@ -314,6 +314,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+ struct list_head *entry;
+ struct rb_node *pnode;
+
++ sym->alias = sym;
++
+ sym->type = GELF_ST_TYPE(sym->sym.st_info);
+ sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+@@ -375,7 +377,6 @@ static int read_symbols(struct elf *elf)
+ return -1;
+ }
+ memset(sym, 0, sizeof(*sym));
+- sym->alias = sym;
+
+ sym->idx = i;
+
+@@ -485,7 +486,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
+ int reltype);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, int addend)
++ unsigned int type, struct symbol *sym, s64 addend)
+ {
+ struct reloc *reloc;
+
+@@ -514,37 +515,244 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+ return 0;
+ }
+
++/*
++ * Ensure that any reloc section containing references to @sym is marked
++ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
++ * with the new symbol index.
++ */
++static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
++{
++ struct section *sec;
++
++ list_for_each_entry(sec, &elf->sections, list) {
++ struct reloc *reloc;
++
++ if (sec->changed)
++ continue;
++
++ list_for_each_entry(reloc, &sec->reloc_list, list) {
++ if (reloc->sym == sym) {
++ sec->changed = true;
++ break;
++ }
++ }
++ }
++}
++
++/*
++ * The libelf API is terrible; gelf_update_sym*() takes a data block relative
++ * index value, *NOT* the symbol index. As such, iterate the data blocks and
++ * adjust index until it fits.
++ *
++ * If no data block is found, allow adding a new data block provided the index
++ * is only one past the end.
++ */
++static int elf_update_symbol(struct elf *elf, struct section *symtab,
++ struct section *symtab_shndx, struct symbol *sym)
++{
++ Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
++ Elf_Data *symtab_data = NULL, *shndx_data = NULL;
++ Elf64_Xword entsize = symtab->sh.sh_entsize;
++ int max_idx, idx = sym->idx;
++ Elf_Scn *s, *t = NULL;
++ bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE &&
++ sym->sym.st_shndx != SHN_XINDEX;
++
++ if (is_special_shndx)
++ shndx = sym->sym.st_shndx;
++
++ s = elf_getscn(elf->elf, symtab->idx);
++ if (!s) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
++
++ if (symtab_shndx) {
++ t = elf_getscn(elf->elf, symtab_shndx->idx);
++ if (!t) {
++ WARN_ELF("elf_getscn");
++ return -1;
++ }
++ }
++
++ for (;;) {
++ /* get next data descriptor for the relevant sections */
++ symtab_data = elf_getdata(s, symtab_data);
++ if (t)
++ shndx_data = elf_getdata(t, shndx_data);
++
++ /* end-of-list */
++ if (!symtab_data) {
++ void *buf;
++
++ if (idx) {
++ /* we don't do holes in symbol tables */
++ WARN("index out of range");
++ return -1;
++ }
++
++ /* if @idx == 0, it's the next contiguous entry, create it */
++ symtab_data = elf_newdata(s);
++ if (t)
++ shndx_data = elf_newdata(t);
++
++ buf = calloc(1, entsize);
++ if (!buf) {
++ WARN("malloc");
++ return -1;
++ }
++
++ symtab_data->d_buf = buf;
++ symtab_data->d_size = entsize;
++ symtab_data->d_align = 1;
++ symtab_data->d_type = ELF_T_SYM;
++
++ symtab->sh.sh_size += entsize;
++ symtab->changed = true;
++
++ if (t) {
++ shndx_data->d_buf = &sym->sec->idx;
++ shndx_data->d_size = sizeof(Elf32_Word);
++ shndx_data->d_align = sizeof(Elf32_Word);
++ shndx_data->d_type = ELF_T_WORD;
++
++ symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
++ symtab_shndx->changed = true;
++ }
++
++ break;
++ }
++
++ /* empty blocks should not happen */
++ if (!symtab_data->d_size) {
++ WARN("zero size data");
++ return -1;
++ }
++
++ /* is this the right block? */
++ max_idx = symtab_data->d_size / entsize;
++ if (idx < max_idx)
++ break;
++
++ /* adjust index and try again */
++ idx -= max_idx;
++ }
++
++ /* something went side-ways */
++ if (idx < 0) {
++ WARN("negative index");
++ return -1;
++ }
++
++ /* setup extended section index magic and write the symbol */
++ if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) {
++ sym->sym.st_shndx = shndx;
++ if (!shndx_data)
++ shndx = 0;
++ } else {
++ sym->sym.st_shndx = SHN_XINDEX;
++ if (!shndx_data) {
++ WARN("no .symtab_shndx");
++ return -1;
++ }
++ }
++
++ if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
++ WARN_ELF("gelf_update_symshndx");
++ return -1;
++ }
++
++ return 0;
++}
++
++static struct symbol *
++elf_create_section_symbol(struct elf *elf, struct section *sec)
++{
++ struct section *symtab, *symtab_shndx;
++ Elf32_Word first_non_local, new_idx;
++ struct symbol *sym, *old;
++
++ symtab = find_section_by_name(elf, ".symtab");
++ if (symtab) {
++ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
++ } else {
++ WARN("no .symtab");
++ return NULL;
++ }
++
++ sym = calloc(1, sizeof(*sym));
++ if (!sym) {
++ perror("malloc");
++ return NULL;
++ }
++
++ sym->name = sec->name;
++ sym->sec = sec;
++
++ // st_name 0
++ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
++ // st_other 0
++ // st_value 0
++ // st_size 0
++
++ /*
++ * Move the first global symbol, as per sh_info, into a new, higher
++ * symbol index. This frees up a spot for a new local symbol.
++ */
++ first_non_local = symtab->sh.sh_info;
++ new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
++ old = find_symbol_by_index(elf, first_non_local);
++ if (old) {
++ old->idx = new_idx;
++
++ hlist_del(&old->hash);
++ elf_hash_add(symbol, &old->hash, old->idx);
++
++ elf_dirty_reloc_sym(elf, old);
++
++ if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
++ WARN("elf_update_symbol move");
++ return NULL;
++ }
++
++ new_idx = first_non_local;
++ }
++
++ sym->idx = new_idx;
++ if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
++ WARN("elf_update_symbol");
++ return NULL;
++ }
++
++ /*
++ * Either way, we added a LOCAL symbol.
++ */
++ symtab->sh.sh_info += 1;
++
++ elf_add_symbol(elf, sym);
++
++ return sym;
++}
++
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off)
+ {
+- struct symbol *sym;
+- int addend;
++ struct symbol *sym = insn_sec->sym;
++ int addend = insn_off;
+
+- if (insn_sec->sym) {
+- sym = insn_sec->sym;
+- addend = insn_off;
+-
+- } else {
++ if (!sym) {
+ /*
+- * The Clang assembler strips section symbols, so we have to
+- * reference the function symbol instead:
++ * Due to how weak functions work, we must use section based
++ * relocations. Symbol based relocations would result in the
++ * weak and non-weak function annotations being overlaid on the
++ * non-weak function after linking.
+ */
+- sym = find_symbol_containing(insn_sec, insn_off);
+- if (!sym) {
+- /*
+- * Hack alert. This happens when we need to reference
+- * the NOP pad insn immediately after the function.
+- */
+- sym = find_symbol_containing(insn_sec, insn_off - 1);
+- }
+-
+- if (!sym) {
+- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
++ sym = elf_create_section_symbol(elf, insn_sec);
++ if (!sym)
+ return -1;
+- }
+
+- addend = insn_off - sym->offset;
++ insn_sec->sym = sym;
+ }
+
+ return elf_add_reloc(elf, sec, offset, type, sym, addend);
+@@ -741,90 +949,6 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+ return len;
+ }
+
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+-{
+- struct section *symtab, *symtab_shndx;
+- struct symbol *sym;
+- Elf_Data *data;
+- Elf_Scn *s;
+-
+- sym = malloc(sizeof(*sym));
+- if (!sym) {
+- perror("malloc");
+- return NULL;
+- }
+- memset(sym, 0, sizeof(*sym));
+-
+- sym->name = strdup(name);
+-
+- sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+- if (sym->sym.st_name == -1)
+- return NULL;
+-
+- sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+- // st_other 0
+- // st_shndx 0
+- // st_value 0
+- // st_size 0
+-
+- symtab = find_section_by_name(elf, ".symtab");
+- if (!symtab) {
+- WARN("can't find .symtab");
+- return NULL;
+- }
+-
+- s = elf_getscn(elf->elf, symtab->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return NULL;
+- }
+-
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
+- return NULL;
+- }
+-
+- data->d_buf = &sym->sym;
+- data->d_size = sizeof(sym->sym);
+- data->d_align = 1;
+- data->d_type = ELF_T_SYM;
+-
+- sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+-
+- symtab->sh.sh_size += data->d_size;
+- symtab->changed = true;
+-
+- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+- if (symtab_shndx) {
+- s = elf_getscn(elf->elf, symtab_shndx->idx);
+- if (!s) {
+- WARN_ELF("elf_getscn");
+- return NULL;
+- }
+-
+- data = elf_newdata(s);
+- if (!data) {
+- WARN_ELF("elf_newdata");
+- return NULL;
+- }
+-
+- data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+- data->d_size = sizeof(Elf32_Word);
+- data->d_align = 4;
+- data->d_type = ELF_T_WORD;
+-
+- symtab_shndx->sh.sh_size += 4;
+- symtab_shndx->changed = true;
+- }
+-
+- sym->sec = find_section_by_index(elf, 0);
+-
+- elf_add_symbol(elf, sym);
+-
+- return sym;
+-}
+-
+ struct section *elf_create_section(struct elf *elf, const char *name,
+ unsigned int sh_flags, size_t entsize, int nr)
+ {
+diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
+index 062bb6e9b8658..d81d9a8e4808a 100644
+--- a/tools/objtool/include/objtool/arch.h
++++ b/tools/objtool/include/objtool/arch.h
+@@ -26,6 +26,7 @@ enum insn_type {
+ INSN_CLAC,
+ INSN_STD,
+ INSN_CLD,
++ INSN_TRAP,
+ INSN_OTHER,
+ };
+
+@@ -82,10 +83,13 @@ unsigned long arch_jump_destination(struct instruction *insn);
+ unsigned long arch_dest_reloc_offset(int addend);
+
+ const char *arch_nop_insn(int len);
++const char *arch_ret_insn(int len);
+
+-int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
++int arch_decode_hint_reg(u8 sp_reg, int *base);
+
+ bool arch_is_retpoline(struct symbol *sym);
++bool arch_is_rethunk(struct symbol *sym);
++bool arch_is_embedded_insn(struct symbol *sym);
+
+ int arch_rewrite_retpolines(struct objtool_file *file);
+
+diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
+index 15ac0b7d3d6a3..66ad30ec58182 100644
+--- a/tools/objtool/include/objtool/builtin.h
++++ b/tools/objtool/include/objtool/builtin.h
+@@ -9,7 +9,7 @@
+
+ extern const struct option check_options[];
+ extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+- validate_dup, vmlinux, mcount, noinstr, backup;
++ validate_dup, vmlinux, mcount, noinstr, backup, sls, unret, rethunk;
+
+ extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
+
+diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h
+index fd5cb0bed9bf0..f11d1ac1dadf1 100644
+--- a/tools/objtool/include/objtool/cfi.h
++++ b/tools/objtool/include/objtool/cfi.h
+@@ -7,6 +7,7 @@
+ #define _OBJTOOL_CFI_H
+
+ #include <arch/cfi_regs.h>
++#include <linux/list.h>
+
+ #define CFI_UNDEFINED -1
+ #define CFI_CFA -2
+@@ -24,6 +25,7 @@ struct cfi_init_state {
+ };
+
+ struct cfi_state {
++ struct hlist_node hash; /* must be first, cficmp() */
+ struct cfi_reg regs[CFI_NUM_REGS];
+ struct cfi_reg vals[CFI_NUM_REGS];
+ struct cfi_reg cfa;
+diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
+index 56d50bc50c10c..4ba041db304f9 100644
+--- a/tools/objtool/include/objtool/check.h
++++ b/tools/objtool/include/objtool/check.h
+@@ -40,7 +40,6 @@ struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+ struct list_head call_node;
+- struct list_head mcount_loc_node;
+ struct section *sec;
+ unsigned long offset;
+ unsigned int len;
+@@ -48,7 +47,9 @@ struct instruction {
+ unsigned long immediate;
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
++ bool save, restore;
+ bool retpoline_safe;
++ bool entry;
+ s8 instr;
+ u8 visited;
+ struct alt_group *alt_group;
+@@ -60,9 +61,14 @@ struct instruction {
+ struct list_head alts;
+ struct symbol *func;
+ struct list_head stack_ops;
+- struct cfi_state cfi;
++ struct cfi_state *cfi;
+ };
+
++#define VISITED_BRANCH 0x01
++#define VISITED_BRANCH_UACCESS 0x02
++#define VISITED_BRANCH_MASK 0x03
++#define VISITED_ENTRY 0x04
++
+ static inline bool is_static_jump(struct instruction *insn)
+ {
+ return insn->type == INSN_JUMP_CONDITIONAL ||
+diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
+index 075d8291b8546..f06398ea57510 100644
+--- a/tools/objtool/include/objtool/elf.h
++++ b/tools/objtool/include/objtool/elf.h
+@@ -54,8 +54,13 @@ struct symbol {
+ unsigned long offset;
+ unsigned int len;
+ struct symbol *pfunc, *cfunc, *alias;
+- bool uaccess_safe;
+- bool static_call_tramp;
++ u8 uaccess_safe : 1;
++ u8 static_call_tramp : 1;
++ u8 retpoline_thunk : 1;
++ u8 return_thunk : 1;
++ u8 fentry : 1;
++ u8 kcov : 1;
++ u8 embedded_insn : 1;
+ };
+
+ struct reloc {
+@@ -69,7 +74,7 @@ struct reloc {
+ struct symbol *sym;
+ unsigned long offset;
+ unsigned int type;
+- int addend;
++ s64 addend;
+ int idx;
+ bool jump_table_start;
+ };
+@@ -131,7 +136,7 @@ struct elf *elf_open_read(const char *name, int flags);
+ struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+
+ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+- unsigned int type, struct symbol *sym, int addend);
++ unsigned int type, struct symbol *sym, s64 addend);
+ int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off);
+@@ -140,7 +145,6 @@ int elf_write_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len,
+ const char *insn);
+ int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+-struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
+ int elf_write(struct elf *elf);
+ void elf_close(struct elf *elf);
+
+diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
+index 24fa83634de4d..97b25a217c3a5 100644
+--- a/tools/objtool/include/objtool/objtool.h
++++ b/tools/objtool/include/objtool/objtool.h
+@@ -19,6 +19,7 @@ struct objtool_file {
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head retpoline_call_list;
++ struct list_head return_thunk_list;
+ struct list_head static_call_list;
+ struct list_head mcount_loc_list;
+ bool ignore_unreachables, c_file, hints, rodata;
+diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
+index e21db8bce4935..24650d533d85c 100644
+--- a/tools/objtool/objtool.c
++++ b/tools/objtool/objtool.c
+@@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.retpoline_call_list);
++ INIT_LIST_HEAD(&file.return_thunk_list);
+ INIT_LIST_HEAD(&file.static_call_list);
+ INIT_LIST_HEAD(&file.mcount_loc_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
+index b5865e2450cbb..dd3c64af9db23 100644
+--- a/tools/objtool/orc_gen.c
++++ b/tools/objtool/orc_gen.c
+@@ -13,13 +13,19 @@
+ #include <objtool/warn.h>
+ #include <objtool/endianness.h>
+
+-static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
++static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi,
++ struct instruction *insn)
+ {
+- struct instruction *insn = container_of(cfi, struct instruction, cfi);
+ struct cfi_reg *bp = &cfi->regs[CFI_BP];
+
+ memset(orc, 0, sizeof(*orc));
+
++ if (!cfi) {
++ orc->end = 0;
++ orc->sp_reg = ORC_REG_UNDEFINED;
++ return 0;
++ }
++
+ orc->end = cfi->end;
+
+ if (cfi->cfa.base == CFI_UNDEFINED) {
+@@ -162,7 +168,7 @@ int orc_create(struct objtool_file *file)
+ int i;
+
+ if (!alt_group) {
+- if (init_orc_entry(&orc, &insn->cfi))
++ if (init_orc_entry(&orc, insn->cfi, insn))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+@@ -186,7 +192,8 @@ int orc_create(struct objtool_file *file)
+ struct cfi_state *cfi = alt_group->cfi[i];
+ if (!cfi)
+ continue;
+- if (init_orc_entry(&orc, cfi))
++ /* errors are reported on the original insn */
++ if (init_orc_entry(&orc, cfi, insn))
+ return -1;
+ if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+ continue;
+diff --git a/tools/objtool/special.c b/tools/objtool/special.c
+index 06c3eacab3d53..e2223dd91c379 100644
+--- a/tools/objtool/special.c
++++ b/tools/objtool/special.c
+@@ -109,14 +109,6 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+ return -1;
+ }
+
+- /*
+- * Skip retpoline .altinstr_replacement... we already rewrite the
+- * instructions for retpolines anyway, see arch_is_retpoline()
+- * usage in add_{call,jump}_destinations().
+- */
+- if (arch_is_retpoline(new_reloc->sym))
+- return 1;
+-
+ reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
+
+ /* _ASM_EXTABLE_EX hack */
+diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
+index 8e0163b7ef016..cdb7a347ceb5e 100644
+--- a/tools/perf/.gitignore
++++ b/tools/perf/.gitignore
+@@ -4,6 +4,7 @@ PERF-GUI-VARS
+ PERF-VERSION-FILE
+ FEATURE-DUMP
+ perf
++!include/perf/
+ perf-read-vdso32
+ perf-read-vdsox32
+ perf-help
+diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
+index db465fa7ee918..de63c418e4d1f 100644
+--- a/tools/perf/Documentation/perf-intel-pt.txt
++++ b/tools/perf/Documentation/perf-intel-pt.txt
+@@ -108,9 +108,10 @@ displayed as follows:
+
+ perf script --itrace=ibxwpe -F+flags
+
+-The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
++The flags are "bcrosyiABExghDt" which stand for branch, call, return, conditional,
+ system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+-in transaction, VM-entry, and VM-exit respectively.
++in transaction, VM-entry, VM-exit, interrupt disabled, and interrupt disable
++toggle respectively.
+
+ perf script also supports higher level ways to dump instruction traces:
+
+@@ -456,6 +457,8 @@ ptw Enable PTWRITE packets which are produced when a ptwrite instruction
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
++ As an alternative, refer to "Emulated PTWRITE" further below.
++
+ fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
+ provides the address of the ptwrite instruction. In the absence of
+ fup_on_ptw, the decoder will use the address of the previous branch
+@@ -472,6 +475,30 @@ pwr_evt Enable power events. The power events provide information about
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
++event Enable Event Trace. The events provide information about asynchronous
++ events.
++
++ Support for this feature is indicated by:
++
++ /sys/bus/event_source/devices/intel_pt/caps/event_trace
++
++ which contains "1" if the feature is supported and
++ "0" otherwise.
++
++notnt Disable TNT packets. Without TNT packets, it is not possible to walk
++ executable code to reconstruct control flow, however FUP, TIP, TIP.PGE
++ and TIP.PGD packets still indicate asynchronous control flow, and (if
++ return compression is disabled - see noretcomp) return statements.
++ The advantage of eliminating TNT packets is reducing the size of the
++ trace and corresponding tracing overhead.
++
++ Support for this feature is indicated by:
++
++ /sys/bus/event_source/devices/intel_pt/caps/tnt_disable
++
++ which contains "1" if the feature is supported and
++ "0" otherwise.
++
+
+ AUX area sampling option
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -865,6 +892,8 @@ The letters are:
+ p synthesize "power" events (incl. PSB events)
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
++ o synthesize PEBS-via-PT events
++ I synthesize Event Trace events
+ e synthesize tracing error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+@@ -1338,6 +1367,202 @@ There were none.
+ :17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax
+
+
++Event Trace
++-----------
++
++Event Trace records information about asynchronous events, for example interrupts,
++faults, VM exits and entries. The information is recorded in CFE and EVD packets,
++and also the Interrupt Flag is recorded on the MODE.Exec packet. The CFE packet
++contains a type field to identify one of the following:
++
++ 1 INTR interrupt, fault, exception, NMI
++ 2 IRET interrupt return
++ 3 SMI system management interrupt
++ 4 RSM resume from system management mode
++ 5 SIPI startup interprocessor interrupt
++ 6 INIT INIT signal
++ 7 VMENTRY VM-Entry
++ 8 VMEXIT VM-Exit
++ 9 VMEXIT_INTR VM-Exit due to interrupt
++ 10 SHUTDOWN Shutdown
++
++For more details, refer to the Intel 64 and IA-32 Architectures Software
++Developer Manuals (version 076 or later).
++
++The capability to do Event Trace is indicated by the
++/sys/bus/event_source/devices/intel_pt/caps/event_trace file.
++
++Event trace is selected for recording using the "event" config term. e.g.
++
++ perf record -e intel_pt/event/u uname
++
++Event trace events are output using the --itrace I option. e.g.
++
++ perf script --itrace=Ie
++
++perf script displays events containing CFE type, vector and event data,
++in the form:
++
++ evt: hw int (t) cfe: INTR IP: 1 vector: 3 PFA: 0x8877665544332211
++
++The IP flag indicates if the event binds to an IP, which includes any case where
++flow control packet generation is enabled, as well as when CFE packet IP bit is
++set.
++
++perf script displays events containing changes to the Interrupt Flag in the form:
++
++ iflag: t IFLAG: 1->0 via branch
++
++where "via branch" indicates a branch (interrupt or return from interrupt) and
++"non branch" indicates an instruction such as CFI, STI or POPF).
++
++In addition, the current state of the interrupt flag is indicated by the presence
++or absence of the "D" (interrupt disabled) perf script flag. If the interrupt
++flag is changed, then the "t" flag is also included i.e.
++
++ no flag, interrupts enabled IF=1
++ t interrupts become disabled IF=1 -> IF=0
++ D interrupts are disabled IF=0
++ Dt interrupts become enabled IF=0 -> IF=1
++
++The intel-pt-events.py script illustrates how to access Event Trace information
++using a Python script.
++
++
++TNT Disable
++-----------
++
++TNT packets are disabled using the "notnt" config term. e.g.
++
++ perf record -e intel_pt/notnt/u uname
++
++In that case the --itrace q option is forced because walking executable code
++to reconstruct the control flow is not possible.
++
++
++Emulated PTWRITE
++----------------
++
++Later perf tools support a method to emulate the ptwrite instruction, which
++can be useful if hardware does not support the ptwrite instruction.
++
++Instead of using the ptwrite instruction, a function is used which produces
++a trace that encodes the payload data into TNT packets. Here is an example
++of the function:
++
++ #include <stdint.h>
++
++ void perf_emulate_ptwrite(uint64_t x)
++ __attribute__((externally_visible, noipa, no_instrument_function, naked));
++
++ #define PERF_EMULATE_PTWRITE_8_BITS \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n" \
++ "1: shl %rax\n" \
++ " jc 1f\n"
++
++ /* Undefined instruction */
++ #define PERF_EMULATE_PTWRITE_UD2 ".byte 0x0f, 0x0b\n"
++
++ #define PERF_EMULATE_PTWRITE_MAGIC PERF_EMULATE_PTWRITE_UD2 ".ascii \"perf,ptwrite \"\n"
++
++ void perf_emulate_ptwrite(uint64_t x __attribute__ ((__unused__)))
++ {
++ /* Assumes SysV ABI : x passed in rdi */
++ __asm__ volatile (
++ "jmp 1f\n"
++ PERF_EMULATE_PTWRITE_MAGIC
++ "1: mov %rdi, %rax\n"
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ PERF_EMULATE_PTWRITE_8_BITS
++ "1: ret\n"
++ );
++ }
++
++For example, a test program with the function above:
++
++ #include <stdio.h>
++ #include <stdint.h>
++ #include <stdlib.h>
++
++ #include "perf_emulate_ptwrite.h"
++
++ int main(int argc, char *argv[])
++ {
++ uint64_t x = 0;
++
++ if (argc > 1)
++ x = strtoull(argv[1], NULL, 0);
++ perf_emulate_ptwrite(x);
++ return 0;
++ }
++
++Can be compiled and traced:
++
++ $ gcc -Wall -Wextra -O3 -g -o eg_ptw eg_ptw.c
++ $ perf record -e intel_pt//u ./eg_ptw 0x1234567890abcdef
++ [ perf record: Woken up 1 times to write data ]
++ [ perf record: Captured and wrote 0.017 MB perf.data ]
++ $ perf script --itrace=ew
++ eg_ptw 19875 [007] 8061.235912: ptwrite: IP: 0 payload: 0x1234567890abcdef 55701249a196 perf_emulate_ptwrite+0x16 (/home/user/eg_ptw)
++ $
++
++
++Pipe mode
++---------
++Pipe mode is a problem for Intel PT and possibly other auxtrace users.
++It's not recommended to use a pipe as data output with Intel PT for the
++following reason.
++
++Essentially the auxtrace buffers do not behave like the regular perf
++event buffers. That is because the head and tail are updated by
++software, but in the auxtrace case the data is written by hardware.
++So the head and tail do not get updated as data is written.
++
++In the Intel PT case, the head and tail are updated only when the trace
++is disabled by software, for example:
++ - full-trace, system wide : when buffer passes watermark
++ - full-trace, not system-wide : when buffer passes watermark or
++ context switches
++ - snapshot mode : as above but also when a snapshot is made
++ - sample mode : as above but also when a sample is made
++
++That means finished-round ordering doesn't work. An auxtrace buffer
++can turn up that has data that extends back in time, possibly to the
++very beginning of tracing.
++
++For a perf.data file, that problem is solved by going through the trace
++and queuing up the auxtrace buffers in advance.
++
++For pipe mode, the order of events and timestamps can presumably
++be messed up.
++
++
++EXAMPLE
++-------
++
++Examples can be found on perf wiki page "Perf tools support for Intel® Processor Trace":
++
++https://perf.wiki.kernel.org/index.php/Perf_tools_support_for_Intel%C2%AE_Processor_Trace
++
+
+ SEE ALSO
+ --------
+diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
+index 14e3e8d702a02..973c0d5ed8d8b 100644
+--- a/tools/perf/Makefile.config
++++ b/tools/perf/Makefile.config
+@@ -143,7 +143,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
+ ifdef CSINCLUDES
+ LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
+ endif
+-OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++
++OPENCSDLIBS := -lopencsd_c_api -lopencsd
++ifeq ($(findstring -static,${LDFLAGS}),-static)
++ OPENCSDLIBS += -lstdc++
++endif
+ ifdef CSLIBS
+ LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
+ endif
+@@ -234,18 +237,33 @@ ifdef PARSER_DEBUG
+ endif
+
+ # Try different combinations to accommodate systems that only have
+-# python[2][-config] in weird combinations but always preferring
+-# python2 and python2-config as per pep-0394. If python2 or python
+-# aren't found, then python3 is used.
+-PYTHON_AUTO := python
+-PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO))
+-PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO))
+-PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO))
+-override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO))
+-PYTHON_AUTO_CONFIG := \
+- $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+-override PYTHON_CONFIG := \
+- $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG))
++# python[2][3]-config in weird combinations in the following order of
++# priority from lowest to highest:
++# * python3-config
++# * python-config
++# * python2-config as per pep-0394.
++# * $(PYTHON)-config (If PYTHON is user supplied but PYTHON_CONFIG isn't)
++#
++PYTHON_AUTO := python-config
++PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO))
++PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO))
++PYTHON_AUTO := $(if $(call get-executable,python2-config),python2-config,$(PYTHON_AUTO))
++
++# If PYTHON is defined but PYTHON_CONFIG isn't, then take $(PYTHON)-config as if it was the user
++# supplied value for PYTHON_CONFIG. Because it's "user supplied", error out if it doesn't exist.
++ifdef PYTHON
++ ifndef PYTHON_CONFIG
++ PYTHON_CONFIG_AUTO := $(call get-executable,$(PYTHON)-config)
++ PYTHON_CONFIG := $(if $(PYTHON_CONFIG_AUTO),$(PYTHON_CONFIG_AUTO),\
++ $(call $(error $(PYTHON)-config not found)))
++ endif
++endif
++
++# Select either auto detected python and python-config or use user supplied values if they are
++# defined. get-executable-or-default fails with an error if the first argument is supplied but
++# doesn't exist.
++override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
++override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG)))
+
+ grep-libs = $(filter -l%,$(1))
+ strip-libs = $(filter-out -l%,$(1))
+@@ -267,16 +285,18 @@ ifdef PYTHON_CONFIG
+ PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
+ PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
+ FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
++ ifeq ($(CC_NO_CLANG), 0)
++ PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
++ endif
+ endif
+
+ FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+-FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+-FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
+
+ FEATURE_CHECK_LDFLAGS-libaio = -lrt
+
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
++FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
+
+ CORE_CFLAGS += -fno-omit-frame-pointer
+ CORE_CFLAGS += -ggdb3
+@@ -544,9 +564,16 @@ ifndef NO_LIBELF
+ ifeq ($(feature-libbpf), 1)
+ EXTLIBS += -lbpf
+ $(call detected,CONFIG_LIBBPF_DYNAMIC)
++
++ $(call feature_check,libbpf-btf__load_from_kernel_by_id)
++ ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
++ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
++ endif
+ else
+ dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
+ endif
++ else
++ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+ endif
+ endif
+
+@@ -784,6 +811,9 @@ else
+ LDFLAGS += $(PERL_EMBED_LDFLAGS)
+ EXTLIBS += $(PERL_EMBED_LIBADD)
+ CFLAGS += -DHAVE_LIBPERL_SUPPORT
++ ifeq ($(CC_NO_CLANG), 0)
++ CFLAGS += -Wno-compound-token-split-by-macro
++ endif
+ $(call detected,CONFIG_LIBPERL)
+ endif
+ endif
+@@ -843,13 +873,16 @@ ifndef NO_LIBBFD
+ ifeq ($(feature-libbfd-liberty), 1)
+ EXTLIBS += -lbfd -lopcodes -liberty
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
++ FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
+ else
+ ifeq ($(feature-libbfd-liberty-z), 1)
+ EXTLIBS += -lbfd -lopcodes -liberty -lz
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
++ FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
+ endif
+ endif
+ $(call feature_check,disassembler-four-args)
++ $(call feature_check,disassembler-init-styled)
+ endif
+
+ ifeq ($(feature-libbfd-buildid), 1)
+@@ -963,6 +996,10 @@ ifeq ($(feature-disassembler-four-args), 1)
+ CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+ endif
+
++ifeq ($(feature-disassembler-init-styled), 1)
++ CFLAGS += -DDISASM_INIT_STYLED
++endif
++
+ ifeq (${IS_64_BIT}, 1)
+ ifndef NO_PERF_READ_VDSO32
+ $(call feature_check,compile-32)
+diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
+index 9fcb4e68add93..78dfc282e5e2b 100644
+--- a/tools/perf/arch/arm64/util/Build
++++ b/tools/perf/arch/arm64/util/Build
+@@ -1,5 +1,4 @@
+ perf-y += header.o
+-perf-y += machine.o
+ perf-y += perf_regs.o
+ perf-y += tsc.o
+ perf-y += pmu.o
+diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
+index a4420d4df5033..7d589a705fc89 100644
+--- a/tools/perf/arch/arm64/util/arm-spe.c
++++ b/tools/perf/arch/arm64/util/arm-spe.c
+@@ -154,6 +154,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
+ arm_spe_set_timestamp(itr, arm_spe_evsel);
+ }
+
++ /*
++ * Set this only so that perf report knows that SPE generates memory info. It has no effect
++ * on the opening of the event or the SPE data produced.
++ */
++ evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
++
+ /* Add dummy event to keep tracking */
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
+deleted file mode 100644
+index 7e7714290a873..0000000000000
+--- a/tools/perf/arch/arm64/util/machine.c
++++ /dev/null
+@@ -1,28 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-
+-#include <inttypes.h>
+-#include <stdio.h>
+-#include <string.h>
+-#include "debug.h"
+-#include "symbol.h"
+-
+-/* On arm64, kernel text segment starts at high memory address,
+- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
+- * address, like 0xffff 0000 00ax xxxx. When only small amount of
+- * memory is used by modules, gap between end of module's text segment
+- * and start of kernel text segment may reach 2G.
+- * Therefore do not fill this gap and do not assign it to the kernel dso map.
+- */
+-
+-#define SYMBOL_LIMIT (1 << 12) /* 4K */
+-
+-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+-{
+- if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
+- (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
+- /* Limit range of last symbol in module and kernel */
+- p->end += SYMBOL_LIMIT;
+- else
+- p->end = c->start;
+- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
+-}
+diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
+index 8a79c4126e5b4..0115f31665684 100644
+--- a/tools/perf/arch/powerpc/util/Build
++++ b/tools/perf/arch/powerpc/util/Build
+@@ -1,5 +1,4 @@
+ perf-y += header.o
+-perf-y += machine.o
+ perf-y += kvm-stat.o
+ perf-y += perf_regs.o
+ perf-y += mem-events.o
+diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
+deleted file mode 100644
+index e652a1aa81322..0000000000000
+--- a/tools/perf/arch/powerpc/util/machine.c
++++ /dev/null
+@@ -1,25 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-
+-#include <inttypes.h>
+-#include <stdio.h>
+-#include <string.h>
+-#include <internal/lib.h> // page_size
+-#include "debug.h"
+-#include "symbol.h"
+-
+-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
+- * whereas the modules are located at very high memory addresses,
+- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
+- * and beginning of first module's text segment is very high.
+- * Therefore do not fill this gap and do not assign it to the kernel dso map.
+- */
+-
+-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+-{
+- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+- /* Limit the range of last kernel symbol */
+- p->end += page_size;
+- else
+- p->end = c->start;
+- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
+-}
+diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
+index 7644a4f6d4a40..98bc3f39d5f35 100644
+--- a/tools/perf/arch/s390/util/machine.c
++++ b/tools/perf/arch/s390/util/machine.c
+@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
+
+ return 0;
+ }
+-
+-/* On s390 kernel text segment start is located at very low memory addresses,
+- * for example 0x10000. Modules are located at very high memory addresses,
+- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
+- * and beginning of first module's text segment is very big.
+- * Therefore do not fill this gap and do not assign it to the kernel dso map.
+- */
+-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+-{
+- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+- /* Last kernel symbol mapped to end of page */
+- p->end = roundup(p->end, page_size);
+- else
+- p->end = c->start;
+- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
+-}
+diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+index c933e3dcd0a82..9589314d60b72 100644
+--- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
++++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+@@ -166,6 +166,14 @@ struct test_data {
+ {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
+ {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
+ {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
++ /* Control Flow Event Packet */
++ {4, {0x02, 0x13, 0x01, 0x03}, 0, {INTEL_PT_CFE, 1, 3}, 0, 0 },
++ {4, {0x02, 0x13, 0x81, 0x03}, 0, {INTEL_PT_CFE_IP, 1, 3}, 0, 0 },
++ {4, {0x02, 0x13, 0x1f, 0x00}, 0, {INTEL_PT_CFE, 0x1f, 0}, 0, 0 },
++ {4, {0x02, 0x13, 0x9f, 0xff}, 0, {INTEL_PT_CFE_IP, 0x1f, 0xff}, 0, 0 },
++ /* Event Data Packet */
++ {11, {0x02, 0x53, 0x09, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_EVD, 0x09, 0x7060504030201}, 0, 0 },
++ {11, {0x02, 0x53, 0x3f, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_EVD, 0x3f, 0x8070605040302}, 0, 0 },
+ /* Terminator */
+ {0, {0}, 0, {0, 0, 0}, 0, 0 },
+ };
+diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
+index 207c56805c551..0ed177991ad05 100644
+--- a/tools/perf/arch/x86/util/perf_regs.c
++++ b/tools/perf/arch/x86/util/perf_regs.c
+@@ -9,6 +9,8 @@
+ #include "../../../util/perf_regs.h"
+ #include "../../../util/debug.h"
+ #include "../../../util/event.h"
++#include "../../../util/pmu.h"
++#include "../../../util/pmu-hybrid.h"
+
+ const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(AX, PERF_REG_X86_AX),
+@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
+ .disabled = 1,
+ .exclude_kernel = 1,
+ };
++ struct perf_pmu *pmu;
+ int fd;
+ /*
+ * In an unnamed union, init it here to build on older gcc versions
+ */
+ attr.sample_period = 1;
+
++ if (perf_pmu__has_hybrid()) {
++ /*
++ * The same register set is supported among different hybrid PMUs.
++ * Only check the first available one.
++ */
++ pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
++ attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
++ }
++
+ event_attr_init(&attr);
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
+index b3480bc33fe84..baa7c63014004 100644
+--- a/tools/perf/bench/bench.h
++++ b/tools/perf/bench/bench.h
+@@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime;
+ * The madvise transparent hugepage constants were added in glibc
+ * 2.13. For compatibility with older versions of glibc, define these
+ * tokens if they are not already defined.
+- *
+- * PA-RISC uses different madvise values from other architectures and
+- * needs to be special-cased.
+ */
+-#ifdef __hppa__
+-# ifndef MADV_HUGEPAGE
+-# define MADV_HUGEPAGE 67
+-# endif
+-# ifndef MADV_NOHUGEPAGE
+-# define MADV_NOHUGEPAGE 68
+-# endif
+-#else
+ # ifndef MADV_HUGEPAGE
+ # define MADV_HUGEPAGE 14
+ # endif
+ # ifndef MADV_NOHUGEPAGE
+ # define MADV_NOHUGEPAGE 15
+ # endif
+-#endif
+
+ int bench_numa(int argc, const char **argv);
+ int bench_sched_messaging(int argc, const char **argv);
+diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
+index 5d1fe9c35807a..137890f78e17a 100644
+--- a/tools/perf/bench/futex-lock-pi.c
++++ b/tools/perf/bench/futex-lock-pi.c
+@@ -233,6 +233,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
+ print_summary();
+
+ free(worker);
++ perf_cpu_map__put(cpu);
+ return ret;
+ err:
+ usage_with_options(bench_futex_lock_pi_usage, options);
+diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
+index 97fe31fd3a236..f7a5ffebb9408 100644
+--- a/tools/perf/bench/futex-requeue.c
++++ b/tools/perf/bench/futex-requeue.c
+@@ -294,6 +294,7 @@ int bench_futex_requeue(int argc, const char **argv)
+ print_summary();
+
+ free(worker);
++ perf_cpu_map__put(cpu);
+ return ret;
+ err:
+ usage_with_options(bench_futex_requeue_usage, options);
+diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
+index e970e6b9ad535..0983f40b4b408 100644
+--- a/tools/perf/bench/futex-wake-parallel.c
++++ b/tools/perf/bench/futex-wake-parallel.c
+@@ -329,6 +329,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
+ print_summary();
+
+ free(blocked_worker);
++ perf_cpu_map__put(cpu);
+ return ret;
+ }
+ #endif /* HAVE_PTHREAD_BARRIER */
+diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
+index 77f058a477903..2226a475e782b 100644
+--- a/tools/perf/bench/futex-wake.c
++++ b/tools/perf/bench/futex-wake.c
+@@ -222,5 +222,6 @@ int bench_futex_wake(int argc, const char **argv)
+ print_summary();
+
+ free(worker);
++ perf_cpu_map__put(cpu);
+ return ret;
+ }
+diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
+index f2640179ada9e..c2c81567afa50 100644
+--- a/tools/perf/bench/numa.c
++++ b/tools/perf/bench/numa.c
+@@ -1672,7 +1672,7 @@ static int __bench_numa(const char *name)
+ "GB/sec,", "total-speed", "GB/sec total speed");
+
+ if (g->p.show_details >= 2) {
+- char tname[14 + 2 * 10 + 1];
++ char tname[14 + 2 * 11 + 1];
+ struct thread_data *td;
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
+index d0895162c2ba6..ac72bcccc353b 100644
+--- a/tools/perf/builtin-bench.c
++++ b/tools/perf/builtin-bench.c
+@@ -21,6 +21,7 @@
+ #include "builtin.h"
+ #include "bench/bench.h"
+
++#include <locale.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+@@ -226,7 +227,6 @@ static void run_collection(struct collection *coll)
+ if (!bench->fn)
+ break;
+ printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
+- fflush(stdout);
+
+ argv[1] = bench->name;
+ run_bench(coll->name, bench->name, bench->fn, 1, argv);
+@@ -247,6 +247,10 @@ int cmd_bench(int argc, const char **argv)
+ struct collection *coll;
+ int ret = 0;
+
++ /* Unbuffered output */
++ setvbuf(stdout, NULL, _IONBF, 0);
++ setlocale(LC_ALL, "");
++
+ if (argc < 2) {
+ /* No collection specified. */
+ print_usage();
+@@ -300,7 +304,6 @@ int cmd_bench(int argc, const char **argv)
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
+- fflush(stdout);
+ ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
+ goto end;
+ }
+diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
+index a192014fa52b2..e6f900c3accb0 100644
+--- a/tools/perf/builtin-c2c.c
++++ b/tools/perf/builtin-c2c.c
+@@ -924,8 +924,8 @@ percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ double per_left;
+ double per_right;
+
+- per_left = PERCENT(left, lcl_hitm);
+- per_right = PERCENT(right, lcl_hitm);
++ per_left = PERCENT(left, rmt_hitm);
++ per_right = PERCENT(right, rmt_hitm);
+
+ return per_left - per_right;
+ }
+@@ -2733,9 +2733,7 @@ static int perf_c2c__report(int argc, const char **argv)
+ "the input file to process"),
+ OPT_INCR('N', "node-info", &c2c.node_info,
+ "show extra node info in report (repeat for more info)"),
+-#ifdef HAVE_SLANG_SUPPORT
+ OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
+-#endif
+ OPT_BOOLEAN(0, "stats", &c2c.stats_only,
+ "Display only statistic tables (implies --stdio)"),
+ OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
+@@ -2765,6 +2763,10 @@ static int perf_c2c__report(int argc, const char **argv)
+ if (argc)
+ usage_with_options(report_c2c_usage, options);
+
++#ifndef HAVE_SLANG_SUPPORT
++ c2c.use_stdio = true;
++#endif
++
+ if (c2c.stats_only)
+ c2c.use_stdio = true;
+
+diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
+index 6ad191e731fc9..8e7a65a8d86ed 100644
+--- a/tools/perf/builtin-inject.c
++++ b/tools/perf/builtin-inject.c
+@@ -142,14 +142,14 @@ static int perf_event__repipe_event_update(struct perf_tool *tool,
+
+ #ifdef HAVE_AUXTRACE_SUPPORT
+
+-static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
++static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
+ {
+ char buf[4096];
+ ssize_t ssz;
+ int ret;
+
+ while (size > 0) {
+- ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
++ ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
+ if (ssz < 0)
+ return -errno;
+ ret = output_bytes(inject, buf, ssz);
+@@ -187,7 +187,7 @@ static s64 perf_event__repipe_auxtrace(struct perf_session *session,
+ ret = output_bytes(inject, event, event->header.size);
+ if (ret < 0)
+ return ret;
+- ret = copy_bytes(inject, perf_data__fd(session->data),
++ ret = copy_bytes(inject, session->data,
+ event->auxtrace.size);
+ } else {
+ ret = output_bytes(inject, event,
+@@ -463,6 +463,7 @@ static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
+ dso->hit = 1;
+ }
+ dso__put(dso);
++ perf_event__repipe(tool, event, sample, machine);
+ return 0;
+ }
+
+@@ -755,12 +756,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str,
+ return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
+ }
+
++static int output_fd(struct perf_inject *inject)
++{
++ return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
++}
++
+ static int __cmd_inject(struct perf_inject *inject)
+ {
+ int ret = -EINVAL;
+ struct perf_session *session = inject->session;
+- struct perf_data *data_out = &inject->output;
+- int fd = inject->in_place_update ? -1 : perf_data__fd(data_out);
++ int fd = output_fd(inject);
+ u64 output_data_offset;
+
+ signal(SIGINT, sig_handler);
+@@ -819,7 +824,7 @@ static int __cmd_inject(struct perf_inject *inject)
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
+ /* Allow space in the header for new attributes */
+- output_data_offset = 4096;
++ output_data_offset = roundup(8192 + session->header.data_offset, 4096);
+ if (inject->strip)
+ strip_init(inject);
+ }
+@@ -1006,7 +1011,7 @@ int cmd_inject(int argc, const char **argv)
+ }
+
+ inject.session = __perf_session__new(&data, repipe,
+- perf_data__fd(&inject.output),
++ output_fd(&inject),
+ &inject.tool);
+ if (IS_ERR(inject.session)) {
+ ret = PTR_ERR(inject.session);
+@@ -1069,7 +1074,8 @@ out_delete:
+ zstd_fini(&(inject.session->zstd_data));
+ perf_session__delete(inject.session);
+ out_close_output:
+- perf_data__close(&inject.output);
++ if (!inject.in_place_update)
++ perf_data__close(&inject.output);
+ free(inject.itrace_synth_opts.vm_tm_corr_args);
+ return ret;
+ }
+diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
+index a0316ce910db6..6583ad9cc7deb 100644
+--- a/tools/perf/builtin-report.c
++++ b/tools/perf/builtin-report.c
+@@ -349,6 +349,7 @@ static int report__setup_sample_type(struct report *rep)
+ struct perf_session *session = rep->session;
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
+ bool is_pipe = perf_data__is_pipe(session->data);
++ struct evsel *evsel;
+
+ if (session->itrace_synth_opts->callchain ||
+ session->itrace_synth_opts->add_callchain ||
+@@ -403,6 +404,19 @@ static int report__setup_sample_type(struct report *rep)
+ }
+
+ if (sort__mode == SORT_MODE__MEMORY) {
++ /*
++ * FIXUP: prior to kernel 5.18, Arm SPE missed to set
++ * PERF_SAMPLE_DATA_SRC bit in sample type. For backward
++ * compatibility, set the bit if it's an old perf data file.
++ */
++ evlist__for_each_entry(session->evlist, evsel) {
++ if (strstr(evsel->name, "arm_spe") &&
++ !(sample_type & PERF_SAMPLE_DATA_SRC)) {
++ evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
++ sample_type |= PERF_SAMPLE_DATA_SRC;
++ }
++ }
++
+ if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+ ui__error("Selected --mem-mode but no mem data. "
+ "Did you call perf record without -d?\n");
+@@ -619,14 +633,17 @@ static int report__browse_hists(struct report *rep)
+ int ret;
+ struct perf_session *session = rep->session;
+ struct evlist *evlist = session->evlist;
+- const char *help = perf_tip(system_path(TIPDIR));
++ char *help = NULL, *path = NULL;
+
+- if (help == NULL) {
++ path = system_path(TIPDIR);
++ if (perf_tip(&help, path) || help == NULL) {
+ /* fallback for people who don't install perf ;-) */
+- help = perf_tip(DOCDIR);
+- if (help == NULL)
+- help = "Cannot load tips.txt file, please install perf!";
++ free(path);
++ path = system_path(DOCDIR);
++ if (perf_tip(&help, path) || help == NULL)
++ help = strdup("Cannot load tips.txt file, please install perf!");
+ }
++ free(path);
+
+ switch (use_browser) {
+ case 1:
+@@ -651,7 +668,7 @@ static int report__browse_hists(struct report *rep)
+ ret = evlist__tty_browse_hists(evlist, rep, help);
+ break;
+ }
+-
++ free(help);
+ return ret;
+ }
+
+diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
+index c32c2eb16d7df..34e809c934d72 100644
+--- a/tools/perf/builtin-script.c
++++ b/tools/perf/builtin-script.c
+@@ -435,6 +435,9 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
+ struct perf_event_attr *attr = &evsel->core.attr;
+ bool allow_user_set;
+
++ if (evsel__is_dummy_event(evsel))
++ return 0;
++
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
+@@ -455,7 +458,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
+ return -EINVAL;
+
+ if (PRINT_FIELD(DATA_SRC) &&
+- evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
++ evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set))
+ return -EINVAL;
+
+ if (PRINT_FIELD(WEIGHT) &&
+@@ -2257,6 +2260,9 @@ out_put:
+ return ret;
+ }
+
++// Used when scr->per_event_dump is not set
++static struct evsel_script es_stdout;
++
+ static int process_attr(struct perf_tool *tool, union perf_event *event,
+ struct evlist **pevlist)
+ {
+@@ -2265,7 +2271,6 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
+ struct evsel *evsel, *pos;
+ u64 sample_type;
+ int err;
+- static struct evsel_script *es;
+
+ err = perf_event__process_attr(tool, event, pevlist);
+ if (err)
+@@ -2275,14 +2280,13 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
+ evsel = evlist__last(*pevlist);
+
+ if (!evsel->priv) {
+- if (scr->per_event_dump) {
++ if (scr->per_event_dump) {
+ evsel->priv = evsel_script__new(evsel, scr->session->data);
+- } else {
+- es = zalloc(sizeof(*es));
+- if (!es)
++ if (!evsel->priv)
+ return -ENOMEM;
+- es->fp = stdout;
+- evsel->priv = es;
++ } else { // Replicate what is done in perf_script__setup_per_event_dump()
++ es_stdout.fp = stdout;
++ evsel->priv = &es_stdout;
+ }
+ }
+
+@@ -2463,7 +2467,7 @@ static int process_switch_event(struct perf_tool *tool,
+ if (perf_event__process_switch(tool, event, sample, machine) < 0)
+ return -1;
+
+- if (scripting_ops && scripting_ops->process_switch)
++ if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample))
+ scripting_ops->process_switch(event, sample, machine);
+
+ if (!script->show_switch_events)
+@@ -2588,7 +2592,6 @@ out_err_fclose:
+ static int perf_script__setup_per_event_dump(struct perf_script *script)
+ {
+ struct evsel *evsel;
+- static struct evsel_script es_stdout;
+
+ if (script->per_event_dump)
+ return perf_script__fopen_per_event_dump(script);
+diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
+index f0ecfda34eceb..efae2998a472f 100644
+--- a/tools/perf/builtin-stat.c
++++ b/tools/perf/builtin-stat.c
+@@ -405,36 +405,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
+
+ static int read_affinity_counters(struct timespec *rs)
+ {
+- struct evsel *counter;
+- struct affinity affinity;
+- int i, ncpus, cpu;
++ struct evlist_cpu_iterator evlist_cpu_itr;
++ struct affinity saved_affinity, *affinity;
+
+ if (all_counters_use_bpf)
+ return 0;
+
+- if (affinity__setup(&affinity) < 0)
++ if (!target__has_cpu(&target) || target__has_per_thread(&target))
++ affinity = NULL;
++ else if (affinity__setup(&saved_affinity) < 0)
+ return -1;
++ else
++ affinity = &saved_affinity;
+
+- ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+- if (!target__has_cpu(&target) || target__has_per_thread(&target))
+- ncpus = 1;
+- evlist__for_each_cpu(evsel_list, i, cpu) {
+- if (i >= ncpus)
+- break;
+- affinity__set(&affinity, cpu);
++ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
++ struct evsel *counter = evlist_cpu_itr.evsel;
+
+- evlist__for_each_entry(evsel_list, counter) {
+- if (evsel__cpu_iter_skip(counter, cpu))
+- continue;
+- if (evsel__is_bpf(counter))
+- continue;
+- if (!counter->err) {
+- counter->err = read_counter_cpu(counter, rs,
+- counter->cpu_iter - 1);
+- }
++ if (evsel__is_bpf(counter))
++ continue;
++
++ if (!counter->err) {
++ counter->err = read_counter_cpu(counter, rs,
++ evlist_cpu_itr.cpu_map_idx);
+ }
+ }
+- affinity__cleanup(&affinity);
++ if (affinity)
++ affinity__cleanup(&saved_affinity);
++
+ return 0;
+ }
+
+@@ -558,26 +555,9 @@ static int enable_counters(void)
+ return err;
+ }
+
+- if (stat_config.initial_delay < 0) {
+- pr_info(EVLIST_DISABLED_MSG);
+- return 0;
+- }
+-
+- if (stat_config.initial_delay > 0) {
+- pr_info(EVLIST_DISABLED_MSG);
+- usleep(stat_config.initial_delay * USEC_PER_MSEC);
+- }
+-
+- /*
+- * We need to enable counters only if:
+- * - we don't have tracee (attaching to task or cpu)
+- * - we have initial delay configured
+- */
+- if (!target__none(&target) || stat_config.initial_delay) {
++ if (!target__enable_on_exec(&target)) {
+ if (!all_counters_use_bpf)
+ evlist__enable(evsel_list);
+- if (stat_config.initial_delay > 0)
+- pr_info(EVLIST_ENABLED_MSG);
+ }
+ return 0;
+ }
+@@ -788,8 +768,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
+ int status = 0;
+ const bool forks = (argc > 0);
+ bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
++ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity affinity;
+- int i, cpu, err;
++ int err;
+ bool second_pass = false;
+
+ if (forks) {
+@@ -807,108 +788,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
+ return -1;
+
+ evlist__for_each_entry(evsel_list, counter) {
++ counter->reset_group = false;
+ if (bpf_counter__load(counter, &target))
+ return -1;
+- if (!evsel__is_bpf(counter))
++ if (!(evsel__is_bperf(counter)))
+ all_counters_use_bpf = false;
+ }
+
+- evlist__for_each_cpu (evsel_list, i, cpu) {
++ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
++ counter = evlist_cpu_itr.evsel;
++
+ /*
+ * bperf calls evsel__open_per_cpu() in bperf__load(), so
+ * no need to call it again here.
+ */
+ if (target.use_bpf)
+ break;
+- affinity__set(&affinity, cpu);
+
+- evlist__for_each_entry(evsel_list, counter) {
+- if (evsel__cpu_iter_skip(counter, cpu))
++ if (counter->reset_group || counter->errored)
++ continue;
++ if (evsel__is_bperf(counter))
++ continue;
++try_again:
++ if (create_perf_stat_counter(counter, &stat_config, &target,
++ evlist_cpu_itr.cpu_map_idx) < 0) {
++
++ /*
++ * Weak group failed. We cannot just undo this here
++ * because earlier CPUs might be in group mode, and the kernel
++ * doesn't support mixing group and non group reads. Defer
++ * it to later.
++ * Don't close here because we're in the wrong affinity.
++ */
++ if ((errno == EINVAL || errno == EBADF) &&
++ evsel__leader(counter) != counter &&
++ counter->weak_group) {
++ evlist__reset_weak_group(evsel_list, counter, false);
++ assert(counter->reset_group);
++ second_pass = true;
++ continue;
++ }
++
++ switch (stat_handle_error(counter)) {
++ case COUNTER_FATAL:
++ return -1;
++ case COUNTER_RETRY:
++ goto try_again;
++ case COUNTER_SKIP:
++ continue;
++ default:
++ break;
++ }
++
++ }
++ counter->supported = true;
++ }
++
++ if (second_pass) {
++ /*
++ * Now redo all the weak group after closing them,
++ * and also close errored counters.
++ */
++
++ /* First close errored or weak retry */
++ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
++ counter = evlist_cpu_itr.evsel;
++
++ if (!counter->reset_group && !counter->errored)
+ continue;
+- if (counter->reset_group || counter->errored)
++
++ perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
++ }
++ /* Now reopen weak */
++ evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
++ counter = evlist_cpu_itr.evsel;
++
++ if (!counter->reset_group && !counter->errored)
+ continue;
+- if (evsel__is_bpf(counter))
++ if (!counter->reset_group)
+ continue;
+-try_again:
++try_again_reset:
++ pr_debug2("reopening weak %s\n", evsel__name(counter));
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+- counter->cpu_iter - 1) < 0) {
+-
+- /*
+- * Weak group failed. We cannot just undo this here
+- * because earlier CPUs might be in group mode, and the kernel
+- * doesn't support mixing group and non group reads. Defer
+- * it to later.
+- * Don't close here because we're in the wrong affinity.
+- */
+- if ((errno == EINVAL || errno == EBADF) &&
+- evsel__leader(counter) != counter &&
+- counter->weak_group) {
+- evlist__reset_weak_group(evsel_list, counter, false);
+- assert(counter->reset_group);
+- second_pass = true;
+- continue;
+- }
++ evlist_cpu_itr.cpu_map_idx) < 0) {
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+- goto try_again;
++ goto try_again_reset;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
+ }
+-
+ }
+ counter->supported = true;
+ }
+ }
+-
+- if (second_pass) {
+- /*
+- * Now redo all the weak group after closing them,
+- * and also close errored counters.
+- */
+-
+- evlist__for_each_cpu(evsel_list, i, cpu) {
+- affinity__set(&affinity, cpu);
+- /* First close errored or weak retry */
+- evlist__for_each_entry(evsel_list, counter) {
+- if (!counter->reset_group && !counter->errored)
+- continue;
+- if (evsel__cpu_iter_skip_no_inc(counter, cpu))
+- continue;
+- perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
+- }
+- /* Now reopen weak */
+- evlist__for_each_entry(evsel_list, counter) {
+- if (!counter->reset_group && !counter->errored)
+- continue;
+- if (evsel__cpu_iter_skip(counter, cpu))
+- continue;
+- if (!counter->reset_group)
+- continue;
+-try_again_reset:
+- pr_debug2("reopening weak %s\n", evsel__name(counter));
+- if (create_perf_stat_counter(counter, &stat_config, &target,
+- counter->cpu_iter - 1) < 0) {
+-
+- switch (stat_handle_error(counter)) {
+- case COUNTER_FATAL:
+- return -1;
+- case COUNTER_RETRY:
+- goto try_again_reset;
+- case COUNTER_SKIP:
+- continue;
+- default:
+- break;
+- }
+- }
+- counter->supported = true;
+- }
+- }
+- }
+ affinity__cleanup(&affinity);
+
+ evlist__for_each_entry(evsel_list, counter) {
+@@ -952,18 +929,31 @@ try_again_reset:
+ return err;
+ }
+
+- /*
+- * Enable counters and exec the command:
+- */
+- if (forks) {
++ if (target.initial_delay) {
++ pr_info(EVLIST_DISABLED_MSG);
++ } else {
++ err = enable_counters();
++ if (err)
++ return -1;
++ }
++
++ /* Exec the command, if any */
++ if (forks)
+ evlist__start_workload(evsel_list);
++
++ if (target.initial_delay > 0) {
++ usleep(target.initial_delay * USEC_PER_MSEC);
+ err = enable_counters();
+ if (err)
+ return -1;
+
+- t0 = rdclock();
+- clock_gettime(CLOCK_MONOTONIC, &ref_time);
++ pr_info(EVLIST_ENABLED_MSG);
++ }
++
++ t0 = rdclock();
++ clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
++ if (forks) {
+ if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+ status = dispatch_events(forks, timeout, interval, &times);
+ if (child_pid != -1) {
+@@ -981,13 +971,6 @@ try_again_reset:
+ if (WIFSIGNALED(status))
+ psignal(WTERMSIG(status), argv[0]);
+ } else {
+- err = enable_counters();
+- if (err)
+- return -1;
+-
+- t0 = rdclock();
+- clock_gettime(CLOCK_MONOTONIC, &ref_time);
+-
+ status = dispatch_events(forks, timeout, interval, &times);
+ }
+
+@@ -1249,7 +1232,7 @@ static struct option stat_options[] = {
+ "aggregate counts per thread", AGGR_THREAD),
+ OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
+ "aggregate counts per numa node", AGGR_NODE),
+- OPT_INTEGER('D', "delay", &stat_config.initial_delay,
++ OPT_INTEGER('D', "delay", &target.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
+ OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
+ "Only print computed metrics. No raw values", enable_metric_only),
+diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
+index a3ae9176a83e2..6fdd401ec9c56 100644
+--- a/tools/perf/builtin-top.c
++++ b/tools/perf/builtin-top.c
+@@ -1743,6 +1743,7 @@ int cmd_top(int argc, const char **argv)
+ top.session = perf_session__new(NULL, NULL);
+ if (IS_ERR(top.session)) {
+ status = PTR_ERR(top.session);
++ top.session = NULL;
+ goto out_delete_evlist;
+ }
+
+diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
+index 2bf21194c7b39..6755370483b06 100644
+--- a/tools/perf/builtin-trace.c
++++ b/tools/perf/builtin-trace.c
+@@ -17,7 +17,9 @@
+ #include "util/record.h"
+ #include <traceevent/event-parse.h>
+ #include <api/fs/tracing_path.h>
++#ifdef HAVE_LIBBPF_SUPPORT
+ #include <bpf/bpf.h>
++#endif
+ #include "util/bpf_map.h"
+ #include "util/rlimit.h"
+ #include "builtin.h"
+@@ -87,6 +89,8 @@
+ # define F_LINUX_SPECIFIC_BASE 1024
+ #endif
+
++#define RAW_SYSCALL_ARGS_NUM 6
++
+ /*
+ * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
+ */
+@@ -107,7 +111,7 @@ struct syscall_fmt {
+ const char *sys_enter,
+ *sys_exit;
+ } bpf_prog_name;
+- struct syscall_arg_fmt arg[6];
++ struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM];
+ u8 nr_args;
+ bool errpid;
+ bool timeout;
+@@ -1224,7 +1228,7 @@ struct syscall {
+ */
+ struct bpf_map_syscall_entry {
+ bool enabled;
+- u16 string_args_len[6];
++ u16 string_args_len[RAW_SYSCALL_ARGS_NUM];
+ };
+
+ /*
+@@ -1649,7 +1653,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
+ {
+ int idx;
+
+- if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
++ if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0)
+ nr_args = sc->fmt->nr_args;
+
+ sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
+@@ -1782,11 +1786,11 @@ static int trace__read_syscall_info(struct trace *trace, int id)
+ #endif
+ sc = trace->syscalls.table + id;
+ if (sc->nonexistent)
+- return 0;
++ return -EEXIST;
+
+ if (name == NULL) {
+ sc->nonexistent = true;
+- return 0;
++ return -EEXIST;
+ }
+
+ sc->name = name;
+@@ -1800,11 +1804,18 @@ static int trace__read_syscall_info(struct trace *trace, int id)
+ sc->tp_format = trace_event__tp_format("syscalls", tp_name);
+ }
+
+- if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
+- return -ENOMEM;
+-
+- if (IS_ERR(sc->tp_format))
++ /*
++ * Fails to read trace point format via sysfs node, so the trace point
++ * doesn't exist. Set the 'nonexistent' flag as true.
++ */
++ if (IS_ERR(sc->tp_format)) {
++ sc->nonexistent = true;
+ return PTR_ERR(sc->tp_format);
++ }
++
++ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
++ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
++ return -ENOMEM;
+
+ sc->args = sc->tp_format->format.fields;
+ /*
+@@ -2122,11 +2133,8 @@ static struct syscall *trace__syscall_info(struct trace *trace,
+ (err = trace__read_syscall_info(trace, id)) != 0)
+ goto out_cant_read;
+
+- if (trace->syscalls.table[id].name == NULL) {
+- if (trace->syscalls.table[id].nonexistent)
+- return NULL;
++ if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
+ goto out_cant_read;
+- }
+
+ return &trace->syscalls.table[id];
+
+@@ -2279,7 +2287,7 @@ static void syscall__exit(struct syscall *sc)
+ if (!sc)
+ return;
+
+- free(sc->arg_fmt);
++ zfree(&sc->arg_fmt);
+ }
+
+ static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
+@@ -3116,13 +3124,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist)
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+- struct evsel_trace *et = evsel->priv;
+-
+- if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
+- continue;
+-
+- free(et->fmt);
+- free(et);
++ evsel_trace__delete(evsel->priv);
++ evsel->priv = NULL;
+ }
+ }
+
+@@ -4740,11 +4743,11 @@ static void trace__exit(struct trace *trace)
+ int i;
+
+ strlist__delete(trace->ev_qualifier);
+- free(trace->ev_qualifier_ids.entries);
++ zfree(&trace->ev_qualifier_ids.entries);
+ if (trace->syscalls.table) {
+ for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
+ syscall__exit(&trace->syscalls.table[i]);
+- free(trace->syscalls.table);
++ zfree(&trace->syscalls.table);
+ }
+ syscalltbl__delete(trace->sctbl);
+ zfree(&trace->perfconfig_events);
+diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
+index fdf75d45efff7..978249d7868c2 100644
+--- a/tools/perf/perf-completion.sh
++++ b/tools/perf/perf-completion.sh
+@@ -165,7 +165,12 @@ __perf_main ()
+
+ local cur1=${COMP_WORDS[COMP_CWORD]}
+ local raw_evts=$($cmd list --raw-dump)
+- local arr s tmp result
++ local arr s tmp result cpu_evts
++
++ # aarch64 doesn't have /sys/bus/event_source/devices/cpu/events
++ if [[ `uname -m` != aarch64 ]]; then
++ cpu_evts=$(ls /sys/bus/event_source/devices/cpu/events)
++ fi
+
+ if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then
+ OLD_IFS="$IFS"
+@@ -183,9 +188,9 @@ __perf_main ()
+ fi
+ done
+
+- evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events)
++ evts=${result}" "${cpu_evts}
+ else
+- evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events)
++ evts=${raw_evts}" "${cpu_evts}
+ fi
+
+ if [[ "$cur1" == , ]]; then
+diff --git a/tools/perf/perf.c b/tools/perf/perf.c
+index 2f6b67189b426..6aae7b6c376b4 100644
+--- a/tools/perf/perf.c
++++ b/tools/perf/perf.c
+@@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void)
+ static int libperf_print(enum libperf_print_level level,
+ const char *fmt, va_list ap)
+ {
+- return eprintf(level, verbose, fmt, ap);
++ return veprintf(level, verbose, fmt, ap);
+ }
+
+ int main(int argc, const char **argv)
+diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
+index dda8e59149d22..be23d3c89a791 100644
+--- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
++++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json
+@@ -112,21 +112,21 @@
+ "MetricName": "indirect_branch"
+ },
+ {
+- "MetricExpr": "(armv8_pmuv3_0@event\\=0x1014@ + armv8_pmuv3_0@event\\=0x1018@) / BR_MIS_PRED",
++ "MetricExpr": "(armv8_pmuv3_0@event\\=0x1013@ + armv8_pmuv3_0@event\\=0x1016@) / BR_MIS_PRED",
+ "PublicDescription": "Push branch L3 topdown metric",
+ "BriefDescription": "Push branch L3 topdown metric",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "push_branch"
+ },
+ {
+- "MetricExpr": "armv8_pmuv3_0@event\\=0x100c@ / BR_MIS_PRED",
++ "MetricExpr": "armv8_pmuv3_0@event\\=0x100d@ / BR_MIS_PRED",
+ "PublicDescription": "Pop branch L3 topdown metric",
+ "BriefDescription": "Pop branch L3 topdown metric",
+ "MetricGroup": "TopDownL3",
+ "MetricName": "pop_branch"
+ },
+ {
+- "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1014@ - armv8_pmuv3_0@event\\=0x1018@ - armv8_pmuv3_0@event\\=0x100c@) / BR_MIS_PRED",
++ "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1013@ - armv8_pmuv3_0@event\\=0x1016@ - armv8_pmuv3_0@event\\=0x100d@) / BR_MIS_PRED",
+ "PublicDescription": "Other branch L3 topdown metric",
+ "BriefDescription": "Other branch L3 topdown metric",
+ "MetricGroup": "TopDownL3",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+index 605be14f441c8..9cb929bb64afd 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+@@ -17,7 +17,7 @@
+ {
+ "EventCode": "0x34056",
+ "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
++ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+ },
+ {
+ "EventCode": "0x3006C",
+@@ -27,7 +27,7 @@
+ {
+ "EventCode": "0x300F4",
+ "EventName": "PM_RUN_INST_CMPL_CONC",
+- "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
++ "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
+ },
+ {
+ "EventCode": "0x4C016",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+deleted file mode 100644
+index 54acb55e2c8c6..0000000000000
+--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
++++ /dev/null
+@@ -1,7 +0,0 @@
+-[
+- {
+- "EventCode": "0x4016E",
+- "EventName": "PM_THRESH_NOT_MET",
+- "BriefDescription": "Threshold counter did not meet threshold."
+- }
+-]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+index 558f9530f54ec..61e9e0222c873 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+@@ -7,7 +7,7 @@
+ {
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
+ },
+ {
+ "EventCode": "0x10010",
+@@ -32,12 +32,12 @@
+ {
+ "EventCode": "0x1D05E",
+ "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+ },
+ {
+ "EventCode": "0x1E050",
+ "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+ },
+ {
+ "EventCode": "0x1F054",
+@@ -67,7 +67,7 @@
+ {
+ "EventCode": "0x100F6",
+ "EventName": "PM_IERAT_MISS",
+- "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
++ "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
+ },
+ {
+ "EventCode": "0x100F8",
+@@ -77,7 +77,7 @@
+ {
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+ },
+ {
+ "EventCode": "0x20114",
+@@ -102,7 +102,7 @@
+ {
+ "EventCode": "0x2D01A",
+ "EventName": "PM_DISP_STALL_IC_MISS",
+- "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
++ "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+ },
+ {
+ "EventCode": "0x2E018",
+@@ -112,7 +112,7 @@
+ {
+ "EventCode": "0x2E01A",
+ "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
+ },
+ {
+ "EventCode": "0x2C142",
+@@ -137,7 +137,7 @@
+ {
+ "EventCode": "0x30004",
+ "EventName": "PM_DISP_STALL_FLUSH",
+- "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
++ "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+ },
+ {
+ "EventCode": "0x3000A",
+@@ -157,7 +157,7 @@
+ {
+ "EventCode": "0x30018",
+ "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
+ },
+ {
+ "EventCode": "0x30026",
+@@ -182,7 +182,7 @@
+ {
+ "EventCode": "0x3D05C",
+ "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
+ },
+ {
+ "EventCode": "0x3E052",
+@@ -192,7 +192,7 @@
+ {
+ "EventCode": "0x3E054",
+ "EventName": "PM_LD_MISS_L1",
+- "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
++ "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+ },
+ {
+ "EventCode": "0x301EA",
+@@ -202,7 +202,7 @@
+ {
+ "EventCode": "0x300FA",
+ "EventName": "PM_INST_FROM_L3MISS",
+- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
++ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
+ },
+ {
+ "EventCode": "0x40006",
+@@ -232,16 +232,16 @@
+ {
+ "EventCode": "0x4E01A",
+ "EventName": "PM_DISP_STALL_HELD_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
+ },
+ {
+ "EventCode": "0x4003C",
+ "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+- "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
++ "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+ },
+ {
+ "EventCode": "0x44056",
+ "EventName": "PM_VECTOR_ST_CMPL",
+- "BriefDescription": "Vector store instructions completed."
++ "BriefDescription": "Vector store instruction completed."
+ }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+index 58b5dfe3a2731..f2436fc5537ce 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+@@ -19,11 +19,6 @@
+ "EventName": "PM_MRK_BR_TAKEN_CMPL",
+ "BriefDescription": "Marked Branch Taken instruction completed."
+ },
+- {
+- "EventCode": "0x20112",
+- "EventName": "PM_MRK_NTF_FIN",
+- "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
+- },
+ {
+ "EventCode": "0x2C01C",
+ "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+@@ -62,17 +57,12 @@
+ {
+ "EventCode": "0x200FD",
+ "EventName": "PM_L1_ICACHE_MISS",
+- "BriefDescription": "Demand iCache Miss."
+- },
+- {
+- "EventCode": "0x30130",
+- "EventName": "PM_MRK_INST_FIN",
+- "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
++ "BriefDescription": "Demand instruction cache miss."
+ },
+ {
+ "EventCode": "0x34146",
+ "EventName": "PM_MRK_LD_CMPL",
+- "BriefDescription": "Marked loads completed."
++ "BriefDescription": "Marked load instruction completed."
+ },
+ {
+ "EventCode": "0x3E158",
+@@ -82,12 +72,12 @@
+ {
+ "EventCode": "0x3E15A",
+ "EventName": "PM_MRK_ST_FIN",
+- "BriefDescription": "The marked instruction was a store of any kind."
++ "BriefDescription": "Marked store instruction finished."
+ },
+ {
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+- "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
++ "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
+ },
+ {
+ "EventCode": "0x301E4",
+@@ -102,12 +92,12 @@
+ {
+ "EventCode": "0x300FE",
+ "EventName": "PM_DATA_FROM_L3MISS",
+- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
++ "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss."
+ },
+ {
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+- "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
++ "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+ },
+ {
+ "EventCode": "0x40134",
+@@ -117,22 +107,22 @@
+ {
+ "EventCode": "0x4505A",
+ "EventName": "PM_SP_FLOP_CMPL",
+- "BriefDescription": "Single Precision floating point instructions completed."
++ "BriefDescription": "Single Precision floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D058",
+ "EventName": "PM_VECTOR_FLOP_CMPL",
+- "BriefDescription": "Vector floating point instructions completed."
++ "BriefDescription": "Vector floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D05A",
+ "EventName": "PM_NON_MATH_FLOP_CMPL",
+- "BriefDescription": "Non Math instructions completed."
++ "BriefDescription": "Non Math instruction completed."
+ },
+ {
+ "EventCode": "0x401E0",
+ "EventName": "PM_MRK_INST_CMPL",
+- "BriefDescription": "marked instruction completed."
++ "BriefDescription": "Marked instruction completed."
+ },
+ {
+ "EventCode": "0x400FE",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+index 843b51f531e95..c4c10ca98cad7 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+@@ -47,7 +47,7 @@
+ {
+ "EventCode": "0x10062",
+ "EventName": "PM_LD_L3MISS_PEND_CYC",
+- "BriefDescription": "Cycles L3 miss was pending for this thread."
++ "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
+ },
+ {
+ "EventCode": "0x20010",
+@@ -132,7 +132,7 @@
+ {
+ "EventCode": "0x300FC",
+ "EventName": "PM_DTLB_MISS",
+- "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
++ "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
+ },
+ {
+ "EventCode": "0x4D02C",
+@@ -142,7 +142,7 @@
+ {
+ "EventCode": "0x4003E",
+ "EventName": "PM_LD_CMPL",
+- "BriefDescription": "Loads completed."
++ "BriefDescription": "Load instruction completed."
+ },
+ {
+ "EventCode": "0x4C040",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json
+index 8ba3e81c9808b..fe050d44374ba 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json
+@@ -1,13 +1,13 @@
+ [
+ {
+ "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P01",
+- "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@) * 100",
++ "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / (1 + hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P23",
+- "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@) * 100",
++ "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / (1 + hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+@@ -61,13 +61,13 @@
+ },
+ {
+ "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P01",
+- "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@) * 100",
++ "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / (1 + hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P23",
+- "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@) * 100",
++ "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / (1 + hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+@@ -151,193 +151,193 @@
+ },
+ {
+ "MetricName": "XLINK0_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK1_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK2_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK3_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK4_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK5_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK6_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK7_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK0_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK1_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK2_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK3_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK4_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK5_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK6_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "XLINK7_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK0_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK1_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK2_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK3_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK4_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK5_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK6_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK7_OUT_TOTAL_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK0_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK1_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK2_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK3_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK4_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK5_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK6_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricName": "ALINK7_OUT_DATA_UTILIZATION",
+- "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
++ "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100",
+ "ScaleUnit": "1.063%",
+ "AggregationMode": "PerChip"
+ },
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+index 7d0de1a2860b4..36c5bbc64c3be 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+@@ -2,12 +2,12 @@
+ {
+ "EventCode": "0x10016",
+ "EventName": "PM_VSU0_ISSUE",
+- "BriefDescription": "VSU instructions issued to VSU pipe 0."
++ "BriefDescription": "VSU instruction issued to VSU pipe 0."
+ },
+ {
+ "EventCode": "0x1001C",
+ "EventName": "PM_ULTRAVISOR_INST_CMPL",
+- "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
++ "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+ },
+ {
+ "EventCode": "0x100F0",
+@@ -17,23 +17,18 @@
+ {
+ "EventCode": "0x10134",
+ "EventName": "PM_MRK_ST_DONE_L2",
+- "BriefDescription": "Marked stores completed in L2 (RC machine done)."
++ "BriefDescription": "Marked store completed in L2."
+ },
+ {
+ "EventCode": "0x1505E",
+ "EventName": "PM_LD_HIT_L1",
+- "BriefDescription": "Loads that finished without experiencing an L1 miss."
++ "BriefDescription": "Load finished without experiencing an L1 miss."
+ },
+ {
+ "EventCode": "0x1F056",
+ "EventName": "PM_DISP_SS0_2_INSTR_CYC",
+ "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
+ },
+- {
+- "EventCode": "0x1F15C",
+- "EventName": "PM_MRK_STCX_L2_CYC",
+- "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
+- },
+ {
+ "EventCode": "0x10066",
+ "EventName": "PM_ADJUNCT_CYC",
+@@ -42,7 +37,7 @@
+ {
+ "EventCode": "0x101E4",
+ "EventName": "PM_MRK_L1_ICACHE_MISS",
+- "BriefDescription": "Marked Instruction suffered an icache Miss."
++ "BriefDescription": "Marked instruction suffered an instruction cache miss."
+ },
+ {
+ "EventCode": "0x101EA",
+@@ -72,7 +67,7 @@
+ {
+ "EventCode": "0x2E010",
+ "EventName": "PM_ADJUNCT_INST_CMPL",
+- "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
++ "BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
+ },
+ {
+ "EventCode": "0x2E014",
+@@ -122,7 +117,7 @@
+ {
+ "EventCode": "0x201E4",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS",
+- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
++ "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+ },
+ {
+ "EventCode": "0x201E8",
+@@ -132,17 +127,17 @@
+ {
+ "EventCode": "0x200F2",
+ "EventName": "PM_INST_DISP",
+- "BriefDescription": "PowerPC instructions dispatched."
++ "BriefDescription": "PowerPC instruction dispatched."
+ },
+ {
+ "EventCode": "0x30132",
+ "EventName": "PM_MRK_VSU_FIN",
+- "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
++ "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
+ },
+ {
+ "EventCode": "0x30038",
+ "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
++ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
+ },
+ {
+ "EventCode": "0x3F04A",
+@@ -152,12 +147,12 @@
+ {
+ "EventCode": "0x3405A",
+ "EventName": "PM_PRIVILEGED_INST_CMPL",
+- "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
++ "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
+ },
+ {
+ "EventCode": "0x3F150",
+ "EventName": "PM_MRK_ST_DRAIN_CYC",
+- "BriefDescription": "cycles to drain st from core to L2."
++ "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
+ },
+ {
+ "EventCode": "0x3F054",
+@@ -182,7 +177,7 @@
+ {
+ "EventCode": "0x4001C",
+ "EventName": "PM_VSU_FIN",
+- "BriefDescription": "VSU instructions finished."
++ "BriefDescription": "VSU instruction finished."
+ },
+ {
+ "EventCode": "0x4C01A",
+@@ -197,7 +192,7 @@
+ {
+ "EventCode": "0x4D022",
+ "EventName": "PM_HYPERVISOR_INST_CMPL",
+- "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
++ "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+ },
+ {
+ "EventCode": "0x4D026",
+@@ -212,32 +207,32 @@
+ {
+ "EventCode": "0x40030",
+ "EventName": "PM_INST_FIN",
+- "BriefDescription": "Instructions finished."
++ "BriefDescription": "Instruction finished."
+ },
+ {
+ "EventCode": "0x44146",
+ "EventName": "PM_MRK_STCX_CORE_CYC",
+- "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
++ "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+ },
+ {
+ "EventCode": "0x44054",
+ "EventName": "PM_VECTOR_LD_CMPL",
+- "BriefDescription": "Vector load instructions completed."
++ "BriefDescription": "Vector load instruction completed."
+ },
+ {
+ "EventCode": "0x45054",
+ "EventName": "PM_FMA_CMPL",
+- "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
++ "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+ },
+ {
+ "EventCode": "0x45056",
+ "EventName": "PM_SCALAR_FLOP_CMPL",
+- "BriefDescription": "Scalar floating point instructions completed."
++ "BriefDescription": "Scalar floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4505C",
+ "EventName": "PM_MATH_FLOP_CMPL",
+- "BriefDescription": "Math floating point instructions completed."
++ "BriefDescription": "Math floating point instruction completed."
+ },
+ {
+ "EventCode": "0x4D05E",
+@@ -252,21 +247,21 @@
+ {
+ "EventCode": "0x401E6",
+ "EventName": "PM_MRK_INST_FROM_L3MISS",
+- "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
++ "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+ },
+ {
+ "EventCode": "0x401E8",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS",
+- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
++ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
+ },
+ {
+ "EventCode": "0x400F0",
+ "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
+- "BriefDescription": "Load Missed L1, counted at finish time."
++ "BriefDescription": "Load missed L1, counted at finish time."
+ },
+ {
+ "EventCode": "0x400FA",
+ "EventName": "PM_RUN_INST_CMPL",
+- "BriefDescription": "Completed PowerPC instructions gated by the run latch."
++ "BriefDescription": "PowerPC instruction completed while the run latch is set."
+ }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+index b8aded6045faa..799893c56f32b 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+@@ -2,7 +2,7 @@
+ {
+ "EventCode": "0x100FE",
+ "EventName": "PM_INST_CMPL",
+- "BriefDescription": "PowerPC instructions completed."
++ "BriefDescription": "PowerPC instruction completed."
+ },
+ {
+ "EventCode": "0x1000C",
+@@ -12,7 +12,7 @@
+ {
+ "EventCode": "0x1000E",
+ "EventName": "PM_MMA_ISSUED",
+- "BriefDescription": "MMA instructions issued."
++ "BriefDescription": "MMA instruction issued."
+ },
+ {
+ "EventCode": "0x10012",
+@@ -107,7 +107,7 @@
+ {
+ "EventCode": "0x2D012",
+ "EventName": "PM_VSU1_ISSUE",
+- "BriefDescription": "VSU instructions issued to VSU pipe 1."
++ "BriefDescription": "VSU instruction issued to VSU pipe 1."
+ },
+ {
+ "EventCode": "0x2D018",
+@@ -122,7 +122,7 @@
+ {
+ "EventCode": "0x2E01E",
+ "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
++ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
+ },
+ {
+ "EventCode": "0x2013C",
+@@ -137,7 +137,7 @@
+ {
+ "EventCode": "0x201E2",
+ "EventName": "PM_MRK_LD_MISS_L1",
+- "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
++ "BriefDescription": "Marked demand data load miss counted at finish time."
+ },
+ {
+ "EventCode": "0x200F4",
+@@ -172,7 +172,7 @@
+ {
+ "EventCode": "0x30028",
+ "EventName": "PM_CMPL_STALL_MEM_ECC",
+- "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
++ "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a STCX waiting for its result or a load waiting for non-critical sectors of data and ECC."
+ },
+ {
+ "EventCode": "0x30036",
+@@ -187,17 +187,12 @@
+ {
+ "EventCode": "0x3F044",
+ "EventName": "PM_VSU2_ISSUE",
+- "BriefDescription": "VSU instructions issued to VSU pipe 2."
++ "BriefDescription": "VSU instruction issued to VSU pipe 2."
+ },
+ {
+ "EventCode": "0x30058",
+ "EventName": "PM_TLBIE_FIN",
+- "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+- },
+- {
+- "EventCode": "0x3D058",
+- "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
+- "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
++ "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+ },
+ {
+ "EventCode": "0x30066",
+@@ -252,7 +247,7 @@
+ {
+ "EventCode": "0x4E012",
+ "EventName": "PM_EXEC_STALL_UNKNOWN",
+- "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
++ "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+ },
+ {
+ "EventCode": "0x4D020",
+@@ -267,12 +262,7 @@
+ {
+ "EventCode": "0x45058",
+ "EventName": "PM_IC_MISS_CMPL",
+- "BriefDescription": "Non-speculative icache miss, counted at completion."
+- },
+- {
+- "EventCode": "0x4D050",
+- "EventName": "PM_VSU_NON_FLOP_CMPL",
+- "BriefDescription": "Non-floating point VSU instructions completed."
++ "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
+ },
+ {
+ "EventCode": "0x4D052",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+index b5d1bd39cfb22..364fedbfb490b 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+@@ -12,11 +12,11 @@
+ {
+ "EventCode": "0x45052",
+ "EventName": "PM_4FLOP_CMPL",
+- "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++ "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+ },
+ {
+ "EventCode": "0x4D054",
+ "EventName": "PM_8FLOP_CMPL",
+- "BriefDescription": "Four Double Precision vector instructions completed."
++ "BriefDescription": "Four Double Precision vector instruction completed."
+ }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+index db3766dca07c5..961e2491e73f6 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+@@ -4,11 +4,6 @@
+ "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+ "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+ },
+- {
+- "EventCode": "0x20016",
+- "EventName": "PM_ST_FIN",
+- "BriefDescription": "Store finish count. Includes speculative activity."
+- },
+ {
+ "EventCode": "0x20018",
+ "EventName": "PM_ST_FWD",
+@@ -17,7 +12,7 @@
+ {
+ "EventCode": "0x2011C",
+ "EventName": "PM_MRK_NTF_CYC",
+- "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
++ "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+ },
+ {
+ "EventCode": "0x2E01C",
+@@ -37,7 +32,7 @@
+ {
+ "EventCode": "0x200FE",
+ "EventName": "PM_DATA_FROM_L2MISS",
+- "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
++ "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
+ },
+ {
+ "EventCode": "0x30010",
+@@ -52,6 +47,6 @@
+ {
+ "EventCode": "0x4D05C",
+ "EventName": "PM_DPP_FLOP_CMPL",
+- "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
++ "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
+ }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
+index 3f69422c21f99..f10bd554521a0 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
++++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
+@@ -1417,7 +1417,7 @@
+ {
+ "EventCode": "0x45054",
+ "EventName": "PM_FMA_CMPL",
+- "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. "
++ "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only."
+ },
+ {
+ "EventCode": "0x201E8",
+@@ -2017,7 +2017,7 @@
+ {
+ "EventCode": "0xC0BC",
+ "EventName": "PM_LSU_FLUSH_OTHER",
+- "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the “bad dval” back and flush all younger ops)"
++ "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the 'bad dval' back and flush all younger ops)"
+ },
+ {
+ "EventCode": "0x5094",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+index d0265f255de2b..723bffa41c448 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
++++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+@@ -442,7 +442,7 @@
+ {
+ "EventCode": "0x4D052",
+ "EventName": "PM_2FLOP_CMPL",
+- "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg "
++ "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg"
+ },
+ {
+ "EventCode": "0x1F142",
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+index 9ff67206ade4e..821d2f2a8f251 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+@@ -314,6 +314,19 @@
+ "SampleAfterValue": "2000003",
+ "UMask": "0x82"
+ },
++ {
++ "BriefDescription": "All retired memory instructions.",
++ "Counter": "0,1,2,3",
++ "CounterHTOff": "0,1,2,3",
++ "Data_LA": "1",
++ "EventCode": "0xD0",
++ "EventName": "MEM_INST_RETIRED.ANY",
++ "L1_Hit_Indication": "1",
++ "PEBS": "1",
++ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
++ "SampleAfterValue": "2000003",
++ "UMask": "0x83"
++ },
+ {
+ "BriefDescription": "Retired load instructions with locked access.",
+ "Counter": "0,1,2,3",
+@@ -358,6 +371,7 @@
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PEBS": "1",
++ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
+ },
+@@ -370,6 +384,7 @@
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
++ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x12"
+ },
+@@ -733,7 +748,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010491",
++ "MSRValue": "0x10491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -772,7 +787,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0491",
++ "MSRValue": "0x4003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -785,7 +800,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0491",
++ "MSRValue": "0x1003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -798,7 +813,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0491",
++ "MSRValue": "0x8003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -811,7 +826,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010490",
++ "MSRValue": "0x10490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -850,7 +865,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0490",
++ "MSRValue": "0x4003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -863,7 +878,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0490",
++ "MSRValue": "0x1003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -876,7 +891,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0490",
++ "MSRValue": "0x8003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -889,7 +904,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010120",
++ "MSRValue": "0x10120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -928,7 +943,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0120",
++ "MSRValue": "0x4003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -941,7 +956,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0120",
++ "MSRValue": "0x1003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -954,7 +969,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0120",
++ "MSRValue": "0x8003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -967,7 +982,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010122",
++ "MSRValue": "0x10122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1006,7 +1021,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0122",
++ "MSRValue": "0x4003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1019,7 +1034,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0122",
++ "MSRValue": "0x1003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1032,7 +1047,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0122",
++ "MSRValue": "0x8003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1045,7 +1060,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010004",
++ "MSRValue": "0x10004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1084,7 +1099,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0004",
++ "MSRValue": "0x4003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1097,7 +1112,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0004",
++ "MSRValue": "0x1003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1110,7 +1125,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0004",
++ "MSRValue": "0x8003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1123,7 +1138,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010001",
++ "MSRValue": "0x10001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1162,7 +1177,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0001",
++ "MSRValue": "0x4003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1175,7 +1190,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0001",
++ "MSRValue": "0x1003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1188,7 +1203,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0001",
++ "MSRValue": "0x8003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1201,7 +1216,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010002",
++ "MSRValue": "0x10002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1240,7 +1255,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0002",
++ "MSRValue": "0x4003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1253,7 +1268,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0002",
++ "MSRValue": "0x1003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1266,7 +1281,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0002",
++ "MSRValue": "0x8003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1279,7 +1294,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010400",
++ "MSRValue": "0x10400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1318,7 +1333,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0400",
++ "MSRValue": "0x4003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1331,7 +1346,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0400",
++ "MSRValue": "0x1003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1344,7 +1359,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0400",
++ "MSRValue": "0x8003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1357,7 +1372,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010010",
++ "MSRValue": "0x10010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1396,7 +1411,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0010",
++ "MSRValue": "0x4003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1409,7 +1424,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0010",
++ "MSRValue": "0x1003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1422,7 +1437,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0010",
++ "MSRValue": "0x8003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1435,7 +1450,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010020",
++ "MSRValue": "0x10020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1474,7 +1489,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0020",
++ "MSRValue": "0x4003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1487,7 +1502,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0020",
++ "MSRValue": "0x1003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1500,7 +1515,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0020",
++ "MSRValue": "0x8003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1513,7 +1528,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010080",
++ "MSRValue": "0x10080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1552,7 +1567,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0080",
++ "MSRValue": "0x4003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1565,7 +1580,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0080",
++ "MSRValue": "0x1003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1578,7 +1593,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0080",
++ "MSRValue": "0x8003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1591,7 +1606,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0000010100",
++ "MSRValue": "0x10100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1630,7 +1645,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x04003C0100",
++ "MSRValue": "0x4003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1643,7 +1658,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x01003C0100",
++ "MSRValue": "0x1003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1656,7 +1671,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x08003C0100",
++ "MSRValue": "0x8003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+index 503737ed3a83c..9e873ab224502 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+@@ -1,73 +1,81 @@
+ [
+ {
+- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
++ "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
++ "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
++ "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
++ "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
++ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
++ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
++ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
++ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
++ "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
++ "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
++ "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+index 078706a500919..ecce4273ae52c 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+@@ -30,7 +30,21 @@
+ "UMask": "0x2"
+ },
+ {
+- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
++ "BriefDescription": "Retired Instructions who experienced DSB miss.",
++ "Counter": "0,1,2,3",
++ "CounterHTOff": "0,1,2,3",
++ "EventCode": "0xC6",
++ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
++ "MSRIndex": "0x3F7",
++ "MSRValue": "0x1",
++ "PEBS": "1",
++ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
++ "SampleAfterValue": "100007",
++ "TakenAlone": "1",
++ "UMask": "0x1"
++ },
++ {
++ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+@@ -38,7 +52,7 @@
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
++ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+index 6f29b02fa320c..60c286b4fe54c 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+@@ -299,7 +299,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00491",
++ "MSRValue": "0x83FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -312,7 +312,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00491",
++ "MSRValue": "0x63FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -325,7 +325,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000491",
++ "MSRValue": "0x604000491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -338,7 +338,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800491",
++ "MSRValue": "0x63B800491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -377,7 +377,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00490",
++ "MSRValue": "0x83FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -390,7 +390,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00490",
++ "MSRValue": "0x63FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -403,7 +403,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000490",
++ "MSRValue": "0x604000490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -416,7 +416,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800490",
++ "MSRValue": "0x63B800490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -455,7 +455,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00120",
++ "MSRValue": "0x83FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -468,7 +468,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00120",
++ "MSRValue": "0x63FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -481,7 +481,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000120",
++ "MSRValue": "0x604000120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -494,7 +494,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800120",
++ "MSRValue": "0x63B800120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -533,7 +533,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00122",
++ "MSRValue": "0x83FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -546,7 +546,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00122",
++ "MSRValue": "0x63FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -559,7 +559,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000122",
++ "MSRValue": "0x604000122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -572,7 +572,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800122",
++ "MSRValue": "0x63B800122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -611,7 +611,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00004",
++ "MSRValue": "0x83FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -624,7 +624,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00004",
++ "MSRValue": "0x63FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -637,7 +637,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000004",
++ "MSRValue": "0x604000004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -650,7 +650,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800004",
++ "MSRValue": "0x63B800004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -689,7 +689,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00001",
++ "MSRValue": "0x83FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -702,7 +702,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00001",
++ "MSRValue": "0x63FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -715,7 +715,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000001",
++ "MSRValue": "0x604000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -728,7 +728,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800001",
++ "MSRValue": "0x63B800001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -767,7 +767,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00002",
++ "MSRValue": "0x83FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -780,7 +780,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00002",
++ "MSRValue": "0x63FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -793,7 +793,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000002",
++ "MSRValue": "0x604000002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -806,7 +806,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800002",
++ "MSRValue": "0x63B800002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -845,7 +845,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00400",
++ "MSRValue": "0x83FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -858,7 +858,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00400",
++ "MSRValue": "0x63FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -871,7 +871,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000400",
++ "MSRValue": "0x604000400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -884,7 +884,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800400",
++ "MSRValue": "0x63B800400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -923,7 +923,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00010",
++ "MSRValue": "0x83FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -936,7 +936,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00010",
++ "MSRValue": "0x63FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -949,7 +949,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000010",
++ "MSRValue": "0x604000010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -962,7 +962,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800010",
++ "MSRValue": "0x63B800010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1001,7 +1001,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00020",
++ "MSRValue": "0x83FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1014,7 +1014,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00020",
++ "MSRValue": "0x63FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1027,7 +1027,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000020",
++ "MSRValue": "0x604000020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1040,7 +1040,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800020",
++ "MSRValue": "0x63B800020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1079,7 +1079,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00080",
++ "MSRValue": "0x83FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1092,7 +1092,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00080",
++ "MSRValue": "0x63FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1105,7 +1105,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000080",
++ "MSRValue": "0x604000080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1118,7 +1118,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800080",
++ "MSRValue": "0x63B800080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1157,7 +1157,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x083FC00100",
++ "MSRValue": "0x83FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1170,7 +1170,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063FC00100",
++ "MSRValue": "0x63FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1183,7 +1183,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x0604000100",
++ "MSRValue": "0x604000100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+@@ -1196,7 +1196,7 @@
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+- "MSRValue": "0x063B800100",
++ "MSRValue": "0x63B800100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+index ca57481206660..12eabae3e2242 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+@@ -435,6 +435,17 @@
+ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003"
+ },
++ {
++ "BriefDescription": "Number of all retired NOP instructions.",
++ "Counter": "0,1,2,3",
++ "CounterHTOff": "0,1,2,3,4,5,6,7",
++ "Errata": "SKL091, SKL044",
++ "EventCode": "0xC0",
++ "EventName": "INST_RETIRED.NOP",
++ "PEBS": "1",
++ "SampleAfterValue": "2000003",
++ "UMask": "0x2"
++ },
+ {
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "Counter": "1",
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+index 863c9e103969e..b016f7d1ff3de 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+@@ -1,26 +1,167 @@
+ [
++ {
++ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
++ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
++ "MetricGroup": "TopdownL1",
++ "MetricName": "Frontend_Bound",
++ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
++ "MetricGroup": "TopdownL1_SMT",
++ "MetricName": "Frontend_Bound_SMT",
++ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
++ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
++ "MetricGroup": "TopdownL1",
++ "MetricName": "Bad_Speculation",
++ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
++ "MetricGroup": "TopdownL1_SMT",
++ "MetricName": "Bad_Speculation_SMT",
++ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
++ "MetricConstraint": "NO_NMI_WATCHDOG",
++ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
++ "MetricGroup": "TopdownL1",
++ "MetricName": "Backend_Bound",
++ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
++ "MetricGroup": "TopdownL1_SMT",
++ "MetricName": "Backend_Bound_SMT",
++ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
++ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
++ "MetricGroup": "TopdownL1",
++ "MetricName": "Retiring",
++ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
++ },
++ {
++ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
++ "MetricGroup": "TopdownL1_SMT",
++ "MetricName": "Retiring_SMT",
++ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
++ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )",
++ "MetricGroup": "Bad;BadSpec;BrMispredicts",
++ "MetricName": "Mispredictions"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
++ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
++ "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT",
++ "MetricName": "Mispredictions_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
++ "MetricGroup": "Mem;MemoryBW;Offcore",
++ "MetricName": "Memory_Bandwidth"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( 
CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
++ "MetricGroup": "Mem;MemoryBW;Offcore_SMT",
++ "MetricName": "Memory_Bandwidth_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * 
EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )",
++ "MetricGroup": "Mem;MemoryLat;Offcore",
++ "MetricName": "Memory_Latency"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ 
/ 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )",
++ "MetricGroup": "Mem;MemoryLat;Offcore_SMT",
++ "MetricName": "Memory_Latency_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
++ "MetricGroup": "Mem;MemoryTLB",
++ "MetricName": "Memory_Data_TLBs"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
++ "MetricGroup": "Mem;MemoryTLB;_SMT",
++ "MetricName": "Memory_Data_TLBs_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
++ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))",
++ "MetricGroup": "Ret",
++ "MetricName": "Branching_Overhead"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
++ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
++ "MetricGroup": "Ret_SMT",
++ "MetricName": "Branching_Overhead_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
++ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))",
++ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB",
++ "MetricName": "Big_Code"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
++ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
++ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT",
++ "MetricName": "Big_Code_SMT"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
++ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
++ "MetricGroup": "Fed;FetchBW;Frontend",
++ "MetricName": "Instruction_Fetch_BW"
++ },
++ {
++ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
++ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
++ "MetricGroup": "Fed;FetchBW;Frontend_SMT",
++ "MetricName": "Instruction_Fetch_BW_SMT"
++ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+- "MetricGroup": "Summary",
++ "MetricGroup": "Ret;Summary",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+- "MetricGroup": "Pipeline;Retire",
++ "MetricGroup": "Pipeline;Ret;Retire",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Instruction per taken branch",
+- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+- "MetricGroup": "Branches;FetchBW;PGO",
+- "MetricName": "IpTB"
++ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
++ "MetricGroup": "Branches;Fed;FetchBW",
++ "MetricName": "UpTB"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+- "MetricGroup": "Pipeline",
++ "MetricGroup": "Pipeline;Mem",
+ "MetricName": "CPI"
+ },
+ {
+@@ -30,39 +171,84 @@
+ "MetricName": "CLKS"
+ },
+ {
+- "BriefDescription": "Instructions Per Cycle (per physical core)",
++ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
++ "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
++ "MetricGroup": "TmaL1",
++ "MetricName": "SLOTS"
++ },
++ {
++ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
++ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
++ "MetricGroup": "TmaL1_SMT",
++ "MetricName": "SLOTS_SMT"
++ },
++ {
++ "BriefDescription": "The ratio of Executed- by Issued-Uops",
++ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
++ "MetricGroup": "Cor;Pipeline",
++ "MetricName": "Execute_per_Issue",
++ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
++ },
++ {
++ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+- "MetricGroup": "SMT;TmaL1",
++ "MetricGroup": "Ret;SMT;TmaL1",
+ "MetricName": "CoreIPC"
+ },
+ {
+- "BriefDescription": "Instructions Per Cycle (per physical core)",
++ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+- "MetricGroup": "SMT;TmaL1",
++ "MetricGroup": "Ret;SMT;TmaL1_SMT",
+ "MetricName": "CoreIPC_SMT"
+ },
+ {
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
+- "MetricGroup": "Flops",
++ "MetricGroup": "Ret;Flops",
+ "MetricName": "FLOPc"
+ },
+ {
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+- "MetricGroup": "Flops_SMT",
++ "MetricGroup": "Ret;Flops_SMT",
+ "MetricName": "FLOPc_SMT"
+ },
++ {
++ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
++ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
++ "MetricGroup": "Cor;Flops;HPC",
++ "MetricName": "FP_Arith_Utilization",
++ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
++ },
++ {
++ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
++ "MetricGroup": "Cor;Flops;HPC_SMT",
++ "MetricName": "FP_Arith_Utilization_SMT",
++ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+- "MetricGroup": "Pipeline;PortsUtil",
++ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+ "MetricName": "ILP"
+ },
++ {
++ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
++ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;BrMispredicts",
++ "MetricName": "Branch_Misprediction_Cost"
++ },
++ {
++ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
++ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;BrMispredicts_SMT",
++ "MetricName": "Branch_Misprediction_Cost_SMT"
++ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+- "MetricGroup": "BrMispredicts",
++ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "IpMispredict"
+ },
+ {
+@@ -86,122 +272,249 @@
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+- "MetricGroup": "Branches;InsType",
++ "MetricGroup": "Branches;Fed;InsType",
+ "MetricName": "IpBranch"
+ },
+ {
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+- "MetricGroup": "Branches",
++ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "IpCall"
+ },
++ {
++ "BriefDescription": "Instruction per taken branch",
++ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
++ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
++ "MetricName": "IpTB"
++ },
+ {
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+- "MetricGroup": "Branches;PGO",
++ "MetricGroup": "Branches;Fed;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+- "MetricGroup": "Flops;FpArith;InsType",
++ "MetricGroup": "Flops;InsType",
+ "MetricName": "IpFLOP"
+ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )",
++ "MetricGroup": "Flops;InsType",
++ "MetricName": "IpArith",
++ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
++ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
++ "MetricGroup": "Flops;FpScalar;InsType",
++ "MetricName": "IpArith_Scalar_SP",
++ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
++ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
++ "MetricGroup": "Flops;FpScalar;InsType",
++ "MetricName": "IpArith_Scalar_DP",
++ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
++ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )",
++ "MetricGroup": "Flops;FpVector;InsType",
++ "MetricName": "IpArith_AVX128",
++ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
++ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
++ "MetricGroup": "Flops;FpVector;InsType",
++ "MetricName": "IpArith_AVX256",
++ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
++ },
++ {
++ "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
++ "MetricGroup": "Flops;FpVector;InsType",
++ "MetricName": "IpArith_AVX512",
++ "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
++ },
+ {
+ "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary;TmaL1",
+ "MetricName": "Instructions"
+ },
++ {
++ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
++ "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
++ "MetricGroup": "Fed;FetchBW",
++ "MetricName": "Fetch_UpC"
++ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+- "MetricGroup": "DSB;FetchBW",
++ "MetricGroup": "DSB;Fed;FetchBW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
++ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
++ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
++ "MetricGroup": "DSBmiss;Fed",
++ "MetricName": "DSB_Misses_Cost"
++ },
++ {
++ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
++ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
++ "MetricGroup": "DSBmiss;Fed_SMT",
++ "MetricName": "DSB_Misses_Cost_SMT"
++ },
++ {
++ "BriefDescription": "Number of Instructions per non-speculative DSB miss",
++ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
++ "MetricGroup": "DSBmiss;Fed",
++ "MetricName": "IpDSB_Miss_Ret"
++ },
++ {
++ "BriefDescription": "Fraction of branches that are non-taken conditionals",
++ "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;Branches;CodeGen;PGO",
++ "MetricName": "Cond_NT"
++ },
++ {
++ "BriefDescription": "Fraction of branches that are taken conditionals",
++ "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;Branches;CodeGen;PGO",
++ "MetricName": "Cond_TK"
++ },
++ {
++ "BriefDescription": "Fraction of branches that are CALL or RET",
++ "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;Branches",
++ "MetricName": "CallRet"
++ },
++ {
++ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
++ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
++ "MetricGroup": "Bad;Branches",
++ "MetricName": "Jump"
++ },
++ {
++ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+- "MetricGroup": "MemoryBound;MemoryLat",
+- "MetricName": "Load_Miss_Real_Latency"
++ "MetricGroup": "Mem;MemoryBound;MemoryLat",
++ "MetricName": "Load_Miss_Real_Latency",
++ "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+- "MetricGroup": "MemoryBound;MemoryBW",
++ "MetricGroup": "Mem;MemoryBound;MemoryBW",
+ "MetricName": "MLP"
+ },
+- {
+- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+- "MetricConstraint": "NO_NMI_WATCHDOG",
+- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )",
+- "MetricGroup": "MemoryTLB",
+- "MetricName": "Page_Walks_Utilization"
+- },
+ {
+ "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+- "MetricGroup": "MemoryBW",
++ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+- "MetricGroup": "MemoryBW",
++ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+- "MetricGroup": "MemoryBW",
++ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
+- "MetricGroup": "MemoryBW;Offcore",
++ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "L3_Cache_Access_BW"
+ },
+ {
+ "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+- "MetricGroup": "CacheMisses",
++ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L1MPKI"
+ },
++ {
++ "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
++ "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
++ "MetricGroup": "Mem;CacheMisses",
++ "MetricName": "L1MPKI_Load"
++ },
+ {
+ "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+- "MetricGroup": "CacheMisses",
++ "MetricGroup": "Mem;Backend;CacheMisses",
+ "MetricName": "L2MPKI"
+ },
+ {
+ "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
+- "MetricGroup": "CacheMisses;Offcore",
++ "MetricGroup": "Mem;CacheMisses;Offcore",
+ "MetricName": "L2MPKI_All"
+ },
++ {
++ "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)",
++ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
++ "MetricGroup": "Mem;CacheMisses",
++ "MetricName": "L2MPKI_Load"
++ },
+ {
+ "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
+ "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
+- "MetricGroup": "CacheMisses",
++ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L2HPKI_All"
+ },
++ {
++ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
++ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
++ "MetricGroup": "Mem;CacheMisses",
++ "MetricName": "L2HPKI_Load"
++ },
+ {
+ "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+- "MetricGroup": "CacheMisses",
++ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L3MPKI"
+ },
++ {
++ "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads",
++ "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
++ "MetricGroup": "Mem;CacheMisses",
++ "MetricName": "FB_HPKI"
++ },
++ {
++ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
++ "MetricConstraint": "NO_NMI_WATCHDOG",
++ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
++ "MetricGroup": "Mem;MemoryTLB",
++ "MetricName": "Page_Walks_Utilization"
++ },
++ {
++ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
++ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
++ "MetricGroup": "Mem;MemoryTLB_SMT",
++ "MetricName": "Page_Walks_Utilization_SMT"
++ },
+ {
+ "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+ "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+- "MetricGroup": "L2Evicts;Server",
++ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "L2_Evictions_Silent_PKI"
+ },
+ {
+ "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+ "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+- "MetricGroup": "L2Evicts;Server",
++ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "L2_Evictions_NonSilent_PKI"
+ },
+ {
+@@ -219,7 +532,7 @@
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
+- "MetricGroup": "Flops;HPC",
++ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "GFLOPs"
+ },
+ {
+@@ -228,6 +541,48 @@
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
++ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
++ "MetricGroup": "Power",
++ "MetricName": "Power_License0_Utilization",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
++ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
++ "MetricGroup": "Power_SMT",
++ "MetricName": "Power_License0_Utilization_SMT",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
++ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
++ "MetricGroup": "Power",
++ "MetricName": "Power_License1_Utilization",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
++ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
++ "MetricGroup": "Power_SMT",
++ "MetricName": "Power_License1_Utilization_SMT",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
++ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
++ "MetricGroup": "Power",
++ "MetricName": "Power_License2_Utilization",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
++ },
++ {
++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.",
++ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
++ "MetricGroup": "Power_SMT",
++ "MetricName": "Power_License2_Utilization_SMT",
++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
++ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
+ "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+@@ -240,34 +595,46 @@
+ "MetricGroup": "OS",
+ "MetricName": "Kernel_Utilization"
+ },
++ {
++ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
++ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
++ "MetricGroup": "OS",
++ "MetricName": "Kernel_CPI"
++ },
+ {
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+- "MetricGroup": "HPC;MemoryBW;SoC",
++ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricName": "DRAM_BW_Use"
+ },
+ {
+ "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+- "MetricGroup": "MemoryLat;SoC",
++ "MetricGroup": "Mem;MemoryLat;SoC",
+ "MetricName": "MEM_Read_Latency"
+ },
+ {
+ "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
+- "MetricGroup": "MemoryBW;SoC",
++ "MetricGroup": "Mem;MemoryBW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
+ },
++ {
++ "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
++ "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@",
++ "MetricGroup": "Mem;MemoryLat;SoC;Server",
++ "MetricName": "MEM_DRAM_Read_Latency"
++ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
+- "MetricGroup": "IoBW;SoC;Server",
++ "MetricGroup": "IoBW;Mem;SoC;Server",
+ "MetricName": "IO_Write_BW"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
+- "MetricGroup": "IoBW;SoC;Server",
++ "MetricGroup": "IoBW;Mem;SoC;Server",
+ "MetricName": "IO_Read_BW"
+ },
+ {
+diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+index 6ed92bc5c129b..06c5ca26ca3f3 100644
+--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
++++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+@@ -537,6 +537,18 @@
+ "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.",
+ "Unit": "IIO"
+ },
++ {
++ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
++ "Counter": "0,1,2,3",
++ "EventCode": "0xC2",
++ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
++ "FCMask": "0x4",
++ "PerPkg": "1",
++ "PortMask": "0x0f",
++ "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
++ "UMask": "0x03",
++ "Unit": "IIO"
++ },
+ {
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
+ "Counter": "0,1,2,3",
+@@ -585,6 +597,17 @@
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
++ {
++ "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
++ "Counter": "2,3",
++ "EventCode": "0xD5",
++ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
++ "FCMask": "0x04",
++ "PerPkg": "1",
++ "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
++ "UMask": "0x0f",
++ "Unit": "IIO"
++ },
+ {
+ "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
+ "Counter": "2,3",
+diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
+index 7c887d37b8934..1db4df106c433 100644
+--- a/tools/perf/pmu-events/jevents.c
++++ b/tools/perf/pmu-events/jevents.c
+@@ -604,7 +604,7 @@ static int json_events(const char *fn,
+ } else if (json_streq(map, field, "ExtSel")) {
+ char *code = NULL;
+ addfield(map, &code, "", "", val);
+- eventcode |= strtoul(code, NULL, 0) << 21;
++ eventcode |= strtoul(code, NULL, 0) << 8;
+ free(code);
+ } else if (json_streq(map, field, "EventName")) {
+ addfield(map, &je.name, "", "", val);
+diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
+index 1d3a189a9a547..ed6f614f2724d 100644
+--- a/tools/perf/scripts/python/intel-pt-events.py
++++ b/tools/perf/scripts/python/intel-pt-events.py
+@@ -11,7 +11,7 @@
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ # more details.
+
+-from __future__ import print_function
++from __future__ import division, print_function
+
+ import os
+ import sys
+@@ -32,8 +32,7 @@ try:
+ except:
+ broken_pipe_exception = IOError
+
+-glb_switch_str = None
+-glb_switch_printed = True
++glb_switch_str = {}
+ glb_insn = False
+ glb_disassembler = None
+ glb_src = False
+@@ -70,6 +69,7 @@ def trace_begin():
+ ap = argparse.ArgumentParser(usage = "", add_help = False)
+ ap.add_argument("--insn-trace", action='store_true')
+ ap.add_argument("--src-trace", action='store_true')
++ ap.add_argument("--all-switch-events", action='store_true')
+ global glb_args
+ global glb_insn
+ global glb_src
+@@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn):
+ print(start_str, src_str)
+
+ def do_process_event(param_dict):
+- global glb_switch_printed
+- if not glb_switch_printed:
+- print(glb_switch_str)
+- glb_switch_printed = True
+ event_attr = param_dict["attr"]
+ sample = param_dict["sample"]
+ raw_buf = param_dict["raw_buf"]
+@@ -274,6 +270,11 @@ def do_process_event(param_dict):
+ dso = get_optional(param_dict, "dso")
+ symbol = get_optional(param_dict, "symbol")
+
++ cpu = sample["cpu"]
++ if cpu in glb_switch_str:
++ print(glb_switch_str[cpu])
++ del glb_switch_str[cpu]
++
+ if name[0:12] == "instructions":
+ if glb_src:
+ print_srccode(comm, param_dict, sample, symbol, dso, True)
+@@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
+ sys.exit(1)
+
+ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
+- global glb_switch_printed
+- global glb_switch_str
+ if out:
+ out_str = "Switch out "
+ else:
+@@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree
+ machine_str = ""
+ else:
+ machine_str = "machine PID %d" % machine_pid
+- glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
++ switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
+ (out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str)
+- glb_switch_printed = False
++ if glb_args.all_switch_events:
++ print(switch_str);
++ else:
++ global glb_switch_str
++ glb_switch_str[cpu] = switch_str
+diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
+index da7dc5e45d0cf..4d6d3e3142008 100644
+--- a/tools/perf/tests/builtin-test.c
++++ b/tools/perf/tests/builtin-test.c
+@@ -828,6 +828,9 @@ int cmd_test(int argc, const char **argv)
+ if (ret < 0)
+ return ret;
+
++ /* Unbuffered output */
++ setvbuf(stdout, NULL, _IONBF, 0);
++
+ argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
+ if (argc >= 1 && !strcmp(argv[0], "list"))
+ return perf_test__list(argc - 1, argv + 1);
+diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
+index 7c56bc1f4cff0..89d25befb1711 100644
+--- a/tools/perf/tests/perf-time-to-tsc.c
++++ b/tools/perf/tests/perf-time-to-tsc.c
+@@ -20,8 +20,6 @@
+ #include "tsc.h"
+ #include "mmap.h"
+ #include "tests.h"
+-#include "pmu.h"
+-#include "pmu-hybrid.h"
+
+ #define CHECK__(x) { \
+ while ((x) < 0) { \
+@@ -84,18 +82,8 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
+
+ evlist__config(evlist, &opts, NULL);
+
+- evsel = evlist__first(evlist);
+-
+- evsel->core.attr.comm = 1;
+- evsel->core.attr.disabled = 1;
+- evsel->core.attr.enable_on_exec = 0;
+-
+- /*
+- * For hybrid "cycles:u", it creates two events.
+- * Init the second evsel here.
+- */
+- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
+- evsel = evsel__next(evsel);
++ /* For hybrid "cycles:u", it creates two events */
++ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.comm = 1;
+ evsel->core.attr.disabled = 1;
+ evsel->core.attr.enable_on_exec = 0;
+@@ -141,10 +129,12 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
+ goto next_event;
+
+ if (strcmp(event->comm.comm, comm1) == 0) {
++ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
+ comm1_time = sample.time;
+ }
+ if (strcmp(event->comm.comm, comm2) == 0) {
++ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
+ comm2_time = sample.time;
+ }
+diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+index 8a168cf8bacca..49bd875d51227 100755
+--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
++++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+@@ -12,7 +12,7 @@ skip_if_no_z_record() {
+
+ collect_z_record() {
+ echo "Collecting compressed record file:"
+- [[ "$(uname -m)" != s390x ]] && gflag='-g'
++ [ "$(uname -m)" != s390x ] && gflag='-g'
+ $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
+ dd count=500 if=/dev/urandom of=/dev/null
+ }
+diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+new file mode 100644
+index 0000000000000..319f36ebb9a40
+--- /dev/null
++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+@@ -0,0 +1,83 @@
++#!/bin/bash
++# test perf probe of function from different CU
++# SPDX-License-Identifier: GPL-2.0
++
++set -e
++
++# skip if there's no gcc
++if ! [ -x "$(command -v gcc)" ]; then
++ echo "failed: no gcc compiler"
++ exit 2
++fi
++
++temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
++
++cleanup()
++{
++ trap - EXIT TERM INT
++ if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
++ echo "--- Cleaning up ---"
++ perf probe -x ${temp_dir}/testfile -d foo || true
++ rm -f "${temp_dir}/"*
++ rmdir "${temp_dir}"
++ fi
++}
++
++trap_cleanup()
++{
++ cleanup
++ exit 1
++}
++
++trap trap_cleanup EXIT TERM INT
++
++cat > ${temp_dir}/testfile-foo.h << EOF
++struct t
++{
++ int *p;
++ int c;
++};
++
++extern int foo (int i, struct t *t);
++EOF
++
++cat > ${temp_dir}/testfile-foo.c << EOF
++#include "testfile-foo.h"
++
++int
++foo (int i, struct t *t)
++{
++ int j, res = 0;
++ for (j = 0; j < i && j < t->c; j++)
++ res += t->p[j];
++
++ return res;
++}
++EOF
++
++cat > ${temp_dir}/testfile-main.c << EOF
++#include "testfile-foo.h"
++
++static struct t g;
++
++int
++main (int argc, char **argv)
++{
++ int i;
++ int j[argc];
++ g.c = argc;
++ g.p = j;
++ for (i = 0; i < argc; i++)
++ j[i] = (int) argv[i][0];
++ return foo (3, &g);
++}
++EOF
++
++gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o
++gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o
++gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o
++
++perf probe -x ${temp_dir}/testfile --funcs foo
++perf probe -x ${temp_dir}/testfile foo
++
++cleanup
+diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
+index 62c0ec21aaa86..72abf5d86f712 100644
+--- a/tools/perf/tests/switch-tracking.c
++++ b/tools/perf/tests/switch-tracking.c
+@@ -324,6 +324,7 @@ out_free_nodes:
+ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+ {
+ const char *sched_switch = "sched:sched_switch";
++ const char *cycles = "cycles:u";
+ struct switch_tracking switch_tracking = { .tids = NULL, };
+ struct record_opts opts = {
+ .mmap_pages = UINT_MAX,
+@@ -372,12 +373,19 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
+ cpu_clocks_evsel = evlist__last(evlist);
+
+ /* Second event */
+- if (perf_pmu__has_hybrid())
+- err = parse_events(evlist, "cpu_core/cycles/u", NULL);
+- else
+- err = parse_events(evlist, "cycles:u", NULL);
++ if (perf_pmu__has_hybrid()) {
++ cycles = "cpu_core/cycles/u";
++ err = parse_events(evlist, cycles, NULL);
++ if (err) {
++ cycles = "cpu_atom/cycles/u";
++ pr_debug("Trying %s\n", cycles);
++ err = parse_events(evlist, cycles, NULL);
++ }
++ } else {
++ err = parse_events(evlist, cycles, NULL);
++ }
+ if (err) {
+- pr_debug("Failed to parse event cycles:u\n");
++ pr_debug("Failed to parse event %s\n", cycles);
+ goto out_err;
+ }
+
+diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
+index e81c2493efdf9..44ba900828f6c 100644
+--- a/tools/perf/ui/browsers/annotate.c
++++ b/tools/perf/ui/browsers/annotate.c
+@@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+ .opts = opts,
+ };
+ int ret = -1, err;
++ int not_annotated = list_empty(&notes->src->source);
+
+ if (sym == NULL)
+ return -1;
+@@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+ if (ms->map->dso->annotate_warned)
+ return -1;
+
+- err = symbol__annotate2(ms, evsel, opts, &browser.arch);
+- if (err) {
+- char msg[BUFSIZ];
+- ms->map->dso->annotate_warned = true;
+- symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+- ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+- goto out_free_offsets;
++ if (not_annotated) {
++ err = symbol__annotate2(ms, evsel, opts, &browser.arch);
++ if (err) {
++ char msg[BUFSIZ];
++ ms->map->dso->annotate_warned = true;
++ symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
++ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
++ goto out_free_offsets;
++ }
+ }
+
+ ui_helpline__push("Press ESC to exit");
+@@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
+
+ ret = annotate_browser__run(&browser, evsel, hbt);
+
+- annotated_source__purge(notes->src);
++ if(not_annotated)
++ annotated_source__purge(notes->src);
+
+ out_free_offsets:
+- zfree(&notes->offsets);
++ if(not_annotated)
++ zfree(&notes->offsets);
+ return ret;
+ }
+diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
+index b72ee68222228..fd3e67d2c6bdd 100644
+--- a/tools/perf/ui/browsers/hists.c
++++ b/tools/perf/ui/browsers/hists.c
+@@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser)
+ return container_of(ms, struct callchain_list, ms)->has_children;
+ }
+
+-static bool hist_browser__he_selection_unfolded(struct hist_browser *browser)
+-{
+- return browser->he_selection ? browser->he_selection->unfolded : false;
+-}
+-
+ static bool hist_browser__selection_unfolded(struct hist_browser *browser)
+ {
+ struct hist_entry *he = browser->he_selection;
+@@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+ return n;
+ }
+
+-static void __hist_entry__set_folding(struct hist_entry *he,
+- struct hist_browser *hb, bool unfold)
++static void hist_entry__set_folding(struct hist_entry *he,
++ struct hist_browser *hb, bool unfold)
+ {
+ hist_entry__init_have_children(he);
+ he->unfolded = unfold ? he->has_children : false;
+@@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he,
+ he->nr_rows = 0;
+ }
+
+-static void hist_entry__set_folding(struct hist_entry *he,
+- struct hist_browser *browser, bool unfold)
+-{
+- double percent;
+-
+- percent = hist_entry__get_percent_limit(he);
+- if (he->filtered || percent < browser->min_pcnt)
+- return;
+-
+- __hist_entry__set_folding(he, browser, unfold);
+-
+- if (!he->depth || unfold)
+- browser->nr_hierarchy_entries++;
+- if (he->leaf)
+- browser->nr_callchain_rows += he->nr_rows;
+- else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+- browser->nr_hierarchy_entries++;
+- he->has_no_entry = true;
+- he->nr_rows = 1;
+- } else
+- he->has_no_entry = false;
+-}
+-
+ static void
+ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+ {
+ struct rb_node *nd;
+ struct hist_entry *he;
++ double percent;
+
+ nd = rb_first_cached(&browser->hists->entries);
+ while (nd) {
+@@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+ nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+ hist_entry__set_folding(he, browser, unfold);
++
++ percent = hist_entry__get_percent_limit(he);
++ if (he->filtered || percent < browser->min_pcnt)
++ continue;
++
++ if (!he->depth || unfold)
++ browser->nr_hierarchy_entries++;
++ if (he->leaf)
++ browser->nr_callchain_rows += he->nr_rows;
++ else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
++ browser->nr_hierarchy_entries++;
++ he->has_no_entry = true;
++ he->nr_rows = 1;
++ } else
++ he->has_no_entry = false;
+ }
+ }
+
+@@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo
+ if (!browser->he_selection)
+ return;
+
+- hist_entry__set_folding(browser->he_selection, browser, unfold);
+- browser->b.nr_entries = hist_browser__nr_entries(browser);
++ if (unfold == browser->he_selection->unfolded)
++ return;
++
++ hist_browser__toggle_fold(browser);
+ }
+
+ static void ui_browser__warn_lost_events(struct ui_browser *browser)
+@@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
+ hist_browser__set_folding(browser, true);
+ break;
+ case 'e':
+- /* Expand the selected entry. */
+- hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
++ /* Toggle expand/collapse the selected entry. */
++ hist_browser__toggle_fold(browser);
+ break;
+ case 'H':
+ browser->show_headers = !browser->show_headers;
+@@ -1779,7 +1769,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser)
+ hists_browser__scnprintf_hierarchy_headers(browser, headers,
+ sizeof(headers));
+
+- ui_browser__gotorc(&browser->b, 0, 0);
++ ui_browser__gotorc_title(&browser->b, 0, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+ }
+diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
+index c1f24d0048527..5075ecead5f3d 100644
+--- a/tools/perf/ui/hist.c
++++ b/tools/perf/ui/hist.c
+@@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = {
+ #undef __HPP_SORT_ACC_FN
+ #undef __HPP_SORT_RAW_FN
+
++static void fmt_free(struct perf_hpp_fmt *fmt)
++{
++ /*
++ * At this point fmt should be completely
++ * unhooked, if not it's a bug.
++ */
++ BUG_ON(!list_empty(&fmt->list));
++ BUG_ON(!list_empty(&fmt->sort_list));
++
++ if (fmt->free)
++ fmt->free(fmt);
++}
+
+ void perf_hpp__init(void)
+ {
+@@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+ list_add(&format->sort_list, &list->sorts);
+ }
+
+-void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
++static void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+ {
+ list_del_init(&format->list);
++ fmt_free(format);
+ }
+
+ void perf_hpp__cancel_cumulate(void)
+@@ -672,19 +685,6 @@ next:
+ }
+
+
+-static void fmt_free(struct perf_hpp_fmt *fmt)
+-{
+- /*
+- * At this point fmt should be completely
+- * unhooked, if not it's a bug.
+- */
+- BUG_ON(!list_empty(&fmt->list));
+- BUG_ON(!list_empty(&fmt->sort_list));
+-
+- if (fmt->free)
+- fmt->free(fmt);
+-}
+-
+ void perf_hpp__reset_output_field(struct perf_hpp_list *list)
+ {
+ struct perf_hpp_fmt *fmt, *tmp;
+diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
+index 0bae061b2d6d4..60b232eba5c82 100644
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -1694,6 +1694,7 @@ fallback:
+ #include <bpf/btf.h>
+ #include <bpf/libbpf.h>
+ #include <linux/btf.h>
++#include <tools/dis-asm-compat.h>
+
+ static int symbol__disassemble_bpf(struct symbol *sym,
+ struct annotate_args *args)
+@@ -1728,17 +1729,20 @@ static int symbol__disassemble_bpf(struct symbol *sym,
+ perf_exe(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+- assert(bfdf);
+- assert(bfd_check_format(bfdf, bfd_object));
++ if (bfdf == NULL)
++ abort();
++
++ if (!bfd_check_format(bfdf, bfd_object))
++ abort();
+
+ s = open_memstream(&buf, &buf_size);
+ if (!s) {
+ ret = errno;
+ goto out;
+ }
+- init_disassemble_info(&info, s,
+- (fprintf_ftype) fprintf);
+-
++ init_disassemble_info_compat(&info, s,
++ (fprintf_ftype) fprintf,
++ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+@@ -1777,7 +1781,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
+ #else
+ disassemble = disassembler(bfdf);
+ #endif
+- assert(disassemble);
++ if (disassemble == NULL)
++ abort();
+
+ fflush(s);
+ do {
+diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
+index 58b7069c5a5f8..569e1b8ad0abc 100644
+--- a/tools/perf/util/arm-spe.c
++++ b/tools/perf/util/arm-spe.c
+@@ -51,6 +51,7 @@ struct arm_spe {
+ u8 timeless_decoding;
+ u8 data_queued;
+
++ u64 sample_type;
+ u8 sample_flc;
+ u8 sample_llc;
+ u8 sample_tlb;
+@@ -248,6 +249,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
+ event->sample.header.size = sizeof(struct perf_event_header);
+ }
+
++static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
++{
++ event->header.size = perf_event__sample_event_size(sample, type, 0);
++ return perf_event__synthesize_sample(event, type, 0, sample);
++}
++
+ static inline int
+ arm_spe_deliver_synth_event(struct arm_spe *spe,
+ struct arm_spe_queue *speq __maybe_unused,
+@@ -256,6 +263,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
+ {
+ int ret;
+
++ if (spe->synth_opts.inject) {
++ ret = arm_spe__inject_event(event, sample, spe->sample_type);
++ if (ret)
++ return ret;
++ }
++
+ ret = perf_session__deliver_synth_event(spe->session, event, sample);
+ if (ret)
+ pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+@@ -299,26 +312,16 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+ }
+
+-#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
+- ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
+- ARM_SPE_REMOTE_ACCESS)
+-
+-static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
+-{
+- if (type & SPE_MEM_TYPE)
+- return true;
+-
+- return false;
+-}
+-
+ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
+ {
+ union perf_mem_data_src data_src = { 0 };
+
+ if (record->op == ARM_SPE_LD)
+ data_src.mem_op = PERF_MEM_OP_LOAD;
+- else
++ else if (record->op == ARM_SPE_ST)
+ data_src.mem_op = PERF_MEM_OP_STORE;
++ else
++ return 0;
+
+ if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
+ data_src.mem_lvl = PERF_MEM_LVL_L3;
+@@ -422,7 +425,11 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
+ return err;
+ }
+
+- if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
++ /*
++ * When data_src is zero it means the record is not a memory operation,
++ * skip to synthesize memory sample for this case.
++ */
++ if (spe->sample_memory && data_src) {
+ err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
+ if (err)
+ return err;
+@@ -914,12 +921,15 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
++ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
++ PERF_SAMPLE_ADDR;
+ if (spe->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
++ spe->sample_type = attr.sample_type;
++
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
+index 8d2865b9ade20..4461a29e27719 100644
+--- a/tools/perf/util/auxtrace.c
++++ b/tools/perf/util/auxtrace.c
+@@ -1107,6 +1107,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
+ if (auxtrace__dont_decode(session))
+ return 0;
+
++ if (perf_data__is_pipe(session->data))
++ return 0;
++
+ if (!session->auxtrace || !session->auxtrace->queue_data)
+ return -EINVAL;
+
+@@ -2260,11 +2263,19 @@ struct sym_args {
+ bool near;
+ };
+
++static bool kern_sym_name_match(const char *kname, const char *name)
++{
++ size_t n = strlen(name);
++
++ return !strcmp(kname, name) ||
++ (!strncmp(kname, name, n) && kname[n] == '\t');
++}
++
+ static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+ {
+ /* A function with the same name, and global or the n'th found or any */
+ return kallsyms__is_function(type) &&
+- !strcmp(name, args->name) &&
++ kern_sym_name_match(name, args->name) &&
+ ((args->global && isupper(type)) ||
+ (args->selected && ++(args->cnt) == args->idx) ||
+ (!args->global && !args->selected));
+@@ -2367,6 +2378,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+ char type, u64 start)
+ {
+ struct sym_args *args = arg;
++ u64 size;
+
+ if (!kallsyms__is_function(type))
+ return 0;
+@@ -2376,7 +2388,9 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+ args->start = start;
+ }
+ /* Don't know exactly where the kernel ends, so we add a page */
+- args->size = round_up(start, page_size) + page_size - args->start;
++ size = round_up(start, page_size) + page_size - args->start;
++ if (size > args->size)
++ args->size = size;
+
+ return 0;
+ }
+@@ -2537,7 +2551,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+ *size = sym->start - *start;
+ if (idx > 0) {
+ if (*size)
+- return 1;
++ return 0;
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ print_duplicate_syms(dso, sym_name);
+ return -EINVAL;
+diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
+index 1a7112a87736a..cf1b9f6ec0dbe 100644
+--- a/tools/perf/util/bpf-event.c
++++ b/tools/perf/util/bpf-event.c
+@@ -21,7 +21,8 @@
+ #include "record.h"
+ #include "util/synthetic-events.h"
+
+-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
++#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
++struct btf *btf__load_from_kernel_by_id(__u32 id)
+ {
+ struct btf *btf;
+ #pragma GCC diagnostic push
+@@ -31,6 +32,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+
+ return err ? ERR_PTR(err) : btf;
+ }
++#endif
+
+ #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+@@ -120,7 +122,11 @@ static int perf_env__fetch_btf(struct perf_env *env,
+ node->data_size = data_size;
+ memcpy(node->data, data, data_size);
+
+- perf_env__insert_btf(env, node);
++ if (!perf_env__insert_btf(env, node)) {
++ /* Insertion failed because of a duplicate. */
++ free(node);
++ return -1;
++ }
+ return 0;
+ }
+
+@@ -576,7 +582,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+ synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0);
+ fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n",
+ info->id, name, prog_addrs[0], prog_lens[0]);
+- return;
++ goto out;
+ }
+
+ fprintf(fp, "# bpf_prog_info %u:\n", info->id);
+@@ -586,4 +592,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+ fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n",
+ i, name, prog_addrs[i], prog_lens[i]);
+ }
++out:
++ btf__free(btf);
+ }
+diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
+index fbb3c4057c302..71710a1da4472 100644
+--- a/tools/perf/util/bpf-loader.c
++++ b/tools/perf/util/bpf-loader.c
+@@ -1214,9 +1214,10 @@ bpf__obj_config_map(struct bpf_object *obj,
+ pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+ err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+ out:
+- free(map_name);
+ if (!err)
+ *key_scan_pos += strlen(map_opt);
++
++ free(map_name);
+ return err;
+ }
+
+diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h
+index 65ebaa6694fbd..4b5dda7530c4d 100644
+--- a/tools/perf/util/bpf_counter.h
++++ b/tools/perf/util/bpf_counter.h
+@@ -4,9 +4,12 @@
+
+ #include <linux/list.h>
+ #include <sys/resource.h>
++
++#ifdef HAVE_LIBBPF_SUPPORT
+ #include <bpf/bpf.h>
+ #include <bpf/btf.h>
+ #include <bpf/libbpf.h>
++#endif
+
+ struct evsel;
+ struct target;
+@@ -87,6 +90,8 @@ static inline void set_max_rlimit(void)
+ setrlimit(RLIMIT_MEMLOCK, &rinf);
+ }
+
++#ifdef HAVE_BPF_SKEL
++
+ static inline __u32 bpf_link_get_id(int fd)
+ {
+ struct bpf_link_info link_info = { .id = 0, };
+@@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu)
+
+ return bpf_prog_test_run_opts(prog_fd, &opts);
+ }
++#endif /* HAVE_BPF_SKEL */
+
+ #endif /* __PERF_BPF_COUNTER_H */
+diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h
+deleted file mode 100644
+index 186a5551ddb9d..0000000000000
+--- a/tools/perf/util/bpf_skel/bperf.h
++++ /dev/null
+@@ -1,14 +0,0 @@
+-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+-// Copyright (c) 2021 Facebook
+-
+-#ifndef __BPERF_STAT_H
+-#define __BPERF_STAT_H
+-
+-typedef struct {
+- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+- __uint(key_size, sizeof(__u32));
+- __uint(value_size, sizeof(struct bpf_perf_event_value));
+- __uint(max_entries, 1);
+-} reading_map;
+-
+-#endif /* __BPERF_STAT_H */
+diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+index 292c430768b52..c72f8ad96f751 100644
+--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
++++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+@@ -176,7 +176,7 @@ static int bperf_cgroup_count(void)
+ }
+
+ // This will be attached to cgroup-switches event for each cpu
+-SEC("perf_events")
++SEC("perf_event")
+ int BPF_PROG(on_cgrp_switch)
+ {
+ return bperf_cgroup_count();
+diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+index b8fa3cb2da230..6d2ea67b161ac 100644
+--- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c
++++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+@@ -4,11 +4,21 @@
+ #include <linux/perf_event.h>
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+-#include "bperf.h"
+ #include "bperf_u.h"
+
+-reading_map diff_readings SEC(".maps");
+-reading_map accum_readings SEC(".maps");
++struct {
++ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++ __uint(key_size, sizeof(__u32));
++ __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(max_entries, 1);
++} diff_readings SEC(".maps");
++
++struct {
++ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++ __uint(key_size, sizeof(__u32));
++ __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(max_entries, 1);
++} accum_readings SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
+index 4f70d1459e86c..d82e1633a2e0a 100644
+--- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c
++++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
+@@ -4,7 +4,6 @@
+ #include <linux/perf_event.h>
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
+-#include "bperf.h"
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+@@ -13,8 +12,19 @@ struct {
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+ } events SEC(".maps");
+
+-reading_map prev_readings SEC(".maps");
+-reading_map diff_readings SEC(".maps");
++struct {
++ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++ __uint(key_size, sizeof(__u32));
++ __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(max_entries, 1);
++} prev_readings SEC(".maps");
++
++struct {
++ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
++ __uint(key_size, sizeof(__u32));
++ __uint(value_size, sizeof(struct bpf_perf_event_value));
++ __uint(max_entries, 1);
++} diff_readings SEC(".maps");
+
+ SEC("raw_tp/sched_switch")
+ int BPF_PROG(on_switch)
+diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
+index e32e8f2ff3bd7..1d7c53873dd2d 100644
+--- a/tools/perf/util/build-id.c
++++ b/tools/perf/util/build-id.c
+@@ -872,6 +872,30 @@ out_free:
+ return err;
+ }
+
++static int filename__read_build_id_ns(const char *filename,
++ struct build_id *bid,
++ struct nsinfo *nsi)
++{
++ struct nscookie nsc;
++ int ret;
++
++ nsinfo__mountns_enter(nsi, &nsc);
++ ret = filename__read_build_id(filename, bid);
++ nsinfo__mountns_exit(&nsc);
++
++ return ret;
++}
++
++static bool dso__build_id_mismatch(struct dso *dso, const char *name)
++{
++ struct build_id bid;
++
++ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
++ return false;
++
++ return !dso__build_id_equal(dso, &bid);
++}
++
+ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+ void *priv __maybe_unused)
+ {
+@@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+ is_kallsyms = true;
+ name = machine->mmap_name;
+ }
++
++ if (!is_kallsyms && dso__build_id_mismatch(dso, name))
++ return 0;
++
+ return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
+ is_kallsyms, is_vdso);
+ }
+diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
+index e99b41f9be45a..cd978c240e0dd 100644
+--- a/tools/perf/util/cgroup.c
++++ b/tools/perf/util/cgroup.c
+@@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus
+ return 0;
+ }
+
++static int check_and_add_cgroup_name(const char *fpath)
++{
++ struct cgroup_name *cn;
++
++ list_for_each_entry(cn, &cgroup_list, list) {
++ if (!strcmp(cn->name, fpath))
++ return 0;
++ }
++
++ /* pretend if it's added by ftw() */
++ return add_cgroup_name(fpath, NULL, FTW_D, NULL);
++}
++
+ static void release_cgroup_list(void)
+ {
+ struct cgroup_name *cn;
+@@ -242,7 +255,7 @@ static int list_cgroups(const char *str)
+ struct cgroup_name *cn;
+ char *s;
+
+- /* use given name as is - for testing purpose */
++ /* use given name as is when no regex is given */
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+@@ -253,13 +266,13 @@ static int list_cgroups(const char *str)
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+- /* pretend if it's added by ftw() */
+- ret = add_cgroup_name(s, NULL, FTW_D, NULL);
++
++ ret = check_and_add_cgroup_name(s);
+ free(s);
+- if (ret)
++ if (ret < 0)
+ return -1;
+ } else {
+- if (add_cgroup_name("", NULL, FTW_D, NULL) < 0)
++ if (check_and_add_cgroup_name("/") < 0)
+ return -1;
+ }
+
+diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
+index f5d260b1df4d1..090a76be522bb 100644
+--- a/tools/perf/util/data.c
++++ b/tools/perf/util/data.c
+@@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr)
+ if (!files)
+ return -ENOMEM;
+
+- data->dir.version = PERF_DIR_VERSION;
+- data->dir.files = files;
+- data->dir.nr = nr;
+-
+ for (i = 0; i < nr; i++) {
+ struct perf_data_file *file = &files[i];
+
+@@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr)
+ file->fd = ret;
+ }
+
++ data->dir.version = PERF_DIR_VERSION;
++ data->dir.files = files;
++ data->dir.nr = nr;
+ return 0;
+
+ out_err:
+@@ -128,6 +127,7 @@ int perf_data__open_dir(struct perf_data *data)
+ file->size = st.st_size;
+ }
+
++ closedir(dir);
+ if (!files)
+ return -EINVAL;
+
+@@ -136,6 +136,7 @@ int perf_data__open_dir(struct perf_data *data)
+ return 0;
+
+ out_err:
++ closedir(dir);
+ close_dir(files, nr);
+ return ret;
+ }
+diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
+index c9de82af5584e..1402d9657ef27 100644
+--- a/tools/perf/util/data.h
++++ b/tools/perf/util/data.h
+@@ -4,6 +4,7 @@
+
+ #include <stdio.h>
+ #include <stdbool.h>
++#include <linux/types.h>
+
+ enum perf_data_mode {
+ PERF_DATA_MODE_WRITE,
+diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
+index 2c06abf6dcd26..190e818a07176 100644
+--- a/tools/perf/util/debug.c
++++ b/tools/perf/util/debug.c
+@@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op,
+ break;
+ case BINARY_PRINT_CHAR_DATA:
+ printed += color_fprintf(fp, color, "%c",
+- isprint(ch) ? ch : '.');
++ isprint(ch) && isascii(ch) ? ch : '.');
+ break;
+ case BINARY_PRINT_CHAR_PAD:
+ printed += color_fprintf(fp, color, " ");
+@@ -241,6 +241,10 @@ int perf_quiet_option(void)
+ opt++;
+ }
+
++ /* For debug variables that are used as bool types, set to 0. */
++ redirect_to_stderr = 0;
++ debug_peo_args = 0;
++
+ return 0;
+ }
+
+diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
+index 183a81d5b2f92..2db91121bdafe 100644
+--- a/tools/perf/util/dsos.c
++++ b/tools/perf/util/dsos.c
+@@ -20,8 +20,19 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
+ if (a->ino > b->ino) return -1;
+ if (a->ino < b->ino) return 1;
+
+- if (a->ino_generation > b->ino_generation) return -1;
+- if (a->ino_generation < b->ino_generation) return 1;
++ /*
++ * Synthesized MMAP events have zero ino_generation, avoid comparing
++ * them with MMAP events with actual ino_generation.
++ *
++ * I found it harmful because the mismatch resulted in a new
++ * dso that did not have a build ID whereas the original dso did have a
++ * build ID. The build ID was essential because the object was not found
++ * otherwise. - Adrian
++ */
++ if (a->ino_generation && b->ino_generation) {
++ if (a->ino_generation > b->ino_generation) return -1;
++ if (a->ino_generation < b->ino_generation) return 1;
++ }
+
+ return 0;
+ }
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index 609ca16715018..b125eaadcec4d 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -308,26 +308,13 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+ Dwarf_Attribute attr;
+
+- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+ }
+
+-/* Get attribute and translate it as a sdata */
+-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+- Dwarf_Sword *result)
+-{
+- Dwarf_Attribute attr;
+-
+- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+- dwarf_formsdata(&attr, result) != 0)
+- return -ENOENT;
+-
+- return 0;
+-}
+-
+ /**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+ /* Get the call file index number in CU DIE */
+ static int die_get_call_fileno(Dwarf_Die *in_die)
+ {
+- Dwarf_Sword idx;
++ Dwarf_Word idx;
+
+- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
++ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
+ /* Get the declared file index number in CU DIE */
+ static int die_get_decl_fileno(Dwarf_Die *pdie)
+ {
+- Dwarf_Sword idx;
++ Dwarf_Word idx;
+
+- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
++ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+@@ -1094,7 +1081,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
+ ret = die_get_typename(vr_die, buf);
+ if (ret < 0) {
+ pr_debug("Failed to get type, make it unknown.\n");
+- ret = strbuf_add(buf, " (unknown_type)", 14);
++ ret = strbuf_add(buf, "(unknown_type)", 14);
+ }
+
+ return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
+index cf773f0dec384..5b24eb010336c 100644
+--- a/tools/perf/util/env.c
++++ b/tools/perf/util/env.c
+@@ -74,12 +74,13 @@ out:
+ return node;
+ }
+
+-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
++bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+ {
+ struct rb_node *parent = NULL;
+ __u32 btf_id = btf_node->id;
+ struct btf_node *node;
+ struct rb_node **p;
++ bool ret = true;
+
+ down_write(&env->bpf_progs.lock);
+ p = &env->bpf_progs.btfs.rb_node;
+@@ -93,6 +94,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+ p = &(*p)->rb_right;
+ } else {
+ pr_debug("duplicated btf %u\n", btf_id);
++ ret = false;
+ goto out;
+ }
+ }
+@@ -102,6 +104,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+ env->bpf_progs.btfs_cnt++;
+ out:
+ up_write(&env->bpf_progs.lock);
++ return ret;
+ }
+
+ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
+index 1383876f72b37..163e5ec503a26 100644
+--- a/tools/perf/util/env.h
++++ b/tools/perf/util/env.h
+@@ -167,7 +167,7 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node);
+ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+ __u32 prog_id);
+-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
++bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+
+ int perf_env__numa_node(struct perf_env *env, int cpu);
+diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
+index 7c554234b43d4..f39c8ffc5a111 100644
+--- a/tools/perf/util/evlist-hybrid.c
++++ b/tools/perf/util/evlist-hybrid.c
+@@ -153,8 +153,8 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
+ perf_cpu_map__put(matched_cpus);
+ perf_cpu_map__put(unmatched_cpus);
+ }
+-
+- ret = (unmatched_count == events_nr) ? -1 : 0;
++ if (events_nr)
++ ret = (unmatched_count == events_nr) ? -1 : 0;
+ out:
+ perf_cpu_map__put(cpus);
+ return ret;
+diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
+index 5f92319ce258d..39d294f6c3218 100644
+--- a/tools/perf/util/evlist.c
++++ b/tools/perf/util/evlist.c
+@@ -342,36 +342,65 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
+ return perf_thread_map__nr(evlist->core.threads);
+ }
+
+-void evlist__cpu_iter_start(struct evlist *evlist)
+-{
+- struct evsel *pos;
+-
+- /*
+- * Reset the per evsel cpu_iter. This is needed because
+- * each evsel's cpumap may have a different index space,
+- * and some operations need the index to modify
+- * the FD xyarray (e.g. open, close)
+- */
+- evlist__for_each_entry(evlist, pos)
+- pos->cpu_iter = 0;
+-}
++struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
++{
++ struct evlist_cpu_iterator itr = {
++ .container = evlist,
++ .evsel = evlist__first(evlist),
++ .cpu_map_idx = 0,
++ .evlist_cpu_map_idx = 0,
++ .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
++ .cpu = -1,
++ .affinity = affinity,
++ };
+
+-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
+-{
+- if (ev->cpu_iter >= ev->core.cpus->nr)
+- return true;
+- if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
+- return true;
+- return false;
++ if (itr.affinity) {
++ itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
++ affinity__set(itr.affinity, itr.cpu);
++ itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
++ /*
++ * If this CPU isn't in the evsel's cpu map then advance through
++ * the list.
++ */
++ if (itr.cpu_map_idx == -1)
++ evlist_cpu_iterator__next(&itr);
++ }
++ return itr;
++}
++
++void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
++{
++ while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
++ evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
++ evlist_cpu_itr->cpu_map_idx =
++ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
++ evlist_cpu_itr->cpu);
++ if (evlist_cpu_itr->cpu_map_idx != -1)
++ return;
++ }
++ evlist_cpu_itr->evlist_cpu_map_idx++;
++ if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
++ evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
++ evlist_cpu_itr->cpu =
++ perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
++ evlist_cpu_itr->evlist_cpu_map_idx);
++ if (evlist_cpu_itr->affinity)
++ affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu);
++ evlist_cpu_itr->cpu_map_idx =
++ perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
++ evlist_cpu_itr->cpu);
++ /*
++ * If this CPU isn't in the evsel's cpu map then advance through
++ * the list.
++ */
++ if (evlist_cpu_itr->cpu_map_idx == -1)
++ evlist_cpu_iterator__next(evlist_cpu_itr);
++ }
+ }
+
+-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
++bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
+ {
+- if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
+- ev->cpu_iter++;
+- return false;
+- }
+- return true;
++ return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
+ }
+
+ static int evsel__strcmp(struct evsel *pos, char *evsel_name)
+@@ -400,31 +429,26 @@ static int evlist__is_enabled(struct evlist *evlist)
+ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
+ {
+ struct evsel *pos;
++ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity affinity;
+- int cpu, i, imm = 0;
+ bool has_imm = false;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ /* Disable 'immediate' events last */
+- for (imm = 0; imm <= 1; imm++) {
+- evlist__for_each_cpu(evlist, i, cpu) {
+- affinity__set(&affinity, cpu);
+-
+- evlist__for_each_entry(evlist, pos) {
+- if (evsel__strcmp(pos, evsel_name))
+- continue;
+- if (evsel__cpu_iter_skip(pos, cpu))
+- continue;
+- if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+- continue;
+- if (pos->immediate)
+- has_imm = true;
+- if (pos->immediate != imm)
+- continue;
+- evsel__disable_cpu(pos, pos->cpu_iter - 1);
+- }
++ for (int imm = 0; imm <= 1; imm++) {
++ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
++ pos = evlist_cpu_itr.evsel;
++ if (evsel__strcmp(pos, evsel_name))
++ continue;
++ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
++ continue;
++ if (pos->immediate)
++ has_imm = true;
++ if (pos->immediate != imm)
++ continue;
++ evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+ }
+ if (!has_imm)
+ break;
+@@ -462,24 +486,19 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
+ static void __evlist__enable(struct evlist *evlist, char *evsel_name)
+ {
+ struct evsel *pos;
++ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity affinity;
+- int cpu, i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+- evlist__for_each_cpu(evlist, i, cpu) {
+- affinity__set(&affinity, cpu);
+-
+- evlist__for_each_entry(evlist, pos) {
+- if (evsel__strcmp(pos, evsel_name))
+- continue;
+- if (evsel__cpu_iter_skip(pos, cpu))
+- continue;
+- if (!evsel__is_group_leader(pos) || !pos->core.fd)
+- continue;
+- evsel__enable_cpu(pos, pos->cpu_iter - 1);
+- }
++ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
++ pos = evlist_cpu_itr.evsel;
++ if (evsel__strcmp(pos, evsel_name))
++ continue;
++ if (!evsel__is_group_leader(pos) || !pos->core.fd)
++ continue;
++ evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
+ }
+ affinity__cleanup(&affinity);
+ evlist__for_each_entry(evlist, pos) {
+@@ -1264,8 +1283,8 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
+ void evlist__close(struct evlist *evlist)
+ {
+ struct evsel *evsel;
++ struct evlist_cpu_iterator evlist_cpu_itr;
+ struct affinity affinity;
+- int cpu, i;
+
+ /*
+ * With perf record core.cpus is usually NULL.
+@@ -1279,15 +1298,12 @@ void evlist__close(struct evlist *evlist)
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+- evlist__for_each_cpu(evlist, i, cpu) {
+- affinity__set(&affinity, cpu);
+
+- evlist__for_each_entry_reverse(evlist, evsel) {
+- if (evsel__cpu_iter_skip(evsel, cpu))
+- continue;
+- perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
+- }
++ evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
++ perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
++ evlist_cpu_itr.cpu_map_idx);
+ }
++
+ affinity__cleanup(&affinity);
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ perf_evsel__free_fd(&evsel->core);
+diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
+index 97bfb8d0be4f0..ec177f783ee67 100644
+--- a/tools/perf/util/evlist.h
++++ b/tools/perf/util/evlist.h
+@@ -325,17 +325,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
+ #define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
+
+-#define evlist__for_each_cpu(evlist, index, cpu) \
+- evlist__cpu_iter_start(evlist); \
+- perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
++/** Iterator state for evlist__for_each_cpu */
++struct evlist_cpu_iterator {
++ /** The list being iterated through. */
++ struct evlist *container;
++ /** The current evsel of the iterator. */
++ struct evsel *evsel;
++ /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */
++ int cpu_map_idx;
++ /**
++ * The CPU map index corresponding to evlist->core.all_cpus for the
++ * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may
++ * contain fewer entries.
++ */
++ int evlist_cpu_map_idx;
++ /** The number of CPU map entries in evlist->core.all_cpus. */
++ int evlist_cpu_map_nr;
++ /** The current CPU of the iterator. */
++ int cpu;
++ /** If present, used to set the affinity when switching between CPUs. */
++ struct affinity *affinity;
++};
++
++/**
++ * evlist__for_each_cpu - without affinity, iterate over the evlist. With
++ * affinity, iterate over all CPUs and then the evlist
++ * for each evsel on that CPU. When switching between
++ * CPUs the affinity is set to the CPU to avoid IPIs
++ * during syscalls.
++ * @evlist_cpu_itr: the iterator instance.
++ * @evlist: evlist instance to iterate.
++ * @affinity: NULL or used to set the affinity to the current CPU.
++ */
++#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \
++ for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \
++ !evlist_cpu_iterator__end(&evlist_cpu_itr); \
++ evlist_cpu_iterator__next(&evlist_cpu_itr))
++
++/** Returns an iterator set to the first CPU/evsel of evlist. */
++struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity);
++/** Move to next element in iterator, updating CPU, evsel and the affinity. */
++void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr);
++/** Returns true when iterator is at the end of the CPUs and evlist. */
++bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr);
+
+ struct evsel *evlist__get_tracking_event(struct evlist *evlist);
+ void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
+
+-void evlist__cpu_iter_start(struct evlist *evlist);
+-bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
+-bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
+-
+ struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+
+ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
+diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
+index dbfeceb2546c6..c87f9974c0c10 100644
+--- a/tools/perf/util/evsel.c
++++ b/tools/perf/util/evsel.c
+@@ -1047,6 +1047,17 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+ evsel__set_sample_bit(evsel, WEIGHT);
+ }
+
++static void evsel__set_default_freq_period(struct record_opts *opts,
++ struct perf_event_attr *attr)
++{
++ if (opts->freq) {
++ attr->freq = 1;
++ attr->sample_freq = opts->freq;
++ } else {
++ attr->sample_period = opts->default_interval;
++ }
++}
++
+ /*
+ * The enable_on_exec/disabled value strategy:
+ *
+@@ -1113,14 +1124,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ * We default some events to have a default interval. But keep
+ * it a weak assumption overridable by the user.
+ */
+- if (!attr->sample_period) {
+- if (opts->freq) {
+- attr->freq = 1;
+- attr->sample_freq = opts->freq;
+- } else {
+- attr->sample_period = opts->default_interval;
+- }
+- }
++ if ((evsel->is_libpfm_event && !attr->sample_period) ||
++ (!evsel->is_libpfm_event && (!attr->sample_period ||
++ opts->user_freq != UINT_MAX ||
++ opts->user_interval != ULLONG_MAX)))
++ evsel__set_default_freq_period(opts, attr);
++
+ /*
+ * If attr->freq was set (here or earlier), ask for period
+ * to be sampled.
+diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
+index 1f7edfa8568a6..0492cafac4430 100644
+--- a/tools/perf/util/evsel.h
++++ b/tools/perf/util/evsel.h
+@@ -119,7 +119,6 @@ struct evsel {
+ bool errored;
+ struct hashmap *per_pkg_mask;
+ int err;
+- int cpu_iter;
+ struct {
+ evsel__sb_cb_t *cb;
+ void *data;
+@@ -249,6 +248,11 @@ static inline bool evsel__is_bpf(struct evsel *evsel)
+ return evsel->bpf_counter_ops != NULL;
+ }
+
++static inline bool evsel__is_bperf(struct evsel *evsel)
++{
++ return evsel->bpf_counter_ops != NULL && list_empty(&evsel->bpf_counter_list);
++}
++
+ #define EVSEL__MAX_ALIASES 8
+
+ extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
+index aed49806a09ba..02cd9f75e3d2f 100644
+--- a/tools/perf/util/genelf.c
++++ b/tools/perf/util/genelf.c
+@@ -30,7 +30,11 @@
+
+ #define BUILD_ID_URANDOM /* different uuid for each run */
+
+-#ifdef HAVE_LIBCRYPTO
++// FIXME, remove this and fix the deprecation warnings before its removed and
++// We'll break for good here...
++#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
++
++#ifdef HAVE_LIBCRYPTO_SUPPORT
+
+ #define BUILD_ID_MD5
+ #undef BUILD_ID_SHA /* does not seem to work well when linked with Java */
+@@ -247,6 +251,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Ehdr *ehdr;
++ Elf_Phdr *phdr;
+ Elf_Shdr *shdr;
+ uint64_t eh_frame_base_offset;
+ char *strsym = NULL;
+@@ -281,6 +286,19 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */
+
++ /*
++ * setup program header
++ */
++ phdr = elf_newphdr(e, 1);
++ phdr[0].p_type = PT_LOAD;
++ phdr[0].p_offset = 0;
++ phdr[0].p_vaddr = 0;
++ phdr[0].p_paddr = 0;
++ phdr[0].p_filesz = csize;
++ phdr[0].p_memsz = csize;
++ phdr[0].p_flags = PF_X | PF_R;
++ phdr[0].p_align = 8;
++
+ /*
+ * setup text section
+ */
+diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
+index d4137559be053..ac638945b4cb0 100644
+--- a/tools/perf/util/genelf.h
++++ b/tools/perf/util/genelf.h
+@@ -50,8 +50,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
+
+ #if GEN_ELF_CLASS == ELFCLASS64
+ #define elf_newehdr elf64_newehdr
++#define elf_newphdr elf64_newphdr
+ #define elf_getshdr elf64_getshdr
+ #define Elf_Ehdr Elf64_Ehdr
++#define Elf_Phdr Elf64_Phdr
+ #define Elf_Shdr Elf64_Shdr
+ #define Elf_Sym Elf64_Sym
+ #define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
+@@ -59,8 +61,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
+ #define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a)
+ #else
+ #define elf_newehdr elf32_newehdr
++#define elf_newphdr elf32_newphdr
+ #define elf_getshdr elf32_getshdr
+ #define Elf_Ehdr Elf32_Ehdr
++#define Elf_Phdr Elf32_Phdr
+ #define Elf_Shdr Elf32_Shdr
+ #define Elf_Sym Elf32_Sym
+ #define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
+diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
+index 1c7414f666552..25947d0136038 100644
+--- a/tools/perf/util/header.c
++++ b/tools/perf/util/header.c
+@@ -4200,7 +4200,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct evlist **pevlist)
+ {
+- u32 i, ids, n_ids;
++ u32 i, n_ids;
++ u64 *ids;
+ struct evsel *evsel;
+ struct evlist *evlist = *pevlist;
+
+@@ -4216,9 +4217,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+
+ evlist__add(evlist, evsel);
+
+- ids = event->header.size;
+- ids -= (void *)&event->attr.id - (void *)event;
+- n_ids = ids / sizeof(u64);
++ n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
++ n_ids = n_ids / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+@@ -4227,8 +4227,9 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+ if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
+ return -ENOMEM;
+
++ ids = (void *)&event->attr.attr + event->attr.attr.size;
+ for (i = 0; i < n_ids; i++) {
+- perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
++ perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
+ }
+
+ return 0;
+diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
+index 65fe65ba03c25..b776465e04ef3 100644
+--- a/tools/perf/util/hist.c
++++ b/tools/perf/util/hist.c
+@@ -289,15 +289,10 @@ static long hist_time(unsigned long htime)
+ return htime;
+ }
+
+-static void he_stat__add_period(struct he_stat *he_stat, u64 period,
+- u64 weight, u64 ins_lat, u64 p_stage_cyc)
++static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+ {
+-
+ he_stat->period += period;
+- he_stat->weight += weight;
+ he_stat->nr_events += 1;
+- he_stat->ins_lat += ins_lat;
+- he_stat->p_stage_cyc += p_stage_cyc;
+ }
+
+ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+@@ -308,9 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+ dest->period_guest_sys += src->period_guest_sys;
+ dest->period_guest_us += src->period_guest_us;
+ dest->nr_events += src->nr_events;
+- dest->weight += src->weight;
+- dest->ins_lat += src->ins_lat;
+- dest->p_stage_cyc += src->p_stage_cyc;
+ }
+
+ static void he_stat__decay(struct he_stat *he_stat)
+@@ -598,9 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
+ struct hist_entry *he;
+ int64_t cmp;
+ u64 period = entry->stat.period;
+- u64 weight = entry->stat.weight;
+- u64 ins_lat = entry->stat.ins_lat;
+- u64 p_stage_cyc = entry->stat.p_stage_cyc;
+ bool leftmost = true;
+
+ p = &hists->entries_in->rb_root.rb_node;
+@@ -619,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
+
+ if (!cmp) {
+ if (sample_self) {
+- he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
++ he_stat__add_period(&he->stat, period);
+ hist_entry__add_callchain_period(he, period);
+ }
+ if (symbol_conf.cumulate_callchain)
+- he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
++ he_stat__add_period(he->stat_acc, period);
+
+ /*
+ * This mem info was allocated from sample__resolve_mem
+@@ -733,9 +722,6 @@ __hists__add_entry(struct hists *hists,
+ .stat = {
+ .nr_events = 1,
+ .period = sample->period,
+- .weight = sample->weight,
+- .ins_lat = sample->ins_lat,
+- .p_stage_cyc = sample->p_stage_cyc,
+ },
+ .parent = sym_parent,
+ .filtered = symbol__parent_filter(sym_parent) | al->filtered,
+@@ -748,6 +734,9 @@ __hists__add_entry(struct hists *hists,
+ .raw_size = sample->raw_size,
+ .ops = ops,
+ .time = hist_time(sample->time),
++ .weight = sample->weight,
++ .ins_lat = sample->ins_lat,
++ .p_stage_cyc = sample->p_stage_cyc,
+ }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
+
+ if (!hists->has_callchains && he && he->callchain_size != 0)
+diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
+index 5343b62476e60..621f35ae1efa5 100644
+--- a/tools/perf/util/hist.h
++++ b/tools/perf/util/hist.h
+@@ -369,7 +369,6 @@ enum {
+ };
+
+ void perf_hpp__init(void);
+-void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
+ void perf_hpp__cancel_cumulate(void);
+ void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+ void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
+index bc629359826fb..b41c2e9c6f887 100644
+--- a/tools/perf/util/intel-pt-decoder/Build
++++ b/tools/perf/util/intel-pt-decoder/Build
+@@ -18,3 +18,5 @@ CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+ ifeq ($(CC_NO_CLANG), 1)
+ CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+ endif
++
++CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+index 5ab631702769b..b9f94f198a3eb 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+@@ -137,6 +137,7 @@ struct intel_pt_decoder {
+ bool in_psb;
+ bool hop;
+ bool leap;
++ bool emulated_ptwrite;
+ bool vm_time_correlation;
+ bool vm_tm_corr_dry_run;
+ bool vm_tm_corr_reliable;
+@@ -473,6 +474,8 @@ static int intel_pt_ext_err(int code)
+ return INTEL_PT_ERR_LOST;
+ case -ELOOP:
+ return INTEL_PT_ERR_NELOOP;
++ case -ECONNRESET:
++ return INTEL_PT_ERR_EPTW;
+ default:
+ return INTEL_PT_ERR_UNK;
+ }
+@@ -489,6 +492,7 @@ static const char *intel_pt_err_msgs[] = {
+ [INTEL_PT_ERR_LOST] = "Lost trace data",
+ [INTEL_PT_ERR_UNK] = "Unknown error!",
+ [INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)",
++ [INTEL_PT_ERR_EPTW] = "Broken emulated ptwrite",
+ };
+
+ int intel_pt__strerror(int code, char *buf, size_t buflen)
+@@ -819,6 +823,9 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ return 0;
+
+ case INTEL_PT_MTC:
+@@ -1204,61 +1211,69 @@ out_no_progress:
+
+ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+ {
++ enum intel_pt_sample_type type = decoder->state.type;
+ bool ret = false;
+
++ decoder->state.type &= ~INTEL_PT_BRANCH;
++
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+- decoder->state.type = INTEL_PT_TRANSACTION;
++ decoder->state.type |= INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
+- decoder->state.from_ip = decoder->ip;
+- decoder->state.to_ip = 0;
+ decoder->state.flags = decoder->fup_tx_flags;
+- return true;
++ ret = true;
+ }
+ if (decoder->set_fup_ptw) {
+ decoder->set_fup_ptw = false;
+- decoder->state.type = INTEL_PT_PTW;
++ decoder->state.type |= INTEL_PT_PTW;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+- decoder->state.from_ip = decoder->ip;
+- decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->fup_ptw_payload;
+- return true;
++ ret = true;
+ }
+ if (decoder->set_fup_mwait) {
+ decoder->set_fup_mwait = false;
+- decoder->state.type = INTEL_PT_MWAIT_OP;
+- decoder->state.from_ip = decoder->ip;
+- decoder->state.to_ip = 0;
++ decoder->state.type |= INTEL_PT_MWAIT_OP;
+ decoder->state.mwait_payload = decoder->fup_mwait_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_pwre) {
+ decoder->set_fup_pwre = false;
+ decoder->state.type |= INTEL_PT_PWR_ENTRY;
+- decoder->state.type &= ~INTEL_PT_BRANCH;
+- decoder->state.from_ip = decoder->ip;
+- decoder->state.to_ip = 0;
+ decoder->state.pwre_payload = decoder->fup_pwre_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_exstop) {
+ decoder->set_fup_exstop = false;
+ decoder->state.type |= INTEL_PT_EX_STOP;
+- decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+- decoder->state.from_ip = decoder->ip;
+- decoder->state.to_ip = 0;
+ ret = true;
+ }
+ if (decoder->set_fup_bep) {
+ decoder->set_fup_bep = false;
+ decoder->state.type |= INTEL_PT_BLK_ITEMS;
+- decoder->state.type &= ~INTEL_PT_BRANCH;
++ ret = true;
++ }
++ if (decoder->overflow) {
++ decoder->overflow = false;
++ if (!ret && !decoder->pge) {
++ if (decoder->hop) {
++ decoder->state.type = 0;
++ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
++ }
++ decoder->pge = true;
++ decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN;
++ decoder->state.from_ip = 0;
++ decoder->state.to_ip = decoder->ip;
++ return true;
++ }
++ }
++ if (ret) {
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+- ret = true;
++ } else {
++ decoder->state.type = type;
+ }
+ return ret;
+ }
+@@ -1394,17 +1409,108 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+ return intel_pt_bug(decoder);
+ }
+
++struct eptw_data {
++ int bit_countdown;
++ uint64_t payload;
++};
++
++static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
++{
++ struct eptw_data *data = pkt_info->data;
++ int nr_bits;
++
++ switch (pkt_info->packet.type) {
++ case INTEL_PT_PAD:
++ case INTEL_PT_MNT:
++ case INTEL_PT_MODE_EXEC:
++ case INTEL_PT_MODE_TSX:
++ case INTEL_PT_MTC:
++ case INTEL_PT_FUP:
++ case INTEL_PT_CYC:
++ case INTEL_PT_CBR:
++ case INTEL_PT_TSC:
++ case INTEL_PT_TMA:
++ case INTEL_PT_PIP:
++ case INTEL_PT_VMCS:
++ case INTEL_PT_PSB:
++ case INTEL_PT_PSBEND:
++ case INTEL_PT_PTWRITE:
++ case INTEL_PT_PTWRITE_IP:
++ case INTEL_PT_EXSTOP:
++ case INTEL_PT_EXSTOP_IP:
++ case INTEL_PT_MWAIT:
++ case INTEL_PT_PWRE:
++ case INTEL_PT_PWRX:
++ case INTEL_PT_BBP:
++ case INTEL_PT_BIP:
++ case INTEL_PT_BEP:
++ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
++ break;
++
++ case INTEL_PT_TNT:
++ nr_bits = data->bit_countdown;
++ if (nr_bits > pkt_info->packet.count)
++ nr_bits = pkt_info->packet.count;
++ data->payload <<= nr_bits;
++ data->payload |= pkt_info->packet.payload >> (64 - nr_bits);
++ data->bit_countdown -= nr_bits;
++ return !data->bit_countdown;
++
++ case INTEL_PT_TIP_PGE:
++ case INTEL_PT_TIP_PGD:
++ case INTEL_PT_TIP:
++ case INTEL_PT_BAD:
++ case INTEL_PT_OVF:
++ case INTEL_PT_TRACESTOP:
++ default:
++ return 1;
++ }
++
++ return 0;
++}
++
++static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder)
++{
++ int n = 64 - decoder->tnt.count;
++ struct eptw_data data = {
++ .bit_countdown = n,
++ .payload = decoder->tnt.payload >> n,
++ };
++
++ decoder->emulated_ptwrite = false;
++ intel_pt_log("Emulated ptwrite detected\n");
++
++ intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data);
++ if (data.bit_countdown)
++ return -ECONNRESET;
++
++ decoder->state.type = INTEL_PT_PTW;
++ decoder->state.from_ip = decoder->ip;
++ decoder->state.to_ip = 0;
++ decoder->state.ptw_payload = data.payload;
++ return 0;
++}
++
+ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+ {
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ while (1) {
++ if (decoder->emulated_ptwrite)
++ return intel_pt_emulated_ptwrite(decoder);
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+- if (err == INTEL_PT_RETURN)
++ if (err == INTEL_PT_RETURN) {
++ decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite;
+ return 0;
+- if (err)
++ }
++ if (err) {
++ decoder->emulated_ptwrite = false;
+ return err;
++ }
+
+ if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+ if (!decoder->return_compression) {
+@@ -1607,7 +1713,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+ intel_pt_clear_tx_flags(decoder);
+ intel_pt_set_nr(decoder);
+ decoder->timestamp_insn_cnt = 0;
+- decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
++ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
++ decoder->state.from_ip = decoder->ip;
++ decoder->ip = 0;
++ decoder->pge = false;
++ decoder->set_fup_tx_flags = false;
++ decoder->set_fup_ptw = false;
++ decoder->set_fup_mwait = false;
++ decoder->set_fup_pwre = false;
++ decoder->set_fup_exstop = false;
++ decoder->set_fup_bep = false;
+ decoder->overflow = true;
+ return -EOVERFLOW;
+ }
+@@ -1744,6 +1859,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
++ decoder->cyc_ref_timestamp = decoder->timestamp;
++ decoder->cycle_cnt = 0;
+
+ intel_pt_mtc_cyc_cnt_cbr(decoder);
+ }
+@@ -1855,6 +1972,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ err = -EAGAIN;
+@@ -1957,6 +2077,9 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ intel_pt_log("ERROR: Missing TIP after FUP\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+@@ -2116,6 +2239,9 @@ static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+ case INTEL_PT_TIP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TRACESTOP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ default:
+ return 1;
+ }
+@@ -2635,6 +2761,9 @@ static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
+ decoder->blk_type = 0;
+ break;
+
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MNT:
+@@ -2665,6 +2794,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+ /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+ {
++ *err = 0;
++
+ /* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+ *err = intel_pt_scan_for_psb(decoder);
+@@ -2677,6 +2808,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
+ return HOP_IGNORE;
+
+ case INTEL_PT_TIP_PGD:
++ decoder->pge = false;
+ if (!decoder->packet.count) {
+ intel_pt_set_nr(decoder);
+ return HOP_IGNORE;
+@@ -2704,18 +2836,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+- if (intel_pt_fup_event(decoder))
+- return HOP_RETURN;
+- if (!decoder->branch_enable)
++ if (decoder->set_fup_mwait || decoder->set_fup_pwre)
++ *no_tip = true;
++ if (!decoder->branch_enable || !decoder->pge)
+ *no_tip = true;
+ if (*no_tip) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
++ intel_pt_fup_event(decoder);
+ return HOP_RETURN;
+ }
++ intel_pt_fup_event(decoder);
++ decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
+ *err = intel_pt_walk_fup_tip(decoder);
+- if (!*err)
++ if (!*err && decoder->state.to_ip)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return HOP_RETURN;
+
+@@ -2765,6 +2900,9 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ default:
+ return HOP_PROCESS;
+ }
+@@ -2833,6 +2971,9 @@ static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ if (data->after_psbend) {
+ data->after_psbend -= 1;
+ if (!data->after_psbend)
+@@ -2896,7 +3037,7 @@ static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
+ {
+ struct intel_pt_psb_info data = { .fup = false };
+
+- if (!decoder->branch_enable || !decoder->pge)
++ if (!decoder->branch_enable)
+ return false;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
+@@ -2923,6 +3064,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+ if (err)
+ return err;
+ next:
++ err = 0;
+ if (decoder->cyc_threshold) {
+ if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
+ decoder->sample_cyc = false;
+@@ -2961,6 +3103,7 @@ next:
+
+ case INTEL_PT_TIP_PGE: {
+ decoder->pge = true;
++ decoder->overflow = false;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ intel_pt_set_nr(decoder);
+ if (decoder->packet.count == 0) {
+@@ -2998,7 +3141,7 @@ next:
+ break;
+ }
+ intel_pt_set_last_ip(decoder);
+- if (!decoder->branch_enable) {
++ if (!decoder->branch_enable || !decoder->pge) {
+ decoder->ip = decoder->last_ip;
+ if (intel_pt_fup_event(decoder))
+ return 0;
+@@ -3197,6 +3340,11 @@ next:
+ }
+ goto next;
+
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
++ break;
++
+ default:
+ return intel_pt_bug(decoder);
+ }
+@@ -3239,6 +3387,9 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ intel_pt_log("ERROR: Unexpected packet\n");
+ err = -ENOENT;
+ goto out;
+@@ -3450,6 +3601,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ default:
+ break;
+ }
+@@ -3466,10 +3620,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
++ decoder->overflow = false;
+
+ if (!decoder->branch_enable) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+- decoder->overflow = false;
+ decoder->state.type = 0; /* Do not have a sample */
+ return 0;
+ }
+@@ -3484,7 +3638,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+- decoder->overflow = false;
+
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+@@ -3606,7 +3759,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
+ }
+
+ decoder->have_last_ip = true;
+- decoder->pkt_state = INTEL_PT_STATE_NO_IP;
++ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+@@ -3703,7 +3856,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+
+ if (err) {
+ decoder->state.err = intel_pt_ext_err(err);
+- decoder->state.from_ip = decoder->ip;
++ if (err != -EOVERFLOW)
++ decoder->state.from_ip = decoder->ip;
+ intel_pt_update_sample_time(decoder);
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+ intel_pt_set_nr(decoder);
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+index 4b5e79fcf557f..0a641aba3c7cb 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+@@ -55,6 +55,7 @@ enum {
+ INTEL_PT_ERR_LOST,
+ INTEL_PT_ERR_UNK,
+ INTEL_PT_ERR_NELOOP,
++ INTEL_PT_ERR_EPTW,
+ INTEL_PT_ERR_MAX,
+ };
+
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+index 593f20e9774c0..9f29cf7210773 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+@@ -32,6 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn,
+ int ext;
+
+ intel_pt_insn->rel = 0;
++ intel_pt_insn->emulated_ptwrite = false;
+
+ if (insn_is_avx(insn)) {
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+index c2861cfdd768d..e3338b56a75f2 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
+@@ -37,6 +37,7 @@ enum intel_pt_insn_branch {
+ struct intel_pt_insn {
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
++ bool emulated_ptwrite;
+ int length;
+ int32_t rel;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+index 02a3395d6ce3a..6ff97b6107b72 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+@@ -64,6 +64,9 @@ static const char * const packet_name[] = {
+ [INTEL_PT_BIP] = "BIP",
+ [INTEL_PT_BEP] = "BEP",
+ [INTEL_PT_BEP_IP] = "BEP",
++ [INTEL_PT_CFE] = "CFE",
++ [INTEL_PT_CFE_IP] = "CFE",
++ [INTEL_PT_EVD] = "EVD",
+ };
+
+ const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+@@ -329,6 +332,29 @@ static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+ return 2;
+ }
+
++static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
++ struct intel_pt_pkt *packet)
++{
++ if (len < 4)
++ return INTEL_PT_NEED_MORE_BYTES;
++ packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
++ packet->count = buf[2] & 0x1f;
++ packet->payload = buf[3];
++ return 4;
++}
++
++static int intel_pt_get_evd(const unsigned char *buf, size_t len,
++ struct intel_pt_pkt *packet)
++{
++ if (len < 11)
++ return INTEL_PT_NEED_MORE_BYTES;
++ packet->type = INTEL_PT_EVD;
++ packet->count = buf[2] & 0x3f;
++ packet->payload = buf[3];
++ memcpy_le64(&packet->payload, buf + 3, 8);
++ return 11;
++}
++
+ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+ {
+@@ -375,6 +401,10 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ return intel_pt_get_bep(len, packet);
+ case 0xb3: /* BEP with IP */
+ return intel_pt_get_bep_ip(len, packet);
++ case 0x13: /* CFE */
++ return intel_pt_get_cfe(buf, len, packet);
++ case 0x53: /* EVD */
++ return intel_pt_get_evd(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+@@ -624,6 +654,9 @@ void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ case INTEL_PT_EVD:
+ *ctx = INTEL_PT_NO_CTX;
+ break;
+ case INTEL_PT_BBP:
+@@ -751,6 +784,13 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+ case INTEL_PT_BIP:
+ return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+ name, packet->count, payload);
++ case INTEL_PT_CFE:
++ case INTEL_PT_CFE_IP:
++ return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
++ name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
++ case INTEL_PT_EVD:
++ return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
++ name, packet->count, payload);
+ default:
+ break;
+ }
+diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+index 996090cb84f66..496ba4be875ca 100644
+--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
++++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+@@ -56,6 +56,9 @@ enum intel_pt_pkt_type {
+ INTEL_PT_BIP,
+ INTEL_PT_BEP,
+ INTEL_PT_BEP_IP,
++ INTEL_PT_CFE,
++ INTEL_PT_CFE_IP,
++ INTEL_PT_EVD,
+ };
+
+ struct intel_pt_pkt {
+diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
+index 6f852b305e92b..7a2ce387079e3 100644
+--- a/tools/perf/util/intel-pt.c
++++ b/tools/perf/util/intel-pt.c
+@@ -506,6 +506,7 @@ struct intel_pt_cache_entry {
+ u64 byte_cnt;
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
++ bool emulated_ptwrite;
+ int length;
+ int32_t rel;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+@@ -592,6 +593,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+ e->byte_cnt = byte_cnt;
+ e->op = intel_pt_insn->op;
+ e->branch = intel_pt_insn->branch;
++ e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
+ e->length = intel_pt_insn->length;
+ e->rel = intel_pt_insn->rel;
+ memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
+@@ -678,6 +680,28 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq)
+ return 0;
+ }
+
++static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
++{
++ return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
++}
++
++#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite "
++#define PTWRITE_MAGIC_LEN 16
++
++static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
++{
++ unsigned char buf[PTWRITE_MAGIC_LEN];
++ ssize_t len;
++
++ len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
++ if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
++ intel_pt_log("Emulated ptwrite signature found\n");
++ return true;
++ }
++ intel_pt_log("Emulated ptwrite signature not found\n");
++ return false;
++}
++
+ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip,
+ uint64_t to_ip, uint64_t max_insn_cnt,
+@@ -740,6 +764,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+ *ip += e->byte_cnt;
+ intel_pt_insn->op = e->op;
+ intel_pt_insn->branch = e->branch;
++ intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
+ intel_pt_insn->length = e->length;
+ intel_pt_insn->rel = e->rel;
+ memcpy(intel_pt_insn->buf, e->insn,
+@@ -771,8 +796,18 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+
+ insn_cnt += 1;
+
+- if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
++ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
++ bool eptw;
++ u64 offs;
++
++ if (!intel_pt_jmp_16(intel_pt_insn))
++ goto out;
++ /* Check for emulated ptwrite */
++ offs = offset + intel_pt_insn->length;
++ eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs);
++ intel_pt_insn->emulated_ptwrite = eptw;
+ goto out;
++ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ goto out_no_cache;
+@@ -2510,6 +2545,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+ ptq->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
++ ptq->timestamp = state->est_timestamp;
+ if (pt->synth_opts.errors) {
+ err = intel_ptq_synth_error(ptq, state);
+ if (err)
+@@ -3539,6 +3575,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
+ *args = p;
+ return 0;
+ }
++ p += 1;
+ while (1) {
+ vmcs = strtoull(p, &p, 0);
+ if (errno)
+@@ -3589,6 +3626,7 @@ static const char * const intel_pt_info_fmts[] = {
+ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+ [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
++ [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n",
+ [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
+ [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
+ [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+@@ -3603,8 +3641,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish)
+ if (!dump_trace)
+ return;
+
+- for (i = start; i <= finish; i++)
+- fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
++ for (i = start; i <= finish; i++) {
++ const char *fmt = intel_pt_info_fmts[i];
++
++ if (fmt)
++ fprintf(stdout, fmt, arr[i]);
++ }
+ }
+
+ static void intel_pt_print_info_str(const char *name, const char *str)
+@@ -3900,6 +3942,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
+
+ intel_pt_setup_pebs_events(pt);
+
++ if (perf_data__is_pipe(session->data)) {
++ pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n"
++ " The output cannot relied upon. In particular,\n"
++ " timestamps and the order of events may be incorrect.\n");
++ }
++
+ if (pt->sampling_mode || list_empty(&session->auxtrace_index))
+ err = auxtrace_queue_data(session, true, true);
+ else
+diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
+index 96c8ef60f4f84..8ee3a947b1599 100644
+--- a/tools/perf/util/llvm-utils.c
++++ b/tools/perf/util/llvm-utils.c
+@@ -531,14 +531,37 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
+
+ pr_debug("llvm compiling command template: %s\n", template);
+
++ /*
++ * Below, substitute control characters for values that can cause the
++ * echo to misbehave, then substitute the values back.
++ */
+ err = -ENOMEM;
+- if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0)
++ if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0)
+ goto errout;
+
++#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0)
++ for (char *p = command_echo; *p; p++) {
++ SWAP_CHAR('<', '\001');
++ SWAP_CHAR('>', '\002');
++ SWAP_CHAR('"', '\003');
++ SWAP_CHAR('\'', '\004');
++ SWAP_CHAR('|', '\005');
++ SWAP_CHAR('&', '\006');
++ SWAP_CHAR('\a', '"');
++ }
+ err = read_from_pipe(command_echo, (void **) &command_out, NULL);
+ if (err)
+ goto errout;
+
++ for (char *p = command_out; *p; p++) {
++ SWAP_CHAR('\001', '<');
++ SWAP_CHAR('\002', '>');
++ SWAP_CHAR('\003', '"');
++ SWAP_CHAR('\004', '\'');
++ SWAP_CHAR('\005', '|');
++ SWAP_CHAR('\006', '&');
++ }
++#undef SWAP_CHAR
+ pr_debug("llvm compiling command : %s\n", command_out);
+
+ err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
+diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
+index 44e40bad0e336..55a041329990c 100644
+--- a/tools/perf/util/machine.c
++++ b/tools/perf/util/machine.c
+@@ -16,6 +16,7 @@
+ #include "map_symbol.h"
+ #include "branch.h"
+ #include "mem-events.h"
++#include "path.h"
+ #include "srcline.h"
+ #include "symbol.h"
+ #include "sort.h"
+@@ -1407,7 +1408,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
+ struct stat st;
+
+ /*sshfs might return bad dent->d_type, so we have to stat*/
+- snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
++ path__join(path, sizeof(path), dir_name, dent->d_name);
+ if (stat(path, &st))
+ continue;
+
+diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
+index 51a2219df6015..7e4939640196b 100644
+--- a/tools/perf/util/parse-events.c
++++ b/tools/perf/util/parse-events.c
+@@ -196,9 +196,12 @@ static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
+ void parse_events__handle_error(struct parse_events_error *err, int idx,
+ char *str, char *help)
+ {
+- if (WARN(!str, "WARNING: failed to provide error string\n")) {
+- free(help);
+- return;
++ if (WARN(!str, "WARNING: failed to provide error string\n"))
++ goto out_free;
++ if (!err) {
++ /* Assume caller does not want message printed */
++ pr_debug("event syntax error: %s\n", str);
++ goto out_free;
+ }
+ switch (err->num_errors) {
+ case 0:
+@@ -224,6 +227,11 @@ void parse_events__handle_error(struct parse_events_error *err, int idx,
+ break;
+ }
+ err->num_errors++;
++ return;
++
++out_free:
++ free(str);
++ free(help);
+ }
+
+ struct tracepoint_path *tracepoint_id_to_path(u64 config)
+@@ -365,6 +373,9 @@ __add_event(struct list_head *list, int *idx,
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
+ cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+
++ if (pmu)
++ perf_pmu__warn_invalid_formats(pmu);
++
+ if (pmu && attr->type == PERF_TYPE_RAW)
+ perf_pmu__warn_invalid_config(pmu, attr->config, name);
+
+@@ -1529,7 +1540,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
+ bool use_uncore_alias;
+ LIST_HEAD(config_terms);
+
+- if (verbose > 1) {
++ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
++
++ if (verbose > 1 && !(pmu && pmu->selectable)) {
+ fprintf(stderr, "Attempting to add event pmu '%s' with '",
+ name);
+ if (head_config) {
+@@ -1542,7 +1555,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
+ fprintf(stderr, "' that may result in non-fatal errors\n");
+ }
+
+- pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+ if (!pmu) {
+ char *err_str;
+
+diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
+index d94e48e1ff9b2..467a426205a05 100644
+--- a/tools/perf/util/parse-events.y
++++ b/tools/perf/util/parse-events.y
+@@ -183,6 +183,11 @@ group_def ':' PE_MODIFIER_EVENT
+ err = parse_events__modifier_group(list, $3);
+ free($3);
+ if (err) {
++ struct parse_events_state *parse_state = _parse_state;
++ struct parse_events_error *error = parse_state->error;
++
++ parse_events__handle_error(error, @3.first_column,
++ strdup("Bad modifier"), NULL);
+ free_list_evsel(list);
+ YYABORT;
+ }
+@@ -240,6 +245,11 @@ event_name PE_MODIFIER_EVENT
+ err = parse_events__modifier_event(list, $2, false);
+ free($2);
+ if (err) {
++ struct parse_events_state *parse_state = _parse_state;
++ struct parse_events_error *error = parse_state->error;
++
++ parse_events__handle_error(error, @2.first_column,
++ strdup("Bad modifier"), NULL);
+ free_list_evsel(list);
+ YYABORT;
+ }
+diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
+index bdabd62170d2c..eafd80be66076 100644
+--- a/tools/perf/util/pmu.c
++++ b/tools/perf/util/pmu.c
+@@ -1048,6 +1048,23 @@ err:
+ return NULL;
+ }
+
++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
++{
++ struct perf_pmu_format *format;
++
++ /* fake pmu doesn't have format list */
++ if (pmu == &perf_pmu__fake)
++ return;
++
++ list_for_each_entry(format, &pmu->format, list)
++ if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
++ pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
++ "which is not supported by this version of perf!\n",
++ pmu->name, format->name, format->value);
++ return;
++ }
++}
++
+ static struct perf_pmu *pmu_find(const char *name)
+ {
+ struct perf_pmu *pmu;
+@@ -1841,7 +1858,7 @@ static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+ return 0;
+
+ free_name:
+- zfree(caps->name);
++ zfree(&caps->name);
+ free_caps:
+ free(caps);
+
+diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
+index 394898b07fd98..822d914b07ccc 100644
+--- a/tools/perf/util/pmu.h
++++ b/tools/perf/util/pmu.h
+@@ -17,6 +17,7 @@ enum {
+ PERF_PMU_FORMAT_VALUE_CONFIG,
+ PERF_PMU_FORMAT_VALUE_CONFIG1,
+ PERF_PMU_FORMAT_VALUE_CONFIG2,
++ PERF_PMU_FORMAT_VALUE_CONFIG_END,
+ };
+
+ #define PERF_PMU_FORMAT_BITS 64
+@@ -135,6 +136,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
+ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ char *name);
++void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
+
+ bool perf_pmu__has_hybrid(void);
+ int perf_pmu__match(char *pattern, char *name, char *tok);
+diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
+index a15d9fbd7c0ed..58b4926cfaca9 100644
+--- a/tools/perf/util/pmu.l
++++ b/tools/perf/util/pmu.l
+@@ -27,8 +27,6 @@ num_dec [0-9]+
+
+ {num_dec} { return value(10); }
+ config { return PP_CONFIG; }
+-config1 { return PP_CONFIG1; }
+-config2 { return PP_CONFIG2; }
+ - { return '-'; }
+ : { return ':'; }
+ , { return ','; }
+diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
+index bfd7e8509869b..283efe059819d 100644
+--- a/tools/perf/util/pmu.y
++++ b/tools/perf/util/pmu.y
+@@ -20,7 +20,7 @@ do { \
+
+ %}
+
+-%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
++%token PP_CONFIG
+ %token PP_VALUE PP_ERROR
+ %type <num> PP_VALUE
+ %type <bits> bit_term
+@@ -47,18 +47,11 @@ PP_CONFIG ':' bits
+ $3));
+ }
+ |
+-PP_CONFIG1 ':' bits
++PP_CONFIG PP_VALUE ':' bits
+ {
+ ABORT_ON(perf_pmu__new_format(format, name,
+- PERF_PMU_FORMAT_VALUE_CONFIG1,
+- $3));
+-}
+-|
+-PP_CONFIG2 ':' bits
+-{
+- ABORT_ON(perf_pmu__new_format(format, name,
+- PERF_PMU_FORMAT_VALUE_CONFIG2,
+- $3));
++ $2,
++ $4));
+ }
+
+ bits:
+diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
+index b2a02c9ab8ea9..68844c48f688a 100644
+--- a/tools/perf/util/probe-event.c
++++ b/tools/perf/util/probe-event.c
+@@ -1775,8 +1775,10 @@ int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
+ if (!pev->event && pev->point.function && pev->point.line
+ && !pev->point.lazy_line && !pev->point.offset) {
+ if (asprintf(&pev->event, "%s_L%d", pev->point.function,
+- pev->point.line) < 0)
+- return -ENOMEM;
++ pev->point.line) < 0) {
++ ret = -ENOMEM;
++ goto out;
++ }
+ }
+
+ /* Copy arguments and ensure return probe has no C argument */
+@@ -3083,6 +3085,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
+ for (j = 0; j < num_matched_functions; j++) {
+ sym = syms[j];
+
++ if (sym->type != STT_FUNC)
++ continue;
++
+ /* There can be duplicated symbols in the map */
+ for (i = 0; i < j; i++)
+ if (sym->start == syms[i]->start) {
+diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
+index 352f16076e01f..562e9b8080272 100644
+--- a/tools/perf/util/session.c
++++ b/tools/perf/util/session.c
+@@ -2076,6 +2076,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
+ bool needs_swap, union perf_event *error)
+ {
+ union perf_event *event;
++ u16 event_size;
+
+ /*
+ * Ensure we have enough space remaining to read
+@@ -2088,15 +2089,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
+ if (needs_swap)
+ perf_event_header__bswap(&event->header);
+
+- if (head + event->header.size <= mmap_size)
++ event_size = event->header.size;
++ if (head + event_size <= mmap_size)
+ return event;
+
+ /* We're not fetching the event so swap back again */
+ if (needs_swap)
+ perf_event_header__bswap(&event->header);
+
+- pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
+- " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
++ /* Check if the event fits into the next mmapped buf. */
++ if (event_size <= mmap_size - head % page_size) {
++ /* Remap buf and fetch again. */
++ return NULL;
++ }
++
++ /* Invalid input. Event size should never exceed mmap_size. */
++ pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
++ " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
+
+ return error;
+ }
+diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
+index 483f05004e682..c255a2c90cd67 100644
+--- a/tools/perf/util/setup.py
++++ b/tools/perf/util/setup.py
+@@ -1,12 +1,14 @@
+-from os import getenv
++from os import getenv, path
+ from subprocess import Popen, PIPE
+ from re import sub
+
+ cc = getenv("CC")
+ cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
++src_feature_tests = getenv('srctree') + '/tools/build/feature'
+
+ def clang_has_option(option):
+- return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
++ cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
++ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
+
+ if cc_is_clang:
+ from distutils.sysconfig import get_config_vars
+@@ -23,6 +25,8 @@ if cc_is_clang:
+ vars[var] = sub("-fstack-protector-strong", "", vars[var])
+ if not clang_has_option("-fno-semantic-interposition"):
+ vars[var] = sub("-fno-semantic-interposition", "", vars[var])
++ if not clang_has_option("-ffat-lto-objects"):
++ vars[var] = sub("-ffat-lto-objects", "", vars[var])
+
+ from distutils.core import setup, Extension
+
+diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
+index 20bacd5972ade..34f1b1b1176c7 100644
+--- a/tools/perf/util/smt.c
++++ b/tools/perf/util/smt.c
+@@ -15,7 +15,7 @@ int smt_on(void)
+ if (cached)
+ return cached_result;
+
+- if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0)
++ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0)
+ goto done;
+
+ ncpu = sysconf(_SC_NPROCESSORS_CONF);
+diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
+index 568a88c001c6c..a4f2ffe2bdb6d 100644
+--- a/tools/perf/util/sort.c
++++ b/tools/perf/util/sort.c
+@@ -876,8 +876,7 @@ static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+ static int64_t
+ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+ {
+- struct addr_map_symbol *from_l = &left->branch_info->from;
+- struct addr_map_symbol *from_r = &right->branch_info->from;
++ struct addr_map_symbol *from_l, *from_r;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+@@ -1325,88 +1324,68 @@ struct sort_entry sort_mispredict = {
+ .se_width_idx = HISTC_MISPREDICT,
+ };
+
+-static u64 he_weight(struct hist_entry *he)
+-{
+- return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
+-}
+-
+ static int64_t
+-sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
++sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
+ {
+- return he_weight(left) - he_weight(right);
++ return left->weight - right->weight;
+ }
+
+ static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+ {
+- return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
++ return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
+ }
+
+ struct sort_entry sort_local_weight = {
+ .se_header = "Local Weight",
+- .se_cmp = sort__local_weight_cmp,
++ .se_cmp = sort__weight_cmp,
+ .se_snprintf = hist_entry__local_weight_snprintf,
+ .se_width_idx = HISTC_LOCAL_WEIGHT,
+ };
+
+-static int64_t
+-sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+-{
+- return left->stat.weight - right->stat.weight;
+-}
+-
+ static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+ {
+- return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
++ return repsep_snprintf(bf, size, "%-*llu", width,
++ he->weight * he->stat.nr_events);
+ }
+
+ struct sort_entry sort_global_weight = {
+ .se_header = "Weight",
+- .se_cmp = sort__global_weight_cmp,
++ .se_cmp = sort__weight_cmp,
+ .se_snprintf = hist_entry__global_weight_snprintf,
+ .se_width_idx = HISTC_GLOBAL_WEIGHT,
+ };
+
+-static u64 he_ins_lat(struct hist_entry *he)
+-{
+- return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0;
+-}
+-
+ static int64_t
+-sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
++sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+ {
+- return he_ins_lat(left) - he_ins_lat(right);
++ return left->ins_lat - right->ins_lat;
+ }
+
+ static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+ {
+- return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he));
++ return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat);
+ }
+
+ struct sort_entry sort_local_ins_lat = {
+ .se_header = "Local INSTR Latency",
+- .se_cmp = sort__local_ins_lat_cmp,
++ .se_cmp = sort__ins_lat_cmp,
+ .se_snprintf = hist_entry__local_ins_lat_snprintf,
+ .se_width_idx = HISTC_LOCAL_INS_LAT,
+ };
+
+-static int64_t
+-sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+-{
+- return left->stat.ins_lat - right->stat.ins_lat;
+-}
+-
+ static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+ {
+- return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat);
++ return repsep_snprintf(bf, size, "%-*u", width,
++ he->ins_lat * he->stat.nr_events);
+ }
+
+ struct sort_entry sort_global_ins_lat = {
+ .se_header = "INSTR Latency",
+- .se_cmp = sort__global_ins_lat_cmp,
++ .se_cmp = sort__ins_lat_cmp,
+ .se_snprintf = hist_entry__global_ins_lat_snprintf,
+ .se_width_idx = HISTC_GLOBAL_INS_LAT,
+ };
+@@ -1414,13 +1393,13 @@ struct sort_entry sort_global_ins_lat = {
+ static int64_t
+ sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+ {
+- return left->stat.p_stage_cyc - right->stat.p_stage_cyc;
++ return left->p_stage_cyc - right->p_stage_cyc;
+ }
+
+ static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+ {
+- return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc);
++ return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
+ }
+
+ struct sort_entry sort_p_stage_cyc = {
+diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
+index b67c469aba795..7b7145501933f 100644
+--- a/tools/perf/util/sort.h
++++ b/tools/perf/util/sort.h
+@@ -49,9 +49,6 @@ struct he_stat {
+ u64 period_us;
+ u64 period_guest_sys;
+ u64 period_guest_us;
+- u64 weight;
+- u64 ins_lat;
+- u64 p_stage_cyc;
+ u32 nr_events;
+ };
+
+@@ -109,6 +106,9 @@ struct hist_entry {
+ s32 socket;
+ s32 cpu;
+ u64 code_page_size;
++ u64 weight;
++ u64 ins_lat;
++ u64 p_stage_cyc;
+ u8 cpumode;
+ u8 depth;
+
+diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
+index 588601000f3f9..24e50fabb6c33 100644
+--- a/tools/perf/util/stat-display.c
++++ b/tools/perf/util/stat-display.c
+@@ -207,7 +207,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
+
+ fputc('\n', os->fh);
+ if (os->prefix)
+- fprintf(os->fh, "%s%s", os->prefix, config->csv_sep);
++ fprintf(os->fh, "%s", os->prefix);
+ aggr_printout(config, os->evsel, os->id, os->nr);
+ for (i = 0; i < os->nfields; i++)
+ fputs(config->csv_sep, os->fh);
+@@ -584,15 +584,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
+
+ alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
+ list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
+- if (strcmp(evsel__name(alias), evsel__name(counter)) ||
+- alias->scale != counter->scale ||
+- alias->cgrp != counter->cgrp ||
+- strcmp(alias->unit, counter->unit) ||
+- evsel__is_clock(alias) != evsel__is_clock(counter) ||
+- !strcmp(alias->pmu_name, counter->pmu_name))
+- break;
+- alias->merged_stat = true;
+- cb(config, alias, data, false);
++ /* Merge events with the same name, etc. but on different PMUs. */
++ if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
++ alias->scale == counter->scale &&
++ alias->cgrp == counter->cgrp &&
++ !strcmp(alias->unit, counter->unit) &&
++ evsel__is_clock(alias) == evsel__is_clock(counter) &&
++ strcmp(alias->pmu_name, counter->pmu_name)) {
++ alias->merged_stat = true;
++ cb(config, alias, data, false);
++ }
+ }
+ }
+
+diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
+index 09ea334586f23..5a0b3db1cab11 100644
+--- a/tools/perf/util/stat.c
++++ b/tools/perf/util/stat.c
+@@ -576,11 +576,7 @@ int create_perf_stat_counter(struct evsel *evsel,
+ if (evsel__is_group_leader(evsel)) {
+ attr->disabled = 1;
+
+- /*
+- * In case of initial_delay we enable tracee
+- * events manually.
+- */
+- if (target__none(target) && !config->initial_delay)
++ if (target__enable_on_exec(target))
+ attr->enable_on_exec = 1;
+ }
+
+diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
+index 32c8527de3478..977616cf69e46 100644
+--- a/tools/perf/util/stat.h
++++ b/tools/perf/util/stat.h
+@@ -137,7 +137,6 @@ struct perf_stat_config {
+ FILE *output;
+ unsigned int interval;
+ unsigned int timeout;
+- int initial_delay;
+ unsigned int unit_width;
+ unsigned int metric_only_len;
+ int times;
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index 31cd59a2b66e6..bbc3a150597a4 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ return NULL;
+ }
+
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++ size_t i, phdrnum;
++ u64 sz;
++
++ if (elf_getphdrnum(elf, &phdrnum))
++ return -1;
++
++ for (i = 0; i < phdrnum; i++) {
++ if (gelf_getphdr(elf, i, phdr) == NULL)
++ return -1;
++
++ if (phdr->p_type != PT_LOAD)
++ continue;
++
++ sz = max(phdr->p_memsz, phdr->p_filesz);
++ if (!sz)
++ continue;
++
++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++ return 0;
++ }
++
++ /* No valid program header found */
++ return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -526,7 +553,7 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size)
+ size_t sz = min(size, descsz);
+ memcpy(bf, ptr, sz);
+ memset(bf + sz, 0, size - sz);
+- err = descsz;
++ err = sz;
+ break;
+ }
+ }
+@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ sym.st_value);
+ used_opd = true;
+ }
++
+ /*
+ * When loading symbols in a data mapping, ABS symbols (which
+ * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1262,11 +1290,33 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
+- pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+- (u64)sym.st_value, (u64)shdr.sh_addr,
+- (u64)shdr.sh_offset);
+- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++ GElf_Phdr phdr;
++
++ if (elf_read_program_header(runtime_ss->elf,
++ (u64)sym.st_value, &phdr)) {
++ pr_debug4("%s: failed to find program header for "
++ "symbol: %s st_value: %#" PRIx64 "\n",
++ __func__, elf_name, (u64)sym.st_value);
++ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
++ "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n",
++ __func__, (u64)sym.st_value, (u64)shdr.sh_addr,
++ (u64)shdr.sh_offset);
++ /*
++ * Failed to find a program header, so fall back
++ * to using shdr.sh_addr and shdr.sh_offset to
++ * calibrate the symbol's file address. This is
++ * not necessary for a normal C ELF file, but it
++ * is still needed to handle Java JIT symbols in
++ * this case.
++ */
++ sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++ } else {
++ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++ (u64)phdr.p_offset);
++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
++ }
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+@@ -1290,7 +1340,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ * For misannotated, zeroed, ASM function sizes.
+ */
+ if (nr > 0) {
+- symbols__fixup_end(&dso->symbols);
++ symbols__fixup_end(&dso->symbols, false);
+ symbols__fixup_duplicate(&dso->symbols);
+ if (kmap) {
+ /*
+@@ -2041,8 +2091,8 @@ static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+ * unusual. One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map. That happens because
+ * the data is copied adjacently whereas the original kcore has gaps. Finally,
+- * kallsyms and modules files are compared with their copies to check that
+- * modules have not been loaded or unloaded while the copies were taking place.
++ * kallsyms file is compared with its copy to check that modules have not been
++ * loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+@@ -2105,9 +2155,6 @@ int kcore_copy(const char *from_dir, const char *to_dir)
+ goto out_extract_close;
+ }
+
+- if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
+- goto out_extract_close;
+-
+ if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+ goto out_extract_close;
+
+diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
+index 0fc9a54107399..80c54196e0e4f 100644
+--- a/tools/perf/util/symbol.c
++++ b/tools/perf/util/symbol.c
+@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
+ return tail - str;
+ }
+
+-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+-{
+- p->end = c->start;
+-}
+-
+ const char * __weak arch__normalize_symbol_name(const char *name)
+ {
+ return name;
+@@ -217,7 +212,8 @@ again:
+ }
+ }
+
+-void symbols__fixup_end(struct rb_root_cached *symbols)
++/* Update zero-sized symbols using the address of the next symbol */
++void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
+ {
+ struct rb_node *nd, *prevnd = rb_first_cached(symbols);
+ struct symbol *curr, *prev;
+@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
+ prev = curr;
+ curr = rb_entry(nd, struct symbol, rb_node);
+
+- if (prev->end == prev->start && prev->end != curr->start)
+- arch__symbols__fixup_end(prev, curr);
++ /*
++ * On some architecture kernel text segment start is located at
++ * some low memory address, while modules are located at high
++ * memory addresses (or vice versa). The gap between end of
++ * kernel text segment and beginning of first module's text
++ * segment is very big. Therefore do not fill this gap and do
++ * not assign it to the kernel dso map (kallsyms).
++ *
++ * In kallsyms, it determines module symbols using '[' character
++ * like in:
++ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
++ */
++ if (prev->end == prev->start) {
++ /* Last kernel/module symbol mapped to end of page */
++ if (is_kallsyms && (!strchr(prev->name, '[') !=
++ !strchr(curr->name, '[')))
++ prev->end = roundup(prev->end + 4096, 4096);
++ else
++ prev->end = curr->start;
++
++ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
++ __func__, prev->name, prev->end);
++ }
+ }
+
+ /* Last entry */
+@@ -1340,10 +1357,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
+
+ /* Find the kernel map using the '_stext' symbol */
+ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
++ u64 replacement_size = 0;
++
+ list_for_each_entry(new_map, &md.maps, node) {
+- if (stext >= new_map->start && stext < new_map->end) {
++ u64 new_size = new_map->end - new_map->start;
++
++ if (!(stext >= new_map->start && stext < new_map->end))
++ continue;
++
++ /*
++ * On some architectures, ARM64 for example, the kernel
++ * text can get allocated inside of the vmalloc segment.
++ * Select the smallest matching segment, in case stext
++ * falls within more than one in the list.
++ */
++ if (!replacement_map || new_size < replacement_size) {
+ replacement_map = new_map;
+- break;
++ replacement_size = new_size;
+ }
+ }
+ }
+@@ -1456,7 +1486,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
+ if (kallsyms__delta(kmap, filename, &delta))
+ return -1;
+
+- symbols__fixup_end(&dso->symbols);
++ symbols__fixup_end(&dso->symbols, true);
+ symbols__fixup_duplicate(&dso->symbols);
+
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+@@ -1648,7 +1678,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+ #undef bfd_asymbol_section
+ #endif
+
+- symbols__fixup_end(&dso->symbols);
++ symbols__fixup_end(&dso->symbols, false);
+ symbols__fixup_duplicate(&dso->symbols);
+ dso->adjust_symbols = 1;
+
+diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
+index 954d6a049ee23..28721d761d91e 100644
+--- a/tools/perf/util/symbol.h
++++ b/tools/perf/util/symbol.h
+@@ -192,7 +192,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
+ bool kernel);
+ void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
+ void symbols__fixup_duplicate(struct rb_root_cached *symbols);
+-void symbols__fixup_end(struct rb_root_cached *symbols);
++void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
+ void maps__fixup_end(struct maps *maps);
+
+ typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+@@ -230,7 +230,6 @@ const char *arch__normalize_symbol_name(const char *name);
+ #define SYMBOL_A 0
+ #define SYMBOL_B 1
+
+-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
+ int arch__compare_symbol_names(const char *namea, const char *nameb);
+ int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n);
+diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
+index a7e981b2d7dec..c69ad7a1a6a78 100644
+--- a/tools/perf/util/synthetic-events.c
++++ b/tools/perf/util/synthetic-events.c
+@@ -367,13 +367,24 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
+ bool is_kernel)
+ {
+ struct build_id bid;
++ struct nsinfo *nsi;
++ struct nscookie nc;
+ int rc;
+
+- if (is_kernel)
++ if (is_kernel) {
+ rc = sysfs__read_build_id("/sys/kernel/notes", &bid);
+- else
+- rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
++ goto out;
++ }
++
++ nsi = nsinfo__new(event->pid);
++ nsinfo__mountns_enter(nsi, &nc);
+
++ rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
++
++ nsinfo__mountns_exit(&nc);
++ nsinfo__put(nsi);
++
++out:
+ if (rc == 0) {
+ memcpy(event->build_id, bid.data, sizeof(bid.data));
+ event->build_id_size = (u8) bid.size;
+diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
+index daec6cba500d4..880f1af7f6ad6 100644
+--- a/tools/perf/util/target.h
++++ b/tools/perf/util/target.h
+@@ -18,6 +18,7 @@ struct target {
+ bool per_thread;
+ bool use_bpf;
+ bool hybrid;
++ int initial_delay;
+ const char *attr_map;
+ };
+
+@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target)
+ return !target__has_task(target) && !target__has_cpu(target);
+ }
+
++static inline bool target__enable_on_exec(struct target *target)
++{
++ /*
++ * Normally enable_on_exec should be set if:
++ * 1) The tracee process is forked (not attaching to an existing task or cpu).
++ * 2) And initial_delay is not configured.
++ * Otherwise, we enable tracee events manually.
++ */
++ return target__none(target) && !target->initial_delay;
++}
++
+ static inline bool target__has_per_thread(struct target *target)
+ {
+ return target->system_wide && target->per_thread;
+diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
+index 37a9492edb3eb..df3c4671be72a 100644
+--- a/tools/perf/util/util.c
++++ b/tools/perf/util/util.c
+@@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str,
+ return 0;
+ }
+
+-const char *perf_tip(const char *dirpath)
++int perf_tip(char **strp, const char *dirpath)
+ {
+ struct strlist *tips;
+ struct str_node *node;
+- char *tip = NULL;
+ struct strlist_config conf = {
+ .dirname = dirpath,
+ .file_only = true,
+ };
++ int ret = 0;
+
++ *strp = NULL;
+ tips = strlist__new("tips.txt", &conf);
+ if (tips == NULL)
+- return errno == ENOENT ? NULL :
+- "Tip: check path of tips.txt or get more memory! ;-p";
++ return -errno;
+
+ if (strlist__nr_entries(tips) == 0)
+ goto out;
+
+ node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+- if (asprintf(&tip, "Tip: %s", node->s) < 0)
+- tip = (char *)"Tip: get more memory! ;-)";
++ if (asprintf(strp, "Tip: %s", node->s) < 0)
++ ret = -ENOMEM;
+
+ out:
+ strlist__delete(tips);
+
+- return tip;
++ return ret;
+ }
+
+ char *perf_exe(char *buf, int len)
+diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
+index ad737052e5977..9f0d36ba77f2d 100644
+--- a/tools/perf/util/util.h
++++ b/tools/perf/util/util.h
+@@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint,
+ #define KVER_FMT "%d.%d.%d"
+ #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+
+-const char *perf_tip(const char *dirpath);
++int perf_tip(char **strp, const char *dirpath);
+
+ #ifndef HAVE_SCHED_GETCPU_SUPPORT
+ int sched_getcpu(void);
+diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+index e7d48cb563c0e..ae6af354a81db 100644
+--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
++++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+@@ -70,8 +70,8 @@ static int max_freq_mode;
+ */
+ static unsigned long max_frequency;
+
+-static unsigned long long tsc_at_measure_start;
+-static unsigned long long tsc_at_measure_end;
++static unsigned long long *tsc_at_measure_start;
++static unsigned long long *tsc_at_measure_end;
+ static unsigned long long *mperf_previous_count;
+ static unsigned long long *aperf_previous_count;
+ static unsigned long long *mperf_current_count;
+@@ -169,7 +169,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+- tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
++ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu];
+ *percent = 100.0 * mperf_diff / tsc_diff;
+ dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, tsc_diff);
+@@ -206,7 +206,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+ /* Calculate max_freq from TSC count */
+- tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
++ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu];
+ time_diff = timespec_diff_us(time_start, time_end);
+ max_frequency = tsc_diff / time_diff;
+ }
+@@ -225,33 +225,27 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ static int mperf_start(void)
+ {
+ int cpu;
+- unsigned long long dbg;
+
+ clock_gettime(CLOCK_REALTIME, &time_start);
+- mperf_get_tsc(&tsc_at_measure_start);
+
+- for (cpu = 0; cpu < cpu_count; cpu++)
++ for (cpu = 0; cpu < cpu_count; cpu++) {
++ mperf_get_tsc(&tsc_at_measure_start[cpu]);
+ mperf_init_stats(cpu);
++ }
+
+- mperf_get_tsc(&dbg);
+- dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+ }
+
+ static int mperf_stop(void)
+ {
+- unsigned long long dbg;
+ int cpu;
+
+- for (cpu = 0; cpu < cpu_count; cpu++)
++ for (cpu = 0; cpu < cpu_count; cpu++) {
+ mperf_measure_stats(cpu);
++ mperf_get_tsc(&tsc_at_measure_end[cpu]);
++ }
+
+- mperf_get_tsc(&tsc_at_measure_end);
+ clock_gettime(CLOCK_REALTIME, &time_end);
+-
+- mperf_get_tsc(&dbg);
+- dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+-
+ return 0;
+ }
+
+@@ -353,7 +347,8 @@ struct cpuidle_monitor *mperf_register(void)
+ aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
+ mperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+ aperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+-
++ tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long));
++ tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long));
+ mperf_monitor.name_len = strlen(mperf_monitor.name);
+ return &mperf_monitor;
+ }
+@@ -364,6 +359,8 @@ void mperf_unregister(void)
+ free(aperf_previous_count);
+ free(mperf_current_count);
+ free(aperf_current_count);
++ free(tsc_at_measure_start);
++ free(tsc_at_measure_end);
+ free(is_valid);
+ }
+
+diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
+index bf9fd3549a1d5..cd08ffe0d62b0 100644
+--- a/tools/power/x86/intel-speed-select/isst-config.c
++++ b/tools/power/x86/intel-speed-select/isst-config.c
+@@ -108,7 +108,7 @@ int is_skx_based_platform(void)
+
+ int is_spr_platform(void)
+ {
+- if (cpu_model == 0x8F)
++ if (cpu_model == 0x8F || cpu_model == 0xCF)
+ return 1;
+
+ return 0;
+diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
+index 9b17097bc3d7b..b3d4bf08e70b1 100644
+--- a/tools/power/x86/turbostat/turbostat.8
++++ b/tools/power/x86/turbostat/turbostat.8
+@@ -296,6 +296,8 @@ Alternatively, non-root users can be enabled to run turbostat this way:
+
+ # chmod +r /dev/cpu/*/msr
+
++# chmod +r /dev/cpu_dma_latency
++
+ .B "turbostat "
+ reads hardware counters, but doesn't write them.
+ So it will not interfere with the OS or other programs, including
+diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
+index 47d3ba895d6d9..a3197efe52c63 100644
+--- a/tools/power/x86/turbostat/turbostat.c
++++ b/tools/power/x86/turbostat/turbostat.c
+@@ -4241,7 +4241,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+
+ fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+ "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+- cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
++ cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
+
+ return 0;
+ }
+@@ -4376,6 +4376,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
++ case INTEL_FAM6_ICELAKE_X: /* ICX */
+ return (rapl_dram_energy_units = 15.3 / 1000000);
+ default:
+ return (rapl_energy_units);
+@@ -5236,7 +5237,7 @@ void print_dev_latency(void)
+
+ retval = read(fd, (void *)&value, sizeof(int));
+ if (retval != sizeof(int)) {
+- warn("read %s\n", path);
++ warn("read failed %s\n", path);
+ close(fd);
+ return;
+ }
+diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
+index 09d1578f9d66f..e6c381498e632 100755
+--- a/tools/testing/ktest/ktest.pl
++++ b/tools/testing/ktest/ktest.pl
+@@ -178,6 +178,7 @@ my $store_failures;
+ my $store_successes;
+ my $test_name;
+ my $timeout;
++my $run_timeout;
+ my $connect_timeout;
+ my $config_bisect_exec;
+ my $booted_timeout;
+@@ -340,6 +341,7 @@ my %option_map = (
+ "STORE_SUCCESSES" => \$store_successes,
+ "TEST_NAME" => \$test_name,
+ "TIMEOUT" => \$timeout,
++ "RUN_TIMEOUT" => \$run_timeout,
+ "CONNECT_TIMEOUT" => \$connect_timeout,
+ "CONFIG_BISECT_EXEC" => \$config_bisect_exec,
+ "BOOTED_TIMEOUT" => \$booted_timeout,
+@@ -1488,7 +1490,8 @@ sub reboot {
+
+ # Still need to wait for the reboot to finish
+ wait_for_monitor($time, $reboot_success_line);
+-
++ }
++ if ($powercycle || $time) {
+ end_monitor;
+ }
+ }
+@@ -1850,6 +1853,14 @@ sub run_command {
+ $command =~ s/\$SSH_USER/$ssh_user/g;
+ $command =~ s/\$MACHINE/$machine/g;
+
++ if (!defined($timeout)) {
++ $timeout = $run_timeout;
++ }
++
++ if (!defined($timeout)) {
++ $timeout = -1; # tell wait_for_input to wait indefinitely
++ }
++
+ doprint("$command ... ");
+ $start_time = time;
+
+@@ -1876,13 +1887,10 @@ sub run_command {
+
+ while (1) {
+ my $fp = \*CMD;
+- if (defined($timeout)) {
+- doprint "timeout = $timeout\n";
+- }
+ my $line = wait_for_input($fp, $timeout);
+ if (!defined($line)) {
+ my $now = time;
+- if (defined($timeout) && (($now - $start_time) >= $timeout)) {
++ if ($timeout >= 0 && (($now - $start_time) >= $timeout)) {
+ doprint "Hit timeout of $timeout, killing process\n";
+ $hit_timeout = 1;
+ kill 9, $pid;
+@@ -1963,7 +1971,7 @@ sub run_scp_mod {
+
+ sub _get_grub_index {
+
+- my ($command, $target, $skip) = @_;
++ my ($command, $target, $skip, $submenu) = @_;
+
+ return if (defined($grub_number) && defined($last_grub_menu) &&
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+@@ -1980,11 +1988,16 @@ sub _get_grub_index {
+
+ my $found = 0;
+
++ my $submenu_number = 0;
++
+ while (<IN>) {
+ if (/$target/) {
+ $grub_number++;
+ $found = 1;
+ last;
++ } elsif (defined($submenu) && /$submenu/) {
++ $submenu_number++;
++ $grub_number = -1;
+ } elsif (/$skip/) {
+ $grub_number++;
+ }
+@@ -1993,6 +2006,9 @@ sub _get_grub_index {
+
+ dodie "Could not find '$grub_menu' through $command on $machine"
+ if (!$found);
++ if ($submenu_number > 0) {
++ $grub_number = "$submenu_number>$grub_number";
++ }
+ doprint "$grub_number\n";
+ $last_grub_menu = $grub_menu;
+ $last_machine = $machine;
+@@ -2003,6 +2019,7 @@ sub get_grub_index {
+ my $command;
+ my $target;
+ my $skip;
++ my $submenu;
+ my $grub_menu_qt;
+
+ if ($reboot_type !~ /^grub/) {
+@@ -2017,8 +2034,9 @@ sub get_grub_index {
+ $skip = '^\s*title\s';
+ } elsif ($reboot_type eq "grub2") {
+ $command = "cat $grub_file";
+- $target = '^menuentry.*' . $grub_menu_qt;
+- $skip = '^menuentry\s|^submenu\s';
++ $target = '^\s*menuentry.*' . $grub_menu_qt;
++ $skip = '^\s*menuentry';
++ $submenu = '^\s*submenu\s';
+ } elsif ($reboot_type eq "grub2bls") {
+ $command = $grub_bls_get;
+ $target = '^title=.*' . $grub_menu_qt;
+@@ -2027,7 +2045,7 @@ sub get_grub_index {
+ return;
+ }
+
+- _get_grub_index($command, $target, $skip);
++ _get_grub_index($command, $target, $skip, $submenu);
+ }
+
+ sub wait_for_input {
+@@ -2044,6 +2062,11 @@ sub wait_for_input {
+ $time = $timeout;
+ }
+
++ if ($time < 0) {
++ # Negative number means wait indefinitely
++ undef $time;
++ }
++
+ $rin = '';
+ vec($rin, fileno($fp), 1) = 1;
+ vec($rin, fileno(\*STDIN), 1) = 1;
+@@ -2090,7 +2113,7 @@ sub reboot_to {
+ if ($reboot_type eq "grub") {
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+ } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) {
+- run_ssh "$grub_reboot $grub_number";
++ run_ssh "$grub_reboot \"'$grub_number'\"";
+ } elsif ($reboot_type eq "syslinux") {
+ run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
+ } elsif (defined $reboot_script) {
+@@ -3768,9 +3791,10 @@ sub test_this_config {
+ # .config to make sure it is missing the config that
+ # we had before
+ my %configs = %min_configs;
+- delete $configs{$config};
++ $configs{$config} = "# $config is not set";
+ make_new_config ((values %configs), (values %keep_configs));
+ make_oldconfig;
++ delete $configs{$config};
+ undef %configs;
+ assign_configs \%configs, $output_config;
+
+@@ -4182,6 +4206,9 @@ sub send_email {
+ }
+
+ sub cancel_test {
++ if ($monitor_cnt) {
++ end_monitor;
++ }
+ if ($email_when_canceled) {
+ my $name = get_test_name;
+ send_email("KTEST: Your [$name] test was cancelled",
+diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
+index 5e7d1d7297529..65957a9803b50 100644
+--- a/tools/testing/ktest/sample.conf
++++ b/tools/testing/ktest/sample.conf
+@@ -809,6 +809,11 @@
+ # is issued instead of a reboot.
+ # CONNECT_TIMEOUT = 25
+
++# The timeout in seconds to wait for any running command to complete
++# before timing it out. If not defined, commands may run indefinitely.
++# (default undefined)
++#RUN_TIMEOUT = 600
++
+ # In between tests, a reboot of the box may occur, and this
+ # is the time to wait for the console after it stops producing
+ # output. Some machines may not produce a large lag on reboot
+diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
+index 2c6f916ccbafa..0874e512d109b 100644
+--- a/tools/testing/kunit/kunit_kernel.py
++++ b/tools/testing/kunit/kunit_kernel.py
+@@ -6,6 +6,7 @@
+ # Author: Felix Guo <felixguoxiuping@gmail.com>
+ # Author: Brendan Higgins <brendanhiggins@google.com>
+
++import importlib.abc
+ import importlib.util
+ import logging
+ import subprocess
+diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
+index ed563bdd88f39..b752ce47ead3c 100644
+--- a/tools/testing/nvdimm/test/iomap.c
++++ b/tools/testing/nvdimm/test/iomap.c
+@@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap)
+ {
+ struct dev_pagemap *pgmap = _pgmap;
+
+- WARN_ON(!pgmap || !pgmap->ref);
+-
+- if (pgmap->ops && pgmap->ops->kill)
+- pgmap->ops->kill(pgmap);
+- else
+- percpu_ref_kill(pgmap->ref);
+-
+- if (pgmap->ops && pgmap->ops->cleanup) {
+- pgmap->ops->cleanup(pgmap);
+- } else {
+- wait_for_completion(&pgmap->done);
+- percpu_ref_exit(pgmap->ref);
+- }
++ WARN_ON(!pgmap);
++
++ percpu_ref_kill(&pgmap->ref);
++
++ wait_for_completion(&pgmap->done);
++ percpu_ref_exit(&pgmap->ref);
+ }
+
+ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+ {
+- struct dev_pagemap *pgmap =
+- container_of(ref, struct dev_pagemap, internal_ref);
++ struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
+
+ complete(&pgmap->done);
+ }
+@@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+ if (!nfit_res)
+ return devm_memremap_pages(dev, pgmap);
+
+- if (!pgmap->ref) {
+- if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+- return ERR_PTR(-EINVAL);
+-
+- init_completion(&pgmap->done);
+- error = percpu_ref_init(&pgmap->internal_ref,
+- dev_pagemap_percpu_release, 0, GFP_KERNEL);
+- if (error)
+- return ERR_PTR(error);
+- pgmap->ref = &pgmap->internal_ref;
+- } else {
+- if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+- WARN(1, "Missing reference count teardown definition\n");
+- return ERR_PTR(-EINVAL);
+- }
+- }
++ init_completion(&pgmap->done);
++ error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
++ GFP_KERNEL);
++ if (error)
++ return ERR_PTR(error);
+
+ error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
+ if (error)
+diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
+index a61c7bcbc72da..63f468bf8245c 100644
+--- a/tools/testing/radix-tree/regression1.c
++++ b/tools/testing/radix-tree/regression1.c
+@@ -177,7 +177,7 @@ void regression1_test(void)
+ nr_threads = 2;
+ pthread_barrier_init(&worker_barrier, NULL, nr_threads);
+
+- threads = malloc(nr_threads * sizeof(pthread_t *));
++ threads = malloc(nr_threads * sizeof(*threads));
+
+ for (i = 0; i < nr_threads; i++) {
+ arg = i;
+diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
+index c852eb40c4f7d..c16e4da988257 100644
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -8,6 +8,7 @@ TARGETS += clone3
+ TARGETS += core
+ TARGETS += cpufreq
+ TARGETS += cpu-hotplug
++TARGETS += damon
+ TARGETS += drivers/dma-buf
+ TARGETS += efivarfs
+ TARGETS += exec
+@@ -51,6 +52,7 @@ TARGETS += proc
+ TARGETS += pstore
+ TARGETS += ptrace
+ TARGETS += openat2
++TARGETS += resctrl
+ TARGETS += rlimits
+ TARGETS += rseq
+ TARGETS += rtc
+@@ -113,19 +115,27 @@ ifdef building_out_of_srctree
+ override LDFLAGS =
+ endif
+
+-ifneq ($(O),)
+- BUILD := $(O)/kselftest
++top_srcdir ?= ../../..
++
++ifeq ("$(origin O)", "command line")
++ KBUILD_OUTPUT := $(O)
++endif
++
++ifneq ($(KBUILD_OUTPUT),)
++ # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
++ # expand a shell special character '~'. We use a somewhat tedious way here.
++ abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
++ $(if $(abs_objtree),, \
++ $(error failed to create output directory "$(KBUILD_OUTPUT)"))
++ # $(realpath ...) resolves symlinks
++ abs_objtree := $(realpath $(abs_objtree))
++ BUILD := $(abs_objtree)/kselftest
+ else
+- ifneq ($(KBUILD_OUTPUT),)
+- BUILD := $(KBUILD_OUTPUT)/kselftest
+- else
+- BUILD := $(shell pwd)
+- DEFAULT_INSTALL_HDR_PATH := 1
+- endif
++ BUILD := $(CURDIR)
++ DEFAULT_INSTALL_HDR_PATH := 1
+ endif
+
+ # Prepare for headers install
+-top_srcdir ?= ../../..
+ include $(top_srcdir)/scripts/subarch.include
+ ARCH ?= $(SUBARCH)
+ export KSFT_KHDR_INSTALL_DONE := 1
+diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
+index 73e013c082a65..dafa1c2aa5c47 100644
+--- a/tools/testing/selftests/arm64/bti/Makefile
++++ b/tools/testing/selftests/arm64/bti/Makefile
+@@ -39,7 +39,7 @@ BTI_OBJS = \
+ teststubs-bti.o \
+ trampoline-bti.o
+ gen/btitest: $(BTI_OBJS)
+- $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
++ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+ NOBTI_OBJS = \
+ test-nobti.o \
+@@ -50,7 +50,7 @@ NOBTI_OBJS = \
+ teststubs-nobti.o \
+ trampoline-nobti.o
+ gen/nobtitest: $(NOBTI_OBJS)
+- $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
++ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+ # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+ # to account for any OUTPUT target-dirs optionally provided by
+diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
+index 8c2a57fc2f9cc..341b3d5200bde 100644
+--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
++++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
+@@ -33,7 +33,7 @@ bool validate_extra_context(struct extra_context *extra, char **err)
+ return false;
+
+ fprintf(stderr, "Validating EXTRA...\n");
+- term = GET_RESV_NEXT_HEAD(extra);
++ term = GET_RESV_NEXT_HEAD(&extra->head);
+ if (!term || term->magic || term->size) {
+ *err = "Missing terminator after EXTRA context";
+ return false;
+diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
+index 799b88152e9e8..0d845a0c8599a 100644
+--- a/tools/testing/selftests/bpf/Makefile
++++ b/tools/testing/selftests/bpf/Makefile
+@@ -78,7 +78,7 @@ TEST_PROGS := test_kmod.sh \
+ test_xsk.sh
+
+ TEST_PROGS_EXTENDED := with_addr.sh \
+- with_tunnels.sh \
++ with_tunnels.sh ima_setup.sh \
+ test_xdp_vlan.sh test_bpftool.py
+
+ # Compile but not part of 'make run_tests'
+@@ -144,8 +144,6 @@ endif
+ # NOTE: Semicolon at the end is critical to override lib.mk's default static
+ # rule for binaries.
+ $(notdir $(TEST_GEN_PROGS) \
+- $(TEST_PROGS) \
+- $(TEST_PROGS_EXTENDED) \
+ $(TEST_GEN_PROGS_EXTENDED) \
+ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+
+diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
+index 16f774b1cdbed..7b281dbe41656 100755
+--- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
++++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
+@@ -2,7 +2,7 @@
+
+ set -eufo pipefail
+
+-for i in base kprobe kretprobe rawtp fentry fexit fmodret
++for i in base kprobe kretprobe rawtp fentry fexit
+ do
+ summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-10s: %s\n" $i "$summary"
+diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
+index b692e6ead9b55..0a4ad7cb2c200 100644
+--- a/tools/testing/selftests/bpf/btf_helpers.c
++++ b/tools/testing/selftests/bpf/btf_helpers.c
+@@ -246,18 +246,23 @@ const char *btf_type_c_dump(const struct btf *btf)
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ if (libbpf_get_error(d)) {
+ fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
+- return NULL;
++ goto err_out;
+ }
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ if (err) {
+ fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
+- return NULL;
++ goto err_out;
+ }
+ }
+
++ btf_dump__free(d);
+ fflush(buf_file);
+ fclose(buf_file);
+ return buf;
++err_out:
++ btf_dump__free(d);
++ fclose(buf_file);
++ return NULL;
+ }
+diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
+index 6db1af8fdee78..c57e1e47e52f2 100644
+--- a/tools/testing/selftests/bpf/network_helpers.c
++++ b/tools/testing/selftests/bpf/network_helpers.c
+@@ -84,7 +84,7 @@ static int __start_server(int type, const struct sockaddr *addr,
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+- return -1;
++ goto error_close;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
+diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
+index 5861446d07770..7996ec07e0bdb 100644
+--- a/tools/testing/selftests/bpf/prog_tests/align.c
++++ b/tools/testing/selftests/bpf/prog_tests/align.c
+@@ -2,7 +2,7 @@
+ #include <test_progs.h>
+
+ #define MAX_INSNS 512
+-#define MAX_MATCHES 16
++#define MAX_MATCHES 24
+
+ struct bpf_reg_match {
+ unsigned int line;
+@@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = {
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
++ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+@@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = {
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
++ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+@@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = {
+ {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Variable offset is added to R5 packet pointer,
+- * resulting in auxiliary alignment of 4.
++ * resulting in auxiliary alignment of 4. To avoid BPF
++ * verifier's precision backtracking logging
++ * interfering we also have a no-op R4 = R5
++ * instruction to validate R5 state. We also check
++ * that R4 is what it should be in such case.
+ */
+- {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {19, "R4_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {19, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5, resulting in
+ * reg->off of 14.
+ */
+- {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {20, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off
+ * (14) which is 16. Then the variable offset is 4-byte
+ * aligned, so the total offset is 4-byte aligned and
+ * meets the load's requirements.
+ */
+- {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+- {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {24, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {24, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5 packet pointer,
+ * resulting in reg->off value of 14.
+ */
+- {26, "R5_w=pkt(id=0,off=14,r=8"},
++ {27, "R5_w=pkt(id=0,off=14,r=8"},
+ /* Variable offset is added to R5, resulting in a
+- * variable offset of (4n).
++ * variable offset of (4n). See comment for insn #19
++ * for R4 = R5 trick.
+ */
+- {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {29, "R4_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {29, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant is added to R5 again, setting reg->off to 18. */
+- {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
++ {30, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* And once more we add a variable; resulting var_off
+ * is still (4n), fixed offset is not changed.
+ * Also, we create a new reg->id.
+ */
+- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
++ {32, "R4_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
++ {32, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+ * which is 20. Then the variable offset is (4n), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+- {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+- {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
++ {35, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
++ {35, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ },
+ },
+ {
+diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+index d0f06e40c16d0..eac71fbb24ce2 100644
+--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
++++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+@@ -1,13 +1,24 @@
+ // SPDX-License-Identifier: GPL-2.0
+-#include <test_progs.h>
+-#include "bind_perm.skel.h"
+-
++#define _GNU_SOURCE
++#include <sched.h>
++#include <stdlib.h>
+ #include <sys/types.h>
+ #include <sys/socket.h>
+ #include <sys/capability.h>
+
++#include "test_progs.h"
++#include "bind_perm.skel.h"
++
+ static int duration;
+
++static int create_netns(void)
++{
++ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
++ return -1;
++
++ return 0;
++}
++
+ void try_bind(int family, int port, int expected_errno)
+ {
+ struct sockaddr_storage addr = {};
+@@ -75,6 +86,9 @@ void test_bind_perm(void)
+ struct bind_perm *skel;
+ int cgroup_fd;
+
++ if (create_netns())
++ return;
++
+ cgroup_fd = test__join_cgroup("/bind_perm");
+ if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ return;
+diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+index 77ac24b191d4c..dc18e5ae0febc 100644
+--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
++++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+@@ -1208,13 +1208,14 @@ static void test_task_vma(void)
+ goto out;
+
+ /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
+- * to trigger seq_file corner cases. The expected output is much
+- * longer than 1kB, so the while loop will terminate.
++ * to trigger seq_file corner cases.
+ */
+ len = 0;
+ while (len < CMP_BUFFER_SIZE) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output + len,
+ min(read_size, CMP_BUFFER_SIZE - len));
++ if (!err)
++ break;
+ if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+ goto out;
+ len += err;
+diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
+index 649f87382c8d8..2a04dbec510de 100644
+--- a/tools/testing/selftests/bpf/prog_tests/btf.c
++++ b/tools/testing/selftests/bpf/prog_tests/btf.c
+@@ -882,6 +882,34 @@ static struct btf_raw_test raw_tests[] = {
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+ },
++{
++ .descr = "var after datasec, ptr followed by modifier",
++ .raw_types = {
++ /* .bss section */ /* [1] */
++ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2),
++ sizeof(void*)+4),
++ BTF_VAR_SECINFO_ENC(4, 0, sizeof(void*)),
++ BTF_VAR_SECINFO_ENC(6, sizeof(void*), 4),
++ /* int */ /* [2] */
++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
++ /* int* */ /* [3] */
++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
++ BTF_VAR_ENC(NAME_TBD, 3, 0), /* [4] */
++ /* const int */ /* [5] */
++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
++ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
++ BTF_END_RAW,
++ },
++ .str_sec = "\0a\0b\0c\0",
++ .str_sec_size = sizeof("\0a\0b\0c\0"),
++ .map_type = BPF_MAP_TYPE_ARRAY,
++ .map_name = ".bss",
++ .key_size = sizeof(int),
++ .value_size = sizeof(void*)+4,
++ .key_type_id = 0,
++ .value_type_id = 1,
++ .max_entries = 1,
++},
+ /* Test member exceeds the size of struct.
+ *
+ * struct A {
+@@ -4913,7 +4941,7 @@ static void do_test_pprint(int test_num)
+ ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+ "/sys/fs/bpf", test->map_name);
+
+- if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
++ if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long",
+ "/sys/fs/bpf", test->map_name)) {
+ err = -1;
+ goto done;
+diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+index 762f6a9da8b5e..664ffc0364f4f 100644
+--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
++++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+@@ -90,7 +90,7 @@ static void print_err_line(void)
+
+ static void test_conn(void)
+ {
+- int listen_fd = -1, cli_fd = -1, err;
++ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+@@ -112,6 +112,10 @@ static void test_conn(void)
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
++ srv_fd = accept(listen_fd, NULL, NULL);
++ if (CHECK_FAIL(srv_fd == -1))
++ goto done;
++
+ if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+ skel->bss->req_sk_sport != srv_port,
+ "Unexpected sk src port",
+@@ -134,11 +138,13 @@ done:
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
++ if (srv_fd != -1)
++ close(srv_fd);
+ }
+
+ static void test_syncookie(void)
+ {
+- int listen_fd = -1, cli_fd = -1, err;
++ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+@@ -161,6 +167,10 @@ static void test_syncookie(void)
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
++ srv_fd = accept(listen_fd, NULL, NULL);
++ if (CHECK_FAIL(srv_fd == -1))
++ goto done;
++
+ if (CHECK(skel->bss->listen_tp_sport != srv_port,
+ "Unexpected tp src port",
+ "listen_tp_sport:%u expected:%u\n",
+@@ -188,6 +198,8 @@ done:
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
++ if (srv_fd != -1)
++ close(srv_fd);
+ }
+
+ struct test {
+diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+index 876be0ecb654f..a47ea4804766b 100644
+--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
++++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+@@ -56,8 +56,9 @@ static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+
+ static bool connect_send(const char *cgroup_path)
+ {
+- bool res = true;
+ int server_fd = -1, client_fd = -1;
++ char message[] = "message";
++ bool res = true;
+
+ if (join_cgroup(cgroup_path))
+ goto out_clean;
+@@ -70,7 +71,10 @@ static bool connect_send(const char *cgroup_path)
+ if (client_fd < 0)
+ goto out_clean;
+
+- if (send(client_fd, "message", strlen("message"), 0) < 0)
++ if (send(client_fd, &message, sizeof(message), 0) < 0)
++ goto out_clean;
++
++ if (read(server_fd, &message, sizeof(message)) < 0)
+ goto out_clean;
+
+ res = false;
+diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+index 012068f33a0a8..871971cdd7b75 100644
+--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
++++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+@@ -197,7 +197,7 @@ cleanup:
+
+ void test_check_mtu(void)
+ {
+- __u32 mtu_lo;
++ int mtu_lo;
+
+ if (test__start_subtest("bpf_check_mtu XDP-attach"))
+ test_check_mtu_xdp_attach();
+diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+index 73b4c76e6b869..52f1426ae06e0 100644
+--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+@@ -371,6 +371,18 @@ static void test_func_map_prog_compatibility(void)
+ "./test_attach_probe.o");
+ }
+
++static void test_func_replace_global_func(void)
++{
++ const char *prog_name[] = {
++ "freplace/test_pkt_access",
++ };
++
++ test_fexit_bpf2bpf_common("./freplace_global_func.o",
++ "./test_pkt_access.o",
++ ARRAY_SIZE(prog_name),
++ prog_name, false, NULL);
++}
++
+ void test_fexit_bpf2bpf(void)
+ {
+ if (test__start_subtest("target_no_callees"))
+@@ -391,4 +403,6 @@ void test_fexit_bpf2bpf(void)
+ test_func_replace_multi();
+ if (test__start_subtest("fmod_ret_freplace"))
+ test_fmod_ret_freplace();
++ if (test__start_subtest("func_replace_global_func"))
++ test_func_replace_global_func();
+ }
+diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+index 8d5a6023a1bbf..4022c89ea268a 100644
+--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
++++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+@@ -65,6 +65,7 @@ void test_get_stackid_cannot_attach(void)
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
++ bpf_link__destroy(skel->links.oncpu);
+ close(pmu_fd);
+
+ /* add exclude_callchain_kernel, attach should fail */
+diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+index cf3acfa5a91d5..69455fe90ac3e 100644
+--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
++++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+@@ -7,6 +7,7 @@
+ #include "test_ksyms_btf.skel.h"
+ #include "test_ksyms_btf_null_check.skel.h"
+ #include "test_ksyms_weak.skel.h"
++#include "test_ksyms_btf_write_check.skel.h"
+
+ static int duration;
+
+@@ -109,6 +110,16 @@ cleanup:
+ test_ksyms_weak__destroy(skel);
+ }
+
++static void test_write_check(void)
++{
++ struct test_ksyms_btf_write_check *skel;
++
++ skel = test_ksyms_btf_write_check__open_and_load();
++ ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
++
++ test_ksyms_btf_write_check__destroy(skel);
++}
++
+ void test_ksyms_btf(void)
+ {
+ int percpu_datasec;
+@@ -136,4 +147,7 @@ void test_ksyms_btf(void)
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
++
++ if (test__start_subtest("write_check"))
++ test_write_check();
+ }
+diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+index 59adb4715394f..3c85247f96f95 100644
+--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
++++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+@@ -204,8 +204,8 @@ static int pass_ack(struct migrate_reuseport_test_case *test_case)
+ {
+ int err;
+
+- err = bpf_link__detach(test_case->link);
+- if (!ASSERT_OK(err, "bpf_link__detach"))
++ err = bpf_link__destroy(test_case->link);
++ if (!ASSERT_OK(err, "bpf_link__destroy"))
+ return -1;
+
+ test_case->link = NULL;
+diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+index 6490e9673002f..7daaaab13681b 100644
+--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
++++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+@@ -107,8 +107,8 @@ void test_perf_buffer(void)
+ "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
+ goto out_free_pb;
+
+- if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
+- "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
++ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt",
++ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus))
+ goto out_close;
+
+ for (i = 0; i < nr_cpus; i++) {
+diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+index 3a469099f30d8..e09c5239a5951 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
++++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+@@ -29,7 +29,23 @@ static int stop, duration;
+ static bool
+ configure_stack(void)
+ {
++ char tc_version[128];
+ char tc_cmd[BUFSIZ];
++ char *prog;
++ FILE *tc;
++
++ /* Check whether tc is built with libbpf. */
++ tc = popen("tc -V", "r");
++ if (CHECK_FAIL(!tc))
++ return false;
++ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
++ return false;
++ if (strstr(tc_version, ", libbpf "))
++ prog = "test_sk_assign_libbpf.o";
++ else
++ prog = "test_sk_assign.o";
++ if (CHECK_FAIL(pclose(tc)))
++ return false;
+
+ /* Move to a new networking namespace */
+ if (CHECK_FAIL(unshare(CLONE_NEWNET)))
+@@ -46,8 +62,8 @@ configure_stack(void)
+ /* Load qdisc, BPF program */
+ if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
+ return false;
+- sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
+- "direct-action object-file ./test_sk_assign.o",
++ sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf",
++ "direct-action object-file", prog,
+ "section classifier/sk_assign_test",
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
+ if (CHECK(system(tc_cmd), "BPF load failed;",
+@@ -129,15 +145,12 @@ get_port(int fd)
+ static ssize_t
+ rcv_msg(int srv_client, int type)
+ {
+- struct sockaddr_storage ss;
+ char buf[BUFSIZ];
+- socklen_t slen;
+
+ if (type == SOCK_STREAM)
+ return read(srv_client, &buf, sizeof(buf));
+ else
+- return recvfrom(srv_client, &buf, sizeof(buf), 0,
+- (struct sockaddr *)&ss, &slen);
++ return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL);
+ }
+
+ static int
+diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+index aee41547e7f45..6db07401bc493 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
++++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+@@ -598,7 +598,7 @@ close:
+
+ static void run_lookup_prog(const struct test *t)
+ {
+- int server_fds[MAX_SERVERS] = { -1 };
++ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+ int client_fd, reuse_conn_fd = -1;
+ struct bpf_link *lookup_link;
+ int i, err;
+@@ -1053,7 +1053,7 @@ static void run_sk_assign(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog,
+ const char *remote_ip, const char *local_ip)
+ {
+- int server_fds[MAX_SERVERS] = { -1 };
++ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+ struct bpf_sk_lookup ctx;
+ __u64 server_cookie;
+ int i, err;
+diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+index fafeddaad6a99..23915be6172d6 100644
+--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
++++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+@@ -105,4 +105,6 @@ void test_skb_ctx(void)
+ "ctx_out_mark",
+ "skb->mark == %u, expected %d\n",
+ skb.mark, 10);
++
++ bpf_object__close(obj);
+ }
+diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+index 577d619fb07ed..197ec1d1b7026 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
++++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+@@ -1,9 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2019 Facebook */
+
++#define _GNU_SOURCE
+ #include <netinet/in.h>
+ #include <arpa/inet.h>
+ #include <unistd.h>
++#include <sched.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <errno.h>
+@@ -21,6 +23,7 @@
+ enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
++ READ_SK_DST_PORT_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
+ };
+
+@@ -43,8 +46,16 @@ static __u64 child_cg_id;
+ static int linum_map_fd;
+ static __u32 duration;
+
+-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
++static bool create_netns(void)
++{
++ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
++ return false;
++
++ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
++ return false;
++
++ return true;
++}
+
+ static void print_sk(const struct bpf_sock *sk, const char *prefix)
+ {
+@@ -92,19 +103,24 @@ static void check_result(void)
+ {
+ struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+ struct bpf_sock srv_sk, cli_sk, listen_sk;
+- __u32 ingress_linum, egress_linum;
++ __u32 idx, ingress_linum, egress_linum, linum;
+ int err;
+
+- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+- &egress_linum);
++ idx = EGRESS_LINUM_IDX;
++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+- &ingress_linum);
++ idx = INGRESS_LINUM_IDX;
++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
++ idx = READ_SK_DST_PORT_LINUM_IDX;
++ err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
++ ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
++ ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
++
+ memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
+ memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
+ memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
+@@ -263,7 +279,7 @@ static void test(void)
+ char buf[DATA_LEN];
+
+ /* Prepare listen_fd */
+- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
++ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
+ /* start_server() has logged the error details */
+ if (CHECK_FAIL(listen_fd == -1))
+ goto done;
+@@ -331,8 +347,12 @@ done:
+
+ void test_sock_fields(void)
+ {
+- struct bpf_link *egress_link = NULL, *ingress_link = NULL;
+ int parent_cg_fd = -1, child_cg_fd = -1;
++ struct bpf_link *link;
++
++ /* Use a dedicated netns to have a fixed listen port */
++ if (!create_netns())
++ return;
+
+ /* Create a cgroup, get fd, and join it */
+ parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
+@@ -353,15 +373,20 @@ void test_sock_fields(void)
+ if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
+ goto done;
+
+- egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
+- child_cg_fd);
+- if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
++ link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
++ if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
++ goto done;
++ skel->links.egress_read_sock_fields = link;
++
++ link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
++ if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
+ goto done;
++ skel->links.ingress_read_sock_fields = link;
+
+- ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
+- child_cg_fd);
+- if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
++ link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
++ if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
+ goto done;
++ skel->links.read_sk_dst_port = link;
+
+ linum_map_fd = bpf_map__fd(skel->maps.linum_map);
+ sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
+@@ -370,8 +395,7 @@ void test_sock_fields(void)
+ test();
+
+ done:
+- bpf_link__destroy(egress_link);
+- bpf_link__destroy(ingress_link);
++ test_sock_fields__detach(skel);
+ test_sock_fields__destroy(skel);
+ if (child_cg_fd >= 0)
+ close(child_cg_fd);
+diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+index 4b937e5dbacae..f3cd8db26bf7e 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
++++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+@@ -3,6 +3,7 @@
+ #include "cgroup_helpers.h"
+
+ #include <linux/tcp.h>
++#include <linux/netlink.h>
+ #include "sockopt_sk.skel.h"
+
+ #ifndef SOL_TCP
+@@ -183,6 +184,33 @@ static int getsetsockopt(void)
+ goto err;
+ }
+
++ /* optval=NULL case is handled correctly */
++
++ close(fd);
++ fd = socket(AF_NETLINK, SOCK_RAW, 0);
++ if (fd < 0) {
++ log_err("Failed to create AF_NETLINK socket");
++ return -1;
++ }
++
++ buf.u32 = 1;
++ optlen = sizeof(__u32);
++ err = setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &buf, optlen);
++ if (err) {
++ log_err("Unexpected getsockopt(NETLINK_ADD_MEMBERSHIP) err=%d errno=%d",
++ err, errno);
++ goto err;
++ }
++
++ optlen = 0;
++ err = getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &optlen);
++ if (err) {
++ log_err("Unexpected getsockopt(NETLINK_LIST_MEMBERSHIPS) err=%d errno=%d",
++ err, errno);
++ goto err;
++ }
++ ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
++
+ free(big_buf);
+ close(fd);
+ return 0;
+diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+index e7201ba29ccd6..47e3159729d21 100644
+--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
++++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd)
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
+
++ /* Make our /sys mount private, so the following umount won't
++ * trigger the global umount in case it's shared.
++ */
++ err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
++ if (!ASSERT_OK(err, "remount private /sys"))
++ return err;
++
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
+index 0252f61d611a9..97d8a6f84f4ab 100644
+--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
++++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
+@@ -43,7 +43,7 @@ static int process_sample(void *ctx, void *data, size_t len)
+ void test_test_ima(void)
+ {
+ char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
+- struct ring_buffer *ringbuf;
++ struct ring_buffer *ringbuf = NULL;
+ const char *measured_dir;
+ char cmd[256];
+
+@@ -85,5 +85,6 @@ close_clean:
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
+ close_prog:
++ ring_buffer__free(ringbuf);
+ ima__destroy(skel);
+ }
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+index 8f44767a75fa5..22a7cd8fd9acf 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types {
+ */
+ /* ------ END-EXPECTED-OUTPUT ------ */
+ struct bitfield_mixed_with_others {
+- long: 4; /* char is enough as a backing field */
++ char: 4; /* char is enough as a backing field */
+ int a: 4;
+ /* 8-bit implicit padding */
+ short b; /* combined with previous bitfield */
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+index 1cef3bec1dc7f..22dbd12134347 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+@@ -58,7 +58,81 @@ union jump_code_union {
+ } __attribute__((packed));
+ };
+
+-/*------ END-EXPECTED-OUTPUT ------ */
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct nested_packed_but_aligned_struct {
++ * int x1;
++ * int x2;
++ *};
++ *
++ *struct outer_implicitly_packed_struct {
++ * char y1;
++ * struct nested_packed_but_aligned_struct y2;
++ *} __attribute__((packed));
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++struct nested_packed_but_aligned_struct {
++ int x1;
++ int x2;
++} __attribute__((packed));
++
++struct outer_implicitly_packed_struct {
++ char y1;
++ struct nested_packed_but_aligned_struct y2;
++};
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct usb_ss_ep_comp_descriptor {
++ * char: 8;
++ * char bDescriptorType;
++ * char bMaxBurst;
++ * short wBytesPerInterval;
++ *};
++ *
++ *struct usb_host_endpoint {
++ * long: 64;
++ * char: 8;
++ * struct usb_ss_ep_comp_descriptor ss_ep_comp;
++ * long: 0;
++ *} __attribute__((packed));
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++struct usb_ss_ep_comp_descriptor {
++ char: 8;
++ char bDescriptorType;
++ char bMaxBurst;
++ int: 0;
++ short wBytesPerInterval;
++} __attribute__((packed));
++
++struct usb_host_endpoint {
++ long: 64;
++ char: 8;
++ struct usb_ss_ep_comp_descriptor ss_ep_comp;
++ long: 0;
++};
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++struct nested_packed_struct {
++ int a;
++ char b;
++} __attribute__((packed));
++
++struct outer_nonpacked_struct {
++ short a;
++ struct nested_packed_struct b;
++};
++
++struct outer_packed_struct {
++ short a;
++ struct nested_packed_struct b;
++} __attribute__((packed));
++
++/* ------ END-EXPECTED-OUTPUT ------ */
+
+ int f(struct {
+ struct packed_trailing_space _1;
+@@ -69,6 +143,10 @@ int f(struct {
+ union union_is_never_packed _6;
+ union union_does_not_need_packing _7;
+ union jump_code_union _8;
++ struct outer_implicitly_packed_struct _9;
++ struct usb_host_endpoint _10;
++ struct outer_nonpacked_struct _11;
++ struct outer_packed_struct _12;
+ } *_)
+ {
+ return 0;
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+index 35c512818a56b..0b3cdffbfcf71 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+@@ -19,7 +19,7 @@ struct padded_implicitly {
+ /*
+ *struct padded_explicitly {
+ * int a;
+- * int: 32;
++ * long: 0;
+ * int b;
+ *};
+ *
+@@ -28,41 +28,28 @@ struct padded_implicitly {
+
+ struct padded_explicitly {
+ int a;
+- int: 1; /* algo will explicitly pad with full 32 bits here */
++ int: 1; /* algo will emit aligning `long: 0;` here */
+ int b;
+ };
+
+ /* ----- START-EXPECTED-OUTPUT ----- */
+-/*
+- *struct padded_a_lot {
+- * int a;
+- * long: 32;
+- * long: 64;
+- * long: 64;
+- * int b;
+- *};
+- *
+- */
+-/* ------ END-EXPECTED-OUTPUT ------ */
+-
+ struct padded_a_lot {
+ int a;
+- /* 32 bit of implicit padding here, which algo will make explicit */
+ long: 64;
+ long: 64;
+ int b;
+ };
+
++/* ------ END-EXPECTED-OUTPUT ------ */
++
+ /* ----- START-EXPECTED-OUTPUT ----- */
+ /*
+ *struct padded_cache_line {
+ * int a;
+- * long: 32;
+ * long: 64;
+ * long: 64;
+ * long: 64;
+ * int b;
+- * long: 32;
+ * long: 64;
+ * long: 64;
+ * long: 64;
+@@ -85,7 +72,7 @@ struct padded_cache_line {
+ *struct zone {
+ * int a;
+ * short b;
+- * short: 16;
++ * long: 0;
+ * struct zone_padding __pad__;
+ *};
+ *
+@@ -102,12 +89,160 @@ struct zone {
+ struct zone_padding __pad__;
+ };
+
++/* ----- START-EXPECTED-OUTPUT ----- */
++struct padding_wo_named_members {
++ long: 64;
++ long: 64;
++};
++
++struct padding_weird_1 {
++ int a;
++ long: 64;
++ short: 16;
++ short b;
++};
++
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct padding_weird_2 {
++ * long: 56;
++ * char a;
++ * long: 56;
++ * char b;
++ * char: 8;
++ *};
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++struct padding_weird_2 {
++ int: 32; /* these paddings will be collapsed into `long: 56;` */
++ short: 16;
++ char: 8;
++ char a;
++ int: 32; /* these paddings will be collapsed into `long: 56;` */
++ short: 16;
++ char: 8;
++ char b;
++ char: 8;
++};
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++struct exact_1byte {
++ char x;
++};
++
++struct padded_1byte {
++ char: 8;
++};
++
++struct exact_2bytes {
++ short x;
++};
++
++struct padded_2bytes {
++ short: 16;
++};
++
++struct exact_4bytes {
++ int x;
++};
++
++struct padded_4bytes {
++ int: 32;
++};
++
++struct exact_8bytes {
++ long x;
++};
++
++struct padded_8bytes {
++ long: 64;
++};
++
++struct ff_periodic_effect {
++ int: 32;
++ short magnitude;
++ long: 0;
++ short phase;
++ long: 0;
++ int: 32;
++ int custom_len;
++ short *custom_data;
++};
++
++struct ib_wc {
++ long: 64;
++ long: 64;
++ int: 32;
++ int byte_len;
++ void *qp;
++ union {} ex;
++ long: 64;
++ int slid;
++ int wc_flags;
++ long: 64;
++ char smac[6];
++ long: 0;
++ char network_hdr_type;
++};
++
++struct acpi_object_method {
++ long: 64;
++ char: 8;
++ char type;
++ short reference_count;
++ char flags;
++ short: 0;
++ char: 8;
++ char sync_level;
++ long: 64;
++ void *node;
++ void *aml_start;
++ union {} dispatch;
++ long: 64;
++ int aml_length;
++};
++
++struct nested_unpacked {
++ int x;
++};
++
++struct nested_packed {
++ struct nested_unpacked a;
++ char c;
++} __attribute__((packed));
++
++struct outer_mixed_but_unpacked {
++ struct nested_packed b1;
++ short a1;
++ struct nested_packed b2;
++};
++
++/* ------ END-EXPECTED-OUTPUT ------ */
++
+ int f(struct {
+ struct padded_implicitly _1;
+ struct padded_explicitly _2;
+ struct padded_a_lot _3;
+ struct padded_cache_line _4;
+ struct zone _5;
++ struct padding_wo_named_members _6;
++ struct padding_weird_1 _7;
++ struct padding_weird_2 _8;
++ struct exact_1byte _100;
++ struct padded_1byte _101;
++ struct exact_2bytes _102;
++ struct padded_2bytes _103;
++ struct exact_4bytes _104;
++ struct padded_4bytes _105;
++ struct exact_8bytes _106;
++ struct padded_8bytes _107;
++ struct ff_periodic_effect _200;
++ struct ib_wc _201;
++ struct acpi_object_method _202;
++ struct outer_mixed_but_unpacked _203;
+ } *_)
+ {
+ return 0;
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+index 8aaa24a003220..970598dda7322 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+@@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
+
+ typedef char * (*fn_ptr_arr1_t[10])(int **);
+
+-typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
++typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+
+ struct struct_w_typedefs {
+ int_t a;
+diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
+index a943d394fd3a0..38ab1ce32e57c 100644
+--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
++++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
+@@ -33,7 +33,7 @@
+
+ int _version SEC("version") = 1;
+
+-__attribute__ ((noinline))
++__attribute__ ((noinline)) __weak
+ int do_bind(struct bpf_sock_addr *ctx)
+ {
+ struct sockaddr_in sa = {};
+diff --git a/tools/testing/selftests/bpf/progs/freplace_global_func.c b/tools/testing/selftests/bpf/progs/freplace_global_func.c
+new file mode 100644
+index 0000000000000..96cb61a6ce87a
+--- /dev/null
++++ b/tools/testing/selftests/bpf/progs/freplace_global_func.c
+@@ -0,0 +1,18 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/bpf.h>
++#include <bpf/bpf_helpers.h>
++
++__noinline
++int test_ctx_global_func(struct __sk_buff *skb)
++{
++ volatile int retval = 1;
++ return retval;
++}
++
++SEC("freplace/test_pkt_access")
++int new_test_pkt_access(struct __sk_buff *skb)
++{
++ return test_ctx_global_func(skb);
++}
++
++char _license[] SEC("license") = "GPL";
+diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
+index 4896fdf816f73..92331053dba3b 100644
+--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
++++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
+@@ -826,8 +826,9 @@ out:
+
+ SEC("kprobe/vfs_link")
+ int BPF_KPROBE(kprobe__vfs_link,
+- struct dentry* old_dentry, struct inode* dir,
+- struct dentry* new_dentry, struct inode** delegated_inode)
++ struct dentry* old_dentry, struct user_namespace *mnt_userns,
++ struct inode* dir, struct dentry* new_dentry,
++ struct inode** delegated_inode)
+ {
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
+diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
+index 79c8139b63b80..9cf72ae132020 100644
+--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
++++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
+@@ -32,6 +32,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+ struct sockopt_sk *storage;
++ struct bpf_sock *sk;
++
++ /* Bypass AF_NETLINK. */
++ sk = ctx->sk;
++ if (sk && sk->family == AF_NETLINK)
++ return 1;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+@@ -130,6 +136,12 @@ int _setsockopt(struct bpf_sockopt *ctx)
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+ struct sockopt_sk *storage;
++ struct bpf_sock *sk;
++
++ /* Bypass AF_NETLINK. */
++ sk = ctx->sk;
++ if (sk && sk->family == AF_NETLINK)
++ return 1;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
+index 7de534f38c3f1..60c93aee2f4ad 100644
+--- a/tools/testing/selftests/bpf/progs/strobemeta.h
++++ b/tools/testing/selftests/bpf/progs/strobemeta.h
+@@ -358,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
+ void *payload)
+ {
+ void *location;
+- uint32_t len;
++ uint64_t len;
+
+ data->str_lens[idx] = 0;
+ location = calc_location(&cfg->str_locs[idx], tls_base);
+@@ -390,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
+ struct strobe_map_descr* descr = &data->map_descrs[idx];
+ struct strobe_map_raw map;
+ void *location;
+- uint32_t len;
++ uint64_t len;
+ int i;
+
+ descr->tag_len = 0; /* presume no tag is set */
+diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+index 76eab0aacba0c..233b089d1fbac 100644
+--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
++++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+@@ -12,6 +12,15 @@
+ #include <linux/ipv6.h>
+ #include <linux/udp.h>
+
++/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
++ * friendly version breaks compilation for older clang versions <= 15
++ * when invoked in a static assert. Restore original here.
++ */
++#ifdef offsetof
++#undef offsetof
++#define offsetof(type, member) __builtin_offsetof(type, member)
++#endif
++
+ struct gre_base_hdr {
+ uint16_t flags;
+ uint16_t protocol;
+diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+new file mode 100644
+index 0000000000000..2180c41cd890f
+--- /dev/null
++++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+@@ -0,0 +1,29 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Copyright (c) 2021 Google */
++
++#include "vmlinux.h"
++
++#include <bpf/bpf_helpers.h>
++
++extern const int bpf_prog_active __ksym; /* int type global var. */
++
++SEC("raw_tp/sys_enter")
++int handler(const void *ctx)
++{
++ int *active;
++ __u32 cpu;
++
++ cpu = bpf_get_smp_processor_id();
++ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
++ if (active) {
++ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr
++ * is read-only, should _not_ pass verification.
++ */
++ /* WRITE_ONCE */
++ *(volatile int *)active = -1;
++ }
++
++ return 0;
++}
++
++char _license[] SEC("license") = "GPL";
+diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
+index 1cfeb940cf9fb..5f0e0bfc151e5 100644
+--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
++++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
+@@ -23,7 +23,7 @@ struct {
+ __uint(value_size, sizeof(__u32));
+ } mim_hash SEC(".maps");
+
+-SEC("xdp_mimtest")
++SEC("xdp")
+ int xdp_mimtest0(struct xdp_md *ctx)
+ {
+ int value = 123;
+diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
+index 1ecd987005d2c..77fd42f835fcf 100644
+--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
++++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
+@@ -16,6 +16,16 @@
+ #include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_endian.h>
+
++#if defined(IPROUTE2_HAVE_LIBBPF)
++/* Use a new-style map definition. */
++struct {
++ __uint(type, BPF_MAP_TYPE_SOCKMAP);
++ __type(key, int);
++ __type(value, __u64);
++ __uint(pinning, LIBBPF_PIN_BY_NAME);
++ __uint(max_entries, 1);
++} server_map SEC(".maps");
++#else
+ /* Pin map under /sys/fs/bpf/tc/globals/<map name> */
+ #define PIN_GLOBAL_NS 2
+
+@@ -35,6 +45,7 @@ struct {
+ .max_elem = 1,
+ .pinning = PIN_GLOBAL_NS,
+ };
++#endif
+
+ int _version SEC("version") = 1;
+ char _license[] SEC("license") = "GPL";
+diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
+new file mode 100644
+index 0000000000000..dcf46adfda041
+--- /dev/null
++++ b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
+@@ -0,0 +1,3 @@
++// SPDX-License-Identifier: GPL-2.0
++#define IPROUTE2_HAVE_LIBBPF
++#include "test_sk_assign.c"
+diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+index ac6f7f205e25d..cb0aa46b20d19 100644
+--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
++++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+@@ -404,8 +404,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+
+ /* Narrow loads from remote_port field. Expect SRC_PORT. */
+ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+- LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
+- LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
++ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->remote_port, 0) != SRC_PORT)
+ return SK_DROP;
+diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
+index 81b57b9aaaeae..43b31aa1fcf72 100644
+--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
++++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
+@@ -12,6 +12,7 @@
+ enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
++ READ_SK_DST_PORT_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
+ };
+
+@@ -113,7 +114,7 @@ static void tpcpy(struct bpf_tcp_sock *dst,
+
+ #define RET_LOG() ({ \
+ linum = __LINE__; \
+- bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
++ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \
+ return CG_OK; \
+ })
+
+@@ -250,4 +251,48 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
+ return CG_OK;
+ }
+
++static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
++{
++ __u32 *word = (__u32 *)&sk->dst_port;
++ return word[0] == bpf_htonl(0xcafe0000);
++}
++
++static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
++{
++ __u16 *half = (__u16 *)&sk->dst_port;
++ return half[0] == bpf_htons(0xcafe);
++}
++
++static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
++{
++ __u8 *byte = (__u8 *)&sk->dst_port;
++ return byte[0] == 0xca && byte[1] == 0xfe;
++}
++
++SEC("cgroup_skb/egress")
++int read_sk_dst_port(struct __sk_buff *skb)
++{
++ __u32 linum, linum_idx;
++ struct bpf_sock *sk;
++
++ linum_idx = READ_SK_DST_PORT_LINUM_IDX;
++
++ sk = skb->sk;
++ if (!sk)
++ RET_LOG();
++
++ /* Ignore everything but the SYN from the client socket */
++ if (sk->state != BPF_TCP_SYN_SENT)
++ return CG_OK;
++
++ if (!sk_dst_port__load_word(sk))
++ RET_LOG();
++ if (!sk_dst_port__load_half(sk))
++ RET_LOG();
++ if (!sk_dst_port__load_byte(sk))
++ RET_LOG();
++
++ return CG_OK;
++}
++
+ char _license[] SEC("license") = "GPL";
+diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+index 1858435de7aaf..5cb90ca292186 100644
+--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
++++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+@@ -235,7 +235,7 @@ SEC("sk_msg1")
+ int bpf_prog4(struct sk_msg_md *msg)
+ {
+ int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+- int *start, *end, *start_push, *end_push, *start_pop, *pop;
++ int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+@@ -249,8 +249,11 @@ int bpf_prog4(struct sk_msg_md *msg)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+- if (start_push && end_push)
+- bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (start_push && end_push) {
++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (err)
++ return SK_DROP;
++ }
+ start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+ pop = bpf_map_lookup_elem(&sock_bytes, &five);
+ if (start_pop && pop)
+@@ -263,6 +266,7 @@ int bpf_prog6(struct sk_msg_md *msg)
+ {
+ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
+ int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
++ int err = 0;
+ __u64 flags = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+@@ -279,8 +283,11 @@ int bpf_prog6(struct sk_msg_md *msg)
+
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+- if (start_push && end_push)
+- bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (start_push && end_push) {
++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (err)
++ return SK_DROP;
++ }
+
+ start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+ pop = bpf_map_lookup_elem(&sock_bytes, &five);
+@@ -338,7 +345,7 @@ SEC("sk_msg5")
+ int bpf_prog10(struct sk_msg_md *msg)
+ {
+ int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
+- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
++ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+@@ -352,8 +359,11 @@ int bpf_prog10(struct sk_msg_md *msg)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+ end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+- if (start_push && end_push)
+- bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (start_push && end_push) {
++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
++ if (err)
++ return SK_PASS;
++ }
+ start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+ pop = bpf_map_lookup_elem(&sock_bytes, &five);
+ if (start_pop && pop)
+diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+index 0cf0134631b4b..58fbe22a3bf85 100644
+--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
++++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+@@ -39,16 +39,8 @@ struct {
+ __type(value, stack_trace_t);
+ } stack_amap SEC(".maps");
+
+-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+-struct random_urandom_args {
+- unsigned long long pad;
+- int got_bits;
+- int pool_left;
+- int input_left;
+-};
+-
+-SEC("tracepoint/random/urandom_read")
+-int oncpu(struct random_urandom_args *args)
++SEC("kprobe/urandom_read_iter")
++int oncpu(struct pt_regs *args)
+ {
+ __u32 max_len = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH;
+diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+index 47cbe2eeae431..fac7ef99f9a67 100644
+--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
++++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+@@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb)
+ return TC_ACT_OK;
+ }
+
+-SEC("xdp/check_syncookie")
++SEC("xdp")
+ int check_syncookie_xdp(struct xdp_md *ctx)
+ {
+ check_syncookie(ctx, (void *)(long)ctx->data,
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
+index 31f9bce37491a..e6aa2fc6ce6bd 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp.c
+@@ -210,7 +210,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
+ return XDP_TX;
+ }
+
+-SEC("xdp_tx_iptunnel")
++SEC("xdp")
+ int _xdp_tx_iptunnel(struct xdp_md *xdp)
+ {
+ void *data_end = (void *)(long)xdp->data_end;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+index 3d66599eee2ec..199c61b7d0628 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+@@ -2,7 +2,7 @@
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+-SEC("xdp_adjust_tail_grow")
++SEC("xdp")
+ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
+ {
+ void *data_end = (void *)(long)xdp->data_end;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+index 22065a9cfb254..b7448253d1359 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+@@ -9,9 +9,7 @@
+ #include <linux/if_ether.h>
+ #include <bpf/bpf_helpers.h>
+
+-int _version SEC("version") = 1;
+-
+-SEC("xdp_adjust_tail_shrink")
++SEC("xdp")
+ int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
+ {
+ void *data_end = (void *)(long)xdp->data_end;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+index b360ba2bd4411..807bf895f42ca 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+@@ -5,7 +5,7 @@
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+-SEC("xdp_dm_log")
++SEC("xdp")
+ int xdpdm_devlog(struct xdp_md *ctx)
+ {
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
+index eb93ea95d1d8f..ee7d6ac0f6151 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
+@@ -5,7 +5,7 @@
+
+ char LICENSE[] SEC("license") = "GPL";
+
+-SEC("xdp/handler")
++SEC("xdp")
+ int xdp_handler(struct xdp_md *xdp)
+ {
+ return 0;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+index fcabcda30ba32..27eb52dda92c2 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+@@ -206,7 +206,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
+ return XDP_TX;
+ }
+
+-SEC("xdp_tx_iptunnel")
++SEC("xdp")
+ int _xdp_tx_iptunnel(struct xdp_md *xdp)
+ {
+ void *data_end = (void *)(long)xdp->data_end;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+index 3a67921f62b52..596c4e71bf3ac 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+@@ -797,7 +797,7 @@ out:
+ return XDP_DROP;
+ }
+
+-SEC("xdp-test-v4")
++SEC("xdp")
+ int balancer_ingress_v4(struct xdp_md *ctx)
+ {
+ void *data = (void *)(long)ctx->data;
+@@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx)
+ return XDP_DROP;
+ }
+
+-SEC("xdp-test-v6")
++SEC("xdp")
+ int balancer_ingress_v6(struct xdp_md *ctx)
+ {
+ void *data = (void *)(long)ctx->data;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+index 59ee4f182ff80..532025057711e 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+@@ -12,13 +12,13 @@ struct {
+ __uint(max_entries, 4);
+ } cpu_map SEC(".maps");
+
+-SEC("xdp_redir")
++SEC("xdp")
+ int xdp_redir_prog(struct xdp_md *ctx)
+ {
+ return bpf_redirect_map(&cpu_map, 1, 0);
+ }
+
+-SEC("xdp_dummy")
++SEC("xdp")
+ int xdp_dummy_prog(struct xdp_md *ctx)
+ {
+ return XDP_PASS;
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+index 0ac0864977222..1e6b9c38ea6d9 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+@@ -9,7 +9,7 @@ struct {
+ __uint(max_entries, 4);
+ } dm_ports SEC(".maps");
+
+-SEC("xdp_redir")
++SEC("xdp")
+ int xdp_redir_prog(struct xdp_md *ctx)
+ {
+ return bpf_redirect_map(&dm_ports, 1, 0);
+@@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx)
+ /* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+-SEC("xdp_dummy")
++SEC("xdp")
+ int xdp_dummy_prog(struct xdp_md *ctx)
+ {
+ return XDP_PASS;
+diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
+index ea25e88819928..d988b2e0cee84 100644
+--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
++++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
+@@ -4,7 +4,7 @@
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+-SEC("xdp_dummy")
++SEC("xdp")
+ int xdp_dummy_prog(struct xdp_md *ctx)
+ {
+ return XDP_PASS;
+diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+index 880debcbcd65d..8395782b6e0a3 100644
+--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
++++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+@@ -34,7 +34,7 @@ struct {
+ __uint(max_entries, 128);
+ } mac_map SEC(".maps");
+
+-SEC("xdp_redirect_map_multi")
++SEC("xdp")
+ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+ {
+ void *data_end = (void *)(long)ctx->data_end;
+@@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+ }
+
+ /* The following 2 progs are for 2nd devmap prog testing */
+-SEC("xdp_redirect_map_ingress")
++SEC("xdp")
+ int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+ {
+ return bpf_redirect_map(&map_egress, 0,
+diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
+index 6b9ca40bd1f4f..4ad73847b8a5d 100644
+--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
++++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
+@@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
+ return XDP_TX;
+ }
+
+-SEC("xdpclient")
++SEC("xdp")
+ int xdping_client(struct xdp_md *ctx)
+ {
+ void *data_end = (void *)(long)ctx->data_end;
+@@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx)
+ return XDP_TX;
+ }
+
+-SEC("xdpserver")
++SEC("xdp")
+ int xdping_server(struct xdp_md *ctx)
+ {
+ void *data_end = (void *)(long)ctx->data_end;
+diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+index be54b7335a76e..5717db4e08621 100755
+--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
++++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+@@ -172,7 +172,7 @@ class FileExtractor(object):
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+- pattern = re.compile('^\s*(BPF_\w+),?$')
++ pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
+index ec4e15948e406..5252b91f48a18 100755
+--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
++++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
+@@ -3,6 +3,7 @@
+
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
++ret=$ksft_skip
+
+ msg="skip all tests:"
+ if [ $UID != 0 ]; then
+@@ -25,7 +26,7 @@ do
+ fi
+ done
+
+-if [ -n $LIRCDEV ];
++if [ -n "$LIRCDEV" ];
+ then
+ TYPE=lirc_mode2
+ ./test_lirc_mode2_user $LIRCDEV $INPUTDEV
+@@ -36,3 +37,5 @@ then
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+ fi
+ fi
++
++exit $ret
+diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+index b497bb85b667f..6c69c42b1d607 100755
+--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
++++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+@@ -120,6 +120,14 @@ setup()
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
++ # disable IPv6 DAD because it sometimes takes too long and fails tests
++ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
++ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
++ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
++ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
++ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
++ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
++
+ ip link add veth1 type veth peer name veth2
+ ip link add veth3 type veth peer name veth4
+ ip link add veth5 type veth peer name veth6
+@@ -289,7 +297,7 @@ test_ping()
+ ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
+ RET=$?
+ elif [ "${PROTO}" == "IPv6" ] ; then
+- ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
++ ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
+ RET=$?
+ else
+ echo " test_ping: unknown PROTO: ${PROTO}"
+diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
+index cc1cd240445d2..e3fea6f281e4b 100644
+--- a/tools/testing/selftests/bpf/test_progs.c
++++ b/tools/testing/selftests/bpf/test_progs.c
+@@ -370,7 +370,7 @@ int extract_build_id(char *build_id, size_t size)
+
+ if (getline(&line, &len, fp) == -1)
+ goto err;
+- fclose(fp);
++ pclose(fp);
+
+ if (len > size)
+ len = size;
+@@ -379,7 +379,7 @@ int extract_build_id(char *build_id, size_t size)
+ free(line);
+ return 0;
+ err:
+- fclose(fp);
++ pclose(fp);
+ return -1;
+ }
+
+diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+index 9b3617d770a52..fed765157c53c 100755
+--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
++++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+@@ -77,7 +77,7 @@ TEST_IF=lo
+ MAX_PING_TRIES=5
+ BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
+ CLSACT_SECTION="clsact/check_syncookie"
+-XDP_SECTION="xdp/check_syncookie"
++XDP_SECTION="xdp"
+ BPF_PROG_ID=0
+ PROG="${DIR}/test_tcp_check_syncookie_user"
+
+diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
+index 3a9e332c5e360..68a9a897185cd 100644
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -31,6 +31,7 @@
+ #include <linux/if_ether.h>
+ #include <linux/btf.h>
+
++#include <bpf/btf.h>
+ #include <bpf/bpf.h>
+ #include <bpf/libbpf.h>
+
+@@ -63,6 +64,11 @@ static bool unpriv_disabled = false;
+ static int skips;
+ static bool verbose = false;
+
++struct kfunc_btf_id_pair {
++ const char *kfunc;
++ int insn_idx;
++};
++
+ struct bpf_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+@@ -88,6 +94,7 @@ struct bpf_test {
+ int fixup_map_event_output[MAX_FIXUPS];
+ int fixup_map_reuseport_array[MAX_FIXUPS];
+ int fixup_map_ringbuf[MAX_FIXUPS];
++ struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
+ /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
+ * Can be a tab-separated sequence of expected strings. An empty string
+ * means no log verification.
+@@ -718,6 +725,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
+ int *fixup_map_event_output = test->fixup_map_event_output;
+ int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
+ int *fixup_map_ringbuf = test->fixup_map_ringbuf;
++ struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
+
+ if (test->fill_helper) {
+ test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
+@@ -903,6 +911,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
+ fixup_map_ringbuf++;
+ } while (*fixup_map_ringbuf);
+ }
++
++ /* Patch in kfunc BTF IDs */
++ if (fixup_kfunc_btf_id->kfunc) {
++ struct btf *btf;
++ int btf_id;
++
++ do {
++ btf_id = 0;
++ btf = btf__load_vmlinux_btf();
++ if (btf) {
++ btf_id = btf__find_by_name_kind(btf,
++ fixup_kfunc_btf_id->kfunc,
++ BTF_KIND_FUNC);
++ btf_id = btf_id < 0 ? 0 : btf_id;
++ }
++ btf__free(btf);
++ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
++ fixup_kfunc_btf_id++;
++ } while (fixup_kfunc_btf_id->kfunc);
++ }
+ }
+
+ struct libcap {
+diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
+index c033850886f44..57c8db9972a65 100755
+--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
++++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
+@@ -52,8 +52,8 @@ test_xdp_redirect()
+ return 0
+ fi
+
+- ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+- ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
++ ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
++ ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+ ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
+ ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
+
+diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+index 1538373157e3c..cc57cb87e65f6 100755
+--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
++++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+@@ -2,11 +2,11 @@
+ # SPDX-License-Identifier: GPL-2.0
+ #
+ # Test topology:
+-# - - - - - - - - - - - - - - - - - - - - - - - - -
+-# | veth1 veth2 veth3 | ... init net
++# - - - - - - - - - - - - - - - - - - -
++# | veth1 veth2 veth3 | ns0
+ # - -| - - - - - - | - - - - - - | - -
+ # --------- --------- ---------
+-# | veth0 | | veth0 | | veth0 | ...
++# | veth0 | | veth0 | | veth0 |
+ # --------- --------- ---------
+ # ns1 ns2 ns3
+ #
+@@ -31,6 +31,12 @@ IFACES=""
+ DRV_MODE="xdpgeneric xdpdrv xdpegress"
+ PASS=0
+ FAIL=0
++LOG_DIR=$(mktemp -d)
++declare -a NS
++NS[0]="ns0-$(mktemp -u XXXXXX)"
++NS[1]="ns1-$(mktemp -u XXXXXX)"
++NS[2]="ns2-$(mktemp -u XXXXXX)"
++NS[3]="ns3-$(mktemp -u XXXXXX)"
+
+ test_pass()
+ {
+@@ -46,9 +52,8 @@ test_fail()
+
+ clean_up()
+ {
+- for i in $(seq $NUM); do
+- ip link del veth$i 2> /dev/null
+- ip netns del ns$i 2> /dev/null
++ for i in $(seq 0 $NUM); do
++ ip netns del ${NS[$i]} 2> /dev/null
+ done
+ }
+
+@@ -77,21 +82,22 @@ setup_ns()
+ mode="xdpdrv"
+ fi
+
++ ip netns add ${NS[0]}
+ for i in $(seq $NUM); do
+- ip netns add ns$i
+- ip link add veth$i type veth peer name veth0 netns ns$i
+- ip link set veth$i up
+- ip -n ns$i link set veth0 up
++ ip netns add ${NS[$i]}
++ ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
++ ip -n ${NS[$i]} link set veth0 up
++ ip -n ${NS[0]} link set veth$i up
+
+- ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+- ip -n ns$i addr add 2001:db8::$i/64 dev veth0
++ ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
++ ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
+ # Add a neigh entry for IPv4 ping test
+- ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+- ip -n ns$i link set veth0 $mode obj \
+- xdp_dummy.o sec xdp_dummy &> /dev/null || \
++ ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
++ ip -n ${NS[$i]} link set veth0 $mode obj \
++ xdp_dummy.o sec xdp &> /dev/null || \
+ { test_fail "Unable to load dummy xdp" && exit 1; }
+ IFACES="$IFACES veth$i"
+- veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
++ veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
+ done
+ }
+
+@@ -100,17 +106,17 @@ do_egress_tests()
+ local mode=$1
+
+ # mac test
+- ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log &
+- ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log &
++ ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
++ ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+ sleep 0.5
+- ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
++ ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ sleep 0.5
+- pkill -9 tcpdump
++ pkill tcpdump
+
+ # mac check
+- grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
++ grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
+ test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+- grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
++ grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
+ test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+ }
+
+@@ -119,48 +125,48 @@ do_ping_tests()
+ local mode=$1
+
+ # ping6 test: echo request should be redirect back to itself, not others
+- ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
++ ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+- ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+- ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+- ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
++ ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
++ ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
++ ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+ sleep 0.5
+ # ARP test
+- ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
++ ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
+ # IPv4 test
+- ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
++ ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ # IPv6 test
+- ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
++ ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ sleep 0.5
+- pkill -9 tcpdump
++ pkill tcpdump
+
+ # All netns should receive the redirect arp requests
+- [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
++ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-1"
+- [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
++ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-2"
+- [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
++ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+ # ns1 should not receive the redirect echo request, others should
+- [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
++ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+- [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
++ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+- [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
++ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+ # ns1 should receive the echo request, ns2 should not
+- [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
++ [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-1" || \
+ test_fail "$mode IPv6 (no flags) ns1-1"
+- [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
++ [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-2" || \
+ test_fail "$mode IPv6 (no flags) ns1-2"
+ }
+@@ -176,9 +182,13 @@ do_tests()
+ xdpgeneric) drv_p="-S";;
+ esac
+
+- ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
++ ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+ xdp_pid=$!
+ sleep 1
++ if ! ps -p $xdp_pid > /dev/null; then
++ test_fail "$mode xdp_redirect_multi start failed"
++ return 1
++ fi
+
+ if [ "$mode" = "xdpegress" ]; then
+ do_egress_tests $mode
+@@ -189,16 +199,16 @@ do_tests()
+ kill $xdp_pid
+ }
+
+-trap clean_up 0 2 3 6 9
+-
+ check_env
+-rm -f xdp_redirect_*.log ns*.log mac_ns*.log
++
++trap clean_up EXIT
+
+ for mode in ${DRV_MODE}; do
+ setup_ns $mode
+ do_tests $mode
+ clean_up
+ done
++rm -rf ${LOG_DIR}
+
+ echo "Summary: PASS $PASS, FAIL $FAIL"
+ [ $FAIL -eq 0 ] && exit 0 || exit 1
+diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
+index 995278e684b6e..a3a1eaee26ea6 100755
+--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
++++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
+@@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
+ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
+ ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+
+-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
++ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
+ ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
+-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
++ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
+
+ trap cleanup EXIT
+
+diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
+index cd7bf32e6a173..661995af75602 100755
+--- a/tools/testing/selftests/bpf/test_xsk.sh
++++ b/tools/testing/selftests/bpf/test_xsk.sh
+@@ -106,6 +106,7 @@ setup_vethPairs() {
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
+ if [ -f /proc/net/if_inet6 ]; then
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
++ echo 1 > /proc/sys/net/ipv6/conf/${VETH1}/disable_ipv6
+ fi
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH1}: namespace: ${NS1}"
+diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
+index 1b1c798e92489..1b138cd2b187d 100644
+--- a/tools/testing/selftests/bpf/verifier/array_access.c
++++ b/tools/testing/selftests/bpf/verifier/array_access.c
+@@ -186,7 +186,7 @@
+ },
+ .fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+- .errstr = "R0 unbounded memory access",
++ .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+index 6e52dfc644153..6fb52d8cfd889 100644
+--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
++++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+@@ -71,6 +71,8 @@
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
++ .result_unpriv = REJECT,
++ .errstr_unpriv = "R0 leaks addr into mem",
+ },
+ {
+ "Can't use cmpxchg on uninit src reg",
+@@ -118,4 +120,88 @@
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
++ .result_unpriv = REJECT,
++ .errstr_unpriv = "R0 leaks addr into mem",
++},
++{
++ "Dest pointer in r0 - succeed, check 2",
++ .insns = {
++ /* r0 = &val */
++ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
++ /* val = r0; */
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
++ /* r5 = &val */
++ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
++ /* r0 = atomic_cmpxchg(&val, r0, r5); */
++ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
++ /* r1 = *r0 */
++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
++ /* exit(0); */
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .result_unpriv = REJECT,
++ .errstr_unpriv = "R0 leaks addr into mem",
++},
++{
++ "Dest pointer in r0 - succeed, check 3",
++ .insns = {
++ /* r0 = &val */
++ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
++ /* val = r0; */
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
++ /* r5 = &val */
++ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
++ /* r0 = atomic_cmpxchg(&val, r0, r5); */
++ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
++ /* exit(0); */
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = REJECT,
++ .errstr = "invalid size of register fill",
++ .errstr_unpriv = "R0 leaks addr into mem",
++},
++{
++ "Dest pointer in r0 - succeed, check 4",
++ .insns = {
++ /* r0 = &val */
++ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
++ /* val = r0; */
++ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
++ /* r5 = &val */
++ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
++ /* r0 = atomic_cmpxchg(&val, r0, r5); */
++ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
++ /* r1 = *r10 */
++ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8),
++ /* exit(0); */
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .result_unpriv = REJECT,
++ .errstr_unpriv = "R10 partial copy of pointer",
++},
++{
++ "Dest pointer in r0 - succeed, check 5",
++ .insns = {
++ /* r0 = &val */
++ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
++ /* val = r0; */
++ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
++ /* r5 = &val */
++ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
++ /* r0 = atomic_cmpxchg(&val, r0, r5); */
++ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
++ /* r1 = *r0 */
++ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8),
++ /* exit(0); */
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = REJECT,
++ .errstr = "R0 invalid mem access",
++ .errstr_unpriv = "R10 partial copy of pointer",
+ },
+diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+index c2aa6f26738b4..bf82b923c5fe5 100644
+--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
++++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+@@ -1,13 +1,14 @@
+ {
+ "bounds checks mixing signed and unsigned, positive bounds",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
+@@ -17,20 +18,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+@@ -40,20 +42,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 2",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+@@ -65,20 +68,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 3",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
+@@ -89,20 +93,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 4",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+@@ -112,19 +117,20 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 5",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+@@ -135,17 +141,20 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 6",
+ .insns = {
++ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_6, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
+@@ -163,13 +172,14 @@
+ {
+ "bounds checks mixing signed and unsigned, variant 7",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+@@ -179,19 +189,20 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 8",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+@@ -203,20 +214,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 9",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+@@ -228,19 +240,20 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 10",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+@@ -252,20 +265,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 11",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+@@ -278,20 +292,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 12",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+@@ -303,20 +318,21 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 13",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+@@ -331,7 +347,7 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+@@ -340,13 +356,14 @@
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_8, 2),
+@@ -360,20 +377,21 @@
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -7),
+ },
+- .fixup_map_hash_8b = { 4 },
++ .fixup_map_hash_8b = { 6 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 15",
+ .insns = {
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+@@ -387,7 +405,7 @@
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+- .fixup_map_hash_8b = { 3 },
++ .fixup_map_hash_8b = { 5 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
+index 336a749673d19..2e701e7f69680 100644
+--- a/tools/testing/selftests/bpf/verifier/calls.c
++++ b/tools/testing/selftests/bpf/verifier/calls.c
+@@ -107,6 +107,25 @@
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the allowed memory range",
+ },
++{
++ "calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
++ .insns = {
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
++ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
++ .result = REJECT,
++ .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point",
++ .fixup_kfunc_btf_id = {
++ { "bpf_kfunc_call_test_acquire", 3 },
++ { "bpf_kfunc_call_test_release", 5 },
++ },
++},
+ {
+ "calls: overlapping caller/callee",
+ .insns = {
+diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
+index 3b6ee009c00b6..4a768b130d61c 100644
+--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
++++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
+@@ -905,3 +905,39 @@
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+ },
++{
++ "reference tracking: try to leak released ptr reg",
++ .insns = {
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
++ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
++ BPF_LD_MAP_FD(BPF_REG_1, 0),
++ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
++ BPF_EXIT_INSN(),
++ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
++
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_LD_MAP_FD(BPF_REG_1, 0),
++ BPF_MOV64_IMM(BPF_REG_2, 8),
++ BPF_MOV64_IMM(BPF_REG_3, 0),
++ BPF_EMIT_CALL(BPF_FUNC_ringbuf_reserve),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
++ BPF_EXIT_INSN(),
++ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
++
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
++ BPF_MOV64_IMM(BPF_REG_2, 0),
++ BPF_EMIT_CALL(BPF_FUNC_ringbuf_discard),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++
++ BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0),
++ BPF_EXIT_INSN()
++ },
++ .fixup_map_array_48b = { 4 },
++ .fixup_map_ringbuf = { 11 },
++ .result = ACCEPT,
++ .result_unpriv = REJECT,
++ .errstr_unpriv = "R8 !read_ok"
++},
+diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
+index 7e50cb80873a5..7e36078f8f482 100644
+--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
++++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
+@@ -154,3 +154,39 @@
+ .result_unpriv = ACCEPT,
+ .insn_processed = 15,
+ },
++/* The test performs a conditional 64-bit write to a stack location
++ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8],
++ * then data is read from fp[-8]. This sequence is unsafe.
++ *
++ * The test would be mistakenly marked as safe w/o dst register parent
++ * preservation in verifier.c:copy_register_state() function.
++ *
++ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the
++ * checkpoint state after conditional 64-bit assignment.
++ */
++{
++ "write tracking and register parent chain bug",
++ .insns = {
++ /* r6 = ktime_get_ns() */
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
++ /* r0 = ktime_get_ns() */
++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
++ /* if r0 > r6 goto +1 */
++ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1),
++ /* *(u64 *)(r10 - 8) = 0xdeadbeef */
++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef),
++ /* r1 = 42 */
++ BPF_MOV64_IMM(BPF_REG_1, 42),
++ /* *(u8 *)(r10 - 8) = r1 */
++ BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8),
++ /* r2 = *(u64 *)(r10 - 8) */
++ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8),
++ /* exit(0) */
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .flags = BPF_F_TEST_STATE_FREQ,
++ .errstr = "invalid read from stack off -8+1 size 8",
++ .result = REJECT,
++},
+diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
+index ce13ece08d51c..8c224eac93df7 100644
+--- a/tools/testing/selftests/bpf/verifier/sock.c
++++ b/tools/testing/selftests/bpf/verifier/sock.c
+@@ -121,7 +121,25 @@
+ .result = ACCEPT,
+ },
+ {
+- "sk_fullsock(skb->sk): sk->dst_port [narrow load]",
++ "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
++ .insns = {
++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
++ .result = ACCEPT,
++},
++{
++ "sk_fullsock(skb->sk): sk->dst_port [half load]",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+@@ -139,7 +157,64 @@
+ .result = ACCEPT,
+ },
+ {
+- "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
++ "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
++ .insns = {
++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
++ .result = REJECT,
++ .errstr = "invalid sock access",
++},
++{
++ "sk_fullsock(skb->sk): sk->dst_port [byte load]",
++ .insns = {
++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
++ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
++ .result = ACCEPT,
++},
++{
++ "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
++ .insns = {
++ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
++ .result = REJECT,
++ .errstr = "invalid sock access",
++},
++{
++ "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+@@ -149,7 +224,7 @@
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+index 2debba4e8a3a8..4d347bc53aa28 100644
+--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
++++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+@@ -1077,6 +1077,29 @@
+ .errstr = "R0 invalid mem access 'inv'",
+ .errstr_unpriv = "R0 pointer -= pointer prohibited",
+ },
++{
++	"map access: trying to leak tainted dst reg",
++ .insns = {
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
++ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++ BPF_LD_MAP_FD(BPF_REG_1, 0),
++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
++ BPF_EXIT_INSN(),
++ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
++ BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF),
++ BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
++ BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
++ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .fixup_map_array_48b = { 4 },
++ .result = REJECT,
++ .errstr = "math between map_value pointer and 4294967295 is not allowed",
++},
+ {
+ "32bit pkt_ptr -= scalar",
+ .insns = {
+diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+index bfb97383e6b5a..b4ec228eb95d0 100644
+--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
++++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+@@ -35,7 +35,7 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+- "XDP pkt read, pkt_data' > pkt_end, good access",
++ "XDP pkt read, pkt_data' > pkt_end, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+@@ -87,6 +87,41 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_end > pkt_data', good access",
+ .insns = {
+@@ -106,16 +141,16 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_end > pkt_data', bad access 1",
++ "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -142,6 +177,42 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_end > pkt_data', corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end > pkt_data', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_data' < pkt_end, good access",
+ .insns = {
+@@ -161,16 +232,16 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data' < pkt_end, bad access 1",
++ "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -198,7 +269,43 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_end < pkt_data', good access",
++ "XDP pkt read, pkt_data' < pkt_end, corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end < pkt_data', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+@@ -250,6 +357,41 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_end < pkt_data', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, good access",
+ .insns = {
+@@ -268,15 +410,15 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
++ "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -304,7 +446,41 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_end >= pkt_data', good access",
++ "XDP pkt read, pkt_data' >= pkt_end, corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end >= pkt_data', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+@@ -359,7 +535,44 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data' <= pkt_end, good access",
++ "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data' <= pkt_end, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+@@ -413,6 +626,43 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_end <= pkt_data', good access",
+ .insns = {
+@@ -431,15 +681,15 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_end <= pkt_data', bad access 1",
++ "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -467,7 +717,41 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_meta' > pkt_data, good access",
++ "XDP pkt read, pkt_end <= pkt_data', corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
++ offsetof(struct xdp_md, data_end)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' > pkt_data, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+@@ -519,6 +803,41 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_data > pkt_meta', good access",
+ .insns = {
+@@ -538,16 +857,16 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data > pkt_meta', bad access 1",
++ "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -574,6 +893,42 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_data > pkt_meta', corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, good access",
+ .insns = {
+@@ -593,16 +948,16 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
++ "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -630,7 +985,43 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data < pkt_meta', good access",
++ "XDP pkt read, pkt_meta' < pkt_data, corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data < pkt_meta', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+@@ -682,6 +1073,41 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, good access",
+ .insns = {
+@@ -700,15 +1126,15 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
++ "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -736,7 +1162,41 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data >= pkt_meta', good access",
++ "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data >= pkt_meta', corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+@@ -791,7 +1251,44 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_meta' <= pkt_data, good access",
++ "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+@@ -845,6 +1342,43 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
++ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .errstr = "R1 offset is outside of the packet",
++ .result = REJECT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', good access",
+ .insns = {
+@@ -863,15 +1397,15 @@
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+- "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
++ "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+@@ -898,3 +1432,37 @@
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
++{
++ "XDP pkt read, pkt_data <= pkt_meta', corner case, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
++{
++ "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access",
++ .insns = {
++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
++ offsetof(struct xdp_md, data_meta)),
++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
++ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
++ BPF_MOV64_IMM(BPF_REG_0, 0),
++ BPF_EXIT_INSN(),
++ },
++ .result = ACCEPT,
++ .prog_type = BPF_PROG_TYPE_XDP,
++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
++},
+diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
+index 3696a8f32c235..f5ffba341c174 100644
+--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
++++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
+@@ -129,7 +129,7 @@ int main(int argc, char **argv)
+ goto err_out;
+ }
+
+- printf("Get interfaces");
++ printf("Get interfaces:");
+ for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+ ifaces[i] = if_nametoindex(argv[optind + i]);
+ if (!ifaces[i])
+@@ -139,7 +139,7 @@ int main(int argc, char **argv)
+ goto err_out;
+ }
+ if (ifaces[i] > MAX_INDEX_NUM) {
+- printf("Interface index to large\n");
++ printf(" interface index too large\n");
+ goto err_out;
+ }
+ printf(" %d", ifaces[i]);
+diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
+index 842d9155d36c5..79a3453dab255 100644
+--- a/tools/testing/selftests/bpf/xdping.c
++++ b/tools/testing/selftests/bpf/xdping.c
+@@ -178,9 +178,8 @@ int main(int argc, char **argv)
+ return 1;
+ }
+
+- main_prog = bpf_object__find_program_by_title(obj,
+- server ? "xdpserver" :
+- "xdpclient");
++ main_prog = bpf_object__find_program_by_name(obj,
++ server ? "xdping_server" : "xdping_client");
+ if (main_prog)
+ prog_fd = bpf_program__fd(main_prog);
+ if (!main_prog || prog_fd < 0) {
+diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
+index 623cec04ad422..0cf7e90c0052e 100644
+--- a/tools/testing/selftests/cgroup/cgroup_util.c
++++ b/tools/testing/selftests/cgroup/cgroup_util.c
+@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)
+
+ int cg_create(const char *cgroup)
+ {
+- return mkdir(cgroup, 0644);
++ return mkdir(cgroup, 0755);
+ }
+
+ int cg_wait_for_proc_count(const char *cgroup, int count)
+diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
+index 3df648c378765..6001235030631 100644
+--- a/tools/testing/selftests/cgroup/test_core.c
++++ b/tools/testing/selftests/cgroup/test_core.c
+@@ -1,11 +1,14 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+
++#define _GNU_SOURCE
+ #include <linux/limits.h>
++#include <linux/sched.h>
+ #include <sys/types.h>
+ #include <sys/mman.h>
+ #include <sys/wait.h>
+ #include <unistd.h>
+ #include <fcntl.h>
++#include <sched.h>
+ #include <stdio.h>
+ #include <errno.h>
+ #include <signal.h>
+@@ -674,6 +677,166 @@ cleanup:
+ return ret;
+ }
+
++/*
++ * cgroup migration permission check should be performed based on the
++ * credentials at the time of open instead of write.
++ */
++static int test_cgcore_lesser_euid_open(const char *root)
++{
++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
++ int ret = KSFT_FAIL;
++ char *cg_test_a = NULL, *cg_test_b = NULL;
++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++ int cg_test_b_procs_fd = -1;
++ uid_t saved_uid;
++
++ cg_test_a = cg_name(root, "cg_test_a");
++ cg_test_b = cg_name(root, "cg_test_b");
++
++ if (!cg_test_a || !cg_test_b)
++ goto cleanup;
++
++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++ if (!cg_test_a_procs || !cg_test_b_procs)
++ goto cleanup;
++
++ if (cg_create(cg_test_a) || cg_create(cg_test_b))
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_a))
++ goto cleanup;
++
++ if (chown(cg_test_a_procs, test_euid, -1) ||
++ chown(cg_test_b_procs, test_euid, -1))
++ goto cleanup;
++
++ saved_uid = geteuid();
++ if (seteuid(test_euid))
++ goto cleanup;
++
++ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
++
++ if (seteuid(saved_uid))
++ goto cleanup;
++
++ if (cg_test_b_procs_fd < 0)
++ goto cleanup;
++
++ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
++ goto cleanup;
++
++ ret = KSFT_PASS;
++
++cleanup:
++ cg_enter_current(root);
++ if (cg_test_b_procs_fd >= 0)
++ close(cg_test_b_procs_fd);
++ if (cg_test_b)
++ cg_destroy(cg_test_b);
++ if (cg_test_a)
++ cg_destroy(cg_test_a);
++ free(cg_test_b_procs);
++ free(cg_test_a_procs);
++ free(cg_test_b);
++ free(cg_test_a);
++ return ret;
++}
++
++struct lesser_ns_open_thread_arg {
++ const char *path;
++ int fd;
++ int err;
++};
++
++static int lesser_ns_open_thread_fn(void *arg)
++{
++ struct lesser_ns_open_thread_arg *targ = arg;
++
++ targ->fd = open(targ->path, O_RDWR);
++ targ->err = errno;
++ return 0;
++}
++
++/*
++ * cgroup migration permission check should be performed based on the cgroup
++ * namespace at the time of open instead of write.
++ */
++static int test_cgcore_lesser_ns_open(const char *root)
++{
++ static char stack[65536];
++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
++ int ret = KSFT_FAIL;
++ char *cg_test_a = NULL, *cg_test_b = NULL;
++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++ int cg_test_b_procs_fd = -1;
++ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
++ pid_t pid;
++ int status;
++
++ cg_test_a = cg_name(root, "cg_test_a");
++ cg_test_b = cg_name(root, "cg_test_b");
++
++ if (!cg_test_a || !cg_test_b)
++ goto cleanup;
++
++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++ if (!cg_test_a_procs || !cg_test_b_procs)
++ goto cleanup;
++
++ if (cg_create(cg_test_a) || cg_create(cg_test_b))
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_b))
++ goto cleanup;
++
++ if (chown(cg_test_a_procs, test_euid, -1) ||
++ chown(cg_test_b_procs, test_euid, -1))
++ goto cleanup;
++
++ targ.path = cg_test_b_procs;
++ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
++ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
++ &targ);
++ if (pid < 0)
++ goto cleanup;
++
++ if (waitpid(pid, &status, 0) < 0)
++ goto cleanup;
++
++ if (!WIFEXITED(status))
++ goto cleanup;
++
++ cg_test_b_procs_fd = targ.fd;
++ if (cg_test_b_procs_fd < 0)
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_a))
++ goto cleanup;
++
++ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
++ goto cleanup;
++
++ ret = KSFT_PASS;
++
++cleanup:
++ cg_enter_current(root);
++ if (cg_test_b_procs_fd >= 0)
++ close(cg_test_b_procs_fd);
++ if (cg_test_b)
++ cg_destroy(cg_test_b);
++ if (cg_test_a)
++ cg_destroy(cg_test_a);
++ free(cg_test_b_procs);
++ free(cg_test_a_procs);
++ free(cg_test_b);
++ free(cg_test_a);
++ return ret;
++}
++
+ #define T(x) { x, #x }
+ struct corecg_test {
+ int (*fn)(const char *root);
+@@ -689,6 +852,8 @@ struct corecg_test {
+ T(test_cgcore_proc_migration),
+ T(test_cgcore_thread_migration),
+ T(test_cgcore_destroy),
++ T(test_cgcore_lesser_euid_open),
++ T(test_cgcore_lesser_ns_open),
+ };
+ #undef T
+
+diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
+index 15d9d58963941..3c9c4554d5f6a 100755
+--- a/tools/testing/selftests/cgroup/test_stress.sh
++++ b/tools/testing/selftests/cgroup/test_stress.sh
+@@ -1,4 +1,4 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+-./with_stress.sh -s subsys -s fork ./test_core
++./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core
+diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
+index 42be3b9258301..cd4582129c7d6 100644
+--- a/tools/testing/selftests/clone3/clone3.c
++++ b/tools/testing/selftests/clone3/clone3.c
+@@ -52,6 +52,12 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+ size = sizeof(struct __clone_args);
+
+ switch (test_mode) {
++ case CLONE3_ARGS_NO_TEST:
++ /*
++ * Uses default 'flags' and 'SIGCHLD'
++ * assignment.
++ */
++ break;
+ case CLONE3_ARGS_ALL_0:
+ args.flags = 0;
+ args.exit_signal = 0;
+@@ -120,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
+
+ int main(int argc, char *argv[])
+ {
+- pid_t pid;
+-
+ uid_t uid = getuid();
+
+ ksft_print_header();
+diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
+index 73eb29c916d1b..aa7d13d91963f 100644
+--- a/tools/testing/selftests/core/close_range_test.c
++++ b/tools/testing/selftests/core/close_range_test.c
+@@ -54,7 +54,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
+ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+ #endif
+
+-TEST(close_range)
++TEST(core_close_range)
+ {
+ int i, ret;
+ int open_fds[101];
+diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
+new file mode 100644
+index 0000000000000..c6c2965a66075
+--- /dev/null
++++ b/tools/testing/selftests/damon/.gitignore
+@@ -0,0 +1,2 @@
++# SPDX-License-Identifier: GPL-2.0-only
++huge_count_read_write
+diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
+index 8a3f2cd9fec0c..f0aa954b5d135 100644
+--- a/tools/testing/selftests/damon/Makefile
++++ b/tools/testing/selftests/damon/Makefile
+@@ -1,6 +1,8 @@
+ # SPDX-License-Identifier: GPL-2.0
+ # Makefile for damon selftests
+
++TEST_GEN_FILES += huge_count_read_write
++
+ TEST_FILES = _chk_dependency.sh
+ TEST_PROGS = debugfs_attrs.sh
+
+diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
+index bfabb19dc0d3d..ecda972e87775 100644
+--- a/tools/testing/selftests/damon/debugfs_attrs.sh
++++ b/tools/testing/selftests/damon/debugfs_attrs.sh
+@@ -72,4 +72,22 @@ test_write_succ "$file" "" "$orig_content" "empty input"
+ test_content "$file" "$orig_content" "" "empty input written"
+ echo "$orig_content" > "$file"
+
++# Test huge count read write
++# ==========================
++
++dmesg -C
++
++for file in "$DBGFS/"*
++do
++ ./huge_count_read_write "$file"
++done
++
++if dmesg | grep -q WARNING
++then
++ dmesg
++ exit 1
++else
++ exit 0
++fi
++
+ echo "PASS"
+diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
+new file mode 100644
+index 0000000000000..ad7a6b4cf3387
+--- /dev/null
++++ b/tools/testing/selftests/damon/huge_count_read_write.c
+@@ -0,0 +1,39 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Author: SeongJae Park <sj@kernel.org>
++ */
++
++#include <fcntl.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <stdio.h>
++
++void write_read_with_huge_count(char *file)
++{
++ int filedesc = open(file, O_RDWR);
++ char buf[25];
++ int ret;
++
++ printf("%s %s\n", __func__, file);
++ if (filedesc < 0) {
++ fprintf(stderr, "failed opening %s\n", file);
++ exit(1);
++ }
++
++ write(filedesc, "", 0xfffffffful);
++ perror("after write: ");
++ ret = read(filedesc, buf, 0xfffffffful);
++ perror("after read: ");
++ close(filedesc);
++}
++
++int main(int argc, char *argv[])
++{
++ if (argc != 2) {
++ fprintf(stderr, "Usage: %s <file>\n", argv[0]);
++ exit(1);
++ }
++ write_read_with_huge_count(argv[1]);
++
++ return 0;
++}
+diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+index 685dfb3478b3e..b9b8274643de1 100755
+--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
++++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+@@ -50,8 +50,8 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
+ else
+ log_test "'$current_test' [$profile] overflow $target"
+ fi
++ RET_FIN=$(( RET_FIN || RET ))
+ done
+- RET_FIN=$(( RET_FIN || RET ))
+ done
+ done
+ current_test=""
+diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+index 3e3e06ea5703c..86e787895f78b 100644
+--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
++++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+@@ -60,7 +60,8 @@ __tc_police_test()
+
+ tc_police_rules_create $count $should_fail
+
+- offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
++ offload_count=$(tc -j filter show dev $swp1 ingress |
++ jq "[.[] | select(.options.in_hw == true)] | length")
+ ((offload_count == count))
+ check_err_fail $should_fail $? "tc police offload count"
+ }
+diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+index fedcb7b35af9f..af5ea50ed5c0e 100755
+--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
++++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+@@ -172,6 +172,17 @@ flooding_filters_add()
+ local lsb
+ local i
+
++ # Prevent unwanted packets from entering the bridge and interfering
++ # with the test.
++ tc qdisc add dev br0 clsact
++ tc filter add dev br0 egress protocol all pref 1 handle 1 \
++ matchall skip_hw action drop
++ tc qdisc add dev $h1 clsact
++ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
++ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
++ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
++ matchall skip_hw action drop
++
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+@@ -194,6 +205,12 @@ flooding_filters_del()
+ done
+
+ tc qdisc del dev $rp2 clsact
++
++ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
++ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
++ tc qdisc del dev $h1 clsact
++ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
++ tc qdisc del dev br0 clsact
+ }
+
+ flooding_check_packets()
+diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+index 9de1d123f4f5d..7f7d20f222070 100755
+--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
++++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+@@ -17,6 +17,18 @@ SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
+ DL_HANDLE=netdevsim/$DEV_NAME
+
++wait_for_devlink()
++{
++ "$@" | grep -q $DL_HANDLE
++}
++
++devlink_wait()
++{
++ local timeout=$1
++
++ busywait "$timeout" wait_for_devlink devlink dev
++}
++
+ fw_flash_test()
+ {
+ RET=0
+@@ -256,6 +268,9 @@ netns_reload_test()
+ ip netns del testns2
+ ip netns del testns1
+
++ # Wait until netns async cleanup is done.
++ devlink_wait 2000
++
+ log_test "netns reload test"
+ }
+
+@@ -348,6 +363,9 @@ resource_test()
+ ip netns del testns2
+ ip netns del testns1
+
++ # Wait until netns async cleanup is done.
++ devlink_wait 2000
++
+ log_test "resource test"
+ }
+
+@@ -496,8 +514,8 @@ dummy_reporter_test()
+
+ check_reporter_info dummy healthy 3 3 10 true
+
+- echo 8192> $DEBUGFS_DIR/health/binary_len
+- check_fail $? "Failed set dummy reporter binary len to 8192"
++ echo 8192 > $DEBUGFS_DIR/health/binary_len
++ check_err $? "Failed set dummy reporter binary len to 8192"
+
+ local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+ check_err $? "Failed show dump of dummy reporter"
+diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+index f7d84549cc3e3..79f751259098d 100755
+--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
++++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+@@ -185,7 +185,7 @@ setup_prepare()
+
+ tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+- action police rate 50mbit burst 64k \
++ action police rate 50mbit burst 64k conform-exceed drop/pipe \
+ action goto chain $(IS2 1 0)
+ }
+
+diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
+index a90f394f9aa90..d374878cc0ba9 100755
+--- a/tools/testing/selftests/efivarfs/efivarfs.sh
++++ b/tools/testing/selftests/efivarfs/efivarfs.sh
+@@ -87,6 +87,11 @@ test_create_read()
+ {
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ ./create-read $file
++ if [ $? -ne 0 ]; then
++ echo "create and read $file failed"
++ file_cleanup $file
++ exit 1
++ fi
+ file_cleanup $file
+ }
+
+diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
+index dd61118df66ed..2d7fca446c7f7 100644
+--- a/tools/testing/selftests/exec/Makefile
++++ b/tools/testing/selftests/exec/Makefile
+@@ -3,9 +3,9 @@ CFLAGS = -Wall
+ CFLAGS += -Wno-nonnull
+ CFLAGS += -D_GNU_SOURCE
+
+-TEST_PROGS := binfmt_script non-regular
+-TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
+-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
++TEST_PROGS := binfmt_script
++TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
++TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+ # Makefile is a run-time dependency, since it's accessed by the execveat test
+ TEST_FILES := Makefile
+
+diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+index 3145b0f1835c3..d9b8127950771 100644
+--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
++++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+@@ -38,11 +38,18 @@ cnt_trace() {
+
+ test_event_enabled() {
+ val=$1
++ check_times=10 # wait for 10 * SLEEP_TIME at most
+
+- e=`cat $EVENT_ENABLE`
+- if [ "$e" != $val ]; then
+- fail "Expected $val but found $e"
+- fi
++ while [ $check_times -ne 0 ]; do
++ e=`cat $EVENT_ENABLE`
++ if [ "$e" = $val ]; then
++ return 0
++ fi
++ sleep $SLEEP_TIME
++ check_times=$((check_times - 1))
++ done
++
++ fail "Expected $val but found $e"
+ }
+
+ run_enable_disable() {
+diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+index e96e279e0533a..25432b8cd5bd2 100644
+--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
++++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+@@ -19,7 +19,7 @@ fail() { # mesg
+
+ FILTER=set_ftrace_filter
+ FUNC1="schedule"
+-FUNC2="do_softirq"
++FUNC2="scheduler_tick"
+
+ ALL_FUNCS="#### all functions enabled ####"
+
+diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+index fa928b431555c..7c02509c71d0a 100644
+--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
++++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+@@ -21,7 +21,6 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME
+ check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
+
+ check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME
+-check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME
+ check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
+ check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+
+diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+index 98166fa3eb91c..34fb89b0c61fa 100644
+--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
++++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+@@ -1,6 +1,6 @@
+ #!/bin/sh
+ # SPDX-License-Identifier: GPL-2.0
+-# description: Kprobe dynamic event - adding and removing
++# description: Kprobe profile
+ # requires: kprobe_events
+
+ ! grep -q 'myevent' kprobe_profile
+diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
+index 12631f0076a10..11e157d7533b8 100644
+--- a/tools/testing/selftests/futex/Makefile
++++ b/tools/testing/selftests/futex/Makefile
+@@ -11,7 +11,7 @@ all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
++ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ if [ -e $$DIR/$(TEST_PROGS) ]; then \
+ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+ fi \
+@@ -32,6 +32,6 @@ override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
++ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+ endef
+diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
+index bd1fec59e010d..ece2e38fbb0be 100644
+--- a/tools/testing/selftests/futex/functional/Makefile
++++ b/tools/testing/selftests/futex/functional/Makefile
+@@ -4,11 +4,11 @@ INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
+ CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+ LDLIBS := -lpthread -lrt
+
+-HEADERS := \
++LOCAL_HDRS := \
+ ../include/futextest.h \
+ ../include/atomic.h \
+ ../include/logging.h
+-TEST_GEN_FILES := \
++TEST_GEN_PROGS := \
+ futex_wait_timeout \
+ futex_wait_wouldblock \
+ futex_requeue_pi \
+@@ -24,5 +24,3 @@ TEST_PROGS := run.sh
+ top_srcdir = ../../../../..
+ KSFT_KHDR_INSTALL := 1
+ include ../../lib.mk
+-
+-$(TEST_GEN_FILES): $(HEADERS)
+diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+index 1f8f6daaf1e70..86ac205ea8e18 100644
+--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
++++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+@@ -23,6 +23,7 @@
+
+ static long timeout_ns = 100000; /* 100us default timeout */
+ static futex_t futex_pi;
++static pthread_barrier_t barrier;
+
+ void usage(char *prog)
+ {
+@@ -47,6 +48,8 @@ void *get_pi_lock(void *arg)
+ if (ret != 0)
+ error("futex_lock_pi failed\n", ret);
+
++ pthread_barrier_wait(&barrier);
++
+ /* Blocks forever */
+ ret = futex_wait(&lock, 0, NULL, 0);
+ error("futex_wait failed\n", ret);
+@@ -123,6 +126,7 @@ int main(int argc, char *argv[])
+ basename(argv[0]));
+ ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+
++ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* initialize relative timeout */
+@@ -156,6 +160,9 @@ int main(int argc, char *argv[])
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
++ /* Wait until the other thread calls futex_lock_pi() */
++ pthread_barrier_wait(&barrier);
++ pthread_barrier_destroy(&barrier);
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
+index 39f2bbe8dd3df..42ea7d2aa8440 100644
+--- a/tools/testing/selftests/gpio/Makefile
++++ b/tools/testing/selftests/gpio/Makefile
+@@ -3,5 +3,6 @@
+ TEST_PROGS := gpio-mockup.sh
+ TEST_FILES := gpio-mockup-sysfs.sh
+ TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev
++CFLAGS += -I../../../../usr/include
+
+ include ../lib.mk
+diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
+index 39f0fa2a8fd63..05d66ef50c977 100644
+--- a/tools/testing/selftests/intel_pstate/Makefile
++++ b/tools/testing/selftests/intel_pstate/Makefile
+@@ -2,10 +2,10 @@
+ CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+ LDLIBS += -lm
+
+-uname_M := $(shell uname -m 2>/dev/null || echo not)
+-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
++ARCH ?= $(shell uname -m 2>/dev/null || echo not)
++ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+-ifeq (x86,$(ARCH))
++ifeq (x86,$(ARCH_PROCESSED))
+ TEST_GEN_FILES := msr aperf
+ endif
+
+diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
+index 8d50483fe204a..898d7b2fac6cc 100644
+--- a/tools/testing/selftests/kselftest.h
++++ b/tools/testing/selftests/kselftest.h
+@@ -48,6 +48,25 @@
+ #include <stdarg.h>
+ #include <stdio.h>
+
++#ifndef ARRAY_SIZE
++#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
++#endif
++
++/*
++ * gcc cpuid.h provides __cpuid_count() since v4.4.
++ * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0.
++ *
++ * Provide local define for tests needing __cpuid_count() because
++ * selftests need to work in older environments that do not yet
++ * have __cpuid_count().
++ */
++#ifndef __cpuid_count
++#define __cpuid_count(level, count, a, b, c, d) \
++ __asm__ __volatile__ ("cpuid\n\t" \
++ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
++ : "0" (level), "2" (count))
++#endif
++
+ /* define kselftest exit codes */
+ #define KSFT_PASS 0
+ #define KSFT_FAIL 1
+diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
+index cc9c846585f05..83616f0779a7e 100644
+--- a/tools/testing/selftests/kselftest/runner.sh
++++ b/tools/testing/selftests/kselftest/runner.sh
+@@ -33,9 +33,10 @@ tap_timeout()
+ {
+ # Make sure tests will time out if utility is available.
+ if [ -x /usr/bin/timeout ] ; then
+- /usr/bin/timeout --foreground "$kselftest_timeout" "$1"
++ /usr/bin/timeout --foreground "$kselftest_timeout" \
++ /usr/bin/timeout "$kselftest_timeout" $1
+ else
+- "$1"
++ $1
+ fi
+ }
+
+@@ -65,17 +66,25 @@ run_one()
+
+ TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
+ echo "# $TEST_HDR_MSG"
+- if [ ! -x "$TEST" ]; then
+- echo -n "# Warning: file $TEST is "
+- if [ ! -e "$TEST" ]; then
+- echo "missing!"
+- else
+- echo "not executable, correct this."
+- fi
++ if [ ! -e "$TEST" ]; then
++ echo "# Warning: file $TEST is missing!"
+ echo "not ok $test_num $TEST_HDR_MSG"
+ else
++ cmd="./$BASENAME_TEST"
++ if [ ! -x "$TEST" ]; then
++ echo "# Warning: file $TEST is not executable"
++
++ if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
++ then
++ interpreter=$(head -n 1 "$TEST" | cut -c 3-)
++ cmd="$interpreter ./$BASENAME_TEST"
++ else
++ echo "not ok $test_num $TEST_HDR_MSG"
++ return
++ fi
++ fi
+ cd `dirname $TEST` > /dev/null
+- ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
++ ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) |
+ tap_prefix >&4) 3>&1) |
+ (read xs; exit $xs)) 4>>"$logfile" &&
+ echo "ok $test_num $TEST_HDR_MSG") ||
+diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
+index ae0f0f33b2a6e..a6ea2bd63a831 100644
+--- a/tools/testing/selftests/kselftest_harness.h
++++ b/tools/testing/selftests/kselftest_harness.h
+@@ -671,7 +671,9 @@
+ #define EXPECT_STRNE(expected, seen) \
+ __EXPECT_STR(expected, seen, !=, 0)
+
++#ifndef ARRAY_SIZE
+ #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
++#endif
+
+ /* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+@@ -875,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
+ }
+
+ t->timed_out = true;
+- kill(t->pid, SIGKILL);
++ // signal process group
++ kill(-(t->pid), SIGKILL);
+ }
+
+ void __wait_for_test(struct __test_metadata *t)
+@@ -913,7 +916,11 @@ void __wait_for_test(struct __test_metadata *t)
+ fprintf(TH_LOG_STREAM,
+ "# %s: Test terminated by timeout\n", t->name);
+ } else if (WIFEXITED(status)) {
+- if (t->termsig != -1) {
++ if (WEXITSTATUS(status) == 255) {
++ /* SKIP */
++ t->passed = 1;
++ t->skip = 1;
++ } else if (t->termsig != -1) {
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Test exited normally instead of by signal (code: %d)\n",
+@@ -925,11 +932,6 @@ void __wait_for_test(struct __test_metadata *t)
+ case 0:
+ t->passed = 1;
+ break;
+- /* SKIP */
+- case 255:
+- t->passed = 1;
+- t->skip = 1;
+- break;
+ /* Other failure, assume step report. */
+ default:
+ t->passed = 0;
+@@ -969,7 +971,7 @@ void __run_test(struct __fixture_metadata *f,
+ t->passed = 1;
+ t->skip = 0;
+ t->trigger = 0;
+- t->step = 0;
++ t->step = 1;
+ t->no_print = 0;
+ memset(t->results->reason, 0, sizeof(t->results->reason));
+
+@@ -985,6 +987,7 @@ void __run_test(struct __fixture_metadata *f,
+ ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
+ t->passed = 0;
+ } else if (t->pid == 0) {
++ setpgrp();
+ t->fn(t, variant);
+ if (t->skip)
+ _exit(255);
+diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
+index 010b59b139176..1876d148ea0ae 100644
+--- a/tools/testing/selftests/kvm/include/kvm_util.h
++++ b/tools/testing/selftests/kvm/include/kvm_util.h
+@@ -309,6 +309,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+ unsigned int vm_get_page_size(struct kvm_vm *vm);
+ unsigned int vm_get_page_shift(struct kvm_vm *vm);
++unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+ uint64_t vm_get_max_gfn(struct kvm_vm *vm);
+ int vm_get_fd(struct kvm_vm *vm);
+
+diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
+index 05e65ca1c30cd..23861c8faa61c 100644
+--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
++++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
+@@ -58,6 +58,21 @@
+ /* CPUID.0x8000_0001.EDX */
+ #define CPUID_GBPAGES (1ul << 26)
+
++/* Page table bitfield declarations */
++#define PTE_PRESENT_MASK BIT_ULL(0)
++#define PTE_WRITABLE_MASK BIT_ULL(1)
++#define PTE_USER_MASK BIT_ULL(2)
++#define PTE_ACCESSED_MASK BIT_ULL(5)
++#define PTE_DIRTY_MASK BIT_ULL(6)
++#define PTE_LARGE_MASK BIT_ULL(7)
++#define PTE_GLOBAL_MASK BIT_ULL(8)
++#define PTE_NX_MASK BIT_ULL(63)
++
++#define PAGE_SHIFT 12
++
++#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
++#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
++
+ /* General Registers in 64-Bit Mode */
+ struct gpr64_regs {
+ u64 rax;
+diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+index 0299cd81b8ba2..aa3795cd7bd3d 100644
+--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
++++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+@@ -12,6 +12,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <sys/resource.h>
+
+ #include "test_util.h"
+
+@@ -40,10 +41,39 @@ int main(int argc, char *argv[])
+ {
+ int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
+ int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
++ /*
++ * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
++ * an arbitrary number for everything else.
++ */
++ int nr_fds_wanted = kvm_max_vcpus + 100;
++ struct rlimit rl;
+
+ pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
+ pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+
++ /*
++ * Check that we're allowed to open nr_fds_wanted file descriptors and
++ * try raising the limits if needed.
++ */
++ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
++
++ if (rl.rlim_cur < nr_fds_wanted) {
++ rl.rlim_cur = nr_fds_wanted;
++ if (rl.rlim_max < nr_fds_wanted) {
++ int old_rlim_max = rl.rlim_max;
++ rl.rlim_max = nr_fds_wanted;
++
++ int r = setrlimit(RLIMIT_NOFILE, &rl);
++ if (r < 0) {
++ printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
++ old_rlim_max, nr_fds_wanted);
++ exit(KSFT_SKIP);
++ }
++ } else {
++ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
++ }
++ }
++
+ /*
+ * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
+ * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
+diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
+index 36407cb0ec85d..f1ddfe4c4a031 100644
+--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
++++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
+@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
+ else
+ guest_test_phys_mem = p->phys_offset;
+ #ifdef __s390x__
+- alignment = max(0x100000, alignment);
++ alignment = max(0x100000UL, alignment);
+ #endif
+ guest_test_phys_mem &= ~(alignment - 1);
+
+diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+index e0b0164e9af85..be1d9728c4cea 100644
+--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
+
+ void ucall(uint64_t cmd, int nargs, ...)
+ {
+- struct ucall uc = {
+- .cmd = cmd,
+- };
++ struct ucall uc = {};
+ va_list va;
+ int i;
+
++ WRITE_ONCE(uc.cmd, cmd);
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+- uc.args[i] = va_arg(va, uint64_t);
++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+ }
+
+ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
+index 10a8ed691c669..e9d0ab9567fbf 100644
+--- a/tools/testing/selftests/kvm/lib/kvm_util.c
++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
+@@ -307,7 +307,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+ (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to PA-bits. */
+- vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
++ vm->max_gfn = vm_compute_max_gfn(vm);
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = sparsebit_alloc();
+@@ -2282,6 +2282,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
+ return vm->page_shift;
+ }
+
++unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
++{
++ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
++}
++
+ uint64_t vm_get_max_gfn(struct kvm_vm *vm)
+ {
+ return vm->max_gfn;
+diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
+index 28cb881f440d0..4f1449fa9592e 100644
+--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
++++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
+@@ -19,38 +19,6 @@
+
+ vm_vaddr_t exception_handlers;
+
+-/* Virtual translation table structure declarations */
+-struct pageUpperEntry {
+- uint64_t present:1;
+- uint64_t writable:1;
+- uint64_t user:1;
+- uint64_t write_through:1;
+- uint64_t cache_disable:1;
+- uint64_t accessed:1;
+- uint64_t ignored_06:1;
+- uint64_t page_size:1;
+- uint64_t ignored_11_08:4;
+- uint64_t pfn:40;
+- uint64_t ignored_62_52:11;
+- uint64_t execute_disable:1;
+-};
+-
+-struct pageTableEntry {
+- uint64_t present:1;
+- uint64_t writable:1;
+- uint64_t user:1;
+- uint64_t write_through:1;
+- uint64_t cache_disable:1;
+- uint64_t accessed:1;
+- uint64_t dirty:1;
+- uint64_t reserved_07:1;
+- uint64_t global:1;
+- uint64_t ignored_11_09:3;
+- uint64_t pfn:40;
+- uint64_t ignored_62_52:11;
+- uint64_t execute_disable:1;
+-};
+-
+ void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent)
+ {
+@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+ return &page_table[index];
+ }
+
+-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+- uint64_t pt_pfn,
+- uint64_t vaddr,
+- uint64_t paddr,
+- int level,
+- enum x86_page_size page_size)
++static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
++ uint64_t pt_pfn,
++ uint64_t vaddr,
++ uint64_t paddr,
++ int level,
++ enum x86_page_size page_size)
+ {
+- struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+-
+- if (!pte->present) {
+- pte->writable = true;
+- pte->present = true;
+- pte->page_size = (level == page_size);
+- if (pte->page_size)
+- pte->pfn = paddr >> vm->page_shift;
++ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
++
++ if (!(*pte & PTE_PRESENT_MASK)) {
++ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
++ if (level == page_size)
++ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ else
+- pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
++ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+ TEST_ASSERT(level != page_size,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ page_size, vaddr);
+- TEST_ASSERT(!pte->page_size,
++ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ level, vaddr);
+ }
+@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size)
+ {
+ const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+- struct pageUpperEntry *pml4e, *pdpe, *pde;
+- struct pageTableEntry *pte;
++ uint64_t *pml4e, *pdpe, *pde;
++ uint64_t *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ */
+ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+ vaddr, paddr, 3, page_size);
+- if (pml4e->page_size)
++ if (*pml4e & PTE_LARGE_MASK)
+ return;
+
+- pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+- if (pdpe->page_size)
++ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
++ if (*pdpe & PTE_LARGE_MASK)
+ return;
+
+- pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+- if (pde->page_size)
++ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
++ if (*pde & PTE_LARGE_MASK)
+ return;
+
+ /* Fill in page table entry. */
+- pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+- TEST_ASSERT(!pte->present,
++ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
++ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+- pte->pfn = paddr >> vm->page_shift;
+- pte->writable = true;
+- pte->present = 1;
++ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ }
+
+ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+@@ -282,12 +246,12 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+ __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+ }
+
+-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
++static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+ uint64_t vaddr)
+ {
+ uint16_t index[4];
+- struct pageUpperEntry *pml4e, *pdpe, *pde;
+- struct pageTableEntry *pte;
++ uint64_t *pml4e, *pdpe, *pde;
++ uint64_t *pte;
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_sregs sregs;
+ int max_phy_addr;
+@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+- TEST_ASSERT(pml4e[index[3]].present,
++ TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
+ "Expected pml4e to be present for gva: 0x%08lx", vaddr);
+- TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+- (rsvd_mask | (1ull << 7))) == 0,
++ TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
+ "Unexpected reserved bits set.");
+
+- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+- TEST_ASSERT(pdpe[index[2]].present,
++ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
++ TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
+ "Expected pdpe to be present for gva: 0x%08lx", vaddr);
+- TEST_ASSERT(pdpe[index[2]].page_size == 0,
++ TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
+ "Expected pdpe to map a pde not a 1-GByte page.");
+- TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
++ TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+- TEST_ASSERT(pde[index[1]].present,
++ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
++ TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
+ "Expected pde to be present for gva: 0x%08lx", vaddr);
+- TEST_ASSERT(pde[index[1]].page_size == 0,
++ TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
+ "Expected pde to map a pte not a 2-MByte page.");
+- TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
++ TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+- TEST_ASSERT(pte[index[0]].present,
++ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
++ TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
+ "Expected pte to be present for gva: 0x%08lx", vaddr);
+
+ return &pte[index[0]];
+@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
+
+ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+ {
+- struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
++ uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+
+ return *(uint64_t *)pte;
+ }
+@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte)
+ {
+- struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+- vaddr);
++ uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+
+ *(uint64_t *)new_pte = pte;
+ }
+
+ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+ {
+- struct pageUpperEntry *pml4e, *pml4e_start;
+- struct pageUpperEntry *pdpe, *pdpe_start;
+- struct pageUpperEntry *pde, *pde_start;
+- struct pageTableEntry *pte, *pte_start;
++ uint64_t *pml4e, *pml4e_start;
++ uint64_t *pdpe, *pdpe_start;
++ uint64_t *pde, *pde_start;
++ uint64_t *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+- pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
++ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+- if (!pml4e->present)
++ if (!(*pml4e & PTE_PRESENT_MASK))
+ continue;
+- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
++ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
+- pml4e->writable, pml4e->execute_disable);
++ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
++ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+
+- pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
++ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+- if (!pdpe->present)
++ if (!(*pdpe & PTE_PRESENT_MASK))
+ continue;
+- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
++ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+- (uint64_t) pdpe->pfn, pdpe->writable,
+- pdpe->execute_disable);
++ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
++ !!(*pdpe & PTE_NX_MASK));
+
+- pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
++ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+- if (!pde->present)
++ if (!(*pde & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+- "0x%-12lx 0x%-10lx %u %u\n",
++ "0x%-12lx 0x%-10llx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+- (uint64_t) pde->pfn, pde->writable,
+- pde->execute_disable);
++ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
++ !!(*pde & PTE_NX_MASK));
+
+- pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
++ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+- if (!pte->present)
++ if (!(*pte & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+- "0x%-12lx 0x%-10lx %u %u "
++ "0x%-12lx 0x%-10llx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+- (uint64_t) pte->pfn,
+- pte->writable,
+- pte->execute_disable,
+- pte->dirty,
++ PTE_GET_PFN(*pte),
++ !!(*pte & PTE_WRITABLE_MASK),
++ !!(*pte & PTE_NX_MASK),
++ !!(*pte & PTE_DIRTY_MASK),
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+ {
+ uint16_t index[4];
+- struct pageUpperEntry *pml4e, *pdpe, *pde;
+- struct pageTableEntry *pte;
++ uint64_t *pml4e, *pdpe, *pde;
++ uint64_t *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+- if (!pml4e[index[3]].present)
++ if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
+ goto unmapped_gva;
+
+- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+- if (!pdpe[index[2]].present)
++ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
++ if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
+ goto unmapped_gva;
+
+- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+- if (!pde[index[1]].present)
++ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
++ if (!(pde[index[1]] & PTE_PRESENT_MASK))
+ goto unmapped_gva;
+
+- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+- if (!pte[index[0]].present)
++ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
++ if (!(pte[index[0]] & PTE_PRESENT_MASK))
+ goto unmapped_gva;
+
+- return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
++ return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
+
+ unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+@@ -1364,7 +1326,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+
+ asm volatile("vmcall"
+ : "=a"(r)
+- : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
++ : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+ return r;
+ }
+
+@@ -1433,3 +1395,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
+
+ return cpuid;
+ }
++
++#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
++#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
++#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
++
++static inline unsigned x86_family(unsigned int eax)
++{
++ unsigned int x86;
++
++ x86 = (eax >> 8) & 0xf;
++
++ if (x86 == 0xf)
++ x86 += (eax >> 20) & 0xff;
++
++ return x86;
++}
++
++unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
++{
++ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
++ unsigned long ht_gfn, max_gfn, max_pfn;
++ uint32_t eax, ebx, ecx, edx, max_ext_leaf;
++
++ max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
++
++ /* Avoid reserved HyperTransport region on AMD processors. */
++ eax = ecx = 0;
++ cpuid(&eax, &ebx, &ecx, &edx);
++ if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
++ ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
++ edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
++ return max_gfn;
++
++ /* On parts with <40 physical address bits, the area is fully hidden */
++ if (vm->pa_bits < 40)
++ return max_gfn;
++
++ /* Before family 17h, the HyperTransport area is just below 1T. */
++ ht_gfn = (1 << 28) - num_ht_pages;
++ eax = 1;
++ cpuid(&eax, &ebx, &ecx, &edx);
++ if (x86_family(eax) < 0x17)
++ goto done;
++
++ /*
++ * Otherwise it's at the top of the physical address space, possibly
++ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
++ * the old conservative value if MAXPHYADDR is not enumerated.
++ */
++ eax = 0x80000000;
++ cpuid(&eax, &ebx, &ecx, &edx);
++ max_ext_leaf = eax;
++ if (max_ext_leaf < 0x80000008)
++ goto done;
++
++ eax = 0x80000008;
++ cpuid(&eax, &ebx, &ecx, &edx);
++ max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
++ if (max_ext_leaf >= 0x8000001f) {
++ eax = 0x8000001f;
++ cpuid(&eax, &ebx, &ecx, &edx);
++ max_pfn >>= (ebx >> 6) & 0x3f;
++ }
++
++ ht_gfn = max_pfn - num_ht_pages;
++done:
++ return min(max_gfn, ht_gfn - 1);
++}
+diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
+index 2ac98d70d02bd..161eba7cd1289 100644
+--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
++++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
+@@ -54,6 +54,18 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+ seg->base = base;
+ }
+
++/*
++ * Avoid using memset to clear the vmcb, since libc may not be
++ * available in L1 (and, even if it is, features that libc memset may
++ * want to use, like AVX, may not be enabled).
++ */
++static void clear_vmcb(struct vmcb *vmcb)
++{
++ int n = sizeof(*vmcb) / sizeof(u32);
++
++ asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory");
++}
++
+ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+ {
+ struct vmcb *vmcb = svm->vmcb;
+@@ -70,7 +82,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
+ wrmsr(MSR_EFER, efer | EFER_SVME);
+ wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+- memset(vmcb, 0, sizeof(*vmcb));
++ clear_vmcb(vmcb);
+ asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
+ vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+ vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+index 4cfcafea9f5a6..766c1790df664 100644
+--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
++++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+@@ -72,7 +72,7 @@ struct memslot_antagonist_args {
+ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
+ uint64_t nr_modifications)
+ {
+- const uint64_t pages = 1;
++ uint64_t pages = max_t(int, vm_get_page_size(vm), getpagesize()) / vm_get_page_size(vm);
+ uint64_t gpa;
+ int i;
+
+diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
+index 4158da0da2bba..d7a7e760adc80 100644
+--- a/tools/testing/selftests/kvm/rseq_test.c
++++ b/tools/testing/selftests/kvm/rseq_test.c
+@@ -82,8 +82,9 @@ static int next_cpu(int cpu)
+ return cpu;
+ }
+
+-static void *migration_worker(void *ign)
++static void *migration_worker(void *__rseq_tid)
+ {
++ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+@@ -106,7 +107,7 @@ static void *migration_worker(void *ign)
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
++ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+@@ -231,7 +232,8 @@ int main(int argc, char *argv[])
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+- pthread_create(&migration_thread, NULL, migration_worker, 0);
++ pthread_create(&migration_thread, NULL, migration_worker,
++ (void *)(unsigned long)syscall(SYS_gettid));
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vm, VCPU_ID);
+diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+index e0b2bb1339b16..3330fb183c680 100644
+--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+@@ -44,7 +44,7 @@ static inline void nop_loop(void)
+ {
+ int i;
+
+- for (i = 0; i < 1000000; i++)
++ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+ }
+
+@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+- /* First, check MSR-based clocksource */
++	/* For increased accuracy, take mean rdtsc() before and after rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+- /* First, check MSR-based clocksource */
++	/* For increased accuracy, take mean rdtsc() before and after ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+index 8039e1eff9388..9f55ccd169a13 100644
+--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
++++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+@@ -84,7 +84,7 @@ int get_warnings_count(void)
+ f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
+ if (fscanf(f, "%d", &warnings) < 1)
+ warnings = 0;
+- fclose(f);
++ pclose(f);
+
+ return warnings;
+ }
+diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
+index d0fe2fdce58c4..db2a17559c3d5 100644
+--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
++++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
+@@ -105,7 +105,6 @@ static void guest_code(void *arg)
+
+ if (cpu_has_svm()) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+- svm->vmcb->save.rip += 3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
+index ca40abe9daa86..35f64832b869c 100644
+--- a/tools/testing/selftests/landlock/base_test.c
++++ b/tools/testing/selftests/landlock/base_test.c
+@@ -18,10 +18,11 @@
+ #include "common.h"
+
+ #ifndef O_PATH
+-#define O_PATH 010000000
++#define O_PATH 010000000
+ #endif
+
+-TEST(inconsistent_attr) {
++TEST(inconsistent_attr)
++{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ char *const buf = malloc(page_size + 1);
+ struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
+@@ -34,20 +35,26 @@ TEST(inconsistent_attr) {
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
++ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
+ 	/* The size is less than sizeof(struct landlock_attr_enforce). */
+ ASSERT_EQ(EFAULT, errno);
+
+- ASSERT_EQ(-1, landlock_create_ruleset(NULL,
+- sizeof(struct landlock_ruleset_attr), 0));
++ ASSERT_EQ(-1, landlock_create_ruleset(
++ NULL, sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+- ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
+- sizeof(struct landlock_ruleset_attr), 0));
++ /* Checks minimal valid attribute size. */
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
++ ASSERT_EQ(ENOMSG, errno);
++ ASSERT_EQ(-1, landlock_create_ruleset(
++ ruleset_attr,
++ sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
+ ASSERT_EQ(ENOMSG, errno);
+@@ -63,38 +70,44 @@ TEST(inconsistent_attr) {
+ free(buf);
+ }
+
+-TEST(abi_version) {
++TEST(abi_version)
++{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ ASSERT_EQ(1, landlock_create_ruleset(NULL, 0,
+- LANDLOCK_CREATE_RULESET_VERSION));
++ LANDLOCK_CREATE_RULESET_VERSION));
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+- LANDLOCK_CREATE_RULESET_VERSION));
++ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+- LANDLOCK_CREATE_RULESET_VERSION));
++ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+- ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr),
+- LANDLOCK_CREATE_RULESET_VERSION));
++ ASSERT_EQ(-1,
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
++ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+- LANDLOCK_CREATE_RULESET_VERSION | 1 << 31));
++ LANDLOCK_CREATE_RULESET_VERSION |
++ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+ }
+
+-TEST(inval_create_ruleset_flags) {
++/* Tests ordering of syscall argument checks. */
++TEST(create_ruleset_checks_ordering)
++{
+ const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
+ const int invalid_flag = last_flag << 1;
++ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+
++ /* Checks priority for invalid flags. */
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+@@ -102,44 +115,121 @@ TEST(inval_create_ruleset_flags) {
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+- invalid_flag));
++ invalid_flag));
++ ASSERT_EQ(EINVAL, errno);
++
++ ASSERT_EQ(-1,
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
++ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+- ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), invalid_flag));
++ /* Checks too big ruleset_attr size. */
++ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
++ ASSERT_EQ(E2BIG, errno);
++
++ /* Checks too small ruleset_attr size. */
++ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
++ ASSERT_EQ(EINVAL, errno);
++ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
++
++ /* Checks valid call. */
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
+ }
+
+-TEST(empty_path_beneath_attr) {
++/* Tests ordering of syscall argument checks. */
++TEST(add_rule_checks_ordering)
++{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+- const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ struct landlock_path_beneath_attr path_beneath_attr = {
++ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
++ .parent_fd = -1,
++ };
++ const int ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+- /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
++ /* Checks invalid flags. */
++ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
++ ASSERT_EQ(EINVAL, errno);
++
++ /* Checks invalid ruleset FD. */
++ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
++ ASSERT_EQ(EBADF, errno);
++
++ /* Checks invalid rule type. */
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
++ ASSERT_EQ(EINVAL, errno);
++
++ /* Checks invalid rule attr. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- NULL, 0));
++ NULL, 0));
+ ASSERT_EQ(EFAULT, errno);
++
++ /* Checks invalid path_beneath.parent_fd. */
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_attr, 0));
++ ASSERT_EQ(EBADF, errno);
++
++ /* Checks valid call. */
++ path_beneath_attr.parent_fd =
++ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath_attr.parent_fd);
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_attr, 0));
++ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+
+-TEST(inval_fd_enforce) {
++/* Tests ordering of syscall argument and permission checks. */
++TEST(restrict_self_checks_ordering)
++{
++ const struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
++ };
++ struct landlock_path_beneath_attr path_beneath_attr = {
++ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
++ .parent_fd = -1,
++ };
++ const int ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
++
++ ASSERT_LE(0, ruleset_fd);
++ path_beneath_attr.parent_fd =
++ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath_attr.parent_fd);
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_attr, 0));
++ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
++
++ /* Checks unprivileged enforcement without no_new_privs. */
++ drop_caps(_metadata);
++ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
++ ASSERT_EQ(EPERM, errno);
++ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
++ ASSERT_EQ(EPERM, errno);
++ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
++ ASSERT_EQ(EPERM, errno);
++
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
++ /* Checks invalid flags. */
++ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
++ ASSERT_EQ(EINVAL, errno);
++
++ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EBADF, errno);
+-}
+-
+-TEST(unpriv_enforce_without_no_new_privs) {
+- int err;
+
+- drop_caps(_metadata);
+- err = landlock_restrict_self(-1, 0);
+- ASSERT_EQ(EPERM, errno);
+- ASSERT_EQ(err, -1);
++ /* Checks valid call. */
++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
++ ASSERT_EQ(0, close(ruleset_fd));
+ }
+
+ TEST(ruleset_fd_io)
+@@ -151,8 +241,8 @@ TEST(ruleset_fd_io)
+ char buf;
+
+ drop_caps(_metadata);
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
+@@ -197,14 +287,15 @@ TEST(ruleset_fd_transfer)
+ drop_caps(_metadata);
+
+ /* Creates a test ruleset with a simple rule. */
+- ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd_tx =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd_tx);
+- path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
+- O_DIRECTORY | O_CLOEXEC);
++ path_beneath_attr.parent_fd =
++ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+- ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath_attr, 0));
++ ASSERT_EQ(0,
++ landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+@@ -215,7 +306,8 @@ TEST(ruleset_fd_transfer)
+ memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
+
+ /* Sends the ruleset FD over a socketpair and then close it. */
+- ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
++ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
++ socket_fds));
+ ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
+ ASSERT_EQ(0, close(socket_fds[0]));
+ ASSERT_EQ(0, close(ruleset_fd_tx));
+@@ -226,7 +318,8 @@ TEST(ruleset_fd_transfer)
+ int ruleset_fd_rx;
+
+ *(char *)msg.msg_iov->iov_base = '\0';
+- ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
++ ASSERT_EQ(sizeof(data_tx),
++ recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
+ ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
+ ASSERT_EQ(0, close(socket_fds[1]));
+ cmsg = CMSG_FIRSTHDR(&msg);
+diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
+index 20e2a9286d710..29b3cf7d16401 100644
+--- a/tools/testing/selftests/landlock/common.h
++++ b/tools/testing/selftests/landlock/common.h
+@@ -29,6 +29,7 @@
+ * this to be possible, we must not call abort() but instead exit smoothly
+ * (hence the step print).
+ */
++/* clang-format off */
+ #define TEST_F_FORK(fixture_name, test_name) \
+ static void fixture_name##_##test_name##_child( \
+ struct __test_metadata *_metadata, \
+@@ -75,11 +76,12 @@
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
++/* clang-format on */
+
+ #ifndef landlock_create_ruleset
+-static inline int landlock_create_ruleset(
+- const struct landlock_ruleset_attr *const attr,
+- const size_t size, const __u32 flags)
++static inline int
++landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
++ const size_t size, const __u32 flags)
+ {
+ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+ }
+@@ -87,17 +89,18 @@ static inline int landlock_create_ruleset(
+
+ #ifndef landlock_add_rule
+ static inline int landlock_add_rule(const int ruleset_fd,
+- const enum landlock_rule_type rule_type,
+- const void *const rule_attr, const __u32 flags)
++ const enum landlock_rule_type rule_type,
++ const void *const rule_attr,
++ const __u32 flags)
+ {
+- return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
+- rule_attr, flags);
++ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
++ flags);
+ }
+ #endif
+
+ #ifndef landlock_restrict_self
+ static inline int landlock_restrict_self(const int ruleset_fd,
+- const __u32 flags)
++ const __u32 flags)
+ {
+ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+ }
+@@ -115,69 +118,76 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
+ };
+
+ cap_p = cap_get_proc();
+- EXPECT_NE(NULL, cap_p) {
++ EXPECT_NE(NULL, cap_p)
++ {
+ TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
+ }
+- EXPECT_NE(-1, cap_clear(cap_p)) {
++ EXPECT_NE(-1, cap_clear(cap_p))
++ {
+ TH_LOG("Failed to cap_clear: %s", strerror(errno));
+ }
+ if (!drop_all) {
+ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
+- ARRAY_SIZE(caps), caps, CAP_SET)) {
++ ARRAY_SIZE(caps), caps, CAP_SET))
++ {
+ TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
+ }
+ }
+- EXPECT_NE(-1, cap_set_proc(cap_p)) {
++ EXPECT_NE(-1, cap_set_proc(cap_p))
++ {
+ TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
+ }
+- EXPECT_NE(-1, cap_free(cap_p)) {
++ EXPECT_NE(-1, cap_free(cap_p))
++ {
+ TH_LOG("Failed to cap_free: %s", strerror(errno));
+ }
+ }
+
+ /* We cannot put such helpers in a library because of kselftest_harness.h . */
+-__attribute__((__unused__))
+-static void disable_caps(struct __test_metadata *const _metadata)
++__attribute__((__unused__)) static void
++disable_caps(struct __test_metadata *const _metadata)
+ {
+ _init_caps(_metadata, false);
+ }
+
+-__attribute__((__unused__))
+-static void drop_caps(struct __test_metadata *const _metadata)
++__attribute__((__unused__)) static void
++drop_caps(struct __test_metadata *const _metadata)
+ {
+ _init_caps(_metadata, true);
+ }
+
+ static void _effective_cap(struct __test_metadata *const _metadata,
+- const cap_value_t caps, const cap_flag_value_t value)
++ const cap_value_t caps, const cap_flag_value_t value)
+ {
+ cap_t cap_p;
+
+ cap_p = cap_get_proc();
+- EXPECT_NE(NULL, cap_p) {
++ EXPECT_NE(NULL, cap_p)
++ {
+ TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
+ }
+- EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
++ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
++ {
+ TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
+ }
+- EXPECT_NE(-1, cap_set_proc(cap_p)) {
++ EXPECT_NE(-1, cap_set_proc(cap_p))
++ {
+ TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
+ }
+- EXPECT_NE(-1, cap_free(cap_p)) {
++ EXPECT_NE(-1, cap_free(cap_p))
++ {
+ TH_LOG("Failed to cap_free: %s", strerror(errno));
+ }
+ }
+
+-__attribute__((__unused__))
+-static void set_cap(struct __test_metadata *const _metadata,
+- const cap_value_t caps)
++__attribute__((__unused__)) static void
++set_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
+ {
+ _effective_cap(_metadata, caps, CAP_SET);
+ }
+
+-__attribute__((__unused__))
+-static void clear_cap(struct __test_metadata *const _metadata,
+- const cap_value_t caps)
++__attribute__((__unused__)) static void
++clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
+ {
+ _effective_cap(_metadata, caps, CAP_CLEAR);
+ }
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+index 10c9a1e4ebd9b..ea988b3d6b2ed 100644
+--- a/tools/testing/selftests/landlock/fs_test.c
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -11,6 +11,7 @@
+ #include <fcntl.h>
+ #include <linux/landlock.h>
+ #include <sched.h>
++#include <stdio.h>
+ #include <string.h>
+ #include <sys/capability.h>
+ #include <sys/mount.h>
+@@ -22,8 +23,21 @@
+
+ #include "common.h"
+
+-#define TMP_DIR "tmp"
+-#define BINARY_PATH "./true"
++#ifndef renameat2
++int renameat2(int olddirfd, const char *oldpath, int newdirfd,
++ const char *newpath, unsigned int flags)
++{
++ return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
++ flags);
++}
++#endif
++
++#ifndef RENAME_EXCHANGE
++#define RENAME_EXCHANGE (1 << 1)
++#endif
++
++#define TMP_DIR "tmp"
++#define BINARY_PATH "./true"
+
+ /* Paths (sibling number and depth) */
+ static const char dir_s1d1[] = TMP_DIR "/s1d1";
+@@ -74,8 +88,42 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+ * └── s3d3
+ */
+
++static bool fgrep(FILE *const inf, const char *const str)
++{
++ char line[32];
++ const int slen = strlen(str);
++
++ while (!feof(inf)) {
++ if (!fgets(line, sizeof(line), inf))
++ break;
++ if (strncmp(line, str, slen))
++ continue;
++
++ return true;
++ }
++
++ return false;
++}
++
++static bool supports_overlayfs(void)
++{
++ bool res;
++ FILE *const inf = fopen("/proc/filesystems", "r");
++
++ /*
++ * Consider that the filesystem is supported if we cannot get the
++ * supported ones.
++ */
++ if (!inf)
++ return true;
++
++ res = fgrep(inf, "nodev\toverlay\n");
++ fclose(inf);
++ return res;
++}
++
+ static void mkdir_parents(struct __test_metadata *const _metadata,
+- const char *const path)
++ const char *const path)
+ {
+ char *walker;
+ const char *parent;
+@@ -90,9 +138,10 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
+ continue;
+ walker[i] = '\0';
+ err = mkdir(parent, 0700);
+- ASSERT_FALSE(err && errno != EEXIST) {
+- TH_LOG("Failed to create directory \"%s\": %s",
+- parent, strerror(errno));
++ ASSERT_FALSE(err && errno != EEXIST)
++ {
++ TH_LOG("Failed to create directory \"%s\": %s", parent,
++ strerror(errno));
+ }
+ walker[i] = '/';
+ }
+@@ -100,22 +149,24 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
+ }
+
+ static void create_directory(struct __test_metadata *const _metadata,
+- const char *const path)
++ const char *const path)
+ {
+ mkdir_parents(_metadata, path);
+- ASSERT_EQ(0, mkdir(path, 0700)) {
++ ASSERT_EQ(0, mkdir(path, 0700))
++ {
+ TH_LOG("Failed to create directory \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ }
+ }
+
+ static void create_file(struct __test_metadata *const _metadata,
+- const char *const path)
++ const char *const path)
+ {
+ mkdir_parents(_metadata, path);
+- ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
++ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
++ {
+ TH_LOG("Failed to create file \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ }
+ }
+
+@@ -221,8 +272,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
+ EXPECT_EQ(0, remove_path(dir_s3d2));
+ }
+
+-FIXTURE(layout1) {
+-};
++/* clang-format off */
++FIXTURE(layout1) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout1)
+ {
+@@ -242,7 +294,8 @@ FIXTURE_TEARDOWN(layout1)
+ * This helper enables to use the ASSERT_* macros and print the line number
+ * pointing to the test caller.
+ */
+-static int test_open_rel(const int dirfd, const char *const path, const int flags)
++static int test_open_rel(const int dirfd, const char *const path,
++ const int flags)
+ {
+ int fd;
+
+@@ -291,23 +344,23 @@ TEST_F_FORK(layout1, inval)
+ {
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .parent_fd = -1,
+ };
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ };
+ int ruleset_fd;
+
+- path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
+- O_CLOEXEC);
++ path_beneath.parent_fd =
++ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
+ ASSERT_EQ(EBADF, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -315,55 +368,55 @@ TEST_F_FORK(layout1, inval)
+ ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Gets a real ruleset. */
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests without O_PATH. */
+ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests with a ruleset FD. */
+ path_beneath.parent_fd = ruleset_fd;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+
+ /* Checks unhandled allowed_access. */
+- path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
+- O_CLOEXEC);
++ path_beneath.parent_fd =
++ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ /* Test with legitimate values. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
+
+ /* Test with unknown (64-bits) value. */
+ path_beneath.allowed_access |= (1ULL << 60);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ /* Test with no access. */
+ path_beneath.allowed_access = 0;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+@@ -376,6 +429,8 @@ TEST_F_FORK(layout1, inval)
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+
++/* clang-format off */
++
+ #define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+@@ -396,53 +451,87 @@ TEST_F_FORK(layout1, inval)
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ ACCESS_LAST)
+
+-TEST_F_FORK(layout1, file_access_rights)
++/* clang-format on */
++
++TEST_F_FORK(layout1, file_and_dir_access_rights)
+ {
+ __u64 access;
+ int err;
+- struct landlock_path_beneath_attr path_beneath = {};
++ struct landlock_path_beneath_attr path_beneath_file = {},
++ path_beneath_dir = {};
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+- const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ const int ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Tests access rights for files. */
+- path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+- ASSERT_LE(0, path_beneath.parent_fd);
++ path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath_file.parent_fd);
++
++ /* Tests access rights for directories. */
++ path_beneath_dir.parent_fd =
++ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath_dir.parent_fd);
++
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+- path_beneath.allowed_access = access;
++ path_beneath_dir.allowed_access = access;
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
++ LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_dir, 0));
++
++ path_beneath_file.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0);
+- if ((access | ACCESS_FILE) == ACCESS_FILE) {
++ &path_beneath_file, 0);
++ if (access & ACCESS_FILE) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EINVAL, errno);
+ }
+ }
+- ASSERT_EQ(0, close(path_beneath.parent_fd));
++ ASSERT_EQ(0, close(path_beneath_file.parent_fd));
++ ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, unknown_access_rights)
++{
++ __u64 access_mask;
++
++ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
++ access_mask >>= 1) {
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = access_mask,
++ };
++
++ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0));
++ ASSERT_EQ(EINVAL, errno);
++ }
+ }
+
+ static void add_path_beneath(struct __test_metadata *const _metadata,
+- const int ruleset_fd, const __u64 allowed_access,
+- const char *const path)
++ const int ruleset_fd, const __u64 allowed_access,
++ const char *const path)
+ {
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = allowed_access,
+ };
+
+ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
+- ASSERT_LE(0, path_beneath.parent_fd) {
++ ASSERT_LE(0, path_beneath.parent_fd)
++ {
+ TH_LOG("Failed to open directory \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ }
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0)) {
++ &path_beneath, 0))
++ {
+ TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+ }
+@@ -452,6 +541,8 @@ struct rule {
+ __u64 access;
+ };
+
++/* clang-format off */
++
+ #define ACCESS_RO ( \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+@@ -460,39 +551,46 @@ struct rule {
+ ACCESS_RO | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE)
+
++/* clang-format on */
++
+ static int create_ruleset(struct __test_metadata *const _metadata,
+- const __u64 handled_access_fs, const struct rule rules[])
++ const __u64 handled_access_fs,
++ const struct rule rules[])
+ {
+ int ruleset_fd, i;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = handled_access_fs,
+ };
+
+- ASSERT_NE(NULL, rules) {
++ ASSERT_NE(NULL, rules)
++ {
+ TH_LOG("No rule list");
+ }
+- ASSERT_NE(NULL, rules[0].path) {
++ ASSERT_NE(NULL, rules[0].path)
++ {
+ TH_LOG("Empty rule list");
+ }
+
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
+- ASSERT_LE(0, ruleset_fd) {
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd)
++ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+
+ for (i = 0; rules[i].path; i++) {
+ add_path_beneath(_metadata, ruleset_fd, rules[i].access,
+- rules[i].path);
++ rules[i].path);
+ }
+ return ruleset_fd;
+ }
+
+ static void enforce_ruleset(struct __test_metadata *const _metadata,
+- const int ruleset_fd)
++ const int ruleset_fd)
+ {
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+- ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
++ {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+ }
+@@ -503,13 +601,14 @@ TEST_F_FORK(layout1, proc_nsfs)
+ {
+ .path = "/dev/null",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ struct landlock_path_beneath_attr path_beneath;
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
+- LANDLOCK_ACCESS_FS_READ_DIR, rules);
++ const int ruleset_fd = create_ruleset(
++ _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
++ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+@@ -536,22 +635,23 @@ TEST_F_FORK(layout1, proc_nsfs)
+ * references to a ruleset.
+ */
+ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+- &path_beneath, 0));
++ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+ }
+
+-TEST_F_FORK(layout1, unpriv) {
++TEST_F_FORK(layout1, unpriv)
++{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+
+@@ -577,9 +677,9 @@ TEST_F_FORK(layout1, effective_access)
+ {
+ .path = file1_s2d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ char buf;
+@@ -589,17 +689,23 @@ TEST_F_FORK(layout1, effective_access)
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+- /* Tests on a directory. */
++ /* Tests on a directory (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++ ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
++
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+- /* Tests on a file. */
++ /* Tests on a file (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
++
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+
+ /* Checks effective read and write actions. */
+@@ -626,7 +732,7 @@ TEST_F_FORK(layout1, unhandled_access)
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ /* Here, we only handle read accesses, not write accesses. */
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+@@ -653,14 +759,14 @@ TEST_F_FORK(layout1, ruleset_overlap)
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_READ_DIR,
++ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -687,6 +793,113 @@ TEST_F_FORK(layout1, ruleset_overlap)
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ }
+
++TEST_F_FORK(layout1, layer_rule_unions)
++{
++ const struct rule layer1[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {},
++ };
++ const struct rule layer2[] = {
++ /* Doesn't change anything from layer1. */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {},
++ };
++ const struct rule layer3[] = {
++ /* Only allows write (but not read) to dir_s1d3. */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {},
++ };
++ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks s1d1 hierarchy with layer1. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d2 hierarchy with layer1. */
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d3 hierarchy with layer1. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
++ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Doesn't change anything from layer1. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks s1d1 hierarchy with layer2. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d2 hierarchy with layer2. */
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d3 hierarchy with layer2. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
++ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Only allows write (but not read) to dir_s1d3. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks s1d1 hierarchy with layer3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d2 hierarchy with layer3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d3 hierarchy with layer3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
++ /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++}
++
+ TEST_F_FORK(layout1, non_overlapping_accesses)
+ {
+ const struct rule layer1[] = {
+@@ -694,22 +907,22 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+- {}
++ {},
+ };
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
+- layer1);
++ ruleset_fd =
++ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -720,7 +933,7 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
+- layer2);
++ layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -758,7 +971,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ /* First rule with write restrictions. */
+ const struct rule layer2_read_write[] = {
+@@ -766,14 +979,14 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* ...but also denies read access via its grandparent directory. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer3_read[] = {
+ /* Allows read access via its great-grandparent directory. */
+@@ -781,7 +994,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer4_read_write[] = {
+ /*
+@@ -792,7 +1005,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer5_read[] = {
+ /*
+@@ -803,7 +1016,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer6_execute[] = {
+ /*
+@@ -814,7 +1027,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer7_read_write[] = {
+ /*
+@@ -825,12 +1038,12 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+- layer1_read);
++ layer1_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -840,8 +1053,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
++ ruleset_fd = create_ruleset(_metadata,
++ LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ layer2_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -852,7 +1067,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+- layer3_read);
++ layer3_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -863,8 +1078,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ /* This time, denies write access for the file hierarchy. */
+- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
++ ruleset_fd = create_ruleset(_metadata,
++ LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ layer4_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -879,7 +1096,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+- layer5_read);
++ layer5_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -891,7 +1108,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
+- layer6_execute);
++ layer6_execute);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -902,8 +1119,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+- ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
++ ruleset_fd = create_ruleset(_metadata,
++ LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ layer7_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+@@ -921,9 +1140,9 @@ TEST_F_FORK(layout1, inherit_subset)
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_READ_DIR,
++ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -949,7 +1168,7 @@ TEST_F_FORK(layout1, inherit_subset)
+ * ANDed with the previous ones.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+- dir_s1d2);
++ dir_s1d2);
+ /*
+ * According to ruleset_fd, dir_s1d2 should now have the
+ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
+@@ -1004,7 +1223,7 @@ TEST_F_FORK(layout1, inherit_subset)
+ * that there was no rule tied to it before.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+- dir_s1d3);
++ dir_s1d3);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+@@ -1039,7 +1258,7 @@ TEST_F_FORK(layout1, inherit_superset)
+ .path = dir_s1d3,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1054,8 +1273,10 @@ TEST_F_FORK(layout1, inherit_superset)
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
+- add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
++ add_path_beneath(_metadata, ruleset_fd,
++ LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_READ_DIR,
++ dir_s1d2);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+@@ -1075,12 +1296,12 @@ TEST_F_FORK(layout1, max_layers)
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+- for (i = 0; i < 64; i++)
++ for (i = 0; i < 16; i++)
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ for (i = 0; i < 2; i++) {
+@@ -1097,15 +1318,15 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
+ int ruleset_fd;
+
+ /* Tests empty handled_access_fs. */
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(-1, ruleset_fd);
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Enforces policy which deny read access to all files. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+@@ -1113,8 +1334,8 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
+
+ /* Nests a policy which deny read access to all directories. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+@@ -1137,7 +1358,7 @@ TEST_F_FORK(layout1, rule_on_mountpoint)
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1166,7 +1387,7 @@ TEST_F_FORK(layout1, rule_over_mountpoint)
+ .path = dir_s3d1,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1194,7 +1415,7 @@ TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
+ .path = "/",
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1224,7 +1445,7 @@ TEST_F_FORK(layout1, rule_over_root_deny)
+ .path = "/",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1244,12 +1465,13 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
+ .path = "s3d3",
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+- ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
++ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
++ {
+ TH_LOG("Failed to pivot root: %s", strerror(errno));
+ };
+ ASSERT_EQ(0, chdir("/"));
+@@ -1271,7 +1493,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1282,7 +1504,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ ASSERT_EQ(EPERM, errno);
+- ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
++ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ }
+@@ -1294,28 +1516,29 @@ TEST_F_FORK(layout1, move_mount)
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+- ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+- dir_s1d2, 0)) {
++ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++ dir_s1d2, 0))
++ {
+ TH_LOG("Failed to move mount: %s", strerror(errno));
+ }
+
+- ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+- dir_s3d2, 0));
++ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
++ dir_s3d2, 0));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+- ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+- dir_s1d2, 0));
++ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++ dir_s1d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ }
+@@ -1335,7 +1558,7 @@ TEST_F_FORK(layout1, release_inodes)
+ .path = dir_s3d3,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+@@ -1362,7 +1585,7 @@ enum relative_access {
+ };
+
+ static void test_relative_path(struct __test_metadata *const _metadata,
+- const enum relative_access rel)
++ const enum relative_access rel)
+ {
+ /*
+ * Common layer to check that chroot doesn't ignore it (i.e. a chroot
+@@ -1373,7 +1596,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ const struct rule layer2_subs[] = {
+ {
+@@ -1384,7 +1607,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
+ .path = dir_s2d2,
+ .access = ACCESS_RO,
+ },
+- {}
++ {},
+ };
+ int dirfd, ruleset_fd;
+
+@@ -1425,14 +1648,16 @@ static void test_relative_path(struct __test_metadata *const _metadata,
+ break;
+ case REL_CHROOT_ONLY:
+ /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
+- ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
++ ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
++ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+ break;
+ case REL_CHROOT_CHDIR:
+ /* Do chroot into dir_s1d2. */
+- ASSERT_EQ(0, chroot(".")) {
++ ASSERT_EQ(0, chroot("."))
++ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+@@ -1440,7 +1665,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
+ }
+
+ ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
+- test_open_rel(dirfd, "..", O_RDONLY));
++ test_open_rel(dirfd, "..", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
+
+ if (rel == REL_CHROOT_ONLY) {
+@@ -1462,11 +1687,13 @@ static void test_relative_path(struct __test_metadata *const _metadata,
+ if (rel != REL_CHROOT_CHDIR) {
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
+- ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
++ O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
+- ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
++ O_RDONLY));
+ }
+
+ if (rel == REL_OPEN)
+@@ -1495,40 +1722,42 @@ TEST_F_FORK(layout1, relative_chroot_chdir)
+ }
+
+ static void copy_binary(struct __test_metadata *const _metadata,
+- const char *const dst_path)
++ const char *const dst_path)
+ {
+ int dst_fd, src_fd;
+ struct stat statbuf;
+
+ dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
+- ASSERT_LE(0, dst_fd) {
+- TH_LOG("Failed to open \"%s\": %s", dst_path,
+- strerror(errno));
++ ASSERT_LE(0, dst_fd)
++ {
++ TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
+ }
+ src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
+- ASSERT_LE(0, src_fd) {
++ ASSERT_LE(0, src_fd)
++ {
+ TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
+- strerror(errno));
++ strerror(errno));
+ }
+ ASSERT_EQ(0, fstat(src_fd, &statbuf));
+- ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
+- statbuf.st_size));
++ ASSERT_EQ(statbuf.st_size,
++ sendfile(dst_fd, src_fd, 0, statbuf.st_size));
+ ASSERT_EQ(0, close(src_fd));
+ ASSERT_EQ(0, close(dst_fd));
+ }
+
+-static void test_execute(struct __test_metadata *const _metadata,
+- const int err, const char *const path)
++static void test_execute(struct __test_metadata *const _metadata, const int err,
++ const char *const path)
+ {
+ int status;
+- char *const argv[] = {(char *)path, NULL};
++ char *const argv[] = { (char *)path, NULL };
+ const pid_t child = fork();
+
+ ASSERT_LE(0, child);
+ if (child == 0) {
+- ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
++ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
++ {
+ TH_LOG("Failed to execute \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ };
+ ASSERT_EQ(err, errno);
+ _exit(_metadata->passed ? 2 : 1);
+@@ -1536,9 +1765,10 @@ static void test_execute(struct __test_metadata *const _metadata,
+ }
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+- ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
++ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
++ {
+ TH_LOG("Unexpected return code for \"%s\": %s", path,
+- strerror(errno));
++ strerror(errno));
+ };
+ }
+
+@@ -1549,10 +1779,10 @@ TEST_F_FORK(layout1, execute)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ copy_binary(_metadata, file1_s1d1);
+@@ -1577,15 +1807,21 @@ TEST_F_FORK(layout1, execute)
+
+ TEST_F_FORK(layout1, link)
+ {
+- const struct rule rules[] = {
++ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+- {}
++ {},
++ };
++ const struct rule layer2[] = {
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ },
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+
+@@ -1598,14 +1834,30 @@ TEST_F_FORK(layout1, link)
+
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
++
+ /* Denies linking because of reparenting. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
++ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
++
++ /* Prepares for next unlinks. */
++ ASSERT_EQ(0, unlink(file2_s1d2));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++
++ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that linking doesn't require the ability to delete a file. */
++ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
++ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+ }
+
+ TEST_F_FORK(layout1, rename_file)
+@@ -1619,14 +1871,13 @@ TEST_F_FORK(layout1, rename_file)
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+- ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+@@ -1662,9 +1913,15 @@ TEST_F_FORK(layout1, rename_file)
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
++ /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
++ ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
++ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
++ /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
++ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
++ ASSERT_EQ(EACCES, errno);
+
+ /* Renames files with different parents. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+@@ -1675,14 +1932,14 @@ TEST_F_FORK(layout1, rename_file)
+
+ /* Exchanges and renames files with same parent. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
+- RENAME_EXCHANGE));
++ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
+
+ /* Exchanges files and directories with same parent, twice. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+- RENAME_EXCHANGE));
++ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+- RENAME_EXCHANGE));
++ RENAME_EXCHANGE));
+ }
+
+ TEST_F_FORK(layout1, rename_dir)
+@@ -1696,10 +1953,10 @@ TEST_F_FORK(layout1, rename_dir)
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+@@ -1727,16 +1984,22 @@ TEST_F_FORK(layout1, rename_dir)
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
++ /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
++ ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
++ /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
++ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
++ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Exchanges and renames directory to the same parent, which allows
+ * directory removal.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
+- RENAME_EXCHANGE));
++ RENAME_EXCHANGE));
+ ASSERT_EQ(0, unlink(dir_s1d3));
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
+@@ -1750,10 +2013,10 @@ TEST_F_FORK(layout1, remove_dir)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+@@ -1787,10 +2050,10 @@ TEST_F_FORK(layout1, remove_file)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+@@ -1805,14 +2068,15 @@ TEST_F_FORK(layout1, remove_file)
+ }
+
+ static void test_make_file(struct __test_metadata *const _metadata,
+- const __u64 access, const mode_t mode, const dev_t dev)
++ const __u64 access, const mode_t mode,
++ const dev_t dev)
+ {
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = access,
+ },
+- {}
++ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, access, rules);
+
+@@ -1820,9 +2084,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file2_s1d1));
+- ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
+- TH_LOG("Failed to make file \"%s\": %s",
+- file2_s1d1, strerror(errno));
++ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
++ {
++ TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
++ strerror(errno));
+ };
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+@@ -1841,9 +2106,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+- ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
+- TH_LOG("Failed to make file \"%s\": %s",
+- file1_s1d2, strerror(errno));
++ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
++ {
++ TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
++ strerror(errno));
+ };
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+@@ -1860,7 +2126,7 @@ TEST_F_FORK(layout1, make_char)
+ /* Creates a /dev/null device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
+- makedev(1, 3));
++ makedev(1, 3));
+ }
+
+ TEST_F_FORK(layout1, make_block)
+@@ -1868,7 +2134,7 @@ TEST_F_FORK(layout1, make_block)
+ /* Creates a /dev/loop0 device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
+- makedev(7, 0));
++ makedev(7, 0));
+ }
+
+ TEST_F_FORK(layout1, make_reg_1)
+@@ -1898,10 +2164,10 @@ TEST_F_FORK(layout1, make_sym)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+@@ -1943,10 +2209,10 @@ TEST_F_FORK(layout1, make_dir)
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+- {}
++ {},
+ };
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+@@ -1965,12 +2231,12 @@ TEST_F_FORK(layout1, make_dir)
+ }
+
+ static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
+- const int open_flags)
++ const int open_flags)
+ {
+ static const char path_template[] = "/proc/self/fd/%d";
+ char procfd_path[sizeof(path_template) + 10];
+- const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
+- path_template, fd);
++ const int procfd_path_size =
++ snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
+
+ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+ return open(procfd_path, open_flags);
+@@ -1983,12 +2249,13 @@ TEST_F_FORK(layout1, proc_unlinked_file)
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ int reg_fd, proc_fd;
+- const int ruleset_fd = create_ruleset(_metadata,
+- LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
++ const int ruleset_fd = create_ruleset(
++ _metadata,
++ LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
++ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+@@ -2005,9 +2272,10 @@ TEST_F_FORK(layout1, proc_unlinked_file)
+ ASSERT_EQ(0, close(proc_fd));
+
+ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
+- ASSERT_EQ(-1, proc_fd) {
+- TH_LOG("Successfully opened /proc/self/fd/%d: %s",
+- reg_fd, strerror(errno));
++ ASSERT_EQ(-1, proc_fd)
++ {
++ TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
++ strerror(errno));
+ }
+ ASSERT_EQ(EACCES, errno);
+
+@@ -2023,13 +2291,13 @@ TEST_F_FORK(layout1, proc_pipe)
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ /* Limits read and write access to files tied to the filesystem. */
+- const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
+- rules);
++ const int ruleset_fd =
++ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+@@ -2041,7 +2309,8 @@ TEST_F_FORK(layout1, proc_pipe)
+
+ /* Checks access to pipes through FD. */
+ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
+- ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
++ ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
++ {
+ TH_LOG("Failed to write in pipe: %s", strerror(errno));
+ }
+ ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
+@@ -2050,9 +2319,10 @@ TEST_F_FORK(layout1, proc_pipe)
+ /* Checks write access to pipe through /proc/self/fd . */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+- ASSERT_EQ(1, write(proc_fd, ".", 1)) {
++ ASSERT_EQ(1, write(proc_fd, ".", 1))
++ {
+ TH_LOG("Failed to write through /proc/self/fd/%d: %s",
+- pipe_fds[1], strerror(errno));
++ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+@@ -2060,9 +2330,10 @@ TEST_F_FORK(layout1, proc_pipe)
+ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ buf = '\0';
+- ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
++ ASSERT_EQ(1, read(proc_fd, &buf, 1))
++ {
+ TH_LOG("Failed to read through /proc/self/fd/%d: %s",
+- pipe_fds[1], strerror(errno));
++ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+@@ -2070,8 +2341,9 @@ TEST_F_FORK(layout1, proc_pipe)
+ ASSERT_EQ(0, close(pipe_fds[1]));
+ }
+
+-FIXTURE(layout1_bind) {
+-};
++/* clang-format off */
++FIXTURE(layout1_bind) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout1_bind)
+ {
+@@ -2161,7 +2433,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
+ .path = dir_s2d1,
+ .access = ACCESS_RW,
+ },
+- {}
++ {},
+ };
+ /*
+ * Sets access rights on the same bind-mounted directories. The result
+@@ -2177,7 +2449,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
+ .path = dir_s2d2,
+ .access = ACCESS_RW,
+ },
+- {}
++ {},
+ };
+ /* Only allow read-access to the s1d3 hierarchies. */
+ const struct rule layer3_source[] = {
+@@ -2185,7 +2457,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+- {}
++ {},
+ };
+ /* Removes all access rights. */
+ const struct rule layer4_destination[] = {
+@@ -2193,7 +2465,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
+ .path = bind_file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+
+@@ -2282,8 +2554,8 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
+ }
+
+-#define LOWER_BASE TMP_DIR "/lower"
+-#define LOWER_DATA LOWER_BASE "/data"
++#define LOWER_BASE TMP_DIR "/lower"
++#define LOWER_DATA LOWER_BASE "/data"
+ static const char lower_fl1[] = LOWER_DATA "/fl1";
+ static const char lower_dl1[] = LOWER_DATA "/dl1";
+ static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
+@@ -2295,23 +2567,23 @@ static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
+ static const char (*lower_base_files[])[] = {
+ &lower_fl1,
+ &lower_fo1,
+- NULL
++ NULL,
+ };
+ static const char (*lower_base_directories[])[] = {
+ &lower_dl1,
+ &lower_do1,
+- NULL
++ NULL,
+ };
+ static const char (*lower_sub_files[])[] = {
+ &lower_dl1_fl2,
+ &lower_do1_fo2,
+ &lower_do1_fl3,
+- NULL
++ NULL,
+ };
+
+-#define UPPER_BASE TMP_DIR "/upper"
+-#define UPPER_DATA UPPER_BASE "/data"
+-#define UPPER_WORK UPPER_BASE "/work"
++#define UPPER_BASE TMP_DIR "/upper"
++#define UPPER_DATA UPPER_BASE "/data"
++#define UPPER_WORK UPPER_BASE "/work"
+ static const char upper_fu1[] = UPPER_DATA "/fu1";
+ static const char upper_du1[] = UPPER_DATA "/du1";
+ static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
+@@ -2323,22 +2595,22 @@ static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
+ static const char (*upper_base_files[])[] = {
+ &upper_fu1,
+ &upper_fo1,
+- NULL
++ NULL,
+ };
+ static const char (*upper_base_directories[])[] = {
+ &upper_du1,
+ &upper_do1,
+- NULL
++ NULL,
+ };
+ static const char (*upper_sub_files[])[] = {
+ &upper_du1_fu2,
+ &upper_do1_fo2,
+ &upper_do1_fu3,
+- NULL
++ NULL,
+ };
+
+-#define MERGE_BASE TMP_DIR "/merge"
+-#define MERGE_DATA MERGE_BASE "/data"
++#define MERGE_BASE TMP_DIR "/merge"
++#define MERGE_DATA MERGE_BASE "/data"
+ static const char merge_fl1[] = MERGE_DATA "/fl1";
+ static const char merge_dl1[] = MERGE_DATA "/dl1";
+ static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
+@@ -2355,21 +2627,17 @@ static const char (*merge_base_files[])[] = {
+ &merge_fl1,
+ &merge_fu1,
+ &merge_fo1,
+- NULL
++ NULL,
+ };
+ static const char (*merge_base_directories[])[] = {
+ &merge_dl1,
+ &merge_du1,
+ &merge_do1,
+- NULL
++ NULL,
+ };
+ static const char (*merge_sub_files[])[] = {
+- &merge_dl1_fl2,
+- &merge_du1_fu2,
+- &merge_do1_fo2,
+- &merge_do1_fl3,
+- &merge_do1_fu3,
+- NULL
++ &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
++ &merge_do1_fl3, &merge_do1_fu3, NULL,
+ };
+
+ /*
+@@ -2411,11 +2679,15 @@ static const char (*merge_sub_files[])[] = {
+ * └── work
+ */
+
+-FIXTURE(layout2_overlay) {
+-};
++/* clang-format off */
++FIXTURE(layout2_overlay) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout2_overlay)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ prepare_layout(_metadata);
+
+ create_directory(_metadata, LOWER_BASE);
+@@ -2444,15 +2716,17 @@ FIXTURE_SETUP(layout2_overlay)
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
+- "lowerdir=" LOWER_DATA
+- ",upperdir=" UPPER_DATA
+- ",workdir=" UPPER_WORK));
++ "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
++ ",workdir=" UPPER_WORK));
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ }
+
+ FIXTURE_TEARDOWN(layout2_overlay)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ EXPECT_EQ(0, remove_path(lower_do1_fl3));
+ EXPECT_EQ(0, remove_path(lower_dl1_fl2));
+ EXPECT_EQ(0, remove_path(lower_fl1));
+@@ -2484,6 +2758,9 @@ FIXTURE_TEARDOWN(layout2_overlay)
+
+ TEST_F_FORK(layout2_overlay, no_restriction)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
+@@ -2513,9 +2790,9 @@ TEST_F_FORK(layout2_overlay, no_restriction)
+ ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
+ }
+
+-#define for_each_path(path_list, path_entry, i) \
+- for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+- path_entry = *path_list[++i])
++#define for_each_path(path_list, path_entry, i) \
++ for (i = 0, path_entry = *path_list[i]; path_list[i]; \
++ path_entry = *path_list[++i])
+
+ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ {
+@@ -2533,7 +2810,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ .path = MERGE_BASE,
+ .access = ACCESS_RW,
+ },
+- {}
++ {},
+ };
+ const struct rule layer2_data[] = {
+ {
+@@ -2548,7 +2825,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ .path = MERGE_DATA,
+ .access = ACCESS_RW,
+ },
+- {}
++ {},
+ };
+ /* Sets access right on directories inside both layers. */
+ const struct rule layer3_subdirs[] = {
+@@ -2580,7 +2857,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ .path = merge_do1,
+ .access = ACCESS_RW,
+ },
+- {}
++ {},
+ };
+ /* Tighten access rights to the files. */
+ const struct rule layer4_files[] = {
+@@ -2611,42 +2888,45 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ {
+ .path = merge_dl1_fl2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_du1_fu2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fl3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fu3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ const struct rule layer5_merge_only[] = {
+ {
+ .path = MERGE_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+- LANDLOCK_ACCESS_FS_WRITE_FILE,
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+- {}
++ {},
+ };
+ int ruleset_fd;
+ size_t i;
+ const char *path_entry;
+
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ /* Sets rules on base directories (i.e. outside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+@@ -2659,7 +2939,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(lower_base_directories, path_entry, i) {
+- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ ASSERT_EQ(EACCES,
++ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+@@ -2671,7 +2952,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(upper_base_directories, path_entry, i) {
+- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ ASSERT_EQ(EACCES,
++ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+@@ -2756,7 +3038,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ ASSERT_EQ(EACCES,
++ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+@@ -2781,7 +3064,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+- ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ ASSERT_EQ(EACCES,
++ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+index 15fbef9cc8496..55e7871631a19 100644
+--- a/tools/testing/selftests/landlock/ptrace_test.c
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -19,6 +19,12 @@
+
+ #include "common.h"
+
++/* Copied from security/yama/yama_lsm.c */
++#define YAMA_SCOPE_DISABLED 0
++#define YAMA_SCOPE_RELATIONAL 1
++#define YAMA_SCOPE_CAPABILITY 2
++#define YAMA_SCOPE_NO_ATTACH 3
++
+ static void create_domain(struct __test_metadata *const _metadata)
+ {
+ int ruleset_fd;
+@@ -26,9 +32,10 @@ static void create_domain(struct __test_metadata *const _metadata)
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
+ };
+
+- ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+- sizeof(ruleset_attr), 0);
+- EXPECT_LE(0, ruleset_fd) {
++ ruleset_fd =
++ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
++ EXPECT_LE(0, ruleset_fd)
++ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+@@ -43,7 +50,7 @@ static int test_ptrace_read(const pid_t pid)
+ int procenv_path_size, fd;
+
+ procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
+- path_template, pid);
++ path_template, pid);
+ if (procenv_path_size >= sizeof(procenv_path))
+ return E2BIG;
+
+@@ -59,9 +66,31 @@ static int test_ptrace_read(const pid_t pid)
+ return 0;
+ }
+
+-FIXTURE(hierarchy) { };
++static int get_yama_ptrace_scope(void)
++{
++ int ret;
++ char buf[2] = {};
++ const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
++
++ if (fd < 0)
++ return 0;
++
++ if (read(fd, buf, 1) < 0) {
++ close(fd);
++ return -1;
++ }
++
++ ret = atoi(buf);
++ close(fd);
++ return ret;
++}
++
++/* clang-format off */
++FIXTURE(hierarchy) {};
++/* clang-format on */
+
+-FIXTURE_VARIANT(hierarchy) {
++FIXTURE_VARIANT(hierarchy)
++{
+ const bool domain_both;
+ const bool domain_parent;
+ const bool domain_child;
+@@ -83,7 +112,9 @@ FIXTURE_VARIANT(hierarchy) {
+ * \ P2 -> P1 : allow
+ * 'P2
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = false,
+@@ -98,7 +129,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+ * | P2 |
+ * '------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = true,
+@@ -112,7 +145,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+ * '
+ * P2
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = false,
+@@ -127,7 +162,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+ * | P2 |
+ * '------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = true,
+@@ -142,7 +179,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+ * | P2 |
+ * '-------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = false,
+@@ -158,7 +197,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+ * | '------' |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = true,
+@@ -174,7 +215,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+ * | P2 |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = false,
+@@ -192,17 +235,21 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+ * | '------' |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = true,
+ };
+
+ FIXTURE_SETUP(hierarchy)
+-{ }
++{
++}
+
+ FIXTURE_TEARDOWN(hierarchy)
+-{ }
++{
++}
+
+ /* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+ TEST_F(hierarchy, trace)
+@@ -210,8 +257,51 @@ TEST_F(hierarchy, trace)
+ pid_t child, parent;
+ int status, err_proc_read;
+ int pipe_child[2], pipe_parent[2];
++ int yama_ptrace_scope;
+ char buf_parent;
+ long ret;
++ bool can_read_child, can_trace_child, can_read_parent, can_trace_parent;
++
++ yama_ptrace_scope = get_yama_ptrace_scope();
++ ASSERT_LE(0, yama_ptrace_scope);
++
++ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
++ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
++ yama_ptrace_scope);
++
++ /*
++ * can_read_child is true if a parent process can read its child
++ * process, which is only the case when the parent process is not
++ * isolated from the child with a dedicated Landlock domain.
++ */
++ can_read_child = !variant->domain_parent;
++
++ /*
++ * can_trace_child is true if a parent process can trace its child
++ * process. This depends on two conditions:
++ * - The parent process is not isolated from the child with a dedicated
++ * Landlock domain.
++ * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL).
++ */
++ can_trace_child = can_read_child &&
++ yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL;
++
++ /*
++ * can_read_parent is true if a child process can read its parent
++ * process, which is only the case when the child process is not
++ * isolated from the parent with a dedicated Landlock domain.
++ */
++ can_read_parent = !variant->domain_child;
++
++ /*
++ * can_trace_parent is true if a child process can trace its parent
++ * process. This depends on two conditions:
++ * - The child process is not isolated from the parent with a dedicated
++ * Landlock domain.
++ * - Yama is disabled (YAMA_SCOPE_DISABLED).
++ */
++ can_trace_parent = can_read_parent &&
++ yama_ptrace_scope <= YAMA_SCOPE_DISABLED;
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+@@ -242,16 +332,21 @@ TEST_F(hierarchy, trace)
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */
++ /* Tests PTRACE_MODE_READ on the parent. */
+ err_proc_read = test_ptrace_read(parent);
++ if (can_read_parent) {
++ EXPECT_EQ(0, err_proc_read);
++ } else {
++ EXPECT_EQ(EACCES, err_proc_read);
++ }
++
++ /* Tests PTRACE_ATTACH on the parent. */
+ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
+- if (variant->domain_child) {
++ if (can_trace_parent) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- EXPECT_EQ(EACCES, err_proc_read);
+- } else {
+- EXPECT_EQ(0, ret);
+- EXPECT_EQ(0, err_proc_read);
+ }
+ if (ret == 0) {
+ ASSERT_EQ(parent, waitpid(parent, &status, 0));
+@@ -261,11 +356,11 @@ TEST_F(hierarchy, trace)
+
+ /* Tests child PTRACE_TRACEME. */
+ ret = ptrace(PTRACE_TRACEME);
+- if (variant->domain_parent) {
++ if (can_trace_child) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- } else {
+- EXPECT_EQ(0, ret);
+ }
+
+ /*
+@@ -274,7 +369,7 @@ TEST_F(hierarchy, trace)
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+- if (!variant->domain_parent) {
++ if (can_trace_child) {
+ ASSERT_EQ(0, raise(SIGSTOP));
+ }
+
+@@ -299,7 +394,7 @@ TEST_F(hierarchy, trace)
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+- if (!variant->domain_parent) {
++ if (can_trace_child) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+@@ -309,17 +404,23 @@ TEST_F(hierarchy, trace)
+ EXPECT_EQ(ESRCH, errno);
+ }
+
+- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */
++ /* Tests PTRACE_MODE_READ on the child. */
+ err_proc_read = test_ptrace_read(child);
++ if (can_read_child) {
++ EXPECT_EQ(0, err_proc_read);
++ } else {
++ EXPECT_EQ(EACCES, err_proc_read);
++ }
++
++ /* Tests PTRACE_ATTACH on the child. */
+ ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
+- if (variant->domain_parent) {
++ if (can_trace_child) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- EXPECT_EQ(EACCES, err_proc_read);
+- } else {
+- EXPECT_EQ(0, ret);
+- EXPECT_EQ(0, err_proc_read);
+ }
++
+ if (ret == 0) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+@@ -330,7 +431,7 @@ TEST_F(hierarchy, trace)
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+- WEXITSTATUS(status) != EXIT_SUCCESS)
++ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->passed = 0;
+ }
+
+diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
+index fe7ee2b0f29c2..a3df78b7702c1 100644
+--- a/tools/testing/selftests/lib.mk
++++ b/tools/testing/selftests/lib.mk
+@@ -129,6 +129,11 @@ endef
+ clean:
+ $(CLEAN)
+
++# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
++# make USERCFLAGS=-Werror USERLDFLAGS=-static
++CFLAGS += $(USERCFLAGS)
++LDFLAGS += $(USERLDFLAGS)
++
+ # When make O= with kselftest target from main level
+ # the following aren't defined.
+ #
+diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
+index 38edea25631bc..b642411ceb6c3 100644
+--- a/tools/testing/selftests/lkdtm/config
++++ b/tools/testing/selftests/lkdtm/config
+@@ -6,5 +6,6 @@ CONFIG_HARDENED_USERCOPY=y
+ # CONFIG_HARDENED_USERCOPY_FALLBACK is not set
+ CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+ CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
++CONFIG_UBSAN=y
+ CONFIG_UBSAN_BOUNDS=y
+ CONFIG_UBSAN_TRAP=y
+diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
+index 1b4d95d575f85..14fedeef762ed 100755
+--- a/tools/testing/selftests/lkdtm/stack-entropy.sh
++++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
+@@ -4,13 +4,27 @@
+ # Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
+ set -e
+ samples="${1:-1000}"
++TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
++KSELFTEST_SKIP_TEST=4
++
++# Verify we have LKDTM available in the kernel.
++if [ ! -r $TRIGGER ] ; then
++ /sbin/modprobe -q lkdtm || true
++ if [ ! -r $TRIGGER ] ; then
++ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
++ else
++ echo "Cannot write $TRIGGER (need to run as root?)"
++ fi
++ # Skip this test
++ exit $KSELFTEST_SKIP_TEST
++fi
+
+ # Capture dmesg continuously since it may fill up depending on sample size.
+ log=$(mktemp -t stack-entropy-XXXXXX)
+ dmesg --follow >"$log" & pid=$!
+ report=-1
+ for i in $(seq 1 $samples); do
+- echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
++ echo "REPORT_STACK" > $TRIGGER
+ if [ -t 1 ]; then
+ percent=$(( 100 * $i / $samples ))
+ if [ "$percent" -ne "$report" ]; then
+diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
+index be675002f918a..93798c8c5d54b 100644
+--- a/tools/testing/selftests/memfd/fuse_test.c
++++ b/tools/testing/selftests/memfd/fuse_test.c
+@@ -22,6 +22,7 @@
+ #include <linux/falloc.h>
+ #include <fcntl.h>
+ #include <linux/memfd.h>
++#include <linux/types.h>
+ #include <sched.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
+index 192a2899bae8f..94df2692e6e4a 100644
+--- a/tools/testing/selftests/memfd/memfd_test.c
++++ b/tools/testing/selftests/memfd/memfd_test.c
+@@ -455,6 +455,7 @@ static void mfd_fail_write(int fd)
+ printf("mmap()+mprotect() didn't fail as expected\n");
+ abort();
+ }
++ munmap(p, mfd_def_size);
+ }
+
+ /* verify PUNCH_HOLE fails */
+diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
+index e54106643337b..4c88238fc8f05 100644
+--- a/tools/testing/selftests/mincore/mincore_selftest.c
++++ b/tools/testing/selftests/mincore/mincore_selftest.c
+@@ -207,15 +207,21 @@ TEST(check_file_mmap)
+
+ errno = 0;
+ fd = open(".", O_TMPFILE | O_RDWR, 0600);
+- ASSERT_NE(-1, fd) {
+- TH_LOG("Can't create temporary file: %s",
+- strerror(errno));
++ if (fd < 0) {
++ ASSERT_EQ(errno, EOPNOTSUPP) {
++ TH_LOG("Can't create temporary file: %s",
++ strerror(errno));
++ }
++ SKIP(goto out_free, "O_TMPFILE not supported by filesystem.");
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+- ASSERT_EQ(0, retval) {
+- TH_LOG("Error allocating space for the temporary file: %s",
+- strerror(errno));
++ if (retval) {
++ ASSERT_EQ(errno, EOPNOTSUPP) {
++ TH_LOG("Error allocating space for the temporary file: %s",
++ strerror(errno));
++ }
++ SKIP(goto out_close, "fallocate not supported by filesystem.");
+ }
+
+ /*
+@@ -271,7 +277,9 @@ TEST(check_file_mmap)
+ }
+
+ munmap(addr, FILE_SIZE);
++out_close:
+ close(fd);
++out_free:
+ free(vec);
+ }
+
+diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+index f31205f04ee05..c6a8c732b8021 100644
+--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
++++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+@@ -18,6 +18,7 @@
+ #include <grp.h>
+ #include <stdbool.h>
+ #include <stdarg.h>
++#include <linux/mount.h>
+
+ #include "../kselftest_harness.h"
+
+@@ -103,13 +104,6 @@
+ #else
+ #define __NR_mount_setattr 442
+ #endif
+-
+-struct mount_attr {
+- __u64 attr_set;
+- __u64 attr_clr;
+- __u64 propagation;
+- __u64 userns_fd;
+-};
+ #endif
+
+ #ifndef __NR_open_tree
+@@ -1236,7 +1230,7 @@ static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long
+ }
+
+ /**
+- * Validate that an attached mount in our mount namespace can be idmapped.
++ * Validate that an attached mount in our mount namespace cannot be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+@@ -1259,7 +1253,7 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+- ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
++ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+ }
+diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
+index b019e0b8221c7..84fda3b490735 100644
+--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
++++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
+@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
+ if (in_shutdown++)
+ return;
+
++ /* Free the cpu_set allocated using CPU_ALLOC in main function */
++ CPU_FREE(cpu_set);
++
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i]) {
+ pthread_kill(cpu_threads[i], SIGUSR1);
+@@ -551,6 +554,12 @@ int main(int argc, char *argv[])
+ perror("sysconf(_SC_NPROCESSORS_ONLN)");
+ exit(1);
+ }
++
++ if (getuid() != 0)
++ ksft_exit_skip("Not running as root, but almost all tests "
++ "require root in order to modify\nsystem settings. "
++ "Exiting.\n");
++
+ cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+ cpu_set = CPU_ALLOC(cpus_online);
+ if (cpu_set == NULL) {
+@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
+ cpu_set)) {
+ fprintf(stderr, "Any given CPU may "
+ "only be given once.\n");
+- exit(1);
++ goto err_code;
+ } else
+ CPU_SET_S(cpus_to_pin[cpu],
+ cpu_set_size, cpu_set);
+@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
+ queue_path = malloc(strlen(option) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+- exit(1);
++ goto err_code;
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
+ fprintf(stderr, "Must pass at least one CPU to continuous "
+ "mode.\n");
+ poptPrintUsage(popt_context, stderr, 0);
+- exit(1);
++ goto err_code;
+ } else if (!continuous_mode) {
+ num_cpus_to_pin = 1;
+ cpus_to_pin[0] = cpus_online - 1;
+ }
+
+- if (getuid() != 0)
+- ksft_exit_skip("Not running as root, but almost all tests "
+- "require root in order to modify\nsystem settings. "
+- "Exiting.\n");
+-
+ max_msgs = fopen(MAX_MSGS, "r+");
+ max_msgsize = fopen(MAX_MSGSIZE, "r+");
+ if (!max_msgs)
+@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
+ sleep(1);
+ }
+ shutdown(0, "", 0);
++
++err_code:
++ CPU_FREE(cpu_set);
++ exit(1);
++
+ }
+diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
+index 492b273743b4e..6a953ec793ced 100644
+--- a/tools/testing/selftests/net/Makefile
++++ b/tools/testing/selftests/net/Makefile
+@@ -12,7 +12,7 @@ TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_a
+ TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
+ TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+-TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
++TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
+ TEST_PROGS += route_localnet.sh
+ TEST_PROGS += reuseaddr_ports_exhausted.sh
+ TEST_PROGS += txtimestamp.sh
+@@ -28,7 +28,12 @@ TEST_PROGS += veth.sh
+ TEST_PROGS += ioam6.sh
+ TEST_PROGS += gro.sh
+ TEST_PROGS += gre_gso.sh
+-TEST_PROGS_EXTENDED := in_netns.sh
++TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
++TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
++TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
++TEST_PROGS += vrf_strict_mode_test.sh
++TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
++TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
+ TEST_GEN_FILES = socket nettest
+ TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
+ TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
+diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
+index 3dece8b292536..532459a15067c 100644
+--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
++++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
+@@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr)
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+- sizeof(struct sockaddr)) != 0) {
++ sizeof(*consumer_addr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+@@ -218,10 +218,10 @@ main(int argc, char **argv)
+
+ /* Test 1:
+ * veriyf that SIGURG is
+- * delivered and 63 bytes are
+- * read and oob is '@'
++ * delivered, 63 bytes are
++ * read, oob is '@', and POLLPRI works.
+ */
+- wait_for_data(pfd, POLLIN | POLLPRI);
++ wait_for_data(pfd, POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
+index 2b5d6ff873738..2d84c7a0be6b2 100755
+--- a/tools/testing/selftests/net/devlink_port_split.py
++++ b/tools/testing/selftests/net/devlink_port_split.py
+@@ -59,6 +59,8 @@ class devlink_ports(object):
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
++ validate_devlink_output(ports, 'flavour')
++
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+@@ -220,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev):
+ unsplit(port.bus_info)
+
+
++def validate_devlink_output(devlink_data, target_property=None):
++ """
++ Determine if test should be skipped by checking:
++ 1. devlink_data contains values
++ 2. The target_property exist in devlink_data
++ """
++ skip_reason = None
++ if any(devlink_data.values()):
++ if target_property:
++ skip_reason = "{} not found in devlink output, test skipped".format(target_property)
++ for key in devlink_data:
++ if target_property in devlink_data[key]:
++ skip_reason = None
++ else:
++ skip_reason = 'devlink output is empty, test skipped'
++
++ if skip_reason:
++ print(skip_reason)
++ sys.exit(KSFT_SKIP)
++
++
+ def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+@@ -240,12 +263,9 @@ def main(cmdline=None):
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
++ validate_devlink_output(json.loads(stdout))
+ devs = json.loads(stdout)['dev']
+- if devs:
+- dev = list(devs.keys())[0]
+- else:
+- print("no devlink device was found, test skipped")
+- sys.exit(KSFT_SKIP)
++ dev = list(devs.keys())[0]
+
+ cmd = "devlink dev show %s" % dev
+ stdout, stderr = run_command(cmd)
+@@ -255,6 +275,7 @@ def main(cmdline=None):
+
+ ports = devlink_ports(dev)
+
++ found_max_lanes = False
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+@@ -277,6 +298,11 @@ def main(cmdline=None):
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
++ found_max_lanes = True
++
++ if not found_max_lanes:
++ print(f"Test not started, no port of device {dev} reports max_lanes")
++ sys.exit(KSFT_SKIP)
+
+
+ if __name__ == "__main__":
+diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
+index 3313566ce9062..6ecdbbe1b54fb 100755
+--- a/tools/testing/selftests/net/fcnal-test.sh
++++ b/tools/testing/selftests/net/fcnal-test.sh
+@@ -84,6 +84,13 @@ NSC_CMD="ip netns exec ${NSC}"
+
+ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
++# Check if FIPS mode is enabled
++if [ -f /proc/sys/crypto/fips_enabled ]; then
++ fips_enabled=`cat /proc/sys/crypto/fips_enabled`
++else
++ fips_enabled=0
++fi
++
+ ################################################################################
+ # utilities
+
+@@ -455,6 +462,22 @@ cleanup()
+ ip netns del ${NSC} >/dev/null 2>&1
+ }
+
++cleanup_vrf_dup()
++{
++ ip link del ${NSA_DEV2} >/dev/null 2>&1
++ ip netns pids ${NSC} | xargs kill 2>/dev/null
++ ip netns del ${NSC} >/dev/null 2>&1
++}
++
++setup_vrf_dup()
++{
++ # some VRF tests use ns-C which has the same config as
++ # ns-B but for a device NOT in the VRF
++ create_ns ${NSC} "-" "-"
++ connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
++ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
++}
++
+ setup()
+ {
+ local with_vrf=${1}
+@@ -484,12 +507,6 @@ setup()
+
+ ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
+ ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
+-
+- # some VRF tests use ns-C which has the same config as
+- # ns-B but for a device NOT in the VRF
+- create_ns ${NSC} "-" "-"
+- connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+- ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+ else
+ ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
+ ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
+@@ -793,10 +810,16 @@ ipv4_ping()
+ setup
+ set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
+ ipv4_ping_novrf
++ setup
++ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
++ ipv4_ping_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv4_ping_vrf
++ setup "yes"
++ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
++ ipv4_ping_vrf
+ }
+
+ ################################################################################
+@@ -1186,7 +1209,7 @@ ipv4_tcp_novrf()
+ run_cmd nettest -d ${NSA_DEV} -r ${a}
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+
+- ipv4_tcp_md5_novrf
++ [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf
+ }
+
+ ipv4_tcp_vrf()
+@@ -1240,7 +1263,11 @@ ipv4_tcp_vrf()
+ log_test_addr ${a} $? 1 "Global server, local connection"
+
+ # run MD5 tests
+- ipv4_tcp_md5
++ if [ "$fips_enabled" = "0" ]; then
++ setup_vrf_dup
++ ipv4_tcp_md5
++ cleanup_vrf_dup
++ fi
+
+ #
+ # enable VRF global server
+@@ -1798,8 +1825,9 @@ ipv4_addr_bind_vrf()
+ for a in ${NSA_IP} ${VRF_IP}
+ do
+ log_start
++ show_hint "Socket not bound to VRF, but address is in VRF"
+ run_cmd nettest -s -R -P icmp -l ${a} -b
+- log_test_addr ${a} $? 0 "Raw socket bind to local address"
++ log_test_addr ${a} $? 1 "Raw socket bind to local address"
+
+ log_start
+ run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
+@@ -2191,7 +2219,7 @@ ipv6_ping_vrf()
+ log_start
+ show_hint "Fails since VRF device does not support linklocal or multicast"
+ run_cmd ${ping6} -c1 -w1 ${a}
+- log_test_addr ${a} $? 2 "ping out, VRF bind"
++ log_test_addr ${a} $? 1 "ping out, VRF bind"
+ done
+
+ for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
+@@ -2311,10 +2339,16 @@ ipv6_ping()
+ log_subsection "No VRF"
+ setup
+ ipv6_ping_novrf
++ setup
++ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
++ ipv6_ping_novrf
+
+ log_subsection "With VRF"
+ setup "yes"
+ ipv6_ping_vrf
++ setup "yes"
++ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
++ ipv6_ping_vrf
+ }
+
+ ################################################################################
+@@ -2649,7 +2683,7 @@ ipv6_tcp_novrf()
+ log_test_addr ${a} $? 1 "No server, device client, local conn"
+ done
+
+- ipv6_tcp_md5_novrf
++ [ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf
+ }
+
+ ipv6_tcp_vrf()
+@@ -2719,7 +2753,11 @@ ipv6_tcp_vrf()
+ log_test_addr ${a} $? 1 "Global server, local connection"
+
+ # run MD5 tests
+- ipv6_tcp_md5
++ if [ "$fips_enabled" = "0" ]; then
++ setup_vrf_dup
++ ipv6_tcp_md5
++ cleanup_vrf_dup
++ fi
+
+ #
+ # enable VRF global server
+@@ -3414,11 +3452,14 @@ ipv6_addr_bind_novrf()
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+
++ # Sadly, the kernel allows binding a socket to a device and then
++ # binding to an address not on the device. So this test passes
++ # when it really should not
+ a=${NSA_LO_IP6}
+ log_start
+- show_hint "Should fail with 'Cannot assign requested address'"
++	show_hint "Technically should fail since address is not on device but kernel allows"
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+- log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
++ log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
+ }
+
+ ipv6_addr_bind_vrf()
+@@ -3459,10 +3500,15 @@ ipv6_addr_bind_vrf()
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind"
+
++ # Sadly, the kernel allows binding a socket to a device and then
++ # binding to an address not on the device. The only restriction
++ # is that the address is valid in the L3 domain. So this test
++ # passes when it really should not
+ a=${VRF_IP6}
+ log_start
++	show_hint "Technically should fail since address is not on device but kernel allows"
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+- log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind"
++ log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
+
+ a=${NSA_LO_IP6}
+ log_start
+@@ -4002,8 +4048,8 @@ EOF
+ ################################################################################
+ # main
+
+-TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
+-TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
++TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
++TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
+ TESTS_OTHER="use_cases"
+
+ PAUSE_ON_FAIL=no
+@@ -4037,10 +4083,13 @@ elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+ fi
+
+-which nettest >/dev/null
+-if [ $? -ne 0 ]; then
+- echo "'nettest' command not found; skipping tests"
+- exit $ksft_skip
++# nettest can be run from PATH or from same directory as this selftest
++if ! which nettest >/dev/null; then
++ PATH=$PWD:$PATH
++ if ! which nettest >/dev/null; then
++ echo "'nettest' command not found; skipping tests"
++ exit $ksft_skip
++ fi
+ fi
+
+ declare -i nfail=0
+diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
+index 0d293391e9a44..a194dbcb405ae 100755
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1145,6 +1145,36 @@ ipv4_fcnal()
+ set +e
+ check_nexthop "dev veth1" ""
+ log_test $? 0 "Nexthops removed on admin down"
++
++ # nexthop route delete warning: route add with nhid and delete
++ # using device
++ run_cmd "$IP li set dev veth1 up"
++ run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
++ out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
++ run_cmd "$IP route add 172.16.101.1/32 nhid 12"
++ run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
++ out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
++ [ $out1 -eq $out2 ]
++ rc=$?
++ log_test $rc 0 "Delete nexthop route warning"
++ run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
++ run_cmd "$IP nexthop del id 12"
++
++ run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1"
++ run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
++ run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
++ log_test $? 2 "Delete multipath route with only nh id based entry"
++
++ run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1"
++ run_cmd "$IP ro add 172.16.102.0/24 nhid 22"
++ run_cmd "$IP ro del 172.16.102.0/24 dev veth1"
++ log_test $? 2 "Delete route when specifying only nexthop device"
++
++ run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6"
++ log_test $? 2 "Delete route when specifying only gateway"
++
++ run_cmd "$IP ro del 172.16.102.0/24"
++ log_test $? 0 "Delete route when not specifying nexthop attributes"
+ }
+
+ ipv4_grp_fcnal()
+@@ -1887,6 +1917,11 @@ basic()
+
+ run_cmd "$IP link set dev lo up"
+
++ # Dump should not loop endlessly when maximum nexthop ID is configured.
++ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
++ run_cmd "timeout 5 $IP nexthop"
++ log_test $? 0 "Maximum nexthop ID dump"
++
+ #
+ # groups
+ #
+@@ -2078,6 +2113,7 @@ basic_res()
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets in a group"
+
++ sleep 0.1
+ (( $($IP -j nexthop bucket list id 101 |
+ jq '[.[] | select(.bucket.idle_time > 0 and
+ .bucket.idle_time < 2)] | length') == 4 ))
+@@ -2106,6 +2142,11 @@ basic_res()
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
++ # Dump should not loop endlessly when maximum nexthop ID is configured.
++ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
++ run_cmd "timeout 5 $IP nexthop bucket"
++ log_test $? 0 "Maximum nexthop ID dump"
++
+ #
+ # resilient nexthop buckets get requests
+ #
+diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
+index 5abe92d55b696..6b7c255da5c21 100755
+--- a/tools/testing/selftests/net/fib_tests.sh
++++ b/tools/testing/selftests/net/fib_tests.sh
+@@ -68,7 +68,7 @@ setup()
+ cleanup()
+ {
+ $IP link del dev dummy0 &> /dev/null
+- ip netns del ns1
++ ip netns del ns1 &> /dev/null
+ ip netns del ns2 &> /dev/null
+ }
+
+@@ -444,24 +444,63 @@ fib_rp_filter_test()
+ setup
+
+ set -e
++ ip netns add ns2
++ ip netns set ns2 auto
++
++ ip -netns ns2 link set dev lo up
++
++ $IP link add name veth1 type veth peer name veth2
++ $IP link set dev veth2 netns ns2
++ $IP address add 192.0.2.1/24 dev veth1
++ ip -netns ns2 address add 192.0.2.1/24 dev veth2
++ $IP link set dev veth1 up
++ ip -netns ns2 link set dev veth2 up
++
+ $IP link set dev lo address 52:54:00:6a:c7:5e
+- $IP link set dummy0 address 52:54:00:6a:c7:5e
+- $IP link add dummy1 type dummy
+- $IP link set dummy1 address 52:54:00:6a:c7:5e
+- $IP link set dev dummy1 up
++ $IP link set dev veth1 address 52:54:00:6a:c7:5e
++ ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e
++ ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e
++
++ # 1. (ns2) redirect lo's egress to veth2's egress
++ ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel
++ ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \
++ action mirred egress redirect dev veth2
++ ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \
++ action mirred egress redirect dev veth2
++
++ # 2. (ns1) redirect veth1's ingress to lo's ingress
++ $NS_EXEC tc qdisc add dev veth1 ingress
++ $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \
++ action mirred ingress redirect dev lo
++ $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \
++ action mirred ingress redirect dev lo
++
++ # 3. (ns1) redirect lo's egress to veth1's egress
++ $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel
++ $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \
++ action mirred egress redirect dev veth1
++ $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \
++ action mirred egress redirect dev veth1
++
++ # 4. (ns2) redirect veth2's ingress to lo's ingress
++ ip netns exec ns2 tc qdisc add dev veth2 ingress
++ ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \
++ action mirred ingress redirect dev lo
++ ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \
++ action mirred ingress redirect dev lo
++
+ $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1
+ $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1
+ $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1
+-
+- $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel
+- $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo
+- $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo
++ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
++ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
++ ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
+ set +e
+
+- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1"
++ run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1"
+ log_test $? 0 "rp_filter passes local packets"
+
+- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1"
++ run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1"
+ log_test $? 0 "rp_filter passes loopback packets"
+
+ cleanup
+@@ -1583,13 +1622,21 @@ ipv4_del_addr_test()
+
+ $IP addr add dev dummy1 172.16.104.1/24
+ $IP addr add dev dummy1 172.16.104.11/24
++ $IP addr add dev dummy1 172.16.104.12/24
++ $IP addr add dev dummy1 172.16.104.13/24
+ $IP addr add dev dummy2 172.16.104.1/24
+ $IP addr add dev dummy2 172.16.104.11/24
++ $IP addr add dev dummy2 172.16.104.12/24
+ $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
++ $IP route add 172.16.106.0/24 dev lo src 172.16.104.12
++ $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13
+ $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
++ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
+ set +e
+
+ # removing address from device in vrf should only remove route from vrf table
++ echo " Regular FIB info"
++
+ $IP addr del dev dummy2 172.16.104.11/24
+ $IP ro ls vrf red | grep -q 172.16.105.0/24
+ log_test $? 1 "Route removed from VRF when source address deleted"
+@@ -1607,6 +1654,35 @@ ipv4_del_addr_test()
+ $IP ro ls vrf red | grep -q 172.16.105.0/24
+ log_test $? 0 "Route in VRF is not removed by address delete"
+
++ # removing address from device in vrf should only remove route from vrf
++ # table even when the associated fib info only differs in table ID
++ echo " Identical FIB info with different table ID"
++
++ $IP addr del dev dummy2 172.16.104.12/24
++ $IP ro ls vrf red | grep -q 172.16.106.0/24
++ log_test $? 1 "Route removed from VRF when source address deleted"
++
++ $IP ro ls | grep -q 172.16.106.0/24
++ log_test $? 0 "Route in default VRF not removed"
++
++ $IP addr add dev dummy2 172.16.104.12/24
++ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
++
++ $IP addr del dev dummy1 172.16.104.12/24
++ $IP ro ls | grep -q 172.16.106.0/24
++ log_test $? 1 "Route removed in default VRF when source address deleted"
++
++ $IP ro ls vrf red | grep -q 172.16.106.0/24
++ log_test $? 0 "Route in VRF is not removed by address delete"
++
++ # removing address from device in default vrf should remove route from
++ # the default vrf even when route was inserted with a table ID of 0.
++ echo " Table ID 0"
++
++ $IP addr del dev dummy1 172.16.104.13/24
++ $IP ro ls | grep -q 172.16.107.0/24
++ log_test $? 1 "Route removed in default VRF when source address deleted"
++
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+@@ -1845,6 +1921,8 @@ EOF
+ ################################################################################
+ # main
+
++trap cleanup EXIT
++
+ while getopts :t:pPhv o
+ do
+ case $o in
+diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+index 675eff45b0371..1162836f8f329 100755
+--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
++++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+@@ -482,10 +482,15 @@ v3exc_timeout_test()
+ local X=("192.0.2.20" "192.0.2.30")
+
+ # GMI should be 3 seconds
+- ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
++ ip link set dev br0 type bridge mcast_query_interval 100 \
++ mcast_query_response_interval 100 \
++ mcast_membership_interval 300
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+- ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
++ ip link set dev br0 type bridge mcast_query_interval 500 \
++ mcast_query_response_interval 500 \
++ mcast_membership_interval 1500
++
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+@@ -517,7 +522,8 @@ v3exc_timeout_test()
+ log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+- mcast_query_response_interval 1000
++ mcast_query_response_interval 1000 \
++ mcast_membership_interval 26000
+
+ v3cleanup $swp1 $TEST_GROUP
+ }
+diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
+index ffdcfa87ca2ba..e2b9ff773c6b6 100755
+--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
++++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
+@@ -479,10 +479,15 @@ mldv2exc_timeout_test()
+ local X=("2001:db8:1::20" "2001:db8:1::30")
+
+ # GMI should be 3 seconds
+- ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
++ ip link set dev br0 type bridge mcast_query_interval 100 \
++ mcast_query_response_interval 100 \
++ mcast_membership_interval 300
+
+ mldv2exclude_prepare $h1
+- ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
++ ip link set dev br0 type bridge mcast_query_interval 500 \
++ mcast_query_response_interval 500 \
++ mcast_membership_interval 1500
++
+ $MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+@@ -514,7 +519,8 @@ mldv2exc_timeout_test()
+ log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+- mcast_query_response_interval 1000
++ mcast_query_response_interval 1000 \
++ mcast_membership_interval 26000
+
+ mldv2cleanup $swp1
+ }
+diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+index a15d21dc035a6..56eb83d1a3bdd 100755
+--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
++++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+@@ -181,37 +181,43 @@ ping_ipv6()
+
+ send_src_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_src_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+ send_src_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+@@ -226,13 +232,15 @@ send_flowlabel()
+
+ send_src_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
+index dbb9fcf759e0f..aa2eafb7b2437 100755
+--- a/tools/testing/selftests/net/forwarding/ethtool.sh
++++ b/tools/testing/selftests/net/forwarding/ethtool.sh
+@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
+ ethtool -s $h1 autoneg on
+ }
+
++skip_on_veth
++
+ trap cleanup EXIT
+
+ setup_prepare
+diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+index 4b42dfd4efd1a..baf831da5366c 100755
+--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
++++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+@@ -95,6 +95,8 @@ no_cable()
+ ip link set dev $swp3 down
+ }
+
++skip_on_veth
++
+ setup_prepare
+
+ tests_run
+diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
+index e5e2fbeca22ec..e51def39fd801 100644
+--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
++++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
+@@ -13,6 +13,8 @@ NETIFS[p5]=veth4
+ NETIFS[p6]=veth5
+ NETIFS[p7]=veth6
+ NETIFS[p8]=veth7
++NETIFS[p9]=veth8
++NETIFS[p10]=veth9
+
+ # Port that does not have a cable connected.
+ NETIF_NO_CABLE=eth8
+diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+index a73f52efcb6cf..0446db9c6f748 100755
+--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
++++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+@@ -276,37 +276,43 @@ ping_ipv6()
+
+ send_src_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_src_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+ send_src_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+@@ -321,13 +327,15 @@ send_flowlabel()
+
+ send_src_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+index 8fea2c2e0b25d..d40183b4eccc8 100755
+--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
++++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+@@ -278,37 +278,43 @@ ping_ipv6()
+
+ send_src_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_src_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp4()
+ {
+- $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
++ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
++ -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+ send_src_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+ send_dst_ipv6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ }
+
+@@ -323,13 +329,15 @@ send_flowlabel()
+
+ send_src_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+ }
+
+ send_dst_udp6()
+ {
+- $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
++ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
++ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+ }
+
+diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
+index 92087d423bcf1..83e8f9466d627 100644
+--- a/tools/testing/selftests/net/forwarding/lib.sh
++++ b/tools/testing/selftests/net/forwarding/lib.sh
+@@ -122,6 +122,17 @@ check_ethtool_lanes_support()
+ fi
+ }
+
++skip_on_veth()
++{
++ local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
++ jq -r '.[].linkinfo.info_kind')
++
++ if [[ $kind == veth ]]; then
++ echo "SKIP: Test cannot be run with veth pairs"
++ exit $ksft_skip
++ fi
++}
++
+ if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit $ksft_skip
+@@ -174,6 +185,11 @@ create_netif_veth()
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ local j=$((i+1))
+
++ if [ -z ${NETIFS[p$i]} ]; then
++ echo "SKIP: Cannot create interface. Name not specified"
++ exit $ksft_skip
++ fi
++
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ ip link add ${NETIFS[p$i]} type veth \
+@@ -817,14 +833,14 @@ sysctl_set()
+ local value=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+- sysctl -qw $key=$value
++ sysctl -qw $key="$value"
+ }
+
+ sysctl_restore()
+ {
+ local key=$1; shift
+
+- sysctl -qw $key=${SYSCTL_ORIG["$key"]}
++ sysctl -qw $key="${SYSCTL_ORIG[$key]}"
+ }
+
+ forwarding_enable()
+@@ -1149,6 +1165,7 @@ learning_test()
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
++ ip link set $host1_if promisc on
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+@@ -1159,7 +1176,7 @@ learning_test()
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+- check_fail $? "Packet reached second host when should not"
++ check_fail $? "Packet reached first host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+@@ -1198,6 +1215,7 @@ learning_test()
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
++ ip link set $host1_if promisc off
+
+ bridge link set dev $br_port1 flood on
+
+@@ -1215,6 +1233,7 @@ flood_test_do()
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
++ ip link set $host2_if promisc on
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+@@ -1232,6 +1251,7 @@ flood_test_do()
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
++ ip link set $host2_if promisc off
+
+ return $err
+ }
+diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+index c5095da7f6bf8..aec752a22e9ec 100755
+--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
++++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+@@ -93,12 +93,16 @@ cleanup()
+
+ test_gretap()
+ {
++ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
++ nud permanent dev br2
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+ }
+
+ test_ip6gretap()
+ {
++ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
++ nud permanent dev br2
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+ }
+diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+index a3402cd8d5b68..0cf4c47a46f9b 100755
+--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
++++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+@@ -61,9 +61,12 @@ setup_prepare()
+
+ vrf_prepare
+ mirror_gre_topo_create
++ # Avoid changing br1's PVID while it is operational as a L3 interface.
++ ip link set dev br1 down
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev br1 vid 555 pvid untagged self
++ ip link set dev br1 up
+ ip address add dev br1 192.0.2.129/28
+ ip address add dev br1 2001:db8:2::1/64
+
+@@ -87,12 +90,16 @@ cleanup()
+
+ test_gretap()
+ {
++ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
++ nud permanent dev br1
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+ }
+
+ test_ip6gretap()
+ {
++ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
++ nud permanent dev br1
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+ }
+diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+index 472bd023e2a5f..b501b366367f7 100755
+--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
++++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+@@ -72,7 +72,8 @@ test_span_gre_ttl()
+
+ RET=0
+
+- mirror_install $swp1 ingress $tundev "matchall $tcflags"
++ mirror_install $swp1 ingress $tundev \
++ "prot ip flower $tcflags ip_prot icmp"
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_ttl 50 action pass
+
+diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
+index e714bae473fb4..81f31179ac887 100755
+--- a/tools/testing/selftests/net/forwarding/sch_red.sh
++++ b/tools/testing/selftests/net/forwarding/sch_red.sh
+@@ -1,3 +1,4 @@
++#!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
+ # This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
+new file mode 100644
+index 0000000000000..e7b9417537fbc
+--- /dev/null
++++ b/tools/testing/selftests/net/forwarding/settings
+@@ -0,0 +1 @@
++timeout=0
+diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
+index d9eca227136bb..dd02ed4cacacb 100755
+--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
++++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
+@@ -3,11 +3,13 @@
+
+ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+- gact_trap_test"
++ gact_trap_test mirred_egress_to_ingress_tcp_test"
+ NUM_NETIFS=4
+ source tc_common.sh
+ source lib.sh
+
++require_command ncat
++
+ tcflags="skip_hw"
+
+ h1_create()
+@@ -153,6 +155,52 @@ gact_trap_test()
+ log_test "trap ($tcflags)"
+ }
+
++mirred_egress_to_ingress_tcp_test()
++{
++ mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp)
++
++ RET=0
++ dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1
++ tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
++ $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
++ action ct commit nat src addr 192.0.2.2 pipe \
++ action ct clear pipe \
++ action ct commit nat dst addr 192.0.2.1 pipe \
++ action ct clear pipe \
++ action skbedit ptype host pipe \
++ action mirred ingress redirect dev $h1
++ tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
++ $tcflags ip_proto icmp \
++ action mirred ingress redirect dev $h1
++ tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
++ ip_proto icmp \
++ action drop
++
++ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
++ local rpid=$!
++ ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
++ wait -n $rpid
++ cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
++ check_err $? "server output check failed"
++
++ $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
++ -t icmp "ping,id=42,seq=5" -q
++ tc_check_packets "dev $h1 egress" 101 10
++ check_err $? "didn't mirred redirect ICMP"
++ tc_check_packets "dev $h1 ingress" 102 10
++ check_err $? "didn't drop mirred ICMP"
++ local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
++ test ${overlimits} = 10
++ check_err $? "wrong overlimits, expected 10 got ${overlimits}"
++
++ tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
++ tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
++ tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
++
++ rm -f $mirred_e2i_tf1 $mirred_e2i_tf2
++ log_test "mirred_egress_to_ingress_tcp ($tcflags)"
++}
++
+ setup_prepare()
+ {
+ h1=${NETIFS[p1]}
+@@ -178,6 +226,8 @@ setup_prepare()
+
+ cleanup()
+ {
++ local tf
++
+ pre_cleanup
+
+ switch_destroy
+@@ -188,6 +238,8 @@ cleanup()
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
++
++ for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done
+ }
+
+ mirred_egress_redirect_test()
+diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
+index 683711f41aa9b..b1daad19b01ec 100755
+--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
++++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
+@@ -52,8 +52,8 @@ match_dst_mac_test()
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+- tc_check_packets "dev $h2 ingress" 102 1
+- check_err $? "Did not match on correct filter"
++ tc_check_packets "dev $h2 ingress" 102 0
++ check_fail $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+@@ -78,8 +78,8 @@ match_src_mac_test()
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+- tc_check_packets "dev $h2 ingress" 102 1
+- check_err $? "Did not match on correct filter"
++ tc_check_packets "dev $h2 ingress" 102 0
++ check_fail $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
+index 4f9f17cb45d64..0a51eef21b9ef 100755
+--- a/tools/testing/selftests/net/forwarding/tc_police.sh
++++ b/tools/testing/selftests/net/forwarding/tc_police.sh
+@@ -37,6 +37,8 @@ ALL_TESTS="
+ police_tx_mirror_test
+ police_pps_rx_test
+ police_pps_tx_test
++ police_mtu_rx_test
++ police_mtu_tx_test
+ "
+ NUM_NETIFS=6
+ source tc_common.sh
+@@ -346,6 +348,56 @@ police_pps_tx_test()
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ }
+
++police_mtu_common_test() {
++ RET=0
++
++ local test_name=$1; shift
++ local dev=$1; shift
++ local direction=$1; shift
++
++ tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \
++ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
++ action police mtu 1042 conform-exceed drop/ok
++
++ # to count "conform" packets
++ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
++ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
++ action drop
++
++ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
++ -t udp sp=12345,dp=54321 -p 1001 -c 10 -q
++
++ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
++ -t udp sp=12345,dp=54321 -p 1000 -c 3 -q
++
++ tc_check_packets "dev $dev $direction" 101 13
++ check_err $? "wrong packet counter"
++
++ # "exceed" packets
++ local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits)
++ test ${overlimits_t0} = 10
++ check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}"
++
++ # "conform" packets
++ tc_check_packets "dev $h2 ingress" 101 3
++ check_err $? "forwarding error"
++
++ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
++ tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower
++
++ log_test "$test_name"
++}
++
++police_mtu_rx_test()
++{
++ police_mtu_common_test "police mtu (rx)" $rp1 ingress
++}
++
++police_mtu_tx_test()
++{
++ police_mtu_common_test "police mtu (tx)" $rp2 egress
++}
++
+ setup_prepare()
+ {
+ h1=${NETIFS[p1]}
+diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
+index facbb0c804439..3224651db97b8 100755
+--- a/tools/testing/selftests/net/gre_gso.sh
++++ b/tools/testing/selftests/net/gre_gso.sh
+@@ -116,17 +116,20 @@ gre_gst_test_checks()
+ {
+ local name=$1
+ local addr=$2
++ local proto=$3
+
+- $NS_EXEC nc -kl $port >/dev/null &
++ [ "$proto" == 6 ] && addr="[$addr]"
++
++ $NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null &
+ PID=$!
+ while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+- cat $TMPFILE | timeout 1 nc $addr $port
++ cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+ log_test $? 0 "$name - copy file w/ TSO"
+
+ ethtool -K veth0 tso off
+
+- cat $TMPFILE | timeout 1 nc $addr $port
++ cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+ log_test $? 0 "$name - copy file w/ GSO"
+
+ ethtool -K veth0 tso on
+@@ -154,8 +157,8 @@ gre6_gso_test()
+
+ sleep 2
+
+- gre_gst_test_checks GREv6/v4 172.16.2.2
+- gre_gst_test_checks GREv6/v6 2001:db8:1::2
++ gre_gst_test_checks GREv6/v4 172.16.2.2 4
++ gre_gst_test_checks GREv6/v6 2001:db8:1::2 6
+
+ cleanup
+ }
+@@ -211,8 +214,8 @@ if [ ! -x "$(command -v ip)" ]; then
+ exit $ksft_skip
+ fi
+
+-if [ ! -x "$(command -v nc)" ]; then
+- echo "SKIP: Could not run test without nc tool"
++if [ ! -x "$(command -v socat)" ]; then
++ echo "SKIP: Could not run test without socat tool"
+ exit $ksft_skip
+ fi
+
+diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
+index ecbf57f264ed9..7b9d6e31b8e7d 100755
+--- a/tools/testing/selftests/net/icmp_redirect.sh
++++ b/tools/testing/selftests/net/icmp_redirect.sh
+@@ -311,7 +311,7 @@ check_exception()
+ ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ grep -E -v 'mtu|redirected' | grep -q "cache"
+ fi
+- log_test $? 0 "IPv4: ${desc}"
++ log_test $? 0 "IPv4: ${desc}" 0
+
+ # No PMTU info for test "redirect" and "mtu exception plus redirect"
+ if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
+diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
+index f1464f09b080f..1db5f507d983e 100644
+--- a/tools/testing/selftests/net/mptcp/Makefile
++++ b/tools/testing/selftests/net/mptcp/Makefile
+@@ -10,7 +10,7 @@ TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+
+ TEST_GEN_FILES = mptcp_connect pm_nl_ctl
+
+-TEST_FILES := settings
++TEST_FILES := mptcp_lib.sh settings
+
+ EXTRA_CLEAN := *.pcap
+
+diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
+index 0faaccd21447e..3f9e540fee570 100644
+--- a/tools/testing/selftests/net/mptcp/config
++++ b/tools/testing/selftests/net/mptcp/config
+@@ -1,3 +1,4 @@
++CONFIG_KALLSYMS=y
+ CONFIG_MPTCP=y
+ CONFIG_IPV6=y
+ CONFIG_MPTCP_IPV6=y
+@@ -5,6 +6,7 @@ CONFIG_INET_DIAG=m
+ CONFIG_INET_MPTCP_DIAG=m
+ CONFIG_VETH=y
+ CONFIG_NET_SCH_NETEM=m
++CONFIG_SYN_COOKIES=y
+ CONFIG_NETFILTER=y
+ CONFIG_NETFILTER_ADVANCED=y
+ CONFIG_NETFILTER_NETLINK=m
+diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
+index 2674ba20d5249..57a681107f735 100755
+--- a/tools/testing/selftests/net/mptcp/diag.sh
++++ b/tools/testing/selftests/net/mptcp/diag.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ ns="ns1-$rndh"
+ ksft_skip=4
+@@ -25,6 +27,8 @@ cleanup()
+ ip netns del $ns
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+@@ -61,6 +65,39 @@ chk_msk_nr()
+ __chk_nr "grep -c token:" $*
+ }
+
++wait_msk_nr()
++{
++ local condition="grep -c token:"
++ local expected=$1
++ local timeout=20
++ local msg nr
++ local max=0
++ local i=0
++
++ shift 1
++ msg=$*
++
++ while [ $i -lt $timeout ]; do
++ nr=$(ss -inmHMN $ns | $condition)
++ [ $nr == $expected ] && break;
++ [ $nr -gt $max ] && max=$nr
++ i=$((i + 1))
++ sleep 1
++ done
++
++ printf "%-50s" "$msg"
++ if [ $i -ge $timeout ]; then
++ echo "[ fail ] timeout while expecting $expected max $max last $nr"
++ ret=$test_cnt
++ elif [ $nr != $expected ]; then
++ echo "[ fail ] expected $expected found $nr"
++ ret=$test_cnt
++ else
++ echo "[ ok ]"
++ fi
++ test_cnt=$((test_cnt+1))
++}
++
+ chk_msk_fallback_nr()
+ {
+ __chk_nr "grep -c fallback" $*
+@@ -71,6 +108,36 @@ chk_msk_remote_key_nr()
+ __chk_nr "grep -c remote_key" $*
+ }
+
++# $1: ns, $2: port
++wait_local_port_listen()
++{
++ local listener_ns="${1}"
++ local port="${2}"
++
++ local port_hex i
++
++ port_hex="$(printf "%04X" "${port}")"
++ for i in $(seq 10); do
++ ip netns exec "${listener_ns}" cat /proc/net/tcp | \
++ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
++ break
++ sleep 0.1
++ done
++}
++
++wait_connected()
++{
++ local listener_ns="${1}"
++ local port="${2}"
++
++ local port_hex i
++
++ port_hex="$(printf "%04X" "${port}")"
++ for i in $(seq 10); do
++ ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break
++ sleep 0.1
++ done
++}
+
+ trap cleanup EXIT
+ ip netns add $ns
+@@ -79,17 +146,17 @@ ip -n $ns link set dev lo up
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p 10000 -l -t ${timeout_poll} \
++ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
+ 0.0.0.0 >/dev/null &
+-sleep 0.1
++wait_local_port_listen $ns 10000
+ chk_msk_nr 0 "no msk on netns creation"
+
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p 10000 -j -t ${timeout_poll} \
++ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
+ 127.0.0.1 >/dev/null &
+-sleep 0.1
++wait_connected $ns 10000
+ chk_msk_nr 2 "after MPC handshake "
+ chk_msk_remote_key_nr 2 "....chk remote_key"
+ chk_msk_fallback_nr 0 "....chk no fallback"
+@@ -99,15 +166,15 @@ flush_pids
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
++ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
+ 0.0.0.0 >/dev/null &
+-sleep 0.1
++wait_local_port_listen $ns 10001
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p 10001 -j -t ${timeout_poll} \
++ ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \
+ 127.0.0.1 >/dev/null &
+-sleep 0.1
++wait_connected $ns 10001
+ chk_msk_fallback_nr 1 "check fallback"
+ flush_pids
+
+@@ -116,21 +183,20 @@ for I in `seq 1 $NR_CLIENTS`; do
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p $((I+10001)) -l -w 10 \
++ ./mptcp_connect -p $((I+10001)) -l -w 20 \
+ -t ${timeout_poll} 0.0.0.0 >/dev/null &
+ done
+-sleep 0.1
++wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+
+ for I in `seq 1 $NR_CLIENTS`; do
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+- ./mptcp_connect -p $((I+10001)) -w 10 \
++ ./mptcp_connect -p $((I+10001)) -w 20 \
+ -t ${timeout_poll} 127.0.0.1 >/dev/null &
+ done
+-sleep 1.5
+
+-chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
++wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+ flush_pids
+
+ exit $ret
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
+index 89c4753c2760c..95e81d557b088 100644
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
+@@ -14,6 +14,7 @@
+ #include <strings.h>
+ #include <signal.h>
+ #include <unistd.h>
++#include <time.h>
+
+ #include <sys/poll.h>
+ #include <sys/sendfile.h>
+@@ -64,6 +65,7 @@ static int cfg_sndbuf;
+ static int cfg_rcvbuf;
+ static bool cfg_join;
+ static bool cfg_remove;
++static unsigned int cfg_time;
+ static unsigned int cfg_do_w;
+ static int cfg_wait;
+ static uint32_t cfg_mark;
+@@ -78,9 +80,10 @@ static struct cfg_cmsg_types cfg_cmsg_types;
+ static void die_usage(void)
+ {
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
+- "[-l] [-w sec] connect_address\n");
++ "[-l] [-w sec] [-t num] [-T num] connect_address\n");
+ fprintf(stderr, "\t-6 use ipv6\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
++ fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
+@@ -448,7 +451,7 @@ static void set_nonblock(int fd)
+ fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ }
+
+-static int copyfd_io_poll(int infd, int peerfd, int outfd)
++static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
+ {
+ struct pollfd fds = {
+ .fd = peerfd,
+@@ -487,9 +490,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
+ */
+ fds.events &= ~POLLIN;
+
+- if ((fds.events & POLLOUT) == 0)
++ if ((fds.events & POLLOUT) == 0) {
++ *in_closed_after_out = true;
+ /* and nothing more to send */
+ break;
++ }
+
+ /* Else, still have data to transmit */
+ } else if (len < 0) {
+@@ -547,7 +552,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
+ }
+
+ /* leave some time for late join/announce */
+- if (cfg_join || cfg_remove)
++ if (cfg_remove)
+ usleep(cfg_wait);
+
+ close(peerfd);
+@@ -646,7 +651,7 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
+ }
+
+ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
+- unsigned int size)
++ unsigned int size, bool *in_closed_after_out)
+ {
+ int err;
+
+@@ -664,13 +669,14 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
+ shutdown(peerfd, SHUT_WR);
+
+ err = do_recvfile(peerfd, outfd);
++ *in_closed_after_out = true;
+ }
+
+ return err;
+ }
+
+ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
+- unsigned int size)
++ unsigned int size, bool *in_closed_after_out)
+ {
+ int err;
+
+@@ -685,6 +691,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
+ if (err)
+ return err;
+ err = do_recvfile(peerfd, outfd);
++ *in_closed_after_out = true;
+ }
+
+ return err;
+@@ -692,27 +699,62 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
+
+ static int copyfd_io(int infd, int peerfd, int outfd)
+ {
++ bool in_closed_after_out = false;
++ struct timespec start, end;
+ int file_size;
++ int ret;
++
++ if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
++ xerror("can not fetch start time %d", errno);
+
+ switch (cfg_mode) {
+ case CFG_MODE_POLL:
+- return copyfd_io_poll(infd, peerfd, outfd);
++ ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out);
++ break;
++
+ case CFG_MODE_MMAP:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+- return copyfd_io_mmap(infd, peerfd, outfd, file_size);
++ ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out);
++ break;
++
+ case CFG_MODE_SENDFILE:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+- return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
++ ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out);
++ break;
++
++ default:
++ fprintf(stderr, "Invalid mode %d\n", cfg_mode);
++
++ die_usage();
++ return 1;
+ }
+
+- fprintf(stderr, "Invalid mode %d\n", cfg_mode);
++ if (ret)
++ return ret;
+
+- die_usage();
+- return 1;
++ if (cfg_time) {
++ unsigned int delta_ms;
++
++ if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
++ xerror("can not fetch end time %d", errno);
++ delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
++ if (delta_ms > cfg_time) {
++ xerror("transfer slower than expected! runtime %d ms, expected %d ms",
++ delta_ms, cfg_time);
++ }
++
++ /* show the runtime only if this end shutdown(wr) before receiving the EOF,
++ * (that is, if this end got the longer runtime)
++ */
++ if (in_closed_after_out)
++ fprintf(stderr, "%d", delta_ms);
++ }
++
++ return 0;
+ }
+
+ static void check_sockaddr(int pf, struct sockaddr_storage *ss,
+@@ -1005,12 +1047,11 @@ static void parse_opts(int argc, char **argv)
+ {
+ int c;
+
+- while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
++ while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
+ switch (c) {
+ case 'j':
+ cfg_join = true;
+ cfg_mode = CFG_MODE_POLL;
+- cfg_wait = 400000;
+ break;
+ case 'r':
+ cfg_remove = true;
+@@ -1043,6 +1084,9 @@ static void parse_opts(int argc, char **argv)
+ if (poll_timeout <= 0)
+ poll_timeout = -1;
+ break;
++ case 'T':
++ cfg_time = atoi(optarg);
++ break;
+ case 'm':
+ cfg_mode = parse_mode(optarg);
+ break;
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+index 559173a8e387b..8efff3f9c52a9 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ time_start=$(date +%s)
+
+ optstring="S:R:d:e:l:r:h4cm:f:tC"
+@@ -138,6 +140,8 @@ cleanup()
+ done
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+@@ -445,6 +449,8 @@ do_transfer()
+ local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
++ local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
++ local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+@@ -537,6 +543,23 @@ do_transfer()
+ fi
+ fi
+
++ if $checksum; then
++ local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
++ local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
++
++ local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
++ if [ $csum_err_s_nr -gt 0 ]; then
++ printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
++ rets=1
++ fi
++
++ local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
++ if [ $csum_err_c_nr -gt 0 ]; then
++ printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
++ retc=1
++ fi
++ fi
++
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ printf "[ OK ]"
+ fi
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 255793c5ac4ff..a68048f1fc5a4 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ ret=0
+ sin=""
+ sinfail=""
+@@ -10,6 +12,8 @@ cinfail=""
+ cinsent=""
+ cout=""
+ ksft_skip=4
++iptables="iptables"
++ip6tables="ip6tables"
+ timeout_poll=30
+ timeout_test=$((timeout_poll * 2 + 1))
+ mptcp_connect=""
+@@ -75,6 +79,7 @@ init()
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
++ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
+ done
+ }
+
+@@ -123,9 +128,9 @@ reset_with_add_addr_timeout()
+ local ip="${1:-4}"
+ local tables
+
+- tables="iptables"
++ tables="${iptables}"
+ if [ $ip -eq 6 ]; then
+- tables="ip6tables"
++ tables="${ip6tables}"
+ fi
+
+ reset
+@@ -160,20 +165,21 @@ reset_with_allow_join_id0()
+ ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+
+-iptables -V > /dev/null 2>&1
+-if [ $? -ne 0 ];then
++if iptables-legacy -V &> /dev/null; then
++ iptables="iptables-legacy"
++ ip6tables="ip6tables-legacy"
++elif ! iptables -V &> /dev/null; then
+ echo "SKIP: Could not run all tests without iptables tool"
+ exit $ksft_skip
+-fi
+-
+-ip6tables -V > /dev/null 2>&1
+-if [ $? -ne 0 ];then
++elif ! ip6tables -V &> /dev/null; then
+ echo "SKIP: Could not run all tests without ip6tables tool"
+ exit $ksft_skip
+ fi
+@@ -238,6 +244,22 @@ is_v6()
+ [ -z "${1##*:*}" ]
+ }
+
++# $1: ns ; $2: counter
++get_counter()
++{
++ local ns="${1}"
++ local counter="${2}"
++ local count
++
++ count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}')
++ if [ -z "${count}" ]; then
++ mptcp_lib_fail_if_expected_feature "${counter} counter"
++ return 1
++ fi
++
++ echo "${count}"
++}
++
+ do_transfer()
+ {
+ listener_ns="$1"
+@@ -297,7 +319,7 @@ do_transfer()
+ if [ "$test_link_fail" -eq 2 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+- $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
++ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+@@ -551,9 +573,10 @@ chk_csum_nr()
+ echo -n " "
+ fi
+ printf " %-36s %s" "$msg" "sum"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != 0 ]; then
++ count=$(get_counter ${ns1} "MPTcpExtDataCsumErr")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+@@ -561,9 +584,10 @@ chk_csum_nr()
+ echo -n "[ ok ]"
+ fi
+ echo -n " - csum "
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != 0 ]; then
++ count=$(get_counter ${ns2} "MPTcpExtDataCsumErr")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+@@ -586,9 +610,10 @@ chk_fail_nr()
+ local dump_stats
+
+ printf "%-39s %s" " " "ftx"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mp_fail_nr_tx" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPFailTx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$mp_fail_nr_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+ ret=1
+ dump_stats=1
+@@ -597,9 +622,10 @@ chk_fail_nr()
+ fi
+
+ echo -n " - frx "
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mp_fail_nr_rx" ]; then
++ count=$(get_counter ${ns2} "MPTcpExtMPFailRx")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$mp_fail_nr_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+ ret=1
+ dump_stats=1
+@@ -623,11 +649,13 @@ chk_join_nr()
+ local ack_nr=$4
+ local count
+ local dump_stats
++ local with_cookie
+
+ printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$syn_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$syn_nr" ]; then
+ echo "[fail] got $count JOIN[s] syn expected $syn_nr"
+ ret=1
+ dump_stats=1
+@@ -636,20 +664,30 @@ chk_join_nr()
+ fi
+
+ echo -n " - synack"
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$syn_ack_nr" ]; then
+- echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
+- ret=1
+- dump_stats=1
++ with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies`
++ count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$syn_ack_nr" ]; then
++ # simult connections exceeding the limit with cookie enabled could go up to
++ # synack validation as the conn limit can be enforced reliably only after
++ # the subflow creation
++ if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
++ echo -n "[ ok ]"
++ else
++ echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
++ ret=1
++ dump_stats=1
++ fi
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - ack"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$ack_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] ack expected $ack_nr"
+ ret=1
+ dump_stats=1
+@@ -684,14 +722,13 @@ chk_stale_nr()
+ local recover_nr
+
+ printf "%-39s %-18s" " " "stale"
+- stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+- [ -z "$stale_nr" ] && stale_nr=0
+- recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+- [ -z "$recover_nr" ] && recover_nr=0
+-
+- if [ $stale_nr -lt $stale_min ] ||
+- [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+- [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
++ stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale")
++ recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover")
++ if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
++ echo "[skip]"
++ elif [ $stale_nr -lt $stale_min ] ||
++ [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
++ [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+ echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+ " expected stale in range [$stale_min..$stale_max]," \
+ " stale-recover delta $stale_delta "
+@@ -722,9 +759,10 @@ chk_add_nr()
+ local dump_stats
+
+ printf "%-39s %s" " " "add"
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$add_nr" ]; then
++ count=$(get_counter ${ns2} "MPTcpExtAddAddr")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$add_nr" ]; then
+ echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
+ ret=1
+ dump_stats=1
+@@ -733,9 +771,10 @@ chk_add_nr()
+ fi
+
+ echo -n " - echo "
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$echo_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtEchoAdd")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$echo_nr" ]; then
+ echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
+ ret=1
+ dump_stats=1
+@@ -745,9 +784,10 @@ chk_add_nr()
+
+ if [ $port_nr -gt 0 ]; then
+ echo -n " - pt "
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$port_nr" ]; then
++ count=$(get_counter ${ns2} "MPTcpExtPortAdd")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$port_nr" ]; then
+ echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
+ ret=1
+ dump_stats=1
+@@ -756,10 +796,10 @@ chk_add_nr()
+ fi
+
+ printf "%-39s %s" " " "syn"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx |
+- awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$syn_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$syn_nr" ]; then
+ echo "[fail] got $count JOIN[s] syn with a different \
+ port-number expected $syn_nr"
+ ret=1
+@@ -769,10 +809,10 @@ chk_add_nr()
+ fi
+
+ echo -n " - synack"
+- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx |
+- awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$syn_ack_nr" ]; then
++ count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$syn_ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] synack with a different \
+ port-number expected $syn_ack_nr"
+ ret=1
+@@ -782,10 +822,10 @@ chk_add_nr()
+ fi
+
+ echo -n " - ack"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx |
+- awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$ack_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] ack with a different \
+ port-number expected $ack_nr"
+ ret=1
+@@ -795,10 +835,10 @@ chk_add_nr()
+ fi
+
+ printf "%-39s %s" " " "syn"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx |
+- awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mis_syn_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$mis_syn_nr" ]; then
+ echo "[fail] got $count JOIN[s] syn with a mismatched \
+ port-number expected $mis_syn_nr"
+ ret=1
+@@ -808,10 +848,10 @@ chk_add_nr()
+ fi
+
+ echo -n " - ack "
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx |
+- awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mis_ack_nr" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$mis_ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] ack with a mismatched \
+ port-number expected $mis_ack_nr"
+ ret=1
+@@ -850,9 +890,10 @@ chk_rm_nr()
+ fi
+
+ printf "%-39s %s" " " "rm "
+- count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$rm_addr_nr" ]; then
++ count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$rm_addr_nr" ]; then
+ echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
+ ret=1
+ dump_stats=1
+@@ -861,9 +902,10 @@ chk_rm_nr()
+ fi
+
+ echo -n " - sf "
+- count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$rm_subflow_nr" ]; then
++ count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$rm_subflow_nr" ]; then
+ echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+ ret=1
+ dump_stats=1
+@@ -887,9 +929,10 @@ chk_prio_nr()
+ local dump_stats
+
+ printf "%-39s %s" " " "ptx"
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mp_prio_nr_tx" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
++ if [ -z "$count" ]; then
++ echo -n "[skip]"
++ elif [ "$count" != "$mp_prio_nr_tx" ]; then
+ echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
+ ret=1
+ dump_stats=1
+@@ -898,9 +941,10 @@ chk_prio_nr()
+ fi
+
+ echo -n " - prx "
+- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}'`
+- [ -z "$count" ] && count=0
+- if [ "$count" != "$mp_prio_nr_rx" ]; then
++ count=$(get_counter ${ns1} "MPTcpExtMPPrioRx")
++ if [ -z "$count" ]; then
++ echo "[skip]"
++ elif [ "$count" != "$mp_prio_nr_rx" ]; then
+ echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
+ ret=1
+ dump_stats=1
+@@ -1383,7 +1427,7 @@ ipv6_tests()
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ chk_join_nr "single subflow IPv6" 1 1 1
+
+@@ -1418,7 +1462,7 @@ ipv6_tests()
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
+ chk_join_nr "remove subflow and signal IPv6" 2 2 2
+ chk_add_nr 1 1
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+new file mode 100644
+index 0000000000000..f32045b23b893
+--- /dev/null
++++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+@@ -0,0 +1,104 @@
++#! /bin/bash
++# SPDX-License-Identifier: GPL-2.0
++
++readonly KSFT_FAIL=1
++readonly KSFT_SKIP=4
++
++# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
++# features using the latest version of the kernel and the selftests to make sure
++# a test is not being skipped by mistake.
++mptcp_lib_expect_all_features() {
++ [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ]
++}
++
++# $1: msg
++mptcp_lib_fail_if_expected_feature() {
++ if mptcp_lib_expect_all_features; then
++ echo "ERROR: missing feature: ${*}"
++ exit ${KSFT_FAIL}
++ fi
++
++ return 1
++}
++
++# $1: file
++mptcp_lib_has_file() {
++ local f="${1}"
++
++ if [ -f "${f}" ]; then
++ return 0
++ fi
++
++ mptcp_lib_fail_if_expected_feature "${f} file not found"
++}
++
++mptcp_lib_check_mptcp() {
++ if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
++ echo "SKIP: MPTCP support is not available"
++ exit ${KSFT_SKIP}
++ fi
++}
++
++mptcp_lib_check_kallsyms() {
++ if ! mptcp_lib_has_file "/proc/kallsyms"; then
++ echo "SKIP: CONFIG_KALLSYMS is missing"
++ exit ${KSFT_SKIP}
++ fi
++}
++
++# Internal: use mptcp_lib_kallsyms_has() instead
++__mptcp_lib_kallsyms_has() {
++ local sym="${1}"
++
++ mptcp_lib_check_kallsyms
++
++ grep -q " ${sym}" /proc/kallsyms
++}
++
++# $1: part of a symbol to look at, add '$' at the end for full name
++mptcp_lib_kallsyms_has() {
++ local sym="${1}"
++
++ if __mptcp_lib_kallsyms_has "${sym}"; then
++ return 0
++ fi
++
++ mptcp_lib_fail_if_expected_feature "${sym} symbol not found"
++}
++
++# $1: part of a symbol to look at, add '$' at the end for full name
++mptcp_lib_kallsyms_doesnt_have() {
++ local sym="${1}"
++
++ if ! __mptcp_lib_kallsyms_has "${sym}"; then
++ return 0
++ fi
++
++ mptcp_lib_fail_if_expected_feature "${sym} symbol has been found"
++}
++
++# !!!AVOID USING THIS!!!
++# Features might not land in the expected version and features can be backported
++#
++# $1: kernel version, e.g. 6.3
++mptcp_lib_kversion_ge() {
++ local exp_maj="${1%.*}"
++ local exp_min="${1#*.}"
++ local v maj min
++
++ # If the kernel has backported features, set this env var to 1:
++ if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then
++ return 0
++ fi
++
++ v=$(uname -r | cut -d'.' -f1,2)
++ maj=${v%.*}
++ min=${v#*.}
++
++ if [ "${maj}" -gt "${exp_maj}" ] ||
++ { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then
++ return 0
++ fi
++
++ mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
++}
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+index 1579e471a5e7b..3432d11e0a037 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ ret=0
+ sin=""
+ sout=""
+@@ -11,13 +13,15 @@ timeout_poll=30
+ timeout_test=$((timeout_poll * 2 + 1))
+ mptcp_connect=""
+ do_all_tests=1
++iptables="iptables"
++ip6tables="ip6tables"
+
+ add_mark_rules()
+ {
+ local ns=$1
+ local m=$2
+
+- for t in iptables ip6tables; do
++ for t in ${iptables} ${ip6tables}; do
+ # just to debug: check we have multiple subflows connection requests
+ ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
+
+@@ -80,20 +84,22 @@ cleanup()
+ rm -f "$sin" "$sout"
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+
+-iptables -V > /dev/null 2>&1
+-if [ $? -ne 0 ];then
++# Use the legacy version if available to support old kernel versions
++if iptables-legacy -V &> /dev/null; then
++ iptables="iptables-legacy"
++ ip6tables="ip6tables-legacy"
++elif ! iptables -V &> /dev/null; then
+ echo "SKIP: Could not run all tests without iptables tool"
+ exit $ksft_skip
+-fi
+-
+-ip6tables -V > /dev/null 2>&1
+-if [ $? -ne 0 ];then
++elif ! ip6tables -V &> /dev/null; then
+ echo "SKIP: Could not run all tests without ip6tables tool"
+ exit $ksft_skip
+ fi
+@@ -103,10 +109,10 @@ check_mark()
+ local ns=$1
+ local af=$2
+
+- tables=iptables
++ tables=${iptables}
+
+ if [ $af -eq 6 ];then
+- tables=ip6tables
++ tables=${ip6tables}
+ fi
+
+ counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)
+@@ -115,6 +121,7 @@ check_mark()
+ for v in $values; do
+ if [ $v -ne 0 ]; then
+ echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
++ ret=1
+ return 1
+ fi
+ done
+@@ -209,11 +216,11 @@ do_transfer()
+ fi
+
+ if [ $local_addr = "::" ];then
+- check_mark $listener_ns 6
+- check_mark $connector_ns 6
++ check_mark $listener_ns 6 || retc=1
++ check_mark $connector_ns 6 || retc=1
+ else
+- check_mark $listener_ns 4
+- check_mark $connector_ns 4
++ check_mark $listener_ns 4 || retc=1
++ check_mark $connector_ns 4 || retc=1
+ fi
+
+ check_transfer $cin $sout "file received by server"
+diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
+index 3c741abe034ef..306372b1526ae 100755
+--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ ksft_skip=4
+ ret=0
+
+@@ -34,6 +36,8 @@ cleanup()
+ ip netns del $ns1
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+@@ -69,8 +73,12 @@ check()
+ }
+
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
++
++default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
++if mptcp_lib_expect_all_features; then
++ check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+ subflows 0" "defaults limits"
++fi
+
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
+@@ -117,12 +125,10 @@ ip netns exec $ns1 ./pm_nl_ctl flush
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
+
+ ip netns exec $ns1 ./pm_nl_ctl limits 9 1
+-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+-subflows 0" "rcv addrs above hard limit"
++check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
+
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 9
+-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+-subflows 0" "subflows above hard limit"
++check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
+
+ ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+ check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 910d8126af8f2..752cef1688040 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -1,6 +1,8 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++. "$(dirname "${0}")/mptcp_lib.sh"
++
+ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ ns1="ns1-$rndh"
+ ns2="ns2-$rndh"
+@@ -32,6 +34,8 @@ cleanup()
+ done
+ }
+
++mptcp_lib_check_mptcp
++
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+@@ -51,7 +55,7 @@ setup()
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+- size=$((2048 * 4096))
++ size=$((2 * 2048 * 4096))
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+@@ -161,17 +165,15 @@ do_transfer()
+
+ timeout ${timeout_test} \
+ ip netns exec ${ns3} \
+- ./mptcp_connect -jt ${timeout_poll} -l -p $port \
++ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \
+ 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${ns3}" "${port}"
+
+- local start
+- start=$(date +%s%3N)
+ timeout ${timeout_test} \
+ ip netns exec ${ns1} \
+- ./mptcp_connect -jt ${timeout_poll} -p $port \
++ ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \
+ 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+@@ -180,27 +182,20 @@ do_transfer()
+ wait $spid
+ local rets=$?
+
+- local stop
+- stop=$(date +%s%3N)
+-
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+- local duration
+- duration=$((stop-start))
+-
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+- printf "%16s" "$duration max $max_time "
++ printf "%-16s" " max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+- [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+- [ $duration -lt $max_time ]; then
++ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ echo "[ OK ]"
+ cat "$capout"
+ return 0
+@@ -244,23 +239,25 @@ run_test()
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+- # time is measure in ms
+- local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
++ # time is measured in ms, account for transfer size, aggregated link speed
++ # and header overhead (10%)
++ # ms byte -> bit 10% mbit -> kbit -> bit 10%
++ local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+- # completion
+- time=$((time + 1350))
++ # completion (see mptcp_connect): 200ms on each side, add some slack
++ time=$((time + 450))
+
+- printf "%-50s" "$msg"
+- do_transfer $small $large $((time * 11 / 10))
++ printf "%-60s" "$msg"
++ do_transfer $small $large $time
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+- printf "%-50s" "$msg - reverse direction"
+- do_transfer $large $small $((time * 11 / 10))
++ printf "%-60s" "$msg - reverse direction"
++ do_transfer $large $small $time
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
+index 543ad7513a8e9..da6ab300207c0 100755
+--- a/tools/testing/selftests/net/pmtu.sh
++++ b/tools/testing/selftests/net/pmtu.sh
+@@ -374,6 +374,16 @@ run_cmd() {
+ return $rc
+ }
+
++run_cmd_bg() {
++ cmd="$*"
++
++ if [ "$VERBOSE" = "1" ]; then
++ printf " COMMAND: %s &\n" "${cmd}"
++ fi
++
++ $cmd 2>&1 &
++}
++
+ # Find the auto-generated name for this namespace
+ nsname() {
+ eval echo \$NS_$1
+@@ -661,19 +671,21 @@ setup_xfrm() {
+ }
+
+ setup_nettest_xfrm() {
+- which nettest >/dev/null
+- if [ $? -ne 0 ]; then
+- echo "'nettest' command not found; skipping tests"
+- return 1
++ if ! which nettest >/dev/null; then
++ PATH=$PWD:$PATH
++ if ! which nettest >/dev/null; then
++ echo "'nettest' command not found; skipping tests"
++ return 1
++ fi
+ fi
+
+ [ ${1} -eq 6 ] && proto="-6" || proto=""
+ port=${2}
+
+- run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
++ run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+ nettest_pids="${nettest_pids} $!"
+
+- run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
++ run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+ nettest_pids="${nettest_pids} $!"
+ }
+
+@@ -865,7 +877,6 @@ setup_ovs_bridge() {
+ setup() {
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
+
+- cleanup
+ for arg do
+ eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
+ done
+@@ -876,7 +887,7 @@ trace() {
+
+ for arg do
+ [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
+- ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
++ ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+ tcpdump_pids="${tcpdump_pids} $!"
+ ns_cmd=
+ done
+@@ -1836,6 +1847,10 @@ run_test() {
+
+ unset IFS
+
++ # Since cleanup() relies on variables modified by this subshell, it
++ # has to run in this context.
++ trap cleanup EXIT
++
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n##########################################################################\n\n"
+ fi
+diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
+index b5277106df1fd..b0cc082fbb84f 100644
+--- a/tools/testing/selftests/net/reuseport_bpf.c
++++ b/tools/testing/selftests/net/reuseport_bpf.c
+@@ -330,7 +330,7 @@ static void test_extra_filter(const struct test_params p)
+ if (bind(fd1, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+- if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
++ if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
+ error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+ free(addr);
+diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
+index c9ce3dfa42ee7..cbf166df57da7 100755
+--- a/tools/testing/selftests/net/rtnetlink.sh
++++ b/tools/testing/selftests/net/rtnetlink.sh
+@@ -782,7 +782,7 @@ kci_test_ipsec_offload()
+ tmpl proto esp src $srcip dst $dstip spi 9 \
+ mode transport reqid 42
+ check_err $?
+- ip x p add dir out src $dstip/24 dst $srcip/24 \
++ ip x p add dir in src $dstip/24 dst $srcip/24 \
+ tmpl proto esp src $dstip dst $srcip spi 9 \
+ mode transport reqid 42
+ check_err $?
+@@ -835,6 +835,7 @@ EOF
+ fi
+
+ # clean up any leftovers
++ echo 0 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
+index 59067f64b7753..2672ac0b6d1f3 100644
+--- a/tools/testing/selftests/net/so_txtime.c
++++ b/tools/testing/selftests/net/so_txtime.c
+@@ -421,7 +421,7 @@ static void usage(const char *progname)
+ "Options:\n"
+ " -4 only IPv4\n"
+ " -6 only IPv6\n"
+- " -c <clock> monotonic (default) or tai\n"
++ " -c <clock> monotonic or tai (default)\n"
+ " -D <addr> destination IP address (server)\n"
+ " -S <addr> source IP address (client)\n"
+ " -r run rx mode\n"
+@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
+ cfg_rx = true;
+ break;
+ case 't':
+- cfg_start_time_ns = strtol(optarg, NULL, 0);
++ cfg_start_time_ns = strtoll(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mark = strtol(optarg, NULL, 0);
+diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+index aebaab8ce44cb..441eededa0312 100755
+--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
++++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+@@ -292,6 +292,11 @@ setup_hs()
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
++ # disable the rp_filter otherwise the kernel gets confused about how
++ # to route decap ipv4 packets.
++ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
++ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
++
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+@@ -316,11 +321,6 @@ setup_hs()
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+- # disable the rp_filter otherwise the kernel gets confused about how
+- # to route decap ipv4 packets.
+- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+-
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+ }
+
+diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+index 1003119773e5d..f962823628119 100755
+--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
++++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+@@ -232,10 +232,14 @@ setup_rt_networking()
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
++
++ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
++ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
++
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+- ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
++ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+@@ -254,6 +258,12 @@ setup_hs()
+
+ # set the networking for the host
+ ip netns add ${hsname}
++
++ # disable the rp_filter otherwise the kernel gets confused about how
++ # to route decap ipv4 packets.
++ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
++ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
++
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+@@ -272,11 +282,6 @@ setup_hs()
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+- # disable the rp_filter otherwise the kernel gets confused about how
+- # to route decap ipv4 packets.
+- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+-
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+ }
+
+diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+index 534c8b7699ab9..6fadc8e2f116a 100755
+--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
++++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+@@ -118,11 +118,11 @@ echo "[ OK ]"
+
+ # Move the underlay to a non-default VRF
+ ip -netns hv-1 link set veth0 vrf vrf-underlay
+-ip -netns hv-1 link set veth0 down
+-ip -netns hv-1 link set veth0 up
++ip -netns hv-1 link set vxlan0 down
++ip -netns hv-1 link set vxlan0 up
+ ip -netns hv-2 link set veth0 vrf vrf-underlay
+-ip -netns hv-2 link set veth0 down
+-ip -netns hv-2 link set veth0 up
++ip -netns hv-2 link set vxlan0 down
++ip -netns hv-2 link set vxlan0 up
+
+ echo -n "Check VM connectivity through VXLAN (underlay in a VRF) "
+ ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
+index aee631c5284eb..044bc0e9ed81a 100644
+--- a/tools/testing/selftests/net/timestamping.c
++++ b/tools/testing/selftests/net/timestamping.c
+@@ -325,8 +325,8 @@ int main(int argc, char **argv)
+ struct ifreq device;
+ struct ifreq hwtstamp;
+ struct hwtstamp_config hwconfig, hwconfig_requested;
+- struct so_timestamping so_timestamping_get = { 0, -1 };
+- struct so_timestamping so_timestamping = { 0, -1 };
++ struct so_timestamping so_timestamping_get = { 0, 0 };
++ struct so_timestamping so_timestamping = { 0, 0 };
+ struct sockaddr_in addr;
+ struct ip_mreq imr;
+ struct in_addr iaddr;
+diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
+index 710ac956bdb33..8ce96028341d5 100644
+--- a/tools/testing/selftests/net/toeplitz.c
++++ b/tools/testing/selftests/net/toeplitz.c
+@@ -213,7 +213,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame)
+ }
+
+ /* A single TPACKET_V3 block can hold multiple frames */
+-static void recv_block(struct ring_state *ring)
++static bool recv_block(struct ring_state *ring)
+ {
+ struct tpacket_block_desc *block;
+ char *frame;
+@@ -221,7 +221,7 @@ static void recv_block(struct ring_state *ring)
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+- return;
++ return false;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+@@ -233,6 +233,8 @@ static void recv_block(struct ring_state *ring)
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
++
++ return true;
+ }
+
+ /* simple test: sleep once unconditionally and then process all rings */
+@@ -243,7 +245,7 @@ static void process_rings(void)
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+- recv_block(&rings[i]);
++ do {} while (recv_block(&rings[i]));
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+@@ -255,12 +257,12 @@ static char *setup_ring(int fd)
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+- req3.tp_retire_blk_tov = cfg_timeout_msec;
++ req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+- req3.tp_block_nr = 2;
++ req3.tp_block_nr = 16;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+@@ -498,7 +500,7 @@ static void parse_opts(int argc, char **argv)
+ bool have_toeplitz = false;
+ int index, c;
+
+- while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
++ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
+index 0a49907cd4fef..da5bfd834effe 100755
+--- a/tools/testing/selftests/net/toeplitz.sh
++++ b/tools/testing/selftests/net/toeplitz.sh
+@@ -32,7 +32,7 @@ DEV="eth0"
+ # This is determined by reading the RSS indirection table using ethtool.
+ get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+- egrep [[:space:]]+[0-9]+:[[:space:]]+ |
++ grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
+index f8a19f548ae9d..ebbd0b2824327 100755
+--- a/tools/testing/selftests/net/udpgro.sh
++++ b/tools/testing/selftests/net/udpgro.sh
+@@ -34,7 +34,7 @@ cfg_veth() {
+ ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+ ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+ ip -netns "${PEER_NS}" link set dev veth1 up
+- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
++ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
+ }
+
+ run_one() {
+diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
+index 820bc50f6b687..fad2d1a71cac3 100755
+--- a/tools/testing/selftests/net/udpgro_bench.sh
++++ b/tools/testing/selftests/net/udpgro_bench.sh
+@@ -34,7 +34,7 @@ run_one() {
+ ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+ ip -netns "${PEER_NS}" link set dev veth1 up
+
+- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
++ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
+
+diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
+index 7f26591f236b9..1bcd82e1f662e 100755
+--- a/tools/testing/selftests/net/udpgro_fwd.sh
++++ b/tools/testing/selftests/net/udpgro_fwd.sh
+@@ -46,7 +46,7 @@ create_ns() {
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+- ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
++ ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
+ }
+
+ create_vxlan_endpoint() {
+@@ -132,7 +132,7 @@ run_test() {
+ local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+ if [ $rcv != $pkts ]; then
+- echo " fail - received $rvs packets, expected $pkts"
++ echo " fail - received $rcv packets, expected $pkts"
+ ret=1
+ return
+ fi
+@@ -185,6 +185,7 @@ for family in 4 6; do
+ IPT=iptables
+ SUFFIX=24
+ VXDEV=vxlan
++ PING=ping
+
+ if [ $family = 6 ]; then
+ BM_NET=$BM_NET_V6
+@@ -192,6 +193,8 @@ for family in 4 6; do
+ SUFFIX="64 nodad"
+ VXDEV=vxlan6
+ IPT=ip6tables
++ # Use ping6 on systems where ping doesn't handle IPv6
++ ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6"
+ fi
+
+ echo "IPv$family"
+@@ -237,7 +240,7 @@ for family in 4 6; do
+
+ # load arp cache before running the test to reduce the amount of
+ # stray traffic on top of the UDP tunnel
+- ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
++ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+ cleanup
+
+diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
+index c66da6ffd6d8d..7badaf215de28 100644
+--- a/tools/testing/selftests/net/udpgso.c
++++ b/tools/testing/selftests/net/udpgso.c
+@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
+ },
+ {
+ /* send max number of min sized segments */
+- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
++ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
++ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
++ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
+ },
+ {
+ /* send max number of min sized segments */
+- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
++ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
++ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
++ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
+index 80b5d352702e5..640bc43452faa 100755
+--- a/tools/testing/selftests/net/udpgso_bench.sh
++++ b/tools/testing/selftests/net/udpgso_bench.sh
+@@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m'
+ readonly YELLOW='\033[0;33m'
+ readonly RED='\033[0;31m'
+ readonly NC='\033[0m' # No Color
++readonly TESTPORT=8000
+
+ readonly KSFT_PASS=0
+ readonly KSFT_FAIL=1
+@@ -56,11 +57,26 @@ trap wake_children EXIT
+
+ run_one() {
+ local -r args=$@
++ local nr_socks=0
++ local i=0
++ local -r timeout=10
++
++ ./udpgso_bench_rx -p "$TESTPORT" &
++ ./udpgso_bench_rx -p "$TESTPORT" -t &
++
++ # Wait for the above test program to get ready to receive connections.
++ while [ "$i" -lt "$timeout" ]; do
++ nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")"
++ [ "$nr_socks" -eq 2 ] && break
++ i=$((i + 1))
++ sleep 1
++ done
++ if [ "$nr_socks" -ne 2 ]; then
++ echo "timed out while waiting for udpgso_bench_rx"
++ exit 1
++ fi
+
+- ./udpgso_bench_rx &
+- ./udpgso_bench_rx -t &
+-
+- ./udpgso_bench_tx ${args}
++ ./udpgso_bench_tx -p "$TESTPORT" ${args}
+ }
+
+ run_in_netns() {
+@@ -120,7 +136,7 @@ run_all() {
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+- run_tcp "${ipv4_args}"
++ run_tcp "${ipv6_args}"
+ run_udp "${ipv6_args}"
+ }
+
+diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
+index 76a24052f4b47..f35a924d4a303 100644
+--- a/tools/testing/selftests/net/udpgso_bench_rx.c
++++ b/tools/testing/selftests/net/udpgso_bench_rx.c
+@@ -214,11 +214,10 @@ static void do_verify_udp(const char *data, int len)
+
+ static int recv_msg(int fd, char *buf, int len, int *gso_size)
+ {
+- char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
++ char control[CMSG_SPACE(sizeof(int))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cmsg;
+- uint16_t *gsosizeptr;
+ int ret;
+
+ iov.iov_base = buf;
+@@ -237,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
+ cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_UDP
+ && cmsg->cmsg_type == UDP_GRO) {
+- gsosizeptr = (uint16_t *) CMSG_DATA(cmsg);
+- *gso_size = *gsosizeptr;
++ *gso_size = *(int *)CMSG_DATA(cmsg);
+ break;
+ }
+ }
+@@ -250,7 +248,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
+ static void do_flush_udp(int fd)
+ {
+ static char rbuf[ETH_MAX_MTU];
+- int ret, len, gso_size, budget = 256;
++ int ret, len, gso_size = 0, budget = 256;
+
+ len = cfg_read_all ? sizeof(rbuf) : 0;
+ while (budget--) {
+@@ -293,19 +291,17 @@ static void usage(const char *filepath)
+
+ static void parse_opts(int argc, char **argv)
+ {
++ const char *bind_addr = NULL;
+ int c;
+
+- /* bind to any by default */
+- setup_sockaddr(PF_INET6, "::", &cfg_bind_addr);
+ while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+- setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr);
+ break;
+ case 'b':
+- setup_sockaddr(cfg_family, optarg, &cfg_bind_addr);
++ bind_addr = optarg;
+ break;
+ case 'C':
+ cfg_connect_timeout_ms = strtoul(optarg, NULL, 0);
+@@ -338,9 +334,16 @@ static void parse_opts(int argc, char **argv)
+ cfg_verify = true;
+ cfg_read_all = true;
+ break;
++ default:
++ exit(1);
+ }
+ }
+
++ if (!bind_addr)
++ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
++
++ setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr);
++
+ if (optind != argc)
+ usage(argv[0]);
+
+diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
+index 17512a43885e7..477392715a9ad 100644
+--- a/tools/testing/selftests/net/udpgso_bench_tx.c
++++ b/tools/testing/selftests/net/udpgso_bench_tx.c
+@@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42);
+ static int cfg_port = 8000;
+ static int cfg_runtime_ms = -1;
+ static bool cfg_poll;
++static int cfg_poll_loop_timeout_ms = 2000;
+ static bool cfg_segment;
+ static bool cfg_sendmmsg;
+ static bool cfg_tcp;
+@@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd)
+ }
+ }
+
+-static void flush_errqueue(int fd, const bool do_poll)
++static void flush_errqueue(int fd, const bool do_poll,
++ unsigned long poll_timeout, const bool poll_err)
+ {
+ if (do_poll) {
+ struct pollfd fds = {0};
+ int ret;
+
+ fds.fd = fd;
+- ret = poll(&fds, 1, 500);
++ ret = poll(&fds, 1, poll_timeout);
+ if (ret == 0) {
+- if (cfg_verbose)
++ if ((cfg_verbose) && (poll_err))
+ fprintf(stderr, "poll timeout\n");
+ } else if (ret < 0) {
+ error(1, errno, "poll");
+@@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll)
+ flush_errqueue_recv(fd);
+ }
+
++static void flush_errqueue_retry(int fd, unsigned long num_sends)
++{
++ unsigned long tnow, tstop;
++ bool first_try = true;
++
++ tnow = gettimeofday_ms();
++ tstop = tnow + cfg_poll_loop_timeout_ms;
++ do {
++ flush_errqueue(fd, true, tstop - tnow, first_try);
++ first_try = false;
++ tnow = gettimeofday_ms();
++ } while ((stat_zcopies != num_sends) && (tnow < tstop));
++}
++
+ static int send_tcp(int fd, char *data)
+ {
+ int ret, done = 0, count = 0;
+@@ -413,16 +429,18 @@ static int send_udp_segment(int fd, char *data)
+
+ static void usage(const char *filepath)
+ {
+- error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
++ error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
++ "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
+ filepath);
+ }
+
+ static void parse_opts(int argc, char **argv)
+ {
++ const char *bind_addr = NULL;
+ int max_len, hdrlen;
+ int c;
+
+- while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
++ while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+@@ -446,11 +464,14 @@ static void parse_opts(int argc, char **argv)
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
++ bind_addr = optarg;
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
++ case 'L':
++ cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000;
++ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+@@ -489,9 +510,16 @@ static void parse_opts(int argc, char **argv)
+ case 'z':
+ cfg_zerocopy = true;
+ break;
++ default:
++ exit(1);
+ }
+ }
+
++ if (!bind_addr)
++ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
++
++ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
++
+ if (optind != argc)
+ usage(argv[0]);
+
+@@ -671,7 +699,7 @@ int main(int argc, char **argv)
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+ if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
+- flush_errqueue(fd, cfg_poll);
++ flush_errqueue(fd, cfg_poll, 500, true);
+
+ if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
+ break;
+@@ -690,7 +718,7 @@ int main(int argc, char **argv)
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (cfg_zerocopy || cfg_tx_tstamp)
+- flush_errqueue(fd, true);
++ flush_errqueue_retry(fd, num_sends);
+
+ if (close(fd))
+ error(1, errno, "close");
+diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
+index 19eac3e44c065..430895d1a2b63 100755
+--- a/tools/testing/selftests/net/veth.sh
++++ b/tools/testing/selftests/net/veth.sh
+@@ -289,14 +289,14 @@ if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+- section xdp_dummy 2>/dev/null &&\
++ section xdp 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+- section xdp_dummy 2>/dev/null
++ section xdp 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+@@ -311,7 +311,7 @@ if [ $CPUS -gt 2 ]; then
+ chk_channels "setting invalid channels nr" $DST 2 2
+ fi
+
+-ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
++ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
+ chk_gro_flag "with xdp attached - gro flag" $DST on
+ chk_gro_flag " - peer gro flag" $SRC off
+ chk_tso_flag " - tso flag" $SRC off
+diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
+index 8748199ac1098..ffca314897c4c 100644
+--- a/tools/testing/selftests/netfilter/Makefile
++++ b/tools/testing/selftests/netfilter/Makefile
+@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
+ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
+ nft_concat_range.sh nft_conntrack_helper.sh \
+ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
+- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
++ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
++ conntrack_vrf.sh
+
+ LDLIBS = -lmnl
+ TEST_GEN_FILES = nf-queue
+diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+index b48e1833bc896..76645aaf2b58f 100755
+--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
++++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+@@ -35,6 +35,8 @@ cleanup() {
+ for i in 1 2;do ip netns del nsrouter$i;done
+ }
+
++trap cleanup EXIT
++
+ ipv4() {
+ echo -n 192.168.$1.2
+ }
+@@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF
+ table inet filter {
+ counter unknown { }
+ counter related { }
++ counter redir4 { }
++ counter redir6 { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+- meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
++ icmp type "redirect" ct state "related" counter name "redir4" accept
++ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
++
++ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
++
+ counter name "unknown" drop
+ }
+ }
+@@ -279,5 +287,29 @@ else
+ echo "ERROR: icmp error RELATED state test has failed"
+ fi
+
+-cleanup
++# add 'bad' route, expect icmp REDIRECT to be generated
++ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
++ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
++
++ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
++
++expect="packets 1 bytes 112"
++check_counter nsclient1 "redir4" "$expect"
++if [ $? -ne 0 ];then
++ ret=1
++fi
++
++ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
++expect="packets 1 bytes 192"
++check_counter nsclient1 "redir6" "$expect"
++if [ $? -ne 0 ];then
++ ret=1
++fi
++
++if [ $ret -eq 0 ];then
++ echo "PASS: icmp redirects had RELATED state"
++else
++ echo "ERROR: icmp redirect RELATED state test has failed"
++fi
++
+ exit $ret
+diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
+new file mode 100755
+index 0000000000000..8b5ea92345882
+--- /dev/null
++++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
+@@ -0,0 +1,241 @@
++#!/bin/sh
++
++# This script demonstrates interaction of conntrack and vrf.
++# The vrf driver calls the netfilter hooks again, with oif/iif
++# pointing at the VRF device.
++#
++# For ingress, this means first iteration has iifname of lower/real
++# device. In this script, that's veth0.
++# The second iteration has iifname set to the vrf device, tvrf in this script.
++#
++# For egress, this is reversed: first iteration has the vrf device,
++# second iteration is done with the lower/real/veth0 device.
++#
++# test_ct_zone_in demonstrates unexpected change of nftables
++# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
++# connection on VRF rcv"
++#
++# It was possible to assign conntrack zone to a packet (or mark it for
++# `notracking`) in the prerouting chain before conntrack, based on real iif.
++#
++# After the change, that zone assignment is lost: the zone is assigned based
++# on the VRF master interface instead (in case such a rule exists), i.e. the
++# assignment based on the real `iif` is replaced by one based on the VRF
++# device. Thus it is impossible to distinguish packets based on the original
++# interface.
++#
++# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
++# that was supposed to be fixed by the commit mentioned above to make sure
++# that any fix to test case 1 won't break masquerade again.
++
++ksft_skip=4
++
++IP0=172.30.30.1
++IP1=172.30.30.2
++PFXL=30
++ret=0
++
++sfx=$(mktemp -u "XXXXXXXX")
++ns0="ns0-$sfx"
++ns1="ns1-$sfx"
++
++cleanup()
++{
++ ip netns pids $ns0 | xargs kill 2>/dev/null
++ ip netns pids $ns1 | xargs kill 2>/dev/null
++
++ ip netns del $ns0 $ns1
++}
++
++nft --version > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not run test without nft tool"
++ exit $ksft_skip
++fi
++
++ip -Version > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not run test without ip tool"
++ exit $ksft_skip
++fi
++
++ip netns add "$ns0"
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not create net namespace $ns0"
++ exit $ksft_skip
++fi
++ip netns add "$ns1"
++
++trap cleanup EXIT
++
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
++ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
++
++ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not add veth device"
++ exit $ksft_skip
++fi
++
++ip -net $ns0 li add tvrf type vrf table 9876
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not add vrf device"
++ exit $ksft_skip
++fi
++
++ip -net $ns0 li set lo up
++
++ip -net $ns0 li set veth0 master tvrf
++ip -net $ns0 li set tvrf up
++ip -net $ns0 li set veth0 up
++ip -net $ns1 li set veth0 up
++
++ip -net $ns0 addr add $IP0/$PFXL dev veth0
++ip -net $ns1 addr add $IP1/$PFXL dev veth0
++
++ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
++if [ $? -ne 0 ];then
++ echo "SKIP: Could not start iperf3"
++ exit $ksft_skip
++fi
++
++# test vrf ingress handling.
++# The incoming connection should be placed in conntrack zone 1,
++# as decided by the first iteration of the ruleset.
++test_ct_zone_in()
++{
++ip netns exec $ns0 nft -f - <<EOF
++table testct {
++ chain rawpre {
++ type filter hook prerouting priority raw;
++
++ iif { veth0, tvrf } counter meta nftrace set 1
++ iif veth0 counter ct zone set 1 counter return
++ iif tvrf counter ct zone set 2 counter return
++ ip protocol icmp counter
++ notrack counter
++ }
++
++ chain rawout {
++ type filter hook output priority raw;
++
++ oif veth0 counter ct zone set 1 counter return
++ oif tvrf counter ct zone set 2 counter return
++ notrack counter
++ }
++}
++EOF
++ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
++
++ # should be in zone 1, not zone 2
++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
++ if [ $count -eq 1 ]; then
++ echo "PASS: entry found in conntrack zone 1"
++ else
++ echo "FAIL: entry not found in conntrack zone 1"
++ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
++ if [ $count -eq 1 ]; then
++ echo "FAIL: entry found in zone 2 instead"
++ else
++ echo "FAIL: entry not in zone 1 or 2, dumping table"
++ ip netns exec $ns0 conntrack -L
++ ip netns exec $ns0 nft list ruleset
++ fi
++ fi
++}
++
++# add masq rule that gets evaluated w. outif set to vrf device.
++# This tests the first iteration of the packet through conntrack,
++# oifname is the vrf device.
++test_masquerade_vrf()
++{
++ local qdisc=$1
++
++ if [ "$qdisc" != "default" ]; then
++ tc -net $ns0 qdisc add dev tvrf root $qdisc
++ fi
++
++ ip netns exec $ns0 conntrack -F 2>/dev/null
++
++ip netns exec $ns0 nft -f - <<EOF
++flush ruleset
++table ip nat {
++ chain rawout {
++ type filter hook output priority raw;
++
++ oif tvrf ct state untracked counter
++ }
++ chain postrouting2 {
++ type filter hook postrouting priority mangle;
++
++ oif tvrf ct state untracked counter
++ }
++ chain postrouting {
++ type nat hook postrouting priority 0;
++ # NB: masquerade should always be combined with 'oif(name) bla',
++ # lack of this is intentional here, we want to exercise double-snat.
++ ip saddr 172.30.30.0/30 counter masquerade random
++ }
++}
++EOF
++ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
++ if [ $? -ne 0 ]; then
++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
++ ret=1
++ return
++ fi
++
++ # must also check that nat table was evaluated on second (lower device) iteration.
++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
++ ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
++ if [ $? -eq 0 ]; then
++ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
++ else
++ echo "FAIL: vrf rules have unexpected counter value"
++ ret=1
++ fi
++
++ if [ "$qdisc" != "default" ]; then
++ tc -net $ns0 qdisc del dev tvrf root
++ fi
++}
++
++# add masq rule that gets evaluated w. outif set to veth device.
++# This tests the 2nd iteration of the packet through conntrack,
++# oifname is the lower device (veth0 in this case).
++test_masquerade_veth()
++{
++ ip netns exec $ns0 conntrack -F 2>/dev/null
++ip netns exec $ns0 nft -f - <<EOF
++flush ruleset
++table ip nat {
++ chain postrouting {
++ type nat hook postrouting priority 0;
++ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
++ }
++}
++EOF
++ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
++ if [ $? -ne 0 ]; then
++ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
++ ret=1
++ return
++ fi
++
++ # must also check that nat table was evaluated on second (lower device) iteration.
++ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
++ if [ $? -eq 0 ]; then
++ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
++ else
++ echo "FAIL: vrf masq rule has unexpected counter value"
++ ret=1
++ fi
++}
++
++test_ct_zone_in
++test_masquerade_vrf "default"
++test_masquerade_vrf "pfifo"
++test_masquerade_veth
++
++exit $ret
+diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
+index 5a4938d6dcf25..af3461cb5c409 100755
+--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
++++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
+@@ -27,11 +27,11 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
+ net_port_mac_proto_net"
+
+ # Reported bugs, also described by TYPE_ variables below
+-BUGS="flush_remove_add"
++BUGS="flush_remove_add reload"
+
+ # List of possible paths to pktgen script from kernel tree for performance tests
+ PKTGEN_SCRIPT_PATHS="
+- ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
++ ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
+
+ # Definition of set types:
+@@ -337,6 +337,23 @@ TYPE_flush_remove_add="
+ display Add two elements, flush, re-add
+ "
+
++TYPE_reload="
++display net,mac with reload
++type_spec ipv4_addr . ether_addr
++chain_spec ip daddr . ether saddr
++dst addr4
++src mac
++start 1
++count 1
++src_delta 2000
++tools sendip nc bash
++proto udp
++
++race_repeat 0
++
++perf_duration 0
++"
++
+ # Set template for all tests, types and rules are filled in depending on test
+ set_template='
+ flush ruleset
+@@ -1455,6 +1472,59 @@ test_bug_flush_remove_add() {
+ nft flush ruleset
+ }
+
++# - add ranged element, check that packets match it
++# - reload the set, check packets still match
++test_bug_reload() {
++ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
++ rstart=${start}
++
++ range_size=1
++ for i in $(seq "${start}" $((start + count))); do
++ end=$((start + range_size))
++
++ # Avoid negative or zero-sized port ranges
++ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
++ start=${end}
++ end=$((end + 1))
++ fi
++ srcstart=$((start + src_delta))
++ srcend=$((end + src_delta))
++
++ add "$(format)" || return 1
++ range_size=$((range_size + 1))
++ start=$((end + range_size))
++ done
++
++	# check that the kernel does allocate the pcpu scratch map
++	# for a reload with no element add/delete
++ ( echo flush set inet filter test ;
++ nft list set inet filter test ) | nft -f -
++
++ start=${rstart}
++ range_size=1
++
++ for i in $(seq "${start}" $((start + count))); do
++ end=$((start + range_size))
++
++ # Avoid negative or zero-sized port ranges
++ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
++ start=${end}
++ end=$((end + 1))
++ fi
++ srcstart=$((start + src_delta))
++ srcend=$((end + src_delta))
++
++ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
++ send_match "${j}" $((j + src_delta)) || return 1
++ done
++
++ range_size=$((range_size + 1))
++ start=$((end + range_size))
++ done
++
++ nft flush ruleset
++}
++
+ test_reported_issues() {
+ eval test_bug_"${subtest}"
+ }
+@@ -1513,4 +1583,4 @@ for name in ${TESTS}; do
+ done
+ done
+
+-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
++[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
+diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
+index 6caf6ac8c285f..695a1958723f5 100755
+--- a/tools/testing/selftests/netfilter/nft_fib.sh
++++ b/tools/testing/selftests/netfilter/nft_fib.sh
+@@ -174,6 +174,7 @@ test_ping() {
+ ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
++ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+ sleep 3
+
+diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
+index d4ffebb989f88..c336e6c148d1f 100755
+--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
++++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
+@@ -14,6 +14,11 @@
+ # nft_flowtable.sh -o8000 -l1500 -r2000
+ #
+
++sfx=$(mktemp -u "XXXXXXXX")
++ns1="ns1-$sfx"
++ns2="ns2-$sfx"
++nsr1="nsr1-$sfx"
++nsr2="nsr2-$sfx"
+
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
+@@ -36,18 +41,17 @@ checktool (){
+ checktool "nft --version" "run test without nft tool"
+ checktool "ip -Version" "run test without ip tool"
+ checktool "which nc" "run test without nc (netcat)"
+-checktool "ip netns add nsr1" "create net namespace"
++checktool "ip netns add $nsr1" "create net namespace $nsr1"
+
+-ip netns add ns1
+-ip netns add ns2
+-
+-ip netns add nsr2
++ip netns add $ns1
++ip netns add $ns2
++ip netns add $nsr2
+
+ cleanup() {
+- for i in 1 2; do
+- ip netns del ns$i
+- ip netns del nsr$i
+- done
++ ip netns del $ns1
++ ip netns del $ns2
++ ip netns del $nsr1
++ ip netns del $nsr2
+
+ rm -f "$ns1in" "$ns1out"
+ rm -f "$ns2in" "$ns2out"
+@@ -59,22 +63,21 @@ trap cleanup EXIT
+
+ sysctl -q net.netfilter.nf_log_all_netns=1
+
+-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
+-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
++ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
++ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
+
+-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
++ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
+
+ for dev in lo veth0 veth1; do
+- for i in 1 2; do
+- ip -net nsr$i link set $dev up
+- done
++ ip -net $nsr1 link set $dev up
++ ip -net $nsr2 link set $dev up
+ done
+
+-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+-ip -net nsr1 addr add dead:1::1/64 dev veth0
++ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
++ip -net $nsr1 addr add dead:1::1/64 dev veth0
+
+-ip -net nsr2 addr add 10.0.2.1/24 dev veth1
+-ip -net nsr2 addr add dead:2::1/64 dev veth1
++ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
++ip -net $nsr2 addr add dead:2::1/64 dev veth1
+
+ # set different MTUs so we need to push packets coming from ns1 (large MTU)
+ # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
+@@ -106,49 +109,56 @@ do
+ esac
+ done
+
+-if ! ip -net nsr1 link set veth0 mtu $omtu; then
++if ! ip -net $nsr1 link set veth0 mtu $omtu; then
+ exit 1
+ fi
+
+-ip -net ns1 link set eth0 mtu $omtu
++ip -net $ns1 link set eth0 mtu $omtu
+
+-if ! ip -net nsr2 link set veth1 mtu $rmtu; then
++if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
+ exit 1
+ fi
+
+-ip -net ns2 link set eth0 mtu $rmtu
++ip -net $ns2 link set eth0 mtu $rmtu
+
+ # transfer-net between nsr1 and nsr2.
+ # these addresses are not used for connections.
+-ip -net nsr1 addr add 192.168.10.1/24 dev veth1
+-ip -net nsr1 addr add fee1:2::1/64 dev veth1
+-
+-ip -net nsr2 addr add 192.168.10.2/24 dev veth0
+-ip -net nsr2 addr add fee1:2::2/64 dev veth0
+-
+-for i in 1 2; do
+- ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+- ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+-
+- ip -net ns$i link set lo up
+- ip -net ns$i link set eth0 up
+- ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+- ip -net ns$i route add default via 10.0.$i.1
+- ip -net ns$i addr add dead:$i::99/64 dev eth0
+- ip -net ns$i route add default via dead:$i::1
+- if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
++ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
++ip -net $nsr1 addr add fee1:2::1/64 dev veth1
++
++ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
++ip -net $nsr2 addr add fee1:2::2/64 dev veth0
++
++for i in 0 1; do
++ ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
++ ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
++done
++
++for ns in $ns1 $ns2;do
++ ip -net $ns link set lo up
++ ip -net $ns link set eth0 up
++
++ if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
+-
+ # don't set ip DF bit for first two tests
+- ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
++ ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+ done
+
+-ip -net nsr1 route add default via 192.168.10.2
+-ip -net nsr2 route add default via 192.168.10.1
++ip -net $ns1 addr add 10.0.1.99/24 dev eth0
++ip -net $ns2 addr add 10.0.2.99/24 dev eth0
++ip -net $ns1 route add default via 10.0.1.1
++ip -net $ns2 route add default via 10.0.2.1
++ip -net $ns1 addr add dead:1::99/64 dev eth0
++ip -net $ns2 addr add dead:2::99/64 dev eth0
++ip -net $ns1 route add default via dead:1::1
++ip -net $ns2 route add default via dead:2::1
++
++ip -net $nsr1 route add default via 192.168.10.2
++ip -net $nsr2 route add default via 192.168.10.1
+
+-ip netns exec nsr1 nft -f - <<EOF
++ip netns exec $nsr1 nft -f - <<EOF
+ table inet filter {
+ flowtable f1 {
+ hook ingress priority 0
+@@ -197,18 +207,18 @@ if [ $? -ne 0 ]; then
+ fi
+
+ # test basic connectivity
+-if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+- echo "ERROR: ns1 cannot reach ns2" 1>&2
++if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
++ echo "ERROR: $ns1 cannot reach ns2" 1>&2
+ exit 1
+ fi
+
+-if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+- echo "ERROR: ns2 cannot reach ns1" 1>&2
++if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
++ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+ fi
+
+ if [ $ret -eq 0 ];then
+- echo "PASS: netns routing/connectivity: ns1 can reach ns2"
++ echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
+ fi
+
+ ns1in=$(mktemp)
+@@ -312,24 +322,24 @@ make_file "$ns2in"
+
+ # First test:
+ # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
+-if test_tcp_forwarding ns1 ns2; then
++if test_tcp_forwarding $ns1 $ns2; then
+ echo "PASS: flow offloaded for ns1/ns2"
+ else
+ echo "FAIL: flow offload for ns1/ns2:" 1>&2
+- ip netns exec nsr1 nft list ruleset
++ ip netns exec $nsr1 nft list ruleset
+ ret=1
+ fi
+
+ # delete default route, i.e. ns2 won't be able to reach ns1 and
+ # will depend on ns1 being masqueraded in nsr1.
+ # expect ns1 has nsr1 address.
+-ip -net ns2 route del default via 10.0.2.1
+-ip -net ns2 route del default via dead:2::1
+-ip -net ns2 route add 192.168.10.1 via 10.0.2.1
++ip -net $ns2 route del default via 10.0.2.1
++ip -net $ns2 route del default via dead:2::1
++ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
+
+ # Second test:
+ # Same, but with NAT enabled.
+-ip netns exec nsr1 nft -f - <<EOF
++ip netns exec $nsr1 nft -f - <<EOF
+ table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+@@ -343,47 +353,47 @@ table ip nat {
+ }
+ EOF
+
+-if test_tcp_forwarding_nat ns1 ns2; then
++if test_tcp_forwarding_nat $ns1 $ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with NAT"
+ else
+ echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
+- ip netns exec nsr1 nft list ruleset
++ ip netns exec $nsr1 nft list ruleset
+ ret=1
+ fi
+
+ # Third test:
+ # Same as second test, but with PMTU discovery enabled.
+-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
++handle=$(ip netns exec $nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
+
+-if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
++if ! ip netns exec $nsr1 nft delete rule inet filter forward $handle; then
+ echo "FAIL: Could not delete large-packet accept rule"
+ exit 1
+ fi
+
+-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
++ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
++ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+-if test_tcp_forwarding_nat ns1 ns2; then
++if test_tcp_forwarding_nat $ns1 $ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
+ else
+ echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+- ip netns exec nsr1 nft list ruleset
++ ip netns exec $nsr1 nft list ruleset
+ fi
+
+ # Another test:
+ # Add bridge interface br0 to Router1, with NAT enabled.
+-ip -net nsr1 link add name br0 type bridge
+-ip -net nsr1 addr flush dev veth0
+-ip -net nsr1 link set up dev veth0
+-ip -net nsr1 link set veth0 master br0
+-ip -net nsr1 addr add 10.0.1.1/24 dev br0
+-ip -net nsr1 addr add dead:1::1/64 dev br0
+-ip -net nsr1 link set up dev br0
++ip -net $nsr1 link add name br0 type bridge
++ip -net $nsr1 addr flush dev veth0
++ip -net $nsr1 link set up dev veth0
++ip -net $nsr1 link set veth0 master br0
++ip -net $nsr1 addr add 10.0.1.1/24 dev br0
++ip -net $nsr1 addr add dead:1::1/64 dev br0
++ip -net $nsr1 link set up dev br0
+
+-ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
++ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+ # br0 with NAT enabled.
+-ip netns exec nsr1 nft -f - <<EOF
++ip netns exec $nsr1 nft -f - <<EOF
+ flush table ip nat
+ table ip nat {
+ chain prerouting {
+@@ -398,59 +408,59 @@ table ip nat {
+ }
+ EOF
+
+-if test_tcp_forwarding_nat ns1 ns2; then
++if test_tcp_forwarding_nat $ns1 $ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
+ else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+- ip netns exec nsr1 nft list ruleset
++ ip netns exec $nsr1 nft list ruleset
+ ret=1
+ fi
+
+ # Another test:
+ # Add bridge interface br0 to Router1, with NAT and VLAN.
+-ip -net nsr1 link set veth0 nomaster
+-ip -net nsr1 link set down dev veth0
+-ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
+-ip -net nsr1 link set up dev veth0
+-ip -net nsr1 link set up dev veth0.10
+-ip -net nsr1 link set veth0.10 master br0
+-
+-ip -net ns1 addr flush dev eth0
+-ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
+-ip -net ns1 link set eth0 up
+-ip -net ns1 link set eth0.10 up
+-ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
+-ip -net ns1 route add default via 10.0.1.1
+-ip -net ns1 addr add dead:1::99/64 dev eth0.10
+-
+-if test_tcp_forwarding_nat ns1 ns2; then
++ip -net $nsr1 link set veth0 nomaster
++ip -net $nsr1 link set down dev veth0
++ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
++ip -net $nsr1 link set up dev veth0
++ip -net $nsr1 link set up dev veth0.10
++ip -net $nsr1 link set veth0.10 master br0
++
++ip -net $ns1 addr flush dev eth0
++ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
++ip -net $ns1 link set eth0 up
++ip -net $ns1 link set eth0.10 up
++ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
++ip -net $ns1 route add default via 10.0.1.1
++ip -net $ns1 addr add dead:1::99/64 dev eth0.10
++
++if test_tcp_forwarding_nat $ns1 $ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
+ else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+- ip netns exec nsr1 nft list ruleset
++ ip netns exec $nsr1 nft list ruleset
+ ret=1
+ fi
+
+ # restore test topology (remove bridge and VLAN)
+-ip -net nsr1 link set veth0 nomaster
+-ip -net nsr1 link set veth0 down
+-ip -net nsr1 link set veth0.10 down
+-ip -net nsr1 link delete veth0.10 type vlan
+-ip -net nsr1 link delete br0 type bridge
+-ip -net ns1 addr flush dev eth0.10
+-ip -net ns1 link set eth0.10 down
+-ip -net ns1 link set eth0 down
+-ip -net ns1 link delete eth0.10 type vlan
++ip -net $nsr1 link set veth0 nomaster
++ip -net $nsr1 link set veth0 down
++ip -net $nsr1 link set veth0.10 down
++ip -net $nsr1 link delete veth0.10 type vlan
++ip -net $nsr1 link delete br0 type bridge
++ip -net $ns1 addr flush dev eth0.10
++ip -net $ns1 link set eth0.10 down
++ip -net $ns1 link set eth0 down
++ip -net $ns1 link delete eth0.10 type vlan
+
+ # restore address in ns1 and nsr1
+-ip -net ns1 link set eth0 up
+-ip -net ns1 addr add 10.0.1.99/24 dev eth0
+-ip -net ns1 route add default via 10.0.1.1
+-ip -net ns1 addr add dead:1::99/64 dev eth0
+-ip -net ns1 route add default via dead:1::1
+-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+-ip -net nsr1 addr add dead:1::1/64 dev veth0
+-ip -net nsr1 link set up dev veth0
++ip -net $ns1 link set eth0 up
++ip -net $ns1 addr add 10.0.1.99/24 dev eth0
++ip -net $ns1 route add default via 10.0.1.1
++ip -net $ns1 addr add dead:1::99/64 dev eth0
++ip -net $ns1 route add default via dead:1::1
++ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
++ip -net $nsr1 addr add dead:1::1/64 dev veth0
++ip -net $nsr1 link set up dev veth0
+
+ KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
+ KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
+@@ -480,23 +490,23 @@ do_esp() {
+
+ }
+
+-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
++do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+
+-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
++do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+
+-ip netns exec nsr1 nft delete table ip nat
++ip netns exec $nsr1 nft delete table ip nat
+
+ # restore default routes
+-ip -net ns2 route del 192.168.10.1 via 10.0.2.1
+-ip -net ns2 route add default via 10.0.2.1
+-ip -net ns2 route add default via dead:2::1
++ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
++ip -net $ns2 route add default via 10.0.2.1
++ip -net $ns2 route add default via dead:2::1
+
+-if test_tcp_forwarding ns1 ns2; then
++if test_tcp_forwarding $ns1 $ns2; then
+ echo "PASS: ipsec tunnel mode for ns1/ns2"
+ else
+ echo "FAIL: ipsec tunnel mode for ns1/ns2"
+- ip netns exec nsr1 nft list ruleset 1>&2
+- ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
++ ip netns exec $nsr1 nft list ruleset 1>&2
++ ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
+ fi
+
+ exit $ret
+diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
+index da1c1e4b6c86b..462dc47420b65 100755
+--- a/tools/testing/selftests/netfilter/nft_nat.sh
++++ b/tools/testing/selftests/netfilter/nft_nat.sh
+@@ -374,6 +374,47 @@ EOF
+ return $lret
+ }
+
++test_local_dnat_portonly()
++{
++ local family=$1
++ local daddr=$2
++ local lret=0
++ local sr_s
++ local sr_r
++
++ip netns exec "$ns0" nft -f /dev/stdin <<EOF
++table $family nat {
++ chain output {
++ type nat hook output priority 0; policy accept;
++ meta l4proto tcp dnat to :2000
++
++ }
++}
++EOF
++ if [ $? -ne 0 ]; then
++ if [ $family = "inet" ];then
++ echo "SKIP: inet port test"
++ test_inet_nat=false
++ return
++ fi
++ echo "SKIP: Could not add $family dnat hook"
++ return
++ fi
++
++ echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
++ sc_s=$!
++
++ sleep 1
++
++ result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
++
++ if [ "$result" = "SERVER-inet" ];then
++ echo "PASS: inet port rewrite without l3 address"
++ else
++ echo "ERROR: inet port rewrite"
++ ret=1
++ fi
++}
+
+ test_masquerade6()
+ {
+@@ -885,6 +926,144 @@ EOF
+ ip netns exec "$ns0" nft delete table $family nat
+ }
+
++test_stateless_nat_ip()
++{
++ local lret=0
++
++ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
++ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
++
++ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
++ if [ $? -ne 0 ] ; then
++ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
++ return 1
++ fi
++
++ip netns exec "$ns0" nft -f /dev/stdin <<EOF
++table ip stateless {
++ map xlate_in {
++ typeof meta iifname . ip saddr . ip daddr : ip daddr
++ elements = {
++ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
++ }
++ }
++ map xlate_out {
++ typeof meta iifname . ip saddr . ip daddr : ip daddr
++ elements = {
++ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
++ }
++ }
++
++ chain prerouting {
++ type filter hook prerouting priority -400; policy accept;
++ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
++ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
++ }
++}
++EOF
++ if [ $? -ne 0 ]; then
++ echo "SKIP: Could not add ip stateless rules"
++ return $ksft_skip
++ fi
++
++ reset_counters
++
++ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
++ if [ $? -ne 0 ] ; then
++ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
++ lret=1
++ fi
++
++ # ns1 should have seen packets from .2.2, due to stateless rewrite.
++ expect="packets 1 bytes 84"
++ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
++ if [ $? -ne 0 ]; then
++ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
++ lret=1
++ fi
++
++ for dir in "in" "out" ; do
++ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
++ if [ $? -ne 0 ]; then
++ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
++ lret=1
++ fi
++ done
++
++ # ns1 should not have seen packets from ns2, due to masquerade
++ expect="packets 0 bytes 0"
++ for dir in "in" "out" ; do
++ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
++ if [ $? -ne 0 ]; then
++ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
++ lret=1
++ fi
++
++ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
++ if [ $? -ne 0 ]; then
++ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
++ lret=1
++ fi
++ done
++
++ reset_counters
++
++ socat -h > /dev/null 2>&1
++ if [ $? -ne 0 ];then
++ echo "SKIP: Could not run stateless nat frag test without socat tool"
++ if [ $lret -eq 0 ]; then
++ return $ksft_skip
++ fi
++
++ ip netns exec "$ns0" nft delete table ip stateless
++ return $lret
++ fi
++
++ local tmpfile=$(mktemp)
++ dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
++
++ local outfile=$(mktemp)
++ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
++ sc_r=$!
++
++ sleep 1
++ # re-do with large ping -> ip fragmentation
++ ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
++ if [ $? -ne 0 ] ; then
++ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
++ lret=1
++ fi
++
++ wait
++
++ cmp "$tmpfile" "$outfile"
++ if [ $? -ne 0 ]; then
++ ls -l "$tmpfile" "$outfile"
++ echo "ERROR: input and output file mismatch when checking udp with stateless nat" 1>&2
++ lret=1
++ fi
++
++ rm -f "$tmpfile" "$outfile"
++
++ # ns1 should have seen packets from 2.2, due to stateless rewrite.
++ expect="packets 3 bytes 4164"
++ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
++ if [ $? -ne 0 ]; then
++ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
++ lret=1
++ fi
++
++ ip netns exec "$ns0" nft delete table ip stateless
++ if [ $? -ne 0 ]; then
++ echo "ERROR: Could not delete table ip stateless" 1>&2
++ lret=1
++ fi
++
++ test $lret -eq 0 && echo "PASS: IP stateless for $ns2"
++
++ return $lret
++}
++
+ # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
+ for i in 0 1 2; do
+ ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
+@@ -951,6 +1130,19 @@ table inet filter {
+ EOF
+ done
+
++# special case for stateless nat check, counter needs to
++# be done before (input) ip defragmentation
++ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
++table inet filter {
++ counter ns0insl {}
++
++ chain pre {
++ type filter hook prerouting priority -400; policy accept;
++ ip saddr 10.0.2.2 counter name "ns0insl"
++ }
++}
++EOF
++
+ sleep 3
+ # test basic connectivity
+ for i in 1 2; do
+@@ -984,6 +1176,10 @@ fi
+ reset_counters
+ test_local_dnat ip
+ test_local_dnat6 ip6
++
++reset_counters
++test_local_dnat_portonly inet 10.0.1.99
++
+ reset_counters
+ $test_inet_nat && test_local_dnat inet
+ $test_inet_nat && test_local_dnat6 inet
+@@ -1005,6 +1201,7 @@ $test_inet_nat && test_redirect inet
+ $test_inet_nat && test_redirect6 inet
+
+ test_port_shadowing
++test_stateless_nat_ip
+
+ if [ $ret -ne 0 ];then
+ echo -n "FAIL: "
+diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
+index 4b93b1417b862..843ba56d8e49e 100644
+--- a/tools/testing/selftests/openat2/Makefile
++++ b/tools/testing/selftests/openat2/Makefile
+@@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+
+ include ../lib.mk
+
+-$(TEST_GEN_PROGS): helpers.c
++$(TEST_GEN_PROGS): helpers.c helpers.h
+diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
+index a6ea27344db2d..7056340b9339e 100644
+--- a/tools/testing/selftests/openat2/helpers.h
++++ b/tools/testing/selftests/openat2/helpers.h
+@@ -9,6 +9,7 @@
+
+ #define _GNU_SOURCE
+ #include <stdint.h>
++#include <stdbool.h>
+ #include <errno.h>
+ #include <linux/types.h>
+ #include "../kselftest.h"
+@@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how);
+ (similar to chroot(2)). */
+ #endif /* RESOLVE_IN_ROOT */
+
+-#define E_func(func, ...) \
+- do { \
+- if (func(__VA_ARGS__) < 0) \
+- ksft_exit_fail_msg("%s:%d %s failed\n", \
+- __FILE__, __LINE__, #func);\
++#define E_func(func, ...) \
++ do { \
++ errno = 0; \
++ if (func(__VA_ARGS__) < 0) \
++ ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \
++ __FILE__, __LINE__, #func, errno); \
+ } while (0)
+
+ #define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
+diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
+index 1bddbe934204c..7fb902099de45 100644
+--- a/tools/testing/selftests/openat2/openat2_test.c
++++ b/tools/testing/selftests/openat2/openat2_test.c
+@@ -259,6 +259,16 @@ void test_openat2_flags(void)
+ unlink(path);
+
+ fd = sys_openat2(AT_FDCWD, path, &test->how);
++ if (fd < 0 && fd == -EOPNOTSUPP) {
++ /*
++ * Skip the testcase if it failed because not supported
++ * by FS. (e.g. a valid O_TMPFILE combination on NFS)
++ */
++ ksft_test_result_skip("openat2 with %s fails with %d (%s)\n",
++ test->name, fd, strerror(-fd));
++ goto next;
++ }
++
+ if (test->err >= 0)
+ failed = (fd < 0);
+ else
+@@ -303,7 +313,7 @@ skip:
+ else
+ resultfn("openat2 with %s fails with %d (%s)\n",
+ test->name, test->err, strerror(-test->err));
+-
++next:
+ free(fdpath);
+ fflush(stdout);
+ }
+diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
+index 01f8d3c0cf2cb..6922d6417e1cf 100644
+--- a/tools/testing/selftests/pidfd/pidfd.h
++++ b/tools/testing/selftests/pidfd/pidfd.h
+@@ -68,7 +68,7 @@
+ #define PIDFD_SKIP 3
+ #define PIDFD_XFAIL 4
+
+-int wait_for_pid(pid_t pid)
++static inline int wait_for_pid(pid_t pid)
+ {
+ int status, ret;
+
+@@ -78,13 +78,20 @@ again:
+ if (errno == EINTR)
+ goto again;
+
++ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
+ return -1;
+ }
+
+- if (!WIFEXITED(status))
++ if (!WIFEXITED(status)) {
++ ksft_print_msg(
++ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
++ WIFSIGNALED(status), WTERMSIG(status));
+ return -1;
++ }
+
+- return WEXITSTATUS(status);
++ ret = WEXITSTATUS(status);
++ ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
++ return ret;
+ }
+
+ static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+index 22558524f71c3..3fd8e903118f5 100644
+--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
++++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+@@ -12,6 +12,7 @@
+ #include <string.h>
+ #include <syscall.h>
+ #include <sys/wait.h>
++#include <sys/mman.h>
+
+ #include "pidfd.h"
+ #include "../kselftest.h"
+@@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name)
+ return err->code;
+ }
+
++#define CHILD_STACK_SIZE 8192
++
+ struct child {
++ char *stack;
+ pid_t pid;
+ int fd;
+ };
+@@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args,
+ struct error *err)
+ {
+ static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;
+- size_t stack_size = 1024;
+- char *stack[1024] = { 0 };
+ struct child ret;
+
+ if (!(flags & CLONE_NEWUSER) && geteuid() != 0)
+ flags |= CLONE_NEWUSER;
+
++ ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE,
++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
++ if (ret.stack == MAP_FAILED) {
++ error_set(err, -1, "mmap of stack failed (errno %d)", errno);
++ return ret;
++ }
++
+ #ifdef __ia64__
+- ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd);
++ ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd);
+ #else
+- ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd);
++ ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd);
+ #endif
+
+ if (ret.pid < 0) {
+@@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err)
+ else if (r > 0)
+ error_set(err, r, "child %d reported: %d", child->pid, r);
+
++ if (munmap(child->stack, CHILD_STACK_SIZE)) {
++ error_set(err, -1, "munmap of child stack failed (errno %d)", errno);
++ r = -1;
++ }
++
+ return r;
+ }
+
+diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
+index 529eb700ac26a..9a2d64901d591 100644
+--- a/tools/testing/selftests/pidfd/pidfd_test.c
++++ b/tools/testing/selftests/pidfd/pidfd_test.c
+@@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid)
+ {
+ int pid, pidfd = 0;
+ int status, ret;
+- pthread_t t1;
+ time_t prog_start = time(NULL);
+ const char *test_name = "pidfd_poll check for premature notification on child thread exec";
+
+@@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args)
+ */
+ *child_exit_secs = time(NULL);
+ syscall(SYS_exit, 0);
++ /* Never reached, but appeases compiler thinking we should return. */
++ exit(0);
+ }
+
+ static void test_pidfd_poll_leader_exit(int use_waitpid)
+ {
+ int pid, pidfd = 0;
+- int status, ret;
+- time_t prog_start = time(NULL);
++ int status, ret = 0;
+ const char *test_name = "pidfd_poll check for premature notification on non-empty"
+ "group leader exit";
+
+diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
+index be2943f072f60..17999e082aa71 100644
+--- a/tools/testing/selftests/pidfd/pidfd_wait.c
++++ b/tools/testing/selftests/pidfd/pidfd_wait.c
+@@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
+
+ TEST(wait_simple)
+ {
+- int pidfd = -1, status = 0;
++ int pidfd = -1;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+@@ -47,7 +47,6 @@ TEST(wait_simple)
+ .flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+- int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+@@ -88,7 +87,7 @@ TEST(wait_simple)
+
+ TEST(wait_states)
+ {
+- int pidfd = -1, status = 0;
++ int pidfd = -1;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+index fbbdffdb2e5d2..f20d1c166d1e4 100644
+--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
++++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+@@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val)
+ rc = read(fd, buf, sizeof(buf));
+ if (rc == -1) {
+ perror("read() failed");
++ close(fd);
+ return 1;
+ }
+ close(fd);
+@@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
+ if (access(file, F_OK))
+ continue;
+
+- if (check_cpu_dscr_default(file, val))
++ if (check_cpu_dscr_default(file, val)) {
++ closedir(sysfs);
+ return 1;
++ }
+ }
+ closedir(sysfs);
+ return 0;
+diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
+index adc2b7294e5fd..83647b8277e7d 100644
+--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
++++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
+@@ -193,7 +193,7 @@ int spectre_v2_test(void)
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+- if (state == VULNERABLE)
++ if (miss_percent > 95)
+ return 4;
+
+ return 1;
+diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
+index ce3375cd8e73e..8f6c816099a48 100644
+--- a/tools/testing/selftests/powerpc/signal/.gitignore
++++ b/tools/testing/selftests/powerpc/signal/.gitignore
+@@ -4,3 +4,4 @@ signal_tm
+ sigfuz
+ sigreturn_vdso
+ sig_sc_double_restart
++sigreturn_kernel
+diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
+index d6ae54663aed7..84e201572466d 100644
+--- a/tools/testing/selftests/powerpc/signal/Makefile
++++ b/tools/testing/selftests/powerpc/signal/Makefile
+@@ -1,5 +1,6 @@
+ # SPDX-License-Identifier: GPL-2.0
+ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
++TEST_GEN_PROGS += sigreturn_kernel
+
+ CFLAGS += -maltivec
+ $(OUTPUT)/signal_tm: CFLAGS += -mhtm
+diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
+new file mode 100644
+index 0000000000000..0a1b6e591eeed
+--- /dev/null
++++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
+@@ -0,0 +1,132 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Test that we can't sigreturn to kernel addresses, or to kernel mode.
++ */
++
++#define _GNU_SOURCE
++
++#include <stdio.h>
++#include <signal.h>
++#include <stdlib.h>
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "utils.h"
++
++#define MSR_PR (1ul << 14)
++
++static volatile unsigned long long sigreturn_addr;
++static volatile unsigned long long sigreturn_msr_mask;
++
++static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr)
++{
++ ucontext_t *uc = (ucontext_t *)uc_ptr;
++
++ if (sigreturn_addr)
++ UCONTEXT_NIA(uc) = sigreturn_addr;
++
++ if (sigreturn_msr_mask)
++ UCONTEXT_MSR(uc) &= sigreturn_msr_mask;
++}
++
++static pid_t fork_child(void)
++{
++ pid_t pid;
++
++ pid = fork();
++ if (pid == 0) {
++ raise(SIGUSR1);
++ exit(0);
++ }
++
++ return pid;
++}
++
++static int expect_segv(pid_t pid)
++{
++ int child_ret;
++
++ waitpid(pid, &child_ret, 0);
++ FAIL_IF(WIFEXITED(child_ret));
++ FAIL_IF(!WIFSIGNALED(child_ret));
++ FAIL_IF(WTERMSIG(child_ret) != 11);
++
++ return 0;
++}
++
++int test_sigreturn_kernel(void)
++{
++ struct sigaction act;
++ int child_ret, i;
++ pid_t pid;
++
++ act.sa_sigaction = sigusr1_handler;
++ act.sa_flags = SA_SIGINFO;
++ sigemptyset(&act.sa_mask);
++
++ FAIL_IF(sigaction(SIGUSR1, &act, NULL));
++
++ for (i = 0; i < 2; i++) {
++ // Return to kernel
++ sigreturn_addr = 0xcull << 60;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to kernel virtual
++ sigreturn_addr = 0xc008ull << 48;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return out of range
++ sigreturn_addr = 0xc010ull << 48;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to no-man's land, just below PAGE_OFFSET
++ sigreturn_addr = (0xcull << 60) - (64 * 1024);
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to no-man's land, above TASK_SIZE_4PB
++ sigreturn_addr = 0x1ull << 52;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to 0xd space
++ sigreturn_addr = 0xdull << 60;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to 0xe space
++ sigreturn_addr = 0xeull << 60;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Return to 0xf space
++ sigreturn_addr = 0xfull << 60;
++ pid = fork_child();
++ expect_segv(pid);
++
++ // Attempt to set PR=0 for 2nd loop (should be blocked by kernel)
++ sigreturn_msr_mask = ~MSR_PR;
++ }
++
++ printf("All children killed as expected\n");
++
++ // Don't change address, just MSR, should return to user as normal
++ sigreturn_addr = 0;
++ sigreturn_msr_mask = ~MSR_PR;
++ pid = fork_child();
++ waitpid(pid, &child_ret, 0);
++ FAIL_IF(!WIFEXITED(child_ret));
++ FAIL_IF(WIFSIGNALED(child_ret));
++ FAIL_IF(WEXITSTATUS(child_ret) != 0);
++
++ return 0;
++}
++
++int main(void)
++{
++ return test_harness(test_sigreturn_kernel, "sigreturn_kernel");
++}
+diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
+index e7ceabed7f51f..7d0aa22bdc12b 100644
+--- a/tools/testing/selftests/proc/proc-uptime-002.c
++++ b/tools/testing/selftests/proc/proc-uptime-002.c
+@@ -17,6 +17,7 @@
+ // while shifting across CPUs.
+ #undef NDEBUG
+ #include <assert.h>
++#include <errno.h>
+ #include <unistd.h>
+ #include <sys/syscall.h>
+ #include <stdlib.h>
+@@ -54,7 +55,7 @@ int main(void)
+ len += sizeof(unsigned long);
+ free(m);
+ m = malloc(len);
+- } while (sys_sched_getaffinity(0, len, m) == -EINVAL);
++ } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL);
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
+index f7911aaeb0075..aa474febb4712 100644
+--- a/tools/testing/selftests/ptp/testptp.c
++++ b/tools/testing/selftests/ptp/testptp.c
+@@ -492,11 +492,11 @@ int main(int argc, char *argv[])
+ interval = t2 - t1;
+ offset = (t2 + t1) / 2 - tp;
+
+- printf("system time: %lld.%u\n",
++ printf("system time: %lld.%09u\n",
+ (pct+2*i)->sec, (pct+2*i)->nsec);
+- printf("phc time: %lld.%u\n",
++ printf("phc time: %lld.%09u\n",
+ (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+- printf("system time: %lld.%u\n",
++ printf("system time: %lld.%09u\n",
+ (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+ printf("system/phc clock time offset is %" PRId64 " ns\n"
+ "system clock time delay is %" PRId64 " ns\n",
+diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
+index 363f56081eff3..66f0f724a1a6d 100755
+--- a/tools/testing/selftests/rcutorture/bin/torture.sh
++++ b/tools/testing/selftests/rcutorture/bin/torture.sh
+@@ -71,8 +71,8 @@ usage () {
+ echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
+ echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
+ echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
+- echo " --doall"
+- echo " --doallmodconfig / --do-no-allmodconfig"
++ echo " --do-all"
++ echo " --do-allmodconfig / --do-no-allmodconfig"
+ echo " --do-clocksourcewd / --do-no-clocksourcewd"
+ echo " --do-kasan / --do-no-kasan"
+ echo " --do-kcsan / --do-no-kcsan"
+diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
+index f57720c52c0f9..84f6bb98ce993 100644
+--- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
++++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
+@@ -5,4 +5,4 @@ rcutree.gp_init_delay=3
+ rcutree.gp_cleanup_delay=3
+ rcutree.kthread_prio=2
+ threadirqs
+-tree.use_softirq=0
++rcutree.use_softirq=0
+diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+index 64f864f1f361f..8e50bfd4b710d 100644
+--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
++++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+@@ -4,4 +4,4 @@ rcutree.gp_init_delay=3
+ rcutree.gp_cleanup_delay=3
+ rcutree.kthread_prio=2
+ threadirqs
+-tree.use_softirq=0
++rcutree.use_softirq=0
+diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
+index 6bcee2ec91a9c..9cc7e0108c8b0 100644
+--- a/tools/testing/selftests/resctrl/Makefile
++++ b/tools/testing/selftests/resctrl/Makefile
+@@ -1,17 +1,8 @@
+-CC = $(CROSS_COMPILE)gcc
+ CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
+-SRCS=$(wildcard *.c)
+-OBJS=$(SRCS:.c=.o)
++CFLAGS += $(KHDR_INCLUDES)
+
+-all: resctrl_tests
++TEST_GEN_PROGS := resctrl_tests
+
+-$(OBJS): $(SRCS)
+- $(CC) $(CFLAGS) -c $(SRCS)
++include ../lib.mk
+
+-resctrl_tests: $(OBJS)
+- $(CC) $(CFLAGS) -o $@ $^
+-
+-.PHONY: clean
+-
+-clean:
+- $(RM) $(OBJS) resctrl_tests
++$(OUTPUT)/resctrl_tests: $(wildcard *.[ch])
+diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
+index 68ff856d36f0b..338f714453935 100644
+--- a/tools/testing/selftests/resctrl/cache.c
++++ b/tools/testing/selftests/resctrl/cache.c
+@@ -89,21 +89,19 @@ static int reset_enable_llc_perf(pid_t pid, int cpu_no)
+ static int get_llc_perf(unsigned long *llc_perf_miss)
+ {
+ __u64 total_misses;
++ int ret;
+
+ /* Stop counters after one span to get miss rate */
+
+ ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
+
+- if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
++ ret = read(fd_lm, &rf_cqm, sizeof(struct read_format));
++ if (ret == -1) {
+ perror("Could not get llc misses through perf");
+-
+ return -1;
+ }
+
+ total_misses = rf_cqm.values[0].value;
+-
+- close(fd_lm);
+-
+ *llc_perf_miss = total_misses;
+
+ return 0;
+@@ -244,10 +242,12 @@ int cat_val(struct resctrl_val_param *param)
+ while (1) {
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
+ ret = param->setup(1, param);
+- if (ret) {
++ if (ret == END_OF_TESTS) {
+ ret = 0;
+ break;
+ }
++ if (ret < 0)
++ break;
+ ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
+ if (ret)
+ break;
+@@ -256,19 +256,25 @@ int cat_val(struct resctrl_val_param *param)
+ memflush, operation, resctrl_val)) {
+ fprintf(stderr, "Error-running fill buffer\n");
+ ret = -1;
+- break;
++ goto pe_close;
+ }
+
+ sleep(1);
+ ret = measure_cache_vals(param, bm_pid);
+ if (ret)
+- break;
++ goto pe_close;
++
++ close(fd_lm);
+ } else {
+ break;
+ }
+ }
+
+ return ret;
++
++pe_close:
++ close(fd_lm);
++ return ret;
+ }
+
+ /*
+diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
+index cd4f68388e0f6..2d3c7c77ab6cb 100644
+--- a/tools/testing/selftests/resctrl/cat_test.c
++++ b/tools/testing/selftests/resctrl/cat_test.c
+@@ -40,7 +40,7 @@ static int cat_setup(int num, ...)
+
+ /* Run NUM_OF_RUNS times */
+ if (p->num_of_runs >= NUM_OF_RUNS)
+- return -1;
++ return END_OF_TESTS;
+
+ if (p->num_of_runs == 0) {
+ sprintf(schemata, "%lx", p->mask);
+@@ -89,7 +89,7 @@ static int check_results(struct resctrl_val_param *param)
+
+ return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
+ MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+- !is_amd, false);
++ get_vendor() == ARCH_INTEL, false);
+ }
+
+ void cat_test_cleanup(void)
+diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
+index 8968e36db99d7..3b0454e7fc826 100644
+--- a/tools/testing/selftests/resctrl/cmt_test.c
++++ b/tools/testing/selftests/resctrl/cmt_test.c
+@@ -32,7 +32,7 @@ static int cmt_setup(int num, ...)
+
+ /* Run NUM_OF_RUNS times */
+ if (p->num_of_runs >= NUM_OF_RUNS)
+- return -1;
++ return END_OF_TESTS;
+
+ p->num_of_runs++;
+
+diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
+index 51e5cf22632f7..ab1d91328d67b 100644
+--- a/tools/testing/selftests/resctrl/fill_buf.c
++++ b/tools/testing/selftests/resctrl/fill_buf.c
+@@ -68,6 +68,8 @@ static void *malloc_and_init_memory(size_t s)
+ size_t s64;
+
+ void *p = memalign(PAGE_SIZE, s);
++ if (!p)
++ return NULL;
+
+ p64 = (uint64_t *)p;
+ s64 = s / sizeof(uint64_t);
+@@ -121,8 +123,10 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
+
+ /* Consume read result so that reading memory is not optimized out. */
+ fp = fopen("/dev/null", "w");
+- if (!fp)
++ if (!fp) {
+ perror("Unable to write to /dev/null");
++ return -1;
++ }
+ fprintf(fp, "Sum: %d ", ret);
+ fclose(fp);
+
+@@ -180,12 +184,13 @@ fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
+ else
+ ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
+
++ free(startptr);
++
+ if (ret) {
+ printf("\n Error in fill cache read/write...\n");
+ return -1;
+ }
+
+- free(startptr);
+
+ return 0;
+ }
+diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
+index 1a1bdb6180cf2..97dc98c0c9497 100644
+--- a/tools/testing/selftests/resctrl/mba_test.c
++++ b/tools/testing/selftests/resctrl/mba_test.c
+@@ -28,6 +28,7 @@ static int mba_setup(int num, ...)
+ struct resctrl_val_param *p;
+ char allocation_str[64];
+ va_list param;
++ int ret;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+@@ -41,11 +42,15 @@ static int mba_setup(int num, ...)
+ return 0;
+
+ if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX)
+- return -1;
++ return END_OF_TESTS;
+
+ sprintf(allocation_str, "%d", allocation);
+
+- write_schemata(p->ctrlgrp, allocation_str, p->cpu_no, p->resctrl_val);
++ ret = write_schemata(p->ctrlgrp, allocation_str, p->cpu_no,
++ p->resctrl_val);
++ if (ret < 0)
++ return ret;
++
+ allocation -= ALLOCATION_STEP;
+
+ return 0;
+diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
+index 8392e5c55ed02..280187628054d 100644
+--- a/tools/testing/selftests/resctrl/mbm_test.c
++++ b/tools/testing/selftests/resctrl/mbm_test.c
+@@ -95,7 +95,7 @@ static int mbm_setup(int num, ...)
+
+ /* Run NUM_OF_RUNS times */
+ if (num_of_runs++ >= NUM_OF_RUNS)
+- return -1;
++ return END_OF_TESTS;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
+index 1ad10c47e31d1..dbe5cfb545585 100644
+--- a/tools/testing/selftests/resctrl/resctrl.h
++++ b/tools/testing/selftests/resctrl/resctrl.h
+@@ -34,10 +34,16 @@
+ #define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON"
+ #define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features"
+
++#define ARCH_INTEL 1
++#define ARCH_AMD 2
++
++#define END_OF_TESTS 1
++
+ #define PARENT_EXIT(err_msg) \
+ do { \
+ perror(err_msg); \
+ kill(ppid, SIGKILL); \
++ umount_resctrlfs(); \
+ exit(EXIT_FAILURE); \
+ } while (0)
+
+@@ -75,8 +81,8 @@ struct resctrl_val_param {
+ extern pid_t bm_pid, ppid;
+
+ extern char llc_occup_path[1024];
+-extern bool is_amd;
+
++int get_vendor(void);
+ bool check_resctrlfs_support(void);
+ int filter_dmesg(void);
+ int remount_resctrlfs(bool mum_resctrlfs);
+diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
+index 973f09a66e1ee..3e7cdf1125df4 100644
+--- a/tools/testing/selftests/resctrl/resctrl_tests.c
++++ b/tools/testing/selftests/resctrl/resctrl_tests.c
+@@ -13,25 +13,41 @@
+ #define BENCHMARK_ARGS 64
+ #define BENCHMARK_ARG_SIZE 64
+
+-bool is_amd;
+-
+-void detect_amd(void)
++static int detect_vendor(void)
+ {
+ FILE *inf = fopen("/proc/cpuinfo", "r");
++ int vendor_id = 0;
++ char *s = NULL;
+ char *res;
+
+ if (!inf)
+- return;
++ return vendor_id;
+
+ res = fgrep(inf, "vendor_id");
+
+- if (res) {
+- char *s = strchr(res, ':');
++ if (res)
++ s = strchr(res, ':');
++
++ if (s && !strcmp(s, ": GenuineIntel\n"))
++ vendor_id = ARCH_INTEL;
++ else if (s && !strcmp(s, ": AuthenticAMD\n"))
++ vendor_id = ARCH_AMD;
+
+- is_amd = s && !strcmp(s, ": AuthenticAMD\n");
+- free(res);
+- }
+ fclose(inf);
++ free(res);
++ return vendor_id;
++}
++
++int get_vendor(void)
++{
++ static int vendor = -1;
++
++ if (vendor == -1)
++ vendor = detect_vendor();
++ if (vendor == 0)
++ ksft_print_msg("Can not get vendor info...\n");
++
++ return vendor;
+ }
+
+ static void cmd_help(void)
+@@ -207,9 +223,6 @@ int main(int argc, char **argv)
+ if (geteuid() != 0)
+ return ksft_exit_fail_msg("Not running as root, abort testing.\n");
+
+- /* Detect AMD vendor */
+- detect_amd();
+-
+ if (has_ben) {
+ /* Extract benchmark command from command line. */
+ for (i = ben_ind; i < argc; i++) {
+@@ -241,10 +254,10 @@ int main(int argc, char **argv)
+
+ ksft_set_plan(tests ? : 4);
+
+- if (!is_amd && mbm_test)
++ if ((get_vendor() == ARCH_INTEL) && mbm_test)
+ run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
+
+- if (!is_amd && mba_test)
++ if ((get_vendor() == ARCH_INTEL) && mba_test)
+ run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
+
+ if (cmt_test)
+diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
+index 95224345c78e7..02110e7ee6361 100644
+--- a/tools/testing/selftests/resctrl/resctrl_val.c
++++ b/tools/testing/selftests/resctrl/resctrl_val.c
+@@ -733,29 +733,24 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+
+ /* Test runs until the callback setup() tells the test to stop. */
+ while (1) {
++ ret = param->setup(1, param);
++ if (ret == END_OF_TESTS) {
++ ret = 0;
++ break;
++ }
++ if (ret < 0)
++ break;
++
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
+- ret = param->setup(1, param);
+- if (ret) {
+- ret = 0;
+- break;
+- }
+-
+ ret = measure_vals(param, &bw_resc_start);
+ if (ret)
+ break;
+ } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
+- ret = param->setup(1, param);
+- if (ret) {
+- ret = 0;
+- break;
+- }
+ sleep(1);
+ ret = measure_cache_vals(param, bm_pid);
+ if (ret)
+ break;
+- } else {
+- break;
+ }
+ }
+
+diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
+index 5f5a166ade60a..6f543e470ad4a 100644
+--- a/tools/testing/selftests/resctrl/resctrlfs.c
++++ b/tools/testing/selftests/resctrl/resctrlfs.c
+@@ -106,7 +106,7 @@ int get_resource_id(int cpu_no, int *resource_id)
+ char phys_pkg_path[1024];
+ FILE *fp;
+
+- if (is_amd)
++ if (get_vendor() == ARCH_AMD)
+ sprintf(phys_pkg_path, "%s%d/cache/index3/id",
+ PHYS_ID_PATH, cpu_no);
+ else
+diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
+index 2af9d39a97168..82ceca6aab965 100644
+--- a/tools/testing/selftests/rseq/Makefile
++++ b/tools/testing/selftests/rseq/Makefile
+@@ -4,9 +4,11 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+ CLANG_FLAGS += -no-integrated-as
+ endif
+
++top_srcdir = ../../../..
++
+ CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
+- $(CLANG_FLAGS)
+-LDLIBS += -lpthread
++ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
++LDLIBS += -lpthread -ldl
+
+ # Own dependencies because we only want to build against 1st prerequisite, but
+ # still track changes to header files and depend on shared object.
+diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+index eb3f6db36d369..517756afc2a4e 100644
+--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
++++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+@@ -9,10 +9,9 @@
+ #include <string.h>
+ #include <stddef.h>
+
++#include "../kselftest.h"
+ #include "rseq.h"
+
+-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+-
+ struct percpu_lock_entry {
+ intptr_t v;
+ } __attribute__((aligned(128)));
+@@ -168,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+- off_t offset;
++ long offset;
+ int ret, cpu;
+
+ cpu = rseq_cpu_start();
+diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h
+new file mode 100644
+index 0000000000000..876eb6a7f75be
+--- /dev/null
++++ b/tools/testing/selftests/rseq/compiler.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
++/*
++ * rseq/compiler.h
++ *
++ * Work-around asm goto compiler bugs.
++ *
++ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#ifndef RSEQ_COMPILER_H
++#define RSEQ_COMPILER_H
++
++/*
++ * gcc prior to 4.8.2 miscompiles asm goto.
++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
++ *
++ * gcc prior to 8.1.0 miscompiles asm goto at O1.
++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908
++ *
++ * clang prior to version 13.0.1 miscompiles asm goto at O2.
++ * https://github.com/llvm/llvm-project/issues/52735
++ *
++ * Work around these issues by adding a volatile inline asm with
++ * memory clobber in the fallthrough after the asm goto and at each
++ * label target. Emit this for all compilers in case other similar
++ * issues are found in the future.
++ */
++#define rseq_after_asm_goto() asm volatile ("" : : : "memory")
++
++#endif /* RSEQ_COMPILER_H_ */
+diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
+index 699ad5f93c34f..da23c22d58820 100644
+--- a/tools/testing/selftests/rseq/param_test.c
++++ b/tools/testing/selftests/rseq/param_test.c
+@@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort;
+ " cbnz " INJECT_ASM_REG ", 222b\n" \
+ "333:\n"
+
+-#elif __PPC__
++#elif defined(__PPC__)
+
+ #define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+@@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg)
+ abort();
+ reps = thread_data->reps;
+ for (i = 0; i < reps; i++) {
+- int cpu = rseq_cpu_start();
+-
+- cpu = rseq_this_cpu_lock(&data->lock);
++ int cpu = rseq_this_cpu_lock(&data->lock);
+ data->c[cpu].count++;
+ rseq_percpu_unlock(&data->lock, cpu);
+ #ifndef BENCHMARK
+@@ -551,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+- off_t offset;
++ long offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
+new file mode 100644
+index 0000000000000..a8c44d9af71fb
+--- /dev/null
++++ b/tools/testing/selftests/rseq/rseq-abi.h
+@@ -0,0 +1,151 @@
++/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
++#ifndef _RSEQ_ABI_H
++#define _RSEQ_ABI_H
++
++/*
++ * rseq-abi.h
++ *
++ * Restartable sequences system call API
++ *
++ * Copyright (c) 2015-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#include <linux/types.h>
++#include <asm/byteorder.h>
++
++enum rseq_abi_cpu_id_state {
++ RSEQ_ABI_CPU_ID_UNINITIALIZED = -1,
++ RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2,
++};
++
++enum rseq_abi_flags {
++ RSEQ_ABI_FLAG_UNREGISTER = (1 << 0),
++};
++
++enum rseq_abi_cs_flags_bit {
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
++};
++
++enum rseq_abi_cs_flags {
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT =
++ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL =
++ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
++ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE =
++ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
++};
++
++/*
++ * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always
++ * contained within a single cache-line. It is usually declared as
++ * link-time constant data.
++ */
++struct rseq_abi_cs {
++ /* Version of this structure. */
++ __u32 version;
++ /* enum rseq_abi_cs_flags */
++ __u32 flags;
++ __u64 start_ip;
++ /* Offset from start_ip. */
++ __u64 post_commit_offset;
++ __u64 abort_ip;
++} __attribute__((aligned(4 * sizeof(__u64))));
++
++/*
++ * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
++ * contained within a single cache-line.
++ *
++ * A single struct rseq_abi per thread is allowed.
++ */
++struct rseq_abi {
++ /*
++ * Restartable sequences cpu_id_start field. Updated by the
++ * kernel. Read by user-space with single-copy atomicity
++ * semantics. This field should only be read by the thread which
++ * registered this data structure. Aligned on 32-bit. Always
++ * contains a value in the range of possible CPUs, although the
++ * value may not be the actual current CPU (e.g. if rseq is not
++ * initialized). This CPU number value should always be compared
++ * against the value of the cpu_id field before performing a rseq
++ * commit or returning a value read from a data structure indexed
++ * using the cpu_id_start value.
++ */
++ __u32 cpu_id_start;
++ /*
++ * Restartable sequences cpu_id field. Updated by the kernel.
++ * Read by user-space with single-copy atomicity semantics. This
++ * field should only be read by the thread which registered this
++ * data structure. Aligned on 32-bit. Values
++ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
++ * have a special semantic: the former means "rseq uninitialized",
++ * and latter means "rseq initialization failed". This value is
++ * meant to be read within rseq critical sections and compared
++ * with the cpu_id_start value previously read, before performing
++ * the commit instruction, or read and compared with the
++ * cpu_id_start value before returning a value loaded from a data
++ * structure indexed using the cpu_id_start value.
++ */
++ __u32 cpu_id;
++ /*
++ * Restartable sequences rseq_cs field.
++ *
++ * Contains NULL when no critical section is active for the current
++ * thread, or holds a pointer to the currently active struct rseq_cs.
++ *
++ * Updated by user-space, which sets the address of the currently
++ * active rseq_cs at the beginning of assembly instruction sequence
++ * block, and set to NULL by the kernel when it restarts an assembly
++ * instruction sequence block, as well as when the kernel detects that
++ * it is preempting or delivering a signal outside of the range
++ * targeted by the rseq_cs. Also needs to be set to NULL by user-space
++ * before reclaiming memory that contains the targeted struct rseq_cs.
++ *
++ * Read and set by the kernel. Set by user-space with single-copy
++ * atomicity semantics. This field should only be updated by the
++ * thread which registered this data structure. Aligned on 64-bit.
++ */
++ union {
++ __u64 ptr64;
++
++ /*
++ * The "arch" field provides architecture accessor for
++ * the ptr field based on architecture pointer size and
++ * endianness.
++ */
++ struct {
++#ifdef __LP64__
++ __u64 ptr;
++#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN)
++ __u32 padding; /* Initialized to zero. */
++ __u32 ptr;
++#else
++ __u32 ptr;
++ __u32 padding; /* Initialized to zero. */
++#endif
++ } arch;
++ } rseq_cs;
++
++ /*
++ * Restartable sequences flags field.
++ *
++ * This field should only be updated by the thread which
++ * registered this data structure. Read by the kernel.
++ * Mainly used for single-stepping through rseq critical sections
++ * with debuggers.
++ *
++ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT
++ * Inhibit instruction sequence block restart on preemption
++ * for this thread.
++ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL
++ * Inhibit instruction sequence block restart on signal
++ * delivery for this thread.
++ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE
++ * Inhibit instruction sequence block restart on migration for
++ * this thread.
++ */
++ __u32 flags;
++} __attribute__((aligned(4 * sizeof(__u64))));
++
++#endif /* _RSEQ_ABI_H */
+diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
+index 5943c816c07ce..893a11eca9d51 100644
+--- a/tools/testing/selftests/rseq/rseq-arm.h
++++ b/tools/testing/selftests/rseq/rseq-arm.h
+@@ -147,14 +147,11 @@ do { \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+-
+ static inline __attribute__((always_inline))
+ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -185,8 +182,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -198,30 +195,31 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -255,8 +253,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -270,19 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -292,7 +292,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ #ifdef RSEQ_COMPARE_TWICE
+@@ -316,8 +315,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+@@ -328,14 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -347,7 +347,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -381,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -398,19 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -422,7 +423,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -474,19 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -498,7 +500,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -537,8 +538,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -554,21 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -582,7 +586,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -657,8 +660,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -678,21 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -706,7 +709,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -782,8 +784,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -803,21 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
+- rseq_workaround_gcc_asm_size_guess();
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
+index 200dae9e4208c..cbe190a4d0056 100644
+--- a/tools/testing/selftests/rseq/rseq-arm64.h
++++ b/tools/testing/selftests/rseq/rseq-arm64.h
+@@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -242,24 +242,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -287,8 +291,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "Qo" (*load),
+@@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -337,8 +346,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+@@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -388,8 +400,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+@@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -447,8 +463,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+@@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -508,8 +528,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [v2] "Qo" (*v2),
+@@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -569,8 +594,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+@@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -629,8 +658,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+@@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+-
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
+new file mode 100644
+index 0000000000000..38c5846615714
+--- /dev/null
++++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
+@@ -0,0 +1,25 @@
++/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
++/*
++ * rseq-generic-thread-pointer.h
++ *
++ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#ifndef _RSEQ_GENERIC_THREAD_POINTER
++#define _RSEQ_GENERIC_THREAD_POINTER
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/* Use gcc builtin thread pointer. */
++static inline void *rseq_thread_pointer(void)
++{
++ return __builtin_thread_pointer();
++}
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
+index e989e7c14b097..878739fae2fde 100644
+--- a/tools/testing/selftests/rseq/rseq-mips.h
++++ b/tools/testing/selftests/rseq/rseq-mips.h
+@@ -154,14 +154,11 @@ do { \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+-
+ static inline __attribute__((always_inline))
+ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -190,8 +187,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+@@ -222,11 +216,10 @@ error2:
+
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -258,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+@@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ #ifdef RSEQ_COMPARE_TWICE
+@@ -319,8 +308,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+@@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+@@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -382,8 +368,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+@@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -456,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+@@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ {
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -532,8 +510,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+@@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -649,8 +623,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+- rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+ error2:
+- rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+
+ RSEQ_INJECT_C(9)
+
+- rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+@@ -771,8 +739,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
+- rseq_workaround_gcc_asm_size_guess();
+ return 0;
+ abort:
+- rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
+- rseq_workaround_gcc_asm_size_guess();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
+- rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+ error2:
+- rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
+new file mode 100644
+index 0000000000000..263eee84fb760
+--- /dev/null
++++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
++/*
++ * rseq-ppc-thread-pointer.h
++ *
++ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#ifndef _RSEQ_PPC_THREAD_POINTER
++#define _RSEQ_PPC_THREAD_POINTER
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++static inline void *rseq_thread_pointer(void)
++{
++#ifdef __powerpc64__
++ register void *__result asm ("r13");
++#else
++ register void *__result asm ("r2");
++#endif
++ asm ("" : "=r" (__result));
++ return __result;
++}
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
+index 76be90196fe4f..bab8e0b9fb115 100644
+--- a/tools/testing/selftests/rseq/rseq-ppc.h
++++ b/tools/testing/selftests/rseq/rseq-ppc.h
+@@ -47,10 +47,13 @@ do { \
+
+ #ifdef __PPC64__
+
+-#define STORE_WORD "std "
+-#define LOAD_WORD "ld "
+-#define LOADX_WORD "ldx "
+-#define CMP_WORD "cmpd "
++#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
++#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
++#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
++#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
++#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */
++#define RSEQ_CMP_LONG "cmpd "
++#define RSEQ_CMP_LONG_INT "cmpdi "
+
+ #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+@@ -89,10 +92,13 @@ do { \
+
+ #else /* #ifdef __PPC64__ */
+
+-#define STORE_WORD "stw "
+-#define LOAD_WORD "lwz "
+-#define LOADX_WORD "lwzx "
+-#define CMP_WORD "cmpw "
++#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
++#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */
++#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
++#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */
++#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */
++#define RSEQ_CMP_LONG "cmpw "
++#define RSEQ_CMP_LONG_INT "cmpwi "
+
+ #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+@@ -125,7 +131,7 @@ do { \
+ RSEQ_INJECT_ASM(1) \
+ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
+ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
+- "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
++ RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+ #endif /* #ifdef __PPC64__ */
+@@ -136,7 +142,7 @@ do { \
+
+ #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+- "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
++ RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+@@ -153,25 +159,25 @@ do { \
+ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
+ */
+ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+- CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
++ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
++ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+ #define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
+- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+- CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
++ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
++ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
+ "beq- cr7, " __rseq_str(label) "\n\t"
+
+ #define RSEQ_ASM_OP_STORE(value, var) \
+- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
++ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+
+ /* Load @var to r17 */
+ #define RSEQ_ASM_OP_R_LOAD(var) \
+- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
++ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
+
+ /* Store r17 to @var */
+ #define RSEQ_ASM_OP_R_STORE(var) \
+- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
++ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
+
+ /* Add @count to r17 */
+ #define RSEQ_ASM_OP_R_ADD(count) \
+@@ -179,11 +185,11 @@ do { \
+
+ /* Load (r17 + voffp) to r17 */
+ #define RSEQ_ASM_OP_R_LOADX(voffp) \
+- LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
++ RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+
+ /* TODO: implement a faster memcpy. */
+ #define RSEQ_ASM_OP_R_MEMCPY() \
+- "cmpdi %%r19, 0\n\t" \
++ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
+ "beq 333f\n\t" \
+ "addi %%r20, %%r20, -1\n\t" \
+ "addi %%r21, %%r21, -1\n\t" \
+@@ -191,16 +197,16 @@ do { \
+ "lbzu %%r18, 1(%%r20)\n\t" \
+ "stbu %%r18, 1(%%r21)\n\t" \
+ "addi %%r19, %%r19, -1\n\t" \
+- "cmpdi %%r19, 0\n\t" \
++ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t" \
+
+ #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
++ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+ #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
++ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+ static inline __attribute__((always_inline))
+@@ -235,8 +241,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -248,23 +254,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -301,8 +312,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -316,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -359,8 +375,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+@@ -372,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -419,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -436,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -489,8 +513,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -506,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -560,8 +589,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -577,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -635,8 +670,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -653,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -711,8 +751,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -729,23 +769,23 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+
+-#undef STORE_WORD
+-#undef LOAD_WORD
+-#undef LOADX_WORD
+-#undef CMP_WORD
+-
+ #endif /* !RSEQ_SKIP_FASTPATH */
+diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
+index 8ef94ad1cbb45..4e6dc5f0cb429 100644
+--- a/tools/testing/selftests/rseq/rseq-s390.h
++++ b/tools/testing/selftests/rseq/rseq-s390.h
+@@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -198,7 +203,7 @@ error2:
+ */
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -233,8 +238,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -288,8 +298,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+@@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -347,8 +360,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -426,8 +444,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -534,8 +558,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ #endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [current_cpu_id] "m" (__rseq_abi.cpu_id),
+- [rseq_cs] "m" (__rseq_abi.rseq_cs),
++ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
++ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
+index 72750b5905a96..7b53dac1fcdd9 100644
+--- a/tools/testing/selftests/rseq/rseq-skip.h
++++ b/tools/testing/selftests/rseq/rseq-skip.h
+@@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ return -1;
+ }
+diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
+new file mode 100644
+index 0000000000000..977c25d758b2a
+--- /dev/null
++++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
+@@ -0,0 +1,19 @@
++/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
++/*
++ * rseq-thread-pointer.h
++ *
++ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#ifndef _RSEQ_THREAD_POINTER
++#define _RSEQ_THREAD_POINTER
++
++#if defined(__x86_64__) || defined(__i386__)
++#include "rseq-x86-thread-pointer.h"
++#elif defined(__PPC__)
++#include "rseq-ppc-thread-pointer.h"
++#else
++#include "rseq-generic-thread-pointer.h"
++#endif
++
++#endif
+diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
+new file mode 100644
+index 0000000000000..d3133587d9968
+--- /dev/null
++++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
+@@ -0,0 +1,40 @@
++/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
++/*
++ * rseq-x86-thread-pointer.h
++ *
++ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
++ */
++
++#ifndef _RSEQ_X86_THREAD_POINTER
++#define _RSEQ_X86_THREAD_POINTER
++
++#include <features.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#if __GNUC_PREREQ (11, 1)
++static inline void *rseq_thread_pointer(void)
++{
++ return __builtin_thread_pointer();
++}
++#else
++static inline void *rseq_thread_pointer(void)
++{
++ void *__result;
++
++# ifdef __x86_64__
++ __asm__ ("mov %%fs:0, %0" : "=r" (__result));
++# else
++ __asm__ ("mov %%gs:0, %0" : "=r" (__result));
++# endif
++ return __result;
++}
++#endif /* !GCC 11 */
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
+index 640411518e466..bd01dc41ca130 100644
+--- a/tools/testing/selftests/rseq/rseq-x86.h
++++ b/tools/testing/selftests/rseq/rseq-x86.h
+@@ -28,6 +28,8 @@
+
+ #ifdef __x86_64__
+
++#define RSEQ_ASM_TP_SEGMENT %%fs
++
+ #define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+ #define rseq_smp_rmb() rseq_barrier()
+@@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ #endif
+@@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -152,16 +154,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -172,7 +179,7 @@ error2:
+ */
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -184,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+@@ -207,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -220,16 +227,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -245,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ #endif
+ /* final store */
+ "addq %[count], %[v]\n\t"
+@@ -258,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "er" (count)
+@@ -269,12 +281,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -286,7 +301,7 @@ error1:
+ * *pval += inc;
+ */
+ static inline __attribute__((always_inline))
+-int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
++int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -296,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ #endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+@@ -314,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+@@ -351,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ #endif
+@@ -372,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -387,16 +402,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -426,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+@@ -436,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpq %[v2], %[expect2]\n\t"
+@@ -449,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -464,18 +484,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -500,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ "movq %[dst], %[rseq_scratch1]\n\t"
+ "movq %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 7f\n\t"
+ #endif
+@@ -555,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ #endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+@@ -574,16 +600,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -600,7 +631,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+
+ #endif /* !RSEQ_SKIP_FASTPATH */
+
+-#elif __i386__
++#elif defined(__i386__)
++
++#define RSEQ_ASM_TP_SEGMENT %%gs
+
+ #define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+@@ -701,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ #endif
+@@ -719,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+@@ -730,16 +763,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -750,7 +788,7 @@ error2:
+ */
+ static inline __attribute__((always_inline))
+ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+- off_t voffp, intptr_t *load, int cpu)
++ long voffp, intptr_t *load, int cpu)
+ {
+ RSEQ_INJECT_C(9)
+
+@@ -762,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+@@ -785,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+@@ -798,16 +836,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -823,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ #endif
+ /* final store */
+ "addl %[count], %[v]\n\t"
+@@ -836,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "ir" (count)
+@@ -847,12 +890,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+ , error1
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ #endif
+ }
+@@ -872,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ #endif
+@@ -894,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "m" (newv2),
+@@ -909,16 +955,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -938,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[error2]\n\t"
+@@ -962,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+@@ -977,16 +1028,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+
+@@ -1008,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+ #endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+@@ -1018,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpl %[expect2], %[v2]\n\t"
+@@ -1032,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+@@ -1047,18 +1103,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ , error1, error2, error3
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+ error3:
++ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+ #endif
+ }
+@@ -1084,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+@@ -1142,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ #endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+@@ -1161,16 +1223,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+@@ -1196,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
++ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+ #ifdef RSEQ_COMPARE_TWICE
+- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
++ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+@@ -1255,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ #endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+- [rseq_abi] "r" (&__rseq_abi),
++ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+@@ -1274,16 +1341,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ , error1, error2
+ #endif
+ );
++ rseq_after_asm_goto();
+ return 0;
+ abort:
++ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+ cmpfail:
++ rseq_after_asm_goto();
+ return 1;
+ #ifdef RSEQ_COMPARE_TWICE
+ error1:
++ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+ error2:
++ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+ #endif
+ }
+diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
+index 7159eb777fd34..e20191fb40d49 100644
+--- a/tools/testing/selftests/rseq/rseq.c
++++ b/tools/testing/selftests/rseq/rseq.c
+@@ -26,131 +26,143 @@
+ #include <assert.h>
+ #include <signal.h>
+ #include <limits.h>
++#include <dlfcn.h>
++#include <stddef.h>
+
+-#include "rseq.h"
+-
+-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
++#include <linux/compiler.h>
+
+-__thread volatile struct rseq __rseq_abi = {
+- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+-};
++#include "../kselftest.h"
++#include "rseq.h"
+
+ /*
+- * Shared with other libraries. This library may take rseq ownership if it is
+- * still 0 when executing the library constructor. Set to 1 by library
+- * constructor when handling rseq. Set to 0 in destructor if handling rseq.
++ * Define weak versions to play nice with binaries that are statically linked
++ * against a libc that doesn't support registering its own rseq.
+ */
+-int __rseq_handled;
++__weak ptrdiff_t __rseq_offset;
++__weak unsigned int __rseq_size;
++__weak unsigned int __rseq_flags;
+
+-/* Whether this library have ownership of rseq registration. */
+-static int rseq_ownership;
++static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
++static const unsigned int *libc_rseq_size_p = &__rseq_size;
++static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
+
+-static __thread volatile uint32_t __rseq_refcount;
++/* Offset from the thread pointer to the rseq area. */
++ptrdiff_t rseq_offset;
+
+-static void signal_off_save(sigset_t *oldset)
+-{
+- sigset_t set;
+- int ret;
++/* Size of the registered rseq area. 0 if the registration was
++ unsuccessful. */
++unsigned int rseq_size = -1U;
+
+- sigfillset(&set);
+- ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+- if (ret)
+- abort();
+-}
++/* Flags used during rseq registration. */
++unsigned int rseq_flags;
+
+-static void signal_restore(sigset_t oldset)
+-{
+- int ret;
++static int rseq_ownership;
+
+- ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+- if (ret)
+- abort();
+-}
++static
++__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = {
++ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
++};
+
+-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
++static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
+ {
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+ }
+
+-int rseq_register_current_thread(void)
++int rseq_available(void)
+ {
+- int rc, ret = 0;
+- sigset_t oldset;
++ int rc;
+
+- if (!rseq_ownership)
++ rc = sys_rseq(NULL, 0, 0, 0);
++ if (rc != -1)
++ abort();
++ switch (errno) {
++ case ENOSYS:
+ return 0;
+- signal_off_save(&oldset);
+- if (__rseq_refcount == UINT_MAX) {
+- ret = -1;
+- goto end;
+- }
+- if (__rseq_refcount++)
+- goto end;
+- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+- if (!rc) {
+- assert(rseq_current_cpu_raw() >= 0);
+- goto end;
++ case EINVAL:
++ return 1;
++ default:
++ abort();
+ }
+- if (errno != EBUSY)
+- __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
+- ret = -1;
+- __rseq_refcount--;
+-end:
+- signal_restore(oldset);
+- return ret;
+ }
+
+-int rseq_unregister_current_thread(void)
++int rseq_register_current_thread(void)
+ {
+- int rc, ret = 0;
+- sigset_t oldset;
++ int rc;
+
+- if (!rseq_ownership)
++ if (!rseq_ownership) {
++ /* Treat libc's ownership as a successful registration. */
+ return 0;
+- signal_off_save(&oldset);
+- if (!__rseq_refcount) {
+- ret = -1;
+- goto end;
+ }
+- if (--__rseq_refcount)
+- goto end;
+- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
+- RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+- if (!rc)
+- goto end;
+- __rseq_refcount = 1;
+- ret = -1;
+-end:
+- signal_restore(oldset);
+- return ret;
++ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG);
++ if (rc)
++ return -1;
++ assert(rseq_current_cpu_raw() >= 0);
++ return 0;
+ }
+
+-int32_t rseq_fallback_current_cpu(void)
++int rseq_unregister_current_thread(void)
+ {
+- int32_t cpu;
++ int rc;
+
+- cpu = sched_getcpu();
+- if (cpu < 0) {
+- perror("sched_getcpu()");
+- abort();
++ if (!rseq_ownership) {
++ /* Treat libc's ownership as a successful unregistration. */
++ return 0;
+ }
+- return cpu;
++ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
++ if (rc)
++ return -1;
++ return 0;
+ }
+
+-void __attribute__((constructor)) rseq_init(void)
++static __attribute__((constructor))
++void rseq_init(void)
+ {
+- /* Check whether rseq is handled by another library. */
+- if (__rseq_handled)
++ /*
++ * If the libc's registered rseq size isn't already valid, it may be
++ * because the binary is dynamically linked and not necessarily due to
++ * libc not having registered a restartable sequence. Try to find the
++ * symbols if that's the case.
++ */
++ if (!*libc_rseq_size_p) {
++ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
++ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
++ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
++ }
++ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
++ *libc_rseq_size_p != 0) {
++ /* rseq registration owned by glibc */
++ rseq_offset = *libc_rseq_offset_p;
++ rseq_size = *libc_rseq_size_p;
++ rseq_flags = *libc_rseq_flags_p;
++ return;
++ }
++ if (!rseq_available())
+ return;
+- __rseq_handled = 1;
+ rseq_ownership = 1;
++ rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
++ rseq_size = sizeof(struct rseq_abi);
++ rseq_flags = 0;
+ }
+
+-void __attribute__((destructor)) rseq_fini(void)
++static __attribute__((destructor))
++void rseq_exit(void)
+ {
+ if (!rseq_ownership)
+ return;
+- __rseq_handled = 0;
++ rseq_offset = 0;
++ rseq_size = -1U;
+ rseq_ownership = 0;
+ }
++
++int32_t rseq_fallback_current_cpu(void)
++{
++ int32_t cpu;
++
++ cpu = sched_getcpu();
++ if (cpu < 0) {
++ perror("sched_getcpu()");
++ abort();
++ }
++ return cpu;
++}
+diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
+index 3f63eb362b92f..9d850b290c2e6 100644
+--- a/tools/testing/selftests/rseq/rseq.h
++++ b/tools/testing/selftests/rseq/rseq.h
+@@ -16,7 +16,9 @@
+ #include <errno.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+-#include <linux/rseq.h>
++#include <stddef.h>
++#include "rseq-abi.h"
++#include "compiler.h"
+
+ /*
+ * Empty code injection macros, override when testing.
+@@ -43,8 +45,20 @@
+ #define RSEQ_INJECT_FAILED
+ #endif
+
+-extern __thread volatile struct rseq __rseq_abi;
+-extern int __rseq_handled;
++#include "rseq-thread-pointer.h"
++
++/* Offset from the thread pointer to the rseq area. */
++extern ptrdiff_t rseq_offset;
++/* Size of the registered rseq area. 0 if the registration was
++ unsuccessful. */
++extern unsigned int rseq_size;
++/* Flags used during rseq registration. */
++extern unsigned int rseq_flags;
++
++static inline struct rseq_abi *rseq_get_abi(void)
++{
++ return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset);
++}
+
+ #define rseq_likely(x) __builtin_expect(!!(x), 1)
+ #define rseq_unlikely(x) __builtin_expect(!!(x), 0)
+@@ -108,7 +122,7 @@ int32_t rseq_fallback_current_cpu(void);
+ */
+ static inline int32_t rseq_current_cpu_raw(void)
+ {
+- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
++ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id);
+ }
+
+ /*
+@@ -124,7 +138,7 @@ static inline int32_t rseq_current_cpu_raw(void)
+ */
+ static inline uint32_t rseq_cpu_start(void)
+ {
+- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
++ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start);
+ }
+
+ static inline uint32_t rseq_current_cpu(void)
+@@ -139,11 +153,7 @@ static inline uint32_t rseq_current_cpu(void)
+
+ static inline void rseq_clear_rseq_cs(void)
+ {
+-#ifdef __LP64__
+- __rseq_abi.rseq_cs.ptr = 0;
+-#else
+- __rseq_abi.rseq_cs.ptr.ptr32 = 0;
+-#endif
++ RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0);
+ }
+
+ /*
+diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
+index ba4d85f74cd6b..a953c96aa16e1 100644
+--- a/tools/testing/selftests/rtc/settings
++++ b/tools/testing/selftests/rtc/settings
+@@ -1 +1 @@
+-timeout=90
++timeout=180
+diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
+index 7db9cf822dc75..8109b17dc764c 100644
+--- a/tools/testing/selftests/sched/cs_prctl_test.c
++++ b/tools/testing/selftests/sched/cs_prctl_test.c
+@@ -62,6 +62,17 @@ enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+ const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
++struct child_args {
++ int num_threads;
++ int pfd[2];
++ int cpid;
++ int thr_tids[MAX_THREADS];
++};
++
++static struct child_args procs[MAX_PROCESSES];
++static int num_processes = 2;
++static int need_cleanup = 0;
++
+ static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+ {
+@@ -78,8 +89,14 @@ static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned l
+ #define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+ static void __handle_error(char *fn, int ln, char *msg)
+ {
++ int pidx;
+ printf("(%s:%d) - ", fn, ln);
+ perror(msg);
++ if (need_cleanup) {
++ for (pidx = 0; pidx < num_processes; ++pidx)
++ kill(procs[pidx].cpid, 15);
++ need_cleanup = 0;
++ }
+ exit(EXIT_FAILURE);
+ }
+
+@@ -106,13 +123,6 @@ static unsigned long get_cs_cookie(int pid)
+ return cookie;
+ }
+
+-struct child_args {
+- int num_threads;
+- int pfd[2];
+- int cpid;
+- int thr_tids[MAX_THREADS];
+-};
+-
+ static int child_func_thread(void __attribute__((unused))*arg)
+ {
+ while (1)
+@@ -212,10 +222,7 @@ void _validate(int line, int val, char *msg)
+
+ int main(int argc, char *argv[])
+ {
+- struct child_args procs[MAX_PROCESSES];
+-
+ int keypress = 0;
+- int num_processes = 2;
+ int num_threads = 3;
+ int delay = 0;
+ int res = 0;
+@@ -262,6 +269,7 @@ int main(int argc, char *argv[])
+
+ printf("\n## Create a thread/process/process group hiearchy\n");
+ create_processes(num_processes, num_threads, procs);
++ need_cleanup = 1;
+ disp_processes(num_processes, procs);
+ validate(get_cs_cookie(0) == 0);
+
+diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
+index 0ebfe8b0e147f..585f7a0c10cbe 100644
+--- a/tools/testing/selftests/seccomp/Makefile
++++ b/tools/testing/selftests/seccomp/Makefile
+@@ -1,5 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0
+-CFLAGS += -Wl,-no-as-needed -Wall
++CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
+ LDFLAGS += -lpthread
+
+ TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
+diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
+index 1d64891e64923..ac340a9c09187 100644
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
+@@ -802,7 +802,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
+ .len = (unsigned short)ARRAY_SIZE(filter_thread),
+ .filter = filter_thread,
+ };
+- int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
++ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
+ struct sock_filter filter_process[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+@@ -955,7 +955,7 @@ TEST(ERRNO_valid)
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+- EXPECT_EQ(-1, read(0, NULL, 0));
++ EXPECT_EQ(-1, read(-1, NULL, 0));
+ EXPECT_EQ(E2BIG, errno);
+ }
+
+@@ -974,7 +974,7 @@ TEST(ERRNO_zero)
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* "errno" of 0 is ok. */
+- EXPECT_EQ(0, read(0, NULL, 0));
++ EXPECT_EQ(0, read(-1, NULL, 0));
+ }
+
+ /*
+@@ -995,7 +995,7 @@ TEST(ERRNO_capped)
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+- EXPECT_EQ(-1, read(0, NULL, 0));
++ EXPECT_EQ(-1, read(-1, NULL, 0));
+ EXPECT_EQ(4095, errno);
+ }
+
+@@ -1026,7 +1026,7 @@ TEST(ERRNO_order)
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+- EXPECT_EQ(-1, read(0, NULL, 0));
++ EXPECT_EQ(-1, read(-1, NULL, 0));
+ EXPECT_EQ(12, errno);
+ }
+
+@@ -2579,7 +2579,7 @@ void *tsync_sibling(void *data)
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ if (!ret)
+ return (void *)SIBLING_EXIT_NEWPRIVS;
+- read(0, NULL, 0);
++ read(-1, NULL, 0);
+ return (void *)SIBLING_EXIT_UNKILLED;
+ }
+
+diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
+index 7f12d55b97f86..472b27ccd7dcb 100644
+--- a/tools/testing/selftests/sgx/Makefile
++++ b/tools/testing/selftests/sgx/Makefile
+@@ -4,7 +4,7 @@ include ../lib.mk
+
+ .PHONY: all clean
+
+-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
++CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
+ ../x86/trivial_64bit_program.c)
+
+ ifndef OBJCOPY
+diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
+new file mode 100644
+index 0000000000000..ea9bdf3a90b16
+--- /dev/null
++++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
+@@ -0,0 +1,23 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#if __alpha__
++register unsigned long sp asm("$30");
++#elif __arm__ || __aarch64__ || __csky__ || __m68k__ || __mips__ || __riscv
++register unsigned long sp asm("sp");
++#elif __i386__
++register unsigned long sp asm("esp");
++#elif __loongarch64
++register unsigned long sp asm("$sp");
++#elif __ppc__
++register unsigned long sp asm("r1");
++#elif __s390x__
++register unsigned long sp asm("%15");
++#elif __sh__
++register unsigned long sp asm("r15");
++#elif __x86_64__
++register unsigned long sp asm("rsp");
++#elif __XTENSA__
++register unsigned long sp asm("a1");
++#else
++#error "implement current_stack_pointer equivalent"
++#endif
+diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
+index c53b070755b65..98d37cb744fb2 100644
+--- a/tools/testing/selftests/sigaltstack/sas.c
++++ b/tools/testing/selftests/sigaltstack/sas.c
+@@ -20,6 +20,7 @@
+ #include <sys/auxv.h>
+
+ #include "../kselftest.h"
++#include "current_stack_pointer.h"
+
+ #ifndef SS_AUTODISARM
+ #define SS_AUTODISARM (1U << 31)
+@@ -46,12 +47,6 @@ void my_usr1(int sig, siginfo_t *si, void *u)
+ stack_t stk;
+ struct stk_data *p;
+
+-#if __s390x__
+- register unsigned long sp asm("%15");
+-#else
+- register unsigned long sp asm("sp");
+-#endif
+-
+ if (sp < (unsigned long)sstack ||
+ sp >= (unsigned long)sstack + stack_size) {
+ ksft_exit_fail_msg("SP is not on sigaltstack\n");
+diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
+index b71828df5a6dd..db1dcc139d34c 100644
+--- a/tools/testing/selftests/tc-testing/config
++++ b/tools/testing/selftests/tc-testing/config
+@@ -5,6 +5,8 @@ CONFIG_NF_CONNTRACK=m
+ CONFIG_NF_CONNTRACK_MARK=y
+ CONFIG_NF_CONNTRACK_ZONES=y
+ CONFIG_NF_CONNTRACK_LABELS=y
++CONFIG_NF_CONNTRACK_PROCFS=y
++CONFIG_NF_FLOW_TABLE=m
+ CONFIG_NF_NAT=m
+
+ CONFIG_NET_SCHED=y
+diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings
+new file mode 100644
+index 0000000000000..e2206265f67c7
+--- /dev/null
++++ b/tools/testing/selftests/tc-testing/settings
+@@ -0,0 +1 @@
++timeout=900
+diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
+index ef8eb3604595e..b57f0a9be4902 100644
+--- a/tools/testing/selftests/timers/clocksource-switch.c
++++ b/tools/testing/selftests/timers/clocksource-switch.c
+@@ -110,10 +110,10 @@ int run_tests(int secs)
+
+ sprintf(buf, "./inconsistency-check -t %i", secs);
+ ret = system(buf);
+- if (ret)
+- return ret;
++ if (WIFEXITED(ret) && WEXITSTATUS(ret))
++ return WEXITSTATUS(ret);
+ ret = system("./nanosleep");
+- return ret;
++ return WIFEXITED(ret) ? WEXITSTATUS(ret) : 0;
+ }
+
+
+diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
+index 5397de708d3c2..48b9a803235a8 100644
+--- a/tools/testing/selftests/timers/valid-adjtimex.c
++++ b/tools/testing/selftests/timers/valid-adjtimex.c
+@@ -40,7 +40,7 @@
+ #define ADJ_SETOFFSET 0x0100
+
+ #include <sys/syscall.h>
+-static int clock_adjtime(clockid_t id, struct timex *tx)
++int clock_adjtime(clockid_t id, struct timex *tx)
+ {
+ return syscall(__NR_clock_adjtime, id, tx);
+ }
+diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
+index f34486cd7342d..3e67fdb518ec3 100644
+--- a/tools/testing/selftests/tpm2/tpm2.py
++++ b/tools/testing/selftests/tpm2/tpm2.py
+@@ -370,6 +370,10 @@ class Client:
+ fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags)
+ self.tpm_poll = select.poll()
+
++ def __del__(self):
++ if self.tpm:
++ self.tpm.close()
++
+ def close(self):
+ self.tpm.close()
+
+diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
+index 3d603f1394af4..883ca85424bc5 100644
+--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
++++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
+@@ -33,110 +33,114 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+ typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+ typedef time_t (*vdso_time_t)(time_t *t);
+
+-static int vdso_test_gettimeofday(void)
++#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__
++#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x
++#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x
++
++static void vdso_test_gettimeofday(void)
+ {
+ /* Find gettimeofday. */
+ vdso_gettimeofday_t vdso_gettimeofday =
+ (vdso_gettimeofday_t)vdso_sym(version, name[0]);
+
+ if (!vdso_gettimeofday) {
+- printf("Could not find %s\n", name[0]);
+- return KSFT_SKIP;
++ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0]));
++ return;
+ }
+
+ struct timeval tv;
+ long ret = vdso_gettimeofday(&tv, 0);
+
+ if (ret == 0) {
+- printf("The time is %lld.%06lld\n",
+- (long long)tv.tv_sec, (long long)tv.tv_usec);
++ ksft_print_msg("The time is %lld.%06lld\n",
++ (long long)tv.tv_sec, (long long)tv.tv_usec);
++ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
+ } else {
+- printf("%s failed\n", name[0]);
+- return KSFT_FAIL;
++ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0]));
+ }
+-
+- return KSFT_PASS;
+ }
+
+-static int vdso_test_clock_gettime(clockid_t clk_id)
++static void vdso_test_clock_gettime(clockid_t clk_id)
+ {
+ /* Find clock_gettime. */
+ vdso_clock_gettime_t vdso_clock_gettime =
+ (vdso_clock_gettime_t)vdso_sym(version, name[1]);
+
+ if (!vdso_clock_gettime) {
+- printf("Could not find %s\n", name[1]);
+- return KSFT_SKIP;
++ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1]));
++ return;
+ }
+
+ struct timespec ts;
+ long ret = vdso_clock_gettime(clk_id, &ts);
+
+ if (ret == 0) {
+- printf("The time is %lld.%06lld\n",
+- (long long)ts.tv_sec, (long long)ts.tv_nsec);
++ ksft_print_msg("The time is %lld.%06lld\n",
++ (long long)ts.tv_sec, (long long)ts.tv_nsec);
++ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
+ } else {
+- printf("%s failed\n", name[1]);
+- return KSFT_FAIL;
++ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1]));
+ }
+-
+- return KSFT_PASS;
+ }
+
+-static int vdso_test_time(void)
++static void vdso_test_time(void)
+ {
+ /* Find time. */
+ vdso_time_t vdso_time =
+ (vdso_time_t)vdso_sym(version, name[2]);
+
+ if (!vdso_time) {
+- printf("Could not find %s\n", name[2]);
+- return KSFT_SKIP;
++ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2]));
++ return;
+ }
+
+ long ret = vdso_time(NULL);
+
+ if (ret > 0) {
+- printf("The time in hours since January 1, 1970 is %lld\n",
++ ksft_print_msg("The time in hours since January 1, 1970 is %lld\n",
+ (long long)(ret / 3600));
++ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
+ } else {
+- printf("%s failed\n", name[2]);
+- return KSFT_FAIL;
++ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2]));
+ }
+-
+- return KSFT_PASS;
+ }
+
+-static int vdso_test_clock_getres(clockid_t clk_id)
++static void vdso_test_clock_getres(clockid_t clk_id)
+ {
++ int clock_getres_fail = 0;
++
+ /* Find clock_getres. */
+ vdso_clock_getres_t vdso_clock_getres =
+ (vdso_clock_getres_t)vdso_sym(version, name[3]);
+
+ if (!vdso_clock_getres) {
+- printf("Could not find %s\n", name[3]);
+- return KSFT_SKIP;
++ ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3]));
++ return;
+ }
+
+ struct timespec ts, sys_ts;
+ long ret = vdso_clock_getres(clk_id, &ts);
+
+ if (ret == 0) {
+- printf("The resolution is %lld %lld\n",
+- (long long)ts.tv_sec, (long long)ts.tv_nsec);
++ ksft_print_msg("The vdso resolution is %lld %lld\n",
++ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ } else {
+- printf("%s failed\n", name[3]);
+- return KSFT_FAIL;
++ clock_getres_fail++;
+ }
+
+ ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+
+- if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) {
+- printf("%s failed\n", name[3]);
+- return KSFT_FAIL;
+- }
++ ksft_print_msg("The syscall resolution is %lld %lld\n",
++ (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
+
+- return KSFT_PASS;
++ if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec))
++ clock_getres_fail++;
++
++ if (clock_getres_fail > 0) {
++ ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3]));
++ } else {
++ ksft_test_result_pass(VDSO_TEST_PASS_MSG());
++ }
+ }
+
+ const char *vdso_clock_name[12] = {
+@@ -158,36 +162,23 @@ const char *vdso_clock_name[12] = {
+ * This function calls vdso_test_clock_gettime and vdso_test_clock_getres
+ * with different values for clock_id.
+ */
+-static inline int vdso_test_clock(clockid_t clock_id)
++static inline void vdso_test_clock(clockid_t clock_id)
+ {
+- int ret0, ret1;
+-
+- ret0 = vdso_test_clock_gettime(clock_id);
+- /* A skipped test is considered passed */
+- if (ret0 == KSFT_SKIP)
+- ret0 = KSFT_PASS;
+-
+- ret1 = vdso_test_clock_getres(clock_id);
+- /* A skipped test is considered passed */
+- if (ret1 == KSFT_SKIP)
+- ret1 = KSFT_PASS;
++ ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]);
+
+- ret0 += ret1;
++ vdso_test_clock_gettime(clock_id);
+
+- printf("clock_id: %s", vdso_clock_name[clock_id]);
+-
+- if (ret0 > 0)
+- printf(" [FAIL]\n");
+- else
+- printf(" [PASS]\n");
+-
+- return ret0;
++ vdso_test_clock_getres(clock_id);
+ }
+
++#define VDSO_TEST_PLAN 16
++
+ int main(int argc, char **argv)
+ {
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+- int ret;
++
++ ksft_print_header();
++ ksft_set_plan(VDSO_TEST_PLAN);
+
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+@@ -201,44 +192,42 @@ int main(int argc, char **argv)
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+- ret = vdso_test_gettimeofday();
++ vdso_test_gettimeofday();
+
+ #if _POSIX_TIMERS > 0
+
+ #ifdef CLOCK_REALTIME
+- ret += vdso_test_clock(CLOCK_REALTIME);
++ vdso_test_clock(CLOCK_REALTIME);
+ #endif
+
+ #ifdef CLOCK_BOOTTIME
+- ret += vdso_test_clock(CLOCK_BOOTTIME);
++ vdso_test_clock(CLOCK_BOOTTIME);
+ #endif
+
+ #ifdef CLOCK_TAI
+- ret += vdso_test_clock(CLOCK_TAI);
++ vdso_test_clock(CLOCK_TAI);
+ #endif
+
+ #ifdef CLOCK_REALTIME_COARSE
+- ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
++ vdso_test_clock(CLOCK_REALTIME_COARSE);
+ #endif
+
+ #ifdef CLOCK_MONOTONIC
+- ret += vdso_test_clock(CLOCK_MONOTONIC);
++ vdso_test_clock(CLOCK_MONOTONIC);
+ #endif
+
+ #ifdef CLOCK_MONOTONIC_RAW
+- ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
++ vdso_test_clock(CLOCK_MONOTONIC_RAW);
+ #endif
+
+ #ifdef CLOCK_MONOTONIC_COARSE
+- ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
++ vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+ #endif
+
+ #endif
+
+- ret += vdso_test_time();
+-
+- if (ret > 0)
+- return KSFT_FAIL;
++ vdso_test_time();
+
+- return KSFT_PASS;
++ ksft_print_cnts();
++ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+ }
+diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+index 15dcee16ff726..38d46a8bf7cba 100644
+--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
++++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+@@ -84,12 +84,12 @@ static inline int vdso_test_clock(unsigned int clock_id)
+
+ int main(int argc, char **argv)
+ {
+- int ret;
++ int ret = 0;
+
+ #if _POSIX_TIMERS > 0
+
+ #ifdef CLOCK_REALTIME
+- ret = vdso_test_clock(CLOCK_REALTIME);
++ ret += vdso_test_clock(CLOCK_REALTIME);
+ #endif
+
+ #ifdef CLOCK_BOOTTIME
+diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
+index d9605bd10f2de..d8ae7cc012749 100644
+--- a/tools/testing/selftests/vm/Makefile
++++ b/tools/testing/selftests/vm/Makefile
+@@ -1,6 +1,8 @@
+ # SPDX-License-Identifier: GPL-2.0
+ # Makefile for vm selftests
+
++LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
++
+ include local_config.mk
+
+ uname_M := $(shell uname -m 2>/dev/null || echo not)
+@@ -48,13 +50,13 @@ TEST_GEN_FILES += split_huge_page_test
+ TEST_GEN_FILES += ksm_tests
+
+ ifeq ($(MACHINE),x86_64)
+-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
+-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
+-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
++CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
++CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
++CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
+
+-TARGETS := protection_keys
+-BINARIES_32 := $(TARGETS:%=%_32)
+-BINARIES_64 := $(TARGETS:%=%_64)
++VMTARGETS := protection_keys
++BINARIES_32 := $(VMTARGETS:%=%_32)
++BINARIES_64 := $(VMTARGETS:%=%_64)
+
+ ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+ CFLAGS += -no-pie
+@@ -107,7 +109,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
+ $(BINARIES_32): LDLIBS += -lrt -ldl -lm
+ $(BINARIES_32): $(OUTPUT)/%_32: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
++$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
+ endif
+
+ ifeq ($(CAN_BUILD_X86_64),1)
+@@ -115,7 +117,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
+ $(BINARIES_64): LDLIBS += -lrt -ldl
+ $(BINARIES_64): $(OUTPUT)/%_64: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
++$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
+ endif
+
+ # x86_64 users should be encouraged to install 32-bit libraries
+@@ -139,10 +141,6 @@ endif
+
+ $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
+
+-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
+-
+-$(OUTPUT)/hmm-tests: local_config.h
+-
+ # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+ $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
+
+diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+index fe8fcfb334e06..a5cb4b09a46c4 100644
+--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
++++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+@@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
+ reservation_usage_file=rsvd.current
+ fi
+
+-cgroup_path=/dev/cgroup/memory
+-if [[ ! -e $cgroup_path ]]; then
+- mkdir -p $cgroup_path
+- if [[ $cgroup2 ]]; then
++if [[ $cgroup2 ]]; then
++ cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ if [[ -z "$cgroup_path" ]]; then
++ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup2 none $cgroup_path
+- else
++ do_umount=1
++ fi
++ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
++else
++ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ if [[ -z "$cgroup_path" ]]; then
++ cgroup_path=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $cgroup_path
++ do_umount=1
+ fi
+ fi
+-
+-if [[ $cgroup2 ]]; then
+- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
+-fi
++export cgroup_path
+
+ function cleanup() {
+ if [[ $cgroup2 ]]; then
+@@ -108,7 +112,7 @@ function setup_cgroup() {
+
+ function wait_for_hugetlb_memory_to_get_depleted() {
+ local cgroup="$1"
+- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
++ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ # Wait for hugetlbfs memory to get depleted.
+ while [ $(cat $path) != 0 ]; do
+ echo Waiting for hugetlb memory to get depleted.
+@@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
+ local cgroup="$1"
+ local size="$2"
+
+- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
++ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ # Wait for hugetlbfs memory to get written.
+ while [ $(cat $path) != $size ]; do
+ echo Waiting for hugetlb memory reservation to reach size $size.
+@@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() {
+ local cgroup="$1"
+ local size="$2"
+
+- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
++ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ # Wait for hugetlbfs memory to get written.
+ while [ $(cat $path) != $size ]; do
+ echo Waiting for hugetlb memory to reach size $size.
+@@ -574,5 +578,7 @@ for populate in "" "-o"; do
+ done # populate
+ done # method
+
+-umount $cgroup_path
+-rmdir $cgroup_path
++if [[ $do_umount ]]; then
++ umount $cgroup_path
++ rmdir $cgroup_path
++fi
+diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
+index 864f126ffd78f..203323967b507 100644
+--- a/tools/testing/selftests/vm/hmm-tests.c
++++ b/tools/testing/selftests/vm/hmm-tests.c
+@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
+ }
+ }
+
++/*
++ * Test memory snapshot without faulting in pages accessed by the device.
++ */
++TEST_F(hmm, mixedmap)
++{
++ struct hmm_buffer *buffer;
++ unsigned long npages;
++ unsigned long size;
++ unsigned char *m;
++ int ret;
++
++ npages = 1;
++ size = npages << self->page_shift;
++
++ buffer = malloc(sizeof(*buffer));
++ ASSERT_NE(buffer, NULL);
++
++ buffer->fd = -1;
++ buffer->size = size;
++ buffer->mirror = malloc(npages);
++ ASSERT_NE(buffer->mirror, NULL);
++
++
++ /* Reserve a range of addresses. */
++ buffer->ptr = mmap(NULL, size,
++ PROT_READ | PROT_WRITE,
++ MAP_PRIVATE,
++ self->fd, 0);
++ ASSERT_NE(buffer->ptr, MAP_FAILED);
++
++ /* Simulate a device snapshotting CPU pagetables. */
++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(buffer->cpages, npages);
++
++ /* Check what the device saw. */
++ m = buffer->mirror;
++ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
++
++ hmm_buffer_free(buffer);
++}
++
+ /*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+index 4a9a3afe9fd4d..bf2d2a684edfd 100644
+--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
++++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+@@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
+ usage_file=current
+ fi
+
+-CGROUP_ROOT='/dev/cgroup/memory'
+-MNT='/mnt/huge/'
+
+-if [[ ! -e $CGROUP_ROOT ]]; then
+- mkdir -p $CGROUP_ROOT
+- if [[ $cgroup2 ]]; then
++if [[ $cgroup2 ]]; then
++ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
++ if [[ -z "$CGROUP_ROOT" ]]; then
++ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup2 none $CGROUP_ROOT
+- sleep 1
+- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+- else
++ do_umount=1
++ fi
++ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
++else
++ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
++ if [[ -z "$CGROUP_ROOT" ]]; then
++ CGROUP_ROOT=/dev/cgroup/memory
+ mount -t cgroup memory,hugetlb $CGROUP_ROOT
++ do_umount=1
+ fi
+ fi
++MNT='/mnt/huge/'
+
+ function get_machine_hugepage_size() {
+ hpz=$(grep -i hugepagesize /proc/meminfo)
+diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
+index d91bde5112686..eed44322d1a63 100644
+--- a/tools/testing/selftests/vm/map_fixed_noreplace.c
++++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
+@@ -17,9 +17,6 @@
+ #define MAP_FIXED_NOREPLACE 0x100000
+ #endif
+
+-#define BASE_ADDRESS (256ul * 1024 * 1024)
+-
+-
+ static void dump_maps(void)
+ {
+ char cmd[32];
+@@ -28,18 +25,46 @@ static void dump_maps(void)
+ system(cmd);
+ }
+
++static unsigned long find_base_addr(unsigned long size)
++{
++ void *addr;
++ unsigned long flags;
++
++ flags = MAP_PRIVATE | MAP_ANONYMOUS;
++ addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
++ if (addr == MAP_FAILED) {
++ printf("Error: couldn't map the space we need for the test\n");
++ return 0;
++ }
++
++ if (munmap(addr, size) != 0) {
++ printf("Error: couldn't map the space we need for the test\n");
++ return 0;
++ }
++ return (unsigned long)addr;
++}
++
+ int main(void)
+ {
++ unsigned long base_addr;
+ unsigned long flags, addr, size, page_size;
+ char *p;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+
++ //let's find a base addr that is free before we start the tests
++ size = 5 * page_size;
++ base_addr = find_base_addr(size);
++ if (!base_addr) {
++ printf("Error: couldn't map the space we need for the test\n");
++ return 1;
++ }
++
+ flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+
+ // Check we can map all the areas we need below
+ errno = 0;
+- addr = BASE_ADDRESS;
++ addr = base_addr;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+
+@@ -60,7 +85,7 @@ int main(void)
+ printf("unmap() successful\n");
+
+ errno = 0;
+- addr = BASE_ADDRESS + page_size;
++ addr = base_addr + page_size;
+ size = 3 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -80,7 +105,7 @@ int main(void)
+ * +4 | free | new
+ */
+ errno = 0;
+- addr = BASE_ADDRESS;
++ addr = base_addr;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -101,7 +126,7 @@ int main(void)
+ * +4 | free |
+ */
+ errno = 0;
+- addr = BASE_ADDRESS + (2 * page_size);
++ addr = base_addr + (2 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -121,7 +146,7 @@ int main(void)
+ * +4 | free | new
+ */
+ errno = 0;
+- addr = BASE_ADDRESS + (3 * page_size);
++ addr = base_addr + (3 * page_size);
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -141,7 +166,7 @@ int main(void)
+ * +4 | free |
+ */
+ errno = 0;
+- addr = BASE_ADDRESS;
++ addr = base_addr;
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -161,7 +186,7 @@ int main(void)
+ * +4 | free |
+ */
+ errno = 0;
+- addr = BASE_ADDRESS;
++ addr = base_addr;
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -181,7 +206,7 @@ int main(void)
+ * +4 | free | new
+ */
+ errno = 0;
+- addr = BASE_ADDRESS + (4 * page_size);
++ addr = base_addr + (4 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+@@ -192,7 +217,7 @@ int main(void)
+ return 1;
+ }
+
+- addr = BASE_ADDRESS;
++ addr = base_addr;
+ size = 5 * page_size;
+ if (munmap((void *)addr, size) != 0) {
+ dump_maps();
+diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
+index 0624d1bd71b53..58775dab3cc6c 100644
+--- a/tools/testing/selftests/vm/mremap_test.c
++++ b/tools/testing/selftests/vm/mremap_test.c
+@@ -6,9 +6,11 @@
+
+ #include <errno.h>
+ #include <stdlib.h>
++#include <stdio.h>
+ #include <string.h>
+ #include <sys/mman.h>
+ #include <time.h>
++#include <stdbool.h>
+
+ #include "../kselftest.h"
+
+@@ -20,7 +22,6 @@
+ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
+ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
+
+-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
+ struct config {
+@@ -64,6 +65,59 @@ enum {
+ .expect_failure = should_fail \
+ }
+
++/*
++ * Returns false if the requested remap region overlaps with an
++ * existing mapping (e.g text, stack) else returns true.
++ */
++static bool is_remap_region_valid(void *addr, unsigned long long size)
++{
++ void *remap_addr = NULL;
++ bool ret = true;
++
++ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
++ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
++ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
++ -1, 0);
++
++ if (remap_addr == MAP_FAILED) {
++ if (errno == EEXIST)
++ ret = false;
++ } else {
++ munmap(remap_addr, size);
++ }
++
++ return ret;
++}
++
++/* Returns mmap_min_addr sysctl tunable from procfs */
++static unsigned long long get_mmap_min_addr(void)
++{
++ FILE *fp;
++ int n_matched;
++ static unsigned long long addr;
++
++ if (addr)
++ return addr;
++
++ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
++ if (fp == NULL) {
++ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
++ strerror(errno));
++ exit(KSFT_SKIP);
++ }
++
++ n_matched = fscanf(fp, "%llu", &addr);
++ if (n_matched != 1) {
++ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
++ strerror(errno));
++ fclose(fp);
++ exit(KSFT_SKIP);
++ }
++
++ fclose(fp);
++ return addr;
++}
++
+ /*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+@@ -72,11 +126,18 @@ static void *get_source_mapping(struct config c)
+ {
+ unsigned long long addr = 0ULL;
+ void *src_addr = NULL;
++ unsigned long long mmap_min_addr;
++
++ mmap_min_addr = get_mmap_min_addr();
++
+ retry:
+ addr += c.src_alignment;
++ if (addr < mmap_min_addr)
++ goto retry;
++
+ src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+- -1, 0);
++ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
++ -1, 0);
+ if (src_addr == MAP_FAILED) {
+ if (errno == EPERM || errno == EEXIST)
+ goto retry;
+@@ -91,8 +152,10 @@ retry:
+ * alignment in the tests.
+ */
+ if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+- !((unsigned long long) src_addr & c.src_alignment))
++ !((unsigned long long) src_addr & c.src_alignment)) {
++ munmap(src_addr, c.region_size);
+ goto retry;
++ }
+
+ if (!src_addr)
+ goto error;
+@@ -141,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
+ if (!((unsigned long long) addr & c.dest_alignment))
+ addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+
++ /* Don't destroy existing mappings unless expected to overlap */
++ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
++ /* Check for unsigned overflow */
++ if (addr + c.dest_alignment < addr) {
++ ksft_print_msg("Couldn't find a valid region to remap to\n");
++ ret = -1;
++ goto out;
++ }
++ addr += c.dest_alignment;
++ }
++
+ clock_gettime(CLOCK_MONOTONIC, &t_start);
+ dest_addr = mremap(src_addr, c.region_size, c.region_size,
+- MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
++ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ clock_gettime(CLOCK_MONOTONIC, &t_end);
+
+ if (dest_addr == MAP_FAILED) {
+diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
+index 622a85848f61b..92f3be3dd8e59 100644
+--- a/tools/testing/selftests/vm/pkey-helpers.h
++++ b/tools/testing/selftests/vm/pkey-helpers.h
+@@ -13,6 +13,8 @@
+ #include <ucontext.h>
+ #include <sys/mman.h>
+
++#include "../kselftest.h"
++
+ /* Define some kernel-like types */
+ #define u8 __u8
+ #define u16 __u16
+@@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
+ dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+ }
+
+-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+ #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+ #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+ #define ALIGN_PTR_UP(p, ptr_align_to) \
+diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
+index e4a4ce2b826d2..ea8c8afbcdbb3 100644
+--- a/tools/testing/selftests/vm/pkey-x86.h
++++ b/tools/testing/selftests/vm/pkey-x86.h
+@@ -119,6 +119,18 @@ static inline int cpu_has_pkeys(void)
+ return 1;
+ }
+
++static inline int cpu_max_xsave_size(void)
++{
++ unsigned long XSTATE_CPUID = 0xd;
++ unsigned int eax;
++ unsigned int ebx;
++ unsigned int ecx;
++ unsigned int edx;
++
++ __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
++ return ecx;
++}
++
+ static inline u32 pkey_bit_position(int pkey)
+ {
+ return pkey * PKEY_BITS_PER_PKEY;
+diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
+index 2d0ae88665db0..2d48272b2463e 100644
+--- a/tools/testing/selftests/vm/protection_keys.c
++++ b/tools/testing/selftests/vm/protection_keys.c
+@@ -18,12 +18,13 @@
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
++ * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
++ * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+ #define _GNU_SOURCE
+ #define __SANE_USERSPACE_TYPES__
+ #include <errno.h>
++#include <linux/elf.h>
+ #include <linux/futex.h>
+ #include <time.h>
+ #include <sys/time.h>
+@@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+ do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
+ }
+
++#if defined(__i386__) || defined(__x86_64__)
++void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
++{
++ u32 new_pkru;
++ pid_t child;
++ int status, ret;
++ int pkey_offset = pkey_reg_xstate_offset();
++ size_t xsave_size = cpu_max_xsave_size();
++ void *xsave;
++ u32 *pkey_register;
++ u64 *xstate_bv;
++ struct iovec iov;
++
++ new_pkru = ~read_pkey_reg();
++ /* Don't make PROT_EXEC mappings inaccessible */
++ new_pkru &= ~3;
++
++ child = fork();
++ pkey_assert(child >= 0);
++ dprintf3("[%d] fork() ret: %d\n", getpid(), child);
++ if (!child) {
++ ptrace(PTRACE_TRACEME, 0, 0, 0);
++ /* Stop and allow the tracer to modify PKRU directly */
++ raise(SIGSTOP);
++
++ /*
++ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
++ * checking
++ */
++ if (__read_pkey_reg() != new_pkru)
++ exit(1);
++
++ /* Stop and allow the tracer to clear XSTATE_BV for PKRU */
++ raise(SIGSTOP);
++
++ if (__read_pkey_reg() != 0)
++ exit(1);
++
++ /* Stop and allow the tracer to examine PKRU */
++ raise(SIGSTOP);
++
++ exit(0);
++ }
++
++ pkey_assert(child == waitpid(child, &status, 0));
++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
++
++ xsave = (void *)malloc(xsave_size);
++ pkey_assert(xsave > 0);
++
++ /* Modify the PKRU register directly */
++ iov.iov_base = xsave;
++ iov.iov_len = xsave_size;
++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++
++ pkey_register = (u32 *)(xsave + pkey_offset);
++ pkey_assert(*pkey_register == read_pkey_reg());
++
++ *pkey_register = new_pkru;
++
++ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++
++ /* Test that the modification is visible in ptrace before any execution */
++ memset(xsave, 0xCC, xsave_size);
++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++ pkey_assert(*pkey_register == new_pkru);
++
++ /* Execute the tracee */
++ ret = ptrace(PTRACE_CONT, child, 0, 0);
++ pkey_assert(ret == 0);
++
++ /* Test that the tracee saw the PKRU value change */
++ pkey_assert(child == waitpid(child, &status, 0));
++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
++
++ /* Test that the modification is visible in ptrace after execution */
++ memset(xsave, 0xCC, xsave_size);
++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++ pkey_assert(*pkey_register == new_pkru);
++
++ /* Clear the PKRU bit from XSTATE_BV */
++ xstate_bv = (u64 *)(xsave + 512);
++ *xstate_bv &= ~(1 << 9);
++
++ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++
++ /* Test that the modification is visible in ptrace before any execution */
++ memset(xsave, 0xCC, xsave_size);
++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++ pkey_assert(*pkey_register == 0);
++
++ ret = ptrace(PTRACE_CONT, child, 0, 0);
++ pkey_assert(ret == 0);
++
++ /* Test that the tracee saw the PKRU value go to 0 */
++ pkey_assert(child == waitpid(child, &status, 0));
++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
++
++ /* Test that the modification is visible in ptrace after execution */
++ memset(xsave, 0xCC, xsave_size);
++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
++ pkey_assert(ret == 0);
++ pkey_assert(*pkey_register == 0);
++
++ ret = ptrace(PTRACE_CONT, child, 0, 0);
++ pkey_assert(ret == 0);
++ pkey_assert(child == waitpid(child, &status, 0));
++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
++ pkey_assert(WIFEXITED(status));
++ pkey_assert(WEXITSTATUS(status) == 0);
++ free(xsave);
++}
++#endif
++
+ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+ {
+ int size = PAGE_SIZE;
+@@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+ test_pkey_alloc_free_attach_pkey0,
++#if defined(__i386__) || defined(__x86_64__)
++ test_ptrace_modifies_pkru,
++#endif
+ };
+
+ void run_tests_once(void)
+diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
+index 60aa1a4fc69b6..138b011c667e2 100644
+--- a/tools/testing/selftests/vm/userfaultfd.c
++++ b/tools/testing/selftests/vm/userfaultfd.c
+@@ -46,6 +46,7 @@
+ #include <signal.h>
+ #include <poll.h>
+ #include <string.h>
++#include <linux/mman.h>
+ #include <sys/mman.h>
+ #include <sys/syscall.h>
+ #include <sys/ioctl.h>
+@@ -86,7 +87,7 @@ static bool test_uffdio_minor = false;
+
+ static bool map_shared;
+ static int shm_fd;
+-static int huge_fd;
++static int huge_fd = -1; /* only used for hugetlb_shared test */
+ static char *huge_fd_off0;
+ static unsigned long long *count_verify;
+ static int uffd = -1;
+@@ -222,6 +223,9 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+
+ static void hugetlb_release_pages(char *rel_area)
+ {
++ if (huge_fd == -1)
++ return;
++
+ if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
+ nr_pages * page_size))
+@@ -234,16 +238,17 @@ static void hugetlb_allocate_area(void **alloc_area)
+ char **alloc_area_alias;
+
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+- (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+- MAP_HUGETLB,
+- huge_fd, *alloc_area == area_src ? 0 :
+- nr_pages * page_size);
++ map_shared ? MAP_SHARED :
++ MAP_PRIVATE | MAP_HUGETLB |
++ (*alloc_area == area_src ? 0 : MAP_NORESERVE),
++ huge_fd,
++ *alloc_area == area_src ? 0 : nr_pages * page_size);
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of hugetlbfs file failed");
+
+ if (map_shared) {
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+- MAP_SHARED | MAP_HUGETLB,
++ MAP_SHARED,
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (area_alias == MAP_FAILED)
+diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
+index 83acdff26a135..da6ec3b53ea8d 100644
+--- a/tools/testing/selftests/vm/va_128TBswitch.c
++++ b/tools/testing/selftests/vm/va_128TBswitch.c
+@@ -9,7 +9,7 @@
+ #include <sys/mman.h>
+ #include <string.h>
+
+-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
++#include "../kselftest.h"
+
+ #ifdef __powerpc64__
+ #define PAGE_SIZE (64 << 10)
+diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+index d3d0d108924d4..70a02301f4c27 100644
+--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
++++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
+@@ -14,7 +14,7 @@ want_sleep=$8
+ reserve=$9
+
+ echo "Putting task in cgroup '$cgroup'"
+-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
++echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
+
+ echo "Method is $method"
+
+diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
+index ebc4ee0fe179f..93e44410f170e 100755
+--- a/tools/testing/selftests/wireguard/netns.sh
++++ b/tools/testing/selftests/wireguard/netns.sh
+@@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2
+ n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ ip2 link del wg0
+ ip2 link del wg1
+-! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
++read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
++! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
++sleep 1
++read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
++(( tx_bytes_after - tx_bytes_before < 70000 ))
+
+ ip0 link del wg1
+ ip1 link del wg0
+@@ -498,10 +502,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+ n1 ping -W 1 -c 1 192.168.241.2
+ [[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
+
+-ip1 link del veth1
+-ip1 link del veth3
+-ip1 link del wg0
+-ip2 link del wg0
++ip1 link del dev veth3
++ip1 link del dev wg0
++ip2 link del dev wg0
++
++# Make sure persistent keep alives are sent when an adapter comes up
++ip1 link add dev wg0 type wireguard
++n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
++read _ _ tx_bytes < <(n1 wg show wg0 transfer)
++[[ $tx_bytes -eq 0 ]]
++ip1 link set dev wg0 up
++read _ _ tx_bytes < <(n1 wg show wg0 transfer)
++[[ $tx_bytes -gt 0 ]]
++ip1 link del dev wg0
++# This should also happen even if the private key is set later
++ip1 link add dev wg0 type wireguard
++n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
++read _ _ tx_bytes < <(n1 wg show wg0 transfer)
++[[ $tx_bytes -eq 0 ]]
++ip1 link set dev wg0 up
++read _ _ tx_bytes < <(n1 wg show wg0 transfer)
++[[ $tx_bytes -eq 0 ]]
++n1 wg set wg0 private-key <(echo "$key1")
++read _ _ tx_bytes < <(n1 wg show wg0 transfer)
++[[ $tx_bytes -gt 0 ]]
++ip1 link del dev veth1
++ip1 link del dev wg0
+
+ # We test that Netlink/IPC is working properly by doing things that usually cause split responses
+ ip0 link add dev wg0 type wireguard
+@@ -609,6 +635,28 @@ ip0 link set wg0 up
+ kill $ncat_pid
+ ip0 link del wg0
+
++# Ensure that dst_cache references don't outlive netns lifetime
++ip1 link add dev wg0 type wireguard
++ip2 link add dev wg0 type wireguard
++configure_peers
++ip1 link add veth1 type veth peer name veth2
++ip1 link set veth2 netns $netns2
++ip1 addr add fd00:aa::1/64 dev veth1
++ip2 addr add fd00:aa::2/64 dev veth2
++ip1 link set veth1 up
++ip2 link set veth2 up
++waitiface $netns1 veth1
++waitiface $netns2 veth2
++ip1 -6 route add default dev veth1 via fd00:aa::2
++ip2 -6 route add default dev veth2 via fd00:aa::1
++n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
++n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
++n1 ping6 -c 1 fd00::2
++pp ip netns delete $netns1
++pp ip netns delete $netns2
++pp ip netns add $netns1
++pp ip netns add $netns2
++
+ # Ensure there aren't circular reference loops
+ ip1 link add wg1 type wireguard
+ ip2 link add wg2 type wireguard
+@@ -627,7 +675,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
+ done < /dev/kmsg
+ alldeleted=1
+ for object in "${!objects[@]}"; do
+- if [[ ${objects["$object"]} != *createddestroyed ]]; then
++ if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
+diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
+index fe07d97df9fa8..2b321b8a96cf3 100644
+--- a/tools/testing/selftests/wireguard/qemu/debug.config
++++ b/tools/testing/selftests/wireguard/qemu/debug.config
+@@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
+ CONFIG_TRACE_IRQFLAGS=y
+ CONFIG_DEBUG_BUGVERBOSE=y
+ CONFIG_DEBUG_LIST=y
+-CONFIG_DEBUG_PI_LIST=y
++CONFIG_DEBUG_PLIST=y
+ CONFIG_PROVE_RCU=y
+ CONFIG_SPARSE_RCU_POINTER=y
+ CONFIG_RCU_CPU_STALL_TIMEOUT=21
+diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
+index 74db83a0aedd8..a9b5a520a1d22 100644
+--- a/tools/testing/selftests/wireguard/qemu/kernel.config
++++ b/tools/testing/selftests/wireguard/qemu/kernel.config
+@@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y
+ CONFIG_SYSFS=y
+ CONFIG_TMPFS=y
+ CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
++CONFIG_LOG_BUF_SHIFT=18
+ CONFIG_PRINTK_TIME=y
+ CONFIG_BLK_DEV_INITRD=y
+ CONFIG_LEGACY_VSYSCALL_NONE=y
+diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
+index b4142cd1c5c23..02a77056bca3f 100644
+--- a/tools/testing/selftests/x86/Makefile
++++ b/tools/testing/selftests/x86/Makefile
+@@ -6,9 +6,9 @@ include ../lib.mk
+ .PHONY: all all_32 all_64 warn_32bit_failure clean
+
+ UNAME_M := $(shell uname -m)
+-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
++CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
++CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
++CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
+
+ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+ check_initial_reg_state sigreturn iopl ioperm \
+diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
+index 3e2089c8cf549..8c669c0d662ee 100755
+--- a/tools/testing/selftests/x86/check_cc.sh
++++ b/tools/testing/selftests/x86/check_cc.sh
+@@ -7,7 +7,7 @@ CC="$1"
+ TESTPROG="$2"
+ shift 2
+
+-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
++if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+ echo 1
+ else
+ echo 0
+diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
+index bab2f6e06b63d..7e3e09c1abac6 100644
+--- a/tools/testing/selftests/x86/iopl.c
++++ b/tools/testing/selftests/x86/iopl.c
+@@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port)
+ printf("[OK]\toutb to 0x%02hx failed\n", port);
+ }
+
+-static bool try_cli(void)
++#define RET_FAULTED 0
++#define RET_FAIL 1
++#define RET_EMUL 2
++
++static int try_cli(void)
+ {
++ unsigned long flags;
++
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+- return false;
++ return RET_FAULTED;
+ } else {
+- asm volatile ("cli");
+- return true;
++ asm volatile("cli; pushf; pop %[flags]"
++ : [flags] "=rm" (flags));
++
++ /* X86_FLAGS_IF */
++ if (!(flags & (1 << 9)))
++ return RET_FAIL;
++ else
++ return RET_EMUL;
+ }
+ clearhandler(SIGSEGV);
+ }
+
+-static bool try_sti(void)
++static int try_sti(bool irqs_off)
+ {
++ unsigned long flags;
++
+ sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+- return false;
++ return RET_FAULTED;
+ } else {
+- asm volatile ("sti");
+- return true;
++ asm volatile("sti; pushf; pop %[flags]"
++ : [flags] "=rm" (flags));
++
++ /* X86_FLAGS_IF */
++ if (irqs_off && (flags & (1 << 9)))
++ return RET_FAIL;
++ else
++ return RET_EMUL;
+ }
+ clearhandler(SIGSEGV);
+ }
+
+-static void expect_gp_sti(void)
++static void expect_gp_sti(bool irqs_off)
+ {
+- if (try_sti()) {
++ int ret = try_sti(irqs_off);
++
++ switch (ret) {
++ case RET_FAULTED:
++ printf("[OK]\tSTI faulted\n");
++ break;
++ case RET_EMUL:
++ printf("[OK]\tSTI NOPped\n");
++ break;
++ default:
+ printf("[FAIL]\tSTI worked\n");
+ nerrs++;
+- } else {
+- printf("[OK]\tSTI faulted\n");
+ }
+ }
+
+-static void expect_gp_cli(void)
++/*
++ * Returns whether it managed to disable interrupts.
++ */
++static bool test_cli(void)
+ {
+- if (try_cli()) {
++ int ret = try_cli();
++
++ switch (ret) {
++ case RET_FAULTED:
++ printf("[OK]\tCLI faulted\n");
++ break;
++ case RET_EMUL:
++ printf("[OK]\tCLI NOPped\n");
++ break;
++ default:
+ printf("[FAIL]\tCLI worked\n");
+ nerrs++;
+- } else {
+- printf("[OK]\tCLI faulted\n");
++ return true;
+ }
++
++ return false;
+ }
+
+ int main(void)
+@@ -152,8 +192,7 @@ int main(void)
+ }
+
+ /* Make sure that CLI/STI are blocked even with IOPL level 3 */
+- expect_gp_cli();
+- expect_gp_sti();
++ expect_gp_sti(test_cli());
+ expect_ok_outb(0x80);
+
+ /* Establish an I/O bitmap to test the restore */
+@@ -204,8 +243,7 @@ int main(void)
+ printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+ expect_gp_outb(0x80);
+- expect_gp_cli();
+- expect_gp_sti();
++ expect_gp_sti(test_cli());
+
+ /* Test the capability checks. */
+ printf("\tiopl(3)\n");
+diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
+index 65c141ebfbbde..5b45e6986aeab 100644
+--- a/tools/testing/selftests/x86/test_vsyscall.c
++++ b/tools/testing/selftests/x86/test_vsyscall.c
+@@ -497,7 +497,7 @@ static int test_process_vm_readv(void)
+ }
+
+ if (vsyscall_map_r) {
+- if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
++ if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
+ printf("[OK]\tIt worked and read correct data\n");
+ } else {
+ printf("[FAIL]\tIt worked but returned incorrect data\n");
+diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
+index 232e958ec4547..b0b91d9b0dc21 100755
+--- a/tools/testing/selftests/zram/zram.sh
++++ b/tools/testing/selftests/zram/zram.sh
+@@ -2,9 +2,6 @@
+ # SPDX-License-Identifier: GPL-2.0
+ TCID="zram.sh"
+
+-# Kselftest framework requirement - SKIP code is 4.
+-ksft_skip=4
+-
+ . ./zram_lib.sh
+
+ run_zram () {
+@@ -18,14 +15,4 @@ echo ""
+
+ check_prereqs
+
+-# check zram module exists
+-MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
+-if [ -f $MODULE_PATH ]; then
+- run_zram
+-elif [ -b /dev/zram0 ]; then
+- run_zram
+-else
+- echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
+- echo "$TCID : CONFIG_ZRAM is not set"
+- exit $ksft_skip
+-fi
++run_zram
+diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
+index 114863d9fb876..8f4affe34f3e4 100755
+--- a/tools/testing/selftests/zram/zram01.sh
++++ b/tools/testing/selftests/zram/zram01.sh
+@@ -33,9 +33,7 @@ zram_algs="lzo"
+
+ zram_fill_fs()
+ {
+- local mem_free0=$(free -m | awk 'NR==2 {print $4}')
+-
+- for i in $(seq 0 $(($dev_num - 1))); do
++ for i in $(seq $dev_start $dev_end); do
+ echo "fill zram$i..."
+ local b=0
+ while [ true ]; do
+@@ -45,29 +43,17 @@ zram_fill_fs()
+ b=$(($b + 1))
+ done
+ echo "zram$i can be filled with '$b' KB"
+- done
+
+- local mem_free1=$(free -m | awk 'NR==2 {print $4}')
+- local used_mem=$(($mem_free0 - $mem_free1))
++ local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
++ local v=$((100 * 1024 * $b / $mem_used_total))
++ if [ "$v" -lt 100 ]; then
++ echo "FAIL compression ratio: 0.$v:1"
++ ERR_CODE=-1
++ return
++ fi
+
+- local total_size=0
+- for sm in $zram_sizes; do
+- local s=$(echo $sm | sed 's/M//')
+- total_size=$(($total_size + $s))
++ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+ done
+-
+- echo "zram used ${used_mem}M, zram disk sizes ${total_size}M"
+-
+- local v=$((100 * $total_size / $used_mem))
+-
+- if [ "$v" -lt 100 ]; then
+- echo "FAIL compression ratio: 0.$v:1"
+- ERR_CODE=-1
+- zram_cleanup
+- return
+- fi
+-
+- echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+ }
+
+ check_prereqs
+@@ -81,7 +67,6 @@ zram_mount
+
+ zram_fill_fs
+ zram_cleanup
+-zram_unload
+
+ if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
+index e83b404807c09..2418b0c4ed136 100755
+--- a/tools/testing/selftests/zram/zram02.sh
++++ b/tools/testing/selftests/zram/zram02.sh
+@@ -36,7 +36,6 @@ zram_set_memlimit
+ zram_makeswap
+ zram_swapoff
+ zram_cleanup
+-zram_unload
+
+ if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
+index 6f872f266fd11..21ec1966de76c 100755
+--- a/tools/testing/selftests/zram/zram_lib.sh
++++ b/tools/testing/selftests/zram/zram_lib.sh
+@@ -5,12 +5,17 @@
+ # Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+ # Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+-MODULE=0
+ dev_makeswap=-1
+ dev_mounted=-1
+-
++dev_start=0
++dev_end=-1
++module_load=-1
++sys_control=-1
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
++kernel_version=`uname -r | cut -d'.' -f1,2`
++kernel_major=${kernel_version%.*}
++kernel_minor=${kernel_version#*.}
+
+ trap INT
+
+@@ -25,68 +30,104 @@ check_prereqs()
+ fi
+ }
+
++kernel_gte()
++{
++ major=${1%.*}
++ minor=${1#*.}
++
++ if [ $kernel_major -gt $major ]; then
++ return 0
++ elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then
++ return 0
++ fi
++
++ return 1
++}
++
+ zram_cleanup()
+ {
+ echo "zram cleanup"
+ local i=
+- for i in $(seq 0 $dev_makeswap); do
++ for i in $(seq $dev_start $dev_makeswap); do
+ swapoff /dev/zram$i
+ done
+
+- for i in $(seq 0 $dev_mounted); do
++ for i in $(seq $dev_start $dev_mounted); do
+ umount /dev/zram$i
+ done
+
+- for i in $(seq 0 $(($dev_num - 1))); do
++ for i in $(seq $dev_start $dev_end); do
+ echo 1 > /sys/block/zram${i}/reset
+ rm -rf zram$i
+ done
+
+-}
++ if [ $sys_control -eq 1 ]; then
++ for i in $(seq $dev_start $dev_end); do
++ echo $i > /sys/class/zram-control/hot_remove
++ done
++ fi
+
+-zram_unload()
+-{
+- if [ $MODULE -ne 0 ] ; then
+- echo "zram rmmod zram"
++ if [ $module_load -eq 1 ]; then
+ rmmod zram > /dev/null 2>&1
+ fi
+ }
+
+ zram_load()
+ {
+- # check zram module exists
+- MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
+- if [ -f $MODULE_PATH ]; then
+- MODULE=1
+- echo "create '$dev_num' zram device(s)"
+- modprobe zram num_devices=$dev_num
+- if [ $? -ne 0 ]; then
+- echo "failed to insert zram module"
+- exit 1
+- fi
+-
+- dev_num_created=$(ls /dev/zram* | wc -w)
++ echo "create '$dev_num' zram device(s)"
++
++ # zram module loaded, new kernel
++ if [ -d "/sys/class/zram-control" ]; then
++ echo "zram modules already loaded, kernel supports" \
++ "zram-control interface"
++ dev_start=$(ls /dev/zram* | wc -w)
++ dev_end=$(($dev_start + $dev_num - 1))
++ sys_control=1
++
++ for i in $(seq $dev_start $dev_end); do
++ cat /sys/class/zram-control/hot_add > /dev/null
++ done
++
++ echo "all zram devices (/dev/zram$dev_start~$dev_end" \
++ "successfully created"
++ return 0
++ fi
+
+- if [ "$dev_num_created" -ne "$dev_num" ]; then
+- echo "unexpected num of devices: $dev_num_created"
+- ERR_CODE=-1
++ # detect old kernel or built-in
++ modprobe zram num_devices=$dev_num
++ if [ ! -d "/sys/class/zram-control" ]; then
++ if grep -q '^zram' /proc/modules; then
++ rmmod zram > /dev/null 2>&1
++ if [ $? -ne 0 ]; then
++ echo "zram module is being used on old kernel" \
++ "without zram-control interface"
++ exit $ksft_skip
++ fi
+ else
+- echo "zram load module successful"
++ echo "test needs CONFIG_ZRAM=m on old kernel without" \
++ "zram-control interface"
++ exit $ksft_skip
+ fi
+- elif [ -b /dev/zram0 ]; then
+- echo "/dev/zram0 device file found: OK"
+- else
+- echo "ERROR: No zram.ko module or no /dev/zram0 device found"
+- echo "$TCID : CONFIG_ZRAM is not set"
+- exit 1
++ modprobe zram num_devices=$dev_num
+ fi
++
++ module_load=1
++ dev_end=$(($dev_num - 1))
++ echo "all zram devices (/dev/zram0~$dev_end) successfully created"
+ }
+
+ zram_max_streams()
+ {
+ echo "set max_comp_streams to zram device(s)"
+
+- local i=0
++ kernel_gte 4.7
++ if [ $? -eq 0 ]; then
++ echo "The device attribute max_comp_streams was"\
++ "deprecated in 4.7"
++ return 0
++ fi
++
++ local i=$dev_start
+ for max_s in $zram_max_streams; do
+ local sys_path="/sys/block/zram${i}/max_comp_streams"
+ echo $max_s > $sys_path || \
+@@ -98,7 +139,7 @@ zram_max_streams()
+ echo "FAIL can't set max_streams '$max_s', get $max_stream"
+
+ i=$(($i + 1))
+- echo "$sys_path = '$max_streams' ($i/$dev_num)"
++ echo "$sys_path = '$max_streams'"
+ done
+
+ echo "zram max streams: OK"
+@@ -108,15 +149,16 @@ zram_compress_alg()
+ {
+ echo "test that we can set compression algorithm"
+
+- local algs=$(cat /sys/block/zram0/comp_algorithm)
++ local i=$dev_start
++ local algs=$(cat /sys/block/zram${i}/comp_algorithm)
+ echo "supported algs: $algs"
+- local i=0
++
+ for alg in $zram_algs; do
+ local sys_path="/sys/block/zram${i}/comp_algorithm"
+ echo "$alg" > $sys_path || \
+ echo "FAIL can't set '$alg' to $sys_path"
+ i=$(($i + 1))
+- echo "$sys_path = '$alg' ($i/$dev_num)"
++ echo "$sys_path = '$alg'"
+ done
+
+ echo "zram set compression algorithm: OK"
+@@ -125,14 +167,14 @@ zram_compress_alg()
+ zram_set_disksizes()
+ {
+ echo "set disk size to zram device(s)"
+- local i=0
++ local i=$dev_start
+ for ds in $zram_sizes; do
+ local sys_path="/sys/block/zram${i}/disksize"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+- echo "$sys_path = '$ds' ($i/$dev_num)"
++ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set disksizes: OK"
+@@ -142,14 +184,14 @@ zram_set_memlimit()
+ {
+ echo "set memory limit to zram device(s)"
+
+- local i=0
++ local i=$dev_start
+ for ds in $zram_mem_limits; do
+ local sys_path="/sys/block/zram${i}/mem_limit"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+- echo "$sys_path = '$ds' ($i/$dev_num)"
++ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set memory limit: OK"
+@@ -158,8 +200,8 @@ zram_set_memlimit()
+ zram_makeswap()
+ {
+ echo "make swap with zram device(s)"
+- local i=0
+- for i in $(seq 0 $(($dev_num - 1))); do
++ local i=$dev_start
++ for i in $(seq $dev_start $dev_end); do
+ mkswap /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+@@ -182,7 +224,7 @@ zram_makeswap()
+ zram_swapoff()
+ {
+ local i=
+- for i in $(seq 0 $dev_makeswap); do
++ for i in $(seq $dev_start $dev_end); do
+ swapoff /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+@@ -196,7 +238,7 @@ zram_swapoff()
+
+ zram_makefs()
+ {
+- local i=0
++ local i=$dev_start
+ for fs in $zram_filesystems; do
+ # if requested fs not supported default it to ext2
+ which mkfs.$fs > /dev/null 2>&1 || fs=ext2
+@@ -215,7 +257,7 @@ zram_makefs()
+ zram_mount()
+ {
+ local i=0
+- for i in $(seq 0 $(($dev_num - 1))); do
++ for i in $(seq $dev_start $dev_end); do
+ echo "mount /dev/zram$i"
+ mkdir zram$i
+ mount /dev/zram$i zram$i > /dev/null || \
+diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
+index b00b1bfd9d8e7..cb1108bc92498 100644
+--- a/tools/thermal/tmon/sysfs.c
++++ b/tools/thermal/tmon/sysfs.c
+@@ -13,6 +13,7 @@
+ #include <stdint.h>
+ #include <dirent.h>
+ #include <libintl.h>
++#include <limits.h>
+ #include <ctype.h>
+ #include <time.h>
+ #include <syslog.h>
+@@ -33,9 +34,9 @@ int sysfs_set_ulong(char *path, char *filename, unsigned long val)
+ {
+ FILE *fd;
+ int ret = -1;
+- char filepath[256];
++ char filepath[PATH_MAX + 2]; /* NUL and '/' */
+
+- snprintf(filepath, 256, "%s/%s", path, filename);
++ snprintf(filepath, sizeof(filepath), "%s/%s", path, filename);
+
+ fd = fopen(filepath, "w");
+ if (!fd) {
+@@ -57,9 +58,9 @@ static int sysfs_get_ulong(char *path, char *filename, unsigned long *p_ulong)
+ {
+ FILE *fd;
+ int ret = -1;
+- char filepath[256];
++ char filepath[PATH_MAX + 2]; /* NUL and '/' */
+
+- snprintf(filepath, 256, "%s/%s", path, filename);
++ snprintf(filepath, sizeof(filepath), "%s/%s", path, filename);
+
+ fd = fopen(filepath, "r");
+ if (!fd) {
+@@ -76,9 +77,9 @@ static int sysfs_get_string(char *path, char *filename, char *str)
+ {
+ FILE *fd;
+ int ret = -1;
+- char filepath[256];
++ char filepath[PATH_MAX + 2]; /* NUL and '/' */
+
+- snprintf(filepath, 256, "%s/%s", path, filename);
++ snprintf(filepath, sizeof(filepath), "%s/%s", path, filename);
+
+ fd = fopen(filepath, "r");
+ if (!fd) {
+@@ -199,8 +200,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name,
+ {
+ unsigned long trip_instance = 0;
+ char cdev_name_linked[256];
+- char cdev_name[256];
+- char cdev_trip_name[256];
++ char cdev_name[PATH_MAX];
++ char cdev_trip_name[PATH_MAX];
+ int cdev_id;
+
+ if (nl->d_type == DT_LNK) {
+@@ -213,7 +214,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name,
+ return -EINVAL;
+ }
+ /* find the link to real cooling device record binding */
+- snprintf(cdev_name, 256, "%s/%s", tz_name, nl->d_name);
++ snprintf(cdev_name, sizeof(cdev_name) - 2, "%s/%s",
++ tz_name, nl->d_name);
+ memset(cdev_name_linked, 0, sizeof(cdev_name_linked));
+ if (readlink(cdev_name, cdev_name_linked,
+ sizeof(cdev_name_linked) - 1) != -1) {
+@@ -226,8 +228,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name,
+ /* find the trip point in which the cdev is binded to
+ * in this tzone
+ */
+- snprintf(cdev_trip_name, 256, "%s%s", nl->d_name,
+- "_trip_point");
++ snprintf(cdev_trip_name, sizeof(cdev_trip_name) - 1,
++ "%s%s", nl->d_name, "_trip_point");
+ sysfs_get_ulong(tz_name, cdev_trip_name,
+ &trip_instance);
+ /* validate trip point range, e.g. trip could return -1
+diff --git a/tools/thermal/tmon/tmon.h b/tools/thermal/tmon/tmon.h
+index c9066ec104ddd..44d16d778f044 100644
+--- a/tools/thermal/tmon/tmon.h
++++ b/tools/thermal/tmon/tmon.h
+@@ -27,6 +27,9 @@
+ #define NR_LINES_TZDATA 1
+ #define TMON_LOG_FILE "/var/tmp/tmon.log"
+
++#include <sys/time.h>
++#include <pthread.h>
++
+ extern unsigned long ticktime;
+ extern double time_elapsed;
+ extern unsigned long target_temp_user;
+diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
+index 3a2e6bb781a8c..59a7f2346eab4 100644
+--- a/tools/tracing/latency/latency-collector.c
++++ b/tools/tracing/latency/latency-collector.c
+@@ -1538,7 +1538,7 @@ static void tracing_loop(void)
+ mutex_lock(&print_mtx);
+ check_signals();
+ write_or_die(fd_stdout, queue_full_warning,
+- sizeof(queue_full_warning));
++ strlen(queue_full_warning));
+ mutex_unlock(&print_mtx);
+ }
+ modified--;
+diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
+index 0d7bbe49359d8..1b25cc7c64bbd 100644
+--- a/tools/virtio/Makefile
++++ b/tools/virtio/Makefile
+@@ -5,7 +5,8 @@ virtio_test: virtio_ring.o virtio_test.o
+ vringh_test: vringh_test.o vringh.o virtio_ring.o
+
+ CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h
+-LDFLAGS += -lpthread
++CFLAGS += -pthread
++LDFLAGS += -pthread
+ vpath %.c ../../drivers/virtio ../../drivers/vhost
+ mod:
+ ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
+diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
+index 813baf13f62a2..51a919083d9b8 100644
+--- a/tools/virtio/linux/bug.h
++++ b/tools/virtio/linux/bug.h
+@@ -1,13 +1,11 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef BUG_H
+-#define BUG_H
++#ifndef _LINUX_BUG_H
++#define _LINUX_BUG_H
+
+ #include <asm/bug.h>
+
+ #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+-#define BUILD_BUG_ON(x)
+-
+ #define BUG() abort()
+
+-#endif /* BUG_H */
++#endif /* _LINUX_BUG_H */
+diff --git a/tools/virtio/linux/build_bug.h b/tools/virtio/linux/build_bug.h
+new file mode 100644
+index 0000000000000..cdbb75e28a604
+--- /dev/null
++++ b/tools/virtio/linux/build_bug.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_BUILD_BUG_H
++#define _LINUX_BUILD_BUG_H
++
++#define BUILD_BUG_ON(x)
++
++#endif /* _LINUX_BUILD_BUG_H */
+diff --git a/tools/virtio/linux/cpumask.h b/tools/virtio/linux/cpumask.h
+new file mode 100644
+index 0000000000000..307da69d6b26c
+--- /dev/null
++++ b/tools/virtio/linux/cpumask.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_CPUMASK_H
++#define _LINUX_CPUMASK_H
++
++#include <linux/kernel.h>
++
++#endif /* _LINUX_CPUMASK_H */
+diff --git a/tools/virtio/linux/gfp.h b/tools/virtio/linux/gfp.h
+new file mode 100644
+index 0000000000000..43d146f236f14
+--- /dev/null
++++ b/tools/virtio/linux/gfp.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __LINUX_GFP_H
++#define __LINUX_GFP_H
++
++#include <linux/topology.h>
++
++#endif
+diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
+index 0b493542e61a6..a4beb719d2174 100644
+--- a/tools/virtio/linux/kernel.h
++++ b/tools/virtio/linux/kernel.h
+@@ -10,6 +10,7 @@
+ #include <stdarg.h>
+
+ #include <linux/compiler.h>
++#include <linux/log2.h>
+ #include <linux/types.h>
+ #include <linux/overflow.h>
+ #include <linux/list.h>
+diff --git a/tools/virtio/linux/kmsan.h b/tools/virtio/linux/kmsan.h
+new file mode 100644
+index 0000000000000..272b5aa285d5a
+--- /dev/null
++++ b/tools/virtio/linux/kmsan.h
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_KMSAN_H
++#define _LINUX_KMSAN_H
++
++#include <linux/gfp.h>
++
++inline void kmsan_handle_dma(struct page *page, size_t offset, size_t size,
++ enum dma_data_direction dir)
++{
++}
++
++#endif /* _LINUX_KMSAN_H */
+diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
+index 369ee308b6686..74d9e1825748e 100644
+--- a/tools/virtio/linux/scatterlist.h
++++ b/tools/virtio/linux/scatterlist.h
+@@ -2,6 +2,7 @@
+ #ifndef SCATTERLIST_H
+ #define SCATTERLIST_H
+ #include <linux/kernel.h>
++#include <linux/bug.h>
+
+ struct scatterlist {
+ unsigned long page_link;
+diff --git a/tools/virtio/linux/topology.h b/tools/virtio/linux/topology.h
+new file mode 100644
+index 0000000000000..910794afb993a
+--- /dev/null
++++ b/tools/virtio/linux/topology.h
+@@ -0,0 +1,7 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_TOPOLOGY_H
++#define _LINUX_TOPOLOGY_H
++
++#include <linux/cpumask.h>
++
++#endif /* _LINUX_TOPOLOGY_H */
+diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
+index cb3f29c09aff3..23f142af544ad 100644
+--- a/tools/virtio/virtio_test.c
++++ b/tools/virtio/virtio_test.c
+@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+ memset(dev, 0, sizeof *dev);
+ dev->vdev.features = features;
+ INIT_LIST_HEAD(&dev->vdev.vqs);
++ spin_lock_init(&dev->vdev.vqs_list_lock);
+ dev->buf_size = 1024;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
+index fa87b58bd5fa5..98ff808d6f0c2 100644
+--- a/tools/virtio/vringh_test.c
++++ b/tools/virtio/vringh_test.c
+@@ -308,6 +308,7 @@ static int parallel_test(u64 features,
+
+ gvdev.vdev.features = features;
+ INIT_LIST_HEAD(&gvdev.vdev.vqs);
++ spin_lock_init(&gvdev.vdev.vqs_list_lock);
+ gvdev.to_host_fd = to_host[1];
+ gvdev.notifies = 0;
+
+@@ -455,6 +456,7 @@ int main(int argc, char *argv[])
+ getrange = getrange_iov;
+ vdev.features = 0;
+ INIT_LIST_HEAD(&vdev.vqs);
++ spin_lock_init(&vdev.vqs_list_lock);
+
+ while (argv[1]) {
+ if (strcmp(argv[1], "--indirect") == 0)
+diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
+index 26e193ffd2a2f..873a892147e57 100644
+--- a/tools/vm/slabinfo-gnuplot.sh
++++ b/tools/vm/slabinfo-gnuplot.sh
+@@ -150,7 +150,7 @@ do_preprocess()
+ let lines=3
+ out=`basename "$in"`"-slabs-by-loss"
+ `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\
+- egrep -iv '\-\-|Name|Slabs'\
++ grep -E -iv '\-\-|Name|Slabs'\
+ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"`
+ if [ $? -eq 0 ]; then
+ do_slabs_plotting "$out"
+@@ -159,7 +159,7 @@ do_preprocess()
+ let lines=3
+ out=`basename "$in"`"-slabs-by-size"
+ `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\
+- egrep -iv '\-\-|Name|Slabs'\
++ grep -E -iv '\-\-|Name|Slabs'\
+ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"`
+ if [ $? -eq 0 ]; then
+ do_slabs_plotting "$out"
+diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
+index 9b68658b6bb85..0fffaeedee767 100644
+--- a/tools/vm/slabinfo.c
++++ b/tools/vm/slabinfo.c
+@@ -125,7 +125,7 @@ static void usage(void)
+ "-n|--numa Show NUMA information\n"
+ "-N|--lines=K Show the first K slabs\n"
+ "-o|--ops Show kmem_cache_ops\n"
+- "-P|--partial Sort by number of partial slabs\n"
++ "-P|--partial Sort by number of partial slabs\n"
+ "-r|--report Detailed report on single slabs\n"
+ "-s|--shrink Shrink slabs\n"
+ "-S|--Size Sort by size\n"
+@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+ return l;
+ }
+
++static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
++{
++ char x[128];
++ FILE *f;
++ size_t l;
++
++ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
++ f = fopen(x, "r");
++ if (!f) {
++ buffer[0] = 0;
++ l = 0;
++ } else {
++ l = fread(buffer, 1, sizeof(buffer), f);
++ buffer[l] = 0;
++ fclose(f);
++ }
++ return l;
++}
+
+ /*
+ * Put a size string together
+@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
+ {
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "alloc_calls"))
++ if (read_debug_slab_obj(s, "alloc_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "alloc_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "free_calls"))
++ if (read_debug_slab_obj(s, "free_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "free_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+@@ -1045,15 +1067,27 @@ static void sort_slabs(void)
+ for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+ int result;
+
+- if (sort_size)
+- result = slab_size(s1) < slab_size(s2);
+- else if (sort_active)
+- result = slab_activity(s1) < slab_activity(s2);
+- else if (sort_loss)
+- result = slab_waste(s1) < slab_waste(s2);
+- else if (sort_partial)
+- result = s1->partial < s2->partial;
+- else
++ if (sort_size) {
++ if (slab_size(s1) == slab_size(s2))
++ result = strcasecmp(s1->name, s2->name);
++ else
++ result = slab_size(s1) < slab_size(s2);
++ } else if (sort_active) {
++ if (slab_activity(s1) == slab_activity(s2))
++ result = strcasecmp(s1->name, s2->name);
++ else
++ result = slab_activity(s1) < slab_activity(s2);
++ } else if (sort_loss) {
++ if (slab_waste(s1) == slab_waste(s2))
++ result = strcasecmp(s1->name, s2->name);
++ else
++ result = slab_waste(s1) < slab_waste(s2);
++ } else if (sort_partial) {
++ if (s1->partial == s2->partial)
++ result = strcasecmp(s1->name, s2->name);
++ else
++ result = s1->partial < s2->partial;
++ } else
+ result = strcasecmp(s1->name, s2->name);
+
+ if (show_inverted)
+diff --git a/usr/include/Makefile b/usr/include/Makefile
+index 1c2ae1368079d..adc6cb2587369 100644
+--- a/usr/include/Makefile
++++ b/usr/include/Makefile
+@@ -28,13 +28,13 @@ no-header-test += linux/am437x-vpfe.h
+ no-header-test += linux/android/binder.h
+ no-header-test += linux/android/binderfs.h
+ no-header-test += linux/coda.h
++no-header-test += linux/cyclades.h
+ no-header-test += linux/errqueue.h
+ no-header-test += linux/fsmap.h
+ no-header-test += linux/hdlc/ioctl.h
+ no-header-test += linux/ivtv.h
+ no-header-test += linux/kexec.h
+ no-header-test += linux/matroxfb.h
+-no-header-test += linux/nfc.h
+ no-header-test += linux/omap3isp.h
+ no-header-test += linux/omapfb.h
+ no-header-test += linux/patchkey.h
+diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
+index 0be80c213f7f2..5ef88f5a08640 100644
+--- a/virt/kvm/coalesced_mmio.c
++++ b/virt/kvm/coalesced_mmio.c
+@@ -187,15 +187,17 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+ r = kvm_io_bus_unregister_dev(kvm,
+ zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
+
++ kvm_iodevice_destructor(&dev->dev);
++
+ /*
+ * On failure, unregister destroys all devices on the
+ * bus _except_ the target device, i.e. coalesced_zones
+- * has been modified. No need to restart the walk as
+- * there aren't any zones left.
++ * has been modified. Bail after destroying the target
++ * device, there's no need to restart the walk as there
++ * aren't any zones left.
+ */
+ if (r)
+ break;
+- kvm_iodevice_destructor(&dev->dev);
+ }
+ }
+
+diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
+index e996989cd580e..5b874e7ba36fd 100644
+--- a/virt/kvm/eventfd.c
++++ b/virt/kvm/eventfd.c
+@@ -456,8 +456,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+- link)
++ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
++ link, srcu_read_lock_held(&kvm->irq_srcu))
+ if (kian->gsi == gsi) {
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return true;
+@@ -473,8 +473,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
+ {
+ struct kvm_irq_ack_notifier *kian;
+
+- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+- link)
++ hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
++ link, srcu_read_lock_held(&kvm->irq_srcu))
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+ }
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 7851f3a1b5f7c..d11c581ce9b9a 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -117,6 +117,8 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
+
+ static const struct file_operations stat_fops_per_vm;
+
++static struct file_operations kvm_chardev_ops;
++
+ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
+ unsigned long arg);
+ #ifdef CONFIG_KVM_COMPAT
+@@ -155,11 +157,17 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+ static unsigned long long kvm_createvm_count;
+ static unsigned long long kvm_active_vms;
+
++static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
++
+ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+ {
+ }
+
++__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
++{
++}
++
+ bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+ {
+ /*
+@@ -251,50 +259,57 @@ static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
+ return true;
+ }
+
++static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
++ unsigned int req, cpumask_var_t tmp,
++ int current_cpu)
++{
++ int cpu;
++
++ kvm_make_request(req, vcpu);
++
++ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
++ return;
++
++ /*
++ * tmp can be "unavailable" if cpumasks are allocated off stack as
++ * allocation of the mask is deliberately not fatal and is handled by
++ * falling back to kicking all online CPUs.
++ */
++ if (!cpumask_available(tmp))
++ return;
++
++ /*
++ * Note, the vCPU could get migrated to a different pCPU at any point
++ * after kvm_request_needs_ipi(), which could result in sending an IPI
++ * to the previous pCPU. But, that's OK because the purpose of the IPI
++ * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
++ * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES
++ * after this point is also OK, as the requirement is only that KVM wait
++ * for vCPUs that were reading SPTEs _before_ any changes were
++ * finalized. See kvm_vcpu_kick() for more details on handling requests.
++ */
++ if (kvm_request_needs_ipi(vcpu, req)) {
++ cpu = READ_ONCE(vcpu->cpu);
++ if (cpu != -1 && cpu != current_cpu)
++ __cpumask_set_cpu(cpu, tmp);
++ }
++}
++
+ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except,
+ unsigned long *vcpu_bitmap, cpumask_var_t tmp)
+ {
+- int i, cpu, me;
+ struct kvm_vcpu *vcpu;
++ int i, me;
+ bool called;
+
+ me = get_cpu();
+
+- kvm_for_each_vcpu(i, vcpu, kvm) {
+- if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
+- vcpu == except)
+- continue;
+-
+- kvm_make_request(req, vcpu);
+-
+- if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+- continue;
+-
+- /*
+- * tmp can be "unavailable" if cpumasks are allocated off stack
+- * as allocation of the mask is deliberately not fatal and is
+- * handled by falling back to kicking all online CPUs.
+- */
+- if (!cpumask_available(tmp))
++ for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
++ vcpu = kvm_get_vcpu(kvm, i);
++ if (!vcpu || vcpu == except)
+ continue;
+-
+- /*
+- * Note, the vCPU could get migrated to a different pCPU at any
+- * point after kvm_request_needs_ipi(), which could result in
+- * sending an IPI to the previous pCPU. But, that's ok because
+- * the purpose of the IPI is to ensure the vCPU returns to
+- * OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates.
+- * Entering READING_SHADOW_PAGE_TABLES after this point is also
+- * ok, as the requirement is only that KVM wait for vCPUs that
+- * were reading SPTEs _before_ any changes were finalized. See
+- * kvm_vcpu_kick() for more details on handling requests.
+- */
+- if (kvm_request_needs_ipi(vcpu, req)) {
+- cpu = READ_ONCE(vcpu->cpu);
+- if (cpu != -1 && cpu != me)
+- __cpumask_set_cpu(cpu, tmp);
+- }
++ kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
+ }
+
+ called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
+@@ -306,14 +321,25 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except)
+ {
+- cpumask_var_t cpus;
++ struct kvm_vcpu *vcpu;
++ struct cpumask *cpus;
+ bool called;
++ int i, me;
+
+- zalloc_cpumask_var(&cpus, GFP_ATOMIC);
++ me = get_cpu();
+
+- called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
++ cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
++ cpumask_clear(cpus);
++
++ kvm_for_each_vcpu(i, vcpu, kvm) {
++ if (vcpu == except)
++ continue;
++ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
++ }
++
++ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
++ put_cpu();
+
+- free_cpumask_var(cpus);
+ return called;
+ }
+
+@@ -351,6 +377,12 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ }
+
++static void kvm_flush_shadow_all(struct kvm *kvm)
++{
++ kvm_arch_flush_shadow_all(kvm);
++ kvm_arch_guest_memory_reclaimed(kvm);
++}
++
+ #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
+ static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
+ gfp_t gfp_flags)
+@@ -429,8 +461,8 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
+
+ void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
+ {
+- kvm_dirty_ring_free(&vcpu->dirty_ring);
+ kvm_arch_vcpu_destroy(vcpu);
++ kvm_dirty_ring_free(&vcpu->dirty_ring);
+
+ /*
+ * No need for rcu_read_lock as VCPU_RUN is the only place that changes
+@@ -467,12 +499,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
+ typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+
++typedef void (*on_unlock_fn_t)(struct kvm *kvm);
++
+ struct kvm_hva_range {
+ unsigned long start;
+ unsigned long end;
+ pte_t pte;
+ hva_handler_t handler;
+ on_lock_fn_t on_lock;
++ on_unlock_fn_t on_unlock;
+ bool flush_on_ret;
+ bool may_block;
+ };
+@@ -549,8 +584,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
+ if (range->flush_on_ret && ret)
+ kvm_flush_remote_tlbs(kvm);
+
+- if (locked)
++ if (locked) {
+ KVM_MMU_UNLOCK(kvm);
++ if (!IS_KVM_NULL_FN(range->on_unlock))
++ range->on_unlock(kvm);
++ }
+
+ srcu_read_unlock(&kvm->srcu, idx);
+
+@@ -571,6 +609,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
+ .pte = pte,
+ .handler = handler,
+ .on_lock = (void *)kvm_null_fn,
++ .on_unlock = (void *)kvm_null_fn,
+ .flush_on_ret = true,
+ .may_block = false,
+ };
+@@ -590,12 +629,31 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
+ .pte = __pte(0),
+ .handler = handler,
+ .on_lock = (void *)kvm_null_fn,
++ .on_unlock = (void *)kvm_null_fn,
+ .flush_on_ret = false,
+ .may_block = false,
+ };
+
+ return __kvm_handle_hva_range(kvm, &range);
+ }
++
++static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
++{
++ /*
++ * Skipping invalid memslots is correct if and only change_pte() is
++ * surrounded by invalidate_range_{start,end}(), which is currently
++ * guaranteed by the primary MMU. If that ever changes, KVM needs to
++ * unmap the memslot instead of skipping the memslot to ensure that KVM
++ * doesn't hold references to the old PFN.
++ */
++ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
++
++ if (range->slot->flags & KVM_MEMSLOT_INVALID)
++ return false;
++
++ return kvm_set_spte_gfn(kvm, range);
++}
++
+ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address,
+@@ -616,7 +674,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+ if (!READ_ONCE(kvm->mmu_notifier_count))
+ return;
+
+- kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
++ kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
+ }
+
+ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+@@ -658,6 +716,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+ .pte = __pte(0),
+ .handler = kvm_unmap_gfn_range,
+ .on_lock = kvm_inc_notifier_count,
++ .on_unlock = kvm_arch_guest_memory_reclaimed,
+ .flush_on_ret = true,
+ .may_block = mmu_notifier_range_blockable(range),
+ };
+@@ -709,6 +768,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+ .pte = __pte(0),
+ .handler = (void *)kvm_null_fn,
+ .on_lock = kvm_dec_notifier_count,
++ .on_unlock = (void *)kvm_null_fn,
+ .flush_on_ret = false,
+ .may_block = mmu_notifier_range_blockable(range),
+ };
+@@ -781,7 +841,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+- kvm_arch_flush_shadow_all(kvm);
++ kvm_flush_shadow_all(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
+ }
+
+@@ -909,7 +969,7 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
+ int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+ kvm_vcpu_stats_header.num_desc;
+
+- if (!kvm->debugfs_dentry)
++ if (IS_ERR(kvm->debugfs_dentry))
+ return;
+
+ debugfs_remove_recursive(kvm->debugfs_dentry);
+@@ -1032,6 +1092,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
+ if (!kvm)
+ return ERR_PTR(-ENOMEM);
+
++ /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
++ __module_get(kvm_chardev_ops.owner);
++
+ KVM_MMU_LOCK_INIT(kvm);
+ mmgrab(current->mm);
+ kvm->mm = current->mm;
+@@ -1047,6 +1110,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
+
+ BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+
++ /*
++ * Force subsequent debugfs file creations to fail if the VM directory
++ * is not created (by kvm_create_vm_debugfs()).
++ */
++ kvm->debugfs_dentry = ERR_PTR(-ENOENT);
++
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+@@ -1122,6 +1191,7 @@ out_err_no_irq_srcu:
+ out_err_no_srcu:
+ kvm_arch_free_vm(kvm);
+ mmdrop(current->mm);
++ module_put(kvm_chardev_ops.owner);
+ return ERR_PTR(r);
+ }
+
+@@ -1176,7 +1246,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
+ WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
+ kvm->mn_active_invalidate_count = 0;
+ #else
+- kvm_arch_flush_shadow_all(kvm);
++ kvm_flush_shadow_all(kvm);
+ #endif
+ kvm_arch_destroy_vm(kvm);
+ kvm_destroy_devices(kvm);
+@@ -1188,6 +1258,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
+ preempt_notifier_dec();
+ hardware_disable_all();
+ mmdrop(mm);
++ module_put(kvm_chardev_ops.owner);
+ }
+
+ void kvm_get_kvm(struct kvm *kvm)
+@@ -1242,9 +1313,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
+ */
+ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
+ {
+- unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
++ unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
+
+- memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
++ memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT);
+ if (!memslot->dirty_bitmap)
+ return -ENOMEM;
+
+@@ -1523,11 +1594,10 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
+
+ static int kvm_set_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem,
+- struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, int as_id,
+ enum kvm_mr_change change)
+ {
+- struct kvm_memory_slot *slot;
++ struct kvm_memory_slot *slot, old;
+ struct kvm_memslots *slots;
+ int r;
+
+@@ -1558,7 +1628,7 @@ static int kvm_set_memslot(struct kvm *kvm,
+ * Note, the INVALID flag needs to be in the appropriate entry
+ * in the freshly allocated memslots, not in @old or @new.
+ */
+- slot = id_to_memslot(slots, old->id);
++ slot = id_to_memslot(slots, new->id);
+ slot->flags |= KVM_MEMSLOT_INVALID;
+
+ /*
+@@ -1576,6 +1646,7 @@ static int kvm_set_memslot(struct kvm *kvm,
+ * - kvm_is_visible_gfn (mmu_check_root)
+ */
+ kvm_arch_flush_shadow_memslot(kvm, slot);
++ kvm_arch_guest_memory_reclaimed(kvm);
+
+ /* Released in install_new_memslots. */
+ mutex_lock(&kvm->slots_arch_lock);
+@@ -1589,6 +1660,26 @@ static int kvm_set_memslot(struct kvm *kvm,
+ kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
+ }
+
++ /*
++ * Make a full copy of the old memslot, as the pointer will become stale
++ * when the memslots are re-sorted by update_memslots(), and the old
++ * memslot needs to be referenced after calling update_memslots(), e.g.
++ * to free its resources and for arch specific behavior. This needs to
++ * happen *after* (re)acquiring slots_arch_lock.
++ */
++ slot = id_to_memslot(slots, new->id);
++ if (slot) {
++ old = *slot;
++ } else {
++ WARN_ON_ONCE(change != KVM_MR_CREATE);
++ memset(&old, 0, sizeof(old));
++ old.id = new->id;
++ old.as_id = as_id;
++ }
++
++ /* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */
++ memcpy(&new->arch, &old.arch, sizeof(old.arch));
++
+ r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
+ if (r)
+ goto out_slots;
+@@ -1596,14 +1687,18 @@ static int kvm_set_memslot(struct kvm *kvm,
+ update_memslots(slots, new, change);
+ slots = install_new_memslots(kvm, as_id, slots);
+
+- kvm_arch_commit_memory_region(kvm, mem, old, new, change);
++ kvm_arch_commit_memory_region(kvm, mem, &old, new, change);
++
++ /* Free the old memslot's metadata. Note, this is the full copy!!! */
++ if (change == KVM_MR_DELETE)
++ kvm_free_memslot(kvm, &old);
+
+ kvfree(slots);
+ return 0;
+
+ out_slots:
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+- slot = id_to_memslot(slots, old->id);
++ slot = id_to_memslot(slots, new->id);
+ slot->flags &= ~KVM_MEMSLOT_INVALID;
+ slots = install_new_memslots(kvm, as_id, slots);
+ } else {
+@@ -1618,7 +1713,6 @@ static int kvm_delete_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *old, int as_id)
+ {
+ struct kvm_memory_slot new;
+- int r;
+
+ if (!old->npages)
+ return -EINVAL;
+@@ -1631,12 +1725,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
+ */
+ new.as_id = as_id;
+
+- r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
+- if (r)
+- return r;
+-
+- kvm_free_memslot(kvm, old);
+- return 0;
++ return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE);
+ }
+
+ /*
+@@ -1664,7 +1753,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
+ id = (u16)mem->slot;
+
+ /* General sanity checks */
+- if (mem->memory_size & (PAGE_SIZE - 1))
++ if ((mem->memory_size & (PAGE_SIZE - 1)) ||
++ (mem->memory_size != (unsigned long)mem->memory_size))
+ return -EINVAL;
+ if (mem->guest_phys_addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+@@ -1710,7 +1800,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
+ if (!old.npages) {
+ change = KVM_MR_CREATE;
+ new.dirty_bitmap = NULL;
+- memset(&new.arch, 0, sizeof(new.arch));
+ } else { /* Modify an existing slot. */
+ if ((new.userspace_addr != old.userspace_addr) ||
+ (new.npages != old.npages) ||
+@@ -1724,9 +1813,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
+ else /* Nothing to change. */
+ return 0;
+
+- /* Copy dirty_bitmap and arch from the current memslot. */
++ /* Copy dirty_bitmap from the current memslot. */
+ new.dirty_bitmap = old.dirty_bitmap;
+- memcpy(&new.arch, &old.arch, sizeof(new.arch));
+ }
+
+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+@@ -1752,7 +1840,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
+ bitmap_set(new.dirty_bitmap, 0, new.npages);
+ }
+
+- r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
++ r = kvm_set_memslot(kvm, mem, &new, as_id, change);
+ if (r)
+ goto out_bitmap;
+
+@@ -2088,7 +2176,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
+
+ return NULL;
+ }
+-EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
+
+ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+ {
+@@ -2728,16 +2815,28 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn)
+ }
+ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
+
++static bool kvm_is_ad_tracked_pfn(kvm_pfn_t pfn)
++{
++ if (!pfn_valid(pfn))
++ return false;
++
++ /*
++ * Per page-flags.h, pages tagged PG_reserved "should in general not be
++ * touched (e.g. set dirty) except by its owner".
++ */
++ return !PageReserved(pfn_to_page(pfn));
++}
++
+ void kvm_set_pfn_dirty(kvm_pfn_t pfn)
+ {
+- if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
++ if (kvm_is_ad_tracked_pfn(pfn))
+ SetPageDirty(pfn_to_page(pfn));
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
+
+ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
+ {
+- if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
++ if (kvm_is_ad_tracked_pfn(pfn))
+ mark_page_accessed(pfn_to_page(pfn));
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
+@@ -2985,7 +3084,8 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ int r;
+ gpa_t gpa = ghc->gpa + offset;
+
+- BUG_ON(len + offset > ghc->len);
++ if (WARN_ON_ONCE(len + offset > ghc->len))
++ return -EINVAL;
+
+ if (slots->generation != ghc->generation) {
+ if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
+@@ -3022,7 +3122,8 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ int r;
+ gpa_t gpa = ghc->gpa + offset;
+
+- BUG_ON(len + offset > ghc->len);
++ if (WARN_ON_ONCE(len + offset > ghc->len))
++ return -EINVAL;
+
+ if (slots->generation != ghc->generation) {
+ if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
+@@ -3205,6 +3306,7 @@ update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+ */
+ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+ {
++ bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
+ ktime_t start, cur, poll_end;
+ bool waited = false;
+ u64 block_ns;
+@@ -3212,7 +3314,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+ kvm_arch_vcpu_blocking(vcpu);
+
+ start = cur = poll_end = ktime_get();
+- if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
++ if (vcpu->halt_poll_ns && halt_poll_allowed) {
+ ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+
+ ++vcpu->stat.generic.halt_attempted_poll;
+@@ -3267,7 +3369,7 @@ out:
+ update_halt_poll_stats(
+ vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+
+- if (!kvm_arch_no_poll(vcpu)) {
++ if (halt_poll_allowed) {
+ if (!vcpu_valid_wakeup(vcpu)) {
+ shrink_halt_poll_ns(vcpu);
+ } else if (vcpu->kvm->max_halt_poll_ns) {
+@@ -3702,8 +3804,17 @@ static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
+ sizeof(vcpu->stat), user_buffer, size, offset);
+ }
+
++static int kvm_vcpu_stats_release(struct inode *inode, struct file *file)
++{
++ struct kvm_vcpu *vcpu = file->private_data;
++
++ kvm_put_kvm(vcpu->kvm);
++ return 0;
++}
++
+ static const struct file_operations kvm_vcpu_stats_fops = {
+ .read = kvm_vcpu_stats_read,
++ .release = kvm_vcpu_stats_release,
+ .llseek = noop_llseek,
+ };
+
+@@ -3724,6 +3835,9 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
+ put_unused_fd(fd);
+ return PTR_ERR(file);
+ }
++
++ kvm_get_kvm(vcpu->kvm);
++
+ file->f_mode |= FMODE_PREAD;
+ fd_install(fd, file);
+
+@@ -4135,8 +4249,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
+ kvm_put_kvm_no_destroy(kvm);
+ mutex_lock(&kvm->lock);
+ list_del(&dev->vm_node);
++ if (ops->release)
++ ops->release(dev);
+ mutex_unlock(&kvm->lock);
+- ops->destroy(dev);
++ if (ops->destroy)
++ ops->destroy(dev);
+ return ret;
+ }
+
+@@ -4304,8 +4421,17 @@ static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
+ sizeof(kvm->stat), user_buffer, size, offset);
+ }
+
++static int kvm_vm_stats_release(struct inode *inode, struct file *file)
++{
++ struct kvm *kvm = file->private_data;
++
++ kvm_put_kvm(kvm);
++ return 0;
++}
++
+ static const struct file_operations kvm_vm_stats_fops = {
+ .read = kvm_vm_stats_read,
++ .release = kvm_vm_stats_release,
+ .llseek = noop_llseek,
+ };
+
+@@ -4324,6 +4450,9 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
+ put_unused_fd(fd);
+ return PTR_ERR(file);
+ }
++
++ kvm_get_kvm(kvm);
++
+ file->f_mode |= FMODE_PREAD;
+ fd_install(fd, file);
+
+@@ -4542,6 +4671,12 @@ struct compat_kvm_clear_dirty_log {
+ };
+ };
+
++long __weak kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
++ unsigned long arg)
++{
++ return -ENOTTY;
++}
++
+ static long kvm_vm_compat_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+ {
+@@ -4550,6 +4685,11 @@ static long kvm_vm_compat_ioctl(struct file *filp,
+
+ if (kvm->mm != current->mm || kvm->vm_bugged)
+ return -EIO;
++
++ r = kvm_arch_vm_compat_ioctl(filp, ioctl, arg);
++ if (r != -ENOTTY)
++ return r;
++
+ switch (ioctl) {
+ #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CLEAR_DIRTY_LOG: {
+@@ -5342,7 +5482,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+ }
+ add_uevent_var(env, "PID=%d", kvm->userspace_pid);
+
+- if (kvm->debugfs_dentry) {
++ if (!IS_ERR(kvm->debugfs_dentry)) {
+ char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
+
+ if (p) {
+@@ -5399,9 +5539,7 @@ static int kvm_suspend(void)
+ static void kvm_resume(void)
+ {
+ if (kvm_usage_count) {
+-#ifdef CONFIG_LOCKDEP
+- WARN_ON(lockdep_is_held(&kvm_count_lock));
+-#endif
++ lockdep_assert_not_held(&kvm_count_lock);
+ hardware_enable_nolock(NULL);
+ }
+ }
+@@ -5543,20 +5681,22 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+ goto out_free_3;
+ }
+
++ for_each_possible_cpu(cpu) {
++ if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
++ GFP_KERNEL, cpu_to_node(cpu))) {
++ r = -ENOMEM;
++ goto out_free_4;
++ }
++ }
++
+ r = kvm_async_pf_init();
+ if (r)
+- goto out_free;
++ goto out_free_4;
+
+ kvm_chardev_ops.owner = module;
+ kvm_vm_fops.owner = module;
+ kvm_vcpu_fops.owner = module;
+
+- r = misc_register(&kvm_dev);
+- if (r) {
+- pr_err("kvm: misc device register failed\n");
+- goto out_unreg;
+- }
+-
+ register_syscore_ops(&kvm_syscore_ops);
+
+ kvm_preempt_ops.sched_in = kvm_sched_in;
+@@ -5565,13 +5705,28 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+ kvm_init_debug();
+
+ r = kvm_vfio_ops_init();
+- WARN_ON(r);
++ if (WARN_ON_ONCE(r))
++ goto err_vfio;
++
++ /*
++ * Registration _must_ be the very last thing done, as this exposes
++ * /dev/kvm to userspace, i.e. all infrastructure must be set up!
++ */
++ r = misc_register(&kvm_dev);
++ if (r) {
++ pr_err("kvm: misc device register failed\n");
++ goto err_register;
++ }
+
+ return 0;
+
+-out_unreg:
++err_register:
++ kvm_vfio_ops_exit();
++err_vfio:
+ kvm_async_pf_deinit();
+-out_free:
++out_free_4:
++ for_each_possible_cpu(cpu)
++ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
+ kmem_cache_destroy(kvm_vcpu_cache);
+ out_free_3:
+ unregister_reboot_notifier(&kvm_reboot_notifier);
+@@ -5591,8 +5746,18 @@ EXPORT_SYMBOL_GPL(kvm_init);
+
+ void kvm_exit(void)
+ {
+- debugfs_remove_recursive(kvm_debugfs_dir);
++ int cpu;
++
++ /*
++ * Note, unregistering /dev/kvm doesn't strictly need to come first,
++ * as fops_get(), a.k.a. try_module_get(), prevents acquiring references
++ * to KVM while the module is being stopped.
++ */
+ misc_deregister(&kvm_dev);
++
++ debugfs_remove_recursive(kvm_debugfs_dir);
++ for_each_possible_cpu(cpu)
++ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
+ kmem_cache_destroy(kvm_vcpu_cache);
+ kvm_async_pf_deinit();
+ unregister_syscore_ops(&kvm_syscore_ops);